diff --git a/3rdparty/gpdf/.gitignore b/3rdparty/gpdf/.gitignore deleted file mode 100644 index 56641e7b..00000000 --- a/3rdparty/gpdf/.gitignore +++ /dev/null @@ -1,20 +0,0 @@ -################################################################################ -# 此 .gitignore 文件已由 Microsoft(R) Visual Studio 自动创建。 -################################################################################ - -/.vs -/debug -/release/qmake/qtvars_Win32_Release.props -/release/qmake -/Win32 -/Makefile.Release -/Makefile.Debug -/Makefile -/hg_gpdf.vcxproj.user -/hg_gpdf.vcxproj.filters -/hg_gpdf.vcxproj -/hg_gpdf.sln -/hg_gpdf.pro.user -/.qmake.stash -/3rdparty/tesseract/tessdata -/main.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/Makefile.am deleted file mode 100644 index 7bdec541..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/Makefile.am +++ /dev/null @@ -1,102 +0,0 @@ -AM_CPPFLAGS = $(ZLIB_CFLAGS) $(LIBPNG_CFLAGS) $(JPEG_CFLAGS) $(LIBTIFF_CFLAGS) $(LIBWEBP_CFLAGS) $(LIBWEBPMUX_CFLAGS) $(LIBJP2K_CFLAGS) - -lib_LTLIBRARIES = liblept.la -liblept_la_LIBADD = $(LIBPNG_LIBS) $(JPEG_LIBS) $(GIFLIB_LIBS) $(LIBTIFF_LIBS) $(LIBWEBP_LIBS) $(LIBWEBPMUX_LIBS) $(LIBJP2K_LIBS) $(GDI_LIBS) $(LIBM) $(ZLIB_LIBS) - -liblept_la_LDFLAGS = -no-undefined -version-info 5:3:0 - -liblept_la_SOURCES = adaptmap.c affine.c \ - affinecompose.c arrayaccess.c \ - bardecode.c baseline.c bbuffer.c \ - bilateral.c bilinear.c binarize.c \ - binexpand.c binreduce.c \ - blend.c bmf.c bmpio.c bmpiostub.c \ - bootnumgen1.c bootnumgen2.c \ - bootnumgen3.c bootnumgen4.c \ - boxbasic.c boxfunc1.c boxfunc2.c boxfunc3.c \ - boxfunc4.c boxfunc5.c bytearray.c \ - ccbord.c ccthin.c checkerboard.c \ - classapp.c colorcontent.c coloring.c \ - colormap.c colormorph.c \ - colorquant1.c colorquant2.c \ - colorseg.c colorspace.c \ - compare.c conncomp.c convertfiles.c \ - convolve.c correlscore.c \ - dewarp1.c dewarp2.c 
dewarp3.c dewarp4.c \ - dnabasic.c dnafunc1.c dnahash.c \ - dwacomb.2.c dwacomblow.2.c \ - edge.c encoding.c enhance.c \ - fhmtauto.c fhmtgen.1.c fhmtgenlow.1.c \ - finditalic.c flipdetect.c fliphmtgen.c \ - fmorphauto.c fmorphgen.1.c fmorphgenlow.1.c \ - fpix1.c fpix2.c gifio.c gifiostub.c \ - gplot.c graphics.c graymorph.c \ - grayquant.c heap.c jbclass.c \ - jp2kheader.c jp2kheaderstub.c \ - jp2kio.c jp2kiostub.c jpegio.c jpegiostub.c \ - kernel.c leptwin.c libversions.c list.c map.c maze.c \ - morph.c morphapp.c morphdwa.c morphseq.c \ - numabasic.c numafunc1.c numafunc2.c \ - pageseg.c paintcmap.c \ - parseprotos.c partify.c partition.c \ - pdfio1.c pdfio1stub.c pdfio2.c pdfio2stub.c \ - pix1.c pix2.c pix3.c pix4.c pix5.c \ - pixabasic.c pixacc.c pixafunc1.c pixafunc2.c \ - pixalloc.c pixarith.c pixcomp.c pixconv.c \ - pixlabel.c pixtiling.c pngio.c pngiostub.c \ - pnmio.c pnmiostub.c projective.c \ - psio1.c psio1stub.c psio2.c psio2stub.c \ - ptabasic.c ptafunc1.c ptafunc2.c ptra.c \ - quadtree.c queue.c rank.c rbtree.c \ - readbarcode.c readfile.c \ - recogbasic.c recogdid.c recogident.c \ - recogtrain.c regutils.c \ - rop.c roplow.c \ - rotate.c rotateam.c rotateorth.c rotateshear.c \ - runlength.c sarray1.c sarray2.c \ - scale1.c scale2.c seedfill.c \ - sel1.c sel2.c selgen.c \ - shear.c skew.c spixio.c \ - stack.c stringcode.c \ - strokes.c sudoku.c textops.c \ - tiffio.c tiffiostub.c \ - utils1.c utils2.c warper.c watershed.c \ - webpio.c webpiostub.c webpanimio.c webpanimiostub.c \ - writefile.c zlibmem.c zlibmemstub.c - -pkginclude_HEADERS = allheaders.h alltypes.h \ - array.h arrayaccess.h bbuffer.h bilateral.h \ - bmf.h bmfdata.h bmp.h ccbord.h \ - dewarp.h endianness.h environ.h \ - gplot.h heap.h imageio.h jbclass.h \ - leptwin.h list.h \ - morph.h pix.h ptra.h queue.h rbtree.h \ - readbarcode.h recog.h regutils.h stack.h \ - stringcode.h sudoku.h watershed.h - -LDADD = liblept.la - -EXTRA_DIST = hmttemplate1.txt hmttemplate2.txt \ - 
leptonica-license.txt \ - morphtemplate1.txt morphtemplate2.txt \ - stringtemplate1.txt stringtemplate2.txt - -$(top_builddir)/prog/xtractprotos$(EXEEXT): liblept.la - $(MAKE) -C $(top_builddir)/prog xtractprotos$(EXEEXT) - -allheaders: $(top_builddir)/prog/xtractprotos$(EXEEXT) $(liblept_la_SOURCES) - cd $(srcdir) && $(abs_top_builddir)/prog/xtractprotos$(EXEEXT) -prestring=LEPT_DLL -protos=inline $(liblept_la_SOURCES) - -install-data-hook: - cd $(DESTDIR)$(libdir);\ - for ext in a la so sl dylib; do\ - if test -f liblept.$$ext; then\ - $(LN_S) liblept.$$ext libleptonica.$$ext;\ - fi;\ - done - -uninstall-hook: - cd $(DESTDIR)$(libdir);\ - for ext in a la so sl dylib; do\ - rm -f libleptonica.$$ext;\ - done diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/adaptmap.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/adaptmap.c deleted file mode 100644 index 634be33b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/adaptmap.c +++ /dev/null @@ -1,2950 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file adaptmap.c - *
- *
- *  -------------------------------------------------------------------
- *
- *  Image binarization algorithms are found in:
- *     grayquant.c:   standard, simple, general grayscale quantization
- *     adaptmap.c:    local adaptive; mostly gray-to-gray in preparation
- *                    for binarization
- *     binarize.c:    special binarization methods, locally adaptive.
- *
- *  -------------------------------------------------------------------
- *
- *      Clean background to white using background normalization
- *          PIX       *pixCleanBackgroundToWhite()
- *
- *      Adaptive background normalization (top-level functions)
- *          PIX       *pixBackgroundNormSimple()     8 and 32 bpp
- *          PIX       *pixBackgroundNorm()           8 and 32 bpp
- *          PIX       *pixBackgroundNormMorph()      8 and 32 bpp
- *
- *      Arrays of inverted background values for normalization (16 bpp)
- *          l_int32    pixBackgroundNormGrayArray()   8 bpp input
- *          l_int32    pixBackgroundNormRGBArrays()   32 bpp input
- *          l_int32    pixBackgroundNormGrayArrayMorph()   8 bpp input
- *          l_int32    pixBackgroundNormRGBArraysMorph()   32 bpp input
- *
- *      Measurement of local background
- *          l_int32    pixGetBackgroundGrayMap()        8 bpp
- *          l_int32    pixGetBackgroundRGBMap()         32 bpp
- *          l_int32    pixGetBackgroundGrayMapMorph()   8 bpp
- *          l_int32    pixGetBackgroundRGBMapMorph()    32 bpp
- *          l_int32    pixFillMapHoles()
- *          PIX       *pixExtendByReplication()         8 bpp
- *          l_int32    pixSmoothConnectedRegions()      8 bpp
- *
- *      Measurement of local foreground
- *          l_int32    pixGetForegroundGrayMap()        8 bpp
- *
- *      Generate inverted background map for each component
- *          PIX       *pixGetInvBackgroundMap()   16 bpp
- *
- *      Apply inverse background map to image
- *          PIX       *pixApplyInvBackgroundGrayMap()   8 bpp
- *          PIX       *pixApplyInvBackgroundRGBMap()    32 bpp
- *
- *      Apply variable map
- *          PIX       *pixApplyVariableGrayMap()        8 bpp
- *
- *      Non-adaptive (global) mapping
- *          PIX       *pixGlobalNormRGB()               32 bpp or cmapped
- *          PIX       *pixGlobalNormNoSatRGB()          32 bpp
- *
- *      Adaptive threshold spread normalization
- *          l_int32    pixThresholdSpreadNorm()         8 bpp
- *
- *      Adaptive background normalization (flexible adaptaption)
- *          PIX       *pixBackgroundNormFlex()          8 bpp
- *
- *      Adaptive contrast normalization
- *          PIX             *pixContrastNorm()          8 bpp
- *          l_int32          pixMinMaxTiles()
- *          l_int32          pixSetLowContrast()
- *          PIX             *pixLinearTRCTiled()
- *          static l_int32  *iaaGetLinearTRC()
- *
- *  Background normalization is done by generating a reduced map (or set
- *  of maps) representing the estimated background value of the
- *  input image, and using this to shift the pixel values so that
- *  this background value is set to some constant value.
- *
- *  Specifically, normalization has 3 steps:
- *    (1) Generate a background map at a reduced scale.
- *    (2) Make the array of inverted background values by inverting
- *        the map.  The result is an array of local multiplicative factors.
- *    (3) Apply this inverse background map to the image
- *
- *  The inverse background arrays can be generated in two different ways here:
- *    (1) Remove the 'foreground' pixels and average over the remaining
- *        pixels in each tile.  Propagate values into tiles where
- *        values have not been assigned, either because there was not
- *        enough background in the tile or because the tile is covered
- *        by a foreground region described by an image mask.
- *        After the background map is made, the inverse map is generated by
- *        smoothing over some number of adjacent tiles
- *        (block convolution) and then inverting.
- *    (2) Remove the foreground pixels using a morphological closing
- *        on a subsampled version of the image.  Propagate values
- *        into pixels covered by an optional image mask.  Invert the
- *        background map without preconditioning by convolutional smoothing.
- *
- *  Other methods for adaptively normalizing the image are also given here.
- *
- *  (1) pixThresholdSpreadNorm() computes a local threshold over the image
- *      and normalizes the input pixel values so that this computed threshold
- *      is a constant across the entire image.
- *
- *  (2) pixContrastNorm() computes and applies a local TRC so that the
- *      local dynamic range is expanded to the full 8 bits, where the
- *      darkest pixels are mapped to 0 and the lightest to 255.  This is
- *      useful for improving the appearance of pages with very light
- *      foreground or very dark background, and where the local TRC
- *      function doesn't change rapidly with position.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Default input parameters for pixBackgroundNormSimple() - * Notes: - * (1) mincount must never exceed the tile area (width * height) - * (2) bgval must be sufficiently below 255 to avoid accidental - * saturation; otherwise it should be large to avoid - * shrinking the dynamic range - * (3) results should otherwise not be sensitive to these values - */ -static const l_int32 DefaultTileWidth = 10; /*!< default tile width */ -static const l_int32 DefaultTileHeight = 15; /*!< default tile height */ -static const l_int32 DefaultFgThreshold = 60; /*!< default fg threshold */ -static const l_int32 DefaultMinCount = 40; /*!< default minimum count */ -static const l_int32 DefaultBgVal = 200; /*!< default bg value */ -static const l_int32 DefaultXSmoothSize = 2; /*!< default x smooth size */ -static const l_int32 DefaultYSmoothSize = 1; /*!< default y smooth size */ - -static l_int32 *iaaGetLinearTRC(l_int32 **iaa, l_int32 diff); - -#ifndef NO_CONSOLE_IO -#define DEBUG_GLOBAL 0 /*!< set to 1 to debug pixGlobalNormNoSatRGB() */ -#endif /* ~NO_CONSOLE_IO */ - -/*------------------------------------------------------------------* - * Clean background to white using background normalization * - *------------------------------------------------------------------*/ -/*! - * \brief pixCleanBackgroundToWhite() - * - * \param[in] pixs 8 bpp grayscale or 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] pixg [optional] 8 bpp grayscale version; can be null - * \param[in] gamma gamma correction; must be > 0.0; typically ~1.0 - * \param[in] blackval dark value to set to black (0) - * \param[in] whiteval light value to set to white (255) - * \return pixd 8 bpp or 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *    (1) This is a simplified interface for cleaning an image.
- *        For comparison, see pixAdaptThresholdToBinaryGen().
- *    (2) The suggested default values for the input parameters are:
- *          gamma:    1.0  (reduce this to increase the contrast; e.g.,
- *                          for light text)
- *          blackval   70  (a bit more than 60)
- *          whiteval  190  (a bit less than 200)
- * 
- */ -PIX * -pixCleanBackgroundToWhite(PIX *pixs, - PIX *pixim, - PIX *pixg, - l_float32 gamma, - l_int32 blackval, - l_int32 whiteval) -{ -l_int32 d; -PIX *pixd; - - PROCNAME("pixCleanBackgroundToWhite"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 8 or 32", procName, NULL); - - pixd = pixBackgroundNormSimple(pixs, pixim, pixg); - if (!pixd) - return (PIX *)ERROR_PTR("background norm failedd", procName, NULL); - pixGammaTRC(pixd, pixd, gamma, blackval, whiteval); - return pixd; -} - - -/*------------------------------------------------------------------* - * Adaptive background normalization * - *------------------------------------------------------------------*/ -/*! - * \brief pixBackgroundNormSimple() - * - * \param[in] pixs 8 bpp grayscale or 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] pixg [optional] 8 bpp grayscale version; can be null - * \return pixd 8 bpp or 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *    (1) This is a simplified interface to pixBackgroundNorm(),
- *        where seven parameters are defaulted.
- *    (2) The input image is either grayscale or rgb.
- *    (3) See pixBackgroundNorm() for usage and function.
- * 
- */ -PIX * -pixBackgroundNormSimple(PIX *pixs, - PIX *pixim, - PIX *pixg) -{ - return pixBackgroundNorm(pixs, pixim, pixg, - DefaultTileWidth, DefaultTileHeight, - DefaultFgThreshold, DefaultMinCount, - DefaultBgVal, DefaultXSmoothSize, - DefaultYSmoothSize); -} - - -/*! - * \brief pixBackgroundNorm() - * - * \param[in] pixs 8 bpp grayscale or 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] pixg [optional] 8 bpp grayscale version; can be null - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[in] bgval target bg val; typ. > 128 - * \param[in] smoothx half-width of block convolution kernel width - * \param[in] smoothy half-width of block convolution kernel height - * \return pixd 8 bpp or 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *    (1) This is a top-level interface for normalizing the image intensity
- *        by mapping the image so that the background is near the input
- *        value 'bgval'.
- *    (2) The input image is either grayscale or rgb.
- *    (3) For each component in the input image, the background value
- *        in each tile is estimated using the values in the tile that
- *        are not part of the foreground, where the foreground is
- *        determined by the input 'thresh' argument.
- *    (4) An optional binary mask can be specified, with the foreground
- *        pixels typically over image regions.  The resulting background
- *        map values will be determined by surrounding pixels that are
- *        not under the mask foreground.  The origin (0,0) of this mask
- *        is assumed to be aligned with the origin of the input image.
- *        This binary mask must not fully cover pixs, because then there
- *        will be no pixels in the input image available to compute
- *        the background.
- *    (5) An optional grayscale version of the input pixs can be supplied.
- *        The only reason to do this is if the input is RGB and this
- *        grayscale version can be used elsewhere.  If the input is RGB
- *        and this is not supplied, it is made internally using only
- *        the green component, and destroyed after use.
- *    (6) The dimensions of the pixel tile (sx, sy) give the amount by
- *        by which the map is reduced in size from the input image.
- *    (7) The threshold is used to binarize the input image, in order to
- *        locate the foreground components.  If this is set too low,
- *        some actual foreground may be used to determine the maps;
- *        if set too high, there may not be enough background
- *        to determine the map values accurately.  Typically, it's
- *        better to err by setting the threshold too high.
- *    (8) A 'mincount' threshold is a minimum count of pixels in a
- *        tile for which a background reading is made, in order for that
- *        pixel in the map to be valid.  This number should perhaps be
- *        at least 1/3 the size of the tile.
- *    (9) A 'bgval' target background value for the normalized image.  This
- *        should be at least 128.  If set too close to 255, some
- *        clipping will occur in the result.
- *    (10) Two factors, 'smoothx' and 'smoothy', are input for smoothing
- *        the map.  Each low-pass filter kernel dimension is
- *        is 2 * (smoothing factor) + 1, so a
- *        value of 0 means no smoothing. A value of 1 or 2 is recommended.
- * 
- */ -PIX * -pixBackgroundNorm(PIX *pixs, - PIX *pixim, - PIX *pixg, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - l_int32 bgval, - l_int32 smoothx, - l_int32 smoothy) -{ -l_int32 d, allfg; -PIX *pixm, *pixmi, *pixd; -PIX *pixmr, *pixmg, *pixmb, *pixmri, *pixmgi, *pixmbi; - - PROCNAME("pixBackgroundNorm"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (sx < 4 || sy < 4) - return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - /* If pixim exists, verify that it is not all foreground. */ - if (pixim) { - pixInvert(pixim, pixim); - pixZero(pixim, &allfg); - pixInvert(pixim, pixim); - if (allfg) - return (PIX *)ERROR_PTR("pixim all foreground", procName, NULL); - } - - pixd = NULL; - if (d == 8) { - pixm = NULL; - pixGetBackgroundGrayMap(pixs, pixim, sx, sy, thresh, mincount, &pixm); - if (!pixm) { - L_WARNING("map not made; return a copy of the source\n", procName); - return pixCopy(NULL, pixs); - } - - pixmi = pixGetInvBackgroundMap(pixm, bgval, smoothx, smoothy); - if (!pixmi) { - L_WARNING("pixmi not made; return a copy of source\n", procName); - pixDestroy(&pixm); - return pixCopy(NULL, pixs); - } else { - pixd = pixApplyInvBackgroundGrayMap(pixs, pixmi, sx, sy); - } - - pixDestroy(&pixm); - pixDestroy(&pixmi); - } - else { - pixmr = pixmg = pixmb = NULL; - pixGetBackgroundRGBMap(pixs, pixim, pixg, sx, sy, thresh, - mincount, &pixmr, &pixmg, &pixmb); - if (!pixmr || !pixmg || !pixmb) { - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - L_WARNING("map not made; return a copy of the source\n", procName); - return pixCopy(NULL, pixs); - } - - pixmri = pixGetInvBackgroundMap(pixmr, bgval, smoothx, smoothy); - pixmgi = 
pixGetInvBackgroundMap(pixmg, bgval, smoothx, smoothy); - pixmbi = pixGetInvBackgroundMap(pixmb, bgval, smoothx, smoothy); - if (!pixmri || !pixmgi || !pixmbi) { - L_WARNING("not all pixm*i are made; return src copy\n", procName); - pixd = pixCopy(NULL, pixs); - } else { - pixd = pixApplyInvBackgroundRGBMap(pixs, pixmri, pixmgi, pixmbi, - sx, sy); - } - - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - pixDestroy(&pixmri); - pixDestroy(&pixmgi); - pixDestroy(&pixmbi); - } - - if (!pixd) - ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixBackgroundNormMorph() - * - * \param[in] pixs 8 bpp grayscale or 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] reduction at which morph closings are done; between 2 and 16 - * \param[in] size of square Sel for the closing; use an odd number - * \param[in] bgval target bg val; typ. > 128 - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *    (1) This is a top-level interface for normalizing the image intensity
- *        by mapping the image so that the background is near the input
- *        value 'bgval'.
- *    (2) The input image is either grayscale or rgb.
- *    (3) For each component in the input image, the background value
- *        is estimated using a grayscale closing; hence the 'Morph'
- *        in the function name.
- *    (4) An optional binary mask can be specified, with the foreground
- *        pixels typically over image regions.  The resulting background
- *        map values will be determined by surrounding pixels that are
- *        not under the mask foreground.  The origin (0,0) of this mask
- *        is assumed to be aligned with the origin of the input image.
- *        This binary mask must not fully cover pixs, because then there
- *        will be no pixels in the input image available to compute
- *        the background.
- *    (5) The map is computed at reduced size (given by 'reduction')
- *        from the input pixs and optional pixim.  At this scale,
- *        pixs is closed to remove the background, using a square Sel
- *        of odd dimension.  The product of reduction * size should be
- *        large enough to remove most of the text foreground.
- *    (6) No convolutional smoothing needs to be done on the map before
- *        inverting it.
- *    (7) A 'bgval' target background value for the normalized image.  This
- *        should be at least 128.  If set too close to 255, some
- *        clipping will occur in the result.
- * 
- */ -PIX * -pixBackgroundNormMorph(PIX *pixs, - PIX *pixim, - l_int32 reduction, - l_int32 size, - l_int32 bgval) -{ -l_int32 d, allfg; -PIX *pixm, *pixmi, *pixd; -PIX *pixmr, *pixmg, *pixmb, *pixmri, *pixmgi, *pixmbi; - - PROCNAME("pixBackgroundNormMorph"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (reduction < 2 || reduction > 16) - return (PIX *)ERROR_PTR("reduction must be between 2 and 16", - procName, NULL); - - /* If pixim exists, verify that it is not all foreground. */ - if (pixim) { - pixInvert(pixim, pixim); - pixZero(pixim, &allfg); - pixInvert(pixim, pixim); - if (allfg) - return (PIX *)ERROR_PTR("pixim all foreground", procName, NULL); - } - - pixd = NULL; - if (d == 8) { - pixGetBackgroundGrayMapMorph(pixs, pixim, reduction, size, &pixm); - if (!pixm) - return (PIX *)ERROR_PTR("pixm not made", procName, NULL); - pixmi = pixGetInvBackgroundMap(pixm, bgval, 0, 0); - if (!pixmi) - ERROR_PTR("pixmi not made", procName, NULL); - else - pixd = pixApplyInvBackgroundGrayMap(pixs, pixmi, - reduction, reduction); - pixDestroy(&pixm); - pixDestroy(&pixmi); - } - else { /* d == 32 */ - pixmr = pixmg = pixmb = NULL; - pixGetBackgroundRGBMapMorph(pixs, pixim, reduction, size, - &pixmr, &pixmg, &pixmb); - if (!pixmr || !pixmg || !pixmb) { - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - return (PIX *)ERROR_PTR("not all pixm*", procName, NULL); - } - - pixmri = pixGetInvBackgroundMap(pixmr, bgval, 0, 0); - pixmgi = pixGetInvBackgroundMap(pixmg, bgval, 0, 0); - pixmbi = pixGetInvBackgroundMap(pixmb, bgval, 0, 0); - if (!pixmri || !pixmgi || !pixmbi) - ERROR_PTR("not all pixm*i are made", procName, NULL); - else - pixd = pixApplyInvBackgroundRGBMap(pixs, pixmri, pixmgi, pixmbi, - reduction, reduction); - - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - pixDestroy(&pixmri); - 
pixDestroy(&pixmgi); - pixDestroy(&pixmbi); - } - - if (!pixd) - ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - return pixd; -} - - -/*-------------------------------------------------------------------------* - * Arrays of inverted background values for normalization * - *-------------------------------------------------------------------------* - * Notes for these four functions: * - * (1) They are useful if you need to save the actual mapping array. * - * (2) They could be used in the top-level functions but are * - * not because their use makes those functions less clear. * - * (3) Each component in the input pixs generates a 16 bpp pix array. * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixBackgroundNormGrayArray() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[in] bgval target bg val; typ. > 128 - * \param[in] smoothx half-width of block convolution kernel width - * \param[in] smoothy half-width of block convolution kernel height - * \param[out] ppixd 16 bpp array of inverted background value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *    (1) See notes in pixBackgroundNorm().
- *    (2) This returns a 16 bpp pix that can be used by
- *        pixApplyInvBackgroundGrayMap() to generate a normalized version
- *        of the input pixs.
- * 
- */ -l_ok -pixBackgroundNormGrayArray(PIX *pixs, - PIX *pixim, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - l_int32 bgval, - l_int32 smoothx, - l_int32 smoothy, - PIX **ppixd) -{ -l_int32 allfg; -PIX *pixm; - - PROCNAME("pixBackgroundNormGrayArray"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (sx < 4 || sy < 4) - return ERROR_INT("sx and sy must be >= 4", procName, 1); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - /* If pixim exists, verify that it is not all foreground. */ - if (pixim) { - pixInvert(pixim, pixim); - pixZero(pixim, &allfg); - pixInvert(pixim, pixim); - if (allfg) - return ERROR_INT("pixim all foreground", procName, 1); - } - - pixGetBackgroundGrayMap(pixs, pixim, sx, sy, thresh, mincount, &pixm); - if (!pixm) - return ERROR_INT("pixm not made", procName, 1); - *ppixd = pixGetInvBackgroundMap(pixm, bgval, smoothx, smoothy); - pixCopyResolution(*ppixd, pixs); - pixDestroy(&pixm); - return 0; -} - - -/*! - * \brief pixBackgroundNormRGBArrays() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] pixg [optional] 8 bpp grayscale version; can be null - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[in] bgval target bg val; typ. 
> 128 - * \param[in] smoothx half-width of block convolution kernel width - * \param[in] smoothy half-width of block convolution kernel height - * \param[out] ppixr 16 bpp array of inverted R background value - * \param[out] ppixg 16 bpp array of inverted G background value - * \param[out] ppixb 16 bpp array of inverted B background value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *    (1) See notes in pixBackgroundNorm().
- *    (2) This returns a set of three 16 bpp pix that can be used by
- *        pixApplyInvBackgroundGrayMap() to generate a normalized version
- *        of each component of the input pixs.
- * 
- */ -l_ok -pixBackgroundNormRGBArrays(PIX *pixs, - PIX *pixim, - PIX *pixg, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - l_int32 bgval, - l_int32 smoothx, - l_int32 smoothy, - PIX **ppixr, - PIX **ppixg, - PIX **ppixb) -{ -l_int32 allfg; -PIX *pixmr, *pixmg, *pixmb; - - PROCNAME("pixBackgroundNormRGBArrays"); - - if (!ppixr || !ppixg || !ppixb) - return ERROR_INT("&pixr, &pixg, &pixb not all defined", procName, 1); - *ppixr = *ppixg = *ppixb = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (sx < 4 || sy < 4) - return ERROR_INT("sx and sy must be >= 4", procName, 1); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - /* If pixim exists, verify that it is not all foreground. */ - if (pixim) { - pixInvert(pixim, pixim); - pixZero(pixim, &allfg); - pixInvert(pixim, pixim); - if (allfg) - return ERROR_INT("pixim all foreground", procName, 1); - } - - pixGetBackgroundRGBMap(pixs, pixim, pixg, sx, sy, thresh, mincount, - &pixmr, &pixmg, &pixmb); - if (!pixmr || !pixmg || !pixmb) { - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - return ERROR_INT("not all pixm* made", procName, 1); - } - - *ppixr = pixGetInvBackgroundMap(pixmr, bgval, smoothx, smoothy); - *ppixg = pixGetInvBackgroundMap(pixmg, bgval, smoothx, smoothy); - *ppixb = pixGetInvBackgroundMap(pixmb, bgval, smoothx, smoothy); - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - return 0; -} - - -/*! 
- * \brief pixBackgroundNormGrayArrayMorph() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] reduction at which morph closings are done; between 2 and 16 - * \param[in] size of square Sel for the closing; use an odd number - * \param[in] bgval target bg val; typ. > 128 - * \param[out] ppixd 16 bpp array of inverted background value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *    (1) See notes in pixBackgroundNormMorph().
- *    (2) This returns a 16 bpp pix that can be used by
- *        pixApplyInvBackgroundGrayMap() to generate a normalized version
- *        of the input pixs.
- * 
- */ -l_ok -pixBackgroundNormGrayArrayMorph(PIX *pixs, - PIX *pixim, - l_int32 reduction, - l_int32 size, - l_int32 bgval, - PIX **ppixd) -{ -l_int32 allfg; -PIX *pixm; - - PROCNAME("pixBackgroundNormGrayArrayMorph"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not 8 bpp", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (reduction < 2 || reduction > 16) - return ERROR_INT("reduction must be between 2 and 16", procName, 1); - - /* If pixim exists, verify that it is not all foreground. */ - if (pixim) { - pixInvert(pixim, pixim); - pixZero(pixim, &allfg); - pixInvert(pixim, pixim); - if (allfg) - return ERROR_INT("pixim all foreground", procName, 1); - } - - pixGetBackgroundGrayMapMorph(pixs, pixim, reduction, size, &pixm); - if (!pixm) - return ERROR_INT("pixm not made", procName, 1); - *ppixd = pixGetInvBackgroundMap(pixm, bgval, 0, 0); - pixCopyResolution(*ppixd, pixs); - pixDestroy(&pixm); - return 0; -} - - -/*! - * \brief pixBackgroundNormRGBArraysMorph() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] reduction at which morph closings are done; between 2 and 16 - * \param[in] size of square Sel for the closing; use an odd number - * \param[in] bgval target bg val; typ. > 128 - * \param[out] ppixr 16 bpp array of inverted R background value - * \param[out] ppixg 16 bpp array of inverted G background value - * \param[out] ppixb 16 bpp array of inverted B background value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *    (1) See notes in pixBackgroundNormMorph().
- *    (2) This returns a set of three 16 bpp pix that can be used by
- *        pixApplyInvBackgroundGrayMap() to generate a normalized version
- *        of each component of the input pixs.
- * 
- */ -l_ok -pixBackgroundNormRGBArraysMorph(PIX *pixs, - PIX *pixim, - l_int32 reduction, - l_int32 size, - l_int32 bgval, - PIX **ppixr, - PIX **ppixg, - PIX **ppixb) -{ -l_int32 allfg; -PIX *pixmr, *pixmg, *pixmb; - - PROCNAME("pixBackgroundNormRGBArraysMorph"); - - if (!ppixr || !ppixg || !ppixb) - return ERROR_INT("&pixr, &pixg, &pixb not all defined", procName, 1); - *ppixr = *ppixg = *ppixb = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (reduction < 2 || reduction > 16) - return ERROR_INT("reduction must be between 2 and 16", procName, 1); - - /* If pixim exists, verify that it is not all foreground. */ - if (pixim) { - pixInvert(pixim, pixim); - pixZero(pixim, &allfg); - pixInvert(pixim, pixim); - if (allfg) - return ERROR_INT("pixim all foreground", procName, 1); - } - - pixGetBackgroundRGBMapMorph(pixs, pixim, reduction, size, - &pixmr, &pixmg, &pixmb); - if (!pixmr || !pixmg || !pixmb) { - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - return ERROR_INT("not all pixm* made", procName, 1); - } - - *ppixr = pixGetInvBackgroundMap(pixmr, bgval, 0, 0); - *ppixg = pixGetInvBackgroundMap(pixmg, bgval, 0, 0); - *ppixb = pixGetInvBackgroundMap(pixmb, bgval, 0, 0); - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - return 0; -} - - -/*------------------------------------------------------------------* - * Measurement of local background * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixGetBackgroundGrayMap() - * - * \param[in] pixs 8 bpp grayscale; not cmapped - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null; - * it should not have only foreground pixels - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[out] ppixd 8 bpp grayscale map - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The background is measured in regions that don't have
- *          images.  It is then propagated into the image regions,
- *          and finally smoothed in each image region.
- * 
- */ -l_ok -pixGetBackgroundGrayMap(PIX *pixs, - PIX *pixim, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - PIX **ppixd) -{ -l_int32 w, h, wd, hd, wim, him, wpls, wplim, wpld, wplf; -l_int32 xim, yim, delx, nx, ny, i, j, k, m; -l_int32 count, sum, val8; -l_int32 empty, fgpixels; -l_uint32 *datas, *dataim, *datad, *dataf, *lines, *lineim, *lined, *linef; -l_float32 scalex, scaley; -PIX *pixd, *piximi, *pixb, *pixf, *pixims; - - PROCNAME("pixGetBackgroundGrayMap"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (sx < 4 || sy < 4) - return ERROR_INT("sx and sy must be >= 4", procName, 1); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - /* Evaluate the 'image' mask, pixim, and make sure - * it is not all fg. */ - fgpixels = 0; /* boolean for existence of fg pixels in the image mask. */ - if (pixim) { - piximi = pixInvert(NULL, pixim); /* set non-'image' pixels to 1 */ - pixZero(piximi, &empty); - pixDestroy(&piximi); - if (empty) - return ERROR_INT("pixim all fg; no background", procName, 1); - pixZero(pixim, &empty); - if (!empty) /* there are fg pixels in pixim */ - fgpixels = 1; - } - - /* Generate the foreground mask, pixf, which is at - * full resolution. These pixels will be ignored when - * computing the background values. */ - pixb = pixThresholdToBinary(pixs, thresh); - pixf = pixMorphSequence(pixb, "d7.1 + d1.7", 0); - pixDestroy(&pixb); - - - /* ------------- Set up the output map pixd --------------- */ - /* Generate pixd, which is reduced by the factors (sx, sy). 
*/ - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - wd = (w + sx - 1) / sx; - hd = (h + sy - 1) / sy; - pixd = pixCreate(wd, hd, 8); - - /* Note: we only compute map values in tiles that are complete. - * In general, tiles at right and bottom edges will not be - * complete, and we must fill them in later. */ - nx = w / sx; - ny = h / sy; - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - wplf = pixGetWpl(pixf); - dataf = pixGetData(pixf); - for (i = 0; i < ny; i++) { - lines = datas + sy * i * wpls; - linef = dataf + sy * i * wplf; - lined = datad + i * wpld; - for (j = 0; j < nx; j++) { - delx = j * sx; - sum = 0; - count = 0; - for (k = 0; k < sy; k++) { - for (m = 0; m < sx; m++) { - if (GET_DATA_BIT(linef + k * wplf, delx + m) == 0) { - sum += GET_DATA_BYTE(lines + k * wpls, delx + m); - count++; - } - } - } - if (count >= mincount) { - val8 = sum / count; - SET_DATA_BYTE(lined, j, val8); - } - } - } - pixDestroy(&pixf); - - /* If there is an optional mask with fg pixels, erase the previous - * calculation for the corresponding map pixels, setting the - * map values to 0. Then, when all the map holes are filled, - * these erased pixels will be set by the surrounding map values. - * - * The calculation here is relatively efficient: for each pixel - * in pixd (which corresponds to a tile of mask pixels in pixim) - * we look only at the pixel in pixim that is at the center - * of the tile. If the mask pixel is ON, we reset the map - * pixel in pixd to 0, so that it can later be filled in. 
*/ - pixims = NULL; - if (pixim && fgpixels) { - wim = pixGetWidth(pixim); - him = pixGetHeight(pixim); - dataim = pixGetData(pixim); - wplim = pixGetWpl(pixim); - for (i = 0; i < ny; i++) { - yim = i * sy + sy / 2; - if (yim >= him) - break; - lineim = dataim + yim * wplim; - for (j = 0; j < nx; j++) { - xim = j * sx + sx / 2; - if (xim >= wim) - break; - if (GET_DATA_BIT(lineim, xim)) - pixSetPixel(pixd, j, i, 0); - } - } - } - - /* Fill all the holes in the map. */ - if (pixFillMapHoles(pixd, nx, ny, L_FILL_BLACK)) { - pixDestroy(&pixd); - L_WARNING("can't make the map\n", procName); - return 1; - } - - /* Finally, for each connected region corresponding to the - * 'image' mask, reset all pixels to their average value. - * Each of these components represents an image (or part of one) - * in the input, and this smooths the background values - * in each of these regions. */ - if (pixim && fgpixels) { - scalex = 1. / (l_float32)sx; - scaley = 1. / (l_float32)sy; - pixims = pixScaleBySampling(pixim, scalex, scaley); - pixSmoothConnectedRegions(pixd, pixims, 2); - pixDestroy(&pixims); - } - - *ppixd = pixd; - pixCopyResolution(*ppixd, pixs); - return 0; -} - - -/*! - * \brief pixGetBackgroundRGBMap() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null; it - * should not have all foreground pixels - * \param[in] pixg [optional] 8 bpp grayscale version; can be null - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[out] ppixmr red component map - * \param[out] ppixmg green component map - * \param[out] ppixmb blue component map - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If pixg, which is a grayscale version of pixs, is provided,
- *          use this internally to generate the foreground mask.
- *          Otherwise, a grayscale version of pixs will be generated
- *          from the green component only, used, and destroyed.
- * 
- */ -l_ok -pixGetBackgroundRGBMap(PIX *pixs, - PIX *pixim, - PIX *pixg, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - PIX **ppixmr, - PIX **ppixmg, - PIX **ppixmb) -{ -l_int32 w, h, wm, hm, wim, him, wpls, wplim, wplf; -l_int32 xim, yim, delx, nx, ny, i, j, k, m; -l_int32 count, rsum, gsum, bsum, rval, gval, bval; -l_int32 empty, fgpixels; -l_uint32 pixel; -l_uint32 *datas, *dataim, *dataf, *lines, *lineim, *linef; -l_float32 scalex, scaley; -PIX *piximi, *pixgc, *pixb, *pixf, *pixims; -PIX *pixmr, *pixmg, *pixmb; - - PROCNAME("pixGetBackgroundRGBMap"); - - if (!ppixmr || !ppixmg || !ppixmb) - return ERROR_INT("&pixm* not all defined", procName, 1); - *ppixmr = *ppixmg = *ppixmb = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (sx < 4 || sy < 4) - return ERROR_INT("sx and sy must be >= 4", procName, 1); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - /* Evaluate the mask pixim and make sure it is not all foreground */ - fgpixels = 0; /* boolean for existence of fg mask pixels */ - if (pixim) { - piximi = pixInvert(NULL, pixim); /* set non-'image' pixels to 1 */ - pixZero(piximi, &empty); - pixDestroy(&piximi); - if (empty) - return ERROR_INT("pixim all fg; no background", procName, 1); - pixZero(pixim, &empty); - if (!empty) /* there are fg pixels in pixim */ - fgpixels = 1; - } - - /* Generate the foreground mask. These pixels will be - * ignored when computing the background values. 
*/ - if (pixg) /* use the input grayscale version if it is provided */ - pixgc = pixClone(pixg); - else - pixgc = pixConvertRGBToGrayFast(pixs); - pixb = pixThresholdToBinary(pixgc, thresh); - pixf = pixMorphSequence(pixb, "d7.1 + d1.7", 0); - pixDestroy(&pixgc); - pixDestroy(&pixb); - - /* Generate the output mask images */ - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - wm = (w + sx - 1) / sx; - hm = (h + sy - 1) / sy; - pixmr = pixCreate(wm, hm, 8); - pixmg = pixCreate(wm, hm, 8); - pixmb = pixCreate(wm, hm, 8); - - /* ------------- Set up the mapping images --------------- */ - /* Note: we only compute map values in tiles that are complete. - * In general, tiles at right and bottom edges will not be - * complete, and we must fill them in later. */ - nx = w / sx; - ny = h / sy; - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wplf = pixGetWpl(pixf); - dataf = pixGetData(pixf); - for (i = 0; i < ny; i++) { - lines = datas + sy * i * wpls; - linef = dataf + sy * i * wplf; - for (j = 0; j < nx; j++) { - delx = j * sx; - rsum = gsum = bsum = 0; - count = 0; - for (k = 0; k < sy; k++) { - for (m = 0; m < sx; m++) { - if (GET_DATA_BIT(linef + k * wplf, delx + m) == 0) { - pixel = *(lines + k * wpls + delx + m); - rsum += (pixel >> 24); - gsum += ((pixel >> 16) & 0xff); - bsum += ((pixel >> 8) & 0xff); - count++; - } - } - } - if (count >= mincount) { - rval = rsum / count; - gval = gsum / count; - bval = bsum / count; - pixSetPixel(pixmr, j, i, rval); - pixSetPixel(pixmg, j, i, gval); - pixSetPixel(pixmb, j, i, bval); - } - } - } - pixDestroy(&pixf); - - /* If there is an optional mask with fg pixels, erase the previous - * calculation for the corresponding map pixels, setting the - * map values in each of the 3 color maps to 0. Then, when - * all the map holes are filled, these erased pixels will - * be set by the surrounding map values. 
*/ - if (pixim) { - wim = pixGetWidth(pixim); - him = pixGetHeight(pixim); - dataim = pixGetData(pixim); - wplim = pixGetWpl(pixim); - for (i = 0; i < ny; i++) { - yim = i * sy + sy / 2; - if (yim >= him) - break; - lineim = dataim + yim * wplim; - for (j = 0; j < nx; j++) { - xim = j * sx + sx / 2; - if (xim >= wim) - break; - if (GET_DATA_BIT(lineim, xim)) { - pixSetPixel(pixmr, j, i, 0); - pixSetPixel(pixmg, j, i, 0); - pixSetPixel(pixmb, j, i, 0); - } - } - } - } - - /* ----------------- Now fill in the holes ----------------------- */ - if (pixFillMapHoles(pixmr, nx, ny, L_FILL_BLACK) || - pixFillMapHoles(pixmg, nx, ny, L_FILL_BLACK) || - pixFillMapHoles(pixmb, nx, ny, L_FILL_BLACK)) { - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - L_WARNING("can't make the maps\n", procName); - return 1; - } - - /* Finally, for each connected region corresponding to the - * fg mask, reset all pixels to their average value. */ - if (pixim && fgpixels) { - scalex = 1. / (l_float32)sx; - scaley = 1. / (l_float32)sy; - pixims = pixScaleBySampling(pixim, scalex, scaley); - pixSmoothConnectedRegions(pixmr, pixims, 2); - pixSmoothConnectedRegions(pixmg, pixims, 2); - pixSmoothConnectedRegions(pixmb, pixims, 2); - pixDestroy(&pixims); - } - - *ppixmr = pixmr; - *ppixmg = pixmg; - *ppixmb = pixmb; - pixCopyResolution(*ppixmr, pixs); - pixCopyResolution(*ppixmg, pixs); - pixCopyResolution(*ppixmb, pixs); - return 0; -} - - -/*! 
- * \brief pixGetBackgroundGrayMapMorph() - * - * \param[in] pixs 8 bpp grayscale; not cmapped - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null; it - * should not have all foreground pixels - * \param[in] reduction factor at which closing is performed - * \param[in] size of square Sel for the closing; use an odd number - * \param[out] ppixm grayscale map - * \return 0 if OK, 1 on error - */ -l_ok -pixGetBackgroundGrayMapMorph(PIX *pixs, - PIX *pixim, - l_int32 reduction, - l_int32 size, - PIX **ppixm) -{ -l_int32 nx, ny, empty, fgpixels; -l_float32 scale; -PIX *pixm, *pix1, *pix2, *pix3, *pixims; - - PROCNAME("pixGetBackgroundGrayMapMorph"); - - if (!ppixm) - return ERROR_INT("&pixm not defined", procName, 1); - *ppixm = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - - /* Evaluate the mask pixim and make sure it is not all foreground. */ - fgpixels = 0; /* boolean for existence of fg mask pixels */ - if (pixim) { - pixInvert(pixim, pixim); /* set background pixels to 1 */ - pixZero(pixim, &empty); - if (empty) - return ERROR_INT("pixim all fg; no background", procName, 1); - pixInvert(pixim, pixim); /* revert to original mask */ - pixZero(pixim, &empty); - if (!empty) /* there are fg pixels in pixim */ - fgpixels = 1; - } - - /* Downscale as requested and do the closing to get the background. */ - scale = 1. / (l_float32)reduction; - pix1 = pixScaleBySampling(pixs, scale, scale); - pix2 = pixCloseGray(pix1, size, size); - pix3 = pixExtendByReplication(pix2, 1, 1); - pixDestroy(&pix1); - pixDestroy(&pix2); - - /* Downscale the image mask, if any, and remove it from the - * background. These pixels will be filled in (twice). 
*/ - pixims = NULL; - if (pixim) { - pixims = pixScale(pixim, scale, scale); - pixm = pixConvertTo8(pixims, FALSE); - pixAnd(pixm, pixm, pix3); - } - else - pixm = pixClone(pix3); - pixDestroy(&pix3); - - /* Fill all the holes in the map. */ - nx = pixGetWidth(pixs) / reduction; - ny = pixGetHeight(pixs) / reduction; - if (pixFillMapHoles(pixm, nx, ny, L_FILL_BLACK)) { - pixDestroy(&pixm); - pixDestroy(&pixims); - L_WARNING("can't make the map\n", procName); - return 1; - } - - /* Finally, for each connected region corresponding to the - * fg mask, reset all pixels to their average value. */ - if (pixim && fgpixels) - pixSmoothConnectedRegions(pixm, pixims, 2); - pixDestroy(&pixims); - - *ppixm = pixm; - pixCopyResolution(*ppixm, pixs); - return 0; -} - - -/*! - * \brief pixGetBackgroundRGBMapMorph() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null; it - * should not have all foreground pixels - * \param[in] reduction factor at which closing is performed - * \param[in] size of square Sel for the closing; use an odd number - * \param[out] ppixmr red component map - * \param[out] ppixmg green component map - * \param[out] ppixmb blue component map - * \return 0 if OK, 1 on error - */ -l_ok -pixGetBackgroundRGBMapMorph(PIX *pixs, - PIX *pixim, - l_int32 reduction, - l_int32 size, - PIX **ppixmr, - PIX **ppixmg, - PIX **ppixmb) -{ -l_int32 nx, ny, empty, fgpixels; -l_float32 scale; -PIX *pixm, *pixmr, *pixmg, *pixmb, *pix1, *pix2, *pix3, *pixims; - - PROCNAME("pixGetBackgroundRGBMapMorph"); - - if (!ppixmr || !ppixmg || !ppixmb) - return ERROR_INT("&pixm* not all defined", procName, 1); - *ppixmr = *ppixmg = *ppixmb = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - - /* Evaluate the mask pixim and make sure it is not all 
foreground. */ - fgpixels = 0; /* boolean for existence of fg mask pixels */ - if (pixim) { - pixInvert(pixim, pixim); /* set background pixels to 1 */ - pixZero(pixim, &empty); - if (empty) - return ERROR_INT("pixim all fg; no background", procName, 1); - pixInvert(pixim, pixim); /* revert to original mask */ - pixZero(pixim, &empty); - if (!empty) /* there are fg pixels in pixim */ - fgpixels = 1; - } - - /* Generate an 8 bpp version of the image mask, if it exists */ - scale = 1. / (l_float32)reduction; - pixims = NULL; - pixm = NULL; - if (pixim) { - pixims = pixScale(pixim, scale, scale); - pixm = pixConvertTo8(pixims, FALSE); - } - - /* Downscale as requested and do the closing to get the background. - * Then remove the image mask pixels from the background. They - * will be filled in (twice) later. Do this for all 3 components. */ - pix1 = pixScaleRGBToGrayFast(pixs, reduction, COLOR_RED); - pix2 = pixCloseGray(pix1, size, size); - pix3 = pixExtendByReplication(pix2, 1, 1); - if (pixim) - pixmr = pixAnd(NULL, pixm, pix3); - else - pixmr = pixClone(pix3); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - pix1 = pixScaleRGBToGrayFast(pixs, reduction, COLOR_GREEN); - pix2 = pixCloseGray(pix1, size, size); - pix3 = pixExtendByReplication(pix2, 1, 1); - if (pixim) - pixmg = pixAnd(NULL, pixm, pix3); - else - pixmg = pixClone(pix3); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - pix1 = pixScaleRGBToGrayFast(pixs, reduction, COLOR_BLUE); - pix2 = pixCloseGray(pix1, size, size); - pix3 = pixExtendByReplication(pix2, 1, 1); - if (pixim) - pixmb = pixAnd(NULL, pixm, pix3); - else - pixmb = pixClone(pix3); - pixDestroy(&pixm); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - /* Fill all the holes in the three maps. 
*/ - nx = pixGetWidth(pixs) / reduction; - ny = pixGetHeight(pixs) / reduction; - if (pixFillMapHoles(pixmr, nx, ny, L_FILL_BLACK) || - pixFillMapHoles(pixmg, nx, ny, L_FILL_BLACK) || - pixFillMapHoles(pixmb, nx, ny, L_FILL_BLACK)) { - pixDestroy(&pixmr); - pixDestroy(&pixmg); - pixDestroy(&pixmb); - pixDestroy(&pixims); - L_WARNING("can't make the maps\n", procName); - return 1; - } - - /* Finally, for each connected region corresponding to the - * fg mask in each component, reset all pixels to their - * average value. */ - if (pixim && fgpixels) { - pixSmoothConnectedRegions(pixmr, pixims, 2); - pixSmoothConnectedRegions(pixmg, pixims, 2); - pixSmoothConnectedRegions(pixmb, pixims, 2); - pixDestroy(&pixims); - } - - *ppixmr = pixmr; - *ppixmg = pixmg; - *ppixmb = pixmb; - pixCopyResolution(*ppixmr, pixs); - pixCopyResolution(*ppixmg, pixs); - pixCopyResolution(*ppixmb, pixs); - return 0; -} - - -/*! - * \brief pixFillMapHoles() - * - * \param[in] pix 8 bpp; a map, with one pixel for each tile in - * a larger image - * \param[in] nx number of horizontal pixel tiles that are entirely - * covered with pixels in the original source image - * \param[in] ny ditto for the number of vertical pixel tiles - * \param[in] filltype L_FILL_WHITE or L_FILL_BLACK - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation on pix (the map).  pix is
- *          typically a low-resolution version of some other image
- *          from which it was derived, where each pixel in pix
- *          corresponds to a rectangular tile (say, m x n) of pixels
- *          in the larger image.  All we need to know about the larger
- *          image is whether or not the rightmost column and bottommost
- *          row of pixels in pix correspond to tiles that are
- *          only partially covered by pixels in the larger image.
- *      (2) Typically, some number of pixels in the input map are
- *          not known, and their values must be determined by near
- *          pixels that are known.  These unknown pixels are the 'holes'.
- *          They can take on only two values, 0 and 255, and the
- *          instruction about which to fill is given by the filltype flag.
- *      (3) The "holes" can come from two sources.  The first is when there
- *          are not enough foreground or background pixels in a tile;
- *          the second is when a tile is at least partially covered
- *          by an image mask.  If we're filling holes in a fg mask,
- *          the holes are initialized to black (0) and use L_FILL_BLACK.
- *          For filling holes in a bg mask, initialize the holes to
- *          white (255) and use L_FILL_WHITE.
- *      (4) If w is the map width, nx = w or nx = w - 1; ditto for h and ny.
- * 
- */ -l_ok -pixFillMapHoles(PIX *pix, - l_int32 nx, - l_int32 ny, - l_int32 filltype) -{ -l_int32 w, h, y, nmiss, goodcol, i, j, found, ival, valtest; -l_uint32 val, lastval; -NUMA *na; /* indicates if there is any data in the column */ -PIX *pixt; - - PROCNAME("pixFillMapHoles"); - - if (!pix || pixGetDepth(pix) != 8) - return ERROR_INT("pix not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pix)) - return ERROR_INT("pix is colormapped", procName, 1); - - /* ------------- Fill holes in the mapping image columns ----------- */ - pixGetDimensions(pix, &w, &h, NULL); - na = numaCreate(0); /* holds flag for which columns have data */ - nmiss = 0; - valtest = (filltype == L_FILL_WHITE) ? 255 : 0; - for (j = 0; j < nx; j++) { /* do it by columns */ - found = FALSE; - for (i = 0; i < ny; i++) { - pixGetPixel(pix, j, i, &val); - if (val != valtest) { - y = i; - found = TRUE; - break; - } - } - if (found == FALSE) { - numaAddNumber(na, 0); /* no data in the column */ - nmiss++; - } - else { - numaAddNumber(na, 1); /* data in the column */ - for (i = y - 1; i >= 0; i--) /* replicate upwards to top */ - pixSetPixel(pix, j, i, val); - pixGetPixel(pix, j, 0, &lastval); - for (i = 1; i < h; i++) { /* set going down to bottom */ - pixGetPixel(pix, j, i, &val); - if (val == valtest) - pixSetPixel(pix, j, i, lastval); - else - lastval = val; - } - } - } - numaAddNumber(na, 0); /* last column */ - - if (nmiss == nx) { /* no data in any column! 
*/ - numaDestroy(&na); - L_WARNING("no bg found; no data in any column\n", procName); - return 1; - } - - /* ---------- Fill in missing columns by replication ----------- */ - if (nmiss > 0) { /* replicate columns */ - pixt = pixCopy(NULL, pix); - /* Find the first good column */ - goodcol = 0; - for (j = 0; j < w; j++) { - numaGetIValue(na, j, &ival); - if (ival == 1) { - goodcol = j; - break; - } - } - if (goodcol > 0) { /* copy cols backward */ - for (j = goodcol - 1; j >= 0; j--) { - pixRasterop(pix, j, 0, 1, h, PIX_SRC, pixt, j + 1, 0); - pixRasterop(pixt, j, 0, 1, h, PIX_SRC, pix, j, 0); - } - } - for (j = goodcol + 1; j < w; j++) { /* copy cols forward */ - numaGetIValue(na, j, &ival); - if (ival == 0) { - /* Copy the column to the left of j */ - pixRasterop(pix, j, 0, 1, h, PIX_SRC, pixt, j - 1, 0); - pixRasterop(pixt, j, 0, 1, h, PIX_SRC, pix, j, 0); - } - } - pixDestroy(&pixt); - } - if (w > nx) { /* replicate the last column */ - for (i = 0; i < h; i++) { - pixGetPixel(pix, w - 2, i, &val); - pixSetPixel(pix, w - 1, i, val); - } - } - - numaDestroy(&na); - return 0; -} - - -/*! - * \brief pixExtendByReplication() - * - * \param[in] pixs 8 bpp - * \param[in] addw number of extra pixels horizontally to add - * \param[in] addh number of extra pixels vertically to add - * \return pixd extended with replicated pixel values, or NULL on error - * - *
- * Notes:
- *      (1) The pixel values are extended to the left and down, as required.
- * 
- */ -PIX * -pixExtendByReplication(PIX *pixs, - l_int32 addw, - l_int32 addh) -{ -l_int32 w, h, i, j; -l_uint32 val; -PIX *pixd; - - PROCNAME("pixExtendByReplication"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - if (addw == 0 && addh == 0) - return pixCopy(NULL, pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w + addw, h + addh, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixRasterop(pixd, 0, 0, w, h, PIX_SRC, pixs, 0, 0); - - if (addw > 0) { - for (i = 0; i < h; i++) { - pixGetPixel(pixd, w - 1, i, &val); - for (j = 0; j < addw; j++) - pixSetPixel(pixd, w + j, i, val); - } - } - - if (addh > 0) { - for (j = 0; j < w + addw; j++) { - pixGetPixel(pixd, j, h - 1, &val); - for (i = 0; i < addh; i++) - pixSetPixel(pixd, j, h + i, val); - } - } - - pixCopyResolution(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixSmoothConnectedRegions() - * - * \param[in] pixs 8 bpp grayscale; no colormap - * \param[in] pixm [optional] 1 bpp; if null, this is a no-op - * \param[in] factor subsampling factor for getting average; >= 1 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pixels in pixs corresponding to those in each
- *          8-connected region in the mask are set to the average value.
- *      (2) This is required for adaptive mapping to avoid the
- *          generation of stripes in the background map, due to
- *          variations in the pixel values near the edges of mask regions.
- *      (3) This function is optimized for background smoothing, where
- *          there are a relatively small number of components.  It will
- *          be inefficient if used where there are many small components.
- * 
- */ -l_ok -pixSmoothConnectedRegions(PIX *pixs, - PIX *pixm, - l_int32 factor) -{ -l_int32 empty, i, n, x, y; -l_float32 aveval; -BOXA *boxa; -PIX *pixmc; -PIXA *pixa; - - PROCNAME("pixSmoothConnectedRegions"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs has colormap", procName, 1); - if (!pixm) { - L_INFO("pixm not defined\n", procName); - return 0; - } - if (pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - pixZero(pixm, &empty); - if (empty) { - L_INFO("pixm has no fg pixels; nothing to do\n", procName); - return 0; - } - - boxa = pixConnComp(pixm, &pixa, 8); - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) { - if ((pixmc = pixaGetPix(pixa, i, L_CLONE)) == NULL) { - L_WARNING("missing pixmc!\n", procName); - continue; - } - boxaGetBoxGeometry(boxa, i, &x, &y, NULL, NULL); - pixGetAverageMasked(pixs, pixmc, x, y, factor, L_MEAN_ABSVAL, &aveval); - pixPaintThroughMask(pixs, pixmc, x, y, (l_int32)aveval); - pixDestroy(&pixmc); - } - - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return 0; -} - - -/*------------------------------------------------------------------* - * Measurement of local foreground * - *------------------------------------------------------------------*/ -#if 0 /* Not working properly: do not use */ - -/*! - * \brief pixGetForegroundGrayMap() - * - * \param[in] pixs 8 bpp - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] sx, sy src tile size, in pixels - * \param[in] thresh threshold for determining foreground - * \param[out] ppixd 8 bpp grayscale map - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Each (sx, sy) tile of pixs gets mapped to one pixel in pixd.
- *      (2) pixd is the estimate of the fg (darkest) value within each tile.
- *      (3) All pixels in pixd that are in 'image' regions, as specified
- *          by pixim, are given the background value 0.
- *      (4) For pixels in pixd that can't directly be given a fg value,
- *          the value is inferred by propagating from neighboring pixels.
- *      (5) In practice, pixd can be used to normalize the fg, and
- *          it can be done after background normalization.
- *      (6) The overall procedure is:
- *            ~ reduce 2x by sampling
- *            ~ paint all 'image' pixels white, so that they don't
- *            ~ participate in the Min reduction
- *            ~ do a further (sx, sy) Min reduction -- think of
- *              it as a large opening followed by subsampling by the
- *              reduction factors
- *            ~ threshold the result to identify fg, and set the
- *              bg pixels to 255 (these are 'holes')
- *            ~ fill holes by propagation from fg values
- *            ~ replicatively expand by 2x, arriving at the final
- *              resolution of pixd
- *            ~ smooth with a 17x17 kernel
- *            ~ paint the 'image' regions black
- * 
- */ -l_ok -pixGetForegroundGrayMap(PIX *pixs, - PIX *pixim, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - PIX **ppixd) -{ -l_int32 w, h, d, wd, hd; -l_int32 empty, fgpixels; -PIX *pixd, *piximi, *pixim2, *pixims, *pixs2, *pixb, *pixt1, *pixt2, *pixt3; - - PROCNAME("pixGetForegroundGrayMap"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return ERROR_INT("pixs not 8 bpp", procName, 1); - if (pixim && pixGetDepth(pixim) != 1) - return ERROR_INT("pixim not 1 bpp", procName, 1); - if (sx < 2 || sy < 2) - return ERROR_INT("sx and sy must be >= 2", procName, 1); - - /* Generate pixd, which is reduced by the factors (sx, sy). */ - wd = (w + sx - 1) / sx; - hd = (h + sy - 1) / sy; - pixd = pixCreate(wd, hd, 8); - *ppixd = pixd; - - /* Evaluate the 'image' mask, pixim. If it is all fg, - * the output pixd has all pixels with value 0. */ - fgpixels = 0; /* boolean for existence of fg pixels in the image mask. */ - if (pixim) { - piximi = pixInvert(NULL, pixim); /* set non-image pixels to 1 */ - pixZero(piximi, &empty); - pixDestroy(&piximi); - if (empty) /* all 'image'; return with all pixels set to 0 */ - return 0; - pixZero(pixim, &empty); - if (!empty) /* there are fg pixels in pixim */ - fgpixels = 1; - } - - /* 2x subsampling; paint white through 'image' mask. */ - pixs2 = pixScaleBySampling(pixs, 0.5, 0.5); - if (pixim && fgpixels) { - pixim2 = pixReduceBinary2(pixim, NULL); - pixPaintThroughMask(pixs2, pixim2, 0, 0, 255); - pixDestroy(&pixim2); - } - - /* Min (erosion) downscaling; total reduction (4 sx, 4 sy). */ - pixt1 = pixScaleGrayMinMax(pixs2, sx, sy, L_CHOOSE_MIN); - -/* pixDisplay(pixt1, 300, 200); */ - - /* Threshold to identify fg; paint bg pixels to white. 
*/ - pixb = pixThresholdToBinary(pixt1, thresh); /* fg pixels */ - pixInvert(pixb, pixb); - pixPaintThroughMask(pixt1, pixb, 0, 0, 255); - pixDestroy(&pixb); - - /* Replicative expansion by 2x to (sx, sy). */ - pixt2 = pixExpandReplicate(pixt1, 2); - -/* pixDisplay(pixt2, 500, 200); */ - - /* Fill holes in the fg by propagation */ - pixFillMapHoles(pixt2, w / sx, h / sy, L_FILL_WHITE); - -/* pixDisplay(pixt2, 700, 200); */ - - /* Smooth with 17x17 kernel. */ - pixt3 = pixBlockconv(pixt2, 8, 8); - pixRasterop(pixd, 0, 0, wd, hd, PIX_SRC, pixt3, 0, 0); - - /* Paint the image parts black. */ - pixims = pixScaleBySampling(pixim, 1. / sx, 1. / sy); - pixPaintThroughMask(pixd, pixims, 0, 0, 0); - - pixDestroy(&pixs2); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - pixDestroy(&pixt3); - return 0; -} -#endif /* Not working properly: do not use */ - - -/*------------------------------------------------------------------* - * Generate inverted background map * - *------------------------------------------------------------------*/ -/*! - * \brief pixGetInvBackgroundMap() - * - * \param[in] pixs 8 bpp grayscale; no colormap - * \param[in] bgval target bg val; typ. > 128 - * \param[in] smoothx half-width of block convolution kernel width - * \param[in] smoothy half-width of block convolution kernel height - * \return pixd 16 bpp, or NULL on error - * - *
- * Notes:
- *     (1) bgval should typically be > 120 and < 240
- *     (2) pixd is a normalization image; the original image is
- *       multiplied by pixd and the result is divided by 256.
- * 
- */ -PIX * -pixGetInvBackgroundMap(PIX *pixs, - l_int32 bgval, - l_int32 smoothx, - l_int32 smoothy) -{ -l_int32 w, h, wplsm, wpld, i, j; -l_int32 val, val16; -l_uint32 *datasm, *datad, *linesm, *lined; -PIX *pixsm, *pixd; - - PROCNAME("pixGetInvBackgroundMap"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (w < 5 || h < 5) - return (PIX *)ERROR_PTR("w and h must be >= 5", procName, NULL); - - /* smooth the map image */ - pixsm = pixBlockconv(pixs, smoothx, smoothy); - datasm = pixGetData(pixsm); - wplsm = pixGetWpl(pixsm); - - /* invert the map image, scaling up to preserve dynamic range */ - pixd = pixCreate(w, h, 16); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linesm = datasm + i * wplsm; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(linesm, j); - if (val > 0) - val16 = (256 * bgval) / val; - else { /* shouldn't happen */ - L_WARNING("smoothed bg has 0 pixel!\n", procName); - val16 = bgval / 2; - } - SET_DATA_TWO_BYTES(lined, j, val16); - } - } - - pixDestroy(&pixsm); - pixCopyResolution(pixd, pixs); - return pixd; -} - - -/*------------------------------------------------------------------* - * Apply background map to image * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixApplyInvBackgroundGrayMap() - * - * \param[in] pixs 8 bpp grayscale; no colormap - * \param[in] pixm 16 bpp, inverse background map - * \param[in] sx tile width in pixels - * \param[in] sy tile height in pixels - * \return pixd 8 bpp, or NULL on error - */ -PIX * -pixApplyInvBackgroundGrayMap(PIX *pixs, - PIX *pixm, - l_int32 sx, - l_int32 sy) -{ -l_int32 w, h, wm, hm, wpls, wpld, i, j, k, m, xoff, yoff; -l_int32 vals, vald; -l_uint32 val16; -l_uint32 *datas, *datad, *lines, *lined, *flines, *flined; -PIX *pixd; - - PROCNAME("pixApplyInvBackgroundGrayMap"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - if (!pixm || pixGetDepth(pixm) != 16) - return (PIX *)ERROR_PTR("pixm undefined or not 16 bpp", procName, NULL); - if (sx == 0 || sy == 0) - return (PIX *)ERROR_PTR("invalid sx and/or sy", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - pixGetDimensions(pixm, &wm, &hm, NULL); - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < hm; i++) { - lines = datas + sy * i * wpls; - lined = datad + sy * i * wpld; - yoff = sy * i; - for (j = 0; j < wm; j++) { - pixGetPixel(pixm, j, i, &val16); - xoff = sx * j; - for (k = 0; k < sy && yoff + k < h; k++) { - flines = lines + k * wpls; - flined = lined + k * wpld; - for (m = 0; m < sx && xoff + m < w; m++) { - vals = GET_DATA_BYTE(flines, xoff + m); - vald = (vals * val16) / 256; - vald = L_MIN(vald, 255); - SET_DATA_BYTE(flined, xoff + m, vald); - } - } - } - } - - return pixd; -} - - -/*! 
- * \brief pixApplyInvBackgroundRGBMap() - * - * \param[in] pixs 32 bpp rbg - * \param[in] pixmr 16 bpp, red inverse background map - * \param[in] pixmg 16 bpp, green inverse background map - * \param[in] pixmb 16 bpp, blue inverse background map - * \param[in] sx tile width in pixels - * \param[in] sy tile height in pixels - * \return pixd 32 bpp rbg, or NULL on error - */ -PIX * -pixApplyInvBackgroundRGBMap(PIX *pixs, - PIX *pixmr, - PIX *pixmg, - PIX *pixmb, - l_int32 sx, - l_int32 sy) -{ -l_int32 w, h, wm, hm, wpls, wpld, i, j, k, m, xoff, yoff; -l_int32 rvald, gvald, bvald; -l_uint32 vals; -l_uint32 rval16, gval16, bval16; -l_uint32 *datas, *datad, *lines, *lined, *flines, *flined; -PIX *pixd; - - PROCNAME("pixApplyInvBackgroundRGBMap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (!pixmr || !pixmg || !pixmb) - return (PIX *)ERROR_PTR("pix maps not all defined", procName, NULL); - if (pixGetDepth(pixmr) != 16 || pixGetDepth(pixmg) != 16 || - pixGetDepth(pixmb) != 16) - return (PIX *)ERROR_PTR("pix maps not all 16 bpp", procName, NULL); - if (sx == 0 || sy == 0) - return (PIX *)ERROR_PTR("invalid sx and/or sy", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - wm = pixGetWidth(pixmr); - hm = pixGetHeight(pixmr); - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < hm; i++) { - lines = datas + sy * i * wpls; - lined = datad + sy * i * wpld; - yoff = sy * i; - for (j = 0; j < wm; j++) { - pixGetPixel(pixmr, j, i, &rval16); - pixGetPixel(pixmg, j, i, &gval16); - pixGetPixel(pixmb, j, i, &bval16); - xoff = sx * j; - for (k = 0; k < sy && yoff + k < h; k++) { - flines = lines + k * wpls; - flined = lined + k * wpld; - for (m = 0; m < sx 
&& xoff + m < w; m++) { - vals = *(flines + xoff + m); - rvald = ((vals >> 24) * rval16) / 256; - rvald = L_MIN(rvald, 255); - gvald = (((vals >> 16) & 0xff) * gval16) / 256; - gvald = L_MIN(gvald, 255); - bvald = (((vals >> 8) & 0xff) * bval16) / 256; - bvald = L_MIN(bvald, 255); - composeRGBPixel(rvald, gvald, bvald, flined + xoff + m); - } - } - } - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Apply variable map * - *------------------------------------------------------------------*/ -/*! - * \brief pixApplyVariableGrayMap() - * - * \param[in] pixs 8 bpp - * \param[in] pixg 8 bpp, variable map - * \param[in] target typ. 128 for threshold - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Suppose you have an image that you want to transform based
- *          on some photometric measurement at each point, such as the
- *          threshold value for binarization.  Representing the photometric
- *          measurement as an image pixg, you can threshold in input image
- *          using pixVarThresholdToBinary().  Alternatively, you can map
- *          the input image pointwise so that the threshold over the
- *          entire image becomes a constant, such as 128.  For example,
- *          if a pixel in pixg is 150 and the target is 128, the
- *          corresponding pixel in pixs is mapped linearly to a value
- *          (128/150) of the input value.  If the resulting mapped image
- *          pixd were then thresholded at 128, you would obtain the
- *          same result as a direct binarization using pixg with
- *          pixVarThresholdToBinary().
- *      (2) The sizes of pixs and pixg must be equal.
- * 
- */ -PIX * -pixApplyVariableGrayMap(PIX *pixs, - PIX *pixg, - l_int32 target) -{ -l_int32 i, j, w, h, d, wpls, wplg, wpld, vals, valg, vald; -l_uint8 *lut; -l_uint32 *datas, *datag, *datad, *lines, *lineg, *lined; -l_float32 fval; -PIX *pixd; - - PROCNAME("pixApplyVariableGrayMap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixg) - return (PIX *)ERROR_PTR("pixg not defined", procName, NULL); - if (!pixSizesEqual(pixs, pixg)) - return (PIX *)ERROR_PTR("pix sizes not equal", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("depth not 8 bpp", procName, NULL); - - /* Generate a LUT for the mapping if the image is large enough - * to warrant the overhead. The LUT is of size 2^16. For the - * index to the table, get the MSB from pixs and the LSB from pixg. - * Note: this LUT is bigger than the typical 32K L1 cache, so - * we expect cache misses. L2 latencies are about 5ns. But - * division is slooooow. For large images, this function is about - * 4x faster when using the LUT. C'est la vie. 
*/ - lut = NULL; - if (w * h > 100000) { /* more pixels than 2^16 */ - if ((lut = (l_uint8 *)LEPT_CALLOC(0x10000, sizeof(l_uint8))) == NULL) - return (PIX *)ERROR_PTR("lut not made", procName, NULL); - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - fval = (l_float32)(i * target) / (j + 0.5); - lut[(i << 8) + j] = L_MIN(255, (l_int32)(fval + 0.5)); - } - } - } - - if ((pixd = pixCreateNoInit(w, h, 8)) == NULL) { - LEPT_FREE(lut); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lineg = datag + i * wplg; - lined = datad + i * wpld; - if (lut) { - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - valg = GET_DATA_BYTE(lineg, j); - vald = lut[(vals << 8) + valg]; - SET_DATA_BYTE(lined, j, vald); - } - } - else { - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - valg = GET_DATA_BYTE(lineg, j); - fval = (l_float32)(vals * target) / (valg + 0.5); - vald = L_MIN(255, (l_int32)(fval + 0.5)); - SET_DATA_BYTE(lined, j, vald); - } - } - } - - LEPT_FREE(lut); - return pixd; -} - - -/*------------------------------------------------------------------* - * Non-adaptive (global) mapping * - *------------------------------------------------------------------*/ -/*! - * \brief pixGlobalNormRGB() - * - * \param[in] pixd [optional] null, existing or equal to pixs - * \param[in] pixs 32 bpp rgb, or colormapped - * \param[in] rval, gval, bval pixel values in pixs that are - * linearly mapped to mapval - * \param[in] mapval use 255 for mapping to white - * \return pixd 32 bpp rgb or colormapped, or NULL on error - * - *
- * Notes:
- *    (1) The value of pixd determines if the results are written to a
- *        new pix (use NULL), in-place to pixs (use pixs), or to some
- *        other existing pix.
- *    (2) This does a global normalization of an image where the
- *        r,g,b color components are not balanced.  Thus, white in pixs is
- *        represented by a set of r,g,b values that are not all 255.
- *    (3) The input values (rval, gval, bval) should be chosen to
- *        represent the gray color (mapval, mapval, mapval) in src.
- *        Thus, this function will map (rval, gval, bval) to that gray color.
- *    (4) Typically, mapval = 255, so that (rval, gval, bval)
- *        corresponds to the white point of src.  In that case, these
- *        parameters should be chosen so that few pixels have higher values.
- *    (5) In all cases, we do a linear TRC separately on each of the
- *        components, saturating at 255.
- *    (6) If the input pix is 8 bpp without a colormap, you can get
- *        this functionality with mapval = 255 by calling:
- *            pixGammaTRC(pixd, pixs, 1.0, 0, bgval);
- *        where bgval is the value you want to be mapped to 255.
- *        Or more generally, if you want bgval to be mapped to mapval:
- *            pixGammaTRC(pixd, pixs, 1.0, 0, 255 * bgval / mapval);
- * 
- */ -PIX * -pixGlobalNormRGB(PIX *pixd, - PIX *pixs, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 mapval) -{ -l_int32 w, h, d, i, j, ncolors, rv, gv, bv, wpl; -l_int32 *rarray, *garray, *barray; -l_uint32 *data, *line; -NUMA *nar, *nag, *nab; -PIXCMAP *cmap; - - PROCNAME("pixGlobalNormRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - cmap = pixGetColormap(pixs); - pixGetDimensions(pixs, &w, &h, &d); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (mapval <= 0) { - L_WARNING("mapval must be > 0; setting to 255\n", procName); - mapval = 255; - } - - /* Prepare pixd to be a copy of pixs */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - /* Generate the TRC maps for each component. Make sure the - * upper range for each color is greater than zero. */ - nar = numaGammaTRC(1.0, 0, L_MAX(1, 255 * rval / mapval)); - nag = numaGammaTRC(1.0, 0, L_MAX(1, 255 * gval / mapval)); - nab = numaGammaTRC(1.0, 0, L_MAX(1, 255 * bval / mapval)); - - /* Extract copies of the internal arrays */ - rarray = numaGetIArray(nar); - garray = numaGetIArray(nag); - barray = numaGetIArray(nab); - if (!nar || !nag || !nab || !rarray || !garray || !barray) { - L_ERROR("allocation failure in arrays\n", procName); - goto cleanup_arrays; - } - - if (cmap) { - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rv, &gv, &bv); - pixcmapResetColor(cmap, i, rarray[rv], garray[gv], barray[bv]); - } - } - else { - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rv, &gv, &bv); - composeRGBPixel(rarray[rv], garray[gv], barray[bv], line + j); - } - } - } - -cleanup_arrays: - numaDestroy(&nar); - numaDestroy(&nag); - numaDestroy(&nab); - LEPT_FREE(rarray); - LEPT_FREE(garray); - 
LEPT_FREE(barray); - return pixd; -} - - -/*! - * \brief pixGlobalNormNoSatRGB() - * - * \param[in] pixd [optional] null, existing or equal to pixs - * \param[in] pixs 32 bpp rgb - * \param[in] rval, gval, bval pixel values in pixs that are - * linearly mapped to mapval; but see below - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] rank between 0.0 and 1.0; typ. use a value near 1.0 - * \return pixd 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *    (1) This is a version of pixGlobalNormRGB(), where the output
- *        intensity is scaled back so that a controlled fraction of
- *        pixel components is allowed to saturate.  See comments in
- *        pixGlobalNormRGB().
- *    (2) The value of pixd determines if the results are written to a
- *        new pix (use NULL), in-place to pixs (use pixs), or to some
- *        other existing pix.
- *    (3) This does a global normalization of an image where the
- *        r,g,b color components are not balanced.  Thus, white in pixs is
- *        represented by a set of r,g,b values that are not all 255.
- *    (4) The input values (rval, gval, bval) can be chosen to be the
- *        color that, after normalization, becomes white background.
- *        For images that are mostly background, the closer these values
- *        are to the median component values, the closer the resulting
- *        background will be to gray, becoming white at the brightest places.
- *    (5) The mapval used in pixGlobalNormRGB() is computed here to
- *        avoid saturation of any component in the image (save for a
- *        fraction of the pixels given by the input rank value).
- * 
- */ -PIX * -pixGlobalNormNoSatRGB(PIX *pixd, - PIX *pixs, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 factor, - l_float32 rank) -{ -l_int32 mapval; -l_float32 rankrval, rankgval, rankbval; -l_float32 rfract, gfract, bfract, maxfract; - - PROCNAME("pixGlobalNormNoSatRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("sampling factor < 1", procName, NULL); - if (rank < 0.0 || rank > 1.0) - return (PIX *)ERROR_PTR("rank not in [0.0 ... 1.0]", procName, NULL); - if (rval <= 0 || gval <= 0 || bval <= 0) - return (PIX *)ERROR_PTR("invalid estim. color values", procName, NULL); - - /* The max value for each component may be larger than the - * input estimated background value. In that case, mapping - * for those pixels would saturate. To prevent saturation, - * we compute the fraction for each component by which we - * would oversaturate. Then take the max of these, and - * reduce, uniformly over all components, the output intensity - * by this value. Then no component will saturate. - * In practice, if rank < 1.0, a fraction of pixels - * may have a component saturate. By keeping rank close to 1.0, - * that fraction can be made arbitrarily small. */ - pixGetRankValueMaskedRGB(pixs, NULL, 0, 0, factor, rank, &rankrval, - &rankgval, &rankbval); - rfract = rankrval / (l_float32)rval; - gfract = rankgval / (l_float32)gval; - bfract = rankbval / (l_float32)bval; - maxfract = L_MAX(rfract, gfract); - maxfract = L_MAX(maxfract, bfract); -#if DEBUG_GLOBAL - lept_stderr("rankrval = %7.2f, rankgval = %7.2f, rankbval = %7.2f\n", - rankrval, rankgval, rankbval); - lept_stderr("rfract = %7.4f, gfract = %7.4f, bfract = %7.4f\n", - rfract, gfract, bfract); -#endif /* DEBUG_GLOBAL */ - - mapval = (l_int32)(255. 
/ maxfract); - pixd = pixGlobalNormRGB(pixd, pixs, rval, gval, bval, mapval); - return pixd; -} - - -/*------------------------------------------------------------------* - * Adaptive threshold spread normalization * - *------------------------------------------------------------------*/ -/*! - * \brief pixThresholdSpreadNorm() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] filtertype L_SOBEL_EDGE or L_TWO_SIDED_EDGE; - * \param[in] edgethresh threshold on magnitude of edge filter; - * typ 10-20 - * \param[in] smoothx, smoothy half-width of convolution kernel applied to - * spread threshold: use 0 for no smoothing - * \param[in] gamma gamma correction; typ. about 0.7 - * \param[in] minval input value that gives 0 for output; typ. -25 - * \param[in] maxval input value that gives 255 for output; - * typ. 255 - * \param[in] targetthresh target threshold for normalization - * \param[out] ppixth [optional] computed local threshold value - * \param[out] ppixb [optional] thresholded normalized image - * \param[out] ppixd [optional] normalized image - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The basis of this approach is the use of seed spreading
- *          on a (possibly) sparse set of estimates for the local threshold.
- *          The resulting dense estimates are smoothed by convolution
- *          and used to either threshold the input image or normalize it
- *          with a local transformation that linearly maps the pixels so
- *          that the local threshold estimate becomes constant over the
- *          resulting image.  This approach is one of several that
- *          have been suggested (and implemented) by Ray Smith.
- *      (2) You can use either the Sobel or TwoSided edge filters.
- *          The results appear to be similar, using typical values
- *          of edgethresh in the rang 10-20.
- *      (3) To skip the trc enhancement, use gamma = 1.0, minval = 0
- *          and maxval = 255.
- *      (4) For the normalized image pixd, each pixel is linearly mapped
- *          in such a way that the local threshold is equal to targetthresh.
- *      (5) The full width and height of the convolution kernel
- *          are (2 * smoothx + 1) and (2 * smoothy + 1).
- *      (6) This function can be used with the pixtiling utility if the
- *          images are too large.  See pixOtsuAdaptiveThreshold() for
- *          an example of this.
- * 
- */ -l_ok -pixThresholdSpreadNorm(PIX *pixs, - l_int32 filtertype, - l_int32 edgethresh, - l_int32 smoothx, - l_int32 smoothy, - l_float32 gamma, - l_int32 minval, - l_int32 maxval, - l_int32 targetthresh, - PIX **ppixth, - PIX **ppixb, - PIX **ppixd) -{ -PIX *pixe, *pixet, *pixsd, *pixg1, *pixg2, *pixth; - - PROCNAME("pixThresholdSpreadNorm"); - - if (ppixth) *ppixth = NULL; - if (ppixb) *ppixb = NULL; - if (ppixd) *ppixd = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - if (!ppixth && !ppixb && !ppixd) - return ERROR_INT("no output requested", procName, 1); - if (filtertype != L_SOBEL_EDGE && filtertype != L_TWO_SIDED_EDGE) - return ERROR_INT("invalid filter type", procName, 1); - - /* Get the thresholded edge pixels. These are the ones - * that have values in pixs near the local optimal fg/bg threshold. */ - if (filtertype == L_SOBEL_EDGE) - pixe = pixSobelEdgeFilter(pixs, L_VERTICAL_EDGES); - else /* L_TWO_SIDED_EDGE */ - pixe = pixTwoSidedEdgeFilter(pixs, L_VERTICAL_EDGES); - pixet = pixThresholdToBinary(pixe, edgethresh); - pixInvert(pixet, pixet); - - /* Build a seed image whose only nonzero values are those - * values of pixs corresponding to pixels in the fg of pixet. 
*/ - pixsd = pixCreateTemplate(pixs); - pixCombineMasked(pixsd, pixs, pixet); - - /* Spread the seed and optionally smooth to reduce noise */ - pixg1 = pixSeedspread(pixsd, 4); - pixg2 = pixBlockconv(pixg1, smoothx, smoothy); - - /* Optionally do a gamma enhancement */ - pixth = pixGammaTRC(NULL, pixg2, gamma, minval, maxval); - - /* Do the mapping and thresholding */ - if (ppixd) { - *ppixd = pixApplyVariableGrayMap(pixs, pixth, targetthresh); - if (ppixb) - *ppixb = pixThresholdToBinary(*ppixd, targetthresh); - } - else if (ppixb) - *ppixb = pixVarThresholdToBinary(pixs, pixth); - - if (ppixth) - *ppixth = pixth; - else - pixDestroy(&pixth); - - pixDestroy(&pixe); - pixDestroy(&pixet); - pixDestroy(&pixsd); - pixDestroy(&pixg1); - pixDestroy(&pixg2); - return 0; -} - - -/*------------------------------------------------------------------* - * Adaptive background normalization (flexible adaptaption) * - *------------------------------------------------------------------*/ -/*! - * \brief pixBackgroundNormFlex() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] sx, sy desired tile dimensions; size may vary; - * use values between 3 and 10 - * \param[in] smoothx, smoothy half-width of convolution kernel applied to - * threshold array: use values between 1 and 3 - * \param[in] delta difference parameter in basin filling; - * use 0 to skip - * \return pixd 8 bpp, background-normalized), or NULL on error - * - *
- * Notes:
- *      (1) This does adaptation flexibly to a quickly varying background.
- *          For that reason, all input parameters should be small.
- *      (2) sx and sy give the tile size; they should be in [5 - 7].
- *      (3) The full width and height of the convolution kernel
- *          are (2 * smoothx + 1) and (2 * smoothy + 1).  They
- *          should be in [1 - 2].
- *      (4) Basin filling is used to fill the large fg regions.  The
- *          parameter %delta measures the height that the black
- *          background is raised from the local minima.  By raising
- *          the background, it is possible to threshold the large
- *          fg regions to foreground.  If %delta is too large,
- *          bg regions will be lifted, causing thickening of
- *          the fg regions.  Use 0 to skip.
- * 
- */ -PIX * -pixBackgroundNormFlex(PIX *pixs, - l_int32 sx, - l_int32 sy, - l_int32 smoothx, - l_int32 smoothy, - l_int32 delta) -{ -l_float32 scalex, scaley; -PIX *pixt, *pixsd, *pixmin, *pixbg, *pixbgi, *pixd; - - PROCNAME("pixBackgroundNormFlex"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); - if (sx < 3 || sy < 3) - return (PIX *)ERROR_PTR("sx and/or sy less than 3", procName, NULL); - if (sx > 10 || sy > 10) - return (PIX *)ERROR_PTR("sx and/or sy exceed 10", procName, NULL); - if (smoothx < 1 || smoothy < 1) - return (PIX *)ERROR_PTR("smooth params less than 1", procName, NULL); - if (smoothx > 3 || smoothy > 3) - return (PIX *)ERROR_PTR("smooth params exceed 3", procName, NULL); - - /* Generate the bg estimate using smoothed average with subsampling */ - scalex = 1. / (l_float32)sx; - scaley = 1. / (l_float32)sy; - pixt = pixScaleSmooth(pixs, scalex, scaley); - - /* Do basin filling on the bg estimate if requested */ - if (delta <= 0) - pixsd = pixClone(pixt); - else { - pixLocalExtrema(pixt, 0, 0, &pixmin, NULL); - pixsd = pixSeedfillGrayBasin(pixmin, pixt, delta, 4); - pixDestroy(&pixmin); - } - pixbg = pixExtendByReplication(pixsd, 1, 1); - - /* Map the bg to 200 */ - pixbgi = pixGetInvBackgroundMap(pixbg, 200, smoothx, smoothy); - pixd = pixApplyInvBackgroundGrayMap(pixs, pixbgi, sx, sy); - - pixDestroy(&pixt); - pixDestroy(&pixsd); - pixDestroy(&pixbg); - pixDestroy(&pixbgi); - return pixd; -} - - -/*------------------------------------------------------------------* - * Adaptive contrast normalization * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixContrastNorm() - * - * \param[in] pixd [optional] 8 bpp; null or equal to pixs - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] sx, sy tile dimensions - * \param[in] mindiff minimum difference to accept as valid - * \param[in] smoothx, smoothy half-width of convolution kernel applied to - * min and max arrays: use 0 for no smoothing - * \return pixd always - * - *
- * Notes:
- *      (1) This function adaptively attempts to expand the contrast
- *          to the full dynamic range in each tile.  If the contrast in
- *          a tile is smaller than %mindiff, it uses the min and max
- *          pixel values from neighboring tiles.  It also can use
- *          convolution to smooth the min and max values from
- *          neighboring tiles.  After all that processing, it is
- *          possible that the actual pixel values in the tile are outside
- *          the computed [min ... max] range for local contrast
- *          normalization.  Such pixels are taken to be at either 0
- *          (if below the min) or 255 (if above the max).
- *      (2) pixd can be equal to pixs (in-place operation) or
- *          null (makes a new pixd).
- *      (3) sx and sy give the tile size; they are typically at least 20.
- *      (4) mindiff is used to eliminate results for tiles where it is
- *          likely that either fg or bg is missing.  A value around 50
- *          or more is reasonable.
- *      (5) The full width and height of the convolution kernel
- *          are (2 * smoothx + 1) and (2 * smoothy + 1).  Some smoothing
- *          is typically useful, and we limit the smoothing half-widths
- *          to the range from 0 to 8.
- *      (6) A linear TRC (gamma = 1.0) is applied to increase the contrast
- *          in each tile.  The result can subsequently be globally corrected,
- *          by applying pixGammaTRC() with arbitrary values of gamma
- *          and the 0 and 255 points of the mapping.
- * 
- */ -PIX * -pixContrastNorm(PIX *pixd, - PIX *pixs, - l_int32 sx, - l_int32 sy, - l_int32 mindiff, - l_int32 smoothx, - l_int32 smoothy) -{ -PIX *pixmin, *pixmax; - - PROCNAME("pixContrastNorm"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, pixd); - if (pixd && pixd != pixs) - return (PIX *)ERROR_PTR("pixd not null or == pixs", procName, pixd); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is colormapped", procName, pixd); - if (sx < 5 || sy < 5) - return (PIX *)ERROR_PTR("sx and/or sy less than 5", procName, pixd); - if (smoothx < 0 || smoothy < 0) - return (PIX *)ERROR_PTR("smooth params less than 0", procName, pixd); - if (smoothx > 8 || smoothy > 8) - return (PIX *)ERROR_PTR("smooth params exceed 8", procName, pixd); - - /* Get the min and max pixel values in each tile, and represent - * each value as a pixel in pixmin and pixmax, respectively. */ - pixMinMaxTiles(pixs, sx, sy, mindiff, smoothx, smoothy, &pixmin, &pixmax); - - /* For each tile, do a linear expansion of the dynamic range - * of pixels so that the min value is mapped to 0 and the - * max value is mapped to 255. */ - pixd = pixLinearTRCTiled(pixd, pixs, sx, sy, pixmin, pixmax); - - pixDestroy(&pixmin); - pixDestroy(&pixmax); - return pixd; -} - - -/*! - * \brief pixMinMaxTiles() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] sx, sy tile dimensions - * \param[in] mindiff minimum difference to accept as valid - * \param[in] smoothx, smoothy half-width of convolution kernel applied to - * min and max arrays: use 0 for no smoothing - * \param[out] ppixmin tiled minima - * \param[out] ppixmax tiled maxima - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes filtered and smoothed values for the min and
- *          max pixel values in each tile of the image.
- *      (2) See pixContrastNorm() for usage.
- * 
- */ -l_ok -pixMinMaxTiles(PIX *pixs, - l_int32 sx, - l_int32 sy, - l_int32 mindiff, - l_int32 smoothx, - l_int32 smoothy, - PIX **ppixmin, - PIX **ppixmax) -{ -l_int32 w, h; -PIX *pixmin1, *pixmax1, *pixmin2, *pixmax2; - - PROCNAME("pixMinMaxTiles"); - - if (ppixmin) *ppixmin = NULL; - if (ppixmax) *ppixmax = NULL; - if (!ppixmin || !ppixmax) - return ERROR_INT("&pixmin or &pixmax undefined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - if (sx < 5 || sy < 5) - return ERROR_INT("sx and/or sy less than 3", procName, 1); - if (smoothx < 0 || smoothy < 0) - return ERROR_INT("smooth params less than 0", procName, 1); - if (smoothx > 5 || smoothy > 5) - return ERROR_INT("smooth params exceed 5", procName, 1); - - /* Get the min and max values in each tile */ - pixmin1 = pixScaleGrayMinMax(pixs, sx, sy, L_CHOOSE_MIN); - pixmax1 = pixScaleGrayMinMax(pixs, sx, sy, L_CHOOSE_MAX); - - pixmin2 = pixExtendByReplication(pixmin1, 1, 1); - pixmax2 = pixExtendByReplication(pixmax1, 1, 1); - pixDestroy(&pixmin1); - pixDestroy(&pixmax1); - - /* Make sure no value is 0 */ - pixAddConstantGray(pixmin2, 1); - pixAddConstantGray(pixmax2, 1); - - /* Generate holes where the contrast is too small */ - pixSetLowContrast(pixmin2, pixmax2, mindiff); - - /* Fill the holes (0 values) */ - pixGetDimensions(pixmin2, &w, &h, NULL); - pixFillMapHoles(pixmin2, w, h, L_FILL_BLACK); - pixFillMapHoles(pixmax2, w, h, L_FILL_BLACK); - - /* Smooth if requested */ - if (smoothx > 0 || smoothy > 0) { - smoothx = L_MIN(smoothx, (w - 1) / 2); - smoothy = L_MIN(smoothy, (h - 1) / 2); - *ppixmin = pixBlockconv(pixmin2, smoothx, smoothy); - *ppixmax = pixBlockconv(pixmax2, smoothx, smoothy); - } - else { - *ppixmin = pixClone(pixmin2); - *ppixmax = pixClone(pixmax2); - } - pixCopyResolution(*ppixmin, pixs); - pixCopyResolution(*ppixmax, pixs); - 
pixDestroy(&pixmin2); - pixDestroy(&pixmax2); - - return 0; -} - - -/*! - * \brief pixSetLowContrast() - * - * \param[in] pixs1 8 bpp - * \param[in] pixs2 8 bpp - * \param[in] mindiff minimum difference to accept as valid - * \return 0 if OK; 1 if no pixel diffs are large enough, or on error - * - *
- * Notes:
- *      (1) This compares corresponding pixels in pixs1 and pixs2.
- *          When they differ by less than %mindiff, set the pixel
- *          values to 0 in each.  Each pixel typically represents a tile
- *          in a larger image, and a very small difference between
- *          the min and max in the tile indicates that the min and max
- *          values are not to be trusted.
- *      (2) If contrast (pixel difference) detection is expected to fail,
- *          caller should check return value.
- * 
- */ -l_ok -pixSetLowContrast(PIX *pixs1, - PIX *pixs2, - l_int32 mindiff) -{ -l_int32 i, j, w, h, d, wpl, val1, val2, found; -l_uint32 *data1, *data2, *line1, *line2; - - PROCNAME("pixSetLowContrast"); - - if (!pixs1 || !pixs2) - return ERROR_INT("pixs1 and pixs2 not both defined", procName, 1); - if (pixSizesEqual(pixs1, pixs2) == 0) - return ERROR_INT("pixs1 and pixs2 not equal size", procName, 1); - pixGetDimensions(pixs1, &w, &h, &d); - if (d != 8) - return ERROR_INT("depth not 8 bpp", procName, 1); - if (mindiff > 254) return 0; - - data1 = pixGetData(pixs1); - data2 = pixGetData(pixs2); - wpl = pixGetWpl(pixs1); - found = 0; /* init to not finding any diffs >= mindiff */ - for (i = 0; i < h; i++) { - line1 = data1 + i * wpl; - line2 = data2 + i * wpl; - for (j = 0; j < w; j++) { - val1 = GET_DATA_BYTE(line1, j); - val2 = GET_DATA_BYTE(line2, j); - if (L_ABS(val1 - val2) >= mindiff) { - found = 1; - break; - } - } - if (found) break; - } - if (!found) { - L_WARNING("no pixel pair diffs as large as mindiff\n", procName); - pixClearAll(pixs1); - pixClearAll(pixs2); - return 1; - } - - for (i = 0; i < h; i++) { - line1 = data1 + i * wpl; - line2 = data2 + i * wpl; - for (j = 0; j < w; j++) { - val1 = GET_DATA_BYTE(line1, j); - val2 = GET_DATA_BYTE(line2, j); - if (L_ABS(val1 - val2) < mindiff) { - SET_DATA_BYTE(line1, j, 0); - SET_DATA_BYTE(line2, j, 0); - } - } - } - - return 0; -} - - -/*! - * \brief pixLinearTRCTiled() - * - * \param[in] pixd [optional] 8 bpp - * \param[in] pixs 8 bpp, not colormapped - * \param[in] sx, sy tile dimensions - * \param[in] pixmin pix of min values in tiles - * \param[in] pixmax pix of max values in tiles - * \return pixd always - * - *
- * Notes:
- *      (1) pixd can be equal to pixs (in-place operation) or
- *          null (makes a new pixd).
- *      (2) sx and sy give the tile size; they are typically at least 20.
- *      (3) pixmin and pixmax are generated by pixMinMaxTiles()
- *      (4) For each tile, this does a linear expansion of the dynamic
- *          range so that the min value in the tile becomes 0 and the
- *          max value in the tile becomes 255.
- *      (5) The LUTs that do the mapping are generated as needed
- *          and stored for reuse in an integer array within the ptr array iaa[].
- * 
- */ -PIX * -pixLinearTRCTiled(PIX *pixd, - PIX *pixs, - l_int32 sx, - l_int32 sy, - PIX *pixmin, - PIX *pixmax) -{ -l_int32 i, j, k, m, w, h, wt, ht, wpl, wplt, xoff, yoff; -l_int32 minval, maxval, val, sval; -l_int32 *ia; -l_int32 **iaa; -l_uint32 *data, *datamin, *datamax, *line, *tline, *linemin, *linemax; - - PROCNAME("pixLinearTRCTiled"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, pixd); - if (pixd && pixd != pixs) - return (PIX *)ERROR_PTR("pixd not null or == pixs", procName, pixd); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is colormapped", procName, pixd); - if (!pixmin || !pixmax) - return (PIX *)ERROR_PTR("pixmin & pixmax not defined", procName, pixd); - if (sx < 5 || sy < 5) - return (PIX *)ERROR_PTR("sx and/or sy less than 5", procName, pixd); - - if ((iaa = (l_int32 **)LEPT_CALLOC(256, sizeof(l_int32 *))) == NULL) - return (PIX *)ERROR_PTR("iaa not made", procName, NULL); - if ((pixd = pixCopy(pixd, pixs)) == NULL) { - LEPT_FREE(iaa); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixGetDimensions(pixd, &w, &h, NULL); - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - datamin = pixGetData(pixmin); - datamax = pixGetData(pixmax); - wplt = pixGetWpl(pixmin); - pixGetDimensions(pixmin, &wt, &ht, NULL); - for (i = 0; i < ht; i++) { - line = data + sy * i * wpl; - linemin = datamin + i * wplt; - linemax = datamax + i * wplt; - yoff = sy * i; - for (j = 0; j < wt; j++) { - xoff = sx * j; - minval = GET_DATA_BYTE(linemin, j); - maxval = GET_DATA_BYTE(linemax, j); - if (maxval == minval) { - L_ERROR("shouldn't happen! 
i,j = %d,%d, minval = %d\n", - procName, i, j, minval); - continue; - } - if ((ia = iaaGetLinearTRC(iaa, maxval - minval)) == NULL) { - L_ERROR("failure to make ia for j = %d!\n", procName, j); - continue; - } - for (k = 0; k < sy && yoff + k < h; k++) { - tline = line + k * wpl; - for (m = 0; m < sx && xoff + m < w; m++) { - val = GET_DATA_BYTE(tline, xoff + m); - sval = val - minval; - sval = L_MAX(0, sval); - SET_DATA_BYTE(tline, xoff + m, ia[sval]); - } - } - } - } - - for (i = 0; i < 256; i++) - LEPT_FREE(iaa[i]); - LEPT_FREE(iaa); - return pixd; -} - - -/*! - * \brief iaaGetLinearTRC() - * - * \param[in] iaa bare array of ptrs to l_int32 - * \param[in] diff between min and max pixel values that are - * to be mapped to 0 and 255 - * \return ia LUT with input (val - minval) and output a - * value between 0 and 255) - */ -static l_int32 * -iaaGetLinearTRC(l_int32 **iaa, - l_int32 diff) -{ -l_int32 i; -l_int32 *ia; -l_float32 factor; - - PROCNAME("iaaGetLinearTRC"); - - if (!iaa) - return (l_int32 *)ERROR_PTR("iaa not defined", procName, NULL); - - if (iaa[diff] != NULL) /* already have it */ - return iaa[diff]; - - if ((ia = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32))) == NULL) - return (l_int32 *)ERROR_PTR("ia not made", procName, NULL); - iaa[diff] = ia; - if (diff == 0) { /* shouldn't happen */ - for (i = 0; i < 256; i++) - ia[i] = 128; - } - else { - factor = 255. / (l_float32)diff; - for (i = 0; i < diff + 1; i++) - ia[i] = (l_int32)(factor * i + 0.5); - for (i = diff + 1; i < 256; i++) - ia[i] = 255; - } - - return ia; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/affine.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/affine.c deleted file mode 100644 index 5c0214e4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/affine.c +++ /dev/null @@ -1,1624 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file affine.c - *
- *
- *      Affine (3 pt) image transformation using a sampled
- *      (to nearest integer) transform on each dest point
- *           PIX        *pixAffineSampledPta()
- *           PIX        *pixAffineSampled()
- *
- *      Affine (3 pt) image transformation using interpolation
- *      (or area mapping) for anti-aliasing images that are
- *      2, 4, or 8 bpp gray, or colormapped, or 32 bpp RGB
- *           PIX        *pixAffinePta()
- *           PIX        *pixAffine()
- *           PIX        *pixAffinePtaColor()
- *           PIX        *pixAffineColor()
- *           PIX        *pixAffinePtaGray()
- *           PIX        *pixAffineGray()
- *
- *      Affine transform including alpha (blend) component
- *           PIX        *pixAffinePtaWithAlpha()
- *
- *      Affine coordinate transformation
- *           l_int32     getAffineXformCoeffs()
- *           l_int32     affineInvertXform()
- *           l_int32     affineXformSampledPt()
- *           l_int32     affineXformPt()
- *
- *      Interpolation helper functions
- *           l_int32     linearInterpolatePixelGray()
- *           l_int32     linearInterpolatePixelColor()
- *
- *      Gauss-jordan linear equation solver
- *           l_int32     gaussjordan()
- *
- *      Affine image transformation using a sequence of
- *      shear/scale/translation operations
- *           PIX        *pixAffineSequential()
- *
- *      One can define a coordinate space by the location of the origin,
- *      the orientation of x and y axes, and the unit scaling along
- *      each axis.  An affine transform is a general linear
- *      transformation from one coordinate space to another.
- *
- *      For the general case, we can define the affine transform using
- *      two sets of three (noncollinear) points in a plane.  One set
- *      corresponds to the input (src) coordinate space; the other to the
- *      transformed (dest) coordinate space.  Each point in the
- *      src corresponds to one of the points in the dest.  With two
- *      sets of three points, we get a set of 6 equations in 6 unknowns
- *      that specifies the mapping between the coordinate spaces.
- *      The interface here allows you to specify either the corresponding
- *      sets of 3 points, or the transform itself (as a vector of 6
- *      coefficients).
- *
- *      Given the transform as a vector of 6 coefficients, we can compute
- *      both a a pointwise affine coordinate transformation and an
- *      affine image transformation.
- *
- *      To compute the coordinate transform, we need the coordinate
- *      value (x',y') in the transformed space for any point (x,y)
- *      in the original space.  To derive this transform from the
- *      three corresponding points, it is convenient to express the affine
- *      coordinate transformation using an LU decomposition of
- *      a set of six linear equations that express the six coordinates
- *      of the three points in the transformed space as a function of
- *      the six coordinates in the original space.  Once we have
- *      this transform matrix , we can transform an image by
- *      finding, for each destination pixel, the pixel (or pixels)
- *      in the source that give rise to it.
- *
- *      This 'pointwise' transformation can be done either by sampling
- *      and picking a single pixel in the src to replicate into the dest,
- *      or by interpolating (or averaging) over four src pixels to
- *      determine the value of the dest pixel.  The first method is
- *      implemented by pixAffineSampled() and the second method by
- *      pixAffine().  The interpolated method can only be used for
- *      images with more than 1 bpp, but for these, the image quality
- *      is significantly better than the sampled method, due to
- *      the 'antialiasing' effect of weighting the src pixels.
- *
- *      Interpolation works well when there is relatively little scaling,
- *      or if there is image expansion in general.  However, if there
- *      is significant image reduction, one should apply a low-pass
- *      filter before subsampling to avoid aliasing the high frequencies.
- *
- *      A typical application might be to align two images, which
- *      may be scaled, rotated and translated versions of each other.
- *      Through some pre-processing, three corresponding points are
- *      located in each of the two images.  One of the images is
- *      then to be (affine) transformed to align with the other.
- *      As mentioned, the standard way to do this is to use three
- *      sets of points, compute the 6 transformation coefficients
- *      from these points that describe the linear transformation,
- *
- *          x' = ax + by + c
- *          y' = dx + ey + f
- *
- *      and use this in a pointwise manner to transform the image.
- *
- *      N.B.  Be sure to see the comment in getAffineXformCoeffs(),
- *      regarding using the inverse of the affine transform for points
- *      to transform images.
- *
- *      There is another way to do this transformation; namely,
- *      by doing a sequence of simple affine transforms, without
- *      computing directly the affine coordinate transformation.
- *      We have at our disposal (1) translations (using rasterop),
- *      (2) horizontal and vertical shear about any horizontal and vertical
- *      line, respectively, and (3) non-isotropic scaling by two
- *      arbitrary x and y scaling factors.  We also have rotation
- *      about an arbitrary point, but this is equivalent to a set
- *      of three shears so we do not need to use it.
- *
- *      Why might we do this?  For binary images, it is usually
- *      more efficient to do such transformations by a sequence
- *      of word parallel operations.  Shear and translation can be
- *      done in-place and word parallel; arbitrary scaling is
- *      mostly pixel-wise.
- *
- *      Suppose that we are transforming image 1 to correspond to image 2.
- *      We have a set of three points, describing the coordinate space
- *      embedded in image 1, and we need to transform image 1 until
- *      those three points exactly correspond to the new coordinate space
- *      defined by the second set of three points.  In our image
- *      matching application, the latter set of three points was
- *      found to be the corresponding points in image 2.
- *
- *      The most elegant way I can think of to do such a sequential
- *      implementation is to imagine that we're going to transform
- *      BOTH images until they're aligned.  (We don't really want
- *      to transform both, because in fact we may only have one image
- *      that is undergoing a general affine transformation.)
- *
- *      Choose the 3 corresponding points as follows:
- *         ~ The 1st point is an origin
- *         ~ The 2nd point gives the orientation and scaling of the
- *           "x" axis with respect to the origin
- *         ~ The 3rd point does likewise for the "y" axis.
- *      These "axes" must not be collinear; otherwise they are
- *      arbitrary (although some strange things will happen if
- *      the handedness sweeping through the minimum angle between
- *      the axes is opposite).
- *
- *      An important constraint is that we have shear operations
- *      about an arbitrary horizontal or vertical line, but always
- *      parallel to the x or y axis.  If we continue to pretend that
- *      we have an unprimed coordinate space embedded in image 1 and
- *      a primed coordinate space embedded in image 2, we imagine
- *      (a) transforming image 1 by horizontal and vertical shears about
- *      point 1 to align points 3 and 2 along the y and x axes,
- *      respectively, and (b) transforming image 2 by horizontal and
- *      vertical shears about point 1' to align points 3' and 2' along
- *      the y and x axes.  Then we scale image 1 so that the distances
- *      from 1 to 2 and from 1 to 3 are equal to the distances in
- *      image 2 from 1' to 2' and from 1' to 3'.  This scaling operation
- *      leaves the true image origin, at (0,0) invariant, and will in
- *      general translate point 1.  The original points 1 and 1' will
- *      typically not coincide in any event, so we must translate
- *      the origin of image 1, at its current point 1, to the origin
- *      of image 2 at 1'.  The images should now be aligned.  But
- *      because we never really transformed image 2 (and image 2 may
- *      not even exist), we now perform  on image 1 the reverse of
- *      the shear transforms that we imagined doing on image 2;
- *      namely, the negative vertical shear followed by the negative
- *      horizontal shear.  Image 1 should now have its transformed
- *      unprimed coordinates aligned with the original primed
- *      coordinates.  In all this, it is only necessary to keep track
- *      of the shear angles and translations of points during the shears.
- *      What has been accomplished is a general affine transformation
- *      on image 1.
- *
- *      Having described all this, if you are going to use an
- *      affine transformation in an application, this is what you
- *      need to know:
- *
- *          (1) You should NEVER use the sequential method, because
- *              the image quality for 1 bpp text is much poorer
- *              (even though it is about 2x faster than the pointwise sampled
- *              method), and for images with depth greater than 1, it is
- *              nearly 20x slower than the pointwise sampled method
- *              and over 10x slower than the pointwise interpolated method!
- *              The sequential method is given here for purely
- *              pedagogical reasons.
- *
- *          (2) For 1 bpp images, use the pointwise sampled function
- *              pixAffineSampled().  For all other images, the best
- *              quality results result from using the pointwise
- *              interpolated function pixAffinePta() or pixAffine();
- *              the cost is less than a doubling of the computation time
- *              with respect to the sampled function.  If you use
- *              interpolation on colormapped images, the colormap will
- *              be removed, resulting in either a grayscale or color
- *              image, depending on the values in the colormap.
- *              If you want to retain the colormap, use pixAffineSampled().
- *
- *      Typical relative timing of pointwise transforms (sampled = 1.0):
- *      8 bpp:   sampled        1.0
- *               interpolated   1.6
- *      32 bpp:  sampled        1.0
- *               interpolated   1.8
- *      Additionally, the computation time/pixel is nearly the same
- *      for 8 bpp and 32 bpp, for both sampled and interpolated.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -extern l_float32 AlphaMaskBorderVals[2]; - -#ifndef NO_CONSOLE_IO -#define DEBUG 0 -#endif /* ~NO_CONSOLE_IO */ - -/*-------------------------------------------------------------* - * Sampled affine image transformation * - *-------------------------------------------------------------*/ -/*! - * \brief pixAffineSampledPta() - * - * \param[in] pixs all depths - * \param[in] ptad 3 pts of final coordinate space - * \param[in] ptas 3 pts of initial coordinate space - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary.
- *      (2) Retains colormap, which you can do for a sampled transform..
- *      (3) The 3 points must not be collinear.
- *      (4) The order of the 3 points is arbitrary; however, to compare
- *          with the sequential transform they must be in these locations
- *          and in this order: origin, x-axis, y-axis.
- *      (5) For 1 bpp images, this has much better quality results
- *          than pixAffineSequential(), particularly for text.
- *          It is about 3x slower, but does not require additional
- *          border pixels.  The poor quality of pixAffineSequential()
- *          is due to repeated quantized transforms.  It is strongly
- *          recommended that pixAffineSampled() be used for 1 bpp images.
- *      (6) For 8 or 32 bpp, much better quality is obtained by the
- *          somewhat slower pixAffinePta().  See that function
- *          for relative timings between sampled and interpolated.
- *      (7) To repeat, use of the sequential transform,
- *          pixAffineSequential(), for any images, is discouraged.
- * 
- */ -PIX * -pixAffineSampledPta(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 incolor) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixAffineSampledPta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (ptaGetCount(ptas) != 3) - return (PIX *)ERROR_PTR("ptas count not 3", procName, NULL); - if (ptaGetCount(ptad) != 3) - return (PIX *)ERROR_PTR("ptad count not 3", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getAffineXformCoeffs(ptad, ptas, &vc); - pixd = pixAffineSampled(pixs, vc, incolor); - LEPT_FREE(vc); - - return pixd; -} - - -/*! - * \brief pixAffineSampled() - * - * \param[in] pixs all depths - * \param[in] vc vector of 6 coefficients for affine transformation - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary.
- *      (2) Retains colormap, which you can do for a sampled transform..
- *      (3) For 8 or 32 bpp, much better quality is obtained by the
- *          somewhat slower pixAffine().  See that function
- *          for relative timings between sampled and interpolated.
- * 
- */ -PIX * -pixAffineSampled(PIX *pixs, - l_float32 *vc, - l_int32 incolor) -{ -l_int32 i, j, w, h, d, x, y, wpls, wpld, color, cmapindex; -l_uint32 val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixAffineSampled"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 1, 2, 4, 8 or 16", procName, NULL); - - /* Init all dest pixels to color to be brought in from outside */ - pixd = pixCreateTemplate(pixs); - if ((cmap = pixGetColormap(pixs)) != NULL) { - if (incolor == L_BRING_IN_WHITE) - color = 1; - else - color = 0; - pixcmapAddBlackOrWhite(cmap, color, &cmapindex); - pixSetAllArbitrary(pixd, cmapindex); - } else { - if ((d == 1 && incolor == L_BRING_IN_WHITE) || - (d > 1 && incolor == L_BRING_IN_BLACK)) { - pixClearAll(pixd); - } else { - pixSetAll(pixd); - } - } - - /* Scan over the dest pixels */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - affineXformSampledPt(vc, j, i, &x, &y); - if (x < 0 || y < 0 || x >=w || y >= h) - continue; - lines = datas + y * wpls; - if (d == 1) { - val = GET_DATA_BIT(lines, x); - SET_DATA_BIT_VAL(lined, j, val); - } else if (d == 8) { - val = GET_DATA_BYTE(lines, x); - SET_DATA_BYTE(lined, j, val); - } else if (d == 32) { - lined[j] = lines[x]; - } else if (d == 2) { - val = GET_DATA_DIBIT(lines, x); - SET_DATA_DIBIT(lined, j, val); - } else if (d == 4) { - val = GET_DATA_QBIT(lines, x); - SET_DATA_QBIT(lined, j, val); - } - } - } - - return pixd; -} - - 
-/*---------------------------------------------------------------------* - * Interpolated affine image transformation * - *---------------------------------------------------------------------*/ -/*! - * \brief pixAffinePta() - * - * \param[in] pixs all depths; colormap ok - * \param[in] ptad 3 pts of final coordinate space - * \param[in] ptas 3 pts of initial coordinate space - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary
- *      (2) Removes any existing colormap, if necessary, before transforming
- * 
- */ -PIX * -pixAffinePta(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 incolor) -{ -l_int32 d; -l_uint32 colorval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixAffinePta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (ptaGetCount(ptas) != 3) - return (PIX *)ERROR_PTR("ptas count not 3", procName, NULL); - if (ptaGetCount(ptad) != 3) - return (PIX *)ERROR_PTR("ptad count not 3", procName, NULL); - - if (pixGetDepth(pixs) == 1) - return pixAffineSampledPta(pixs, ptad, ptas, incolor); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual color to bring in from edges */ - colorval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - colorval = 255; - else /* d == 32 */ - colorval = 0xffffff00; - } - - if (d == 8) - pixd = pixAffinePtaGray(pixt2, ptad, ptas, colorval); - else /* d == 32 */ - pixd = pixAffinePtaColor(pixt2, ptad, ptas, colorval); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! - * \brief pixAffine() - * - * \param[in] pixs all depths; colormap ok - * \param[in] vc vector of 6 coefficients for affine transformation - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary
- *      (2) Removes any existing colormap, if necessary, before transforming
- * 
- */ -PIX * -pixAffine(PIX *pixs, - l_float32 *vc, - l_int32 incolor) -{ -l_int32 d; -l_uint32 colorval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixAffine"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - if (pixGetDepth(pixs) == 1) - return pixAffineSampled(pixs, vc, incolor); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual color to bring in from edges */ - colorval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - colorval = 255; - else /* d == 32 */ - colorval = 0xffffff00; - } - - if (d == 8) - pixd = pixAffineGray(pixt2, vc, colorval); - else /* d == 32 */ - pixd = pixAffineColor(pixt2, vc, colorval); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! 
- * \brief pixAffinePtaColor() - * - * \param[in] pixs 32 bpp - * \param[in] ptad 3 pts of final coordinate space - * \param[in] ptas 3 pts of initial coordinate space - * \param[in] colorval e.g.: 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixAffinePtaColor(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_uint32 colorval) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixAffinePtaColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - if (ptaGetCount(ptas) != 3) - return (PIX *)ERROR_PTR("ptas count not 3", procName, NULL); - if (ptaGetCount(ptad) != 3) - return (PIX *)ERROR_PTR("ptad count not 3", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getAffineXformCoeffs(ptad, ptas, &vc); - pixd = pixAffineColor(pixs, vc, colorval); - LEPT_FREE(vc); - - return pixd; -} - - -/*! 
- * \brief pixAffineColor() - * - * \param[in] pixs 32 bpp - * \param[in] vc vector of 6 coefficients for affine transformation - * \param[in] colorval e.g.: 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixAffineColor(PIX *pixs, - l_float32 *vc, - l_uint32 colorval) -{ -l_int32 i, j, w, h, d, wpls, wpld; -l_uint32 val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixAffineColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixd, colorval); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - affineXformPt(vc, j, i, &x, &y); - linearInterpolatePixelColor(datas, wpls, w, h, x, y, colorval, - &val); - *(lined + j) = val; - } - } - - /* If rgba, transform the pixs alpha channel and insert in pixd */ - if (pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixAffineGray(pix1, vc, 255); /* bring in opaque */ - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - return pixd; -} - - -/*! 
- * \brief pixAffinePtaGray() - * - * \param[in] pixs 8 bpp - * \param[in] ptad 3 pts of final coordinate space - * \param[in] ptas 3 pts of initial coordinate space - * \param[in] grayval e.g.: 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixAffinePtaGray(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_uint8 grayval) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixAffinePtaGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - if (ptaGetCount(ptas) != 3) - return (PIX *)ERROR_PTR("ptas count not 3", procName, NULL); - if (ptaGetCount(ptad) != 3) - return (PIX *)ERROR_PTR("ptad count not 3", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getAffineXformCoeffs(ptad, ptas, &vc); - pixd = pixAffineGray(pixs, vc, grayval); - LEPT_FREE(vc); - - return pixd; -} - - - -/*! 
- * \brief pixAffineGray() - * - * \param[in] pixs 8 bpp - * \param[in] vc vector of 6 coefficients for affine transformation - * \param[in] grayval e.g.: 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixAffineGray(PIX *pixs, - l_float32 *vc, - l_uint8 grayval) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -PIX *pixd; - - PROCNAME("pixAffineGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixd, grayval); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - affineXformPt(vc, j, i, &x, &y); - linearInterpolatePixelGray(datas, wpls, w, h, x, y, grayval, &val); - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Affine transform including alpha (blend) component * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixAffinePtaWithAlpha() - * - * \param[in] pixs 32 bpp rgb - * \param[in] ptad 3 pts of final coordinate space - * \param[in] ptas 3 pts of initial coordinate space - * \param[in] pixg [optional] 8 bpp, can be null - * \param[in] fract between 0.0 and 1.0, with 0.0 fully transparent - * and 1.0 fully opaque - * \param[in] border of pixels added to capture transformed source pixels - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The alpha channel is transformed separately from pixs,
- *          and aligns with it, being fully transparent outside the
- *          boundary of the transformed pixs.  For pixels that are fully
- *          transparent, a blending function like pixBlendWithGrayMask()
- *          will give zero weight to corresponding pixels in pixs.
- *      (2) If pixg is NULL, it is generated as an alpha layer that is
- *          partially opaque, using %fract.  Otherwise, it is cropped
- *          to pixs if required and %fract is ignored.  The alpha channel
- *          in pixs is never used.
- *      (3) Colormaps are removed.
- *      (4) When pixs is transformed, it doesn't matter what color is brought
- *          in because the alpha channel will be transparent (0) there.
- *      (5) To avoid losing source pixels in the destination, it may be
- *          necessary to add a border to the source pix before doing
- *          the affine transformation.  This can be any non-negative number.
- *      (6) The input %ptad and %ptas are in a coordinate space before
- *          the border is added.  Internally, we compensate for this
- *          before doing the affine transform on the image after the border
- *          is added.
- *      (7) The default setting for the border values in the alpha channel
- *          is 0 (transparent) for the outermost ring of pixels and
- *          (0.5 * fract * 255) for the second ring.  When blended over
- *          a second image, this
- *          (a) shrinks the visible image to make a clean overlap edge
- *              with an image below, and
- *          (b) softens the edges by weakening the aliasing there.
- *          Use l_setAlphaMaskBorder() to change these values.
- * 
- */ -PIX * -pixAffinePtaWithAlpha(PIX *pixs, - PTA *ptad, - PTA *ptas, - PIX *pixg, - l_float32 fract, - l_int32 border) -{ -l_int32 ws, hs, d; -PIX *pixd, *pixb1, *pixb2, *pixg2, *pixga; -PTA *ptad2, *ptas2; - - PROCNAME("pixAffinePtaWithAlpha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &ws, &hs, &d); - if (d != 32 && pixGetColormap(pixs) == NULL) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (pixg && pixGetDepth(pixg) != 8) { - L_WARNING("pixg not 8 bpp; using 'fract' transparent alpha\n", - procName); - pixg = NULL; - } - if (!pixg && (fract < 0.0 || fract > 1.0)) { - L_WARNING("invalid fract; using 1.0 (fully transparent)\n", procName); - fract = 1.0; - } - if (!pixg && fract == 0.0) - L_WARNING("fully opaque alpha; image will not be blended\n", procName); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - - /* Add border; the color doesn't matter */ - pixb1 = pixAddBorder(pixs, border, 0); - - /* Transform the ptr arrays to work on the bordered image */ - ptad2 = ptaTransform(ptad, border, border, 1.0, 1.0); - ptas2 = ptaTransform(ptas, border, border, 1.0, 1.0); - - /* Do separate affine transform of rgb channels of pixs and of pixg */ - pixd = pixAffinePtaColor(pixb1, ptad2, ptas2, 0); - if (!pixg) { - pixg2 = pixCreate(ws, hs, 8); - if (fract == 1.0) - pixSetAll(pixg2); - else - pixSetAllArbitrary(pixg2, (l_int32)(255.0 * fract)); - } else { - pixg2 = pixResizeToMatch(pixg, NULL, ws, hs); - } - if (ws > 10 && hs > 10) { /* see note 7 */ - pixSetBorderRingVal(pixg2, 1, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[0])); - pixSetBorderRingVal(pixg2, 2, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[1])); - - } - pixb2 = pixAddBorder(pixg2, border, 0); /* must be black border */ - pixga = pixAffinePtaGray(pixb2, ptad2, ptas2, 0); - pixSetRGBComponent(pixd, pixga, 
L_ALPHA_CHANNEL); - pixSetSpp(pixd, 4); - - pixDestroy(&pixg2); - pixDestroy(&pixb1); - pixDestroy(&pixb2); - pixDestroy(&pixga); - ptaDestroy(&ptad2); - ptaDestroy(&ptas2); - return pixd; -} - - -/*-------------------------------------------------------------* - * Affine coordinate transformation * - *-------------------------------------------------------------*/ -/*! - * \brief getAffineXformCoeffs() - * - * \param[in] ptas source 3 points; unprimed - * \param[in] ptad transformed 3 points; primed - * \param[out] pvc vector of coefficients of transform - * \return 0 if OK; 1 on error - * - *
- *  We have a set of six equations, describing the affine
- *  transformation that takes 3 points ptas into 3 other
- *  points ptad.  These equations are:
- *
- *          x1' = c[0]*x1 + c[1]*y1 + c[2]
- *          y1' = c[3]*x1 + c[4]*y1 + c[5]
- *          x2' = c[0]*x2 + c[1]*y2 + c[2]
- *          y2' = c[3]*x2 + c[4]*y2 + c[5]
- *          x3' = c[0]*x3 + c[1]*y3 + c[2]
- *          y3' = c[3]*x3 + c[4]*y3 + c[5]
- *
- *  This can be represented as
- *
- *          AC = B
- *
- *  where B and C are column vectors
- *
- *          B = [ x1' y1' x2' y2' x3' y3' ]
- *          C = [ c[0] c[1] c[2] c[3] c[4] c[5] ]
- *
- *  and A is the 6x6 matrix
- *
- *          x1   y1   1   0    0    0
- *           0    0   0   x1   y1   1
- *          x2   y2   1   0    0    0
- *           0    0   0   x2   y2   1
- *          x3   y3   1   0    0    0
- *           0    0   0   x3   y3   1
- *
- *  These six equations are solved here for the coefficients C.
- *
- *  These six coefficients can then be used to find the dest
- *  point (x',y') corresponding to any src point (x,y), according
- *  to the equations
- *
- *           x' = c[0]x + c[1]y + c[2]
- *           y' = c[3]x + c[4]y + c[5]
- *
- *  that are implemented in affineXformPt.
- *
- *  !!!!!!!!!!!!!!!!!!   Very important   !!!!!!!!!!!!!!!!!!!!!!
- *
- *  When the affine transform is composed from a set of simple
- *  operations such as translation, scaling and rotation,
- *  it is built in a form to convert from the un-transformed src
- *  point to the transformed dest point.  However, when an
- *  affine transform is used on images, it is used in an inverted
- *  way: it converts from the transformed dest point to the
- *  un-transformed src point.  So, for example, if you transform
- *  a boxa using transform A, to transform an image in the same
- *  way you must use the inverse of A.
- *
- *  For example, if you transform a boxa with a 3x3 affine matrix
- *  'mat', the analogous image transformation must use 'matinv':
- * \code
- *     boxad = boxaAffineTransform(boxas, mat);
- *     affineInvertXform(mat, &matinv);
- *     pixd = pixAffine(pixs, matinv, L_BRING_IN_WHITE);
- * \endcode
- *  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- * 
- */ -l_ok -getAffineXformCoeffs(PTA *ptas, - PTA *ptad, - l_float32 **pvc) -{ -l_int32 i; -l_float32 x1, y1, x2, y2, x3, y3; -l_float32 *b; /* rhs vector of primed coords X'; coeffs returned in *pvc */ -l_float32 *a[6]; /* 6x6 matrix A */ - - PROCNAME("getAffineXformCoeffs"); - - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (!ptad) - return ERROR_INT("ptad not defined", procName, 1); - if (!pvc) - return ERROR_INT("&vc not defined", procName, 1); - - if ((b = (l_float32 *)LEPT_CALLOC(6, sizeof(l_float32))) == NULL) - return ERROR_INT("b not made", procName, 1); - *pvc = b; - - ptaGetPt(ptas, 0, &x1, &y1); - ptaGetPt(ptas, 1, &x2, &y2); - ptaGetPt(ptas, 2, &x3, &y3); - ptaGetPt(ptad, 0, &b[0], &b[1]); - ptaGetPt(ptad, 1, &b[2], &b[3]); - ptaGetPt(ptad, 2, &b[4], &b[5]); - - for (i = 0; i < 6; i++) - if ((a[i] = (l_float32 *)LEPT_CALLOC(6, sizeof(l_float32))) == NULL) - return ERROR_INT("a[i] not made", procName, 1); - - a[0][0] = x1; - a[0][1] = y1; - a[0][2] = 1.; - a[1][3] = x1; - a[1][4] = y1; - a[1][5] = 1.; - a[2][0] = x2; - a[2][1] = y2; - a[2][2] = 1.; - a[3][3] = x2; - a[3][4] = y2; - a[3][5] = 1.; - a[4][0] = x3; - a[4][1] = y3; - a[4][2] = 1.; - a[5][3] = x3; - a[5][4] = y3; - a[5][5] = 1.; - - gaussjordan(a, b, 6); - - for (i = 0; i < 6; i++) - LEPT_FREE(a[i]); - - return 0; -} - - -/*! - * \brief affineInvertXform() - * - * \param[in] vc vector of 6 coefficients - * \param[out] pvci inverted transform - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The 6 affine transform coefficients are the first
- *          two rows of a 3x3 matrix where the last row has
- *          only a 1 in the third column.  We invert this
- *          using gaussjordan(), and select the first 2 rows
- *          as the coefficients of the inverse affine transform.
- *      (2) Alternatively, we can find the inverse transform
- *          coefficients by inverting the 2x2 submatrix,
- *          and treating the top 2 coefficients in the 3rd column as
- *          a RHS vector for that 2x2 submatrix.  Then the
- *          6 inverted transform coefficients are composed of
- *          the inverted 2x2 submatrix and the negative of the
- *          transformed RHS vector.  Why is this so?  We have
- *             Y = AX + R  (2 equations in 6 unknowns)
- *          Then
- *             X = A'Y - A'R
- *          Gauss-jordan solves
- *             AF = R
- *          and puts the solution for F, which is A'R,
- *          into the input R vector.
- *
- * 
- */ -l_ok -affineInvertXform(l_float32 *vc, - l_float32 **pvci) -{ -l_int32 i; -l_float32 *vci; -l_float32 *a[3]; -l_float32 b[3] = {1.0, 1.0, 1.0}; /* anything; results ignored */ - - PROCNAME("affineInvertXform"); - - if (!pvci) - return ERROR_INT("&vci not defined", procName, 1); - *pvci = NULL; - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - -#if 1 - for (i = 0; i < 3; i++) - a[i] = (l_float32 *)LEPT_CALLOC(3, sizeof(l_float32)); - a[0][0] = vc[0]; - a[0][1] = vc[1]; - a[0][2] = vc[2]; - a[1][0] = vc[3]; - a[1][1] = vc[4]; - a[1][2] = vc[5]; - a[2][2] = 1.0; - gaussjordan(a, b, 3); /* this inverts matrix a */ - vci = (l_float32 *)LEPT_CALLOC(6, sizeof(l_float32)); - *pvci = vci; - vci[0] = a[0][0]; - vci[1] = a[0][1]; - vci[2] = a[0][2]; - vci[3] = a[1][0]; - vci[4] = a[1][1]; - vci[5] = a[1][2]; - for (i = 0; i < 3; i++) - LEPT_FREE(a[i]); - -#else - - /* Alternative version, inverting a 2x2 matrix */ - { l_float32 *a2[2]; - for (i = 0; i < 2; i++) - a2[i] = (l_float32 *)LEPT_CALLOC(2, sizeof(l_float32)); - a2[0][0] = vc[0]; - a2[0][1] = vc[1]; - a2[1][0] = vc[3]; - a2[1][1] = vc[4]; - b[0] = vc[2]; - b[1] = vc[5]; - gaussjordan(a2, b, 2); /* this inverts matrix a2 */ - vci = (l_float32 *)LEPT_CALLOC(6, sizeof(l_float32)); - *pvci = vci; - vci[0] = a2[0][0]; - vci[1] = a2[0][1]; - vci[2] = -b[0]; /* note sign */ - vci[3] = a2[1][0]; - vci[4] = a2[1][1]; - vci[5] = -b[1]; /* note sign */ - for (i = 0; i < 2; i++) - LEPT_FREE(a2[i]); - } -#endif - - return 0; -} - - -/*! - * \brief affineXformSampledPt() - * - * \param[in] vc vector of 6 coefficients - * \param[in] x, y initial point - * \param[out] pxp, pyp transformed point - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds the nearest pixel coordinates of the transformed point.
- *      (2) It does not check ptrs for returned data!
- * 
- */ -l_ok -affineXformSampledPt(l_float32 *vc, - l_int32 x, - l_int32 y, - l_int32 *pxp, - l_int32 *pyp) -{ - PROCNAME("affineXformSampledPt"); - - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - - *pxp = (l_int32)(vc[0] * x + vc[1] * y + vc[2] + 0.5); - *pyp = (l_int32)(vc[3] * x + vc[4] * y + vc[5] + 0.5); - return 0; -} - - -/*! - * \brief affineXformPt() - * - * \param[in] vc vector of 6 coefficients - * \param[in] x, y initial point - * \param[out] pxp, pyp transformed point - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This computes the floating point location of the transformed point.
- *      (2) It does not check ptrs for returned data!
- * 
- */ -l_ok -affineXformPt(l_float32 *vc, - l_int32 x, - l_int32 y, - l_float32 *pxp, - l_float32 *pyp) -{ - PROCNAME("affineXformPt"); - - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - - *pxp = vc[0] * x + vc[1] * y + vc[2]; - *pyp = vc[3] * x + vc[4] * y + vc[5]; - return 0; -} - - -/*-------------------------------------------------------------* - * Interpolation helper functions * - *-------------------------------------------------------------*/ -/*! - * \brief linearInterpolatePixelColor() - * - * \param[in] datas ptr to beginning of image data - * \param[in] wpls 32-bit word/line for this data array - * \param[in] w, h of image - * \param[in] x, y floating pt location for evaluation - * \param[in] colorval color brought in from the outside when the - * input x,y location is outside the image; - * in 0xrrggbb00 format) - * \param[out] pval interpolated color value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a standard linear interpolation function.  It is
- *          equivalent to area weighting on each component, and
- *          avoids "jaggies" when rendering sharp edges.
- * 
- */ -l_ok -linearInterpolatePixelColor(l_uint32 *datas, - l_int32 wpls, - l_int32 w, - l_int32 h, - l_float32 x, - l_float32 y, - l_uint32 colorval, - l_uint32 *pval) -{ -l_int32 valid, xpm, ypm, xp, xp2, yp, xf, yf; -l_int32 rval, gval, bval; -l_uint32 word00, word01, word10, word11; -l_uint32 *lines; - - PROCNAME("linearInterpolatePixelColor"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = colorval; - if (!datas) - return ERROR_INT("datas not defined", procName, 1); - - /* Skip if x or y are invalid. (x,y) must be in the source image. - * Failure to detect an invalid point will cause a mem address fault. - * Occasionally, x or y will be a nan, and relational checks always - * fail for nans. Therefore we check if the point is inside the pix */ - valid = (x >= 0.0 && y >= 0.0 && x < w && y < h); - if (!valid) return 0; - - xpm = (l_int32)(16.0 * x); - ypm = (l_int32)(16.0 * y); - xp = xpm >> 4; - xp2 = xp + 1 < w ? xp + 1 : xp; - yp = ypm >> 4; - if (yp + 1 >= h) wpls = 0; - xf = xpm & 0x0f; - yf = ypm & 0x0f; - -#if DEBUG - if (xf < 0 || yf < 0) - lept_stderr("xp = %d, yp = %d, xf = %d, yf = %d\n", xp, yp, xf, yf); -#endif /* DEBUG */ - - /* Do area weighting (eqiv. 
to linear interpolation) */ - lines = datas + yp * wpls; - word00 = *(lines + xp); - word10 = *(lines + xp2); - word01 = *(lines + wpls + xp); - word11 = *(lines + wpls + xp2); - rval = ((16 - xf) * (16 - yf) * ((word00 >> L_RED_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_RED_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_RED_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_RED_SHIFT) & 0xff)) / 256; - gval = ((16 - xf) * (16 - yf) * ((word00 >> L_GREEN_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_GREEN_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_GREEN_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_GREEN_SHIFT) & 0xff)) / 256; - bval = ((16 - xf) * (16 - yf) * ((word00 >> L_BLUE_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_BLUE_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_BLUE_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_BLUE_SHIFT) & 0xff)) / 256; - composeRGBPixel(rval, gval, bval, pval); - return 0; -} - - -/*! - * \brief linearInterpolatePixelGray() - * - * \param[in] datas ptr to beginning of image data - * \param[in] wpls 32-bit word/line for this data array - * \param[in] w, h of image - * \param[in] x, y floating pt location for evaluation - * \param[in] grayval color brought in from the outside when the - * input x,y location is outside the image - * \param[out] pval interpolated gray value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a standard linear interpolation function.  It is
- *          equivalent to area weighting on each component, and
- *          avoids "jaggies" when rendering sharp edges.
- * 
- */ -l_ok -linearInterpolatePixelGray(l_uint32 *datas, - l_int32 wpls, - l_int32 w, - l_int32 h, - l_float32 x, - l_float32 y, - l_int32 grayval, - l_int32 *pval) -{ -l_int32 valid, xpm, ypm, xp, xp2, yp, xf, yf, v00, v10, v01, v11; -l_uint32 *lines; - - PROCNAME("linearInterpolatePixelGray"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = grayval; - if (!datas) - return ERROR_INT("datas not defined", procName, 1); - - /* Skip if x or y is invalid. (x,y) must be in the source image. - * Failure to detect an invalid point will cause a mem address fault. - * Occasionally, x or y will be a nan, and relational checks always - * fail for nans. Therefore we check if the point is inside the pix */ - valid = (x >= 0.0 && y >= 0.0 && x < w && y < h); - if (!valid) return 0; - - xpm = (l_int32)(16.0 * x); - ypm = (l_int32)(16.0 * y); - xp = xpm >> 4; - xp2 = xp + 1 < w ? xp + 1 : xp; - yp = ypm >> 4; - if (yp + 1 >= h) wpls = 0; - xf = xpm & 0x0f; - yf = ypm & 0x0f; - -#if DEBUG - if (xf < 0 || yf < 0) - lept_stderr("xp = %d, yp = %d, xf = %d, yf = %d\n", xp, yp, xf, yf); -#endif /* DEBUG */ - - /* Interpolate by area weighting. */ - lines = datas + yp * wpls; - v00 = (16 - xf) * (16 - yf) * GET_DATA_BYTE(lines, xp); - v10 = xf * (16 - yf) * GET_DATA_BYTE(lines, xp2); - v01 = (16 - xf) * yf * GET_DATA_BYTE(lines + wpls, xp); - v11 = xf * yf * GET_DATA_BYTE(lines + wpls, xp2); - *pval = (v00 + v01 + v10 + v11) / 256; - return 0; -} - - - -/*-------------------------------------------------------------* - * Gauss-jordan linear equation solver * - *-------------------------------------------------------------*/ -#define SWAP(a,b) {temp = (a); (a) = (b); (b) = temp;} - -/*! - * \brief gaussjordan() - * - * \param[in] a n x n matrix - * \param[in] b n x 1 right-hand side column vector - * \param[in] n dimension - * \return 0 if ok, 1 on error - * - *
- * Notes:
- *      (1) There are two side-effects:
- *          * The matrix a is transformed to its inverse A
- *          * The rhs vector b is transformed to the solution x
- *            of the linear equation ax = b
- *      (2) The inverse A can then be used to solve the same equation with
- *          different rhs vectors c by multiplication: x = Ac
- *      (3) Adapted from "Numerical Recipes in C, Second Edition", 1992,
- *          pp. 36-41 (gauss-jordan elimination)
- * 
- */ -l_int32 -gaussjordan(l_float32 **a, - l_float32 *b, - l_int32 n) -{ -l_int32 i, icol, irow, j, k, col, row, success; -l_int32 *indexc, *indexr, *ipiv; -l_float32 maxval, val, pivinv, temp; - - PROCNAME("gaussjordan"); - - if (!a) - return ERROR_INT("a not defined", procName, 1); - if (!b) - return ERROR_INT("b not defined", procName, 1); - - success = TRUE; - indexc = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32)); - indexr = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32)); - ipiv = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32)); - if (!indexc || !indexr || !ipiv) { - L_ERROR("array not made\n", procName); - success = FALSE; - goto cleanup_arrays; - } - - icol = irow = 0; /* silence static checker */ - for (i = 0; i < n; i++) { - maxval = 0.0; - for (j = 0; j < n; j++) { - if (ipiv[j] != 1) { - for (k = 0; k < n; k++) { - if (ipiv[k] == 0) { - if (fabs(a[j][k]) >= maxval) { - maxval = fabs(a[j][k]); - irow = j; - icol = k; - } - } else if (ipiv[k] > 1) { - L_ERROR("singular matrix\n", procName); - success = FALSE; - goto cleanup_arrays; - } - } - } - } - ++(ipiv[icol]); - - if (irow != icol) { - for (col = 0; col < n; col++) - SWAP(a[irow][col], a[icol][col]); - SWAP(b[irow], b[icol]); - } - - indexr[i] = irow; - indexc[i] = icol; - if (a[icol][icol] == 0.0) { - L_ERROR("singular matrix\n", procName); - success = FALSE; - goto cleanup_arrays; - } - pivinv = 1.0 / a[icol][icol]; - a[icol][icol] = 1.0; - for (col = 0; col < n; col++) - a[icol][col] *= pivinv; - b[icol] *= pivinv; - - for (row = 0; row < n; row++) { - if (row != icol) { - val = a[row][icol]; - a[row][icol] = 0.0; - for (col = 0; col < n; col++) - a[row][col] -= a[icol][col] * val; - b[row] -= b[icol] * val; - } - } - } - - for (col = n - 1; col >= 0; col--) { - if (indexr[col] != indexc[col]) { - for (k = 0; k < n; k++) - SWAP(a[k][indexr[col]], a[k][indexc[col]]); - } - } - -cleanup_arrays: - LEPT_FREE(indexr); - LEPT_FREE(indexc); - LEPT_FREE(ipiv); - return (success) ? 
0 : 1; -} - - -/*-------------------------------------------------------------* - * Sequential affine image transformation * - *-------------------------------------------------------------*/ -/*! - * \brief pixAffineSequential() - * - * \param[in] pixs - * \param[in] ptad 3 pts of final coordinate space - * \param[in] ptas 3 pts of initial coordinate space - * \param[in] bw pixels of additional border width during computation - * \param[in] bh pixels of additional border height during computation - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The 3 pts must not be collinear.
- *      (2) The 3 pts must be given in this order:
- *           ~ origin
- *           ~ a location along the x-axis
- *           ~ a location along the y-axis.
- *      (3) You must guess how much border must be added so that no
- *          pixels are lost in the transformations from src to
- *          dest coordinate space.  (This can be calculated but it
- *          is a lot of work!)  For coordinate spaces that are nearly
- *          at right angles, on a 300 ppi scanned page, the addition
- *          of 1000 pixels on each side is usually sufficient.
- *      (4) This is here for pedagogical reasons.  It is about 3x faster
- *          on 1 bpp images than pixAffineSampled(), but the results
- *          on text are much inferior.
- * 
- */ -PIX * -pixAffineSequential(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 bw, - l_int32 bh) -{ -l_int32 x1, y1, x2, y2, x3, y3; /* ptas */ -l_int32 x1p, y1p, x2p, y2p, x3p, y3p; /* ptad */ -l_int32 x1sc, y1sc; /* scaled origin */ -l_float32 x2s, x2sp, scalex, scaley; -l_float32 th3, th3p, ph2, ph2p; -#if DEBUG -l_float32 rad2deg; -#endif /* DEBUG */ -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixAffineSequential"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - - if (ptaGetCount(ptas) != 3) - return (PIX *)ERROR_PTR("ptas count not 3", procName, NULL); - if (ptaGetCount(ptad) != 3) - return (PIX *)ERROR_PTR("ptad count not 3", procName, NULL); - ptaGetIPt(ptas, 0, &x1, &y1); - ptaGetIPt(ptas, 1, &x2, &y2); - ptaGetIPt(ptas, 2, &x3, &y3); - ptaGetIPt(ptad, 0, &x1p, &y1p); - ptaGetIPt(ptad, 1, &x2p, &y2p); - ptaGetIPt(ptad, 2, &x3p, &y3p); - - pix1 = pix2 = pixd = NULL; - - if (y1 == y3) - return (PIX *)ERROR_PTR("y1 == y3!", procName, NULL); - if (y1p == y3p) - return (PIX *)ERROR_PTR("y1p == y3p!", procName, NULL); - - if (bw != 0 || bh != 0) { - /* resize all points and add border to pixs */ - x1 = x1 + bw; - y1 = y1 + bh; - x2 = x2 + bw; - y2 = y2 + bh; - x3 = x3 + bw; - y3 = y3 + bh; - x1p = x1p + bw; - y1p = y1p + bh; - x2p = x2p + bw; - y2p = y2p + bh; - x3p = x3p + bw; - y3p = y3p + bh; - - if ((pix1 = pixAddBorderGeneral(pixs, bw, bw, bh, bh, 0)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - } else { - pix1 = pixCopy(NULL, pixs); - } - - /*-------------------------------------------------------------* - The horizontal shear is done to move the 3rd point to the - y axis. This moves the 2nd point either towards or away - from the y axis, depending on whether it is above or below - the x axis. 
That motion must be computed so that we know - the angle of vertical shear to use to get the 2nd point - on the x axis. We must also know the x coordinate of the - 2nd point in order to compute how much scaling is required - to match points on the axis. - *-------------------------------------------------------------*/ - - /* Shear angles required to put src points on x and y axes */ - th3 = atan2((l_float64)(x1 - x3), (l_float64)(y1 - y3)); - x2s = (l_float32)(x2 - ((l_float32)(y1 - y2) * (x3 - x1)) / (y1 - y3)); - if (x2s == (l_float32)x1) { - L_ERROR("x2s == x1!\n", procName); - goto cleanup_pix; - } - ph2 = atan2((l_float64)(y1 - y2), (l_float64)(x2s - x1)); - - /* Shear angles required to put dest points on x and y axes. - * Use the negative of these values to instead move the - * src points from the axes to the actual dest position. - * These values are also needed to scale the image. */ - th3p = atan2((l_float64)(x1p - x3p), (l_float64)(y1p - y3p)); - x2sp = (l_float32)(x2p - - ((l_float32)(y1p - y2p) * (x3p - x1p)) / (y1p - y3p)); - if (x2sp == (l_float32)x1p) { - L_ERROR("x2sp == x1p!\n", procName); - goto cleanup_pix; - } - ph2p = atan2((l_float64)(y1p - y2p), (l_float64)(x2sp - x1p)); - - /* Shear image to first put src point 3 on the y axis, - * and then to put src point 2 on the x axis */ - pixHShearIP(pix1, y1, th3, L_BRING_IN_WHITE); - pixVShearIP(pix1, x1, ph2, L_BRING_IN_WHITE); - - /* Scale image to match dest scale. The dest scale - * is calculated above from the angles th3p and ph2p - * that would be required to move the dest points to - * the x and y axes. */ - scalex = (l_float32)(x2sp - x1p) / (x2s - x1); - scaley = (l_float32)(y3p - y1p) / (y3 - y1); - if ((pix2 = pixScale(pix1, scalex, scaley)) == NULL) { - L_ERROR("pix2 not made\n", procName); - goto cleanup_pix; - } - -#if DEBUG - rad2deg = 180. 
/ 3.1415926535; - lept_stderr("th3 = %5.1f deg, ph2 = %5.1f deg\n", - rad2deg * th3, rad2deg * ph2); - lept_stderr("th3' = %5.1f deg, ph2' = %5.1f deg\n", - rad2deg * th3p, rad2deg * ph2p); - lept_stderr("scalex = %6.3f, scaley = %6.3f\n", scalex, scaley); -#endif /* DEBUG */ - - /*-------------------------------------------------------------* - Scaling moves the 1st src point, which is the origin. - It must now be moved again to coincide with the origin - (1st point) of the dest. After this is done, the 2nd - and 3rd points must be sheared back to the original - positions of the 2nd and 3rd dest points. We use the - negative of the angles that were previously computed - for shearing those points in the dest image to x and y - axes, and take the shears in reverse order as well. - *-------------------------------------------------------------*/ - /* Shift image to match dest origin. */ - x1sc = (l_int32)(scalex * x1 + 0.5); /* x comp of origin after scaling */ - y1sc = (l_int32)(scaley * y1 + 0.5); /* y comp of origin after scaling */ - pixRasteropIP(pix2, x1p - x1sc, y1p - y1sc, L_BRING_IN_WHITE); - - /* Shear image to take points 2 and 3 off the axis and - * put them in the original dest position */ - pixVShearIP(pix2, x1p, -ph2p, L_BRING_IN_WHITE); - pixHShearIP(pix2, y1p, -th3p, L_BRING_IN_WHITE); - - if (bw != 0 || bh != 0) { - if ((pixd = pixRemoveBorderGeneral(pix2, bw, bw, bh, bh)) == NULL) - L_ERROR("pixd not made\n", procName); - } else { - pixd = pixClone(pix2); - } - -cleanup_pix: - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/affinecompose.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/affinecompose.c deleted file mode 100644 index 8f4805b1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/affinecompose.c +++ /dev/null @@ -1,665 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. 
All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file affinecompose.c - *
- *
- *      Composable coordinate transforms
- *           l_float32   *createMatrix2dTranslate()
- *           l_float32   *createMatrix2dScale()
- *           l_float32   *createMatrix2dRotate()
- *
- *      Special coordinate transforms on pta
- *           PTA         *ptaTranslate()
- *           PTA         *ptaScale()
- *           PTA         *ptaRotate()
- *
- *      Special coordinate transforms on boxa
- *           BOXA        *boxaTranslate()
- *           BOXA        *boxaScale()
- *           BOXA        *boxaRotate()
- *
- *      General coordinate transform on pta and boxa
- *           PTA         *ptaAffineTransform()
- *           BOXA        *boxaAffineTransform()
- *
- *      Matrix operations
- *           l_int32      l_productMatVec()
- *           l_int32      l_productMat2()
- *           l_int32      l_productMat3()
- *           l_int32      l_productMat4()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*-------------------------------------------------------------* - * Composable coordinate transforms * - *-------------------------------------------------------------*/ -/*! - * \brief createMatrix2dTranslate() - * - * \param[in] transx x component of translation wrt. the origin - * \param[in] transy y component of translation wrt. the origin - * \return 3x3 transform matrix, or NULL on error - * - *
- * Notes:
- *      (1) The translation is equivalent to:
- *             v' = Av
- *          where v and v' are 3x1 column vectors in the form
- *             v = [x, y, 1]^    ^ denotes transpose
- *          and the affine translation matrix is
- *             A = [ 1   0   tx
- *                   0   1   ty
- *                   0   0    1  ]
- *
- *      (2) We consider translation as with respect to a fixed origin.
- *          In a clipping operation, the origin moves and the points
- *          are fixed, and you use (-tx, -ty) where (tx, ty) is the
- *          translation vector of the origin.
- * 
- */ -l_float32 * -createMatrix2dTranslate(l_float32 transx, - l_float32 transy) -{ -l_float32 *mat; - - mat = (l_float32 *)LEPT_CALLOC(9, sizeof(l_float32)); - mat[0] = mat[4] = mat[8] = 1; - mat[2] = transx; - mat[5] = transy; - return mat; -} - - -/*! - * \brief createMatrix2dScale() - * - * \param[in] scalex horizontal scale factor - * \param[in] scaley vertical scale factor - * \return 3x3 transform matrix, or NULL on error - * - *
- * Notes:
- *      (1) The scaling is equivalent to:
- *             v' = Av
- *         where v and v' are 3x1 column vectors in the form
- *              v = [x, y, 1]^    ^ denotes transpose
- *         and the affine scaling matrix is
- *             A = [ sx  0    0
- *                   0   sy   0
- *                   0   0    1  ]
- *
- *      (2) We consider scaling as with respect to a fixed origin.
- *          In other words, the origin is the only point that doesn't
- *          move in the scaling transform.
- * 
- */ -l_float32 * -createMatrix2dScale(l_float32 scalex, - l_float32 scaley) -{ -l_float32 *mat; - - mat = (l_float32 *)LEPT_CALLOC(9, sizeof(l_float32)); - mat[0] = scalex; - mat[4] = scaley; - mat[8] = 1; - return mat; -} - - -/*! - * \brief createMatrix2dRotate() - * - * \param[in] xc, yc location of center of rotation - * \param[in] angle rotation in radians; clockwise is positive - * \return 3x3 transform matrix, or NULL on error - * - *
- * Notes:
- *      (1) The rotation is equivalent to:
- *             v' = Av
- *          where v and v' are 3x1 column vectors in the form
- *             v = [x, y, 1]^    ^ denotes transpose
- *          and the affine rotation matrix is
- *             A = [ cosa   -sina    xc*(1-cosa) + yc*sina
- *                   sina    cosa    yc*(1-cosa) - xc*sina
- *                     0       0                 1         ]
- *
- *          If the rotation is about the origin, (xc, yc) = (0, 0) and
- *          this simplifies to
- *             A = [ cosa   -sina    0
- *                   sina    cosa    0
- *                     0       0     1 ]
- *
- *          These relations follow from the following equations, which
- *          you can convince yourself are correct as follows.  Draw a
- *          circle centered on (xc,yc) and passing through (x,y), with
- *          (x',y') on the arc at an angle 'a' clockwise from (x,y).
- *           [ Hint: cos(a + b) = cosa * cosb - sina * sinb
- *                   sin(a + b) = sina * cosb + cosa * sinb ]
- *
- *            x' - xc =  (x - xc) * cosa - (y - yc) * sina
- *            y' - yc =  (x - xc) * sina + (y - yc) * cosa
- * 
- */ -l_float32 * -createMatrix2dRotate(l_float32 xc, - l_float32 yc, - l_float32 angle) -{ -l_float32 sina, cosa; -l_float32 *mat; - - mat = (l_float32 *)LEPT_CALLOC(9, sizeof(l_float32)); - sina = sin(angle); - cosa = cos(angle); - mat[0] = mat[4] = cosa; - mat[1] = -sina; - mat[2] = xc * (1.0 - cosa) + yc * sina; - mat[3] = sina; - mat[5] = yc * (1.0 - cosa) - xc * sina; - mat[8] = 1; - return mat; -} - - - -/*-------------------------------------------------------------* - * Special coordinate transforms on pta * - *-------------------------------------------------------------*/ -/*! - * \brief ptaTranslate() - * - * \param[in] ptas for initial points - * \param[in] transx x component of translation wrt. the origin - * \param[in] transy y component of translation wrt. the origin - * \return ptad translated points, or NULL on error - * - *
- * Notes:
- *      (1) See createMatrix2dTranslate() for details of transform.
- * 
- */ -PTA * -ptaTranslate(PTA *ptas, - l_float32 transx, - l_float32 transy) -{ -l_int32 i, npts; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaTranslate"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - npts = ptaGetCount(ptas); - if ((ptad = ptaCreate(npts)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = 0; i < npts; i++) { - ptaGetPt(ptas, i, &x, &y); - ptaAddPt(ptad, x + transx, y + transy); - } - - return ptad; -} - - -/*! - * \brief ptaScale() - * - * \param[in] ptas for initial points - * \param[in] scalex horizontal scale factor - * \param[in] scaley vertical scale factor - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See createMatrix2dScale() for details of transform.
- * 
- */ -PTA * -ptaScale(PTA *ptas, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 i, npts; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaScale"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - npts = ptaGetCount(ptas); - if ((ptad = ptaCreate(npts)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = 0; i < npts; i++) { - ptaGetPt(ptas, i, &x, &y); - ptaAddPt(ptad, scalex * x, scaley * y); - } - - return ptad; -} - - -/*! - * \brief ptaRotate() - * - * \param[in] ptas for initial points - * \param[in] xc, yc location of center of rotation - * \param[in] angle rotation in radians; clockwise is positive - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See createMatrix2dRotate() for details of transform.
- *      (2) This transform can be thought of as composed of the
- *          sum of two parts:
- *           (a) an (x,y)-dependent rotation about the origin:
- *              xr = x * cosa - y * sina
- *              yr = x * sina + y * cosa
- *           (b) an (x,y)-independent translation that depends on the
- *              rotation center and the angle:
- *              xt = xc - xc * cosa + yc * sina
- *              yt = yc - xc * sina - yc * cosa
- *          The translation part (xt,yt) is equal to the difference
- *          between the center (xc,yc) and the location of the
- *          center after it is rotated about the origin.
- * 
- */ -PTA * -ptaRotate(PTA *ptas, - l_float32 xc, - l_float32 yc, - l_float32 angle) -{ -l_int32 i, npts; -l_float32 x, y, xp, yp, sina, cosa; -PTA *ptad; - - PROCNAME("ptaRotate"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - npts = ptaGetCount(ptas); - if ((ptad = ptaCreate(npts)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - sina = sin(angle); - cosa = cos(angle); - for (i = 0; i < npts; i++) { - ptaGetPt(ptas, i, &x, &y); - xp = xc + (x - xc) * cosa - (y - yc) * sina; - yp = yc + (x - xc) * sina + (y - yc) * cosa; - ptaAddPt(ptad, xp, yp); - } - - return ptad; -} - - -/*-------------------------------------------------------------* - * Special coordinate transforms on boxa * - *-------------------------------------------------------------*/ -/*! - * \brief boxaTranslate() - * - * \param[in] boxas - * \param[in] transx x component of translation wrt. the origin - * \param[in] transy y component of translation wrt. the origin - * \return boxad translated boxas, or NULL on error - * - * Notes: - * (1) See createMatrix2dTranslate() for details of transform. - */ -BOXA * -boxaTranslate(BOXA *boxas, - l_float32 transx, - l_float32 transy) -{ -PTA *ptas, *ptad; -BOXA *boxad; - - PROCNAME("boxaTranslate"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - - ptas = boxaConvertToPta(boxas, 4); - ptad = ptaTranslate(ptas, transx, transy); - boxad = ptaConvertToBoxa(ptad, 4); - ptaDestroy(&ptas); - ptaDestroy(&ptad); - return boxad; -} - - -/*! - * \brief boxaScale() - * - * \param[in] boxas - * \param[in] scalex horizontal scale factor - * \param[in] scaley vertical scale factor - * \return boxad scaled boxas, or NULL on error - * - * Notes: - * (1) See createMatrix2dScale() for details of transform. 
- */ -BOXA * -boxaScale(BOXA *boxas, - l_float32 scalex, - l_float32 scaley) -{ -PTA *ptas, *ptad; -BOXA *boxad; - - PROCNAME("boxaScale"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - - ptas = boxaConvertToPta(boxas, 4); - ptad = ptaScale(ptas, scalex, scaley); - boxad = ptaConvertToBoxa(ptad, 4); - ptaDestroy(&ptas); - ptaDestroy(&ptad); - return boxad; -} - - -/*! - * \brief boxaRotate() - * - * \param[in] boxas - * \param[in] xc, yc location of center of rotation - * \param[in] angle rotation in radians; clockwise is positive - * \return boxad scaled boxas, or NULL on error - * - * Notes: - * (1) See createMatrix2dRotate() for details of transform. - */ -BOXA * -boxaRotate(BOXA *boxas, - l_float32 xc, - l_float32 yc, - l_float32 angle) -{ -PTA *ptas, *ptad; -BOXA *boxad; - - PROCNAME("boxaRotate"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - - ptas = boxaConvertToPta(boxas, 4); - ptad = ptaRotate(ptas, xc, yc, angle); - boxad = ptaConvertToBoxa(ptad, 4); - ptaDestroy(&ptas); - ptaDestroy(&ptad); - return boxad; -} - - -/*-------------------------------------------------------------* - * General affine coordinate transform * - *-------------------------------------------------------------*/ -/*! 
- * \brief ptaAffineTransform() - * - * \param[in] ptas for initial points - * \param[in] mat 3x3 transform matrix; canonical form - * \return ptad transformed points, or NULL on error - */ -PTA * -ptaAffineTransform(PTA *ptas, - l_float32 *mat) -{ -l_int32 i, npts; -l_float32 vecs[3], vecd[3]; -PTA *ptad; - - PROCNAME("ptaAffineTransform"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (!mat) - return (PTA *)ERROR_PTR("transform not defined", procName, NULL); - - vecs[2] = 1; - npts = ptaGetCount(ptas); - if ((ptad = ptaCreate(npts)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = 0; i < npts; i++) { - ptaGetPt(ptas, i, &vecs[0], &vecs[1]); - l_productMatVec(mat, vecs, vecd, 3); - ptaAddPt(ptad, vecd[0], vecd[1]); - } - - return ptad; -} - - -/*! - * \brief boxaAffineTransform() - * - * \param[in] boxas - * \param[in] mat 3x3 transform matrix; canonical form - * \return boxad transformed boxas, or NULL on error - */ -BOXA * -boxaAffineTransform(BOXA *boxas, - l_float32 *mat) -{ -PTA *ptas, *ptad; -BOXA *boxad; - - PROCNAME("boxaAffineTransform"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (!mat) - return (BOXA *)ERROR_PTR("transform not defined", procName, NULL); - - ptas = boxaConvertToPta(boxas, 4); - ptad = ptaAffineTransform(ptas, mat); - boxad = ptaConvertToBoxa(ptad, 4); - ptaDestroy(&ptas); - ptaDestroy(&ptad); - return boxad; -} - - -/*-------------------------------------------------------------* - * Matrix operations * - *-------------------------------------------------------------*/ -/*! 
- * \brief l_productMatVec() - * - * \param[in] mat square matrix, as a 1-dimensional %size^2 array - * \param[in] vecs input column vector of length %size - * \param[in] vecd result column vector - * \param[in] size matrix is %size x %size; vectors are length %size - * \return 0 if OK, 1 on error - */ -l_ok -l_productMatVec(l_float32 *mat, - l_float32 *vecs, - l_float32 *vecd, - l_int32 size) -{ -l_int32 i, j; - - PROCNAME("l_productMatVec"); - - if (!mat) - return ERROR_INT("matrix not defined", procName, 1); - if (!vecs) - return ERROR_INT("input vector not defined", procName, 1); - if (!vecd) - return ERROR_INT("result vector not defined", procName, 1); - - for (i = 0; i < size; i++) { - vecd[i] = 0; - for (j = 0; j < size; j++) { - vecd[i] += mat[size * i + j] * vecs[j]; - } - } - return 0; -} - - -/*! - * \brief l_productMat2() - * - * \param[in] mat1 square matrix, as a 1-dimensional size^2 array - * \param[in] mat2 square matrix, as a 1-dimensional size^2 array - * \param[in] matd square matrix; product stored here - * \param[in] size of matrices - * \return 0 if OK, 1 on error - */ -l_ok -l_productMat2(l_float32 *mat1, - l_float32 *mat2, - l_float32 *matd, - l_int32 size) -{ -l_int32 i, j, k, index; - - PROCNAME("l_productMat2"); - - if (!mat1) - return ERROR_INT("matrix 1 not defined", procName, 1); - if (!mat2) - return ERROR_INT("matrix 2 not defined", procName, 1); - if (!matd) - return ERROR_INT("result matrix not defined", procName, 1); - - for (i = 0; i < size; i++) { - for (j = 0; j < size; j++) { - index = size * i + j; - matd[index] = 0; - for (k = 0; k < size; k++) - matd[index] += mat1[size * i + k] * mat2[size * k + j]; - } - } - return 0; -} - - -/*! 
- * \brief l_productMat3() - * - * \param[in] mat1 square matrix, as a 1-dimensional size^2 array - * \param[in] mat2 square matrix, as a 1-dimensional size^2 array - * \param[in] mat3 square matrix, as a 1-dimensional size^2 array - * \param[in] matd square matrix; product stored here - * \param[in] size of matrices - * \return 0 if OK, 1 on error - */ -l_ok -l_productMat3(l_float32 *mat1, - l_float32 *mat2, - l_float32 *mat3, - l_float32 *matd, - l_int32 size) -{ -l_float32 *matt; - - PROCNAME("l_productMat3"); - - if (!mat1) - return ERROR_INT("matrix 1 not defined", procName, 1); - if (!mat2) - return ERROR_INT("matrix 2 not defined", procName, 1); - if (!mat3) - return ERROR_INT("matrix 3 not defined", procName, 1); - if (!matd) - return ERROR_INT("result matrix not defined", procName, 1); - - if ((matt = (l_float32 *)LEPT_CALLOC((size_t)size * size, - sizeof(l_float32))) == NULL) - return ERROR_INT("matt not made", procName, 1); - l_productMat2(mat1, mat2, matt, size); - l_productMat2(matt, mat3, matd, size); - LEPT_FREE(matt); - return 0; -} - - -/*! 
- * \brief l_productMat4() - * - * \param[in] mat1 square matrix, as a 1-dimensional size^2 array - * \param[in] mat2 square matrix, as a 1-dimensional size^2 array - * \param[in] mat3 square matrix, as a 1-dimensional size^2 array - * \param[in] mat4 square matrix, as a 1-dimensional size^2 array - * \param[in] matd square matrix; product stored here - * \param[in] size of matrices - * \return 0 if OK, 1 on error - */ -l_ok -l_productMat4(l_float32 *mat1, - l_float32 *mat2, - l_float32 *mat3, - l_float32 *mat4, - l_float32 *matd, - l_int32 size) -{ -l_float32 *matt; - - PROCNAME("l_productMat4"); - - if (!mat1) - return ERROR_INT("matrix 1 not defined", procName, 1); - if (!mat2) - return ERROR_INT("matrix 2 not defined", procName, 1); - if (!mat3) - return ERROR_INT("matrix 3 not defined", procName, 1); - if (!matd) - return ERROR_INT("result matrix not defined", procName, 1); - - if ((matt = (l_float32 *)LEPT_CALLOC((size_t)size * size, - sizeof(l_float32))) == NULL) - return ERROR_INT("matt not made", procName, 1); - l_productMat3(mat1, mat2, mat3, matt, size); - l_productMat2(matt, mat4, matd, size); - LEPT_FREE(matt); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/allheaders.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/allheaders.h deleted file mode 100644 index 1c00620d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/allheaders.h +++ /dev/null @@ -1,2768 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_ALLHEADERS_H -#define LEPTONICA_ALLHEADERS_H - - -#define LIBLEPT_MAJOR_VERSION 1 -#define LIBLEPT_MINOR_VERSION 79 -#define LIBLEPT_PATCH_VERSION 0 - -#include "alltypes.h" - -#ifndef NO_PROTOS -/* - * These prototypes were autogen'd by xtractprotos, v. 
1.5 - */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -LEPT_DLL extern PIX * pixCleanBackgroundToWhite ( PIX *pixs, PIX *pixim, PIX *pixg, l_float32 gamma, l_int32 blackval, l_int32 whiteval ); -LEPT_DLL extern PIX * pixBackgroundNormSimple ( PIX *pixs, PIX *pixim, PIX *pixg ); -LEPT_DLL extern PIX * pixBackgroundNorm ( PIX *pixs, PIX *pixim, PIX *pixg, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 bgval, l_int32 smoothx, l_int32 smoothy ); -LEPT_DLL extern PIX * pixBackgroundNormMorph ( PIX *pixs, PIX *pixim, l_int32 reduction, l_int32 size, l_int32 bgval ); -LEPT_DLL extern l_ok pixBackgroundNormGrayArray ( PIX *pixs, PIX *pixim, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 bgval, l_int32 smoothx, l_int32 smoothy, PIX **ppixd ); -LEPT_DLL extern l_ok pixBackgroundNormRGBArrays ( PIX *pixs, PIX *pixim, PIX *pixg, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 bgval, l_int32 smoothx, l_int32 smoothy, PIX **ppixr, PIX **ppixg, PIX **ppixb ); -LEPT_DLL extern l_ok pixBackgroundNormGrayArrayMorph ( PIX *pixs, PIX *pixim, l_int32 reduction, l_int32 size, l_int32 bgval, PIX **ppixd ); -LEPT_DLL extern l_ok pixBackgroundNormRGBArraysMorph ( PIX *pixs, PIX *pixim, l_int32 reduction, l_int32 size, l_int32 bgval, PIX **ppixr, PIX **ppixg, PIX **ppixb ); -LEPT_DLL extern l_ok pixGetBackgroundGrayMap ( PIX *pixs, PIX *pixim, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, PIX **ppixd ); -LEPT_DLL extern l_ok pixGetBackgroundRGBMap ( PIX *pixs, PIX *pixim, PIX *pixg, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, PIX **ppixmr, PIX **ppixmg, PIX **ppixmb ); -LEPT_DLL extern l_ok pixGetBackgroundGrayMapMorph ( PIX *pixs, PIX *pixim, l_int32 reduction, l_int32 size, PIX **ppixm ); -LEPT_DLL extern l_ok pixGetBackgroundRGBMapMorph ( PIX *pixs, PIX *pixim, l_int32 reduction, l_int32 size, PIX **ppixmr, PIX **ppixmg, PIX **ppixmb ); -LEPT_DLL extern l_ok pixFillMapHoles ( PIX *pix, 
l_int32 nx, l_int32 ny, l_int32 filltype ); -LEPT_DLL extern PIX * pixExtendByReplication ( PIX *pixs, l_int32 addw, l_int32 addh ); -LEPT_DLL extern l_ok pixSmoothConnectedRegions ( PIX *pixs, PIX *pixm, l_int32 factor ); -LEPT_DLL extern PIX * pixGetInvBackgroundMap ( PIX *pixs, l_int32 bgval, l_int32 smoothx, l_int32 smoothy ); -LEPT_DLL extern PIX * pixApplyInvBackgroundGrayMap ( PIX *pixs, PIX *pixm, l_int32 sx, l_int32 sy ); -LEPT_DLL extern PIX * pixApplyInvBackgroundRGBMap ( PIX *pixs, PIX *pixmr, PIX *pixmg, PIX *pixmb, l_int32 sx, l_int32 sy ); -LEPT_DLL extern PIX * pixApplyVariableGrayMap ( PIX *pixs, PIX *pixg, l_int32 target ); -LEPT_DLL extern PIX * pixGlobalNormRGB ( PIX *pixd, PIX *pixs, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 mapval ); -LEPT_DLL extern PIX * pixGlobalNormNoSatRGB ( PIX *pixd, PIX *pixs, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 factor, l_float32 rank ); -LEPT_DLL extern l_ok pixThresholdSpreadNorm ( PIX *pixs, l_int32 filtertype, l_int32 edgethresh, l_int32 smoothx, l_int32 smoothy, l_float32 gamma, l_int32 minval, l_int32 maxval, l_int32 targetthresh, PIX **ppixth, PIX **ppixb, PIX **ppixd ); -LEPT_DLL extern PIX * pixBackgroundNormFlex ( PIX *pixs, l_int32 sx, l_int32 sy, l_int32 smoothx, l_int32 smoothy, l_int32 delta ); -LEPT_DLL extern PIX * pixContrastNorm ( PIX *pixd, PIX *pixs, l_int32 sx, l_int32 sy, l_int32 mindiff, l_int32 smoothx, l_int32 smoothy ); -LEPT_DLL extern l_ok pixMinMaxTiles ( PIX *pixs, l_int32 sx, l_int32 sy, l_int32 mindiff, l_int32 smoothx, l_int32 smoothy, PIX **ppixmin, PIX **ppixmax ); -LEPT_DLL extern l_ok pixSetLowContrast ( PIX *pixs1, PIX *pixs2, l_int32 mindiff ); -LEPT_DLL extern PIX * pixLinearTRCTiled ( PIX *pixd, PIX *pixs, l_int32 sx, l_int32 sy, PIX *pixmin, PIX *pixmax ); -LEPT_DLL extern PIX * pixAffineSampledPta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor ); -LEPT_DLL extern PIX * pixAffineSampled ( PIX *pixs, l_float32 *vc, l_int32 incolor ); -LEPT_DLL extern 
PIX * pixAffinePta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor ); -LEPT_DLL extern PIX * pixAffine ( PIX *pixs, l_float32 *vc, l_int32 incolor ); -LEPT_DLL extern PIX * pixAffinePtaColor ( PIX *pixs, PTA *ptad, PTA *ptas, l_uint32 colorval ); -LEPT_DLL extern PIX * pixAffineColor ( PIX *pixs, l_float32 *vc, l_uint32 colorval ); -LEPT_DLL extern PIX * pixAffinePtaGray ( PIX *pixs, PTA *ptad, PTA *ptas, l_uint8 grayval ); -LEPT_DLL extern PIX * pixAffineGray ( PIX *pixs, l_float32 *vc, l_uint8 grayval ); -LEPT_DLL extern PIX * pixAffinePtaWithAlpha ( PIX *pixs, PTA *ptad, PTA *ptas, PIX *pixg, l_float32 fract, l_int32 border ); -LEPT_DLL extern l_ok getAffineXformCoeffs ( PTA *ptas, PTA *ptad, l_float32 **pvc ); -LEPT_DLL extern l_ok affineInvertXform ( l_float32 *vc, l_float32 **pvci ); -LEPT_DLL extern l_ok affineXformSampledPt ( l_float32 *vc, l_int32 x, l_int32 y, l_int32 *pxp, l_int32 *pyp ); -LEPT_DLL extern l_ok affineXformPt ( l_float32 *vc, l_int32 x, l_int32 y, l_float32 *pxp, l_float32 *pyp ); -LEPT_DLL extern l_ok linearInterpolatePixelColor ( l_uint32 *datas, l_int32 wpls, l_int32 w, l_int32 h, l_float32 x, l_float32 y, l_uint32 colorval, l_uint32 *pval ); -LEPT_DLL extern l_ok linearInterpolatePixelGray ( l_uint32 *datas, l_int32 wpls, l_int32 w, l_int32 h, l_float32 x, l_float32 y, l_int32 grayval, l_int32 *pval ); -LEPT_DLL extern l_int32 gaussjordan ( l_float32 **a, l_float32 *b, l_int32 n ); -LEPT_DLL extern PIX * pixAffineSequential ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 bw, l_int32 bh ); -LEPT_DLL extern l_float32 * createMatrix2dTranslate ( l_float32 transx, l_float32 transy ); -LEPT_DLL extern l_float32 * createMatrix2dScale ( l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern l_float32 * createMatrix2dRotate ( l_float32 xc, l_float32 yc, l_float32 angle ); -LEPT_DLL extern PTA * ptaTranslate ( PTA *ptas, l_float32 transx, l_float32 transy ); -LEPT_DLL extern PTA * ptaScale ( PTA *ptas, l_float32 scalex, l_float32 scaley ); 
-LEPT_DLL extern PTA * ptaRotate ( PTA *ptas, l_float32 xc, l_float32 yc, l_float32 angle ); -LEPT_DLL extern BOXA * boxaTranslate ( BOXA *boxas, l_float32 transx, l_float32 transy ); -LEPT_DLL extern BOXA * boxaScale ( BOXA *boxas, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern BOXA * boxaRotate ( BOXA *boxas, l_float32 xc, l_float32 yc, l_float32 angle ); -LEPT_DLL extern PTA * ptaAffineTransform ( PTA *ptas, l_float32 *mat ); -LEPT_DLL extern BOXA * boxaAffineTransform ( BOXA *boxas, l_float32 *mat ); -LEPT_DLL extern l_ok l_productMatVec ( l_float32 *mat, l_float32 *vecs, l_float32 *vecd, l_int32 size ); -LEPT_DLL extern l_ok l_productMat2 ( l_float32 *mat1, l_float32 *mat2, l_float32 *matd, l_int32 size ); -LEPT_DLL extern l_ok l_productMat3 ( l_float32 *mat1, l_float32 *mat2, l_float32 *mat3, l_float32 *matd, l_int32 size ); -LEPT_DLL extern l_ok l_productMat4 ( l_float32 *mat1, l_float32 *mat2, l_float32 *mat3, l_float32 *mat4, l_float32 *matd, l_int32 size ); -LEPT_DLL extern l_int32 l_getDataBit ( const void *line, l_int32 n ); -LEPT_DLL extern void l_setDataBit ( void *line, l_int32 n ); -LEPT_DLL extern void l_clearDataBit ( void *line, l_int32 n ); -LEPT_DLL extern void l_setDataBitVal ( void *line, l_int32 n, l_int32 val ); -LEPT_DLL extern l_int32 l_getDataDibit ( const void *line, l_int32 n ); -LEPT_DLL extern void l_setDataDibit ( void *line, l_int32 n, l_int32 val ); -LEPT_DLL extern void l_clearDataDibit ( void *line, l_int32 n ); -LEPT_DLL extern l_int32 l_getDataQbit ( const void *line, l_int32 n ); -LEPT_DLL extern void l_setDataQbit ( void *line, l_int32 n, l_int32 val ); -LEPT_DLL extern void l_clearDataQbit ( void *line, l_int32 n ); -LEPT_DLL extern l_int32 l_getDataByte ( const void *line, l_int32 n ); -LEPT_DLL extern void l_setDataByte ( void *line, l_int32 n, l_int32 val ); -LEPT_DLL extern l_int32 l_getDataTwoBytes ( const void *line, l_int32 n ); -LEPT_DLL extern void l_setDataTwoBytes ( void *line, l_int32 n, l_int32 val ); 
-LEPT_DLL extern l_int32 l_getDataFourBytes ( const void *line, l_int32 n ); -LEPT_DLL extern void l_setDataFourBytes ( void *line, l_int32 n, l_int32 val ); -LEPT_DLL extern char * barcodeDispatchDecoder ( char *barstr, l_int32 format, l_int32 debugflag ); -LEPT_DLL extern l_int32 barcodeFormatIsSupported ( l_int32 format ); -LEPT_DLL extern NUMA * pixFindBaselines ( PIX *pixs, PTA **ppta, PIXA *pixadb ); -LEPT_DLL extern PIX * pixDeskewLocal ( PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta ); -LEPT_DLL extern l_ok pixGetLocalSkewTransform ( PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, PTA **pptas, PTA **pptad ); -LEPT_DLL extern NUMA * pixGetLocalSkewAngles ( PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 *pa, l_float32 *pb, l_int32 debug ); -LEPT_DLL extern L_BBUFFER * bbufferCreate ( const l_uint8 *indata, l_int32 nalloc ); -LEPT_DLL extern void bbufferDestroy ( L_BBUFFER **pbb ); -LEPT_DLL extern l_uint8 * bbufferDestroyAndSaveData ( L_BBUFFER **pbb, size_t *pnbytes ); -LEPT_DLL extern l_ok bbufferRead ( L_BBUFFER *bb, l_uint8 *src, l_int32 nbytes ); -LEPT_DLL extern l_ok bbufferReadStream ( L_BBUFFER *bb, FILE *fp, l_int32 nbytes ); -LEPT_DLL extern l_ok bbufferExtendArray ( L_BBUFFER *bb, l_int32 nbytes ); -LEPT_DLL extern l_ok bbufferWrite ( L_BBUFFER *bb, l_uint8 *dest, size_t nbytes, size_t *pnout ); -LEPT_DLL extern l_ok bbufferWriteStream ( L_BBUFFER *bb, FILE *fp, size_t nbytes, size_t *pnout ); -LEPT_DLL extern PIX * pixBilateral ( PIX *pixs, l_float32 spatial_stdev, l_float32 range_stdev, l_int32 ncomps, l_int32 reduction ); -LEPT_DLL extern PIX * pixBilateralGray ( PIX *pixs, l_float32 spatial_stdev, l_float32 range_stdev, l_int32 ncomps, l_int32 reduction ); -LEPT_DLL extern PIX * 
pixBilateralExact ( PIX *pixs, L_KERNEL *spatial_kel, L_KERNEL *range_kel ); -LEPT_DLL extern PIX * pixBilateralGrayExact ( PIX *pixs, L_KERNEL *spatial_kel, L_KERNEL *range_kel ); -LEPT_DLL extern PIX* pixBlockBilateralExact ( PIX *pixs, l_float32 spatial_stdev, l_float32 range_stdev ); -LEPT_DLL extern L_KERNEL * makeRangeKernel ( l_float32 range_stdev ); -LEPT_DLL extern PIX * pixBilinearSampledPta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor ); -LEPT_DLL extern PIX * pixBilinearSampled ( PIX *pixs, l_float32 *vc, l_int32 incolor ); -LEPT_DLL extern PIX * pixBilinearPta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor ); -LEPT_DLL extern PIX * pixBilinear ( PIX *pixs, l_float32 *vc, l_int32 incolor ); -LEPT_DLL extern PIX * pixBilinearPtaColor ( PIX *pixs, PTA *ptad, PTA *ptas, l_uint32 colorval ); -LEPT_DLL extern PIX * pixBilinearColor ( PIX *pixs, l_float32 *vc, l_uint32 colorval ); -LEPT_DLL extern PIX * pixBilinearPtaGray ( PIX *pixs, PTA *ptad, PTA *ptas, l_uint8 grayval ); -LEPT_DLL extern PIX * pixBilinearGray ( PIX *pixs, l_float32 *vc, l_uint8 grayval ); -LEPT_DLL extern PIX * pixBilinearPtaWithAlpha ( PIX *pixs, PTA *ptad, PTA *ptas, PIX *pixg, l_float32 fract, l_int32 border ); -LEPT_DLL extern l_ok getBilinearXformCoeffs ( PTA *ptas, PTA *ptad, l_float32 **pvc ); -LEPT_DLL extern l_ok bilinearXformSampledPt ( l_float32 *vc, l_int32 x, l_int32 y, l_int32 *pxp, l_int32 *pyp ); -LEPT_DLL extern l_ok bilinearXformPt ( l_float32 *vc, l_int32 x, l_int32 y, l_float32 *pxp, l_float32 *pyp ); -LEPT_DLL extern l_ok pixOtsuAdaptiveThreshold ( PIX *pixs, l_int32 sx, l_int32 sy, l_int32 smoothx, l_int32 smoothy, l_float32 scorefract, PIX **ppixth, PIX **ppixd ); -LEPT_DLL extern PIX * pixOtsuThreshOnBackgroundNorm ( PIX *pixs, PIX *pixim, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 bgval, l_int32 smoothx, l_int32 smoothy, l_float32 scorefract, l_int32 *pthresh ); -LEPT_DLL extern PIX * pixMaskedThreshOnBackgroundNorm ( PIX *pixs, 
PIX *pixim, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 smoothx, l_int32 smoothy, l_float32 scorefract, l_int32 *pthresh ); -LEPT_DLL extern l_ok pixSauvolaBinarizeTiled ( PIX *pixs, l_int32 whsize, l_float32 factor, l_int32 nx, l_int32 ny, PIX **ppixth, PIX **ppixd ); -LEPT_DLL extern l_ok pixSauvolaBinarize ( PIX *pixs, l_int32 whsize, l_float32 factor, l_int32 addborder, PIX **ppixm, PIX **ppixsd, PIX **ppixth, PIX **ppixd ); -LEPT_DLL extern l_ok pixThresholdByConnComp ( PIX *pixs, PIX *pixm, l_int32 start, l_int32 end, l_int32 incr, l_float32 thresh48, l_float32 threshdiff, l_int32 *pglobthresh, PIX **ppixd, l_int32 debugflag ); -LEPT_DLL extern l_ok pixThresholdByHisto ( PIX *pixs, l_int32 factor, l_int32 halfw, l_float32 delta, l_int32 *pthresh, PIX **ppixd, PIX **ppixhisto ); -LEPT_DLL extern PIX * pixExpandBinaryReplicate ( PIX *pixs, l_int32 xfact, l_int32 yfact ); -LEPT_DLL extern PIX * pixExpandBinaryPower2 ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern PIX * pixReduceBinary2 ( PIX *pixs, l_uint8 *intab ); -LEPT_DLL extern PIX * pixReduceRankBinaryCascade ( PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4 ); -LEPT_DLL extern PIX * pixReduceRankBinary2 ( PIX *pixs, l_int32 level, l_uint8 *intab ); -LEPT_DLL extern l_uint8 * makeSubsampleTab2x ( void ); -LEPT_DLL extern PIX * pixBlend ( PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract ); -LEPT_DLL extern PIX * pixBlendMask ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract, l_int32 type ); -LEPT_DLL extern PIX * pixBlendGray ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract, l_int32 type, l_int32 transparent, l_uint32 transpix ); -LEPT_DLL extern PIX * pixBlendGrayInverse ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract ); -LEPT_DLL extern PIX * pixBlendColor ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract, l_int32 transparent, l_uint32 transpix 
); -LEPT_DLL extern PIX * pixBlendColorByChannel ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 rfract, l_float32 gfract, l_float32 bfract, l_int32 transparent, l_uint32 transpix ); -LEPT_DLL extern PIX * pixBlendGrayAdapt ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract, l_int32 shift ); -LEPT_DLL extern PIX * pixFadeWithGray ( PIX *pixs, PIX *pixb, l_float32 factor, l_int32 type ); -LEPT_DLL extern PIX * pixBlendHardLight ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 x, l_int32 y, l_float32 fract ); -LEPT_DLL extern l_ok pixBlendCmap ( PIX *pixs, PIX *pixb, l_int32 x, l_int32 y, l_int32 sindex ); -LEPT_DLL extern PIX * pixBlendWithGrayMask ( PIX *pixs1, PIX *pixs2, PIX *pixg, l_int32 x, l_int32 y ); -LEPT_DLL extern PIX * pixBlendBackgroundToColor ( PIX *pixd, PIX *pixs, BOX *box, l_uint32 color, l_float32 gamma, l_int32 minval, l_int32 maxval ); -LEPT_DLL extern PIX * pixMultiplyByColor ( PIX *pixd, PIX *pixs, BOX *box, l_uint32 color ); -LEPT_DLL extern PIX * pixAlphaBlendUniform ( PIX *pixs, l_uint32 color ); -LEPT_DLL extern PIX * pixAddAlphaToBlend ( PIX *pixs, l_float32 fract, l_int32 invert ); -LEPT_DLL extern PIX * pixSetAlphaOverWhite ( PIX *pixs ); -LEPT_DLL extern l_ok pixLinearEdgeFade ( PIX *pixs, l_int32 dir, l_int32 fadeto, l_float32 distfract, l_float32 maxfade ); -LEPT_DLL extern L_BMF * bmfCreate ( const char *dir, l_int32 fontsize ); -LEPT_DLL extern void bmfDestroy ( L_BMF **pbmf ); -LEPT_DLL extern PIX * bmfGetPix ( L_BMF *bmf, char chr ); -LEPT_DLL extern l_ok bmfGetWidth ( L_BMF *bmf, char chr, l_int32 *pw ); -LEPT_DLL extern l_ok bmfGetBaseline ( L_BMF *bmf, char chr, l_int32 *pbaseline ); -LEPT_DLL extern PIXA * pixaGetFont ( const char *dir, l_int32 fontsize, l_int32 *pbl0, l_int32 *pbl1, l_int32 *pbl2 ); -LEPT_DLL extern l_ok pixaSaveFont ( const char *indir, const char *outdir, l_int32 fontsize ); -LEPT_DLL extern PIX * pixReadStreamBmp ( FILE *fp ); -LEPT_DLL extern PIX * pixReadMemBmp ( 
const l_uint8 *cdata, size_t size ); -LEPT_DLL extern l_ok pixWriteStreamBmp ( FILE *fp, PIX *pix ); -LEPT_DLL extern l_ok pixWriteMemBmp ( l_uint8 **pfdata, size_t *pfsize, PIX *pixs ); -LEPT_DLL extern PIXA * l_bootnum_gen1 ( void ); -LEPT_DLL extern PIXA * l_bootnum_gen2 ( void ); -LEPT_DLL extern PIXA * l_bootnum_gen3 ( void ); -LEPT_DLL extern PIXA * l_bootnum_gen4 ( l_int32 nsamp ); -LEPT_DLL extern BOX * boxCreate ( l_int32 x, l_int32 y, l_int32 w, l_int32 h ); -LEPT_DLL extern BOX * boxCreateValid ( l_int32 x, l_int32 y, l_int32 w, l_int32 h ); -LEPT_DLL extern BOX * boxCopy ( BOX *box ); -LEPT_DLL extern BOX * boxClone ( BOX *box ); -LEPT_DLL extern void boxDestroy ( BOX **pbox ); -LEPT_DLL extern l_ok boxGetGeometry ( BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok boxSetGeometry ( BOX *box, l_int32 x, l_int32 y, l_int32 w, l_int32 h ); -LEPT_DLL extern l_ok boxGetSideLocations ( BOX *box, l_int32 *pl, l_int32 *pr, l_int32 *pt, l_int32 *pb ); -LEPT_DLL extern l_ok boxSetSideLocations ( BOX *box, l_int32 l, l_int32 r, l_int32 t, l_int32 b ); -LEPT_DLL extern l_int32 boxGetRefcount ( BOX *box ); -LEPT_DLL extern l_ok boxChangeRefcount ( BOX *box, l_int32 delta ); -LEPT_DLL extern l_ok boxIsValid ( BOX *box, l_int32 *pvalid ); -LEPT_DLL extern BOXA * boxaCreate ( l_int32 n ); -LEPT_DLL extern BOXA * boxaCopy ( BOXA *boxa, l_int32 copyflag ); -LEPT_DLL extern void boxaDestroy ( BOXA **pboxa ); -LEPT_DLL extern l_ok boxaAddBox ( BOXA *boxa, BOX *box, l_int32 copyflag ); -LEPT_DLL extern l_ok boxaExtendArray ( BOXA *boxa ); -LEPT_DLL extern l_ok boxaExtendArrayToSize ( BOXA *boxa, l_int32 size ); -LEPT_DLL extern l_int32 boxaGetCount ( BOXA *boxa ); -LEPT_DLL extern l_int32 boxaGetValidCount ( BOXA *boxa ); -LEPT_DLL extern BOX * boxaGetBox ( BOXA *boxa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern BOX * boxaGetValidBox ( BOXA *boxa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern NUMA * boxaFindInvalidBoxes 
( BOXA *boxa ); -LEPT_DLL extern l_ok boxaGetBoxGeometry ( BOXA *boxa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok boxaIsFull ( BOXA *boxa, l_int32 *pfull ); -LEPT_DLL extern l_ok boxaReplaceBox ( BOXA *boxa, l_int32 index, BOX *box ); -LEPT_DLL extern l_ok boxaInsertBox ( BOXA *boxa, l_int32 index, BOX *box ); -LEPT_DLL extern l_ok boxaRemoveBox ( BOXA *boxa, l_int32 index ); -LEPT_DLL extern l_ok boxaRemoveBoxAndSave ( BOXA *boxa, l_int32 index, BOX **pbox ); -LEPT_DLL extern BOXA * boxaSaveValid ( BOXA *boxas, l_int32 copyflag ); -LEPT_DLL extern l_ok boxaInitFull ( BOXA *boxa, BOX *box ); -LEPT_DLL extern l_ok boxaClear ( BOXA *boxa ); -LEPT_DLL extern BOXAA * boxaaCreate ( l_int32 n ); -LEPT_DLL extern BOXAA * boxaaCopy ( BOXAA *baas, l_int32 copyflag ); -LEPT_DLL extern void boxaaDestroy ( BOXAA **pbaa ); -LEPT_DLL extern l_ok boxaaAddBoxa ( BOXAA *baa, BOXA *ba, l_int32 copyflag ); -LEPT_DLL extern l_ok boxaaExtendArray ( BOXAA *baa ); -LEPT_DLL extern l_ok boxaaExtendArrayToSize ( BOXAA *baa, l_int32 size ); -LEPT_DLL extern l_int32 boxaaGetCount ( BOXAA *baa ); -LEPT_DLL extern l_int32 boxaaGetBoxCount ( BOXAA *baa ); -LEPT_DLL extern BOXA * boxaaGetBoxa ( BOXAA *baa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern BOX * boxaaGetBox ( BOXAA *baa, l_int32 iboxa, l_int32 ibox, l_int32 accessflag ); -LEPT_DLL extern l_ok boxaaInitFull ( BOXAA *baa, BOXA *boxa ); -LEPT_DLL extern l_ok boxaaExtendWithInit ( BOXAA *baa, l_int32 maxindex, BOXA *boxa ); -LEPT_DLL extern l_ok boxaaReplaceBoxa ( BOXAA *baa, l_int32 index, BOXA *boxa ); -LEPT_DLL extern l_ok boxaaInsertBoxa ( BOXAA *baa, l_int32 index, BOXA *boxa ); -LEPT_DLL extern l_ok boxaaRemoveBoxa ( BOXAA *baa, l_int32 index ); -LEPT_DLL extern l_ok boxaaAddBox ( BOXAA *baa, l_int32 index, BOX *box, l_int32 accessflag ); -LEPT_DLL extern BOXAA * boxaaReadFromFiles ( const char *dirname, const char *substr, l_int32 first, l_int32 nfiles ); -LEPT_DLL extern 
BOXAA * boxaaRead ( const char *filename ); -LEPT_DLL extern BOXAA * boxaaReadStream ( FILE *fp ); -LEPT_DLL extern BOXAA * boxaaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok boxaaWrite ( const char *filename, BOXAA *baa ); -LEPT_DLL extern l_ok boxaaWriteStream ( FILE *fp, BOXAA *baa ); -LEPT_DLL extern l_ok boxaaWriteMem ( l_uint8 **pdata, size_t *psize, BOXAA *baa ); -LEPT_DLL extern BOXA * boxaRead ( const char *filename ); -LEPT_DLL extern BOXA * boxaReadStream ( FILE *fp ); -LEPT_DLL extern BOXA * boxaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok boxaWriteDebug ( const char *filename, BOXA *boxa ); -LEPT_DLL extern l_ok boxaWrite ( const char *filename, BOXA *boxa ); -LEPT_DLL extern l_ok boxaWriteStream ( FILE *fp, BOXA *boxa ); -LEPT_DLL extern l_ok boxaWriteStderr ( BOXA *boxa ); -LEPT_DLL extern l_ok boxaWriteMem ( l_uint8 **pdata, size_t *psize, BOXA *boxa ); -LEPT_DLL extern l_ok boxPrintStreamInfo ( FILE *fp, BOX *box ); -LEPT_DLL extern l_ok boxContains ( BOX *box1, BOX *box2, l_int32 *presult ); -LEPT_DLL extern l_ok boxIntersects ( BOX *box1, BOX *box2, l_int32 *presult ); -LEPT_DLL extern BOXA * boxaContainedInBox ( BOXA *boxas, BOX *box ); -LEPT_DLL extern l_ok boxaContainedInBoxCount ( BOXA *boxa, BOX *box, l_int32 *pcount ); -LEPT_DLL extern l_ok boxaContainedInBoxa ( BOXA *boxa1, BOXA *boxa2, l_int32 *pcontained ); -LEPT_DLL extern BOXA * boxaIntersectsBox ( BOXA *boxas, BOX *box ); -LEPT_DLL extern l_ok boxaIntersectsBoxCount ( BOXA *boxa, BOX *box, l_int32 *pcount ); -LEPT_DLL extern BOXA * boxaClipToBox ( BOXA *boxas, BOX *box ); -LEPT_DLL extern BOXA * boxaCombineOverlaps ( BOXA *boxas, PIXA *pixadb ); -LEPT_DLL extern l_ok boxaCombineOverlapsInPair ( BOXA *boxas1, BOXA *boxas2, BOXA **pboxad1, BOXA **pboxad2, PIXA *pixadb ); -LEPT_DLL extern BOX * boxOverlapRegion ( BOX *box1, BOX *box2 ); -LEPT_DLL extern BOX * boxBoundingRegion ( BOX *box1, BOX *box2 ); -LEPT_DLL extern l_ok boxOverlapFraction 
( BOX *box1, BOX *box2, l_float32 *pfract ); -LEPT_DLL extern l_ok boxOverlapArea ( BOX *box1, BOX *box2, l_int32 *parea ); -LEPT_DLL extern BOXA * boxaHandleOverlaps ( BOXA *boxas, l_int32 op, l_int32 range, l_float32 min_overlap, l_float32 max_ratio, NUMA **pnamap ); -LEPT_DLL extern l_ok boxOverlapDistance ( BOX *box1, BOX *box2, l_int32 *ph_ovl, l_int32 *pv_ovl ); -LEPT_DLL extern l_ok boxSeparationDistance ( BOX *box1, BOX *box2, l_int32 *ph_sep, l_int32 *pv_sep ); -LEPT_DLL extern l_ok boxCompareSize ( BOX *box1, BOX *box2, l_int32 type, l_int32 *prel ); -LEPT_DLL extern l_ok boxContainsPt ( BOX *box, l_float32 x, l_float32 y, l_int32 *pcontains ); -LEPT_DLL extern BOX * boxaGetNearestToPt ( BOXA *boxa, l_int32 x, l_int32 y ); -LEPT_DLL extern BOX * boxaGetNearestToLine ( BOXA *boxa, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok boxaFindNearestBoxes ( BOXA *boxa, l_int32 dist_select, l_int32 range, NUMAA **pnaaindex, NUMAA **pnaadist ); -LEPT_DLL extern l_ok boxaGetNearestByDirection ( BOXA *boxa, l_int32 i, l_int32 dir, l_int32 dist_select, l_int32 range, l_int32 *pindex, l_int32 *pdist ); -LEPT_DLL extern l_ok boxGetCenter ( BOX *box, l_float32 *pcx, l_float32 *pcy ); -LEPT_DLL extern l_ok boxIntersectByLine ( BOX *box, l_int32 x, l_int32 y, l_float32 slope, l_int32 *px1, l_int32 *py1, l_int32 *px2, l_int32 *py2, l_int32 *pn ); -LEPT_DLL extern BOX * boxClipToRectangle ( BOX *box, l_int32 wi, l_int32 hi ); -LEPT_DLL extern l_ok boxClipToRectangleParams ( BOX *box, l_int32 w, l_int32 h, l_int32 *pxstart, l_int32 *pystart, l_int32 *pxend, l_int32 *pyend, l_int32 *pbw, l_int32 *pbh ); -LEPT_DLL extern BOX * boxRelocateOneSide ( BOX *boxd, BOX *boxs, l_int32 loc, l_int32 sideflag ); -LEPT_DLL extern BOXA * boxaAdjustSides ( BOXA *boxas, l_int32 delleft, l_int32 delright, l_int32 deltop, l_int32 delbot ); -LEPT_DLL extern l_ok boxaAdjustBoxSides ( BOXA *boxa, l_int32 index, l_int32 delleft, l_int32 delright, l_int32 deltop, l_int32 delbot ); -LEPT_DLL extern BOX 
* boxAdjustSides ( BOX *boxd, BOX *boxs, l_int32 delleft, l_int32 delright, l_int32 deltop, l_int32 delbot ); -LEPT_DLL extern BOXA * boxaSetSide ( BOXA *boxad, BOXA *boxas, l_int32 side, l_int32 val, l_int32 thresh ); -LEPT_DLL extern l_ok boxSetSide ( BOX *boxs, l_int32 side, l_int32 val, l_int32 thresh ); -LEPT_DLL extern BOXA * boxaAdjustWidthToTarget ( BOXA *boxad, BOXA *boxas, l_int32 sides, l_int32 target, l_int32 thresh ); -LEPT_DLL extern BOXA * boxaAdjustHeightToTarget ( BOXA *boxad, BOXA *boxas, l_int32 sides, l_int32 target, l_int32 thresh ); -LEPT_DLL extern l_ok boxEqual ( BOX *box1, BOX *box2, l_int32 *psame ); -LEPT_DLL extern l_ok boxaEqual ( BOXA *boxa1, BOXA *boxa2, l_int32 maxdist, NUMA **pnaindex, l_int32 *psame ); -LEPT_DLL extern l_ok boxSimilar ( BOX *box1, BOX *box2, l_int32 leftdiff, l_int32 rightdiff, l_int32 topdiff, l_int32 botdiff, l_int32 *psimilar ); -LEPT_DLL extern l_ok boxaSimilar ( BOXA *boxa1, BOXA *boxa2, l_int32 leftdiff, l_int32 rightdiff, l_int32 topdiff, l_int32 botdiff, l_int32 debug, l_int32 *psimilar, NUMA **pnasim ); -LEPT_DLL extern l_ok boxaJoin ( BOXA *boxad, BOXA *boxas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern l_ok boxaaJoin ( BOXAA *baad, BOXAA *baas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern l_ok boxaSplitEvenOdd ( BOXA *boxa, l_int32 fillflag, BOXA **pboxae, BOXA **pboxao ); -LEPT_DLL extern BOXA * boxaMergeEvenOdd ( BOXA *boxae, BOXA *boxao, l_int32 fillflag ); -LEPT_DLL extern BOXA * boxaTransform ( BOXA *boxas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern BOX * boxTransform ( BOX *box, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern BOXA * boxaTransformOrdered ( BOXA *boxas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 order ); -LEPT_DLL extern BOX * boxTransformOrdered ( BOX *boxs, l_int32 shiftx, l_int32 shifty, l_float32 scalex, 
l_float32 scaley, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 order ); -LEPT_DLL extern BOXA * boxaRotateOrth ( BOXA *boxas, l_int32 w, l_int32 h, l_int32 rotation ); -LEPT_DLL extern BOX * boxRotateOrth ( BOX *box, l_int32 w, l_int32 h, l_int32 rotation ); -LEPT_DLL extern BOXA * boxaShiftWithPta ( BOXA *boxas, PTA *pta, l_int32 dir ); -LEPT_DLL extern BOXA * boxaSort ( BOXA *boxas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex ); -LEPT_DLL extern BOXA * boxaBinSort ( BOXA *boxas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex ); -LEPT_DLL extern BOXA * boxaSortByIndex ( BOXA *boxas, NUMA *naindex ); -LEPT_DLL extern BOXAA * boxaSort2d ( BOXA *boxas, NUMAA **pnaad, l_int32 delta1, l_int32 delta2, l_int32 minh1 ); -LEPT_DLL extern BOXAA * boxaSort2dByIndex ( BOXA *boxas, NUMAA *naa ); -LEPT_DLL extern l_ok boxaExtractAsNuma ( BOXA *boxa, NUMA **pnal, NUMA **pnat, NUMA **pnar, NUMA **pnab, NUMA **pnaw, NUMA **pnah, l_int32 keepinvalid ); -LEPT_DLL extern l_ok boxaExtractAsPta ( BOXA *boxa, PTA **pptal, PTA **pptat, PTA **pptar, PTA **pptab, PTA **pptaw, PTA **pptah, l_int32 keepinvalid ); -LEPT_DLL extern PTA * boxaExtractCorners ( BOXA *boxa, l_int32 loc ); -LEPT_DLL extern l_ok boxaGetRankVals ( BOXA *boxa, l_float32 fract, l_int32 *px, l_int32 *py, l_int32 *pr, l_int32 *pb, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok boxaGetMedianVals ( BOXA *boxa, l_int32 *px, l_int32 *py, l_int32 *pr, l_int32 *pb, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok boxaGetAverageSize ( BOXA *boxa, l_float32 *pw, l_float32 *ph ); -LEPT_DLL extern l_ok boxaaGetExtent ( BOXAA *baa, l_int32 *pw, l_int32 *ph, BOX **pbox, BOXA **pboxa ); -LEPT_DLL extern BOXA * boxaaFlattenToBoxa ( BOXAA *baa, NUMA **pnaindex, l_int32 copyflag ); -LEPT_DLL extern BOXA * boxaaFlattenAligned ( BOXAA *baa, l_int32 num, BOX *fillerbox, l_int32 copyflag ); -LEPT_DLL extern BOXAA * boxaEncapsulateAligned ( BOXA *boxa, l_int32 num, l_int32 copyflag ); -LEPT_DLL extern BOXAA * 
boxaaTranspose ( BOXAA *baas ); -LEPT_DLL extern l_ok boxaaAlignBox ( BOXAA *baa, BOX *box, l_int32 delta, l_int32 *pindex ); -LEPT_DLL extern PIX * pixMaskConnComp ( PIX *pixs, l_int32 connectivity, BOXA **pboxa ); -LEPT_DLL extern PIX * pixMaskBoxa ( PIX *pixd, PIX *pixs, BOXA *boxa, l_int32 op ); -LEPT_DLL extern PIX * pixPaintBoxa ( PIX *pixs, BOXA *boxa, l_uint32 val ); -LEPT_DLL extern PIX * pixSetBlackOrWhiteBoxa ( PIX *pixs, BOXA *boxa, l_int32 op ); -LEPT_DLL extern PIX * pixPaintBoxaRandom ( PIX *pixs, BOXA *boxa ); -LEPT_DLL extern PIX * pixBlendBoxaRandom ( PIX *pixs, BOXA *boxa, l_float32 fract ); -LEPT_DLL extern PIX * pixDrawBoxa ( PIX *pixs, BOXA *boxa, l_int32 width, l_uint32 val ); -LEPT_DLL extern PIX * pixDrawBoxaRandom ( PIX *pixs, BOXA *boxa, l_int32 width ); -LEPT_DLL extern PIX * boxaaDisplay ( PIX *pixs, BOXAA *baa, l_int32 linewba, l_int32 linewb, l_uint32 colorba, l_uint32 colorb, l_int32 w, l_int32 h ); -LEPT_DLL extern PIXA * pixaDisplayBoxaa ( PIXA *pixas, BOXAA *baa, l_int32 colorflag, l_int32 width ); -LEPT_DLL extern BOXA * pixSplitIntoBoxa ( PIX *pixs, l_int32 minsum, l_int32 skipdist, l_int32 delta, l_int32 maxbg, l_int32 maxcomps, l_int32 remainder ); -LEPT_DLL extern BOXA * pixSplitComponentIntoBoxa ( PIX *pix, BOX *box, l_int32 minsum, l_int32 skipdist, l_int32 delta, l_int32 maxbg, l_int32 maxcomps, l_int32 remainder ); -LEPT_DLL extern BOXA * makeMosaicStrips ( l_int32 w, l_int32 h, l_int32 direction, l_int32 size ); -LEPT_DLL extern l_ok boxaCompareRegions ( BOXA *boxa1, BOXA *boxa2, l_int32 areathresh, l_int32 *pnsame, l_float32 *pdiffarea, l_float32 *pdiffxor, PIX **ppixdb ); -LEPT_DLL extern BOX * pixSelectLargeULComp ( PIX *pixs, l_float32 areaslop, l_int32 yslop, l_int32 connectivity ); -LEPT_DLL extern BOX * boxaSelectLargeULBox ( BOXA *boxas, l_float32 areaslop, l_int32 yslop ); -LEPT_DLL extern BOXA * boxaSelectRange ( BOXA *boxas, l_int32 first, l_int32 last, l_int32 copyflag ); -LEPT_DLL extern BOXAA * 
boxaaSelectRange ( BOXAA *baas, l_int32 first, l_int32 last, l_int32 copyflag ); -LEPT_DLL extern BOXA * boxaSelectBySize ( BOXA *boxas, l_int32 width, l_int32 height, l_int32 type, l_int32 relation, l_int32 *pchanged ); -LEPT_DLL extern NUMA * boxaMakeSizeIndicator ( BOXA *boxa, l_int32 width, l_int32 height, l_int32 type, l_int32 relation ); -LEPT_DLL extern BOXA * boxaSelectByArea ( BOXA *boxas, l_int32 area, l_int32 relation, l_int32 *pchanged ); -LEPT_DLL extern NUMA * boxaMakeAreaIndicator ( BOXA *boxa, l_int32 area, l_int32 relation ); -LEPT_DLL extern BOXA * boxaSelectByWHRatio ( BOXA *boxas, l_float32 ratio, l_int32 relation, l_int32 *pchanged ); -LEPT_DLL extern NUMA * boxaMakeWHRatioIndicator ( BOXA *boxa, l_float32 ratio, l_int32 relation ); -LEPT_DLL extern BOXA * boxaSelectWithIndicator ( BOXA *boxas, NUMA *na, l_int32 *pchanged ); -LEPT_DLL extern BOXA * boxaPermutePseudorandom ( BOXA *boxas ); -LEPT_DLL extern BOXA * boxaPermuteRandom ( BOXA *boxad, BOXA *boxas ); -LEPT_DLL extern l_ok boxaSwapBoxes ( BOXA *boxa, l_int32 i, l_int32 j ); -LEPT_DLL extern PTA * boxaConvertToPta ( BOXA *boxa, l_int32 ncorners ); -LEPT_DLL extern BOXA * ptaConvertToBoxa ( PTA *pta, l_int32 ncorners ); -LEPT_DLL extern PTA * boxConvertToPta ( BOX *box, l_int32 ncorners ); -LEPT_DLL extern BOX * ptaConvertToBox ( PTA *pta ); -LEPT_DLL extern l_ok boxaGetExtent ( BOXA *boxa, l_int32 *pw, l_int32 *ph, BOX **pbox ); -LEPT_DLL extern l_ok boxaGetCoverage ( BOXA *boxa, l_int32 wc, l_int32 hc, l_int32 exactflag, l_float32 *pfract ); -LEPT_DLL extern l_ok boxaaSizeRange ( BOXAA *baa, l_int32 *pminw, l_int32 *pminh, l_int32 *pmaxw, l_int32 *pmaxh ); -LEPT_DLL extern l_ok boxaSizeRange ( BOXA *boxa, l_int32 *pminw, l_int32 *pminh, l_int32 *pmaxw, l_int32 *pmaxh ); -LEPT_DLL extern l_ok boxaLocationRange ( BOXA *boxa, l_int32 *pminx, l_int32 *pminy, l_int32 *pmaxx, l_int32 *pmaxy ); -LEPT_DLL extern l_ok boxaGetSizes ( BOXA *boxa, NUMA **pnaw, NUMA **pnah ); -LEPT_DLL extern l_ok 
boxaGetArea ( BOXA *boxa, l_int32 *parea ); -LEPT_DLL extern PIX * boxaDisplayTiled ( BOXA *boxas, PIXA *pixa, l_int32 first, l_int32 last, l_int32 maxwidth, l_int32 linewidth, l_float32 scalefactor, l_int32 background, l_int32 spacing, l_int32 border ); -LEPT_DLL extern BOXA * boxaSmoothSequenceMedian ( BOXA *boxas, l_int32 halfwin, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels, l_int32 debug ); -LEPT_DLL extern BOXA * boxaWindowedMedian ( BOXA *boxas, l_int32 halfwin, l_int32 debug ); -LEPT_DLL extern BOXA * boxaModifyWithBoxa ( BOXA *boxas, BOXA *boxam, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels ); -LEPT_DLL extern BOXA * boxaConstrainSize ( BOXA *boxas, l_int32 width, l_int32 widthflag, l_int32 height, l_int32 heightflag ); -LEPT_DLL extern BOXA * boxaReconcileEvenOddHeight ( BOXA *boxas, l_int32 sides, l_int32 delh, l_int32 op, l_float32 factor, l_int32 start ); -LEPT_DLL extern BOXA * boxaReconcilePairWidth ( BOXA *boxas, l_int32 delw, l_int32 op, l_float32 factor, NUMA *na ); -LEPT_DLL extern l_ok boxaSizeConsistency1 ( BOXA *boxas, l_int32 type, l_float32 threshp, l_float32 threshm, l_float32 *pfvarp, l_float32 *pfvarm, l_int32 *psame ); -LEPT_DLL extern l_ok boxaSizeConsistency2 ( BOXA *boxas, l_float32 *pfdevw, l_float32 *pfdevh, l_int32 debug ); -LEPT_DLL extern BOXA * boxaReconcileAllByMedian ( BOXA *boxas, l_int32 select1, l_int32 select2, l_int32 thresh, l_int32 extra, PIXA *pixadb ); -LEPT_DLL extern BOXA * boxaReconcileSidesByMedian ( BOXA *boxas, l_int32 select, l_int32 thresh, l_int32 extra, PIXA *pixadb ); -LEPT_DLL extern BOXA * boxaReconcileSizeByMedian ( BOXA *boxas, l_int32 type, l_float32 dfract, l_float32 sfract, l_float32 factor, NUMA **pnadelw, NUMA **pnadelh, l_float32 *pratiowh ); -LEPT_DLL extern l_ok boxaPlotSides ( BOXA *boxa, const char *plotname, NUMA **pnal, NUMA **pnat, NUMA **pnar, NUMA **pnab, PIX **ppixd ); -LEPT_DLL extern l_ok boxaPlotSizes ( BOXA *boxa, const char *plotname, NUMA **pnaw, NUMA **pnah, PIX 
**ppixd ); -LEPT_DLL extern BOXA * boxaFillSequence ( BOXA *boxas, l_int32 useflag, l_int32 debug ); -LEPT_DLL extern l_ok boxaSizeVariation ( BOXA *boxa, l_int32 type, l_float32 *pdel_evenodd, l_float32 *prms_even, l_float32 *prms_odd, l_float32 *prms_all ); -LEPT_DLL extern l_ok boxaMedianDimensions ( BOXA *boxas, l_int32 *pmedw, l_int32 *pmedh, l_int32 *pmedwe, l_int32 *pmedwo, l_int32 *pmedhe, l_int32 *pmedho, NUMA **pnadelw, NUMA **pnadelh ); -LEPT_DLL extern L_BYTEA * l_byteaCreate ( size_t nbytes ); -LEPT_DLL extern L_BYTEA * l_byteaInitFromMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern L_BYTEA * l_byteaInitFromFile ( const char *fname ); -LEPT_DLL extern L_BYTEA * l_byteaInitFromStream ( FILE *fp ); -LEPT_DLL extern L_BYTEA * l_byteaCopy ( L_BYTEA *bas, l_int32 copyflag ); -LEPT_DLL extern void l_byteaDestroy ( L_BYTEA **pba ); -LEPT_DLL extern size_t l_byteaGetSize ( L_BYTEA *ba ); -LEPT_DLL extern l_uint8 * l_byteaGetData ( L_BYTEA *ba, size_t *psize ); -LEPT_DLL extern l_uint8 * l_byteaCopyData ( L_BYTEA *ba, size_t *psize ); -LEPT_DLL extern l_ok l_byteaAppendData ( L_BYTEA *ba, const l_uint8 *newdata, size_t newbytes ); -LEPT_DLL extern l_ok l_byteaAppendString ( L_BYTEA *ba, const char *str ); -LEPT_DLL extern l_ok l_byteaJoin ( L_BYTEA *ba1, L_BYTEA **pba2 ); -LEPT_DLL extern l_ok l_byteaSplit ( L_BYTEA *ba1, size_t splitloc, L_BYTEA **pba2 ); -LEPT_DLL extern l_ok l_byteaFindEachSequence ( L_BYTEA *ba, const l_uint8 *sequence, size_t seqlen, L_DNA **pda ); -LEPT_DLL extern l_ok l_byteaWrite ( const char *fname, L_BYTEA *ba, size_t startloc, size_t nbytes ); -LEPT_DLL extern l_ok l_byteaWriteStream ( FILE *fp, L_BYTEA *ba, size_t startloc, size_t nbytes ); -LEPT_DLL extern CCBORDA * ccbaCreate ( PIX *pixs, l_int32 n ); -LEPT_DLL extern void ccbaDestroy ( CCBORDA **pccba ); -LEPT_DLL extern CCBORD * ccbCreate ( PIX *pixs ); -LEPT_DLL extern void ccbDestroy ( CCBORD **pccb ); -LEPT_DLL extern l_ok ccbaAddCcb ( CCBORDA *ccba, CCBORD *ccb ); 
-LEPT_DLL extern l_int32 ccbaGetCount ( CCBORDA *ccba ); -LEPT_DLL extern CCBORD * ccbaGetCcb ( CCBORDA *ccba, l_int32 index ); -LEPT_DLL extern CCBORDA * pixGetAllCCBorders ( PIX *pixs ); -LEPT_DLL extern PTAA * pixGetOuterBordersPtaa ( PIX *pixs ); -LEPT_DLL extern l_ok pixGetOuterBorder ( CCBORD *ccb, PIX *pixs, BOX *box ); -LEPT_DLL extern l_ok ccbaGenerateGlobalLocs ( CCBORDA *ccba ); -LEPT_DLL extern l_ok ccbaGenerateStepChains ( CCBORDA *ccba ); -LEPT_DLL extern l_ok ccbaStepChainsToPixCoords ( CCBORDA *ccba, l_int32 coordtype ); -LEPT_DLL extern l_ok ccbaGenerateSPGlobalLocs ( CCBORDA *ccba, l_int32 ptsflag ); -LEPT_DLL extern l_ok ccbaGenerateSinglePath ( CCBORDA *ccba ); -LEPT_DLL extern PTA * getCutPathForHole ( PIX *pix, PTA *pta, BOX *boxinner, l_int32 *pdir, l_int32 *plen ); -LEPT_DLL extern PIX * ccbaDisplayBorder ( CCBORDA *ccba ); -LEPT_DLL extern PIX * ccbaDisplaySPBorder ( CCBORDA *ccba ); -LEPT_DLL extern PIX * ccbaDisplayImage1 ( CCBORDA *ccba ); -LEPT_DLL extern PIX * ccbaDisplayImage2 ( CCBORDA *ccba ); -LEPT_DLL extern l_ok ccbaWrite ( const char *filename, CCBORDA *ccba ); -LEPT_DLL extern l_ok ccbaWriteStream ( FILE *fp, CCBORDA *ccba ); -LEPT_DLL extern CCBORDA * ccbaRead ( const char *filename ); -LEPT_DLL extern CCBORDA * ccbaReadStream ( FILE *fp ); -LEPT_DLL extern l_ok ccbaWriteSVG ( const char *filename, CCBORDA *ccba ); -LEPT_DLL extern char * ccbaWriteSVGString ( const char *filename, CCBORDA *ccba ); -LEPT_DLL extern PIXA * pixaThinConnected ( PIXA *pixas, l_int32 type, l_int32 connectivity, l_int32 maxiters ); -LEPT_DLL extern PIX * pixThinConnected ( PIX *pixs, l_int32 type, l_int32 connectivity, l_int32 maxiters ); -LEPT_DLL extern PIX * pixThinConnectedBySet ( PIX *pixs, l_int32 type, SELA *sela, l_int32 maxiters ); -LEPT_DLL extern SELA * selaMakeThinSets ( l_int32 index, l_int32 debug ); -LEPT_DLL extern l_ok pixFindCheckerboardCorners ( PIX *pixs, l_int32 size, l_int32 dilation, l_int32 nsels, PIX **ppix_corners, PTA 
**ppta_corners, PIXA *pixadb ); -LEPT_DLL extern SELA * makeCheckerboardCornerSela ( l_int32 size, l_int32 dilation, l_int32 nsels, PIXA *pixadb ); -LEPT_DLL extern l_ok jbCorrelation ( const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag ); -LEPT_DLL extern l_ok jbRankHaus ( const char *dirin, l_int32 size, l_float32 rank, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag ); -LEPT_DLL extern JBCLASSER * jbWordsInTextlines ( const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages ); -LEPT_DLL extern l_ok pixGetWordsInTextlines ( PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai ); -LEPT_DLL extern l_ok pixGetWordBoxesInTextlines ( PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, NUMA **pnai ); -LEPT_DLL extern l_ok pixFindWordAndCharacterBoxes ( PIX *pixs, BOX *boxs, l_int32 thresh, BOXA **pboxaw, BOXAA **pboxaac, const char *debugdir ); -LEPT_DLL extern NUMAA * boxaExtractSortedPattern ( BOXA *boxa, NUMA *na ); -LEPT_DLL extern l_ok numaaCompareImagesByBoxes ( NUMAA *naa1, NUMAA *naa2, l_int32 nperline, l_int32 nreq, l_int32 maxshiftx, l_int32 maxshifty, l_int32 delx, l_int32 dely, l_int32 *psame, l_int32 debugflag ); -LEPT_DLL extern l_ok pixColorContent ( PIX *pixs, l_int32 rref, l_int32 gref, l_int32 bref, l_int32 mingray, PIX **ppixr, PIX **ppixg, PIX **ppixb ); -LEPT_DLL extern PIX * pixColorMagnitude ( PIX *pixs, l_int32 rref, l_int32 gref, l_int32 bref, l_int32 type ); -LEPT_DLL extern PIX * pixMaskOverColorPixels ( PIX *pixs, l_int32 threshdiff, l_int32 mindist ); -LEPT_DLL extern PIX * pixMaskOverGrayPixels ( PIX *pixs, l_int32 maxlimit, l_int32 satlimit ); -LEPT_DLL extern PIX * 
pixMaskOverColorRange ( PIX *pixs, l_int32 rmin, l_int32 rmax, l_int32 gmin, l_int32 gmax, l_int32 bmin, l_int32 bmax ); -LEPT_DLL extern l_ok pixColorFraction ( PIX *pixs, l_int32 darkthresh, l_int32 lightthresh, l_int32 diffthresh, l_int32 factor, l_float32 *ppixfract, l_float32 *pcolorfract ); -LEPT_DLL extern l_ok pixFindColorRegions ( PIX *pixs, PIX *pixm, l_int32 factor, l_int32 lightthresh, l_int32 darkthresh, l_int32 mindiff, l_int32 colordiff, l_float32 edgefract, l_float32 *pcolorfract, PIX **pcolormask1, PIX **pcolormask2, PIXA *pixadb ); -LEPT_DLL extern l_ok pixNumSignificantGrayColors ( PIX *pixs, l_int32 darkthresh, l_int32 lightthresh, l_float32 minfract, l_int32 factor, l_int32 *pncolors ); -LEPT_DLL extern l_ok pixColorsForQuantization ( PIX *pixs, l_int32 thresh, l_int32 *pncolors, l_int32 *piscolor, l_int32 debug ); -LEPT_DLL extern l_ok pixNumColors ( PIX *pixs, l_int32 factor, l_int32 *pncolors ); -LEPT_DLL extern PIX * pixConvertRGBToCmap ( PIX *pixs ); -LEPT_DLL extern l_ok pixGetMostPopulatedColors ( PIX *pixs, l_int32 sigbits, l_int32 factor, l_int32 ncolors, l_uint32 **parray, PIXCMAP **pcmap ); -LEPT_DLL extern PIX * pixSimpleColorQuantize ( PIX *pixs, l_int32 sigbits, l_int32 factor, l_int32 ncolors ); -LEPT_DLL extern NUMA * pixGetRGBHistogram ( PIX *pixs, l_int32 sigbits, l_int32 factor ); -LEPT_DLL extern l_ok makeRGBIndexTables ( l_uint32 **prtab, l_uint32 **pgtab, l_uint32 **pbtab, l_int32 sigbits ); -LEPT_DLL extern l_ok getRGBFromIndex ( l_uint32 index, l_int32 sigbits, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern l_ok pixHasHighlightRed ( PIX *pixs, l_int32 factor, l_float32 fract, l_float32 fthresh, l_int32 *phasred, l_float32 *pratio, PIX **ppixdb ); -LEPT_DLL extern PIX * pixColorGrayRegions ( PIX *pixs, BOXA *boxa, l_int32 type, l_int32 thresh, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixColorGray ( PIX *pixs, BOX *box, l_int32 type, l_int32 thresh, l_int32 rval, l_int32 gval, 
l_int32 bval ); -LEPT_DLL extern PIX * pixColorGrayMasked ( PIX *pixs, PIX *pixm, l_int32 type, l_int32 thresh, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern PIX * pixSnapColor ( PIX *pixd, PIX *pixs, l_uint32 srcval, l_uint32 dstval, l_int32 diff ); -LEPT_DLL extern PIX * pixSnapColorCmap ( PIX *pixd, PIX *pixs, l_uint32 srcval, l_uint32 dstval, l_int32 diff ); -LEPT_DLL extern PIX * pixLinearMapToTargetColor ( PIX *pixd, PIX *pixs, l_uint32 srcval, l_uint32 dstval ); -LEPT_DLL extern l_ok pixelLinearMapToTargetColor ( l_uint32 scolor, l_uint32 srcmap, l_uint32 dstmap, l_uint32 *pdcolor ); -LEPT_DLL extern PIX * pixShiftByComponent ( PIX *pixd, PIX *pixs, l_uint32 srcval, l_uint32 dstval ); -LEPT_DLL extern l_ok pixelShiftByComponent ( l_int32 rval, l_int32 gval, l_int32 bval, l_uint32 srcval, l_uint32 dstval, l_uint32 *ppixel ); -LEPT_DLL extern l_ok pixelFractionalShift ( l_int32 rval, l_int32 gval, l_int32 bval, l_float32 fraction, l_uint32 *ppixel ); -LEPT_DLL extern PIXCMAP * pixcmapCreate ( l_int32 depth ); -LEPT_DLL extern PIXCMAP * pixcmapCreateRandom ( l_int32 depth, l_int32 hasblack, l_int32 haswhite ); -LEPT_DLL extern PIXCMAP * pixcmapCreateLinear ( l_int32 d, l_int32 nlevels ); -LEPT_DLL extern PIXCMAP * pixcmapCopy ( const PIXCMAP *cmaps ); -LEPT_DLL extern void pixcmapDestroy ( PIXCMAP **pcmap ); -LEPT_DLL extern l_ok pixcmapIsValid ( const PIXCMAP *cmap, l_int32 *pvalid ); -LEPT_DLL extern l_ok pixcmapAddColor ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixcmapAddRGBA ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 aval ); -LEPT_DLL extern l_ok pixcmapAddNewColor ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapAddNearestColor ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapUsableColor ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pusable 
); -LEPT_DLL extern l_ok pixcmapAddBlackOrWhite ( PIXCMAP *cmap, l_int32 color, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapSetBlackAndWhite ( PIXCMAP *cmap, l_int32 setblack, l_int32 setwhite ); -LEPT_DLL extern l_int32 pixcmapGetCount ( const PIXCMAP *cmap ); -LEPT_DLL extern l_int32 pixcmapGetFreeCount ( PIXCMAP *cmap ); -LEPT_DLL extern l_int32 pixcmapGetDepth ( PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapGetMinDepth ( PIXCMAP *cmap, l_int32 *pmindepth ); -LEPT_DLL extern l_ok pixcmapClear ( PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapGetColor ( PIXCMAP *cmap, l_int32 index, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern l_ok pixcmapGetColor32 ( PIXCMAP *cmap, l_int32 index, l_uint32 *pval32 ); -LEPT_DLL extern l_ok pixcmapGetRGBA ( PIXCMAP *cmap, l_int32 index, l_int32 *prval, l_int32 *pgval, l_int32 *pbval, l_int32 *paval ); -LEPT_DLL extern l_ok pixcmapGetRGBA32 ( PIXCMAP *cmap, l_int32 index, l_uint32 *pval32 ); -LEPT_DLL extern l_ok pixcmapResetColor ( PIXCMAP *cmap, l_int32 index, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixcmapSetAlpha ( PIXCMAP *cmap, l_int32 index, l_int32 aval ); -LEPT_DLL extern l_int32 pixcmapGetIndex ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapHasColor ( PIXCMAP *cmap, l_int32 *pcolor ); -LEPT_DLL extern l_ok pixcmapIsOpaque ( PIXCMAP *cmap, l_int32 *popaque ); -LEPT_DLL extern l_ok pixcmapIsBlackAndWhite ( PIXCMAP *cmap, l_int32 *pblackwhite ); -LEPT_DLL extern l_ok pixcmapCountGrayColors ( PIXCMAP *cmap, l_int32 *pngray ); -LEPT_DLL extern l_ok pixcmapGetRankIntensity ( PIXCMAP *cmap, l_float32 rankval, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapGetNearestIndex ( PIXCMAP *cmap, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapGetNearestGrayIndex ( PIXCMAP *cmap, l_int32 val, l_int32 *pindex ); -LEPT_DLL extern l_ok pixcmapGetDistanceToColor ( PIXCMAP *cmap, l_int32 
index, l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pdist ); -LEPT_DLL extern l_ok pixcmapGetRangeValues ( PIXCMAP *cmap, l_int32 select, l_int32 *pminval, l_int32 *pmaxval, l_int32 *pminindex, l_int32 *pmaxindex ); -LEPT_DLL extern PIXCMAP * pixcmapGrayToColor ( l_uint32 color ); -LEPT_DLL extern PIXCMAP * pixcmapColorToGray ( PIXCMAP *cmaps, l_float32 rwt, l_float32 gwt, l_float32 bwt ); -LEPT_DLL extern PIXCMAP * pixcmapConvertTo4 ( PIXCMAP *cmaps ); -LEPT_DLL extern PIXCMAP * pixcmapConvertTo8 ( PIXCMAP *cmaps ); -LEPT_DLL extern PIXCMAP * pixcmapRead ( const char *filename ); -LEPT_DLL extern PIXCMAP * pixcmapReadStream ( FILE *fp ); -LEPT_DLL extern PIXCMAP * pixcmapReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixcmapWrite ( const char *filename, const PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapWriteStream ( FILE *fp, const PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapWriteMem ( l_uint8 **pdata, size_t *psize, const PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapToArrays ( const PIXCMAP *cmap, l_int32 **prmap, l_int32 **pgmap, l_int32 **pbmap, l_int32 **pamap ); -LEPT_DLL extern l_ok pixcmapToRGBTable ( PIXCMAP *cmap, l_uint32 **ptab, l_int32 *pncolors ); -LEPT_DLL extern l_ok pixcmapSerializeToMemory ( PIXCMAP *cmap, l_int32 cpc, l_int32 *pncolors, l_uint8 **pdata ); -LEPT_DLL extern PIXCMAP * pixcmapDeserializeFromMemory ( l_uint8 *data, l_int32 cpc, l_int32 ncolors ); -LEPT_DLL extern char * pixcmapConvertToHex ( l_uint8 *data, l_int32 ncolors ); -LEPT_DLL extern l_ok pixcmapGammaTRC ( PIXCMAP *cmap, l_float32 gamma, l_int32 minval, l_int32 maxval ); -LEPT_DLL extern l_ok pixcmapContrastTRC ( PIXCMAP *cmap, l_float32 factor ); -LEPT_DLL extern l_ok pixcmapShiftIntensity ( PIXCMAP *cmap, l_float32 fraction ); -LEPT_DLL extern l_ok pixcmapShiftByComponent ( PIXCMAP *cmap, l_uint32 srcval, l_uint32 dstval ); -LEPT_DLL extern PIX * pixColorMorph ( PIX *pixs, l_int32 type, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * 
pixOctreeColorQuant ( PIX *pixs, l_int32 colors, l_int32 ditherflag ); -LEPT_DLL extern PIX * pixOctreeColorQuantGeneral ( PIX *pixs, l_int32 colors, l_int32 ditherflag, l_float32 validthresh, l_float32 colorthresh ); -LEPT_DLL extern l_ok makeRGBToIndexTables ( l_int32 cqlevels, l_uint32 **prtab, l_uint32 **pgtab, l_uint32 **pbtab ); -LEPT_DLL extern void getOctcubeIndexFromRGB ( l_int32 rval, l_int32 gval, l_int32 bval, l_uint32 *rtab, l_uint32 *gtab, l_uint32 *btab, l_uint32 *pindex ); -LEPT_DLL extern PIX * pixOctreeQuantByPopulation ( PIX *pixs, l_int32 level, l_int32 ditherflag ); -LEPT_DLL extern PIX * pixOctreeQuantNumColors ( PIX *pixs, l_int32 maxcolors, l_int32 subsample ); -LEPT_DLL extern PIX * pixOctcubeQuantMixedWithGray ( PIX *pixs, l_int32 depth, l_int32 graylevels, l_int32 delta ); -LEPT_DLL extern PIX * pixFixedOctcubeQuant256 ( PIX *pixs, l_int32 ditherflag ); -LEPT_DLL extern PIX * pixFewColorsOctcubeQuant1 ( PIX *pixs, l_int32 level ); -LEPT_DLL extern PIX * pixFewColorsOctcubeQuant2 ( PIX *pixs, l_int32 level, NUMA *na, l_int32 ncolors, l_int32 *pnerrors ); -LEPT_DLL extern PIX * pixFewColorsOctcubeQuantMixed ( PIX *pixs, l_int32 level, l_int32 darkthresh, l_int32 lightthresh, l_int32 diffthresh, l_float32 minfract, l_int32 maxspan ); -LEPT_DLL extern PIX * pixFixedOctcubeQuantGenRGB ( PIX *pixs, l_int32 level ); -LEPT_DLL extern PIX * pixQuantFromCmap ( PIX *pixs, PIXCMAP *cmap, l_int32 mindepth, l_int32 level, l_int32 metric ); -LEPT_DLL extern PIX * pixOctcubeQuantFromCmap ( PIX *pixs, PIXCMAP *cmap, l_int32 mindepth, l_int32 level, l_int32 metric ); -LEPT_DLL extern NUMA * pixOctcubeHistogram ( PIX *pixs, l_int32 level, l_int32 *pncolors ); -LEPT_DLL extern l_int32 * pixcmapToOctcubeLUT ( PIXCMAP *cmap, l_int32 level, l_int32 metric ); -LEPT_DLL extern l_ok pixRemoveUnusedColors ( PIX *pixs ); -LEPT_DLL extern l_ok pixNumberOccupiedOctcubes ( PIX *pix, l_int32 level, l_int32 mincount, l_float32 minfract, l_int32 *pncolors ); -LEPT_DLL 
extern PIX * pixMedianCutQuant ( PIX *pixs, l_int32 ditherflag ); -LEPT_DLL extern PIX * pixMedianCutQuantGeneral ( PIX *pixs, l_int32 ditherflag, l_int32 outdepth, l_int32 maxcolors, l_int32 sigbits, l_int32 maxsub, l_int32 checkbw ); -LEPT_DLL extern PIX * pixMedianCutQuantMixed ( PIX *pixs, l_int32 ncolor, l_int32 ngray, l_int32 darkthresh, l_int32 lightthresh, l_int32 diffthresh ); -LEPT_DLL extern PIX * pixFewColorsMedianCutQuantMixed ( PIX *pixs, l_int32 ncolor, l_int32 ngray, l_int32 maxncolors, l_int32 darkthresh, l_int32 lightthresh, l_int32 diffthresh ); -LEPT_DLL extern l_int32 * pixMedianCutHisto ( PIX *pixs, l_int32 sigbits, l_int32 subsample ); -LEPT_DLL extern PIX * pixColorSegment ( PIX *pixs, l_int32 maxdist, l_int32 maxcolors, l_int32 selsize, l_int32 finalcolors, l_int32 debugflag ); -LEPT_DLL extern PIX * pixColorSegmentCluster ( PIX *pixs, l_int32 maxdist, l_int32 maxcolors, l_int32 debugflag ); -LEPT_DLL extern l_ok pixAssignToNearestColor ( PIX *pixd, PIX *pixs, PIX *pixm, l_int32 level, l_int32 *countarray ); -LEPT_DLL extern l_ok pixColorSegmentClean ( PIX *pixs, l_int32 selsize, l_int32 *countarray ); -LEPT_DLL extern l_ok pixColorSegmentRemoveColors ( PIX *pixd, PIX *pixs, l_int32 finalcolors ); -LEPT_DLL extern PIX * pixConvertRGBToHSV ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixConvertHSVToRGB ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern l_ok convertRGBToHSV ( l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *phval, l_int32 *psval, l_int32 *pvval ); -LEPT_DLL extern l_ok convertHSVToRGB ( l_int32 hval, l_int32 sval, l_int32 vval, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern l_ok pixcmapConvertRGBToHSV ( PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapConvertHSVToRGB ( PIXCMAP *cmap ); -LEPT_DLL extern PIX * pixConvertRGBToHue ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertRGBToSaturation ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertRGBToValue ( PIX *pixs ); -LEPT_DLL extern PIX * pixMakeRangeMaskHS ( PIX 
*pixs, l_int32 huecenter, l_int32 huehw, l_int32 satcenter, l_int32 sathw, l_int32 regionflag ); -LEPT_DLL extern PIX * pixMakeRangeMaskHV ( PIX *pixs, l_int32 huecenter, l_int32 huehw, l_int32 valcenter, l_int32 valhw, l_int32 regionflag ); -LEPT_DLL extern PIX * pixMakeRangeMaskSV ( PIX *pixs, l_int32 satcenter, l_int32 sathw, l_int32 valcenter, l_int32 valhw, l_int32 regionflag ); -LEPT_DLL extern PIX * pixMakeHistoHS ( PIX *pixs, l_int32 factor, NUMA **pnahue, NUMA **pnasat ); -LEPT_DLL extern PIX * pixMakeHistoHV ( PIX *pixs, l_int32 factor, NUMA **pnahue, NUMA **pnaval ); -LEPT_DLL extern PIX * pixMakeHistoSV ( PIX *pixs, l_int32 factor, NUMA **pnasat, NUMA **pnaval ); -LEPT_DLL extern l_ok pixFindHistoPeaksHSV ( PIX *pixs, l_int32 type, l_int32 width, l_int32 height, l_int32 npeaks, l_float32 erasefactor, PTA **ppta, NUMA **pnatot, PIXA **ppixa ); -LEPT_DLL extern PIX * displayHSVColorRange ( l_int32 hval, l_int32 sval, l_int32 vval, l_int32 huehw, l_int32 sathw, l_int32 nsamp, l_int32 factor ); -LEPT_DLL extern PIX * pixConvertRGBToYUV ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixConvertYUVToRGB ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern l_ok convertRGBToYUV ( l_int32 rval, l_int32 gval, l_int32 bval, l_int32 *pyval, l_int32 *puval, l_int32 *pvval ); -LEPT_DLL extern l_ok convertYUVToRGB ( l_int32 yval, l_int32 uval, l_int32 vval, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern l_ok pixcmapConvertRGBToYUV ( PIXCMAP *cmap ); -LEPT_DLL extern l_ok pixcmapConvertYUVToRGB ( PIXCMAP *cmap ); -LEPT_DLL extern FPIXA * pixConvertRGBToXYZ ( PIX *pixs ); -LEPT_DLL extern PIX * fpixaConvertXYZToRGB ( FPIXA *fpixa ); -LEPT_DLL extern l_ok convertRGBToXYZ ( l_int32 rval, l_int32 gval, l_int32 bval, l_float32 *pfxval, l_float32 *pfyval, l_float32 *pfzval ); -LEPT_DLL extern l_ok convertXYZToRGB ( l_float32 fxval, l_float32 fyval, l_float32 fzval, l_int32 blackout, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern FPIXA * 
fpixaConvertXYZToLAB ( FPIXA *fpixas ); -LEPT_DLL extern FPIXA * fpixaConvertLABToXYZ ( FPIXA *fpixas ); -LEPT_DLL extern l_ok convertXYZToLAB ( l_float32 xval, l_float32 yval, l_float32 zval, l_float32 *plval, l_float32 *paval, l_float32 *pbval ); -LEPT_DLL extern l_ok convertLABToXYZ ( l_float32 lval, l_float32 aval, l_float32 bval, l_float32 *pxval, l_float32 *pyval, l_float32 *pzval ); -LEPT_DLL extern FPIXA * pixConvertRGBToLAB ( PIX *pixs ); -LEPT_DLL extern PIX * fpixaConvertLABToRGB ( FPIXA *fpixa ); -LEPT_DLL extern l_ok convertRGBToLAB ( l_int32 rval, l_int32 gval, l_int32 bval, l_float32 *pflval, l_float32 *pfaval, l_float32 *pfbval ); -LEPT_DLL extern l_ok convertLABToRGB ( l_float32 flval, l_float32 faval, l_float32 fbval, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern l_ok pixEqual ( PIX *pix1, PIX *pix2, l_int32 *psame ); -LEPT_DLL extern l_ok pixEqualWithAlpha ( PIX *pix1, PIX *pix2, l_int32 use_alpha, l_int32 *psame ); -LEPT_DLL extern l_ok pixEqualWithCmap ( PIX *pix1, PIX *pix2, l_int32 *psame ); -LEPT_DLL extern l_ok cmapEqual ( PIXCMAP *cmap1, PIXCMAP *cmap2, l_int32 ncomps, l_int32 *psame ); -LEPT_DLL extern l_ok pixUsesCmapColor ( PIX *pixs, l_int32 *pcolor ); -LEPT_DLL extern l_ok pixCorrelationBinary ( PIX *pix1, PIX *pix2, l_float32 *pval ); -LEPT_DLL extern PIX * pixDisplayDiffBinary ( PIX *pix1, PIX *pix2 ); -LEPT_DLL extern l_ok pixCompareBinary ( PIX *pix1, PIX *pix2, l_int32 comptype, l_float32 *pfract, PIX **ppixdiff ); -LEPT_DLL extern l_ok pixCompareGrayOrRGB ( PIX *pix1, PIX *pix2, l_int32 comptype, l_int32 plottype, l_int32 *psame, l_float32 *pdiff, l_float32 *prmsdiff, PIX **ppixdiff ); -LEPT_DLL extern l_ok pixCompareGray ( PIX *pix1, PIX *pix2, l_int32 comptype, l_int32 plottype, l_int32 *psame, l_float32 *pdiff, l_float32 *prmsdiff, PIX **ppixdiff ); -LEPT_DLL extern l_ok pixCompareRGB ( PIX *pix1, PIX *pix2, l_int32 comptype, l_int32 plottype, l_int32 *psame, l_float32 *pdiff, l_float32 *prmsdiff, PIX 
**ppixdiff ); -LEPT_DLL extern l_ok pixCompareTiled ( PIX *pix1, PIX *pix2, l_int32 sx, l_int32 sy, l_int32 type, PIX **ppixdiff ); -LEPT_DLL extern NUMA * pixCompareRankDifference ( PIX *pix1, PIX *pix2, l_int32 factor ); -LEPT_DLL extern l_ok pixTestForSimilarity ( PIX *pix1, PIX *pix2, l_int32 factor, l_int32 mindiff, l_float32 maxfract, l_float32 maxave, l_int32 *psimilar, l_int32 details ); -LEPT_DLL extern l_ok pixGetDifferenceStats ( PIX *pix1, PIX *pix2, l_int32 factor, l_int32 mindiff, l_float32 *pfractdiff, l_float32 *pavediff, l_int32 details ); -LEPT_DLL extern NUMA * pixGetDifferenceHistogram ( PIX *pix1, PIX *pix2, l_int32 factor ); -LEPT_DLL extern l_ok pixGetPerceptualDiff ( PIX *pixs1, PIX *pixs2, l_int32 sampling, l_int32 dilation, l_int32 mindiff, l_float32 *pfract, PIX **ppixdiff1, PIX **ppixdiff2 ); -LEPT_DLL extern l_ok pixGetPSNR ( PIX *pix1, PIX *pix2, l_int32 factor, l_float32 *ppsnr ); -LEPT_DLL extern l_ok pixaComparePhotoRegionsByHisto ( PIXA *pixa, l_float32 minratio, l_float32 textthresh, l_int32 factor, l_int32 n, l_float32 simthresh, NUMA **pnai, l_float32 **pscores, PIX **ppixd, l_int32 debug ); -LEPT_DLL extern l_ok pixComparePhotoRegionsByHisto ( PIX *pix1, PIX *pix2, BOX *box1, BOX *box2, l_float32 minratio, l_int32 factor, l_int32 n, l_float32 *pscore, l_int32 debugflag ); -LEPT_DLL extern l_ok pixGenPhotoHistos ( PIX *pixs, BOX *box, l_int32 factor, l_float32 thresh, l_int32 n, NUMAA **pnaa, l_int32 *pw, l_int32 *ph, l_int32 debugindex ); -LEPT_DLL extern PIX * pixPadToCenterCentroid ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern l_ok pixCentroid8 ( PIX *pixs, l_int32 factor, l_float32 *pcx, l_float32 *pcy ); -LEPT_DLL extern l_ok pixDecideIfPhotoImage ( PIX *pix, l_int32 factor, l_float32 thresh, l_int32 n, NUMAA **pnaa, PIXA *pixadebug ); -LEPT_DLL extern l_ok compareTilesByHisto ( NUMAA *naa1, NUMAA *naa2, l_float32 minratio, l_int32 w1, l_int32 h1, l_int32 w2, l_int32 h2, l_float32 *pscore, PIXA *pixadebug ); -LEPT_DLL 
extern l_ok pixCompareGrayByHisto ( PIX *pix1, PIX *pix2, BOX *box1, BOX *box2, l_float32 minratio, l_int32 maxgray, l_int32 factor, l_int32 n, l_float32 *pscore, l_int32 debugflag ); -LEPT_DLL extern l_ok pixCropAlignedToCentroid ( PIX *pix1, PIX *pix2, l_int32 factor, BOX **pbox1, BOX **pbox2 ); -LEPT_DLL extern l_uint8 * l_compressGrayHistograms ( NUMAA *naa, l_int32 w, l_int32 h, size_t *psize ); -LEPT_DLL extern NUMAA * l_uncompressGrayHistograms ( l_uint8 *bytea, size_t size, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok pixCompareWithTranslation ( PIX *pix1, PIX *pix2, l_int32 thresh, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag ); -LEPT_DLL extern l_ok pixBestCorrelation ( PIX *pix1, PIX *pix2, l_int32 area1, l_int32 area2, l_int32 etransx, l_int32 etransy, l_int32 maxshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag ); -LEPT_DLL extern BOXA * pixConnComp ( PIX *pixs, PIXA **ppixa, l_int32 connectivity ); -LEPT_DLL extern BOXA * pixConnCompPixa ( PIX *pixs, PIXA **ppixa, l_int32 connectivity ); -LEPT_DLL extern BOXA * pixConnCompBB ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern l_ok pixCountConnComp ( PIX *pixs, l_int32 connectivity, l_int32 *pcount ); -LEPT_DLL extern l_int32 nextOnPixelInRaster ( PIX *pixs, l_int32 xstart, l_int32 ystart, l_int32 *px, l_int32 *py ); -LEPT_DLL extern BOX * pixSeedfillBB ( PIX *pixs, L_STACK *stack, l_int32 x, l_int32 y, l_int32 connectivity ); -LEPT_DLL extern BOX * pixSeedfill4BB ( PIX *pixs, L_STACK *stack, l_int32 x, l_int32 y ); -LEPT_DLL extern BOX * pixSeedfill8BB ( PIX *pixs, L_STACK *stack, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok pixSeedfill ( PIX *pixs, L_STACK *stack, l_int32 x, l_int32 y, l_int32 connectivity ); -LEPT_DLL extern l_ok pixSeedfill4 ( PIX *pixs, L_STACK *stack, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok pixSeedfill8 ( PIX *pixs, L_STACK *stack, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok convertFilesTo1bpp ( 
const char *dirin, const char *substr, l_int32 upscaling, l_int32 thresh, l_int32 firstpage, l_int32 npages, const char *dirout, l_int32 outformat ); -LEPT_DLL extern PIX * pixBlockconv ( PIX *pix, l_int32 wc, l_int32 hc ); -LEPT_DLL extern PIX * pixBlockconvGray ( PIX *pixs, PIX *pixacc, l_int32 wc, l_int32 hc ); -LEPT_DLL extern PIX * pixBlockconvAccum ( PIX *pixs ); -LEPT_DLL extern PIX * pixBlockconvGrayUnnormalized ( PIX *pixs, l_int32 wc, l_int32 hc ); -LEPT_DLL extern PIX * pixBlockconvTiled ( PIX *pix, l_int32 wc, l_int32 hc, l_int32 nx, l_int32 ny ); -LEPT_DLL extern PIX * pixBlockconvGrayTile ( PIX *pixs, PIX *pixacc, l_int32 wc, l_int32 hc ); -LEPT_DLL extern l_ok pixWindowedStats ( PIX *pixs, l_int32 wc, l_int32 hc, l_int32 hasborder, PIX **ppixm, PIX **ppixms, FPIX **pfpixv, FPIX **pfpixrv ); -LEPT_DLL extern PIX * pixWindowedMean ( PIX *pixs, l_int32 wc, l_int32 hc, l_int32 hasborder, l_int32 normflag ); -LEPT_DLL extern PIX * pixWindowedMeanSquare ( PIX *pixs, l_int32 wc, l_int32 hc, l_int32 hasborder ); -LEPT_DLL extern l_ok pixWindowedVariance ( PIX *pixm, PIX *pixms, FPIX **pfpixv, FPIX **pfpixrv ); -LEPT_DLL extern DPIX * pixMeanSquareAccum ( PIX *pixs ); -LEPT_DLL extern PIX * pixBlockrank ( PIX *pixs, PIX *pixacc, l_int32 wc, l_int32 hc, l_float32 rank ); -LEPT_DLL extern PIX * pixBlocksum ( PIX *pixs, PIX *pixacc, l_int32 wc, l_int32 hc ); -LEPT_DLL extern PIX * pixCensusTransform ( PIX *pixs, l_int32 halfsize, PIX *pixacc ); -LEPT_DLL extern PIX * pixConvolve ( PIX *pixs, L_KERNEL *kel, l_int32 outdepth, l_int32 normflag ); -LEPT_DLL extern PIX * pixConvolveSep ( PIX *pixs, L_KERNEL *kelx, L_KERNEL *kely, l_int32 outdepth, l_int32 normflag ); -LEPT_DLL extern PIX * pixConvolveRGB ( PIX *pixs, L_KERNEL *kel ); -LEPT_DLL extern PIX * pixConvolveRGBSep ( PIX *pixs, L_KERNEL *kelx, L_KERNEL *kely ); -LEPT_DLL extern FPIX * fpixConvolve ( FPIX *fpixs, L_KERNEL *kel, l_int32 normflag ); -LEPT_DLL extern FPIX * fpixConvolveSep ( FPIX *fpixs, 
L_KERNEL *kelx, L_KERNEL *kely, l_int32 normflag ); -LEPT_DLL extern PIX * pixConvolveWithBias ( PIX *pixs, L_KERNEL *kel1, L_KERNEL *kel2, l_int32 force8, l_int32 *pbias ); -LEPT_DLL extern void l_setConvolveSampling ( l_int32 xfact, l_int32 yfact ); -LEPT_DLL extern PIX * pixAddGaussianNoise ( PIX *pixs, l_float32 stdev ); -LEPT_DLL extern l_float32 gaussDistribSampling ( void ); -LEPT_DLL extern l_ok pixCorrelationScore ( PIX *pix1, PIX *pix2, l_int32 area1, l_int32 area2, l_float32 delx, l_float32 dely, l_int32 maxdiffw, l_int32 maxdiffh, l_int32 *tab, l_float32 *pscore ); -LEPT_DLL extern l_int32 pixCorrelationScoreThresholded ( PIX *pix1, PIX *pix2, l_int32 area1, l_int32 area2, l_float32 delx, l_float32 dely, l_int32 maxdiffw, l_int32 maxdiffh, l_int32 *tab, l_int32 *downcount, l_float32 score_threshold ); -LEPT_DLL extern l_ok pixCorrelationScoreSimple ( PIX *pix1, PIX *pix2, l_int32 area1, l_int32 area2, l_float32 delx, l_float32 dely, l_int32 maxdiffw, l_int32 maxdiffh, l_int32 *tab, l_float32 *pscore ); -LEPT_DLL extern l_ok pixCorrelationScoreShifted ( PIX *pix1, PIX *pix2, l_int32 area1, l_int32 area2, l_int32 delx, l_int32 dely, l_int32 *tab, l_float32 *pscore ); -LEPT_DLL extern L_DEWARP * dewarpCreate ( PIX *pixs, l_int32 pageno ); -LEPT_DLL extern L_DEWARP * dewarpCreateRef ( l_int32 pageno, l_int32 refpage ); -LEPT_DLL extern void dewarpDestroy ( L_DEWARP **pdew ); -LEPT_DLL extern L_DEWARPA * dewarpaCreate ( l_int32 nptrs, l_int32 sampling, l_int32 redfactor, l_int32 minlines, l_int32 maxdist ); -LEPT_DLL extern L_DEWARPA * dewarpaCreateFromPixacomp ( PIXAC *pixac, l_int32 useboth, l_int32 sampling, l_int32 minlines, l_int32 maxdist ); -LEPT_DLL extern void dewarpaDestroy ( L_DEWARPA **pdewa ); -LEPT_DLL extern l_ok dewarpaDestroyDewarp ( L_DEWARPA *dewa, l_int32 pageno ); -LEPT_DLL extern l_ok dewarpaInsertDewarp ( L_DEWARPA *dewa, L_DEWARP *dew ); -LEPT_DLL extern L_DEWARP * dewarpaGetDewarp ( L_DEWARPA *dewa, l_int32 index ); -LEPT_DLL extern 
l_ok dewarpaSetCurvatures ( L_DEWARPA *dewa, l_int32 max_linecurv, l_int32 min_diff_linecurv, l_int32 max_diff_linecurv, l_int32 max_edgecurv, l_int32 max_diff_edgecurv, l_int32 max_edgeslope ); -LEPT_DLL extern l_ok dewarpaUseBothArrays ( L_DEWARPA *dewa, l_int32 useboth ); -LEPT_DLL extern l_ok dewarpaSetCheckColumns ( L_DEWARPA *dewa, l_int32 check_columns ); -LEPT_DLL extern l_ok dewarpaSetMaxDistance ( L_DEWARPA *dewa, l_int32 maxdist ); -LEPT_DLL extern L_DEWARP * dewarpRead ( const char *filename ); -LEPT_DLL extern L_DEWARP * dewarpReadStream ( FILE *fp ); -LEPT_DLL extern L_DEWARP * dewarpReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok dewarpWrite ( const char *filename, L_DEWARP *dew ); -LEPT_DLL extern l_ok dewarpWriteStream ( FILE *fp, L_DEWARP *dew ); -LEPT_DLL extern l_ok dewarpWriteMem ( l_uint8 **pdata, size_t *psize, L_DEWARP *dew ); -LEPT_DLL extern L_DEWARPA * dewarpaRead ( const char *filename ); -LEPT_DLL extern L_DEWARPA * dewarpaReadStream ( FILE *fp ); -LEPT_DLL extern L_DEWARPA * dewarpaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok dewarpaWrite ( const char *filename, L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpaWriteStream ( FILE *fp, L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpaWriteMem ( l_uint8 **pdata, size_t *psize, L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpBuildPageModel ( L_DEWARP *dew, const char *debugfile ); -LEPT_DLL extern l_ok dewarpFindVertDisparity ( L_DEWARP *dew, PTAA *ptaa, l_int32 rotflag ); -LEPT_DLL extern l_ok dewarpFindHorizDisparity ( L_DEWARP *dew, PTAA *ptaa ); -LEPT_DLL extern PTAA * dewarpGetTextlineCenters ( PIX *pixs, l_int32 debugflag ); -LEPT_DLL extern PTAA * dewarpRemoveShortLines ( PIX *pixs, PTAA *ptaas, l_float32 fract, l_int32 debugflag ); -LEPT_DLL extern l_ok dewarpFindHorizSlopeDisparity ( L_DEWARP *dew, PIX *pixb, l_float32 fractthresh, l_int32 parity ); -LEPT_DLL extern l_ok dewarpBuildLineModel ( L_DEWARP *dew, l_int32 opensize, const char 
*debugfile ); -LEPT_DLL extern l_ok dewarpaModelStatus ( L_DEWARPA *dewa, l_int32 pageno, l_int32 *pvsuccess, l_int32 *phsuccess ); -LEPT_DLL extern l_ok dewarpaApplyDisparity ( L_DEWARPA *dewa, l_int32 pageno, PIX *pixs, l_int32 grayin, l_int32 x, l_int32 y, PIX **ppixd, const char *debugfile ); -LEPT_DLL extern l_ok dewarpaApplyDisparityBoxa ( L_DEWARPA *dewa, l_int32 pageno, PIX *pixs, BOXA *boxas, l_int32 mapdir, l_int32 x, l_int32 y, BOXA **pboxad, const char *debugfile ); -LEPT_DLL extern l_ok dewarpMinimize ( L_DEWARP *dew ); -LEPT_DLL extern l_ok dewarpPopulateFullRes ( L_DEWARP *dew, PIX *pix, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok dewarpSinglePage ( PIX *pixs, l_int32 thresh, l_int32 adaptive, l_int32 useboth, l_int32 check_columns, PIX **ppixd, L_DEWARPA **pdewa, l_int32 debug ); -LEPT_DLL extern l_ok dewarpSinglePageInit ( PIX *pixs, l_int32 thresh, l_int32 adaptive, l_int32 useboth, l_int32 check_columns, PIX **ppixb, L_DEWARPA **pdewa ); -LEPT_DLL extern l_ok dewarpSinglePageRun ( PIX *pixs, PIX *pixb, L_DEWARPA *dewa, PIX **ppixd, l_int32 debug ); -LEPT_DLL extern l_ok dewarpaListPages ( L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpaSetValidModels ( L_DEWARPA *dewa, l_int32 notests, l_int32 debug ); -LEPT_DLL extern l_ok dewarpaInsertRefModels ( L_DEWARPA *dewa, l_int32 notests, l_int32 debug ); -LEPT_DLL extern l_ok dewarpaStripRefModels ( L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpaRestoreModels ( L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpaInfo ( FILE *fp, L_DEWARPA *dewa ); -LEPT_DLL extern l_ok dewarpaModelStats ( L_DEWARPA *dewa, l_int32 *pnnone, l_int32 *pnvsuccess, l_int32 *pnvvalid, l_int32 *pnhsuccess, l_int32 *pnhvalid, l_int32 *pnref ); -LEPT_DLL extern l_ok dewarpaShowArrays ( L_DEWARPA *dewa, l_float32 scalefact, l_int32 first, l_int32 last ); -LEPT_DLL extern l_ok dewarpDebug ( L_DEWARP *dew, const char *subdirs, l_int32 index ); -LEPT_DLL extern l_ok dewarpShowResults ( L_DEWARPA *dewa, SARRAY *sa, BOXA *boxa, l_int32 
firstpage, l_int32 lastpage, const char *pdfout ); -LEPT_DLL extern L_DNA * l_dnaCreate ( l_int32 n ); -LEPT_DLL extern L_DNA * l_dnaCreateFromIArray ( l_int32 *iarray, l_int32 size ); -LEPT_DLL extern L_DNA * l_dnaCreateFromDArray ( l_float64 *darray, l_int32 size, l_int32 copyflag ); -LEPT_DLL extern L_DNA * l_dnaMakeSequence ( l_float64 startval, l_float64 increment, l_int32 size ); -LEPT_DLL extern void l_dnaDestroy ( L_DNA **pda ); -LEPT_DLL extern L_DNA * l_dnaCopy ( L_DNA *da ); -LEPT_DLL extern L_DNA * l_dnaClone ( L_DNA *da ); -LEPT_DLL extern l_ok l_dnaEmpty ( L_DNA *da ); -LEPT_DLL extern l_ok l_dnaAddNumber ( L_DNA *da, l_float64 val ); -LEPT_DLL extern l_ok l_dnaInsertNumber ( L_DNA *da, l_int32 index, l_float64 val ); -LEPT_DLL extern l_ok l_dnaRemoveNumber ( L_DNA *da, l_int32 index ); -LEPT_DLL extern l_ok l_dnaReplaceNumber ( L_DNA *da, l_int32 index, l_float64 val ); -LEPT_DLL extern l_int32 l_dnaGetCount ( L_DNA *da ); -LEPT_DLL extern l_ok l_dnaSetCount ( L_DNA *da, l_int32 newcount ); -LEPT_DLL extern l_ok l_dnaGetDValue ( L_DNA *da, l_int32 index, l_float64 *pval ); -LEPT_DLL extern l_ok l_dnaGetIValue ( L_DNA *da, l_int32 index, l_int32 *pival ); -LEPT_DLL extern l_ok l_dnaSetValue ( L_DNA *da, l_int32 index, l_float64 val ); -LEPT_DLL extern l_ok l_dnaShiftValue ( L_DNA *da, l_int32 index, l_float64 diff ); -LEPT_DLL extern l_int32 * l_dnaGetIArray ( L_DNA *da ); -LEPT_DLL extern l_float64 * l_dnaGetDArray ( L_DNA *da, l_int32 copyflag ); -LEPT_DLL extern l_int32 l_dnaGetRefcount ( L_DNA *da ); -LEPT_DLL extern l_ok l_dnaChangeRefcount ( L_DNA *da, l_int32 delta ); -LEPT_DLL extern l_ok l_dnaGetParameters ( L_DNA *da, l_float64 *pstartx, l_float64 *pdelx ); -LEPT_DLL extern l_ok l_dnaSetParameters ( L_DNA *da, l_float64 startx, l_float64 delx ); -LEPT_DLL extern l_ok l_dnaCopyParameters ( L_DNA *dad, L_DNA *das ); -LEPT_DLL extern L_DNA * l_dnaRead ( const char *filename ); -LEPT_DLL extern L_DNA * l_dnaReadStream ( FILE *fp ); -LEPT_DLL 
extern l_ok l_dnaWrite ( const char *filename, L_DNA *da ); -LEPT_DLL extern l_ok l_dnaWriteStream ( FILE *fp, L_DNA *da ); -LEPT_DLL extern L_DNAA * l_dnaaCreate ( l_int32 n ); -LEPT_DLL extern L_DNAA * l_dnaaCreateFull ( l_int32 nptr, l_int32 n ); -LEPT_DLL extern l_ok l_dnaaTruncate ( L_DNAA *daa ); -LEPT_DLL extern void l_dnaaDestroy ( L_DNAA **pdaa ); -LEPT_DLL extern l_ok l_dnaaAddDna ( L_DNAA *daa, L_DNA *da, l_int32 copyflag ); -LEPT_DLL extern l_int32 l_dnaaGetCount ( L_DNAA *daa ); -LEPT_DLL extern l_int32 l_dnaaGetDnaCount ( L_DNAA *daa, l_int32 index ); -LEPT_DLL extern l_int32 l_dnaaGetNumberCount ( L_DNAA *daa ); -LEPT_DLL extern L_DNA * l_dnaaGetDna ( L_DNAA *daa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern l_ok l_dnaaReplaceDna ( L_DNAA *daa, l_int32 index, L_DNA *da ); -LEPT_DLL extern l_ok l_dnaaGetValue ( L_DNAA *daa, l_int32 i, l_int32 j, l_float64 *pval ); -LEPT_DLL extern l_ok l_dnaaAddNumber ( L_DNAA *daa, l_int32 index, l_float64 val ); -LEPT_DLL extern L_DNAA * l_dnaaRead ( const char *filename ); -LEPT_DLL extern L_DNAA * l_dnaaReadStream ( FILE *fp ); -LEPT_DLL extern l_ok l_dnaaWrite ( const char *filename, L_DNAA *daa ); -LEPT_DLL extern l_ok l_dnaaWriteStream ( FILE *fp, L_DNAA *daa ); -LEPT_DLL extern l_ok l_dnaJoin ( L_DNA *dad, L_DNA *das, l_int32 istart, l_int32 iend ); -LEPT_DLL extern L_DNA * l_dnaaFlattenToDna ( L_DNAA *daa ); -LEPT_DLL extern NUMA * l_dnaConvertToNuma ( L_DNA *da ); -LEPT_DLL extern L_DNA * numaConvertToDna ( NUMA *na ); -LEPT_DLL extern L_DNA * l_dnaUnionByAset ( L_DNA *da1, L_DNA *da2 ); -LEPT_DLL extern L_DNA * l_dnaRemoveDupsByAset ( L_DNA *das ); -LEPT_DLL extern L_DNA * l_dnaIntersectionByAset ( L_DNA *da1, L_DNA *da2 ); -LEPT_DLL extern L_ASET * l_asetCreateFromDna ( L_DNA *da ); -LEPT_DLL extern L_DNA * l_dnaDiffAdjValues ( L_DNA *das ); -LEPT_DLL extern L_DNAHASH * l_dnaHashCreate ( l_int32 nbuckets, l_int32 initsize ); -LEPT_DLL extern void l_dnaHashDestroy ( L_DNAHASH **pdahash ); -LEPT_DLL 
extern l_int32 l_dnaHashGetCount ( L_DNAHASH *dahash ); -LEPT_DLL extern l_int32 l_dnaHashGetTotalCount ( L_DNAHASH *dahash ); -LEPT_DLL extern L_DNA * l_dnaHashGetDna ( L_DNAHASH *dahash, l_uint64 key, l_int32 copyflag ); -LEPT_DLL extern l_ok l_dnaHashAdd ( L_DNAHASH *dahash, l_uint64 key, l_float64 value ); -LEPT_DLL extern L_DNAHASH * l_dnaHashCreateFromDna ( L_DNA *da ); -LEPT_DLL extern l_ok l_dnaRemoveDupsByHash ( L_DNA *das, L_DNA **pdad, L_DNAHASH **pdahash ); -LEPT_DLL extern l_ok l_dnaMakeHistoByHash ( L_DNA *das, L_DNAHASH **pdahash, L_DNA **pdav, L_DNA **pdac ); -LEPT_DLL extern L_DNA * l_dnaIntersectionByHash ( L_DNA *da1, L_DNA *da2 ); -LEPT_DLL extern l_ok l_dnaFindValByHash ( L_DNA *da, L_DNAHASH *dahash, l_float64 val, l_int32 *pindex ); -LEPT_DLL extern PIX * pixMorphDwa_2 ( PIX *pixd, PIX *pixs, l_int32 operation, char *selname ); -LEPT_DLL extern PIX * pixFMorphopGen_2 ( PIX *pixd, PIX *pixs, l_int32 operation, char *selname ); -LEPT_DLL extern l_int32 fmorphopgen_low_2 ( l_uint32 *datad, l_int32 w, l_int32 h, l_int32 wpld, l_uint32 *datas, l_int32 wpls, l_int32 index ); -LEPT_DLL extern PIX * pixSobelEdgeFilter ( PIX *pixs, l_int32 orientflag ); -LEPT_DLL extern PIX * pixTwoSidedEdgeFilter ( PIX *pixs, l_int32 orientflag ); -LEPT_DLL extern l_ok pixMeasureEdgeSmoothness ( PIX *pixs, l_int32 side, l_int32 minjump, l_int32 minreversal, l_float32 *pjpl, l_float32 *pjspl, l_float32 *prpl, const char *debugfile ); -LEPT_DLL extern NUMA * pixGetEdgeProfile ( PIX *pixs, l_int32 side, const char *debugfile ); -LEPT_DLL extern l_ok pixGetLastOffPixelInRun ( PIX *pixs, l_int32 x, l_int32 y, l_int32 direction, l_int32 *ploc ); -LEPT_DLL extern l_int32 pixGetLastOnPixelInRun ( PIX *pixs, l_int32 x, l_int32 y, l_int32 direction, l_int32 *ploc ); -LEPT_DLL extern char * encodeBase64 ( const l_uint8 *inarray, l_int32 insize, l_int32 *poutsize ); -LEPT_DLL extern l_uint8 * decodeBase64 ( const char *inarray, l_int32 insize, l_int32 *poutsize ); -LEPT_DLL 
extern char * encodeAscii85 ( const l_uint8 *inarray, l_int32 insize, l_int32 *poutsize ); -LEPT_DLL extern l_uint8 * decodeAscii85 ( const char *inarray, l_int32 insize, l_int32 *poutsize ); -LEPT_DLL extern char * reformatPacked64 ( const char *inarray, l_int32 insize, l_int32 leadspace, l_int32 linechars, l_int32 addquotes, l_int32 *poutsize ); -LEPT_DLL extern PIX * pixGammaTRC ( PIX *pixd, PIX *pixs, l_float32 gamma, l_int32 minval, l_int32 maxval ); -LEPT_DLL extern PIX * pixGammaTRCMasked ( PIX *pixd, PIX *pixs, PIX *pixm, l_float32 gamma, l_int32 minval, l_int32 maxval ); -LEPT_DLL extern PIX * pixGammaTRCWithAlpha ( PIX *pixd, PIX *pixs, l_float32 gamma, l_int32 minval, l_int32 maxval ); -LEPT_DLL extern NUMA * numaGammaTRC ( l_float32 gamma, l_int32 minval, l_int32 maxval ); -LEPT_DLL extern PIX * pixContrastTRC ( PIX *pixd, PIX *pixs, l_float32 factor ); -LEPT_DLL extern PIX * pixContrastTRCMasked ( PIX *pixd, PIX *pixs, PIX *pixm, l_float32 factor ); -LEPT_DLL extern NUMA * numaContrastTRC ( l_float32 factor ); -LEPT_DLL extern PIX * pixEqualizeTRC ( PIX *pixd, PIX *pixs, l_float32 fract, l_int32 factor ); -LEPT_DLL extern NUMA * numaEqualizeTRC ( PIX *pix, l_float32 fract, l_int32 factor ); -LEPT_DLL extern l_int32 pixTRCMap ( PIX *pixs, PIX *pixm, NUMA *na ); -LEPT_DLL extern l_int32 pixTRCMapGeneral ( PIX *pixs, PIX *pixm, NUMA *nar, NUMA *nag, NUMA *nab ); -LEPT_DLL extern PIX * pixUnsharpMasking ( PIX *pixs, l_int32 halfwidth, l_float32 fract ); -LEPT_DLL extern PIX * pixUnsharpMaskingGray ( PIX *pixs, l_int32 halfwidth, l_float32 fract ); -LEPT_DLL extern PIX * pixUnsharpMaskingFast ( PIX *pixs, l_int32 halfwidth, l_float32 fract, l_int32 direction ); -LEPT_DLL extern PIX * pixUnsharpMaskingGrayFast ( PIX *pixs, l_int32 halfwidth, l_float32 fract, l_int32 direction ); -LEPT_DLL extern PIX * pixUnsharpMaskingGray1D ( PIX *pixs, l_int32 halfwidth, l_float32 fract, l_int32 direction ); -LEPT_DLL extern PIX * pixUnsharpMaskingGray2D ( PIX *pixs, 
l_int32 halfwidth, l_float32 fract ); -LEPT_DLL extern PIX * pixModifyHue ( PIX *pixd, PIX *pixs, l_float32 fract ); -LEPT_DLL extern PIX * pixModifySaturation ( PIX *pixd, PIX *pixs, l_float32 fract ); -LEPT_DLL extern l_int32 pixMeasureSaturation ( PIX *pixs, l_int32 factor, l_float32 *psat ); -LEPT_DLL extern PIX * pixModifyBrightness ( PIX *pixd, PIX *pixs, l_float32 fract ); -LEPT_DLL extern PIX * pixMosaicColorShiftRGB ( PIX *pixs, l_float32 roff, l_float32 goff, l_float32 boff, l_float32 delta, l_int32 nincr ); -LEPT_DLL extern PIX * pixColorShiftRGB ( PIX *pixs, l_float32 rfract, l_float32 gfract, l_float32 bfract ); -LEPT_DLL extern PIX * pixDarkenGray ( PIX *pixd, PIX *pixs, l_int32 thresh, l_int32 satlimit ); -LEPT_DLL extern PIX * pixMultConstantColor ( PIX *pixs, l_float32 rfact, l_float32 gfact, l_float32 bfact ); -LEPT_DLL extern PIX * pixMultMatrixColor ( PIX *pixs, L_KERNEL *kel ); -LEPT_DLL extern PIX * pixHalfEdgeByBandpass ( PIX *pixs, l_int32 sm1h, l_int32 sm1v, l_int32 sm2h, l_int32 sm2v ); -LEPT_DLL extern l_ok fhmtautogen ( SELA *sela, l_int32 fileindex, const char *filename ); -LEPT_DLL extern l_ok fhmtautogen1 ( SELA *sela, l_int32 fileindex, const char *filename ); -LEPT_DLL extern l_ok fhmtautogen2 ( SELA *sela, l_int32 fileindex, const char *filename ); -LEPT_DLL extern PIX * pixHMTDwa_1 ( PIX *pixd, PIX *pixs, const char *selname ); -LEPT_DLL extern PIX * pixFHMTGen_1 ( PIX *pixd, PIX *pixs, const char *selname ); -LEPT_DLL extern l_int32 fhmtgen_low_1 ( l_uint32 *datad, l_int32 w, l_int32 h, l_int32 wpld, l_uint32 *datas, l_int32 wpls, l_int32 index ); -LEPT_DLL extern l_ok pixItalicWords ( PIX *pixs, BOXA *boxaw, PIX *pixw, BOXA **pboxa, l_int32 debugflag ); -LEPT_DLL extern PIX * pixOrientCorrect ( PIX *pixs, l_float32 minupconf, l_float32 minratio, l_float32 *pupconf, l_float32 *pleftconf, l_int32 *protation, l_int32 debug ); -LEPT_DLL extern l_ok pixOrientDetect ( PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 
mincount, l_int32 debug ); -LEPT_DLL extern l_ok makeOrientDecision ( l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug ); -LEPT_DLL extern l_ok pixUpDownDetect ( PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug ); -LEPT_DLL extern l_ok pixUpDownDetectGeneral ( PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug ); -LEPT_DLL extern l_ok pixOrientDetectDwa ( PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug ); -LEPT_DLL extern l_ok pixUpDownDetectDwa ( PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug ); -LEPT_DLL extern l_ok pixUpDownDetectGeneralDwa ( PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug ); -LEPT_DLL extern l_ok pixMirrorDetect ( PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug ); -LEPT_DLL extern l_ok pixMirrorDetectDwa ( PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug ); -LEPT_DLL extern PIX * pixFlipFHMTGen ( PIX *pixd, PIX *pixs, const char *selname ); -LEPT_DLL extern l_ok fmorphautogen ( SELA *sela, l_int32 fileindex, const char *filename ); -LEPT_DLL extern l_ok fmorphautogen1 ( SELA *sela, l_int32 fileindex, const char *filename ); -LEPT_DLL extern l_int32 fmorphautogen2 ( SELA *sela, l_int32 fileindex, const char *filename ); -LEPT_DLL extern PIX * pixMorphDwa_1 ( PIX *pixd, PIX *pixs, l_int32 operation, char *selname ); -LEPT_DLL extern PIX * pixFMorphopGen_1 ( PIX *pixd, PIX *pixs, l_int32 operation, char *selname ); -LEPT_DLL extern l_int32 fmorphopgen_low_1 ( l_uint32 *datad, l_int32 w, l_int32 h, l_int32 wpld, l_uint32 *datas, l_int32 wpls, l_int32 index ); -LEPT_DLL extern FPIX * fpixCreate ( l_int32 width, l_int32 height ); -LEPT_DLL extern FPIX * fpixCreateTemplate ( FPIX *fpixs ); -LEPT_DLL extern FPIX * fpixClone ( FPIX *fpix ); -LEPT_DLL extern FPIX * fpixCopy ( FPIX *fpixd, FPIX *fpixs ); -LEPT_DLL extern l_ok fpixResizeImageData ( FPIX 
*fpixd, FPIX *fpixs ); -LEPT_DLL extern void fpixDestroy ( FPIX **pfpix ); -LEPT_DLL extern l_ok fpixGetDimensions ( FPIX *fpix, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok fpixSetDimensions ( FPIX *fpix, l_int32 w, l_int32 h ); -LEPT_DLL extern l_int32 fpixGetWpl ( FPIX *fpix ); -LEPT_DLL extern l_ok fpixSetWpl ( FPIX *fpix, l_int32 wpl ); -LEPT_DLL extern l_int32 fpixGetRefcount ( FPIX *fpix ); -LEPT_DLL extern l_ok fpixChangeRefcount ( FPIX *fpix, l_int32 delta ); -LEPT_DLL extern l_ok fpixGetResolution ( FPIX *fpix, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_ok fpixSetResolution ( FPIX *fpix, l_int32 xres, l_int32 yres ); -LEPT_DLL extern l_ok fpixCopyResolution ( FPIX *fpixd, FPIX *fpixs ); -LEPT_DLL extern l_float32 * fpixGetData ( FPIX *fpix ); -LEPT_DLL extern l_ok fpixSetData ( FPIX *fpix, l_float32 *data ); -LEPT_DLL extern l_ok fpixGetPixel ( FPIX *fpix, l_int32 x, l_int32 y, l_float32 *pval ); -LEPT_DLL extern l_ok fpixSetPixel ( FPIX *fpix, l_int32 x, l_int32 y, l_float32 val ); -LEPT_DLL extern FPIXA * fpixaCreate ( l_int32 n ); -LEPT_DLL extern FPIXA * fpixaCopy ( FPIXA *fpixa, l_int32 copyflag ); -LEPT_DLL extern void fpixaDestroy ( FPIXA **pfpixa ); -LEPT_DLL extern l_ok fpixaAddFPix ( FPIXA *fpixa, FPIX *fpix, l_int32 copyflag ); -LEPT_DLL extern l_int32 fpixaGetCount ( FPIXA *fpixa ); -LEPT_DLL extern l_ok fpixaChangeRefcount ( FPIXA *fpixa, l_int32 delta ); -LEPT_DLL extern FPIX * fpixaGetFPix ( FPIXA *fpixa, l_int32 index, l_int32 accesstype ); -LEPT_DLL extern l_ok fpixaGetFPixDimensions ( FPIXA *fpixa, l_int32 index, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_float32 * fpixaGetData ( FPIXA *fpixa, l_int32 index ); -LEPT_DLL extern l_ok fpixaGetPixel ( FPIXA *fpixa, l_int32 index, l_int32 x, l_int32 y, l_float32 *pval ); -LEPT_DLL extern l_ok fpixaSetPixel ( FPIXA *fpixa, l_int32 index, l_int32 x, l_int32 y, l_float32 val ); -LEPT_DLL extern DPIX * dpixCreate ( l_int32 width, l_int32 height ); -LEPT_DLL extern DPIX * 
dpixCreateTemplate ( DPIX *dpixs ); -LEPT_DLL extern DPIX * dpixClone ( DPIX *dpix ); -LEPT_DLL extern DPIX * dpixCopy ( DPIX *dpixd, DPIX *dpixs ); -LEPT_DLL extern l_ok dpixResizeImageData ( DPIX *dpixd, DPIX *dpixs ); -LEPT_DLL extern void dpixDestroy ( DPIX **pdpix ); -LEPT_DLL extern l_ok dpixGetDimensions ( DPIX *dpix, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok dpixSetDimensions ( DPIX *dpix, l_int32 w, l_int32 h ); -LEPT_DLL extern l_int32 dpixGetWpl ( DPIX *dpix ); -LEPT_DLL extern l_ok dpixSetWpl ( DPIX *dpix, l_int32 wpl ); -LEPT_DLL extern l_int32 dpixGetRefcount ( DPIX *dpix ); -LEPT_DLL extern l_ok dpixChangeRefcount ( DPIX *dpix, l_int32 delta ); -LEPT_DLL extern l_ok dpixGetResolution ( DPIX *dpix, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_ok dpixSetResolution ( DPIX *dpix, l_int32 xres, l_int32 yres ); -LEPT_DLL extern l_ok dpixCopyResolution ( DPIX *dpixd, DPIX *dpixs ); -LEPT_DLL extern l_float64 * dpixGetData ( DPIX *dpix ); -LEPT_DLL extern l_ok dpixSetData ( DPIX *dpix, l_float64 *data ); -LEPT_DLL extern l_ok dpixGetPixel ( DPIX *dpix, l_int32 x, l_int32 y, l_float64 *pval ); -LEPT_DLL extern l_ok dpixSetPixel ( DPIX *dpix, l_int32 x, l_int32 y, l_float64 val ); -LEPT_DLL extern FPIX * fpixRead ( const char *filename ); -LEPT_DLL extern FPIX * fpixReadStream ( FILE *fp ); -LEPT_DLL extern FPIX * fpixReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok fpixWrite ( const char *filename, FPIX *fpix ); -LEPT_DLL extern l_ok fpixWriteStream ( FILE *fp, FPIX *fpix ); -LEPT_DLL extern l_ok fpixWriteMem ( l_uint8 **pdata, size_t *psize, FPIX *fpix ); -LEPT_DLL extern FPIX * fpixEndianByteSwap ( FPIX *fpixd, FPIX *fpixs ); -LEPT_DLL extern DPIX * dpixRead ( const char *filename ); -LEPT_DLL extern DPIX * dpixReadStream ( FILE *fp ); -LEPT_DLL extern DPIX * dpixReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok dpixWrite ( const char *filename, DPIX *dpix ); -LEPT_DLL extern l_ok dpixWriteStream ( FILE 
*fp, DPIX *dpix ); -LEPT_DLL extern l_ok dpixWriteMem ( l_uint8 **pdata, size_t *psize, DPIX *dpix ); -LEPT_DLL extern DPIX * dpixEndianByteSwap ( DPIX *dpixd, DPIX *dpixs ); -LEPT_DLL extern l_ok fpixPrintStream ( FILE *fp, FPIX *fpix, l_int32 factor ); -LEPT_DLL extern FPIX * pixConvertToFPix ( PIX *pixs, l_int32 ncomps ); -LEPT_DLL extern DPIX * pixConvertToDPix ( PIX *pixs, l_int32 ncomps ); -LEPT_DLL extern PIX * fpixConvertToPix ( FPIX *fpixs, l_int32 outdepth, l_int32 negvals, l_int32 errorflag ); -LEPT_DLL extern PIX * fpixDisplayMaxDynamicRange ( FPIX *fpixs ); -LEPT_DLL extern DPIX * fpixConvertToDPix ( FPIX *fpix ); -LEPT_DLL extern PIX * dpixConvertToPix ( DPIX *dpixs, l_int32 outdepth, l_int32 negvals, l_int32 errorflag ); -LEPT_DLL extern FPIX * dpixConvertToFPix ( DPIX *dpix ); -LEPT_DLL extern l_ok fpixGetMin ( FPIX *fpix, l_float32 *pminval, l_int32 *pxminloc, l_int32 *pyminloc ); -LEPT_DLL extern l_ok fpixGetMax ( FPIX *fpix, l_float32 *pmaxval, l_int32 *pxmaxloc, l_int32 *pymaxloc ); -LEPT_DLL extern l_ok dpixGetMin ( DPIX *dpix, l_float64 *pminval, l_int32 *pxminloc, l_int32 *pyminloc ); -LEPT_DLL extern l_ok dpixGetMax ( DPIX *dpix, l_float64 *pmaxval, l_int32 *pxmaxloc, l_int32 *pymaxloc ); -LEPT_DLL extern FPIX * fpixScaleByInteger ( FPIX *fpixs, l_int32 factor ); -LEPT_DLL extern DPIX * dpixScaleByInteger ( DPIX *dpixs, l_int32 factor ); -LEPT_DLL extern FPIX * fpixLinearCombination ( FPIX *fpixd, FPIX *fpixs1, FPIX *fpixs2, l_float32 a, l_float32 b ); -LEPT_DLL extern l_ok fpixAddMultConstant ( FPIX *fpix, l_float32 addc, l_float32 multc ); -LEPT_DLL extern DPIX * dpixLinearCombination ( DPIX *dpixd, DPIX *dpixs1, DPIX *dpixs2, l_float32 a, l_float32 b ); -LEPT_DLL extern l_ok dpixAddMultConstant ( DPIX *dpix, l_float64 addc, l_float64 multc ); -LEPT_DLL extern l_ok fpixSetAllArbitrary ( FPIX *fpix, l_float32 inval ); -LEPT_DLL extern l_ok dpixSetAllArbitrary ( DPIX *dpix, l_float64 inval ); -LEPT_DLL extern FPIX * fpixAddBorder ( FPIX 
*fpixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern FPIX * fpixRemoveBorder ( FPIX *fpixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern FPIX * fpixAddMirroredBorder ( FPIX *fpixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern FPIX * fpixAddContinuedBorder ( FPIX *fpixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern FPIX * fpixAddSlopeBorder ( FPIX *fpixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern l_ok fpixRasterop ( FPIX *fpixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, FPIX *fpixs, l_int32 sx, l_int32 sy ); -LEPT_DLL extern FPIX * fpixRotateOrth ( FPIX *fpixs, l_int32 quads ); -LEPT_DLL extern FPIX * fpixRotate180 ( FPIX *fpixd, FPIX *fpixs ); -LEPT_DLL extern FPIX * fpixRotate90 ( FPIX *fpixs, l_int32 direction ); -LEPT_DLL extern FPIX * fpixFlipLR ( FPIX *fpixd, FPIX *fpixs ); -LEPT_DLL extern FPIX * fpixFlipTB ( FPIX *fpixd, FPIX *fpixs ); -LEPT_DLL extern FPIX * fpixAffinePta ( FPIX *fpixs, PTA *ptad, PTA *ptas, l_int32 border, l_float32 inval ); -LEPT_DLL extern FPIX * fpixAffine ( FPIX *fpixs, l_float32 *vc, l_float32 inval ); -LEPT_DLL extern FPIX * fpixProjectivePta ( FPIX *fpixs, PTA *ptad, PTA *ptas, l_int32 border, l_float32 inval ); -LEPT_DLL extern FPIX * fpixProjective ( FPIX *fpixs, l_float32 *vc, l_float32 inval ); -LEPT_DLL extern l_ok linearInterpolatePixelFloat ( l_float32 *datas, l_int32 w, l_int32 h, l_float32 x, l_float32 y, l_float32 inval, l_float32 *pval ); -LEPT_DLL extern PIX * fpixThresholdToPix ( FPIX *fpix, l_float32 thresh ); -LEPT_DLL extern FPIX * pixComponentFunction ( PIX *pix, l_float32 rnum, l_float32 gnum, l_float32 bnum, l_float32 rdenom, l_float32 gdenom, l_float32 bdenom ); -LEPT_DLL extern PIX * pixReadStreamGif ( FILE *fp ); -LEPT_DLL extern PIX * pixReadMemGif ( const l_uint8 *cdata, size_t size ); -LEPT_DLL extern l_ok pixWriteStreamGif ( FILE *fp, PIX *pix ); -LEPT_DLL 
extern l_ok pixWriteMemGif ( l_uint8 **pdata, size_t *psize, PIX *pix ); -LEPT_DLL extern GPLOT * gplotCreate ( const char *rootname, l_int32 outformat, const char *title, const char *xlabel, const char *ylabel ); -LEPT_DLL extern void gplotDestroy ( GPLOT **pgplot ); -LEPT_DLL extern l_ok gplotAddPlot ( GPLOT *gplot, NUMA *nax, NUMA *nay, l_int32 plotstyle, const char *plotlabel ); -LEPT_DLL extern l_ok gplotSetScaling ( GPLOT *gplot, l_int32 scaling ); -LEPT_DLL extern PIX * gplotMakeOutputPix ( GPLOT *gplot ); -LEPT_DLL extern l_ok gplotMakeOutput ( GPLOT *gplot ); -LEPT_DLL extern l_ok gplotGenCommandFile ( GPLOT *gplot ); -LEPT_DLL extern l_ok gplotGenDataFiles ( GPLOT *gplot ); -LEPT_DLL extern l_ok gplotSimple1 ( NUMA *na, l_int32 outformat, const char *outroot, const char *title ); -LEPT_DLL extern l_ok gplotSimple2 ( NUMA *na1, NUMA *na2, l_int32 outformat, const char *outroot, const char *title ); -LEPT_DLL extern l_ok gplotSimpleN ( NUMAA *naa, l_int32 outformat, const char *outroot, const char *title ); -LEPT_DLL extern PIX * gplotSimplePix1 ( NUMA *na, const char *title ); -LEPT_DLL extern PIX * gplotSimplePix2 ( NUMA *na1, NUMA *na2, const char *title ); -LEPT_DLL extern PIX * gplotSimplePixN ( NUMAA *naa, const char *title ); -LEPT_DLL extern GPLOT * gplotSimpleXY1 ( NUMA *nax, NUMA *nay, l_int32 plotstyle, l_int32 outformat, const char *outroot, const char *title ); -LEPT_DLL extern GPLOT * gplotSimpleXY2 ( NUMA *nax, NUMA *nay1, NUMA *nay2, l_int32 plotstyle, l_int32 outformat, const char *outroot, const char *title ); -LEPT_DLL extern GPLOT * gplotSimpleXYN ( NUMA *nax, NUMAA *naay, l_int32 plotstyle, l_int32 outformat, const char *outroot, const char *title ); -LEPT_DLL extern PIX * gplotGeneralPix1 ( NUMA *na, l_int32 plotstyle, const char *rootname, const char *title, const char *xlabel, const char *ylabel ); -LEPT_DLL extern PIX * gplotGeneralPix2 ( NUMA *na1, NUMA *na2, l_int32 plotstyle, const char *rootname, const char *title, const char 
*xlabel, const char *ylabel ); -LEPT_DLL extern PIX * gplotGeneralPixN ( NUMA *nax, NUMAA *naay, l_int32 plotstyle, const char *rootname, const char *title, const char *xlabel, const char *ylabel ); -LEPT_DLL extern GPLOT * gplotRead ( const char *filename ); -LEPT_DLL extern l_ok gplotWrite ( const char *filename, GPLOT *gplot ); -LEPT_DLL extern PTA * generatePtaLine ( l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2 ); -LEPT_DLL extern PTA * generatePtaWideLine ( l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 width ); -LEPT_DLL extern PTA * generatePtaBox ( BOX *box, l_int32 width ); -LEPT_DLL extern PTA * generatePtaBoxa ( BOXA *boxa, l_int32 width, l_int32 removedups ); -LEPT_DLL extern PTA * generatePtaHashBox ( BOX *box, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline ); -LEPT_DLL extern PTA * generatePtaHashBoxa ( BOXA *boxa, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 removedups ); -LEPT_DLL extern PTAA * generatePtaaBoxa ( BOXA *boxa ); -LEPT_DLL extern PTAA * generatePtaaHashBoxa ( BOXA *boxa, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline ); -LEPT_DLL extern PTA * generatePtaPolyline ( PTA *ptas, l_int32 width, l_int32 closeflag, l_int32 removedups ); -LEPT_DLL extern PTA * generatePtaGrid ( l_int32 w, l_int32 h, l_int32 nx, l_int32 ny, l_int32 width ); -LEPT_DLL extern PTA * convertPtaLineTo4cc ( PTA *ptas ); -LEPT_DLL extern PTA * generatePtaFilledCircle ( l_int32 radius ); -LEPT_DLL extern PTA * generatePtaFilledSquare ( l_int32 side ); -LEPT_DLL extern PTA * generatePtaLineFromPt ( l_int32 x, l_int32 y, l_float64 length, l_float64 radang ); -LEPT_DLL extern l_ok locatePtRadially ( l_int32 xr, l_int32 yr, l_float64 dist, l_float64 radang, l_float64 *px, l_float64 *py ); -LEPT_DLL extern l_ok pixRenderPlotFromNuma ( PIX **ppix, NUMA *na, l_int32 plotloc, l_int32 linewidth, l_int32 max, l_uint32 color ); -LEPT_DLL extern PTA * makePlotPtaFromNuma ( NUMA *na, l_int32 size, l_int32 
plotloc, l_int32 linewidth, l_int32 max ); -LEPT_DLL extern l_ok pixRenderPlotFromNumaGen ( PIX **ppix, NUMA *na, l_int32 orient, l_int32 linewidth, l_int32 refpos, l_int32 max, l_int32 drawref, l_uint32 color ); -LEPT_DLL extern PTA * makePlotPtaFromNumaGen ( NUMA *na, l_int32 orient, l_int32 linewidth, l_int32 refpos, l_int32 max, l_int32 drawref ); -LEPT_DLL extern l_ok pixRenderPta ( PIX *pix, PTA *pta, l_int32 op ); -LEPT_DLL extern l_ok pixRenderPtaArb ( PIX *pix, PTA *pta, l_uint8 rval, l_uint8 gval, l_uint8 bval ); -LEPT_DLL extern l_ok pixRenderPtaBlend ( PIX *pix, PTA *pta, l_uint8 rval, l_uint8 gval, l_uint8 bval, l_float32 fract ); -LEPT_DLL extern l_ok pixRenderLine ( PIX *pix, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 width, l_int32 op ); -LEPT_DLL extern l_ok pixRenderLineArb ( PIX *pix, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval ); -LEPT_DLL extern l_ok pixRenderLineBlend ( PIX *pix, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval, l_float32 fract ); -LEPT_DLL extern l_ok pixRenderBox ( PIX *pix, BOX *box, l_int32 width, l_int32 op ); -LEPT_DLL extern l_ok pixRenderBoxArb ( PIX *pix, BOX *box, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval ); -LEPT_DLL extern l_ok pixRenderBoxBlend ( PIX *pix, BOX *box, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval, l_float32 fract ); -LEPT_DLL extern l_ok pixRenderBoxa ( PIX *pix, BOXA *boxa, l_int32 width, l_int32 op ); -LEPT_DLL extern l_ok pixRenderBoxaArb ( PIX *pix, BOXA *boxa, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval ); -LEPT_DLL extern l_ok pixRenderBoxaBlend ( PIX *pix, BOXA *boxa, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval, l_float32 fract, l_int32 removedups ); -LEPT_DLL extern l_ok pixRenderHashBox ( PIX *pix, BOX *box, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 op ); -LEPT_DLL extern l_ok 
pixRenderHashBoxArb ( PIX *pix, BOX *box, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixRenderHashBoxBlend ( PIX *pix, BOX *box, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 rval, l_int32 gval, l_int32 bval, l_float32 fract ); -LEPT_DLL extern l_ok pixRenderHashMaskArb ( PIX *pix, PIX *pixm, l_int32 x, l_int32 y, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixRenderHashBoxa ( PIX *pix, BOXA *boxa, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 op ); -LEPT_DLL extern l_ok pixRenderHashBoxaArb ( PIX *pix, BOXA *boxa, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixRenderHashBoxaBlend ( PIX *pix, BOXA *boxa, l_int32 spacing, l_int32 width, l_int32 orient, l_int32 outline, l_int32 rval, l_int32 gval, l_int32 bval, l_float32 fract ); -LEPT_DLL extern l_ok pixRenderPolyline ( PIX *pix, PTA *ptas, l_int32 width, l_int32 op, l_int32 closeflag ); -LEPT_DLL extern l_ok pixRenderPolylineArb ( PIX *pix, PTA *ptas, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval, l_int32 closeflag ); -LEPT_DLL extern l_ok pixRenderPolylineBlend ( PIX *pix, PTA *ptas, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval, l_float32 fract, l_int32 closeflag, l_int32 removedups ); -LEPT_DLL extern l_ok pixRenderGridArb ( PIX *pix, l_int32 nx, l_int32 ny, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval ); -LEPT_DLL extern PIX * pixRenderRandomCmapPtaa ( PIX *pix, PTAA *ptaa, l_int32 polyflag, l_int32 width, l_int32 closeflag ); -LEPT_DLL extern PIX * pixRenderPolygon ( PTA *ptas, l_int32 width, l_int32 *pxmin, l_int32 *pymin ); -LEPT_DLL extern PIX * pixFillPolygon ( PIX *pixs, PTA *pta, l_int32 xmin, l_int32 ymin ); -LEPT_DLL extern PIX * pixRenderContours ( PIX *pixs, 
l_int32 startval, l_int32 incr, l_int32 outdepth ); -LEPT_DLL extern PIX * fpixAutoRenderContours ( FPIX *fpix, l_int32 ncontours ); -LEPT_DLL extern PIX * fpixRenderContours ( FPIX *fpixs, l_float32 incr, l_float32 proxim ); -LEPT_DLL extern PTA * pixGeneratePtaBoundary ( PIX *pixs, l_int32 width ); -LEPT_DLL extern PIX * pixErodeGray ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixDilateGray ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenGray ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseGray ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixErodeGray3 ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixDilateGray3 ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenGray3 ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseGray3 ( PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixDitherToBinary ( PIX *pixs ); -LEPT_DLL extern PIX * pixDitherToBinarySpec ( PIX *pixs, l_int32 lowerclip, l_int32 upperclip ); -LEPT_DLL extern void ditherToBinaryLineLow ( l_uint32 *lined, l_int32 w, l_uint32 *bufs1, l_uint32 *bufs2, l_int32 lowerclip, l_int32 upperclip, l_int32 lastlineflag ); -LEPT_DLL extern PIX * pixThresholdToBinary ( PIX *pixs, l_int32 thresh ); -LEPT_DLL extern void thresholdToBinaryLineLow ( l_uint32 *lined, l_int32 w, l_uint32 *lines, l_int32 d, l_int32 thresh ); -LEPT_DLL extern PIX * pixVarThresholdToBinary ( PIX *pixs, PIX *pixg ); -LEPT_DLL extern PIX * pixAdaptThresholdToBinary ( PIX *pixs, PIX *pixm, l_float32 gamma ); -LEPT_DLL extern PIX * pixAdaptThresholdToBinaryGen ( PIX *pixs, PIX *pixm, l_float32 gamma, l_int32 blackval, l_int32 whiteval, l_int32 thresh ); -LEPT_DLL extern PIX * pixGenerateMaskByValue ( PIX *pixs, l_int32 val, l_int32 usecmap ); -LEPT_DLL extern PIX * pixGenerateMaskByBand ( PIX *pixs, l_int32 lower, l_int32 upper, l_int32 inband, l_int32 usecmap 
); -LEPT_DLL extern PIX * pixDitherTo2bpp ( PIX *pixs, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixDitherTo2bppSpec ( PIX *pixs, l_int32 lowerclip, l_int32 upperclip, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixThresholdTo2bpp ( PIX *pixs, l_int32 nlevels, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixThresholdTo4bpp ( PIX *pixs, l_int32 nlevels, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixThresholdOn8bpp ( PIX *pixs, l_int32 nlevels, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixThresholdGrayArb ( PIX *pixs, const char *edgevals, l_int32 outdepth, l_int32 use_average, l_int32 setblack, l_int32 setwhite ); -LEPT_DLL extern l_int32 * makeGrayQuantIndexTable ( l_int32 nlevels ); -LEPT_DLL extern l_ok makeGrayQuantTableArb ( NUMA *na, l_int32 outdepth, l_int32 **ptab, PIXCMAP **pcmap ); -LEPT_DLL extern PIX * pixGenerateMaskByBand32 ( PIX *pixs, l_uint32 refval, l_int32 delm, l_int32 delp, l_float32 fractm, l_float32 fractp ); -LEPT_DLL extern PIX * pixGenerateMaskByDiscr32 ( PIX *pixs, l_uint32 refval1, l_uint32 refval2, l_int32 distflag ); -LEPT_DLL extern PIX * pixGrayQuantFromHisto ( PIX *pixd, PIX *pixs, PIX *pixm, l_float32 minfract, l_int32 maxsize ); -LEPT_DLL extern PIX * pixGrayQuantFromCmap ( PIX *pixs, PIXCMAP *cmap, l_int32 mindepth ); -LEPT_DLL extern L_HEAP * lheapCreate ( l_int32 n, l_int32 direction ); -LEPT_DLL extern void lheapDestroy ( L_HEAP **plh, l_int32 freeflag ); -LEPT_DLL extern l_ok lheapAdd ( L_HEAP *lh, void *item ); -LEPT_DLL extern void * lheapRemove ( L_HEAP *lh ); -LEPT_DLL extern l_int32 lheapGetCount ( L_HEAP *lh ); -LEPT_DLL extern void * lheapGetElement ( L_HEAP *lh, l_int32 index ); -LEPT_DLL extern l_ok lheapSort ( L_HEAP *lh ); -LEPT_DLL extern l_ok lheapSortStrictOrder ( L_HEAP *lh ); -LEPT_DLL extern l_ok lheapPrint ( FILE *fp, L_HEAP *lh ); -LEPT_DLL extern JBCLASSER * jbRankHausInit ( l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_int32 size, l_float32 rank ); -LEPT_DLL extern JBCLASSER * jbCorrelationInit 
( l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor ); -LEPT_DLL extern JBCLASSER * jbCorrelationInitWithoutComponents ( l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor ); -LEPT_DLL extern l_ok jbAddPages ( JBCLASSER *classer, SARRAY *safiles ); -LEPT_DLL extern l_ok jbAddPage ( JBCLASSER *classer, PIX *pixs ); -LEPT_DLL extern l_ok jbAddPageComponents ( JBCLASSER *classer, PIX *pixs, BOXA *boxas, PIXA *pixas ); -LEPT_DLL extern l_ok jbClassifyRankHaus ( JBCLASSER *classer, BOXA *boxa, PIXA *pixas ); -LEPT_DLL extern l_int32 pixHaustest ( PIX *pix1, PIX *pix2, PIX *pix3, PIX *pix4, l_float32 delx, l_float32 dely, l_int32 maxdiffw, l_int32 maxdiffh ); -LEPT_DLL extern l_int32 pixRankHaustest ( PIX *pix1, PIX *pix2, PIX *pix3, PIX *pix4, l_float32 delx, l_float32 dely, l_int32 maxdiffw, l_int32 maxdiffh, l_int32 area1, l_int32 area3, l_float32 rank, l_int32 *tab8 ); -LEPT_DLL extern l_ok jbClassifyCorrelation ( JBCLASSER *classer, BOXA *boxa, PIXA *pixas ); -LEPT_DLL extern l_ok jbGetComponents ( PIX *pixs, l_int32 components, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad ); -LEPT_DLL extern l_ok pixWordMaskByDilation ( PIX *pixs, PIX **ppixm, l_int32 *psize, PIXA *pixadb ); -LEPT_DLL extern l_ok pixWordBoxesByDilation ( PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxa, l_int32 *psize, PIXA *pixadb ); -LEPT_DLL extern PIXA * jbAccumulateComposites ( PIXAA *pixaa, NUMA **pna, PTA **pptat ); -LEPT_DLL extern PIXA * jbTemplatesFromComposites ( PIXA *pixac, NUMA *na ); -LEPT_DLL extern JBCLASSER * jbClasserCreate ( l_int32 method, l_int32 components ); -LEPT_DLL extern void jbClasserDestroy ( JBCLASSER **pclasser ); -LEPT_DLL extern JBDATA * jbDataSave ( JBCLASSER *classer ); -LEPT_DLL extern void jbDataDestroy ( JBDATA **pdata ); -LEPT_DLL extern l_ok jbDataWrite ( const char *rootout, JBDATA *jbdata ); 
-LEPT_DLL extern JBDATA * jbDataRead ( const char *rootname ); -LEPT_DLL extern PIXA * jbDataRender ( JBDATA *data, l_int32 debugflag ); -LEPT_DLL extern l_ok jbGetULCorners ( JBCLASSER *classer, PIX *pixs, BOXA *boxa ); -LEPT_DLL extern l_ok jbGetLLCorners ( JBCLASSER *classer ); -LEPT_DLL extern l_ok readHeaderJp2k ( const char *filename, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp ); -LEPT_DLL extern l_ok freadHeaderJp2k ( FILE *fp, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp ); -LEPT_DLL extern l_ok readHeaderMemJp2k ( const l_uint8 *data, size_t size, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp ); -LEPT_DLL extern l_int32 fgetJp2kResolution ( FILE *fp, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern PIX * pixReadJp2k ( const char *filename, l_uint32 reduction, BOX *box, l_int32 hint, l_int32 debug ); -LEPT_DLL extern PIX * pixReadStreamJp2k ( FILE *fp, l_uint32 reduction, BOX *box, l_int32 hint, l_int32 debug ); -LEPT_DLL extern l_ok pixWriteJp2k ( const char *filename, PIX *pix, l_int32 quality, l_int32 nlevels, l_int32 hint, l_int32 debug ); -LEPT_DLL extern l_ok pixWriteStreamJp2k ( FILE *fp, PIX *pix, l_int32 quality, l_int32 nlevels, l_int32 hint, l_int32 debug ); -LEPT_DLL extern PIX * pixReadMemJp2k ( const l_uint8 *data, size_t size, l_uint32 reduction, BOX *box, l_int32 hint, l_int32 debug ); -LEPT_DLL extern l_ok pixWriteMemJp2k ( l_uint8 **pdata, size_t *psize, PIX *pix, l_int32 quality, l_int32 nlevels, l_int32 hint, l_int32 debug ); -LEPT_DLL extern PIX * pixReadJpeg ( const char *filename, l_int32 cmapflag, l_int32 reduction, l_int32 *pnwarn, l_int32 hint ); -LEPT_DLL extern PIX * pixReadStreamJpeg ( FILE *fp, l_int32 cmapflag, l_int32 reduction, l_int32 *pnwarn, l_int32 hint ); -LEPT_DLL extern l_ok readHeaderJpeg ( const char *filename, l_int32 *pw, l_int32 *ph, l_int32 *pspp, l_int32 *pycck, l_int32 *pcmyk ); -LEPT_DLL extern l_ok freadHeaderJpeg ( FILE *fp, l_int32 *pw, l_int32 *ph, l_int32 *pspp, l_int32 
*pycck, l_int32 *pcmyk ); -LEPT_DLL extern l_int32 fgetJpegResolution ( FILE *fp, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_int32 fgetJpegComment ( FILE *fp, l_uint8 **pcomment ); -LEPT_DLL extern l_ok pixWriteJpeg ( const char *filename, PIX *pix, l_int32 quality, l_int32 progressive ); -LEPT_DLL extern l_ok pixWriteStreamJpeg ( FILE *fp, PIX *pixs, l_int32 quality, l_int32 progressive ); -LEPT_DLL extern PIX * pixReadMemJpeg ( const l_uint8 *data, size_t size, l_int32 cmflag, l_int32 reduction, l_int32 *pnwarn, l_int32 hint ); -LEPT_DLL extern l_ok readHeaderMemJpeg ( const l_uint8 *data, size_t size, l_int32 *pw, l_int32 *ph, l_int32 *pspp, l_int32 *pycck, l_int32 *pcmyk ); -LEPT_DLL extern l_ok readResolutionMemJpeg ( const l_uint8 *data, size_t size, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_ok pixWriteMemJpeg ( l_uint8 **pdata, size_t *psize, PIX *pix, l_int32 quality, l_int32 progressive ); -LEPT_DLL extern l_ok pixSetChromaSampling ( PIX *pix, l_int32 sampling ); -LEPT_DLL extern L_KERNEL * kernelCreate ( l_int32 height, l_int32 width ); -LEPT_DLL extern void kernelDestroy ( L_KERNEL **pkel ); -LEPT_DLL extern L_KERNEL * kernelCopy ( L_KERNEL *kels ); -LEPT_DLL extern l_ok kernelGetElement ( L_KERNEL *kel, l_int32 row, l_int32 col, l_float32 *pval ); -LEPT_DLL extern l_ok kernelSetElement ( L_KERNEL *kel, l_int32 row, l_int32 col, l_float32 val ); -LEPT_DLL extern l_ok kernelGetParameters ( L_KERNEL *kel, l_int32 *psy, l_int32 *psx, l_int32 *pcy, l_int32 *pcx ); -LEPT_DLL extern l_ok kernelSetOrigin ( L_KERNEL *kel, l_int32 cy, l_int32 cx ); -LEPT_DLL extern l_ok kernelGetSum ( L_KERNEL *kel, l_float32 *psum ); -LEPT_DLL extern l_ok kernelGetMinMax ( L_KERNEL *kel, l_float32 *pmin, l_float32 *pmax ); -LEPT_DLL extern L_KERNEL * kernelNormalize ( L_KERNEL *kels, l_float32 normsum ); -LEPT_DLL extern L_KERNEL * kernelInvert ( L_KERNEL *kels ); -LEPT_DLL extern l_float32 ** create2dFloatArray ( l_int32 sy, l_int32 sx ); -LEPT_DLL extern 
L_KERNEL * kernelRead ( const char *fname ); -LEPT_DLL extern L_KERNEL * kernelReadStream ( FILE *fp ); -LEPT_DLL extern l_ok kernelWrite ( const char *fname, L_KERNEL *kel ); -LEPT_DLL extern l_ok kernelWriteStream ( FILE *fp, L_KERNEL *kel ); -LEPT_DLL extern L_KERNEL * kernelCreateFromString ( l_int32 h, l_int32 w, l_int32 cy, l_int32 cx, const char *kdata ); -LEPT_DLL extern L_KERNEL * kernelCreateFromFile ( const char *filename ); -LEPT_DLL extern L_KERNEL * kernelCreateFromPix ( PIX *pix, l_int32 cy, l_int32 cx ); -LEPT_DLL extern PIX * kernelDisplayInPix ( L_KERNEL *kel, l_int32 size, l_int32 gthick ); -LEPT_DLL extern NUMA * parseStringForNumbers ( const char *str, const char *seps ); -LEPT_DLL extern L_KERNEL * makeFlatKernel ( l_int32 height, l_int32 width, l_int32 cy, l_int32 cx ); -LEPT_DLL extern L_KERNEL * makeGaussianKernel ( l_int32 halfh, l_int32 halfw, l_float32 stdev, l_float32 max ); -LEPT_DLL extern l_ok makeGaussianKernelSep ( l_int32 halfh, l_int32 halfw, l_float32 stdev, l_float32 max, L_KERNEL **pkelx, L_KERNEL **pkely ); -LEPT_DLL extern L_KERNEL * makeDoGKernel ( l_int32 halfh, l_int32 halfw, l_float32 stdev, l_float32 ratio ); -LEPT_DLL extern char * getImagelibVersions ( void ); -LEPT_DLL extern void listDestroy ( DLLIST **phead ); -LEPT_DLL extern l_ok listAddToHead ( DLLIST **phead, void *data ); -LEPT_DLL extern l_ok listAddToTail ( DLLIST **phead, DLLIST **ptail, void *data ); -LEPT_DLL extern l_ok listInsertBefore ( DLLIST **phead, DLLIST *elem, void *data ); -LEPT_DLL extern l_ok listInsertAfter ( DLLIST **phead, DLLIST *elem, void *data ); -LEPT_DLL extern void * listRemoveElement ( DLLIST **phead, DLLIST *elem ); -LEPT_DLL extern void * listRemoveFromHead ( DLLIST **phead ); -LEPT_DLL extern void * listRemoveFromTail ( DLLIST **phead, DLLIST **ptail ); -LEPT_DLL extern DLLIST * listFindElement ( DLLIST *head, void *data ); -LEPT_DLL extern DLLIST * listFindTail ( DLLIST *head ); -LEPT_DLL extern l_int32 listGetCount ( DLLIST 
*head ); -LEPT_DLL extern l_ok listReverse ( DLLIST **phead ); -LEPT_DLL extern l_ok listJoin ( DLLIST **phead1, DLLIST **phead2 ); -LEPT_DLL extern L_AMAP * l_amapCreate ( l_int32 keytype ); -LEPT_DLL extern RB_TYPE * l_amapFind ( L_AMAP *m, RB_TYPE key ); -LEPT_DLL extern void l_amapInsert ( L_AMAP *m, RB_TYPE key, RB_TYPE value ); -LEPT_DLL extern void l_amapDelete ( L_AMAP *m, RB_TYPE key ); -LEPT_DLL extern void l_amapDestroy ( L_AMAP **pm ); -LEPT_DLL extern L_AMAP_NODE * l_amapGetFirst ( L_AMAP *m ); -LEPT_DLL extern L_AMAP_NODE * l_amapGetNext ( L_AMAP_NODE *n ); -LEPT_DLL extern L_AMAP_NODE * l_amapGetLast ( L_AMAP *m ); -LEPT_DLL extern L_AMAP_NODE * l_amapGetPrev ( L_AMAP_NODE *n ); -LEPT_DLL extern l_int32 l_amapSize ( L_AMAP *m ); -LEPT_DLL extern L_ASET * l_asetCreate ( l_int32 keytype ); -LEPT_DLL extern RB_TYPE * l_asetFind ( L_ASET *s, RB_TYPE key ); -LEPT_DLL extern void l_asetInsert ( L_ASET *s, RB_TYPE key ); -LEPT_DLL extern void l_asetDelete ( L_ASET *s, RB_TYPE key ); -LEPT_DLL extern void l_asetDestroy ( L_ASET **ps ); -LEPT_DLL extern L_ASET_NODE * l_asetGetFirst ( L_ASET *s ); -LEPT_DLL extern L_ASET_NODE * l_asetGetNext ( L_ASET_NODE *n ); -LEPT_DLL extern L_ASET_NODE * l_asetGetLast ( L_ASET *s ); -LEPT_DLL extern L_ASET_NODE * l_asetGetPrev ( L_ASET_NODE *n ); -LEPT_DLL extern l_int32 l_asetSize ( L_ASET *s ); -LEPT_DLL extern PIX * generateBinaryMaze ( l_int32 w, l_int32 h, l_int32 xi, l_int32 yi, l_float32 wallps, l_float32 ranis ); -LEPT_DLL extern PTA * pixSearchBinaryMaze ( PIX *pixs, l_int32 xi, l_int32 yi, l_int32 xf, l_int32 yf, PIX **ppixd ); -LEPT_DLL extern PTA * pixSearchGrayMaze ( PIX *pixs, l_int32 xi, l_int32 yi, l_int32 xf, l_int32 yf, PIX **ppixd ); -LEPT_DLL extern PIX * pixDilate ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixErode ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixHMT ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixOpen ( PIX *pixd, PIX *pixs, SEL *sel ); 
-LEPT_DLL extern PIX * pixClose ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixCloseSafe ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixOpenGeneralized ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixCloseGeneralized ( PIX *pixd, PIX *pixs, SEL *sel ); -LEPT_DLL extern PIX * pixDilateBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixErodeBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseSafeBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern l_int32 selectComposableSels ( l_int32 size, l_int32 direction, SEL **psel1, SEL **psel2 ); -LEPT_DLL extern l_ok selectComposableSizes ( l_int32 size, l_int32 *pfactor1, l_int32 *pfactor2 ); -LEPT_DLL extern PIX * pixDilateCompBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixErodeCompBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenCompBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseCompBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseSafeCompBrick ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern void resetMorphBoundaryCondition ( l_int32 bc ); -LEPT_DLL extern l_uint32 getMorphBorderPixelColor ( l_int32 type, l_int32 depth ); -LEPT_DLL extern PIX * pixExtractBoundary ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PIX * pixMorphSequenceMasked ( PIX *pixs, PIX *pixm, const char *sequence, l_int32 dispsep ); -LEPT_DLL extern PIX * pixMorphSequenceByComponent ( PIX *pixs, const char *sequence, l_int32 connectivity, l_int32 minw, l_int32 minh, BOXA **pboxa ); -LEPT_DLL extern PIXA * 
pixaMorphSequenceByComponent ( PIXA *pixas, const char *sequence, l_int32 minw, l_int32 minh ); -LEPT_DLL extern PIX * pixMorphSequenceByRegion ( PIX *pixs, PIX *pixm, const char *sequence, l_int32 connectivity, l_int32 minw, l_int32 minh, BOXA **pboxa ); -LEPT_DLL extern PIXA * pixaMorphSequenceByRegion ( PIX *pixs, PIXA *pixam, const char *sequence, l_int32 minw, l_int32 minh ); -LEPT_DLL extern PIX * pixUnionOfMorphOps ( PIX *pixs, SELA *sela, l_int32 type ); -LEPT_DLL extern PIX * pixIntersectionOfMorphOps ( PIX *pixs, SELA *sela, l_int32 type ); -LEPT_DLL extern PIX * pixSelectiveConnCompFill ( PIX *pixs, l_int32 connectivity, l_int32 minw, l_int32 minh ); -LEPT_DLL extern l_ok pixRemoveMatchedPattern ( PIX *pixs, PIX *pixp, PIX *pixe, l_int32 x0, l_int32 y0, l_int32 dsize ); -LEPT_DLL extern PIX * pixDisplayMatchedPattern ( PIX *pixs, PIX *pixp, PIX *pixe, l_int32 x0, l_int32 y0, l_uint32 color, l_float32 scale, l_int32 nlevels ); -LEPT_DLL extern PIXA * pixaExtendByMorph ( PIXA *pixas, l_int32 type, l_int32 niters, SEL *sel, l_int32 include ); -LEPT_DLL extern PIXA * pixaExtendByScaling ( PIXA *pixas, NUMA *nasc, l_int32 type, l_int32 include ); -LEPT_DLL extern PIX * pixSeedfillMorph ( PIX *pixs, PIX *pixm, l_int32 maxiters, l_int32 connectivity ); -LEPT_DLL extern NUMA * pixRunHistogramMorph ( PIX *pixs, l_int32 runtype, l_int32 direction, l_int32 maxsize ); -LEPT_DLL extern PIX * pixTophat ( PIX *pixs, l_int32 hsize, l_int32 vsize, l_int32 type ); -LEPT_DLL extern PIX * pixHDome ( PIX *pixs, l_int32 height, l_int32 connectivity ); -LEPT_DLL extern PIX * pixFastTophat ( PIX *pixs, l_int32 xsize, l_int32 ysize, l_int32 type ); -LEPT_DLL extern PIX * pixMorphGradient ( PIX *pixs, l_int32 hsize, l_int32 vsize, l_int32 smoothing ); -LEPT_DLL extern PTA * pixaCentroids ( PIXA *pixa ); -LEPT_DLL extern l_ok pixCentroid ( PIX *pix, l_int32 *centtab, l_int32 *sumtab, l_float32 *pxave, l_float32 *pyave ); -LEPT_DLL extern PIX * pixDilateBrickDwa ( PIX *pixd, PIX 
*pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixErodeBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixDilateCompBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixErodeCompBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenCompBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseCompBrickDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixDilateCompBrickExtendDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixErodeCompBrickExtendDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixOpenCompBrickExtendDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern PIX * pixCloseCompBrickExtendDwa ( PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern l_ok getExtendedCompositeParameters ( l_int32 size, l_int32 *pn, l_int32 *pextra, l_int32 *pactualsize ); -LEPT_DLL extern PIX * pixMorphSequence ( PIX *pixs, const char *sequence, l_int32 dispsep ); -LEPT_DLL extern PIX * pixMorphCompSequence ( PIX *pixs, const char *sequence, l_int32 dispsep ); -LEPT_DLL extern PIX * pixMorphSequenceDwa ( PIX *pixs, const char *sequence, l_int32 dispsep ); -LEPT_DLL extern PIX * pixMorphCompSequenceDwa ( PIX *pixs, const char *sequence, l_int32 dispsep ); -LEPT_DLL extern l_int32 morphSequenceVerify ( SARRAY *sa ); -LEPT_DLL extern PIX * pixGrayMorphSequence ( PIX *pixs, const char *sequence, l_int32 dispsep, l_int32 dispy ); -LEPT_DLL extern PIX * pixColorMorphSequence ( PIX *pixs, const char *sequence, l_int32 dispsep, l_int32 dispy ); -LEPT_DLL extern NUMA * numaCreate ( l_int32 n 
); -LEPT_DLL extern NUMA * numaCreateFromIArray ( l_int32 *iarray, l_int32 size ); -LEPT_DLL extern NUMA * numaCreateFromFArray ( l_float32 *farray, l_int32 size, l_int32 copyflag ); -LEPT_DLL extern NUMA * numaCreateFromString ( const char *str ); -LEPT_DLL extern void numaDestroy ( NUMA **pna ); -LEPT_DLL extern NUMA * numaCopy ( NUMA *na ); -LEPT_DLL extern NUMA * numaClone ( NUMA *na ); -LEPT_DLL extern l_ok numaEmpty ( NUMA *na ); -LEPT_DLL extern l_ok numaAddNumber ( NUMA *na, l_float32 val ); -LEPT_DLL extern l_ok numaInsertNumber ( NUMA *na, l_int32 index, l_float32 val ); -LEPT_DLL extern l_ok numaRemoveNumber ( NUMA *na, l_int32 index ); -LEPT_DLL extern l_ok numaReplaceNumber ( NUMA *na, l_int32 index, l_float32 val ); -LEPT_DLL extern l_int32 numaGetCount ( NUMA *na ); -LEPT_DLL extern l_ok numaSetCount ( NUMA *na, l_int32 newcount ); -LEPT_DLL extern l_ok numaGetFValue ( NUMA *na, l_int32 index, l_float32 *pval ); -LEPT_DLL extern l_ok numaGetIValue ( NUMA *na, l_int32 index, l_int32 *pival ); -LEPT_DLL extern l_ok numaSetValue ( NUMA *na, l_int32 index, l_float32 val ); -LEPT_DLL extern l_ok numaShiftValue ( NUMA *na, l_int32 index, l_float32 diff ); -LEPT_DLL extern l_int32 * numaGetIArray ( NUMA *na ); -LEPT_DLL extern l_float32 * numaGetFArray ( NUMA *na, l_int32 copyflag ); -LEPT_DLL extern l_int32 numaGetRefcount ( NUMA *na ); -LEPT_DLL extern l_ok numaChangeRefcount ( NUMA *na, l_int32 delta ); -LEPT_DLL extern l_ok numaGetParameters ( NUMA *na, l_float32 *pstartx, l_float32 *pdelx ); -LEPT_DLL extern l_ok numaSetParameters ( NUMA *na, l_float32 startx, l_float32 delx ); -LEPT_DLL extern l_ok numaCopyParameters ( NUMA *nad, NUMA *nas ); -LEPT_DLL extern SARRAY * numaConvertToSarray ( NUMA *na, l_int32 size1, l_int32 size2, l_int32 addzeros, l_int32 type ); -LEPT_DLL extern NUMA * numaRead ( const char *filename ); -LEPT_DLL extern NUMA * numaReadStream ( FILE *fp ); -LEPT_DLL extern NUMA * numaReadMem ( const l_uint8 *data, size_t size ); 
-LEPT_DLL extern l_ok numaWriteDebug ( const char *filename, NUMA *na ); -LEPT_DLL extern l_ok numaWrite ( const char *filename, NUMA *na ); -LEPT_DLL extern l_ok numaWriteStream ( FILE *fp, NUMA *na ); -LEPT_DLL extern l_ok numaWriteStderr ( NUMA *na ); -LEPT_DLL extern l_ok numaWriteMem ( l_uint8 **pdata, size_t *psize, NUMA *na ); -LEPT_DLL extern NUMAA * numaaCreate ( l_int32 n ); -LEPT_DLL extern NUMAA * numaaCreateFull ( l_int32 nptr, l_int32 n ); -LEPT_DLL extern l_ok numaaTruncate ( NUMAA *naa ); -LEPT_DLL extern void numaaDestroy ( NUMAA **pnaa ); -LEPT_DLL extern l_ok numaaAddNuma ( NUMAA *naa, NUMA *na, l_int32 copyflag ); -LEPT_DLL extern l_int32 numaaGetCount ( NUMAA *naa ); -LEPT_DLL extern l_int32 numaaGetNumaCount ( NUMAA *naa, l_int32 index ); -LEPT_DLL extern l_int32 numaaGetNumberCount ( NUMAA *naa ); -LEPT_DLL extern NUMA ** numaaGetPtrArray ( NUMAA *naa ); -LEPT_DLL extern NUMA * numaaGetNuma ( NUMAA *naa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern l_ok numaaReplaceNuma ( NUMAA *naa, l_int32 index, NUMA *na ); -LEPT_DLL extern l_ok numaaGetValue ( NUMAA *naa, l_int32 i, l_int32 j, l_float32 *pfval, l_int32 *pival ); -LEPT_DLL extern l_ok numaaAddNumber ( NUMAA *naa, l_int32 index, l_float32 val ); -LEPT_DLL extern NUMAA * numaaRead ( const char *filename ); -LEPT_DLL extern NUMAA * numaaReadStream ( FILE *fp ); -LEPT_DLL extern NUMAA * numaaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok numaaWrite ( const char *filename, NUMAA *naa ); -LEPT_DLL extern l_ok numaaWriteStream ( FILE *fp, NUMAA *naa ); -LEPT_DLL extern l_ok numaaWriteMem ( l_uint8 **pdata, size_t *psize, NUMAA *naa ); -LEPT_DLL extern NUMA * numaArithOp ( NUMA *nad, NUMA *na1, NUMA *na2, l_int32 op ); -LEPT_DLL extern NUMA * numaLogicalOp ( NUMA *nad, NUMA *na1, NUMA *na2, l_int32 op ); -LEPT_DLL extern NUMA * numaInvert ( NUMA *nad, NUMA *nas ); -LEPT_DLL extern l_int32 numaSimilar ( NUMA *na1, NUMA *na2, l_float32 maxdiff, l_int32 *psimilar ); 
-LEPT_DLL extern l_ok numaAddToNumber ( NUMA *na, l_int32 index, l_float32 val ); -LEPT_DLL extern l_ok numaGetMin ( NUMA *na, l_float32 *pminval, l_int32 *piminloc ); -LEPT_DLL extern l_ok numaGetMax ( NUMA *na, l_float32 *pmaxval, l_int32 *pimaxloc ); -LEPT_DLL extern l_ok numaGetSum ( NUMA *na, l_float32 *psum ); -LEPT_DLL extern NUMA * numaGetPartialSums ( NUMA *na ); -LEPT_DLL extern l_ok numaGetSumOnInterval ( NUMA *na, l_int32 first, l_int32 last, l_float32 *psum ); -LEPT_DLL extern l_ok numaHasOnlyIntegers ( NUMA *na, l_int32 maxsamples, l_int32 *pallints ); -LEPT_DLL extern NUMA * numaSubsample ( NUMA *nas, l_int32 subfactor ); -LEPT_DLL extern NUMA * numaMakeDelta ( NUMA *nas ); -LEPT_DLL extern NUMA * numaMakeSequence ( l_float32 startval, l_float32 increment, l_int32 size ); -LEPT_DLL extern NUMA * numaMakeConstant ( l_float32 val, l_int32 size ); -LEPT_DLL extern NUMA * numaMakeAbsValue ( NUMA *nad, NUMA *nas ); -LEPT_DLL extern NUMA * numaAddBorder ( NUMA *nas, l_int32 left, l_int32 right, l_float32 val ); -LEPT_DLL extern NUMA * numaAddSpecifiedBorder ( NUMA *nas, l_int32 left, l_int32 right, l_int32 type ); -LEPT_DLL extern NUMA * numaRemoveBorder ( NUMA *nas, l_int32 left, l_int32 right ); -LEPT_DLL extern l_ok numaCountNonzeroRuns ( NUMA *na, l_int32 *pcount ); -LEPT_DLL extern l_ok numaGetNonzeroRange ( NUMA *na, l_float32 eps, l_int32 *pfirst, l_int32 *plast ); -LEPT_DLL extern l_ok numaGetCountRelativeToZero ( NUMA *na, l_int32 type, l_int32 *pcount ); -LEPT_DLL extern NUMA * numaClipToInterval ( NUMA *nas, l_int32 first, l_int32 last ); -LEPT_DLL extern NUMA * numaMakeThresholdIndicator ( NUMA *nas, l_float32 thresh, l_int32 type ); -LEPT_DLL extern NUMA * numaUniformSampling ( NUMA *nas, l_int32 nsamp ); -LEPT_DLL extern NUMA * numaReverse ( NUMA *nad, NUMA *nas ); -LEPT_DLL extern NUMA * numaLowPassIntervals ( NUMA *nas, l_float32 thresh, l_float32 maxn ); -LEPT_DLL extern NUMA * numaThresholdEdges ( NUMA *nas, l_float32 thresh1, l_float32 
thresh2, l_float32 maxn ); -LEPT_DLL extern l_int32 numaGetSpanValues ( NUMA *na, l_int32 span, l_int32 *pstart, l_int32 *pend ); -LEPT_DLL extern l_int32 numaGetEdgeValues ( NUMA *na, l_int32 edge, l_int32 *pstart, l_int32 *pend, l_int32 *psign ); -LEPT_DLL extern l_ok numaInterpolateEqxVal ( l_float32 startx, l_float32 deltax, NUMA *nay, l_int32 type, l_float32 xval, l_float32 *pyval ); -LEPT_DLL extern l_ok numaInterpolateArbxVal ( NUMA *nax, NUMA *nay, l_int32 type, l_float32 xval, l_float32 *pyval ); -LEPT_DLL extern l_ok numaInterpolateEqxInterval ( l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 type, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnax, NUMA **pnay ); -LEPT_DLL extern l_ok numaInterpolateArbxInterval ( NUMA *nax, NUMA *nay, l_int32 type, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnadx, NUMA **pnady ); -LEPT_DLL extern l_ok numaFitMax ( NUMA *na, l_float32 *pmaxval, NUMA *naloc, l_float32 *pmaxloc ); -LEPT_DLL extern l_ok numaDifferentiateInterval ( NUMA *nax, NUMA *nay, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnadx, NUMA **pnady ); -LEPT_DLL extern l_ok numaIntegrateInterval ( NUMA *nax, NUMA *nay, l_float32 x0, l_float32 x1, l_int32 npts, l_float32 *psum ); -LEPT_DLL extern l_ok numaSortGeneral ( NUMA *na, NUMA **pnasort, NUMA **pnaindex, NUMA **pnainvert, l_int32 sortorder, l_int32 sorttype ); -LEPT_DLL extern NUMA * numaSortAutoSelect ( NUMA *nas, l_int32 sortorder ); -LEPT_DLL extern NUMA * numaSortIndexAutoSelect ( NUMA *nas, l_int32 sortorder ); -LEPT_DLL extern l_int32 numaChooseSortType ( NUMA *nas ); -LEPT_DLL extern NUMA * numaSort ( NUMA *naout, NUMA *nain, l_int32 sortorder ); -LEPT_DLL extern NUMA * numaBinSort ( NUMA *nas, l_int32 sortorder ); -LEPT_DLL extern NUMA * numaGetSortIndex ( NUMA *na, l_int32 sortorder ); -LEPT_DLL extern NUMA * numaGetBinSortIndex ( NUMA *nas, l_int32 sortorder ); -LEPT_DLL extern NUMA * numaSortByIndex ( NUMA *nas, NUMA *naindex ); -LEPT_DLL extern l_int32 numaIsSorted ( 
NUMA *nas, l_int32 sortorder, l_int32 *psorted ); -LEPT_DLL extern l_ok numaSortPair ( NUMA *nax, NUMA *nay, l_int32 sortorder, NUMA **pnasx, NUMA **pnasy ); -LEPT_DLL extern NUMA * numaInvertMap ( NUMA *nas ); -LEPT_DLL extern NUMA * numaPseudorandomSequence ( l_int32 size, l_int32 seed ); -LEPT_DLL extern NUMA * numaRandomPermutation ( NUMA *nas, l_int32 seed ); -LEPT_DLL extern l_ok numaGetRankValue ( NUMA *na, l_float32 fract, NUMA *nasort, l_int32 usebins, l_float32 *pval ); -LEPT_DLL extern l_ok numaGetMedian ( NUMA *na, l_float32 *pval ); -LEPT_DLL extern l_ok numaGetBinnedMedian ( NUMA *na, l_int32 *pval ); -LEPT_DLL extern l_ok numaGetMeanDevFromMedian ( NUMA *na, l_float32 med, l_float32 *pdev ); -LEPT_DLL extern l_ok numaGetMedianDevFromMedian ( NUMA *na, l_float32 *pmed, l_float32 *pdev ); -LEPT_DLL extern l_ok numaGetMode ( NUMA *na, l_float32 *pval, l_int32 *pcount ); -LEPT_DLL extern l_ok numaJoin ( NUMA *nad, NUMA *nas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern l_ok numaaJoin ( NUMAA *naad, NUMAA *naas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern NUMA * numaaFlattenToNuma ( NUMAA *naa ); -LEPT_DLL extern NUMA * numaErode ( NUMA *nas, l_int32 size ); -LEPT_DLL extern NUMA * numaDilate ( NUMA *nas, l_int32 size ); -LEPT_DLL extern NUMA * numaOpen ( NUMA *nas, l_int32 size ); -LEPT_DLL extern NUMA * numaClose ( NUMA *nas, l_int32 size ); -LEPT_DLL extern NUMA * numaTransform ( NUMA *nas, l_float32 shift, l_float32 scale ); -LEPT_DLL extern l_ok numaSimpleStats ( NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar ); -LEPT_DLL extern l_ok numaWindowedStats ( NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv ); -LEPT_DLL extern NUMA * numaWindowedMean ( NUMA *nas, l_int32 wc ); -LEPT_DLL extern NUMA * numaWindowedMeanSquare ( NUMA *nas, l_int32 wc ); -LEPT_DLL extern l_ok numaWindowedVariance ( NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv ); -LEPT_DLL extern NUMA * 
numaWindowedMedian ( NUMA *nas, l_int32 halfwin ); -LEPT_DLL extern NUMA * numaConvertToInt ( NUMA *nas ); -LEPT_DLL extern NUMA * numaMakeHistogram ( NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart ); -LEPT_DLL extern NUMA * numaMakeHistogramAuto ( NUMA *na, l_int32 maxbins ); -LEPT_DLL extern NUMA * numaMakeHistogramClipped ( NUMA *na, l_float32 binsize, l_float32 maxsize ); -LEPT_DLL extern NUMA * numaRebinHistogram ( NUMA *nas, l_int32 newsize ); -LEPT_DLL extern NUMA * numaNormalizeHistogram ( NUMA *nas, l_float32 tsum ); -LEPT_DLL extern l_ok numaGetStatsUsingHistogram ( NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto ); -LEPT_DLL extern l_ok numaGetHistogramStats ( NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance ); -LEPT_DLL extern l_ok numaGetHistogramStatsOnInterval ( NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance ); -LEPT_DLL extern l_ok numaMakeRankFromHistogram ( l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay ); -LEPT_DLL extern l_ok numaHistogramGetRankFromVal ( NUMA *na, l_float32 rval, l_float32 *prank ); -LEPT_DLL extern l_ok numaHistogramGetValFromRank ( NUMA *na, l_float32 rank, l_float32 *prval ); -LEPT_DLL extern l_ok numaDiscretizeRankAndIntensity ( NUMA *na, l_int32 nbins, NUMA **pnarbin, NUMA **pnam, NUMA **pnar, NUMA **pnabb ); -LEPT_DLL extern l_ok numaGetRankBinValues ( NUMA *na, l_int32 nbins, NUMA **pnarbin, NUMA **pnam ); -LEPT_DLL extern l_ok numaSplitDistribution ( NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore ); -LEPT_DLL extern l_ok grayHistogramsToEMD ( NUMAA *naa1, 
NUMAA *naa2, NUMA **pnad ); -LEPT_DLL extern l_ok numaEarthMoverDistance ( NUMA *na1, NUMA *na2, l_float32 *pdist ); -LEPT_DLL extern l_ok grayInterHistogramStats ( NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv ); -LEPT_DLL extern NUMA * numaFindPeaks ( NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2 ); -LEPT_DLL extern NUMA * numaFindExtrema ( NUMA *nas, l_float32 delta, NUMA **pnav ); -LEPT_DLL extern l_ok numaFindLocForThreshold ( NUMA *na, l_int32 skip, l_int32 *pthresh, l_float32 *pfract ); -LEPT_DLL extern l_ok numaCountReversals ( NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd ); -LEPT_DLL extern l_ok numaSelectCrossingThreshold ( NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh ); -LEPT_DLL extern NUMA * numaCrossingsByThreshold ( NUMA *nax, NUMA *nay, l_float32 thresh ); -LEPT_DLL extern NUMA * numaCrossingsByPeaks ( NUMA *nax, NUMA *nay, l_float32 delta ); -LEPT_DLL extern l_ok numaEvalBestHaarParameters ( NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore ); -LEPT_DLL extern l_ok numaEvalHaarSum ( NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore ); -LEPT_DLL extern NUMA * genConstrainedNumaInRange ( l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs ); -LEPT_DLL extern l_ok pixGetRegionsBinary ( PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb ); -LEPT_DLL extern PIX * pixGenHalftoneMask ( PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug ); -LEPT_DLL extern PIX * pixGenerateHalftoneMask ( PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb ); -LEPT_DLL extern PIX * pixGenTextlineMask ( PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb ); -LEPT_DLL extern PIX * pixGenTextblockMask ( PIX *pixs, PIX *pixvws, PIXA *pixadb ); -LEPT_DLL extern BOX * pixFindPageForeground ( PIX 
*pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac ); -LEPT_DLL extern l_ok pixSplitIntoCharacters ( PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug ); -LEPT_DLL extern BOXA * pixSplitComponentWithProfile ( PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug ); -LEPT_DLL extern PIXA * pixExtractTextlines ( PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb ); -LEPT_DLL extern PIXA * pixExtractRawTextlines ( PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb ); -LEPT_DLL extern l_ok pixCountTextColumns ( PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb ); -LEPT_DLL extern l_ok pixDecideIfText ( PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb ); -LEPT_DLL extern l_ok pixFindThreshFgExtent ( PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot ); -LEPT_DLL extern l_ok pixDecideIfTable ( PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb ); -LEPT_DLL extern PIX * pixPrepare1bpp ( PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres ); -LEPT_DLL extern l_ok pixEstimateBackground ( PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg ); -LEPT_DLL extern l_ok pixFindLargeRectangles ( PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb ); -LEPT_DLL extern l_ok pixFindLargestRectangle ( PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb ); -LEPT_DLL extern BOX * pixFindRectangleInCC ( PIX *pixs, BOX *boxs, l_float32 fract, l_int32 dir, l_int32 select, l_int32 debug ); -LEPT_DLL extern PIX * pixAutoPhotoinvert ( PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb ); -LEPT_DLL extern l_ok pixSetSelectCmap ( PIX *pixs, BOX *box, l_int32 sindex, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixColorGrayRegionsCmap ( PIX *pixs, BOXA *boxa, l_int32 type, l_int32 rval, l_int32 
gval, l_int32 bval ); -LEPT_DLL extern l_ok pixColorGrayCmap ( PIX *pixs, BOX *box, l_int32 type, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixColorGrayMaskedCmap ( PIX *pixs, PIX *pixm, l_int32 type, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok addColorizedGrayToCmap ( PIXCMAP *cmap, l_int32 type, l_int32 rval, l_int32 gval, l_int32 bval, NUMA **pna ); -LEPT_DLL extern l_ok pixSetSelectMaskedCmap ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 sindex, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixSetMaskedCmap ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern char * parseForProtos ( const char *filein, const char *prestring ); -LEPT_DLL extern l_ok partifyFiles ( const char *dirname, const char *substr, l_int32 nparts, const char *outroot, const char *debugfile ); -LEPT_DLL extern l_ok partifyPixac ( PIXAC *pixac, l_int32 nparts, const char *outroot, PIXA *pixadb ); -LEPT_DLL extern BOXA * boxaGetWhiteblocks ( BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops ); -LEPT_DLL extern BOXA * boxaPruneSortedOnOverlap ( BOXA *boxas, l_float32 maxoverlap ); -LEPT_DLL extern l_ok convertFilesToPdf ( const char *dirname, const char *substr, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout ); -LEPT_DLL extern l_ok saConvertFilesToPdf ( SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout ); -LEPT_DLL extern l_ok saConvertFilesToPdfData ( SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok selectDefaultPdfEncoding ( PIX *pix, l_int32 *ptype ); -LEPT_DLL extern l_ok convertUnscaledFilesToPdf ( const char *dirname, const char *substr, const char 
*title, const char *fileout ); -LEPT_DLL extern l_ok saConvertUnscaledFilesToPdf ( SARRAY *sa, const char *title, const char *fileout ); -LEPT_DLL extern l_ok saConvertUnscaledFilesToPdfData ( SARRAY *sa, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok convertUnscaledToPdfData ( const char *fname, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok pixaConvertToPdf ( PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout ); -LEPT_DLL extern l_ok pixaConvertToPdfData ( PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok convertToPdf ( const char *filein, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position ); -LEPT_DLL extern l_ok convertImageDataToPdf ( l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position ); -LEPT_DLL extern l_ok convertToPdfData ( const char *filein, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position ); -LEPT_DLL extern l_ok convertImageDataToPdfData ( l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position ); -LEPT_DLL extern l_ok pixConvertToPdf ( PIX *pix, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position ); -LEPT_DLL extern l_ok pixWriteStreamPdf ( FILE *fp, PIX *pix, l_int32 res, const char *title ); -LEPT_DLL extern l_ok pixWriteMemPdf ( l_uint8 **pdata, size_t *pnbytes, PIX *pix, l_int32 res, const 
char *title ); -LEPT_DLL extern l_ok convertSegmentedFilesToPdf ( const char *dirname, const char *substr, l_int32 res, l_int32 type, l_int32 thresh, BOXAA *baa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout ); -LEPT_DLL extern BOXAA * convertNumberedMasksToBoxaa ( const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost ); -LEPT_DLL extern l_ok convertToPdfSegmented ( const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout ); -LEPT_DLL extern l_ok pixConvertToPdfSegmented ( PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout ); -LEPT_DLL extern l_ok convertToPdfDataSegmented ( const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok pixConvertToPdfDataSegmented ( PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok concatenatePdf ( const char *dirname, const char *substr, const char *fileout ); -LEPT_DLL extern l_ok saConcatenatePdf ( SARRAY *sa, const char *fileout ); -LEPT_DLL extern l_ok ptraConcatenatePdf ( L_PTRA *pa, const char *fileout ); -LEPT_DLL extern l_ok concatenatePdfToData ( const char *dirname, const char *substr, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok saConcatenatePdfToData ( SARRAY *sa, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok pixConvertToPdfData ( PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position ); -LEPT_DLL extern l_ok ptraConcatenatePdfToData ( L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t 
*pnbytes ); -LEPT_DLL extern l_ok convertTiffMultipageToPdf ( const char *filein, const char *fileout ); -LEPT_DLL extern l_ok l_generateCIDataForPdf ( const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid ); -LEPT_DLL extern L_COMP_DATA * l_generateFlateDataPdf ( const char *fname, PIX *pixs ); -LEPT_DLL extern L_COMP_DATA * l_generateJpegData ( const char *fname, l_int32 ascii85flag ); -LEPT_DLL extern L_COMP_DATA * l_generateJpegDataMem ( l_uint8 *data, size_t nbytes, l_int32 ascii85flag ); -LEPT_DLL extern l_ok l_generateCIData ( const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid ); -LEPT_DLL extern l_ok pixGenerateCIData ( PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid ); -LEPT_DLL extern L_COMP_DATA * l_generateFlateData ( const char *fname, l_int32 ascii85flag ); -LEPT_DLL extern L_COMP_DATA * l_generateG4Data ( const char *fname, l_int32 ascii85flag ); -LEPT_DLL extern l_ok cidConvertToPdfData ( L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern void l_CIDataDestroy ( L_COMP_DATA **pcid ); -LEPT_DLL extern void l_pdfSetG4ImageMask ( l_int32 flag ); -LEPT_DLL extern void l_pdfSetDateAndVersion ( l_int32 flag ); -LEPT_DLL extern void setPixMemoryManager ( alloc_fn allocator, dealloc_fn deallocator ); -LEPT_DLL extern PIX * pixCreate ( l_int32 width, l_int32 height, l_int32 depth ); -LEPT_DLL extern PIX * pixCreateNoInit ( l_int32 width, l_int32 height, l_int32 depth ); -LEPT_DLL extern PIX * pixCreateTemplate ( const PIX *pixs ); -LEPT_DLL extern PIX * pixCreateTemplateNoInit ( const PIX *pixs ); -LEPT_DLL extern PIX * pixCreateWithCmap ( l_int32 width, l_int32 height, l_int32 depth, l_int32 initcolor ); -LEPT_DLL extern PIX * pixCreateHeader ( l_int32 width, l_int32 height, l_int32 depth ); -LEPT_DLL extern PIX * pixClone ( PIX *pixs ); -LEPT_DLL extern void pixDestroy ( PIX **ppix ); -LEPT_DLL extern PIX * pixCopy ( PIX *pixd, const PIX *pixs 
); -LEPT_DLL extern l_ok pixResizeImageData ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern l_ok pixCopyColormap ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern l_int32 pixSizesEqual ( const PIX *pix1, const PIX *pix2 ); -LEPT_DLL extern l_ok pixTransferAllData ( PIX *pixd, PIX **ppixs, l_int32 copytext, l_int32 copyformat ); -LEPT_DLL extern l_ok pixSwapAndDestroy ( PIX **ppixd, PIX **ppixs ); -LEPT_DLL extern l_int32 pixGetWidth ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetWidth ( PIX *pix, l_int32 width ); -LEPT_DLL extern l_int32 pixGetHeight ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetHeight ( PIX *pix, l_int32 height ); -LEPT_DLL extern l_int32 pixGetDepth ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetDepth ( PIX *pix, l_int32 depth ); -LEPT_DLL extern l_ok pixGetDimensions ( const PIX *pix, l_int32 *pw, l_int32 *ph, l_int32 *pd ); -LEPT_DLL extern l_ok pixSetDimensions ( PIX *pix, l_int32 w, l_int32 h, l_int32 d ); -LEPT_DLL extern l_ok pixCopyDimensions ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern l_int32 pixGetSpp ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetSpp ( PIX *pix, l_int32 spp ); -LEPT_DLL extern l_ok pixCopySpp ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern l_int32 pixGetWpl ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetWpl ( PIX *pix, l_int32 wpl ); -LEPT_DLL extern l_int32 pixGetRefcount ( const PIX *pix ); -LEPT_DLL extern l_int32 pixChangeRefcount ( PIX *pix, l_int32 delta ); -LEPT_DLL extern l_int32 pixGetXRes ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetXRes ( PIX *pix, l_int32 res ); -LEPT_DLL extern l_int32 pixGetYRes ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetYRes ( PIX *pix, l_int32 res ); -LEPT_DLL extern l_ok pixGetResolution ( const PIX *pix, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_ok pixSetResolution ( PIX *pix, l_int32 xres, l_int32 yres ); -LEPT_DLL extern l_int32 pixCopyResolution ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern l_int32 pixScaleResolution ( PIX *pix, 
l_float32 xscale, l_float32 yscale ); -LEPT_DLL extern l_int32 pixGetInputFormat ( const PIX *pix ); -LEPT_DLL extern l_int32 pixSetInputFormat ( PIX *pix, l_int32 informat ); -LEPT_DLL extern l_int32 pixCopyInputFormat ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern l_int32 pixSetSpecial ( PIX *pix, l_int32 special ); -LEPT_DLL extern char * pixGetText ( PIX *pix ); -LEPT_DLL extern l_ok pixSetText ( PIX *pix, const char *textstring ); -LEPT_DLL extern l_ok pixAddText ( PIX *pix, const char *textstring ); -LEPT_DLL extern l_int32 pixCopyText ( PIX *pixd, const PIX *pixs ); -LEPT_DLL extern PIXCMAP * pixGetColormap ( PIX *pix ); -LEPT_DLL extern l_ok pixSetColormap ( PIX *pix, PIXCMAP *colormap ); -LEPT_DLL extern l_ok pixDestroyColormap ( PIX *pix ); -LEPT_DLL extern l_uint32 * pixGetData ( PIX *pix ); -LEPT_DLL extern l_int32 pixSetData ( PIX *pix, l_uint32 *data ); -LEPT_DLL extern l_uint32 * pixExtractData ( PIX *pixs ); -LEPT_DLL extern l_int32 pixFreeData ( PIX *pix ); -LEPT_DLL extern void ** pixGetLinePtrs ( PIX *pix, l_int32 *psize ); -LEPT_DLL extern l_ok pixPrintStreamInfo ( FILE *fp, const PIX *pix, const char *text ); -LEPT_DLL extern l_ok pixGetPixel ( PIX *pix, l_int32 x, l_int32 y, l_uint32 *pval ); -LEPT_DLL extern l_ok pixSetPixel ( PIX *pix, l_int32 x, l_int32 y, l_uint32 val ); -LEPT_DLL extern l_ok pixGetRGBPixel ( PIX *pix, l_int32 x, l_int32 y, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern l_ok pixSetRGBPixel ( PIX *pix, l_int32 x, l_int32 y, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixSetCmapPixel ( PIX *pix, l_int32 x, l_int32 y, l_int32 rval, l_int32 gval, l_int32 bval ); -LEPT_DLL extern l_ok pixGetRandomPixel ( PIX *pix, l_uint32 *pval, l_int32 *px, l_int32 *py ); -LEPT_DLL extern l_ok pixClearPixel ( PIX *pix, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok pixFlipPixel ( PIX *pix, l_int32 x, l_int32 y ); -LEPT_DLL extern void setPixelLow ( l_uint32 *line, l_int32 x, l_int32 depth, l_uint32 
val ); -LEPT_DLL extern l_ok pixGetBlackOrWhiteVal ( PIX *pixs, l_int32 op, l_uint32 *pval ); -LEPT_DLL extern l_ok pixClearAll ( PIX *pix ); -LEPT_DLL extern l_ok pixSetAll ( PIX *pix ); -LEPT_DLL extern l_ok pixSetAllGray ( PIX *pix, l_int32 grayval ); -LEPT_DLL extern l_ok pixSetAllArbitrary ( PIX *pix, l_uint32 val ); -LEPT_DLL extern l_ok pixSetBlackOrWhite ( PIX *pixs, l_int32 op ); -LEPT_DLL extern l_ok pixSetComponentArbitrary ( PIX *pix, l_int32 comp, l_int32 val ); -LEPT_DLL extern l_ok pixClearInRect ( PIX *pix, BOX *box ); -LEPT_DLL extern l_ok pixSetInRect ( PIX *pix, BOX *box ); -LEPT_DLL extern l_ok pixSetInRectArbitrary ( PIX *pix, BOX *box, l_uint32 val ); -LEPT_DLL extern l_ok pixBlendInRect ( PIX *pixs, BOX *box, l_uint32 val, l_float32 fract ); -LEPT_DLL extern l_ok pixSetPadBits ( PIX *pix, l_int32 val ); -LEPT_DLL extern l_ok pixSetPadBitsBand ( PIX *pix, l_int32 by, l_int32 bh, l_int32 val ); -LEPT_DLL extern l_ok pixSetOrClearBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot, l_int32 op ); -LEPT_DLL extern l_ok pixSetBorderVal ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot, l_uint32 val ); -LEPT_DLL extern l_ok pixSetBorderRingVal ( PIX *pixs, l_int32 dist, l_uint32 val ); -LEPT_DLL extern l_ok pixSetMirroredBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern PIX * pixCopyBorder ( PIX *pixd, PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern PIX * pixAddBorder ( PIX *pixs, l_int32 npix, l_uint32 val ); -LEPT_DLL extern PIX * pixAddBlackOrWhiteBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot, l_int32 op ); -LEPT_DLL extern PIX * pixAddBorderGeneral ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot, l_uint32 val ); -LEPT_DLL extern PIX * pixRemoveBorder ( PIX *pixs, l_int32 npix ); -LEPT_DLL extern PIX * pixRemoveBorderGeneral ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, 
l_int32 bot ); -LEPT_DLL extern PIX * pixRemoveBorderToSize ( PIX *pixs, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIX * pixAddMirroredBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern PIX * pixAddRepeatedBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern PIX * pixAddMixedBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern PIX * pixAddContinuedBorder ( PIX *pixs, l_int32 left, l_int32 right, l_int32 top, l_int32 bot ); -LEPT_DLL extern l_ok pixShiftAndTransferAlpha ( PIX *pixd, PIX *pixs, l_float32 shiftx, l_float32 shifty ); -LEPT_DLL extern PIX * pixDisplayLayersRGBA ( PIX *pixs, l_uint32 val, l_int32 maxw ); -LEPT_DLL extern PIX * pixCreateRGBImage ( PIX *pixr, PIX *pixg, PIX *pixb ); -LEPT_DLL extern PIX * pixGetRGBComponent ( PIX *pixs, l_int32 comp ); -LEPT_DLL extern l_ok pixSetRGBComponent ( PIX *pixd, PIX *pixs, l_int32 comp ); -LEPT_DLL extern PIX * pixGetRGBComponentCmap ( PIX *pixs, l_int32 comp ); -LEPT_DLL extern l_ok pixCopyRGBComponent ( PIX *pixd, PIX *pixs, l_int32 comp ); -LEPT_DLL extern l_ok composeRGBPixel ( l_int32 rval, l_int32 gval, l_int32 bval, l_uint32 *ppixel ); -LEPT_DLL extern l_ok composeRGBAPixel ( l_int32 rval, l_int32 gval, l_int32 bval, l_int32 aval, l_uint32 *ppixel ); -LEPT_DLL extern void extractRGBValues ( l_uint32 pixel, l_int32 *prval, l_int32 *pgval, l_int32 *pbval ); -LEPT_DLL extern void extractRGBAValues ( l_uint32 pixel, l_int32 *prval, l_int32 *pgval, l_int32 *pbval, l_int32 *paval ); -LEPT_DLL extern l_int32 extractMinMaxComponent ( l_uint32 pixel, l_int32 type ); -LEPT_DLL extern l_ok pixGetRGBLine ( PIX *pixs, l_int32 row, l_uint8 *bufr, l_uint8 *bufg, l_uint8 *bufb ); -LEPT_DLL extern l_ok setLineDataVal ( l_uint32 *line, l_int32 j, l_int32 d, l_uint32 val ); -LEPT_DLL extern PIX * pixEndianByteSwapNew ( PIX *pixs ); -LEPT_DLL extern l_ok pixEndianByteSwap ( PIX *pixs ); -LEPT_DLL 
extern l_int32 lineEndianByteSwap ( l_uint32 *datad, l_uint32 *datas, l_int32 wpl ); -LEPT_DLL extern PIX * pixEndianTwoByteSwapNew ( PIX *pixs ); -LEPT_DLL extern l_ok pixEndianTwoByteSwap ( PIX *pixs ); -LEPT_DLL extern l_ok pixGetRasterData ( PIX *pixs, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok pixAlphaIsOpaque ( PIX *pix, l_int32 *popaque ); -LEPT_DLL extern l_uint8 ** pixSetupByteProcessing ( PIX *pix, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok pixCleanupByteProcessing ( PIX *pix, l_uint8 **lineptrs ); -LEPT_DLL extern void l_setAlphaMaskBorder ( l_float32 val1, l_float32 val2 ); -LEPT_DLL extern l_ok pixSetMasked ( PIX *pixd, PIX *pixm, l_uint32 val ); -LEPT_DLL extern l_ok pixSetMaskedGeneral ( PIX *pixd, PIX *pixm, l_uint32 val, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok pixCombineMasked ( PIX *pixd, PIX *pixs, PIX *pixm ); -LEPT_DLL extern l_ok pixCombineMaskedGeneral ( PIX *pixd, PIX *pixs, PIX *pixm, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok pixPaintThroughMask ( PIX *pixd, PIX *pixm, l_int32 x, l_int32 y, l_uint32 val ); -LEPT_DLL extern PIX * pixCopyWithBoxa ( PIX *pixs, BOXA *boxa, l_int32 background ); -LEPT_DLL extern l_ok pixPaintSelfThroughMask ( PIX *pixd, PIX *pixm, l_int32 x, l_int32 y, l_int32 searchdir, l_int32 mindist, l_int32 tilesize, l_int32 ntiles, l_int32 distblend ); -LEPT_DLL extern PIX * pixMakeMaskFromVal ( PIX *pixs, l_int32 val ); -LEPT_DLL extern PIX * pixMakeMaskFromLUT ( PIX *pixs, l_int32 *tab ); -LEPT_DLL extern PIX * pixMakeArbMaskFromRGB ( PIX *pixs, l_float32 rc, l_float32 gc, l_float32 bc, l_float32 thresh ); -LEPT_DLL extern PIX * pixSetUnderTransparency ( PIX *pixs, l_uint32 val, l_int32 debug ); -LEPT_DLL extern PIX * pixMakeAlphaFromMask ( PIX *pixs, l_int32 dist, BOX **pbox ); -LEPT_DLL extern l_ok pixGetColorNearMaskBoundary ( PIX *pixs, PIX *pixm, BOX *box, l_int32 dist, l_uint32 *pval, l_int32 debug ); -LEPT_DLL extern PIX * pixDisplaySelectedPixels ( PIX *pixs, PIX *pixm, SEL *sel, 
l_uint32 val ); -LEPT_DLL extern PIX * pixInvert ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixOr ( PIX *pixd, PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixAnd ( PIX *pixd, PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixXor ( PIX *pixd, PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixSubtract ( PIX *pixd, PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern l_ok pixZero ( PIX *pix, l_int32 *pempty ); -LEPT_DLL extern l_ok pixForegroundFraction ( PIX *pix, l_float32 *pfract ); -LEPT_DLL extern NUMA * pixaCountPixels ( PIXA *pixa ); -LEPT_DLL extern l_ok pixCountPixels ( PIX *pixs, l_int32 *pcount, l_int32 *tab8 ); -LEPT_DLL extern l_ok pixCountPixelsInRect ( PIX *pixs, BOX *box, l_int32 *pcount, l_int32 *tab8 ); -LEPT_DLL extern NUMA * pixCountByRow ( PIX *pix, BOX *box ); -LEPT_DLL extern NUMA * pixCountByColumn ( PIX *pix, BOX *box ); -LEPT_DLL extern NUMA * pixCountPixelsByRow ( PIX *pix, l_int32 *tab8 ); -LEPT_DLL extern NUMA * pixCountPixelsByColumn ( PIX *pix ); -LEPT_DLL extern l_ok pixCountPixelsInRow ( PIX *pix, l_int32 row, l_int32 *pcount, l_int32 *tab8 ); -LEPT_DLL extern NUMA * pixGetMomentByColumn ( PIX *pix, l_int32 order ); -LEPT_DLL extern l_ok pixThresholdPixelSum ( PIX *pix, l_int32 thresh, l_int32 *pabove, l_int32 *tab8 ); -LEPT_DLL extern l_int32 * makePixelSumTab8 ( void ); -LEPT_DLL extern l_int32 * makePixelCentroidTab8 ( void ); -LEPT_DLL extern NUMA * pixAverageByRow ( PIX *pix, BOX *box, l_int32 type ); -LEPT_DLL extern NUMA * pixAverageByColumn ( PIX *pix, BOX *box, l_int32 type ); -LEPT_DLL extern l_ok pixAverageInRect ( PIX *pixs, PIX *pixm, BOX *box, l_int32 minval, l_int32 maxval, l_int32 subsamp, l_float32 *pave ); -LEPT_DLL extern l_ok pixAverageInRectRGB ( PIX *pixs, PIX *pixm, BOX *box, l_int32 subsamp, l_uint32 *pave ); -LEPT_DLL extern NUMA * pixVarianceByRow ( PIX *pix, BOX *box ); -LEPT_DLL extern NUMA * pixVarianceByColumn ( PIX *pix, BOX *box ); -LEPT_DLL extern l_ok pixVarianceInRect ( PIX *pix, BOX *box, 
l_float32 *prootvar ); -LEPT_DLL extern NUMA * pixAbsDiffByRow ( PIX *pix, BOX *box ); -LEPT_DLL extern NUMA * pixAbsDiffByColumn ( PIX *pix, BOX *box ); -LEPT_DLL extern l_ok pixAbsDiffInRect ( PIX *pix, BOX *box, l_int32 dir, l_float32 *pabsdiff ); -LEPT_DLL extern l_ok pixAbsDiffOnLine ( PIX *pix, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_float32 *pabsdiff ); -LEPT_DLL extern l_int32 pixCountArbInRect ( PIX *pixs, BOX *box, l_int32 val, l_int32 factor, l_int32 *pcount ); -LEPT_DLL extern PIX * pixMirroredTiling ( PIX *pixs, l_int32 w, l_int32 h ); -LEPT_DLL extern l_ok pixFindRepCloseTile ( PIX *pixs, BOX *box, l_int32 searchdir, l_int32 mindist, l_int32 tsize, l_int32 ntiles, BOX **pboxtile, l_int32 debug ); -LEPT_DLL extern NUMA * pixGetGrayHistogram ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern NUMA * pixGetGrayHistogramMasked ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor ); -LEPT_DLL extern NUMA * pixGetGrayHistogramInRect ( PIX *pixs, BOX *box, l_int32 factor ); -LEPT_DLL extern NUMAA * pixGetGrayHistogramTiled ( PIX *pixs, l_int32 factor, l_int32 nx, l_int32 ny ); -LEPT_DLL extern l_ok pixGetColorHistogram ( PIX *pixs, l_int32 factor, NUMA **pnar, NUMA **pnag, NUMA **pnab ); -LEPT_DLL extern l_ok pixGetColorHistogramMasked ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor, NUMA **pnar, NUMA **pnag, NUMA **pnab ); -LEPT_DLL extern NUMA * pixGetCmapHistogram ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern NUMA * pixGetCmapHistogramMasked ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor ); -LEPT_DLL extern NUMA * pixGetCmapHistogramInRect ( PIX *pixs, BOX *box, l_int32 factor ); -LEPT_DLL extern l_ok pixCountRGBColors ( PIX *pixs, l_int32 factor, l_int32 *pncolors ); -LEPT_DLL extern L_AMAP * pixGetColorAmapHistogram ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern l_int32 amapGetCountForColor ( L_AMAP *amap, l_uint32 val ); -LEPT_DLL extern l_ok pixGetRankValue ( PIX *pixs, l_int32 factor, l_float32 rank, 
l_uint32 *pvalue ); -LEPT_DLL extern l_ok pixGetRankValueMaskedRGB ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor, l_float32 rank, l_float32 *prval, l_float32 *pgval, l_float32 *pbval ); -LEPT_DLL extern l_ok pixGetRankValueMasked ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor, l_float32 rank, l_float32 *pval, NUMA **pna ); -LEPT_DLL extern l_ok pixGetPixelAverage ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor, l_uint32 *pval ); -LEPT_DLL extern l_ok pixGetPixelStats ( PIX *pixs, l_int32 factor, l_int32 type, l_uint32 *pvalue ); -LEPT_DLL extern l_ok pixGetAverageMaskedRGB ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor, l_int32 type, l_float32 *prval, l_float32 *pgval, l_float32 *pbval ); -LEPT_DLL extern l_ok pixGetAverageMasked ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 factor, l_int32 type, l_float32 *pval ); -LEPT_DLL extern l_ok pixGetAverageTiledRGB ( PIX *pixs, l_int32 sx, l_int32 sy, l_int32 type, PIX **ppixr, PIX **ppixg, PIX **ppixb ); -LEPT_DLL extern PIX * pixGetAverageTiled ( PIX *pixs, l_int32 sx, l_int32 sy, l_int32 type ); -LEPT_DLL extern l_int32 pixRowStats ( PIX *pixs, BOX *box, NUMA **pnamean, NUMA **pnamedian, NUMA **pnamode, NUMA **pnamodecount, NUMA **pnavar, NUMA **pnarootvar ); -LEPT_DLL extern l_int32 pixColumnStats ( PIX *pixs, BOX *box, NUMA **pnamean, NUMA **pnamedian, NUMA **pnamode, NUMA **pnamodecount, NUMA **pnavar, NUMA **pnarootvar ); -LEPT_DLL extern l_ok pixGetRangeValues ( PIX *pixs, l_int32 factor, l_int32 color, l_int32 *pminval, l_int32 *pmaxval ); -LEPT_DLL extern l_ok pixGetExtremeValue ( PIX *pixs, l_int32 factor, l_int32 type, l_int32 *prval, l_int32 *pgval, l_int32 *pbval, l_int32 *pgrayval ); -LEPT_DLL extern l_ok pixGetMaxValueInRect ( PIX *pixs, BOX *box, l_uint32 *pmaxval, l_int32 *pxmax, l_int32 *pymax ); -LEPT_DLL extern l_ok pixGetBinnedComponentRange ( PIX *pixs, l_int32 nbins, l_int32 factor, l_int32 color, l_int32 *pminval, l_int32 *pmaxval, 
l_uint32 **pcarray, l_int32 fontsize ); -LEPT_DLL extern l_ok pixGetRankColorArray ( PIX *pixs, l_int32 nbins, l_int32 type, l_int32 factor, l_uint32 **pcarray, PIXA *pixadb, l_int32 fontsize ); -LEPT_DLL extern l_ok pixGetBinnedColor ( PIX *pixs, PIX *pixg, l_int32 factor, l_int32 nbins, NUMA *nalut, l_uint32 **pcarray, PIXA *pixadb ); -LEPT_DLL extern PIX * pixDisplayColorArray ( l_uint32 *carray, l_int32 ncolors, l_int32 side, l_int32 ncols, l_int32 fontsize ); -LEPT_DLL extern PIX * pixRankBinByStrip ( PIX *pixs, l_int32 direction, l_int32 size, l_int32 nbins, l_int32 type ); -LEPT_DLL extern PIX * pixaGetAlignedStats ( PIXA *pixa, l_int32 type, l_int32 nbins, l_int32 thresh ); -LEPT_DLL extern l_ok pixaExtractColumnFromEachPix ( PIXA *pixa, l_int32 col, PIX *pixd ); -LEPT_DLL extern l_ok pixGetRowStats ( PIX *pixs, l_int32 type, l_int32 nbins, l_int32 thresh, l_float32 *colvect ); -LEPT_DLL extern l_ok pixGetColumnStats ( PIX *pixs, l_int32 type, l_int32 nbins, l_int32 thresh, l_float32 *rowvect ); -LEPT_DLL extern l_ok pixSetPixelColumn ( PIX *pix, l_int32 col, l_float32 *colvect ); -LEPT_DLL extern l_ok pixThresholdForFgBg ( PIX *pixs, l_int32 factor, l_int32 thresh, l_int32 *pfgval, l_int32 *pbgval ); -LEPT_DLL extern l_ok pixSplitDistributionFgBg ( PIX *pixs, l_float32 scorefract, l_int32 factor, l_int32 *pthresh, l_int32 *pfgval, l_int32 *pbgval, PIX **ppixdb ); -LEPT_DLL extern l_ok pixaFindDimensions ( PIXA *pixa, NUMA **pnaw, NUMA **pnah ); -LEPT_DLL extern l_ok pixFindAreaPerimRatio ( PIX *pixs, l_int32 *tab, l_float32 *pfract ); -LEPT_DLL extern NUMA * pixaFindPerimToAreaRatio ( PIXA *pixa ); -LEPT_DLL extern l_ok pixFindPerimToAreaRatio ( PIX *pixs, l_int32 *tab, l_float32 *pfract ); -LEPT_DLL extern NUMA * pixaFindPerimSizeRatio ( PIXA *pixa ); -LEPT_DLL extern l_ok pixFindPerimSizeRatio ( PIX *pixs, l_int32 *tab, l_float32 *pratio ); -LEPT_DLL extern NUMA * pixaFindAreaFraction ( PIXA *pixa ); -LEPT_DLL extern l_ok pixFindAreaFraction ( PIX *pixs, 
l_int32 *tab, l_float32 *pfract ); -LEPT_DLL extern NUMA * pixaFindAreaFractionMasked ( PIXA *pixa, PIX *pixm, l_int32 debug ); -LEPT_DLL extern l_ok pixFindAreaFractionMasked ( PIX *pixs, BOX *box, PIX *pixm, l_int32 *tab, l_float32 *pfract ); -LEPT_DLL extern NUMA * pixaFindWidthHeightRatio ( PIXA *pixa ); -LEPT_DLL extern NUMA * pixaFindWidthHeightProduct ( PIXA *pixa ); -LEPT_DLL extern l_ok pixFindOverlapFraction ( PIX *pixs1, PIX *pixs2, l_int32 x2, l_int32 y2, l_int32 *tab, l_float32 *pratio, l_int32 *pnoverlap ); -LEPT_DLL extern BOXA * pixFindRectangleComps ( PIX *pixs, l_int32 dist, l_int32 minw, l_int32 minh ); -LEPT_DLL extern l_ok pixConformsToRectangle ( PIX *pixs, BOX *box, l_int32 dist, l_int32 *pconforms ); -LEPT_DLL extern PIXA * pixClipRectangles ( PIX *pixs, BOXA *boxa ); -LEPT_DLL extern PIX * pixClipRectangle ( PIX *pixs, BOX *box, BOX **pboxc ); -LEPT_DLL extern PIX * pixClipMasked ( PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_uint32 outval ); -LEPT_DLL extern l_ok pixCropToMatch ( PIX *pixs1, PIX *pixs2, PIX **ppixd1, PIX **ppixd2 ); -LEPT_DLL extern PIX * pixCropToSize ( PIX *pixs, l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * pixResizeToMatch ( PIX *pixs, PIX *pixt, l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * pixSelectComponentBySize ( PIX *pixs, l_int32 rankorder, l_int32 type, l_int32 connectivity, BOX **pbox ); -LEPT_DLL extern PIX * pixFilterComponentBySize ( PIX *pixs, l_int32 rankorder, l_int32 type, l_int32 connectivity, BOX **pbox ); -LEPT_DLL extern PIX * pixMakeSymmetricMask ( l_int32 w, l_int32 h, l_float32 hf, l_float32 vf, l_int32 type ); -LEPT_DLL extern PIX * pixMakeFrameMask ( l_int32 w, l_int32 h, l_float32 hf1, l_float32 hf2, l_float32 vf1, l_float32 vf2 ); -LEPT_DLL extern PIX * pixMakeCoveringOfRectangles ( PIX *pixs, l_int32 maxiters ); -LEPT_DLL extern l_ok pixFractionFgInMask ( PIX *pix1, PIX *pix2, l_float32 *pfract ); -LEPT_DLL extern l_ok pixClipToForeground ( PIX *pixs, PIX **ppixd, BOX **pbox ); -LEPT_DLL 
extern l_ok pixTestClipToForeground ( PIX *pixs, l_int32 *pcanclip ); -LEPT_DLL extern l_ok pixClipBoxToForeground ( PIX *pixs, BOX *boxs, PIX **ppixd, BOX **pboxd ); -LEPT_DLL extern l_ok pixScanForForeground ( PIX *pixs, BOX *box, l_int32 scanflag, l_int32 *ploc ); -LEPT_DLL extern l_ok pixClipBoxToEdges ( PIX *pixs, BOX *boxs, l_int32 lowthresh, l_int32 highthresh, l_int32 maxwidth, l_int32 factor, PIX **ppixd, BOX **pboxd ); -LEPT_DLL extern l_ok pixScanForEdge ( PIX *pixs, BOX *box, l_int32 lowthresh, l_int32 highthresh, l_int32 maxwidth, l_int32 factor, l_int32 scanflag, l_int32 *ploc ); -LEPT_DLL extern NUMA * pixExtractOnLine ( PIX *pixs, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 factor ); -LEPT_DLL extern l_float32 pixAverageOnLine ( PIX *pixs, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 factor ); -LEPT_DLL extern NUMA * pixAverageIntensityProfile ( PIX *pixs, l_float32 fract, l_int32 dir, l_int32 first, l_int32 last, l_int32 factor1, l_int32 factor2 ); -LEPT_DLL extern NUMA * pixReversalProfile ( PIX *pixs, l_float32 fract, l_int32 dir, l_int32 first, l_int32 last, l_int32 minreversal, l_int32 factor1, l_int32 factor2 ); -LEPT_DLL extern l_ok pixWindowedVarianceOnLine ( PIX *pixs, l_int32 dir, l_int32 loc, l_int32 c1, l_int32 c2, l_int32 size, NUMA **pnad ); -LEPT_DLL extern l_ok pixMinMaxNearLine ( PIX *pixs, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2, l_int32 dist, l_int32 direction, NUMA **pnamin, NUMA **pnamax, l_float32 *pminave, l_float32 *pmaxave ); -LEPT_DLL extern PIX * pixRankRowTransform ( PIX *pixs ); -LEPT_DLL extern PIX * pixRankColumnTransform ( PIX *pixs ); -LEPT_DLL extern PIXA * pixaCreate ( l_int32 n ); -LEPT_DLL extern PIXA * pixaCreateFromPix ( PIX *pixs, l_int32 n, l_int32 cellw, l_int32 cellh ); -LEPT_DLL extern PIXA * pixaCreateFromBoxa ( PIX *pixs, BOXA *boxa, l_int32 start, l_int32 num, l_int32 *pcropwarn ); -LEPT_DLL extern PIXA * pixaSplitPix ( PIX *pixs, l_int32 nx, l_int32 ny, l_int32 
borderwidth, l_uint32 bordercolor ); -LEPT_DLL extern void pixaDestroy ( PIXA **ppixa ); -LEPT_DLL extern PIXA * pixaCopy ( PIXA *pixa, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaAddPix ( PIXA *pixa, PIX *pix, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaAddBox ( PIXA *pixa, BOX *box, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaExtendArrayToSize ( PIXA *pixa, l_int32 size ); -LEPT_DLL extern l_int32 pixaGetCount ( PIXA *pixa ); -LEPT_DLL extern l_ok pixaChangeRefcount ( PIXA *pixa, l_int32 delta ); -LEPT_DLL extern PIX * pixaGetPix ( PIXA *pixa, l_int32 index, l_int32 accesstype ); -LEPT_DLL extern l_ok pixaGetPixDimensions ( PIXA *pixa, l_int32 index, l_int32 *pw, l_int32 *ph, l_int32 *pd ); -LEPT_DLL extern BOXA * pixaGetBoxa ( PIXA *pixa, l_int32 accesstype ); -LEPT_DLL extern l_int32 pixaGetBoxaCount ( PIXA *pixa ); -LEPT_DLL extern BOX * pixaGetBox ( PIXA *pixa, l_int32 index, l_int32 accesstype ); -LEPT_DLL extern l_ok pixaGetBoxGeometry ( PIXA *pixa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_ok pixaSetBoxa ( PIXA *pixa, BOXA *boxa, l_int32 accesstype ); -LEPT_DLL extern PIX ** pixaGetPixArray ( PIXA *pixa ); -LEPT_DLL extern l_ok pixaVerifyDepth ( PIXA *pixa, l_int32 *psame, l_int32 *pmaxd ); -LEPT_DLL extern l_ok pixaVerifyDimensions ( PIXA *pixa, l_int32 *psame, l_int32 *pmaxw, l_int32 *pmaxh ); -LEPT_DLL extern l_ok pixaIsFull ( PIXA *pixa, l_int32 *pfullpa, l_int32 *pfullba ); -LEPT_DLL extern l_ok pixaCountText ( PIXA *pixa, l_int32 *pntext ); -LEPT_DLL extern l_ok pixaSetText ( PIXA *pixa, const char *text, SARRAY *sa ); -LEPT_DLL extern void *** pixaGetLinePtrs ( PIXA *pixa, l_int32 *psize ); -LEPT_DLL extern l_ok pixaWriteStreamInfo ( FILE *fp, PIXA *pixa ); -LEPT_DLL extern l_ok pixaReplacePix ( PIXA *pixa, l_int32 index, PIX *pix, BOX *box ); -LEPT_DLL extern l_ok pixaInsertPix ( PIXA *pixa, l_int32 index, PIX *pixs, BOX *box ); -LEPT_DLL extern l_ok pixaRemovePix ( PIXA *pixa, l_int32 index ); 
-LEPT_DLL extern l_ok pixaRemovePixAndSave ( PIXA *pixa, l_int32 index, PIX **ppix, BOX **pbox ); -LEPT_DLL extern l_ok pixaRemoveSelected ( PIXA *pixa, NUMA *naindex ); -LEPT_DLL extern l_ok pixaInitFull ( PIXA *pixa, PIX *pix, BOX *box ); -LEPT_DLL extern l_ok pixaClear ( PIXA *pixa ); -LEPT_DLL extern l_ok pixaJoin ( PIXA *pixad, PIXA *pixas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern PIXA * pixaInterleave ( PIXA *pixa1, PIXA *pixa2, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaaJoin ( PIXAA *paad, PIXAA *paas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern PIXAA * pixaaCreate ( l_int32 n ); -LEPT_DLL extern PIXAA * pixaaCreateFromPixa ( PIXA *pixa, l_int32 n, l_int32 type, l_int32 copyflag ); -LEPT_DLL extern void pixaaDestroy ( PIXAA **ppaa ); -LEPT_DLL extern l_ok pixaaAddPixa ( PIXAA *paa, PIXA *pixa, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaaExtendArray ( PIXAA *paa ); -LEPT_DLL extern l_ok pixaaAddPix ( PIXAA *paa, l_int32 index, PIX *pix, BOX *box, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaaAddBox ( PIXAA *paa, BOX *box, l_int32 copyflag ); -LEPT_DLL extern l_int32 pixaaGetCount ( PIXAA *paa, NUMA **pna ); -LEPT_DLL extern PIXA * pixaaGetPixa ( PIXAA *paa, l_int32 index, l_int32 accesstype ); -LEPT_DLL extern BOXA * pixaaGetBoxa ( PIXAA *paa, l_int32 accesstype ); -LEPT_DLL extern PIX * pixaaGetPix ( PIXAA *paa, l_int32 index, l_int32 ipix, l_int32 accessflag ); -LEPT_DLL extern l_ok pixaaVerifyDepth ( PIXAA *paa, l_int32 *psame, l_int32 *pmaxd ); -LEPT_DLL extern l_ok pixaaVerifyDimensions ( PIXAA *paa, l_int32 *psame, l_int32 *pmaxw, l_int32 *pmaxh ); -LEPT_DLL extern l_int32 pixaaIsFull ( PIXAA *paa, l_int32 *pfull ); -LEPT_DLL extern l_ok pixaaInitFull ( PIXAA *paa, PIXA *pixa ); -LEPT_DLL extern l_ok pixaaReplacePixa ( PIXAA *paa, l_int32 index, PIXA *pixa ); -LEPT_DLL extern l_ok pixaaClear ( PIXAA *paa ); -LEPT_DLL extern l_ok pixaaTruncate ( PIXAA *paa ); -LEPT_DLL extern PIXA * pixaRead ( const char *filename ); -LEPT_DLL extern 
PIXA * pixaReadStream ( FILE *fp ); -LEPT_DLL extern PIXA * pixaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixaWriteDebug ( const char *fname, PIXA *pixa ); -LEPT_DLL extern l_ok pixaWrite ( const char *filename, PIXA *pixa ); -LEPT_DLL extern l_ok pixaWriteStream ( FILE *fp, PIXA *pixa ); -LEPT_DLL extern l_ok pixaWriteMem ( l_uint8 **pdata, size_t *psize, PIXA *pixa ); -LEPT_DLL extern PIXA * pixaReadBoth ( const char *filename ); -LEPT_DLL extern PIXAA * pixaaReadFromFiles ( const char *dirname, const char *substr, l_int32 first, l_int32 nfiles ); -LEPT_DLL extern PIXAA * pixaaRead ( const char *filename ); -LEPT_DLL extern PIXAA * pixaaReadStream ( FILE *fp ); -LEPT_DLL extern PIXAA * pixaaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixaaWrite ( const char *filename, PIXAA *paa ); -LEPT_DLL extern l_ok pixaaWriteStream ( FILE *fp, PIXAA *paa ); -LEPT_DLL extern l_ok pixaaWriteMem ( l_uint8 **pdata, size_t *psize, PIXAA *paa ); -LEPT_DLL extern PIXACC * pixaccCreate ( l_int32 w, l_int32 h, l_int32 negflag ); -LEPT_DLL extern PIXACC * pixaccCreateFromPix ( PIX *pix, l_int32 negflag ); -LEPT_DLL extern void pixaccDestroy ( PIXACC **ppixacc ); -LEPT_DLL extern PIX * pixaccFinal ( PIXACC *pixacc, l_int32 outdepth ); -LEPT_DLL extern PIX * pixaccGetPix ( PIXACC *pixacc ); -LEPT_DLL extern l_int32 pixaccGetOffset ( PIXACC *pixacc ); -LEPT_DLL extern l_ok pixaccAdd ( PIXACC *pixacc, PIX *pix ); -LEPT_DLL extern l_ok pixaccSubtract ( PIXACC *pixacc, PIX *pix ); -LEPT_DLL extern l_ok pixaccMultConst ( PIXACC *pixacc, l_float32 factor ); -LEPT_DLL extern l_ok pixaccMultConstAccumulate ( PIXACC *pixacc, PIX *pix, l_float32 factor ); -LEPT_DLL extern PIX * pixSelectBySize ( PIX *pixs, l_int32 width, l_int32 height, l_int32 connectivity, l_int32 type, l_int32 relation, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectBySize ( PIXA *pixas, l_int32 width, l_int32 height, l_int32 type, l_int32 relation, l_int32 *pchanged ); 
-LEPT_DLL extern NUMA * pixaMakeSizeIndicator ( PIXA *pixa, l_int32 width, l_int32 height, l_int32 type, l_int32 relation ); -LEPT_DLL extern PIX * pixSelectByPerimToAreaRatio ( PIX *pixs, l_float32 thresh, l_int32 connectivity, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectByPerimToAreaRatio ( PIXA *pixas, l_float32 thresh, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIX * pixSelectByPerimSizeRatio ( PIX *pixs, l_float32 thresh, l_int32 connectivity, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectByPerimSizeRatio ( PIXA *pixas, l_float32 thresh, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIX * pixSelectByAreaFraction ( PIX *pixs, l_float32 thresh, l_int32 connectivity, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectByAreaFraction ( PIXA *pixas, l_float32 thresh, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIX * pixSelectByWidthHeightRatio ( PIX *pixs, l_float32 thresh, l_int32 connectivity, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectByWidthHeightRatio ( PIXA *pixas, l_float32 thresh, l_int32 type, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectByNumConnComp ( PIXA *pixas, l_int32 nmin, l_int32 nmax, l_int32 connectivity, l_int32 *pchanged ); -LEPT_DLL extern PIXA * pixaSelectWithIndicator ( PIXA *pixas, NUMA *na, l_int32 *pchanged ); -LEPT_DLL extern l_ok pixRemoveWithIndicator ( PIX *pixs, PIXA *pixa, NUMA *na ); -LEPT_DLL extern l_ok pixAddWithIndicator ( PIX *pixs, PIXA *pixa, NUMA *na ); -LEPT_DLL extern PIXA * pixaSelectWithString ( PIXA *pixas, const char *str, l_int32 *perror ); -LEPT_DLL extern PIX * pixaRenderComponent ( PIX *pixs, PIXA *pixa, l_int32 index ); -LEPT_DLL extern PIXA * pixaSort ( PIXA *pixas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex, l_int32 copyflag ); -LEPT_DLL extern PIXA * pixaBinSort ( PIXA *pixas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex, l_int32 copyflag ); -LEPT_DLL extern PIXA * 
pixaSortByIndex ( PIXA *pixas, NUMA *naindex, l_int32 copyflag ); -LEPT_DLL extern PIXAA * pixaSort2dByIndex ( PIXA *pixas, NUMAA *naa, l_int32 copyflag ); -LEPT_DLL extern PIXA * pixaSelectRange ( PIXA *pixas, l_int32 first, l_int32 last, l_int32 copyflag ); -LEPT_DLL extern PIXAA * pixaaSelectRange ( PIXAA *paas, l_int32 first, l_int32 last, l_int32 copyflag ); -LEPT_DLL extern PIXAA * pixaaScaleToSize ( PIXAA *paas, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIXAA * pixaaScaleToSizeVar ( PIXAA *paas, NUMA *nawd, NUMA *nahd ); -LEPT_DLL extern PIXA * pixaScaleToSize ( PIXA *pixas, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIXA * pixaScaleToSizeRel ( PIXA *pixas, l_int32 delw, l_int32 delh ); -LEPT_DLL extern PIXA * pixaScale ( PIXA *pixas, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIXA * pixaScaleBySampling ( PIXA *pixas, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIXA * pixaRotate ( PIXA *pixas, l_float32 angle, l_int32 type, l_int32 incolor, l_int32 width, l_int32 height ); -LEPT_DLL extern PIXA * pixaRotateOrth ( PIXA *pixas, l_int32 rotation ); -LEPT_DLL extern PIXA * pixaTranslate ( PIXA *pixas, l_int32 hshift, l_int32 vshift, l_int32 incolor ); -LEPT_DLL extern PIXA * pixaAddBorderGeneral ( PIXA *pixad, PIXA *pixas, l_int32 left, l_int32 right, l_int32 top, l_int32 bot, l_uint32 val ); -LEPT_DLL extern PIXA * pixaaFlattenToPixa ( PIXAA *paa, NUMA **pnaindex, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaaSizeRange ( PIXAA *paa, l_int32 *pminw, l_int32 *pminh, l_int32 *pmaxw, l_int32 *pmaxh ); -LEPT_DLL extern l_ok pixaSizeRange ( PIXA *pixa, l_int32 *pminw, l_int32 *pminh, l_int32 *pmaxw, l_int32 *pmaxh ); -LEPT_DLL extern PIXA * pixaClipToPix ( PIXA *pixas, PIX *pixs ); -LEPT_DLL extern l_ok pixaClipToForeground ( PIXA *pixas, PIXA **ppixad, BOXA **pboxa ); -LEPT_DLL extern l_ok pixaGetRenderingDepth ( PIXA *pixa, l_int32 *pdepth ); -LEPT_DLL extern l_ok pixaHasColor ( PIXA *pixa, l_int32 *phascolor ); -LEPT_DLL extern l_ok 
pixaAnyColormaps ( PIXA *pixa, l_int32 *phascmap ); -LEPT_DLL extern l_ok pixaGetDepthInfo ( PIXA *pixa, l_int32 *pmaxdepth, l_int32 *psame ); -LEPT_DLL extern PIXA * pixaConvertToSameDepth ( PIXA *pixas ); -LEPT_DLL extern l_ok pixaEqual ( PIXA *pixa1, PIXA *pixa2, l_int32 maxdist, NUMA **pnaindex, l_int32 *psame ); -LEPT_DLL extern l_ok pixaSetFullSizeBoxa ( PIXA *pixa ); -LEPT_DLL extern PIX * pixaDisplay ( PIXA *pixa, l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * pixaDisplayRandomCmap ( PIXA *pixa, l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * pixaDisplayLinearly ( PIXA *pixas, l_int32 direction, l_float32 scalefactor, l_int32 background, l_int32 spacing, l_int32 border, BOXA **pboxa ); -LEPT_DLL extern PIX * pixaDisplayOnLattice ( PIXA *pixa, l_int32 cellw, l_int32 cellh, l_int32 *pncols, BOXA **pboxa ); -LEPT_DLL extern PIX * pixaDisplayUnsplit ( PIXA *pixa, l_int32 nx, l_int32 ny, l_int32 borderwidth, l_uint32 bordercolor ); -LEPT_DLL extern PIX * pixaDisplayTiled ( PIXA *pixa, l_int32 maxwidth, l_int32 background, l_int32 spacing ); -LEPT_DLL extern PIX * pixaDisplayTiledInRows ( PIXA *pixa, l_int32 outdepth, l_int32 maxwidth, l_float32 scalefactor, l_int32 background, l_int32 spacing, l_int32 border ); -LEPT_DLL extern PIX * pixaDisplayTiledInColumns ( PIXA *pixas, l_int32 nx, l_float32 scalefactor, l_int32 spacing, l_int32 border ); -LEPT_DLL extern PIX * pixaDisplayTiledAndScaled ( PIXA *pixa, l_int32 outdepth, l_int32 tilewidth, l_int32 ncols, l_int32 background, l_int32 spacing, l_int32 border ); -LEPT_DLL extern PIX * pixaDisplayTiledWithText ( PIXA *pixa, l_int32 maxwidth, l_float32 scalefactor, l_int32 spacing, l_int32 border, l_int32 fontsize, l_uint32 textcolor ); -LEPT_DLL extern PIX * pixaDisplayTiledByIndex ( PIXA *pixa, NUMA *na, l_int32 width, l_int32 spacing, l_int32 border, l_int32 fontsize, l_uint32 textcolor ); -LEPT_DLL extern PIX * pixaaDisplay ( PIXAA *paa, l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * pixaaDisplayByPixa ( PIXAA 
*paa, l_int32 maxnx, l_float32 scalefactor, l_int32 hspacing, l_int32 vspacing, l_int32 border ); -LEPT_DLL extern PIXA * pixaaDisplayTiledAndScaled ( PIXAA *paa, l_int32 outdepth, l_int32 tilewidth, l_int32 ncols, l_int32 background, l_int32 spacing, l_int32 border ); -LEPT_DLL extern PIXA * pixaConvertTo1 ( PIXA *pixas, l_int32 thresh ); -LEPT_DLL extern PIXA * pixaConvertTo8 ( PIXA *pixas, l_int32 cmapflag ); -LEPT_DLL extern PIXA * pixaConvertTo8Colormap ( PIXA *pixas, l_int32 dither ); -LEPT_DLL extern PIXA * pixaConvertTo32 ( PIXA *pixas ); -LEPT_DLL extern PIXA * pixaConstrainedSelect ( PIXA *pixas, l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs, l_int32 copyflag ); -LEPT_DLL extern l_ok pixaSelectToPdf ( PIXA *pixas, l_int32 first, l_int32 last, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, l_uint32 color, l_int32 fontsize, const char *fileout ); -LEPT_DLL extern PIXA * pixaMakeFromTiledPixa ( PIXA *pixas, l_int32 w, l_int32 h, l_int32 nsamp ); -LEPT_DLL extern PIXA * pixaMakeFromTiledPix ( PIX *pixs, l_int32 w, l_int32 h, l_int32 start, l_int32 num, BOXA *boxa ); -LEPT_DLL extern l_ok pixGetTileCount ( PIX *pix, l_int32 *pn ); -LEPT_DLL extern PIXA * pixaDisplayMultiTiled ( PIXA *pixas, l_int32 nx, l_int32 ny, l_int32 maxw, l_int32 maxh, l_float32 scalefactor, l_int32 spacing, l_int32 border ); -LEPT_DLL extern l_ok pixaSplitIntoFiles ( PIXA *pixas, l_int32 nsplit, l_float32 scale, l_int32 outwidth, l_int32 write_pixa, l_int32 write_pix, l_int32 write_pdf ); -LEPT_DLL extern l_ok convertToNUpFiles ( const char *dir, const char *substr, l_int32 nx, l_int32 ny, l_int32 tw, l_int32 spacing, l_int32 border, l_int32 fontsize, const char *outdir ); -LEPT_DLL extern PIXA * convertToNUpPixa ( const char *dir, const char *substr, l_int32 nx, l_int32 ny, l_int32 tw, l_int32 spacing, l_int32 border, l_int32 fontsize ); -LEPT_DLL extern PIXA * pixaConvertToNUpPixa ( PIXA *pixas, SARRAY *sa, l_int32 nx, l_int32 ny, l_int32 tw, l_int32 
spacing, l_int32 border, l_int32 fontsize ); -LEPT_DLL extern l_ok pixaCompareInPdf ( PIXA *pixa1, PIXA *pixa2, l_int32 nx, l_int32 ny, l_int32 tw, l_int32 spacing, l_int32 border, l_int32 fontsize, const char *fileout ); -LEPT_DLL extern l_ok pmsCreate ( size_t minsize, size_t smallest, NUMA *numalloc, const char *logfile ); -LEPT_DLL extern void pmsDestroy ( void ); -LEPT_DLL extern void * pmsCustomAlloc ( size_t nbytes ); -LEPT_DLL extern void pmsCustomDealloc ( void *data ); -LEPT_DLL extern void * pmsGetAlloc ( size_t nbytes ); -LEPT_DLL extern l_ok pmsGetLevelForAlloc ( size_t nbytes, l_int32 *plevel ); -LEPT_DLL extern l_ok pmsGetLevelForDealloc ( void *data, l_int32 *plevel ); -LEPT_DLL extern void pmsLogInfo ( void ); -LEPT_DLL extern l_ok pixAddConstantGray ( PIX *pixs, l_int32 val ); -LEPT_DLL extern l_ok pixMultConstantGray ( PIX *pixs, l_float32 val ); -LEPT_DLL extern PIX * pixAddGray ( PIX *pixd, PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixSubtractGray ( PIX *pixd, PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixMultiplyGray ( PIX *pixs, PIX *pixg, l_float32 norm ); -LEPT_DLL extern PIX * pixThresholdToValue ( PIX *pixd, PIX *pixs, l_int32 threshval, l_int32 setval ); -LEPT_DLL extern PIX * pixInitAccumulate ( l_int32 w, l_int32 h, l_uint32 offset ); -LEPT_DLL extern PIX * pixFinalAccumulate ( PIX *pixs, l_uint32 offset, l_int32 depth ); -LEPT_DLL extern PIX * pixFinalAccumulateThreshold ( PIX *pixs, l_uint32 offset, l_uint32 threshold ); -LEPT_DLL extern l_ok pixAccumulate ( PIX *pixd, PIX *pixs, l_int32 op ); -LEPT_DLL extern l_ok pixMultConstAccumulate ( PIX *pixs, l_float32 factor, l_uint32 offset ); -LEPT_DLL extern PIX * pixAbsDifference ( PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixAddRGB ( PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern PIX * pixMinOrMax ( PIX *pixd, PIX *pixs1, PIX *pixs2, l_int32 type ); -LEPT_DLL extern PIX * pixMaxDynamicRange ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PIX * pixMaxDynamicRangeRGB ( PIX 
*pixs, l_int32 type ); -LEPT_DLL extern l_uint32 linearScaleRGBVal ( l_uint32 sval, l_float32 factor ); -LEPT_DLL extern l_uint32 logScaleRGBVal ( l_uint32 sval, l_float32 *tab, l_float32 factor ); -LEPT_DLL extern l_float32 * makeLogBase2Tab ( void ); -LEPT_DLL extern l_float32 getLogBase2 ( l_int32 val, l_float32 *logtab ); -LEPT_DLL extern PIXC * pixcompCreateFromPix ( PIX *pix, l_int32 comptype ); -LEPT_DLL extern PIXC * pixcompCreateFromString ( l_uint8 *data, size_t size, l_int32 copyflag ); -LEPT_DLL extern PIXC * pixcompCreateFromFile ( const char *filename, l_int32 comptype ); -LEPT_DLL extern void pixcompDestroy ( PIXC **ppixc ); -LEPT_DLL extern PIXC * pixcompCopy ( PIXC *pixcs ); -LEPT_DLL extern l_ok pixcompGetDimensions ( PIXC *pixc, l_int32 *pw, l_int32 *ph, l_int32 *pd ); -LEPT_DLL extern l_ok pixcompGetParameters ( PIXC *pixc, l_int32 *pxres, l_int32 *pyres, l_int32 *pcomptype, l_int32 *pcmapflag ); -LEPT_DLL extern l_ok pixcompDetermineFormat ( l_int32 comptype, l_int32 d, l_int32 cmapflag, l_int32 *pformat ); -LEPT_DLL extern PIX * pixCreateFromPixcomp ( PIXC *pixc ); -LEPT_DLL extern PIXAC * pixacompCreate ( l_int32 n ); -LEPT_DLL extern PIXAC * pixacompCreateWithInit ( l_int32 n, l_int32 offset, PIX *pix, l_int32 comptype ); -LEPT_DLL extern PIXAC * pixacompCreateFromPixa ( PIXA *pixa, l_int32 comptype, l_int32 accesstype ); -LEPT_DLL extern PIXAC * pixacompCreateFromFiles ( const char *dirname, const char *substr, l_int32 comptype ); -LEPT_DLL extern PIXAC * pixacompCreateFromSA ( SARRAY *sa, l_int32 comptype ); -LEPT_DLL extern void pixacompDestroy ( PIXAC **ppixac ); -LEPT_DLL extern l_ok pixacompAddPix ( PIXAC *pixac, PIX *pix, l_int32 comptype ); -LEPT_DLL extern l_ok pixacompAddPixcomp ( PIXAC *pixac, PIXC *pixc, l_int32 copyflag ); -LEPT_DLL extern l_ok pixacompReplacePix ( PIXAC *pixac, l_int32 index, PIX *pix, l_int32 comptype ); -LEPT_DLL extern l_ok pixacompReplacePixcomp ( PIXAC *pixac, l_int32 index, PIXC *pixc ); -LEPT_DLL extern 
l_ok pixacompAddBox ( PIXAC *pixac, BOX *box, l_int32 copyflag ); -LEPT_DLL extern l_int32 pixacompGetCount ( PIXAC *pixac ); -LEPT_DLL extern PIXC * pixacompGetPixcomp ( PIXAC *pixac, l_int32 index, l_int32 copyflag ); -LEPT_DLL extern PIX * pixacompGetPix ( PIXAC *pixac, l_int32 index ); -LEPT_DLL extern l_ok pixacompGetPixDimensions ( PIXAC *pixac, l_int32 index, l_int32 *pw, l_int32 *ph, l_int32 *pd ); -LEPT_DLL extern BOXA * pixacompGetBoxa ( PIXAC *pixac, l_int32 accesstype ); -LEPT_DLL extern l_int32 pixacompGetBoxaCount ( PIXAC *pixac ); -LEPT_DLL extern BOX * pixacompGetBox ( PIXAC *pixac, l_int32 index, l_int32 accesstype ); -LEPT_DLL extern l_ok pixacompGetBoxGeometry ( PIXAC *pixac, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern l_int32 pixacompGetOffset ( PIXAC *pixac ); -LEPT_DLL extern l_ok pixacompSetOffset ( PIXAC *pixac, l_int32 offset ); -LEPT_DLL extern PIXA * pixaCreateFromPixacomp ( PIXAC *pixac, l_int32 accesstype ); -LEPT_DLL extern l_ok pixacompJoin ( PIXAC *pixacd, PIXAC *pixacs, l_int32 istart, l_int32 iend ); -LEPT_DLL extern PIXAC * pixacompInterleave ( PIXAC *pixac1, PIXAC *pixac2 ); -LEPT_DLL extern PIXAC * pixacompRead ( const char *filename ); -LEPT_DLL extern PIXAC * pixacompReadStream ( FILE *fp ); -LEPT_DLL extern PIXAC * pixacompReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixacompWrite ( const char *filename, PIXAC *pixac ); -LEPT_DLL extern l_ok pixacompWriteStream ( FILE *fp, PIXAC *pixac ); -LEPT_DLL extern l_ok pixacompWriteMem ( l_uint8 **pdata, size_t *psize, PIXAC *pixac ); -LEPT_DLL extern l_ok pixacompConvertToPdf ( PIXAC *pixac, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout ); -LEPT_DLL extern l_ok pixacompConvertToPdfData ( PIXAC *pixac, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok 
pixacompFastConvertToPdfData ( PIXAC *pixac, const char *title, l_uint8 **pdata, size_t *pnbytes ); -LEPT_DLL extern l_ok pixacompWriteStreamInfo ( FILE *fp, PIXAC *pixac, const char *text ); -LEPT_DLL extern l_ok pixcompWriteStreamInfo ( FILE *fp, PIXC *pixc, const char *text ); -LEPT_DLL extern PIX * pixacompDisplayTiledAndScaled ( PIXAC *pixac, l_int32 outdepth, l_int32 tilewidth, l_int32 ncols, l_int32 background, l_int32 spacing, l_int32 border ); -LEPT_DLL extern l_ok pixacompWriteFiles ( PIXAC *pixac, const char *subdir ); -LEPT_DLL extern l_ok pixcompWriteFile ( const char *rootname, PIXC *pixc ); -LEPT_DLL extern PIX * pixThreshold8 ( PIX *pixs, l_int32 d, l_int32 nlevels, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixRemoveColormapGeneral ( PIX *pixs, l_int32 type, l_int32 ifnocmap ); -LEPT_DLL extern PIX * pixRemoveColormap ( PIX *pixs, l_int32 type ); -LEPT_DLL extern l_ok pixAddGrayColormap8 ( PIX *pixs ); -LEPT_DLL extern PIX * pixAddMinimalGrayColormap8 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertRGBToLuminance ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertRGBToGray ( PIX *pixs, l_float32 rwt, l_float32 gwt, l_float32 bwt ); -LEPT_DLL extern PIX * pixConvertRGBToGrayFast ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertRGBToGrayMinMax ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PIX * pixConvertRGBToGraySatBoost ( PIX *pixs, l_int32 refval ); -LEPT_DLL extern PIX * pixConvertRGBToGrayArb ( PIX *pixs, l_float32 rc, l_float32 gc, l_float32 bc ); -LEPT_DLL extern PIX * pixConvertRGBToBinaryArb ( PIX *pixs, l_float32 rc, l_float32 gc, l_float32 bc, l_int32 thresh, l_int32 relation ); -LEPT_DLL extern PIX * pixConvertGrayToColormap ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertGrayToColormap8 ( PIX *pixs, l_int32 mindepth ); -LEPT_DLL extern PIX * pixColorizeGray ( PIX *pixs, l_uint32 color, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixConvertRGBToColormap ( PIX *pixs, l_int32 ditherflag ); -LEPT_DLL extern PIX * pixConvertCmapTo1 ( PIX *pixs ); 
-LEPT_DLL extern l_ok pixQuantizeIfFewColors ( PIX *pixs, l_int32 maxcolors, l_int32 mingraycolors, l_int32 octlevel, PIX **ppixd ); -LEPT_DLL extern PIX * pixConvert16To8 ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PIX * pixConvertGrayToFalseColor ( PIX *pixs, l_float32 gamma ); -LEPT_DLL extern PIX * pixUnpackBinary ( PIX *pixs, l_int32 depth, l_int32 invert ); -LEPT_DLL extern PIX * pixConvert1To16 ( PIX *pixd, PIX *pixs, l_uint16 val0, l_uint16 val1 ); -LEPT_DLL extern PIX * pixConvert1To32 ( PIX *pixd, PIX *pixs, l_uint32 val0, l_uint32 val1 ); -LEPT_DLL extern PIX * pixConvert1To2Cmap ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert1To2 ( PIX *pixd, PIX *pixs, l_int32 val0, l_int32 val1 ); -LEPT_DLL extern PIX * pixConvert1To4Cmap ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert1To4 ( PIX *pixd, PIX *pixs, l_int32 val0, l_int32 val1 ); -LEPT_DLL extern PIX * pixConvert1To8Cmap ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert1To8 ( PIX *pixd, PIX *pixs, l_uint8 val0, l_uint8 val1 ); -LEPT_DLL extern PIX * pixConvert2To8 ( PIX *pixs, l_uint8 val0, l_uint8 val1, l_uint8 val2, l_uint8 val3, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixConvert4To8 ( PIX *pixs, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixConvert8To16 ( PIX *pixs, l_int32 leftshift ); -LEPT_DLL extern PIX * pixConvertTo2 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert8To2 ( PIX *pix ); -LEPT_DLL extern PIX * pixConvertTo4 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert8To4 ( PIX *pix ); -LEPT_DLL extern PIX * pixConvertTo1Adaptive ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertTo1 ( PIX *pixs, l_int32 threshold ); -LEPT_DLL extern PIX * pixConvertTo1BySampling ( PIX *pixs, l_int32 factor, l_int32 threshold ); -LEPT_DLL extern PIX * pixConvertTo8 ( PIX *pixs, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixConvertTo8BySampling ( PIX *pixs, l_int32 factor, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixConvertTo8Colormap ( PIX *pixs, l_int32 dither ); -LEPT_DLL extern PIX * pixConvertTo16 ( PIX 
*pixs ); -LEPT_DLL extern PIX * pixConvertTo32 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertTo32BySampling ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern PIX * pixConvert8To32 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertTo8Or32 ( PIX *pixs, l_int32 copyflag, l_int32 warnflag ); -LEPT_DLL extern PIX * pixConvert24To32 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert32To24 ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvert32To16 ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PIX * pixConvert32To8 ( PIX *pixs, l_int32 type16, l_int32 type8 ); -LEPT_DLL extern PIX * pixRemoveAlpha ( PIX *pixs ); -LEPT_DLL extern PIX * pixAddAlphaTo1bpp ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixConvertLossless ( PIX *pixs, l_int32 d ); -LEPT_DLL extern PIX * pixConvertForPSWrap ( PIX *pixs ); -LEPT_DLL extern PIX * pixConvertToSubpixelRGB ( PIX *pixs, l_float32 scalex, l_float32 scaley, l_int32 order ); -LEPT_DLL extern PIX * pixConvertGrayToSubpixelRGB ( PIX *pixs, l_float32 scalex, l_float32 scaley, l_int32 order ); -LEPT_DLL extern PIX * pixConvertColorToSubpixelRGB ( PIX *pixs, l_float32 scalex, l_float32 scaley, l_int32 order ); -LEPT_DLL extern void l_setNeutralBoostVal ( l_int32 val ); -LEPT_DLL extern PIX * pixConnCompTransform ( PIX *pixs, l_int32 connect, l_int32 depth ); -LEPT_DLL extern PIX * pixConnCompAreaTransform ( PIX *pixs, l_int32 connect ); -LEPT_DLL extern l_ok pixConnCompIncrInit ( PIX *pixs, l_int32 conn, PIX **ppixd, PTAA **pptaa, l_int32 *pncc ); -LEPT_DLL extern l_int32 pixConnCompIncrAdd ( PIX *pixs, PTAA *ptaa, l_int32 *pncc, l_float32 x, l_float32 y, l_int32 debug ); -LEPT_DLL extern l_ok pixGetSortedNeighborValues ( PIX *pixs, l_int32 x, l_int32 y, l_int32 conn, l_int32 **pneigh, l_int32 *pnvals ); -LEPT_DLL extern PIX * pixLocToColorTransform ( PIX *pixs ); -LEPT_DLL extern PIXTILING * pixTilingCreate ( PIX *pixs, l_int32 nx, l_int32 ny, l_int32 w, l_int32 h, l_int32 xoverlap, l_int32 yoverlap ); -LEPT_DLL extern void pixTilingDestroy ( 
PIXTILING **ppt ); -LEPT_DLL extern l_ok pixTilingGetCount ( PIXTILING *pt, l_int32 *pnx, l_int32 *pny ); -LEPT_DLL extern l_ok pixTilingGetSize ( PIXTILING *pt, l_int32 *pw, l_int32 *ph ); -LEPT_DLL extern PIX * pixTilingGetTile ( PIXTILING *pt, l_int32 i, l_int32 j ); -LEPT_DLL extern l_ok pixTilingNoStripOnPaint ( PIXTILING *pt ); -LEPT_DLL extern l_ok pixTilingPaintTile ( PIX *pixd, l_int32 i, l_int32 j, PIX *pixs, PIXTILING *pt ); -LEPT_DLL extern PIX * pixReadStreamPng ( FILE *fp ); -LEPT_DLL extern l_ok readHeaderPng ( const char *filename, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok freadHeaderPng ( FILE *fp, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok readHeaderMemPng ( const l_uint8 *data, size_t size, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_int32 fgetPngResolution ( FILE *fp, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_ok isPngInterlaced ( const char *filename, l_int32 *pinterlaced ); -LEPT_DLL extern l_ok fgetPngColormapInfo ( FILE *fp, PIXCMAP **pcmap, l_int32 *ptransparency ); -LEPT_DLL extern l_ok pixWritePng ( const char *filename, PIX *pix, l_float32 gamma ); -LEPT_DLL extern l_ok pixWriteStreamPng ( FILE *fp, PIX *pix, l_float32 gamma ); -LEPT_DLL extern l_ok pixSetZlibCompression ( PIX *pix, l_int32 compval ); -LEPT_DLL extern void l_pngSetReadStrip16To8 ( l_int32 flag ); -LEPT_DLL extern PIX * pixReadMemPng ( const l_uint8 *filedata, size_t filesize ); -LEPT_DLL extern l_ok pixWriteMemPng ( l_uint8 **pfiledata, size_t *pfilesize, PIX *pix, l_float32 gamma ); -LEPT_DLL extern PIX * pixReadStreamPnm ( FILE *fp ); -LEPT_DLL extern l_ok readHeaderPnm ( const char *filename, l_int32 *pw, l_int32 *ph, l_int32 *pd, l_int32 *ptype, l_int32 *pbps, l_int32 *pspp ); -LEPT_DLL extern l_ok freadHeaderPnm ( FILE *fp, l_int32 *pw, l_int32 *ph, l_int32 *pd, l_int32 *ptype, l_int32 *pbps, 
l_int32 *pspp ); -LEPT_DLL extern l_ok pixWriteStreamPnm ( FILE *fp, PIX *pix ); -LEPT_DLL extern l_ok pixWriteStreamAsciiPnm ( FILE *fp, PIX *pix ); -LEPT_DLL extern l_ok pixWriteStreamPam ( FILE *fp, PIX *pix ); -LEPT_DLL extern PIX * pixReadMemPnm ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok readHeaderMemPnm ( const l_uint8 *data, size_t size, l_int32 *pw, l_int32 *ph, l_int32 *pd, l_int32 *ptype, l_int32 *pbps, l_int32 *pspp ); -LEPT_DLL extern l_ok pixWriteMemPnm ( l_uint8 **pdata, size_t *psize, PIX *pix ); -LEPT_DLL extern l_ok pixWriteMemPam ( l_uint8 **pdata, size_t *psize, PIX *pix ); -LEPT_DLL extern PIX * pixProjectiveSampledPta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor ); -LEPT_DLL extern PIX * pixProjectiveSampled ( PIX *pixs, l_float32 *vc, l_int32 incolor ); -LEPT_DLL extern PIX * pixProjectivePta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor ); -LEPT_DLL extern PIX * pixProjective ( PIX *pixs, l_float32 *vc, l_int32 incolor ); -LEPT_DLL extern PIX * pixProjectivePtaColor ( PIX *pixs, PTA *ptad, PTA *ptas, l_uint32 colorval ); -LEPT_DLL extern PIX * pixProjectiveColor ( PIX *pixs, l_float32 *vc, l_uint32 colorval ); -LEPT_DLL extern PIX * pixProjectivePtaGray ( PIX *pixs, PTA *ptad, PTA *ptas, l_uint8 grayval ); -LEPT_DLL extern PIX * pixProjectiveGray ( PIX *pixs, l_float32 *vc, l_uint8 grayval ); -LEPT_DLL extern PIX * pixProjectivePtaWithAlpha ( PIX *pixs, PTA *ptad, PTA *ptas, PIX *pixg, l_float32 fract, l_int32 border ); -LEPT_DLL extern l_ok getProjectiveXformCoeffs ( PTA *ptas, PTA *ptad, l_float32 **pvc ); -LEPT_DLL extern l_ok projectiveXformSampledPt ( l_float32 *vc, l_int32 x, l_int32 y, l_int32 *pxp, l_int32 *pyp ); -LEPT_DLL extern l_ok projectiveXformPt ( l_float32 *vc, l_int32 x, l_int32 y, l_float32 *pxp, l_float32 *pyp ); -LEPT_DLL extern l_ok convertFilesToPS ( const char *dirin, const char *substr, l_int32 res, const char *fileout ); -LEPT_DLL extern l_ok sarrayConvertFilesToPS ( SARRAY *sa, l_int32 
res, const char *fileout ); -LEPT_DLL extern l_ok convertFilesFittedToPS ( const char *dirin, const char *substr, l_float32 xpts, l_float32 ypts, const char *fileout ); -LEPT_DLL extern l_ok sarrayConvertFilesFittedToPS ( SARRAY *sa, l_float32 xpts, l_float32 ypts, const char *fileout ); -LEPT_DLL extern l_ok writeImageCompressedToPSFile ( const char *filein, const char *fileout, l_int32 res, l_int32 *pindex ); -LEPT_DLL extern l_ok convertSegmentedPagesToPS ( const char *pagedir, const char *pagestr, l_int32 page_numpre, const char *maskdir, const char *maskstr, l_int32 mask_numpre, l_int32 numpost, l_int32 maxnum, l_float32 textscale, l_float32 imagescale, l_int32 threshold, const char *fileout ); -LEPT_DLL extern l_ok pixWriteSegmentedPageToPS ( PIX *pixs, PIX *pixm, l_float32 textscale, l_float32 imagescale, l_int32 threshold, l_int32 pageno, const char *fileout ); -LEPT_DLL extern l_ok pixWriteMixedToPS ( PIX *pixb, PIX *pixc, l_float32 scale, l_int32 pageno, const char *fileout ); -LEPT_DLL extern l_ok convertToPSEmbed ( const char *filein, const char *fileout, l_int32 level ); -LEPT_DLL extern l_ok pixaWriteCompressedToPS ( PIXA *pixa, const char *fileout, l_int32 res, l_int32 level ); -LEPT_DLL extern l_ok pixWriteCompressedToPS ( PIX *pix, const char *fileout, l_int32 res, l_int32 level, l_int32 *pindex ); -LEPT_DLL extern l_ok pixWritePSEmbed ( const char *filein, const char *fileout ); -LEPT_DLL extern l_ok pixWriteStreamPS ( FILE *fp, PIX *pix, BOX *box, l_int32 res, l_float32 scale ); -LEPT_DLL extern char * pixWriteStringPS ( PIX *pixs, BOX *box, l_int32 res, l_float32 scale ); -LEPT_DLL extern char * generateUncompressedPS ( char *hexdata, l_int32 w, l_int32 h, l_int32 d, l_int32 psbpl, l_int32 bps, l_float32 xpt, l_float32 ypt, l_float32 wpt, l_float32 hpt, l_int32 boxflag ); -LEPT_DLL extern l_ok convertJpegToPSEmbed ( const char *filein, const char *fileout ); -LEPT_DLL extern l_ok convertJpegToPS ( const char *filein, const char *fileout, const 
char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage ); -LEPT_DLL extern l_ok convertG4ToPSEmbed ( const char *filein, const char *fileout ); -LEPT_DLL extern l_ok convertG4ToPS ( const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 maskflag, l_int32 endpage ); -LEPT_DLL extern l_ok convertTiffMultipageToPS ( const char *filein, const char *fileout, l_float32 fillfract ); -LEPT_DLL extern l_ok convertFlateToPSEmbed ( const char *filein, const char *fileout ); -LEPT_DLL extern l_ok convertFlateToPS ( const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage ); -LEPT_DLL extern l_ok pixWriteMemPS ( l_uint8 **pdata, size_t *psize, PIX *pix, BOX *box, l_int32 res, l_float32 scale ); -LEPT_DLL extern l_int32 getResLetterPage ( l_int32 w, l_int32 h, l_float32 fillfract ); -LEPT_DLL extern l_int32 getResA4Page ( l_int32 w, l_int32 h, l_float32 fillfract ); -LEPT_DLL extern void l_psWriteBoundingBox ( l_int32 flag ); -LEPT_DLL extern PTA * ptaCreate ( l_int32 n ); -LEPT_DLL extern PTA * ptaCreateFromNuma ( NUMA *nax, NUMA *nay ); -LEPT_DLL extern void ptaDestroy ( PTA **ppta ); -LEPT_DLL extern PTA * ptaCopy ( PTA *pta ); -LEPT_DLL extern PTA * ptaCopyRange ( PTA *ptas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern PTA * ptaClone ( PTA *pta ); -LEPT_DLL extern l_ok ptaEmpty ( PTA *pta ); -LEPT_DLL extern l_ok ptaAddPt ( PTA *pta, l_float32 x, l_float32 y ); -LEPT_DLL extern l_ok ptaInsertPt ( PTA *pta, l_int32 index, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok ptaRemovePt ( PTA *pta, l_int32 index ); -LEPT_DLL extern l_int32 ptaGetRefcount ( PTA *pta ); -LEPT_DLL extern l_int32 ptaChangeRefcount ( PTA *pta, l_int32 delta ); -LEPT_DLL extern l_int32 ptaGetCount ( PTA *pta ); -LEPT_DLL extern l_ok ptaGetPt ( PTA *pta, l_int32 index, l_float32 *px, 
l_float32 *py ); -LEPT_DLL extern l_ok ptaGetIPt ( PTA *pta, l_int32 index, l_int32 *px, l_int32 *py ); -LEPT_DLL extern l_ok ptaSetPt ( PTA *pta, l_int32 index, l_float32 x, l_float32 y ); -LEPT_DLL extern l_ok ptaGetArrays ( PTA *pta, NUMA **pnax, NUMA **pnay ); -LEPT_DLL extern PTA * ptaRead ( const char *filename ); -LEPT_DLL extern PTA * ptaReadStream ( FILE *fp ); -LEPT_DLL extern PTA * ptaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok ptaWriteDebug ( const char *filename, PTA *pta, l_int32 type ); -LEPT_DLL extern l_ok ptaWrite ( const char *filename, PTA *pta, l_int32 type ); -LEPT_DLL extern l_ok ptaWriteStream ( FILE *fp, PTA *pta, l_int32 type ); -LEPT_DLL extern l_ok ptaWriteMem ( l_uint8 **pdata, size_t *psize, PTA *pta, l_int32 type ); -LEPT_DLL extern PTAA * ptaaCreate ( l_int32 n ); -LEPT_DLL extern void ptaaDestroy ( PTAA **pptaa ); -LEPT_DLL extern l_ok ptaaAddPta ( PTAA *ptaa, PTA *pta, l_int32 copyflag ); -LEPT_DLL extern l_int32 ptaaGetCount ( PTAA *ptaa ); -LEPT_DLL extern PTA * ptaaGetPta ( PTAA *ptaa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern l_ok ptaaGetPt ( PTAA *ptaa, l_int32 ipta, l_int32 jpt, l_float32 *px, l_float32 *py ); -LEPT_DLL extern l_ok ptaaInitFull ( PTAA *ptaa, PTA *pta ); -LEPT_DLL extern l_ok ptaaReplacePta ( PTAA *ptaa, l_int32 index, PTA *pta ); -LEPT_DLL extern l_ok ptaaAddPt ( PTAA *ptaa, l_int32 ipta, l_float32 x, l_float32 y ); -LEPT_DLL extern l_ok ptaaTruncate ( PTAA *ptaa ); -LEPT_DLL extern PTAA * ptaaRead ( const char *filename ); -LEPT_DLL extern PTAA * ptaaReadStream ( FILE *fp ); -LEPT_DLL extern PTAA * ptaaReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok ptaaWriteDebug ( const char *filename, PTAA *ptaa, l_int32 type ); -LEPT_DLL extern l_ok ptaaWrite ( const char *filename, PTAA *ptaa, l_int32 type ); -LEPT_DLL extern l_ok ptaaWriteStream ( FILE *fp, PTAA *ptaa, l_int32 type ); -LEPT_DLL extern l_ok ptaaWriteMem ( l_uint8 **pdata, size_t *psize, PTAA *ptaa, 
l_int32 type ); -LEPT_DLL extern PTA * ptaSubsample ( PTA *ptas, l_int32 subfactor ); -LEPT_DLL extern l_ok ptaJoin ( PTA *ptad, PTA *ptas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern l_ok ptaaJoin ( PTAA *ptaad, PTAA *ptaas, l_int32 istart, l_int32 iend ); -LEPT_DLL extern PTA * ptaReverse ( PTA *ptas, l_int32 type ); -LEPT_DLL extern PTA * ptaTranspose ( PTA *ptas ); -LEPT_DLL extern PTA * ptaCyclicPerm ( PTA *ptas, l_int32 xs, l_int32 ys ); -LEPT_DLL extern PTA * ptaSelectRange ( PTA *ptas, l_int32 first, l_int32 last ); -LEPT_DLL extern BOX * ptaGetBoundingRegion ( PTA *pta ); -LEPT_DLL extern l_ok ptaGetRange ( PTA *pta, l_float32 *pminx, l_float32 *pmaxx, l_float32 *pminy, l_float32 *pmaxy ); -LEPT_DLL extern PTA * ptaGetInsideBox ( PTA *ptas, BOX *box ); -LEPT_DLL extern PTA * pixFindCornerPixels ( PIX *pixs ); -LEPT_DLL extern l_int32 ptaContainsPt ( PTA *pta, l_int32 x, l_int32 y ); -LEPT_DLL extern l_int32 ptaTestIntersection ( PTA *pta1, PTA *pta2 ); -LEPT_DLL extern PTA * ptaTransform ( PTA *ptas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern l_int32 ptaPtInsidePolygon ( PTA *pta, l_float32 x, l_float32 y, l_int32 *pinside ); -LEPT_DLL extern l_float32 l_angleBetweenVectors ( l_float32 x1, l_float32 y1, l_float32 x2, l_float32 y2 ); -LEPT_DLL extern l_ok ptaGetMinMax ( PTA *pta, l_float32 *pxmin, l_float32 *pymin, l_float32 *pxmax, l_float32 *pymax ); -LEPT_DLL extern PTA * ptaSelectByValue ( PTA *ptas, l_float32 xth, l_float32 yth, l_int32 type, l_int32 relation ); -LEPT_DLL extern PTA * ptaCropToMask ( PTA *ptas, PIX *pixm ); -LEPT_DLL extern l_ok ptaGetLinearLSF ( PTA *pta, l_float32 *pa, l_float32 *pb, NUMA **pnafit ); -LEPT_DLL extern l_ok ptaGetQuadraticLSF ( PTA *pta, l_float32 *pa, l_float32 *pb, l_float32 *pc, NUMA **pnafit ); -LEPT_DLL extern l_ok ptaGetCubicLSF ( PTA *pta, l_float32 *pa, l_float32 *pb, l_float32 *pc, l_float32 *pd, NUMA **pnafit ); -LEPT_DLL extern l_ok ptaGetQuarticLSF ( PTA 
*pta, l_float32 *pa, l_float32 *pb, l_float32 *pc, l_float32 *pd, l_float32 *pe, NUMA **pnafit ); -LEPT_DLL extern l_ok ptaNoisyLinearLSF ( PTA *pta, l_float32 factor, PTA **pptad, l_float32 *pa, l_float32 *pb, l_float32 *pmederr, NUMA **pnafit ); -LEPT_DLL extern l_ok ptaNoisyQuadraticLSF ( PTA *pta, l_float32 factor, PTA **pptad, l_float32 *pa, l_float32 *pb, l_float32 *pc, l_float32 *pmederr, NUMA **pnafit ); -LEPT_DLL extern l_ok applyLinearFit ( l_float32 a, l_float32 b, l_float32 x, l_float32 *py ); -LEPT_DLL extern l_ok applyQuadraticFit ( l_float32 a, l_float32 b, l_float32 c, l_float32 x, l_float32 *py ); -LEPT_DLL extern l_ok applyCubicFit ( l_float32 a, l_float32 b, l_float32 c, l_float32 d, l_float32 x, l_float32 *py ); -LEPT_DLL extern l_ok applyQuarticFit ( l_float32 a, l_float32 b, l_float32 c, l_float32 d, l_float32 e, l_float32 x, l_float32 *py ); -LEPT_DLL extern l_ok pixPlotAlongPta ( PIX *pixs, PTA *pta, l_int32 outformat, const char *title ); -LEPT_DLL extern PTA * ptaGetPixelsFromPix ( PIX *pixs, BOX *box ); -LEPT_DLL extern PIX * pixGenerateFromPta ( PTA *pta, l_int32 w, l_int32 h ); -LEPT_DLL extern PTA * ptaGetBoundaryPixels ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PTAA * ptaaGetBoundaryPixels ( PIX *pixs, l_int32 type, l_int32 connectivity, BOXA **pboxa, PIXA **ppixa ); -LEPT_DLL extern PTAA * ptaaIndexLabeledPixels ( PIX *pixs, l_int32 *pncc ); -LEPT_DLL extern PTA * ptaGetNeighborPixLocs ( PIX *pixs, l_int32 x, l_int32 y, l_int32 conn ); -LEPT_DLL extern PTA * numaConvertToPta1 ( NUMA *na ); -LEPT_DLL extern PTA * numaConvertToPta2 ( NUMA *nax, NUMA *nay ); -LEPT_DLL extern l_ok ptaConvertToNuma ( PTA *pta, NUMA **pnax, NUMA **pnay ); -LEPT_DLL extern PIX * pixDisplayPta ( PIX *pixd, PIX *pixs, PTA *pta ); -LEPT_DLL extern PIX * pixDisplayPtaaPattern ( PIX *pixd, PIX *pixs, PTAA *ptaa, PIX *pixp, l_int32 cx, l_int32 cy ); -LEPT_DLL extern PIX * pixDisplayPtaPattern ( PIX *pixd, PIX *pixs, PTA *pta, PIX *pixp, l_int32 cx, l_int32 cy, 
l_uint32 color ); -LEPT_DLL extern PTA * ptaReplicatePattern ( PTA *ptas, PIX *pixp, PTA *ptap, l_int32 cx, l_int32 cy, l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * pixDisplayPtaa ( PIX *pixs, PTAA *ptaa ); -LEPT_DLL extern PTA * ptaSort ( PTA *ptas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex ); -LEPT_DLL extern l_ok ptaGetSortIndex ( PTA *ptas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex ); -LEPT_DLL extern PTA * ptaSortByIndex ( PTA *ptas, NUMA *naindex ); -LEPT_DLL extern PTAA * ptaaSortByIndex ( PTAA *ptaas, NUMA *naindex ); -LEPT_DLL extern l_ok ptaGetRankValue ( PTA *pta, l_float32 fract, PTA *ptasort, l_int32 sorttype, l_float32 *pval ); -LEPT_DLL extern PTA * ptaSort2d ( PTA *pta ); -LEPT_DLL extern l_ok ptaEqual ( PTA *pta1, PTA *pta2, l_int32 *psame ); -LEPT_DLL extern PTA * ptaUnionByAset ( PTA *pta1, PTA *pta2 ); -LEPT_DLL extern PTA * ptaRemoveDupsByAset ( PTA *ptas ); -LEPT_DLL extern PTA * ptaIntersectionByAset ( PTA *pta1, PTA *pta2 ); -LEPT_DLL extern L_ASET * l_asetCreateFromPta ( PTA *pta ); -LEPT_DLL extern PTA * ptaUnionByHash ( PTA *pta1, PTA *pta2 ); -LEPT_DLL extern l_ok ptaRemoveDupsByHash ( PTA *ptas, PTA **pptad, L_DNAHASH **pdahash ); -LEPT_DLL extern PTA * ptaIntersectionByHash ( PTA *pta1, PTA *pta2 ); -LEPT_DLL extern l_ok ptaFindPtByHash ( PTA *pta, L_DNAHASH *dahash, l_int32 x, l_int32 y, l_int32 *pindex ); -LEPT_DLL extern L_DNAHASH * l_dnaHashCreateFromPta ( PTA *pta ); -LEPT_DLL extern L_PTRA * ptraCreate ( l_int32 n ); -LEPT_DLL extern void ptraDestroy ( L_PTRA **ppa, l_int32 freeflag, l_int32 warnflag ); -LEPT_DLL extern l_ok ptraAdd ( L_PTRA *pa, void *item ); -LEPT_DLL extern l_ok ptraInsert ( L_PTRA *pa, l_int32 index, void *item, l_int32 shiftflag ); -LEPT_DLL extern void * ptraRemove ( L_PTRA *pa, l_int32 index, l_int32 flag ); -LEPT_DLL extern void * ptraRemoveLast ( L_PTRA *pa ); -LEPT_DLL extern void * ptraReplace ( L_PTRA *pa, l_int32 index, void *item, l_int32 freeflag ); -LEPT_DLL extern l_ok 
ptraSwap ( L_PTRA *pa, l_int32 index1, l_int32 index2 ); -LEPT_DLL extern l_ok ptraCompactArray ( L_PTRA *pa ); -LEPT_DLL extern l_ok ptraReverse ( L_PTRA *pa ); -LEPT_DLL extern l_ok ptraJoin ( L_PTRA *pa1, L_PTRA *pa2 ); -LEPT_DLL extern l_ok ptraGetMaxIndex ( L_PTRA *pa, l_int32 *pmaxindex ); -LEPT_DLL extern l_ok ptraGetActualCount ( L_PTRA *pa, l_int32 *pcount ); -LEPT_DLL extern void * ptraGetPtrToItem ( L_PTRA *pa, l_int32 index ); -LEPT_DLL extern L_PTRAA * ptraaCreate ( l_int32 n ); -LEPT_DLL extern void ptraaDestroy ( L_PTRAA **ppaa, l_int32 freeflag, l_int32 warnflag ); -LEPT_DLL extern l_ok ptraaGetSize ( L_PTRAA *paa, l_int32 *psize ); -LEPT_DLL extern l_ok ptraaInsertPtra ( L_PTRAA *paa, l_int32 index, L_PTRA *pa ); -LEPT_DLL extern L_PTRA * ptraaGetPtra ( L_PTRAA *paa, l_int32 index, l_int32 accessflag ); -LEPT_DLL extern L_PTRA * ptraaFlattenToPtra ( L_PTRAA *paa ); -LEPT_DLL extern l_ok pixQuadtreeMean ( PIX *pixs, l_int32 nlevels, PIX *pix_ma, FPIXA **pfpixa ); -LEPT_DLL extern l_ok pixQuadtreeVariance ( PIX *pixs, l_int32 nlevels, PIX *pix_ma, DPIX *dpix_msa, FPIXA **pfpixa_v, FPIXA **pfpixa_rv ); -LEPT_DLL extern l_ok pixMeanInRectangle ( PIX *pixs, BOX *box, PIX *pixma, l_float32 *pval ); -LEPT_DLL extern l_ok pixVarianceInRectangle ( PIX *pixs, BOX *box, PIX *pix_ma, DPIX *dpix_msa, l_float32 *pvar, l_float32 *prvar ); -LEPT_DLL extern BOXAA * boxaaQuadtreeRegions ( l_int32 w, l_int32 h, l_int32 nlevels ); -LEPT_DLL extern l_ok quadtreeGetParent ( FPIXA *fpixa, l_int32 level, l_int32 x, l_int32 y, l_float32 *pval ); -LEPT_DLL extern l_ok quadtreeGetChildren ( FPIXA *fpixa, l_int32 level, l_int32 x, l_int32 y, l_float32 *pval00, l_float32 *pval10, l_float32 *pval01, l_float32 *pval11 ); -LEPT_DLL extern l_int32 quadtreeMaxLevels ( l_int32 w, l_int32 h ); -LEPT_DLL extern PIX * fpixaDisplayQuadtree ( FPIXA *fpixa, l_int32 factor, l_int32 fontsize ); -LEPT_DLL extern L_QUEUE * lqueueCreate ( l_int32 nalloc ); -LEPT_DLL extern void lqueueDestroy ( 
L_QUEUE **plq, l_int32 freeflag ); -LEPT_DLL extern l_ok lqueueAdd ( L_QUEUE *lq, void *item ); -LEPT_DLL extern void * lqueueRemove ( L_QUEUE *lq ); -LEPT_DLL extern l_int32 lqueueGetCount ( L_QUEUE *lq ); -LEPT_DLL extern l_ok lqueuePrint ( FILE *fp, L_QUEUE *lq ); -LEPT_DLL extern PIX * pixRankFilter ( PIX *pixs, l_int32 wf, l_int32 hf, l_float32 rank ); -LEPT_DLL extern PIX * pixRankFilterRGB ( PIX *pixs, l_int32 wf, l_int32 hf, l_float32 rank ); -LEPT_DLL extern PIX * pixRankFilterGray ( PIX *pixs, l_int32 wf, l_int32 hf, l_float32 rank ); -LEPT_DLL extern PIX * pixMedianFilter ( PIX *pixs, l_int32 wf, l_int32 hf ); -LEPT_DLL extern PIX * pixRankFilterWithScaling ( PIX *pixs, l_int32 wf, l_int32 hf, l_float32 rank, l_float32 scalefactor ); -LEPT_DLL extern L_RBTREE * l_rbtreeCreate ( l_int32 keytype ); -LEPT_DLL extern RB_TYPE * l_rbtreeLookup ( L_RBTREE *t, RB_TYPE key ); -LEPT_DLL extern void l_rbtreeInsert ( L_RBTREE *t, RB_TYPE key, RB_TYPE value ); -LEPT_DLL extern void l_rbtreeDelete ( L_RBTREE *t, RB_TYPE key ); -LEPT_DLL extern void l_rbtreeDestroy ( L_RBTREE **pt ); -LEPT_DLL extern L_RBTREE_NODE * l_rbtreeGetFirst ( L_RBTREE *t ); -LEPT_DLL extern L_RBTREE_NODE * l_rbtreeGetNext ( L_RBTREE_NODE *n ); -LEPT_DLL extern L_RBTREE_NODE * l_rbtreeGetLast ( L_RBTREE *t ); -LEPT_DLL extern L_RBTREE_NODE * l_rbtreeGetPrev ( L_RBTREE_NODE *n ); -LEPT_DLL extern l_int32 l_rbtreeGetCount ( L_RBTREE *t ); -LEPT_DLL extern void l_rbtreePrint ( FILE *fp, L_RBTREE *t ); -LEPT_DLL extern SARRAY * pixProcessBarcodes ( PIX *pixs, l_int32 format, l_int32 method, SARRAY **psaw, l_int32 debugflag ); -LEPT_DLL extern PIXA * pixExtractBarcodes ( PIX *pixs, l_int32 debugflag ); -LEPT_DLL extern SARRAY * pixReadBarcodes ( PIXA *pixa, l_int32 format, l_int32 method, SARRAY **psaw, l_int32 debugflag ); -LEPT_DLL extern NUMA * pixReadBarcodeWidths ( PIX *pixs, l_int32 method, l_int32 debugflag ); -LEPT_DLL extern BOXA * pixLocateBarcodes ( PIX *pixs, l_int32 thresh, PIX **ppixb, 
PIX **ppixm ); -LEPT_DLL extern PIX * pixDeskewBarcode ( PIX *pixs, PIX *pixb, BOX *box, l_int32 margin, l_int32 threshold, l_float32 *pangle, l_float32 *pconf ); -LEPT_DLL extern NUMA * pixExtractBarcodeWidths1 ( PIX *pixs, l_float32 thresh, l_float32 binfract, NUMA **pnaehist, NUMA **pnaohist, l_int32 debugflag ); -LEPT_DLL extern NUMA * pixExtractBarcodeWidths2 ( PIX *pixs, l_float32 thresh, l_float32 *pwidth, NUMA **pnac, l_int32 debugflag ); -LEPT_DLL extern NUMA * pixExtractBarcodeCrossings ( PIX *pixs, l_float32 thresh, l_int32 debugflag ); -LEPT_DLL extern NUMA * numaQuantizeCrossingsByWidth ( NUMA *nas, l_float32 binfract, NUMA **pnaehist, NUMA **pnaohist, l_int32 debugflag ); -LEPT_DLL extern NUMA * numaQuantizeCrossingsByWindow ( NUMA *nas, l_float32 ratio, l_float32 *pwidth, l_float32 *pfirstloc, NUMA **pnac, l_int32 debugflag ); -LEPT_DLL extern PIXA * pixaReadFiles ( const char *dirname, const char *substr ); -LEPT_DLL extern PIXA * pixaReadFilesSA ( SARRAY *sa ); -LEPT_DLL extern PIX * pixRead ( const char *filename ); -LEPT_DLL extern PIX * pixReadWithHint ( const char *filename, l_int32 hint ); -LEPT_DLL extern PIX * pixReadIndexed ( SARRAY *sa, l_int32 index ); -LEPT_DLL extern PIX * pixReadStream ( FILE *fp, l_int32 hint ); -LEPT_DLL extern l_ok pixReadHeader ( const char *filename, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok findFileFormat ( const char *filename, l_int32 *pformat ); -LEPT_DLL extern l_ok findFileFormatStream ( FILE *fp, l_int32 *pformat ); -LEPT_DLL extern l_ok findFileFormatBuffer ( const l_uint8 *buf, l_int32 *pformat ); -LEPT_DLL extern l_int32 fileFormatIsTiff ( FILE *fp ); -LEPT_DLL extern PIX * pixReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixReadHeaderMem ( const l_uint8 *data, size_t size, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok writeImageFileInfo ( 
const char *filename, FILE *fpout, l_int32 headeronly ); -LEPT_DLL extern l_ok ioFormatTest ( const char *filename ); -LEPT_DLL extern L_RECOG * recogCreateFromRecog ( L_RECOG *recs, l_int32 scalew, l_int32 scaleh, l_int32 linew, l_int32 threshold, l_int32 maxyshift ); -LEPT_DLL extern L_RECOG * recogCreateFromPixa ( PIXA *pixa, l_int32 scalew, l_int32 scaleh, l_int32 linew, l_int32 threshold, l_int32 maxyshift ); -LEPT_DLL extern L_RECOG * recogCreateFromPixaNoFinish ( PIXA *pixa, l_int32 scalew, l_int32 scaleh, l_int32 linew, l_int32 threshold, l_int32 maxyshift ); -LEPT_DLL extern L_RECOG * recogCreate ( l_int32 scalew, l_int32 scaleh, l_int32 linew, l_int32 threshold, l_int32 maxyshift ); -LEPT_DLL extern void recogDestroy ( L_RECOG **precog ); -LEPT_DLL extern l_int32 recogGetCount ( L_RECOG *recog ); -LEPT_DLL extern l_ok recogSetParams ( L_RECOG *recog, l_int32 type, l_int32 min_nopad, l_float32 max_wh_ratio, l_float32 max_ht_ratio ); -LEPT_DLL extern l_int32 recogGetClassIndex ( L_RECOG *recog, l_int32 val, char *text, l_int32 *pindex ); -LEPT_DLL extern l_ok recogStringToIndex ( L_RECOG *recog, char *text, l_int32 *pindex ); -LEPT_DLL extern l_int32 recogGetClassString ( L_RECOG *recog, l_int32 index, char **pcharstr ); -LEPT_DLL extern l_ok l_convertCharstrToInt ( const char *str, l_int32 *pval ); -LEPT_DLL extern L_RECOG * recogRead ( const char *filename ); -LEPT_DLL extern L_RECOG * recogReadStream ( FILE *fp ); -LEPT_DLL extern L_RECOG * recogReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok recogWrite ( const char *filename, L_RECOG *recog ); -LEPT_DLL extern l_ok recogWriteStream ( FILE *fp, L_RECOG *recog ); -LEPT_DLL extern l_ok recogWriteMem ( l_uint8 **pdata, size_t *psize, L_RECOG *recog ); -LEPT_DLL extern PIXA * recogExtractPixa ( L_RECOG *recog ); -LEPT_DLL extern BOXA * recogDecode ( L_RECOG *recog, PIX *pixs, l_int32 nlevels, PIX **ppixdb ); -LEPT_DLL extern l_ok recogCreateDid ( L_RECOG *recog, PIX *pixs ); -LEPT_DLL 
extern l_ok recogDestroyDid ( L_RECOG *recog ); -LEPT_DLL extern l_int32 recogDidExists ( L_RECOG *recog ); -LEPT_DLL extern L_RDID * recogGetDid ( L_RECOG *recog ); -LEPT_DLL extern l_ok recogSetChannelParams ( L_RECOG *recog, l_int32 nlevels ); -LEPT_DLL extern l_ok recogIdentifyMultiple ( L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, PIX **ppixdb, l_int32 debugsplit ); -LEPT_DLL extern l_ok recogSplitIntoCharacters ( L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, l_int32 debug ); -LEPT_DLL extern l_ok recogCorrelationBestRow ( L_RECOG *recog, PIX *pixs, BOXA **pboxa, NUMA **pnascore, NUMA **pnaindex, SARRAY **psachar, l_int32 debug ); -LEPT_DLL extern l_ok recogCorrelationBestChar ( L_RECOG *recog, PIX *pixs, BOX **pbox, l_float32 *pscore, l_int32 *pindex, char **pcharstr, PIX **ppixdb ); -LEPT_DLL extern l_ok recogIdentifyPixa ( L_RECOG *recog, PIXA *pixa, PIX **ppixdb ); -LEPT_DLL extern l_ok recogIdentifyPix ( L_RECOG *recog, PIX *pixs, PIX **ppixdb ); -LEPT_DLL extern l_ok recogSkipIdentify ( L_RECOG *recog ); -LEPT_DLL extern void rchaDestroy ( L_RCHA **prcha ); -LEPT_DLL extern void rchDestroy ( L_RCH **prch ); -LEPT_DLL extern l_ok rchaExtract ( L_RCHA *rcha, NUMA **pnaindex, NUMA **pnascore, SARRAY **psatext, NUMA **pnasample, NUMA **pnaxloc, NUMA **pnayloc, NUMA **pnawidth ); -LEPT_DLL extern l_ok rchExtract ( L_RCH *rch, l_int32 *pindex, l_float32 *pscore, char **ptext, l_int32 *psample, l_int32 *pxloc, l_int32 *pyloc, l_int32 *pwidth ); -LEPT_DLL extern PIX * recogProcessToIdentify ( L_RECOG *recog, PIX *pixs, l_int32 pad ); -LEPT_DLL extern SARRAY * recogExtractNumbers ( L_RECOG *recog, BOXA *boxas, l_float32 scorethresh, l_int32 spacethresh, BOXAA **pbaa, NUMAA **pnaa ); -LEPT_DLL extern PIXA * showExtractNumbers ( PIX *pixs, SARRAY *sa, BOXAA *baa, NUMAA *naa, PIX **ppixdb ); -LEPT_DLL extern l_ok recogTrainLabeled ( L_RECOG *recog, PIX *pixs, BOX *box, char *text, 
l_int32 debug ); -LEPT_DLL extern l_ok recogProcessLabeled ( L_RECOG *recog, PIX *pixs, BOX *box, char *text, PIX **ppix ); -LEPT_DLL extern l_ok recogAddSample ( L_RECOG *recog, PIX *pix, l_int32 debug ); -LEPT_DLL extern PIX * recogModifyTemplate ( L_RECOG *recog, PIX *pixs ); -LEPT_DLL extern l_int32 recogAverageSamples ( L_RECOG **precog, l_int32 debug ); -LEPT_DLL extern l_int32 pixaAccumulateSamples ( PIXA *pixa, PTA *pta, PIX **ppixd, l_float32 *px, l_float32 *py ); -LEPT_DLL extern l_ok recogTrainingFinished ( L_RECOG **precog, l_int32 modifyflag, l_int32 minsize, l_float32 minfract ); -LEPT_DLL extern PIXA * recogFilterPixaBySize ( PIXA *pixas, l_int32 setsize, l_int32 maxkeep, l_float32 max_ht_ratio, NUMA **pna ); -LEPT_DLL extern PIXAA * recogSortPixaByClass ( PIXA *pixa, l_int32 setsize ); -LEPT_DLL extern l_ok recogRemoveOutliers1 ( L_RECOG **precog, l_float32 minscore, l_int32 mintarget, l_int32 minsize, PIX **ppixsave, PIX **ppixrem ); -LEPT_DLL extern PIXA * pixaRemoveOutliers1 ( PIXA *pixas, l_float32 minscore, l_int32 mintarget, l_int32 minsize, PIX **ppixsave, PIX **ppixrem ); -LEPT_DLL extern l_ok recogRemoveOutliers2 ( L_RECOG **precog, l_float32 minscore, l_int32 minsize, PIX **ppixsave, PIX **ppixrem ); -LEPT_DLL extern PIXA * pixaRemoveOutliers2 ( PIXA *pixas, l_float32 minscore, l_int32 minsize, PIX **ppixsave, PIX **ppixrem ); -LEPT_DLL extern PIXA * recogTrainFromBoot ( L_RECOG *recogboot, PIXA *pixas, l_float32 minscore, l_int32 threshold, l_int32 debug ); -LEPT_DLL extern l_ok recogPadDigitTrainingSet ( L_RECOG **precog, l_int32 scaleh, l_int32 linew ); -LEPT_DLL extern l_int32 recogIsPaddingNeeded ( L_RECOG *recog, SARRAY **psa ); -LEPT_DLL extern PIXA * recogAddDigitPadTemplates ( L_RECOG *recog, SARRAY *sa ); -LEPT_DLL extern L_RECOG * recogMakeBootDigitRecog ( l_int32 nsamp, l_int32 scaleh, l_int32 linew, l_int32 maxyshift, l_int32 debug ); -LEPT_DLL extern PIXA * recogMakeBootDigitTemplates ( l_int32 nsamp, l_int32 debug ); 
-LEPT_DLL extern l_ok recogShowContent ( FILE *fp, L_RECOG *recog, l_int32 index, l_int32 display ); -LEPT_DLL extern l_ok recogDebugAverages ( L_RECOG **precog, l_int32 debug ); -LEPT_DLL extern l_int32 recogShowAverageTemplates ( L_RECOG *recog ); -LEPT_DLL extern l_ok recogShowMatchesInRange ( L_RECOG *recog, PIXA *pixa, l_float32 minscore, l_float32 maxscore, l_int32 display ); -LEPT_DLL extern PIX * recogShowMatch ( L_RECOG *recog, PIX *pix1, PIX *pix2, BOX *box, l_int32 index, l_float32 score ); -LEPT_DLL extern l_ok regTestSetup ( l_int32 argc, char **argv, L_REGPARAMS **prp ); -LEPT_DLL extern l_ok regTestCleanup ( L_REGPARAMS *rp ); -LEPT_DLL extern l_ok regTestCompareValues ( L_REGPARAMS *rp, l_float32 val1, l_float32 val2, l_float32 delta ); -LEPT_DLL extern l_ok regTestCompareStrings ( L_REGPARAMS *rp, l_uint8 *string1, size_t bytes1, l_uint8 *string2, size_t bytes2 ); -LEPT_DLL extern l_ok regTestComparePix ( L_REGPARAMS *rp, PIX *pix1, PIX *pix2 ); -LEPT_DLL extern l_ok regTestCompareSimilarPix ( L_REGPARAMS *rp, PIX *pix1, PIX *pix2, l_int32 mindiff, l_float32 maxfract, l_int32 printstats ); -LEPT_DLL extern l_ok regTestCheckFile ( L_REGPARAMS *rp, const char *localname ); -LEPT_DLL extern l_ok regTestCompareFiles ( L_REGPARAMS *rp, l_int32 index1, l_int32 index2 ); -LEPT_DLL extern l_ok regTestWritePixAndCheck ( L_REGPARAMS *rp, PIX *pix, l_int32 format ); -LEPT_DLL extern l_ok regTestWriteDataAndCheck ( L_REGPARAMS *rp, void *data, size_t nbytes, const char *ext ); -LEPT_DLL extern char * regTestGenLocalFilename ( L_REGPARAMS *rp, l_int32 index, l_int32 format ); -LEPT_DLL extern l_ok pixRasterop ( PIX *pixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, PIX *pixs, l_int32 sx, l_int32 sy ); -LEPT_DLL extern l_ok pixRasteropVip ( PIX *pixd, l_int32 bx, l_int32 bw, l_int32 vshift, l_int32 incolor ); -LEPT_DLL extern l_ok pixRasteropHip ( PIX *pixd, l_int32 by, l_int32 bh, l_int32 hshift, l_int32 incolor ); -LEPT_DLL extern PIX * 
pixTranslate ( PIX *pixd, PIX *pixs, l_int32 hshift, l_int32 vshift, l_int32 incolor ); -LEPT_DLL extern l_ok pixRasteropIP ( PIX *pixd, l_int32 hshift, l_int32 vshift, l_int32 incolor ); -LEPT_DLL extern l_ok pixRasteropFullImage ( PIX *pixd, PIX *pixs, l_int32 op ); -LEPT_DLL extern void rasteropUniLow ( l_uint32 *datad, l_int32 dpixw, l_int32 dpixh, l_int32 depth, l_int32 dwpl, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op ); -LEPT_DLL extern void rasteropLow ( l_uint32 *datad, l_int32 dpixw, l_int32 dpixh, l_int32 depth, l_int32 dwpl, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, l_uint32 *datas, l_int32 spixw, l_int32 spixh, l_int32 swpl, l_int32 sx, l_int32 sy ); -LEPT_DLL extern void rasteropVipLow ( l_uint32 *data, l_int32 pixw, l_int32 pixh, l_int32 depth, l_int32 wpl, l_int32 x, l_int32 w, l_int32 shift ); -LEPT_DLL extern void rasteropHipLow ( l_uint32 *data, l_int32 pixh, l_int32 depth, l_int32 wpl, l_int32 y, l_int32 h, l_int32 shift ); -LEPT_DLL extern PIX * pixRotate ( PIX *pixs, l_float32 angle, l_int32 type, l_int32 incolor, l_int32 width, l_int32 height ); -LEPT_DLL extern PIX * pixEmbedForRotation ( PIX *pixs, l_float32 angle, l_int32 incolor, l_int32 width, l_int32 height ); -LEPT_DLL extern PIX * pixRotateBySampling ( PIX *pixs, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotateBinaryNice ( PIX *pixs, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotateWithAlpha ( PIX *pixs, l_float32 angle, PIX *pixg, l_float32 fract ); -LEPT_DLL extern PIX * pixRotateAM ( PIX *pixs, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotateAMColor ( PIX *pixs, l_float32 angle, l_uint32 colorval ); -LEPT_DLL extern PIX * pixRotateAMGray ( PIX *pixs, l_float32 angle, l_uint8 grayval ); -LEPT_DLL extern PIX * pixRotateAMCorner ( PIX *pixs, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotateAMColorCorner ( PIX *pixs, l_float32 angle, l_uint32 
fillval ); -LEPT_DLL extern PIX * pixRotateAMGrayCorner ( PIX *pixs, l_float32 angle, l_uint8 grayval ); -LEPT_DLL extern PIX * pixRotateAMColorFast ( PIX *pixs, l_float32 angle, l_uint32 colorval ); -LEPT_DLL extern PIX * pixRotateOrth ( PIX *pixs, l_int32 quads ); -LEPT_DLL extern PIX * pixRotate180 ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixRotate90 ( PIX *pixs, l_int32 direction ); -LEPT_DLL extern PIX * pixFlipLR ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixFlipTB ( PIX *pixd, PIX *pixs ); -LEPT_DLL extern PIX * pixRotateShear ( PIX *pixs, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotate2Shear ( PIX *pixs, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotate3Shear ( PIX *pixs, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern l_ok pixRotateShearIP ( PIX *pixs, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixRotateShearCenter ( PIX *pixs, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern l_ok pixRotateShearCenterIP ( PIX *pixs, l_float32 angle, l_int32 incolor ); -LEPT_DLL extern PIX * pixStrokeWidthTransform ( PIX *pixs, l_int32 color, l_int32 depth, l_int32 nangles ); -LEPT_DLL extern PIX * pixRunlengthTransform ( PIX *pixs, l_int32 color, l_int32 direction, l_int32 depth ); -LEPT_DLL extern l_ok pixFindHorizontalRuns ( PIX *pix, l_int32 y, l_int32 *xstart, l_int32 *xend, l_int32 *pn ); -LEPT_DLL extern l_ok pixFindVerticalRuns ( PIX *pix, l_int32 x, l_int32 *ystart, l_int32 *yend, l_int32 *pn ); -LEPT_DLL extern NUMA * pixFindMaxRuns ( PIX *pix, l_int32 direction, NUMA **pnastart ); -LEPT_DLL extern l_ok pixFindMaxHorizontalRunOnLine ( PIX *pix, l_int32 y, l_int32 *pxstart, l_int32 *psize ); -LEPT_DLL extern l_ok pixFindMaxVerticalRunOnLine ( PIX *pix, l_int32 x, l_int32 *pystart, l_int32 *psize ); -LEPT_DLL extern l_ok runlengthMembershipOnLine ( l_int32 *buffer, l_int32 size, 
l_int32 depth, l_int32 *start, l_int32 *end, l_int32 n ); -LEPT_DLL extern l_int32 * makeMSBitLocTab ( l_int32 bitval ); -LEPT_DLL extern SARRAY * sarrayCreate ( l_int32 n ); -LEPT_DLL extern SARRAY * sarrayCreateInitialized ( l_int32 n, const char *initstr ); -LEPT_DLL extern SARRAY * sarrayCreateWordsFromString ( const char *string ); -LEPT_DLL extern SARRAY * sarrayCreateLinesFromString ( const char *string, l_int32 blankflag ); -LEPT_DLL extern void sarrayDestroy ( SARRAY **psa ); -LEPT_DLL extern SARRAY * sarrayCopy ( SARRAY *sa ); -LEPT_DLL extern SARRAY * sarrayClone ( SARRAY *sa ); -LEPT_DLL extern l_ok sarrayAddString ( SARRAY *sa, const char *string, l_int32 copyflag ); -LEPT_DLL extern char * sarrayRemoveString ( SARRAY *sa, l_int32 index ); -LEPT_DLL extern l_ok sarrayReplaceString ( SARRAY *sa, l_int32 index, char *newstr, l_int32 copyflag ); -LEPT_DLL extern l_ok sarrayClear ( SARRAY *sa ); -LEPT_DLL extern l_int32 sarrayGetCount ( SARRAY *sa ); -LEPT_DLL extern char ** sarrayGetArray ( SARRAY *sa, l_int32 *pnalloc, l_int32 *pn ); -LEPT_DLL extern char * sarrayGetString ( SARRAY *sa, l_int32 index, l_int32 copyflag ); -LEPT_DLL extern l_int32 sarrayGetRefcount ( SARRAY *sa ); -LEPT_DLL extern l_ok sarrayChangeRefcount ( SARRAY *sa, l_int32 delta ); -LEPT_DLL extern char * sarrayToString ( SARRAY *sa, l_int32 addnlflag ); -LEPT_DLL extern char * sarrayToStringRange ( SARRAY *sa, l_int32 first, l_int32 nstrings, l_int32 addnlflag ); -LEPT_DLL extern l_ok sarrayJoin ( SARRAY *sa1, SARRAY *sa2 ); -LEPT_DLL extern l_ok sarrayAppendRange ( SARRAY *sa1, SARRAY *sa2, l_int32 start, l_int32 end ); -LEPT_DLL extern l_ok sarrayPadToSameSize ( SARRAY *sa1, SARRAY *sa2, const char *padstring ); -LEPT_DLL extern SARRAY * sarrayConvertWordsToLines ( SARRAY *sa, l_int32 linesize ); -LEPT_DLL extern l_int32 sarraySplitString ( SARRAY *sa, const char *str, const char *separators ); -LEPT_DLL extern SARRAY * sarraySelectBySubstring ( SARRAY *sain, const char *substr ); 
-LEPT_DLL extern SARRAY * sarraySelectByRange ( SARRAY *sain, l_int32 first, l_int32 last ); -LEPT_DLL extern l_int32 sarrayParseRange ( SARRAY *sa, l_int32 start, l_int32 *pactualstart, l_int32 *pend, l_int32 *pnewstart, const char *substr, l_int32 loc ); -LEPT_DLL extern SARRAY * sarrayRead ( const char *filename ); -LEPT_DLL extern SARRAY * sarrayReadStream ( FILE *fp ); -LEPT_DLL extern SARRAY * sarrayReadMem ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok sarrayWrite ( const char *filename, SARRAY *sa ); -LEPT_DLL extern l_ok sarrayWriteStream ( FILE *fp, SARRAY *sa ); -LEPT_DLL extern l_ok sarrayWriteMem ( l_uint8 **pdata, size_t *psize, SARRAY *sa ); -LEPT_DLL extern l_ok sarrayAppend ( const char *filename, SARRAY *sa ); -LEPT_DLL extern SARRAY * getNumberedPathnamesInDirectory ( const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum ); -LEPT_DLL extern SARRAY * getSortedPathnamesInDirectory ( const char *dirname, const char *substr, l_int32 first, l_int32 nfiles ); -LEPT_DLL extern SARRAY * convertSortedToNumberedPathnames ( SARRAY *sa, l_int32 numpre, l_int32 numpost, l_int32 maxnum ); -LEPT_DLL extern SARRAY * getFilenamesInDirectory ( const char *dirname ); -LEPT_DLL extern SARRAY * sarraySort ( SARRAY *saout, SARRAY *sain, l_int32 sortorder ); -LEPT_DLL extern SARRAY * sarraySortByIndex ( SARRAY *sain, NUMA *naindex ); -LEPT_DLL extern l_int32 stringCompareLexical ( const char *str1, const char *str2 ); -LEPT_DLL extern SARRAY * sarrayUnionByAset ( SARRAY *sa1, SARRAY *sa2 ); -LEPT_DLL extern SARRAY * sarrayRemoveDupsByAset ( SARRAY *sas ); -LEPT_DLL extern SARRAY * sarrayIntersectionByAset ( SARRAY *sa1, SARRAY *sa2 ); -LEPT_DLL extern L_ASET * l_asetCreateFromSarray ( SARRAY *sa ); -LEPT_DLL extern l_ok sarrayRemoveDupsByHash ( SARRAY *sas, SARRAY **psad, L_DNAHASH **pdahash ); -LEPT_DLL extern SARRAY * sarrayIntersectionByHash ( SARRAY *sa1, SARRAY *sa2 ); -LEPT_DLL extern l_ok sarrayFindStringByHash ( 
SARRAY *sa, L_DNAHASH *dahash, const char *str, l_int32 *pindex ); -LEPT_DLL extern L_DNAHASH * l_dnaHashCreateFromSarray ( SARRAY *sa ); -LEPT_DLL extern SARRAY * sarrayGenerateIntegers ( l_int32 n ); -LEPT_DLL extern l_ok sarrayLookupCSKV ( SARRAY *sa, const char *keystring, char **pvalstring ); -LEPT_DLL extern PIX * pixScale ( PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleToSizeRel ( PIX *pixs, l_int32 delw, l_int32 delh ); -LEPT_DLL extern PIX * pixScaleToSize ( PIX *pixs, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIX * pixScaleToResolution ( PIX *pixs, l_float32 target, l_float32 assumed, l_float32 *pscalefact ); -LEPT_DLL extern PIX * pixScaleGeneral ( PIX *pixs, l_float32 scalex, l_float32 scaley, l_float32 sharpfract, l_int32 sharpwidth ); -LEPT_DLL extern PIX * pixScaleLI ( PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleColorLI ( PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleColor2xLI ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleColor4xLI ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleGrayLI ( PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleGray2xLI ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleGray4xLI ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleGray2xLIThresh ( PIX *pixs, l_int32 thresh ); -LEPT_DLL extern PIX * pixScaleGray2xLIDither ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleGray4xLIThresh ( PIX *pixs, l_int32 thresh ); -LEPT_DLL extern PIX * pixScaleGray4xLIDither ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleBySampling ( PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleBySamplingToSize ( PIX *pixs, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIX * pixScaleByIntSampling ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern PIX * pixScaleRGBToGrayFast ( PIX *pixs, l_int32 factor, l_int32 color ); -LEPT_DLL extern PIX * pixScaleRGBToBinaryFast ( PIX *pixs, l_int32 factor, l_int32 thresh ); 
-LEPT_DLL extern PIX * pixScaleGrayToBinaryFast ( PIX *pixs, l_int32 factor, l_int32 thresh ); -LEPT_DLL extern PIX * pixScaleSmooth ( PIX *pix, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleSmoothToSize ( PIX *pixs, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIX * pixScaleRGBToGray2 ( PIX *pixs, l_float32 rwt, l_float32 gwt, l_float32 bwt ); -LEPT_DLL extern PIX * pixScaleAreaMap ( PIX *pix, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleAreaMap2 ( PIX *pix ); -LEPT_DLL extern PIX * pixScaleAreaMapToSize ( PIX *pixs, l_int32 wd, l_int32 hd ); -LEPT_DLL extern PIX * pixScaleBinary ( PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleToGray ( PIX *pixs, l_float32 scalefactor ); -LEPT_DLL extern PIX * pixScaleToGrayFast ( PIX *pixs, l_float32 scalefactor ); -LEPT_DLL extern PIX * pixScaleToGray2 ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleToGray3 ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleToGray4 ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleToGray6 ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleToGray8 ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleToGray16 ( PIX *pixs ); -LEPT_DLL extern PIX * pixScaleToGrayMipmap ( PIX *pixs, l_float32 scalefactor ); -LEPT_DLL extern PIX * pixScaleMipmap ( PIX *pixs1, PIX *pixs2, l_float32 scale ); -LEPT_DLL extern PIX * pixExpandReplicate ( PIX *pixs, l_int32 factor ); -LEPT_DLL extern PIX * pixScaleGrayMinMax ( PIX *pixs, l_int32 xfact, l_int32 yfact, l_int32 type ); -LEPT_DLL extern PIX * pixScaleGrayMinMax2 ( PIX *pixs, l_int32 type ); -LEPT_DLL extern PIX * pixScaleGrayRankCascade ( PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4 ); -LEPT_DLL extern PIX * pixScaleGrayRank2 ( PIX *pixs, l_int32 rank ); -LEPT_DLL extern l_ok pixScaleAndTransferAlpha ( PIX *pixd, PIX *pixs, l_float32 scalex, l_float32 scaley ); -LEPT_DLL extern PIX * pixScaleWithAlpha ( PIX *pixs, l_float32 scalex, l_float32 scaley, PIX *pixg, l_float32 fract 
); -LEPT_DLL extern PIX * pixSeedfillBinary ( PIX *pixd, PIX *pixs, PIX *pixm, l_int32 connectivity ); -LEPT_DLL extern PIX * pixSeedfillBinaryRestricted ( PIX *pixd, PIX *pixs, PIX *pixm, l_int32 connectivity, l_int32 xmax, l_int32 ymax ); -LEPT_DLL extern PIX * pixHolesByFilling ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern PIX * pixFillClosedBorders ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern PIX * pixExtractBorderConnComps ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern PIX * pixRemoveBorderConnComps ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern PIX * pixFillBgFromBorder ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern PIX * pixFillHolesToBoundingRect ( PIX *pixs, l_int32 minsize, l_float32 maxhfract, l_float32 minfgfract ); -LEPT_DLL extern l_ok pixSeedfillGray ( PIX *pixs, PIX *pixm, l_int32 connectivity ); -LEPT_DLL extern l_ok pixSeedfillGrayInv ( PIX *pixs, PIX *pixm, l_int32 connectivity ); -LEPT_DLL extern l_ok pixSeedfillGraySimple ( PIX *pixs, PIX *pixm, l_int32 connectivity ); -LEPT_DLL extern l_ok pixSeedfillGrayInvSimple ( PIX *pixs, PIX *pixm, l_int32 connectivity ); -LEPT_DLL extern PIX * pixSeedfillGrayBasin ( PIX *pixb, PIX *pixm, l_int32 delta, l_int32 connectivity ); -LEPT_DLL extern PIX * pixDistanceFunction ( PIX *pixs, l_int32 connectivity, l_int32 outdepth, l_int32 boundcond ); -LEPT_DLL extern PIX * pixSeedspread ( PIX *pixs, l_int32 connectivity ); -LEPT_DLL extern l_ok pixLocalExtrema ( PIX *pixs, l_int32 maxmin, l_int32 minmax, PIX **ppixmin, PIX **ppixmax ); -LEPT_DLL extern l_ok pixSelectedLocalExtrema ( PIX *pixs, l_int32 mindist, PIX **ppixmin, PIX **ppixmax ); -LEPT_DLL extern PIX * pixFindEqualValues ( PIX *pixs1, PIX *pixs2 ); -LEPT_DLL extern l_ok pixSelectMinInConnComp ( PIX *pixs, PIX *pixm, PTA **ppta, NUMA **pnav ); -LEPT_DLL extern PIX * pixRemoveSeededComponents ( PIX *pixd, PIX *pixs, PIX *pixm, l_int32 connectivity, l_int32 bordersize ); -LEPT_DLL extern SELA * selaCreate ( l_int32 n 
); -LEPT_DLL extern void selaDestroy ( SELA **psela ); -LEPT_DLL extern SEL * selCreate ( l_int32 height, l_int32 width, const char *name ); -LEPT_DLL extern void selDestroy ( SEL **psel ); -LEPT_DLL extern SEL * selCopy ( SEL *sel ); -LEPT_DLL extern SEL * selCreateBrick ( l_int32 h, l_int32 w, l_int32 cy, l_int32 cx, l_int32 type ); -LEPT_DLL extern SEL * selCreateComb ( l_int32 factor1, l_int32 factor2, l_int32 direction ); -LEPT_DLL extern l_int32 ** create2dIntArray ( l_int32 sy, l_int32 sx ); -LEPT_DLL extern l_ok selaAddSel ( SELA *sela, SEL *sel, const char *selname, l_int32 copyflag ); -LEPT_DLL extern l_int32 selaGetCount ( SELA *sela ); -LEPT_DLL extern SEL * selaGetSel ( SELA *sela, l_int32 i ); -LEPT_DLL extern char * selGetName ( SEL *sel ); -LEPT_DLL extern l_ok selSetName ( SEL *sel, const char *name ); -LEPT_DLL extern l_ok selaFindSelByName ( SELA *sela, const char *name, l_int32 *pindex, SEL **psel ); -LEPT_DLL extern l_ok selGetElement ( SEL *sel, l_int32 row, l_int32 col, l_int32 *ptype ); -LEPT_DLL extern l_ok selSetElement ( SEL *sel, l_int32 row, l_int32 col, l_int32 type ); -LEPT_DLL extern l_ok selGetParameters ( SEL *sel, l_int32 *psy, l_int32 *psx, l_int32 *pcy, l_int32 *pcx ); -LEPT_DLL extern l_ok selSetOrigin ( SEL *sel, l_int32 cy, l_int32 cx ); -LEPT_DLL extern l_ok selGetTypeAtOrigin ( SEL *sel, l_int32 *ptype ); -LEPT_DLL extern char * selaGetBrickName ( SELA *sela, l_int32 hsize, l_int32 vsize ); -LEPT_DLL extern char * selaGetCombName ( SELA *sela, l_int32 size, l_int32 direction ); -LEPT_DLL extern l_ok getCompositeParameters ( l_int32 size, l_int32 *psize1, l_int32 *psize2, char **pnameh1, char **pnameh2, char **pnamev1, char **pnamev2 ); -LEPT_DLL extern SARRAY * selaGetSelnames ( SELA *sela ); -LEPT_DLL extern l_ok selFindMaxTranslations ( SEL *sel, l_int32 *pxp, l_int32 *pyp, l_int32 *pxn, l_int32 *pyn ); -LEPT_DLL extern SEL * selRotateOrth ( SEL *sel, l_int32 quads ); -LEPT_DLL extern SELA * selaRead ( const char *fname 
); -LEPT_DLL extern SELA * selaReadStream ( FILE *fp ); -LEPT_DLL extern SEL * selRead ( const char *fname ); -LEPT_DLL extern SEL * selReadStream ( FILE *fp ); -LEPT_DLL extern l_ok selaWrite ( const char *fname, SELA *sela ); -LEPT_DLL extern l_ok selaWriteStream ( FILE *fp, SELA *sela ); -LEPT_DLL extern l_ok selWrite ( const char *fname, SEL *sel ); -LEPT_DLL extern l_ok selWriteStream ( FILE *fp, SEL *sel ); -LEPT_DLL extern SEL * selCreateFromString ( const char *text, l_int32 h, l_int32 w, const char *name ); -LEPT_DLL extern char * selPrintToString ( SEL *sel ); -LEPT_DLL extern SELA * selaCreateFromFile ( const char *filename ); -LEPT_DLL extern SEL * selCreateFromPta ( PTA *pta, l_int32 cy, l_int32 cx, const char *name ); -LEPT_DLL extern SEL * selCreateFromPix ( PIX *pix, l_int32 cy, l_int32 cx, const char *name ); -LEPT_DLL extern SEL * selReadFromColorImage ( const char *pathname ); -LEPT_DLL extern SEL * selCreateFromColorPix ( PIX *pixs, const char *selname ); -LEPT_DLL extern SELA * selaCreateFromColorPixa ( PIXA *pixa, SARRAY *sa ); -LEPT_DLL extern PIX * selDisplayInPix ( SEL *sel, l_int32 size, l_int32 gthick ); -LEPT_DLL extern PIX * selaDisplayInPix ( SELA *sela, l_int32 size, l_int32 gthick, l_int32 spacing, l_int32 ncols ); -LEPT_DLL extern SELA * selaAddBasic ( SELA *sela ); -LEPT_DLL extern SELA * selaAddHitMiss ( SELA *sela ); -LEPT_DLL extern SELA * selaAddDwaLinear ( SELA *sela ); -LEPT_DLL extern SELA * selaAddDwaCombs ( SELA *sela ); -LEPT_DLL extern SELA * selaAddCrossJunctions ( SELA *sela, l_float32 hlsize, l_float32 mdist, l_int32 norient, l_int32 debugflag ); -LEPT_DLL extern SELA * selaAddTJunctions ( SELA *sela, l_float32 hlsize, l_float32 mdist, l_int32 norient, l_int32 debugflag ); -LEPT_DLL extern SELA * sela4ccThin ( SELA *sela ); -LEPT_DLL extern SELA * sela8ccThin ( SELA *sela ); -LEPT_DLL extern SELA * sela4and8ccThin ( SELA *sela ); -LEPT_DLL extern SEL * selMakePlusSign ( l_int32 size, l_int32 linewidth ); -LEPT_DLL 
extern SEL * pixGenerateSelWithRuns ( PIX *pixs, l_int32 nhlines, l_int32 nvlines, l_int32 distance, l_int32 minlength, l_int32 toppix, l_int32 botpix, l_int32 leftpix, l_int32 rightpix, PIX **ppixe ); -LEPT_DLL extern SEL * pixGenerateSelRandom ( PIX *pixs, l_float32 hitfract, l_float32 missfract, l_int32 distance, l_int32 toppix, l_int32 botpix, l_int32 leftpix, l_int32 rightpix, PIX **ppixe ); -LEPT_DLL extern SEL * pixGenerateSelBoundary ( PIX *pixs, l_int32 hitdist, l_int32 missdist, l_int32 hitskip, l_int32 missskip, l_int32 topflag, l_int32 botflag, l_int32 leftflag, l_int32 rightflag, PIX **ppixe ); -LEPT_DLL extern NUMA * pixGetRunCentersOnLine ( PIX *pixs, l_int32 x, l_int32 y, l_int32 minlength ); -LEPT_DLL extern NUMA * pixGetRunsOnLine ( PIX *pixs, l_int32 x1, l_int32 y1, l_int32 x2, l_int32 y2 ); -LEPT_DLL extern PTA * pixSubsampleBoundaryPixels ( PIX *pixs, l_int32 skip ); -LEPT_DLL extern l_int32 adjacentOnPixelInRaster ( PIX *pixs, l_int32 x, l_int32 y, l_int32 *pxa, l_int32 *pya ); -LEPT_DLL extern PIX * pixDisplayHitMissSel ( PIX *pixs, SEL *sel, l_int32 scalefactor, l_uint32 hitcolor, l_uint32 misscolor ); -LEPT_DLL extern PIX * pixHShear ( PIX *pixd, PIX *pixs, l_int32 yloc, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixVShear ( PIX *pixd, PIX *pixs, l_int32 xloc, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixHShearCorner ( PIX *pixd, PIX *pixs, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixVShearCorner ( PIX *pixd, PIX *pixs, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixHShearCenter ( PIX *pixd, PIX *pixs, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixVShearCenter ( PIX *pixd, PIX *pixs, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern l_ok pixHShearIP ( PIX *pixs, l_int32 yloc, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern l_ok pixVShearIP ( PIX *pixs, l_int32 xloc, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixHShearLI ( PIX 
*pixs, l_int32 yloc, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixVShearLI ( PIX *pixs, l_int32 xloc, l_float32 radang, l_int32 incolor ); -LEPT_DLL extern PIX * pixDeskewBoth ( PIX *pixs, l_int32 redsearch ); -LEPT_DLL extern PIX * pixDeskew ( PIX *pixs, l_int32 redsearch ); -LEPT_DLL extern PIX * pixFindSkewAndDeskew ( PIX *pixs, l_int32 redsearch, l_float32 *pangle, l_float32 *pconf ); -LEPT_DLL extern PIX * pixDeskewGeneral ( PIX *pixs, l_int32 redsweep, l_float32 sweeprange, l_float32 sweepdelta, l_int32 redsearch, l_int32 thresh, l_float32 *pangle, l_float32 *pconf ); -LEPT_DLL extern l_ok pixFindSkew ( PIX *pixs, l_float32 *pangle, l_float32 *pconf ); -LEPT_DLL extern l_ok pixFindSkewSweep ( PIX *pixs, l_float32 *pangle, l_int32 reduction, l_float32 sweeprange, l_float32 sweepdelta ); -LEPT_DLL extern l_ok pixFindSkewSweepAndSearch ( PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta ); -LEPT_DLL extern l_ok pixFindSkewSweepAndSearchScore ( PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_float32 *pendscore, l_int32 redsweep, l_int32 redsearch, l_float32 sweepcenter, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta ); -LEPT_DLL extern l_ok pixFindSkewSweepAndSearchScorePivot ( PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_float32 *pendscore, l_int32 redsweep, l_int32 redsearch, l_float32 sweepcenter, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_int32 pivot ); -LEPT_DLL extern l_int32 pixFindSkewOrthogonalRange ( PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 confprior ); -LEPT_DLL extern l_ok pixFindDifferentialSquareSum ( PIX *pixs, l_float32 *psum ); -LEPT_DLL extern l_ok pixFindNormalizedSquareSum ( PIX *pixs, l_float32 *phratio, l_float32 *pvratio, l_float32 *pfract ); -LEPT_DLL 
extern PIX * pixReadStreamSpix ( FILE *fp ); -LEPT_DLL extern l_ok readHeaderSpix ( const char *filename, l_int32 *pwidth, l_int32 *pheight, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok freadHeaderSpix ( FILE *fp, l_int32 *pwidth, l_int32 *pheight, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok sreadHeaderSpix ( const l_uint32 *data, l_int32 *pwidth, l_int32 *pheight, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap ); -LEPT_DLL extern l_ok pixWriteStreamSpix ( FILE *fp, PIX *pix ); -LEPT_DLL extern PIX * pixReadMemSpix ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixWriteMemSpix ( l_uint8 **pdata, size_t *psize, PIX *pix ); -LEPT_DLL extern l_ok pixSerializeToMemory ( PIX *pixs, l_uint32 **pdata, size_t *pnbytes ); -LEPT_DLL extern PIX * pixDeserializeFromMemory ( const l_uint32 *data, size_t nbytes ); -LEPT_DLL extern L_STACK * lstackCreate ( l_int32 n ); -LEPT_DLL extern void lstackDestroy ( L_STACK **plstack, l_int32 freeflag ); -LEPT_DLL extern l_ok lstackAdd ( L_STACK *lstack, void *item ); -LEPT_DLL extern void * lstackRemove ( L_STACK *lstack ); -LEPT_DLL extern l_int32 lstackGetCount ( L_STACK *lstack ); -LEPT_DLL extern l_ok lstackPrint ( FILE *fp, L_STACK *lstack ); -LEPT_DLL extern L_STRCODE * strcodeCreate ( l_int32 fileno ); -LEPT_DLL extern l_ok strcodeCreateFromFile ( const char *filein, l_int32 fileno, const char *outdir ); -LEPT_DLL extern l_ok strcodeGenerate ( L_STRCODE *strcode, const char *filein, const char *type ); -LEPT_DLL extern l_int32 strcodeFinalize ( L_STRCODE **pstrcode, const char *outdir ); -LEPT_DLL extern l_int32 l_getStructStrFromFile ( const char *filename, l_int32 field, char **pstr ); -LEPT_DLL extern l_ok pixFindStrokeLength ( PIX *pixs, l_int32 *tab8, l_int32 *plength ); -LEPT_DLL extern l_ok pixFindStrokeWidth ( PIX *pixs, l_float32 thresh, l_int32 *tab8, l_float32 *pwidth, NUMA **pnahisto ); -LEPT_DLL extern NUMA * pixaFindStrokeWidth ( PIXA *pixa, l_float32 
thresh, l_int32 *tab8, l_int32 debug ); -LEPT_DLL extern PIXA * pixaModifyStrokeWidth ( PIXA *pixas, l_float32 targetw ); -LEPT_DLL extern PIX * pixModifyStrokeWidth ( PIX *pixs, l_float32 width, l_float32 targetw ); -LEPT_DLL extern PIXA * pixaSetStrokeWidth ( PIXA *pixas, l_int32 width, l_int32 thinfirst, l_int32 connectivity ); -LEPT_DLL extern PIX * pixSetStrokeWidth ( PIX *pixs, l_int32 width, l_int32 thinfirst, l_int32 connectivity ); -LEPT_DLL extern l_int32 * sudokuReadFile ( const char *filename ); -LEPT_DLL extern l_int32 * sudokuReadString ( const char *str ); -LEPT_DLL extern L_SUDOKU * sudokuCreate ( l_int32 *array ); -LEPT_DLL extern void sudokuDestroy ( L_SUDOKU **psud ); -LEPT_DLL extern l_int32 sudokuSolve ( L_SUDOKU *sud ); -LEPT_DLL extern l_ok sudokuTestUniqueness ( l_int32 *array, l_int32 *punique ); -LEPT_DLL extern L_SUDOKU * sudokuGenerate ( l_int32 *array, l_int32 seed, l_int32 minelems, l_int32 maxtries ); -LEPT_DLL extern l_int32 sudokuOutput ( L_SUDOKU *sud, l_int32 arraytype ); -LEPT_DLL extern PIX * pixAddSingleTextblock ( PIX *pixs, L_BMF *bmf, const char *textstr, l_uint32 val, l_int32 location, l_int32 *poverflow ); -LEPT_DLL extern PIX * pixAddTextlines ( PIX *pixs, L_BMF *bmf, const char *textstr, l_uint32 val, l_int32 location ); -LEPT_DLL extern l_ok pixSetTextblock ( PIX *pixs, L_BMF *bmf, const char *textstr, l_uint32 val, l_int32 x0, l_int32 y0, l_int32 wtext, l_int32 firstindent, l_int32 *poverflow ); -LEPT_DLL extern l_ok pixSetTextline ( PIX *pixs, L_BMF *bmf, const char *textstr, l_uint32 val, l_int32 x0, l_int32 y0, l_int32 *pwidth, l_int32 *poverflow ); -LEPT_DLL extern PIXA * pixaAddTextNumber ( PIXA *pixas, L_BMF *bmf, NUMA *na, l_uint32 val, l_int32 location ); -LEPT_DLL extern PIXA * pixaAddTextlines ( PIXA *pixas, L_BMF *bmf, SARRAY *sa, l_uint32 val, l_int32 location ); -LEPT_DLL extern l_ok pixaAddPixWithText ( PIXA *pixa, PIX *pixs, l_int32 reduction, L_BMF *bmf, const char *textstr, l_uint32 val, l_int32 
location ); -LEPT_DLL extern SARRAY * bmfGetLineStrings ( L_BMF *bmf, const char *textstr, l_int32 maxw, l_int32 firstindent, l_int32 *ph ); -LEPT_DLL extern NUMA * bmfGetWordWidths ( L_BMF *bmf, const char *textstr, SARRAY *sa ); -LEPT_DLL extern l_ok bmfGetStringWidth ( L_BMF *bmf, const char *textstr, l_int32 *pw ); -LEPT_DLL extern SARRAY * splitStringToParagraphs ( char *textstr, l_int32 splitflag ); -LEPT_DLL extern PIX * pixReadTiff ( const char *filename, l_int32 n ); -LEPT_DLL extern PIX * pixReadStreamTiff ( FILE *fp, l_int32 n ); -LEPT_DLL extern l_ok pixWriteTiff ( const char *filename, PIX *pix, l_int32 comptype, const char *modestr ); -LEPT_DLL extern l_ok pixWriteTiffCustom ( const char *filename, PIX *pix, l_int32 comptype, const char *modestr, NUMA *natags, SARRAY *savals, SARRAY *satypes, NUMA *nasizes ); -LEPT_DLL extern l_ok pixWriteStreamTiff ( FILE *fp, PIX *pix, l_int32 comptype ); -LEPT_DLL extern l_ok pixWriteStreamTiffWA ( FILE *fp, PIX *pix, l_int32 comptype, const char *modestr ); -LEPT_DLL extern PIX * pixReadFromMultipageTiff ( const char *fname, size_t *poffset ); -LEPT_DLL extern PIXA * pixaReadMultipageTiff ( const char *filename ); -LEPT_DLL extern l_ok pixaWriteMultipageTiff ( const char *fname, PIXA *pixa ); -LEPT_DLL extern l_ok writeMultipageTiff ( const char *dirin, const char *substr, const char *fileout ); -LEPT_DLL extern l_ok writeMultipageTiffSA ( SARRAY *sa, const char *fileout ); -LEPT_DLL extern l_ok fprintTiffInfo ( FILE *fpout, const char *tiffile ); -LEPT_DLL extern l_ok tiffGetCount ( FILE *fp, l_int32 *pn ); -LEPT_DLL extern l_ok getTiffResolution ( FILE *fp, l_int32 *pxres, l_int32 *pyres ); -LEPT_DLL extern l_ok readHeaderTiff ( const char *filename, l_int32 n, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *pres, l_int32 *pcmap, l_int32 *pformat ); -LEPT_DLL extern l_ok freadHeaderTiff ( FILE *fp, l_int32 n, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *pres, l_int32 *pcmap, 
l_int32 *pformat ); -LEPT_DLL extern l_ok readHeaderMemTiff ( const l_uint8 *cdata, size_t size, l_int32 n, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *pres, l_int32 *pcmap, l_int32 *pformat ); -LEPT_DLL extern l_ok findTiffCompression ( FILE *fp, l_int32 *pcomptype ); -LEPT_DLL extern l_ok extractG4DataFromFile ( const char *filein, l_uint8 **pdata, size_t *pnbytes, l_int32 *pw, l_int32 *ph, l_int32 *pminisblack ); -LEPT_DLL extern PIX * pixReadMemTiff ( const l_uint8 *cdata, size_t size, l_int32 n ); -LEPT_DLL extern PIX * pixReadMemFromMultipageTiff ( const l_uint8 *cdata, size_t size, size_t *poffset ); -LEPT_DLL extern PIXA * pixaReadMemMultipageTiff ( const l_uint8 *data, size_t size ); -LEPT_DLL extern l_ok pixaWriteMemMultipageTiff ( l_uint8 **pdata, size_t *psize, PIXA *pixa ); -LEPT_DLL extern l_ok pixWriteMemTiff ( l_uint8 **pdata, size_t *psize, PIX *pix, l_int32 comptype ); -LEPT_DLL extern l_ok pixWriteMemTiffCustom ( l_uint8 **pdata, size_t *psize, PIX *pix, l_int32 comptype, NUMA *natags, SARRAY *savals, SARRAY *satypes, NUMA *nasizes ); -LEPT_DLL extern l_int32 setMsgSeverity ( l_int32 newsev ); -LEPT_DLL extern l_int32 returnErrorInt ( const char *msg, const char *procname, l_int32 ival ); -LEPT_DLL extern l_float32 returnErrorFloat ( const char *msg, const char *procname, l_float32 fval ); -LEPT_DLL extern void * returnErrorPtr ( const char *msg, const char *procname, void *pval ); -LEPT_DLL extern void leptSetStderrHandler ( void ( *handler ) ( const char * ) ); -LEPT_DLL extern void lept_stderr ( const char *fmt, ... 
); -LEPT_DLL extern l_ok filesAreIdentical ( const char *fname1, const char *fname2, l_int32 *psame ); -LEPT_DLL extern l_uint16 convertOnLittleEnd16 ( l_uint16 shortin ); -LEPT_DLL extern l_uint16 convertOnBigEnd16 ( l_uint16 shortin ); -LEPT_DLL extern l_uint32 convertOnLittleEnd32 ( l_uint32 wordin ); -LEPT_DLL extern l_uint32 convertOnBigEnd32 ( l_uint32 wordin ); -LEPT_DLL extern l_ok fileCorruptByDeletion ( const char *filein, l_float32 loc, l_float32 size, const char *fileout ); -LEPT_DLL extern l_ok fileCorruptByMutation ( const char *filein, l_float32 loc, l_float32 size, const char *fileout ); -LEPT_DLL extern l_ok fileReplaceBytes ( const char *filein, l_int32 start, l_int32 nbytes, l_uint8 *newdata, size_t newsize, const char *fileout ); -LEPT_DLL extern l_ok genRandomIntegerInRange ( l_int32 range, l_int32 seed, l_int32 *pval ); -LEPT_DLL extern l_int32 lept_roundftoi ( l_float32 fval ); -LEPT_DLL extern l_ok l_hashStringToUint64 ( const char *str, l_uint64 *phash ); -LEPT_DLL extern l_ok l_hashPtToUint64 ( l_int32 x, l_int32 y, l_uint64 *phash ); -LEPT_DLL extern l_ok l_hashFloat64ToUint64 ( l_int32 nbuckets, l_float64 val, l_uint64 *phash ); -LEPT_DLL extern l_ok findNextLargerPrime ( l_int32 start, l_uint32 *pprime ); -LEPT_DLL extern l_ok lept_isPrime ( l_uint64 n, l_int32 *pis_prime, l_uint32 *pfactor ); -LEPT_DLL extern l_uint32 convertIntToGrayCode ( l_uint32 val ); -LEPT_DLL extern l_uint32 convertGrayCodeToInt ( l_uint32 val ); -LEPT_DLL extern char * getLeptonicaVersion ( void ); -LEPT_DLL extern void startTimer ( void ); -LEPT_DLL extern l_float32 stopTimer ( void ); -LEPT_DLL extern L_TIMER startTimerNested ( void ); -LEPT_DLL extern l_float32 stopTimerNested ( L_TIMER rusage_start ); -LEPT_DLL extern void l_getCurrentTime ( l_int32 *sec, l_int32 *usec ); -LEPT_DLL extern L_WALLTIMER * startWallTimer ( void ); -LEPT_DLL extern l_float32 stopWallTimer ( L_WALLTIMER **ptimer ); -LEPT_DLL extern char * l_getFormattedDate ( void ); -LEPT_DLL 
extern char * stringNew ( const char *src ); -LEPT_DLL extern l_ok stringCopy ( char *dest, const char *src, l_int32 n ); -LEPT_DLL extern char * stringCopySegment ( const char *src, l_int32 start, l_int32 nbytes ); -LEPT_DLL extern l_ok stringReplace ( char **pdest, const char *src ); -LEPT_DLL extern l_int32 stringLength ( const char *src, size_t size ); -LEPT_DLL extern l_int32 stringCat ( char *dest, size_t size, const char *src ); -LEPT_DLL extern char * stringConcatNew ( const char *first, ... ); -LEPT_DLL extern char * stringJoin ( const char *src1, const char *src2 ); -LEPT_DLL extern l_ok stringJoinIP ( char **psrc1, const char *src2 ); -LEPT_DLL extern char * stringReverse ( const char *src ); -LEPT_DLL extern char * strtokSafe ( char *cstr, const char *seps, char **psaveptr ); -LEPT_DLL extern l_ok stringSplitOnToken ( char *cstr, const char *seps, char **phead, char **ptail ); -LEPT_DLL extern l_ok stringCheckForChars ( const char *src, const char *chars, l_int32 *pfound ); -LEPT_DLL extern char * stringRemoveChars ( const char *src, const char *remchars ); -LEPT_DLL extern char * stringReplaceEachSubstr ( const char *src, const char *sub1, const char *sub2, l_int32 *pcount ); -LEPT_DLL extern char * stringReplaceSubstr ( const char *src, const char *sub1, const char *sub2, l_int32 *ploc, l_int32 *pfound ); -LEPT_DLL extern L_DNA * stringFindEachSubstr ( const char *src, const char *sub ); -LEPT_DLL extern l_int32 stringFindSubstr ( const char *src, const char *sub, l_int32 *ploc ); -LEPT_DLL extern l_uint8 * arrayReplaceEachSequence ( const l_uint8 *datas, size_t dataslen, const l_uint8 *seq, size_t seqlen, const l_uint8 *newseq, size_t newseqlen, size_t *pdatadlen, l_int32 *pcount ); -LEPT_DLL extern L_DNA * arrayFindEachSequence ( const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen ); -LEPT_DLL extern l_ok arrayFindSequence ( const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen, l_int32 *poffset, l_int32 
*pfound ); -LEPT_DLL extern void * reallocNew ( void **pindata, l_int32 oldsize, l_int32 newsize ); -LEPT_DLL extern l_uint8 * l_binaryRead ( const char *filename, size_t *pnbytes ); -LEPT_DLL extern l_uint8 * l_binaryReadStream ( FILE *fp, size_t *pnbytes ); -LEPT_DLL extern l_uint8 * l_binaryReadSelect ( const char *filename, size_t start, size_t nbytes, size_t *pnread ); -LEPT_DLL extern l_uint8 * l_binaryReadSelectStream ( FILE *fp, size_t start, size_t nbytes, size_t *pnread ); -LEPT_DLL extern l_ok l_binaryWrite ( const char *filename, const char *operation, const void *data, size_t nbytes ); -LEPT_DLL extern size_t nbytesInFile ( const char *filename ); -LEPT_DLL extern size_t fnbytesInFile ( FILE *fp ); -LEPT_DLL extern l_uint8 * l_binaryCopy ( const l_uint8 *datas, size_t size ); -LEPT_DLL extern l_ok l_binaryCompare ( const l_uint8 *data1, size_t size1, const l_uint8 *data2, size_t size2, l_int32 *psame ); -LEPT_DLL extern l_ok fileCopy ( const char *srcfile, const char *newfile ); -LEPT_DLL extern l_ok fileConcatenate ( const char *srcfile, const char *destfile ); -LEPT_DLL extern l_ok fileAppendString ( const char *filename, const char *str ); -LEPT_DLL extern FILE * fopenReadStream ( const char *filename ); -LEPT_DLL extern FILE * fopenWriteStream ( const char *filename, const char *modestring ); -LEPT_DLL extern FILE * fopenReadFromMemory ( const l_uint8 *data, size_t size ); -LEPT_DLL extern FILE * fopenWriteWinTempfile ( void ); -LEPT_DLL extern FILE * lept_fopen ( const char *filename, const char *mode ); -LEPT_DLL extern l_ok lept_fclose ( FILE *fp ); -LEPT_DLL extern void * lept_calloc ( size_t nmemb, size_t size ); -LEPT_DLL extern void lept_free ( void *ptr ); -LEPT_DLL extern l_int32 lept_mkdir ( const char *subdir ); -LEPT_DLL extern l_int32 lept_rmdir ( const char *subdir ); -LEPT_DLL extern void lept_direxists ( const char *dir, l_int32 *pexists ); -LEPT_DLL extern l_int32 lept_rm_match ( const char *subdir, const char *substr ); -LEPT_DLL 
extern l_int32 lept_rm ( const char *subdir, const char *tail ); -LEPT_DLL extern l_int32 lept_rmfile ( const char *filepath ); -LEPT_DLL extern l_int32 lept_mv ( const char *srcfile, const char *newdir, const char *newtail, char **pnewpath ); -LEPT_DLL extern l_int32 lept_cp ( const char *srcfile, const char *newdir, const char *newtail, char **pnewpath ); -LEPT_DLL extern void callSystemDebug ( const char *cmd ); -LEPT_DLL extern l_ok splitPathAtDirectory ( const char *pathname, char **pdir, char **ptail ); -LEPT_DLL extern l_ok splitPathAtExtension ( const char *pathname, char **pbasename, char **pextension ); -LEPT_DLL extern char * pathJoin ( const char *dir, const char *fname ); -LEPT_DLL extern char * appendSubdirs ( const char *basedir, const char *subdirs ); -LEPT_DLL extern l_ok convertSepCharsInPath ( char *path, l_int32 type ); -LEPT_DLL extern char * genPathname ( const char *dir, const char *fname ); -LEPT_DLL extern l_ok makeTempDirname ( char *result, size_t nbytes, const char *subdir ); -LEPT_DLL extern l_ok modifyTrailingSlash ( char *path, size_t nbytes, l_int32 flag ); -LEPT_DLL extern char * l_makeTempFilename ( void ); -LEPT_DLL extern l_int32 extractNumberFromFilename ( const char *fname, l_int32 numpre, l_int32 numpost ); -LEPT_DLL extern PIX * pixSimpleCaptcha ( PIX *pixs, l_int32 border, l_int32 nterms, l_uint32 seed, l_uint32 color, l_int32 cmapflag ); -LEPT_DLL extern PIX * pixRandomHarmonicWarp ( PIX *pixs, l_float32 xmag, l_float32 ymag, l_float32 xfreq, l_float32 yfreq, l_int32 nx, l_int32 ny, l_uint32 seed, l_int32 grayval ); -LEPT_DLL extern PIX * pixWarpStereoscopic ( PIX *pixs, l_int32 zbend, l_int32 zshiftt, l_int32 zshiftb, l_int32 ybendt, l_int32 ybendb, l_int32 redleft ); -LEPT_DLL extern PIX * pixStretchHorizontal ( PIX *pixs, l_int32 dir, l_int32 type, l_int32 hmax, l_int32 operation, l_int32 incolor ); -LEPT_DLL extern PIX * pixStretchHorizontalSampled ( PIX *pixs, l_int32 dir, l_int32 type, l_int32 hmax, l_int32 incolor ); 
-LEPT_DLL extern PIX * pixStretchHorizontalLI ( PIX *pixs, l_int32 dir, l_int32 type, l_int32 hmax, l_int32 incolor ); -LEPT_DLL extern PIX * pixQuadraticVShear ( PIX *pixs, l_int32 dir, l_int32 vmaxt, l_int32 vmaxb, l_int32 operation, l_int32 incolor ); -LEPT_DLL extern PIX * pixQuadraticVShearSampled ( PIX *pixs, l_int32 dir, l_int32 vmaxt, l_int32 vmaxb, l_int32 incolor ); -LEPT_DLL extern PIX * pixQuadraticVShearLI ( PIX *pixs, l_int32 dir, l_int32 vmaxt, l_int32 vmaxb, l_int32 incolor ); -LEPT_DLL extern PIX * pixStereoFromPair ( PIX *pix1, PIX *pix2, l_float32 rwt, l_float32 gwt, l_float32 bwt ); -LEPT_DLL extern L_WSHED * wshedCreate ( PIX *pixs, PIX *pixm, l_int32 mindepth, l_int32 debugflag ); -LEPT_DLL extern void wshedDestroy ( L_WSHED **pwshed ); -LEPT_DLL extern l_ok wshedApply ( L_WSHED *wshed ); -LEPT_DLL extern l_ok wshedBasins ( L_WSHED *wshed, PIXA **ppixa, NUMA **pnalevels ); -LEPT_DLL extern PIX * wshedRenderFill ( L_WSHED *wshed ); -LEPT_DLL extern PIX * wshedRenderColors ( L_WSHED *wshed ); -LEPT_DLL extern l_ok pixaWriteWebPAnim ( const char *filename, PIXA *pixa, l_int32 loopcount, l_int32 duration, l_int32 quality, l_int32 lossless ); -LEPT_DLL extern l_ok pixaWriteStreamWebPAnim ( FILE *fp, PIXA *pixa, l_int32 loopcount, l_int32 duration, l_int32 quality, l_int32 lossless ); -LEPT_DLL extern l_ok pixaWriteMemWebPAnim ( l_uint8 **pencdata, size_t *pencsize, PIXA *pixa, l_int32 loopcount, l_int32 duration, l_int32 quality, l_int32 lossless ); -LEPT_DLL extern PIX * pixReadStreamWebP ( FILE *fp ); -LEPT_DLL extern PIX * pixReadMemWebP ( const l_uint8 *filedata, size_t filesize ); -LEPT_DLL extern l_ok readHeaderWebP ( const char *filename, l_int32 *pw, l_int32 *ph, l_int32 *pspp ); -LEPT_DLL extern l_ok readHeaderMemWebP ( const l_uint8 *data, size_t size, l_int32 *pw, l_int32 *ph, l_int32 *pspp ); -LEPT_DLL extern l_ok pixWriteWebP ( const char *filename, PIX *pixs, l_int32 quality, l_int32 lossless ); -LEPT_DLL extern l_ok 
pixWriteStreamWebP ( FILE *fp, PIX *pixs, l_int32 quality, l_int32 lossless ); -LEPT_DLL extern l_ok pixWriteMemWebP ( l_uint8 **pencdata, size_t *pencsize, PIX *pixs, l_int32 quality, l_int32 lossless ); -LEPT_DLL extern l_int32 l_jpegSetQuality ( l_int32 new_quality ); -LEPT_DLL extern void setLeptDebugOK ( l_int32 allow ); -LEPT_DLL extern l_ok pixaWriteFiles ( const char *rootname, PIXA *pixa, l_int32 format ); -LEPT_DLL extern l_ok pixWriteDebug ( const char *fname, PIX *pix, l_int32 format ); -LEPT_DLL extern l_ok pixWrite ( const char *fname, PIX *pix, l_int32 format ); -LEPT_DLL extern l_ok pixWriteAutoFormat ( const char *filename, PIX *pix ); -LEPT_DLL extern l_ok pixWriteStream ( FILE *fp, PIX *pix, l_int32 format ); -LEPT_DLL extern l_ok pixWriteImpliedFormat ( const char *filename, PIX *pix, l_int32 quality, l_int32 progressive ); -LEPT_DLL extern l_int32 pixChooseOutputFormat ( PIX *pix ); -LEPT_DLL extern l_int32 getImpliedFileFormat ( const char *filename ); -LEPT_DLL extern l_ok pixGetAutoFormat ( PIX *pix, l_int32 *pformat ); -LEPT_DLL extern const char * getFormatExtension ( l_int32 format ); -LEPT_DLL extern l_ok pixWriteMem ( l_uint8 **pdata, size_t *psize, PIX *pix, l_int32 format ); -LEPT_DLL extern l_ok l_fileDisplay ( const char *fname, l_int32 x, l_int32 y, l_float32 scale ); -LEPT_DLL extern l_ok pixDisplay ( PIX *pixs, l_int32 x, l_int32 y ); -LEPT_DLL extern l_ok pixDisplayWithTitle ( PIX *pixs, l_int32 x, l_int32 y, const char *title, l_int32 dispflag ); -LEPT_DLL extern PIX * pixMakeColorSquare ( l_uint32 color, l_int32 size, l_int32 addlabel, l_int32 location, l_uint32 textcolor ); -LEPT_DLL extern void l_chooseDisplayProg ( l_int32 selection ); -LEPT_DLL extern void changeFormatForMissingLib ( l_int32 *pformat ); -LEPT_DLL extern l_ok pixDisplayWrite ( PIX *pixs, l_int32 reduction ); -LEPT_DLL extern l_ok pixSaveTiled ( PIX *pixs, PIXA *pixa, l_float32 scalefactor, l_int32 newrow, l_int32 space, l_int32 dp ); -LEPT_DLL extern l_ok 
pixSaveTiledOutline ( PIX *pixs, PIXA *pixa, l_float32 scalefactor, l_int32 newrow, l_int32 space, l_int32 linewidth, l_int32 dp ); -LEPT_DLL extern l_ok pixSaveTiledWithText ( PIX *pixs, PIXA *pixa, l_int32 outwidth, l_int32 newrow, l_int32 space, l_int32 linewidth, L_BMF *bmf, const char *textstr, l_uint32 val, l_int32 location ); -LEPT_DLL extern l_uint8 * zlibCompress ( l_uint8 *datain, size_t nin, size_t *pnout ); -LEPT_DLL extern l_uint8 * zlibUncompress ( l_uint8 *datain, size_t nin, size_t *pnout ); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* NO_PROTOS */ - - -#endif /* LEPTONICA_ALLHEADERS_H */ - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/alltypes.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/alltypes.h deleted file mode 100644 index a84c0bfa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/alltypes.h +++ /dev/null @@ -1,66 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_ALLTYPES_H -#define LEPTONICA_ALLTYPES_H - - /* Standard */ -#include -#include -#include - - /* General and configuration defs */ -#include "endianness.h" -#include "environ.h" - - /* Generic and non-image-specific containers */ -#include "array.h" -#include "bbuffer.h" -#include "heap.h" -#include "list.h" -#include "ptra.h" -#include "queue.h" -#include "rbtree.h" -#include "stack.h" - - /* Imaging */ -#include "arrayaccess.h" -#include "bmf.h" -#include "ccbord.h" -#include "dewarp.h" -#include "gplot.h" -#include "imageio.h" -#include "jbclass.h" -#include "morph.h" -#include "pix.h" -#include "recog.h" -#include "regutils.h" -#include "stringcode.h" -#include "sudoku.h" -#include "watershed.h" - - -#endif /* LEPTONICA_ALLTYPES_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/array.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/array.h deleted file mode 100644 index 5c13977a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/array.h +++ /dev/null @@ -1,158 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_ARRAY_H -#define LEPTONICA_ARRAY_H - -/*! - * \file array.h - * - *
- *  Contains the following structs:
- *      struct Numa
- *      struct Numaa
- *      struct L_Dna
- *      struct L_Dnaa
- *      struct L_DnaHash
- *      struct Sarray
- *      struct L_Bytea
- *
- *  Contains definitions for:
- *      Numa interpolation flags
- *      Numa and FPix border flags
- *      Numa data type conversion to string
- * 
- */ - - -/*------------------------------------------------------------------------* - * Array Structs * - *------------------------------------------------------------------------*/ - -/*! Numa version for serialization */ -#define NUMA_VERSION_NUMBER 1 - - /*! Number array: an array of floats */ -struct Numa -{ - l_int32 nalloc; /*!< size of allocated number array */ - l_int32 n; /*!< number of numbers saved */ - l_int32 refcount; /*!< reference count (1 if no clones) */ - l_float32 startx; /*!< x value assigned to array[0] */ - l_float32 delx; /*!< change in x value as i --> i + 1 */ - l_float32 *array; /*!< number array */ -}; -typedef struct Numa NUMA; - - /*! Array of number arrays */ -struct Numaa -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - l_int32 n; /*!< number of Numa saved */ - struct Numa **numa; /*!< array of Numa */ -}; -typedef struct Numaa NUMAA; - -/*! Dna version for serialization */ -#define DNA_VERSION_NUMBER 1 - - /*! Double number array: an array of doubles */ -struct L_Dna -{ - l_int32 nalloc; /*!< size of allocated number array */ - l_int32 n; /*!< number of numbers saved */ - l_int32 refcount; /*!< reference count (1 if no clones) */ - l_float64 startx; /*!< x value assigned to array[0] */ - l_float64 delx; /*!< change in x value as i --> i + 1 */ - l_float64 *array; /*!< number array */ -}; -typedef struct L_Dna L_DNA; - - /*! Array of double number arrays */ -struct L_Dnaa -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - l_int32 n; /*!< number of L_Dna saved */ - struct L_Dna **dna; /*!< array of L_Dna */ -}; -typedef struct L_Dnaa L_DNAA; - - /*! A hash table of Dnas */ -struct L_DnaHash -{ - l_int32 nbuckets; - l_int32 initsize; /*!< initial size of each dna that is made */ - struct L_Dna **dna; /*!< array of L_Dna */ -}; -typedef struct L_DnaHash L_DNAHASH; - -/*! Sarray version for serialization */ -#define SARRAY_VERSION_NUMBER 1 - - /*! 
String array: an array of C strings */ -struct Sarray -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - l_int32 n; /*!< number of strings allocated */ - l_int32 refcount; /*!< reference count (1 if no clones) */ - char **array; /*!< string array */ -}; -typedef struct Sarray SARRAY; - - /*! Byte array (analogous to C++ "string") */ -struct L_Bytea -{ - size_t nalloc; /*!< number of bytes allocated in data array */ - size_t size; /*!< number of bytes presently used */ - l_int32 refcount; /*!< reference count (1 if no clones) */ - l_uint8 *data; /*!< data array */ -}; -typedef struct L_Bytea L_BYTEA; - - -/*------------------------------------------------------------------------* - * Array flags * - *------------------------------------------------------------------------*/ -/*! Numa Interpolation */ -enum { - L_LINEAR_INTERP = 1, /*!< linear */ - L_QUADRATIC_INTERP = 2 /*!< quadratic */ -}; - -/*! Border Adding */ -enum { - L_CONTINUED_BORDER = 1, /*!< extended with same value */ - L_SLOPE_BORDER = 2, /*!< extended with constant normal derivative */ - L_MIRRORED_BORDER = 3 /*!< mirrored */ -}; - -/*! Numa Data Conversion */ -enum { - L_INTEGER_VALUE = 1, /*!< convert to integer */ - L_FLOAT_VALUE = 2 /*!< convert to float */ -}; - -#endif /* LEPTONICA_ARRAY_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/arrayaccess.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/arrayaccess.c deleted file mode 100644 index 6675a535..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/arrayaccess.c +++ /dev/null @@ -1,367 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file arrayaccess.c - *
- *
- *     Access within an array of 32-bit words
- *
- *           l_int32     l_getDataBit()
- *           void        l_setDataBit()
- *           void        l_clearDataBit()
- *           void        l_setDataBitVal()
- *           l_int32     l_getDataDibit()
- *           void        l_setDataDibit()
- *           void        l_clearDataDibit()
- *           l_int32     l_getDataQbit()
- *           void        l_setDataQbit()
- *           void        l_clearDataQbit()
- *           l_int32     l_getDataByte()
- *           void        l_setDataByte()
- *           l_int32     l_getDataTwoBytes()
- *           void        l_setDataTwoBytes()
- *           l_int32     l_getDataFourBytes()
- *           void        l_setDataFourBytes()
- *
- *     Note that these all require 32-bit alignment, and hence an input
- *     ptr to l_uint32.  However, this is not enforced by the compiler.
- *     Instead, we allow the use of a void* ptr, because the line ptrs
- *     are an efficient way to get random access (see pixGetLinePtrs()).
- *     It is then necessary to cast internally within each function
- *     because ptr arithmetic requires knowing the size of the units
- *     being referenced.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*----------------------------------------------------------------------* - * Access within an array of 32-bit words * - *----------------------------------------------------------------------*/ -/*! - * \brief l_getDataBit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return val of the nth 1-bit pixel. - */ -l_int32 -l_getDataBit(const void *line, - l_int32 n) -{ - return (*((const l_uint32 *)line + (n >> 5)) >> (31 - (n & 31))) & 1; -} - - -/*! - * \brief l_setDataBit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return void - * - * Action: sets the pixel to 1 - */ -void -l_setDataBit(void *line, - l_int32 n) -{ - *((l_uint32 *)line + (n >> 5)) |= (0x80000000 >> (n & 31)); -} - - -/*! - * \brief l_clearDataBit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return void - * - * Action: sets the 1-bit pixel to 0 - */ -void -l_clearDataBit(void *line, - l_int32 n) -{ - *((l_uint32 *)line + (n >> 5)) &= ~(0x80000000 >> (n & 31)); -} - - -/*! - * \brief l_setDataBitVal() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \param[in] val val to be inserted: 0 or 1 - * \return void - * - *
- * Notes:
- *      (1) This is an accessor for a 1 bpp pix.
- *      (2) It is actually a little slower than using:
- *            if (val == 0)
- *                l_ClearDataBit(line, n);
- *            else
- *                l_SetDataBit(line, n);
- * 
- */ -void -l_setDataBitVal(void *line, - l_int32 n, - l_int32 val) -{ -l_uint32 *pword; - - pword = (l_uint32 *)line + (n >> 5); - *pword &= ~(0x80000000 >> (n & 31)); /* clear */ - *pword |= (l_uint32)val << (31 - (n & 31)); /* set */ - return; -} - - -/*! - * \brief l_getDataDibit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return val of the nth 2-bit pixel. - */ -l_int32 -l_getDataDibit(const void *line, - l_int32 n) -{ - return (*((const l_uint32 *)line + (n >> 4)) >> (2 * (15 - (n & 15)))) & 3; -} - - -/*! - * \brief l_setDataDibit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \param[in] val val to be inserted: 0 - 3 - * \return void - */ -void -l_setDataDibit(void *line, - l_int32 n, - l_int32 val) -{ -l_uint32 *pword; - - pword = (l_uint32 *)line + (n >> 4); - *pword &= ~(0xc0000000 >> (2 * (n & 15))); /* clear */ - *pword |= (l_uint32)(val & 3) << (30 - 2 * (n & 15)); /* set */ - return; -} - - -/*! - * \brief l_clearDataDibit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return void - * - * Action: sets the 2-bit pixel to 0 - */ -void -l_clearDataDibit(void *line, - l_int32 n) -{ - *((l_uint32 *)line + (n >> 4)) &= ~(0xc0000000 >> (2 * (n & 15))); -} - - -/*! - * \brief l_getDataQbit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return val of the nth 4-bit pixel. - */ -l_int32 -l_getDataQbit(const void *line, - l_int32 n) -{ - return (*((const l_uint32 *)line + (n >> 3)) >> (4 * (7 - (n & 7)))) & 0xf; -} - - -/*! 
- * \brief l_setDataQbit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \param[in] val val to be inserted: 0 - 0xf - * \return void - */ -void -l_setDataQbit(void *line, - l_int32 n, - l_int32 val) -{ -l_uint32 *pword; - - pword = (l_uint32 *)line + (n >> 3); - *pword &= ~(0xf0000000 >> (4 * (n & 7))); /* clear */ - *pword |= (l_uint32)(val & 15) << (28 - 4 * (n & 7)); /* set */ - return; -} - - -/*! - * \brief l_clearDataQbit() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return void - * - * Action: sets the 4-bit pixel to 0 - */ -void -l_clearDataQbit(void *line, - l_int32 n) -{ - *((l_uint32 *)line + (n >> 3)) &= ~(0xf0000000 >> (4 * (n & 7))); -} - - -/*! - * \brief l_getDataByte() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return value of the n-th byte pixel - */ -l_int32 -l_getDataByte(const void *line, - l_int32 n) -{ -#ifdef L_BIG_ENDIAN - return *((const l_uint8 *)line + n); -#else /* L_LITTLE_ENDIAN */ - return *(l_uint8 *)((l_uintptr_t)((const l_uint8 *)line + n) ^ 3); -#endif /* L_BIG_ENDIAN */ -} - - -/*! - * \brief l_setDataByte() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \param[in] val val to be inserted: 0 - 0xff - * \return void - */ -void -l_setDataByte(void *line, - l_int32 n, - l_int32 val) -{ -#ifdef L_BIG_ENDIAN - *((l_uint8 *)line + n) = val; -#else /* L_LITTLE_ENDIAN */ - *(l_uint8 *)((l_uintptr_t)((l_uint8 *)line + n) ^ 3) = val; -#endif /* L_BIG_ENDIAN */ -} - - -/*! 
- * \brief l_getDataTwoBytes() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return value of the n-th 2-byte pixel - */ -l_int32 -l_getDataTwoBytes(const void *line, - l_int32 n) -{ -#ifdef L_BIG_ENDIAN - return *((const l_uint16 *)line + n); -#else /* L_LITTLE_ENDIAN */ - return *(l_uint16 *)((l_uintptr_t)((const l_uint16 *)line + n) ^ 2); -#endif /* L_BIG_ENDIAN */ -} - - -/*! - * \brief l_setDataTwoBytes() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \param[in] val val to be inserted: 0 - 0xffff - * \return void - */ -void -l_setDataTwoBytes(void *line, - l_int32 n, - l_int32 val) -{ -#ifdef L_BIG_ENDIAN - *((l_uint16 *)line + n) = val; -#else /* L_LITTLE_ENDIAN */ - *(l_uint16 *)((l_uintptr_t)((l_uint16 *)line + n) ^ 2) = val; -#endif /* L_BIG_ENDIAN */ -} - - -/*! - * \brief l_getDataFourBytes() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \return value of the n-th 4-byte pixel - */ -l_int32 -l_getDataFourBytes(const void *line, - l_int32 n) -{ - return *((const l_uint32 *)line + n); -} - - -/*! - * \brief l_setDataFourBytes() - * - * \param[in] line ptr to beginning of data line - * \param[in] n pixel index - * \param[in] val val to be inserted: 0 - 0xffffffff - * \return void - */ -void -l_setDataFourBytes(void *line, - l_int32 n, - l_int32 val) -{ - *((l_uint32 *)line + n) = val; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/arrayaccess.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/arrayaccess.h deleted file mode 100644 index 1a831bcc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/arrayaccess.h +++ /dev/null @@ -1,270 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_ARRAY_ACCESS_H -#define LEPTONICA_ARRAY_ACCESS_H - -/*! - * \file arrayaccess.h - * - *
- *  1, 2, 4, 8, 16 and 32 bit data access within an array of 32-bit words
- *
- *  This is used primarily to access 1, 2, 4, 8, 16 and 32 bit pixels
- *  in a line of image data, represented as an array of 32-bit words.
- *
- *     pdata:  pointer to first 32-bit word in the array
- *     n:      index of the pixel in the array
- *
- *  Function calls for these accessors are defined in arrayaccess.c.
- *
- *  However, for efficiency we use the inline macros for all accesses.
- *  Even though the 2 and 4 bit set* accessors are more complicated,
- *  they are about 10% faster than the function calls.
- *
- *  The 32 bit access is just a cast and ptr arithmetic.  We include
- *  it so that the input ptr can be void*.
- *
- *  At the end of this file is code for invoking the function calls
- *  instead of inlining.
- *
- *  The macro SET_DATA_BIT_VAL(pdata, n, val) is a bit slower than
- *      if (val == 0)
- *          CLEAR_DATA_BIT(pdata, n);
- *      else
- *          SET_DATA_BIT(pdata, n);
- *
- *  Some compilers complain when the SET macros are surrounded by
- *  parentheses, because parens require an evaluation and it is not
- *  defined for SET macros.  If SET_DATA_QBIT were defined as a
- *  compound macro, in analogy to l_setDataQbit(), it requires
- *  surrounding braces:
- * \code
- *     #define  SET_DATA_QBIT(pdata, n, val) \
- *        {l_uint32 *_TEMP_WORD_PTR_; \
- *         _TEMP_WORD_PTR_ = (l_uint32 *)(pdata) + ((n) >> 3); \
- *         *_TEMP_WORD_PTR_ &= ~(0xf0000000 >> (4 * ((n) & 7))); \
- *         *_TEMP_WORD_PTR_ |= (((val) & 15) << (28 - 4 * ((n) & 7)));}
- * \endcode
- *  but if used in an if/else
- * \code
- *      if (x)
- *         SET_DATA_QBIT(...);
- *      else
- *         ...
- * \endcode
- *  the compiler sees
- * \code
- *      if (x)
- *         {......};
- *      else
- *         ...
- * \endcode
- *  The semicolon comes after the brace and will not compile.
- *  This can be fixed in the call by either omitting the semicolon
- *  or requiring another set of braces around SET_DATA_QBIT(), but
- *  both these options break compatibility with current code, and
- *  require special attention by anyone using the macros.
- *
- *  There are (at least) two ways to fix this in the macro definitions,
- *  suggested by Dave Bryan.
- *  (1) Surround the braces in the macro above with
- *         do {....} while(0)
- *      Then the semicolon just terminates the expression.
- *  (2) Reduce the blocks to a single expression; e.g,
- *         *((l_uint32 *)(pdata) + ((n) >> 3)) = \
- *           *((l_uint32 *)(pdata) + ((n) >> 3)) \
- *           & ~(0xf0000000 >> (4 * ((n) & 7))) \
- *           | (((val) & 15) << (28 - 4 * ((n) & 7)))
- *      This appears to cause redundant computation, but the compiler
- *      should evaluate the common subexpression only once.
- *  All these methods have the same performance, giving about 300M
- *  SET_DATA_QBIT operations per second on a fast 64 bit system.
- *  Using the function calls instead of the macros results in about 250M
- *  SET_DATA_QBIT operations per second, a performance hit of nearly 20%.
- * 
- */ - -#define USE_INLINE_ACCESSORS 1 - -#if USE_INLINE_ACCESSORS - - /*=============================================================*/ - /* Faster: use in line accessors */ - /*=============================================================*/ - - /*--------------------------------------------------* - * 1 bit access * - *--------------------------------------------------*/ -/*! 1 bit access - get */ -#define GET_DATA_BIT(pdata, n) \ - ((*((const l_uint32 *)(pdata) + ((n) >> 5)) >> (31 - ((n) & 31))) & 1) - -/*! 1 bit access - set */ -#define SET_DATA_BIT(pdata, n) \ - *((l_uint32 *)(pdata) + ((n) >> 5)) |= (0x80000000 >> ((n) & 31)) - -/*! 1 bit access - clear */ -#define CLEAR_DATA_BIT(pdata, n) \ - *((l_uint32 *)(pdata) + ((n) >> 5)) &= ~(0x80000000 >> ((n) & 31)) - -/*! 1 bit access - set value (0 or 1) */ -#define SET_DATA_BIT_VAL(pdata, n, val) \ - *((l_uint32 *)(pdata) + ((n) >> 5)) = \ - ((*((l_uint32 *)(pdata) + ((n) >> 5)) \ - & (~(0x80000000 >> ((n) & 31)))) \ - | ((l_uint32)(val) << (31 - ((n) & 31)))) - - /*--------------------------------------------------* - * 2 bit access * - *--------------------------------------------------*/ -/*! 2 bit access - get */ -#define GET_DATA_DIBIT(pdata, n) \ - ((*((const l_uint32 *)(pdata) + ((n) >> 4)) >> (2 * (15 - ((n) & 15)))) & 3) - -/*! 2 bit access - set value (0 ... 3) */ -#define SET_DATA_DIBIT(pdata, n, val) \ - *((l_uint32 *)(pdata) + ((n) >> 4)) = \ - ((*((l_uint32 *)(pdata) + ((n) >> 4)) \ - & (~(0xc0000000 >> (2 * ((n) & 15))))) \ - | ((l_uint32)((val) & 3) << (30 - 2 * ((n) & 15)))) - -/*! 2 bit access - clear */ -#define CLEAR_DATA_DIBIT(pdata, n) \ - *((l_uint32 *)(pdata) + ((n) >> 4)) &= ~(0xc0000000 >> (2 * ((n) & 15))) - - - /*--------------------------------------------------* - * 4 bit access * - *--------------------------------------------------*/ -/*! 4 bit access - get */ -#define GET_DATA_QBIT(pdata, n) \ - ((*((const l_uint32 *)(pdata) + ((n) >> 3)) >> (4 * (7 - ((n) & 7)))) & 0xf) - -/*! 
4 bit access - set value (0 ... 15) */ -#define SET_DATA_QBIT(pdata, n, val) \ - *((l_uint32 *)(pdata) + ((n) >> 3)) = \ - ((*((l_uint32 *)(pdata) + ((n) >> 3)) \ - & (~(0xf0000000 >> (4 * ((n) & 7))))) \ - | ((l_uint32)((val) & 15) << (28 - 4 * ((n) & 7)))) - -/*! 4 bit access - clear */ -#define CLEAR_DATA_QBIT(pdata, n) \ - *((l_uint32 *)(pdata) + ((n) >> 3)) &= ~(0xf0000000 >> (4 * ((n) & 7))) - - - /*--------------------------------------------------* - * 8 bit access * - *--------------------------------------------------*/ -#ifdef L_BIG_ENDIAN -/*! 8 bit access - get */ -#define GET_DATA_BYTE(pdata, n) \ - (*((const l_uint8 *)(pdata) + (n))) -#else /* L_LITTLE_ENDIAN */ -/*! 8 bit access - get */ -#define GET_DATA_BYTE(pdata, n) \ - (*(l_uint8 *)((l_uintptr_t)((const l_uint8 *)(pdata) + (n)) ^ 3)) -#endif /* L_BIG_ENDIAN */ - -#ifdef L_BIG_ENDIAN -/*! 8 bit access - set value (0 ... 255) */ -#define SET_DATA_BYTE(pdata, n, val) \ - *((l_uint8 *)(pdata) + (n)) = (val) -#else /* L_LITTLE_ENDIAN */ -/*! 8 bit access - set value (0 ... 255) */ -#define SET_DATA_BYTE(pdata, n, val) \ - *(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val) -#endif /* L_BIG_ENDIAN */ - - - /*--------------------------------------------------* - * 16 bit access * - *--------------------------------------------------*/ -#ifdef L_BIG_ENDIAN -/*! 16 bit access - get */ -#define GET_DATA_TWO_BYTES(pdata, n) \ - (*((const l_uint16 *)(pdata) + (n))) -#else /* L_LITTLE_ENDIAN */ -/*! 16 bit access - get */ -#define GET_DATA_TWO_BYTES(pdata, n) \ - (*(l_uint16 *)((l_uintptr_t)((const l_uint16 *)(pdata) + (n)) ^ 2)) -#endif /* L_BIG_ENDIAN */ - -#ifdef L_BIG_ENDIAN -/*! 16 bit access - set value (0 ... 65535) */ -#define SET_DATA_TWO_BYTES(pdata, n, val) \ - *((l_uint16 *)(pdata) + (n)) = (val) -#else /* L_LITTLE_ENDIAN */ -/*! 16 bit access - set value (0 ... 
65535) */ -#define SET_DATA_TWO_BYTES(pdata, n, val) \ - *(l_uint16 *)((l_uintptr_t)((l_uint16 *)(pdata) + (n)) ^ 2) = (val) -#endif /* L_BIG_ENDIAN */ - - - /*--------------------------------------------------* - * 32 bit access * - *--------------------------------------------------*/ -/*! 32 bit access - get */ -#define GET_DATA_FOUR_BYTES(pdata, n) \ - (*((const l_uint32 *)(pdata) + (n))) - -/*! 32 bit access - set (0 ... 4294967295) */ -#define SET_DATA_FOUR_BYTES(pdata, n, val) \ - *((l_uint32 *)(pdata) + (n)) = (val) - - -#else - - /*=============================================================*/ - /* Slower: use function calls for all accessors */ - /*=============================================================*/ - -#define GET_DATA_BIT(pdata, n) l_getDataBit(pdata, n) -#define SET_DATA_BIT(pdata, n) l_setDataBit(pdata, n) -#define CLEAR_DATA_BIT(pdata, n) l_clearDataBit(pdata, n) -#define SET_DATA_BIT_VAL(pdata, n, val) l_setDataBitVal(pdata, n, val) - -#define GET_DATA_DIBIT(pdata, n) l_getDataDibit(pdata, n) -#define SET_DATA_DIBIT(pdata, n, val) l_setDataDibit(pdata, n, val) -#define CLEAR_DATA_DIBIT(pdata, n) l_clearDataDibit(pdata, n) - -#define GET_DATA_QBIT(pdata, n) l_getDataQbit(pdata, n) -#define SET_DATA_QBIT(pdata, n, val) l_setDataQbit(pdata, n, val) -#define CLEAR_DATA_QBIT(pdata, n) l_clearDataQbit(pdata, n) - -#define GET_DATA_BYTE(pdata, n) l_getDataByte(pdata, n) -#define SET_DATA_BYTE(pdata, n, val) l_setDataByte(pdata, n, val) - -#define GET_DATA_TWO_BYTES(pdata, n) l_getDataTwoBytes(pdata, n) -#define SET_DATA_TWO_BYTES(pdata, n, val) l_setDataTwoBytes(pdata, n, val) - -#define GET_DATA_FOUR_BYTES(pdata, n) l_getDataFourBytes(pdata, n) -#define SET_DATA_FOUR_BYTES(pdata, n, val) l_setDataFourBytes(pdata, n, val) - -#endif /* USE_INLINE_ACCESSORS */ - - -#endif /* LEPTONICA_ARRAY_ACCESS_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bardecode.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bardecode.c 
deleted file mode 100644 index 87956da0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bardecode.c +++ /dev/null @@ -1,1034 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bardecode.c - *
- *
- *      Dispatcher
- *          char            *barcodeDispatchDecoder()
- *
- *      Format Determination
- *          static l_int32   barcodeFindFormat()
- *          l_int32          barcodeFormatIsSupported()
- *          static l_int32   barcodeVerifyFormat()
- *
- *      Decode 2 of 5
- *          static char     *barcodeDecode2of5()
- *
- *      Decode Interleaved 2 of 5
- *          static char     *barcodeDecodeI2of5()
- *
- *      Decode Code 93
- *          static char     *barcodeDecode93()
- *
- *      Decode Code 39
- *          static char     *barcodeDecode39()
- *
- *      Decode Codabar
- *          static char     *barcodeDecodeCodabar()
- *
- *      Decode UPC-A
- *          static char     *barcodeDecodeUpca()
- *
- *      Decode EAN 13
- *          static char     *barcodeDecodeEan13()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#include "readbarcode.h" - -static l_int32 barcodeFindFormat(char *barstr); -static l_int32 barcodeVerifyFormat(char *barstr, l_int32 format, - l_int32 *pvalid, l_int32 *preverse); -static char *barcodeDecode2of5(char *barstr, l_int32 debugflag); -static char *barcodeDecodeI2of5(char *barstr, l_int32 debugflag); -static char *barcodeDecode93(char *barstr, l_int32 debugflag); -static char *barcodeDecode39(char *barstr, l_int32 debugflag); -static char *barcodeDecodeCodabar(char *barstr, l_int32 debugflag); -static char *barcodeDecodeUpca(char *barstr, l_int32 debugflag); -static char *barcodeDecodeEan13(char *barstr, l_int32 first, l_int32 debugflag); - -#ifndef NO_CONSOLE_IO -#define DEBUG_CODES 0 -#endif /* ~NO_CONSOLE_IO */ - -/*------------------------------------------------------------------------* - * Decoding dispatcher * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDispatchDecoder() - * - * \param[in] barstr string of integers in set {1,2,3,4} of bar widths - * \param[in] format L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ... 
- * \param[in] debugflag use 1 to generate debug output - * \return data string of decoded barcode data, or NULL on error - */ -char * -barcodeDispatchDecoder(char *barstr, - l_int32 format, - l_int32 debugflag) -{ -char *data = NULL; - - PROCNAME("barcodeDispatchDecoder"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - debugflag = FALSE; /* not used yet */ - - if (format == L_BF_ANY) - format = barcodeFindFormat(barstr); - - if (format == L_BF_CODE2OF5) - data = barcodeDecode2of5(barstr, debugflag); - else if (format == L_BF_CODEI2OF5) - data = barcodeDecodeI2of5(barstr, debugflag); - else if (format == L_BF_CODE93) - data = barcodeDecode93(barstr, debugflag); - else if (format == L_BF_CODE39) - data = barcodeDecode39(barstr, debugflag); - else if (format == L_BF_CODABAR) - data = barcodeDecodeCodabar(barstr, debugflag); - else if (format == L_BF_UPCA) - data = barcodeDecodeUpca(barstr, debugflag); - else if (format == L_BF_EAN13) - data = barcodeDecodeEan13(barstr, 0, debugflag); - else - return (char *)ERROR_PTR("format not implemented", procName, NULL); - - return data; -} - - -/*------------------------------------------------------------------------* - * Barcode format determination * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeFindFormat() - * - * \param[in] barstr of barcode widths, in set {1,2,3,4} - * \return format for barcode, or L_BF_UNKNOWN if not recognized - */ -static l_int32 -barcodeFindFormat(char *barstr) -{ -l_int32 i, format, valid; - - PROCNAME("barcodeFindFormat"); - - if (!barstr) - return ERROR_INT("barstr not defined", procName, L_BF_UNKNOWN); - - for (i = 0; i < NumSupportedBarcodeFormats; i++) { - format = SupportedBarcodeFormat[i]; - barcodeVerifyFormat(barstr, format, &valid, NULL); - if (valid) { - L_INFO("Barcode format: %s\n", procName, - SupportedBarcodeFormatName[i]); - return format; - } - } - return L_BF_UNKNOWN; -} - - -/*! 
- * \brief barcodeFormatIsSupported() - * - * \param[in] format - * \return 1 if format is one of those supported; 0 otherwise - * - */ -l_int32 -barcodeFormatIsSupported(l_int32 format) -{ -l_int32 i; - - for (i = 0; i < NumSupportedBarcodeFormats; i++) { - if (format == SupportedBarcodeFormat[i]) - return 1; - } - return 0; -} - - -/*! - * \brief barcodeVerifyFormat() - * - * \param[in] barstr of barcode widths, in set {1,2,3,4} - * \param[in] format L_BF_CODEI2OF5, L_BF_CODE93, ... - * \param[out] pvalid 0 if not valid, 1 and 2 if valid - * \param[out] preverse [optional] 1 if reversed; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If valid == 1, the barcode is of the given format in the
- *          forward order; if valid == 2, it is backwards.
- *      (2) If the barcode needs to be reversed to read it, and &reverse
- *          is provided, a 1 is put into %reverse.
- *      (3) Add to this as more formats are supported.
- * 
- */ -static l_int32 -barcodeVerifyFormat(char *barstr, - l_int32 format, - l_int32 *pvalid, - l_int32 *preverse) -{ -char *revbarstr; -l_int32 i, start, len, stop, mid; - - PROCNAME("barcodeVerifyFormat"); - - if (!pvalid) - return ERROR_INT("barstr not defined", procName, 1); - *pvalid = 0; - if (preverse) *preverse = 0; - if (!barstr) - return ERROR_INT("barstr not defined", procName, 1); - - switch (format) - { - case L_BF_CODE2OF5: - start = !strncmp(barstr, Code2of5[C25_START], 3); - len = strlen(barstr); - stop = !strncmp(&barstr[len - 5], Code2of5[C25_STOP], 5); - if (start && stop) { - *pvalid = 1; - } else { - revbarstr = stringReverse(barstr); - start = !strncmp(revbarstr, Code2of5[C25_START], 3); - stop = !strncmp(&revbarstr[len - 5], Code2of5[C25_STOP], 5); - LEPT_FREE(revbarstr); - if (start && stop) { - *pvalid = 1; - if (preverse) *preverse = 1; - } - } - break; - case L_BF_CODEI2OF5: - start = !strncmp(barstr, CodeI2of5[CI25_START], 4); - len = strlen(barstr); - stop = !strncmp(&barstr[len - 3], CodeI2of5[CI25_STOP], 3); - if (start && stop) { - *pvalid = 1; - } else { - revbarstr = stringReverse(barstr); - start = !strncmp(revbarstr, CodeI2of5[CI25_START], 4); - stop = !strncmp(&revbarstr[len - 3], CodeI2of5[CI25_STOP], 3); - LEPT_FREE(revbarstr); - if (start && stop) { - *pvalid = 1; - if (preverse) *preverse = 1; - } - } - break; - case L_BF_CODE93: - start = !strncmp(barstr, Code93[C93_START], 6); - len = strlen(barstr); - stop = !strncmp(&barstr[len - 7], Code93[C93_STOP], 6); - if (start && stop) { - *pvalid = 1; - } else { - revbarstr = stringReverse(barstr); - start = !strncmp(revbarstr, Code93[C93_START], 6); - stop = !strncmp(&revbarstr[len - 7], Code93[C93_STOP], 6); - LEPT_FREE(revbarstr); - if (start && stop) { - *pvalid = 1; - if (preverse) *preverse = 1; - } - } - break; - case L_BF_CODE39: - start = !strncmp(barstr, Code39[C39_START], 9); - len = strlen(barstr); - stop = !strncmp(&barstr[len - 9], Code39[C39_STOP], 9); - if (start 
&& stop) { - *pvalid = 1; - } else { - revbarstr = stringReverse(barstr); - start = !strncmp(revbarstr, Code39[C39_START], 9); - stop = !strncmp(&revbarstr[len - 9], Code39[C39_STOP], 9); - LEPT_FREE(revbarstr); - if (start && stop) { - *pvalid = 1; - if (preverse) *preverse = 1; - } - } - break; - case L_BF_CODABAR: - start = stop = 0; - len = strlen(barstr); - for (i = 16; i <= 19; i++) /* any of these will do */ - start += !strncmp(barstr, Codabar[i], 7); - for (i = 16; i <= 19; i++) /* ditto */ - stop += !strncmp(&barstr[len - 7], Codabar[i], 7); - if (start && stop) { - *pvalid = 1; - } else { - start = stop = 0; - revbarstr = stringReverse(barstr); - for (i = 16; i <= 19; i++) - start += !strncmp(revbarstr, Codabar[i], 7); - for (i = 16; i <= 19; i++) - stop += !strncmp(&revbarstr[len - 7], Codabar[i], 7); - LEPT_FREE(revbarstr); - if (start && stop) { - *pvalid = 1; - if (preverse) *preverse = 1; - } - } - break; - case L_BF_UPCA: - case L_BF_EAN13: - len = strlen(barstr); - if (len == 59) { - start = !strncmp(barstr, Upca[UPCA_START], 3); - mid = !strncmp(&barstr[27], Upca[UPCA_MID], 5); - stop = !strncmp(&barstr[len - 3], Upca[UPCA_STOP], 3); - if (start && mid && stop) - *pvalid = 1; - } - break; - default: - return ERROR_INT("format not supported", procName, 1); - } - - return 0; -} - - -/*------------------------------------------------------------------------* - * Code 2 of 5 * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDecode2of5() - * - * \param[in] barstr of widths, in set {1, 2} - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref: http://en.wikipedia.org/wiki/Two-out-of-five_code (Note:
- *                 the codes given here are wrong!)
- *               http://morovia.com/education/symbology/code25.asp
- *      (2) This is a very low density encoding for the 10 digits.
- *          Each digit is encoded with 5 black bars, of which 2 are wide
- *          and 3 are narrow.  No information is carried in the spaces
- *          between the bars, which are all equal in width, represented by
- *          a "1" in our encoding.
- *      (3) The mapping from the sequence of five bar widths to the
- *          digit is identical to the mapping used by the interleaved
- *          2 of 5 code.  The start code is 21211, representing two
- *          wide bars and a narrow bar, and the interleaved "1" spaces
- *          are explicit.  The stop code is 21112.  For all codes
- *          (including start and stop), the trailing space "1" is
- *          implicit -- there is no reason to represent it in the
- *          Code2of5[] array.
- * 
- */ -static char * -barcodeDecode2of5(char *barstr, - l_int32 debugflag) -{ -char *data, *vbarstr; -char code[10]; -l_int32 valid, reverse, i, j, len, error, ndigits, start, found; - - PROCNAME("barcodeDecodeI2of5"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format; reverse if necessary */ - barcodeVerifyFormat(barstr, L_BF_CODE2OF5, &valid, &reverse); - if (!valid) - return (char *)ERROR_PTR("barstr not in 2of5 format", procName, NULL); - if (reverse) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Verify size */ - len = strlen(vbarstr); - if ((len - 11) % 10 != 0) { - LEPT_FREE(vbarstr); - return (char *)ERROR_PTR("size not divisible by 10: invalid 2of5 code", - procName, NULL); - } - - error = FALSE; - ndigits = (len - 11) / 10; - data = (char *)LEPT_CALLOC(ndigits + 1, sizeof(char)); - memset(code, 0, 10); - for (i = 0; i < ndigits; i++) { - start = 6 + 10 * i; - for (j = 0; j < 9; j++) - code[j] = vbarstr[start + j]; - - if (debugflag) - lept_stderr("code: %s\n", code); - - found = FALSE; - for (j = 0; j < 10; j++) { - if (!strcmp(code, Code2of5[j])) { - data[i] = 0x30 + j; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - return data; -} - - -/*------------------------------------------------------------------------* - * Interleaved Code 2 of 5 * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDecodeI2of5() - * - * \param[in] barstr of widths, in set {1, 2} - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref: http://en.wikipedia.org/wiki/Interleaved_2_of_5
- *      (2) This always encodes an even number of digits.
- *          The start code is 1111; the stop code is 211.
- * 
- */ -static char * -barcodeDecodeI2of5(char *barstr, - l_int32 debugflag) -{ -char *data, *vbarstr; -char code1[6], code2[6]; -l_int32 valid, reverse, i, j, len, error, npairs, start, found; - - PROCNAME("barcodeDecodeI2of5"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format; reverse if necessary */ - barcodeVerifyFormat(barstr, L_BF_CODEI2OF5, &valid, &reverse); - if (!valid) - return (char *)ERROR_PTR("barstr not in i2of5 format", procName, NULL); - if (reverse) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Verify size */ - len = strlen(vbarstr); - if ((len - 7) % 10 != 0) { - LEPT_FREE(vbarstr); - return (char *)ERROR_PTR("size not divisible by 10: invalid I2of5 code", - procName, NULL); - } - - error = FALSE; - npairs = (len - 7) / 10; - data = (char *)LEPT_CALLOC(2 * npairs + 1, sizeof(char)); - memset(code1, 0, 6); - memset(code2, 0, 6); - for (i = 0; i < npairs; i++) { - start = 4 + 10 * i; - for (j = 0; j < 5; j++) { - code1[j] = vbarstr[start + 2 * j]; - code2[j] = vbarstr[start + 2 * j + 1]; - } - - if (debugflag) - lept_stderr("code1: %s, code2: %s\n", code1, code2); - - found = FALSE; - for (j = 0; j < 10; j++) { - if (!strcmp(code1, CodeI2of5[j])) { - data[2 * i] = 0x30 + j; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - found = FALSE; - for (j = 0; j < 10; j++) { - if (!strcmp(code2, CodeI2of5[j])) { - data[2 * i + 1] = 0x30 + j; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - return data; -} - - -/*------------------------------------------------------------------------* - * Code 93 * - *------------------------------------------------------------------------*/ -/*! 
- * \brief barcodeDecode93() - * - * \param[in] barstr of widths, in set {1, 2, 3, 4} - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref:  http://en.wikipedia.org/wiki/Code93
- *                http://morovia.com/education/symbology/code93.asp
- *      (2) Each symbol has 3 black and 3 white bars.
- *          The start and stop codes are 111141; the stop code then is
- *          terminated with a final (1) bar.
- *      (3) The last two codes are check codes.  We are checking them
- *          for correctness, and issuing a warning on failure.  Should
- *          probably not return any data on failure.
- * 
- */ -static char * -barcodeDecode93(char *barstr, - l_int32 debugflag) -{ -const char *checkc, *checkk; -char *data, *vbarstr; -char code[7]; -l_int32 valid, reverse, i, j, len, error, nsymb, start, found, sum; -l_int32 *index; - - PROCNAME("barcodeDecode93"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format; reverse if necessary */ - barcodeVerifyFormat(barstr, L_BF_CODE93, &valid, &reverse); - if (!valid) - return (char *)ERROR_PTR("barstr not in code93 format", procName, NULL); - if (reverse) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Verify size; skip the first 6 and last 7 bars. */ - len = strlen(vbarstr); - if ((len - 13) % 6 != 0) { - LEPT_FREE(vbarstr); - return (char *)ERROR_PTR("size not divisible by 6: invalid code 93", - procName, NULL); - } - - /* Decode the symbols */ - nsymb = (len - 13) / 6; - data = (char *)LEPT_CALLOC(nsymb + 1, sizeof(char)); - index = (l_int32 *)LEPT_CALLOC(nsymb, sizeof(l_int32)); - memset(code, 0, 7); - error = FALSE; - for (i = 0; i < nsymb; i++) { - start = 6 + 6 * i; - for (j = 0; j < 6; j++) - code[j] = vbarstr[start + j]; - - if (debugflag) - lept_stderr("code: %s\n", code); - - found = FALSE; - for (j = 0; j < C93_START; j++) { - if (!strcmp(code, Code93[j])) { - data[i] = Code93Val[j]; - index[i] = j; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(index); - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - /* Do check sums. For character "C", use only the - * actual data in computing the sum. For character "K", - * use the actual data plus the check character "C". 
*/ - sum = 0; - for (i = 0; i < nsymb - 2; i++) /* skip the "C" and "K" */ - sum += ((i % 20) + 1) * index[nsymb - 3 - i]; - if (data[nsymb - 2] != Code93Val[sum % 47]) - L_WARNING("Error for check C\n", procName); - - if (debugflag) { - checkc = Code93[sum % 47]; - lept_stderr("checkc = %s\n", checkc); - } - - sum = 0; - for (i = 0; i < nsymb - 1; i++) /* skip the "K" */ - sum += ((i % 15) + 1) * index[nsymb - 2 - i]; - if (data[nsymb - 1] != Code93Val[sum % 47]) - L_WARNING("Error for check K\n", procName); - - if (debugflag) { - checkk = Code93[sum % 47]; - lept_stderr("checkk = %s\n", checkk); - } - - /* Remove the two check codes from the output */ - data[nsymb - 2] = '\0'; - - LEPT_FREE(index); - return data; -} - - -/*------------------------------------------------------------------------* - * Code 39 * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDecode39() - * - * \param[in] barstr of widths, in set {1, 2} - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref:  http://en.wikipedia.org/wiki/Code39
- *                http://morovia.com/education/symbology/code39.asp
- *      (2) Each symbol has 5 black and 4 white bars.
- *          The start and stop codes are 121121211 (the asterisk)
- *      (3) This decoder was contributed by Roger Hyde.
- * 
- */ -static char * -barcodeDecode39(char *barstr, - l_int32 debugflag) -{ -char *data, *vbarstr; -char code[10]; -l_int32 valid, reverse, i, j, len, error, nsymb, start, found; - - PROCNAME("barcodeDecode39"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format; reverse if necessary */ - barcodeVerifyFormat(barstr, L_BF_CODE39, &valid, &reverse); - if (!valid) - return (char *)ERROR_PTR("barstr not in code39 format", procName, NULL); - if (reverse) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Verify size */ - len = strlen(vbarstr); - if ((len + 1) % 10 != 0) { - LEPT_FREE(vbarstr); - return (char *)ERROR_PTR("size+1 not divisible by 10: invalid code 39", - procName, NULL); - } - - /* Decode the symbols */ - nsymb = (len - 19) / 10; - data = (char *)LEPT_CALLOC(nsymb + 1, sizeof(char)); - memset(code, 0, 10); - error = FALSE; - for (i = 0; i < nsymb; i++) { - start = 10 + 10 * i; - for (j = 0; j < 9; j++) - code[j] = vbarstr[start + j]; - - if (debugflag) - lept_stderr("code: %s\n", code); - - found = FALSE; - for (j = 0; j < C39_START; j++) { - if (!strcmp(code, Code39[j])) { - data[i] = Code39Val[j]; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - return data; -} - - -/*------------------------------------------------------------------------* - * Codabar * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDecodeCodabar() - * - * \param[in] barstr of widths, in set {1, 2} - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref:  http://en.wikipedia.org/wiki/Codabar
- *                http://morovia.com/education/symbology/codabar.asp
- *      (2) Each symbol has 4 black and 3 white bars.  They represent the
- *          10 digits, and optionally 6 other characters.  The start and
- *          stop codes can be any of four (typically denoted A,B,C,D).
- * 
- */ -static char * -barcodeDecodeCodabar(char *barstr, - l_int32 debugflag) -{ -char *data, *vbarstr; -char code[8]; -l_int32 valid, reverse, i, j, len, error, nsymb, start, found; - - PROCNAME("barcodeDecodeCodabar"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format; reverse if necessary */ - barcodeVerifyFormat(barstr, L_BF_CODABAR, &valid, &reverse); - if (!valid) - return (char *)ERROR_PTR("barstr not in codabar format", - procName, NULL); - if (reverse) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Verify size */ - len = strlen(vbarstr); - if ((len + 1) % 8 != 0) { - LEPT_FREE(vbarstr); - return (char *)ERROR_PTR("size+1 not divisible by 8: invalid codabar", - procName, NULL); - } - - /* Decode the symbols */ - nsymb = (len - 15) / 8; - data = (char *)LEPT_CALLOC(nsymb + 1, sizeof(char)); - memset(code, 0, 8); - error = FALSE; - for (i = 0; i < nsymb; i++) { - start = 8 + 8 * i; - for (j = 0; j < 7; j++) - code[j] = vbarstr[start + j]; - - if (debugflag) - lept_stderr("code: %s\n", code); - - found = FALSE; - for (j = 0; j < 16; j++) { - if (!strcmp(code, Codabar[j])) { - data[i] = CodabarVal[j]; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - return data; -} - - -/*------------------------------------------------------------------------* - * Code UPC-A * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDecodeUpca() - * - * \param[in] barstr of widths, in set {1, 2, 3, 4} - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref:  http://en.wikipedia.org/wiki/UniversalProductCode
- *                http://morovia.com/education/symbology/upc-a.asp
- *      (2) Each symbol has 2 black and 2 white bars, and encodes a digit.
- *          The start and stop codes are 111 and 111.  There are a total of
- *          30 black bars, encoding 12 digits in two sets of 6, with
- *          2 black bars separating the sets.
- *      (3) The last digit is a check digit.  We check for correctness, and
- *          issue a warning on failure.  Should probably not return any
- *          data on failure.
- * 
- */ -static char * -barcodeDecodeUpca(char *barstr, - l_int32 debugflag) -{ -char *data, *vbarstr; -char code[5]; -l_int32 valid, i, j, len, error, start, found, sum, checkdigit; - - PROCNAME("barcodeDecodeUpca"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format; reverse has no meaning here -- we must test both */ - barcodeVerifyFormat(barstr, L_BF_UPCA, &valid, NULL); - if (!valid) - return (char *)ERROR_PTR("barstr not in UPC-A format", procName, NULL); - - /* Verify size */ - len = strlen(barstr); - if (len != 59) - return (char *)ERROR_PTR("size not 59; invalid UPC-A barcode", - procName, NULL); - - /* Check the first digit. If invalid, reverse the string. */ - memset(code, 0, 5); - for (i = 0; i < 4; i++) - code[i] = barstr[i + 3]; - found = FALSE; - for (i = 0; i < 10; i++) { - if (!strcmp(code, Upca[i])) { - found = TRUE; - break; - } - } - if (found == FALSE) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Decode the 12 symbols */ - data = (char *)LEPT_CALLOC(13, sizeof(char)); - memset(code, 0, 5); - error = FALSE; - for (i = 0; i < 12; i++) { - if (i < 6) - start = 3 + 4 * i; - else - start = 32 + 4 * (i - 6); - for (j = 0; j < 4; j++) - code[j] = vbarstr[start + j]; - - if (debugflag) - lept_stderr("code: %s\n", code); - - found = FALSE; - for (j = 0; j < 10; j++) { - if (!strcmp(code, Upca[j])) { - data[i] = 0x30 + j; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - /* Calculate the check digit (data[11]). 
*/ - sum = 0; - for (i = 0; i < 12; i += 2) /* "even" digits */ - sum += 3 * (data[i] - 0x30); - for (i = 1; i < 11; i += 2) /* "odd" digits */ - sum += (data[i] - 0x30); - checkdigit = sum % 10; - if (checkdigit) /* not 0 */ - checkdigit = 10 - checkdigit; - if (checkdigit + 0x30 != data[11]) - L_WARNING("Error for UPC-A check character\n", procName); - - return data; -} - - -/*------------------------------------------------------------------------* - * Code EAN-13 * - *------------------------------------------------------------------------*/ -/*! - * \brief barcodeDecodeEan13() - * - * \param[in] barstr of widths, in set {1, 2, 3, 4} - * \param[in] first first digit: 0 - 9 - * \param[in] debugflag - * \return data string of digits, or NULL if none found or on error - * - *
- * Notes:
- *      (1) Ref:  http://en.wikipedia.org/wiki/UniversalProductCode
- *                http://morovia.com/education/symbology/ean-13.asp
- *      (2) The encoding is essentially the same as UPC-A, except
- *          there are 13 digits in total, of which 12 are encoded
- *          by bars (as with UPC-A) and the 13th is a leading digit
- *          that determines the encoding of the next 6 digits,
- *          selecting each digit from one of two tables.
- *          encoded in the bars (as with UPC-A).  If the first digit
- *          is 0, the encoding is identical to UPC-A.
- *      (3) As with UPC-A, the last digit is a check digit.
- *      (4) For now, we assume the first digit is input to this function.
- *          Eventually, we will read it by pattern matching.
- *
- *    TODO: fix this for multiple tables, depending on the value of %first
- * 
- */ -static char * -barcodeDecodeEan13(char *barstr, - l_int32 first, - l_int32 debugflag) -{ -char *data, *vbarstr; -char code[5]; -l_int32 valid, i, j, len, error, start, found, sum, checkdigit; - - PROCNAME("barcodeDecodeEan13"); - - if (!barstr) - return (char *)ERROR_PTR("barstr not defined", procName, NULL); - - /* Verify format. You can't tell the orientation by the start - * and stop codes, but you can by the location of the digits. - * Use the UPCA verifier for EAN 13 -- it is identical. */ - barcodeVerifyFormat(barstr, L_BF_UPCA, &valid, NULL); - if (!valid) - return (char *)ERROR_PTR("barstr not in EAN 13 format", procName, NULL); - - /* Verify size */ - len = strlen(barstr); - if (len != 59) - return (char *)ERROR_PTR("size not 59; invalid EAN 13 barcode", - procName, NULL); - - /* Check the first digit. If invalid, reverse the string. */ - memset(code, 0, 5); - for (i = 0; i < 4; i++) - code[i] = barstr[i + 3]; - found = FALSE; - for (i = 0; i < 10; i++) { - if (!strcmp(code, Upca[i])) { - found = TRUE; - break; - } - } - if (found == FALSE) - vbarstr = stringReverse(barstr); - else - vbarstr = stringNew(barstr); - - /* Decode the 12 symbols */ - data = (char *)LEPT_CALLOC(13, sizeof(char)); - memset(code, 0, 5); - error = FALSE; - for (i = 0; i < 12; i++) { - if (i < 6) - start = 3 + 4 * i; - else - start = 32 + 4 * (i - 6); - for (j = 0; j < 4; j++) - code[j] = vbarstr[start + j]; - - if (debugflag) - lept_stderr("code: %s\n", code); - - found = FALSE; - for (j = 0; j < 10; j++) { - if (!strcmp(code, Upca[j])) { - data[i] = 0x30 + j; - found = TRUE; - break; - } - } - if (!found) error = TRUE; - } - LEPT_FREE(vbarstr); - - if (error) { - LEPT_FREE(data); - return (char *)ERROR_PTR("error in decoding", procName, NULL); - } - - /* Calculate the check digit (data[11]). 
*/ - sum = 0; - for (i = 0; i < 12; i += 2) /* "even" digits */ - sum += 3 * (data[i] - 0x30); - for (i = 1; i < 12; i += 2) /* "odd" digits */ - sum += (data[i] - 0x30); - checkdigit = sum % 10; - if (checkdigit) /* not 0 */ - checkdigit = 10 - checkdigit; - if (checkdigit + 0x30 != data[11]) - L_WARNING("Error for EAN-13 check character\n", procName); - - return data; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/baseline.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/baseline.c deleted file mode 100644 index 94607961..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/baseline.c +++ /dev/null @@ -1,600 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file baseline.c - *
- *
- *      Locate text baselines in an image
- *           NUMA     *pixFindBaselines()
- *
- *      Projective transform to remove local skew
- *           PIX      *pixDeskewLocal()
- *
- *      Determine local skew
- *           l_int32   pixGetLocalSkewTransform()
- *           NUMA     *pixGetLocalSkewAngles()
- *
- *  We have two apparently different functions here:
- *    ~ finding baselines
- *    ~ finding a projective transform to remove keystone warping
- *  The function pixGetLocalSkewAngles() returns an array of angles,
- *  one for each raster line, and the baselines of the text lines
- *  should intersect the left edge of the image with that angle.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Min to travel after finding max before abandoning peak */ -static const l_int32 MinDistInPeak = 35; - - /* Thresholds for peaks and zeros, relative to the max peak */ -static const l_int32 PeakThresholdRatio = 20; -static const l_int32 ZeroThresholdRatio = 100; - - /* Default values for determining local skew */ -static const l_int32 DefaultSlices = 10; -static const l_int32 DefaultSweepReduction = 2; -static const l_int32 DefaultBsReduction = 1; -static const l_float32 DefaultSweepRange = 5.; /* degrees */ -static const l_float32 DefaultSweepDelta = 1.; /* degrees */ -static const l_float32 DefaultMinbsDelta = 0.01; /* degrees */ - - /* Overlap slice fraction added to top and bottom of each slice */ -static const l_float32 OverlapFraction = 0.5; - - /* Minimum allowed confidence (ratio) for accepting a value */ -static const l_float32 MinAllowedConfidence = 3.0; - - -/*---------------------------------------------------------------------* - * Locate text baselines in an image * - *---------------------------------------------------------------------*/ -/*! - * \brief pixFindBaselines() - * - * \param[in] pixs 1 bpp, 300 ppi - * \param[out] ppta [optional] pairs of pts corresponding to - * approx. ends of each text line - * \param[in] pixadb for debug output; use NULL to skip - * \return na of baseline y values, or NULL on error - * - *
- * Notes:
- *      (1) Input binary image must have text lines already aligned
- *          horizontally.  This can be done by either rotating the
- *          image with pixDeskew(), or, if a projective transform
- *          is required, by doing pixDeskewLocal() first.
- *      (2) Input null for &pta if you don't want this returned.
- *          The pta will come in pairs of points (left and right end
- *          of each baseline).
- *      (3) Caution: this will not work properly on text with multiple
- *          columns, where the lines are not aligned between columns.
- *          If there are multiple columns, they should be extracted
- *          separately before finding the baselines.
- *      (4) This function constructs different types of output
- *          for baselines; namely, a set of raster line values and
- *          a set of end points of each baseline.
- *      (5) This function was designed to handle short and long text lines
- *          without using dangerous thresholds on the peak heights.  It does
- *          this by combining the differential signal with a morphological
- *          analysis of the locations of the text lines.  One can also
- *          combine this data to normalize the peak heights, by weighting
- *          the differential signal in the region of each baseline
- *          by the inverse of the width of the text line found there.
- * 
- */ -NUMA * -pixFindBaselines(PIX *pixs, - PTA **ppta, - PIXA *pixadb) -{ -l_int32 h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh; -l_int32 imaxloc, peakthresh, zerothresh, inpeak; -l_int32 mintosearch, max, maxloc, nloc, locval; -l_int32 *array; -l_float32 maxval; -BOXA *boxa1, *boxa2, *boxa3; -GPLOT *gplot; -NUMA *nasum, *nadiff, *naloc, *naval; -PIX *pix1, *pix2; -PTA *pta; - - PROCNAME("pixFindBaselines"); - - if (ppta) *ppta = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - /* Close up the text characters, removing noise */ - pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0); - - /* Estimate the resolution */ - if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); - - /* Save the difference of adjacent row sums. - * The high positive-going peaks are the baselines */ - if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) { - pixDestroy(&pix1); - return (NUMA *)ERROR_PTR("nasum not made", procName, NULL); - } - h = pixGetHeight(pixs); - nadiff = numaCreate(h); - numaGetIValue(nasum, 0, &val2); - for (i = 0; i < h - 1; i++) { - val1 = val2; - numaGetIValue(nasum, i + 1, &val2); - numaAddNumber(nadiff, val1 - val2); - } - numaDestroy(&nasum); - - if (pixadb) { /* show the difference signal */ - lept_mkdir("lept/baseline"); - gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig"); - pix2 = pixRead("/tmp/lept/baseline/diff.png"); - pixaAddPix(pixadb, pix2, L_INSERT); - } - - /* Use the zeroes of the profile to locate each baseline. 
*/ - array = numaGetIArray(nadiff); - ndiff = numaGetCount(nadiff); - numaGetMax(nadiff, &maxval, &imaxloc); - numaDestroy(&nadiff); - - /* Use this to begin locating a new peak: */ - peakthresh = (l_int32)maxval / PeakThresholdRatio; - /* Use this to begin a region between peaks: */ - zerothresh = (l_int32)maxval / ZeroThresholdRatio; - - naloc = numaCreate(0); - naval = numaCreate(0); - inpeak = FALSE; - for (i = 0; i < ndiff; i++) { - if (inpeak == FALSE) { - if (array[i] > peakthresh) { /* transition to in-peak */ - inpeak = TRUE; - mintosearch = i + MinDistInPeak; /* accept no zeros - * between i and mintosearch */ - max = array[i]; - maxloc = i; - } - } else { /* inpeak == TRUE; look for max */ - if (array[i] > max) { - max = array[i]; - maxloc = i; - mintosearch = i + MinDistInPeak; - } else if (i > mintosearch && array[i] <= zerothresh) { /* leave */ - inpeak = FALSE; - numaAddNumber(naval, max); - numaAddNumber(naloc, maxloc); - } - } - } - LEPT_FREE(array); - - /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */ - if (inpeak) { - numaAddNumber(naval, max); - numaAddNumber(naloc, maxloc); - } - - if (pixadb) { /* show the raster locations for the peaks */ - gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs", - "rasterline", "height"); - gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - pix2 = pixRead("/tmp/lept/baseline/loc.png"); - pixaAddPix(pixadb, pix2, L_INSERT); - } - numaDestroy(&naval); - - /* Generate an approximate profile of text line width. - * First, filter the boxes of text, where there may be - * more than one box for a given textline. 
*/ - pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0); - if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); - boxa1 = pixConnComp(pix2, NULL, 4); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (boxaGetCount(boxa1) == 0) { - numaDestroy(&naloc); - boxaDestroy(&boxa1); - L_INFO("no compnents after filtering\n", procName); - return NULL; - } - boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.); - boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - - /* Optionally, find the baseline segments */ - pta = NULL; - if (ppta) { - pta = ptaCreate(0); - *ppta = pta; - } - if (pta) { - nloc = numaGetCount(naloc); - nbox = boxaGetCount(boxa3); - for (i = 0; i < nbox; i++) { - boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh); - for (j = 0; j < nloc; j++) { - numaGetIValue(naloc, j, &locval); - if (L_ABS(locval - (by + bh)) > 25) - continue; - ptaAddPt(pta, bx, locval); - ptaAddPt(pta, bx + bw, locval); - break; - } - } - } - boxaDestroy(&boxa3); - - if (pixadb && pta) { /* display baselines */ - l_int32 npts, x1, y1, x2, y2; - pix1 = pixConvertTo32(pixs); - npts = ptaGetCount(pta); - for (i = 0; i < npts; i += 2) { - ptaGetIPt(pta, i, &x1, &y1); - ptaGetIPt(pta, i + 1, &x2, &y2); - pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0); - } - pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG); - pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); - pixDestroy(&pix1); - } - - return naloc; -} - - -/*---------------------------------------------------------------------* - * Projective transform to remove local skew * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixDeskewLocal() - * - * \param[in] pixs 1 bpp - * \param[in] nslices the number of horizontal overlapping slices; - * must be larger than 1 and not exceed 20; - * use 0 for default - * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; - * use 0 for default value - * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and - * not larger than redsweep; use 0 for default value - * \param[in] sweeprange half the full range, assumed about 0; in degrees; - * use 0.0 for default value - * \param[in] sweepdelta angle increment of sweep; in degrees; - * use 0.0 for default value - * \param[in] minbsdelta min binary search increment angle; in degrees; - * use 0.0 for default value - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function allows deskew of a page whose skew changes
- *          approximately linearly with vertical position.  It uses
- *          a projective transform that in effect does a differential
- *          shear about the LHS of the page, and makes all text lines
- *          horizontal.
- *      (2) The origin of the keystoning can be either a cheap document
- *          feeder that rotates the page as it is passed through, or a
- *          camera image taken from either the left or right side
- *          of the vertical.
- *      (3) The image transformation is a projective warping,
- *          not a rotation.  Apart from this function, the text lines
- *          must be properly aligned vertically with respect to each
- *          other.  This can be done by pre-processing the page; e.g.,
- *          by rotating or horizontally shearing it.
- *          Typically, this can be achieved by vertically aligning
- *          the page edge.
- * 
- */ -PIX * -pixDeskewLocal(PIX *pixs, - l_int32 nslices, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta) -{ -l_int32 ret; -PIX *pixd; -PTA *ptas, *ptad; - - PROCNAME("pixDeskewLocal"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - /* Skew array gives skew angle (deg) as fctn of raster line - * where it intersects the LHS of the image */ - ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch, - sweeprange, sweepdelta, minbsdelta, - &ptas, &ptad); - if (ret != 0) - return (PIX *)ERROR_PTR("transform pts not found", procName, NULL); - - /* Use a projective transform */ - pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE); - - ptaDestroy(&ptas); - ptaDestroy(&ptad); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Determine the local skew * - *---------------------------------------------------------------------*/ -/*! - * \brief pixGetLocalSkewTransform() - * - * \param[in] pixs - * \param[in] nslices the number of horizontal overlapping slices; - * must be larger than 1 and not exceed 20; - * use 0 for default - * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; - * use 0 for default value - * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not - * larger than redsweep; use 0 for default value - * \param[in] sweeprange half the full range, assumed about 0; - * in degrees; use 0.0 for default value - * \param[in] sweepdelta angle increment of sweep; in degrees; - * use 0.0 for default value - * \param[in] minbsdelta min binary search increment angle; in degrees; - * use 0.0 for default value - * \param[out] pptas 4 points in the source - * \param[out] pptad the corresponding 4 pts in the dest - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates two pairs of points in the src, each pair
- *          corresponding to a pair of points that would lie along
- *          the same raster line in a transformed (dewarped) image.
- *      (2) The sets of 4 src and 4 dest points returned by this function
- *          can then be used, in a projective or bilinear transform,
- *          to remove keystoning in the src.
- * 
- */ -l_ok -pixGetLocalSkewTransform(PIX *pixs, - l_int32 nslices, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta, - PTA **pptas, - PTA **pptad) -{ -l_int32 w, h, i; -l_float32 deg2rad, angr, angd, dely; -NUMA *naskew; -PTA *ptas, *ptad; - - PROCNAME("pixGetLocalSkewTransform"); - - if (!pptas || !pptad) - return ERROR_INT("&ptas and &ptad not defined", procName, 1); - *pptas = *pptad = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (nslices < 2 || nslices > 20) - nslices = DefaultSlices; - if (redsweep < 1 || redsweep > 8) - redsweep = DefaultSweepReduction; - if (redsearch < 1 || redsearch > redsweep) - redsearch = DefaultBsReduction; - if (sweeprange == 0.0) - sweeprange = DefaultSweepRange; - if (sweepdelta == 0.0) - sweepdelta = DefaultSweepDelta; - if (minbsdelta == 0.0) - minbsdelta = DefaultMinbsDelta; - - naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch, - sweeprange, sweepdelta, minbsdelta, - NULL, NULL, 0); - if (!naskew) - return ERROR_INT("naskew not made", procName, 1); - - deg2rad = 3.14159265 / 180.; - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - ptas = ptaCreate(4); - ptad = ptaCreate(4); - *pptas = ptas; - *pptad = ptad; - - /* Find i for skew line that intersects LHS at i and RHS at h / 20 */ - for (i = 0; i < h; i++) { - numaGetFValue(naskew, i, &angd); - angr = angd * deg2rad; - dely = w * tan(angr); - if (i - dely > 0.05 * h) - break; - } - ptaAddPt(ptas, 0, i); - ptaAddPt(ptas, w - 1, i - dely); - ptaAddPt(ptad, 0, i); - ptaAddPt(ptad, w - 1, i); - - /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */ - for (i = h - 1; i > 0; i--) { - numaGetFValue(naskew, i, &angd); - angr = angd * deg2rad; - dely = w * tan(angr); - if (i - dely < 0.95 * h) - break; - } - ptaAddPt(ptas, 0, i); - ptaAddPt(ptas, w - 1, i - dely); - ptaAddPt(ptad, 0, i); - ptaAddPt(ptad, w - 1, i); - - 
numaDestroy(&naskew); - return 0; -} - - -/*! - * \brief pixGetLocalSkewAngles() - * - * \param[in] pixs 1 bpp - * \param[in] nslices the number of horizontal overlapping slices; - * must be larger than 1 and not exceed 20; - * use 0 for default - * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; - * use 0 for default value - * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not - * larger than redsweep; use 0 for default value - * \param[in] sweeprange half the full range, assumed about 0; - * in degrees; use 0.0 for default value - * \param[in] sweepdelta angle increment of sweep; in degrees; - * use 0.0 for default value - * \param[in] minbsdelta min binary search increment angle; in degrees; - * use 0.0 for default value - * \param[out] pa [optional] slope of skew as fctn of y - * \param[out] pb [optional] intercept at y = 0 of skew, - 8 as a function of y - * \param[in] debug 1 for generating plot of skew angle vs. y; - * 0 otherwise - * \return naskew, or NULL on error - * - *
- * Notes:
- *      (1) The local skew is measured in a set of overlapping strips.
- *          We then do a least square linear fit parameters to get
- *          the slope and intercept parameters a and b in
- *              skew-angle = a * y + b  (degrees)
- *          for the local skew as a function of raster line y.
- *          This is then used to make naskew, which can be interpreted
- *          as the computed skew angle (in degrees) at the left edge
- *          of each raster line.
- *      (2) naskew can then be used to find the baselines of text, because
- *          each text line has a baseline that should intersect
- *          the left edge of the image with the angle given by this
- *          array, evaluated at the raster line of intersection.
- * 
- */ -NUMA * -pixGetLocalSkewAngles(PIX *pixs, - l_int32 nslices, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta, - l_float32 *pa, - l_float32 *pb, - l_int32 debug) -{ -l_int32 w, h, hs, i, ystart, yend, ovlap, npts; -l_float32 angle, conf, ycenter, a, b; -BOX *box; -GPLOT *gplot; -NUMA *naskew, *nax, *nay; -PIX *pix; -PTA *pta; - - PROCNAME("pixGetLocalSkewAngles"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (nslices < 2 || nslices > 20) - nslices = DefaultSlices; - if (redsweep < 1 || redsweep > 8) - redsweep = DefaultSweepReduction; - if (redsearch < 1 || redsearch > redsweep) - redsearch = DefaultBsReduction; - if (sweeprange == 0.0) - sweeprange = DefaultSweepRange; - if (sweepdelta == 0.0) - sweepdelta = DefaultSweepDelta; - if (minbsdelta == 0.0) - minbsdelta = DefaultMinbsDelta; - - pixGetDimensions(pixs, &w, &h, NULL); - hs = h / nslices; - ovlap = (l_int32)(OverlapFraction * hs); - pta = ptaCreate(nslices); - for (i = 0; i < nslices; i++) { - ystart = L_MAX(0, hs * i - ovlap); - yend = L_MIN(h - 1, hs * (i + 1) + ovlap); - ycenter = (l_float32)(ystart + yend) / 2; - box = boxCreate(0, ystart, w, yend - ystart + 1); - pix = pixClipRectangle(pixs, box, NULL); - pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, - sweeprange, sweepdelta, minbsdelta); - if (conf > MinAllowedConfidence) - ptaAddPt(pta, ycenter, angle); - pixDestroy(&pix); - boxDestroy(&box); - } - - /* Do linear least squares fit */ - if ((npts = ptaGetCount(pta)) < 2) { - ptaDestroy(&pta); - return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL); - } - ptaGetLinearLSF(pta, &a, &b, NULL); - if (pa) *pa = a; - if (pb) *pb = b; - - /* Make skew angle array as function of raster line */ - naskew = numaCreate(h); - for (i = 0; i < h; i++) { - angle = a * i + b; - numaAddNumber(naskew, angle); - } - - if (debug) { - 
lept_mkdir("lept/baseline"); - ptaGetArrays(pta, &nax, &nay); - gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG, - "skew as fctn of y", "y (in raster lines from top)", - "angle (in degrees)"); - gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); - gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - numaDestroy(&nax); - numaDestroy(&nay); - } - - ptaDestroy(&pta); - return naskew; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bbuffer.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bbuffer.c deleted file mode 100644 index 4e0e3074..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bbuffer.c +++ /dev/null @@ -1,486 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bbuffer.c - *
- *
- *      Create/Destroy BBuffer
- *          L_BBUFFER      *bbufferCreate()
- *          void           *bbufferDestroy()
- *          l_uint8        *bbufferDestroyAndSaveData()
- *
- *      Operations to read data TO a BBuffer
- *          l_int32         bbufferRead()
- *          l_int32         bbufferReadStream()
- *          l_int32         bbufferExtendArray()
- *
- *      Operations to write data FROM a BBuffer
- *          l_int32         bbufferWrite()
- *          l_int32         bbufferWriteStream()
- *
- *    The bbuffer is an implementation of a byte queue.
- *    The bbuffer holds a byte array from which bytes are
- *    processed in a first-in/first-out fashion.  As with
- *    any queue, bbuffer maintains two "pointers," one to the
- *    tail of the queue (where you read new bytes onto it)
- *    and one to the head of the queue (where you start from
- *    when writing bytes out of it.
- *
- *    The queue can be visualized:
- *
- * \code
- *  byte 0                                           byte (nalloc - 1)
- *       |                                                |
- *       --------------------------------------------------
- *                 H                             T
- *       [   aw   ][  bytes currently on queue  ][  anr   ]
- *
- *       ---:  all allocated data in bbuffer
- *       H:    queue head (ptr to next byte to be written out)
- *       T:    queue tail (ptr to first byte to be written to)
- *       aw:   already written from queue
- *       anr:  allocated but not yet read to
- * \endcode
- *    The purpose of bbuffer is to allow you to safely read
- *    bytes in, and to sequentially write them out as well.
- *    In the process of writing bytes out, you don't actually
- *    remove the bytes in the array; you just move the pointer
- *    (nwritten) which points to the head of the queue.  In
- *    the process of reading bytes in, you sometimes need to
- *    expand the array size.  If a read is performed after a
- *    write, so that the head of the queue is not at the
- *    beginning of the array, the bytes already written are
- *    first removed by copying the others over them; then the
- *    new bytes are read onto the tail of the queue.
- *
- *    Note that the meaning of "read into" and "write from"
- *    the bbuffer is OPPOSITE to that for a stream, where
- *    you read "from" a stream and write "into" a stream.
- *    As a mnemonic for remembering the direction:
- *        ~ to read bytes from a stream into the bbuffer,
- *          you call fread on the stream
- *        ~ to write bytes from the bbuffer into a stream,
- *          you call fwrite on the stream
- *
- *    See zlibmem.c for an example use of bbuffer, where we
- *    compress and decompress an array of bytes in memory.
- *
- *    We can also use the bbuffer trivially to read from stdin
- *    into memory; e.g., to capture bytes piped from the stdout
- *    of another program.  This is equivalent to repeatedly
- *    calling bbufferReadStream() until the input queue is empty.
- *    This is implemented in l_binaryReadStream().
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on array size */ -static const l_uint32 MaxArraySize = 1000000000; /* 10^9 bytes */ -static const l_int32 InitialArraySize = 1024; /*!< n'importe quoi */ - -/*--------------------------------------------------------------------------* - * BBuffer create/destroy * - *--------------------------------------------------------------------------*/ -/*! - * \brief bbufferCreate() - * - * \param[in] indata address in memory [optional] - * \param[in] nalloc size of byte array to be alloc'd 0 for default - * \return bbuffer, or NULL on error - * - *
- * Notes:
- *      (1) If a buffer address is given, you should read all the data in.
- *      (2) Allocates a bbuffer with associated byte array of
- *          the given size.  If a buffer address is given,
- *          it then reads the number of bytes into the byte array.
- * 
- */ -L_BBUFFER * -bbufferCreate(const l_uint8 *indata, - l_int32 nalloc) -{ -L_BBUFFER *bb; - - PROCNAME("bbufferCreate"); - - if (nalloc <= 0 || nalloc > MaxArraySize) - nalloc = InitialArraySize; - - bb = (L_BBUFFER *)LEPT_CALLOC(1, sizeof(L_BBUFFER)); - if ((bb->array = (l_uint8 *)LEPT_CALLOC(nalloc, sizeof(l_uint8))) == NULL) { - LEPT_FREE(bb); - return (L_BBUFFER *)ERROR_PTR("byte array not made", procName, NULL); - } - bb->nalloc = nalloc; - bb->nwritten = 0; - - if (indata) { - memcpy(bb->array, indata, nalloc); - bb->n = nalloc; - } else { - bb->n = 0; - } - - return bb; -} - - -/*! - * \brief bbufferDestroy() - * - * \param[in,out] pbb will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Destroys the byte array in the bbuffer and then the bbuffer;
- *          then nulls the contents of the input ptr.
- * 
- */ -void -bbufferDestroy(L_BBUFFER **pbb) -{ -L_BBUFFER *bb; - - PROCNAME("bbufferDestroy"); - - if (pbb == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - - if ((bb = *pbb) == NULL) - return; - - if (bb->array) - LEPT_FREE(bb->array); - LEPT_FREE(bb); - *pbb = NULL; - - return; -} - - -/*! - * \brief bbufferDestroyAndSaveData() - * - * \param[in,out] pbb input data buffer; will be nulled - * \param[out] pnbytes number of bytes saved in array - * \return barray newly allocated array of data - * - *
- * Notes:
- *      (1) Copies data to newly allocated array; then destroys the bbuffer.
- * 
- */ -l_uint8 * -bbufferDestroyAndSaveData(L_BBUFFER **pbb, - size_t *pnbytes) -{ -l_uint8 *array; -size_t nbytes; -L_BBUFFER *bb; - - PROCNAME("bbufferDestroyAndSaveData"); - - if (pbb == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return NULL; - } - if (pnbytes == NULL) { - L_WARNING("&nbytes is NULL\n", procName); - bbufferDestroy(pbb); - return NULL; - } - - if ((bb = *pbb) == NULL) - return NULL; - - /* write all unwritten bytes out to a new array */ - nbytes = bb->n - bb->nwritten; - *pnbytes = nbytes; - if ((array = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL) { - L_WARNING("calloc failure for array\n", procName); - return NULL; - } - memcpy(array, bb->array + bb->nwritten, nbytes); - - bbufferDestroy(pbb); - return array; -} - - -/*--------------------------------------------------------------------------* - * Operations to read data INTO a BBuffer * - *--------------------------------------------------------------------------*/ -/*! - * \brief bbufferRead() - * - * \param[in] bb bbuffer - * \param[in] src source memory buffer from which bytes are read - * \param[in] nbytes bytes to be read - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For a read after write, first remove the written
- *          bytes by shifting the unwritten bytes in the array,
- *          then check if there is enough room to add the new bytes.
- *          If not, realloc with bbufferExpandArray(), resulting
- *          in a second writing of the unwritten bytes.  While less
- *          efficient, this is simpler than making a special case
- *          of reallocNew().
- * 
- */ -l_ok -bbufferRead(L_BBUFFER *bb, - l_uint8 *src, - l_int32 nbytes) -{ -l_int32 navail, nadd, nwritten; - - PROCNAME("bbufferRead"); - - if (!bb) - return ERROR_INT("bb not defined", procName, 1); - if (!src) - return ERROR_INT("src not defined", procName, 1); - if (nbytes == 0) - return ERROR_INT("no bytes to read", procName, 1); - - if ((nwritten = bb->nwritten)) { /* move the unwritten bytes over */ - memmove(bb->array, bb->array + nwritten, bb->n - nwritten); - bb->nwritten = 0; - bb->n -= nwritten; - } - - /* If necessary, expand the allocated array. Do so by - * by at least a factor of two. */ - navail = bb->nalloc - bb->n; - if (nbytes > navail) { - nadd = L_MAX(bb->nalloc, nbytes); - bbufferExtendArray(bb, nadd); - } - - /* Read in the new bytes */ - memcpy(bb->array + bb->n, src, nbytes); - bb->n += nbytes; - - return 0; -} - - -/*! - * \brief bbufferReadStream() - * - * \param[in] bb bbuffer - * \param[in] fp source stream from which bytes are read - * \param[in] nbytes bytes to be read - * \return 0 if OK, 1 on error - */ -l_ok -bbufferReadStream(L_BBUFFER *bb, - FILE *fp, - l_int32 nbytes) -{ -l_int32 navail, nadd, nread, nwritten; - - PROCNAME("bbufferReadStream"); - - if (!bb) - return ERROR_INT("bb not defined", procName, 1); - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (nbytes == 0) - return ERROR_INT("no bytes to read", procName, 1); - - if ((nwritten = bb->nwritten)) { /* move any unwritten bytes over */ - memmove(bb->array, bb->array + nwritten, bb->n - nwritten); - bb->nwritten = 0; - bb->n -= nwritten; - } - - /* If necessary, expand the allocated array. Do so by - * by at least a factor of two. */ - navail = bb->nalloc - bb->n; - if (nbytes > navail) { - nadd = L_MAX(bb->nalloc, nbytes); - bbufferExtendArray(bb, nadd); - } - - /* Read in the new bytes */ - nread = fread(bb->array + bb->n, 1, nbytes, fp); - bb->n += nread; - - return 0; -} - - -/*! 
- * \brief bbufferExtendArray() - * - * \param[in] bb bbuffer - * \param[in] nbytes number of bytes to extend array size - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) reallocNew() copies all bb->nalloc bytes, even though
- *          only bb->n are data.
- * 
- */ -l_ok -bbufferExtendArray(L_BBUFFER *bb, - l_int32 nbytes) -{ - PROCNAME("bbufferExtendArray"); - - if (!bb) - return ERROR_INT("bb not defined", procName, 1); - - if ((bb->array = (l_uint8 *)reallocNew((void **)&bb->array, - bb->nalloc, - bb->nalloc + nbytes)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - bb->nalloc += nbytes; - return 0; -} - - -/*--------------------------------------------------------------------------* - * Operations to write data FROM a BBuffer * - *--------------------------------------------------------------------------*/ -/*! - * \brief bbufferWrite() - * - * \param[in] bb bbuffer - * \param[in] dest dest memory buffer to which bytes are written - * \param[in] nbytes bytes requested to be written - * \param[out] pnout bytes actually written - * \return 0 if OK, 1 on error - */ -l_ok -bbufferWrite(L_BBUFFER *bb, - l_uint8 *dest, - size_t nbytes, - size_t *pnout) -{ -size_t nleft, nout; - - PROCNAME("bbufferWrite"); - - if (!bb) - return ERROR_INT("bb not defined", procName, 1); - if (!dest) - return ERROR_INT("dest not defined", procName, 1); - if (nbytes <= 0) - return ERROR_INT("no bytes requested to write", procName, 1); - if (!pnout) - return ERROR_INT("&nout not defined", procName, 1); - - nleft = bb->n - bb->nwritten; - nout = L_MIN(nleft, nbytes); - *pnout = nout; - - if (nleft == 0) { /* nothing to write; reinitialize the buffer */ - bb->n = 0; - bb->nwritten = 0; - return 0; - } - - /* nout > 0; transfer the data out */ - memcpy(dest, bb->array + bb->nwritten, nout); - bb->nwritten += nout; - - /* If all written; "empty" the buffer */ - if (nout == nleft) { - bb->n = 0; - bb->nwritten = 0; - } - - return 0; -} - - -/*! 
- * \brief bbufferWriteStream() - * - * \param[in] bb bbuffer - * \param[in] fp dest stream to which bytes are written - * \param[in] nbytes bytes requested to be written - * \param[out] pnout bytes actually written - * \return 0 if OK, 1 on error - */ -l_ok -bbufferWriteStream(L_BBUFFER *bb, - FILE *fp, - size_t nbytes, - size_t *pnout) -{ -size_t nleft, nout; - - PROCNAME("bbufferWriteStream"); - - if (!bb) - return ERROR_INT("bb not defined", procName, 1); - if (!fp) - return ERROR_INT("output stream not defined", procName, 1); - if (nbytes <= 0) - return ERROR_INT("no bytes requested to write", procName, 1); - if (!pnout) - return ERROR_INT("&nout not defined", procName, 1); - - nleft = bb->n - bb->nwritten; - nout = L_MIN(nleft, nbytes); - *pnout = nout; - - if (nleft == 0) { /* nothing to write; reinitialize the buffer */ - bb->n = 0; - bb->nwritten = 0; - return 0; - } - - /* nout > 0; transfer the data out */ - fwrite(bb->array + bb->nwritten, 1, nout, fp); - bb->nwritten += nout; - - /* If all written; "empty" the buffer */ - if (nout == nleft) { - bb->n = 0; - bb->nwritten = 0; - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bbuffer.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bbuffer.h deleted file mode 100644 index 945cbb0f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bbuffer.h +++ /dev/null @@ -1,60 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_BBUFFER_H -#define LEPTONICA_BBUFFER_H - -/*! - * \file bbuffer.h - * - *
- *      Expandable byte buffer for reading data in from memory and
- *      writing data out to other memory.
- *
- *      This implements a queue of bytes, so data read in is put
- *      on the "back" of the queue (i.e., the end of the byte array)
- *      and data written out is taken from the "front" of the queue
- *      (i.e., from an index marker "nwritten" that is initially set at
- *      the beginning of the array.)  As usual with expandable
- *      arrays, we keep the size of the allocated array and the
- *      number of bytes that have been read into the array.
- *
- *      For implementation details, see bbuffer.c.
- * 
- */ - -/*! Expandable byte buffer for memory read/write operations */ -struct L_ByteBuffer -{ - l_int32 nalloc; /*!< size of allocated byte array */ - l_int32 n; /*!< number of bytes read into to the array */ - l_int32 nwritten; /*!< number of bytes written from the array */ - l_uint8 *array; /*!< byte array */ -}; -typedef struct L_ByteBuffer L_BBUFFER; - - -#endif /* LEPTONICA_BBUFFER_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilateral.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilateral.c deleted file mode 100644 index 7b29fb19..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilateral.c +++ /dev/null @@ -1,813 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bilateral.c - *
- *
- *     Top level approximate separable grayscale or color bilateral filtering
- *          PIX                 *pixBilateral()
- *          PIX                 *pixBilateralGray()
- *
- *     Implementation of approximate separable bilateral filter
- *          static L_BILATERAL  *bilateralCreate()
- *          static void         *bilateralDestroy()
- *          static PIX          *bilateralApply()
- *
- *     Slow, exact implementation of grayscale or color bilateral filtering
- *          PIX                 *pixBilateralExact()
- *          PIX                 *pixBilateralGrayExact()
- *          PIX                 *pixBlockBilateralExact()
- *
- *     Kernel helper function
- *          L_KERNEL            *makeRangeKernel()
- *
- *  This includes both a slow, exact implementation of the bilateral
- *  filter algorithm (given by Sylvain Paris and Frédo Durand),
- *  and a fast, approximate and separable implementation (following
- *  Yang, Tan and Ahuja).  See bilateral.h for algorithmic details.
- *
- *  The bilateral filter has the nice property of applying a gaussian
- *  filter to smooth parts of the image that don't vary too quickly,
- *  while at the same time preserving edges.  The filter is nonlinear
- *  and cannot be decomposed into two separable filters; however,
- *  there exists an approximate method that is separable.  To further
- *  speed up the separable implementation, you can generate the
- *  intermediate data at reduced resolution.
- *
- *  The full kernel is composed of two parts: a spatial gaussian filter
- *  and a nonlinear "range" filter that depends on the intensity difference
- *  between the reference pixel at the spatial kernel origin and any other
- *  pixel within the kernel support.
- *
- *  In our implementations, the range filter is a parameterized,
- *  one-sided, 256-element, monotonically decreasing gaussian function
- *  of the absolute value of the difference between pixel values; namely,
- *  abs(I2 - I1).  In general, any decreasing function can be used,
- *  and more generally,  any two-dimensional kernel can be used if
- *  you wish to relax the 'abs' condition.  (In that case, the range
- *  filter can be 256 x 256).
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#include "bilateral.h" - -static L_BILATERAL *bilateralCreate(PIX *pixs, l_float32 spatial_stdev, - l_float32 range_stdev, l_int32 ncomps, - l_int32 reduction); -static PIX *bilateralApply(L_BILATERAL *bil); -static void bilateralDestroy(L_BILATERAL **pbil); - - -#ifndef NO_CONSOLE_IO -#define DEBUG_BILATERAL 0 -#endif /* ~NO_CONSOLE_IO */ - -/*--------------------------------------------------------------------------* - * Top level approximate separable grayscale or color bilateral filtering * - *--------------------------------------------------------------------------*/ -/*! - * \brief pixBilateral() - * - * \param[in] pixs 8 bpp gray or 32 bpp rgb, no colormap - * \param[in] spatial_stdev of gaussian kernel; in pixels, > 0.5 - * \param[in] range_stdev of gaussian range kernel; > 5.0; typ. 50.0 - * \param[in] ncomps number of intermediate sums J(k,x); - * in [4 ... 30] - * \param[in] reduction 1, 2 or 4 - * \return pixd bilateral filtered image, or NULL on error - * - *
- * Notes:
- *      (1) This performs a relatively fast, separable bilateral
- *          filtering operation.  The time is proportional to ncomps
- *          and varies inversely approximately as the cube of the
- *          reduction factor.  See bilateral.h for algorithm details.
- *      (2) We impose minimum values for range_stdev and ncomps to
- *          avoid nasty artifacts when either are too small.  We also
- *          impose a constraint on their product:
- *               ncomps * range_stdev >= 100.
- *          So for values of range_stdev >= 25, ncomps can be as small as 4.
- *          Here is a qualitative, intuitive explanation for this constraint.
- *          Call the difference in k values between the J(k) == 'delta', where
- *              'delta' ~ 200 / ncomps
- *          Then this constraint is roughly equivalent to the condition:
- *              'delta' < 2 * range_stdev
- *          Note that at an intensity difference of (2 * range_stdev), the
- *          range part of the kernel reduces the effect by the factor 0.14.
- *          This constraint requires that we have a sufficient number of
- *          PCBs (i.e, a small enough 'delta'), so that for any value of
- *          image intensity I, there exists a k (and a PCB, J(k), such that
- *              |I - k| < range_stdev
- *          Any fewer PCBs and we don't have enough to support this condition.
- *      (3) The upper limit of 30 on ncomps is imposed because the
- *          gain in accuracy is not worth the extra computation.
- *      (4) The size of the gaussian kernel is twice the spatial_stdev
- *          on each side of the origin.  The minimum value of
- *          spatial_stdev, 0.5, is required to have a finite sized
- *          spatial kernel.  In practice, a much larger value is used.
- *      (5) Computation of the intermediate images goes inversely
- *          as the cube of the reduction factor.  If you can use a
- *          reduction of 2 or 4, it is well-advised.
- *      (6) The range kernel is defined over the absolute value of pixel
- *          grayscale differences, and hence must have size 256 x 1.
- *          Values in the array represent the multiplying weight
- *          depending on the absolute gray value difference between
- *          the source pixel and the neighboring pixel, and should
- *          be monotonically decreasing.
- *      (7) Interesting observation.  Run this on prog/fish24.jpg, with
- *          range_stdev = 60, ncomps = 6, and spatial_dev = {10, 30, 50}.
- *          As spatial_dev gets larger, we get the counter-intuitive
- *          result that the body of the red fish becomes less blurry.
- * 
- */ -PIX * -pixBilateral(PIX *pixs, - l_float32 spatial_stdev, - l_float32 range_stdev, - l_int32 ncomps, - l_int32 reduction) -{ -l_int32 d; -l_float32 sstdev; /* scaled spatial stdev */ -PIX *pixt, *pixr, *pixg, *pixb, *pixd; - - PROCNAME("pixBilateral"); - - if (!pixs || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not defined or cmapped", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (reduction != 1 && reduction != 2 && reduction != 4) - return (PIX *)ERROR_PTR("reduction invalid", procName, NULL); - sstdev = spatial_stdev / (l_float32)reduction; /* reduced spat. stdev */ - if (sstdev < 0.5) - return (PIX *)ERROR_PTR("sstdev < 0.5", procName, NULL); - if (range_stdev <= 5.0) - return (PIX *)ERROR_PTR("range_stdev <= 5.0", procName, NULL); - if (ncomps < 4 || ncomps > 30) - return (PIX *)ERROR_PTR("ncomps not in [4 ... 30]", procName, NULL); - if (ncomps * range_stdev < 100.0) - return (PIX *)ERROR_PTR("ncomps * range_stdev < 100.0", procName, NULL); - - if (d == 8) - return pixBilateralGray(pixs, spatial_stdev, range_stdev, - ncomps, reduction); - - pixt = pixGetRGBComponent(pixs, COLOR_RED); - pixr = pixBilateralGray(pixt, spatial_stdev, range_stdev, ncomps, - reduction); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_GREEN); - pixg = pixBilateralGray(pixt, spatial_stdev, range_stdev, ncomps, - reduction); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_BLUE); - pixb = pixBilateralGray(pixt, spatial_stdev, range_stdev, ncomps, - reduction); - pixDestroy(&pixt); - pixd = pixCreateRGBImage(pixr, pixg, pixb); - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return pixd; -} - - -/*! - * \brief pixBilateralGray() - * - * \param[in] pixs 8 bpp gray - * \param[in] spatial_stdev of gaussian kernel; in pixels, > 0.5 - * \param[in] range_stdev of gaussian range kernel; > 5.0; typ. 
50.0 - * \param[in] ncomps number of intermediate sums J(k,x); - * in [4 ... 30] - * \param[in] reduction 1, 2 or 4 - * \return pixd 8 bpp bilateral filtered image, or NULL on error - * - *
- * Notes:
- *      (1) See pixBilateral() for constraints on the input parameters.
- *      (2) See pixBilateral() for algorithm details.
- * 
- */ -PIX * -pixBilateralGray(PIX *pixs, - l_float32 spatial_stdev, - l_float32 range_stdev, - l_int32 ncomps, - l_int32 reduction) -{ -l_float32 sstdev; /* scaled spatial stdev */ -PIX *pixd; -L_BILATERAL *bil; - - PROCNAME("pixBilateralGray"); - - if (!pixs || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not defined or cmapped", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp gray", procName, NULL); - if (reduction != 1 && reduction != 2 && reduction != 4) - return (PIX *)ERROR_PTR("reduction invalid", procName, NULL); - sstdev = spatial_stdev / (l_float32)reduction; /* reduced spat. stdev */ - if (sstdev < 0.5) - return (PIX *)ERROR_PTR("sstdev < 0.5", procName, NULL); - if (range_stdev <= 5.0) - return (PIX *)ERROR_PTR("range_stdev <= 5.0", procName, NULL); - if (ncomps < 4 || ncomps > 30) - return (PIX *)ERROR_PTR("ncomps not in [4 ... 30]", procName, NULL); - if (ncomps * range_stdev < 100.0) - return (PIX *)ERROR_PTR("ncomps * range_stdev < 100.0", procName, NULL); - - bil = bilateralCreate(pixs, spatial_stdev, range_stdev, ncomps, reduction); - if (!bil) return (PIX *)ERROR_PTR("bil not made", procName, NULL); - pixd = bilateralApply(bil); - bilateralDestroy(&bil); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Implementation of approximate separable bilateral filter * - *----------------------------------------------------------------------*/ -/*! - * \brief bilateralCreate() - * - * \param[in] pixs 8 bpp gray, no colormap - * \param[in] spatial_stdev of gaussian kernel; in pixels, > 0.5 - * \param[in] range_stdev of gaussian range kernel; > 5.0; typ. 50.0 - * \param[in] ncomps number of intermediate sums J(k,x); - * in [4 ... 30] - * \param[in] reduction 1, 2 or 4 - * \return bil, or NULL on error - * - *
- * Notes:
- *      (1) This initializes a bilateral filtering operation, generating all
- *          the data required.  It takes most of the time in the bilateral
- *          filtering operation.
- *      (2) See bilateral.h for details of the algorithm.
- *      (3) See pixBilateral() for constraints on input parameters, which
- *          are not checked here.
- * 
- */ -static L_BILATERAL * -bilateralCreate(PIX *pixs, - l_float32 spatial_stdev, - l_float32 range_stdev, - l_int32 ncomps, - l_int32 reduction) -{ -l_int32 w, ws, wd, h, hs, hd, i, j, k, index; -l_int32 border, minval, maxval, spatial_size; -l_int32 halfwidth, wpls, wplt, wpld, kval, nval, dval; -l_float32 sstdev, fval1, fval2, denom, sum, norm, kern; -l_int32 *nc, *kindex; -l_float32 *kfract, *range, *spatial; -l_uint32 *datas, *datat, *datad, *lines, *linet, *lined; -L_BILATERAL *bil; -PIX *pixt, *pixt2, *pixsc, *pixd; -PIXA *pixac; - - PROCNAME("bilateralCreate"); - - sstdev = spatial_stdev / (l_float32)reduction; /* reduced spat. stdev */ - if ((bil = (L_BILATERAL *)LEPT_CALLOC(1, sizeof(L_BILATERAL))) == NULL) - return (L_BILATERAL *)ERROR_PTR("bil not made", procName, NULL); - bil->spatial_stdev = sstdev; - bil->range_stdev = range_stdev; - bil->reduction = reduction; - bil->ncomps = ncomps; - - if (reduction == 1) { - pixt = pixClone(pixs); - } else if (reduction == 2) { - pixt = pixScaleAreaMap2(pixs); - } else { /* reduction == 4) */ - pixt2 = pixScaleAreaMap2(pixs); - pixt = pixScaleAreaMap2(pixt2); - pixDestroy(&pixt2); - } - - pixGetExtremeValue(pixt, 1, L_SELECT_MIN, NULL, NULL, NULL, &minval); - pixGetExtremeValue(pixt, 1, L_SELECT_MAX, NULL, NULL, NULL, &maxval); - bil->minval = minval; - bil->maxval = maxval; - - border = (l_int32)(2 * sstdev + 1); - pixsc = pixAddMirroredBorder(pixt, border, border, border, border); - bil->pixsc = pixsc; - pixDestroy(&pixt); - bil->pixs = pixClone(pixs); - - - /* -------------------------------------------------------------------- * - * Generate arrays for interpolation of J(k,x): - * (1.0 - kfract[.]) * J(kindex[.], x) + kfract[.] * J(kindex[.] + 1, x), - * where I(x) is the index into kfract[] and kindex[], - * and x is an index into the 2D image array. 
- * -------------------------------------------------------------------- */ - /* nc is the set of k values to be used in J(k,x) */ - nc = (l_int32 *)LEPT_CALLOC(ncomps, sizeof(l_int32)); - for (i = 0; i < ncomps; i++) - nc[i] = minval + i * (maxval - minval) / (ncomps - 1); - bil->nc = nc; - - /* kindex maps from intensity I(x) to the lower k index for J(k,x) */ - kindex = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = minval, k = 0; i <= maxval && k < ncomps - 1; k++) { - fval2 = nc[k + 1]; - while (i < fval2) { - kindex[i] = k; - i++; - } - } - kindex[maxval] = ncomps - 2; - bil->kindex = kindex; - - /* kfract maps from intensity I(x) to the fraction of J(k+1,x) used */ - kfract = (l_float32 *)LEPT_CALLOC(256, sizeof(l_float32)); /* from lower */ - for (i = minval, k = 0; i <= maxval && k < ncomps - 1; k++) { - fval1 = nc[k]; - fval2 = nc[k + 1]; - while (i < fval2) { - kfract[i] = (l_float32)(i - fval1) / (l_float32)(fval2 - fval1); - i++; - } - } - kfract[maxval] = 1.0; - bil->kfract = kfract; - -#if DEBUG_BILATERAL - for (i = minval; i <= maxval; i++) - lept_stderr("kindex[%d] = %d; kfract[%d] = %5.3f\n", - i, kindex[i], i, kfract[i]); - for (i = 0; i < ncomps; i++) - lept_stderr("nc[%d] = %d\n", i, nc[i]); -#endif /* DEBUG_BILATERAL */ - - - /* -------------------------------------------------------------------- * - * Generate 1-D kernel arrays (spatial and range) * - * -------------------------------------------------------------------- */ - spatial_size = 2 * sstdev + 1; - spatial = (l_float32 *)LEPT_CALLOC(spatial_size, sizeof(l_float32)); - denom = 2. * sstdev * sstdev; - for (i = 0; i < spatial_size; i++) - spatial[i] = expf(-(l_float32)(i * i) / denom); - bil->spatial = spatial; - - range = (l_float32 *)LEPT_CALLOC(256, sizeof(l_float32)); - denom = 2. 
* range_stdev * range_stdev; - for (i = 0; i < 256; i++) - range[i] = expf(-(l_float32)(i * i) / denom); - bil->range = range; - - - /* -------------------------------------------------------------------- * - * Generate principal bilateral component images * - * -------------------------------------------------------------------- */ - pixac = pixaCreate(ncomps); - pixGetDimensions(pixsc, &ws, &hs, NULL); - datas = pixGetData(pixsc); - wpls = pixGetWpl(pixsc); - pixGetDimensions(pixs, &w, &h, NULL); - wd = (w + reduction - 1) / reduction; - hd = (h + reduction - 1) / reduction; - halfwidth = (l_int32)(2.0 * sstdev); - for (index = 0; index < ncomps; index++) { - pixt = pixCopy(NULL, pixsc); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - kval = nc[index]; - /* Separable convolutions: horizontal first */ - for (i = 0; i < hd; i++) { - lines = datas + (border + i) * wpls; - linet = datat + (border + i) * wplt; - for (j = 0; j < wd; j++) { - sum = 0.0; - norm = 0.0; - for (k = -halfwidth; k <= halfwidth; k++) { - nval = GET_DATA_BYTE(lines, border + j + k); - kern = spatial[L_ABS(k)] * range[L_ABS(kval - nval)]; - sum += kern * nval; - norm += kern; - } - dval = (l_int32)((sum / norm) + 0.5); - SET_DATA_BYTE(linet, border + j, dval); - } - } - /* Vertical convolution */ - pixd = pixCreate(wd, hd, 8); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < hd; i++) { - linet = datat + (border + i) * wplt; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - sum = 0.0; - norm = 0.0; - for (k = -halfwidth; k <= halfwidth; k++) { - nval = GET_DATA_BYTE(linet + k * wplt, border + j); - kern = spatial[L_ABS(k)] * range[L_ABS(kval - nval)]; - sum += kern * nval; - norm += kern; - } - dval = (l_int32)((sum / norm) + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - pixDestroy(&pixt); - pixaAddPix(pixac, pixd, L_INSERT); - } - bil->pixac = pixac; - bil->lineset = (l_uint32 ***)pixaGetLinePtrs(pixac, NULL); - - return bil; -} - - -/*! 
- * \brief bilateralApply() - * - * \param[in] bil - * \return pixd - */ -static PIX * -bilateralApply(L_BILATERAL *bil) -{ -l_int32 i, j, k, ired, jred, w, h, wpls, wpld, ncomps, reduction; -l_int32 vals, vald, lowval, hival; -l_int32 *kindex; -l_float32 fract; -l_float32 *kfract; -l_uint32 *lines, *lined, *datas, *datad; -l_uint32 ***lineset = NULL; /* for set of PBC */ -PIX *pixs, *pixd; -PIXA *pixac; - - PROCNAME("bilateralApply"); - - if (!bil) - return (PIX *)ERROR_PTR("bil not defined", procName, NULL); - pixs = bil->pixs; - ncomps = bil->ncomps; - kindex = bil->kindex; - kfract = bil->kfract; - reduction = bil->reduction; - pixac = bil->pixac; - lineset = bil->lineset; - if (pixaGetCount(pixac) != ncomps) - return (PIX *)ERROR_PTR("PBC images do not exist", procName, NULL); - - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - pixGetDimensions(pixs, &w, &h, NULL); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - ired = i / reduction; - for (j = 0; j < w; j++) { - jred = j / reduction; - vals = GET_DATA_BYTE(lines, j); - k = kindex[vals]; - lowval = GET_DATA_BYTE(lineset[k][ired], jred); - hival = GET_DATA_BYTE(lineset[k + 1][ired], jred); - fract = kfract[vals]; - vald = (l_int32)((1.0 - fract) * lowval + fract * hival + 0.5); - SET_DATA_BYTE(lined, j, vald); - } - } - - return pixd; -} - - -/*! 
- * \brief bilateralDestroy() - * - * \param[in,out] pbil will be set to null before returning - */ -static void -bilateralDestroy(L_BILATERAL **pbil) -{ -l_int32 i; -L_BILATERAL *bil; - - PROCNAME("bilateralDestroy"); - - if (pbil == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((bil = *pbil) == NULL) - return; - - pixDestroy(&bil->pixs); - pixDestroy(&bil->pixsc); - pixaDestroy(&bil->pixac); - LEPT_FREE(bil->spatial); - LEPT_FREE(bil->range); - LEPT_FREE(bil->nc); - LEPT_FREE(bil->kindex); - LEPT_FREE(bil->kfract); - for (i = 0; i < bil->ncomps; i++) - LEPT_FREE(bil->lineset[i]); - LEPT_FREE(bil->lineset); - LEPT_FREE(bil); - *pbil = NULL; - return; -} - - -/*----------------------------------------------------------------------* - * Exact implementation of grayscale or color bilateral filtering * - *----------------------------------------------------------------------*/ -/*! - * \brief pixBilateralExact() - * - * \param[in] pixs 8 bpp gray or 32 bpp rgb - * \param[in] spatial_kel gaussian kernel - * \param[in] range_kel [optional] 256 x 1, monotonically decreasing - * \return pixd 8 bpp bilateral filtered image - * - *
- * Notes:
- *      (1) The spatial_kel is a conventional smoothing kernel, typically a
- *          2-d Gaussian kernel or other block kernel.  It can be either
- *          normalized or not, but must be everywhere positive.
- *      (2) The range_kel is defined over the absolute value of pixel
- *          grayscale differences, and hence must have size 256 x 1.
- *          Values in the array represent the multiplying weight for each
- *          gray value difference between the target pixel and center of the
- *          kernel, and should be monotonically decreasing.
- *      (3) If range_kel == NULL, a constant weight is applied regardless
- *          of the range value difference.  This degenerates to a regular
- *          pixConvolve() with a normalized kernel.
- * 
- */ -PIX * -pixBilateralExact(PIX *pixs, - L_KERNEL *spatial_kel, - L_KERNEL *range_kel) -{ -l_int32 d; -PIX *pixt, *pixr, *pixg, *pixb, *pixd; - - PROCNAME("pixBilateralExact"); - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs is cmapped", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (!spatial_kel) - return (PIX *)ERROR_PTR("spatial_ke not defined", procName, NULL); - - if (d == 8) { - return pixBilateralGrayExact(pixs, spatial_kel, range_kel); - } else { /* d == 32 */ - pixt = pixGetRGBComponent(pixs, COLOR_RED); - pixr = pixBilateralGrayExact(pixt, spatial_kel, range_kel); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_GREEN); - pixg = pixBilateralGrayExact(pixt, spatial_kel, range_kel); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_BLUE); - pixb = pixBilateralGrayExact(pixt, spatial_kel, range_kel); - pixDestroy(&pixt); - pixd = pixCreateRGBImage(pixr, pixg, pixb); - - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return pixd; - } -} - - -/*! - * \brief pixBilateralGrayExact() - * - * \param[in] pixs 8 bpp gray - * \param[in] spatial_kel gaussian kernel - * \param[in] range_kel [optional] 256 x 1, monotonically decreasing - * \return pixd 8 bpp bilateral filtered image - * - *
- * Notes:
- *      (1) See pixBilateralExact().
- * 
- */ -PIX * -pixBilateralGrayExact(PIX *pixs, - L_KERNEL *spatial_kel, - L_KERNEL *range_kel) -{ -l_int32 i, j, id, jd, k, m, w, h, d, sx, sy, cx, cy, wplt, wpld; -l_int32 val, center_val; -l_uint32 *datat, *datad, *linet, *lined; -l_float32 sum, weight_sum, weight; -L_KERNEL *keli; -PIX *pixt, *pixd; - - PROCNAME("pixBilateralGrayExact"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be gray", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (!spatial_kel) - return (PIX *)ERROR_PTR("spatial kel not defined", procName, NULL); - - if (!range_kel) - return pixConvolve(pixs, spatial_kel, 8, 1); - if (range_kel->sx != 256 || range_kel->sy != 1) - return (PIX *)ERROR_PTR("range kel not {256 x 1", procName, NULL); - - keli = kernelInvert(spatial_kel); - kernelGetParameters(keli, &sy, &sx, &cy, &cx); - if ((pixt = pixAddMirroredBorder(pixs, cx, sx - cx, cy, sy - cy)) == NULL) { - kernelDestroy(&keli); - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - - pixd = pixCreate(w, h, 8); - datat = pixGetData(pixt); - datad = pixGetData(pixd); - wplt = pixGetWpl(pixt); - wpld = pixGetWpl(pixd); - for (i = 0, id = 0; id < h; i++, id++) { - lined = datad + id * wpld; - for (j = 0, jd = 0; jd < w; j++, jd++) { - center_val = GET_DATA_BYTE(datat + (i + cy) * wplt, j + cx); - weight_sum = 0.0; - sum = 0.0; - for (k = 0; k < sy; k++) { - linet = datat + (i + k) * wplt; - for (m = 0; m < sx; m++) { - val = GET_DATA_BYTE(linet, j + m); - weight = keli->data[k][m] * - range_kel->data[0][L_ABS(center_val - val)]; - weight_sum += weight; - sum += val * weight; - } - } - SET_DATA_BYTE(lined, jd, (l_int32)(sum / weight_sum + 0.5)); - } - } - - kernelDestroy(&keli); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief pixBlockBilateralExact() - * - * \param[in] pixs 8 bpp gray or 32 bpp rgb - * \param[in] spatial_stdev must be > 0.0 - * \param[in] range_stdev must be > 0.0 - * \return pixd 8 bpp or 32 bpp bilateral filtered image - * - *
- * Notes:
- *      (1) See pixBilateralExact().  This provides an interface using
- *          the standard deviations of the spatial and range filters.
- *      (2) The convolution window halfwidth is 2 * spatial_stdev,
- *          and the square filter size is 4 * spatial_stdev + 1.
- *          The kernel captures 95% of total energy.  This is compensated
- *          by normalization.
- *      (3) The range_stdev is analogous to spatial_halfwidth in the
- *          grayscale domain [0...255], and determines how much damping of the
- *          smoothing operation is applied across edges.  The larger this
- *          value is, the smaller the damping.  The smaller the value, the
- *          more edge details are preserved.  These approximations are useful
- *          for deciding the appropriate cutoff.
- *              kernel[1 * stdev] ~= 0.6  * kernel[0]
- *              kernel[2 * stdev] ~= 0.14 * kernel[0]
- *              kernel[3 * stdev] ~= 0.01 * kernel[0]
- *          If range_stdev is infinite there is no damping, and this
- *          becomes a conventional gaussian smoothing.
- *          This value does not affect the run time.
- *      (4) If range_stdev is negative or zero, the range kernel is
- *          ignored and this degenerates to a straight gaussian convolution.
- *      (5) This is very slow for large spatial filters.  The time
- *          on a 3GHz pentium is roughly
- *             T = 1.2 * 10^-8 * (A * sh^2)  sec
- *          where A = # of pixels, sh = spatial halfwidth of filter.
- * 
- */ -PIX* -pixBlockBilateralExact(PIX *pixs, - l_float32 spatial_stdev, - l_float32 range_stdev) -{ -l_int32 d, halfwidth; -L_KERNEL *spatial_kel, *range_kel; -PIX *pixd; - - PROCNAME("pixBlockBilateralExact"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs is cmapped", procName, NULL); - if (spatial_stdev <= 0.0) - return (PIX *)ERROR_PTR("invalid spatial stdev", procName, NULL); - if (range_stdev <= 0.0) - return (PIX *)ERROR_PTR("invalid range stdev", procName, NULL); - - halfwidth = 2 * spatial_stdev; - spatial_kel = makeGaussianKernel(halfwidth, halfwidth, spatial_stdev, 1.0); - range_kel = makeRangeKernel(range_stdev); - pixd = pixBilateralExact(pixs, spatial_kel, range_kel); - kernelDestroy(&spatial_kel); - kernelDestroy(&range_kel); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Kernel helper function * - *----------------------------------------------------------------------*/ -/*! - * \brief makeRangeKernel() - * - * \param[in] range_stdev must be > 0.0 - * \return kel, or NULL on error - * - *
- * Notes:
- *      (1) Creates a one-sided Gaussian kernel with the given
- *          standard deviation.  At grayscale difference of one stdev,
- *          the kernel falls to 0.6, and to 0.01 at three stdev.
- *      (2) A typical input number might be 20.  Then pixels whose
- *          value differs by 60 from the center pixel have their
- *          weight in the convolution reduced by a factor of about 0.01.
- * 
- */ -L_KERNEL * -makeRangeKernel(l_float32 range_stdev) -{ -l_int32 x; -l_float32 val, denom; -L_KERNEL *kel; - - PROCNAME("makeRangeKernel"); - - if (range_stdev <= 0.0) - return (L_KERNEL *)ERROR_PTR("invalid stdev <= 0", procName, NULL); - - denom = 2. * range_stdev * range_stdev; - if ((kel = kernelCreate(1, 256)) == NULL) - return (L_KERNEL *)ERROR_PTR("kel not made", procName, NULL); - kernelSetOrigin(kel, 0, 0); - for (x = 0; x < 256; x++) { - val = expf(-(l_float32)(x * x) / denom); - kernelSetElement(kel, 0, x, val); - } - return kel; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilateral.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilateral.h deleted file mode 100644 index e5b5bbdd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilateral.h +++ /dev/null @@ -1,136 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_BILATERAL_H -#define LEPTONICA_BILATERAL_H - -/*! - * \file bilateral.h - * - *
- *  Contains the following struct
- *      struct L_Bilateral
- *
- *
- *  For a tutorial introduction to bilateral filters, which apply a
- *  gaussian blur to smooth parts of the image while preserving edges, see
- *    http://people.csail.mit.edu/sparis/bf_course/slides/03_definition_bf.pdf
- *
- *  We give an implementation of a bilateral filtering algorithm given in:
- *    "Real-Time O(1) Bilateral Filtering," by Yang, Tan and Ahuja, CVPR 2009
- *  which is at:
- *    http://vision.ai.uiuc.edu/~qyang6/publications/cvpr-09-qingxiong-yang.pdf
- *  This is based on an earlier algorithm by Sylvain Paris and Frédo Durand:
- *    http://people.csail.mit.edu/sparis/publi/2006/eccv/
- *               Paris_06_Fast_Approximation.pdf
- *
- *  The kernel of the filter is a product of a spatial gaussian and a
- *  monotonically decreasing function of the difference in intensity
- *  between the source pixel and the neighboring pixel.  The intensity
- *  part of the filter gives higher influence for pixels with intensities
- *  that are near to the source pixel, and the spatial part of the
- *  filter gives higher weight to pixels that are near the source pixel.
- *  This combination smooths in relatively uniform regions, while
- *  maintaining edges.
- *
- *  The advantage of the appoach of Yang et al is that it is separable,
- *  so the computation time is linear in the gaussian filter size.
- *  Furthermore, it is possible to do much of the computation as a reduced
- *  scale, which gives a good approximation to the full resolution version
- *  but greatly speeds it up.
- *
- *  The bilateral filtered value at x is:
- *
- *            sum[y in N(x)]: spatial(|y - x|) * range(|I(x) - I(y)|) * I(y)
- *    I'(x) = --------------------------------------------------------------
- *            sum[y in N(x)]: spatial(|y - x|) * range(|I(x) - I(y)|)
- *
- *  where I() is the input image, I'() is the filtered image, N(x) is the
- *  set of pixels around x in the filter support, and spatial() and range()
- *  are gaussian functions:
- *          spatial(x) = exp(-x^2 / (2 * s_s^2))
- *          range(x) = exp(-x^2 / (2 * s_r^2))
- *  and s_s and s_r and the standard deviations of the two gaussians.
- *
- *  Yang et al use a separable approximation to this, by defining a set
- *  of related but separable functions J(k,x), that we call Principal
- *  Bilateral Components (PBC):
- *
- *             sum[y in N(x)]: spatial(|y - x|) * range(|k - I(y)|) * I(y)
- *    J(k,x) = -----------------------------------------------------------
- *             sum[y in N(x)]: spatial(|y - x|) * range(|k - I(y)|)
- *
- *  which are computed quickly for a set of n values k[p], p = 0 ... n-1.
- *  Then each output pixel is found using a linear interpolation:
- *
- *    I'(x) = (1 - q) * J(k[p],x) + q * J(k[p+1],x)
- *
- *  where J(k[p],x) and J(k[p+1],x) are PBC for which
- *    k[p] <= I(x) and k[p+1] >= I(x), and
- *    q = (I(x) - k[p]) / (k[p+1] - k[p]).
- *
- *  We can also subsample I(x), create subsampled versions of J(k,x),
- *  which are then interpolated between for I'(x).
- *
- *  We generate 'pixsc', by optionally downscaling the input image
- *  (using area mapping by the factor 'reduction'), and then adding
- *  a mirrored border to avoid boundary cases.  This is then used
- *  to compute 'ncomps' PBCs.
- *
- *  The 'spatial_stdev' is also downscaled by 'reduction'.  The size
- *  of the 'spatial' array is 4 * (reduced 'spatial_stdev') + 1.
- *  The size of the 'range' array is 256.
- * 
- */ - - -/*------------------------------------------------------------------------* - * Bilateral filter * - *------------------------------------------------------------------------*/ - -/*! Bilateral filter */ -struct L_Bilateral -{ - struct Pix *pixs; /*!< clone of source pix */ - struct Pix *pixsc; /*!< downscaled pix with mirrored border */ - l_int32 reduction; /*!< 1, 2 or 4x for intermediates */ - l_float32 spatial_stdev; /*!< stdev of spatial gaussian */ - l_float32 range_stdev; /*!< stdev of range gaussian */ - l_float32 *spatial; /*!< 1D gaussian spatial kernel */ - l_float32 *range; /*!< one-sided gaussian range kernel */ - l_int32 minval; /*!< min value in 8 bpp pix */ - l_int32 maxval; /*!< max value in 8 bpp pix */ - l_int32 ncomps; /*!< number of intermediate results */ - l_int32 *nc; /*!< set of k values (size ncomps) */ - l_int32 *kindex; /*!< mapping from intensity to lower k */ - l_float32 *kfract; /*!< mapping from intensity to fract k */ - struct Pixa *pixac; /*!< intermediate result images (PBC) */ - l_uint32 ***lineset; /*!< lineptrs for pixac */ -}; -typedef struct L_Bilateral L_BILATERAL; - - -#endif /* LEPTONICA_BILATERAL_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilinear.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilinear.c deleted file mode 100644 index 7336e91b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bilinear.c +++ /dev/null @@ -1,912 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bilinear.c - *
- *
- *      Bilinear (4 pt) image transformation using a sampled
- *      (to nearest integer) transform on each dest point
- *           PIX      *pixBilinearSampledPta()
- *           PIX      *pixBilinearSampled()
- *
- *      Bilinear (4 pt) image transformation using interpolation
- *      (or area mapping) for anti-aliasing images that are
- *      2, 4, or 8 bpp gray, or colormapped, or 32 bpp RGB
- *           PIX      *pixBilinearPta()
- *           PIX      *pixBilinear()
- *           PIX      *pixBilinearPtaColor()
- *           PIX      *pixBilinearColor()
- *           PIX      *pixBilinearPtaGray()
- *           PIX      *pixBilinearGray()
- *
- *      Bilinear transform including alpha (blend) component
- *           PIX      *pixBilinearPtaWithAlpha()
- *
- *      Bilinear coordinate transformation
- *           l_int32   getBilinearXformCoeffs()
- *           l_int32   bilinearXformSampledPt()
- *           l_int32   bilinearXformPt()
- *
- *      A bilinear transform can be specified as a specific functional
- *      mapping between 4 points in the source and 4 points in the dest.
- *      It can be used as an approximation to a (nonlinear) projective
- *      transform, because for small warps it is very similar and
- *      it is more stable.  (Projective transforms have a division
- *      by a quantity that can get arbitrarily small.)
- *
- *      We give both a bilinear coordinate transformation and
- *      a bilinear image transformation.
- *
- *      For the former, we ask for the coordinate value (x',y')
- *      in the transformed space for any point (x,y) in the original
- *      space.  The coefficients of the transformation are found by
- *      solving 8 simultaneous equations for the 8 coordinates of
- *      the 4 points in src and dest.  The transformation can then
- *      be used to compute the associated image transform, by
- *      computing, for each dest pixel, the relevant pixel(s) in
- *      the source.  This can be done either by taking the closest
- *      src pixel to each transformed dest pixel ("sampling") or
- *      by doing an interpolation and averaging over 4 source
- *      pixels with appropriate weightings ("interpolated").
- *
- *      A typical application would be to remove some of the
- *      keystoning due to a projective transform in the imaging system.
- *
- *      The bilinear transform is given by specifying two equations:
- *
- *          x' = ax + by + cxy + d
- *          y' = ex + fy + gxy + h
- *
- *      where the eight coefficients have been computed from four
- *      sets of these equations, each for two corresponding data pts.
- *      In practice, once the coefficients are known, we use the
- *      equations "backwards": for each point (x,y) in the dest image,
- *      these two equations are used to compute the corresponding point
- *      (x',y') in the src.  That computed point in the src is then used
- *      to determine the corresponding dest pixel value in one of two ways:
- *
- *       ~ sampling: simply take the value of the src pixel in which this
- *                   point falls
- *       ~ interpolation: take appropriate linear combinations of the
- *                        four src pixels that this dest pixel would
- *                        overlap, with the coefficients proportional
- *                        to the amount of overlap
- *
- *      For small warp, like rotation, area mapping in the
- *      interpolation is equivalent to linear interpolation.
- *
- *      Typical relative timing of transforms (sampled = 1.0):
- *      8 bpp:   sampled        1.0
- *               interpolated   1.6
- *      32 bpp:  sampled        1.0
- *               interpolated   1.8
- *      Additionally, the computation time/pixel is nearly the same
- *      for 8 bpp and 32 bpp, for both sampled and interpolated.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -extern l_float32 AlphaMaskBorderVals[2]; - -/*-------------------------------------------------------------* - * Sampled bilinear image transformation * - *-------------------------------------------------------------*/ -/*! - * \brief pixBilinearSampledPta() - * - * \param[in] pixs all depths - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary.
- *      (2) Retains colormap, which you can do for a sampled transform..
- *      (3) No 3 of the 4 points may be collinear.
- *      (4) For 8 and 32 bpp pix, better quality is obtained by the
- *          somewhat slower pixBilinearPta().  See that
- *          function for relative timings between sampled and interpolated.
- * 
- */ -PIX * -pixBilinearSampledPta(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 incolor) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixBilinearSampledPta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getBilinearXformCoeffs(ptad, ptas, &vc); - pixd = pixBilinearSampled(pixs, vc, incolor); - LEPT_FREE(vc); - - return pixd; -} - - -/*! - * \brief pixBilinearSampled() - * - * \param[in] pixs all depths - * \param[in] vc vector of 8 coefficients for bilinear transformation - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary.
- *      (2) Retains colormap, which you can do for a sampled transform..
- *      (3) For 8 or 32 bpp, much better quality is obtained by the
- *          somewhat slower pixBilinear().  See that function
- *          for relative timings between sampled and interpolated.
- * 
- */ -PIX * -pixBilinearSampled(PIX *pixs, - l_float32 *vc, - l_int32 incolor) -{ -l_int32 i, j, w, h, d, x, y, wpls, wpld, color, cmapindex; -l_uint32 val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixBilinearSampled"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 1, 2, 4, 8 or 16", procName, NULL); - - /* Init all dest pixels to color to be brought in from outside */ - pixd = pixCreateTemplate(pixs); - if ((cmap = pixGetColormap(pixs)) != NULL) { - if (incolor == L_BRING_IN_WHITE) - color = 1; - else - color = 0; - pixcmapAddBlackOrWhite(cmap, color, &cmapindex); - pixSetAllArbitrary(pixd, cmapindex); - } else { - if ((d == 1 && incolor == L_BRING_IN_WHITE) || - (d > 1 && incolor == L_BRING_IN_BLACK)) { - pixClearAll(pixd); - } else { - pixSetAll(pixd); - } - } - - /* Scan over the dest pixels */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - bilinearXformSampledPt(vc, j, i, &x, &y); - if (x < 0 || y < 0 || x >=w || y >= h) - continue; - lines = datas + y * wpls; - if (d == 1) { - val = GET_DATA_BIT(lines, x); - SET_DATA_BIT_VAL(lined, j, val); - } else if (d == 8) { - val = GET_DATA_BYTE(lines, x); - SET_DATA_BYTE(lined, j, val); - } else if (d == 32) { - lined[j] = lines[x]; - } else if (d == 2) { - val = GET_DATA_DIBIT(lines, x); - SET_DATA_DIBIT(lined, j, val); - } else if (d == 4) { - val = GET_DATA_QBIT(lines, x); - SET_DATA_QBIT(lined, j, val); - } - } - } - - return pixd; -} - - 
-/*---------------------------------------------------------------------* - * Interpolated bilinear image transformation * - *---------------------------------------------------------------------*/ -/*! - * \brief pixBilinearPta() - * - * \param[in] pixs all depths; colormap ok - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary
- *      (2) Removes any existing colormap, if necessary, before transforming
- * 
- */ -PIX * -pixBilinearPta(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 incolor) -{ -l_int32 d; -l_uint32 colorval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixBilinearPta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - if (pixGetDepth(pixs) == 1) - return pixBilinearSampledPta(pixs, ptad, ptas, incolor); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual color to bring in from edges */ - colorval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - colorval = 255; - else /* d == 32 */ - colorval = 0xffffff00; - } - - if (d == 8) - pixd = pixBilinearPtaGray(pixt2, ptad, ptas, colorval); - else /* d == 32 */ - pixd = pixBilinearPtaColor(pixt2, ptad, ptas, colorval); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! - * \brief pixBilinear() - * - * \param[in] pixs all depths; colormap ok - * \param[in] vc vector of 8 coefficients for bilinear transformation - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary
- *      (2) Removes any existing colormap, if necessary, before transforming
- * 
- */ -PIX * -pixBilinear(PIX *pixs, - l_float32 *vc, - l_int32 incolor) -{ -l_int32 d; -l_uint32 colorval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixBilinear"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - if (pixGetDepth(pixs) == 1) - return pixBilinearSampled(pixs, vc, incolor); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual color to bring in from edges */ - colorval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - colorval = 255; - else /* d == 32 */ - colorval = 0xffffff00; - } - - if (d == 8) - pixd = pixBilinearGray(pixt2, vc, colorval); - else /* d == 32 */ - pixd = pixBilinearColor(pixt2, vc, colorval); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! 
- * \brief pixBilinearPtaColor() - * - * \param[in] pixs 32 bpp - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] colorval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixBilinearPtaColor(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_uint32 colorval) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixBilinearPtaColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getBilinearXformCoeffs(ptad, ptas, &vc); - pixd = pixBilinearColor(pixs, vc, colorval); - LEPT_FREE(vc); - - return pixd; -} - - -/*! 
- * \brief pixBilinearColor() - * - * \param[in] pixs 32 bpp - * \param[in] vc vector of 8 coefficients for bilinear transformation - * \param[in] colorval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixBilinearColor(PIX *pixs, - l_float32 *vc, - l_uint32 colorval) -{ -l_int32 i, j, w, h, d, wpls, wpld; -l_uint32 val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixBilinearColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixd, colorval); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - bilinearXformPt(vc, j, i, &x, &y); - linearInterpolatePixelColor(datas, wpls, w, h, x, y, colorval, - &val); - *(lined + j) = val; - } - } - - /* If rgba, transform the pixs alpha channel and insert in pixd */ - if (pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixBilinearGray(pix1, vc, 255); /* bring in opaque */ - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - return pixd; -} - - -/*! 
- * \brief pixBilinearPtaGray() - * - * \param[in] pixs 8 bpp - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] grayval e.g., 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixBilinearPtaGray(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_uint8 grayval) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixBilinearPtaGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getBilinearXformCoeffs(ptad, ptas, &vc); - pixd = pixBilinearGray(pixs, vc, grayval); - LEPT_FREE(vc); - - return pixd; -} - - -/*! 
- * \brief pixBilinearGray() - * - * \param[in] pixs 8 bpp - * \param[in] vc vector of 8 coefficients for bilinear transformation - * \param[in] grayval e.g., 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixBilinearGray(PIX *pixs, - l_float32 *vc, - l_uint8 grayval) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -PIX *pixd; - - PROCNAME("pixBilinearGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixd, grayval); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - bilinearXformPt(vc, j, i, &x, &y); - linearInterpolatePixelGray(datas, wpls, w, h, x, y, grayval, &val); - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*-------------------------------------------------------------------------* - * Bilinear transform including alpha (blend) component * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixBilinearPtaWithAlpha() - * - * \param[in] pixs 32 bpp rgb - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] pixg [optional] 8 bpp, can be null - * \param[in] fract between 0.0 and 1.0, with 0.0 fully transparent - * and 1.0 fully opaque - * \param[in] border of pixels added to capture transformed source pixels - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The alpha channel is transformed separately from pixs,
- *          and aligns with it, being fully transparent outside the
- *          boundary of the transformed pixs.  For pixels that are fully
- *          transparent, a blending function like pixBlendWithGrayMask()
- *          will give zero weight to corresponding pixels in pixs.
- *      (2) If %pixg is NULL, it is generated as an alpha layer that is
- *          partially opaque, using %fract.  Otherwise, it is cropped
- *          to %pixs if required and %fract is ignored.  The alpha channel
- *          in %pixs is never used.
- *      (3) Colormaps are removed.
- *      (4) When pixs is transformed, it doesn't matter what color is brought
- *          in because the alpha channel will be transparent (0) there.
- *      (5) To avoid losing source pixels in the destination, it may be
- *          necessary to add a border to the source pix before doing
- *          the bilinear transformation.  This can be any non-negative number.
- *      (6) The input %ptad and %ptas are in a coordinate space before
- *          the border is added.  Internally, we compensate for this
- *          before doing the bilinear transform on the image after
- *          the border is added.
- *      (7) The default setting for the border values in the alpha channel
- *          is 0 (transparent) for the outermost ring of pixels and
- *          (0.5 * fract * 255) for the second ring.  When blended over
- *          a second image, this
- *          (a) shrinks the visible image to make a clean overlap edge
- *              with an image below, and
- *          (b) softens the edges by weakening the aliasing there.
- *          Use l_setAlphaMaskBorder() to change these values.
- * 
- */ -PIX * -pixBilinearPtaWithAlpha(PIX *pixs, - PTA *ptad, - PTA *ptas, - PIX *pixg, - l_float32 fract, - l_int32 border) -{ -l_int32 ws, hs, d; -PIX *pixd, *pixb1, *pixb2, *pixg2, *pixga; -PTA *ptad2, *ptas2; - - PROCNAME("pixBilinearPtaWithAlpha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &ws, &hs, &d); - if (d != 32 && pixGetColormap(pixs) == NULL) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (pixg && pixGetDepth(pixg) != 8) { - L_WARNING("pixg not 8 bpp; using 'fract' transparent alpha\n", - procName); - pixg = NULL; - } - if (!pixg && (fract < 0.0 || fract > 1.0)) { - L_WARNING("invalid fract; using 1.0 (fully transparent)\n", procName); - fract = 1.0; - } - if (!pixg && fract == 0.0) - L_WARNING("fully opaque alpha; image cannot be blended\n", procName); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - - /* Add border; the color doesn't matter */ - pixb1 = pixAddBorder(pixs, border, 0); - - /* Transform the ptr arrays to work on the bordered image */ - ptad2 = ptaTransform(ptad, border, border, 1.0, 1.0); - ptas2 = ptaTransform(ptas, border, border, 1.0, 1.0); - - /* Do separate bilinear transform of rgb channels of pixs and of pixg */ - pixd = pixBilinearPtaColor(pixb1, ptad2, ptas2, 0); - if (!pixg) { - pixg2 = pixCreate(ws, hs, 8); - if (fract == 1.0) - pixSetAll(pixg2); - else - pixSetAllArbitrary(pixg2, (l_int32)(255.0 * fract)); - } else { - pixg2 = pixResizeToMatch(pixg, NULL, ws, hs); - } - if (ws > 10 && hs > 10) { /* see note 7 */ - pixSetBorderRingVal(pixg2, 1, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[0])); - pixSetBorderRingVal(pixg2, 2, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[1])); - - } - pixb2 = pixAddBorder(pixg2, border, 0); /* must be black border */ - pixga = pixBilinearPtaGray(pixb2, ptad2, ptas2, 0); - 
pixSetRGBComponent(pixd, pixga, L_ALPHA_CHANNEL); - pixSetSpp(pixd, 4); - - pixDestroy(&pixg2); - pixDestroy(&pixb1); - pixDestroy(&pixb2); - pixDestroy(&pixga); - ptaDestroy(&ptad2); - ptaDestroy(&ptas2); - return pixd; -} - - -/*-------------------------------------------------------------* - * Bilinear coordinate transformation * - *-------------------------------------------------------------*/ -/*! - * \brief getBilinearXformCoeffs() - * - * \param[in] ptas source 4 points; unprimed - * \param[in] ptad transformed 4 points; primed - * \param[out] pvc vector of coefficients of transform - * \return 0 if OK; 1 on error - * - *
- * We have a set of 8 equations, describing the bilinear
- * transformation that takes 4 points ptas into 4 other
- * points ptad.  These equations are:
- *
- *          x1' = c[0]*x1 + c[1]*y1 + c[2]*x1*y1 + c[3]
- *          y1' = c[4]*x1 + c[5]*y1 + c[6]*x1*y1 + c[7]
- *          x2' = c[0]*x2 + c[1]*y2 + c[2]*x2*y2 + c[3]
- *          y2' = c[4]*x2 + c[5]*y2 + c[6]*x2*y2 + c[7]
- *          x3' = c[0]*x3 + c[1]*y3 + c[2]*x3*y3 + c[3]
- *          y3' = c[4]*x3 + c[5]*y3 + c[6]*x3*y3 + c[7]
- *          x4' = c[0]*x4 + c[1]*y4 + c[2]*x4*y4 + c[3]
- *          y4' = c[4]*x4 + c[5]*y4 + c[6]*x4*y4 + c[7]
- *
- * This can be represented as
- *
- *           AC = B
- *
- * where B and C are column vectors
- *
- *         B = [ x1' y1' x2' y2' x3' y3' x4' y4' ]
- *         C = [ c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] ]
- *
- * and A is the 8x8 matrix
- *
- *             x1   y1   x1*y1   1   0    0      0     0
- *              0    0     0     0   x1   y1   x1*y1   1
- *             x2   y2   x2*y2   1   0    0      0     0
- *              0    0     0     0   x2   y2   x2*y2   1
- *             x3   y3   x3*y3   1   0    0      0     0
- *              0    0     0     0   x3   y3   x3*y3   1
- *             x4   y4   x4*y4   1   0    0      0     0
- *              0    0     0     0   x4   y4   x4*y4   1
- *
- * These eight equations are solved here for the coefficients C.
- *
- * These eight coefficients can then be used to find the mapping
- * x,y) --> (x',y':
- *
- *           x' = c[0]x + c[1]y + c[2]xy + c[3]
- *           y' = c[4]x + c[5]y + c[6]xy + c[7]
- *
- * that are implemented in bilinearXformSampledPt and
- * bilinearXFormPt.
- * 
- */ -l_ok -getBilinearXformCoeffs(PTA *ptas, - PTA *ptad, - l_float32 **pvc) -{ -l_int32 i; -l_float32 x1, y1, x2, y2, x3, y3, x4, y4; -l_float32 *b; /* rhs vector of primed coords X'; coeffs returned in *pvc */ -l_float32 *a[8]; /* 8x8 matrix A */ - - PROCNAME("getBilinearXformCoeffs"); - - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (!ptad) - return ERROR_INT("ptad not defined", procName, 1); - if (!pvc) - return ERROR_INT("&vc not defined", procName, 1); - - b = (l_float32 *)LEPT_CALLOC(8, sizeof(l_float32)); - *pvc = b; - ptaGetPt(ptas, 0, &x1, &y1); - ptaGetPt(ptas, 1, &x2, &y2); - ptaGetPt(ptas, 2, &x3, &y3); - ptaGetPt(ptas, 3, &x4, &y4); - ptaGetPt(ptad, 0, &b[0], &b[1]); - ptaGetPt(ptad, 1, &b[2], &b[3]); - ptaGetPt(ptad, 2, &b[4], &b[5]); - ptaGetPt(ptad, 3, &b[6], &b[7]); - - for (i = 0; i < 8; i++) - a[i] = (l_float32 *)LEPT_CALLOC(8, sizeof(l_float32)); - a[0][0] = x1; - a[0][1] = y1; - a[0][2] = x1 * y1; - a[0][3] = 1.; - a[1][4] = x1; - a[1][5] = y1; - a[1][6] = x1 * y1; - a[1][7] = 1.; - a[2][0] = x2; - a[2][1] = y2; - a[2][2] = x2 * y2; - a[2][3] = 1.; - a[3][4] = x2; - a[3][5] = y2; - a[3][6] = x2 * y2; - a[3][7] = 1.; - a[4][0] = x3; - a[4][1] = y3; - a[4][2] = x3 * y3; - a[4][3] = 1.; - a[5][4] = x3; - a[5][5] = y3; - a[5][6] = x3 * y3; - a[5][7] = 1.; - a[6][0] = x4; - a[6][1] = y4; - a[6][2] = x4 * y4; - a[6][3] = 1.; - a[7][4] = x4; - a[7][5] = y4; - a[7][6] = x4 * y4; - a[7][7] = 1.; - - gaussjordan(a, b, 8); - - for (i = 0; i < 8; i++) - LEPT_FREE(a[i]); - return 0; -} - - -/*! - * \brief bilinearXformSampledPt() - * - * \param[in] vc vector of 8 coefficients - * \param[in] x, y initial point - * \param[out] pxp, pyp transformed point - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds the nearest pixel coordinates of the transformed point.
- *      (2) It does not check ptrs for returned data!
- * 
- */ -l_ok -bilinearXformSampledPt(l_float32 *vc, - l_int32 x, - l_int32 y, - l_int32 *pxp, - l_int32 *pyp) -{ - - PROCNAME("bilinearXformSampledPt"); - - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - - *pxp = (l_int32)(vc[0] * x + vc[1] * y + vc[2] * x * y + vc[3] + 0.5); - *pyp = (l_int32)(vc[4] * x + vc[5] * y + vc[6] * x * y + vc[7] + 0.5); - return 0; -} - - -/*! - * \brief bilinearXformPt() - * - * \param[in] vc vector of 8 coefficients - * \param[in] x, y initial point - * \param[out] pxp, pyp transformed point - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This computes the floating point location of the transformed point.
- *      (2) It does not check ptrs for returned data!
- * 
- */ -l_ok -bilinearXformPt(l_float32 *vc, - l_int32 x, - l_int32 y, - l_float32 *pxp, - l_float32 *pyp) -{ - PROCNAME("bilinearXformPt"); - - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - - *pxp = vc[0] * x + vc[1] * y + vc[2] * x * y + vc[3]; - *pyp = vc[4] * x + vc[5] * y + vc[6] * x * y + vc[7]; - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binarize.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binarize.c deleted file mode 100644 index c21c1634..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binarize.c +++ /dev/null @@ -1,1103 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file binarize.c - *
- *
- *  ===================================================================
- *  Image binarization algorithms are found in:
- *    grayquant.c:   standard, simple, general grayscale quantization
- *    adaptmap.c:    local adaptive; mostly gray-to-gray in preparation
- *                   for binarization
- *    binarize.c:    special binarization methods, locally adaptive and
- *                   global.
- *  ===================================================================
- *
- *      Adaptive Otsu-based thresholding
- *          l_int32       pixOtsuAdaptiveThreshold()       8 bpp
- *
- *      Otsu thresholding on adaptive background normalization
- *          PIX          *pixOtsuThreshOnBackgroundNorm()  8 bpp
- *
- *      Masking and Otsu estimate on adaptive background normalization
- *          PIX          *pixMaskedThreshOnBackgroundNorm()  8 bpp
- *
- *      Sauvola local thresholding
- *          l_int32       pixSauvolaBinarizeTiled()
- *          l_int32       pixSauvolaBinarize()
- *          static PIX   *pixSauvolaGetThreshold()
- *          static PIX   *pixApplyLocalThreshold();
- *
- *      Global thresholding using connected components
- *          PIX          *pixThresholdByConnComp()
- *
- *      Global thresholding by histogram
- *          PIX          *pixThresholdByHisto()
- *
- *  Notes:
- *      (1) pixOtsuAdaptiveThreshold() computes a global threshold over each
- *          tile and performs the threshold operation, resulting in a
- *          binary image for each tile.  These are stitched into the
- *          final result.
- *      (2) pixOtsuThreshOnBackgroundNorm() and
- *          pixMaskedThreshOnBackgroundNorm() are binarization functions
- *          that use background normalization with other techniques.
- *      (3) Sauvola binarization computes a local threshold based on
- *          the local average and square average.  It takes two constants:
- *          the window size for the measurement at each pixel and a
- *          parameter that determines the amount of normalized local
- *          standard deviation to subtract from the local average value.
- *      (4) pixThresholdByConnComp() uses the numbers of 4 and 8 connected
- *          components at different thresholding to determine if a
- *          global threshold can be used (for text or line-art) and the
- *          value it should have.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static PIX *pixSauvolaGetThreshold(PIX *pixm, PIX *pixms, l_float32 factor, - PIX **ppixsd); -static PIX *pixApplyLocalThreshold(PIX *pixs, PIX *pixth); - -/*------------------------------------------------------------------* - * Adaptive Otsu-based thresholding * - *------------------------------------------------------------------*/ -/*! - * \brief pixOtsuAdaptiveThreshold() - * - * \param[in] pixs 8 bpp - * \param[in] sx, sy desired tile dimensions; actual size may vary - * \param[in] smoothx, smoothy half-width of convolution kernel applied to - * threshold array: use 0 for no smoothing - * \param[in] scorefract fraction of the max Otsu score; typ. 0.1; - * use 0.0 for standard Otsu - * \param[out] ppixth [optional] array of threshold values - * found for each tile - * \param[out] ppixd [optional] thresholded input pixs, - * based on the threshold array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The Otsu method finds a single global threshold for an image.
- *          This function allows a locally adapted threshold to be
- *          found for each tile into which the image is broken up.
- *      (2) The array of threshold values, one for each tile, constitutes
- *          a highly downscaled image.  This array is optionally
- *          smoothed using a convolution.  The full width and height of the
- *          convolution kernel are (2 * %smoothx + 1) and (2 * %smoothy + 1).
- *      (3) The minimum tile dimension allowed is 16.  If such small
- *          tiles are used, it is recommended to use smoothing, because
- *          without smoothing, each small tile determines the splitting
- *          threshold independently.  A tile that is entirely in the
- *          image bg will then hallucinate fg, resulting in a very noisy
- *          binarization.  The smoothing should be large enough that no
- *          tile is only influenced by one type (fg or bg) of pixels,
- *          because it will force a split of its pixels.
- *      (4) To get a single global threshold for the entire image, use
- *          input values of %sx and %sy that are larger than the image.
- *          For this situation, the smoothing parameters are ignored.
- *      (5) The threshold values partition the image pixels into two classes:
- *          one whose values are less than the threshold and another
- *          whose values are greater than or equal to the threshold.
- *          This is the same use of 'threshold' as in pixThresholdToBinary().
- *      (6) The scorefract is the fraction of the maximum Otsu score, which
- *          is used to determine the range over which the histogram minimum
- *          is searched.  See numaSplitDistribution() for details on the
- *          underlying method of choosing a threshold.
- *      (7) This uses enables a modified version of the Otsu criterion for
- *          splitting the distribution of pixels in each tile into a
- *          fg and bg part.  The modification consists of searching for
- *          a minimum in the histogram over a range of pixel values where
- *          the Otsu score is within a defined fraction, %scorefract,
- *          of the max score.  To get the original Otsu algorithm, set
- *          %scorefract == 0.
- *      (8) N.B. This method is NOT recommended for images with weak text
- *          and significant background noise, such as bleedthrough, because
- *          of the problem noted in (3) above for tiling.  Use Sauvola.
- * 
- */ -l_ok -pixOtsuAdaptiveThreshold(PIX *pixs, - l_int32 sx, - l_int32 sy, - l_int32 smoothx, - l_int32 smoothy, - l_float32 scorefract, - PIX **ppixth, - PIX **ppixd) -{ -l_int32 w, h, nx, ny, i, j, thresh; -l_uint32 val; -PIX *pixt, *pixb, *pixthresh, *pixth, *pixd; -PIXTILING *pt; - - PROCNAME("pixOtsuAdaptiveThreshold"); - - if (!ppixth && !ppixd) - return ERROR_INT("neither &pixth nor &pixd defined", procName, 1); - if (ppixth) *ppixth = NULL; - if (ppixd) *ppixd = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (sx < 16 || sy < 16) - return ERROR_INT("sx and sy must be >= 16", procName, 1); - - /* Compute the threshold array for the tiles */ - pixGetDimensions(pixs, &w, &h, NULL); - nx = L_MAX(1, w / sx); - ny = L_MAX(1, h / sy); - smoothx = L_MIN(smoothx, (nx - 1) / 2); - smoothy = L_MIN(smoothy, (ny - 1) / 2); - pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0); - pixthresh = pixCreate(nx, ny, 8); - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - pixt = pixTilingGetTile(pt, i, j); - pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh, - NULL, NULL, NULL); - pixSetPixel(pixthresh, j, i, thresh); /* see note (4) */ - pixDestroy(&pixt); - } - } - - /* Optionally smooth the threshold array */ - if (smoothx > 0 || smoothy > 0) - pixth = pixBlockconv(pixthresh, smoothx, smoothy); - else - pixth = pixClone(pixthresh); - pixDestroy(&pixthresh); - - /* Optionally apply the threshold array to binarize pixs */ - if (ppixd) { - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixs); - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - pixt = pixTilingGetTile(pt, i, j); - pixGetPixel(pixth, j, i, &val); - pixb = pixThresholdToBinary(pixt, val); - pixTilingPaintTile(pixd, i, j, pixb, pt); - pixDestroy(&pixt); - pixDestroy(&pixb); - } - } - *ppixd = pixd; - } - - if (ppixth) - *ppixth = pixth; - else - pixDestroy(&pixth); - - pixTilingDestroy(&pt); - return 0; -} - - 
-/*------------------------------------------------------------------* - * Otsu thresholding on adaptive background normalization * - *------------------------------------------------------------------*/ -/*! - * \brief pixOtsuThreshOnBackgroundNorm() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[in] bgval target bg val; typ. > 128 - * \param[in] smoothx half-width of block convolution kernel width - * \param[in] smoothy half-width of block convolution kernel height - * \param[in] scorefract fraction of the max Otsu score; typ. 0.1 - * \param[out] pthresh [optional] threshold value that was - * used on the normalized image - * \return pixd 1 bpp thresholded image, or NULL on error - * - *
- * Notes:
- *      (1) This does background normalization followed by Otsu
- *          thresholding.  Otsu binarization attempts to split the
- *          image into two roughly equal sets of pixels, and it does
- *          a very poor job when there are large amounts of dark
- *          background.  By doing a background normalization first,
- *          to get the background near 255, we remove this problem.
- *          Then we use a modified Otsu to estimate the best global
- *          threshold on the normalized image.
- *      (2) See pixBackgroundNorm() for meaning and typical values
- *          of input parameters.  For a start, you can try:
- *            sx, sy = 10, 15
- *            thresh = 100
- *            mincount = 50
- *            bgval = 255
- *            smoothx, smoothy = 2
- * 
- */ -PIX * -pixOtsuThreshOnBackgroundNorm(PIX *pixs, - PIX *pixim, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - l_int32 bgval, - l_int32 smoothx, - l_int32 smoothy, - l_float32 scorefract, - l_int32 *pthresh) -{ -l_int32 w, h; -l_uint32 val; -PIX *pixn, *pixt, *pixd; - - PROCNAME("pixOtsuThreshOnBackgroundNorm"); - - if (pthresh) *pthresh = 0; - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); - if (sx < 4 || sy < 4) - return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, - mincount, bgval, smoothx, smoothy); - if (!pixn) - return (PIX *)ERROR_PTR("pixn not made", procName, NULL); - - /* Just use 1 tile for a global threshold, which is stored - * as a single pixel in pixt. */ - pixGetDimensions(pixn, &w, &h, NULL); - pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd); - pixDestroy(&pixn); - - if (pixt && pthresh) { - pixGetPixel(pixt, 0, 0, &val); - *pthresh = val; - } - pixDestroy(&pixt); - - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - else - return pixd; -} - - - -/*----------------------------------------------------------------------* - * Masking and Otsu estimate on adaptive background normalization * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixMaskedThreshOnBackgroundNorm() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] pixim [optional] 1 bpp 'image' mask; can be null - * \param[in] sx, sy tile size in pixels - * \param[in] thresh threshold for determining foreground - * \param[in] mincount min threshold on counts in a tile - * \param[in] smoothx half-width of block convolution kernel width - * \param[in] smoothy half-width of block convolution kernel height - * \param[in] scorefract fraction of the max Otsu score; typ. ~ 0.1 - * \param[out] pthresh [optional] threshold value that was - * used on the normalized image - * \return pixd 1 bpp thresholded image, or NULL on error - * - *
- * Notes:
- *      (1) This begins with a standard background normalization.
- *          Additionally, there is a flexible background norm, that
- *          will adapt to a rapidly varying background, and this
- *          puts white pixels in the background near regions with
- *          significant foreground.  The white pixels are turned into
- *          a 1 bpp selection mask by binarization followed by dilation.
- *          Otsu thresholding is performed on the input image to get an
- *          estimate of the threshold in the non-mask regions.
- *          The background normalized image is thresholded with two
- *          different values, and the result is combined using
- *          the selection mask.
- *      (2) Note that the numbers 255 (for bgval target) and 190 (for
- *          thresholding on pixn) are tied together, and explicitly
- *          defined in this function.
- *      (3) See pixBackgroundNorm() for meaning and typical values
- *          of input parameters.  For a start, you can try:
- *            sx, sy = 10, 15
- *            thresh = 100
- *            mincount = 50
- *            smoothx, smoothy = 2
- * 
- */ -PIX * -pixMaskedThreshOnBackgroundNorm(PIX *pixs, - PIX *pixim, - l_int32 sx, - l_int32 sy, - l_int32 thresh, - l_int32 mincount, - l_int32 smoothx, - l_int32 smoothy, - l_float32 scorefract, - l_int32 *pthresh) -{ -l_int32 w, h, highthresh; -l_uint32 val; -PIX *pixn, *pixm, *pixd, *pix1, *pix2, *pix3, *pix4; - - PROCNAME("pixMaskedThreshOnBackgroundNorm"); - - if (pthresh) *pthresh = 0; - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); - if (sx < 4 || sy < 4) - return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); - if (mincount > sx * sy) { - L_WARNING("mincount too large for tile size\n", procName); - mincount = (sx * sy) / 3; - } - - /* Standard background normalization */ - pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, - mincount, 255, smoothx, smoothy); - if (!pixn) - return (PIX *)ERROR_PTR("pixn not made", procName, NULL); - - /* Special background normalization for adaptation to quickly - * varying background. Threshold on the very light parts, - * which tend to be near significant edges, and dilate to - * form a mask over regions that are typically text. The - * dilation size is chosen to cover the text completely, - * except for very thick fonts. */ - pix1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20); - pix2 = pixThresholdToBinary(pix1, 240); - pixInvert(pix2, pix2); - pixm = pixMorphSequence(pix2, "d21.21", 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - - /* Use Otsu to get a global threshold estimate for the image, - * which is stored as a single pixel in pix3. 
*/ - pixGetDimensions(pixs, &w, &h, NULL); - pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pix3, NULL); - pixGetPixel(pix3, 0, 0, &val); - if (pthresh) *pthresh = val; - pixDestroy(&pix3); - - /* Threshold the background normalized images differentially, - * using a high value correlated with the background normalization - * for the part of the image under the mask (i.e., near the - * darker, thicker foreground), and a value that depends on the Otsu - * threshold for the rest of the image. This gives a solid - * (high) thresholding for the foreground parts of the image, - * while allowing the background and light foreground to be - * reasonably well cleaned using a threshold adapted to the - * input image. */ - highthresh = L_MIN(256, val + 30); - pixd = pixThresholdToBinary(pixn, highthresh); /* for bg and light fg */ - pix4 = pixThresholdToBinary(pixn, 190); /* for heavier fg */ - pixCombineMasked(pixd, pix4, pixm); - pixDestroy(&pix4); - pixDestroy(&pixm); - pixDestroy(&pixn); - - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - else - return pixd; -} - - -/*----------------------------------------------------------------------* - * Sauvola binarization * - *----------------------------------------------------------------------*/ -/*! - * \brief pixSauvolaBinarizeTiled() - * - * \param[in] pixs 8 bpp grayscale, not colormapped - * \param[in] whsize window half-width for measuring local statistics - * \param[in] factor factor for reducing threshold due to variance; >= 0 - * \param[in] nx, ny subdivision into tiles; >= 1 - * \param[out] ppixth [optional] Sauvola threshold values - * \param[out] ppixd [optional] thresholded image - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The window width and height are 2 * %whsize + 1.  The minimum
- *          value for %whsize is 2; typically it is >= 7..
- *      (2) For nx == ny == 1, this defaults to pixSauvolaBinarize().
- *      (3) Why a tiled version?
- *          (a) Because the mean value accumulator is a uint32, overflow
- *              can occur for an image with more than 16M pixels.
- *          (b) The mean value accumulator array for 16M pixels is 64 MB.
- *              The mean square accumulator array for 16M pixels is 128 MB.
- *              Using tiles reduces the size of these arrays.
- *          (c) Each tile can be processed independently, in parallel,
- *              on a multicore processor.
- *      (4) The Sauvola threshold is determined from the formula:
- *              t = m * (1 - k * (1 - s / 128))
- *          See pixSauvolaBinarize() for details.
- * 
- */ -l_ok -pixSauvolaBinarizeTiled(PIX *pixs, - l_int32 whsize, - l_float32 factor, - l_int32 nx, - l_int32 ny, - PIX **ppixth, - PIX **ppixd) -{ -l_int32 i, j, w, h, xrat, yrat; -PIX *pixth, *pixd, *tileth, *tiled, *pixt; -PIX **ptileth, **ptiled; -PIXTILING *pt; - - PROCNAME("pixSauvolaBinarizeTiled"); - - if (!ppixth && !ppixd) - return ERROR_INT("no outputs", procName, 1); - if (ppixth) *ppixth = NULL; - if (ppixd) *ppixd = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is cmapped", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (whsize < 2) - return ERROR_INT("whsize must be >= 2", procName, 1); - if (w < 2 * whsize + 3 || h < 2 * whsize + 3) - return ERROR_INT("whsize too large for image", procName, 1); - if (factor < 0.0) - return ERROR_INT("factor must be >= 0", procName, 1); - - if (nx <= 1 && ny <= 1) - return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, - ppixth, ppixd); - - /* Test to see if the tiles are too small. The required - * condition is that the tile dimensions must be at least - * (whsize + 2) x (whsize + 2). 
*/ - xrat = w / nx; - yrat = h / ny; - if (xrat < whsize + 2) { - nx = w / (whsize + 2); - L_WARNING("tile width too small; nx reduced to %d\n", procName, nx); - } - if (yrat < whsize + 2) { - ny = h / (whsize + 2); - L_WARNING("tile height too small; ny reduced to %d\n", procName, ny); - } - if (nx <= 1 && ny <= 1) - return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, - ppixth, ppixd); - - /* We can use pixtiling for painting both outputs, if requested */ - if (ppixth) { - pixth = pixCreateNoInit(w, h, 8); - *ppixth = pixth; - } - if (ppixd) { - pixd = pixCreateNoInit(w, h, 1); - *ppixd = pixd; - } - pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1); - pixTilingNoStripOnPaint(pt); /* pixSauvolaBinarize() does the stripping */ - - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - pixt = pixTilingGetTile(pt, i, j); - ptileth = (ppixth) ? &tileth : NULL; - ptiled = (ppixd) ? &tiled : NULL; - pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL, - ptileth, ptiled); - if (ppixth) { /* do not strip */ - pixTilingPaintTile(pixth, i, j, tileth, pt); - pixDestroy(&tileth); - } - if (ppixd) { - pixTilingPaintTile(pixd, i, j, tiled, pt); - pixDestroy(&tiled); - } - pixDestroy(&pixt); - } - } - - pixTilingDestroy(&pt); - return 0; -} - - -/*! - * \brief pixSauvolaBinarize() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] whsize window half-width for measuring local statistics - * \param[in] factor factor for reducing threshold due to variance; >= 0 - * \param[in] addborder 1 to add border of width (%whsize + 1) on all sides - * \param[out] ppixm [optional] local mean values - * \param[out] ppixsd [optional] local standard deviation values - * \param[out] ppixth [optional] threshold values - * \param[out] ppixd [optional] thresholded image - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The window width and height are 2 * %whsize + 1.  The minimum
- *          value for %whsize is 2; typically it is >= 7..
- *      (2) The local statistics, measured over the window, are the
- *          average and standard deviation.
- *      (3) The measurements of the mean and standard deviation are
- *          performed inside a border of (%whsize + 1) pixels.  If pixs does
- *          not have these added border pixels, use %addborder = 1 to add
- *          it here; otherwise use %addborder = 0.
- *      (4) The Sauvola threshold is determined from the formula:
- *            t = m * (1 - k * (1 - s / 128))
- *          where:
- *            t = local threshold
- *            m = local mean
- *            k = %factor (>= 0)   [ typ. 0.35 ]
- *            s = local standard deviation, which is maximized at
- *                127.5 when half the samples are 0 and half are 255.
- *      (5) The basic idea of Niblack and Sauvola binarization is that
- *          the local threshold should be less than the median value,
- *          and the larger the variance, the closer to the median
- *          it should be chosen.  Typical values for k are between
- *          0.2 and 0.5.
- * 
- */ -l_ok -pixSauvolaBinarize(PIX *pixs, - l_int32 whsize, - l_float32 factor, - l_int32 addborder, - PIX **ppixm, - PIX **ppixsd, - PIX **ppixth, - PIX **ppixd) -{ -l_int32 w, h; -PIX *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd; - - PROCNAME("pixSauvolaBinarize"); - - if (ppixm) *ppixm = NULL; - if (ppixsd) *ppixsd = NULL; - if (ppixth) *ppixth = NULL; - if (ppixd) *ppixd = NULL; - if (!ppixm && !ppixsd && !ppixth && !ppixd) - return ERROR_INT("no outputs", procName, 1); - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is cmapped", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (whsize < 2) - return ERROR_INT("whsize must be >= 2", procName, 1); - if (w < 2 * whsize + 3 || h < 2 * whsize + 3) - return ERROR_INT("whsize too large for image", procName, 1); - if (factor < 0.0) - return ERROR_INT("factor must be >= 0", procName, 1); - - if (addborder) { - pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1, - whsize + 1, whsize + 1); - pixsc = pixClone(pixs); - } else { - pixg = pixClone(pixs); - pixsc = pixRemoveBorder(pixs, whsize + 1); - } - if (!pixg || !pixsc) - return ERROR_INT("pixg and pixsc not made", procName, 1); - - /* All these functions strip off the border pixels. */ - if (ppixm || ppixth || ppixd) - pixm = pixWindowedMean(pixg, whsize, whsize, 1, 1); - if (ppixsd || ppixth || ppixd) - pixms = pixWindowedMeanSquare(pixg, whsize, whsize, 1); - if (ppixth || ppixd) - pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd); - if (ppixd) { - pixd = pixApplyLocalThreshold(pixsc, pixth); - pixCopyResolution(pixd, pixs); - } - - if (ppixm) - *ppixm = pixm; - else - pixDestroy(&pixm); - pixDestroy(&pixms); - if (ppixth) - *ppixth = pixth; - else - pixDestroy(&pixth); - if (ppixd) - *ppixd = pixd; - pixDestroy(&pixg); - pixDestroy(&pixsc); - return 0; -} - - -/*! 
- * \brief pixSauvolaGetThreshold() - * - * \param[in] pixm 8 bpp grayscale; not colormapped - * \param[in] pixms 32 bpp - * \param[in] factor factor for reducing threshold due to variance; >= 0 - * \param[out] ppixsd [optional] local standard deviation - * \return pixd 8 bpp, sauvola threshold values, or NULL on error - * - *
- * Notes:
- *      (1) The Sauvola threshold is determined from the formula:
- *            t = m * (1 - k * (1 - s / 128))
- *          where:
- *            t = local threshold
- *            m = local mean
- *            k = %factor (>= 0)   [ typ. 0.35 ]
- *            s = local standard deviation, which is maximized at
- *                127.5 when half the samples are 0 and half are 255.
- *      (2) See pixSauvolaBinarize() for other details.
- *      (3) Important definitions and relations for computing averages:
- *            v == pixel value
- *            E(p) == expected value of p == average of p over some pixel set
- *            S(v) == square of v == v * v
- *            mv == E(v) == expected pixel value == mean value
- *            ms == E(S(v)) == expected square of pixel values
- *               == mean square value
- *            var == variance == expected square of deviation from mean
- *                == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv))
- *                                = E(S(v)) - S(mv)
- *                                = ms - mv * mv
- *            s == standard deviation = sqrt(var)
- *          So for evaluating the standard deviation in the Sauvola
- *          threshold, we take
- *            s = sqrt(ms - mv * mv)
- * 
- */ -static PIX * -pixSauvolaGetThreshold(PIX *pixm, - PIX *pixms, - l_float32 factor, - PIX **ppixsd) -{ -l_int32 i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab; -l_int32 mv, ms, var, thresh; -l_uint32 *datam, *datams, *datasd, *datad; -l_uint32 *linem, *linems, *linesd, *lined; -l_float32 sd; -l_float32 *tab; /* of 2^16 square roots */ -PIX *pixsd, *pixd; - - PROCNAME("pixSauvolaGetThreshold"); - - if (ppixsd) *ppixsd = NULL; - if (!pixm || pixGetDepth(pixm) != 8) - return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixm)) - return (PIX *)ERROR_PTR("pixm is colormapped", procName, NULL); - if (!pixms || pixGetDepth(pixms) != 32) - return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp", - procName, NULL); - if (factor < 0.0) - return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL); - - /* Only make a table of 2^16 square roots if there - * are enough pixels to justify it. */ - pixGetDimensions(pixm, &w, &h, NULL); - usetab = (w * h > 100000) ? 
1 : 0; - if (usetab) { - tabsize = 1 << 16; - tab = (l_float32 *)LEPT_CALLOC(tabsize, sizeof(l_float32)); - for (i = 0; i < tabsize; i++) - tab[i] = sqrtf((l_float32)i); - } - - pixd = pixCreate(w, h, 8); - if (ppixsd) { - pixsd = pixCreate(w, h, 8); - *ppixsd = pixsd; - } - datam = pixGetData(pixm); - datams = pixGetData(pixms); - if (ppixsd) datasd = pixGetData(pixsd); - datad = pixGetData(pixd); - wplm = pixGetWpl(pixm); - wplms = pixGetWpl(pixms); - if (ppixsd) wplsd = pixGetWpl(pixsd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linem = datam + i * wplm; - linems = datams + i * wplms; - if (ppixsd) linesd = datasd + i * wplsd; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - mv = GET_DATA_BYTE(linem, j); - ms = linems[j]; - var = ms - mv * mv; - if (usetab) - sd = tab[var]; - else - sd = sqrtf((l_float32)var); - if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd); - thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.))); - SET_DATA_BYTE(lined, j, thresh); - } - } - - if (usetab) LEPT_FREE(tab); - return pixd; -} - - -/*! 
- * \brief pixApplyLocalThreshold() - * - * \param[in] pixs 8 bpp grayscale; not colormapped - * \param[in] pixth 8 bpp array of local thresholds - * \return pixd 1 bpp, thresholded image, or NULL on error - */ -static PIX * -pixApplyLocalThreshold(PIX *pixs, - PIX *pixth) -{ -l_int32 i, j, w, h, wpls, wplt, wpld, vals, valt; -l_uint32 *datas, *datat, *datad, *lines, *linet, *lined; -PIX *pixd; - - PROCNAME("pixApplyLocalThreshold"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); - if (!pixth || pixGetDepth(pixth) != 8) - return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, 1); - datas = pixGetData(pixs); - datat = pixGetData(pixth); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wplt = pixGetWpl(pixth); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - valt = GET_DATA_BYTE(linet, j); - if (vals < valt) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*----------------------------------------------------------------------* - * Global thresholding using connected components * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixThresholdByConnComp() - * - * \param[in] pixs depth > 1, colormap OK - * \param[in] pixm [optional] 1 bpp mask giving region to ignore - * by setting pixels to white; use NULL if no mask - * \param[in] start, end, incr binarization threshold levels to test - * \param[in] thresh48 threshold on normalized difference between the - * numbers of 4 and 8 connected components - * \param[in] threshdiff threshold on normalized difference between the - * number of 4 cc at successive iterations - * \param[out] pglobthresh [optional] best global threshold; 0 - * if no threshold is found - * \param[out] ppixd [optional] image thresholded to binary, or - * null if no threshold is found - * \param[in] debugflag 1 for plotted results - * \return 0 if OK, 1 on error or if no threshold is found - * - *
- * Notes:
- *      (1) This finds a global threshold based on connected components.
- *          Although slow, it is reasonable to use it in a situation where
- *          (a) the background in the image is relatively uniform, and
- *          (b) the result will be fed to an OCR program that accepts 1 bpp
- *              images and works best with easily segmented characters.
- *          The reason for (b) is that this selects a threshold with a
- *          minimum number of both broken characters and merged characters.
- *      (2) If the pix has color, it is converted to gray using the
- *          max component.
- *      (3) Input 0 to use default values for any of these inputs:
- *          %start, %end, %incr, %thresh48, %threshdiff.
- *      (4) This approach can be understood as follows.  When the
- *          binarization threshold is varied, the numbers of c.c. identify
- *          four regimes:
- *          (a) For low thresholds, text is broken into small pieces, and
- *              the number of c.c. is large, with the 4 c.c. significantly
- *              exceeding the 8 c.c.
- *          (b) As the threshold rises toward the optimum value, the text
- *              characters coalesce and there is very little difference
- *              between the numbers of 4 and 8 c.c, which both go
- *              through a minimum.
- *          (c) Above this, the image background gets noisy because some
- *              pixels are(thresholded to foreground, and the numbers
- *              of c.c. quickly increase, with the 4 c.c. significantly
- *              larger than the 8 c.c.
- *          (d) At even higher thresholds, the image background noise
- *              coalesces as it becomes mostly foreground, and the
- *              number of c.c. drops quickly.
- *      (5) If there is no global threshold that distinguishes foreground
- *          text from background (e.g., weak text over a background that
- *          has significant variation and/or bleedthrough), this returns 1,
- *          which the caller should check.
- * 
- */ -l_ok -pixThresholdByConnComp(PIX *pixs, - PIX *pixm, - l_int32 start, - l_int32 end, - l_int32 incr, - l_float32 thresh48, - l_float32 threshdiff, - l_int32 *pglobthresh, - PIX **ppixd, - l_int32 debugflag) -{ -l_int32 i, thresh, n, n4, n8, mincounts, found, globthresh; -l_float32 count4, count8, firstcount4, prevcount4, diff48, diff4; -GPLOT *gplot; -NUMA *na4, *na8; -PIX *pix1, *pix2, *pix3; - - PROCNAME("pixThresholdByConnComp"); - - if (pglobthresh) *pglobthresh = 0; - if (ppixd) *ppixd = NULL; - if (!pixs || pixGetDepth(pixs) == 1) - return ERROR_INT("pixs undefined or 1 bpp", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm must be 1 bpp", procName, 1); - - /* Assign default values if requested */ - if (start <= 0) start = 80; - if (end <= 0) end = 200; - if (incr <= 0) incr = 10; - if (thresh48 <= 0.0) thresh48 = 0.01; - if (threshdiff <= 0.0) threshdiff = 0.01; - if (start > end) - return ERROR_INT("invalid start,end", procName, 1); - - /* Make 8 bpp, using the max component if color. */ - if (pixGetColormap(pixs)) - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pix1 = pixClone(pixs); - if (pixGetDepth(pix1) == 32) - pix2 = pixConvertRGBToGrayMinMax(pix1, L_CHOOSE_MAX); - else - pix2 = pixConvertTo8(pix1, 0); - pixDestroy(&pix1); - - /* Mask out any non-text regions. Do this in-place, because pix2 - * can never be the same pix as pixs. */ - if (pixm) - pixSetMasked(pix2, pixm, 255); - - /* Make sure there are enough components to get a valid signal */ - pix3 = pixConvertTo1(pix2, start); - pixCountConnComp(pix3, 4, &n4); - pixDestroy(&pix3); - mincounts = 500; - if (n4 < mincounts) { - L_INFO("Insufficient component count: %d\n", procName, n4); - pixDestroy(&pix2); - return 1; - } - - /* Compute the c.c. 
data */ - na4 = numaCreate(0); - na8 = numaCreate(0); - numaSetParameters(na4, start, incr); - numaSetParameters(na8, start, incr); - for (thresh = start, i = 0; thresh <= end; thresh += incr, i++) { - pix3 = pixConvertTo1(pix2, thresh); - pixCountConnComp(pix3, 4, &n4); - pixCountConnComp(pix3, 8, &n8); - numaAddNumber(na4, n4); - numaAddNumber(na8, n8); - pixDestroy(&pix3); - } - if (debugflag) { - lept_mkdir("lept/binarize"); - gplot = gplotCreate("/tmp/lept/binarize", GPLOT_PNG, - "number of cc vs. threshold", - "threshold", "number of cc"); - gplotAddPlot(gplot, NULL, na4, GPLOT_LINES, "plot 4cc"); - gplotAddPlot(gplot, NULL, na8, GPLOT_LINES, "plot 8cc"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - } - - n = numaGetCount(na4); - found = FALSE; - for (i = 0; i < n; i++) { - if (i == 0) { - numaGetFValue(na4, i, &firstcount4); - prevcount4 = firstcount4; - } else { - numaGetFValue(na4, i, &count4); - numaGetFValue(na8, i, &count8); - diff48 = (count4 - count8) / firstcount4; - diff4 = L_ABS(prevcount4 - count4) / firstcount4; - if (debugflag) { - lept_stderr("diff48 = %7.3f, diff4 = %7.3f\n", - diff48, diff4); - } - if (diff48 < thresh48 && diff4 < threshdiff) { - found = TRUE; - break; - } - prevcount4 = count4; - } - } - numaDestroy(&na4); - numaDestroy(&na8); - - if (found) { - globthresh = start + i * incr; - if (pglobthresh) *pglobthresh = globthresh; - if (ppixd) { - *ppixd = pixConvertTo1(pix2, globthresh); - pixCopyResolution(*ppixd, pixs); - } - if (debugflag) lept_stderr("global threshold = %d\n", globthresh); - pixDestroy(&pix2); - return 0; - } - - if (debugflag) lept_stderr("no global threshold found\n"); - pixDestroy(&pix2); - return 1; -} - -/*----------------------------------------------------------------------* - * Global thresholding by histogram * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixThresholdByHisto() - * - * \param[in] pixs gray 8 bpp, no colormap - * \param[in] factor subsampling factor >= 1 - * \param[in] halfw half of window width for smoothing; - * use 0 for default - * \param[in] delta relative amount to resolve peaks and valleys; - * in (0 ... 1], use 0 for default - * \param[out] pthresh best global threshold; 0 if no threshold is found - * \param[out] ppixd [optional] thresholded 1 bpp pix - * \param[out] ppixhisto [optional] rescaled histogram of gray values - * \return 0 if OK, 1 on error or if no threshold is found - * - *
- * Notes:
- *      (1) This finds a global threshold.  It is best for an image that
- *          has a fairly well-defined fg and bg.
- *      (2) If it finds a good threshold and %ppixd is defined, the binarized
- *          image is returned in &pixd; otherwise it return null.
- *      (3) Suggest using default values for %half and %delta.
- *      (4) Returns 0 in %pthresh if it can't find a good threshold.
- * 
- */ -l_ok -pixThresholdByHisto(PIX *pixs, - l_int32 factor, - l_int32 halfw, - l_float32 delta, - l_int32 *pthresh, - PIX **ppixd, - PIX **ppixhisto) -{ -l_int32 i, n; -l_float32 maxval, val1, val2, fract; -NUMA *na1, *na2, *na3, *naloc, *nav; -PIX *pix1; - - PROCNAME("pixThresholdByHisto"); - - if (ppixhisto) *ppixhisto = NULL; - if (ppixd) *ppixd = NULL; - if (!pthresh) - return ERROR_INT("&thresh not defined", procName, 1); - *pthresh = 0; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs has colormap", procName, 1); - if (factor < 1) - return ERROR_INT("sampling must be >= 1", procName, 1); - if (halfw <= 0) halfw = 20; - if (delta <= 0.0) delta = 0.1; - - /* Make a histogram of pixel values where the largest peak - * is normalized to a value of 1.0. */ - na1 = pixGetGrayHistogram(pixs, factor); - na2 = numaWindowedMean(na1, halfw); /* smoothing */ - numaGetMax(na2, &maxval, NULL); - na3 = numaTransform(na2, 0.0, 1.0 / maxval); /* rescale to max of 1.0 */ - numaDestroy(&na1); - numaDestroy(&na2); - - numaFindLocForThreshold(na3, 0, pthresh, &fract); - L_INFO("fractional area under first peak: %5.3f\n", procName, fract); - - if (ppixhisto) { - lept_mkdir("lept/histo"); - gplotSimple1(na3, GPLOT_PNG, "/tmp/lept/histo/histo", NULL); - *ppixhisto = pixRead("/tmp/lept/histo/histo.png"); - } - numaDestroy(&na3); - - if (*pthresh > 0 && ppixd) - *ppixd = pixThresholdToBinary(pixs, *pthresh); - return 0; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binexpand.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binexpand.c deleted file mode 100644 index c7a8c1a1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binexpand.c +++ /dev/null @@ -1,306 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file binexpand.c - *
- *
- *      Replicated expansion (integer scaling)
- *         PIX     *pixExpandBinaryReplicate()
- *
- *      Special case: power of 2 replicated expansion
- *         PIX     *pixExpandBinaryPower2()
- *
- *      Expansion tables for power of 2 expansion
- *         static l_uint16    *makeExpandTab2x()
- *         static l_uint32    *makeExpandTab4x()
- *         static l_uint32    *makeExpandTab8x()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Static table functions and tables */ -static l_uint16 * makeExpandTab2x(void); -static l_uint32 * makeExpandTab4x(void); -static l_uint32 * makeExpandTab8x(void); -static l_uint32 expandtab16[] = { - 0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff}; - -/*------------------------------------------------------------------* - * Replicated expansion (integer scaling) * - *------------------------------------------------------------------*/ -/*! - * \brief pixExpandBinaryReplicate() - * - * \param[in] pixs 1 bpp - * \param[in] xfact integer scale factor for horiz. replicative expansion - * \param[in] yfact integer scale factor for vertical replicative expansion - * \return pixd scaled up, or NULL on error - */ -PIX * -pixExpandBinaryReplicate(PIX *pixs, - l_int32 xfact, - l_int32 yfact) -{ -l_int32 w, h, d, wd, hd, wpls, wpld, i, j, k, start; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixExpandBinaryReplicate"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return (PIX *)ERROR_PTR("pixs not binary", procName, NULL); - if (xfact <= 0 || yfact <= 0) - return (PIX *)ERROR_PTR("invalid scale factor: <= 0", procName, NULL); - - if (xfact == yfact) { - if (xfact == 1) - return pixCopy(NULL, pixs); - if (xfact == 2 || xfact == 4 || xfact == 8 || xfact == 16) - return pixExpandBinaryPower2(pixs, xfact); - } - - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wd = xfact * w; - hd = yfact * h; - if ((pixd = pixCreate(wd, hd, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, (l_float32)xfact, (l_float32)yfact); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + yfact * i * wpld; - for (j = 0; j < w; 
j++) { /* replicate pixels on a single line */ - if (GET_DATA_BIT(lines, j)) { - start = xfact * j; - for (k = 0; k < xfact; k++) - SET_DATA_BIT(lined, start + k); - } - } - for (k = 1; k < yfact; k++) /* replicate the line */ - memcpy(lined + k * wpld, lined, 4 * wpld); - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Power of 2 expansion * - *------------------------------------------------------------------*/ -/*! - * \brief pixExpandBinaryPower2() - * - * \param[in] pixs 1 bpp - * \param[in] factor expansion factor: 1, 2, 4, 8, 16 - * \return pixd expanded 1 bpp by replication, or NULL on error - */ -PIX * -pixExpandBinaryPower2(PIX *pixs, - l_int32 factor) -{ -l_uint8 sval; -l_uint16 *tab2; -l_int32 i, j, k, w, h, d, wd, hd, wpls, wpld, sdibits, sqbits, sbytes; -l_uint32 *datas, *datad, *lines, *lined, *tab4, *tab8; -PIX *pixd; - - PROCNAME("pixExpandBinaryPower2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return (PIX *)ERROR_PTR("pixs not binary", procName, NULL); - if (factor == 1) - return pixCopy(NULL, pixs); - if (factor != 2 && factor != 4 && factor != 8 && factor != 16) - return (PIX *)ERROR_PTR("factor must be in {2,4,8,16}", procName, NULL); - - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wd = factor * w; - hd = factor * h; - if ((pixd = pixCreate(wd, hd, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, (l_float32)factor, (l_float32)factor); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - if (factor == 2) { - tab2 = makeExpandTab2x(); - sbytes = (w + 7) / 8; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + 2 * i * wpld; - for (j = 0; j < sbytes; j++) { - sval = GET_DATA_BYTE(lines, j); - SET_DATA_TWO_BYTES(lined, j, tab2[sval]); - } - memcpy(lined + wpld, lined, 4 * wpld); - } - 
LEPT_FREE(tab2); - } else if (factor == 4) { - tab4 = makeExpandTab4x(); - sbytes = (w + 7) / 8; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + 4 * i * wpld; - for (j = 0; j < sbytes; j++) { - sval = GET_DATA_BYTE(lines, j); - lined[j] = tab4[sval]; - } - for (k = 1; k < 4; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - LEPT_FREE(tab4); - } else if (factor == 8) { - tab8 = makeExpandTab8x(); - sqbits = (w + 3) / 4; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + 8 * i * wpld; - for (j = 0; j < sqbits; j++) { - sval = GET_DATA_QBIT(lines, j); - if (sval > 15) - L_WARNING("sval = %d; should be < 16\n", procName, sval); - lined[j] = tab8[sval]; - } - for (k = 1; k < 8; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - LEPT_FREE(tab8); - } else { /* factor == 16 */ - sdibits = (w + 1) / 2; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + 16 * i * wpld; - for (j = 0; j < sdibits; j++) { - sval = GET_DATA_DIBIT(lines, j); - lined[j] = expandtab16[sval]; - } - for (k = 1; k < 16; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - } - - return pixd; -} - - -/*-------------------------------------------------------------------* - * Expansion tables for 2x, 4x and 8x expansion * - *-------------------------------------------------------------------*/ -static l_uint16 * -makeExpandTab2x(void) -{ -l_uint16 *tab; -l_int32 i; - - tab = (l_uint16 *) LEPT_CALLOC(256, sizeof(l_uint16)); - for (i = 0; i < 256; i++) { - if (i & 0x01) - tab[i] = 0x3; - if (i & 0x02) - tab[i] |= 0xc; - if (i & 0x04) - tab[i] |= 0x30; - if (i & 0x08) - tab[i] |= 0xc0; - if (i & 0x10) - tab[i] |= 0x300; - if (i & 0x20) - tab[i] |= 0xc00; - if (i & 0x40) - tab[i] |= 0x3000; - if (i & 0x80) - tab[i] |= 0xc000; - } - return tab; -} - - -static l_uint32 * -makeExpandTab4x(void) -{ -l_uint32 *tab; -l_int32 i; - - tab = (l_uint32 *) LEPT_CALLOC(256, sizeof(l_uint32)); - for (i = 0; i < 256; i++) { - if (i & 0x01) 
- tab[i] = 0xf; - if (i & 0x02) - tab[i] |= 0xf0; - if (i & 0x04) - tab[i] |= 0xf00; - if (i & 0x08) - tab[i] |= 0xf000; - if (i & 0x10) - tab[i] |= 0xf0000; - if (i & 0x20) - tab[i] |= 0xf00000; - if (i & 0x40) - tab[i] |= 0xf000000; - if (i & 0x80) - tab[i] |= 0xf0000000; - } - return tab; -} - - -static l_uint32 * -makeExpandTab8x(void) -{ -l_uint32 *tab; -l_int32 i; - - tab = (l_uint32 *) LEPT_CALLOC(16, sizeof(l_uint32)); - for (i = 0; i < 16; i++) { - if (i & 0x01) - tab[i] = 0xff; - if (i & 0x02) - tab[i] |= 0xff00; - if (i & 0x04) - tab[i] |= 0xff0000; - if (i & 0x08) - tab[i] |= 0xff000000; - } - return tab; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binreduce.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binreduce.c deleted file mode 100644 index 84e3a7cd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/binreduce.c +++ /dev/null @@ -1,410 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file binreduce.c - *
- *
- *      Subsampled 2x reduction
- *           PIX      *pixReduceBinary2()
- *
- *      Rank filtered 2x reductions
- *           PIX      *pixReduceRankBinaryCascade()
- *           PIX      *pixReduceRankBinary2()
- *
- *      Permutation table for 2x rank binary reduction
- *           l_uint8  *makeSubsampleTab2x(void)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*------------------------------------------------------------------* - * Subsampled reduction * - *------------------------------------------------------------------*/ -/*! - * \brief pixReduceBinary2() - * - * \param[in] pixs - * \param[in] intab [optional]; if null, a table is made here - * and destroyed before exit - * \return pixd 2x subsampled, or NULL on error - * - *
- * Notes:
- *      (1) After folding, the data is in bytes 0 and 2 of the word,
- *          and the bits in each byte are in the following order
- *          (with 0 being the leftmost originating pair and 7 being
- *          the rightmost originating pair):
- *               0 4 1 5 2 6 3 7
- *          These need to be permuted to
- *               0 1 2 3 4 5 6 7
- *          which is done with an 8-bit table generated by makeSubsampleTab2x().
- * 
- */ -PIX * -pixReduceBinary2(PIX *pixs, - l_uint8 *intab) -{ -l_uint8 byte0, byte1; -l_uint8 *tab; -l_uint16 shortd; -l_int32 i, id, j, ws, hs, wpls, wpld, wplsi; -l_uint32 word; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixReduceBinary2"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - if (hs <= 1) - return (PIX *)ERROR_PTR("hs must be at least 2", procName, NULL); - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - - if ((pixd = pixCreate(ws / 2, hs / 2, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.5, 0.5); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - tab = (intab) ? intab : makeSubsampleTab2x(); - if (!tab) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("tab not made", procName, NULL); - } - - /* e.g., if ws = 65: wd = 32, wpls = 3, wpld = 1 --> trouble */ - wplsi = L_MIN(wpls, 2 * wpld); /* iterate over this number of words */ - - for (i = 0, id = 0; i < hs - 1; i += 2, id++) { - lines = datas + i * wpls; - lined = datad + id * wpld; - for (j = 0; j < wplsi; j++) { - word = *(lines + j); - word = word & 0xaaaaaaaa; /* mask */ - word = word | (word << 7); /* fold; data in bytes 0 & 2 */ - byte0 = word >> 24; - byte1 = (word >> 8) & 0xff; - shortd = (tab[byte0] << 8) | tab[byte1]; - SET_DATA_TWO_BYTES(lined, j, shortd); - } - } - - if (!intab) LEPT_FREE(tab); - return pixd; -} - - -/*------------------------------------------------------------------* - * Rank filtered binary reductions * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixReduceRankBinaryCascade() - * - * \param[in] pixs 1 bpp - * \param[in] level1 threshold, in the set {0, 1, 2, 3, 4} - * \param[in] level2 threshold, in the set {0, 1, 2, 3, 4} - * \param[in] level3 threshold, in the set {0, 1, 2, 3, 4} - * \param[in] level4 threshold, in the set {0, 1, 2, 3, 4} - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This performs up to four cascaded 2x rank reductions.
- *      (2) Use level = 0 to truncate the cascade.
- * 
- */ -PIX * -pixReduceRankBinaryCascade(PIX *pixs, - l_int32 level1, - l_int32 level2, - l_int32 level3, - l_int32 level4) -{ -PIX *pix1, *pix2, *pix3, *pix4; -l_uint8 *tab; - - PROCNAME("pixReduceRankBinaryCascade"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be binary", procName, NULL); - if (level1 > 4 || level2 > 4 || level3 > 4 || level4 > 4) - return (PIX *)ERROR_PTR("levels must not exceed 4", procName, NULL); - - if (level1 <= 0) { - L_WARNING("no reduction because level1 not > 0\n", procName); - return pixCopy(NULL, pixs); - } - - if ((tab = makeSubsampleTab2x()) == NULL) - return (PIX *)ERROR_PTR("tab not made", procName, NULL); - - pix1 = pixReduceRankBinary2(pixs, level1, tab); - if (level2 <= 0) { - LEPT_FREE(tab); - return pix1; - } - - pix2 = pixReduceRankBinary2(pix1, level2, tab); - pixDestroy(&pix1); - if (level3 <= 0) { - LEPT_FREE(tab); - return pix2; - } - - pix3 = pixReduceRankBinary2(pix2, level3, tab); - pixDestroy(&pix2); - if (level4 <= 0) { - LEPT_FREE(tab); - return pix3; - } - - pix4 = pixReduceRankBinary2(pix3, level4, tab); - pixDestroy(&pix3); - LEPT_FREE(tab); - return pix4; -} - - -/*! - * \brief pixReduceRankBinary2() - * - * \param[in] pixs 1 bpp - * \param[in] level rank threshold: 1, 2, 3, 4 - * \param[in] intab [optional]; if null, a table is made here - * and destroyed before exit - * \return pixd 1 bpp, 2x rank threshold reduced, or NULL on error - * - *
- * Notes:
- *      (1) pixd is downscaled by 2x from pixs.
- *      (2) The rank threshold specifies the minimum number of ON
- *          pixels in each 2x2 region of pixs that are required to
- *          set the corresponding pixel ON in pixd.
- *      (3) Rank filtering is done to the UL corner of each 2x2 pixel block,
- *          using only logical operations.  Then these pixels are chosen
- *          in the 2x subsampling process, subsampled, as described
- *          above in pixReduceBinary2().
- * 
- */ -PIX * -pixReduceRankBinary2(PIX *pixs, - l_int32 level, - l_uint8 *intab) -{ -l_uint8 byte0, byte1; -l_uint8 *tab; -l_uint16 shortd; -l_int32 i, id, j, ws, hs, wpls, wpld, wplsi; -l_uint32 word1, word2, word3, word4; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixReduceRankBinary2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not binary", procName, NULL); - if (level < 1 || level > 4) - return (PIX *)ERROR_PTR("level must be in set {1,2,3,4}", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - if (hs <= 1) - return (PIX *)ERROR_PTR("hs must be at least 2", procName, NULL); - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - - if ((pixd = pixCreate(ws / 2, hs / 2, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.5, 0.5); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - tab = (intab) ? 
intab : makeSubsampleTab2x(); - if (!tab) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("tab not made", procName, NULL); - } - - /* e.g., if ws = 65: wd = 32, wpls = 3, wpld = 1 --> trouble */ - wplsi = L_MIN(wpls, 2 * wpld); /* iterate over this number of words */ - - switch (level) - { - - case 1: - for (i = 0, id = 0; i < hs - 1; i += 2, id++) { - lines = datas + i * wpls; - lined = datad + id * wpld; - for (j = 0; j < wplsi; j++) { - word1 = *(lines + j); - word2 = *(lines + wpls + j); - - /* OR/OR */ - word2 = word1 | word2; - word2 = word2 | (word2 << 1); - - word2 = word2 & 0xaaaaaaaa; /* mask */ - word1 = word2 | (word2 << 7); /* fold; data in bytes 0 & 2 */ - byte0 = word1 >> 24; - byte1 = (word1 >> 8) & 0xff; - shortd = (tab[byte0] << 8) | tab[byte1]; - SET_DATA_TWO_BYTES(lined, j, shortd); - } - } - break; - - case 2: - for (i = 0, id = 0; i < hs - 1; i += 2, id++) { - lines = datas + i * wpls; - lined = datad + id * wpld; - for (j = 0; j < wplsi; j++) { - word1 = *(lines + j); - word2 = *(lines + wpls + j); - - /* (AND/OR) OR (OR/AND) */ - word3 = word1 & word2; - word3 = word3 | (word3 << 1); - word4 = word1 | word2; - word4 = word4 & (word4 << 1); - word2 = word3 | word4; - - word2 = word2 & 0xaaaaaaaa; /* mask */ - word1 = word2 | (word2 << 7); /* fold; data in bytes 0 & 2 */ - byte0 = word1 >> 24; - byte1 = (word1 >> 8) & 0xff; - shortd = (tab[byte0] << 8) | tab[byte1]; - SET_DATA_TWO_BYTES(lined, j, shortd); - } - } - break; - - case 3: - for (i = 0, id = 0; i < hs - 1; i += 2, id++) { - lines = datas + i * wpls; - lined = datad + id * wpld; - for (j = 0; j < wplsi; j++) { - word1 = *(lines + j); - word2 = *(lines + wpls + j); - - /* (AND/OR) AND (OR/AND) */ - word3 = word1 & word2; - word3 = word3 | (word3 << 1); - word4 = word1 | word2; - word4 = word4 & (word4 << 1); - word2 = word3 & word4; - - word2 = word2 & 0xaaaaaaaa; /* mask */ - word1 = word2 | (word2 << 7); /* fold; data in bytes 0 & 2 */ - byte0 = word1 >> 24; - byte1 = (word1 >> 8) 
& 0xff; - shortd = (tab[byte0] << 8) | tab[byte1]; - SET_DATA_TWO_BYTES(lined, j, shortd); - } - } - break; - - case 4: - for (i = 0, id = 0; i < hs - 1; i += 2, id++) { - lines = datas + i * wpls; - lined = datad + id * wpld; - for (j = 0; j < wplsi; j++) { - word1 = *(lines + j); - word2 = *(lines + wpls + j); - - /* AND/AND */ - word2 = word1 & word2; - word2 = word2 & (word2 << 1); - - word2 = word2 & 0xaaaaaaaa; /* mask */ - word1 = word2 | (word2 << 7); /* fold; data in bytes 0 & 2 */ - byte0 = word1 >> 24; - byte1 = (word1 >> 8) & 0xff; - shortd = (tab[byte0] << 8) | tab[byte1]; - SET_DATA_TWO_BYTES(lined, j, shortd); - } - } - break; - } - - if (!intab) LEPT_FREE(tab); - return pixd; -} - - -/*! - * \brief makeSubsampleTab2x() - * - * \return tab table of 256 permutations, or NULL on error - * - *
- * Notes:
- *      Permutation table for 2x rank binary reduction
- *      This table permutes the bits in a byte, from
- *          0 4 1 5 2 6 3 7
- *      to
- *          0 1 2 3 4 5 6 7
- * 
- */ -l_uint8 * -makeSubsampleTab2x(void) -{ -l_uint8 *tab; -l_int32 i; - - PROCNAME("makeSubsampleTab2x"); - - if ((tab = (l_uint8 *) LEPT_CALLOC(256, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("tab not made", procName, NULL); - - for (i = 0; i < 256; i++) - tab[i] = ((i & 0x01) ) | /* 7 */ - ((i & 0x04) >> 1) | /* 6 */ - ((i & 0x10) >> 2) | /* 5 */ - ((i & 0x40) >> 3) | /* 4 */ - ((i & 0x02) << 3) | /* 3 */ - ((i & 0x08) << 2) | /* 2 */ - ((i & 0x20) << 1) | /* 1 */ - ((i & 0x80) ); /* 0 */ - - return tab; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/blend.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/blend.c deleted file mode 100644 index 1cb79c61..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/blend.c +++ /dev/null @@ -1,2295 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file blend.c - *
- *
- *      Blending two images that are not colormapped
- *           PIX             *pixBlend()
- *           PIX             *pixBlendMask()
- *           PIX             *pixBlendGray()
- *           PIX             *pixBlendGrayInverse()
- *           PIX             *pixBlendColor()
- *           PIX             *pixBlendColorByChannel()
- *           PIX             *pixBlendGrayAdapt()
- *           static l_int32   blendComponents()
- *           PIX             *pixFadeWithGray()
- *           PIX             *pixBlendHardLight()
- *           static l_int32   blendHardLightComponents()
- *
- *      Blending two colormapped images
- *           l_int32          pixBlendCmap()
- *
- *      Blending two images using a third (alpha mask)
- *           PIX             *pixBlendWithGrayMask()
- *
- *      Blending background to a specific color
- *           PIX             *pixBlendBackgroundToColor()
- *
- *      Multiplying by a specific color
- *           PIX             *pixMultiplyByColor()
- *
- *      Rendering with alpha blending over a uniform background
- *           PIX             *pixAlphaBlendUniform()
- *
- *      Adding an alpha layer for blending
- *           PIX             *pixAddAlphaToBlend()
- *
- *      Setting a transparent alpha component over a white background
- *           PIX             *pixSetAlphaOverWhite()
- *
- *      Fading from the edge
- *           l_int32          pixLinearEdgeFade()
- *
- *  In blending operations a new pix is produced where typically
- *  a subset of pixels in src1 are changed by the set of pixels
- *  in src2, when src2 is located in a given position relative
- *  to src1.  This is similar to rasterop, except that the
- *  blending operations we allow are more complex, and typically
- *  result in dest pixels that are a linear combination of two
- *  pixels, such as src1 and its inverse.  I find it convenient
- *  to think of src2 as the "blender" (the one that takes the action)
- *  and src1 as the "blendee" (the one that changes).
- *
- *  Blending works best when src1 is 8 or 32 bpp.  We also allow
- *  src1 to be colormapped, but the colormap is removed before blending,
- *  so if src1 is colormapped, we can't allow in-place blending.
- *
- *  Because src2 is typically smaller than src1, we can implement by
- *  clipping src2 to src1 and then transforming some of the dest
- *  pixels that are under the support of src2.  In practice, we
- *  do the clipping in the inner pixel loop.  For grayscale and
- *  color src2, we also allow a simple form of transparency, where
- *  pixels of a particular value in src2 are transparent; for those pixels,
- *  no blending is done.
- *
- *  The blending functions are categorized by the depth of src2,
- *  the blender, and not that of src1, the blendee.
- *
- *   ~ If src2 is 1 bpp, we can do one of three things:
- *     (1) L_BLEND_WITH_INVERSE: Blend a given fraction of src1 with its
- *         inverse color for those pixels in src2 that are fg (ON),
- *         and leave the dest pixels unchanged for pixels in src2 that
- *         are bg (OFF).
- *     (2) L_BLEND_TO_WHITE: Fade the src1 pixels toward white by a
- *         given fraction for those pixels in src2 that are fg (ON),
- *         and leave the dest pixels unchanged for pixels in src2 that
- *         are bg (OFF).
- *     (3) L_BLEND_TO_BLACK: Fade the src1 pixels toward black by a
- *         given fraction for those pixels in src2 that are fg (ON),
- *         and leave the dest pixels unchanged for pixels in src2 that
- *         are bg (OFF).
- *     The blending function is pixBlendMask().
- *
- *   ~ If src2 is 8 bpp grayscale, we can do one of two things
- *     (but see pixFadeWithGray() below):
- *     (1) L_BLEND_GRAY: If src1 is 8 bpp, mix the two values, using
- *         a fraction of src2 and (1 - fraction) of src1.
- *         If src1 is 32 bpp (rgb), mix the fraction of src2 with
- *         each of the color components in src1.
- *     (2) L_BLEND_GRAY_WITH_INVERSE: Use the grayscale value in src2
- *         to determine how much of the inverse of a src1 pixel is
- *         to be combined with the pixel value.  The input fraction
- *         further acts to scale the change in the src1 pixel.
- *     The blending function is pixBlendGray().
- *
- *   ~ If src2 is color, we blend a given fraction of src2 with
- *     src1.  If src1 is 8 bpp, the resulting image is 32 bpp.
- *     The blending function is pixBlendColor().
- *
- *   ~ For all three blending functions -- pixBlendMask(), pixBlendGray()
- *     and pixBlendColor() -- you can apply the blender to the blendee
- *     either in-place or generating a new pix.  For the in-place
- *     operation, this requires that the depth of the resulting pix
- *     must equal that of the input pixs1.
- *
- *   ~ We remove colormaps from src1 and src2 before blending.
- *     Any quantization would have to be done after blending.
- *
- *  We include another function, pixFadeWithGray(), that blends
- *  a gray or color src1 with a gray src2.  It does one of these things:
- *     (1) L_BLEND_TO_WHITE: Fade the src1 pixels toward white by
- *         a number times the value in src2.
- *     (2) L_BLEND_TO_BLACK: Fade the src1 pixels toward black by
- *         a number times the value in src2.
- *
- *  Also included is a generalization of the so-called "hard light"
- *  blending: pixBlendHardLight().  We generalize by allowing a fraction < 1.0
- *  of the blender to be admixed with the blendee.  The standard function
- *  does full mixing.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -static l_int32 blendComponents(l_int32 a, l_int32 b, l_float32 fract); -static l_int32 blendHardLightComponents(l_int32 a, l_int32 b, l_float32 fract); - -/*-------------------------------------------------------------* - * Blending two images that are not colormapped * - *-------------------------------------------------------------*/ -/*! - * \brief pixBlend() - * - * \param[in] pixs1 blendee - * \param[in] pixs2 blender; typ. smaller - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1; can be < 0 - * \param[in] fract blending fraction - * \return pixd blended image, or null on error - * - *
- * Notes:
- *      (1) This is a simple top-level interface.  For more flexibility,
- *          call directly into pixBlendMask(), etc.
- * 
- */ -PIX * -pixBlend(PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract) -{ -l_int32 w1, h1, d1, d2; -BOX *box; -PIX *pixc, *pixt, *pixd; - - PROCNAME("pixBlend"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, NULL); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, NULL); - - /* check relative depths */ - d1 = pixGetDepth(pixs1); - d2 = pixGetDepth(pixs2); - if (d1 == 1 && d2 > 1) - return (PIX *)ERROR_PTR("mixing gray or color with 1 bpp", - procName, NULL); - - /* remove colormap from pixs2 if necessary */ - pixt = pixRemoveColormap(pixs2, REMOVE_CMAP_BASED_ON_SRC); - d2 = pixGetDepth(pixt); - - /* Check if pixs2 is clipped by its position with respect - * to pixs1; if so, clip it and redefine x and y if necessary. - * This actually isn't necessary, as the specific blending - * functions do the clipping directly in the pixel loop - * over pixs2, but it's included here to show how it can - * easily be done on pixs2 first. */ - pixGetDimensions(pixs1, &w1, &h1, NULL); - box = boxCreate(-x, -y, w1, h1); /* box of pixs1 relative to pixs2 */ - pixc = pixClipRectangle(pixt, box, NULL); - boxDestroy(&box); - if (!pixc) { - L_WARNING("box doesn't overlap pix\n", procName); - pixDestroy(&pixt); - return NULL; - } - x = L_MAX(0, x); - y = L_MAX(0, y); - - if (d2 == 1) { - pixd = pixBlendMask(NULL, pixs1, pixc, x, y, fract, - L_BLEND_WITH_INVERSE); - } else if (d2 == 8) { - pixd = pixBlendGray(NULL, pixs1, pixc, x, y, fract, - L_BLEND_GRAY, 0, 0); - } else { /* d2 == 32 */ - pixd = pixBlendColor(NULL, pixs1, pixc, x, y, fract, 0, 0); - } - - pixDestroy(&pixc); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixBlendMask() - * - * \param[in] pixd [optional]; either NULL or equal to pixs1 for in-place - * \param[in] pixs1 blendee, depth > 1 - * \param[in] pixs2 blender, 1 bpp; typ. 
smaller in size than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1; can be < 0 - * \param[in] fract blending fraction - * \param[in] type L_BLEND_WITH_INVERSE, L_BLEND_TO_WHITE, - * L_BLEND_TO_BLACK - * \return pixd if OK; null on error - * - *
- * Notes:
- *      (1) Clipping of pixs2 to pixs1 is done in the inner pixel loop.
- *      (2) If pixs1 has a colormap, it is removed.
- *      (3) For inplace operation (pixs1 not cmapped), call it this way:
- *            pixBlendMask(pixs1, pixs1, pixs2, ...)
- *      (4) For generating a new pixd:
- *            pixd = pixBlendMask(NULL, pixs1, pixs2, ...)
- *      (5) Only call in-place if pixs1 does not have a colormap.
- *      (6) Invalid %fract defaults to 0.5 with a warning.
- *          Invalid %type defaults to L_BLEND_WITH_INVERSE with a warning.
- * 
- */ -PIX * -pixBlendMask(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract, - l_int32 type) -{ -l_int32 i, j, d, wc, hc, w, h, wplc; -l_int32 val, rval, gval, bval; -l_uint32 pixval; -l_uint32 *linec, *datac; -PIX *pixc, *pix1, *pix2; - - PROCNAME("pixBlendMask"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, NULL); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, NULL); - if (pixGetDepth(pixs1) == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, NULL); - if (pixGetDepth(pixs2) != 1) - return (PIX *)ERROR_PTR("pixs2 not 1 bpp", procName, NULL); - if (pixd == pixs1 && pixGetColormap(pixs1)) - return (PIX *)ERROR_PTR("inplace; pixs1 has colormap", procName, NULL); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("pixd must be NULL or pixs1", procName, NULL); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - if (type != L_BLEND_WITH_INVERSE && type != L_BLEND_TO_WHITE && - type != L_BLEND_TO_BLACK) { - L_WARNING("invalid blend type; setting to L_BLEND_WITH_INVERSE\n", - procName); - type = L_BLEND_WITH_INVERSE; - } - - /* If pixd != NULL, we know that it is equal to pixs1 and - * that pixs1 does not have a colormap, so that an in-place operation - * can be done. Otherwise, remove colormap from pixs1 if - * it exists and unpack to at least 8 bpp if necessary, - * to do the blending on a new pix. 
*/ - if (!pixd) { - pix1 = pixRemoveColormap(pixs1, REMOVE_CMAP_BASED_ON_SRC); - if (pixGetDepth(pix1) < 8) - pix2 = pixConvertTo8(pix1, FALSE); - else - pix2 = pixClone(pix1); - pixd = pixCopy(NULL, pix2); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixGetDimensions(pixd, &w, &h, &d); /* d must be either 8 or 32 bpp */ - pixc = pixClone(pixs2); - wc = pixGetWidth(pixc); - hc = pixGetHeight(pixc); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - - /* Check limits for src1, in case clipping was not done. */ - switch (type) - { - case L_BLEND_WITH_INVERSE: - /* - * The basic logic for this blending is: - * p --> (1 - f) * p + f * (1 - p) - * where p is a normalized value: p = pixval / 255. - * Thus, - * p --> p + f * (1 - 2 * p) - */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - bval = GET_DATA_BIT(linec, j); - if (bval) { - switch (d) - { - case 8: - pixGetPixel(pixd, x + j, y + i, &pixval); - val = (l_int32)(pixval + fract * (255 - 2 * pixval)); - pixSetPixel(pixd, x + j, y + i, val); - break; - case 32: - pixGetPixel(pixd, x + j, y + i, &pixval); - extractRGBValues(pixval, &rval, &gval, &bval); - rval = (l_int32)(rval + fract * (255 - 2 * rval)); - gval = (l_int32)(gval + fract * (255 - 2 * gval)); - bval = (l_int32)(bval + fract * (255 - 2 * bval)); - composeRGBPixel(rval, gval, bval, &pixval); - pixSetPixel(pixd, x + j, y + i, pixval); - break; - default: - L_WARNING("d neither 8 nor 32 bpp; no blend\n", - procName); - } - } - } - } - break; - case L_BLEND_TO_WHITE: - /* - * The basic logic for this blending is: - * p --> p + f * (1 - p) (p normalized to [0...1]) - */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - bval = GET_DATA_BIT(linec, j); - if (bval) { - switch (d) - { - case 8: - pixGetPixel(pixd, x 
+ j, y + i, &pixval); - val = (l_int32)(pixval + fract * (255 - pixval)); - pixSetPixel(pixd, x + j, y + i, val); - break; - case 32: - pixGetPixel(pixd, x + j, y + i, &pixval); - extractRGBValues(pixval, &rval, &gval, &bval); - rval = (l_int32)(rval + fract * (255 - rval)); - gval = (l_int32)(gval + fract * (255 - gval)); - bval = (l_int32)(bval + fract * (255 - bval)); - composeRGBPixel(rval, gval, bval, &pixval); - pixSetPixel(pixd, x + j, y + i, pixval); - break; - default: - L_WARNING("d neither 8 nor 32 bpp; no blend\n", - procName); - } - } - } - } - break; - case L_BLEND_TO_BLACK: - /* - * The basic logic for this blending is: - * p --> (1 - f) * p (p normalized to [0...1]) - */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - bval = GET_DATA_BIT(linec, j); - if (bval) { - switch (d) - { - case 8: - pixGetPixel(pixd, x + j, y + i, &pixval); - val = (l_int32)((1. - fract) * pixval); - pixSetPixel(pixd, x + j, y + i, val); - break; - case 32: - pixGetPixel(pixd, x + j, y + i, &pixval); - extractRGBValues(pixval, &rval, &gval, &bval); - rval = (l_int32)((1. - fract) * rval); - gval = (l_int32)((1. - fract) * gval); - bval = (l_int32)((1. - fract) * bval); - composeRGBPixel(rval, gval, bval, &pixval); - pixSetPixel(pixd, x + j, y + i, pixval); - break; - default: - L_WARNING("d neither 8 nor 32 bpp; no blend\n", - procName); - } - } - } - } - break; - default: - L_WARNING("invalid binary mask blend type\n", procName); - break; - } - - pixDestroy(&pixc); - return pixd; -} - - -/*! 
- * \brief pixBlendGray() - * - * \param[in] pixd [optional] either equal to pixs1 for in-place, - * or NULL - * \param[in] pixs1 blendee, depth > 1 - * \param[in] pixs2 blender, any depth; typically, the area of - * pixs2 is smaller than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1; can be < 0 - * \param[in] fract blending fraction - * \param[in] type L_BLEND_GRAY, L_BLEND_GRAY_WITH_INVERSE - * \param[in] transparent 1 to use transparency; 0 otherwise - * \param[in] transpix pixel grayval in pixs2 that is to be transparent - * \return pixd if OK; pixs1 on error - * - *
- * Notes:
- *      (1) For inplace operation (pixs1 not cmapped), call it this way:
- *            pixBlendGray(pixs1, pixs1, pixs2, ...)
- *      (2) For generating a new pixd:
- *            pixd = pixBlendGray(NULL, pixs1, pixs2, ...)
- *      (3) Clipping of pixs2 to pixs1 is done in the inner pixel loop.
- *      (4) If pixs1 has a colormap, it is removed; otherwise, if pixs1
- *          has depth < 8, it is unpacked to generate a 8 bpp pix.
- *      (5) If transparent = 0, the blending fraction (fract) is
- *          applied equally to all pixels.
- *      (6) If transparent = 1, all pixels of value transpix (typically
- *          either 0 or 0xff) in pixs2 are transparent in the blend.
- *      (7) After processing pixs1, it is either 8 bpp or 32 bpp:
- *          ~ if 8 bpp, the fraction of pixs2 is mixed with pixs1.
- *          ~ if 32 bpp, each component of pixs1 is mixed with
- *            the same fraction of pixs2.
- *      (8) For L_BLEND_GRAY_WITH_INVERSE, the white values of the blendee
- *          (cval == 255 in the code below) result in a delta of 0.
- *          Thus, these pixels are intrinsically transparent!
- *          The "pivot" value of the src, at which no blending occurs, is
- *          128.  Compare with the adaptive pivot in pixBlendGrayAdapt().
- *      (9) Invalid %fract defaults to 0.5 with a warning.
- *          Invalid %type defaults to L_BLEND_GRAY with a warning.
- * 
- */ -PIX * -pixBlendGray(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract, - l_int32 type, - l_int32 transparent, - l_uint32 transpix) -{ -l_int32 i, j, d, wc, hc, w, h, wplc, wpld, delta; -l_int32 ival, irval, igval, ibval, cval, dval; -l_uint32 val32; -l_uint32 *linec, *lined, *datac, *datad; -PIX *pixc, *pix1, *pix2; - - PROCNAME("pixBlendGray"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, pixd); - if (pixd == pixs1 && pixGetColormap(pixs1)) - return (PIX *)ERROR_PTR("can't do in-place with cmap", procName, pixd); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("pixd must be NULL or pixs1", procName, pixd); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - if (type != L_BLEND_GRAY && type != L_BLEND_GRAY_WITH_INVERSE) { - L_WARNING("invalid blend type; setting to L_BLEND_GRAY\n", procName); - type = L_BLEND_GRAY; - } - - /* If pixd != NULL, we know that it is equal to pixs1 and - * that pixs1 does not have a colormap, so that an in-place operation - * can be done. Otherwise, remove colormap from pixs1 if - * it exists and unpack to at least 8 bpp if necessary, - * to do the blending on a new pix. 
*/ - if (!pixd) { - pix1 = pixRemoveColormap(pixs1, REMOVE_CMAP_BASED_ON_SRC); - if (pixGetDepth(pix1) < 8) - pix2 = pixConvertTo8(pix1, FALSE); - else - pix2 = pixClone(pix1); - pixd = pixCopy(NULL, pix2); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixGetDimensions(pixd, &w, &h, &d); /* 8 or 32 bpp */ - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - pixc = pixConvertTo8(pixs2, 0); - pixGetDimensions(pixc, &wc, &hc, NULL); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - - /* Check limits for src1, in case clipping was not done */ - if (type == L_BLEND_GRAY) { - /* - * The basic logic for this blending is: - * p --> (1 - f) * p + f * c - * where c is the 8 bpp blender. All values are normalized to [0...1]. - */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - switch (d) - { - case 8: - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - if (transparent == 0 || cval != transpix) { - dval = GET_DATA_BYTE(lined, j + x); - ival = (l_int32)((1. - fract) * dval + fract * cval); - SET_DATA_BYTE(lined, j + x, ival); - } - } - break; - case 32: - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - if (transparent == 0 || cval != transpix) { - val32 = *(lined + j + x); - extractRGBValues(val32, &irval, &igval, &ibval); - irval = (l_int32)((1. - fract) * irval + fract * cval); - igval = (l_int32)((1. - fract) * igval + fract * cval); - ibval = (l_int32)((1. 
- fract) * ibval + fract * cval); - composeRGBPixel(irval, igval, ibval, &val32); - *(lined + j + x) = val32; - } - } - break; - default: - break; /* shouldn't happen */ - } - } - } else { /* L_BLEND_GRAY_WITH_INVERSE */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - switch (d) - { - case 8: - /* - * For 8 bpp, the dest pix is shifted by a signed amount - * proportional to the distance from 128 (the pivot value), - * and to the darkness of src2. If the dest is darker - * than 128, it becomes lighter, and v.v. - * The basic logic is: - * d --> d + f * (0.5 - d) * (1 - c) - * where d and c are normalized pixel values for src1 and - * src2, respectively, with 8 bit normalization to [0...1]. - */ - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - if (transparent == 0 || cval != transpix) { - ival = GET_DATA_BYTE(lined, j + x); - delta = (128 - ival) * (255 - cval) / 256; - ival += (l_int32)(fract * delta + 0.5); - SET_DATA_BYTE(lined, j + x, ival); - } - } - break; - case 32: - /* Each component is shifted by the same formula for 8 bpp */ - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - if (transparent == 0 || cval != transpix) { - val32 = *(lined + j + x); - extractRGBValues(val32, &irval, &igval, &ibval); - delta = (128 - irval) * (255 - cval) / 256; - irval += (l_int32)(fract * delta + 0.5); - delta = (128 - igval) * (255 - cval) / 256; - igval += (l_int32)(fract * delta + 0.5); - delta = (128 - ibval) * (255 - cval) / 256; - ibval += (l_int32)(fract * delta + 0.5); - composeRGBPixel(irval, igval, ibval, &val32); - *(lined + j + x) = val32; - } - } - break; - default: - break; /* shouldn't happen */ - } - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/*! 
- * \brief pixBlendGrayInverse() - * - * \param[in] pixd [optional] either equal to pixs1 for in-place, or NULL - * \param[in] pixd [optional] either NULL or equal to pixs1 for in-place - * \param[in] pixs1 blendee, depth > 1 - * \param[in] pixs2 blender, any depth; typ. smaller in size than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1; can be < 0 - * \param[in] fract blending fraction - * \return pixd if OK; pixs1 on error - * - *
- * Notes:
- *      (1) For inplace operation (pixs1 not cmapped), call it this way:
- *            pixBlendGrayInverse(pixs1, pixs1, pixs2, ...)
- *      (2) For generating a new pixd:
- *            pixd = pixBlendGrayInverse(NULL, pixs1, pixs2, ...)
- *      (3) Clipping of pixs2 to pixs1 is done in the inner pixel loop.
- *      (4) If pixs1 has a colormap, it is removed; otherwise if pixs1
- *          has depth < 8, it is unpacked to generate a 8 bpp pix.
- *      (5) This is a no-nonsense blender.  It changes the src1 pixel except
- *          when the src1 pixel is midlevel gray.  Use fract == 1 for the most
- *          aggressive blending, where, if the gray pixel in pixs2 is 0,
- *          we get a complete inversion of the color of the src pixel in pixs1.
- *      (6) The basic logic is that each component transforms by:
-                 d  -->  c * d + (1 - c ) * (f * (1 - d) + d * (1 - f))
- *          where c is the blender pixel from pixs2,
- *                f is %fract,
- *                c and d are normalized to [0...1]
- *          This has the property that for f == 0 (no blend) or c == 1 (white):
- *               d  -->  d
- *          For c == 0 (black) we get maximum inversion:
- *               d  -->  f * (1 - d) + d * (1 - f)   [inversion by fraction f]
- * 
- */ -PIX * -pixBlendGrayInverse(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract) -{ -l_int32 i, j, d, wc, hc, w, h, wplc, wpld; -l_int32 irval, igval, ibval, cval, dval; -l_float32 a; -l_uint32 val32; -l_uint32 *linec, *lined, *datac, *datad; -PIX *pixc, *pix1, *pix2; - - PROCNAME("pixBlendGrayInverse"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, pixd); - if (pixd == pixs1 && pixGetColormap(pixs1)) - return (PIX *)ERROR_PTR("can't do in-place with cmap", procName, pixd); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("pixd must be NULL or pixs1", procName, pixd); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - - /* If pixd != NULL, we know that it is equal to pixs1 and - * that pixs1 does not have a colormap, so that an in-place operation - * can be done. Otherwise, remove colormap from pixs1 if - * it exists and unpack to at least 8 bpp if necessary, - * to do the blending on a new pix. 
*/ - if (!pixd) { - pix1 = pixRemoveColormap(pixs1, REMOVE_CMAP_BASED_ON_SRC); - if (pixGetDepth(pix1) < 8) - pix2 = pixConvertTo8(pix1, FALSE); - else - pix2 = pixClone(pix1); - pixd = pixCopy(NULL, pix2); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixGetDimensions(pixd, &w, &h, &d); /* 8 or 32 bpp */ - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - pixc = pixConvertTo8(pixs2, 0); - pixGetDimensions(pixc, &wc, &hc, NULL); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - - /* Check limits for src1, in case clipping was not done */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - switch (d) - { - case 8: - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - dval = GET_DATA_BYTE(lined, j + x); - a = (1.0 - fract) * dval + fract * (255.0 - dval); - dval = (l_int32)(cval * dval / 255.0 + - a * (255.0 - cval) / 255.0); - SET_DATA_BYTE(lined, j + x, dval); - } - break; - case 32: - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - val32 = *(lined + j + x); - extractRGBValues(val32, &irval, &igval, &ibval); - a = (1.0 - fract) * irval + fract * (255.0 - irval); - irval = (l_int32)(cval * irval / 255.0 + - a * (255.0 - cval) / 255.0); - a = (1.0 - fract) * igval + fract * (255.0 - igval); - igval = (l_int32)(cval * igval / 255.0 + - a * (255.0 - cval) / 255.0); - a = (1.0 - fract) * ibval + fract * (255.0 - ibval); - ibval = (l_int32)(cval * ibval / 255.0 + - a * (255.0 - cval) / 255.0); - composeRGBPixel(irval, igval, ibval, &val32); - *(lined + j + x) = val32; - } - break; - default: - break; /* shouldn't happen */ - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/*! 
- * \brief pixBlendColor() - * - * \param[in] pixd [optional] either equal to pixs1 for in-place, - * or NULL - * \param[in] pixs1 blendee; depth > 1 - * \param[in] pixs2 blender, any depth; typically, the area of - * pixs2 is smaller than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1 - * \param[in] fract blending fraction - * \param[in] transparent 1 to use transparency; 0 otherwise - * \param[in] transpix pixel color in pixs2 that is to be transparent - * \return pixd, or null on error - * - *
- * Notes:
- *      (1) For inplace operation (pixs1 must be 32 bpp), call it this way:
- *            pixBlendColor(pixs1, pixs1, pixs2, ...)
- *      (2) For generating a new pixd:
- *            pixd = pixBlendColor(NULL, pixs1, pixs2, ...)
- *      (3) If pixs2 is not 32 bpp rgb, it is converted.
- *      (4) Clipping of pixs2 to pixs1 is done in the inner pixel loop.
- *      (5) If pixs1 has a colormap, it is removed to generate a 32 bpp pix.
- *      (6) If pixs1 has depth < 32, it is unpacked to generate a 32 bpp pix.
- *      (7) If transparent = 0, the blending fraction (fract) is
- *          applied equally to all pixels.
- *      (8) If transparent = 1, all pixels of value transpix (typically
- *          either 0 or 0xffffff00) in pixs2 are transparent in the blend.
- * 
- */ -PIX * -pixBlendColor(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract, - l_int32 transparent, - l_uint32 transpix) -{ -l_int32 i, j, wc, hc, w, h, wplc, wpld; -l_int32 rval, gval, bval, rcval, gcval, bcval; -l_uint32 cval32, val32; -l_uint32 *linec, *lined, *datac, *datad; -PIX *pixc; - - PROCNAME("pixBlendColor"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, NULL); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, NULL); - if (pixGetDepth(pixs1) == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, NULL); - if (pixd == pixs1 && pixGetDepth(pixs1) != 32) - return (PIX *)ERROR_PTR("inplace; pixs1 not 32 bpp", procName, NULL); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("pixd must be NULL or pixs1", procName, NULL); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - - /* If pixd != null, we know that it is equal to pixs1 and - * that pixs1 is 32 bpp rgb, so that an in-place operation - * can be done. Otherwise, pixConvertTo32() will remove a - * colormap from pixs1 if it exists and unpack to 32 bpp - * (if necessary) to do the blending on a new 32 bpp Pix. */ - if (!pixd) - pixd = pixConvertTo32(pixs1); - pixGetDimensions(pixd, &w, &h, NULL); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - pixc = pixConvertTo32(pixs2); /* blend with 32 bpp rgb */ - pixGetDimensions(pixc, &wc, &hc, NULL); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - - /* Check limits for src1, in case clipping was not done */ - for (i = 0; i < hc; i++) { - /* - * The basic logic for this blending is: - * p --> (1 - f) * p + f * c - * for each color channel. c is a color component of the blender. - * All values are normalized to [0...1]. 
- */ - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval32 = *(linec + j); - if (transparent == 0 || - ((cval32 & 0xffffff00) != (transpix & 0xffffff00))) { - val32 = *(lined + j + x); - extractRGBValues(cval32, &rcval, &gcval, &bcval); - extractRGBValues(val32, &rval, &gval, &bval); - rval = (l_int32)((1. - fract) * rval + fract * rcval); - gval = (l_int32)((1. - fract) * gval + fract * gcval); - bval = (l_int32)((1. - fract) * bval + fract * bcval); - composeRGBPixel(rval, gval, bval, &val32); - *(lined + j + x) = val32; - } - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/* - * \brief pixBlendColorByChannel() - * - * \param[in] pixd [optional] either equal to pixs1 for in-place, - * or NULL - * \param[in] pixs1 blendee; depth > 1 - * \param[in] pixs2 blender, any depth; typically, the area of - * pixs2 is smaller than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1 - * \param[in] rfract blending fraction in red channel - * \param[in] gfract blending fraction in green channel - * \param[in] bfract blending fraction in blue channel - * \param[in] transparent 1 to use transparency; 0 otherwise - * \param[in] transpix pixel color in pixs2 that is to be transparent - * \return pixd if OK; pixd on error - * - *
- * Notes:
- *      (1) This generalizes pixBlendColor() in two ways:
- *          (a) The mixing fraction is specified per channel.
- *          (b) The mixing fraction may be < 0 or > 1, in which case,
- *              the min or max of two images are taken, respectively.
- *      (2) Specifically,
- *          for p = pixs1[i], c = pixs2[i], f = fract[i], i = 1, 2, 3:
- *              f < 0.0:          p --> min(p, c)
- *              0.0 <= f <= 1.0:  p --> (1 - f) * p + f * c
- *              f > 1.0:          p --> max(a, c)
- *          Special cases:
- *              f = 0:   p --> p
- *              f = 1:   p --> c
- *      (3) See usage notes in pixBlendColor()
- *      (4) pixBlendColor() would be equivalent to
- *            pixBlendColorChannel(..., fract, fract, fract, ...);
- *          at a small cost of efficiency.
- * 
- */ -PIX * -pixBlendColorByChannel(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 rfract, - l_float32 gfract, - l_float32 bfract, - l_int32 transparent, - l_uint32 transpix) -{ -l_int32 i, j, wc, hc, w, h, wplc, wpld; -l_int32 rval, gval, bval, rcval, gcval, bcval; -l_uint32 cval32, val32; -l_uint32 *linec, *lined, *datac, *datad; -PIX *pixc; - - PROCNAME("pixBlendColorByChannel"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, pixd); - if (pixd == pixs1 && pixGetDepth(pixs1) != 32) - return (PIX *)ERROR_PTR("inplace; pixs1 not 32 bpp", procName, pixd); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("pixd must be NULL or pixs1", procName, pixd); - - /* If pixd != NULL, we know that it is equal to pixs1 and - * that pixs1 is 32 bpp rgb, so that an in-place operation - * can be done. Otherwise, pixConvertTo32() will remove a - * colormap from pixs1 if it exists and unpack to 32 bpp - * (if necessary) to do the blending on a new 32 bpp Pix. 
*/ - if (!pixd) - pixd = pixConvertTo32(pixs1); - pixGetDimensions(pixd, &w, &h, NULL); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - pixc = pixConvertTo32(pixs2); - pixGetDimensions(pixc, &wc, &hc, NULL); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - - /* Check limits for src1, in case clipping was not done */ - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval32 = *(linec + j); - if (transparent == 0 || - ((cval32 & 0xffffff00) != (transpix & 0xffffff00))) { - val32 = *(lined + j + x); - extractRGBValues(cval32, &rcval, &gcval, &bcval); - extractRGBValues(val32, &rval, &gval, &bval); - rval = blendComponents(rval, rcval, rfract); - gval = blendComponents(gval, gcval, gfract); - bval = blendComponents(bval, bcval, bfract); - composeRGBPixel(rval, gval, bval, &val32); - *(lined + j + x) = val32; - } - } - } - - pixDestroy(&pixc); - return pixd; -} - - -static l_int32 -blendComponents(l_int32 a, - l_int32 b, - l_float32 fract) -{ - if (fract < 0.) - return ((a < b) ? a : b); - if (fract > 1.) - return ((a > b) ? a : b); - return (l_int32)((1. - fract) * a + fract * b); -} - - -/*! - * \brief pixBlendGrayAdapt() - * - * \param[in] pixd [optional] either equal to pixs1 for in-place, or NULL - * \param[in] pixs1 blendee; depth > 1 - * \param[in] pixs2 blender, any depth; typically, the area of - * pixs2 is smaller than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1; can be < 0 - * \param[in] fract blending fraction - * \param[in] shift >= 0 but <= 128: shift of zero blend value from - * median source; use -1 for default value; - * \return pixd if OK; pixs1 on error - * - *
- * Notes:
- *      (1) For inplace operation (pixs1 not cmapped), call it this way:
- *            pixBlendGrayAdapt(pixs1, pixs1, pixs2, ...)
- *          For generating a new pixd:
- *            pixd = pixBlendGrayAdapt(NULL, pixs1, pixs2, ...)
- *      (2) Clipping of pixs2 to pixs1 is done in the inner pixel loop.
- *      (3) If pixs1 has a colormap, it is removed.
- *      (4) If pixs1 has depth < 8, it is unpacked to generate a 8 bpp pix.
- *      (5) This does a blend with inverse.  Whereas in pixGlendGray(), the
- *          zero blend point is where the blendee pixel is 128, here
- *          the zero blend point is found adaptively, with respect to the
- *          median of the blendee region.  If the median is < 128,
- *          the zero blend point is found from
- *              median + shift.
- *          Otherwise, if the median >= 128, the zero blend point is
- *              median - shift.
- *          The purpose of shifting the zero blend point away from the
- *          median is to prevent a situation in pixBlendGray() where
- *          the median is 128 and the blender is not visible.
- *          The default value of shift is 64.
- *      (6) After processing pixs1, it is either 8 bpp or 32 bpp:
- *          ~ if 8 bpp, the fraction of pixs2 is mixed with pixs1.
- *          ~ if 32 bpp, each component of pixs1 is mixed with
- *            the same fraction of pixs2.
- *      (7) The darker the blender, the more it mixes with the blendee.
- *          A blender value of 0 has maximum mixing; a value of 255
- *          has no mixing and hence is transparent.
- * 
- */ -PIX * -pixBlendGrayAdapt(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract, - l_int32 shift) -{ -l_int32 i, j, d, wc, hc, w, h, wplc, wpld, delta, overlap; -l_int32 rval, gval, bval, cval, dval, mval, median, pivot; -l_uint32 val32; -l_uint32 *linec, *lined, *datac, *datad; -l_float32 fmedian, factor; -BOX *box, *boxt; -PIX *pixc, *pix1, *pix2; - - PROCNAME("pixBlendGrayAdapt"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, pixd); - if (pixd == pixs1 && pixGetColormap(pixs1)) - return (PIX *)ERROR_PTR("can't do in-place with cmap", procName, pixd); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("pixd must be NULL or pixs1", procName, pixd); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - if (shift == -1) shift = 64; /* default value */ - if (shift < 0 || shift > 127) { - L_WARNING("invalid shift; setting to 64\n", procName); - shift = 64; - } - - /* Test for overlap */ - pixGetDimensions(pixs1, &w, &h, NULL); - pixGetDimensions(pixs2, &wc, &hc, NULL); - box = boxCreate(x, y, wc, hc); - boxt = boxCreate(0, 0, w, h); - boxIntersects(box, boxt, &overlap); - boxDestroy(&boxt); - if (!overlap) { - boxDestroy(&box); - return (PIX *)ERROR_PTR("no image overlap", procName, pixd); - } - - /* If pixd != NULL, we know that it is equal to pixs1 and - * that pixs1 does not have a colormap, so that an in-place operation - * can be done. Otherwise, remove colormap from pixs1 if - * it exists and unpack to at least 8 bpp if necessary, - * to do the blending on a new pix. 
*/ - if (!pixd) { - pix1 = pixRemoveColormap(pixs1, REMOVE_CMAP_BASED_ON_SRC); - if (pixGetDepth(pix1) < 8) - pix2 = pixConvertTo8(pix1, FALSE); - else - pix2 = pixClone(pix1); - pixd = pixCopy(NULL, pix2); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - /* Get the median value in the region of blending */ - pix1 = pixClipRectangle(pixd, box, NULL); - pix2 = pixConvertTo8(pix1, 0); - pixGetRankValueMasked(pix2, NULL, 0, 0, 1, 0.5, &fmedian, NULL); - median = (l_int32)(fmedian + 0.5); - if (median < 128) - pivot = median + shift; - else - pivot = median - shift; - pixDestroy(&pix1); - pixDestroy(&pix2); - boxDestroy(&box); - - /* Process over src2; clip to src1. */ - d = pixGetDepth(pixd); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - pixc = pixConvertTo8(pixs2, 0); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - switch (d) - { - case 8: - /* - * For 8 bpp, the dest pix is shifted by an amount - * proportional to the distance from the pivot value, - * and to the darkness of src2. In no situation will it - * pass the pivot value in intensity. - * The basic logic is: - * d --> d + f * (np - d) * (1 - c) - * where np, d and c are normalized pixel values for - * the pivot, src1 and src2, respectively, with normalization - * to 255. - */ - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - dval = GET_DATA_BYTE(lined, j + x); - cval = GET_DATA_BYTE(linec, j); - delta = (pivot - dval) * (255 - cval) / 256; - dval += (l_int32)(fract * delta + 0.5); - SET_DATA_BYTE(lined, j + x, dval); - } - break; - case 32: - /* - * For 32 bpp, the dest pix is shifted by an amount - * proportional to the max component distance from the - * pivot value, and to the darkness of src2. Each component - * is shifted by the same fraction, either up or down, - * depending on the shift direction (which is toward the - * pivot). 
The basic logic for the red component is: - * r --> r + f * (np - m) * (1 - c) * (r / m) - * where np, r, m and c are normalized pixel values for - * the pivot, the r component of src1, the max component - * of src1, and src2, respectively, again with normalization - * to 255. Likewise for the green and blue components. - */ - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - cval = GET_DATA_BYTE(linec, j); - val32 = *(lined + j + x); - extractRGBValues(val32, &rval, &gval, &bval); - mval = L_MAX(rval, gval); - mval = L_MAX(mval, bval); - mval = L_MAX(mval, 1); - delta = (pivot - mval) * (255 - cval) / 256; - factor = fract * delta / mval; - rval += (l_int32)(factor * rval + 0.5); - gval += (l_int32)(factor * gval + 0.5); - bval += (l_int32)(factor * bval + 0.5); - composeRGBPixel(rval, gval, bval, &val32); - *(lined + j + x) = val32; - } - break; - default: - break; /* shouldn't happen */ - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/*! - * \brief pixFadeWithGray() - * - * \param[in] pixs colormapped or 8 bpp or 32 bpp - * \param[in] pixb 8 bpp blender - * \param[in] factor multiplicative factor to apply to blender value - * \param[in] type L_BLEND_TO_WHITE, L_BLEND_TO_BLACK - * \return pixd, or null on error - * - *
- * Notes:
- *      (1) This function combines two pix aligned to the UL corner; they
- *          need not be the same size.
- *      (2) Each pixel in pixb is multiplied by 'factor' divided by 255, and
- *          clipped to the range [0 ... 1].  This gives the fade fraction
- *          to be applied to pixs.  Fade either to white (L_BLEND_TO_WHITE)
- *          or to black (L_BLEND_TO_BLACK).
- * 
- */ -PIX * -pixFadeWithGray(PIX *pixs, - PIX *pixb, - l_float32 factor, - l_int32 type) -{ -l_int32 i, j, w, h, d, wb, hb, db, wd, hd, wplb, wpld; -l_int32 valb, vald, nvald, rval, gval, bval, nrval, ngval, nbval; -l_float32 nfactor, fract; -l_uint32 val32, nval32; -l_uint32 *lined, *datad, *lineb, *datab; -PIX *pixd; - - PROCNAME("pixFadeWithGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixb) - return (PIX *)ERROR_PTR("pixb not defined", procName, NULL); - if (pixGetDepth(pixs) == 1) - return (PIX *)ERROR_PTR("pixs is 1 bpp", procName, NULL); - pixGetDimensions(pixb, &wb, &hb, &db); - if (db != 8) - return (PIX *)ERROR_PTR("pixb not 8 bpp", procName, NULL); - if (factor < 0.0 || factor > 255.0) - return (PIX *)ERROR_PTR("factor not in [0.0...255.0]", procName, NULL); - if (type != L_BLEND_TO_WHITE && type != L_BLEND_TO_BLACK) - return (PIX *)ERROR_PTR("invalid fade type", procName, NULL); - - /* Remove colormap if it exists; otherwise copy */ - pixd = pixRemoveColormapGeneral(pixs, REMOVE_CMAP_BASED_ON_SRC, L_COPY); - pixGetDimensions(pixd, &wd, &hd, &d); - w = L_MIN(wb, wd); - h = L_MIN(hb, hd); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datab = pixGetData(pixb); - wplb = pixGetWpl(pixb); - - /* The basic logic for this blending is, for each component p of pixs: - * fade-to-white: p --> p + (f * c) * (1 - p) - * fade-to-black: p --> p - (f * c) * p - * with c being the 8 bpp blender pixel of pixb, and with both - * p and c normalized to [0...1]. */ - nfactor = factor / 255.; - for (i = 0; i < h; i++) { - lineb = datab + i * wplb; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - valb = GET_DATA_BYTE(lineb, j); - fract = nfactor * (l_float32)valb; - fract = L_MIN(fract, 1.0); - if (d == 8) { - vald = GET_DATA_BYTE(lined, j); - if (type == L_BLEND_TO_WHITE) - nvald = vald + (l_int32)(fract * (255. 
- (l_float32)vald)); - else /* L_BLEND_TO_BLACK */ - nvald = vald - (l_int32)(fract * (l_float32)vald); - SET_DATA_BYTE(lined, j, nvald); - } else { /* d == 32 */ - val32 = lined[j]; - extractRGBValues(val32, &rval, &gval, &bval); - if (type == L_BLEND_TO_WHITE) { - nrval = rval + (l_int32)(fract * (255. - (l_float32)rval)); - ngval = gval + (l_int32)(fract * (255. - (l_float32)gval)); - nbval = bval + (l_int32)(fract * (255. - (l_float32)bval)); - } else { - nrval = rval - (l_int32)(fract * (l_float32)rval); - ngval = gval - (l_int32)(fract * (l_float32)gval); - nbval = bval - (l_int32)(fract * (l_float32)bval); - } - composeRGBPixel(nrval, ngval, nbval, &nval32); - lined[j] = nval32; - } - } - } - - return pixd; -} - - -/* - * \brief pixBlendHardLight() - * - * \param[in] pixd either NULL or equal to pixs1 for in-place - * \param[in] pixs1 blendee; depth > 1, may be cmapped - * \param[in] pixs2 blender, 8 or 32 bpp; may be colormapped; - * typ. smaller in size than pixs1 - * \param[in] x,y origin [UL corner] of pixs2 relative to - * the origin of pixs1 - * \param[in] fract blending fraction, or 'opacity factor' - * \return pixd if OK; pixs1 on error - * - *
- * Notes:
- *      (1) pixs2 must be 8 or 32 bpp; either may have a colormap.
- *      (2) Clipping of pixs2 to pixs1 is done in the inner pixel loop.
- *      (3) Only call in-place if pixs1 is not colormapped.
- *      (4) If pixs1 has a colormap, it is removed to generate either an
- *          8 or 32 bpp pix, depending on the colormap.
- *      (5) For inplace operation, call it this way:
- *            pixBlendHardLight(pixs1, pixs1, pixs2, ...)
- *      (6) For generating a new pixd:
- *            pixd = pixBlendHardLight(NULL, pixs1, pixs2, ...)
- *      (7) This is a generalization of the usual hard light blending,
- *          where fract == 1.0.
- *      (8) "Overlay" blending is the same as hard light blending, with
- *          fract == 1.0, except that the components are switched
- *          in the test.  (Note that the result is symmetric in the
- *          two components.)
- *      (9) See, e.g.:
- *           http://www.pegtop.net/delphi/articles/blendmodes/hardlight.htm
- *           http://www.digitalartform.com/imageArithmetic.htm
- *      (10) This function was built by Paco Galanes.
- * 
- */ -PIX * -pixBlendHardLight(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 x, - l_int32 y, - l_float32 fract) -{ -l_int32 i, j, w, h, d, wc, hc, dc, wplc, wpld; -l_int32 cval, dval, rcval, gcval, bcval, rdval, gdval, bdval; -l_uint32 cval32, dval32; -l_uint32 *linec, *lined, *datac, *datad; -PIX *pixc, *pixt; - - PROCNAME("pixBlendHardLight"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - pixGetDimensions(pixs1, &w, &h, &d); - pixGetDimensions(pixs2, &wc, &hc, &dc); - if (d == 1) - return (PIX *)ERROR_PTR("pixs1 is 1 bpp", procName, pixd); - if (dc != 8 && dc != 32) - return (PIX *)ERROR_PTR("pixs2 not 8 or 32 bpp", procName, pixd); - if (pixd && (pixd != pixs1)) - return (PIX *)ERROR_PTR("inplace and pixd != pixs1", procName, pixd); - if (pixd == pixs1 && pixGetColormap(pixs1)) - return (PIX *)ERROR_PTR("inplace and pixs1 cmapped", procName, pixd); - if (pixd && d != 8 && d != 32) - return (PIX *)ERROR_PTR("inplace and not 8 or 32 bpp", procName, pixd); - - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - - /* If pixs2 has a colormap, remove it */ - pixc = pixRemoveColormap(pixs2, REMOVE_CMAP_BASED_ON_SRC); /* clone ok */ - dc = pixGetDepth(pixc); - - /* There are 4 cases: - * * pixs1 has or doesn't have a colormap - * * pixc is either 8 or 32 bpp - * In all situations, if pixs has a colormap it must be removed, - * and pixd must have a depth that is equal to or greater than pixc. 
*/ - if (dc == 32) { - if (pixGetColormap(pixs1)) { /* pixd == NULL */ - pixd = pixRemoveColormap(pixs1, REMOVE_CMAP_TO_FULL_COLOR); - } else { - if (!pixd) { - pixd = pixConvertTo32(pixs1); - } else { - pixt = pixConvertTo32(pixs1); - pixCopy(pixd, pixt); - pixDestroy(&pixt); - } - } - d = 32; - } else { /* dc == 8 */ - if (pixGetColormap(pixs1)) /* pixd == NULL */ - pixd = pixRemoveColormap(pixs1, REMOVE_CMAP_BASED_ON_SRC); - else - pixd = pixCopy(pixd, pixs1); - d = pixGetDepth(pixd); - } - - if (!(d == 8 && dc == 8) && /* 3 cases only */ - !(d == 32 && dc == 8) && - !(d == 32 && dc == 32)) { - pixDestroy(&pixc); - return (PIX *)ERROR_PTR("bad! -- invalid depth combo!", procName, pixd); - } - - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - for (i = 0; i < hc; i++) { - if (i + y < 0 || i + y >= h) continue; - linec = datac + i * wplc; - lined = datad + (i + y) * wpld; - for (j = 0; j < wc; j++) { - if (j + x < 0 || j + x >= w) continue; - if (d == 8 && dc == 8) { - dval = GET_DATA_BYTE(lined, x + j); - cval = GET_DATA_BYTE(linec, j); - dval = blendHardLightComponents(dval, cval, fract); - SET_DATA_BYTE(lined, x + j, dval); - } else if (d == 32 && dc == 8) { - dval32 = *(lined + x + j); - extractRGBValues(dval32, &rdval, &gdval, &bdval); - cval = GET_DATA_BYTE(linec, j); - rdval = blendHardLightComponents(rdval, cval, fract); - gdval = blendHardLightComponents(gdval, cval, fract); - bdval = blendHardLightComponents(bdval, cval, fract); - composeRGBPixel(rdval, gdval, bdval, &dval32); - *(lined + x + j) = dval32; - } else if (d == 32 && dc == 32) { - dval32 = *(lined + x + j); - extractRGBValues(dval32, &rdval, &gdval, &bdval); - cval32 = *(linec + j); - extractRGBValues(cval32, &rcval, &gcval, &bcval); - rdval = blendHardLightComponents(rdval, rcval, fract); - gdval = blendHardLightComponents(gdval, gcval, fract); - bdval = blendHardLightComponents(bdval, bcval, fract); - composeRGBPixel(rdval, gdval, 
bdval, &dval32); - *(lined + x + j) = dval32; - } - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/* - * \brief blendHardLightComponents() - * - * \param[in] a 8 bpp blendee component - * \param[in] b 8 bpp blender component - * \param[in] fract fraction of blending; use 1.0 for usual definition - * \return blended 8 bpp component - * - *
- * Notes:
- *
- *    The basic logic for this blending is:
- *      b < 0.5:
- *          a --> 2 * a * (0.5 - f * (0.5 - b))
- *      b >= 0.5:
- *          a --> 1 - 2 * (1 - a) * (1 - (0.5 - f * (0.5 - b)))
- *
- *    In the limit that f == 1 (standard hardlight blending):
- *      b < 0.5:   a --> 2 * a * b
- *                     or
- *                 a --> a - a * (1 - 2 * b)
- *      b >= 0.5:  a --> 1 - 2 * (1 - a) * (1 - b)
- *                     or
- *                 a --> a + (1 - a) * (2 * b - 1)
- *
- *    You can see that for standard hardlight blending:
- *      b < 0.5:   a is pushed linearly with b down to 0
- *      b >= 0.5:  a is pushed linearly with b up to 1
- *    a is unchanged if b = 0.5
- *
- *    Our opacity factor f reduces the deviation of b from 0.5:
- *      f == 0:  b -->  0.5, so no blending occurs
- *      f == 1:  b -->  b, so we get full conventional blending
- *
- *    There is a variant of hardlight blending called "softlight" blending:
- *    (e.g., http://jswidget.com/blog/tag/hard-light/)
- *      b < 0.5:
- *          a --> a - a * (0.5 - b) * (1 - Abs(2 * a - 1))
- *      b >= 0.5:
- *          a --> a + (1 - a) * (b - 0.5) * (1 - Abs(2 * a - 1))
- *    which limits the amount that 'a' can be moved to a maximum of
- *    halfway toward 0 or 1, and further reduces it as 'a' moves
- *    away from 0.5.
- *    As you can see, there are a nearly infinite number of different
- *    blending formulas that can be conjured up.
- * 
- */ -static l_int32 blendHardLightComponents(l_int32 a, - l_int32 b, - l_float32 fract) -{ - if (b < 0x80) { - b = 0x80 - (l_int32)(fract * (0x80 - b)); - return (a * b) >> 7; - } else { - b = 0x80 + (l_int32)(fract * (b - 0x80)); - return 0xff - (((0xff - b) * (0xff - a)) >> 7); - } -} - - -/*-------------------------------------------------------------* - * Blending two colormapped images * - *-------------------------------------------------------------*/ -/*! - * \brief pixBlendCmap() - * - * \param[in] pixs 2, 4 or 8 bpp, with colormap - * \param[in] pixb colormapped blender - * \param[in] x, y UL corner of blender relative to pixs - * \param[in] sindex colormap index of pixels in pixs to be changed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function combines two colormaps, and replaces the pixels
- *          in pixs that have a specified color value with those in pixb.
- *      (2) sindex must be in the existing colormap; otherwise an
- *          error is returned.  In use, sindex will typically be the index
- *          for white (255, 255, 255).
- *      (3) Blender colors that already exist in the colormap are used;
- *          others are added.  If any blender colors cannot be
- *          stored in the colormap, an error is returned.
- *      (4) In the implementation, a mapping is generated from each
- *          original blender colormap index to the corresponding index
- *          in the expanded colormap for pixs.  Then for each pixel in
- *          pixs with value sindex, and which is covered by a blender pixel,
- *          the new index corresponding to the blender pixel is substituted
- *          for sindex.
- * 
- */ -l_ok -pixBlendCmap(PIX *pixs, - PIX *pixb, - l_int32 x, - l_int32 y, - l_int32 sindex) -{ -l_int32 rval, gval, bval; -l_int32 i, j, w, h, d, ncb, wb, hb, wpls; -l_int32 index, val, nadded; -l_int32 lut[256]; -l_uint32 pval; -l_uint32 *lines, *datas; -PIXCMAP *cmaps, *cmapb, *cmapsc; - - PROCNAME("pixBlendCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixb) - return ERROR_INT("pixb not defined", procName, 1); - if ((cmaps = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap in pixs", procName, 1); - if ((cmapb = pixGetColormap(pixb)) == NULL) - return ERROR_INT("no colormap in pixb", procName, 1); - ncb = pixcmapGetCount(cmapb); - - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return ERROR_INT("depth not in {2,4,8}", procName, 1); - - /* Make a copy of cmaps; we'll add to this if necessary - * and substitute at the end if we found there was enough room - * to hold all the new colors. */ - cmapsc = pixcmapCopy(cmaps); - - /* Add new colors if necessary; get mapping array between - * cmaps and cmapb. */ - for (i = 0, nadded = 0; i < ncb; i++) { - pixcmapGetColor(cmapb, i, &rval, &gval, &bval); - if (pixcmapGetIndex(cmapsc, rval, gval, bval, &index)) { /* not found */ - if (pixcmapAddColor(cmapsc, rval, gval, bval)) { - pixcmapDestroy(&cmapsc); - return ERROR_INT("not enough room in cmaps", procName, 1); - } - lut[i] = pixcmapGetCount(cmapsc) - 1; - nadded++; - } else { - lut[i] = index; - } - } - - /* Replace cmaps if colors have been added. */ - if (nadded == 0) - pixcmapDestroy(&cmapsc); - else - pixSetColormap(pixs, cmapsc); - - /* Replace each pixel value sindex by mapped colormap index when - * a blender pixel in pixbc overlays it. 
*/ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixGetDimensions(pixb, &wb, &hb, NULL); - for (i = 0; i < hb; i++) { - if (i + y < 0 || i + y >= h) continue; - lines = datas + (y + i) * wpls; - for (j = 0; j < wb; j++) { - if (j + x < 0 || j + x >= w) continue; - switch (d) { - case 2: - val = GET_DATA_DIBIT(lines, x + j); - if (val == sindex) { - pixGetPixel(pixb, j, i, &pval); - SET_DATA_DIBIT(lines, x + j, lut[pval]); - } - break; - case 4: - val = GET_DATA_QBIT(lines, x + j); - if (val == sindex) { - pixGetPixel(pixb, j, i, &pval); - SET_DATA_QBIT(lines, x + j, lut[pval]); - } - break; - case 8: - val = GET_DATA_BYTE(lines, x + j); - if (val == sindex) { - pixGetPixel(pixb, j, i, &pval); - SET_DATA_BYTE(lines, x + j, lut[pval]); - } - break; - default: - return ERROR_INT("depth not in {2,4,8}", procName, 1); - } - } - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Blending two images using a third * - *---------------------------------------------------------------------*/ -/*! - * \brief pixBlendWithGrayMask() - * - * \param[in] pixs1 8 bpp gray, rgb, rgba or colormapped - * \param[in] pixs2 8 bpp gray, rgb, rgba or colormapped - * \param[in] pixg [optional] 8 bpp gray, for transparency of pixs2; - * can be null - * \param[in] x, y UL corner of pixs2 and pixg with respect to pixs1 - * \return pixd blended image, or null on error - * - *
- * Notes:
- *      (1) The result is 8 bpp grayscale if both pixs1 and pixs2 are
- *          8 bpp gray.  Otherwise, the result is 32 bpp rgb.
- *      (2) pixg is an 8 bpp transparency image, where 0 is transparent
- *          and 255 is opaque.  It determines the transparency of pixs2
- *          when applied over pixs1.  It can be null if pixs2 is rgba,
- *          in which case we use the alpha component of pixs2.
- *      (3) If pixg exists, it need not be the same size as pixs2.
- *          However, we assume their UL corners are aligned with each other,
- *          and placed at the location (x, y) in pixs1.
- *      (4) The pixels in pixd are a combination of those in pixs1
- *          and pixs2, where the amount from pixs2 is proportional to
- *          the value of the pixel (p) in pixg, and the amount from pixs1
- *          is proportional to (255 - p).  Thus pixg is a transparency
- *          image (usually called an alpha blender) where each pixel
- *          can be associated with a pixel in pixs2, and determines
- *          the amount of the pixs2 pixel in the final result.
- *          For example, if pixg is all 0, pixs2 is transparent and
- *          the result in pixd is simply pixs1.
- *      (5) A typical use is for the pixs2/pixg combination to be
- *          a small watermark that is applied to pixs1.
- * 
- */ -PIX * -pixBlendWithGrayMask(PIX *pixs1, - PIX *pixs2, - PIX *pixg, - l_int32 x, - l_int32 y) -{ -l_int32 w1, h1, d1, w2, h2, d2, spp, wg, hg, wmin, hmin, wpld, wpls, wplg; -l_int32 i, j, val, dval, sval; -l_int32 drval, dgval, dbval, srval, sgval, sbval; -l_uint32 dval32, sval32; -l_uint32 *datad, *datas, *datag, *lined, *lines, *lineg; -l_float32 fract; -PIX *pixr1, *pixr2, *pix1, *pix2, *pixg2, *pixd; - - PROCNAME("pixBlendWithGrayMask"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, NULL); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, NULL); - pixGetDimensions(pixs1, &w1, &h1, &d1); - pixGetDimensions(pixs2, &w2, &h2, &d2); - if (d1 == 1 || d2 == 1) - return (PIX *)ERROR_PTR("pixs1 or pixs2 is 1 bpp", procName, NULL); - if (pixg) { - if (pixGetDepth(pixg) != 8) - return (PIX *)ERROR_PTR("pixg not 8 bpp", procName, NULL); - pixGetDimensions(pixg, &wg, &hg, NULL); - wmin = L_MIN(w2, wg); - hmin = L_MIN(h2, hg); - pixg2 = pixClone(pixg); - } else { /* use the alpha component of pixs2 */ - spp = pixGetSpp(pixs2); - if (d2 != 32 || spp != 4) - return (PIX *)ERROR_PTR("no alpha; pixs2 not rgba", procName, NULL); - wmin = w2; - hmin = h2; - pixg2 = pixGetRGBComponent(pixs2, L_ALPHA_CHANNEL); - } - - /* Remove colormaps if they exist; clones are OK */ - pixr1 = pixRemoveColormap(pixs1, REMOVE_CMAP_BASED_ON_SRC); - pixr2 = pixRemoveColormap(pixs2, REMOVE_CMAP_BASED_ON_SRC); - - /* Regularize to the same depth if necessary */ - d1 = pixGetDepth(pixr1); - d2 = pixGetDepth(pixr2); - if (d1 == 32) { /* convert d2 to rgb if necessary */ - pix1 = pixClone(pixr1); - if (d2 != 32) - pix2 = pixConvertTo32(pixr2); - else - pix2 = pixClone(pixr2); - } else if (d2 == 32) { /* and d1 != 32; convert to 32 */ - pix2 = pixClone(pixr2); - pix1 = pixConvertTo32(pixr1); - } else { /* both are 8 bpp or less */ - pix1 = pixConvertTo8(pixr1, FALSE); - pix2 = pixConvertTo8(pixr2, FALSE); - } - pixDestroy(&pixr1); - pixDestroy(&pixr2); - 
- /* Sanity check: both either 8 or 32 bpp */ - d1 = pixGetDepth(pix1); - d2 = pixGetDepth(pix2); - if (d1 != d2 || (d1 != 8 && d1 != 32)) { - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pixg2); - return (PIX *)ERROR_PTR("depths not regularized! bad!", procName, NULL); - } - - /* Start with a copy of pix1 */ - pixd = pixCopy(NULL, pix1); - pixDestroy(&pix1); - - /* Blend pix2 onto pixd, using pixg2. - * Let the normalized pixel value of pixg2 be f = pixval / 255, - * and the pixel values of pixd and pix2 be p1 and p2, rsp. - * Then the blended value is: - * p = (1.0 - f) * p1 + f * p2 - * Blending is done component-wise if rgb. - * Scan over pix2 and pixg2, clipping to pixd where necessary. */ - datad = pixGetData(pixd); - datas = pixGetData(pix2); - datag = pixGetData(pixg2); - wpld = pixGetWpl(pixd); - wpls = pixGetWpl(pix2); - wplg = pixGetWpl(pixg2); - for (i = 0; i < hmin; i++) { - if (i + y < 0 || i + y >= h1) continue; - lined = datad + (i + y) * wpld; - lines = datas + i * wpls; - lineg = datag + i * wplg; - for (j = 0; j < wmin; j++) { - if (j + x < 0 || j + x >= w1) continue; - val = GET_DATA_BYTE(lineg, j); - if (val == 0) continue; /* pix2 is transparent */ - fract = (l_float32)val / 255.; - if (d1 == 8) { - dval = GET_DATA_BYTE(lined, j + x); - sval = GET_DATA_BYTE(lines, j); - dval = (l_int32)((1.0 - fract) * dval + fract * sval); - SET_DATA_BYTE(lined, j + x, dval); - } else { /* 32 */ - dval32 = *(lined + j + x); - sval32 = *(lines + j); - extractRGBValues(dval32, &drval, &dgval, &dbval); - extractRGBValues(sval32, &srval, &sgval, &sbval); - drval = (l_int32)((1.0 - fract) * drval + fract * srval); - dgval = (l_int32)((1.0 - fract) * dgval + fract * sgval); - dbval = (l_int32)((1.0 - fract) * dbval + fract * sbval); - composeRGBPixel(drval, dgval, dbval, &dval32); - *(lined + j + x) = dval32; - } - } - } - - pixDestroy(&pixg2); - pixDestroy(&pix2); - return pixd; -} - - 
-/*---------------------------------------------------------------------* - * Blending background to a specific color * - *---------------------------------------------------------------------*/ -/*! - * \brief pixBlendBackgroundToColor() - * - * \param[in] pixd can be NULL or pixs - * \param[in] pixs 32 bpp rgb - * \param[in] box region for blending; can be NULL) - * \param[in] color 32 bit color in 0xrrggbb00 format - * \param[in] gamma, minval, maxval args for grayscale TRC mapping - * \return pixd always - * - *
- * Notes:
- *      (1) This in effect replaces light background pixels in pixs
- *          by the input color.  It does it by alpha blending so that
- *          there are no visible artifacts from hard cutoffs.
- *      (2) If pixd == pixs, this is done in-place.
- *      (3) If box == NULL, this is performed on all of pixs.
- *      (4) The alpha component for blending is derived from pixs,
- *          by converting to grayscale and enhancing with a TRC.
- *      (5) The last three arguments specify the TRC operation.
- *          Suggested values are: %gamma = 0.3, %minval = 50, %maxval = 200.
- *          To skip the TRC, use %gamma == 1, %minval = 0, %maxval = 255.
- *          See pixGammaTRC() for details.
- * 
- */ -PIX * -pixBlendBackgroundToColor(PIX *pixd, - PIX *pixs, - BOX *box, - l_uint32 color, - l_float32 gamma, - l_int32 minval, - l_int32 maxval) -{ -l_int32 x, y, w, h; -BOX *boxt; -PIX *pixt, *pixc, *pixr, *pixg; - - PROCNAME("pixBlendBackgroundToColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd neither null nor pixs", procName, pixd); - - /* Extract the (optionally cropped) region, pixr, and generate - * an identically sized pixc with the uniform color. */ - if (!pixd) - pixd = pixCopy(NULL, pixs); - if (box) { - pixr = pixClipRectangle(pixd, box, &boxt); - boxGetGeometry(boxt, &x, &y, &w, &h); - pixc = pixCreate(w, h, 32); - boxDestroy(&boxt); - } else { - pixc = pixCreateTemplate(pixs); - pixr = pixClone(pixd); - } - pixSetAllArbitrary(pixc, color); - - /* Set up the alpha channel */ - pixg = pixConvertTo8(pixr, 0); - pixGammaTRC(pixg, pixg, gamma, minval, maxval); - pixSetRGBComponent(pixc, pixg, L_ALPHA_CHANNEL); - - /* Blend and replace in pixd */ - pixt = pixBlendWithGrayMask(pixr, pixc, NULL, 0, 0); - if (box) { - pixRasterop(pixd, x, y, w, h, PIX_SRC, pixt, 0, 0); - pixDestroy(&pixt); - } else { - pixTransferAllData(pixd, &pixt, 0, 0); - } - - pixDestroy(&pixc); - pixDestroy(&pixr); - pixDestroy(&pixg); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Multiplying by a specific color * - *---------------------------------------------------------------------*/ -/*! - * \brief pixMultiplyByColor() - * - * \param[in] pixd can be NULL or pixs - * \param[in] pixs 32 bpp rgb - * \param[in] box region for filtering; can be NULL) - * \param[in] color 32 bit color in 0xrrggbb00 format - * \return pixd always - * - *
- * Notes:
- *      (1) This filters all pixels in the specified region by
- *          multiplying each component by the input color.
- *          This leaves black invariant and transforms white to the
- *          input color.
- *      (2) If pixd == pixs, this is done in-place.
- *      (3) If box == NULL, this is performed on all of pixs.
- * 
- */ -PIX * -pixMultiplyByColor(PIX *pixd, - PIX *pixs, - BOX *box, - l_uint32 color) -{ -l_int32 i, j, bx, by, w, h, wpl; -l_int32 red, green, blue, rval, gval, bval, nrval, ngval, nbval; -l_float32 frval, fgval, fbval; -l_uint32 *data, *line; -PIX *pixt; - - PROCNAME("pixMultiplyByColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd neither null nor pixs", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - if (box) { - boxGetGeometry(box, &bx, &by, NULL, NULL); - pixt = pixClipRectangle(pixd, box, NULL); - } else { - pixt = pixClone(pixd); - } - - /* Multiply each pixel in pixt by the color */ - extractRGBValues(color, &red, &green, &blue); - frval = (1. / 255.) * red; - fgval = (1. / 255.) * green; - fbval = (1. / 255.) * blue; - data = pixGetData(pixt); - wpl = pixGetWpl(pixt); - pixGetDimensions(pixt, &w, &h, NULL); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - nrval = (l_int32)(frval * rval + 0.5); - ngval = (l_int32)(fgval * gval + 0.5); - nbval = (l_int32)(fbval * bval + 0.5); - composeRGBPixel(nrval, ngval, nbval, line + j); - } - } - - /* Replace */ - if (box) - pixRasterop(pixd, bx, by, w, h, PIX_SRC, pixt, 0, 0); - pixDestroy(&pixt); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Rendering with alpha blending over a uniform background * - *---------------------------------------------------------------------*/ -/*! - * \brief pixAlphaBlendUniform() - * - * \param[in] pixs 32 bpp rgba, with alpha - * \param[in] color 32 bit color in 0xrrggbb00 format - * \return pixd 32 bpp rgb: pixs blended over uniform color %color, - * a clone of pixs if no alpha, and null on error - * - *
- * Notes:
- *      (1) This is a convenience function that renders 32 bpp RGBA images
- *          (with an alpha channel) over a uniform background of
- *          value %color.  To render over a white background,
- *          use %color = 0xffffff00.  The result is an RGB image.
- *      (2) If pixs does not have an alpha channel, it returns a clone
- *          of pixs.
- * 
- */ -PIX * -pixAlphaBlendUniform(PIX *pixs, - l_uint32 color) -{ -PIX *pixt, *pixd; - - PROCNAME("pixAlphaBlendUniform"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (pixGetSpp(pixs) != 4) { - L_WARNING("no alpha channel; returning clone\n", procName); - return pixClone(pixs); - } - - pixt = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixt, color); - pixSetSpp(pixt, 3); /* not required */ - pixd = pixBlendWithGrayMask(pixt, pixs, NULL, 0, 0); - - pixDestroy(&pixt); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Adding an alpha layer for blending * - *---------------------------------------------------------------------*/ -/*! - * \brief pixAddAlphaToBlend() - * - * \param[in] pixs any depth - * \param[in] fract fade fraction in the alpha component - * \param[in] invert 1 to photometrically invert pixs - * \return pixd 32 bpp with alpha, or null on error - * - *
- * Notes:
- *      (1) This is a simple alpha layer generator, where typically white has
- *          maximum transparency and black has minimum.
- *      (2) If %invert == 1, generate the same alpha layer but invert
- *          the input image photometrically.  This is useful for blending
- *          over dark images, where you want dark regions in pixs, such
- *          as text, to be lighter in the blended image.
- *      (3) The fade %fract gives the minimum transparency (i.e.,
- *          maximum opacity).  A small fraction is useful for adding
- *          a watermark to an image.
- *      (4) If pixs has a colormap, it is removed to rgb.
- *      (5) If pixs already has an alpha layer, it is overwritten.
- * 
- */ -PIX * -pixAddAlphaToBlend(PIX *pixs, - l_float32 fract, - l_int32 invert) -{ -PIX *pixd, *pix1, *pix2; - - PROCNAME("pixAddAlphaToBlend"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (fract < 0.0 || fract > 1.0) - return (PIX *)ERROR_PTR("invalid fract", procName, NULL); - - /* Convert to 32 bpp */ - if (pixGetColormap(pixs)) - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - else - pix1 = pixClone(pixs); - pixd = pixConvertTo32(pix1); /* new */ - - /* Use an inverted image if this will be blended with a dark image */ - if (invert) pixInvert(pixd, pixd); - - /* Generate alpha layer */ - pix2 = pixConvertTo8(pix1, 0); /* new */ - pixInvert(pix2, pix2); - pixMultConstantGray(pix2, fract); - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} - - - -/*---------------------------------------------------------------------* - * Setting a transparent alpha component over a white background * - *---------------------------------------------------------------------*/ -/*! - * \brief pixSetAlphaOverWhite() - * - * \param[in] pixs colormapped or 32 bpp rgb; no alpha - * \return pixd new pix with meaningful alpha component, - * or null on error - * - *
- * Notes:
- *      (1) The generated alpha component is transparent over white
- *          (background) pixels in pixs, and quickly grades to opaque
- *          away from the transparent parts.  This is a cheap and
- *          dirty alpha generator.  The 2 pixel gradation is useful
- *          to blur the boundary between the transparent region
- *          (that will render entirely from a backing image) and
- *          the remainder which renders from pixs.
- *      (2) All alpha component bits in pixs are overwritten.
- * 
- */ -PIX * -pixSetAlphaOverWhite(PIX *pixs) -{ -PIX *pixd, *pix1, *pix2, *pix3, *pix4; - - PROCNAME("pixSetAlphaOverWhite"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!(pixGetDepth(pixs) == 32 || pixGetColormap(pixs))) - return (PIX *)ERROR_PTR("pixs not 32 bpp or cmapped", procName, NULL); - - /* Remove colormap if it exists; otherwise copy */ - pixd = pixRemoveColormapGeneral(pixs, REMOVE_CMAP_TO_FULL_COLOR, L_COPY); - - /* Generate a 1 bpp image where a white pixel in pixd is 0. - * In the comments below, a "white" pixel refers to pixd. - * pix1 is rgb, pix2 is 8 bpp gray, pix3 is 1 bpp. */ - pix1 = pixInvert(NULL, pixd); /* send white (255) to 0 for each sample */ - pix2 = pixConvertRGBToGrayMinMax(pix1, L_CHOOSE_MAX); /* 0 if white */ - pix3 = pixThresholdToBinary(pix2, 1); /* sets white pixels to 1 */ - pixInvert(pix3, pix3); /* sets white pixels to 0 */ - - /* Generate the alpha component using the distance transform, - * which measures the distance to the nearest bg (0) pixel in pix3. - * After multiplying by 128, its value is 0 (transparent) - * over white pixels, and goes to opaque (255) two pixels away - * from the nearest white pixel. */ - pix4 = pixDistanceFunction(pix3, 8, 8, L_BOUNDARY_FG); - pixMultConstantGray(pix4, 128.0); - pixSetRGBComponent(pixd, pix4, L_ALPHA_CHANNEL); - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Fading from the edge * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixLinearEdgeFade() - * - * \param[in] pixs 8 or 32 bpp; no colormap - * \param[in] dir L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT - * \param[in] fadeto L_BLEND_TO_WHITE, L_BLEND_TO_BLACK - * \param[in] distfract fraction of width or height over which fading occurs - * \param[in] maxfade fraction of fading at the edge, <= 1.0 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place operation.
- *      (2) Maximum fading fraction %maxfade occurs at the edge of the image,
- *          and the fraction goes to 0 at the fractional distance %distfract
- *          from the edge.  %maxfade must be in [0, 1].
- *      (3) %distrfact must be in [0, 1], and typically it would be <= 0.5.
- * 
- */ -l_ok -pixLinearEdgeFade(PIX *pixs, - l_int32 dir, - l_int32 fadeto, - l_float32 distfract, - l_float32 maxfade) -{ -l_int32 i, j, w, h, d, wpl, xmin, ymin, range, val, rval, gval, bval; -l_float32 slope, limit, del; -l_uint32 *data, *line; - - PROCNAME("pixLinearEdgeFade"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetColormap(pixs) != NULL) - return ERROR_INT("pixs has a colormap", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32) - return ERROR_INT("pixs not 8 or 32 bpp", procName, 1); - if (dir != L_FROM_LEFT && dir != L_FROM_RIGHT && - dir != L_FROM_TOP && dir != L_FROM_BOT) - return ERROR_INT("invalid fade direction from edge", procName, 1); - if (fadeto != L_BLEND_TO_WHITE && fadeto != L_BLEND_TO_BLACK) - return ERROR_INT("invalid fadeto photometry", procName, 1); - if (maxfade <= 0) return 0; - if (maxfade > 1.0) - return ERROR_INT("invalid maxfade", procName, 1); - if (distfract <= 0 || distfract * L_MIN(w, h) < 1.0) { - L_INFO("distfract is too small\n", procName); - return 0; - } - if (distfract > 1.0) - return ERROR_INT("invalid distfract", procName, 1); - - /* Set up parameters */ - if (dir == L_FROM_LEFT) { - range = (l_int32)(distfract * w); - xmin = 0; - slope = maxfade / (l_float32)range; - } else if (dir == L_FROM_RIGHT) { - range = (l_int32)(distfract * w); - xmin = w - range; - slope = maxfade / (l_float32)range; - } else if (dir == L_FROM_TOP) { - range = (l_int32)(distfract * h); - ymin = 0; - slope = maxfade / (l_float32)range; - } else if (dir == L_FROM_BOT) { - range = (l_int32)(distfract * h); - ymin = h - range; - slope = maxfade / (l_float32)range; - } - - limit = (fadeto == L_BLEND_TO_WHITE) ? 255.0 : 0.0; - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - if (dir == L_FROM_LEFT || dir == L_FROM_RIGHT) { - for (j = 0; j < range; j++) { - del = (dir == L_FROM_LEFT) ? 
maxfade - slope * j - : maxfade - slope * (range - j); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (d == 8) { - val = GET_DATA_BYTE(line, xmin + j); - val += (limit - val) * del + 0.5; - SET_DATA_BYTE(line, xmin + j, val); - } else { /* rgb */ - extractRGBValues(*(line + xmin + j), &rval, &gval, &bval); - rval += (limit - rval) * del + 0.5; - gval += (limit - gval) * del + 0.5; - bval += (limit - bval) * del + 0.5; - composeRGBPixel(rval, gval, bval, line + xmin + j); - } - } - } - } else { /* dir == L_FROM_TOP || L_FROM_BOT */ - for (i = 0; i < range; i++) { - del = (dir == L_FROM_TOP) ? maxfade - slope * i - : maxfade - slope * (range - i); - line = data + (ymin + i) * wpl; - for (j = 0; j < w; j++) { - if (d == 8) { - val = GET_DATA_BYTE(line, j); - val += (limit - val) * del + 0.5; - SET_DATA_BYTE(line, j, val); - } else { /* rgb */ - extractRGBValues(*(line + j), &rval, &gval, &bval); - rval += (limit - rval) * del + 0.5; - gval += (limit - gval) * del + 0.5; - bval += (limit - bval) * del + 0.5; - composeRGBPixel(rval, gval, bval, line + j); - } - } - } - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmf.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmf.c deleted file mode 100644 index 2f03ddb9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmf.c +++ /dev/null @@ -1,877 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bmf.c - *
- *
- *   Acquisition and generation of bitmap fonts.
- *
- *       L_BMF           *bmfCreate()
- *       L_BMF           *bmfDestroy()
- *
- *       PIX             *bmfGetPix()
- *       l_int32          bmfGetWidth()
- *       l_int32          bmfGetBaseline()
- *
- *       PIXA            *pixaGetFont()
- *       l_int32          pixaSaveFont()
- *       static PIXA     *pixaGenerateFontFromFile()
- *       static PIXA     *pixaGenerateFontFromString()
- *       static PIXA     *pixaGenerateFont()
- *       static l_int32   pixGetTextBaseline()
- *       static l_int32   bmfMakeAsciiTables()
- *
- *   This is not a very general utility, because it only uses bitmap
- *   representations of a single font, Palatino-Roman, with the
- *   normal style.  It uses bitmaps generated for nine sizes, from
- *   4 to 20 pts, rendered at 300 ppi.  Generalization to different
- *   fonts, styles and sizes is straightforward.
- *
- *   I chose Palatino-Roman is because I like it.
- *   The input font images were generated from a set of small
- *   PostScript files, such as chars-12.ps, which were rendered
- *   into the inputfont[] bitmap files using GhostScript.  See, for
- *   example, the bash script prog/ps2tiff, which will "rip" a
- *   PostScript file into a set of ccitt-g4 compressed tiff files.
- *
- *   The set of ascii characters from 32 through 126 are the 95
- *   printable ascii chars.  Palatino-Roman is missing char 92, '\'.
- *   I have substituted an LR flip of '/', char 47, for 92, so that
- *   there are no missing printable chars in this set.  The space is
- *   char 32, and I have given it a width equal to twice the width of '!'.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#include "bmfdata.h" - -static const l_float32 VertFractSep = 0.3; - -#ifndef NO_CONSOLE_IO -#define DEBUG_BASELINE 0 -#define DEBUG_CHARS 0 -#define DEBUG_FONT_GEN 0 -#endif /* ~NO_CONSOLE_IO */ - -static PIXA *pixaGenerateFontFromFile(const char *dir, l_int32 fontsize, - l_int32 *pbl0, l_int32 *pbl1, - l_int32 *pbl2); -static PIXA *pixaGenerateFontFromString(l_int32 fontsize, l_int32 *pbl0, - l_int32 *pbl1, l_int32 *pbl2); -static PIXA *pixaGenerateFont(PIX *pixs, l_int32 fontsize, l_int32 *pbl0, - l_int32 *pbl1, l_int32 *pbl2); -static l_int32 pixGetTextBaseline(PIX *pixs, l_int32 *tab8, l_int32 *py); -static l_int32 bmfMakeAsciiTables(L_BMF *bmf); - -/*---------------------------------------------------------------------*/ -/* Bmf create/destroy */ -/*---------------------------------------------------------------------*/ -/*! - * \brief bmfCreate() - * - * \param[in] dir [optional] directory holding pixa of character set - * \param[in] fontsize 4, 6, 8, ... , 20 - * \return bmf holding the bitmap font and associated information - * - *
- * Notes:
- *      (1) If %dir == null, this generates the font bitmaps from a
- *          compiled string.
- *      (2) Otherwise, this tries to read a pre-computed pixa file with the
- *          95 ascii chars in it.  If the file is not found, it then
- *          attempts to generate the pixa and associated baseline
- *          data from a tiff image containing all the characters.  If
- *          that fails, it uses the compiled string.
- * 
- */ -L_BMF * -bmfCreate(const char *dir, - l_int32 fontsize) -{ -L_BMF *bmf; -PIXA *pixa; - - PROCNAME("bmfCreate"); - - if (fontsize < 4 || fontsize > 20 || (fontsize % 2)) - return (L_BMF *)ERROR_PTR("fontsize must be in {4, 6, ..., 20}", - procName, NULL); - - bmf = (L_BMF *)LEPT_CALLOC(1, sizeof(L_BMF)); - - if (!dir) { /* Generate from a string */ - pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1, - &bmf->baseline2, &bmf->baseline3); - } else { /* Look for the pixa in a directory */ - pixa = pixaGetFont(dir, fontsize, &bmf->baseline1, &bmf->baseline2, - &bmf->baseline3); - if (!pixa) { /* Not found; make it from a file */ - L_INFO("Generating pixa of bitmap fonts from file\n", procName); - pixa = pixaGenerateFontFromFile(dir, fontsize, &bmf->baseline1, - &bmf->baseline2, &bmf->baseline3); - if (!pixa) { /* Not made; make it from a string after all */ - L_ERROR("Failed to make font; use string\n", procName); - pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1, - &bmf->baseline2, &bmf->baseline3); - } - } - } - - if (!pixa) { - bmfDestroy(&bmf); - return (L_BMF *)ERROR_PTR("font pixa not made", procName, NULL); - } - - bmf->pixa = pixa; - bmf->size = fontsize; - if (dir) bmf->directory = stringNew(dir); - bmfMakeAsciiTables(bmf); - return bmf; -} - - -/*! - * \brief bmfDestroy() - * - * \param[in,out] pbmf will be set to null before returning - * \return void - */ -void -bmfDestroy(L_BMF **pbmf) -{ -L_BMF *bmf; - - PROCNAME("bmfDestroy"); - - if (pbmf == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((bmf = *pbmf) == NULL) - return; - - pixaDestroy(&bmf->pixa); - LEPT_FREE(bmf->directory); - LEPT_FREE(bmf->fonttab); - LEPT_FREE(bmf->baselinetab); - LEPT_FREE(bmf->widthtab); - LEPT_FREE(bmf); - *pbmf = NULL; - return; -} - - -/*---------------------------------------------------------------------*/ -/* Bmf accessors */ -/*---------------------------------------------------------------------*/ -/*! 
- * \brief bmfGetPix() - * - * \param[in] bmf - * \param[in] chr should be one of the 95 supported printable bitmaps - * \return pix clone of pix in bmf, or NULL on error - */ -PIX * -bmfGetPix(L_BMF *bmf, - char chr) -{ -l_int32 i, index; -PIXA *pixa; - - PROCNAME("bmfGetPix"); - - if ((index = (l_int32)chr) == 10) /* NL */ - return NULL; - if (!bmf) - return (PIX *)ERROR_PTR("bmf not defined", procName, NULL); - - i = bmf->fonttab[index]; - if (i == UNDEF) { - L_ERROR("no bitmap representation for %d\n", procName, index); - return NULL; - } - - if ((pixa = bmf->pixa) == NULL) - return (PIX *)ERROR_PTR("pixa not found", procName, NULL); - - return pixaGetPix(pixa, i, L_CLONE); -} - - -/*! - * \brief bmfGetWidth() - * - * \param[in] bmf - * \param[in] chr should be one of the 95 supported bitmaps - * \param[out] pw character width; -1 if not printable - * \return 0 if OK, 1 on error - */ -l_ok -bmfGetWidth(L_BMF *bmf, - char chr, - l_int32 *pw) -{ -l_int32 i, index; -PIXA *pixa; - - PROCNAME("bmfGetWidth"); - - if (!pw) - return ERROR_INT("&w not defined", procName, 1); - *pw = -1; - if (!bmf) - return ERROR_INT("bmf not defined", procName, 1); - if ((index = (l_int32)chr) == 10) /* NL */ - return 0; - - i = bmf->fonttab[index]; - if (i == UNDEF) { - L_ERROR("no bitmap representation for %d\n", procName, index); - return 1; - } - - if ((pixa = bmf->pixa) == NULL) - return ERROR_INT("pixa not found", procName, 1); - - return pixaGetPixDimensions(pixa, i, pw, NULL, NULL); -} - - -/*! 
- * \brief bmfGetBaseline() - * - * \param[in] bmf - * \param[in] chr should be one of the 95 supported bitmaps - * \param[out] pbaseline distance below UL corner of bitmap char - * \return 0 if OK, 1 on error - */ -l_ok -bmfGetBaseline(L_BMF *bmf, - char chr, - l_int32 *pbaseline) -{ -l_int32 bl, index; - - PROCNAME("bmfGetBaseline"); - - if (!pbaseline) - return ERROR_INT("&baseline not defined", procName, 1); - *pbaseline = 0; - if (!bmf) - return ERROR_INT("bmf not defined", procName, 1); - if ((index = (l_int32)chr) == 10) /* NL */ - return 0; - - bl = bmf->baselinetab[index]; - if (bl == UNDEF) { - L_ERROR("no bitmap representation for %d\n", procName, index); - return 1; - } - - *pbaseline = bl; - return 0; -} - - -/*---------------------------------------------------------------------*/ -/* Font bitmap acquisition and generation */ -/*---------------------------------------------------------------------*/ -/*! - * \brief pixaGetFont() - * - * \param[in] dir directory holding pixa of character set - * \param[in] fontsize 4, 6, 8, ... , 20 - * \param[out] pbl0 baseline of row 1 - * \param[out] pbl1 baseline of row 2 - * \param[out] pbl2 baseline of row 3 - * \return pixa of font bitmaps for 95 characters, or NULL on error - * - *
- * Notes:
- *      (1) This reads a pre-computed pixa file with the 95 ascii chars.
- * 
- */ -PIXA * -pixaGetFont(const char *dir, - l_int32 fontsize, - l_int32 *pbl0, - l_int32 *pbl1, - l_int32 *pbl2) -{ -char *pathname; -l_int32 fileno; -PIXA *pixa; - - PROCNAME("pixaGetFont"); - - fileno = (fontsize / 2) - 2; - if (fileno < 0 || fileno >= NUM_FONTS) - return (PIXA *)ERROR_PTR("font size invalid", procName, NULL); - if (!pbl0 || !pbl1 || !pbl2) - return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); - *pbl0 = baselines[fileno][0]; - *pbl1 = baselines[fileno][1]; - *pbl2 = baselines[fileno][2]; - - pathname = pathJoin(dir, outputfonts[fileno]); - pixa = pixaRead(pathname); - LEPT_FREE(pathname); - - if (!pixa) - L_WARNING("pixa of char bitmaps not found\n", procName); - return pixa; -} - - -/*! - * \brief pixaSaveFont() - * - * \param[in] indir [optional] directory holding image of character set - * \param[in] outdir directory into which the output pixa file - * will be written - * \param[in] fontsize in pts, at 300 ppi - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This saves a font of a particular size.
- *      (2) If %indir == null, this generates the font bitmaps from a
- *          compiled string.
- *      (3) prog/genfonts calls this function for each of the
- *          nine font sizes, to generate all the font pixa files.
- * 
- */ -l_ok -pixaSaveFont(const char *indir, - const char *outdir, - l_int32 fontsize) -{ -char *pathname; -l_int32 bl1, bl2, bl3; -PIXA *pixa; - - PROCNAME("pixaSaveFont"); - - if (fontsize < 4 || fontsize > 20 || (fontsize % 2)) - return ERROR_INT("fontsize must be in {4, 6, ..., 20}", procName, 1); - - if (!indir) /* Generate from a string */ - pixa = pixaGenerateFontFromString(fontsize, &bl1, &bl2, &bl3); - else /* Generate from an image file */ - pixa = pixaGenerateFontFromFile(indir, fontsize, &bl1, &bl2, &bl3); - if (!pixa) - return ERROR_INT("pixa not made", procName, 1); - - pathname = pathJoin(outdir, outputfonts[(fontsize - 4) / 2]); - pixaWrite(pathname, pixa); - -#if DEBUG_FONT_GEN - L_INFO("Found %d chars in font size %d\n", procName, pixaGetCount(pixa), - fontsize); - L_INFO("Baselines are at: %d, %d, %d\n", procName, bl1, bl2, bl3); -#endif /* DEBUG_FONT_GEN */ - - LEPT_FREE(pathname); - pixaDestroy(&pixa); - return 0; -} - - -/*! - * \brief pixaGenerateFontFromFile() - * - * \param[in] dir directory holding image of character set - * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi - * \param[out] pbl0 baseline of row 1 - * \param[out] pbl1 baseline of row 2 - * \param[out] pbl2 baseline of row 3 - * \return pixa of font bitmaps for 95 characters, or NULL on error - * - * These font generation functions use 9 sets, each with bitmaps - * of 94 ascii characters, all in Palatino-Roman font. - * Each input bitmap has 3 rows of characters. The range of - * ascii values in each row is as follows: - * row 0: 32-57 32 is a space - * row 1: 58-91 92, '\', is not represented in this font - * row 2: 93-126 - * We LR flip the '/' char to generate a bitmap for the missing - * '\' character, so that we have representations of all 95 - * printable chars. - * - * Typically, use pixaGetFont() to generate the character bitmaps - * in memory for a bmf. 
This will simply access the bitmap files - * in a serialized pixa that were produced in prog/genfonts.c using - * this function. - */ -static PIXA * -pixaGenerateFontFromFile(const char *dir, - l_int32 fontsize, - l_int32 *pbl0, - l_int32 *pbl1, - l_int32 *pbl2) -{ -char *pathname; -l_int32 fileno; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaGenerateFontFromFile"); - - if (!pbl0 || !pbl1 || !pbl2) - return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); - *pbl0 = *pbl1 = *pbl2 = 0; - if (!dir) - return (PIXA *)ERROR_PTR("dir not defined", procName, NULL); - fileno = (fontsize / 2) - 2; - if (fileno < 0 || fileno >= NUM_FONTS) - return (PIXA *)ERROR_PTR("font size invalid", procName, NULL); - - pathname = pathJoin(dir, inputfonts[fileno]); - pix = pixRead(pathname); - LEPT_FREE(pathname); - if (!pix) { - L_ERROR("pix not found for font size %d\n", procName, fontsize); - return NULL; - } - - pixa = pixaGenerateFont(pix, fontsize, pbl0, pbl1, pbl2); - pixDestroy(&pix); - return pixa; -} - - -/*! - * \brief pixaGenerateFontFromString() - * - * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi - * \param[out] pbl0 baseline of row 1 - * \param[out] pbl1 baseline of row 2 - * \param[out] pbl2 baseline of row 3 - * \return pixa of font bitmaps for 95 characters, or NULL on error - * - *
- * Notes:
- *      (1) See pixaGenerateFontFromFile() for details.
- * 
- */ -static PIXA * -pixaGenerateFontFromString(l_int32 fontsize, - l_int32 *pbl0, - l_int32 *pbl1, - l_int32 *pbl2) -{ -l_uint8 *data; -l_int32 redsize, nbytes; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaGenerateFontFromString"); - - if (!pbl0 || !pbl1 || !pbl2) - return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); - *pbl0 = *pbl1 = *pbl2 = 0; - redsize = (fontsize / 2) - 2; - if (redsize < 0 || redsize >= NUM_FONTS) - return (PIXA *)ERROR_PTR("invalid font size", procName, NULL); - - if (fontsize == 4) { - data = decodeBase64(fontdata_4, strlen(fontdata_4), &nbytes); - } else if (fontsize == 6) { - data = decodeBase64(fontdata_6, strlen(fontdata_6), &nbytes); - } else if (fontsize == 8) { - data = decodeBase64(fontdata_8, strlen(fontdata_8), &nbytes); - } else if (fontsize == 10) { - data = decodeBase64(fontdata_10, strlen(fontdata_10), &nbytes); - } else if (fontsize == 12) { - data = decodeBase64(fontdata_12, strlen(fontdata_12), &nbytes); - } else if (fontsize == 14) { - data = decodeBase64(fontdata_14, strlen(fontdata_14), &nbytes); - } else if (fontsize == 16) { - data = decodeBase64(fontdata_16, strlen(fontdata_16), &nbytes); - } else if (fontsize == 18) { - data = decodeBase64(fontdata_18, strlen(fontdata_18), &nbytes); - } else { /* fontsize == 20 */ - data = decodeBase64(fontdata_20, strlen(fontdata_20), &nbytes); - } - if (!data) - return (PIXA *)ERROR_PTR("data not made", procName, NULL); - - pix = pixReadMem(data, nbytes); - LEPT_FREE(data); - if (!pix) - return (PIXA *)ERROR_PTR("pix not made", procName, NULL); - - pixa = pixaGenerateFont(pix, fontsize, pbl0, pbl1, pbl2); - pixDestroy(&pix); - return pixa; -} - - -/*! - * \brief pixaGenerateFont() - * - * \param[in] pixs of 95 characters in 3 rows - * \param[in] fontsize 4, 6, 8, ... 
, 20, in pts at 300 ppi - * \param[out] pbl0 baseline of row 1 - * \param[out] pbl1 baseline of row 2 - * \param[out] pbl2 baseline of row 3 - * \return pixa of font bitmaps for 95 characters, or NULL on error - * - *
- * Notes:
- *      (1) This does all the work.  See pixaGenerateFontFromFile()
- *          for an overview.
- *      (2) The pix is for one of the 9 fonts.  %fontsize is only
- *          used here for debugging.
- * 
- */ -static PIXA * -pixaGenerateFont(PIX *pixs, - l_int32 fontsize, - l_int32 *pbl0, - l_int32 *pbl1, - l_int32 *pbl2) -{ -l_int32 i, j, nrows, nrowchars, nchars, h, yval; -l_int32 width, height; -l_int32 baseline[3]; -l_int32 *tab = NULL; -BOX *box, *box1, *box2; -BOXA *boxar, *boxac, *boxacs; -PIX *pix1, *pix2, *pixr, *pixrc, *pixc; -PIXA *pixa; -l_int32 n, w, inrow, top; -l_int32 *ia; -NUMA *na; - - PROCNAME("pixaGenerateFont"); - - if (!pbl0 || !pbl1 || !pbl2) - return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); - *pbl0 = *pbl1 = *pbl2 = 0; - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Locate the 3 rows of characters */ - w = pixGetWidth(pixs); - na = pixCountPixelsByRow(pixs, NULL); - boxar = boxaCreate(0); - n = numaGetCount(na); - ia = numaGetIArray(na); - inrow = 0; - for (i = 0; i < n; i++) { - if (!inrow && ia[i] > 0) { - inrow = 1; - top = i; - } else if (inrow && ia[i] == 0) { - inrow = 0; - box = boxCreate(0, top, w, i - top); - boxaAddBox(boxar, box, L_INSERT); - } - } - LEPT_FREE(ia); - numaDestroy(&na); - nrows = boxaGetCount(boxar); -#if DEBUG_FONT_GEN - L_INFO("For fontsize %s, have %d rows\n", procName, fontsize, nrows); -#endif /* DEBUG_FONT_GEN */ - if (nrows != 3) { - L_INFO("nrows = %d; skipping fontsize %d\n", procName, nrows, fontsize); - boxaDestroy(&boxar); - return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL); - } - - /* Grab the character images and baseline data */ -#if DEBUG_BASELINE - lept_rmdir("baseline"); - lept_mkdir("baseline"); -#endif /* DEBUG_BASELINE */ - tab = makePixelSumTab8(); - pixa = pixaCreate(95); - for (i = 0; i < nrows; i++) { - box = boxaGetBox(boxar, i, L_CLONE); - pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */ - pixGetTextBaseline(pixr, tab, &yval); - baseline[i] = yval; - -#if DEBUG_BASELINE - L_INFO("Baseline info: row %d, yval = %d, h = %d\n", procName, - i, yval, pixGetHeight(pixr)); - pix1 = pixCopy(NULL, pixr); - 
pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1, - L_FLIP_PIXELS); - if (i == 0 ) - pixWriteDebug("/tmp/baseline/row0.png", pix1, IFF_PNG); - else if (i == 1) - pixWriteDebug("/tmp/baseline/row1.png", pix1, IFF_PNG); - else - pixWriteDebug("/tmp/baseline/row2.png", pix1, IFF_PNG); - pixDestroy(&pix1); -#endif /* DEBUG_BASELINE */ - - boxDestroy(&box); - pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35); - boxac = pixConnComp(pixrc, NULL, 8); - boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL); - if (i == 0) { /* consolidate the two components of '"' */ - box1 = boxaGetBox(boxacs, 1, L_CLONE); - box2 = boxaGetBox(boxacs, 2, L_CLONE); - box1->w = box2->x + box2->w - box1->x; /* increase width */ - boxDestroy(&box1); - boxDestroy(&box2); - boxaRemoveBox(boxacs, 2); - } - h = pixGetHeight(pixr); - nrowchars = boxaGetCount(boxacs); - for (j = 0; j < nrowchars; j++) { - box = boxaGetBox(boxacs, j, L_COPY); - if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */ - boxDestroy(&box); - continue; - } - box->y = 0; - box->h = h - 1; - pixc = pixClipRectangle(pixr, box, NULL); - boxDestroy(&box); - if (i == 0 && j == 0) /* add a pix for the space; change later */ - pixaAddPix(pixa, pixc, L_COPY); - if (i == 2 && j == 0) /* add a pix for the '\'; change later */ - pixaAddPix(pixa, pixc, L_COPY); - pixaAddPix(pixa, pixc, L_INSERT); - } - pixDestroy(&pixr); - pixDestroy(&pixrc); - boxaDestroy(&boxac); - boxaDestroy(&boxacs); - } - LEPT_FREE(tab); - - nchars = pixaGetCount(pixa); - if (nchars != 95) - return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL); - - *pbl0 = baseline[0]; - *pbl1 = baseline[1]; - *pbl2 = baseline[2]; - - /* Fix the space character up; it should have no ON pixels, - * and be about twice as wide as the '!' character. 
*/ - pix1 = pixaGetPix(pixa, 0, L_CLONE); - width = 2 * pixGetWidth(pix1); - height = pixGetHeight(pix1); - pixDestroy(&pix1); - pix1 = pixCreate(width, height, 1); - pixaReplacePix(pixa, 0, pix1, NULL); - - /* Fix up the '\' character; use a LR flip of the '/' char */ - pix1 = pixaGetPix(pixa, 15, L_CLONE); - pix2 = pixFlipLR(NULL, pix1); - pixDestroy(&pix1); - pixaReplacePix(pixa, 60, pix2, NULL); - -#if DEBUG_CHARS - pix1 = pixaDisplayTiled(pixa, 1500, 0, 10); - pixDisplay(pix1, 100 * i, 200); - pixDestroy(&pix1); -#endif /* DEBUG_CHARS */ - - boxaDestroy(&boxar); - return pixa; -} - - -/*! - * \brief pixGetTextBaseline() - * - * \param[in] pixs 1 bpp, one textline character set - * \param[in] tab8 [optional] pixel sum table - * \param[out] py baseline value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Method: find the largest difference in pixel sums from one
- *          raster line to the next one below it.  The baseline is the
- *          upper raster line for the pair of raster lines that
- *          maximizes this function.
- * 
- */ -static l_int32 -pixGetTextBaseline(PIX *pixs, - l_int32 *tab8, - l_int32 *py) -{ -l_int32 i, h, val1, val2, diff, diffmax, ymax; -l_int32 *tab; -NUMA *na; - - PROCNAME("pixGetTextBaseline"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!py) - return ERROR_INT("&y not defined", procName, 1); - *py = 0; - if (!tab8) - tab = makePixelSumTab8(); - else - tab = tab8; - - na = pixCountPixelsByRow(pixs, tab); - h = numaGetCount(na); - diffmax = 0; - ymax = 0; - for (i = 1; i < h; i++) { - numaGetIValue(na, i - 1, &val1); - numaGetIValue(na, i, &val2); - diff = L_MAX(0, val1 - val2); - if (diff > diffmax) { - diffmax = diff; - ymax = i - 1; /* upper raster line */ - } - } - *py = ymax; - - if (!tab8) - LEPT_FREE(tab); - numaDestroy(&na); - return 0; -} - - -/*! - * \brief bmfMakeAsciiTables - * - * \param[in] bmf - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This makes three tables, each of size 128, as follows:
- *          ~ fonttab is a table containing the index of the Pix
- *            that corresponds to each input ascii character;
- *            it maps (ascii-index) --> Pixa index
- *          ~ baselinetab is a table containing the baseline offset
- *            for the Pix that corresponds to each input ascii character;
- *            it maps (ascii-index) --> baseline offset
- *          ~ widthtab is a table containing the character width in
- *            pixels for the Pix that corresponds to that character;
- *            it maps (ascii-index) --> bitmap width
- *     (2) This also computes
- *          ~ lineheight (sum of maximum character extensions above and
- *                        below the baseline)
- *          ~ kernwidth (spacing between characters within a word)
- *          ~ spacewidth (space between words)
- *          ~ vertlinesep (extra vertical spacing between textlines)
- *     (3) The baselines apply as follows:
- *          baseline1   (ascii 32 - 57), ascii 92
- *          baseline2   (ascii 58 - 91)
- *          baseline3   (ascii 93 - 126)
- *     (4) The only array in bmf that is not ascii-based is the
- *         array of bitmaps in the pixa, which starts at ascii 32.
- * 
- */ -static l_int32 -bmfMakeAsciiTables(L_BMF *bmf) -{ -l_int32 i, maxh, height, charwidth, xwidth, kernwidth; -l_int32 *fonttab, *baselinetab, *widthtab; -PIX *pix; - - PROCNAME("bmfMakeAsciiTables"); - - if (!bmf) - return ERROR_INT("bmf not defined", procName, 1); - - /* First get the fonttab; we use this later for the char widths */ - fonttab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); - bmf->fonttab = fonttab; - for (i = 0; i < 128; i++) - fonttab[i] = UNDEF; - for (i = 32; i < 127; i++) - fonttab[i] = i - 32; - - baselinetab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); - bmf->baselinetab = baselinetab; - for (i = 0; i < 128; i++) - baselinetab[i] = UNDEF; - for (i = 32; i <= 57; i++) - baselinetab[i] = bmf->baseline1; - for (i = 58; i <= 91; i++) - baselinetab[i] = bmf->baseline2; - baselinetab[92] = bmf->baseline1; /* the '\' char */ - for (i = 93; i < 127; i++) - baselinetab[i] = bmf->baseline3; - - /* Generate array of character widths; req's fonttab to exist */ - widthtab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); - bmf->widthtab = widthtab; - for (i = 0; i < 128; i++) - widthtab[i] = UNDEF; - for (i = 32; i < 127; i++) { - bmfGetWidth(bmf, i, &charwidth); - widthtab[i] = charwidth; - } - - /* Get the line height of text characters, from the highest - * ascender to the lowest descender; req's fonttab to exist. */ - pix = bmfGetPix(bmf, 32); - maxh = pixGetHeight(pix); - pixDestroy(&pix); - pix = bmfGetPix(bmf, 58); - height = pixGetHeight(pix); - pixDestroy(&pix); - maxh = L_MAX(maxh, height); - pix = bmfGetPix(bmf, 93); - height = pixGetHeight(pix); - pixDestroy(&pix); - maxh = L_MAX(maxh, height); - bmf->lineheight = maxh; - - /* Get the kern width (distance between characters). - * We let it be the same for all characters in a given - * font size, and scale it linearly with the size; - * req's fonttab to be built first. 
*/ - bmfGetWidth(bmf, 120, &xwidth); - kernwidth = (l_int32)(0.08 * (l_float32)xwidth + 0.5); - bmf->kernwidth = L_MAX(1, kernwidth); - - /* Save the space width (between words) */ - bmfGetWidth(bmf, 32, &charwidth); - bmf->spacewidth = charwidth; - - /* Save the extra vertical space between lines */ - bmf->vertlinesep = (l_int32)(VertFractSep * bmf->lineheight + 0.5); - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmf.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmf.h deleted file mode 100644 index 328e2c0d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmf.h +++ /dev/null @@ -1,64 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_BMF_H -#define LEPTONICA_BMF_H - -/*! - * \file bmf.h - * - * Simple data structure to hold bitmap fonts and related data - */ - - /*! Constants for deciding when text block is divided into paragraphs */ -/*! Split Text */ -enum { - SPLIT_ON_LEADING_WHITE = 1, /*!< tab or space at beginning of line */ - SPLIT_ON_BLANK_LINE = 2, /*!< newline with optional white space */ - SPLIT_ON_BOTH = 3 /*!< leading white space or newline */ -}; - - -/*! 
Data structure to hold bitmap fonts and related data */ -struct L_Bmf -{ - struct Pixa *pixa; /*!< pixa of bitmaps for 93 characters */ - l_int32 size; /*!< font size (in points at 300 ppi) */ - char *directory; /*!< directory containing font bitmaps */ - l_int32 baseline1; /*!< baseline offset for ascii 33 - 57 */ - l_int32 baseline2; /*!< baseline offset for ascii 58 - 91 */ - l_int32 baseline3; /*!< baseline offset for ascii 93 - 126 */ - l_int32 lineheight; /*!< max height of line of chars */ - l_int32 kernwidth; /*!< pixel dist between char bitmaps */ - l_int32 spacewidth; /*!< pixel dist between word bitmaps */ - l_int32 vertlinesep; /*!< extra vertical space between text lines */ - l_int32 *fonttab; /*!< table mapping ascii --> font index */ - l_int32 *baselinetab; /*!< table mapping ascii --> baseline offset */ - l_int32 *widthtab; /*!< table mapping ascii --> char width */ -}; -typedef struct L_Bmf L_BMF; - -#endif /* LEPTONICA_BMF_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmfdata.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmfdata.h deleted file mode 100644 index 30e2b5ad..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmfdata.h +++ /dev/null @@ -1,636 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bmfdata.h - * - *
- *  This file contains data for constructing the bitmap fonts.
- *
- *  The fontdata string holds all 9 sets of bitmap fonts in a base64
- *  encoding of a pixacomp representation of the tiff compressed images.
- *  It was generated by prog/genfonts and pasted in.  This allows
- *  the use of the bitmap fonts for image labelling without accessing
- *  stored versions of either the tiff images for each set, or the pixa
- *  of the 95 printable character images that was derived from the tiff image.
- *
- *  In use, to get the bmf for a specific font size, from the encoded
- *  string in this file, call
- *      bmfCreate(NULL, fontsize);
- * 
- */ - -#ifndef LEPTONICA_BMFDATA_H -#define LEPTONICA_BMFDATA_H - -#define NUM_FONTS 9 -static const char *inputfonts[] = {"chars-4.tif", "chars-6.tif", - "chars-8.tif", "chars-10.tif", - "chars-12.tif", "chars-14.tif", - "chars-16.tif", "chars-18.tif", - "chars-20.tif"}; -static const char *outputfonts[] = {"chars-4.pa", "chars-6.pa", - "chars-8.pa", "chars-10.pa", - "chars-12.pa", "chars-14.pa", - "chars-16.pa", "chars-18.pa", - "chars-20.pa"}; -static const l_int32 baselines[NUM_FONTS][3] = {{11, 12, 12}, {18, 18, 18}, - {24, 24, 24}, {30, 30, 30}, - {36, 36, 36}, {42, 42, 42}, - {48, 48, 48}, {54, 54, 54}, - {60, 60, 60}}; - -static const char fontdata_4[] = - "SUkqACYFAAAmoHICP///////////////////////kFcchgc45Bgc45AgcgxBY5DY5DY5Agcg" - "jkM45A8GocgxBA8M45BfCGgchhzOQxZBiNe/CDQRT6RQ+k4QV6BHcgvBBjCC+KoSjQI7wjj/" - "16I+EUPTpV0rI4LilVtAjjyPuR58jg3CRd6dJkcDMCj+v//qlVsMgQPVY6vugih9Lr/8RCF+" - "OqUUK6C/fHFV9RStf8MulG10fKcN6X+lXOBg+GexX71wxSPCf4/+kE0uR5zE0rtfCFg3oIp0" - "R+GF5DSmQaMS/oG1xen0X2wyh8WXwoI46VPt/kNYcf9J4h/pUHB///2H+t+lkCByDj/r9ZBX" - "H1BAtUr7u/IEOQanrS0eByO16tpVaSWtaEVsNiG66WrBgg05wM4bCYNWDCWIiDCER6HGhERE" - "RER3ZHBfXjaSQ7iOP/////////////////////////////////////////////////////+Q" - "JgK95DIDRZAjCDccgRMhn4g5yC9CD0IL+QxhuIfCCYQTC4IJhBiyLBB7J4QX4gvQgxxBehBi" - "yGDkPhdkEw1kPZY5cEHck5BIJOQc9aI+wjE7DL7RdsMu2GXoZehGDYaDCDQaDSCDQdIOGEEX" - "bDLzCLthl5ojzkeL0NMJhNNbVoJ6kclXuggyOGfugnw3vugv/0u+9IN7pBvdJ//brT3VtdLy" - "B4NxyGsOPRnv9R7xx3/9L+EU/3/f4jj/t+3TdDvkFZyC7hYdKkCCKHQI76SW/pD/6XCKdAin" - "29L9L6/9eEUOrD0kv8IIMNKkq/j/zD5h+P4r//99LfBKcDR9utK62NLxEIIhnmGGlpek3Lz/" - "jj5cv/ul7f+EvimH///0l6CENpfrHt/y9l7kr/4RT/f7f+PwRTkG7/tpav26XtrxoVI5/vSx" - "xsP/7ful7fdd1tv/7FRoj//DLgQZgQCFhlYlfv1kx9//28mPx/7ruu3/t9K3pEh/IKzkF3DL" - "g2BENDtBr9Jh4S12H/+3+17GwwltpbZBx0u0unr0v9IMjhrBYYpO0KZmDikMJsYTCDCeE2Gh" - "p6DTdiEE2KCdo8GcNj3pJsJofjiIiIiIiIiI4iIiIiIhhCIiIiIiIr1SMwyQbOkEiGQCvd4i" - "I//////////////////////////////////////////////////////+QVo7IEDkGwchpOQV" - 
"nIa0ENKCGhyC7kHchocgZschnHIMPtKk7oIP7ulv6f9Yj5DIDaH/3gjjr///+rI4aiIEXngg" - "RZBfCBEWQXsofKggu5DD5Y+Qw5UHghiCoIEYQw5VkCMIO5TkF7shhzOQxZ4IJZxy3IO5nIJZ" - "4IP//1iiPOGd0R+iPQgR3TQIIXZ3/S7BBnezui87MOiPbKHRHqftNNXvTTUjy/9JkcFjTpOk" - "9NsKmFTu+Etppw06VtMjhhO0OLCd3S+rSdIUvyDD+Iha8fQ//+K//3/+D/vbQRT7d9LsjhgI" - "7nH8Ivf/lw0bS/4RT////7f//pfq+lhr6/v/Yf/t//3/+D/sO2NNhpfiP66Xat8L/2//3S0r" - "XIMD/rvUEd9Isf/4Mp5wCDgYBlOzgO0fB3aem2mmnYTtipwCAZQ6DnAXDgynapwk20h/+IiI" - "iIy9ERxEREREZHDLiIiIiIjjj6kNWdP//qP/pMjhq8bSXwojsGkEwmliIiP/////////////" - "/////////////////////////wAQAQ4AAAEDAAEAAACSAwAAAQEDAAEAAAA2AgAAAgEDAAEA" - "AAABAAAAAwEDAAEAAAAEAAAABgEDAAEAAAABAAAAEQEEAAEAAAAIAAAAEgEDAAEAAAABAAAA" - "FQEDAAEAAAABAAAAFgEDAAEAAAA2AgAAFwEEAAEAAAAeBQAAGgEFAAEAAADUBQAAGwEFAAEA" - "AADcBQAAHAEDAAEAAAABAAAAKAEDAAEAAAACAAAAAAAAAAAAwBIAAAQAAADAEgAABAA="; - -static const char fontdata_6[] = - "SUkqAMoGAAAmoHVf///////////////////////////////IZAUfsgeBdyGdyDjkMgI+QPKC" - "GIO5AhzOgyGiCMcgYtUrIKHohowhschs4hnwgXcgRQhsgguQQXwhov6/QYQI7qgRUUk2QIfV" - "F5hQmmugqCMTCBHj/9F8j9JuknWm7rSbCBFPLtou2sjhlBSOKkE3Qf3+kv9fpcMQaXY9PTwR" - "T6WvpX/0v19aVbeQ0D6X7+v/X//QIQfj6xSS4QLS3xx69IVtL/EQy8CvbSqhq4I7//pJeVnT" - "Dr/+Niloufj9fpJLxalYrDtdr2DGk/etf6CDrkduzQkw21/w2prRfYZcNbj1+kQMQuL03hF5" - "sQRT+CEMMj7pAjuk/5DVDINfr+k9b06Stj+GXgW6pN9/kNsdL/XQg/+nSx/0v20vxSv0v/S3" - "/yDA/19sV/6WkQ0D5DY/6+lkDyf/SX9h65BRBDTdJ/StLILuk2lWkl399U2kw0Thpa0r7S0U" - "A7S20rSVtJL/iGrFMSPJv+qYoEaA+KBA4pikmKCWIiDVCINaQ0KiIiIiIoFhoRfSodbS1xbp" - "Id0hx8f///////////////////////////////////////////////////IHMFnMgTA0hyGQ" - "G45DLcg0jkQfyGQDNxBv5DLcg3QQ2EEHDIEaEHDIaDkMTJzIeZBJkEmTwh5kNmEPhB7ITCGi" - "ZDOghsmQ0IIbJhHUEMzPAh8jYOeIuRsEZFHCZEHBDhdoww1DLm0bOGXGwZccGXHCMDgwQMED" - "BAwQMEi4ZwQdAg2GEEbYYZc2EbYYZcwwjB5dmDgwQMIMJoNbQNqHuRxF6I7YQIN+6BBrDf+E" - "E//pf3oEG9tAg3vC9//126bQWlXh0gyODd+l7fXwv/0u1gio0m90m916x9uu60nXXyB4G7kN" - "tx6JwU9oEU/4944qP/pcEU8EU+37f7f4j/q6q2tpDXhYaShBBDer1XfJD5IdL/0vtf9L9L//" - 
"ergin9JukvIHk5BiAggw+kn1fSr///9L3r2/fS30of9r1exWqXp4QQYaWl9XH/a2vH+l9/t/" - "6X58mgN//r07dJe04QRDYGGGgvpVeXb/jj5gT8X7r7f+CX6CDD/bp6bXY/xEIIQw16Xq8N/y" - "5ZcvT/Lp/de3/j+2QMd/r/p0l6CDdf0h73//ZF7/w37r99/fuD/vVq9SP3S9hpd+lLj/6444" - "a/9v7r39L0tt/7Xq9b0vDDIbAwQQu2ElKHq/fr3f/2/dfb39/b/V6jjSb1Io/hhiEFbEECFK" - "r/euRR+//28ivxXt913XZBcf/jaevr8geTkCHDDCCIF3bEk9XpN6X7f/7f7+xtpbaW+l2l9K" - "3pfpqGGEErBhJfCTBk4wl+wf/7f9fsMJba7cMJbDSa9JvSX2sPCwxCQYQaFBikIQQwQMMYIG" - "CBggeCBsNCgg3CBhBuGKBA2KBA24hAgbFdOlYIGh+NCIiIiIiIiI4iIiIhxEGCERERERER9L" - "GHfVBF0Tgtg0dSBoDTYk+h40PiP/////////////////////////////////////////////" - "//////5A887IHkOQbLIE8EFaCGvBBmsgosgaDcg3HIbHwaIbIvVVIZTkGHVUtv9IOHRHBU+D" - "g5DJBx//QRTr69fr/+3X+I+v/pa//v/9N0Q2XnshsshsjIaMyGjMhlOQIHycZAhyDUOQy+IZ" - "xzWQUWUOQYc7kGMyGdyTkH41kH4scnZB4JwQxhrIYp/64hF56DCLzBF4aLzQNF8+DyuCguuF" - "Kw/ApXIvMFTCI7FhU0XmgYUL/ap0tow3/6TdN2XCTpB0rVJqJHmHD6BYbNhoDEjzSbDDLhJo" - "NnHSdQ4cMJoMJQ0DpBphVC//x9v/ScMEkwqf9Lpp6dJum18cQwX3V9XXWv/pN9OkKX/9f6X1" - "1/TpdX+6umrDdRSS2yBGFv4iQZu/9D//4r//f/58CP3XI/p7pL9F9peEYv/zAF8NL/hFP///" - "/t/utrrutN6SQYr0F//7Ff+3////g3/11dJ+l+I/+ld7ey4KP+3//fpX5DOOD/3sb8j+6X/9" - "en1+v/b//dLr//Vuo0rY0ib//aphKGYdtAinbLfROC//Yf/8NKGEmwvaUOwvtK3SX/7DPcUG" - "NjhsUEHhBwwg8JuEGEGEHDCDhhiopiCKcIOKeJHTd8JNuh/+IiIiIsubERxEREREZcNKIiIi" - "IiNDj+En/X/IbQdf/+Cj/9Npd6SXq3WLDSrwSEdigkEGCDrEREf/////////////////////" - "///////4AIAIAA4AAAEDAAEAAABBBAAAAQEDAAEAAAA6AgAAAgEDAAEAAAABAAAAAwEDAAEA" - "AAAEAAAABgEDAAEAAAABAAAAEQEEAAEAAAAIAAAAEgEDAAEAAAABAAAAFQEDAAEAAAABAAAA" - "FgEDAAEAAAA6AgAAFwEEAAEAAADBBgAAGgEFAAEAAAB4BwAAGwEFAAEAAACABwAAHAEDAAEA" - "AAABAAAAKAEDAAEAAAACAAAAAAAAAAAAwBIAAAQAAADAEgAABAA="; - -static const char fontdata_8[] = - "SUkqALIIAAAmoHcGf/////////////////////////////////kMgMsfUgeDaOQLjkHHIZAN" - "T5A8K5AiDQQ0OW7kMqCEHIZthNJkcMwuGQG8g34gYcgo8go4hmwQIDIGIIL1EGOIKO1/wRmG" - "cvBqEX3S3dBGJhUwmlQSpGINF2/9cIxkfa9U+k2Q2OlpNgqaNzWwgWk2k33Veluk2q6STadJ" - 
"U2jHlzcJtZcGlS4RJOt9f9f9L62GMw+vC0np5HXS/0n/6Vf9dapwxpdj7rr6Wl/f//v9dJLa" - "kG76X/XXpf//v/j62kl4I2i4ZVd8caX8UrS/xEgvV7aVMUP19f615+S7/6BmGXBh70tK21ev" - "60lxefkmGla/8WxVZM9Y31/RDYOEl5uappMV/1sGKhNfYX/1EOuEHiR57DbXfUMOieIxwZgN" - "vjpfrI7a9XQdJF9sSOv+QL+qLzSt//9IW6x6tUg21+Q2qpHnS3Tf5BtTkNSi/06710rYpeDM" - "MuBi6pNq3+QZX6/S0J8DHdUn8f+v3S/Fb9L/63r8hnH9f26/rS0sgXj9fXpV+vuP9X9Igofy" - "DD1el6WQPCR/pL+w7XIZUEGx660nS3V0vSrv/qm0m2UBr61T7S0dAd13XSTdBL+r0l6YYX+t" - "JtK1hhK7CTDCSthJLpeIpIMUGJHaf9rYohsQsQiBhDEIMQtiECCxESCjKESKPdDQqIiIiIig" - "sGhF1Wh16pfbSSrFtKh3odkcHWI/////////////////////////////////////////////" - "////5A7AyfkDqG265DJBRxDKmQanIZWpDKDIOnIaBhB05BQGQwgkcgiCCIIIglxBEEG/kGPI" - "J5DzIN6EG+pDKoQ2akDFCGBBBDkdCCUI5kE8iuRfIPxCwCZBHIYGMFhMI2w8M42COFBnCDIN" - "7JWQz2SsEcKQzwDBENEENkENkQRDRANwQNgwQRthhnDYRthgzZhhGG5cjZQYIGXDOCBhNYYW" - "k2rMBNcu2ECBhptBtAgdoGHQPQdFwTv+l6T4QIGG0Gwi4UOg2gg0777dNXg2gg9Qq+m0g37p" - "eG/8Jf/pd96Cb7Sb9f//1pvbS0vV0rT9L3/0v/0vWCKjV91fdJ//dK/0n1Xx6eXX0vvHGv/0" - "uXTkde9Jv0m//6+/T20rSevIZCggrxpErPFpX+O36j/6C/X2//7/Ecf95dUnSdIUvCsNLCCC" - "I6vvpL+RR8ij//pe3++lfpev+2l1ffdJeQPCOQ0OEEw9Un6+q3/0v/S/S9v/S/q//tfYp1S9" - "NMIIMNKkq1uwS////0vb/b9+t9KZg0fdL3Wm0v/CCDBpdfvF/wwsMLx/pfpff+Evz+ygMr9+" - "ldPdJe00EEQbpww0tV0rmDf8cfNhfxD9/2/8/foEw//f/Y0vEQQQgw6+l3wb/mB5gfoP8wn9" - "pe/+P4bBv90vfvS9Ag2l10lff++//7fv+3/3+Qau/vtK0kXTaX6bq9ePe9L/shZ/+39pfff/" - "th/3S9/+vhhL/SkcJ//HHBr/2/f9v0vS23/vdL0m9LwwwgmRwb20R1SW/f/d//b+0vff2/b/" - "3r70m9LwwyDdOEENsHpHH3+9LIUfv/9vIUff9vuvryGcf9dY2KX1IUfwYMQgnFik0r1b0v2/" - "/2++K+9tLbXbuu+Oum9L8geEchogMMEEQzXbFBb9N6Wvf/7f7+xvX1t6+k0+k/X6ahhhAk2G" - "kt6TZDj4S/b//b0v92GEttLb0tgwvTS3pL/QbQWGDBL7CQYMFTCVhbDBrffbaYW2r3YYSthh" - "K7gwguKr0m9Jfaw8JoMQgQYIMIQgxCQhAhkHQGIRBhBI5BEZBhAYaGCB4IGQSmGIRBugMQiG" - "hDDiiCg4YT+EoZDOhD8aERERERERERxERERDiIMIRERERERH1xb+qQfpJBF2UAZhn9EDUFTK" - "B7xoQYSB7Qjj/////////////////////////////////////////////////kDxf7IHgQOQ" - "VbIH1kCSyCrZA8cEMyCBqHcgYcgYfIHh7IF4TChVCkM1yGhwoVe+loHBwi8gdNMOHS2/tL6H" 
- "/yGSCkP/6BFOvrtNeE//Sv9cR+v/p1////W6////p1zZkNnZAv2bCDcchsHyLGQ2DmwnZAuO" - "bCBfiBcc3EGochoHNBAjsg3HIQcguOSHLHLHIJMm5LiC7kMocmOWOWOQXciv/62JDZPQZBv5" - "DYhF5z4Zy8yr0yDGEGM1yDGJoMgxyYRiDIEYmQboIYxNF2HPg8lkaH6hMjhDjQ//p0Xb0XmE" - "YmEYcJNhNJj0Xn+gtUXqL3ReaQbVF5ou1qk4TVQwgYQYWDCDoIMIMKXH/9bSbig6CDoIOlyO" - "jAbFVthw+gsG4qwbbSsGKDYQQcMSPJRSBwd6dPbSfpL/6f6tdXqx1YVf6XTCevem168GYDR9" - "fSutLS/9WxeuqrV/9/wl/7pXXXQ/91p7pXjSW5DRhFH+sLuor///6C//33X4P91bl1pjdJKt" - "hovBr4iQPKn/x/X/F////7NAz/v0tavW9aYaXhG3/+YDM2l/zCf///+3+9e3TvSTeglDFegv" - "//bS/9v//+vw3/q3Wt6pf0PpfV3+xX/t//3635DNv9utb0R9t1X4/+vreyOGZ/2//+uvyGx3" - "/16elvVIjH//Xp3/X/2//3X3//WKjjSeNb/+10rtWyMfX/2//7q0rX6u1d2kraSr/3RdYaTD" - "LdsIv2GvJAZ/+w//2GErCCbCLr2EoNiR161b0l/9g0HI6FBimKg2KCB2CBwwQPBA2wQMEDBA" - "4MEDhhiFFBisETgwITTCg2vCTDaQ//ERERERZg2IjiIiIiIzAa8REREREccfwgg/9f6X+v+Q" - "ZK///0x/+m0sF0q9W0sW6XyGSGkOkI7YSr4rYhAkEGCDrFhCI4//////////////////////" - "///////////8AEAEDgAAAQMAAQAAAP8EAAABAQMAAQAAAFUCAAACAQMAAQAAAAEAAAADAQMA" - "AQAAAAQAAAAGAQMAAQAAAAEAAAARAQQAAQAAAAgAAAASAQMAAQAAAAEAAAAVAQMAAQAAAAEA" - "AAAWAQMAAQAAAFUCAAAXAQQAAQAAAKoIAAAaAQUAAQAAAGAJAAAbAQUAAQAAAGgJAAAcAQMA" - "AQAAAAEAAAAoAQMAAQAAAAIAAAAAAAAAAADAEgAABAAAAMASAAAEAA=="; - -static const char fontdata_10[] = - "SUkqAGwKAAAmoFQGz///////////////////////////5DIBocgZg0PkDwy3JvkFdyB4Qchl" - "DkGB7yB5OnZBQ5J8hmckQ0rBNUyDSOkQWnIZXkMqZBrghs0INDkM/kdkDfsLqqhGYKDEHp0k" - "G0HkFEwoQaaqCcWQzzCMMPXfwg0m0gi89KyCgekkYmCpppYQKgjc0m//0Yy8/16VtP0EGwqN" - "to22ugtBBtJv2vpLdJtJJ1SbTpJKwjnoOgg2swGmFLgiStb3+lXf/69v1bYLpuuR1pLVX//X" - "r/S60mwYorKXH/dfS69J/2vX/9UvYyGU699PXXpa/3//4+l1S2EcXqvXHX1qr/8RIMCP17SS" - "pwggnqvj1XpClpf1+3SWlS2l/v6S+btbr/IKbknv62KH2Fel/VJeEGlTDS/1W9tJKiGL8f/1" - "Sri83qxVr/sQ2K1JBpXel/RAuOFXm29On//YMUk/dhf+qEOuEHQtWG2v+w9GEwZuXj1/Uuw1" - "6bnzaSDtF1/wbSI+Sdx/X9IQ6WPCb0YbYr38MvvCMTVv8gqlyGsR/pX/ukkHaS8gqiMOkk2l" - "f/pfpOlvXSTYa/9/b2/yBO9f9cTQMzuu4/RBSgnHpJe2l+KX6Wv6ST1j//7f/2lpdf/pfkM8" - 
"el+xVr0/pEMofIZV16+v//9tda/pdZAh1vS+sge4/0kv3fyGbBBVeutK126dLtJLuq+ttJuH" - "+FTV/SOR19dJPSWqr6SX2gyx+ur7S0LbS20n/oJf8PS20mwjeNtf0noINYMJBBwwk2kk2kEF" - "texFJBiExCYXXTWwwkCBrEIEDimGEErDCQILERBgsQwgafFRSDEIRDCEMIMUIYhQWQyAaHER" - "bSrERER/0q90tfukqxbWh3odtLbSxH//////////////////////////////////////////" - "////yBTDMpkFsFhyB4YOQyAboILYFByB4hyB4vkMgCIK4iOQsFWQ07IZxyBEeQyQ1PINNLIZ" - "icEDIMeWcgoBkFy4IGQIIIoZByCDhkHIInkMEEDFCGyhBJkFzggyDcYCDINxgQMgwoIIGRDk" - "EIIp0O0MhjrIPyZDCj0GCD4aOEHEN3CPDDaDTQaapp6bwjxByc2EeIOTmGEcbw1TTT7ppJ1U" - "4B46aPGGmQabJeECIJZDPZEmDNhIM2JQIHBggwQMEDBAwSBAwQNo4DdkCHQIGyCiw2gQNkFF" - "htBB5cZwWGCIMOGCBhBglBggdBA6U2Ca5c2EbDvwbSayCZh8Ogg+/6C329JvbSb3SD777/q3" - "TdQq9INoIN/oL2/9J//S7W9IN9pBvv//tJ720m0tL/SbT3X2/9L/9L+XXSvdK90v//1p0nrS" - "+npuXX0vb66X/9Ll0176b/b///eu++1/yGQxyBwOOk63+++ONV/6X8uu3r+l/iOP2t6uk9Cl" - "4WHqR8e7r6SH/Uf/S+19v3/f/96dGF7q0kvCw0qCBAn6vpff//pe9e39/3pX/a9XTaTql5A9" - "wQ2QEmHWgmKer6X8iPkR1/9L7X30vSS///991bpL1TCCDBpKv76Vb/9f+l719+/W+lD/erXW" - "K0v7wggw0qS9K4YIL////QX3+3/pfpMoBq/a9XTTapfWCCIFy4MNL694g/44+P9fdL2/8Jfn" - "mzoGZ96dX+6S92ggsMNLS9bmyD///i/v9v/P/6BMP+/r22KS8RCBCGGl+teDf84POD82DH79" - "1//5HDL+Gw3+6/a/XhBBhpddK+/9PT//N7/r2/8b9yGpT/q1ek2l9BBuvS6vu9f+yDuRj/+3" - "9r7ff/2D/2r16MLpfT9+kh7/X/xf/t+9e39fW2/71q2qV6XsML+qV//jjkCM/9h/a+36+u2/" - "/9dU3peGDCCbdtalw/2/93/9v3r/f2/b/20r71frwwyGWXBBVbaL8JK/+l9//t/a+33X1//7" - "G+levhh4QIXYqKNFX7fWQR9v/9vIO+9e3uu2ltkND/rHUaTekQw/hhiEE2IpK+l6///7elx+" - "33X+313TXX6X5A9uQUQGGEEQa4tKr9vS/b//b/a9jbS20tvX16dJvS/TChgwgk2Gkr6TDILj" - "4S/Yf/7f/+2ltpfdbaX6Tfr90GwgtsJd4JNhcEtLb//b/r3YaWw0tu0uDBJp9fSX/B4WGeNB" - "NNCEGZkghCCGEGGZlCDCDCDwg2GhhN0GE3YYJBBsMEEEGw4YJBBsV00kw0Gh+1QeE0xCCDBB" - "hBMQkCChBsQggwQYQeEG2FBA8IGCBuGIQQYYoINuIQINr8JWCBr4qIiDCERBhCIgygDw1IiI" - "tCLhghBghEGEIMJrxER+hEaERDiIiPpaB/0g/SIGwCcdJFzOgGgr6jEGvGgamgH2EL4j////" - "//////////////////////////////////////////+QP6EDob+QPBoHIElkDw9kCyyBJBA8" - "F7INVkDYDEZDLjyGVCZBXmCqQZPIaUENEAoKlt5A8sTSfV00/S2/6BwdF3D+Dg//pr6Q/+QW" 
- "wbj//MKvrtNeC/9JN1/iP//+vr//+k3////9r///+k9ZeECzPy+IZY5BuP5AuOXhHhDKHL4g" - "tOXxBowscg3HLjIGByHHIG9CMci+Qzv/+3BEMyeEGQMUCGQLzyBimgwUgRmRewVNBgqDIZXg" - "qYQsFTIEUyGzAUgucuippgmRLIOcuhDFX/pYhPTChGHCNzROBBuKAXpgoLoLBU0wVMIwwwVN" - "Fzgqow2icEgoYIGCDBYMK0EGEDClxP/7YRtvl20YOgg6CDYVBNaMXfQXovNGK6MUIJt0XbCT" - "WqCDhX336B6apJL/0ug3bpB0nSsGbDZZsNghBsHB9BYNhiE2GIQbSbBsNoJwYkergzYN4P1p" - "9pXXX/q3vTaWrr6V1/pf9at02vTX/t7fTaT+l/9Y/rr0370/6XTT0/fr44/6WnuukKpdkFFk" - "K/pN+9DWv//6C//S/rq/7+XVJum9Kt0DXxEF9V///9f/991+ZgY+6Tf8VrQSww0YwaXkDwOE" - "f/H3X/H////sH/+k2k1dJN6SQYrwjj//Ng1dL/m0////9h/t1/tvpN6SQa9Av//ev/b////w" - "3/rpN6ekrelQ+v//sMJf+3///X4N/3t+lt6X4+l6V33hiF/7f/9+t+D/ulr6L70q////+XBp" - "/7f//XX5BQO/9/TdJNvpER//16d1fS/9v/919//1emONK71r//0rtb1/9h//3Wla/XrHWrxS" - "S//YRdbpsijtourZFfT/9v/9+0E2vrZ3hourW0k26X/7aWgwgmGFYaVsMJJzWBDtPTYaaYTt" - "O20oaTYRhUGnUUxV76V0kF/9ioOXQpigxUNiggbYQOGEDwg3CBggwg4MIHDYaCimIWEHDCCa" - "ah9OrDeP/2ENBoNMIQwhbERxkcMgYqbQTCxDEJpoX8RocfxEREUYE4jiOIiIj/2En/r/IG5d" - "J/1/////H69JtLIH9NJf3S6uq9ISh0CxdL8gt46iO2kl6FbYSCQIMIHWGISCTCbWIiI/////" - "/////////////////////////wAQAQ4AAAEDAAEAAACoBQAAAQEDAAEAAABCAgAAAgEDAAEA" - "AAABAAAAAwEDAAEAAAAEAAAABgEDAAEAAAABAAAAEQEEAAEAAAAIAAAAEgEDAAEAAAABAAAA" - "FQEDAAEAAAABAAAAFgEDAAEAAABCAgAAFwEEAAEAAABkCgAAGgEFAAEAAAAaCwAAGwEFAAEA" - "AAAiCwAAHAEDAAEAAAABAAAAKAEDAAEAAAACAAAAAAAAAAAAwBIAAAQAAADAEgAABAA="; - -static const char fontdata_12[] = - "SUkqAFAMAAAmoFsNP/////////////////////////////////kMgNpyBoLGQPBocjfIEkED" - "wU3ILjrkDxwmnkGmKIa+ENfFshpj0Qy5kNIcg0UIHhxyCjCLhDSHIa9kG8yGZPCqpAvBK4YR" - "oCU0km4PTChBkMqgJxhMhnCBBhB6u/QIoBubbpPSb0gjbYKmEH4S0bNo43/rhBpNqjHpKyBh" - "/SDYVNNLCBUkG0EG//0Yi7fdJOqt3S02CzjaPNroLSdJv6qtLDS2qT1TaaVLo5UEDwQb5gGx" - "TAYXdf/ql9PS+t3rVwurp0XXS6SdW+v9f9fpJwxRcUrj7/9JUv/7v1X/Wkl2DGv9aTpel16X" - "v66/6/pbkMyK79/S+tf2///H6tJLbBHv6/4/66Vpf4iQYUfqulXhAioHSrx6S9If//9uq0kk" - "tL/f0v9K0v/v62KHbq9f60vNNdhpX+QJ4JXe6pV7X1+qSXhB0kw0tf6Ye2l0RNFxb1/oEF8W" - 
"pf0xC/14gwxCSTXv6/yBiiXON4Qattr/sGOmtcL/0oNeEDappMO1+thpIxyIRuOl+kjDdcJ4" - "lzemwwjC/4byL6TbNgp//6ENpY3CDpBG5sV/qQaCEgjc0rfyDKTIbWiX6T+9WqCDbVbkGRRL" - "t6Tav/1/pWl9PShsNL14dJK6b/1X9LXLHf1Scf//bVv8gtRVfpPEX71vXRAnslG6SX2l+K39" - "a/qlrjX/+3/1paX/pb1+Qbj+l+2la/+lkM26/9L1T/+26/Sf1IZg9f6X//0l+xT1/6VrkNDp" - "N0vSWQPOOvX+2/yGlBBkdetLr/WrVLTX+km0m2H+Cp1a6RB3b+0n1eku/9L+0DLHtLpNXrQu" - "0t6tKrUJfXD0knpgwQt/+rSTW0EnYSbpW0kF/weEtsJMTcF/Tqw0iBepYYSIZurDCTDCSsMJ" - "BLa1DEQkgxCYQa0taoMV8QriExVMQiCjsREGFiGEGm8aHaEQYQsIMIQwoWQyA2nER6pIRERH" - "3Vf26pf0kq9v1xbSSHdKFtpDt11WI///////////////////////////////////////////" - "/kC0GD5AzAxBA8DCCGQCoQQMw0yCB4EEEDwYoQyA1YNxDuQ8Hwg2YQ24vIZILHkNQ+QaS4IG" - "QzqyGWkILkwQMhs1ITUg+pB9SD6kJQhjUhmHIGDkMUIZyAgyBgGEGQMBAgZDPQhaEEqIQggm" - "hCoQ1QyBFqQX5MgwGQl1hBgg7hhHyBw/CPkD///vCPEHDCPEHDRxhx/r+CeE6i5wDwxTCPkG" - "pDSmT9GwSQ0TIzkMuZF8homR+EcB2Q2eQI8g38g38g3+cBQfDUaPgoZDZYQIGGQMTJTCBAwy" - "BiZKaBA+QI4hnsGfAgEDBWQe00CbWvRttGwR7CDYQQdhEE9hA0wgaQQdpppppBNPTtIINsIN" - "oINsINpPLhDgmmnaaVyGzkgepgCPwg2EEGHe2k+GHvuk//pdrek3uk3uk//6/t02lSX7aTa+" - "l4f/Sf/0v70m9tJvbX/967SbV60vS0nvdL2/9Kv/S9b0n9J//3+9td0m0tL90m5dfX2/9L/9" - "Ll0+XT9vfb3Sr/3S/ur9J8erX9L7xxX/9L+XXb1/X/f6/+6dJ0q/IZAdyBY+pCQ9X+O/0P/o" - "L7X36v6v8Rx+/RhVbW0hS8LD6BBny1fpL/X/0vevb1f1f/90r/un0vCw0lRyddXr9//+l9r9" - "/f96V/3ule6TaSXkDzggogJMHVIJjdX6/yFfIV//0vf9vS9JL//dL3Suuv00wggw1Vf7wku/" - "+l/6X2l7f//pQ//691bVL1sEEGGlpVpeEFX///6Xv+/vpb6TB/36t7FaSX+EEDDqkv3iv//h" - "hf0vtL2/9L8IKdQ0/uk39U3SXvhBEMomGGgv+rg/44+P9ff+/8JfnOynBp/f1q+qXtMIIFhh" - "paXq84Qf//8X9pe3/nP/BBv961b7Yr8RCCww0vSXvITv58efH5wNH79/2/9hfuG/9ev3S8II" - "QwaX9Je3/CDwg//zif2l7/4/tkNQP9vbXpPS8IINpdfvvf///7fv+339/kNqf+l7a20l8IN1" - "fpJX36/9kGCP/Df6Xt//7Yf+/r0Y//v+lx7/X/3/7f3/fpeltv+9at0lel8MEt/ST9/33chs" - "//2/evb39/b/9f1pvS8MMIJvbRHWpgMfv8cbD/+39r79/f7/t02l6vpeGGQaSYQT3YXX/9L/" - "/9v3r2/r62//X29K9Lww8IIXYrCR4Sv2/9v/9h5Bgftfb3XbXbINx/1/rpX8gw/hg8IKwwmI" - "S76V6WQXf//29divvuvrbuu9uo46vS/DDEIJsWkkr9vS12//2//29tLbrtV+o3dJvS/IHnBA" 
- "vYMMEEQ04bFLfpvS62//2/39jettLfrdWqpX0v0woYYQSbaS3pNkM4+l+3/+3/Xu2l2lt69p" - "fpXr+tBhhArbCVPhJhhcJft//t67+7DS20tu62GvT030v+G0FsMJLagkygWmRaYLsNdf21BV" - "q12GEsMMJd2EtgwSafX0gv9B4WGfMIEUAgNCgxSEIhlkyC+oZoOQY0IXQhjXIZ9GDQyGEOCI" - "YYKAIsGCRAvoydogX0YcGEiGXoxX0CTBkC+iH7Sh4TQYhJqgQYSBLhiCu/t1vTtwxCsMQrbY" - "hWwunSbv8aERDCERBghEQZIA8GWIiNCLhghBghEGCEGF+IiP0IjQiJA8C+CIiK64QP6pB+kk" - "gf+i4zUBoDN0iBKb0INfCigak4HhI0QMw1IvYQjj////////////////////////////////" - "////////////kD9BA6hrjkM2CGYP5DIDUggeBiyB9hBYsgeGVBDVggbQ2ZiVHkGiCB4rkDfy" - "B4bJqQN5kNdyCiCBEyDVNBbeQPHyqqqqaf/e6aRBYsgeBfEXcgUYnZDRZDUtLb/90hf//9NL" - "1/8gtgsP/8xtfS2mvBf/X/8R//6ptfX+v/Xr///+m1////V////9K0iGb/kMz8g0fkD4fyB4" - "ZxyG3MhmjkDwUp5DMHIYHIHgTj//uwQTycyDTMhl0wnhPLmQy4BcheyBeC5kfgpcwQYKXMg1" - "0M5DZBPAg8FBSBBBM5DCCK5EoQx5C4QcgmcguI/9KxT0wQYQ0bmiQGgwyGBFMhsmQInpZDPN" - "NBkNk00cYZAiaDCGQXmFRttEgHkWbuune7//7hGDeEGEbOEbOEEGwqQfT10C9NNU0EG1QYRs" - "uqQcL4YIGCBgkyFsG0CDBAwUwFX/pXQfRt0EGggg6V6TWjDZBRZDZmlkFFow2jDkFGIw2k5D" - "RiMG0EGiGy1p1Bwd6fp0n6S/+n24hBtXSDpNgzYF84CgQg3voLiEGIQbYhBtJtiEDaTxLuuQ" - "0W76991paX/rdPCdLp/0un/S6rp+6dLhP//WtNq36//TY+366X71/pdNPWr02vjtft72rpdV" - "SXZAxhBx/X66f9v/f8Jf+9X/1Y/62i602lqKXug0/pv9RS1///QX/6/pfD/br3WKbpJBbaDS" - "8RIHgYPv/DC//+v//7/ygDH/dbprVIJYbRuBhLwRmv/x9pf8X//v/7B/6V17vShh4QVBj8I8" - "f/4L6/5tP////Yf7fq2vfTeqQa9Av/5wNS2l/7f///+G/9J66vVK9KgYXpf/+w0v/b///r8G" - "/2+9+26Sf8fX6u/2K/9v/+/W/Iav/6WlaSL71S/H69f7wwv/b//66/D///pb0v//16vouGp/" - "2//3X/yGU7+rdOrGrd9EKP/+vttr6/+3//daTf/36xVJNukkv/66Xe3pf+3///Wv16sfpXGl" - "//aLraTbYRhYZCPp/+3/+2laTYX1u0XWmnV9L/+wl3CbIjsMJbDCXIwG//Yf/7aVoKGEbXus" - "zthLfqm2kl/9iFMwXBhJhhJiFMwzjIMEWQYRBkMEZBghhkEIIYIMRMwwDg2GlDCTELIMaQwS" - "ioqZgY7glB6H/7XL4pimlYVtp3fbV3dp2xCimF6EJ2uq92v/2hoMIMINCGEIbERxDBCIiIhh" - "TeEGsQwmgwhd6EccfsREREIwE4jiOIiIjX+Egf//1f9f8gVq6/6////S1H0vSb8gfo0v90vu" - "v0m4WLrXkFsGsdRHtJL7S2GCCr4rDFEDwUYQyQ0yCCqGlhgqXaxERH//////////////////" - 
"/////////////////////4AIAIAOAAABAwABAAAAYwYAAAEBAwABAAAAeAIAAAIBAwABAAAA" - "AQAAAAMBAwABAAAABAAAAAYBAwABAAAAAQAAABEBBAABAAAACAAAABIBAwABAAAAAQAAABUB" - "AwABAAAAAQAAABYBAwABAAAAeAIAABcBBAABAAAASAwAABoBBQABAAAA/gwAABsBBQABAAAA" - "Bg0AABwBAwABAAAAAQAAACgBAwABAAAAAgAAAAAAAAAAAMASAAAEAAAAwBIAAAQA"; - -static const char fontdata_14[] = - "SUkqAKINAAAmoCAz/////////////////////////yGQBw/kMgGYcgw5DJBpvIHg1wR3kCuC" - "B4NFhbrIHiwnZAxZFjIafUQ2+BJJshrRkGnyGtBBqmQ05kNqyBcQQ1YINyZBRMhpfhf1CMwz" - "S5hqg9W4aggwoIGCDCWC4QYIPXrwR1BQm6Wkm6pGzYKmn2EFQRsgwjhB/9UjeXg0m1RifVkM" - "t1VBNhUGE1pAtBBtBN//hBYdboJOkk2nVJNgj3R4s8b8JUk6TftfpYfdafV09VbQXCDcEHWX" - "BWCmAIraTf/9eldL0ld1VcLp6bRddKkqff91Vf9fXbDeqtwum0v9L11v/+v+uqSwxR+rx/3S" - "9LS+vfqtf9da7DHr+/pel/79f1/9dKr5Boha9Lr/9L1/a/8fXSqsI/ev/HS9Kkrrv/IZ0n9V" - "aSXYIEU467ePX6j2v+I/tqulSulfX+qX0ldf/e9U6Q9wr1X6pfJ+u2l/kFqyO/tJYr2vr/qv" - "BA9JhpX/XeG0qqtq9f1SS9NIl3DS1/pg8MQlyJWuP/9JfF4QaTFN//EMaVd36/SIZrhNLnCe" - "EGob1/2U4bUJ/cLX/iDXQQb06Ydr0uw6RvZCaePX6V106EwdK2GF38NqQnJOzgE/1/SkcbS2" - "nhBtQjc2JfX6kGrSgjDDW3/r+hDfi3CekEG2v62XmoQTdN/kDgCIKtS/pOl+2qQba/IHCTD0" - "rat//X6Ta/XSuGEl/htaur/0v9et91SbH/+l1evIH0a/pOhJAaf0t/ogtWRY3Wm9v/GutLX/" - "S0sdfpfbS/X9L/0t/r9L9v/pv63r19L8gXH//tL9ddKiDVn9fX19JfbFPXXWkQan+npekv//" - "99df0tLIbHW+vXIHjj11S6bf8hrWQJHp/Sb/rVfS01/rddu/BUH2lpaW2k9JNpJa63pJX3D6" - "6TX9IoZddrf+gvrvS3psIMk7/9N1odpbpOkraQS/70km0mGEcxWvWrpJqwwknDCCbSStJL+o" - "PCW2EmKDXWtUwwkQy06xCINQyKYaWGGEECC2vDEQkgxBMINN/TSsV9bCYhJMUCBYiJBppiGC" - "DC0hxoMIRBghYIMIQwULIZAHDiIvpKIiIj91X7qtfdUvuklXtrS4t0o+lC20h263SxH/////" - "////////////////////////////////////yBlyPyBmCy5A8NUMhkrQgaA6CB4NKCB4ZhyG" - "QBxZCDkHcg8EUcg3cgr35BbB5kGw6kNRQQ1QZAgwQaBogwBkGgGQ0VkPWQxWQxWQxWQShBes" - "g0oINBBDCCDcMhmJyGWrIaichmKwQMhoEyD1kEDIPUQQiPjIMTIaOIL0IKMIEDc8B4WCBggd" - "sMIMMgYZkOCDDQYQaDCDShoNwg7QQMMGEDYYQeGE0GEGg0mGk1uutMIPBnthGYRAzwIGQaMO" - "nIKMPWEZhiQL8DBEMrgYIhldOBlngbcEDZDKgIzEYM8EYRmIyGbhCURwJwZ4C5gFAIGEGCwY" - 
"QNoEHSr7CMxA03ISYQIgxjkGJ5BiMgvCBB6apqkqtK9AgYbg2gQMPBsIINTAU8FT70/T0G1m" - "A2L5gbRwF34dBB8N/4QT/+gv70E3toJveuv/XT20m6pfSDhBBhp7aT4b/pBV/6Xa3oIN7oIN" - "7aT/+3X7aTpaX02k/ul7f+k//pf+k/aT+v1/+qT1daX/TaML6Xt/6X/6XMJowswnre63vX/7" - "ave2rpaXi6Tffpff///hL/9vSb9Jv1//6/0m168hkA3H0np/r3xxS//S9tL2/f9/xHH/tGF2" - "6ehXwpA/foh7bW/Ue/Uf/S//b0r9K//20vtK0rSS8LDpIEzZ19Vv9f+l9pf+/7//+9e6vpeF" - "hrhHmR/at6r/r/6Xv+3r9L9X+2lq3t1aSXkDyggYgJMHSSCjf+vvIO+Qd//0v0vb6/q9f/79" - "LSbSr00wggw10mtJ9Kt/+v/QXt/t/ev6V//pPtpevqmEEGGlr/eEl//X/0v0vb1fpX6Yf7aT" - "98baSX3ggQYaSSXpPhAv///9L2/2/9L8JSQCr/+vadJL/CCDDS6r7j//+P9L9L//S/CTNYa/" - "3S1dJq+vpoIIg0AQYaWv1yXDZ+OP/0/b/b/wl+ZDIgNP999+6S+00EFhh116vOCB///xf6Xt" - "/5Z/4Jh//pe3el4iEFhhpaql3g3//OAX/ft/t/8L9wb/bSferYpLwghBg0F9aT7f84D5wH//" - "Ob/S9v/H9shr1f/1arpeEEGGvX97f1///t+3///7kFU/7pWr6MJtV4QINpeqST7////7f6Xt" - "9/f2Df9//7r8IPX1xfd6/9kNGn/t+3+39fW2//ulaSTel9+36Xu//7//t/17e/v7b/tpe+k3" - "pfDBf1pf+scchld/7ftr7fr2u7//1ev14MMIJvdUpgGH96/b/+3//919d/71a9U3peGGEE7d" - "yOqSX79e//7ftpe3v/7f/avuqV6+GDINYEEEO2EnCW39/9//t//t91t1t/09aV6vpeGHhArY" - "qKLtL6fSyGd9//28hoftL2/X12yDd69bX/Sb0iGx/DDwQTYaYSW3rel/f/7f7/t7dbdf/f8b" - "1V9fhhiEE2IpL9N6/t//hv+K9vbXtdv/V6qNX0vyB5QQy7DDCCINsWtPq3pft//sPXf/tLet" - "vS26jd0r1/TBIGGEEm2l3pN6X7f/7f9extpbaW3a9r1Svpfrhgwgk20l9JhkNj4S12H/+3+/" - "u2lsNL+uwk19N6S/dBhhBbDCVN4JMMJYIL9h//t6XXuw0ttLbhhLYYS/Svpf8PBYYMIJO0KY" - "MFQhIUmwYVNNPTbQ03TTdhhBJsMJJtwwkmxVNOraaH9JB4TTFEFAZDGqCDEIIIg0AZBisMUQ" - "z1kPWQxXkNlbBhSC+mQlRDGmGKIZVYZQwiGVWwcQiDTW0/QJQZDKrX2sPCaBgvRTg2BIhA0u" - "GS4KP+/te4YLDEL2Fhr+n/xoREGCERIKgYiJBVDERxERxEODBCDBCIMEIMF04iI+oiNCIkDw" - "1bEREfrCB/WEH60gf0qMMH6VIIGU4GoKfSIEsGKCDV9UQNA9IeNA1JAHnhD4j///////////" - "//////////////////////////////+QPkEDMFW+yGQBPBA8NSAmQZ4IHhqQQ2oIEoDFkGuC" - "GlHkDwN4ILMyB4NM1ILMyB4NMyGrNLYeQPF4g14kFC4UgqQQLwFCpbe9pEGbiB4NfIu5As5N" - "Mg34hr9X+qu6Qd1t3Xb+0vUf//9G1/S+vIGYZj//tr67TXhf/S6/xH1//bX///9L/X///bX/" - "//9Lr///9Jtf/////8l/kNTiHwg2f/+k3LhpGgZclMhqeQaJ5Bp/INU9BkGiCBeMgnZDLgIM" 
- "IMhmwgyDXMg1QSmQ1KE3IF4JYQUHyGbBBdyBGhJBDXchrcQfCC4ZGggwE//xCDwgwQMIYIPJ" - "OCD0wUF1yCj00wVMEDBUGEMFCgg8gY8h+8hjRSEQE1//9JsJ6YUKEcMMIYRsjqBFMhsOC6BY" - "KmmQ0HTRsgwUINSDB1RgcI6BiCgz4OCBnwSDBBtAgz4OCmARf/thGxvTCOFCODoINhJJrRg3" - "+gvRt0YN6MGwgg3phGxVqkGgvvvbh6dqkv/S6D6MDaCDoIHS9J9BByBjCDfNLIGJhtJyBfEE" - "HSbIKMRgVoIHIKMVJ1IaMIJnTrTaTpaX/7e8Qm0mknSbIN8VnAMCn/S6YpuKem4hB0uJdpcg" - "oz3+9tb//9Lq6DpaTr9XV/hBdV1avTaXQff+61S66pL/9t3r/6b1en/S6aenW/Xof/dW/bSd" - "dL8gpD+lj7aTrr//+l//T02vVj/1ownTaV0KSW2QzMv6b/xr///0F//39ff9r1r060luEDXx" - "ELuq///+l/+vv/B/vTa3TFeqWw0DS8hkBoI/+Gv1/xf/+/r7JAZn7+n2m6Sr0bMMJeQyAXmb" - "P/j7X/v////Z1Av90v19UmHhBJBj8I8P/8iAMXr/nE////9h/3tpN03dJN/QYXoL/+cBs2l/" - "7f///+G/3S/W3XfSSBr0vr/2GEv/b///r8H//W6+kr9ofS//9iF/7f///+Q16f39Poum3pfj" - "6X93+GC/9v/+61vwf90m10lb1S//9L+9mA1v+3///X7/39N6T3SX//07r6X/t//v+/kMt3/d" - "LX0rdVId//11u9vS/9v/+0tK//19jikm+q//16bbX1/9v/9/rX69YqnVtvS//tdL0XWyDj6/" - "+3//aVpNr39our/XFJf/6L+GgmGQo7aW2vf/t//t1DSsIwvpWW8NL6pJt0l/9sJcMJMMKwwl" - "sMLyXAv/2H/+2lDCCYaX2lFMVbTurdKl/7EKDiExTFScNAogRrDIMazQMHUGJAjVsg+pDGpt" - "JOCHUQ0DQGEopiFkCKoYSdqThlfBKD0P/60Y07WGFt/+wuv9iFCDXxCaa3pqnf/8MIWgYQME" - "DCEMEIcRHFghEREQwU5BBhYhhNBhDT4jQ4/iIiIhGw7xHEcRERH/0g/9f4Sf//yB+Bf+l/6X" - "/9f/+ra+PVfXWCf/q2uC6r9NoLpuq9RHHS/IGeOltpV9rtpJehWwwSIHg08EDCDrDEKECDIM" - "tVYYIfaxER/////////////////////////////+ACACAA4AAAEDAAEAAAATBwAAAQEDAAEA" - "AABKAgAAAgEDAAEAAAABAAAAAwEDAAEAAAAEAAAABgEDAAEAAAABAAAAEQEEAAEAAAAIAAAA" - "EgEDAAEAAAABAAAAFQEDAAEAAAABAAAAFgEDAAEAAABKAgAAFwEEAAEAAACZDQAAGgEFAAEA" - "AABQDgAAGwEFAAEAAABYDgAAHAEDAAEAAAABAAAAKAEDAAEAAAACAAAAAAAAAAAAwBIAAAQA" - "AADAEgAABAA="; - -static const char fontdata_16[] = - "SUkqAHAPAAAmoCQP/////////////////////////////////IZJx0QyQzjkM45DJA3vIHhr" - "2RbyB9BA8Gy00/IHg8XZDMsiXkGzqIK/Akk2Q2nSINUyG25DVoQ1aEGSCGUoINjkFEyGPIZU" - "yGrPBVXqwQahNUm4PCBhQQYQMFwQcYIGED131IZoaNsOk6SbVII4bBQgwmlhAtHDDCOEH79Y" - "QNINqnrZBoHrQQbCpp+EFSCDYQQb/1wjkXbSekbfSbT9JsFTR82uEFpOk3/+gsOtqk6STadJ" - 
"LYR9Z4bhBv0FSTdX9fpYf6SeltP6cILhBtBOswCkpsNFdX666S+m1/p7pJbgtJ6bRddBVVNp" - "X++v69LpK2G164XT1/pa/v79a/69dWGKJ2krY+3ul6XS6V/69f9a0uGP/rX/Wkv//9f9fSps" - "Ol/vWl6Wv7/X//1pa6kGu9f/0vS69f+v8fW6S8Izf6/xr/1uu99yGga/qtaSbBH1HS28fS9I" - "atf8R/dVdJLwlf/6S+q9f/fdVpD9PpL9VXkvqmGl//uqxCW2r//18EDVbSv8gerIl3tpVW7C" - "vS/VKvQekw0tevb7SVrx//pBJcXRH9MNBf/yhQxCrIUZXf/0kvahA1Ypv/qIMMJQmv+l+pBp" - "cIOueG8J0w9f1ZLgyJNVuC/9JCDXhB9NWG2v1sNQjnIWvx0v6uug3EwTSu19cMNIh/SsGcF/" - "/6UuNpdaBB8I5hsMI2lv4N4QaTeP6X6iG1xbptJBBtiF/5DU1SCON07//9But61SDtfkFgal" - "29INrf5BZEyDInS/S1/bpINtJf4dJK1b/0v9JuvrVXBhf+303Tf//6Wu+9U2P/ukv3X6pdaT" - "oSGDZ9JXrogerIl79Orf5A8S6/0v/Wtev9Jb3S/FJ/S1/pXrH//2//v0t69fX/0v20v0tdKi" - "Gl36/X0Qyn/+20nr+tIhpj/v16XS/SX8f6X9L5BQ9dL0lr//7Vr7+k2l6V9euQPDx/pJdNv+" - "Q2o7rS62/VdUsJ//trbD/BSBPiWulf6T0k3SXfrpJdWw3rVPetIhiel3V/0gv+9LdWwgyKP/" - "qlfobaW2k6STapa9XpJXTDCH/XulrDSuwk3S6QS3pYelthJibabS10m0kGsMIJOwk2ltpBBd" - "LyjggkgxCaDX9PtpEMwGsUQ0xDEJsJJMNBBBbXgxFYYTCYT/tbFfC4TELDFEMueIiQa0JCGC" - "Bq6FIUgwhEGCEMIMIQwUFkMk3ERdaxEREf60vbVL/qkvbSX9+ku7SS8W0qHekttIdtLbS3ax" - "H//////////////////////////////////////8gMBZD1yBoDQ5A8GXQhkg31IGgFAZA8G0" - "MgeGsQQyQ2oIG45AkvyC2GvMgqoTIa6QhtBCGgbINQqQYFCDWoIbBBBBBDAghgQQwIIOgguI" - "INYZDTIIYIIGKgREA0EwDYRANBMBqgyGgoIYGEMVEHrIY0IYqyC+hAiZBvMhg5DL4gQLMzA8" - "PBAyGsn4MIHIqGZoED//9bwQcGCDgwgf/64J9pcLCYQOyG0kBGgeQboIQgg1AZBQYCMweQLz" - "IGJkMuZDLmQy5o+GWZgqOZgYZDNxHwoZBpORaI+FDINJyKdHhNENlCBjAZoBgEDNAzyGzNHA" - "zuv7CNBA1Z8I0CB2CIMHZ4GEzwLwgQO00001CadJtoIIGHBA2EEDDYIG0EDzYc+HtNNU1dEC" - "9EgdJmwUL5smEfBh24NhIO4N4fDoIP/6Xe+gg27aCDe2k01u+364eg3wkvQQbQQYfugnww9/" - "Sa/9L1vSb20m90n//p/tJ0v+nQTa7aTW3/pP/6Xfek3uk3tpfX7/dNq3Wl+2kG79L2/9L/9L" - "1vSb7Sb///tK1V6tJUvS0nRhd0vf/S//CXcwswnpPuk+6X///tpOlpfugm+/r2/9L/9LmFX3" - "6b+m/3/9unutJv68dbS/X28cV//S+69vW/W/X//XRhdv0tfIZAaQ5A8Ufp9/r/6//QXuv30/" - "q/4jj7/2raTdCl4WHpEH5tb6Ue/Uf/S+69vX+v/+6tf0nVLwsOlBM3dP9b/X/pe6+39/3//q" - "9PTdWkl4WDWkeb/vSX/X/0vuvb1fpX0r/br79XqvIHhYIZdhWHWEE6TaT//kF3yC7//pe6+/" 
- "XrX//rpatpWkvhNMIIMNUko/vS9v/r/0vuvv7670r/er3punVL7UEEGHXvpegq////S9/2//" - "/ph/3ut+k3SX9hAgw0tKvfCS//r/6X2l7er6SvpQ/9enVjtKvXCCDDSSS9bhggX///+l7/t/" - "6X4SZ1BW+3X/T6++EEQaBMMNL/p4h/668f6C+0vf/S/CTIgGz+ut01aSX00EFhg0tV+4P+OP" - "/0/f+/8JfmIYP96un23SS9poILDDS6rSeeCB///xf6Xt/5ZH8Ew/73XXvS8RCCwYaX6XeDf/" - "88GX+H7f7f+wX7hh/69XVsVXgghDDrX0vb/ngfPA//57f6+3/j+2Q2hH717+6+EEGGl0l77f" - "++//7ft17f/+2QV9f7W19PS8IEGHX6S3v9b7//t/r//9bkFNH709NqjabSXwg2v/T93///+3" - "7de33Xf2G/7/6S9L4Qer1SQvf1/7IN6v/b/X2//9sP+66V9N9fa79V+/X+9/+37de3v7+2//" - "Xvrevwwv6pX/+OOQzJ/+3+vt+l6W//e2ukk+l4YMIJ7fVGwz/vX7D/+37df9/f2/7pdXpN6X" - "hhhArfRdUqf36///b/X29//b/7/f768MMhqiYIJrbS0Et/f+//7ft17fpb1t/7paWqT6+GHh" - "BC22lpU/vpff/7f6+339r/6X33SV6RBRHhh4QVsUxCJ2t9XrkG77f/7eQUPt17e9b1tkC8V/" - "exv76VeDB4QThhMJa9W+v//9vX/77S7S2/73Sr0m9L8MMQgmxGlf70tdv/9v+K99v39vS3X9" - "ikr6/IHhYINEBhhBEFS7S70m9L9v/9v9/b3S20v/umKrV9fwmChhggSbaVP03hLrv/9v+vY7" - "S7S29L136b0v7UMMILYYSW9WGQLvpft//sPS3/bS20tu67S90r0l/oMMIJNtL8JMMJYS1ww/" - "/2//3YaW2lt2lthWqpX0v1w2gsMMElbwSYMElIOfW2Gt3fbarbXuGwgrYaCu7CVsGEv0r6C+" - "6QPC2DCSpoQgxoQkNWDCqq6txrppuwYSUMMElbgwknFe6tpof1h4TQYhEDGpBisIIMIIIg1C" - "hBgQGIRDQIIIIIYEZBuIDBhSC9TRDjCD1OxCIZohiEQzRDBxCINYwNNNUCTBkMsQvtUHhNBh" - "eiXBVClWGrwZCAX/7r/4YWGFuGFhhf1/44iIMEIiDOoZIaDUGQEQiIuIhwYISCmGIgwQhgvx" - "ER9IRHERIHgrwIiI11hGgGwCzroO+qCB+loP9JGCNQGwGXpECYGYPSCBkuBsBt9Q0qBr0ooS" - "GciHjQMJHQDx6IGobv8IRx///////////////////////////////////////yB49PIZIsED" - "wZIIHgxxA8rIHgqWQVrIEsM2yGnZDUvyGQoIM8yB4KnhSB/MgeDZMhtTCWw8geCTIamBIFIH" - "g2IUgzEEFeCGXAKC1t7rXrpp+v9WpA+4geCryMHIHvk0yBfiCp1b7ql6Q/+vf2vr///o4tel" - "015AzBmj/6tf9prwv/q/64j4X/0rS//r/vf9f//0rX///+m////9df///6b////1dL///+rg" - "iGpTIvkG2ZDS/IaX5DUpkpkNOCGXGQf8hmOR+QTyGnBKZDXoQ04I5kNqhJyGVBLiBc+QanIZ" - "4IZ4ISCOCOCDa5BUwgvxBeCJBFciuQz8Qxf/q4gg8EDBAwgeCB4IPCBgoLrkC/BBhBgoQMED" - "BQgYIGFBQoI1gokMzgWOMg9VkKGQwdY44//qwnphQhhHDDR1BQbJnnpkFCCGdGlkM6EGgyDc" - 
"hMI4QZBuhNDIEIVGx0ageQqAZoGAQMEoMJuCDBBhL/6unphHChGxwgg2FCCY9P9AvCp6aCDe" - "gwjg1qEg0F9pphbCB0mg1MBhf/Vo2K9GyYQQcIIHScKrWjZMgY8go/QWQUejhsI4bIGPRsmk" - "2QL+jZNAg5BR/ThSBHkMe9PbtpPX/+r0H0EG0g6TpXLx4MtPEJ3fS2IJiE7EJ0m3QINhIO6p" - "Pu/6039aX/pXvEJtLSDpNj8+GWn/S6aenVuKDpcS5pXIF+9tJu1dJ1pL/6em6etf9J6b/QXT" - "TdNpPTa9P//61aXWl/63/q6Wl/1/pdV19/XQ//dft039Vv9Nj03/7evv+l7vuk9Nr9j+6ujC" - "aTpaQqvZBp4gQ/q/6Qpa///hL///S6v/q/7SvSrcINP6t9////9Bf/97/3+2vTdN06SSWw0D" - "S8RIHgrU9f///0v/1//ZQDX/3XVj9IILDDQa+QyAatP15OJ+v+L//39fmoMz91dfTdbegpsg" - "0vIZAZlDd/8ff/3////sH+66tpPqkw8JJBivCPj//KgDF0v+eT////2/+nr3fSb0kga8IF//" - "PBt7S/9v//+vwb/bW1bq7SSfqg16X1/7df+3////hv/r9b9K/wwvS//9hhL/2////8g2Eft+" - "l0rdfq0P/93+GIX/t//39X5BUn/pN7ejG9Uvx9L0v/Bgv/b//61/D/39apNvSX//7v3y4bf/" - "b//f6/IZkP+6Wr0t2qX//1d74S/9v//X2//19+1Sf0iGH//XX1vS/9v/+60v//bWK6Stuv//" - "+m219f/b//f1tfrtetjpvVJf/sLpdq3r/7f/9pXTa/+sbWk2xSX/9owtoJttGFhkHfT/9v/9" - "urQTa9pWSHbRftbS+lX/thBcNJhkOOwwlsMJcqwyv+w//20rCUMI2v9pwwl9aTbS//xXDCCb" - "CsQrY1hra6sGtrrbaUNBMQtbSYpit/VvSBf/ak4ZzCFMbUkBsRDPU2QYrlAOawzyBFbkHrIP" - "WauUA5rCFAbGlEINZAhPDCCpqUBmp2gSg9D/9hdNNBrDC2//YXX+xCnkmF8U01vXTtf/hoaB" - "hAwQaEMEIOIjiyOGQCwCwwgYWIMEGgwh/EccfoREREI2CPEcRxEREa/wgg///hJ/6/0n/X/I" - "HiiX///pdfT+n/+tpePX9fhfX1bSyB49NKvptL7/1IHg1wEYA1CxdKvSEdtJLyBmDU/2l/YS" - "2wkl8eGGEEQPDXcJBBhBpYYhMQgQMgUVwsGaAeCsF7WIiI//////////////////////////" - "////////wAQAQA4AAAEDAAEAAADOBwAAAQEDAAEAAAB3AgAAAgEDAAEAAAABAAAAAwEDAAEA" - "AAAEAAAABgEDAAEAAAABAAAAEQEEAAEAAAAIAAAAEgEDAAEAAAABAAAAFQEDAAEAAAABAAAA" - "FgEDAAEAAAB3AgAAFwEEAAEAAABoDwAAGgEFAAEAAAAeEAAAGwEFAAEAAAAmEAAAHAEDAAEA" - "AAABAAAAKAEDAAEAAAACAAAAAAAAAAAAwBIAAAQAAADAEgAABAA="; - -static const char fontdata_18[] = - "SUkqAEARAAAmoCq/////////////////////////////////+QyXe5DJDVchncgthMyB4NFk" - "TMgeJBA8FKE06yB9ad5DbxIgScCpNkFYdSGnQgrOQbKENqhA3ghmWQz2QVRyBxZDMoQbJ4XU" - "g0YQl4IHhBhUm4OggwoIGCBhYwQZBuJggYIHhf1CJwazjaSdJNpqEGFQaYWgSwmg9d6yGanQ" - 
"Qb10m+gjxMKEGEGlhAtHhhhHyf/4QaVpIw3rZBpelQQbCpp+EFSCDaCDe/XSOMwbSfSDpJN3" - "1TYKqMyraC0nQTfT/pYaW0gv06dKk4Iz8+K4Qb9BUk2k/+ugt9+npbTXVtBcINwnWYAnTNg3" - "77f1+v1aS+k3dVXC6em0YXSqkrSv99UvX/S8N6q3C6dJ/0tLff/r9/S6pJsN0RB6rH2/S9JL" - "XX/r0v//WwxRfqt6XvXpaX1fv9f+lqlThjrf+tfS//////6pbdf910vrS9X+tdf9LSWsKQ1L" - "pfr/9fuv/f/H+1rcIzH+v8aS9LS17/yGwU96S0klsEf2OvePpfj3X+I/2v0l4Svr/S/SStf3" - "/9JwkwqevXf9L+m6/rvdLihW6vpfpL8jmktpf5A8WyEu+6She16/rSXggekwwgr/XvbXr2E3" - "1/SSXgmpHPYaWv+3tpJNEGt/H1/0viHhBpMU3/SyGoYhaZCg/v/0gSS7oINWtf9ifDVCSC6T" - "/X+Qa9Pnx1Tph3/0yEAkpr3Ba/pQaXBBvTUMNtf9hpI4mD+PX9JpdBvRwnV2F/2HhBOQxhFj" - "9f1mBh/TxBB0gjiDYYRxL1wbSIP6Tdj/X9QgbSxdJtQgg2xX/kNVPhA6t/1/SF/unpINwvel" - "DMbSCOG1b/IM4vX6TaW9OqQOGvogzCmD6TaT/5BSEL+k6X7aSTbSX+G0km+/9f+r/r0nDBf+" - "/TpN/9V1paWRjv6VNj/+kt7deQPBe9f0nQj99XS8geWEJe/6t/61fpf+lv8Kv0qb7S/FX9LX" - "9a6x1/X2/+v0v/pXX/0v20v110t69fr6IZcf/vv9daVENXt+vX/9L7bS6/9SGo//S9Ja/1/s" - "Va6t+ldZAu6V6X1/+l9tf11069//pZA9RX6/T/5BWhddK63p0krSSwnr9JOtsP8hteQLMpv0" - "tLtL1fS7/177Yb1qmvWk3X7aXaSWv/SXTcHr0nTa6IGBDXbSvSeku+r0ttJhhFIBO/XXS1tL" - "tJ0u1CXroPSSdWwj3f/ptJNbQSbaTaSTaQQXXw8JbDCCiE1117DSIZsBBlusMJENSAg4YQTD" - "CSsMIIILetkNQgkmKDQYVr00rFRGrEKJrDOKYqmKCC2FqDEVgwmEGnodqmvhbCYShhAgsREh" - "phpCDBBr0hUMIRBhCGCDCEMKCyGScCItpdCIiI/6S+9VX2uvvSX9qlXvWuraSVYtpUO9Jdqw" - "ttIdtLbS2mFiP///////////////////////////////////+QEwate8gaApwQPArqIZINtZ" - "A1DU1IHgpaEDwZCCC3wQPA4jyC2DJMgpIyG0BkFdQQUCZDUKSBAuQ1CZAuDIYBkFwZDAMguD" - "IYDIMAyGpqQ19CGNCGYnINYMg1DRBUBkGsbBEYDchgQQwwQYLlOGCIBc1A5GAxIEVENnkMqZ" - "BihBp4hoK5OB4KJBbBU1IbYxODBEaDORcGpYIH//63hA4MIHBhBxX/+QXDUJrwsIMIPDYRoC" - "MMGHDhGgOGGCDBBggYQYLDBBuEHDCCMw4YYYRmHYYaPBA3DBBhMEGEwSYYV9112EGpBXoQiA" - "gQNEDFCD6EGs5B6EDByCUCBA5AxQhmUIZtCGbQhmUR8Gg0BSonDMZBrwIEDZBqOQiAgQMMg1" - "nIQgIINZBvQg0YDNAzQgYLIKMdHAb0p8C69MIoCB3QdBB6IaEMIhsthEM9oIGmmmmmqaurpB" - "NsIG0E2wgbSDzAOZg9qnp9yGUdIN1BV84NozBh7hsJB3BvfdBP/6W6tukGHhtIMPDaT/7v6a" - "Qbw2lST8INhAgbT3QT7f+En/9LvvSb2wk3vXX/dfuk/pekHSb+0vDf+k//pevpP9P6T9P/uv" 
- "aVpf/aCDae6Xt/6X/4QX96Te2k3t//7pd09OlpfTpN/r7f/X/6XazCek36Tfpf/3+rat1per" - "03ML9Vv/r/+lzCza7et7re//+62vaTrpeOk636X3/pf/pfaXt9P9P///dPe9XX/pXT+vt44/" - "/6Xv/et+t+uOP/zCpNpNpCl5DIBocgeDj9Ot/Q/6Q/+l917+/7/j+6Wt+rpJeFh6RDH3T9Lt" - "+v/pe6+3pfpf/9/vt1el4WDrCDNzrev/1/6X3Xt/f99f7q2l1pOlXhYapBH2//qv//9L3X2/" - "fpX1v+6+1bStJfCw60E6t0/r+QIfIEOv/oL7r719f6//6fb30vIM9kM2wQQYaSQSf1vS9v//" - "/S919vr+vW/3S1/SbSS9BoMIIMNLqK6fSr/+v/S+/3++u+r/vvum0ukvVMIEGGuvreEF////" - "pe6Xt++kr6TD/enp16bX+8IIMOtL6fBBL+uq/+l9/t6+v6UH/a6+x2kl/hBBg0kl9eIL////" - "S/S+/9L8JMpwZf/03tPSX1hBEGsXDDS6r7lQGX/XXj/S9v9v/S/BBSoDb+6WvVWqXu0EFgw0" - "Fr6Twf8cf/p/pe/+EvzaMH/e36tulX1ggsMNLr+58IH///F+3+3/k9P4IMP966b9ul9oMIIL" - "DDS+kqfIPT/+fBo/f6Xt/8L9oN/669NxpeIhBCGGlqut7f8+GM+GP/z6fv/f+wX7YN/39Wqb" - "SXhAgw6/6fb/w+H//b+69v/H9sgyDP3SbV/0vCCDBpdaS+39f//7fuvt//7kDjT+//Ta/CCD" - "aXpfe////+w/uvfuu67B/3rq6Rvel8IP/0k/d6773/4b919v//bDf+urql6Xwgem/WL3f/9y" - "BhP/7f3X3v7+2/7f1+3pfDC/qkv/X/3/7fuvt+v+7/9Nq3SX18ML71V/f445Bqp/7f3Xt/Xp" - "dv/a/6Tevhhggm71WbBv/1+w//t+6+/f37f+9daq3peGGEFvouqSX79e//7f3+3v7+2/+urf" - "XpeGGEE3bfpd/f///t/pff19bf+3TddJN6XgwZDXFwghd0nCST++l9//t+3+32va/+l+uq3/" - "hh4QVthKIRPO+r/3//byBiPS9vet62yGU9f2vt6T6RDKjww8IJtimkkn1b0sgXjv/9v9v9vt" - "Lutv+9666pvS/DDwQLDCYKv/0v2//2/4r32/ddv911Y2Nb0vwwxCCbEV76b0utv/9v9/vdLt" - "L7S7pivSfr8gz2Qa4DDCCIMsNpJPq3r///t6XXt91t1t/69aV6/pkMzwwYQSDYaXfTelrt//" - "t/v7G2ltpbeltpe1vpV9qDDCCVsJK+kwyGaPpft//t/17tpbaW312rVaTekvrQYYIJNhpVeE" - "mwuEF+w//2/3920ttL7S20t9K9L+8MNBbDCS+kwYSUJa7f/7D16/bS20tu0uDBfaV9Jf0g8F" - "sGEltQSYYLZB0KFTDDVNNPTbUFTdNN2GEk2GEk24YSTYpqtK01C/WHhYYhINNCEGKCEJDTDB" - "gqaaem2hp6abhhhBJsGEEmw0GEk2v1YacfvQeE0GEQzKyGcQEEGEECIahQQIBhiCINwZDAMg" - "uDyCgGwYUgwnaIOIIYTuxCINQMMUQagcOIRDUBhhbuCTBkMwGvsLDwg0GF8hAZQUgQNnDJMM" - "r9b1/uGFhgtwwthNVVf+KQiIMEIiDBCIgynDIBRERGhFwYISBPqIkNGogwX4iI/QiOIiQPBZ" - "cRER9cIzA2gb+qCB/WEH60g/rQNdJJGxlOMjgbQaOpAmg2D1CBj+oaQg1egoogahpaXjQMKa" - 
"gPBjRA1Dbv4Q1sIcf////////////////////////////////////IFmpA0AkvUhkhlQQPAr" - "gIMgeFsgeBxBAkggTA0rIa9kNe/IZAJBA8vIHgT+FIHjmQPBS8gyeEth5A9HIa2BIgfkEDwU" - "iFIHxBA8FUghtkJbeHtL/IElkMu/S291UgeL5A8Cf0YOQPHNNMhleQZP/+6d0g4dNN3rfpV9" - "If/Xv00vX/yBoGoP/84n+l7+v/q6/7VeC//V167CDXhf/V1/xH//6tf/3/9J69f//9tf///0" - "v////2////9LX////br///9LQIg2UyE8gqTIa08hqzyDZTIsyGu5BofyDU5F8hiZDVgizIbZ" - "yGs5EmQVqEVyGa5F4hlnyGlBDZBA8G1yDJhA8NQC/9W2CBB4IGCBhA8EDwQPCBgpDK7ILzwU" - "IGEGChAwQMFCBhA1BQoI1hlQUFIaE5F0IOnUgmpBc5BjyDFZE0//rEIPCYQYQwnNQGHpkMs5" - "BufXIFzhNMhlnTCDIGDhMIZDYdQuSsgwcg5/77///VtPQYUI2OEeGwoR4iIDRTChdAsKmmFT" - "CPjYVBhHBYVUcFhDBQUgXoQI4hsopBjg2jYFzQCDwNzYLr/6sI4L0wjwsIIHQQbBUEGsJ/oL" - "008JoIMPTQQa1QQcF9qmug6TtV/+k9BvRwdBBoJB0m9J9HBshl+QMfoLIZfo4VHCZDL9HBtB" - "NkC/o4NhBByBj1UOkQ2eQY+6bvbV/S//bp9Ag2gg6TpWQL8k1EJ3fS2IQYhOxCdJ3QIG0ndG" - "3rmI8DTd/Wm60qS/9Lp4p10nSfVngzQg/6XCYTwg6txTpcQnVx/6e90m66//b7ptL3W+rq/0" - "F003V03Ta9Nf+2utWlrpf+l709Ol/77/hL/3Xp66f/+r1bS61//bHq1/SfXr/S6p69utfHH9" - "906em8UktshpYQ2P6X7+v/+/6Xv7pe2vV/2lzH7S1S/CBw/2/0hr7//9Bf/7/X3/9+k2r1SW" - "2g0vS//1///CX//e/8H+2lpXvVqkltoNLxEgeBORvr/9f+v/6//shhp/991iulBAsGDRww18" - "hkArV+vx9//F//7+vynBo+3WldNN6QMPBAkgwYS8ETv/9el/3////sP/W999JJh6SQYrwjMX" - "/8pAy7f/zyf///+w//dLSt+m9Kg16Bf/z4K+0v/b////w3+3X3vapfpA16X1/7aX/t///1+D" - "f+tq9JvSvpYYXpf/+wwgv/b////wb/bp6tbvSv2h9L+7/Yr/2//7/vyCuR/r/oxvSS/H/1/8" - "ML/2//+tL8H/erSel36/9L6/vBgv/b//f/8P/a/apN2kl//+22r6MArf9v//+n8g1O//TpPS" - "Tb6//+l/4S/9v/+0tb//Vtb7VK70iC8f/r1drel/7f//9f/69R/vpJf//q2+3pf+3/+3Wtr9" - "er7T0k231//YXS7X1/9v//StJtf+1j40rikq//RhbQTDbRhWyBA+n/7f/7faCte0rtG1qnSb" - "df/20uGk3YS4YXv/2//20oaTYRtf7Juwwgt3pXpJf/sILgwgmGQsMMILYYXlIGZ/2H/+2lDB" - "BMMJfDSjiuvSt0l/9irMPFMUxVsUnDTtPTYaaYTtO2GEopiFoM0WEmqTenVukP/6jmEnakgC" - "6IaE7DIMDJIDBThokNCdyC6cgunLGSGCEYHJANxChMLIaBEQqakMGn0CTB6/+wtIMINBrYLf" - "/2F/+wp5INegmmFtNPW//4NDQMIMEDQgwhBsRHEMjhkhAsGEDCxBhBoGEP4jQ4/iIiIhHATi" - "OI4iIiP/hBA3/X+Eg///pf9f6T/r/ZA8OLf//+l16T1///a6/S+k9aj//a5Arq0q9JtL//wY" 
- "YQLdtKvSEcdV8gaIddtKvTS20kvj2GEkvrYYIIgeCsOEggyB6sIOtiFFEFsGRPE6AZgsLDCY" - "XsFkDYDScREcf////////////////////////////////////4AIAIAADgAAAQMAAQAAAIEI" - "AAABAQMAAQAAAIsCAAACAQMAAQAAAAEAAAADAQMAAQAAAAQAAAAGAQMAAQAAAAEAAAARAQQA" - "AQAAAAgAAAASAQMAAQAAAAEAAAAVAQMAAQAAAAEAAAAWAQMAAQAAAIsCAAAXAQQAAQAAADcR" - "AAAaAQUAAQAAAO4RAAAbAQUAAQAAAPYRAAAcAQMAAQAAAAEAAAAoAQMAAQAAAAIAAAAAAAAA" - "AADAEgAABAAAAMASAAAEAA=="; - -static const char fontdata_20[] = - "SUkqABATAAAmoDgf////////////////////////////+QyQy7IGwGXPIZILLkNA/kDwVrIW" - "3IHgvBA8FqE00sgeC9pp5BWhIFSvIHhpOQPDToQK3ILYb01TTINOELmCJwypBY8FVsgy2kQ1" - "6BSCocEDBSDQBEFfCBcWINJwQeF/qDCDSCD0m4eCBhSDZWEGFwTwQMIPC1VKQa6keMPTpJu8" - "IMKEGmuECwg0fIP3dcIGgg2kE9JukkeGwqDQaWECwj42EEG//wiRhpN6ON0lZDSetBBsFTXw" - "gqQQNoJv/9HnJetpIJ1201SSbCpo0JroLQTdP/+EFh6b1ekm060mwRp5mNwQb8JUrat//1uu" - "kk+laeklhBaBA6QdZsCsKcAwqdK/qukv3/pXuvbgum4TaMLpUq3T7u9KltVaS61bfpcLp6/p" - "Kqp1vr1/1/qlcNpJK2wvfdL0tf3//X/S+qsGMjvrHpuuvS6XS//6//SVWw0c6X/q6+lr/f//" - "/pfXhj1b/9L0uv3+mvX/9JJLyGtiX9PX+uvr+/VePpf7YR9f/XGkvqquv/1X+u0klwUi3pL/" - "/66Wvf+Qbi/uqWklVhGaY/Xj6XpD3X+I/37pVuCT/36SX6Vpf1/0lpQl6vS/qv0lbr/720u0" - "h7hX+/6XkWfVpfv+6pRVbXpL+kvggaqw0r/IHgmELd7aWwldq//SSrwg6qGEtfpW9hoJKu2v" - "S/0kvCakWisNL/Xg8MV5DNp43/9IL4h4QaTEJv/4wwSSkEUf6X6gklrhB0+v+yXBmprW//0Q" - "06l5mK0k1DDv/5JgUIJrvBf+lB1wgbwnTDbX/Yejyh1sdL/tLhPo8TW9fqgw0kmyC/Eu//0k" - "cGH9NxBA2kEGwwjaX6hvIYmleP6X6UEG0tp0n0cbYYS+lyGqESCDSd//+hbXF4TaQSBuK/8h" - "tJ8I8w0m/9L9J673VINsL3+YbSQQbSv8gflZA4Cf6TaW9dJBsNJeQPyjD6Te3//+k/+2lTtf" - "+G0ltW/+kv0rS+vScGC/9+nr/1+utcijv6STY1/6S3t1/SX6vEfuu6/7/q3+QPDZi7fpa/0r" - "/RA8PhC56VN7a/il+lr+tdY/+vt1//pf+krr/6X9/6WulvX/1//+2k9f1pUQ1/30vS6ohmv6" - "X22tr/6RBsH//9dV+v9uv0m/SC110vVf/pfsV/9aTrkDELel6X//9tfpfpXX06/pZA8Hj/SS" - "6d+sgy3uul+39JWklhB//utsN/BSB4b4lddNpeler6X1rpJX3DtcKn/pdbaTpVpJaa1f/TcP" - "+k0m0tItXaX16t0l+vpJOkw0GQg71r7+h2l2k2lVqEv+Hpba2EeRrn/S9patpJthJulbSQX/" - "D0km0mdWmKtfdJtJNYYSuGEmwkraQS/4eEttBMQg09de2kQaUiygwiGuTk5ptKmGEEEFtexE" - 
"JJimgwv+kmDBLWK2ITBgkrBgkCC2qwYWDBBhBp0hSDWGIXwuExCoMQgQLERIauiQhggwr8Ug" - "whEGCEMEGEIYUFkMkMyMgeC/EMu+qwZwMgMo4B4axwDg8B4axwG0LxEW1SxEREfetL9Uv26p" - "L9JfVvpL20kv+qSxbSUd1S26YW2kO2ltpb1tNYj/////////////////////////////////" - "5AWBqr/IGoNMEMgk5DJBaGEDYGKyB4FlZA8CjQgtgYghkJ/IGYMUIHCLIKgkCKsC5QCHQNcl" - "oaCWBqlIGYVAMFWGCoBcqwXKgCDoDBUBrmoFQ1Bg6g1EQGmVYaREwZCrDSIoCqDIGKEDGhDC" - "ghnGEMAyBCchgvZDRWQUYQzaENEyGlMhsBfBAyCuMWRYFYjYaBKcNPQIiYF//9reEHIuG0HB" - "hA4r//IMEZBcOuaAeGnCDBA+wg4Pwgf//94QcMIOGEH/+uE1tetMIOyDIKBFAPIKCwhpi5DK" - "FQigGEQzCCGaQQzCCGaQQzCEaBmkgFNokBlshpzCJwcMhpaEQgInBwyGloRA0fD8g3IINPkD" - "CCDTMgoRkDE7R8NDqvTCJAQNQzMEYQIhsoGZgQ2aBns0BiEaAXtNNNNIJp6baQIG2aAu0CBt" - "mgLsIIPtNO01YaIGEUQMISnwy9do0Ah3g6CD5BQG5BRbkG9oIO00000gmm0naCCDDcNhBBhu" - "G6DzYYNAxap91oNpN1BV84Kwggw120EHwb38JB//S/vSb20m90nrf+vVtJtKkvQQbQQbvcJN" - "Yb/0E//pdrekG9tIN7aT7/v970/pfToIN+2gvb/pL/9L+9Jv0m/S//tLWm1bS/90m19L2/9J" - "//S770n3Sb2+v/fb/SetL0m0E97r7f+l/+l6za9X3X6//3Xat02lpf6TaNr9e3/1/+lzac2u" - "3pN7pN71//rrTaT1pfT0336Xv/r/+l+v3q/q///71362vXjq6T+vt44pf/pe3Xt9X9X6v/9q" - "2u3Tpa/pX39ff//6C/X29X9X/Ecff5tV0m6FLyGQCm5A8ND9Pv0kO/pD/6XvXv9/3//1902l" - "apeFg9SGKE/tL////0vtfb0n9J//7XSfb3pLwsOqCe+3qvfr/6XvXt/fXfX+9P/pWlXhYapB" - "GZv0vqt/r/0vtfv//1v+1vbSbSdV8Fh1oJtb76X///S969vV9Vfr/f0v6bpLyB4eyDVYSDDS" - "0unSfS/yGeMhnhf/S+19+vqvW/+v9tdJL0GEwggw9JJ//S+///9L3r2/v++r/tdJtXSbWvWw" - "gQYaWsfTelW//X/pfa+3/pfpMP96b+rrpL7TCCDBpaXr4QS////oL+vvV/V9KH/f/sU2kv+E" - "EGGlpV7eCC//1/9L2/2/9L9JmoFn7paTf6pf4QQYaSX6XEwGn//4/0v0vf/S/CCkICn//0mm" - "6S/wQRDTKwYaWl/cgQZn8Lhf/X2/2/8JfkKdlICt+1dJvabSr6aCCww6+tJ4P+OP/0/0vb/y" - "xwQX5tWD/er/v196CCww0tf+Zh3///F+3/f+1/BMP+//bbSS9hBoILDDS6VJJvBh//zMGv9/" - "pe3/wX7Qb/dLSbS40vEQgWGDS//w3///37f7/8F+2Df/v/bVeEEIYaWte3hv6mYEZmBH/5nv" - "9fb/x/bIKYT9tbX09Lwggw0v0kvf/ff/9v3r2//9yCwn/66bSV18EEDDrpf+////9v7X77ru" - "tsgtB///zadJfCDaX9JN73r/3/7fvXt//9h/20tL0ndL4Qer0lj7fr/yGZZBI/9v7X3//22/" - "/fb1vr4Qff6T3//3i//b969vf39sP+2trpJN6XwwX/X/////2/tfb9L0t//rp+vX4YX3SSv7" 
- "6xxshpI/9h+9e339/b//96t6XhhhBPeqSNgY++v4f/2/v+//9v/bSdLpJvrwwYQVu3LrX/fX" - "u//t+6XvvW9bb//e2qXpeGGEE3elqrf+v3/+39/t9r37/7df+3peGGQ2ysIJp20sIKv2+v//" - "2/0vb+vS7/9dL0vXww8ECG2wk4SW+vX2//w37f7e37+3Xpe+2kk3peGHhBOGKiieU/t9ZDLj" - "//28hmR6X36XaW2Qy4+9tLS9W+iDRHhg8IKwwmEq3revW3/+3+3+3t+3W///G/Xpfgw8IJsW" - "Cqn6b0tdv/9vXivfdL/vrdW640m9fwwxCCbFL9X0v7//b639vuttLb17qOqW9L8geHshp2GG" - "CCIG92q31D0v2//2/69vbS20tvr13dJ9L9MFDDCCCbaVPq3pft//t/v8baXpbeu3tV031+mE" - "gwwgVtpLek3hL9v/9v/920tuvtLtL6W9JfvDBhBK2Et9JhkM2NLXb//b12ve0thpbd1tha3S" - "fX/wwwgrYYSSvCTDCWCX7D//b//dhpbYS27CXaTtaV6S/pA6Cwwwl+CQYMElIEB6Ww17/bVd" - "rtw2Ethpd2lsMJf7ekF/w8LYMIJO0ITIwXEJDTDBqmqemw409Ndgwgkwwwgk24MJJs1DTW0k" - "2mh/0HhUGQyQIGCDoMUEECkNCgM1iyGxQQYVkMKMgXUMMJAgbQIiAzg3ZqJEGpQwYJEGpQw5" - "0JENRWxXWCwyDWo/aw8JoMQgQMhsBggQYSBAuGJIAxrrYYUgQIwUhgQ4YhYYhbDQhbX4SYev" - "sJIPBNBgvkmBwCkaBU4ZAgzf+/v7hgsMLcGFhhU71/40IiDCERILIgREgsCBEcREaEQ4MEJA" - "sTiIMEIME9UIiP5BbBq8hkhnmQWy/EgeDIOQUuIZAuEDwZuIHgz0ER9IRHEWZgSgVf0ED9LQ" - "f1QQP0tB/pI5g/SpBEQuSe+iByA3HqEDH6UMKINfUUQNgJXfCoGFNYHhlxhAwvogbAzB/hCO" - "P/////////////////////////////////+QPFrIGoF8IHYG3PIZIbdkMg4CZA8CIIZAb2QJ" - "7IE0NbMg255DIAw5A8PMhkLwpA8H8geBZMgpzCCww/CkDy0IHgVhqQPFDIHgcaEFXRLbyB4L" - "0INvL/kFiyDRnpbe6aXrhNP17+0iB4PMhkL0bKQPB/tMhlTIKc//XfSD67vW/2l6Q/+vfqvr" - "///zyf6W7XkDUo/9LS/6/C//Ta+uGgwvBf+lev8R//+rX/+v/V/1///q1////V////9K0v//" - "/9v////S/////bX///9LnA2ycNKQkyDLMg2/kG2eQ26ZEoQ2oINT+Q0nIkyDEyDa5EoQVzkN" - "qCEmQZTkRyDTBF4hmoyGq5Aw5A8FNyBxf/03BAg8EDIbWoIHggeQ19cIGFBcgwfBQgYQMFCB" - "ggYKCBhA1ChMEU4ZkFBSGwQRKiC4yyD1EGEENHkFxhEqIaBv/1cQg8JhBhDCDwg9Mg1UIGKa" - "4KmmQaKJhAwVBhDIGKKE8g0UIEU9V7X//6sJ6DChQjw2ERAFzxEGGygyBjQho00CyCgMJpkD" - "AaDCPjZAvog1IaNFR4WiMB5BNCBhBDP5BQjIEUtHAUEgGAX/6ujYr0wjwsI8LCCDYSQQfX9B" - "emE9UEG9MI8L1QQcL7TTSYaIGDpMINTYEL/6sJA+jg2gg4QQOk2FSawjxP9BejxtHieEeJpB" - "h6ODaBA16QcF96fp96SS/9XpvhB0EHQQdJvSfQINkM2ZDKnoLIZvoEHQTZDLmgQbSchlzQIN" - 
"hIOQy5qr5DRMhnn/e2ldVX/1e+kG0nSdLIZXqzMGgU7vpcUxCdinSbdIOk7o2eshl+7uk602" - "k/pf+r08QnWldW9XhP+lwmnhOtxCbS4hPv/ff9aWv/pX9Wk6T19XTf6XTTdN03TpdOv/06aT" - "aT+l/+33ul1rf//oL/9fbX6/9tf20rS6/9LF61169Xr/S6rr2666HH/rTp0nxSS2yGpxAu/7" - "ft/6v/v+l7vul7a/f77c2q33SX4Qafpf9Cl///4QX/+//V/2vXulqqW6DX6b/X///9L///S+" - "H/1q2k2k9JJbaBpeIgu///X/pf/97/sH+2rdfFWtILDDQa+QyQ31/XX//9P/9f/5qDU/1rdN" - "PpJbaPEQzXkMgCsR6/H3/8f/+/r7IgGn7/Tq1dJBh4QVBivBFB///S/7////2H/aVr16qw8J" - "JBrwjQT/8gYZt6/59P////Yf7703tvST6SQNegX/8zBT2l/7f//9fhv+6/Sbtav6DXhBfX/t" - "pf+3////hv/XSvTekn6UGF6X//tpf+3////g3+9N7/6V6tD//d/sMJf+3///35Bk0/tddJNt" - "V/H0v//Yhf+3//daX5AkR+/03o3vpL//0v28ML/2////8H/tK10km7Wv/S/d/wzCBf+3/+/1" - "+/771elvSX//1d/pD/7f//WrfkGuP+0tX1b9IgRH//r63hL/2//7Xr/f1+/SpttJf/9em730" - "v/b//f1v/7V6er0r9L//XSttb1/9v//rptfr+1jY0ntUq/+197X1/9v/+0rS1/9XqqVtiqX/" - "9bQVtowrZDYPr/7D//b7SYa9pW2jCxu6V6//thG/aTdhLbC9/+3/+6VoK1/yKPYS3rSbaS//" - "hhLhhJhkMOwwlw15AgaP+3//aVpMMEc/aVhOGEuulfS//iFoMIJhhWKthhJYd2urBra922lB" - "ggmK1tJimK/eraSBf/asqIpgwVqGwYJBA2GCDhhA8IG2EDBAwQODBBw2GlFMLCKcDDCVVBu1" - "hJh6H/7Cjm0ExQanQMoUQ2F7IEF50DBLhokNghyGC5DBcEDnQHIgEOgFwYIKE1kNgOUGE0wo" - "OtoKw9f/a6DQaDC2Cw//sF/+xCn0gwvimg1vXW0//hhDQMIMEDQgwhDYiOLLhkhlBYMEDCxD" - "BBoMEO+I44/iIkCThILGBHgTiQyQaOxHIHh+EREa/xFBEM58fH/sIJ/6/wk///pP+l/kDwIK" - "////X/3r+uvStf///8ev0nrC+v9pZA8WvSr0g2l//7aXtpfqJTgi4GwGeP16QjtpJeQNAanX" - "tL+0tsJJehWwwgklX2GEgkmg6wxCBIgZiCCB+DrDChfCyBtDOdkcDMMcMLEREf//////////" - "////////////////////4AIAIAAOAAABAwABAAAATAkAAAEBAwABAAAAcwIAAAIBAwABAAAA" - "AQAAAAMBAwABAAAABAAAAAYBAwABAAAAAQAAABEBBAABAAAACAAAABIBAwABAAAAAQAAABUB" - "AwABAAAAAQAAABYBAwABAAAAcwIAABcBBAABAAAABxMAABoBBQABAAAAvhMAABsBBQABAAAA" - "xhMAABwBAwABAAAAAQAAACgBAwABAAAAAgAAAAAAAAAAAMASAAAEAAAAwBIAAAQA"; - -#endif /* LEPTONICA_BMFDATA_H */ - - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmp.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmp.h deleted file mode 100644 index 568c9901..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmp.h +++ /dev/null @@ -1,124 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_BMP_H -#define LEPTONICA_BMP_H - -/*! - * \file bmp.h - * - *
- * This file is here to describe the fields in the header of
- * the BMP file.  These fields are not used directly in Leptonica.
- * The only thing we use are the sizes of these two headers.
- * Furthermore, because of potential namespace conflicts with
- * the typedefs and defined sizes, we have changed the names
- * to protect anyone who may also need to use the original definitions.
- * Thanks to J. D. Bryan for pointing out the potential problems when
- * developing on Win32 compatible systems.
- * 
- */ - -/*-------------------------------------------------------------* - * BMP file header * - *-------------------------------------------------------------*/ - -/*! BMP file header - * - * Notes: - * (1) The bfSize field is stored as a 32 bit integer and includes - * the size of the BMP_FileHeader, BMP_InfoHeader, the color - * table (if any), and the size of the DIB bits. - * (2) The bfOffBits field is also stored as a 32 bit integer and - * contains the absolute offset in bytes of the image data - * in this file. Some bmp files have additional data after the - * BMP_InfoHeader and before the color table (if it exists). - * However, enabling reading of these files makes the reader - * vulnerable to various malware attacks. Therefore we do not - * read bmp files with extra data, and require that the size - * of the color table in bytes is - * offset - sizeof(BMP_FileHeader) - sizeof(BMP_InfoHeader) - * (3) Use arrays of l_uint8[] to make an endianness agnostic - * access to the BMP_FileHeader easier. - */ -struct BMP_FileHeader -{ - l_uint8 bfType[2]; /*!< file type; must be "BM" */ - l_uint8 bfSize[4]; /*!< length of the file; - sizeof(BMP_FileHeader) + - sizeof(BMP_InfoHeader) + - size of optional extra data + - size of color table + - size of DIB bits */ - l_uint8 bfReserved1[2]; /*!< don't care (set to 0) */ - l_uint8 bfReserved2[2]; /*!< don't care (set to 0) */ - l_uint8 bfOffBits[4]; /*!< offset from beginning of file */ -}; -typedef struct BMP_FileHeader BMP_FH; - -/*! Number of bytes in a BMP file header */ -#define BMP_FHBYTES sizeof(BMP_FH) - - -/*-------------------------------------------------------------* - * BMP info header * - *-------------------------------------------------------------*/ - -/*! 
BMP info header */ -struct BMP_InfoHeader -{ - l_int32 biSize; /*!< size of the BMP_InfoHeader struct */ - l_int32 biWidth; /*!< bitmap width in pixels */ - l_int32 biHeight; /*!< bitmap height in pixels */ - l_int16 biPlanes; /*!< number of bitmap planes */ - l_int16 biBitCount; /*!< number of bits per pixel */ - l_int32 biCompression; /*!< compress format (0 == uncompressed) */ - l_int32 biSizeImage; /*!< size of image in bytes */ - l_int32 biXPelsPerMeter; /*!< pixels per meter in x direction */ - l_int32 biYPelsPerMeter; /*!< pixels per meter in y direction */ - l_int32 biClrUsed; /*!< number of colors used */ - l_int32 biClrImportant; /*!< number of important colors used */ -}; -typedef struct BMP_InfoHeader BMP_IH; - -/*! Number of bytes in a BMP info header */ -#define BMP_IHBYTES sizeof(BMP_IH) - - -/*-------------------------------------------------------------* - * Align BMP headers on 4 byte boundaries * - *-------------------------------------------------------------*/ - -/*! BMP_IH is misaligned, causing crashes on some big-endians. - * A packed struct forces alignment. */ -#if defined(__GNUC__) -typedef struct __attribute__((__packed__)) { - BMP_FH bmpfh; - BMP_IH bmpih; -} BMP_HEADER; -#endif - -#endif /* LEPTONICA_BMP_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmpio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmpio.c deleted file mode 100644 index ef645128..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmpio.c +++ /dev/null @@ -1,639 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bmpio.c - *
- *
- *      Read bmp
- *           PIX          *pixReadStreamBmp()
- *           PIX          *pixReadMemBmp()
- *
- *      Write bmp
- *           l_int32       pixWriteStreamBmp()
- *           l_int32       pixWriteMemBmp()
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#include "bmp.h" - -/* --------------------------------------------*/ -#if USE_BMPIO /* defined in environ.h */ -/* --------------------------------------------*/ - - /* Here we're setting the pixel value 0 to white (255) and the - * value 1 to black (0). This is the convention for grayscale, but - * the opposite of the convention for 1 bpp, where 0 is white - * and 1 is black. Both colormap entries are opaque (alpha = 255) */ -RGBA_QUAD bwmap[2] = { {255,255,255,255}, {0,0,0,255} }; - - /* Image dimension limits */ -static const l_int32 L_MAX_ALLOWED_WIDTH = 1000000; -static const l_int32 L_MAX_ALLOWED_HEIGHT = 1000000; -static const l_int64 L_MAX_ALLOWED_PIXELS = 400000000LL; -static const l_int32 L_MAX_ALLOWED_RES = 10000000; /* pixels/meter */ - -#ifndef NO_CONSOLE_IO -#define DEBUG 0 -#endif /* ~NO_CONSOLE_IO */ - -/*--------------------------------------------------------------* - * Read bmp * - *--------------------------------------------------------------*/ -/*! - * \brief pixReadStreamBmp() - * - * \param[in] fp file stream opened for read - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) Here are references on the bmp file format:
- *          http://en.wikipedia.org/wiki/BMP_file_format
- *          http://www.fortunecity.com/skyscraper/windows/364/bmpffrmt.html
- * 
- */ -PIX * -pixReadStreamBmp(FILE *fp) -{ -l_uint8 *data; -size_t size; -PIX *pix; - - PROCNAME("pixReadStreamBmp"); - - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - - /* Read data from file and decode into Y,U,V arrays */ - rewind(fp); - if ((data = l_binaryReadStream(fp, &size)) == NULL) - return (PIX *)ERROR_PTR("data not read", procName, NULL); - - pix = pixReadMemBmp(data, size); - LEPT_FREE(data); - return pix; -} - - -/*! - * \brief pixReadMemBmp() - * - * \param[in] cdata bmp data - * \param[in] size number of bytes of bmp-formatted data - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) The BMP file is organized as follows:
- *          * 14 byte fileheader
- *          * Variable size infoheader: 40, 108 or 124 bytes.
- *            We only use data in he first 40 bytes.
- *          * Optional colormap, with size 4 * ncolors (in bytes)
- *          * Image data
- *      (2) 2 bpp bmp files are not valid in the original spec, but they
- *          are valid in later versions.
- * 
- */ -PIX * -pixReadMemBmp(const l_uint8 *cdata, - size_t size) -{ -l_uint8 pel[4]; -l_uint8 *cmapBuf, *fdata, *data; -l_int16 bftype, depth, d; -l_int32 offset, ihbytes, width, height, height_neg, xres, yres; -l_int32 compression, imagebytes, fdatabytes, cmapbytes, ncolors, maxcolors; -l_int32 fdatabpl, extrabytes, pixWpl, pixBpl, i, j, k; -l_uint32 *line, *pixdata, *pword; -l_int64 npixels; -BMP_FH *bmpfh; -#if defined(__GNUC__) -BMP_HEADER *bmph; -#define bmpih (&bmph->bmpih) -#else -BMP_IH *bmpih; -#endif -PIX *pix, *pix1; -PIXCMAP *cmap; - - PROCNAME("pixReadMemBmp"); - - if (!cdata) - return (PIX *)ERROR_PTR("cdata not defined", procName, NULL); - if (size < sizeof(BMP_FH) + sizeof(BMP_IH)) - return (PIX *)ERROR_PTR("bmf size error", procName, NULL); - - /* Verify this is an uncompressed bmp */ - bmpfh = (BMP_FH *)cdata; - bftype = bmpfh->bfType[0] + ((l_int32)bmpfh->bfType[1] << 8); - if (bftype != BMP_ID) - return (PIX *)ERROR_PTR("not bmf format", procName, NULL); -#if defined(__GNUC__) - bmph = (BMP_HEADER *)bmpfh; -#else - bmpih = (BMP_IH *)(cdata + BMP_FHBYTES); -#endif - compression = convertOnBigEnd32(bmpih->biCompression); - if (compression != 0) - return (PIX *)ERROR_PTR("cannot read compressed BMP files", - procName, NULL); - - /* Find the offset from the beginning of the file to the image data */ - offset = bmpfh->bfOffBits[0]; - offset += (l_int32)bmpfh->bfOffBits[1] << 8; - offset += (l_int32)bmpfh->bfOffBits[2] << 16; - offset += (l_uint32)bmpfh->bfOffBits[3] << 24; - - /* Read the remaining useful data in the infoheader. - * Note that the first 4 bytes give the infoheader size. 
*/ - ihbytes = convertOnBigEnd32(*(l_uint32 *)(bmpih)); - width = convertOnBigEnd32(bmpih->biWidth); - height = convertOnBigEnd32(bmpih->biHeight); - depth = convertOnBigEnd16(bmpih->biBitCount); - imagebytes = convertOnBigEnd32(bmpih->biSizeImage); - xres = convertOnBigEnd32(bmpih->biXPelsPerMeter); - yres = convertOnBigEnd32(bmpih->biYPelsPerMeter); - - /* Some sanity checking. We impose limits on the image - * dimensions, resolution and number of pixels. We make sure the - * file is the correct size to hold the amount of uncompressed data - * that is specified in the header. The number of colormap - * entries is checked: it can be either 0 (no cmap) or some - * number between 2 and 256. - * Note that the imagebytes for uncompressed images is either - * 0 or the size of the file data. (The fact that it can - * be 0 is perhaps some legacy glitch). */ - if (width < 1) - return (PIX *)ERROR_PTR("width < 1", procName, NULL); - if (width > L_MAX_ALLOWED_WIDTH) - return (PIX *)ERROR_PTR("width too large", procName, NULL); - if (height == 0 || height < -L_MAX_ALLOWED_HEIGHT || - height > L_MAX_ALLOWED_HEIGHT) - return (PIX *)ERROR_PTR("invalid height", procName, NULL); - if (xres < 0 || xres > L_MAX_ALLOWED_RES || - yres < 0 || yres > L_MAX_ALLOWED_RES) - return (PIX *)ERROR_PTR("invalid resolution", procName, NULL); - height_neg = 0; - if (height < 0) { - height_neg = 1; - height = -height; - } - if (ihbytes != 40 && ihbytes != 108 && ihbytes != 124) { - L_ERROR("invalid ihbytes = %d; not in {40, 108, 124}\n", - procName, ihbytes); - return NULL; - } - npixels = 1LL * width * height; - if (npixels > L_MAX_ALLOWED_PIXELS) - return (PIX *)ERROR_PTR("npixels too large", procName, NULL); - if (depth != 1 && depth != 2 && depth != 4 && depth != 8 && - depth != 16 && depth != 24 && depth != 32) { - L_ERROR("invalid depth = %d; not in {1, 2, 4, 8, 16, 24, 32}\n", - procName, depth); - return NULL; - } - fdatabpl = 4 * ((1LL * width * depth + 31)/32); - fdatabytes = fdatabpl * 
height; - if (imagebytes != 0 && imagebytes != fdatabytes) { - L_ERROR("invalid imagebytes = %d; not equal to fdatabytes = %d\n", - procName, imagebytes, fdatabytes); - return NULL; - } - - /* In the original spec, BITMAPINFOHEADER is 40 bytes. - * There have been a number of revisions, to capture more information. - * For example, the fifth version, BITMAPV5HEADER, adds 84 bytes - * of ICC color profiles. We use the size of the infoheader - * to accommodate these newer formats. Knowing the size of the - * infoheader gives more opportunity to sanity check input params. */ - cmapbytes = offset - BMP_FHBYTES - ihbytes; - ncolors = cmapbytes / sizeof(RGBA_QUAD); - if (ncolors < 0 || ncolors == 1) - return (PIX *)ERROR_PTR("invalid: cmap size < 0 or 1", procName, NULL); - if (ncolors > 0 && depth > 8) - return (PIX *)ERROR_PTR("can't have cmap for d > 8", procName, NULL); - maxcolors = (depth <= 8) ? 1 << depth : 0; - if (ncolors > maxcolors) { - L_ERROR("cmap too large for depth %d: ncolors = %d > maxcolors = %d\n", - procName, depth, ncolors, maxcolors); - return NULL; - } - if (size != 1LL * offset + 1LL * fdatabytes) - return (PIX *)ERROR_PTR("size incommensurate with image data", - procName,NULL); - - /* Handle the colormap */ - cmapBuf = NULL; - if (ncolors > 0) { - if ((cmapBuf = (l_uint8 *)LEPT_CALLOC(ncolors, sizeof(RGBA_QUAD))) - == NULL) - return (PIX *)ERROR_PTR("cmapBuf alloc fail", procName, NULL ); - - /* Read the colormap entry data from bmp. The RGBA_QUAD colormap - * entries are used for both bmp and leptonica colormaps. */ - memcpy(cmapBuf, cdata + BMP_FHBYTES + ihbytes, - ncolors * sizeof(RGBA_QUAD)); - } - - /* Make a 32 bpp pix if depth is 24 bpp */ - d = (depth == 24) ? 
32 : depth; - if ((pix = pixCreate(width, height, d)) == NULL) { - LEPT_FREE(cmapBuf); - return (PIX *)ERROR_PTR( "pix not made", procName, NULL); - } - pixSetXRes(pix, (l_int32)((l_float32)xres / 39.37 + 0.5)); /* to ppi */ - pixSetYRes(pix, (l_int32)((l_float32)yres / 39.37 + 0.5)); /* to ppi */ - pixSetInputFormat(pix, IFF_BMP); - pixWpl = pixGetWpl(pix); - pixBpl = 4 * pixWpl; - - /* Convert the bmp colormap to a pixcmap */ - cmap = NULL; - if (ncolors > 0) { /* import the colormap to the pix cmap */ - cmap = pixcmapCreate(L_MIN(d, 8)); - LEPT_FREE(cmap->array); /* remove generated cmap array */ - cmap->array = (void *)cmapBuf; /* and replace */ - cmap->n = L_MIN(ncolors, 256); - for (i = 0; i < cmap->n; i++) /* set all colors opaque */ - pixcmapSetAlpha (cmap, i, 255); - } - pixSetColormap(pix, cmap); - - /* Acquire the image data. Image origin for bmp is at lower right. */ - fdata = (l_uint8 *)cdata + offset; /* start of the bmp image data */ - pixdata = pixGetData(pix); - if (depth != 24) { /* typ. 1 or 8 bpp */ - data = (l_uint8 *)pixdata + pixBpl * (height - 1); - for (i = 0; i < height; i++) { - memcpy(data, fdata, fdatabpl); - fdata += fdatabpl; - data -= pixBpl; - } - } else { /* 24 bpp file; 32 bpp pix - * Note: for bmp files, pel[0] is blue, pel[1] is green, - * and pel[2] is red. This is opposite to the storage - * in the pix, which puts the red pixel in the 0 byte, - * the green in the 1 byte and the blue in the 2 byte. - * Note also that all words are endian flipped after - * assignment on L_LITTLE_ENDIAN platforms. 
- * - * We can then make these assignments for little endians: - * SET_DATA_BYTE(pword, 1, pel[0]); blue - * SET_DATA_BYTE(pword, 2, pel[1]); green - * SET_DATA_BYTE(pword, 3, pel[2]); red - * This looks like: - * 3 (R) 2 (G) 1 (B) 0 - * |-----------|------------|-----------|-----------| - * and after byte flipping: - * 3 2 (B) 1 (G) 0 (R) - * |-----------|------------|-----------|-----------| - * - * For big endians we set: - * SET_DATA_BYTE(pword, 2, pel[0]); blue - * SET_DATA_BYTE(pword, 1, pel[1]); green - * SET_DATA_BYTE(pword, 0, pel[2]); red - * This looks like: - * 0 (R) 1 (G) 2 (B) 3 - * |-----------|------------|-----------|-----------| - * so in both cases we get the correct assignment in the PIX. - * - * Can we do a platform-independent assignment? - * Yes, set the bytes without using macros: - * *((l_uint8 *)pword) = pel[2]; red - * *((l_uint8 *)pword + 1) = pel[1]; green - * *((l_uint8 *)pword + 2) = pel[0]; blue - * For little endians, before flipping, this looks again like: - * 3 (R) 2 (G) 1 (B) 0 - * |-----------|------------|-----------|-----------| - */ - extrabytes = fdatabpl - 3 * width; - line = pixdata + pixWpl * (height - 1); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - pword = line + j; - memcpy(&pel, fdata, 3); - fdata += 3; - *((l_uint8 *)pword + COLOR_RED) = pel[2]; - *((l_uint8 *)pword + COLOR_GREEN) = pel[1]; - *((l_uint8 *)pword + COLOR_BLUE) = pel[0]; - /* should not use alpha byte, but for buggy readers, - * set it to opaque */ - *((l_uint8 *)pword + L_ALPHA_CHANNEL) = 255; - } - if (extrabytes) { - for (k = 0; k < extrabytes; k++) { - memcpy(&pel, fdata, 1); - fdata++; - } - } - line -= pixWpl; - } - } - - pixEndianByteSwap(pix); - if (height_neg) - pixFlipTB(pix, pix); - - /* ---------------------------------------------- - * The bmp colormap determines the values of black - * and white pixels for binary in the following way: - * (a) white = 0 [255], black = 1 [0] - * 255, 255, 255, 255, 0, 0, 0, 255 - * (b) 
black = 0 [0], white = 1 [255] - * 0, 0, 0, 255, 255, 255, 255, 255 - * We have no need for a 1 bpp pix with a colormap! - * Note: the alpha component here is 255 (opaque) - * ---------------------------------------------- */ - if (depth == 1 && cmap) { - pix1 = pixRemoveColormap(pix, REMOVE_CMAP_TO_BINARY); - pixDestroy(&pix); - pix = pix1; /* rename */ - } - - return pix; -} - - -/*--------------------------------------------------------------* - * Write bmp * - *--------------------------------------------------------------*/ -/*! - * \brief pixWriteStreamBmp() - * - * \param[in] fp file stream - * \param[in] pix all depths - * \return 0 if OK, 1 on error - */ -l_ok -pixWriteStreamBmp(FILE *fp, - PIX *pix) -{ -l_uint8 *data; -size_t size, nbytes; - - PROCNAME("pixWriteStreamBmp"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixWriteMemBmp(&data, &size, pix); - rewind(fp); - nbytes = fwrite(data, 1, size, fp); - free(data); - if (nbytes != size) - return ERROR_INT("Write error", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteMemBmp() - * - * \param[out] pfdata data of bmp formatted image - * \param[out] pfsize size of returned data - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) 2 bpp bmp files are not valid in the original spec, and are
- *          written as 8 bpp.
- *      (2) pix with depth <= 8 bpp are written with a colormap.
- *          16 bpp gray and 32 bpp rgb pix are written without a colormap.
- *      (3) The transparency component in an rgb pix is ignored.
- *          All 32 bpp pix have the bmp alpha component set to 255 (opaque).
- *      (4) The bmp colormap entries, RGBA_QUAD, are the same as
- *          the ones used for colormaps in leptonica.  This allows
- *          a simple memcpy for bmp output.
- * 
- */ -l_ok -pixWriteMemBmp(l_uint8 **pfdata, - size_t *pfsize, - PIX *pixs) -{ -l_uint8 pel[4]; -l_uint8 *cta = NULL; /* address of the bmp color table array */ -l_uint8 *fdata, *data, *fmdata; -l_int32 cmaplen; /* number of bytes in the bmp colormap */ -l_int32 ncolors, val, stepsize; -l_int32 w, h, d, fdepth, xres, yres; -l_int32 pixWpl, pixBpl, extrabytes, fBpl, fWpl, i, j, k; -l_int32 heapcm; /* extra copy of cta on the heap ? 1 : 0 */ -l_uint32 offbytes, fimagebytes; -l_uint32 *line, *pword; -size_t fsize; -BMP_FH *bmpfh; -#if defined(__GNUC__) -BMP_HEADER *bmph; -#define bmpih (&bmph->bmpih) -#else -BMP_IH *bmpih; -#endif -PIX *pix; -PIXCMAP *cmap; -RGBA_QUAD *pquad; - - PROCNAME("pixWriteMemBmp"); - - if (pfdata) *pfdata = NULL; - if (pfsize) *pfsize = 0; - if (!pfdata) - return ERROR_INT("&fdata not defined", procName, 1 ); - if (!pfsize) - return ERROR_INT("&fsize not defined", procName, 1 ); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - pixGetDimensions(pixs, &w, &h, &d); - if (d == 2) { - L_WARNING("2 bpp files can't be read; converting to 8 bpp\n", procName); - pix = pixConvert2To8(pixs, 0, 85, 170, 255, 1); - d = 8; - } else { - pix = pixCopy(NULL, pixs); - } - fdepth = (d == 32) ? 
24 : d; - - /* Resolution is given in pixels/meter */ - xres = (l_int32)(39.37 * (l_float32)pixGetXRes(pix) + 0.5); - yres = (l_int32)(39.37 * (l_float32)pixGetYRes(pix) + 0.5); - - pixWpl = pixGetWpl(pix); - pixBpl = 4 * pixWpl; - fWpl = (w * fdepth + 31) / 32; - fBpl = 4 * fWpl; - fimagebytes = h * fBpl; - if (fimagebytes > 4LL * L_MAX_ALLOWED_PIXELS) { - pixDestroy(&pix); - return ERROR_INT("image data is too large", procName, 1); - } - - /* If not rgb or 16 bpp, the bmp data is required to have a colormap */ - heapcm = 0; - if (d == 32 || d == 16) { /* 24 bpp rgb or 16 bpp: no colormap */ - ncolors = 0; - cmaplen = 0; - } else if ((cmap = pixGetColormap(pix))) { /* existing colormap */ - ncolors = pixcmapGetCount(cmap); - cmaplen = ncolors * sizeof(RGBA_QUAD); - cta = (l_uint8 *)cmap->array; - } else { /* no existing colormap; d <= 8; make a binary or gray one */ - if (d == 1) { - cmaplen = sizeof(bwmap); - ncolors = 2; - cta = (l_uint8 *)bwmap; - } else { /* d = 2,4,8; use a grayscale output colormap */ - ncolors = 1 << fdepth; - cmaplen = ncolors * sizeof(RGBA_QUAD); - heapcm = 1; - cta = (l_uint8 *)LEPT_CALLOC(cmaplen, 1); - stepsize = 255 / (ncolors - 1); - for (i = 0, val = 0, pquad = (RGBA_QUAD *)cta; - i < ncolors; - i++, val += stepsize, pquad++) { - pquad->blue = pquad->green = pquad->red = val; - pquad->alpha = 255; /* opaque */ - } - } - } - -#if DEBUG - {l_uint8 *pcmptr; - pcmptr = (l_uint8 *)pixGetColormap(pix)->array; - lept_stderr("Pix colormap[0] = %c%c%c%d\n", - pcmptr[0], pcmptr[1], pcmptr[2], pcmptr[3]); - lept_stderr("Pix colormap[1] = %c%c%c%d\n", - pcmptr[4], pcmptr[5], pcmptr[6], pcmptr[7]); - } -#endif /* DEBUG */ - - offbytes = BMP_FHBYTES + BMP_IHBYTES + cmaplen; - fsize = offbytes + fimagebytes; - fdata = (l_uint8 *)LEPT_CALLOC(fsize, 1); - *pfdata = fdata; - *pfsize = fsize; - - /* Write little-endian file header data */ - bmpfh = (BMP_FH *)fdata; - bmpfh->bfType[0] = (l_uint8)(BMP_ID >> 0); - bmpfh->bfType[1] = (l_uint8)(BMP_ID >> 
8); - bmpfh->bfSize[0] = (l_uint8)(fsize >> 0); - bmpfh->bfSize[1] = (l_uint8)(fsize >> 8); - bmpfh->bfSize[2] = (l_uint8)(fsize >> 16); - bmpfh->bfSize[3] = (l_uint8)(fsize >> 24); - bmpfh->bfOffBits[0] = (l_uint8)(offbytes >> 0); - bmpfh->bfOffBits[1] = (l_uint8)(offbytes >> 8); - bmpfh->bfOffBits[2] = (l_uint8)(offbytes >> 16); - bmpfh->bfOffBits[3] = (l_uint8)(offbytes >> 24); - - /* Convert to little-endian and write the info header data */ -#if defined(__GNUC__) - bmph = (BMP_HEADER *)bmpfh; -#else - bmpih = (BMP_IH *)(fdata + BMP_FHBYTES); -#endif - bmpih->biSize = convertOnBigEnd32(BMP_IHBYTES); - bmpih->biWidth = convertOnBigEnd32(w); - bmpih->biHeight = convertOnBigEnd32(h); - bmpih->biPlanes = convertOnBigEnd16(1); - bmpih->biBitCount = convertOnBigEnd16(fdepth); - bmpih->biSizeImage = convertOnBigEnd32(fimagebytes); - bmpih->biXPelsPerMeter = convertOnBigEnd32(xres); - bmpih->biYPelsPerMeter = convertOnBigEnd32(yres); - bmpih->biClrUsed = convertOnBigEnd32(ncolors); - bmpih->biClrImportant = convertOnBigEnd32(ncolors); - - /* Copy the colormap data and free the cta if necessary */ - if (ncolors > 0) { - memcpy(fdata + BMP_FHBYTES + BMP_IHBYTES, cta, cmaplen); - if (heapcm) LEPT_FREE(cta); - } - - /* When you write a binary image with a colormap - * that sets BLACK to 0, you must invert the data */ - if (fdepth == 1 && cmap && ((l_uint8 *)(cmap->array))[0] == 0x0) { - pixInvert(pix, pix); - } - - /* An endian byte swap is also required */ - pixEndianByteSwap(pix); - - /* Transfer the image data. Image origin for bmp is at lower right. */ - fmdata = fdata + offbytes; - if (fdepth != 24) { /* typ 1 or 8 bpp */ - data = (l_uint8 *)pixGetData(pix) + pixBpl * (h - 1); - for (i = 0; i < h; i++) { - memcpy(fmdata, data, fBpl); - data -= pixBpl; - fmdata += fBpl; - } - } else { /* 32 bpp pix; 24 bpp file - * See the comments in pixReadStreamBmp() to - * understand the logic behind the pixel ordering below. 
- * Note that we have again done an endian swap on - * little endian machines before arriving here, so that - * the bytes are ordered on both platforms as: - Red Green Blue -- - |-----------|------------|-----------|-----------| - */ - extrabytes = fBpl - 3 * w; - line = pixGetData(pix) + pixWpl * (h - 1); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pword = line + j; - pel[2] = *((l_uint8 *)pword + COLOR_RED); - pel[1] = *((l_uint8 *)pword + COLOR_GREEN); - pel[0] = *((l_uint8 *)pword + COLOR_BLUE); - memcpy(fmdata, &pel, 3); - fmdata += 3; - } - if (extrabytes) { - for (k = 0; k < extrabytes; k++) { - memcpy(fmdata, &pel, 1); - fmdata++; - } - } - line -= pixWpl; - } - } - - pixDestroy(&pix); - return 0; -} - -/* --------------------------------------------*/ -#endif /* USE_BMPIO */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmpiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmpiostub.c deleted file mode 100644 index a861d342..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bmpiostub.c +++ /dev/null @@ -1,72 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bmpiostub.c - *
- *
- *      Stubs for bmpio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_BMPIO /* defined in environ.h */ -/* --------------------------------------------*/ - -PIX * pixReadStreamBmp(FILE *fp) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamBmp", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamBmp(FILE *fp, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteStreamBmp", 1); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemBmp(const l_uint8 *cdata, size_t size) -{ - return (PIX *)ERROR_PTR("function not present", "pixReadMemBmp", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemBmp(l_uint8 **pdata, size_t *psize, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteMemBmp", 1); -} - -/* --------------------------------------------*/ -#endif /* !USE_BMPIO */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen1.c deleted file mode 100644 index b19f49de..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen1.c +++ /dev/null @@ -1,308 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bootnumgen1.c - *
- *
- *   Function for generating prog/recog/digits/bootnum1.pa from an
- *   encoded, gzipped and serialized string.
- *
- *   This was generated using the stringcode utility, slightly edited,
- *   and then merged into a single file.
- *
- *   The code and encoded strings were made using the stringcode utility:
- *
- *       L_STRCODE  *strc;
- *       strc = strcodeCreate(101);   // arbitrary integer
- *       strcodeGenerate(strc, "recog/digits/bootnum1.pa", "PIXA");
- *       strcodeFinalize(&strc, ".");
- *
- *   The two output files, autogen.101.c and autogen.101.h, were
- *   then slightly edited and merged into this file.
- *
- *   Call this way:
- *       PIXA  *pixa = l_bootnum_gen1();   (C)
- *       Pixa  *pixa = l_bootnum_gen1();   (C++)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*---------------------------------------------------------------------*/ -/* Serialized string */ -/*---------------------------------------------------------------------*/ -static const char *l_bootnum1 = - "eJy9nAdUU1kbrs8hkFACCQgYakKTiIChgyAJvdgAG44tVFERQRFR0SSU0AWsgChE0MGxgV1s" - "CaGpIOBYQFEJomNBDaIYNJAbymHmTvz/deJd/3URyTorS98ne+/ve/e3i2LA2sQQwpKITZvX" - "bowhWCvO37IhNGITYWMkIXZtImE2wcraRlHRfeN/+EzoxsSIzeJPkRRHP76ctHIWIXHT+BNz" - "wraJd4qZAfN9lBW1FQEAUPbz9QwS/8aKXzhQ/Bfw9CmFJf6lEOu7bDMA/TkcOs169GG8V3C8" - "x8YNGyJi4gHS2YEbf4gfOvp5ui2SLwoTiEp5GDIXk8DEpGvpu4YnCug8EnYnNQp5FEX9Auox" - "1F3CHeUXJeXzEDa+n/DbAO0HRjh7wNV49D/w85rvecadmjwm2wq2bKVJ2bL7Z92EJduq40uu" - "i/ihCST7G52XIsNFyDGRxl+AHFcCCxWsipDXo1IThQCw/PTUz6tXbNOR0GgNW6Oy+DV1TKPg" - "mcAclkZrt+Eupvjh7AmNjlROLCWZxUhh5IKpHVq4Qk6GXs5HG5LwJVVgxOFzk3k+ctx0PBqz" - "gKQbQ42i0kEVggxwWU155/GZHs8ltNvA1q4y+f32XO//Dku7jf33vtHe5AZ9vwu5fAqTxUhn" - "5aaxDjNYN8CskfMk4V8hAgMu343JQ2EK77JJtxiMGwyGVjKOUsZI4wEYLkoHQK2ewpybuIoh" - "od9WKv3j3/2Udx4zf03/Gi7fh8nKHdOvl3Pv8ph2lKc1dU8aj4wF5XRwux1IFxxIVQhVJQ+y" - "kgcew8VoMfH2wKetGhsKjhpNl9BvJ1Xf0RzTb8OYkQlLv23lkW3nxQ9nQvpfcPilDB4eNxxC" - "FYDeeBxOn95uhWRuRqMHcLhwHLYbFWyCBoFUB9XFelbmdRJq7aUKIuPftlnu53hYau1eFPmZ" - "iB/6QGrZzXwgo0NLn2/jUrUmUVjKG6Ix00Va+k02LjGRghdsvghQ1yOz25LorH4AYZMfl8+6" - "VcBzseGSdzFFXwH1MH11wpytiRIUDrApVCf7vK3WjFJYFPbe+bjRT/pBFI1ioSk3GDzEZgSX" - "m6KMTg8RWGFFG6lRqB5kWXEDVxndgmEIwAA0k0zh96OwSTiiCFf50cO5bW7ZEZFIB1i+xFLm" - "TeS5IxIcjv/Lvu9gdZCoIH5oD8UdNruNTQcU0Wh8MG5kgCQ8GCIw5fJRGdG4XR2kJHqZNhNl" - "h0Z1v8HIzxYigKWJU901PsoMSIh2kqoLjXd4XrtBOyzRjiyiXaH4odO4aI071coAQR7gOGge" - "OIWujrXokL1cpb7n7tLWQ72E5MMtltqC6GV6RN0Ip6Bzp2chzu86kjSE0+jtOPjVTEK3lXQJ" - "VMq+7/RoptxoI7lCvYbQGksv5ZFsPDbbIH64IQcRaGHsF6Bug7jfrKEKmkf7Pk4vPIrcxO5C" - "u7f0oZWFaEwf+NxZozLqTKm7pHr4eVRd/DIYU9/vdQleHoXS/3xIfesIg4d6iUKS5grYfL4H" - 
"yOXgkeh0xhWtHDlch5sAg7bpbgi4+CEA/UgERmli8UkBbD4dTGUMAh9+gBe+YbCiUhEIWEyz" - "Sa0hakyRpIGfcdXEL/wYTe2MMmNYNJArsJyMmjQ6D/8DZOIpXfRUPNCPT8jQPSbA2LS5eJdd" - "RWOBoyIbEgBcdiQxt5V+vyupFn6OVZ9U22rXFgZLLeQPlo+rtbjLUaBT0J7tMzWp/p0q0bdV" - "Frl0LOkL/0BonJ0Rp2nsgouU3yD/3UnpY5pRzvktpDuEpuP3Xj06EthUk6/faxnGn8+x0dlg" - "Z5hr9vgkPudD49m/ol8hjEBLMwuW4xtJNPjpd7QV9cfQXl/E3YKFBqXfxeNoEXerlekE9VRe" - "aXBwjUy5VRaz3td8xcsSwF9RLYl6yvxm76ar5RuOOsZ96pnLIC4Uti6WWZnyxm/RD9tvrzuf" - "Tmu1qFO821jDyzdVbROc3g5Ym9kRm7fg2iSh4Odkjcmxop/+jA4LCsrJ/pNWmYfiilABSUCW" - "Dr0QTBaB/XgC00Ps4PD0dfoy/S9TyLjdiDgydgBhZR1F7aYXbsYqONDnUqkCoJ4wKBLZALlf" - "KQ+YJ8qcJEng5+spkz3P4u1dbVgkUL5eMkGSyM5nxwK78SRdqmzoEArdKPKIdVZ34COsAs6A" - "lceHKDIeP8TWehfhiIjB4JHV8SMcdhu6oqjAKkB5AJRDIsuGlAMG8CSy2MiKAGDOi5k/nFTc" - "LSSp4OfvXxj9UP6eoNrQIu50JKwsx0vWMmXJTXV/N8NY3znpUb1v2QZg7fUI92S/+b/pbcE6" - "HKUKoktUdjh2Nc1TvZqwdkqOgfZRrdmXQm99XvN1fjZ/cMqH/XU2b+VO9JNkUSv3SSZzK/jZ" - "fDRKEMaoZmlF6MCigrL5pJN9EcCOpTMYLFBOiR73A4VAjmDk0J/xrqRmGlCHIZOEZPn6loAu" - "lbo+rBCjLOKkiPoRX5toAPAY6RQz2HTbUhIAfmb/hc4GZfbgcYDZY7EAS2lfEPFW0eFJbrc8" - "TulRbcCVhlyKn9uDKWrZj4dbz+PsHNWnvDrlv7b24e+9KpcdTDaq/Oj8hna/nf/H+6qaVwk7" - "FZVDNY9u3fbFW2lQ5r2cxTqf5hBJt2gNP/GrTcY4ha2bGbC4oMS/YJxrsbi7sQmj4TswIDL1" - "VG2Up/qpLZ11RhUVRulBZeoLEuJcbVRU/ZqemNR47j4W3Zr8rSXOjKDIjRR+FWy//+7a7EOY" - "c5Ujvy+cyUs2SznB27deEge+E9CcjG5fqyJTYOFATmDepPsVpoOyNNIDUpIghMvvzkNz3bAj" - "jYiGFmpzLRtdK9LKqB192yZfS9mJ4hSm8wywKc2iAPF8kMtGRq1X5tIayEDnKZJQ2UATJQkj" - "3dR7HMZsRLcEFgxkBIyhABfAaaNy2FTQEB9MSipDyhRvpqewGGCqSiwA1D8zXJYS7DgiqRF+" - "+h9NJ0ZjGhcuSMqGpRFK/xET6b/ZSjGFgvPkmWo+6Yw5uL6PlyqvOf3Gnj3Zpxw64heaD9mo" - "ZWNWWaGrFbYcNdy9Z9n6HpP1Rn9uiGpbvuauy7nlRVN4Kx8Il3qWDF5ySzLdkTH4hZaRd/jS" - "Xotq3OWTRvcxZURn5SeaH1dKQkpnBMajV0f9rlBYkJARmPA4EeMx2SOwXH3fqfz2GFnvDE+3" - "2/KnZO4Y6vPeBBGSjcrcf1/63OG60ZNXLaQ/qxI9RcVd+QJCZs0cguX073G4pBxHxQDyXy/M" - "PALPB9pUoj4cZ+vYvbRP/atp7i1JNPh2QHMyrt1lA6tgoUF2YNGkHShn8kQoHQLyPoYuAPKf" - "AXVoe7SIEsxh8hYkK3PzUrWZGIodniEwwOKRPZGEjzQql42pa+oWgVply+wDNrMEoHpKr3Yw" 
- "BQeUf7Z7Tvo8lS7JBN8YjPZJwzGmwilnLWExQcbgt3GmlS0scUxT92qfg42XbagMv+Bv/8Y/" - "xz2FHfTNCzFsuzij/OMc/a2Y3pVxwtTofVPOGDyof9y+84txv3onsVX2Ijd47aH5r+I/FTde" - "ovuhAml7NI4YzGR5U2TPWZzOlCSDbw5+IbxB5iAYMgcsVXFH9GxvxRapBfnLRhgY4u5rTjuc" - "ygKNe17XqqJl+rtKVz7ZE2Do3l2Sm5igvO+SlgFNr9ZfZn2RS7Bz5fuGhrNdVw9u8Zq9+MjZ" - "pojlyV2lB6zJi/fML5Dkgm8PfqHFIHsQONFiTSxl8SRCrq2UwD58+r22g/aeVLAwk1D40p6y" - "ZMX36axkwpFtjmuDCeDa7ZnU8rIq9xvJrimzO7nsGRcvpBudDpEv59C2D7U5XtSZmuYRVNFX" - "+UwSCL5d+AWXDdmFZf9wcaNThxf+3tR9Sx1xx+pqL5Isa+X3qcvOtk3QXN2/Z+vWCyeCKntD" - "SxYXr/l09rRzv+ey9HpfXFFZ85quI0ZvL/umXL37tXH24UCd6R17H48oD2bORL+4p3FBAswG" - "vl/QnGypzcbqFrDAIL+wDBpbkA8qM44D1R5XUYlzlHR6QmsUYih+cU6p8y1WDQfMWJR6rq9X" - "+6px/oPN8Zeu4l13uxkolMwIL8O13W5/cuj3td0fz6ELrn/deN07f5l+5EE7u8tHfH4CBt85" - "TJ1sMe1NyjdggUnUEBoFeTyMhx4Xo+TMxNB6cfrCl4iXKdTmhlg+WcfjJW2aY6QnBx2VJO9d" - "uQYRlq9FGNAf2Yt42s/y3s3zYei1PaehKT9AWSfr9GEcHidJA986oCdp6l22HoVFA1kHLYiG" - "L/baQJaKOsmlTE1GWw4B7C4xUT3Gdq6UFCZduWC8/2xtr8PCEgb5BeqEj75TrZxCwsnxNntl" - "OjQpPqfvt6YYZwRVaUaH2ieapNWXrjE5KqiYoWISqLrDImnfstbmjlkxKSbLV5EPlb5Z1Oe3" - "SPXajQ3Yx8QnHiV726INVJ4SDyzLufVcdUipOsbeO6Ymu1CSD75V+AU+yCqEjPNtEY+P1NEI" - "TTMkuGylKFlumNPtSfGdc3zKQRKrwHwIuSnZdMO5Aa29UfhA1TXK9hGDy++2Dp+3szZJNrPK" - "Ovy6dH5bE1FtXeZJbmLcl7rHkZ/mO1xe4pD947cvqIxYF4OCqHYVSUDpDIOUAQAyDMsnDcMt" - "sWHAE0ZCEBw8XQBi8fQeJCGyKkSAqkXao2mK63C7kipDBOJZHWMdBtEfR4hBhSaUuSKRmkh5" - "KvjuL0ah3KLsZLdQTDg9MoUU8CyFDPBrXT0XVsrLSaL9f/ENS/8Z29SBdiP1++3WiuWBs7T9" - "OdrTLClDSoafp4QdrX9v6XRvRlyFgkn4AN9dy32482LRhmnnLlJUfd4qvrN17br7jW1oFrq8" - "NjzkYZ7G93yd3BZyTe2pbfsksaQzDVK2GGQaVk5gNVcriE1DGm/13KJaUFf88yErY71jTkyo" - "vW9RD99Fw7O3f0sefrlcuU+6737fwK2e5yyij599u2el2h7RwqluMsFlVlG013lvNa8PdNxY" - "XfG79VHX9V9lLS7bbzvqr/q7JJx0hQXjMThWhqkyLDjIOSyE5kgiEYIbR3CVr1NGo3GFw3kA" - "QkQGkUM0JFUowmBFRHoPBi0iU/o9xJ+pHy0JZxSxCocRXDJxhJvHJlOC9ZNFHYhX78kAkNnv" - "/WcWh+0oiQTfO/xCNftfpYbx2YZ4Sr7A61CgC+Bhlawqt94ktCCn+76comPgrCzTlXE13Knc" - 
"jhPT1BeULIiKnTcb77bc3cHA5/JJDjKzhKi4Nfcm98nxbxv76n0StHR275xFs0iKigx5KMFl" - "+//FOkxGjhcjk3NzI/HcHI/hckQu6BbRsrJCBkML4UB/640baWTHdmMQXBRSfqTxHpsvIsuf" - "J5Daul5QhN34tt3ahDNVIYnAhW8Io7aEETIQFG7fmqhwd4kkGnzzMBo5xmdRhG/XVsJCg8yD" - "9ySaKIWHeYlh4tEa6NGomCLHZeDV0CiRL8l25CU1CskT6aL7RFq+tS0BNwg2lEhGcxKbAF5C" - "hE9tmw/cxJgNOl3cXSZJAd80jL7XG6O4Vsy2gUUBmQZtiILLBjzJOJwrqYyOoMZZFyAAbhOu" - "4JvriS5JZdJVGcarVO9PRglgKYNcwwqoyjAxJPKiutoj98Wf8kIR5UxmcKZpMe7LZTmBl1bF" - "XYq3XXHoRMXjOJ01Nv4palXDCQrPPER8z0VO1hzP9w/anR93PwiM/Au9Zr9h9naOoGHfB7D8" - "qGVvYJgCW5JNOscwzia7FtEAiw1yDAv/ZmOT5GXZeSsqLjjwFfUtjDzvhGqlqRKnm8tjs/Zi" - "XTBOONmGbCUHTf1ljt98b8xdo6bZbPNuWT1ZZ8j1Y5btnJntfyaGbyWI9jfgP0whfrjBfHtZ" - "Ekm6NQYpmwvyCBNr0PPGkNAIt+N35fW9yqvj7Tfb98ubLZavVvBMU1jh47JEbUZdxB3iSkqS" - "iY7qNOMVoukbt9XGXuWtaDx9/MQGmmhfl89x+107HHt/ErKkcwRSjmvIEUDZRcCmcvh0t2Qe" - "oLucpCsQD3ICFu9dq9xMaQZqNSnmYEML2RG3W8SjCkR5YLLIiOSc36qyLwzZw8Rjv6LYCYtJ" - "QqqQw88bRgEv35KAxz2x7yWR4LsBjPilO4Z04FPHfamQHCaQoqihdMQgDS2k1ql45IhwuD4c" - "ToBTqbIShoQKMJ7KaBGGWoZEOpc5RRP2oxOAShPc3SYtxV2SsqXL81KOEsjEBP1dpxZHALn2" - "88SgnkCX35ZQzDXtQ2QPTj9Kn2cWfuPj+2as6afjyPKrccqvSvz98mbWMxalyHnVnMu9+NSU" - "bBJhau9ju/zSLfz8qTQQTzfJBBfLWksSSbeiMB5tqQbrVGERQc4F2rNGpSDEmULsUZaIO48B" - "gydKQWGTsFmHGIV6DFG/Bx+/jXiJxuYjssgPynkiKhqIf+EwxbX72msJ3Xbw0/gvLPhC9mSy" - "xM4dYfAQRlgbdZIulQSEgswGsgy6hSDEZLGEDXiZADskGlNmGzyMQSJf09DoviYBOblFmM5g" - "gZWXwT9Q4Y/xrzHAu4fTijbbFyhLwsBP3L+wxAt5kokl3nljyx9Yj/a8oCpkWEWYhrfbndD4" - "ZS+9y67LuOkGGtfPMD+8szk60dYcp1x+56FS+59rrnckOc+q8dJ7AHg1FAWPlB+OSLu2ziF6" - "3q3tl1CPBmY8p2X7EySh4Ofx0VKG1hjU5+UZO2FBQW5kopo2L2esXF13yjPZMlTGO8TyTqr5" - "YDlJxkTu+19YdXXL+Z+L3txL21xVZROG7iiOOvKmAIcIpHU0Xtscbbtt8c1534MeTC95N/tZ" - "0y6ERbaxsaB20bAkEPz0P1rNGN/T9NrPoBEWEGRM9KGhQgdAJlOGiZBDYzK8UWhKDaG7FQWY" - "t0zdeXbQtUNSnXQJXHtMXanArQ2WOsicTKzYmjRXq9IJ4in/JyzWH3WBKvfIE0T+VnSwHWmy" - "Iqo7I/tey92PM2/M2hp75rbWY2Jhn7nsDM/bm4r/TPf48vShS+CHU/amSWmXWPYnhlnKNy1o" - "4CvOtLUVm8MlFwbs4Ofw0Yg0vrDzW0VXPywqyJZMTIZ3NrPEnQiQazfS8PWmLrp5sk/+VFAy" 
- "5Zx7Z5maG5Esm5v4ebdzhlLg6evU29hsX/vlztEXW/Yox3QdPrxzwxTbxtMjqtdo7be68Tfy" - "V2t7HfkCbFninJfla39IEgt+Rh8dGyZjWIyXC07CwoKsyZzJ8sVLPFeEoQjLVDIYX0BCDD2U" - "SYsVoDJo9CoueXSpA8MQWI0udSTgYqhhVCo1lAJMYw4oJ2FtFEIKD6NEIJCrHXCe3U8RSKJI" - "t9Nv2hiK4tp57rBQoEw+sXS7pfmaKoWq7skbyKNUyu7XXq6tEcV7RyGSznzNQHMO9jXrb5p6" - "tbY2b75n1tCrrqPVT8xmNGwYmInKbvxsEF/kHSisV+46qvFuC3DDdrY97+TueEkc+Blec7Jl" - "YgyM98PCgTJ85D8qZ5TRUFyIzfwiY6zNIpo5tmQ2HJh/KVVZ4ZQes9s9eWrwgWjDEqznnIcZ" - "57AZrvK5PcVeO04dHYmw7JxqXyInmit4zCftAsNibjqdI88qeUa0fufanhg9ZBbapvy7o0tO" - "MVuoL0kp3cLAOGWaZukBWJT/qld4ca1U6AS0XNuttw13Guy6rhw4NM1SX6nWq2OFcwJpSZRf" - "4m2uUY6hgxp94VltfmyvauKftc6p25kP7POO3Wio3/XhfaTKgaOnh0P2driItn+cw7HdvLhc" - "AskeviEYzaHj9YopRafhRWfIEEzUlyJyx6yZ7Ez/AjuX37ypqQRF/yWKhxLF5iyIz1BbZjrn" - "UWlpwWH220UfSbtDShBnhgKPB8bfNfRKa1nrt23bmRvTepHeGxs+3mFer+4ivxe9638c90AE" - "mG6xeCs73bdPEk66ZQEp4wVkECL+uR1ntBiTH1SLDNnb7MWQyaivnaZlEJHV1BCvrn7W6f29" - "Nybxarm+JgGPMrtmX06cvwVpORtHuz60gyz7GSkXze084MGPEkXtVYiyMznqxFrygDOS534o" - "raZwpb7BVbKbg6fSMklI+IZBC4Dyq+m99ethQUKGYQEEeW0U0qt9xlXf2/LRtdGtd0L1TYwq" - "KiiFwe5HWU4Pf+RXOHbWV2QyMq9MPWK53f9CDkbzJf6k78zr6/5gOPbpHY7u+aO8JgnYHEJK" - "rWg7Hy2JA98ujB6rGPc/rbffwttsBNkFs8mtw3RAGY1GqeFUXKiJgAeKhF3kAqZq030ZDDfQ" - "c1cAALwT6KoetLpxWFIpfOvwC+dWIOtAgbIRp5VPL+IBRh4IhHqVA4mPp+aT2Xx8hjZuxMFa" - "uCZc0NjMTy/lpc/mpm9nIkSqWVo4t4x0PeAeqGYi6CiQPBtiD98k/EL1FTIJE3UZr+aJZU4b" - "9X2krHIrmfk9Zjmfc5RW+IUk6jPret6POCcaN3jHqwf0nS+/HeEcEzdDiTdvt9c1xonuwE92" - "J8GSOaLDe7Z7h3u3mf4RtdR0OJhQKJDRbHXZc8il45wkm3TbC6UMaxBb8P814/Rs91Bfxi3f" - "pm6MpJrMelS1T87dR1X1Juto4p7sa+cbtGs03m26eHB68YnQiruqXY6BcYQdCxP0eg0HDLNH" - "LrPif1Q5ODyubT28a18i8Gb77GHKpeuSK3D28G3DaEQbL8OWvKZNgcUl6YBYu3kilDNTNLrh" - "Q92BYuWhxyVjNSvDBGEBjfzyst08Lkp+l3xdOuuKXkYno7AI1CK5NScFpMs9NeilDIoEKGBL" - "PVmWwa6XLAvaw7cMapPd79SBpf6wUP61A3RLs4HK2HaItWZmTjJL3z50BJcWl5u7UR9fPHrb" - "uuI4sr+r1Scni6/IzaryXOF2od7ocse0bfZn7CvTd+IJNTm0NWeJew475lCLZ870fDEk89zT" - 
"a1q/kSNbkko6iyDlMhRkhFZDRmiCagH2wcNBouOZtNRFhKtBTgqxKXc0DlnOSfZycRqw0qsu" - "ruqRt1V7oqAtV5S6rvbD7tbtikpni8wz503xUKprOPtsg+IyRPrGrMuW3n+Irlu8fwXq3HK8" - "73jPUrKm5gDfLvzCxAJyQPP+Oa6wshwP2VRLzrErtqtfXzJvl4+nWsvUDfAPVt2vy46UZbSe" - "nd/rti+l8l7CZf+96h/nrth0gLSuY9XIG3eH2+bZcxpQp4Mc4oOnn1slCSPdfsNxD/7m4hJn" - "WDCQ94memPvtHl3uRXvhDU3kEOHyd2ZY47yyPMyRh8Jl78w4JXCd32+8fJBm/5G5gBk4VLAt" - "OHnQb8XiL3W8lKc+tKLXO45v8Lq28Yz3zYWeDfZOg542V5zv1E6PxG8htH7bcnpT/TO1I2dW" - "f/kTr6bphH13rAAvyQrfJWhONtyVxMYMWKyQFQqZbDiFVIo42IsCOjyrQJ626pljM9ie0X37" - "DVcgmDroHzGxRikzfxSxNAI7rwQrHHiqOuVYsfdr74MLjm1J+fCxZ33O86L8iJU1d+/tz2nx" - "vJmKjPvuELnjqY8wiEYpsiUx81fclwSE7xtGAce3+bn2XZwGCxCyQdCGcgFNiOGSG8hogiCF" - "xeT5pClz08n2aMxIE8lFyOXG8vHy5+lxRuoOojB6WCQhUrbHm9giv1iJ44LGMHDhmk3NGA07" - "rDOOkDDylwgBAOvs//J9tr9Vkgq+xxhNy+Mlrhf3T/TCooLc0AyIaimH35jMo8t5IHA7gXK8" - "ZxnGV96MyOG7JfNAFAJrGwsAgkOEGSj3Ej1JqfDtxC9U436yzJNKwsq1u03F3gef+GOPXZFV" - "JNDfuP92ztPolngWiI3coHo92rYrhV8aYPrZfP4C4bNio27bp0HL9kaom/hrRmBuJLrvfFYe" - "N495lml2FKPztvydXAzHoivVau0lSbb/6WmFf+0hHV/CGttDOt1GJifhWkBF+DGC4iOSTqql" - "nKGjadbjEiHm09wHvvsyHZUK/E7d21UzB2/plOHzQ/OBVtz6P98nP0deMedYWNp7/MHcIHzy" - "MMe+H7FjwOVlonGc5Bq8A3xH8QvNBjkK37/HDcgVRRo3ozkiSuEwyMVHyo+AtZQhzAcMzhXR" - "H0bYhan7i5Xoir1rY0ZiUxDYlBZ36294DxkuqswRiwBUCuz2UdfHfJTk+J+uMUAck6u4Ij6C" - "K0L4XgGztAgCPFbkVkvWdSUlOQoYPJuAD2IMlbpGhtgU6WgRysAM3mvd1GKlRzPZfITxtz/F" - "NqZ+9ltVp3WdkhTw7YMaIPUBEsgUBUxSNDJ46WmjS+hYV4CTQ2mm0XkDNNkRAqauGTuUh1Eh" - "uYx0Aw0iGonazaV0oeua8ZQOMKeTsV2A0adGkZPoPKxzhwvg5eh4PO2ZfpYEj6N0fmGcZ7qu" - "jiksnn+t/EzaIYzG/hufr6w6WZG8nnKFEqchW4hVU+/JvP3x3ibvOPRRo0HDSzIPI+/e26bR" - "/z3HD7vqSFq9dh1+iKf/6aGf+c31I517Y9IemkXPun9C8lyZI3zToAZIfeYCckDOfx9tZQO1" - "ytQ+oB6F+052r3KRJ7uyq0R8kCkidKnU9401mbj/cZJpZSHhV2mfUYAIbf2wObcNKSkdvgf4" - "hcaA/A60WA0dF8kmErUYixf4F+zZn3Eu4zwYbxnUmz991auNetfVjKYcyBgwKb4dTD8Xka2z" - "8M1dFv4tQR9YunR9+9IXN1syDTf8cSNsziEy7blZ3pw/U3wkkeBn/V8YL5InYFTpBMCjPT6H" - "R6zR1lhfXx6r7KhJ3z3/VtrvmmebXlm2rzilf+rA6RWnU9vwT9NXefk3Vfa60Uaq0hJ1qDb2" 
- "68nmA4sFJSdnjlx3dAqZVnxPEucf6d6a9DeQ+P1/aiXs+HkF/xOf/0bSrx19gTFtW1z/jQQZ" - "GejstID7gs1nXGXwwDAEl4NQRqfjSSQSjUql+nA/ULqEYOVrMGCDERbxVdi9EQAeEqZOL3Fq" - "dJNUbieV8tH3qmPKm4MBX1jK/71vNZFGZzHAHF2cvi47nb6ODQBmq9VJRSGIDElx9lKJQ0+K" - "43jHYmCJg+yJFfS1BrBjKUCqNg732ookTCGA2O9hbP7vdJ4PyE1HoTEYkm40cCKECgA1LNW1" - "A/MoJEnRDlKJ/k93YPwX0ZDvcP072LQBHvPM0X8l4kb6ScKCkNGDUWCOvtjUdocI2LX8PAaP" - "jLWjBJIR8rZVJN3ukEQqAhiga5SV3rldIUngKBUBZpJAeVdxHSwCKC3Pmog5d64pAAQ00M41" - "3HEyv172WyxYfSHfU6h71uX6S42lK6rWKyzEuS/M2jOloDTt/I0heVwrsgunBTz6oiW5C9vR" - "Serv/2f3GPwX9VA6dvp7UxUfkeM6uukN4Y4ZIbmkslJoOJUOkm5+SCKbyy9N49GU1EJDBI21" - "bWxuGwEBRDXrh6A6ciSne04kqcT/p2sB/ot4KPdO1Ce9asfCPcDJWxFg7e4dYhtdXdhZZxRQ" - "Q9l0p2R3zb6gWIO0JKeUQgHb+UkAJ+fisv7LueTUWO1O8iGSVkXPB4RkJdjJSuruM84gJBJV" - "YTH8e5/F+AAYwFgJUUftST1hApTnJsKXxt08FPahVRJQjhJR3nSTmVwRrlhkdUE8b6N36NUu" - "vd9rI6ndWurOozGmfZVhjzEs7VC6nQV1HvFQpafzYpW4/Qj0B6psuK5LLZ8uZ6ziiQrGPdGi" - "9AxjcAI8vWkQhU7H0OvwfHGrU3CuVlUMyXUTJ5v/ddeHMqvH5PVG6bwUZS5Cn4kwR7eg0R9Q" - "lAy9HELfayQTgUZjRjcakoRrQgStXH5eOi8PzcWUndFmiufffTRgaTfucXYnUXJty0m6k3/j" - "u6VO7rx6AZY5gDLp3wV6AR2UmYJTofNRcswGvCK6hdIsTGcw9LRE3OSCXrd3h9xwld8IbSUI" - "BUXqZrfjNODJQPowCIgMTJBfStsOSOqX7gaeX5z8G07ejMEGPEU43AiJZEsiUBDBvfgTGYis" - "EfG/YvVN6+MrnO9WSYX/0033UFINHVdo12ylyB219xvvV25+f+9x7MH1V897qPScfEN8d/+P" - "ktoom6Cm2pSIEHJcpr1C9cJOonn4/djn6LODA4Mj1QEHM9ZFGJwbunw17lDXSUxgZMHSHVkD" - "Vc9TeeRhTdZMcJ9VrNn9vZI1QifpTvSPb/TKewlKd4tT4GQf2kXnIQZAJioHhaNU9TMQHjbY" - "yzSgB6+CHqZ8J8uTHdn8PFBDtAa4bUXhY5DIXcRIVEgZzqatBFmmnX+VMOA2KNxLFneGw5a0" - "37D9DZJA0p3W+3+7QmLszDhJ3rNdJyXQZYn/Qf3cQBwnSFsxfJPaFd/+J6ICni++6VnOviVc" - "9lq354NE++xzrDwqSPY/q4APKp3xZm74d5RWtb0B5frXB5Ik0u3Bk3J4Q8kZugxj/AoJrfNk" - "qqNABHrYqDuQEFYIeVMSiV4tI55ByyFf49HoYZp4AJWH7CJEisB9qD9CksqARyjrb6C8tbhN" - "Lh0zVXs9u3qLBIkVSbqD/DpjKMtqeqxhoUCpeu7EKvxEfakdf6wJR7rz4UDyxeZpObERWb6c" - "imOJuRE7HxlEamQmyu/jbNgp+/j+o5zIEY4pceQBb9vIlk+zh12ePV35LXtV8FITwH2wVfUn" - 
"MNJd6POL10VAh/EcCSIGg+FGB5HadHHDiOfFZLogBcROJQlpQB3KlSTcKNsTSYgcEQ8cvWQR" - "38OqrQQ/iBWCqe8NBKC8qHE3TSQCAO5ht7PMF1+9foLzP62cQ2l8ova3+N7oFBOdxitN2QOa" - "69ZzzM2pNl11qubTN3HoWxM1V++9vkPhtMK7tKzY83HVQTWzg4w3vVt9gum4qXTrgd2U+xT7" - "m1ZOe/dXz5lTFV2oN3x8FmckEegcnnm1SYciGdysSNJNn8eHkOet+1mw2KAkP3fS39LoPFAc" - "3RDi6KaNo3QwGEMgAvEDNbcZXZdDccJGAT0ICrpHJk1rRETv3kX4Xa9ikIFYJNc2t6wTRU9I" - "oooH0Z3D+JxoJ3XJk1JWpH+ke5t/uBabn7uW0ZQ5Pt1QyPYf+hvI0mj0BSYeIyf8p3BNhoAE" - "KTy6HBdUYNqrMBNUmANotIiRkcLDo7gYBSZKkyAbjvAasSHku+D640ikWJJtMClJCAKuXuqC" - "+1y5lz9hsJOKATs5fmw+14fAYoAitPtkRfYIj+bMpSUwaZS6Vj4qo1drpMNGWIliYURub8Kc" - "uWEJxFkq3sOkB+xmQFYZTfgg6gx5zW6jJQACrsGKR/o53j/BsP9fN8W/96onCkaXKHB+JBcB" - "J1afqSKiOlI5bdM4pNIUHg0xq/gFm93KZtdy2thg0OBLFJDA0FueaLqY/xPtDlJp/0/38P0X" - "7VA8nrxnqbWLzU+vZvBQHuJEQtiVD9RhtHEqIzFUR+EtBks1Q0fAwFWeGQKwiFcdZBI7zlnd" - "waOfh6EMYxiRjObwjWKEP6c5z4i4eewnOI7/66b4Sc1y9Do+oS76sx56UA8tMvCVyRoBC+S4" - "eCUmXhONN6O84WCYCDJOm0ayFTfUCAIo8cJ1aeKcv/5Ev5PUI2J8TxA2fa8sLP1QCPaAiggT" - "M1kfyxWRxsxyqoXPKtk6GcuFacHE+29Prlmqb5L6ViB8tjYkeboHTg7h8FsbYva8DyVOdfXL" - "yefN8Tm2T74Dkhz/vKQPJsfPdgz9Fw4o3E6M7MW1Y7tSvDimx8qa5XP2tMf29px9Fd0TTfFj" - "7GvbNOuIv6Z3I6pYd+hk3S03tvLZ6uSHBUasrMPBRmeVa/y1yTPiwn5yk9I/b+uDgSEPQBVK" - "RD+iDRYGNC/RnOhOb9ZQZWu9mZG2HU2ATC4QdgdpkZdOS/+JNGupezpmTNrU4MQnsKRBExIL" - "6IBMFZWaT+1GGKogt5G+o7xpxrhdvqSkbmpiB7utSzwKxD8gcPGTwiev491FP1FsI5Xi0QrH" - "eF3SLO4uGpZiaJJKmpxn5/FSdLmgKxMRjf6wAf1hHrovnf0GyOhNZw16oNM7cfp8K5d8BHDp" - "Itb32CUe6yeipUuzo9Z7fO+OxyKXeKl6wOQ9paKZDJ4II8cUodRxI1ZEkhAMK8PJO7D5iEXN" - "QH2OvBCTWhTYj8dG+nTjmwpFoDiIDgOVTLDyIBi1CvEDATw74abm0ux58ic00iXcUY86vg9E" - "b86zbFg00LCMnbjW4F61choJiwjwUZ+uxYj+vDRE1l7gZq5p7/R2ZmyEojrNByxeOtSHGLJq" - "SyFitWbgzFnzjsR6od5lt868m6Lc14A69te5DN/KxCg9/WU6uh3dj53scg7vjTJcHfhgWfQ6" - "i7bfmWdZyEGbJ/jnzgtPRDanXPsJtHTpWReA9sZuQ1xYIdVI2TIRi5o5CqmjB8tNnXrfJORr" - "9hEsOt1kNCPrvA3tSY267g0OSpZ9r1K0EsmLtjKyjORkg61WLGvbOVzWWrFTIcHbOmhl2W+7" - "evd3JOB2bt+x5VIUwUkBeevNPX6FnGHBg3Ci0xfN6O1avwOxTFF6e9dRl4cjKwfZmYs8f0Iu" 
- "XXLXmuy8ixJO3YBFDhmTiVssttybWAbzsCRqMbwMq5dqu5gZEY6xKI6oDEOv4qVXBK6FW9jq" - "qGj1MNVEvYIvX1ivbs92W7QoNdP0YL7bEZ76YJ7urH4dZIVFx94752as/Xjnjp1wSd0XOcxF" - "/4ET7bMlj0VZWUmX8kdL3+OzluPV35xhQUIpfxG0Vf2aMp0AeLXPIRK1OIG6yjlpYVcUx84R" - "8TI5rX4qWr6rPerO3d0Un77xyHmLyt0RXk7NfsTdT6M8Lx+9evX7VfPNN88a3kjJWPDF4b4c" - "2Em5JjodcfMnYNJ5Ae3J1rvlNvUmLDDI4W+YOB91j6M8dmDHQNcxKPmY7/QEQ72O+wfcFfSd" - "sI+0ctvvKul04LuNdLpkQLRNGdWY69Gaax768MqUrTMqFGMEDz5mVJgce3Aye772jqRSAWpE" - "1/Ct4T2dGM+v7msIJX0Pc28o7mJZ7QDtmgP/lE3N9JcktpbONWhNhietm8qXYBFD84GJsxUR" - "90avgFCXbcOr35e3JFzJSEtZlhRbLTu1XXlXkWHvjjUFDa/jWvRMP57pOZKpBp5eiCDqGYlN" - "OIq0R+GC3IM8iz/2IYJBzO3ec8te2RoXtLkZfSi4+77Uc598TW5SZuNqrFdA5fKWq5J3r1hZ" - "S+cstCeDUrxenQ4sVMh3r4f2st8Y3ZLsxZtDjJqLY2iUUA7PpZ53UDQKCMIeOVhX2vB6Rivj" - "nWVbqJ8MqLVEJd6neKZxvvY7htofZX3bjfojvsrOz2cv+k3e3uOeXwnRCv8yPQavvfCqVfHJ" - "0nbSQMFq/qPDd+TMh/zrtxevkby70cpaOruiM9mZA1KS6LB4/7UDcfSsdR0B7c1LJxLP1+bG" - "rVxy21i13CrLJ9aPQbi1vze+xTT/Q+Pn8FNMOsbmEFHF6M3TbBFLY6ZbDErvWcI5KyJmYOQ1" - "jvipOnR1+/zAqSjZt8d/u77rjWELlRhwbXvuPHDQ1+Ia0DfkTV9Xvvr/OvH0fwCqtDFT"; - -/*---------------------------------------------------------------------*/ -/* Auto-generated deserializer */ -/*---------------------------------------------------------------------*/ -/*! - * \brief l_bootnum_gen1() - * - * \return pixa of labeled digits - * - *
- * Call this way:
- *      PIXA  *pixa = l_bootnum_gen1();   (C)
- *      Pixa  *pixa = l_bootnum_gen1();   (C++)
- * 
- */ -PIXA * -l_bootnum_gen1(void) -{ -l_uint8 *data1, *data2; -l_int32 size1; -size_t size2; -PIXA *pixa; - - /* Unencode selected string, write to file, and read it */ - data1 = decodeBase64(l_bootnum1, strlen(l_bootnum1), &size1); - data2 = zlibUncompress(data1, size1, &size2); - pixa = pixaReadMem(data2, size2); - lept_free(data1); - lept_free(data2); - return pixa; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen2.c deleted file mode 100644 index ffcf6474..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen2.c +++ /dev/null @@ -1,291 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bootnumgen2.c - *
- *
- *   Function for generating prog/recog/digits/bootnum2.pa from an
- *   encoded, gzipped and serialized string.
- *
- *   This was generated using the stringcode utility, slightly edited,
- *   and then merged into a single file.
- *
- *   The code and encoded strings were made using the stringcode utility:
- *
- *       L_STRCODE  *strc;
- *       strc = strcodeCreate(102);   // arbitrary integer
- *       strcodeGenerate(strc, "recog/digits/bootnum2.pa", "PIXA");
- *       strcodeFinalize(&strc, ".");
- *
- *   The two output files, autogen.102.c and autogen.102.h, were
- *   then slightly edited and merged into this file.
- *
- *   Call this way:
- *       PIXA  *pixa = l_bootnum_gen2();   (C)
- *       Pixa  *pixa = l_bootnum_gen2();   (C++)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*---------------------------------------------------------------------*/ -/* Serialized string */ -/*---------------------------------------------------------------------*/ -static const char *l_bootnum2 = - "eJy1nAlUUun//y9eBNOrF3dUBFxKsw00t1xA0dRWc6xsR22x3XanTEANtExtmdJqUlu+U00z" - "ZautglczS1NbZqysRMvMagbTihS5f1DxO+d3+58Dzvl20jgcq/eL+zyf9+f5PJ/nMYxcnhRL" - "n7Vk/Ybla9fQ3Q2nbVodt2Q9fe1SeuLyJHoAnenuaWgYvPb/8zNxa5OWbFD9FMNQ/ePzGAsm" - "0JPW978zmv7jwCvDzMhpYcaGtoYAABhHhIdEqf9UfZFxqm/AsF2Tvqn/SAyfswHQ/Po5boS7" - "+s2NoTEbOWtXr16yZiPg+So/YrjqzYCIkKBog/x4uYNEJsClU8MZ/kpJIiXLlscYH82gnAeG" - "KaVcORwCQaiduA3YX1yaFrQwVSDRO/GpAwbcnlI6fgVdDNT/S0TotJDfg7mpfdqZWms3V33R" - "+7Q3VKTE6aR9lkY7Ok4gRXLwQiJEh9KzIqtRvvQbSoT+rpeZLz3vJqyEzchHeUAzzRBCUUZh" - "Uw+70aT8Q3gemqMHsiI/JhcueMvy5yYlyXAI/IUILPcLXHf3+YdMDJS71lDq1/Z9UDcOiT10" - "ggruh1pyp8iYz7BgS64Gze4+7EAKn9gerzfL1dBIEm06d9U+80o7iwlpobMiSr/M8fI9cfcJ" - "ftPiLfedfvmwd8kn0DxhxJalK11uYgA8tAawUH1R+wDwy8E7OgHEDDyVBFQhKPrWQRQSWmGI" - "yM2jZXxkMShozk74PMhsIQoRGkTORk25cjQHp09DXcR10B0Iqo7c7Be5AKWBQiVNH6pkJXOT" - "cmUCKbGTCIy6OKGMpfdyKQZrvNZYkBqtD2vPLulNnbAmDGAlccV1dHGd6jsqkKJEYS93/DeQ" - "w8GFk1w/5wikLJInv05BIHwjEFJwws2gChO4/NLK7HUYZQxGuKdOM9yyT/hlyzkuOgn30ggX" - "S+qAIDOol8VOCMxbfIrLzeVym7htquldK5aV4tJbvRky0JQI2sX0CgyBiGjTzPL5tHKMaC+t" - "RZsODqJm9t1UnURP7xe94P4NUz4dCJFsMDAYI4la9XFCseUH/JQpVXSk0mcE4XBj2KbiTy4x" - "J+esvrnG4oSxaZf1jT25M1yWdxXk19uvcixbnlnw7ZcnZ27Q8KWe091K17VgaLx1mhKOfTRb" - "6stJOtHM7qdZXV2smtMQIMlxfbh0/yrxJIifngd58U3JkpO77t4pEqQ2nzlI/Wr+1f6VuO1e" - "Zfru1DHn/njDL6zf9PspS4XknNNrxqXg9ksXJi3Jqfyped8bwt2x/o9la2RrMVQ+WlOZqL7s" - "+qgWu6ZFDyVSxfsoRdImmpBgQz4Tm4AidagoNpD8mXMiifxsDrn1KkNBPAYaPObxBagByDMC" - "txlx/GAEZyckegKua2mFytDUexgAX50GWT/Ahd1zmDoBzOwHsK9mGqpCbZp00QSfrenUJ+Hj" - "ZiW2tVnOpcw3ohbeMaFJ9T+Rz7X9ckoya1ezffOVt7UvXoyY++lFyOnm9rgpUyqidjAycz+S" - 
"3ju+Ln3aQ/2Me6E/dvm8F3fGY5iY2hu6Gsq2D+rH+p37dYKa1g/leV8NRdKvHzZydMDe2T1x" - "0stWUa/JY1/rpe7eUWiXWpG2XPje9K3xg+lJhnMucfbPbqA8jJwSndaOUK/k+r0vv+QW7Jrz" - "p/V2wyrnPMmoIAGWRnuLNxuMA8O2bBDoRBM5GLyS+VIA6qRxfZQogGyjB+akw8XgGzLMoPgn" - "8qUykCh8S6OQlSiXXcErJE9NRmEiIaVw6xcikXwIpnITgGedAlT1Lzb4fjy38PwVLI/27q6e" - "M/3pVvPNjm6teHyKXD3zVG/6auYMHUnMExXd2qGaFHrFVxnjOxnAycIMUdHuHUUFO4JuC4KU" - "AGQf28Yqq3ugFyWTwsCl9RaLZk77BcQK183V+3OtCTZL7HQSvnRgWNX0pSXp0gJ67nncqP01" - "E/RcnbNCUtkXgs3xX6p2/icRTyoouXk/YP3q8nsG5JVdWVXMJWG9N176L63N9d/ufmBa70NG" - "pv9Nnwd15+ch+wtrCt7X3x99ffnaRy+LFhWft6uq9rK4s4z/HUrtTX4IIU1DOVGTUa5FZDdF" - "UoWpUOmiGnT8SBY+IDaBV5YohxHQTggzuiKE0gAY+WYl/BbGdnsjSVCak9pZSKLCBHnLAp7J" - "nHtPQ3Y+WArtHV9N0e/4Cx2bnXWi8NBQcJFEvqgoc4d6nN3CpR+aR7aNYafCi9kGd4H1T23Y" - "UQoufsYGvr6rEdBRCzOplNpGrGTt/R4elFzclGmrk+RAjWRxpUxJRr6F0R+bTKzJIdseZVK4" - "8T6qd/m7pThzBDdC2GkM9cJkOY0fTtdz81NmSHE0YGan7XzcnI/1WPXa+7vZ4OTwJlrO10l9" - "/EC2kvWranIAITn0XPui1KC2NQbOTht/cjJwHnXJLvpAZGBY1I6k1k9/SakZqzIem61z+CNk" - "VX5e4M5fE0oz3tCTzIoOvprZtc02Dk6fbhLo1wA1fOL0nG0lP8mldY0RjnuS5zchzMMpAouo" - "vdkPIRBrEGdqHpBEFYhBBCcksihkEyUuliBEwuV0sh//DSsaeKL3JK1LEYt/yN5Oo/DX0/jd" - "KQzGePCJO8j0I11FBbLDklYfdg1wZja+fWYgAGTPH//n8mkPsCk9U/sEQJ2K9XvlAXPpGZ2g" - "BrzS/p46qJHY9QXR0ZudhbFmj6t+x/vWe5bQz8/egsfveP3m19ofOtLHUIS2H89t/iN26fSk" - "EZ++rhJuLB/JiQSHPdvxsiHTOm4rLvOss97MX479gaFx1835dUxnNDTRmkcU/0osE1wTSDsQ" - "gsoU7VSrLKCZBUEfWfQSm7sswIIFRHfybY59AZ1ya+rEiXIgsgYkEDYXTkghKdPEH9jbP7KT" - "ZU4hr2AlAcie65YVEmb2Acukm//3M8253uyuE9PUfqap9/sy55B62hJmThl5f2ZM9ZTUsMR3" - "jmZ56anN7048sv96MutMUoTrlWHtZTZTup9d5Jz/9Lz4+ssmxqmjcUvQS9fmFJzqBqjDXe6I" - "toQ2YGG0N3/LwTn0/tcEuU4wMzQPCJGlSTkCfSSNZgYR0XAGRVmJu1MTaZs6XCyX0oyEMC2F" - "IW4SwVaJAfTPH1EyuYHJiPwLJnKcIl8IYDMINnMjpzCUAJDX7S6zqWxagAXSPilQR22r/ri3" - "seeSVkDjTx398SLw3zWxnJct5dEQXqCQlwzxxG1EGlLpJaxdDeU8J2dfZR5ZG99GdPQiGCRw" - "4+S1hD/0gt4Cc1/bjG06scYDq1x7o1enM7Q+5bWedfE6KZ8yWDrqBBGUGJlM2mknyDMVoL4p" - "XDlYxr5WALMYyYTmt5HJ1Iy/wY5k+gFWrhgAURj4HJh+qIeUub2QL4UjlTSECBjkhoFzLHFv" 
- "sSza2/0QMn4Ni88gi2rOo/H0QKi8JrI6S8zjN7MEcu/Imi30XkpGLdjRkWYMGUMkIE7wuaWs" - "1g6Yv9j9bPnG+yuxurX3fPPB6dB6mVyqk27OoG4FDkEr2cnknYcFeTapaDFHRqMrcaqnALM9" - "aQI5k0RbK2xKcW0lxvlw5wNMI2M0NpIrlxSq0nDlBFn5k+DtWATtjV9tIP1py7LQH3g6IfgO" - "ToACKY+F8FKEPG6XqEBKZCHEFGGlAqqUnBL8LBDswRnYkoPu0dtofghtqZD1ATj7h3lKNX0r" - "duXrrr2d/4uZ66kRzsqRllIQFqPbqVLGyZaCeQrQHH/sx2SIMgWii8tlOfBO5XmmQjVxURD4" - "W2qZvjGRPAorWnu7HsIaRCN68uCA6QBBMgvfnBbZaFCeRVLAGSg/sZkopPUNl84WGMqBeVwf" - "uLyXvZkVCYNpOBBvQK/+EaqA66tFVdtVkbP042R75JdtczEoHtp7teVg/KF/vbFAJ5SIQZQW" - "IoLCkQpgJwysA0kebBlrBw3iGaYwFHJAQmMpuUlQea1A7pP7KkWcKhAI0nB4W34XKgpfKi2j" - "qUbr5vOT18zDz7iEBdHeoK0HJ/HfC0sqhvJM+ktbJM6MXY6d7kH769tinKIvhR3U81U4j3EJ" - "uHZ+U/uVrfcnFzazBReytnx9fMayy/rGYf+nlsNX2XpfCJvLG4FuU+B6FwVFXDhz1Q+Lor09" - "qwul/ZX3OROfT9IJJWQwf5LIkFQpCAUAxwxVeS67rQkWIixyNlhvIgwcJgw0Ef7IWEycuMkM" - "2kShL5YAei5ksjKRodgHOALHt1L8RJHlllgG7R15CDkghoF3W6CKSBadHBnLVQlWsPhyFxJP" - "5WdsBVVlBetYpFpinM0XAoGUCzajHww2sD/w2ArQVdnJA4HHb0aUm0fU/Y5l0N6b1Tlff43k" - "1djcIzoxDHjzmGoHE9WQCqmfHnz99/mnr3LW27++PilbMEZCtz0QcSrpW1WGwxnzX/M6J9Go" - "1y69ah850yRtDLJX3Kpn4RhB/YC+2/co9wUrcLIzdX88FIhl0d6bLQbHVGrvuoShPQ/0myrP" - "EKniVUUWjawM6qCRAuUC6TgSSmiuZW+qFXT9SP+LGJdCFwoIBFCIEvndcJyE3iHUIxEAY0v2" - "Ausp5Swsg/Y+PYSaldfEXHKB6s3QwTEllL6iCWmeEE3J8Fcg6mJIBwv6yC2PRMRiMPjDGEhB" - "JL8WtOEMOtJmoObCL06QaBuZepeRfAoHrCulnD5Ea07FQmjv1OpZ5dAHQRW94OsEMX9gIVFT" - "bMqnqwZVqJ6rJGr0/dxGfplriL0dvZGP+8pgJm2wVlxDZCGgJKBoPTIGnm0xdZ/dnkqXUwse" - "N09KXJHssc7QPMVxPUp5tmtu/NeVU7+YvjBteW3Ee+G9xsBmrCmWTXszV/9lSh/buYzlmTqx" - "/Xcf9FZ/AmhSrjCG0Nd2ZGWDN0Nxai1XLisVSB0saOnNrSn0pYS4OwRCq8gY+kijl6BEkNOT" - "Q8ATdnryVnF9FKocEhf5iphxDSDbe7691uTyAkulvdsPYemnoYrWhIEBZzlukZ821d3ML3Lx" - "mOMW1YKYplMljnrJhhUrR79vqQopMFhksKE5t8bwkOnKkozue1MXb2eaV4k+vCE+rhMtmLfP" - "Y+uKmC5z1psmZldy9YVoDNN47W1/CGUuDVPQPxZMLBpC8xXSNqtmE9NfXpkoo3HS7DmgHWhu" - "Dm4zB1EcYysQ/IG9kV/0jYRIvITEZIj2Afi602qO9QG9eCyA9navTnit+wDcdn/aOBSAMWVF" - 
"pmI6KVRy0FdwIv3SvZq1UGuI8xg2MRzv6Ph0zrSlLgeYP5MMu5p+3isWX7P1UnZPOzajgjGr" - "tXHcyLHW7fMMdoZhAbQ3+SEsOjQAmq24moFRFbYnvMqgMeFpuvtwDwfDPxK9wn8MDWFk7G3e" - "tMiLkDGKFGB2m7R6uE+G2w1KgeM+W7Kpv9Ok9i+H/FLIV0R2O59dXBK9YtGio0kgecX4XRt/" - "IoZjsbT3/X+BtagfK0A9WegW+LqLxxbjybunOhef34ifXR9Zcuca7s24jKTy8mbPLQc5a01/" - "Zte6m+Y4HXIZTdpk9dP++MiDl+eEn/0p0VMyL5jc1XlRvuTCyovLt70I8f18Eu5MxiU99nEf" - "vTwKu386XvuUgDQYCxZMd4d1whtIl+3LJCo8iF2/dmsbOftMOuftCp+SicOMcpOk3BDkfrxb" - "DiN97OOQDZNarhdHzJqGi93g/Ee2Epy34aTPQgOn/cWdVNpv06hLLlpFAVgQ3dbqQwzVA8Mv" - "tHqgngVOLlmMF8bvOX5vbvjIPMTO+ViEQC8MrFh5ljdxS7P7MeOQF5m5CkPHn9ej/8knPzk8" - "93nIz5ZlRk3G4bTmuoDJ0cfOdp9bvUgOvGWO0edwqrGNOON1W8r3lyC2O889rRPWwHZ9wH3J" - "MDED4tTTJrBny1ztTzptTGeP6BxhK6mkBEc9CjaB8XtjZk7JOqqXQdwvuSFFEiY//tRyfeSe" - "C35eCYe3+0NvvEgBvJbHRlWnRzR+3udZg8XRbb9ex+oQxlBfRYplORK+NGwHDhGxCBCstFZl" - "PqhAyrC46hT7pRWGIIg+XyyWI3zBtxyAfBEFYwlQo0mFSLDCRK+DU3gUNtSHWPylirWqNWeA" - "nsfBRsuYI1gq3fbrbfqoRjY5LtKKyj2ot1EI/He/Xv5NJBUYIzgbIYEqtLIS9o6G0HmRM2CE" - "RxWi7hDKvyuUTjdGclgQDWX4K1V5nhFHtd4+rxr3oneUupQ91WZYgv9pAUBDsEQT5ZiGaWyL" - "dCkNOrExp9n45l7vjLhYm5xLXKlns/HC62OsKiMMYuWHYsmPiTsSCdltDZOa53y7XlCnmOJ1" - "4/nKYNrLqm0V/IUCr3lnjafx/gj7j2vnFc6o/PIExaasqyXAxINeNg+fZ9IxkJ665Qj9j+mO" - "Y0SsTpADsc5TE+toY1/GSKYmejbjsrOd7TbuEA5nx881N19E9SgyHUn5+eLFkb9eUKzk2N5u" - "0c+JaWaOjW+8e8LNV+wtsj4otT9iYH54ExZE+1xBPd7M+0AytuF8dQJhaMbbV81446lG1549" - "xgiL3o3IaMLbaKyPXFQkUP3aIf1GBZ662pyP2ie9hdWrfWowhA4vjd7xGr0ygZSP46QBCAUn" - "RCGGMxCKQmSlBUPxTSzrBJG3MFTLYlBYYkBfD7gYYPm1ddz7aqxm7X1f/Rn352Pm7ZxxOmme" - "+M/PGEJwVsJAK+E3CFLl/x+MoWpSubjuQZksRyQtgBCWnZDlCbGUjGSlWCYHESkRMh4NQaRy" - "EHiw3GJG8q16bKrvqZu991cjpfUO9UOLTJIHMkG+FHDicJw4nRyCUgR94i7m5Up5Hghvs5Cn" - "gHgfbFIeeyTL78tkMCKFIeMsCGKMR1mAxV6rv0vf/OaLJdDe14dQs9AQDGztjLnPNGGzLQhS" - "0x0PjUsyt1dGuTo7TwpL/OntiG34CTs+dGc2H14TLvF1czkQ5TP2/cotXsuHTymtqLo8fmJ7" - "EzXrV29H4f1TjheVySnUmTdHlIxmvMEu+j21d3T1ZDDtA5JMTNQu49IAuWsyriB1Ax5evDtx" - "xm4xmT2qPt0w8EeG64WL3UXlVRW3vBe22jpt9Y8uvdAKchOgN39dm9OLVaxbLV7HzVyN4qmD" 
- "OeIwVWrFrl+0VdZmYD/8+MiuJvLvxzYCszNK2LdGpk981zJD397H8H1EVEjET6fSA0ctqfS4" - "3eQ0ph5/OGn38xLqFXFjZw6tus5+xrayUicsjG5L+n4TQEPlE3SCGeh7mqrJE2knVcuU0ZPW" - "ueqvPBlv3CBestOgZOPdrbtrl2013RPKeTwi2+gAP+7pw9cLVidO4G3p7G15GRP6qpGWCKMG" - "TFHpCbj2scN69NSLyVge7Z17CK3aGp65GudWJYh0izQpm5iYfj1i9rr9L7duPJm6Cz+fLYsL" - "6rZd+U6alovA8zKvkPRDlrJHU4t/LClqsa7oujUp7+7IKofskifvDx+aOD9q+OS95tSeZasu" - "ygCX9QGCisLdzzFkXtrb9b8gWzDQr11dPIzPJoXUw1aJwxNcrlw4aWDFHXblGDlGaOSVyF0v" - "KHkwfAQhJ0I4vXra8iNZF68VL4t3s39HNFkdfc09GI05ErNk/2v/3zZ8XSE8tL5q5GEob2NB" - "yUd9j93+fz4zf/obFk57Cx9CLuL7xzh9tSuFaQIzvVLGz5Em2nF6HEjtPLEY5rA394DCVhAi" - "kslUBiOZy5W/wk9cDaFPycpOpqIlXh5bmViYLW1iCVtQgPMf6twKxxCsS3pp7+zqvLG/+NcR" - "euW2ThhzNYt+dZ8HOUQaRrr8gq9Psv9tVO7OB6HXw2c07LNM/2S9g53c+mZvy8KQw6E/ly8M" - "jXK+yrcbi0rONdLzHX3cEnasueWNTpjKjJB47YkizvByWPZqF9Ur3SuYN4yO3YD00m0Hvj/o" - "sa/41epEFj7o/znSNHsE9BW2bIc+roY+iMhyWFBFhBTEwlP2WSbeDH/VE5KIZQqA0+lAiiPs" - "rSQ/+0Y2ucv0Px8v74SB2W8pt8Zdy8Lux3v9Txf4Go6Bmv/Usr72wtD6GxKagDHsuAPpbnbd" - "zEmkn86yO61dL0RIY0lHKGta5q5zsQ7uffb3+gvDH4aPuluf7W54N4nQHVJwuqzC4djCLj8X" - "B/MJz3FYFu1zAXWpub/mv5plO0snltmaUvPA/sW6GyY3sld2JXw2MJlV/JYdO5nhdOiAfnBq" - "UruLR1twVvAHovSnmHxOah1jBphi8XD09cBJDWeOHSr6JV0yXSjakfzi+bJkn/qlC08t9OIY" - "lZ0/isXSrUWvP9VUuLqa6oQ1mKR1qoYaBekIFLashj6KyA0ObYDFZXCGKqEkCEXGUCnMULDK" - "ExUkjgfJPj5hbbxPY6VMnC2tZAE3xlEeziwlpGEJtM8Q1KG6f1k/5t197aqv/ycMzKwpVlcp" - "8GJ4ZNRW/d171t3cOergnJG5jitvekeeDi7MIOtJr/31TWRdZtVsnDl51PVh142yowPl77xe" - "LHctnNl0g37BbpbLcEfv+BMT5ZOOzLs3nUCr8vBZzE+fehVLpn26MISFsYZs4UBZqeaacV/T" - "dxA+vQScm7h8Pz+EPY+u94xhr6gTvFni65z6yzDRw12Nhp6OS9Yd2HOrvDJOOhtcUPEeav30" - "bD13qiDE2e+Ry+5pDe1L/8xsP9fzS9CUESaP9EuZAVx/6uccLJ32ycO/cKHBIDejbyABNE4n" - "U9jK6Rt8gjZA73JafQdBSJwC0fLISgZTNfrUPaQd8ZColkn5K95HZV7sbOlXFmD6yZ4/avfU" - "AxgOb91W9v1Bbld72wqdOEL/GawpSKdqBlWSu2kCEgpufMzjF/UCSAcIsUwYCh4Q+pbGvESL" - "S2DdqUvJLtpO5dywR3B+whwUuCB3XnNyfdtULIRuHXn9D8NiPUe7Vb0G4gfNtsxvai9Nl3qf" - 
"PrUY/4e4Y/fEVO5vK8l8y3uEnza9oWeEvYDjrcbsOhRyreLl2V2NLZVvkzyPIMtPBg9vm8xo" - "vHHz/gHFnp1i3nPnS+04u1xXH75+JwGLpFtfno7NGBqkeZredgczPt0iXfrKuWbSJwrlfrgL" - "OcC8JVRGKLcmH51eL6t5sKohFC6aGvzCMdTxmn7UV1rl79lzwgL3xvXm2ln8kMZJbFvRXQtf" - "+Xrh40XDb2b3VvF2NbK6fI69O+OGPUblrX1+oF7G9a9LWyMcKrVC8/Dq/qC248HDX5HiRDFf" - "KgIQnsEhYCJbiENYoJDFdhbL4LtLgOMEi0uL+AIJXyAFOT1EAH+LYpUbScf2FXrr1p2nY76m" - "UT2wWbugpthYTFcF6hzXqDLXrFZLx51n9Wf+wU0fNzLyLifqpE/VhV6vNdMTndyg4WHO03Bh" - "PV1854PtzOWBCeT8tuyEfRd+/bP5xqMiqCllc83NI9dnnHn39h7rwxwfy7Nu+l1YNu2zA/UT" - "6d8B2N4Qr12tQ8MWMHg+xxigQ6ESEXgieu7w87FGI8PlThzQebjzVeoP0DLyfYP4Hpfg0WEH" - "3HB2Dx7dPuQ+yaJ+E85a5JxJyi5pxorXPgcwGxQfUvpwp07ipw2uB3h8Ka4HJ+wUwYwuFr4Z" - "JpKfSRpwk2hcOVzGvobmwKolgegUKoYswft+/HWs8FqYQPhCM4EUNHoSi8x5QvUHgLpzvjeX" - "m2+mYmm0zweGUPDT0Aw2TvZNjqIgfuxR1fwgd88Wy47zpQKcQS9uvQ1ZqUpt0raDackgWAqA" - "dpH3icAKvvmCwt5XCqxu3Qr5Ohb9/s8Qik+iI3Vi1e8yAJ/FfiyuaxTXPRDLCnZIYSMEpgph" - "dnmZjOaqZ7GWyxU6jCgkWOW+JHoB9HK4umrqqO8chdRtW78/2JaNKtTucIpG/GDnba1qCNFU" - "Q4hVTSSnMKhcuaIJQDpVA0kZj8ACOSNyjB8eXZa+U/A6xacwluCzjUXqBJ+cB2uWAu3LgWeb" - "+CgA1BYEZVZtMh2LYfH5n679NSwDXrj6frEpnwFxZry4Fl5lSK05vhhcGme+bjT3ugVlYsTU" - "DrftReyb7qTmu9KSN/tXZp7c63qv6Pbpq9/21P3nd0JkbemCbYcfxkIxLteVjXLc6Qq/6lXz" - "YwKwSLrZe3/oVcy7o919ABqkfxRq+AyAUz+OSkFmTtjp5JPaBPy+f+ZOo8fzJHG7owJucsIN" - "NllbR46ed0gaTup0If1QOGLG8d+8Mhptjk2ZvuylctKjT60pSZkLj3VWNsO7m89ex4l8/R2X" - "+OBHY8m0d/kh1Ac1ZIPtxpJaGd8m3WS8O8XfIBQVtDEQdLMQVUA0vmGDDdXDQ7EWv4sFFH0h" - "QjXccsiZ5j7ey4PCXZYkhgHmUofj4FTb41gE3br7dNw91iAMHK0Pvc80FNNJePGOUaO37130" - "nBubP84man7+ODl+slGQNymcSbm6ZVd+hmnGmnRa+QbTk2bXetcsmXz1ZOlI5Pat2/wnwEHH" - "P0I9Tlm/if7To/7Y1ky7ovyxT+56npiJ5dLe74dQ7cQ8GlapKgHZBiJgK0GIEAVyHN47kH0e" - "BStQD6fH2wOB12BFLYvL5U4s+0CH9r8nNo+YWDi3sBv+ZAyUWLi22m5+GYVF0G0DYIhh7QcN" - "QmUKX+rRghO2wCaMI2iTuC4n/TmLwm0j7kR5BuUiQYlNRr5qhYw7g9YC5TR2Ay6Vx+BKSL4/" - "qo/fEA4h0PjjQUoA+RslAgGnAw0uDHd8iEXSbVNfx+WyBmmgjrHg3kANmmMx0gY3sdPNNHTS" - "rNSdntPicAdHOu7weOd6dPG3wGdTfDJEwufGodnXix6vs3bZWrLY+vN2szzlsS+P9FLuXMFf" 
- "zgx5UJ6fcf2T/pg7/u3Mtw/LsFjau/4Q6hiMc5231BHDffB0ZKVMQUXQDdDXAmY18VgY9Cmm" - "HmToXRSsaKXFtbHuJCop6oIavAkAaiiP5h+mY5tIfbT3+yH0LmoUTx9sR0DTpLROWEjLYpED" - "GQpukqwoVdqRYyhEeKPJ2egeboJIyqNAHxKr2YDT4ytpxXgyjRBvJBTx3MiBCuDZHINLN+RE" - "4CHOc2fhBIoxFke3DKB/X/jyO652fQganMFCkosqAwA7cUJiFpFswvMR16FvAUTBM4F6BA1K" - "Fmj4EqU5muxloZUCKTOyl1h4m/ZBlR100gDAXuK4Ge4ZiTVJX918v7/Cx3VYodsQGtx1RZQq" - "AieShzPD3zWcmyTnCKR0Ek15XFynEOGynu9h+JlAszvBU8RTpfaLCSSWEhjfCZZns9iKAhQH" - "jL881uawvl4ulkK3Xj4dD69oKJgaimW1MjQA6XwLiWgeFN7dOpMQWmGGvaUAz0p9TOFVy+S+" - "yNsWiCXHXT1ifwlf8W4YVrBuDq5j86FG8DiNYKdaWVqBVBIrRNhduUU2GVQ+yYOQmuGd9gSY" - "GehfLRPYn/iaEwA8f0zZNv0cD7u88NWtJ69/V9uw9dBDneQObp5UonwpsRMUwhBMpiaHc30U" - "HEFRL2zgwTiPxnObUGJR7CoG4d5nkBk+1S+8Fy4MLFxYOEyVjyztrVU90tebHHcd3+JahcXQ" - "3pwtB4eJYFWe39CmK6JIk4IILCRClhCR7Ey2DQxnjFe2cBNQuMweUnAb+ak9ArLzKyh6iwwm" - "Rp7Dk+bxa74hchBIljhwM91P3scS6NZ017+XbW61dbZOBIOHnZapD8aqUgzcBhCRVhpDH1Xr" - "DBB/kcdlS7gdRPYmtidsI+jmb1U2eAPAiRY7w/CNLdhOW9//6VJbI3qw6FmpipJE1TopR7XU" - "9k9O5ks7KkFIpEopElBcmUl4nkBgP054pkeVaShwcXfSRESVNYDMOgWLvU35XBUrZYaMHGXD" - "b9grFXx166XrT7sL5EF1Qxs+lQqBlNhCJBiMVvsWX9oEg4w1CiegAoJqwvNshnmDbyRyEYDE" - "nrqdxvagAjV6+QbuPbQeIvDAw619b3ZyPpZAe/u1AjS9tcCRRz06EUwdJJCJpDm7jZGcCTZC" - "GF1Cpirf4pqthGnsZDkP5HhQaPz4pc7V2c4vs04oGTEsozoK9Xw+TVnFUJwqxj2bhchKP8OA" - "kadfiYXlcmz51ld78yUMxlAgAHyuFQyzoWu3v3ryaG7niuXiyqxSzwtwBsDrB8Pq5582DMVI" - "YjK0t1N1PaM/0USvSYJ10kT/x60btUKpCALTjA08gaf7nWyEBAJdD9A3dpi07UwW+h2B2jul" - "Ot3pX5+U/Zmv3f0OGoGOg1tfaVIEJhAY3cBElkERbEnHL/M/zwVwJsCiky4/r0kzV35Hovbe" - "qH6uZn0SI2V7tTto9H+fKxdIJQidfGKNOEbA9Taj7c/Sqg2/o0l7A/wXmqwGNOUh4rK6NP2j" - "uH1j2HcB4LhsmL7J08uHvyNLe0MzGhxuNrmGhKE9zQ6+VAISCOQJMlUWt6QtSI+TpscBQRIO" - "GOHMqNoDuXl+R6L2jqV+3e+5s6uCF+okkaqZpYHiuiRVqMQJRSFOhc0gkcxvEuOAb3NoM/Vz" - "w+jfkae9Nw3hSjmNPOfBwZZGIAhBM4g2mpwSzQ2jcdtwpDmM8eHsjTggf4GlpOfNrLzviNTe" - "eIZwCw7mM0SROrG4TjUE6YgMEcZ0sYU4PAW4TbP08f12re078rR3FfVrUp88Iv7RxyHJU32G" - 
"qsdK5RaKGPjF3j4Mhg/DPxEAqveavxZOArFblUyG9j6h/vT6TW/8G7O3OskbPThJ+pt37YQE" - "g72xSUCYV0M9cVUbAIN4CjdW7oLIXhGAn2yoJqcntT7FitXhzrMh7HJhZvQPYjHAgQ3Jgdxj" - "MDf93vZIIBTSg+iqiWLZZtd7ZlTsd2KhDheZqV/reK2nRuIITc1elcDJcISJ5BX+4sRciy5S" - "Fv2D6vnb8u8KigQ44HKFS8rnEfn7vyPzf3o/mTfzoKt6WacpjiSJyxL5KkWConxBkNIA4dHb" - "BQ42rt3L0ti8Few2IqlXcCyHR05RrWLfKelTJkSle39HsvYuM4S6rkayn8Zl0IKiAkFfN/ke" - "gSBf8CI/SOmbpthG6OmEvnTaKME9/ga9F5fJwYms9qA20KurhQVci7HfEvqaNeU72nVrexqi" - "du9/ascRyDbkPJvXOWm5AaTPHoqCWtmio1Ia6amHAnecxu3CZah04yBBCYqbu9cu+v3ZJdiL" - "GJg63DA2hGPmGt0DBZAxd/pKg/r1pTgnpu1L55YTXQYWIxOCTkfqSR03PNw37W2ce2jnnc+/" - "uvrkSsNjEL/V/wmcAq8ZH22x2GfzCRnuwF9eH+9dLxz3HQzd1lg6XmqlwfjvDSRH1LMPx/7Q" - "iVLID9At3Dy4jP1FwSKS/wI70AK2wjSDxelQn6yt+BhZncKX0sZWwxL0DQwA86/7/HQ46gv7" - "Owy6lTN1PFKqYdBsBvqIy+vEQDBbzMuIRR2E6GoI7elrxdwHFrE207uOqzuwVMsW1aol3kee" - "I2WQ7VEicNXUjna+qxF7Kpupw7Vi6kyiv0biIRil3Y6GRr3mjEjbV24Cl48zIfSYCLtpECpI" - "ICC8YULUCkLFbTC59xS7EOZ2HdcDDrmRp+EDr2PPiDB1uDJMXdTpr8Wa5/+mndVoBMcMniM3" - "7uuLH/eRgvOKtfRmzjqQ3eRGtv6RE7uruIjebD5y9O+BjTsM1i82ghUxU36u2Es4G2cmdZwx" - "dtgF7w2PeEfvkebs7Slf4Rzz7lPNiGVSo6iFIXXRMwrasWA63B6mDkX9WfGdl4k2OoFN1pzs" - "LR7W14H53tlm+PHQRCAs0cLJqnQjISJ2VENMxnReVyJjT8fHGx1SutWdVK/FCUEBtpGyLYpF" - "mZ2jvmy13JcXOvsw1XrUqGMi+6Ujv8PyP21b1rBoNvYTuPFcbhxfbwRBOR36VED+wouXwyG1" - "3DayuuyP9ogz4HRllaCOxuiOJQbz2oEr6+xNfB59xB7kZ+pwQ9i/CEia6yJ8FGnqgJRqQ6YU" - "ygGwUwRCClqRvANH+JH+kicHEHQpfamoiUevzhHzcmXw4tGwhMduoJ66olqAft7BmHWSCGLv" - "L2fqcCvYEHbzNBBsjanFImIxXQIQjCN9CopVacQ6MK13g/p2grd2ZOVXsNgfj15cq75u67Mx" - "xCqUg1aQGdpTigPCERez2i3vg74DoJsrD3EqhGqmgvoEnvqQewx/eOTOk1W7y/wTTxDiwaR5" - "Zi3Ws0ZwTNc5uO/YCXnyjpsvULJthpc+WEsuSLH77dVpXmw3MN3SbXYy1wX5J8X/A4d2+ho="; - - -/*---------------------------------------------------------------------*/ -/* Auto-generated deserializer */ -/*---------------------------------------------------------------------*/ -/*! - * \brief l_bootnum_gen2() - * - * \return pixa of labeled digits - * - *
- * Call this way:
- *      PIXA  *pixa = l_bootnum_gen2();   (C)
- *      Pixa  *pixa = l_bootnum_gen2();   (C++)
- * 
- */ -PIXA * -l_bootnum_gen2(void) -{ -l_uint8 *data1, *data2; -l_int32 size1; -size_t size2; -PIXA *pixa; - - /* Unencode selected string, write to file, and read it */ - data1 = decodeBase64(l_bootnum2, strlen(l_bootnum2), &size1); - data2 = zlibUncompress(data1, size1, &size2); - pixa = pixaReadMem(data2, size2); - lept_free(data1); - lept_free(data2); - return pixa; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen3.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen3.c deleted file mode 100644 index 6d45d16a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen3.c +++ /dev/null @@ -1,368 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * \file bootnumgen3.c - *
- *
- *   Function for generating prog/recog/digits/bootnum3.pa from an
- *   encoded, gzipped and serialized string.
- *
- *   This was generated using the stringcode utility, slightly edited,
- *   and then merged into a single file.
- *
- *   The code and encoded strings were made using the stringcode utility:
- *
- *       L_STRCODE  *strc;
- *       strc = strcodeCreate(103);   // arbitrary integer
- *       strcodeGenerate(strc, "recog/digits/bootnum3.pa", "PIXA");
- *       strcodeFinalize(&strc, ".");
- *
- *   The two output files, autogen.103.c and autogen.103.h, were
- *   then slightly edited and merged into this file.
- *
- *   Call this way:
- *       PIXA  *pixa = l_bootnum_gen3();   (C)
- *       Pixa  *pixa = l_bootnum_gen3();   (C++)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*---------------------------------------------------------------------*/ -/* Serialized string */ -/*---------------------------------------------------------------------*/ -static const char *l_strdata_0 = - "eJy9nXk01P37/2cMM4NhZqxjnbGPsoxdhRn7WpGSiowlqSSyJmXGTgiplJSt0o42oYx9iUIq" - "FWXQolRoG4X5jmXcy/id07v7fH5/uO/7zHHu8354vd7X83ldr+t1DZ/jjghPnMu2vcE79gTg" - "tPnWhO722rYXt8cXF7gjAmeM09I15OMz2/P/+B2vPRHbglm/ReCb/fUtBPeVuIi9c5/oEAhq" - "uH2L/82X7LjGWoBPkg8EAgnY2lisY/1bkvWjBGb9AxQi0yDF+hdvoM2mYBBIQ2H2Bxxxlhg2" - "+2GIpWuI+Z7du7cFhIAMC/B6OawPt9lamK6Hn9jwoIOXQkDwDGw0ingk4Ha2/3LQdYOUAS5Z" - "YxGv02fz5aJWDCfS1qpIm5orBfgM1tlJ5/DKJ9sEFPIZ1GJDn9p0P7//NT7Pr1focR9flrM0" - "xhETaaEun/ALNuT3aFfACeQE2mql12jY+9lHtLVcY3HVjBwzx6kFiBPD+lGY4zxX9mPVb3ES" - "rn25e5H14Xo2Z5kAi9Oi67pbsQ/VRiK+Q15B7D4+lWRryUCdFLm2ojX8yZWr9pbnxWT87M5k" - "IJ+JiLrDd9fImOwQVtjvRFz/Scfnq9SWB75mntGi0UboD8srdnaYenBgaQNePsU5rBpT8Xu/" - "haXV+zXNiPWh1TyWN0OByQTVg6G4Lkq8IQ2uKEtJwuIGoxWS/EBoGL0BPJEEK7EwN26RQOD2" - "S0hjMPnckFM8WWQj2hiD+AsM2p/gIeq8Z/VHDhAdQCBSiyCOsVGU3wLRNp3uS2B96DcPsu1B" - "GZqGQ5g7BYmo6pj1Kl4q5ZVNczLiUuJJVFDa5la97hW8SloqsT6De+t7a/GhGzH7Vtyxrg2A" - "JDHwDZbiLrwwr+BlLmJu6apNtqdulirH9Fu4KOtcu+3SM+0uftVD+UwUSO2RTf3+gmE7DlRd" - "QKjii6h2NacEfwtVR//nKIr14eZ/bcVruLM+XA3S61MTjCQdaHysvUjV6LWVTHl6ao1JkamR" - "f1/7aKrr0QI7gpJDcr3o6ybtiZcvLqS0r0oId+P9HhzZZFb1otFldOvlmUQTfpvlAWYdlfc4" - "4PQAwUkvwpV7bJ/8LTjdktP7rrM+9JyH291ey4onKG7aUaOJVFE//hHaMTU6LXG3u+rGOuMD" - "cQ2r4lG1BxBq/IdcjhQe9StREAxvMcVcQNYmCK1pFM3zeffePzHGw8aUf0Y7loKAjt5Yg23w" - "HM0zctkRCXmMsx+wvKNE5WDUB8Qosci4Puzy3d9i1Ht1wlaJ9aH7PGMoa69SCHCezodn80O4" - "rVv9PHX7LG7b0CUm0R8dxs00sIIhJZ7WWoxYTOqG8gy0rvbLtuVOJl33TgnZvikZ8tnfwMVU" - "OHibP2nYqK4K1dGaftTexfywe0DFPuOA97fThTn4DP7XfPpWGZgzrA/d/r5BUeZO1qrdtfDU" - "rK7BLm07q/xi0soJtKrfVs+8KJev/XyiHxL413T4kzbmFef0xL9AXqgF82grdsrbVKzu8oyC" - 
"O/jaVScf1+TV1nmSn6tOH4Os2GlIuOIZ94wDzxAw3rzknVkWjfktPAOt43he1odb5vHc2+fe" - "P27a4XWO2lxFpRihMyk35cvgCsXFJK1bSoeSNNd8rjuccgJXngnvsPZ5dB2e7MwnB2nt9Bh5" - "UGyUnt427c17MuiKqV4FLNCneL+9lb4gnX9Id22eZG5oIgfdiv+10LEF3WmebvWDKgEaAQ7q" - "MtfI0pBSKqr1Sx1ErzuX5OM9cfOrRTA8uFDaf1RtvfIpIW6pJ52jzcGVdvobf7XmSSieFNq9" - "45sSWO/08Y93DT41YVfu9lDZbOeSuJFTvIG5lD8ImSueaPII/rUj9R7MRhWEFX17vKxMra2F" - "uLBqpat1K9w4Ep8WbQl5pT7MUyN51IpOc7V1EKLf5vaj1lY5T6gku4S9tD2jpmz5HvrJnRZa" - "v8tRda892vl+ztnb3+p7Zb/WqK7K8Rrl5+QD7k7mt2TgOxFlQO5k06L6zUvCnrM2tvbN8nB9" - "hQe+qXVwVVX51CPyQoURj1vDmwqdGitsRb5/8hpUDFsJCwk+nK/so3LOnGEY3bDt4fMrsgGh" - "cV2vbvEdsl+PYL7lOU7QjjyS3rKdEw6YRxFf3JEbKy/fAuRRNBY8ih+Z2UyjpSZIyDFAKCZk" - "kInrs5ELYDSPJRakF1Cph8FSfmQyCGQMXqlSeFeCh/OJgZmRP4gQbDOyY0HEOsoE4gki5k4K" - "fG3+oCKCpO/1VCF9Srp8XW+2BzFlzZHqcLmkd67JwvZ4i3qL9amecEJwr430xbiDz2KJH8Zz" - "IivuWp7/cWHfdaPRY7s8JiS6ZTqMT7g4qIu7t05HT7y38nC55zVgnadffMSUExaYHcEsvltV" - "N5pjAdmRhb3nnjYX7SGadplZaNfY9K4WP09hq3wH0SEYn0VQ5MjuyXdJsUebHLZxpbQi7c+l" - "N5hoZyW/4N/Z1D9u2kCcGA5wjI7o2vI1/Nf3OM3q3OlBtUrQrtfq53a1ns/khANuR+b33rXN" - "G00B2RGfv71YOBHuTh2R7ErqckPq1QZohmuFWBGhTlMVHnHcr6gmotF97ySmX8pK+HGmfLnN" - "DNJGvIM3WuK9mosXTFW++VGG/MZS79q3WS3k2tNF/Xd7bLH4+3Cv/fg6VMCjnbbcazabHeXk" - "BGZJ/iCAsC3J6r927Kxkd2HP5lejMUpFr8s3WXmi4u5YbJRL2bjpVqaBMux0r9qpznj7VORt" - "EvLY3Up3ka/c6ClYaovnhcmbN+4aHKpYrdnzmLBhxBzkxgkEzIOIsH5k5oBipoP8AC2cHTux" - "YY5D6pkwxyhQihQlhw6pJ6LEGLSxIlASMYrMADUgqQwtFBY6GIb3PehIo9FwNBBEgPSdCYGS" - "dPhxbw6SakEKu4k58asOx3PSALMcQqwf7BxN3fJ8RUCOasOCKM9aDhyGh64jlp2qmHuhIPBK" - "szJGPL8YN4D1Lmzc+FJLQ6vw3RpdzffybTsTDFdHfvTN7VAkta8Y4Kvgu3eg76HnoELGTpz4" - "r80eHx/dDchn6Op+eqDbxgkGzG0IL4Kpj9yXBCTMC7mM8fy+g5DsVddpc9XwpmY5kVTLXXqH" - "n+FeizvFYBV93HdIHXC6kT/mdjJji5mWCt4d5cHgaXtAiX0vqM9jFatsYJKyaUTx0+ESAnPz" - "5A3VTxeQjBjdV64DLTkcdNrAbIfIIt1DvU5vQLJsw96E9VNUOqweloDkxWJkp/DkCEY9hT6A" - "hSS8IXIhOpi4nGksxDZHIqn2JxXj88Q0/wTuOlGENPqznkGlE5kwUPxpzaK6VXJgThZgFmN2" - "C8rO/V94w4OpgDTNcSEStpfxUkgi3F1o/lJu+/GVX2g0bbVGUalJJTc/3OnlGLhgdQg+L8ml" 
- "BC2htb3Mwks9+afCzmr9F7vLn03diQwSHMRcSfHyHNwS2cenov9p86d0V04mYM4Cvbg+qtJS" - "KoCka8HrurOj3mH8ulknf/ICfpNqkmWWAAZnF8i9gZVI+2O3Ce9Aa8E7vM407Es+BvvxGfX4" - "TveKLeOrxdRN91v9jFCIEP/xrsblbu5y/Z7dh61WcUIBMx/Ci5GPLLcTDSjyWf4V+cDzkS9p" - "NvKB64n4GUgdDxJzMCqCNoZOwnKNe+I/wbwO5ptY5ZuEjDFJXJ0OOOhF01oslpXk+BhdsN+k" - "yMcJAsxY/EFsYAc9l4U0cnZ1SAgISSEl+/JYbilf7pjQofjVRmZtmvKyNSgbkY2NYZ3e5uHg" - "IIlM/i4X+e+wVXTN/gPyF8s35K3gu2wryFhhwvM2X/rb8p7dz22GBMR09JPMJSR3c5IBcxXo" - "xSXy6alU+y0ydgZptOho/cgkHgHEBFYSMxM7Zo0/CG88TGUQRLAwliDNrg18RgXZ+F0AEZ3P" - "gMGfmo8zoSD1/dp8u+6+LOJ8fmBu4Q9iATtHdJ5/fsv22tl0A0IqMil3sfIUuWjiej/J8rnA" - "RLuwuanRmhT5FzYKkXLpa07Re4N3nsFdDMTfRhX49364900qt3L9rUtSlNfXjN2kBK8GK/Br" - "Vjno3Ggn+jdwYgHzDH+gsmwxWqjxrp4LBwieLnMRVR2uoowOkFPpE2u1QbTbOgtuZ9zZiI8y" - "oUImfFINSkGuY2aVxIxVF3QS+k2XYwZEdvp9F/6I2JI0c8SiY/xE/LHOr1w2w1pvL4QORnBy" - "AXMP/0GGrNkRoZlJpSO/sGQolTTFskL1WB5EIhNDGR+PhiI+ktVoNMFmhAUZXHraFwU9GYyK" - "o74xYTjSQDfi6TCF3jUgy0Yt5zeoHeKcKMD8wn94c1YtoBjSOkFxCDICipTEDBN1CVOJ9MM2" - "DDkRbMLgEKlPsHHaJgeb9FoKM0MtPQCpx6KuJnqDNnboOWJ763ZyPL4OMEMAX3z8mj2o36tD" - "sFNZWfbjt3S20OANuAiUgCzUfL2LgjkEwnLvYpvQviIeL5ZxPiEwmUcvvtqDpJYYQH9gncU/" - "MI3WSaPwzf1xrZuQmWOe+IA5m8yKStwzKodp0aNyszGpXgtU0qT1roW4bgfnYwNT8v/gTtYt" - "FHjaF0qOzk7as29uhYZ3aYoS3xMbfYqGZTY5u757l1tUcfyAQ6+uhYKgkPGdVYaCr473qx63" - "cDp/pRwdqmKnIttax4WZ4aHz3z2z8p3pWUk5TipgUv4fFoO92xkLux3yHTknFDDcglAImQg2" - "fHTsQzZMwafWylKZveZBxtxMUBdTCrR+u268F++0NefjAxNw1GLcaXoZKAHIXjksxNM6lmUk" - "iFh2SR0ct7cOzL4sVlYacpb70GW0QQuo9jw8JtH8sK6KVGuvgHpJFHJ9V8KpcV2rAT58Qerz" - "KuZpB54ttZ4JERDnNSoiYzx3Qjl5gMv2Hy7HikVnVUYFc6FJM6YQ8+lEaAKTMIVKwULGPRWj" - "pFNeFORgY5it5mXGXEwwPRI+86sGDOL9pB++99Y+zjMHHWCa/R9soc0/bSFpISGes4VFYxS6" - "NRcK8jiKe7CZNCWddBgStAr1BaK135wLAonlgiCkQeW1voVDh8kI0KbTxul5ZZZJnCzAhPoP" - "FoJdrXD8ewV+zrfjJahOQc2bPY/4xHaeTQoxxIs27ooyERaS6bosmBhhlq2a90Ztt/y1Txry" - "fV+SW5xvftcpf7+1bIvbzFGD3poO8QM7dR1Wh6Y7czIBT/Gl55gsarpTAIn0om1XiKbQIb/A" - 
"CbB2GEaQIEv2C2SJHWMAZP4FAiP4zrDi8hpd6EXYh9icrkCUn3JnH4z0hTiGJTM8o0GgNILK" - "rV898NucIMAkWoz1ozwH8u6my+8dILDf+oXjkQPsWrSm0jYrcjaiESRiqtbEWqduS59gcNe6" - "6vdRK4d2kBErpVSONsnznNOL3fY9c4WiiUuSxpWByvA2BccnwzGBydi2Izus8p72nXoROrMX" - "EuWtvcp/aqqUA08XmITPxrFlc3gBUR0dgPbewkHz7vnDLQjNWiSbtfcw6tgyV9QhiitFW7Qp" - "Ac5rEXz01UFz3oBnm7sqJo8/UAcfHV/9PN9G5C78W2HdOmJLapeAjVdkC9EN/clsO95ru6i2" - "6kWPTwWmgX2S6K9WistrVoLP8WzZsKe6O5iTFZgZmF1K/Bxrd42B/W+xsjMwz7+XnUTi6Gf8" - "zOrhylcHQMoavvIBTfhUsq0pzTxtc9S1feHW6umSjav40ttJ3avd35Rq+w5m8b7YfQ2bTd3g" - "G3RY9tHWbw89Rov2U9HXqs/gSqRudinuFnsi3OGjNwFLeoPghARmHf5DZ8fGhcJGx8LZkIrx" - "8Dt4+qBXjLq7WkA3OvIrLm03N9ooCrWXgRVLO5R35GibmacVOkS4kCfrPnf4VNKjgx/eHnc7" - "aLzqecxnsYZVRQaVV37sLpd606cvX1L7o5oTDZh/EFtEq4hoTvotNLbd1Fr0D1OJBdRxJAQi" - "BB1HgsajUQHF8tCTPyhU1sdUniSJAQYIFU0eAIOmEk3D5NzFsjgfGphrmH3o+fOEswd+EABZ" - "ua2LWdisa4ijE1GlrXBVaYtArUOU+F3bslMzuuWH90mikfc+9Ru3663RX8Yfb6OcpFKgGwit" - "kj5WijQX73f9lhM5jW4+HXa/p2WXwrOBre2mQ6ejP0W7DPJMrtYWL6hmqnASAvMRmMUQQr13" - "7gugCOnFPv8vQLNCiEXXmeSkYm67Ah3DBC4/1bM0XGkm/pg9lGpqFFp1PTFMDqoRqb9S+0KZ" - "VtbNwJU7ZuAahYNqQfvf2r98/Ob2qupnz1uJ7V3lzaJRBl6hVRdqECLKN96DNw3YoQ4Fojiz" - "aV3gXQ7/LVAeeGDMx3qvEujZgjLGZocHuxzGybcMxqGVI7imajiP+PFE7ntTSScuXM7acFEx" - "SeZ5dYmu/Ov7fcuP8lphcKZxA644t/X6hF2O0U5jWlX0rxtc8g51rDJY9vleOASriMmrT8Q6" - "n/Ko0B+mreZkBd7xAJCVHSj3/KMlh3tWFFJpeAmB7TrXk63zHdCeMzzowhXPNGbS2hszVjc4" - "uGGe8ucWBFudsdnSciclzQJdZTOIxXuJOmi1x2qP39q4ZiThJyphiIfotu+qoLOUDhfCUPSx" - "RXOE9oYTtRPDMAP4M7Kc7onkDZzIwPyK+KI2GB9wC/0tZHYNyJ0dW6KnsBD4DLnUO4KGaw6k" - "UJFJzzCSM2BWkPH18rOoh1H2wnb2Ut5PYkxI4xA89MMV0EhiybaZV6CmUcf21DgJpg4lyFwG" - "ZU+gfjNrz4ZUvSXWE0G8Ajqd6+W3KnECAm+EWD7fMzAY6vJbgOxqkP/C0QRrTetnTeaeEscL" - "PFwRIruPtYx5HV99SmB9Jaqd4JQxqJeoYKNYYIlYphIfADvBqFA5KZR768zbp1efvqqOZ0qO" - "UyroUz27c57HnDoeZv/oaFfpo0jTsr1lK5371ety5beUvU/rlrrx1t7D7hBWjINXD5ixEV2M" - "u2vsHHQAiYXholgwY+mgX8gE6BssAuG3cNbuh8ntjQ5T8+r9qs3VsS2DTIIJIKQRTKYNCPSZ" - "adChcieUUzP0/ufdmWzNcP3bMdLs+ZiCSHb6AD79zUNfRY06+DGRhrc6koeu3fUu13EdUdPl" 
- "Mz34Mdnx7MWgnJ4rB+ouKulqvxeLv5828iTwS3vhtVvJ5zZGBWK7P0+ubhPg2WoQ+fOdA2cZ" - "XA94i+Z8bIm3D3sASC2CFgznw6o5w+lYNBtb+FJpPqgGr0TXODRetTAnmJEU82Yke1gNpigg" - "UeRpvau3EyO/Ey2b0FZwfa92xiH8I7Up3utJpk73crMnGh+W4GXNnMJJIi7voiP20BUfv7t4" - "e7PqyHiY6A/NYtIq8EdBH3dBh03fOKGBOZfZ6KIyB92+/FP+b0Gzs1Vz9mZ0Yh6mxxqL3Ffw" - "lFiBf+PNwIEb9EsYVB6Rp7gxKrGkAhyn3ovBYHLyMdAcjAlhTGuKCW6MLh2IjgKd7zIoyr64" - "hNLrAfMyEosx5CxO9vdOodka6Lug9A8XcqFD861iJxXE+HMJXCHrMdGkT36nRVrvXH/96Zy3" - "lIN7o4+mXYR2N5l6MtvipMRpss176y82QXtKzZj1QgfdAq0onzPfxn2SfPZVtbDc4s4GTOwD" - "DbcQCA966+Oa6yvXcpIC8zR/IAdsBdzGPh7UEqyb9TR71HavfLo+8Eig+jLXgGNFDLiGs0VV" - "quXKlI3TXRn7FaoyNTCwtLyjTjFl3NOy/sh9sNO1b9fz6OW43LH9tU9SKVVS/NipqbXGoqFP" - "H1xraup591Ny79hPri0telY+pYMvOUGB2Zo/AGUXgSz+KpA2d9IogjkYTHSpFiFWCwv5hVyR" - "MIMMQzBNGaYyXFgGrDbNpkIi6QSagUVcVxjDuuVgqUxq6RQRdFTSaiX/MxiGk+N/blnY+r37" - "H02aEBIrrPiDcml+UQ1eOLHkBi5VZwtt5hjF72qpz32cKnn9suyYTRlamDxcwQblbXnHiV22" - "WO/SzWMB+1oN9B8nbqdUkYeUkxweTAw+ew+3lPIuyi/bWPvj6uTO8B3N+xzX/HDa9EFP7AmE" - "Exi4YZlvcaEOrb0ESM+3/K3FZTaZxfKPYsBPS/dqe2PSnDDUrJjqt0pWyw99rqzy3tvZqit0" - "FM/Pgz9fJxcxWXs+Z7Tkh1/3Ta3Gj6Ljqgp0981PQnzfVQVd2N6Du9RlUAXOVdYyrTy6XZ4T" - "D7hd+cMWwIUWkfgHl1jrKRJPr7nDxStVCk1vhO8Q3RUqYkWSPykbkEuxjJow6pRbVkgI1tqr" - "baiw1qrPLZQvbu2ow6T1VPWJ4p6qk9VVjT+3vrjr7vlNJRK0XV1P51DkTAUHmD5wXzKv7c5r" - "ow4B8iXs9j/GQ5YvAbN9CaEdTmeCBpk9dR2OffAbh6mxYBldAivTvGxjqOzgwHea84mBu5H5" - "Ctj3KN/fS7vZbsST7UYuoSkkEfOuqljSOq6znRJYyy1VeUYiYndi5LhzTKVPSCkWKK6BLjOG" - "yZjan5M7fC6+cOK56Lvbm8oahS4KpIjti77jfOZ0lbWKF2rfz/afA0iba8/K44lnH4dASGbm" - "x2VfvDrICQnMlvxBbYhtSxbSdHd2mQ9L1xbiJ/CZa5FSQi434lNJJ+pSgmxQQo3L1c68HuJr" - "M5W/znXoUKllDF+w6OhaBZdT2x+L1uy/+Sy97GmmQYLR2IX+rQ1MO+Z0iJ+70NtigzPvDTMj" - "eG5wVk/0gV8pmX+jcl9dUwLkQdgBo41d6ZvtTfLnykUvs3Ox059Er4uJ8zqleRoqrmJ/ayCB" - "b6/KikHEGlxH0rCW1V6JR13JPrzLQhV2UnwC8Xc/CurcPz7ccxQiqUlBRxt+47mA3EzozDA6" - "zIkH3Jv8Yc66iy0Atax4iODpFBKUfkay8tS5kKvFpxD4lVTCvKQYuW/Hke+3vXyX6Ru/KxYC" - 
"O8u3Lqv3HEYoxpwm3S9WW+a8bLn/EQt53gbNMJjDBsFL0Sczbj9GtvmUe95uFShceaT5i8/g" - "Y66xJp7OK27Ot0xvc7bF6AMvugB8Kdl65/GvFmnsnXyfuMuk1kB4VkedsgQV7NjOw5eB3+r9" - "atMbnyHcy2UK8VRtiRHdvToxNX6xb54/LPR/iMXeDNdqE2qRJTzMOhgTdlnb/uGlH30putND" - "VR5Z8stqX53Q2MWJCNyazCPCwo15ACncQl3Jkt3OtFb+bJxATiyfmDnev4m7PQONd7YIx/Dh" - "Tw836d+TSjvR1mK5NjPWYivkGZdhyLWpZI+EKcIJR+S3LsHn/DkM1WMHrzRfcit8+Laq9MIu" - "aSdokVoYpB+qR1vDvWIbJyUw4/IHV9bYQufNFjotQda+jadvx7g5VFOSrWtzweNer5WlazOp" - "rh6Q5AANj8ob0wUXY15kQjVLennEfcsNB215W1u4nCazblRLE095C/m86Ji2U5yestD/BTXg" - "u9hVWv6wWxda1weTy3XL/OkvaMSJCcyuCCwupsEvtd9rRmHLnjpb9jRrxxJj6TBIPQyaAEG8" - "A+G5sn9CTKUj0BAhs6G6TqJgAlMIlPkDl3m+5+wpzgcGZkD+wF+xVc/9H/7Ksmu54rHnm+nK" - "lVnCKKuiYAGr/GIc89fRJ8dPfXf3EZLMtMsvzTDRcTu34pxJRLDUxy+dW+87QjZvhZ1UOMl9" - "0+NbTM69zfU97vjzGaufM9zrjLNvan/6Wpk6wQFoAMyIzEaQ+ZzUGHdTHZDiefx1MyZuVtZX" - "nSSb2eULaD9/Tj57QCgtzUKhEyyH3vpFtNbWH2ZV1ItabswvcGRD28iRCx4vtK+POHXuPBIf" - "xBt+/6WKoprGt9cPussOaYCfpehxX4me6swnNpwi8VpYCTpxIgJzLtKLot6Yd/73Dlb/1e6+" - "oWPx5hbeE+JChjwhI/Rt1pW5WLXCN67DtcusE6QPbvDQtuHDW9nbTR95RkVnr1pp/jZSAlm7" - "wnrbhoYdF5yfqSKg3PvqYxD91hDlVX2m9n5am069k4a+Bnf07eh8F/5FhJMT+JUSgHuVnZdv" - "YN8ynKupcHdlZjpY8WrEby7hdjtpH8QbJ9gpc3/tXtJYYZWMZOays862PR34u04V7Vk+leFV" - "DHQg5MfamfGXakZM9azj8Pf5su99XdrIKqs3G9ZhgmsCOcGA100ASsC/0nC9jlre+tk0/DAk" - "ee0y0xGLHa2ivbt6eo8NCkX1yPnp2AyPYTCYBL8TYxauhUcYdkU2bw+2VgUfUCYqv4hTsz0N" - "USuN+SjUtMJwg6wCs98HFrEp8KPGx8/jud8qhW8yQaRVhll9YyZpnKDAb50AlHN2Gm67sFOb" - "ywRoOJacm2vMtgBob57N5eSU4hupr3FZmncHY1EiU9evwQcIe/2mbMgGElFbP658f7fzqFKF" - "Z8OOkAP2a3RUTPXN70G+DDM5YYB5k1krpjZ/Tqek0QnIm+xdgHm4eHygKkG1RPlDsWU5122t" - "8mO5WoflzDrdvmBfUKuEkVUkeoOZdE6SgHFml2gCqVe+Winrmugvxxgj/j5ugYqRrWarVxt/" - "sWlItLgcIC/80V6hkOui2dHdPS+Y6eZDa/tHy2becW32IEd+Vj5gz0kN/IAIYHGMbVdC2IXN" - "2VtSKIijtUh2NKQcLlTUmNQ468iy5AfvTXcdx/ttDS7/LIg2bVDWSEp3OIVJDtNBheorodUu" - "JWFXlVkdOP+yU6tLGQUbeEPs9yuoSLU/o4i8YVD5IOnRjcPCz541Hmj8caFQSeekmuCBrzxj" - "4d56GfdyJDi5gRsYgCMT2AbG6+8dBLOrrYrfcYGcXbopyXWUtdo2J+TrerOkUB16x5hYeTMb" 
- "pwALL7JKZRdhVSi/3clUB0cbkQtYwWGDp8Qblw50Fp0aHNfbuWVL7PFwb3LZgUq7B8Xv3sDE" - "fMklyXcstTkp/+flFrZ/MV3sN2Ji64nfoxHIHKaWLqFwEEKagiUxYwMV0AycSGxvh2GDlNyb" - "A+SB2YpaQyclDkso1ZqKpsFAQih7eI++eQgnBDBP8x8u8vn966q9+fy5XtqLuA3aKdY2J7iC" - "GHCzwn17h/aYC8mNNiQffefa7qDrYGEkIhR62MIWDxJ4biCgfXZnxte+DXl5D9/pTTQO37z1" - "pr/XIfML8R0SXr1e+NwvuZEBgaqnTideMQQHOVgNgdsbgBGVbW/c/7Ytcah4+hnH2Vp1hVg8" - "L0a8frWDGyUr5Qu8e1PU46rq8Ba5EJbhsRtocg7MLgmzHzlmb2x37E3GxRTricFz+gEIefOZ" - "2uMSaz7KL+vPbQrrJHbzd98zyu24GcLZfGsIvCyjOq+Ny3rcAZmbhdseu1tmE16WNtoaBfZB" - "EWFceQVnnVKUIoZLPd24vXVltx6cMBWSTO5Wi+Zu5A+59XIKJWsd6J51/17YnZOs34lsTXhP" - "e97yDvbxiIrkQ9KMGq8/mhMKmJP5D6fsSwWTEUx8l1eDl5ValqmjLDEpSb5D73uNQBa0GZ9V" - "e9CBem574l3PFq9poYuR/G+Kj9aSAgZRGnawPUb3UzYEWfLbZ7s8XnUsbPVhlTJuJWet5wIe" - "cO9DqEdenAMFDIE3sgAsNrHVnn0c5EemcacSXTE51JxpGBJ1n3AjOsOTwayFJu3CzBBBdCKC" - "iSQFQVAG8p6+3DOwulHUVOL6dmSPChMGMjtsHdLb1lbHiQG8tWU+g1AYiT8NSOcXCoMbUudM" - "CzdRSCjtrBornqjzoCXai3y47cThE1aqZkFpB34QdkUEkjLzj2XkDryP6Ya0uY5XTVxT9u8e" - "nWEk/IjfWoxtns4sO1IBiszH9rb6H7i+tzmwcM9Hnoj7+m+VNo9wc0ICMzNii6+ZxNDwJ0Cy" - "7rk43YKXQmK9ZpqyOdK9lLPwW9tw5WFhYV68CeJqV+slpKr9eh8O5gVfD7L1rGvpPUm3Dcl8" - "BynzsIbcKtrfM7l+n+KNxJVpJ+nrrjs0KFnmvEDyNEdK+GQ3rqj3QN4yTmvWaOZUN0Pg92X/" - "sGlsy9+uiuBEQF06Itk3wtS8QnTiSzENloEQe/6hH4klZRGvX2dsMNxkODZacc7rQqzcQ7uV" - "KRaF2dbK05gLZ3r8L+ruEb7xMK0g/aLBz7KfIT+/uznYZEhJu5kkZvRKaHLiAbMo/6Hmvtg4" - "VsuMLaBOwyAQuI7CGBg3czj9WXJD71ezcSyUPwEKhRISwKhfIAgMpLJa3/HrA3nOK7+GwBzH" - "H1xEZ4s1eTF55W2YzX00RQxDeD81aKze8pzPoFBoeDQmdhyDaOZBQsVmCu2TYjWlsC7K/LF2" - "61ruoq4I21VCz4+s3Xw75pxl6K59W9I2NfY1GFro3/kS+5mOpX+zkcqW2vrWuA409HKJcA/8" - "lGc+ELaHwRiARHrhvmxo+ny4X1tocwKNUbpELfezyi9OmqhebW4eVD5M/JCmqOzVadhL3cnX" - "XndLv1iTR33Ioah89GFxUG5O2aMNHip91R/eTNXe2V85Pdoxw1CY6YJUNZi0H6gxGeegWwHc" - "ggCk+5dCh/5lt1Svg7dy1/rii7aJWAWOcDW6EWwcHasHw7GfBLfIPG89nkw6j/jwJn/P/d38" - "v4ri7V7bgKwkNeKXvcgc+LhWGfxqTS+3yeqYAh7OS6YrgNsOgN1+bIVmTzR6UIVm2Q7uzm0a" - 
"uGJuuzJ1HU9/xPJSnjZCoKu43F7E1r1n7kJsRSwUI8paFS7dYxKowmonP+z3HL167h69GHLi" - "/FDR077OYf2xCTKvX9uFPW+U2wzkLbuMpxmgfnlb43CNn+84IYEXVP7w+H/nwrvXXiZQR8Dw" - "0A9lntKYPU72ygzMJ6sfzkK7nSudaba3ODL4YU3iz7jNVo0t3MifKH/vzqthyCfn78ZUOpdO" - "iYY3Gvf5lyOuW3o5Dhb+8FxvPHlxrOx1Rkmw17bjO2b8Z0KjLadAG3uMlYKC8mM5aYHZkT9w" - "kmw74vBXbkOsB0OtZiOhImKYSfbxm6pv75xKlEmalEhnZmxnIOuwpF5ZKrbHfIzKREWhkqQK" - "KiR4dYoGownt6VjmWyYSVOzvyHVle/8eTh5gvkR00Ze0vDTJA+RL3P86G5n1JSwTWQdPrXNV" - "VTVV89VvhWdryLd4W+7ICE37eDt1k+illULc6L08SdAbzUWp7gr0UStGw6u27d+lv6Tvh+UF" - "1waAjvLt2fbtmHDVkdEz9K88x7x0LO2/Ru7nBATmSWZT7vlSwxrxs7sAeZJ9fxVY6kgisZ4s" - "l3wdtM6KN7c/z7X92XBaRpZ8Sll8Rv7kxfPMqAwL+Js6u+pSFCzAwlM/vF5fQsBrI1KxKfHn" - "2Uuy95YfHLfVU4y3yKxeEVtZ1D3zEP5wIDx0X8IDx61KVyXunC1Xet4+TD+1Wr3gtc5Ml9S3" - "9qA3JU8q0jnhgXmVP+gCZ6/u+sVOzTFkPREqlYBFaCOQmAiMIKGUoJs/hCRNQewgkzBaDuyy" - "bAKS1Gg6gRBDIBDR+RXQJAEMLR+cYvqeFGkCGsfm32VSUmaYTAgIsQ+nIriPh/NEbwUwk4IE" - "sa+XofZfjAJkUjQXGwOaxs4k07HC9UjlBIgDQsABgSD0gMQmIaYIx6/ImBxHtP44MwoUdFEy" - "5k1ZOWd9fQUwiyK2+Mj3aaCtgCyK89+uKc6ekexSLPPktvIk8B8iSgbUwbNFiLUqg/ZrvmUd" - "GgrRTrNQWgc/Ag5rU5q5s+3rWahVMmKr7dHL6Y/0z3/aa2jp8MpfnZnKAN2jKb40nLTo48QC" - "fuVnfsLEt1JfYCOANrMPHueuUMTTmZ1FCoZJMNgX16CylEMhcdL78Wma6zLGTIXMZD/llTSl" - "l63bhC9ea1Jsu7Zv8H1/cdKng8Hb88ONIGtaTYNf+syoVV5r03ptedVjmmfDfY2EqQ1vPTno" - "tAjAbMks3vx1M9CpR78A2RLbxXBPS6AzwRIJTAHHdlAtk8JAijBBg0jHKVAcP45Bg0kkQHH9" - "GbR6Go1WR0FIUhggx1EmRDaBSQoNnILE7Qbd1lDi3+y3a/MSNMAv9eDmaExGbwIb9bOefXlu" - "YbGwo2UKIiEZqcqugWWp3QKjXeYkD0ujCEZExnB8ehfecLJ5TF02NCd+g0PK8MbC7w2hdohA" - "fLipzg1v21d3L734el64QcQg9IFyeAbshcESYMA9yPwulNwrAGzs4AKYesecivF0HeG3syIL" - "NfigPXUrL9fCVZ1P9K5JedrbTfx+Fp+X2LTaM3O1V+VJTP8pSZqes4P+iEpgy4+ZNByi6uGy" - "sttm6ek/qqtv3+uDy/ePtRskLwEGvPoBMGqw7YbNYvWj1JMEE3CcYoL5UV9IZYh6JgTnD2nQ" - "QzClZghT20FeTUQEk5QziazHYnyZ9WMQiS9MTwYlLroF8riFCQY11WiNeSV8oiwBA3x66fz2" - "69J4+XslVbYa2f2jLpywkqVGYQgshoGRJZRSx8GoY2QvQ3IsDenfOzDRwgBBhLlfEAi6PbUE" - "Ye6nIC3YetmriUi/4zKdL6NBfRJyxkV2rZNL4ADve51fG0WuXWRAzsLpHz0X3LWNYDklR95C" 
- "DS7+O7rHB4VMpmjaffdtuIPp6gckDA2abL3Plzu5nldwKk+lafI7T+tqBe3TfSK8oU2vZtDA" - "or4nuIl/TZf8cd8CkyWmMhGAd5IAXCR2eeMfg3FYr5JzSnbPeocY7c11fr7CsyM8BYkJKYO2" - "x9TOjFQNrdcQe0AIyuC6LnKhG6ob/XzXr0cPzOpCdl2y0LyUV0Q82C+ycga0r0x2PdXDgLAE" - "FTDD8Af30dmGgbBohE7Ro3Xqo60SolMRRIyEIEFbl6BNoLRCCT+w+l/PoC/yiHOp+kFBjiMy" - "d6Ikng4t8dDAz1EAbrB/WYaFO5hQurCwnVWg0KBfQlfg7EpE0CeL0hqfugu+UmmyfZufqbjO" - "g5oFcYtS7hu8xR1lczhidQTa2IHuesv26W1b96/mNTVmUaDIVMXtbq1uS4kqMM8gshitxy1v" - "/V4bD1tUHdkVtNlqPMi8S1mDBk9/A08lDXphxKkCkcOkyxu6eofzIhmOStka6LohBuqA33V1" - "7kOZJbcEqq6k5thxo8Y8yv3bwBOa9/r8bwtjeowtEm7Ic9Z3tQDOGZ29xD1vtcuObJ8ApK1r" - "Fy1pBIUOjoXUg3EBlCEI4uNH0hSW+qwghyqTdIKaA06QGahgUiH1TCQCwYwm15KCZpCCiHby" - "TdRdHAPkx4Tl74seZlnjV6qaJTwT7l+WYALmF2YXSn6OKbyrAQVIVjew78cWsGQVY0E/jitN" - "rfNL7TwXc9OvEq6AQ3NHEi9cYuwdbPCWSbbtq9S9oPh+Z99EVHkE+Iji3iR1pcHI3qkBlcO0" - "s5+zY8oSbmadTBt5zq+4VsPvnJXtySXIgI//mt+C4v4HfgLS1QVfZ9k0axhQ5k7h2XMtBKl4" - "V/7ZrFfVUcOEJ67mtA/q5WmmhsvkrizFtdSI47EYDaGHe4X63Rt2bzmqvKNNqBhDXMmvX2Tt" - "m8TZEKEFcOio6CKNbGI/BZCwbmIrkRbf3K1STbFz17/hZTu0M+p2qW5VfInbllK60pf/UHMd" - "od5rvaRU/NH27jy9QL0yknZNXnlV5NsPv5rCnaAfrWJHJjvK7dfJSknHadh3WdV4NDcvI1je" - "Ob9+CTrgw8Dm6YRPXGkGpLOufxXc5/o9sE6OvnEEySLFwpbNso0KjutwmeYWTj53B/3zHJBr" - "GvUkctWcdg4GJPfivm9jnvglv9Eyr9ESnSfx/uBO34ocnTXTz6T5r5VEMSCNtVp5qbZoztMt" - "LYBTRkUXX7Ec4WsagOR2NTtsvJqKpSPrkbjLZAaNXDtGMY2hU2O562uxQoi0aEPMSaYp4Us0" - "FBpmVCEjYKAwFI0gXxecAnlB4dBnX8BJpt9nvKfAoPMoDWUdIo3zkowWwHmi6MVAuLm4bxyQ" - "1C5nE3m+oo2R7lELwALMAugt66EvBoQpcgDZkAa6EQsGk/zzMSCdEALK+GpG8xLPC3z01/wK" - "RD68sRGQym78x+gvi67D64p9uF1afbm1QMv78RJy2/gUg1UzvD7c+SKzR+iAsMA+/sdRNjIT" - "lObBp+fNdI8k7ZzJk1BY5R3e5Hm+RE6jSl2TvLdZ+9M0aH2JttOjYDjnPWwtgMND0YtsmQzk" - "+t9iY2fni7urlrUWlDtUOsgbAkHoY8lkBhOURCTXzg4VMtAnTEEuMuvhBaYMJhWaJG3TgdRF" - "iFyfnQ94BwUqg/motHwiTgiAuj31tkNsTS4sQQTMO8y+L/P29PELQWCd+faLOcQAjJUDibLy" - "n/wccAwTPI7F6JeQGeAGLF8+w5ynPhYmikBSv5riA6BeTTAxxLt8cBKWYEOYYpLBDaTr0flk" - 
"UNtOPQ0bO7ElppMDnBOKWgxujKtiDYBsg/FfNw1AODioVkXkWCrtYmoTPnUAWpJ8VOvO4KHt" - "fLxlvOdcawIx+bjmNfaV7ybp8Y3G5q2mk1ya32S2OAZvqFmC4P+bSdj0dzfHEp+1/pR17xzT" - "YC9ieuFhpdbIuFCCtFG1T1Ze6+CxMfVBi0rdfTa9O3OkNgSo1u/wSdBtfDhDe4kLcT0oeOdc" - "4X6XaxeJOd+H1jpPkDVpJ3R3xCaF3FqCDphR+IP4zJbWhZZz9ftzd6us6EMVCiEiTWbycnDF" - "5UfXl1oJi4E2dt2Y4mlQ+TTk+NbCV7FLOKihtvc6uu1tyRnc+TerVl3trWF2fAs3kTxUeZW0" - "MXfk5rTLx7B+o3evek5EWjt5mEeCHLbovL2dffLmEpzAR4f+ochu/IfIzn2zQTlXro+oVZlF" - "5k3ttsMkW2rg6TibzNDGLRNSB1QkS1orGjIc8XpyukaDPdHZa1VFK/cFdfGZy2wlSjxIDlTw" - "3xJjMW0cAUGiNE4Zgu8uMZId4DTRPzjS/ldK680gMmH1xCYsAos5SDAil7CkiAxuSsMqYk5G" - "2xBKmeDBaFx7Le2wsA5JK2zfd6I0OTiH+kbLKIPsRwZ5JhljKWEzoLrodiJo76VlTOsLVpwT" - "1rT+YJIowNj+74P6CNwrWiclDgkax7r13qPSsULQcc9BjH8IIvtXLUL3TAEVLEGJwGIkQXe3" - "ab4y3JY8ssRTA59FNv/URwVe7QCktn8f8TLXIgJNODYQ6rX82F6ydwp1d6qMm19gRXr+QF3s" - "NtewkqQjyWYT0Ps+KpdF9V+omzwoUjR7U98iLxl0szBs5tEhbXia2GBpx5uNsmcSGqLL7/yC" - "0o4ZeOEZzseXQARuKOZfpaktTb/3PTb/mn3HEt0oCp2VB+JDQEP1SAxmJho0iMWNUmhYCqMW" - "nBCdaoMpGYqGoIiUC7Hvf0EaSfrETrWw/BOoAVAT7oUs5et1LAgkU6t/xp+/pGwJIODzyf5b" - "arutvYAVA1lpoPWyDPcRrMDGVps+vEyBkCne2g6Fu3cqsnlFsh1uy9bCr57Trr72XOGEvtUX" - "TF9hYS9r+mKKA4WLviROPS5/l6txR1ZfFHzgSfESTMCnj88zbXVCtANSLdfF4h0vbbblEaum" - "dj2maEWKe1PLMjOxO/6BZxvRwSlZg7HN4xHW1+rsm2x0z59uNHavOB5cFSd0yiBnIgt+Msk8" - "1vs68fB+R9/baqEzQj0G6m2wFUEazuAaoyWyW4CzRv8gNLCzW/ZAuYjZOZ6CgqBxJgzXRxtj" - "yqGwcYO+eN/E2enCsLqJueHCicbc0SD6T2gCzDFqgMhlYBakg8IyX4FAVWcMToa7ungtQfI/" - "Nxcco2aaGVRWegRLwCJ4SN2zBRZQPRhnwoilFkwiRZixnYlUxgA2fwoJzl8JEYkCJeUS4YQb" - "kNfPqO8nTRlErtiGHOpPqpiWLqhjD+gFpHsLSOQnK7eRXfbt9t7P00sQAjMYfxAQ/yW8G9Kq" - "5rJbj+yYLFEjyaKMZbu0Uqzzi0lyKqeUy0+r9Vs3B6+UeSr6vKniZFC4goD+vhlbna/XzXlC" - "ucSzMjvWRQsc+qxUcvXUuubEW7u6w76Lm2ur7bzgFmK8BBswUyGy+JJVp4nfBCS8fx0/MyD1" - "xAQkIhrDx5JeDEt8UWRDUAOMSSBITzXTOhENH5noHBl+qxaD/BIe+hApioHljm2PGjMH+cPg" - "0aDnPyB+mi0zzB9EEGiViG7/RwqDtAQYMEfxBxGRrb26bDDHaAqdMg1OgOJ8KV6oX2AUk1Qf" - "nUHQ14rCnJuBrOo0gcKgUIIVBYaAgX6FGfYK+bn4L/HgwEzDH4Q9tvxu+uctr67DeEysf90y" 
- "CSX0ITmn0qYy0zYRJ0FrXfKInoZ/lPbsVb0kcGIx9E69xnFG/oNL2WPlSdjCNJNd1u/lLznp" - "Bw0d+6LwK4Khkiwsy/vO8OKxfU5LWCKAg07/IFr8q7C8rW1h6uz2o92pg3wBw3C0+K9nii63" - "rq1H1WhiHMfqRjSiFKKOpVecdP6e8Bh/Ap/1tHmdqHzvpdu8cpZR9utQdH3aN6aj4MsyD1Pm" - "zlf3vZeA+p/XJ9jq68BW3yoBiiM8nv6qFFPU/6DbyY3qhKntruNG3ml6kxUlkgizzKhYmXqm" - "XsL1CuFZZNWkdVDdwdogc+NLD/ab7HopMA2ftjskHh6Ah4kEFF5eAgi4nQAIxJZeZ/Zt0dkb" - "eAjLLvGzga12zxAoSGmW9Tg8WxVF31xcbHhs6qnjI/ULQ2YZv94/b36b835knSV1XS4+0UdN" - "yvoYdtO1r/zBuUJEVMODjuYpnvpryqmjhamcc4u0AE4//QMutuoufKHm6vqFfvzPs9fSWny5" - "t5VtP1LKerHg3vc9ArupeMalgxEfLE/sid7xSKkmSOl0xWuJcK+G2Hd29x+feZ6KTaWZPGvT" - "GeacB6YFcNIpajFM3PS9/RBQfFNjG4ixMwUFp6m1simSoxImvTpTlKLI9XHWfLa1sLAEqBXU" - "ZyxWAgT/oEjZEmlbtcQDA/+KEoB/e7bnZo8FTptPXzVnS8NpAq6t7k/G4KqqqGHw2dpuvc/R" - "MsYrRFXivA8hjDRTIwnXUQG6etGVmGdfr/evCC/E19zsN5C5lRrUemYK8vqY3gN0Ta7vElDA" - "aw8Ay/rscLb4Da3Rs7WuWYkZiHacgsWJ4hjWXNzREWOxdDkhE0Tjx0C3WhoNB26QJveBFJGg" - "ESapjEm6w8QeJA8eJoK+QSx6znTTdJZAAeYE/kMKsXqxSRQ997URQSIaOmYu434vKUd1WTbH" - "wbJQF5OyJyRrTNqizjRZPKij26RzWOQjxavg08ecF71j56vP17vmdsK+HbVKfcDTcFAnYUos" - "rGMJIuCnEgB9GzuKrWOfjS30c9mrucZ2YSyHvDD8TyMCSLamDcjeTad+dq4rRpecy/G3sDhU" - "kWvYPnmn2jWv4JAyDaRRv07Zg/lde3eMbvpBy1WNQ1+5XpWq75bWtpFaAgt4VeEPD8aMF2dc" - "UXglKQwmGBVAGYh17EM0jqKmkHGTUpgZSqkRNxPk+QuagCS4vaKNgeKY5mOmeJNUGsi9Rs/j" - "VbSI5BIEwEzAH1R7/jXtY0Na2VzT9UO5AftSkbYybnNzGcn6ArQdYYpCN7aAxAp88v2M1Vdd" - "pu8Gx+dgu+UNL+EMTKNa9fd+vr+y+p5EFaWh2nwgu/OLwNh3lfJVNj+EJgPwzNxHUmVtyztl" - "NT8scWoEcOjof6hobVqsgk/B6rEJWJJr7Ri1NoaVFnHPpUXWMQXjSJj5FyLKl+w3g6z96Ngu" - "2CxA7hM8K+cPXY/oicb4PTDFmTjKgW9BfMThTPDzX0i/X8xmIuhavt7gzdTSf7xa/wfeUnmO"; - - -/*---------------------------------------------------------------------*/ -/* Auto-generated deserializer */ -/*---------------------------------------------------------------------*/ -/*! - * \brief l_bootnum_gen3() - * - * \return pixa of labeled digits - * - *
- * Call this way:
- *      PIXA  *pixa = l_bootnum_gen3();   (C)
- *      Pixa  *pixa = l_bootnum_gen3();   (C++)
- * 
- */ -PIXA * -l_bootnum_gen3(void) -{ -l_uint8 *data1, *data2; -l_int32 size1; -size_t size2; -PIXA *pixa; - - /* Unencode selected string, uncompress it, and read it */ - data1 = decodeBase64(l_strdata_0, strlen(l_strdata_0), &size1); - data2 = zlibUncompress(data1, size1, &size2); - pixa = pixaReadMem(data2, size2); - lept_free(data1); - lept_free(data2); - return pixa; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen4.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen4.c deleted file mode 100644 index 731884db..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bootnumgen4.c +++ /dev/null @@ -1,823 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bootnumgen4.c - *
- *
- *   Function for re-generating prog/recog/digits/bootnum4.pa from an
- *   encoded, gzipped and serialized string.
- *
- *   Call this way:
- *       PIXA  *pixa = l_bootnum_gen4(nsamp);
- *   where nsamp is the number of digit templates requested for each
- *   of the 10 digits.  nsamp can be anything from 1 to 100.
-
- *   This file was generated using the stringcode utility, in recog_bootnum3.c,
- *   slightly edited, and then merged into a single file.  That program
- *   generated a pixa of 100 mosaic'd samples of each digit,
- *   which was copied to recog/digits/bootnum4.pa.
- *
- *       L_STRCODE  *strc;
- *       strc = strcodeCreate(212);   // arbitrary integer
- *       strcodeGenerate(strc, "recog/digits/bootnum4.pa", "PIXA");
- *       strcodeFinalize(&strc, ".");
- *
- *   The two output files, autogen.212.c and autogen.212.h, were
- *   then slightly edited and merged into this file, and the code
- *   to generate the pixa of sample templates was added.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*---------------------------------------------------------------------*/ -/* Serialized string */ -/*---------------------------------------------------------------------*/ -static const char *l_bootnum4 = - "eJy8uFVQHUzQrou7u7OAhbu7LNzdJbi7uy3cPbi7S/AgwSG4BAvuENyd8+39n3O169zuuemu" - "qa6prpmamfd5kVRtfUwB2pZu7rbOTgAOJGVPRzNLN4CzFcDF1gcgDGBnQ0ISd/7/KTFz9rF0" - "/6+IDel/VRuwGQkAfNz+Z4YJ4Pv/ZkgxqsoyqEhESBAQEKhyspLq/8VxCAhIJsj/EoiG0AHs" - "/wKii6yeO8T/N/LNaDj+C+geUroeEs6OjpZOHhBO/7sZtkc89o7/1mGRkwRp6kbUZzc7HUhj" - "ReZW7BaY7JBZY6MsBYWj4qJIbuY+b4nN8BBEA21C0NDmIjI3Sw2xUXBecXPmoA2R0RLhsR2+" - "grn5iWzbOogNxV3uUelb/jXW9PstZY3UZN2eKJCI53ncey3fbKvB4mY8MGjjbbtK/cJ/yBrj" - "YQjWYfinWa3YqbCtRqSrdzhayuHZOXDMhj+a8MTwJLrpnHBWYV9OvLMRpTHvBchG54Wvf/UN" - "zmhbHDhiIZnsy07U9txvfpbmBvqt47cl45WdjUpQRiTPRASu3dWdVUioVXtthUz9bKmFQhM/" - "s9XgisnSy0ADehGR13qt62Jp0WEZJwD4sMH294x74Se25iIy6Q0RnRFV+GZbqU7UPlCkotQX" - "ZbEK6sZCfwUlWcsUndfry4D0rDFBFM/mH7Hn8o2jgWuHi0HUuvbILf3Ah1bGuwiTiHyixkkR" - "8zI7Erfhe5O24UpxYK5fNwuHNEsUWuK2rfA3g9FWm3830Z37gvovmMjr8bTQERsGF04qBhe4" - "uOK9IC1au+NvhI0LkEDVgku8f301WnoM83RVvtEiuRmhVrXiaO7KCTNw4YXV7uNbkjqj20Uu" - "uNyJfCFs+C4KxsNMn1a4P8KDhzezkEXSM/I1A3x2sG04979sfL8XsDanLrTuJz21E46qnwZ1" - "73lfOibw9jYrdQriTrPtKhEnkzgjcSiOlaDvdp8UAB4r8tv5xwe8Ys8uhb77nrhqd9wtL8cA" - "gmUfD+fcBqr/zQQPg8Z3MG36Uxd+xoS/7GMPkONLIdgeG+1uysOliu2lF4Tv5L5TyaE8MMGs" - "dHLeiBPZfTCzs0omGktVIN+68fbEfjL5eip1GwR05c7it9DqbeOUwejpjG0W6ERZFHXit9TR" - "rSQowN1iRfxrUbCfvnUVRAIpysWiWeO1nZm4RY1zT8H+9Yk2RigJxCDNgueGURtPWe9a8VW4" - "AEJtEpNeXrahOWn7SvT6HnRk+Cn5Mgi3AJ6KxQ4raKR6rSr2aLX9gw26oayLcUb+2rjEcqq9" - "X7GnVIfMZAMc0Fgk+ny+7SvgeqSXuUUeMg9xJ37BvwwNH4g2Vrlrulc4Jlvva/A+H3X49Ptf" - "wAm1kYmbGvzhr0PE7vPukNkXLJNOlLip6nZpJSUGRb1CQK3xzgWt1foNI/3IQ+Hj7ANJnREl" - "01J0wrvtKsLxVzls4NzyH+xLI3iEEKRWiStDo+FZv43/uPWlji1UM7zlz88AHV6l+FIcGpQY" - 
"yz7iSgZzT/S7TD/nWOBb/MDyLXRIhQqZJZQT7KVBZQHudrOnYe3qpwmx2vemtLGIx4qjppbM" - "cVLIRnK7EadKUHomLQG6aLpYCvIEU0nQ5gaqfUrwmx4g2O+VN1UFJyRWaQ+HMGvcaMM2OmcZ" - "RQt0Ta3fog0xqGeWjOs4GNSokJNFFTEkKBtoKl0uhplMxkL2QpE9opvLVn2P4iSXw1pkM0Qp" - "N1PV9mfHmQSoGL8cDrh7LBsKaFthyEtAXgXq7smG40uLMBrhre31OmOOM0eRrHg53AdVLQWo" - "i9X2rQOYIr8joLxlw6chg8ZKMrZOKogbxdZ9GznPusfW9XXzw0A/e1vEUdAto5yMEE5nhrWB" - "0OLKYA4tlb9gyTX53/4S4Cx9rgALtN5J6gkdrHA+ihfGPeNo4xjgOu1wPQwe9Mght9pVGgNm" - "kOZFpQXA9VbEucBkXEShicjyaeatDqZ3XoBkxqJQlpXxLMc3+Bsv1z/9jcX9AMcaqxhwPahz" - "+aekSZAruqTTWFbLRmjrOYFCK+MwVyaMC0JzpJdyjCoF8TrF74NuLjsCHS1+T4sfdPMUl8rw" - "9xEhIpiXVmnxGnbYcxbF6j+KXNEVo+UEYGbKWIvMbRLhhm9CuyBIcEUE2xFkBA/M39FbzCyg" - "1ZjecfrLIKPgNjjYbDV//VgLaYxWGML+hfqzfxgeud/dfAoi/3g2FO/M65XnKqAV/TwhwrTB" - "WqVTXzSBPK7izn1+wQDPF3562TChp1AEL5D8Mk4Es6HKPydZNAlb0TbOtjSzz4hTIQ+n1CtV" - "ZDpxvmhoJT4pE22rJbNIqpS11TXMvgoDsW7YSIE1dAtjX1KT5CTHlFy2aL4FS62lR3SIY96t" - "R2LqXgIH/B6TKwluhRkadAfdG7RgtjBjzTdGByehMK7hNF4+Qk9cCreoXO1boi784D6t3sbS" - "FWcJf29WgGFksFcC4wbu6PfTeO3om/d0VqgJZORPISuFFQ0gXihtUa6TUmX+d6eXUEmaf5OP" - "YfdllQhvOrCblnSK3M0lpU0Q05XJsISMV4kdZhdL2HoxtLDOdU6ms22pxyzRTUioO4+n7tUC" - "DenPWf8UkiESSCMij/RB7GvqoQyLlDjuFO2vuR7epcKlWcDmJq5SjHzGcdFlh7HZsLlWjNyZ" - "fXeVRW4OXg9Or8tyGNJcqVEvndlI4h2/OP2dRds6UhBUqab1C/AnUFI4RLilYiz28UIxSTMC" - "PaLU+iybYdoX4kSTVGRuNdZavFoEW4CiapfiE1rLYWt8ihbIclB4o6PWk0EYRs3h15lgBFPt" - "o4ChnL8J21xF+Glov2lQIkoROxwYWtb1xF39+4s3G0vOWSCv8BXsYa0Ofs/HXkUbzJ+13yzz" - "xG7p17WXhBwWLkt0lTtx8pZ6jXfRn0iFYx8Znt0yHsIr3XL9ZmNhIytpVLvhm0zLFRdFplfy" - "F1Fbv5l43GUNzbSi5rJDFvcg6kSJCjq+wIVtAFRKGnv/JD9qRVhKcdo/IWbkQnVStNxM2PKF" - "/wNqtLosSYBdQZz+TdYGrzh5GngwSUD9w8EO8UWAIb6adRwPqwLKVXoh2+HZPgfoxEOAMUyF" - "K+1NkJYyd8iKZ++SJkD5SmIFTB50sanSYoUelE0KP0Vq6WdYlwloFq8shywfqBuFzj0vDjiI" - "xPaOS+VmjrJL/wHdkW1Bjti2ElOJyzvJuWZVo/dB8V1ug9k61pcQxt5Sew+/RPdFlZZVdlS/" - "J32qw8KR7ipVCPNBr+Xb/oPZEbap8a/Wt/4YA7kb8W5vWb8zZWHD5GlGt2+kKA62npRv1XV/" - "I3/KIJvG7MaSWVgrx2JupBN3aMPxxIhTkZIe5HACKMcwni6yrI6p1/N/ChIQhEDzpjnV8TOW" 
- "nfR2cyglqRVf8KFIaJXL1yBlAe9/VNK62Xopv1KF0hQGrsu47aUY1my7t59knX1I5CU75HRD" - "xZL7PvrHfw//KXPYeRCJgrkAdZLSwsU657rL5AssmLhv+GD+q0WXkhC7P8RcoLObNOcrZiy2" - "RZmwdi6gT6M4zkt3pZv4mGKlRgQE1rmviV40qVa73Qsr2iS2fWfir0gokW7970Vf01+qsV0T" - "r0H9OSAxXFS8GIQQc35EOIxBama8DPw2QcAcGkxxxO1a0gO/NB3xS3XRfEczmqK5x486OYe6" - "OWZc8Q2NHbnYPHu7V7rcfgM/2jfK4yiw5SfgzyvMT4XULQ9f8k6+/Oi6GeXlovvChhNlbQ9q" - "nGBnSqw9XoyYwZCsvX8EuY6N2bgE5A7LMwbDUebus5SrCq2gzuKZLC8OHnN5m1xCaxu6bXPd" - "HrB4z8/jVZTmIZl5cMC7iPtGp4o8zj/OeGqFu3E48auW2H79YLwQ05QL8ojv1ziW5LaKeuyT" - "JpAjX/i2JmLqP/i+0kOm/BrMy/ndx9HqLff0JiQefsZxMBBcstk6Obvad/z6IcyvyK/MHuZs" - "mickQ0rbNfeTOKN4zVX8yCpVICh3JSu5+DmEokQdPhUbUZqBcGguH72iJuw9FqJRDzFbgwUI" - "7vs1vXpWSY3fg+HkRw0O86Mg3m8iK6fdlQ6iVB1/j7UAxM5/chBGBNDpntRPYRgrLFkjKJDm" - "JhvdVhbzfkJVs4wfoYZmHnxXmyakrPLT/5xNLlD6b+OI1ZmHUx3kVN1IxlU3EdcGM3UAWjPk" - "pHmSkDe2t75xWX6aXUSFCsM45j+GuQrDSZwJLvb518eYxzN+49RKKke0hiKFjqh2FzC1Dqmd" - "1jHoo2nu+g2v0yu255XvL0eTlPPz3WOVFh0xWCIuvcUp6bnGcQytrcX9kTOQ5nyDaDgqqOGK" - "w5OTtG71QMkGHnRAJ65LHzk+EmdLxN8zxaP5/MPUMy2UuTR0jY0MltCHn4J65R8PF9pTYy92" - "T+2k1/9MnzPTfKYJzhGpSARRQkMemXHFObMxbhsVa5E1wdExnWPr2uMgaogqk4c7uI8Y53YX" - "cLJYcf4jO0a31FA5H7ifMeK8Kuk0WCO0JVqj5X13lO9RVLBXWpjkcaRkFoZDRPu5Wkm6zguj" - "4XnwOCs/XFAYbIxx25P8jIhYSJVysKIi6w7imwyM3GkNjPLItKCVLHAc0jOhcKIg9i2MuH5Y" - "POu0WtNbTT1nkf3D1C/k4Mhqn18BJV2ckvhtiJPCjREUYBOHTqPzh/QDfV2DV7HyqlPDLWPy" - "kd7DZcS4rAh6jRcrwkc2LtzgbkMj9ERsNOJEKmbOgNGqQH5pf1aTcgHcF9QiXQHhyt2SXXbQ" - "AZOIvyE+SJhaEU6a81sUkn00YR4tz5Dmf8GVnJSyZIO4Cfh/Ixv7/zVkQ9j8H2Qbye5NPOjE" - "jWgUM/kDKuWPxIYfvcUlUW7SfUkZ1YOXjkuAXJ4Y/Om8g78W8osgy0XdGm9nmCIBUoiLKVrk" - "+JsctVb1q7XjL1RFJa1A6tvYHtFjdXXz+kIFDfcq0fS+5fYKtva2e3VqTTl186qruPu0IfyJ" - "P6sTQFOMFLXBA7HPveuIHaOoxj645KlZwsK/lN9y6g6mCyywvmnEByjzSNuy69sxjItxyJI6" - "bFVHMHdVV4fpOfPhIfRzgXMJBGKwfmuUrWGJMmBOxdq/1pb9TfRKKHNhz/mbZlP2LTWlzMFR" - "4VOAnpXSLaGudtF5vNUgLLvzWDZ/wifn+DzH1R70baLbCQ3IqPYXLnWeMCYo+RuiF0uoz7mp" - 
"5jmsMpZYwADY3zUAM/Rva8AKy4G2VzDvcyXeyd/wbYGPoGTn61j6Ps2UPo8HhQhys9ZY7zb4" - "gR4CH4Q/f/bg0KCEFrD7MVMJrvk77n+jWaVD3CYeaC99aUNdTaPmk+OFQxYmSCakmsHwPvXt" - "proWCKWpTThL2Li8rW4u3Qimxp6OVKq+OAHZt9U4CLLjNZm+Fcgtv/mC03lXZIN4047AB/YH" - "5Dpy0z/+4N8lc8kMTx+4teKnXq/bL4EDHnLsgUKeCeey/EZp7RY7TgrWwN9hRzMDYcCeIZnX" - "TN5MuJjpcOERDO8Fmodnun5MUxq4Ty+0sKpwETuLX7MXOaAg0rwlmYO9+VKMpSQ9Q9K3A26I" - "8IIzDYOoPso0hWn0ni1OkSX/lH41Jo5/Q4ICR2F1MBIcaeYcyRsLjWn70ycGG/RTv5F3CRyt" - "UlVLvIh2exxvABu4poR6iFsF4+SEDMDdPEXp5BleC8pqIp/Z5NMQi4q77FbkbCssY5m9f+PX" - "+m59cNjfmwIjz5lkpb+Z5opSXSxKxiO4C3WsMJdayqLIRNnuMjnvCAKnvM96lYXXP1WdzPGz" - "IeWWWY/F5xCoFBsxR6Pm0oC9/oy3pmecjGofaZhTBhjR2tqn/dxPWc1ixTsz0KTcwN1DhHgR" - "HkPSZ6bh2Ek8TwFz69uoVxmpOXFVVzhqSwqQnPjFyaRiVbqsdJ0APvBBEQkom4SZ9WBtsqaB" - "lYGyqlxFz1SRGgokDP3zv1pPkl1P83NRurtD6MDW8qDeh3TVafQlnLh0I3t/lVPPOjWWWJCv" - "kOAgSVuDOp9kB/12QJcrjkv6+XEDpJdMW81PfUb2k6US5dnWwLFtfwJqitx64WPQmhHZqnSW" - "cNPiasQ/fCsnBuM+mN6bG7sz//ECkWbURHazeHI/1PjT3n2rylNjW8+fCm/YqD631hrQB8jP" - "peTJGk3kMzRE2CzFpWD8lDIOs6mY8yUgkAdrrytLwP7hI6JHjpGuoOK4lGs34qNqTtosVCk7" - "n/COurvef5ru1164HFfJBPKbbcxmgrhHIKsZXVUwz9QwvBPi4cs4F77lVjLlz1Pf0FIKpgb2" - "rha6yKkGL3XN24I9iMGbMt+VQxMPI7GLe1xSUi2KIOR+wMre1xH43DDE7co5ckeYBSeakeG3" - "6LZS/RvcNGQyfNG0SpB8KIctyniF94Bw+cRUCvvyric/Q7eBrLkHH4aUx4wC7Ezw/Cj+hZXi" - "cbTPkRAWiyst6RplEXT50pxbaqeu7N3gsRqaUCm+dzrXi6JzThC4zQhyJ56z0zhyyfJd2MDo" - "ZQvicn17BBLRS6/GXMl10L5QW5DVCwP2E1EaAUqb+pvz/YnRUh+EE5kW66tWZYI0LdSV81Pg" - "RSIHZoKGZISFxtILhIUX6tMMBMm5x6Cs5nSf1/MYn98EZbt8zTAbsSvLo1zqFvb65nrE4IIP" - "cAoxjZ48uWDuWnr4z9cOPXEKzYOCeh42s12JclqviD8QzELpyALwXvwPwuHYNYZoWkEqMhdu" - "xtzGOhanv6ygtLcXuwRb3ww9N+hJ8ZtPbWWLUCTe49f/U3y+jCnHUCjRWzMpBHpZmUpIHxAP" - "YH2mux476jQy63moyRvO00jsbPdx8ds4uQ6KfX7bQqowzMe3D1uuYEpZRoCTSgUpYiPluhZr" - "6U1dJAyij8ogYfYjsO6ODtFgQUjlyHedNBttwqlA2SCqdB7KAP4Geff707+ZgUlddjjJX73X" - "O0Nn+mykM7G7S10yjHDofgJl9YP5XPKxLyIZqj39lj82mvMztH5vN69IEm+X8u18qQT4SroM" - "ZiNH6yIbrpEDvtVrmVzlYfpOvWSXjReBsYO9EmFAQ3hZvTJLFjUwCWFaSiihmHrsTcqc81OQ" 
- "ah0UvMTgdLHBUjBbNexSiuHGq+yCJh4B1b44UXJoOHIfFRqMwlqACI3ylYfEKPPaNwlvUgOM" - "pXIZz0iFBRFQDumukKXEYzuuyl5jyi44XjSpSKpHmc4RbVnsI4Dm3wdQAUJG0js36v7GBjWV" - "bBu5NGjzQhmd5vgH0fssl6T4x+LCGrEwr9lc066mm7lRaMoU/qIk0Eme4efRAkdmgpy/GA7s" - "MCCV+KqusUG5Ti8CFv5VaVZEC78F10XcERq5r6IoFBvtvCeBbbF0511e8y2qtXXELPy24a1Z" - "TTgr0x7pyPHll5i/9T8JaepxpGbmW8GCwaH2xvRWfVs5LltVXq/QwHHC03TNDlnIadn6fO1P" - "fODHVYyQt5JP4n9f13krNx+NztEadpd07HLpkVuBdtsX//xHTACfGNZMjK7OtZj6QSZVOwkZ" - "JUPS/fkdWTUO2hLBPPS81PucOyAp9SU3poSXM+eHaAkEpisneVdQ0BRKsgEeRupr8zXwzxCZ" - "NoLI4E5a5HBQqHXdiPUjhFounLGouqc0tR7jYsdj6+CKCgoCWeXe6P37Wf3aXu0sMiF7JCae" - "oOjNtdY/28YOLDkOcoZ4om+06rKxP6xNBAdHX8HDGSfu2buSJp7rya0UI4kXR9b8tAvCSvrU" - "dgOijamnAuyN+kw+9F2t68smvRVvoilpfz4l6Ypmoy/A7dh+oqrijmrBcXXor+MooktjncEr" - "dqqX9/zZmD3X0cac/swdQOeL0NwfEHPnjRZUSUdBu5JOR1Eesi8PjV+QdEIi4FvnI5//Q2Fx" - "/F9TWBjJ/6OwLPUGnO5k/Rwiw1rh1JaIpiiI4s2Ovpu3VtLpjpD+dyz4z6Ew0fjsU0cY0Y9N" - "BIeQ4HlkpmR8hOuOojA3hNEOQ6WhKNHKTZ4AW3O/1epX/RbrFpG51FgHnomaxjP7GjXL7TIG" - "f+Lcc/e9FV1ray3Veg7QpqmGtWVYt7afqh633Y5MI08NWU1ZvM5k+9OG2ktna5sOstYLg1uh" - "nAjfeZVWJp7jiuuUEa76RQjKDHPIPZXXWelvp2CfiTb1GtIaNpW2dTc3A+XT/eskWEStd0rk" - "hdfNaNuOIdFjsmMG2UYv261OIQaDZAHl6IBAaSEMhwJ3kTM+R4mouMs/gYYT7BUA/hwSNLH2" - "M0hZneTMPH9KCTxgFLHBIRf8bfrd9/F9WLnOQtsYaFFMRDgJOIWDZI38Usd/Ab8JaQcmXGHX" - "6NDq3W3SZp4ke6YFfuYRR3zdmrD1ARqUlEavE4jYxeLyIa7cW94tZNAary2f8c/D04Kmqg8r" - "OXpiHUX6zGhJbNynzB/8s/XHGUxqGgmw8Bf39DUCg39LvF2n4zpNXPJnv9uScB6RPgg7D16m" - "4z5yz84fGfmNiPCfODue3To9lb6m9xdq8slhF94Fm3gL24dm0dRofCbURHpxDRXx8PhzT6tV" - "62Gna2hodf2rVv9WX69jIqv6jjobOHBxgKp2yJjyQuh6Fqb+hV4N+1K43UeF1H/oirrsWvn+" - "E30jSpSBAzOJ+RYLPm6sjEgIZQMhIQqS+aAYcjMPQRz+DNbQawlKU1zPxN0ABgqkWzWMn7Ej" - "c7U8qQ+zt4SruReyRBnnqNuGm1H2j28IhaFf05N8pWZr1PfvA1qhR049F5zRzhgmPixZx+Yy" - "NHtpFyQWXjuHb5ABr6SX6eYyTW7ZwLDU5SOhOtSCx1vnxAchpG13HkW5IkzZCz8HUosA1bRv" - "K5xoonHqYK306p/jeaA7iOlSq+eFn1JSWXJwWtqnY3pCkhLobzNgX+h0E3OQqASzT/oIAQah" - 
"l/WyghHwOtptQB5UTQhg2ZEr0gIS5SjUo80iH5Q5cVQicy+y9MlPHTeTA+dYlfaRy88ozL0Y" - "oSZUofzUtESBJ/Z2c1IfAPxpMdY3woNy6OFSuKkfx++/8Pjl7GSEhUqja8Zj7gk9NV68IgWX" - "abSED7+0GFq3iU1KKXNpayK5tDx/92sTmNmRcR9CRNIFodecOYMML6viAeIW8kVuZ8hRqDBF" - "9DgvtYGmd/gSF7FcS3rIJiUIe/8gc3FFAxNVwl61WqjQxryJ26i6XGmEHL9/065dgtugFD67" - "1S2KgBnl3XkpgkcoScSNvYAYYyNwLJ0a0IM3Hu/kcCHXH0+AgwrMV82RhPtTBABFCWhVSxgm" - "L+j0FEyv/GE3+a602grvOewI45pZ4TY0qo0RXu9Q9im6e/SHfLHV1Un3r7K5tBExvtivbgSL" - "hVcGJn0d2uouZ1kJGGhLnZEYQAKgumERiKyeUfmlcaByZ4rsXL2UANm8oC5tr765H0Ti9ky9" - "wYTU0gwf8TvPAz2HrDy+U9Y2hB23E5kwL/xnQ8KrnGSYn8T0mk/1S5kJBvXueSyULR0u9BCy" - "EiYqgvSHKrIPjVLduI9MNc4wbqiJ6qs5ni/ZKTJfIQKwNWxmTJbcxsZILwH4ZfMEeVnYYboE" - "J7gwr4nF3PqTqgJunfbH3X2iecWk2KFvx8d86KPIOqlQawT+vKWd4eqMpAxz+PIO3q+pEw4f" - "D/5ZxRPZN1PuxRfcjmM9VEmyMdtNek+JSBVucozVfxctsrCSn2I5itHv+H0Cnz5W78Z5E5cz" - "f4xFaswgH2YDQJoc9LCdIgbI96Hzv+aFWMYV7d8wF6ueKFjIdCV6CA8RB5rXo+fMqUjBfRC8" - "gBfzaYtiyvLQA+jRbSX6sFmuwIG5gh2Me+Jn8EidMzPHooD9kzflDG6jpuM62qJdSnbfb+GU" - "nLcmMiaD46Vu0u5rSxGvobw7ePJorq0XWDL6Co+TQaFltCTPX55QA97EXPEXmGuMWlOCYTWD" - "CzgwOgt7WQI6uKUJNZTJGlm8kj6tE1rM0Yk6CMqmoZPQbj4U/jE+gARhhEFzARekEgr7XUX1" - "NIo4FmUHUBm4tjfzBJrMM9qUaUxtfDW0F48LdqUdfo20wyCuH0OOiGP8yHMnqaiLxiiUDgHG" - "QuXx4VdnhLvdWq4WE0/oIX46IRNJQJH5r2BVQrjfJiZ1gi0LpnQ2U1W6FIu+NBRCt6SYtN8W" - "drpyPmXO28PeB45WzzMyUGIrHT2Gy4itmh6jTKyzsFihZz3gCOuuHAZP79ALELrSJr6Rbrwu" - "0xR+kY6T2gXMYVhWfxaVesFb7OOPvWPRRbtnwBtFvAUbO0Dt9/TAk3hW9PphoyaEOBW9E+/N" - "/PW2+MSGUexZE9Mr3KRDvHvsZ2Wv1c76uaKWGT88Fk78jQOrhtJeZGpgCAvjQd94L7mkXhMr" - "lqQAIShcwqeZqNKp8g8iXINgFMaBBPTYXYmCT8j2CKOdS/4Z2k3VgmlzQ4ayhhAKH1yAWUYt" - "mQgEGssqbpGm+bdFsIcYG+x1iU5rA3i1005Q239+knBecr2zhe/oTmips68UkLR6l+g75mX0" - "ij1u4viJwe+C0OEU/espyVO/paXx72eExROvsyxrM+DbMWUYhWx/CHmHHkY4YFpls8tYzNAM" - "i0hhupJV8zgoNKIwossqxSLGCtNM5xczfCeAlJD1A0rld7GLxAnclOQCa6skVKIfRMshP6iV" - "aTee8BTx0yXpeHxONIejH8SxgPijWwd3YZEdIMWFGDWI37S5W5VfM+fnYExJKUmyikCBCAIs" - "QPwinUJuoIcHXPOUcW6FG9eesQdv9xSlue0saqWl21ey8cJTkt8+PU5wwlBIGVgVVzvoIVZu" 
- "yFIbI8TRHvMdyBcX4pf5rSQdPb1uiEyxJhZr3nVnzoASCaVw0dYlxCYlteX7K/71T/H/jTJ8" - "hTUE1v04GjKNKQmqNxOyc9mfV2DMgWYGtLjs1Zz3E+pQ4YDg4Pz01Lh4z/NdtV55z2YeoRt6" - "8OXQhEoFwmvbjWv+PoZoE48szWKDsSQ2eMm/v7BFmAt1szUHEX8y6Tyl9N7xkuO9FIWgGOWs" - "mtLCaqhP8TLSaVR7ct5fNxdLnt9T2131N+D+rG0j/A8w9TCfuwEJjR9LKCgGYBJdmNCNKeQ9" - "bqkH7OFAXa65lzr+67HQHkjwHITFYlOYmd/eZaivOMbVRT1grAXRceJvr5+HeukE1tgORT7M" - "b4EFjbGjZfZGujOZV9gDR3cQICsFyWTMfNyjFPg6njSSAiwRVriqIxe3LU2zoJxOIyn3brf6" - "1Uxebk0omXSP5oE1lO/7e3cG7ttEjARqHkKrC8E0vXDio61Fgbway5IqsnL2G3lh6mbiCYkD" - "zjywCw7wijvOAir8DmXr04aawlj+6uFYam18/nUa+/TwN7KvCRjKe5yDvp67c1WUp3DV8zl0" - "9jX2fINVzfIfIs1gvXxI2Aqd3ZY6WORTpodY3dIilfgxfWpaVrfdupCpSXQdAdBk0atWeTSQ" - "WyGeTyHIi/Cc1pBlc5kSxvIDWjziuXdH4J9hBMwueBUViJckxzcoEZndtmmnpsMt/U+GCsMo" - "skz/kXa/UhH0V6Fg9V3t61T1coLAAIz6RPWPfrBIXS5n464e/3UDlhsnM8XYz64bFCkxAjAd" - "rtOOW5CTIe046VktDaVtNWBpcX4bicM2dApWD3JXfu7H0gECcu3eIODqmRtj1i4n5werB6yr" - "N9Ox4sluPEmi69qvHyQrHulSy3juCOW7GcB07Dd2BJcd8ZTKoFXlJgnlrGB/UF5rn6I48Cfo" - "AupNKArLX4J8gW4ezl1LQcYsZUhuqcsbbUpr9jr7A2BleogdaLoK0BE2S3fjdBHoLWXN2nRh" - "7GLgiXy4JYNf3/xAXtQoVCXwfnbVrizurdyZuoTFMWlQr5C79KsolelIeW+z/fYrjHAhLQdS" - "rcXCAkVk+hw7MNQjPyN+mJQYhd1oBd/CTfrak/J39Z8RXiT1fNRwe3Hr235vrDRKWqtM4k+n" - "2zD7fuhcZ/rW72vkTxf07qEJ3Duh2kIEpyMWSlrqzEjn49x/zj2S44HI9enypJMC6pNnSSo1" - "TBSuE3RVbdK7/e2kyZb1ry6C1W9NxdFH7X9bHI+Q9OAPKqq9XfW8mzDjHj1ThP9GQacuJNSj" - "5ljeE4gVReAd+lPzf4560TDrE8VfhJ8LvIu7vcPOg5Ll9pXepaQLcZzWglXOB1x6z+Lr7Ci9" - "z/tpj5qSJtv/7Zshchpig0K9AvUJnqECUNO3DDEy78jz58D3logx+jaD9ZcwkG9bGKY9gHv4" - "pquG+YjzPqZHLm4dZQ36J0O5P+XejwvO9rfqeykw+Zct6D+NTvVi+d9xJakmTV1QEJNR5Cmh" - "Y2mvGUmoWskt22w6GRNNYx1RxFpSpNLMY4/IlO2eJPb4YqLjW5RUYVlaNwu/pX30Jrn8sv8Y" - "HGmC0XIwlnCaYtT/53hQUyzNIT4ziF1dGHVT44uqYR7tcI9NJTgxE2ky8Sr0Fn9vmGmhiYG3" - "+Dt/RXPFOSNwdHPlqvfuDNV2tboBCXxAOt8vAncqco0TPb4jieFyOy5sqlVpilvujsvMJs9G" - "OBHJMw/n2y/mVcrVbml+Ao1q+I2g/2hwnvvvkZLvooly8T1ytNMM81ppIe+DiDhRjQzOkmeI" - 
"hX1/J/3AbAUsigY0SEKgq77LCTlS0YkRUC9BCxl+hQ+IzlH5uEOxcANdmsIQGH2/UmtNuUrO" - "taOMp+wV0sOqfrMH1WjuD1rIXaPSN8oYvnO0jP9etwah4RqT0pHnghHWkjbujYC3JA4goEuh" - "hVxl3DVSQ/V3K73MGUghrkIwILFjmbclIGeWOFhm7GSVafIntJ7Nw4JIyO/UD88+JgTkta46" - "Lrs+Y4KpKvnaZAsJYjAroAY1wBpnvaNg0O3K2O0QXl8EvvKXIv66xdVmzhW+IdVkLcFVzIYM" - "pcPVz/EOBGX0xwKOnQgOJ+Fj4Zptdqr+Zy9apYlETtHduSsX70RDZvDHGd2Nzc6/KI0+jhYy" - "ynTpT9Nu8zfUXN1+b9jFEgjPrbo/f7f9dwtxDg3VZ6nJtI4bpwRTyJN240OcS7AExGfcnYaF" - "6jpUWNU+JJT3kD2U8BAbMhmoFGSv9x2ohQlHqZKFbz7E0y3LHcpq8XghsGgfSvtTGmXFpxS5" - "N8SUC+alGpfnDOPOKRkN/nbmQYEytkZeuj/xYoWrHHUDtC9SAU1vpH+PaMb1lQ4Mwrm6amDY" - "EQcmkBbKpC5h+HfnX2JxvmtNcN4Wpg2igwJAbq/EB4V6ntLd4m9gtmkVxur8roMq77JIavwy" - "QVzQNW7KkkFHtmyhfkliT/tZNPDCeRZBQop+ykRorxIxhYE/3YonMwGqC1y3yCVVnaR3xvPT" - "zlZKnDGaiyPG2uxqwxXd7UWor/XueIuwYO+wPCSeUqE7oHQUHyLlx8ffeRGhZtq9VKcg3/IW" - "D5N/ENTCeUWp8KeJIh9Vc9IrEsvk8F7ET/ewXbBPjbGsGCbi/vczy4Cnw15Zy68auPZhua0u" - "yq8dt0f026Ak+xqbKirM5F7EqHfS8lvBN7peCI4XcQELHE096HVFSyUtdmWmLf2IFwMaht77" - "vXot3WqFlTqX3E6SFw+88Rq6KMG9lvZo8StZ34/rYOgoDxQKZRyr/8MU4fy/Z4rI/48pYqSn" - "lEgl6AccgLUs2qgOxW+I2AjgbepZ/CXu6iUyTmJi4VMaUZEVAyM9KSBubOrENoW5o7+MKRDR" - "cWL5gz1uLKpYSiZA8c8/XmKQOt3X+HNf9HOZ01GFIQf3i5ndPa+Rv/e/mtuzY11N3jv3Ba2u" - "upqzlokajeDcHy0phdYPdd3pBen5WuraRgu78JwPtINv+b3fe3nVSJWlEw51H+yFXjiTzVgP" - "53MVNuBQh/982q9HsDOoWBxGsTv22skq/RR4nIDswpww2K5FTo72b9i/ZrR4hm3WuJSP+sHv" - "PiMIaAsXuOb/mdN9zmK5f27viyRpN4IEQ5IjQKJfoNxuZdxWU201dLdbxtKdIzE6X0yk1a9V" - "l1Ojtvx7meAIjXWNgSTZQa2tQ+sCNwi7ZMR8GI0x3DzkerJ3QeqFjJHYr7JvmgIaEbbwCR6g" - "vGstov+g4frM4bfR7db57YyEJdZ61GjyjA8QGOXt06D7Y79383sHdlCiN3hnJQzAmfJMfzf5" - "l1S70dtiQbIP4xSG8O9at4BNwcVUUnuduOZR0ETxl5FOpbsgXMcHspkf7PBvdl/m8f49eg6e" - "bp3VPnc98+KQ+9/f29QLU2dJfVRnzfXgA5X/FS11kaQ3s0vy1pq3rZ4I67aNfOYh60VFoMqV" - "58editGQxaplfkFr+CcI/xagF7UPUPRpPvt7TKyD92YQsjPgxLAhlGnIAFH70bVn1MBt4ul7" - "rBz6HpiZ/i/w6Myc/YillKsuR0uVpxrVnqKKJWd9f6uAHCWZNqID/bfija05vNHUam0aJ8Pe" - "sS4BB+dFGOGrWQW74y81AFn07Sv11VCR/5KqxPVdkX/b41Bl/KwA0q0s8MOdtAKq7CifOPLA" 
- "OCZkBfrS3TC5N/cy2ijZeTA/wK2z8wucKcNZg39+0zWjzjCS0Ni8PMlXKHCN+v5DrImnOrIZ" - "64W+gCd/0M8KuWW6zJRmTU05cBMb8qUZHjPufF6kaeOEl2KCvfcFDTTQDGEFPSxFGmNS7p/2" - "sBIfOOQtaRWogatEj3xhn38b2gOn7g8o+s6GPr7hs8kAMRsGpfcSjRpkgEGFG2ImTVhnKGQu" - "FZzwnoLA4rD3PZqtjyIKwiIh1x6BkLWNvSKMdg9+6Ec+7OkNYJEUsR/QUhm0wJnSUiv2eGS3" - "oI/xJ2GH++c6e8ibSfSAU5qgAYeIYOlXwC1e9IKfiBNTMilBI1JVBBx46EU5f67RusePpedf" - "fd0lquRhTsqKtIBhFqMx08R0r8bGaIjWdzifH8CftcEUqbecZ6eG/uijszsiM99RvG/UH38r" - "GSPNXmYcWaolRHSrPiZtXjP/APDIq9iOectYErkVJBV9QI5Ti2Kt0aMemJrP3AWX/jyAL49q" - "YXnl7qUK/BIV+x0Gf5sL9lvbtTWeDIOGehy9xbgPr6gUPRT2mzbw1U9G9Hegx/a5tWbSZe7L" - "gUQhXKP6yaqXvONnq3HEmIx//R97GsUFQgZywbo8MXjcGJaHeG/jlXm/yLTiOrhaRcFZVsgz" - "78aas1vv8u8IaNC0YJZN9sM3hjaFLSPkP3DasdwKbPfRMHNLZ+2JAbHlgiH2KVaJuiOZY8E4" - "KgfbBtBW1dcMnRytMMzJCf0377UICNqK0CDv0zmsY+QUBY14LV6xMJ1kXkrofhLiEhi1r2pY" - "tWesJ5ZjI4xG+jIxbdQBdnsAnPVPKiYvPVGQdBc0QCd/xXTPk8a4HS22wHvA45SAhnRlqwAS" - "nYVcoJ4yJdPe7aT2bxKpRygpd6gWu4RvpYKZjkgADGkxch0ScrWY3tTJYYHdjo2BQDVSxNHy" - "EH5Rr2mmHuuzEpoC89wrVxREfbCEaNfuirIr67Si/zApRx1WHpFACNIMk6jhbWMny4yWHnIn" - "I+U3+z7hIRnbaFghdTBlyBrds4qt8ZSCQIn83laSxpYYe5FDwdZh5LHgpR5c0Fmz5A4axo2g" - "mDKo/3AsWNJnKkbIBqG1ObErf7pg/jMI9IvUAOwDfFvAMKZdUmhBsgdDyd0Tcx5i6uSpMrjQ" - "+djgTP36dHHshYFKzS+BErd2xbOUU+2mZjIKGHSXbqXkCkIATifA5dlHZmrq5lsKcjUMKWtV" - "d80SS1nmuKKRRCZEl/XmUTYpC66XJj7STUm6dLQ6lMCl+XAFfOf4JXHgBBr+E52UmmmrhqJN" - "uM2ChnzVcCy4BzvvbMmunPCIDMYzndkTBVmBGjoPSJsvHBi/s10iebceeW5M628w5kTajcMK" - "dlhuX4+sveCEq6VJiQ7NPFTbxRmFJIjwdZ7LAHzboihPWqHBaRC7SEO3aHdEijDME8xZz6Kh" - "5zYN/lziVQhMNPpK8E/gBnRaD+97mWudCIYO0lUrWadz8ZadHjCcpkw+gu7+2jagq+A3pqTc" - "AQkdg4OvlUbtGVVDSKmVYcc1VhB9GHF8Ska+JCyb8sE0PGdz0WQU7uVTnkniVybWfKZMZQo+" - "lOIkUh8SM3xDmSiGrwt0ICWmZkzK8nLlouDe2zuo64Y0lHwEKYkK2M2qUutt8bycfBcxlo6d" - "gjmOi94GwI2xBFBJP/xu6YQE/XLllYEk1GWINOpKXZgiINs+wGNiKMSpBsSR7i5vhbeuBmni" - "ScDoLpT2hj2/I4jLp6qKI+cAWPRStFVkYGEYhpBCxoM8NvL9+kiF24Bykqcln/4vO3ilmjt2" - 
"UtPRXElcTNFlUNEZjt5e5r0WKRB5xinH4GE5jBZvJnSxGTCbHErreCMThFLD0Ng2ZktXJUF0" - "Xi+VrhBIERN9taff0BoERxnWS1XXRsWEK8woLk7gaAEXQC9YcLgzFc1dwoJT+wev74ma3OtS" - "pYzLrWxo0gk1XxTunAQKbM17uXd18B1Q4cMPwo+/vwDiYU9GzkAHljw1fmkR7GXNrI5d72YI" - "oo2qSORnx/lHwgH5qrEiubnBDi7Ss3SkNPCjFobwetkIdmyXesizGNRsJKzP0QIB2layb+WS" - "s5xCqVgjTYj+QgfT8z3qWN2+yFRVy2CTL8VP3mT2i48DjpROGuxQRbj2ETZcE5UOsttbVhNW" - "dzbM5X8F45CulF2CHCzR8G73paMVeWr9J5+N2ZCA/Og8MDtCufZHYBlLyBwX6IESVkzAC+yi" - "Uk6xx6sTJDkog8ZjljQ4Nco4ARIn0DyqHHaQM5igceiCT7yKB5hgPoa/0TzsheudL3ApX4tu" - "KCqEbzHS/8nxcduLdzEYd+Aa0Or1Dr68H/R5UWId30CbUkgiL4PQ96JRM8Bu5E2H9B/ewHDH" - "hvHW1Ss9EiwjhZbQcVan7hdXDbPfFh/2jjbCYIBJkQynyokZf8b3K9Nvh0R8KmxyJzXj8mmK" - "VBNHJo1GwVg0iTmvl0NA5aGUK+KC6voBlvA1E4/E8JEqTcRoDotXxW8yq0NFAKkMWB5jIuOG" - "uIC4U2+mNmBYJSN/iBShBtf++TKEIqgE2CnG1iFJ3biQkOy6m0bspmdmk9HLm1mthODvF51k" - "4yrQAtsG1395JplmUXqYMBG7qrzJ4Ew5WbMgovMfkE4JDkPGYj5Bqtz4cOTBhI7110OngAKV" - "BV34EFu8UlBoKZ0+RKOF8qbEwhNIQ18w80uQrmIMbZJW25LRLxHcROOXItzLYdeaxsKOSffI" - "ZgrpEWOUo+fF2l4UPPjStRNED6AGqCdk4ZHhVmDkyrtCy66J5FsgctS3rUSnkG8X6DHjrPgS" - "AHlsA8yE7Zwemq8MhCvspYQC5EqnJnF3hR4DiGsRzYJy3f3xkCVpuwT2vvMRdLhsFHW8zVl9" - "T7J6YHQWiCHC74iLF4m/UjeIg4n5Ni8TFSIVDtIKt53utZD3dvxMzC1oeBbFjcYOHSTRGNVE" - "cQQxz/6VbS1gYzRj1WE9/phoQprbVPmc4op3aTmaszOQmUkR888g/1XzoF14czg3NkSG5sN8" - "GcNRq8+A6hOKFg8grYFBpd4g6CfQswjHyaLY+WCERyvQ4KQKTJiAVMHH+cmeYKXiDqHEjRJy" - "KSIjrjK8DJwgSCWZuoA/Si2aMggqYclTXvWeUfVc6+BPDQK1RtoL+nVW5rfQIYhCvayiHlNS" - "ldZ3LPa8RdPu192l5hXylREL2hxG9+xo5oJbYsbuHLVYnWigXKNRCfGBhnZQ6otqyXPJ1Vva" - "ZONOoG+3fW6JwHQfj0yBK2iB8xQwOLm+QhvPH7Dw3IgrFQ4m+DOsQxctKsHCMBGxFbdN8ED1" - "MbdS6PzWMM2Fq4HXaT0C6h63nM7Da04Am2qiwMHwNjO55bJR4CbnCNGY1BOmrd1dXIgBYxTi" - "SHrcoAAIdH+TQW3DTL3elz5fRqx4DIOOfITwru+LyTUGGT1ctz9ujqOapkzL5aOnColabh0h" - "Di0SJVU4/0bW/Vzer46eZP8uwUKZjtoxpXkogNzy4A2zoeO8AUtI7HJswrqiKUvZaIsdiZZ5" - "Pns3kMTapHkg8uih/RfV7MiqdOBD+PXoUq5wn+UXkI2pXzoLE4/Ubn9c02VwTHErrst6/wll" - "yYJVpOWYk+fpuwZICOJlsF3a+82dmSOiaelV5Xr4mdsztJ6ZkwggNutgqaZ8qfpTHa7K2yOk" 
- "KruRuf/fZZqGIdG6a8zKZzY5byd0X8mgGkZI/4zznS30gRqO+QOm1WFp7onu0ewp/w6vIhaH" - "BixYfVcWfJ/32uo2VckIZ8f0z3pdq5kHtQvskxrEaqYDSwhLwcon1sA66J4YpsqAnhjwj4+l" - "yB449C/JLB7AHC+LVeOSUjBmHM7Hwom7kyEOeFvXZzsuPP9MNIcvpKuuw5o1xeqcw1DXj+f7" - "GZzebgtr2lgtcRTMVFW6yqPS9uWTHq9hGYfxeG20LEkw83PU5+9g0ysgra6BJewPU/Q7HEr/" - "GPELI4gDe9vVKbHvx7vh1B+w7GJvKzYFOytC3gEA4UswmllsfxIf9GyzTUEpT2rMNxGapvbe" - "vwtVEGfhbXhnT058q8pP57baeIGI9WFw3q4vbJd30Qyt54Vm+tJ0x0J6Rqdov6h9Bk4TJTzX" - "MuPYFHDn2MOX17UlvGTj34HewF4k9PtD/LW6k16lyp6coYfMIdOCiwllwUuHKqRdC3tgVoNS" - "DqdTRrXTGJqXl9K/phsoXad/EGcmTxAZNKlQKHABZNQYxIFCT+4UFrv21kWwsNycVMS96CP2" - "MCSfzvPSvfZxIXJCDO2nsO2d1wGw/Y0KuWw8jf7bj1SzaLxAXmwkzOEffM9hIEOPartpyyES" - "6qWVLJc3R1rNPWC1QlCNgPqSg3GqB9BBS8pRK+EpMdlbLBgqEzCVO5koxjCcnSWRYURLq0lZ" - "y0+WYweB464TUJZoJoyzhV+TAMmX2JgnMNZLPgSf/YBgT6s6vExB9k0OcrGtvyTVdFwD3/Hd" - "3gzqq6iRy2TavTuAFphv1HLJe1cTphU1D18hZd7hQfDibFAp7gfauN7Drnh3iSeRU6vk+paU" - "1oTilZ4JrG4oj40UH13vcCsxBs/SIEWB8bUeHjCAsbRi3i6E5EPSa+5puSvEykXhnff74oRt" - "/D9yB3Tm7JO/CQnwy1ALsaaaFxZusxXmMSb1Hwka+cdHKUxRMlVbw5dXL7Ps+N/qEqsnCYDv" - "oh84Wke1pbbK/4edwfV/zc5Ah/gfO8MxWyVpX9ZtJO3IRx0Zh+YvbWao+OEM79yWhYSBg7lo" - "NDvtNgrhNQi+WJaFWIHYRldkxjT0k6hP6bdd3aOJSeB9PT2yy5Wjus2vmdEDjo9vQK2anDYi" - "E433FJyvnmgfzHJ12VNSfJ4Ioq8xHKVXCXlpamPz0mPvOl+OhC/ZjYwyruTTg2ut8opEvNoB" - "E8g9DkRW2bTr4LSeIyjJPwO/GeTCgsNSRtxLZHFi5P1LpbX8c/tZ1QdXDlDw4CnCRs78ORs0" - "mEal3ARqWrkZRGiLo//dS4+tnOargf7MmFV2wBEHYop3zw9QK6j3doB2l0XgMZguFV3MLdN1" - "/hAT+yweq6L380W52s441UX0tFmnalRtfkq6uWFauKB746d2f1AMo8r+wt7sOswX7p4eDUCp" - "vhIr4e/EiFNUBkYle6xEASTp/3RZWj451f+uD+YKJF+ef1efBsl36K5L96rb/eNpH/uuo39T" - "y7B0PlP13oU5OKsYWFwQbFVjQ3zZ4MBxm1+rxiskgUQ80sGfUuKpxYVmDV9Ntc0jW4oyvGvH" - "YFH+2xq4K4mN+CneUpPDWNxagGkZmJ1zJj8oKW0ZSVkdI2b8oHq0jcnxOwlRbbmaFuu3mpam" - "kJy5UYrXrWCzPPkoh6kgZZH8cXq7nHmxetUCD/j51LvJf3DlW3RMRDgItq0dIBQh/b7TImvo" - "lkEbY7WEQGkI/3iwBni0a0JUmRlWzJHXbuakNKHshKb4huDEyy+IFx0bU2XH5gPAECykQOjT" - 
"RmikpexgHDYar3Hj2SGx1MQlI0cEB8zZGaqtRNdhrRdqWVMQw6TEknU/Zgsx1WjkCnDCpM6K" - "bCb3rAz9O1wPctBwhjN+Er+U3bI7xrN/+8lD+OOubf7sABNPIu3yfmZa4qFYqvMCFC/wfLhA" - "stm5VJ/7dmYkqL/4IsMIA7kYj7zYXehr0NHM0Zu8xVqyxuvqWKSms+RYFf9IPGxb/U1qkZfC" - "Yk1n+kIl4/FLVUnNJIct63i98jx30q466F4uqEAHSSqQ33OyLRujTxSOfBsHF8v1flqyyH6Y" - "frEQt9pTiZrQa8HBk3qqjcb+WPzAYa64sbQqttmkEUb8to31+TKKkiHsQBHqwr9nx+8X7rU+" - "K3D1pg2Tcowgb1ov70vxgZYszEy15p6+nIhYxuGdJTDkvIkUdy9s0FL6k48lUDJItiQQQwGL" - "BqqaeBSqoYzMVZRZTgK8jd+iMCQphOhkfEmoNi+HpcrHLjvJJtaT3PRdEabDOJnt27WNWA5R" - "Y9ZeY9UdLfgNtCXzyOq2FMLG4HNrlieKyfSJmoQSnqIV7vuxPf4JW0CP1y/pWoUuLt9fEwy9" - "sHcTyfOBVKz+FH9bfF9woOjmtJiJd9xJa/u7tHE7BbCc6NjdsXoXeqByrzweAxPn7/94MwAD" - "0yLyPB2m9oan1ukcFOjsLSKSM7uq5vxBdCDWYm+Fnp1He2uz9B2m278pH730ZSmo9ncQgF1K" - "Fa+7j7McOenI/K/qbiMZIUQH/k3tH5U4CcGIkJkvVHXtztX2kg/bAfFf6mV4NXNIjVXZe7RF" - "RFxcMDWDgjTZS3HnBrI097ECUV4sMPBN50mKfhMOf7m+p3ktYQ9I28nImnnyoC7ujt+ZF59N" - "sn8LLH/ec1Wc0E+axCgBHWQdS1IqNC/zUiMTqwubEcKMIqhRYOYBBRxfmSjkacPb0IF8PeUN" - "syhq3vV6iYLVirrvZ7UyJYZaaD6YDXzFyISFmH7AjE1sbVBZ/PgldEwnbd1oDIo+njqJTSzc" - "tOIMLZK6Ba3kiIok1F7RjU1BinSUFo1Bwr+gIdLtfb1+uBtGvFNKQtJ+SWV0VKa0WxaKUj+K" - "0j+u2ts2opgILydq/hSnSLQ1l/eY9OTNYi13tpEBnMuAJG+iGNKDk72Erbeb5SA7UI+f4Vlg" - "nH7xTvqlx/FYhHBbk3jYOpPn9qB4NXTMo2voQtqENFVz2P3Mtu3+CrK/TeCCcn12mbXP8uhA" - "4MkR3mW1sEyyVtEqyONHjhmk9xxOQk5/HRqrKHlR1Rd55WMMjlMQYhaj6yWuXEYg69ZpnqRH" - "CJPRgp1sSZk4pbHvq3mJHWa2TxmS8klMPJCLP6DyGixehICl4GoQU4g8sI4Mpp55JJTHEV+s" - "GAP2/4i6dAdQCnrjdEjVR4tLx2goqbco6jqU+gk6hQ8je6Fp9JyPRHnQK5dVU6cWfVzKp5uk" - "UqYVAyfUODaDQBeZ42+xCkh6siYEv64lNnQ4rgdgEgrfWInYdGpv+G+A/ZSnfbRrcukQKzKp" - "jUxkQduMTE/A/oESEVtNBIUWKa+B/34Z4qHuokDBrb9zxfmAU7nL+GAj+0CHMRl/lkWUMoXx" - "R/GTeFV4sotqEuc2Aa95skiaQbSBQwDKoI2KEQz/ubknn9qeP1a0f+sbS13xM4tmFLTcsVg7" - "cBXO1RnkobLwZonqRvPCLKv23L3KExTW7YJ8XIMxZrkWngojBRHOQntqHfM1QtvHJc86WCRD" - "D8LY3Ijthm9WeCGpfuOi1L2pelaOKsElv7oMniVNKQRJCtJXyvTI9GBRjPBEuyevIqivedJQ" - "P0Rai6pPA2K/GxzW7EVI4oGaJSDtrcIHGVk9d+WF+hPZGrAJUaH2PEyOfzXGhAcdoyfTNiB6" 
- "0fHO9QCWIE9QjcQJI57tWrsU86jlMZyAXPIZXFIAwRis9nlW2R6MJ8kduwNb2SpFTwr5XwOe" - "Z+elqhR1S7A8CEOSZTU4+GacauciuoJAzTIzXUMOpvR7ZHywfsugdBcbxTx7S+y9Qe1ebLlj" - "/meb+ED46c0pM+OwqH6E1lPHwEoc0segThPPaaHj87/7ei1dR/mrM08aWnQVGDfGMsZZPlPu" - "qsPejxNM0SEWZqbXiLJJ7IkGPZH1MHKZn3+LCaqSRLJvqcjU/vAgy65bthT8P7ydVVAcDLCl" - "g7u7DO7u7gwwuLu7E9zdHQZ3d3cIHpzgEggSPLi7s3/V7j7d9/var93Vdb7Tp6qLjfEVfSYs" - "SMBn5zY9hsic7dRroUw50g4iRv1bMOQu6v1uX9keCu2o0aFk1cSYY7IVyh0C2vXbhzzZgBY/" - "LcwDypEd5i7vKPX+7n+7jwELkn2i1o0KpKJyQWxYnT+Xsa+BtCWNwDzgqJl9YZubm3DkhcOK" - "viz7Zqo3TxLbN9f4uQCzlY3BlUHbV+OUwd4gcvZUuehMBjWVVar0pR9ephHBEwAKvj5zfI3o" - "GNI/DHwdKHQE2yEOa+5aPqVHmhvVxtHSDqI8DDwYP0Sr9iP4QOARyyHjMqrRR/NS9PSNm2SE" - "0QK9lR88b1UKsGd0j9CpQn/749OZBG8iU5RgpqTBAWmYixaYnBik3Cz41HwLndh+YqTTWuIT" - "hHMqCXGnHYhnv74gZ23ETi2YJy1G2XS0h0k+rengZDCvpTV0HFeCTiTbr8Qf/y4U/8qD2h/3" - "Szaveg3cLuvf6FGFLzk4Xt21Jqu4IrxPUXkP8ECMTxlmD38REdKh8rXp87rpf8XHp94oJdUg" - "zWCCWLALgy0SeB6YAoKXu4OTomhzknBuH9YZJNVeR9WUMbWI8icz7bhjPiKmxXeeo16ALM0L" - "B6Ahyp6Uauhnsggy8PQ8eSt01eFcdVh0QDgXE7TPRGo75VWb8jBFkoGrS1dKYrUjBUc0MHQ6" - "vJ4FwbiOluCXAjpz7AWNnoolAEp1ejkIGwshu8gtGFcWHylnxqVKYiZvAmOxUhgy9GCEZXc6" - "5RpqlWLdisU3PbF3pThx6RnbtOmfhLw280dYvrRJ+jRkCbqtv4/osQtraYhCr8ZgjDPmOLeL" - "skRLjqJTEWt4vaSwwr66beqJP8dkAT2a3k00n+Lv8xrAZIjk4lMQcGj1pE8K4G1fFjIm4oci" - "fz1wTNUvzi0m6BKW8NsPe+68QOydlxiZP6uAr/Ez34IROn/E5SAZO5H05tbeu8TCz+PyE+6/" - "ksE65W0rQnEJrUSLdqGAMiYv7sdP2J6wIsoGX2+qZCgE2uo/HyEFNoaiYHRVpneB1KGZQAWN" - "5qNBBMh6+d8hacKBf2OdBQf+HMd1f0UcCIG3RM/wDjiDCKjjY1t8YlGeeB1SrZEOKMY01mVv" - "nn/hlKBr0WSg4LORJ9RQbVtSqD+KYMkxpt6vl9EOQUykW1wYgOh9S1UxS2gpPuijmlUh0QBy" - "4Bgu+E3D8ff9JQVurOGYVLkvC3K/cW5eRSU4F2lhv8erWwWcuJyy5k06rG+5uWS49rNdhJE/" - "s1jZ3vLzJvMtTxv3Nc6Xl2nk+7W4BcvVIM1O0frnXnKChMls3bDuU4rI6Szn5QbCNt4Atx4a" - "aJZyt/n9LtOKj094FHukA4fwGRbDZ6GdvazbxZroa2iwLdtnEeiNl9xDRYGSa0gnD8xHHhM3" - "T8bBQluSj9F1P8OmjffFa35u77v4Ju9GT7iy8fv+A3l+OqHyRpA88WHUwnWK01y9AxwYJk29" - 
"eyXxi062fnwr+VBb6zNOet0RE4CO3hL8qpkuo7WdV0ld7saCMt+8sE8eqPwxYGnfpPNlS0v4" - "ewZZWflDt0gYU8LUKvvLokn7r7LlPi2HGU7o1aVukInQE0teFUGS6+c0s+KRrRbXE1Sgl8bT" - "OwLj9zecOBlhsEN81mXTKx4XLdXlUX9eZGMJ+nNYDNdKLq3EgPm/rInAXdd460bIxTQO4EyQ" - "bVY0Vsmtd3qQXldKSjeuxMDw/C4fvMcu5YF74CkmRY8N1aeRlbz6te+sK5R9xlwQq+mAM8IW" - "x1e24l4bxldHLjIqQp+lE4qr9cHoDoh86YftxFIJ0m6Bkrw9WYv2jwQtnwboUIWCDbN0uHof" - "czs+Db4H8QopWwbcxCMVqreC4VKhKMtmsateiaS48+0nYoxiqDeBj4Iy+n1EYZaJjqizL8gD" - "okQiE5ipnXGYSincF9x2SXIeyLgIhjmRhocfPk0fW256XMS7jrMstswHy0IaAbTQo2YJKWx6" - "AbfHgcozKR982A2e410dfniOPmT8XEieh/az4GfBPiTNhbemRX1yJ9QW4U90h9U6nFeo5Yj6" - "HNqO3lzMg/PWhnZjl50vKqel3uOwICEhO5YPic9wRk+7ahbH2/iLEUL77X1EulXp8UPT0URq" - "RcQfmKrwXzarorYy2HucxVCXlQivwCFgskb7e4ijmuYwO4qgpq8zAbJLgQ3UVt60q64mhzhA" - "JbNDd4IsiORSnI/5lTENPqUgX/IGeP4FkYxSHwpB/t74PyCc+38vU4Dw/yF8xomSPyPiOzZW" - "EdF8FuG9dBgu8QfsvrijRrApLhurWlJROV4kATuEAtMDJMxBrrsa27zRKwYHNGuvQncfHU0K" - "IM8NNDl7SsgKzS9CHyp6tOXHQs36om3r2uFWcLCydWRbjLpvvzKHeam1KES7Qc+qXWKlG+1u" - "hUstLtZrcGdJqchQVm+R2popaPvoSsmAaxakTOKKDQghwkRxleT4KgKtW5RwFXJMxgHRuc8U" - "zDgHQIu/7GF9ptO7/Cl4vhgRePxl/PUntrljEHm+ZqZzmbBga0jdFtd4VfO+i31qP+fJllTJ" - "pI544jsT5cwfsm+aOqRutoWNbz6IOADhEFAm9SZ/fj//KY/5gK5tv+m7cWDY1SpXcH2Bx9qp" - "E9blyuDFm+J1MvR4S7xBPOCN9489/4eI36pu6kpD5jYmfMWN8Av0oiwLlRkKJwhuz1HpKAIf" - "M1UQ/exOQ/bygLxgwnr6JGFHUPK9k9iLKstxAzAJeaflEvM9+ZiXo+I3GBqLebLl4i6t7zcZ" - "BYPDZghQzCFVOFJty7Sc53HyG68AS6PGpbjCuhG32Xs9HZYT7gw+i9QcA69ulHO1RVSfCumH" - "1nJ0O09/smRwjX8YrZDnn37UqXVPGobkiY+ftx3ziW3qnHFYcTifpA6Z4HO2adn9H1lLwrDf" - "UvSElIS4IlklCfn/Nfj8CsJ6iO1TX4xpfyPTJKWQelwqx0hh3G9+Iomlgy5mn/Qh/3O8Gk8E" - "XlbJRufSi8xjELeYgTVOoEIoJBS/noiXtcNMaAa4u3Z9KzEmDzwmhN7reCEYnv4nWJEMmDSn" - "/oJeCwgFGoX8BTWL7aIeMEbT9Qa8SBhqc6SSufLZ5ULh1uBILlbWw6hSae8ZQUaGJH/nhBpS" - "UERHasl4pGabpk5xACqOcIS3H3+fVEQkXf2RpED8CdmJj7VCN+c4VMqDbyKQekjCoa5y9VK/" - "p5CzWV/2+053Q1hsoW85Lps1kHls+mj2br0/uaAsiixsa3c7ZrXCa6/5r2zQZwHT7Sew5TPa" - "a0kYvsNHxO1t+/qrYF0lp4x1JBk5/Y+QcxIZK+ZI5BkjQkSe9q3aRJEY7DGq1/DAd2hdkzxt" 
- "XDP/yRyIPZnpe4QXF4/ScR96Ad5drdwujploqSDcf40j8baMOgxCmh4qv4ULnoMNu1I9FXij" - "7an6Mm7D+g22xNN0zwO6cJiIeVYoyZKbku9h5IJmWeGQZ02uABxFbX/ODpLT8/fVnGNIqusE" - "2UFmJ0gTOCTizeaAHkxv/JiH6ZvOUA1XFByTK24cSFfmdIuUQxDFpCF51rUggSs5NgFwYke8" - "P9E1JJt9HA5mUm554hnFi/3APlX8k1PU0mDU+UZGN2TH0gLBm+GUuayaao031JZqhm7xn1PM" - "NkI6fuLOkRZ+CUSPVZSyTlolefp50Jn0T2i5qmNU03kJ1XzIH7/IVs34rSdIcsOfkxLOh3HS" - "YqtNLlbpAkP9PaN5YjbHPQoinWdDbdU4X8u54JClDvhD4n+6I3ZLjyGSKVv+5FtGGAqgrHVc" - "8J35xDI6jLwSMIiPeOZVW02kAkwGx0VF7Y8nHciTozBMFLNSkTYjpxdRkeZg0fNinnbD6XZX" - "VmhkY9bhy5Kfgth2Aqenf8ukYNvH+KF/ZeruBlFdtfVq2kkzSY5kvhSn/tPjRlgbCb8oV6u+" - "0KvbpbkSEG8+3qO+Ff8TK23JQEC/RZiE0+HvkjA+hPqM7CoUOSOQSIZ9GprtdKtvQ2sDcrRq" - "ouUe9jIRYIG3z5LhAIZKGwGpJwN5BY3tLhkncjbsaQur3smDp11pniTmgIuy5AAHGGDQWA6M" - "YZYEhXnJPhIgPWVPRyUcXXQiQDEKzwbmcg/IYSb5D40wd8vW6g/uWiFJ9aQGKy+IUou2C2aq" - "O1mVtmWGlZ+Skaj37mJVjx1kElsAGnsH/NwWbn8Y+dhMv7cBXYN82U/LMgix85UCHhKxioLk" - "GZTpOj3dqQwsTcmbptEsNZ3ZHdtYPYrvMlMJhEsOdq7kbjyjX4miutEzySA2cqE4xucNF+gc" - "b8UeaPqQrEIjbHsOOVNISF7Uqd2gIEfZ7eQWZ5eClmMhYmDCTPEM8SDxPKnmyLqB3zrV/6n1" - "gppSQTEZFlETcLoblCegdbU8xmIsQh9eH1rVhuSODLIc6FjletPy0GlZFnCBJkOZ4gWoRLms" - "sUySpoPSTuP7nmvYYF8b1KlEbpMeT5epUbuzfKaOyUBEJ3GooqVLqzIdkTnchTxWA9f9odpZ" - "9ukHrKqENSJgFKcVvybLmk9N3tMu+cT2C1k0fhq4MAo7S7eopeBMjuvVI11dKTiYeJjEb9AU" - "YCIFt8CKDL/1sw0hGzVJD4/WfUZNYyI05gbURtaXarDLT5YYg6GqCi/4LT5dCr69NLmx7Xgx" - "LEu4D/mW2xMrZKU3LQHuBel3bvY/DueyGQpbukc5prwFpLjRgmx0pWNg72siGSv5jX6/hw38" - "p+6VlbA+hKnv+OgIttuZ4aCnvTET1pKg9vOKiQ6pnjtMnhG7dMtzkW7iuO4KrmmfJWRf7Qhq" - "W8HTGusmevLczncDJ3QkpM5tvxTnjKtSJlW34i3YWOPBV7EGbJaZCVdVBgntO34ERDKERa9M" - "+9EqAd504udZUkZpEczCO9169O/hH5LUh8bRGq1qk8JADyobaH3RXrDwnbGLaVo8M/0ByZOH" - "uvH50bSNhe5TzfONz2RNaDu0uMJC73nus/XlwiJFYHjuGk3Dhi7Ja9P0Pn7nZb+p4/QdShsl" - "OnbJQUoEX1fUGOUPOx0aQyH+x6Mxgrr+DdmosQ6E+yYs4cXlsvowZrfI/BorYc/QJu8iiD/d" - "MWH7cNn50lQ5gTBhNPZhm2uB7xzaUUmb0Ul1x2+4dNyuxVE6RFaCQloSzcOsBvOndeF/pSrF" - 
"eajskaFeJSL+zFP9mU1mcvyz1WJleBd3nq/K0ySdBQPol4Q2d6HlSkZ06mzlqJVznfnlN3Y9" - "6MDaGvaTP1mIPRUNe2oQ81sfnMKiJiT7Gm+KjXOTh+iGBNvlaEzTu9r74VWbnbvEPRVVE3vs" - "WqTpNOEEis3hQI4E2SdjrQ4Se6F8JITgZ2PVVJUvEe8g8800RDPZqHjo/GbhPxKnSVSC5m+u" - "C6HPounSvFek841DgVMlqiGG3Pbe5P0p0Upm8eWp0TLkb3PRZQHccjH5I+OpznHnT5o3/hDi" - "83i3157NgdZMEpFl5G+YrEG/Ps0lDqWh8dQsjOGdim8aKtsY1tTPC/YuVSARdSlMOLZSTKCn" - "NNUn0VmaBe6o12MWvw/Pm1Sc1/gF/VBNIiOMyZeynfqtq9eN4olwc6iV5GKeHikTSv9X59De" - "18AhgWAowVLMucfbcS9MUHHt2X2FQ4A7NzLDCEARjlNdI7Eokb5Yh5SVGLhHjlITma/xHOkQ" - "MiHO6uaqrNrvoQPnnY6wwNPjd7zVpAoUvzYgv3+gQvBA1T0AyTkoGAQlymX2nPCmMKrW6XIp" - "ce1iASaTg8IqbVHPvUZ6Xn6RHwl9wcpzH5hZHJupzNgZThib/E0awQ6S3AORyAqksJOS+CQe" - "XMR4H/8NPf9zWixgBpn4keLVZIiPlEQmFhvW+8b4ysIVTPXK9gNVlf0L5DoTfyCwpwSmI23k" - "tzuEm4MOxsb/7SYmHBSLV/5m49h6tT5rguL6RVwPq2veWe19PPRuB1tG95W1jtt9yK92gWlS" - "PG09N0venra/x4ybQq91lbYft7ddMmJj3E25/ymmQbfN3/kd2nSQZ50rnYnukhvAeUa7cdyR" - "HOWalbbifCyI6Y5ZfXFD3KtPOHJyivZfr/agoFs6nLMdWwKejjZTyXBZmAWYjvuvhhm6i9+e" - "qtx7KdbEvkP+pUQxdroL7iCAUCXk8kflpuYgcNS8FLSJAUpqRmOS+N64JzWgYlw/S2yEASJk" - "9uw1JsBMfriTuLkQCwuUaxi3Ysz++Xe8VGV3uPzpSmOkqQJRV6fi4F7p10hK25dLEKExsZT2" - "0gfM19z4MLVdGFXKNXGGiF2KWbls1jKpb+xG6bGdhqbv9ZkccRrqD5F5ReFrQ/5tYbBGND9Y" - "X5FVC0EUdVwTwV5UZoeOL2QPvHEpW9M4VGXI4vjt1HeSJgL+LWLGkV948XwUm9lk46rGT6/Y" - "u4lwU5lKTSBG4/SRwLr7QFdGN0Ffr0L/UYGk7ErFmFSCqFmA6MhHDSYJexb4hxV+6Bt5As1Q" - "ko59cldXafgS+JnbMRDJXtxw6uYg7UrCRed8xF4nTwKGsaWqeLEv1bbOEtrjOT+t0xntX9wE" - "QuGSH23iFbTmZEv+vb5FPnwextlQWWzrUr8wZgFRiIGABo5VEJj07hMj5Cn0fqtGwl2PJdHi" - "Wv4t9PDZoP1m7EEg3ZWRfIkyu3KhVSL8oO3wB6MZuUEpiwM4I0dHjocBeh3BgPP8NOvUB3X/" - "OEAvI/B+I/sTjTovJcwrktQlpJJletXnMAZJPVFi4EV1WH6jeBkGl1YSh3Lj6L22SZ4Z4Lox" - "eUOMsCbCxYjOn1K6xxhiYsOL1CxzmzBe6ZDr1j2Gv4BjgTssKFLSdp+b4Tv2AdLgz/Y+7DL2" - "roD7Hv4GbosQAvcuUhafp5NANf57tzADVIUnUMTMtOdqRJisZnHmaYhqdMvSe8mvNwW2SDaU" - "CQhOteoBfISiPlw8Lrj16x5JBT6C/WasN/mnPvb91fgDun7r3PR0VGu21kEsXbIVI/w9uiRE" - "FCfnqOo5cp84ZlcRM2EaygmPYmcqpaKzz0KdGOIeJyZNihJ5e00RuX6KeGKjw1ugf6OUvTpA" 
- "7mcoPKHw86H9BBUd3arPNmdWhw/I+svoyCd6vSXie4pbew7kMiHUiJ/gYDCk2XQ6iH8yehWi" - "E304GTHUjZWbMjnM60+tcDuGR0jgv88feN+FMPqaY+NsH6OnroCPQCIBOyxUdXQG3Xav3qV+" - "QYjw9I2AID6zCLZ38qG0YHQS4nYsC9sBlz+ps+ZT5pTj2/FVayvxDtDGEZ7UNT+qOomzjW06" - "oh9l8QnvBal4qVYJifzGVUtOPwrw0RlgKVXSz0kpiEIFw5nnDLkUz0xBRAVXyQe6ZyHmnFx3" - "lcYYFEGQWUHslKyEvz4TLFKIsLmPu/Oa3wuK+PxrgKtjkM3OFABzcc4byoT/esucmP3p9zlq" - "5pDJjOa15skedQ2dFZB5InenaTfOqk0tp8MU2F/RlX1nW7JGeuw6VPZ4s4lq3rV3uV70BLZG" - "pWTo9Q1lDbkYKGFki6nkY2QIzO85ymnDOz6mbC+cvZuk/M3jYzpg/+2AGVkoiDk3sVklUKpK" - "qQr0/EWedNccUvCLJs1MBbVKKGiVTuoeLTz759mzjOuXdOzTK4Aexb2JWwktEu5b1lwSgYAM" - "esz2MsuRicoi6q3LutXx+/SDzdRm0/Con/AHdOg5Wij0PCW/YN2D8ov4e8ZlTOV72IBN8UKX" - "VYQz5XqfVma3yiwzyhnkrZXOcg3ruxSW/C4NIZkDUU/BoPrRF5RCOdQ9wi0W5/9wI3j+99wI" - "xP/rRqTryidugjL8HRD3m0ET5gRxee4A8gwId4o+wgAMAHnw2ghFMtVPmEwBCVIz1Hkl52JL" - "QHHePMQvLL00vFhJ0tZxEL3mX3wTTHSpRfz3l1ZrKyV61vJ46h6OTFZq7lquhfT5Oq2mHg3q" - "ZzVTa7uWZxOkHTk57YzuNupVnyuJ1JEOKzCHZssKNQHLprxrAAKe/ZtBpmmag5x73Gh1yM7A" - "+9bDGi4e1dqib12HpRxaQ7xYzUEcEfUrBUxthcLU73SR7RcBnliuLngtx5MpSIu1FxGeuMKY" - "HTnL0dmUgOqp0SBUlBUqrl5/3Y9IfJZQ1fCjQQlwRRaLsZw+hu2LlZ5uzuKXssCrlyvu6i8C" - "UIQ1tKudXP41JDDRTI+fJC8676mjj3hk5cQBGX1Dfn3bXDFxvHsKjt1jvccotKthiLEu+HxO" - "sy96+aHl6Q994EgFqVSKQbnRqEsPQiMSGR98be3ziwxVtC5PmezD2ToRlVlPmeMRiu1e5L33" - "9stXyA5PyHnO8vfVNe5iPU9eqvtaih5HZncxyEvfo6Eukmhif7guAkmRHZ919yZTg8sPIdOF" - "JJ+jcbIksSbsmKnS+vH781P0SwPSf4hUv1I1ZAvKlUi1m1Uv638iuRVSa5aPGqatuO5ioh7k" - "iBvmYhLjFXBrwL+lv5X5M9RVVnBZfmNcqfFn93ZCdnOJgDnHC8uXum6TTOI1BqqQMkF/ETQW" - "MXVl0goy02ayO5OIfgsTecimDNH3oGPCE7ZL87CJc8QyKh03LN1rRYqAj3Nrxpztdxz611Bt" - "1JIbjL3sahdNhaa316fp5gl1loOTtPdd8trjpz3LR6+3X+1zdLdTz/U36H2jilBrDOQi6ouA" - "kyFAckp0MFdo91ciaEkr5sqCGu+AooNITb27URD3jQA7SFcJNxr36we9/3i7a7HkGmH1HskG" - "aZJ1HAZH1V1ftTjhOaftQK3K+5VY3IEA27c4/0H2uvxfTH4b2rvTn2R1+WEA3Zh+Tu8rsUQW" - "Oojuhx0rqVAYzBpg+L8JiNjt5EOZsheMNd6v0KxOkKpFBup5fOy9CZsTkv0SCj/dnuT4mb/k" - 
"GArBtSQZLd46MuG0GA0CXZoqGJ2lAQmSZZrPNzmL9C8fC61o8awjzkEDARY6441/u7CJPGy0" - "lmbhwQ0g2Y6bJrArh7zYHMqnwzt9bg8GgA91103GOMKMw5Dr8bc7HcXbst2MlmE+bTu4SIUB" - "24+jSoAY1k6niSJU9Ess3/oRgziVyUjJ059SGnEWAvY5UBIzl1f6Sp2Vz89ZNe/EJcMGKtCw" - "xJsX4dUx7veTCYrObne02bjkC2uKRuCKoIw0LJoF3e4+FoNLxbsCvneG44Z7CMgXmiNjtMhg" - "IT0EtDQOJ7ThZWHiJ5Mvf7IvTCK8K0b2wqyGBXaHCM3fi2RwjK5diogfyMunoNu8k7lflnKQ" - "6v/2s383kwqHYGcis/vMGREcdjIayYmBHgqC6Q+GPYJanh6jtDT/Unl6CTdPtNZG0mGLiziG" - "Pv0eJR0+wDgq7tc4Q5f7GonPrCBVJMUkfO4gzeQgb2GhDTt0aiVDea7Hc/GMcgMklOtFis0K" - "inLfBpv+7RP3zueNRhqOPI8O/mYj/xmZLBfsJV3N7pXsQTaVT1QNC0CvroKah4mGSParJsNo" - "OJuiBNeeSVJwsBgpYB9LGBfMm8xqnw5h1dh9AhIK90yG1MpmWyRcy5vDykRV5KsU5osqJDQ0" - "4FmovtrfjZoIhdGzigpthZlwj89QPIvnum+V9WDOv3AeUgYjwnlBtm3uDIlOMbi0oUjMFlDv" - "pqTFqG0ogX8yVEqH2NLoxpQ2kN+hYuHklr26JtudXGoQsT3qGZSgRsWP9ItUHG+ngnK8XbgV" - "ZQe5tdFjzBb22J74D2mH9c+adeZ5OT0ji/4bJd2t4cG7Yn2avJ0zFB+NcA7wB2msw9+gO2Bg" - "XwwEnwztiOYDUMybDJ7qLs2zv9xyYwdRlHgBlnj5RcEaeg56TdQnTfgSNRc2NpAqyB3cxZfH" - "ViWDTpFMtkhiT43Rtn8NgYvZQr7b0gmIx6Lz2hqoNakOrCHHSk7AmFpIRg+ViuIsoCf7Qyy+" - "MfgUL6Cd5S9eDJv9pwZnQQulLP4016Zjiww8sXL77+pfBVIrBXMyFx1HgOTYFxSE+HqcQ4HN" - "CIAu7Akolsw70UtynG0SKy1Q6sx/neUnpXRw8TM36xKAIRpGfZ2C8FhFlDmegB4gJCpgM451" - "wG1YhEVuXD3DuFubP1/GztF5oYiihzRFlatfkoS3SJE2RBH+M1ghGXKo95oxAdpQ0gJOCSbJ" - "QKC4spiF6VybeUPBwkaH4VShYXHPRqaP4B5YleQQvtnEeYq9GtD0IEFV4ODecr3n1uGYyvLU" - "MFuGswd9SXb3Sx32haKg1+ovNwRciS3R5u+07dWIq3qaM7lM5gJqPSGZASF8qUjQwknXS2ww" - "rLmIxAAZXfleBXRT1XIXTWL4PtpXRG592kmIwSeadM4lL1swCmHIt1yJzpOt4j5O6nksw3IM" - "S/E4GgvLv7+2cjoCyliahmwPFDV1HsdLz3ruu0YRKiCPoWFhavG4bkgmIaJah6SUGmrWPCN0" - "v+GEmGMCRNDnHi9t9Z2iCanv5z9Qiv1k8viWc8gpXvdXJgP8UTCa7HmvvlhBb9bADX7sYYyQ" - "/ZoBSrtqsdlO0MCHVSWImUjtH4o4lwlZVrSGUE71YIL6GjthMvqUatlU7CBP0p3v9x6fH1Eu" - "Re4UXEUD5KxZiD4Qj1gfKAmkjexlcCNN0bts857RMHkYxHIVqrfe9bZxiIrykkuh6N5FEwK4" - "i4OlqTKWYj8hXdJHmZNpsYe0f9nexODEQhZfZL53yZY2adeiWZCElKNnYUT+gzMFkRg5s8+Y" - "TyDdHFyk/TMS47k6nAa11Hx+czEcSv3s1qotdIExLUmQ4tIYcb115gUNVNnLqZkaBSt4UvK2" 
- "GOboXhEtRzBQ9vYElojLkVTIJCwoeNESxuBAuCHpRJptq4WYKl36QLJlhohhCAlhnkVCj+CR" - "gWz04IFZqcVIdNkpyb5/jjcKnSZupJNT9Jg9+QqREMDZVLN/nDYsYLvL7XROalWqd8ZKTF/g" - "I8aNLVdTee9bnVGPSyl/3ETAdeA+/K5NIn3xpLQb8DE1/17vezuza95GYaNr1Nb3JUjl+Gn1" - "N/MaV9cJPmec12LnjzbvCBQ/9ko7RW+YKHlOcqV2uNfOPq0UrAYvuuK02qGvpFKXEhyGpeej" - "IVLG9IfnGtKlAOzBQRexAU7Gm9VptMIZDAHUxZapAgk5xKofb5srmXFDgYG3JxZUaIxMuHmk" - "Q5NZX+v0LDetOyg5co3ABZJp8DnNWK6JsGitkIhbSaA6DFF61gLaCl/aCL+KmDI2qjt5t0sM" - "XgVRtZgB6QiMK2MIioeIpUph4N/00frIvhm87JvUlidw0SEW9ghRFxGve5MxcKBrkqzqsa6w" - "y1+yxUp2iZBBXHugZmlwpKoO/geH/XaKjJFn1EhKMLzRUjNcCV0l0dvoFM2Gc9+7zb7NaGCb" - "8pzIFw313danzoMEoc9CA5oEAbjB/eBbz635/WLrPk87lAB7IoUhsgJ2baDRr10FO+z1ao2P" - "jFafUe1oH4NpwWZGSopzekTo573JWwlcyaRp1iMKBnSw5dzYHFQm1uMyEkIKb/iSElxFaB/F" - "baxNcEVSzYowi43TLAgD31DpabApuDuX9DXSxvyI6jcavEeJX1hTKyPHcfyil4WOp5NDkJhV" - "Vaxj+XayIeeDuzI+TFTLNmPo5XZBiNhCLgiLS3EKImUqrILs6i96cYT5Tvx0b94nBUclrSC0" - "D1moP2Bk9ZBatMadp70YveiTS7BUteyOc4tZPo9cy60ZHwcKZV5gda2XxipfVW66vAqolFmE" - "9xl/8279Inmkscay61WFShaBkiEj0Duv9TZBpQGnEc7tIS3RTO+H3PNse+hhMwq+O8KjyiCw" - "IplZRImBDae+i/csiK2u/EfA0cwnBC8Fb9b27CtXe1GJEy4zKrnQ8aOSEFSotUo5sUc4BYWR" - "/95CgHqBQ4+FfsB08SzyZgybFyj5+nDaLyEv5p6OcefnuZFnNxejcTJZ+KfMauMFDwow76Mt" - "jme9CSLtpf+FeCwhuhujEpfkrcJFro7mLh+CkN2q5M79RJdm8eY77eUtE5tXdOwh0kSc6FSn" - "7to3KaPfPg1VslTNTEWOedJ2LpjPL2VJEWSsM0MgupmseSSjrTi9jbG254X9L/JxLwdaNRyr" - "wKX2mcj1etdzh3fEAd5fGGibi9iXP+OcO+AT57wSWwtU6fNVGHgnEO+Ey0eHyfopVTrUQcYx" - "j6q4if2gAOgcnwbrXzpy26bsIKvPmfyZI76gpxQbYJqeRYAo1ipNGzebhh+xoHEsbu20S+P7" - "+O4Tbzg7PVsFOV+yTixv0Q9Lzic5Bx8f+5XO3ZwC6xgm8AI4lBi0a4IjhySBE4njcPxD8FN6" - "mfAIIKJqfpqtpr50u8xhneQdeFsoXVVfLLPHth8FJIPOhOOlkNqY2F71lTTpi0L/T5i3/RmV" - "UuR8Jr8dCtdjzbMYRMTgeQQGQNw64Pyst4R7GrE1uvjGy83nrTjNaqhKKV2Pr4HirbN8ssQs" - "hXXyJbiNbnFS+reFY2RxQZj9pBINteMsLNIUaPt69Thh8N45Yjs/YrbKiqZ98cn0iWa9TflA" - "DrfEQW6kTyHzp3QKo0Jm9t3kGynMyI3+zy1OBq7W3XjMHO96Y0Jpzfgmcr3VjcxI4b/dVmGO" - 
"/VHqgu+LRaO95L/1LcK+g55Qmr+DP3iBgWKvvPCnmyzfk0sZJvQtcn9fLE2Oi8EQ6Ox6iktE" - "wriD3oFj/5RK0XQkfIv3mMFxuZSxpAIQF4P0Bkl7Y/Lj6hRMrajSmBLGbkKi4niu7a4lDz/B" - "6GPQsIdAOwe3GgUDIvK93PPppum9p/jWAhEjWp0ipEL5l7Z2eVOEwL/9KunbZe91FUO7BMqv" - "9/oMdNyfxY/LPoeMeJ3K/LrgiSurCuU4GbzPmG2FndI6ivNeJ7l3VPRUXjchQQ9btv13Aemh" - "0kTyaLf3yZszpaZ7g7grWvcQjkiZtI81gnzqilSo22zj3Qp3D2OJ9eSN+/kjcdEC6lNsV61d" - "hQwGkqO8jjk7sm9HOeHETMORYtvBgyXHw+omXXfHkWKtc/Ras2B+G1wPqHbL6GTRdmltuhFF" - "cWS9jdEbonBANRJs9Cxj/l8uJtptt9ugNMpI4n65l5HHxJ3lGyKrg14M8Me4nG3IzwcQeyz8" - "LY/osrvVX0gumW+gclBndpF2+FMtiPFVHAIiZEJJ5oI5tKFTPdyF2GMaetP18iQS2dL+vTE6" - "MLspD59ikWGIeDNARprCMGl9gPyvNEfb29w/MJTfAHnffEYIWdLBWCqRicNfvlMhNbFo/GEC" - "iWuc6HLLU4nAQSIppPaAZGtB/whCNsJgattcGd/1Q7CDHiHK+EYlb/4oRi2XivUImhPyt+Z3" - "VvbMwpy9yLro/2FH8P7vvWZS+//hCNGkzRWcUdLhb2kYUt9RcMCmUgqZnii89kC2D9VbDC0V" - "UwgEBdSKLGzICkXeF3iW1F/EyMn0T5RZccxpKXHM0xF0W3lul/vTdQfMXXkvLs9HfW5Xwk/s" - "BtkRJQjKdvZDpSqOjsg+WAVR6I884Uzh4wuQXznOkzOU5x3rZqnUrs+Uc3v3E6KW5EICKTvQ" - "xtL6zmGNs3l/zDhhaHvC52QD334w8y5J/ZjYWHwcRXfUTcDjc3YsZ3bsbrJcyOxgGF+8tfOH" - "UlPCmdABa3Q0gByadsXs9MFAIgZan53MCJ6zjjG0VzuG85XIPbGKxRxCWsUyOILE8Eo7xvOL" - "PHxPOJKCm6qMp6Y2Edgz2UvFMl5RS1PARIbx1j5Q3QlQEV7JVMeM9WPsiL2TiHZJUi0JpkFo" - "N+0B3+XHTpJP2JykaS2ULTASWFLvOO3Mfs7tsaOT7N6liqpEUYzaRXjpETV+zoD8lrwBue8s" - "h+vvi5yYSq9D4SdkNWV9nmiBTS9GuGWhvTMEKGTeQf0uzvYtITLBT8mZdNWK2pPbwXHkRBwR" - "TIRZgCbqxM8FKHwNP6B4wP6ohro8AsiOGG5mz00WWQdYB8CbP/McPN7ht7BoeMLw8xBgo5V7" - "arT+E6Ym8ouMNDNFjZbNGc2UfXxNmJANprMhxPz1qVdGLKQhl3LXbTuk3vP7529v3JZe4Vov" - "U+G+FiniwnVAf5TJWOtd3TEGqSlUS/w+6TlsmdEf788nIbPQDLUrVyhqcBYWXZ7FDnFwL9Iu" - "UeweEcM4U6W0DFiY5WpcjFQ9Qn6LEHUoBiOa+L2guCVLrtpr53j9t28Vd4LG872Q3HfqyZwl" - "CYkI5Tk5TCCLk9WwcWbWTiANDGtoLr24L23Wa1AmhPofgGyEoszFF1/qWDw4ou9vyI6SMxjm" - "ig4vG3ICu4aNimMZXS/uhiKGAhQpExoOEVqKIZoPUlo42GRMHPF+gcoGVT0LtCHrGeUV3wUz" - "gYr+5GabLAmTCcJqXqN7tChYXJUdtd0s2EdnUFHsRMGNDMVmFuJfx8dDrel0YYoXcFFrTA8X" - "3AUfrwDa6KCl5PMpQRIdMi4723dUFpphiTqxqKYNSP2y60HuVP3Wkn7P3KLaACQIih3Ot3mK" 
- "aDHnKHKDgjVuJIgJ9R5j1KPM/0hXko6s98yVL6iA0vv8w8rfcrTGK5RYCzXzIdZjt4L4iokD" - "t6M4GN6L2spTU/VRM8iaWhYDUeFvw6m62Hm21lTChDLALcsiifSSnQi/CDU7PHYVHDYRaVZB" - "jKULgDSTZN8f4qOGQi8UIuPbnNaYXlYqgYMGcdNjsyf8WlWie6s2wzySFcCiN1zUT/mFBhPy" - "OaZGgHmUb5f+4wTCljSlPlXuVf3imN4/OWJiUBAxrQ/001qfZigYw3RwNClmDFvbUy36HpFD" - "znuQM8ZEQNYb1mgVIfIEK5Q9fzIZfjb4gO2TgT4Zko2tE9kSfHqdewKqAaJD8wqp/it/Bljj" - "Qk3xiXGbVKXlEa9E4JFzCkH//qAh6w2ufFYOBDIp/JCJcX1vRf/0gg/rXLvDhso+VzkbO15j" - "lwNybHRcFoh0XihwXJZUnET9e6YWjXQNIZtyGG/601rkQB9en4h6jdOhT57xVHsNQz2RQuZm" - "CRDCmmToZ0YrE0KHXugeyWREDYFngL0zAa7REYhyVuuwi+29FgxzplJ0xJP+4tOEGktFDBpZ" - "oKBM5QsMvo5GjG75uQy3DrmFNUsBuZaZG38yfmQ3ad7Ir5wcQC9FW2IngqWFjM7QrBVJTW+Z" - "7Wv3N71K6NRUce4PJ9R81L07Em3ZpRUFM6W+nGrWczdM9QmPRabcnd8uXZuQ8qAp73QXWXbS" - "kMmMgkpFloPlvBLIGAC6fFpMdafw0yvMUXWViLeACUXAJLDNxqyWtUPOvVQraNY0BHm8PIKc" - "QA3MvbzJBWMIh8csP2SHSdJe/dQNxS4TnG1339j+RHXawSRqHbFvxKBvtY2fbxN/oH1+Lqs9" - "kmJ85crGplCK986/yfyhNnhPOtY3SYIMY91Wxnvg/vWUjcNGyyVHR3cNCmJuqJD/Q0okmulY" - "rCpQaf25/uiu4be8UumJU9whhGrsMjRgLXjzSysErp6SVtpSkuWw8mZlheOIDMQmau+cPit+" - "fG0KXq1vEU0t7P0lLKeYbLg+cUrHkLKrXOZa21reTVWkf9JtmZixkbHl2khdh8Q8MC00Lv+W" - "n4+5llHeyUihgmO1GeMgV8PhB47XZFaGjdS0sl1E7gkFpkVosqOWUU8qksg+XSdjK2FbEsfm" - "sEd1zWX7SZ2DEMcCEv0IfdgPL6MssFXn1K+tyRgizRNdCHyIppRlHxBUAwjpoblha7GGljnA" - "CjFAdc7dkWMmi+JAGtVfKU9zuMIjShvGvPyxUpm9A0idHWX7mQ8diqeemFehR8/Wbt/ajNiV" - "4hfPax/nPfw4SWJ+wfag+PJSdVhXY63Sr+5rw4sk3an2ny74ZJRmFE0GJgRbcpnjzxlfZa3d" - "tQTZTWFuMy/jJP9R3CrdUCthvVHYYj1HgWiJsk55konABzppHmDIla8rJqdsF7Ocg14H1mqc" - "Tj7Sb324+pB+j58vLfcR2tgVlrGtUiPD2YNHZqrICHwOFOiWDwooZKXtsfDRZTiqJ62PyYYr" - "tpJhkdcGKCZt1K0kLjjHV+/4y1QHiDCoQpQQfhCCltlOKO9saJuTuZA/862dPGGYuSPnJwPy" - "TU7AwBIbGXyqFszmpnzZ28/4OlULayemXEltMlfzc8RlDO7Lj6hjcJtkIpcyGRq8ow0F9wAT" - "SUrWxGIpEJgAg3Ae+gL9i3MRC1wHjoqjeU8c/U9ZKW4FHUvqYZVQN5d29QRe4uURtwZ5MdxO" - "ST6KCgITvQgzo/pasfmfe8TgAL48y9EJdl2D2tspwJGhUKpaMlPgKFqmLgSChXg379mAAceG" - 
"PTP1W0y9mUnCaNUWg5er8w8dDhMVKPu6iV9LLnNg2BDfK6wYpQ6lrsQyupd/W3Kdj/qJL37x" - "DkW2EMXh+pf0CwMy7CbvJs+rJBC4AYRBGLrdSOAuZl+8wVNSoe9Sjfh6u8caxAxjc2Kh8BNq" - "QSqkHEws9CWfwX0VQ+o36jUhSokFttwVyBtxE47z7jwVCaDRf0TqOTsSqhSSFIoUakYBZAEU" - "hvX67ot3He+g/xaiW3eqJw1xT3yclpIerfD3rvI+AUx1wa1WVCbQWBIo8z+1HwR8TkY50jSv" - "ux51cNWM9axmJ+B4QKUJ1Vkn8g3r/G3lhYZGYIbFQIxl9Bhv0pVO80m5WaHjPbR3LdQLTOuF" - "Dmn8oVBx1Cwza86K8H57dx2cV8doqceasNSqoaIE0mCKNvDOVpHle0MNPCIe5jLU2Kji1vS3" - "jds2gcoNkr0wWhZMMfh4ZPnEw605JUDfzyXMjdudi60zzOsv0bcQi0Y+emRx6Jr/pdbW8TjL" - "OJICc5DfmOcUuWqT2Cexebvabghj/IvYrl5KZbRzi7CunWH0b7N7xHesmJ89Ij/oT7zZSekG" - "1F9/KYP9e0nxhdJcletvuaoJyE/yQOPqAuVYv2OwSjjA76jUM75rmojwtHoIApjcuHZvw2mn" - "QO2HGPeY+AGJlNobpgRWfRYGt0alhQmmv6Jk9EpwLCUiz4q1NmVYr2nIhSkNatxEQqG6WDh1" - "nAuurlPcmo/63D0kbqJTW9EPlTH29/vYvH7KI0OpiLi/zmXwbl3ao/vcOuwcM7oGIOGrg03u" - "qIhRC2WzZE9F5Fh2KrqLcCrwbgj5P+NLazMJrP3hXLSGXnqrvoupoNhtTkM6nSqqKim/vO5u" - "vAJavozzzus+fCHFB1QSRYuMVcIcsQrrfYRx3rvM6rsQpx17gJc68rvfdRhXfn0K9vk0XU+V" - "UZeCZGAejHK9pmtl7b0kL0iqFQ4KkPamV9PuN3HOQcGl3cbJxXJbQeNV5LtC1BQDYaRVR99U" - "Acm6OrJ9MfH/VLzFwdLg2k2Xf3AtB3Q1pXmTO2qUuzNXbZdBVBxKensuHN7L95UWHlR9WLEB" - "RswU4EoNGm1GMs6SJyTXXCdH6RQjC1DZXbuyl7w/VCroxGJXXjNagTa/WjVIjMGwkvP7+Bm/" - "/X38dJEUTN/+ce+S61ALlmr51b/MvrtF5CcufmxLUqPwpvVEG+ubrjVs0TLN3/O+VkGpr0ZX" - "YbpyIoFxO4YhineX9KbsRRe0L7OypHKfcxcZ5V3UN7xo2UYGLuX3SlLdjkOMtL4Eyzi+HmIt" - "Orv0jZLP58auzxvtRgAQDlUVlvE0dG7fa8TvmtUhuxPDnxz/vAWK6RbuucTrr3zS4l2Y1+aA" - "uYrOor0ea0/uElnO/ml76UWq4lZDCbI3UJlRVJdGORpX2m9vMLSCV1QaxJHZQG0H7dls/+S9" - "H5dn8uoX7OP2HemR4iv+v/H6TPs5H/H7+33kqnIod0dz7BRagqzPRagCadsh9eM0xD3dQfLq" - "FA7aF4in7VHhvrk2xJJeZJQ9NDA8E9JUnT+UoI0RhrMyKEYV/iiXi3SpGat1rQlQ+2sXcYhI" - "MxcK0MvagY/P5LDgXhOhfabOv1LiYrdMBPP7/wfW8f3vXZmL/h/W6Qo37st5OuSY80IgLenx" - "gSF0+4qh4BBhdLqIB4MOLJW+V4mh5xZBCLDVeRwODr7Rb0BD8bNdf/4MHUIhdjLuNdnQybmo" - "Werl3I+CgQ0/xKVOOSqtmS2OtVR7RCNXfDCg+b4oPErzwT66XaPCJCdqpmbFkLH4zRP3q1c1" - "NUdzbU67vC/nAv/L9CWOAxDPmK6dOSfG8rCz3B4osu+wUd5fCzmxYFX2UPn60mFQlzQzAHky" 
- "3AzhWhNAGiKUoQ51MJCmFvAudq9AdUFIuiiqX34fbzxc/BgJKuwgcKnJC/r4cX64+iAYsK5L" - "MVZNQaJ5chgHePXuMM/0HIvaUEL79RK32BATVNHjzOOVOesLv2q4xiAU0fJMbZk3QH/mZK05" - "tiS1KxuXDR/KW31g1/hN5l2RESi/pmI1sspH15MGhNyrgNoHjcD+5vOHU9XCWKUPM6u7kcHS" - "SPgK/LErkDpEfdWgOlabEz0DKob1gtyzLxcMahrNFdQYdeuGJBjWwqHJm41I9NsQGfosItmN" - "j/ZUBUQvRxgxQPs3EAJf2rQROxPtgdLu1TnVyl1JfkIUK4V4FK01tGHKWCT8L4J7V7+Pcu3W" - "G0GDXosWMOyyyebCJ99BQnsE10EbHNzqoldZpOJGU+IeUytklbSCbS6FbogHoUcupyO+kod5" - "AqtzLhBmW5Stg+9FmNwMGPwBs3icm9v49o24mkmtQHdVGn/x4CiSQg0mTFfHZEL/24fQr9NJ" - "mnAhjtRqhY8A22uEanyszzc3CVw7aBnu7sIbm96sGmv/BUj+vuMio+mJQPr8dXrhvv4JwLmd" - "cHVfgFKsLkI6Q2FVr9KP/Q6CyKZGWn99HCmNGmKMBFV+A6iJhgLYAc6q9niuOZwYMe8KGWH6" - "tBdckPjIeE2BgNAtEOhxp5p8EXsPdN6jJTuOxfEZBdwjeKiEX0i3w39kYXRKQXaXt4SpjSOu" - "Jh4mssQ8DNeGAYLTItTHlam7xfQPrANNffQujoTNnSWi5agI2ZN5bIRQWfmdTLDG9JIHF8WJ" - "0JurmMVVbnAx2qutD6/tpX+D68kxADQeVTKHfegH1aj8K94koz2JlShkpdPHPEeYyhHbHv12" - "oUaeK+kToZcuk6gKIRwMkOjDxzYjevvzbYyhrLArN2UE9sQ5plIZaOHtrBNpYM+XS0HFTDBP" - "IJlewx2KOHzIN9R/Le0ka59vBfwB5qO8khhefPSX83m9Z7ge15G0CUXzX0AA23SEWd+Lt4Qm" - "+51AVyuASgCxmUZU2yRzmybqtM92JtqONl8plIKjXCrsWfZqwhRRsYYcluEU+w8ZBjmENnWu" - "//AAdxfqLHSIIrVskii0qtgR/iQ9y9pzSYJJ/UlU7s9R3JOl3BP/KkG1RGZ/Pl5zvIaM8yAp" - "cSn3X4CFG0mhSlp0dAadbpLutZxpQcf33PuGYnQxqyaxF5TsLwFKLPfdaVS+Y9mW4OTyZDDm" - "7w+mQ7yPNCY+MtALjIduEW2hoz89XMb5DkvlPbSZyttNDRze9xo3J3s9r26u75ZdiJQz/+iS" - "zrDboBxNSFFB1cQveRHxJlZl72CDNmla7CHQPCouPtTnF/ojZPfeGFtoxjwSyt6EP/gZxRYe" - "xSU1ur8qFO7AvVTucU7bIohF2mvrlF2rxiYEGxBbPGh+brca8Vf3Sd5DrU9UTGiI4Lxxgcwo" - "zJDrGHDB54ifX+MZCyDn1iecp5mtxld1hqUmpxTGCEwxE6+LafozQKEmrQkILsJDP5T+wgyZ" - "5lUWv/wRFJhOUYYLQvrh23H36ECG0H7A1e0scahy5GlB9MdF+gv5Jm0LgL1pHTUEvTzURLZz" - "aG1RzIR9tqUSrY4+w5zx+oRtX48LCpUies/CfZRerPP+VLxhwVo34XFFFVLbiYFnoibujK60" - "fMWH/L6HR6WFDVUNccbxNvqGrBuElsntVp60Ijr+Yre8cWs2eNPoUR0J/0i8UwU5Oiu6+NMT" - "LdoQaWUr21PHAa8q57PynrSjVAwVyIQOZSHufnli3C/jz7hNvoi1ENo2AHx8NKLouBYEvpsZ" - 
"Bke09Mv7udHy7jYkWS+96l8kB2vvb9yHjgf5cJf8JMSIjO1a1RHwCsNS2+iyc5atlcw3EZk3" - "RmWQ/90bs31jk+yfiMVjCjSOjkYw095Rx4AuxmIARkuxO++RgKy++hhgA9N4vLuhMEbU2hsW" - "pa6eiJxFZn1GEhxljB0XkIpbGpJ+E7z3bVQbnasMUqvcnEf8qJE/De2urCtEMaTlJyUVpJil" - "wqe+jf8iCQn3oXcyJXsFiGP2h+lg+SAiS9eYVAzInJJTStU5yJfRQqa+EAkS6hn86y8qdfku" - "i7RYWdr3dIITwMB+XISf075PIi+tAD5zvLbWmMfWvAFirAeaUssRZyMy6kYfx/pmnRSo3KjP" - "QHPdC8Ii7i5+Jd4LVr3RQxRgMWzp21ZaLC7TNTeS2/e9X5a8TTSoiBiV4+VdWq18Mtbl0IEJ" - "rZ/JSVEroCrPIHDLAkmtawgcRQmhnYaZ/JDsoRiOMwyXLCuOrWyE0SustEsTvWbgoRsq9Xm4" - "+QKl9mLLKC9U0zHmWjKNpybmh07FVILdo4kw+IoTtdzF9sV5Zo8zXyyCr7fYgGIzLG+u7jd2" - "O8OshNAer8iPA9Nr/8TwrP7uFkl7gY2CBYgIxZXUqT6lZC9K9+T/DwAAAP//vdzXMxwAowVw" - "ZVl9EZ3V++qd6Fa0jR4RFtEj0TtZvZcoq3dCtISIEmz0XlcJUaML0VeJzvc93bkz9/3+Aeft" - "zJyZ38PxUE1IxF+ofZvD1pds5oQbaMKBSkk98dEulnuUzcOfgVBc9Dw1sesttfonH2MxHeMV" - "JSfDG6v2Idhj9lP2MpnP641yIVTdwnrd1ymGllO66jwcq5fPdcKFAtNpbvCCltUo8t3bHD2e" - "8keK25ReN38hZh/zTNL68oTqDIj8FZ0dC4OWK0UBRouGIoDqhBa40N5k4p7Cpx5tlbkViZz3" - "p5+hir3tF6mlOHL+dv9EdRHq+Z2G3Jcm4j8Oy4T+BiWO5XOG1tiUv8zptwkclWTYT2bT4wz8" - "5XW2Crk1knb3L4/aYaV+TGNlTKO+5+NOovVqtaPhOKi6mJLZ+dsFLVV1CVEtl5sxx9pRSwqQ" - "Ky/E3r4qrgr2JCtX+s0iMYJJVHHSMw+pXE31ltGM8NBK7gGORqv1MMeW7M+sslGPz8SfzPle" - "lHJMB6l5d5ClrqroTeGHm6q8NmUdTP7nZ/77+5thDdrLP3FxlG+VZYPXVmSsippcOY6oUEF8" - "Ty/foR0c5UqevF2NyfLxLRe3YRz7YsJ3y+joCB2F2ZtRULwI4/rFzeLPzYjDpDfHx7OtJuTL" - "mv/Rx4SerbTbmjd0rZMXVC8wG2G2cR/w9bZqsboipnKjVvqkNLj55WHj++7sxLQBLJAluqjE" - "XJPucPo7n37py6RpqNTnJ6fOEWFcB4HflBquCuzV63WHRMaECPdsg1NQrmVJeHEhlHydbHfr" - "60ljrUlNVqo1ofoq523f+oNaszOpcXSIbdKfEzK7uCUAfw9iRXwp6NWvNguZqjGvALySF9lD" - "Fh0UKpYDomW95Mpx0NOxQe2mmrPsA7G4WPSEuD6EFqEKpz57ooru5V3y0nOnBZPuc9Ym/La6" - "MdvFXEqZfaa6C+OWl7lcaLavf1wKRITvsHgkrJlDXSuen/W9KT6nWzNlhlMXn+5ZqyvP2ipx" - "yClQ25iK9SiHCXUBJ1GFU/Qv7QaPX8+bpYLPgUBozOAxuRBEbIMVUDqUZilCvSZH44dVo/8Q" - "+As0qaWQeVabbrl7h+Bnqa9YU34tR6sXe0i9mdoJf4WRUC9OMHwqG+Ot8k/kY4VJJq2Y0hV8" - "fZr4lRWXq3Crj/Y1o0UF5GVy4w7LhOG/5U42DXtadVpfqAjnlTx15Dh94UaxyJTxjw3RsF3r" 
- "atBy5wtJRGydkm57WrcZNKjG/4ex4XTTjagnE+emZr+e1Iuyt8uJn6bQP2vVL/BV7JBDBIzs" - "HBDMD4b8ax1oR2kHW75oaFsprd0Le8y7sqeEXC4LaboE+FjX3+BFr6ujL3fQ++OfjX6TYSqJ" - "arF/sFFaU74vSjgZWczC3dlnnv1R3WAXzzWGZS4ZOSBNhbNY2sWXW6p8GsfMV871qbWkRxIu" - "LlkHC4Fpm2nBDMYC9PZYcmSP1FwN72Djs8sx7g2bS38SnZNbxguljRRxLzMxi/KrmT+ffR5M" - "FEWRaljU7eKTdBrOKR8RR88rbmDWy19Dwupq84LVRYTCFvygunyJo6LdLwlxGkCMmancJMrF" - "sFW2CuqcWuginiXJJn60rS9W6o534CejKGIwm4iISkXaevvshFa756BPfPnlaBXgGD5q6EVP" - "3CoJyuBaB4TwJ85Ue8XJdWClWue0p9DWCGXdV8E+LqnySL5j0Rwfn0h6r4j+b3Nyvogn1YSZ" - "ZSk415TpCOzETOPNlwQPUNXgNytRt0dYMMyU82vX6ZRkjZP0d0Uur2y64hLjNdsVN1aedMK8" - "GQUmAt7zDvXj/bBRJxnNIFX5CgKmDGFERLINu/LreT2ZAkQNN/oobXdi44EVH0mRj2Q5j/z3" - "Apopv/7G1lmFE0FFGMv86MlNAOIoozHQvd5KEkgZ/837s6PXgX2/pSxAzUuBu1BzfzsywOxW" - "AX/iQBRb+FJ/yHPpqnl32vt1vd6Y1E/kIdP51Ty1MfnuiigAsEOvUlar0myksGa5bLXipFQi" - "cqRTZTE3tUjuoe4078BYOrD4RfZfeIil68Gzhb2Hn9/DfDZpHhIR9wAWnAlt5qhRLfCSf6xw" - "hmCohAtLfe9qucEM68epxaJpoabs6+niFgSzpBqgjP5IWZ6SUGkR83Q1hVksjPIhOtRoZTMB" - "eUnMEquW/XSeCwEcEikvCMJRsUD9eXObuf8loKGp1E0ovHhEAaZO8/sV2IfWmXfAZuUvRTu5" - "+G0piVCle/rbFcmqNgV2sTOjtbge5ni0pkOi4XwUvtHtgTVfDbwQtJ/uu3m623VJTCZnIFf3" - "ye8EFpL9D/7lKImLJ9So21omkZnQP/IeTzwShXxytqIURebb1DIxy95UK3NYDs9BfFaOPI/1" - "huim2BzZs8yt0AZKs2anGOVxV3kJfmWREUawqo18pTGxu8fwkBf9mqPw7roEbR1uymX/8UHU" - "iOMUuK5kyy5+KuCLlbeXGHpiT4yhM0uu3kM0Cr5lodGu1l4xE22RgaIqzC1t9SOEPzOl5UU6" - "tgi9yE+heE+fNnUQS2wl7e6WLjOhHsjteOpZVazBH+seZGNj6T80NmdnaCvMBKVUpDYyIQgs" - "829oaa6TPpmHj56AUwWSmYmpsCcaqmLiIg9E9xpZb/TwRjrY0mwB/+CnLaG1O/GaIw+H7wF8" - "LH23j5RO4uEiOCpQWSmQsgjOzzvP/JRorMHeeLIHVER35LtoZ+WnxcL/SvIXu6heOCZZj9jI" - "KOZ+yxO+IdvqsiRXQo32W6J+CrhRCKwOLBMUdQia2HuZjb7PvZgN139ZuAglxZENsUAWnvHq" - "V18oLIb5BA/VGknBlveYv189OgpNgJxT2pFBVPm4Mp0XdOzpp1e2+5BPK7xkrF3PN6P16RM5" - "qJeps8oFBIUbxNUcFkaanNPlcwKRSdl29xHPxvMMr/JWy+rYN8Rt/kGhi9SELJom1NlVIToB" - "A11+/KzW6r+s0lgckNzLzefEjBurYSdSbITUYhUibwjo2bx2BS7tgVrZl2uAi2Ybu3WhsmoE" - 
"WUnbwPXTLWANSMNTTVeWiTHHQ42Z3dWwPX9cAkZ+3BxnjLsX3ubo+gCSMLlJrhK1/vx/kET2" - "/w9JNP7nGCCJUcs3yk2XCAwnp27ifI2yucPfthCMJcJeuRJFrSqveKjwivGEraMDsXE3gaKT" - "qUPqgrcU+VDBxnzywBI6PRlbdOswv1ZNNBZ/jhUyQ2ln+b2rfnyVRc33+O7Meed359S3bWUO" - "FEWH7ILNsLefm9RpvomNasF7HMR1PHqNzJaNMn+uDGUz98G27Qb5wPfnJVRKXLL+HGor7U4x" - "nudnskpXU1lpIYX2tPhOZzUuE9tsZV4dl7RWzHf2mUuTOuZ9UqySeZ77jinucezW8Di+S+Ey" - "xUjZDg5RzhYmSrZWIB8le9xt0PRDpmb27XYUwZHOc+5HMtnNe23FtQSH5XKR/dYwZ4SzlebB" - "SbtBbn0rG8Xyhw9FPG3Q/LB9TWg5ipr+bKB/3hVBa2+rjmrLcKiWZjcItf3hwxk+7ic9XRZu" - "i8RyPCsK52nIwv9qkgt31qW+GjRTGsrfIuNMGBX0/aUjWItmlVznDHGyk5UjwFJtubtRdYDr" - "ungcFvbN6SDYshYttEzEfPrnvv3SABompDKoSRXpVO4cAU5LmJVzc2tCeaS39Z/7Ic43nwyd" - "Tb8uWhPyCx6R+XHkH1490v1cx9yRRydBHFtLaoX2nWahdkJnWIJlmW7VM1rG5X/Zeg+4PnCu" - "QKZU1q2UEydQk9xLgQ49FMq00HvwlhU7xu1edZ3YdRxO+9/1MGUSV6P0qRR7rIlPuKorwPib" - "M8V+da7+5sl3jPHD8MCcxUiwSVsc6TYEjGSdx9OtU0S5moh0FYqW00haQtfhZMF7ZwrMAfKo" - "dVw7peG6A8210DC7IY25pW0pmI3yMxSAx/0dhvrSmjNsI+sDC1hKCx6TlaGMaTl5U5eTnn1M" - "MDvqxhFn4nNPmnqsOer2g9JgY/YVqzphJKEaQg3flMqxjpL2eVjLt7i5koM7EmFNr/IDUqtd" - "0rkPAW6b2cUnXovztRPvnxhYfCfb6Eh7sW4UPs+xgr5q3tYFMnax1MSD3WJeTZO2fXu9s7S9" - "xYs3J3kdygNJUeFp7I6l5cUzrxvQhn8g9wNzIyfmCSn38LbQH7RvNtLA8VawcTa4nRkZZ1Ou" - "4tzCP64UKr/UDqYDial41gJGMa0RXxzZhZJH4wZcijkHhTznIkut016e/NMTrUduQoD9xL3+" - "jsh9fN575m8vnn/PtHvnSNyq27KwruffvSrQYj9xl6T6aSK6AzMpjFZtG4smI0h5mtQ4T8iI" - "qJ6PycXZX4M+zYzass4vgihGd7gPYweF+/bevmmo493oJ0p0Okxkt92lFR/UkHcNaHyGHkUX" - "Cf/1n0ukJ+X6A+weV+riyrRTXQd2gxnOjuxQze/9w4jCaV2Dfu4Apf9d01xKHd4ROMYXWcZW" - "nO1PblnoU3ITZQDw6Mt2xk21jMOD1eqrLD7PPmbUPYk6J/1bW9aJxMIg2F7EIdT/HMdd1Rr1" - "mP+mP8f7+3OgQjeusoG/vc6ZucSdLhwqp+KlrNC13ovj4bgg20jRpn4RE4JqbKKcYiobWvHF" - "tPz+pibzj/BW3Axw01NvFlUU7NXbcQL0vuSsTU6BJzT0iB6Qf0i/ZK3Uztj6SttmjLQ2FoYh" - "9AyttPDCAmJyg5RXzy9yhMexYJ4gJFltTkaZWv9Z1+XakN8D3OA4l+CUwrykHXmO3mSqF8cM" - "0UQYuqL8XwLPkYDpNAlnff/6SfRK0afTzDlfLhGCG8GhiNvQpNRXC6OMxJSt6lwOO5wcvXVd" - "CXnm/z52Ff5OF2wj33sSSMVJ1s/4XRwuL/JgO2hrT+Ce7DtJN6mG4o9LWkso/SUgThEPa3fg" 
- "fE+/ILoyTe65uBgbhD+xs8u2dTl/TZNsDdNY8j3DTCIjy/V4utG3IiQpQUsVwJSjwVs5Mnmk" - "NpM5OdVAgdvU3TGE/yBiP+aj3Q5DhG2xAB3+1kov9nx4i8UEoy+X4NCVNgrSfZvDWR94sh01" - "qQ1xqu0Lfo5Pc1PIpkkonoo80b7yk7rJWUWSG5wK3tUbhEeLczZiofCDXtsW+yDIEbSASMki" - "DK/qXAE3W8sfYM6q66szPxGsuHKutiP1ByUF7YblVYsIw48HCwLsRJ3Hi9uWS/gZ4fwLyDqu" - "N8HKNMrPhUgNCTQ7kDdRrKL+3CUFEd5EGWmZIuX18ZFavOK67yIZGlh2nrBLagOQk5pinJnL" - "oj/4ChGekL/TcUSE/iAPEIbqRKPY1jjqkdePFzkPh/nuZOMqYl3jAXrr/bK0OrHavdAwxZQg" - "6qB4VoYRXxrCqJJ4qKxzcYX2Ok08Qw257tRoVWfXrQjBIrxSQdjMXVRkxHGo0Gn4kTTY+lZ5" - "X51ZtUH54dsi6Vv7g40tVgQI+NrriPXWbBS8bEpCxcLmy8nB/8AqSNCW2Km4nlPozGhwfsMH" - "Gbbe3QJXK/1QWR1GX4rESlzsMJUHP3uu4aksfeUV3jXJlPIhD6BWtpRruvAVsLA97reFuCfh" - "37tLAmSYZ6+UEl2PQZW4Fz8jNBK/ZN5lQlOktTek7UqhtXb3T3fxGYLIY1I+owVeqNkMki+u" - "wdxgeKhfiTMT2XzFtzWHhSkhxR/BnS62MtTFtpUbRZTEj0sKw3t4PO/cUVANoLwNT28GA84x" - "RbK+Kvssg/3DlC1CP0vguDaqE3gtHMuFcM2ZVfdRsBx20lN92RLANd8KIh/zgAiUHX5Nl9JU" - "llNZ2DNYwWzhl32iKBtM+5m82d1DzErjUgF0moTkmCszZPqNFadoglq/8+IS7IRogW59w5Ks" - "gXfrHcQhEprR2xrEXvLZdnyo0IYEuVapIgwdPiI1z2DBxcoJ8sy3m16HFTPkDkbjClnDSU4T" - "w/BlY1SqevyTwB4YzjEPXUfsfJM/Jg7qvCcgfvyCcvq/E/Ph42L3YRXkqMxx7OhKNExrld1g" - "PawYZk0z/91WXibCfFrS1iawp42VtLrzHVDihZdQrDSuqkB5eP1SxPJPYs1nO/1NqSU3OWc4" - "SLUOUJCOTebTiJPmM9pxDLiUDNp8RrDnR2PXe3BK3cqlUtjn/Q3pzrVNnAqJ+wp4aqD50sak" - "x72/KpBLXasVeZ1xnk/kgIpUgC0RWHxbbnz+uSxC/VbxbzE2YyPxzGiSivDrwGP29rA/MbuJ" - "gyQhMoHWDS7eps8slTfVnVx2o3Sn88D7SYG9pIgcczRaXz6//Ay3uv1xqNEY4yv8GG7cQDvw" - "N+l63bZvzWzJ8/wwKj6kvmjU5w3dHDKZ9g/YarmBEsCcEjLrUBzAQyWdiQuf3twkeGb8BN9h" - "akDsY3oYZ3LB8cl1LXIyiuO90nDutn7qZ+p4yT7v5dJsx3KuZV7YLH1AzIXcAMec32m3k8j0" - "iKC4xmsq85l3xDiJSuODfKkbjsUeB8SWP5UoQZEPZRB8psqK+Ia4fRssNLz41rG8wbjEK7/O" - "stzaasjdUh9jW579HHkZEgmJdyi7FHAIA+Pyq8ijJM8QiwzGTb2Fpbi5LHsPAIhSo+3AuAOX" - "fLEEE9McEzqL+eP+xFie+7BQY/nB/bwwOtimbleJqpbDIK4mKkWYc2yLLdi0I4VoWkBa6J0S" - "Iqf6q91D7hcncN/fSSNX+E3hiXgnW4tPyZTbfkhM8SvVof8mvrdRkl16jLEG/oKsE1EJ+Y2K" - 
"Liz2qZQ2oHqoEpj0NSUI6c+qRheVW3qooqCNf/KOeFye2C6+uVg9qHsyRfJC4kFi/+Auusz5" - "EvaWie9xcIYwWtmqZZ0zjEd+V266a0NGFMDGO5/hbuoLkW0QJoG7OzUKm2YM4PXVC+e/0mlN" - "8g53oJAWeJmdnm1Fwy2EivZpN5U/9DFudhwQ8Gc7BkOLLLKCHDjVmHsyqWWwzoJIhv34xvKt" - "d9zRBEaVc+4v7L9+jIHMye3/5AnEzomTjIl+k+1O97Ch7vVirUHsLlPqFYKqKNdcF29i5v7A" - "w2VYdZ/WkqUDyB5q9GApiWXzplCWHBpj+0gbt88zQ+aNi5EARBjGGKAKU1MdhrBKCDJineIO" - "iUyqot9VD8H21kSVcit0yRMnlZetphhBzIIqWXCWQ066oGc0ScmsoPyC40/gkRVLgNtmZNwV" - "x08jOkE18rsNIVKie9hOW8r2rwRk5vYdiSd26044WpDWmdW49nwsezfDPaDy7Qj2+tvUf1HE" - "kR+hv+5KoxRTE600L7X91Ur2ua3mCjvY1V4NM+TPT0tZ25mFW83niOhwCNl/MGYkkeAEP6GO" - "s8jS5shVW+Uc1zT/oIgyO1e1O8TVDm4rVYmVNvemHuAQgkiOaZ70gA6IQebfKWCcrPdRkApN" - "CuJb1dYSzIFXrwFDUBh2IeWOoe3gTFjuKxPVfqHHtg6W7FLvw7/stGDqEbXdifkAS0LAfSpg" - "Ua3qnaw9Y16rWFryES1cL1Ti7MfzUcc3H5jpki21zvVmdIvOsL24HYnoBghnxhkT9kA9hLzu" - "/cbDUEzDzWVRy5AgStGQOdWCZGa8SVf3+U16C1afFg5ZzbnRYHHkirVPdyikos5r8IFNfql1" - "mpja69/q5zuWnS3EuGaBENt1nxK3kYB57GOf5GxUxO17b4KWruHIYSJxYbFVOtFlP3n8dtVE" - "h0c25HlcAqNnOk18pxNF5XEv3ervI3OF8igrM/622f5xCrDdqhgHKf5liexXcIQq9/pbiukn" - "c7noUQh9pnnFvqY35Q1opBvS/aojZdIjwOylSifXfvhPN4I26w5yUhWFH1sMJaabswwthhjI" - "Ok1M7P4m+QcNurgPDx3kbFZCzC4St8dwY+FKiPsYvkqcpOJ+3xfLXZPwji4ntczeYAuFt7Uq" - "hG6bneIcKg1vGOGGI59VZiT8IDkDnrYG2sn+rid/Im2L+c5ddPGICy+MLLPgkuNr2hQh8wL6" - "sgbTvVfrUe7Tvbsn9BRLdYUX3Er4/uuxFViGY3RTwk3T6zmBnKxIbGwAyr2Bw+n2qi5iy/ON" - "Pd364JCVx40xWTU5estviIpjz78PTBIE/Fhbiavx9DsADJ4hH8JwwmGqtRs3yr3+s3sfQb42" - "sAGw8N0Fa7MIgwXd1RP1IaY0ewtBkYuXZKr51nUvAkhemvtYVP2cLQ0xZLSr1Rj69P4xwiCr" - "2MVZVlHEv4v8pGeY9LPOn8tEHFCdtpzphSqdki9wcGwLPxZsiYaLboIINoVTx7FFKcE2lzH/" - "8q8hTA4RgdejiQuJnT2kQ2DWyrp8UdeFvje4/iWD2/NxZz1X2j0ZMz4+jxltMEqrWP6HqOL8" - "tF0xl5bA7o4x4gLkQ8aXqRio8YFFyYYPjYiw2qX2RiEEpi+dVSJViNuoYbfY+LPtEy2qPqQ3" - "ekNE3w0kZjNAN/3g06Tp8ECU1ZIfaPwRUnGznxN/XzNbxEKDn+RkZ9WS2GOd8yrhRh/nQigf" - "DyUtJLWfDc/G/lUVmnDt8VXOyir7fqFbFptkruC9HqPyM3/0k7OR3CrgzNMYvtwMxJ+vWryo" - "N6KQgDrcLUDqh36PIOOXgmytZHpLxrWVKnOcd9hrPfnUGBcHC0113+9yPz0Djl/byoGd1487" 
- "Ym3gq99gdwc2GKOFHwvcIznxj27RXyqTj1bn5yhi/VEkVU0fKdy/xSn1e1w1x6pc2N4DZTb0" - "mn/BeBL/t2X8B2rxLZ8="; - -/*---------------------------------------------------------------------*/ -/* Deserializer with added processing */ -/*---------------------------------------------------------------------*/ -/*! - * \brief l_bootnum_gen4() - * - * \param[in] nsamp number of samples to retain for each digit - * \return pixa of labeled digits - * - *
- * Notes:
- *      (1) The encoded string and the code to generate pixa1 was
- *          automatically generated.
- *      (2) pixa1 is further processed to make the pixa of labelled digits.
- * 
- */ -PIXA * -l_bootnum_gen4(l_int32 nsamp) -{ -l_uint8 *data1, *data2; -l_int32 size1; -size_t size2; -PIXA *pixa1, *pixa2; - - PROCNAME("l_bootnum_gen4"); - - if (nsamp <= 0) - return (PIXA *)ERROR_PTR("invalid nsamp\n", procName, NULL); - - /* Unencode selected string, write to file, and read it */ - data1 = decodeBase64(l_bootnum4, strlen(l_bootnum4), &size1); - data2 = zlibUncompress(data1, size1, &size2); - pixa1 = pixaReadMem(data2, size2); - lept_free(data1); - lept_free(data2); - - /* pixa1 has 10 images of mosaic'd digits. Each of these images - * must be extracted into a pixa of templates, where each template - * is labeled with the digit value, and then selectively - * concatenated into an output pixa. */ - pixa2 = pixaMakeFromTiledPixa(pixa1, 20, 30, nsamp); - pixaDestroy(&pixa1); - return pixa2; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxbasic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxbasic.c deleted file mode 100644 index c86b81e3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxbasic.c +++ /dev/null @@ -1,2390 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file boxbasic.c - *
- *
- *   Basic 'class' functions for box, boxa and boxaa,
- *   including accessors and serialization.
- *
- *      Box creation, copy, clone, destruction
- *           BOX      *boxCreate()
- *           BOX      *boxCreateValid()
- *           BOX      *boxCopy()
- *           BOX      *boxClone()
- *           void      boxDestroy()
- *
- *      Box accessors
- *           l_int32   boxGetGeometry()
- *           l_int32   boxSetGeometry()
- *           l_int32   boxGetSideLocations()
- *           l_int32   boxSetSideLocations()
- *           l_int32   boxGetRefcount()
- *           l_int32   boxChangeRefcount()
- *           l_int32   boxIsValid()
- *
- *      Boxa creation, copy, destruction
- *           BOXA     *boxaCreate()
- *           BOXA     *boxaCopy()
- *           void      boxaDestroy()
- *
- *      Boxa array extension
- *           l_int32   boxaAddBox()
- *           l_int32   boxaExtendArray()
- *           l_int32   boxaExtendArrayToSize()
- *
- *      Boxa accessors
- *           l_int32   boxaGetCount()
- *           l_int32   boxaGetValidCount()
- *           BOX      *boxaGetBox()
- *           BOX      *boxaGetValidBox()
- *           NUMA     *boxaFindInvalidBoxes()
- *           l_int32   boxaGetBoxGeometry()
- *           l_int32   boxaIsFull()
- *
- *      Boxa array modifiers
- *           l_int32   boxaReplaceBox()
- *           l_int32   boxaInsertBox()
- *           l_int32   boxaRemoveBox()
- *           l_int32   boxaRemoveBoxAndSave()
- *           BOXA     *boxaSaveValid()
- *           l_int32   boxaInitFull()
- *           l_int32   boxaClear()
- *
- *      Boxaa creation, copy, destruction
- *           BOXAA    *boxaaCreate()
- *           BOXAA    *boxaaCopy()
- *           void      boxaaDestroy()
- *
- *      Boxaa array extension
- *           l_int32   boxaaAddBoxa()
- *           l_int32   boxaaExtendArray()
- *           l_int32   boxaaExtendArrayToSize()
- *
- *      Boxaa accessors
- *           l_int32   boxaaGetCount()
- *           l_int32   boxaaGetBoxCount()
- *           BOXA     *boxaaGetBoxa()
- *           BOX      *boxaaGetBox()
- *
- *      Boxaa array modifiers
- *           l_int32   boxaaInitFull()
- *           l_int32   boxaaExtendWithInit()
- *           l_int32   boxaaReplaceBoxa()
- *           l_int32   boxaaInsertBoxa()
- *           l_int32   boxaaRemoveBoxa()
- *           l_int32   boxaaAddBox()
- *
- *      Boxaa serialized I/O
- *           BOXAA    *boxaaReadFromFiles()
- *           BOXAA    *boxaaRead()
- *           BOXAA    *boxaaReadStream()
- *           BOXAA    *boxaaReadMem()
- *           l_int32   boxaaWrite()
- *           l_int32   boxaaWriteStream()
- *           l_int32   boxaaWriteMem()
- *
- *      Boxa serialized I/O
- *           BOXA     *boxaRead()
- *           BOXA     *boxaReadStream()
- *           BOXA     *boxaReadMem()
- *           l_int32   boxaWriteDebug()
- *           l_int32   boxaWrite()
- *           l_int32   boxaWriteStream()
- *           l_int32   boxaWriteStderr()
- *           l_int32   boxaWriteMem()
- *
- *      Box print (for debug)
- *           l_int32   boxPrintStreamInfo()
- *
- *   Most functions use only valid boxes, which are boxes that have both
- *   width and height > 0.  However, a few functions, such as
- *   boxaGetMedianVals() do not assume that all boxes are valid.  For any
- *   function that can use a boxa with invalid boxes, it is convenient
- *   to use these accessors:
- *       boxaGetValidCount()   :  count of valid boxes
- *       boxaGetValidBox()     :  returns NULL for invalid boxes
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 1000000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - -/*---------------------------------------------------------------------* - * Box creation, destruction and copy * - *---------------------------------------------------------------------*/ -/*! - * \brief boxCreate() - * - * \param[in] x, y, w, h - * \return box, or NULL on error - * - *
- * Notes:
- *      (1) This clips the box to the +quad.  If no part of the
- *          box is in the +quad, this returns NULL.
- *      (2) We allow you to make a box with w = 0 and/or h = 0.
- *          This does not represent a valid region, but it is useful
- *          as a placeholder in a boxa for which the index of the
- *          box in the boxa is important.  This is an atypical
- *          situation; usually you want to put only valid boxes with
- *          nonzero width and height in a boxa.  If you have a boxa
- *          with invalid boxes, the accessor boxaGetValidBox()
- *          will return NULL on each invalid box.
- *      (3) If you want to create only valid boxes, use boxCreateValid(),
- *          which returns NULL if either w or h is 0.
- * 
- */ -BOX * -boxCreate(l_int32 x, - l_int32 y, - l_int32 w, - l_int32 h) -{ -BOX *box; - - PROCNAME("boxCreate"); - - if (w < 0 || h < 0) - return (BOX *)ERROR_PTR("w and h not both >= 0", procName, NULL); - if (x < 0) { /* take part in +quad */ - w = w + x; - x = 0; - if (w <= 0) - return (BOX *)ERROR_PTR("x < 0 and box off +quad", procName, NULL); - } - if (y < 0) { /* take part in +quad */ - h = h + y; - y = 0; - if (h <= 0) - return (BOX *)ERROR_PTR("y < 0 and box off +quad", procName, NULL); - } - - box = (BOX *)LEPT_CALLOC(1, sizeof(BOX)); - boxSetGeometry(box, x, y, w, h); - box->refcount = 1; - return box; -} - - -/*! - * \brief boxCreateValid() - * - * \param[in] x, y, w, h - * \return box, or NULL on error - * - *
- * Notes:
- *      (1) This returns NULL if either w = 0 or h = 0.
- * 
- */ -BOX * -boxCreateValid(l_int32 x, - l_int32 y, - l_int32 w, - l_int32 h) -{ - PROCNAME("boxCreateValid"); - - if (w <= 0 || h <= 0) - return (BOX *)ERROR_PTR("w and h not both > 0", procName, NULL); - return boxCreate(x, y, w, h); -} - - -/*! - * \brief boxCopy() - * - * \param[in] box - * \return copy of box, or NULL on error - */ -BOX * -boxCopy(BOX *box) -{ -BOX *boxc; - - PROCNAME("boxCopy"); - - if (!box) - return (BOX *)ERROR_PTR("box not defined", procName, NULL); - - boxc = boxCreate(box->x, box->y, box->w, box->h); - return boxc; -} - - -/*! - * \brief boxClone() - * - * \param[in] box - * \return ptr to same box, or NULL on error - */ -BOX * -boxClone(BOX *box) -{ - - PROCNAME("boxClone"); - - if (!box) - return (BOX *)ERROR_PTR("box not defined", procName, NULL); - - boxChangeRefcount(box, 1); - return box; -} - - -/*! - * \brief boxDestroy() - * - * \param[in,out] pbox will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the box.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -boxDestroy(BOX **pbox) -{ -BOX *box; - - PROCNAME("boxDestroy"); - - if (pbox == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - if ((box = *pbox) == NULL) - return; - - boxChangeRefcount(box, -1); - if (boxGetRefcount(box) <= 0) - LEPT_FREE(box); - *pbox = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * Box accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief boxGetGeometry() - * - * \param[in] box - * \param[out] px, py, pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -boxGetGeometry(BOX *box, - l_int32 *px, - l_int32 *py, - l_int32 *pw, - l_int32 *ph) -{ - PROCNAME("boxGetGeometry"); - - if (px) *px = 0; - if (py) *py = 0; - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (px) *px = box->x; - if (py) *py = box->y; - if (pw) *pw = box->w; - if (ph) *ph = box->h; - return 0; -} - - -/*! - * \brief boxSetGeometry() - * - * \param[in] box - * \param[in] x, y, w, h [optional] use -1 to leave unchanged - * \return 0 if OK, 1 on error - */ -l_ok -boxSetGeometry(BOX *box, - l_int32 x, - l_int32 y, - l_int32 w, - l_int32 h) -{ - PROCNAME("boxSetGeometry"); - - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (x != -1) box->x = x; - if (y != -1) box->y = y; - if (w != -1) box->w = w; - if (h != -1) box->h = h; - return 0; -} - - -/*! - * \brief boxGetSideLocations() - * - * \param[in] box - * \param[out] pl, pt, pr, pb [optional] each can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) All returned values are within the box.
- * 
- */ -l_ok -boxGetSideLocations(BOX *box, - l_int32 *pl, - l_int32 *pr, - l_int32 *pt, - l_int32 *pb) -{ -l_int32 x, y, w, h; - - PROCNAME("boxGetSideLocations"); - - if (pl) *pl = 0; - if (pr) *pr = 0; - if (pt) *pt = 0; - if (pb) *pb = 0; - if (!box) - return ERROR_INT("box not defined", procName, 1); - - boxGetGeometry(box, &x, &y, &w, &h); - if (pl) *pl = x; - if (pr) *pr = x + w - 1; - if (pt) *pt = y; - if (pb) *pb = y + h - 1; - return 0; -} - - -/*! - * \brief boxSetSideLocations() - * - * \param[in] box - * \param[in] l, r, t, b [optional] use -1 to leave unchanged - * \return 0 if OK, 1 on error - */ -l_ok -boxSetSideLocations(BOX *box, - l_int32 l, - l_int32 r, - l_int32 t, - l_int32 b) -{ -l_int32 x, y, w, h; - - PROCNAME("boxSetSideLocations"); - - if (!box) - return ERROR_INT("box not defined", procName, 1); - x = (l != -1) ? l : box->x; - w = (r != -1) ? r - x + 1 : box->x + box->w - x; - y = (t != -1) ? t : box->y; - h = (b != -1) ? b - y + 1 : box->y + box->h - y; - boxSetGeometry(box, x, y, w, h); - return 0; -} - - -/*! - * \brief Return the current reference count of %box - * - * \param[in] box - * \return refcount - */ -l_int32 -boxGetRefcount(BOX *box) -{ - PROCNAME("boxGetRefcount"); - - if (!box) - return ERROR_INT("box not defined", procName, UNDEF); - - return box->refcount; -} - -/*! - * \brief Adjust the current references count of %box by %delta - * - * \param[in] box ptr to box - * \param[in] delta adjustment, usually -1 or 1 - * \return 0 if OK, 1 on error - */ -l_ok -boxChangeRefcount(BOX *box, - l_int32 delta) -{ - PROCNAME("boxChangeRefcount"); - - if (!box) - return ERROR_INT("box not defined", procName, 1); - - box->refcount += delta; - return 0; -} - - -/*! 
- * \brief boxIsValid() - * - * \param[in] box - * \param[out] pvalid 1 if valid; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -boxIsValid(BOX *box, - l_int32 *pvalid) -{ - PROCNAME("boxIsValid"); - - if (!pvalid) - return ERROR_INT("&valid not defined", procName, 1); - *pvalid = 0; - if (!box) - return ERROR_INT("box not defined", procName, 1); - - if (box->w > 0 && box->h > 0) - *pvalid = 1; - return 0; -} - - -/*---------------------------------------------------------------------* - * Boxa creation, destruction, copy, extension * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaCreate() - * - * \param[in] n initial number of ptrs; 0 for default - * \return boxa, or NULL on error - */ -BOXA * -boxaCreate(l_int32 n) -{ -BOXA *boxa; - - PROCNAME("boxaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - boxa = (BOXA *)LEPT_CALLOC(1, sizeof(BOXA)); - boxa->n = 0; - boxa->nalloc = n; - boxa->refcount = 1; - if ((boxa->box = (BOX **)LEPT_CALLOC(n, sizeof(BOX *))) == NULL) { - boxaDestroy(&boxa); - return (BOXA *)ERROR_PTR("boxa ptrs not made", procName, NULL); - } - return boxa; -} - - -/*! - * \brief boxaCopy() - * - * \param[in] boxa - * \param[in] copyflag L_COPY, L_CLONE, L_COPY_CLONE - * \return new boxa, or NULL on error - * - *
- * Notes:
- *      (1) See pix.h for description of the copyflag.
- *      (2) The copy-clone makes a new boxa that holds clones of each box.
- * 
- */ -BOXA * -boxaCopy(BOXA *boxa, - l_int32 copyflag) -{ -l_int32 i; -BOX *boxc; -BOXA *boxac; - - PROCNAME("boxaCopy"); - - if (!boxa) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - - if (copyflag == L_CLONE) { - boxa->refcount++; - return boxa; - } - - if (copyflag != L_COPY && copyflag != L_COPY_CLONE) - return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - if ((boxac = boxaCreate(boxa->nalloc)) == NULL) - return (BOXA *)ERROR_PTR("boxac not made", procName, NULL); - for (i = 0; i < boxa->n; i++) { - if (copyflag == L_COPY) - boxc = boxaGetBox(boxa, i, L_COPY); - else /* copy-clone */ - boxc = boxaGetBox(boxa, i, L_CLONE); - boxaAddBox(boxac, boxc, L_INSERT); - } - return boxac; -} - - -/*! - * \brief boxaDestroy() - * - * \param[in,out] pboxa will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the boxa.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -boxaDestroy(BOXA **pboxa) -{ -l_int32 i; -BOXA *boxa; - - PROCNAME("boxaDestroy"); - - if (pboxa == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((boxa = *pboxa) == NULL) - return; - - /* Decrement the ref count. If it is 0, destroy the boxa. */ - boxa->refcount--; - if (boxa->refcount <= 0) { - for (i = 0; i < boxa->n; i++) - boxDestroy(&boxa->box[i]); - LEPT_FREE(boxa->box); - LEPT_FREE(boxa); - } - - *pboxa = NULL; - return; -} - - -/*! - * \brief boxaAddBox() - * - * \param[in] boxa - * \param[in] box to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - */ -l_ok -boxaAddBox(BOXA *boxa, - BOX *box, - l_int32 copyflag) -{ -l_int32 n; -BOX *boxc; - - PROCNAME("boxaAddBox"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - - if (copyflag == L_INSERT) - boxc = box; - else if (copyflag == L_COPY) - boxc = boxCopy(box); - else if (copyflag == L_CLONE) - boxc = boxClone(box); - else - return ERROR_INT("invalid copyflag", procName, 1); - if (!boxc) - return ERROR_INT("boxc not made", procName, 1); - - n = boxaGetCount(boxa); - if (n >= boxa->nalloc) - boxaExtendArray(boxa); - boxa->box[n] = boxc; - boxa->n++; - - return 0; -} - - -/*! - * \brief boxaExtendArray() - * - * \param[in] boxa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Reallocs with doubled size of ptr array.
- * 
- */ -l_ok -boxaExtendArray(BOXA *boxa) -{ - PROCNAME("boxaExtendArray"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - return boxaExtendArrayToSize(boxa, 2 * boxa->nalloc); -} - - -/*! - * \brief boxaExtendArrayToSize() - * - * \param[in] boxa - * \param[in] size new size of boxa array - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If necessary, reallocs new boxa ptr array to %size.
- * 
- */ -l_ok -boxaExtendArrayToSize(BOXA *boxa, - l_int32 size) -{ - PROCNAME("boxaExtendArrayToSize"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - if (size > boxa->nalloc) { - if ((boxa->box = (BOX **)reallocNew((void **)&boxa->box, - sizeof(BOX *) * boxa->nalloc, - size * sizeof(BOX *))) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - boxa->nalloc = size; - } - return 0; -} - - -/*---------------------------------------------------------------------* - * Boxa accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaGetCount() - * - * \param[in] boxa - * \return count of all boxes; 0 if no boxes or on error - */ -l_int32 -boxaGetCount(BOXA *boxa) -{ - PROCNAME("boxaGetCount"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 0); - return boxa->n; -} - - -/*! - * \brief boxaGetValidCount() - * - * \param[in] boxa - * \return count of valid boxes; 0 if no valid boxes or on error - */ -l_int32 -boxaGetValidCount(BOXA *boxa) -{ -l_int32 n, i, w, h, count; - - PROCNAME("boxaGetValidCount"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 0); - - n = boxaGetCount(boxa); - for (i = 0, count = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - if (w > 0 && h > 0) - count++; - } - return count; -} - - -/*! 
- * \brief boxaGetBox() - * - * \param[in] boxa - * \param[in] index to the index-th box - * \param[in] accessflag L_COPY or L_CLONE - * \return box, or NULL on error - */ -BOX * -boxaGetBox(BOXA *boxa, - l_int32 index, - l_int32 accessflag) -{ - PROCNAME("boxaGetBox"); - - if (!boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - if (index < 0 || index >= boxa->n) - return (BOX *)ERROR_PTR("index not valid", procName, NULL); - - if (accessflag == L_COPY) - return boxCopy(boxa->box[index]); - else if (accessflag == L_CLONE) - return boxClone(boxa->box[index]); - else - return (BOX *)ERROR_PTR("invalid accessflag", procName, NULL); -} - - -/*! - * \brief boxaGetValidBox() - * - * \param[in] boxa - * \param[in] index to the index-th box - * \param[in] accessflag L_COPY or L_CLONE - * \return box, or NULL if box is not valid or on error - * - *
- * Notes:
- *      (1) This returns NULL for an invalid box in a boxa.
- *          For a box to be valid, both the width and height must be > 0.
- *      (2) We allow invalid boxes, with w = 0 or h = 0, as placeholders
- *          in boxa for which the index of the box in the boxa is important.
- *          This is an atypical situation; usually you want to put only
- *          valid boxes in a boxa.
- * 
- */ -BOX * -boxaGetValidBox(BOXA *boxa, - l_int32 index, - l_int32 accessflag) -{ -l_int32 w, h; -BOX *box; - - PROCNAME("boxaGetValidBox"); - - if (!boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - - if ((box = boxaGetBox(boxa, index, accessflag)) == NULL) - return (BOX *)ERROR_PTR("box not returned", procName, NULL); - boxGetGeometry(box, NULL, NULL, &w, &h); - if (w <= 0 || h <= 0) /* not valid, but not necessarily an error */ - boxDestroy(&box); - return box; -} - - -/*! - * \brief boxaFindInvalidBoxes() - * - * \param[in] boxa - * \return na numa of invalid boxes; NULL if there are none or on error - */ -NUMA * -boxaFindInvalidBoxes(BOXA *boxa) -{ -l_int32 i, n, w, h; -NUMA *na; - - PROCNAME("boxaFindInvalidBoxes"); - - if (!boxa) - return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL); - - n = boxaGetCount(boxa); - if (boxaGetValidCount(boxa) == n) - return NULL; - - na = numaMakeConstant(0, n); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - if (w == 0 || h == 0) - numaSetValue(na, i, 1); - } - return na; -} - - -/*! - * \brief boxaGetBoxGeometry() - * - * \param[in] boxa - * \param[in] index to the index-th box - * \param[out] px, py, pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -boxaGetBoxGeometry(BOXA *boxa, - l_int32 index, - l_int32 *px, - l_int32 *py, - l_int32 *pw, - l_int32 *ph) -{ -BOX *box; - - PROCNAME("boxaGetBoxGeometry"); - - if (px) *px = 0; - if (py) *py = 0; - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (index < 0 || index >= boxa->n) - return ERROR_INT("index not valid", procName, 1); - - if ((box = boxaGetBox(boxa, index, L_CLONE)) == NULL) - return ERROR_INT("box not found!", procName, 1); - boxGetGeometry(box, px, py, pw, ph); - boxDestroy(&box); - return 0; -} - - -/*! 
- * \brief boxaIsFull() - * - * \param[in] boxa - * \param[out] pfull 1 if boxa is full; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -boxaIsFull(BOXA *boxa, - l_int32 *pfull) -{ -l_int32 i, n, full; -BOX *box; - - PROCNAME("boxaIsFull"); - - if (!pfull) - return ERROR_INT("&full not defined", procName, 1); - *pfull = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - full = 1; - for (i = 0; i < n; i++) { - if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) { - full = 0; - break; - } - boxDestroy(&box); - } - *pfull = full; - return 0; -} - - -/*---------------------------------------------------------------------* - * Boxa array modifiers * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaReplaceBox() - * - * \param[in] boxa - * \param[in] index to the index-th box - * \param[in] box insert this box to replace existing one - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place replacement of one box; the input %box is now
- *          owned by the boxa.
- *      (2) The previous box at that location, if any, is destroyed.
- * 
- */ -l_ok -boxaReplaceBox(BOXA *boxa, - l_int32 index, - BOX *box) -{ - PROCNAME("boxaReplaceBox"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (index < 0 || index >= boxa->n) - return ERROR_INT("index not valid", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - - boxDestroy(&(boxa->box[index])); - boxa->box[index] = box; - return 0; -} - - -/*! - * \brief boxaInsertBox() - * - * \param[in] boxa - * \param[in] index location in boxa to insert new value - * \param[in] box new box to be inserted; the boxa now owns it - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts box[i] --> box[i + 1] for all i >= index,
- *          and then inserts box as box[index].
- *      (2) To insert at the beginning of the array, set index = 0.
- *      (3) To append to the array, it's easier to use boxaAddBox().
- *      (4) This should not be used repeatedly to insert into large arrays,
- *          because the function is O(n).
- * 
- */ -l_ok -boxaInsertBox(BOXA *boxa, - l_int32 index, - BOX *box) -{ -l_int32 i, n; -BOX **array; - - PROCNAME("boxaInsertBox"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - n = boxaGetCount(boxa); - if (index < 0 || index > n) - return ERROR_INT("index not in {0...n}", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - - if (n >= boxa->nalloc) - boxaExtendArray(boxa); - array = boxa->box; - boxa->n++; - for (i = n; i > index; i--) - array[i] = array[i - 1]; - array[index] = box; - - return 0; -} - - -/*! - * \brief boxaRemoveBox() - * - * \param[in] boxa - * \param[in] index of box to be removed and destroyed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes box[index] and then shifts
- *          box[i] --> box[i - 1] for all i > index.
- *      (2) It should not be used repeatedly to remove boxes from
- *          large arrays, because the function is O(n).
- * 
- */ -l_ok -boxaRemoveBox(BOXA *boxa, - l_int32 index) -{ - return boxaRemoveBoxAndSave(boxa, index, NULL); -} - - -/*! - * \brief boxaRemoveBoxAndSave() - * - * \param[in] boxa - * \param[in] index of box to be removed - * \param[out] pbox [optional] removed box - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes box[index] and then shifts
- *          box[i] --> box[i - 1] for all i > index.
- *      (2) It should not be used repeatedly to remove boxes from
- *          large arrays, because the function is O(n).
- * 
- */ -l_ok -boxaRemoveBoxAndSave(BOXA *boxa, - l_int32 index, - BOX **pbox) -{ -l_int32 i, n; -BOX **array; - - PROCNAME("boxaRemoveBoxAndSave"); - - if (pbox) *pbox = NULL; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - n = boxaGetCount(boxa); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - if (pbox) - *pbox = boxaGetBox(boxa, index, L_CLONE); - array = boxa->box; - boxDestroy(&array[index]); - for (i = index + 1; i < n; i++) - array[i - 1] = array[i]; - array[n - 1] = NULL; - boxa->n--; - - return 0; -} - - -/*! - * \brief boxaSaveValid() - * - * \param[in] boxas - * \param[in] copyflag L_COPY or L_CLONE - * \return boxad if OK, NULL on error - * - *
- * Notes:
- *      (1) This makes a copy/clone of each valid box.
- * 
- */ -BOXA * -boxaSaveValid(BOXA *boxas, - l_int32 copyflag) -{ -l_int32 i, n; -BOX *box; -BOXA *boxad; - - PROCNAME("boxaSaveValid"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - n = boxaGetCount(boxas); - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxas, i, copyflag)) != NULL) - boxaAddBox(boxad, box, L_INSERT); - } - - return boxad; -} - - -/*! - * \brief boxaInitFull() - * - * \param[in] boxa typically empty - * \param[in] box [optional] to be replicated into the entire ptr array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This initializes a boxa by filling up the entire box ptr array
- *          with copies of %box.  If %box == NULL, use a placeholder box
- *          of zero size.  Any existing boxes are destroyed.
- *          After this opepration, the number of boxes is equal to
- *          the number of allocated ptrs.
- *      (2) Note that we use boxaReplaceBox() instead of boxaInsertBox().
- *          They both have the same effect when inserting into a NULL ptr
- *          in the boxa ptr array:
- *      (3) Example usage.  This function is useful to prepare for a
- *          random insertion (or replacement) of boxes into a boxa.
- *          To randomly insert boxes into a boxa, up to some index "max":
- *             Boxa *boxa = boxaCreate(max);
- *             boxaInitFull(boxa, NULL);
- *          If you want placeholder boxes of non-zero size:
- *             Boxa *boxa = boxaCreate(max);
- *             Box *box = boxCreate(...);
- *             boxaInitFull(boxa, box);
- *             boxDestroy(&box);
- *          If we have an existing boxa with a smaller ptr array, it can
- *          be reused for up to max boxes:
- *             boxaExtendArrayToSize(boxa, max);
- *             boxaInitFull(boxa, NULL);
- *          The initialization allows the boxa to always be properly
- *          filled, even if all the boxes are not later replaced.
- *          If you want to know which boxes have been replaced,
- *          and you initialized with invalid zero-sized boxes,
- *          use boxaGetValidBox() to return NULL for the invalid boxes.
- * 
- */ -l_ok -boxaInitFull(BOXA *boxa, - BOX *box) -{ -l_int32 i, n; -BOX *boxt; - - PROCNAME("boxaInitFull"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxa->nalloc; - boxa->n = n; - for (i = 0; i < n; i++) { - if (box) - boxt = boxCopy(box); - else - boxt = boxCreate(0, 0, 0, 0); - boxaReplaceBox(boxa, i, boxt); - } - return 0; -} - - -/*! - * \brief boxaClear() - * - * \param[in] boxa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys all boxes in the boxa, setting the ptrs
- *          to null.  The number of allocated boxes, n, is set to 0.
- * 
- */ -l_ok -boxaClear(BOXA *boxa) -{ -l_int32 i, n; - - PROCNAME("boxaClear"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) - boxDestroy(&boxa->box[i]); - boxa->n = 0; - return 0; -} - - -/*--------------------------------------------------------------------------* - * Boxaa creation, destruction * - *--------------------------------------------------------------------------*/ -/*! - * \brief boxaaCreate() - * - * \param[in] n size of boxa ptr array to be alloc'd; 0 for default - * \return baa, or NULL on error - */ -BOXAA * -boxaaCreate(l_int32 n) -{ -BOXAA *baa; - - PROCNAME("boxaaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - baa = (BOXAA *)LEPT_CALLOC(1, sizeof(BOXAA)); - if ((baa->boxa = (BOXA **)LEPT_CALLOC(n, sizeof(BOXA *))) == NULL) { - boxaaDestroy(&baa); - return (BOXAA *)ERROR_PTR("boxa ptr array not made", procName, NULL); - } - baa->nalloc = n; - baa->n = 0; - return baa; -} - - -/*! - * \brief boxaaCopy() - * - * \param[in] baas input boxaa to be copied - * \param[in] copyflag L_COPY, L_CLONE - * \return baad new boxaa, composed of copies or clones of the boxa - * in baas, or NULL on error - * - *
- * Notes:
- *      (1) L_COPY makes a copy of each boxa in baas.
- *          L_CLONE makes a clone of each boxa in baas.
- * 
- */ -BOXAA * -boxaaCopy(BOXAA *baas, - l_int32 copyflag) -{ -l_int32 i, n; -BOXA *boxa; -BOXAA *baad; - - PROCNAME("boxaaCopy"); - - if (!baas) - return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL); - - n = boxaaGetCount(baas); - baad = boxaaCreate(n); - for (i = 0; i < n; i++) { - boxa = boxaaGetBoxa(baas, i, copyflag); - boxaaAddBoxa(baad, boxa, L_INSERT); - } - - return baad; -} - - -/*! - * \brief boxaaDestroy() - * - * \param[in,out] pbaa will be set to null before returning - */ -void -boxaaDestroy(BOXAA **pbaa) -{ -l_int32 i; -BOXAA *baa; - - PROCNAME("boxaaDestroy"); - - if (pbaa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((baa = *pbaa) == NULL) - return; - - for (i = 0; i < baa->n; i++) - boxaDestroy(&baa->boxa[i]); - LEPT_FREE(baa->boxa); - LEPT_FREE(baa); - *pbaa = NULL; - - return; -} - - - -/*--------------------------------------------------------------------------* - * Add Boxa to Boxaa * - *--------------------------------------------------------------------------*/ -/*! - * \brief boxaaAddBoxa() - * - * \param[in] baa - * \param[in] ba to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - */ -l_ok -boxaaAddBoxa(BOXAA *baa, - BOXA *ba, - l_int32 copyflag) -{ -l_int32 n; -BOXA *bac; - - PROCNAME("boxaaAddBoxa"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY && copyflag != L_CLONE) - return ERROR_INT("invalid copyflag", procName, 1); - - if (copyflag == L_INSERT) - bac = ba; - else - bac = boxaCopy(ba, copyflag); - - n = boxaaGetCount(baa); - if (n >= baa->nalloc) - boxaaExtendArray(baa); - baa->boxa[n] = bac; - baa->n++; - return 0; -} - - -/*! 
- * \brief boxaaExtendArray() - * - * \param[in] baa - * \return 0 if OK, 1 on error - */ -l_ok -boxaaExtendArray(BOXAA *baa) -{ - - PROCNAME("boxaaExtendArray"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - - if ((baa->boxa = (BOXA **)reallocNew((void **)&baa->boxa, - sizeof(BOXA *) * baa->nalloc, - 2 * sizeof(BOXA *) * baa->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - baa->nalloc *= 2; - return 0; -} - - -/*! - * \brief boxaaExtendArrayToSize() - * - * \param[in] baa - * \param[in] size new size of boxa array - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If necessary, reallocs the boxa ptr array to %size.
- * 
- */ -l_ok -boxaaExtendArrayToSize(BOXAA *baa, - l_int32 size) -{ - PROCNAME("boxaaExtendArrayToSize"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - - if (size > baa->nalloc) { - if ((baa->boxa = (BOXA **)reallocNew((void **)&baa->boxa, - sizeof(BOXA *) * baa->nalloc, - size * sizeof(BOXA *))) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - baa->nalloc = size; - } - return 0; -} - - -/*----------------------------------------------------------------------* - * Boxaa accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief boxaaGetCount() - * - * \param[in] baa - * \return count number of boxa, or 0 if no boxa or on error - */ -l_int32 -boxaaGetCount(BOXAA *baa) -{ - PROCNAME("boxaaGetCount"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 0); - return baa->n; -} - - -/*! - * \brief boxaaGetBoxCount() - * - * \param[in] baa - * \return count number of boxes, or 0 if no boxes or on error - */ -l_int32 -boxaaGetBoxCount(BOXAA *baa) -{ -BOXA *boxa; -l_int32 n, sum, i; - - PROCNAME("boxaaGetBoxCount"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 0); - - n = boxaaGetCount(baa); - for (sum = 0, i = 0; i < n; i++) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - sum += boxaGetCount(boxa); - boxaDestroy(&boxa); - } - - return sum; -} - - -/*! 
- * \brief boxaaGetBoxa() - * - * \param[in] baa - * \param[in] index to the index-th boxa - * \param[in] accessflag L_COPY or L_CLONE - * \return boxa, or NULL on error - */ -BOXA * -boxaaGetBoxa(BOXAA *baa, - l_int32 index, - l_int32 accessflag) -{ -l_int32 n; - - PROCNAME("boxaaGetBoxa"); - - if (!baa) - return (BOXA *)ERROR_PTR("baa not defined", procName, NULL); - n = boxaaGetCount(baa); - if (index < 0 || index >= n) - return (BOXA *)ERROR_PTR("index not valid", procName, NULL); - if (accessflag != L_COPY && accessflag != L_CLONE) - return (BOXA *)ERROR_PTR("invalid accessflag", procName, NULL); - - return boxaCopy(baa->boxa[index], accessflag); -} - - -/*! - * \brief boxaaGetBox() - * - * \param[in] baa - * \param[in] iboxa index into the boxa array in the boxaa - * \param[in] ibox index into the box array in the boxa - * \param[in] accessflag L_COPY or L_CLONE - * \return box, or NULL on error - */ -BOX * -boxaaGetBox(BOXAA *baa, - l_int32 iboxa, - l_int32 ibox, - l_int32 accessflag) -{ -BOX *box; -BOXA *boxa; - - PROCNAME("boxaaGetBox"); - - if ((boxa = boxaaGetBoxa(baa, iboxa, L_CLONE)) == NULL) - return (BOX *)ERROR_PTR("boxa not retrieved", procName, NULL); - if ((box = boxaGetBox(boxa, ibox, accessflag)) == NULL) - L_ERROR("box not retrieved\n", procName); - boxaDestroy(&boxa); - return box; -} - - -/*----------------------------------------------------------------------* - * Boxaa array modifiers * - *----------------------------------------------------------------------*/ -/*! - * \brief boxaaInitFull() - * - * \param[in] baa typically empty - * \param[in] boxa to be replicated into the entire ptr array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This initializes a boxaa by filling up the entire boxa ptr array
- *          with copies of %boxa.  Any existing boxa are destroyed.
- *          After this operation, the number of boxa is equal to
- *          the number of allocated ptrs.
- *      (2) Note that we use boxaaReplaceBox() instead of boxaInsertBox().
- *          They both have the same effect when inserting into a NULL ptr
- *          in the boxa ptr array
- *      (3) Example usage.  This function is useful to prepare for a
- *          random insertion (or replacement) of boxa into a boxaa.
- *          To randomly insert boxa into a boxaa, up to some index "max":
- *             Boxaa *baa = boxaaCreate(max);
- *               // initialize the boxa
- *             Boxa *boxa = boxaCreate(...);
- *             ...  [optionally fix with boxes]
- *             boxaaInitFull(baa, boxa);
- *          A typical use is to initialize the array with empty boxa,
- *          and to replace only a subset that must be aligned with
- *          something else, such as a pixa.
- * 
- */ -l_ok -boxaaInitFull(BOXAA *baa, - BOXA *boxa) -{ -l_int32 i, n; -BOXA *boxat; - - PROCNAME("boxaaInitFull"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = baa->nalloc; - baa->n = n; - for (i = 0; i < n; i++) { - boxat = boxaCopy(boxa, L_COPY); - boxaaReplaceBoxa(baa, i, boxat); - } - return 0; -} - - -/*! - * \brief boxaaExtendWithInit() - * - * \param[in] baa - * \param[in] maxindex - * \param[in] boxa to be replicated into the extended ptr array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This should be used on an existing boxaa that has been
- *          fully loaded with boxa.  It then extends the boxaa,
- *          loading all the additional ptrs with copies of boxa.
- *          Typically, boxa will be empty.
- * 
- */ -l_ok -boxaaExtendWithInit(BOXAA *baa, - l_int32 maxindex, - BOXA *boxa) -{ -l_int32 i, n; - - PROCNAME("boxaaExtendWithInit"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - /* Extend the ptr array if necessary */ - n = boxaaGetCount(baa); - if (maxindex < n) return 0; - boxaaExtendArrayToSize(baa, maxindex + 1); - - /* Fill the new entries with copies of boxa */ - for (i = n; i <= maxindex; i++) - boxaaAddBoxa(baa, boxa, L_COPY); - return 0; -} - - -/*! - * \brief boxaaReplaceBoxa() - * - * \param[in] baa - * \param[in] index to the index-th boxa - * \param[in] boxa insert and replace any existing one - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Any existing boxa is destroyed, and the input one
- *          is inserted in its place.
- *      (2) If the index is invalid, return 1 (error)
- * 
- */ -l_ok -boxaaReplaceBoxa(BOXAA *baa, - l_int32 index, - BOXA *boxa) -{ -l_int32 n; - - PROCNAME("boxaaReplaceBoxa"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - n = boxaaGetCount(baa); - if (index < 0 || index >= n) - return ERROR_INT("index not valid", procName, 1); - - boxaDestroy(&baa->boxa[index]); - baa->boxa[index] = boxa; - return 0; -} - - -/*! - * \brief boxaaInsertBoxa() - * - * \param[in] baa - * \param[in] index location in boxaa to insert new boxa - * \param[in] boxa new boxa to be inserted - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts boxa[i] --> boxa[i + 1] for all i >= index,
- *          and then inserts boxa as boxa[index].
- *      (2) To insert at the beginning of the array, set index = 0.
- *      (3) To append to the array, it's easier to use boxaaAddBoxa().
- *      (4) This should not be used repeatedly to insert into large arrays,
- *          because the function is O(n).
- * 
- */ -l_ok -boxaaInsertBoxa(BOXAA *baa, - l_int32 index, - BOXA *boxa) -{ -l_int32 i, n; -BOXA **array; - - PROCNAME("boxaaInsertBoxa"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - n = boxaaGetCount(baa); - if (index < 0 || index > n) - return ERROR_INT("index not in {0...n}", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - if (n >= baa->nalloc) - boxaaExtendArray(baa); - array = baa->boxa; - baa->n++; - for (i = n; i > index; i--) - array[i] = array[i - 1]; - array[index] = boxa; - - return 0; -} - - -/*! - * \brief boxaaRemoveBoxa() - * - * \param[in] baa - * \param[in] index of the boxa to be removed and destroyed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes boxa[index] and then shifts
- *          boxa[i] --> boxa[i - 1] for all i > index.
- *      (2) The removed boxaa is destroyed.
- *      (2) This should not be used repeatedly on large arrays,
- *          because the function is O(n).
- * 
- */ -l_ok -boxaaRemoveBoxa(BOXAA *baa, - l_int32 index) -{ -l_int32 i, n; -BOXA **array; - - PROCNAME("boxaaRemoveBox"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - n = boxaaGetCount(baa); - if (index < 0 || index >= n) - return ERROR_INT("index not valid", procName, 1); - - array = baa->boxa; - boxaDestroy(&array[index]); - for (i = index + 1; i < n; i++) - array[i - 1] = array[i]; - array[n - 1] = NULL; - baa->n--; - - return 0; -} - - -/*! - * \brief boxaaAddBox() - * - * \param[in] baa - * \param[in] index of boxa with boxaa - * \param[in] box to be added - * \param[in] accessflag L_INSERT, L_COPY or L_CLONE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Adds to an existing boxa only.
- * 
- */ -l_ok -boxaaAddBox(BOXAA *baa, - l_int32 index, - BOX *box, - l_int32 accessflag) -{ -l_int32 n; -BOXA *boxa; - PROCNAME("boxaaAddBox"); - - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - n = boxaaGetCount(baa); - if (index < 0 || index >= n) - return ERROR_INT("index not valid", procName, 1); - if (accessflag != L_INSERT && accessflag != L_COPY && accessflag != L_CLONE) - return ERROR_INT("invalid accessflag", procName, 1); - - boxa = boxaaGetBoxa(baa, index, L_CLONE); - boxaAddBox(boxa, box, accessflag); - boxaDestroy(&boxa); - return 0; -} - - -/*---------------------------------------------------------------------* - * Boxaa serialized I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaaReadFromFiles() - * - * \param[in] dirname directory - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] first 0-based - * \param[in] nfiles use 0 for everything from %first to the end - * \return baa, or NULL on error or if no boxa files are found. - * - *
- * Notes:
- *      (1) The files must be serialized boxa files (e.g., *.ba).
- *          If some files cannot be read, warnings are issued.
- *      (2) Use %substr to filter filenames in the directory.  If
- *          %substr == NULL, this takes all files.
- *      (3) After filtering, use %first and %nfiles to select
- *          a contiguous set of files, that have been lexically
- *          sorted in increasing order.
- * 
- */ -BOXAA * -boxaaReadFromFiles(const char *dirname, - const char *substr, - l_int32 first, - l_int32 nfiles) -{ -char *fname; -l_int32 i, n; -BOXA *boxa; -BOXAA *baa; -SARRAY *sa; - - PROCNAME("boxaaReadFromFiles"); - - if (!dirname) - return (BOXAA *)ERROR_PTR("dirname not defined", procName, NULL); - - sa = getSortedPathnamesInDirectory(dirname, substr, first, nfiles); - if (!sa || ((n = sarrayGetCount(sa)) == 0)) { - sarrayDestroy(&sa); - return (BOXAA *)ERROR_PTR("no pixa files found", procName, NULL); - } - - baa = boxaaCreate(n); - for (i = 0; i < n; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - if ((boxa = boxaRead(fname)) == NULL) { - L_ERROR("boxa not read for %d-th file", procName, i); - continue; - } - boxaaAddBoxa(baa, boxa, L_INSERT); - } - - sarrayDestroy(&sa); - return baa; -} - - -/*! - * \brief boxaaRead() - * - * \param[in] filename - * \return boxaa, or NULL on error - */ -BOXAA * -boxaaRead(const char *filename) -{ -FILE *fp; -BOXAA *baa; - - PROCNAME("boxaaRead"); - - if (!filename) - return (BOXAA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (BOXAA *)ERROR_PTR("stream not opened", procName, NULL); - baa = boxaaReadStream(fp); - fclose(fp); - if (!baa) - return (BOXAA *)ERROR_PTR("boxaa not read", procName, NULL); - return baa; -} - - -/*! 
- * \brief boxaaReadStream() - * - * \param[in] fp input file stream - * \return boxaa, or NULL on error - */ -BOXAA * -boxaaReadStream(FILE *fp) -{ -l_int32 n, i, x, y, w, h, version; -l_int32 ignore; -BOXA *boxa; -BOXAA *baa; - - PROCNAME("boxaaReadStream"); - - if (!fp) - return (BOXAA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nBoxaa Version %d\n", &version) != 1) - return (BOXAA *)ERROR_PTR("not a boxaa file", procName, NULL); - if (version != BOXAA_VERSION_NUMBER) - return (BOXAA *)ERROR_PTR("invalid boxa version", procName, NULL); - if (fscanf(fp, "Number of boxa = %d\n", &n) != 1) - return (BOXAA *)ERROR_PTR("not a boxaa file", procName, NULL); - - if ((baa = boxaaCreate(n)) == NULL) - return (BOXAA *)ERROR_PTR("boxaa not made", procName, NULL); - for (i = 0; i < n; i++) { - if (fscanf(fp, "\nBoxa[%d] extent: x = %d, y = %d, w = %d, h = %d", - &ignore, &x, &y, &w, &h) != 5) { - boxaaDestroy(&baa); - return (BOXAA *)ERROR_PTR("boxa descr not valid", procName, NULL); - } - if ((boxa = boxaReadStream(fp)) == NULL) { - boxaaDestroy(&baa); - return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL); - } - boxaaAddBoxa(baa, boxa, L_INSERT); - } - return baa; -} - - -/*! - * \brief boxaaReadMem() - * - * \param[in] data serialization of boxaa; in ascii - * \param[in] size of data in bytes; can use strlen to get it - * \return baa, or NULL on error - */ -BOXAA * -boxaaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -BOXAA *baa; - - PROCNAME("boxaaReadMem"); - - if (!data) - return (BOXAA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (BOXAA *)ERROR_PTR("stream not opened", procName, NULL); - - baa = boxaaReadStream(fp); - fclose(fp); - if (!baa) L_ERROR("baa not read\n", procName); - return baa; -} - - -/*! 
- * \brief boxaaWrite() - * - * \param[in] filename - * \param[in] baa - * \return 0 if OK, 1 on error - */ -l_ok -boxaaWrite(const char *filename, - BOXAA *baa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("boxaaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = boxaaWriteStream(fp, baa); - fclose(fp); - if (ret) - return ERROR_INT("baa not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief boxaaWriteStream() - * - * \param[in] fp output file stream - * \param[in] baa - * \return 0 if OK, 1 on error - */ -l_ok -boxaaWriteStream(FILE *fp, - BOXAA *baa) -{ -l_int32 n, i, x, y, w, h; -BOX *box; -BOXA *boxa; - - PROCNAME("boxaaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - - n = boxaaGetCount(baa); - fprintf(fp, "\nBoxaa Version %d\n", BOXAA_VERSION_NUMBER); - fprintf(fp, "Number of boxa = %d\n", n); - - for (i = 0; i < n; i++) { - if ((boxa = boxaaGetBoxa(baa, i, L_CLONE)) == NULL) - return ERROR_INT("boxa not found", procName, 1); - boxaGetExtent(boxa, NULL, NULL, &box); - boxGetGeometry(box, &x, &y, &w, &h); - fprintf(fp, "\nBoxa[%d] extent: x = %d, y = %d, w = %d, h = %d", - i, x, y, w, h); - boxaWriteStream(fp, boxa); - boxDestroy(&box); - boxaDestroy(&boxa); - } - return 0; -} - - -/*! - * \brief boxaaWriteMem() - * - * \param[out] pdata data of serialized boxaa; ascii - * \param[out] psize size of returned data - * \param[in] baa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a boxaa in memory and puts the result in a buffer.
- * 
- */ -l_ok -boxaaWriteMem(l_uint8 **pdata, - size_t *psize, - BOXAA *baa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("boxaaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = boxaaWriteStream(fp, baa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = boxaaWriteStream(fp, baa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*---------------------------------------------------------------------* - * Boxa serialized I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaRead() - * - * \param[in] filename - * \return boxa, or NULL on error - */ -BOXA * -boxaRead(const char *filename) -{ -FILE *fp; -BOXA *boxa; - - PROCNAME("boxaRead"); - - if (!filename) - return (BOXA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (BOXA *)ERROR_PTR("stream not opened", procName, NULL); - boxa = boxaReadStream(fp); - fclose(fp); - if (!boxa) - return (BOXA *)ERROR_PTR("boxa not read", procName, NULL); - return boxa; -} - - -/*! 
- * \brief boxaReadStream() - * - * \param[in] fp input file stream - * \return boxa, or NULL on error - */ -BOXA * -boxaReadStream(FILE *fp) -{ -l_int32 n, i, x, y, w, h, version; -l_int32 ignore; -BOX *box; -BOXA *boxa; - - PROCNAME("boxaReadStream"); - - if (!fp) - return (BOXA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nBoxa Version %d\n", &version) != 1) - return (BOXA *)ERROR_PTR("not a boxa file", procName, NULL); - if (version != BOXA_VERSION_NUMBER) - return (BOXA *)ERROR_PTR("invalid boxa version", procName, NULL); - if (fscanf(fp, "Number of boxes = %d\n", &n) != 1) - return (BOXA *)ERROR_PTR("not a boxa file", procName, NULL); - - if ((boxa = boxaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("boxa not made", procName, NULL); - for (i = 0; i < n; i++) { - if (fscanf(fp, " Box[%d]: x = %d, y = %d, w = %d, h = %d\n", - &ignore, &x, &y, &w, &h) != 5) { - boxaDestroy(&boxa); - return (BOXA *)ERROR_PTR("box descr not valid", procName, NULL); - } - box = boxCreate(x, y, w, h); - boxaAddBox(boxa, box, L_INSERT); - } - - return boxa; -} - - -/*! - * \brief boxaReadMem() - * - * \param[in] data serialization of boxa; in ascii - * \param[in] size of data in bytes; can use strlen to get it - * \return boxa, or NULL on error - */ -BOXA * -boxaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -BOXA *boxa; - - PROCNAME("boxaReadMem"); - - if (!data) - return (BOXA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (BOXA *)ERROR_PTR("stream not opened", procName, NULL); - - boxa = boxaReadStream(fp); - fclose(fp); - if (!boxa) L_ERROR("boxa not read\n", procName); - return boxa; -} - - -/*! - * \brief boxaWriteDebug() - * - * \param[in] filename - * \param[in] boxa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Debug version, intended for use in the library when writing
- *          to files in a temp directory with names that are compiled in.
- *          This is used instead of boxaWrite() for all such library calls.
- *      (2) The global variable LeptDebugOK defaults to 0, and can be set
- *          or cleared by the function setLeptDebugOK().
- * 
- */ -l_ok -boxaWriteDebug(const char *filename, - BOXA *boxa) -{ - PROCNAME("boxaWriteDebug"); - - if (LeptDebugOK) { - return boxaWrite(filename, boxa); - } else { - L_INFO("write to named temp file %s is disabled\n", procName, filename); - return 0; - } -} - - -/*! - * \brief boxaWrite() - * - * \param[in] filename - * \param[in] boxa - * \return 0 if OK, 1 on error - */ -l_ok -boxaWrite(const char *filename, - BOXA *boxa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("boxaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = boxaWriteStream(fp, boxa); - fclose(fp); - if (ret) - return ERROR_INT("boxa not written to stream", procName, 1); - - return 0; -} - - -/*! - * \brief boxaWriteStream() - * - * \param[in] fp file stream; use NULL for stderr - * \param[in] boxa - * \return 0 if OK, 1 on error - */ -l_ok -boxaWriteStream(FILE *fp, - BOXA *boxa) -{ -l_int32 n, i; -BOX *box; - - PROCNAME("boxaWriteStream"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!fp) - return boxaWriteStderr(boxa); - - n = boxaGetCount(boxa); - fprintf(fp, "\nBoxa Version %d\n", BOXA_VERSION_NUMBER); - fprintf(fp, "Number of boxes = %d\n", n); - for (i = 0; i < n; i++) { - if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) - return ERROR_INT("box not found", procName, 1); - fprintf(fp, " Box[%d]: x = %d, y = %d, w = %d, h = %d\n", - i, box->x, box->y, box->w, box->h); - boxDestroy(&box); - } - return 0; -} - - -/*! 
- * \brief boxaWriteStderr() - * - * \param[in] boxa - * \return 0 if OK, 1 on error - */ -l_ok -boxaWriteStderr(BOXA *boxa) -{ -l_int32 n, i; -BOX *box; - - PROCNAME("boxaWriteStderr"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - lept_stderr("\nBoxa Version %d\n", BOXA_VERSION_NUMBER); - lept_stderr("Number of boxes = %d\n", n); - for (i = 0; i < n; i++) { - if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) - return ERROR_INT("box not found", procName, 1); - lept_stderr(" Box[%d]: x = %d, y = %d, w = %d, h = %d\n", - i, box->x, box->y, box->w, box->h); - boxDestroy(&box); - } - return 0; -} - - -/*! - * \brief boxaWriteMem() - * - * \param[out] pdata data of serialized boxa; ascii - * \param[out] psize size of returned data - * \param[in] boxa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a boxa in memory and puts the result in a buffer.
- * 
- */ -l_ok -boxaWriteMem(l_uint8 **pdata, - size_t *psize, - BOXA *boxa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("boxaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = boxaWriteStream(fp, boxa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = boxaWriteStream(fp, boxa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*---------------------------------------------------------------------* - * Debug printing * - *---------------------------------------------------------------------*/ -/*! - * \brief boxPrintStreamInfo() - * - * \param[in] fp file stream; use NULL for stderr - * \param[in] box - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This outputs debug info.  Use serialization functions to
- *          write to file if you want to read the data back.
- * 
- */ -l_ok -boxPrintStreamInfo(FILE *fp, - BOX *box) -{ - PROCNAME("boxPrintStreamInfo"); - - if (!box) - return ERROR_INT("box not defined", procName, 1); - - if (!fp) { /* output to stderr */ - lept_stderr(" Box: x = %d, y = %d, w = %d, h = %d\n", - box->x, box->y, box->w, box->h); - } else { - fprintf(fp, " Box: x = %d, y = %d, w = %d, h = %d\n", - box->x, box->y, box->w, box->h); - } - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc1.c deleted file mode 100644 index 45e8995d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc1.c +++ /dev/null @@ -1,2737 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file boxfunc1.c - *
- *
- *      Box geometry
- *           l_int32   boxContains()
- *           l_int32   boxIntersects()
- *           BOXA     *boxaContainedInBox()
- *           l_int32   boxaContainedInBoxCount()
- *           l_int32   boxaContainedInBoxa()
- *           BOXA     *boxaIntersectsBox()
- *           l_int32   boxaIntersectsBoxCount()
- *           BOXA     *boxaClipToBox()
- *           BOXA     *boxaCombineOverlaps()
- *           l_int32   boxaCombineOverlapsInPair()
- *           BOX      *boxOverlapRegion()
- *           BOX      *boxBoundingRegion()
- *           l_int32   boxOverlapFraction()
- *           l_int32   boxOverlapArea()
- *           BOXA     *boxaHandleOverlaps()
- *           l_int32   boxOverlapDistance()
- *           l_int32   boxSeparationDistance()
- *           l_int32   boxCompareSize()
- *           l_int32   boxContainsPt()
- *           BOX      *boxaGetNearestToPt()
- *           BOX      *boxaGetNearestToLine()
- *           l_int32   boxaFindNearestBoxes()
- *           l_int32   boxaGetNearestByDirection()
- *    static l_int32   boxHasOverlapInXorY()
- *    static l_int32   boxGetDistanceInXorY()
- *           l_int32   boxIntersectByLine()
- *           l_int32   boxGetCenter()
- *           BOX      *boxClipToRectangle()
- *           l_int32   boxClipToRectangleParams()
- *           BOX      *boxRelocateOneSide()
- *           BOXA     *boxaAdjustSides()
- *           BOXA     *boxaAdjustBoxSides()
- *           BOX      *boxAdjustSides()
- *           BOXA     *boxaSetSide()
- *           l_int32   boxSetSide()
- *           BOXA     *boxaAdjustWidthToTarget()
- *           BOXA     *boxaAdjustHeightToTarget()
- *           l_int32   boxEqual()
- *           l_int32   boxaEqual()
- *           l_int32   boxSimilar()
- *           l_int32   boxaSimilar()
- *
- *      Boxa combine and split
- *           l_int32   boxaJoin()
- *           l_int32   boxaaJoin()
- *           l_int32   boxaSplitEvenOdd()
- *           BOXA     *boxaMergeEvenOdd()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -static l_int32 boxHasOverlapInXorY(l_int32 c1, l_int32 s1, l_int32 c2, - l_int32 s2); -static l_int32 boxGetDistanceInXorY(l_int32 c1, l_int32 s1, l_int32 c2, - l_int32 s2); - - -/*---------------------------------------------------------------------* - * Box geometry * - *---------------------------------------------------------------------*/ -/*! - * \brief boxContains() - * - * \param[in] box1, box2 - * \param[out] presult 1 if box2 is entirely contained within box1; - * 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -boxContains(BOX *box1, - BOX *box2, - l_int32 *presult) -{ -l_int32 x1, y1, w1, h1, x2, y2, w2, h2, valid1, valid2; - - PROCNAME("boxContains"); - - if (!presult) - return ERROR_INT("&result not defined", procName, 1); - *presult = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - - boxGetGeometry(box1, &x1, &y1, &w1, &h1); - boxGetGeometry(box2, &x2, &y2, &w2, &h2); - if (x1 <= x2 && y1 <= y2 && (x1 + w1 >= x2 + w2) && (y1 + h1 >= y2 + h2)) - *presult = 1; - return 0; -} - - -/*! 
- * \brief boxIntersects() - * - * \param[in] box1, box2 - * \param[out] presult 1 if any part of box2 is contained in box1; - * 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -boxIntersects(BOX *box1, - BOX *box2, - l_int32 *presult) -{ -l_int32 l1, l2, r1, r2, t1, t2, b1, b2, w1, h1, w2, h2, valid1, valid2; - - PROCNAME("boxIntersects"); - - if (!presult) - return ERROR_INT("&result not defined", procName, 1); - *presult = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - - boxGetGeometry(box1, &l1, &t1, &w1, &h1); - boxGetGeometry(box2, &l2, &t2, &w2, &h2); - r1 = l1 + w1 - 1; - r2 = l2 + w2 - 1; - b1 = t1 + h1 - 1; - b2 = t2 + h2 - 1; - if (b2 < t1 || b1 < t2 || r1 < l2 || r2 < l1) - *presult = 0; - else - *presult = 1; - return 0; -} - - -/*! - * \brief boxaContainedInBox() - * - * \param[in] boxas - * \param[in] box for containment - * \return boxad boxa with all boxes in boxas that are entirely - * contained in box, or NULL on error - * - *
- * Notes:
- *      (1) All boxes in %boxas that are entirely outside box are removed.
- *      (2) If %box is not valid, returns an empty boxa.
- * 
- */ -BOXA * -boxaContainedInBox(BOXA *boxas, - BOX *box) -{ -l_int32 i, n, val, valid; -BOX *box1; -BOXA *boxad; - - PROCNAME("boxaContainedInBox"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (!box) - return (BOXA *)ERROR_PTR("box not defined", procName, NULL); - n = boxaGetCount(boxas); - boxIsValid(box, &valid); - if (n == 0 || !valid) - return boxaCreate(1); /* empty */ - - boxad = boxaCreate(0); - for (i = 0; i < n; i++) { - if ((box1 = boxaGetValidBox(boxas, i, L_CLONE)) == NULL) - continue; - boxContains(box, box1, &val); - if (val == 1) - boxaAddBox(boxad, box1, L_COPY); - boxDestroy(&box1); /* destroy the clone */ - } - - return boxad; -} - - -/*! - * \brief boxaContainedInBoxCount() - * - * \param[in] boxa - * \param[in] box for selecting contained boxes in %boxa - * \param[out] pcount number of boxes intersecting the box - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %box is not valid, returns a zero count.
- * 
- */ -l_ok -boxaContainedInBoxCount(BOXA *boxa, - BOX *box, - l_int32 *pcount) -{ -l_int32 i, n, val, valid; -BOX *box1; - - PROCNAME("boxaContainedInBoxCount"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - n = boxaGetCount(boxa); - boxIsValid(box, &valid); - if (n == 0 || !valid) - return 0; - - for (i = 0; i < n; i++) { - if ((box1 = boxaGetValidBox(boxa, i, L_CLONE)) == NULL) - continue; - boxContains(box, box1, &val); - if (val == 1) - (*pcount)++; - boxDestroy(&box1); - } - return 0; -} - - -/*! - * \brief boxaContainedInBoxa() - * - * \param[in] boxa1, boxa2 - * \param[out] pcontained 1 if every box in boxa2 is contained in - * some box in boxa1; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -boxaContainedInBoxa(BOXA *boxa1, - BOXA *boxa2, - l_int32 *pcontained) -{ -l_int32 i, j, n1, n2, cont, result; -BOX *box1, *box2; - - PROCNAME("boxaContainedInBoxa"); - - if (!pcontained) - return ERROR_INT("&contained not defined", procName, 1); - *pcontained = 0; - if (!boxa1 || !boxa2) - return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1); - - n1 = boxaGetCount(boxa1); - n2 = boxaGetCount(boxa2); - for (i = 0; i < n2; i++) { - if ((box2 = boxaGetValidBox(boxa2, i, L_CLONE)) == NULL) - continue; - cont = 0; - for (j = 0; j < n1; j++) { - if ((box1 = boxaGetValidBox(boxa1, j, L_CLONE)) == NULL) - continue; - boxContains(box1, box2, &result); - boxDestroy(&box1); - if (result) { - cont = 1; - break; - } - } - boxDestroy(&box2); - if (!cont) return 0; - } - - *pcontained = 1; - return 0; -} - - -/*! - * \brief boxaIntersectsBox() - * - * \param[in] boxas - * \param[in] box for intersecting - * \return boxad boxa with all boxes in boxas that intersect box, - * or NULL on error - * - *
- * Notes:
- *      (1) All boxes in boxa that intersect with box (i.e., are completely
- *          or partially contained in box) are retained.
- * 
- */ -BOXA * -boxaIntersectsBox(BOXA *boxas, - BOX *box) -{ -l_int32 i, n, val, valid; -BOX *box1; -BOXA *boxad; - - PROCNAME("boxaIntersectsBox"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (!box) - return (BOXA *)ERROR_PTR("box not defined", procName, NULL); - n = boxaGetCount(boxas); - boxIsValid(box, &valid); - if (n == 0 || !valid) - return boxaCreate(1); /* empty */ - - boxad = boxaCreate(0); - for (i = 0; i < n; i++) { - if ((box1 = boxaGetValidBox(boxas, i, L_CLONE)) == NULL) - continue; - boxIntersects(box, box1, &val); - if (val == 1) - boxaAddBox(boxad, box1, L_COPY); - boxDestroy(&box1); /* destroy the clone */ - } - - return boxad; -} - - -/*! - * \brief boxaIntersectsBoxCount() - * - * \param[in] boxa - * \param[in] box for selecting intersecting boxes in %boxa - * \param[out] pcount number of boxes intersecting the box - * \return 0 if OK, 1 on error - */ -l_ok -boxaIntersectsBoxCount(BOXA *boxa, - BOX *box, - l_int32 *pcount) -{ -l_int32 i, n, val, valid; -BOX *box1; - - PROCNAME("boxaIntersectsBoxCount"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - n = boxaGetCount(boxa); - boxIsValid(box, &valid); - if (n == 0 || !valid) - return 0; - - for (i = 0; i < n; i++) { - if ((box1 = boxaGetValidBox(boxa, i, L_CLONE)) == NULL) - continue; - boxIntersects(box, box1, &val); - if (val == 1) - (*pcount)++; - boxDestroy(&box1); - } - return 0; -} - - -/*! - * \brief boxaClipToBox() - * - * \param[in] boxas - * \param[in] box for clipping - * \return boxad boxa with boxes in boxas clipped to box, or NULL on error - * - *
- * Notes:
- *      (1) All boxes in boxa not intersecting with box are removed, and
- *          the remaining boxes are clipped to box.
- * 
- */ -BOXA * -boxaClipToBox(BOXA *boxas, - BOX *box) -{ -l_int32 i, n, valid; -BOX *box1, *boxo; -BOXA *boxad; - - PROCNAME("boxaClipToBox"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (!box) - return (BOXA *)ERROR_PTR("box not defined", procName, NULL); - n = boxaGetCount(boxas); - boxIsValid(box, &valid); - if (n == 0 || !valid) - return boxaCreate(1); /* empty */ - - boxad = boxaCreate(0); - for (i = 0; i < n; i++) { - if ((box1 = boxaGetValidBox(boxas, i, L_CLONE)) == NULL) - continue; - if ((boxo = boxOverlapRegion(box, box1)) != NULL) - boxaAddBox(boxad, boxo, L_INSERT); - boxDestroy(&box1); - } - - return boxad; -} - - -/*! - * \brief boxaCombineOverlaps() - * - * \param[in] boxas - * \param[in,out] pixadb debug output - * \return boxad where each set of boxes in boxas that overlap are combined - * into a single bounding box in boxad, or NULL on error. - * - *
- * Notes:
- *      (1) If there are no overlapping boxes, it simply returns a copy
- *          of %boxas.
- *      (2) Input an empty %pixadb, using pixaCreate(0), for debug output.
- *          The output gives 2 visualizations of the boxes per iteration;
- *          boxes in red before, and added boxes in green after. Note that
- *          all pixels in the red boxes are contained in the green ones.
- *      (3) The alternative method of painting each rectangle and finding
- *          the 4-connected components gives a different result in
- *          general, because two non-overlapping (but touching)
- *          rectangles, when rendered, are 4-connected and will be joined.
- *      (4) A bad case computationally is to have n boxes, none of which
- *          overlap.  Then you have one iteration with O(n^2) compares.
- *          This is still faster than painting each rectangle and finding
- *          the bounding boxes of the connected components, even for
- *          thousands of rectangles.
- * 
- */ -BOXA * -boxaCombineOverlaps(BOXA *boxas, - PIXA *pixadb) -{ -l_int32 i, j, w, h, n1, n2, overlap, niters; -BOX *box1, *box2, *box3; -BOXA *boxa1, *boxa2; -PIX *pix1; - - PROCNAME("boxaCombineOverlaps"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - - if (pixadb) boxaGetExtent(boxas, &w, &h, NULL); - - boxa1 = boxaCopy(boxas, L_COPY); - n1 = boxaGetCount(boxa1); - niters = 0; - while (1) { /* loop until no change from previous iteration */ - niters++; - if (pixadb) { - pix1 = pixCreate(w + 5, h + 5, 32); - pixSetAll(pix1); - pixRenderBoxaArb(pix1, boxa1, 2, 255, 0, 0); - pixaAddPix(pixadb, pix1, L_COPY); - } - - /* Combine overlaps for this iteration */ - for (i = 0; i < n1; i++) { - if ((box1 = boxaGetValidBox(boxa1, i, L_COPY)) == NULL) - continue; - for (j = i + 1; j < n1; j++) { - if ((box2 = boxaGetValidBox(boxa1, j, L_COPY)) == NULL) - continue; - boxIntersects(box1, box2, &overlap); - if (overlap) { - box3 = boxBoundingRegion(box1, box2); - boxaReplaceBox(boxa1, i, box3); - boxaReplaceBox(boxa1, j, boxCreate(0, 0, 0, 0)); - boxDestroy(&box1); - box1 = boxCopy(box3); - } - boxDestroy(&box2); - } - boxDestroy(&box1); - } - boxa2 = boxaSaveValid(boxa1, L_COPY); - n2 = boxaGetCount(boxa2); - boxaDestroy(&boxa1); - boxa1 = boxa2; - if (n1 == n2) { - if (pixadb) pixDestroy(&pix1); - break; - } - n1 = n2; - if (pixadb) { - pixRenderBoxaArb(pix1, boxa1, 2, 0, 255, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - } - } - - if (pixadb) - L_INFO("number of iterations: %d\n", procName, niters); - return boxa1; -} - - -/*! - * \brief boxaCombineOverlapsInPair() - * - * \param[in] boxas1 input boxa1 - * \param[in] boxas2 input boxa2 - * \param[out] pboxad1 output boxa1 - * \param[out] pboxad2 output boxa2 - * \param[in,out] pixadb debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) One of three things happens to each box in %boxa1 and %boxa2:
- *           * it gets absorbed into a larger box that it overlaps with
- *           * it absorbs a smaller (by area) box that it overlaps with
- *             and gets larger, using the bounding region of the 2 boxes
- *           * it is unchanged (including absorbing smaller boxes that
- *             are contained within it).
- *      (2) If all the boxes from one of the input boxa are absorbed, this
- *          returns an empty boxa.
- *      (3) Input an empty %pixadb, using pixaCreate(0), for debug output
- *      (4) This is useful if different operations are to be carried out
- *          on possibly overlapping rectangular regions, and it is desired
- *          to have only one operation on any rectangular region.
- * 
- */ -l_ok -boxaCombineOverlapsInPair(BOXA *boxas1, - BOXA *boxas2, - BOXA **pboxad1, - BOXA **pboxad2, - PIXA *pixadb) -{ -l_int32 i, j, w, h, w2, h2, n1, n2, n1i, n2i, niters; -l_int32 overlap, bigger, area1, area2; -BOX *box1, *box2, *box3; -BOXA *boxa1, *boxa2, *boxac1, *boxac2; -PIX *pix1; - - PROCNAME("boxaCombineOverlapsInPair"); - - if (pboxad1) *pboxad1 = NULL; - if (pboxad2) *pboxad2 = NULL; - if (!boxas1 || !boxas2) - return ERROR_INT("boxas1 and boxas2 not both defined", procName, 1); - if (!pboxad1 || !pboxad2) - return ERROR_INT("&boxad1 and &boxad2 not both defined", procName, 1); - - if (pixadb) { - boxaGetExtent(boxas1, &w, &h, NULL); - boxaGetExtent(boxas2, &w2, &h2, NULL); - w = L_MAX(w, w2); - h = L_MAX(h, w2); - } - - /* Let the boxa with the largest area have first crack at the other */ - boxaGetArea(boxas1, &area1); - boxaGetArea(boxas2, &area2); - if (area1 >= area2) { - boxac1 = boxaCopy(boxas1, L_COPY); - boxac2 = boxaCopy(boxas2, L_COPY); - } else { - boxac1 = boxaCopy(boxas2, L_COPY); - boxac2 = boxaCopy(boxas1, L_COPY); - } - - n1i = boxaGetCount(boxac1); - n2i = boxaGetCount(boxac2); - niters = 0; - while (1) { - niters++; - if (pixadb) { - pix1 = pixCreate(w + 5, h + 5, 32); - pixSetAll(pix1); - pixRenderBoxaArb(pix1, boxac1, 2, 255, 0, 0); - pixRenderBoxaArb(pix1, boxac2, 2, 0, 255, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - /* First combine boxes in each set */ - boxa1 = boxaCombineOverlaps(boxac1, NULL); - boxa2 = boxaCombineOverlaps(boxac2, NULL); - - /* Now combine boxes between sets */ - n1 = boxaGetCount(boxa1); - n2 = boxaGetCount(boxa2); - for (i = 0; i < n1; i++) { /* 1 eats 2 */ - if ((box1 = boxaGetValidBox(boxa1, i, L_COPY)) == NULL) - continue; - for (j = 0; j < n2; j++) { - if ((box2 = boxaGetValidBox(boxa2, j, L_COPY)) == NULL) - continue; - boxIntersects(box1, box2, &overlap); - boxCompareSize(box1, box2, L_SORT_BY_AREA, &bigger); - if (overlap && (bigger == 1)) { - box3 = boxBoundingRegion(box1, box2); - 
boxaReplaceBox(boxa1, i, box3); - boxaReplaceBox(boxa2, j, boxCreate(0, 0, 0, 0)); - boxDestroy(&box1); - box1 = boxCopy(box3); - } - boxDestroy(&box2); - } - boxDestroy(&box1); - } - for (i = 0; i < n2; i++) { /* 2 eats 1 */ - if ((box2 = boxaGetValidBox(boxa2, i, L_COPY)) == NULL) - continue; - for (j = 0; j < n1; j++) { - if ((box1 = boxaGetValidBox(boxa1, j, L_COPY)) == NULL) - continue; - boxIntersects(box1, box2, &overlap); - boxCompareSize(box2, box1, L_SORT_BY_AREA, &bigger); - if (overlap && (bigger == 1)) { - box3 = boxBoundingRegion(box1, box2); - boxaReplaceBox(boxa2, i, box3); - boxaReplaceBox(boxa1, j, boxCreate(0, 0, 0, 0)); - boxDestroy(&box2); - box2 = boxCopy(box3); - } - boxDestroy(&box1); - } - boxDestroy(&box2); - } - boxaDestroy(&boxac1); - boxaDestroy(&boxac2); - boxac1 = boxaSaveValid(boxa1, L_COPY); /* remove invalid boxes */ - boxac2 = boxaSaveValid(boxa2, L_COPY); - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - n1 = boxaGetCount(boxac1); - n2 = boxaGetCount(boxac2); - if (n1 == n1i && n2 == n2i) break; - n1i = n1; - n2i = n2; - if (pixadb) { - pix1 = pixCreate(w + 5, h + 5, 32); - pixSetAll(pix1); - pixRenderBoxaArb(pix1, boxac1, 2, 255, 0, 0); - pixRenderBoxaArb(pix1, boxac2, 2, 0, 255, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - } - } - - if (pixadb) - L_INFO("number of iterations: %d\n", procName, niters); - *pboxad1 = boxac1; - *pboxad2 = boxac2; - return 0; -} - - -/*! - * \brief boxOverlapRegion() - * - * \param[in] box1, box2 - * \return box of overlap region between input boxes; - * NULL if no overlap or on error - * - *
- * Notes:
- *      (1) This is the geometric intersection of the two rectangles.
- * 
- */ -BOX * -boxOverlapRegion(BOX *box1, - BOX *box2) -{ -l_int32 l1, l2, r1, r2, t1, t2, b1, b2, w1, h1, w2, h2, ld, td, rd, bd; -l_int32 valid1, valid2; - - PROCNAME("boxOverlapRegion"); - - if (!box1 || !box2) - return (BOX *)ERROR_PTR("boxes not both defined", procName, NULL); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) { - L_WARNING("at least one box is invalid\n", procName); - return NULL; - } - - boxGetGeometry(box1, &l1, &t1, &w1, &h1); - boxGetGeometry(box2, &l2, &t2, &w2, &h2); - r1 = l1 + w1 - 1; - r2 = l2 + w2 - 1; - b1 = t1 + h1 - 1; - b2 = t2 + h2 - 1; - if (b2 < t1 || b1 < t2 || r1 < l2 || r2 < l1) - return NULL; - - ld = L_MAX(l1, l2); - td = L_MAX(t1, t2); - rd = L_MIN(r1, r2); - bd = L_MIN(b1, b2); - return boxCreate(ld, td, rd - ld + 1, bd - td + 1); -} - - -/*! - * \brief boxBoundingRegion() - * - * \param[in] box1, box2 - * \return box of bounding region containing the input boxes; - * NULL on error - * - *
- * Notes:
- *      (1) This is the geometric union of the two rectangles.
- *      (2) Invalid boxes are ignored.  This returns an invalid box
- *          if both input boxes are invalid.
- *      (3) For the geometric union of a boxa, use boxaGetExtent().
- * 
- */ -BOX * -boxBoundingRegion(BOX *box1, - BOX *box2) -{ -l_int32 l1, l2, r1, r2, t1, t2, b1, b2, w1, h1, w2, h2, ld, td, rd, bd; -l_int32 valid1, valid2; - - PROCNAME("boxBoundingRegion"); - - if (!box1 || !box2) - return (BOX *)ERROR_PTR("boxes not both defined", procName, NULL); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 && !valid2) { - L_WARNING("both boxes are invalid\n", procName); - return boxCreate(0, 0, 0, 0); - } - if (valid1 && !valid2) - return boxCopy(box1); - if (!valid1 && valid2) - return boxCopy(box2); - - boxGetGeometry(box1, &l1, &t1, &w1, &h1); - boxGetGeometry(box2, &l2, &t2, &w2, &h2); - r1 = l1 + w1 - 1; - r2 = l2 + w2 - 1; - b1 = t1 + h1 - 1; - b2 = t2 + h2 - 1; - ld = L_MIN(l1, l2); - td = L_MIN(t1, t2); - rd = L_MAX(r1, r2); - bd = L_MAX(b1, b2); - return boxCreate(ld, td, rd - ld + 1, bd - td + 1); -} - - -/*! - * \brief boxOverlapFraction() - * - * \param[in] box1, box2 - * \param[out] pfract the fraction of box2 overlapped by box1 - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) The result depends on the order of the input boxes,
- *          because the overlap is taken as a fraction of box2.
- *      (2) If at least one box is not valid, there is no overlap.
- * 
- */ -l_ok -boxOverlapFraction(BOX *box1, - BOX *box2, - l_float32 *pfract) -{ -l_int32 w2, h2, w, h, valid1, valid2; -BOX *boxo; - - PROCNAME("boxOverlapFraction"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) { - L_WARNING("boxes not both valid\n", procName); - return 0; - } - - if ((boxo = boxOverlapRegion(box1, box2)) == NULL) /* no overlap */ - return 0; - - boxGetGeometry(box2, NULL, NULL, &w2, &h2); - boxGetGeometry(boxo, NULL, NULL, &w, &h); - *pfract = (l_float32)(w * h) / (l_float32)(w2 * h2); - boxDestroy(&boxo); - return 0; -} - - -/*! - * \brief boxOverlapArea() - * - * \param[in] box1, box2 - * \param[out] parea the number of pixels in the overlap - * \return 0 if OK, 1 on error. - */ -l_ok -boxOverlapArea(BOX *box1, - BOX *box2, - l_int32 *parea) -{ -l_int32 w, h, valid1, valid2; -BOX *box; - - PROCNAME("boxOverlapArea"); - - if (!parea) - return ERROR_INT("&area not defined", procName, 1); - *parea = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - - if ((box = boxOverlapRegion(box1, box2)) == NULL) /* no overlap */ - return 0; - - boxGetGeometry(box, NULL, NULL, &w, &h); - *parea = w * h; - boxDestroy(&box); - return 0; -} - - -/*! 
- * \brief boxaHandleOverlaps() - * - * \param[in] boxas - * \param[in] op L_COMBINE, L_REMOVE_SMALL - * \param[in] range forward distance over which overlaps - * are checked; > 0 - * \param[in] min_overlap minimum fraction of smaller box required for - * overlap to count; 0.0 to ignore - * \param[in] max_ratio maximum fraction of small/large areas for - * overlap to count; 1.0 to ignore - * \param[out] pnamap [optional] combining map - * \return boxad, or NULL on error. - * - *
- * Notes:
- *      (1) For all n(n-1)/2 box pairings, if two boxes overlap, either:
- *          (a) op == L_COMBINE: get the bounding region for the two,
- *              replace the larger with the bounding region, and remove
- *              the smaller of the two, or
- *          (b) op == L_REMOVE_SMALL: just remove the smaller.
- *      (2) If boxas is 2D sorted, range can be small, but if it is
- *          not spatially sorted, range should be large to allow all
- *          pairwise comparisons to be made.
- *      (3) The %min_overlap parameter allows ignoring small overlaps.
- *          If %min_overlap == 1.0, only boxes fully contained in larger
- *          boxes can be considered for removal; if %min_overlap == 0.0,
- *          this constraint is ignored.
- *      (4) The %max_ratio parameter allows ignoring overlaps between
- *          boxes that are not too different in size.  If %max_ratio == 0.0,
- *          no boxes can be removed; if %max_ratio == 1.0, this constraint
- *          is ignored.
- * 
- */ -BOXA * -boxaHandleOverlaps(BOXA *boxas, - l_int32 op, - l_int32 range, - l_float32 min_overlap, - l_float32 max_ratio, - NUMA **pnamap) -{ -l_int32 i, j, n, w, h, area1, area2, val; -l_int32 overlap_area; -l_float32 overlap_ratio, area_ratio; -BOX *box1, *box2, *box3; -BOXA *boxat, *boxad; -NUMA *namap; - - PROCNAME("boxaHandleOverlaps"); - - if (pnamap) *pnamap = NULL; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (op != L_COMBINE && op != L_REMOVE_SMALL) - return (BOXA *)ERROR_PTR("invalid op", procName, NULL); - - n = boxaGetCount(boxas); - if (n == 0) - return boxaCreate(1); /* empty */ - if (range == 0) { - L_WARNING("range is 0\n", procName); - return boxaCopy(boxas, L_COPY); - } - - /* Identify smaller boxes in overlap pairs, and mark to eliminate. */ - namap = numaMakeConstant(-1, n); - for (i = 0; i < n; i++) { - if ((box1 = boxaGetValidBox(boxas, i, L_CLONE)) == NULL) - continue; - boxGetGeometry(box1, NULL, NULL, &w, &h); - area1 = w * h; - if (area1 == 0) { - boxDestroy(&box1); - continue; - } - for (j = i + 1; j < i + 1 + range && j < n; j++) { - if ((box2 = boxaGetValidBox(boxas, j, L_CLONE)) == NULL) - continue; - boxOverlapArea(box1, box2, &overlap_area); - if (overlap_area > 0) { - boxGetGeometry(box2, NULL, NULL, &w, &h); - area2 = w * h; - if (area2 == 0) { - /* do nothing */ - } else if (area1 >= area2) { - overlap_ratio = (l_float32)overlap_area / (l_float32)area2; - area_ratio = (l_float32)area2 / (l_float32)area1; - if (overlap_ratio >= min_overlap && - area_ratio <= max_ratio) { - numaSetValue(namap, j, i); - } - } else { - overlap_ratio = (l_float32)overlap_area / (l_float32)area1; - area_ratio = (l_float32)area1 / (l_float32)area2; - if (overlap_ratio >= min_overlap && - area_ratio <= max_ratio) { - numaSetValue(namap, i, j); - } - } - } - boxDestroy(&box2); - } - boxDestroy(&box1); - } - - boxat = boxaCopy(boxas, L_COPY); - if (op == L_COMBINE) { - /* Resize the larger of the pair to the 
bounding region */ - for (i = 0; i < n; i++) { - numaGetIValue(namap, i, &val); - if (val >= 0) { - box1 = boxaGetBox(boxas, i, L_CLONE); /* smaller */ - box2 = boxaGetBox(boxas, val, L_CLONE); /* larger */ - box3 = boxBoundingRegion(box1, box2); - boxaReplaceBox(boxat, val, box3); - boxDestroy(&box1); - boxDestroy(&box2); - } - } - } - - /* Remove the smaller of the pairs */ - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(namap, i, &val); - if (val == -1) { - box1 = boxaGetBox(boxat, i, L_COPY); - boxaAddBox(boxad, box1, L_INSERT); - } - } - boxaDestroy(&boxat); - if (pnamap) - *pnamap = namap; - else - numaDestroy(&namap); - return boxad; -} - - -/*! - * \brief boxOverlapDistance() - * - * \param[in] box1, box2 two boxes, in any order - * \param[out] ph_ovl [optional] horizontal overlap - * \param[out] pv_ovl [optional] vertical overlap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This measures horizontal and vertical overlap of the
- *          two boxes.  Horizontal and vertical overlap are measured
- *          independently.  We need to consider several cases to clarify.
- *      (2) A positive horizontal overlap means that there is at least
- *          one point on the the %box1 boundary with the same x-component
- *          as some point on the %box2 boundary.  Conversely, with a zero
- *          or negative horizontal overlap, there are no boundary pixels
- *          in %box1 that share an x-component with a boundary pixel in %box2.
- *      (3) For a zero or negative horizontal overlap, o <= 0, the minimum
- *          difference in the x-component between pixels on the boundaries
- *          of the two boxes is d = -o + 1.
- *      (4) Likewise for vertical overlaps.
- * 
- */ -l_ok -boxOverlapDistance(BOX *box1, - BOX *box2, - l_int32 *ph_ovl, - l_int32 *pv_ovl) -{ -l_int32 l1, t1, w1, h1, r1, b1, l2, t2, w2, h2, r2, b2, valid1, valid2; - - PROCNAME("boxOverlapDistance"); - - if (!ph_ovl && !pv_ovl) - return ERROR_INT("nothing to do", procName, 1); - if (ph_ovl) *ph_ovl = 0; - if (pv_ovl) *pv_ovl = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - - if (ph_ovl) { - boxGetGeometry(box1, &l1, NULL, &w1, NULL); - boxGetGeometry(box2, &l2, NULL, &w2, NULL); - r1 = l1 + w1; /* 1 pixel to the right of box 1 */ - r2 = l2 + w2; - if (l2 >= l1) - *ph_ovl = r1 - l2; - else - *ph_ovl = r2 - l1; - } - if (pv_ovl) { - boxGetGeometry(box1, NULL, &t1, NULL, &h1); - boxGetGeometry(box2, NULL, &t2, NULL, &h2); - b1 = t1 + h1; /* 1 pixel below box 1 */ - b2 = t2 + h2; - if (t2 >= t1) - *pv_ovl = b1 - t2; - else - *pv_ovl = b2 - t1; - } - return 0; -} - - -/*! - * \brief boxSeparationDistance() - * - * \param[in] box1, box2 two boxes, in any order - * \param[out] ph_sep horizontal separation - * \param[out] pv_sep vertical separation - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This measures the Manhattan distance between the closest points
- *          on the boundaries of the two boxes.  When the boxes overlap
- *          (including touching along a line or at a corner), the
- *          horizontal and vertical distances are 0.
- *      (2) The distances represent the horizontal and vertical separation
- *          of the two boxes.  The boxes have a nonzero intersection when
- *          both the horizontal and vertical overlaps are positive, and
- *          for that case both horizontal and vertical separation
- *          distances are 0.
- *      (3) If the horizontal overlap of the boxes is positive, the
- *          horizontal separation between nearest points on respective
- *          boundaries is 0, and likewise for the vertical overlap.
- *      (4) If the horizontal overlap ho <= 0, the horizontal
- *          separation between nearest points is d = -ho + 1.
- *          Likewise, if the vertical overlap vo <= 0, the vertical
- *          separation between nearest points is d = -vo + 1.
- * 
- */ -l_ok -boxSeparationDistance(BOX *box1, - BOX *box2, - l_int32 *ph_sep, - l_int32 *pv_sep) -{ -l_int32 h_ovl, v_ovl, valid1, valid2; - - PROCNAME("boxSeparationDistance"); - - if (ph_sep) *ph_sep = 0; - if (pv_sep) *pv_sep = 0; - if (!ph_sep || !pv_sep) - return ERROR_INT("&h_sep and &v_sep not both defined", procName, 1); - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - - boxOverlapDistance(box1, box2, &h_ovl, &v_ovl); - if (h_ovl <= 0) - *ph_sep = -h_ovl + 1; - if (v_ovl <= 0) - *pv_sep = -v_ovl + 1; - return 0; -} - - -/*! - * \brief boxCompareSize() - * - * \param[in] box1, box2 - * \param[in] type L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, - * L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER, - * L_SORT_BY_AREA, - * \param[out] prel 1 if box1 > box2, 0 if the same, -1 if box1 < box2 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We're re-using the SORT enum for these comparisons.
- * 
- */ -l_ok -boxCompareSize(BOX *box1, - BOX *box2, - l_int32 type, - l_int32 *prel) -{ -l_int32 w1, h1, w2, h2, size1, size2, valid1, valid2; - - PROCNAME("boxCompareSize"); - - if (!prel) - return ERROR_INT("&rel not defined", procName, 1); - *prel = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - if (type != L_SORT_BY_WIDTH && type != L_SORT_BY_HEIGHT && - type != L_SORT_BY_MAX_DIMENSION && type != L_SORT_BY_PERIMETER && - type != L_SORT_BY_AREA) - return ERROR_INT("invalid compare type", procName, 1); - - boxGetGeometry(box1, NULL, NULL, &w1, &h1); - boxGetGeometry(box2, NULL, NULL, &w2, &h2); - if (type == L_SORT_BY_WIDTH) { - *prel = (w1 > w2) ? 1 : ((w1 == w2) ? 0 : -1); - } else if (type == L_SORT_BY_HEIGHT) { - *prel = (h1 > h2) ? 1 : ((h1 == h2) ? 0 : -1); - } else if (type == L_SORT_BY_MAX_DIMENSION) { - size1 = L_MAX(w1, h1); - size2 = L_MAX(w2, h2); - *prel = (size1 > size2) ? 1 : ((size1 == size2) ? 0 : -1); - } else if (type == L_SORT_BY_PERIMETER) { - size1 = w1 + h1; - size2 = w2 + h2; - *prel = (size1 > size2) ? 1 : ((size1 == size2) ? 0 : -1); - } else if (type == L_SORT_BY_AREA) { - size1 = w1 * h1; - size2 = w2 * h2; - *prel = (size1 > size2) ? 1 : ((size1 == size2) ? 0 : -1); - } - return 0; -} - - -/*! - * \brief boxContainsPt() - * - * \param[in] box - * \param[in] x, y a point - * \param[out] pcontains 1 if box contains point; 0 otherwise - * \return 0 if OK, 1 on error. 
- */ -l_ok -boxContainsPt(BOX *box, - l_float32 x, - l_float32 y, - l_int32 *pcontains) -{ -l_int32 bx, by, bw, bh; - - PROCNAME("boxContainsPt"); - - if (!pcontains) - return ERROR_INT("&contains not defined", procName, 1); - *pcontains = 0; - if (!box) - return ERROR_INT("&box not defined", procName, 1); - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (x >= bx && x < bx + bw && y >= by && y < by + bh) - *pcontains = 1; - return 0; -} - - -/*! - * \brief boxaGetNearestToPt() - * - * \param[in] boxa - * \param[in] x, y point - * \return box with centroid closest to the given point [x,y], - * or NULL if no boxes in boxa - * - *
- * Notes:
- *      (1) Uses euclidean distance between centroid and point.
- * 
- */ -BOX * -boxaGetNearestToPt(BOXA *boxa, - l_int32 x, - l_int32 y) -{ -l_int32 i, n, minindex; -l_float32 delx, dely, dist, mindist, cx, cy; -BOX *box; - - PROCNAME("boxaGetNearestToPt"); - - if (!boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - if ((n = boxaGetCount(boxa)) == 0) - return (BOX *)ERROR_PTR("n = 0", procName, NULL); - - mindist = 1000000000.; - minindex = 0; - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxa, i, L_CLONE)) == NULL) - continue; - boxGetCenter(box, &cx, &cy); - delx = (l_float32)(cx - x); - dely = (l_float32)(cy - y); - dist = delx * delx + dely * dely; - if (dist < mindist) { - minindex = i; - mindist = dist; - } - boxDestroy(&box); - } - - return boxaGetBox(boxa, minindex, L_COPY); -} - - -/*! - * \brief boxaGetNearestToLine() - * - * \param[in] boxa - * \param[in] x, y (y = -1 for vertical line; x = -1 for horiz line) - * \return box with centroid closest to the given line, - * or NULL if no boxes in boxa - * - *
- * Notes:
- *      (1) For a horizontal line at some value y, get the minimum of the
- *          distance |yc - y| from the box centroid yc value to y;
- *          likewise minimize |xc - x| for a vertical line at x.
- *      (2) Input y < 0, x >= 0 to indicate a vertical line at x, and
- *          x < 0, y >= 0 for a horizontal line at y.
- * 
- */ -BOX * -boxaGetNearestToLine(BOXA *boxa, - l_int32 x, - l_int32 y) -{ -l_int32 i, n, minindex; -l_float32 dist, mindist, cx, cy; -BOX *box; - - PROCNAME("boxaGetNearestToLine"); - - if (!boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - if ((n = boxaGetCount(boxa)) == 0) - return (BOX *)ERROR_PTR("n = 0", procName, NULL); - if (y >= 0 && x >= 0) - return (BOX *)ERROR_PTR("either x or y must be < 0", procName, NULL); - if (y < 0 && x < 0) - return (BOX *)ERROR_PTR("either x or y must be >= 0", procName, NULL); - - mindist = 1000000000.; - minindex = 0; - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxa, i, L_CLONE)) == NULL) - continue; - boxGetCenter(box, &cx, &cy); - if (x >= 0) - dist = L_ABS(cx - (l_float32)x); - else /* y >= 0 */ - dist = L_ABS(cy - (l_float32)y); - if (dist < mindist) { - minindex = i; - mindist = dist; - } - boxDestroy(&box); - } - - return boxaGetBox(boxa, minindex, L_COPY); -} - - -/*! - * \brief boxaFindNearestBoxes() - * - * \param[in] boxa either unsorted, or 2D sorted in LR/TB scan order - * \param[in] dist_select L_NON_NEGATIVE, L_ALL - * \param[in] range search distance from box i; use 0 to search - * entire boxa (e.g., if it's not 2D sorted) - * \param[out] pnaaindex for each box in %boxa, contains a numa of 4 - * box indices (per direction) of the nearest box - * \param[out] pnaadist for each box in %boxa, this contains a numa - * \return 0 if OK, 1 on error - *
- * Notes:
- *      (1) See boxaGetNearestByDirection() for usage of %dist_select
- *          and %range.
- * 
- */ -l_ok -boxaFindNearestBoxes(BOXA *boxa, - l_int32 dist_select, - l_int32 range, - NUMAA **pnaaindex, - NUMAA **pnaadist) -{ -l_int32 i, n, index, dist; -NUMA *nai, *nad; -NUMAA *naai, *naad; - - PROCNAME("boxaFindNearestBoxes"); - - if (pnaaindex) *pnaaindex = NULL; - if (pnaadist) *pnaadist = NULL; - if (!pnaaindex) - return ERROR_INT("&naaindex not defined", procName, 1); - if (!pnaadist) - return ERROR_INT("&naadist not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - naai = numaaCreate(n); - naad = numaaCreate(n); - *pnaaindex = naai; - *pnaadist = naad; - for (i = 0; i < n; i++) { - nai = numaCreate(4); - nad = numaCreate(4); - boxaGetNearestByDirection(boxa, i, L_FROM_LEFT, dist_select, - range, &index, &dist); - numaAddNumber(nai, index); - numaAddNumber(nad, dist); - boxaGetNearestByDirection(boxa, i, L_FROM_RIGHT, dist_select, - range, &index, &dist); - numaAddNumber(nai, index); - numaAddNumber(nad, dist); - boxaGetNearestByDirection(boxa, i, L_FROM_TOP, dist_select, - range, &index, &dist); - numaAddNumber(nai, index); - numaAddNumber(nad, dist); - boxaGetNearestByDirection(boxa, i, L_FROM_BOT, dist_select, - range, &index, &dist); - numaAddNumber(nai, index); - numaAddNumber(nad, dist); - numaaAddNuma(naai, nai, L_INSERT); - numaaAddNuma(naad, nad, L_INSERT); - } - return 0; -} - - -/*! 
- * \brief boxaGetNearestByDirection() - * - * \param[in] boxa either unsorted, or 2D sorted in LR/TB scan order - * \param[in] i box we test against - * \param[in] dir direction to look: L_FROM_LEFT, L_FROM_RIGHT, - * L_FROM_TOP, L_FROM_BOT - * \param[in] dist_select L_NON_NEGATIVE, L_ALL - * \param[in] range search distance from box i; use 0 to search - * entire boxa (e.g., if it's not 2D sorted) - * \param[out] pindex index in boxa of nearest box with overlapping - * coordinates in the indicated direction; - * -1 if there is no box - * \param[out] pdist distance of the nearest box in the indicated - * direction; 100000 if no box - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For efficiency, use a LR/TD sorted %boxa, which can be
- *          made by flattening a 2D sorted boxaa.  In that case,
- *          %range can be some positive integer like 50.
- *      (2) If boxes overlap, the distance will be < 0.  Use %dist_select
- *          to determine if these should count or not.  If L_ALL, then
- *          one box will match as the nearest to another in 2 or more
- *          directions.
- * 
- */ -l_ok -boxaGetNearestByDirection(BOXA *boxa, - l_int32 i, - l_int32 dir, - l_int32 dist_select, - l_int32 range, - l_int32 *pindex, - l_int32 *pdist) -{ -l_int32 j, jmin, jmax, n, mindist, dist, index; -l_int32 x, y, w, h, bx, by, bw, bh; - - PROCNAME("boxaGetNearestByDirection"); - - if (pindex) *pindex = -1; - if (pdist) *pdist = 100000; - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - if (!pdist) - return ERROR_INT("&dist not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (dir != L_FROM_LEFT && dir != L_FROM_RIGHT && - dir != L_FROM_TOP && dir != L_FROM_BOT) - return ERROR_INT("invalid dir", procName, 1); - if (dist_select != L_NON_NEGATIVE && dist_select != L_ALL) - return ERROR_INT("invalid dist_select", procName, 1); - n = boxaGetCount(boxa); - if (i < 0 || i >= n) - return ERROR_INT("invalid box index", procName, 1); - - jmin = (range <= 0) ? 0 : L_MAX(0, i - range); - jmax = (range <= 0) ? n - 1 : L_MIN(n -1, i + range); - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - mindist = 100000; - index = -1; - if (dir == L_FROM_LEFT || dir == L_FROM_RIGHT) { - for (j = jmin; j <= jmax; j++) { - if (j == i) continue; - boxaGetBoxGeometry(boxa, j, &bx, &by, &bw, &bh); - if ((bx >= x && dir == L_FROM_LEFT) || /* not to the left */ - (x >= bx && dir == L_FROM_RIGHT)) /* not to the right */ - continue; - if (boxHasOverlapInXorY(y, h, by, bh) == 1) { - dist = boxGetDistanceInXorY(x, w, bx, bw); - if (dist_select == L_NON_NEGATIVE && dist < 0) continue; - if (dist < mindist) { - mindist = dist; - index = j; - } - } - } - } else if (dir == L_FROM_TOP || dir == L_FROM_BOT) { - for (j = jmin; j <= jmax; j++) { - if (j == i) continue; - boxaGetBoxGeometry(boxa, j, &bx, &by, &bw, &bh); - if ((by >= y && dir == L_FROM_TOP) || /* not above */ - (y >= by && dir == L_FROM_BOT)) /* not below */ - continue; - if (boxHasOverlapInXorY(x, w, bx, bw) == 1) { - dist = boxGetDistanceInXorY(y, h, by, bh); - if 
(dist_select == L_NON_NEGATIVE && dist < 0) continue; - if (dist < mindist) { - mindist = dist; - index = j; - } - } - } - } - *pindex = index; - *pdist = mindist; - return 0; -} - - -/*! - * \brief boxHasOverlapInXorY() - * - * \param[in] c1 left or top coordinate of box1 - * \param[in] s1 width or height of box1 - * \param[in] c2 left or top coordinate of box2 - * \param[in] s2 width or height of box2 - * \return 0 if no overlap; 1 if any overlap - * - *
- * Notes:
- *      (1) Like boxGetDistanceInXorY(), this is used for overlaps both in
- *          x (which projected vertically) and in y (projected horizontally)
- * 
- */ -static l_int32 -boxHasOverlapInXorY(l_int32 c1, - l_int32 s1, - l_int32 c2, - l_int32 s2) -{ -l_int32 ovlp; - - if (c1 > c2) - ovlp = c2 + s2 - 1 - c1; - else - ovlp = c1 + s1 - 1 - c2; - return (ovlp < 0) ? 0 : 1; -} - - -/*! - * \brief boxGetDistanceInXorY() - * - * \param[in] c1 left or top coordinate of box1 - * \param[in] s1 width or height of box1 - * \param[in] c2 left or top coordinate of box2 - * \param[in] s2 width or height of box2 - * \return distance between them (if < 0, box2 overlaps box1 in the - * dimension considered) - */ -static l_int32 -boxGetDistanceInXorY(l_int32 c1, - l_int32 s1, - l_int32 c2, - l_int32 s2) -{ -l_int32 dist; - - if (c1 > c2) - dist = c1 - (c2 + s2 - 1); - else - dist = c2 - (c1 + s1 - 1); - return dist; -} - - -/*! - * \brief boxGetCenter() - * - * \param[in] box - * \param[out] pcx, pcy location of center of box - * \return 0 if OK, 1 on error or if box is not valid - */ -l_ok -boxGetCenter(BOX *box, - l_float32 *pcx, - l_float32 *pcy) -{ -l_int32 x, y, w, h; - - PROCNAME("boxGetCenter"); - - if (pcx) *pcx = 0; - if (pcy) *pcy = 0; - if (!pcx || !pcy) - return ERROR_INT("&cx, &cy not both defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - boxGetGeometry(box, &x, &y, &w, &h); - if (w == 0 || h == 0) return 1; - *pcx = (l_float32)(x + 0.5 * w); - *pcy = (l_float32)(y + 0.5 * h); - - return 0; -} - - -/*! - * \brief boxIntersectByLine() - * - * \param[in] box - * \param[in] x, y point that line goes through - * \param[in] slope of line - * \param[out] px1, py1 1st point of intersection with box - * \param[out] px2, py2 2nd point of intersection with box - * \param[out] pn number of points of intersection - * \return 0 if OK, 1 on error or if box is not valid - * - *
- * Notes:
- *      (1) If the intersection is at only one point (a corner), the
- *          coordinates are returned in (x1, y1).
- *      (2) Represent a vertical line by one with a large but finite slope.
- * 
- */ -l_ok -boxIntersectByLine(BOX *box, - l_int32 x, - l_int32 y, - l_float32 slope, - l_int32 *px1, - l_int32 *py1, - l_int32 *px2, - l_int32 *py2, - l_int32 *pn) -{ -l_int32 bx, by, bw, bh, xp, yp, xt, yt, i, n; -l_float32 invslope; -PTA *pta; - - PROCNAME("boxIntersectByLine"); - - if (px1) *px1 = 0; - if (px2) *px2 = 0; - if (py1) *py1 = 0; - if (py2) *py2 = 0; - if (pn) *pn = 0; - if (!px1 || !py1 || !px2 || !py2) - return ERROR_INT("&x1, &y1, &x2, &y2 not all defined", procName, 1); - if (!pn) - return ERROR_INT("&n not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (bw == 0 || bh == 0) return 1; - - if (slope == 0.0) { - if (y >= by && y < by + bh) { - *py1 = *py2 = y; - *px1 = bx; - *px2 = bx + bw - 1; - } - return 0; - } - - if (slope > 1000000.0) { - if (x >= bx && x < bx + bw) { - *px1 = *px2 = x; - *py1 = by; - *py2 = by + bh - 1; - } - return 0; - } - - /* Intersection with top and bottom lines of box */ - pta = ptaCreate(2); - invslope = 1.0 / slope; - xp = (l_int32)(x + invslope * (y - by)); - if (xp >= bx && xp < bx + bw) - ptaAddPt(pta, xp, by); - xp = (l_int32)(x + invslope * (y - by - bh + 1)); - if (xp >= bx && xp < bx + bw) - ptaAddPt(pta, xp, by + bh - 1); - - /* Intersection with left and right lines of box */ - yp = (l_int32)(y + slope * (x - bx)); - if (yp >= by && yp < by + bh) - ptaAddPt(pta, bx, yp); - yp = (l_int32)(y + slope * (x - bx - bw + 1)); - if (yp >= by && yp < by + bh) - ptaAddPt(pta, bx + bw - 1, yp); - - /* There is a maximum of 2 unique points; remove duplicates. */ - n = ptaGetCount(pta); - if (n > 0) { - ptaGetIPt(pta, 0, px1, py1); /* accept the first one */ - *pn = 1; - } - for (i = 1; i < n; i++) { - ptaGetIPt(pta, i, &xt, &yt); - if ((*px1 != xt) || (*py1 != yt)) { - *px2 = xt; - *py2 = yt; - *pn = 2; - break; - } - } - - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief boxClipToRectangle() - * - * \param[in] box - * \param[in] wi, hi rectangle representing image - * \return part of box within given rectangle, or NULL on error - * or if box is entirely outside the rectangle - * - *
- * Notes:
- *      (1) This can be used to clip a rectangle to an image.
- *          The clipping rectangle is assumed to have a UL corner at (0, 0),
- *          and a LR corner at (wi - 1, hi - 1).
- * 
- */ -BOX * -boxClipToRectangle(BOX *box, - l_int32 wi, - l_int32 hi) -{ -BOX *boxd; - - PROCNAME("boxClipToRectangle"); - - if (!box) - return (BOX *)ERROR_PTR("box not defined", procName, NULL); - if (box->x >= wi || box->y >= hi || - box->x + box->w <= 0 || box->y + box->h <= 0) - return (BOX *)ERROR_PTR("box outside rectangle", procName, NULL); - - boxd = boxCopy(box); - if (boxd->x < 0) { - boxd->w += boxd->x; - boxd->x = 0; - } - if (boxd->y < 0) { - boxd->h += boxd->y; - boxd->y = 0; - } - if (boxd->x + boxd->w > wi) - boxd->w = wi - boxd->x; - if (boxd->y + boxd->h > hi) - boxd->h = hi - boxd->y; - return boxd; -} - - -/*! - * \brief boxClipToRectangleParams() - * - * \param[in] box [optional] requested box; can be null - * \param[in] w, h clipping box size; typ. the size of an image - * \param[out] pxstart start x coordinate - * \param[out] pystart start y coordinate - * \param[out] pxend one pixel beyond clipping box - * \param[out] pyend one pixel beyond clipping box - * \param[out] pbw [optional] clipped width - * \param[out] pbh [optional] clipped height - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The return value should be checked.  If it is 1, the
- *          returned parameter values are bogus.
- *      (2) This simplifies the selection of pixel locations within
- *          a given rectangle:
- *             for (i = ystart; i < yend; i++ {
- *                 ...
- *                 for (j = xstart; j < xend; j++ {
- *                     ....
- * 
- */ -l_ok -boxClipToRectangleParams(BOX *box, - l_int32 w, - l_int32 h, - l_int32 *pxstart, - l_int32 *pystart, - l_int32 *pxend, - l_int32 *pyend, - l_int32 *pbw, - l_int32 *pbh) -{ -l_int32 bw, bh; -BOX *boxc; - - PROCNAME("boxClipToRectangleParams"); - - if (pxstart) *pxstart = 0; - if (pystart) *pystart = 0; - if (pxend) *pxend = w; - if (pyend) *pyend = h; - if (pbw) *pbw = w; - if (pbh) *pbh = h; - if (!pxstart || !pystart || !pxend || !pyend) - return ERROR_INT("invalid ptr input", procName, 1); - if (!box) return 0; - - if ((boxc = boxClipToRectangle(box, w, h)) == NULL) - return ERROR_INT("box outside image", procName, 1); - boxGetGeometry(boxc, pxstart, pystart, &bw, &bh); - boxDestroy(&boxc); - - if (pbw) *pbw = bw; - if (pbh) *pbh = bh; - if (bw == 0 || bh == 0) - return ERROR_INT("invalid clipping box", procName, 1); - *pxend = *pxstart + bw; /* 1 past the end */ - *pyend = *pystart + bh; /* 1 past the end */ - return 0; -} - - -/*! - * \brief boxRelocateOneSide() - * - * \param[in] boxd [optional]; this can be null, equal to boxs, - * or different from boxs; - * \param[in] boxs starting box; to have one side relocated - * \param[in] loc new location of the side that is changing - * \param[in] sideflag L_FROM_LEFT, etc., indicating the side that moves - * \return boxd, or NULL on error or if the computed boxd has - * width or height <= 0. - * - *
- * Notes:
- *      (1) Set boxd == NULL to get new box; boxd == boxs for in-place;
- *          or otherwise to resize existing boxd.
- *      (2) For usage, suggest one of these:
- *               boxd = boxRelocateOneSide(NULL, boxs, ...);   // new
- *               boxRelocateOneSide(boxs, boxs, ...);          // in-place
- *               boxRelocateOneSide(boxd, boxs, ...);          // other
- * 
- */ -BOX * -boxRelocateOneSide(BOX *boxd, - BOX *boxs, - l_int32 loc, - l_int32 sideflag) -{ -l_int32 x, y, w, h; - - PROCNAME("boxRelocateOneSide"); - - if (!boxs) - return (BOX *)ERROR_PTR("boxs not defined", procName, NULL); - if (!boxd) - boxd = boxCopy(boxs); - - boxGetGeometry(boxs, &x, &y, &w, &h); - if (w == 0 || h == 0) - return boxd; - if (sideflag == L_FROM_LEFT) - boxSetGeometry(boxd, loc, -1, w + x - loc, -1); - else if (sideflag == L_FROM_RIGHT) - boxSetGeometry(boxd, -1, -1, loc - x + 1, -1); - else if (sideflag == L_FROM_TOP) - boxSetGeometry(boxd, -1, loc, -1, h + y - loc); - else if (sideflag == L_FROM_BOT) - boxSetGeometry(boxd, -1, -1, -1, loc - y + 1); - return boxd; -} - - -/*! - * \brief boxaAdjustSides() - * - * \param[in] boxas - * \param[in] delleft, delright, deltop, delbot changes in location of - * each side for each box - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) New box dimensions are cropped at left and top to x >= 0 and y >= 0.
- *      (2) If the width or height of a box goes to 0, we generate a box with
- *          w == 1 and h == 1, as a placeholder.
- *      (3) See boxAdjustSides().
- * 
- */ -BOXA * -boxaAdjustSides(BOXA *boxas, - l_int32 delleft, - l_int32 delright, - l_int32 deltop, - l_int32 delbot) -{ -l_int32 n, i, x, y; -BOX *box1, *box2; -BOXA *boxad; - - PROCNAME("boxaAdjustSides"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - - n = boxaGetCount(boxas); - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - box1 = boxaGetBox(boxas, i, L_COPY); - box2 = boxAdjustSides(NULL, box1, delleft, delright, deltop, delbot); - if (!box2) { - boxGetGeometry(box1, &x, &y, NULL, NULL); - box2 = boxCreate(x, y, 1, 1); - } - boxaAddBox(boxad, box2, L_INSERT); - boxDestroy(&box1); - } - - return boxad; -} - - -/*! - * \brief boxaAdjustBoxSides() - * - * \param[in] boxas - * \param[in] index - * \param[in] delleft, delright, deltop, delbot changes to box side locs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place operation on a box in a boxa.
- *      (2) New box dimensions are cropped at left and top to x >= 0 and y >= 0.
- *      (3) If a box ends up with no area, an error message is emitted,
- *          but the box dimensions are not changed.
- *      (4) See boxaAdjustSides().
- * 
- */ -l_ok -boxaAdjustBoxSides(BOXA *boxa, - l_int32 index, - l_int32 delleft, - l_int32 delright, - l_int32 deltop, - l_int32 delbot) -{ -BOX *box; - - PROCNAME("boxaAdjustBoxSides"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - if ((box = boxaGetBox(boxa, index, L_CLONE)) == NULL) - return ERROR_INT("invalid index", procName, 1); - - boxAdjustSides(box, box, delleft, delright, deltop, delbot); - boxDestroy(&box); /* the clone */ - return 0; -} - - -/*! - * \brief boxAdjustSides() - * - * \param[in] boxd [optional]; this can be null, equal to boxs, - * or different from boxs - * \param[in] boxs starting box; to have sides adjusted - * \param[in] delleft, delright, deltop, delbot changes in location - * of each side - * \return boxd, or NULL on error or if the computed boxd has - * width or height <= 0. - * - *
- * Notes:
- *      (1) Set boxd == NULL to get new box; boxd == boxs for in-place;
- *          or otherwise to resize existing boxd.
- *      (2) For usage, suggest one of these:
- *               boxd = boxAdjustSides(NULL, boxs, ...);   // new
- *               boxAdjustSides(boxs, boxs, ...);          // in-place
- *               boxAdjustSides(boxd, boxs, ...);          // other
- *      (3) New box dimensions are cropped at left and top to x >= 0 and y >= 0.
- *      (4) For example, to expand in-place by 20 pixels on each side, use
- *             boxAdjustSides(box, box, -20, 20, -20, 20);
- * 
- */ -BOX * -boxAdjustSides(BOX *boxd, - BOX *boxs, - l_int32 delleft, - l_int32 delright, - l_int32 deltop, - l_int32 delbot) -{ -l_int32 x, y, w, h, xl, xr, yt, yb, wnew, hnew; - - PROCNAME("boxAdjustSides"); - - if (!boxs) - return (BOX *)ERROR_PTR("boxs not defined", procName, NULL); - - boxGetGeometry(boxs, &x, &y, &w, &h); - xl = L_MAX(0, x + delleft); - yt = L_MAX(0, y + deltop); - xr = x + w + delright; /* one pixel beyond right edge */ - yb = y + h + delbot; /* one pixel below bottom edge */ - wnew = xr - xl; - hnew = yb - yt; - - if (wnew < 1 || hnew < 1) - return (BOX *)ERROR_PTR("boxd has 0 area", procName, NULL); - if (!boxd) - return boxCreate(xl, yt, wnew, hnew); - - boxSetGeometry(boxd, xl, yt, wnew, hnew); - return boxd; -} - - -/*! - * \brief boxaSetSide() - * - * \param[in] boxad use NULL to get a new one; same as boxas for in-place - * \param[in] boxas - * \param[in] side L_SET_LEFT, L_SET_RIGHT, L_SET_TOP, L_SET_BOT - * \param[in] val location to set for given side, for each box - * \param[in] thresh min abs difference to cause resetting to %val - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) Sets the given side of each box.  Use boxad == NULL for a new
- *          boxa, and boxad == boxas for in-place.
- *      (2) Use one of these:
- *               boxad = boxaSetSide(NULL, boxas, ...);   // new
- *               boxaSetSide(boxas, boxas, ...);  // in-place
- * 
- */ -BOXA * -boxaSetSide(BOXA *boxad, - BOXA *boxas, - l_int32 side, - l_int32 val, - l_int32 thresh) -{ -l_int32 n, i; -BOX *box; - - PROCNAME("boxaSetSide"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxad && (boxas != boxad)) - return (BOXA *)ERROR_PTR("not in-place", procName, NULL); - if (side != L_SET_LEFT && side != L_SET_RIGHT && - side != L_SET_TOP && side != L_SET_BOT) - return (BOXA *)ERROR_PTR("invalid side", procName, NULL); - if (val < 0) - return (BOXA *)ERROR_PTR("val < 0", procName, NULL); - - if (!boxad) - boxad = boxaCopy(boxas, L_COPY); - n = boxaGetCount(boxad); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxad, i, L_CLONE); - boxSetSide(box, side, val, thresh); - boxDestroy(&box); /* the clone */ - } - - return boxad; -} - - -/*! - * \brief boxSetSide() - * - * \param[in] boxs - * \param[in] side L_SET_LEFT, L_SET_RIGHT, L_SET_TOP, L_SET_BOT - * \param[in] val location to set for given side, for each box - * \param[in] thresh min abs difference to cause resetting to %val - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place operation.
- *      (2) Use %thresh = 0 to definitely set the side to %val.
- * 
- */ -l_ok -boxSetSide(BOX *boxs, - l_int32 side, - l_int32 val, - l_int32 thresh) -{ -l_int32 x, y, w, h, diff; - - PROCNAME("boxSetSide"); - - if (!boxs) - return ERROR_INT("box not defined", procName, 1); - if (side != L_SET_LEFT && side != L_SET_RIGHT && - side != L_SET_TOP && side != L_SET_BOT) - return ERROR_INT("invalid side", procName, 1); - if (val < 0) - return ERROR_INT("val < 0", procName, 1); - - boxGetGeometry(boxs, &x, &y, &w, &h); - if (side == L_SET_LEFT) { - diff = x - val; - if (L_ABS(diff) >= thresh) - boxSetGeometry(boxs, val, y, w + diff, h); - } else if (side == L_SET_RIGHT) { - diff = x + w -1 - val; - if (L_ABS(diff) >= thresh) - boxSetGeometry(boxs, x, y, val - x + 1, h); - } else if (side == L_SET_TOP) { - diff = y - val; - if (L_ABS(diff) >= thresh) - boxSetGeometry(boxs, x, val, w, h + diff); - } else { /* side == L_SET_BOT */ - diff = y + h - 1 - val; - if (L_ABS(diff) >= thresh) - boxSetGeometry(boxs, x, y, w, val - y + 1); - } - - return 0; -} - - -/*! - * \brief boxaAdjustWidthToTarget() - * - * \param[in] boxad use NULL to get a new one; same as boxas for in-place - * \param[in] boxas - * \param[in] sides L_ADJUST_LEFT, L_ADJUST_RIGHT, L_ADJUST_LEFT_AND_RIGHT - * \param[in] target target width if differs by more than thresh - * \param[in] thresh min abs difference in width to cause adjustment - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) Conditionally adjusts the width of each box, by moving
- *          the indicated edges (left and/or right) if the width differs
- *          by %thresh or more from %target.
- *      (2) Use boxad == NULL for a new boxa, and boxad == boxas for in-place.
- *          Use one of these:
- *               boxad = boxaAdjustWidthToTarget(NULL, boxas, ...);   // new
- *               boxaAdjustWidthToTarget(boxas, boxas, ...);  // in-place
- * 
- */ -BOXA * -boxaAdjustWidthToTarget(BOXA *boxad, - BOXA *boxas, - l_int32 sides, - l_int32 target, - l_int32 thresh) -{ -l_int32 x, y, w, h, n, i, diff; -BOX *box; - - PROCNAME("boxaAdjustWidthToTarget"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxad && (boxas != boxad)) - return (BOXA *)ERROR_PTR("not in-place", procName, NULL); - if (sides != L_ADJUST_LEFT && sides != L_ADJUST_RIGHT && - sides != L_ADJUST_LEFT_AND_RIGHT) - return (BOXA *)ERROR_PTR("invalid sides", procName, NULL); - if (target < 1) - return (BOXA *)ERROR_PTR("target < 1", procName, NULL); - - if (!boxad) - boxad = boxaCopy(boxas, L_COPY); - n = boxaGetCount(boxad); - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxad, i, L_CLONE)) == NULL) - continue; - boxGetGeometry(box, &x, &y, &w, &h); - diff = w - target; - if (sides == L_ADJUST_LEFT) { - if (L_ABS(diff) >= thresh) - boxSetGeometry(box, L_MAX(0, x + diff), y, target, h); - } else if (sides == L_ADJUST_RIGHT) { - if (L_ABS(diff) >= thresh) - boxSetGeometry(box, x, y, target, h); - } else { /* sides == L_ADJUST_LEFT_AND_RIGHT */ - if (L_ABS(diff) >= thresh) - boxSetGeometry(box, L_MAX(0, x + diff/2), y, target, h); - } - boxDestroy(&box); - } - - return boxad; -} - - -/*! - * \brief boxaAdjustHeightToTarget() - * - * \param[in] boxad use NULL to get a new one - * \param[in] boxas - * \param[in] sides L_ADJUST_TOP, L_ADJUST_BOT, L_ADJUST_TOP_AND_BOT - * \param[in] target target height if differs by more than thresh - * \param[in] thresh min abs difference in height to cause adjustment - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) Conditionally adjusts the height of each box, by moving
- *          the indicated edges (top and/or bot) if the height differs
- *          by %thresh or more from %target.
- *      (2) Use boxad == NULL for a new boxa, and boxad == boxas for in-place.
- *          Use one of these:
- *               boxad = boxaAdjustHeightToTarget(NULL, boxas, ...);   // new
- *               boxaAdjustHeightToTarget(boxas, boxas, ...);  // in-place
- * 
- */ -BOXA * -boxaAdjustHeightToTarget(BOXA *boxad, - BOXA *boxas, - l_int32 sides, - l_int32 target, - l_int32 thresh) -{ -l_int32 x, y, w, h, n, i, diff; -BOX *box; - - PROCNAME("boxaAdjustHeightToTarget"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxad && (boxas != boxad)) - return (BOXA *)ERROR_PTR("not in-place", procName, NULL); - if (sides != L_ADJUST_TOP && sides != L_ADJUST_BOT && - sides != L_ADJUST_TOP_AND_BOT) - return (BOXA *)ERROR_PTR("invalid sides", procName, NULL); - if (target < 1) - return (BOXA *)ERROR_PTR("target < 1", procName, NULL); - - if (!boxad) - boxad = boxaCopy(boxas, L_COPY); - n = boxaGetCount(boxad); - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxad, i, L_CLONE)) == NULL) - continue; - boxGetGeometry(box, &x, &y, &w, &h); - diff = h - target; - if (sides == L_ADJUST_TOP) { - if (L_ABS(diff) >= thresh) - boxSetGeometry(box, x, L_MAX(0, y + diff), w, target); - } else if (sides == L_ADJUST_BOT) { - if (L_ABS(diff) >= thresh) - boxSetGeometry(box, x, y, w, target); - } else { /* sides == L_ADJUST_TOP_AND_BOT */ - if (L_ABS(diff) >= thresh) - boxSetGeometry(box, x, L_MAX(0, y + diff/2), w, target); - } - boxDestroy(&box); - } - - return boxad; -} - - -/*! - * \brief boxEqual() - * - * \param[in] box1 - * \param[in] box2 - * \param[out] psame 1 if equal; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -boxEqual(BOX *box1, - BOX *box2, - l_int32 *psame) -{ - PROCNAME("boxEqual"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - if (box1->x == box2->x && box1->y == box2->y && - box1->w == box2->w && box1->h == box2->h) - *psame = 1; - return 0; -} - - -/*! 
- * \brief boxaEqual() - * - * \param[in] boxa1 - * \param[in] boxa2 - * \param[in] maxdist - * \param[out] pnaindex [optional] index array of correspondences - * \param[out] psame 1 if equal; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The two boxa are the "same" if they contain the same
- *          boxes and each box is within %maxdist of its counterpart
- *          in their positions within the boxa.  This allows for
- *          small rearrangements.  Use 0 for maxdist if the boxa
- *          must be identical.
- *      (2) This applies only to geometry and ordering; refcounts
- *          are not considered.
- *      (3) %maxdist allows some latitude in the ordering of the boxes.
- *          For the boxa to be the "same", corresponding boxes must
- *          be within %maxdist of each other.  Note that for large
- *          %maxdist, we should use a hash function for efficiency.
- *      (4) naindex[i] gives the position of the box in boxa2 that
- *          corresponds to box i in boxa1.  It is only returned if the
- *          boxa are equal.
- * 
- */ -l_ok -boxaEqual(BOXA *boxa1, - BOXA *boxa2, - l_int32 maxdist, - NUMA **pnaindex, - l_int32 *psame) -{ -l_int32 i, j, n, jstart, jend, found, samebox; -l_int32 *countarray; -BOX *box1, *box2; -NUMA *na; - - PROCNAME("boxaEqual"); - - if (pnaindex) *pnaindex = NULL; - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - if (!boxa1 || !boxa2) - return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1); - n = boxaGetCount(boxa1); - if (n != boxaGetCount(boxa2)) - return 0; - - if ((countarray = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL) - return ERROR_INT("calloc fail for countarray", procName, 1); - na = numaMakeConstant(0.0, n); - - for (i = 0; i < n; i++) { - box1 = boxaGetBox(boxa1, i, L_CLONE); - jstart = L_MAX(0, i - maxdist); - jend = L_MIN(n-1, i + maxdist); - found = FALSE; - for (j = jstart; j <= jend; j++) { - box2 = boxaGetBox(boxa2, j, L_CLONE); - boxEqual(box1, box2, &samebox); - if (samebox && countarray[j] == 0) { - countarray[j] = 1; - numaReplaceNumber(na, i, j); - found = TRUE; - boxDestroy(&box2); - break; - } - boxDestroy(&box2); - } - boxDestroy(&box1); - if (!found) { - numaDestroy(&na); - LEPT_FREE(countarray); - return 0; - } - } - - *psame = 1; - if (pnaindex) - *pnaindex = na; - else - numaDestroy(&na); - LEPT_FREE(countarray); - return 0; -} - - -/*! - * \brief boxSimilar() - * - * \param[in] box1 - * \param[in] box2 - * \param[in] leftdiff, rightdiff, topdiff, botdiff - * \param[out] psimilar 1 if similar; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The values of leftdiff (etc) are the maximum allowed deviations
- *          between the locations of the left (etc) sides.  If any side
- *          pairs differ by more than this amount, the boxes are not similar.
- * 
- */ -l_ok -boxSimilar(BOX *box1, - BOX *box2, - l_int32 leftdiff, - l_int32 rightdiff, - l_int32 topdiff, - l_int32 botdiff, - l_int32 *psimilar) -{ -l_int32 l1, l2, r1, r2, t1, t2, b1, b2, valid1, valid2; - - PROCNAME("boxSimilar"); - - if (!psimilar) - return ERROR_INT("&similar not defined", procName, 1); - *psimilar = 0; - if (!box1 || !box2) - return ERROR_INT("boxes not both defined", procName, 1); - boxIsValid(box1, &valid1); - boxIsValid(box2, &valid2); - if (!valid1 || !valid2) - return ERROR_INT("boxes not both valid", procName, 1); - - boxGetSideLocations(box1, &l1, &r1, &t1, &b1); - boxGetSideLocations(box2, &l2, &r2, &t2, &b2); - if (L_ABS(l1 - l2) > leftdiff) - return 0; - if (L_ABS(r1 - r2) > rightdiff) - return 0; - if (L_ABS(t1 - t2) > topdiff) - return 0; - if (L_ABS(b1 - b2) > botdiff) - return 0; - - *psimilar = 1; - return 0; -} - - -/*! - * \brief boxaSimilar() - * - * \param[in] boxa1 - * \param[in] boxa2 - * \param[in] leftdiff, rightdiff, topdiff, botdiff - * \param[in] debug output details of non-similar boxes - * \param[out] psimilar 1 if similar; 0 otherwise - * \param[out] pnasim [optional] na containing 1 if similar; else 0 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See boxSimilar() for parameter usage.
- *      (2) Corresponding boxes are taken in order in the two boxa.
- *      (3) %nasim is an indicator array with a (0/1) for each box pair.
- *      (4) With %nasim or debug == 1, boxes continue to be tested
- *          after failure.
- * 
- */ -l_ok -boxaSimilar(BOXA *boxa1, - BOXA *boxa2, - l_int32 leftdiff, - l_int32 rightdiff, - l_int32 topdiff, - l_int32 botdiff, - l_int32 debug, - l_int32 *psimilar, - NUMA **pnasim) -{ -l_int32 i, n1, n2, match, mismatch; -BOX *box1, *box2; - - PROCNAME("boxaSimilar"); - - if (psimilar) *psimilar = 0; - if (pnasim) *pnasim = NULL; - if (!boxa1 || !boxa2) - return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1); - if (!psimilar) - return ERROR_INT("&similar not defined", procName, 1); - n1 = boxaGetCount(boxa1); - n2 = boxaGetCount(boxa2); - if (n1 != n2) { - L_ERROR("boxa counts differ: %d vs %d\n", procName, n1, n2); - return 1; - } - if (pnasim) *pnasim = numaCreate(n1); - - mismatch = FALSE; - for (i = 0; i < n1; i++) { - box1 = boxaGetBox(boxa1, i, L_CLONE); - box2 = boxaGetBox(boxa2, i, L_CLONE); - boxSimilar(box1, box2, leftdiff, rightdiff, topdiff, botdiff, - &match); - boxDestroy(&box1); - boxDestroy(&box2); - if (pnasim) - numaAddNumber(*pnasim, match); - if (!match) { - mismatch = TRUE; - if (!debug && pnasim == NULL) - return 0; - else if (debug) - L_INFO("box %d not similar\n", procName, i); - } - } - - if (!mismatch) *psimilar = 1; - return 0; -} - - -/*----------------------------------------------------------------------* - * Boxa combine and split * - *----------------------------------------------------------------------*/ -/*! - * \brief boxaJoin() - * - * \param[in] boxad dest boxa; add to this one - * \param[in] boxas source boxa; add from this one - * \param[in] istart starting index in boxas - * \param[in] iend ending index in boxas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This appends a clone of each indicated box in boxas to boxad
- *      (2) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (3) iend < 0 means 'read to the end'
- *      (4) if boxas == NULL or has no boxes, this is a no-op.
- * 
- */ -l_ok -boxaJoin(BOXA *boxad, - BOXA *boxas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i; -BOX *box; - - PROCNAME("boxaJoin"); - - if (!boxad) - return ERROR_INT("boxad not defined", procName, 1); - if (!boxas || ((n = boxaGetCount(boxas)) == 0)) - return 0; - - if (istart < 0) - istart = 0; - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - box = boxaGetBox(boxas, i, L_CLONE); - boxaAddBox(boxad, box, L_INSERT); - } - - return 0; -} - - -/*! - * \brief boxaaJoin() - * - * \param[in] baad dest boxaa; add to this one - * \param[in] baas source boxaa; add from this one - * \param[in] istart starting index in baas - * \param[in] iend ending index in baas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This appends a clone of each indicated boxa in baas to baad
- *      (2) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (3) iend < 0 means 'read to the end'
- *      (4) if baas == NULL, this is a no-op.
- * 
- */ -l_ok -boxaaJoin(BOXAA *baad, - BOXAA *baas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i; -BOXA *boxa; - - PROCNAME("boxaaJoin"); - - if (!baad) - return ERROR_INT("baad not defined", procName, 1); - if (!baas) - return 0; - - if (istart < 0) - istart = 0; - n = boxaaGetCount(baas); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - boxa = boxaaGetBoxa(baas, i, L_CLONE); - boxaaAddBoxa(baad, boxa, L_INSERT); - } - - return 0; -} - - -/*! - * \brief boxaSplitEvenOdd() - * - * \param[in] boxa - * \param[in] fillflag 1 to put invalid boxes in place; 0 to omit - * \param[out] pboxae, pboxao save even and odd boxes in their separate - * boxa, setting the other type to invalid boxes. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %fillflag == 1, boxae has copies of the even boxes
- *          in their original location, and nvalid boxes are placed
- *          in the odd array locations.  And v.v.
- *      (2) If %fillflag == 0, boxae has only copies of the even boxes.
- * 
- */ -l_ok -boxaSplitEvenOdd(BOXA *boxa, - l_int32 fillflag, - BOXA **pboxae, - BOXA **pboxao) -{ -l_int32 i, n; -BOX *box, *box1; - - PROCNAME("boxaSplitEvenOdd"); - - if (pboxae) *pboxae = NULL; - if (pboxao) *pboxao = NULL; - if (!pboxae || !pboxao) - return ERROR_INT("&boxae and &boxao not both defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - *pboxae = boxaCreate(n); - *pboxao = boxaCreate(n); - if (fillflag == 0) { - /* don't fill with invalid boxes; end up with half-size boxa */ - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_COPY); - if ((i & 1) == 0) - boxaAddBox(*pboxae, box, L_INSERT); - else - boxaAddBox(*pboxao, box, L_INSERT); - } - } else { - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_COPY); - box1 = boxCreate(0, 0, 0, 0); /* empty placeholder */ - if ((i & 1) == 0) { - boxaAddBox(*pboxae, box, L_INSERT); - boxaAddBox(*pboxao, box1, L_INSERT); - } else { - boxaAddBox(*pboxae, box1, L_INSERT); - boxaAddBox(*pboxao, box, L_INSERT); - } - } - } - return 0; -} - - -/*! - * \brief boxaMergeEvenOdd() - * - * \param[in] boxae boxes to go in even positions in merged boxa - * \param[in] boxao boxes to go in odd positions in merged boxa - * \param[in] fillflag 1 if there are invalid boxes in placeholders - * \return boxad merged, or NULL on error - * - *
- * Notes:
- *      (1) This is essentially the inverse of boxaSplitEvenOdd().
- *          Typically, boxae and boxao were generated by boxaSplitEvenOdd(),
- *          and the value of %fillflag needs to be the same in both calls.
- *      (2) If %fillflag == 1, both boxae and boxao are of the same size;
- *          otherwise boxae may have one more box than boxao.
- * 
- */ -BOXA * -boxaMergeEvenOdd(BOXA *boxae, - BOXA *boxao, - l_int32 fillflag) -{ -l_int32 i, n, ne, no; -BOX *box; -BOXA *boxad; - - PROCNAME("boxaMergeEvenOdd"); - - if (!boxae || !boxao) - return (BOXA *)ERROR_PTR("boxae and boxao not defined", procName, NULL); - ne = boxaGetCount(boxae); - no = boxaGetCount(boxao); - if (ne < no || ne > no + 1) - return (BOXA *)ERROR_PTR("boxa sizes invalid", procName, NULL); - - boxad = boxaCreate(ne); - if (fillflag == 0) { /* both are approx. half-sized; all valid boxes */ - n = ne + no; - for (i = 0; i < n; i++) { - if ((i & 1) == 0) - box = boxaGetBox(boxae, i / 2, L_COPY); - else - box = boxaGetBox(boxao, i / 2, L_COPY); - boxaAddBox(boxad, box, L_INSERT); - } - } else { /* both are full size and have invalid placeholders */ - for (i = 0; i < ne; i++) { - if ((i & 1) == 0) - box = boxaGetBox(boxae, i, L_COPY); - else - box = boxaGetBox(boxao, i, L_COPY); - boxaAddBox(boxad, box, L_INSERT); - } - } - return boxad; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc2.c deleted file mode 100644 index 98f2808a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc2.c +++ /dev/null @@ -1,1933 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file boxfunc2.c - *
- *
- *      Boxa/Box transform (shift, scale) and orthogonal rotation
- *           BOXA            *boxaTransform()
- *           BOX             *boxTransform()
- *           BOXA            *boxaTransformOrdered()
- *           BOX             *boxTransformOrdered()
- *           BOXA            *boxaRotateOrth()
- *           BOX             *boxRotateOrth()
- *           BOXA            *boxaShiftWithPta()
- *
- *      Boxa sort
- *           BOXA            *boxaSort()
- *           BOXA            *boxaBinSort()
- *           BOXA            *boxaSortByIndex()
- *           BOXAA           *boxaSort2d()
- *           BOXAA           *boxaSort2dByIndex()
- *
- *      Boxa statistics
- *           l_int32          boxaGetRankVals()
- *           l_int32          boxaGetMedianVals()
- *           l_int32          boxaGetAverageSize()
- *
- *      Boxa array extraction
- *           l_int32          boxaExtractAsNuma()
- *           l_int32          boxaExtractAsPta()
- *           PTA             *boxaExtractCorners()
- *
- *      Other Boxaa functions
- *           l_int32          boxaaGetExtent()
- *           BOXA            *boxaaFlattenToBoxa()
- *           BOXA            *boxaaFlattenAligned()
- *           BOXAA           *boxaEncapsulateAligned()
- *           BOXAA           *boxaaTranspose()
- *           l_int32          boxaaAlignBox()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* For more than this number of c.c. in a binarized image of - * semi-perimeter (w + h) about 5000 or less, the O(n) binsort - * is faster than the O(nlogn) shellsort. */ -static const l_int32 MinCompsForBinSort = 200; - -/*---------------------------------------------------------------------* - * Boxa/Box transform (shift, scale) and orthogonal rotation * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaTransform() - * - * \param[in] boxas - * \param[in] shiftx - * \param[in] shifty - * \param[in] scalex - * \param[in] scaley - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) This is a very simple function that first shifts, then scales.
- *      (2) The UL corner coordinates of all boxes in the output %boxad
- *      (3) For the boxes in the output %boxad, the UL corner coordinates
- *          must be non-negative, and the width and height of valid
- *          boxes must be at least 1.
- * 
- */ -BOXA * -boxaTransform(BOXA *boxas, - l_int32 shiftx, - l_int32 shifty, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 i, n; -BOX *boxs, *boxd; -BOXA *boxad; - - PROCNAME("boxaTransform"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - n = boxaGetCount(boxas); - if ((boxad = boxaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { - boxaDestroy(&boxad); - return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); - } - boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley); - boxDestroy(&boxs); - boxaAddBox(boxad, boxd, L_INSERT); - } - - return boxad; -} - - -/*! - * \brief boxTransform() - * - * \param[in] box - * \param[in] shiftx - * \param[in] shifty - * \param[in] scalex - * \param[in] scaley - * \return boxd, or NULL on error - * - *
- * Notes:
- *      (1) This is a very simple function that first shifts, then scales.
- *      (2) If the box is invalid, a new invalid box is returned.
- *      (3) The UL corner coordinates must be non-negative, and the
- *          width and height of valid boxes must be at least 1.
- * 
- */ -BOX * -boxTransform(BOX *box, - l_int32 shiftx, - l_int32 shifty, - l_float32 scalex, - l_float32 scaley) -{ - PROCNAME("boxTransform"); - - if (!box) - return (BOX *)ERROR_PTR("box not defined", procName, NULL); - if (box->w <= 0 || box->h <= 0) - return boxCreate(0, 0, 0, 0); - else - return boxCreate((l_int32)(L_MAX(0, scalex * (box->x + shiftx) + 0.5)), - (l_int32)(L_MAX(0, scaley * (box->y + shifty) + 0.5)), - (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)), - (l_int32)(L_MAX(1.0, scaley * box->h + 0.5))); -} - - -/*! - * \brief boxaTransformOrdered() - * - * \param[in] boxas - * \param[in] shiftx - * \param[in] shifty - * \param[in] scalex - * \param[in] scaley - * \param[in] xcen, ycen center of rotation - * \param[in] angle in radians; clockwise is positive - * \param[in] order one of 6 combinations: L_TR_SC_RO, ... - * \return boxd, or NULL on error - * - *
- *          shift, scaling and rotation, and the order of the
- *          transforms is specified.
- *      (2) Although these operations appear to be on an infinite
- *          2D plane, in practice the region of interest is clipped
- *          to a finite image.  The center of rotation is usually taken
- *          with respect to the image (either the UL corner or the
- *          center).  A translation can have two very different effects:
- *            (a) Moves the boxes across the fixed image region.
- *            (b) Moves the image origin, causing a change in the image
- *                region and an opposite effective translation of the boxes.
- *          This function should only be used for (a), where the image
- *          region is fixed on translation.  If the image region is
- *          changed by the translation, use instead the functions
- *          in affinecompose.c, where the image region and rotation
- *          center can be computed from the actual clipping due to
- *          translation of the image origin.
- *      (3) See boxTransformOrdered() for usage and implementation details.
- * 
- */ -BOXA * -boxaTransformOrdered(BOXA *boxas, - l_int32 shiftx, - l_int32 shifty, - l_float32 scalex, - l_float32 scaley, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 order) -{ -l_int32 i, n; -BOX *boxs, *boxd; -BOXA *boxad; - - PROCNAME("boxaTransformOrdered"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - n = boxaGetCount(boxas); - if ((boxad = boxaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { - boxaDestroy(&boxad); - return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); - } - boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley, - xcen, ycen, angle, order); - boxDestroy(&boxs); - boxaAddBox(boxad, boxd, L_INSERT); - } - - return boxad; -} - - -/*! - * \brief boxTransformOrdered() - * - * \param[in] boxs - * \param[in] shiftx - * \param[in] shifty - * \param[in] scalex - * \param[in] scaley - * \param[in] xcen, ycen center of rotation - * \param[in] angle in radians; clockwise is positive - * \param[in] order one of 6 combinations: L_TR_SC_RO, ... - * \return boxd, or NULL on error - * - *
- * Notes:
- *      (1) This allows a sequence of linear transforms, composed of
- *          shift, scaling and rotation, where the order of the
- *          transforms is specified.
- *      (2) The rotation is taken about a point specified by (xcen, ycen).
- *          Let the components of the vector from the center of rotation
- *          to the box center be (xdif, ydif):
- *            xdif = (bx + 0.5 * bw) - xcen
- *            ydif = (by + 0.5 * bh) - ycen
- *          Then the box center after rotation has new components:
- *            bxcen = xcen + xdif * cosa + ydif * sina
- *            bycen = ycen + ydif * cosa - xdif * sina
- *          where cosa and sina are the cos and sin of the angle,
- *          and the enclosing box for the rotated box has size:
- *            rw = |bw * cosa| + |bh * sina|
- *            rh = |bh * cosa| + |bw * sina|
- *          where bw and bh are the unrotated width and height.
- *          Then the box UL corner (rx, ry) is
- *            rx = bxcen - 0.5 * rw
- *            ry = bycen - 0.5 * rh
- *      (3) The center of rotation specified by args %xcen and %ycen
- *          is the point BEFORE any translation or scaling.  If the
- *          rotation is not the first operation, this function finds
- *          the actual center at the time of rotation.  It does this
- *          by making the following assumptions:
- *             (1) Any scaling is with respect to the UL corner, so
- *                 that the center location scales accordingly.
- *             (2) A translation does not affect the center of
- *                 the image; it just moves the boxes.
- *          We always use assumption (1).  However, assumption (2)
- *          will be incorrect if the apparent translation is due
- *          to a clipping operation that, in effect, moves the
- *          origin of the image.  In that case, you should NOT use
- *          these simple functions.  Instead, use the functions
- *          in affinecompose.c, where the rotation center can be
- *          computed from the actual clipping due to translation
- *          of the image origin.
- * 
- */ -BOX * -boxTransformOrdered(BOX *boxs, - l_int32 shiftx, - l_int32 shifty, - l_float32 scalex, - l_float32 scaley, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 order) -{ -l_int32 bx, by, bw, bh, tx, ty, tw, th; -l_int32 xcent, ycent; /* transformed center of rotation due to scaling */ -l_float32 sina, cosa, xdif, ydif, rx, ry, rw, rh; -BOX *boxd; - - PROCNAME("boxTransformOrdered"); - - if (!boxs) - return (BOX *)ERROR_PTR("boxs not defined", procName, NULL); - if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC && - order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO) - return (BOX *)ERROR_PTR("order invalid", procName, NULL); - - boxGetGeometry(boxs, &bx, &by, &bw, &bh); - if (bw <= 0 || bh <= 0) /* invalid */ - return boxCreate(0, 0, 0, 0); - if (angle != 0.0) { - sina = sin(angle); - cosa = cos(angle); - } - - if (order == L_TR_SC_RO) { - tx = (l_int32)(scalex * (bx + shiftx) + 0.5); - ty = (l_int32)(scaley * (by + shifty) + 0.5); - tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); - th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); - xcent = (l_int32)(scalex * xcen + 0.5); - ycent = (l_int32)(scaley * ycen + 0.5); - if (angle == 0.0) { - boxd = boxCreate(tx, ty, tw, th); - } else { - xdif = tx + 0.5 * tw - xcent; - ydif = ty + 0.5 * th - ycent; - rw = L_ABS(tw * cosa) + L_ABS(th * sina); - rh = L_ABS(th * cosa) + L_ABS(tw * sina); - rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; - ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; - boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, - (l_int32)rh); - } - } else if (order == L_SC_TR_RO) { - tx = (l_int32)(scalex * bx + shiftx + 0.5); - ty = (l_int32)(scaley * by + shifty + 0.5); - tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); - th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); - xcent = (l_int32)(scalex * xcen + 0.5); - ycent = (l_int32)(scaley * ycen + 0.5); - if (angle == 0.0) { - boxd = boxCreate(tx, ty, tw, th); - } else { - xdif = tx + 0.5 * tw - xcent; 
- ydif = ty + 0.5 * th - ycent; - rw = L_ABS(tw * cosa) + L_ABS(th * sina); - rh = L_ABS(th * cosa) + L_ABS(tw * sina); - rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; - ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; - boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, - (l_int32)rh); - } - } else if (order == L_RO_TR_SC) { - if (angle == 0.0) { - rx = bx; - ry = by; - rw = bw; - rh = bh; - } else { - xdif = bx + 0.5 * bw - xcen; - ydif = by + 0.5 * bh - ycen; - rw = L_ABS(bw * cosa) + L_ABS(bh * sina); - rh = L_ABS(bh * cosa) + L_ABS(bw * sina); - rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; - ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; - } - tx = (l_int32)(scalex * (rx + shiftx) + 0.5); - ty = (l_int32)(scaley * (ry + shifty) + 0.5); - tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); - th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); - boxd = boxCreate(tx, ty, tw, th); - } else if (order == L_RO_SC_TR) { - if (angle == 0.0) { - rx = bx; - ry = by; - rw = bw; - rh = bh; - } else { - xdif = bx + 0.5 * bw - xcen; - ydif = by + 0.5 * bh - ycen; - rw = L_ABS(bw * cosa) + L_ABS(bh * sina); - rh = L_ABS(bh * cosa) + L_ABS(bw * sina); - rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; - ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; - } - tx = (l_int32)(scalex * rx + shiftx + 0.5); - ty = (l_int32)(scaley * ry + shifty + 0.5); - tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); - th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); - boxd = boxCreate(tx, ty, tw, th); - } else if (order == L_TR_RO_SC) { - tx = bx + shiftx; - ty = by + shifty; - if (angle == 0.0) { - rx = tx; - ry = ty; - rw = bw; - rh = bh; - } else { - xdif = tx + 0.5 * bw - xcen; - ydif = ty + 0.5 * bh - ycen; - rw = L_ABS(bw * cosa) + L_ABS(bh * sina); - rh = L_ABS(bh * cosa) + L_ABS(bw * sina); - rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; - ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; - } - tx = (l_int32)(scalex * rx + 0.5); - ty = (l_int32)(scaley * ry + 0.5); - tw = 
(l_int32)(L_MAX(1.0, scalex * rw + 0.5)); - th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); - boxd = boxCreate(tx, ty, tw, th); - } else { /* order == L_SC_RO_TR) */ - tx = (l_int32)(scalex * bx + 0.5); - ty = (l_int32)(scaley * by + 0.5); - tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); - th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); - xcent = (l_int32)(scalex * xcen + 0.5); - ycent = (l_int32)(scaley * ycen + 0.5); - if (angle == 0.0) { - rx = tx; - ry = ty; - rw = tw; - rh = th; - } else { - xdif = tx + 0.5 * tw - xcent; - ydif = ty + 0.5 * th - ycent; - rw = L_ABS(tw * cosa) + L_ABS(th * sina); - rh = L_ABS(th * cosa) + L_ABS(tw * sina); - rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; - ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; - } - tx = (l_int32)(rx + shiftx + 0.5); - ty = (l_int32)(ry + shifty + 0.5); - tw = (l_int32)(rw + 0.5); - th = (l_int32)(rh + 0.5); - boxd = boxCreate(tx, ty, tw, th); - } - - return boxd; -} - - -/*! - * \brief boxaRotateOrth() - * - * \param[in] boxas - * \param[in] w, h of image in which the boxa is embedded - * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; - * all rotations are clockwise - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) See boxRotateOrth() for details.
- * 
- */ -BOXA * -boxaRotateOrth(BOXA *boxas, - l_int32 w, - l_int32 h, - l_int32 rotation) -{ -l_int32 i, n; -BOX *boxs, *boxd; -BOXA *boxad; - - PROCNAME("boxaRotateOrth"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (rotation < 0 || rotation > 3) - return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); - if (rotation == 0) - return boxaCopy(boxas, L_COPY); - - n = boxaGetCount(boxas); - if ((boxad = boxaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { - boxaDestroy(&boxad); - return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); - } - boxd = boxRotateOrth(boxs, w, h, rotation); - boxDestroy(&boxs); - boxaAddBox(boxad, boxd, L_INSERT); - } - - return boxad; -} - - -/*! - * \brief boxRotateOrth() - * - * \param[in] box - * \param[in] w, h of image in which the box is embedded - * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; - * all rotations are clockwise - * \return boxd, or NULL on error - * - *
- * Notes:
- *      (1) Rotate the image with the embedded box by the specified amount.
- *      (2) After rotation, the rotated box is always measured with
- *          respect to the UL corner of the image.
- * 
- */ -BOX * -boxRotateOrth(BOX *box, - l_int32 w, - l_int32 h, - l_int32 rotation) -{ -l_int32 bx, by, bw, bh, xdist, ydist; - - PROCNAME("boxRotateOrth"); - - if (!box) - return (BOX *)ERROR_PTR("box not defined", procName, NULL); - if (rotation < 0 || rotation > 3) - return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); - if (rotation == 0) - return boxCopy(box); - - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (bw <= 0 || bh <= 0) /* invalid */ - return boxCreate(0, 0, 0, 0); - ydist = h - by - bh; /* below box */ - xdist = w - bx - bw; /* to right of box */ - if (rotation == 1) /* 90 deg cw */ - return boxCreate(ydist, bx, bh, bw); - else if (rotation == 2) /* 180 deg cw */ - return boxCreate(xdist, ydist, bw, bh); - else /* rotation == 3, 270 deg cw */ - return boxCreate(by, xdist, bh, bw); -} - - -/*! - * \brief boxaShiftWithPta() - * - * \param[in] boxas - * \param[in] pta aligned with the boxes; determines shift amount - * \param[in] dir +1 to shift by the values in pta; -1 to shift - * by the negative of the values in the pta. - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) In use, %pta may come from the UL corners of of a boxa, each
- *          of whose boxes contains the corresponding box of %boxas
- *          within it.  The output %boxad is then a boxa in the (global)
- *          coordinates of the containing boxa.  So the input %pta
- *          could come from boxaExtractCorners().
- *      (2) The operations with %dir == 1 and %dir == -1 are inverses if
- *          called in order (1, -1).  Starting with an input boxa and
- *          calling twice with these values of %dir results in a boxa
- *          identical to the input.  However, because box parameters can
- *          never be negative, calling in the order (-1, 1) may result
- *          in clipping at the left side and the top.
- * 
- */ -BOXA * -boxaShiftWithPta(BOXA *boxas, - PTA *pta, - l_int32 dir) -{ -l_int32 i, n, x, y, full; -BOX *box1, *box2; -BOXA *boxad; - - PROCNAME("boxaShiftWithPta"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - boxaIsFull(boxas, &full); - if (!full) - return (BOXA *)ERROR_PTR("boxas not full", procName, NULL); - if (!pta) - return (BOXA *)ERROR_PTR("pta not defined", procName, NULL); - if (dir != 1 && dir != -1) - return (BOXA *)ERROR_PTR("invalid dir", procName, NULL); - n = boxaGetCount(boxas); - if (n != ptaGetCount(pta)) - return (BOXA *)ERROR_PTR("boxas and pta not same size", procName, NULL); - - if ((boxad = boxaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); - for (i = 0; i < n; i++) { - box1 = boxaGetBox(boxas, i, L_COPY); - ptaGetIPt(pta, i, &x, &y); - box2 = boxTransform(box1, dir * x, dir * y, 1.0, 1.0); - boxaAddBox(boxad, box2, L_INSERT); - boxDestroy(&box1); - } - return boxad; -} - - -/*---------------------------------------------------------------------* - * Boxa sort * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaSort() - * - * \param[in] boxas - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, - * L_SORT_BY_RIGHT, L_SORT_BY_BOT, - * L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, - * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, - * L_SORT_BY_PERIMETER, L_SORT_BY_AREA, - * L_SORT_BY_ASPECT_RATIO - * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING - * \param[out] pnaindex [optional] index of sorted order into - * original array - * \return boxad sorted version of boxas, or NULL on error - * - *
- * Notes:
- *      (1) An empty boxa returns a copy, with a warning.
- * 
- */ -BOXA * -boxaSort(BOXA *boxas, - l_int32 sorttype, - l_int32 sortorder, - NUMA **pnaindex) -{ -l_int32 i, n, x, y, w, h, size; -BOXA *boxad; -NUMA *na, *naindex; - - PROCNAME("boxaSort"); - - if (pnaindex) *pnaindex = NULL; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if ((n = boxaGetCount(boxas)) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && - sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT && - sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && - sorttype != L_SORT_BY_MIN_DIMENSION && - sorttype != L_SORT_BY_MAX_DIMENSION && - sorttype != L_SORT_BY_PERIMETER && - sorttype != L_SORT_BY_AREA && - sorttype != L_SORT_BY_ASPECT_RATIO) - return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL); - - /* Use O(n) binsort if possible */ - if (n > MinCompsForBinSort && - ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) || - (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) || - (sorttype == L_SORT_BY_PERIMETER))) - return boxaBinSort(boxas, sorttype, sortorder, pnaindex); - - /* Build up numa of specific data */ - if ((na = numaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("na not made", procName, NULL); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); - switch (sorttype) - { - case L_SORT_BY_X: - numaAddNumber(na, x); - break; - case L_SORT_BY_Y: - numaAddNumber(na, y); - break; - case L_SORT_BY_RIGHT: - numaAddNumber(na, x + w - 1); - break; - case L_SORT_BY_BOT: - numaAddNumber(na, y + h - 1); - break; - case L_SORT_BY_WIDTH: - numaAddNumber(na, w); - break; - case L_SORT_BY_HEIGHT: - numaAddNumber(na, h); - break; - case L_SORT_BY_MIN_DIMENSION: - size = L_MIN(w, h); - numaAddNumber(na, size); - break; - case 
L_SORT_BY_MAX_DIMENSION: - size = L_MAX(w, h); - numaAddNumber(na, size); - break; - case L_SORT_BY_PERIMETER: - size = w + h; - numaAddNumber(na, size); - break; - case L_SORT_BY_AREA: - size = w * h; - numaAddNumber(na, size); - break; - case L_SORT_BY_ASPECT_RATIO: - numaAddNumber(na, (l_float32)w / (l_float32)h); - break; - default: - L_WARNING("invalid sort type\n", procName); - } - } - - /* Get the sort index for data array */ - naindex = numaGetSortIndex(na, sortorder); - numaDestroy(&na); - if (!naindex) - return (BOXA *)ERROR_PTR("naindex not made", procName, NULL); - - /* Build up sorted boxa using sort index */ - boxad = boxaSortByIndex(boxas, naindex); - - if (pnaindex) - *pnaindex = naindex; - else - numaDestroy(&naindex); - return boxad; -} - - -/*! - * \brief boxaBinSort() - * - * \param[in] boxas - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, - * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER - * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING - * \param[out] pnaindex [optional] index of sorted order into - * original array - * \return boxad sorted version of boxas, or NULL on error - * - *
- * Notes:
- *      (1) For a large number of boxes (say, greater than 1000), this
- *          O(n) binsort is much faster than the O(nlogn) shellsort.
- *          For 5000 components, this is over 20x faster than boxaSort().
- *      (2) Consequently, boxaSort() calls this function if it will
- *          likely go much faster.
- * 
- */ -BOXA * -boxaBinSort(BOXA *boxas, - l_int32 sorttype, - l_int32 sortorder, - NUMA **pnaindex) -{ -l_int32 i, n, x, y, w, h; -BOXA *boxad; -NUMA *na, *naindex; - - PROCNAME("boxaBinSort"); - - if (pnaindex) *pnaindex = NULL; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if ((n = boxaGetCount(boxas)) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && - sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && - sorttype != L_SORT_BY_PERIMETER) - return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL); - - /* Generate Numa of appropriate box dimensions */ - if ((na = numaCreate(n)) == NULL) - return (BOXA *)ERROR_PTR("na not made", procName, NULL); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); - switch (sorttype) - { - case L_SORT_BY_X: - numaAddNumber(na, x); - break; - case L_SORT_BY_Y: - numaAddNumber(na, y); - break; - case L_SORT_BY_WIDTH: - numaAddNumber(na, w); - break; - case L_SORT_BY_HEIGHT: - numaAddNumber(na, h); - break; - case L_SORT_BY_PERIMETER: - numaAddNumber(na, w + h); - break; - default: - L_WARNING("invalid sort type\n", procName); - } - } - - /* Get the sort index for data array */ - naindex = numaGetBinSortIndex(na, sortorder); - numaDestroy(&na); - if (!naindex) - return (BOXA *)ERROR_PTR("naindex not made", procName, NULL); - - /* Build up sorted boxa using the sort index */ - boxad = boxaSortByIndex(boxas, naindex); - - if (pnaindex) - *pnaindex = naindex; - else - numaDestroy(&naindex); - return boxad; -} - - -/*! 
- * \brief boxaSortByIndex() - * - * \param[in] boxas - * \param[in] naindex na that maps from the new boxa to the input boxa - * \return boxad sorted, or NULL on error - */ -BOXA * -boxaSortByIndex(BOXA *boxas, - NUMA *naindex) -{ -l_int32 i, n, index; -BOX *box; -BOXA *boxad; - - PROCNAME("boxaSortByIndex"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if ((n = boxaGetCount(boxas)) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (!naindex) - return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL); - - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(naindex, i, &index); - box = boxaGetBox(boxas, index, L_COPY); - boxaAddBox(boxad, box, L_INSERT); - } - - return boxad; -} - - -/*! - * \brief boxaSort2d() - * - * \param[in] boxas - * \param[out] pnaad [optional] numaa with sorted indices - * whose values are the indices of the input array - * \param[in] delta1 min separation that permits aggregation of a box - * onto a boxa of horizontally-aligned boxes; pass 1 - * \param[in] delta2 min separation that permits aggregation of a box - * onto a boxa of horizontally-aligned boxes; pass 2 - * \param[in] minh1 components less than this height either join an - * existing boxa or are set aside for pass 2 - * \return baa 2d sorted version of boxa, or NULL on error - * - *
- * Notes:
- *      (1) The final result is a sort where the 'fast scan' direction is
- *          left to right, and the 'slow scan' direction is from top
- *          to bottom.  Each boxa in the baa represents a sorted set
- *          of boxes from left to right.
- *      (2) Three passes are used to aggregate the boxas, which can correspond
- *          to characters or words in a line of text.  In pass 1, only
- *          taller components, which correspond to xheight or larger,
- *          are permitted to start a new boxa.  In pass 2, the remaining
- *          vertically-challenged components are allowed to join an
- *          existing boxa or start a new one.  In pass 3, boxa whose extent
- *          is overlapping are joined.  After that, the boxes in each
- *          boxa are sorted horizontally, and finally the boxa are
- *          sorted vertically.
- *      (3) If %delta1 > 0, the first pass allows aggregation when
- *          boxes in the same boxa do not overlap vertically.  In fact,
- *          %delta1 is the max distance by which they can miss and still
- *          be aggregated.  If %delta1 < 0, the box must have vertical
- *          overlap of at least abs(%delta1) with the boxa before it
- *          can be merged.  Similar for delta2 on the second pass.
- *      (4) On the first pass, any component of height less than minh1
- *          cannot start a new boxa; it's put aside for later insertion.
- *      (5) On the second pass, any small component that doesn't align
- *          with an existing boxa can start a new one.
- *      (6) This can be used to identify lines of text from
- *          character or word bounding boxes.
- *      (7) Typical values for the input parameters on 300 ppi text are:
- *                 delta1 ~ 0
- *                 delta2 ~ 0
- *                 minh1 ~ 5
- * 
- */ -BOXAA * -boxaSort2d(BOXA *boxas, - NUMAA **pnaad, - l_int32 delta1, - l_int32 delta2, - l_int32 minh1) -{ -l_int32 i, index, h, nt, ne, n, m, ival; -BOX *box; -BOXA *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs; -BOXAA *baa, *baa1, *baad; -NUMA *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap; -NUMAA *naa, *naa1, *naad; - - PROCNAME("boxaSort2d"); - - if (pnaad) *pnaad = NULL; - if (!boxas) - return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxaGetCount(boxas) == 0) - return (BOXAA *)ERROR_PTR("boxas is empty", procName, NULL); - - /* Sort from left to right */ - if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex)) - == NULL) - return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL); - - /* First pass: assign taller boxes to boxa by row */ - nt = boxaGetCount(boxa); - baa = boxaaCreate(0); - naa = numaaCreate(0); - boxae = boxaCreate(0); /* save small height boxes here */ - nae = numaCreate(0); /* keep track of small height boxes */ - for (i = 0; i < nt; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(box, NULL, NULL, NULL, &h); - if (h < minh1) { /* save for 2nd pass */ - boxaAddBox(boxae, box, L_INSERT); - numaAddNumber(nae, i); - } else { - n = boxaaGetCount(baa); - boxaaAlignBox(baa, box, delta1, &index); - if (index < n) { /* append to an existing boxa */ - boxaaAddBox(baa, index, box, L_INSERT); - } else { /* doesn't align, need new boxa */ - boxan = boxaCreate(0); - boxaAddBox(boxan, box, L_INSERT); - boxaaAddBoxa(baa, boxan, L_INSERT); - nan = numaCreate(0); - numaaAddNuma(naa, nan, L_INSERT); - } - numaGetIValue(naindex, i, &ival); - numaaAddNumber(naa, index, ival); - } - } - boxaDestroy(&boxa); - numaDestroy(&naindex); - - /* Second pass: feed in small height boxes */ - ne = boxaGetCount(boxae); - for (i = 0; i < ne; i++) { - box = boxaGetBox(boxae, i, L_CLONE); - n = boxaaGetCount(baa); - boxaaAlignBox(baa, box, delta2, &index); - if (index < n) { /* append to an 
existing boxa */ - boxaaAddBox(baa, index, box, L_INSERT); - } else { /* doesn't align, need new boxa */ - boxan = boxaCreate(0); - boxaAddBox(boxan, box, L_INSERT); - boxaaAddBoxa(baa, boxan, L_INSERT); - nan = numaCreate(0); - numaaAddNuma(naa, nan, L_INSERT); - } - numaGetIValue(nae, i, &ival); /* location in original boxas */ - numaaAddNumber(naa, index, ival); - } - - /* Third pass: merge some boxa whose extent is overlapping. - * Think of these boxa as text lines, where the bounding boxes - * of the text lines can overlap, but likely won't have - * a huge overlap. - * First do a greedy find of pairs of overlapping boxa, where - * the two boxa overlap by at least 50% of the smaller, and - * the smaller is not more than half the area of the larger. - * For such pairs, call the larger one the primary boxa. The - * boxes in the smaller one are appended to those in the primary - * in pass 3a, and the primaries are extracted in pass 3b. - * In this way, all boxes in the original baa are saved. 
*/ - n = boxaaGetCount(baa); - boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3); - boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap); - boxaDestroy(&boxa1); - boxaDestroy(&boxa3); - for (i = 0; i < n; i++) { /* Pass 3a: join selected copies of boxa */ - numaGetIValue(namap, i, &ival); - if (ival >= 0) { /* join current to primary boxa[ival] */ - boxa1 = boxaaGetBoxa(baa, i, L_COPY); - boxa2 = boxaaGetBoxa(baa, ival, L_CLONE); - boxaJoin(boxa2, boxa1, 0, -1); - boxaDestroy(&boxa2); - boxaDestroy(&boxa1); - na1 = numaaGetNuma(naa, i, L_COPY); - na2 = numaaGetNuma(naa, ival, L_CLONE); - numaJoin(na2, na1, 0, -1); - numaDestroy(&na1); - numaDestroy(&na2); - } - } - baa1 = boxaaCreate(n); - naa1 = numaaCreate(n); - for (i = 0; i < n; i++) { /* Pass 3b: save primary boxa */ - numaGetIValue(namap, i, &ival); - if (ival == -1) { - boxa1 = boxaaGetBoxa(baa, i, L_CLONE); - boxaaAddBoxa(baa1, boxa1, L_INSERT); - na1 = numaaGetNuma(naa, i, L_CLONE); - numaaAddNuma(naa1, na1, L_INSERT); - } - } - numaDestroy(&namap); - boxaaDestroy(&baa); - baa = baa1; - numaaDestroy(&naa); - naa = naa1; - - /* Sort the boxes in each boxa horizontally */ - m = boxaaGetCount(baa); - for (i = 0; i < m; i++) { - boxa1 = boxaaGetBoxa(baa, i, L_CLONE); - boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah); - boxaaReplaceBoxa(baa, i, boxa2); - na1 = numaaGetNuma(naa, i, L_CLONE); - na2 = numaSortByIndex(na1, nah); - numaaReplaceNuma(naa, i, na2); - boxaDestroy(&boxa1); - numaDestroy(&na1); - numaDestroy(&nah); - } - - /* Sort the boxa vertically within boxaa, using the first box - * in each boxa. 
*/ - m = boxaaGetCount(baa); - boxav = boxaCreate(m); /* holds first box in each boxa in baa */ - naad = numaaCreate(m); - if (pnaad) - *pnaad = naad; - baad = boxaaCreate(m); - for (i = 0; i < m; i++) { - boxa1 = boxaaGetBoxa(baa, i, L_CLONE); - box = boxaGetBox(boxa1, 0, L_CLONE); - boxaAddBox(boxav, box, L_INSERT); - boxaDestroy(&boxa1); - } - boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav); - for (i = 0; i < m; i++) { - numaGetIValue(nav, i, &index); - boxa = boxaaGetBoxa(baa, index, L_CLONE); - boxaaAddBoxa(baad, boxa, L_INSERT); - nad = numaaGetNuma(naa, index, L_CLONE); - numaaAddNuma(naad, nad, L_INSERT); - } - - -/* lept_stderr("box count = %d, numaa count = %d\n", nt, - numaaGetNumberCount(naad)); */ - - boxaaDestroy(&baa); - boxaDestroy(&boxav); - boxaDestroy(&boxavs); - boxaDestroy(&boxae); - numaDestroy(&nav); - numaDestroy(&nae); - numaaDestroy(&naa); - if (!pnaad) - numaaDestroy(&naad); - - return baad; -} - - -/*! - * \brief boxaSort2dByIndex() - * - * \param[in] boxas - * \param[in] naa numaa that maps from the new baa to the input boxa - * \return baa sorted boxaa, or NULL on error - */ -BOXAA * -boxaSort2dByIndex(BOXA *boxas, - NUMAA *naa) -{ -l_int32 ntot, boxtot, i, j, n, nn, index; -BOX *box; -BOXA *boxa; -BOXAA *baa; -NUMA *na; - - PROCNAME("boxaSort2dByIndex"); - - if (!boxas) - return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL); - if ((boxtot = boxaGetCount(boxas)) == 0) - return (BOXAA *)ERROR_PTR("boxas is empty", procName, NULL); - if (!naa) - return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL); - - /* Check counts */ - ntot = numaaGetNumberCount(naa); - if (ntot != boxtot) - return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL); - - n = numaaGetCount(naa); - baa = boxaaCreate(n); - for (i = 0; i < n; i++) { - na = numaaGetNuma(naa, i, L_CLONE); - nn = numaGetCount(na); - boxa = boxaCreate(nn); - for (j = 0; j < nn; j++) { - numaGetIValue(na, i, &index); - box = boxaGetBox(boxas, 
index, L_COPY); - boxaAddBox(boxa, box, L_INSERT); - } - boxaaAddBoxa(baa, boxa, L_INSERT); - numaDestroy(&na); - } - - return baa; -} - - -/*---------------------------------------------------------------------* - * Boxa array extraction * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaExtractAsNuma() - * - * \param[in] boxa - * \param[out] pnal [optional] array of left locations - * \param[out] pnat [optional] array of top locations - * \param[out] pnar [optional] array of right locations - * \param[out] pnab [optional] array of bottom locations - * \param[out] pnaw [optional] array of widths - * \param[out] pnah [optional] array of heights - * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If you are counting or sorting values, such as determining
- *          rank order, you must remove invalid boxes.
- *      (2) If you are parametrizing the values, or doing an evaluation
- *          where the position in the boxa sequence is important, you
- *          must replace the invalid boxes with valid ones before
- *          doing the extraction. This is easily done with boxaFillSequence().
- * 
- */ -l_ok -boxaExtractAsNuma(BOXA *boxa, - NUMA **pnal, - NUMA **pnat, - NUMA **pnar, - NUMA **pnab, - NUMA **pnaw, - NUMA **pnah, - l_int32 keepinvalid) -{ -l_int32 i, n, left, top, right, bot, w, h; - - PROCNAME("boxaExtractAsNuma"); - - if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah) - return ERROR_INT("no output requested", procName, 1); - if (pnal) *pnal = NULL; - if (pnat) *pnat = NULL; - if (pnar) *pnar = NULL; - if (pnab) *pnab = NULL; - if (pnaw) *pnaw = NULL; - if (pnah) *pnah = NULL; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!keepinvalid && boxaGetValidCount(boxa) == 0) - return ERROR_INT("no valid boxes", procName, 1); - - n = boxaGetCount(boxa); - if (pnal) *pnal = numaCreate(n); - if (pnat) *pnat = numaCreate(n); - if (pnar) *pnar = numaCreate(n); - if (pnab) *pnab = numaCreate(n); - if (pnaw) *pnaw = numaCreate(n); - if (pnah) *pnah = numaCreate(n); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); - if (!keepinvalid && (w <= 0 || h <= 0)) - continue; - right = left + w - 1; - bot = top + h - 1; - if (pnal) numaAddNumber(*pnal, left); - if (pnat) numaAddNumber(*pnat, top); - if (pnar) numaAddNumber(*pnar, right); - if (pnab) numaAddNumber(*pnab, bot); - if (pnaw) numaAddNumber(*pnaw, w); - if (pnah) numaAddNumber(*pnah, h); - } - - return 0; -} - - -/*! - * \brief boxaExtractAsPta() - * - * \param[in] boxa - * \param[out] pptal [optional] array of left locations vs. index - * \param[out] pptat [optional] array of top locations vs. index - * \param[out] pptar [optional] array of right locations vs. index - * \param[out] pptab [optional] array of bottom locations vs. index - * \param[out] pptaw [optional] array of widths vs. index - * \param[out] pptah [optional] array of heights vs. index - * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For most applications, such as counting, sorting, fitting
- *          to some parametrized form, plotting or filtering in general,
- *          you should remove the invalid boxes.  Each pta saves the
- *          box index in the x array, so replacing invalid boxes by
- *          filling with boxaFillSequence(), which is required for
- *          boxaExtractAsNuma(), is not necessary.
- *      (2) If invalid boxes are retained, each one will result in
- *          entries (typically 0) in all selected output pta.
- *      (3) Other boxa --> pta functions are:
- *          * boxaExtractCorners(): extracts any of the four corners as a pta.
- *          * boxaConvertToPta(): extracts sufficient number of corners
- *            to allow reconstruction of the original boxa from the pta.
- * 
- */ -l_ok -boxaExtractAsPta(BOXA *boxa, - PTA **pptal, - PTA **pptat, - PTA **pptar, - PTA **pptab, - PTA **pptaw, - PTA **pptah, - l_int32 keepinvalid) -{ -l_int32 i, n, left, top, right, bot, w, h; - - PROCNAME("boxaExtractAsPta"); - - if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah) - return ERROR_INT("no output requested", procName, 1); - if (pptal) *pptal = NULL; - if (pptat) *pptat = NULL; - if (pptar) *pptar = NULL; - if (pptab) *pptab = NULL; - if (pptaw) *pptaw = NULL; - if (pptah) *pptah = NULL; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!keepinvalid && boxaGetValidCount(boxa) == 0) - return ERROR_INT("no valid boxes", procName, 1); - - n = boxaGetCount(boxa); - if (pptal) *pptal = ptaCreate(n); - if (pptat) *pptat = ptaCreate(n); - if (pptar) *pptar = ptaCreate(n); - if (pptab) *pptab = ptaCreate(n); - if (pptaw) *pptaw = ptaCreate(n); - if (pptah) *pptah = ptaCreate(n); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); - if (!keepinvalid && (w <= 0 || h <= 0)) - continue; - right = left + w - 1; - bot = top + h - 1; - if (pptal) ptaAddPt(*pptal, i, left); - if (pptat) ptaAddPt(*pptat, i, top); - if (pptar) ptaAddPt(*pptar, i, right); - if (pptab) ptaAddPt(*pptab, i, bot); - if (pptaw) ptaAddPt(*pptaw, i, w); - if (pptah) ptaAddPt(*pptah, i, h); - } - - return 0; -} - - -/*! - * \brief boxaExtractCorners() - * - * \param[in] boxa - * \param[in] loc L_UPPER_LEFT, L_UPPER_RIGHT, L_LOWER_LEFT, - * L_LOWER_RIGHT, L_BOX_CENTER - * \return pta of requested coordinates, or NULL on error - * - *
- * Notes:
- *      (1) Extracts (0,0) for invalid boxes.
- *      (2) Other boxa --> pta functions are:
- *          * boxaExtractAsPta(): allows extraction of any dimension
- *            and/or side location, with each in a separate pta.
- *          * boxaConvertToPta(): extracts sufficient number of corners
- *            to allow reconstruction of the original boxa from the pta.
- * 
- */ -PTA * -boxaExtractCorners(BOXA *boxa, - l_int32 loc) -{ -l_int32 i, n, left, top, right, bot, w, h; -PTA *pta; - - PROCNAME("boxaExtractCorners"); - - if (!boxa) - return (PTA *)ERROR_PTR("boxa not defined", procName, NULL); - if (loc != L_UPPER_LEFT && loc != L_UPPER_RIGHT && loc != L_LOWER_LEFT && - loc != L_LOWER_RIGHT && loc != L_BOX_CENTER) - return (PTA *)ERROR_PTR("invalid location", procName, NULL); - - n = boxaGetCount(boxa); - if ((pta = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); - right = left + w - 1; - bot = top + h - 1; - if (w == 0 || h == 0) { /* invalid */ - left = 0; - top = 0; - right = 0; - bot = 0; - } - if (loc == L_UPPER_LEFT) - ptaAddPt(pta, left, top); - else if (loc == L_UPPER_RIGHT) - ptaAddPt(pta, right, top); - else if (loc == L_LOWER_LEFT) - ptaAddPt(pta, left, bot); - else if (loc == L_LOWER_RIGHT) - ptaAddPt(pta, right, bot); - else if (loc == L_BOX_CENTER) - ptaAddPt(pta, (left + right) / 2, (top + bot) / 2); - } - - return pta; -} - - -/*---------------------------------------------------------------------* - * Boxa statistics * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaGetRankVals() - * - * \param[in] boxa - * \param[in] fract use 0.0 for smallest, 1.0 for largest width and height - * \param[out] px [optional] rank value of x (left side) - * \param[out] py [optional] rank value of y (top side) - * \param[out] pr [optional] rank value of right side - * \param[out] pb [optional] rank value of bottom side - * \param[out] pw [optional] rank value of width - * \param[out] ph [optional] rank value of height - * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes - * - *
- * Notes:
- *      (1) This function does not assume that all boxes in the boxa are valid
- *      (2) The six box parameters are sorted independently.
- *          For rank order, the width and height are sorted in increasing
- *          order.  But what does it mean to sort x and y in "rank order"?
- *          If the boxes are of comparable size and somewhat
- *          aligned (e.g., from multiple images), it makes some sense
- *          to give a "rank order" for x and y by sorting them in
- *          decreasing order.  (By the same argument, we choose to sort
- *          the r and b sides in increasing order.)  In general, the
- *          interpretation of a rank order on x and y (or on r and b)
- *          is highly application dependent.  In summary:
- *             ~ x and y are sorted in decreasing order
- *             ~ r and b are sorted in increasing order
- *             ~ w and h are sorted in increasing order
- * 
- */ -l_ok -boxaGetRankVals(BOXA *boxa, - l_float32 fract, - l_int32 *px, - l_int32 *py, - l_int32 *pr, - l_int32 *pb, - l_int32 *pw, - l_int32 *ph) -{ -l_float32 xval, yval, rval, bval, wval, hval; -NUMA *nax, *nay, *nar, *nab, *naw, *nah; - - PROCNAME("boxaGetRankVals"); - - if (px) *px = 0; - if (py) *py = 0; - if (pr) *pr = 0; - if (pb) *pb = 0; - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (fract < 0.0 || fract > 1.0) - return ERROR_INT("fract not in [0.0 ... 1.0]", procName, 1); - if (boxaGetValidCount(boxa) == 0) - return ERROR_INT("no valid boxes in boxa", procName, 1); - - /* Use only the valid boxes */ - boxaExtractAsNuma(boxa, &nax, &nay, &nar, &nab, &naw, &nah, 0); - - if (px) { - numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval); - *px = (l_int32)xval; - } - if (py) { - numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval); - *py = (l_int32)yval; - } - if (pr) { - numaGetRankValue(nar, fract, NULL, 1, &rval); - *pr = (l_int32)rval; - } - if (pb) { - numaGetRankValue(nab, fract, NULL, 1, &bval); - *pb = (l_int32)bval; - } - if (pw) { - numaGetRankValue(naw, fract, NULL, 1, &wval); - *pw = (l_int32)wval; - } - if (ph) { - numaGetRankValue(nah, fract, NULL, 1, &hval); - *ph = (l_int32)hval; - } - numaDestroy(&nax); - numaDestroy(&nay); - numaDestroy(&nar); - numaDestroy(&nab); - numaDestroy(&naw); - numaDestroy(&nah); - return 0; -} - - -/*! - * \brief boxaGetMedianVals() - * - * \param[in] boxa - * \param[out] px [optional] median value of x (left side) - * \param[out] py [optional] median value of y (top side) - * \param[out] pr [optional] median value of right side - * \param[out] pb [optional] median value of bottom side - * \param[out] pw [optional] median value of width - * \param[out] ph [optional] median value of height - * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes - * - *
- * Notes:
- *      (1) See boxaGetRankVals()
- * 
- */ -l_ok -boxaGetMedianVals(BOXA *boxa, - l_int32 *px, - l_int32 *py, - l_int32 *pr, - l_int32 *pb, - l_int32 *pw, - l_int32 *ph) -{ - PROCNAME("boxaGetMedianVals"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (boxaGetValidCount(boxa) == 0) - return ERROR_INT("no valid boxes in boxa", procName, 1); - - return boxaGetRankVals(boxa, 0.5, px, py, pr, pb, pw, ph); -} - - -/*! - * \brief boxaGetAverageSize() - * - * \param[in] boxa - * \param[out] pw [optional] average width - * \param[out] ph [optional] average height - * \return 0 if OK, 1 on error or if the boxa is empty - */ -l_ok -boxaGetAverageSize(BOXA *boxa, - l_float32 *pw, - l_float32 *ph) -{ -l_int32 i, n, bw, bh; -l_float32 sumw, sumh; - - PROCNAME("boxaGetAverageSize"); - - if (pw) *pw = 0.0; - if (ph) *ph = 0.0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if ((n = boxaGetCount(boxa)) == 0) - return ERROR_INT("boxa is empty", procName, 1); - - sumw = sumh = 0.0; - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh); - sumw += bw; - sumh += bh; - } - - if (pw) *pw = sumw / n; - if (ph) *ph = sumh / n; - return 0; -} - - -/*---------------------------------------------------------------------* - * Other Boxaa functions * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaaGetExtent() - * - * \param[in] baa - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pbox [optional] minimum box containing all boxa - * in boxaa - * \param[out] pboxa [optional] boxa containing all boxes in each - * boxa in the boxaa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The returned w and h are the minimum size image
- *          that would contain all boxes untranslated.
- *      (2) Each box in the returned boxa is the minimum box required to
- *          hold all the boxes in the respective boxa of baa.
- *      (3) If there are no valid boxes in a boxa, the box corresponding
- *          to its extent has all fields set to 0 (an invalid box).
- * 
- */ -l_ok -boxaaGetExtent(BOXAA *baa, - l_int32 *pw, - l_int32 *ph, - BOX **pbox, - BOXA **pboxa) -{ -l_int32 i, n, x, y, w, h, xmax, ymax, xmin, ymin, found; -BOX *box1; -BOXA *boxa, *boxa1; - - PROCNAME("boxaaGetExtent"); - - if (!pw && !ph && !pbox && !pboxa) - return ERROR_INT("no ptrs defined", procName, 1); - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbox) *pbox = NULL; - if (pboxa) *pboxa = NULL; - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - - n = boxaaGetCount(baa); - if (n == 0) - return ERROR_INT("no boxa in baa", procName, 1); - - boxa = boxaCreate(n); - xmax = ymax = 0; - xmin = ymin = 100000000; - found = FALSE; - for (i = 0; i < n; i++) { - boxa1 = boxaaGetBoxa(baa, i, L_CLONE); - boxaGetExtent(boxa1, NULL, NULL, &box1); - boxaDestroy(&boxa1); - boxGetGeometry(box1, &x, &y, &w, &h); - if (w > 0 && h > 0) { /* a valid extent box */ - found = TRUE; /* found at least one valid extent box */ - xmin = L_MIN(xmin, x); - ymin = L_MIN(ymin, y); - xmax = L_MAX(xmax, x + w); - ymax = L_MAX(ymax, y + h); - } - boxaAddBox(boxa, box1, L_INSERT); - } - if (found == FALSE) /* no valid extent boxes */ - xmin = ymin = 0; - - if (pw) *pw = xmax; - if (ph) *ph = ymax; - if (pbox) - *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin); - if (pboxa) - *pboxa = boxa; - else - boxaDestroy(&boxa); - return 0; -} - - -/*! - * \brief boxaaFlattenToBoxa() - * - * \param[in] baa - * \param[out] pnaindex [optional] the boxa index in the baa - * \param[in] copyflag L_COPY or L_CLONE - * \return boxa, or NULL on error - * - *
- * Notes:
- *      (1) This 'flattens' the baa to a boxa, taking the boxes in
- *          order in the first boxa, then the second, etc.
- *      (2) If a boxa is empty, we generate an invalid, placeholder box
- *          of zero size.  This is useful when converting from a baa
- *          where each boxa has either 0 or 1 boxes, and it is necessary
- *          to maintain a 1:1 correspondence between the initial
- *          boxa array and the resulting box array.
- *      (3) If &naindex is defined, we generate a Numa that gives, for
- *          each box in the baa, the index of the boxa to which it belongs.
- * 
- */ -BOXA * -boxaaFlattenToBoxa(BOXAA *baa, - NUMA **pnaindex, - l_int32 copyflag) -{ -l_int32 i, j, m, n; -BOXA *boxa, *boxat; -BOX *box; -NUMA *naindex; - - PROCNAME("boxaaFlattenToBoxa"); - - if (pnaindex) *pnaindex = NULL; - if (!baa) - return (BOXA *)ERROR_PTR("baa not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); - if (pnaindex) { - naindex = numaCreate(0); - *pnaindex = naindex; - } - - n = boxaaGetCount(baa); - boxa = boxaCreate(n); - for (i = 0; i < n; i++) { - boxat = boxaaGetBoxa(baa, i, L_CLONE); - m = boxaGetCount(boxat); - if (m == 0) { /* placeholder box */ - box = boxCreate(0, 0, 0, 0); - boxaAddBox(boxa, box, L_INSERT); - if (pnaindex) - numaAddNumber(naindex, i); /* save 'row' number */ - } else { - for (j = 0; j < m; j++) { - box = boxaGetBox(boxat, j, copyflag); - boxaAddBox(boxa, box, L_INSERT); - if (pnaindex) - numaAddNumber(naindex, i); /* save 'row' number */ - } - } - boxaDestroy(&boxat); - } - - return boxa; -} - - -/*! - * \brief boxaaFlattenAligned() - * - * \param[in] baa - * \param[in] num number extracted from each - * \param[in] fillerbox [optional] that fills if necessary - * \param[in] copyflag L_COPY or L_CLONE - * \return boxa, or NULL on error - * - *
- * Notes:
- *      (1) This 'flattens' the baa to a boxa, taking the first %num
- *          boxes from each boxa.
- *      (2) In each boxa, if there are less than %num boxes, we preserve
- *          the alignment between the input baa and the output boxa
- *          by inserting one or more fillerbox(es) or, if %fillerbox == NULL,
- *          one or more invalid placeholder boxes.
- * 
- */ -BOXA * -boxaaFlattenAligned(BOXAA *baa, - l_int32 num, - BOX *fillerbox, - l_int32 copyflag) -{ -l_int32 i, j, m, n, mval, nshort; -BOXA *boxat, *boxad; -BOX *box; - - PROCNAME("boxaaFlattenAligned"); - - if (!baa) - return (BOXA *)ERROR_PTR("baa not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - n = boxaaGetCount(baa); - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - boxat = boxaaGetBoxa(baa, i, L_CLONE); - m = boxaGetCount(boxat); - mval = L_MIN(m, num); - nshort = num - mval; - for (j = 0; j < mval; j++) { /* take the first %num if possible */ - box = boxaGetBox(boxat, j, copyflag); - boxaAddBox(boxad, box, L_INSERT); - } - for (j = 0; j < nshort; j++) { /* add fillers if necessary */ - if (fillerbox) { - boxaAddBox(boxad, fillerbox, L_COPY); - } else { - box = boxCreate(0, 0, 0, 0); /* invalid placeholder box */ - boxaAddBox(boxad, box, L_INSERT); - } - } - boxaDestroy(&boxat); - } - - return boxad; -} - - -/*! - * \brief boxaEncapsulateAligned() - * - * \param[in] boxa - * \param[in] num number put into each boxa in the baa - * \param[in] copyflag L_COPY or L_CLONE - * \return baa, or NULL on error - * - *
- * Notes:
- *      (1) This puts %num boxes from the input %boxa into each of a
- *          set of boxa within an output baa.
- *      (2) This assumes that the boxes in %boxa are in sets of %num each.
- * 
- */ -BOXAA * -boxaEncapsulateAligned(BOXA *boxa, - l_int32 num, - l_int32 copyflag) -{ -l_int32 i, j, n, nbaa, index; -BOX *box; -BOXA *boxat; -BOXAA *baa; - - PROCNAME("boxaEncapsulateAligned"); - - if (!boxa) - return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL); - - n = boxaGetCount(boxa); - nbaa = n / num; - if (num * nbaa != n) - L_ERROR("inconsistent alignment: num doesn't divide n\n", procName); - baa = boxaaCreate(nbaa); - for (i = 0, index = 0; i < nbaa; i++) { - boxat = boxaCreate(num); - for (j = 0; j < num; j++, index++) { - box = boxaGetBox(boxa, index, copyflag); - boxaAddBox(boxat, box, L_INSERT); - } - boxaaAddBoxa(baa, boxat, L_INSERT); - } - - return baa; -} - - -/*! - * \brief boxaaTranspose() - * - * \param[in] baas - * \return baad, or NULL on error - * - *
- * Notes:
- *      (1) If you think of a boxaa as a 2D array of boxes that is accessed
- *          row major, then each row is represented by one of the boxa.
- *          This function creates a new boxaa related to the input boxaa
- *          as a column major traversal of the input boxaa.
- *      (2) For example, if %baas has 2 boxa, each with 10 boxes, then
- *          %baad will have 10 boxa, each with 2 boxes.
- *      (3) Require for this transpose operation that each boxa in
- *          %baas has the same number of boxes.  This operation is useful
- *          when the i-th boxes in each boxa are meaningfully related.
- * 
- */ -BOXAA * -boxaaTranspose(BOXAA *baas) -{ -l_int32 i, j, ny, nb, nbox; -BOX *box; -BOXA *boxa; -BOXAA *baad; - - PROCNAME("boxaaTranspose"); - - if (!baas) - return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL); - if ((ny = boxaaGetCount(baas)) == 0) - return (BOXAA *)ERROR_PTR("baas empty", procName, NULL); - - /* Make sure that each boxa in baas has the same number of boxes */ - for (i = 0; i < ny; i++) { - if ((boxa = boxaaGetBoxa(baas, i, L_CLONE)) == NULL) - return (BOXAA *)ERROR_PTR("baas is missing a boxa", procName, NULL); - nb = boxaGetCount(boxa); - boxaDestroy(&boxa); - if (i == 0) - nbox = nb; - else if (nb != nbox) - return (BOXAA *)ERROR_PTR("boxa are not all the same size", - procName, NULL); - } - - /* baad[i][j] = baas[j][i] */ - baad = boxaaCreate(nbox); - for (i = 0; i < nbox; i++) { - boxa = boxaCreate(ny); - for (j = 0; j < ny; j++) { - box = boxaaGetBox(baas, j, i, L_COPY); - boxaAddBox(boxa, box, L_INSERT); - } - boxaaAddBoxa(baad, boxa, L_INSERT); - } - return baad; -} - - -/*! - * \brief boxaaAlignBox() - * - * \param[in] baa - * \param[in] box to be aligned with bext boxa in the baa, if possible - * \param[in] delta amount by which consecutive components can miss - * in overlap and still be included in the array - * \param[out] pindex index of boxa with best overlap, or if none match, - * this is the index of the next boxa to be generated - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is not greedy.  It finds the boxa whose vertical
- *          extent has the closest overlap with the input box.
- * 
- */ -l_ok -boxaaAlignBox(BOXAA *baa, - BOX *box, - l_int32 delta, - l_int32 *pindex) -{ -l_int32 i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex; -BOX *boxt; -BOXA *boxa; - - PROCNAME("boxaaAlignBox"); - - if (pindex) *pindex = 0; - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - - n = boxaaGetCount(baa); - boxGetGeometry(box, NULL, &y, NULL, &h); - maxovlp = -10000000; - for (i = 0; i < n; i++) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - if ((m = boxaGetCount(boxa)) == 0) { - boxaDestroy(&boxa); - L_WARNING("no boxes in boxa\n", procName); - continue; - } - boxaGetExtent(boxa, NULL, NULL, &boxt); - boxGetGeometry(boxt, NULL, &yt, NULL, &ht); - boxDestroy(&boxt); - boxaDestroy(&boxa); - - /* Overlap < 0 means the components do not overlap vertically */ - if (yt >= y) - ovlp = y + h - 1 - yt; - else - ovlp = yt + ht - 1 - y; - if (ovlp > maxovlp) { - maxovlp = ovlp; - maxindex = i; - } - } - - if (maxovlp + delta >= 0) - *pindex = maxindex; - else - *pindex = n; - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc3.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc3.c deleted file mode 100644 index f0da183f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc3.c +++ /dev/null @@ -1,1629 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file boxfunc3.c - *
- *
- *      Boxa/Boxaa painting into pix
- *           PIX             *pixMaskConnComp()
- *           PIX             *pixMaskBoxa()
- *           PIX             *pixPaintBoxa()
- *           PIX             *pixSetBlackOrWhiteBoxa()
- *           PIX             *pixPaintBoxaRandom()
- *           PIX             *pixBlendBoxaRandom()
- *           PIX             *pixDrawBoxa()
- *           PIX             *pixDrawBoxaRandom()
- *           PIX             *boxaaDisplay()
- *           PIXA            *pixaDisplayBoxaa()
- *
- *      Split mask components into Boxa
- *           BOXA            *pixSplitIntoBoxa()
- *           BOXA            *pixSplitComponentIntoBoxa()
- *           static l_int32   pixSearchForRectangle()
- *
- *      Represent horizontal or vertical mosaic strips
- *           BOXA            *makeMosaicStrips()
- *
- *      Comparison between boxa
- *           l_int32          boxaCompareRegions()
- *
- *      Reliable selection of a single large box
- *           BOX             *pixSelectLargeULComp()
- *           BOX             *boxaSelectLargeULBox()
- *
- *  See summary in pixPaintBoxa() of various ways to paint and draw
- *  boxes on images.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -static l_int32 pixSearchForRectangle(PIX *pixs, BOX *boxs, l_int32 minsum, - l_int32 skipdist, l_int32 delta, - l_int32 maxbg, l_int32 sideflag, - BOXA *boxat, NUMA *nascore); - -#ifndef NO_CONSOLE_IO -#define DEBUG_SPLIT 0 -#endif /* ~NO_CONSOLE_IO */ - -/*---------------------------------------------------------------------* - * Boxa/Boxaa painting into Pix * - *---------------------------------------------------------------------*/ -/*! - * \brief pixMaskConnComp() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity 4 or 8 - * \param[out] pboxa [optional] bounding boxes of c.c. - * \return pixd 1 bpp mask over the c.c., or NULL on error - * - *
- * Notes:
- *      (1) This generates a mask image with ON pixels over the
- *          b.b. of the c.c. in pixs.  If there are no ON pixels in pixs,
- *          pixd will also have no ON pixels.
- * 
- */ -PIX * -pixMaskConnComp(PIX *pixs, - l_int32 connectivity, - BOXA **pboxa) -{ -BOXA *boxa; -PIX *pixd; - - PROCNAME("pixMaskConnComp"); - - if (pboxa) *pboxa = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - boxa = pixConnComp(pixs, NULL, connectivity); - pixd = pixCreateTemplate(pixs); - if (boxaGetCount(boxa) != 0) - pixMaskBoxa(pixd, pixd, boxa, L_SET_PIXELS); - if (pboxa) - *pboxa = boxa; - else - boxaDestroy(&boxa); - return pixd; -} - - -/*! - * \brief pixMaskBoxa() - * - * \param[in] pixd [optional] may be NULL - * \param[in] pixs any depth; not cmapped - * \param[in] boxa of boxes, to paint - * \param[in] op L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return pixd with masking op over the boxes, or NULL on error - * - *
- * Notes:
- *      (1) This can be used with:
- *              pixd = NULL  (makes a new pixd)
- *              pixd = pixs  (in-place)
- *      (2) If pixd == NULL, this first makes a copy of pixs, and then
- *          bit-twiddles over the boxes.  Otherwise, it operates directly
- *          on pixs.
- *      (3) This simple function is typically used with 1 bpp images.
- *          It uses the 1-image rasterop function, rasteropUniLow(),
- *          to set, clear or flip the pixels in pixd.
- *      (4) If you want to generate a 1 bpp mask of ON pixels from the boxes
- *          in a Boxa, in a pix of size (w,h):
- *              pix = pixCreate(w, h, 1);
- *              pixMaskBoxa(pix, pix, boxa, L_SET_PIXELS);
- * 
- */ -PIX * -pixMaskBoxa(PIX *pixd, - PIX *pixs, - BOXA *boxa, - l_int32 op) -{ -l_int32 i, n, x, y, w, h; -BOX *box; - - PROCNAME("pixMaskBoxa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs is cmapped", procName, NULL); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("if pixd, must be in-place", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return (PIX *)ERROR_PTR("invalid op", procName, NULL); - - pixd = pixCopy(pixd, pixs); - if ((n = boxaGetCount(boxa)) == 0) { - L_WARNING("no boxes to mask\n", procName); - return pixd; - } - - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - if (op == L_SET_PIXELS) - pixRasterop(pixd, x, y, w, h, PIX_SET, NULL, 0, 0); - else if (op == L_CLEAR_PIXELS) - pixRasterop(pixd, x, y, w, h, PIX_CLR, NULL, 0, 0); - else /* op == L_FLIP_PIXELS */ - pixRasterop(pixd, x, y, w, h, PIX_NOT(PIX_DST), NULL, 0, 0); - boxDestroy(&box); - } - - return pixd; -} - - -/*! - * \brief pixPaintBoxa() - * - * \param[in] pixs any depth, can be cmapped - * \param[in] boxa of boxes, to paint - * \param[in] val rgba color to paint - * \return pixd with painted boxes, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is 1 bpp or is colormapped, it is converted to 8 bpp
- *          and the boxa is painted using a colormap; otherwise,
- *          it is converted to 32 bpp rgb.
- *      (2) There are several ways to display a box on an image:
- *            * Paint it as a solid color
- *            * Draw the outline
- *            * Blend the outline or region with the existing image
- *          We provide painting and drawing here; blending is in blend.c.
- *          When painting or drawing, the result can be either a
- *          cmapped image or an rgb image.  The dest will be cmapped
- *          if the src is either 1 bpp or has a cmap that is not full.
- *          To force RGB output, use pixConvertTo8(pixs, FALSE)
- *          before calling any of these paint and draw functions.
- * 
- */ -PIX * -pixPaintBoxa(PIX *pixs, - BOXA *boxa, - l_uint32 val) -{ -l_int32 i, n, d, rval, gval, bval, newindex; -l_int32 mapvacancy; /* true only if cmap and not full */ -BOX *box; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixPaintBoxa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - - if ((n = boxaGetCount(boxa)) == 0) { - L_WARNING("no boxes to paint; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - mapvacancy = FALSE; - if ((cmap = pixGetColormap(pixs)) != NULL) { - if (pixcmapGetCount(cmap) < 256) - mapvacancy = TRUE; - } - if (pixGetDepth(pixs) == 1 || mapvacancy) - pixd = pixConvertTo8(pixs, TRUE); - else - pixd = pixConvertTo32(pixs); - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - d = pixGetDepth(pixd); - if (d == 8) { /* colormapped */ - cmap = pixGetColormap(pixd); - extractRGBValues(val, &rval, &gval, &bval); - if (pixcmapAddNewColor(cmap, rval, gval, bval, &newindex)) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("cmap full; can't add", procName, NULL); - } - } - - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - if (d == 8) - pixSetInRectArbitrary(pixd, box, newindex); - else - pixSetInRectArbitrary(pixd, box, val); - boxDestroy(&box); - } - - return pixd; -} - - -/*! 
- * \brief pixSetBlackOrWhiteBoxa() - * - * \param[in] pixs any depth, can be cmapped - * \param[in] boxa [optional] of boxes, to clear or set - * \param[in] op L_SET_BLACK, L_SET_WHITE - * \return pixd with boxes filled with white or black, or NULL on error - */ -PIX * -pixSetBlackOrWhiteBoxa(PIX *pixs, - BOXA *boxa, - l_int32 op) -{ -l_int32 i, n, d, index; -l_uint32 color; -BOX *box; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixSetBlackOrWhiteBoxa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return pixCopy(NULL, pixs); - if ((n = boxaGetCount(boxa)) == 0) - return pixCopy(NULL, pixs); - - pixd = pixCopy(NULL, pixs); - d = pixGetDepth(pixd); - if (d == 1) { - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - if (op == L_SET_WHITE) - pixClearInRect(pixd, box); - else - pixSetInRect(pixd, box); - boxDestroy(&box); - } - return pixd; - } - - cmap = pixGetColormap(pixs); - if (cmap) { - color = (op == L_SET_WHITE) ? 1 : 0; - pixcmapAddBlackOrWhite(cmap, color, &index); - } else if (d == 8) { - color = (op == L_SET_WHITE) ? 0xff : 0x0; - } else if (d == 32) { - color = (op == L_SET_WHITE) ? 0xffffff00 : 0x0; - } else if (d == 2) { - color = (op == L_SET_WHITE) ? 0x3 : 0x0; - } else if (d == 4) { - color = (op == L_SET_WHITE) ? 0xf : 0x0; - } else if (d == 16) { - color = (op == L_SET_WHITE) ? 0xffff : 0x0; - } else { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("invalid depth", procName, NULL); - } - - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - if (cmap) - pixSetInRectArbitrary(pixd, box, index); - else - pixSetInRectArbitrary(pixd, box, color); - boxDestroy(&box); - } - - return pixd; -} - - -/*! - * \brief pixPaintBoxaRandom() - * - * \param[in] pixs any depth, can be cmapped - * \param[in] boxa of boxes, to paint - * \return pixd with painted boxes, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is 1 bpp, we paint the boxa using a colormap;
- *          otherwise, we convert to 32 bpp.
- *      (2) We use up to 254 different colors for painting the regions.
- *      (3) If boxes overlap, the later ones paint over earlier ones.
- * 
- */ -PIX * -pixPaintBoxaRandom(PIX *pixs, - BOXA *boxa) -{ -l_int32 i, n, d, rval, gval, bval, index; -l_uint32 val; -BOX *box; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixPaintBoxaRandom"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - - if ((n = boxaGetCount(boxa)) == 0) { - L_WARNING("no boxes to paint; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - if (pixGetDepth(pixs) == 1) - pixd = pixConvert1To8(NULL, pixs, 255, 0); - else - pixd = pixConvertTo32(pixs); - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - cmap = pixcmapCreateRandom(8, 1, 1); - d = pixGetDepth(pixd); /* either 8 or 32 */ - if (d == 8) /* colormapped */ - pixSetColormap(pixd, cmap); - - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - index = 1 + (i % 254); - if (d == 8) { - pixSetInRectArbitrary(pixd, box, index); - } else { /* d == 32 */ - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, &val); - pixSetInRectArbitrary(pixd, box, val); - } - boxDestroy(&box); - } - - if (d == 32) - pixcmapDestroy(&cmap); - return pixd; -} - - -/*! - * \brief pixBlendBoxaRandom() - * - * \param[in] pixs any depth; can be cmapped - * \param[in] boxa of boxes, to blend/paint - * \param[in] fract of box color to use - * \return pixd 32 bpp, with blend/painted boxes, or NULL on error - * - *
- * Notes:
- *      (1) pixs is converted to 32 bpp.
- *      (2) This differs from pixPaintBoxaRandom(), in that the
- *          colors here are blended with the color of pixs.
- *      (3) We use up to 254 different colors for painting the regions.
- *      (4) If boxes overlap, the final color depends only on the last
- *          rect that is used.
- * 
- */ -PIX * -pixBlendBoxaRandom(PIX *pixs, - BOXA *boxa, - l_float32 fract) -{ -l_int32 i, n, rval, gval, bval, index; -l_uint32 val; -BOX *box; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixBlendBoxaRandom"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - - if ((n = boxaGetCount(boxa)) == 0) { - L_WARNING("no boxes to paint; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - if ((pixd = pixConvertTo32(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not defined", procName, NULL); - - cmap = pixcmapCreateRandom(8, 1, 1); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - index = 1 + (i % 254); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, &val); - pixBlendInRect(pixd, box, val, fract); - boxDestroy(&box); - } - - pixcmapDestroy(&cmap); - return pixd; -} - - -/*! - * \brief pixDrawBoxa() - * - * \param[in] pixs any depth; can be cmapped - * \param[in] boxa of boxes, to draw - * \param[in] width of lines - * \param[in] val rgba color to draw - * \return pixd with outlines of boxes added, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is 1 bpp or is colormapped, it is converted to 8 bpp
- *          and the boxa is drawn using a colormap; otherwise,
- *          it is converted to 32 bpp rgb.
- * 
- */ -PIX * -pixDrawBoxa(PIX *pixs, - BOXA *boxa, - l_int32 width, - l_uint32 val) -{ -l_int32 rval, gval, bval, newindex; -l_int32 mapvacancy; /* true only if cmap and not full */ -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixDrawBoxa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - if (width < 1) - return (PIX *)ERROR_PTR("width must be >= 1", procName, NULL); - - if (boxaGetCount(boxa) == 0) { - L_WARNING("no boxes to draw; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - mapvacancy = FALSE; - if ((cmap = pixGetColormap(pixs)) != NULL) { - if (pixcmapGetCount(cmap) < 256) - mapvacancy = TRUE; - } - if (pixGetDepth(pixs) == 1 || mapvacancy) - pixd = pixConvertTo8(pixs, TRUE); - else - pixd = pixConvertTo32(pixs); - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - extractRGBValues(val, &rval, &gval, &bval); - if (pixGetDepth(pixd) == 8) { /* colormapped */ - cmap = pixGetColormap(pixd); - pixcmapAddNewColor(cmap, rval, gval, bval, &newindex); - } - - pixRenderBoxaArb(pixd, boxa, width, rval, gval, bval); - return pixd; -} - - -/*! - * \brief pixDrawBoxaRandom() - * - * \param[in] pixs any depth, can be cmapped - * \param[in] boxa of boxes, to draw - * \param[in] width thickness of line - * \return pixd with box outlines drawn, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is 1 bpp, we draw the boxa using a colormap;
- *          otherwise, we convert to 32 bpp.
- *      (2) We use up to 254 different colors for drawing the boxes.
- *      (3) If boxes overlap, the later ones draw over earlier ones.
- * 
- */ -PIX * -pixDrawBoxaRandom(PIX *pixs, - BOXA *boxa, - l_int32 width) -{ -l_int32 i, n, rval, gval, bval, index; -BOX *box; -PIX *pixd; -PIXCMAP *cmap; -PTAA *ptaa; - - PROCNAME("pixDrawBoxaRandom"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - if (width < 1) - return (PIX *)ERROR_PTR("width must be >= 1", procName, NULL); - - if ((n = boxaGetCount(boxa)) == 0) { - L_WARNING("no boxes to draw; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - /* Input depth = 1 bpp; generate cmapped output */ - if (pixGetDepth(pixs) == 1) { - ptaa = generatePtaaBoxa(boxa); - pixd = pixRenderRandomCmapPtaa(pixs, ptaa, 1, width, 1); - ptaaDestroy(&ptaa); - return pixd; - } - - /* Generate rgb output */ - pixd = pixConvertTo32(pixs); - cmap = pixcmapCreateRandom(8, 1, 1); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - index = 1 + (i % 254); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - pixRenderBoxArb(pixd, box, width, rval, gval, bval); - boxDestroy(&box); - } - pixcmapDestroy(&cmap); - return pixd; -} - - -/*! - * \brief boxaaDisplay() - * - * \param[in] pixs [optional] 1 bpp - * \param[in] baa boxaa, typically from a 2d sort - * \param[in] linewba line width to display outline of each boxa - * \param[in] linewb line width to display outline of each box - * \param[in] colorba color to display boxa - * \param[in] colorb color to display box - * \param[in] w width of outupt pix; use 0 if determined by %pixs or %baa - * \param[in] h height of outupt pix; use 0 if determined by %pixs or %baa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %pixs exists, this renders the boxes over an 8 bpp version
- *          of it.  Otherwise, it renders the boxes over an empty image
- *          with a white background.
- *      (2) If %pixs exists, the dimensions of %pixd are the same,
- *          and input values of %w and %h are ignored.
- *          If %pixs is NULL, the dimensions of %pixd are determined by
- *            - %w and %h if both are > 0, or
- *            - the minimum size required using all boxes in %baa.
- *
- * 
- */ -PIX * -boxaaDisplay(PIX *pixs, - BOXAA *baa, - l_int32 linewba, - l_int32 linewb, - l_uint32 colorba, - l_uint32 colorb, - l_int32 w, - l_int32 h) -{ -l_int32 i, j, n, m, rbox, gbox, bbox, rboxa, gboxa, bboxa; -BOX *box; -BOXA *boxa; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("boxaaDisplay"); - - if (!baa) - return (PIX *)ERROR_PTR("baa not defined", procName, NULL); - - if (w <= 0 || h <= 0) { - if (pixs) - pixGetDimensions(pixs, &w, &h, NULL); - else - boxaaGetExtent(baa, &w, &h, NULL, NULL); - } - - if (pixs) { - pixd = pixConvertTo8(pixs, 1); - cmap = pixGetColormap(pixd); - } else { - pixd = pixCreate(w, h, 8); - cmap = pixcmapCreate(8); - pixSetColormap(pixd, cmap); - pixcmapAddColor(cmap, 255, 255, 255); - } - extractRGBValues(colorb, &rbox, &gbox, &bbox); - extractRGBValues(colorba, &rboxa, &gboxa, &bboxa); - pixcmapAddColor(cmap, rbox, gbox, bbox); - pixcmapAddColor(cmap, rboxa, gboxa, bboxa); - - n = boxaaGetCount(baa); - for (i = 0; i < n; i++) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - boxaGetExtent(boxa, NULL, NULL, &box); - pixRenderBoxArb(pixd, box, linewba, rboxa, gboxa, bboxa); - boxDestroy(&box); - m = boxaGetCount(boxa); - for (j = 0; j < m; j++) { - box = boxaGetBox(boxa, j, L_CLONE); - pixRenderBoxArb(pixd, box, linewb, rbox, gbox, bbox); - boxDestroy(&box); - } - boxaDestroy(&boxa); - } - - return pixd; -} - - -/*! - * \brief pixaDisplayBoxaa() - * - * \param[in] pixas any depth, can be cmapped - * \param[in] baa boxes to draw on input pixa - * \param[in] colorflag L_DRAW_RED, L_DRAW_GREEN, etc - * \param[in] width thickness of lines - * \return pixa with box outlines drawn on each pix, or NULL on error - * - *
- * Notes:
- *      (1) All pix in %pixas that are not rgb are converted to rgb.
- *      (2) Each boxa in %baa contains boxes that will be drawn on
- *          the corresponding pix in %pixas.
- *      (3) The color of the boxes drawn on each pix are selected with
- *          %colorflag:
- *            * For red, green or blue: use L_DRAW_RED, etc.
- *            * For sequential r, g, b: use L_DRAW_RGB
- *            * For random colors: use L_DRAW_RANDOM
- * 
- */ -PIXA * -pixaDisplayBoxaa(PIXA *pixas, - BOXAA *baa, - l_int32 colorflag, - l_int32 width) -{ -l_int32 i, j, nba, n, nbox, rval, gval, bval; -l_uint32 color; -l_uint32 colors[255]; -BOXA *boxa; -BOX *box; -PIX *pix; -PIXA *pixad; - - PROCNAME("pixaDisplayBoxaa"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!baa) - return (PIXA *)ERROR_PTR("baa not defined", procName, NULL); - if (width < 1) - return (PIXA *)ERROR_PTR("width must be >= 1", procName, NULL); - if ((nba = boxaaGetCount(baa)) < 1) - return (PIXA *)ERROR_PTR("no boxa in baa", procName, NULL); - if ((n = pixaGetCount(pixas)) == 0) - return (PIXA *)ERROR_PTR("no pix in pixas", procName, NULL); - if (n != nba) - return (PIXA *)ERROR_PTR("num pix != num boxa", procName, NULL); - if (colorflag == L_DRAW_RED) - color = 0xff000000; - else if (colorflag == L_DRAW_GREEN) - color = 0x00ff0000; - else if (colorflag == L_DRAW_BLUE) - color = 0x0000ff00; - else if (colorflag == L_DRAW_RGB) - color = 0x000000ff; - else if (colorflag == L_DRAW_RANDOM) - color = 0x00000000; - else - return (PIXA *)ERROR_PTR("invalid colorflag", procName, NULL); - - if (colorflag == L_DRAW_RED || colorflag == L_DRAW_GREEN || - colorflag == L_DRAW_BLUE) { - for (i = 0; i < 255; i++) - colors[i] = color; - } else if (colorflag == L_DRAW_RGB) { - for (i = 0; i < 255; i++) { - if (i % 3 == L_DRAW_RED) - colors[i] = 0xff000000; - else if (i % 3 == L_DRAW_GREEN) - colors[i] = 0x00ff0000; - else /* i % 3 == L_DRAW_BLUE) */ - colors[i] = 0x0000ff00; - } - } else if (colorflag == L_DRAW_RANDOM) { - for (i = 0; i < 255; i++) { - rval = (l_uint32)rand() & 0xff; - gval = (l_uint32)rand() & 0xff; - bval = (l_uint32)rand() & 0xff; - composeRGBPixel(rval, gval, bval, &colors[i]); - } - } - - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixas, i, L_COPY); - boxa = boxaaGetBoxa(baa, i, L_CLONE); - nbox = boxaGetCount(boxa); - for (j = 0; j < nbox; j++) { - box = boxaGetBox(boxa, j, 
L_CLONE); - extractRGBValues(colors[j % 255], &rval, &gval, &bval); - pixRenderBoxArb(pix, box, width, rval, gval, bval); - boxDestroy(&box); - } - boxaDestroy(&boxa); - pixaAddPix(pixad, pix, L_INSERT); - } - - return pixad; -} - - -/*---------------------------------------------------------------------* - * Split mask components into Boxa * - *---------------------------------------------------------------------*/ -/*! - * \brief pixSplitIntoBoxa() - * - * \param[in] pixs 1 bpp - * \param[in] minsum minimum pixels to trigger propagation - * \param[in] skipdist distance before computing sum for propagation - * \param[in] delta difference required to stop propagation - * \param[in] maxbg maximum number of allowed bg pixels in ref scan - * \param[in] maxcomps use 0 for unlimited number of subdivided components - * \param[in] remainder set to 1 to get b.b. of remaining stuff - * \return boxa of rectangles covering the fg of pixs, or NULL on error - * - *
- * Notes:
- *      (1) This generates a boxa of rectangles that covers
- *          the fg of a mask.  For each 8-connected component in pixs,
- *          it does a greedy partitioning, choosing the largest
- *          rectangle found from each of the four directions at each iter.
- *          See pixSplitComponentIntoBoxa() for details.
- *      (2) The input parameters give some flexibility for boundary
- *          noise.  The resulting set of rectangles may cover some
- *          bg pixels.
- *      (3) This should be used when there are a small number of
- *          mask components, each of which has sides that are close
- *          to horizontal and vertical.  The input parameters %delta
- *          and %maxbg determine whether or not holes in the mask are covered.
- *      (4) The parameter %maxcomps gives the maximum number of allowed
- *          rectangles extracted from any single connected component.
- *          Use 0 if no limit is to be applied.
- *      (5) The flag %remainder specifies whether we take a final bounding
- *          box for anything left after the maximum number of allowed
- *          rectangle is extracted.
- * 
- */ -BOXA * -pixSplitIntoBoxa(PIX *pixs, - l_int32 minsum, - l_int32 skipdist, - l_int32 delta, - l_int32 maxbg, - l_int32 maxcomps, - l_int32 remainder) -{ -l_int32 i, n; -BOX *box; -BOXA *boxa, *boxas, *boxad; -PIX *pix; -PIXA *pixas; - - PROCNAME("pixSplitIntoBoxa"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - boxas = pixConnComp(pixs, &pixas, 8); - n = boxaGetCount(boxas); - boxad = boxaCreate(0); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixas, i, L_CLONE); - box = boxaGetBox(boxas, i, L_CLONE); - boxa = pixSplitComponentIntoBoxa(pix, box, minsum, skipdist, - delta, maxbg, maxcomps, remainder); - boxaJoin(boxad, boxa, 0, -1); - pixDestroy(&pix); - boxDestroy(&box); - boxaDestroy(&boxa); - } - - pixaDestroy(&pixas); - boxaDestroy(&boxas); - return boxad; -} - - -/*! - * \brief pixSplitComponentIntoBoxa() - * - * \param[in] pix 1 bpp - * \param[in] box [optional] location of pix w/rt an origin - * \param[in] minsum minimum pixels to trigger propagation - * \param[in] skipdist distance before computing sum for propagation - * \param[in] delta difference required to stop propagation - * \param[in] maxbg maximum number of allowed bg pixels in ref scan - * \param[in] maxcomps use 0 for unlimited number of subdivided components - * \param[in] remainder set to 1 to get b.b. of remaining stuff - * \return boxa of rectangles covering the fg of pix, or NULL on error - * - *
- * Notes:
- *      (1) This generates a boxa of rectangles that covers
- *          the fg of a mask.  It does so by a greedy partitioning of
- *          the mask, choosing the largest rectangle found from
- *          each of the four directions at each step.
- *      (2) The input parameters give some flexibility for boundary
- *          noise.  The resulting set of rectangles must cover all
- *          the fg pixels and, in addition, may cover some bg pixels.
- *          Using small input parameters on a noiseless mask (i.e., one
- *          that has only large vertical and horizontal edges) will
- *          result in a proper covering of only the fg pixels of the mask.
- *      (3) The input is assumed to be a single connected component, that
- *          may have holes.  From each side, sweep inward, counting
- *          the pixels.  If the count becomes greater than %minsum,
- *          and we have moved forward a further amount %skipdist,
- *          record that count ('countref'), but don't accept if the scan
- *          contains more than %maxbg bg pixels.  Continue the scan
- *          until we reach a count that differs from countref by at
- *          least %delta, at which point the propagation stops.  The box
- *          swept out gets a score, which is the sum of fg pixels
- *          minus a penalty.  The penalty is the number of bg pixels
- *          in the box.  This is done from all four sides, and the
- *          side with the largest score is saved as a rectangle.
- *          The process repeats until there is either no rectangle
- *          left, or there is one that can't be captured from any
- *          direction.  For the latter case, we simply accept the
- *          last rectangle.
- *      (4) The input box is only used to specify the location of
- *          the UL corner of pix, with respect to an origin that
- *          typically represents the UL corner of an underlying image,
- *          of which pix is one component.  If %box is null,
- *          the UL corner is taken to be (0, 0).
- *      (5) The parameter %maxcomps gives the maximum number of allowed
- *          rectangles extracted from any single connected component.
- *          Use 0 if no limit is to be applied.
- *      (6) The flag %remainder specifies whether we take a final bounding
- *          box for anything left after the maximum number of allowed
- *          rectangle is extracted.
- *      (7) So if %maxcomps > 0, it specifies that we want no more than
- *          the first %maxcomps rectangles that satisfy the input
- *          criteria.  After this, we can get a final rectangle that
- *          bounds everything left over by setting %remainder == 1.
- *          If %remainder == 0, we only get rectangles that satisfy
- *          the input criteria.
- *      (8) It should be noted that the removal of rectangles can
- *          break the original c.c. into several c.c.
- *      (9) Summing up:
- *            * If %maxcomp == 0, the splitting proceeds as far as possible.
- *            * If %maxcomp > 0, the splitting stops when %maxcomps are
- *                found, or earlier if no more components can be selected.
- *            * If %remainder == 1 and components remain that cannot be
- *                selected, they are returned as a single final rectangle;
- *                otherwise, they are ignored.
- * 
- */ -BOXA * -pixSplitComponentIntoBoxa(PIX *pix, - BOX *box, - l_int32 minsum, - l_int32 skipdist, - l_int32 delta, - l_int32 maxbg, - l_int32 maxcomps, - l_int32 remainder) -{ -l_int32 i, w, h, boxx, boxy, bx, by, bw, bh, maxdir, maxscore; -l_int32 iter; -BOX *boxs; /* shrinks as rectangular regions are removed */ -BOX *boxt1, *boxt2, *boxt3; -BOXA *boxat; /* stores rectangle data for each side in an iteration */ -BOXA *boxad; -NUMA *nascore, *nas; -PIX *pixs; - - PROCNAME("pixSplitComponentIntoBoxa"); - - if (!pix || pixGetDepth(pix) != 1) - return (BOXA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - - pixs = pixCopy(NULL, pix); - pixGetDimensions(pixs, &w, &h, NULL); - if (box) - boxGetGeometry(box, &boxx, &boxy, NULL, NULL); - else - boxx = boxy = 0; - boxs = boxCreate(0, 0, w, h); - boxad = boxaCreate(0); - - iter = 0; - while (boxs != NULL) { - boxGetGeometry(boxs, &bx, &by, &bw, &bh); - boxat = boxaCreate(4); /* potential rectangular regions */ - nascore = numaCreate(4); - for (i = 0; i < 4; i++) { - pixSearchForRectangle(pixs, boxs, minsum, skipdist, delta, maxbg, - i, boxat, nascore); - } - nas = numaGetSortIndex(nascore, L_SORT_DECREASING); - numaGetIValue(nas, 0, &maxdir); - numaGetIValue(nascore, maxdir, &maxscore); -#if DEBUG_SPLIT - lept_stderr("Iteration: %d\n", iter); - boxPrintStreamInfo(stderr, boxs); - boxaWriteStderr(boxat); - lept_stderr("\nmaxdir = %d, maxscore = %d\n\n", maxdir, maxscore); -#endif /* DEBUG_SPLIT */ - if (maxscore > 0) { /* accept this */ - boxt1 = boxaGetBox(boxat, maxdir, L_CLONE); - boxt2 = boxTransform(boxt1, boxx, boxy, 1.0, 1.0); - boxaAddBox(boxad, boxt2, L_INSERT); - pixClearInRect(pixs, boxt1); - boxDestroy(&boxt1); - pixClipBoxToForeground(pixs, boxs, NULL, &boxt3); - boxDestroy(&boxs); - boxs = boxt3; - if (boxs) { - boxGetGeometry(boxs, NULL, NULL, &bw, &bh); - if (bw < 2 || bh < 2) - boxDestroy(&boxs); /* we're done */ - } - } else { /* no more valid rectangles can be found */ - if (remainder == 1) 
{ /* save the last box */ - boxt1 = boxTransform(boxs, boxx, boxy, 1.0, 1.0); - boxaAddBox(boxad, boxt1, L_INSERT); - } - boxDestroy(&boxs); /* we're done */ - } - boxaDestroy(&boxat); - numaDestroy(&nascore); - numaDestroy(&nas); - - iter++; - if ((iter == maxcomps) && boxs) { - if (remainder == 1) { /* save the last box */ - boxt1 = boxTransform(boxs, boxx, boxy, 1.0, 1.0); - boxaAddBox(boxad, boxt1, L_INSERT); - } - boxDestroy(&boxs); /* we're done */ - } - } - - pixDestroy(&pixs); - return boxad; -} - - -/*! - * \brief pixSearchForRectangle() - * - * \param[in] pixs 1 bpp - * \param[in] boxs current region to investigate - * \param[in] minsum minimum pixels to trigger propagation - * \param[in] skipdist distance before computing sum for propagation - * \param[in] delta difference required to stop propagation - * \param[in] maxbg maximum number of allowed bg pixels in ref scan - * \param[in] sideflag side to search from - * \param[in] boxat add result of rectangular region found here - * \param[in] nascore add score for this rectangle here - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixSplitComponentIntoBoxa() for an explanation of the algorithm.
- *          This does the sweep from a single side.  For each iteration
- *          in pixSplitComponentIntoBoxa(), this will be called 4 times,
- *          for %sideflag = {0, 1, 2, 3}.
- *      (2) If a valid rectangle is not found, add a score of 0 and
- *          input a minimum box.
- * 
- */ -static l_int32 -pixSearchForRectangle(PIX *pixs, - BOX *boxs, - l_int32 minsum, - l_int32 skipdist, - l_int32 delta, - l_int32 maxbg, - l_int32 sideflag, - BOXA *boxat, - NUMA *nascore) -{ -l_int32 bx, by, bw, bh, width, height, setref, atref; -l_int32 minincol, maxincol, mininrow, maxinrow, minval, maxval, bgref; -l_int32 x, y, x0, y0, xref, yref, colsum, rowsum, score, countref, diff; -void **lines1; -BOX *boxr; - - PROCNAME("pixSearchForRectangle"); - - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - if (!boxs) - return ERROR_INT("boxs not defined", procName, 1); - if (!boxat) - return ERROR_INT("boxat not defined", procName, 1); - if (!nascore) - return ERROR_INT("nascore not defined", procName, 1); - - lines1 = pixGetLinePtrs(pixs, NULL); - boxGetGeometry(boxs, &bx, &by, &bw, &bh); - boxr = NULL; - setref = 0; - atref = 0; - maxval = 0; - minval = 100000; - score = 0; /* sum of all (fg - bg) pixels seen in the scan */ - xref = yref = 100000; /* init to impossibly big number */ - if (sideflag == L_FROM_LEFT) { - for (x = bx; x < bx + bw; x++) { - colsum = 0; - maxincol = 0; - minincol = 100000; - for (y = by; y < by + bh; y++) { - if (GET_DATA_BIT(lines1[y], x)) { - colsum++; - if (y > maxincol) maxincol = y; - if (y < minincol) minincol = y; - } - } - score += colsum; - - /* Enough fg to sweep out a rectangle? */ - if (!setref && colsum >= minsum) { - setref = 1; - xref = x + 10; - if (xref >= bx + bw) - goto failure; - } - - /* Reached the reference line; save the count; - * if there is too much bg, the rectangle is invalid. */ - if (setref && x == xref) { - atref = 1; - countref = colsum; - bgref = maxincol - minincol + 1 - countref; - if (bgref > maxbg) - goto failure; - } - - /* Have we left the rectangle? If so, save it along - * with the score. 
*/ - if (atref) { - diff = L_ABS(colsum - countref); - if (diff >= delta || x == bx + bw - 1) { - height = maxval - minval + 1; - width = x - bx; - if (x == bx + bw - 1) width = x - bx + 1; - boxr = boxCreate(bx, minval, width, height); - score = 2 * score - width * height; - goto success; - } - } - maxval = L_MAX(maxval, maxincol); - minval = L_MIN(minval, minincol); - } - goto failure; - } else if (sideflag == L_FROM_RIGHT) { - for (x = bx + bw - 1; x >= bx; x--) { - colsum = 0; - maxincol = 0; - minincol = 100000; - for (y = by; y < by + bh; y++) { - if (GET_DATA_BIT(lines1[y], x)) { - colsum++; - if (y > maxincol) maxincol = y; - if (y < minincol) minincol = y; - } - } - score += colsum; - if (!setref && colsum >= minsum) { - setref = 1; - xref = x - 10; - if (xref < bx) - goto failure; - } - if (setref && x == xref) { - atref = 1; - countref = colsum; - bgref = maxincol - minincol + 1 - countref; - if (bgref > maxbg) - goto failure; - } - if (atref) { - diff = L_ABS(colsum - countref); - if (diff >= delta || x == bx) { - height = maxval - minval + 1; - x0 = x + 1; - if (x == bx) x0 = x; - width = bx + bw - x0; - boxr = boxCreate(x0, minval, width, height); - score = 2 * score - width * height; - goto success; - } - } - maxval = L_MAX(maxval, maxincol); - minval = L_MIN(minval, minincol); - } - goto failure; - } else if (sideflag == L_FROM_TOP) { - for (y = by; y < by + bh; y++) { - rowsum = 0; - maxinrow = 0; - mininrow = 100000; - for (x = bx; x < bx + bw; x++) { - if (GET_DATA_BIT(lines1[y], x)) { - rowsum++; - if (x > maxinrow) maxinrow = x; - if (x < mininrow) mininrow = x; - } - } - score += rowsum; - if (!setref && rowsum >= minsum) { - setref = 1; - yref = y + 10; - if (yref >= by + bh) - goto failure; - } - if (setref && y == yref) { - atref = 1; - countref = rowsum; - bgref = maxinrow - mininrow + 1 - countref; - if (bgref > maxbg) - goto failure; - } - if (atref) { - diff = L_ABS(rowsum - countref); - if (diff >= delta || y == by + bh - 1) { - width 
= maxval - minval + 1; - height = y - by; - if (y == by + bh - 1) height = y - by + 1; - boxr = boxCreate(minval, by, width, height); - score = 2 * score - width * height; - goto success; - } - } - maxval = L_MAX(maxval, maxinrow); - minval = L_MIN(minval, mininrow); - } - goto failure; - } else if (sideflag == L_FROM_BOT) { - for (y = by + bh - 1; y >= by; y--) { - rowsum = 0; - maxinrow = 0; - mininrow = 100000; - for (x = bx; x < bx + bw; x++) { - if (GET_DATA_BIT(lines1[y], x)) { - rowsum++; - if (x > maxinrow) maxinrow = x; - if (x < mininrow) mininrow = x; - } - } - score += rowsum; - if (!setref && rowsum >= minsum) { - setref = 1; - yref = y - 10; - if (yref < by) - goto failure; - } - if (setref && y == yref) { - atref = 1; - countref = rowsum; - bgref = maxinrow - mininrow + 1 - countref; - if (bgref > maxbg) - goto failure; - } - if (atref) { - diff = L_ABS(rowsum - countref); - if (diff >= delta || y == by) { - width = maxval - minval + 1; - y0 = y + 1; - if (y == by) y0 = y; - height = by + bh - y0; - boxr = boxCreate(minval, y0, width, height); - score = 2 * score - width * height; - goto success; - } - } - maxval = L_MAX(maxval, maxinrow); - minval = L_MIN(minval, mininrow); - } - goto failure; - } - -failure: - numaAddNumber(nascore, 0); - boxaAddBox(boxat, boxCreate(0, 0, 1, 1), L_INSERT); /* min box */ - LEPT_FREE(lines1); - return 0; - -success: - numaAddNumber(nascore, score); - boxaAddBox(boxat, boxr, L_INSERT); - LEPT_FREE(lines1); - return 0; -} - - -/*---------------------------------------------------------------------* - * Represent horizontal or vertical mosaic strips * - *---------------------------------------------------------------------*/ -/*! - * \brief makeMosaicStrips() - * - * \param[in] w, h - * \param[in] direction L_SCAN_HORIZONTAL or L_SCAN_VERTICAL - * \param[in] size of strips in the scan direction - * \return boxa, or NULL on error - * - *
- * Notes:
- *      (1) For example, this can be used to generate a pixa of
- *          vertical strips of width 10 from an image, using:
- *             pixGetDimensions(pix, &w, &h, NULL);
- *             boxa = makeMosaicStrips(w, h, L_SCAN_HORIZONTAL, 10);
- *             pixa = pixClipRectangles(pix, boxa);
- *          All strips except the last will be the same width.  The
- *          last strip will have width w % 10.
- * 
- */ -BOXA * -makeMosaicStrips(l_int32 w, - l_int32 h, - l_int32 direction, - l_int32 size) -{ -l_int32 i, nstrips, extra; -BOX *box; -BOXA *boxa; - - PROCNAME("makeMosaicStrips"); - - if (w < 1 || h < 1) - return (BOXA *)ERROR_PTR("invalid w or h", procName, NULL); - if (direction != L_SCAN_HORIZONTAL && direction != L_SCAN_VERTICAL) - return (BOXA *)ERROR_PTR("invalid direction", procName, NULL); - if (size < 1) - return (BOXA *)ERROR_PTR("size < 1", procName, NULL); - - boxa = boxaCreate(0); - if (direction == L_SCAN_HORIZONTAL) { - nstrips = w / size; - for (i = 0; i < nstrips; i++) { - box = boxCreate(i * size, 0, size, h); - boxaAddBox(boxa, box, L_INSERT); - } - if ((extra = w % size) > 0) { - box = boxCreate(nstrips * size, 0, extra, h); - boxaAddBox(boxa, box, L_INSERT); - } - } else { - nstrips = h / size; - for (i = 0; i < nstrips; i++) { - box = boxCreate(0, i * size, w, size); - boxaAddBox(boxa, box, L_INSERT); - } - if ((extra = h % size) > 0) { - box = boxCreate(0, nstrips * size, w, extra); - boxaAddBox(boxa, box, L_INSERT); - } - } - return boxa; -} - - -/*---------------------------------------------------------------------* - * Comparison between boxa * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaCompareRegions() - * - * \param[in] boxa1, boxa2 - * \param[in] areathresh minimum area of boxes to be considered - * \param[out] pnsame true if same number of boxes - * \param[out] pdiffarea fractional difference in total area - * \param[out] pdiffxor [optional] fractional difference in xor of regions - * \param[out] ppixdb [optional] debug pix showing two boxa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This takes 2 boxa, removes all boxes smaller than a given area,
- *          and compares the remaining boxes between the boxa.
- *      (2) The area threshold is introduced to help remove noise from
- *          small components.  Any box with a smaller value of w * h
- *          will be removed from consideration.
- *      (3) The xor difference is the most stringent test, requiring alignment
- *          of the corresponding boxes.  It is also more computationally
- *          intensive and is optionally returned.  Alignment is to the
- *          UL corner of each region containing all boxes, as given by
- *          boxaGetExtent().
- *      (4) Both fractional differences are with respect to the total
- *          area in the two boxa.  They range from 0.0 to 1.0.
- *          A perfect match has value 0.0.  If both boxa are empty,
- *          we return 0.0; if one is empty we return 1.0.
- *      (5) An example input might be the rectangular regions of a
- *          segmentation mask for text or images from two pages.
- * 
- */ -l_ok -boxaCompareRegions(BOXA *boxa1, - BOXA *boxa2, - l_int32 areathresh, - l_int32 *pnsame, - l_float32 *pdiffarea, - l_float32 *pdiffxor, - PIX **ppixdb) -{ -l_int32 w, h, x3, y3, w3, h3, x4, y4, w4, h4, n3, n4, area1, area2; -l_int32 count3, count4, countxor; -l_int32 *tab; -BOX *box3, *box4; -BOXA *boxa3, *boxa4, *boxa3t, *boxa4t; -PIX *pix1, *pix2, *pix3, *pix4, *pix5; -PIXA *pixa; - - PROCNAME("boxaCompareRegions"); - - if (pdiffxor) *pdiffxor = 1.0; - if (ppixdb) *ppixdb = NULL; - if (pnsame) *pnsame = FALSE; - if (pdiffarea) *pdiffarea = 1.0; - if (!boxa1 || !boxa2) - return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1); - if (!pnsame) - return ERROR_INT("&nsame not defined", procName, 1); - if (!pdiffarea) - return ERROR_INT("&diffarea not defined", procName, 1); - - boxa3 = boxaSelectByArea(boxa1, areathresh, L_SELECT_IF_GTE, NULL); - boxa4 = boxaSelectByArea(boxa2, areathresh, L_SELECT_IF_GTE, NULL); - n3 = boxaGetCount(boxa3); - n4 = boxaGetCount(boxa4); - if (n3 == n4) - *pnsame = TRUE; - - /* There are no boxes in one or both */ - if (n3 == 0 || n4 == 0) { - boxaDestroy(&boxa3); - boxaDestroy(&boxa4); - if (n3 == 0 && n4 == 0) { /* they are both empty: we say they are the - * same; otherwise, they differ maximally - * and retain the default value. */ - *pdiffarea = 0.0; - if (pdiffxor) *pdiffxor = 0.0; - } - return 0; - } - - /* There are boxes in both */ - boxaGetArea(boxa3, &area1); - boxaGetArea(boxa4, &area2); - *pdiffarea = (l_float32)L_ABS(area1 - area2) / (l_float32)(area1 + area2); - if (!pdiffxor) { - boxaDestroy(&boxa3); - boxaDestroy(&boxa4); - return 0; - } - - /* The easiest way to get the xor of aligned boxes is to work - * with images of each boxa. This is done by translating each - * boxa so that the UL corner of the region that includes all - * boxes in the boxa is placed at the origin of each pix. 
*/ - boxaGetExtent(boxa3, &w, &h, &box3); - boxaGetExtent(boxa4, &w, &h, &box4); - boxGetGeometry(box3, &x3, &y3, &w3, &h3); - boxGetGeometry(box4, &x4, &y4, &w4, &h4); - boxa3t = boxaTransform(boxa3, -x3, -y3, 1.0, 1.0); - boxa4t = boxaTransform(boxa4, -x4, -y4, 1.0, 1.0); - w = L_MAX(x3 + w3, x4 + w4); - h = L_MAX(y3 + h3, y4 + h4); - pix3 = pixCreate(w, h, 1); /* use the max to keep everything in the xor */ - pix4 = pixCreate(w, h, 1); - pixMaskBoxa(pix3, pix3, boxa3t, L_SET_PIXELS); - pixMaskBoxa(pix4, pix4, boxa4t, L_SET_PIXELS); - tab = makePixelSumTab8(); - pixCountPixels(pix3, &count3, tab); - pixCountPixels(pix4, &count4, tab); - pix5 = pixXor(NULL, pix3, pix4); - pixCountPixels(pix5, &countxor, tab); - LEPT_FREE(tab); - *pdiffxor = (l_float32)countxor / (l_float32)(count3 + count4); - - if (ppixdb) { - pixa = pixaCreate(2); - pix1 = pixCreate(w, h, 32); - pixSetAll(pix1); - pixRenderHashBoxaBlend(pix1, boxa3, 5, 1, L_POS_SLOPE_LINE, 2, - 255, 0, 0, 0.5); - pixRenderHashBoxaBlend(pix1, boxa4, 5, 1, L_NEG_SLOPE_LINE, 2, - 0, 255, 0, 0.5); - pixaAddPix(pixa, pix1, L_INSERT); - pix2 = pixCreate(w, h, 32); - pixPaintThroughMask(pix2, pix3, x3, y3, 0xff000000); - pixPaintThroughMask(pix2, pix4, x4, y4, 0x00ff0000); - pixAnd(pix3, pix3, pix4); - pixPaintThroughMask(pix2, pix3, x3, y3, 0x0000ff00); - pixaAddPix(pixa, pix2, L_INSERT); - *ppixdb = pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 30, 2); - pixaDestroy(&pixa); - } - - boxDestroy(&box3); - boxDestroy(&box4); - boxaDestroy(&boxa3); - boxaDestroy(&boxa3t); - boxaDestroy(&boxa4); - boxaDestroy(&boxa4t); - pixDestroy(&pix3); - pixDestroy(&pix4); - pixDestroy(&pix5); - return 0; -} - - -/*---------------------------------------------------------------------* - * Reliable selection of a single large box * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixSelectLargeULComp() - * - * \param[in] pixs 1 bpp - * \param[in] areaslop fraction near but less than 1.0 - * \param[in] yslop number of pixels in y direction - * \param[in] connectivity 4 or 8 - * \return box, or NULL on error - * - *
- * Notes:
- *      (1) This selects a box near the top (first) and left (second)
- *          of the image, from the set of all boxes that have
- *                area >= %areaslop * (area of biggest box),
- *          where %areaslop is some fraction; say ~ 0.9.
- *      (2) For all boxes satisfying the above condition, select
- *          the left-most box that is within %yslop (say, 20) pixels
- *          of the box nearest the top.
- *      (3) This can be used to reliably select a specific one of
- *          the largest regions in an image, for applications where
- *          there are expected to be small variations in region size
- *          and location.
- *      (4) See boxSelectLargeULBox() for implementation details.
- * 
- */ -BOX * -pixSelectLargeULComp(PIX *pixs, - l_float32 areaslop, - l_int32 yslop, - l_int32 connectivity) -{ -BOX *box; -BOXA *boxa1; - - PROCNAME("pixSelectLargeULComp"); - - if (!pixs) - return (BOX *)ERROR_PTR("pixs not defined", procName, NULL); - if (areaslop < 0.0 || areaslop > 1.0) - return (BOX *)ERROR_PTR("invalid value for areaslop", procName, NULL); - yslop = L_MAX(0, yslop); - - boxa1 = pixConnCompBB(pixs, connectivity); - if (boxaGetCount(boxa1) == 0) { - boxaDestroy(&boxa1); - return NULL; - } - box = boxaSelectLargeULBox(boxa1, areaslop, yslop); - boxaDestroy(&boxa1); - return box; -} - - -/*! - * \brief boxaSelectLargeULBox() - * - * \param[in] boxas 1 bpp - * \param[in] areaslop fraction near but less than 1.0 - * \param[in] yslop number of pixels in y direction - * \return box, or NULL on error - * - *
- * Notes:
- *      (1) See usage notes in pixSelectLargeULComp().
- * 
- */ -BOX * -boxaSelectLargeULBox(BOXA *boxas, - l_float32 areaslop, - l_int32 yslop) -{ -l_int32 w, h, i, n, x1, y1, x2, y2, select; -l_float32 area, max_area; -BOX *box; -BOXA *boxa1, *boxa2, *boxa3; - - PROCNAME("boxaSelectLargeULBox"); - - if (!boxas) - return (BOX *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxaGetCount(boxas) == 0) - return (BOX *)ERROR_PTR("no boxes in boxas", procName, NULL); - if (areaslop < 0.0 || areaslop > 1.0) - return (BOX *)ERROR_PTR("invalid value for areaslop", procName, NULL); - yslop = L_MAX(0, yslop); - - boxa1 = boxaSort(boxas, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); - boxa2 = boxaSort(boxa1, L_SORT_BY_Y, L_SORT_INCREASING, NULL); - n = boxaGetCount(boxa2); - boxaGetBoxGeometry(boxa1, 0, NULL, NULL, &w, &h); /* biggest box by area */ - max_area = (l_float32)(w * h); - - /* boxa3 collects all boxes eligible by area, sorted top-down */ - boxa3 = boxaCreate(4); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa2, i, NULL, NULL, &w, &h); - area = (l_float32)(w * h); - if (area / max_area >= areaslop) { - box = boxaGetBox(boxa2, i, L_COPY); - boxaAddBox(boxa3, box, L_INSERT); - } - } - - /* Take the first (top-most box) unless the second (etc) has - * nearly the same y value but a smaller x value. 
*/ - n = boxaGetCount(boxa3); - boxaGetBoxGeometry(boxa3, 0, &x1, &y1, NULL, NULL); - select = 0; - for (i = 1; i < n; i++) { - boxaGetBoxGeometry(boxa3, i, &x2, &y2, NULL, NULL); - if (y2 - y1 < yslop && x2 < x1) { - select = i; - x1 = x2; /* but always compare against y1 */ - } - } - - box = boxaGetBox(boxa3, select, L_COPY); - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - return box; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc4.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc4.c deleted file mode 100644 index 9880a51a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc4.c +++ /dev/null @@ -1,1426 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file boxfunc4.c - *
- *
- *      Boxa and Boxaa range selection
- *           BOXA     *boxaSelectRange()
- *           BOXAA    *boxaaSelectRange()
- *
- *      Boxa size selection
- *           BOXA     *boxaSelectBySize()
- *           NUMA     *boxaMakeSizeIndicator()
- *           BOXA     *boxaSelectByArea()
- *           NUMA     *boxaMakeAreaIndicator()
- *           BOXA     *boxaSelectByWHRatio()
- *           NUMA     *boxaMakeWHRatioIndicator()
- *           BOXA     *boxaSelectWithIndicator()
- *
- *      Boxa permutation
- *           BOXA     *boxaPermutePseudorandom()
- *           BOXA     *boxaPermuteRandom()
- *           l_int32   boxaSwapBoxes()
- *
- *      Boxa and box conversions
- *           PTA      *boxaConvertToPta()
- *           BOXA     *ptaConvertToBoxa()
- *           PTA      *boxConvertToPta()
- *           BOX      *ptaConvertToBox()
- *
- *      Miscellaneous boxa functions
- *           l_int32   boxaGetExtent()
- *           l_int32   boxaGetCoverage()
- *           l_int32   boxaaSizeRange()
- *           l_int32   boxaSizeRange()
- *           l_int32   boxaLocationRange()
- *           NUMA     *boxaGetSizes()
- *           l_int32   boxaGetArea()
- *           PIX      *boxaDisplayTiled()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*---------------------------------------------------------------------* - * Boxa and boxaa range selection * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaSelectRange() - * - * \param[in] boxas - * \param[in] first use 0 to select from the beginning - * \param[in] last use -1 to select to the end - * \param[in] copyflag L_COPY, L_CLONE - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) The copyflag specifies what we do with each box from boxas.
- *          Specifically, L_CLONE inserts a clone into boxad of each
- *          selected box from boxas.
- * 
- */ -BOXA * -boxaSelectRange(BOXA *boxas, - l_int32 first, - l_int32 last, - l_int32 copyflag) -{ -l_int32 n, nbox, i; -BOX *box; -BOXA *boxad; - - PROCNAME("boxaSelectRange"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); - if ((n = boxaGetCount(boxas)) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, copyflag); - } - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) - return (BOXA *)ERROR_PTR("invalid first", procName, NULL); - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) - return (BOXA *)ERROR_PTR("first > last", procName, NULL); - - nbox = last - first + 1; - boxad = boxaCreate(nbox); - for (i = first; i <= last; i++) { - box = boxaGetBox(boxas, i, copyflag); - boxaAddBox(boxad, box, L_INSERT); - } - return boxad; -} - - -/*! - * \brief boxaaSelectRange() - * - * \param[in] baas - * \param[in] first use 0 to select from the beginning - * \param[in] last use -1 to select to the end - * \param[in] copyflag L_COPY, L_CLONE - * \return baad, or NULL on error - * - *
- * Notes:
- *      (1) The copyflag specifies what we do with each boxa from baas.
- *          Specifically, L_CLONE inserts a clone into baad of each
- *          selected boxa from baas.
- * 
- */ -BOXAA * -boxaaSelectRange(BOXAA *baas, - l_int32 first, - l_int32 last, - l_int32 copyflag) -{ -l_int32 n, nboxa, i; -BOXA *boxa; -BOXAA *baad; - - PROCNAME("boxaaSelectRange"); - - if (!baas) - return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL); - if ((n = boxaaGetCount(baas)) == 0) - return (BOXAA *)ERROR_PTR("empty baas", procName, NULL); - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) - return (BOXAA *)ERROR_PTR("invalid first", procName, NULL); - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) - return (BOXAA *)ERROR_PTR("first > last", procName, NULL); - - nboxa = last - first + 1; - baad = boxaaCreate(nboxa); - for (i = first; i <= last; i++) { - boxa = boxaaGetBoxa(baas, i, copyflag); - boxaaAddBoxa(baad, boxa, L_INSERT); - } - return baad; -} - - -/*---------------------------------------------------------------------* - * Boxa size selection * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaSelectBySize() - * - * \param[in] boxas - * \param[in] width, height threshold dimensions - * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT, - * L_SELECT_IF_EITHER, L_SELECT_IF_BOTH - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return boxad filtered set, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) Uses box copies in the new boxa.
- *      (3) If the selection type is L_SELECT_WIDTH, the input
- *          height is ignored, and v.v.
- *      (4) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -BOXA * -boxaSelectBySize(BOXA *boxas, - l_int32 width, - l_int32 height, - l_int32 type, - l_int32 relation, - l_int32 *pchanged) -{ -BOXA *boxad; -NUMA *na; - - PROCNAME("boxaSelectBySize"); - - if (pchanged) *pchanged = FALSE; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxaGetCount(boxas) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT && - type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH) - return (BOXA *)ERROR_PTR("invalid type", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (BOXA *)ERROR_PTR("invalid relation", procName, NULL); - - /* Compute the indicator array for saving components */ - if ((na = - boxaMakeSizeIndicator(boxas, width, height, type, relation)) == NULL) - return (BOXA *)ERROR_PTR("na not made", procName, NULL); - - /* Filter to get output */ - boxad = boxaSelectWithIndicator(boxas, na, pchanged); - - numaDestroy(&na); - return boxad; -} - - -/*! - * \brief boxaMakeSizeIndicator() - * - * \param[in] boxa - * \param[in] width, height threshold dimensions - * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT, - * L_SELECT_IF_EITHER, L_SELECT_IF_BOTH - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return na indicator array, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) If the selection type is L_SELECT_WIDTH, the input
- *          height is ignored, and v.v.
- *      (3) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -NUMA * -boxaMakeSizeIndicator(BOXA *boxa, - l_int32 width, - l_int32 height, - l_int32 type, - l_int32 relation) -{ -l_int32 i, n, w, h, ival; -NUMA *na; - - PROCNAME("boxaMakeSizeIndicator"); - - if (!boxa) - return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL); - if ((n = boxaGetCount(boxa)) == 0) - return (NUMA *)ERROR_PTR("boxa is empty", procName, NULL); - if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT && - type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH) - return (NUMA *)ERROR_PTR("invalid type", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (NUMA *)ERROR_PTR("invalid relation", procName, NULL); - - na = numaCreate(n); - for (i = 0; i < n; i++) { - ival = 0; - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - switch (type) - { - case L_SELECT_WIDTH: - if ((relation == L_SELECT_IF_LT && w < width) || - (relation == L_SELECT_IF_GT && w > width) || - (relation == L_SELECT_IF_LTE && w <= width) || - (relation == L_SELECT_IF_GTE && w >= width)) - ival = 1; - break; - case L_SELECT_HEIGHT: - if ((relation == L_SELECT_IF_LT && h < height) || - (relation == L_SELECT_IF_GT && h > height) || - (relation == L_SELECT_IF_LTE && h <= height) || - (relation == L_SELECT_IF_GTE && h >= height)) - ival = 1; - break; - case L_SELECT_IF_EITHER: - if (((relation == L_SELECT_IF_LT) && (w < width || h < height)) || - ((relation == L_SELECT_IF_GT) && (w > width || h > height)) || - ((relation == L_SELECT_IF_LTE) && (w <= width || h <= height)) || - ((relation == L_SELECT_IF_GTE) && (w >= width || h >= height))) - ival = 1; - break; - case L_SELECT_IF_BOTH: - if (((relation == L_SELECT_IF_LT) && (w < width && h < height)) || - ((relation == L_SELECT_IF_GT) && (w > width && h > height)) || - ((relation == L_SELECT_IF_LTE) && (w <= width && h <= height)) || - ((relation == L_SELECT_IF_GTE) && (w >= width && h >= height))) - ival = 1; - break; - 
default: - L_WARNING("can't get here!\n", procName); - break; - } - numaAddNumber(na, ival); - } - - return na; -} - - -/*! - * \brief boxaSelectByArea() - * - * \param[in] boxas - * \param[in] area threshold value of width * height - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return boxad filtered set, or NULL on error - * - *
- * Notes:
- *      (1) Uses box copies in the new boxa.
- *      (2) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -BOXA * -boxaSelectByArea(BOXA *boxas, - l_int32 area, - l_int32 relation, - l_int32 *pchanged) -{ -BOXA *boxad; -NUMA *na; - - PROCNAME("boxaSelectByArea"); - - if (pchanged) *pchanged = FALSE; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxaGetCount(boxas) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (BOXA *)ERROR_PTR("invalid relation", procName, NULL); - - /* Compute the indicator array for saving components */ - na = boxaMakeAreaIndicator(boxas, area, relation); - - /* Filter to get output */ - boxad = boxaSelectWithIndicator(boxas, na, pchanged); - - numaDestroy(&na); - return boxad; -} - - -/*! - * \brief boxaMakeAreaIndicator() - * - * \param[in] boxa - * \param[in] area threshold value of width * height - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return na indicator array, or NULL on error - * - *
- * Notes:
- *      (1) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -NUMA * -boxaMakeAreaIndicator(BOXA *boxa, - l_int32 area, - l_int32 relation) -{ -l_int32 i, n, w, h, ival; -NUMA *na; - - PROCNAME("boxaMakeAreaIndicator"); - - if (!boxa) - return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL); - if ((n = boxaGetCount(boxa)) == 0) - return (NUMA *)ERROR_PTR("boxa is empty", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (NUMA *)ERROR_PTR("invalid relation", procName, NULL); - - na = numaCreate(n); - for (i = 0; i < n; i++) { - ival = 0; - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - - if ((relation == L_SELECT_IF_LT && w * h < area) || - (relation == L_SELECT_IF_GT && w * h > area) || - (relation == L_SELECT_IF_LTE && w * h <= area) || - (relation == L_SELECT_IF_GTE && w * h >= area)) - ival = 1; - numaAddNumber(na, ival); - } - - return na; -} - - -/*! - * \brief boxaSelectByWHRatio() - * - * \param[in] boxas - * \param[in] ratio width/height threshold value - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return boxad filtered set, or NULL on error - * - *
- * Notes:
- *      (1) Uses box copies in the new boxa.
- *      (2) To keep narrow components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep wide components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -BOXA * -boxaSelectByWHRatio(BOXA *boxas, - l_float32 ratio, - l_int32 relation, - l_int32 *pchanged) -{ -BOXA *boxad; -NUMA *na; - - PROCNAME("boxaSelectByWHRatio"); - - if (pchanged) *pchanged = FALSE; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (boxaGetCount(boxas) == 0) { - L_WARNING("boxas is empty\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (BOXA *)ERROR_PTR("invalid relation", procName, NULL); - - /* Compute the indicator array for saving components */ - na = boxaMakeWHRatioIndicator(boxas, ratio, relation); - - /* Filter to get output */ - boxad = boxaSelectWithIndicator(boxas, na, pchanged); - - numaDestroy(&na); - return boxad; -} - - -/*! - * \brief boxaMakeWHRatioIndicator() - * - * \param[in] boxa - * \param[in] ratio width/height threshold value - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return na indicator array, or NULL on error - * - *
- * Notes:
- *      (1) To keep narrow components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep wide components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -NUMA * -boxaMakeWHRatioIndicator(BOXA *boxa, - l_float32 ratio, - l_int32 relation) -{ -l_int32 i, n, w, h, ival; -l_float32 whratio; -NUMA *na; - - PROCNAME("boxaMakeWHRatioIndicator"); - - if (!boxa) - return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL); - if ((n = boxaGetCount(boxa)) == 0) - return (NUMA *)ERROR_PTR("boxa is empty", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (NUMA *)ERROR_PTR("invalid relation", procName, NULL); - - na = numaCreate(n); - for (i = 0; i < n; i++) { - ival = 0; - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - whratio = (l_float32)w / (l_float32)h; - - if ((relation == L_SELECT_IF_LT && whratio < ratio) || - (relation == L_SELECT_IF_GT && whratio > ratio) || - (relation == L_SELECT_IF_LTE && whratio <= ratio) || - (relation == L_SELECT_IF_GTE && whratio >= ratio)) - ival = 1; - numaAddNumber(na, ival); - } - - return na; -} - - -/*! - * \brief boxaSelectWithIndicator() - * - * \param[in] boxas - * \param[in] na indicator numa - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return boxad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a copy of the boxa if no components are removed.
- *      (2) Uses box copies in the new boxa.
- *      (3) The indicator numa has values 0 (ignore) and 1 (accept).
- *      (4) If all indicator values are 0, the returned boxa is empty.
- * 
- */ -BOXA * -boxaSelectWithIndicator(BOXA *boxas, - NUMA *na, - l_int32 *pchanged) -{ -l_int32 i, n, ival, nsave; -BOX *box; -BOXA *boxad; - - PROCNAME("boxaSelectWithIndicator"); - - if (pchanged) *pchanged = FALSE; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (!na) - return (BOXA *)ERROR_PTR("na not defined", procName, NULL); - - nsave = 0; - n = numaGetCount(na); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - if (ival == 1) nsave++; - } - - if (nsave == n) { - if (pchanged) *pchanged = FALSE; - return boxaCopy(boxas, L_COPY); - } - if (pchanged) *pchanged = TRUE; - boxad = boxaCreate(nsave); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - if (ival == 0) continue; - box = boxaGetBox(boxas, i, L_COPY); - boxaAddBox(boxad, box, L_INSERT); - } - - return boxad; -} - - -/*---------------------------------------------------------------------* - * Boxa Permutation * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaPermutePseudorandom() - * - * \param[in] boxas input boxa - * \return boxad with boxes permuted, or NULL on error - * - *
- * Notes:
- *      (1) This does a pseudorandom in-place permutation of the boxes.
- *      (2) The result is guaranteed not to have any boxes in their
- *          original position, but it is not very random.  If you
- *          need randomness, use boxaPermuteRandom().
- * 
- */ -BOXA * -boxaPermutePseudorandom(BOXA *boxas) -{ -l_int32 n; -NUMA *na; -BOXA *boxad; - - PROCNAME("boxaPermutePseudorandom"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - - n = boxaGetCount(boxas); - na = numaPseudorandomSequence(n, 0); - boxad = boxaSortByIndex(boxas, na); - numaDestroy(&na); - return boxad; -} - - -/*! - * \brief boxaPermuteRandom() - * - * \param[in] boxad [optional] can be null or equal to boxas - * \param[in] boxas input boxa - * \return boxad with boxes permuted, or NULL on error - * - *
- * Notes:
- *      (1) If boxad is null, make a copy of boxas and permute the copy.
- *          Otherwise, boxad must be equal to boxas, and the operation
- *          is done in-place.
- *      (2) If boxas is empty, return an empty boxad.
- *      (3) This does a random in-place permutation of the boxes,
- *          by swapping each box in turn with a random box.  The
- *          result is almost guaranteed not to have any boxes in their
- *          original position.
- *      (4) MSVC rand() has MAX_RAND = 2^15 - 1, so it will not do
- *          a proper permutation is the number of boxes exceeds this.
- * 
- */ -BOXA * -boxaPermuteRandom(BOXA *boxad, - BOXA *boxas) -{ -l_int32 i, n, index; - - PROCNAME("boxaPermuteRandom"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - if (boxad && (boxad != boxas)) - return (BOXA *)ERROR_PTR("boxad defined but in-place", procName, NULL); - - if (!boxad) - boxad = boxaCopy(boxas, L_COPY); - if ((n = boxaGetCount(boxad)) == 0) - return boxad; - index = (l_uint32)rand() % n; - index = L_MAX(1, index); - boxaSwapBoxes(boxad, 0, index); - for (i = 1; i < n; i++) { - index = (l_uint32)rand() % n; - if (index == i) index--; - boxaSwapBoxes(boxad, i, index); - } - - return boxad; -} - - -/*! - * \brief boxaSwapBoxes() - * - * \param[in] boxa - * \param[in] i, j two indices of boxes, that are to be swapped - * \return 0 if OK, 1 on error - */ -l_ok -boxaSwapBoxes(BOXA *boxa, - l_int32 i, - l_int32 j) -{ -l_int32 n; -BOX *box; - - PROCNAME("boxaSwapBoxes"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - n = boxaGetCount(boxa); - if (i < 0 || i >= n) - return ERROR_INT("i invalid", procName, 1); - if (j < 0 || j >= n) - return ERROR_INT("j invalid", procName, 1); - if (i == j) - return ERROR_INT("i == j", procName, 1); - - box = boxa->box[i]; - boxa->box[i] = boxa->box[j]; - boxa->box[j] = box; - return 0; -} - - -/*---------------------------------------------------------------------* - * Boxa and Box Conversions * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaConvertToPta() - * - * \param[in] boxa - * \param[in] ncorners 2 or 4 for the representation of each box - * \return pta with %ncorners points for each box in the boxa, - * or NULL on error - * - *
- * Notes:
- *      (1) If ncorners == 2, we select the UL and LR corners.
- *          Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
- *      (2) Other boxa --> pta functions are:
- *          * boxaExtractAsPta(): allows extraction of any dimension
- *            and/or side location, with each in a separate pta.
- *          * boxaExtractCorners(): extracts any of the four corners as a pta.
- * 
- */ -PTA * -boxaConvertToPta(BOXA *boxa, - l_int32 ncorners) -{ -l_int32 i, n; -BOX *box; -PTA *pta, *pta1; - - PROCNAME("boxaConvertToPta"); - - if (!boxa) - return (PTA *)ERROR_PTR("boxa not defined", procName, NULL); - if (ncorners != 2 && ncorners != 4) - return (PTA *)ERROR_PTR("ncorners not 2 or 4", procName, NULL); - - n = boxaGetCount(boxa); - if ((pta = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_COPY); - pta1 = boxConvertToPta(box, ncorners); - ptaJoin(pta, pta1, 0, -1); - boxDestroy(&box); - ptaDestroy(&pta1); - } - - return pta; -} - - -/*! - * \brief ptaConvertToBoxa() - * - * \param[in] pta - * \param[in] ncorners 2 or 4 for the representation of each box - * \return boxa with one box for each 2 or 4 points in the pta, - * or NULL on error - * - *
- * Notes:
- *      (1) For 2 corners, the order of the 2 points is UL, LR.
- *          For 4 corners, the order of points is UL, UR, LL, LR.
- *      (2) Each derived box is the minimum size containing all corners.
- * 
- */ -BOXA * -ptaConvertToBoxa(PTA *pta, - l_int32 ncorners) -{ -l_int32 i, n, nbox, x1, y1, x2, y2, x3, y3, x4, y4, x, y, xmax, ymax; -BOX *box; -BOXA *boxa; - - PROCNAME("ptaConvertToBoxa"); - - if (!pta) - return (BOXA *)ERROR_PTR("pta not defined", procName, NULL); - if (ncorners != 2 && ncorners != 4) - return (BOXA *)ERROR_PTR("ncorners not 2 or 4", procName, NULL); - n = ptaGetCount(pta); - if (n % ncorners != 0) - return (BOXA *)ERROR_PTR("size % ncorners != 0", procName, NULL); - nbox = n / ncorners; - if ((boxa = boxaCreate(nbox)) == NULL) - return (BOXA *)ERROR_PTR("boxa not made", procName, NULL); - for (i = 0; i < n; i += ncorners) { - ptaGetIPt(pta, i, &x1, &y1); - ptaGetIPt(pta, i + 1, &x2, &y2); - if (ncorners == 2) { - box = boxCreate(x1, y1, x2 - x1 + 1, y2 - y1 + 1); - boxaAddBox(boxa, box, L_INSERT); - continue; - } - ptaGetIPt(pta, i + 2, &x3, &y3); - ptaGetIPt(pta, i + 3, &x4, &y4); - x = L_MIN(x1, x3); - y = L_MIN(y1, y2); - xmax = L_MAX(x2, x4); - ymax = L_MAX(y3, y4); - box = boxCreate(x, y, xmax - x + 1, ymax - y + 1); - boxaAddBox(boxa, box, L_INSERT); - } - - return boxa; -} - - -/*! - * \brief boxConvertToPta() - * - * \param[in] box - * \param[in] ncorners 2 or 4 for the representation of the box - * \return pta with %ncorners points, or NULL on error - * - *
- * Notes:
- *      (1) If ncorners == 2, we select the UL and LR corners.
- *          Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
- * 
- */ -PTA * -boxConvertToPta(BOX *box, - l_int32 ncorners) -{ -l_int32 x, y, w, h; -PTA *pta; - - PROCNAME("boxConvertToPta"); - - if (!box) - return (PTA *)ERROR_PTR("box not defined", procName, NULL); - if (ncorners != 2 && ncorners != 4) - return (PTA *)ERROR_PTR("ncorners not 2 or 4", procName, NULL); - - if ((pta = ptaCreate(ncorners)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - boxGetGeometry(box, &x, &y, &w, &h); - ptaAddPt(pta, x, y); - if (ncorners == 2) { - ptaAddPt(pta, x + w - 1, y + h - 1); - } else { - ptaAddPt(pta, x + w - 1, y); - ptaAddPt(pta, x, y + h - 1); - ptaAddPt(pta, x + w - 1, y + h - 1); - } - - return pta; -} - - -/*! - * \brief ptaConvertToBox() - * - * \param[in] pta - * \return box minimum containing all points in the pta, or NULL on error - * - *
- * Notes:
- *      (1) For 2 corners, the order of the 2 points is UL, LR.
- *          For 4 corners, the order of points is UL, UR, LL, LR.
- * 
- */ -BOX * -ptaConvertToBox(PTA *pta) -{ -l_int32 n, x1, y1, x2, y2, x3, y3, x4, y4, x, y, xmax, ymax; - - PROCNAME("ptaConvertToBox"); - - if (!pta) - return (BOX *)ERROR_PTR("pta not defined", procName, NULL); - n = ptaGetCount(pta); - ptaGetIPt(pta, 0, &x1, &y1); - ptaGetIPt(pta, 1, &x2, &y2); - if (n == 2) - return boxCreate(x1, y1, x2 - x1 + 1, y2 - y1 + 1); - - /* 4 corners */ - ptaGetIPt(pta, 2, &x3, &y3); - ptaGetIPt(pta, 3, &x4, &y4); - x = L_MIN(x1, x3); - y = L_MIN(y1, y2); - xmax = L_MAX(x2, x4); - ymax = L_MAX(y3, y4); - return boxCreate(x, y, xmax - x + 1, ymax - y + 1); -} - - -/*---------------------------------------------------------------------* - * Miscellaneous Boxa functions * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaGetExtent() - * - * \param[in] boxa - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pbox [optional] minimum box containing all boxes in boxa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes the minimum rectangular bounding region
- *          that contains all valid boxes in a boxa.
- *      (2) The returned w and h are the minimum size image
- *          that would contain all boxes untranslated.
- *      (3) If there are no valid boxes, returned w and h are 0 and
- *          all parameters in the returned box are 0.  This
- *          is not an error, because an empty boxa is valid and
- *          boxaGetExtent() is required for serialization.
- * 
- */ -l_ok -boxaGetExtent(BOXA *boxa, - l_int32 *pw, - l_int32 *ph, - BOX **pbox) -{ -l_int32 i, n, x, y, w, h, xmax, ymax, xmin, ymin, found; - - PROCNAME("boxaGetExtent"); - - if (!pw && !ph && !pbox) - return ERROR_INT("no ptrs defined", procName, 1); - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbox) *pbox = NULL; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - xmax = ymax = 0; - xmin = ymin = 100000000; - found = FALSE; - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - if (w <= 0 || h <= 0) - continue; - found = TRUE; - xmin = L_MIN(xmin, x); - ymin = L_MIN(ymin, y); - xmax = L_MAX(xmax, x + w); - ymax = L_MAX(ymax, y + h); - } - if (found == FALSE) /* no valid boxes in boxa */ - xmin = ymin = 0; - if (pw) *pw = xmax; - if (ph) *ph = ymax; - if (pbox) - *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin); - - return 0; -} - - -/*! - * \brief boxaGetCoverage() - * - * \param[in] boxa - * \param[in] wc, hc dimensions of overall clipping rectangle with UL - * corner at (0, 0 that is covered by the boxes. - * \param[in] exactflag 1 for guaranteeing an exact result; 0 for getting - * an exact result only if the boxes do not overlap - * \param[out] pfract sum of box area as fraction of w * h - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The boxes in boxa are clipped to the input rectangle.
- *      (2) * When %exactflag == 1, we generate a 1 bpp pix of size
- *            wc x hc, paint all the boxes black, and count the fg pixels.
- *            This can take 1 msec on a large page with many boxes.
- *          * When %exactflag == 0, we clip each box to the wc x hc region
- *            and sum the resulting areas.  This is faster.
- *          * The results are the same when none of the boxes overlap
- *            within the wc x hc region.
- * 
- */ -l_ok -boxaGetCoverage(BOXA *boxa, - l_int32 wc, - l_int32 hc, - l_int32 exactflag, - l_float32 *pfract) -{ -l_int32 i, n, x, y, w, h, sum; -BOX *box, *boxc; -PIX *pixt; - - PROCNAME("boxaGetCoverage"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - if (n == 0) - return ERROR_INT("no boxes in boxa", procName, 1); - - if (exactflag == 0) { /* quick and dirty */ - sum = 0; - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - if ((boxc = boxClipToRectangle(box, wc, hc)) != NULL) { - boxGetGeometry(boxc, NULL, NULL, &w, &h); - sum += w * h; - boxDestroy(&boxc); - } - boxDestroy(&box); - } - } else { /* slower and exact */ - pixt = pixCreate(wc, hc, 1); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - pixRasterop(pixt, x, y, w, h, PIX_SET, NULL, 0, 0); - boxDestroy(&box); - } - pixCountPixels(pixt, &sum, NULL); - pixDestroy(&pixt); - } - - *pfract = (l_float32)sum / (l_float32)(wc * hc); - return 0; -} - - -/*! 
- * \brief boxaaSizeRange() - * - * \param[in] baa - * \param[out] pminw [optional] min width of all boxes - * \param[out] pmaxw [optional] max width of all boxes - * \param[out] pminh [optional] min height of all boxes - * \param[out] pmaxh [optional] max height of all boxes - * \return 0 if OK, 1 on error - */ -l_ok -boxaaSizeRange(BOXAA *baa, - l_int32 *pminw, - l_int32 *pminh, - l_int32 *pmaxw, - l_int32 *pmaxh) -{ -l_int32 minw, minh, maxw, maxh, minbw, minbh, maxbw, maxbh, i, n; -BOXA *boxa; - - PROCNAME("boxaaSizeRange"); - - if (!pminw && !pmaxw && !pminh && !pmaxh) - return ERROR_INT("no data can be returned", procName, 1); - if (pminw) *pminw = 0; - if (pminh) *pminh = 0; - if (pmaxw) *pmaxw = 0; - if (pmaxh) *pmaxh = 0; - if (!baa) - return ERROR_INT("baa not defined", procName, 1); - - minw = minh = 100000000; - maxw = maxh = 0; - n = boxaaGetCount(baa); - for (i = 0; i < n; i++) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - boxaSizeRange(boxa, &minbw, &minbh, &maxbw, &maxbh); - if (minbw < minw) - minw = minbw; - if (minbh < minh) - minh = minbh; - if (maxbw > maxw) - maxw = maxbw; - if (maxbh > maxh) - maxh = maxbh; - boxaDestroy(&boxa); - } - - if (pminw) *pminw = minw; - if (pminh) *pminh = minh; - if (pmaxw) *pmaxw = maxw; - if (pmaxh) *pmaxh = maxh; - return 0; -} - - -/*! 
- * \brief boxaSizeRange() - * - * \param[in] boxa - * \param[out] pminw [optional] min width of all boxes - * \param[out] pmaxw [optional] max width of all boxes - * \param[out] pminh [optional] min height of all boxes - * \param[out] pmaxh [optional] max height of all boxes - * \return 0 if OK, 1 on error - */ -l_ok -boxaSizeRange(BOXA *boxa, - l_int32 *pminw, - l_int32 *pminh, - l_int32 *pmaxw, - l_int32 *pmaxh) -{ -l_int32 minw, minh, maxw, maxh, i, n, w, h; - - PROCNAME("boxaSizeRange"); - - if (!pminw && !pmaxw && !pminh && !pmaxh) - return ERROR_INT("no data can be returned", procName, 1); - if (pminw) *pminw = 0; - if (pminh) *pminh = 0; - if (pmaxw) *pmaxw = 0; - if (pmaxh) *pmaxh = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - minw = minh = 100000000; - maxw = maxh = 0; - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - if (w < minw) - minw = w; - if (h < minh) - minh = h; - if (w > maxw) - maxw = w; - if (h > maxh) - maxh = h; - } - - if (pminw) *pminw = minw; - if (pminh) *pminh = minh; - if (pmaxw) *pmaxw = maxw; - if (pmaxh) *pmaxh = maxh; - return 0; -} - - -/*! 
- * \brief boxaLocationRange() - * - * \param[in] boxa - * \param[out] pminx [optional] min (UL corner) x value of all boxes - * \param[out] pminy [optional] min (UL corner) y value of all boxes - * \param[out] pmaxx [optional] max (UL corner) x value of all boxes - * \param[out] pmaxy [optional] max (UL corner) y value of all boxes - * \return 0 if OK, 1 on error - */ -l_ok -boxaLocationRange(BOXA *boxa, - l_int32 *pminx, - l_int32 *pminy, - l_int32 *pmaxx, - l_int32 *pmaxy) -{ -l_int32 minx, miny, maxx, maxy, i, n, x, y; - - PROCNAME("boxaLocationRange"); - - if (!pminx && !pminy && !pmaxx && !pmaxy) - return ERROR_INT("no data can be returned", procName, 1); - if (pminx) *pminx = 0; - if (pminy) *pminy = 0; - if (pmaxx) *pmaxx = 0; - if (pmaxy) *pmaxy = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - minx = miny = 100000000; - maxx = maxy = 0; - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, NULL, NULL); - if (x < minx) - minx = x; - if (y < miny) - miny = y; - if (x > maxx) - maxx = x; - if (y > maxy) - maxy = y; - } - - if (pminx) *pminx = minx; - if (pminy) *pminy = miny; - if (pmaxx) *pmaxx = maxx; - if (pmaxy) *pmaxy = maxy; - - return 0; -} - - -/*! 
- * \brief boxaGetSizes() - * - * \param[in] boxa - * \param[out] pnaw [optional] widths of valid boxes - * \param[out] pnah [optional] heights of valid boxes - * \return 0 if OK, 1 on error - */ -l_ok -boxaGetSizes(BOXA *boxa, - NUMA **pnaw, - NUMA **pnah) -{ -l_int32 i, n, w, h; -BOX *box; - - PROCNAME("boxaGetSizes"); - - if (pnaw) *pnaw = NULL; - if (pnah) *pnah = NULL; - if (!pnaw && !pnah) - return ERROR_INT("no output requested", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetValidCount(boxa); - if (pnaw) *pnaw = numaCreate(n); - if (pnah) *pnah = numaCreate(n); - for (i = 0; i < n; i++) { - box = boxaGetValidBox(boxa, i, L_COPY); - if (box) { - boxGetGeometry(box, NULL, NULL, &w, &h); - if (pnaw) numaAddNumber(*pnaw, w); - if (pnah) numaAddNumber(*pnah, h); - boxDestroy(&box); - } - } - - return 0; -} - - -/*! - * \brief boxaGetArea() - * - * \param[in] boxa - * \param[out] parea total area of all boxes - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Measures the total area of the boxes, without regard to overlaps.
- * 
- */ -l_ok -boxaGetArea(BOXA *boxa, - l_int32 *parea) -{ -l_int32 i, n, w, h; - - PROCNAME("boxaGetArea"); - - if (!parea) - return ERROR_INT("&area not defined", procName, 1); - *parea = 0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); - *parea += w * h; - } - return 0; -} - - -/*! - * \brief boxaDisplayTiled() - * - * \param[in] boxas - * \param[in] pixa [optional] background for each box - * \param[in] first index of first box - * \param[in] last index of last box; use -1 to go to end - * \param[in] maxwidth of output image - * \param[in] linewidth width of box outlines, before scaling - * \param[in] scalefactor applied to every box; use 1.0 for no scaling - * \param[in] background 0 for white, 1 for black; this is the color - * of the spacing between the images - * \param[in] spacing between images, and on outside - * \param[in] border width of black border added to each image; - * use 0 for no border - * \return pixd of tiled images of boxes, or NULL on error - * - *
- * Notes:
- *      (1) Displays each box separately in a tiled 32 bpp image.
- *      (2) If pixa is defined, it must have the same count as the boxa,
- *          and it will be a background over with each box is rendered.
- *          If pixa is not defined, the boxes will be rendered over
- *          blank images of identical size.
- *      (3) See pixaDisplayTiledInRows() for other parameters.
- * 
- */ -PIX * -boxaDisplayTiled(BOXA *boxas, - PIXA *pixa, - l_int32 first, - l_int32 last, - l_int32 maxwidth, - l_int32 linewidth, - l_float32 scalefactor, - l_int32 background, - l_int32 spacing, - l_int32 border) -{ -char buf[32]; -l_int32 i, n, npix, w, h, fontsize; -L_BMF *bmf; -BOX *box; -BOXA *boxa; -PIX *pix1, *pix2, *pixd; -PIXA *pixat; - - PROCNAME("boxaDisplayTiled"); - - if (!boxas) - return (PIX *)ERROR_PTR("boxas not defined", procName, NULL); - - boxa = boxaSaveValid(boxas, L_COPY); - n = boxaGetCount(boxa); - if (pixa) { - npix = pixaGetCount(pixa); - if (n != npix) { - boxaDestroy(&boxa); - return (PIX *)ERROR_PTR("boxa and pixa counts differ", - procName, NULL); - } - } - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) { - boxaDestroy(&boxa); - return (PIX *)ERROR_PTR("invalid first", procName, NULL); - } - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) { - boxaDestroy(&boxa); - return (PIX *)ERROR_PTR("first > last", procName, NULL); - } - - /* Because the bitmap font will be reduced when tiled, choose the - * font size inversely with the scale factor. 
*/ - if (scalefactor > 0.8) - fontsize = 6; - else if (scalefactor > 0.6) - fontsize = 10; - else if (scalefactor > 0.4) - fontsize = 14; - else if (scalefactor > 0.3) - fontsize = 18; - else fontsize = 20; - bmf = bmfCreate(NULL, fontsize); - - pixat = pixaCreate(n); - boxaGetExtent(boxa, &w, &h, NULL); - for (i = first; i <= last; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - if (!pixa) { - pix1 = pixCreate(w, h, 32); - pixSetAll(pix1); - } else { - pix1 = pixaGetPix(pixa, i, L_COPY); - } - pixSetBorderVal(pix1, 0, 0, 0, 2, 0x0000ff00); - snprintf(buf, sizeof(buf), "%d", i); - pix2 = pixAddSingleTextblock(pix1, bmf, buf, 0x00ff0000, - L_ADD_BELOW, NULL); - pixDestroy(&pix1); - pixRenderBoxArb(pix2, box, linewidth, 255, 0, 0); - pixaAddPix(pixat, pix2, L_INSERT); - boxDestroy(&box); - } - bmfDestroy(&bmf); - boxaDestroy(&boxa); - - pixd = pixaDisplayTiledInRows(pixat, 32, maxwidth, scalefactor, background, - spacing, border); - pixaDestroy(&pixat); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc5.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc5.c deleted file mode 100644 index d0ca60d0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/boxfunc5.c +++ /dev/null @@ -1,2214 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file boxfunc5.c - *
- *
- *      Boxa sequence fitting
- *           BOXA     *boxaSmoothSequenceMedian()
- *           BOXA     *boxaWindowedMedian()
- *           BOXA     *boxaModifyWithBoxa()
- *           BOXA     *boxaConstrainSize()
- *           BOXA     *boxaReconcileEvenOddHeight()
- *    static l_int32   boxaTestEvenOddHeight()
- *           BOXA     *boxaReconcilePairWidth()
- *           l_int32   boxaSizeConsistency1()
- *           l_int32   boxaSizeConsistency2()
- *           BOXA     *boxaReconcileAllByMedian()
- *           BOXA     *boxaReconcileSidesByMedian()
- *    static void      adjustSidePlotName()  -- debug
- *           BOXA     *boxaReconcileSizeByMedian()
- *           l_int32   boxaPlotSides()   [for debugging]
- *           l_int32   boxaPlotSizes()   [for debugging]
- *           BOXA     *boxaFillSequence()
- *    static l_int32   boxaFillAll()
- *           l_int32   boxaSizeVariation()
- *           l_int32   boxaMedianDimensions()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_int32 boxaTestEvenOddHeight(BOXA *boxa1, BOXA *boxa2, l_int32 start, - l_float32 *pdel1, l_float32 *pdel2); -static l_int32 boxaFillAll(BOXA *boxa); - -static void adjustSidePlotName(char *buf, size_t size, const char *preface, - l_int32 select); - -/*---------------------------------------------------------------------* - * Boxa sequence fitting * - *---------------------------------------------------------------------*/ -/*! - * \brief boxaSmoothSequenceMedian() - * - * \param[in] boxas source boxa - * \param[in] halfwin half-width of sliding window; used to find median - * \param[in] subflag L_USE_MINSIZE, L_USE_MAXSIZE, - * L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, - * L_USE_CAPPED_MIN, L_USE_CAPPED_MAX - * \param[in] maxdiff parameter used with L_SUB_ON_LOC_DIFF, - * L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, - * L_USE_CAPPED_MAX - * \param[in] extrapixels pixels added on all sides (or subtracted - * if %extrapixels < 0) when using - * L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF - * \param[in] debug 1 for debug output - * \return boxad fitted boxa, or NULL on error - * - *
- * Notes:
- *      (1) The target width of the sliding window is 2 * %halfwin + 1.
- *          If necessary, this will be reduced by boxaWindowedMedian().
- *      (2) This returns a modified version of %boxas by constructing
- *          for each input box a box that has been smoothed with windowed
- *          median filtering.  The filtering is done to each of the
- *          box sides independently, and it is computed separately for
- *          sequences of even and odd boxes.  The output %boxad is
- *          constructed from the input boxa and the filtered boxa,
- *          depending on %subflag.  See boxaModifyWithBoxa() for
- *          details on the use of %subflag, %maxdiff and %extrapixels.
- *      (3) This is useful for removing noise separately in the even
- *          and odd sets, where the box edge locations can have
- *          discontinuities but otherwise vary roughly linearly within
- *          intervals of size %halfwin or larger.
- *      (4) If you don't need to handle even and odd sets separately,
- *          just do this:
- *              boxam = boxaWindowedMedian(boxas, halfwin, debug);
- *              boxad = boxaModifyWithBoxa(boxas, boxam, subflag, maxdiff,
- *                                         extrapixels);
- *              boxaDestroy(&boxam);
- * 
- */ -BOXA * -boxaSmoothSequenceMedian(BOXA *boxas, - l_int32 halfwin, - l_int32 subflag, - l_int32 maxdiff, - l_int32 extrapixels, - l_int32 debug) -{ -l_int32 n; -BOXA *boxae, *boxao, *boxamede, *boxamedo, *boxame, *boxamo, *boxad; -PIX *pix1; - - PROCNAME("boxaSmoothSequenceMedian"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (halfwin <= 0) { - L_WARNING("halfwin must be > 0; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (maxdiff < 0) { - L_WARNING("maxdiff must be >= 0; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE && - subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF && - subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) { - L_WARNING("invalid subflag; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if ((n = boxaGetCount(boxas)) < 6) { - L_WARNING("need at least 6 boxes; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - - boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); - if (debug) { - lept_mkdir("lept/smooth"); - boxaWriteDebug("/tmp/lept/smooth/boxae.ba", boxae); - boxaWriteDebug("/tmp/lept/smooth/boxao.ba", boxao); - } - - boxamede = boxaWindowedMedian(boxae, halfwin, debug); - boxamedo = boxaWindowedMedian(boxao, halfwin, debug); - if (debug) { - boxaWriteDebug("/tmp/lept/smooth/boxamede.ba", boxamede); - boxaWriteDebug("/tmp/lept/smooth/boxamedo.ba", boxamedo); - } - - boxame = boxaModifyWithBoxa(boxae, boxamede, subflag, maxdiff, extrapixels); - boxamo = boxaModifyWithBoxa(boxao, boxamedo, subflag, maxdiff, extrapixels); - if (debug) { - boxaWriteDebug("/tmp/lept/smooth/boxame.ba", boxame); - boxaWriteDebug("/tmp/lept/smooth/boxamo.ba", boxamo); - } - - boxad = boxaMergeEvenOdd(boxame, boxamo, 0); - if (debug) { - boxaPlotSides(boxas, NULL, NULL, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/smooth/plotsides1.png", pix1, IFF_PNG); - 
pixDestroy(&pix1); - boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/smooth/plotsides2.png", pix1, IFF_PNG); - pixDestroy(&pix1); - boxaPlotSizes(boxas, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/smooth/plotsizes1.png", pix1, IFF_PNG); - pixDestroy(&pix1); - boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/smooth/plotsizes2.png", pix1, IFF_PNG); - pixDestroy(&pix1); - } - - boxaDestroy(&boxae); - boxaDestroy(&boxao); - boxaDestroy(&boxamede); - boxaDestroy(&boxamedo); - boxaDestroy(&boxame); - boxaDestroy(&boxamo); - return boxad; -} - - -/*! - * \brief boxaWindowedMedian() - * - * \param[in] boxas source boxa - * \param[in] halfwin half width of window over which the median is found - * \param[in] debug 1 for debug output - * \return boxad smoothed boxa, or NULL on error - * - *
- * Notes:
- *      (1) This finds a set of boxes (boxad) where each edge of each box is
- *          a windowed median smoothed value to the edges of the
- *          input set of boxes (boxas).
- *      (2) Invalid input boxes are filled from nearby ones.
- *      (3) The returned boxad can then be used in boxaModifyWithBoxa()
- *          to selectively change the boxes in the source boxa.
- * 
- */ -BOXA * -boxaWindowedMedian(BOXA *boxas, - l_int32 halfwin, - l_int32 debug) -{ -l_int32 n, i, left, top, right, bot; -BOX *box; -BOXA *boxaf, *boxad; -NUMA *nal, *nat, *nar, *nab, *naml, *namt, *namr, *namb; -PIX *pix1; - - PROCNAME("boxaWindowedMedian"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if ((n = boxaGetCount(boxas)) < 3) { - L_WARNING("less than 3 boxes; returning a copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (halfwin <= 0) { - L_WARNING("halfwin must be > 0; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - - /* Fill invalid boxes in the input sequence */ - if ((boxaf = boxaFillSequence(boxas, L_USE_ALL_BOXES, debug)) == NULL) - return (BOXA *)ERROR_PTR("filled boxa not made", procName, NULL); - - /* Get the windowed median output from each of the sides */ - boxaExtractAsNuma(boxaf, &nal, &nat, &nar, &nab, NULL, NULL, 0); - naml = numaWindowedMedian(nal, halfwin); - namt = numaWindowedMedian(nat, halfwin); - namr = numaWindowedMedian(nar, halfwin); - namb = numaWindowedMedian(nab, halfwin); - - n = boxaGetCount(boxaf); - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(naml, i, &left); - numaGetIValue(namt, i, &top); - numaGetIValue(namr, i, &right); - numaGetIValue(namb, i, &bot); - box = boxCreate(left, top, right - left + 1, bot - top + 1); - boxaAddBox(boxad, box, L_INSERT); - } - - if (debug) { - lept_mkdir("lept/windowed"); - boxaPlotSides(boxaf, NULL, NULL, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/windowed/plotsides1.png", pix1, IFF_PNG); - pixDestroy(&pix1); - boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/windowed/plotsides2.png", pix1, IFF_PNG); - pixDestroy(&pix1); - boxaPlotSizes(boxaf, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/windowed/plotsizes1.png", pix1, IFF_PNG); - pixDestroy(&pix1); - boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1); - pixWrite("/tmp/lept/windowed/plotsizes2.png", pix1, 
IFF_PNG); - pixDestroy(&pix1); - } - - boxaDestroy(&boxaf); - numaDestroy(&nal); - numaDestroy(&nat); - numaDestroy(&nar); - numaDestroy(&nab); - numaDestroy(&naml); - numaDestroy(&namt); - numaDestroy(&namr); - numaDestroy(&namb); - return boxad; -} - - -/*! - * \brief boxaModifyWithBoxa() - * - * \param[in] boxas - * \param[in] boxam boxa with boxes used to modify those in boxas - * \param[in] subflag L_USE_MINSIZE, L_USE_MAXSIZE, - * L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, - * L_USE_CAPPED_MIN, L_USE_CAPPED_MAX - * \param[in] maxdiff parameter used with L_SUB_ON_LOC_DIFF, - * L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, - * L_USE_CAPPED_MAX - * \param[in] extrapixels pixels added on all sides (or subtracted - * if %extrapixels < 0) when using - * L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF - * \return boxad result after adjusting boxes in boxas, or NULL on error. - * - *
- * Notes:
- *      (1) This takes two input boxa (boxas, boxam) and constructs boxad,
- *          where each box in boxad is generated from the corresponding
- *          boxes in boxas and boxam.  The rule for constructing each
- *          output box depends on %subflag and %maxdiff.  Let boxs be
- *          a box from %boxas and boxm be a box from %boxam.
- *          * If %subflag == L_USE_MINSIZE: the output box is the intersection
- *            of the two input boxes.
- *          * If %subflag == L_USE_MAXSIZE: the output box is the union of the
- *            two input boxes; i.e., the minimum bounding rectangle for the
- *            two input boxes.
- *          * If %subflag == L_SUB_ON_LOC_DIFF: each side of the output box
- *            is found separately from the corresponding side of boxs and boxm.
- *            Use the boxm side, expanded by %extrapixels, if greater than
- *            %maxdiff pixels from the boxs side.
- *          * If %subflag == L_SUB_ON_SIZE_DIFF: the sides of the output box
- *            are determined in pairs from the width and height of boxs
- *            and boxm.  If the boxm width differs by more than %maxdiff
- *            pixels from boxs, use the boxm left and right sides,
- *            expanded by %extrapixels.  Ditto for the height difference.
- *          For the last two flags, each side of the output box is found
- *          separately from the corresponding side of boxs and boxm,
- *          according to these rules, where "smaller"("bigger") mean in a
- *          direction that decreases(increases) the size of the output box:
- *          * If %subflag == L_USE_CAPPED_MIN: use the Min of boxm
- *            with the Max of (boxs, boxm +- %maxdiff), where the sign
- *            is adjusted to make the box smaller (e.g., use "+" on left side).
- *          * If %subflag == L_USE_CAPPED_MAX: use the Max of boxm
- *            with the Min of (boxs, boxm +- %maxdiff), where the sign
- *            is adjusted to make the box bigger (e.g., use "-" on left side).
- *          Use of the last 2 flags is further explained in (3) and (4).
- *      (2) boxas and boxam must be the same size.  If boxam == NULL,
- *          this returns a copy of boxas with a warning.
- *      (3) If %subflag == L_SUB_ON_LOC_DIFF, use boxm for each side
- *          where the corresponding sides differ by more than %maxdiff.
- *          Two extreme cases:
- *          (a) set %maxdiff == 0 to use only values from boxam in boxad.
- *          (b) set %maxdiff == 10000 to ignore all values from boxam;
- *              then boxad will be the same as boxas.
- *      (4) If %subflag == L_USE_CAPPED_MAX: use boxm if boxs is smaller;
- *          use boxs if boxs is bigger than boxm by an amount up to %maxdiff;
- *          and use boxm +- %maxdiff (the 'capped' value) if boxs is
- *          bigger than boxm by an amount larger than %maxdiff.
- *          Similarly, with interchange of Min/Max and sign of %maxdiff,
- *          for %subflag == L_USE_CAPPED_MIN.
- *      (5) If either of corresponding boxes in boxas and boxam is invalid,
- *          an invalid box is copied to the result.
- *      (6) Typical input for boxam may be the output of boxaLinearFit().
- *          where outliers have been removed and each side is LS fit to a line.
- *      (7) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(),
- *          this uses two boxes and does not specify target dimensions.
- *          Additional constraints on the size of each box can be enforced
- *          by following this operation with boxaConstrainSize(), taking
- *          boxad as input.
- * 
- */ -BOXA * -boxaModifyWithBoxa(BOXA *boxas, - BOXA *boxam, - l_int32 subflag, - l_int32 maxdiff, - l_int32 extrapixels) -{ -l_int32 n, i, ls, ts, rs, bs, ws, hs, lm, tm, rm, bm, wm, hm, ld, td, rd, bd; -BOX *boxs, *boxm, *boxd, *boxempty; -BOXA *boxad; - - PROCNAME("boxaModifyWithBoxa"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (!boxam) { - L_WARNING("boxam not defined; returning copy", procName); - return boxaCopy(boxas, L_COPY); - } - if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE && - subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF && - subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) { - L_WARNING("invalid subflag; returning copy", procName); - return boxaCopy(boxas, L_COPY); - } - n = boxaGetCount(boxas); - if (n != boxaGetCount(boxam)) { - L_WARNING("boxas and boxam sizes differ; returning copy", procName); - return boxaCopy(boxas, L_COPY); - } - - boxad = boxaCreate(n); - boxempty = boxCreate(0, 0, 0, 0); /* placeholders */ - for (i = 0; i < n; i++) { - boxs = boxaGetValidBox(boxas, i, L_CLONE); - boxm = boxaGetValidBox(boxam, i, L_CLONE); - if (!boxs || !boxm) { - boxaAddBox(boxad, boxempty, L_COPY); - } else { - boxGetGeometry(boxs, &ls, &ts, &ws, &hs); - boxGetGeometry(boxm, &lm, &tm, &wm, &hm); - rs = ls + ws - 1; - bs = ts + hs - 1; - rm = lm + wm - 1; - bm = tm + hm - 1; - if (subflag == L_USE_MINSIZE) { - ld = L_MAX(ls, lm); - rd = L_MIN(rs, rm); - td = L_MAX(ts, tm); - bd = L_MIN(bs, bm); - } else if (subflag == L_USE_MAXSIZE) { - ld = L_MIN(ls, lm); - rd = L_MAX(rs, rm); - td = L_MIN(ts, tm); - bd = L_MAX(bs, bm); - } else if (subflag == L_SUB_ON_LOC_DIFF) { - ld = (L_ABS(lm - ls) <= maxdiff) ? ls : lm - extrapixels; - td = (L_ABS(tm - ts) <= maxdiff) ? ts : tm - extrapixels; - rd = (L_ABS(rm - rs) <= maxdiff) ? rs : rm + extrapixels; - bd = (L_ABS(bm - bs) <= maxdiff) ? 
bs : bm + extrapixels; - } else if (subflag == L_SUB_ON_SIZE_DIFF) { - ld = (L_ABS(wm - ws) <= maxdiff) ? ls : lm - extrapixels; - td = (L_ABS(hm - hs) <= maxdiff) ? ts : tm - extrapixels; - rd = (L_ABS(wm - ws) <= maxdiff) ? rs : rm + extrapixels; - bd = (L_ABS(hm - hs) <= maxdiff) ? bs : bm + extrapixels; - } else if (subflag == L_USE_CAPPED_MIN) { - ld = L_MAX(lm, L_MIN(ls, lm + maxdiff)); - td = L_MAX(tm, L_MIN(ts, tm + maxdiff)); - rd = L_MIN(rm, L_MAX(rs, rm - maxdiff)); - bd = L_MIN(bm, L_MAX(bs, bm - maxdiff)); - } else { /* subflag == L_USE_CAPPED_MAX */ - ld = L_MIN(lm, L_MAX(ls, lm - maxdiff)); - td = L_MIN(tm, L_MAX(ts, tm - maxdiff)); - rd = L_MAX(rm, L_MIN(rs, rm + maxdiff)); - bd = L_MAX(bm, L_MIN(bs, bm + maxdiff)); - } - boxd = boxCreate(ld, td, rd - ld + 1, bd - td + 1); - boxaAddBox(boxad, boxd, L_INSERT); - } - boxDestroy(&boxs); - boxDestroy(&boxm); - } - boxDestroy(&boxempty); - - return boxad; -} - - -/*! - * \brief boxaConstrainSize() - * - * \param[in] boxas - * \param[in] width force width of all boxes to this size; - * input 0 to use the median width - * \param[in] widthflag L_ADJUST_SKIP, L_ADJUST_LEFT, L_ADJUST_RIGHT, - * or L_ADJUST_LEFT_AND_RIGHT - * \param[in] height force height of all boxes to this size; - * input 0 to use the median height - * \param[in] heightflag L_ADJUST_SKIP, L_ADJUST_TOP, L_ADJUST_BOT, - * or L_ADJUST_TOP_AND_BOT - * \return boxad adjusted so all boxes are the same size - * - *
- * Notes:
- *      (1) Forces either width or height (or both) of every box in
- *          the boxa to a specified size, by moving the indicated sides.
- *      (2) Not all input boxes need to be valid.  Median values will be
- *          used with invalid boxes.
- *      (3) Typical input might be the output of boxaLinearFit(),
- *          where each side has been fit.
- *      (4) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(),
- *          this is not dependent on a difference threshold to change the size.
- *      (5) On error, a message is issued and a copy of the input boxa
- *          is returned.
- * 
- */ -BOXA * -boxaConstrainSize(BOXA *boxas, - l_int32 width, - l_int32 widthflag, - l_int32 height, - l_int32 heightflag) -{ -l_int32 n, i, x, y, w, h, invalid; -l_int32 delw, delh, del_left, del_right, del_top, del_bot; -BOX *medbox, *boxs, *boxd; -BOXA *boxad; - - PROCNAME("boxaConstrainSize"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - - /* Need median values if requested or if there are invalid boxes */ - invalid = boxaGetCount(boxas) - boxaGetValidCount(boxas); - medbox = NULL; - if (width == 0 || height == 0 || invalid > 0) { - if (boxaGetMedianVals(boxas, &x, &y, NULL, NULL, &w, &h)) { - L_ERROR("median vals not returned", procName); - return boxaCopy(boxas, L_COPY); - } - medbox = boxCreate(x, y, w, h); - if (width == 0) width = w; - if (height == 0) height = h; - } - - n = boxaGetCount(boxas); - boxad = boxaCreate(n); - for (i = 0; i < n; i++) { - if ((boxs = boxaGetValidBox(boxas, i, L_COPY)) == NULL) - boxs = boxCopy(medbox); - boxGetGeometry(boxs, NULL, NULL, &w, &h); - delw = width - w; - delh = height - h; - del_left = del_right = del_top = del_bot = 0; - if (widthflag == L_ADJUST_LEFT) { - del_left = -delw; - } else if (widthflag == L_ADJUST_RIGHT) { - del_right = delw; - } else { - del_left = -delw / 2; - del_right = delw / 2 + L_SIGN(delw) * (delw & 1); - } - if (heightflag == L_ADJUST_TOP) { - del_top = -delh; - } else if (heightflag == L_ADJUST_BOT) { - del_bot = delh; - } else { - del_top = -delh / 2; - del_bot = delh / 2 + L_SIGN(delh) * (delh & 1); - } - boxd = boxAdjustSides(NULL, boxs, del_left, del_right, - del_top, del_bot); - boxaAddBox(boxad, boxd, L_INSERT); - boxDestroy(&boxs); - } - - boxDestroy(&medbox); - return boxad; -} - - -/*! 
- * \brief boxaReconcileEvenOddHeight() - * - * \param[in] boxas containing at least 3 valid boxes in even and odd - * \param[in] sides L_ADJUST_TOP, L_ADJUST_BOT, L_ADJUST_TOP_AND_BOT - * \param[in] delh threshold on median height difference - * \param[in] op L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX - * \param[in] factor > 0.0, typically near 1.0 - * \param[in] start 0 if pairing (0,1), etc; 1 if pairing (1,2), etc - * \return boxad adjusted, or a copy of boxas on error - * - *
- * Notes:
- *      (1) The basic idea is to reconcile differences in box height
- *          in the even and odd boxes, by moving the top and/or bottom
- *          edges in the even and odd boxes.  Choose the edge or edges
- *          to be moved, whether to adjust the boxes with the min
- *          or the max of the medians, and the threshold on the median
- *          difference between even and odd box heights for the operations
- *          to take place.  The same threshold is also used to
- *          determine if each individual box edge is to be adjusted.
- *      (2) Boxes are conditionally reset with either the same top (y)
- *          value or the same bottom value, or both.  The value is
- *          determined by the greater or lesser of the medians of the
- *          even and odd boxes, with the choice depending on the value
- *          of %op, which selects for either min or max median height.
- *          If the median difference between even and odd boxes is
- *          greater than %dely, then any individual box edge that differs
- *          from the selected median by more than %dely is set to
- *          the selected median times a factor typically near 1.0.
- *      (3) Note that if selecting for minimum height, you will choose
- *          the largest y-value for the top and the smallest y-value for
- *          the bottom of the box.
- *      (4) Typical input might be the output of boxaSmoothSequenceMedian(),
- *          where even and odd boxa have been independently regulated.
- *      (5) Require at least 3 valid even boxes and 3 valid odd boxes.
- *          Median values will be used for invalid boxes.
- *      (6) If the median height is not representative of the boxes
- *          in %boxas, this can make things much worse.  In that case,
- *          ignore the value of %op, and force pairwise equality of the
- *          heights, with pairwise maximal vertical extension.
- * 
- */ -BOXA * -boxaReconcileEvenOddHeight(BOXA *boxas, - l_int32 sides, - l_int32 delh, - l_int32 op, - l_float32 factor, - l_int32 start) -{ -l_int32 n, he, ho, hmed, doeven; -l_float32 del1, del2; -BOXA *boxae, *boxao, *boxa1e, *boxa1o, *boxad; - - PROCNAME("boxaReconcileEvenOddHeight"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (sides != L_ADJUST_TOP && sides != L_ADJUST_BOT && - sides != L_ADJUST_TOP_AND_BOT) { - L_WARNING("no action requested; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if ((n = boxaGetValidCount(boxas)) < 6) { - L_WARNING("need at least 6 valid boxes; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (factor <= 0.0) { - L_WARNING("invalid factor; setting to 1.0\n", procName); - factor = 1.0; - } - - /* Require at least 3 valid boxes of both types */ - boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); - if (boxaGetValidCount(boxae) < 3 || boxaGetValidCount(boxao) < 3) { - boxaDestroy(&boxae); - boxaDestroy(&boxao); - return boxaCopy(boxas, L_COPY); - } - - /* Get the median heights for each set */ - boxaGetMedianVals(boxae, NULL, NULL, NULL, NULL, NULL, &he); - boxaGetMedianVals(boxao, NULL, NULL, NULL, NULL, NULL, &ho); - L_INFO("median he = %d, median ho = %d\n", procName, he, ho); - - /* If the difference in median height reaches the threshold %delh, - * only adjust the side(s) of one of the sets. If we choose - * the minimum median height as the target, allow the target - * to be scaled by a factor, typically near 1.0, of the - * minimum median height. And similarly if the target is - * the maximum median height. */ - if (L_ABS(he - ho) > delh) { - if (op == L_ADJUST_CHOOSE_MIN) { - doeven = (ho < he) ? TRUE : FALSE; - hmed = (l_int32)(factor * L_MIN(he, ho)); - hmed = L_MIN(hmed, L_MAX(he, ho)); /* don't make it bigger! */ - } else { /* max height */ - doeven = (ho > he) ? 
TRUE : FALSE; - hmed = (l_int32)(factor * L_MAX(he, ho)); - hmed = L_MAX(hmed, L_MIN(he, ho)); /* don't make it smaller! */ - } - if (doeven) { - boxa1e = boxaAdjustHeightToTarget(NULL, boxae, sides, hmed, delh); - boxa1o = boxaCopy(boxao, L_COPY); - } else { /* !doeven */ - boxa1e = boxaCopy(boxae, L_COPY); - boxa1o = boxaAdjustHeightToTarget(NULL, boxao, sides, hmed, delh); - } - } else { - boxa1e = boxaCopy(boxae, L_CLONE); - boxa1o = boxaCopy(boxao, L_CLONE); - } - boxaDestroy(&boxae); - boxaDestroy(&boxao); - - /* It can happen that the median is not a good measure for an - * entire book. In that case, the reconciliation above can do - * more harm than good. Sanity check by comparing height and y - * differences of adjacent even/odd boxes, before and after - * reconciliation. */ - boxad = boxaMergeEvenOdd(boxa1e, boxa1o, 0); - boxaTestEvenOddHeight(boxas, boxad, start, &del1, &del2); - boxaDestroy(&boxa1e); - boxaDestroy(&boxa1o); - if (del2 < del1 + 10.) - return boxad; - - /* Using the median made it worse. Skip reconciliation: - * forcing all pairs of top and bottom values to have - * maximum extent does not improve the situation either. */ - L_INFO("Got worse: del2 = %f > del1 = %f\n", procName, del2, del1); - boxaDestroy(&boxad); - return boxaCopy(boxas, L_COPY); -} - - -/*! - * \brief boxaTestEvenOddHeight() - * - * \param[in] boxa1 input boxa 1 - * \param[in] boxa2 input boxa 2 - * \param[in] start 0 if pairing (0,1), etc; 1 if pairing (1,2), etc - * \param[out] pdel1 root mean of (dely^2 + delh^2 for boxa1 - * \param[out] pdel2 root mean of (dely^2 + delh^2 for boxa2 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This compares differences in the y location and height of
- *          adjacent boxes, in each of the input boxa.
- * 
- */ -static l_int32 -boxaTestEvenOddHeight(BOXA *boxa1, - BOXA *boxa2, - l_int32 start, - l_float32 *pdel1, - l_float32 *pdel2) -{ -l_int32 i, n, npairs, y1a, y1b, y2a, y2b, h1a, h1b, h2a, h2b; -l_float32 del1, del2; - - PROCNAME("boxaTestEvenOddHeight"); - - if (pdel1) *pdel1 = 0.0; - if (pdel2) *pdel2 = 0.0; - if (!pdel1 || !pdel2) - return ERROR_INT("&del1 and &del2 not both defined", procName, 1); - if (!boxa1 || !boxa2) - return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1); - n = L_MIN(boxaGetCount(boxa1), boxaGetCount(boxa2)); - - /* For boxa1 and boxa2 separately, we expect the y and h values - * to be similar for adjacent boxes. Get a measure of similarity - * by finding the sum of squares of differences between - * y values and between h values, and adding them. */ - del1 = del2 = 0.0; - npairs = (n - start) / 2; - for (i = start; i < 2 * npairs; i += 2) { - boxaGetBoxGeometry(boxa1, i, NULL, &y1a, NULL, &h1a); - boxaGetBoxGeometry(boxa1, i + 1, NULL, &y1b, NULL, &h1b); - del1 += (l_float32)(y1a - y1b) * (y1a - y1b) - + (h1a - h1b) * (h1a - h1b); - boxaGetBoxGeometry(boxa2, i, NULL, &y2a, NULL, &h2a); - boxaGetBoxGeometry(boxa2, i + 1, NULL, &y2b, NULL, &h2b); - del2 += (l_float32)(y2a - y2b) * (y2a - y2b) - + (h2a - h2b) * (h2a - h2b); - } - - /* Get the root of the average of the sum of square differences */ - *pdel1 = (l_float32)sqrt((l_float64)del1 / (0.5 * n)); - *pdel2 = (l_float32)sqrt((l_float64)del2 / (0.5 * n)); - return 0; -} - - -/*! - * \brief boxaReconcilePairWidth() - * - * \param[in] boxas - * \param[in] delw threshold on adjacent width difference - * \param[in] op L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX - * \param[in] factor > 0.0, typically near 1.0 - * \param[in] na [optional] indicator array allowing change - * \return boxad adjusted, or a copy of boxas on error - * - *
- * Notes:
- *      (1) This reconciles differences in the width of adjacent boxes,
- *          by moving one side of one of the boxes in each pair.
- *          If the widths in the pair differ by more than some
- *          threshold, move either the left side for even boxes or
- *          the right side for odd boxes, depending on if we're choosing
- *          the min or max.  If choosing min, the width of the max is
- *          set to factor * (width of min).  If choosing max, the width
- *          of the min is set to factor * (width of max).
- *      (2) If %na exists, it is an indicator array corresponding to the
- *          boxes in %boxas.  If %na != NULL, only boxes with an
- *          indicator value of 1 are allowed to adjust; otherwise,
- *          all boxes can adjust.
- *      (3) Typical input might be the output of boxaSmoothSequenceMedian(),
- *          where even and odd boxa have been independently regulated.
- * 
- */ -BOXA * -boxaReconcilePairWidth(BOXA *boxas, - l_int32 delw, - l_int32 op, - l_float32 factor, - NUMA *na) -{ -l_int32 i, ne, no, nmin, xe, we, xo, wo, inde, indo, x, w; -BOX *boxe, *boxo; -BOXA *boxae, *boxao, *boxad; - - PROCNAME("boxaReconcilePairWidth"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (factor <= 0.0) { - L_WARNING("invalid factor; setting to 1.0\n", procName); - factor = 1.0; - } - - /* Taking the boxes in pairs, if the difference in width reaches - * the threshold %delw, adjust the left or right side of one - * of the pair. */ - boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); - ne = boxaGetCount(boxae); - no = boxaGetCount(boxao); - nmin = L_MIN(ne, no); - for (i = 0; i < nmin; i++) { - /* Set indicator values */ - if (na) { - numaGetIValue(na, 2 * i, &inde); - numaGetIValue(na, 2 * i + 1, &indo); - } else { - inde = indo = 1; - } - if (inde == 0 && indo == 0) continue; - - boxe = boxaGetBox(boxae, i, L_CLONE); - boxo = boxaGetBox(boxao, i, L_CLONE); - boxGetGeometry(boxe, &xe, NULL, &we, NULL); - boxGetGeometry(boxo, &xo, NULL, &wo, NULL); - if (we == 0 || wo == 0) { /* if either is invalid; skip */ - boxDestroy(&boxe); - boxDestroy(&boxo); - continue; - } else if (L_ABS(we - wo) > delw) { - if (op == L_ADJUST_CHOOSE_MIN) { - if (we > wo && inde == 1) { - /* move left side of even to the right */ - w = factor * wo; - x = xe + (we - w); - boxSetGeometry(boxe, x, -1, w, -1); - } else if (we < wo && indo == 1) { - /* move right side of odd to the left */ - w = factor * we; - boxSetGeometry(boxo, -1, -1, w, -1); - } - } else { /* maximize width */ - if (we < wo && inde == 1) { - /* move left side of even to the left */ - w = factor * wo; - x = L_MAX(0, xe + (we - w)); - w = we + (xe - x); /* covers both cases for the max */ - boxSetGeometry(boxe, x, -1, w, -1); - } else if (we > wo && indo == 1) { - /* move right side of odd to the right */ - w = factor * we; - boxSetGeometry(boxo, -1, -1, w, -1); - } - } - 
} - boxDestroy(&boxe); - boxDestroy(&boxo); - } - - boxad = boxaMergeEvenOdd(boxae, boxao, 0); - boxaDestroy(&boxae); - boxaDestroy(&boxao); - return boxad; -} - - -/*! - * \brief boxaSizeConsistency1() - * - * \param[in] boxas of size >= 10 - * \param[in] type L_CHECK_WIDTH, L_CHECK_HEIGHT - * \param[in] threshp threshold for pairwise fractional variation - * \param[in] threshm threshold for fractional variation from median - * \param[out] pfvarp [optional] average fractional pairwise variation - * \param[out] pfvarm [optional] average fractional median variation - * \param[out] psame decision for uniformity of page size (1, 0, -1) - * - *
- * Notes:
- *      (1) This evaluates a boxa for particular types of dimensional
- *          variation.  Select either width or height variation.  Then
- *          it returns two numbers: one is based on pairwise (even/odd)
- *          variation; the other is based on the average variation
- *          from the boxa median.
- *      (2) For the pairwise variation, get the fraction of the absolute
- *          difference in dimension of each pair of boxes, and take
- *          the average value.  The median variation is simply the
- *          the average of the fractional deviation from the median
- *          of all the boxes.
- *      (3) Use 0 for default values of %threshp and %threshm.  They are
- *            threshp:  0.02
- *            threshm:  0.015
- *      (4) The intended application is that the boxes are a sequence of
- *          page regions in a book scan, and we calculate two numbers
- *          that can give an indication if the pages are approximately
- *          the same size.  The pairwise variation should be small if
- *          the boxes are correctly calculated.  If there are a
- *          significant number of random or systematic outliers, the
- *          pairwise variation will be large, and no decision will be made
- *          (i.e., return same == -1).  Here are the possible outcomes:
- *            Pairwise Var    Median Var    Decision
- *            ------------    ----------    --------
- *            small           small         same size  (1)
- *            small           large         different size  (0)
- *            large           small/large   unknown   (-1)
- * 
- */ -l_ok -boxaSizeConsistency1(BOXA *boxas, - l_int32 type, - l_float32 threshp, - l_float32 threshm, - l_float32 *pfvarp, - l_float32 *pfvarm, - l_int32 *psame) -{ -l_int32 i, n, bw1, bh1, bw2, bh2, npairs; -l_float32 ave, fdiff, sumdiff, med, fvarp, fvarm; -NUMA *na1; - - PROCNAME("boxaSizeConsistency1"); - - if (pfvarp) *pfvarp = 0.0; - if (pfvarm) *pfvarm = 0.0; - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = -1; - if (!boxas) - return ERROR_INT("boxas not defined", procName, 1); - if (boxaGetValidCount(boxas) < 6) - return ERROR_INT("need a least 6 valid boxes", procName, 1); - if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT) - return ERROR_INT("invalid type", procName, 1); - if (threshp < 0.0 || threshp >= 0.5) - return ERROR_INT("invalid threshp", procName, 1); - if (threshm < 0.0 || threshm >= 0.5) - return ERROR_INT("invalid threshm", procName, 1); - if (threshp == 0.0) threshp = 0.02; - if (threshm == 0.0) threshm = 0.015; - - /* Evaluate pairwise variation */ - n = boxaGetCount(boxas); - na1 = numaCreate(0); - for (i = 0, npairs = 0, sumdiff = 0; i < n - 1; i += 2) { - boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw1, &bh1); - boxaGetBoxGeometry(boxas, i + 1, NULL, NULL, &bw2, &bh2); - if (bw1 == 0 || bh1 == 0 || bw2 == 0 || bh2 == 0) - continue; - npairs++; - if (type == L_CHECK_WIDTH) { - ave = (bw1 + bw2) / 2.0; - fdiff = L_ABS(bw1 - bw2) / ave; - numaAddNumber(na1, bw1); - numaAddNumber(na1, bw2); - } else { /* type == L_CHECK_HEIGHT) */ - ave = (bh1 + bh2) / 2.0; - fdiff = L_ABS(bh1 - bh2) / ave; - numaAddNumber(na1, bh1); - numaAddNumber(na1, bh2); - } - sumdiff += fdiff; - } - fvarp = sumdiff / npairs; - if (pfvarp) *pfvarp = fvarp; - - /* Evaluate the average abs fractional deviation from the median */ - numaGetMedian(na1, &med); - if (med == 0.0) { - L_WARNING("median value is 0\n", procName); - } else { - numaGetMeanDevFromMedian(na1, med, &fvarm); - fvarm /= med; - if (pfvarm) *pfvarm = fvarm; - } - 
numaDestroy(&na1); - - /* Make decision */ - if (fvarp < threshp && fvarm < threshm) - *psame = 1; - else if (fvarp < threshp && fvarm > threshm) - *psame = 0; - else - *psame = -1; /* unknown */ - return 0; -} - - -/*! - * \brief boxaSizeConsistency2() - * - * \param[in] boxas of size >= 10 - * \param[out] pfdevw average fractional deviation from median width - * \param[out] pfdevh average fractional deviation from median height - * \param[in] debug 1 for debug plot output of input and regularized - * width and height - * - *
- * Notes:
- *      (1) This evaluates a boxa for consistency of the box sizes.
- *          The intended application is that the boxes are a sequence of
- *          page regions in a book scan, and the output is a decision
- *          about whether the pages should be approximately the same size.
- *          The determination should be robust to outliers, both random
- *          and (for many cases) systematic.
- *      (2) This differs from boxaSizeConsistency1() in that it attempts
- *          to correct for box dimensional errors before doing the
- *          evaluation.  For this reason, it may be less robust.
- *      (3) Adjacent even and odd boxes are expected to be the same size.
- *          Take them pairwise, and assume the minimum height, hmin,
- *          is correct.  Then for (the usual case) wmin/hmin > 0.5, assume
- *          the minimum width is correct.  If wmin/hmin <= 0.5, assume
- *          the maximum width is correct.
- *      (4) After correcting each pair so that they are the same size,
- *          compute the average fractional deviation, from median width and
- *          height.  A deviation of width or height by more than about
- *          0.02 is evidence that the boxes may be from a non-homogeneous
- *          source, such as a book with significantly different page sizes.
- * 
- */ -l_ok -boxaSizeConsistency2(BOXA *boxas, - l_float32 *pfdevw, - l_float32 *pfdevh, - l_int32 debug) -{ -l_int32 i, n, bw1, bh1, bw2, bh2, npairs; -l_float32 medw, medh, devw, devh, minw, maxw, minh, w; -BOX *box; -BOXA *boxa1; -NUMA *naw, *nah; -PIX *pix1, *pix2, *pix3; -PIXA *pixa; - - PROCNAME("boxaSizeConsistency2"); - - if (pfdevw) *pfdevw = 0.0; - if (pfdevh) *pfdevh = 0.0; - if (!boxas) - return ERROR_INT("boxas not defined", procName, 1); - if (!pfdevw || !pfdevh) - return ERROR_INT("&fdevw and &fdevh not both defined", procName, 1); - n = boxaGetCount(boxas); - if (n < 10) { - L_WARNING("small boxa; assuming OK", procName); - return 0; - } - - /* Regularize w and h in pairs; skip last box if n is odd */ - boxa1 = (debug) ? boxaCreate(n) : NULL; - naw = numaCreate(0); - nah = numaCreate(0); - for (i = 0, npairs = 0; i < n - 1; i += 2) { - boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw1, &bh1); - boxaGetBoxGeometry(boxas, i + 1, NULL, NULL, &bw2, &bh2); - if (bw1 == 0 || bh1 == 0 || bw2 == 0 || bh2 == 0) - continue; - npairs++; - minw = (l_float32)L_MIN(bw1, bw2); - maxw = (l_float32)L_MAX(bw1, bw2); - minh = (l_float32)L_MIN(bh1, bh2); - w = (minw / minh > 0.5) ? minw : maxw; - numaAddNumber(naw, w); - numaAddNumber(nah, minh); - if (debug) { - box = boxCreate(0, 0, w, minh); - boxaAddBox(boxa1, box, L_COPY); - boxaAddBox(boxa1, box, L_INSERT); - } - } - if (npairs == 0) { - L_WARNING("no valid box pairs\n", procName); - numaDestroy(&naw); - numaDestroy(&nah); - boxaDestroy(&boxa1); - } - - /* Get the median value of the regularized sizes, and find - * the average absolute fractional deviation from the median. 
*/ - numaGetMedian(naw, &medw); - numaGetMedian(nah, &medh); - numaGetMeanDevFromMedian(naw, medw, &devw); - numaGetMeanDevFromMedian(nah, medh, &devh); - *pfdevw = devw / medw; - *pfdevh = devh / medh; - if (debug) { - lept_stderr("medw = %5.1f, medh = %5.1f\n", medw, medh); - lept_stderr("fdevw = %6.3f, fdevh = %6.3f\n", *pfdevw, *pfdevh); - boxaPlotSizes(boxas, "input_boxa", NULL, NULL, &pix1); - boxaPlotSizes(boxa1, "regularized_boxa", NULL, NULL, &pix2); - pixDisplay(pix1, 500, 0); - pixDisplay(pix2, 500, 1000); - pixa = pixaCreate(2); - pixaAddPix(pixa, pix1, L_INSERT); - pixaAddPix(pixa, pix2, L_INSERT); - pix3 = pixaDisplayTiledInColumns(pixa, 2, 1.0, 3, 2); - lept_mkdir("lept/boxa"); - pixWrite("/tmp/lept/boxa/eval.png", pix3, IFF_PNG); - pixDisplay(pix3, 100, 100); - pixDestroy(&pix3); - pixaDestroy(&pixa); - boxaDestroy(&boxa1); - } - - numaDestroy(&naw); - numaDestroy(&nah); - return 0; -} - - -/*! - * \brief boxaReconcileAllByMedian() - * - * \param[in] boxas containing at least 6 valid boxes - * \param[in] select1 L_ADJUST_LEFT_AND_RIGHT or L_ADJUST_SKIP - * \param[in] select2 L_ADJUST_TOP_AND_BOT or L_ADJUST_SKIP - * \param[in] thresh threshold number of pixels to make adjustment - * \param[in] extra extra pixels to add beyond median value - * \param[in] pixadb use NULL to skip debug output - * \return boxad possibly adjusted from boxas; a copy of boxas on error - * - *
- * Notes:
- *      (1) This uses boxaReconcileSidesByMedian() to reconcile
- *          the left-and-right and/or top-and-bottom sides of the
- *          even and odd boxes, separately.
- *      (2) See boxaReconcileSidesByMedian() for use of %thresh and %extra.
- *      (3) If all box sides are within %thresh of the median value,
- *          the returned box will be identical to %boxas.
- * 
- */ -BOXA * -boxaReconcileAllByMedian(BOXA *boxas, - l_int32 select1, - l_int32 select2, - l_int32 thresh, - l_int32 extra, - PIXA *pixadb) - { -l_int32 i, n, diff, ncols; -l_int32 left, right, top, bot, medleft, medright, medtop, medbot; -BOX *box; -BOXA *boxa1e, *boxa1o, *boxa2e, *boxa2o, *boxa3e, *boxa3o, *boxad; -PIX *pix1; - - PROCNAME("boxaReconcileAllByMedian"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (select1 != L_ADJUST_LEFT_AND_RIGHT && select1 != L_ADJUST_SKIP) { - L_WARNING("invalid select1; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (select2 != L_ADJUST_TOP_AND_BOT && select2 != L_ADJUST_SKIP) { - L_WARNING("invalid select2; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (thresh < 0) { - L_WARNING("thresh must be >= 0; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (boxaGetValidCount(boxas) < 3) { - L_WARNING("need at least 3 valid boxes; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - - /* Adjust even and odd box sides separately */ - boxaSplitEvenOdd(boxas, 0, &boxa1e, &boxa1o); - ncols = 1; - if (select1 == L_ADJUST_LEFT_AND_RIGHT) { - ncols += 2; - boxa2e = boxaReconcileSidesByMedian(boxa1e, select1, thresh, - extra, pixadb); - } else { - boxa2e = boxaCopy(boxa1e, L_COPY); - } - if (select2 == L_ADJUST_TOP_AND_BOT) { - ncols += 2; - boxa3e = boxaReconcileSidesByMedian(boxa2e, select2, thresh, - extra, pixadb); - } else { - boxa3e = boxaCopy(boxa2e, L_COPY); - } - if (select1 == L_ADJUST_LEFT_AND_RIGHT) - boxa2o = boxaReconcileSidesByMedian(boxa1o, select1, thresh, - extra, pixadb); - else - boxa2o = boxaCopy(boxa1o, L_COPY); - if (select2 == L_ADJUST_TOP_AND_BOT) - boxa3o = boxaReconcileSidesByMedian(boxa2o, select2, thresh, - extra, pixadb); - else - boxa3o = boxaCopy(boxa2o, L_COPY); - boxad = boxaMergeEvenOdd(boxa3e, boxa3o, 0); - - /* This generates 2 sets of 3 or 5 plots in a row, depending - * 
on whether select1 and select2 are true (not skipping). - * The top row is for even boxes; the bottom row is for odd boxes. */ - if (pixadb) { - lept_mkdir("lept/boxa"); - pix1 = pixaDisplayTiledInColumns(pixadb, ncols, 1.0, 30, 2); - pixWrite("/tmp/lept/boxa/recon_sides.png", pix1, IFF_PNG); - pixDestroy(&pix1); - } - - boxaDestroy(&boxa1e); - boxaDestroy(&boxa1o); - boxaDestroy(&boxa2e); - boxaDestroy(&boxa2o); - boxaDestroy(&boxa3e); - boxaDestroy(&boxa3o); - return boxad; -} - - -/*! - * \brief boxaReconcileSidesByMedian() - * - * \param[in] boxas containing at least 3 valid boxes - * \param[in] select L_ADJUST_LEFT, L_ADJUST_RIGHT, etc. - * \param[in] thresh threshold number of pixels to make adjustment - * \param[in] extra extra pixels to add beyond median value - * \param[in] pixadb use NULL to skip debug output - * \return boxad possibly adjusted from boxas; a copy of boxas on error - * - *
- * Notes:
- *      (1) This modifies individual box sides if their location differs
- *          significantly (>= %thresh) from the median value.
- *      (2) %select specifies which sides are to be checked.
- *      (3) %thresh specifies the tolerance for different side locations.
- *          Any box side that differs from the median by this much will
- *          be set to the median value, plus the %extra amount.
- *      (4) If %extra is positive, the box dimensions are expanded.
- *          For example, for the left side, a positive %extra results in
- *          moving the left side farther to the left (i.e., in a negative
- *          direction).
- *      (5) If all box sides are within %thresh - 1 of the median value,
- *          the returned box will be identical to %boxas.
- *      (6) N.B. If you expect that even and odd box sides should be
- *          significantly different, this function must be called separately
- *          on the even and odd boxes in %boxas.  Note also that the
- *          higher level function boxaReconcileAllByMedian() handles the
- *          even and odd box sides separately.
- * 
- */ -BOXA * -boxaReconcileSidesByMedian(BOXA *boxas, - l_int32 select, - l_int32 thresh, - l_int32 extra, - PIXA *pixadb) - { -char buf[128]; -l_int32 i, n, diff; -l_int32 left, right, top, bot, medleft, medright, medtop, medbot; -BOX *box; -BOXA *boxa1, *boxad; -PIX *pix; - - PROCNAME("boxaReconcileSidesByMedian"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (select != L_ADJUST_LEFT && select != L_ADJUST_RIGHT && - select != L_ADJUST_TOP && select != L_ADJUST_BOT && - select != L_ADJUST_LEFT_AND_RIGHT && select != L_ADJUST_TOP_AND_BOT) { - L_WARNING("invalid select; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (thresh < 0) { - L_WARNING("thresh must be >= 0; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (boxaGetValidCount(boxas) < 3) { - L_WARNING("need at least 3 valid boxes; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - - if (select == L_ADJUST_LEFT_AND_RIGHT) { - boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_LEFT, thresh, extra, - pixadb); - boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_RIGHT, thresh, extra, - pixadb); - boxaDestroy(&boxa1); - return boxad; - } - if (select == L_ADJUST_TOP_AND_BOT) { - boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_TOP, thresh, extra, - pixadb); - boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_BOT, thresh, extra, - pixadb); - boxaDestroy(&boxa1); - return boxad; - } - - if (pixadb) { - l_int32 ndb = pixaGetCount(pixadb); - if (ndb == 0 || ndb == 5) { /* first of even and odd box sets */ - adjustSidePlotName(buf, sizeof(buf), "init", select); - boxaPlotSides(boxas, buf, NULL, NULL, NULL, NULL, &pix); - pixaAddPix(pixadb, pix, L_INSERT); - } - } - - n = boxaGetCount(boxas); - boxad = boxaCreate(n); - if (select == L_ADJUST_LEFT) { - boxaGetMedianVals(boxas, &medleft, NULL, NULL, NULL, NULL, NULL); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxas, i, L_COPY); - boxGetSideLocations(box, 
&left, NULL, NULL, NULL); - diff = medleft - left; - if (L_ABS(diff) >= thresh) - boxAdjustSides(box, box, diff - extra, 0, 0, 0); - boxaAddBox(boxad, box, L_INSERT); - } - } else if (select == L_ADJUST_RIGHT) { - boxaGetMedianVals(boxas, NULL, NULL, &medright, NULL, NULL, NULL); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxas, i, L_COPY); - boxGetSideLocations(box, NULL, &right, NULL, NULL); - diff = medright - right; - if (L_ABS(diff) >= thresh) - boxAdjustSides(box, box, 0, diff + extra, 0, 0); - boxaAddBox(boxad, box, L_INSERT); - } - } else if (select == L_ADJUST_TOP) { - boxaGetMedianVals(boxas, NULL, &medtop, NULL, NULL, NULL, NULL); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxas, i, L_COPY); - boxGetSideLocations(box, NULL, NULL, &top, NULL); - diff = medtop - top; - if (L_ABS(diff) >= thresh) - boxAdjustSides(box, box, 0, 0, diff - extra, 0); - boxaAddBox(boxad, box, L_INSERT); - } - } else { /* select == L_ADJUST_BOT */ - boxaGetMedianVals(boxas, NULL, NULL, NULL, &medbot, NULL, NULL); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxas, i, L_COPY); - boxGetSideLocations(box, NULL, NULL, NULL, &bot); - diff = medbot - bot; - if (L_ABS(diff) >= thresh) - boxAdjustSides(box, box, 0, 0, 0, diff + extra); - boxaAddBox(boxad, box, L_INSERT); - } - } - - if (pixadb) { - adjustSidePlotName(buf, sizeof(buf), "final", select); - boxaPlotSides(boxad, buf, NULL, NULL, NULL, NULL, &pix); - pixaAddPix(pixadb, pix, L_INSERT); - } - return boxad; -} - - -static void -adjustSidePlotName(char *buf, - size_t size, - const char *preface, - l_int32 select) -{ - stringCopy(buf, preface, size - 8); - if (select == L_ADJUST_LEFT) - stringCat(buf, size, "-left"); - else if (select == L_ADJUST_RIGHT) - stringCat(buf, size, "-right"); - else if (select == L_ADJUST_TOP) - stringCat(buf, size, "-top"); - else if (select == L_ADJUST_BOT) - stringCat(buf, size, "-bot"); -} - - -/*! 
- * \brief boxaReconcileSizeByMedian() - * - * \param[in] boxas containing at least 6 valid boxes - * \param[in] type L_CHECK_WIDTH, L_CHECK_HEIGHT, L_CHECK_BOTH - * \param[in] dfract threshold fraction of dimensional variation from - * median; in range (0 ... 1); typ. about 0.05. - * \param[in] sfract threshold fraction of side variation from median; - * in range (0 ... 1); typ. about 0.04. - * \param[in] factor expansion for fixed box beyond median width; - * should be near 1.0. - * \param[out] pnadelw [optional] diff from median width for boxes - * above threshold - * \param[out] pnadelh [optional] diff from median height for boxes - * above threshold - * \param[out] pratiowh [optional] ratio of median width/height of boxas - * \return boxad possibly adjusted from boxas; a copy of boxas on error - * - *
- * Notes:
- *      (1) The basic idea is to identify significant differences in box
- *          dimension (either width or height) and modify the outlier boxes.
- *      (2) %type specifies if we are reconciling the width, height or both.
- *      (3) %dfract specifies the tolerance for different dimensions. Any
- *          box with a fractional difference from the median size that
- *          exceeds %dfract will be altered.
- *      (4) %sfract specifies the tolerance for different side locations.
- *          If a box has been marked by (3) for alteration, any side
- *          location that differs from the median side location by
- *          more than %sfract of the median dimension (medw or medh)
- *          will be moved.
- *      (5) Median width and height are found for all valid boxes (i.e.,
- *          for all boxes with width and height > 0.
- *          Median side locations are found separately for even and odd boxes,
- *          using only boxes that are "inliers"; i.e., that have been
- *          found by (3) to be within tolerance for width or height.
- *      (6) If all box dimensions are within threshold of the median size,
- *          just return a copy.  Otherwise, box sides of the outliers
- *          will be adjusted.
- *      (7) Using %sfract, sides that are sufficiently far from the median
- *          are first moved to the median value.  Then they are moved
- *          together (in or out) so that the final box dimension
- *          is %factor times the median dimension.
- *      (8) The arrays that are the initial deviation from median size
- *          (width and height) are optionally returned.  Also optionally
- *          returned is the median w/h asperity ratio of the input %boxas.
- * 
- */ -BOXA * -boxaReconcileSizeByMedian(BOXA *boxas, - l_int32 type, - l_float32 dfract, - l_float32 sfract, - l_float32 factor, - NUMA **pnadelw, - NUMA **pnadelh, - l_float32 *pratiowh) -{ -l_int32 i, n, ne, no, outfound, isvalid, ind, del, maxdel; -l_int32 medw, medh, bw, bh, left, right, top, bot; -l_int32 medleft, medlefte, medlefto, medright, medrighte, medrighto; -l_int32 medtop, medtope, medtopo, medbot, medbote, medboto; -l_float32 brat; -BOX *box; -BOXA *boxa1, *boxae, *boxao, *boxad; -NUMA *naind, *nadelw, *nadelh; - - PROCNAME("boxaReconcileSizeByMedian"); - - if (pnadelw) *pnadelw = NULL; - if (pnadelh) *pnadelh = NULL; - if (pratiowh) *pratiowh = 0.0; - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT && - type != L_CHECK_BOTH) { - L_WARNING("invalid type; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (dfract <= 0.0 || dfract >= 0.5) { - L_WARNING("invalid dimensional fract; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (sfract <= 0.0 || sfract >= 0.5) { - L_WARNING("invalid side fract; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - if (factor < 0.8 || factor > 1.25) - L_WARNING("factor %5.3f is typ. 
closer to 1.0\n", procName, factor); - if (boxaGetValidCount(boxas) < 6) { - L_WARNING("need at least 6 valid boxes; returning copy\n", procName); - return boxaCopy(boxas, L_COPY); - } - - /* If reconciling both width and height, optionally return array of - * median deviations and even/odd ratio for width measurements */ - if (type == L_CHECK_BOTH) { - boxa1 = boxaReconcileSizeByMedian(boxas, L_CHECK_WIDTH, dfract, sfract, - factor, pnadelw, NULL, pratiowh); - boxad = boxaReconcileSizeByMedian(boxa1, L_CHECK_HEIGHT, dfract, sfract, - factor, NULL, pnadelh, NULL); - boxaDestroy(&boxa1); - return boxad; - } - - n = boxaGetCount(boxas); - naind = numaCreate(n); /* outlier indicator array */ - boxae = boxaCreate(0); /* even inliers */ - boxao = boxaCreate(0); /* odd inliers */ - outfound = FALSE; - if (type == L_CHECK_WIDTH) { - boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL, - &nadelw, NULL); - if (pratiowh) { - *pratiowh = (l_float32)medw / (l_float32)medh; - L_INFO("median ratio w/h = %5.3f\n", procName, *pratiowh); - } - if (pnadelw) - *pnadelw = nadelw; - else - numaDestroy(&nadelw); - - /* Check for outliers; assemble inliers */ - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) { - numaAddNumber(naind, 0); - continue; - } - boxGetGeometry(box, NULL, NULL, &bw, NULL); - brat = (l_float32)bw / (l_float32)medw; - if (brat < 1.0 - dfract || brat > 1.0 + dfract) { - outfound = TRUE; - numaAddNumber(naind, 1); - boxDestroy(&box); - } else { /* add to inliers */ - numaAddNumber(naind, 0); - if (i % 2 == 0) - boxaAddBox(boxae, box, L_INSERT); - else - boxaAddBox(boxao, box, L_INSERT); - } - } - if (!outfound) { /* nothing to do */ - numaDestroy(&naind); - boxaDestroy(&boxae); - boxaDestroy(&boxao); - L_INFO("no width outlier boxes found\n", procName); - return boxaCopy(boxas, L_COPY); - } - - /* Get left/right parameters from inliers. Handle the case - * where there are no inliers for one of the sets. 
For example, - * when all the even boxes have a different dimension from - * the odd boxes, and the median arbitrarily gets assigned - * to the even boxes, there are no odd inliers; in that case, - * use the even inliers sides to decide whether to adjust - * the left or the right sides of individual outliers. */ - L_INFO("fixing width of outlier boxes\n", procName); - medlefte = medrighte = medlefto = medrighto = 0; - if ((ne = boxaGetValidCount(boxae)) > 0) - boxaGetMedianVals(boxae, &medlefte, NULL, &medrighte, NULL, - NULL, NULL); - if ((no = boxaGetValidCount(boxao)) > 0) - boxaGetMedianVals(boxao, &medlefto, NULL, &medrighto, NULL, - NULL, NULL); - if (ne == 0) { /* use odd inliers values for both */ - medlefte = medlefto; - medrighte = medrighto; - } else if (no == 0) { /* use even inliers values for both */ - medlefto = medlefte; - medrighto = medrighte; - } - - /* Adjust the left and/or right sides of outliers. - * For each box that is a dimensional outlier, consider each side. - * Any side that differs fractionally from the median value - * by more than %sfract times the median width (medw) is set to - * the median value for that side. Then both sides are moved - * an equal distance in or out to make w = %factor * medw. */ - boxad = boxaCreate(n); - maxdel = (l_int32)(sfract * medw + 0.5); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxas, i, L_COPY); - boxIsValid(box, &isvalid); - numaGetIValue(naind, i, &ind); - medleft = (i % 2 == 0) ? medlefte : medlefto; - medright = (i % 2 == 0) ? 
medrighte : medrighto; - if (ind == 1 && isvalid) { /* adjust sides */ - boxGetSideLocations(box, &left, &right, NULL, NULL); - if (L_ABS(left - medleft) > maxdel) left = medleft; - if (L_ABS(right - medright) > maxdel) right = medright; - del = (l_int32)(factor * medw - (right - left)) / 2; - boxSetSide(box, L_SET_LEFT, left - del, 0); - boxSetSide(box, L_SET_RIGHT, right + del, 0); - } - boxaAddBox(boxad, box, L_INSERT); - } - } else { /* L_CHECK_HEIGHT */ - boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL, - NULL, &nadelh); - if (pratiowh) { - *pratiowh = (l_float32)medw / (l_float32)medh; - L_INFO("median ratio w/h = %5.3f\n", procName, *pratiowh); - } - if (pnadelh) - *pnadelh = nadelh; - else - numaDestroy(&nadelh); - - /* Check for outliers; assemble inliers */ - for (i = 0; i < n; i++) { - if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) { - numaAddNumber(naind, 0); - continue; - } - boxGetGeometry(box, NULL, NULL, NULL, &bh); - brat = (l_float32)bh / (l_float32)medh; - if (brat < 1.0 - dfract || brat > 1.0 + dfract) { - outfound = TRUE; - numaAddNumber(naind, 1); - boxDestroy(&box); - } else { /* add to inliers */ - numaAddNumber(naind, 0); - if (i % 2 == 0) - boxaAddBox(boxae, box, L_INSERT); - else - boxaAddBox(boxao, box, L_INSERT); - } - } - if (!outfound) { /* nothing to do */ - numaDestroy(&naind); - boxaDestroy(&boxae); - boxaDestroy(&boxao); - L_INFO("no height outlier boxes found\n", procName); - return boxaCopy(boxas, L_COPY); - } - - /* Get top/bot parameters from inliers. Handle the case - * where there are no inliers for one of the sets. For example, - * when all the even boxes have a different dimension from - * the odd boxes, and the median arbitrarily gets assigned - * to the even boxes, there are no odd inliers; in that case, - * use the even inlier sides to decide whether to adjust - * the top or the bottom sides of individual outliers. 
*/ - L_INFO("fixing height of outlier boxes\n", procName); - medlefte = medtope = medbote = medtopo = medboto = 0; - if ((ne = boxaGetValidCount(boxae)) > 0) - boxaGetMedianVals(boxae, NULL, &medtope, NULL, &medbote, - NULL, NULL); - if ((no = boxaGetValidCount(boxao)) > 0) - boxaGetMedianVals(boxao, NULL, &medtopo, NULL, &medboto, - NULL, NULL); - if (ne == 0) { /* use odd inliers values for both */ - medtope = medtopo; - medbote = medboto; - } else if (no == 0) { /* use even inliers values for both */ - medtopo = medtope; - medboto = medbote; - } - - /* Adjust the top and/or bottom sides of outliers. - * For each box that is a dimensional outlier, consider each side. - * Any side that differs fractionally from the median value - * by more than %sfract times the median height (medh) is - * set to the median value for that that side. Then both - * sides are moved an equal distance in or out to make - * h = %factor * medh). */ - boxad = boxaCreate(n); - maxdel = (l_int32)(sfract * medh + 0.5); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxas, i, L_COPY); - boxIsValid(box, &isvalid); - numaGetIValue(naind, i, &ind); - medtop = (i % 2 == 0) ? medtope : medtopo; - medbot = (i % 2 == 0) ? medbote : medboto; - if (ind == 1 && isvalid) { /* adjust sides */ - boxGetSideLocations(box, NULL, NULL, &top, &bot); - if (L_ABS(top - medtop) > maxdel) top = medtop; - if (L_ABS(bot - medbot) > maxdel) bot = medbot; - del = (l_int32)(factor * medh - (bot - top)) / 2; /* typ > 0 */ - boxSetSide(box, L_SET_TOP, L_MAX(0, top - del), 0); - boxSetSide(box, L_SET_BOT, bot + del, 0); - } - boxaAddBox(boxad, box, L_INSERT); - } - } - numaDestroy(&naind); - boxaDestroy(&boxae); - boxaDestroy(&boxao); - return boxad; -} - - -/*! 
- * \brief boxaPlotSides() - * - * \param[in] boxa source boxa - * \param[in] plotname [optional], can be NULL - * \param[out] pnal [optional] na of left sides - * \param[out] pnat [optional] na of top sides - * \param[out] pnar [optional] na of right sides - * \param[out] pnab [optional] na of bottom sides - * \param[out] ppixd pix of the output plot - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This debugging function shows the progression of the four
- *          sides in the boxa.  There must be at least 2 boxes.
- *      (2) If there are invalid boxes (e.g., if only even or odd
- *          indices have valid boxes), this will fill them with the
- *          nearest valid box before plotting.
- *      (3) The plotfiles are put in /tmp/lept/plots/, and are named
- *          either with %plotname or, if NULL, a default name.  If
- *          %plotname is used, make sure it has no whitespace characters.
- * 
- */ -l_ok -boxaPlotSides(BOXA *boxa, - const char *plotname, - NUMA **pnal, - NUMA **pnat, - NUMA **pnar, - NUMA **pnab, - PIX **ppixd) -{ -char buf[128], titlebuf[128]; -char *dataname; -static l_int32 plotid = 0; -l_int32 n, i, w, h, left, top, right, bot; -l_int32 debugprint = FALSE; /* change to TRUE to spam stderr */ -l_float32 med, dev; -BOXA *boxat; -GPLOT *gplot; -NUMA *nal, *nat, *nar, *nab; - - PROCNAME("boxaPlotSides"); - - if (pnal) *pnal = NULL; - if (pnat) *pnat = NULL; - if (pnar) *pnar = NULL; - if (pnab) *pnab = NULL; - if (ppixd) *ppixd = NULL; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if ((n = boxaGetCount(boxa)) < 2) - return ERROR_INT("less than 2 boxes", procName, 1); - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - - boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0); - - /* Build the numas for each side */ - nal = numaCreate(n); - nat = numaCreate(n); - nar = numaCreate(n); - nab = numaCreate(n); - - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxat, i, &left, &top, &w, &h); - right = left + w - 1; - bot = top + h - 1; - numaAddNumber(nal, left); - numaAddNumber(nat, top); - numaAddNumber(nar, right); - numaAddNumber(nab, bot); - } - boxaDestroy(&boxat); - - lept_mkdir("lept/plots"); - if (plotname) { - snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%s", plotname); - snprintf(titlebuf, sizeof(titlebuf), "%s: Box sides vs. box index", - plotname); - } else { - snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%d", plotid++); - snprintf(titlebuf, sizeof(titlebuf), "Box sides vs. 
box index"); - } - gplot = gplotCreate(buf, GPLOT_PNG, titlebuf, - "box index", "side location"); - gplotAddPlot(gplot, NULL, nal, GPLOT_LINES, "left side"); - gplotAddPlot(gplot, NULL, nat, GPLOT_LINES, "top side"); - gplotAddPlot(gplot, NULL, nar, GPLOT_LINES, "right side"); - gplotAddPlot(gplot, NULL, nab, GPLOT_LINES, "bottom side"); - *ppixd = gplotMakeOutputPix(gplot); - gplotDestroy(&gplot); - - if (debugprint) { - dataname = (plotname) ? stringNew(plotname) : stringNew("no_name"); - numaGetMedian(nal, &med); - numaGetMeanDevFromMedian(nal, med, &dev); - lept_stderr("%s left: med = %7.3f, meandev = %7.3f\n", - dataname, med, dev); - numaGetMedian(nat, &med); - numaGetMeanDevFromMedian(nat, med, &dev); - lept_stderr("%s top: med = %7.3f, meandev = %7.3f\n", - dataname, med, dev); - numaGetMedian(nar, &med); - numaGetMeanDevFromMedian(nar, med, &dev); - lept_stderr("%s right: med = %7.3f, meandev = %7.3f\n", - dataname, med, dev); - numaGetMedian(nab, &med); - numaGetMeanDevFromMedian(nab, med, &dev); - lept_stderr("%s bot: med = %7.3f, meandev = %7.3f\n", - dataname, med, dev); - LEPT_FREE(dataname); - } - - if (pnal) - *pnal = nal; - else - numaDestroy(&nal); - if (pnat) - *pnat = nat; - else - numaDestroy(&nat); - if (pnar) - *pnar = nar; - else - numaDestroy(&nar); - if (pnab) - *pnab = nab; - else - numaDestroy(&nab); - return 0; -} - - -/*! - * \brief boxaPlotSizes() - * - * \param[in] boxa source boxa - * \param[in] plotname [optional], can be NULL - * \param[out] pnaw [optional] na of widths - * \param[out] pnah [optional] na of heights - * \param[out] ppixd pix of the output plot - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This debugging function shows the progression of box width
- *          and height in the boxa.  There must be at least 2 boxes.
- *      (2) If there are invalid boxes (e.g., if only even or odd
- *          indices have valid boxes), this will fill them with the
- *          nearest valid box before plotting.
- *      (3) The plotfiles are put in /tmp/lept/plots/, and are named
- *          either with %plotname or, if NULL, a default name.  If
- *          %plotname is used, make sure it has no whitespace characters.
- * 
- */ -l_ok -boxaPlotSizes(BOXA *boxa, - const char *plotname, - NUMA **pnaw, - NUMA **pnah, - PIX **ppixd) -{ -char buf[128], titlebuf[128]; -static l_int32 plotid = 0; -l_int32 n, i, w, h; -BOXA *boxat; -GPLOT *gplot; -NUMA *naw, *nah; - - PROCNAME("boxaPlotSizes"); - - if (pnaw) *pnaw = NULL; - if (pnah) *pnah = NULL; - if (ppixd) *ppixd = NULL; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if ((n = boxaGetCount(boxa)) < 2) - return ERROR_INT("less than 2 boxes", procName, 1); - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - - boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0); - - /* Build the numas for the width and height */ - naw = numaCreate(n); - nah = numaCreate(n); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxat, i, NULL, NULL, &w, &h); - numaAddNumber(naw, w); - numaAddNumber(nah, h); - } - boxaDestroy(&boxat); - - lept_mkdir("lept/plots"); - if (plotname) { - snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%s", plotname); - snprintf(titlebuf, sizeof(titlebuf), "%s: Box size vs. box index", - plotname); - } else { - snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%d", plotid++); - snprintf(titlebuf, sizeof(titlebuf), "Box size vs. box index"); - } - gplot = gplotCreate(buf, GPLOT_PNG, titlebuf, - "box index", "box dimension"); - gplotAddPlot(gplot, NULL, naw, GPLOT_LINES, "width"); - gplotAddPlot(gplot, NULL, nah, GPLOT_LINES, "height"); - *ppixd = gplotMakeOutputPix(gplot); - gplotDestroy(&gplot); - - if (pnaw) - *pnaw = naw; - else - numaDestroy(&naw); - if (pnah) - *pnah = nah; - else - numaDestroy(&nah); - return 0; -} - - -/*! - * \brief boxaFillSequence() - * - * \param[in] boxas with at least 3 boxes - * \param[in] useflag L_USE_ALL_BOXES, L_USE_SAME_PARITY_BOXES - * \param[in] debug 1 for debug output - * \return boxad filled boxa, or NULL on error - * - *
- * Notes:
- *      (1) This simple function replaces invalid boxes with a copy of
- *          the nearest valid box, selected from either the entire
- *          sequence (L_USE_ALL_BOXES) or from the boxes with the
- *          same parity (L_USE_SAME_PARITY_BOXES).  It returns a new boxa.
- *      (2) This is useful if you expect boxes in the sequence to
- *          vary slowly with index.
- * 
- */ -BOXA * -boxaFillSequence(BOXA *boxas, - l_int32 useflag, - l_int32 debug) -{ -l_int32 n, nv; -BOXA *boxae, *boxao, *boxad; - - PROCNAME("boxaFillSequence"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (useflag != L_USE_ALL_BOXES && useflag != L_USE_SAME_PARITY_BOXES) - return (BOXA *)ERROR_PTR("invalid useflag", procName, NULL); - - n = boxaGetCount(boxas); - nv = boxaGetValidCount(boxas); - if (n == nv) - return boxaCopy(boxas, L_COPY); /* all valid */ - if (debug) - L_INFO("%d valid boxes, %d invalid boxes\n", procName, nv, n - nv); - if (useflag == L_USE_SAME_PARITY_BOXES && n < 3) { - L_WARNING("n < 3; some invalid\n", procName); - return boxaCopy(boxas, L_COPY); - } - - if (useflag == L_USE_ALL_BOXES) { - boxad = boxaCopy(boxas, L_COPY); - boxaFillAll(boxad); - } else { - boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); - boxaFillAll(boxae); - boxaFillAll(boxao); - boxad = boxaMergeEvenOdd(boxae, boxao, 0); - boxaDestroy(&boxae); - boxaDestroy(&boxao); - } - - nv = boxaGetValidCount(boxad); - if (n != nv) - L_WARNING("there are still %d invalid boxes\n", procName, n - nv); - - return boxad; -} - - -/*! - * \brief boxaFillAll() - * - * \param[in] boxa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This static function replaces every invalid box with the
- *          nearest valid box.  If there are no valid boxes, it
- *          issues a warning.
- * 
- */ -static l_int32 -boxaFillAll(BOXA *boxa) -{ -l_int32 n, nv, i, j, spandown, spanup; -l_int32 *indic; -BOX *box, *boxt; - - PROCNAME("boxaFillAll"); - - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - n = boxaGetCount(boxa); - nv = boxaGetValidCount(boxa); - if (n == nv) return 0; - if (nv == 0) { - L_WARNING("no valid boxes out of %d boxes\n", procName, n); - return 0; - } - - /* Make indicator array for valid boxes */ - if ((indic = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL) - return ERROR_INT("indic not made", procName, 1); - for (i = 0; i < n; i++) { - box = boxaGetValidBox(boxa, i, L_CLONE); - if (box) - indic[i] = 1; - boxDestroy(&box); - } - - /* Replace invalid boxes with the nearest valid one */ - for (i = 0; i < n; i++) { - box = boxaGetValidBox(boxa, i, L_CLONE); - if (!box) { - spandown = spanup = 10000000; - for (j = i - 1; j >= 0; j--) { - if (indic[j] == 1) { - spandown = i - j; - break; - } - } - for (j = i + 1; j < n; j++) { - if (indic[j] == 1) { - spanup = j - i; - break; - } - } - if (spandown < spanup) - boxt = boxaGetBox(boxa, i - spandown, L_COPY); - else - boxt = boxaGetBox(boxa, i + spanup, L_COPY); - boxaReplaceBox(boxa, i, boxt); - } - boxDestroy(&box); - } - - LEPT_FREE(indic); - return 0; -} - - -/*! - * \brief boxaSizeVariation() - * - * \param[in] boxa at least 4 boxes - * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT - * \param[out] pdel_evenodd [optional] average absolute value of - * (even - odd) size pairs - * \param[out] prms_even [optional] rms deviation of even boxes - * \param[out] prms_odd [optional] rms deviation of odd boxes - * \param[out] prms_all [optional] rms deviation of all boxes - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This gives several measures of the smoothness of either the
- *          width or height of a sequence of boxes.
- *          See boxaMedianDimensions() for some other measures.
- *      (2) Statistics can be found separately for even and odd boxes.
- *          Additionally, the average pair-wise difference between
- *          adjacent even and odd boxes can be returned.
- *      (3) The use case is bounding boxes for scanned page images,
- *          where ideally the sizes should have little variance.
- * 
- */ -l_ok -boxaSizeVariation(BOXA *boxa, - l_int32 type, - l_float32 *pdel_evenodd, - l_float32 *prms_even, - l_float32 *prms_odd, - l_float32 *prms_all) -{ -l_int32 n, ne, no, nmin, vale, valo, i; -l_float32 sum; -BOXA *boxae, *boxao; -NUMA *nae, *nao, *na_all; - - PROCNAME("boxaSizeVariation"); - - if (pdel_evenodd) *pdel_evenodd = 0.0; - if (prms_even) *prms_even = 0.0; - if (prms_odd) *prms_odd = 0.0; - if (prms_all) *prms_all = 0.0; - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT) - return ERROR_INT("invalid type", procName, 1); - if (!pdel_evenodd && !prms_even && !prms_odd && !prms_all) - return ERROR_INT("nothing to do", procName, 1); - n = boxaGetCount(boxa); - if (n < 4) - return ERROR_INT("too few boxes", procName, 1); - - boxaSplitEvenOdd(boxa, 0, &boxae, &boxao); - ne = boxaGetCount(boxae); - no = boxaGetCount(boxao); - nmin = L_MIN(ne, no); - if (nmin == 0) { - boxaDestroy(&boxae); - boxaDestroy(&boxao); - return ERROR_INT("either no even or no odd boxes", procName, 1); - } - - if (type == L_SELECT_WIDTH) { - boxaGetSizes(boxae, &nae, NULL); - boxaGetSizes(boxao, &nao, NULL); - boxaGetSizes(boxa, &na_all, NULL); - } else { /* L_SELECT_HEIGHT) */ - boxaGetSizes(boxae, NULL, &nae); - boxaGetSizes(boxao, NULL, &nao); - boxaGetSizes(boxa, NULL, &na_all); - } - - if (pdel_evenodd) { - sum = 0.0; - for (i = 0; i < nmin; i++) { - numaGetIValue(nae, i, &vale); - numaGetIValue(nao, i, &valo); - sum += L_ABS(vale - valo); - } - *pdel_evenodd = sum / nmin; - } - if (prms_even) - numaSimpleStats(nae, 0, -1, NULL, NULL, prms_even); - if (prms_odd) - numaSimpleStats(nao, 0, -1, NULL, NULL, prms_odd); - if (prms_all) - numaSimpleStats(na_all, 0, -1, NULL, NULL, prms_all); - - boxaDestroy(&boxae); - boxaDestroy(&boxao); - numaDestroy(&nae); - numaDestroy(&nao); - numaDestroy(&na_all); - return 0; -} - - -/*! 
- * \brief boxaMedianDimensions() - * - * \param[in] boxas containing at least 3 valid boxes in even and odd - * \param[out] pmedw [optional] median width of all boxes - * \param[out] pmedh [optional] median height of all boxes - * \param[out] pmedwe [optional] median width of even boxes - * \param[out] pmedwo [optional] median width of odd boxes - * \param[out] pmedhe [optional] median height of even boxes - * \param[out] pmedho [optional] median height of odd boxes - * \param[out] pnadelw [optional] width diff of each box from median - * \param[out] pnadelh [optional] height diff of each box from median - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This provides information that (1) allows identification of
- *          boxes that have unusual (outlier) width or height, and (2) can
- *          be used to regularize the sizes of the outlier boxes, assuming
- *          that the boxes satisfy a fairly regular sequence and should
- *          mostly have the same width and height.
- *      (2) This finds the median width and height, as well as separate
- *          median widths and heights of even and odd boxes.  It also
- *          generates arrays that give the difference in width and height
- *          of each box from the median, which can be used to correct
- *          individual boxes.
- *      (3) All return values are optional.
- * 
- */ -l_ok -boxaMedianDimensions(BOXA *boxas, - l_int32 *pmedw, - l_int32 *pmedh, - l_int32 *pmedwe, - l_int32 *pmedwo, - l_int32 *pmedhe, - l_int32 *pmedho, - NUMA **pnadelw, - NUMA **pnadelh) -{ -l_int32 i, n, bw, bh, medw, medh, medwe, medwo, medhe, medho; -BOXA *boxae, *boxao; -NUMA *nadelw, *nadelh; - - PROCNAME("boxaMedianDimensions"); - - if (pmedw) *pmedw = 0; - if (pmedh) *pmedh = 0; - if (pmedwe) *pmedwe= 0; - if (pmedwo) *pmedwo= 0; - if (pmedhe) *pmedhe= 0; - if (pmedho) *pmedho= 0; - if (pnadelw) *pnadelw = NULL; - if (pnadelh) *pnadelh = NULL; - if (!boxas) - return ERROR_INT("boxas not defined", procName, 1); - if (boxaGetValidCount(boxas) < 6) - return ERROR_INT("need at least 6 valid boxes", procName, 1); - - /* Require at least 3 valid boxes of both types */ - boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); - if (boxaGetValidCount(boxae) < 3 || boxaGetValidCount(boxao) < 3) { - boxaDestroy(&boxae); - boxaDestroy(&boxao); - return ERROR_INT("don't have 3+ valid boxes of each type", procName, 1); - } - - /* Get the relevant median widths and heights */ - boxaGetMedianVals(boxas, NULL, NULL, NULL, NULL, &medw, &medh); - boxaGetMedianVals(boxae, NULL, NULL, NULL, NULL, &medwe, &medhe); - boxaGetMedianVals(boxao, NULL, NULL, NULL, NULL, &medwo, &medho); - if (pmedw) *pmedw = medw; - if (pmedh) *pmedh = medh; - if (pmedwe) *pmedwe = medwe; - if (pmedwo) *pmedwo = medwo; - if (pmedhe) *pmedhe = medhe; - if (pmedho) *pmedho = medho; - - /* Find the variation from median dimension for each box */ - n = boxaGetCount(boxas); - nadelw = numaCreate(n); - nadelh = numaCreate(n); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw, &bh); - if (bw == 0 || bh == 0) { /* invalid box */ - numaAddNumber(nadelw, 0); - numaAddNumber(nadelh, 0); - } else { - numaAddNumber(nadelw, bw - medw); - numaAddNumber(nadelh, bh - medh); - } - } - if (pnadelw) - *pnadelw = nadelw; - else - numaDestroy(&nadelw); - if (pnadelh) - *pnadelh = nadelh; - else - 
numaDestroy(&nadelh); - - boxaDestroy(&boxae); - boxaDestroy(&boxao); - return 0; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bytearray.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bytearray.c deleted file mode 100644 index 39d06212..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/bytearray.c +++ /dev/null @@ -1,640 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file bytearray.c - *
- *
- *   Functions for handling byte arrays, in analogy with C++ 'strings'
- *
- *      Creation, copy, clone, destruction
- *           L_BYTEA      *l_byteaCreate()
- *           L_BYTEA      *l_byteaInitFromMem()
- *           L_BYTEA      *l_byteaInitFromFile()
- *           L_BYTEA      *l_byteaInitFromStream()
- *           L_BYTEA      *l_byteaCopy()
- *           void          l_byteaDestroy()
- *
- *      Accessors
- *           size_t        l_byteaGetSize()
- *           l_uint8      *l_byteaGetData()
- *           l_uint8      *l_byteaCopyData()
- *
- *      Appending
- *           l_int32       l_byteaAppendData()
- *           l_int32       l_byteaAppendString()
- *           static l_int32  l_byteaExtendArrayToSize()
- *
- *      Join/Split
- *           l_int32       l_byteaJoin()
- *           l_int32       l_byteaSplit()
- *
- *      Search
- *           l_int32       l_byteaFindEachSequence()
- *
- *      Output to file
- *           l_int32       l_byteaWrite()
- *           l_int32       l_byteaWriteStream()
- *
- *   The internal data array is always null-terminated, for ease of use
- *   in the event that it is an ascii string without null bytes.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on array size */ -static const l_uint32 MaxArraySize = 1000000000; /* 10^9 bytes */ -static const l_int32 InitialArraySize = 200; /*!< n'importe quoi */ - - /* Static function */ -static l_int32 l_byteaExtendArrayToSize(L_BYTEA *ba, size_t size); - -/*---------------------------------------------------------------------* - * Creation, copy, clone, destruction * - *---------------------------------------------------------------------*/ -/*! - * \brief l_byteaCreate() - * - * \param[in] nbytes determines initial size of data array - * \return l_bytea, or NULL on error - * - *
- * Notes:
- *      (1) The allocated array is n + 1 bytes.  This allows room
- *          for null termination.
- * 
- */ -L_BYTEA * -l_byteaCreate(size_t nbytes) -{ -L_BYTEA *ba; - - PROCNAME("l_byteaCreate"); - - if (nbytes <= 0 || nbytes > MaxArraySize) - nbytes = InitialArraySize; - ba = (L_BYTEA *)LEPT_CALLOC(1, sizeof(L_BYTEA)); - ba->data = (l_uint8 *)LEPT_CALLOC(nbytes + 1, sizeof(l_uint8)); - if (!ba->data) { - l_byteaDestroy(&ba); - return (L_BYTEA *)ERROR_PTR("ba array not made", procName, NULL); - } - ba->nalloc = nbytes + 1; - ba->refcount = 1; - return ba; -} - - -/*! - * \brief l_byteaInitFromMem() - * - * \param[in] data to be copied to the array - * \param[in] size amount of data - * \return l_bytea, or NULL on error - */ -L_BYTEA * -l_byteaInitFromMem(const l_uint8 *data, - size_t size) -{ -L_BYTEA *ba; - - PROCNAME("l_byteaInitFromMem"); - - if (!data) - return (L_BYTEA *)ERROR_PTR("data not defined", procName, NULL); - if (size <= 0) - return (L_BYTEA *)ERROR_PTR("no bytes to initialize", procName, NULL); - if (size > MaxArraySize) - return (L_BYTEA *)ERROR_PTR("size is too big", procName, NULL); - - if ((ba = l_byteaCreate(size)) == NULL) - return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL); - memcpy(ba->data, data, size); - ba->size = size; - return ba; -} - - -/*! - * \brief l_byteaInitFromFile() - * - * \param[in] fname - * \return l_bytea, or NULL on error - */ -L_BYTEA * -l_byteaInitFromFile(const char *fname) -{ -FILE *fp; -L_BYTEA *ba; - - PROCNAME("l_byteaInitFromFile"); - - if (!fname) - return (L_BYTEA *)ERROR_PTR("fname not defined", procName, NULL); - - if ((fp = fopenReadStream(fname)) == NULL) - return (L_BYTEA *)ERROR_PTR("file stream not opened", procName, NULL); - ba = l_byteaInitFromStream(fp); - fclose(fp); - if (!ba) - return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL); - return ba; -} - - -/*! 
- * \brief l_byteaInitFromStream() - * - * \param[in] fp file stream - * \return l_bytea, or NULL on error - */ -L_BYTEA * -l_byteaInitFromStream(FILE *fp) -{ -l_uint8 *data; -size_t nbytes; -L_BYTEA *ba; - - PROCNAME("l_byteaInitFromStream"); - - if (!fp) - return (L_BYTEA *)ERROR_PTR("stream not defined", procName, NULL); - - if ((data = l_binaryReadStream(fp, &nbytes)) == NULL) - return (L_BYTEA *)ERROR_PTR("data not read", procName, NULL); - if ((ba = l_byteaCreate(nbytes)) == NULL) { - LEPT_FREE(data); - return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL); - } - memcpy(ba->data, data, nbytes); - ba->size = nbytes; - LEPT_FREE(data); - return ba; -} - - -/*! - * \brief l_byteaCopy() - * - * \param[in] bas source lba - * \param[in] copyflag L_COPY, L_CLONE - * \return clone or copy of bas, or NULL on error - * - *
- * Notes:
- *      (1) If cloning, up the refcount and return a ptr to %bas.
- * 
- */ -L_BYTEA * -l_byteaCopy(L_BYTEA *bas, - l_int32 copyflag) -{ - PROCNAME("l_byteaCopy"); - - if (!bas) - return (L_BYTEA *)ERROR_PTR("bas not defined", procName, NULL); - - if (copyflag == L_CLONE) { - bas->refcount++; - return bas; - } - - return l_byteaInitFromMem(bas->data, bas->size); -} - - -/*! - * \brief l_byteaDestroy() - * - * \param[in,out] pba will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the lba.
- *      (2) Always nulls the input ptr.
- *      (3) If the data has been previously removed, the lba will
- *          have been nulled, so this will do nothing.
- * 
- */ -void -l_byteaDestroy(L_BYTEA **pba) -{ -L_BYTEA *ba; - - PROCNAME("l_byteaDestroy"); - - if (pba == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((ba = *pba) == NULL) - return; - - /* Decrement the ref count. If it is 0, destroy the lba. */ - ba->refcount--; - if (ba->refcount <= 0) { - if (ba->data) LEPT_FREE(ba->data); - LEPT_FREE(ba); - } - - *pba = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * Accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief l_byteaGetSize() - * - * \param[in] ba - * \return size of stored byte array, or 0 on error - */ -size_t -l_byteaGetSize(L_BYTEA *ba) -{ - PROCNAME("l_byteaGetSize"); - - if (!ba) - return ERROR_INT("ba not defined", procName, 0); - return ba->size; -} - - -/*! - * \brief l_byteaGetData() - * - * \param[in] ba - * \param[out] psize size of data in lba - * \return ptr to existing data array, or NULL on error - * - *
- * Notes:
- *      (1) The returned ptr is owned by %ba.  Do not free it!
- * 
- */ -l_uint8 * -l_byteaGetData(L_BYTEA *ba, - size_t *psize) -{ - PROCNAME("l_byteaGetData"); - - if (!ba) - return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL); - if (!psize) - return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL); - - *psize = ba->size; - return ba->data; -} - - -/*! - * \brief l_byteaCopyData() - * - * \param[in] ba - * \param[out] psize size of data in lba - * \return copy of data in use in the data array, or NULL on error. - * - *
- * Notes:
- *      (1) The returned data is owned by the caller.  The input %ba
- *          still owns the original data array.
- * 
- */ -l_uint8 * -l_byteaCopyData(L_BYTEA *ba, - size_t *psize) -{ -l_uint8 *data; - - PROCNAME("l_byteaCopyData"); - - if (!psize) - return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL); - *psize = 0; - if (!ba) - return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL); - - data = l_byteaGetData(ba, psize); - return l_binaryCopy(data, *psize); -} - - -/*---------------------------------------------------------------------* - * Appending * - *---------------------------------------------------------------------*/ -/*! - * \brief l_byteaAppendData() - * - * \param[in] ba - * \param[in] newdata byte array to be appended - * \param[in] newbytes size of data array - * \return 0 if OK, 1 on error - */ -l_ok -l_byteaAppendData(L_BYTEA *ba, - const l_uint8 *newdata, - size_t newbytes) -{ -size_t size, nalloc, reqsize; - - PROCNAME("l_byteaAppendData"); - - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - if (!newdata) - return ERROR_INT("newdata not defined", procName, 1); - - size = l_byteaGetSize(ba); - reqsize = size + newbytes + 1; - nalloc = ba->nalloc; - if (nalloc < reqsize) - l_byteaExtendArrayToSize(ba, 2 * reqsize); - - memcpy(ba->data + size, newdata, newbytes); - ba->size += newbytes; - return 0; -} - - -/*! - * \brief l_byteaAppendString() - * - * \param[in] ba - * \param[in] str null-terminated string to be appended - * \return 0 if OK, 1 on error - */ -l_ok -l_byteaAppendString(L_BYTEA *ba, - const char *str) -{ -size_t size, len, nalloc, reqsize; - - PROCNAME("l_byteaAppendString"); - - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - if (!str) - return ERROR_INT("str not defined", procName, 1); - - size = l_byteaGetSize(ba); - len = strlen(str); - reqsize = size + len + 1; - nalloc = ba->nalloc; - if (nalloc < reqsize) - l_byteaExtendArrayToSize(ba, 2 * reqsize); - - memcpy(ba->data + size, str, len); - ba->size += len; - return 0; -} - - -/*! 
- * \brief l_byteaExtendArrayToSize() - * - * \param[in] ba - * \param[in] size new size of lba data array - * \return 0 if OK; 1 on error - */ -static l_int32 -l_byteaExtendArrayToSize(L_BYTEA *ba, - size_t size) -{ - PROCNAME("l_byteaExtendArrayToSize"); - - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - - if (size > ba->nalloc) { - if ((ba->data = - (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size)) - == NULL) - return ERROR_INT("new array not returned", procName, 1); - ba->nalloc = size; - } - return 0; -} - - -/*---------------------------------------------------------------------* - * String join/split * - *---------------------------------------------------------------------*/ -/*! - * \brief l_byteaJoin() - * - * \param[in] ba1 - * \param[in,out] pba2 data array is added to the one in ba1; - * then ba2 is destroyed and its pointer is nulled. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is a no-op, not an error, for %ba2 to be null.
- * 
- */ -l_ok -l_byteaJoin(L_BYTEA *ba1, - L_BYTEA **pba2) -{ -l_uint8 *data2; -size_t nbytes2; -L_BYTEA *ba2; - - PROCNAME("l_byteaJoin"); - - if (!ba1) - return ERROR_INT("ba1 not defined", procName, 1); - if (!pba2) - return ERROR_INT("&ba2 not defined", procName, 1); - if ((ba2 = *pba2) == NULL) return 0; - - data2 = l_byteaGetData(ba2, &nbytes2); - l_byteaAppendData(ba1, data2, nbytes2); - - l_byteaDestroy(pba2); - return 0; -} - - -/*! - * \brief l_byteaSplit() - * - * \param[in] ba1 lba to split; array bytes nulled beyond the split loc - * \param[in] splitloc location in ba1 to split; ba2 begins there - * \param[out] pba2 with data starting at splitloc - * \return 0 if OK, 1 on error - */ -l_ok -l_byteaSplit(L_BYTEA *ba1, - size_t splitloc, - L_BYTEA **pba2) -{ -l_uint8 *data1; -size_t nbytes1, nbytes2; - - PROCNAME("l_byteaSplit"); - - if (!pba2) - return ERROR_INT("&ba2 not defined", procName, 1); - *pba2 = NULL; - if (!ba1) - return ERROR_INT("ba1 not defined", procName, 1); - - data1 = l_byteaGetData(ba1, &nbytes1); - if (splitloc >= nbytes1) - return ERROR_INT("splitloc invalid", procName, 1); - nbytes2 = nbytes1 - splitloc; - - /* Make the new lba */ - *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2); - - /* Null the removed bytes in the input lba */ - memset(data1 + splitloc, 0, nbytes2); - ba1->size = splitloc; - return 0; -} - - -/*---------------------------------------------------------------------* - * Search * - *---------------------------------------------------------------------*/ -/*! 
- * \brief l_byteaFindEachSequence() - * - * \param[in] ba - * \param[in] sequence subarray of bytes to find in data - * \param[in] seqlen length of sequence, in bytes - * \param[out] pda byte positions of each occurrence of %sequence - * \return 0 if OK, 1 on error - */ -l_ok -l_byteaFindEachSequence(L_BYTEA *ba, - const l_uint8 *sequence, - size_t seqlen, - L_DNA **pda) -{ -l_uint8 *data; -size_t size; - - PROCNAME("l_byteaFindEachSequence"); - - if (!pda) - return ERROR_INT("&da not defined", procName, 1); - *pda = NULL; - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - if (!sequence) - return ERROR_INT("sequence not defined", procName, 1); - - data = l_byteaGetData(ba, &size); - *pda = arrayFindEachSequence(data, size, sequence, seqlen); - return 0; -} - - -/*---------------------------------------------------------------------* - * Output to file * - *---------------------------------------------------------------------*/ -/*! - * \brief l_byteaWrite() - * - * \param[in] fname output file - * \param[in] ba - * \param[in] startloc first byte to output - * \param[in] nbytes number of bytes to write; use 0 to write to - * the end of the data array - * \return 0 if OK, 1 on error - */ -l_ok -l_byteaWrite(const char *fname, - L_BYTEA *ba, - size_t startloc, - size_t nbytes) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("l_byteaWrite"); - - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - - if ((fp = fopenWriteStream(fname, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = l_byteaWriteStream(fp, ba, startloc, nbytes); - fclose(fp); - return ret; -} - - -/*! 
- * \brief l_byteaWriteStream() - * - * \param[in] fp file stream opened for binary write - * \param[in] ba - * \param[in] startloc first byte to output - * \param[in] nbytes number of bytes to write; use 0 to write to - * the end of the data array - * \return 0 if OK, 1 on error - */ -l_ok -l_byteaWriteStream(FILE *fp, - L_BYTEA *ba, - size_t startloc, - size_t nbytes) -{ -l_uint8 *data; -size_t size, maxbytes; - - PROCNAME("l_byteaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!ba) - return ERROR_INT("ba not defined", procName, 1); - - data = l_byteaGetData(ba, &size); - if (startloc >= size) - return ERROR_INT("invalid startloc", procName, 1); - maxbytes = size - startloc; - nbytes = (nbytes == 0) ? maxbytes : L_MIN(nbytes, maxbytes); - - fwrite(data + startloc, 1, nbytes, fp); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccbord.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccbord.c deleted file mode 100644 index 2f08aab6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccbord.c +++ /dev/null @@ -1,2617 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file ccbord.c - *
- *
- *     CCBORDA and CCBORD creation and destruction
- *         CCBORDA         *ccbaCreate()
- *         void            *ccbaDestroy()
- *         CCBORD          *ccbCreate()
- *         void            *ccbDestroy()
- *
- *     CCBORDA addition
- *         l_int32          ccbaAddCcb()
- *         static l_int32   ccbaExtendArray()
- *
- *     CCBORDA accessors
- *         l_int32          ccbaGetCount()
- *         l_int32          ccbaGetCcb()
- *
- *     Top-level border-finding routines
- *         CCBORDA         *pixGetAllCCBorders()
- *         static CCBORD   *pixGetCCBorders()
- *         PTAA            *pixGetOuterBordersPtaa()
- *         static PTA      *pixGetOuterBorderPta()
- *
- *     Lower-level border location routines
- *         PTAA            *pixGetOuterBorder()
- *         static l_int32   pixGetHoleBorder()
- *         static l_int32   findNextBorderPixel()
- *         static void      locateOutsideSeedPixel()
- *
- *     Border conversions
- *         l_int32          ccbaGenerateGlobalLocs()
- *         l_int32          ccbaGenerateStepChains()
- *         l_int32          ccbaStepChainsToPixCoords()
- *         l_int32          ccbaGenerateSPGlobalLocs()
- *
- *     Conversion to single path
- *         l_int32          ccbaGenerateSinglePath()
- *         PTA             *getCutPathForHole()
- *
- *     Border and full image rendering
- *         PIX             *ccbaDisplayBorder()
- *         PIX             *ccbaDisplaySPBorder()
- *         PIX             *ccbaDisplayImage1()
- *         PIX             *ccbaDisplayImage2()
- *
- *     Serialize for I/O
- *         l_int32          ccbaWrite()
- *         l_int32          ccbaWriteStream()
- *         l_int32          ccbaRead()
- *         l_int32          ccbaReadStream()
- *
- *     SVG output
- *         l_int32          ccbaWriteSVG()
- *         char            *ccbaWriteSVGString()
- *
- *
- *     Border finding is tricky because components can have
- *     holes, which also need to be traced out.  The outer
- *     border can be connected with all the hole borders,
- *     so that there is a single border for each component.
- *     [Alternatively, the connecting paths can be eliminated if
- *     you're willing to have a set of borders for each
- *     component (an exterior border and some number of
- *     interior ones), with "line to" operations tracing
- *     out each border and "move to" operations going from
- *     one border to the next.]
- *
- *     Here's the plan.  We get the pix for each connected
- *     component, and trace its exterior border.  We then
- *     find the holes (if any) in the pix, and separately
- *     trace out their borders, all using the same
- *     border-following rule that has ON pixels on the right
- *     side of the path.
- *
- *     [For svg, we may want to turn each set of borders for a c.c.
- *     into a closed path.  This can be done by tunnelling
- *     through the component from the outer border to each of the
- *     holes, going in and coming out along the same path so
- *     the connection will be invisible in any rendering
- *     (display or print) from the outline.  The result is a
- *     closed path, where the outside border is traversed
- *     cw and each hole is traversed ccw.  The svg renderer
- *     is assumed to handle these closed borders properly.]
- *
- *     Each border is a closed path that is traversed in such
- *     a way that the stuff inside the c.c. is on the right
- *     side of the traveller.  The border of a singly-connected
- *     component is thus traversed cw, and the border of the
- *     holes inside a c.c. are traversed ccw.  Suppose we have
- *     a list of all the borders of each c.c., both the cw and ccw
- *     traversals.  How do we reconstruct the image?
- *
- *   Reconstruction:
- *
- *     Method 1.  Topological method using connected components.
- *     We have closed borders composed of cw border pixels for the
- *     exterior of c.c. and ccw border pixels for the interior (holes)
- *     in the c.c.
- *         (a) Initialize the destination to be OFF.  Then,
- *             in any order:
- *         (b) Fill the components within and including the cw borders,
- *             and sequentially XOR them onto the destination.
- *         (c) Fill the components within but not including the ccw
- *             borders and sequentially XOR them onto the destination.
- *     The components that are XOR'd together can be generated as follows:
- *         (a) For each closed cw path, use pixFillClosedBorders():
- *               (1) Turn on the path pixels in a subimage that
- *                   minimally supports the border.
- *               (2) Do a 4-connected fill from a seed of 1 pixel width
- *                   on the border, using the inverted image in (1) as
- *                   a filling mask.
- *               (3) Invert the fill result: this gives the component
- *                   including the exterior cw path, with all holes
- *                   filled.
- *         (b) For each closed ccw path (hole):
- *               (1) Turn on the path pixels in a subimage that minimally
- *                   supports the path.
- *               (2) Find a seed pixel on the inside of this path.
- *               (3) Do a 4-connected fill from this seed pixel, using
- *                   the inverted image of the path in (1) as a filling
- *                   mask.
- *
- *     ------------------------------------------------------
- *
- *     Method 2.  A variant of Method 1.  Topological.
- *     In Method 1, we treat the exterior border differently from
- *     the interior (hole) borders.  Here, all borders in a c.c.
- *     are treated equally:
- *         (1) Start with a pix with a 1 pixel OFF boundary
- *             enclosing all the border pixels of the c.c.
- *             This is the filling mask.
- *         (2) Make a seed image of the same size as follows:  for
- *             each border, put one seed pixel OUTSIDE the border
- *             (where OUTSIDE is determined by the inside/outside
- *             convention for borders).
- *         (3) Seedfill into the seed image, filling in the regions
- *             determined by the filling mask.  The fills are clipped
- *             by the border pixels.
- *         (4) Inverting this, we get the c.c. properly filled,
- *             with the holes empty!
- *         (5) Rasterop using XOR the filled c.c. (but not the 1
- *             pixel boundary) into the full dest image.
- *
- *     Method 2 is about 1.2x faster than Method 1 on text images,
- *     and about 2x faster on complex images (e.g., with halftones).
- *
- *     ------------------------------------------------------
- *
- *     Method 3.  The traditional way to fill components delineated
- *     by boundaries is through scan line conversion.  It's a bit
- *     tricky, and I have not yet tried to implement it.
- *
- *     ------------------------------------------------------
- *
- *     Method 4.  [Nota Bene: this method probably doesn't work, and
- *     won't be implemented.  If I get a more traditional scan line
- *     conversion algorithm working, I'll erase these notes.]
- *     Render all border pixels on a destination image,
- *     which will be the final result after scan conversion.  Assign
- *     a value 1 to pixels on cw paths, 2 to pixels on ccw paths,
- *     and 3 to pixels that are on both paths.  Each of the paths
- *     is an 8-connected component.  Now scan across each raster
- *     line.  The attempt is to make rules for each scan line
- *     that are independent of neighboring scanlines.  Here are
- *     a set of rules for writing ON pixels on a destination raster image:
- *
- *         (a) The rasterizer will be in one of two states: ON and OFF.
- *         (b) Start each line in the OFF state.  In the OFF state,
- *             skip pixels until you hit a path of any type.  Turn
- *             the path pixel ON.
- *         (c) If the state is ON, each pixel you encounter will
- *             be turned on, until and including hitting a path pixel.
- *         (d) When you hit a path pixel, if the path does NOT cut
- *             through the line, so that there is not an 8-cc path
- *             pixel (of any type) both above and below, the state
- *             is unchanged (it stays either ON or OFF).
- *         (e) If the path does cut through, but with a possible change
- *             of pixel type, then we decide whether or
- *             not to toggle the state based on the values of the
- *             path pixel and the path pixels above and below:
- *               (1) if a 1 path cuts through, toggle;
- *               (1) if a 2 path cuts through, toggle;
- *               (3) if a 3 path cuts through, do not toggle;
- *               (4) if on one side a 3 touches both a 1 and a 2, use the 2
- *               (5) if a 3 has any 1 neighbors, toggle; else if it has
- *                   no 1 neighbors, do not toggle;
- *               (6) if a 2 has any neighbors that are 1 or 3,
- *                   do not toggle
- *               (7) if a 1 has neighbors 1 and x (x = 2 or 3),
- *                   toggle
- *
- *
- *     To visualize how these rules work, consider the following
- *     component with border pixels labeled according to the scheme
- *     above.  We also show the values of the interior pixels
- *     (w=OFF, b=ON), but these of course must be inferred properly
- *     from the rules above:
- *
- *                     3
- *                  3  w  3             1  1  1
- *                  1  2  1          1  b  2  b  1
- *                  1  b  1             3  w  2  1
- *                  3  b  1          1  b  2  b  1
- *               3  w  3                1  1  1
- *               3  w  3
- *            1  b  2  b  1
- *            1  2  w  2  1
- *         1  b  2  w  2  b  1
- *            1  2  w  2  1
- *               1  2  b  1
- *               1  b  1
- *                  1
- *
- *
- *     Even if this works, which is unlikely, it will certainly be
- *     slow because decisions have to be made on a pixel-by-pixel
- *     basis when encountering borders.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_int32 INITIAL_PTR_ARRAYSIZE = 20; /* n'import quoi */ - - /* In ccbaGenerateSinglePath(): don't save holes - * in c.c. with ridiculously many small holes */ -static const l_int32 NMAX_HOLES = 150; - - /* Tables used to trace the border. - * - The 8 pixel positions of neighbors Q are labeled clockwise - * starting from the west: - * 1 2 3 - * 0 P 4 - * 7 6 5 - * where the labels are the index offset [0, ... 7] of Q relative to P. - * - xpostab[] and ypostab[] give the actual x and y pixel offsets - * of Q relative to P, indexed by the index offset. - * - qpostab[pos] gives the new index offset of Q relative to P, at - * the time that a new P has been chosen to be in index offset - * position 'pos' relative to the previous P. The relation - * between P and Q is always 4-connected. */ -static const l_int32 xpostab[] = {-1, -1, 0, 1, 1, 1, 0, -1}; -static const l_int32 ypostab[] = {0, -1, -1, -1, 0, 1, 1, 1}; -static const l_int32 qpostab[] = {6, 6, 0, 0, 2, 2, 4, 4}; - - /* Static functions */ -static l_int32 ccbaExtendArray(CCBORDA *ccba); -static CCBORD *pixGetCCBorders(PIX *pixs, BOX *box); -static PTA *pixGetOuterBorderPta(PIX *pixs, BOX *box); -static l_ok pixGetHoleBorder(CCBORD *ccb, PIX *pixs, BOX *box, - l_int32 xs, l_int32 ys); -static l_int32 findNextBorderPixel(l_int32 w, l_int32 h, l_uint32 *data, - l_int32 wpl, l_int32 px, l_int32 py, - l_int32 *pqpos, l_int32 *pnpx, - l_int32 *pnpy); -static void locateOutsideSeedPixel(l_int32 fpx, l_int32 fpy, l_int32 spx, - l_int32 spy, l_int32 *pxs, l_int32 *pys); - -#ifndef NO_CONSOLE_IO -#define DEBUG_PRINT 0 -#endif /* NO CONSOLE_IO */ - - -/*---------------------------------------------------------------------* - * ccba and ccb creation and destruction * - *---------------------------------------------------------------------*/ -/*! 
- * \brief ccbaCreate() - * - * \param[in] pixs 1 bpp; can be null - * \param[in] n initial number of ptrs - * \return ccba, or NULL on error - */ -CCBORDA * -ccbaCreate(PIX *pixs, - l_int32 n) -{ -CCBORDA *ccba; - - PROCNAME("ccbaCreate"); - - if (n <= 0) - n = INITIAL_PTR_ARRAYSIZE; - - ccba = (CCBORDA *)LEPT_CALLOC(1, sizeof(CCBORDA)); - if (pixs) { - ccba->pix = pixClone(pixs); - ccba->w = pixGetWidth(pixs); - ccba->h = pixGetHeight(pixs); - } - ccba->n = 0; - ccba->nalloc = n; - if ((ccba->ccb = (CCBORD **)LEPT_CALLOC(n, sizeof(CCBORD *))) == NULL) { - ccbaDestroy(&ccba); - return (CCBORDA *)ERROR_PTR("ccba ptrs not made", procName, NULL); - } - return ccba; -} - - -/*! - * \brief ccbaDestroy() - * - * \param[in,out] pccba will be set to null befoe returning - * \return void - */ -void -ccbaDestroy(CCBORDA **pccba) -{ -l_int32 i; -CCBORDA *ccba; - - PROCNAME("ccbaDestroy"); - - if (pccba == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((ccba = *pccba) == NULL) - return; - - pixDestroy(&ccba->pix); - for (i = 0; i < ccba->n; i++) - ccbDestroy(&ccba->ccb[i]); - LEPT_FREE(ccba->ccb); - LEPT_FREE(ccba); - *pccba = NULL; - return; -} - - -/*! 
- * \brief ccbCreate() - * - * \param[in] pixs [optional]; can be null - * \return ccb or NULL on error - */ -CCBORD * -ccbCreate(PIX *pixs) -{ -BOXA *boxa; -CCBORD *ccb; -PTA *start; -PTAA *local; - - PROCNAME("ccbCreate"); - - if (pixs) { - if (pixGetDepth(pixs) != 1) - return (CCBORD *)ERROR_PTR("pixs not binary", procName, NULL); - } - - if ((ccb = (CCBORD *)LEPT_CALLOC(1, sizeof(CCBORD))) == NULL) - return (CCBORD *)ERROR_PTR("ccb not made", procName, NULL); - ccb->refcount++; - if (pixs) - ccb->pix = pixClone(pixs); - if ((boxa = boxaCreate(1)) == NULL) - return (CCBORD *)ERROR_PTR("boxa not made", procName, NULL); - ccb->boxa = boxa; - if ((start = ptaCreate(1)) == NULL) - return (CCBORD *)ERROR_PTR("start pta not made", procName, NULL); - ccb->start = start; - if ((local = ptaaCreate(1)) == NULL) - return (CCBORD *)ERROR_PTR("local ptaa not made", procName, NULL); - ccb->local = local; - - return ccb; -} - - -/*! - * \brief ccbDestroy() - * - * \param[in,out] pccb will be set to null before returning - * \return void - */ -void -ccbDestroy(CCBORD **pccb) -{ -CCBORD *ccb; - - PROCNAME("ccbDestroy"); - - if (pccb == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((ccb = *pccb) == NULL) - return; - - ccb->refcount--; - if (ccb->refcount == 0) { - if (ccb->pix) - pixDestroy(&ccb->pix); - if (ccb->boxa) - boxaDestroy(&ccb->boxa); - if (ccb->start) - ptaDestroy(&ccb->start); - if (ccb->local) - ptaaDestroy(&ccb->local); - if (ccb->global) - ptaaDestroy(&ccb->global); - if (ccb->step) - numaaDestroy(&ccb->step); - if (ccb->splocal) - ptaDestroy(&ccb->splocal); - if (ccb->spglobal) - ptaDestroy(&ccb->spglobal); - LEPT_FREE(ccb); - *pccb = NULL; - } - return; -} - - -/*---------------------------------------------------------------------* - * ccba addition * - *---------------------------------------------------------------------*/ -/*! 
- * \brief ccbaAddCcb() - * - * \param[in] ccba - * \param[in] ccb to be added by insertion - * \return 0 if OK; 1 on error - */ -l_ok -ccbaAddCcb(CCBORDA *ccba, - CCBORD *ccb) -{ -l_int32 n; - - PROCNAME("ccbaAddCcb"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - if (!ccb) - return ERROR_INT("ccb not defined", procName, 1); - - n = ccbaGetCount(ccba); - if (n >= ccba->nalloc) - ccbaExtendArray(ccba); - ccba->ccb[n] = ccb; - ccba->n++; - return 0; -} - - -/*! - * \brief ccbaExtendArray() - * - * \param[in] ccba - * \return 0 if OK; 1 on error - */ -static l_int32 -ccbaExtendArray(CCBORDA *ccba) -{ - PROCNAME("ccbaExtendArray"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - if ((ccba->ccb = (CCBORD **)reallocNew((void **)&ccba->ccb, - sizeof(CCBORD *) * ccba->nalloc, - 2 * sizeof(CCBORD *) * ccba->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - ccba->nalloc = 2 * ccba->nalloc; - return 0; -} - - - -/*---------------------------------------------------------------------* - * ccba accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief ccbaGetCount() - * - * \param[in] ccba - * \return count, with 0 on error - */ -l_int32 -ccbaGetCount(CCBORDA *ccba) -{ - - PROCNAME("ccbaGetCount"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 0); - - return ccba->n; -} - - -/*! - * \brief ccbaGetCcb() - * - * \param[in] ccba - * \param[in] index - * \return ccb, or NULL on error - * - *
- * Notes:
- *      (1) This returns a clone of the ccb; it must be destroyed
- * 
- */ -CCBORD * -ccbaGetCcb(CCBORDA *ccba, - l_int32 index) -{ -CCBORD *ccb; - - PROCNAME("ccbaGetCcb"); - - if (!ccba) - return (CCBORD *)ERROR_PTR("ccba not defined", procName, NULL); - if (index < 0 || index >= ccba->n) - return (CCBORD *)ERROR_PTR("index out of bounds", procName, NULL); - - ccb = ccba->ccb[index]; - ccb->refcount++; - return ccb; -} - - - -/*---------------------------------------------------------------------* - * Top-level border-finding routines * - *---------------------------------------------------------------------*/ -/*! - * \brief pixGetAllCCBorders() - * - * \param[in] pixs 1 bpp - * \return ccborda, or NULL on error - */ -CCBORDA * -pixGetAllCCBorders(PIX *pixs) -{ -l_int32 n, i; -BOX *box; -BOXA *boxa; -CCBORDA *ccba; -CCBORD *ccb; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixGetAllCCBorders"); - - if (!pixs) - return (CCBORDA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (CCBORDA *)ERROR_PTR("pixs not binary", procName, NULL); - - if ((boxa = pixConnComp(pixs, &pixa, 8)) == NULL) - return (CCBORDA *)ERROR_PTR("boxa not made", procName, NULL); - n = boxaGetCount(boxa); - - if ((ccba = ccbaCreate(pixs, n)) == NULL) { - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return (CCBORDA *)ERROR_PTR("ccba not made", procName, NULL); - } - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) { - ccbaDestroy(&ccba); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - return (CCBORDA *)ERROR_PTR("pix not found", procName, NULL); - } - if ((box = pixaGetBox(pixa, i, L_CLONE)) == NULL) { - ccbaDestroy(&ccba); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - pixDestroy(&pix); - return (CCBORDA *)ERROR_PTR("box not found", procName, NULL); - } - ccb = pixGetCCBorders(pix, box); - pixDestroy(&pix); - boxDestroy(&box); - if (!ccb) { - ccbaDestroy(&ccba); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - return (CCBORDA *)ERROR_PTR("ccb not made", procName, NULL); - } -/* ptaWriteStream(stderr, 
ccb->local, 1); */ - ccbaAddCcb(ccba, ccb); - } - - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return ccba; -} - - -/*! - * \brief pixGetCCBorders() - * - * \param[in] pixs 1 bpp, one 8-connected component - * \param[in] box of %pixs, in global coords - * \return ccbord, or NULL on error - * - *
- * Notes:
- *      (1) We are finding the exterior and interior borders
- *          of an 8-connected component.   This should be used
- *          on a pix that has exactly one 8-connected component.
- *      (2) Typically, pixs is a c.c. in some larger pix.  The
- *          input box gives its location in global coordinates.
- *          This box is saved, as well as the boxes for the
- *          borders of any holes within the c.c., but the latter
- *          are given in relative coords within the c.c.
- *      (3) The calculations for the exterior border are done
- *          on a pix with a 1-pixel
- *          added border, but the saved pixel coordinates
- *          are the correct (relative) ones for the input pix
- *          (without a 1-pixel border)
- *      (4) For the definition of the three tables -- xpostab[], ypostab[]
- *          and qpostab[] -- see above where they are defined.
- * 
- */ -static CCBORD * -pixGetCCBorders(PIX *pixs, - BOX *box) -{ -l_int32 allzero, i, x, xh, w, nh; -l_int32 xs, ys; /* starting hole border pixel, relative in pixs */ -l_uint32 val; -BOX *boxt, *boxe; -BOXA *boxa; -CCBORD *ccb; -PIX *pixh; /* for hole components */ -PIX *pixt; -PIXA *pixa; - - PROCNAME("pixGetCCBorders"); - - if (!pixs) - return (CCBORD *)ERROR_PTR("pixs not defined", procName, NULL); - if (!box) - return (CCBORD *)ERROR_PTR("box not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (CCBORD *)ERROR_PTR("pixs not binary", procName, NULL); - - pixZero(pixs, &allzero); - if (allzero) - return (CCBORD *)ERROR_PTR("pixs all 0", procName, NULL); - - if ((ccb = ccbCreate(pixs)) == NULL) - return (CCBORD *)ERROR_PTR("ccb not made", procName, NULL); - - /* Get the exterior border */ - pixGetOuterBorder(ccb, pixs, box); - - /* Find the holes, if any */ - if ((pixh = pixHolesByFilling(pixs, 4)) == NULL) { - ccbDestroy(&ccb); - return (CCBORD *)ERROR_PTR("pixh not made", procName, NULL); - } - pixZero(pixh, &allzero); - if (allzero) { /* no holes */ - pixDestroy(&pixh); - return ccb; - } - - /* Get c.c. and locations of the holes */ - if ((boxa = pixConnComp(pixh, &pixa, 4)) == NULL) { - ccbDestroy(&ccb); - pixDestroy(&pixh); - return (CCBORD *)ERROR_PTR("boxa not made", procName, NULL); - } - nh = boxaGetCount(boxa); -/* lept_stderr("%d holes\n", nh); */ - - /* For each hole, find an interior pixel within the hole, - * then march to the right and stop at the first border - * pixel. Save the bounding box of the border, which - * is 1 pixel bigger on each side than the bounding box - * of the hole itself. Note that we use a pix of the - * c.c. of the hole itself to be sure that we start - * with a pixel in the hole of the proper component. - * If we did everything from the parent component, it is - * possible to start in a different hole that is within - * the b.b. of a larger hole. 
*/ - w = pixGetWidth(pixs); - for (i = 0; i < nh; i++) { - boxt = boxaGetBox(boxa, i, L_CLONE); - pixt = pixaGetPix(pixa, i, L_CLONE); - ys = boxt->y; /* there must be a hole pixel on this raster line */ - for (x = 0; x < boxt->w; x++) { /* look for (fg) hole pixel */ - pixGetPixel(pixt, x, 0, &val); - if (val == 1) { - xh = x; - break; - } - } - if (x == boxt->w) { - L_WARNING("no hole pixel found!\n", procName); - continue; - } - for (x = xh + boxt->x; x < w; x++) { /* look for (fg) border pixel */ - pixGetPixel(pixs, x, ys, &val); - if (val == 1) { - xs = x; - break; - } - } - boxe = boxCreate(boxt->x - 1, boxt->y - 1, boxt->w + 2, boxt->h + 2); -#if DEBUG_PRINT - boxPrintStreamInfo(stderr, box); - boxPrintStreamInfo(stderr, boxe); - lept_stderr("xs = %d, ys = %d\n", xs, ys); -#endif /* DEBUG_PRINT */ - pixGetHoleBorder(ccb, pixs, boxe, xs, ys); - boxDestroy(&boxt); - boxDestroy(&boxe); - pixDestroy(&pixt); - } - - boxaDestroy(&boxa); - pixaDestroy(&pixa); - pixDestroy(&pixh); - return ccb; -} - - -/*! 
- * \brief pixGetOuterBordersPtaa() - * - * \param[in] pixs 1 bpp - * \return ptaa of outer borders, in global coords, or NULL on error - */ -PTAA * -pixGetOuterBordersPtaa(PIX *pixs) -{ -l_int32 i, n; -BOX *box; -BOXA *boxa; -PIX *pix; -PIXA *pixa; -PTA *pta; -PTAA *ptaa; - - PROCNAME("pixGetOuterBordersPtaa"); - - if (!pixs) - return (PTAA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PTAA *)ERROR_PTR("pixs not binary", procName, NULL); - - boxa = pixConnComp(pixs, &pixa, 8); - n = boxaGetCount(boxa); - if (n == 0) { - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return (PTAA *)ERROR_PTR("pixs empty", procName, NULL); - } - - ptaa = ptaaCreate(n); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pix = pixaGetPix(pixa, i, L_CLONE); - pta = pixGetOuterBorderPta(pix, box); - if (pta) - ptaaAddPta(ptaa, pta, L_INSERT); - boxDestroy(&box); - pixDestroy(&pix); - } - - pixaDestroy(&pixa); - boxaDestroy(&boxa); - return ptaa; -} - - -/*! - * \brief pixGetOuterBorderPta() - * - * \param[in] pixs 1 bpp, one 8-connected component - * \param[in] box [optional] of %pixs, in global coordinates - * \return pta of outer border, in global coords, or NULL on error - * - *
- * Notes:
- *      (1) We are finding the exterior border of a single 8-connected
- *          component.
- *      (2) If box is NULL, the outline returned is in the local coords
- *          of the input pix.  Otherwise, box is assumed to give the
- *          location of the pix in global coordinates, and the returned
- *          pta will be in those global coordinates.
- * 
- */ -static PTA * -pixGetOuterBorderPta(PIX *pixs, - BOX *box) -{ -l_int32 allzero, x, y; -BOX *boxt; -CCBORD *ccb; -PTA *ptaloc, *ptad; - - PROCNAME("pixGetOuterBorderPta"); - - if (!pixs) - return (PTA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PTA *)ERROR_PTR("pixs not binary", procName, NULL); - - pixZero(pixs, &allzero); - if (allzero) - return (PTA *)ERROR_PTR("pixs all 0", procName, NULL); - - if ((ccb = ccbCreate(pixs)) == NULL) - return (PTA *)ERROR_PTR("ccb not made", procName, NULL); - if (!box) - boxt = boxCreate(0, 0, pixGetWidth(pixs), pixGetHeight(pixs)); - else - boxt = boxClone(box); - - /* Get the exterior border in local coords */ - pixGetOuterBorder(ccb, pixs, boxt); - if ((ptaloc = ptaaGetPta(ccb->local, 0, L_CLONE)) == NULL) { - ccbDestroy(&ccb); - boxDestroy(&boxt); - return (PTA *)ERROR_PTR("ptaloc not made", procName, NULL); - } - - /* Transform to global coordinates, if they are given */ - if (box) { - boxGetGeometry(box, &x, &y, NULL, NULL); - ptad = ptaTransform(ptaloc, x, y, 1.0, 1.0); - } else { - ptad = ptaClone(ptaloc); - } - - ptaDestroy(&ptaloc); - boxDestroy(&boxt); - ccbDestroy(&ccb); - return ptad; -} - - -/*---------------------------------------------------------------------* - * Lower-level border-finding routines * - *---------------------------------------------------------------------*/ -/*! - * \brief pixGetOuterBorder() - * - * \param[in] ccb unfilled - * \param[in] pixs for the component at hand - * \param[in] box for the component, in global coords - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) the border is saved in relative coordinates within
- *          the c.c. (pixs).  Because the calculation is done
- *          in pixb with added 1 pixel border, we must subtract
- *          1 from each pixel value before storing it.
- *      (2) the stopping condition is that after the first pixel is
- *          returned to, the next pixel is the second pixel.  Having
- *          these 2 pixels recur in sequence proves the path is closed,
- *          and we do not store the second pixel again.
- * 
- */ -l_ok -pixGetOuterBorder(CCBORD *ccb, - PIX *pixs, - BOX *box) -{ -l_int32 fpx, fpy, spx, spy, qpos; -l_int32 px, py, npx, npy; -l_int32 w, h, wpl; -l_uint32 *data; -PTA *pta; -PIX *pixb; /* with 1 pixel border */ - - PROCNAME("pixGetOuterBorder"); - - if (!ccb) - return ERROR_INT("ccb not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - - /* Add 1-pixel border all around, and find start pixel */ - if ((pixb = pixAddBorder(pixs, 1, 0)) == NULL) - return ERROR_INT("pixs not made", procName, 1); - if (!nextOnPixelInRaster(pixb, 1, 1, &px, &py)) { - pixDestroy(&pixb); - return ERROR_INT("no start pixel found", procName, 1); - } - qpos = 0; /* relative to p */ - fpx = px; /* save location of first pixel on border */ - fpy = py; - - /* Save box and start pixel in relative coords */ - boxaAddBox(ccb->boxa, box, L_COPY); - ptaAddPt(ccb->start, px - 1, py - 1); - - pta = ptaCreate(0); - ptaaAddPta(ccb->local, pta, L_INSERT); - ptaAddPt(pta, px - 1, py - 1); /* initial point */ - pixGetDimensions(pixb, &w, &h, NULL); - data = pixGetData(pixb); - wpl = pixGetWpl(pixb); - - /* Get the second point; if there is none, return */ - if (findNextBorderPixel(w, h, data, wpl, px, py, &qpos, &npx, &npy)) { - pixDestroy(&pixb); - return 0; - } - - spx = npx; /* save location of second pixel on border */ - spy = npy; - ptaAddPt(pta, npx - 1, npy - 1); /* second point */ - px = npx; - py = npy; - - while (1) { - findNextBorderPixel(w, h, data, wpl, px, py, &qpos, &npx, &npy); - if (px == fpx && py == fpy && npx == spx && npy == spy) - break; - ptaAddPt(pta, npx - 1, npy - 1); - px = npx; - py = npy; - } - - pixDestroy(&pixb); - return 0; -} - - -/*! 
- * \brief pixGetHoleBorder() - * - * \param[in] ccb the exterior border is already made - * \param[in] pixs for the connected component at hand - * \param[in] box for the specific hole border, in relative - * coordinates to the c.c. - * \param[in] xs, ys first pixel on hole border, relative to c.c. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) we trace out hole border on pixs without addition
- *          of single pixel added border to pixs
- *      (2) therefore all coordinates are relative within the c.c. (pixs)
- *      (3) same position tables and stopping condition as for
- *          exterior borders
- * 
- */ -static l_ok -pixGetHoleBorder(CCBORD *ccb, - PIX *pixs, - BOX *box, - l_int32 xs, - l_int32 ys) -{ -l_int32 fpx, fpy, spx, spy, qpos; -l_int32 px, py, npx, npy; -l_int32 w, h, wpl; -l_uint32 *data; -PTA *pta; - - PROCNAME("pixGetHoleBorder"); - - if (!ccb) - return ERROR_INT("ccb not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - - /* Add border and find start pixel */ - qpos = 0; /* orientation of Q relative to P */ - fpx = xs; /* save location of first pixel on border */ - fpy = ys; - - /* Save box and start pixel */ - boxaAddBox(ccb->boxa, box, L_COPY); - ptaAddPt(ccb->start, xs, ys); - - if ((pta = ptaCreate(0)) == NULL) - return ERROR_INT("pta not made", procName, 1); - ptaaAddPta(ccb->local, pta, L_INSERT); - ptaAddPt(pta, xs, ys); /* initial pixel */ - - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - - /* Get the second point; there should always be at least 4 pts - * in a minimal hole border! */ - if (findNextBorderPixel(w, h, data, wpl, xs, ys, &qpos, &npx, &npy)) - return ERROR_INT("isolated hole border point!", procName, 1); - - spx = npx; /* save location of second pixel on border */ - spy = npy; - ptaAddPt(pta, npx, npy); /* second pixel */ - px = npx; - py = npy; - - while (1) { - findNextBorderPixel(w, h, data, wpl, px, py, &qpos, &npx, &npy); - if (px == fpx && py == fpy && npx == spx && npy == spy) - break; - ptaAddPt(pta, npx, npy); - px = npx; - py = npy; - } - - return 0; -} - - -/*! - * \brief findNextBorderPixel() - * - * \param[in] w, h - * \param[in] data, wpl - * \param[in] px, py current P - * \param[in,out] pqpos input current Q; new Q - * \param[out] pnpx, pnpy new P - * \return 0 if next pixel found; 1 otherwise - * - *
- * Notes:
- *      (1) qpos increases clockwise from 0 to 7, with 0 at
- *          location with Q to left of P:   Q P
- *      (2) this is a low-level function that does not check input
- *          parameters.  All calling functions should check them.
- * 
- */ -static l_int32 -findNextBorderPixel(l_int32 w, - l_int32 h, - l_uint32 *data, - l_int32 wpl, - l_int32 px, - l_int32 py, - l_int32 *pqpos, - l_int32 *pnpx, - l_int32 *pnpy) -{ -l_int32 qpos, i, pos, npx, npy, val; -l_uint32 *line; - - qpos = *pqpos; - for (i = 1; i < 8; i++) { - pos = (qpos + i) % 8; - npx = px + xpostab[pos]; - npy = py + ypostab[pos]; - line = data + npy * wpl; - val = GET_DATA_BIT(line, npx); - if (val) { - *pnpx = npx; - *pnpy = npy; - *pqpos = qpostab[pos]; - return 0; - } - } - - return 1; -} - - -/*! - * \brief locateOutsideSeedPixel() - * - * \param[in] fpx, fpy location of first pixel - * \param[in] spx, spy location of second pixel - * \param[out] pxs, pys seed pixel to be returned - * - *
- * Notes:
- *      (1) The first and second pixels must be 8-adjacent,
- *          so |dx| <= 1 and |dy| <= 1 and both dx and dy
- *          cannot be 0.  There are 8 possible cases.
- *      (2) The seed pixel is OUTSIDE the foreground of the c.c.
- *      (3) These rules are for the situation where the INSIDE
- *          of the c.c. is on the right as you follow the border:
- *          cw for an exterior border and ccw for a hole border.
- * 
- */ -static void -locateOutsideSeedPixel(l_int32 fpx, - l_int32 fpy, - l_int32 spx, - l_int32 spy, - l_int32 *pxs, - l_int32 *pys) -{ -l_int32 dx, dy; - - dx = spx - fpx; - dy = spy - fpy; - - if (dx * dy == 1) { - *pxs = fpx + dx; - *pys = fpy; - } else if (dx * dy == -1) { - *pxs = fpx; - *pys = fpy + dy; - } else if (dx == 0) { - *pxs = fpx + dy; - *pys = fpy + dy; - } else /* dy == 0 */ { - *pxs = fpx + dx; - *pys = fpy - dx; - } - - return; -} - - - -/*---------------------------------------------------------------------* - * Border conversions * - *---------------------------------------------------------------------*/ -/*! - * \brief ccbaGenerateGlobalLocs() - * - * \param[in] ccba with local chain ptaa of borders computed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This uses the pixel locs in the local ptaa, which are all
- *          relative to each c.c., to find the global pixel locations,
- *          and stores them in the global ptaa.
- * 
- */ -l_ok -ccbaGenerateGlobalLocs(CCBORDA *ccba) -{ -l_int32 ncc, nb, n, i, j, k, xul, yul, x, y; -CCBORD *ccb; -PTAA *ptaal, *ptaag; -PTA *ptal, *ptag; - - PROCNAME("ccbaGenerateGlobalLocs"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - ncc = ccbaGetCount(ccba); /* number of c.c. */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - - /* Get the UL corner in global coords, (xul, yul), of the c.c. */ - boxaGetBoxGeometry(ccb->boxa, 0, &xul, &yul, NULL, NULL); - - /* Make a new global ptaa, removing any old one */ - ptaal = ccb->local; - nb = ptaaGetCount(ptaal); /* number of borders */ - if (ccb->global) /* remove old one */ - ptaaDestroy(&ccb->global); - if ((ptaag = ptaaCreate(nb)) == NULL) - return ERROR_INT("ptaag not made", procName, 1); - ccb->global = ptaag; /* save new one */ - - /* Iterate through the borders for this c.c. */ - for (j = 0; j < nb; j++) { - ptal = ptaaGetPta(ptaal, j, L_CLONE); - n = ptaGetCount(ptal); /* number of pixels in border */ - if ((ptag = ptaCreate(n)) == NULL) - return ERROR_INT("ptag not made", procName, 1); - ptaaAddPta(ptaag, ptag, L_INSERT); - for (k = 0; k < n; k++) { - ptaGetIPt(ptal, k, &x, &y); - ptaAddPt(ptag, x + xul, y + yul); - } - ptaDestroy(&ptal); - } - ccbDestroy(&ccb); - } - - return 0; -} - - -/*! - * \brief ccbaGenerateStepChains() - * - * \param[in] ccba with local chain ptaa of borders computed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This uses the pixel locs in the local ptaa,
- *          which are all relative to each c.c., to find
- *          the step directions for successive pixels in
- *          the chain, and stores them in the step numaa.
- *      (2) To get the step direction, use
- *              1   2   3
- *              0   P   4
- *              7   6   5
- *          where P is the previous pixel at (px, py).  The step direction
- *          is the number (from 0 through 7) for each relative location
- *          of the current pixel at (cx, cy).  It is easily found by
- *          indexing into a 2-d 3x3 array (dirtab).
- * 
- */ -l_ok -ccbaGenerateStepChains(CCBORDA *ccba) -{ -l_int32 ncc, nb, n, i, j, k; -l_int32 px, py, cx, cy, stepdir; -l_int32 dirtab[][3] = {{1, 2, 3}, {0, -1, 4}, {7, 6, 5}}; -CCBORD *ccb; -NUMA *na; -NUMAA *naa; /* step chain code; to be made */ -PTA *ptal; -PTAA *ptaal; /* local chain code */ - - PROCNAME("ccbaGenerateStepChains"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - ncc = ccbaGetCount(ccba); /* number of c.c. */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - - /* Make a new step numaa, removing any old one */ - ptaal = ccb->local; - nb = ptaaGetCount(ptaal); /* number of borders */ - if (ccb->step) /* remove old one */ - numaaDestroy(&ccb->step); - if ((naa = numaaCreate(nb)) == NULL) - return ERROR_INT("naa not made", procName, 1); - ccb->step = naa; /* save new one */ - - /* Iterate through the borders for this c.c. */ - for (j = 0; j < nb; j++) { - ptal = ptaaGetPta(ptaal, j, L_CLONE); - n = ptaGetCount(ptal); /* number of pixels in border */ - if (n == 1) { /* isolated pixel */ - na = numaCreate(1); /* but leave it empty */ - } else { /* trace out the boundary */ - if ((na = numaCreate(n)) == NULL) - return ERROR_INT("na not made", procName, 1); - ptaGetIPt(ptal, 0, &px, &py); - for (k = 1; k < n; k++) { - ptaGetIPt(ptal, k, &cx, &cy); - stepdir = dirtab[1 + cy - py][1 + cx - px]; - numaAddNumber(na, stepdir); - px = cx; - py = cy; - } - } - numaaAddNuma(naa, na, L_INSERT); - ptaDestroy(&ptal); - } - ccbDestroy(&ccb); /* just decrement refcount */ - } - - return 0; -} - - -/*! - * \brief ccbaStepChainsToPixCoords() - * - * \param[in] ccba with step chains numaa of borders - * \param[in] coordtype CCB_GLOBAL_COORDS or CCB_LOCAL_COORDS - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This uses the step chain data in each ccb to determine
- *          the pixel locations, either global or local,
- *          and stores them in the appropriate ptaa,
- *          either global or local.  For the latter, the
- *          pixel locations are relative to the c.c.
- * 
- */ -l_ok -ccbaStepChainsToPixCoords(CCBORDA *ccba, - l_int32 coordtype) -{ -l_int32 ncc, nb, n, i, j, k; -l_int32 xul, yul, xstart, ystart, x, y, stepdir; -BOXA *boxa; -CCBORD *ccb; -NUMA *na; -NUMAA *naa; -PTAA *ptaan; /* new pix coord ptaa */ -PTA *ptas, *ptan; - - PROCNAME("ccbaStepChainsToPixCoords"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - if (coordtype != CCB_GLOBAL_COORDS && coordtype != CCB_LOCAL_COORDS) - return ERROR_INT("coordtype not valid", procName, 1); - - ncc = ccbaGetCount(ccba); /* number of c.c. */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - if ((naa = ccb->step) == NULL) - return ERROR_INT("step numaa not found", procName, 1); - if ((boxa = ccb->boxa) == NULL) - return ERROR_INT("boxa not found", procName, 1); - if ((ptas = ccb->start) == NULL) - return ERROR_INT("start pta not found", procName, 1); - - /* For global coords, get the (xul, yul) of the c.c.; - * otherwise, use relative coords. */ - if (coordtype == CCB_LOCAL_COORDS) { - xul = 0; - yul = 0; - } else { /* coordtype == CCB_GLOBAL_COORDS */ - /* Get UL corner in global coords */ - if (boxaGetBoxGeometry(boxa, 0, &xul, &yul, NULL, NULL)) - return ERROR_INT("bounding rectangle not found", procName, 1); - } - - /* Make a new ptaa, removing any old one */ - nb = numaaGetCount(naa); /* number of borders */ - if ((ptaan = ptaaCreate(nb)) == NULL) - return ERROR_INT("ptaan not made", procName, 1); - if (coordtype == CCB_LOCAL_COORDS) { - if (ccb->local) /* remove old one */ - ptaaDestroy(&ccb->local); - ccb->local = ptaan; /* save new local chain */ - } else { /* coordtype == CCB_GLOBAL_COORDS */ - if (ccb->global) /* remove old one */ - ptaaDestroy(&ccb->global); - ccb->global = ptaan; /* save new global chain */ - } - - /* Iterate through the borders for this c.c. 
*/ - for (j = 0; j < nb; j++) { - na = numaaGetNuma(naa, j, L_CLONE); - n = numaGetCount(na); /* number of steps in border */ - if ((ptan = ptaCreate(n + 1)) == NULL) - return ERROR_INT("ptan not made", procName, 1); - ptaaAddPta(ptaan, ptan, L_INSERT); - ptaGetIPt(ptas, j, &xstart, &ystart); - x = xul + xstart; - y = yul + ystart; - ptaAddPt(ptan, x, y); - for (k = 0; k < n; k++) { - numaGetIValue(na, k, &stepdir); - x += xpostab[stepdir]; - y += ypostab[stepdir]; - ptaAddPt(ptan, x, y); - } - numaDestroy(&na); - } - ccbDestroy(&ccb); - } - - return 0; -} - - -/*! - * \brief ccbaGenerateSPGlobalLocs() - * - * \param[in] ccba - * \param[in] ptsflag CCB_SAVE_ALL_PTS or CCB_SAVE_TURNING_PTS - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This calculates the splocal rep if not yet made.
- *      (2) It uses the local pixel values in splocal, the single
- *          path pta, which are all relative to each c.c., to find
- *          the corresponding global pixel locations, and stores
- *          them in the spglobal pta.
- *      (3) This lists only the turning points: it both makes a
- *          valid svg file and is typically about half the size
- *          when all border points are listed.
- * 
- */ -l_ok -ccbaGenerateSPGlobalLocs(CCBORDA *ccba, - l_int32 ptsflag) -{ -l_int32 ncc, npt, i, j, xul, yul, x, y, delx, dely; -l_int32 xp, yp, delxp, delyp; /* prev point and increments */ -CCBORD *ccb; -PTA *ptal, *ptag; - - PROCNAME("ccbaGenerateSPGlobalLocs"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - /* Make sure we have a local single path representation */ - if ((ccb = ccbaGetCcb(ccba, 0)) == NULL) - return ERROR_INT("no ccb", procName, 1); - if (!ccb->splocal) - ccbaGenerateSinglePath(ccba); - ccbDestroy(&ccb); /* clone ref */ - - ncc = ccbaGetCount(ccba); /* number of c.c. */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - - /* Get the UL corner in global coords, (xul, yul), of the c.c. */ - if (boxaGetBoxGeometry(ccb->boxa, 0, &xul, &yul, NULL, NULL)) - return ERROR_INT("bounding rectangle not found", procName, 1); - - /* Make a new spglobal pta, removing any old one */ - ptal = ccb->splocal; - npt = ptaGetCount(ptal); /* number of points */ - if (ccb->spglobal) /* remove old one */ - ptaDestroy(&ccb->spglobal); - if ((ptag = ptaCreate(npt)) == NULL) - return ERROR_INT("ptag not made", procName, 1); - ccb->spglobal = ptag; /* save new one */ - - /* Convert local to global */ - if (ptsflag == CCB_SAVE_ALL_PTS) { - for (j = 0; j < npt; j++) { - ptaGetIPt(ptal, j, &x, &y); - ptaAddPt(ptag, x + xul, y + yul); - } - } else { /* ptsflag = CCB_SAVE_TURNING_PTS */ - ptaGetIPt(ptal, 0, &xp, &yp); /* get the 1st pt */ - ptaAddPt(ptag, xp + xul, yp + yul); /* save the 1st pt */ - if (npt == 2) { /* get and save the 2nd pt */ - ptaGetIPt(ptal, 1, &x, &y); - ptaAddPt(ptag, x + xul, y + yul); - } else if (npt > 2) { - ptaGetIPt(ptal, 1, &x, &y); - delxp = x - xp; - delyp = y - yp; - xp = x; - yp = y; - for (j = 2; j < npt; j++) { - ptaGetIPt(ptal, j, &x, &y); - delx = x - xp; - dely = y - yp; - if (delx != delxp || dely != delyp) - ptaAddPt(ptag, xp + xul, yp + yul); - xp = x; - yp = y; - delxp = delx; - delyp = dely; - } - 
ptaAddPt(ptag, xp + xul, yp + yul); - } - } - - ccbDestroy(&ccb); /* clone ref */ - } - - return 0; -} - - - -/*---------------------------------------------------------------------* - * Conversion to single path * - *---------------------------------------------------------------------*/ -/*! - * \brief ccbaGenerateSinglePath() - * - * \param[in] ccba - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates a single border in local pixel coordinates.
- *          For each c.c., if there is just an outer border, copy it.
- *          If there are also hole borders, for each hole border,
- *          determine the smallest horizontal or vertical
- *          distance from the border to the outside of the c.c.,
- *          and find a path through the c.c. for this cut.
- *          We do this in a way that guarantees a pixel from the
- *          hole border is the starting point of the path, and
- *          we must verify that the path intersects the outer
- *          border (if it intersects it, then it ends on it).
- *          One can imagine pathological cases, but they may not
- *          occur in images of text characters and un-textured
- *          line graphics.
- *      (2) Once it is verified that the path through the c.c.
- *          intersects both the hole and outer borders, we
- *          generate the full single path for all borders in the
- *          c.c.  Starting at the start point on the outer
- *          border, when we hit a line on a cut, we take
- *          the cut, do the hold border, and return on the cut
- *          to the outer border.  We compose a pta of the
- *          outer border pts that are on cut paths, and for
- *          every point on the outer border (as we go around),
- *          we check against this pta.  When we find a matching
- *          point in the pta, we do its cut path and hole border.
- *          The single path is saved in the ccb.
- * 
- */ -l_ok -ccbaGenerateSinglePath(CCBORDA *ccba) -{ -l_int32 i, j, k, ncc, nb, ncut, npt, dir, len, state, lostholes; -l_int32 x, y, xl, yl, xf, yf; -BOX *boxinner; -BOXA *boxa; -CCBORD *ccb; -PTA *pta, *ptac, *ptah; -PTA *ptahc; /* cyclic permutation of hole border, with end pts at cut */ -PTA *ptas; /* output result: new single path for c.c. */ -PTA *ptaf; /* points on the hole borders that intersect with cuts */ -PTA *ptal; /* points on outer border that intersect with cuts */ -PTA *ptap, *ptarp; /* path and reverse path between borders */ -PTAA *ptaa; -PTAA *ptaap; /* ptaa for all paths between borders */ - - PROCNAME("ccbaGenerateSinglePath"); - - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - ncc = ccbaGetCount(ccba); /* number of c.c. */ - lostholes = 0; - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - if ((ptaa = ccb->local) == NULL) { - L_WARNING("local pixel loc array not found\n", procName); - continue; - } - nb = ptaaGetCount(ptaa); /* number of borders in the c.c. */ - - /* Prepare the output pta */ - if (ccb->splocal) - ptaDestroy(&ccb->splocal); - ptas = ptaCreate(0); - ccb->splocal = ptas; - - /* If no holes, just concat the outer border */ - pta = ptaaGetPta(ptaa, 0, L_CLONE); - if (nb == 1 || nb > NMAX_HOLES + 1) { - ptaJoin(ptas, pta, 0, -1); - ptaDestroy(&pta); /* remove clone */ - ccbDestroy(&ccb); /* remove clone */ - continue; - } - - /* Find the (nb - 1) cut paths that connect holes - * with outer border */ - boxa = ccb->boxa; - ptaap = ptaaCreate(nb - 1); - ptaf = ptaCreate(nb - 1); - ptal = ptaCreate(nb - 1); - for (j = 1; j < nb; j++) { - boxinner = boxaGetBox(boxa, j, L_CLONE); - - /* Find a short path and store it */ - ptac = getCutPathForHole(ccb->pix, pta, boxinner, &dir, &len); - if (len == 0) { /* bad: we lose the hole! 
*/ - lostholes++; -/* boxPrintStreamInfo(stderr, boxa->box[0]); */ - } - ptaaAddPta(ptaap, ptac, L_INSERT); -/* lept_stderr("dir = %d, length = %d\n", dir, len); */ -/* ptaWriteStream(stderr, ptac, 1); */ - - /* Store the first and last points in the cut path, - * which must be on a hole border and the outer - * border, respectively */ - ncut = ptaGetCount(ptac); - if (ncut == 0) { /* missed hole; neg coords won't match */ - ptaAddPt(ptaf, -1, -1); - ptaAddPt(ptal, -1, -1); - } else { - ptaGetIPt(ptac, 0, &x, &y); - ptaAddPt(ptaf, x, y); - ptaGetIPt(ptac, ncut - 1, &x, &y); - ptaAddPt(ptal, x, y); - } - boxDestroy(&boxinner); - } - - /* Make a single path for the c.c. using these connections */ - npt = ptaGetCount(pta); /* outer border pts */ - for (k = 0; k < npt; k++) { - ptaGetIPt(pta, k, &x, &y); - if (k == 0) { /* if there is a cut at the first point, - * we can wait until the end to take it */ - ptaAddPt(ptas, x, y); - continue; - } - state = L_NOT_FOUND; - for (j = 0; j < nb - 1; j++) { /* iterate over cut end pts */ - ptaGetIPt(ptal, j, &xl, &yl); /* cut point on outer border */ - if (x == xl && y == yl) { /* take this cut to the hole */ - state = L_FOUND; - ptap = ptaaGetPta(ptaap, j, L_CLONE); - ptarp = ptaReverse(ptap, 1); - /* Cut point on hole border: */ - ptaGetIPt(ptaf, j, &xf, &yf); - /* Hole border: */ - ptah = ptaaGetPta(ptaa, j + 1, L_CLONE); - ptahc = ptaCyclicPerm(ptah, xf, yf); -/* ptaWriteStream(stderr, ptahc, 1); */ - ptaJoin(ptas, ptarp, 0, -1); - ptaJoin(ptas, ptahc, 0, -1); - ptaJoin(ptas, ptap, 0, -1); - ptaDestroy(&ptap); - ptaDestroy(&ptarp); - ptaDestroy(&ptah); - ptaDestroy(&ptahc); - break; - } - } - if (state == L_NOT_FOUND) - ptaAddPt(ptas, x, y); - } - -/* ptaWriteStream(stderr, ptas, 1); */ - ptaaDestroy(&ptaap); - ptaDestroy(&ptaf); - ptaDestroy(&ptal); - ptaDestroy(&pta); /* remove clone */ - ccbDestroy(&ccb); /* remove clone */ - } - - if (lostholes > 0) - L_WARNING("***** %d lost holes *****\n", procName, lostholes); - - 
return 0; -} - - -/*! - * \brief getCutPathForHole() - * - * \param[in] pix 1 bpp, of c.c. - * \param[in] pta of outer border - * \param[in] boxinner bounding box of hole path - * \param[out] pdir direction (0-3), returned; only needed for debug - * \param[out] plen length of path, returned - * \return pta of pts on cut path from the hole border - * to the outer border, including end points on - * both borders; or NULL on error - * - *
- * Notes:
- *      (1) If we don't find a path, we return a pta with no pts
- *          in it and len = 0.
- *      (2) The goal is to get a reasonably short path between the
- *          inner and outer borders, that goes entirely within the fg of
- *          the pix.  This function is cheap-and-dirty, may fail for some
- *          holes in complex topologies such as those you might find in a
- *          moderately dark scanned halftone.  If it fails to find a
- *          path to any particular hole, it gives a warning, and because
- *          that hole path is not included, the hole will not be rendered.
- * 
- */ -PTA * -getCutPathForHole(PIX *pix, - PTA *pta, - BOX *boxinner, - l_int32 *pdir, - l_int32 *plen) -{ -l_int32 w, h, nc, x, y, xl, yl, xmid, ymid; -l_uint32 val; -PTA *ptac; - - PROCNAME("getCutPathForHole"); - - if (!pix) - return (PTA *)ERROR_PTR("pix not defined", procName, NULL); - if (!pta) - return (PTA *)ERROR_PTR("pta not defined", procName, NULL); - if (!boxinner) - return (PTA *)ERROR_PTR("boxinner not defined", procName, NULL); - - w = pixGetWidth(pix); - h = pixGetHeight(pix); - - if ((ptac = ptaCreate(4)) == NULL) - return (PTA *)ERROR_PTR("ptac not made", procName, NULL); - xmid = boxinner->x + boxinner->w / 2; - ymid = boxinner->y + boxinner->h / 2; - - /* try top first */ - for (y = ymid; y >= 0; y--) { - pixGetPixel(pix, xmid, y, &val); - if (val == 1) { - ptaAddPt(ptac, xmid, y); - break; - } - } - for (y = y - 1; y >= 0; y--) { - pixGetPixel(pix, xmid, y, &val); - if (val == 1) - ptaAddPt(ptac, xmid, y); - else - break; - } - nc = ptaGetCount(ptac); - ptaGetIPt(ptac, nc - 1, &xl, &yl); - if (ptaContainsPt(pta, xl, yl)) { - *pdir = 1; - *plen = nc; - return ptac; - } - - /* Next try bottom */ - ptaEmpty(ptac); - for (y = ymid; y < h; y++) { - pixGetPixel(pix, xmid, y, &val); - if (val == 1) { - ptaAddPt(ptac, xmid, y); - break; - } - } - for (y = y + 1; y < h; y++) { - pixGetPixel(pix, xmid, y, &val); - if (val == 1) - ptaAddPt(ptac, xmid, y); - else - break; - } - nc = ptaGetCount(ptac); - ptaGetIPt(ptac, nc - 1, &xl, &yl); - if (ptaContainsPt(pta, xl, yl)) { - *pdir = 3; - *plen = nc; - return ptac; - } - - /* Next try left */ - ptaEmpty(ptac); - for (x = xmid; x >= 0; x--) { - pixGetPixel(pix, x, ymid, &val); - if (val == 1) { - ptaAddPt(ptac, x, ymid); - break; - } - } - for (x = x - 1; x >= 0; x--) { - pixGetPixel(pix, x, ymid, &val); - if (val == 1) - ptaAddPt(ptac, x, ymid); - else - break; - } - nc = ptaGetCount(ptac); - ptaGetIPt(ptac, nc - 1, &xl, &yl); - if (ptaContainsPt(pta, xl, yl)) { - *pdir = 0; - *plen = nc; - return ptac; - 
} - - /* Finally try right */ - ptaEmpty(ptac); - for (x = xmid; x < w; x++) { - pixGetPixel(pix, x, ymid, &val); - if (val == 1) { - ptaAddPt(ptac, x, ymid); - break; - } - } - for (x = x + 1; x < w; x++) { - pixGetPixel(pix, x, ymid, &val); - if (val == 1) - ptaAddPt(ptac, x, ymid); - else - break; - } - nc = ptaGetCount(ptac); - ptaGetIPt(ptac, nc - 1, &xl, &yl); - if (ptaContainsPt(pta, xl, yl)) { - *pdir = 2; - *plen = nc; - return ptac; - } - - /* If we get here, we've failed! */ - ptaEmpty(ptac); - L_WARNING("no path found\n", procName); - *plen = 0; - return ptac; -} - - - -/*---------------------------------------------------------------------* - * Border rendering * - *---------------------------------------------------------------------*/ -/*! - * \brief ccbaDisplayBorder() - * - * \param[in] ccba - * \return pix of border pixels, or NULL on error - * - *
- * Notes:
- *      (1) Uses global ptaa, which gives each border pixel in
- *          global coordinates, and must be computed in advance
- *          by calling ccbaGenerateGlobalLocs().
- * 
- */ -PIX * -ccbaDisplayBorder(CCBORDA *ccba) -{ -l_int32 ncc, nb, n, i, j, k, x, y; -CCBORD *ccb; -PIX *pixd; -PTAA *ptaa; -PTA *pta; - - PROCNAME("ccbaDisplayBorder"); - - if (!ccba) - return (PIX *)ERROR_PTR("ccba not defined", procName, NULL); - - if ((pixd = pixCreate(ccba->w, ccba->h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - ncc = ccbaGetCount(ccba); /* number of c.c. */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - if ((ptaa = ccb->global) == NULL) { - L_WARNING("global pixel loc array not found", procName); - continue; - } - nb = ptaaGetCount(ptaa); /* number of borders in the c.c. */ - for (j = 0; j < nb; j++) { - pta = ptaaGetPta(ptaa, j, L_CLONE); - n = ptaGetCount(pta); /* number of pixels in the border */ - for (k = 0; k < n; k++) { - ptaGetIPt(pta, k, &x, &y); - pixSetPixel(pixd, x, y, 1); - } - ptaDestroy(&pta); - } - ccbDestroy(&ccb); - } - - return pixd; -} - - -/*! - * \brief ccbaDisplaySPBorder() - * - * \param[in] ccba - * \return pix of border pixels, or NULL on error - * - *
- * Notes:
- *      (1) Uses spglobal pta, which gives each border pixel in
- *          global coordinates, one path per c.c., and must
- *          be computed in advance by calling ccbaGenerateSPGlobalLocs().
- * 
- */ -PIX * -ccbaDisplaySPBorder(CCBORDA *ccba) -{ -l_int32 ncc, npt, i, j, x, y; -CCBORD *ccb; -PIX *pixd; -PTA *ptag; - - PROCNAME("ccbaDisplaySPBorder"); - - if (!ccba) - return (PIX *)ERROR_PTR("ccba not defined", procName, NULL); - - if ((pixd = pixCreate(ccba->w, ccba->h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - ncc = ccbaGetCount(ccba); /* number of c.c. */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - if ((ptag = ccb->spglobal) == NULL) { - L_WARNING("spglobal pixel loc array not found\n", procName); - continue; - } - npt = ptaGetCount(ptag); /* number of pixels on path */ - for (j = 0; j < npt; j++) { - ptaGetIPt(ptag, j, &x, &y); - pixSetPixel(pixd, x, y, 1); - } - ccbDestroy(&ccb); /* clone ref */ - } - - return pixd; -} - - -/*! - * \brief ccbaDisplayImage1() - * - * \param[in] ccba - * \return pix of image, or NULL on error - * - *
- * Notes:
- *      (1) Uses local ptaa, which gives each border pixel in
- *          local coordinates, so the actual pixel positions must
- *          be computed using all offsets.
- *      (2) For the holes, use coordinates relative to the c.c.
- *      (3) This is slower than Method 2.
- *      (4) This uses topological properties (Method 1) to do scan
- *          conversion to raster
- *
- *  This algorithm deserves some commentary.
- *
- *  I first tried the following:
- *    ~ outer borders: 4-fill from outside, stopping at the
- *         border, using pixFillClosedBorders()
- *    ~ inner borders: 4-fill from outside, stopping again
- *         at the border, XOR with the border, and invert
- *         to get the hole.  This did not work, because if
- *         you have a hole border that looks like:
- *
- *                x x x x x x
- *                x          x
- *                x   x x x   x
- *                  x x o x   x
- *                      x     x
- *                      x     x
- *                        x x x
- *
- *         if you 4-fill from the outside, the pixel 'o' will
- *         not be filled!  XORing with the border leaves it OFF.
- *         Inverting then gives a single bad ON pixel that is not
- *         actually part of the hole.
- *
- *  So what you must do instead is 4-fill the holes from inside.
- *  You can do this from a seedfill, using a pix with the hole
- *  border as the filling mask.  But you need to start with a
- *  pixel inside the hole.  How is this determined?  The best
- *  way is from the contour.  We have a right-hand shoulder
- *  rule for inside (i.e., the filled region).   Take the
- *  first 2 pixels of the hole border, and compute dx and dy
- *  (second coord minus first coord:  dx = sx - fx, dy = sy - fy).
- *  There are 8 possibilities, depending on the values of dx and
- *  dy (which can each be -1, 0, and +1, but not both 0).
- *  These 8 cases can be broken into 4; see the simple algorithm below.
- *  Once you have an interior seed pixel, you fill from the seed,
- *  clipping with the hole border pix by filling into its invert.
- *
- *  You then successively XOR these interior filled components, in any order.
- * 
- */ -PIX * -ccbaDisplayImage1(CCBORDA *ccba) -{ -l_int32 ncc, i, nb, n, j, k, x, y, xul, yul, xoff, yoff, w, h; -l_int32 fpx, fpy, spx, spy, xs, ys; -BOX *box; -BOXA *boxa; -CCBORD *ccb; -PIX *pixd, *pixt, *pixh; -PTAA *ptaa; -PTA *pta; - - PROCNAME("ccbaDisplayImage1"); - - if (!ccba) - return (PIX *)ERROR_PTR("ccba not defined", procName, NULL); - - if ((pixd = pixCreate(ccba->w, ccba->h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - ncc = ccbaGetCount(ccba); - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - if ((boxa = ccb->boxa) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("boxa not found", procName, NULL); - } - - /* Render border in pixt */ - if ((ptaa = ccb->local) == NULL) { - L_WARNING("local chain array not found\n", procName); - continue; - } - - nb = ptaaGetCount(ptaa); /* number of borders in the c.c. */ - for (j = 0; j < nb; j++) { - if ((box = boxaGetBox(boxa, j, L_CLONE)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("b. box not found", procName, NULL); - } - if (j == 0) { - boxGetGeometry(box, &xul, &yul, &w, &h); - xoff = yoff = 0; - } else { - boxGetGeometry(box, &xoff, &yoff, &w, &h); - } - boxDestroy(&box); - - /* Render the border in a minimum-sized pix; - * subtract xoff and yoff because the pixel - * location is stored relative to the c.c., but - * we need it relative to just the hole border. 
*/ - if ((pixt = pixCreate(w, h, 1)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - pta = ptaaGetPta(ptaa, j, L_CLONE); - n = ptaGetCount(pta); /* number of pixels in the border */ - for (k = 0; k < n; k++) { - ptaGetIPt(pta, k, &x, &y); - pixSetPixel(pixt, x - xoff, y - yoff, 1); - if (j > 0) { /* need this for finding hole border pixel */ - if (k == 0) { - fpx = x - xoff; - fpy = y - yoff; - } - if (k == 1) { - spx = x - xoff; - spy = y - yoff; - } - } - } - ptaDestroy(&pta); - - /* Get the filled component */ - if (j == 0) { /* if outer border, fill from outer boundary */ - if ((pixh = pixFillClosedBorders(pixt, 4)) == NULL) { - pixDestroy(&pixd); - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("pixh not made", procName, NULL); - } - } else { /* fill the hole from inside */ - /* get the location of a seed pixel in the hole */ - locateOutsideSeedPixel(fpx, fpy, spx, spy, &xs, &ys); - - /* Put seed in hole and fill interior of hole, - * using pixt as clipping mask */ - pixh = pixCreateTemplate(pixt); - pixSetPixel(pixh, xs, ys, 1); /* put seed pixel in hole */ - pixInvert(pixt, pixt); /* to make filling mask */ - pixSeedfillBinary(pixh, pixh, pixt, 4); /* 4-fill hole */ - } - - /* XOR into the dest */ - pixRasterop(pixd, xul + xoff, yul + yoff, w, h, PIX_XOR, - pixh, 0, 0); - pixDestroy(&pixt); - pixDestroy(&pixh); - } - ccbDestroy(&ccb); - } - return pixd; -} - - - -/*! - * \brief ccbaDisplayImage2() - * - * \param[in] ccba - * \return pix of image, or NULL on error - * - *
- * Notes:
- *      (1) Uses local chain ptaa, which gives each border pixel in
- *          local coordinates, so the actual pixel positions must
- *          be computed using all offsets.
- *      (2) Treats exterior and hole borders on equivalent
- *          footing, and does all calculations on a pix
- *          that spans the c.c. with a 1 pixel added boundary.
- *      (3) This uses topological properties (Method 2) to do scan
- *          conversion to raster
- *      (4) The algorithm is described at the top of this file (Method 2).
- *          It is preferred to Method 1 because it is between 1.2x and 2x
- *          faster than Method 1.
- * 
- */ -PIX * -ccbaDisplayImage2(CCBORDA *ccba) -{ -l_int32 ncc, nb, n, i, j, k, x, y, xul, yul, w, h; -l_int32 fpx, fpy, spx, spy, xs, ys; -BOXA *boxa; -CCBORD *ccb; -PIX *pixd, *pixc, *pixs; -PTAA *ptaa; -PTA *pta; - - PROCNAME("ccbaDisplayImage2"); - - if (!ccba) - return (PIX *)ERROR_PTR("ccba not defined", procName, NULL); - - if ((pixd = pixCreate(ccba->w, ccba->h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - ncc = ccbaGetCount(ccba); - for (i = 0; i < ncc; i++) { - /* Generate clipping mask from border pixels and seed image - * from one seed for each closed border. */ - ccb = ccbaGetCcb(ccba, i); - if ((boxa = ccb->boxa) == NULL) { - pixDestroy(&pixd); - ccbDestroy(&ccb); - return (PIX *)ERROR_PTR("boxa not found", procName, NULL); - } - if (boxaGetBoxGeometry(boxa, 0, &xul, &yul, &w, &h)) { - pixDestroy(&pixd); - ccbDestroy(&ccb); - return (PIX *)ERROR_PTR("b. box not found", procName, NULL); - } - pixc = pixCreate(w + 2, h + 2, 1); - pixs = pixCreateTemplate(pixc); - - if ((ptaa = ccb->local) == NULL) { - pixDestroy(&pixc); - pixDestroy(&pixs); - ccbDestroy(&ccb); - L_WARNING("local chain array not found\n", procName); - continue; - } - nb = ptaaGetCount(ptaa); /* number of borders in the c.c. */ - for (j = 0; j < nb; j++) { - pta = ptaaGetPta(ptaa, j, L_CLONE); - n = ptaGetCount(pta); /* number of pixels in the border */ - - /* Render border pixels in pixc */ - for (k = 0; k < n; k++) { - ptaGetIPt(pta, k, &x, &y); - pixSetPixel(pixc, x + 1, y + 1, 1); - if (k == 0) { - fpx = x + 1; - fpy = y + 1; - } else if (k == 1) { - spx = x + 1; - spy = y + 1; - } - } - - /* Get and set seed pixel for this border in pixs */ - if (n > 1) - locateOutsideSeedPixel(fpx, fpy, spx, spy, &xs, &ys); - else /* isolated c.c. */ - xs = ys = 0; - pixSetPixel(pixs, xs, ys, 1); - ptaDestroy(&pta); - } - - /* Fill from seeds in pixs, using pixc as the clipping mask, - * to reconstruct the c.c. 
*/ - pixInvert(pixc, pixc); /* to convert clipping -> filling mask */ - pixSeedfillBinary(pixs, pixs, pixc, 4); /* 4-fill */ - pixInvert(pixs, pixs); /* to make the c.c. */ - - /* XOR into the dest */ - pixRasterop(pixd, xul, yul, w, h, PIX_XOR, pixs, 1, 1); - - pixDestroy(&pixc); - pixDestroy(&pixs); - ccbDestroy(&ccb); /* ref-counted */ - } - return pixd; -} - - - -/*---------------------------------------------------------------------* - * Serialize for I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief ccbaWrite() - * - * \param[in] filename - * \param[in] ccba - * \return 0 if OK, 1 on error - */ -l_ok -ccbaWrite(const char *filename, - CCBORDA *ccba) -{ -FILE *fp; - - PROCNAME("ccbaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - if (ccbaWriteStream(fp, ccba)) { - fclose(fp); - return ERROR_INT("ccba not written to stream", procName, 1); - } - - fclose(fp); - return 0; -} - - - -/*! - * \brief ccbaWriteStream() - * - * \param[in] fp file stream - * \param[in] ccba - * \return 0 if OK; 1 on error - * - * Format: - * \code - * ccba: %7d cc\n num. c.c.) 
(ascii) (18B - * pix width 4B - * pix height 4B - * [for i = 1, ncc] - * ulx 4B - * uly 4B - * w 4B -- not req'd for reconstruction - * h 4B -- not req'd for reconstruction - * number of borders 4B - * [for j = 1, nb] - * startx 4B - * starty 4B - * [for k = 1, nb] - * 2 steps 1B - * end in z8 or 88 1B - * \endcode - */ -l_ok -ccbaWriteStream(FILE *fp, - CCBORDA *ccba) -{ -char strbuf[256]; -l_uint8 bval; -l_uint8 *datain, *dataout; -l_int32 i, j, k, bx, by, bw, bh, val, startx, starty; -l_int32 ncc, nb, n; -l_uint32 w, h; -size_t inbytes, outbytes; -L_BBUFFER *bbuf; -CCBORD *ccb; -NUMA *na; -NUMAA *naa; -PTA *pta; - - PROCNAME("ccbaWriteStream"); - -#if !HAVE_LIBZ /* defined in environ.h */ - return ERROR_INT("no libz: can't write data", procName, 1); -#else - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - if ((bbuf = bbufferCreate(NULL, 1000)) == NULL) - return ERROR_INT("bbuf not made", procName, 1); - - ncc = ccbaGetCount(ccba); - snprintf(strbuf, sizeof(strbuf), "ccba: %7d cc\n", ncc); - bbufferRead(bbuf, (l_uint8 *)strbuf, 18); - w = pixGetWidth(ccba->pix); - h = pixGetHeight(ccba->pix); - bbufferRead(bbuf, (l_uint8 *)&w, 4); /* width */ - bbufferRead(bbuf, (l_uint8 *)&h, 4); /* height */ - for (i = 0; i < ncc; i++) { - ccb = ccbaGetCcb(ccba, i); - if (boxaGetBoxGeometry(ccb->boxa, 0, &bx, &by, &bw, &bh)) { - bbufferDestroy(&bbuf); - return ERROR_INT("bounding box not found", procName, 1); - } - bbufferRead(bbuf, (l_uint8 *)&bx, 4); /* ulx of c.c. */ - bbufferRead(bbuf, (l_uint8 *)&by, 4); /* uly of c.c. */ - bbufferRead(bbuf, (l_uint8 *)&bw, 4); /* w of c.c. */ - bbufferRead(bbuf, (l_uint8 *)&bh, 4); /* h of c.c. */ - if ((naa = ccb->step) == NULL) { - ccbaGenerateStepChains(ccba); - naa = ccb->step; - } - nb = numaaGetCount(naa); - bbufferRead(bbuf, (l_uint8 *)&nb, 4); /* number of borders in c.c. 
*/ - pta = ccb->start; - for (j = 0; j < nb; j++) { - ptaGetIPt(pta, j, &startx, &starty); - bbufferRead(bbuf, (l_uint8 *)&startx, 4); /* starting x in border */ - bbufferRead(bbuf, (l_uint8 *)&starty, 4); /* starting y in border */ - na = numaaGetNuma(naa, j, L_CLONE); - n = numaGetCount(na); - for (k = 0; k < n; k++) { - numaGetIValue(na, k, &val); - if (k % 2 == 0) - bval = (l_uint8)val << 4; - else - bval |= (l_uint8)val; - if (k % 2 == 1) - bbufferRead(bbuf, (l_uint8 *)&bval, 1); /* 2 border steps */ - } - if (n % 2 == 1) { - bval |= 0x8; - bbufferRead(bbuf, (l_uint8 *)&bval, 1); /* end with 0xz8, */ - /* where z = {0..7} */ - } else { /* n % 2 == 0 */ - bval = 0x88; - bbufferRead(bbuf, (l_uint8 *)&bval, 1); /* end with 0x88 */ - } - numaDestroy(&na); - } - ccbDestroy(&ccb); - } - - datain = bbufferDestroyAndSaveData(&bbuf, &inbytes); - dataout = zlibCompress(datain, inbytes, &outbytes); - fwrite(dataout, 1, outbytes, fp); - - LEPT_FREE(datain); - LEPT_FREE(dataout); - return 0; - -#endif /* !HAVE_LIBZ */ -} - - -/*! - * \brief ccbaRead() - * - * \param[in] filename - * \return ccba, or NULL on error - */ -CCBORDA * -ccbaRead(const char *filename) -{ -FILE *fp; -CCBORDA *ccba; - - PROCNAME("ccbaRead"); - - if (!filename) - return (CCBORDA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (CCBORDA *)ERROR_PTR("stream not opened", procName, NULL); - ccba = ccbaReadStream(fp); - fclose(fp); - - if (!ccba) - return (CCBORDA *)ERROR_PTR("ccba not returned", procName, NULL); - return ccba; -} - - -/*! - * \brief ccbaReadStream() - * - * \param[in] fp file stream - * \return ccba, or NULL on error - * - * \code - * Format: ccba: %7d cc\n num. c.c.) 
(ascii) (17B - * pix width 4B - * pix height 4B - * [for i = 1, ncc] - * ulx 4B - * uly 4B - * w 4B -- not req'd for reconstruction - * h 4B -- not req'd for reconstruction - * number of borders 4B - * [for j = 1, nb] - * startx 4B - * starty 4B - * [for k = 1, nb] - * 2 steps 1B - * end in z8 or 88 1B - * \endcode - */ -CCBORDA * -ccbaReadStream(FILE *fp) -{ -char strbuf[256]; -l_uint8 bval; -l_uint8 *datain, *dataout; -l_int32 i, j, startx, starty; -l_int32 offset, nib1, nib2; -l_int32 ncc, nb; -l_uint32 width, height, w, h, xoff, yoff; -size_t inbytes, outbytes; -BOX *box; -CCBORD *ccb; -CCBORDA *ccba; -NUMA *na; -NUMAA *step; - - PROCNAME("ccbaReadStream"); - -#if !HAVE_LIBZ /* defined in environ.h */ - return (CCBORDA *)ERROR_PTR("no libz: can't read data", procName, NULL); -#else - - if (!fp) - return (CCBORDA *)ERROR_PTR("stream not open", procName, NULL); - - if ((datain = l_binaryReadStream(fp, &inbytes)) == NULL) - return (CCBORDA *)ERROR_PTR("data not read from file", procName, NULL); - dataout = zlibUncompress(datain, inbytes, &outbytes); - LEPT_FREE(datain); - if (!dataout) - return (CCBORDA *)ERROR_PTR("dataout not made", procName, NULL); - - offset = 18; - memcpy(strbuf, dataout, offset); - strbuf[17] = '\0'; - if (memcmp(strbuf, "ccba:", 5) != 0) { - LEPT_FREE(dataout); - return (CCBORDA *)ERROR_PTR("file not type ccba", procName, NULL); - } - sscanf(strbuf, "ccba: %7d cc\n", &ncc); -/* lept_stderr("ncc = %d\n", ncc); */ - if ((ccba = ccbaCreate(NULL, ncc)) == NULL) { - LEPT_FREE(dataout); - return (CCBORDA *)ERROR_PTR("ccba not made", procName, NULL); - } - - memcpy(&width, dataout + offset, 4); - offset += 4; - memcpy(&height, dataout + offset, 4); - offset += 4; - ccba->w = width; - ccba->h = height; -/* lept_stderr("width = %d, height = %d\n", width, height); */ - - for (i = 0; i < ncc; i++) { /* should be ncc */ - ccb = ccbCreate(NULL); - ccbaAddCcb(ccba, ccb); - - memcpy(&xoff, dataout + offset, 4); - offset += 4; - memcpy(&yoff, dataout + 
offset, 4); - offset += 4; - memcpy(&w, dataout + offset, 4); - offset += 4; - memcpy(&h, dataout + offset, 4); - offset += 4; - box = boxCreate(xoff, yoff, w, h); - boxaAddBox(ccb->boxa, box, L_INSERT); -/* lept_stderr("xoff = %d, yoff = %d, w = %d, h = %d\n", - xoff, yoff, w, h); */ - - memcpy(&nb, dataout + offset, 4); - offset += 4; -/* lept_stderr("num borders = %d\n", nb); */ - step = numaaCreate(nb); - ccb->step = step; - - for (j = 0; j < nb; j++) { /* should be nb */ - memcpy(&startx, dataout + offset, 4); - offset += 4; - memcpy(&starty, dataout + offset, 4); - offset += 4; - ptaAddPt(ccb->start, startx, starty); -/* lept_stderr("startx = %d, starty = %d\n", startx, starty); */ - na = numaCreate(0); - numaaAddNuma(step, na, L_INSERT); - - while(1) { - bval = *(dataout + offset); - offset++; - nib1 = (bval >> 4); - nib2 = bval & 0xf; - if (nib1 != 8) - numaAddNumber(na, nib1); - else - break; - if (nib2 != 8) - numaAddNumber(na, nib2); - else - break; - } - } - } - LEPT_FREE(dataout); - return ccba; - -#endif /* !HAVE_LIBZ */ -} - - -/*---------------------------------------------------------------------* - * SVG Output * - *---------------------------------------------------------------------*/ -/*! - * \brief ccbaWriteSVG() - * - * \param[in] filename - * \param[in] ccba - * \return 0 if OK, 1 on error - */ -l_ok -ccbaWriteSVG(const char *filename, - CCBORDA *ccba) -{ -char *svgstr; - - PROCNAME("ccbaWriteSVG"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!ccba) - return ERROR_INT("ccba not defined", procName, 1); - - if ((svgstr = ccbaWriteSVGString(filename, ccba)) == NULL) - return ERROR_INT("svgstr not made", procName, 1); - - l_binaryWrite(filename, "w", svgstr, strlen(svgstr)); - LEPT_FREE(svgstr); - - return 0; -} - - -/*! - * \brief ccbaWriteSVGString() - * - * \param[in] filename - * \param[in] ccba - * \return string in svg-formatted, that can be written to file, - * or NULL on error. 
- */ -char * -ccbaWriteSVGString(const char *filename, - CCBORDA *ccba) -{ -char *svgstr; -char smallbuf[256]; -char line0[] = ""; -char line1[] = ""; -char line2[] = ""; -char line3[] = ""; -char line5[] = ""; -char space[] = " "; -l_int32 i, j, ncc, npt, x, y; -CCBORD *ccb; -PTA *pta; -SARRAY *sa; - - PROCNAME("ccbaWriteSVGString"); - - if (!filename) - return (char *)ERROR_PTR("filename not defined", procName, NULL); - if (!ccba) - return (char *)ERROR_PTR("ccba not defined", procName, NULL); - - sa = sarrayCreate(0); - sarrayAddString(sa, line0, L_COPY); - sarrayAddString(sa, line1, L_COPY); - sarrayAddString(sa, line2, L_COPY); - ncc = ccbaGetCount(ccba); - for (i = 0; i < ncc; i++) { - if ((ccb = ccbaGetCcb(ccba, i)) == NULL) { - sarrayDestroy(&sa); - return (char *)ERROR_PTR("ccb not found", procName, NULL); - } - if ((pta = ccb->spglobal) == NULL) { - sarrayDestroy(&sa); - ccbDestroy(&ccb); - return (char *)ERROR_PTR("spglobal not made", procName, NULL); - } - sarrayAddString(sa, line3, L_COPY); - npt = ptaGetCount(pta); - for (j = 0; j < npt; j++) { - ptaGetIPt(pta, j, &x, &y); - snprintf(smallbuf, sizeof(smallbuf), "%0d,%0d", x, y); - sarrayAddString(sa, smallbuf, L_COPY); - } - sarrayAddString(sa, line4, L_COPY); - ccbDestroy(&ccb); - } - sarrayAddString(sa, line5, L_COPY); - sarrayAddString(sa, space, L_COPY); - - svgstr = sarrayToString(sa, 1); -/* lept_stderr("%s", svgstr); */ - - sarrayDestroy(&sa); - return svgstr; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccbord.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccbord.h deleted file mode 100644 index cccef6eb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccbord.h +++ /dev/null @@ -1,121 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_CCBORD_H -#define LEPTONICA_CCBORD_H - -/*! - * \file ccbord.h - * - *
- *           CCBord:   represents a single connected component
- *           CCBorda:  an array of CCBord
- * 
- */ - - /*! Use in ccbaStepChainsToPixCoords() */ -/*! CCB Coords */ -enum { - CCB_LOCAL_COORDS = 1, - CCB_GLOBAL_COORDS = 2 -}; - - /*! Use in ccbaGenerateSPGlobalLocs() */ -/*! CCB Points */ -enum { - CCB_SAVE_ALL_PTS = 1, - CCB_SAVE_TURNING_PTS = 2 -}; - - - /*! - *
-     * CCBord contains:
-     *
-     *    (1) a minimally-clipped bitmap of the component (pix),
-     *    (2) a boxa consisting of:
-     *          for the primary component:
-     *                (xul, yul) pixel location in global coords
-     *                (w, h) of the bitmap
-     *          for the hole components:
-     *                (x, y) in relative coordinates in primary component
-     *                (w, h) of the hole border (which is 2 pixels
-     *                       larger in each direction than the hole itself)
-     *    (3) a pta ('start') of the initial border pixel location for each
-     *        closed curve, all in relative coordinates of the primary
-     *        component.  This is given for the primary component,
-     *        followed by the hole components, if any.
-     *    (4) a refcount of the ccbord; used internally when a ccbord
-     *        is accessed from a ccborda (array of ccbord)
-     *    (5) a ptaa for the chain code for the border in relative
-     *        coordinates, where the first pta is the exterior border
-     *        and all other pta are for interior borders (holes)
-     *    (6) a ptaa for the global pixel loc rendition of the border,
-     *        where the first pta is the exterior border and all other
-     *        pta are for interior borders (holes).
-     *        This is derived from the local or step chain code.
-     *    (7) a numaa for the chain code for the border as orientation
-     *        directions between successive border pixels, where
-     *        the first numa is the exterior border and all other
-     *        numa are for interior borders (holes).  This is derived
-     *        from the local chain code.  The 8 directions are 0 - 7.
-     *    (8) a pta for a single chain for each c.c., comprised of outer
-     *        and hole borders, plus cut paths between them, all in
-     *        local coords.
-     *    (9) a pta for a single chain for each c.c., comprised of outer
-     *        and hole borders, plus cut paths between them, all in
-     *        global coords.
-     * 
- */ -struct CCBord -{ - struct Pix *pix; /*!< component bitmap (min size) */ - struct Boxa *boxa; /*!< regions of each closed curve */ - struct Pta *start; /*!< initial border pixel locations */ - l_int32 refcount; /*!< number of handles; start at 1 */ - struct Ptaa *local; /*!< ptaa of chain pixels (local) */ - struct Ptaa *global; /*!< ptaa of chain pixels (global) */ - struct Numaa *step; /*!< numaa of chain code (step dir) */ - struct Pta *splocal; /*!< pta of single chain (local) */ - struct Pta *spglobal; /*!< pta of single chain (global) */ -}; -typedef struct CCBord CCBORD; - -/*! Array of CCBord */ -struct CCBorda -{ - struct Pix *pix; /*!< input pix (may be null) */ - l_int32 w; /*!< width of pix */ - l_int32 h; /*!< height of pix */ - l_int32 n; /*!< number of ccbord in ptr array */ - l_int32 nalloc; /*!< number of ccbord ptrs allocated */ - struct CCBord **ccb; /*!< ccb ptr array */ -}; -typedef struct CCBorda CCBORDA; - - -#endif /* LEPTONICA_CCBORD_H */ - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccthin.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccthin.c deleted file mode 100644 index 968e8620..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ccthin.c +++ /dev/null @@ -1,476 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file ccthin.c - *
- *
- *     PIXA   *pixaThinConnected()
- *     PIX    *pixThinConnected()
- *     PIX    *pixThinConnectedBySet()
- *     SELA   *selaMakeThinSets()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* ------------------------------------------------------------ - * The sels used here (and their rotated counterparts) are the - * useful 3x3 Sels for thinning. They are defined in sel2.c, - * and the sets are constructed in selaMakeThinSets(). - * The notation is based on "Connectivity-preserving morphological - * image transformations", a version of which can be found at - * http://www.leptonica.com/papers/conn.pdf - * ------------------------------------------------------------ */ - -/*----------------------------------------------------------------* - * CC-preserving thinning * - *----------------------------------------------------------------*/ -/*! - * \brief pixaThinConnected() - * - * \param[in] pixas of 1 bpp pix - * \param[in] type L_THIN_FG, L_THIN_BG - * \param[in] connectivity 4 or 8 - * \param[in] maxiters max number of iters allowed; - * use 0 to iterate until completion - * \return pixds, or NULL on error - * - *
- * Notes:
- *      (1) See notes in pixThinConnected().
- * 
- */ -PIXA * -pixaThinConnected(PIXA *pixas, - l_int32 type, - l_int32 connectivity, - l_int32 maxiters) -{ -l_int32 i, n, d, same; -PIX *pix1, *pix2; -PIXA *pixad; -SELA *sela; - - PROCNAME("pixaThinConnected"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_THIN_FG && type != L_THIN_BG) - return (PIXA *)ERROR_PTR("invalid fg/bg type", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIXA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (maxiters == 0) maxiters = 10000; - - pixaVerifyDepth(pixas, &same, &d); - if (d != 1) - return (PIXA *)ERROR_PTR("pix are not all 1 bpp", procName, NULL); - - if (connectivity == 4) - sela = selaMakeThinSets(1, 0); - else /* connectivity == 8 */ - sela = selaMakeThinSets(5, 0); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixThinConnectedBySet(pix1, type, sela, maxiters); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - selaDestroy(&sela); - return pixad; -} - - -/*! - * \brief pixThinConnected() - * - * \param[in] pixs 1 bpp - * \param[in] type L_THIN_FG, L_THIN_BG - * \param[in] connectivity 4 or 8 - * \param[in] maxiters max number of iters allowed; - * use 0 to iterate until completion - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See "Connectivity-preserving morphological image transformations,"
- *          Dan S. Bloomberg, in SPIE Visual Communications and Image
- *          Processing, Conference 1606, pp. 320-334, November 1991,
- *          Boston, MA.   A web version is available at
- *              http://www.leptonica.com/papers/conn.pdf
- *      (2) This is a simple interface for two of the best iterative
- *          morphological thinning algorithms, for 4-c.c and 8-c.c.
- *          Each iteration uses a mixture of parallel operations
- *          (using several different 3x3 Sels) and serial operations.
- *          Specifically, each thinning iteration consists of
- *          four sequential thinnings from each of four directions.
- *          Each of these thinnings is a parallel composite
- *          operation, where the union of a set of HMTs are set
- *          subtracted from the input.  For 4-cc thinning, we
- *          use 3 HMTs in parallel, and for 8-cc thinning we use 4 HMTs.
- *      (3) A "good" thinning algorithm is one that generates a skeleton
- *          that is near the medial axis and has neither pruned
- *          real branches nor left extra dendritic branches.
- *      (4) Duality between operations on fg and bg require switching
- *          the connectivity.  To thin the foreground, which is the usual
- *          situation, use type == L_THIN_FG.  Thickening the foreground
- *          is equivalent to thinning the background (type == L_THIN_BG),
- *          where the alternate connectivity gets preserved.
- *          For example, to thicken the fg with 2 rounds of iterations
- *          using 4-c.c., thin the bg using Sels that preserve 8-connectivity:
- *             Pix *pix = pixThinConnected(pixs, L_THIN_BG, 8, 2);
- *      (5) This makes and destroys the sela set each time. It's not a large
- *          overhead, but if you are calling this thousands of times on
- *          very small images, you can avoid the overhead; e.g.
- *             Sela *sela = selaMakeThinSets(1, 0);  // for 4-c.c.
- *             Pix *pix = pixThinConnectedBySet(pixs, L_THIN_FG, sela, 0);
- *          using set 1 for 4-c.c. and set 5 for 8-c.c operations.
- * 
- */ -PIX * -pixThinConnected(PIX *pixs, - l_int32 type, - l_int32 connectivity, - l_int32 maxiters) -{ -PIX *pixd; -SELA *sela; - - PROCNAME("pixThinConnected"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (type != L_THIN_FG && type != L_THIN_BG) - return (PIX *)ERROR_PTR("invalid fg/bg type", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (maxiters == 0) maxiters = 10000; - - if (connectivity == 4) - sela = selaMakeThinSets(1, 0); - else /* connectivity == 8 */ - sela = selaMakeThinSets(5, 0); - - pixd = pixThinConnectedBySet(pixs, type, sela, maxiters); - - selaDestroy(&sela); - return pixd; -} - - -/*! - * \brief pixThinConnectedBySet() - * - * \param[in] pixs 1 bpp - * \param[in] type L_THIN_FG, L_THIN_BG - * \param[in] sela of Sels for parallel composite HMTs - * \param[in] maxiters max number of iters allowed; - * use 0 to iterate until completion - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See notes in pixThinConnected().
- *      (2) This takes a sela representing one of 11 sets of HMT Sels.
- *          The HMTs from this set are run in parallel and the result
- *          is OR'd before being subtracted from the source.  For each
- *          iteration, this "parallel" thin is performed four times
- *          sequentially, for sels rotated by 90 degrees in all four
- *          directions.
- *      (3) The "parallel" and "sequential" nomenclature is standard
- *          in digital filtering.  Here, "parallel" operations work on the
- *          same source (pixd), and accumulate the results in a temp
- *          image before actually applying them to the source (in this
- *          case, using an in-place subtraction).  "Sequential" operations
- *          operate directly on the source (pixd) to produce the result
- *          (in this case, with four sequential thinning operations, one
- *          from each of four directions).
- * 
- */ -PIX * -pixThinConnectedBySet(PIX *pixs, - l_int32 type, - SELA *sela, - l_int32 maxiters) -{ -l_int32 i, j, r, nsels, same; -PIXA *pixahmt; -PIX **pixhmt; /* array owned by pixahmt; do not destroy! */ -PIX *pix1, *pix2, *pixd; -SEL *sel, *selr; - - PROCNAME("pixThinConnectedBySet"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (type != L_THIN_FG && type != L_THIN_BG) - return (PIX *)ERROR_PTR("invalid fg/bg type", procName, NULL); - if (!sela) - return (PIX *)ERROR_PTR("sela not defined", procName, NULL); - if (maxiters == 0) maxiters = 10000; - - /* Set up array of temp pix to hold hmts */ - nsels = selaGetCount(sela); - pixahmt = pixaCreate(nsels); - for (i = 0; i < nsels; i++) { - pix1 = pixCreateTemplate(pixs); - pixaAddPix(pixahmt, pix1, L_INSERT); - } - pixhmt = pixaGetPixArray(pixahmt); - if (!pixhmt) { - pixaDestroy(&pixahmt); - return (PIX *)ERROR_PTR("pixhmt array not made", procName, NULL); - } - - /* Set up initial image for fg thinning */ - if (type == L_THIN_FG) - pixd = pixCopy(NULL, pixs); - else /* bg thinning */ - pixd = pixInvert(NULL, pixs); - - /* Thin the fg, with up to maxiters iterations */ - for (i = 0; i < maxiters; i++) { - pix1 = pixCopy(NULL, pixd); /* test for completion */ - for (r = 0; r < 4; r++) { /* over 90 degree rotations of Sels */ - for (j = 0; j < nsels; j++) { /* over individual sels in sela */ - sel = selaGetSel(sela, j); /* not a copy */ - selr = selRotateOrth(sel, r); - pixHMT(pixhmt[j], pixd, selr); - selDestroy(&selr); - if (j > 0) - pixOr(pixhmt[0], pixhmt[0], pixhmt[j]); /* accum result */ - } - pixSubtract(pixd, pixd, pixhmt[0]); /* remove result */ - } - pixEqual(pixd, pix1, &same); - pixDestroy(&pix1); - if (same) { -/* L_INFO("%d iterations to completion\n", procName, i); */ - break; - } - } - - /* This is a bit tricky. 
If we're thickening the foreground, then - * we get a fg border of thickness equal to the number of - * iterations. This border is connected to all components that - * were initially touching the border, but as it grows, it does - * not touch other growing components -- it leaves a 1 pixel wide - * background between it and the growing components, and that - * thin background prevents the components from growing further. - * This border can be entirely removed as follows: - * (1) Subtract the original (unthickened) image pixs from the - * thickened image. This removes the pixels that were originally - * touching the border. - * (2) Get all remaining pixels that are connected to the border. - * (3) Remove those pixels from the thickened image. */ - if (type == L_THIN_BG) { - pixInvert(pixd, pixd); /* finish with duality */ - pix1 = pixSubtract(NULL, pixd, pixs); - pix2 = pixExtractBorderConnComps(pix1, 4); - pixSubtract(pixd, pixd, pix2); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixaDestroy(&pixahmt); - return pixd; -} - - -/*! - * \brief selaMakeThinSets() - * - * \param[in] index into specific sets - * \param[in] debug 1 to output display of sela - * \return sela, or NULL on error - * - *
- * Notes:
- *      (1) These are specific sets of HMTs to be used in parallel for
- *          for thinning from each of four directions.
- *      (2) The sets are indexed as follows:
- *          For thinning (e.g., run to completion):
- *              index = 1     sel_4_1, sel_4_2, sel_4_3
- *              index = 2     sel_4_1, sel_4_5, sel_4_6
- *              index = 3     sel_4_1, sel_4_7, sel_4_7_rot
- *              index = 4     sel_48_1, sel_48_1_rot, sel_48_2
- *              index = 5     sel_8_2, sel_8_3, sel_8_5, sel_8_6
- *              index = 6     sel_8_2, sel_8_3, sel_48_2
- *              index = 7     sel_8_1, sel_8_5, sel_8_6
- *              index = 8     sel_8_2, sel_8_3, sel_8_8, sel_8_9
- *              index = 9     sel_8_5, sel_8_6, sel_8_7, sel_8_7_rot
- *          For thickening (e.g., just a few iterations):
- *              index = 10    sel_4_2, sel_4_3
- *              index = 11    sel_8_4
- *      (3) For a very smooth skeleton, use set 1 for 4 connected and
- *          set 5 for 8 connected thins.
- * 
- */ -SELA * -selaMakeThinSets(l_int32 index, - l_int32 debug) -{ -SEL *sel; -SELA *sela1, *sela2, *sela3; - - PROCNAME("selaMakeThinSets"); - - if (index < 1 || index > 11) - return (SELA *)ERROR_PTR("invalid index", procName, NULL); - - sela2 = selaCreate(4); - switch(index) - { - case 1: - sela1 = sela4ccThin(NULL); - selaFindSelByName(sela1, "sel_4_1", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_4_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_4_3", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 2: - sela1 = sela4ccThin(NULL); - selaFindSelByName(sela1, "sel_4_1", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_4_5", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_4_6", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 3: - sela1 = sela4ccThin(NULL); - selaFindSelByName(sela1, "sel_4_1", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_4_7", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - sel = selRotateOrth(sel, 1); - selaAddSel(sela2, sel, "sel_4_7_rot", L_INSERT); - break; - case 4: - sela1 = sela4and8ccThin(NULL); - selaFindSelByName(sela1, "sel_48_1", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - sel = selRotateOrth(sel, 1); - selaAddSel(sela2, sel, "sel_48_1_rot", L_INSERT); - selaFindSelByName(sela1, "sel_48_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 5: - sela1 = sela8ccThin(NULL); - selaFindSelByName(sela1, "sel_8_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_3", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_5", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_6", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 6: - sela1 = 
sela8ccThin(NULL); - sela3 = sela4and8ccThin(NULL); - selaFindSelByName(sela1, "sel_8_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_3", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela3, "sel_48_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaDestroy(&sela3); - break; - case 7: - sela1 = sela8ccThin(NULL); - selaFindSelByName(sela1, "sel_8_1", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_5", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_6", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 8: - sela1 = sela8ccThin(NULL); - selaFindSelByName(sela1, "sel_8_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_3", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_8", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_9", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 9: - sela1 = sela8ccThin(NULL); - selaFindSelByName(sela1, "sel_8_5", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_6", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_8_7", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - sel = selRotateOrth(sel, 1); - selaAddSel(sela2, sel, "sel_8_7_rot", L_INSERT); - break; - case 10: /* thicken for this one; use just a few iterations */ - sela1 = sela4ccThin(NULL); - selaFindSelByName(sela1, "sel_4_2", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - selaFindSelByName(sela1, "sel_4_3", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - case 11: /* thicken for this one; use just a few iterations */ - sela1 = sela8ccThin(NULL); - selaFindSelByName(sela1, "sel_8_4", NULL, &sel); - selaAddSel(sela2, sel, NULL, L_COPY); - break; - } - - /* Optionally 
display the sel set */ - if (debug) { - PIX *pix1; - char buf[32]; - lept_mkdir("/lept/sels"); - pix1 = selaDisplayInPix(sela2, 35, 3, 15, 4); - snprintf(buf, sizeof(buf), "/tmp/lept/sels/set%d.png", index); - pixWrite(buf, pix1, IFF_PNG); - pixDisplay(pix1, 100, 100); - pixDestroy(&pix1); - } - - selaDestroy(&sela1); - return sela2; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/checkerboard.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/checkerboard.c deleted file mode 100644 index fac314bb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/checkerboard.c +++ /dev/null @@ -1,313 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * \file checkerboard.c - *
- *
- *    Find the checker corners where 4 squares come together
- *      PIX            *pixFindCheckerboardCorners()
- *
- *    Generate the hit-miss sels
- *      SELA           *makeCheckerboardCornerSela()
- *      static PIXA    *makeCheckerboardCornerPixa()
- *
- * The functions in this file locate the corners where four squares
- * in a checkerboard come together.  With a perfectly aligned checkerboard,
- * the solution is trivial: take the union of two hit-miss transforms (HMTs),
- * each having a simple diagonal structuring element (sel).  The two
- * sels can be generated from strings such as these, using
- * selCreateFromString():
- *
- *  static const char *str1 = "o     x"
- *                            "       "
- *                            "       "
- *                            "   C   "
- *                            "       "
- *                            "       "
- *                            "x     o";
- *  static const char *str2 = "x     o"
- *                            "       "
- *                            "       "
- *                            "   C   "
- *                            "       "
- *                            "       "
- *                            "o     x";
- *
- * A more interesting problem is to consider the checkerboard viewed from
- * some arbitrary angle and orientation from the normal.  The method
- * developed here works for a camera located within a cone with an opening
- * half-angle of about 45 degrees, and with its axis along the normal
- * to the checkerboard.
- *
- * See prog/checkerboard_reg.c for usage.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -static PIXA *makeCheckerboardCornerPixa(l_int32 size, l_int32 dilation, - l_int32 nsels); - -static const char selnames[64] = "s_diag1 s_diag2 s_cross1 s_cross2"; - -/*! - * \brief pixFindCheckerboardCorner() - * - * \param[in] pixs of checkerboard - * \param[in] size size of HMT sel; >= 7, typ. 15; 0 for default - * \param[in] dilation size of hit and miss squares; typ. 1 or 3; max 5 - * \param[in] nsels number to use (either 2 or 4) - * \param[out] ppix_corners [optional] 1 bpp pix giving corner locations - * \param[out] ppta_corners [optional] pta giving corner locations - * \param[in] pixadb [optional] pass in pre-allocated - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Use %nsels = 4 if the checkerboard may be rotated by more
- *          than 20 deg.
- *      (2) The values of %size and %dilation that can be used depend on
- *          the square sizes.  Nominal values here are for squares of
- *          size 30 to 50.  In general, because of the viewing angle
- *          of the camera, the "squares" will appear approximately
- *          as a rotated rectangle.
- *      (3) The outputs pix_corners and pta_corners are optional.
- * 
- */ -l_ok -pixFindCheckerboardCorners(PIX *pixs, - l_int32 size, - l_int32 dilation, - l_int32 nsels, - PIX **ppix_corners, - PTA **ppta_corners, - PIXA *pixadb) -{ -BOXA *boxa1; -PIX *pix1, *pix2, *pix3; -PTA *pta1; -SEL *sel; -SELA *sela; - - PROCNAME("pixFindCheckerboardCorners"); - - if (ppix_corners) *ppix_corners = NULL; - if (ppta_corners) *ppta_corners = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (size <= 0) size = 7; - if (size < 7) - return ERROR_INT("size too small", procName, 1); - if (dilation < 1 || dilation > 5) - return ERROR_INT("dilation not in [1 ...5]", procName, 1); - if (nsels != 2 && nsels != 4) - return ERROR_INT("nsels not 2 or 4", procName, 1); - - /* Generate the hit-miss sels for finding corners */ - sela = makeCheckerboardCornerSela(size, dilation, nsels, pixadb); - if (!sela) - return ERROR_INT("sela not made", procName, 1); - if (pixadb) { - pix1 = selaDisplayInPix(sela, 15, 3, 15, 2); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - /* Do the hit-miss transform to find corner locations */ - pix1 = pixUnionOfMorphOps(pixs, sela, L_MORPH_HMT); - if (pixadb) pixaAddPix(pixadb, pix1, L_CLONE); - selaDestroy(&sela); - - /* Remove large noise c.c. */ - pix2 = pixSelectBySize(pix1, size, size, 8, L_SELECT_IF_BOTH, - L_SELECT_IF_LTE, NULL); - if (pixadb) pixaAddPix(pixadb, pix2, L_CLONE); - - /* Thin remaining c.c. 
*/ - pix3 = pixThinConnected(pix2, L_THIN_FG, 8, 0); - if (pixadb) pixaAddPix(pixadb, pix3, L_CLONE); - - /* Extract the location of the center of each component */ - boxa1 = pixConnCompBB(pix3, 8); - pta1 = boxaExtractCorners(boxa1, L_BOX_CENTER); - boxaDestroy(&boxa1); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (pixadb) { /* show the result as colored plus signs on the input */ - sel = selMakePlusSign(15, 2); - pix1 = pixDisplaySelectedPixels(pixs, pix3, sel, 0xff000000); - pixaAddPix(pixadb, pix1, L_INSERT); - selDestroy(&sel); - } - - if (ppix_corners) - *ppix_corners = pix3; - else - pixDestroy(&pix3); - if (ppta_corners) - *ppta_corners = pta1; - else - ptaDestroy(&pta1); - return 0; -} - - -/*! - * \brief makeCheckerboardCornerSela() - * - * \param[in] size size of HMT sel; >= 7, typ. 15; 0 for default - * \param[in] dilation size of hit and miss squares; typ. 1 or 3; max 5 - * \param[in] nsels number to use (either 2 or 4) - * \param[in] pixadb [optional] pass in pre-allocated - * \return sela hit-miss sels for finding corners, or NULL on error - * - *
- * Notes:
- *      (1) Use 4 sels if the checkerboard may be rotated by more than 20 deg.
- * 
- */ -SELA * -makeCheckerboardCornerSela(l_int32 size, - l_int32 dilation, - l_int32 nsels, - PIXA *pixadb) -{ -PIX *pix1; -PIXA *pixa1; -SARRAY *sa; -SELA *sela; - - PROCNAME("makeCheckerboardCornerSela"); - - if (size <= 0) size = 7; - if (size < 7) - return (SELA *)ERROR_PTR("size too small", procName, NULL); - if (dilation < 1 || dilation > 5) - return (SELA *)ERROR_PTR("dilation not in [1 ...5]", procName, NULL); - if (nsels != 2 && nsels != 4) - return (SELA *)ERROR_PTR("nsels not 2 or 4", procName, NULL); - - if ((pixa1 = makeCheckerboardCornerPixa(size, dilation, nsels)) == NULL) - return (SELA *)ERROR_PTR("pixa for sels not made", procName, NULL); - if (pixadb) { - pix1 = pixaDisplayTiledInColumns(pixa1, 4, 8.0, 15, 2); - pixaAddPix(pixadb, pix1, L_INSERT); - } - sa = sarrayCreateWordsFromString(selnames); - sela = selaCreateFromColorPixa(pixa1, sa); - pixaDestroy(&pixa1); - sarrayDestroy(&sa); - if (!sela) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - return sela; -} - - -/*! - * \brief makeCheckerboardCornerPixa() - * - * \param[in] size size of HMT sel; >= 7, typ. 15; 0 for default - * \param[in] dilation size of hit and miss squares; typ. 1 or 3; max 5 - * \param[in] nsels number to use (either 2 or 4) - * \return pixa representing hit-miss sels for finding corners, or NULL on error - * - *
- * Notes:
- *      (1) Each pix can be used to generate a hit-miss sel, using the
- *          function selCreateFromColorPix().  See that function for the
- *          use of color and gray pixels to encode the hits, misses and
- *          center in the structuring element.
- * 
- */ -static PIXA * -makeCheckerboardCornerPixa(l_int32 size, - l_int32 dilation, - l_int32 nsels) -{ -PIX *pix1, *pix2, *pix3; -PIXA *pixa1; - - pixa1 = pixaCreate(4); - - /* Represent diagonal neg slope hits and pos slope misses */ - pix1 = pixCreate(size, size, 32); - pixSetAll(pix1); - pix2 = pixCreate(size, size, 1); /* slope -1 line (2 pixel) mask */ - pixSetPixel(pix2, 1, 1, 1); /* UL corner */ - pixSetPixel(pix2, size - 2, size - 2, 1); /* LR corner */ - if (dilation > 1) - pixDilateBrick(pix2, pix2, dilation, dilation); /* dilate each pixel */ - pixSetMasked(pix1, pix2, 0x00ff0000); /* green hit */ - pix3 = pixRotate90(pix2, 1); /* slope +1 line (2 pixel) mask */ - pixSetMasked(pix1, pix3, 0xff000000); /* red miss */ - pixSetRGBPixel(pix1, size / 2, size / 2, 128, 128, 128); /* gray center */ - pixaAddPix(pixa1, pix1, L_INSERT); - - /* Represent diagonal pos slope hits and neg slope misses */ - pix1 = pixCreate(size, size, 32); - pixSetAll(pix1); - pixSetMasked(pix1, pix2, 0xff000000); /* red hit */ - pixSetMasked(pix1, pix3, 0x00ff0000); /* green miss */ - pixSetRGBPixel(pix1, size / 2, size / 2, 128, 128, 128); /* gray center */ - pixaAddPix(pixa1, pix1, L_INSERT); - pixDestroy(&pix2); - pixDestroy(&pix3); - - if (nsels == 2) - return pixa1; - - /* Represent cross: vertical hits and horizontal misses */ - pix1 = pixCreate(size, size, 32); - pixSetAll(pix1); - pix2 = pixCreate(size, size, 1); /* vertical line (2 pixel) mask */ - pixSetPixel(pix2, size / 2, 1, 1); - pixSetPixel(pix2, size / 2, size - 2, 1); - if (dilation > 1) - pixDilateBrick(pix2, pix2, dilation, dilation); /* dilate each pixel */ - pixSetMasked(pix1, pix2, 0x00ff0000); /* green hit */ - pix3 = pixRotate90(pix2, 1); /* horizontal line (2 pixel) mask */ - pixSetMasked(pix1, pix3, 0xff000000); /* red miss */ - pixSetRGBPixel(pix1, size / 2, size / 2, 128, 128, 128); /* gray center */ - pixaAddPix(pixa1, pix1, L_INSERT); - - /* Represent cross: horizontal hits and vertical misses */ - pix1 
= pixCreate(size, size, 32); - pixSetAll(pix1); - pixSetMasked(pix1, pix3, 0x00ff0000); /* green hit */ - pixSetMasked(pix1, pix2, 0xff000000); /* red miss */ - pixSetRGBPixel(pix1, size / 2, size / 2, 128, 128, 128); /* gray center */ - pixaAddPix(pixa1, pix1, L_INSERT); - pixDestroy(&pix2); - pixDestroy(&pix3); - - return pixa1; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/classapp.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/classapp.c deleted file mode 100644 index c383c547..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/classapp.c +++ /dev/null @@ -1,1050 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file classapp.c - *
- *
- *      Top-level jb2 correlation and rank-hausdorff
- *         l_int32         jbCorrelation()
- *         l_int32         jbRankHaus()
- *
- *      Extract and classify words in textline order
- *         JBCLASSER      *jbWordsInTextlines()
- *         l_int32         pixGetWordsInTextlines()
- *         l_int32         pixGetWordBoxesInTextlines()
- *
- *      Extract word and character bounding boxes
- *         l_int32         pixFindWordAndCharacterBoxes()
- *
- *      Use word bounding boxes to compare page images
- *         NUMAA          *boxaExtractSortedPattern()
- *         l_int32         numaaCompareImagesByBoxes()
- *         static l_int32  testLineAlignmentX()
- *         static l_int32  countAlignedMatches()
- *         static void     printRowIndices()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_int32 L_BUF_SIZE = 512; /*!< size of filename buffer */ -static const l_int32 JB_WORDS_MIN_WIDTH = 5; /*!< min. word width in pixels */ -static const l_int32 JB_WORDS_MIN_HEIGHT = 3; /*!< min. word height in pixels */ - - /* Static comparison functions */ -static l_int32 testLineAlignmentX(NUMA *na1, NUMA *na2, l_int32 shiftx, - l_int32 delx, l_int32 nperline); -static l_int32 countAlignedMatches(NUMA *nai1, NUMA *nai2, NUMA *nasx, - NUMA *nasy, l_int32 n1, l_int32 n2, - l_int32 delx, l_int32 dely, - l_int32 nreq, l_int32 *psame, - l_int32 debugflag); -static void printRowIndices(l_int32 *index1, l_int32 n1, - l_int32 *index2, l_int32 n2); - -/*------------------------------------------------------------------* - * Top-level jb2 correlation and rank-hausdorff * - *------------------------------------------------------------------*/ -/*! - * \brief jbCorrelation() - * - * \param[in] dirin directory of input images - * \param[in] thresh typically ~0.8 - * \param[in] weight typically ~0.6 - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \param[in] rootname for output files - * \param[in] firstpage 0-based - * \param[in] npages use 0 for all pages in dirin - * \param[in] renderflag 1 to render from templates; 0 to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The images must be 1 bpp.  If they are not, you can convert
- *          them using convertFilesTo1bpp().
- *      (2) See prog/jbcorrelation for generating more output (e.g.,
- *          for debugging)
- * 
- */ -l_ok -jbCorrelation(const char *dirin, - l_float32 thresh, - l_float32 weight, - l_int32 components, - const char *rootname, - l_int32 firstpage, - l_int32 npages, - l_int32 renderflag) -{ -char filename[L_BUF_SIZE]; -l_int32 nfiles, i, numpages; -JBDATA *data; -JBCLASSER *classer; -PIX *pix; -PIXA *pixa; -SARRAY *safiles; - - PROCNAME("jbCorrelation"); - - if (!dirin) - return ERROR_INT("dirin not defined", procName, 1); - if (!rootname) - return ERROR_INT("rootname not defined", procName, 1); - if (components != JB_CONN_COMPS && components != JB_CHARACTERS && - components != JB_WORDS) - return ERROR_INT("components invalid", procName, 1); - - safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); - nfiles = sarrayGetCount(safiles); - - /* Classify components */ - classer = jbCorrelationInit(components, 0, 0, thresh, weight); - jbAddPages(classer, safiles); - - /* Save data */ - data = jbDataSave(classer); - jbDataWrite(rootname, data); - - /* Optionally, render pages using class templates */ - if (renderflag) { - pixa = jbDataRender(data, FALSE); - numpages = pixaGetCount(pixa); - if (numpages != nfiles) - lept_stderr("numpages = %d, nfiles = %d, not equal!\n", - numpages, nfiles); - for (i = 0; i < numpages; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - snprintf(filename, L_BUF_SIZE, "%s.%04d", rootname, i); - lept_stderr("filename: %s\n", filename); - pixWrite(filename, pix, IFF_PNG); - pixDestroy(&pix); - } - pixaDestroy(&pixa); - } - - sarrayDestroy(&safiles); - jbClasserDestroy(&classer); - jbDataDestroy(&data); - return 0; -} - - -/*! - * \brief jbRankHaus() - * - * \param[in] dirin directory of input images - * \param[in] size of Sel used for dilation; typ. 2 - * \param[in] rank rank value of match; typ. 
0.97 - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \param[in] rootname for output files - * \param[in] firstpage 0-based - * \param[in] npages use 0 for all pages in dirin - * \param[in] renderflag 1 to render from templates; 0 to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See prog/jbrankhaus for generating more output (e.g.,
- *          for debugging)
- * 
- */ -l_ok -jbRankHaus(const char *dirin, - l_int32 size, - l_float32 rank, - l_int32 components, - const char *rootname, - l_int32 firstpage, - l_int32 npages, - l_int32 renderflag) -{ -char filename[L_BUF_SIZE]; -l_int32 nfiles, i, numpages; -JBDATA *data; -JBCLASSER *classer; -PIX *pix; -PIXA *pixa; -SARRAY *safiles; - - PROCNAME("jbRankHaus"); - - if (!dirin) - return ERROR_INT("dirin not defined", procName, 1); - if (!rootname) - return ERROR_INT("rootname not defined", procName, 1); - if (components != JB_CONN_COMPS && components != JB_CHARACTERS && - components != JB_WORDS) - return ERROR_INT("components invalid", procName, 1); - - safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); - nfiles = sarrayGetCount(safiles); - - /* Classify components */ - classer = jbRankHausInit(components, 0, 0, size, rank); - jbAddPages(classer, safiles); - - /* Save data */ - data = jbDataSave(classer); - jbDataWrite(rootname, data); - - /* Optionally, render pages using class templates */ - if (renderflag) { - pixa = jbDataRender(data, FALSE); - numpages = pixaGetCount(pixa); - if (numpages != nfiles) - lept_stderr("numpages = %d, nfiles = %d, not equal!\n", - numpages, nfiles); - for (i = 0; i < numpages; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - snprintf(filename, L_BUF_SIZE, "%s.%04d", rootname, i); - lept_stderr("filename: %s\n", filename); - pixWrite(filename, pix, IFF_PNG); - pixDestroy(&pix); - } - pixaDestroy(&pixa); - } - - sarrayDestroy(&safiles); - jbClasserDestroy(&classer); - jbDataDestroy(&data); - return 0; -} - - - -/*------------------------------------------------------------------* - * Extract and classify words in textline order * - *------------------------------------------------------------------*/ -/*! 
- * \brief jbWordsInTextlines() - * - * \param[in] dirin directory of input pages - * \param[in] reduction 1 for full res; 2 for half-res - * \param[in] maxwidth of word mask components, to be kept - * \param[in] maxheight of word mask components, to be kept - * \param[in] thresh on correlation; 0.80 is reasonable - * \param[in] weight for handling thick text; 0.6 is reasonable - * \param[out] pnatl numa with textline index for each component - * \param[in] firstpage 0-based - * \param[in] npages use 0 for all pages in dirin - * \return classer for the set of pages - * - *
- * Notes:
- *      (1) This is a high-level function.  See prog/jbwords for example
- *          of usage.
- *      (2) Typically, use input of 75 - 150 ppi for finding words.
- * 
- */ -JBCLASSER * -jbWordsInTextlines(const char *dirin, - l_int32 reduction, - l_int32 maxwidth, - l_int32 maxheight, - l_float32 thresh, - l_float32 weight, - NUMA **pnatl, - l_int32 firstpage, - l_int32 npages) -{ -char *fname; -l_int32 nfiles, i, w, h; -BOXA *boxa; -JBCLASSER *classer; -NUMA *nai, *natl; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *safiles; - - PROCNAME("jbWordsInTextlines"); - - if (!pnatl) - return (JBCLASSER *)ERROR_PTR("&natl not defined", procName, NULL); - *pnatl = NULL; - if (!dirin) - return (JBCLASSER *)ERROR_PTR("dirin not defined", procName, NULL); - if (reduction != 1 && reduction != 2) - return (JBCLASSER *)ERROR_PTR("reduction not in {1,2}", procName, NULL); - - safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); - nfiles = sarrayGetCount(safiles); - - /* Classify components */ - classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight); - classer->safiles = sarrayCopy(safiles); - natl = numaCreate(0); - *pnatl = natl; - for (i = 0; i < nfiles; i++) { - fname = sarrayGetString(safiles, i, L_NOCOPY); - if ((pix1 = pixRead(fname)) == NULL) { - L_WARNING("image file %d not read\n", procName, i); - continue; - } - if (reduction == 1) - pix2 = pixClone(pix1); - else /* reduction == 2 */ - pix2 = pixReduceRankBinaryCascade(pix1, 1, 0, 0, 0); - pixGetWordsInTextlines(pix2, JB_WORDS_MIN_WIDTH, - JB_WORDS_MIN_HEIGHT, maxwidth, maxheight, - &boxa, &pixa, &nai); - pixGetDimensions(pix2, &w, &h, NULL); - classer->w = w; - classer->h = h; - jbAddPageComponents(classer, pix2, boxa, pixa); - numaJoin(natl, nai, 0, -1); - pixDestroy(&pix1); - pixDestroy(&pix2); - numaDestroy(&nai); - boxaDestroy(&boxa); - pixaDestroy(&pixa); - } - - sarrayDestroy(&safiles); - return classer; -} - - -/*! - * \brief pixGetWordsInTextlines() - * - * \param[in] pixs 1 bpp, typ. 
75 - 150 ppi - * \param[in] minwidth of saved components; smaller are discarded - * \param[in] minheight of saved components; smaller are discarded - * \param[in] maxwidth of saved components; larger are discarded - * \param[in] maxheight of saved components; larger are discarded - * \param[out] pboxad word boxes sorted in textline line order - * \param[out] ppixad word images sorted in textline line order - * \param[out] pnai index of textline for each word - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input should be at a resolution of between 75 and 150 ppi.
- *      (2) The four size constraints on saved components are all
- *          scaled by %reduction.
- *      (3) The result are word images (and their b.b.), extracted in
- *          textline order, at either full res or 2x reduction,
- *          and with a numa giving the textline index for each word.
- *      (4) The pixa and boxa interfaces should make this type of
- *          application simple to put together.  The steps are:
- *           ~ generate first estimate of word masks
- *           ~ get b.b. of these, and remove the small and big ones
- *           ~ extract pixa of the word images, using the b.b.
- *           ~ sort actual word images in textline order (2d)
- *           ~ flatten them to a pixa (1d), saving the textline index
- *             for each pix
- *      (5) In an actual application, it may be desirable to pre-filter
- *          the input image to remove large components, to extract
- *          single columns of text, and to deskew them.  For example,
- *          to remove both large components and small noisy components
- *          that can interfere with the statistics used to estimate
- *          parameters for segmenting by words, but still retain text lines,
- *          the following image preprocessing can be done:
- *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
- *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
- *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
- *                pixAnd(pixf, pixf, pixs);  // the filtered image
- *          The closing turns text lines into long blobs, but does not
- *          significantly increase their height.  But if there are many
- *          small connected components in a dense texture, this is likely
- *          to generate tall components that will be eliminated in pixf.
- * 
- */ -l_ok -pixGetWordsInTextlines(PIX *pixs, - l_int32 minwidth, - l_int32 minheight, - l_int32 maxwidth, - l_int32 maxheight, - BOXA **pboxad, - PIXA **ppixad, - NUMA **pnai) -{ -BOXA *boxa1, *boxad; -BOXAA *baa; -NUMA *nai; -NUMAA *naa; -PIXA *pixa1, *pixad; -PIXAA *paa; - - PROCNAME("pixGetWordsInTextlines"); - - if (!pboxad || !ppixad || !pnai) - return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1); - *pboxad = NULL; - *ppixad = NULL; - *pnai = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Get the bounding boxes of the words from the word mask. */ - pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight, - &boxa1, NULL, NULL); - - /* Generate a pixa of the word images */ - pixa1 = pixaCreateFromBoxa(pixs, boxa1, 0, 0, NULL); - - /* Sort the bounding boxes of these words by line. We use the - * index mapping to allow identical sorting of the pixa. */ - baa = boxaSort2d(boxa1, &naa, -1, -1, 4); - paa = pixaSort2dByIndex(pixa1, naa, L_CLONE); - - /* Flatten the word paa */ - pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE); - boxad = pixaGetBoxa(pixad, L_COPY); - - *pnai = nai; - *pboxad = boxad; - *ppixad = pixad; - - pixaDestroy(&pixa1); - boxaDestroy(&boxa1); - boxaaDestroy(&baa); - pixaaDestroy(&paa); - numaaDestroy(&naa); - return 0; -} - - -/*! - * \brief pixGetWordBoxesInTextlines() - * - * \param[in] pixs 1 bpp, typ. 75 - 150 ppi - * \param[in] minwidth of saved components; smaller are discarded - * \param[in] minheight of saved components; smaller are discarded - * \param[in] maxwidth of saved components; larger are discarded - * \param[in] maxheight of saved components; larger are discarded - * \param[out] pboxad word boxes sorted in textline line order - * \param[out] pnai [optional] index of textline for each word - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input should be at a resolution of between 75 and 150 ppi.
- *      (2) This is a special version of pixGetWordsInTextlines(), that
- *          just finds the word boxes in line order, with a numa
- *          giving the textline index for each word.
- *          See pixGetWordsInTextlines() for more details.
- * 
- */ -l_ok -pixGetWordBoxesInTextlines(PIX *pixs, - l_int32 minwidth, - l_int32 minheight, - l_int32 maxwidth, - l_int32 maxheight, - BOXA **pboxad, - NUMA **pnai) -{ -BOXA *boxa1; -BOXAA *baa; -NUMA *nai; - - PROCNAME("pixGetWordBoxesInTextlines"); - - if (pnai) *pnai = NULL; - if (!pboxad) - return ERROR_INT("&boxad and &nai not both defined", procName, 1); - *pboxad = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Get the bounding boxes of the words from the word mask. */ - pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight, - &boxa1, NULL, NULL); - - /* 2D sort the bounding boxes of these words. */ - baa = boxaSort2d(boxa1, NULL, 3, -5, 5); - - /* Flatten the boxaa, saving the boxa index for each box */ - *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE); - - if (pnai) - *pnai = nai; - else - numaDestroy(&nai); - boxaDestroy(&boxa1); - boxaaDestroy(&baa); - return 0; -} - - -/*------------------------------------------------------------------* - * Extract word and character bounding boxes * - *------------------------------------------------------------------*/ -/*! - * \brief pixFindWordAndCharacterBoxes() - * - * \param[in] pixs 2, 4, 8 or 32 bpp; colormap OK; typ. 300 ppi - * \param[in] boxs [optional] region to select in pixs - * \param[in] thresh binarization threshold (typ. 100 - 150) - * \param[out] pboxaw return the word boxes - * \param[out] pboxaac return the character boxes - * \param[in] debugdir [optional] for debug images; use NULL to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %boxs == NULL, the entire input image is used.
- *      (2) Having an input pix that is not 1bpp is necessary to reduce
- *          touching characters by using a low binarization threshold.
- *          Suggested thresholds are between 100 and 150.
- *      (3) The coordinates in the output boxes are global, with respect
- *          to the input image.
- * 
- */ -l_ok -pixFindWordAndCharacterBoxes(PIX *pixs, - BOX *boxs, - l_int32 thresh, - BOXA **pboxaw, - BOXAA **pboxaac, - const char *debugdir) -{ -char *debugfile, *subdir; -l_int32 i, xs, ys, xb, yb, nb, loc; -l_float32 scalefact; -BOX *box1, *box2; -BOXA *boxa1, *boxa1a, *boxa2, *boxa3, *boxa4, *boxa5, *boxaw; -BOXAA *boxaac; -PIX *pix1, *pix2, *pix3, *pix3a, *pix4, *pix5; - - PROCNAME("pixFindWordAndCharacterBoxes"); - - if (pboxaw) *pboxaw = NULL; - if (pboxaac) *pboxaac = NULL; - if (!pboxaw || !pboxaac) - return ERROR_INT("&boxaw and &boxaac not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) == 1) - return ERROR_INT("pixs not defined or 1 bpp", procName, 1); - if (thresh > 150) - L_WARNING("threshold is %d; may be too high\n", procName, thresh); - - if (boxs) { - if ((pix1 = pixClipRectangle(pixs, boxs, NULL)) == NULL) - return ERROR_INT("pix1 not made", procName, 1); - boxGetGeometry(boxs, &xs, &ys, NULL, NULL); - } else { - pix1 = pixClone(pixs); - xs = ys = 0; - } - - /* Convert pix1 to 8 bpp gray if necessary */ - pix2 = pixConvertTo8(pix1, FALSE); - - /* To find the words and letters, work with 1 bpp images and use - * a low threshold to reduce the number of touching characters. */ - pix3 = pixConvertTo1(pix2, thresh); - - /* Work at about 120 ppi to find the word bounding boxes. */ - pix3a = pixScaleToResolution(pix3, 120.0, 300.0, &scalefact); - - /* First find the words, removing the very small things like - * dots over the 'i' that weren't included in word boxes. 
*/ - pixGetWordBoxesInTextlines(pix3a, 1, 4, 150, 40, &boxa1a, NULL); - boxa1 = boxaTransform(boxa1a, 0, 0, 1.0 / scalefact, 1.0 / scalefact); - if (debugdir) { - loc = 0; - subdir = stringReplaceSubstr(debugdir, "/tmp/", "", &loc, NULL); - lept_mkdir(subdir); - LEPT_FREE(subdir); - pix4 = pixConvertTo32(pix2); - pixRenderBoxaArb(pix4, boxa1, 2, 255, 0, 0); - debugfile = stringJoin(debugdir, "/words.png"); - pixWrite(debugfile, pix4, IFF_PNG); - pixDestroy(&pix4); - LEPT_FREE(debugfile); - } - - /* Now find the letters at 300 ppi */ - nb = boxaGetCount(boxa1); - boxaw = boxaCreate(nb); - boxaac = boxaaCreate(nb); - *pboxaw = boxaw; - *pboxaac = boxaac; - for (i = 0; i < nb; i++) { - box1 = boxaGetBox(boxa1, i, L_COPY); - boxGetGeometry(box1, &xb, &yb, NULL, NULL); - pix4 = pixClipRectangle(pix3, box1, NULL); - /* Join detached parts of characters vertically */ - pix5 = pixMorphSequence(pix4, "c1.10", 0); - /* The connected components should mostly be characters */ - boxa2 = pixConnCompBB(pix5, 4); - /* Remove very small pieces */ - boxa3 = boxaSelectBySize(boxa2, 2, 5, L_SELECT_IF_BOTH, - L_SELECT_IF_GTE, NULL); - /* Order left to right */ - boxa4 = boxaSort(boxa3, L_SORT_BY_X, L_SORT_INCREASING, NULL); - /* Express locations with reference to the full input image */ - boxa5 = boxaTransform(boxa4, xs + xb, ys + yb, 1.0, 1.0); - box2 = boxTransform(box1, xs, ys, 1.0, 1.0); - - /* Ignore any boxa with no boxes after size filtering */ - if (boxaGetCount(boxa5) > 0) { - boxaAddBox(boxaw, box2, L_INSERT); - boxaaAddBoxa(boxaac, boxa5, L_INSERT); - } else { - boxDestroy(&box2); - boxaDestroy(&boxa5); - } - boxDestroy(&box1); - pixDestroy(&pix4); - pixDestroy(&pix5); - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - boxaDestroy(&boxa4); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix3a); - boxaDestroy(&boxa1); - boxaDestroy(&boxa1a); - if (debugdir) { - pix4 = pixConvertTo32(pixs); - boxa2 = boxaaFlattenToBoxa(boxaac, NULL, L_COPY); - 
pixRenderBoxaArb(pix4, boxa2, 2, 255, 0, 0); - boxa3 = boxaAdjustSides(boxaw, -2, 2, -2, 2); - pixRenderBoxaArb(pix4, boxa3, 2, 0, 255, 0); - debugfile = stringJoin(debugdir, "/chars.png"); - pixWrite(debugfile, pix4, IFF_PNG); - pixDestroy(&pix4); - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - LEPT_FREE(debugfile); - } - return 0; -} - - -/*------------------------------------------------------------------* - * Use word bounding boxes to compare page images * - *------------------------------------------------------------------*/ -/*! - * \brief boxaExtractSortedPattern() - * - * \param[in] boxa typ. of word bounding boxes, in textline order - * \param[in] na index of textline for each box in boxa - * \return naa NUMAA, where each numa represents one textline, - * or NULL on error - * - *
- * Notes:
- *      (1) The input is expected to come from pixGetWordBoxesInTextlines().
- *      (2) Each numa in the output consists of an average y coordinate
- *          of the first box in the textline, followed by pairs of
- *          x coordinates representing the left and right edges of each
- *          of the boxes in the textline.
- * 
- */ -NUMAA * -boxaExtractSortedPattern(BOXA *boxa, - NUMA *na) -{ -l_int32 index, nbox, row, prevrow, x, y, w, h; -BOX *box; -NUMA *nad; -NUMAA *naa; - - PROCNAME("boxaExtractSortedPattern"); - - if (!boxa) - return (NUMAA *)ERROR_PTR("boxa not defined", procName, NULL); - if (!na) - return (NUMAA *)ERROR_PTR("na not defined", procName, NULL); - - naa = numaaCreate(0); - nbox = boxaGetCount(boxa); - if (nbox == 0) - return naa; - - prevrow = -1; - for (index = 0; index < nbox; index++) { - box = boxaGetBox(boxa, index, L_CLONE); - numaGetIValue(na, index, &row); - if (row > prevrow) { - if (index > 0) - numaaAddNuma(naa, nad, L_INSERT); - nad = numaCreate(0); - prevrow = row; - boxGetGeometry(box, NULL, &y, NULL, &h); - numaAddNumber(nad, y + h / 2); - } - boxGetGeometry(box, &x, NULL, &w, NULL); - numaAddNumber(nad, x); - numaAddNumber(nad, x + w - 1); - boxDestroy(&box); - } - numaaAddNuma(naa, nad, L_INSERT); - - return naa; -} - - -/*! - * \brief numaaCompareImagesByBoxes() - * - * \param[in] naa1 for image 1, formatted by boxaExtractSortedPattern() - * \param[in] naa2 for image 2, formatted by boxaExtractSortedPattern() - * \param[in] nperline number of box regions to be used in each textline - * \param[in] nreq number of complete row matches required - * \param[in] maxshiftx max allowed x shift between two patterns, in pixels - * \param[in] maxshifty max allowed y shift between two patterns, in pixels - * \param[in] delx max allowed difference in x data, after alignment - * \param[in] dely max allowed difference in y data, after alignment - * \param[out] psame 1 if %nreq row matches are found; 0 otherwise - * \param[in] debugflag 1 for debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Each input numaa describes a set of sorted bounding boxes
- *          (sorted by textline and, within each textline, from
- *          left to right) in the images from which they are derived.
- *          See boxaExtractSortedPattern() for a description of the data
- *          format in each of the input numaa.
- *      (2) This function does an alignment between the input
- *          descriptions of bounding boxes for two images. The
- *          input parameter %nperline specifies the number of boxes
- *          to consider in each line when testing for a match, and
- *          %nreq is the required number of lines that must be well-aligned
- *          to get a match.
- *      (3) Testing by alignment has 3 steps:
- *          (a) Generating the location of word bounding boxes from the
- *              images (prior to calling this function).
- *          (b) Listing all possible pairs of aligned rows, based on
- *              tolerances in horizontal and vertical positions of
- *              the boxes.  Specifically, all pairs of rows are enumerated
- *              whose first %nperline boxes can be brought into close
- *              alignment, based on the delx parameter for boxes in the
- *              line and within the overall the %maxshiftx and %maxshifty
- *              constraints.
- *          (c) Each pair, starting with the first, is used to search
- *              for a set of %nreq - 1 other pairs that can all be aligned
- *              with a difference in global translation of not more
- *              than (%delx, %dely).
- * 
- */ -l_ok -numaaCompareImagesByBoxes(NUMAA *naa1, - NUMAA *naa2, - l_int32 nperline, - l_int32 nreq, - l_int32 maxshiftx, - l_int32 maxshifty, - l_int32 delx, - l_int32 dely, - l_int32 *psame, - l_int32 debugflag) -{ -l_int32 n1, n2, i, j, nbox, y1, y2, xl1, xl2; -l_int32 shiftx, shifty, match; -l_int32 *line1, *line2; /* indicator for sufficient boxes in a line */ -l_int32 *yloc1, *yloc2; /* arrays of y value for first box in a line */ -l_int32 *xleft1, *xleft2; /* arrays of x value for left side of first box */ -NUMA *na1, *na2, *nai1, *nai2, *nasx, *nasy; - - PROCNAME("numaaCompareImagesByBoxes"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - if (!naa1) - return ERROR_INT("naa1 not defined", procName, 1); - if (!naa2) - return ERROR_INT("naa2 not defined", procName, 1); - if (nperline < 1) - return ERROR_INT("nperline < 1", procName, 1); - if (nreq < 1) - return ERROR_INT("nreq < 1", procName, 1); - - n1 = numaaGetCount(naa1); - n2 = numaaGetCount(naa2); - if (n1 < nreq || n2 < nreq) - return 0; - - /* Find the lines in naa1 and naa2 with sufficient boxes. - * Also, find the y-values for each of the lines, and the - * LH x-values of the first box in each line. 
*/ - line1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32)); - line2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32)); - yloc1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32)); - yloc2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32)); - xleft1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32)); - xleft2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32)); - if (!line1 || !line2 || !yloc1 || !yloc2 || !xleft1 || !xleft2) - return ERROR_INT("callof failure for an array", procName, 1); - for (i = 0; i < n1; i++) { - na1 = numaaGetNuma(naa1, i, L_CLONE); - numaGetIValue(na1, 0, yloc1 + i); - numaGetIValue(na1, 1, xleft1 + i); - nbox = (numaGetCount(na1) - 1) / 2; - if (nbox >= nperline) - line1[i] = 1; - numaDestroy(&na1); - } - for (i = 0; i < n2; i++) { - na2 = numaaGetNuma(naa2, i, L_CLONE); - numaGetIValue(na2, 0, yloc2 + i); - numaGetIValue(na2, 1, xleft2 + i); - nbox = (numaGetCount(na2) - 1) / 2; - if (nbox >= nperline) - line2[i] = 1; - numaDestroy(&na2); - } - - /* Enumerate all possible line matches. A 'possible' line - * match is one where the x and y shifts for the first box - * in each line are within the maxshiftx and maxshifty - * constraints, and the left and right sides of the remaining - * (nperline - 1) successive boxes are within delx of each other. - * The result is a set of four numas giving parameters of - * each set of matching lines. 
*/ - nai1 = numaCreate(0); /* line index 1 of match */ - nai2 = numaCreate(0); /* line index 2 of match */ - nasx = numaCreate(0); /* shiftx for match */ - nasy = numaCreate(0); /* shifty for match */ - for (i = 0; i < n1; i++) { - if (line1[i] == 0) continue; - y1 = yloc1[i]; - xl1 = xleft1[i]; - na1 = numaaGetNuma(naa1, i, L_CLONE); - for (j = 0; j < n2; j++) { - if (line2[j] == 0) continue; - y2 = yloc2[j]; - if (L_ABS(y1 - y2) > maxshifty) continue; - xl2 = xleft2[j]; - if (L_ABS(xl1 - xl2) > maxshiftx) continue; - shiftx = xl1 - xl2; /* shift to add to x2 values */ - shifty = y1 - y2; /* shift to add to y2 values */ - na2 = numaaGetNuma(naa2, j, L_CLONE); - - /* Now check if 'nperline' boxes in the two lines match */ - match = testLineAlignmentX(na1, na2, shiftx, delx, nperline); - if (match) { - numaAddNumber(nai1, i); - numaAddNumber(nai2, j); - numaAddNumber(nasx, shiftx); - numaAddNumber(nasy, shifty); - } - numaDestroy(&na2); - } - numaDestroy(&na1); - } - - /* Determine if there are a sufficient number of mutually - * aligned matches. Mutually aligned matches place an additional - * constraint on the 'possible' matches, where the relative - * shifts must not exceed the (delx, dely) distances. 
*/ - countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely, - nreq, psame, debugflag); - - LEPT_FREE(line1); - LEPT_FREE(line2); - LEPT_FREE(yloc1); - LEPT_FREE(yloc2); - LEPT_FREE(xleft1); - LEPT_FREE(xleft2); - numaDestroy(&nai1); - numaDestroy(&nai2); - numaDestroy(&nasx); - numaDestroy(&nasy); - return 0; -} - - -static l_int32 -testLineAlignmentX(NUMA *na1, - NUMA *na2, - l_int32 shiftx, - l_int32 delx, - l_int32 nperline) -{ -l_int32 i, xl1, xr1, xl2, xr2, diffl, diffr; - - PROCNAME("testLineAlignmentX"); - - if (!na1) - return ERROR_INT("na1 not defined", procName, 1); - if (!na2) - return ERROR_INT("na2 not defined", procName, 1); - - for (i = 0; i < nperline; i++) { - numaGetIValue(na1, i + 1, &xl1); - numaGetIValue(na1, i + 2, &xr1); - numaGetIValue(na2, i + 1, &xl2); - numaGetIValue(na2, i + 2, &xr2); - diffl = L_ABS(xl1 - xl2 - shiftx); - diffr = L_ABS(xr1 - xr2 - shiftx); - if (diffl > delx || diffr > delx) - return 0; - } - - return 1; -} - - -/* - * \brief countAlignedMatches() - * - * \param[in] nai1, nai2 numas of row pairs for matches - * \param[in] nasx, nasy numas of x and y shifts for the matches - * \param[in] n1, n2 number of rows in images 1 and 2 - * \param[in] delx, dely allowed difference in shifts of the match, - * compared to the reference match - * \param[in] nre1 number of required aligned matches - * \param[out] psame return 1 if %nreq row matches are found; - * 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This takes 4 input arrays giving parameters of all the
- *          line matches.  It looks for the maximum set of aligned
- *          matches (matches with approximately the same overall shifts)
- *          that do not use rows from either image more than once.
- * 
- */ -static l_ok -countAlignedMatches(NUMA *nai1, - NUMA *nai2, - NUMA *nasx, - NUMA *nasy, - l_int32 n1, - l_int32 n2, - l_int32 delx, - l_int32 dely, - l_int32 nreq, - l_int32 *psame, - l_int32 debugflag) -{ -l_int32 i, j, nm, shiftx, shifty, nmatch, diffx, diffy; -l_int32 *ia1, *ia2, *iasx, *iasy, *index1, *index2; - - PROCNAME("countAlignedMatches"); - - if (!nai1 || !nai2 || !nasx || !nasy) - return ERROR_INT("4 input numas not defined", procName, 1); - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - - /* Check for sufficient aligned matches, doing a double iteration - * over the set of raw matches. The row index arrays - * are used to verify that the same rows in either image - * are not used in more than one match. Whenever there - * is a match that is properly aligned, those rows are - * marked in the index arrays. */ - nm = numaGetCount(nai1); /* number of matches */ - if (nm < nreq) - return 0; - - ia1 = numaGetIArray(nai1); - ia2 = numaGetIArray(nai2); - iasx = numaGetIArray(nasx); - iasy = numaGetIArray(nasy); - index1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32)); /* watch rows */ - index2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32)); - if (!index1 || !index2) - return ERROR_INT("calloc fail for array", procName, 1); - for (i = 0; i < nm; i++) { - if (*psame == 1) - break; - - /* Reset row index arrays */ - memset(index1, 0, 4 * n1); - memset(index2, 0, 4 * n2); - nmatch = 1; - index1[ia1[i]] = nmatch; /* mark these rows as taken */ - index2[ia2[i]] = nmatch; - shiftx = iasx[i]; /* reference shift between two rows */ - shifty = iasy[i]; /* ditto */ - if (nreq == 1) { - *psame = 1; - break; - } - for (j = 0; j < nm; j++) { - if (j == i) continue; - /* Rows must both be different from any previously seen */ - if (index1[ia1[j]] > 0 || index2[ia2[j]] > 0) continue; - /* Check the shift for this match */ - diffx = L_ABS(shiftx - iasx[j]); - diffy = L_ABS(shifty - iasy[j]); - if (diffx > delx || diffy > dely) continue; - 
/* We have a match */ - nmatch++; - index1[ia1[j]] = nmatch; /* mark the rows */ - index2[ia2[j]] = nmatch; - if (nmatch >= nreq) { - *psame = 1; - if (debugflag) - printRowIndices(index1, n1, index2, n2); - break; - } - } - } - - LEPT_FREE(ia1); - LEPT_FREE(ia2); - LEPT_FREE(iasx); - LEPT_FREE(iasy); - LEPT_FREE(index1); - LEPT_FREE(index2); - return 0; -} - - -static void -printRowIndices(l_int32 *index1, - l_int32 n1, - l_int32 *index2, - l_int32 n2) -{ -l_int32 i; - - lept_stderr("Index1: "); - for (i = 0; i < n1; i++) { - if (i && (i % 20 == 0)) - lept_stderr("\n "); - lept_stderr("%3d", index1[i]); - } - lept_stderr("\n"); - - lept_stderr("Index2: "); - for (i = 0; i < n2; i++) { - if (i && (i % 20 == 0)) - lept_stderr("\n "); - lept_stderr("%3d", index2[i]); - } - lept_stderr("\n"); - return; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorcontent.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorcontent.c deleted file mode 100644 index b58c7fb3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorcontent.c +++ /dev/null @@ -1,2009 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colorcontent.c - *
- *
- *      Builds an image of the color content, on a per-pixel basis,
- *      as a measure of the amount of divergence of each color
- *      component (R,G,B) from gray.
- *         l_int32    pixColorContent()
- *
- *      Finds the 'amount' of color in an image, on a per-pixel basis,
- *      as a measure of the difference of the pixel color from gray.
- *         PIX       *pixColorMagnitude()
- *
- *      Generates a mask over pixels that have sufficient color and
- *      are not too close to gray pixels.
- *         PIX       *pixMaskOverColorPixels()
- *
- *      Generates a mask over pixels that have little color and
- *      are not too bright.
- *         PIX       *pixMaskOverGrayPixels()
- *
- *      Generates mask over pixels within a prescribed cube in RGB space
- *         PIX       *pixMaskOverColorRange()
- *
- *      Finds the fraction of pixels with "color" that are not close to black
- *         l_int32    pixColorFraction()
- *
- *      Determine if there are significant color regions that are
- *      not background in a page image
- *         l_int32    pixFindColorRegions()
- *
- *      Finds the number of perceptually significant gray intensities
- *      in a grayscale image.
- *         l_int32    pixNumSignificantGrayColors()
- *
- *      Identifies images where color quantization will cause posterization
- *      due to the existence of many colors in low-gradient regions.
- *         l_int32    pixColorsForQuantization()
- *
- *      Finds the number of unique colors in an image
- *         l_int32    pixNumColors()
- *
- *      Lossless conversion of RGB image to colormapped
- *         PIX       *pixConvertRGBToCmap()
- *
- *      Find the most "populated" colors in the image (and quantize)
- *         l_int32    pixGetMostPopulatedColors()
- *         PIX       *pixSimpleColorQuantize()
- *
- *      Constructs a color histogram based on rgb indices
- *         NUMA      *pixGetRGBHistogram()
- *         l_int32    makeRGBIndexTables()
- *         l_int32    getRGBFromIndex()
- *
- *      Identify images that have highlight (red) color
- *         l_int32    pixHasHighlightRed()
- *
- *  Color is tricky.  If we consider gray (r = g = b) to have no color
- *  content, how should we define the color content in each component
- *  of an arbitrary pixel, as well as the overall color magnitude?
- *
- *  I can think of three ways to define the color content in each component:
- *
- *  (1) Linear.  For each component, take the difference from the average
- *      of all three.
- *  (2) Linear.  For each component, take the difference from the average
- *      of the other two.
- *  (3) Nonlinear.  For each component, take the minimum of the differences
- *      from the other two.
- *
- *  How might one choose from among these?  Consider two different situations:
- *  (a) r = g = 0, b = 255            {255}   /255/
- *  (b) r = 0, g = 127, b = 255       {191}   /128/
- *  How much g is in each of these?  The three methods above give:
- *  (a)  1: 85   2: 127   3: 0        [85]
- *  (b)  1: 0    2: 0     3: 127      [0]
- *  How much b is in each of these?
- *  (a)  1: 170  2: 255   3: 255      [255]
- *  (b)  1: 127  2: 191   3: 127      [191]
- *  The number I'd "like" to give is in [].  (Please don't ask why, it's
- *  just a feeling.
- *
- *  So my preferences seem to be somewhere between (1) and (2).
- *  (3) is just too "decisive!"  Let's pick (2).
- *
- *  We also allow compensation for white imbalance.  For each
- *  component, we do a linear TRC (gamma = 1.0), where the black
- *  point remains at 0 and the white point is given by the input
- *  parameter.  This is equivalent to doing a global remapping,
- *  as with pixGlobalNormRGB(), followed by color content (or magnitude)
- *  computation, but without the overhead of first creating the
- *  white point normalized image.
- *
- *  Another useful property is the overall color magnitude in the pixel.
- *  For this there are again several choices, such as:
- *      (a) rms deviation from the mean
- *      (b) the average L1 deviation from the mean
- *      (c) the maximum (over components) of one of the color
- *          content measures given above.
- *
- *  For now, we will choose two of the methods in (c):
- *     L_MAX_DIFF_FROM_AVERAGE_2
- *        Define the color magnitude as the maximum over components
- *        of the difference between the component value and the
- *        average of the other two.  It is easy to show that
- *        this is equivalent to selecting the two component values
- *        that are closest to each other, averaging them, and
- *        using the distance from that average to the third component.
- *        For (a) and (b) above, this value is in {..}.
- *    L_MAX_MIN_DIFF_FROM_2
- *        Define the color magnitude as the maximum over components
- *        of the minimum difference between the component value and the
- *        other two values.  It is easy to show that this is equivalent
- *        to selecting the intermediate value of the three differences
- *        between the three components.  For (a) and (b) above,
- *        this value is in /../.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* ----------------------------------------------------------------------- * - * Builds an image of the color content, on a per-pixel basis, * - * as a measure of the amount of divergence of each color * - * component (R,G,B) from gray. * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixColorContent() - * - * \param[in] pixs 32 bpp rgb or 8 bpp colormapped - * \param[in] rref, gref, bref reference color values (e.g. median - * or mean, to compare with the pixel - * component values. - * \param[in] mingray min gray value for which color is measured - * \param[out] ppixr [optional] 8 bpp red 'content' - * \param[out] ppixg [optional] 8 bpp green 'content' - * \param[out] ppixb [optional] 8 bpp blue 'content' - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns the color content in each component, which is
- *          a measure of the deviation from gray, and is defined
- *          as the difference between the component and the average of
- *          the other two components.  See the discussion at the
- *          top of this file.
- *      (2) The three numbers (rref, gref and bref) can be thought
- *          of in two ways:
- *            (a) as the values in the image corresponding to white,
- *                to compensate for an unbalanced color white point.
- *            (b) the median or mean values of the background color of
- *                a scan.
- *          The gamma TRC transformation is used to modify all colors so that
- *          these reference values become white.
- *          These three numbers must either be all 0 or all non-zero.
- *          To skip the TRC transform, set them all to 0.
- *      (3) If the maximum component after white point correction,
- *          max(r,g,b), is less than mingray, all color components
- *          for that pixel are set to zero.
- *          Use mingray = 0 to turn off this filtering of dark pixels.
- *      (4) Therefore, use 0 for all four input parameters if the color
- *          magnitude is to be calculated without either white balance
- *          correction or dark filtering.
- * 
- */ -l_ok -pixColorContent(PIX *pixs, - l_int32 rref, - l_int32 gref, - l_int32 bref, - l_int32 mingray, - PIX **ppixr, - PIX **ppixg, - PIX **ppixb) -{ -l_int32 w, h, d, i, j, wplc, wplr, wplg, wplb; -l_int32 rval, gval, bval, rgdiff, rbdiff, gbdiff, maxval, colorval; -l_int32 *rtab, *gtab, *btab; -l_uint32 pixel; -l_uint32 *datac, *datar, *datag, *datab, *linec, *liner, *lineg, *lineb; -NUMA *nar, *nag, *nab; -PIX *pixc; /* rgb */ -PIX *pixr, *pixg, *pixb; /* 8 bpp grayscale */ -PIXCMAP *cmap; - - PROCNAME("pixColorContent"); - - if (!ppixr && !ppixg && !ppixb) - return ERROR_INT("no return val requested", procName, 1); - if (ppixr) *ppixr = NULL; - if (ppixg) *ppixg = NULL; - if (ppixb) *ppixb = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (mingray < 0) mingray = 0; - pixGetDimensions(pixs, &w, &h, &d); - if (mingray > 255) - return ERROR_INT("mingray > 255", procName, 1); - if (rref < 0 || gref < 0 || bref < 0) - return ERROR_INT("some reference vals are negative", procName, 1); - if ((rref || gref || bref) && (rref * gref * bref == 0)) - return ERROR_INT("reference vals not all zero or all nonzero", - procName, 1); - - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return ERROR_INT("pixs neither cmapped nor 32 bpp", procName, 1); - if (cmap) - pixc = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc = pixClone(pixs); - - pixr = pixg = pixb = NULL; - pixGetDimensions(pixc, &w, &h, NULL); - if (ppixr) { - pixr = pixCreate(w, h, 8); - datar = pixGetData(pixr); - wplr = pixGetWpl(pixr); - *ppixr = pixr; - } - if (ppixg) { - pixg = pixCreate(w, h, 8); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - *ppixg = pixg; - } - if (ppixb) { - pixb = pixCreate(w, h, 8); - datab = pixGetData(pixb); - wplb = pixGetWpl(pixb); - *ppixb = pixb; - } - - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - if (rref) { /* all reference vals are nonzero */ - nar = numaGammaTRC(1.0, 0, rref); - rtab = 
numaGetIArray(nar); - nag = numaGammaTRC(1.0, 0, gref); - gtab = numaGetIArray(nag); - nab = numaGammaTRC(1.0, 0, bref); - btab = numaGetIArray(nab); - } - for (i = 0; i < h; i++) { - linec = datac + i * wplc; - if (pixr) - liner = datar + i * wplr; - if (pixg) - lineg = datag + i * wplg; - if (pixb) - lineb = datab + i * wplb; - for (j = 0; j < w; j++) { - pixel = linec[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - if (rref) { /* color correct for reference values */ - rval = rtab[rval]; - gval = gtab[gval]; - bval = btab[bval]; - } - if (mingray > 0) { /* dark pixels have no color value */ - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - if (maxval < mingray) - continue; /* colorval = 0 for each component */ - } - rgdiff = L_ABS(rval - gval); - rbdiff = L_ABS(rval - bval); - gbdiff = L_ABS(gval - bval); - if (pixr) { - colorval = (rgdiff + rbdiff) / 2; - SET_DATA_BYTE(liner, j, colorval); - } - if (pixg) { - colorval = (rgdiff + gbdiff) / 2; - SET_DATA_BYTE(lineg, j, colorval); - } - if (pixb) { - colorval = (rbdiff + gbdiff) / 2; - SET_DATA_BYTE(lineb, j, colorval); - } - } - } - - if (rref) { - numaDestroy(&nar); - numaDestroy(&nag); - numaDestroy(&nab); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - } - pixDestroy(&pixc); - return 0; -} - - -/* ----------------------------------------------------------------------- * - * Finds the 'amount' of color in an image, on a per-pixel basis, * - * as a measure of the difference of the pixel color from gray. * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixColorMagnitude() - * - * \param[in] pixs 32 bpp rgb or 8 bpp colormapped - * \param[in] rref, gref, bref reference color values (e.g. median - * or mean, to compare with the pixel - * component values. 
- * \param[in] type chooses the method for calculating the color magnitude: - * L_MAX_DIFF_FROM_AVERAGE_2, L_MAX_MIN_DIFF_FROM_2, - * L_MAX_DIFF - * \return pixd 8 bpp, amount of color in each source pixel, - * or NULL on error - * - *
- * Notes:
- *      (1) For an RGB image, a gray pixel is one where all three components
- *          are equal.  We define the amount of color in an RGB pixel as
- *          a function depending on the absolute value of the differences
- *          between the three color components.  Consider the two largest
- *          of these differences.  The pixel component in common to these
- *          two differences is the color farthest from the other two.
- *          The color magnitude in an RGB pixel can be taken as one
- *          of these three definitions:
- *            (a) The average of these two differences.  This is the
- *                average distance from the two components that are
- *                nearest to each other to the third component.
- *            (b) The minimum value of these two differences.  This is
- *                the intermediate value of the three distances between
- *                component values.  Stated otherwise, it is the
- *                maximum over all components of the minimum distance
- *                from that component to the other two components.
- *            (c) The maximum difference between component values.
- *      (2) As an example, suppose that R and G are the closest in
- *          magnitude.  Then the color is determined as either:
- *            (a) The average distance of B from these two:
- *                   (|B - R| + |B - G|) / 2
- *            (b) The minimum distance of B from these two:
- *                   min(|B - R|, |B - G|).
- *            (c) The maximum distance of B from these two:
- *                   max(|B - R|, |B - G|)
- *      (3) The three methods for choosing the color magnitude from
- *          the components are selected with these flags:
- *            (a) L_MAX_DIFF_FROM_AVERAGE_2
- *            (b) L_MAX_MIN_DIFF_FROM_2
- *            (c) L_MAX_DIFF
- *      (4) The three numbers (rref, gref and bref) can be thought
- *          of in two ways:
- *            (a) as the values in the image corresponding to white,
- *                to compensate for an unbalanced color white point.
- *            (b) the median or mean values of the background color of
- *                a scan.
- *          The gamma TRC transformation is used to modify all colors so that
- *          these reference values become white.
- *          These three numbers must either be all 0 or all non-zero.
- *          To skip the TRC transform, set them all to 0.
- * 
- */ -PIX * -pixColorMagnitude(PIX *pixs, - l_int32 rref, - l_int32 gref, - l_int32 bref, - l_int32 type) -{ -l_int32 w, h, d, i, j, wplc, wpld; -l_int32 rval, gval, bval, rdist, gdist, bdist, colorval; -l_int32 rgdist, rbdist, gbdist, mindist, maxdist, minval, maxval; -l_int32 *rtab, *gtab, *btab; -l_uint32 pixel; -l_uint32 *datac, *datad, *linec, *lined; -NUMA *nar, *nag, *nab; -PIX *pixc, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixColorMagnitude"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (type != L_MAX_DIFF_FROM_AVERAGE_2 && type != L_MAX_MIN_DIFF_FROM_2 && - type != L_MAX_DIFF) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (rref < 0 || gref < 0 || bref < 0) - return (PIX *)ERROR_PTR("some reference vals are negative", - procName, NULL); - if ((rref || gref || bref) && (rref * gref * bref == 0)) - return (PIX *)ERROR_PTR("reference vals not all zero or all nonzero", - procName, NULL); - - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (cmap) - pixc = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc = pixClone(pixs); - - pixd = pixCreate(w, h, 8); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datac = pixGetData(pixc); - wplc = pixGetWpl(pixc); - if (rref) { /* all ref vals are nonzero */ - nar = numaGammaTRC(1.0, 0, rref); - rtab = numaGetIArray(nar); - nag = numaGammaTRC(1.0, 0, gref); - gtab = numaGetIArray(nag); - nab = numaGammaTRC(1.0, 0, bref); - btab = numaGetIArray(nab); - } - for (i = 0; i < h; i++) { - linec = datac + i * wplc; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linec[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - if (rref) { /* color correct for reference values */ - rval = rtab[rval]; - gval = gtab[gval]; - bval = btab[bval]; - } - if (type == L_MAX_DIFF_FROM_AVERAGE_2) { - rdist = ((gval + bval ) / 2 - rval); 
- rdist = L_ABS(rdist); - gdist = ((rval + bval ) / 2 - gval); - gdist = L_ABS(gdist); - bdist = ((rval + gval ) / 2 - bval); - bdist = L_ABS(bdist); - colorval = L_MAX(rdist, gdist); - colorval = L_MAX(colorval, bdist); - } else if (type == L_MAX_MIN_DIFF_FROM_2) { /* intermediate dist */ - rgdist = L_ABS(rval - gval); - rbdist = L_ABS(rval - bval); - gbdist = L_ABS(gval - bval); - maxdist = L_MAX(rgdist, rbdist); - if (gbdist >= maxdist) { - colorval = maxdist; - } else { /* gbdist is smallest or intermediate */ - mindist = L_MIN(rgdist, rbdist); - colorval = L_MAX(mindist, gbdist); - } - } else { /* type == L_MAX_DIFF */ - minval = L_MIN(rval, gval); - minval = L_MIN(minval, bval); - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - colorval = maxval - minval; - } - SET_DATA_BYTE(lined, j, colorval); - } - } - - if (rref) { - numaDestroy(&nar); - numaDestroy(&nag); - numaDestroy(&nab); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - } - pixDestroy(&pixc); - return pixd; -} - - -/* ----------------------------------------------------------------------- * - * Generates a mask over pixels that have sufficient color and * - * are not too close to gray pixels. * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixMaskOverColorPixels() - * - * \param[in] pixs 32 bpp rgb or 8 bpp colormapped - * \param[in] threshdiff threshold for minimum of the max difference - * between components - * \param[in] mindist min allowed distance from nearest non-color pixel - * \return pixd 1 bpp, mask over color pixels, or NULL on error - * - *
- * Notes:
- *      (1) The generated mask identifies each pixel as either color or
- *          non-color.  For a pixel to be color, it must satisfy two
- *          constraints:
- *            (a) The max difference between the r,g and b components must
- *                equal or exceed a threshold %threshdiff.
- *            (b) It must be at least %mindist (in an 8-connected way)
- *                from the nearest non-color pixel.
- *      (2) The distance constraint (b) is only applied if %mindist > 1.
- *          For example, if %mindist == 2, the color pixels identified
- *          by (a) are eroded by a 3x3 Sel.  In general, the Sel size
- *          for erosion is 2 * (%mindist - 1) + 1.
- *          Why have this constraint?  In scanned images that are
- *          essentially gray, color artifacts are typically introduced
- *          in transition regions near sharp edges that go from dark
- *          to light, so this allows these transition regions to be removed.
- * 
- */ -PIX * -pixMaskOverColorPixels(PIX *pixs, - l_int32 threshdiff, - l_int32 mindist) -{ -l_int32 w, h, d, i, j, wpls, wpld, size; -l_int32 rval, gval, bval, minval, maxval; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixc, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixMaskOverColorPixels"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (cmap) - pixc = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc = pixClone(pixs); - - pixd = pixCreate(w, h, 1); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixc); - wpls = pixGetWpl(pixc); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - minval = L_MIN(rval, gval); - minval = L_MIN(minval, bval); - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - if (maxval - minval >= threshdiff) - SET_DATA_BIT(lined, j); - } - } - - if (mindist > 1) { - size = 2 * (mindist - 1) + 1; - pixErodeBrick(pixd, pixd, size, size); - } - - pixDestroy(&pixc); - return pixd; -} - - -/* ----------------------------------------------------------------------- * - * Generates a mask over pixels that have little color and * - * are not too bright * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixMaskOverGrayPixels() - * - * \param[in] pixs 32 bpp rgb - * \param[in] maxlimit only consider pixels with max component <= %maxlimit - * \param[in] satlimit only consider pixels with saturation <= %satlimit - * \return pixd (1 bpp), or NULL on error - * - *
- * Notes:
- *      (1) This generates a mask over rgb pixels that are gray (i.e.,
- *          have low saturation) and are not too bright.  For example, if
- *          we know that the gray pixels in %pixs have saturation
- *          (max - min) less than 10, and brightness (max) less than 200,
- *             pixMaskOverGrayPixels(pixs, 220, 10)
- *          will generate a mask over the gray pixels.  Other pixels that
- *          are not too dark and have a relatively large saturation will
- *          be little affected.
- *      (2) The algorithm is related to pixDarkenGray().
- * 
- */ -PIX * -pixMaskOverGrayPixels(PIX *pixs, - l_int32 maxlimit, - l_int32 satlimit) -{ -l_int32 w, h, i, j, wpls, wpld; -l_int32 rval, gval, bval, minrg, min, maxrg, max, sat; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixMaskOverGrayPixels"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (maxlimit < 0 || maxlimit > 255) - return (PIX *)ERROR_PTR("invalid maxlimit", procName, NULL); - if (satlimit < 1) - return (PIX *)ERROR_PTR("invalid satlimit", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - minrg = L_MIN(rval, gval); - min = L_MIN(minrg, bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - sat = max - min; - if (max <= maxlimit && sat <= satlimit) - SET_DATA_BIT(lined, j); - } - } - return pixd; -} - - -/* ----------------------------------------------------------------------- * - * Generates a mask over pixels that have RGB color components * - * within the prescribed range (a cube in RGB color space) * - * ----------------------------------------------------------------------- */ -/*! 
- * \brief pixMaskOverColorRange() - * - * \param[in] pixs 32 bpp rgb or 8 bpp colormapped - * \param[in] rmin, rmax min and max allowed values for red component - * \param[in] gmin, gmax ditto for green - * \param[in] bmin, bmax ditto for blue - * \return pixd 1 bpp, mask over color pixels, or NULL on error - */ -PIX * -pixMaskOverColorRange(PIX *pixs, - l_int32 rmin, - l_int32 rmax, - l_int32 gmin, - l_int32 gmax, - l_int32 bmin, - l_int32 bmax) -{ -l_int32 w, h, d, i, j, wpls, wpld; -l_int32 rval, gval, bval; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixc, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixMaskOverColorRange"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (cmap) - pixc = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc = pixClone(pixs); - - pixd = pixCreate(w, h, 1); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixc); - wpls = pixGetWpl(pixc); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - if (rval < rmin || rval > rmax) continue; - if (gval < gmin || gval > gmax) continue; - if (bval < bmin || bval > bmax) continue; - SET_DATA_BIT(lined, j); - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/* ----------------------------------------------------------------------- * - * Finds the fraction of pixels with "color" that are not close to black * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixColorFraction() - * - * \param[in] pixs 32 bpp rgb - * \param[in] darkthresh threshold near black; if the lightest component - * is below this, the pixel is not considered in - * the statistics; typ. 
20 - * \param[in] lightthresh threshold near white; if the darkest component - * is above this, the pixel is not considered in - * the statistics; typ. 244 - * \param[in] diffthresh thresh for the maximum difference between - * component value; below this the pixel is not - * considered to have sufficient color - * \param[in] factor subsampling factor - * \param[out] ppixfract fraction of pixels in intermediate - * brightness range that were considered - * for color content - * \param[out] pcolorfract fraction of pixels that meet the - * criterion for sufficient color; 0.0 on error - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function is asking the question: to what extent does the
- *          image appear to have color?   The amount of color a pixel
- *          appears to have depends on both the deviation of the
- *          individual components from their average and on the average
- *          intensity itself.  For example, the color will be much more
- *          obvious with a small deviation from white than the same
- *          deviation from black.
- *      (2) Any pixel that meets these three tests is considered a
- *          colorful pixel:
- *            (a) the lightest component must equal or exceed %darkthresh
- *            (b) the darkest component must not exceed %lightthresh
- *            (c) the max difference between components must equal or
- *                exceed %diffthresh.
- *      (3) The dark pixels are removed from consideration because
- *          they don't appear to have color.
- *      (4) The very lightest pixels are removed because if an image
- *          has a lot of "white", the color fraction will be artificially
- *          low, even if all the other pixels are colorful.
- *      (5) If pixfract is very small, there are few pixels that are neither
- *          black nor white.  If colorfract is very small, the pixels
- *          that are neither black nor white have very little color
- *          content.  The product 'pixfract * colorfract' gives the
- *          fraction of pixels with significant color content.
- *      (6) One use of this function is as a preprocessing step for median
- *          cut quantization (colorquant2.c), which does a very poor job
- *          splitting the color space into rectangular volume elements when
- *          all the pixels are near the diagonal of the color cube.  For
- *          octree quantization of an image with only gray values, the
- *          2^(level) octcubes on the diagonal are the only ones
- *          that can be occupied.
- * 
- */ -l_ok -pixColorFraction(PIX *pixs, - l_int32 darkthresh, - l_int32 lightthresh, - l_int32 diffthresh, - l_int32 factor, - l_float32 *ppixfract, - l_float32 *pcolorfract) -{ -l_int32 i, j, w, h, wpl, rval, gval, bval, minval, maxval; -l_int32 total, npix, ncolor; -l_uint32 pixel; -l_uint32 *data, *line; - - PROCNAME("pixColorFraction"); - - if (ppixfract) *ppixfract = 0.0; - if (pcolorfract) *pcolorfract = 0.0; - if (!ppixfract || !pcolorfract) - return ERROR_INT("&pixfract and &colorfract not defined", - procName, 1); - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - npix = ncolor = total = 0; - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - total++; - pixel = line[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - minval = L_MIN(rval, gval); - minval = L_MIN(minval, bval); - if (minval > lightthresh) /* near white */ - continue; - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - if (maxval < darkthresh) /* near black */ - continue; - - npix++; - if (maxval - minval >= diffthresh) - ncolor++; - } - } - - if (npix == 0) { - L_WARNING("No pixels found for consideration\n", procName); - return 0; - } - *ppixfract = (l_float32)npix / (l_float32)total; - *pcolorfract = (l_float32)ncolor / (l_float32)npix; - return 0; -} - - -/* ----------------------------------------------------------------------- * - * Determine if there are significant color regions in a page image * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixFindColorRegions() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixm [optional] 1 bpp mask image - * \param[in] factor subsample factor; integer >= 1 - * \param[in] lightthresh threshold for component average in lightest - * of 10 buckets; typ. 
210; -1 for default - * \param[in] darkthresh threshold to eliminate dark pixels (e.g., text) - * from consideration; typ. 70; -1 for default. - * \param[in] mindiff minimum difference (b - r) and (g - r), used to - * find blue or green pixels; typ. 10; -1 for default - * \param[in] colordiff minimum difference in (max - min) component to - * qualify as a color pixel; typ. 90; -1 for default - * \param[in] edgefract fraction of image half-width and half-height - * for which color pixels are ignored; typ. 0.05. - * \param[out] pcolorfract fraction of 'color' pixels found - * \param[out] pcolormask1 [optional] mask over background color, if any - * \param[out] pcolormask2 [optional] filtered mask over background color - * \param[out] pixadb [optional] debug intermediate results - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function tries to determine if there is a significant
- *          color or darker region on a scanned page image, where part
- *          of the image is background that is either white or reddish.
- *          This also allows extraction of regions of colored pixels that
- *          have a smaller red component than blue or green components.
- *      (2) If %pixm exists, pixels under its fg are combined with
- *          dark pixels to make a mask of pixels not to be considered
- *          as color candidates.
- *      (3) There are four thresholds.
- *          * %lightthresh: compute the average value of each rgb pixel,
- *            and make 10 buckets by value.  If the lightest bucket gray
- *            value is below %lightthresh, the image is not considered
- *            to have a light bg, and this returns 0.0 for %colorfract.
- *          * %darkthresh: ignore pixels darker than this (typ. fg text).
- *            We make a 1 bpp mask of these pixels, and then dilate it to
- *            remove all vestiges of fg from their vicinity.
- *          * %mindiff: consider pixels with either (b - r) or (g - r)
- *            being at least this value, as having color.
- *          * %colordiff: consider pixels where the (max - min) difference
- *            of the pixel components exceeds this value, as having color.
- *      (4) All components of color pixels that are touching the image
- *          border are removed.  Additionally, all pixels within some
- *          normalized distance %edgefract from the image border can
- *          be removed.  This insures that dark pixels near the edge
- *          of the image are not included.
- *      (5) This returns in %pcolorfract the fraction of pixels that have
- *          color and are not in the set consisting of an OR between
- *          %pixm and the dilated dark pixel mask.
- *      (6) No masks are returned unless light color pixels are found.
- *          If colorfract > 0.0 and %pcolormask1 is defined, this returns
- *          a 1 bpp mask with fg pixels over the color background.
- *          This mask may have some holes in it.
- *      (7) If colorfract > 0.0 and %pcolormask2 is defined, this returns
- *          a version of colormask1 where small holes have been filled.
- *      (8) To generate a boxa of rectangular regions from the overlap
- *          of components in the filtered mask:
- *                boxa1 = pixConnCompBB(colormask2, 8);
- *                boxa2 = boxaCombineOverlaps(boxa1, NULL);
- *          This is done here in debug mode.
- * 
- */ -l_ok -pixFindColorRegions(PIX *pixs, - PIX *pixm, - l_int32 factor, - l_int32 lightthresh, - l_int32 darkthresh, - l_int32 mindiff, - l_int32 colordiff, - l_float32 edgefract, - l_float32 *pcolorfract, - PIX **pcolormask1, - PIX **pcolormask2, - PIXA *pixadb) -{ -l_int32 w, h, count, rval, gval, bval, aveval, proceed; -l_float32 ratio; -l_uint32 *carray; -BOXA *boxa1, *boxa2; -PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pixm1, *pixm2, *pixm3; - - PROCNAME("pixFindColorRegions"); - - if (pcolormask1) *pcolormask1 = NULL; - if (pcolormask2) *pcolormask2 = NULL; - if (!pcolorfract) - return ERROR_INT("&colorfract not defined", procName, 1); - *pcolorfract = 0.0; - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (factor < 1) factor = 1; - if (lightthresh < 0) lightthresh = 210; /* defaults */ - if (darkthresh < 0) darkthresh = 70; - if (mindiff < 0) mindiff = 10; - if (colordiff < 0) colordiff = 90; - if (edgefract < 0.0 || edgefract > 1.0) edgefract = 0.05; - - /* Check if pixm covers most of the image. If so, just return. */ - pixGetDimensions(pixs, &w, &h, NULL); - if (pixm) { - pixCountPixels(pixm, &count, NULL); - ratio = (l_float32)count / ((l_float32)(w) * h); - if (ratio > 0.7) { - if (pixadb) L_INFO("pixm has big fg: %f5.2\n", procName, ratio); - return 0; - } - } - - /* Get the light background color. Use the average component value - * and select the lightest of 10 buckets. Require that it is - * reddish and, using lightthresh, not too dark. 
*/ - pixGetRankColorArray(pixs, 10, L_SELECT_AVERAGE, factor, &carray, NULL, 0); - if (!carray) - return ERROR_INT("rank color array not made", procName, 1); - extractRGBValues(carray[9], &rval, &gval, &bval); - if (pixadb) L_INFO("lightest background color: (r,g,b) = (%d,%d,%d)\n", - procName, rval, gval, bval); - proceed = TRUE; - if ((rval < bval - 2) || (rval < gval - 2)) { - if (pixadb) L_INFO("background not reddish\n", procName); - proceed = FALSE; - } - aveval = (rval + gval + bval) / 3; - if (aveval < lightthresh) { - if (pixadb) L_INFO("background too dark\n", procName); - proceed = FALSE; - } - if (pixadb) { - pix1 = pixDisplayColorArray(carray, 10, 120, 3, 6); - pixaAddPix(pixadb, pix1, L_INSERT); - } - LEPT_FREE(carray); - if (proceed == FALSE) return 0; - - /* Make a mask pixm1 over the dark pixels in the image: - * convert to gray using the average of the components; - * threshold using darkthresh; do a small dilation; - * combine with pixm. */ - pix1 = pixConvertRGBToGray(pixs, 0.33, 0.34, 0.33); - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - pixm1 = pixThresholdToBinary(pix1, darkthresh); - pixDilateBrick(pixm1, pixm1, 7, 7); - if (pixadb) pixaAddPix(pixadb, pixm1, L_COPY); - if (pixm) { - pixOr(pixm1, pixm1, pixm); - if (pixadb) pixaAddPix(pixadb, pixm1, L_COPY); - } - pixDestroy(&pix1); - - /* Make masks over pixels that are bluish, or greenish, or - have a very large color saturation (max - min) value. 
*/ - pixm2 = pixConvertRGBToBinaryArb(pixs, -1.0, 0.0, 1.0, mindiff, - L_SELECT_IF_GTE); /* b - r */ - if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); - pix1 = pixConvertRGBToBinaryArb(pixs, -1.0, 1.0, 0.0, mindiff, - L_SELECT_IF_GTE); /* g - r */ - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - pixOr(pixm2, pixm2, pix1); - pixDestroy(&pix1); - pix1 = pixConvertRGBToGrayMinMax(pixs, L_CHOOSE_MAXDIFF); - pix2 = pixThresholdToBinary(pix1, colordiff); - pixInvert(pix2, pix2); - if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); - pixOr(pixm2, pixm2, pix2); - if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); - pixDestroy(&pix1); - pixDestroy(&pix2); - - /* Subtract the dark pixels represented by pixm1. - * pixm2 now holds all the color pixels of interest */ - pixSubtract(pixm2, pixm2, pixm1); - pixDestroy(&pixm1); - if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); - - /* But we're not quite finished. Remove pixels from any component - * that is touching the image border. False color pixels can - * sometimes be found there if the image is much darker near - * the border, due to oxidation or reduced illumination. Also - * remove any pixels within the normalized fraction %distfract - * of the image border. 
*/ - pixm3 = pixRemoveBorderConnComps(pixm2, 8); - pixDestroy(&pixm2); - if (edgefract > 0.0) { - pix2 = pixMakeSymmetricMask(w, h, edgefract, edgefract, L_USE_INNER); - pixAnd(pixm3, pixm3, pix2); - pixDestroy(&pix2); - } - if (pixadb) pixaAddPix(pixadb, pixm3, L_COPY); - - /* Get the fraction of light color pixels */ - pixCountPixels(pixm3, &count, NULL); - *pcolorfract = (l_float32)count / ((l_float32)(w) * h); - if (pixadb) { - if (count == 0) - L_INFO("no light color pixels found\n", procName); - else - L_INFO("fraction of light color pixels = %5.3f\n", procName, - *pcolorfract); - } - - /* Debug: extract the color pixels from pixs */ - if (pixadb && count > 0) { - /* Use pixm3 to extract the color pixels */ - pix3 = pixCreateTemplate(pixs); - pixSetAll(pix3); - pixCombineMasked(pix3, pixs, pixm3); - pixaAddPix(pixadb, pix3, L_INSERT); - - /* Use additional filtering to extract the color pixels */ - pix3 = pixCloseSafeBrick(NULL, pixm3, 15, 15); - pixaAddPix(pixadb, pix3, L_INSERT); - pix5 = pixCreateTemplate(pixs); - pixSetAll(pix5); - pixCombineMasked(pix5, pixs, pix3); - pixaAddPix(pixadb, pix5, L_INSERT); - - /* Get the combined bounding boxes of the mask components - * in pix3, and extract those pixels from pixs. 
*/ - boxa1 = pixConnCompBB(pix3, 8); - boxa2 = boxaCombineOverlaps(boxa1, NULL); - pix4 = pixCreateTemplate(pix3); - pixMaskBoxa(pix4, pix4, boxa2, L_SET_PIXELS); - pixaAddPix(pixadb, pix4, L_INSERT); - pix5 = pixCreateTemplate(pixs); - pixSetAll(pix5); - pixCombineMasked(pix5, pixs, pix4); - pixaAddPix(pixadb, pix5, L_INSERT); - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - } - pixaAddPix(pixadb, pixs, L_COPY); - - /* Optional colormask returns */ - if (pcolormask2 && count > 0) - *pcolormask2 = pixCloseSafeBrick(NULL, pixm3, 15, 15); - if (pcolormask1 && count > 0) - *pcolormask1 = pixm3; - else - pixDestroy(&pixm3); - return 0; -} - - -/* ----------------------------------------------------------------------- * - * Finds the number of perceptually significant gray intensities * - * in a grayscale image. * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixNumSignificantGrayColors() - * - * \param[in] pixs 8 bpp gray - * \param[in] darkthresh dark threshold for minimum intensity to be - * considered; typ. 20 - * \param[in] lightthresh threshold near white, for maximum intensity - * to be considered; typ. 236 - * \param[in] minfract minimum fraction of all pixels to include a level - * as significant; typ. 0.0001; should be < 0.001 - * \param[in] factor subsample factor; integer >= 1 - * \param[out] pncolors number of significant colors; 0 on error - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function is asking the question: how many perceptually
- *          significant gray color levels is in this pix?
- *          A color level must meet 3 criteria to be significant:
- *            ~ it can't be too close to black
- *            ~ it can't be too close to white
- *            ~ it must have at least some minimum fractional population
- *      (2) Use -1 for default values for darkthresh, lightthresh and minfract.
- *      (3) Choose default of darkthresh = 20, because variations in very
- *          dark pixels are not visually significant.
- *      (4) Choose default of lightthresh = 236, because document images
- *          that have been jpeg'd typically have near-white pixels in the
- *          8x8 jpeg blocks, and these should not be counted.  It is desirable
- *          to obtain a clean image by quantizing this noise away.
- * 
- */ -l_ok -pixNumSignificantGrayColors(PIX *pixs, - l_int32 darkthresh, - l_int32 lightthresh, - l_float32 minfract, - l_int32 factor, - l_int32 *pncolors) -{ -l_int32 i, w, h, count, mincount, ncolors; -NUMA *na; - - PROCNAME("pixNumSignificantGrayColors"); - - if (!pncolors) - return ERROR_INT("&ncolors not defined", procName, 1); - *pncolors = 0; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (darkthresh < 0) darkthresh = 20; /* defaults */ - if (lightthresh < 0) lightthresh = 236; - if (minfract < 0.0) minfract = 0.0001; - if (minfract > 1.0) - return ERROR_INT("minfract > 1.0", procName, 1); - if (minfract >= 0.001) - L_WARNING("minfract too big; likely to underestimate ncolors\n", - procName); - if (lightthresh > 255 || darkthresh >= lightthresh) - return ERROR_INT("invalid thresholds", procName, 1); - if (factor < 1) factor = 1; - - pixGetDimensions(pixs, &w, &h, NULL); - mincount = (l_int32)(minfract * w * h * factor * factor); - if ((na = pixGetGrayHistogram(pixs, factor)) == NULL) - return ERROR_INT("na not made", procName, 1); - ncolors = 2; /* add in black and white */ - for (i = darkthresh; i <= lightthresh; i++) { - numaGetIValue(na, i, &count); - if (count >= mincount) - ncolors++; - } - - *pncolors = ncolors; - numaDestroy(&na); - return 0; -} - - -/* ----------------------------------------------------------------------- * - * Identifies images where color quantization will cause posterization * - * due to the existence of many colors in low-gradient regions. * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixColorsForQuantization() - * \param[in] pixs 8 bpp gray or 32 bpp rgb; with or without colormap - * \param[in] thresh binary threshold on edge gradient; 0 for default - * \param[out] pncolors the number of colors found - * \param[out] piscolor [optional] 1 if significant color is found; - * 0 otherwise. 
If pixs is 8 bpp, and does not have - * a colormap with color entries, this is 0 - * \param[in] debug 1 to output masked image that is tested for colors; - * 0 otherwise - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This function finds a measure of the number of colors that are
- *          found in low-gradient regions of an image.  By its
- *          magnitude relative to some threshold (not specified in
- *          this function), it gives a good indication of whether
- *          quantization will generate posterization.   This number
- *          is larger for images with regions of slowly varying
- *          intensity (if 8 bpp) or color (if rgb). Such images, if
- *          quantized, may require dithering to avoid posterization,
- *          and lossless compression is then expected to be poor.
- *      (2) If pixs has a colormap, the number of colors returned is
- *          the number in the colormap.
- *      (3) It is recommended that document images be reduced to a width
- *          of 800 pixels before applying this function.  Then it can
- *          be expected that color detection will be fairly accurate
- *          and the number of colors will reflect both the content and
- *          the type of compression to be used.  For less than 15 colors,
- *          there is unlikely to be a halftone image, and lossless
- *          quantization should give both a good visual result and
- *          better compression.
- *      (4) When using the default threshold on the gradient (15),
- *          images (both gray and rgb) where ncolors is greater than
- *          about 15 will compress poorly with either lossless
- *          compression or dithered quantization, and they may be
- *          posterized with non-dithered quantization.
- *      (5) For grayscale images, or images without significant color,
- *          this returns the number of significant gray levels in
- *          the low-gradient regions.  The actual number of gray levels
- *          can be large due to jpeg compression noise in the background.
- *      (6) Similarly, for color images, the actual number of different
- *          (r,g,b) colors in the low-gradient regions (rather than the
- *          number of occupied level 4 octcubes) can be quite large, e.g.,
- *          due to jpeg compression noise, even for regions that appear
- *          to be of a single color.  By quantizing to level 4 octcubes,
- *          most of these superfluous colors are removed from the counting.
- *      (7) The image is tested for color.  If there is very little color,
- *          it is thresholded to gray and the number of gray levels in
- *          the low gradient regions is found.  If the image has color,
- *          the number of occupied level 4 octcubes is found.
- *      (8) The number of colors in the low-gradient regions increases
- *          monotonically with the threshold %thresh on the edge gradient.
- *      (9) Background: grayscale and color quantization is often useful
- *          to achieve highly compressed images with little visible
- *          distortion.  However, gray or color washes (regions of
- *          low gradient) can defeat this approach to high compression.
- *          How can one determine if an image is expected to compress
- *          well using gray or color quantization?  We use the fact that
- *            * gray washes, when quantized with less than 50 intensities,
- *              have posterization (visible boundaries between regions
- *              of uniform 'color') and poor lossless compression
- *            * color washes, when quantized with level 4 octcubes,
- *              typically result in both posterization and the occupancy
- *              of many level 4 octcubes.
- *          Images can have colors either intrinsically or as jpeg
- *          compression artifacts.  This function reduces but does not
- *          completely eliminate measurement of jpeg quantization noise
- *          in the white background of grayscale or color images.
- * 
- */ -l_ok -pixColorsForQuantization(PIX *pixs, - l_int32 thresh, - l_int32 *pncolors, - l_int32 *piscolor, - l_int32 debug) -{ -l_int32 w, h, d, minside, factor; -l_float32 pixfract, colorfract; -PIX *pixt, *pixsc, *pixg, *pixe, *pixb, *pixm; -PIXCMAP *cmap; - - PROCNAME("pixColorsForQuantization"); - - if (piscolor) *piscolor = 0; - if (!pncolors) - return ERROR_INT("&ncolors not defined", procName, 1); - *pncolors = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) != NULL) { - *pncolors = pixcmapGetCount(cmap); - if (piscolor) - pixcmapHasColor(cmap, piscolor); - return 0; - } - - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32) - return ERROR_INT("pixs not 8 or 32 bpp", procName, 1); - if (thresh <= 0) - thresh = 15; - - /* First test if 32 bpp has any significant color; if not, - * convert it to gray. Colors whose average values are within - * 20 of black or 8 of white are ignored because they're not - * very 'colorful'. If less than 2.5/10000 of the pixels have - * significant color, consider the image to be gray. */ - minside = L_MIN(w, h); - if (d == 8) { - pixt = pixClone(pixs); - } else { /* d == 32 */ - factor = L_MAX(1, minside / 400); - pixColorFraction(pixs, 20, 248, 30, factor, &pixfract, &colorfract); - if (pixfract * colorfract < 0.00025) { - pixt = pixGetRGBComponent(pixs, COLOR_RED); - d = 8; - } else { /* d == 32 */ - pixt = pixClone(pixs); - if (piscolor) - *piscolor = 1; - } - } - - /* If the smallest side is less than 1000, do not downscale. - * If it is in [1000 ... 2000), downscale by 2x. If it is >= 2000, - * downscale by 4x. Factors of 2 are chosen for speed. The - * actual resolution at which subsequent calculations take place - * is not strongly dependent on downscaling. 
*/ - factor = L_MAX(1, minside / 500); - if (factor == 1) - pixsc = pixCopy(NULL, pixt); /* to be sure pixs is unchanged */ - else if (factor == 2 || factor == 3) - pixsc = pixScaleAreaMap2(pixt); - else - pixsc = pixScaleAreaMap(pixt, 0.25, 0.25); - - /* Basic edge mask generation procedure: - * ~ work on a grayscale image - * ~ get a 1 bpp edge mask by using an edge filter and - * thresholding to get fg pixels at the edges - * ~ for gray, dilate with a 3x3 brick Sel to get mask over - * all pixels within a distance of 1 pixel from the nearest - * edge pixel - * ~ for color, dilate with a 7x7 brick Sel to get mask over - * all pixels within a distance of 3 pixels from the nearest - * edge pixel */ - if (d == 8) - pixg = pixClone(pixsc); - else /* d == 32 */ - pixg = pixConvertRGBToLuminance(pixsc); - pixe = pixSobelEdgeFilter(pixg, L_ALL_EDGES); - pixb = pixThresholdToBinary(pixe, thresh); - pixInvert(pixb, pixb); - if (d == 8) - pixm = pixMorphSequence(pixb, "d3.3", 0); - else - pixm = pixMorphSequence(pixb, "d7.7", 0); - - /* Mask the near-edge pixels to white, and count the colors. - * If grayscale, don't count colors within 20 levels of - * black or white, and only count colors with a fraction - * of at least 1/10000 of the image pixels. - * If color, count the number of level 4 octcubes that - * contain at least 20 pixels. These magic numbers are guesses - * as to what might work, based on a small data set. Results - * should not be overly sensitive to their actual values. 
*/ - if (d == 8) { - pixSetMasked(pixg, pixm, 0xff); - if (debug) pixWrite("junkpix8.png", pixg, IFF_PNG); - pixNumSignificantGrayColors(pixg, 20, 236, 0.0001, 1, pncolors); - } else { /* d == 32 */ - pixSetMasked(pixsc, pixm, 0xffffffff); - if (debug) pixWrite("junkpix32.png", pixsc, IFF_PNG); - pixNumberOccupiedOctcubes(pixsc, 4, 20, -1, pncolors); - } - - pixDestroy(&pixt); - pixDestroy(&pixsc); - pixDestroy(&pixg); - pixDestroy(&pixe); - pixDestroy(&pixb); - pixDestroy(&pixm); - return 0; -} - - -/* ----------------------------------------------------------------------- * - * Finds the number of unique colors in an image * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixNumColors() - * \param[in] pixs 2, 4, 8, 32 bpp - * \param[in] factor subsampling factor; integer - * \param[out] pncolors the number of colors found, or 0 if - * there are more than 256 - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This returns the number of colors found in the image,
- *          even if there is a colormap.  If %factor == 1 and the
- *          number of colors differs from the number of entries
- *          in the colormap, a warning is issued.
- *      (2) Use %factor == 1 to find the actual number of colors.
- *          Use %factor > 1 to more efficiently find an approximate
- *          number of colors.
- *      (3) For d = 2, 4 or 8 bpp grayscale, this returns the number
- *          of colors found in the image in 'ncolors'.
- *      (4) For d = 32 bpp (rgb), if the number of colors is greater
- *          than 256, this uses an ordered set.
- * 
- */ -l_ok -pixNumColors(PIX *pixs, - l_int32 factor, - l_int32 *pncolors) -{ -l_int32 w, h, d, i, j, wpl, hashsize, sum, count, manycolors; -l_int32 rval, gval, bval, val; -l_int32 *inta; -l_uint32 pixel; -l_uint32 *data, *line; -PIXCMAP *cmap; - - PROCNAME("pixNumColors"); - - if (!pncolors) - return ERROR_INT("&ncolors not defined", procName, 1); - *pncolors = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8 && d != 32) - return ERROR_INT("d not in {2, 4, 8, 32}", procName, 1); - if (factor < 1) factor = 1; - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - sum = 0; - if (d != 32) { /* grayscale */ - inta = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - if (d == 8) - val = GET_DATA_BYTE(line, j); - else if (d == 4) - val = GET_DATA_QBIT(line, j); - else /* d == 2 */ - val = GET_DATA_DIBIT(line, j); - inta[val] = 1; - } - } - for (i = 0; i < 256; i++) - if (inta[i]) sum++; - *pncolors = sum; - LEPT_FREE(inta); - - cmap = pixGetColormap(pixs); - if (cmap && factor == 1) { - count = pixcmapGetCount(cmap); - if (sum != count) - L_WARNING("colormap size %d differs from actual colors\n", - procName, count); - } - return 0; - } - - /* 32 bpp rgb; quit if we get above 256 colors */ - hashsize = 5507; /* big and prime; collisions are not likely */ - inta = (l_int32 *)LEPT_CALLOC(hashsize, sizeof(l_int32)); - manycolors = 0; - for (i = 0; i < h && manycolors == 0; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - pixel = line[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - val = (137 * rval + 269 * gval + 353 * bval) % hashsize; - if (inta[val] == 0) { - inta[val] = 1; - sum++; - if (sum > 256) { - manycolors = 1; - break; - } - } - } - } - LEPT_FREE(inta); - - if (manycolors == 0) { - *pncolors = sum; - return 0; - } - - /* More than 256 colors in 
RGB image */ - return pixCountRGBColors(pixs, factor, pncolors); -} - - -/* ----------------------------------------------------------------------- * - * Lossless conversion of RGB image to colormapped * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixConvertRGBToCmap() - * \param[in] pixs 32 bpp RGB - * \return pixd if num colors <= 256; null otherwise or on error - * - *
- * Notes:
- *      (1) If there are not more than 256 colors, this losslessly
- *          converts and RGB image to a colormapped one, with the
- *          smallest pixel depth required to hold all the colors.
- * 
- */ -PIX * -pixConvertRGBToCmap(PIX *pixs) -{ -l_int32 w, h, d, i, j, wpls, wpld, hashsize, hashval, ncolors, index; -l_int32 rval, gval, bval, val; -l_int32 *hasha1, *hasha2; -l_uint32 pixel; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertRGBToCmap"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixNumColors(pixs, 1, &ncolors); - if (ncolors > 256) { - L_ERROR("too many colors found: %d\n", procName, ncolors); - return NULL; - } - - pixGetDimensions(pixs, &w, &h, NULL); - if (ncolors <= 2) - d = 1; - else if (ncolors <= 4) - d = 2; - else if (ncolors <= 16) - d = 4; - else /* ncolors <= 256 */ - d = 8; - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(d); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* hasha1 is a 1/0 indicator array for colors seen. - hasha2 holds the index into the colormap that will be - generated from the colors in the order seen. This is - the value inserted into pixd. 
*/ - hashsize = 5507; /* big and prime; collisions are not likely */ - hasha1 = (l_int32 *)LEPT_CALLOC(hashsize, sizeof(l_int32)); - hasha2 = (l_int32 *)LEPT_CALLOC(hashsize, sizeof(l_int32)); - index = -1; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = lines[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - hashval = (137 * rval + 269 * gval + 353 * bval) % hashsize; - if (hasha1[hashval] == 0) { /* new color */ - hasha1[hashval] = 1; - index++; - hasha2[hashval] = index; - pixcmapAddColor(cmap, rval, gval, bval); - } - val = hasha2[hashval]; - setLineDataVal(lined, j, d, val); - } - } - pixSetColormap(pixd, cmap); - - LEPT_FREE(hasha1); - LEPT_FREE(hasha2); - return pixd; -} - - -/* ----------------------------------------------------------------------- * - * Find the most "populated" colors in the image (and quantize) * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixGetMostPopulatedColors() - * \param[in] pixs 32 bpp rgb - * \param[in] sigbits 2-6, significant bits retained in the quantizer - * for each component of the input image - * \param[in] factor subsampling factor; use 1 for no subsampling - * \param[in] ncolors the number of most populated colors to select - * \param[out] parray [optional] array of colors, each as 0xrrggbb00 - * \param[out] pcmap [optional] colormap of the colors - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This finds the %ncolors most populated cubes in rgb colorspace,
- *          where the cube size depends on %sigbits as
- *               cube side = (256 >> sigbits)
- *      (2) The rgb color components are found at the center of the cube.
- *      (3) The output array of colors can be displayed using
- *               pixDisplayColorArray(array, ncolors, ...);
- * 
- */ -l_ok -pixGetMostPopulatedColors(PIX *pixs, - l_int32 sigbits, - l_int32 factor, - l_int32 ncolors, - l_uint32 **parray, - PIXCMAP **pcmap) -{ -l_int32 n, i, rgbindex, rval, gval, bval; -NUMA *nahisto, *naindex; - - PROCNAME("pixGetMostPopulatedColors"); - - if (!parray && !pcmap) - return ERROR_INT("no return val requested", procName, 1); - if (parray) *parray = NULL; - if (pcmap) *pcmap = NULL; - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined", procName, 1); - if (sigbits < 2 || sigbits > 6) - return ERROR_INT("sigbits not in [2 ... 6]", procName, 1); - if (factor < 1 || ncolors < 1) - return ERROR_INT("factor < 1 or ncolors < 1", procName, 1); - - if ((nahisto = pixGetRGBHistogram(pixs, sigbits, factor)) == NULL) - return ERROR_INT("nahisto not made", procName, 1); - - /* naindex contains the index into nahisto, which is the rgbindex */ - naindex = numaSortIndexAutoSelect(nahisto, L_SORT_DECREASING); - numaDestroy(&nahisto); - if (!naindex) - return ERROR_INT("naindex not made", procName, 1); - - n = numaGetCount(naindex); - ncolors = L_MIN(n, ncolors); - if (parray) *parray = (l_uint32 *)LEPT_CALLOC(ncolors, sizeof(l_uint32)); - if (pcmap) *pcmap = pixcmapCreate(8); - for (i = 0; i < ncolors; i++) { - numaGetIValue(naindex, i, &rgbindex); /* rgb index */ - getRGBFromIndex(rgbindex, sigbits, &rval, &gval, &bval); - if (parray) composeRGBPixel(rval, gval, bval, *parray + i); - if (pcmap) pixcmapAddColor(*pcmap, rval, gval, bval); - } - - numaDestroy(&naindex); - return 0; -} - - -/*! - * \brief pixSimpleColorQuantize() - * \param[in] pixs 32 bpp rgb - * \param[in] sigbits 2-4, significant bits retained in the quantizer - * for each component of the input image - * \param[in] factor subsampling factor; use 1 for no subsampling - * \param[in] ncolors the number of most populated colors to select - * \return pixd 8 bpp cmapped or NULL on error - * - *
- * Notes:
- *      (1) If you want to do color quantization for real, use octcube
- *          or modified median cut.  This function shows that it is
- *          easy to make a simple quantizer based solely on the population
- *          in cells of a given size in rgb color space.
- *      (2) The %ncolors most populated cells at the %sigbits level form
- *          the colormap for quantizing, and this uses octcube indexing
- *          under the covers to assign each pixel to the nearest color.
- *      (3) %sigbits is restricted to 2, 3 and 4.  At the low end, the
- *          color discrimination is very crude; at the upper end, a set of
- *          similar colors can dominate the result.  Interesting results
- *          are generally found for %sigbits = 3 and ncolors ~ 20.
- *      (4) See also pixColorSegment() for a method of quantizing the
- *          colors to generate regions of similar color.
- *      (5) See also pixConvertRGBToCmap() to losslessly convert an
- *          RGB image with not more than 256 colors.
- * 
- */ -PIX * -pixSimpleColorQuantize(PIX *pixs, - l_int32 sigbits, - l_int32 factor, - l_int32 ncolors) -{ -l_int32 w, h; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixSimpleColorQuantize"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (sigbits < 2 || sigbits > 4) - return (PIX *)ERROR_PTR("sigbits not in {2,3,4}", procName, NULL); - - pixGetMostPopulatedColors(pixs, sigbits, factor, ncolors, NULL, &cmap); - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, 8); - pixSetColormap(pixd, cmap); - pixAssignToNearestColor(pixd, pixs, NULL, 4, NULL); - return pixd; -} - - -/* ----------------------------------------------------------------------- * - * Constructs a color histogram based on rgb indices * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixGetRGBHistogram() - * \param[in] pixs 32 bpp rgb - * \param[in] sigbits 2-6, significant bits retained in the quantizer - * for each component of the input image - * \param[in] factor subsampling factor; use 1 for no subsampling - * \return numa histogram of colors, indexed by RGB - * components, or NULL on error - * - *
- * Notes:
- *      (1) This uses a simple, fast method of indexing into an rgb image.
- *      (2) The output is a 1D histogram of count vs. rgb-index, which
- *          uses red sigbits as the most significant and blue as the least.
- *      (3) This function produces the same result as pixMedianCutHisto().
- * 
- */ -NUMA * -pixGetRGBHistogram(PIX *pixs, - l_int32 sigbits, - l_int32 factor) -{ -l_int32 w, h, i, j, size, wpl, rval, gval, bval, npts; -l_uint32 val32, rgbindex; -l_float32 *array; -l_uint32 *data, *line, *rtab, *gtab, *btab; -NUMA *na; - - PROCNAME("pixGetRGBHistogram"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (sigbits < 2 || sigbits > 6) - return (NUMA *)ERROR_PTR("sigbits not in [2 ... 6]", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("factor < 1", procName, NULL); - - /* Get histogram size: 2^(3 * sigbits) */ - size = 1 << (3 * sigbits); /* 64, 512, 4096, 32768, 262144 */ - na = numaMakeConstant(0, size); /* init to all 0 */ - array = numaGetFArray(na, L_NOCOPY); - - makeRGBIndexTables(&rtab, >ab, &btab, sigbits); - - /* Check the number of sampled pixels */ - pixGetDimensions(pixs, &w, &h, NULL); - npts = ((w + factor - 1) / factor) * ((h + factor - 1) / factor); - if (npts < 1000) - L_WARNING("only sampling %d pixels\n", procName, npts); - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - val32 = *(line + j); - extractRGBValues(val32, &rval, &gval, &bval); - rgbindex = rtab[rval] | gtab[gval] | btab[bval]; - array[rgbindex]++; - } - } - - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return na; -} - - -/*! - * \brief makeRGBIndexTables() - * - * \param[out] prtab, pgtab, pbtab 256-entry rgb index tables - * \param[in] sigbits 2-6, significant bits retained in the quantizer - * for each component of the input image - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) These tables are used to map from rgb sample values to
- *          an rgb index, using
- *             rgbindex = rtab[rval] | gtab[gval] | btab[bval]
- *          where, e.g., if sigbits = 3, the index is a 9 bit integer:
- *             r7 r6 r5 g7 g6 g5 b7 b6 b5
- * 
- */ -l_ok -makeRGBIndexTables(l_uint32 **prtab, - l_uint32 **pgtab, - l_uint32 **pbtab, - l_int32 sigbits) -{ -l_int32 i; -l_uint32 *rtab, *gtab, *btab; - - PROCNAME("makeRGBIndexTables"); - - if (prtab) *prtab = NULL; - if (pgtab) *pgtab = NULL; - if (pbtab) *pbtab = NULL; - if (!prtab || !pgtab || !pbtab) - return ERROR_INT("not all table ptrs defined", procName, 1); - if (sigbits < 2 || sigbits > 6) - return ERROR_INT("sigbits not in [2 ... 6]", procName, 1); - - rtab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - gtab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - btab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - if (!rtab || !gtab || !btab) - return ERROR_INT("calloc fail for tab", procName, 1); - *prtab = rtab; - *pgtab = gtab; - *pbtab = btab; - switch (sigbits) { - case 2: - for (i = 0; i < 256; i++) { - rtab[i] = (i & 0xc0) >> 2; - gtab[i] = (i & 0xc0) >> 4; - btab[i] = (i & 0xc0) >> 6; - } - break; - case 3: - for (i = 0; i < 256; i++) { - rtab[i] = (i & 0xe0) << 1; - gtab[i] = (i & 0xe0) >> 2; - btab[i] = (i & 0xe0) >> 5; - } - break; - case 4: - for (i = 0; i < 256; i++) { - rtab[i] = (i & 0xf0) << 4; - gtab[i] = (i & 0xf0); - btab[i] = (i & 0xf0) >> 4; - } - break; - case 5: - for (i = 0; i < 256; i++) { - rtab[i] = (i & 0xf8) << 7; - gtab[i] = (i & 0xf8) << 2; - btab[i] = (i & 0xf8) >> 3; - } - break; - case 6: - for (i = 0; i < 256; i++) { - rtab[i] = (i & 0xfc) << 10; - gtab[i] = (i & 0xfc) << 4; - btab[i] = (i & 0xfc) >> 2; - } - break; - default: - L_ERROR("Illegal sigbits = %d\n", procName, sigbits); - return ERROR_INT("sigbits not in [2 ... 6]", procName, 1); - } - - return 0; -} - - -/*! - * \brief getRGBFromIndex() - * - * \param[in] index rgbindex - * \param[in] sigbits 2-6, significant bits retained in the quantizer - * for each component of the input image - * \param[out] prval, pgval, pbval rgb values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The %index is expressed in bits, based on the the
- *          %sigbits of the r, g and b components, as
- *             r7 r6 ... g7 g6 ... b7 b6 ...
- *      (2) The computed rgb values are in the center of the quantized cube.
- *          The extra bit that is OR'd accomplishes this.
- * 
- */ -l_ok -getRGBFromIndex(l_uint32 index, - l_int32 sigbits, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ - PROCNAME("getRGBFromIndex"); - - if (prval) *prval = 0; - if (pgval) *pgval = 0; - if (pbval) *pbval = 0; - if (!prval || !pgval || !pbval) - return ERROR_INT("not all component ptrs defined", procName, 1); - if (sigbits < 2 || sigbits > 6) - return ERROR_INT("sigbits not in [2 ... 6]", procName, 1); - - switch (sigbits) { - case 2: - *prval = ((index << 2) & 0xc0) | 0x20; - *pgval = ((index << 4) & 0xc0) | 0x20; - *pbval = ((index << 6) & 0xc0) | 0x20; - break; - case 3: - *prval = ((index >> 1) & 0xe0) | 0x10; - *pgval = ((index << 2) & 0xe0) | 0x10; - *pbval = ((index << 5) & 0xe0) | 0x10; - break; - case 4: - *prval = ((index >> 4) & 0xf0) | 0x08; - *pgval = (index & 0xf0) | 0x08; - *pbval = ((index << 4) & 0xf0) | 0x08; - break; - case 5: - *prval = ((index >> 7) & 0xf8) | 0x04; - *pgval = ((index >> 2) & 0xf8) | 0x04; - *pbval = ((index << 3) & 0xf8) | 0x04; - break; - case 6: - *prval = ((index >> 10) & 0xfc) | 0x02; - *pgval = ((index >> 4) & 0xfc) | 0x02; - *pbval = ((index << 2) & 0xfc) | 0x02; - break; - default: - L_ERROR("Illegal sigbits = %d\n", procName, sigbits); - return ERROR_INT("sigbits not in [2 ... 6]", procName, 1); - } - - return 0; -} - - -/* ----------------------------------------------------------------------- * - * Identify images that have highlight (red) color * - * ----------------------------------------------------------------------- */ -/*! - * \brief pixHasHighlightRed() - * - * \param[in] pixs 32 bpp rgb - * \param[in] factor subsampling; an integer >= 1; use 1 for all pixels - * \param[in] fract threshold fraction of all image pixels - * \param[in] fthresh threshold on a function of the components; typ. 
~2.5 - * \param[out] phasred 1 if red pixels are above threshold - * \param[out] pratio [optional] normalized fraction of threshold - * red pixels that is actually observed - * \param[out] ppixdb [optional] seed pixel mask - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Pixels are identified as red if they satisfy two conditions:
- *          (a) The components satisfy (R-B)/B > %fthresh   (red or dark fg)
- *          (b) The red component satisfied R > 128  (red or light bg)
- *          Masks are generated for (a) and (b), and the intersection
- *          gives the pixels that are red but not either light bg or
- *          dark fg.
- *      (2) A typical value for fract = 0.0001, which gives sensitivity
- *          to an image where a small fraction of the pixels are printed
- *          in red.
- *      (3) A typical value for fthresh = 2.5.  Higher values give less
- *          sensitivity to red, and fewer false positives.
- * 
- */ -l_ok -pixHasHighlightRed(PIX *pixs, - l_int32 factor, - l_float32 fract, - l_float32 fthresh, - l_int32 *phasred, - l_float32 *pratio, - PIX **ppixdb) -{ -l_int32 w, h, count; -l_float32 ratio; -PIX *pix1, *pix2, *pix3, *pix4; -FPIX *fpix; - - PROCNAME("pixHasHighlightRed"); - - if (pratio) *pratio = 0.0; - if (ppixdb) *ppixdb = NULL; - if (phasred) *phasred = 0; - if (!pratio && !ppixdb) - return ERROR_INT("no return val requested", procName, 1); - if (!phasred) - return ERROR_INT("&hasred not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (fthresh < 1.5 || fthresh > 3.5) - L_WARNING("fthresh = %f is out of normal bounds\n", procName, fthresh); - - if (factor > 1) - pix1 = pixScaleByIntSampling(pixs, factor); - else - pix1 = pixClone(pixs); - - /* Identify pixels that are either red or dark foreground */ - fpix = pixComponentFunction(pix1, 1.0, 0.0, -1.0, 0.0, 0.0, 1.0); - pix2 = fpixThresholdToPix(fpix, fthresh); - pixInvert(pix2, pix2); - - /* Identify pixels that are either red or light background */ - pix3 = pixGetRGBComponent(pix1, COLOR_RED); - pix4 = pixThresholdToBinary(pix3, 130); - pixInvert(pix4, pix4); - - pixAnd(pix4, pix4, pix2); - pixCountPixels(pix4, &count, NULL); - pixGetDimensions(pix4, &w, &h, NULL); - L_INFO("count = %d, thresh = %d\n", procName, count, - (l_int32)(fract * w * h)); - ratio = (l_float32)count / (fract * w * h); - if (pratio) *pratio = ratio; - if (ratio >= 1.0) - *phasred = 1; - if (ppixdb) - *ppixdb = pix4; - else - pixDestroy(&pix4); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - fpixDestroy(&fpix); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/coloring.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/coloring.c deleted file mode 100644 index 7624d3d5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/coloring.c +++ /dev/null @@ -1,1049 +0,0 @@ 
-/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file coloring.c - *
- *
- *      Coloring "gray" pixels
- *           PIX             *pixColorGrayRegions()
- *           l_int32          pixColorGray()
- *           PIX             *pixColorGrayMasked()
- *
- *      Adjusting one or more colors to a target color
- *           PIX             *pixSnapColor()
- *           PIX             *pixSnapColorCmap()
- *
- *      Piecewise linear color mapping based on a source/target pair
- *           PIX             *pixLinearMapToTargetColor()
- *           l_int32          pixelLinearMapToTargetColor()
- *
- *      Fractional shift of RGB towards black or white
- *           PIX             *pixShiftByComponent()
- *           l_int32          pixelShiftByComponent()
- *           l_int32          pixelFractionalShift()
- *
- *  There are several "coloring" functions in leptonica.
- *  You can find them in these files:
- *       coloring.c
- *       paintcmap.c
- *       pix2.c
- *       blend.c
- *       enhance.c
- *
- *  They fall into the following categories:
- *
- *  (1) Moving either the light or dark pixels toward a
- *      specified color. (pixColorGray, pixColorGrayMasked)
- *  (2) Forcing all pixels whose color is within some delta of a
- *      specified color to move to that color. (pixSnapColor)
- *  (3) Doing a piecewise linear color shift specified by a source
- *      and a target color.  Each component shifts independently.
- *      (pixLinearMapToTargetColor)
- *  (4) Shifting all colors by a given fraction of their distance
- *      from 0 (if shifting down) or from 255 (if shifting up).
- *      This is useful for colorizing either the background or
- *      the foreground of a grayscale image. (pixShiftByComponent)
- *  (5) Shifting all colors by a component-dependent fraction of
- *      their distance from 0 (if shifting down) or from 255 (if
- *      shifting up).  This is useful for modifying the color to
- *      compensate for color shifts in acquisition or printing.
- *      (enhance.c: pixColorShiftRGB, pixMosaicColorShiftRGB).
- *  (6) Repainting selected pixels. (paintcmap.c: pixSetSelectMaskedCmap)
- *  (7) Blending a fraction of a specific color with the existing RGB
- *      color.  (pix2.c: pixBlendInRect())
- *  (8) Changing selected colors in a colormap.
- *      (paintcmap.c: pixSetSelectCmap, pixSetSelectMaskedCmap)
- *  (9) Shifting all the pixels towards black or white depending on
- *      the gray value of a second image.  (blend.c: pixFadeWithGray)
- *  (10) Changing the hue, saturation or brightness, by changing the
- *      appropriate parameter in HSV color space by a fraction of
- *      the distance toward its end-point.  For example, you can change
- *      the brightness by moving each pixel's v-parameter a specified
- *      fraction of the distance toward 0 (darkening) or toward 255
- *      (brightening).  (enhance.c: pixModifySaturation,
- *      pixModifyHue, pixModifyBrightness)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*---------------------------------------------------------------------* - * Coloring "gray" pixels * - *---------------------------------------------------------------------*/ -/*! - * \brief pixColorGrayRegions() - * - * \param[in] pixs 2, 4 or 8 bpp gray, rgb, or colormapped - * \param[in] boxa of regions in which to apply color - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] thresh average value below/above which pixel is unchanged - * \param[in] rval, gval, bval new color to paint - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This generates a new image, where some of the pixels in each
- *          box in the boxa are colorized.  See pixColorGray() for usage
- *          with %type and %thresh.  Note that %thresh is only used for
- *          rgb; it is ignored for colormapped images.
- *      (2) If the input image is colormapped, the new image will be 8 bpp
- *          colormapped if possible; otherwise, it will be converted
- *          to 32 bpp rgb.  Only pixels that are strictly gray will be
- *          colorized.
- *      (3) If the input image is not colormapped, it is converted to rgb.
- *          A "gray" value for a pixel is determined by averaging the
- *          components, and the output rgb value is determined from this.
- *      (4) This can be used in conjunction with pixHasHighlightRed() to
- *          add highlight color to a grayscale image.
- * 
- */ -PIX * -pixColorGrayRegions(PIX *pixs, - BOXA *boxa, - l_int32 type, - l_int32 thresh, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, n, ncolors, ngray; -BOX *box; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixColorGrayRegions"); - - if (!pixs || pixGetDepth(pixs) == 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - if (type != L_PAINT_LIGHT && type != L_PAINT_DARK) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - /* If cmapped and there is room in an 8 bpp colormap for - * expansion, convert pixs to 8 bpp, and colorize. */ - cmap = pixGetColormap(pixs); - if (cmap) { - ncolors = pixcmapGetCount(cmap); - pixcmapCountGrayColors(cmap, &ngray); - if (ncolors + ngray < 255) { - pixd = pixConvertTo8(pixs, 1); /* always new image */ - pixColorGrayRegionsCmap(pixd, boxa, type, rval, gval, bval); - return pixd; - } - } - - /* The output will be rgb. Make sure the thresholds are valid */ - if (type == L_PAINT_LIGHT) { /* thresh should be low */ - if (thresh >= 255) - return (PIX *)ERROR_PTR("thresh must be < 255", procName, NULL); - if (thresh > 127) - L_WARNING("threshold set very high\n", procName); - } else { /* type == L_PAINT_DARK; thresh should be high */ - if (thresh <= 0) - return (PIX *)ERROR_PTR("thresh must be > 0", procName, NULL); - if (thresh < 128) - L_WARNING("threshold set very low\n", procName); - } - - pixd = pixConvertTo32(pixs); /* always new image */ - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pixColorGray(pixd, box, type, thresh, rval, gval, bval); - boxDestroy(&box); - } - - return pixd; -} - - -/*! 
- * \brief pixColorGray() - * - * \param[in] pixs 8 bpp gray, rgb or colormapped image - * \param[in] box [optional] region in which to apply color; can be NULL - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] thresh average value below/above which pixel is unchanged - * \param[in] rval, gval, bval new color to paint - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation; pixs is modified.
- *          If pixs is colormapped, the operation will add colors to the
- *          colormap.  Otherwise, pixs will be converted to 32 bpp rgb if
- *          it is initially 8 bpp gray.
- *      (2) If type == L_PAINT_LIGHT, it colorizes non-black pixels,
- *          preserving antialiasing.
- *          If type == L_PAINT_DARK, it colorizes non-white pixels,
- *          preserving antialiasing.
- *      (3) If box is NULL, applies function to the entire image; otherwise,
- *          clips the operation to the intersection of the box and pix.
- *      (4) If colormapped, calls pixColorGrayCmap(), which applies the
- *          coloring algorithm only to pixels that are strictly gray.
- *      (5) For RGB, determines a "gray" value by averaging; then uses this
- *          value, plus the input rgb target, to generate the output
- *          pixel values.
- *      (6) thresh is only used for rgb; it is ignored for colormapped pix.
- *          If type == L_PAINT_LIGHT, use thresh = 0 if all pixels are to
- *          be colored (black pixels will be unaltered).
- *          In situations where there are a lot of black pixels,
- *          setting thresh > 0 will make the function considerably
- *          more efficient without affecting the final result.
- *          If type == L_PAINT_DARK, use thresh = 255 if all pixels
- *          are to be colored (white pixels will be unaltered).
- *          In situations where there are a lot of white pixels,
- *          setting thresh < 255 will make the function considerably
- *          more efficient without affecting the final result.
- * 
- */ -l_ok -pixColorGray(PIX *pixs, - BOX *box, - l_int32 type, - l_int32 thresh, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, j, w, h, d, wpl, x1, x2, y1, y2, bw, bh; -l_int32 nrval, ngval, nbval, aveval; -l_float32 factor; -l_uint32 val32; -l_uint32 *line, *data; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixColorGray"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (type != L_PAINT_LIGHT && type != L_PAINT_DARK) - return ERROR_INT("invalid type", procName, 1); - - cmap = pixGetColormap(pixs); - pixGetDimensions(pixs, &w, &h, &d); - if (!cmap && d != 8 && d != 32) - return ERROR_INT("pixs not cmapped, 8 bpp or rgb", procName, 1); - if (cmap) - return pixColorGrayCmap(pixs, box, type, rval, gval, bval); - - /* rgb or 8 bpp gray image; check the thresh */ - if (type == L_PAINT_LIGHT) { /* thresh should be low */ - if (thresh >= 255) - return ERROR_INT("thresh must be < 255; else this is a no-op", - procName, 1); - if (thresh > 127) - L_WARNING("threshold set very high\n", procName); - } else { /* type == L_PAINT_DARK; thresh should be high */ - if (thresh <= 0) - return ERROR_INT("thresh must be > 0; else this is a no-op", - procName, 1); - if (thresh < 128) - L_WARNING("threshold set very low\n", procName); - } - - /* In-place conversion to 32 bpp if necessary */ - if (d == 8) { - pixt = pixConvertTo32(pixs); - pixTransferAllData(pixs, &pixt, 1, 0); - } - - if (!box) { - x1 = y1 = 0; - x2 = w; - y2 = h; - } else { - boxGetGeometry(box, &x1, &y1, &bw, &bh); - x2 = x1 + bw - 1; - y2 = y1 + bh - 1; - } - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - factor = 1. 
/ 255.; - for (i = y1; i <= y2; i++) { - if (i < 0 || i >= h) - continue; - line = data + i * wpl; - for (j = x1; j <= x2; j++) { - if (j < 0 || j >= w) - continue; - val32 = *(line + j); - aveval = ((val32 >> 24) + ((val32 >> 16) & 0xff) + - ((val32 >> 8) & 0xff)) / 3; - if (type == L_PAINT_LIGHT) { - if (aveval < thresh) /* skip sufficiently dark pixels */ - continue; - nrval = (l_int32)(rval * aveval * factor); - ngval = (l_int32)(gval * aveval * factor); - nbval = (l_int32)(bval * aveval * factor); - } else { /* type == L_PAINT_DARK */ - if (aveval > thresh) /* skip sufficiently light pixels */ - continue; - nrval = rval + (l_int32)((255. - rval) * aveval * factor); - ngval = gval + (l_int32)((255. - gval) * aveval * factor); - nbval = bval + (l_int32)((255. - bval) * aveval * factor); - } - composeRGBPixel(nrval, ngval, nbval, &val32); - *(line + j) = val32; - } - } - - return 0; -} - - -/*! - * \brief pixColorGrayMasked() - * - * \param[in] pixs 8 bpp gray, rgb or colormapped image - * \param[in] pixm 1 bpp mask, through which to apply color - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] thresh average value below/above which pixel is unchanged - * \param[in] rval, gval, bval new color to paint - * \return pixd colorized, or NULL on error - * - *
- * Notes:
- *      (1) This generates a new image, where some of the pixels under
- *          FG in the mask are colorized.
- *      (2) See pixColorGray() for usage with %type and %thresh.  Note
- *          that %thresh is only used for rgb; it is ignored for
- *          colormapped images.  In most cases, the mask will be over
- *          the darker parts and %type == L_PAINT_DARK.
- *      (3) If pixs is colormapped this calls pixColorMaskedCmap(),
- *          which adds colors to the colormap for pixd; it only adds
- *          colors corresponding to strictly gray colors in the colormap.
- *          Otherwise, if pixs is 8 bpp gray, pixd will be 32 bpp rgb.
- *      (4) If pixs is 32 bpp rgb, for each pixel a "gray" value is
- *          found by averaging.  This average is then used with the
- *          input rgb target to generate the output pixel values.
- *      (5) This can be used in conjunction with pixHasHighlightRed() to
- *          add highlight color to a grayscale image.
- * 
- */ -PIX * -pixColorGrayMasked(PIX *pixs, - PIX *pixm, - l_int32 type, - l_int32 thresh, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, j, w, h, d, wm, hm, wmin, hmin, wpl, wplm; -l_int32 nrval, ngval, nbval, aveval; -l_float32 factor; -l_uint32 val32; -l_uint32 *line, *data, *linem, *datam; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixColorGrayMasked"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixm || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm undefined or not 1 bpp", procName, NULL); - if (type != L_PAINT_LIGHT && type != L_PAINT_DARK) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - cmap = pixGetColormap(pixs); - pixGetDimensions(pixs, &w, &h, &d); - if (!cmap && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped, 8 bpp gray or 32 bpp", - procName, NULL); - if (cmap) { - pixd = pixCopy(NULL, pixs); - pixColorGrayMaskedCmap(pixd, pixm, type, rval, gval, bval); - return pixd; - } - - /* rgb or 8 bpp gray image; check the thresh */ - if (type == L_PAINT_LIGHT) { /* thresh should be low */ - if (thresh >= 255) - return (PIX *)ERROR_PTR( - "thresh must be < 255; else this is a no-op", procName, NULL); - if (thresh > 127) - L_WARNING("threshold set very high\n", procName); - } else { /* type == L_PAINT_DARK; thresh should be high */ - if (thresh <= 0) - return (PIX *)ERROR_PTR( - "thresh must be > 0; else this is a no-op", procName, NULL); - if (thresh < 128) - L_WARNING("threshold set very low\n", procName); - } - - pixGetDimensions(pixm, &wm, &hm, NULL); - if (wm != w) - L_WARNING("wm = %d differs from w = %d\n", procName, wm, w); - if (hm != h) - L_WARNING("hm = %d differs from h = %d\n", procName, hm, h); - wmin = L_MIN(w, wm); - hmin = L_MIN(h, hm); - if (d == 8) - pixd = pixConvertTo32(pixs); - else - pixd = pixCopy(NULL, pixs); - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - factor = 1. 
/ 255.; - for (i = 0; i < hmin; i++) { - line = data + i * wpl; - linem = datam + i * wplm; - for (j = 0; j < wmin; j++) { - if (GET_DATA_BIT(linem, j) == 0) - continue; - val32 = *(line + j); - aveval = ((val32 >> 24) + ((val32 >> 16) & 0xff) + - ((val32 >> 8) & 0xff)) / 3; - if (type == L_PAINT_LIGHT) { - if (aveval < thresh) /* skip sufficiently dark pixels */ - continue; - nrval = (l_int32)(rval * aveval * factor); - ngval = (l_int32)(gval * aveval * factor); - nbval = (l_int32)(bval * aveval * factor); - } else { /* type == L_PAINT_DARK */ - if (aveval > thresh) /* skip sufficiently light pixels */ - continue; - nrval = rval + (l_int32)((255. - rval) * aveval * factor); - ngval = gval + (l_int32)((255. - gval) * aveval * factor); - nbval = bval + (l_int32)((255. - bval) * aveval * factor); - } - composeRGBPixel(nrval, ngval, nbval, &val32); - *(line + j) = val32; - } - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Adjusting one or more colors to a target color * - *------------------------------------------------------------------*/ -/*! - * \brief pixSnapColor() - * - * \param[in] pixd [optional]; either NULL or equal to pixs for in-place - * \param[in] pixs colormapped or 8 bpp gray or 32 bpp rgb - * \param[in] srcval color center to be selected for change: 0xrrggbb00 - * \param[in] dstval target color for pixels: 0xrrggbb00 - * \param[in] diff max absolute difference, applied to all components - * \return pixd with all pixels within diff of pixval set to pixval, - * or pixd on error - * - *
- * Notes:
- *      (1) For inplace operation, call it this way:
- *           pixSnapColor(pixs, pixs, ... )
- *      (2) For generating a new pixd:
- *           pixd = pixSnapColor(NULL, pixs, ...)
- *      (3) If pixs has a colormap, it is handled by pixSnapColorCmap().
- *      (4) All pixels within 'diff' of 'srcval', componentwise,
- *          will be changed to 'dstval'.
- * 
- */ -PIX * -pixSnapColor(PIX *pixd, - PIX *pixs, - l_uint32 srcval, - l_uint32 dstval, - l_int32 diff) -{ -l_int32 val, sval, dval; -l_int32 rval, gval, bval, rsval, gsval, bsval; -l_int32 i, j, w, h, d, wpl; -l_uint32 pixel; -l_uint32 *line, *data; - - PROCNAME("pixSnapColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or == pixs", procName, pixd); - - if (pixGetColormap(pixs)) - return pixSnapColorCmap(pixd, pixs, srcval, dstval, diff); - - /* pixs does not have a colormap; it must be 8 bpp gray or - * 32 bpp rgb. */ - if (pixGetDepth(pixs) < 8) - return (PIX *)ERROR_PTR("pixs is < 8 bpp", procName, pixd); - - /* Do the work on pixd */ - if (!pixd) - pixd = pixCopy(NULL, pixs); - - pixGetDimensions(pixd, &w, &h, &d); - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - if (d == 8) { - sval = srcval & 0xff; - dval = dstval & 0xff; - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(line, j); - if (L_ABS(val - sval) <= diff) - SET_DATA_BYTE(line, j, dval); - } - } - } else { /* d == 32 */ - extractRGBValues(srcval, &rsval, &gsval, &bsval); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - pixel = *(line + j); - extractRGBValues(pixel, &rval, &gval, &bval); - if ((L_ABS(rval - rsval) <= diff) && - (L_ABS(gval - gsval) <= diff) && - (L_ABS(bval - bsval) <= diff)) - *(line + j) = dstval; /* replace */ - } - } - } - - return pixd; -} - - -/*! - * \brief pixSnapColorCmap() - * - * \param[in] pixd [optional]; either NULL or equal to pixs for in-place - * \param[in] pixs colormapped - * \param[in] srcval color center to be selected for change: 0xrrggbb00 - * \param[in] dstval target color for pixels: 0xrrggbb00 - * \param[in] diff max absolute difference, applied to all components - * \return pixd with all pixels within diff of srcval set to dstval, - * or pixd on error - * - *
- * Notes:
- *      (1) For inplace operation, call it this way:
- *           pixSnapCcmap(pixs, pixs, ... )
- *      (2) For generating a new pixd:
- *           pixd = pixSnapCmap(NULL, pixs, ...)
- *      (3) pixs must have a colormap.
- *      (4) All colors within 'diff' of 'srcval', componentwise,
- *          will be changed to 'dstval'.
- * 
- */ -PIX * -pixSnapColorCmap(PIX *pixd, - PIX *pixs, - l_uint32 srcval, - l_uint32 dstval, - l_int32 diff) -{ -l_int32 i, ncolors, index, found; -l_int32 rval, gval, bval, rsval, gsval, bsval, rdval, gdval, bdval; -l_int32 *tab; -PIX *pixm; -PIXCMAP *cmap; - - PROCNAME("pixSnapColorCmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("cmap not found", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or == pixs", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - - /* If no free colors, look for one close to the target - * that can be commandeered. */ - cmap = pixGetColormap(pixd); - ncolors = pixcmapGetCount(cmap); - extractRGBValues(srcval, &rsval, &gsval, &bsval); - extractRGBValues(dstval, &rdval, &gdval, &bdval); - found = FALSE; - if (pixcmapGetFreeCount(cmap) == 0) { - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - if ((L_ABS(rval - rsval) <= diff) && - (L_ABS(gval - gsval) <= diff) && - (L_ABS(bval - bsval) <= diff)) { - index = i; - pixcmapResetColor(cmap, index, rdval, gdval, bdval); - found = TRUE; - break; - } - } - } else { /* just add the new color */ - pixcmapAddColor(cmap, rdval, gdval, bdval); - ncolors = pixcmapGetCount(cmap); - index = ncolors - 1; /* index of new destination color */ - found = TRUE; - } - - if (!found) { - L_INFO("nothing to do\n", procName); - return pixd; - } - - /* For each color in cmap that is close enough to srcval, - * set the tab value to 1. Then generate a 1 bpp mask with - * fg pixels for every pixel in pixd that is close enough - * to srcval (i.e., has value 1 in tab). 
*/ - if ((tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32))) == NULL) - return (PIX *)ERROR_PTR("tab not made", procName, pixd); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - if ((L_ABS(rval - rsval) <= diff) && - (L_ABS(gval - gsval) <= diff) && - (L_ABS(bval - bsval) <= diff)) - tab[i] = 1; - } - pixm = pixMakeMaskFromLUT(pixd, tab); - LEPT_FREE(tab); - - /* Use the binary mask to set all selected pixels to - * the dest color index. */ - pixSetMasked(pixd, pixm, dstval); - pixDestroy(&pixm); - - /* Remove all unused colors from the colormap. */ - pixRemoveUnusedColors(pixd); - - return pixd; -} - - -/*---------------------------------------------------------------------* - * Piecewise linear color mapping based on a source/target pair * - *---------------------------------------------------------------------*/ -/*! - * \brief pixLinearMapToTargetColor() - * - * \param[in] pixd [optional]; either NULL or equal to pixs for in-place - * \param[in] pixs 32 bpp rgb - * \param[in] srcval source color: 0xrrggbb00 - * \param[in] dstval target color: 0xrrggbb00 - * \return pixd with all pixels mapped based on the srcval/destval mapping, - * or pixd on error - * - *
- * Notes:
- *      (1) For each component (r, b, g) separately, this does a piecewise
- *          linear mapping of the colors in pixs to colors in pixd.
- *          If rs and rd are the red src and dest components in %srcval and
- *          %dstval, then the range [0 ... rs] in pixs is mapped to
- *          [0 ... rd] in pixd.  Likewise, the range [rs ... 255] in pixs
- *          is mapped to [rd ... 255] in pixd.  And similarly for green
- *          and blue.
- *      (2) The mapping will in general change the hue of the pixels.
- *          However, if the src and dst targets are related by
- *          a transformation given by pixelFractionalShift(), the hue
- *          is invariant.
- *      (3) For inplace operation, call it this way:
- *            pixLinearMapToTargetColor(pixs, pixs, ... )
- *      (4) For generating a new pixd:
- *            pixd = pixLinearMapToTargetColor(NULL, pixs, ...)
- * 
- */ -PIX * -pixLinearMapToTargetColor(PIX *pixd, - PIX *pixs, - l_uint32 srcval, - l_uint32 dstval) -{ -l_int32 i, j, w, h, wpl; -l_int32 rval, gval, bval, rsval, gsval, bsval, rdval, gdval, bdval; -l_int32 *rtab, *gtab, *btab; -l_uint32 pixel; -l_uint32 *line, *data; - - PROCNAME("pixLinearMapToTargetColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or == pixs", procName, pixd); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs is not 32 bpp", procName, pixd); - - /* Do the work on pixd */ - if (!pixd) - pixd = pixCopy(NULL, pixs); - - extractRGBValues(srcval, &rsval, &gsval, &bsval); - extractRGBValues(dstval, &rdval, &gdval, &bdval); - rsval = L_MIN(254, L_MAX(1, rsval)); - gsval = L_MIN(254, L_MAX(1, gsval)); - bsval = L_MIN(254, L_MAX(1, bsval)); - rtab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - gtab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - btab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - if (!rtab || !gtab || !btab) - return (PIX *)ERROR_PTR("calloc fail for tab", procName, pixd); - for (i = 0; i < 256; i++) { - if (i <= rsval) - rtab[i] = (i * rdval) / rsval; - else - rtab[i] = rdval + ((255 - rdval) * (i - rsval)) / (255 - rsval); - if (i <= gsval) - gtab[i] = (i * gdval) / gsval; - else - gtab[i] = gdval + ((255 - gdval) * (i - gsval)) / (255 - gsval); - if (i <= bsval) - btab[i] = (i * bdval) / bsval; - else - btab[i] = bdval + ((255 - bdval) * (i - bsval)) / (255 - bsval); - } - pixGetDimensions(pixd, &w, &h, NULL); - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - pixel = line[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - composeRGBPixel(rtab[rval], gtab[gval], btab[bval], &pixel); - line[j] = pixel; - } - } - - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; -} - - -/*! 
- * \brief pixelLinearMapToTargetColor() - * - * \param[in] scolor rgb source color: 0xrrggbb00 - * \param[in] srcmap source mapping color: 0xrrggbb00 - * \param[in] dstmap target mapping color: 0xrrggbb00 - * \param[out] pdcolor rgb dest color: 0xrrggbb00 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does this does a piecewise linear mapping of each
- *          component of %scolor to %dcolor, based on the relation
- *          between the components of %srcmap and %dstmap.  It is the
- *          same transformation, performed on a single color, as mapped
- *          on every pixel in a pix by pixLinearMapToTargetColor().
- *      (2) For each component, if the sval is larger than the smap,
- *          the dval will be pushed up from dmap towards white.
- *          Otherwise, dval will be pushed down from dmap towards black.
- *          This is because you can visualize the transformation as
- *          a linear stretching where smap moves to dmap, and everything
- *          else follows linearly with 0 and 255 fixed.
- *      (3) The mapping will in general change the hue of %scolor.
- *          However, if the %srcmap and %dstmap targets are related by
- *          a transformation given by pixelFractionalShift(), the hue
- *          will be invariant.
- * 
- */ -l_ok -pixelLinearMapToTargetColor(l_uint32 scolor, - l_uint32 srcmap, - l_uint32 dstmap, - l_uint32 *pdcolor) -{ -l_int32 srval, sgval, sbval, drval, dgval, dbval; -l_int32 srmap, sgmap, sbmap, drmap, dgmap, dbmap; - - PROCNAME("pixelLinearMapToTargetColor"); - - if (!pdcolor) - return ERROR_INT("&dcolor not defined", procName, 1); - *pdcolor = 0; - - extractRGBValues(scolor, &srval, &sgval, &sbval); - extractRGBValues(srcmap, &srmap, &sgmap, &sbmap); - extractRGBValues(dstmap, &drmap, &dgmap, &dbmap); - srmap = L_MIN(254, L_MAX(1, srmap)); - sgmap = L_MIN(254, L_MAX(1, sgmap)); - sbmap = L_MIN(254, L_MAX(1, sbmap)); - - if (srval <= srmap) - drval = (srval * drmap) / srmap; - else - drval = drmap + ((255 - drmap) * (srval - srmap)) / (255 - srmap); - if (sgval <= sgmap) - dgval = (sgval * dgmap) / sgmap; - else - dgval = dgmap + ((255 - dgmap) * (sgval - sgmap)) / (255 - sgmap); - if (sbval <= sbmap) - dbval = (sbval * dbmap) / sbmap; - else - dbval = dbmap + ((255 - dbmap) * (sbval - sbmap)) / (255 - sbmap); - - composeRGBPixel(drval, dgval, dbval, pdcolor); - return 0; -} - - -/*------------------------------------------------------------------* - * Fractional shift of RGB towards black or white * - *------------------------------------------------------------------*/ -/*! - * \brief pixShiftByComponent() - * - * \param[in] pixd [optional]; either NULL or equal to pixs for in-place - * \param[in] pixs 32 bpp rgb - * \param[in] srcval source color: 0xrrggbb00 - * \param[in] dstval target color: 0xrrggbb00 - * \return pixd with all pixels mapped based on the srcval/destval mapping, - * or pixd on error - * - *
- * Notes:
- *      (1) For each component (r, b, g) separately, this does a linear
- *          mapping of the colors in pixs to colors in pixd.
- *          Let rs and rd be the red src and dest components in %srcval and
- *          %dstval, and rval is the red component of the src pixel.
- *          Then for all pixels in pixs, the mapping for the red
- *          component from pixs to pixd is:
- *             if (rd <= rs)   (shift toward black)
- *                 rval --> (rd/rs) * rval
- *             if (rd > rs)    (shift toward white)
- *                (255 - rval) --> ((255 - rs)/(255 - rd)) * (255 - rval)
- *          Thus if rd <= rs, the red component of all pixels is
- *          mapped by the same fraction toward white, and if rd > rs,
- *          they are mapped by the same fraction toward black.
- *          This is essentially a different linear TRC (gamma = 1)
- *          for each component.  The source and target color inputs are
- *          just used to generate the three fractions.
- *      (2) Note that this mapping differs from that in
- *          pixLinearMapToTargetColor(), which maps rs --> rd and does
- *          a piecewise stretching in between.
- *      (3) For inplace operation, call it this way:
- *            pixFractionalShiftByComponent(pixs, pixs, ... )
- *      (4) For generating a new pixd:
- *            pixd = pixLinearMapToTargetColor(NULL, pixs, ...)
- *      (5) A simple application is to color a grayscale image.
- *          A light background can be colored using srcval = 0xffffff00
- *          and picking a target background color for dstval.
- *          A dark foreground can be colored by using srcval = 0x0
- *          and choosing a target foreground color for dstval.
- * 
- */ -PIX * -pixShiftByComponent(PIX *pixd, - PIX *pixs, - l_uint32 srcval, - l_uint32 dstval) -{ -l_int32 i, j, w, h, wpl; -l_int32 rval, gval, bval, rsval, gsval, bsval, rdval, gdval, bdval; -l_int32 *rtab, *gtab, *btab; -l_uint32 pixel; -l_uint32 *line, *data; -PIXCMAP *cmap; - - PROCNAME("pixShiftByComponent"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or == pixs", procName, pixd); - if (pixGetDepth(pixs) != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, pixd); - - /* Do the work on pixd */ - if (!pixd) - pixd = pixCopy(NULL, pixs); - - /* If colormapped, just modify it */ - if ((cmap = pixGetColormap(pixd)) != NULL) { - pixcmapShiftByComponent(cmap, srcval, dstval); - return pixd; - } - - extractRGBValues(srcval, &rsval, &gsval, &bsval); - extractRGBValues(dstval, &rdval, &gdval, &bdval); - rtab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - gtab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - btab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - if (!rtab || !gtab || !btab) { - L_ERROR("calloc fail for tab\n", procName); - goto cleanup; - } - for (i = 0; i < 256; i++) { - if (rdval == rsval) - rtab[i] = i; - else if (rdval < rsval) - rtab[i] = (i * rdval) / rsval; - else - rtab[i] = 255 - (255 - rdval) * (255 - i) / (255 - rsval); - if (gdval == gsval) - gtab[i] = i; - else if (gdval < gsval) - gtab[i] = (i * gdval) / gsval; - else - gtab[i] = 255 - (255 - gdval) * (255 - i) / (255 - gsval); - if (bdval == bsval) - btab[i] = i; - else if (bdval < bsval) - btab[i] = (i * bdval) / bsval; - else - btab[i] = 255 - (255 - bdval) * (255 - i) / (255 - bsval); - } - pixGetDimensions(pixd, &w, &h, NULL); - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - pixel = line[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - 
composeRGBPixel(rtab[rval], gtab[gval], btab[bval], &pixel); - line[j] = pixel; - } - } - -cleanup: - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; -} - - -/*! - * \brief pixelShiftByComponent() - * - * \param[in] rval, gval, bval - * \param[in] srcval source color: 0xrrggbb00 - * \param[in] dstval target color: 0xrrggbb00 - * \param[out] ppixel rgb value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a linear transformation that gives the same result
- *          on a single pixel as pixShiftByComponent() gives
- *          on a pix.  Each component is handled separately.  If
- *          the dest component is larger than the src, then the
- *          component is pushed toward 255 by the same fraction as
- *          the src --> dest shift.
- * 
- */ -l_ok -pixelShiftByComponent(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_uint32 srcval, - l_uint32 dstval, - l_uint32 *ppixel) -{ -l_int32 rsval, rdval, gsval, gdval, bsval, bdval, rs, gs, bs; - - PROCNAME("pixelShiftByComponent"); - - if (!ppixel) - return ERROR_INT("&pixel defined", procName, 1); - - extractRGBValues(srcval, &rsval, &gsval, &bsval); - extractRGBValues(dstval, &rdval, &gdval, &bdval); - if (rdval == rsval) - rs = rval; - else if (rdval < rsval) - rs = (rval * rdval) / rsval; - else - rs = 255 - (255 - rdval) * (255 - rval) / (255 - rsval); - if (gdval == gsval) - gs = gval; - else if (gdval < gsval) - gs = (gval * gdval) / gsval; - else - gs = 255 - (255 - gdval) * (255 - gval) / (255 - gsval); - if (bdval == bsval) - bs = bval; - else if (bdval < bsval) - bs = (bval * bdval) / bsval; - else - bs = 255 - (255 - bdval) * (255 - bval) / (255 - bsval); - composeRGBPixel(rs, gs, bs, ppixel); - return 0; -} - - -/*! - * \brief pixelFractionalShift() - * - * \param[in] rval, gval, bval - * \param[in] fraction negative toward black; positive toward white - * \param[out] ppixel rgb value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This transformation leaves the hue invariant, while changing
- *          the saturation and intensity.  It can be used for that
- *          purpose in pixLinearMapToTargetColor().
- *      (2) %fraction is in the range [-1 .... +1].  If %fraction < 0,
- *          saturation is increased and brightness is reduced.  The
- *          opposite results if %fraction > 0.  If %fraction == -1,
- *          the resulting pixel is black; %fraction == 1 results in white.
- * 
- */ -l_ok -pixelFractionalShift(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_float32 fraction, - l_uint32 *ppixel) -{ -l_int32 nrval, ngval, nbval; - - PROCNAME("pixelFractionalShift"); - - if (!ppixel) - return ERROR_INT("&pixel defined", procName, 1); - if (fraction < -1.0 || fraction > 1.0) - return ERROR_INT("fraction not in [-1 ... +1]", procName, 1); - - nrval = (fraction < 0) ? (l_int32)((1.0 + fraction) * rval + 0.5) : - rval + (l_int32)(fraction * (255 - rval) + 0.5); - ngval = (fraction < 0) ? (l_int32)((1.0 + fraction) * gval + 0.5) : - gval + (l_int32)(fraction * (255 - gval) + 0.5); - nbval = (fraction < 0) ? (l_int32)((1.0 + fraction) * bval + 0.5) : - bval + (l_int32)(fraction * (255 - bval) + 0.5); - composeRGBPixel(nrval, ngval, nbval, ppixel); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colormap.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colormap.c deleted file mode 100644 index ff0fe755..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colormap.c +++ /dev/null @@ -1,2307 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colormap.c - *
- *
- *      Colormap creation, copy, destruction, addition
- *           PIXCMAP    *pixcmapCreate()
- *           PIXCMAP    *pixcmapCreateRandom()
- *           PIXCMAP    *pixcmapCreateLinear()
- *           PIXCMAP    *pixcmapCopy()
- *           void        pixcmapDestroy()
- *           l_int32     pixcmapIsValid()
- *           l_int32     pixcmapAddColor()
- *           l_int32     pixcmapAddRGBA()
- *           l_int32     pixcmapAddNewColor()
- *           l_int32     pixcmapAddNearestColor()
- *           l_int32     pixcmapUsableColor()
- *           l_int32     pixcmapAddBlackOrWhite()
- *           l_int32     pixcmapSetBlackAndWhite()
- *           l_int32     pixcmapGetCount()
- *           l_int32     pixcmapGetDepth()
- *           l_int32     pixcmapGetMinDepth()
- *           l_int32     pixcmapGetFreeCount()
- *           l_int32     pixcmapClear()
- *
- *      Colormap random access and test
- *           l_int32     pixcmapGetColor()
- *           l_int32     pixcmapGetColor32()
- *           l_int32     pixcmapGetRGBA()
- *           l_int32     pixcmapGetRGBA32()
- *           l_int32     pixcmapResetColor()
- *           l_int32     pixcmapSetAlpha()
- *           l_int32     pixcmapGetIndex()
- *           l_int32     pixcmapHasColor()
- *           l_int32     pixcmapIsOpaque()
- *           l_int32     pixcmapIsBlackAndWhite()
- *           l_int32     pixcmapCountGrayColors()
- *           l_int32     pixcmapGetRankIntensity()
- *           l_int32     pixcmapGetNearestIndex()
- *           l_int32     pixcmapGetNearestGrayIndex()
- *           l_int32     pixcmapGetDistanceToColor()
- *           l_int32     pixcmapGetRangeValues()
- *
- *      Colormap conversion
- *           PIXCMAP    *pixcmapGrayToColor()
- *           PIXCMAP    *pixcmapColorToGray()
- *           PIXCMAP    *pixcmapConvertTo4()
- *           PIXCMAP    *pixcmapConvertTo8()
- *
- *      Colormap I/O
- *           l_int32     pixcmapRead()
- *           l_int32     pixcmapReadStream()
- *           l_int32     pixcmapReadMem()
- *           l_int32     pixcmapWrite()
- *           l_int32     pixcmapWriteStream()
- *           l_int32     pixcmapWriteMem()
- *
- *      Extract colormap arrays and serialization
- *           l_int32     pixcmapToArrays()
- *           l_int32     pixcmapToRGBTable()
- *           l_int32     pixcmapSerializeToMemory()
- *           PIXCMAP    *pixcmapDeserializeFromMemory()
- *           char       *pixcmapConvertToHex()
- *
- *      Colormap transforms
- *           l_int32     pixcmapGammaTRC()
- *           l_int32     pixcmapContrastTRC()
- *           l_int32     pixcmapShiftIntensity()
- *           l_int32     pixcmapShiftByComponent()
- *
- *  Note:
- *      (1) colormaps in leptonica have a maximum of 256 entries.
- *      (2) nalloc, the allocated size of the palette array, is related
- *          to the depth d of the pixels by:
- *                 nalloc = 2^(d)
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*-------------------------------------------------------------* - * Colormap creation and addition * - *-------------------------------------------------------------*/ -/*! - * \brief pixcmapCreate() - * - * \param[in] depth of pix, in bpp - * \return cmap, or NULL on error - */ -PIXCMAP * -pixcmapCreate(l_int32 depth) -{ -RGBA_QUAD *cta; -PIXCMAP *cmap; - - PROCNAME("pixcmapCreate"); - - if (depth != 1 && depth != 2 && depth !=4 && depth != 8) - return (PIXCMAP *)ERROR_PTR("depth not in {1,2,4,8}", procName, NULL); - - cmap = (PIXCMAP *)LEPT_CALLOC(1, sizeof(PIXCMAP)); - cmap->depth = depth; - cmap->nalloc = 1 << depth; - cta = (RGBA_QUAD *)LEPT_CALLOC(cmap->nalloc, sizeof(RGBA_QUAD)); - cmap->array = cta; - cmap->n = 0; - return cmap; -} - - -/*! - * \brief pixcmapCreateRandom() - * - * \param[in] depth of pix, in bpp: 2, 4 or 8 - * \param[in] hasblack 1 if the first color is black; 0 if no black - * \param[in] haswhite 1 if the last color is white; 0 if no white - * \return cmap, or NULL on error - * - *
- * Notes:
- *      (1) This sets up a colormap with random colors,
- *          where the first color is optionally black, the last color
- *          is optionally white, and the remaining colors are
- *          chosen randomly.
- *      (2) The number of randomly chosen colors is:
- *               2^(depth) - haswhite - hasblack
- *      (3) Because rand() is seeded, it might disrupt otherwise
- *          deterministic results if also used elsewhere in a program.
- *      (4) rand() is not threadsafe, and will generate garbage if run
- *          on multiple threads at once -- though garbage is generally
- *          what you want from a random number generator!
- *      (5) Modern rand()s have equal randomness in low and high order
- *          bits, but older ones don't.  Here, we're just using rand()
- *          to choose colors for output.
- * 
- */ -PIXCMAP * -pixcmapCreateRandom(l_int32 depth, - l_int32 hasblack, - l_int32 haswhite) -{ -l_int32 ncolors, i; -l_int32 red[256], green[256], blue[256]; -PIXCMAP *cmap; - - PROCNAME("pixcmapCreateRandom"); - - if (depth != 2 && depth != 4 && depth != 8) - return (PIXCMAP *)ERROR_PTR("depth not in {2, 4, 8}", procName, NULL); - if (hasblack != 0) hasblack = 1; - if (haswhite != 0) haswhite = 1; - - cmap = pixcmapCreate(depth); - ncolors = 1 << depth; - if (hasblack) /* first color is optionally black */ - pixcmapAddColor(cmap, 0, 0, 0); - for (i = hasblack; i < ncolors - haswhite; i++) { - red[i] = (l_uint32)rand() & 0xff; - green[i] = (l_uint32)rand() & 0xff; - blue[i] = (l_uint32)rand() & 0xff; - pixcmapAddColor(cmap, red[i], green[i], blue[i]); - } - if (haswhite) /* last color is optionally white */ - pixcmapAddColor(cmap, 255, 255, 255); - - return cmap; -} - - -/*! - * \brief pixcmapCreateLinear() - * - * \param[in] d depth of pix for this colormap; 1, 2, 4 or 8 - * \param[in] nlevels valid in range [2, 2^d] - * \return cmap, or NULL on error - * - *
- * Notes:
- *      (1) Colormap has equally spaced gray color values
- *          from black (0, 0, 0) to white (255, 255, 255).
- * 
- */ -PIXCMAP * -pixcmapCreateLinear(l_int32 d, - l_int32 nlevels) -{ -l_int32 maxlevels, i, val; -PIXCMAP *cmap; - - PROCNAME("pixcmapCreateLinear"); - - if (d != 1 && d != 2 && d !=4 && d != 8) - return (PIXCMAP *)ERROR_PTR("d not in {1, 2, 4, 8}", procName, NULL); - maxlevels = 1 << d; - if (nlevels < 2 || nlevels > maxlevels) - return (PIXCMAP *)ERROR_PTR("invalid nlevels", procName, NULL); - - cmap = pixcmapCreate(d); - for (i = 0; i < nlevels; i++) { - val = (255 * i) / (nlevels - 1); - pixcmapAddColor(cmap, val, val, val); - } - return cmap; -} - - -/*! - * \brief pixcmapCopy() - * - * \param[in] cmaps - * \return cmapd, or NULL on error - */ -PIXCMAP * -pixcmapCopy(const PIXCMAP *cmaps) -{ -l_int32 nbytes, valid; -PIXCMAP *cmapd; - - PROCNAME("pixcmapCopy"); - - if (!cmaps) - return (PIXCMAP *)ERROR_PTR("cmaps not defined", procName, NULL); - pixcmapIsValid(cmaps, &valid); - if (!valid) - return (PIXCMAP *)ERROR_PTR("invalid cmap", procName, NULL); - - cmapd = (PIXCMAP *)LEPT_CALLOC(1, sizeof(PIXCMAP)); - nbytes = cmaps->nalloc * sizeof(RGBA_QUAD); - cmapd->array = (void *)LEPT_CALLOC(1, nbytes); - memcpy(cmapd->array, cmaps->array, cmaps->n * sizeof(RGBA_QUAD)); - cmapd->n = cmaps->n; - cmapd->nalloc = cmaps->nalloc; - cmapd->depth = cmaps->depth; - return cmapd; -} - - -/*! - * \brief pixcmapDestroy() - * - * \param[in,out] pcmap set to null on return - * \return void - */ -void -pixcmapDestroy(PIXCMAP **pcmap) -{ -PIXCMAP *cmap; - - PROCNAME("pixcmapDestroy"); - - if (pcmap == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((cmap = *pcmap) == NULL) - return; - - LEPT_FREE(cmap->array); - LEPT_FREE(cmap); - *pcmap = NULL; - return; -} - -/*! 
- * \brief pixcmapIsValid() - * - * \param[in] cmap - * \param[out] pvalid return 1 if valid; 0 if not - * \return 0 if OK, 1 on error or if cmap is not valid - */ -l_ok -pixcmapIsValid(const PIXCMAP *cmap, - l_int32 *pvalid) -{ -l_int32 d; - - PROCNAME("pixcmapIsValid"); - - if (!pvalid) - return ERROR_INT("&valid not defined", procName, 1); - *pvalid = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (!cmap->array) - return ERROR_INT("cmap array not defined", procName, 1); - d = cmap->depth; - if (d !=1 && d != 2 && d != 4 && d != 8) { - L_ERROR("invalid cmap depth: %d\n", procName, d); - return 1; - } - if (cmap->nalloc < 2 || cmap->nalloc > 256) { - L_ERROR("invalid cmap nalloc: %d\n", procName, cmap->nalloc); - return 1; - } - if (cmap->n < 0 || cmap->n > 256 || cmap->n > cmap->nalloc) { - L_ERROR("invalid cmap n: %d (nalloc = %d)\n", procName, - cmap->n, cmap->nalloc); - return 1; - } - *pvalid = 1; - return 0; -} - - -/*! - * \brief pixcmapAddColor() - * - * \param[in] cmap - * \param[in] rval, gval, bval colormap entry to be added; each number - * is in range [0, ... 255] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This always adds the color if there is room.
- *      (2) The alpha component is 255 (opaque)
- * 
- */ -l_ok -pixcmapAddColor(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -RGBA_QUAD *cta; - - PROCNAME("pixcmapAddColor"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (cmap->n >= cmap->nalloc) - return ERROR_INT("no free color entries", procName, 1); - - cta = (RGBA_QUAD *)cmap->array; - cta[cmap->n].red = rval; - cta[cmap->n].green = gval; - cta[cmap->n].blue = bval; - cta[cmap->n].alpha = 255; - cmap->n++; - return 0; -} - - -/*! - * \brief pixcmapAddRGBA() - * - * \param[in] cmap - * \param[in] rval, gval, bval, aval colormap entry to be added; - * each number is in range [0, ... 255] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This always adds the color if there is room.
- * 
- */ -l_ok -pixcmapAddRGBA(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 aval) -{ -RGBA_QUAD *cta; - - PROCNAME("pixcmapAddRGBA"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (cmap->n >= cmap->nalloc) - return ERROR_INT("no free color entries", procName, 1); - - cta = (RGBA_QUAD *)cmap->array; - cta[cmap->n].red = rval; - cta[cmap->n].green = gval; - cta[cmap->n].blue = bval; - cta[cmap->n].alpha = aval; - cmap->n++; - return 0; -} - - -/*! - * \brief pixcmapAddNewColor() - * - * \param[in] cmap - * \param[in] rval, gval, bval colormap entry to be added; each number - * is in range [0, ... 255] - * \param[out] pindex index of color - * \return 0 if OK, 1 on error; 2 if unable to add color - * - *
- * Notes:
- *      (1) This only adds color if not already there.
- *      (2) The alpha component is 255 (opaque)
- *      (3) This returns the index of the new (or existing) color.
- *      (4) Returns 2 with a warning if unable to add this color;
- *          the caller should check the return value.
- * 
- */ -l_ok -pixcmapAddNewColor(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pindex) -{ - PROCNAME("pixcmapAddNewColor"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - /* Check if the color is already present. */ - if (!pixcmapGetIndex(cmap, rval, gval, bval, pindex)) /* found */ - return 0; - - /* We need to add the color. Is there room? */ - if (cmap->n >= cmap->nalloc) { - L_WARNING("no free color entries\n", procName); - return 2; - } - - /* There's room. Add it. */ - pixcmapAddColor(cmap, rval, gval, bval); - *pindex = pixcmapGetCount(cmap) - 1; - return 0; -} - - -/*! - * \brief pixcmapAddNearestColor() - * - * \param[in] cmap - * \param[in] rval, gval, bval colormap entry to be added; each number - * is in range [0, ... 255] - * \param[out] pindex index of color - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only adds color if not already there.
- *      (2) The alpha component is 255 (opaque)
- *      (3) If it's not in the colormap and there is no room to add
- *          another color, this returns the index of the nearest color.
- * 
- */ -l_ok -pixcmapAddNearestColor(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pindex) -{ - PROCNAME("pixcmapAddNearestColor"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - /* Check if the color is already present. */ - if (!pixcmapGetIndex(cmap, rval, gval, bval, pindex)) /* found */ - return 0; - - /* We need to add the color. Is there room? */ - if (cmap->n < cmap->nalloc) { - pixcmapAddColor(cmap, rval, gval, bval); - *pindex = pixcmapGetCount(cmap) - 1; - return 0; - } - - /* There's no room. Return the index of the nearest color */ - pixcmapGetNearestIndex(cmap, rval, gval, bval, pindex); - return 0; -} - - -/*! - * \brief pixcmapUsableColor() - * - * \param[in] cmap - * \param[in] rval, gval, bval colormap entry to be added; each number - * is in range [0, ... 255] - * \param[out] pusable 1 if usable; 0 if not - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This checks if the color already exists or if there is
- *          room to add it.  It makes no change in the colormap.
- * 
- */ -l_ok -pixcmapUsableColor(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pusable) -{ -l_int32 index; - - PROCNAME("pixcmapUsableColor"); - - if (!pusable) - return ERROR_INT("&usable not defined", procName, 1); - *pusable = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - /* Is there room to add it? */ - if (cmap->n < cmap->nalloc) { - *pusable = 1; - return 0; - } - - /* No room; check if the color is already present. */ - if (!pixcmapGetIndex(cmap, rval, gval, bval, &index)) /* found */ - *pusable = 1; - return 0; -} - - -/*! - * \brief pixcmapAddBlackOrWhite() - * - * \param[in] cmap - * \param[in] color 0 for black, 1 for white - * \param[out] pindex [optional] index of color; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only adds color if not already there.
- *      (2) The alpha component is 255 (opaque)
- *      (3) This sets index to the requested color.
- *      (4) If there is no room in the colormap, returns the index
- *          of the closest color.
- * 
- */ -l_ok -pixcmapAddBlackOrWhite(PIXCMAP *cmap, - l_int32 color, - l_int32 *pindex) -{ -l_int32 index; - - PROCNAME("pixcmapAddBlackOrWhite"); - - if (pindex) *pindex = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - if (color == 0) { /* black */ - if (pixcmapGetFreeCount(cmap) > 0) - pixcmapAddNewColor(cmap, 0, 0, 0, &index); - else - pixcmapGetRankIntensity(cmap, 0.0, &index); - } else { /* white */ - if (pixcmapGetFreeCount(cmap) > 0) - pixcmapAddNewColor(cmap, 255, 255, 255, &index); - else - pixcmapGetRankIntensity(cmap, 1.0, &index); - } - - if (pindex) - *pindex = index; - return 0; -} - - -/*! - * \brief pixcmapSetBlackAndWhite() - * - * \param[in] cmap - * \param[in] setblack 0 for no operation; 1 to set darkest color to black - * \param[in] setwhite 0 for no operation; 1 to set lightest color to white - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapSetBlackAndWhite(PIXCMAP *cmap, - l_int32 setblack, - l_int32 setwhite) -{ -l_int32 index; - - PROCNAME("pixcmapSetBlackAndWhite"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - if (setblack) { - pixcmapGetRankIntensity(cmap, 0.0, &index); - pixcmapResetColor(cmap, index, 0, 0, 0); - } - if (setwhite) { - pixcmapGetRankIntensity(cmap, 1.0, &index); - pixcmapResetColor(cmap, index, 255, 255, 255); - } - return 0; -} - - -/*! - * \brief pixcmapGetCount() - * - * \param[in] cmap - * \return count, or 0 on error - */ -l_int32 -pixcmapGetCount(const PIXCMAP *cmap) -{ - PROCNAME("pixcmapGetCount"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 0); - return cmap->n; -} - - -/*! - * \brief pixcmapGetFreeCount() - * - * \param[in] cmap - * \return free entries, or 0 on error - */ -l_int32 -pixcmapGetFreeCount(PIXCMAP *cmap) -{ - PROCNAME("pixcmapGetFreeCount"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 0); - return (cmap->nalloc - cmap->n); -} - - -/*! 
- * \brief pixcmapGetDepth() - * - * \param[in] cmap - * \return depth, or 0 on error - */ -l_int32 -pixcmapGetDepth(PIXCMAP *cmap) -{ - PROCNAME("pixcmapGetDepth"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 0); - return cmap->depth; -} - - -/*! - * \brief pixcmapGetMinDepth() - * - * \param[in] cmap - * \param[out] pmindepth minimum depth to support the colormap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) On error, &mindepth is returned as 0.
- * 
- */ -l_ok -pixcmapGetMinDepth(PIXCMAP *cmap, - l_int32 *pmindepth) -{ -l_int32 ncolors; - - PROCNAME("pixcmapGetMinDepth"); - - if (!pmindepth) - return ERROR_INT("&mindepth not defined", procName, 1); - *pmindepth = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - ncolors = pixcmapGetCount(cmap); - if (ncolors <= 4) - *pmindepth = 2; - else if (ncolors <= 16) - *pmindepth = 4; - else /* ncolors > 16 */ - *pmindepth = 8; - return 0; -} - - -/*! - * \brief pixcmapClear() - * - * \param[in] cmap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes the colors by setting the count to 0.
- * 
- */ -l_ok -pixcmapClear(PIXCMAP *cmap) -{ - PROCNAME("pixcmapClear"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - cmap->n = 0; - return 0; -} - - -/*-------------------------------------------------------------* - * Colormap random access * - *-------------------------------------------------------------*/ -/*! - * \brief pixcmapGetColor() - * - * \param[in] cmap - * \param[in] index - * \param[out] prval, pgval, pbval each color value - * \return 0 if OK, 1 if not accessible caller should check - */ -l_ok -pixcmapGetColor(PIXCMAP *cmap, - l_int32 index, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -RGBA_QUAD *cta; - - PROCNAME("pixcmapGetColor"); - - if (!prval || !pgval || !pbval) - return ERROR_INT("&rval, &gval, &bval not all defined", procName, 1); - *prval = *pgval = *pbval = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (index < 0 || index >= cmap->n) - return ERROR_INT("index out of bounds", procName, 1); - - cta = (RGBA_QUAD *)cmap->array; - *prval = cta[index].red; - *pgval = cta[index].green; - *pbval = cta[index].blue; - return 0; -} - - -/*! - * \brief pixcmapGetColor32() - * - * \param[in] cmap - * \param[in] index - * \param[out] pval32 32-bit rgb color value - * \return 0 if OK, 1 if not accessible caller should check - * - *
- * Notes:
- *      (1) The returned alpha channel value is 255.
- * 
- */ -l_ok -pixcmapGetColor32(PIXCMAP *cmap, - l_int32 index, - l_uint32 *pval32) -{ -l_int32 rval, gval, bval; - - PROCNAME("pixcmapGetColor32"); - - if (!pval32) - return ERROR_INT("&val32 not defined", procName, 1); - *pval32 = 0; - - if (pixcmapGetColor(cmap, index, &rval, &gval, &bval) != 0) - return ERROR_INT("rgb values not found", procName, 1); - composeRGBAPixel(rval, gval, bval, 255, pval32); - return 0; -} - - -/*! - * \brief pixcmapGetRGBA() - * - * \param[in] cmap - * \param[in] index - * \param[out] prval, pgval, pbval, paval each color value - * \return 0 if OK, 1 if not accessible caller should check - */ -l_ok -pixcmapGetRGBA(PIXCMAP *cmap, - l_int32 index, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval, - l_int32 *paval) -{ -RGBA_QUAD *cta; - - PROCNAME("pixcmapGetRGBA"); - - if (!prval || !pgval || !pbval || !paval) - return ERROR_INT("&rval, &gval, &bval, &aval not all defined", - procName, 1); - *prval = *pgval = *pbval = *paval = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (index < 0 || index >= cmap->n) - return ERROR_INT("index out of bounds", procName, 1); - - cta = (RGBA_QUAD *)cmap->array; - *prval = cta[index].red; - *pgval = cta[index].green; - *pbval = cta[index].blue; - *paval = cta[index].alpha; - return 0; -} - - -/*! - * \brief pixcmapGetRGBA32() - * - * \param[in] cmap - * \param[in] index - * \param[out] pval32 32-bit rgba color value - * \return 0 if OK, 1 if not accessible caller should check - */ -l_ok -pixcmapGetRGBA32(PIXCMAP *cmap, - l_int32 index, - l_uint32 *pval32) -{ -l_int32 rval, gval, bval, aval; - - PROCNAME("pixcmapGetRGBA32"); - - if (!pval32) - return ERROR_INT("&val32 not defined", procName, 1); - *pval32 = 0; - - if (pixcmapGetRGBA(cmap, index, &rval, &gval, &bval, &aval) != 0) - return ERROR_INT("rgba values not found", procName, 1); - composeRGBAPixel(rval, gval, bval, aval, pval32); - return 0; -} - - -/*! 
- * \brief pixcmapResetColor() - * - * \param[in] cmap - * \param[in] index - * \param[in] rval, gval, bval colormap entry to be reset; each number - * is in range [0, ... 255] - * \return 0 if OK, 1 if not accessible caller should check - * - *
- * Notes:
- *      (1) This resets sets the color of an entry that has already
- *          been set and included in the count of colors.
- *      (2) The alpha component is 255 (opaque)
- * 
- */ -l_ok -pixcmapResetColor(PIXCMAP *cmap, - l_int32 index, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -RGBA_QUAD *cta; - - PROCNAME("pixcmapResetColor"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (index < 0 || index >= cmap->n) - return ERROR_INT("index out of bounds", procName, 1); - - cta = (RGBA_QUAD *)cmap->array; - cta[index].red = rval; - cta[index].green = gval; - cta[index].blue = bval; - cta[index].alpha = 255; - return 0; -} - - -/*! - * \brief pixcmapSetAlpha() - * - * \param[in] cmap - * \param[in] index - * \param[in] aval in range [0, ... 255] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This modifies the transparency of one entry in a colormap.
- *          The alpha component by default is 255 (opaque).
- *          This is used when extracting the colormap from a PNG file
- *          without decoding the image.
- * 
- */ -l_ok -pixcmapSetAlpha(PIXCMAP *cmap, - l_int32 index, - l_int32 aval) -{ -RGBA_QUAD *cta; - - PROCNAME("pixcmapSetAlpha"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (index < 0 || index >= cmap->n) - return ERROR_INT("index out of bounds", procName, 1); - - cta = (RGBA_QUAD *)cmap->array; - cta[index].alpha = aval; - return 0; -} - - -/*! - * \brief pixcmapGetIndex() - * - * \param[in] cmap - * \param[in] rval, gval, bval colormap colors to search for; each number - * is in range [0, ... 255] - * \param[out] pindex value of index found - * \return 0 if found, 1 if not found caller must check - */ -l_int32 -pixcmapGetIndex(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pindex) -{ -l_int32 n, i; -RGBA_QUAD *cta; - - PROCNAME("pixcmapGetIndex"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - n = pixcmapGetCount(cmap); - - cta = (RGBA_QUAD *)cmap->array; - for (i = 0; i < n; i++) { - if (rval == cta[i].red && - gval == cta[i].green && - bval == cta[i].blue) { - *pindex = i; - return 0; - } - } - return 1; -} - - -/*! - * \brief pixcmapHasColor() - * - * \param[in] cmap - * \param[out] pcolor TRUE if cmap has color; FALSE otherwise - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapHasColor(PIXCMAP *cmap, - l_int32 *pcolor) -{ -l_int32 n, i; -l_int32 *rmap, *gmap, *bmap; - - PROCNAME("pixcmapHasColor"); - - if (!pcolor) - return ERROR_INT("&color not defined", procName, 1); - *pcolor = FALSE; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - if (pixcmapToArrays(cmap, &rmap, &gmap, &bmap, NULL)) - return ERROR_INT("colormap arrays not made", procName, 1); - n = pixcmapGetCount(cmap); - for (i = 0; i < n; i++) { - if ((rmap[i] != gmap[i]) || (rmap[i] != bmap[i])) { - *pcolor = TRUE; - break; - } - } - - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - return 0; -} - - -/*! 
- * \brief pixcmapIsOpaque() - * - * \param[in] cmap - * \param[out] popaque TRUE if fully opaque: all entries are 255 - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapIsOpaque(PIXCMAP *cmap, - l_int32 *popaque) -{ -l_int32 i, n; -RGBA_QUAD *cta; - - PROCNAME("pixcmapIsOpaque"); - - if (!popaque) - return ERROR_INT("&opaque not defined", procName, 1); - *popaque = TRUE; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - n = pixcmapGetCount(cmap); - cta = (RGBA_QUAD *)cmap->array; - for (i = 0; i < n; i++) { - if (cta[i].alpha != 255) { - *popaque = FALSE; - break; - } - } - return 0; -} - - -/*! - * \brief pixcmapIsBlackAndWhite() - * - * \param[in] cmap - * \param[out] pblackwhite TRUE if the cmap has only two colors: - * black (0,0,0) and white (255,255,255) - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapIsBlackAndWhite(PIXCMAP *cmap, - l_int32 *pblackwhite) -{ -l_int32 val0, val1, hascolor; -RGBA_QUAD *cta; - - PROCNAME("pixcmapIsBlackAndWhite"); - - if (!pblackwhite) - return ERROR_INT("&blackwhite not defined", procName, 1); - *pblackwhite = FALSE; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (pixcmapGetCount(cmap) != 2) - return 0; - - pixcmapHasColor(cmap, &hascolor); - if (hascolor) return 0; - - cta = (RGBA_QUAD *)cmap->array; - val0 = cta[0].red; - val1 = cta[1].red; - if ((val0 == 0 && val1 == 255) || (val0 == 255 && val1 == 0)) - *pblackwhite = TRUE; - return 0; -} - - -/*! - * \brief pixcmapCountGrayColors() - * - * \param[in] cmap - * \param[out] pngray number of gray colors - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This counts the unique gray colors, including black and white.
- * 
- */ -l_ok -pixcmapCountGrayColors(PIXCMAP *cmap, - l_int32 *pngray) -{ -l_int32 n, i, rval, gval, bval, count; -l_int32 *array; - - PROCNAME("pixcmapCountGrayColors"); - - if (!pngray) - return ERROR_INT("&ngray not defined", procName, 1); - *pngray = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - array = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - n = pixcmapGetCount(cmap); - count = 0; - for (i = 0; i < n; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - if ((rval == gval) && (rval == bval) && (array[rval] == 0)) { - array[rval] = 1; - count++; - } - } - - LEPT_FREE(array); - *pngray = count; - return 0; -} - - -/*! - * \brief pixcmapGetRankIntensity() - * - * \param[in] cmap - * \param[in] rankval 0.0 for darkest, 1.0 for lightest color - * \param[out] pindex the index into the colormap that corresponds - * to the rank intensity color - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapGetRankIntensity(PIXCMAP *cmap, - l_float32 rankval, - l_int32 *pindex) -{ -l_int32 n, i, rval, gval, bval, rankindex; -NUMA *na, *nasort; - - PROCNAME("pixcmapGetRankIntensity"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (rankval < 0.0 || rankval > 1.0) - return ERROR_INT("rankval not in [0.0 ... 1.0]", procName, 1); - - n = pixcmapGetCount(cmap); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - numaAddNumber(na, rval + gval + bval); - } - nasort = numaGetSortIndex(na, L_SORT_INCREASING); - rankindex = (l_int32)(rankval * (n - 1) + 0.5); - numaGetIValue(nasort, rankindex, pindex); - - numaDestroy(&na); - numaDestroy(&nasort); - return 0; -} - - -/*! - * \brief pixcmapGetNearestIndex() - * - * \param[in] cmap - * \param[in] rval, gval, bval colormap colors to search for; each number - * is in range [0, ... 
255] - * \param[out] pindex the index of the nearest color - * \return 0 if OK, 1 on error caller must check - * - *
- * Notes:
- *      (1) Returns the index of the exact color if possible, otherwise the
- *          index of the color closest to the target color.
- *      (2) Nearest color is that which is the least sum-of-squares distance
- *          from the target color.
- * 
- */ -l_ok -pixcmapGetNearestIndex(PIXCMAP *cmap, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pindex) -{ -l_int32 i, n, delta, dist, mindist; -RGBA_QUAD *cta; - - PROCNAME("pixcmapGetNearestIndex"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = UNDEF; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - if ((cta = (RGBA_QUAD *)cmap->array) == NULL) - return ERROR_INT("cta not defined(!)", procName, 1); - n = pixcmapGetCount(cmap); - - mindist = 3 * 255 * 255 + 1; - for (i = 0; i < n; i++) { - delta = cta[i].red - rval; - dist = delta * delta; - delta = cta[i].green - gval; - dist += delta * delta; - delta = cta[i].blue - bval; - dist += delta * delta; - if (dist < mindist) { - *pindex = i; - if (dist == 0) - break; - mindist = dist; - } - } - - return 0; -} - - -/*! - * \brief pixcmapGetNearestGrayIndex() - * - * \param[in] cmap - * \param[in] val gray value to search for; in range [0, ... 255] - * \param[out] pindex the index of the nearest color - * \return 0 if OK, 1 on error caller must check - * - *
- * Notes:
- *      (1) This should be used on gray colormaps.  It uses only the
- *          green value of the colormap.
- *      (2) Returns the index of the exact color if possible, otherwise the
- *          index of the color closest to the target color.
- * 
- */ -l_ok -pixcmapGetNearestGrayIndex(PIXCMAP *cmap, - l_int32 val, - l_int32 *pindex) -{ -l_int32 i, n, dist, mindist; -RGBA_QUAD *cta; - - PROCNAME("pixcmapGetNearestGrayIndex"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (val < 0 || val > 255) - return ERROR_INT("val not in [0 ... 255]", procName, 1); - - if ((cta = (RGBA_QUAD *)cmap->array) == NULL) - return ERROR_INT("cta not defined(!)", procName, 1); - n = pixcmapGetCount(cmap); - - mindist = 256; - for (i = 0; i < n; i++) { - dist = cta[i].green - val; - dist = L_ABS(dist); - if (dist < mindist) { - *pindex = i; - if (dist == 0) - break; - mindist = dist; - } - } - - return 0; -} - - -/*! - * \brief pixcmapGetDistanceToColor() - * - * \param[in] cmap - * \param[in] index - * \param[in] rval, gval, bval target color - * \param[out] pdist the distance from the cmap entry to target - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Returns the L2 distance (squared) between the color at index i
- *          and the target color.
- * 
- */ -l_ok -pixcmapGetDistanceToColor(PIXCMAP *cmap, - l_int32 index, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pdist) -{ -l_int32 n, delta, dist; -RGBA_QUAD *cta; - - PROCNAME("pixcmapGetDistanceToColor"); - - if (!pdist) - return ERROR_INT("&dist not defined", procName, 1); - *pdist = UNDEF; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - n = pixcmapGetCount(cmap); - if (index >= n) - return ERROR_INT("invalid index", procName, 1); - - if ((cta = (RGBA_QUAD *)cmap->array) == NULL) - return ERROR_INT("cta not defined(!)", procName, 1); - - delta = cta[index].red - rval; - dist = delta * delta; - delta = cta[index].green - gval; - dist += delta * delta; - delta = cta[index].blue - bval; - dist += delta * delta; - *pdist = dist; - - return 0; -} - - -/*! - * \brief pixcmapGetRangeValues() - * - * \param[in] cmap - * \param[in] select L_SELECT_RED, L_SELECT_GREEN, L_SELECT_BLUE or - * L_SELECT_AVERAGE - * \param[out] pminval [optional] minimum value of component - * \param[out] pmaxval [optional] maximum value of component - * \param[out] pminindex [optional] index of minimum value - * \param[out] pmaxindex [optional] index of maximum value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Returns, for selected components (or the average), the
- *          the extreme values (min and/or max) and their indices
- *          that are found in the cmap.
- * 
- */ -l_ok -pixcmapGetRangeValues(PIXCMAP *cmap, - l_int32 select, - l_int32 *pminval, - l_int32 *pmaxval, - l_int32 *pminindex, - l_int32 *pmaxindex) -{ -l_int32 i, n, imin, imax, minval, maxval, rval, gval, bval, aveval; - - PROCNAME("pixcmapGetRangeValues"); - - if (pminval) *pminval = UNDEF; - if (pmaxval) *pmaxval = UNDEF; - if (pminindex) *pminindex = UNDEF; - if (pmaxindex) *pmaxindex = UNDEF; - if (!pminval && !pmaxval && !pminindex && !pmaxindex) - return ERROR_INT("no result requested", procName, 1); - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - imin = UNDEF; - imax = UNDEF; - minval = 100000; - maxval = -1; - n = pixcmapGetCount(cmap); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - if (select == L_SELECT_RED) { - if (rval < minval) { - minval = rval; - imin = i; - } - if (rval > maxval) { - maxval = rval; - imax = i; - } - } else if (select == L_SELECT_GREEN) { - if (gval < minval) { - minval = gval; - imin = i; - } - if (gval > maxval) { - maxval = gval; - imax = i; - } - } else if (select == L_SELECT_BLUE) { - if (bval < minval) { - minval = bval; - imin = i; - } - if (bval > maxval) { - maxval = bval; - imax = i; - } - } else if (select == L_SELECT_AVERAGE) { - aveval = (rval + gval + bval) / 3; - if (aveval < minval) { - minval = aveval; - imin = i; - } - if (aveval > maxval) { - maxval = aveval; - imax = i; - } - } else { - return ERROR_INT("invalid selection", procName, 1); - } - } - - if (pminval) *pminval = minval; - if (pmaxval) *pmaxval = maxval; - if (pminindex) *pminindex = imin; - if (pmaxindex) *pmaxindex = imax; - return 0; -} - - -/*-------------------------------------------------------------* - * Colormap conversion * - *-------------------------------------------------------------*/ -/*! - * \brief pixcmapGrayToColor() - * - * \param[in] color - * \return cmap, or NULL on error - * - *
- * Notes:
- *      (1) This creates a colormap that maps from gray to
- *          a specific color.  In the mapping, each component
- *          is faded to white, depending on the gray value.
- *      (2) In use, this is simply attached to a grayscale pix
- *          to give it the input color.
- * 
- */ -PIXCMAP * -pixcmapGrayToColor(l_uint32 color) -{ -l_int32 i, rval, gval, bval; -PIXCMAP *cmap; - - extractRGBValues(color, &rval, &gval, &bval); - cmap = pixcmapCreate(8); - for (i = 0; i < 256; i++) { - pixcmapAddColor(cmap, rval + (i * (255 - rval)) / 255, - gval + (i * (255 - gval)) / 255, - bval + (i * (255 - bval)) / 255); - } - - return cmap; -} - - -/*! - * \brief pixcmapColorToGray() - * - * \param[in] cmaps - * \param[in] rwt, gwt, bwt non-negative; these should add to 1.0 - * \return cmap gray, or NULL on error - * - *
- * Notes:
- *      (1) This creates a gray colormap from an arbitrary colormap.
- *      (2) In use, attach the output gray colormap to the pix
- *          (or a copy of it) that provided the input colormap.
- * 
- */ -PIXCMAP * -pixcmapColorToGray(PIXCMAP *cmaps, - l_float32 rwt, - l_float32 gwt, - l_float32 bwt) -{ -l_int32 i, n, rval, gval, bval, val; -l_float32 sum; -PIXCMAP *cmapd; - - PROCNAME("pixcmapColorToGray"); - - if (!cmaps) - return (PIXCMAP *)ERROR_PTR("cmaps not defined", procName, NULL); - if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) - return (PIXCMAP *)ERROR_PTR("weights not all >= 0.0", procName, NULL); - - /* Make sure the sum of weights is 1.0; otherwise, you can get - * overflow in the gray value. */ - sum = rwt + gwt + bwt; - if (sum == 0.0) { - L_WARNING("all weights zero; setting equal to 1/3\n", procName); - rwt = gwt = bwt = 0.33333; - sum = 1.0; - } - if (L_ABS(sum - 1.0) > 0.0001) { /* maintain ratios with sum == 1.0 */ - L_WARNING("weights don't sum to 1; maintaining ratios\n", procName); - rwt = rwt / sum; - gwt = gwt / sum; - bwt = bwt / sum; - } - - if ((cmapd = pixcmapCopy(cmaps)) == NULL) - return (PIXCMAP *)ERROR_PTR("cmapd not made", procName, NULL); - n = pixcmapGetCount(cmapd); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmapd, i, &rval, &gval, &bval); - val = (l_int32)(rwt * rval + gwt * gval + bwt * bval + 0.5); - pixcmapResetColor(cmapd, i, val, val, val); - } - - return cmapd; -} - - -/*! - * \brief pixcmapConvertTo4() - * - * \param[in] cmaps colormap for 2 bpp pix - * \return cmapd (4 bpp) - * - *
- * Notes:
- *      (1) This converts a 2 bpp colormap to 4 bpp.  The colors
- *          are the same; the output colormap entry array has size 16.
- * 
- */ -PIXCMAP * -pixcmapConvertTo4(PIXCMAP *cmaps) -{ -l_int32 i, n, rval, gval, bval; -PIXCMAP *cmapd; - - PROCNAME("pixcmapConvertTo4"); - - if (!cmaps) - return (PIXCMAP *)ERROR_PTR("cmaps not defined", procName, NULL); - if (pixcmapGetDepth(cmaps) != 2) - return (PIXCMAP *)ERROR_PTR("cmaps not for 2 bpp pix", procName, NULL); - - cmapd = pixcmapCreate(4); - n = pixcmapGetCount(cmaps); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmaps, i, &rval, &gval, &bval); - pixcmapAddColor(cmapd, rval, gval, bval); - } - return cmapd; -} - - -/*! - * \brief pixcmapConvertTo8() - * - * \param[in] cmaps colormap for 2 bpp or 4 bpp pix - * \return cmapd (8 bpp) - * - *
- * Notes:
- *      (1) This converts a 2 bpp or 4 bpp colormap to 8 bpp.  The colors
- *          are the same; the output colormap entry array has size 256.
- * 
- */ -PIXCMAP * -pixcmapConvertTo8(PIXCMAP *cmaps) -{ -l_int32 i, n, depth, rval, gval, bval; -PIXCMAP *cmapd; - - PROCNAME("pixcmapConvertTo8"); - - if (!cmaps) - return (PIXCMAP *)ERROR_PTR("cmaps not defined", procName, NULL); - depth = pixcmapGetDepth(cmaps); - if (depth == 8) return pixcmapCopy(cmaps); - if (depth != 2 && depth != 4) - return (PIXCMAP *)ERROR_PTR("cmaps not 2 or 4 bpp", procName, NULL); - - cmapd = pixcmapCreate(8); - n = pixcmapGetCount(cmaps); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmaps, i, &rval, &gval, &bval); - pixcmapAddColor(cmapd, rval, gval, bval); - } - return cmapd; -} - - -/*-------------------------------------------------------------* - * Colormap I/O * - *-------------------------------------------------------------*/ -/*! - * \brief pixcmapRead() - * - * \param[in] filename - * \return cmap, or NULL on error - */ -PIXCMAP * -pixcmapRead(const char *filename) -{ -FILE *fp; -PIXCMAP *cmap; - - PROCNAME("pixcmapRead"); - - if (!filename) - return (PIXCMAP *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIXCMAP *)ERROR_PTR("stream not opened", procName, NULL); - cmap = pixcmapReadStream(fp); - fclose(fp); - if (!cmap) - return (PIXCMAP *)ERROR_PTR("cmap not read", procName, NULL); - return cmap; -} - - -/*! 
- * \brief pixcmapReadStream() - * - * \param[in] fp file stream - * \return cmap, or NULL on error - */ -PIXCMAP * -pixcmapReadStream(FILE *fp) -{ -l_int32 rval, gval, bval, aval, ignore; -l_int32 i, index, ret, depth, ncolors; -PIXCMAP *cmap; - - PROCNAME("pixcmapReadStream"); - - if (!fp) - return (PIXCMAP *)ERROR_PTR("stream not defined", procName, NULL); - - ret = fscanf(fp, "\nPixcmap: depth = %d bpp; %d colors\n", - &depth, &ncolors); - if (ret != 2 || - (depth != 1 && depth != 2 && depth != 4 && depth != 8) || - (ncolors < 2 || ncolors > 256)) - return (PIXCMAP *)ERROR_PTR("invalid cmap size", procName, NULL); - ignore = fscanf(fp, "Color R-val G-val B-val Alpha\n"); - ignore = fscanf(fp, "----------------------------------------\n"); - - cmap = pixcmapCreate(depth); - for (i = 0; i < ncolors; i++) { - if (fscanf(fp, "%3d %3d %3d %3d %3d\n", - &index, &rval, &gval, &bval, &aval) != 5) { - pixcmapDestroy(&cmap); - return (PIXCMAP *)ERROR_PTR("invalid entry", procName, NULL); - } - pixcmapAddRGBA(cmap, rval, gval, bval, aval); - } - return cmap; -} - - -/*! - * \brief pixcmapReadMem() - * - * \param[in] data serialization of pixcmap; in ascii - * \param[in] size of data in bytes; can use strlen to get it - * \return cmap, or NULL on error - */ -PIXCMAP * -pixcmapReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PIXCMAP *cmap; - - PROCNAME("pixcmapReadMem"); - - if (!data) - return (PIXCMAP *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIXCMAP *)ERROR_PTR("stream not opened", procName, NULL); - - cmap = pixcmapReadStream(fp); - fclose(fp); - if (!cmap) L_ERROR("cmap not read\n", procName); - return cmap; -} - - -/*! 
- * \brief pixcmapWrite() - * - * \param[in] filename - * \param[in] cmap - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapWrite(const char *filename, - const PIXCMAP *cmap) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixcmapWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixcmapWriteStream(fp, cmap); - fclose(fp); - if (ret) - return ERROR_INT("cmap not written to stream", procName, 1); - return 0; -} - - - -/*! - * \brief pixcmapWriteStream() - * - * \param[in] fp file stream - \param[in] cmap - * \return 0 if OK, 1 on error - */ -l_ok -pixcmapWriteStream(FILE *fp, - const PIXCMAP *cmap) -{ -l_int32 *rmap, *gmap, *bmap, *amap; -l_int32 i; - - PROCNAME("pixcmapWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - if (pixcmapToArrays(cmap, &rmap, &gmap, &bmap, &amap)) - return ERROR_INT("colormap arrays not made", procName, 1); - - fprintf(fp, "\nPixcmap: depth = %d bpp; %d colors\n", cmap->depth, cmap->n); - fprintf(fp, "Color R-val G-val B-val Alpha\n"); - fprintf(fp, "----------------------------------------\n"); - for (i = 0; i < cmap->n; i++) - fprintf(fp, "%3d %3d %3d %3d %3d\n", - i, rmap[i], gmap[i], bmap[i], amap[i]); - fprintf(fp, "\n"); - - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - LEPT_FREE(amap); - return 0; -} - - -/*! - * \brief pixcmapWriteMem() - * - * \param[out] pdata data of serialized pixcmap; ascii - * \param[out] psize size of returned data - * \param[in] cmap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a pixcmap in memory and puts the result in a buffer.
- * 
- */ -l_ok -pixcmapWriteMem(l_uint8 **pdata, - size_t *psize, - const PIXCMAP *cmap) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixcmapWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixcmapWriteStream(fp, cmap); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixcmapWriteStream(fp, cmap); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*----------------------------------------------------------------------* - * Extract colormap arrays and serialization * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixcmapToArrays() - * - * \param[in] cmap colormap - * \param[out] prmap, pgmap, pbmap colormap arrays - * \param[out] pamap [optional] alpha array - * \return 0 if OK; 1 on error - */ -l_ok -pixcmapToArrays(const PIXCMAP *cmap, - l_int32 **prmap, - l_int32 **pgmap, - l_int32 **pbmap, - l_int32 **pamap) -{ -l_int32 *rmap, *gmap, *bmap, *amap; -l_int32 i, ncolors; -RGBA_QUAD *cta; - - PROCNAME("pixcmapToArrays"); - - if (!prmap || !pgmap || !pbmap) - return ERROR_INT("&rmap, &gmap, &bmap not all defined", procName, 1); - *prmap = *pgmap = *pbmap = NULL; - if (pamap) *pamap = NULL; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - ncolors = pixcmapGetCount(cmap); - rmap = (l_int32 *)LEPT_CALLOC(ncolors, sizeof(l_int32)); - gmap = (l_int32 *)LEPT_CALLOC(ncolors, sizeof(l_int32)); - bmap = (l_int32 *)LEPT_CALLOC(ncolors, sizeof(l_int32)); - *prmap = rmap; - *pgmap = gmap; - *pbmap = bmap; - if (pamap) { - amap = (l_int32 *)LEPT_CALLOC(ncolors, sizeof(l_int32)); - *pamap = amap; - } - - cta = (RGBA_QUAD *)cmap->array; - for (i = 0; i < ncolors; i++) { - rmap[i] = cta[i].red; - gmap[i] = cta[i].green; - bmap[i] = cta[i].blue; - if (pamap) - amap[i] = cta[i].alpha; - } - - return 0; -} - - -/*! 
- * \brief   pixcmapToRGBTable()
- *
- * \param[in]    cmap      colormap
- * \param[out]   ptab      table of rgba values for the colormap
- * \param[out]   pncolors  [optional] size of table
- * \return  0 if OK; 1 on error
- *
- * Notes:
- *      (1) The table is allocated here with LEPT_CALLOC and holds one
- *          composed l_uint32 pixel per colormap entry.
- *      (2) On error, *ptab is NULL and *pncolors is not modified.
- */
-l_ok
-pixcmapToRGBTable(PIXCMAP    *cmap,
-                  l_uint32  **ptab,
-                  l_int32    *pncolors)
-{
-l_int32    i, ncolors, rval, gval, bval, aval;
-l_uint32  *tab;
-
-    PROCNAME("pixcmapToRGBTable");
-
-    if (!ptab)
-        return ERROR_INT("&tab not defined", procName, 1);
-    *ptab = NULL;
-    if (!cmap)
-        return ERROR_INT("cmap not defined", procName, 1);
-
-    ncolors = pixcmapGetCount(cmap);
-    tab = (l_uint32 *)LEPT_CALLOC(ncolors, sizeof(l_uint32));
-        /* A NULL table is only a failure when entries must be stored;
-         * check it before the loop takes &tab[i]. */
-    if (!tab && ncolors > 0)
-        return ERROR_INT("tab not made", procName, 1);
-    if (pncolors) *pncolors = ncolors;
-    *ptab = tab;
-
-    for (i = 0; i < ncolors; i++) {
-        pixcmapGetRGBA(cmap, i, &rval, &gval, &bval, &aval);
-        composeRGBAPixel(rval, gval, bval, aval, &tab[i]);
-    }
-    return 0;
-}
-
-
-/*!
- * \brief   pixcmapSerializeToMemory()
- *
- * \param[in]    cmap      colormap
- * \param[in]    cpc       components/color: 3 for rgb, 4 for rgba
- * \param[out]   pncolors  number of colors in table
- * \param[out]   pdata     binary string, cpc bytes per color
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) When serializing to store in a pdf, use %cpc = 3.
- * 
- */
-l_ok
-pixcmapSerializeToMemory(PIXCMAP   *cmap,
-                         l_int32    cpc,
-                         l_int32   *pncolors,
-                         l_uint8  **pdata)
-{
-l_int32   i, ncolors, rval, gval, bval, aval;
-l_uint8  *data;
-
-    PROCNAME("pixcmapSerializeToMemory");
-
-    if (!pdata)
-        return ERROR_INT("&data not defined", procName, 1);
-    *pdata = NULL;
-    if (!pncolors)
-        return ERROR_INT("&ncolors not defined", procName, 1);
-    *pncolors = 0;
-    if (!cmap)
-        return ERROR_INT("cmap not defined", procName, 1);
-    if (cpc != 3 && cpc != 4)
-        return ERROR_INT("cpc not 3 or 4", procName, 1);
-
-    ncolors = pixcmapGetCount(cmap);
-    data = (l_uint8 *)LEPT_CALLOC((size_t)cpc * ncolors, sizeof(l_uint8));
-        /* Fail before the loop writes into the buffer.  On failure
-         * the outputs keep the NULL / 0 values set above. */
-    if (!data && ncolors > 0)
-        return ERROR_INT("data not made", procName, 1);
-    *pncolors = ncolors;
-    *pdata = data;
-
-    for (i = 0; i < ncolors; i++) {
-        pixcmapGetRGBA(cmap, i, &rval, &gval, &bval, &aval);
-        data[cpc * i] = rval;
-        data[cpc * i + 1] = gval;
-        data[cpc * i + 2] = bval;
-        if (cpc == 4)
-            data[cpc * i + 3] = aval;
-    }
-    return 0;
-}
-
-
-/*!
- * \brief   pixcmapDeserializeFromMemory()
- *
- * \param[in]    data     binary string, 3 or 4 bytes per color
- * \param[in]    cpc      components/color: 3 for rgb, 4 for rgba
- * \param[in]    ncolors  number of colors in %data; must be in [1 ... 256]
- * \return  cmap, or NULL on error
- *
- * Notes:
- *      (1) The colormap depth is the smallest of 1, 2, 4 or 8 bpp that
- *          can index %ncolors entries.
- *      (2) With %cpc == 3, every color is given alpha = 255 (opaque).
- *      (3) %data must hold at least %cpc * %ncolors bytes; this cannot
- *          be verified here.
- */
-PIXCMAP *
-pixcmapDeserializeFromMemory(l_uint8  *data,
-                             l_int32   cpc,
-                             l_int32   ncolors)
-{
-l_int32   i, d, rval, gval, bval, aval;
-PIXCMAP  *cmap;
-
-    PROCNAME("pixcmapDeserializeFromMemory");
-
-    if (!data)
-        return (PIXCMAP *)ERROR_PTR("data not defined", procName, NULL);
-    if (cpc != 3 && cpc != 4)
-        return (PIXCMAP *)ERROR_PTR("cpc not 3 or 4", procName, NULL);
-        /* Reject negative counts as well as 0; a negative count used
-         * to yield an empty colormap without any diagnostic. */
-    if (ncolors < 1)
-        return (PIXCMAP *)ERROR_PTR("no entries", procName, NULL);
-    if (ncolors > 256)
-        return (PIXCMAP *)ERROR_PTR("ncolors > 256", procName, NULL);
-
-    if (ncolors > 16)
-        d = 8;
-    else if (ncolors > 4)
-        d = 4;
-    else if (ncolors > 2)
-        d = 2;
-    else
-        d = 1;
-    if ((cmap = pixcmapCreate(d)) == NULL)
-        return (PIXCMAP *)ERROR_PTR("cmap not made", procName, NULL);
-    for (i = 0; i < ncolors; i++) {
-        rval = data[cpc * i];
-        gval = data[cpc * i + 1];
-        bval = data[cpc * i + 2];
-        if (cpc == 4)
-            aval = data[cpc * i + 3];
-        else
-            aval = 255;  /* opaque */
-        pixcmapAddRGBA(cmap, rval, gval, bval, aval);
-    }
-
-    return cmap;
-}
-
-
-/*!
- * \brief   pixcmapConvertToHex()
- *
- * \param[in]    data     binary serialized data
- * \param[in]    ncolors  in colormap
- * \return  hexdata       bracketed, space-separated ascii hex string,
- *                        or NULL on error.
- *
- *
- * Notes:
- *      (1) The number of bytes in %data is 3 * ncolors.
- *      (2) Output is in form:
- *             < r0g0b0 r1g1b1 ... rngnbn >
- *          where r0, g0, b0 ... are each 2 bytes of hex ascii
- *      (3) This is used in pdf files to express the colormap as an
- *          array in ascii (human-readable) format.
- * 
- */
-char *
-pixcmapConvertToHex(l_uint8  *data,
-                    l_int32   ncolors)
-{
-static const char  hexdigits[] = "0123456789abcdef";
-l_int32            i, k;
-l_uint8            val;
-size_t             nbytes, pos;
-char              *hexdata;
-
-    PROCNAME("pixcmapConvertToHex");
-
-    if (!data)
-        return (char *)ERROR_PTR("data not defined", procName, NULL);
-    if (ncolors < 1)
-        return (char *)ERROR_PTR("no colors", procName, NULL);
-
-        /* "< " (2) + "rrggbb " (7 per color) + ">" and the string
-         * terminator (2).  Computed in size_t so that a large count
-         * cannot overflow 32-bit integer arithmetic. */
-    nbytes = 2 + (size_t)(2 * 3 + 1) * ncolors + 2;
-    hexdata = (char *)LEPT_CALLOC(nbytes, sizeof(char));
-    if (!hexdata)
-        return (char *)ERROR_PTR("hexdata not made", procName, NULL);
-
-    pos = 0;
-    hexdata[pos++] = '<';
-    hexdata[pos++] = ' ';
-    for (i = 0; i < ncolors; i++) {
-        for (k = 0; k < 3; k++) {  /* red, green, blue */
-            val = data[3 * (size_t)i + k];
-                /* Two lower-case hex digits, as "%02x" would print */
-            hexdata[pos++] = hexdigits[val >> 4];
-            hexdata[pos++] = hexdigits[val & 0x0f];
-        }
-        hexdata[pos++] = ' ';
-    }
-    hexdata[pos++] = '>';
-    hexdata[pos] = '\0';
-    return hexdata;
-}
-
-
-/*-------------------------------------------------------------*
- *                     Colormap transforms                     *
- *-------------------------------------------------------------*/
-/*!
- * \brief   pixcmapGammaTRC()
- *
- * \param[in]    cmap    colormap
- * \param[in]    gamma   gamma correction; must be > 0.0
- * \param[in]    minval  input value that gives 0 for output; can be < 0
- * \param[in]    maxval  input value that gives 255 for output; can be > 255
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) This is an in-place transform
- *      (2) See pixGammaTRC() and numaGammaTRC() in enhance.c
- *          for description and use of transform
- * 
- */
-l_ok
-pixcmapGammaTRC(PIXCMAP   *cmap,
-                l_float32  gamma,
-                l_int32    minval,
-                l_int32    maxval)
-{
-l_int32  index, count;
-l_int32  rin, gin, bin, rout, gout, bout;
-NUMA    *lut;
-
-    PROCNAME("pixcmapGammaTRC");
-
-    if (!cmap)
-        return ERROR_INT("cmap not defined", procName, 1);
-    if (gamma <= 0.0) {
-        L_WARNING("gamma must be > 0.0; setting to 1.0\n", procName);
-        gamma = 1.0;
-    }
-    if (minval >= maxval)
-        return ERROR_INT("minval not < maxval", procName, 1);
-
-        /* These parameters give the identity mapping: nothing to do */
-    if (gamma == 1.0 && minval == 0 && maxval == 255)
-        return 0;
-
-    lut = numaGammaTRC(gamma, minval, maxval);
-    if (lut == NULL)
-        return ERROR_INT("nag not made", procName, 1);
-
-        /* Map each component of every entry through the lookup table */
-    count = pixcmapGetCount(cmap);
-    index = 0;
-    while (index < count) {
-        pixcmapGetColor(cmap, index, &rin, &gin, &bin);
-        numaGetIValue(lut, rin, &rout);
-        numaGetIValue(lut, gin, &gout);
-        numaGetIValue(lut, bin, &bout);
-        pixcmapResetColor(cmap, index, rout, gout, bout);
-        index++;
-    }
-
-    numaDestroy(&lut);
-    return 0;
-}
-
-
-/*!
- * \brief   pixcmapContrastTRC()
- *
- * \param[in]    cmap    colormap
- * \param[in]    factor  generally between 0.0 [no enhancement]
- *                       and 1.0, but can be larger than 1.0
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) This is an in-place transform
- *      (2) See pixContrastTRC() and numaContrastTRC() in enhance.c
- *          for description and use of transform
- * 
- */
-l_ok
-pixcmapContrastTRC(PIXCMAP   *cmap,
-                   l_float32  factor)
-{
-l_int32  index, count;
-l_int32  rin, gin, bin, rout, gout, bout;
-NUMA    *lut;
-
-    PROCNAME("pixcmapContrastTRC");
-
-    if (!cmap)
-        return ERROR_INT("cmap not defined", procName, 1);
-    if (factor < 0.0) {
-        L_WARNING("factor must be >= 0.0; setting to 0.0\n", procName);
-        factor = 0.0;
-    }
-
-    lut = numaContrastTRC(factor);
-    if (lut == NULL)
-        return ERROR_INT("nac not made", procName, 1);
-
-        /* Map each component of every entry through the lookup table */
-    count = pixcmapGetCount(cmap);
-    index = 0;
-    while (index < count) {
-        pixcmapGetColor(cmap, index, &rin, &gin, &bin);
-        numaGetIValue(lut, rin, &rout);
-        numaGetIValue(lut, gin, &gout);
-        numaGetIValue(lut, bin, &bout);
-        pixcmapResetColor(cmap, index, rout, gout, bout);
-        index++;
-    }
-
-    numaDestroy(&lut);
-    return 0;
-}
-
-
-/*!
- * \brief   pixcmapShiftIntensity()
- *
- * \param[in]    cmap      colormap
- * \param[in]    fraction  between -1.0 and +1.0
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) This is an in-place transform
- *      (2) It does a proportional shift of the intensity for each color.
- *      (3) If fraction < 0.0, it moves all colors towards (0,0,0).
- *          This darkens the image.
- *          If fraction > 0.0, it moves all colors towards (255,255,255)
- *          This fades the image.
- *      (4) The equivalent transform can be accomplished with pixcmapGammaTRC(),
- *          but it is considerably more difficult (see numaGammaTRC()).
- * 
- */
-l_ok
-pixcmapShiftIntensity(PIXCMAP   *cmap,
-                      l_float32  fraction)
-{
-l_int32  index, count;
-l_int32  rval, gval, bval;
-l_int32  rnew, gnew, bnew;
-
-    PROCNAME("pixcmapShiftIntensity");
-
-    if (!cmap)
-        return ERROR_INT("cmap not defined", procName, 1);
-    if (fraction < -1.0 || fraction > 1.0)
-        return ERROR_INT("fraction not in [-1.0, 1.0]", procName, 1);
-
-    count = pixcmapGetCount(cmap);
-    for (index = 0; index < count; index++) {
-        pixcmapGetColor(cmap, index, &rval, &gval, &bval);
-        if (fraction < 0.0) {
-                /* Darken: scale each component toward 0 */
-            rnew = (l_int32)((1.0 + fraction) * rval);
-            gnew = (l_int32)((1.0 + fraction) * gval);
-            bnew = (l_int32)((1.0 + fraction) * bval);
-        } else {
-                /* Fade: move each component toward 255 */
-            rnew = rval + (l_int32)(fraction * (255 - rval));
-            gnew = gval + (l_int32)(fraction * (255 - gval));
-            bnew = bval + (l_int32)(fraction * (255 - bval));
-        }
-        pixcmapResetColor(cmap, index, rnew, gnew, bnew);
-    }
-
-    return 0;
-}
-
-
-/*!
- * \brief   pixcmapShiftByComponent()
- *
- * \param[in]    cmap    colormap
- * \param[in]    srcval  source color: 0xrrggbb00
- * \param[in]    dstval  target color: 0xrrggbb00
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) This is an in-place transform
- *      (2) It implements pixelShiftByComponent() for each color.
- *          The mapping is specified by srcval and dstval.
- *      (3) If a component decreases, the component in the colormap
- *          decreases by the same ratio.  Likewise for increasing, except
- *          all ratios are taken with respect to the distance from 255.
- * 
- */
-l_ok
-pixcmapShiftByComponent(PIXCMAP  *cmap,
-                        l_uint32  srcval,
-                        l_uint32  dstval)
-{
-l_int32   index, count;
-l_int32   red, green, blue;
-l_uint32  shifted;
-
-    PROCNAME("pixcmapShiftByComponent");
-
-    if (!cmap)
-        return ERROR_INT("cmap not defined", procName, 1);
-
-        /* Shift every entry by the srcval --> dstval mapping; the
-         * component variables are reused for the shifted color. */
-    count = pixcmapGetCount(cmap);
-    index = 0;
-    while (index < count) {
-        pixcmapGetColor(cmap, index, &red, &green, &blue);
-        pixelShiftByComponent(red, green, blue, srcval, dstval, &shifted);
-        extractRGBValues(shifted, &red, &green, &blue);
-        pixcmapResetColor(cmap, index, red, green, blue);
-        index++;
-    }
-
-    return 0;
-}
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colormorph.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colormorph.c
deleted file mode 100644
index e59b7891..00000000
--- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colormorph.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*====================================================================*
- -  Copyright (C) 2001 Leptonica.  All rights reserved.
- -
- -  Redistribution and use in source and binary forms, with or without
- -  modification, are permitted provided that the following conditions
- -  are met:
- -  1. Redistributions of source code must retain the above copyright
- -     notice, this list of conditions and the following disclaimer.
- -  2. Redistributions in binary form must reproduce the above
- -     copyright notice, this list of conditions and the following
- -     disclaimer in the documentation and/or other materials
- -     provided with the distribution.
- -
- -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- -  A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colormorph.c - *
- *
- *      Top-level color morphological operations
- *
- *            PIX     *pixColorMorph()
- *
- *      Method: Algorithm by van Herk and Gil and Werman, 1992
- *              Apply grayscale morphological operations separately
- *              to each component.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*-----------------------------------------------------------------* - * Top-level color morphological operations * - *-----------------------------------------------------------------*/ -/*! - * \brief pixColorMorph() - * - * \param[in] pixs - * \param[in] type L_MORPH_DILATE, L_MORPH_ERODE, L_MORPH_OPEN, - * or L_MORPH_CLOSE - * \param[in] hsize width of Sel; must be odd; origin implicitly in center - * \param[in] vsize ditto for height of Sel - * \return pixd - * - *
- * Notes:
- *      (1) This does the morph operation on each component separately,
- *          and recombines the result.
- *      (2) Sel is a brick with all elements being hits.
- *      (3) If hsize = vsize = 1, just returns a copy.
- * 
- */
-PIX *
-pixColorMorph(PIX     *pixs,
-              l_int32  type,
-              l_int32  hsize,
-              l_int32  vsize)
-{
-PIX  *pixd;
-PIX  *rsrc, *gsrc, *bsrc;  /* 8 bpp components taken from pixs */
-PIX  *rout, *gout, *bout;  /* components after the morph operation */
-
-    PROCNAME("pixColorMorph");
-
-    if (!pixs)
-        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
-    if (pixGetDepth(pixs) != 32)
-        return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL);
-    if (type != L_MORPH_DILATE && type != L_MORPH_ERODE &&
-        type != L_MORPH_OPEN && type != L_MORPH_CLOSE)
-        return (PIX *)ERROR_PTR("invalid morph type", procName, NULL);
-    if (hsize < 1 || vsize < 1)
-        return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL);
-
-        /* The Sel origin is implicitly at the center, so round
-         * even sizes up to the next odd value. */
-    if (hsize % 2 == 0) {
-        L_WARNING("horiz sel size must be odd; increasing by 1\n", procName);
-        hsize++;
-    }
-    if (vsize % 2 == 0) {
-        L_WARNING("vert sel size must be odd; increasing by 1\n", procName);
-        vsize++;
-    }
-
-        /* A 1 x 1 Sel leaves the image unchanged */
-    if (hsize == 1 && vsize == 1)
-        return pixCopy(NULL, pixs);
-
-    rsrc = pixGetRGBComponent(pixs, COLOR_RED);
-    gsrc = pixGetRGBComponent(pixs, COLOR_GREEN);
-    bsrc = pixGetRGBComponent(pixs, COLOR_BLUE);
-
-        /* Apply the grayscale operation to each component separately */
-    switch (type)
-    {
-    case L_MORPH_DILATE:
-        rout = pixDilateGray(rsrc, hsize, vsize);
-        gout = pixDilateGray(gsrc, hsize, vsize);
-        bout = pixDilateGray(bsrc, hsize, vsize);
-        break;
-    case L_MORPH_ERODE:
-        rout = pixErodeGray(rsrc, hsize, vsize);
-        gout = pixErodeGray(gsrc, hsize, vsize);
-        bout = pixErodeGray(bsrc, hsize, vsize);
-        break;
-    case L_MORPH_OPEN:
-        rout = pixOpenGray(rsrc, hsize, vsize);
-        gout = pixOpenGray(gsrc, hsize, vsize);
-        bout = pixOpenGray(bsrc, hsize, vsize);
-        break;
-    default:  /* L_MORPH_CLOSE; other values were rejected above */
-        rout = pixCloseGray(rsrc, hsize, vsize);
-        gout = pixCloseGray(gsrc, hsize, vsize);
-        bout = pixCloseGray(bsrc, hsize, vsize);
-        break;
-    }
-
-    pixd = pixCreateRGBImage(rout, gout, bout);
-    pixDestroy(&rsrc);
-    pixDestroy(&rout);
-    pixDestroy(&gsrc);
-    pixDestroy(&gout);
-    pixDestroy(&bsrc);
-    pixDestroy(&bout);
-
-    return pixd;
-}
diff --git
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorquant1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorquant1.c deleted file mode 100644 index 52ddd386..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorquant1.c +++ /dev/null @@ -1,4155 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colorquant1.c - *
- *
- *  Octcube color quantization
- *
- *  There are several different octcube/octree based quantizations.
- *  These can be classified, in the order in which they appear in this
- *  file, as follows:
- *
- *  -----------------------------------------------------------------
- *  (1) General adaptive octree
- *  (2) Adaptive octree by population at fixed level
- *  (3) Adaptive octree using population and with specified number
- *      of output colors
- *  (4) Octcube with colormap representation of mixed color/gray
- *  (5) 256 fixed octcubes covering color space
- *  (6) Octcubes at fixed level for ncolors <= 256
- *  (7) Octcubes at fixed level with RGB output
- *  (8) Quantizing an rgb image using a specified colormap
- *  -----------------------------------------------------------------
- *
- *  (1) Two-pass adaptive octree color quantization
- *          PIX              *pixOctreeColorQuant()
- *          PIX              *pixOctreeColorQuantGeneral()
- *
- *        which calls
- *          static CQCELL  ***octreeGenerateAndPrune()
- *          static PIX       *pixOctreeQuantizePixels()
- *
- *        which calls
- *          static l_int32    octreeFindColorCell()
- *
- *      Helper cqcell functions
- *          static CQCELL  ***cqcellTreeCreate()
- *          static void       cqcellTreeDestroy()
- *
- *      Helper index functions
- *          l_int32           makeRGBToIndexTables()
- *          void              getOctcubeIndexFromRGB()
- *          static void       getRGBFromOctcube()
- *          static l_int32    getOctcubeIndices()
- *          static l_int32    octcubeGetCount()
- *
- *  (2) Adaptive octree quantization based on population at a fixed level
- *          PIX              *pixOctreeQuantByPopulation()
- *          static l_int32    pixDitherOctindexWithCmap()
- *
- *  (3) Adaptive octree quantization to 4 and 8 bpp with specified
- *      number of output colors in colormap
- *          PIX              *pixOctreeQuantNumColors()
- *
- *  (4) Mixed color/gray quantization with specified number of colors
- *          PIX              *pixOctcubeQuantMixedWithGray()
- *
- *  (5) Fixed partition octcube quantization with 256 cells
- *          PIX              *pixFixedOctcubeQuant256()
- *
- *  (6) Fixed partition quantization for images with few colors
- *          PIX              *pixFewColorsOctcubeQuant1()
- *          PIX              *pixFewColorsOctcubeQuant2()
- *          PIX              *pixFewColorsOctcubeQuantMixed()
- *
- *  (7) Fixed partition octcube quantization at specified level
- *      with quantized output to RGB
- *          PIX              *pixFixedOctcubeQuantGenRGB()
- *
- *  (8) Color quantize RGB image using existing colormap
- *          PIX              *pixQuantFromCmap()  [high-level wrapper]
- *          PIX              *pixOctcubeQuantFromCmap()
- *          static PIX       *pixOctcubeQuantFromCmapLUT()
- *
- *      Generation of octcube histogram
- *          NUMA             *pixOctcubeHistogram()
- *
- *      Get filled octcube table from colormap
- *          l_int32          *pixcmapToOctcubeLUT()
- *
- *      Strip out unused elements in colormap
- *          l_int32           pixRemoveUnusedColors()
- *
- *      Find number of occupied octcubes at the specified level
- *          l_int32           pixNumberOccupiedOctcubes()
- *
- *  Notes:
- *        Leptonica also provides color quantization using a modified
- *        form of median cut.  See colorquant2.c for details.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* - *
- *   This data structure is used for pixOctreeColorQuant(),
- *   a color octree that adjusts to the color distribution
- *   in the image that is being quantized.  The best settings
- *   are with CqNLevels = 6 and DITHERING set on.
- *
- * Notes:
- *      (1) the CTE (color table entry) index is sequentially
- *          assigned as the tree is pruned back
- *      (2) if 'bleaf' == 1, all pixels in that cube have been
- *          assigned to one or more CTEs.  But note that if
- *          all 8 subcubes have 'bleaf' == 1, it will have no
- *          pixels left for assignment and will not be a CTE.
- *      (3) 'nleaves', the number of leaves contained at the next
- *          lower level is some number between 0 and 8, inclusive.
- *          If it is zero, it means that all colors within this cube
- *          are part of a single growing cluster that has not yet
- *          been set aside as a leaf.  If 'nleaves' > 0, 'bleaf'
- *          will be set to 1 and all pixels not assigned to leaves
- *          at lower levels will be assigned to a CTE here.
- *          (However, as described above, if all pixels are already
- *          assigned, we set 'bleaf' = 1 but do not create a CTE
- *          at this level.)
- *      (4) To keep the maximum color error to a minimum, we
- *          prune the tree back to level 2, and require that
- *          all 64 level 2 cells are CTEs.
- *      (5) We reserve an extra set of colors to prevent running out
- *          of colors during the assignment of the final 64 level 2 cells.
- *          This is more likely to happen with small images.
- *      (6) When we run out of colors, the dithered image can be very
- *          poor, so we additionally prevent dithering if the image
- *          is small.
- *      (7) The color content of the image is measured, and if there
- *          is very little color, it is quantized in grayscale.
- * 
- */ -struct ColorQuantCell -{ - l_int32 rc, gc, bc; /* r, g, b center values for this cell */ - l_int32 n; /* number of samples in this cell */ - l_int32 index; /* CTE (color table entry) index */ - l_int32 nleaves; /* # of leaves contained at next lower level */ - l_int32 bleaf; /* boolean: 0 if not a leaf, 1 if so */ -}; -typedef struct ColorQuantCell CQCELL; - - /* Constants for pixOctreeColorQuant() */ -static const l_int32 CqNLevels = 5; /* only 4, 5 and 6 are allowed; NOTE(review): the comment above this struct says the best settings use CqNLevels = 6 -- confirm that 5 is intended */ -static const l_int32 CqReservedColors = 64; /* to allow for level 2 */ - /* remainder CTEs */ -static const l_int32 ExtraReservedColors = 25; /* to avoid running out */ -static const l_int32 TreeGenWidth = 350; /* big enough for good stats */ -static const l_int32 MinDitherSize = 250; /* don't dither if smaller */ - -/* - *
- *   This data structure is used for pixOctreeQuantNumColors(),
- *   a color octree that adjusts in a simple way to the color
- *   distribution in the image that is being quantized.  It outputs
- *   colormapped images, either 4 bpp or 8 bpp, depending on the
- *   max number of colors and the compression desired.
- *
- *   The number of samples is saved as a float in the first location,
- *   because this is required to use it as the key that orders the
- *   cells in the priority queue.
- * 
- * */ -struct OctcubeQuantCell -{ - l_float32 n; /* number of samples in this cell; a float in the first location because it is the key that orders cells in the priority queue */ - l_int32 octindex; /* octcube index */ - l_int32 rcum, gcum, bcum; /* cumulative r, g, b values */ - l_int32 rval, gval, bval; /* average r, g, b values */ -}; -typedef struct OctcubeQuantCell OQCELL; - -/* - *
- *   This data structure is used for heap sorting octcubes
- *   by population.  Sort order is decreasing.
- * 
- */ -struct L_OctcubePop -{ - l_float32 npix; /* parameter on which to sort: the octcube population, sorted in decreasing order */ - l_int32 index; /* octcube index at assigned level */ - l_int32 rval; /* mean red value of pixels in octcube */ - l_int32 gval; /* mean green value of pixels in octcube */ - l_int32 bval; /* mean blue value of pixels in octcube */ -}; -typedef struct L_OctcubePop L_OCTCUBE_POP; - -/* - *
- *   In pixDitherOctindexWithCmap(), we use these default values.
-     To get the max value of 'dif' in the dithering color transfer,
-     divide these "DIF_CAP" values by 8.  However, a value of
-     0 means that there is no cap (infinite cap).  A very small
-     value is used for POP_DIF_CAP because dithering on the population
-     generated colormap can be unstable without a tight cap.
- * 
- */ - -static const l_int32 FIXED_DIF_CAP = 0; -static const l_int32 POP_DIF_CAP = 40; - - - /* Static octree helper function */ -static l_int32 octreeFindColorCell(l_int32 octindex, CQCELL ***cqcaa, - l_int32 *pindex, l_int32 *prval, - l_int32 *pgval, l_int32 *pbval); - - /* Static cqcell functions */ -static CQCELL ***octreeGenerateAndPrune(PIX *pixs, l_int32 colors, - l_int32 reservedcolors, - PIXCMAP **pcmap); -static PIX *pixOctreeQuantizePixels(PIX *pixs, CQCELL ***cqcaa, - l_int32 ditherflag); -static CQCELL ***cqcellTreeCreate(void); -static void cqcellTreeDestroy(CQCELL ****pcqcaa); - - /* Static helper octcube index functions */ -static void getRGBFromOctcube(l_int32 cubeindex, l_int32 level, - l_int32 *prval, l_int32 *pgval, l_int32 *pbval); -static l_int32 getOctcubeIndices(l_int32 rgbindex, l_int32 level, - l_int32 *pbindex, l_int32 *psindex); -static l_int32 octcubeGetCount(l_int32 level, l_int32 *psize); - - /* Static function to perform octcube-indexed dithering */ -static l_int32 pixDitherOctindexWithCmap(PIX *pixs, PIX *pixd, l_uint32 *rtab, - l_uint32 *gtab, l_uint32 *btab, - l_int32 *carray, l_int32 difcap); - - /* Static function to perform octcube-based quantizing from colormap */ -static PIX *pixOctcubeQuantFromCmapLUT(PIX *pixs, PIXCMAP *cmap, - l_int32 mindepth, l_int32 *cmaptab, - l_uint32 *rtab, l_uint32 *gtab, - l_uint32 *btab); - -#ifndef NO_CONSOLE_IO -#define DEBUG_COLORQUANT 0 -#define DEBUG_OCTINDEX 0 -#define DEBUG_OCTCUBE_CMAP 0 -#define DEBUG_POP 0 -#define DEBUG_FEW_COLORS 0 -#define PRINT_OCTCUBE_STATS 0 -#endif /* ~NO_CONSOLE_IO */ - -/*-------------------------------------------------------------------------* - * Two-pass adaptive octree color quantization * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixOctreeColorQuant() - * - * \param[in] pixs 32 bpp; 24-bit color - * \param[in] colors in colormap; some number in range [128 ... 
256]; - * the actual number of colors used will be smaller - * \param[in] ditherflag 1 to dither, 0 otherwise - * \return pixd 8 bpp with colormap, or NULL on error - * - *
- *  I found one description in the literature of octree color
- *  quantization, using progressive truncation of the octree,
- *  by M. Gervautz and W. Purgathofer in Graphics Gems, pp.
- *  287-293, ed. A. Glassner, Academic Press, 1990.
- *  Rather than setting up a fixed partitioning of the color
- *  space ab initio, as we do here, they allow the octree to be
- *  progressively truncated as new pixels are added.  They
- *  need to set up some data structures that are traversed
- *  with the addition of each 24 bit pixel, in order to decide
- *  either (1) in which cluster (sub-branch of the octree) to put
- *  the pixel, or (2) whether to truncate the octree further
- *  to place the pixel in an existing cluster, or (3) which
- *  two existing clusters should be merged so that the pixel
- *  can be left to start a truncated leaf of the octree.  Such dynamic
- *  truncation is considerably more complicated, and Gervautz et
- *  al. did not explain how they did it in anywhere near the
- *  detail required to check their implementation.
- *
- *  The simple method in pixFixedOctcubeQuant256 is very
- *  fast, and with dithering the results are good, but you
- *  can do better if the color clusters are selected adaptively
- *  from the image.  We want a method that makes much better
- *  use of color samples in regions of color space with high
- *  pixel density, while also fairly representing small numbers
- *  of color pixels in low density regions.  Such adaptation
- *  requires two passes through the image: the first for generating
- *  the pruned tree of color cubes and the second for computing the index
- *  into the color table for each pixel.
- *
- *  A relatively simple adaptive method is pixOctreeQuantByPopulation.
- *  That function first determines if the image has very few colors,
- *  and, if so, quantizes to those colors.  If there are more than
- *  256 colors, it generates a histogram of octcube leaf occupancy
- *  at level 4, chooses the 192 most populated such leaves as
- *  the first 192 colors, and sets the remaining 64 colors to the
- *  residual average pixel values in each of the 64 level 2 octcubes.
- *  This is a bit faster than pixOctreeColorQuant, and does very
- *  well without dithering, but for most images with dithering it
- *  is clearly inferior.
- *
- *  We now describe pixOctreeColorQuant.  The first pass is done
- *  on a subsampled image, because we do not need to use all the
- *  pixels in the image to generate the tree.  Subsampling
- *  down to 0.25 (1/16 of the pixels) makes the program run
- *  about 1.3 times faster.
- *
- *  Instead of dividing the color space into 256 equal-sized
- *  regions, we initially divide it into 2^12 or 2^15 or 2^18
- *  equal-sized octcubes.  Suppose we choose to use 2^18 octcubes.
- *  This gives us 6 octree levels.  We then prune back,
- *  starting from level 6.  For every cube at level 6, there
- *  are 8 cubes at level 5.  Call the operation of putting a
- *  cube aside as a color table entry (CTE) a "saving."
- *  We use a (in general) level-dependent threshold, and save
- *  those level 6 cubes that are above threshold.
- *  The rest are combined into the containing level 5 cube.
- *  If between 1 and 7 level 6 cubes within a level 5
- *  cube have been saved by thresholding, then the remaining
- *  level 6 cubes in that level 5 cube are automatically
- *  saved as well, without applying a threshold.  This greatly
- *  simplifies both the description of the CTEs and the later
- *  classification of each pixel as belonging to a CTE.
- *  This procedure is iterated through every cube, starting at
- *  level 5, and then 4, 3, and 2, successively.  The result is that
- *  each CTE contains the entirety of a set of from 1 to 7 cubes
- *  from a given level that all belong to a single cube at the
- *  level above.   We classify the CTEs in terms of the
- *  condition in which they are made as either being "threshold"
- *  or "residual."  They are "threshold" CTEs if no subcubes
- *  are CTEs (that is, they contain every pixel within the cube)
- *  and the number of pixels exceeds the threshold for making
- *  a CTE.  They are "residual" CTEs if at least one but not more
- *  than 7 of the subcubes have already been determined to be CTEs;
- *  this happens automatically -- no threshold is applied.
- *  If all 8 subcubes are determined to be CTEs, the cube is
- *  marked as having all pixels accounted for ('bleaf' = 1) but
- *  is not saved as a CTE.
- *
- *  We stop the pruning at level 2, at which there are 64
- *  sub-cubes.  Any pixels not already claimed in a CTE are
- *  put in these cubes.
- *
- *  As the cubes are saved as color samples in the color table,
- *  the number of remaining pixels (P) and the number of
- *  remaining colors in the color table (N) are recomputed,
- *  along with the average number of pixels P/N (ppc) to go in
- *  each of the remaining colors.  This running average number is
- *  used to set the threshold at the current level.
- *
- *  Because we are going to very small cubes at levels 6 or 5,
- *  and will dither the colors for errors, it is not necessary
- *  to compute the color center of each cluster; we can simply
- *  use the center of the cube.  This gives us a minimax error
- *  condition: the maximum error is half the width of the
- *  level 2 cubes -- 32 color values out of 256 -- for each color
- *  sample.  In practice, most of the pixels will be very much
- *  closer to the center of their cells.  And with dithering,
- *  the average pixel color in a small region will be closer still.
- *  Thus with the octree quantizer, we are able to capture
- *  regions of high color pdf (probability density function) in small
- *  but accurate CTEs, and to have only a small number of pixels
- *  that end up a significant distance (with a guaranteed maximum)
- *  from their true color.
- *
- *  How should the threshold factor vary?  Threshold factors
- *  are required for levels 2, 3, 4 and 5 in the pruning stage.
- *  The threshold for level 5 is actually applied to cubes at
- *  level 6, etc.  From various experiments, it appears that
- *  the results do not vary appreciably for threshold values near 1.0.
- *  If you want more colors in smaller cubes, the threshold
- *  factors can be set lower than 1.0 for cubes at levels 4 and 5.
- *  However, if the factor is set much lower than 1.0 for
- *  levels 2 and 3, we can easily run out of colors.
- *  We put aside 64 colors in the calculation of the threshold
- *  values, because we must have 64 color centers at level 2,
- *  that will have very few pixels in most of them.
- *  If we reduce the factor for level 5 to 0.4, this will
- *  generate many level 6 CTEs, and consequently
- *  many residual cells will be formed up from those leaves,
- *  resulting in the possibility of running out of colors.
- *  Remember, the residual CTEs are mandatory, and are formed
- *  without using the threshold, regardless of the number of
- *  pixels that are absorbed.
- *
- *  The implementation logically has four parts:
- *
- *       (1) accumulation into small, fixed cells
- *       (2) pruning back into selected CTE cubes
- *       (3) organizing the CTEs for fast search to find
- *           the CTE to which any image pixel belongs
- *       (4) doing a second scan to code the image pixels by CTE
- *
- *  Step (1) is straightforward; we use 2^15 cells.
- *
- *  We've already discussed how the pruning step (2) will be performed.
- *
- *  Steps (3) and (4) are related, in that the organization
- *  used by step (3) determines how the search actually
- *  takes place for each pixel in step (4).
- *
- *  There are many ways to do step (3).  Let's explore a few.
- *
- *  (a) The simplest is to order the cubes from highest occupancy
- *      to lowest, and traverse the list looking for the deepest
- *      match.  To make this more efficient, so that we know when
- *      to stop looking, any cube that has separate CTE subcubes
- *      would be marked as such, so that we know when we hit a
- *      true leaf.
- *
- *  (b) Alternatively, we can order the cubes by highest
- *      occupancy separately for each level, and work upward,
- *      starting at level 5, so that when we find a match we
- *      know that it will be correct.
- *
- *  (c) Another approach would be to order the cubes by
- *      "address" and use a hash table to find the cube
- *      corresponding to a pixel color.  I don't know how to
- *      do this with a variable length address, as each CTE
- *      will have 3*n bits, where n is the level.
- *
- *  (d) Another approach entirely is to put the CTE cubes into
- *      a tree, in such a way that starting from the root, and
- *      using 3 bits of address at a time, the correct branch of
- *      each octree can be taken until a leaf is found.  Because
- *      a given cube can be both a leaf and also have branches
- *      going to sub-cubes, the search stops only when no
- *      marked subcubes have addresses that match the given pixel.
- *
- *      In the tree method, we can start with a dense infrastructure,
- *      and place the leaves corresponding to the N colors
- *      in the tree, or we can grow from the root only those
- *      branches that end directly on leaves.
- *
- *  What we do here is to take approach (d), and implement the tree
- *  "virtually", as a set of arrays, one array for each level
- *  of the tree.   Initially we start at level 5, an array with
- *  2^15 cubes, each with 8 subcubes.  We then build nodes at
- *  levels closer to the root; at level 4 there are 2^12 nodes
- *  each with 8 subcubes; etc.  Using these arrays has
- *  several advantages:
- *
- *     ~  We don't need to keep track of links between cubes
- *        and subcubes, because we can use the canonical
- *        addressing on the cell arrays directly to determine
- *        which nodes are parent cubes and which are sub-cubes.
- *
- *     ~  We can prune directly on this tree
- *
- *     ~  We can navigate the pruned tree quickly to classify
- *        each pixel in the image.
- *
- *  Canonical addressing guarantees that the i-th node at level k
- *  has 8 subnodes given by the 8*i ... 8*i+7 nodes at level k+1.
- *
- *  The pruning step works as follows.  We go from the lowest
- *  level up.  At each level, the threshold is found from the
- *  product of a factor near 1.0 and the ratio of unmarked pixels
- *  to remaining colors minus the 64.  We march through
- *  the space, sequentially considering a cube and its 8 subcubes.
- *  We first check those subcubes that are not already
- *  marked as CTE to see if any are above threshold, and if so,
- *  generate a CTE and mark them as such.
- *  We then determine if any of the subcubes have been marked.
- *  If so, and there are subcubes that are not marked,
- *  we generate a CTE for the cube from the remaining unmarked
- *  subcubes; this is mandatory and does not depend on how many
- *  pixels are in the set of subcubes.  If none of the subcubes
- *  are marked, we aggregate their pixels into the cube
- *  containing them, but do not mark it as a CTE; that
- *  will be determined when iterating through the next level up.
- *
- *  When all the pixels in a cube are accounted for in one or more
- *  colors, we set the boolean 'bleaf' to true.  This is the
- *  flag used to mark the cubes in the pruning step.  If a cube
- *  is marked, and all 8 subcubes are marked, then it is not
- *  itself given a CTE because all pixels have already been
- *  accounted for.
- *
- *  Note that the pruning of the tree and labelling of the CTEs
- *  (step 2) accomplishes step 3 implicitly, because the marked
- *  and pruned tree is ready for use in labelling each pixel
- *  in step 4.  We now, for every pixel in the image, traverse
- *  the tree from the root, looking for the lowest cube that is a leaf.
- *  At each level we have a cube and subcube.  If we reach a subcube
- *  leaf that is marked 0, we know that the color is stored in the
- *  cube above, and we've found the CTE.  Otherwise, the subcube
- *  leaf is marked 1.  If we're at the last level, we've reached
- *  the final leaf and must use it.  Otherwise, continue the
- *  process at the next level down.
- *
- *  For robustness, efficiency and high quality output, we do the following:
- *
- *  (1) Measure the color content of the image.  If there is very little
- *      color, quantize in grayscale.
- *  (2) For efficiency, build the octree with a subsampled image if the
- *      image is larger than some threshold size.
- *  (3) Reserve an extra set of colors to prevent running out of colors
- *      when pruning the octree; specifically, during the assignment
- *      of those level 2 cells out of the 64 that have unassigned
- *      pixels.  The problem of running out is more likely to happen
- *      with small images, because the estimation we use for the
- *      number of pixels available is not accurate.
- *  (4) In the unlikely event that we run out of colors, the dithered
- *      image can be very poor.  As this would only happen with very
- *      small images, and dithering is not particularly noticeable with
- *      such images, turn it off.
- * 
- */
-PIX *
-pixOctreeColorQuant(PIX     *pixs,
-                    l_int32  colors,
-                    l_int32  ditherflag)
-{
-const char  *errmsg;  /* first failed precondition, or NULL if all pass */
-
-    PROCNAME("pixOctreeColorQuant");
-
-        /* Validate in the same order as always, so that the reported
-         * message is the one for the first violated precondition.
-         * The color range here is further restricted than in the
-         * lower-level tree builder. */
-    errmsg = NULL;
-    if (pixs == NULL)
-        errmsg = "pixs not defined";
-    else if (pixGetDepth(pixs) != 32)
-        errmsg = "pixs not 32 bpp";
-    else if (!(colors >= 128 && colors <= 240))
-        errmsg = "colors must be in [128, 240]";
-    if (errmsg != NULL)
-        return (PIX *)ERROR_PTR(errmsg, procName, NULL);
-
-        /* Delegate to the general interface, using 0.01 for both the
-         * valid-pixel and the color-fraction thresholds. */
-    return pixOctreeColorQuantGeneral(pixs, colors, ditherflag, 0.01, 0.01);
-}
-
-
-/*!
- * \brief   pixOctreeColorQuantGeneral()
- *
- * \param[in]    pixs          32 bpp; 24-bit color
- * \param[in]    colors        in colormap; some number in range [128 ... 240];
- *                             the actual number of colors used will be smaller
- * \param[in]    ditherflag    1 to dither, 0 otherwise
- * \param[in]    validthresh   minimum fraction of pixels neither near white
- *                             nor black, required for color quantization;
- *                             typically ~0.01, but smaller for images that have
- *                             color but are nearly all white
- * \param[in]    colorthresh   minimum fraction of pixels with color that are
- *                             not near white or black, that are required
- *                             for color quantization; typ. ~0.01, but smaller
- *                             for images that have color along with a
- *                             significant fraction of gray
- * \return  pixd 8 bit with colormap, or NULL on error
- *
- *
- * Notes:
- *      (1) The parameters %validthresh and %colorthresh are used to
- *          determine if color quantization should be used on an image,
- *          or whether, instead, it should be quantized in grayscale.
- *          If the image has very few non-white and non-black pixels, or
- *          if those pixels that are non-white and non-black are all
- *          very close to either white or black, it is usually better
- *          to treat the color as accidental and to quantize the image
- *          to gray only.  These parameters are useful if you know
- *          something a priori about the image.  Perhaps you know that
- *          there is only a very small fraction of color pixels, but they're
- *          important to preserve; then you want to use a smaller value for
- *          these parameters.  To disable conversion to gray and force
- *          color quantization, use %validthresh = 0.0 and %colorthresh = 0.0.
- *      (2) See pixOctreeColorQuant() for algorithmic and implementation
- *          details.  This function has a more general interface.
- *      (3) See pixColorFraction() for computing the fraction of pixels
- *          that are neither white nor black, and the fraction of those
- *          pixels that have little color.  From the documentation there:
- *             If pixfract is very small, there are few pixels that are
- *             neither black nor white.  If colorfract is very small,
- *             the pixels that are neither black nor white have very
- *             little color content.  The product 'pixfract * colorfract'
- *             gives the fraction of pixels with significant color content.
- *          We test against the product %validthresh * %colorthresh
- *          to find color in images that have either very few
- *          intermediate gray pixels or that have many such gray pixels.
- * 
- */
-PIX *
-pixOctreeColorQuantGeneral(PIX       *pixs,
-                           l_int32    colors,
-                           l_int32    ditherflag,
-                           l_float32  validthresh,
-                           l_float32  colorthresh)
-{
-l_int32    w, h, minside, factor, index, rval, gval, bval;
-l_float32  scalefactor;
-l_float32  pixfract;  /* fraction neither near white nor black */
-l_float32  colorfract;  /* fraction with color of the pixfract population */
-CQCELL  ***cqcaa;
-PIX       *pixd, *pixsub;
-PIXCMAP   *cmap;
-
-    PROCNAME("pixOctreeColorQuantGeneral");
-
-    if (!pixs)
-        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
-    if (pixGetDepth(pixs) != 32)
-        return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL);
-    if (colors < 128 || colors > 240)
-        return (PIX *)ERROR_PTR("colors must be in [128, 240]", procName, NULL);
-
-        /* Determine if the image has sufficient color content for
-         *   octree quantization, based on the input thresholds.
-         * If pixfract << 1, most pixels are close to black or white.
-         * If colorfract << 1, the pixels that are not near
-         *   black or white have very little color.
-         * If with insufficient color, quantize with a grayscale colormap. */
-    pixGetDimensions(pixs, &w, &h, NULL);
-    if (validthresh > 0.0 && colorthresh > 0.0) {
-        minside = L_MIN(w, h);
-        factor = L_MAX(1, minside / 400);
-        pixColorFraction(pixs, 20, 244, 20, factor, &pixfract, &colorfract);
-        if (pixfract * colorfract < validthresh * colorthresh) {
-            L_INFO("\n Pixel fraction neither white nor black = %6.3f"
-                   "\n Color fraction of those pixels = %6.3f"
-                   "\n Quantizing to 8 bpp gray\n",
-                   procName, pixfract, colorfract);
-            return pixConvertTo8(pixs, 1);
-        }
-    } else {
-        L_INFO("\n Process in color by default\n", procName);
-    }
-
-        /* Conditionally subsample to speed up the first pass */
-    if (w > TreeGenWidth) {
-        scalefactor = (l_float32)TreeGenWidth / (l_float32)w;
-        pixsub = pixScaleBySampling(pixs, scalefactor, scalefactor);
-    } else {
-        pixsub = pixClone(pixs);
-    }
-
-        /* Drop the number of requested colors if image is very small */
-    if (w < MinDitherSize && h < MinDitherSize)
-        colors = L_MIN(colors, 220);
-
-        /* Make the pruned octree.  On success the colormap is returned
-         * in cmap and is owned by this function until it is attached
-         * to pixd below. */
-    cmap = NULL;
-    cqcaa = octreeGenerateAndPrune(pixsub, colors, CqReservedColors, &cmap);
-    if (!cqcaa) {
-        pixDestroy(&pixsub);
-        return (PIX *)ERROR_PTR("tree not made", procName, NULL);
-    }
-#if DEBUG_COLORQUANT
-    L_INFO(" Colors requested = %d\n", procName, colors);
-    L_INFO(" Actual colors = %d\n", procName, cmap->n);
-#endif  /* DEBUG_COLORQUANT */
-
-        /* Do not dither if image is very small */
-    if (w < MinDitherSize && h < MinDitherSize && ditherflag == 1) {
-        L_INFO("Small image: dithering turned off\n", procName);
-        ditherflag = 0;
-    }
-
-        /* Traverse tree from root, looking for lowest cube
-         * that is a leaf, and set dest pix value to its
-         * colortable index */
-    if ((pixd = pixOctreeQuantizePixels(pixs, cqcaa, ditherflag)) == NULL) {
-        pixDestroy(&pixsub);
-        cqcellTreeDestroy(&cqcaa);
-        pixcmapDestroy(&cmap);  /* never attached to a pix; free it here */
-        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
-    }
-
-        /* Attach colormap and copy res */
-    pixSetColormap(pixd, cmap);
-    pixCopyResolution(pixd, pixs);
-    pixCopyInputFormat(pixd, pixs);
-
-        /* Force darkest color to black if each component <= 4 */
-    pixcmapGetRankIntensity(cmap, 0.0, &index);
-    pixcmapGetColor(cmap, index, &rval, &gval, &bval);
-    if (rval < 5 && gval < 5 && bval < 5)
-        pixcmapResetColor(cmap, index, 0, 0, 0);
-
-        /* Force lightest color to white if each component >= 252 */
-    pixcmapGetRankIntensity(cmap, 1.0, &index);
-    pixcmapGetColor(cmap, index, &rval, &gval, &bval);
-    if (rval > 251 && gval > 251 && bval > 251)
-        pixcmapResetColor(cmap, index, 255, 255, 255);
-
-    cqcellTreeDestroy(&cqcaa);
-    pixDestroy(&pixsub);
-    return pixd;
-}
-
-
-/*!
- * \brief   octreeGenerateAndPrune()
- *
- * \param[in]    pixs
- * \param[in]    colors           number of colors to use between 128 and 256
- * \param[in]    reservedcolors   number of reserved colors
- * \param[out]   pcmap            colormap returned
- * \return  octree, colormap and number of colors used, or NULL
- *              on error
- *
- *
- * Notes:
- *      (1) The number of colors in the cmap may differ from the number
- *          of colors requested, but it will not be larger than 256
- * 
- */
-static CQCELL ***
-octreeGenerateAndPrune(PIX       *pixs,
-                       l_int32    colors,
-                       l_int32    reservedcolors,
-                       PIXCMAP  **pcmap)
-{
-l_int32    rval, gval, bval, cindex;
-l_int32    level, ncells, octindex;
-l_int32    w, h, wpls;
-l_int32    i, j, isub;
-l_int32    npix;   /* number of remaining pixels to be assigned */
-l_int32    ncolor; /* number of remaining color cells to be used */
-l_int32    ppc;    /* ave number of pixels left for each color cell */
-l_int32    rv, gv, bv;
-l_float32  thresholdFactor[] = {0.01f, 0.01f, 1.0f, 1.0f, 1.0f, 1.0f};
-l_float32  thresh;  /* factor of ppc for this level */
-l_uint32  *datas, *lines;
-l_uint32  *rtab, *gtab, *btab;
-CQCELL  ***cqcaa;   /* one array for each octree level */
-CQCELL   **cqca, **cqcasub;
-CQCELL    *cqc, *cqcsub;
-PIXCMAP   *cmap;
-NUMA      *nat;  /* accumulates levels for threshold cells */
-NUMA      *nar;  /* accumulates levels for residual cells */
-
-    PROCNAME("octreeGenerateAndPrune");
-
-    if (!pixs)
-        return (CQCELL ***)ERROR_PTR("pixs not defined", procName, NULL);
-    if (pixGetDepth(pixs) != 32)
-        return (CQCELL ***)ERROR_PTR("pixs must be 32 bpp", procName, NULL);
-    if (colors < 128 || colors > 256)
-        return (CQCELL ***)ERROR_PTR("colors not in [128,256]", procName, NULL);
-    if (!pcmap)
-        return (CQCELL ***)ERROR_PTR("&cmap not defined", procName, NULL);
-    *pcmap = NULL;  /* nothing is returned unless the tree is built */
-
-    if ((cqcaa = cqcellTreeCreate()) == NULL)
-        return (CQCELL ***)ERROR_PTR("cqcaa not made", procName, NULL);
-
-        /* Make the canonical index tables.  They are indexed by every
-         * pixel below, so a failure here must not be ignored. */
-    rtab = gtab = btab = NULL;
-    if (makeRGBToIndexTables(CqNLevels, &rtab, &gtab, &btab)) {
-        cqcellTreeDestroy(&cqcaa);
-        return (CQCELL ***)ERROR_PTR("tables not made", procName, NULL);
-    }
-
-        /* Generate an 8 bpp cmap (max size 256) */
-    if ((cmap = pixcmapCreate(8)) == NULL) {
-        LEPT_FREE(rtab);
-        LEPT_FREE(gtab);
-        LEPT_FREE(btab);
-        cqcellTreeDestroy(&cqcaa);
-        return (CQCELL ***)ERROR_PTR("cmap not made", procName, NULL);
-    }
-    *pcmap = cmap;
-
-    pixGetDimensions(pixs, &w, &h, NULL);
-    npix = w * h;  /* initialize to all pixels */
-    ncolor = colors - reservedcolors - ExtraReservedColors;
-    ppc = npix / ncolor;
-    datas = pixGetData(pixs);
-    wpls = pixGetWpl(pixs);
-
-        /* Accumulate the centers of each cluster at level CqNLevels */
-    ncells = 1 << (3 * CqNLevels);
-    cqca = cqcaa[CqNLevels];
-    for (i = 0; i < h; i++) {
-        lines = datas + i * wpls;
-        for (j = 0; j < w; j++) {
-            extractRGBValues(lines[j], &rval, &gval, &bval);
-            octindex = rtab[rval] | gtab[gval] | btab[bval];
-            cqc = cqca[octindex];
-            cqc->n++;
-        }
-    }
-
-        /* Arrays for storing statistics */
-    nat = numaCreate(0);
-    nar = numaCreate(0);
-
-        /* Prune back from the lowest level and generate the colormap */
-    for (level = CqNLevels - 1; level >= 2; level--) {
-        thresh = thresholdFactor[level];
-        cqca = cqcaa[level];
-        cqcasub = cqcaa[level + 1];
-        ncells = 1 << (3 * level);
-        for (i = 0; i < ncells; i++) {  /* i is octindex at level */
-            cqc = cqca[i];
-            for (j = 0; j < 8; j++) {  /* check all subnodes */
-                isub = 8 * i + j;   /* isub is octindex at level+1 */
-                cqcsub = cqcasub[isub];
-                if (cqcsub->bleaf == 1) {  /* already a leaf? */
-                    cqc->nleaves++;   /* count the subcube leaves */
-                    continue;
-                }
-                if (cqcsub->n >= thresh * ppc) {  /* make it a true leaf? */
-                    cqcsub->bleaf = 1;
-                        /* Center of this subcube; needed both for a new
-                         * colormap entry and for the nearest-color
-                         * fallback when the colormap is full. */
-                    getRGBFromOctcube(isub, level + 1, &rv, &gv, &bv);
-                    if (cmap->n < 256) {
-                        cqcsub->index = cmap->n;  /* assign the color index */
-                        pixcmapAddColor(cmap, rv, gv, bv);
-#if 1   /* save values */
-                        cqcsub->rc = rv;
-                        cqcsub->gc = gv;
-                        cqcsub->bc = bv;
-#endif
-                    } else {
-                            /* This doesn't seem to happen.  Fall back to
-                             * the existing color nearest the subcube
-                             * center. */
-                        L_ERROR("assigning pixels to wrong color\n", procName);
-                        pixcmapGetNearestIndex(cmap, rv, gv, bv, &cindex);
-                        cqcsub->index = cindex;  /* assign to the nearest */
-                        pixcmapGetColor(cmap, cindex, &rval, &gval, &bval);
-                        cqcsub->rc = rval;
-                        cqcsub->gc = gval;
-                        cqcsub->bc = bval;
-                    }
-                    cqc->nleaves++;
-                    npix -= cqcsub->n;
-                    ncolor--;
-                    if (ncolor > 0)
-                        ppc = npix / ncolor;
-                    else if (ncolor + reservedcolors > 0)
-                        ppc = npix / (ncolor + reservedcolors);
-                    else
-                        ppc = 1000000;  /* make it big */
-                    numaAddNumber(nat, level + 1);
-
-#if  DEBUG_OCTCUBE_CMAP
-    lept_stderr("Exceeds threshold: colors used = %d, colors remaining = %d\n",
-                cmap->n, ncolor + reservedcolors);
-    lept_stderr(" cell with %d pixels, npix = %d, ppc = %d\n",
-                cqcsub->n, npix, ppc);
-    lept_stderr(" index = %d, level = %d, subindex = %d\n",
-                i, level, j);
-    lept_stderr(" rv = %d, gv = %d, bv = %d\n", rv, gv, bv);
-#endif  /* DEBUG_OCTCUBE_CMAP */
-
-                }
-            }
-            if (cqc->nleaves > 0 || level == 2) { /* make the cube a leaf now */
-                cqc->bleaf = 1;
-                if (cqc->nleaves < 8) {  /* residual CTE cube: acquire the
-                                          * remaining pixels */
-                    for (j = 0; j < 8; j++) {  /* check all subnodes */
-                        isub = 8 * i + j;
-                        cqcsub = cqcasub[isub];
-                        if (cqcsub->bleaf == 0)  /* absorb */
-                            cqc->n += cqcsub->n;
-                    }
-                        /* Center of this cube.  Computed before the
-                         * branch so the fallback below does not search
-                         * with values left over from an earlier cube. */
-                    getRGBFromOctcube(i, level, &rv, &gv, &bv);
-                    if (cmap->n < 256) {
-                        cqc->index = cmap->n;  /* assign the color index */
-                        pixcmapAddColor(cmap, rv, gv, bv);
-#if 1   /* save values */
-                        cqc->rc = rv;
-                        cqc->gc = gv;
-                        cqc->bc = bv;
-#endif
-                    } else {
-                        L_WARNING("possibly assigned pixels to wrong color\n",
-                                  procName);
-                            /* This is very bad.  It will only cause trouble
-                             * with dithering, and we try to avoid it with
-                             * ExtraReservedColors. */
-                        pixcmapGetNearestIndex(cmap, rv, gv, bv, &cindex);
-                        cqc->index = cindex;  /* assign to the nearest */
-                        pixcmapGetColor(cmap, cindex, &rval, &gval, &bval);
-                        cqc->rc = rval;
-                        cqc->gc = gval;
-                        cqc->bc = bval;
-                    }
-                    npix -= cqc->n;
-                    ncolor--;
-                    if (ncolor > 0)
-                        ppc = npix / ncolor;
-                    else if (ncolor + reservedcolors > 0)
-                        ppc = npix / (ncolor + reservedcolors);
-                    else
-                        ppc = 1000000;  /* make it big */
-                    numaAddNumber(nar, level);
-
-#if  DEBUG_OCTCUBE_CMAP
-    lept_stderr("By remainder: colors used = %d, colors remaining = %d\n",
-                cmap->n, ncolor + reservedcolors);
-    lept_stderr(" cell with %d pixels, npix = %d, ppc = %d\n",
-                cqc->n, npix, ppc);
-    lept_stderr(" index = %d, level = %d\n", i, level);
-    lept_stderr(" rv = %d, gv = %d, bv = %d\n", rv, gv, bv);
-#endif  /* DEBUG_OCTCUBE_CMAP */
-
-                }
-            } else {  /* absorb all the subpixels but don't make it a leaf */
-                for (j = 0; j < 8; j++) {  /* absorb from all subnodes */
-                    isub = 8 * i + j;
-                    cqcsub = cqcasub[isub];
-                    cqc->n += cqcsub->n;
-                }
-            }
-        }
-    }
-
-#if  PRINT_OCTCUBE_STATS
-{
-l_int32    tc[] = {0, 0, 0, 0, 0, 0, 0};
-l_int32    rc[] = {0, 0, 0, 0, 0, 0, 0};
-l_int32    nt, nr, ival;
-
-    nt = numaGetCount(nat);
-    nr = numaGetCount(nar);
-    for (i = 0; i < nt; i++) {
-        numaGetIValue(nat, i, &ival);
-        tc[ival]++;
-    }
-    for (i = 0; i < nr; i++) {
-        numaGetIValue(nar, i, &ival);
-        rc[ival]++;
-    }
-    lept_stderr(" Threshold cells formed: %d\n", nt);
-    for (i = 1; i < CqNLevels + 1; i++)
-        lept_stderr(" level %d: %d\n", i, tc[i]);
-    lept_stderr("\n Residual cells formed: %d\n", nr);
-    for (i = 0; i < CqNLevels ; i++)
-        lept_stderr(" level %d: %d\n", i, rc[i]);
-}
-#endif  /* PRINT_OCTCUBE_STATS */
-
-    numaDestroy(&nat);
-    numaDestroy(&nar);
-    LEPT_FREE(rtab);
-    LEPT_FREE(gtab);
-    LEPT_FREE(btab);
-
-    return cqcaa;
-}
-
-
-/*!
- * \brief pixOctreeQuantizePixels() - * - * \param[in] pixs 32 bpp - * \param[in] cqcaa octree in array format - * \param[in] ditherflag 1 for dithering, 0 for no dithering - * \return pixd or NULL on error - * - *
- * Notes:
- *      (1) This routine doesn't need to use the CTEs (colormap
- *          table entries) because the color indices are embedded
- *          in the octree.  Thus, the calling program must make
- *          and attach the colormap to pixd after it is returned.
- *      (2) Dithering is performed in integers, effectively rounding
- *          to 1/8 sample increment.  The data in the integer buffers is
- *          64 times the sample values.  The 'dif' is 8 times the
- *          sample values, and this is spread, multiplied by 8, to the
- *          integer buffers.  Because the dif is truncated to an
- *          integer, the dither is accurate to 1/8 of a sample increment,
- *          or 1/2048 of the color range.
- * 
- */
-static PIX *
-pixOctreeQuantizePixels(PIX       *pixs,
-                        CQCELL  ***cqcaa,
-                        l_int32    ditherflag)
-{
-l_uint8   *bufu8r, *bufu8g, *bufu8b;
-l_int32    rval, gval, bval;
-l_int32    octindex, index;
-l_int32    val1, val2, val3, dif;
-l_int32    w, h, wpls, wpld, i, j, success;
-l_int32    rc, gc, bc;
-l_int32   *buf1r, *buf1g, *buf1b, *buf2r, *buf2g, *buf2b;
-l_uint32  *rtab, *gtab, *btab;
-l_uint32  *datas, *datad, *lines, *lined;
-PIX       *pixd;
-
-    PROCNAME("pixOctreeQuantizePixels");
-
-    if (!pixs)
-        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
-    if (pixGetDepth(pixs) != 32)
-        return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL);
-    if (!cqcaa)
-        return (PIX *)ERROR_PTR("cqcaa not defined", procName, NULL);
-
-        /* Make output 8 bpp palette image */
-    pixGetDimensions(pixs, &w, &h, NULL);
-    datas = pixGetData(pixs);
-    wpls = pixGetWpl(pixs);
-    if ((pixd = pixCreate(w, h, 8)) == NULL)
-        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
-    pixCopyResolution(pixd, pixs);
-    pixCopyInputFormat(pixd, pixs);
-    datad = pixGetData(pixd);
-    wpld = pixGetWpl(pixd);
-
-        /* Make the canonical index tables.  Every pixel lookup below
-         * indexes them, so bail out if they could not be made. */
-    rtab = gtab = btab = NULL;
-    if (makeRGBToIndexTables(CqNLevels, &rtab, &gtab, &btab)) {
-        pixDestroy(&pixd);
-        return (PIX *)ERROR_PTR("tables not made", procName, NULL);
-    }
-
-        /* Traverse tree from root, looking for lowest cube
-         * that is a leaf, and set dest pix to its
-         * colortable index value.  The results are far
-         * better when dithering to get a more accurate
-         * average color.  */
-    if (ditherflag == 0) {    /* no dithering */
-        for (i = 0; i < h; i++) {
-            lines = datas + i * wpls;
-            lined = datad + i * wpld;
-            for (j = 0; j < w; j++) {
-                extractRGBValues(lines[j], &rval, &gval, &bval);
-                octindex = rtab[rval] | gtab[gval] | btab[bval];
-                octreeFindColorCell(octindex, cqcaa, &index, &rc, &gc, &bc);
-                SET_DATA_BYTE(lined, j, index);
-            }
-        }
-    } else {  /* Dither */
-        success = TRUE;
-        bufu8r = bufu8g = bufu8b = NULL;
-        buf1r = buf1g = buf1b = buf2r = buf2g = buf2b = NULL;
-        bufu8r = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8));
-        bufu8g = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8));
-        bufu8b = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8));
-        buf1r = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
-        buf1g = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
-        buf1b = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
-        buf2r = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
-        buf2g = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
-        buf2b = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
-        if (!bufu8r || !bufu8g || !bufu8b || !buf1r || !buf1g ||
-            !buf1b || !buf2r || !buf2g || !buf2b) {
-            L_ERROR("buffer not made\n", procName);
-            success = FALSE;
-            goto buffer_cleanup;
-        }
-
-            /* Start by priming buf2; line 1 is above line 2 */
-        pixGetRGBLine(pixs, 0, bufu8r, bufu8g, bufu8b);
-        for (j = 0; j < w; j++) {
-            buf2r[j] = 64 * bufu8r[j];
-            buf2g[j] = 64 * bufu8g[j];
-            buf2b[j] = 64 * bufu8b[j];
-        }
-
-        for (i = 0; i < h - 1; i++) {
-                /* Swap data 2 --> 1, and read in new line 2.  The copy
-                 * size is taken from the element type, not hard-coded. */
-            memcpy(buf1r, buf2r, sizeof(l_int32) * w);
-            memcpy(buf1g, buf2g, sizeof(l_int32) * w);
-            memcpy(buf1b, buf2b, sizeof(l_int32) * w);
-            pixGetRGBLine(pixs, i + 1, bufu8r, bufu8g, bufu8b);
-            for (j = 0; j < w; j++) {
-                buf2r[j] = 64 * bufu8r[j];
-                buf2g[j] = 64 * bufu8g[j];
-                buf2b[j] = 64 * bufu8b[j];
-            }
-
-                /* Dither: the quantization error of each pixel is spread
-                 * to the right, below, and below-right neighbors, and
-                 * the buffers are clamped to [0, 16383] (64 * 255 + 63). */
-            lined = datad + i * wpld;
-            for (j = 0; j < w - 1; j++) {
-                rval = buf1r[j] / 64;
-                gval = buf1g[j] / 64;
-                bval = buf1b[j] / 64;
-                octindex = rtab[rval] | gtab[gval] | btab[bval];
-                octreeFindColorCell(octindex, cqcaa, &index, &rc, &gc, &bc);
-                SET_DATA_BYTE(lined, j, index);
-
-                dif = buf1r[j] / 8 - 8 * rc;
-                if (dif != 0) {
-                    val1 = buf1r[j + 1] + 3 * dif;
-                    val2 = buf2r[j] + 3 * dif;
-                    val3 = buf2r[j + 1] + 2 * dif;
-                    if (dif > 0) {
-                        buf1r[j + 1] = L_MIN(16383, val1);
-                        buf2r[j] = L_MIN(16383, val2);
-                        buf2r[j + 1] = L_MIN(16383, val3);
-                    } else {
-                        buf1r[j + 1] = L_MAX(0, val1);
-                        buf2r[j] = L_MAX(0, val2);
-                        buf2r[j + 1] = L_MAX(0, val3);
-                    }
-                }
-
-                dif = buf1g[j] / 8 - 8 * gc;
-                if (dif != 0) {
-                    val1 = buf1g[j + 1] + 3 * dif;
-                    val2 = buf2g[j] + 3 * dif;
-                    val3 = buf2g[j + 1] + 2 * dif;
-                    if (dif > 0) {
-                        buf1g[j + 1] = L_MIN(16383, val1);
-                        buf2g[j] = L_MIN(16383, val2);
-                        buf2g[j + 1] = L_MIN(16383, val3);
-                    } else {
-                        buf1g[j + 1] = L_MAX(0, val1);
-                        buf2g[j] = L_MAX(0, val2);
-                        buf2g[j + 1] = L_MAX(0, val3);
-                    }
-                }
-
-                dif = buf1b[j] / 8 - 8 * bc;
-                if (dif != 0) {
-                    val1 = buf1b[j + 1] + 3 * dif;
-                    val2 = buf2b[j] + 3 * dif;
-                    val3 = buf2b[j + 1] + 2 * dif;
-                    if (dif > 0) {
-                        buf1b[j + 1] = L_MIN(16383, val1);
-                        buf2b[j] = L_MIN(16383, val2);
-                        buf2b[j + 1] = L_MIN(16383, val3);
-                    } else {
-                        buf1b[j + 1] = L_MAX(0, val1);
-                        buf2b[j] = L_MAX(0, val2);
-                        buf2b[j + 1] = L_MAX(0, val3);
-                    }
-                }
-            }
-
-                /* Get last pixel in row; no downward propagation */
-            rval = buf1r[w - 1] / 64;
-            gval = buf1g[w - 1] / 64;
-            bval = buf1b[w - 1] / 64;
-            octindex = rtab[rval] | gtab[gval] | btab[bval];
-            octreeFindColorCell(octindex, cqcaa, &index, &rc, &gc, &bc);
-            SET_DATA_BYTE(lined, w - 1, index);
-        }
-
-            /* Get last row of pixels; no leftward propagation */
-        lined = datad + (h - 1) * wpld;
-        for (j = 0; j < w; j++) {
-            rval = buf2r[j] / 64;
-            gval = buf2g[j] / 64;
-            bval = buf2b[j] / 64;
-            octindex = rtab[rval] | gtab[gval] | btab[bval];
-            octreeFindColorCell(octindex, cqcaa, &index, &rc, &gc, &bc);
-            SET_DATA_BYTE(lined, j, index);
-        }
-
-buffer_cleanup:
-        LEPT_FREE(bufu8r);
-        LEPT_FREE(bufu8g);
-        LEPT_FREE(bufu8b);
-        LEPT_FREE(buf1r);
-        LEPT_FREE(buf1g);
-        LEPT_FREE(buf1b);
-        LEPT_FREE(buf2r);
-        LEPT_FREE(buf2g);
-        LEPT_FREE(buf2b);
-        if (!success) pixDestroy(&pixd);
-    }
-
-    LEPT_FREE(rtab);
-    LEPT_FREE(gtab);
-    LEPT_FREE(btab);
-    return pixd;
-}
-
-
-/*!
- * \brief   octreeFindColorCell()
- *
- * \param[in]    octindex
- * \param[in]    cqcaa
- * \param[out]   pindex   index of CTE; returned to set pixel value
- * \param[out]   prval    of CTE
- * \param[out]   pgval    of CTE
- * \param[out]   pbval    of CTE
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) As this is in an inner loop, we don't check input pointers!
- *      (2) This traverses from the root (well, actually from level 2,
- *          because the level 2 cubes are the largest CTE cubes),
- *          and finds the index number of the cell and the color values,
- *          which can be used either directly or in a (Floyd-Steinberg)
- *          error-diffusion dithering algorithm.
- * 
- */
-static l_int32
-octreeFindColorCell(l_int32    octindex,
-                    CQCELL  ***cqcaa,
-                    l_int32   *pindex,
-                    l_int32   *prval,
-                    l_int32   *pgval,
-                    l_int32   *pbval)
-{
-l_int32  depth;
-l_int32  parentidx, childidx;
-CQCELL  *child, *hit;
-
-        /* Walk down from level 2.  The search ends at the first level
-         * whose subcube is not a leaf (the color is then held by the
-         * cube above it), or at the bottom level, where the subcube
-         * itself holds the color.  The rgb values stored in the cubes
-         * are used; this is a little faster than regenerating them. */
-    hit = NULL;
-    depth = 2;
-    while (depth < CqNLevels && hit == NULL) {
-        getOctcubeIndices(octindex, depth, &parentidx, &childidx);
-        child = cqcaa[depth + 1][childidx];
-        if (child->bleaf == 0)  /* use cell at level above */
-            hit = cqcaa[depth][parentidx];
-        else if (depth == CqNLevels - 1)  /* reached the bottom */
-            hit = child;
-        depth++;
-    }
-
-    if (hit != NULL) {
-        *pindex = hit->index;
-        *prval = hit->rc;
-        *pgval = hit->gc;
-        *pbval = hit->bc;
-    }
-
-#if 0
-        /* Generate rgb values for each cube on the fly; slower */
-    for (depth = 2; depth < CqNLevels; depth++) {
-        l_int32  rv, gv, bv;
-        getOctcubeIndices(octindex, depth, &parentidx, &childidx);
-        child = cqcaa[depth + 1][childidx];
-        if (child->bleaf == 0) {  /* use cell at level above */
-            getRGBFromOctcube(parentidx, depth, &rv, &gv, &bv);
-            *pindex = cqcaa[depth][parentidx]->index;
-            *prval = rv;
-            *pgval = gv;
-            *pbval = bv;
-            break;
-        } else if (depth == CqNLevels - 1) {  /* reached the bottom */
-            getRGBFromOctcube(childidx, depth + 1, &rv, &gv, &bv);
-            *pindex = child->index;
-            *prval = rv;
-            *pgval = gv;
-            *pbval = bv;
-            break;
-        }
-    }
-#endif
-
-    return 0;
-}
-
-
-
-/*------------------------------------------------------------------*
- *                      Helper cqcell functions                     *
- *------------------------------------------------------------------*/
-/*!
- * \brief   cqcellTreeCreate()
- *
- * \return  cqcell array tree, or NULL on allocation failure
- */
-static CQCELL ***
-cqcellTreeCreate(void)
-{
-l_int32    level, ncells, i;
-CQCELL  ***cqcaa;
-CQCELL   **cqca;   /* one array for each octree level */
-
-    PROCNAME("cqcellTreeCreate");
-
-        /* Make array of accumulation cell arrays, one for each level
-         * from 0 to CqNLevels; level k holds 8^k zeroed cells.  On any
-         * allocation failure the partly built tree is handed to
-         * cqcellTreeDestroy(), which skips levels not yet allocated. */
-    if ((cqcaa = (CQCELL ***)LEPT_CALLOC(CqNLevels + 1, sizeof(CQCELL **)))
-        == NULL)
-        return (CQCELL ***)ERROR_PTR("cqcaa not made", procName, NULL);
-    for (level = 0; level <= CqNLevels; level++) {
-        ncells = 1 << (3 * level);
-        if ((cqca = (CQCELL **)LEPT_CALLOC(ncells, sizeof(CQCELL *))) == NULL) {
-            cqcellTreeDestroy(&cqcaa);
-            return (CQCELL ***)ERROR_PTR("cqca not made", procName, NULL);
-        }
-        cqcaa[level] = cqca;
-        for (i = 0; i < ncells; i++) {
-            if ((cqca[i] = (CQCELL *)LEPT_CALLOC(1, sizeof(CQCELL))) == NULL) {
-                cqcellTreeDestroy(&cqcaa);
-                return (CQCELL ***)ERROR_PTR("cqc not made", procName, NULL);
-            }
-        }
-    }
-
-    return cqcaa;
-}
-
-
-/*!
- * \brief   cqcellTreeDestroy()
- *
- * \param[in,out]    pcqcaa   will be set to null before returning
- *
- *  A partially built tree is accepted: levels whose cell array was
- *  never allocated are skipped.
- */
-static void
-cqcellTreeDestroy(CQCELL  ****pcqcaa)
-{
-l_int32    level, ncells, i;
-CQCELL  ***cqcaa;
-CQCELL   **cqca;
-
-    PROCNAME("cqcellTreeDestroy");
-
-    if (pcqcaa == NULL) {
-        L_WARNING("ptr address is NULL\n", procName);
-        return;
-    }
-
-    if ((cqcaa = *pcqcaa) == NULL)
-        return;
-
-    for (level = 0; level <= CqNLevels; level++) {
-            /* A NULL level array occurs when creation failed part way;
-             * there is nothing to free and it must not be indexed. */
-        if ((cqca = cqcaa[level]) == NULL)
-            continue;
-        ncells = 1 << (3 * level);
-        for (i = 0; i < ncells; i++)
-            LEPT_FREE(cqca[i]);
-        LEPT_FREE(cqca);
-    }
-    LEPT_FREE(cqcaa);
-    *pcqcaa = NULL;
-
-    return;
-}
-
-
-
-/*------------------------------------------------------------------*
- *                       Helper index functions                     *
- *------------------------------------------------------------------*/
-/*!
- * \brief   makeRGBToIndexTables()
- *
- * \param[in]    cqlevels               can be 1, 2, 3, 4, 5 or 6
- * \param[out]   prtab, pgtab, pbtab    tables
- * \return  0 if OK; 1 on error
- *
- *
- *  Set up tables.  e.g., for cqlevels = 5, we need an integer 0 < i < 2^15:
- *      rtab = 0  i7  0   0  i6  0   0  i5  0   0   i4  0   0   i3  0   0
- *      gtab = 0  0   i7  0   0  i6  0   0  i5  0   0   i4  0   0   i3  0
- *      btab = 0  0   0   i7  0  0   i6  0  0   i5  0   0   i4  0   0   i3
- *
- *  The tables are then used to map from rbg --> index as follows:
- *      index = 0  r7  g7  b7  r6  g6  b6  r5  g5  b5  r4  g4  b4  r3  g3  b3
- *
- *    e.g., for cqlevels = 4, we map to
- *      index = 0  0   0   0   r7  g7  b7  r6  g6  b6  r5  g5  b5  r4  g4  b4
- *
- *  This may look a bit strange.  The notation 'r7' means the MSBit of
- *  the r value which has 8 bits, going down from r7 to r0.
- *  Keep in mind that r7 is actually the r component bit for level 1 of
- *  the octtree.  Level 1 is composed of 8 octcubes, represented by
- *  the bits r7 g7 b7, which divide the entire color space into
- *  8 cubes.  At level 2, each of these 8 octcubes is further divided into
- *  8 cubes, each labeled by the second most significant bits r6 g6 b6
- *  of the rgb color.
- * 
- */ -l_ok -makeRGBToIndexTables(l_int32 cqlevels, - l_uint32 **prtab, - l_uint32 **pgtab, - l_uint32 **pbtab) -{ -l_int32 i; -l_uint32 *rtab, *gtab, *btab; - - PROCNAME("makeRGBToIndexTables"); - - if (cqlevels < 1 || cqlevels > 6) - return ERROR_INT("cqlevels must be in {1,...6}", procName, 1); - if (!prtab || !pgtab || !pbtab) - return ERROR_INT("not all &tabs defined", procName, 1); - - rtab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - gtab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - btab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - if (!rtab || !gtab || !btab) - return ERROR_INT("calloc fail for tab", procName, 1); - *prtab = rtab; - *pgtab = gtab; - *pbtab = btab; - - switch (cqlevels) - { - case 1: - for (i = 0; i < 256; i++) { - rtab[i] = (i >> 5) & 0x0004; - gtab[i] = (i >> 6) & 0x0002; - btab[i] = (i >> 7); - } - break; - case 2: - for (i = 0; i < 256; i++) { - rtab[i] = ((i >> 2) & 0x0020) | ((i >> 4) & 0x0004); - gtab[i] = ((i >> 3) & 0x0010) | ((i >> 5) & 0x0002); - btab[i] = ((i >> 4) & 0x0008) | ((i >> 6) & 0x0001); - } - break; - case 3: - for (i = 0; i < 256; i++) { - rtab[i] = ((i << 1) & 0x0100) | ((i >> 1) & 0x0020) | - ((i >> 3) & 0x0004); - gtab[i] = (i & 0x0080) | ((i >> 2) & 0x0010) | - ((i >> 4) & 0x0002); - btab[i] = ((i >> 1) & 0x0040) | ((i >> 3) & 0x0008) | - ((i >> 5) & 0x0001); - } - break; - case 4: - for (i = 0; i < 256; i++) { - rtab[i] = ((i << 4) & 0x0800) | ((i << 2) & 0x0100) | - (i & 0x0020) | ((i >> 2) & 0x0004); - gtab[i] = ((i << 3) & 0x0400) | ((i << 1) & 0x0080) | - ((i >> 1) & 0x0010) | ((i >> 3) & 0x0002); - btab[i] = ((i << 2) & 0x0200) | (i & 0x0040) | - ((i >> 2) & 0x0008) | ((i >> 4) & 0x0001); - } - break; - case 5: - for (i = 0; i < 256; i++) { - rtab[i] = ((i << 7) & 0x4000) | ((i << 5) & 0x0800) | - ((i << 3) & 0x0100) | ((i << 1) & 0x0020) | - ((i >> 1) & 0x0004); - gtab[i] = ((i << 6) & 0x2000) | ((i << 4) & 0x0400) | - ((i << 2) & 0x0080) | (i & 0x0010) | - ((i >> 2) & 0x0002); - btab[i] 
= ((i << 5) & 0x1000) | ((i << 3) & 0x0200) | - ((i << 1) & 0x0040) | ((i >> 1) & 0x0008) | - ((i >> 3) & 0x0001); - } - break; - case 6: - for (i = 0; i < 256; i++) { - rtab[i] = ((i << 10) & 0x20000) | ((i << 8) & 0x4000) | - ((i << 6) & 0x0800) | ((i << 4) & 0x0100) | - ((i << 2) & 0x0020) | (i & 0x0004); - gtab[i] = ((i << 9) & 0x10000) | ((i << 7) & 0x2000) | - ((i << 5) & 0x0400) | ((i << 3) & 0x0080) | - ((i << 1) & 0x0010) | ((i >> 1) & 0x0002); - btab[i] = ((i << 8) & 0x8000) | ((i << 6) & 0x1000) | - ((i << 4) & 0x0200) | ((i << 2) & 0x0040) | - (i & 0x0008) | ((i >> 2) & 0x0001); - } - break; - default: - ERROR_INT("cqlevels not in [1...6]", procName, 1); - break; - } - - return 0; -} - - -/*! - * \brief getOctcubeIndexFromRGB() - * - * \param[in] rval, gval, bval - * \param[in] rtab, gtab, btab generated with makeRGBToIndexTables() - * \param[out] pindex found index - * \return void - * - *
- * Notes:
- *      No error checking!
- * 
- */ -void -getOctcubeIndexFromRGB(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_uint32 *rtab, - l_uint32 *gtab, - l_uint32 *btab, - l_uint32 *pindex) -{ - *pindex = rtab[rval] | gtab[gval] | btab[bval]; - return; -} - - -/*! - * \brief getRGBFromOctcube() - * - * \param[in] cubeindex octcube index - * \param[in] level at which index is expressed - * \param[out] prval r val of this cube - * \param[out] pgval g val of this cube - * \param[out] pbval b val of this cube - * \return void - * - *
- * Notes:
- *      (1) We can consider all octcube indices to represent a
- *          specific point in color space: namely, the location
- *          of the 'upper-left' corner of the cube, where indices
- *          increase down and to the right.  The upper left corner
- *          of the color space is then 00000....
- *      (2) The 'rgbindex' is a 24-bit representation of the location,
- *          in octcube notation, at the center of the octcube.
- *          To get to the center of an octcube, you choose the 111
- *          octcube at the next lower level.
- *      (3) For example, if the octcube index = 110101 (binary),
- *          which is a level 2 expression, then the rgbindex
- *          is the 24-bit representation of 110101111 (at level 3);
- *          namely, 000110101111000000000000.  The number is padded
- *          with 3 leading 0s (because the representation uses
- *          only 21 bits) and 12 trailing 0s (the default for
- *          levels 4-7, which are contained within each of the level3
- *          octcubes.  Then the rgb values for the center of the
- *          octcube are: rval = 11100000, gval = 10100000, bval = 01100000
- * 
- */ -static void -getRGBFromOctcube(l_int32 cubeindex, - l_int32 level, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_int32 rgbindex; - - /* Bring to format in 21 bits: (r7 g7 b7 r6 g6 b6 ...) */ - /* This is valid for levels from 0 to 6 */ - rgbindex = cubeindex << (3 * (7 - level)); /* upper corner of cube */ - rgbindex |= (0x7 << (3 * (6 - level))); /* index to center of cube */ - - /* Extract separate pieces */ - *prval = ((rgbindex >> 13) & 0x80) | - ((rgbindex >> 11) & 0x40) | - ((rgbindex >> 9) & 0x20) | - ((rgbindex >> 7) & 0x10) | - ((rgbindex >> 5) & 0x08) | - ((rgbindex >> 3) & 0x04) | - ((rgbindex >> 1) & 0x02); - *pgval = ((rgbindex >> 12) & 0x80) | - ((rgbindex >> 10) & 0x40) | - ((rgbindex >> 8) & 0x20) | - ((rgbindex >> 6) & 0x10) | - ((rgbindex >> 4) & 0x08) | - ((rgbindex >> 2) & 0x04) | - (rgbindex & 0x02); - *pbval = ((rgbindex >> 11) & 0x80) | - ((rgbindex >> 9) & 0x40) | - ((rgbindex >> 7) & 0x20) | - ((rgbindex >> 5) & 0x10) | - ((rgbindex >> 3) & 0x08) | - ((rgbindex >> 1) & 0x04) | - ((rgbindex << 1) & 0x02); - - return; -} - - -/*! - * \brief getOctcubeIndices() - * - * \param[in] rgbindex - * \param[in] level octree level 0, 1, 2, 3, 4, 5 - * \param[out] pbindex base index index at the octree level - * \param[out] psindex sub index index at the next lower level - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *  for CqNLevels = 6, the full RGB index is in the form:
- *     index = (0[13] 0 r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4 r3 g3 b3 r2 g2 b2)
- *  for CqNLevels = 5, the full RGB index is in the form:
- *     index = (0[16] 0 r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4 r3 g3 b3)
- *  for CqNLevels = 4, the full RGB index is in the form:
- *     index = (0[19] 0 r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4)
- *
- *  The base index is the index of the octcube at the level given,
- *  whereas the sub index is the index at the next level down.
- *
- *  For level 0: base index = 0
- *               sub index is the 3 bit number (r7 g7 b7)
- *  For level 1: base index = (r7 g7 b7)
- *               sub index = (r7 g7 b7 r6 g6 b6)
- *  For level 2: base index = (r7 g7 b7 r6 g6 b6)
- *               sub index = (r7 g7 b7 r6 g6 b6 r5 g5 b5)
- *  For level 3: base index = (r7 g7 b7 r6 g6 b6 r5 g5 b5)
- *               sub index = (r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4)
- *  For level 4: base index = (r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4)
- *               sub index = (r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4 r3 g3 b3)
- *  For level 5: base index = (r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4 r3 g3 b3)
- *               sub index = (r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4 r3 g3 b3
- *                            r2 g2 b2)
- * 
- */ -static l_int32 -getOctcubeIndices(l_int32 rgbindex, - l_int32 level, - l_int32 *pbindex, - l_int32 *psindex) -{ - PROCNAME("getOctcubeIndex"); - - if (level < 0 || level > CqNLevels - 1) - return ERROR_INT("level must be in e.g., [0 ... 5]", procName, 1); - if (!pbindex) - return ERROR_INT("&bindex not defined", procName, 1); - if (!psindex) - return ERROR_INT("&sindex not defined", procName, 1); - - *pbindex = rgbindex >> (3 * (CqNLevels - level)); - *psindex = rgbindex >> (3 * (CqNLevels - 1 - level)); - return 0; -} - - -/*! - * \brief octcubeGetCount() - * - * \param[in] level valid values are in [1,...6]; there are 2^level - * cubes along each side of the rgb cube - * \param[out] psize 2^(3 * level) cubes in the entire rgb cube - * \return 0 if OK, 1 on error. Caller must check! - * - *
- *     level:   1        2        3        4        5        6
- *     size:    8       64       512     4098     32784   262272
- * 
- */ -static l_int32 -octcubeGetCount(l_int32 level, - l_int32 *psize) -{ - PROCNAME("octcubeGetCount"); - - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (level < 1 || level > 6) - return ERROR_INT("invalid level", procName, 1); - - *psize = 1 << (3 * level); - return 0; -} - - -/*---------------------------------------------------------------------------* - * Adaptive octree quantization based on population at a fixed level * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixOctreeQuantByPopulation() - * - * \param[in] pixs 32 bpp rgb - * \param[in] level significant bits for each of RGB; valid for {3,4}. - * Use 0 for default (level 4; recommended - * \param[in] ditherflag 1 to dither, 0 otherwise - * \return pixd quantized to octcubes or NULL on error - * - *
- * Notes:
- *      (1) This color quantization method works very well without
- *          dithering, using octcubes at two different levels:
- *            (a) the input %level, which is either 3 or 4
- *            (b) level 2 (64 octcubes to cover the entire color space)
- *      (2) For best results, using %level = 4 is recommended.
- *          Why do we provide an option for using level 3?  Because
- *          there are 512 octcubes at level 3, and for many images
- *          not more than 256 are filled.  As a result, on some images
- *          a very accurate quantized representation is possible using
- *          %level = 3.
- *      (3) This first breaks up the color space into octcubes at the
- *          input %level, and computes, for each octcube, the average
- *          value of the pixels that are in it.
- *      (4) Then there are two possible situations:
- *            (a) If there are not more than 256 populated octcubes,
- *                it returns a cmapped pix with those values assigned.
- *            (b) Otherwise, it selects 192 octcubes containing the largest
- *                number of pixels and quantizes pixels within those octcubes
- *                to their average.  Then, to handle the residual pixels
- *                that are not in those 192 octcubes, it generates a
- *                level 2 octree consisting of 64 octcubes, and within
- *                each octcube it quantizes the residual pixels to their
- *                average within each of those level 2 octcubes.
- *      (5) Unpopulated level 2 octcubes are represented in the colormap
- *          by their centers.  This, of course, has no effect unless
- *          dithering is used for the output image.
- *      (6) The depth of pixd is the minimum required to support the
- *          number of colors found at %level; namely, 2, 4 or 8.
- *      (7) This function works particularly well on images such as maps,
- *          where there are a relatively small number of well-populated
- *          colors, but due to antialiasing and compression artifacts
- *          there may be a large number of different colors.  This will
- *          pull out and represent accurately the highly populated colors,
- *          while still making a reasonable approximation for the others.
- *      (8) The highest level of octcubes allowed is 4.  Use of higher
- *          levels typically results in having a small fraction of
- *          pixels in the most populated 192 octcubes.  As a result,
- *          most of the pixels are represented at level 2, which is
- *          not sufficiently accurate.
- *      (9) Dithering shows artifacts on some images.  If you plan to
- *          dither, pixOctreeColorQuant() and pixFixedOctcubeQuant256()
- *          usually give better results.
- * 
- */ -PIX * -pixOctreeQuantByPopulation(PIX *pixs, - l_int32 level, - l_int32 ditherflag) -{ -l_int32 w, h, wpls, wpld, i, j, depth, size, ncolors, index; -l_int32 rval, gval, bval; -l_int32 *rarray, *garray, *barray, *narray, *iarray; -l_uint32 octindex, octindex2; -l_uint32 *rtab, *gtab, *btab, *rtab2, *gtab2, *btab2; -l_uint32 *lines, *lined, *datas, *datad; -L_OCTCUBE_POP *opop; -L_HEAP *lh; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixOctreeQuantByPopulation"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (level == 0) level = 4; - if (level < 3 || level > 4) - return (PIX *)ERROR_PTR("level not in {3,4}", procName, NULL); - - /* Do not dither if image is very small */ - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinDitherSize && h < MinDitherSize && ditherflag == 1) { - L_INFO("Small image: dithering turned off\n", procName); - ditherflag = 0; - } - - if (octcubeGetCount(level, &size)) /* array size = 2 ** (3 * level) */ - return (PIX *)ERROR_PTR("size not returned", procName, NULL); - rtab = gtab = btab = NULL; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - - pixd = NULL; - narray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - rarray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - garray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - barray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - if (!narray || !rarray || !garray || !barray) - goto array_cleanup; - - /* Place the pixels in octcube leaves. 
*/ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - narray[octindex]++; - rarray[octindex] += rval; - garray[octindex] += gval; - barray[octindex] += bval; - } - } - - /* Find the number of different colors */ - for (i = 0, ncolors = 0; i < size; i++) { - if (narray[i] > 0) - ncolors++; - } - if (ncolors <= 4) - depth = 2; - else if (ncolors <= 16) - depth = 4; - else - depth = 8; - pixd = pixCreate(w, h, depth); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - cmap = pixcmapCreate(depth); - pixSetColormap(pixd, cmap); - - /* Average the colors in each octcube leaf. */ - for (i = 0; i < size; i++) { - if (narray[i] > 0) { - rarray[i] /= narray[i]; - garray[i] /= narray[i]; - barray[i] /= narray[i]; - } - } - - /* If ncolors <= 256, finish immediately. Do not dither. - * Re-use narray to hold the colormap index + 1 */ - if (ncolors <= 256) { - for (i = 0, index = 0; i < size; i++) { - if (narray[i] > 0) { - pixcmapAddColor(cmap, rarray[i], garray[i], barray[i]); - narray[i] = index + 1; /* to avoid storing 0 */ - index++; - } - } - - /* Set the cmap indices for each pixel */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - switch (depth) - { - case 8: - SET_DATA_BYTE(lined, j, narray[octindex] - 1); - break; - case 4: - SET_DATA_QBIT(lined, j, narray[octindex] - 1); - break; - case 2: - SET_DATA_DIBIT(lined, j, narray[octindex] - 1); - break; - default: - L_WARNING("shouldn't get here\n", procName); - } - } - } - goto array_cleanup; - } - - /* More complicated. 
Sort by decreasing population */ - lh = lheapCreate(500, L_SORT_DECREASING); - for (i = 0; i < size; i++) { - if (narray[i] > 0) { - opop = (L_OCTCUBE_POP *)LEPT_CALLOC(1, sizeof(L_OCTCUBE_POP)); - opop->npix = (l_float32)narray[i]; - opop->index = i; - opop->rval = rarray[i]; - opop->gval = garray[i]; - opop->bval = barray[i]; - lheapAdd(lh, opop); - } - } - - /* Take the top 192. These will form the first 192 colors - * in the cmap. iarray[i] holds the index into the cmap. */ - iarray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - for (i = 0; i < 192; i++) { - opop = (L_OCTCUBE_POP*)lheapRemove(lh); - if (!opop) break; - pixcmapAddColor(cmap, opop->rval, opop->gval, opop->bval); - iarray[opop->index] = i + 1; /* +1 to avoid storing 0 */ - -#if DEBUG_POP - lept_stderr("i = %d, n = %6.0f, (r,g,b) = (%d %d %d)\n", - i, opop->npix, opop->rval, opop->gval, opop->bval); -#endif /* DEBUG_POP */ - - LEPT_FREE(opop); - } - - /* Make the octindex tables for level 2, and reuse rarray, etc. */ - rtab2 = gtab2 = btab2 = NULL; - makeRGBToIndexTables(2, &rtab2, >ab2, &btab2); - for (i = 0; i < 64; i++) { - narray[i] = 0; - rarray[i] = 0; - garray[i] = 0; - barray[i] = 0; - } - - /* Take the rest of the occupied octcubes, assigning the pixels - * to these new colormap indices. iarray[] is addressed - * by %level octcube indices, and it now holds the - * colormap indices for all pixels in pixs. 
*/ - for (i = 192; i < size; i++) { - opop = (L_OCTCUBE_POP*)lheapRemove(lh); - if (!opop) break; - rval = opop->rval; - gval = opop->gval; - bval = opop->bval; - octindex2 = rtab2[rval] | gtab2[gval] | btab2[bval]; - narray[octindex2] += (l_int32)opop->npix; - rarray[octindex2] += (l_int32)opop->npix * rval; - garray[octindex2] += (l_int32)opop->npix * gval; - barray[octindex2] += (l_int32)opop->npix * bval; - iarray[opop->index] = 192 + octindex2 + 1; /* +1 to avoid storing 0 */ - LEPT_FREE(opop); - } - lheapDestroy(&lh, TRUE); - - /* To span the full color space, which is necessary for dithering, - * set each iarray element whose value is still 0 at the input - * level octcube leaves (because there were no pixels in those - * octcubes) to the colormap index corresponding to its level 2 - * octcube. */ - if (ditherflag) { - for (i = 0; i < size; i++) { - if (iarray[i] == 0) { - getRGBFromOctcube(i, level, &rval, &gval, &bval); - octindex2 = rtab2[rval] | gtab2[gval] | btab2[bval]; - iarray[i] = 192 + octindex2 + 1; - } - } - } - LEPT_FREE(rtab2); - LEPT_FREE(gtab2); - LEPT_FREE(btab2); - - /* Average the colors from the residuals in each level 2 octcube, - * and add these 64 values to the colormap. */ - for (i = 0; i < 64; i++) { - if (narray[i] > 0) { - rarray[i] /= narray[i]; - garray[i] /= narray[i]; - barray[i] /= narray[i]; - } else { /* no pixels in this octcube; use center value */ - getRGBFromOctcube(i, 2, &rarray[i], &garray[i], &barray[i]); - } - pixcmapAddColor(cmap, rarray[i], garray[i], barray[i]); - } - - /* Set the cmap indices for each pixel. Subtract 1 from - * the value in iarray[] because we added 1 earlier. 
*/ - if (ditherflag == 0) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - SET_DATA_BYTE(lined, j, iarray[octindex] - 1); - } - } - } else { /* dither */ - pixDitherOctindexWithCmap(pixs, pixd, rtab, gtab, btab, - iarray, POP_DIF_CAP); - } - -#if DEBUG_POP - for (i = 0; i < size / 16; i++) { - l_int32 j; - for (j = 0; j < 16; j++) - lept_stderr("%d ", iarray[16 * i + j]); - lept_stderr("\n"); - } -#endif /* DEBUG_POP */ - - LEPT_FREE(iarray); - -array_cleanup: - LEPT_FREE(narray); - LEPT_FREE(rarray); - LEPT_FREE(garray); - LEPT_FREE(barray); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - - return pixd; -} - - -/*! - * \brief pixDitherOctindexWithCmap() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixd 8 bpp cmapped - * \param[in] rtab, gtab, btab tables from rval to octindex - * \param[in] indexmap array mapping octindex to cmap index - * \param[in] difcap max allowed dither transfer; - * use 0 for infinite cap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This performs dithering to generate the colormap indices
- *          in pixd.  The colormap has been calculated, along with
- *          four input LUTs that together give the inverse colormapping
- *          from RGB to colormap index.
- *      (2) For pixOctreeQuantByPopulation(), %indexmap maps from the
- *          standard octindex to colormap index (after subtracting 1).
- *          The basic pixel-level function, without dithering, is:
- *             extractRGBValues(lines[j], &rval, &gval, &bval);
- *             octindex = rtab[rval] | gtab[gval] | btab[bval];
- *             SET_DATA_BYTE(lined, j, indexmap[octindex] - 1);
- *      (3) This can be used in any situation where the general
- *          prescription for finding the colormap index from the rgb
- *          value is precisely this:
- *             cmapindex = indexmap[rtab[rval] | gtab[gval] | btab[bval]] - 1
- *          For example, in pixFixedOctcubeQuant256(), we don't use
- *          standard octcube indexing, the rtab (etc) LUTs map directly
- *          to the colormap index, and %indexmap just compensates for
- *          the 1-off indexing assumed to be in that table.
- * 
- */ -static l_int32 -pixDitherOctindexWithCmap(PIX *pixs, - PIX *pixd, - l_uint32 *rtab, - l_uint32 *gtab, - l_uint32 *btab, - l_int32 *indexmap, - l_int32 difcap) -{ -l_uint8 *bufu8r, *bufu8g, *bufu8b; -l_int32 i, j, w, h, wpld, octindex, cmapindex, success; -l_int32 rval, gval, bval, rc, gc, bc; -l_int32 dif, val1, val2, val3; -l_int32 *buf1r, *buf1g, *buf1b, *buf2r, *buf2g, *buf2b; -l_uint32 *datad, *lined; -PIXCMAP *cmap; - - PROCNAME("pixDitherOctindexWithCmap"); - - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs undefined or not 32 bpp", procName, 1); - if (!pixd || pixGetDepth(pixd) != 8) - return ERROR_INT("pixd undefined or not 8 bpp", procName, 1); - if ((cmap = pixGetColormap(pixd)) == NULL) - return ERROR_INT("pixd not cmapped", procName, 1); - if (!rtab || !gtab || !btab || !indexmap) - return ERROR_INT("not all 4 tables defined", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixGetWidth(pixd) != w || pixGetHeight(pixd) != h) - return ERROR_INT("pixs and pixd not same size", procName, 1); - - success = TRUE; - bufu8r = bufu8g = bufu8b = NULL; - buf1r = buf1g = buf1b = buf2r = buf2g = buf2b = NULL; - bufu8r = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8)); - bufu8g = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8)); - bufu8b = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8)); - buf1r = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf1g = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf1b = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf2r = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf2g = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf2b = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - if (!bufu8r || !bufu8g || !bufu8b || !buf1r || !buf1g || - !buf1b || !buf2r || !buf2g || !buf2b) { - L_ERROR("buffer not made\n", procName); - success = FALSE; - goto buffer_cleanup; - } - - /* Start by priming buf2; line 1 is above line 2 */ - pixGetRGBLine(pixs, 0, bufu8r, bufu8g, bufu8b); - for (j = 0; j < w; j++) { - buf2r[j] = 64 * bufu8r[j]; 
- buf2g[j] = 64 * bufu8g[j]; - buf2b[j] = 64 * bufu8b[j]; - } - - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h - 1; i++) { - /* Swap data 2 --> 1, and read in new line 2 */ - memcpy(buf1r, buf2r, 4 * w); - memcpy(buf1g, buf2g, 4 * w); - memcpy(buf1b, buf2b, 4 * w); - pixGetRGBLine(pixs, i + 1, bufu8r, bufu8g, bufu8b); - for (j = 0; j < w; j++) { - buf2r[j] = 64 * bufu8r[j]; - buf2g[j] = 64 * bufu8g[j]; - buf2b[j] = 64 * bufu8b[j]; - } - - /* Dither */ - lined = datad + i * wpld; - for (j = 0; j < w - 1; j++) { - rval = buf1r[j] / 64; - gval = buf1g[j] / 64; - bval = buf1b[j] / 64; - octindex = rtab[rval] | gtab[gval] | btab[bval]; - cmapindex = indexmap[octindex] - 1; - SET_DATA_BYTE(lined, j, cmapindex); - pixcmapGetColor(cmap, cmapindex, &rc, &gc, &bc); - - dif = buf1r[j] / 8 - 8 * rc; - if (difcap > 0) { - if (dif > difcap) dif = difcap; - if (dif < -difcap) dif = -difcap; - } - if (dif != 0) { - val1 = buf1r[j + 1] + 3 * dif; - val2 = buf2r[j] + 3 * dif; - val3 = buf2r[j + 1] + 2 * dif; - if (dif > 0) { - buf1r[j + 1] = L_MIN(16383, val1); - buf2r[j] = L_MIN(16383, val2); - buf2r[j + 1] = L_MIN(16383, val3); - } else { - buf1r[j + 1] = L_MAX(0, val1); - buf2r[j] = L_MAX(0, val2); - buf2r[j + 1] = L_MAX(0, val3); - } - } - - dif = buf1g[j] / 8 - 8 * gc; - if (difcap > 0) { - if (dif > difcap) dif = difcap; - if (dif < -difcap) dif = -difcap; - } - if (dif != 0) { - val1 = buf1g[j + 1] + 3 * dif; - val2 = buf2g[j] + 3 * dif; - val3 = buf2g[j + 1] + 2 * dif; - if (dif > 0) { - buf1g[j + 1] = L_MIN(16383, val1); - buf2g[j] = L_MIN(16383, val2); - buf2g[j + 1] = L_MIN(16383, val3); - } else { - buf1g[j + 1] = L_MAX(0, val1); - buf2g[j] = L_MAX(0, val2); - buf2g[j + 1] = L_MAX(0, val3); - } - } - - dif = buf1b[j] / 8 - 8 * bc; - if (difcap > 0) { - if (dif > difcap) dif = difcap; - if (dif < -difcap) dif = -difcap; - } - if (dif != 0) { - val1 = buf1b[j + 1] + 3 * dif; - val2 = buf2b[j] + 3 * dif; - val3 = buf2b[j + 1] + 2 * dif; - if (dif 
> 0) { - buf1b[j + 1] = L_MIN(16383, val1); - buf2b[j] = L_MIN(16383, val2); - buf2b[j + 1] = L_MIN(16383, val3); - } else { - buf1b[j + 1] = L_MAX(0, val1); - buf2b[j] = L_MAX(0, val2); - buf2b[j + 1] = L_MAX(0, val3); - } - } - } - - /* Get last pixel in row; no downward propagation */ - rval = buf1r[w - 1] / 64; - gval = buf1g[w - 1] / 64; - bval = buf1b[w - 1] / 64; - octindex = rtab[rval] | gtab[gval] | btab[bval]; - cmapindex = indexmap[octindex] - 1; - SET_DATA_BYTE(lined, w - 1, cmapindex); - } - - /* Get last row of pixels; no leftward propagation */ - lined = datad + (h - 1) * wpld; - for (j = 0; j < w; j++) { - rval = buf2r[j] / 64; - gval = buf2g[j] / 64; - bval = buf2b[j] / 64; - octindex = rtab[rval] | gtab[gval] | btab[bval]; - cmapindex = indexmap[octindex] - 1; - SET_DATA_BYTE(lined, j, cmapindex); - } - -buffer_cleanup: - LEPT_FREE(bufu8r); - LEPT_FREE(bufu8g); - LEPT_FREE(bufu8b); - LEPT_FREE(buf1r); - LEPT_FREE(buf1g); - LEPT_FREE(buf1b); - LEPT_FREE(buf2r); - LEPT_FREE(buf2g); - LEPT_FREE(buf2b); - - return (success) ? 0 : 1; -} - - -/*---------------------------------------------------------------------------* - * Adaptive octree quantization to 4 and 8 bpp with max colors * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixOctreeQuantNumColors() - * - * \param[in] pixs 32 bpp rgb - * \param[in] maxcolors 8 to 256; the actual number of colors used - * may be less than this - * \param[in] subsample factor for computing color distribution; - * use 0 for default - * \return pixd 4 or 8 bpp, colormapped, or NULL on error - * - *
- *  pixOctreeColorQuant is very flexible in terms of the relative
- *  depth of different cubes of the octree.   By contrast, this function,
- *  pixOctreeQuantNumColors is also adaptive, but it supports octcube
- *  leaves at only two depths: a smaller depth that guarantees
- *  full coverage of the color space and octcubes at one level
- *  deeper for more accurate colors.  Its main virutes are simplicity
- *  and speed, which are both derived from the natural indexing of
- *  the octcubes from the RGB values.
- *
- *  Before describing pixOctreeQuantNumColors, consider an even simpler
- *  approach for 4 bpp with either 8 or 16 colors.  With 8 colors,
- *  you simply go to level 1 octcubes and use the average color
- *  found in each cube.  For 16 colors, you find which of the three
- *  colors has the largest variance at the second level, and use two
- *  indices for that color.  The result is quite poor, because 1 some
- *  of the cubes are nearly empty and 2 you don't get much color
- *  differentiation for the extra 8 colors.  Trust me, this method may
- *  be simple, but it isn't worth anything.
- *
- *  In pixOctreeQuantNumColors, we generate colormapped images at
- *  either 4 bpp or 8 bpp.  For 4 bpp, we have a minimum of 8 colors
- *  for the level 1 octcubes, plus up to 8 additional colors that
- *  are determined from the level 2 popularity.  If the number of colors
- *  is between 8 and 16, the output is a 4 bpp image.  If the number of
- *  colors is greater than 16, the output is a 8 bpp image.
- *
- *  We use a priority queue, implemented with a heap, to select the
- *  requisite number of most populated octcubes at the deepest level
- *  level 2 for 64 or fewer colors; level 3 for more than 64 colors.
- *  These are combined with one color for each octcube one level above,
- *  which is used to span the color space of octcubes that were not
- *  included at the deeper level.
- *
- *  If the deepest level is 2, we combine the popular level 2 octcubes
- *  out of a total of 64 with the 8 level 1 octcubes.  If the deepest
- *  level is 3, we combine the popular level 3 octcubes out of a
- *  total 512 with the 64 level 2 octcubes that span the color space.
- *  In the latter case, we require a minimum of 64 colors for the level 2
- *  octcubes, plus up to 192 additional colors determined from level 3
- *  popularity.
- *
- *  The parameter 'maxlevel' is the deepest octcube level that is used.
- *  The implementation also uses two LUTs, which are employed in
- *  two successive traversals of the dest image.  The first maps
- *  from the src octindex at 'maxlevel' to the color table index,
- *  which is the value that is stored in the 4 or 8 bpp dest pixel.
- *  The second LUT maps from that colormap value in the dest to a
- *  new colormap value for a minimum sized colormap, stored back in
- *  the dest.  It is used to remove any color map entries that
- *  correspond to color space regions that have no pixels in the
- *  source image.  These regions can be either from the higher level
- *  e.g., level 1 for 4 bpp, or from octcubes at 'maxlevel' that
- *  are unoccupied.  This remapping results in the minimum number
- *  of colors used according to the constraints induced by the
- *  input 'maxcolors'.  We also compute the average R, G and B color
- *  values in each region of the color space represented by a
- *  colormap entry, and store them in the colormap.
- *
- *  The maximum number of colors is input, which determines the
- *  following properties of the dest image and octcube regions used:
- *
- *     Number of colors      dest image depth      maxlevel
- *     ----------------      ----------------      --------
- *       8 to 16                  4 bpp               2
- *       17 to 64                 8 bpp               2
- *       65 to 256                8 bpp               3
- *
- *  It may turn out that the number of extra colors, beyond the
- *  minimum 8 and 64 for maxlevel 2 and 3, respectively, is larger
- *  than the actual number of occupied cubes at these levels
- *  In that case, all the pixels are contained in this
- *  subset of cubes at maxlevel, and no colormap colors are needed
- *  to represent the remainder pixels one level above.  Thus, for
- *  example, in use one often finds that the pixels in an image
- *  occupy less than 192 octcubes at level 3, so they can be represented
- *  by a colormap for octcubes at level 3 only.
- * 
- */ -PIX * -pixOctreeQuantNumColors(PIX *pixs, - l_int32 maxcolors, - l_int32 subsample) -{ -l_int32 w, h, minside, bpp, wpls, wpld, i, j, actualcolors; -l_int32 rval, gval, bval, nbase, nextra, maxlevel, ncubes, val; -l_int32 *lut1, *lut2; -l_uint32 index; -l_uint32 *lines, *lined, *datas, *datad, *pspixel; -l_uint32 *rtab, *gtab, *btab; -OQCELL *oqc; -OQCELL **oqca; -L_HEAP *lh; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixOctreeQuantNumColors"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (maxcolors < 8) { - L_WARNING("max colors < 8; setting to 8\n", procName); - maxcolors = 8; - } - if (maxcolors > 256) { - L_WARNING("max colors > 256; setting to 256\n", procName); - maxcolors = 256; - } - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - minside = L_MIN(w, h); - if (subsample <= 0) { - subsample = L_MAX(1, minside / 200); - } - - if (maxcolors <= 16) { - bpp = 4; - pixd = pixCreate(w, h, bpp); - maxlevel = 2; - ncubes = 64; /* 2^6 */ - nbase = 8; - nextra = maxcolors - nbase; - } else if (maxcolors <= 64) { - bpp = 8; - pixd = pixCreate(w, h, bpp); - maxlevel = 2; - ncubes = 64; /* 2^6 */ - nbase = 8; - nextra = maxcolors - nbase; - } else { /* maxcolors <= 256 */ - bpp = 8; - pixd = pixCreate(w, h, bpp); - maxlevel = 3; - ncubes = 512; /* 2^9 */ - nbase = 64; - nextra = maxcolors - nbase; - } - - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /*----------------------------------------------------------* - * If we're using the minimum number of colors, it is * - * much simpler. We just use 'nbase' octcubes. * - * For this case, we don't eliminate any extra colors. 
* - *----------------------------------------------------------*/ - if (nextra == 0) { - /* prepare the OctcubeQuantCell array */ - if ((oqca = (OQCELL **)LEPT_CALLOC(nbase, sizeof(OQCELL *))) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("oqca not made", procName, NULL); - } - for (i = 0; i < nbase; i++) { - oqca[i] = (OQCELL *)LEPT_CALLOC(1, sizeof(OQCELL)); - oqca[i]->n = 0.0; - } - - rtab = gtab = btab = NULL; - makeRGBToIndexTables(maxlevel - 1, &rtab, >ab, &btab); - - /* Go through the entire image, gathering statistics and - * assigning pixels to their quantized value */ - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pspixel = lines + j; - extractRGBValues(*pspixel, &rval, &gval, &bval); - getOctcubeIndexFromRGB(rval, gval, bval, - rtab, gtab, btab, &index); -/* lept_stderr("rval = %d, gval = %d, bval = %d," - " index = %d\n", rval, gval, bval, index); */ - if (bpp == 4) - SET_DATA_QBIT(lined, j, index); - else /* bpp == 8 */ - SET_DATA_BYTE(lined, j, index); - oqca[index]->n += 1.0; - oqca[index]->rcum += rval; - oqca[index]->gcum += gval; - oqca[index]->bcum += bval; - } - } - - /* Compute average color values in each octcube, and - * generate colormap */ - cmap = pixcmapCreate(bpp); - pixSetColormap(pixd, cmap); - for (i = 0; i < nbase; i++) { - oqc = oqca[i]; - if (oqc->n != 0) { - oqc->rval = (l_int32)(oqc->rcum / oqc->n); - oqc->gval = (l_int32)(oqc->gcum / oqc->n); - oqc->bval = (l_int32)(oqc->bcum / oqc->n); - } else { - getRGBFromOctcube(i, maxlevel - 1, &oqc->rval, - &oqc->gval, &oqc->bval); - } - pixcmapAddColor(cmap, oqc->rval, oqc->gval, oqc->bval); - } - - for (i = 0; i < nbase; i++) - LEPT_FREE(oqca[i]); - LEPT_FREE(oqca); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; - } - - /*------------------------------------------------------------* - * General case: we will use colors in octcubes 
at maxlevel. * - * We also remove any colors that are not populated from * - * the colormap. * - *------------------------------------------------------------*/ - /* Prepare the OctcubeQuantCell array */ - if ((oqca = (OQCELL **)LEPT_CALLOC(ncubes, sizeof(OQCELL *))) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("oqca not made", procName, NULL); - } - for (i = 0; i < ncubes; i++) { - oqca[i] = (OQCELL *)LEPT_CALLOC(1, sizeof(OQCELL)); - oqca[i]->n = 0.0; - } - - /* Make the tables to map color to the octindex, - * of which there are 'ncubes' at 'maxlevel' */ - rtab = gtab = btab = NULL; - makeRGBToIndexTables(maxlevel, &rtab, >ab, &btab); - - /* Estimate the color distribution; we want to find the - * most popular nextra colors at 'maxlevel' */ - for (i = 0; i < h; i += subsample) { - lines = datas + i * wpls; - for (j = 0; j < w; j += subsample) { - pspixel = lines + j; - extractRGBValues(*pspixel, &rval, &gval, &bval); - getOctcubeIndexFromRGB(rval, gval, bval, rtab, gtab, btab, &index); - oqca[index]->n += 1.0; - oqca[index]->octindex = index; - oqca[index]->rcum += rval; - oqca[index]->gcum += gval; - oqca[index]->bcum += bval; - } - } - - /* Transfer the OQCELL from the array, and order in a heap */ - lh = lheapCreate(512, L_SORT_DECREASING); - for (i = 0; i < ncubes; i++) - lheapAdd(lh, oqca[i]); - LEPT_FREE(oqca); /* don't need this array */ - - /* Prepare a new OctcubeQuantCell array, with maxcolors cells */ - oqca = (OQCELL **)LEPT_CALLOC(maxcolors, sizeof(OQCELL *)); - for (i = 0; i < nbase; i++) { /* make nbase cells */ - oqca[i] = (OQCELL *)LEPT_CALLOC(1, sizeof(OQCELL)); - oqca[i]->n = 0.0; - } - - /* Remove the nextra most populated ones, and put them in the array */ - for (i = 0; i < nextra; i++) { - oqc = (OQCELL *)lheapRemove(lh); - oqc->n = 0.0; /* reinit */ - oqc->rcum = 0; - oqc->gcum = 0; - oqc->bcum = 0; - oqca[nbase + i] = oqc; /* store it in the array */ - } - - /* Destroy the heap and its remaining contents */ - 
lheapDestroy(&lh, TRUE); - - /* Generate a lookup table from octindex at maxlevel - * to color table index */ - lut1 = (l_int32 *)LEPT_CALLOC(ncubes, sizeof(l_int32)); - for (i = 0; i < nextra; i++) - lut1[oqca[nbase + i]->octindex] = nbase + i; - for (index = 0; index < ncubes; index++) { - if (lut1[index] == 0) /* not one of the extras; need to assign */ - lut1[index] = index >> 3; /* remove the least significant bits */ -/* lept_stderr("lut1[%d] = %d\n", index, lut1[index]); */ - } - - /* Go through the entire image, gathering statistics and - * assigning pixels to their quantized value */ - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pspixel = lines + j; - extractRGBValues(*pspixel, &rval, &gval, &bval); - getOctcubeIndexFromRGB(rval, gval, bval, rtab, gtab, btab, &index); -/* lept_stderr("rval = %d, gval = %d, bval = %d, index = %d\n", - rval, gval, bval, index); */ - val = lut1[index]; - switch (bpp) { - case 4: - SET_DATA_QBIT(lined, j, val); - break; - case 8: - SET_DATA_BYTE(lined, j, val); - break; - default: - LEPT_FREE(oqca); - LEPT_FREE(lut1); - return (PIX *)ERROR_PTR("bpp not 4 or 8!", procName, NULL); - break; - } - oqca[val]->n += 1.0; - oqca[val]->rcum += rval; - oqca[val]->gcum += gval; - oqca[val]->bcum += bval; - } - } - - /* Compute averages, set up a colormap, and make a second - * lut that converts from the color values currently in - * the image to a minimal set */ - lut2 = (l_int32 *)LEPT_CALLOC(ncubes, sizeof(l_int32)); - cmap = pixcmapCreate(bpp); - pixSetColormap(pixd, cmap); - for (i = 0, index = 0; i < maxcolors; i++) { - oqc = oqca[i]; - lut2[i] = index; - if (oqc->n == 0) /* no occupancy; don't bump up index */ - continue; - oqc->rval = (l_int32)(oqc->rcum / oqc->n); - oqc->gval = (l_int32)(oqc->gcum / oqc->n); - oqc->bval = (l_int32)(oqc->bcum / oqc->n); - pixcmapAddColor(cmap, oqc->rval, oqc->gval, oqc->bval); 
- index++; - } -/* pixcmapWriteStream(stderr, cmap); */ - actualcolors = pixcmapGetCount(cmap); -/* lept_stderr("Number of different colors = %d\n", actualcolors); */ - - /* Last time through the image; use the lookup table to - * remap the pixel value to the minimal colormap */ - if (actualcolors < maxcolors) { - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - switch (bpp) { - case 4: - val = GET_DATA_QBIT(lined, j); - SET_DATA_QBIT(lined, j, lut2[val]); - break; - case 8: - val = GET_DATA_BYTE(lined, j); - SET_DATA_BYTE(lined, j, lut2[val]); - break; - } - } - } - } - - if (oqca) { - for (i = 0; i < maxcolors; i++) - LEPT_FREE(oqca[i]); - } - LEPT_FREE(oqca); - LEPT_FREE(lut1); - LEPT_FREE(lut2); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; -} - - -/*-------------------------------------------------------------------------* - * Mixed color/gray quantization with specified number of colors * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixOctcubeQuantMixedWithGray() - * - * \param[in] pixs 32 bpp rgb - * \param[in] depth of output pix - * \param[in] graylevels graylevels (must be > 1) - * \param[in] delta threshold for deciding if a pix is color or gray - * \return pixd quantized to octcube and gray levels or NULL on error - * - *
- * Notes:
- *      (1) Generates a colormapped image, where the colormap table values
- *          have two components: octcube values representing pixels with
- *          color content, and grayscale values for the rest.
- *      (2) The threshold (delta) is the maximum allowable difference of
- *          the max abs value of | r - g |, | r - b | and | g - b |.
- *      (3) The octcube values are the averages of all pixels that are
- *          found in the octcube, and that are far enough from gray to
- *          be considered color.  This can roughly be visualized as all
- *          the points in the rgb color cube that are not within a "cylinder"
- *          of diameter approximately 'delta' along the main diagonal.
- *      (4) We want to guarantee full coverage of the rgb color space; thus,
- *          if the output depth is 4, the octlevel is 1 (2 x 2 x 2 = 8 cubes)
- *          and if the output depth is 8, the octlevel is 2 (4 x 4 x 4
- *          = 64 cubes).
- *      (5) Consequently, we have the following constraint on the number
- *          of allowed gray levels: for 4 bpp, 8; for 8 bpp, 192.
- * 
- */ -PIX * -pixOctcubeQuantMixedWithGray(PIX *pixs, - l_int32 depth, - l_int32 graylevels, - l_int32 delta) -{ -l_int32 w, h, wpls, wpld, i, j, size, octlevels; -l_int32 rval, gval, bval, del, val, midval; -l_int32 *carray, *rarray, *garray, *barray; -l_int32 *tabval; -l_uint32 octindex; -l_uint32 *rtab, *gtab, *btab; -l_uint32 *lines, *lined, *datas, *datad; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixOctcubeQuantMixedWithGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (graylevels < 2) - return (PIX *)ERROR_PTR("invalid graylevels", procName, NULL); - if (depth == 4) { - octlevels = 1; - size = 8; /* 2 ** 3 */ - if (graylevels > 8) - return (PIX *)ERROR_PTR("max 8 gray levels", procName, NULL); - } else if (depth == 8) { - octlevels = 2; - size = 64; /* 2 ** 6 */ - if (graylevels > 192) - return (PIX *)ERROR_PTR("max 192 gray levels", procName, NULL); - } else { - return (PIX *)ERROR_PTR("output depth not 4 or 8 bpp", procName, NULL); - } - - pixd = NULL; - - /* Make octcube index tables */ - rtab = gtab = btab = NULL; - makeRGBToIndexTables(octlevels, &rtab, >ab, &btab); - - /* Make octcube arrays for storing points in each cube */ - carray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - rarray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - garray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - barray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - - /* Make lookup table, using computed thresholds */ - tabval = makeGrayQuantIndexTable(graylevels); - if (!rtab || !gtab || !btab || - !carray || !rarray || !garray || !barray || !tabval) { - L_ERROR("calloc fail for an array\n", procName); - goto array_cleanup; - } - - /* Make colormapped output pixd */ - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, depth)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto array_cleanup; - } - 
pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - cmap = pixcmapCreate(depth); - for (j = 0; j < size; j++) /* reserve octcube colors */ - pixcmapAddColor(cmap, 1, 1, 1); /* a color that won't be used */ - for (j = 0; j < graylevels; j++) { /* set grayscale colors */ - val = (255 * j) / (graylevels - 1); - pixcmapAddColor(cmap, val, val, val); - } - pixSetColormap(pixd, cmap); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - /* Go through src image: assign dest pixels to colormap values - * and compute average colors in each occupied octcube */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - if (rval > gval) { - if (gval > bval) { /* r > g > b */ - del = rval - bval; - midval = gval; - } else if (rval > bval) { /* r > b > g */ - del = rval - gval; - midval = bval; - } else { /* b > r > g */ - del = bval - gval; - midval = rval; - } - } else { /* gval >= rval */ - if (rval > bval) { /* g > r > b */ - del = gval - bval; - midval = rval; - } else if (gval > bval) { /* g > b > r */ - del = gval - rval; - midval = bval; - } else { /* b > g > r */ - del = bval - rval; - midval = gval; - } - } - if (del > delta) { /* assign to color */ - octindex = rtab[rval] | gtab[gval] | btab[bval]; - carray[octindex]++; - rarray[octindex] += rval; - garray[octindex] += gval; - barray[octindex] += bval; - if (depth == 4) - SET_DATA_QBIT(lined, j, octindex); - else /* depth == 8 */ - SET_DATA_BYTE(lined, j, octindex); - } else { /* assign to grayscale */ - val = size + tabval[midval]; - if (depth == 4) - SET_DATA_QBIT(lined, j, val); - else /* depth == 8 */ - SET_DATA_BYTE(lined, j, val); - } - } - } - - /* Average the colors in each bin and reset the colormap */ - for (i = 0; i < size; i++) { - if (carray[i] > 0) { - rarray[i] /= carray[i]; - garray[i] /= carray[i]; - barray[i] /= carray[i]; 
- pixcmapResetColor(cmap, i, rarray[i], garray[i], barray[i]); - } - } - -array_cleanup: - LEPT_FREE(carray); - LEPT_FREE(rarray); - LEPT_FREE(garray); - LEPT_FREE(barray); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - LEPT_FREE(tabval); - - return pixd; -} - - -/*-------------------------------------------------------------------------* - * Fixed partition octcube quantization with 256 cells * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixFixedOctcubeQuant256() - * - * \param[in] pixs 32 bpp; 24-bit color - * \param[in] ditherflag 1 for dithering; 0 for no dithering - * \return pixd 8 bit with colormap, or NULL on error - * - *
- * Notes:
- *  This simple 1-pass color quantization works by breaking the
- *  color space into 256 pieces, with 3 bits quantized for each of
- *  red and green, and 2 bits quantized for blue.  We shortchange
- *  blue because the eye is least sensitive to blue.  This
- *  division of the color space is into two levels of octrees,
- *  followed by a further division by 4 not 8, where both
- *  blue octrees have been combined in the third level.
- *
- *  The color map is generated from the 256 color centers by
- *  taking the representative color to be the center of the
- *  cell volume.  This gives a maximum error in the red and
- *  green values of 16 levels, and a maximum error in the
- *  blue sample of 32 levels.
- *
- *  Each pixel in the 24-bit color image is placed in its containing
- *  cell, given by the relevant MSbits of the red, green and blue
- *  samples.  An error-diffusion dithering is performed on each
- *  color sample to give the appearance of good average local color.
- *  Dithering is required; without it, the contouring and visible
- *  color errors are very bad.
- *
- *  I originally implemented this algorithm in two passes,
- *  where the first pass was used to compute the weighted average
- *  of each sample in each pre-allocated region of color space.
- *  The idea was to use these centroids in the dithering algorithm
- *  of the second pass, to reduce the average error that was
- *  being dithered.  However, with dithering, there is
- *  virtually no difference, so there is no reason to make the
- *  first pass.  Consequently, this 1-pass version just assigns
- *  the pixels to the centers of the pre-allocated cells.
- *  We use dithering to spread the difference between the sample
- *  value and the location of the center of the cell.  For speed
- *  and simplicity, we use integer dithering and propagate only
- *  to the right, down, and diagonally down-right, with ratios
- *  3/8, 3/8 and 1/4, respectively.  The results should be nearly
- *  as good, and a bit faster, with propagation only to the right
- *  and down.
- *
- *  The algorithm is very fast, because there is no search,
- *  only fast generation of the cell index for each pixel.
- *  We use a simple mapping from the three 8 bit rgb samples
- *  to the 8 bit cell index; namely, r7 r6 r5 g7 g6 g5 b7 b6.
- *  This is not in an octcube format, but it doesn't matter.
- *  There are no storage requirements.  We could keep a
- *  running average of the center of each sample in each
- *  cluster, rather than using the center of the cell, but
- *  this is just extra work, esp. with dithering.
- *
- *  This method gives surprisingly good results with dithering.
- *  However, without dithering, the loss of color accuracy is
- *  evident in regions that are very light or that have subtle
- *  blending of colors.
- * 
- */ -PIX * -pixFixedOctcubeQuant256(PIX *pixs, - l_int32 ditherflag) -{ -l_uint8 index; -l_int32 rval, gval, bval; -l_int32 w, h, wpls, wpld, i, j, cindex; -l_uint32 *rtab, *gtab, *btab; -l_int32 *itab; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixFixedOctcubeQuant256"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - - /* Do not dither if image is very small */ - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinDitherSize && h < MinDitherSize && ditherflag == 1) { - L_INFO("Small image: dithering turned off\n", procName); - ditherflag = 0; - } - - /* Find the centers of the 256 cells, each of which represents - * the 3 MSBits of the red and green components, and the - * 2 MSBits of the blue component. This gives a mapping - * from a "cube index" to the rgb values. Save all 256 - * rgb values of these centers in a colormap. - * For example, to get the red color of the cell center, - * you take the 3 MSBits of to the index and add the - * offset to the center of the cell, which is 0x10. 
*/ - cmap = pixcmapCreate(8); - for (cindex = 0; cindex < 256; cindex++) { - rval = (cindex & 0xe0) | 0x10; - gval = ((cindex << 3) & 0xe0) | 0x10; - bval = ((cindex << 6) & 0xc0) | 0x20; - pixcmapAddColor(cmap, rval, gval, bval); - } - - /* Make output 8 bpp palette image */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 8)) == NULL) { - pixcmapDestroy(&cmap); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixSetColormap(pixd, cmap); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Set dest pix values to colortable indices */ - if (ditherflag == 0) { /* no dithering */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - index = (rval & 0xe0) | ((gval >> 3) & 0x1c) | (bval >> 6); - SET_DATA_BYTE(lined, j, index); - } - } - } else { /* ditherflag == 1 */ - /* Set up conversion tables from rgb directly to the colormap - * index. However, the dithering function expects these tables - * to generate an octcube index (+1), and the table itab[] to - * convert to the colormap index. So we make a trivial - * itab[], that simply compensates for the -1 in - * pixDitherOctindexWithCmap(). No cap is required on - * the propagated difference. 
*/ - rtab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - gtab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - btab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - itab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - if (!rtab || !gtab || !btab || !itab) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("calloc fail for table", procName, NULL); - } - for (i = 0; i < 256; i++) { - rtab[i] = i & 0xe0; - gtab[i] = (i >> 3) & 0x1c; - btab[i] = i >> 6; - itab[i] = i + 1; - } - pixDitherOctindexWithCmap(pixs, pixd, rtab, gtab, btab, itab, - FIXED_DIF_CAP); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - LEPT_FREE(itab); - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Nearly exact quantization for images with few colors * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixFewColorsOctcubeQuant1() - * - * \param[in] pixs 32 bpp rgb - * \param[in] level significant bits for each of RGB; valid in [1...6] - * \return pixd quantized to octcube or NULL on error - * - *
- * Notes:
- *      (1) Generates a colormapped image, where the colormap table values
- *          are the averages of all pixels that are found in the octcube.
- *      (2) This fails if there are more than 256 colors (i.e., more
- *          than 256 occupied octcubes).
- *      (3) Often level 3 (512 octcubes) will succeed because not more
- *          than half of them are occupied with 1 or more pixels.
- *      (4) The depth of the result, which is either 2, 4 or 8 bpp,
- *          is the minimum required to hold the number of colors that
- *          are found.
- *      (5) This can be useful for quantizing orthographically generated
- *          images such as color maps, where there may be more than 256 colors
- *          because of aliasing or jpeg artifacts on text or lines, but
- *          there are a relatively small number of solid colors.  Then,
- *          use with level = 3 can often generate a compact and accurate
- *          representation of the original RGB image.  For this purpose,
- *          it is better than pixFewColorsOctcubeQuant2(), because it
- *          uses the average value of pixels in the octcube rather
- *          than the first found pixel.  It is also simpler to use,
- *          because it generates the histogram internally.
- * 
- */ -PIX * -pixFewColorsOctcubeQuant1(PIX *pixs, - l_int32 level) -{ -l_int32 w, h, wpls, wpld, i, j, depth, size, ncolors, index; -l_int32 rval, gval, bval; -l_int32 *carray, *rarray, *garray, *barray; -l_uint32 octindex; -l_uint32 *rtab, *gtab, *btab; -l_uint32 *lines, *lined, *datas, *datad, *pspixel; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixFewColorsOctcubeQuant1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (level < 1 || level > 6) - return (PIX *)ERROR_PTR("invalid level", procName, NULL); - - pixd = NULL; - - if (octcubeGetCount(level, &size)) /* array size = 2 ** (3 * level) */ - return (PIX *)ERROR_PTR("size not returned", procName, NULL); - rtab = gtab = btab = NULL; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - - carray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - rarray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - garray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - barray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32)); - if (!carray || !rarray || !garray || !barray) { - L_ERROR("calloc fail for an array\n", procName); - goto array_cleanup; - } - - /* Place the pixels in octcube leaves. 
*/ - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - pspixel = lines + j; - extractRGBValues(*pspixel, &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - carray[octindex]++; - rarray[octindex] += rval; - garray[octindex] += gval; - barray[octindex] += bval; - } - } - - /* Find the number of different colors */ - for (i = 0, ncolors = 0; i < size; i++) { - if (carray[i] > 0) - ncolors++; - } - if (ncolors > 256) { - L_WARNING("%d colors found; more than 256\n", procName, ncolors); - goto array_cleanup; - } - if (ncolors <= 4) - depth = 2; - else if (ncolors <= 16) - depth = 4; - else - depth = 8; - - /* Average the colors in each octcube leaf and add to colormap table; - * then use carray to hold the colormap index + 1 */ - cmap = pixcmapCreate(depth); - for (i = 0, index = 0; i < size; i++) { - if (carray[i] > 0) { - rarray[i] /= carray[i]; - garray[i] /= carray[i]; - barray[i] /= carray[i]; - pixcmapAddColor(cmap, rarray[i], garray[i], barray[i]); - carray[i] = index + 1; /* to avoid storing 0 */ - index++; - } - } - - pixd = pixCreate(w, h, depth); - pixSetColormap(pixd, cmap); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pspixel = lines + j; - extractRGBValues(*pspixel, &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - switch (depth) - { - case 2: - SET_DATA_DIBIT(lined, j, carray[octindex] - 1); - break; - case 4: - SET_DATA_QBIT(lined, j, carray[octindex] - 1); - break; - case 8: - SET_DATA_BYTE(lined, j, carray[octindex] - 1); - break; - default: - L_WARNING("shouldn't get here\n", procName); - } - } - } - -array_cleanup: - LEPT_FREE(carray); - LEPT_FREE(rarray); - LEPT_FREE(garray); - 
LEPT_FREE(barray); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; -} - - -/*! - * \brief pixFewColorsOctcubeQuant2() - * - * \param[in] pixs 32 bpp rgb - * \param[in] level of octcube indexing, for histogram: 3, 4, 5, 6 - * \param[in] na histogram of pixel occupation in octree leaves - * at given level - * \param[in] ncolors number of occupied octree leaves at given level - * \param[out] pnerrors [optional] num of pixels not exactly - * represented in the colormap - * \return pixd 2, 4 or 8 bpp with colormap, or NULL on error - * - *
- * Notes:
- *      (1) Generates a colormapped image, where the colormap table values
- *          are the averages of all pixels that are found in the octcube.
- *      (2) This fails if there are more than 256 colors (i.e., more
- *          than 256 occupied octcubes).
- *      (3) Often level 3 (512 octcubes) will succeed because not more
- *          than half of them are occupied with 1 or more pixels.
- *      (4) For an image with not more than 256 colors, it is unlikely
- *          that two pixels of different color will fall in the same
- *          octcube at level = 4.   However it is possible, and this
- *          function optionally returns %nerrors, the number of pixels
- *          where, because more than one color is in the same octcube,
- *          the pixel color is not exactly reproduced in the colormap.
- *          The colormap for an occupied leaf of the octree contains
- *          the color of the first pixel encountered in that octcube.
- *      (5) This differs from pixFewColorsOctcubeQuant1(), which also
- *          requires not more than 256 occupied leaves, but represents
- *          the color of each leaf by an average over the pixels in
- *          that leaf.  This also requires precomputing the histogram
- *          of occupied octree leaves, which is generated using
- *          pixOctcubeHistogram().
- *      (6) This is used in pixConvertRGBToColormap() for images that
- *          are determined, by their histogram, to have relatively few
- *          colors.  This typically happens with orthographically
- *          produced images (as oppopsed to natural images), where
- *          it is expected that most of the pixels within a leaf
- *          octcube have exactly the same color, and quantization to
- *          that color is lossless.
- * 
- */ -PIX * -pixFewColorsOctcubeQuant2(PIX *pixs, - l_int32 level, - NUMA *na, - l_int32 ncolors, - l_int32 *pnerrors) -{ -l_int32 w, h, wpls, wpld, i, j, nerrors; -l_int32 ncubes, depth, cindex, oval; -l_int32 rval, gval, bval; -l_int32 *octarray; -l_uint32 octindex; -l_uint32 *rtab, *gtab, *btab; -l_uint32 *lines, *lined, *datas, *datad, *ppixel; -l_uint32 *colorarray; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixFewColorsOctcubeQuant2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (level < 3 || level > 6) - return (PIX *)ERROR_PTR("level not in {4, 5, 6}", procName, NULL); - if (ncolors > 256) - return (PIX *)ERROR_PTR("ncolors > 256", procName, NULL); - if (pnerrors) - *pnerrors = UNDEF; - - pixd = NULL; - - /* Represent the image with a set of leaf octcubes - * at 'level', one for each color. */ - rtab = gtab = btab = NULL; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - - /* The octarray will give a ptr from the octcube to the colorarray */ - ncubes = numaGetCount(na); - octarray = (l_int32 *)LEPT_CALLOC(ncubes, sizeof(l_int32)); - - /* The colorarray will hold the colors of the first pixel - * that lands in the leaf octcube. After filling, it is - * used to generate the colormap. 
*/ - colorarray = (l_uint32 *)LEPT_CALLOC(ncolors + 1, sizeof(l_uint32)); - if (!octarray || !colorarray) { - L_ERROR("octarray or colorarray not made\n", procName); - goto cleanup_arrays; - } - - /* Determine the output depth from the number of colors */ - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (ncolors <= 4) - depth = 2; - else if (ncolors <= 16) - depth = 4; - else /* ncolors <= 256 */ - depth = 8; - - if ((pixd = pixCreate(w, h, depth)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup_arrays; - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* For each pixel, get the octree index for its leaf octcube. - * Check if a pixel has already been found in this octcube. - * ~ If not yet found, save that color in the colorarray - * and save the cindex in the octarray. - * ~ If already found, compare the pixel color with the - * color in the colorarray, and note if it differs. - * Then set the dest pixel value to the cindex - 1, which - * will be the cmap index for this color. */ - cindex = 1; /* start with 1 */ - nerrors = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - ppixel = lines + j; - extractRGBValues(*ppixel, &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - oval = octarray[octindex]; - if (oval == 0) { - octarray[octindex] = cindex; - colorarray[cindex] = *ppixel; - setPixelLow(lined, j, depth, cindex - 1); - cindex++; - } else { /* already have seen this color; is it unique? 
*/ - setPixelLow(lined, j, depth, oval - 1); - if (colorarray[oval] != *ppixel) - nerrors++; - } - } - } - if (pnerrors) - *pnerrors = nerrors; - -#if DEBUG_FEW_COLORS - lept_stderr("ncubes = %d, ncolors = %d\n", ncubes, ncolors); - for (i = 0; i < ncolors; i++) - lept_stderr("color[%d] = %x\n", i, colorarray[i + 1]); -#endif /* DEBUG_FEW_COLORS */ - - /* Make the colormap. */ - cmap = pixcmapCreate(depth); - for (i = 0; i < ncolors; i++) { - ppixel = colorarray + i + 1; - extractRGBValues(*ppixel, &rval, &gval, &bval); - pixcmapAddColor(cmap, rval, gval, bval); - } - pixSetColormap(pixd, cmap); - -cleanup_arrays: - LEPT_FREE(octarray); - LEPT_FREE(colorarray); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - - return pixd; -} - - -/*! - * \brief pixFewColorsOctcubeQuantMixed() - * - * \param[in] pixs 32 bpp rgb - * \param[in] level significant octcube bits for each of RGB; - * valid in [1...6]; use 0 for default - * \param[in] darkthresh threshold near black; if the lightest component - * is below this, the pixel is not considered to - * be gray or color; uses 0 for default - * \param[in] lightthresh threshold near white; if the darkest component - * is above this, the pixel is not considered to - * be gray or color; use 0 for default - * \param[in] diffthresh thresh for the max difference between component - * values; for differences below this, the pixel - * is considered to be gray; use 0 for default - * \param[in] minfract min fraction of pixels for gray histo bin; - * use 0.0 for default - * \param[in] maxspan max size of gray histo bin; use 0 for default - * \return pixd 8 bpp, quantized to octcube for pixels that are - * not gray; gray pixels are quantized separately - * over the full gray range, or NULL on error - * - *
- * Notes:
- *      (1) First runs pixFewColorsOctcubeQuant1().  If this succeeds,
- *          it separates the color from gray(ish) entries in the cmap,
- *          and re-quantizes the gray pixels.  The result has some pixels
- *          in color and others in gray.
- *      (2) This fails if there are more than 256 colors (i.e., more
- *          than 256 occupied octcubes in the color quantization).
- *      (3) Level 3 (512 octcubes) will usually succeed because not more
- *          than half of them are occupied with 1 or more pixels.
- *      (4) This uses the criterion from pixColorFraction() for deciding
- *          if a colormap entry is color; namely, if the color components
- *          are not too close to either black or white, and the maximum
- *          difference between component values equals or exceeds a threshold.
- *      (5) For quantizing the gray pixels, it uses a histogram-based
- *          method where input parameters determining the buckets are
- *          the minimum population fraction and the maximum allowed size.
- *      (6) Recommended input parameters are:
- *              %level:  3 or 4  (3 is default)
- *              %darkthresh:  20
- *              %lightthresh: 244
- *              %diffthresh: 20
- *              %minfract: 0.05
- *              %maxspan: 15
- *          These numbers are intended to be conservative (somewhat over-
- *          sensitive) in color detection,  It's usually better to pay
- *          extra with octcube quantization of a grayscale image than
- *          to use grayscale quantization on an image that has some
- *          actual color.  Input 0 on any of these to get the default.
- *      (7) This can be useful for quantizing orthographically generated
- *          images such as color maps, where there may be more than 256 colors
- *          because of aliasing or jpeg artifacts on text or lines, but
- *          there are a relatively small number of solid colors.  It usually
- *          gives results that are better than pixOctcubeQuantMixedWithGray(),
- *          both in size and appearance.  But it is a bit slower.
- * 
- */ -PIX * -pixFewColorsOctcubeQuantMixed(PIX *pixs, - l_int32 level, - l_int32 darkthresh, - l_int32 lightthresh, - l_int32 diffthresh, - l_float32 minfract, - l_int32 maxspan) -{ -l_int32 i, j, w, h, wplc, wplm, wpld, ncolors, index; -l_int32 rval, gval, bval, val, minval, maxval; -l_int32 *lut; -l_uint32 *datac, *datam, *datad, *linec, *linem, *lined; -PIX *pixc, *pixm, *pixg, *pixd; -PIXCMAP *cmap, *cmapd; - - PROCNAME("pixFewColorsOctcubeQuantMixed"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (level <= 0) level = 3; - if (level > 6) - return (PIX *)ERROR_PTR("invalid level", procName, NULL); - if (darkthresh <= 0) darkthresh = 20; - if (lightthresh <= 0) lightthresh = 244; - if (diffthresh <= 0) diffthresh = 20; - if (minfract <= 0.0) minfract = 0.05; - if (maxspan <= 2) maxspan = 15; - - /* Start with a simple fixed octcube quantizer. */ - if ((pixc = pixFewColorsOctcubeQuant1(pixs, level)) == NULL) - return (PIX *)ERROR_PTR("too many colors", procName, NULL); - - /* Identify and save color entries in the colormap. Set up a LUT - * that returns -1 for any gray pixel. */ - cmap = pixGetColormap(pixc); - ncolors = pixcmapGetCount(cmap); - cmapd = pixcmapCreate(8); - lut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) - lut[i] = -1; - for (i = 0, index = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - minval = L_MIN(rval, gval); - minval = L_MIN(minval, bval); - if (minval > lightthresh) /* near white */ - continue; - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - if (maxval < darkthresh) /* near black */ - continue; - - /* Use the max diff between components to test for color */ - if (maxval - minval >= diffthresh) { - pixcmapAddColor(cmapd, rval, gval, bval); - lut[i] = index; - index++; - } - } - - /* Generate dest pix with just the color pixels set to their - * colormap indices. 
At the same time, make a 1 bpp mask - * of the non-color pixels */ - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, 8); - pixSetColormap(pixd, cmapd); - pixm = pixCreate(w, h, 1); - datac = pixGetData(pixc); - datam = pixGetData(pixm); - datad = pixGetData(pixd); - wplc = pixGetWpl(pixc); - wplm = pixGetWpl(pixm); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linec = datac + i * wplc; - linem = datam + i * wplm; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(linec, j); - if (lut[val] == -1) - SET_DATA_BIT(linem, j); - else - SET_DATA_BYTE(lined, j, lut[val]); - } - } - - /* Fill in the gray values. Use a grayscale version of pixs - * as input, along with the mask over the actual gray pixels. */ - pixg = pixConvertTo8(pixs, 0); - pixGrayQuantFromHisto(pixd, pixg, pixm, minfract, maxspan); - - LEPT_FREE(lut); - pixDestroy(&pixc); - pixDestroy(&pixm); - pixDestroy(&pixg); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Fixed partition octcube quantization with RGB output * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixFixedOctcubeQuantGenRGB() - * - * \param[in] pixs 32 bpp rgb - * \param[in] level significant bits for each of r,g,b - * \return pixd rgb; quantized to octcube centers, or NULL on error - * - *
- * Notes:
- *      (1) Unlike the other color quantization functions, this one
- *          generates an rgb image.
- *      (2) The pixel values are quantized to the center of each octcube
- *          (at the specified level) containing the pixel.  They are
- *          not quantized to the average of the pixels in that octcube.
- * 
- */ -PIX * -pixFixedOctcubeQuantGenRGB(PIX *pixs, - l_int32 level) -{ -l_int32 w, h, wpls, wpld, i, j; -l_int32 rval, gval, bval; -l_uint32 octindex; -l_uint32 *rtab, *gtab, *btab; -l_uint32 *lines, *lined, *datas, *datad; -PIX *pixd; - - PROCNAME("pixFixedOctcubeQuantGenRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (level < 1 || level > 6) - return (PIX *)ERROR_PTR("level not in {1,...6}", procName, NULL); - - if (makeRGBToIndexTables(level, &rtab, >ab, &btab)) - return (PIX *)ERROR_PTR("tables not made", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, 32); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - getRGBFromOctcube(octindex, level, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, lined + j); - } - } - - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; -} - - -/*------------------------------------------------------------------* - * Color quantize RGB image using existing colormap * - *------------------------------------------------------------------*/ -/*! - * \brief pixQuantFromCmap() - * - * \param[in] pixs 8 bpp grayscale without cmap, or 32 bpp rgb - * \param[in] cmap to quantize to; insert copy into dest pix - * \param[in] mindepth minimum depth of pixd: can be 2, 4 or 8 bpp - * \param[in] level of octcube used for finding nearest color in cmap - * \param[in] metric L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE - * \return pixd 2, 4 or 8 bpp, colormapped, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level wrapper for quantizing either grayscale
- *          or rgb images to a specified colormap.
- *      (2) The actual output depth is constrained by %mindepth and
- *          by the number of colors in %cmap.
- *      (3) For grayscale, %level and %metric are ignored.
- *      (4) If the cmap has color and pixs is grayscale, the color is
- *          removed from the cmap before quantizing pixs.
- * 
- */ -PIX * -pixQuantFromCmap(PIX *pixs, - PIXCMAP *cmap, - l_int32 mindepth, - l_int32 level, - l_int32 metric) -{ -l_int32 d; - - PROCNAME("pixQuantFromCmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (mindepth != 2 && mindepth != 4 && mindepth != 8) - return (PIX *)ERROR_PTR("invalid mindepth", procName, NULL); - d = pixGetDepth(pixs); - if (d == 8) - return pixGrayQuantFromCmap(pixs, cmap, mindepth); - else if (d == 32) - return pixOctcubeQuantFromCmap(pixs, cmap, mindepth, - level, metric); - else - return (PIX *)ERROR_PTR("d not 8 or 32 bpp", procName, NULL); -} - - - -/*! - * \brief pixOctcubeQuantFromCmap() - * - * \param[in] pixs 32 bpp rgb - * \param[in] cmap to quantize to; insert copy into dest pix - * \param[in] mindepth minimum depth of pixd: can be 2, 4 or 8 bpp - * \param[in] level of octcube used for finding nearest color in cmap - * \param[in] metric L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE - * \return pixd 2, 4 or 8 bpp, colormapped, or NULL on error - * - *
- * Notes:
- *      (1) In typical use, we are doing an operation, such as
- *          interpolative scaling, on a colormapped pix, where it is
- *          necessary to remove the colormap before the operation.
- *          We then want to re-quantize the RGB result using the same
- *          colormap.
- *      (2) The level is used to divide the color space into octcubes.
- *          Each input pixel is, in effect, placed at the center of an
- *          octcube at the given level, and it is mapped into the
- *          exact color (given in the colormap) that is the closest
- *          to that location.  We need to know that distance, for each color
- *          in the colormap.  The higher the level of the octtree, the smaller
- *          the octcubes in the color space, and hence the more accurately
- *          we can determine the closest color in the colormap; however,
- *          the size of the LUT, which is the total number of octcubes,
- *          increases by a factor of 8 for each increase of 1 level.
- *          The time required to acquire a level 4 mapping table, which has
- *          about 4K entries, is less than 1 msec, so that is the
- *          recommended minimum size to be used.  At that size, the
- *          octcubes have their centers 16 units apart in each (r,g,b)
- *          direction.  If two colors are in the same octcube, the one
- *          closest to the center will always be chosen.  The maximum
- *          error for any component occurs when the correct color is
- *          at a cube corner and there is an incorrect color just inside
- *          the cube next to the opposite corner, giving an error of
- *          14 units (out of 256) for each component.   Using a level 5
- *          mapping table reduces the maximum error to 6 units.
- *      (3) Typically you should use the Euclidean metric, because the
- *          resulting voronoi cells (which are generated using the actual
- *          colormap values as seeds) are convex for Euclidean distance
- *          but not for Manhattan distance.  In terms of the octcubes,
- *          convexity of the voronoi cells means that if the 8 corners
- *          of any cube (of which the octcubes are special cases)
- *          are all within a cell, then every point in the cube will
- *          lie within the cell.
- *      (4) The depth of the output pixd is equal to the maximum of
- *          (a) %mindepth and (b) the minimum (2, 4 or 8 bpp) necessary
- *          to hold the indices in the colormap.
- *      (5) We build a mapping table from octcube to colormap index so
- *          that this function can run in a time (otherwise) independent
- *          of the number of colors in the colormap.  This avoids a
- *          brute-force search for the closest colormap color to each
- *          pixel in the image.
- *      (6) This is similar to the function pixAssignToNearestColor()
- *          used for color segmentation.
- *      (7) Except for very small images or when using level > 4,
- *          it takes very little time to generate the tables,
- *          compared to the generation of the colormapped dest pix,
- *          so one would not typically use the low-level version.
- * 
- */ -PIX * -pixOctcubeQuantFromCmap(PIX *pixs, - PIXCMAP *cmap, - l_int32 mindepth, - l_int32 level, - l_int32 metric) -{ -l_int32 *cmaptab; -l_uint32 *rtab, *gtab, *btab; -PIX *pixd; - - PROCNAME("pixOctcubeQuantFromCmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (!cmap) - return (PIX *)ERROR_PTR("cmap not defined", procName, NULL); - if (mindepth != 2 && mindepth != 4 && mindepth != 8) - return (PIX *)ERROR_PTR("invalid mindepth", procName, NULL); - if (level < 1 || level > 6) - return (PIX *)ERROR_PTR("level not in {1...6}", procName, NULL); - if (metric != L_MANHATTAN_DISTANCE && metric != L_EUCLIDEAN_DISTANCE) - return (PIX *)ERROR_PTR("invalid metric", procName, NULL); - - /* Set up the tables to map rgb to the nearest colormap index */ - rtab = gtab = btab = NULL; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - cmaptab = pixcmapToOctcubeLUT(cmap, level, metric); - - pixd = pixOctcubeQuantFromCmapLUT(pixs, cmap, mindepth, - cmaptab, rtab, gtab, btab); - - LEPT_FREE(cmaptab); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return pixd; -} - - -/*! - * \brief pixOctcubeQuantFromCmapLUT() - * - * \param[in] pixs 32 bpp rgb - * \param[in] cmap to quantize to; insert copy into dest pix - * \param[in] mindepth minimum depth of pixd: can be 2, 4 or 8 bpp - * \param[in] cmaptab table mapping from octindex to colormap index - * \param[in] rtab, gtab, btab tables mapping from RGB to octindex - * \return pixd 2, 4 or 8 bpp, colormapped, or NULL on error - * - *
- * Notes:
- *      (1) See the notes in the higher-level function
- *          pixOctcubeQuantFromCmap().  The octcube level for
- *          the generated octree is specified there, along with
- *          the distance metric for determining the closest
- *          color in the colormap to each octcube.
- *      (2) If the colormap, level and metric information have already
- *          been used to construct the set of mapping tables,
- *          this low-level function can be used directly (i.e.,
- *          independently of pixOctcubeQuantFromCmap()) to build
- *          a colormapped pix that uses the specified colormap.
- * 
- */ -static PIX * -pixOctcubeQuantFromCmapLUT(PIX *pixs, - PIXCMAP *cmap, - l_int32 mindepth, - l_int32 *cmaptab, - l_uint32 *rtab, - l_uint32 *gtab, - l_uint32 *btab) -{ -l_int32 i, j, w, h, depth, wpls, wpld; -l_int32 rval, gval, bval, index; -l_uint32 octindex; -l_uint32 *lines, *lined, *datas, *datad; -PIX *pixd; -PIXCMAP *cmapc; - - PROCNAME("pixOctcubeQuantFromCmapLUT"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (!cmap) - return (PIX *)ERROR_PTR("cmap not defined", procName, NULL); - if (mindepth != 2 && mindepth != 4 && mindepth != 8) - return (PIX *)ERROR_PTR("invalid mindepth", procName, NULL); - if (!rtab || !gtab || !btab || !cmaptab) - return (PIX *)ERROR_PTR("tables not all defined", procName, NULL); - - /* Init dest pix (with minimum bpp depending on cmap) */ - pixcmapGetMinDepth(cmap, &depth); - depth = L_MAX(depth, mindepth); - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, depth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmapc = pixcmapCopy(cmap); - pixSetColormap(pixd, cmapc); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Insert the colormap index of the color nearest to the input pixel */ - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - /* Map from rgb to octcube index */ - getOctcubeIndexFromRGB(rval, gval, bval, rtab, gtab, btab, - &octindex); - /* Map from octcube index to nearest colormap index */ - index = cmaptab[octindex]; - if (depth == 2) - SET_DATA_DIBIT(lined, j, index); - else if (depth == 4) - SET_DATA_QBIT(lined, j, index); - else /* depth == 8 */ - SET_DATA_BYTE(lined, j, index); - } - } - - return 
pixd; -} - - -/*---------------------------------------------------------------------------* - * Generation of octcube histogram * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixOctcubeHistogram() - * - * \param[in] pixs 32 bpp rgb - * \param[in] level significant bits for each of RGB; valid in [1...6] - * \param[out] pncolors [optional] number of occupied cubes - * \return numa histogram of color pixels, or NULL on error - * - *
- * Notes:
- *      (1) Input NULL for &ncolors to prevent computation and return value.
- * 
- */ -NUMA * -pixOctcubeHistogram(PIX *pixs, - l_int32 level, - l_int32 *pncolors) -{ -l_int32 size, i, j, w, h, wpl, ncolors, val; -l_int32 rval, gval, bval; -l_uint32 octindex; -l_uint32 *rtab, *gtab, *btab; -l_uint32 *data, *line; -l_float32 *array; -NUMA *na; - - PROCNAME("pixOctcubeHistogram"); - - if (pncolors) *pncolors = 0; - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (NUMA *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - - if (octcubeGetCount(level, &size)) /* array size = 2 ** (3 * level) */ - return (NUMA *)ERROR_PTR("size not returned", procName, NULL); - rtab = gtab = btab = NULL; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - - if ((na = numaCreate(size)) == NULL) { - L_ERROR("na not made\n", procName); - goto cleanup_arrays; - } - numaSetCount(na, size); - array = numaGetFArray(na, L_NOCOPY); - - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; -#if DEBUG_OCTINDEX - if ((level == 1 && octindex > 7) || - (level == 2 && octindex > 63) || - (level == 3 && octindex > 511) || - (level == 4 && octindex > 4097) || - (level == 5 && octindex > 32783) || - (level == 6 && octindex > 262271)) { - lept_stderr("level = %d, octindex = %d, index error!\n", - level, octindex); - continue; - } -#endif /* DEBUG_OCTINDEX */ - array[octindex] += 1.0; - } - } - - if (pncolors) { - for (i = 0, ncolors = 0; i < size; i++) { - numaGetIValue(na, i, &val); - if (val > 0) - ncolors++; - } - *pncolors = ncolors; - } - -cleanup_arrays: - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return na; -} - - -/*------------------------------------------------------------------* - * Get filled octcube table from colormap * - 
*------------------------------------------------------------------*/ -/*! - * \brief pixcmapToOctcubeLUT() - * - * \param[in] cmap - * \param[in] level significant bits for each of RGB; valid in [1...6] - * \param[in] metric L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE - * \return tab[2**3 * level] - * - *
- * Notes:
- *      (1) This function is used to quickly find the colormap color
- *          that is closest to any rgb color.  It is used to assign
- *          rgb colors to an existing colormap.  It can be very expensive
- *          to search through the entire colormap for the closest color
- *          to each pixel.  Instead, we first set up this table, which is
- *          populated by the colormap index nearest to each octcube
- *          color.  Then we go through the image; for each pixel,
- *          do two table lookups: first to generate the octcube index
- *          from rgb and second to use this table to read out the
- *          colormap index.
- *      (2) Do a slight modification for white and black.  For level = 4,
- *          each octcube size is 16.  The center of the whitest octcube
- *          is at (248, 248, 248), which is closer to 242 than 255.
- *          Consequently, any gray color between 242 and 254 will
- *          be selected, even if white (255, 255, 255) exists.  This is
- *          typically not optimal, because the original color was
- *          likely white.  Therefore, if white exists in the colormap,
- *          use it for any rgb color that falls into the most white octcube.
- *          Do the similar thing for black.
- *      (3) Here are the actual function calls for quantizing to a
- *          specified colormap:
- *            ~ first make the tables that map from rgb --> octcube index
- *                     makeRGBToIndexTables()
- *            ~ then for each pixel:
- *                * use the tables to get the octcube index
- *                     getOctcubeIndexFromRGB()
- *                * use this table to get the nearest color in the colormap
- *                     cmap_index = tab[index]
- *      (4) Distance can be either manhattan or euclidean.
- *      (5) In typical use, level = 4 gives reasonable results, and
- *          level = 5 is slightly better.  When this function is used
- *          for color segmentation, there are typically a small number
- *          of colors and the number of levels can be small (e.g., level = 3).
- * 
- */ -l_int32 * -pixcmapToOctcubeLUT(PIXCMAP *cmap, - l_int32 level, - l_int32 metric) -{ -l_int32 i, k, size, ncolors, mindist, dist, mincolor, index; -l_int32 rval, gval, bval; /* color at center of the octcube */ -l_int32 *rmap, *gmap, *bmap, *tab; - - PROCNAME("pixcmapToOctcubeLUT"); - - if (!cmap) - return (l_int32 *)ERROR_PTR("cmap not defined", procName, NULL); - if (level < 1 || level > 6) - return (l_int32 *)ERROR_PTR("level not in {1...6}", procName, NULL); - if (metric != L_MANHATTAN_DISTANCE && metric != L_EUCLIDEAN_DISTANCE) - return (l_int32 *)ERROR_PTR("invalid metric", procName, NULL); - - if (octcubeGetCount(level, &size)) /* array size = 2 ** (3 * level) */ - return (l_int32 *)ERROR_PTR("size not returned", procName, NULL); - if ((tab = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32))) == NULL) - return (l_int32 *)ERROR_PTR("tab not allocated", procName, NULL); - - ncolors = pixcmapGetCount(cmap); - pixcmapToArrays(cmap, &rmap, &gmap, &bmap, NULL); - - /* Assign based on the closest octcube center to the cmap color */ - for (i = 0; i < size; i++) { - getRGBFromOctcube(i, level, &rval, &gval, &bval); - mindist = 1000000; - mincolor = 0; /* irrelevant init */ - for (k = 0; k < ncolors; k++) { - if (metric == L_MANHATTAN_DISTANCE) { - dist = L_ABS(rval - rmap[k]) + L_ABS(gval - gmap[k]) + - L_ABS(bval - bmap[k]); - } else { /* L_EUCLIDEAN_DISTANCE */ - dist = (rval - rmap[k]) * (rval - rmap[k]) + - (gval - gmap[k]) * (gval - gmap[k]) + - (bval - bmap[k]) * (bval - bmap[k]); - } - if (dist < mindist) { - mindist = dist; - mincolor = k; - } - } - tab[i] = mincolor; - } - - /* Reset black and white if available in the colormap. - * The darkest octcube is at octindex 0. - * The lightest octcube is at the max octindex. 
*/ - pixcmapGetNearestIndex(cmap, 0, 0, 0, &index); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - if (rval < 7 && gval < 7 && bval < 7) { - tab[0] = index; - } - pixcmapGetNearestIndex(cmap, 255, 255, 255, &index); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - if (rval > 248 && gval > 248 && bval > 248) { - tab[(1 << (3 * level)) - 1] = index; - } - - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - return tab; -} - - -/*------------------------------------------------------------------* - * Strip out unused elements in colormap * - *------------------------------------------------------------------*/ -/*! - * \brief pixRemoveUnusedColors() - * - * \param[in] pixs colormapped - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) If the image doesn't have a colormap, returns without error.
- *      (3) Unusued colors are removed from the colormap, and the
- *          image pixels are re-numbered.
- * 
- */ -l_ok -pixRemoveUnusedColors(PIX *pixs) -{ -l_int32 i, j, w, h, d, nc, wpls, val, newval, index, zerofound; -l_int32 rval, gval, bval; -l_uint32 *datas, *lines; -l_int32 *histo, *map1, *map2; -PIXCMAP *cmap, *cmapd; - - PROCNAME("pixRemoveUnusedColors"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return 0; - - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8) - return ERROR_INT("d not in {2, 4, 8}", procName, 1); - - /* Find which indices are actually used */ - nc = pixcmapGetCount(cmap); - if ((histo = (l_int32 *)LEPT_CALLOC(nc, sizeof(l_int32))) == NULL) - return ERROR_INT("histo not made", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - switch (d) - { - case 2: - val = GET_DATA_DIBIT(lines, j); - break; - case 4: - val = GET_DATA_QBIT(lines, j); - break; - case 8: - val = GET_DATA_BYTE(lines, j); - break; - default: - LEPT_FREE(histo); - return ERROR_INT("switch ran off end!", procName, 1); - } - if (val >= nc) { - L_WARNING("cmap index out of bounds!\n", procName); - continue; - } - histo[val]++; - } - } - - /* Check if there are any zeroes. If none, quit. 
*/ - zerofound = FALSE; - for (i = 0; i < nc; i++) { - if (histo[i] == 0) { - zerofound = TRUE; - break; - } - } - if (!zerofound) { - LEPT_FREE(histo); - return 0; - } - - /* Generate mapping tables between indices */ - map1 = (l_int32 *)LEPT_CALLOC(nc, sizeof(l_int32)); - map2 = (l_int32 *)LEPT_CALLOC(nc, sizeof(l_int32)); - index = 0; - for (i = 0; i < nc; i++) { - if (histo[i] != 0) { - map1[index] = i; /* get old index from new */ - map2[i] = index; /* get new index from old */ - index++; - } - } - - /* Generate new colormap and attach to pixs */ - cmapd = pixcmapCreate(d); - for (i = 0; i < index; i++) { - pixcmapGetColor(cmap, map1[i], &rval, &gval, &bval); - pixcmapAddColor(cmapd, rval, gval, bval); - } - pixSetColormap(pixs, cmapd); - - /* Map pixel (index) values to new cmap */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - switch (d) - { - case 2: - val = GET_DATA_DIBIT(lines, j); - newval = map2[val]; - SET_DATA_DIBIT(lines, j, newval); - break; - case 4: - val = GET_DATA_QBIT(lines, j); - newval = map2[val]; - SET_DATA_QBIT(lines, j, newval); - break; - case 8: - val = GET_DATA_BYTE(lines, j); - newval = map2[val]; - SET_DATA_BYTE(lines, j, newval); - break; - default: - LEPT_FREE(histo); - LEPT_FREE(map1); - LEPT_FREE(map2); - return ERROR_INT("switch ran off end!", procName, 1); - } - } - } - - LEPT_FREE(histo); - LEPT_FREE(map1); - LEPT_FREE(map2); - return 0; -} - - -/*------------------------------------------------------------------* - * Find number of occupied octcubes at the specified level * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixNumberOccupiedOctcubes() - * - * \param[in] pix 32 bpp - * \param[in] level of octcube - * \param[in] mincount minimum num pixels in an octcube to be counted; - * -1 to not use - * \param[in] minfract minimum fract of pixels in an octcube to be - * counted; -1 to not use - * \param[out] pncolors number of occupied octcubes - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Exactly one of (%mincount, %minfract) must be -1, so, e.g.,
- *          if %mincount == -1, then we use %minfract.
- *      (2) If all occupied octcubes are to count, set %mincount == 1.
- *          Setting %minfract == 0.0 is taken to mean the same thing.
- * 
- */ -l_ok -pixNumberOccupiedOctcubes(PIX *pix, - l_int32 level, - l_int32 mincount, - l_float32 minfract, - l_int32 *pncolors) -{ -l_int32 i, j, w, h, d, wpl, ncolors, size, octindex; -l_int32 rval, gval, bval; -l_int32 *carray; -l_uint32 *data, *line, *rtab, *gtab, *btab; - - PROCNAME("pixNumberOccupiedOctcubes"); - - if (!pncolors) - return ERROR_INT("&ncolors not defined", procName, 1); - *pncolors = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 32) - return ERROR_INT("pix not 32 bpp", procName, 1); - if (level < 1 || level > 6) - return ERROR_INT("invalid level", procName, 1); - if ((mincount < 0 && minfract < 0) || (mincount >= 0.0 && minfract >= 0.0)) - return ERROR_INT("invalid mincount/minfract", procName, 1); - if (mincount == 0 || minfract == 0.0) - mincount = 1; - else if (minfract > 0.0) - mincount = L_MIN(1, (l_int32)(minfract * w * h)); - - if (octcubeGetCount(level, &size)) /* array size = 2 ** (3 * level) */ - return ERROR_INT("size not returned", procName, 1); - rtab = gtab = btab = NULL; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - if ((carray = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32))) == NULL) { - L_ERROR("carray not made\n", procName); - goto cleanup_arrays; - } - - /* Mark the occupied octcube leaves */ - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - octindex = rtab[rval] | gtab[gval] | btab[bval]; - carray[octindex]++; - } - } - - /* Count them */ - for (i = 0, ncolors = 0; i < size; i++) { - if (carray[i] >= mincount) - ncolors++; - } - *pncolors = ncolors; - -cleanup_arrays: - LEPT_FREE(carray); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorquant2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorquant2.c deleted file mode 
100644 index 98ab8b0a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorquant2.c +++ /dev/null @@ -1,1692 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colorquant2.c - *
- *
- *  Modified median cut color quantization
- *
- *      High level
- *          PIX              *pixMedianCutQuant()
- *          PIX              *pixMedianCutQuantGeneral()
- *          PIX              *pixMedianCutQuantMixed()
- *          PIX              *pixFewColorsMedianCutQuantMixed()
- *
- *      Median cut indexed histogram
- *          l_int32          *pixMedianCutHisto()
- *
- *      Static helpers
- *          static PIXCMAP   *pixcmapGenerateFromHisto()
- *          static PIX       *pixQuantizeWithColormap()
- *          static void       getColorIndexMedianCut()
- *          static L_BOX3D   *pixGetColorRegion()
- *          static l_int32    medianCutApply()
- *          static PIXCMAP   *pixcmapGenerateFromMedianCuts()
- *          static l_int32    vboxGetAverageColor()
- *          static l_int32    vboxGetCount()
- *          static l_int32    vboxGetVolume()
- *          static L_BOX3D   *box3dCreate();
- *          static L_BOX3D   *box3dCopy();
- *
- *   Paul Heckbert published the median cut algorithm, "Color Image
- *   Quantization for Frame Buffer Display," in Proc. SIGGRAPH '82,
- *   Boston, July 1982, pp. 297-307.  See:
- *   http://delivery.acm.org/10.1145/810000/801294/p297-heckbert.pdf
- *
- *   Median cut starts with either the full color space or the occupied
- *   region of color space.  If you're not dithering, the occupied region
- *   can be used, but with dithering, pixels can end up in any place
- *   in the color space, so you must represent the entire color space in
- *   the final colormap.
- *
- *   Color components are quantized to typically 5 or 6 significant
- *   bits (for each of r, g and b).   Call a 3D region of color
- *   space a 'vbox'.  Any color in this quantized space is represented
- *   by an element of a linear histogram array, indexed by rgb value.
- *   The initial region is then divided into two regions that have roughly
- *   equal pixel occupancy (hence the name "median cut").  Subdivision
- *   continues until the requisite number of vboxes has been generated.
- *
- *   But the devil is in the details of the subdivision process.
- *   Here are some choices that you must make:
- *     (1) Along which axis to subdivide?
- *     (2) Which box to put the bin with the median pixel?
- *     (3) How to order the boxes for subdivision?
- *     (4) How to adequately handle boxes with very small numbers of pixels?
- *     (5) How to prevent a little-represented but highly visible color
- *         from being masked out by other colors in its vbox.
- *
- *   Taking these in order:
- *     (1) Heckbert suggests using either the largest vbox side, or the vbox
- *         side with the largest variance in pixel occupancy.  We choose
- *         to divide based on the largest vbox side.
- *     (2) Suppose you've chosen a side.  Then you have a histogram
- *         of pixel occupancy in 2D slices of the vbox.  One of those
- *         slices includes the median pixel.  Suppose there are L bins
- *         to the left (smaller index) and R bins to the right.  Then
- *         this slice (or bin) should be assigned to the box containing
- *         the smaller of L and R.  This both shortens the larger
- *         of the subdivided dimensions and helps a low-count color
- *         far from the subdivision boundary to better express itself.
- *     (2a) One can also ask if the boundary should be moved even
- *         farther into the longer side.  This is feasible if we have
- *         a method for doing extra subdivisions on the high count
- *         vboxes.  And we do (see (3)).
- *     (3) To make sure that the boxes are subdivided toward equal
- *         occupancy, use an occupancy-sorted priority queue, rather
- *         than a simple queue.
- *     (4) With a priority queue, boxes with small number of pixels
- *         won't be repeatedly subdivided.  This is good.
- *     (5) Use of a priority queue allows tricks such as in (2a) to let
- *         small occupancy clusters be better expressed.  In addition,
- *         rather than splitting near the median, small occupancy colors
- *         are best reproduced by cutting half-way into the longer side.
- *
- *   However, serious problems can arise with dithering if a priority
- *   queue is used based on population alone.  If the picture has
- *   large regions of nearly constant color, some vboxes can be very
- *   large and have a sizeable population (but not big enough to get to
- *   the head of the queue).  If one of these large, occupied vboxes
- *   is near in color to a nearly constant color region of the
- *   image, dithering can inject pixels from the large vbox into
- *   the nearly uniform region.  These pixels can be very far away
- *   in color, and the oscillations are highly visible.  To prevent
- *   this, we can take either or both of these actions:
- *
- *     (1) Subdivide a fraction (< 1.0) based on population, and
- *         do the rest of the subdivision based on the product of
- *         the vbox volume and its population.  By using the product,
- *         we avoid further subdivision of nearly empty vboxes, and
- *         directly target large vboxes with significant population.
- *
- *     (2) Threshold the excess color transferred in dithering to
- *         neighboring pixels.
- *
- *   Doing either of these will stop the most annoying oscillations
- *   in dithering.  Furthermore, by doing (1), we also improve the
- *   rendering of regions of nearly constant color, both with and
- *   without dithering.  It turns out that the image quality is
- *   not sensitive to the value of the parameter in (1); values
- *   between 0.3 and 0.9 give very good results.
- *
- *   Here's the lesson: subdivide the color space into vboxes such
- *   that (1) the most populated vboxes that can be further
- *   subdivided (i.e., that occupy more than one quantum volume
- *   in color space) all have approximately the same population,
- *   and (2) all large vboxes have no significant population.
- *   If these conditions are met, the quantization will be excellent.
- *
- *   Once the subdivision has been made, the colormap is generated,
- *   with one color for each vbox and using the average color in the vbox.
- *   At the same time, the histogram array is converted to an inverse
- *   colormap table, storing the colormap index in every cell in the
- *   vbox.  Finally, using both the colormap and the inverse colormap,
- *   a colormapped pix is quickly generated from the original rgb pix.
- *
- *   In the present implementation, subdivided regions of colorspace
- *   that are not occupied are retained, but not further subdivided.
- *   This is required for our inverse colormap lookup table for
- *   dithering, because dithered pixels may fall into these unoccupied
- *   regions.  For such empty regions, we use the center as the rgb
- *   colormap value.
- *
- *   This variation on median cut can be referred to as "Modified Median
- *   Cut" quantization, or MMCQ.  Overall, the undithered MMCQ gives
- *   comparable results to the two-pass Octcube Quantizer (OQ).
- *   Comparing the two methods on the test24.jpg painting, we see:
- *
- *     (1) For rendering spot color (the various reds and pinks in
- *         the image), MMCQ is not as good as OQ.
- *
- *     (2) For rendering majority color regions, MMCQ does a better
- *         job of avoiding posterization.  That is, it does better
- *         dividing the color space up in the most heavily populated regions.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Median cut 3-d volume element. Sort on first element, which - * can be the number of pixels, the volume or a combination - * of these. */ -struct L_Box3d -{ - l_float32 sortparam; /* parameter on which to sort the vbox */ - l_int32 npix; /* number of pixels in the vbox */ - l_int32 vol; /* quantized volume of vbox */ - l_int32 r1; /* min r index in the vbox */ - l_int32 r2; /* max r index in the vbox */ - l_int32 g1; /* min g index in the vbox */ - l_int32 g2; /* max g index in the vbox */ - l_int32 b1; /* min b index in the vbox */ - l_int32 b2; /* max b index in the vbox */ -}; -typedef struct L_Box3d L_BOX3D; - - /* Static median cut helper functions */ -static PIXCMAP *pixcmapGenerateFromHisto(PIX *pixs, l_int32 depth, - l_int32 *histo, l_int32 histosize, - l_int32 sigbits); -static PIX *pixQuantizeWithColormap(PIX *pixs, l_int32 ditherflag, - l_int32 outdepth, - PIXCMAP *cmap, l_int32 *indexmap, - l_int32 mapsize, l_int32 sigbits); -static void getColorIndexMedianCut(l_uint32 pixel, l_int32 rshift, - l_uint32 mask, l_int32 sigbits, - l_int32 *pindex); -static L_BOX3D *pixGetColorRegion(PIX *pixs, l_int32 sigbits, - l_int32 subsample); -static l_int32 medianCutApply(l_int32 *histo, l_int32 sigbits, - L_BOX3D *vbox, L_BOX3D **pvbox1, - L_BOX3D **pvbox2); -static PIXCMAP *pixcmapGenerateFromMedianCuts(L_HEAP *lh, l_int32 *histo, - l_int32 sigbits); -static l_int32 vboxGetAverageColor(L_BOX3D *vbox, l_int32 *histo, - l_int32 sigbits, l_int32 index, - l_int32 *prval, l_int32 *pgval, - l_int32 *pbval); -static l_int32 vboxGetCount(L_BOX3D *vbox, l_int32 *histo, l_int32 sigbits); -static l_int32 vboxGetVolume(L_BOX3D *vbox); -static L_BOX3D *box3dCreate(l_int32 r1, l_int32 r2, l_int32 g1, - l_int32 g2, l_int32 b1, l_int32 b2); -static L_BOX3D *box3dCopy(L_BOX3D *vbox); - - - /* 5 significant bits for each component is generally satisfactory */ 
-static const l_int32 DefaultSigBits = 5; -static const l_int32 MaxItersAllowed = 5000; /* prevents infinite looping */ - - /* Specify fraction of vboxes made that are sorted on population alone. - * The remaining vboxes are sorted on (population * vbox-volume). */ -static const l_float32 FractByPopulation = 0.85; - - /* To get the max value of 'dif' in the dithering color transfer, - * divide DifCap by 8. */ -static const l_int32 DifCap = 100; - - -#ifndef NO_CONSOLE_IO -#define DEBUG_MC_COLORS 0 -#define DEBUG_SPLIT_AXES 0 -#endif /* ~NO_CONSOLE_IO */ - -/*------------------------------------------------------------------------* - * High level * - *------------------------------------------------------------------------*/ -/*! - * \brief pixMedianCutQuant() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] ditherflag 1 for dither; 0 for no dither - * \return pixd 8 bit with colormap, or NULL on error - * - *
- * Notes:
- *      (1) Simple interface.  See pixMedianCutQuantGeneral() for
- *          use of defaulted parameters.
- * 
- */ -PIX * -pixMedianCutQuant(PIX *pixs, - l_int32 ditherflag) -{ - return pixMedianCutQuantGeneral(pixs, ditherflag, - 0, 256, DefaultSigBits, 1, 1); -} - - -/*! - * \brief pixMedianCutQuantGeneral() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] ditherflag 1 for dither; 0 for no dither - * \param[in] outdepth output depth; valid: 0, 1, 2, 4, 8 - * \param[in] maxcolors between 2 and 256 - * \param[in] sigbits valid: 5 or 6; use 0 for default - * \param[in] maxsub max subsampling, integer; use 0 for default; - * 1 for no subsampling - * \param[in] checkbw 1 to check if color content is very small, - * 0 to assume there is sufficient color - * \return pixd 8 bit with colormap, or NULL on error - * - *
- * Notes:
- *      (1) %maxcolors must be in the range [2 ... 256].
- *      (2) Use %outdepth = 0 to have the output depth computed as the
- *          minimum required to hold the actual colors found, given
- *          the %maxcolors constraint.
- *      (3) Use %outdepth = 1, 2, 4 or 8 to specify the output depth.
- *          In that case, %maxcolors must not exceed 2^(outdepth).
- *      (4) If there are fewer quantized colors in the image than %maxcolors,
- *          the colormap is simply generated from those colors.
- *      (5) %maxsub is the maximum allowed subsampling to be used in the
- *          computation of the color histogram and region of occupied
- *          color space.  The subsampling is chosen internally for
- *          efficiency, based on the image size, but this parameter
- *          limits it.  Use %maxsub = 0 for the internal default, which is the
- *          maximum allowed subsampling.  Use %maxsub = 1 to prevent
- *          subsampling.  In general use %maxsub >= 1 to specify the
- *          maximum subsampling to be allowed, where the actual subsampling
- *          will be the minimum of this value and the internally
- *          determined default value.
- *      (6) %sigbits can be 5 or 6.  There are 2^24 colors in the color space.
- *              sigbits     # of volume elems    # of colors in a volume elem
- *              --------------------------------------------------------------
- *                 5              2^15                  2^9 = 512
- *                 6              2^18                  2^6 = 64
- *          Volume in color space is measured in the number of volume elements.
- *      (7) If the image appears gray because either most of the pixels
- *          are gray or most of the pixels are essentially black or white,
- *          the image is trivially quantized with a grayscale colormap.  The
- *          reason is that median cut divides the color space into rectangular
- *          regions, and it does a very poor job if all the pixels are
- *          near the diagonal of the color space cube.
- * 
- */ -PIX * -pixMedianCutQuantGeneral(PIX *pixs, - l_int32 ditherflag, - l_int32 outdepth, - l_int32 maxcolors, - l_int32 sigbits, - l_int32 maxsub, - l_int32 checkbw) -{ -l_int32 i, subsample, histosize, smalln, ncolors, niters, popcolors; -l_int32 w, h, minside, factor, index, rval, gval, bval; -l_int32 *histo; -l_float32 maxprod, prod, norm, pixfract, colorfract; -L_BOX3D *vbox, *vbox1, *vbox2; -L_HEAP *lh, *lhs; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixMedianCutQuantGeneral"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (maxcolors < 2 || maxcolors > 256) - return (PIX *)ERROR_PTR("maxcolors not in [2...256]", procName, NULL); - if (outdepth != 0 && outdepth != 1 && outdepth != 2 && outdepth != 4 && - outdepth != 8) - return (PIX *)ERROR_PTR("outdepth not in {0,1,2,4,8}", procName, NULL); - if (outdepth > 0 && (maxcolors > (1 << outdepth))) - return (PIX *)ERROR_PTR("maxcolors > 2^(outdepth)", procName, NULL); - if (sigbits == 0) - sigbits = DefaultSigBits; - else if (sigbits < 5 || sigbits > 6) - return (PIX *)ERROR_PTR("sigbits not 5 or 6", procName, NULL); - if (maxsub <= 0) - maxsub = 10; /* default will prevail for 10^7 pixels or less */ - - /* Determine if the image has sufficient color content. - * If pixfract << 1, most pixels are close to black or white. - * If colorfract << 1, the pixels that are not near - * black or white have very little color. - * If with little color, quantize with a grayscale colormap. 
*/ - pixGetDimensions(pixs, &w, &h, NULL); - if (checkbw) { - minside = L_MIN(w, h); - factor = L_MAX(1, minside / 400); - pixColorFraction(pixs, 20, 244, 20, factor, &pixfract, &colorfract); - if (pixfract * colorfract < 0.00025) { - L_INFO("\n Pixel fraction neither white nor black = %6.3f" - "\n Color fraction of those pixels = %6.3f" - "\n Quantizing in gray\n", - procName, pixfract, colorfract); - return pixConvertTo8(pixs, 1); - } - } - - /* Compute the color space histogram. Default sampling - * is about 10^5 sampled pixels. */ - if (maxsub == 1) { - subsample = 1; - } else { - subsample = (l_int32)(sqrt((l_float64)(w * h) / 100000.)); - subsample = L_MAX(1, L_MIN(maxsub, subsample)); - } - histo = pixMedianCutHisto(pixs, sigbits, subsample); - histosize = 1 << (3 * sigbits); - - /* See if the number of quantized colors is less than maxcolors */ - ncolors = 0; - smalln = TRUE; - for (i = 0; i < histosize; i++) { - if (histo[i]) - ncolors++; - if (ncolors > maxcolors) { - smalln = FALSE; - break; - } - } - if (smalln) { /* finish up now */ - if (outdepth == 0) { - if (ncolors <= 2) - outdepth = 1; - else if (ncolors <= 4) - outdepth = 2; - else if (ncolors <= 16) - outdepth = 4; - else - outdepth = 8; - } - cmap = pixcmapGenerateFromHisto(pixs, outdepth, - histo, histosize, sigbits); - pixd = pixQuantizeWithColormap(pixs, ditherflag, outdepth, cmap, - histo, histosize, sigbits); - LEPT_FREE(histo); - return pixd; - } - - /* Initial vbox: minimum region in colorspace occupied by pixels */ - if (ditherflag || subsample > 1) /* use full color space */ - vbox = box3dCreate(0, (1 << sigbits) - 1, - 0, (1 << sigbits) - 1, - 0, (1 << sigbits) - 1); - else - vbox = pixGetColorRegion(pixs, sigbits, subsample); - vbox->npix = vboxGetCount(vbox, histo, sigbits); - vbox->vol = vboxGetVolume(vbox); - - /* For a fraction 'popcolors' of the desired 'maxcolors', - * generate median cuts based on population, putting - * everything on a priority queue sorted by population. 
*/ - lh = lheapCreate(0, L_SORT_DECREASING); - lheapAdd(lh, vbox); - ncolors = 1; - niters = 0; - popcolors = (l_int32)(FractByPopulation * maxcolors); - while (1) { - vbox = (L_BOX3D *)lheapRemove(lh); - if (vboxGetCount(vbox, histo, sigbits) == 0) { /* just put it back */ - lheapAdd(lh, vbox); - continue; - } - medianCutApply(histo, sigbits, vbox, &vbox1, &vbox2); - if (!vbox1) { - L_WARNING("vbox1 not defined; shouldn't happen!\n", procName); - break; - } - if (vbox1->vol > 1) - vbox1->sortparam = vbox1->npix; - LEPT_FREE(vbox); - lheapAdd(lh, vbox1); - if (vbox2) { /* vbox2 can be NULL */ - if (vbox2->vol > 1) - vbox2->sortparam = vbox2->npix; - lheapAdd(lh, vbox2); - ncolors++; - } - if (ncolors >= popcolors) - break; - if (niters++ > MaxItersAllowed) { - L_WARNING("infinite loop; perhaps too few pixels!\n", procName); - break; - } - } - - /* Re-sort by the product of pixel occupancy times the size - * in color space. Normalize to the largest product to avoid - * integer overflow. */ - maxprod = 0.0; - for (i = 0; i < lh->n; i++) { - if ((vbox = (L_BOX3D *)lheapGetElement(lh, i)) == NULL) - continue; - prod = (l_float32)vbox->npix * (l_float32)vbox->vol; - if (prod > maxprod) maxprod = prod; - } - norm = (maxprod == 0) ? 1.0 : 1000000.0 / maxprod; - lhs = lheapCreate(0, L_SORT_DECREASING); - while ((vbox = (L_BOX3D *)lheapRemove(lh))) { - vbox->sortparam = norm * vbox->npix * vbox->vol; - lheapAdd(lhs, vbox); - } - lheapDestroy(&lh, TRUE); - - /* For the remaining (maxcolors - popcolors), generate the - * median cuts using the (npix * vol) sorting. 
*/ - while (1) { - vbox = (L_BOX3D *)lheapRemove(lhs); - if (vboxGetCount(vbox, histo, sigbits) == 0) { /* just put it back */ - lheapAdd(lhs, vbox); - continue; - } - medianCutApply(histo, sigbits, vbox, &vbox1, &vbox2); - if (!vbox1) { - L_WARNING("vbox1 not defined; shouldn't happen!\n", procName); - break; - } - if (vbox1->vol > 1) - vbox1->sortparam = norm * vbox1->npix * vbox1->vol; - LEPT_FREE(vbox); - lheapAdd(lhs, vbox1); - if (vbox2) { /* vbox2 can be NULL */ - if (vbox2->vol > 1) - vbox2->sortparam = norm * vbox2->npix * vbox2->vol; - lheapAdd(lhs, vbox2); - ncolors++; - } - if (ncolors >= maxcolors) - break; - if (niters++ > MaxItersAllowed) { - L_WARNING("infinite loop; perhaps too few pixels!\n", procName); - break; - } - } - - /* Re-sort by pixel occupancy. This is not necessary, - * but it makes a more useful listing. */ - lh = lheapCreate(0, L_SORT_DECREASING); - while ((vbox = (L_BOX3D *)lheapRemove(lhs))) { - vbox->sortparam = vbox->npix; -/* vbox->sortparam = vbox->npix * vbox->vol; */ - lheapAdd(lh, vbox); - } - lheapDestroy(&lhs, TRUE); - - /* Generate colormap from median cuts and quantize pixd */ - cmap = pixcmapGenerateFromMedianCuts(lh, histo, sigbits); - if (outdepth == 0) { - ncolors = pixcmapGetCount(cmap); - if (ncolors <= 2) - outdepth = 1; - else if (ncolors <= 4) - outdepth = 2; - else if (ncolors <= 16) - outdepth = 4; - else - outdepth = 8; - } - pixd = pixQuantizeWithColormap(pixs, ditherflag, outdepth, cmap, - histo, histosize, sigbits); - - /* Force darkest color to black if each component <= 4 */ - pixcmapGetRankIntensity(cmap, 0.0, &index); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - if (rval < 5 && gval < 5 && bval < 5) - pixcmapResetColor(cmap, index, 0, 0, 0); - - /* Force lightest color to white if each component >= 252 */ - pixcmapGetRankIntensity(cmap, 1.0, &index); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - if (rval > 251 && gval > 251 && bval > 251) - pixcmapResetColor(cmap, index, 255, 255, 
255); - - lheapDestroy(&lh, TRUE); - LEPT_FREE(histo); - return pixd; -} - - -/*! - * \brief pixMedianCutQuantMixed() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] ncolor maximum number of colors assigned to - * pixels with significant color - * \param[in] ngray number of gray colors to be used; must be >= 2 - * \param[in] darkthresh threshold near black; if the lightest component - * is below this, the pixel is not considered to - * be gray or color; uses 0 for default - * \param[in] lightthresh threshold near white; if the darkest component - * is above this, the pixel is not considered to - * be gray or color; use 0 for default - * \param[in] diffthresh thresh for the max difference between component - * values; for differences below this, the pixel - * is considered to be gray; use 0 for default - * \return pixd 8 bpp cmapped, or NULL on error - * - *
- * Notes:
- *      (1) ncolor + ngray must not exceed 255.
- *      (2) The method makes use of pixMedianCutQuantGeneral() with
- *          minimal addition.
- *          (a) Preprocess the image, setting all pixels with little color
- *              to black, and populating an auxiliary 8 bpp image with the
- *              expected colormap values corresponding to the set of
- *              quantized gray values.
- *          (b) Color quantize the altered input image to n + 1 colors.
- *          (c) Augment the colormap with the gray indices, and
- *              substitute the gray quantized values from the auxiliary
- *              image for those in the color quantized output that had
- *              been quantized as black.
- *      (3) Median cut color quantization is relatively poor for grayscale
- *          images with many colors, when compared to octcube quantization.
- *          Thus, for images with both gray and color, it is important
- *          to quantize the gray pixels by another method.  Here, we
- *          are conservative in detecting color, preferring to use
- *          a few extra bits to encode colorful pixels that push them
- *          to gray.  This is particularly reasonable with this function,
- *          because it handles the gray and color pixels separately,
- *          using median cut color quantization for the color pixels
- *          and equal-bin grayscale quantization for the non-color pixels.
- * 
- */ -PIX * -pixMedianCutQuantMixed(PIX *pixs, - l_int32 ncolor, - l_int32 ngray, - l_int32 darkthresh, - l_int32 lightthresh, - l_int32 diffthresh) -{ -l_int32 i, j, w, h, wplc, wplg, wpld, nc, unused, iscolor, factor, minside; -l_int32 rval, gval, bval, minval, maxval, val, grayval; -l_float32 pixfract, colorfract; -l_int32 *lut; -l_uint32 *datac, *datag, *datad, *linec, *lineg, *lined; -PIX *pixc, *pixg, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixMedianCutQuantMixed"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (ngray < 2) - return (PIX *)ERROR_PTR("ngray < 2", procName, NULL); - if (ncolor + ngray > 255) - return (PIX *)ERROR_PTR("ncolor + ngray > 255", procName, NULL); - if (darkthresh <= 0) darkthresh = 20; - if (lightthresh <= 0) lightthresh = 244; - if (diffthresh <= 0) diffthresh = 20; - - /* First check if this should be quantized in gray. - * Use a more sensitive parameter for detecting color than with - * pixMedianCutQuantGeneral(), because this function can handle - * gray pixels well. */ - pixGetDimensions(pixs, &w, &h, NULL); - minside = L_MIN(w, h); - factor = L_MAX(1, minside / 400); - pixColorFraction(pixs, darkthresh, lightthresh, diffthresh, factor, - &pixfract, &colorfract); - if (pixfract * colorfract < 0.0001) { - L_INFO("\n Pixel fraction neither white nor black = %6.3f" - "\n Color fraction of those pixels = %6.3f" - "\n Quantizing in gray\n", - procName, pixfract, colorfract); - pixg = pixConvertTo8(pixs, 0); - pixd = pixThresholdOn8bpp(pixg, ngray, 1); - pixDestroy(&pixg); - return pixd; - } - - /* OK, there is color in the image. - * Preprocess to handle the gray pixels. 
Set the color pixels in pixc - * to black, and store their (eventual) colormap indices in pixg.*/ - pixc = pixCopy(NULL, pixs); - pixg = pixCreate(w, h, 8); /* color pixels will remain 0 here */ - datac = pixGetData(pixc); - datag = pixGetData(pixg); - wplc = pixGetWpl(pixc); - wplg = pixGetWpl(pixg); - lut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) - lut[i] = ncolor + 1 + (i * (ngray - 1) + 128) / 255; - for (i = 0; i < h; i++) { - linec = datac + i * wplc; - lineg = datag + i * wplg; - for (j = 0; j < w; j++) { - iscolor = FALSE; - extractRGBValues(linec[j], &rval, &gval, &bval); - minval = L_MIN(rval, gval); - minval = L_MIN(minval, bval); - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - if (maxval >= darkthresh && - minval <= lightthresh && - maxval - minval >= diffthresh) { - iscolor = TRUE; - } - if (!iscolor) { - linec[j] = 0x0; /* set to black */ - grayval = (maxval + minval) / 2; - SET_DATA_BYTE(lineg, j, lut[grayval]); - } - } - } - - /* Median cut on color pixels plus black */ - pixd = pixMedianCutQuantGeneral(pixc, FALSE, 8, ncolor + 1, - DefaultSigBits, 1, 0); - - /* Augment the colormap with gray values. The new cmap - * indices should agree with the values previously stored in pixg. 
*/ - cmap = pixGetColormap(pixd); - nc = pixcmapGetCount(cmap); - unused = ncolor + 1 - nc; - if (unused < 0) - L_ERROR("Too many colors: extra = %d\n", procName, -unused); - if (unused > 0) { /* fill in with black; these won't be used */ - L_INFO("%d unused colors\n", procName, unused); - for (i = 0; i < unused; i++) - pixcmapAddColor(cmap, 0, 0, 0); - } - for (i = 0; i < ngray; i++) { - grayval = (255 * i) / (ngray - 1); - pixcmapAddColor(cmap, grayval, grayval, grayval); - } - - /* Substitute cmap indices for the gray pixels into pixd */ - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - lineg = datag + i * wplg; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lineg, j); /* if 0, it's a color pixel */ - if (val) - SET_DATA_BYTE(lined, j, val); - } - } - - pixDestroy(&pixc); - pixDestroy(&pixg); - LEPT_FREE(lut); - return pixd; -} - - -/*! - * \brief pixFewColorsMedianCutQuantMixed() - * - * \param[in] pixs 32 bpp rgb - * \param[in] ncolor number of colors to be assigned to pixels - * with significant color - * \param[in] ngray number of gray colors to be used; must be >= 2 - * \param[in] maxncolors maximum number of colors to be returned from - * pixColorsForQuantization(); use 0 for default - * \param[in] darkthresh threshold near black; if the lightest component - * is below this, the pixel is not considered to - * be gray or color; use 0 for default - * \param[in] lightthresh threshold near white; if the darkest component - * is above this, the pixel is not considered to - * be gray or color; use 0 for default - * \param[in] diffthresh thresh for the max difference between component - * values; for differences below this, the pixel - * is considered to be gray; use 0 for default - * \return pixd 8 bpp, median cut quantized for pixels that are - * not gray; gray pixels are quantized separately over - * the full gray range; null if too many colors or on error - * - *
- * Notes:
- *      (1) This is the "few colors" version of pixMedianCutQuantMixed().
- *          It fails (returns NULL) if it finds more than maxncolors, but
- *          otherwise it gives the same result.
- *      (2) Recommended input parameters are:
- *              %maxncolors:  20
- *              %darkthresh:  20
- *              %lightthresh: 244
- *              %diffthresh:  15  (any higher can miss colors differing
- *                                 slightly from gray)
- *      (3) Both ncolor and ngray should be at least equal to maxncolors.
- *          If they're not, they are automatically increased, and a
- *          warning is given.
- *      (4) If very little color content is found, the input is
- *          converted to gray and quantized in equal intervals.
- *      (5) This can be useful for quantizing orthographically generated
- *          images such as color maps, where there may be more than 256 colors
- *          because of aliasing or jpeg artifacts on text or lines, but
- *          there are a relatively small number of solid colors.
- *      (6) Example of usage:
- *             // Try to quantize, using default values for mixed med cut
- *             Pix *pixq = pixFewColorsMedianCutQuantMixed(pixs, 100, 20,
- *                             0, 0, 0, 0);
- *             if (!pixq)  // too many colors; don't quantize
- *                 pixq = pixClone(pixs);
- * 
- */ -PIX * -pixFewColorsMedianCutQuantMixed(PIX *pixs, - l_int32 ncolor, - l_int32 ngray, - l_int32 maxncolors, - l_int32 darkthresh, - l_int32 lightthresh, - l_int32 diffthresh) -{ -l_int32 ncolors, iscolor; -PIX *pixg, *pixd; - - PROCNAME("pixFewColorsMedianCutQuantMixed"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (maxncolors <= 0) maxncolors = 20; - if (darkthresh <= 0) darkthresh = 20; - if (lightthresh <= 0) lightthresh = 244; - if (diffthresh <= 0) diffthresh = 15; - if (ncolor < maxncolors) { - L_WARNING("ncolor too small; setting to %d\n", procName, maxncolors); - ncolor = maxncolors; - } - if (ngray < maxncolors) { - L_WARNING("ngray too small; setting to %d\n", procName, maxncolors); - ngray = maxncolors; - } - - /* Estimate the color content and the number of colors required */ - pixColorsForQuantization(pixs, 15, &ncolors, &iscolor, 0); - - /* Note that maxncolors applies to all colors required to quantize, - * both gray and colorful */ - if (ncolors > maxncolors) - return (PIX *)ERROR_PTR("too many colors", procName, NULL); - - /* If no color, return quantized gray pix */ - if (!iscolor) { - pixg = pixConvertTo8(pixs, 0); - pixd = pixThresholdOn8bpp(pixg, ngray, 1); - pixDestroy(&pixg); - return pixd; - } - - /* Use the mixed gray/color quantizer */ - return pixMedianCutQuantMixed(pixs, ncolor, ngray, darkthresh, - lightthresh, diffthresh); -} - - - -/*------------------------------------------------------------------------* - * Median cut indexed histogram * - *------------------------------------------------------------------------*/ -/*! - * \brief pixMedianCutHisto() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] sigbits valid: 5 or 6 - * \param[in] subsample integer > 0 - * \return histo 1-d array, giving the number of pixels in each - * quantized region of color space, or NULL on error - * - *
- * Notes:
- *      (1) Array is indexed by (3 * sigbits) bits.  The array size
- *          is 2^(3 * sigbits).
- *      (2) Indexing into the array from rgb uses red sigbits as
- *          most significant and blue as least.
- * 
- */ -l_int32 * -pixMedianCutHisto(PIX *pixs, - l_int32 sigbits, - l_int32 subsample) -{ -l_int32 i, j, w, h, wpl, rshift, index, histosize; -l_int32 *histo; -l_uint32 mask, pixel; -l_uint32 *data, *line; - - PROCNAME("pixMedianCutHisto"); - - if (!pixs) - return (l_int32 *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (l_int32 *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (sigbits < 5 || sigbits > 6) - return (l_int32 *)ERROR_PTR("sigbits not 5 or 6", procName, NULL); - if (subsample <= 0) - return (l_int32 *)ERROR_PTR("subsample not > 0", procName, NULL); - - histosize = 1 << (3 * sigbits); - if ((histo = (l_int32 *)LEPT_CALLOC(histosize, sizeof(l_int32))) == NULL) - return (l_int32 *)ERROR_PTR("histo not made", procName, NULL); - - rshift = 8 - sigbits; - mask = 0xff >> rshift; - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i += subsample) { - line = data + i * wpl; - for (j = 0; j < w; j += subsample) { - pixel = line[j]; - getColorIndexMedianCut(pixel, rshift, mask, sigbits, &index); - histo[index]++; - } - } - - return histo; -} - - -/*------------------------------------------------------------------------* - * Static helpers * - *------------------------------------------------------------------------*/ -/*! - * \brief pixcmapGenerateFromHisto() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] depth of colormap - * \param[in] histo - * \param[in] histosize - * \param[in] sigbits - * \return colormap, or NULL on error - * - *
- * Notes:
- *      (1) This is used when the number of colors in the histo
- *          is not greater than maxcolors.
- *      (2) As a side-effect, the histo becomes an inverse colormap,
- *          labeling the cmap indices for each existing color.
- * 
- */ -static PIXCMAP * -pixcmapGenerateFromHisto(PIX *pixs, - l_int32 depth, - l_int32 *histo, - l_int32 histosize, - l_int32 sigbits) -{ -l_int32 i, index, shift, rval, gval, bval; -l_uint32 mask; -PIXCMAP *cmap; - - PROCNAME("pixcmapGenerateFromHisto"); - - if (!pixs) - return (PIXCMAP *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIXCMAP *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (!histo) - return (PIXCMAP *)ERROR_PTR("histo not defined", procName, NULL); - - /* Capture the rgb values of each occupied cube in the histo, - * and re-label the histo value with the colormap index. */ - cmap = pixcmapCreate(depth); - shift = 8 - sigbits; - mask = 0xff >> shift; - for (i = 0, index = 0; i < histosize; i++) { - if (histo[i]) { - rval = (i >> (2 * sigbits)) << shift; - gval = ((i >> sigbits) & mask) << shift; - bval = (i & mask) << shift; - pixcmapAddColor(cmap, rval, gval, bval); - histo[i] = index++; - } - } - - return cmap; -} - - -/*! - * \brief pixQuantizeWithColormap() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] ditherflag 1 for dither; 0 for no dither - * \param[in] outdepth depth of the returned pixd - * \param[in] cmap colormap - * \param[in] indexmap lookup table - * \param[in] mapsize size of the lookup table - * \param[in] sigbits significant bits in output - * \return pixd quantized to colormap, or NULL on error - * - *
- * Notes:
- *      (1) The indexmap is a LUT that takes the rgb indices of the
- *          pixel and returns the index into the colormap.
- *      (2) If ditherflag is 1, %outdepth is ignored and the output
- *          depth is set to 8.
- * 
- */ -static PIX * -pixQuantizeWithColormap(PIX *pixs, - l_int32 ditherflag, - l_int32 outdepth, - PIXCMAP *cmap, - l_int32 *indexmap, - l_int32 mapsize, - l_int32 sigbits) -{ -l_uint8 *bufu8r, *bufu8g, *bufu8b; -l_int32 i, j, w, h, wpls, wpld, rshift, index, cmapindex, success; -l_int32 rval, gval, bval, rc, gc, bc; -l_int32 dif, val1, val2, val3; -l_int32 *buf1r, *buf1g, *buf1b, *buf2r, *buf2g, *buf2b; -l_uint32 *datas, *datad, *lines, *lined; -l_uint32 mask, pixel; -PIX *pixd; - - PROCNAME("pixQuantizeWithColormap"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (!cmap) - return (PIX *)ERROR_PTR("cmap not defined", procName, NULL); - if (!indexmap) - return (PIX *)ERROR_PTR("indexmap not defined", procName, NULL); - if (ditherflag) - outdepth = 8; - - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, outdepth); - pixSetColormap(pixd, cmap); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - rshift = 8 - sigbits; - mask = 0xff >> rshift; - if (ditherflag == 0) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (outdepth == 1) { - for (j = 0; j < w; j++) { - pixel = lines[j]; - getColorIndexMedianCut(pixel, rshift, mask, - sigbits, &index); - if (indexmap[index]) - SET_DATA_BIT(lined, j); - } - } else if (outdepth == 2) { - for (j = 0; j < w; j++) { - pixel = lines[j]; - getColorIndexMedianCut(pixel, rshift, mask, - sigbits, &index); - SET_DATA_DIBIT(lined, j, indexmap[index]); - } - } else if (outdepth == 4) { - for (j = 0; j < w; j++) { - pixel = lines[j]; - getColorIndexMedianCut(pixel, rshift, mask, - sigbits, &index); - SET_DATA_QBIT(lined, j, indexmap[index]); - } - } else { /* outdepth == 8 */ - for (j = 0; j < w; j++) { - pixel = lines[j]; - getColorIndexMedianCut(pixel, rshift, mask, - sigbits, &index); - 
SET_DATA_BYTE(lined, j, indexmap[index]); - } - } - } - } else { /* ditherflag == 1 */ - success = TRUE; - bufu8r = bufu8g = bufu8b = NULL; - buf1r = buf1g = buf1b = buf2r = buf2g = buf2b = NULL; - bufu8r = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8)); - bufu8g = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8)); - bufu8b = (l_uint8 *)LEPT_CALLOC(w, sizeof(l_uint8)); - buf1r = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf1g = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf1b = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf2r = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf2g = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - buf2b = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - if (!bufu8r || !bufu8g || !bufu8b || !buf1r || !buf1g || - !buf1b || !buf2r || !buf2g || !buf2b) { - L_ERROR("buffer not made\n", procName); - success = FALSE; - goto buffer_cleanup; - } - - /* Start by priming buf2; line 1 is above line 2 */ - pixGetRGBLine(pixs, 0, bufu8r, bufu8g, bufu8b); - for (j = 0; j < w; j++) { - buf2r[j] = 64 * bufu8r[j]; - buf2g[j] = 64 * bufu8g[j]; - buf2b[j] = 64 * bufu8b[j]; - } - - for (i = 0; i < h - 1; i++) { - /* Swap data 2 --> 1, and read in new line 2 */ - memcpy(buf1r, buf2r, 4 * w); - memcpy(buf1g, buf2g, 4 * w); - memcpy(buf1b, buf2b, 4 * w); - pixGetRGBLine(pixs, i + 1, bufu8r, bufu8g, bufu8b); - for (j = 0; j < w; j++) { - buf2r[j] = 64 * bufu8r[j]; - buf2g[j] = 64 * bufu8g[j]; - buf2b[j] = 64 * bufu8b[j]; - } - - /* Dither */ - lined = datad + i * wpld; - for (j = 0; j < w - 1; j++) { - rval = buf1r[j] / 64; - gval = buf1g[j] / 64; - bval = buf1b[j] / 64; - index = ((rval >> rshift) << (2 * sigbits)) + - ((gval >> rshift) << sigbits) + (bval >> rshift); - cmapindex = indexmap[index]; - SET_DATA_BYTE(lined, j, cmapindex); - pixcmapGetColor(cmap, cmapindex, &rc, &gc, &bc); - - dif = buf1r[j] / 8 - 8 * rc; - if (dif > DifCap) dif = DifCap; - if (dif < -DifCap) dif = -DifCap; - if (dif != 0) { - val1 = buf1r[j + 1] + 3 * dif; - val2 = buf2r[j] + 3 * 
dif; - val3 = buf2r[j + 1] + 2 * dif; - if (dif > 0) { - buf1r[j + 1] = L_MIN(16383, val1); - buf2r[j] = L_MIN(16383, val2); - buf2r[j + 1] = L_MIN(16383, val3); - } else { - buf1r[j + 1] = L_MAX(0, val1); - buf2r[j] = L_MAX(0, val2); - buf2r[j + 1] = L_MAX(0, val3); - } - } - - dif = buf1g[j] / 8 - 8 * gc; - if (dif > DifCap) dif = DifCap; - if (dif < -DifCap) dif = -DifCap; - if (dif != 0) { - val1 = buf1g[j + 1] + 3 * dif; - val2 = buf2g[j] + 3 * dif; - val3 = buf2g[j + 1] + 2 * dif; - if (dif > 0) { - buf1g[j + 1] = L_MIN(16383, val1); - buf2g[j] = L_MIN(16383, val2); - buf2g[j + 1] = L_MIN(16383, val3); - } else { - buf1g[j + 1] = L_MAX(0, val1); - buf2g[j] = L_MAX(0, val2); - buf2g[j + 1] = L_MAX(0, val3); - } - } - - dif = buf1b[j] / 8 - 8 * bc; - if (dif > DifCap) dif = DifCap; - if (dif < -DifCap) dif = -DifCap; - if (dif != 0) { - val1 = buf1b[j + 1] + 3 * dif; - val2 = buf2b[j] + 3 * dif; - val3 = buf2b[j + 1] + 2 * dif; - if (dif > 0) { - buf1b[j + 1] = L_MIN(16383, val1); - buf2b[j] = L_MIN(16383, val2); - buf2b[j + 1] = L_MIN(16383, val3); - } else { - buf1b[j + 1] = L_MAX(0, val1); - buf2b[j] = L_MAX(0, val2); - buf2b[j + 1] = L_MAX(0, val3); - } - } - } - - /* Get last pixel in row; no downward propagation */ - rval = buf1r[w - 1] / 64; - gval = buf1g[w - 1] / 64; - bval = buf1b[w - 1] / 64; - index = ((rval >> rshift) << (2 * sigbits)) + - ((gval >> rshift) << sigbits) + (bval >> rshift); - SET_DATA_BYTE(lined, w - 1, indexmap[index]); - } - - /* Get last row of pixels; no leftward propagation */ - lined = datad + (h - 1) * wpld; - for (j = 0; j < w; j++) { - rval = buf2r[j] / 64; - gval = buf2g[j] / 64; - bval = buf2b[j] / 64; - index = ((rval >> rshift) << (2 * sigbits)) + - ((gval >> rshift) << sigbits) + (bval >> rshift); - SET_DATA_BYTE(lined, j, indexmap[index]); - } - -buffer_cleanup: - LEPT_FREE(bufu8r); - LEPT_FREE(bufu8g); - LEPT_FREE(bufu8b); - LEPT_FREE(buf1r); - LEPT_FREE(buf1g); - LEPT_FREE(buf1b); - LEPT_FREE(buf2r); - 
LEPT_FREE(buf2g); - LEPT_FREE(buf2b); - if (!success) pixDestroy(&pixd); - } - - return pixd; -} - - -/*! - * \brief getColorIndexMedianCut() - * - * \param[in] pixel 32 bit rgb - * \param[in] rshift of component: 8 - sigbits - * \param[in] mask over sigbits - * \param[in] sigbits - * \param[out] pindex rgb index value - * \return void - * - *
- * Notes:
- *      (1) This is used on each pixel in the source image.  No checking
- *          is done on input values.
- * 
- */ -static void -getColorIndexMedianCut(l_uint32 pixel, - l_int32 rshift, - l_uint32 mask, - l_int32 sigbits, - l_int32 *pindex) -{ -l_int32 rval, gval, bval; - - rval = pixel >> (24 + rshift); - gval = (pixel >> (16 + rshift)) & mask; - bval = (pixel >> (8 + rshift)) & mask; - *pindex = (rval << (2 * sigbits)) + (gval << sigbits) + bval; - return; -} - - -/*! - * \brief pixGetColorRegion() - * - * \param[in] pixs 32 bpp; rgb color - * \param[in] sigbits valid: 5, 6 - * \param[in] subsample integer > 0 - * \return vbox minimum 3D box in color space enclosing all pixels, - * or NULL on error - * - *
- * Notes:
- *      (1) Computes the minimum 3D box in color space enclosing all
- *          pixels in the image.
- * 
- */ -static L_BOX3D * -pixGetColorRegion(PIX *pixs, - l_int32 sigbits, - l_int32 subsample) -{ -l_int32 rmin, rmax, gmin, gmax, bmin, bmax, rval, gval, bval; -l_int32 w, h, wpl, i, j, rshift; -l_uint32 mask, pixel; -l_uint32 *data, *line; - - PROCNAME("pixGetColorRegion"); - - if (!pixs) - return (L_BOX3D *)ERROR_PTR("pixs not defined", procName, NULL); - - rmin = gmin = bmin = 1000000; - rmax = gmax = bmax = 0; - rshift = 8 - sigbits; - mask = 0xff >> rshift; - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i += subsample) { - line = data + i * wpl; - for (j = 0; j < w; j += subsample) { - pixel = line[j]; - rval = pixel >> (24 + rshift); - gval = (pixel >> (16 + rshift)) & mask; - bval = (pixel >> (8 + rshift)) & mask; - if (rval < rmin) - rmin = rval; - else if (rval > rmax) - rmax = rval; - if (gval < gmin) - gmin = gval; - else if (gval > gmax) - gmax = gval; - if (bval < bmin) - bmin = bval; - else if (bval > bmax) - bmax = bval; - } - } - - return box3dCreate(rmin, rmax, gmin, gmax, bmin, bmax); -} - - -/*! 
- * \brief medianCutApply() - * - * \param[in] histo array; in rgb colorspace - * \param[in] sigbits - * \param[in] vbox input 3D box - * \param[out] pvbox1, pvbox2 vbox split in two parts - * \return 0 if OK, 1 on error - */ -static l_int32 -medianCutApply(l_int32 *histo, - l_int32 sigbits, - L_BOX3D *vbox, - L_BOX3D **pvbox1, - L_BOX3D **pvbox2) -{ -l_int32 i, j, k, sum, rw, gw, bw, maxw, index; -l_int32 total, left, right; -l_int32 partialsum[128]; -L_BOX3D *vbox1, *vbox2; - - PROCNAME("medianCutApply"); - - if (pvbox1) *pvbox1 = NULL; - if (pvbox2) *pvbox2 = NULL; - if (!histo) - return ERROR_INT("histo not defined", procName, 1); - if (!vbox) - return ERROR_INT("vbox not defined", procName, 1); - if (!pvbox1 || !pvbox2) - return ERROR_INT("&vbox1 and &vbox2 not both defined", procName, 1); - - if (vboxGetCount(vbox, histo, sigbits) == 0) - return ERROR_INT("no pixels in vbox", procName, 1); - - /* If the vbox occupies just one element in color space, it can't - * be split. Leave the 'sortparam' field at 0, so that it goes to - * the tail of the priority queue and stays there, thereby avoiding - * an infinite loop (take off, put back on the head) if it - * happens to be the most populous box! */ - rw = vbox->r2 - vbox->r1 + 1; - gw = vbox->g2 - vbox->g1 + 1; - bw = vbox->b2 - vbox->b1 + 1; - if (rw == 1 && gw == 1 && bw == 1) { - *pvbox1 = box3dCopy(vbox); - return 0; - } - - /* Select the longest axis for splitting */ - maxw = L_MAX(rw, gw); - maxw = L_MAX(maxw, bw); -#if DEBUG_SPLIT_AXES - if (rw == maxw) - lept_stderr("red split\n"); - else if (gw == maxw) - lept_stderr("green split\n"); - else - lept_stderr("blue split\n"); -#endif /* DEBUG_SPLIT_AXES */ - - /* Find the partial sum arrays along the selected axis. 
*/ - total = 0; - if (maxw == rw) { - for (i = vbox->r1; i <= vbox->r2; i++) { - sum = 0; - for (j = vbox->g1; j <= vbox->g2; j++) { - for (k = vbox->b1; k <= vbox->b2; k++) { - index = (i << (2 * sigbits)) + (j << sigbits) + k; - sum += histo[index]; - } - } - total += sum; - partialsum[i] = total; - } - } else if (maxw == gw) { - for (i = vbox->g1; i <= vbox->g2; i++) { - sum = 0; - for (j = vbox->r1; j <= vbox->r2; j++) { - for (k = vbox->b1; k <= vbox->b2; k++) { - index = (i << sigbits) + (j << (2 * sigbits)) + k; - sum += histo[index]; - } - } - total += sum; - partialsum[i] = total; - } - } else { /* maxw == bw */ - for (i = vbox->b1; i <= vbox->b2; i++) { - sum = 0; - for (j = vbox->r1; j <= vbox->r2; j++) { - for (k = vbox->g1; k <= vbox->g2; k++) { - index = i + (j << (2 * sigbits)) + (k << sigbits); - sum += histo[index]; - } - } - total += sum; - partialsum[i] = total; - } - } - - /* Determine the cut planes, making sure that two vboxes - * are always produced. Generate the two vboxes and compute - * the sum in each of them. Choose the cut plane within - * the greater of the (left, right) sides of the bin in which - * the median pixel resides. Here's the surprise: go halfway - * into that side. By doing that, you technically move away - * from "median cut," but in the process a significant number - * of low-count vboxes are produced, allowing much better - * reproduction of low-count spot colors. 
*/ - vbox1 = vbox2 = NULL; - if (maxw == rw) { - for (i = vbox->r1; i <= vbox->r2; i++) { - if (partialsum[i] > total / 2) { - vbox1 = box3dCopy(vbox); - vbox2 = box3dCopy(vbox); - left = i - vbox->r1; - right = vbox->r2 - i; - if (left <= right) - vbox1->r2 = L_MIN(vbox->r2 - 1, i + right / 2); - else /* left > right */ - vbox1->r2 = L_MAX(vbox->r1, i - 1 - left / 2); - vbox2->r1 = vbox1->r2 + 1; - break; - } - } - } else if (maxw == gw) { - for (i = vbox->g1; i <= vbox->g2; i++) { - if (partialsum[i] > total / 2) { - vbox1 = box3dCopy(vbox); - vbox2 = box3dCopy(vbox); - left = i - vbox->g1; - right = vbox->g2 - i; - if (left <= right) - vbox1->g2 = L_MIN(vbox->g2 - 1, i + right / 2); - else /* left > right */ - vbox1->g2 = L_MAX(vbox->g1, i - 1 - left / 2); - vbox2->g1 = vbox1->g2 + 1; - break; - } - } - } else { /* maxw == bw */ - for (i = vbox->b1; i <= vbox->b2; i++) { - if (partialsum[i] > total / 2) { - vbox1 = box3dCopy(vbox); - vbox2 = box3dCopy(vbox); - left = i - vbox->b1; - right = vbox->b2 - i; - if (left <= right) - vbox1->b2 = L_MIN(vbox->b2 - 1, i + right / 2); - else /* left > right */ - vbox1->b2 = L_MAX(vbox->b1, i - 1 - left / 2); - vbox2->b1 = vbox1->b2 + 1; - break; - } - } - } - *pvbox1 = vbox1; - *pvbox2 = vbox2; - if (!vbox1) - return ERROR_INT("vbox1 not made; shouldn't happen", procName, 1); - if (!vbox2) - return ERROR_INT("vbox2 not made; shouldn't happen", procName, 1); - vbox1->npix = vboxGetCount(vbox1, histo, sigbits); - vbox2->npix = vboxGetCount(vbox2, histo, sigbits); - vbox1->vol = vboxGetVolume(vbox1); - vbox2->vol = vboxGetVolume(vbox2); - - return 0; -} - - -/*! - * \brief pixcmapGenerateFromMedianCuts() - * - * \param[in] lh priority queue of pointers to vboxes - * \param[in] histo - * \param[in] sigbits valid: 5 or 6 - * \return cmap, or NULL on error - * - *
- * Notes:
- *      (1) Each vbox in the heap represents a color in the colormap.
- *      (2) As a side-effect, the histo becomes an inverse colormap,
- *          where the part of the array correpsonding to each vbox
- *          is labeled with the cmap index for that vbox.  Then
- *          for each rgb pixel, the colormap index is found directly
- *          by mapping the rgb value to the histo array index.
- * 
- */ -static PIXCMAP * -pixcmapGenerateFromMedianCuts(L_HEAP *lh, - l_int32 *histo, - l_int32 sigbits) -{ -l_int32 index, rval, gval, bval; -L_BOX3D *vbox; -PIXCMAP *cmap; - - PROCNAME("pixcmapGenerateFromMedianCuts"); - - if (!lh) - return (PIXCMAP *)ERROR_PTR("lh not defined", procName, NULL); - if (!histo) - return (PIXCMAP *)ERROR_PTR("histo not defined", procName, NULL); - - rval = gval = bval = 0; /* make compiler happy */ - cmap = pixcmapCreate(8); - index = 0; - while (lheapGetCount(lh) > 0) { - vbox = (L_BOX3D *)lheapRemove(lh); - vboxGetAverageColor(vbox, histo, sigbits, index, &rval, &gval, &bval); - pixcmapAddColor(cmap, rval, gval, bval); - LEPT_FREE(vbox); - index++; - } - - return cmap; -} - - -/*! - * \brief vboxGetAverageColor() - * - * \param[in] vbox 3d region of color space for one quantized color - * \param[in] histo - * \param[in] sigbits valid: 5 or 6 - * \param[in] index if >= 0, assign to all colors in histo in this vbox - * \param[out] prval, pgval, pbval average color - * \return cmap, or NULL on error - * - *
- * Notes:
- *      (1) The vbox represents one color in the colormap.
- *      (2) If index >= 0, as a side-effect, all array elements in
- *          the histo corresponding to the vbox are labeled with this
- *          cmap index for that vbox.  Otherwise, the histo array
- *          is not changed.
- *      (3) The vbox is quantized in sigbits.  So the actual 8-bit color
- *          components are found by multiplying the quantized value
- *          by either 4 or 8.  We must add 0.5 to the quantized index
- *          before multiplying to get the approximate 8-bit color in
- *          the center of the vbox; otherwise we get values on
- *          the lower corner.
- * 
- */ -static l_int32 -vboxGetAverageColor(L_BOX3D *vbox, - l_int32 *histo, - l_int32 sigbits, - l_int32 index, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_int32 i, j, k, ntot, mult, histoindex, rsum, gsum, bsum; - - PROCNAME("vboxGetAverageColor"); - - if (!vbox) - return ERROR_INT("vbox not defined", procName, 1); - if (!histo) - return ERROR_INT("histo not defined", procName, 1); - if (!prval || !pgval || !pbval) - return ERROR_INT("&p*val not all defined", procName, 1); - - *prval = *pgval = *pbval = 0; - ntot = 0; - mult = 1 << (8 - sigbits); - rsum = gsum = bsum = 0; - for (i = vbox->r1; i <= vbox->r2; i++) { - for (j = vbox->g1; j <= vbox->g2; j++) { - for (k = vbox->b1; k <= vbox->b2; k++) { - histoindex = (i << (2 * sigbits)) + (j << sigbits) + k; - ntot += histo[histoindex]; - rsum += (l_int32)(histo[histoindex] * (i + 0.5) * mult); - gsum += (l_int32)(histo[histoindex] * (j + 0.5) * mult); - bsum += (l_int32)(histo[histoindex] * (k + 0.5) * mult); - if (index >= 0) - histo[histoindex] = index; - } - } - } - - if (ntot == 0) { - *prval = mult * (vbox->r1 + vbox->r2 + 1) / 2; - *pgval = mult * (vbox->g1 + vbox->g2 + 1) / 2; - *pbval = mult * (vbox->b1 + vbox->b2 + 1) / 2; - } else { - *prval = rsum / ntot; - *pgval = gsum / ntot; - *pbval = bsum / ntot; - } - -#if DEBUG_MC_COLORS - lept_stderr("ntot[%d] = %d: [%d, %d, %d], (%d, %d, %d)\n", - index, ntot, vbox->r2 - vbox->r1 + 1, - vbox->g2 - vbox->g1 + 1, vbox->b2 - vbox->b1 + 1, - *prval, *pgval, *pbval); -#endif /* DEBUG_MC_COLORS */ - - return 0; -} - - -/*! 
- * \brief vboxGetCount() - * - * \param[in] vbox 3d region of color space for one quantized color - * \param[in] histo - * \param[in] sigbits valid: 5 or 6 - * \return number of image pixels in this region, or 0 on error - */ -static l_int32 -vboxGetCount(L_BOX3D *vbox, - l_int32 *histo, - l_int32 sigbits) -{ -l_int32 i, j, k, npix, index; - - PROCNAME("vboxGetCount"); - - if (!vbox) - return ERROR_INT("vbox not defined", procName, 0); - if (!histo) - return ERROR_INT("histo not defined", procName, 0); - - npix = 0; - for (i = vbox->r1; i <= vbox->r2; i++) { - for (j = vbox->g1; j <= vbox->g2; j++) { - for (k = vbox->b1; k <= vbox->b2; k++) { - index = (i << (2 * sigbits)) + (j << sigbits) + k; - npix += histo[index]; - } - } - } - - return npix; -} - - -/*! - * \brief vboxGetVolume() - * - * \param[in] vbox 3d region of color space for one quantized color - * \return quantized volume of vbox, or 0 on error - */ -static l_int32 -vboxGetVolume(L_BOX3D *vbox) -{ - PROCNAME("vboxGetVolume"); - - if (!vbox) - return ERROR_INT("vbox not defined", procName, 0); - - return ((vbox->r2 - vbox->r1 + 1) * (vbox->g2 - vbox->g1 + 1) * - (vbox->b2 - vbox->b1 + 1)); -} - -/*! - * \brief box3dCreate() - * - * \param[in] r1, r2, g1, g2, b1, b2 initial values - * \return vbox - */ -static L_BOX3D * -box3dCreate(l_int32 r1, - l_int32 r2, - l_int32 g1, - l_int32 g2, - l_int32 b1, - l_int32 b2) -{ -L_BOX3D *vbox; - - vbox = (L_BOX3D *)LEPT_CALLOC(1, sizeof(L_BOX3D)); - vbox->r1 = r1; - vbox->r2 = r2; - vbox->g1 = g1; - vbox->g2 = g2; - vbox->b1 = b1; - vbox->b2 = b2; - return vbox; -} - - -/*! - * \brief box3dCopy() - * - * \param[in] vbox - * \return vboxc copy of vbox - * - *
- * Notes:
- *      Don't copy the sortparam.
- * 
- */ -static L_BOX3D * -box3dCopy(L_BOX3D *vbox) -{ -L_BOX3D *vboxc; - - PROCNAME("box3dCopy"); - - if (!vbox) - return (L_BOX3D *)ERROR_PTR("vbox not defined", procName, NULL); - - vboxc = box3dCreate(vbox->r1, vbox->r2, vbox->g1, vbox->g2, - vbox->b1, vbox->b2); - vboxc->npix = vbox->npix; - vboxc->vol = vbox->vol; - return vboxc; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorseg.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorseg.c deleted file mode 100644 index c3f3c3d8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorseg.c +++ /dev/null @@ -1,658 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colorseg.c - *
- *
- *    Unsupervised color segmentation
- *
- *               PIX     *pixColorSegment()
- *               PIX     *pixColorSegmentCluster()
- *       static  l_int32  pixColorSegmentTryCluster()
- *               l_int32  pixAssignToNearestColor()
- *               l_int32  pixColorSegmentClean()
- *               l_int32  pixColorSegmentRemoveColors()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Maximum allowed iterations in Phase 1. */ -static const l_int32 MAX_ALLOWED_ITERATIONS = 20; - - /* Factor by which max dist is increased on each iteration */ -static const l_float32 DIST_EXPAND_FACT = 1.3; - - /* Octcube division level for computing nearest colormap color using LUT. - * Using 4 should suffice for up to 50 - 100 colors, and it is - * very fast. Using 5 takes 8 times as long to set up the LUT - * for little perceptual gain, even with 100 colors. */ -static const l_int32 LEVEL_IN_OCTCUBE = 4; - - -static l_int32 pixColorSegmentTryCluster(PIX *pixd, PIX *pixs, - l_int32 maxdist, l_int32 maxcolors, - l_int32 debugflag); - -/*------------------------------------------------------------------* - * Unsupervised color segmentation * - *------------------------------------------------------------------*/ -/*! - * \brief pixColorSegment() - * - * \param[in] pixs 32 bpp; 24-bit color - * \param[in] maxdist max euclidean dist to existing cluster - * \param[in] maxcolors max number of colors allowed in first pass - * \param[in] selsize linear size of sel for closing to remove noise - * \param[in] finalcolors max number of final colors allowed after 4th pass - * \param[in] debugflag 1 for debug output; 0 otherwise - * \return pixd 8 bit with colormap, or NULL on error - * - *
- *  Color segmentation proceeds in four phases:
- *
- *  Phase 1:  pixColorSegmentCluster()
- *  The image is traversed in raster order.  Each pixel either
- *  becomes the representative for a new cluster or is assigned to an
- *  existing cluster.  Assignment is greedy.  The data is stored in
- *  a colormapped image.  Three auxiliary arrays are used to hold
- *  the colors of the representative pixels, for fast lookup.
- *  The average color in each cluster is computed.
- *
- *  Phase 2.  pixAssignToNearestColor()
- *  A second non-greedy clustering pass is performed, where each pixel
- *  is assigned to the nearest cluster average.  We also keep track
- *  of how many pixels are assigned to each cluster.
- *
- *  Phase 3.  pixColorSegmentClean()
- *  For each cluster, starting with the largest, do a morphological
- *  closing to eliminate small components within larger ones.
- *
- *  Phase 4.  pixColorSegmentRemoveColors()
- *  Eliminate all colors except the most populated 'finalcolors'.
- *  Then remove unused colors from the colormap, and reassign those
- *  pixels to the nearest remaining cluster, using the original pixel values.
- *
- * Notes:
- *      (1) The goal is to generate a small number of colors.
- *          Typically this would be specified by 'finalcolors',
- *          a number that would be somewhere between 3 and 6.
- *          The parameter 'maxcolors' specifies the maximum number of
- *          colors generated in the first phase.  This should be
- *          larger than finalcolors, perhaps twice as large.
- *          If more than 'maxcolors' are generated in the first phase
- *          using the input 'maxdist', the distance is repeatedly
- *          increased by a multiplicative factor until the condition
- *          is satisfied.  The implicit relation between 'maxdist'
- *          and 'maxcolors' is thus adjusted programmatically.
- *      (2) As a very rough guideline, given a target value of 'finalcolors',
- *          here are approximate values of 'maxdist' and 'maxcolors'
- *          to start with:
- *
- *               finalcolors    maxcolors    maxdist
- *               -----------    ---------    -------
- *                   3             6          100
- *                   4             8           90
- *                   5            10           75
- *                   6            12           60
- *
- *          For a given number of finalcolors, if you use too many
- *          maxcolors, the result will be noisy.  If you use too few,
- *          the result will be a relatively poor assignment of colors.
- * 
- */ -PIX * -pixColorSegment(PIX *pixs, - l_int32 maxdist, - l_int32 maxcolors, - l_int32 selsize, - l_int32 finalcolors, - l_int32 debugflag) -{ -l_int32 *countarray; -PIX *pixd; - - PROCNAME("pixColorSegment"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("must be rgb color", procName, NULL); - - /* Phase 1; original segmentation */ - pixd = pixColorSegmentCluster(pixs, maxdist, maxcolors, debugflag); - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - if (debugflag) { - lept_mkdir("lept/segment"); - pixWriteDebug("/tmp/lept/segment/colorseg1.png", pixd, IFF_PNG); - } - - /* Phase 2; refinement in pixel assignment */ - if ((countarray = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32))) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("countarray not made", procName, NULL); - } - pixAssignToNearestColor(pixd, pixs, NULL, LEVEL_IN_OCTCUBE, countarray); - if (debugflag) - pixWriteDebug("/tmp/lept/segment/colorseg2.png", pixd, IFF_PNG); - - /* Phase 3: noise removal by separately closing each color */ - pixColorSegmentClean(pixd, selsize, countarray); - LEPT_FREE(countarray); - if (debugflag) - pixWriteDebug("/tmp/lept/segment/colorseg3.png", pixd, IFF_PNG); - - /* Phase 4: removal of colors with small population and - * reassignment of pixels to remaining colors */ - pixColorSegmentRemoveColors(pixd, pixs, finalcolors); - return pixd; -} - - -/*! - * \brief pixColorSegmentCluster() - * - * \param[in] pixs 32 bpp; 24-bit color - * \param[in] maxdist max euclidean dist to existing cluster - * \param[in] maxcolors max number of colors allowed in first pass - * \param[in] debugflag 1 for debug output; 0 otherwise - * \return pixd 8 bit with colormap, or NULL on error - * - *
- * Notes:
- *      (1) This is phase 1.  See description in pixColorSegment().
- *      (2) Greedy unsupervised classification.  If the limit 'maxcolors'
- *          is exceeded, the computation is repeated with a larger
- *          allowed cluster size.
- *      (3) On each successive iteration, 'maxdist' is increased by a
- *          constant factor.  See comments in pixColorSegment() for
- *          a guideline on parameter selection.
- *          Note that the diagonal of the 8-bit rgb color cube is about
- *          440, so for 'maxdist' = 440, you are guaranteed to get 1 color!
- * 
- */ -PIX * -pixColorSegmentCluster(PIX *pixs, - l_int32 maxdist, - l_int32 maxcolors, - l_int32 debugflag) -{ -l_int32 w, h, newmaxdist, ret, niters, ncolors, success; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixColorSegmentCluster"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("must be rgb color", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(8); - pixSetColormap(pixd, cmap); - pixCopyResolution(pixd, pixs); - - newmaxdist = maxdist; - niters = 0; - success = TRUE; - while (1) { - ret = pixColorSegmentTryCluster(pixd, pixs, newmaxdist, - maxcolors, debugflag); - niters++; - if (!ret) { - ncolors = pixcmapGetCount(cmap); - if (debugflag) - L_INFO("Success with %d colors after %d iters\n", procName, - ncolors, niters); - break; - } - if (niters == MAX_ALLOWED_ITERATIONS) { - L_WARNING("too many iters; newmaxdist = %d\n", - procName, newmaxdist); - success = FALSE; - break; - } - newmaxdist = (l_int32)(DIST_EXPAND_FACT * (l_float32)newmaxdist); - } - - if (!success) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("failure in phase 1", procName, NULL); - } - - return pixd; -} - - -/*! - * \brief pixColorSegmentTryCluster() - * - * \param[in] pixd - * \param[in] pixs - * \param[in] maxdist - * \param[in] maxcolors - * \param[in] debugflag 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      This function should only be called from pixColorSegCluster()
- * 
- */ -static l_int32 -pixColorSegmentTryCluster(PIX *pixd, - PIX *pixs, - l_int32 maxdist, - l_int32 maxcolors, - l_int32 debugflag) -{ -l_int32 rmap[256], gmap[256], bmap[256]; -l_int32 w, h, wpls, wpld, i, j, k, found, ret, index, ncolors; -l_int32 rval, gval, bval, dist2, maxdist2; -l_int32 countarray[256]; -l_int32 rsum[256], gsum[256], bsum[256]; -l_uint32 *ppixel; -l_uint32 *datas, *datad, *lines, *lined; -PIXCMAP *cmap; - - PROCNAME("pixColorSegmentTryCluster"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - maxdist2 = maxdist * maxdist; - cmap = pixGetColormap(pixd); - pixcmapClear(cmap); - for (k = 0; k < 256; k++) { - rsum[k] = gsum[k] = bsum[k] = 0; - rmap[k] = gmap[k] = bmap[k] = 0; - } - - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - ncolors = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - ppixel = lines + j; - rval = GET_DATA_BYTE(ppixel, COLOR_RED); - gval = GET_DATA_BYTE(ppixel, COLOR_GREEN); - bval = GET_DATA_BYTE(ppixel, COLOR_BLUE); - ncolors = pixcmapGetCount(cmap); - found = FALSE; - for (k = 0; k < ncolors; k++) { - dist2 = (rval - rmap[k]) * (rval - rmap[k]) + - (gval - gmap[k]) * (gval - gmap[k]) + - (bval - bmap[k]) * (bval - bmap[k]); - if (dist2 <= maxdist2) { /* take it; greedy */ - found = TRUE; - SET_DATA_BYTE(lined, j, k); - countarray[k]++; - rsum[k] += rval; - gsum[k] += gval; - bsum[k] += bval; - break; - } - } - if (!found) { /* Add a new color */ - ret = pixcmapAddNewColor(cmap, rval, gval, bval, &index); -/* lept_stderr( - "index = %d, (i,j) = (%d,%d), rgb = (%d, %d, %d)\n", - index, i, j, rval, gval, bval); */ - if (ret == 0 && index < maxcolors) { - countarray[index] = 1; - SET_DATA_BYTE(lined, j, index); - rmap[index] = rval; - gmap[index] = gval; - 
bmap[index] = bval; - rsum[index] = rval; - gsum[index] = gval; - bsum[index] = bval; - } else { - if (debugflag) { - L_INFO("maxcolors exceeded for maxdist = %d\n", - procName, maxdist); - } - return 1; - } - } - } - } - - /* Replace the colors in the colormap by the averages */ - for (k = 0; k < ncolors; k++) { - rval = rsum[k] / countarray[k]; - gval = gsum[k] / countarray[k]; - bval = bsum[k] / countarray[k]; - pixcmapResetColor(cmap, k, rval, gval, bval); - } - - return 0; -} - - -/*! - * \brief pixAssignToNearestColor() - * - * \param[in] pixd 8 bpp, colormapped - * \param[in] pixs 32 bpp; 24-bit color - * \param[in] pixm [optional] 1 bpp - * \param[in] level of octcube used for finding nearest color in cmap - * \param[in] countarray [optional] ptr to array, in which we can store - * the number of pixels found in each color in - * the colormap in pixd - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used in phase 2 of color segmentation, where pixs
- *          is the original input image to pixColorSegment(), and
- *          pixd is the colormapped image returned from
- *          pixColorSegmentCluster().  It is also used, with a mask,
- *          in phase 4.
- *      (2) This is an in-place operation.
- *      (3) The colormap in pixd is unchanged.
- *      (4) pixs and pixd must be the same size (w, h).
- *      (5) The selection mask pixm can be null.  If it exists, it must
- *          be the same size as pixs and pixd, and only pixels
- *          corresponding to fg in pixm are assigned.  Set to
- *          NULL if all pixels in pixd are to be assigned.
- *      (6) The countarray can be null.  If it exists, it is pre-allocated
- *          and of a size at least equal to the size of the colormap in pixd.
- *      (7) This does a best-fit (non-greedy) assignment of pixels to
- *          existing clusters.  Specifically, it assigns each pixel
- *          in pixd to the color index in the pixd colormap that has a
- *          color closest to the corresponding rgb pixel in pixs.
- *      (8) 'level' is the octcube level used to quickly find the nearest
- *          color in the colormap for each pixel.  For color segmentation,
- *          this parameter is set to LEVEL_IN_OCTCUBE.
- *      (9) We build a mapping table from octcube to colormap index so
- *          that this function can run in a time (otherwise) independent
- *          of the number of colors in the colormap.  This avoids a
- *          brute-force search for the closest colormap color to each
- *          pixel in the image.
- * 
- */ -l_ok -pixAssignToNearestColor(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_int32 level, - l_int32 *countarray) -{ -l_int32 w, h, wpls, wpld, wplm, i, j, success; -l_int32 rval, gval, bval, index; -l_int32 *cmaptab; -l_uint32 octindex; -l_uint32 *rtab, *gtab, *btab; -l_uint32 *ppixel; -l_uint32 *datas, *datad, *datam, *lines, *lined, *linem; -PIXCMAP *cmap; - - PROCNAME("pixAssignToNearestColor"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if ((cmap = pixGetColormap(pixd)) == NULL) - return ERROR_INT("cmap not found", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (level < 1 || level > 6) - return ERROR_INT("level not in [1 ... 6]", procName, 1); - - /* Set up the tables to map rgb to the nearest colormap index */ - success = TRUE; - makeRGBToIndexTables(level, &rtab, >ab, &btab); - cmaptab = pixcmapToOctcubeLUT(cmap, level, L_MANHATTAN_DISTANCE); - if (!rtab || !gtab || !btab || !cmaptab) { - L_ERROR("failure to make a table\n", procName); - success = FALSE; - goto cleanup_arrays; - } - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - if (pixm) { - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - } - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (pixm) - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if (pixm) { - if (!GET_DATA_BIT(linem, j)) - continue; - } - ppixel = lines + j; - rval = GET_DATA_BYTE(ppixel, COLOR_RED); - gval = GET_DATA_BYTE(ppixel, COLOR_GREEN); - bval = GET_DATA_BYTE(ppixel, COLOR_BLUE); - /* Map from rgb to octcube index */ - getOctcubeIndexFromRGB(rval, gval, bval, rtab, gtab, btab, - &octindex); - /* Map from octcube index to nearest colormap index */ - index = cmaptab[octindex]; - if (countarray) - countarray[index]++; - 
SET_DATA_BYTE(lined, j, index); - } - } - -cleanup_arrays: - LEPT_FREE(cmaptab); - LEPT_FREE(rtab); - LEPT_FREE(gtab); - LEPT_FREE(btab); - return (success) ? 0 : 1; -} - - -/*! - * \brief pixColorSegmentClean() - * - * \param[in] pixs 8 bpp, colormapped - * \param[in] selsize for closing - * \param[in] countarray ptr to array containing the number of pixels - * found in each color in the colormap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This operation is in-place.
- *      (2) This is phase 3 of color segmentation.  It is the first
- *          part of a two-step noise removal process.  Colors with a
- *          large population are closed first; this operation absorbs
- *          small sets of intercolated pixels of a different color.
- * 
- */ -l_ok -pixColorSegmentClean(PIX *pixs, - l_int32 selsize, - l_int32 *countarray) -{ -l_int32 i, ncolors, val; -l_uint32 val32; -NUMA *na, *nasi; -PIX *pixt1, *pixt2; -PIXCMAP *cmap; - - PROCNAME("pixColorSegmentClean"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not 8 bpp", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("cmap not found", procName, 1); - if (!countarray) - return ERROR_INT("countarray not defined", procName, 1); - if (selsize <= 1) - return 0; /* nothing to do */ - - /* Sort colormap indices in decreasing order of pixel population */ - ncolors = pixcmapGetCount(cmap); - na = numaCreate(ncolors); - for (i = 0; i < ncolors; i++) - numaAddNumber(na, countarray[i]); - nasi = numaGetSortIndex(na, L_SORT_DECREASING); - numaDestroy(&na); - if (!nasi) - return ERROR_INT("nasi not made", procName, 1); - - /* For each color, in order of decreasing population, - * do a closing and absorb the added pixels. Note that - * if the closing removes pixels at the border, they'll - * still appear in the xor and will be properly (re)set. */ - for (i = 0; i < ncolors; i++) { - numaGetIValue(nasi, i, &val); - pixt1 = pixGenerateMaskByValue(pixs, val, 1); - pixt2 = pixCloseSafeCompBrick(NULL, pixt1, selsize, selsize); - pixXor(pixt2, pixt2, pixt1); /* pixels to be added to type 'val' */ - pixcmapGetColor32(cmap, val, &val32); - pixSetMasked(pixs, pixt2, val32); /* add them */ - pixDestroy(&pixt1); - pixDestroy(&pixt2); - } - numaDestroy(&nasi); - return 0; -} - - -/*! - * \brief pixColorSegmentRemoveColors() - * - * \param[in] pixd 8 bpp, colormapped - * \param[in] pixs 32 bpp rgb, with initial pixel values - * \param[in] finalcolors max number of colors to retain - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This operation is in-place.
- *      (2) This is phase 4 of color segmentation, and the second part
- *          of the 2-step noise removal.  Only 'finalcolors' different
- *          colors are retained, with colors with smaller populations
- *          being replaced by the nearest color of the remaining colors.
- *          For highest accuracy, for pixels that are being replaced,
- *          we find the nearest colormap color  to the original rgb color.
- * 
- */ -l_ok -pixColorSegmentRemoveColors(PIX *pixd, - PIX *pixs, - l_int32 finalcolors) -{ -l_int32 i, ncolors, index, tempindex; -l_int32 *tab; -l_uint32 tempcolor; -NUMA *na, *nasi; -PIX *pixm; -PIXCMAP *cmap; - - PROCNAME("pixColorSegmentRemoveColors"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixGetDepth(pixd) != 8) - return ERROR_INT("pixd not 8 bpp", procName, 1); - if ((cmap = pixGetColormap(pixd)) == NULL) - return ERROR_INT("cmap not found", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - ncolors = pixcmapGetCount(cmap); - if (finalcolors >= ncolors) /* few enough colors already; nothing to do */ - return 0; - - /* Generate a mask over all pixels that are not in the - * 'finalcolors' most populated colors. Save the colormap - * index of any one of the retained colors in 'tempindex'. - * The LUT has values 0 for the 'finalcolors' most populated colors, - * which will be retained; and 1 for the rest, which are marked - * by fg pixels in pixm and will be removed. */ - na = pixGetCmapHistogram(pixd, 1); - if ((nasi = numaGetSortIndex(na, L_SORT_DECREASING)) == NULL) { - numaDestroy(&na); - return ERROR_INT("nasi not made", procName, 1); - } - numaGetIValue(nasi, finalcolors - 1, &tempindex); /* retain down to this */ - pixcmapGetColor32(cmap, tempindex, &tempcolor); /* use this color */ - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = finalcolors; i < ncolors; i++) { - numaGetIValue(nasi, i, &index); - tab[index] = 1; - } - - pixm = pixMakeMaskFromLUT(pixd, tab); - LEPT_FREE(tab); - - /* Reassign the masked pixels temporarily to the saved index - * (tempindex). This guarantees that no pixels are labeled by - * a colormap index of any colors that will be removed. - * The actual value doesn't matter, as long as it's one - * of the retained colors, because these pixels will later - * be reassigned based on the full set of colors retained - * in the colormap. 
*/ - pixSetMasked(pixd, pixm, tempcolor); - - /* Now remove unused colors from the colormap. This reassigns - * image pixels as required. */ - pixRemoveUnusedColors(pixd); - - /* Finally, reassign the pixels under the mask (those that were - * given a 'tempindex' value) to the nearest color in the colormap. - * This is the function used in phase 2 on all image pixels; here - * it is only used on the masked pixels given by pixm. */ - pixAssignToNearestColor(pixd, pixs, pixm, LEVEL_IN_OCTCUBE, NULL); - - pixDestroy(&pixm); - numaDestroy(&na); - numaDestroy(&nasi); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorspace.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorspace.c deleted file mode 100644 index 18a57d10..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/colorspace.c +++ /dev/null @@ -1,2419 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file colorspace.c - *
- *
- *      Colorspace conversion between RGB and HSV
- *           PIX        *pixConvertRGBToHSV()
- *           PIX        *pixConvertHSVToRGB()
- *           l_int32     convertRGBToHSV()
- *           l_int32     convertHSVToRGB()
- *           l_int32     pixcmapConvertRGBToHSV()
- *           l_int32     pixcmapConvertHSVToRGB()
- *           PIX        *pixConvertRGBToHue()
- *           PIX        *pixConvertRGBToSaturation()
- *           PIX        *pixConvertRGBToValue()
- *
- *      Selection and display of range of colors in HSV space
- *           PIX        *pixMakeRangeMaskHS()
- *           PIX        *pixMakeRangeMaskHV()
- *           PIX        *pixMakeRangeMaskSV()
- *           PIX        *pixMakeHistoHS()
- *           PIX        *pixMakeHistoHV()
- *           PIX        *pixMakeHistoSV()
- *           PIX        *pixFindHistoPeaksHSV()
- *           PIX        *displayHSVColorRange()
- *
- *      Colorspace conversion between RGB and YUV
- *           PIX        *pixConvertRGBToYUV()
- *           PIX        *pixConvertYUVToRGB()
- *           l_int32     convertRGBToYUV()
- *           l_int32     convertYUVToRGB()
- *           l_int32     pixcmapConvertRGBToYUV()
- *           l_int32     pixcmapConvertYUVToRGB()
- *
- *      Colorspace conversion between RGB and XYZ
- *           FPIXA      *pixConvertRGBToXYZ()
- *           PIX        *fpixaConvertXYZToRGB()
- *           l_int32     convertRGBToXYZ()
- *           l_int32     convertXYZToRGB()
- *
- *      Colorspace conversion between XYZ and LAB
- *           FPIXA      *fpixaConvertXYZToLAB()
- *           PIX        *fpixaConvertLABToXYZ()
- *           l_int32     convertXYZToLAB()
- *           l_int32     convertLABToXYZ()
- *           static l_float32  lab_forward()
- *           static l_float32  lab_reverse()
- *
- *      Colorspace conversion between RGB and LAB
- *           FPIXA      *pixConvertRGBToLAB()
- *           PIX        *fpixaConvertLABToRGB()
- *           l_int32     convertRGBToLAB()
- *           l_int32     convertLABToRGB()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -#ifndef NO_CONSOLE_IO -#define DEBUG_HISTO 0 -#define SLOW_CUBE_ROOT 0 -#endif /* ~NO_CONSOLE_IO */ - - /* Functions used in xyz <--> lab conversions */ -static l_float32 lab_forward(l_float32 v); -static l_float32 lab_reverse(l_float32 v); - -/*---------------------------------------------------------------------------* - * Colorspace conversion between RGB and HSB * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertRGBToHSV() - * - * \param[in] pixd can be NULL; if not NULL, must == pixs - * \param[in] pixs - * \return pixd always - * - *
- * Notes:
- *      (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL.
- *      (2) The definition of our HSV space is given in convertRGBToHSV().
- *      (3) The h, s and v values are stored in the same places as
- *          the r, g and b values, respectively.  Here, they are explicitly
- *          placed in the 3 MS bytes in the pixel.
- *      (4) Normalizing to 1 and considering the r,g,b components,
- *          a simple way to understand the HSV space is:
- *           ~ v = max(r,g,b)
- *           ~ s = (max - min) / max
- *           ~ h ~ (mid - min) / (max - min)  [apart from signs and constants]
- *      (5) Normalizing to 1, some properties of the HSV space are:
- *           ~ For gray values (r = g = b) along the continuum between
- *             black and white:
- *                s = 0  (becoming undefined as you approach black)
- *                h is undefined everywhere
- *           ~ Where one component is saturated and the others are zero:
- *                v = 1
- *                s = 1
- *                h = 0 (r = max), 1/3 (g = max), 2/3 (b = max)
- *           ~ Where two components are saturated and the other is zero:
- *                v = 1
- *                s = 1
- *                h = 1/2 (if r = 0), 5/6 (if g = 0), 1/6 (if b = 0)
- *      (6) Dividing each component by a constant c > 1 reduces the
- *          brightness v, but leaves the saturation and hue invariant.
- * 
- */ -PIX * -pixConvertRGBToHSV(PIX *pixd, - PIX *pixs) -{ -l_int32 w, h, d, wpl, i, j, rval, gval, bval, hval, sval, vval; -l_uint32 *line, *data; -PIXCMAP *cmap; - - PROCNAME("pixConvertRGBToHSV"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && pixd != pixs) - return (PIX *)ERROR_PTR("pixd defined and not inplace", procName, pixd); - - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("not cmapped or rgb", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - - cmap = pixGetColormap(pixd); - if (cmap) { /* just convert the colormap */ - pixcmapConvertRGBToHSV(cmap); - return pixd; - } - - /* Convert RGB image */ - pixGetDimensions(pixd, &w, &h, NULL); - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - convertRGBToHSV(rval, gval, bval, &hval, &sval, &vval); - line[j] = (hval << 24) | (sval << 16) | (vval << 8); - } - } - - return pixd; -} - - -/*! - * \brief pixConvertHSVToRGB() - * - * \param[in] pixd can be NULL; if not NULL, must == pixs - * \param[in] pixs - * \return pixd always - * - *
- * Notes:
- *      (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL.
- *      (2) The user takes responsibility for making sure that pixs is
- *          in our HSV space.  The definition of our HSV space is given
- *          in convertRGBToHSV().
- *      (3) The h, s and v values are stored in the same places as
- *          the r, g and b values, respectively.  Here, they are explicitly
- *          placed in the 3 MS bytes in the pixel.
- * 
- */ -PIX * -pixConvertHSVToRGB(PIX *pixd, - PIX *pixs) -{ -l_int32 w, h, d, wpl, i, j, rval, gval, bval, hval, sval, vval; -l_uint32 pixel; -l_uint32 *line, *data; -PIXCMAP *cmap; - - PROCNAME("pixConvertHSVToRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && pixd != pixs) - return (PIX *)ERROR_PTR("pixd defined and not inplace", procName, pixd); - - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("not cmapped or hsv", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - - cmap = pixGetColormap(pixd); - if (cmap) { /* just convert the colormap */ - pixcmapConvertHSVToRGB(cmap); - return pixd; - } - - /* Convert HSV image */ - pixGetDimensions(pixd, &w, &h, NULL); - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - pixel = line[j]; - hval = pixel >> 24; - sval = (pixel >> 16) & 0xff; - vval = (pixel >> 8) & 0xff; - convertHSVToRGB(hval, sval, vval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, line + j); - } - } - - return pixd; -} - - -/*! - * \brief convertRGBToHSV() - * - * \param[in] rval, gval, bval RGB input - * \param[out] phval, psval, pvval comparable HSV values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The range of returned values is:
- *            h [0 ... 239]
- *            s [0 ... 255]
- *            v [0 ... 255]
- *      (2) If r = g = b, the pixel is gray (s = 0), and we define h = 0.
- *      (3) h wraps around, so that h = 0 and h = 240 are equivalent
- *          in hue space.
- *      (4) h has the following correspondence to color:
- *            h = 0         magenta
- *            h = 40        red
- *            h = 80        yellow
- *            h = 120       green
- *            h = 160       cyan
- *            h = 200       blue
- * 
- */ -l_ok -convertRGBToHSV(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *phval, - l_int32 *psval, - l_int32 *pvval) -{ -l_int32 minrg, maxrg, min, max, delta; -l_float32 h; - - PROCNAME("convertRGBToHSV"); - - if (phval) *phval = 0; - if (psval) *psval = 0; - if (pvval) *pvval = 0; - if (!phval || !psval || !pvval) - return ERROR_INT("&hval, &sval, &vval not all defined", procName, 1); - - minrg = L_MIN(rval, gval); - min = L_MIN(minrg, bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - delta = max - min; - - *pvval = max; - if (delta == 0) { /* gray; no chroma */ - *phval = 0; - *psval = 0; - } else { - *psval = (l_int32)(255. * (l_float32)delta / (l_float32)max + 0.5); - if (rval == max) /* between magenta and yellow */ - h = (l_float32)(gval - bval) / (l_float32)delta; - else if (gval == max) /* between yellow and cyan */ - h = 2. + (l_float32)(bval - rval) / (l_float32)delta; - else /* between cyan and magenta */ - h = 4. + (l_float32)(rval - gval) / (l_float32)delta; - h *= 40.0; - if (h < 0.0) - h += 240.0; - if (h >= 239.5) - h = 0.0; - *phval = (l_int32)(h + 0.5); - } - - return 0; -} - - -/*! - * \brief convertHSVToRGB() - * - * \param[in] hval, sval, vval HSV input - * \param[out] prval, pgval, pbval comparable RGB values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See convertRGBToHSV() for valid input range of HSV values
- *          and their interpretation in color space.
- * 
- */ -l_ok -convertHSVToRGB(l_int32 hval, - l_int32 sval, - l_int32 vval, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_int32 i, x, y, z; -l_float32 h, f, s; - - PROCNAME("convertHSVToRGB"); - - if (prval) *prval = 0; - if (pgval) *pgval = 0; - if (pbval) *pbval = 0; - if (!prval || !pgval || !pbval) - return ERROR_INT("&rval, &gval, &bval not all defined", procName, 1); - - if (sval == 0) { /* gray */ - *prval = vval; - *pgval = vval; - *pbval = vval; - } else { - if (hval < 0 || hval > 240) - return ERROR_INT("invalid hval", procName, 1); - if (hval == 240) - hval = 0; - h = (l_float32)hval / 40.; - i = (l_int32)h; - f = h - i; - s = (l_float32)sval / 255.; - x = (l_int32)(vval * (1. - s) + 0.5); - y = (l_int32)(vval * (1. - s * f) + 0.5); - z = (l_int32)(vval * (1. - s * (1. - f)) + 0.5); - switch (i) - { - case 0: - *prval = vval; - *pgval = z; - *pbval = x; - break; - case 1: - *prval = y; - *pgval = vval; - *pbval = x; - break; - case 2: - *prval = x; - *pgval = vval; - *pbval = z; - break; - case 3: - *prval = x; - *pgval = y; - *pbval = vval; - break; - case 4: - *prval = z; - *pgval = x; - *pbval = vval; - break; - case 5: - *prval = vval; - *pgval = x; - *pbval = y; - break; - default: /* none possible */ - return 1; - } - } - - return 0; -} - - -/*! - * \brief pixcmapConvertRGBToHSV() - * - * \param[in] cmap - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      ~ in-place transform
- *      ~ See convertRGBToHSV() for def'n of HSV space.
- *      ~ replaces: r --> h, g --> s, b --> v
- * 
- */ -l_ok -pixcmapConvertRGBToHSV(PIXCMAP *cmap) -{ -l_int32 i, ncolors, rval, gval, bval, hval, sval, vval; - - PROCNAME("pixcmapConvertRGBToHSV"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - convertRGBToHSV(rval, gval, bval, &hval, &sval, &vval); - pixcmapResetColor(cmap, i, hval, sval, vval); - } - return 0; -} - - -/*! - * \brief pixcmapConvertHSVToRGB() - * - * \param[in] cmap - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      ~ in-place transform
- *      ~ See convertRGBToHSV() for def'n of HSV space.
- *      ~ replaces: h --> r, s --> g, v --> b
- * 
- */ -l_ok -pixcmapConvertHSVToRGB(PIXCMAP *cmap) -{ -l_int32 i, ncolors, rval, gval, bval, hval, sval, vval; - - PROCNAME("pixcmapConvertHSVToRGB"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &hval, &sval, &vval); - convertHSVToRGB(hval, sval, vval, &rval, &gval, &bval); - pixcmapResetColor(cmap, i, rval, gval, bval); - } - return 0; -} - - -/*! - * \brief pixConvertRGBToHue() - * - * \param[in] pixs 32 bpp RGB, or 8 bpp with colormap - * \return pixd 8 bpp hue of HSV, or NULL on error - * - *
- * Notes:
- *      (1) The conversion to HSV hue is in-lined here.
- *      (2) If there is a colormap, it is removed.
- *      (3) If you just want the hue component, this does it
- *          at about 10 Mpixels/sec/GHz, which is about
- *          2x faster than using pixConvertRGBToHSV()
- * 
- */ -PIX * -pixConvertRGBToHue(PIX *pixs) -{ -l_int32 w, h, d, wplt, wpld; -l_int32 i, j, rval, gval, bval, hval, minrg, min, maxrg, max, delta; -l_float32 fh; -l_uint32 pixel; -l_uint32 *linet, *lined, *datat, *datad; -PIX *pixt, *pixd; - - PROCNAME("pixConvertRGBToHue"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("not cmapped or rgb", procName, NULL); - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - - /* Convert RGB image */ - pixd = pixCreate(w, h, 8); - pixCopyResolution(pixd, pixs); - wplt = pixGetWpl(pixt); - datat = pixGetData(pixt); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linet[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - minrg = L_MIN(rval, gval); - min = L_MIN(minrg, bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - delta = max - min; - if (delta == 0) { /* gray; no chroma */ - hval = 0; - } else { - if (rval == max) /* between magenta and yellow */ - fh = (l_float32)(gval - bval) / (l_float32)delta; - else if (gval == max) /* between yellow and cyan */ - fh = 2. + (l_float32)(bval - rval) / (l_float32)delta; - else /* between cyan and magenta */ - fh = 4. + (l_float32)(rval - gval) / (l_float32)delta; - fh *= 40.0; - if (fh < 0.0) - fh += 240.0; - hval = (l_int32)(fh + 0.5); - } - SET_DATA_BYTE(lined, j, hval); - } - } - pixDestroy(&pixt); - - return pixd; -} - - - -/*! - * \brief pixConvertRGBToSaturation() - * - * \param[in] pixs 32 bpp RGB, or 8 bpp with colormap - * \return pixd 8 bpp sat of HSV, or NULL on error - * - *
- * Notes:
- *      (1) The conversion to HSV sat is in-lined here.
- *      (2) If there is a colormap, it is removed.
- *      (3) If you just want the saturation component, this does it
- *          at about 12 Mpixels/sec/GHz.
- * 
- */ -PIX * -pixConvertRGBToSaturation(PIX *pixs) -{ -l_int32 w, h, d, wplt, wpld; -l_int32 i, j, rval, gval, bval, sval, minrg, min, maxrg, max, delta; -l_uint32 pixel; -l_uint32 *linet, *lined, *datat, *datad; -PIX *pixt, *pixd; - - PROCNAME("pixConvertRGBToSaturation"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("not cmapped or rgb", procName, NULL); - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - - /* Convert RGB image */ - pixd = pixCreate(w, h, 8); - pixCopyResolution(pixd, pixs); - wplt = pixGetWpl(pixt); - datat = pixGetData(pixt); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linet[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - minrg = L_MIN(rval, gval); - min = L_MIN(minrg, bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - delta = max - min; - if (delta == 0) /* gray; no chroma */ - sval = 0; - else - sval = (l_int32)(255. * - (l_float32)delta / (l_float32)max + 0.5); - SET_DATA_BYTE(lined, j, sval); - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixConvertRGBToValue() - * - * \param[in] pixs 32 bpp RGB,or 8 bpp with colormap - * \return pixd 8 bpp max component intensity of HSV, or NULL on error - * - *
- * Notes:
- *      (1) The conversion to HSV sat is in-lined here.
- *      (2) If there is a colormap, it is removed.
- *      (3) If you just want the value component, this does it
- *          at about 35 Mpixels/sec/GHz.
- * 
- */ -PIX * -pixConvertRGBToValue(PIX *pixs) -{ -l_int32 w, h, d, wplt, wpld; -l_int32 i, j, rval, gval, bval, maxrg, max; -l_uint32 pixel; -l_uint32 *linet, *lined, *datat, *datad; -PIX *pixt, *pixd; - - PROCNAME("pixConvertRGBToValue"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("not cmapped or rgb", procName, NULL); - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - - /* Convert RGB image */ - pixd = pixCreate(w, h, 8); - pixCopyResolution(pixd, pixs); - wplt = pixGetWpl(pixt); - datat = pixGetData(pixt); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linet[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - SET_DATA_BYTE(lined, j, max); - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Selection and display of range of colors in HSV space * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixMakeRangeMaskHS() - * - * \param[in] pixs 32 bpp rgb - * \param[in] huecenter center value of hue range - * \param[in] huehw half-width of hue range - * \param[in] satcenter center value of saturation range - * \param[in] sathw half-width of saturation range - * \param[in] regionflag L_INCLUDE_REGION, L_EXCLUDE_REGION - * \return pixd 1 bpp mask over selected pixels, or NULL on error - * - *
- * Notes:
- *      (1) The pixels are selected based on the specified ranges of
- *          hue and saturation.  For selection or exclusion, the pixel
- *          HS component values must be within both ranges.  Care must
- *          be taken in finding the hue range because of wrap-around.
- *      (2) Use %regionflag == L_INCLUDE_REGION to take only those
- *          pixels within the rectangular region specified in HS space.
- *          Use %regionflag == L_EXCLUDE_REGION to take all pixels except
- *          those within the rectangular region specified in HS space.
- * 
- */ -PIX * -pixMakeRangeMaskHS(PIX *pixs, - l_int32 huecenter, - l_int32 huehw, - l_int32 satcenter, - l_int32 sathw, - l_int32 regionflag) -{ -l_int32 i, j, w, h, wplt, wpld, hstart, hend, sstart, send, hval, sval; -l_int32 *hlut, *slut; -l_uint32 pixel; -l_uint32 *datat, *datad, *linet, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixMakeRangeMaskHS"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (regionflag != L_INCLUDE_REGION && regionflag != L_EXCLUDE_REGION) - return (PIX *)ERROR_PTR("invalid regionflag", procName, NULL); - - /* Set up LUTs for hue and saturation. These have the value 1 - * within the specified intervals of hue and saturation. */ - hlut = (l_int32 *)LEPT_CALLOC(240, sizeof(l_int32)); - slut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - sstart = L_MAX(0, satcenter - sathw); - send = L_MIN(255, satcenter + sathw); - for (i = sstart; i <= send; i++) - slut[i] = 1; - hstart = (huecenter - huehw + 240) % 240; - hend = (huecenter + huehw + 240) % 240; - if (hstart < hend) { - for (i = hstart; i <= hend; i++) - hlut[i] = 1; - } else { /* wrap */ - for (i = hstart; i < 240; i++) - hlut[i] = 1; - for (i = 0; i <= hend; i++) - hlut[i] = 1; - } - - /* Generate the mask */ - pixt = pixConvertRGBToHSV(NULL, pixs); - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreateNoInit(w, h, 1); - if (regionflag == L_INCLUDE_REGION) - pixClearAll(pixd); - else /* L_EXCLUDE_REGION */ - pixSetAll(pixd); - datat = pixGetData(pixt); - datad = pixGetData(pixd); - wplt = pixGetWpl(pixt); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linet[j]; - hval = (pixel >> L_RED_SHIFT) & 0xff; - sval = (pixel >> L_GREEN_SHIFT) & 0xff; - if (hlut[hval] == 1 && slut[sval] == 1) { - if (regionflag == L_INCLUDE_REGION) - SET_DATA_BIT(lined, j); - else /* L_EXCLUDE_REGION */ - CLEAR_DATA_BIT(lined, j); - } - 
} - } - - LEPT_FREE(hlut); - LEPT_FREE(slut); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixMakeRangeMaskHV() - * - * \param[in] pixs 32 bpp rgb - * \param[in] huecenter center value of hue range - * \param[in] huehw half-width of hue range - * \param[in] valcenter center value of max intensity range - * \param[in] valhw half-width of max intensity range - * \param[in] regionflag L_INCLUDE_REGION, L_EXCLUDE_REGION - * \return pixd 1 bpp mask over selected pixels, or NULL on error - * - *
- * Notes:
- *      (1) The pixels are selected based on the specified ranges of
- *          hue and max intensity values.  For selection or exclusion,
- *          the pixel HV component values must be within both ranges.
- *          Care must be taken in finding the hue range because of wrap-around.
- *      (2) Use %regionflag == L_INCLUDE_REGION to take only those
- *          pixels within the rectangular region specified in HV space.
- *          Use %regionflag == L_EXCLUDE_REGION to take all pixels except
- *          those within the rectangular region specified in HV space.
- * 
- */ -PIX * -pixMakeRangeMaskHV(PIX *pixs, - l_int32 huecenter, - l_int32 huehw, - l_int32 valcenter, - l_int32 valhw, - l_int32 regionflag) -{ -l_int32 i, j, w, h, wplt, wpld, hstart, hend, vstart, vend, hval, vval; -l_int32 *hlut, *vlut; -l_uint32 pixel; -l_uint32 *datat, *datad, *linet, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixMakeRangeMaskHV"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (regionflag != L_INCLUDE_REGION && regionflag != L_EXCLUDE_REGION) - return (PIX *)ERROR_PTR("invalid regionflag", procName, NULL); - - /* Set up LUTs for hue and maximum intensity (val). These have - * the value 1 within the specified intervals of hue and value. */ - hlut = (l_int32 *)LEPT_CALLOC(240, sizeof(l_int32)); - vlut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - vstart = L_MAX(0, valcenter - valhw); - vend = L_MIN(255, valcenter + valhw); - for (i = vstart; i <= vend; i++) - vlut[i] = 1; - hstart = (huecenter - huehw + 240) % 240; - hend = (huecenter + huehw + 240) % 240; - if (hstart < hend) { - for (i = hstart; i <= hend; i++) - hlut[i] = 1; - } else { - for (i = hstart; i < 240; i++) - hlut[i] = 1; - for (i = 0; i <= hend; i++) - hlut[i] = 1; - } - - /* Generate the mask */ - pixt = pixConvertRGBToHSV(NULL, pixs); - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreateNoInit(w, h, 1); - if (regionflag == L_INCLUDE_REGION) - pixClearAll(pixd); - else /* L_EXCLUDE_REGION */ - pixSetAll(pixd); - datat = pixGetData(pixt); - datad = pixGetData(pixd); - wplt = pixGetWpl(pixt); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linet[j]; - hval = (pixel >> L_RED_SHIFT) & 0xff; - vval = (pixel >> L_BLUE_SHIFT) & 0xff; - if (hlut[hval] == 1 && vlut[vval] == 1) { - if (regionflag == L_INCLUDE_REGION) - SET_DATA_BIT(lined, j); - else /* L_EXCLUDE_REGION */ - CLEAR_DATA_BIT(lined, j); - } - } - 
} - - LEPT_FREE(hlut); - LEPT_FREE(vlut); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixMakeRangeMaskSV() - * - * \param[in] pixs 32 bpp rgb - * \param[in] satcenter center value of saturation range - * \param[in] sathw half-width of saturation range - * \param[in] valcenter center value of max intensity range - * \param[in] valhw half-width of max intensity range - * \param[in] regionflag L_INCLUDE_REGION, L_EXCLUDE_REGION - * \return pixd 1 bpp mask over selected pixels, or NULL on error - * - *
- * Notes:
- *      (1) The pixels are selected based on the specified ranges of
- *          saturation and max intensity (val).  For selection or
- *          exclusion, the pixel SV component values must be within both ranges.
- *      (2) Use %regionflag == L_INCLUDE_REGION to take only those
- *          pixels within the rectangular region specified in SV space.
- *          Use %regionflag == L_EXCLUDE_REGION to take all pixels except
- *          those within the rectangular region specified in SV space.
- * 
- */ -PIX * -pixMakeRangeMaskSV(PIX *pixs, - l_int32 satcenter, - l_int32 sathw, - l_int32 valcenter, - l_int32 valhw, - l_int32 regionflag) -{ -l_int32 i, j, w, h, wplt, wpld, sval, vval, sstart, send, vstart, vend; -l_int32 *slut, *vlut; -l_uint32 pixel; -l_uint32 *datat, *datad, *linet, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixMakeRangeMaskSV"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (regionflag != L_INCLUDE_REGION && regionflag != L_EXCLUDE_REGION) - return (PIX *)ERROR_PTR("invalid regionflag", procName, NULL); - - /* Set up LUTs for saturation and max intensity (val). - * These have the value 1 within the specified intervals of - * saturation and max intensity. */ - slut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - vlut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - sstart = L_MAX(0, satcenter - sathw); - send = L_MIN(255, satcenter + sathw); - vstart = L_MAX(0, valcenter - valhw); - vend = L_MIN(255, valcenter + valhw); - for (i = sstart; i <= send; i++) - slut[i] = 1; - for (i = vstart; i <= vend; i++) - vlut[i] = 1; - - /* Generate the mask */ - pixt = pixConvertRGBToHSV(NULL, pixs); - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreateNoInit(w, h, 1); - if (regionflag == L_INCLUDE_REGION) - pixClearAll(pixd); - else /* L_EXCLUDE_REGION */ - pixSetAll(pixd); - datat = pixGetData(pixt); - datad = pixGetData(pixd); - wplt = pixGetWpl(pixt); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = linet[j]; - sval = (pixel >> L_GREEN_SHIFT) & 0xff; - vval = (pixel >> L_BLUE_SHIFT) & 0xff; - if (slut[sval] == 1 && vlut[vval] == 1) { - if (regionflag == L_INCLUDE_REGION) - SET_DATA_BIT(lined, j); - else /* L_EXCLUDE_REGION */ - CLEAR_DATA_BIT(lined, j); - } - } - } - - LEPT_FREE(slut); - LEPT_FREE(vlut); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief pixMakeHistoHS() - * - * \param[in] pixs HSV colorspace - * \param[in] factor subsampling factor; integer - * \param[out] pnahue [optional] hue histogram - * \param[out] pnasat [optional] saturation histogram - * \return pixd 32 bpp histogram in hue and saturation, or NULL on error - * - *
- * Notes:
- *      (1) pixs is a 32 bpp image in HSV colorspace; hue is in the "red"
- *          byte, saturation is in the "green" byte.
- *      (2) In pixd, hue is displayed vertically; saturation horizontally.
- *          The dimensions of pixd are w = 256, h = 240, and the depth
- *          is 32 bpp.  The value at each point is simply the number
- *          of pixels found at that value of hue and saturation.
- * 
- */ -PIX * -pixMakeHistoHS(PIX *pixs, - l_int32 factor, - NUMA **pnahue, - NUMA **pnasat) -{ -l_int32 i, j, w, h, wplt, hval, sval, nd; -l_uint32 pixel; -l_uint32 *datat, *linet; -void **lined32; -NUMA *nahue, *nasat; -PIX *pixt, *pixd; - - PROCNAME("pixMakeHistoHS"); - - if (pnahue) *pnahue = NULL; - if (pnasat) *pnasat = NULL; - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - - if (pnahue) { - nahue = numaCreate(240); - numaSetCount(nahue, 240); - *pnahue = nahue; - } - if (pnasat) { - nasat = numaCreate(256); - numaSetCount(nasat, 256); - *pnasat = nasat; - } - - if (factor <= 1) - pixt = pixClone(pixs); - else - pixt = pixScaleBySampling(pixs, 1.0 / (l_float32)factor, - 1.0 / (l_float32)factor); - - /* Create the hue-saturation histogram */ - pixd = pixCreate(256, 240, 32); - lined32 = pixGetLinePtrs(pixd, NULL); - pixGetDimensions(pixt, &w, &h, NULL); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - for (j = 0; j < w; j++) { - pixel = linet[j]; - hval = (pixel >> L_RED_SHIFT) & 0xff; - -#if DEBUG_HISTO - if (hval > 239) { - lept_stderr("hval = %d for (%d,%d)\n", hval, i, j); - continue; - } -#endif /* DEBUG_HISTO */ - - sval = (pixel >> L_GREEN_SHIFT) & 0xff; - if (pnahue) - numaShiftValue(nahue, hval, 1.0); - if (pnasat) - numaShiftValue(nasat, sval, 1.0); - nd = GET_DATA_FOUR_BYTES(lined32[hval], sval); - SET_DATA_FOUR_BYTES(lined32[hval], sval, nd + 1); - } - } - - LEPT_FREE(lined32); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixMakeHistoHV() - * - * \param[in] pixs HSV colorspace - * \param[in] factor subsampling factor; integer - * \param[out] pnahue [optional] hue histogram - * \param[out] pnaval [optional] max intensity (value) histogram - * \return pixd 32 bpp histogram in hue and value, or NULL on error - * - *
- * Notes:
- *      (1) %pixs is a 32 bpp image in HSV colorspace; hue is in the "red"
- *          byte, max intensity ("value") is in the "blue" byte.
- *      (2) In %pixd, hue is displayed vertically; intensity horizontally.
- *          The dimensions of %pixd are w = 256, h = 240, and the depth
- *          is 32 bpp.  The value at each point is simply the number
- *          of pixels found at that value of hue and intensity.
- * 
- */ -PIX * -pixMakeHistoHV(PIX *pixs, - l_int32 factor, - NUMA **pnahue, - NUMA **pnaval) -{ -l_int32 i, j, w, h, wplt, hval, vval, nd; -l_uint32 pixel; -l_uint32 *datat, *linet; -void **lined32; -NUMA *nahue, *naval; -PIX *pixt, *pixd; - - PROCNAME("pixMakeHistoHV"); - - if (pnahue) *pnahue = NULL; - if (pnaval) *pnaval = NULL; - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - - if (pnahue) { - nahue = numaCreate(240); - numaSetCount(nahue, 240); - *pnahue = nahue; - } - if (pnaval) { - naval = numaCreate(256); - numaSetCount(naval, 256); - *pnaval = naval; - } - - if (factor <= 1) - pixt = pixClone(pixs); - else - pixt = pixScaleBySampling(pixs, 1.0 / (l_float32)factor, - 1.0 / (l_float32)factor); - - /* Create the hue-value histogram */ - pixd = pixCreate(256, 240, 32); - lined32 = pixGetLinePtrs(pixd, NULL); - pixGetDimensions(pixt, &w, &h, NULL); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - for (j = 0; j < w; j++) { - pixel = linet[j]; - hval = (pixel >> L_RED_SHIFT) & 0xff; - vval = (pixel >> L_BLUE_SHIFT) & 0xff; - if (pnahue) - numaShiftValue(nahue, hval, 1.0); - if (pnaval) - numaShiftValue(naval, vval, 1.0); - nd = GET_DATA_FOUR_BYTES(lined32[hval], vval); - SET_DATA_FOUR_BYTES(lined32[hval], vval, nd + 1); - } - } - - LEPT_FREE(lined32); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixMakeHistoSV() - * - * \param[in] pixs HSV colorspace - * \param[in] factor subsampling factor; integer - * \param[out] pnasat [optional] sat histogram - * \param[out] pnaval [optional] max intensity (value) histogram - * \return pixd 32 bpp histogram in sat and value, or NULL on error - * - *
- * Notes:
- *      (1) %pixs is a 32 bpp image in HSV colorspace; sat is in the "green"
- *          byte, max intensity ("value") is in the "blue" byte.
- *      (2) In %pixd, sat is displayed vertically; intensity horizontally.
- *          The dimensions of %pixd are w = 256, h = 256, and the depth
- *          is 32 bpp.  The value at each point is simply the number
- *          of pixels found at that value of saturation and intensity.
- * 
- */ -PIX * -pixMakeHistoSV(PIX *pixs, - l_int32 factor, - NUMA **pnasat, - NUMA **pnaval) -{ -l_int32 i, j, w, h, wplt, sval, vval, nd; -l_uint32 pixel; -l_uint32 *datat, *linet; -void **lined32; -NUMA *nasat, *naval; -PIX *pixt, *pixd; - - PROCNAME("pixMakeHistoSV"); - - if (pnasat) *pnasat = NULL; - if (pnaval) *pnaval = NULL; - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - - if (pnasat) { - nasat = numaCreate(256); - numaSetCount(nasat, 256); - *pnasat = nasat; - } - if (pnaval) { - naval = numaCreate(256); - numaSetCount(naval, 256); - *pnaval = naval; - } - - if (factor <= 1) - pixt = pixClone(pixs); - else - pixt = pixScaleBySampling(pixs, 1.0 / (l_float32)factor, - 1.0 / (l_float32)factor); - - /* Create the hue-value histogram */ - pixd = pixCreate(256, 256, 32); - lined32 = pixGetLinePtrs(pixd, NULL); - pixGetDimensions(pixt, &w, &h, NULL); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - for (j = 0; j < w; j++) { - pixel = linet[j]; - sval = (pixel >> L_GREEN_SHIFT) & 0xff; - vval = (pixel >> L_BLUE_SHIFT) & 0xff; - if (pnasat) - numaShiftValue(nasat, sval, 1.0); - if (pnaval) - numaShiftValue(naval, vval, 1.0); - nd = GET_DATA_FOUR_BYTES(lined32[sval], vval); - SET_DATA_FOUR_BYTES(lined32[sval], vval, nd + 1); - } - } - - LEPT_FREE(lined32); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief pixFindHistoPeaksHSV() - * - * \param[in] pixs 32 bpp; HS, HV or SV histogram; not changed - * \param[in] type L_HS_HISTO, L_HV_HISTO or L_SV_HISTO - * \param[in] width half width of sliding window - * \param[in] height half height of sliding window - * \param[in] npeaks number of peaks to look for - * \param[in] erasefactor ratio of erase window size to sliding window size - * \param[out] ppta locations of max for each integrated peak area - * \param[out] pnatot integrated peak areas - * \param[out] ppixa [optional] pixa for debugging; NULL to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) %pixs is a 32 bpp histogram in a pair of HSV colorspace.  It
- *          should be thought of as a single sample with 32 bps (bits/sample).
- *      (2) After each peak is found, the peak is erased with a window
- *          that is centered on the peak and scaled from the sliding
- *          window by %erasefactor.  Typically, %erasefactor is chosen
- *          to be > 1.0.
- *      (3) Data for a maximum of %npeaks is returned in %pta and %natot.
- *      (4) For debugging, after the pixa is returned, display with:
- *          pixd = pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 30, 2);
- * 
- */ -l_ok -pixFindHistoPeaksHSV(PIX *pixs, - l_int32 type, - l_int32 width, - l_int32 height, - l_int32 npeaks, - l_float32 erasefactor, - PTA **ppta, - NUMA **pnatot, - PIXA **ppixa) -{ -l_int32 i, xmax, ymax, ewidth, eheight; -l_uint32 maxval; -BOX *box; -NUMA *natot; -PIX *pixh, *pixw, *pix1, *pix2, *pix3; -PTA *pta; - - PROCNAME("pixFindHistoPeaksHSV"); - - if (ppixa) *ppixa = NULL; - if (ppta) *ppta = NULL; - if (pnatot) *pnatot = NULL; - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs undefined or not 32 bpp", procName, 1); - if (!ppta || !pnatot) - return ERROR_INT("&pta and &natot not both defined", procName, 1); - if (type != L_HS_HISTO && type != L_HV_HISTO && type != L_SV_HISTO) - return ERROR_INT("invalid HSV histo type", procName, 1); - - if ((pta = ptaCreate(npeaks)) == NULL) - return ERROR_INT("pta not made", procName, 1); - *ppta = pta; - if ((natot = numaCreate(npeaks)) == NULL) - return ERROR_INT("natot not made", procName, 1); - *pnatot = natot; - - *ppta = pta; - if (type == L_SV_HISTO) - pixh = pixAddMirroredBorder(pixs, width + 1, width + 1, height + 1, - height + 1); - else /* type == L_HS_HISTO or type == L_HV_HISTO */ - pixh = pixAddMixedBorder(pixs, width + 1, width + 1, height + 1, - height + 1); - - /* Get the total count in the sliding window. If the window - * fully covers the peak, this will be the integrated - * volume under the peak. */ - pixw = pixWindowedMean(pixh, width, height, 1, 0); - pixDestroy(&pixh); - - /* Sequentially identify and erase peaks in the histogram. - * If requested for debugging, save a pixa of the sequence of - * false color histograms. 
*/ - if (ppixa) - *ppixa = pixaCreate(0); - for (i = 0; i < npeaks; i++) { - pixGetMaxValueInRect(pixw, NULL, &maxval, &xmax, &ymax); - if (maxval == 0) break; - numaAddNumber(natot, maxval); - ptaAddPt(pta, xmax, ymax); - ewidth = (l_int32)(width * erasefactor); - eheight = (l_int32)(height * erasefactor); - box = boxCreate(xmax - ewidth, ymax - eheight, 2 * ewidth + 1, - 2 * eheight + 1); - - if (ppixa) { - pix1 = pixMaxDynamicRange(pixw, L_LINEAR_SCALE); - pixaAddPix(*ppixa, pix1, L_INSERT); - pix2 = pixConvertGrayToFalseColor(pix1, 1.0); - pixaAddPix(*ppixa, pix2, L_INSERT); - pix1 = pixMaxDynamicRange(pixw, L_LOG_SCALE); - pix2 = pixConvertGrayToFalseColor(pix1, 1.0); - pixaAddPix(*ppixa, pix2, L_INSERT); - pix3 = pixConvertTo32(pix1); - pixRenderHashBoxArb(pix3, box, 6, 2, L_NEG_SLOPE_LINE, - 1, 255, 100, 100); - pixaAddPix(*ppixa, pix3, L_INSERT); - pixDestroy(&pix1); - } - - pixClearInRect(pixw, box); - boxDestroy(&box); - if (type == L_HS_HISTO || type == L_HV_HISTO) { - /* clear wraps at bottom and top */ - if (ymax - eheight < 0) { /* overlap to bottom */ - box = boxCreate(xmax - ewidth, 240 + ymax - eheight, - 2 * ewidth + 1, eheight - ymax); - } else if (ymax + eheight > 239) { /* overlap to top */ - box = boxCreate(xmax - ewidth, 0, 2 * ewidth + 1, - ymax + eheight - 239); - } else { - box = NULL; - } - if (box) { - pixClearInRect(pixw, box); - boxDestroy(&box); - } - } - } - - pixDestroy(&pixw); - return 0; -} - - -/*! - * \brief displayHSVColorRange() - * - * \param[in] hval hue center value; in range [0 ... 240] - * \param[in] sval saturation center value; in range [0 ... 255] - * \param[in] vval max intensity value; in range [0 ... 
255] - * \param[in] huehw half-width of hue range; > 0 - * \param[in] sathw half-width of saturation range; > 0 - * \param[in] nsamp number of samplings in each half-width in hue and sat - * \param[in] factor linear size of each color square, in pixels; > 3 - * \return pixd 32 bpp set of color squares over input range; NULL on error - * - *
- * Notes:
- *      (1) The total number of color samplings in each of the hue
- *          and saturation directions is 2 * nsamp + 1.
- * 
- */ -PIX * -displayHSVColorRange(l_int32 hval, - l_int32 sval, - l_int32 vval, - l_int32 huehw, - l_int32 sathw, - l_int32 nsamp, - l_int32 factor) -{ -l_int32 i, j, w, huedelta, satdelta, hue, sat, rval, gval, bval; -PIX *pixt, *pixd; - - PROCNAME("displayHSVColorRange"); - - if (hval < 0 || hval > 240) - return (PIX *)ERROR_PTR("invalid hval", procName, NULL); - if (huehw < 5 || huehw > 120) - return (PIX *)ERROR_PTR("invalid huehw", procName, NULL); - if (sval - sathw < 0 || sval + sathw > 255) - return (PIX *)ERROR_PTR("invalid sval/sathw", procName, NULL); - if (nsamp < 1 || factor < 3) - return (PIX *)ERROR_PTR("invalid nsamp or rep. factor", procName, NULL); - if (vval < 0 || vval > 255) - return (PIX *)ERROR_PTR("invalid vval", procName, NULL); - - w = (2 * nsamp + 1); - huedelta = (l_int32)((l_float32)huehw / (l_float32)nsamp); - satdelta = (l_int32)((l_float32)sathw / (l_float32)nsamp); - pixt = pixCreate(w, w, 32); - for (i = 0; i < w; i++) { - hue = hval + huedelta * (i - nsamp); - if (hue < 0) hue += 240; - if (hue >= 240) hue -= 240; - for (j = 0; j < w; j++) { - sat = sval + satdelta * (j - nsamp); - convertHSVToRGB(hue, sat, vval, &rval, &gval, &bval); - pixSetRGBPixel(pixt, j, i, rval, gval, bval); - } - } - - pixd = pixExpandReplicate(pixt, factor); - pixDestroy(&pixt); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Colorspace conversion between RGB and YUV * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertRGBToYUV() - * - * \param[in] pixd can be NULL; if not NULL, must == pixs - * \param[in] pixs - * \return pixd always - * - *
- * Notes:
- *      (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL.
- *      (2) The Y, U and V values are stored in the same places as
- *          the r, g and b values, respectively.  Here, they are explicitly
- *          placed in the 3 MS bytes in the pixel.
- *      (3) Normalizing to 1 and considering the r,g,b components,
- *          a simple way to understand the YUV space is:
- *           ~ Y = weighted sum of (r,g,b)
- *           ~ U = weighted difference between Y and B
- *           ~ V = weighted difference between Y and R
- *      (4) Following video conventions, Y, U and V are in the range:
- *             Y: [16, 235]
- *             U: [16, 240]
- *             V: [16, 240]
- *      (5) For the coefficients in the transform matrices, see eq. 4 in
- *          "Frequently Asked Questions about Color" by Charles Poynton,
- *          //http://user.engineering.uiowa.edu/~aip/Misc/ColorFAQ.html
- * 
- */ -PIX * -pixConvertRGBToYUV(PIX *pixd, - PIX *pixs) -{ -l_int32 w, h, d, wpl, i, j, rval, gval, bval, yval, uval, vval; -l_uint32 *line, *data; -PIXCMAP *cmap; - - PROCNAME("pixConvertRGBToYUV"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && pixd != pixs) - return (PIX *)ERROR_PTR("pixd defined and not inplace", procName, pixd); - - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("not cmapped or rgb", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - - cmap = pixGetColormap(pixd); - if (cmap) { /* just convert the colormap */ - pixcmapConvertRGBToYUV(cmap); - return pixd; - } - - /* Convert RGB image */ - pixGetDimensions(pixd, &w, &h, NULL); - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - convertRGBToYUV(rval, gval, bval, &yval, &uval, &vval); - line[j] = (yval << 24) | (uval << 16) | (vval << 8); - } - } - - return pixd; -} - - -/*! - * \brief pixConvertYUVToRGB() - * - * \param[in] pixd can be NULL; if not NULL, must == pixs - * \param[in] pixs - * \return pixd always - * - *
- * Notes:
- *      (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL.
- *      (2) The user takes responsibility for making sure that pixs is
- *          in YUV space.
- *      (3) The Y, U and V values are stored in the same places as
- *          the r, g and b values, respectively.  Here, they are explicitly
- *          placed in the 3 MS bytes in the pixel.
- * 
- */ -PIX * -pixConvertYUVToRGB(PIX *pixd, - PIX *pixs) -{ -l_int32 w, h, d, wpl, i, j, rval, gval, bval, yval, uval, vval; -l_uint32 pixel; -l_uint32 *line, *data; -PIXCMAP *cmap; - - PROCNAME("pixConvertYUVToRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && pixd != pixs) - return (PIX *)ERROR_PTR("pixd defined and not inplace", procName, pixd); - - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("not cmapped or hsv", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - - cmap = pixGetColormap(pixd); - if (cmap) { /* just convert the colormap */ - pixcmapConvertYUVToRGB(cmap); - return pixd; - } - - /* Convert YUV image */ - pixGetDimensions(pixd, &w, &h, NULL); - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - pixel = line[j]; - yval = pixel >> 24; - uval = (pixel >> 16) & 0xff; - vval = (pixel >> 8) & 0xff; - convertYUVToRGB(yval, uval, vval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, line + j); - } - } - - return pixd; -} - - -/*! - * \brief convertRGBToYUV() - * - * \param[in] rval, gval, bval RGB input - * \param[out] pyval, puval, pvval equivalent YUV values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The range of returned values is:
- *            Y [16 ... 235]
- *            U [16 ... 240]
- *            V [16 ... 240]
- * 
- */ -l_ok -convertRGBToYUV(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 *pyval, - l_int32 *puval, - l_int32 *pvval) -{ -l_float32 norm; - - PROCNAME("convertRGBToYUV"); - - if (pyval) *pyval = 0; - if (puval) *puval = 0; - if (pvval) *pvval = 0; - if (!pyval || !puval || !pvval) - return ERROR_INT("&yval, &uval, &vval not all defined", procName, 1); - - norm = 1.0 / 256.; - *pyval = (l_int32)(16.0 + - norm * (65.738 * rval + 129.057 * gval + 25.064 * bval) + 0.5); - *puval = (l_int32)(128.0 + - norm * (-37.945 * rval -74.494 * gval + 112.439 * bval) + 0.5); - *pvval = (l_int32)(128.0 + - norm * (112.439 * rval - 94.154 * gval - 18.285 * bval) + 0.5); - return 0; -} - - -/*! - * \brief convertYUVToRGB() - * - * \param[in] yval, uval, vval YUV input - * \param[out] prval, pgval, pbval equivalent RGB values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The range of valid input values is:
- *            Y [16 ... 235]
- *            U [16 ... 240]
- *            V [16 ... 240]
- *      (2) Conversion of RGB --> YUV --> RGB leaves the image unchanged.
- *      (3) The YUV gamut is larger than the RBG gamut; many YUV values
- *          will result in an invalid RGB value.  We clip individual
- *          r,g,b components to the range [0, 255], and do not test input.
- * 
- */ -l_ok -convertYUVToRGB(l_int32 yval, - l_int32 uval, - l_int32 vval, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_int32 rval, gval, bval; -l_float32 norm, ym, um, vm; - - PROCNAME("convertYUVToRGB"); - - if (prval) *prval = 0; - if (pgval) *pgval = 0; - if (pbval) *pbval = 0; - if (!prval || !pgval || !pbval) - return ERROR_INT("&rval, &gval, &bval not all defined", procName, 1); - - norm = 1.0 / 256.; - ym = yval - 16.0; - um = uval - 128.0; - vm = vval - 128.0; - rval = (l_int32)(norm * (298.082 * ym + 408.583 * vm) + 0.5); - gval = (l_int32)(norm * (298.082 * ym - 100.291 * um - 208.120 * vm) + - 0.5); - bval = (l_int32)(norm * (298.082 * ym + 516.411 * um) + 0.5); - *prval = L_MIN(255, L_MAX(0, rval)); - *pgval = L_MIN(255, L_MAX(0, gval)); - *pbval = L_MIN(255, L_MAX(0, bval)); - - return 0; -} - - -/*! - * \brief pixcmapConvertRGBToYUV() - * - * \param[in] cmap - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      ~ in-place transform
- *      ~ See convertRGBToYUV() for def'n of YUV space.
- *      ~ replaces: r --> y, g --> u, b --> v
- * 
- */ -l_ok -pixcmapConvertRGBToYUV(PIXCMAP *cmap) -{ -l_int32 i, ncolors, rval, gval, bval, yval, uval, vval; - - PROCNAME("pixcmapConvertRGBToYUV"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - convertRGBToYUV(rval, gval, bval, &yval, &uval, &vval); - pixcmapResetColor(cmap, i, yval, uval, vval); - } - return 0; -} - - -/*! - * \brief pixcmapConvertYUVToRGB() - * - * \param[in] cmap - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      ~ in-place transform
- *      ~ See convertRGBToYUV() for def'n of YUV space.
- *      ~ replaces: y --> r, u --> g, v --> b
- * 
- */ -l_ok -pixcmapConvertYUVToRGB(PIXCMAP *cmap) -{ -l_int32 i, ncolors, rval, gval, bval, yval, uval, vval; - - PROCNAME("pixcmapConvertYUVToRGB"); - - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &yval, &uval, &vval); - convertYUVToRGB(yval, uval, vval, &rval, &gval, &bval); - pixcmapResetColor(cmap, i, rval, gval, bval); - } - return 0; -} - - -/*---------------------------------------------------------------------------* - * Colorspace conversion between RGB and XYZ * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertRGBToXYZ() - * - * \param[in] pixs 32 bpp rgb - * \return fpixa xyz - * - *
- * Notes:
- *      (1) The [x,y,z] values are stored as float values in three fpix
- *          that are returned in a fpixa.
- *      (2) The XYZ color space was defined in 1931 as a reference model that
- *          simulates human color perception.  When Y is taken as luminance,
- *          the values of X and Z constitute a color plane representing
- *          all the hues that can be perceived.  This gamut of colors
- *          is larger than the gamuts that can be displayed or printed.
- *          For example, although all rgb values map to XYZ, the converse
- *          is not true.
- *      (3) The value of the coefficients depends on the illuminant.  We use
- *          coefficients for converting sRGB under D65 (the spectrum from
- *          a 6500 degree K black body; an approximation to daylight color).
- *          See, e.g.,
- *             http://www.cs.rit.edu/~ncs/color/t_convert.html
- *          For more general information on color transforms, see:
- *             http://www.brucelindbloom.com/
- *             http://user.engineering.uiowa.edu/~aip/Misc/ColorFAQ.html
- *             http://en.wikipedia.org/wiki/CIE_1931_color_space
- * 
- */ -FPIXA * -pixConvertRGBToXYZ(PIX *pixs) -{ -l_int32 w, h, wpls, wpld, i, j, rval, gval, bval; -l_uint32 *lines, *datas; -l_float32 fxval, fyval, fzval; -l_float32 *linex, *liney, *linez, *datax, *datay, *dataz; -FPIX *fpix; -FPIXA *fpixa; - - PROCNAME("pixConvertRGBToXYZ"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (FPIXA *)ERROR_PTR("pixs undefined or not rgb", procName, NULL); - - /* Convert RGB image */ - pixGetDimensions(pixs, &w, &h, NULL); - fpixa = fpixaCreate(3); - for (i = 0; i < 3; i++) { - fpix = fpixCreate(w, h); - fpixaAddFPix(fpixa, fpix, L_INSERT); - } - wpls = pixGetWpl(pixs); - wpld = fpixGetWpl(fpix); - datas = pixGetData(pixs); - datax = fpixaGetData(fpixa, 0); - datay = fpixaGetData(fpixa, 1); - dataz = fpixaGetData(fpixa, 2); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linex = datax + i * wpld; - liney = datay + i * wpld; - linez = dataz + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - convertRGBToXYZ(rval, gval, bval, &fxval, &fyval, &fzval); - *(linex + j) = fxval; - *(liney + j) = fyval; - *(linez + j) = fzval; - } - } - - return fpixa; -} - - -/*! - * \brief fpixaConvertXYZToRGB() - * - * \param[in] fpixa three fpix: x,y,z - * \return pixd 32 bpp rgb - * - *
- * Notes:
- *      (1) The xyz image is stored in three fpix.
- *      (2) For values of xyz that are out of gamut for rgb, the rgb
- *          components are set to the closest valid color.
- * 
- */ -PIX * -fpixaConvertXYZToRGB(FPIXA *fpixa) -{ -l_int32 w, h, wpls, wpld, i, j, rval, gval, bval; -l_float32 fxval, fyval, fzval; -l_float32 *linex, *liney, *linez, *datax, *datay, *dataz; -l_uint32 *lined, *datad; -PIX *pixd; -FPIX *fpix; - - PROCNAME("fpixaConvertXYZToRGB"); - - if (!fpixa || fpixaGetCount(fpixa) != 3) - return (PIX *)ERROR_PTR("fpixa undefined or invalid", procName, NULL); - - /* Convert XYZ image */ - if (fpixaGetFPixDimensions(fpixa, 0, &w, &h)) - return (PIX *)ERROR_PTR("fpixa dimensions not found", procName, NULL); - pixd = pixCreate(w, h, 32); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - datax = fpixaGetData(fpixa, 0); - datay = fpixaGetData(fpixa, 1); - dataz = fpixaGetData(fpixa, 2); - fpix = fpixaGetFPix(fpixa, 0, L_CLONE); - wpls = fpixGetWpl(fpix); - fpixDestroy(&fpix); - for (i = 0; i < h; i++) { - linex = datax + i * wpls; - liney = datay + i * wpls; - linez = dataz + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - fxval = linex[j]; - fyval = liney[j]; - fzval = linez[j]; - convertXYZToRGB(fxval, fyval, fzval, 0, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, lined + j); - } - } - - return pixd; -} - - -/*! - * \brief convertRGBToXYZ() - * - * \param[in] rval, gval, bval rgb input - * \param[out] pfxval, pfyval, pfzval equivalent xyz values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) These conversions are for illuminant D65 acting on linear sRGB
- *          values.
- * 
- */ -l_ok -convertRGBToXYZ(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_float32 *pfxval, - l_float32 *pfyval, - l_float32 *pfzval) -{ - PROCNAME("convertRGBToXYZ"); - - if (pfxval) *pfxval = 0.0; - if (pfyval) *pfyval = 0.0; - if (pfzval) *pfzval = 0.0; - if (!pfxval || !pfyval || !pfzval) - return ERROR_INT("&xval, &yval, &zval not all defined", procName, 1); - - *pfxval = 0.4125 * rval + 0.3576 * gval + 0.1804 * bval; - *pfyval = 0.2127 * rval + 0.7152 * gval + 0.0722 * bval; - *pfzval = 0.0193 * rval + 0.1192 * gval + 0.9502 * bval; - return 0; -} - - -/*! - * \brief convertXYZToRGB() - * - * \param[in] fxval, fyval, fzval - * \param[in] blackout 0 to output nearest color if out of gamut; - * 1 to output black - * \param[out] prval, pgval, pbval 32 bpp rgb values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For values of xyz that are out of gamut for rgb, at least
- *          one of the r, g or b components will be either less than 0
- *          or greater than 255.  For that situation:
- *            * if %blackout == 0, the individual component(s) that are out
- *              of gamut will be set to 0 or 255, respectively.
- *            * if %blackout == 1, the output color will be set to black
- * 
- */ -l_ok -convertXYZToRGB(l_float32 fxval, - l_float32 fyval, - l_float32 fzval, - l_int32 blackout, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_int32 rval, gval, bval; - - PROCNAME("convertXYZToRGB"); - - if (prval) *prval = 0; - if (pgval) *pgval = 0; - if (pbval) *pbval = 0; - if (!prval || !pgval ||!pbval) - return ERROR_INT("&rval, &gval, &bval not all defined", procName, 1); - *prval = *pgval = *pbval = 0; - - rval = (l_int32)(3.2405 * fxval - 1.5372 * fyval - 0.4985 * fzval + 0.5); - gval = (l_int32)(-0.9693 * fxval + 1.8760 * fyval + 0.0416 * fzval + 0.5); - bval = (l_int32)(0.0556 * fxval - 0.2040 * fyval + 1.0573 * fzval + 0.5); - if (blackout == 0) { /* the usual situation; use nearest rgb color */ - *prval = L_MAX(0, L_MIN(rval, 255)); - *pgval = L_MAX(0, L_MIN(gval, 255)); - *pbval = L_MAX(0, L_MIN(bval, 255)); - } else { /* use black for out of gamut */ - if (rval >= 0 && rval < 256 && gval >= 0 && gval < 256 && - bval >= 0 && bval < 256) { /* in gamut */ - *prval = rval; - *pgval = gval; - *pbval = bval; - } - } - return 0; -} - - -/*---------------------------------------------------------------------------* - * Colorspace conversion between XYZ and LAB * - *---------------------------------------------------------------------------*/ -/*! - * \brief fpixaConvertXYZToLAB() - * - * \param[in] fpixas xyz - * \return fpixa lab - * - *
- * Notes:
- *      (1) The input [x,y,z] and output [l,a,b] values are stored as
- *          float values, each set in three fpix.
- *      (2) The CIE LAB color space was invented in 1976, as an
- *          absolute reference for specifying colors that we can
- *          perceive, independently of the rendering device.  It was
- *          invented to align color display and print images.
- *          For information, see:
- *             http://www.brucelindbloom.com/
- *             http://en.wikipedia.org/wiki/Lab_color_space
- * 
- */ -FPIXA * -fpixaConvertXYZToLAB(FPIXA *fpixas) -{ -l_int32 w, h, wpl, i, j; -l_float32 fxval, fyval, fzval, flval, faval, fbval; -l_float32 *linex, *liney, *linez, *datax, *datay, *dataz; -l_float32 *linel, *linea, *lineb, *datal, *dataa, *datab; -FPIX *fpix; -FPIXA *fpixad; - - PROCNAME("fpixaConvertXYZToLAB"); - - if (!fpixas || fpixaGetCount(fpixas) != 3) - return (FPIXA *)ERROR_PTR("fpixas undefined/invalid", procName, NULL); - - /* Convert XYZ image */ - if (fpixaGetFPixDimensions(fpixas, 0, &w, &h)) - return (FPIXA *)ERROR_PTR("fpixas sizes not found", procName, NULL); - fpixad = fpixaCreate(3); - for (i = 0; i < 3; i++) { - fpix = fpixCreate(w, h); - fpixaAddFPix(fpixad, fpix, L_INSERT); - } - wpl = fpixGetWpl(fpix); - datax = fpixaGetData(fpixas, 0); - datay = fpixaGetData(fpixas, 1); - dataz = fpixaGetData(fpixas, 2); - datal = fpixaGetData(fpixad, 0); - dataa = fpixaGetData(fpixad, 1); - datab = fpixaGetData(fpixad, 2); - - /* Convert XYZ image */ - for (i = 0; i < h; i++) { - linex = datax + i * wpl; - liney = datay + i * wpl; - linez = dataz + i * wpl; - linel = datal + i * wpl; - linea = dataa + i * wpl; - lineb = datab + i * wpl; - for (j = 0; j < w; j++) { - fxval = *(linex + j); - fyval = *(liney + j); - fzval = *(linez + j); - convertXYZToLAB(fxval, fyval, fzval, &flval, &faval, &fbval); - *(linel + j) = flval; - *(linea + j) = faval; - *(lineb + j) = fbval; - } - } - - return fpixad; -} - - -/*! - * \brief fpixaConvertLABToXYZ() - * - * \param[in] fpixas lab - * \return fpixa xyz - * - *
- * Notes:
- *      (1) The input [l,a,b] and output [x,y,z] values are stored as
- *          float values, each set in three fpix.
- * 
- */ -FPIXA * -fpixaConvertLABToXYZ(FPIXA *fpixas) -{ -l_int32 w, h, wpl, i, j; -l_float32 fxval, fyval, fzval, flval, faval, fbval; -l_float32 *linel, *linea, *lineb, *datal, *dataa, *datab; -l_float32 *linex, *liney, *linez, *datax, *datay, *dataz; -FPIX *fpix; -FPIXA *fpixad; - - PROCNAME("fpixaConvertLABToXYZ"); - - if (!fpixas || fpixaGetCount(fpixas) != 3) - return (FPIXA *)ERROR_PTR("fpixas undefined/invalid", procName, NULL); - - /* Convert LAB image */ - if (fpixaGetFPixDimensions(fpixas, 0, &w, &h)) - return (FPIXA *)ERROR_PTR("fpixas sizes not found", procName, NULL); - fpixad = fpixaCreate(3); - for (i = 0; i < 3; i++) { - fpix = fpixCreate(w, h); - fpixaAddFPix(fpixad, fpix, L_INSERT); - } - wpl = fpixGetWpl(fpix); - datal = fpixaGetData(fpixas, 0); - dataa = fpixaGetData(fpixas, 1); - datab = fpixaGetData(fpixas, 2); - datax = fpixaGetData(fpixad, 0); - datay = fpixaGetData(fpixad, 1); - dataz = fpixaGetData(fpixad, 2); - - /* Convert XYZ image */ - for (i = 0; i < h; i++) { - linel = datal + i * wpl; - linea = dataa + i * wpl; - lineb = datab + i * wpl; - linex = datax + i * wpl; - liney = datay + i * wpl; - linez = dataz + i * wpl; - for (j = 0; j < w; j++) { - flval = *(linel + j); - faval = *(linea + j); - fbval = *(lineb + j); - convertLABToXYZ(flval, faval, fbval, &fxval, &fyval, &fzval); - *(linex + j) = fxval; - *(liney + j) = fyval; - *(linez + j) = fzval; - } - } - - return fpixad; -} - - -/*! 
- * \brief convertXYZToLAB() - * - * \param[in] xval, yval, zval input xyz - * \param[out] plval, paval, pbval equivalent lab values - * \return 0 if OK, 1 on error - */ -l_ok -convertXYZToLAB(l_float32 xval, - l_float32 yval, - l_float32 zval, - l_float32 *plval, - l_float32 *paval, - l_float32 *pbval) -{ -l_float32 xn, yn, zn, fx, fy, fz; - - PROCNAME("convertXYZToLAB"); - - if (plval) *plval = 0.0; - if (paval) *paval = 0.0; - if (pbval) *pbval = 0.0; - if (!plval || !paval || !pbval) - return ERROR_INT("&lval, &aval, &bval not all defined", procName, 1); - - /* First normalize to the corresponding white values */ - xn = 0.0041259 * xval; - yn = 0.0039216 * yval; - zn = 0.0036012 * zval; - /* Then apply the lab_forward function */ - fx = lab_forward(xn); - fy = lab_forward(yn); - fz = lab_forward(zn); - *plval = 116.0 * fy - 16.0; - *paval = 500.0 * (fx - fy); - *pbval = 200.0 * (fy - fz); - return 0; -} - - -/*! - * \brief convertLABToXYZ() - * - * \param[in] lval, aval, bval input lab - * \param[out] pxval, pyval, pzval equivalent xyz values - * \return 0 if OK, 1 on error - */ -l_ok -convertLABToXYZ(l_float32 lval, - l_float32 aval, - l_float32 bval, - l_float32 *pxval, - l_float32 *pyval, - l_float32 *pzval) -{ -l_float32 fx, fy, fz; -l_float32 xw = 242.37; /* x component corresponding to rgb white */ -l_float32 yw = 255.0; /* y component corresponding to rgb white */ -l_float32 zw = 277.69; /* z component corresponding to rgb white */ - - PROCNAME("convertLABToXYZ"); - - if (pxval) *pxval = 0.0; - if (pyval) *pyval = 0.0; - if (pzval) *pzval = 0.0; - if (!pxval || !pyval || !pzval) - return ERROR_INT("&xval, &yval, &zval not all defined", procName, 1); - - fy = 0.0086207 * (16.0 + lval); - fx = fy + 0.002 * aval; - fz = fy - 0.005 * bval; - *pxval = xw * lab_reverse(fx); - *pyval = yw * lab_reverse(fy); - *pzval = zw * lab_reverse(fz); - return 0; -} - - -/* - * See http://en.wikipedia.org/wiki/Lab_color_space for formulas. 
- * This is the forward function: from xyz to lab. It includes a rational - * function approximation over [0.008856 ... 1] to the cube root, from - * "Fast Color Space Transformations Using Minimax Approximations", - * M. Celebi et al, http://arxiv.org/pdf/1009.0854v1.pdf. - */ -static l_float32 -lab_forward(l_float32 v) -{ -const l_float32 f_thresh = 0.008856; /* (6/29)^3 */ -const l_float32 f_factor = 7.787; /* (1/3) * (29/6)^2) */ -const l_float32 f_offset = 0.13793; /* 4/29 */ - - if (v > f_thresh) { -#if SLOW_CUBE_ROOT - return powf(v, 0.333333); -#else - l_float32 num, den; - num = 4.37089e-04 + v * (9.52695e-02 + v * (1.25201 + v * 1.30273)); - den = 3.91236e-03 + v * (2.95408e-01 + v * (1.71714 + v * 6.34341e-01)); - return num / den; -#endif - } else { - return f_factor * v + f_offset; - } -} - - -/* - * See http://en.wikipedia.org/wiki/Lab_color_space for formulas. - * This is the reverse (inverse) function: from lab to xyz. - */ -static l_float32 -lab_reverse(l_float32 v) -{ -const l_float32 r_thresh = 0.20690; /* 6/29 */ -const l_float32 r_factor = 0.12842; /* 3 * (6/29)^2 */ -const l_float32 r_offset = 0.13793; /* 4/29 */ - - if (v > r_thresh) { - return v * v * v; - } else { - return r_factor * (v - r_offset); - } -} - - -/*---------------------------------------------------------------------------* - * Colorspace conversion between RGB and LAB * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertRGBToLAB() - * - * \param[in] pixs 32 bpp rgb - * \return fpixa lab - * - *
- * Notes:
- *      (1) The [l,a,b] values are stored as float values in three fpix
- *          that are returned in a fpixa.
- * 
- */ -FPIXA * -pixConvertRGBToLAB(PIX *pixs) -{ -l_int32 w, h, wpls, wpld, i, j, rval, gval, bval; -l_uint32 *lines, *datas; -l_float32 flval, faval, fbval; -l_float32 *linel, *linea, *lineb, *datal, *dataa, *datab; -FPIX *fpix; -FPIXA *fpixa; - - PROCNAME("pixConvertRGBToLAB"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (FPIXA *)ERROR_PTR("pixs undefined or not rgb", procName, NULL); - - /* Convert RGB image */ - pixGetDimensions(pixs, &w, &h, NULL); - fpixa = fpixaCreate(3); - for (i = 0; i < 3; i++) { - fpix = fpixCreate(w, h); - fpixaAddFPix(fpixa, fpix, L_INSERT); - } - wpls = pixGetWpl(pixs); - wpld = fpixGetWpl(fpix); - datas = pixGetData(pixs); - datal = fpixaGetData(fpixa, 0); - dataa = fpixaGetData(fpixa, 1); - datab = fpixaGetData(fpixa, 2); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linel = datal + i * wpld; - linea = dataa + i * wpld; - lineb = datab + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - convertRGBToLAB(rval, gval, bval, &flval, &faval, &fbval); - *(linel + j) = flval; - *(linea + j) = faval; - *(lineb + j) = fbval; - } - } - - return fpixa; -} - - -/*! - * \brief fpixaConvertLABToRGB() - * - * \param[in] fpixa three fpix: l,a,b - * \return pixd 32 bpp rgb - * - *
- * Notes:
- *      (1) The lab image is stored in three fpix.
- * 
- */ -PIX * -fpixaConvertLABToRGB(FPIXA *fpixa) -{ -l_int32 w, h, wpls, wpld, i, j, rval, gval, bval; -l_float32 flval, faval, fbval; -l_float32 *linel, *linea, *lineb, *datal, *dataa, *datab; -l_uint32 *lined, *datad; -PIX *pixd; -FPIX *fpix; - - PROCNAME("fpixaConvertLABToRGB"); - - if (!fpixa || fpixaGetCount(fpixa) != 3) - return (PIX *)ERROR_PTR("fpixa undefined or invalid", procName, NULL); - - /* Convert LAB image */ - if (fpixaGetFPixDimensions(fpixa, 0, &w, &h)) - return (PIX *)ERROR_PTR("fpixa dimensions not found", procName, NULL); - pixd = pixCreate(w, h, 32); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - datal = fpixaGetData(fpixa, 0); - dataa = fpixaGetData(fpixa, 1); - datab = fpixaGetData(fpixa, 2); - fpix = fpixaGetFPix(fpixa, 0, L_CLONE); - wpls = fpixGetWpl(fpix); - fpixDestroy(&fpix); - for (i = 0; i < h; i++) { - linel = datal + i * wpls; - linea = dataa + i * wpls; - lineb = datab + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - flval = linel[j]; - faval = linea[j]; - fbval = lineb[j]; - convertLABToRGB(flval, faval, fbval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, lined + j); - } - } - - return pixd; -} - - -/*! - * \brief convertRGBToLAB() - * - * \param[in] rval, gval, bval rgb input - * \param[out] pflval, pfaval, pfbval equivalent lab values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) These conversions are for illuminant D65 acting on linear sRGB
- *          values.
- * 
- */ -l_ok -convertRGBToLAB(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_float32 *pflval, - l_float32 *pfaval, - l_float32 *pfbval) -{ -l_float32 fxval, fyval, fzval; - - PROCNAME("convertRGBToLAB"); - - if (pflval) *pflval = 0.0; - if (pfaval) *pfaval = 0.0; - if (pfbval) *pfbval = 0.0; - if (!pflval || !pfaval || !pfbval) - return ERROR_INT("&flval, &faval, &fbval not all defined", procName, 1); - - convertRGBToXYZ(rval, gval, bval, &fxval, &fyval, &fzval); - convertXYZToLAB(fxval, fyval, fzval, pflval, pfaval, pfbval); - return 0; -} - - -/*! - * \brief convertLABToRGB() - * - * \param[in] flval, faval, fbval input lab - * \param[out] prval, pgval, pbval equivalent rgb values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For values of lab that are out of gamut for rgb, the rgb
- *          components are set to the closest valid color.
- * 
- */ -l_ok -convertLABToRGB(l_float32 flval, - l_float32 faval, - l_float32 fbval, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_float32 fxval, fyval, fzval; - - PROCNAME("convertLABToRGB"); - - if (prval) *prval = 0; - if (pgval) *pgval = 0; - if (pbval) *pbval = 0; - if (!prval || !pgval || !pbval) - return ERROR_INT("&rval, &gval, &bval not all defined", procName, 1); - - convertLABToXYZ(flval, faval, fbval, &fxval, &fyval, &fzval); - convertXYZToRGB(fxval, fyval, fzval, 0, prval, pgval, pbval); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/compare.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/compare.c deleted file mode 100644 index d0800719..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/compare.c +++ /dev/null @@ -1,3607 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file compare.c - *
- *
- *      Test for pix equality
- *           l_int32     pixEqual()
- *           l_int32     pixEqualWithAlpha()
- *           l_int32     pixEqualWithCmap()
- *           l_int32     cmapEqual()
- *           l_int32     pixUsesCmapColor()
- *
- *      Binary correlation
- *           l_int32     pixCorrelationBinary()
- *
- *      Difference of two images of same size
- *           l_int32     pixDisplayDiffBinary()
- *           l_int32     pixCompareBinary()
- *           l_int32     pixCompareGrayOrRGB()
- *           l_int32     pixCompareGray()
- *           l_int32     pixCompareRGB()
- *           l_int32     pixCompareTiled()
- *
- *      Other measures of the difference of two images of the same size
- *           NUMA       *pixCompareRankDifference()
- *           l_int32     pixTestForSimilarity()
- *           l_int32     pixGetDifferenceStats()
- *           NUMA       *pixGetDifferenceHistogram()
- *           l_int32     pixGetPerceptualDiff()
- *           l_int32     pixGetPSNR()
- *
- *      Comparison of photo regions by histogram
- *           l_int32     pixaComparePhotoRegionsByHisto()  -- top-level
- *           l_int32     pixComparePhotoRegionsByHisto()  -- top-level for 2
- *           l_int32     pixGenPhotoHistos()
- *           PIX        *pixPadToCenterCentroid()
- *           l_int32     pixCentroid8()
- *           l_int32     pixDecideIfPhotoImage()
- *       static l_int32  findHistoGridDimensions()
- *           l_int32     compareTilesByHisto()
- *
- *           l_int32     pixCompareGrayByHisto()  -- top-level for 2
- *       static l_int32  pixCompareTilesByHisto()
- *           l_int32     pixCropAlignedToCentroid()
- *
- *           l_uint8    *l_compressGrayHistograms()
- *           NUMAA      *l_uncompressGrayHistograms()
- *
- *      Translated images at the same resolution
- *           l_int32     pixCompareWithTranslation()
- *           l_int32     pixBestCorrelation()
- *
- *  For comparing images using tiled histograms, essentially all the
- *  computation goes into deciding if a region of an image is a photo,
- *  whether that photo region is amenable to similarity measurements
- *  using histograms, and finally the calculation of the gray histograms
- *  for each of the tiled regions.  The actual comparison is essentially
- *  instantaneous.  Therefore, with a large number of images to compare
- *  with each other, it is important to first calculate the histograms
- *  for each image.  Then the comparisons, which go as the square of the
- *  number of images, actually takes no time.
- *
- *  A high level function that takes a pixa of images and does
- *  all comparisons, pixaComparePhotosByHisto(), uses this split
- *  approach.  It pads the images so that the centroid is in the center,
- *  which will allow the tiles to be better aligned.
- *
- *  For testing purposes, two functions are given that do all the work
- *  to compare just two photo regions:
- *    *  pixComparePhotoRegionsByHisto() uses the split approach, qualifying
- *       the images first with pixGenPhotoHistos(), and then comparing
- *       with compareTilesByHisto().
- *    *  pixCompareGrayByHisto() aligns the two images by centroid
- *       and calls pixCompareTilesByHisto() to generate the histograms
- *       and do the comparison.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Small enough to consider equal to 0.0, for plot output */ -static const l_float32 TINY = 0.00001; - -static l_ok findHistoGridDimensions(l_int32 n, l_int32 w, l_int32 h, - l_int32 *pnx, l_int32 *pny, l_int32 debug); -static l_ok pixCompareTilesByHisto(PIX *pix1, PIX *pix2, l_int32 maxgray, - l_int32 factor, l_int32 n, - l_float32 *pscore, PIXA *pixadebug); - -/*------------------------------------------------------------------* - * Test for pix equality * - *------------------------------------------------------------------*/ -/*! - * \brief pixEqual() - * - * \param[in] pix1 - * \param[in] pix2 - * \param[out] psame 1 if same; 0 if different - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Equality is defined as having the same pixel values for
- *          each respective image pixel.
- *      (2) This works on two pix of any depth.  If one or both pix
- *          have a colormap, the depths can be different and the
- *          two pix can still be equal.
- *      (3) This ignores the alpha component for 32 bpp images.
- *      (4) If both pix have colormaps and the depths are equal,
- *          use the pixEqualWithCmap() function, which does a fast
- *          comparison if the colormaps are identical and a relatively
- *          slow comparison otherwise.
- *      (5) In all other cases, any existing colormaps must first be
- *          removed before doing pixel comparison.  After the colormaps
- *          are removed, the resulting two images must have the same depth.
- *          The "lowest common denominator" is RGB, but this is only
- *          chosen when necessary, or when both have colormaps but
- *          different depths.
- *      (6) For images without colormaps that are not 32 bpp, all bits
- *          in the image part of the data array must be identical.
- * 
- */ -l_ok -pixEqual(PIX *pix1, - PIX *pix2, - l_int32 *psame) -{ - return pixEqualWithAlpha(pix1, pix2, 0, psame); -} - - -/*! - * \brief pixEqualWithAlpha() - * - * \param[in] pix1 - * \param[in] pix2 - * \param[in] use_alpha 1 to compare alpha in RGBA; 0 to ignore - * \param[out] psame 1 if same; 0 if different - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See notes in pixEqual().
- *      (2) This is more general than pixEqual(), in that for 32 bpp
- *          RGBA images, where spp = 4, you can optionally include
- *          the alpha component in the comparison.
- * 
- */ -l_ok -pixEqualWithAlpha(PIX *pix1, - PIX *pix2, - l_int32 use_alpha, - l_int32 *psame) -{ -l_int32 w1, h1, d1, w2, h2, d2, wpl1, wpl2; -l_int32 spp1, spp2, i, j, color, mismatch, opaque; -l_int32 fullwords, linebits, endbits; -l_uint32 endmask, wordmask; -l_uint32 *data1, *data2, *line1, *line2; -PIX *pixs1, *pixs2, *pixt1, *pixt2, *pixalpha; -PIXCMAP *cmap1, *cmap2; - - PROCNAME("pixEqualWithAlpha"); - - if (!psame) - return ERROR_INT("psame not defined", procName, 1); - *psame = 0; /* init to not equal */ - if (!pix1 || !pix2) - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - pixGetDimensions(pix1, &w1, &h1, &d1); - pixGetDimensions(pix2, &w2, &h2, &d2); - if (w1 != w2 || h1 != h2) { - L_INFO("pix sizes differ\n", procName); - return 0; - } - - /* Suppose the use_alpha flag is true. - * If only one of two 32 bpp images has spp == 4, we call that - * a "mismatch" of the alpha component. In the case of a mismatch, - * if the 4 bpp pix does not have all alpha components opaque (255), - * the images are not-equal. However if they are all opaque, - * this image is equivalent to spp == 3, so we allow the - * comparison to go forward, testing only for the RGB equality. */ - spp1 = pixGetSpp(pix1); - spp2 = pixGetSpp(pix2); - mismatch = 0; - if (use_alpha && d1 == 32 && d2 == 32) { - mismatch = ((spp1 == 4 && spp2 != 4) || (spp1 != 4 && spp2 == 4)); - if (mismatch) { - pixalpha = (spp1 == 4) ? 
pix1 : pix2; - pixAlphaIsOpaque(pixalpha, &opaque); - if (!opaque) { - L_INFO("just one pix has a non-opaque alpha layer\n", procName); - return 0; - } - } - } - - cmap1 = pixGetColormap(pix1); - cmap2 = pixGetColormap(pix2); - if (!cmap1 && !cmap2 && (d1 != d2) && (d1 == 32 || d2 == 32)) { - L_INFO("no colormaps, pix depths unequal, and one of them is RGB\n", - procName); - return 0; - } - - if (cmap1 && cmap2 && (d1 == d2)) /* use special function */ - return pixEqualWithCmap(pix1, pix2, psame); - - /* Must remove colormaps if they exist, and in the process - * end up with the resulting images having the same depth. */ - if (cmap1 && !cmap2) { - pixUsesCmapColor(pix1, &color); - if (color && d2 <= 8) /* can't be equal */ - return 0; - if (d2 < 8) - pixs2 = pixConvertTo8(pix2, FALSE); - else - pixs2 = pixClone(pix2); - if (d2 <= 8) - pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_GRAYSCALE); - else - pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR); - } else if (!cmap1 && cmap2) { - pixUsesCmapColor(pix2, &color); - if (color && d1 <= 8) /* can't be equal */ - return 0; - if (d1 < 8) - pixs1 = pixConvertTo8(pix1, FALSE); - else - pixs1 = pixClone(pix1); - if (d1 <= 8) - pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_GRAYSCALE); - else - pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR); - } else if (cmap1 && cmap2) { /* depths not equal; use rgb */ - pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR); - pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR); - } else { /* no colormaps */ - pixs1 = pixClone(pix1); - pixs2 = pixClone(pix2); - } - - /* OK, we have no colormaps, but the depths may still be different */ - d1 = pixGetDepth(pixs1); - d2 = pixGetDepth(pixs2); - if (d1 != d2) { - if (d1 == 16 || d2 == 16) { - L_INFO("one pix is 16 bpp\n", procName); - pixDestroy(&pixs1); - pixDestroy(&pixs2); - return 0; - } - pixt1 = pixConvertLossless(pixs1, 8); - pixt2 = pixConvertLossless(pixs2, 8); - if (!pixt1 || !pixt2) { - 
L_INFO("failure to convert to 8 bpp\n", procName); - pixDestroy(&pixs1); - pixDestroy(&pixs2); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return 0; - } - } else { - pixt1 = pixClone(pixs1); - pixt2 = pixClone(pixs2); - } - pixDestroy(&pixs1); - pixDestroy(&pixs2); - - /* No colormaps, equal depths; do pixel comparisons */ - d1 = pixGetDepth(pixt1); - d2 = pixGetDepth(pixt2); - wpl1 = pixGetWpl(pixt1); - wpl2 = pixGetWpl(pixt2); - data1 = pixGetData(pixt1); - data2 = pixGetData(pixt2); - - if (d1 == 32) { /* test either RGB or RGBA pixels */ - if (use_alpha && !mismatch) - wordmask = (spp1 == 3) ? 0xffffff00 : 0xffffffff; - else - wordmask = 0xffffff00; - for (i = 0; i < h1; i++) { - line1 = data1 + wpl1 * i; - line2 = data2 + wpl2 * i; - for (j = 0; j < wpl1; j++) { - if ((*line1 ^ *line2) & wordmask) { - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return 0; - } - line1++; - line2++; - } - } - } else { /* all bits count */ - linebits = d1 * w1; - fullwords = linebits / 32; - endbits = linebits & 31; - endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits)); - for (i = 0; i < h1; i++) { - line1 = data1 + wpl1 * i; - line2 = data2 + wpl2 * i; - for (j = 0; j < fullwords; j++) { - if (*line1 ^ *line2) { - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return 0; - } - line1++; - line2++; - } - if (endbits) { - if ((*line1 ^ *line2) & endmask) { - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return 0; - } - } - } - } - - pixDestroy(&pixt1); - pixDestroy(&pixt2); - *psame = 1; - return 0; -} - - -/*! - * \brief pixEqualWithCmap() - * - * \param[in] pix1 - * \param[in] pix2 - * \param[out] psame - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns same = TRUE if the images have identical content.
- *      (2) Both pix must have a colormap, and be of equal size and depth.
- *          If these conditions are not satisfied, it is not an error;
- *          the returned result is same = FALSE.
- *      (3) We then check whether the colormaps are the same; if so,
- *          the comparison proceeds 32 bits at a time.
- *      (4) If the colormaps are different, the comparison is done by
- *          slow brute force.
- * 
- */ -l_ok -pixEqualWithCmap(PIX *pix1, - PIX *pix2, - l_int32 *psame) -{ -l_int32 d, w, h, wpl1, wpl2, i, j, linebits, fullwords, endbits; -l_int32 rval1, rval2, gval1, gval2, bval1, bval2, samecmaps; -l_uint32 endmask, val1, val2; -l_uint32 *data1, *data2, *line1, *line2; -PIXCMAP *cmap1, *cmap2; - - PROCNAME("pixEqualWithCmap"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - - if (pixSizesEqual(pix1, pix2) == 0) - return 0; - cmap1 = pixGetColormap(pix1); - cmap2 = pixGetColormap(pix2); - if (!cmap1 || !cmap2) { - L_INFO("both images don't have colormap\n", procName); - return 0; - } - pixGetDimensions(pix1, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8) { - L_INFO("pix depth not in {1, 2, 4, 8}\n", procName); - return 0; - } - - cmapEqual(cmap1, cmap2, 3, &samecmaps); - if (samecmaps == TRUE) { /* colormaps are identical; compare by words */ - linebits = d * w; - wpl1 = pixGetWpl(pix1); - wpl2 = pixGetWpl(pix2); - data1 = pixGetData(pix1); - data2 = pixGetData(pix2); - fullwords = linebits / 32; - endbits = linebits & 31; - endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits)); - for (i = 0; i < h; i++) { - line1 = data1 + wpl1 * i; - line2 = data2 + wpl2 * i; - for (j = 0; j < fullwords; j++) { - if (*line1 ^ *line2) - return 0; - line1++; - line2++; - } - if (endbits) { - if ((*line1 ^ *line2) & endmask) - return 0; - } - } - *psame = 1; - return 0; - } - - /* Colormaps aren't identical; compare pixel by pixel */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pix1, j, i, &val1); - pixGetPixel(pix2, j, i, &val2); - pixcmapGetColor(cmap1, val1, &rval1, &gval1, &bval1); - pixcmapGetColor(cmap2, val2, &rval2, &gval2, &bval2); - if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2) - return 0; - } - } - - *psame = 1; - return 0; -} - - -/*! 
- * \brief cmapEqual() - * - * \param[in] cmap1 - * \param[in] cmap2 - * \param[in] ncomps 3 for RGB, 4 for RGBA - * \param[out] psame - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns %same = TRUE if the colormaps have identical entries.
- *      (2) If %ncomps == 4, the alpha components of the colormaps are also
- *          compared.
- * 
- */ -l_ok -cmapEqual(PIXCMAP *cmap1, - PIXCMAP *cmap2, - l_int32 ncomps, - l_int32 *psame) -{ -l_int32 n1, n2, i, rval1, rval2, gval1, gval2, bval1, bval2, aval1, aval2; - - PROCNAME("cmapEqual"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = FALSE; - if (!cmap1) - return ERROR_INT("cmap1 not defined", procName, 1); - if (!cmap2) - return ERROR_INT("cmap2 not defined", procName, 1); - if (ncomps != 3 && ncomps != 4) - return ERROR_INT("ncomps not 3 or 4", procName, 1); - - n1 = pixcmapGetCount(cmap1); - n2 = pixcmapGetCount(cmap2); - if (n1 != n2) { - L_INFO("colormap sizes are different\n", procName); - return 0; - } - - for (i = 0; i < n1; i++) { - pixcmapGetRGBA(cmap1, i, &rval1, &gval1, &bval1, &aval1); - pixcmapGetRGBA(cmap2, i, &rval2, &gval2, &bval2, &aval2); - if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2) - return 0; - if (ncomps == 4 && aval1 != aval2) - return 0; - } - *psame = TRUE; - return 0; -} - - -/*! - * \brief pixUsesCmapColor() - * - * \param[in] pixs any depth, colormap - * \param[out] pcolor TRUE if color found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns color = TRUE if three things are obtained:
- *          (a) the pix has a colormap
- *          (b) the colormap has at least one color entry
- *          (c) a color entry is actually used
- *      (2) It is used in pixEqual() for comparing two images, in a
- *          situation where it is required to know if the colormap
- *          has color entries that are actually used in the image.
- * 
- */ -l_ok -pixUsesCmapColor(PIX *pixs, - l_int32 *pcolor) -{ -l_int32 n, i, rval, gval, bval, numpix; -NUMA *na; -PIXCMAP *cmap; - - PROCNAME("pixUsesCmapColor"); - - if (!pcolor) - return ERROR_INT("&color not defined", procName, 1); - *pcolor = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - if ((cmap = pixGetColormap(pixs)) == NULL) - return 0; - - pixcmapHasColor(cmap, pcolor); - if (*pcolor == 0) /* no color */ - return 0; - - /* The cmap has color entries. Are they used? */ - na = pixGetGrayHistogram(pixs, 1); - n = pixcmapGetCount(cmap); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - numaGetIValue(na, i, &numpix); - if ((rval != gval || rval != bval) && numpix) { /* color found! */ - *pcolor = 1; - break; - } - } - numaDestroy(&na); - - return 0; -} - - -/*------------------------------------------------------------------* - * Binary correlation * - *------------------------------------------------------------------*/ -/*! - * \brief pixCorrelationBinary() - * - * \param[in] pix1 1 bpp - * \param[in] pix2 1 bpp - * \param[out] pval correlation - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The correlation is a number between 0.0 and 1.0,
- *          based on foreground similarity:
- *                           (|1 AND 2|)**2
- *            correlation =  --------------
- *                             |1| * |2|
- *          where |x| is the count of foreground pixels in image x.
- *          If the images are identical, this is 1.0.
- *          If they have no fg pixels in common, this is 0.0.
- *          If one or both images have no fg pixels, the correlation is 0.0.
- *      (2) Typically the two images are of equal size, but this
- *          is not enforced.  Instead, the UL corners are aligned.
- * 
- */ -l_ok -pixCorrelationBinary(PIX *pix1, - PIX *pix2, - l_float32 *pval) -{ -l_int32 count1, count2, countn; -l_int32 *tab8; -PIX *pixn; - - PROCNAME("pixCorrelationBinary"); - - if (!pval) - return ERROR_INT("&pval not defined", procName, 1); - *pval = 0.0; - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - - tab8 = makePixelSumTab8(); - pixCountPixels(pix1, &count1, tab8); - pixCountPixels(pix2, &count2, tab8); - if (count1 == 0 || count2 == 0) { - LEPT_FREE(tab8); - return 0; - } - pixn = pixAnd(NULL, pix1, pix2); - pixCountPixels(pixn, &countn, tab8); - *pval = (l_float32)countn * (l_float32)countn / - ((l_float32)count1 * (l_float32)count2); - LEPT_FREE(tab8); - pixDestroy(&pixn); - return 0; -} - - -/*------------------------------------------------------------------* - * Difference of two images * - *------------------------------------------------------------------*/ -/*! - * \brief pixDisplayDiffBinary() - * - * \param[in] pix1 1 bpp - * \param[in] pix2 1 bpp - * \return pixd 4 bpp cmapped, or NULL on error - * - *
- * Notes:
- *      (1) This gives a color representation of the difference between
- *          pix1 and pix2.  The color difference depends on the order.
- *          The pixels in pixd have 4 colors:
- *           * unchanged:  black (on), white (off)
- *           * on in pix1, off in pix2: red
- *           * on in pix2, off in pix1: green
- *      (2) This aligns the UL corners of pix1 and pix2, and crops
- *          to the overlapping pixels.
- * 
- */ -PIX * -pixDisplayDiffBinary(PIX *pix1, - PIX *pix2) -{ -l_int32 w1, h1, d1, w2, h2, d2, minw, minh; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixDisplayDiffBinary"); - - if (!pix1 || !pix2) - return (PIX *)ERROR_PTR("pix1, pix2 not both defined", procName, NULL); - pixGetDimensions(pix1, &w1, &h1, &d1); - pixGetDimensions(pix2, &w2, &h2, &d2); - if (d1 != 1 || d2 != 1) - return (PIX *)ERROR_PTR("pix1 and pix2 not 1 bpp", procName, NULL); - minw = L_MIN(w1, w2); - minh = L_MIN(h1, h2); - - pixd = pixCreate(minw, minh, 4); - cmap = pixcmapCreate(4); - pixcmapAddColor(cmap, 255, 255, 255); /* initialized to white */ - pixcmapAddColor(cmap, 0, 0, 0); - pixcmapAddColor(cmap, 255, 0, 0); - pixcmapAddColor(cmap, 0, 255, 0); - pixSetColormap(pixd, cmap); - - pixt = pixAnd(NULL, pix1, pix2); - pixPaintThroughMask(pixd, pixt, 0, 0, 0x0); /* black */ - pixSubtract(pixt, pix1, pix2); - pixPaintThroughMask(pixd, pixt, 0, 0, 0xff000000); /* red */ - pixSubtract(pixt, pix2, pix1); - pixPaintThroughMask(pixd, pixt, 0, 0, 0x00ff0000); /* green */ - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixCompareBinary() - * - * \param[in] pix1 1 bpp - * \param[in] pix2 1 bpp - * \param[in] comptype L_COMPARE_XOR, L_COMPARE_SUBTRACT - * \param[out] pfract fraction of pixels that are different - * \param[out] ppixdiff [optional] pix of difference - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The two images are aligned at the UL corner, and do not
- *          need to be the same size.
- *      (2) If using L_COMPARE_SUBTRACT, pix2 is subtracted from pix1.
- *      (3) The total number of pixels is determined by pix1.
- *      (4) On error, the returned fraction is 1.0.
- * 
- */ -l_ok -pixCompareBinary(PIX *pix1, - PIX *pix2, - l_int32 comptype, - l_float32 *pfract, - PIX **ppixdiff) -{ -l_int32 w, h, count; -PIX *pixt; - - PROCNAME("pixCompareBinary"); - - if (ppixdiff) *ppixdiff = NULL; - if (!pfract) - return ERROR_INT("&pfract not defined", procName, 1); - *pfract = 1.0; /* initialize to max difference */ - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1); - if (comptype != L_COMPARE_XOR && comptype != L_COMPARE_SUBTRACT) - return ERROR_INT("invalid comptype", procName, 1); - - if (comptype == L_COMPARE_XOR) - pixt = pixXor(NULL, pix1, pix2); - else /* comptype == L_COMPARE_SUBTRACT) */ - pixt = pixSubtract(NULL, pix1, pix2); - pixCountPixels(pixt, &count, NULL); - pixGetDimensions(pix1, &w, &h, NULL); - *pfract = (l_float32)(count) / (l_float32)(w * h); - - if (ppixdiff) - *ppixdiff = pixt; - else - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief pixCompareGrayOrRGB() - * - * \param[in] pix1 2,4,8,16 bpp gray, 32 bpp rgb, or colormapped - * \param[in] pix2 2,4,8,16 bpp gray, 32 bpp rgb, or colormapped - * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF - * \param[in] plottype gplot plot output type, or 0 for no plot - * \param[out] psame [optional] 1 if pixel values are identical - * \param[out] pdiff [optional] average difference - * \param[out] prmsdiff [optional] rms of difference - * \param[out] ppixdiff [optional] pix of difference - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The two images are aligned at the UL corner, and do not
- *          need to be the same size.  If they are not the same size,
- *          the comparison will be made over overlapping pixels.
- *      (2) If there is a colormap, it is removed and the result
- *          is either gray or RGB depending on the colormap.
- *      (3) If RGB, each component is compared separately.
- *      (4) If type is L_COMPARE_ABS_DIFF, pix2 is subtracted from pix1
- *          and the absolute value is taken.
- *      (5) If type is L_COMPARE_SUBTRACT, pix2 is subtracted from pix1
- *          and the result is clipped to 0.
- *      (6) The plot output types are specified in gplot.h.
- *          Use 0 if no difference plot is to be made.
- *      (7) If the images are pixelwise identical, no difference
- *          plot is made, even if requested.  The result (TRUE or FALSE)
- *          is optionally returned in the parameter 'same'.
- *      (8) The average difference (either subtracting or absolute value)
- *          is optionally returned in the parameter 'diff'.
- *      (9) The RMS difference is optionally returned in the
- *          parameter 'rmsdiff'.  For RGB, we return the average of
- *          the RMS differences for each of the components.
- *     (10) Because pixel values are compared, pix1 and pix2 can be equal when:
- *          * they are both gray with different depth
- *          * one is colormapped and the other is not
- *          * they are both colormapped and have different size colormaps
- * 
- */ -l_ok -pixCompareGrayOrRGB(PIX *pix1, - PIX *pix2, - l_int32 comptype, - l_int32 plottype, - l_int32 *psame, - l_float32 *pdiff, - l_float32 *prmsdiff, - PIX **ppixdiff) -{ -l_int32 retval, d1, d2; -PIX *pixt1, *pixt2, *pixs1, *pixs2; - - PROCNAME("pixCompareGrayOrRGB"); - - if (psame) *psame = 0; - if (pdiff) *pdiff = 255.0; - if (prmsdiff) *prmsdiff = 255.0; - if (ppixdiff) *ppixdiff = NULL; - if (!pix1 || pixGetDepth(pix1) == 1) - return ERROR_INT("pix1 not defined or 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) == 1) - return ERROR_INT("pix2 not defined or 1 bpp", procName, 1); - if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF) - return ERROR_INT("invalid comptype", procName, 1); - if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS) - return ERROR_INT("invalid plottype", procName, 1); - - pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); - pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC); - d1 = pixGetDepth(pixt1); - d2 = pixGetDepth(pixt2); - if (d1 < 8) - pixs1 = pixConvertTo8(pixt1, FALSE); - else - pixs1 = pixClone(pixt1); - if (d2 < 8) - pixs2 = pixConvertTo8(pixt2, FALSE); - else - pixs2 = pixClone(pixt2); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - d1 = pixGetDepth(pixs1); - d2 = pixGetDepth(pixs2); - if (d1 != d2) { - pixDestroy(&pixs1); - pixDestroy(&pixs2); - return ERROR_INT("intrinsic depths are not equal", procName, 1); - } - - if (d1 == 8 || d1 == 16) - retval = pixCompareGray(pixs1, pixs2, comptype, plottype, psame, - pdiff, prmsdiff, ppixdiff); - else /* d1 == 32 */ - retval = pixCompareRGB(pixs1, pixs2, comptype, plottype, psame, - pdiff, prmsdiff, ppixdiff); - pixDestroy(&pixs1); - pixDestroy(&pixs2); - return retval; -} - - -/*! 
- * \brief pixCompareGray() - * - * \param[in] pix1 8 or 16 bpp, not cmapped - * \param[in] pix2 8 or 16 bpp, not cmapped - * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF - * \param[in] plottype gplot plot output type, or 0 for no plot - * \param[out] psame [optional] 1 if pixel values are identical - * \param[out] pdiff [optional] average difference - * \param[out] prmsdiff [optional] rms of difference - * \param[out] ppixdiff [optional] pix of difference - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See pixCompareGrayOrRGB() for details.
- *      (2) Use pixCompareGrayOrRGB() if the input pix are colormapped.
- *      (3) Note: setting %plottype > 0 can result in writing named
- *                output files.
- * 
- */ -l_ok -pixCompareGray(PIX *pix1, - PIX *pix2, - l_int32 comptype, - l_int32 plottype, - l_int32 *psame, - l_float32 *pdiff, - l_float32 *prmsdiff, - PIX **ppixdiff) -{ -char buf[64]; -static l_int32 index = 0; -l_int32 d1, d2, same, first, last; -GPLOT *gplot; -NUMA *na, *nac; -PIX *pixt; - - PROCNAME("pixCompareGray"); - - if (psame) *psame = 0; - if (pdiff) *pdiff = 255.0; - if (prmsdiff) *prmsdiff = 255.0; - if (ppixdiff) *ppixdiff = NULL; - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - d1 = pixGetDepth(pix1); - d2 = pixGetDepth(pix2); - if ((d1 != d2) || (d1 != 8 && d1 != 16)) - return ERROR_INT("depths unequal or not 8 or 16 bpp", procName, 1); - if (pixGetColormap(pix1) || pixGetColormap(pix2)) - return ERROR_INT("pix1 and/or pix2 are colormapped", procName, 1); - if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF) - return ERROR_INT("invalid comptype", procName, 1); - if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS) - return ERROR_INT("invalid plottype", procName, 1); - - lept_mkdir("lept/comp"); - - if (comptype == L_COMPARE_SUBTRACT) - pixt = pixSubtractGray(NULL, pix1, pix2); - else /* comptype == L_COMPARE_ABS_DIFF) */ - pixt = pixAbsDifference(pix1, pix2); - - pixZero(pixt, &same); - if (same) - L_INFO("Images are pixel-wise identical\n", procName); - if (psame) *psame = same; - - if (pdiff) - pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_MEAN_ABSVAL, pdiff); - - /* Don't bother to plot if the images are the same */ - if (plottype && !same) { - L_INFO("Images differ: output plots will be generated\n", procName); - na = pixGetGrayHistogram(pixt, 1); - numaGetNonzeroRange(na, TINY, &first, &last); - nac = numaClipToInterval(na, 0, last); - snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d", index); - gplot = gplotCreate(buf, plottype, - "Pixel Difference Histogram", "diff val", - "number of pixels"); - gplotAddPlot(gplot, NULL, nac, 
GPLOT_LINES, "gray"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d.png", - index++); - l_fileDisplay(buf, 100, 100, 1.0); - numaDestroy(&na); - numaDestroy(&nac); - } - - if (ppixdiff) - *ppixdiff = pixCopy(NULL, pixt); - - if (prmsdiff) { - if (comptype == L_COMPARE_SUBTRACT) { /* wrong type for rms diff */ - pixDestroy(&pixt); - pixt = pixAbsDifference(pix1, pix2); - } - pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, prmsdiff); - } - - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief pixCompareRGB() - * - * \param[in] pix1 32 bpp rgb - * \param[in] pix2 32 bpp rgb - * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF - * \param[in] plottype gplot plot output type, or 0 for no plot - * \param[out] psame [optional] 1 if pixel values are identical - * \param[out] pdiff [optional] average difference - * \param[out] prmsdiff [optional] rms of difference - * \param[out] ppixdiff [optional] pix of difference - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See pixCompareGrayOrRGB() for details.
- *      (2) Note: setting %plottype > 0 can result in writing named
- *                output files.
- * 
- */ -l_ok -pixCompareRGB(PIX *pix1, - PIX *pix2, - l_int32 comptype, - l_int32 plottype, - l_int32 *psame, - l_float32 *pdiff, - l_float32 *prmsdiff, - PIX **ppixdiff) -{ -char buf[64]; -static l_int32 index = 0; -l_int32 rsame, gsame, bsame, same, first, rlast, glast, blast, last; -l_float32 rdiff, gdiff, bdiff; -GPLOT *gplot; -NUMA *nar, *nag, *nab, *narc, *nagc, *nabc; -PIX *pixr1, *pixr2, *pixg1, *pixg2, *pixb1, *pixb2; -PIX *pixr, *pixg, *pixb; - - PROCNAME("pixCompareRGB"); - - if (psame) *psame = 0; - if (pdiff) *pdiff = 0.0; - if (prmsdiff) *prmsdiff = 0.0; - if (ppixdiff) *ppixdiff = NULL; - if (!pix1 || pixGetDepth(pix1) != 32) - return ERROR_INT("pix1 not defined or not 32 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 32) - return ERROR_INT("pix2 not defined or not ew bpp", procName, 1); - if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF) - return ERROR_INT("invalid comptype", procName, 1); - if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS) - return ERROR_INT("invalid plottype", procName, 1); - - lept_mkdir("lept/comp"); - - pixr1 = pixGetRGBComponent(pix1, COLOR_RED); - pixr2 = pixGetRGBComponent(pix2, COLOR_RED); - pixg1 = pixGetRGBComponent(pix1, COLOR_GREEN); - pixg2 = pixGetRGBComponent(pix2, COLOR_GREEN); - pixb1 = pixGetRGBComponent(pix1, COLOR_BLUE); - pixb2 = pixGetRGBComponent(pix2, COLOR_BLUE); - if (comptype == L_COMPARE_SUBTRACT) { - pixr = pixSubtractGray(NULL, pixr1, pixr2); - pixg = pixSubtractGray(NULL, pixg1, pixg2); - pixb = pixSubtractGray(NULL, pixb1, pixb2); - } else { /* comptype == L_COMPARE_ABS_DIFF) */ - pixr = pixAbsDifference(pixr1, pixr2); - pixg = pixAbsDifference(pixg1, pixg2); - pixb = pixAbsDifference(pixb1, pixb2); - } - - pixZero(pixr, &rsame); - pixZero(pixg, &gsame); - pixZero(pixb, &bsame); - same = rsame && gsame && bsame; - if (same) - L_INFO("Images are pixel-wise identical\n", procName); - if (psame) *psame = same; - - if (pdiff) { - pixGetAverageMasked(pixr, NULL, 0, 0, 1, 
L_MEAN_ABSVAL, &rdiff); - pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_MEAN_ABSVAL, &gdiff); - pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_MEAN_ABSVAL, &bdiff); - *pdiff = (rdiff + gdiff + bdiff) / 3.0; - } - - /* Don't bother to plot if the images are the same */ - if (plottype && !same) { - L_INFO("Images differ: output plots will be generated\n", procName); - nar = pixGetGrayHistogram(pixr, 1); - nag = pixGetGrayHistogram(pixg, 1); - nab = pixGetGrayHistogram(pixb, 1); - numaGetNonzeroRange(nar, TINY, &first, &rlast); - numaGetNonzeroRange(nag, TINY, &first, &glast); - numaGetNonzeroRange(nab, TINY, &first, &blast); - last = L_MAX(rlast, glast); - last = L_MAX(last, blast); - narc = numaClipToInterval(nar, 0, last); - nagc = numaClipToInterval(nag, 0, last); - nabc = numaClipToInterval(nab, 0, last); - snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d", index); - gplot = gplotCreate(buf, plottype, - "Pixel Difference Histogram", "diff val", - "number of pixels"); - gplotAddPlot(gplot, NULL, narc, GPLOT_LINES, "red"); - gplotAddPlot(gplot, NULL, nagc, GPLOT_LINES, "green"); - gplotAddPlot(gplot, NULL, nabc, GPLOT_LINES, "blue"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d.png", - index++); - l_fileDisplay(buf, 100, 100, 1.0); - numaDestroy(&nar); - numaDestroy(&nag); - numaDestroy(&nab); - numaDestroy(&narc); - numaDestroy(&nagc); - numaDestroy(&nabc); - } - - if (ppixdiff) - *ppixdiff = pixCreateRGBImage(pixr, pixg, pixb); - - if (prmsdiff) { - if (comptype == L_COMPARE_SUBTRACT) { - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - pixr = pixAbsDifference(pixr1, pixr2); - pixg = pixAbsDifference(pixg1, pixg2); - pixb = pixAbsDifference(pixb1, pixb2); - } - pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &rdiff); - pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &gdiff); - pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &bdiff); - *prmsdiff = 
(rdiff + gdiff + bdiff) / 3.0; - } - - pixDestroy(&pixr1); - pixDestroy(&pixr2); - pixDestroy(&pixg1); - pixDestroy(&pixg2); - pixDestroy(&pixb1); - pixDestroy(&pixb2); - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return 0; -} - - -/*! - * \brief pixCompareTiled() - * - * \param[in] pix1 8 bpp or 32 bpp rgb - * \param[in] pix2 8 bpp 32 bpp rgb - * \param[in] sx, sy tile size; must be > 1 in each dimension - * \param[in] type L_MEAN_ABSVAL or L_ROOT_MEAN_SQUARE - * \param[out] ppixdiff pix of difference - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) With L_MEAN_ABSVAL, we compute for each tile the
- *          average abs value of the pixel component difference between
- *          the two (aligned) images.  With L_ROOT_MEAN_SQUARE, we
- *          compute instead the rms difference over all components.
- *      (2) The two input pix must be the same depth.  Comparison is made
- *          using UL corner alignment.
- *      (3) For 32 bpp, the distance between corresponding tiles
- *          is found by averaging the measured difference over all three
- *          components of each pixel in the tile.
- *      (4) The result, pixdiff, contains one pixel for each source tile.
- * 
- */ -l_ok -pixCompareTiled(PIX *pix1, - PIX *pix2, - l_int32 sx, - l_int32 sy, - l_int32 type, - PIX **ppixdiff) -{ -l_int32 d1, d2, w, h; -PIX *pixt, *pixr, *pixg, *pixb; -PIX *pixrdiff, *pixgdiff, *pixbdiff; -PIXACC *pixacc; - - PROCNAME("pixCompareTiled"); - - if (!ppixdiff) - return ERROR_INT("&pixdiff not defined", procName, 1); - *ppixdiff = NULL; - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - d1 = pixGetDepth(pix1); - d2 = pixGetDepth(pix2); - if (d1 != d2) - return ERROR_INT("depths not equal", procName, 1); - if (d1 != 8 && d1 != 32) - return ERROR_INT("pix1 not 8 or 32 bpp", procName, 1); - if (d2 != 8 && d2 != 32) - return ERROR_INT("pix2 not 8 or 32 bpp", procName, 1); - if (sx < 2 || sy < 2) - return ERROR_INT("sx and sy not both > 1", procName, 1); - if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE) - return ERROR_INT("invalid type", procName, 1); - - pixt = pixAbsDifference(pix1, pix2); - if (d1 == 8) { - *ppixdiff = pixGetAverageTiled(pixt, sx, sy, type); - } else { /* d1 == 32 */ - pixr = pixGetRGBComponent(pixt, COLOR_RED); - pixg = pixGetRGBComponent(pixt, COLOR_GREEN); - pixb = pixGetRGBComponent(pixt, COLOR_BLUE); - pixrdiff = pixGetAverageTiled(pixr, sx, sy, type); - pixgdiff = pixGetAverageTiled(pixg, sx, sy, type); - pixbdiff = pixGetAverageTiled(pixb, sx, sy, type); - pixGetDimensions(pixrdiff, &w, &h, NULL); - pixacc = pixaccCreate(w, h, 0); - pixaccAdd(pixacc, pixrdiff); - pixaccAdd(pixacc, pixgdiff); - pixaccAdd(pixacc, pixbdiff); - pixaccMultConst(pixacc, 1. 
/ 3.); - *ppixdiff = pixaccFinal(pixacc, 8); - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - pixDestroy(&pixrdiff); - pixDestroy(&pixgdiff); - pixDestroy(&pixbdiff); - pixaccDestroy(&pixacc); - } - pixDestroy(&pixt); - return 0; -} - - -/*------------------------------------------------------------------* - * Other measures of the difference of two images * - *------------------------------------------------------------------*/ -/*! - * \brief pixCompareRankDifference() - * - * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] factor subsampling factor; use 0 or 1 for no subsampling - * \return narank numa of rank difference, or NULL on error - * - *
- * Notes:
- *      (1) This answers the question: if the pixel values in each
- *          component are compared by absolute difference, for
- *          any value of difference, what is the fraction of
- *          pixel pairs that have a difference of this magnitude
- *          or greater.  For a difference of 0, the fraction is 1.0.
- *          In this sense, it is a mapping from pixel difference to
- *          rank order of difference.
- *      (2) The two images are aligned at the UL corner, and do not
- *          need to be the same size.  If they are not the same size,
- *          the comparison will be made over overlapping pixels.
- *      (3) If there is a colormap, it is removed and the result
- *          is either gray or RGB depending on the colormap.
- *      (4) If RGB, pixel differences for each component are aggregated
- *          into a single histogram.
- * 
- */ -NUMA * -pixCompareRankDifference(PIX *pix1, - PIX *pix2, - l_int32 factor) -{ -l_int32 i; -l_float32 *array1, *array2; -NUMA *nah, *nan, *nad; - - PROCNAME("pixCompareRankDifference"); - - if (!pix1) - return (NUMA *)ERROR_PTR("pix1 not defined", procName, NULL); - if (!pix2) - return (NUMA *)ERROR_PTR("pix2 not defined", procName, NULL); - - if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - - nan = numaNormalizeHistogram(nah, 1.0); - array1 = numaGetFArray(nan, L_NOCOPY); - - nad = numaCreate(256); - numaSetCount(nad, 256); /* all initialized to 0.0 */ - array2 = numaGetFArray(nad, L_NOCOPY); - - /* Do rank accumulation on normalized histo of diffs */ - array2[0] = 1.0; - for (i = 1; i < 256; i++) - array2[i] = array2[i - 1] - array1[i - 1]; - - numaDestroy(&nah); - numaDestroy(&nan); - return nad; -} - - -/*! - * \brief pixTestForSimilarity() - * - * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] factor subsampling factor; use 0 or 1 for no subsampling - * \param[in] mindiff minimum pixel difference to be counted; > 0 - * \param[in] maxfract maximum fraction of pixels allowed to have - * diff greater than or equal to mindiff - * \param[in] maxave maximum average difference of pixels allowed for - * pixels with diff greater than or equal to - * mindiff, after subtracting mindiff - * \param[out] psimilar 1 if similar, 0 otherwise - * \param[in] details use 1 to give normalized histogram and other data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This takes 2 pix that are the same size and determines using
- *          3 input parameters if they are "similar".  The first parameter
- *          %mindiff establishes a criterion of pixel-to-pixel similarity:
- *          two pixels are not similar if their difference in value is
- *          at least mindiff.  Then %maxfract and %maxave are thresholds
- *          on the number and distribution of dissimilar pixels
- *          allowed for the two pix to be similar.   If the pix are
- *          to be similar, neither threshold can be exceeded.
- *      (2) In setting the %maxfract and %maxave thresholds, you have
- *          these options:
- *            (a) Base the comparison only on %maxfract.  Then set
- *                %maxave = 0.0 or 256.0.  (If 0, we always ignore it.)
- *            (b) Base the comparison only on %maxave.  Then set
- *                %maxfract = 1.0.
- *            (c) Base the comparison on both thresholds.
- *      (3) Example of values that can be expected at mindiff = 15 when
- *          comparing lossless png encoding with jpeg encoding, q=75:
- *             (smoothish bg)       fractdiff = 0.01, avediff = 2.5
- *             (natural scene)      fractdiff = 0.13, avediff = 3.5
- *          To identify these images as 'similar', select maxfract
- *          and maxave to be upper bounds of what you expect.
- *      (4) See pixGetDifferenceStats() for a discussion of why we subtract
- *          mindiff from the computed average diff of the nonsimilar pixels
- *          to get the 'avediff' returned by that function.
- *      (5) If there is a colormap, it is removed and the result
- *          is either gray or RGB depending on the colormap.
- *      (6) If RGB, the maximum difference between pixel components is
- *          saved in the histogram.
- * 
- */ -l_ok -pixTestForSimilarity(PIX *pix1, - PIX *pix2, - l_int32 factor, - l_int32 mindiff, - l_float32 maxfract, - l_float32 maxave, - l_int32 *psimilar, - l_int32 details) -{ -l_float32 fractdiff, avediff; - - PROCNAME("pixTestForSimilarity"); - - if (!psimilar) - return ERROR_INT("&similar not defined", procName, 1); - *psimilar = 0; - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - if (pixSizesEqual(pix1, pix2) == 0) - return ERROR_INT("pix sizes not equal", procName, 1); - if (mindiff <= 0) - return ERROR_INT("mindiff must be > 0", procName, 1); - - if (pixGetDifferenceStats(pix1, pix2, factor, mindiff, - &fractdiff, &avediff, details)) - return ERROR_INT("diff stats not found", procName, 1); - - if (maxave <= 0.0) maxave = 256.0; - if (fractdiff <= maxfract && avediff <= maxave) - *psimilar = 1; - return 0; -} - - -/*! - * \brief pixGetDifferenceStats() - * - * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] factor subsampling factor; use 0 or 1 for no subsampling - * \param[in] mindiff minimum pixel difference to be counted; > 0 - * \param[out] pfractdiff fraction of pixels with diff greater than or - * equal to mindiff - * \param[out] pavediff average difference of pixels with diff greater - * than or equal to mindiff, less mindiff - * \param[in] details use 1 to give normalized histogram and other data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This takes a threshold %mindiff and describes the difference
- *          between two images in terms of two numbers:
- *            (a) the fraction of pixels, %fractdiff, whose difference
- *                equals or exceeds the threshold %mindiff, and
- *            (b) the average value %avediff of the difference in pixel value
- *                for the pixels in the set given by (a), after you subtract
- *                %mindiff.  The reason for subtracting %mindiff is that
- *                you then get a useful measure for the rate of falloff
- *                of the distribution for larger differences.  For example,
- *                if %mindiff = 10 and you find that %avediff = 2.5, it
- *                says that of the pixels with diff > 10, the average of
- *                their diffs is just mindiff + 2.5 = 12.5.  This is a
- *                fast falloff in the histogram with increasing difference.
- *      (2) The two images are aligned at the UL corner, and do not
- *          need to be the same size.  If they are not the same size,
- *          the comparison will be made over overlapping pixels.
- *      (3) If there is a colormap, it is removed and the result
- *          is either gray or RGB depending on the colormap.
- *      (4) If RGB, the maximum difference between pixel components is
- *          saved in the histogram.
- *      (5) Set %details == 1 to see the difference histogram and get
- *          an output that shows for each value of %mindiff, what are the
- *          minimum values required for fractdiff and avediff in order
- *          that the two pix will be considered similar.
- * 
- */ -l_ok -pixGetDifferenceStats(PIX *pix1, - PIX *pix2, - l_int32 factor, - l_int32 mindiff, - l_float32 *pfractdiff, - l_float32 *pavediff, - l_int32 details) -{ -l_int32 i, first, last, diff; -l_float32 fract, ave; -l_float32 *array; -NUMA *nah, *nan, *nac; - - PROCNAME("pixGetDifferenceStats"); - - if (pfractdiff) *pfractdiff = 0.0; - if (pavediff) *pavediff = 0.0; - if (!pfractdiff) - return ERROR_INT("&fractdiff not defined", procName, 1); - if (!pavediff) - return ERROR_INT("&avediff not defined", procName, 1); - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - if (mindiff <= 0) - return ERROR_INT("mindiff must be > 0", procName, 1); - - if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL) - return ERROR_INT("na not made", procName, 1); - - if ((nan = numaNormalizeHistogram(nah, 1.0)) == NULL) { - numaDestroy(&nah); - return ERROR_INT("nan not made", procName, 1); - } - array = numaGetFArray(nan, L_NOCOPY); - - if (details) { - lept_mkdir("lept/comp"); - numaGetNonzeroRange(nan, 0.0, &first, &last); - nac = numaClipToInterval(nan, first, last); - gplotSimple1(nac, GPLOT_PNG, "/tmp/lept/comp/histo", - "Difference histogram"); - l_fileDisplay("/tmp/lept/comp/histo.png", 500, 0, 1.0); - lept_stderr("\nNonzero values in normalized histogram:"); - numaWriteStderr(nac); - numaDestroy(&nac); - lept_stderr(" Mindiff fractdiff avediff\n"); - lept_stderr(" -----------------------------------\n"); - for (diff = 1; diff < L_MIN(2 * mindiff, last); diff++) { - fract = 0.0; - ave = 0.0; - for (i = diff; i <= last; i++) { - fract += array[i]; - ave += (l_float32)i * array[i]; - } - ave = (fract == 0.0) ? 
0.0 : ave / fract; - ave -= diff; - lept_stderr("%5d %7.4f %7.4f\n", - diff, fract, ave); - } - lept_stderr(" -----------------------------------\n"); - } - - fract = 0.0; - ave = 0.0; - for (i = mindiff; i < 256; i++) { - fract += array[i]; - ave += (l_float32)i * array[i]; - } - ave = (fract == 0.0) ? 0.0 : ave / fract; - ave -= mindiff; - - *pfractdiff = fract; - *pavediff = ave; - - numaDestroy(&nah); - numaDestroy(&nan); - return 0; -} - - -/*! - * \brief pixGetDifferenceHistogram() - * - * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] factor subsampling factor; use 0 or 1 for no subsampling - * \return na Numa of histogram of differences, or NULL on error - * - *
- * Notes:
- *      (1) The two images are aligned at the UL corner, and do not
- *          need to be the same size.  If they are not the same size,
- *          the comparison will be made over overlapping pixels.
- *      (2) If there is a colormap, it is removed and the result
- *          is either gray or RGB depending on the colormap.
- *      (3) If RGB, the maximum difference between pixel components is
- *          saved in the histogram.
- * 
- */ -NUMA * -pixGetDifferenceHistogram(PIX *pix1, - PIX *pix2, - l_int32 factor) -{ -l_int32 w1, h1, d1, w2, h2, d2, w, h, wpl1, wpl2; -l_int32 i, j, val, val1, val2; -l_int32 rval1, rval2, gval1, gval2, bval1, bval2; -l_int32 rdiff, gdiff, bdiff, maxdiff; -l_uint32 *data1, *data2, *line1, *line2; -l_float32 *array; -NUMA *na; -PIX *pixt1, *pixt2; - - PROCNAME("pixGetDifferenceHistogram"); - - if (!pix1) - return (NUMA *)ERROR_PTR("pix1 not defined", procName, NULL); - if (!pix2) - return (NUMA *)ERROR_PTR("pix2 not defined", procName, NULL); - d1 = pixGetDepth(pix1); - d2 = pixGetDepth(pix2); - if (d1 == 16 || d2 == 16) - return (NUMA *)ERROR_PTR("d == 16 not supported", procName, NULL); - if (d1 < 8 && !pixGetColormap(pix1)) - return (NUMA *)ERROR_PTR("pix1 depth < 8 bpp and not cmapped", - procName, NULL); - if (d2 < 8 && !pixGetColormap(pix2)) - return (NUMA *)ERROR_PTR("pix2 depth < 8 bpp and not cmapped", - procName, NULL); - pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); - pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC); - pixGetDimensions(pixt1, &w1, &h1, &d1); - pixGetDimensions(pixt2, &w2, &h2, &d2); - if (d1 != d2) { - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return (NUMA *)ERROR_PTR("pix depths not equal", procName, NULL); - } - if (factor < 1) factor = 1; - - na = numaCreate(256); - numaSetCount(na, 256); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - w = L_MIN(w1, w2); - h = L_MIN(h1, h2); - data1 = pixGetData(pixt1); - data2 = pixGetData(pixt2); - wpl1 = pixGetWpl(pixt1); - wpl2 = pixGetWpl(pixt2); - if (d1 == 8) { - for (i = 0; i < h; i += factor) { - line1 = data1 + i * wpl1; - line2 = data2 + i * wpl2; - for (j = 0; j < w; j += factor) { - val1 = GET_DATA_BYTE(line1, j); - val2 = GET_DATA_BYTE(line2, j); - val = L_ABS(val1 - val2); - array[val]++; - } - } - } else { /* d1 == 32 */ - for (i = 0; i < h; i += factor) { - line1 = data1 + i * wpl1; - line2 = data2 + i * wpl2; - for (j = 0; j < w; j += 
factor) { - extractRGBValues(line1[j], &rval1, &gval1, &bval1); - extractRGBValues(line2[j], &rval2, &gval2, &bval2); - rdiff = L_ABS(rval1 - rval2); - gdiff = L_ABS(gval1 - gval2); - bdiff = L_ABS(bval1 - bval2); - maxdiff = L_MAX(rdiff, gdiff); - maxdiff = L_MAX(maxdiff, bdiff); - array[maxdiff]++; - } - } - } - - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return na; -} - - -/*! - * \brief pixGetPerceptualDiff() - * - * \param[in] pixs1 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] pixs2 8 bpp gray or 32 bpp rgb, or colormapped - * \param[in] sampling subsampling factor; use 0 or 1 for no subsampling - * \param[in] dilation size of grayscale or color Sel; odd - * \param[in] mindiff minimum pixel difference to be counted; > 0 - * \param[out] pfract fraction of pixels with diff greater than mindiff - * \param[out] ppixdiff1 [optional] showing difference (gray or color) - * \param[out] ppixdiff2 [optional] showing pixels of sufficient diff - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This takes 2 pix and determines, using 2 input parameters:
- *           * %dilation specifies the amount of grayscale or color
- *             dilation to apply to the images, to compensate for
- *             a small amount of misregistration.  A typical number might
- *             be 5, which uses a 5x5 Sel.  Grayscale dilation expands
- *             lighter pixels into darker pixel regions.
- *           * %mindiff determines the threshold on the difference in
- *             pixel values to be counted -- two pixels are not similar
- *             if their difference in value is at least %mindiff.  For
- *             color pixels, we use the maximum component difference.
- *      (2) The pixelwise comparison is always done with the UL corners
- *          aligned.  The sizes of pix1 and pix2 need not be the same,
- *          although in practice it can be useful to scale to the same size.
- *      (3) If there is a colormap, it is removed and the result
- *          is either gray or RGB depending on the colormap.
- *      (4) Two optional diff images can be retrieved (typ. for debugging):
- *           pixdiff1: the gray or color difference
- *           pixdiff2: thresholded to 1 bpp for pixels exceeding %mindiff
- *      (5) The returned value of fract can be compared to some threshold,
- *          which is application dependent.
- *      (6) This method is in analogy to the two-sided hausdorff transform,
- *          except here it is for d > 1.  For d == 1 (see pixRankHaustest()),
- *          we verify that when one pix1 is dilated, it covers at least a
- *          given fraction of the pixels in pix2, and v.v.; in that
- *          case, the two pix are sufficiently similar.  Here, we
- *          do an analogous thing: subtract the dilated pix1 from pix2 to
- *          get a 1-sided hausdorff-like transform.  Then do it the
- *          other way.  Take the component-wise max of the two results,
- *          and threshold to get the fraction of pixels with a difference
- *          below the threshold.
- * 
- */ -l_ok -pixGetPerceptualDiff(PIX *pixs1, - PIX *pixs2, - l_int32 sampling, - l_int32 dilation, - l_int32 mindiff, - l_float32 *pfract, - PIX **ppixdiff1, - PIX **ppixdiff2) -{ -l_int32 d1, d2, w, h, count; -PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; -PIX *pix10, *pix11; - - PROCNAME("pixGetPerceptualDiff"); - - if (ppixdiff1) *ppixdiff1 = NULL; - if (ppixdiff2) *ppixdiff2 = NULL; - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 1.0; /* init to completely different */ - if ((dilation & 1) == 0) - return ERROR_INT("dilation must be odd", procName, 1); - if (!pixs1) - return ERROR_INT("pixs1 not defined", procName, 1); - if (!pixs2) - return ERROR_INT("pixs2 not defined", procName, 1); - d1 = pixGetDepth(pixs1); - d2 = pixGetDepth(pixs2); - if (!pixGetColormap(pixs1) && d1 < 8) - return ERROR_INT("pixs1 not cmapped or >=8 bpp", procName, 1); - if (!pixGetColormap(pixs2) && d2 < 8) - return ERROR_INT("pixs2 not cmapped or >=8 bpp", procName, 1); - - /* Integer downsample if requested */ - if (sampling > 1) { - pix1 = pixScaleByIntSampling(pixs1, sampling); - pix2 = pixScaleByIntSampling(pixs2, sampling); - } else { - pix1 = pixClone(pixs1); - pix2 = pixClone(pixs2); - } - - /* Remove colormaps */ - if (pixGetColormap(pix1)) { - pix3 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); - d1 = pixGetDepth(pix3); - } else { - pix3 = pixClone(pix1); - } - if (pixGetColormap(pix2)) { - pix4 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC); - d2 = pixGetDepth(pix4); - } else { - pix4 = pixClone(pix2); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - if (d1 != d2) { - pixDestroy(&pix3); - pixDestroy(&pix4); - return ERROR_INT("pix3 and pix4 depths not equal", procName, 1); - } - - /* In each direction, do a small dilation and subtract the dilated - * image from the other image to get a one-sided difference. 
- * Then take the max of the differences for each direction - * and clipping each component to 255 if necessary. Note that - * for RGB images, the dilations and max selection are done - * component-wise, and the conversion to grayscale also uses the - * maximum component. The resulting grayscale images are - * thresholded using %mindiff. */ - if (d1 == 8) { - pix5 = pixDilateGray(pix3, dilation, dilation); - pixCompareGray(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, - &pix7); - pix6 = pixDilateGray(pix4, dilation, dilation); - pixCompareGray(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, - &pix8); - pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX); - pix10 = pixThresholdToBinary(pix9, mindiff); - pixInvert(pix10, pix10); - pixCountPixels(pix10, &count, NULL); - pixGetDimensions(pix10, &w, &h, NULL); - *pfract = (l_float32)count / (l_float32)(w * h); - pixDestroy(&pix5); - pixDestroy(&pix6); - pixDestroy(&pix7); - pixDestroy(&pix8); - if (ppixdiff1) - *ppixdiff1 = pix9; - else - pixDestroy(&pix9); - if (ppixdiff2) - *ppixdiff2 = pix10; - else - pixDestroy(&pix10); - } else { /* d1 == 32 */ - pix5 = pixColorMorph(pix3, L_MORPH_DILATE, dilation, dilation); - pixCompareRGB(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, - &pix7); - pix6 = pixColorMorph(pix4, L_MORPH_DILATE, dilation, dilation); - pixCompareRGB(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, - &pix8); - pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX); - pix10 = pixConvertRGBToGrayMinMax(pix9, L_CHOOSE_MAX); - pix11 = pixThresholdToBinary(pix10, mindiff); - pixInvert(pix11, pix11); - pixCountPixels(pix11, &count, NULL); - pixGetDimensions(pix11, &w, &h, NULL); - *pfract = (l_float32)count / (l_float32)(w * h); - pixDestroy(&pix5); - pixDestroy(&pix6); - pixDestroy(&pix7); - pixDestroy(&pix8); - pixDestroy(&pix10); - if (ppixdiff1) - *ppixdiff1 = pix9; - else - pixDestroy(&pix9); - if (ppixdiff2) - *ppixdiff2 = pix11; - else - pixDestroy(&pix11); - - } - pixDestroy(&pix3); - 
pixDestroy(&pix4); - return 0; -} - - -/*! - * \brief pixGetPSNR() - * - * \param[in] pix1, pix2 8 or 32 bpp; no colormap - * \param[in] factor sampling factor; >= 1 - * \param[out] ppsnr power signal/noise ratio difference - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes the power S/N ratio, in dB, for the difference
- *          between two images.  By convention, the power S/N
- *          for a grayscale image is ('log' == log base 10,
- *          and 'ln == log base e):
- *            PSNR = 10 * log((255/MSE)^2)
- *                 = 4.3429 * ln((255/MSE)^2)
- *                 = -4.3429 * ln((MSE/255)^2)
- *          where MSE is the mean squared error.
- *          Here are some examples:
- *             MSE             PSNR
- *             ---             ----
- *             10              28.1
- *             3               38.6
- *             1               48.1
- *             0.1             68.1
- *      (2) If pix1 and pix2 have the same pixel values, the MSE = 0.0
- *          and the PSNR is infinity.  For that case, this returns
- *          PSNR = 1000, which corresponds to the very small MSE of
- *          about 10^(-48).
- * 
- */ -l_ok -pixGetPSNR(PIX *pix1, - PIX *pix2, - l_int32 factor, - l_float32 *ppsnr) -{ -l_int32 same, i, j, w, h, d, wpl1, wpl2, v1, v2, r1, g1, b1, r2, g2, b2; -l_uint32 *data1, *data2, *line1, *line2; -l_float32 mse; /* mean squared error */ - - PROCNAME("pixGetPSNR"); - - if (!ppsnr) - return ERROR_INT("&psnr not defined", procName, 1); - *ppsnr = 0.0; - if (!pix1 || !pix2) - return ERROR_INT("empty input pix", procName, 1); - if (!pixSizesEqual(pix1, pix2)) - return ERROR_INT("pix sizes unequal", procName, 1); - if (pixGetColormap(pix1)) - return ERROR_INT("pix1 has colormap", procName, 1); - if (pixGetColormap(pix2)) - return ERROR_INT("pix2 has colormap", procName, 1); - pixGetDimensions(pix1, &w, &h, &d); - if (d != 8 && d != 32) - return ERROR_INT("pix not 8 or 32 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("invalid sampling factor", procName, 1); - - pixEqual(pix1, pix2, &same); - if (same) { - *ppsnr = 1000.0; /* crazy big exponent */ - return 0; - } - - data1 = pixGetData(pix1); - data2 = pixGetData(pix2); - wpl1 = pixGetWpl(pix1); - wpl2 = pixGetWpl(pix2); - mse = 0.0; - if (d == 8) { - for (i = 0; i < h; i += factor) { - line1 = data1 + i * wpl1; - line2 = data2 + i * wpl2; - for (j = 0; j < w; j += factor) { - v1 = GET_DATA_BYTE(line1, j); - v2 = GET_DATA_BYTE(line2, j); - mse += (l_float32)(v1 - v2) * (v1 - v2); - } - } - } else { /* d == 32 */ - for (i = 0; i < h; i += factor) { - line1 = data1 + i * wpl1; - line2 = data2 + i * wpl2; - for (j = 0; j < w; j += factor) { - extractRGBValues(line1[j], &r1, &g1, &b1); - extractRGBValues(line2[j], &r2, &g2, &b2); - mse += ((l_float32)(r1 - r2) * (r1 - r2) + - (g1 - g2) * (g1 - g2) + - (b1 - b2) * (b1 - b2)) / 3.0; - } - } - } - mse = mse / ((l_float32)(w) * h); - - *ppsnr = -4.3429448 * log(mse / (255 * 255)); - return 0; -} - - -/*------------------------------------------------------------------* - * Comparison of photo regions by histogram * - 
*------------------------------------------------------------------*/ -/*! - * \brief pixaComparePhotoRegionsByHisto() - * - * \param[in] pixa any depth; colormap OK - * \param[in] minratio requiring sizes be compatible; < 1.0 - * \param[in] textthresh threshold for text/photo; use 0 for default - * \param[in] factor subsampling; >= 1 - * \param[in] n in range {1, ... 7}. n^2 is the maximum number - * of subregions for histograms; typ. n = 3. - * \param[in] simthresh threshold for similarity; use 0 for default - * \param[out] pnai array giving similarity class indices - * \param[out] pscores [optional] score matrix as 1-D array of size N^2 - * \param[out] ppixd [optional] pix of similarity classes - * \param[in] debug 1 to output histograms; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function takes a pixa of cropped photo images and
- *          compares each one to the others for similarity.
- *          Each image is first tested to see if it is a photo that can
- *          be compared by tiled histograms.  If so, it is padded to put
- *          the centroid in the center of the image, and the histograms
- *          are generated.  The final step of comparing each histogram
- *          with all the others is very fast.
- *      (2) To make the histograms, each image is subdivided in a maximum
- *          of n^2 subimages.  The parameter %n specifies the "side" of
- *          an n x n grid of such subimages.  If the subimages have an
- *          aspect ratio larger than 2, the grid will change, again using n^2
- *          as a maximum for the number of subimages.  For example,
- *          if n == 3, but the image is 600 x 200 pixels, a 3x3 grid
- *          would have subimages of 200 x 67 pixels, which is more
- *          than 2:1, so we change to a 4x2 grid where each subimage
- *          has 150 x 100 pixels.
- *      (3) An initial filter gives %score = 0 if the ratio of widths
- *          and heights (smallest / largest) does not exceed a
- *          threshold %minratio.  If set at 1.0, both images must be
- *          exactly the same size.  A typical value for %minratio is 0.9.
- *      (4) The comparison score between two images is a value in [0.0 .. 1.0].
- *          If the comparison score >= %simthresh, the images are placed in
- *          the same similarity class.  Default value for %simthresh is 0.25.
- *      (5) An array %nai of similarity class indices for pix in the
- *          input pixa is returned.
- *      (6) There are two debugging options:
- *          * An optional 2D matrix of scores is returned as a 1D array.
- *            A visualization of this is written to a temp file.
- *          * An optional pix showing the similarity classes can be
- *            returned.  Text in each input pix is reproduced.
- *      (7) See the notes in pixComparePhotoRegionsByHisto() for details
- *          on the implementation.
- * 
- */ -l_ok -pixaComparePhotoRegionsByHisto(PIXA *pixa, - l_float32 minratio, - l_float32 textthresh, - l_int32 factor, - l_int32 n, - l_float32 simthresh, - NUMA **pnai, - l_float32 **pscores, - PIX **ppixd, - l_int32 debug) -{ -char *text; -l_int32 i, j, nim, w, h, w1, h1, w2, h2, ival, index, classid; -l_float32 score; -l_float32 *scores; -NUMA *nai, *naw, *nah; -NUMAA *naa; -NUMAA **n3a; /* array of naa */ -PIX *pix; - - PROCNAME("pixaComparePhotoRegionsByHisto"); - - if (pscores) *pscores = NULL; - if (ppixd) *ppixd = NULL; - if (!pnai) - return ERROR_INT("&na not defined", procName, 1); - *pnai = NULL; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (minratio < 0.0 || minratio > 1.0) - return ERROR_INT("minratio not in [0.0 ... 1.0]", procName, 1); - if (textthresh <= 0.0) textthresh = 1.3; - if (factor < 1) - return ERROR_INT("subsampling factor must be >= 1", procName, 1); - if (n < 1 || n > 7) { - L_WARNING("n = %d is invalid; setting to 4\n", procName, n); - n = 4; - } - if (simthresh <= 0.0) simthresh = 0.25; - if (simthresh > 1.0) - return ERROR_INT("simthresh invalid; should be near 0.25", procName, 1); - - /* Prepare the histograms */ - nim = pixaGetCount(pixa); - if ((n3a = (NUMAA **)LEPT_CALLOC(nim, sizeof(NUMAA *))) == NULL) - return ERROR_INT("calloc fail for n3a", procName, 1); - naw = numaCreate(0); - nah = numaCreate(0); - for (i = 0; i < nim; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - text = pixGetText(pix); - pixSetResolution(pix, 150, 150); - index = (debug) ? i : 0; - pixGenPhotoHistos(pix, NULL, factor, textthresh, n, - &naa, &w, &h, index); - n3a[i] = naa; - numaAddNumber(naw, w); - numaAddNumber(nah, h); - if (naa) - lept_stderr("Image %s is photo\n", text); - else - lept_stderr("Image %s is NOT photo\n", text); - pixDestroy(&pix); - } - - /* Do the comparisons. We are making a set of classes, where - * all similar images are placed in the same class. There are - * 'nim' input images. 
The classes are labeled by 'classid' (all - * similar images get the same 'classid' value), and 'nai' maps - * the classid of the image in the input array to the classid - * of the similarity class. */ - if ((scores = - (l_float32 *)LEPT_CALLOC((size_t)nim * nim, sizeof(l_float32))) - == NULL) { - L_ERROR("calloc fail for scores\n", procName); - goto cleanup; - } - nai = numaMakeConstant(-1, nim); /* classid array */ - for (i = 0, classid = 0; i < nim; i++) { - scores[nim * i + i] = 1.0; - numaGetIValue(nai, i, &ival); - if (ival != -1) /* already set */ - continue; - numaSetValue(nai, i, classid); - if (n3a[i] == NULL) { /* not a photo */ - classid++; - continue; - } - numaGetIValue(naw, i, &w1); - numaGetIValue(nah, i, &h1); - for (j = i + 1; j < nim; j++) { - numaGetIValue(nai, j, &ival); - if (ival != -1) /* already set */ - continue; - if (n3a[j] == NULL) /* not a photo */ - continue; - numaGetIValue(naw, j, &w2); - numaGetIValue(nah, j, &h2); - compareTilesByHisto(n3a[i], n3a[j], minratio, w1, h1, w2, h2, - &score, NULL); - scores[nim * i + j] = score; - scores[nim * j + i] = score; /* the score array is symmetric */ -/* lept_stderr("score = %5.3f\n", score); */ - if (score > simthresh) { - numaSetValue(nai, j, classid); - lept_stderr( - "Setting %d similar to %d, in class %d; score %5.3f\n", - j, i, classid, score); - } - } - classid++; - } - *pnai = nai; - - /* Debug: optionally save and display the score array. - * All images that are photos are represented by a point on - * the diagonal. Other images in the same similarity class - * are on the same horizontal raster line to the right. - * The array has been symmetrized, so images in the same - * same similarity class also appear on the same column below. 
*/ - if (pscores) { - l_int32 wpl, fact; - l_uint32 *line, *data; - PIX *pix2, *pix3; - pix2 = pixCreate(nim, nim, 8); - data = pixGetData(pix2); - wpl = pixGetWpl(pix2); - for (i = 0; i < nim; i++) { - line = data + i * wpl; - for (j = 0; j < nim; j++) { - SET_DATA_BYTE(line, j, - L_MIN(255, 4.0 * 255 * scores[nim * i + j])); - } - } - fact = L_MAX(2, 1000 / nim); - pix3 = pixExpandReplicate(pix2, fact); - lept_stderr("Writing to /tmp/lept/comp/scorearray.png\n"); - lept_mkdir("lept/comp"); - pixWrite("/tmp/lept/comp/scorearray.png", pix3, IFF_PNG); - pixDestroy(&pix2); - pixDestroy(&pix3); - *pscores = scores; - } else { - LEPT_FREE(scores); - } - - /* Debug: optionally display and save the image comparisons. - * Image similarity classes are displayed by column; similar - * images are displayed in the same column. */ - if (ppixd) - *ppixd = pixaDisplayTiledByIndex(pixa, nai, 200, 20, 2, 6, 0x0000ff00); - -cleanup: - numaDestroy(&naw); - numaDestroy(&nah); - for (i = 0; i < nim; i++) - numaaDestroy(&n3a[i]); - LEPT_FREE(n3a); - return 0; -} - - -/*! - * \brief pixComparePhotoRegionsByHisto() - * - * \param[in] pix1, pix2 any depth; colormap OK - * \param[in] box1, box2 [optional] photo regions from each; can be null - * \param[in] minratio requiring sizes be compatible; < 1.0 - * \param[in] factor subsampling factor; >= 1 - * \param[in] n in range {1, ... 7}. n^2 is the maximum number - * of subregions for histograms; typ. n = 3. - * \param[out] pscore similarity score of histograms - * \param[in] debugflag 1 for debug output; 0 for no debugging - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function compares two grayscale photo regions.  If a
- *          box is given, the region is clipped; otherwise assume
- *          the entire images are photo regions.  This is done with a
- *          set of not more than n^2 spatially aligned histograms, which are
- *          aligned using the centroid of the inverse image.
- *      (2) The parameter %n specifies the "side" of an n x n grid
- *          of subimages.  If the subimages have an aspect ratio larger
- *          than 2, the grid will change, using n^2 as a maximum for
- *          the number of subimages.  For example, if n == 3, but the
- *          image is 600 x 200 pixels, a 3x3 grid would have subimages
- *          of 200 x 67 pixels, which is more than 2:1, so we change
- *          to a 4x2 grid where each subimage has 150 x 100 pixels.
- *      (3) An initial filter gives %score = 0 if the ratio of widths
- *          and heights (smallest / largest) does not exceed a
- *          threshold %minratio.  This must be between 0.5 and 1.0.
- *          If set at 1.0, both images must be exactly the same size.
- *          A typical value for %minratio is 0.9.
- *      (4) Because this function should not be used on text or
- *          line graphics, which can give false positive results
- *          (i.e., high scores for different images), filter the images
- *          using pixGenPhotoHistos(), which returns tiled histograms
- *          only if an image is not text and comparison is expected
- *          to work with histograms.  If either image fails the test,
- *          the comparison returns a score of 0.0.
- *      (5) The white value counts in the histograms are removed; they
- *          are typically pixels that were padded to achieve alignment.
- *      (6) For an efficient representation of the histogram, normalize
- *          using a multiplicative factor so that the number in the
- *          maximum bucket is 255.  It then takes 256 bytes to store.
- *      (7) When comparing the histograms of two regions, use the
- *          Earth Mover distance (EMD), with the histograms normalized
- *          so that the sum over bins is the same.  Further normalize
- *          by dividing by 255, so that the result is in [0.0 ... 1.0].
- *      (8) Get a similarity score S = 1.0 - k * D, where
- *            k is a constant, say in the range 5-10
- *            D = normalized EMD
- *          and for multiple tiles, take the Min(S) to be the final score.
- *          Using aligned tiles gives protection against accidental
- *          similarity of the overall grayscale histograms.
- *          A small number of aligned tiles works well.
- *      (9) With debug on, you get a pdf that shows, for each tile,
- *          the images, histograms and score.
- * 
- */ -l_ok -pixComparePhotoRegionsByHisto(PIX *pix1, - PIX *pix2, - BOX *box1, - BOX *box2, - l_float32 minratio, - l_int32 factor, - l_int32 n, - l_float32 *pscore, - l_int32 debugflag) -{ -l_int32 w1, h1, w2, h2, w1c, h1c, w2c, h2c, debugindex; -l_float32 wratio, hratio; -NUMAA *naa1, *naa2; -PIX *pix3, *pix4; -PIXA *pixa; - - PROCNAME("pixComparePhotoRegionsByHisto"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!pix1 || !pix2) - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - if (minratio < 0.5 || minratio > 1.0) - return ERROR_INT("minratio not in [0.5 ... 1.0]", procName, 1); - if (factor < 1) - return ERROR_INT("subsampling factor must be >= 1", procName, 1); - if (n < 1 || n > 7) { - L_WARNING("n = %d is invalid; setting to 4\n", procName, n); - n = 4; - } - - debugindex = 0; - if (debugflag) { - lept_mkdir("lept/comp"); - debugindex = 666; /* arbitrary number used for naming output */ - } - - /* Initial filter by size */ - if (box1) - boxGetGeometry(box1, NULL, NULL, &w1, &h1); - else - pixGetDimensions(pix1, &w1, &h1, NULL); - if (box2) - boxGetGeometry(box2, NULL, NULL, &w2, &h2); - else - pixGetDimensions(pix1, &w2, &h2, NULL); - wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 : - (l_float32)w2 / (l_float32)w1; - hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 : - (l_float32)h2 / (l_float32)h1; - if (wratio < minratio || hratio < minratio) - return 0; - - /* Initial crop, if necessary, and make histos */ - if (box1) - pix3 = pixClipRectangle(pix1, box1, NULL); - else - pix3 = pixClone(pix1); - pixGenPhotoHistos(pix3, NULL, factor, 0, n, &naa1, &w1c, &h1c, debugindex); - pixDestroy(&pix3); - if (!naa1) return 0; - if (box2) - pix4 = pixClipRectangle(pix2, box2, NULL); - else - pix4 = pixClone(pix2); - pixGenPhotoHistos(pix4, NULL, factor, 0, n, &naa2, &w2c, &h2c, debugindex); - pixDestroy(&pix4); - if (!naa2) return 0; - - /* Compare histograms */ - pixa = (debugflag) ? 
pixaCreate(0) : NULL; - compareTilesByHisto(naa1, naa2, minratio, w1c, h1c, w2c, h2c, pscore, pixa); - pixaDestroy(&pixa); - return 0; -} - - -/*! - * \brief pixGenPhotoHistos() - * - * \param[in] pixs depth > 1 bpp; colormap OK - * \param[in] box [optional] region to be selected; can be null - * \param[in] factor subsampling; >= 1 - * \param[in] thresh threshold for photo/text; use 0 for default - * \param[in] n in range {1, ... 7}. n^2 is the maximum number - * of subregions for histograms; typ. n = 3. - * \param[out] pnaa nx * ny 256-entry gray histograms - * \param[out] pw width of image used to make histograms - * \param[out] ph height of image used to make histograms - * \param[in] debugindex 0 for no debugging; positive integer otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This crops and converts to 8 bpp if necessary.  It adds a
- *          minimal white boundary such that the centroid of the
- *          photo-inverted image is in the center. This allows
- *          automatic alignment with histograms of other image regions.
- *      (2) The parameter %n specifies the "side" of the n x n grid
- *          of subimages.  If the subimages have an aspect ratio larger
- *          than 2, the grid will change, using n^2 as a maximum for
- *          the number of subimages.  For example, if n == 3, but the
- *          image is 600 x 200 pixels, a 3x3 grid would have subimages
- *          of 200 x 67 pixels, which is more than 2:1, so we change
- *          to a 4x2 grid where each subimage has 150 x 100 pixels.
- *      (3) The white value in the histogram is removed, because of
- *          the padding.
- *      (4) Use 0 for conservative default (1.3) for thresh.
- *      (5) For an efficient representation of the histogram, normalize
- *          using a multiplicative factor so that the number in the
- *          maximum bucket is 255.  It then takes 256 bytes to store.
- *      (6) With %debugindex > 0, this makes a pdf that shows, for each tile,
- *          the images and histograms.
- * 
- */ -l_ok -pixGenPhotoHistos(PIX *pixs, - BOX *box, - l_int32 factor, - l_float32 thresh, - l_int32 n, - NUMAA **pnaa, - l_int32 *pw, - l_int32 *ph, - l_int32 debugindex) -{ -char buf[64]; -NUMAA *naa; -PIX *pix1, *pix2, *pix3, *pixm; -PIXA *pixa; - - PROCNAME("pixGenPhotoHistos"); - - if (pnaa) *pnaa = NULL; - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!pnaa) - return ERROR_INT("&naa not defined", procName, 1); - if (!pw || !ph) - return ERROR_INT("&w and &h not both defined", procName, 1); - if (!pixs || pixGetDepth(pixs) == 1) - return ERROR_INT("pixs not defined or 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("subsampling factor must be >= 1", procName, 1); - if (thresh <= 0.0) thresh = 1.3; /* default */ - if (n < 1 || n > 7) { - L_WARNING("n = %d is invalid; setting to 4\n", procName, n); - n = 4; - } - - pixa = NULL; - if (debugindex > 0) { - pixa = pixaCreate(0); - lept_mkdir("lept/comp"); - } - - /* Initial crop, if necessary */ - if (box) - pix1 = pixClipRectangle(pixs, box, NULL); - else - pix1 = pixClone(pixs); - - /* Convert to 8 bpp and pad to center the centroid */ - pix2 = pixConvertTo8(pix1, FALSE); - pix3 = pixPadToCenterCentroid(pix2, factor); - - /* Set to 255 all pixels above 230. Do this so that light gray - * pixels do not enter into the comparison. 
*/ - pixm = pixThresholdToBinary(pix3, 230); - pixInvert(pixm, pixm); - pixSetMaskedGeneral(pix3, pixm, 255, 0, 0); - pixDestroy(&pixm); - - if (debugindex > 0) { - PIX *pix4, *pix5, *pix6, *pix7, *pix8; - PIXA *pixa2; - pix4 = pixConvertTo32(pix2); - pix5 = pixConvertTo32(pix3); - pix6 = pixScaleToSize(pix4, 400, 0); - pix7 = pixScaleToSize(pix5, 400, 0); - pixa2 = pixaCreate(2); - pixaAddPix(pixa2, pix6, L_INSERT); - pixaAddPix(pixa2, pix7, L_INSERT); - pix8 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 3); - pixaAddPix(pixa, pix8, L_INSERT); - pixDestroy(&pix4); - pixDestroy(&pix5); - pixaDestroy(&pixa2); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - - /* Test if this is a photoimage */ - pixDecideIfPhotoImage(pix3, factor, thresh, n, &naa, pixa); - if (naa) { - *pnaa = naa; - *pw = pixGetWidth(pix3); - *ph = pixGetHeight(pix3); - } - - if (pixa) { - snprintf(buf, sizeof(buf), "/tmp/lept/comp/tiledhistos.%d.pdf", - debugindex); - lept_stderr("Writing to %s\n", buf); - pixaConvertToPdf(pixa, 300, 1.0, L_FLATE_ENCODE, 0, NULL, buf); - pixaDestroy(&pixa); - } - - pixDestroy(&pix3); - return 0; -} - - -/*! - * \brief pixPadToCenterCentroid() - * - * \param[in] pixs any depth, colormap OK - * \param[in] factor subsampling for centroid; >= 1 - * \return pixd padded with white pixels, or NULL on error. - * - *
- * Notes:
- *      (1) This add minimum white padding to an 8 bpp pix, such that
- *          the centroid of the photometric inverse is in the center of
- *          the resulting image.  Thus in computing the centroid,
- *          black pixels have weight 255, and white pixels have weight 0.
- * 
- */ -PIX * -pixPadToCenterCentroid(PIX *pixs, - l_int32 factor) - -{ -l_float32 cx, cy; -l_int32 xs, ys, delx, dely, icx, icy, ws, hs, wd, hd; -PIX *pix1, *pixd; - - PROCNAME("pixPadToCenterCentroid"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("invalid sampling factor", procName, NULL); - - pix1 = pixConvertTo8(pixs, FALSE); - pixCentroid8(pix1, factor, &cx, &cy); - icx = (l_int32)(cx + 0.5); - icy = (l_int32)(cy + 0.5); - pixGetDimensions(pix1, &ws, &hs, NULL); - delx = ws - 2 * icx; - dely = hs - 2 * icy; - xs = L_MAX(0, delx); - ys = L_MAX(0, dely); - wd = 2 * L_MAX(icx, ws - icx); - hd = 2 * L_MAX(icy, hs - icy); - pixd = pixCreate(wd, hd, 8); - pixSetAll(pixd); /* to white */ - pixCopyResolution(pixd, pixs); - pixRasterop(pixd, xs, ys, ws, hs, PIX_SRC, pix1, 0, 0); - pixDestroy(&pix1); - return pixd; -} - - -/*! - * \brief pixCentroid8() - * - * \param[in] pixs 8 bpp - * \param[in] factor subsampling factor; >= 1 - * \param[out] pcx x value of centroid - * \param[out] pcy y value of centroid - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This first does a photometric inversion (black = 255, white = 0).
- *          It then finds the centroid of the result.  The inversion is
- *          done because white is usually background, so the centroid
- *          is computed based on the "foreground" gray pixels, and the
- *          darker the pixel, the more weight it is given.
- * 
- */ -l_ok -pixCentroid8(PIX *pixs, - l_int32 factor, - l_float32 *pcx, - l_float32 *pcy) -{ -l_int32 i, j, w, h, wpl, val; -l_float32 sumx, sumy, sumv; -l_uint32 *data, *line; -PIX *pix1; - - PROCNAME("pixCentroid8"); - - if (pcx) *pcx = 0.0; - if (pcy) *pcy = 0.0; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("subsampling factor must be >= 1", procName, 1); - if (!pcx || !pcy) - return ERROR_INT("&cx and &cy not both defined", procName, 1); - - pix1 = pixInvert(NULL, pixs); - pixGetDimensions(pix1, &w, &h, NULL); - data = pixGetData(pix1); - wpl = pixGetWpl(pix1); - sumx = sumy = sumv = 0.0; - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(line, j); - sumx += val * j; - sumy += val * i; - sumv += val; - } - } - pixDestroy(&pix1); - - if (sumv == 0) { - L_INFO("input image is white\n", procName); - *pcx = (l_float32)(w) / 2; - *pcy = (l_float32)(h) / 2; - } else { - *pcx = sumx / sumv; - *pcy = sumy / sumv; - } - - return 0; -} - - -/*! - * \brief pixDecideIfPhotoImage() - * - * \param[in] pix 8 bpp, centroid in center - * \param[in] factor subsampling for histograms; >= 1 - * \param[in] thresh threshold for photo/text; use 0 for default - * \param[in] n in range {1, ... 7}. n^2 is the maximum number - * of subregions for histograms; typ. n = 3. - * \param[out] pnaa array of normalized histograms - * \param[in] pixadebug [optional] use only for debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input image must be 8 bpp (no colormap), and padded with
- *          white pixels so the centroid of photo-inverted pixels is at
- *          the center of the image.
- *      (2) The parameter %n specifies the "side" of the n x n grid
- *          of subimages.  If the subimages have an aspect ratio larger
- *          than 2, the grid will change, using n^2 as a maximum for
- *          the number of subimages.  For example, if n == 3, but the
- *          image is 600 x 200 pixels, a 3x3 grid would have subimages
- *          of 200 x 67 pixels, which is more than 2:1, so we change
- *          to a 4x2 grid where each subimage has 150 x 100 pixels.
- *      (3) If the pix is not almost certainly a photoimage, the returned
- *          histograms (%naa) are null.
- *      (4) If histograms are generated, the white (255) count is set
- *          to 0.  This removes all pixels values above 230, including
- *          white padding from the centroid matching operation, from
- *          consideration.  The resulting histograms are then normalized
- *          so the maximum count is 255.
- *      (5) Default for %thresh is 1.3; this seems sufficiently conservative.
- *      (6) Use %pixadebug == NULL unless debug output is requested.
- * 
- */ -l_ok -pixDecideIfPhotoImage(PIX *pix, - l_int32 factor, - l_float32 thresh, - l_int32 n, - NUMAA **pnaa, - PIXA *pixadebug) -{ -char buf[64]; -l_int32 i, w, h, nx, ny, ngrids, istext, isphoto; -l_float32 maxval, sum1, sum2, ratio; -L_BMF *bmf; -NUMA *na1, *na2, *na3, *narv; -NUMAA *naa; -PIX *pix1; -PIXA *pixa1, *pixa2, *pixa3; - - PROCNAME("pixDecideIfPhotoImage"); - - if (!pnaa) - return ERROR_INT("&naa not defined", procName, 1); - *pnaa = NULL; - if (!pix || pixGetDepth(pix) != 8 || pixGetColormap(pix)) - return ERROR_INT("pix undefined or invalid", procName, 1); - if (n < 1 || n > 7) { - L_WARNING("n = %d is invalid; setting to 4\n", procName, n); - n = 4; - } - if (thresh <= 0.0) thresh = 1.3; /* default */ - - /* Look for text lines */ - pixDecideIfText(pix, NULL, &istext, pixadebug); - if (istext) { - L_INFO("Image is text\n", procName); - return 0; - } - - /* Determine grid from n */ - pixGetDimensions(pix, &w, &h, NULL); - if (w == 0 || h == 0) - return ERROR_INT("invalid pix dimension", procName, 1); - findHistoGridDimensions(n, w, h, &nx, &ny, 1); - - /* Evaluate histograms in each tile */ - pixa1 = pixaSplitPix(pix, nx, ny, 0, 0); - ngrids = nx * ny; - bmf = (pixadebug) ? 
bmfCreate(NULL, 6) : NULL; - naa = numaaCreate(ngrids); - if (pixadebug) { - lept_rmdir("lept/compplot"); - lept_mkdir("lept/compplot"); - } - for (i = 0; i < ngrids; i++) { - pix1 = pixaGetPix(pixa1, i, L_CLONE); - - /* Get histograms, set white count to 0, normalize max to 255 */ - na1 = pixGetGrayHistogram(pix1, factor); - numaSetValue(na1, 255, 0); - na2 = numaWindowedMean(na1, 5); /* do some smoothing */ - numaGetMax(na2, &maxval, NULL); - na3 = numaTransform(na2, 0, 255.0 / maxval); - if (pixadebug) { - snprintf(buf, sizeof(buf), "/tmp/lept/compplot/plot.%d", i); - gplotSimple1(na3, GPLOT_PNG, buf, "Histos"); - } - - numaaAddNuma(naa, na3, L_INSERT); - numaDestroy(&na1); - numaDestroy(&na2); - pixDestroy(&pix1); - } - if (pixadebug) { - pix1 = pixaDisplayTiledInColumns(pixa1, nx, 1.0, 30, 2); - pixaAddPix(pixadebug, pix1, L_INSERT); - pixa2 = pixaReadFiles("/tmp/lept/compplot", ".png"); - pixa3 = pixaScale(pixa2, 0.4, 0.4); - pix1 = pixaDisplayTiledInColumns(pixa3, nx, 1.0, 30, 2); - pixaAddPix(pixadebug, pix1, L_INSERT); - pixaDestroy(&pixa2); - pixaDestroy(&pixa3); - } - - /* Compute the standard deviation between these histos to decide - * if the image is photo or something more like line art, - * which does not support good comparison by tiled histograms. */ - grayInterHistogramStats(naa, 5, NULL, NULL, NULL, &narv); - - /* For photos, the root variance has a larger weight of - * values in the range [50 ... 150] compared to [200 ... 230], - * than text or line art. For the latter, most of the variance - * between tiles is in the lightest parts of the image, well - * above 150. */ - numaGetSumOnInterval(narv, 50, 150, &sum1); - numaGetSumOnInterval(narv, 200, 230, &sum2); - if (sum2 == 0.0) { /* shouldn't happen */ - ratio = 0.001; /* anything very small for debug output */ - isphoto = 0; /* be conservative */ - } else { - ratio = sum1 / sum2; - isphoto = (ratio > thresh) ? 
1 : 0; - } - if (pixadebug) { - if (isphoto) - L_INFO("ratio %f > %f; isphoto is true\n", - procName, ratio, thresh); - else - L_INFO("ratio %f < %f; isphoto is false\n", - procName, ratio, thresh); - } - if (isphoto) - *pnaa = naa; - else - numaaDestroy(&naa); - bmfDestroy(&bmf); - numaDestroy(&narv); - pixaDestroy(&pixa1); - return 0; -} - - -/*! - * \brief findHistoGridDimensions() - * - * \param[in] n max number of grid elements is n^2; typ. n = 3 - * \param[in] w width of image to be subdivided - * \param[in] h height of image to be subdivided - * \param[out] pnx number of grid elements in x direction - * \param[out] pny number of grid elements in y direction - * \param[in] debug 1 for debug output to stderr - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This determines the number of subdivisions to be used on
- *          the image in each direction.  A histogram will be built
- *          for each subimage.
- *      (2) The parameter %n specifies the "side" of the n x n grid
- *          of subimages.  If the subimages have an aspect ratio larger
- *          than 2, the grid will change, using n^2 as a maximum for
- *          the number of subimages.  For example, if n == 3, but the
- *          image is 600 x 200 pixels, a 3x3 grid would have subimages
- *          of 200 x 67 pixels, which is more than 2:1, so we change
- *          to a 4x2 grid where each subimage has 150 x 100 pixels.
- * 
- */ -static l_ok -findHistoGridDimensions(l_int32 n, - l_int32 w, - l_int32 h, - l_int32 *pnx, - l_int32 *pny, - l_int32 debug) -{ -l_int32 nx, ny, max; -l_float32 ratio; - - ratio = (l_float32)w / (l_float32)h; - max = n * n; - nx = ny = n; - while (nx > 1 && ny > 1) { - if (ratio > 2.0) { /* reduce ny */ - ny--; - nx = max / ny; - if (debug) - lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n", - nx, ny, ratio); - } else if (ratio < 0.5) { /* reduce nx */ - nx--; - ny = max / nx; - if (debug) - lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n", - nx, ny, ratio); - } else { /* we're ok */ - if (debug) - lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n", - nx, ny, ratio); - break; - } - ratio = (l_float32)(ny * w) / (l_float32)(nx * h); - } - *pnx = nx; - *pny = ny; - return 0; -} - - -/*! - * \brief compareTilesByHisto() - * - * \param[in] naa1, naa2 each is a set of 256 entry histograms - * \param[in] minratio requiring image sizes be compatible; < 1.0 - * \param[in] w1, h1, w2, h2 image sizes from which histograms were made - * \param[out] pscore similarity score of histograms - * \param[in] pixadebug [optional] use only for debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) naa1 and naa2 must be generated using pixGenPhotoHistos(),
- *          using the same tile sizes.
- *      (2) The image dimensions must be similar.  The score is 0.0
- *          if the ratio of widths and heights (smallest / largest)
- *          exceeds a threshold %minratio, which must be between
- *          0.5 and 1.0.  If set at 1.0, both images must be exactly
- *          the same size.  A typical value for %minratio is 0.9.
- *      (3) The input pixadebug is null unless debug output is requested.
- * 
- */ -l_ok -compareTilesByHisto(NUMAA *naa1, - NUMAA *naa2, - l_float32 minratio, - l_int32 w1, - l_int32 h1, - l_int32 w2, - l_int32 h2, - l_float32 *pscore, - PIXA *pixadebug) -{ -char buf1[128], buf2[128]; -l_int32 i, n; -l_float32 wratio, hratio, score, minscore, dist; -L_BMF *bmf; -NUMA *na1, *na2, *nadist, *nascore; - - PROCNAME("compareTilesByHisto"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!naa1 || !naa2) - return ERROR_INT("naa1 and naa2 not both defined", procName, 1); - - /* Filter for different sizes */ - wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 : - (l_float32)w2 / (l_float32)w1; - hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 : - (l_float32)h2 / (l_float32)h1; - if (wratio < minratio || hratio < minratio) { - if (pixadebug) - L_INFO("Sizes differ: wratio = %f, hratio = %f\n", - procName, wratio, hratio); - return 0; - } - n = numaaGetCount(naa1); - if (n != numaaGetCount(naa2)) { /* due to differing w/h ratio */ - L_INFO("naa1 and naa2 sizes are different\n", procName); - return 0; - } - - if (pixadebug) { - lept_rmdir("lept/comptile"); - lept_mkdir("lept/comptile"); - } - - - /* Evaluate histograms in each tile. Remove white before - * computing EMD, because there are may be a lot of white - * pixels due to padding, and we don't want to include them. - * This also makes the debug histo plots more informative. */ - minscore = 1.0; - nadist = numaCreate(n); - nascore = numaCreate(n); - bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL; - for (i = 0; i < n; i++) { - na1 = numaaGetNuma(naa1, i, L_CLONE); - na2 = numaaGetNuma(naa2, i, L_CLONE); - numaSetValue(na1, 255, 0.0); - numaSetValue(na2, 255, 0.0); - - /* To compare histograms, use the normalized earthmover distance. - * Further normalize to get the EM distance as a fraction of the - * maximum distance in the histogram (255). Finally, scale this - * up by 10.0, and subtract from 1.0 to get a similarity score. 
*/ - numaEarthMoverDistance(na1, na2, &dist); - score = L_MAX(0.0, 1.0 - 10.0 * (dist / 255.)); - numaAddNumber(nadist, dist); - numaAddNumber(nascore, score); - minscore = L_MIN(minscore, score); - if (pixadebug) { - snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d", i); - gplotSimple2(na1, na2, GPLOT_PNG, buf1, "Histos"); - } - numaDestroy(&na1); - numaDestroy(&na2); - } - *pscore = minscore; - - if (pixadebug) { - for (i = 0; i < n; i++) { - PIX *pix1, *pix2; - snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d.png", i); - pix1 = pixRead(buf1); - numaGetFValue(nadist, i, &dist); - numaGetFValue(nascore, i, &score); - snprintf(buf2, sizeof(buf2), - "Image %d\ndist = %5.3f, score = %5.3f", i, dist, score); - pix2 = pixAddTextlines(pix1, bmf, buf2, 0x0000ff00, L_ADD_BELOW); - pixaAddPix(pixadebug, pix2, L_INSERT); - pixDestroy(&pix1); - } - lept_stderr("Writing to /tmp/lept/comptile/comparegray.pdf\n"); - pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL, - "/tmp/lept/comptile/comparegray.pdf"); - numaWriteDebug("/tmp/lept/comptile/scores.na", nascore); - numaWriteDebug("/tmp/lept/comptile/dists.na", nadist); - } - - bmfDestroy(&bmf); - numaDestroy(&nadist); - numaDestroy(&nascore); - return 0; -} - - -/*! - * \brief pixCompareGrayByHisto() - * - * \param[in] pix1, pix2 any depth; colormap OK - * \param[in] box1, box2 [optional] region selected from each; can be null - * \param[in] minratio requiring sizes be compatible; < 1.0 - * \param[in] maxgray max value to keep in histo; >= 200, 255 to keep all - * \param[in] factor subsampling factor; >= 1 - * \param[in] n in range {1, ... 7}. n^2 is the maximum number - * of subregions for histograms; typ. n = 3. - * \param[out] pscore similarity score of histograms - * \param[in] debugflag 1 for debug output; 0 for no debugging - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function compares two grayscale photo regions.  It can
- *          do it with a single histogram from each region, or with a
- *          set of spatially aligned histograms.  For both cases,
- *          align the regions using the centroid of the inverse image,
- *          and crop to the smallest of the two.
- *      (2) The parameter %n specifies the "side" of an n x n grid
- *          of subimages.  If the subimages have an aspect ratio larger
- *          than 2, the grid will change, using n^2 as a maximum for
- *          the number of subimages.  For example, if n == 3, but the
- *          image is 600 x 200 pixels, a 3x3 grid would have subimages
- *          of 200 x 67 pixels, which is more than 2:1, so we change
- *          to a 4x2 grid where each subimage has 150 x 100 pixels.
- *      (3) An initial filter gives %score = 0 if the ratio of widths
- *          and heights (smallest / largest) does not exceed a
- *          threshold %minratio.  This must be between 0.5 and 1.0.
- *          If set at 1.0, both images must be exactly the same size.
- *          A typical value for %minratio is 0.9.
- *      (4) The lightest values in the histogram can be disregarded.
- *          Set %maxgray to the lightest value to be kept.  For example,
- *          to eliminate white (255), set %maxgray = 254.  %maxgray must
- *          be >= 200.
- *      (5) For an efficient representation of the histogram, normalize
- *          using a multiplicative factor so that the number in the
- *          maximum bucket is 255.  It then takes 256 bytes to store.
- *      (6) When comparing the histograms of two regions:
- *          ~ Use %maxgray = 254 to ignore the white pixels, the number
- *            of which may be sensitive to the crop region if the pixels
- *            outside that region are white.
- *          ~ Use the Earth Mover distance (EMD), with the histograms
- *            normalized so that the sum over bins is the same.
- *            Further normalize by dividing by 255, so that the result
- *            is in [0.0 ... 1.0].
- *      (7) Get a similarity score S = 1.0 - k * D, where
- *            k is a constant, say in the range 5-10
- *            D = normalized EMD
- *          and for multiple tiles, take the Min(S) to be the final score.
- *          Using aligned tiles gives protection against accidental
- *          similarity of the overall grayscale histograms.
- *          A small number of aligned tiles works well.
- *      (8) With debug on, you get a pdf that shows, for each tile,
- *          the images, histograms and score.
- *      (9) When to use:
- *          (a) Because this function should not be used on text or
- *              line graphics, which can give false positive results
- *              (i.e., high scores for different images), the input
- *              images should be filtered.
- *          (b) To filter, first use pixDecideIfText().  If that function
- *              says the image is text, do not use it.  If the function
- *              says it is not text, it still may be line graphics, and
- *              in that case, use:
- *                 pixGetGrayHistogramTiled()
- *                 grayInterHistogramStats()
- *              to determine whether it is photo or line graphics.
- * 
- */ -l_ok -pixCompareGrayByHisto(PIX *pix1, - PIX *pix2, - BOX *box1, - BOX *box2, - l_float32 minratio, - l_int32 maxgray, - l_int32 factor, - l_int32 n, - l_float32 *pscore, - l_int32 debugflag) -{ -l_int32 w1, h1, w2, h2; -l_float32 wratio, hratio; -BOX *box3, *box4; -PIX *pix3, *pix4, *pix5, *pix6, *pix7, *pix8; -PIXA *pixa; - - PROCNAME("pixCompareGrayByHisto"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!pix1 || !pix2) - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - if (minratio < 0.5 || minratio > 1.0) - return ERROR_INT("minratio not in [0.5 ... 1.0]", procName, 1); - if (maxgray < 200) - return ERROR_INT("invalid maxgray; should be >= 200", procName, 1); - maxgray = L_MIN(255, maxgray); - if (factor < 1) - return ERROR_INT("subsampling factor must be >= 1", procName, 1); - if (n < 1 || n > 7) { - L_WARNING("n = %d is invalid; setting to 4\n", procName, n); - n = 4; - } - - if (debugflag) - lept_mkdir("lept/comp"); - - /* Initial filter by size */ - if (box1) - boxGetGeometry(box1, NULL, NULL, &w1, &h1); - else - pixGetDimensions(pix1, &w1, &h1, NULL); - if (box2) - boxGetGeometry(box2, NULL, NULL, &w2, &h2); - else - pixGetDimensions(pix1, &w2, &h2, NULL); - wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 : - (l_float32)w2 / (l_float32)w1; - hratio = (h1 < h2) ? 
(l_float32)h1 / (l_float32)h2 : - (l_float32)h2 / (l_float32)h1; - if (wratio < minratio || hratio < minratio) - return 0; - - /* Initial crop, if necessary */ - if (box1) - pix3 = pixClipRectangle(pix1, box1, NULL); - else - pix3 = pixClone(pix1); - if (box2) - pix4 = pixClipRectangle(pix2, box2, NULL); - else - pix4 = pixClone(pix2); - - /* Convert to 8 bpp, align centroids and do maximal crop */ - pix5 = pixConvertTo8(pix3, FALSE); - pix6 = pixConvertTo8(pix4, FALSE); - pixCropAlignedToCentroid(pix5, pix6, factor, &box3, &box4); - pix7 = pixClipRectangle(pix5, box3, NULL); - pix8 = pixClipRectangle(pix6, box4, NULL); - pixa = (debugflag) ? pixaCreate(0) : NULL; - if (debugflag) { - PIX *pix9, *pix10, *pix11, *pix12, *pix13; - PIXA *pixa2; - pix9 = pixConvertTo32(pix5); - pix10 = pixConvertTo32(pix6); - pixRenderBoxArb(pix9, box3, 2, 255, 0, 0); - pixRenderBoxArb(pix10, box4, 2, 255, 0, 0); - pix11 = pixScaleToSize(pix9, 400, 0); - pix12 = pixScaleToSize(pix10, 400, 0); - pixa2 = pixaCreate(2); - pixaAddPix(pixa2, pix11, L_INSERT); - pixaAddPix(pixa2, pix12, L_INSERT); - pix13 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 0); - pixaAddPix(pixa, pix13, L_INSERT); - pixDestroy(&pix9); - pixDestroy(&pix10); - pixaDestroy(&pixa2); - } - pixDestroy(&pix3); - pixDestroy(&pix4); - pixDestroy(&pix5); - pixDestroy(&pix6); - boxDestroy(&box3); - boxDestroy(&box4); - - /* Tile and compare histograms */ - pixCompareTilesByHisto(pix7, pix8, maxgray, factor, n, pscore, pixa); - pixaDestroy(&pixa); - pixDestroy(&pix7); - pixDestroy(&pix8); - return 0; -} - - -/*! - * \brief pixCompareTilesByHisto() - * - * \param[in] pix1, pix2 8 bpp - * \param[in] maxgray max value to keep in histo; 255 to keep all - * \param[in] factor subsampling factor; >= 1 - * \param[in] n see pixCompareGrayByHisto() - * \param[out] pscore similarity score of histograms - * \param[in] pixadebug [optional] use only for debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This static function is only called from pixCompareGrayByHisto().
- *          The input images have been converted to 8 bpp if necessary,
- *          aligned and cropped.
- *      (2) The input pixadebug is null unless debug output is requested.
- *      (3) See pixCompareGrayByHisto() for details.
- * 
- */ -static l_ok -pixCompareTilesByHisto(PIX *pix1, - PIX *pix2, - l_int32 maxgray, - l_int32 factor, - l_int32 n, - l_float32 *pscore, - PIXA *pixadebug) -{ -char buf[64]; -l_int32 w, h, i, j, nx, ny, ngr; -l_float32 score, minscore, maxval1, maxval2, dist; -L_BMF *bmf; -NUMA *na1, *na2, *na3, *na4, *na5, *na6, *na7; -PIX *pix3, *pix4; -PIXA *pixa1, *pixa2; - - PROCNAME("pixCompareTilesByHisto"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!pix1 || !pix2) - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - - /* Determine grid from n */ - pixGetDimensions(pix1, &w, &h, NULL); - findHistoGridDimensions(n, w, h, &nx, &ny, 1); - ngr = nx * ny; - - /* Evaluate histograms in each tile */ - pixa1 = pixaSplitPix(pix1, nx, ny, 0, 0); - pixa2 = pixaSplitPix(pix2, nx, ny, 0, 0); - na7 = (pixadebug) ? numaCreate(ngr) : NULL; - bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL; - minscore = 1.0; - for (i = 0; i < ngr; i++) { - pix3 = pixaGetPix(pixa1, i, L_CLONE); - pix4 = pixaGetPix(pixa2, i, L_CLONE); - - /* Get histograms, set white count to 0, normalize max to 255 */ - na1 = pixGetGrayHistogram(pix3, factor); - na2 = pixGetGrayHistogram(pix4, factor); - if (maxgray < 255) { - for (j = maxgray + 1; j <= 255; j++) { - numaSetValue(na1, j, 0); - numaSetValue(na2, j, 0); - } - } - na3 = numaWindowedMean(na1, 5); - na4 = numaWindowedMean(na2, 5); - numaGetMax(na3, &maxval1, NULL); - numaGetMax(na4, &maxval2, NULL); - na5 = numaTransform(na3, 0, 255.0 / maxval1); - na6 = numaTransform(na4, 0, 255.0 / maxval2); - if (pixadebug) { - gplotSimple2(na5, na6, GPLOT_PNG, "/tmp/lept/comp/plot1", "Histos"); - } - - /* To compare histograms, use the normalized earthmover distance. - * Further normalize to get the EM distance as a fraction of the - * maximum distance in the histogram (255). Finally, scale this - * up by 10.0, and subtract from 1.0 to get a similarity score. 
*/ - numaEarthMoverDistance(na5, na6, &dist); - score = L_MAX(0.0, 1.0 - 8.0 * (dist / 255.)); - if (pixadebug) numaAddNumber(na7, score); - minscore = L_MIN(minscore, score); - if (pixadebug) { - PIX *pix5, *pix6, *pix7, *pix8, *pix9, *pix10; - PIXA *pixa3; - l_int32 w, h, wscale; - pixa3 = pixaCreate(3); - pixGetDimensions(pix3, &w, &h, NULL); - wscale = (w > h) ? 700 : 400; - pix5 = pixScaleToSize(pix3, wscale, 0); - pix6 = pixScaleToSize(pix4, wscale, 0); - pixaAddPix(pixa3, pix5, L_INSERT); - pixaAddPix(pixa3, pix6, L_INSERT); - pix7 = pixRead("/tmp/lept/comp/plot1.png"); - pix8 = pixScaleToSize(pix7, 700, 0); - snprintf(buf, sizeof(buf), "%5.3f", score); - pix9 = pixAddTextlines(pix8, bmf, buf, 0x0000ff00, L_ADD_RIGHT); - pixaAddPix(pixa3, pix9, L_INSERT); - pix10 = pixaDisplayTiledInRows(pixa3, 32, 1000, 1.0, 0, 50, 0); - pixaAddPix(pixadebug, pix10, L_INSERT); - pixDestroy(&pix7); - pixDestroy(&pix8); - pixaDestroy(&pixa3); - } - numaDestroy(&na1); - numaDestroy(&na2); - numaDestroy(&na3); - numaDestroy(&na4); - numaDestroy(&na5); - numaDestroy(&na6); - pixDestroy(&pix3); - pixDestroy(&pix4); - } - *pscore = minscore; - - if (pixadebug) { - pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL, - "/tmp/lept/comp/comparegray.pdf"); - numaWriteDebug("/tmp/lept/comp/tilescores.na", na7); - } - - bmfDestroy(&bmf); - numaDestroy(&na7); - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - return 0; -} - - -/*! - * \brief pixCropAlignedToCentroid() - * - * \param[in] pix1, pix2 any depth; colormap OK - * \param[in] factor subsampling; >= 1 - * \param[out] pbox1 crop box for pix1 - * \param[out] pbox2 crop box for pix2 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This finds the maximum crop boxes for two 8 bpp images when
- *          their centroids of their photometric inverses are aligned.
- *          Black pixels have weight 255; white pixels have weight 0.
- * 
- */ -l_ok -pixCropAlignedToCentroid(PIX *pix1, - PIX *pix2, - l_int32 factor, - BOX **pbox1, - BOX **pbox2) -{ -l_float32 cx1, cy1, cx2, cy2; -l_int32 w1, h1, w2, h2, icx1, icy1, icx2, icy2; -l_int32 xm, xm1, xm2, xp, xp1, xp2, ym, ym1, ym2, yp, yp1, yp2; -PIX *pix3, *pix4; - - PROCNAME("pixCropAlignedToCentroid"); - - if (pbox1) *pbox1 = NULL; - if (pbox2) *pbox2 = NULL; - if (!pix1 || !pix2) - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - if (factor < 1) - return ERROR_INT("subsampling factor must be >= 1", procName, 1); - if (!pbox1 || !pbox2) - return ERROR_INT("&box1 and &box2 not both defined", procName, 1); - - pix3 = pixConvertTo8(pix1, FALSE); - pix4 = pixConvertTo8(pix2, FALSE); - pixCentroid8(pix3, factor, &cx1, &cy1); - pixCentroid8(pix4, factor, &cx2, &cy2); - pixGetDimensions(pix3, &w1, &h1, NULL); - pixGetDimensions(pix4, &w2, &h2, NULL); - pixDestroy(&pix3); - pixDestroy(&pix4); - - icx1 = (l_int32)(cx1 + 0.5); - icy1 = (l_int32)(cy1 + 0.5); - icx2 = (l_int32)(cx2 + 0.5); - icy2 = (l_int32)(cy2 + 0.5); - xm = L_MIN(icx1, icx2); - xm1 = icx1 - xm; - xm2 = icx2 - xm; - xp = L_MIN(w1 - icx1, w2 - icx2); /* one pixel beyond to the right */ - xp1 = icx1 + xp; - xp2 = icx2 + xp; - ym = L_MIN(icy1, icy2); - ym1 = icy1 - ym; - ym2 = icy2 - ym; - yp = L_MIN(h1 - icy1, h2 - icy2); /* one pixel below the bottom */ - yp1 = icy1 + yp; - yp2 = icy2 + yp; - *pbox1 = boxCreate(xm1, ym1, xp1 - xm1, yp1 - ym1); - *pbox2 = boxCreate(xm2, ym2, xp2 - xm2, yp2 - ym2); - return 0; -} - - -/*! - * \brief l_compressGrayHistograms() - * - * \param[in] naa set of 256-entry histograms - * \param[in] w, h size of image - * \param[out] psize size of byte array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This first writes w and h to the byte array as 4 byte ints.
- *      (2) Then it normalizes each histogram to a max value of 255,
- *          and saves each value as a byte.  If there are
- *          N histograms, the output bytearray has 8 + 256 * N bytes.
- *      (3) Further compression of the array with zlib yields only about
- *          a 25% decrease in size, so we don't bother.  If size reduction
- *          were important, a lossy transform using a 1-dimensional DCT
- *          would be effective, because we don't care about the fine
- *          details of these histograms.
- * 
- */ -l_uint8 * -l_compressGrayHistograms(NUMAA *naa, - l_int32 w, - l_int32 h, - size_t *psize) -{ -l_uint8 *bytea; -l_int32 i, j, n, nn, ival; -l_float32 maxval; -NUMA *na1, *na2; - - PROCNAME("l_compressGrayHistograms"); - - if (!psize) - return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL); - *psize = 0; - if (!naa) - return (l_uint8 *)ERROR_PTR("naa not defined", procName, NULL); - n = numaaGetCount(naa); - for (i = 0; i < n; i++) { - nn = numaaGetNumaCount(naa, i); - if (nn != 256) { - L_ERROR("%d numbers in numa[%d]\n", procName, nn, i); - return NULL; - } - } - - if ((bytea = (l_uint8 *)LEPT_CALLOC(8 + 256 * n, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("bytea not made", procName, NULL); - *psize = 8 + 256 * n; - l_setDataFourBytes(bytea, 0, w); - l_setDataFourBytes(bytea, 1, h); - for (i = 0; i < n; i++) { - na1 = numaaGetNuma(naa, i, L_COPY); - numaGetMax(na1, &maxval, NULL); - na2 = numaTransform(na1, 0, 255.0 / maxval); - for (j = 0; j < 256; j++) { - numaGetIValue(na2, j, &ival); - bytea[8 + 256 * i + j] = ival; - } - numaDestroy(&na1); - numaDestroy(&na2); - } - - return bytea; -} - - -/*! - * \brief l_uncompressGrayHistograms() - * - * \param[in] bytea byte array of size 8 + 256 * N, N an integer - * \param[in] size size of byte array - * \param[out] pw width of the image that generated the histograms - * \param[out] ph height of the image - * \return numaa representing N histograms, each with 256 bins, - * or NULL on error. - * - *
- * Notes:
- *      (1) The first 8 bytes are read as two 32-bit ints.
- *      (2) Then this constructs a numaa representing some number of
- *          gray histograms that are normalized such that the max value
- *          in each histogram is 255.  The data is stored as a byte
- *          array, with 256 bytes holding the data for each histogram.
- *          Each gray histogram was computed from a tile of a grayscale image.
- * 
- */ -NUMAA * -l_uncompressGrayHistograms(l_uint8 *bytea, - size_t size, - l_int32 *pw, - l_int32 *ph) -{ -l_int32 i, j, n; -NUMA *na; -NUMAA *naa; - - PROCNAME("l_uncompressGrayHistograms"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!pw || !ph) - return (NUMAA *)ERROR_PTR("&w and &h not both defined", procName, NULL); - if (!bytea) - return (NUMAA *)ERROR_PTR("bytea not defined", procName, NULL); - n = (size - 8) / 256; - if ((size - 8) % 256 != 0) - return (NUMAA *)ERROR_PTR("bytea size is invalid", procName, NULL); - - *pw = l_getDataFourBytes(bytea, 0); - *ph = l_getDataFourBytes(bytea, 1); - naa = numaaCreate(n); - for (i = 0; i < n; i++) { - na = numaCreate(256); - for (j = 0; j < 256; j++) - numaAddNumber(na, bytea[8 + 256 * i + j]); - numaaAddNuma(naa, na, L_INSERT); - } - - return naa; -} - - -/*------------------------------------------------------------------* - * Translated images at the same resolution * - *------------------------------------------------------------------*/ -/*! - * \brief pixCompareWithTranslation() - * - * \param[in] pix1, pix2 any depth; colormap OK - * \param[in] thresh threshold for converting to 1 bpp - * \param[out] pdelx x translation on pix2 to align with pix1 - * \param[out] pdely y translation on pix2 to align with pix1 - * \param[out] pscore correlation score at best alignment - * \param[in] debugflag 1 for debug output; 0 for no debugging - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a coarse-to-fine search for best translational
- *          alignment of two images, measured by a scoring function
- *          that is the correlation between the fg pixels.
- *      (2) The threshold is used if the images aren't 1 bpp.
- *      (3) With debug on, you get a pdf that shows, as a grayscale
- *          image, the score as a function of shift from the initial
- *          estimate, for each of the four levels.  The shift is 0 at
- *          the center of the image.
- *      (4) With debug on, you also get a pdf that shows the
- *          difference at the best alignment between the two images,
- *          at each of the four levels.  The red and green pixels
- *          show locations where one image has a fg pixel and the
- *          other doesn't.  The black pixels are where both images
- *          have fg pixels, and white pixels are where neither image
- *          has fg pixels.
- * 
- */ -l_ok -pixCompareWithTranslation(PIX *pix1, - PIX *pix2, - l_int32 thresh, - l_int32 *pdelx, - l_int32 *pdely, - l_float32 *pscore, - l_int32 debugflag) -{ -l_uint8 *subtab; -l_int32 i, level, area1, area2, delx, dely; -l_int32 etransx, etransy, maxshift, dbint; -l_int32 *stab, *ctab; -l_float32 cx1, cx2, cy1, cy2, score; -PIX *pixb1, *pixb2, *pixt1, *pixt2, *pixt3, *pixt4; -PIXA *pixa1, *pixa2, *pixadb; - - PROCNAME("pixCompareWithTranslation"); - - if (pdelx) *pdelx = 0; - if (pdely) *pdely = 0; - if (pscore) *pscore = 0.0; - if (!pdelx || !pdely) - return ERROR_INT("&delx and &dely not defined", procName, 1); - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - if (!pix1) - return ERROR_INT("pix1 not defined", procName, 1); - if (!pix2) - return ERROR_INT("pix2 not defined", procName, 1); - - /* Make tables */ - subtab = makeSubsampleTab2x(); - stab = makePixelSumTab8(); - ctab = makePixelCentroidTab8(); - - /* Binarize each image */ - pixb1 = pixConvertTo1(pix1, thresh); - pixb2 = pixConvertTo1(pix2, thresh); - - /* Make a cascade of 2x reduced images for each, thresholding - * with level 2 (neutral), down to 8x reduction */ - pixa1 = pixaCreate(4); - pixa2 = pixaCreate(4); - if (debugflag) - pixadb = pixaCreate(4); - pixaAddPix(pixa1, pixb1, L_INSERT); - pixaAddPix(pixa2, pixb2, L_INSERT); - for (i = 0; i < 3; i++) { - pixt1 = pixReduceRankBinary2(pixb1, 2, subtab); - pixt2 = pixReduceRankBinary2(pixb2, 2, subtab); - pixaAddPix(pixa1, pixt1, L_INSERT); - pixaAddPix(pixa2, pixt2, L_INSERT); - pixb1 = pixt1; - pixb2 = pixt2; - } - - /* At the lowest level, use the centroids with a maxshift of 6 - * to search for the best alignment. Then at higher levels, - * use the result from the level below as the initial approximation - * for the alignment, and search with a maxshift of 2. 
*/ - for (level = 3; level >= 0; level--) { - pixt1 = pixaGetPix(pixa1, level, L_CLONE); - pixt2 = pixaGetPix(pixa2, level, L_CLONE); - pixCountPixels(pixt1, &area1, stab); - pixCountPixels(pixt2, &area2, stab); - if (level == 3) { - pixCentroid(pixt1, ctab, stab, &cx1, &cy1); - pixCentroid(pixt2, ctab, stab, &cx2, &cy2); - etransx = lept_roundftoi(cx1 - cx2); - etransy = lept_roundftoi(cy1 - cy2); - maxshift = 6; - } else { - etransx = 2 * delx; - etransy = 2 * dely; - maxshift = 2; - } - dbint = (debugflag) ? level + 1 : 0; - pixBestCorrelation(pixt1, pixt2, area1, area2, etransx, etransy, - maxshift, stab, &delx, &dely, &score, dbint); - if (debugflag) { - lept_stderr("Level %d: delx = %d, dely = %d, score = %7.4f\n", - level, delx, dely, score); - pixRasteropIP(pixt2, delx, dely, L_BRING_IN_WHITE); - pixt3 = pixDisplayDiffBinary(pixt1, pixt2); - pixt4 = pixExpandReplicate(pixt3, 8 / (1 << (3 - level))); - pixaAddPix(pixadb, pixt4, L_INSERT); - pixDestroy(&pixt3); - } - pixDestroy(&pixt1); - pixDestroy(&pixt2); - } - - if (debugflag) { - pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, NULL, - "/tmp/lept/comp/compare.pdf"); - convertFilesToPdf("/tmp/lept/comp", "correl_", 30, 1.0, L_FLATE_ENCODE, - 0, "Correlation scores at levels 1 through 5", - "/tmp/lept/comp/correl.pdf"); - pixaDestroy(&pixadb); - } - - *pdelx = delx; - *pdely = dely; - *pscore = score; - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - LEPT_FREE(subtab); - LEPT_FREE(stab); - LEPT_FREE(ctab); - return 0; -} - - -/*! 
- * \brief pixBestCorrelation() - * - * \param[in] pix1 1 bpp - * \param[in] pix2 1 bpp - * \param[in] area1 number of on pixels in pix1 - * \param[in] area2 number of on pixels in pix2 - * \param[in] etransx estimated x translation of pix2 to align with pix1 - * \param[in] etransy estimated y translation of pix2 to align with pix1 - * \param[in] maxshift max x and y shift of pix2, around the estimated - * alignment location, relative to pix1 - * \param[in] tab8 [optional] sum tab for ON pixels in byte; can be NULL - * \param[out] pdelx [optional] best x shift of pix2 relative to pix1 - * \param[out] pdely [optional] best y shift of pix2 relative to pix1 - * \param[out] pscore [optional] maximum score found; can be NULL - * \param[in] debugflag <= 0 to skip; positive to generate output. - * The integer is used to label the debug image. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This maximizes the correlation score between two 1 bpp images,
- *          by starting with an estimate of the alignment
- *          (%etransx, %etransy) and computing the correlation around this.
- *          It optionally returns the shift (%delx, %dely) that maximizes
- *          the correlation score when pix2 is shifted by this amount
- *          relative to pix1.
- *      (2) Get the centroids of pix1 and pix2, using pixCentroid(),
- *          to compute (%etransx, %etransy).  Get the areas using
- *          pixCountPixels().
- *      (3) The centroid of pix2 is shifted with respect to the centroid
- *          of pix1 by all values between -maxshiftx and maxshiftx,
- *          and likewise for the y shifts.  Therefore, the number of
- *          correlations computed is:
- *               (2 * maxshiftx + 1) * (2 * maxshifty + 1)
- *          Consequently, if pix1 and pix2 are large, you should do this
- *          in a coarse-to-fine sequence.  See the use of this function
- *          in pixCompareWithTranslation().
- * 
- */ -l_ok -pixBestCorrelation(PIX *pix1, - PIX *pix2, - l_int32 area1, - l_int32 area2, - l_int32 etransx, - l_int32 etransy, - l_int32 maxshift, - l_int32 *tab8, - l_int32 *pdelx, - l_int32 *pdely, - l_float32 *pscore, - l_int32 debugflag) -{ -l_int32 shiftx, shifty, delx, dely; -l_int32 *tab; -l_float32 maxscore, score; -FPIX *fpix; -PIX *pix3, *pix4; - - PROCNAME("pixBestCorrelation"); - - if (pdelx) *pdelx = 0; - if (pdely) *pdely = 0; - if (pscore) *pscore = 0.0; - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1); - if (!area1 || !area2) - return ERROR_INT("areas must be > 0", procName, 1); - - if (debugflag > 0) - fpix = fpixCreate(2 * maxshift + 1, 2 * maxshift + 1); - - if (!tab8) - tab = makePixelSumTab8(); - else - tab = tab8; - - /* Search over a set of {shiftx, shifty} for the max */ - maxscore = 0; - delx = etransx; - dely = etransy; - for (shifty = -maxshift; shifty <= maxshift; shifty++) { - for (shiftx = -maxshift; shiftx <= maxshift; shiftx++) { - pixCorrelationScoreShifted(pix1, pix2, area1, area2, - etransx + shiftx, - etransy + shifty, tab, &score); - if (debugflag > 0) { - fpixSetPixel(fpix, maxshift + shiftx, maxshift + shifty, - 1000.0 * score); -/* lept_stderr("(sx, sy) = (%d, %d): score = %6.4f\n", - shiftx, shifty, score); */ - } - if (score > maxscore) { - maxscore = score; - delx = etransx + shiftx; - dely = etransy + shifty; - } - } - } - - if (debugflag > 0) { - lept_mkdir("lept/comp"); - char buf[128]; - pix3 = fpixDisplayMaxDynamicRange(fpix); - pix4 = pixExpandReplicate(pix3, 20); - snprintf(buf, sizeof(buf), "/tmp/lept/comp/correl_%d.png", - debugflag); - pixWrite(buf, pix4, IFF_PNG); - pixDestroy(&pix3); - pixDestroy(&pix4); - fpixDestroy(&fpix); - } - - if (pdelx) *pdelx = delx; - if (pdely) *pdely = dely; - if (pscore) *pscore = maxscore; - if (!tab8) LEPT_FREE(tab); - 
return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/conncomp.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/conncomp.c deleted file mode 100644 index 82cc0adf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/conncomp.c +++ /dev/null @@ -1,1246 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file conncomp.c - *
- *
- *    Connected component counting and extraction, using Heckbert's
- *    stack-based filling algorithm.
- *
- *      4- and 8-connected components: counts, bounding boxes and images
- *
- *      Top-level calls:
- *            BOXA     *pixConnComp()
- *            BOXA     *pixConnCompPixa()
- *            BOXA     *pixConnCompBB()
- *            l_int32   pixCountConnComp()
- *
- *      Identify the next c.c. to be erased:
- *            l_int32   nextOnPixelInRaster()
- *    static  l_int32   nextOnPixelInRasterLow()
- *
- *      Erase the c.c., saving the b.b.:
- *            BOX      *pixSeedfillBB()
- *            BOX      *pixSeedfill4BB()
- *            BOX      *pixSeedfill8BB()
- *
- *      Just erase the c.c.:
- *            l_int32   pixSeedfill()
- *            l_int32   pixSeedfill4()
- *            l_int32   pixSeedfill8()
- *
- *      Static stack helper functions for single raster line seedfill:
- *            static void    pushFillsegBB()
- *            static void    pushFillseg()
- *            static void    popFillseg()
- *
- *  The basic method in pixConnCompBB() is very simple.  We scan the
- *  image in raster order, looking for the next ON pixel.  When it
- *  is found, we erase it and every pixel of the 4- or 8-connected
- *  component to which it belongs, using Heckbert's seedfill
- *  algorithm.  As pixels are erased, we keep track of the
- *  minimum rectangle that encloses all erased pixels; after
- *  the connected component has been erased, we save its
- *  bounding box in an array of boxes.  When all pixels in the
- *  image have been erased, we have an array that describes every
- *  4- or 8-connected component in terms of its bounding box.
- *
- *  pixConnCompPixa() is a slight variation on pixConnCompBB(),
- *  where we additionally save an array of images (in a Pixa)
- *  of each of the 4- or 8-connected components.  This is done trivially
- *  by maintaining two temporary images.  We erase a component from one,
- *  and use the bounding box to extract the pixels within the b.b.
- *  from each of the two images.  An XOR between these subimages
- *  gives the erased component.  Then we erase the component from the
- *  second image using the XOR again, with the extracted component
- *  placed on the second image at the location of the bounding box.
- *  Rasterop does all the work.  At the end, we have an array
- *  of the 4- or 8-connected components, as well as an array of the
- *  bounding boxes that describe where they came from in the original image.
- *
- *  If you just want the number of connected components, pixCountConnComp()
- *  is a bit faster than pixConnCompBB(), because it doesn't have to
- *  keep track of the bounding rectangles for each c.c.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*! - * \brief The struct FillSeg is used by the Heckbert seedfill algorithm to - * hold information about image segments that are waiting to be - * investigated. We use two Stacks, one to hold the FillSegs in use, - * and an auxiliary Stack as a reservoir to hold FillSegs for re-use. - */ -struct FillSeg -{ - l_int32 xleft; /*!< left edge of run */ - l_int32 xright; /*!< right edge of run */ - l_int32 y; /*!< run y */ - l_int32 dy; /*!< parent segment direction: 1 above, -1 below) */ -}; -typedef struct FillSeg FILLSEG; - -static l_int32 nextOnPixelInRasterLow(l_uint32 *data, l_int32 w, l_int32 h, - l_int32 wpl, l_int32 xstart, - l_int32 ystart, l_int32 *px, l_int32 *py); - - /* Static accessors for FillSegs on a stack */ -static void pushFillsegBB(L_STACK *stack, l_int32 xleft, l_int32 xright, - l_int32 y, l_int32 dy, l_int32 ymax, - l_int32 *pminx, l_int32 *pmaxx, - l_int32 *pminy, l_int32 *pmaxy); -static void pushFillseg(L_STACK *stack, l_int32 xleft, l_int32 xright, - l_int32 y, l_int32 dy, l_int32 ymax); -static void popFillseg(L_STACK *stack, l_int32 *pxleft, l_int32 *pxright, - l_int32 *py, l_int32 *pdy); - - -#ifndef NO_CONSOLE_IO -#define DEBUG 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-----------------------------------------------------------------------* - * Bounding boxes of 4 Connected Components * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixConnComp() - * - * \param[in] pixs 1 bpp - * \param[out] ppixa [optional] pixa of each c.c. - * \param[in] connectivity 4 or 8 - * \return boxa, or NULL on error - * - *
- * Notes:
- *      (1) This is the top-level call for getting bounding boxes or
- *          a pixa of the components, and it can be used instead
- *          of either pixConnCompBB() or pixConnCompPixa(), rsp.
- * 
- */ -BOXA * -pixConnComp(PIX *pixs, - PIXA **ppixa, - l_int32 connectivity) -{ - - PROCNAME("pixConnComp"); - - if (ppixa) *ppixa = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (BOXA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - if (!ppixa) - return pixConnCompBB(pixs, connectivity); - else - return pixConnCompPixa(pixs, ppixa, connectivity); -} - - -/*! - * \brief pixConnCompPixa() - * - * \param[in] pixs 1 bpp - * \param[out] ppixa pixa of each c.c. - * \param[in] connectivity 4 or 8 - * \return boxa, or NULL on error - * - *
- * Notes:
- *      (1) This finds bounding boxes of 4- or 8-connected components
- *          in a binary image, and saves images of each c.c
- *          in a pixa array.
- *      (2) It sets up 2 temporary pix, and for each c.c. that is
- *          located in raster order, it erases the c.c. from one pix,
- *          then uses the b.b. to extract the c.c. from the two pix using
- *          an XOR, and finally erases the c.c. from the second pix.
- *      (3) A clone of the returned boxa (where all boxes in the array
- *          are clones) is inserted into the pixa.
- *      (4) If the input is valid, this always returns a boxa and a pixa.
- *          If pixs is empty, the boxa and pixa will be empty.
- * 
- */ -BOXA * -pixConnCompPixa(PIX *pixs, - PIXA **ppixa, - l_int32 connectivity) -{ -l_int32 h, iszero; -l_int32 x, y, xstart, ystart; -PIX *pix1, *pix2, *pix3, *pix4; -PIXA *pixa; -BOX *box; -BOXA *boxa; -L_STACK *stack, *auxstack; - - PROCNAME("pixConnCompPixa"); - - if (!ppixa) - return (BOXA *)ERROR_PTR("&pixa not defined", procName, NULL); - *ppixa = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (BOXA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - pix1 = pix2 = pix3 = pix4 = NULL; - stack = NULL; - pixa = pixaCreate(0); - boxa = NULL; - *ppixa = pixa; - pixZero(pixs, &iszero); - if (iszero) - return boxaCreate(1); /* return empty boxa and empty pixa */ - - pixSetPadBits(pixs, 0); - pix1 = pixCopy(NULL, pixs); - pix2 = pixCopy(NULL, pixs); - if (!pix1 || !pix2) { - L_ERROR("pix1 or pix2 not made\n", procName); - pixaDestroy(ppixa); - goto cleanup; - } - - h = pixGetHeight(pixs); - if ((stack = lstackCreate(h)) == NULL) { - L_ERROR("stack not made\n", procName); - pixaDestroy(ppixa); - goto cleanup; - } - auxstack = lstackCreate(0); - stack->auxstack = auxstack; - boxa = boxaCreate(0); - - xstart = 0; - ystart = 0; - while (1) { - if (!nextOnPixelInRaster(pix1, xstart, ystart, &x, &y)) - break; - - if ((box = pixSeedfillBB(pix1, stack, x, y, connectivity)) == NULL) { - boxaDestroy(&boxa); - pixaDestroy(ppixa); - L_ERROR("box not made\n", procName); - goto cleanup; - } - boxaAddBox(boxa, box, L_INSERT); - - /* Save the c.c. 
and remove from pix2 as well */ - pix3 = pixClipRectangle(pix1, box, NULL); - pix4 = pixClipRectangle(pix2, box, NULL); - pixXor(pix3, pix3, pix4); - pixRasterop(pix2, box->x, box->y, box->w, box->h, PIX_SRC ^ PIX_DST, - pix3, 0, 0); - pixaAddPix(pixa, pix3, L_INSERT); - pixDestroy(&pix4); - - xstart = x; - ystart = y; - } - -#if DEBUG - pixCountPixels(pix1, &iszero, NULL); - lept_stderr("Number of remaining pixels = %d\n", iszero); - lept_mkdir("lept/cc"); - pixWriteDebug("/tmp/lept/cc/remain.png", pix1, IFF_PNG); -#endif /* DEBUG */ - - /* Remove old boxa of pixa and replace with a copy */ - boxaDestroy(&pixa->boxa); - pixa->boxa = boxaCopy(boxa, L_COPY); - *ppixa = pixa; - - /* Cleanup, freeing the fillsegs on each stack */ -cleanup: - lstackDestroy(&stack, TRUE); - pixDestroy(&pix1); - pixDestroy(&pix2); - return boxa; -} - - -/*! - * \brief pixConnCompBB() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity 4 or 8 - * \return boxa, or NULL on error - * - *
- * Notes:
- *     (1) Finds bounding boxes of 4- or 8-connected components
- *         in a binary image.
- *     (2) This works on a copy of the input pix.  The c.c. are located
- *         in raster order and erased one at a time.  In the process,
- *         the b.b. is computed and saved.
- * 
- */ -BOXA * -pixConnCompBB(PIX *pixs, - l_int32 connectivity) -{ -l_int32 h, iszero; -l_int32 x, y, xstart, ystart; -PIX *pix1; -BOX *box; -BOXA *boxa; -L_STACK *stack, *auxstack; - - PROCNAME("pixConnCompBB"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (BOXA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - boxa = NULL; - pix1 = NULL; - stack = NULL; - pixZero(pixs, &iszero); - if (iszero) - return boxaCreate(1); /* return empty boxa */ - - pixSetPadBits(pixs, 0); - if ((pix1 = pixCopy(NULL, pixs)) == NULL) - return (BOXA *)ERROR_PTR("pix1 not made", procName, NULL); - - h = pixGetHeight(pixs); - if ((stack = lstackCreate(h)) == NULL) { - L_ERROR("stack not made\n", procName); - goto cleanup; - } - auxstack = lstackCreate(0); - stack->auxstack = auxstack; - boxa = boxaCreate(0); - - xstart = 0; - ystart = 0; - while (1) { - if (!nextOnPixelInRaster(pix1, xstart, ystart, &x, &y)) - break; - - if ((box = pixSeedfillBB(pix1, stack, x, y, connectivity)) == NULL) { - L_ERROR("box not made\n", procName); - boxaDestroy(&boxa); - goto cleanup; - } - boxaAddBox(boxa, box, L_INSERT); - - xstart = x; - ystart = y; - } - -#if DEBUG - pixCountPixels(pix1, &iszero, NULL); - lept_stderr("Number of remaining pixels = %d\n", iszero); - lept_mkdir("lept/cc"); - pixWriteDebug("/tmp/lept/cc/remain.png", pix1, IFF_PNG); -#endif /* DEBUG */ - - /* Cleanup, freeing the fillsegs on each stack */ -cleanup: - lstackDestroy(&stack, TRUE); - pixDestroy(&pix1); - return boxa; -} - - -/*! - * \brief pixCountConnComp() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity 4 or 8 - * \param[out] pcount - * \return 0 if OK, 1 on error - * - * Notes: - * (1 This is the top-level call for getting the number of - * 4- or 8-connected components in a 1 bpp image. - * 2 It works on a copy of the input pix. The c.c. 
are located - * in raster order and erased one at a time. - */ -l_ok -pixCountConnComp(PIX *pixs, - l_int32 connectivity, - l_int32 *pcount) -{ -l_int32 h, iszero; -l_int32 x, y, xstart, ystart; -PIX *pix1; -L_STACK *stack, *auxstack; - - PROCNAME("pixCountConnComp"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; /* initialize the count to 0 */ - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (connectivity != 4 && connectivity != 8) - return ERROR_INT("connectivity not 4 or 8", procName, 1); - - stack = NULL; - pixZero(pixs, &iszero); - if (iszero) - return 0; - - pixSetPadBits(pixs, 0); - if ((pix1 = pixCopy(NULL, pixs)) == NULL) - return ERROR_INT("pix1 not made", procName, 1); - h = pixGetHeight(pixs); - if ((stack = lstackCreate(h)) == NULL) { - pixDestroy(&pix1); - return ERROR_INT("stack not made\n", procName, 1); - } - auxstack = lstackCreate(0); - stack->auxstack = auxstack; - - xstart = 0; - ystart = 0; - while (1) { - if (!nextOnPixelInRaster(pix1, xstart, ystart, &x, &y)) - break; - - pixSeedfill(pix1, stack, x, y, connectivity); - (*pcount)++; - xstart = x; - ystart = y; - } - - /* Cleanup, freeing the fillsegs on each stack */ - lstackDestroy(&stack, TRUE); - pixDestroy(&pix1); - return 0; -} - - -/*! 
- * \brief nextOnPixelInRaster() - * - * \param[in] pixs 1 bpp - * \param[in] xstart, ystart starting point for search - * \param[out] px, py coord value of next ON pixel - * \return 1 if a pixel is found; 0 otherwise or on error - */ -l_int32 -nextOnPixelInRaster(PIX *pixs, - l_int32 xstart, - l_int32 ystart, - l_int32 *px, - l_int32 *py) -{ -l_int32 w, h, d, wpl; -l_uint32 *data; - - PROCNAME("nextOnPixelInRaster"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 0); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return ERROR_INT("pixs not 1 bpp", procName, 0); - - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - return nextOnPixelInRasterLow(data, w, h, wpl, xstart, ystart, px, py); -} - - -/*! - * \brief nextOnPixelInRasterLow() - * - * \param[in] data pix data - * \param[in] w, h width and height - * \param[in] wpl words per line - * \param[in] xstart, ystart starting point for search - * \param[out] px, py coord value of next ON pixel - * \return 1 if a pixel is found; 0 otherwise or on error - */ -static l_int32 -nextOnPixelInRasterLow(l_uint32 *data, - l_int32 w, - l_int32 h, - l_int32 wpl, - l_int32 xstart, - l_int32 ystart, - l_int32 *px, - l_int32 *py) -{ -l_int32 i, x, y, xend, startword; -l_uint32 *line, *pword; - - /* Look at the first word */ - line = data + ystart * wpl; - pword = line + (xstart / 32); - if (*pword) { - xend = xstart - (xstart % 32) + 31; - for (x = xstart; x <= xend && x < w; x++) { - if (GET_DATA_BIT(line, x)) { - *px = x; - *py = ystart; - return 1; - } - } - } - - /* Continue with the rest of the line */ - startword = (xstart / 32) + 1; - x = 32 * startword; - for (pword = line + startword; x < w; pword++, x += 32) { - if (*pword) { - for (i = 0; i < 32 && x < w; i++, x++) { - if (GET_DATA_BIT(line, x)) { - *px = x; - *py = ystart; - return 1; - } - } - } - } - - /* Continue with following lines */ - for (y = ystart + 1; y < h; y++) { - line = data + y * wpl; - for (pword = line, x = 0; x < w; pword++, 
x += 32) { - if (*pword) { - for (i = 0; i < 32 && x < w; i++, x++) { - if (GET_DATA_BIT(line, x)) { - *px = x; - *py = y; - return 1; - } - } - } - } - } - - return 0; -} - - -/*! - * \brief pixSeedfillBB() - * - * \param[in] pixs 1 bpp - * \param[in] stack for holding fillsegs - * \param[in] x,y location of seed pixel - * \param[in] connectivity 4 or 8 - * \return box or NULL on error - * - *
- * Notes:
- *      (1) This is the high-level interface to Paul Heckbert's
- *          stack-based seedfill algorithm.
- * 
- */ -BOX * -pixSeedfillBB(PIX *pixs, - L_STACK *stack, - l_int32 x, - l_int32 y, - l_int32 connectivity) -{ -BOX *box; - - PROCNAME("pixSeedfillBB"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!stack) - return (BOX *)ERROR_PTR("stack not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (BOX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - if (connectivity == 4) { - if ((box = pixSeedfill4BB(pixs, stack, x, y)) == NULL) - return (BOX *)ERROR_PTR("box not made", procName, NULL); - } else if (connectivity == 8) { - if ((box = pixSeedfill8BB(pixs, stack, x, y)) == NULL) - return (BOX *)ERROR_PTR("box not made", procName, NULL); - } else { - return (BOX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - } - - return box; -} - - -/*! - * \brief pixSeedfill4BB() - * - * \param[in] pixs 1 bpp - * \param[in] stack for holding fillsegs - * \param[in] x,y location of seed pixel - * \return box or NULL on error. - * - *
- * Notes:
- *      (1) This is Paul Heckbert's stack-based 4-cc seedfill algorithm.
- *      (2) This operates on the input 1 bpp pix to remove the fg seed
- *          pixel, at (x,y), and all pixels that are 4-connected to it.
- *          The seed pixel at (x,y) must initially be ON.
- *      (3) Returns the bounding box of the erased 4-cc component.
- *      (4) Reference: see Paul Heckbert's stack-based seed fill algorithm
- *          in "Graphic Gems", ed. Andrew Glassner, Academic
- *          Press, 1990.  The algorithm description is given
- *          on pp. 275-277; working C code is on pp. 721-722.)
- *          The code here follows Heckbert's exactly, except
- *          we use function calls instead of macros for
- *          pushing data on and popping data off the stack.
- *          This makes sense to do because Heckbert's fixed-size
- *          stack with macros is dangerous: images exist that
- *          will overrun the stack and crash.   The stack utility
- *          here grows dynamically as needed, and the fillseg
- *          structures that are not in use are stored in another
- *          stack for reuse.  It should be noted that the
- *          overhead in the function calls (vs. macros) is negligible.
- * 
- */ -BOX * -pixSeedfill4BB(PIX *pixs, - L_STACK *stack, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, xstart, wpl, x1, x2, dy; -l_int32 xmax, ymax; -l_int32 minx, maxx, miny, maxy; /* for bounding box of this c.c. */ -l_uint32 *data, *line; -BOX *box; - - PROCNAME("pixSeedfill4BB"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!stack) - return (BOX *)ERROR_PTR("stack not defined", procName, NULL); - if (!stack->auxstack) - stack->auxstack = lstackCreate(0); - - pixGetDimensions(pixs, &w, &h, NULL); - xmax = w - 1; - ymax = h - 1; - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - line = data + y * wpl; - - /* Check pix value of seed; must be within the image and ON */ - if (x < 0 || x > xmax || y < 0 || y > ymax || (GET_DATA_BIT(line, x) == 0)) - return NULL; - - /* Init stack to seed: - * Must first init b.b. values to prevent valgrind from complaining; - * then init b.b. boundaries correctly to seed. */ - minx = miny = 100000; - maxx = maxy = 0; - pushFillsegBB(stack, x, x, y, 1, ymax, &minx, &maxx, &miny, &maxy); - pushFillsegBB(stack, x, x, y + 1, -1, ymax, &minx, &maxx, &miny, &maxy); - minx = maxx = x; - miny = maxy = y; - - while (lstackGetCount(stack) > 0) { - /* Pop segment off stack and fill a neighboring scan line */ - popFillseg(stack, &x1, &x2, &y, &dy); - line = data + y * wpl; - - /* A segment of scanline y - dy for x1 <= x <= x2 was - * previously filled. We now explore adjacent pixels - * in scan line y. There are three regions: to the - * left of x1 - 1, between x1 and x2, and to the right of x2. - * These regions are handled differently. Leaks are - * possible expansions beyond the previous segment and - * going back in the -dy direction. These can happen - * for x < x1 - 1 and for x > x2 + 1. Any "leak" segments - * are plugged with a push in the -dy (opposite) direction. - * And any segments found anywhere are always extended - * in the +dy direction. 
*/ - for (x = x1; x >= 0 && (GET_DATA_BIT(line, x) == 1); x--) - CLEAR_DATA_BIT(line,x); - if (x >= x1) /* pix at x1 was off and was not cleared */ - goto skip; - xstart = x + 1; - if (xstart < x1 - 1) /* leak on left? */ - pushFillsegBB(stack, xstart, x1 - 1, y, -dy, - ymax, &minx, &maxx, &miny, &maxy); - - x = x1 + 1; - do { - for (; x <= xmax && (GET_DATA_BIT(line, x) == 1); x++) - CLEAR_DATA_BIT(line, x); - pushFillsegBB(stack, xstart, x - 1, y, dy, - ymax, &minx, &maxx, &miny, &maxy); - if (x > x2 + 1) /* leak on right? */ - pushFillsegBB(stack, x2 + 1, x - 1, y, -dy, - ymax, &minx, &maxx, &miny, &maxy); - skip: for (x++; x <= x2 && - x <= xmax && - (GET_DATA_BIT(line, x) == 0); x++) - ; - xstart = x; - } while (x <= x2 && x <= xmax); - } - - if ((box = boxCreate(minx, miny, maxx - minx + 1, maxy - miny + 1)) - == NULL) - return (BOX *)ERROR_PTR("box not made", procName, NULL); - return box; -} - - -/*! - * \brief pixSeedfill8BB() - * - * \param[in] pixs 1 bpp - * \param[in] stack for holding fillsegs - * \param[in] x,y location of seed pixel - * \return box or NULL on error. - * - *
- * Notes:
- *      (1) This is Paul Heckbert's stack-based 8-cc seedfill algorithm.
- *      (2) This operates on the input 1 bpp pix to remove the fg seed
- *          pixel, at (x,y), and all pixels that are 8-connected to it.
- *          The seed pixel at (x,y) must initially be ON.
- *      (3) Returns the bounding box of the erased 8-cc component.
- *      (4) Reference: see Paul Heckbert's stack-based seed fill algorithm
- *          in "Graphic Gems", ed. Andrew Glassner, Academic
- *          Press, 1990.  The algorithm description is given
- *          on pp. 275-277; working C code is on pp. 721-722.)
- *          The code here follows Heckbert's closely, except
- *          the leak checks are changed for 8 connectivity.
- *          See comments on pixSeedfill4BB() for more details.
- * 
- */ -BOX * -pixSeedfill8BB(PIX *pixs, - L_STACK *stack, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, xstart, wpl, x1, x2, dy; -l_int32 xmax, ymax; -l_int32 minx, maxx, miny, maxy; /* for bounding box of this c.c. */ -l_uint32 *data, *line; -BOX *box; - - PROCNAME("pixSeedfill8BB"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!stack) - return (BOX *)ERROR_PTR("stack not defined", procName, NULL); - if (!stack->auxstack) - stack->auxstack = lstackCreate(0); - - pixGetDimensions(pixs, &w, &h, NULL); - xmax = w - 1; - ymax = h - 1; - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - line = data + y * wpl; - - /* Check pix value of seed; must be ON */ - if (x < 0 || x > xmax || y < 0 || y > ymax || (GET_DATA_BIT(line, x) == 0)) - return NULL; - - /* Init stack to seed: - * Must first init b.b. values to prevent valgrind from complaining; - * then init b.b. boundaries correctly to seed. */ - minx = miny = 100000; - maxx = maxy = 0; - pushFillsegBB(stack, x, x, y, 1, ymax, &minx, &maxx, &miny, &maxy); - pushFillsegBB(stack, x, x, y + 1, -1, ymax, &minx, &maxx, &miny, &maxy); - minx = maxx = x; - miny = maxy = y; - - while (lstackGetCount(stack) > 0) { - /* Pop segment off stack and fill a neighboring scan line */ - popFillseg(stack, &x1, &x2, &y, &dy); - line = data + y * wpl; - - /* A segment of scanline y - dy for x1 <= x <= x2 was - * previously filled. We now explore adjacent pixels - * in scan line y. There are three regions: to the - * left of x1, between x1 and x2, and to the right of x2. - * These regions are handled differently. Leaks are - * possible expansions beyond the previous segment and - * going back in the -dy direction. These can happen - * for x < x1 and for x > x2. Any "leak" segments - * are plugged with a push in the -dy (opposite) direction. - * And any segments found anywhere are always extended - * in the +dy direction. 
*/ - for (x = x1 - 1; x >= 0 && (GET_DATA_BIT(line, x) == 1); x--) - CLEAR_DATA_BIT(line,x); - if (x >= x1 - 1) /* pix at x1 - 1 was off and was not cleared */ - goto skip; - xstart = x + 1; - if (xstart < x1) /* leak on left? */ - pushFillsegBB(stack, xstart, x1 - 1, y, -dy, - ymax, &minx, &maxx, &miny, &maxy); - - x = x1; - do { - for (; x <= xmax && (GET_DATA_BIT(line, x) == 1); x++) - CLEAR_DATA_BIT(line, x); - pushFillsegBB(stack, xstart, x - 1, y, dy, - ymax, &minx, &maxx, &miny, &maxy); - if (x > x2) /* leak on right? */ - pushFillsegBB(stack, x2 + 1, x - 1, y, -dy, - ymax, &minx, &maxx, &miny, &maxy); - skip: for (x++; x <= x2 + 1 && - x <= xmax && - (GET_DATA_BIT(line, x) == 0); x++) - ; - xstart = x; - } while (x <= x2 + 1 && x <= xmax); - } - - if ((box = boxCreate(minx, miny, maxx - minx + 1, maxy - miny + 1)) - == NULL) - return (BOX *)ERROR_PTR("box not made", procName, NULL); - return box; -} - - -/*! - * \brief pixSeedfill() - * - * \param[in] pixs 1 bpp - * \param[in] stack for holding fillsegs - * \param[in] x,y location of seed pixel - * \param[in] connectivity 4 or 8 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes the component from pixs with a fg pixel at (x,y).
- *      (2) See pixSeedfill4() and pixSeedfill8() for details.
- * 
- */ -l_ok -pixSeedfill(PIX *pixs, - L_STACK *stack, - l_int32 x, - l_int32 y, - l_int32 connectivity) -{ -l_int32 retval; - - PROCNAME("pixSeedfill"); - - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!stack) - return ERROR_INT("stack not defined", procName, 1); - if (connectivity != 4 && connectivity != 8) - return ERROR_INT("connectivity not 4 or 8", procName, 1); - - if (connectivity == 4) - retval = pixSeedfill4(pixs, stack, x, y); - else /* connectivity == 8 */ - retval = pixSeedfill8(pixs, stack, x, y); - - return retval; -} - - -/*! - * \brief pixSeedfill4() - * - * \param[in] pixs 1 bpp - * \param[in] stack for holding fillsegs - * \param[in] x,y location of seed pixel - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is Paul Heckbert's stack-based 4-cc seedfill algorithm.
- *      (2) This operates on the input 1 bpp pix to remove the fg seed
- *          pixel, at (x,y), and all pixels that are 4-connected to it.
- *          The seed pixel at (x,y) must initially be ON.
- *      (3) Reference: see pixSeedFill4BB()
- * 
- */ -l_ok -pixSeedfill4(PIX *pixs, - L_STACK *stack, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, xstart, wpl, x1, x2, dy; -l_int32 xmax, ymax; -l_uint32 *data, *line; - - PROCNAME("pixSeedfill4"); - - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!stack) - return ERROR_INT("stack not defined", procName, 1); - if (!stack->auxstack) - stack->auxstack = lstackCreate(0); - - pixGetDimensions(pixs, &w, &h, NULL); - xmax = w - 1; - ymax = h - 1; - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - line = data + y * wpl; - - /* Check pix value of seed; must be within the image and ON */ - if (x < 0 || x > xmax || y < 0 || y > ymax || (GET_DATA_BIT(line, x) == 0)) - return 0; - - /* Init stack to seed */ - pushFillseg(stack, x, x, y, 1, ymax); - pushFillseg(stack, x, x, y + 1, -1, ymax); - - while (lstackGetCount(stack) > 0) { - /* Pop segment off stack and fill a neighboring scan line */ - popFillseg(stack, &x1, &x2, &y, &dy); - line = data + y * wpl; - - /* A segment of scanline y - dy for x1 <= x <= x2 was - * previously filled. We now explore adjacent pixels - * in scan line y. There are three regions: to the - * left of x1 - 1, between x1 and x2, and to the right of x2. - * These regions are handled differently. Leaks are - * possible expansions beyond the previous segment and - * going back in the -dy direction. These can happen - * for x < x1 - 1 and for x > x2 + 1. Any "leak" segments - * are plugged with a push in the -dy (opposite) direction. - * And any segments found anywhere are always extended - * in the +dy direction. */ - for (x = x1; x >= 0 && (GET_DATA_BIT(line, x) == 1); x--) - CLEAR_DATA_BIT(line,x); - if (x >= x1) /* pix at x1 was off and was not cleared */ - goto skip; - xstart = x + 1; - if (xstart < x1 - 1) /* leak on left? 
*/ - pushFillseg(stack, xstart, x1 - 1, y, -dy, ymax); - - x = x1 + 1; - do { - for (; x <= xmax && (GET_DATA_BIT(line, x) == 1); x++) - CLEAR_DATA_BIT(line, x); - pushFillseg(stack, xstart, x - 1, y, dy, ymax); - if (x > x2 + 1) /* leak on right? */ - pushFillseg(stack, x2 + 1, x - 1, y, -dy, ymax); - skip: for (x++; x <= x2 && - x <= xmax && - (GET_DATA_BIT(line, x) == 0); x++) - ; - xstart = x; - } while (x <= x2 && x <= xmax); - } - - return 0; -} - - -/*! - * \brief pixSeedfill8() - * - * \param[in] pixs 1 bpp - * \param[in] stack for holding fillsegs - * \param[in] x,y location of seed pixel - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is Paul Heckbert's stack-based 8-cc seedfill algorithm.
- *      (2) This operates on the input 1 bpp pix to remove the fg seed
- *          pixel, at (x,y), and all pixels that are 8-connected to it.
- *          The seed pixel at (x,y) must initially be ON.
- *      (3) Reference: see pixSeedFill8BB()
- * 
- */ -l_ok -pixSeedfill8(PIX *pixs, - L_STACK *stack, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, xstart, wpl, x1, x2, dy; -l_int32 xmax, ymax; -l_uint32 *data, *line; - - PROCNAME("pixSeedfill8"); - - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!stack) - return ERROR_INT("stack not defined", procName, 1); - if (!stack->auxstack) - stack->auxstack = lstackCreate(0); - - pixGetDimensions(pixs, &w, &h, NULL); - xmax = w - 1; - ymax = h - 1; - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - line = data + y * wpl; - - /* Check pix value of seed; must be ON */ - if (x < 0 || x > xmax || y < 0 || y > ymax || (GET_DATA_BIT(line, x) == 0)) - return 0; - - /* Init stack to seed */ - pushFillseg(stack, x, x, y, 1, ymax); - pushFillseg(stack, x, x, y + 1, -1, ymax); - - while (lstackGetCount(stack) > 0) { - /* Pop segment off stack and fill a neighboring scan line */ - popFillseg(stack, &x1, &x2, &y, &dy); - line = data + y * wpl; - - /* A segment of scanline y - dy for x1 <= x <= x2 was - * previously filled. We now explore adjacent pixels - * in scan line y. There are three regions: to the - * left of x1, between x1 and x2, and to the right of x2. - * These regions are handled differently. Leaks are - * possible expansions beyond the previous segment and - * going back in the -dy direction. These can happen - * for x < x1 and for x > x2. Any "leak" segments - * are plugged with a push in the -dy (opposite) direction. - * And any segments found anywhere are always extended - * in the +dy direction. */ - for (x = x1 - 1; x >= 0 && (GET_DATA_BIT(line, x) == 1); x--) - CLEAR_DATA_BIT(line,x); - if (x >= x1 - 1) /* pix at x1 - 1 was off and was not cleared */ - goto skip; - xstart = x + 1; - if (xstart < x1) /* leak on left? 
*/ - pushFillseg(stack, xstart, x1 - 1, y, -dy, ymax); - - x = x1; - do { - for (; x <= xmax && (GET_DATA_BIT(line, x) == 1); x++) - CLEAR_DATA_BIT(line, x); - pushFillseg(stack, xstart, x - 1, y, dy, ymax); - if (x > x2) /* leak on right? */ - pushFillseg(stack, x2 + 1, x - 1, y, -dy, ymax); - skip: for (x++; x <= x2 + 1 && - x <= xmax && - (GET_DATA_BIT(line, x) == 0); x++) - ; - xstart = x; - } while (x <= x2 + 1 && x <= xmax); - } - - return 0; -} - - - -/*-----------------------------------------------------------------------* - * Static stack helper functions: push and pop fillsegs * - *-----------------------------------------------------------------------*/ -/*! - * \brief pushFillsegBB() - * - * \param[in] stack - * \param[in] xleft, xright - * \param[in] y - * \param[in] dy - * \param[in] ymax - * \param[out] pminx minimum x - * \param[out] pmaxx maximum x - * \param[out] pminy minimum y - * \param[out] pmaxy maximum y - * \return void - * - *
- * Notes:
- *      (1) This adds a line segment to the stack, and returns its size.
- *      (2) The auxiliary stack is used as a storage area to recycle
- *          fillsegs that are no longer in use.  We only calloc new
- *          fillsegs if the auxiliary stack is empty.
- * 
- */ -static void -pushFillsegBB(L_STACK *stack, - l_int32 xleft, - l_int32 xright, - l_int32 y, - l_int32 dy, - l_int32 ymax, - l_int32 *pminx, - l_int32 *pmaxx, - l_int32 *pminy, - l_int32 *pmaxy) -{ -FILLSEG *fseg; -L_STACK *auxstack; - - PROCNAME("pushFillsegBB"); - - if (!stack) { - L_ERROR("stack not defined\n", procName); - return; - } - - *pminx = L_MIN(*pminx, xleft); - *pmaxx = L_MAX(*pmaxx, xright); - *pminy = L_MIN(*pminy, y); - *pmaxy = L_MAX(*pmaxy, y); - - if (y + dy >= 0 && y + dy <= ymax) { - if ((auxstack = stack->auxstack) == NULL) { - L_ERROR("auxstack not defined\n", procName); - return; - } - - /* Get a fillseg to use */ - if (lstackGetCount(auxstack) > 0) - fseg = (FILLSEG *)lstackRemove(auxstack); - else - fseg = (FILLSEG *)LEPT_CALLOC(1, sizeof(FILLSEG)); - fseg->xleft = xleft; - fseg->xright = xright; - fseg->y = y; - fseg->dy = dy; - lstackAdd(stack, fseg); - } - return; -} - - -/*! - * \brief pushFillseg() - * - * \param[in] stack - * \param[in] xleft, xright - * \param[in] y - * \param[in] dy - * \param[in] ymax - * \return void - * - *
- * Notes:
- *      (1) This adds a line segment to the stack.
- *      (2) The auxiliary stack is used as a storage area to recycle
- *          fillsegs that are no longer in use.  We only calloc new
- *          fillsegs if the auxiliary stack is empty.
- * 
- */ -static void -pushFillseg(L_STACK *stack, - l_int32 xleft, - l_int32 xright, - l_int32 y, - l_int32 dy, - l_int32 ymax) -{ -FILLSEG *fseg; -L_STACK *auxstack; - - PROCNAME("pushFillseg"); - - if (!stack) { - L_ERROR("stack not defined\n", procName); - return; - } - - if (y + dy >= 0 && y + dy <= ymax) { - if ((auxstack = stack->auxstack) == NULL) { - L_ERROR("auxstack not defined\n", procName); - return; - } - - /* Get a fillseg to use */ - if (lstackGetCount(auxstack) > 0) - fseg = (FILLSEG *)lstackRemove(auxstack); - else - fseg = (FILLSEG *)LEPT_CALLOC(1, sizeof(FILLSEG)); - fseg->xleft = xleft; - fseg->xright = xright; - fseg->y = y; - fseg->dy = dy; - lstackAdd(stack, fseg); - } - return; -} - - -/*! - * \brief popFillseg() - * - * \param[in] stack - * \param[out] pxleft left x - * \param[out] pxright right x - * \param[out] py y coordinate - * \param[out] pdy delta y - * \return void - * - *
- * Notes:
- *      (1) This removes a line segment from the stack, and returns its size.
- *      (2) The surplussed fillseg is placed on the auxiliary stack
- *          for future use.
- * 
- */ -static void -popFillseg(L_STACK *stack, - l_int32 *pxleft, - l_int32 *pxright, - l_int32 *py, - l_int32 *pdy) -{ -FILLSEG *fseg; -L_STACK *auxstack; - - PROCNAME("popFillseg"); - - if (!stack) { - L_ERROR("stack not defined\n", procName); - return; - } - if ((auxstack = stack->auxstack) == NULL) { - L_ERROR("auxstack not defined\n", procName); - return; - } - - if ((fseg = (FILLSEG *)lstackRemove(stack)) == NULL) - return; - - *pxleft = fseg->xleft; - *pxright = fseg->xright; - *py = fseg->y + fseg->dy; /* this now points to the new line */ - *pdy = fseg->dy; - - /* Save it for re-use */ - lstackAdd(auxstack, fseg); - return; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/convertfiles.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/convertfiles.c deleted file mode 100644 index 7c229e07..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/convertfiles.c +++ /dev/null @@ -1,149 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file convertfiles.c - *
- *
- *      Conversion to 1 bpp
- *          l_int32    convertFilesTo1bpp()
- *
- *  These are utility functions that will perform depth conversion
- *  on selected files, writing the results to a specified directory.
- *  We start with conversion to 1 bpp.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*------------------------------------------------------------------* - * Conversion to 1 bpp * - *------------------------------------------------------------------*/ -/*! - * \brief convertFilesTo1bpp() - * - * \param[in] dirin - * \param[in] substr [optional] substring filter on filenames; - 8 can be NULL - * \param[in] upscaling 1, 2 or 4; only for input color or grayscale - * \param[in] thresh global threshold for binarization; 0 for default - * \param[in] firstpage - * \param[in] npages use 0 to do all from %firstpage to the end - * \param[in] dirout - * \param[in] outformat IFF_PNG, IFF_TIFF_G4 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Images are sorted lexicographically, and the names in the
- *          output directory are retained except for the extension.
- * 
- */ -l_ok -convertFilesTo1bpp(const char *dirin, - const char *substr, - l_int32 upscaling, - l_int32 thresh, - l_int32 firstpage, - l_int32 npages, - const char *dirout, - l_int32 outformat) -{ -l_int32 i, nfiles; -char buf[512]; -char *fname, *tail, *basename; -PIX *pixs, *pixg1, *pixg2, *pixb; -SARRAY *safiles; - - PROCNAME("convertFilesTo1bpp"); - - if (!dirin) - return ERROR_INT("dirin", procName, 1); - if (!dirout) - return ERROR_INT("dirout", procName, 1); - if (upscaling != 1 && upscaling != 2 && upscaling != 4) - return ERROR_INT("invalid upscaling factor", procName, 1); - if (thresh <= 0) thresh = 180; - if (firstpage < 0) firstpage = 0; - if (npages < 0) npages = 0; - if (outformat != IFF_TIFF_G4) - outformat = IFF_PNG; - - safiles = getSortedPathnamesInDirectory(dirin, substr, firstpage, npages); - if (!safiles) - return ERROR_INT("safiles not made", procName, 1); - if ((nfiles = sarrayGetCount(safiles)) == 0) { - sarrayDestroy(&safiles); - return ERROR_INT("no matching files in the directory", procName, 1); - } - - for (i = 0; i < nfiles; i++) { - fname = sarrayGetString(safiles, i, L_NOCOPY); - if ((pixs = pixRead(fname)) == NULL) { - L_WARNING("Couldn't read file %s\n", procName, fname); - continue; - } - if (pixGetDepth(pixs) == 32) - pixg1 = pixConvertRGBToLuminance(pixs); - else - pixg1 = pixClone(pixs); - pixg2 = pixRemoveColormap(pixg1, REMOVE_CMAP_TO_GRAYSCALE); - if (pixGetDepth(pixg2) == 1) { - pixb = pixClone(pixg2); - } else { - if (upscaling == 1) - pixb = pixThresholdToBinary(pixg2, thresh); - else if (upscaling == 2) - pixb = pixScaleGray2xLIThresh(pixg2, thresh); - else /* upscaling == 4 */ - pixb = pixScaleGray4xLIThresh(pixg2, thresh); - } - pixDestroy(&pixs); - pixDestroy(&pixg1); - pixDestroy(&pixg2); - - splitPathAtDirectory(fname, NULL, &tail); - splitPathAtExtension(tail, &basename, NULL); - if (outformat == IFF_TIFF_G4) { - snprintf(buf, sizeof(buf), "%s/%s.tif", dirout, basename); - pixWrite(buf, pixb, IFF_TIFF_G4); - } else { 
- snprintf(buf, sizeof(buf), "%s/%s.png", dirout, basename); - pixWrite(buf, pixb, IFF_PNG); - } - pixDestroy(&pixb); - LEPT_FREE(tail); - LEPT_FREE(basename); - } - - sarrayDestroy(&safiles); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/convolve.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/convolve.c deleted file mode 100644 index 0d118795..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/convolve.c +++ /dev/null @@ -1,2580 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file convolve.c - *
- *
- *      Top level grayscale or color block convolution
- *          PIX          *pixBlockconv()
- *
- *      Grayscale block convolution
- *          PIX          *pixBlockconvGray()
- *          static void   blockconvLow()
- *
- *      Accumulator for 1, 8 and 32 bpp convolution
- *          PIX          *pixBlockconvAccum()
- *          static void   blockconvAccumLow()
- *
- *      Un-normalized grayscale block convolution
- *          PIX          *pixBlockconvGrayUnnormalized()
- *
- *      Tiled grayscale or color block convolution
- *          PIX          *pixBlockconvTiled()
- *          PIX          *pixBlockconvGrayTile()
- *
- *      Convolution for mean, mean square, variance and rms deviation
- *      in specified window
- *          l_int32       pixWindowedStats()
- *          PIX          *pixWindowedMean()
- *          PIX          *pixWindowedMeanSquare()
- *          l_int32       pixWindowedVariance()
- *          DPIX         *pixMeanSquareAccum()
- *
- *      Binary block sum and rank filter
- *          PIX          *pixBlockrank()
- *          PIX          *pixBlocksum()
- *          static void   blocksumLow()
- *
- *      Census transform
- *          PIX          *pixCensusTransform()
- *
- *      Generic convolution (with Pix)
- *          PIX          *pixConvolve()
- *          PIX          *pixConvolveSep()
- *          PIX          *pixConvolveRGB()
- *          PIX          *pixConvolveRGBSep()
- *
- *      Generic convolution (with float arrays)
- *          FPIX         *fpixConvolve()
- *          FPIX         *fpixConvolveSep()
- *
- *      Convolution with bias (for non-negative output)
- *          PIX          *pixConvolveWithBias()
- *
- *      Set parameter for convolution subsampling
- *          void          l_setConvolveSampling()
- *
- *      Additive gaussian noise
- *          PIX          *pixAddGaussNoise()
- *          l_float32     gaussDistribSampling()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* These globals determine the subsampling factors for - * generic convolution of pix and fpix. Declare extern to use. - * To change the values, use l_setConvolveSampling(). */ -LEPT_DLL l_int32 ConvolveSamplingFactX = 1; -LEPT_DLL l_int32 ConvolveSamplingFactY = 1; - - /* Low-level static functions */ -static void blockconvLow(l_uint32 *data, l_int32 w, l_int32 h, l_int32 wpl, - l_uint32 *dataa, l_int32 wpla, l_int32 wc, - l_int32 hc); -static void blockconvAccumLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 d, - l_int32 wpls); -static void blocksumLow(l_uint32 *datad, l_int32 w, l_int32 h, l_int32 wpl, - l_uint32 *dataa, l_int32 wpla, l_int32 wc, l_int32 hc); - - -/*----------------------------------------------------------------------* - * Top-level grayscale or color block convolution * - *----------------------------------------------------------------------*/ -/*! - * \brief pixBlockconv() - * - * \param[in] pix 8 or 32 bpp; or 2, 4 or 8 bpp with colormap - * \param[in] wc, hc half width/height of convolution kernel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1)
- *      (2) Returns a copy if both wc and hc are 0
- *      (3) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- * 
- */ -PIX * -pixBlockconv(PIX *pix, - l_int32 wc, - l_int32 hc) -{ -l_int32 w, h, d; -PIX *pixs, *pixd, *pixr, *pixrc, *pixg, *pixgc, *pixb, *pixbc; - - PROCNAME("pixBlockconv"); - - if (!pix) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - pixGetDimensions(pix, &w, &h, &d); - if (w < 2 * wc + 1 || h < 2 * hc + 1) { - wc = L_MIN(wc, (w - 1) / 2); - hc = L_MIN(hc, (h - 1) / 2); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) /* no-op */ - return pixCopy(NULL, pix); - - /* Remove colormap if necessary */ - if ((d == 2 || d == 4 || d == 8) && pixGetColormap(pix)) { - L_WARNING("pix has colormap; removing\n", procName); - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixs); - } else { - pixs = pixClone(pix); - } - - if (d != 8 && d != 32) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, NULL); - } - - if (d == 8) { - pixd = pixBlockconvGray(pixs, NULL, wc, hc); - } else { /* d == 32 */ - pixr = pixGetRGBComponent(pixs, COLOR_RED); - pixrc = pixBlockconvGray(pixr, NULL, wc, hc); - pixDestroy(&pixr); - pixg = pixGetRGBComponent(pixs, COLOR_GREEN); - pixgc = pixBlockconvGray(pixg, NULL, wc, hc); - pixDestroy(&pixg); - pixb = pixGetRGBComponent(pixs, COLOR_BLUE); - pixbc = pixBlockconvGray(pixb, NULL, wc, hc); - pixDestroy(&pixb); - pixd = pixCreateRGBImage(pixrc, pixgc, pixbc); - pixDestroy(&pixrc); - pixDestroy(&pixgc); - pixDestroy(&pixbc); - } - - pixDestroy(&pixs); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Grayscale block convolution * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixBlockconvGray() - * - * \param[in] pixs 8 bpp - * \param[in] pixacc pix 32 bpp; can be null - * \param[in] wc, hc half width/height of convolution kernel - * \return pix 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) If accum pix is null, make one and destroy it before
- *          returning; otherwise, just use the input accum pix.
- *      (2) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1).
- *      (3) Returns a copy if both wc and hc are 0.
- *      (4) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- * 
- */ -PIX * -pixBlockconvGray(PIX *pixs, - PIX *pixacc, - l_int32 wc, - l_int32 hc) -{ -l_int32 w, h, d, wpl, wpla; -l_uint32 *datad, *dataa; -PIX *pixd, *pixt; - - PROCNAME("pixBlockconvGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - if (w < 2 * wc + 1 || h < 2 * hc + 1) { - wc = L_MIN(wc, (w - 1) / 2); - hc = L_MIN(hc, (h - 1) / 2); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) /* no-op */ - return pixCopy(NULL, pixs); - - if (pixacc) { - if (pixGetDepth(pixacc) == 32) { - pixt = pixClone(pixacc); - } else { - L_WARNING("pixacc not 32 bpp; making new one\n", procName); - if ((pixt = pixBlockconvAccum(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - } else { - if ((pixt = pixBlockconvAccum(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - - if ((pixd = pixCreateTemplate(pixs)) == NULL) { - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - - wpl = pixGetWpl(pixs); - wpla = pixGetWpl(pixt); - datad = pixGetData(pixd); - dataa = pixGetData(pixt); - blockconvLow(datad, w, h, wpl, dataa, wpla, wc, hc); - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief blockconvLow() - * - * \param[in] data data of input image, to be convolved - * \param[in] w, h, wpl - * \param[in] dataa data of 32 bpp accumulator - * \param[in] wpla accumulator - * \param[in] wc convolution "half-width" - * \param[in] hc convolution "half-height" - * \return void - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1).
- *      (2) The lack of symmetry between the handling of the
- *          first (hc + 1) lines and the last (hc) lines,
- *          and similarly with the columns, is due to fact that
- *          for the pixel at (x,y), the accumulator values are
- *          taken at (x + wc, y + hc), (x - wc - 1, y + hc),
- *          (x + wc, y - hc - 1) and (x - wc - 1, y - hc - 1).
- *      (3) We compute sums, normalized as if there were no reduced
- *          area at the boundary.  This under-estimates the value
- *          of the boundary pixels, so we multiply them by another
- *          normalization factor that is greater than 1.
- *      (4) This second normalization is done first for the first
- *          hc + 1 lines; then for the last hc lines; and finally
- *          for the first wc + 1 and last wc columns in the intermediate
- *          lines.
- *      (5) The caller should verify that wc < w and hc < h.
- *          Under those conditions, illegal reads and writes can occur.
- *      (6) Implementation note: to get the same results in the interior
- *          between this function and pixConvolve(), it is necessary to
- *          add 0.5 for roundoff in the main loop that runs over all pixels.
- *          However, if we do that and have white (255) pixels near the
- *          image boundary, some overflow occurs for pixels very close
- *          to the boundary.  We can't fix this by subtracting from the
- *          normalized values for the boundary pixels, because this results
- *          in underflow if the boundary pixels are black (0).  Empirically,
- *          adding 0.25 (instead of 0.5) before truncating in the main
- *          loop will not cause overflow, but this gives some
- *          off-by-1-level errors in interior pixel values.  So we add
- *          0.5 for roundoff in the main loop, and for pixels within a
- *          half filter width of the boundary, use a L_MIN of the
- *          computed value and 255 to avoid overflow during normalization.
- * 
- */ -static void -blockconvLow(l_uint32 *data, - l_int32 w, - l_int32 h, - l_int32 wpl, - l_uint32 *dataa, - l_int32 wpla, - l_int32 wc, - l_int32 hc) -{ -l_int32 i, j, imax, imin, jmax, jmin; -l_int32 wn, hn, fwc, fhc, wmwc, hmhc; -l_float32 norm, normh, normw; -l_uint32 val; -l_uint32 *linemina, *linemaxa, *line; - - PROCNAME("blockconvLow"); - - wmwc = w - wc; - hmhc = h - hc; - if (wmwc <= 0 || hmhc <= 0) { - L_ERROR("wc >= w || hc >=h\n", procName); - return; - } - fwc = 2 * wc + 1; - fhc = 2 * hc + 1; - norm = 1.0 / ((l_float32)(fwc) * fhc); - - /*------------------------------------------------------------* - * Compute, using b.c. only to set limits on the accum image * - *------------------------------------------------------------*/ - for (i = 0; i < h; i++) { - imin = L_MAX(i - 1 - hc, 0); - imax = L_MIN(i + hc, h - 1); - line = data + wpl * i; - linemina = dataa + wpla * imin; - linemaxa = dataa + wpla * imax; - for (j = 0; j < w; j++) { - jmin = L_MAX(j - 1 - wc, 0); - jmax = L_MIN(j + wc, w - 1); - val = linemaxa[jmax] - linemaxa[jmin] - + linemina[jmin] - linemina[jmax]; - val = (l_uint8)(norm * val + 0.5); /* see comment above */ - SET_DATA_BYTE(line, j, val); - } - } - - /*------------------------------------------------------------* - * Fix normalization for boundary pixels * - *------------------------------------------------------------*/ - for (i = 0; i <= hc; i++) { /* first hc + 1 lines */ - hn = hc + i; - normh = (l_float32)fhc / (l_float32)hn; /* > 1 */ - line = data + wpl * i; - for (j = 0; j <= wc; j++) { - wn = wc + j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normh * normw, 255); - SET_DATA_BYTE(line, j, val); - } - for (j = wc + 1; j < wmwc; j++) { - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normh, 255); - SET_DATA_BYTE(line, j, val); - } - for (j = wmwc; j < w; j++) { - wn = wc + w - j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = 
GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normh * normw, 255); - SET_DATA_BYTE(line, j, val); - } - } - - for (i = hmhc; i < h; i++) { /* last hc lines */ - hn = hc + h - i; - normh = (l_float32)fhc / (l_float32)hn; /* > 1 */ - line = data + wpl * i; - for (j = 0; j <= wc; j++) { - wn = wc + j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normh * normw, 255); - SET_DATA_BYTE(line, j, val); - } - for (j = wc + 1; j < wmwc; j++) { - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normh, 255); - SET_DATA_BYTE(line, j, val); - } - for (j = wmwc; j < w; j++) { - wn = wc + w - j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normh * normw, 255); - SET_DATA_BYTE(line, j, val); - } - } - - for (i = hc + 1; i < hmhc; i++) { /* intermediate lines */ - line = data + wpl * i; - for (j = 0; j <= wc; j++) { /* first wc + 1 columns */ - wn = wc + j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normw, 255); - SET_DATA_BYTE(line, j, val); - } - for (j = wmwc; j < w; j++) { /* last wc columns */ - wn = wc + w - j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(line, j); - val = (l_uint8)L_MIN(val * normw, 255); - SET_DATA_BYTE(line, j, val); - } - } - - return; -} - - -/*----------------------------------------------------------------------* - * Accumulator for 1, 8 and 32 bpp convolution * - *----------------------------------------------------------------------*/ -/*! - * \brief pixBlockconvAccum() - * - * \param[in] pixs 1, 8 or 32 bpp - * \return accum pix 32 bpp, or NULL on error. - * - *
- * Notes:
- *      (1) The general recursion relation is
- *            a(i,j) = v(i,j) + a(i-1, j) + a(i, j-1) - a(i-1, j-1)
- *          For the first line, this reduces to the special case
- *            a(i,j) = v(i,j) + a(i, j-1)
- *          For the first column, the special case is
- *            a(i,j) = v(i,j) + a(i-1, j)
- * 
- */ -PIX * -pixBlockconvAccum(PIX *pixs) -{ -l_int32 w, h, d, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixBlockconvAccum"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 1, 8 or 32 bpp", procName, NULL); - if ((pixd = pixCreate(w, h, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - blockconvAccumLow(datad, w, h, wpld, datas, d, wpls); - - return pixd; -} - - -/* - * \brief blockconvAccumLow() - * - * \param[in] datad 32 bpp dest - * \param[in] w, h, wpld of 32 bpp dest - * \param[in] datas 1, 8 or 32 bpp src - * \param[in] d bpp of src - * \param[in] wpls of src - * \return void - * - *
- * Notes:
- *      (1) The general recursion relation is
- *             a(i,j) = v(i,j) + a(i-1, j) + a(i, j-1) - a(i-1, j-1)
- *          For the first line, this reduces to the special case
- *             a(0,j) = v(0,j) + a(0, j-1), j > 0
- *          For the first column, the special case is
- *             a(i,0) = v(i,0) + a(i-1, 0), i > 0
- * 
- */ -static void -blockconvAccumLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 d, - l_int32 wpls) -{ -l_uint8 val; -l_int32 i, j; -l_uint32 val32; -l_uint32 *lines, *lined, *linedp; - - PROCNAME("blockconvAccumLow"); - - lines = datas; - lined = datad; - - if (d == 1) { - /* Do the first line */ - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(lines, j); - if (j == 0) - lined[0] = val; - else - lined[j] = lined[j - 1] + val; - } - - /* Do the other lines */ - for (i = 1; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; /* curr dest line */ - linedp = lined - wpld; /* prev dest line */ - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(lines, j); - if (j == 0) - lined[0] = val + linedp[0]; - else - lined[j] = val + lined[j - 1] + linedp[j] - linedp[j - 1]; - } - } - } else if (d == 8) { - /* Do the first line */ - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - if (j == 0) - lined[0] = val; - else - lined[j] = lined[j - 1] + val; - } - - /* Do the other lines */ - for (i = 1; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; /* curr dest line */ - linedp = lined - wpld; /* prev dest line */ - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - if (j == 0) - lined[0] = val + linedp[0]; - else - lined[j] = val + lined[j - 1] + linedp[j] - linedp[j - 1]; - } - } - } else if (d == 32) { - /* Do the first line */ - for (j = 0; j < w; j++) { - val32 = lines[j]; - if (j == 0) - lined[0] = val32; - else - lined[j] = lined[j - 1] + val32; - } - - /* Do the other lines */ - for (i = 1; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; /* curr dest line */ - linedp = lined - wpld; /* prev dest line */ - for (j = 0; j < w; j++) { - val32 = lines[j]; - if (j == 0) - lined[0] = val32 + linedp[0]; - else - lined[j] = val32 + lined[j - 1] + linedp[j] - linedp[j - 1]; - } - } - } else { - L_ERROR("depth not 1, 8 or 32 bpp\n", procName); - } - - return; -} - 
- -/*----------------------------------------------------------------------* - * Un-normalized grayscale block convolution * - *----------------------------------------------------------------------*/ -/*! - * \brief pixBlockconvGrayUnnormalized() - * - * \param[in] pixs 8 bpp - * \param[in] wc, hc half width/height of convolution kernel - * \return pix 32 bpp; containing the convolution without normalizing - * for the window size, or NULL on error - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1).
- *      (2) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- *      (3) Returns a copy if both wc and hc are 0.
- *      (3) Adds mirrored border to avoid treating the boundary pixels
- *          specially.  Note that we add wc + 1 pixels to the left
- *          and wc to the right.  The added width is 2 * wc + 1 pixels,
- *          and the particular choice simplifies the indexing in the loop.
- *          Likewise, add hc + 1 pixels to the top and hc to the bottom.
- *      (4) To get the normalized result, divide by the area of the
- *          convolution kernel: (2 * wc + 1) * (2 * hc + 1)
- *          Specifically, do this:
- *               pixc = pixBlockconvGrayUnnormalized(pixs, wc, hc);
- *               fract = 1. / ((2 * wc + 1) * (2 * hc + 1));
- *               pixMultConstantGray(pixc, fract);
- *               pixd = pixGetRGBComponent(pixc, L_ALPHA_CHANNEL);
- *      (5) Unlike pixBlockconvGray(), this always computes the accumulation
- *          pix because its size is tied to wc and hc.
- *      (6) Compare this implementation with pixBlockconvGray(), where
- *          most of the code in blockconvLow() is special casing for
- *          efficiently handling the boundary.  Here, the use of
- *          mirrored borders and destination indexing makes the
- *          implementation very simple.
- * 
- */ -PIX * -pixBlockconvGrayUnnormalized(PIX *pixs, - l_int32 wc, - l_int32 hc) -{ -l_int32 i, j, w, h, d, wpla, wpld, jmax; -l_uint32 *linemina, *linemaxa, *lined, *dataa, *datad; -PIX *pixsb, *pixacc, *pixd; - - PROCNAME("pixBlockconvGrayUnnormalized"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - if (w < 2 * wc + 1 || h < 2 * hc + 1) { - wc = L_MIN(wc, (w - 1) / 2); - hc = L_MIN(hc, (h - 1) / 2); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) /* no-op */ - return pixCopy(NULL, pixs); - - if ((pixsb = pixAddMirroredBorder(pixs, wc + 1, wc, hc + 1, hc)) == NULL) - return (PIX *)ERROR_PTR("pixsb not made", procName, NULL); - pixacc = pixBlockconvAccum(pixsb); - pixDestroy(&pixsb); - if (!pixacc) - return (PIX *)ERROR_PTR("pixacc not made", procName, NULL); - if ((pixd = pixCreate(w, h, 32)) == NULL) { - pixDestroy(&pixacc); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - - wpla = pixGetWpl(pixacc); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - dataa = pixGetData(pixacc); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linemina = dataa + i * wpla; - linemaxa = dataa + (i + 2 * hc + 1) * wpla; - for (j = 0; j < w; j++) { - jmax = j + 2 * wc + 1; - lined[j] = linemaxa[jmax] - linemaxa[j] - - linemina[jmax] + linemina[j]; - } - } - - pixDestroy(&pixacc); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Tiled grayscale or color block convolution * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixBlockconvTiled() - * - * \param[in] pix 8 or 32 bpp; or 2, 4 or 8 bpp with colormap - * \param[in] wc, hc half width/height of convolution kernel - * \param[in] nx, ny subdivision into tiles - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1)
- *      (2) Returns a copy if both wc and hc are 0
- *      (3) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- *      (4) For nx == ny == 1, this defaults to pixBlockconv(), which
- *          is typically about twice as fast, and gives nearly
- *          identical results as pixBlockconvGrayTile().
- *      (5) If the tiles are too small, nx and/or ny are reduced
- *          a minimum amount so that the tiles are expanded to the
- *          smallest workable size in the problematic direction(s).
- *      (6) Why a tiled version?  Three reasons:
- *          (a) Because the accumulator is a uint32, overflow can occur
- *              for an image with more than 16M pixels.
- *          (b) The accumulator array for 16M pixels is 64 MB; using
- *              tiles reduces the size of this array.
- *          (c) Each tile can be processed independently, in parallel,
- *              on a multicore processor.
- * 
- */ -PIX * -pixBlockconvTiled(PIX *pix, - l_int32 wc, - l_int32 hc, - l_int32 nx, - l_int32 ny) -{ -l_int32 i, j, w, h, d, xrat, yrat; -PIX *pixs, *pixd, *pixc, *pixt; -PIX *pixr, *pixrc, *pixg, *pixgc, *pixb, *pixbc; -PIXTILING *pt; - - PROCNAME("pixBlockconvTiled"); - - if (!pix) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - pixGetDimensions(pix, &w, &h, &d); - if (w < 2 * wc + 3 || h < 2 * hc + 3) { - wc = L_MAX(0, L_MIN(wc, (w - 3) / 2)); - hc = L_MAX(0, L_MIN(hc, (h - 3) / 2)); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) /* no-op */ - return pixCopy(NULL, pix); - if (nx <= 1 && ny <= 1) - return pixBlockconv(pix, wc, hc); - - /* Test to see if the tiles are too small. The required - * condition is that the tile dimensions must be at least - * (wc + 2) x (hc + 2). */ - xrat = w / nx; - yrat = h / ny; - if (xrat < wc + 2) { - nx = w / (wc + 2); - L_WARNING("tile width too small; nx reduced to %d\n", procName, nx); - } - if (yrat < hc + 2) { - ny = h / (hc + 2); - L_WARNING("tile height too small; ny reduced to %d\n", procName, ny); - } - - /* Remove colormap if necessary */ - if ((d == 2 || d == 4 || d == 8) && pixGetColormap(pix)) { - L_WARNING("pix has colormap; removing\n", procName); - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixs); - } else { - pixs = pixClone(pix); - } - - if (d != 8 && d != 32) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, NULL); - } - - /* Note that the overlaps in the width and height that - * are added to the tile are (wc + 2) and (hc + 2). - * These overlaps are removed by pixTilingPaintTile(). - * They are larger than the extent of the filter because - * although the filter is symmetric with respect to its origin, - * the implementation is asymmetric -- see the implementation in - * pixBlockconvGrayTile(). 
*/ - if ((pixd = pixCreateTemplate(pixs)) == NULL) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pt = pixTilingCreate(pixs, nx, ny, 0, 0, wc + 2, hc + 2); - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - pixt = pixTilingGetTile(pt, i, j); - - /* Convolve over the tile */ - if (d == 8) { - pixc = pixBlockconvGrayTile(pixt, NULL, wc, hc); - } else { /* d == 32 */ - pixr = pixGetRGBComponent(pixt, COLOR_RED); - pixrc = pixBlockconvGrayTile(pixr, NULL, wc, hc); - pixDestroy(&pixr); - pixg = pixGetRGBComponent(pixt, COLOR_GREEN); - pixgc = pixBlockconvGrayTile(pixg, NULL, wc, hc); - pixDestroy(&pixg); - pixb = pixGetRGBComponent(pixt, COLOR_BLUE); - pixbc = pixBlockconvGrayTile(pixb, NULL, wc, hc); - pixDestroy(&pixb); - pixc = pixCreateRGBImage(pixrc, pixgc, pixbc); - pixDestroy(&pixrc); - pixDestroy(&pixgc); - pixDestroy(&pixbc); - } - - pixTilingPaintTile(pixd, i, j, pixc, pt); - pixDestroy(&pixt); - pixDestroy(&pixc); - } - } - - pixDestroy(&pixs); - pixTilingDestroy(&pt); - return pixd; -} - - -/*! - * \brief pixBlockconvGrayTile() - * - * \param[in] pixs 8 bpp gray - * \param[in] pixacc 32 bpp accum pix - * \param[in] wc, hc half width/height of convolution kernel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1)
- *      (2) Assumes that the input pixs is padded with (wc + 1) pixels on
- *          left and right, and with (hc + 1) pixels on top and bottom.
- *          The returned pix has these stripped off; they are only used
- *          for computation.
- *      (3) Returns a copy if both wc and hc are 0
- *      (4) Require that w > 2 * wc + 1 and h > 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- * 
- */ -PIX * -pixBlockconvGrayTile(PIX *pixs, - PIX *pixacc, - l_int32 wc, - l_int32 hc) -{ -l_int32 w, h, d, wd, hd, i, j, imin, imax, jmin, jmax, wplt, wpld; -l_float32 norm; -l_uint32 val; -l_uint32 *datat, *datad, *lined, *linemint, *linemaxt; -PIX *pixt, *pixd; - - PROCNAME("pixBlockconvGrayTile"); - - if (!pixs) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - if (w < 2 * wc + 3 || h < 2 * hc + 3) { - wc = L_MAX(0, L_MIN(wc, (w - 3) / 2)); - hc = L_MAX(0, L_MIN(hc, (h - 3) / 2)); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) - return pixCopy(NULL, pixs); - wd = w - 2 * wc; - hd = h - 2 * hc; - - if (pixacc) { - if (pixGetDepth(pixacc) == 32) { - pixt = pixClone(pixacc); - } else { - L_WARNING("pixacc not 32 bpp; making new one\n", procName); - if ((pixt = pixBlockconvAccum(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - } else { - if ((pixt = pixBlockconvAccum(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - - if ((pixd = pixCreateTemplate(pixs)) == NULL) { - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - norm = 1. / (l_float32)((2 * wc + 1) * (2 * hc + 1)); - - /* Do the convolution over the subregion of size (wd - 2, hd - 2), - * which exactly corresponds to the size of the subregion that - * will be extracted by pixTilingPaintTile(). 
Note that the - * region in which points are computed is not symmetric about - * the center of the images; instead the computation in - * the accumulator image is shifted up and to the left by 1, - * relative to the center, because the 4 accumulator sampling - * points are taken at the LL corner of the filter and at 3 other - * points that are shifted -wc and -hc to the left and above. */ - for (i = hc; i < hc + hd - 2; i++) { - imin = L_MAX(i - hc - 1, 0); - imax = L_MIN(i + hc, h - 1); - lined = datad + i * wpld; - linemint = datat + imin * wplt; - linemaxt = datat + imax * wplt; - for (j = wc; j < wc + wd - 2; j++) { - jmin = L_MAX(j - wc - 1, 0); - jmax = L_MIN(j + wc, w - 1); - val = linemaxt[jmax] - linemaxt[jmin] - + linemint[jmin] - linemint[jmax]; - val = (l_uint8)(norm * val + 0.5); - SET_DATA_BYTE(lined, j, val); - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Convolution for mean, mean square, variance and rms deviation * - *----------------------------------------------------------------------*/ -/*! - * \brief pixWindowedStats() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] wc, hc half width/height of convolution kernel - * \param[in] hasborder use 1 if it already has (wc + 1 border pixels - * on left and right, and hc + 1 on top and bottom; - * use 0 to add kernel-dependent border) - * \param[out] ppixm [optional] 8 bpp mean value in window - * \param[out] ppixms [optional] 32 bpp mean square value in window - * \param[out] pfpixv [optional] float variance in window - * \param[out] pfpixrv [optional] float rms deviation from the mean - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a high-level convenience function for calculating
- *          any or all of these derived images.
- *      (2) If %hasborder = 0, a border is added and the result is
- *          computed over all pixels in pixs.  Otherwise, no border is
- *          added and the border pixels are removed from the output images.
- *      (3) These statistical measures over the pixels in the
- *          rectangular window are:
- *            ~ average value: <p>  (pixm)
- *            ~ average squared value: <p*p> (pixms)
- *            ~ variance: <(p - <p>)*(p - <p>)> = <p*p> - <p>*<p>  (pixv)
- *            ~ square-root of variance: (pixrv)
- *          where the brackets < .. > indicate that the average value is
- *          to be taken over the window.
- *      (4) Note that the variance is just the mean square difference from
- *          the mean value; and the square root of the variance is the
- *          root mean square difference from the mean, sometimes also
- *          called the 'standard deviation'.
- *      (5) The added border, along with the use of an accumulator array,
- *          allows computation without special treatment of pixels near
- *          the image boundary, and runs in a time that is independent
- *          of the size of the convolution kernel.
- *
- */ -l_ok -pixWindowedStats(PIX *pixs, - l_int32 wc, - l_int32 hc, - l_int32 hasborder, - PIX **ppixm, - PIX **ppixms, - FPIX **pfpixv, - FPIX **pfpixrv) -{ -PIX *pixb, *pixm, *pixms; - - PROCNAME("pixWindowedStats"); - - if (!ppixm && !ppixms && !pfpixv && !pfpixrv) - return ERROR_INT("no output requested", procName, 1); - if (ppixm) *ppixm = NULL; - if (ppixms) *ppixms = NULL; - if (pfpixv) *pfpixv = NULL; - if (pfpixrv) *pfpixrv = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (wc < 2 || hc < 2) - return ERROR_INT("wc and hc not >= 2", procName, 1); - - /* Add border if requested */ - if (!hasborder) - pixb = pixAddBorderGeneral(pixs, wc + 1, wc + 1, hc + 1, hc + 1, 0); - else - pixb = pixClone(pixs); - - if (!pfpixv && !pfpixrv) { - if (ppixm) *ppixm = pixWindowedMean(pixb, wc, hc, 1, 1); - if (ppixms) *ppixms = pixWindowedMeanSquare(pixb, wc, hc, 1); - pixDestroy(&pixb); - return 0; - } - - pixm = pixWindowedMean(pixb, wc, hc, 1, 1); - pixms = pixWindowedMeanSquare(pixb, wc, hc, 1); - pixWindowedVariance(pixm, pixms, pfpixv, pfpixrv); - if (ppixm) - *ppixm = pixm; - else - pixDestroy(&pixm); - if (ppixms) - *ppixms = pixms; - else - pixDestroy(&pixms); - pixDestroy(&pixb); - return 0; -} - - -/*! - * \brief pixWindowedMean() - * - * \param[in] pixs 8 or 32 bpp grayscale - * \param[in] wc, hc half width/height of convolution kernel - * \param[in] hasborder use 1 if it already has (wc + 1 border pixels - * on left and right, and hc + 1 on top and bottom; - * use 0 to add kernel-dependent border) - * \param[in] normflag 1 for normalization to get average in window; - * 0 for the sum in the window (un-normalized) - * \return pixd 8 or 32 bpp, average over kernel window - * - *
- * Notes:
- *      (1) The input and output depths are the same.
- *      (2) A set of border pixels of width (wc + 1) on left and right,
- *          and of height (hc + 1) on top and bottom, must be on the
- *          pix before the accumulator is found.  The output pixd
- *          (after convolution) has this border removed.
- *          If %hasborder = 0, the required border is added.
- *      (3) Typically, %normflag == 1.  However, if you want the sum
- *          within the window, rather than a normalized convolution,
- *          use %normflag == 0.
- *      (4) This builds a block accumulator pix, uses it here, and
- *          destroys it.
- *      (5) The added border, along with the use of an accumulator array,
- *          allows computation without special treatment of pixels near
- *          the image boundary, and runs in a time that is independent
- *          of the size of the convolution kernel.
- * 
- */ -PIX * -pixWindowedMean(PIX *pixs, - l_int32 wc, - l_int32 hc, - l_int32 hasborder, - l_int32 normflag) -{ -l_int32 i, j, w, h, d, wd, hd, wplc, wpld, wincr, hincr; -l_uint32 val; -l_uint32 *datac, *datad, *linec1, *linec2, *lined; -l_float32 norm; -PIX *pixb, *pixc, *pixd; - - PROCNAME("pixWindowedMean"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (wc < 2 || hc < 2) - return (PIX *)ERROR_PTR("wc and hc not >= 2", procName, NULL); - - pixb = pixc = pixd = NULL; - - /* Add border if requested */ - if (!hasborder) - pixb = pixAddBorderGeneral(pixs, wc + 1, wc + 1, hc + 1, hc + 1, 0); - else - pixb = pixClone(pixs); - - /* Make the accumulator pix from pixb */ - if ((pixc = pixBlockconvAccum(pixb)) == NULL) { - L_ERROR("pixc not made\n", procName); - goto cleanup; - } - wplc = pixGetWpl(pixc); - datac = pixGetData(pixc); - - /* The output has wc + 1 border pixels stripped from each side - * of pixb, and hc + 1 border pixels stripped from top and bottom. 
*/ - pixGetDimensions(pixb, &w, &h, NULL); - wd = w - 2 * (wc + 1); - hd = h - 2 * (hc + 1); - if (wd < 2 || hd < 2) { - L_ERROR("w or h is too small for the kernel\n", procName); - goto cleanup; - } - if ((pixd = pixCreate(wd, hd, d)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup; - } - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - wincr = 2 * wc + 1; - hincr = 2 * hc + 1; - norm = 1.0; /* use this for sum-in-window */ - if (normflag) - norm = 1.0 / ((l_float32)(wincr) * hincr); - for (i = 0; i < hd; i++) { - linec1 = datac + i * wplc; - linec2 = datac + (i + hincr) * wplc; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - val = linec2[j + wincr] - linec2[j] - linec1[j + wincr] + linec1[j]; - if (d == 8) { - val = (l_uint8)(norm * val); - SET_DATA_BYTE(lined, j, val); - } else { /* d == 32 */ - val = (l_uint32)(norm * val); - lined[j] = val; - } - } - } - -cleanup: - pixDestroy(&pixb); - pixDestroy(&pixc); - return pixd; -} - - -/*! - * \brief pixWindowedMeanSquare() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] wc, hc half width/height of convolution kernel - * \param[in] hasborder use 1 if it already has (wc + 1 border pixels - * on left and right, and hc + 1 on top and bottom; - * use 0 to add kernel-dependent border) - * \return pixd 32 bpp, average over rectangular window of - * width = 2 * wc + 1 and height = 2 * hc + 1 - * - *
- * Notes:
- *      (1) A set of border pixels of width (wc + 1) on left and right,
- *          and of height (hc + 1) on top and bottom, must be on the
- *          pix before the accumulator is found.  The output pixd
- *          (after convolution) has this border removed.
- *          If %hasborder = 0, the required border is added.
- *      (2) The advantage is that we are unaffected by the boundary, and
- *          it is not necessary to treat pixels within %wc and %hc of the
- *          border differently.  This is because processing for pixd
- *          only takes place for pixels in pixs for which the
- *          kernel is entirely contained in pixs.
- *      (3) Why do we have an added border of width (%wc + 1) and
- *          height (%hc + 1), when we only need %wc and %hc pixels
- *          to satisfy this condition?  Answer: the accumulators
- *          are asymmetric, requiring an extra row and column of
- *          pixels at top and left to work accurately.
- *      (4) The added border, along with the use of an accumulator array,
- *          allows computation without special treatment of pixels near
- *          the image boundary, and runs in a time that is independent
- *          of the size of the convolution kernel.
- * 
- */ -PIX * -pixWindowedMeanSquare(PIX *pixs, - l_int32 wc, - l_int32 hc, - l_int32 hasborder) -{ -l_int32 i, j, w, h, wd, hd, wpl, wpld, wincr, hincr; -l_uint32 ival; -l_uint32 *datad, *lined; -l_float64 norm; -l_float64 val; -l_float64 *data, *line1, *line2; -DPIX *dpix; -PIX *pixb, *pixd; - - PROCNAME("pixWindowedMeanSquare"); - - if (!pixs || (pixGetDepth(pixs) != 8)) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (wc < 2 || hc < 2) - return (PIX *)ERROR_PTR("wc and hc not >= 2", procName, NULL); - - pixd = NULL; - - /* Add border if requested */ - if (!hasborder) - pixb = pixAddBorderGeneral(pixs, wc + 1, wc + 1, hc + 1, hc + 1, 0); - else - pixb = pixClone(pixs); - - if ((dpix = pixMeanSquareAccum(pixb)) == NULL) { - L_ERROR("dpix not made\n", procName); - goto cleanup; - } - wpl = dpixGetWpl(dpix); - data = dpixGetData(dpix); - - /* The output has wc + 1 border pixels stripped from each side - * of pixb, and hc + 1 border pixels stripped from top and bottom. */ - pixGetDimensions(pixb, &w, &h, NULL); - wd = w - 2 * (wc + 1); - hd = h - 2 * (hc + 1); - if (wd < 2 || hd < 2) { - L_ERROR("w or h too small for kernel\n", procName); - goto cleanup; - } - if ((pixd = pixCreate(wd, hd, 32)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup; - } - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - wincr = 2 * wc + 1; - hincr = 2 * hc + 1; - norm = 1.0 / ((l_float32)(wincr) * hincr); - for (i = 0; i < hd; i++) { - line1 = data + i * wpl; - line2 = data + (i + hincr) * wpl; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - val = line2[j + wincr] - line2[j] - line1[j + wincr] + line1[j]; - ival = (l_uint32)(norm * val + 0.5); /* to round up */ - lined[j] = ival; - } - } - -cleanup: - dpixDestroy(&dpix); - pixDestroy(&pixb); - return pixd; -} - - -/*! 
- * \brief pixWindowedVariance() - * - * \param[in] pixm mean over window; 8 or 32 bpp grayscale - * \param[in] pixms mean square over window; 32 bpp - * \param[out] pfpixv [optional] float variance -- the ms deviation - * from the mean - * \param[out] pfpixrv [optional] float rms deviation from the mean - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The mean and mean square values are precomputed, using
- *          pixWindowedMean() and pixWindowedMeanSquare().
- *      (2) Either or both of the variance and square-root of variance
- *          are returned as an fpix, where the variance is the
- *          average over the window of the mean square difference of
- *          the pixel value from the mean:
- *                <(p - <p>)*(p - <p>)> = <p*p> - <p>*<p>
- *      (3) To visualize the results:
- *            ~ for both, use fpixDisplayMaxDynamicRange().
- *            ~ for rms deviation, simply convert the output fpix to pix,
- *
- */ -l_ok -pixWindowedVariance(PIX *pixm, - PIX *pixms, - FPIX **pfpixv, - FPIX **pfpixrv) -{ -l_int32 i, j, w, h, ws, hs, ds, wplm, wplms, wplv, wplrv, valm, valms; -l_float32 var; -l_uint32 *linem, *linems, *datam, *datams; -l_float32 *linev, *linerv, *datav, *datarv; -FPIX *fpixv, *fpixrv; /* variance and square root of variance */ - - PROCNAME("pixWindowedVariance"); - - if (!pfpixv && !pfpixrv) - return ERROR_INT("no output requested", procName, 1); - if (pfpixv) *pfpixv = NULL; - if (pfpixrv) *pfpixrv = NULL; - if (!pixm || pixGetDepth(pixm) != 8) - return ERROR_INT("pixm undefined or not 8 bpp", procName, 1); - if (!pixms || pixGetDepth(pixms) != 32) - return ERROR_INT("pixms undefined or not 32 bpp", procName, 1); - pixGetDimensions(pixm, &w, &h, NULL); - pixGetDimensions(pixms, &ws, &hs, &ds); - if (w != ws || h != hs) - return ERROR_INT("pixm and pixms sizes differ", procName, 1); - - if (pfpixv) { - fpixv = fpixCreate(w, h); - *pfpixv = fpixv; - wplv = fpixGetWpl(fpixv); - datav = fpixGetData(fpixv); - } - if (pfpixrv) { - fpixrv = fpixCreate(w, h); - *pfpixrv = fpixrv; - wplrv = fpixGetWpl(fpixrv); - datarv = fpixGetData(fpixrv); - } - - wplm = pixGetWpl(pixm); - wplms = pixGetWpl(pixms); - datam = pixGetData(pixm); - datams = pixGetData(pixms); - for (i = 0; i < h; i++) { - linem = datam + i * wplm; - linems = datams + i * wplms; - if (pfpixv) - linev = datav + i * wplv; - if (pfpixrv) - linerv = datarv + i * wplrv; - for (j = 0; j < w; j++) { - valm = GET_DATA_BYTE(linem, j); - if (ds == 8) - valms = GET_DATA_BYTE(linems, j); - else /* ds == 32 */ - valms = (l_int32)linems[j]; - var = (l_float32)valms - (l_float32)valm * valm; - if (pfpixv) - linev[j] = var; - if (pfpixrv) - linerv[j] = (l_float32)sqrt(var); - } - } - - return 0; -} - - -/*! - * \brief pixMeanSquareAccum() - * - * \param[in] pixs 8 bpp grayscale - * \return dpix 64 bit array, or NULL on error - * - *
- * Notes:
- *      (1) Similar to pixBlockconvAccum(), this computes the
- *          sum of the squares of the pixel values in such a way
- *          that the value at (i,j) is the sum of all squares in
- *          the rectangle from the origin to (i,j).
- *      (2) The general recursion relation (v are squared pixel values) is
- *            a(i,j) = v(i,j) + a(i-1, j) + a(i, j-1) - a(i-1, j-1)
- *          For the first line, this reduces to the special case
- *            a(i,j) = v(i,j) + a(i, j-1)
- *          For the first column, the special case is
- *            a(i,j) = v(i,j) + a(i-1, j)
- * 
- */ -DPIX * -pixMeanSquareAccum(PIX *pixs) -{ -l_int32 i, j, w, h, wpl, wpls, val; -l_uint32 *datas, *lines; -l_float64 *data, *line, *linep; -DPIX *dpix; - - PROCNAME("pixMeanSquareAccum"); - - - if (!pixs || (pixGetDepth(pixs) != 8)) - return (DPIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if ((dpix = dpixCreate(w, h)) == NULL) - return (DPIX *)ERROR_PTR("dpix not made", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - data = dpixGetData(dpix); - wpl = dpixGetWpl(dpix); - - lines = datas; - line = data; - for (j = 0; j < w; j++) { /* first line */ - val = GET_DATA_BYTE(lines, j); - if (j == 0) - line[0] = (l_float64)(val) * val; - else - line[j] = line[j - 1] + (l_float64)(val) * val; - } - - /* Do the other lines */ - for (i = 1; i < h; i++) { - lines = datas + i * wpls; - line = data + i * wpl; /* current dest line */ - linep = line - wpl;; /* prev dest line */ - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - if (j == 0) - line[0] = linep[0] + (l_float64)(val) * val; - else - line[j] = line[j - 1] + linep[j] - linep[j - 1] - + (l_float64)(val) * val; - } - } - - return dpix; -} - - -/*----------------------------------------------------------------------* - * Binary block sum/rank * - *----------------------------------------------------------------------*/ -/*! - * \brief pixBlockrank() - * - * \param[in] pixs 1 bpp - * \param[in] pixacc pix [optional] 32 bpp - * \param[in] wc, hc half width/height of block sum/rank kernel - * \param[in] rank between 0.0 and 1.0; 0.5 is median filter - * \return pixd 1 bpp - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1)
- *      (2) This returns a pixd where each pixel is a 1 if the
- *          neighborhood (2 * wc + 1) x (2 * hc + 1)) pixels
- *          contains the rank fraction of 1 pixels.  Otherwise,
- *          the returned pixel is 0.  Note that the special case
- *          of rank = 0.0 is always satisfied, so the returned
- *          pixd has all pixels with value 1.
- *      (3) If accum pix is null, make one, use it, and destroy it
- *          before returning; otherwise, just use the input accum pix
- *      (4) If both wc and hc are 0, returns a copy unless rank == 0.0,
- *          in which case this returns an all-ones image.
- *      (5) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- * 
- */ -PIX * -pixBlockrank(PIX *pixs, - PIX *pixacc, - l_int32 wc, - l_int32 hc, - l_float32 rank) -{ -l_int32 w, h, d, thresh; -PIX *pixt, *pixd; - - PROCNAME("pixBlockrank"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (rank < 0.0 || rank > 1.0) - return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", procName, NULL); - - if (rank == 0.0) { - pixd = pixCreateTemplate(pixs); - pixSetAll(pixd); - return pixd; - } - - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - if (w < 2 * wc + 1 || h < 2 * hc + 1) { - wc = L_MIN(wc, (w - 1) / 2); - hc = L_MIN(hc, (h - 1) / 2); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) - return pixCopy(NULL, pixs); - - if ((pixt = pixBlocksum(pixs, pixacc, wc, hc)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - - /* 1 bpp block rank filter output. - * Must invert because threshold gives 1 for values < thresh, - * but we need a 1 if the value is >= thresh. */ - thresh = (l_int32)(255. * rank); - pixd = pixThresholdToBinary(pixt, thresh); - pixInvert(pixd, pixd); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixBlocksum() - * - * \param[in] pixs 1 bpp - * \param[in] pixacc pix [optional] 32 bpp - * \param[in] wc, hc half width/height of block sum/rank kernel - * \return pixd 8 bpp - * - *
- * Notes:
- *      (1) If accum pix is null, make one and destroy it before
- *          returning; otherwise, just use the input accum pix
- *      (2) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1)
- *      (3) Use of wc = hc = 1, followed by pixInvert() on the
- *          8 bpp result, gives a nice anti-aliased, and somewhat
- *          darkened, result on text.
- *      (4) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1,
- *          where (w,h) are the dimensions of pixs.
- *      (5) Returns in each dest pixel the sum of all src pixels
- *          that are within a block of size of the kernel, centered
- *          on the dest pixel.  This sum is the number of src ON
- *          pixels in the block at each location, normalized to 255
- *          for a block containing all ON pixels.  For pixels near
- *          the boundary, where the block is not entirely contained
- *          within the image, we then multiply by a second normalization
- *          factor that is greater than one, so that all results
- *          are normalized by the number of participating pixels
- *          within the block.
- * 
- */ -PIX * -pixBlocksum(PIX *pixs, - PIX *pixacc, - l_int32 wc, - l_int32 hc) -{ -l_int32 w, h, d, wplt, wpld; -l_uint32 *datat, *datad; -PIX *pixt, *pixd; - - PROCNAME("pixBlocksum"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (wc < 0) wc = 0; - if (hc < 0) hc = 0; - if (w < 2 * wc + 1 || h < 2 * hc + 1) { - wc = L_MIN(wc, (w - 1) / 2); - hc = L_MIN(hc, (h - 1) / 2); - L_WARNING("kernel too large; reducing!\n", procName); - L_INFO("wc = %d, hc = %d\n", procName, wc, hc); - } - if (wc == 0 && hc == 0) - return pixCopy(NULL, pixs); - - if (pixacc) { - if (pixGetDepth(pixacc) != 32) - return (PIX *)ERROR_PTR("pixacc not 32 bpp", procName, NULL); - pixt = pixClone(pixacc); - } else { - if ((pixt = pixBlockconvAccum(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - - /* 8 bpp block sum output */ - if ((pixd = pixCreate(w, h, 8)) == NULL) { - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - - wpld = pixGetWpl(pixd); - wplt = pixGetWpl(pixt); - datad = pixGetData(pixd); - datat = pixGetData(pixt); - blocksumLow(datad, w, h, wpld, datat, wplt, wc, hc); - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief blocksumLow() - * - * \param[in] datad of 8 bpp dest - * \param[in] w, h, wpl of 8 bpp dest - * \param[in] dataa of 32 bpp accum - * \param[in] wpla of 32 bpp accum - * \param[in] wc, hc convolution "half-width" and "half-height" - * \return void - * - *
- * Notes:
- *      (1) The full width and height of the convolution kernel
- *          are (2 * wc + 1) and (2 * hc + 1).
- *      (2) The lack of symmetry between the handling of the
- *          first (hc + 1) lines and the last (hc) lines,
- *          and similarly with the columns, is due to fact that
- *          for the pixel at (x,y), the accumulator values are
- *          taken at (x + wc, y + hc), (x - wc - 1, y + hc),
- *          (x + wc, y - hc - 1) and (x - wc - 1, y - hc - 1).
- *      (3) Compute sums of ON pixels within the block filter size,
- *          normalized between 0 and 255, as if there were no reduced
- *          area at the boundary.  This under-estimates the value
- *          of the boundary pixels, so we multiply them by another
- *          normalization factor that is greater than 1.
- *      (4) This second normalization is done first for the first
- *          hc + 1 lines; then for the last hc lines; and finally
- *          for the first wc + 1 and last wc columns in the intermediate
- *          lines.
- *      (5) Required constraints are: wc < w and hc < h.
- * 
- */ -static void -blocksumLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpl, - l_uint32 *dataa, - l_int32 wpla, - l_int32 wc, - l_int32 hc) -{ -l_int32 i, j, imax, imin, jmax, jmin; -l_int32 wn, hn, fwc, fhc, wmwc, hmhc; -l_float32 norm, normh, normw; -l_uint32 val; -l_uint32 *linemina, *linemaxa, *lined; - - PROCNAME("blocksumLow"); - - wmwc = w - wc; - hmhc = h - hc; - if (wmwc <= 0 || hmhc <= 0) { - L_ERROR("wc >= w || hc >=h\n", procName); - return; - } - fwc = 2 * wc + 1; - fhc = 2 * hc + 1; - norm = 255. / ((l_float32)(fwc) * fhc); - - /*------------------------------------------------------------* - * Compute, using b.c. only to set limits on the accum image * - *------------------------------------------------------------*/ - for (i = 0; i < h; i++) { - imin = L_MAX(i - 1 - hc, 0); - imax = L_MIN(i + hc, h - 1); - lined = datad + wpl * i; - linemina = dataa + wpla * imin; - linemaxa = dataa + wpla * imax; - for (j = 0; j < w; j++) { - jmin = L_MAX(j - 1 - wc, 0); - jmax = L_MIN(j + wc, w - 1); - val = linemaxa[jmax] - linemaxa[jmin] - - linemina[jmax] + linemina[jmin]; - val = (l_uint8)(norm * val); - SET_DATA_BYTE(lined, j, val); - } - } - - /*------------------------------------------------------------* - * Fix normalization for boundary pixels * - *------------------------------------------------------------*/ - for (i = 0; i <= hc; i++) { /* first hc + 1 lines */ - hn = hc + i; - normh = (l_float32)fhc / (l_float32)hn; /* > 1 */ - lined = datad + wpl * i; - for (j = 0; j <= wc; j++) { - wn = wc + j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normh * normw); - SET_DATA_BYTE(lined, j, val); - } - for (j = wc + 1; j < wmwc; j++) { - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normh); - SET_DATA_BYTE(lined, j, val); - } - for (j = wmwc; j < w; j++) { - wn = wc + w - j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(lined, j); - val = 
(l_uint8)(val * normh * normw); - SET_DATA_BYTE(lined, j, val); - } - } - - for (i = hmhc; i < h; i++) { /* last hc lines */ - hn = hc + h - i; - normh = (l_float32)fhc / (l_float32)hn; /* > 1 */ - lined = datad + wpl * i; - for (j = 0; j <= wc; j++) { - wn = wc + j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normh * normw); - SET_DATA_BYTE(lined, j, val); - } - for (j = wc + 1; j < wmwc; j++) { - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normh); - SET_DATA_BYTE(lined, j, val); - } - for (j = wmwc; j < w; j++) { - wn = wc + w - j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normh * normw); - SET_DATA_BYTE(lined, j, val); - } - } - - for (i = hc + 1; i < hmhc; i++) { /* intermediate lines */ - lined = datad + wpl * i; - for (j = 0; j <= wc; j++) { /* first wc + 1 columns */ - wn = wc + j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normw); - SET_DATA_BYTE(lined, j, val); - } - for (j = wmwc; j < w; j++) { /* last wc columns */ - wn = wc + w - j; - normw = (l_float32)fwc / (l_float32)wn; /* > 1 */ - val = GET_DATA_BYTE(lined, j); - val = (l_uint8)(val * normw); - SET_DATA_BYTE(lined, j, val); - } - } - - return; -} - - -/*----------------------------------------------------------------------* - * Census transform * - *----------------------------------------------------------------------*/ -/*! - * \brief pixCensusTransform() - * - * \param[in] pixs 8 bpp - * \param[in] halfsize of square over which neighbors are averaged - * \param[in] pixacc [optional] 32 bpp pix - * \return pixd 1 bpp - * - *
- * Notes:
- *      (1) The Census transform was invented by Ramin Zabih and John Woodfill
- *          ("Non-parametric local transforms for computing visual
- *          correspondence", Third European Conference on Computer Vision,
- *          Stockholm, Sweden, May 1994); see publications at
- *             http://www.cs.cornell.edu/~rdz/index.htm
- *          This compares each pixel against the average of its neighbors,
- *          in a square of odd dimension centered on the pixel.
- *          If the pixel is greater than the average of its neighbors,
- *          the output pixel value is 1; otherwise it is 0.
- *      (2) This can be used as an encoding for an image that is
- *          fairly robust against slow illumination changes, with
- *          applications in image comparison and mosaicing.
- *      (3) The size of the convolution kernel is (2 * halfsize + 1)
- *          on a side.  The halfsize parameter must be >= 1.
- *      (4) If accum pix is null, make one, use it, and destroy it
- *          before returning; otherwise, just use the input accum pix
- * 
- */ -PIX * -pixCensusTransform(PIX *pixs, - l_int32 halfsize, - PIX *pixacc) -{ -l_int32 i, j, w, h, wpls, wplv, wpld; -l_int32 vals, valv; -l_uint32 *datas, *datav, *datad, *lines, *linev, *lined; -PIX *pixav, *pixd; - - PROCNAME("pixCensusTransform"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (halfsize < 1) - return (PIX *)ERROR_PTR("halfsize must be >= 1", procName, NULL); - - /* Get the average of each pixel with its neighbors */ - if ((pixav = pixBlockconvGray(pixs, pixacc, halfsize, halfsize)) - == NULL) - return (PIX *)ERROR_PTR("pixav not made", procName, NULL); - - /* Subtract the pixel from the average, and then compare - * the pixel value with the remaining average */ - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 1)) == NULL) { - pixDestroy(&pixav); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - datas = pixGetData(pixs); - datav = pixGetData(pixav); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wplv = pixGetWpl(pixav); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linev = datav + i * wplv; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - valv = GET_DATA_BYTE(linev, j); - if (vals > valv) - SET_DATA_BIT(lined, j); - } - } - - pixDestroy(&pixav); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Generic convolution * - *----------------------------------------------------------------------*/ -/*! - * \brief pixConvolve() - * - * \param[in] pixs 8, 16, 32 bpp; no colormap - * \param[in] kel kernel - * \param[in] outdepth of pixd: 8, 16 or 32 - * \param[in] normflag 1 to normalize kernel to unit sum; 0 otherwise - * \return pixd 8, 16 or 32 bpp - * - *
- * Notes:
- *      (1) This gives a convolution with an arbitrary kernel.
- *      (2) The input pixs must have only one sample/pixel.
- *          To do a convolution on an RGB image, use pixConvolveRGB().
- *      (3) The parameter %outdepth determines the depth of the result.
- *          If the kernel is normalized to unit sum, the output values
- *          can never exceed 255, so an output depth of 8 bpp is sufficient.
- *          If the kernel is not normalized, it may be necessary to use
- *          16 or 32 bpp output to avoid overflow.
- *      (4) If normflag == 1, the result is normalized by scaling all
- *          kernel values for a unit sum.  If the sum of kernel values
- *          is very close to zero, the kernel can not be normalized and
- *          the convolution will not be performed.  A warning is issued.
- *      (5) The kernel values can be positive or negative, but the
- *          result for the convolution can only be stored as a positive
- *          number.  Consequently, if it goes negative, the choices are
- *          to clip to 0 or take the absolute value.  We're choosing
- *          to take the absolute value.  (Another possibility would be
- *          to output a second unsigned image for the negative values.)
- *          If you want to get a clipped result, or to keep the negative
- *          values in the result, use fpixConvolve(), with the
- *          converters in fpix2.c between pix and fpix.
- *      (6) This uses a mirrored border to avoid special casing on
- *          the boundaries.
- *      (7) To get a subsampled output, call l_setConvolveSampling().
- *          The time to make a subsampled output is reduced by the
- *          product of the sampling factors.
- *      (8) The function is slow, running at about 12 machine cycles for
- *          each pixel-op in the convolution.  For example, with a 3 GHz
- *          cpu, a 1 Mpixel grayscale image, and a kernel with
- *          (sx * sy) = 25 elements, the convolution takes about 100 msec.
- * 
- */ -PIX * -pixConvolve(PIX *pixs, - L_KERNEL *kel, - l_int32 outdepth, - l_int32 normflag) -{ -l_int32 i, j, id, jd, k, m, w, h, d, wd, hd, sx, sy, cx, cy, wplt, wpld; -l_int32 val; -l_uint32 *datat, *datad, *linet, *lined; -l_float32 sum; -L_KERNEL *keli, *keln; -PIX *pixt, *pixd; - - PROCNAME("pixConvolve"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8, 16, or 32 bpp", procName, NULL); - if (!kel) - return (PIX *)ERROR_PTR("kel not defined", procName, NULL); - - pixd = NULL; - - keli = kernelInvert(kel); - kernelGetParameters(keli, &sy, &sx, &cy, &cx); - if (normflag) - keln = kernelNormalize(keli, 1.0); - else - keln = kernelCopy(keli); - - if ((pixt = pixAddMirroredBorder(pixs, cx, sx - cx, cy, sy - cy)) == NULL) { - L_ERROR("pixt not made\n", procName); - goto cleanup; - } - - wd = (w + ConvolveSamplingFactX - 1) / ConvolveSamplingFactX; - hd = (h + ConvolveSamplingFactY - 1) / ConvolveSamplingFactY; - pixd = pixCreate(wd, hd, outdepth); - datat = pixGetData(pixt); - datad = pixGetData(pixd); - wplt = pixGetWpl(pixt); - wpld = pixGetWpl(pixd); - for (i = 0, id = 0; id < hd; i += ConvolveSamplingFactY, id++) { - lined = datad + id * wpld; - for (j = 0, jd = 0; jd < wd; j += ConvolveSamplingFactX, jd++) { - sum = 0.0; - for (k = 0; k < sy; k++) { - linet = datat + (i + k) * wplt; - if (d == 8) { - for (m = 0; m < sx; m++) { - val = GET_DATA_BYTE(linet, j + m); - sum += val * keln->data[k][m]; - } - } else if (d == 16) { - for (m = 0; m < sx; m++) { - val = GET_DATA_TWO_BYTES(linet, j + m); - sum += val * keln->data[k][m]; - } - } else { /* d == 32 */ - for (m = 0; m < sx; m++) { - val = *(linet + j + m); - sum += val * keln->data[k][m]; - } - } - } - if (sum < 0.0) sum = -sum; /* make it non-negative */ - if (outdepth == 8) - 
SET_DATA_BYTE(lined, jd, (l_int32)(sum + 0.5)); - else if (outdepth == 16) - SET_DATA_TWO_BYTES(lined, jd, (l_int32)(sum + 0.5)); - else /* outdepth == 32 */ - *(lined + jd) = (l_uint32)(sum + 0.5); - } - } - -cleanup: - kernelDestroy(&keli); - kernelDestroy(&keln); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixConvolveSep() - * - * \param[in] pixs 8, 16, 32 bpp; no colormap - * \param[in] kelx x-dependent kernel - * \param[in] kely y-dependent kernel - * \param[in] outdepth of pixd: 8, 16 or 32 - * \param[in] normflag 1 to normalize kernel to unit sum; 0 otherwise - * \return pixd 8, 16 or 32 bpp - * - *
- * Notes:
- *      (1) This does a convolution with a separable kernel that is
- *          is a sequence of convolutions in x and y.  The two
- *          one-dimensional kernel components must be input separately;
- *          the full kernel is the product of these components.
- *          The support for the full kernel is thus a rectangular region.
- *      (2) The input pixs must have only one sample/pixel.
- *          To do a convolution on an RGB image, use pixConvolveSepRGB().
- *      (3) The parameter %outdepth determines the depth of the result.
- *          If the kernel is normalized to unit sum, the output values
- *          can never exceed 255, so an output depth of 8 bpp is sufficient.
- *          If the kernel is not normalized, it may be necessary to use
- *          16 or 32 bpp output to avoid overflow.
- *      (2) The %normflag parameter is used as in pixConvolve().
- *      (4) The kernel values can be positive or negative, but the
- *          result for the convolution can only be stored as a positive
- *          number.  Consequently, if it goes negative, the choices are
- *          to clip to 0 or take the absolute value.  We're choosing
- *          the former for now.  Another possibility would be to output
- *          a second unsigned image for the negative values.
- *      (5) Warning: if you use l_setConvolveSampling() to get a
- *          subsampled output, and the sampling factor is larger than
- *          the kernel half-width, it is faster to use the non-separable
- *          version pixConvolve().  This is because the first convolution
- *          here must be done on every raster line, regardless of the
- *          vertical sampling factor.  If the sampling factor is smaller
- *          than kernel half-width, it's faster to use the separable
- *          convolution.
- *      (6) This uses mirrored borders to avoid special casing on
- *          the boundaries.
- * 
- */ -PIX * -pixConvolveSep(PIX *pixs, - L_KERNEL *kelx, - L_KERNEL *kely, - l_int32 outdepth, - l_int32 normflag) -{ -l_int32 d, xfact, yfact; -L_KERNEL *kelxn, *kelyn; -PIX *pixt, *pixd; - - PROCNAME("pixConvolveSep"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8, 16, or 32 bpp", procName, NULL); - if (!kelx) - return (PIX *)ERROR_PTR("kelx not defined", procName, NULL); - if (!kely) - return (PIX *)ERROR_PTR("kely not defined", procName, NULL); - - xfact = ConvolveSamplingFactX; - yfact = ConvolveSamplingFactY; - if (normflag) { - kelxn = kernelNormalize(kelx, 1000.0); - kelyn = kernelNormalize(kely, 0.001); - l_setConvolveSampling(xfact, 1); - pixt = pixConvolve(pixs, kelxn, 32, 0); - l_setConvolveSampling(1, yfact); - pixd = pixConvolve(pixt, kelyn, outdepth, 0); - l_setConvolveSampling(xfact, yfact); /* restore */ - kernelDestroy(&kelxn); - kernelDestroy(&kelyn); - } else { /* don't normalize */ - l_setConvolveSampling(xfact, 1); - pixt = pixConvolve(pixs, kelx, 32, 0); - l_setConvolveSampling(1, yfact); - pixd = pixConvolve(pixt, kely, outdepth, 0); - l_setConvolveSampling(xfact, yfact); - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixConvolveRGB() - * - * \param[in] pixs 32 bpp rgb - * \param[in] kel kernel - * \return pixd 32 bpp rgb - * - *
- * Notes:
- *      (1) This gives a convolution on an RGB image using an
- *          arbitrary kernel (which we normalize to keep each
- *          component within the range [0 ... 255].
- *      (2) The input pixs must be RGB.
- *      (3) The kernel values can be positive or negative, but the
- *          result for the convolution can only be stored as a positive
- *          number.  Consequently, if it goes negative, we clip the
- *          result to 0.
- *      (4) To get a subsampled output, call l_setConvolveSampling().
- *          The time to make a subsampled output is reduced by the
- *          product of the sampling factors.
- *      (5) This uses a mirrored border to avoid special casing on
- *          the boundaries.
- * 
- */ -PIX * -pixConvolveRGB(PIX *pixs, - L_KERNEL *kel) -{ -PIX *pixt, *pixr, *pixg, *pixb, *pixd; - - PROCNAME("pixConvolveRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs is not 32 bpp", procName, NULL); - if (!kel) - return (PIX *)ERROR_PTR("kel not defined", procName, NULL); - - pixt = pixGetRGBComponent(pixs, COLOR_RED); - pixr = pixConvolve(pixt, kel, 8, 1); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_GREEN); - pixg = pixConvolve(pixt, kel, 8, 1); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_BLUE); - pixb = pixConvolve(pixt, kel, 8, 1); - pixDestroy(&pixt); - pixd = pixCreateRGBImage(pixr, pixg, pixb); - - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return pixd; -} - - -/*! - * \brief pixConvolveRGBSep() - * - * \param[in] pixs 32 bpp rgb - * \param[in] kelx x-dependent kernel - * \param[in] kely y-dependent kernel - * \return pixd 32 bpp rgb - * - *
- * Notes:
- *      (1) This does a convolution on an RGB image using a separable
- *          kernel that is a sequence of convolutions in x and y.  The two
- *          one-dimensional kernel components must be input separately;
- *          the full kernel is the product of these components.
- *          The support for the full kernel is thus a rectangular region.
- *      (2) The kernel values can be positive or negative, but the
- *          result for the convolution can only be stored as a positive
- *          number.  Consequently, if it goes negative, we clip the
- *          result to 0.
- *      (3) To get a subsampled output, call l_setConvolveSampling().
- *          The time to make a subsampled output is reduced by the
- *          product of the sampling factors.
- *      (4) This uses a mirrored border to avoid special casing on
- *          the boundaries.
- * 
- */ -PIX * -pixConvolveRGBSep(PIX *pixs, - L_KERNEL *kelx, - L_KERNEL *kely) -{ -PIX *pixt, *pixr, *pixg, *pixb, *pixd; - - PROCNAME("pixConvolveRGBSep"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs is not 32 bpp", procName, NULL); - if (!kelx || !kely) - return (PIX *)ERROR_PTR("kelx, kely not both defined", procName, NULL); - - pixt = pixGetRGBComponent(pixs, COLOR_RED); - pixr = pixConvolveSep(pixt, kelx, kely, 8, 1); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_GREEN); - pixg = pixConvolveSep(pixt, kelx, kely, 8, 1); - pixDestroy(&pixt); - pixt = pixGetRGBComponent(pixs, COLOR_BLUE); - pixb = pixConvolveSep(pixt, kelx, kely, 8, 1); - pixDestroy(&pixt); - pixd = pixCreateRGBImage(pixr, pixg, pixb); - - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Generic convolution with float array * - *----------------------------------------------------------------------*/ -/*! - * \brief fpixConvolve() - * - * \param[in] fpixs 32 bit float array - * \param[in] kel kernel - * \param[in] normflag 1 to normalize kernel to unit sum; 0 otherwise - * \return fpixd 32 bit float array - * - *
- * Notes:
- *      (1) This gives a float convolution with an arbitrary kernel.
- *      (2) If normflag == 1, the result is normalized by scaling all
- *          kernel values for a unit sum.  If the sum of kernel values
- *          is very close to zero, the kernel can not be normalized and
- *          the convolution will not be performed.  A warning is issued.
- *      (3) With the FPix, there are no issues about negative
- *          array or kernel values.  The convolution is performed
- *          with single precision arithmetic.
- *      (4) To get a subsampled output, call l_setConvolveSampling().
- *          The time to make a subsampled output is reduced by the
- *          product of the sampling factors.
- *      (5) This uses a mirrored border to avoid special casing on
- *          the boundaries.
- * 
- */ -FPIX * -fpixConvolve(FPIX *fpixs, - L_KERNEL *kel, - l_int32 normflag) -{ -l_int32 i, j, id, jd, k, m, w, h, wd, hd, sx, sy, cx, cy, wplt, wpld; -l_float32 val; -l_float32 *datat, *datad, *linet, *lined; -l_float32 sum; -L_KERNEL *keli, *keln; -FPIX *fpixt, *fpixd; - - PROCNAME("fpixConvolve"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (!kel) - return (FPIX *)ERROR_PTR("kel not defined", procName, NULL); - - fpixd = NULL; - - keli = kernelInvert(kel); - kernelGetParameters(keli, &sy, &sx, &cy, &cx); - if (normflag) - keln = kernelNormalize(keli, 1.0); - else - keln = kernelCopy(keli); - - fpixGetDimensions(fpixs, &w, &h); - fpixt = fpixAddMirroredBorder(fpixs, cx, sx - cx, cy, sy - cy); - if (!fpixt) { - L_ERROR("fpixt not made\n", procName); - goto cleanup; - } - - wd = (w + ConvolveSamplingFactX - 1) / ConvolveSamplingFactX; - hd = (h + ConvolveSamplingFactY - 1) / ConvolveSamplingFactY; - fpixd = fpixCreate(wd, hd); - datat = fpixGetData(fpixt); - datad = fpixGetData(fpixd); - wplt = fpixGetWpl(fpixt); - wpld = fpixGetWpl(fpixd); - for (i = 0, id = 0; id < hd; i += ConvolveSamplingFactY, id++) { - lined = datad + id * wpld; - for (j = 0, jd = 0; jd < wd; j += ConvolveSamplingFactX, jd++) { - sum = 0.0; - for (k = 0; k < sy; k++) { - linet = datat + (i + k) * wplt; - for (m = 0; m < sx; m++) { - val = *(linet + j + m); - sum += val * keln->data[k][m]; - } - } - *(lined + jd) = sum; - } - } - -cleanup: - kernelDestroy(&keli); - kernelDestroy(&keln); - fpixDestroy(&fpixt); - return fpixd; -} - - -/*! - * \brief fpixConvolveSep() - * - * \param[in] fpixs 32 bit float array - * \param[in] kelx x-dependent kernel - * \param[in] kely y-dependent kernel - * \param[in] normflag 1 to normalize kernel to unit sum; 0 otherwise - * \return fpixd 32 bit float array - * - *
- * Notes:
- *      (1) This does a convolution with a separable kernel that is
- *          is a sequence of convolutions in x and y.  The two
- *          one-dimensional kernel components must be input separately;
- *          the full kernel is the product of these components.
- *          The support for the full kernel is thus a rectangular region.
- *      (2) The normflag parameter is used as in fpixConvolve().
- *      (3) Warning: if you use l_setConvolveSampling() to get a
- *          subsampled output, and the sampling factor is larger than
- *          the kernel half-width, it is faster to use the non-separable
- *          version pixConvolve().  This is because the first convolution
- *          here must be done on every raster line, regardless of the
- *          vertical sampling factor.  If the sampling factor is smaller
- *          than kernel half-width, it's faster to use the separable
- *          convolution.
- *      (4) This uses mirrored borders to avoid special casing on
- *          the boundaries.
- * 
- */ -FPIX * -fpixConvolveSep(FPIX *fpixs, - L_KERNEL *kelx, - L_KERNEL *kely, - l_int32 normflag) -{ -l_int32 xfact, yfact; -L_KERNEL *kelxn, *kelyn; -FPIX *fpixt, *fpixd; - - PROCNAME("fpixConvolveSep"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!kelx) - return (FPIX *)ERROR_PTR("kelx not defined", procName, NULL); - if (!kely) - return (FPIX *)ERROR_PTR("kely not defined", procName, NULL); - - xfact = ConvolveSamplingFactX; - yfact = ConvolveSamplingFactY; - if (normflag) { - kelxn = kernelNormalize(kelx, 1.0); - kelyn = kernelNormalize(kely, 1.0); - l_setConvolveSampling(xfact, 1); - fpixt = fpixConvolve(fpixs, kelxn, 0); - l_setConvolveSampling(1, yfact); - fpixd = fpixConvolve(fpixt, kelyn, 0); - l_setConvolveSampling(xfact, yfact); /* restore */ - kernelDestroy(&kelxn); - kernelDestroy(&kelyn); - } else { /* don't normalize */ - l_setConvolveSampling(xfact, 1); - fpixt = fpixConvolve(fpixs, kelx, 0); - l_setConvolveSampling(1, yfact); - fpixd = fpixConvolve(fpixt, kely, 0); - l_setConvolveSampling(xfact, yfact); - } - - fpixDestroy(&fpixt); - return fpixd; -} - - -/*------------------------------------------------------------------------* - * Convolution with bias (for non-negative output) * - *------------------------------------------------------------------------*/ -/*! - * \brief pixConvolveWithBias() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] kel1 - * \param[in] kel2 can be null; use if separable - * \param[in] force8 if 1, force output to 8 bpp; otherwise, determine - * output depth by the dynamic range of pixel values - * \param[out] pbias applied bias - * \return pixd 8 or 16 bpp - * - *
- * Notes:
- *      (1) This does a convolution with either a single kernel or
- *          a pair of separable kernels, and automatically applies whatever
- *          bias (shift) is required so that the resulting pixel values
- *          are non-negative.
- *      (2) The kernel is always normalized.  If there are no negative
- *          values in the kernel, a standard normalized convolution is
- *          performed, with 8 bpp output.  If the sum of kernel values is
- *          very close to zero, the kernel can not be normalized and
- *          the convolution will not be performed.  An error message results.
- *      (3) If there are negative values in the kernel, the pix is
- *          converted to an fpix, the convolution is done on the fpix, and
- *          a bias (shift) may need to be applied.
- *      (4) If force8 == TRUE and the range of values after the convolution
- *          is > 255, the output values will be scaled to fit in [0 ... 255].
- *          If force8 == FALSE, the output will be either 8 or 16 bpp,
- *          to accommodate the dynamic range of output values without scaling.
- * 
- */ -PIX * -pixConvolveWithBias(PIX *pixs, - L_KERNEL *kel1, - L_KERNEL *kel2, - l_int32 force8, - l_int32 *pbias) -{ -l_int32 outdepth; -l_float32 min1, min2, min, minval, maxval, range; -FPIX *fpix1, *fpix2; -PIX *pixd; - - PROCNAME("pixConvolveWithBias"); - - if (!pbias) - return (PIX *)ERROR_PTR("&bias not defined", procName, NULL); - *pbias = 0; - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - if (!kel1) - return (PIX *)ERROR_PTR("kel1 not defined", procName, NULL); - - /* Determine if negative values can be produced in the convolution */ - kernelGetMinMax(kel1, &min1, NULL); - min2 = 0.0; - if (kel2) - kernelGetMinMax(kel2, &min2, NULL); - min = L_MIN(min1, min2); - - if (min >= 0.0) { - if (!kel2) - return pixConvolve(pixs, kel1, 8, 1); - else - return pixConvolveSep(pixs, kel1, kel2, 8, 1); - } - - /* Bias may need to be applied; convert to fpix and convolve */ - fpix1 = pixConvertToFPix(pixs, 1); - if (!kel2) - fpix2 = fpixConvolve(fpix1, kel1, 1); - else - fpix2 = fpixConvolveSep(fpix1, kel1, kel2, 1); - fpixDestroy(&fpix1); - - /* Determine the bias and the dynamic range. - * If the dynamic range is <= 255, just shift the values by the - * bias, if any. - * If the dynamic range is > 255, there are two cases: - * (1) the output depth is not forced to 8 bpp - * ==> apply the bias without scaling; outdepth = 16 - * (2) the output depth is forced to 8 - * ==> linearly map the pixel values to [0 ... 255]. */ - fpixGetMin(fpix2, &minval, NULL, NULL); - fpixGetMax(fpix2, &maxval, NULL, NULL); - range = maxval - minval; - *pbias = (minval < 0.0) ? -minval : 0.0; - fpixAddMultConstant(fpix2, *pbias, 1.0); /* shift: min val ==> 0 */ - if (range <= 255 || !force8) { /* no scaling of output values */ - outdepth = (range > 255) ? 
16 : 8; - } else { /* scale output values to fit in 8 bpp */ - fpixAddMultConstant(fpix2, 0.0, (255.0 / range)); - outdepth = 8; - } - - /* Convert back to pix; it won't do any clipping */ - pixd = fpixConvertToPix(fpix2, outdepth, L_CLIP_TO_ZERO, 0); - fpixDestroy(&fpix2); - - return pixd; -} - - -/*------------------------------------------------------------------------* - * Set parameter for convolution subsampling * - *------------------------------------------------------------------------*/ -/*! - * \brief l_setConvolveSampling() - - * - * \param[in] xfact, yfact integer >= 1 - * \return void - * - *
- * Notes:
- *      (1) This sets the x and y output subsampling factors for generic pix
- *          and fpix convolution.  The default values are 1 (no subsampling).
- * 
- */ -void -l_setConvolveSampling(l_int32 xfact, - l_int32 yfact) -{ - if (xfact < 1) xfact = 1; - if (yfact < 1) yfact = 1; - ConvolveSamplingFactX = xfact; - ConvolveSamplingFactY = yfact; -} - - -/*------------------------------------------------------------------------* - * Additive gaussian noise * - *------------------------------------------------------------------------*/ -/*! - * \brief pixAddGaussianNoise() - * - * \param[in] pixs 8 bpp gray or 32 bpp rgb; no colormap - * \param[in] stdev of noise - * \return pixd 8 or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This adds noise to each pixel, taken from a normal
- *          distribution with zero mean and specified standard deviation.
- * 
- */ -PIX * -pixAddGaussianNoise(PIX *pixs, - l_float32 stdev) -{ -l_int32 i, j, w, h, d, wpls, wpld, val, rval, gval, bval; -l_uint32 pixel; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixAddGaussianNoise"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - - pixd = pixCreateTemplateNoInit(pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (d == 8) { - val = GET_DATA_BYTE(lines, j); - val += (l_int32)(stdev * gaussDistribSampling() + 0.5); - val = L_MIN(255, L_MAX(0, val)); - SET_DATA_BYTE(lined, j, val); - } else { /* d = 32 */ - pixel = *(lines + j); - extractRGBValues(pixel, &rval, &gval, &bval); - rval += (l_int32)(stdev * gaussDistribSampling() + 0.5); - rval = L_MIN(255, L_MAX(0, rval)); - gval += (l_int32)(stdev * gaussDistribSampling() + 0.5); - gval = L_MIN(255, L_MAX(0, gval)); - bval += (l_int32)(stdev * gaussDistribSampling() + 0.5); - bval = L_MIN(255, L_MAX(0, bval)); - composeRGBPixel(rval, gval, bval, lined + j); - } - } - } - return pixd; -} - - -/*! - * \brief gaussDistribSampling() - * - * \return gaussian distributed variable with zero mean and unit stdev - * - *
- * Notes:
- *      (1) For an explanation of the Box-Muller method for generating
- *          a normally distributed random variable with zero mean and
- *          unit standard deviation, see Numerical Recipes in C,
- *          2nd edition, p. 288ff.
- *      (2) This can be called sequentially to get samples that can be
- *          used for adding noise to each pixel of an image, for example.
- * 
- */ -l_float32 -gaussDistribSampling(void) -{ -static l_int32 select = 0; /* flips between 0 and 1 on successive calls */ -static l_float32 saveval; -l_float32 frand, xval, yval, rsq, factor; - - if (select == 0) { - while (1) { /* choose a point in a 2x2 square, centered at origin */ - frand = (l_float32)rand() / (l_float32)RAND_MAX; - xval = 2.0 * frand - 1.0; - frand = (l_float32)rand() / (l_float32)RAND_MAX; - yval = 2.0 * frand - 1.0; - rsq = xval * xval + yval * yval; - if (rsq > 0.0 && rsq < 1.0) /* point is inside the unit circle */ - break; - } - factor = sqrt(-2.0 * log(rsq) / rsq); - saveval = xval * factor; - select = 1; - return yval * factor; - } - else { - select = 0; - return saveval; - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/correlscore.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/correlscore.c deleted file mode 100644 index c5b0e06b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/correlscore.c +++ /dev/null @@ -1,883 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * correlscore.c - * - * These are functions for computing correlation between - * pairs of 1 bpp images. - * - * Optimized 2 pix correlators (for jbig2 clustering) - * l_int32 pixCorrelationScore() - * l_int32 pixCorrelationScoreThresholded() - * - * Simple 2 pix correlators - * l_int32 pixCorrelationScoreSimple() - * l_int32 pixCorrelationScoreShifted() - * - * There are other, more application-oriented functions, that - * compute the correlation between two binary images, taking into - * account small translational shifts, between two binary images. - * These are: - * compare.c: pixBestCorrelation() - * Uses coarse-to-fine translations of full image - * recogident.c: pixCorrelationBestShift() - * Uses small shifts between c.c. centroids. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* -------------------------------------------------------------------- * - * Optimized 2 pix correlators (for jbig2 clustering) * - * -------------------------------------------------------------------- */ -/*! 
- * \brief pixCorrelationScore() - * - * \param[in] pix1 test pix, 1 bpp - * \param[in] pix2 exemplar pix, 1 bpp - * \param[in] area1 number of on pixels in pix1 - * \param[in] area2 number of on pixels in pix2 - * \param[in] delx x comp of centroid difference - * \param[in] dely y comp of centroid difference - * \param[in] maxdiffw max width difference of pix1 and pix2 - * \param[in] maxdiffh max height difference of pix1 and pix2 - * \param[in] tab sum tab for byte - * \param[out] pscore correlation score - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *  We check first that the two pix are roughly the same size.
- *  For jbclass (jbig2) applications at roughly 300 ppi, maxdiffw and
- *  maxdiffh should be at least 2.
- *
- *  Only if they meet that criterion do we compare the bitmaps.
- *  The centroid difference is used to align the two images to the
- *  nearest integer for the correlation.
- *
- *  The correlation score is the ratio of the square of the number of
- *  pixels in the AND of the two bitmaps to the product of the number
- *  of ON pixels in each.  Denote the number of ON pixels in pix1
- *  by |1|, the number in pix2 by |2|, and the number in the AND
- *  of pix1 and pix2 by |1 & 2|.  The correlation score is then
- *  (|1 & 2|)**2 / (|1|*|2|).
- *
- *  This score is compared with an input threshold, which can
- *  be modified depending on the weight of the template.
- *  The modified threshold is
- *     thresh + (1.0 - thresh) * weight * R
- *  where
- *     weight is a fixed input factor between 0.0 and 1.0
- *     R = |2| / area(2)
- *  and area(2) is the total number of pixels in 2 (i.e., width x height).
- *
- *  To understand why a weight factor is useful, consider what happens
- *  with thick, sans-serif characters that look similar and have a value
- *  of R near 1.  Different characters can have a high correlation value,
- *  and the classifier will make incorrect substitutions.  The weight
- *  factor raises the threshold for these characters.
- *
- *  Yet another approach to reduce such substitutions is to run the classifier
- *  in a non-greedy way, matching to the template with the highest
- *  score, not the first template with a score satisfying the matching
- *  constraint.  However, this is not particularly effective.
- *
- *  The implementation here gives the same result as in
- *  pixCorrelationScoreSimple(), where a temporary Pix is made to hold
- *  the AND and implementation uses rasterop:
- *      pixt = pixCreateTemplate(pix1);
- *      pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0);
- *      pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0);
- *      pixCountPixels(pixt, &count, tab);
- *      pixDestroy(&pixt);
- *  However, here it is done in a streaming fashion, counting as it goes,
- *  and touching memory exactly once, giving a 3-4x speedup over the
- *  simple implementation.  This very fast correlation matcher was
- *  contributed by William Rucklidge.
- * 
- */ -l_ok -pixCorrelationScore(PIX *pix1, - PIX *pix2, - l_int32 area1, - l_int32 area2, - l_float32 delx, /* x(1) - x(3) */ - l_float32 dely, /* y(1) - y(3) */ - l_int32 maxdiffw, - l_int32 maxdiffh, - l_int32 *tab, - l_float32 *pscore) -{ -l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; -l_int32 wpl1, wpl2, lorow, hirow, locol, hicol; -l_int32 x, y, pix1lskip, pix2lskip, rowwords1, rowwords2; -l_uint32 word1, word2, andw; -l_uint32 *row1, *row2; - - PROCNAME("pixCorrelationScore"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 undefined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 undefined or not 1 bpp", procName, 1); - if (!tab) - return ERROR_INT("tab not defined", procName, 1); - if (area1 <= 0 || area2 <= 0) - return ERROR_INT("areas must be > 0", procName, 1); - - /* Eliminate based on size difference */ - pixGetDimensions(pix1, &wi, &hi, NULL); - pixGetDimensions(pix2, &wt, &ht, NULL); - delw = L_ABS(wi - wt); - if (delw > maxdiffw) - return 0; - delh = L_ABS(hi - ht); - if (delh > maxdiffh) - return 0; - - /* Round difference to nearest integer */ - if (delx >= 0) - idelx = (l_int32)(delx + 0.5); - else - idelx = (l_int32)(delx - 0.5); - if (dely >= 0) - idely = (l_int32)(dely + 0.5); - else - idely = (l_int32)(dely - 0.5); - - count = 0; - wpl1 = pixGetWpl(pix1); - wpl2 = pixGetWpl(pix2); - rowwords2 = wpl2; - - /* What rows of pix1 need to be considered? Only those underlying the - * shifted pix2. */ - lorow = L_MAX(idely, 0); - hirow = L_MIN(ht + idely, hi); - - /* Get the pointer to the first row of each image that will be - * considered. */ - row1 = pixGetData(pix1) + wpl1 * lorow; - row2 = pixGetData(pix2) + wpl2 * (lorow - idely); - - /* Similarly, figure out which columns of pix1 will be considered. 
*/ - locol = L_MAX(idelx, 0); - hicol = L_MIN(wt + idelx, wi); - - if (idelx >= 32) { - /* pix2 is shifted far enough to the right that pix1's first - * word(s) won't contribute to the count. Increment its - * pointer to point to the first word that will contribute, - * and adjust other values accordingly. */ - pix1lskip = idelx >> 5; /* # of words to skip on left */ - row1 += pix1lskip; - locol -= pix1lskip << 5; - hicol -= pix1lskip << 5; - idelx &= 31; - } else if (idelx <= -32) { - /* pix2 is shifted far enough to the left that its first word(s) - * won't contribute to the count. Increment its pointer - * to point to the first word that will contribute, - * and adjust other values accordingly. */ - pix2lskip = -((idelx + 31) >> 5); /* # of words to skip on left */ - row2 += pix2lskip; - rowwords2 -= pix2lskip; - idelx += pix2lskip << 5; - } - - if ((locol >= hicol) || (lorow >= hirow)) { /* there is no overlap */ - count = 0; - } else { - /* How many words of each row of pix1 need to be considered? */ - rowwords1 = (hicol + 31) >> 5; - - if (idelx == 0) { - /* There's no lateral offset; simple case. */ - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - for (x = 0; x < rowwords1; x++) { - andw = row1[x] & row2[x]; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - } - } else if (idelx > 0) { - /* pix2 is shifted to the right. word 0 of pix1 is touched by - * word 0 of pix2; word 1 of pix1 is touched by word 0 and word - * 1 of pix2, and so on up to the last word of pix1 (word N), - * which is touched by words N-1 and N of pix1... if there is a - * word N. Handle the two cases (pix2 has N-1 words and pix2 - * has at least N words) separately. - * - * Note: we know that pix2 has at least N-1 words (i.e., - * rowwords2 >= rowwords1 - 1) by the following logic. - * We can pretend that idelx <= 31 because the >= 32 logic - * above adjusted everything appropriately. 
Then - * hicol <= wt + idelx <= wt + 31, so - * hicol + 31 <= wt + 62 - * rowwords1 = (hicol + 31) >> 5 <= (wt + 62) >> 5 - * rowwords2 == (wt + 31) >> 5, so - * rowwords1 <= rowwords2 + 1 */ - if (rowwords2 < rowwords1) { - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - /* Do the first iteration so the loop can be - * branch-free. */ - word1 = row1[0]; - word2 = row2[0] >> idelx; - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - - for (x = 1; x < rowwords2; x++) { - word1 = row1[x]; - word2 = (row2[x] >> idelx) | - (row2[x - 1] << (32 - idelx)); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - /* Now the last iteration - we know that this is safe - * (i.e. rowwords1 >= 2) because rowwords1 > rowwords2 - * > 0 (if it was 0, we'd have taken the "count = 0" - * fast-path out of here). */ - word1 = row1[x]; - word2 = row2[x - 1] << (32 - idelx); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - } else { - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - /* Do the first iteration so the loop can be - * branch-free. This section is the same as above - * except for the different limit on the loop, since - * the last iteration is the same as all the other - * iterations (beyond the first). */ - word1 = row1[0]; - word2 = row2[0] >> idelx; - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - - for (x = 1; x < rowwords1; x++) { - word1 = row1[x]; - word2 = (row2[x] >> idelx) | - (row2[x - 1] << (32 - idelx)); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - } - } - } else { - /* pix2 is shifted to the left. 
word 0 of pix1 is touched by - * word 0 and word 1 of pix2, and so on up to the last word of - * pix1 (word N), which is touched by words N and N+1 of - * pix2... if there is a word N+1. Handle the two cases (pix2 - * has N words and pix2 has at least N+1 words) separately. */ - if (rowwords1 < rowwords2) { - /* pix2 has at least N+1 words, so every iteration through - * the loop can be the same. */ - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - for (x = 0; x < rowwords1; x++) { - word1 = row1[x]; - word2 = row2[x] << -idelx; - word2 |= row2[x + 1] >> (32 + idelx); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - } - } else { - /* pix2 has only N words, so the last iteration is broken - * out. */ - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - for (x = 0; x < rowwords1 - 1; x++) { - word1 = row1[x]; - word2 = row2[x] << -idelx; - word2 |= row2[x + 1] >> (32 + idelx); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - word1 = row1[x]; - word2 = row2[x] << -idelx; - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - } - } - } - - *pscore = (l_float32)count * (l_float32)count / - ((l_float32)area1 * (l_float32)area2); -/* lept_stderr("score = %5.3f, count = %d, area1 = %d, area2 = %d\n", - *pscore, count, area1, area2); */ - return 0; -} - - -/*! 
- * \brief pixCorrelationScoreThresholded() - * - * \param[in] pix1 test pix, 1 bpp - * \param[in] pix2 exemplar pix, 1 bpp - * \param[in] area1 number of on pixels in pix1 - * \param[in] area2 number of on pixels in pix2 - * \param[in] delx x comp of centroid difference - * \param[in] dely y comp of centroid difference - * \param[in] maxdiffw max width difference of pix1 and pix2 - * \param[in] maxdiffh max height difference of pix1 and pix2 - * \param[in] tab sum tab for byte - * \param[in] downcount count of 1 pixels below each row of pix1 - * \param[in] score_threshold - * \return whether the correlation score is >= score_threshold - * - * - *
- * Notes:
- *  We check first that the two pix are roughly the same size.
- *  Only if they meet that criterion do we compare the bitmaps.
- *  The centroid difference is used to align the two images to the
- *  nearest integer for the correlation.
- *
- *  The correlation score is the ratio of the square of the number of
- *  pixels in the AND of the two bitmaps to the product of the number
- *  of ON pixels in each.  Denote the number of ON pixels in pix1
- *  by |1|, the number in pix2 by |2|, and the number in the AND
- *  of pix1 and pix2 by |1 & 2|.  The correlation score is then
- *  (|1 & 2|)**2 / (|1|*|2|).
- *
- *  This score is compared with an input threshold, which can
- *  be modified depending on the weight of the template.
- *  The modified threshold is
- *     thresh + (1.0 - thresh) * weight * R
- *  where
- *     weight is a fixed input factor between 0.0 and 1.0
- *     R = |2| / area(2)
- *  and area(2) is the total number of pixels in 2 (i.e., width x height).
- *
- *  To understand why a weight factor is useful, consider what happens
- *  with thick, sans-serif characters that look similar and have a value
- *  of R near 1.  Different characters can have a high correlation value,
- *  and the classifier will make incorrect substitutions.  The weight
- *  factor raises the threshold for these characters.
- *
- *  Yet another approach to reduce such substitutions is to run the classifier
- *  in a non-greedy way, matching to the template with the highest
- *  score, not the first template with a score satisfying the matching
- *  constraint.  However, this is not particularly effective.
- *
- *  This very fast correlation matcher was contributed by William Rucklidge.
- * 
- */ -l_int32 -pixCorrelationScoreThresholded(PIX *pix1, - PIX *pix2, - l_int32 area1, - l_int32 area2, - l_float32 delx, /* x(1) - x(3) */ - l_float32 dely, /* y(1) - y(3) */ - l_int32 maxdiffw, - l_int32 maxdiffh, - l_int32 *tab, - l_int32 *downcount, - l_float32 score_threshold) -{ -l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; -l_int32 wpl1, wpl2, lorow, hirow, locol, hicol, untouchable; -l_int32 x, y, pix1lskip, pix2lskip, rowwords1, rowwords2; -l_uint32 word1, word2, andw; -l_uint32 *row1, *row2; -l_float32 score; -l_int32 threshold; - - PROCNAME("pixCorrelationScoreThresholded"); - - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 undefined or not 1 bpp", procName, 0); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 undefined or not 1 bpp", procName, 0); - if (!tab) - return ERROR_INT("tab not defined", procName, 0); - if (area1 <= 0 || area2 <= 0) - return ERROR_INT("areas must be > 0", procName, 0); - - /* Eliminate based on size difference */ - pixGetDimensions(pix1, &wi, &hi, NULL); - pixGetDimensions(pix2, &wt, &ht, NULL); - delw = L_ABS(wi - wt); - if (delw > maxdiffw) - return FALSE; - delh = L_ABS(hi - ht); - if (delh > maxdiffh) - return FALSE; - - /* Round difference to nearest integer */ - if (delx >= 0) - idelx = (l_int32)(delx + 0.5); - else - idelx = (l_int32)(delx - 0.5); - if (dely >= 0) - idely = (l_int32)(dely + 0.5); - else - idely = (l_int32)(dely - 0.5); - - /* Compute the correlation count that is needed so that - * count * count / (area1 * area2) >= score_threshold */ - threshold = (l_int32)ceil(sqrt((l_float64)score_threshold * area1 * area2)); - - count = 0; - wpl1 = pixGetWpl(pix1); - wpl2 = pixGetWpl(pix2); - rowwords2 = wpl2; - - /* What rows of pix1 need to be considered? Only those underlying the - * shifted pix2. */ - lorow = L_MAX(idely, 0); - hirow = L_MIN(ht + idely, hi); - - /* Get the pointer to the first row of each image that will be - * considered. 
*/ - row1 = pixGetData(pix1) + wpl1 * lorow; - row2 = pixGetData(pix2) + wpl2 * (lorow - idely); - if (hirow <= hi) { - /* Some rows of pix1 will never contribute to count */ - untouchable = downcount[hirow - 1]; - } - - /* Similarly, figure out which columns of pix1 will be considered. */ - locol = L_MAX(idelx, 0); - hicol = L_MIN(wt + idelx, wi); - - if (idelx >= 32) { - /* pix2 is shifted far enough to the right that pix1's first - * word(s) won't contribute to the count. Increment its - * pointer to point to the first word that will contribute, - * and adjust other values accordingly. */ - pix1lskip = idelx >> 5; /* # of words to skip on left */ - row1 += pix1lskip; - locol -= pix1lskip << 5; - hicol -= pix1lskip << 5; - idelx &= 31; - } else if (idelx <= -32) { - /* pix2 is shifted far enough to the left that its first word(s) - * won't contribute to the count. Increment its pointer - * to point to the first word that will contribute, - * and adjust other values accordingly. */ - pix2lskip = -((idelx + 31) >> 5); /* # of words to skip on left */ - row2 += pix2lskip; - rowwords2 -= pix2lskip; - idelx += pix2lskip << 5; - } - - if ((locol >= hicol) || (lorow >= hirow)) { /* there is no overlap */ - count = 0; - } else { - /* How many words of each row of pix1 need to be considered? */ - rowwords1 = (hicol + 31) >> 5; - - if (idelx == 0) { - /* There's no lateral offset; simple case. */ - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - for (x = 0; x < rowwords1; x++) { - andw = row1[x] & row2[x]; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - /* If the count is over the threshold, no need to - * calculate any further. Likewise, return early if the - * count plus the maximum count attainable from further - * rows is below the threshold. 
*/ - if (count >= threshold) return TRUE; - if (count + downcount[y] - untouchable < threshold) { - return FALSE; - } - } - } else if (idelx > 0) { - /* pix2 is shifted to the right. word 0 of pix1 is touched by - * word 0 of pix2; word 1 of pix1 is touched by word 0 and word - * 1 of pix2, and so on up to the last word of pix1 (word N), - * which is touched by words N-1 and N of pix1... if there is a - * word N. Handle the two cases (pix2 has N-1 words and pix2 - * has at least N words) separately. - * - * Note: we know that pix2 has at least N-1 words (i.e., - * rowwords2 >= rowwords1 - 1) by the following logic. - * We can pretend that idelx <= 31 because the >= 32 logic - * above adjusted everything appropriately. Then - * hicol <= wt + idelx <= wt + 31, so - * hicol + 31 <= wt + 62 - * rowwords1 = (hicol + 31) >> 5 <= (wt + 62) >> 5 - * rowwords2 == (wt + 31) >> 5, so - * rowwords1 <= rowwords2 + 1 */ - if (rowwords2 < rowwords1) { - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - /* Do the first iteration so the loop can be - * branch-free. */ - word1 = row1[0]; - word2 = row2[0] >> idelx; - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - - for (x = 1; x < rowwords2; x++) { - word1 = row1[x]; - word2 = (row2[x] >> idelx) | - (row2[x - 1] << (32 - idelx)); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - /* Now the last iteration - we know that this is safe - * (i.e. rowwords1 >= 2) because rowwords1 > rowwords2 - * > 0 (if it was 0, we'd have taken the "count = 0" - * fast-path out of here). 
*/ - word1 = row1[x]; - word2 = row2[x - 1] << (32 - idelx); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - - if (count >= threshold) return TRUE; - if (count + downcount[y] - untouchable < threshold) { - return FALSE; - } - } - } else { - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - /* Do the first iteration so the loop can be - * branch-free. This section is the same as above - * except for the different limit on the loop, since - * the last iteration is the same as all the other - * iterations (beyond the first). */ - word1 = row1[0]; - word2 = row2[0] >> idelx; - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - - for (x = 1; x < rowwords1; x++) { - word1 = row1[x]; - word2 = (row2[x] >> idelx) | - (row2[x - 1] << (32 - idelx)); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - if (count >= threshold) return TRUE; - if (count + downcount[y] - untouchable < threshold) { - return FALSE; - } - } - } - } else { - /* pix2 is shifted to the left. word 0 of pix1 is touched by - * word 0 and word 1 of pix2, and so on up to the last word of - * pix1 (word N), which is touched by words N and N+1 of - * pix2... if there is a word N+1. Handle the two cases (pix2 - * has N words and pix2 has at least N+1 words) separately. */ - if (rowwords1 < rowwords2) { - /* pix2 has at least N+1 words, so every iteration through - * the loop can be the same. 
*/ - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - for (x = 0; x < rowwords1; x++) { - word1 = row1[x]; - word2 = row2[x] << -idelx; - word2 |= row2[x + 1] >> (32 + idelx); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - if (count >= threshold) return TRUE; - if (count + downcount[y] - untouchable < threshold) { - return FALSE; - } - } - } else { - /* pix2 has only N words, so the last iteration is broken - * out. */ - for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { - for (x = 0; x < rowwords1 - 1; x++) { - word1 = row1[x]; - word2 = row2[x] << -idelx; - word2 |= row2[x + 1] >> (32 + idelx); - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - } - - word1 = row1[x]; - word2 = row2[x] << -idelx; - andw = word1 & word2; - count += tab[andw & 0xff] + - tab[(andw >> 8) & 0xff] + - tab[(andw >> 16) & 0xff] + - tab[andw >> 24]; - - if (count >= threshold) return TRUE; - if (count + downcount[y] - untouchable < threshold) { - return FALSE; - } - } - } - } - } - - score = (l_float32)count * (l_float32)count / - ((l_float32)area1 * (l_float32)area2); - if (score >= score_threshold) { - lept_stderr( - "count %d < threshold %d but score %g >= score_threshold %g\n", - count, threshold, score, score_threshold); - } - return FALSE; -} - - -/* -------------------------------------------------------------------- * - * Simple 2 pix correlators (for jbig2 clustering) * - * -------------------------------------------------------------------- */ -/*! 
- * \brief pixCorrelationScoreSimple() - * - * \param[in] pix1 test pix, 1 bpp - * \param[in] pix2 exemplar pix, 1 bpp - * \param[in] area1 number of on pixels in pix1 - * \param[in] area2 number of on pixels in pix2 - * \param[in] delx x comp of centroid difference - * \param[in] dely y comp of centroid difference - * \param[in] maxdiffw max width difference of pix1 and pix2 - * \param[in] maxdiffh max height difference of pix1 and pix2 - * \param[in] tab sum tab for byte - * \param[out] pscore correlation score, in range [0.0 ... 1.0] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This calculates exactly the same value as pixCorrelationScore().
- *          It is 2-3x slower, but much simpler to understand.
- *      (2) The returned correlation score is 0.0 if the width or height
- *          exceed %maxdiffw or %maxdiffh.
- * 
- */ -l_ok -pixCorrelationScoreSimple(PIX *pix1, - PIX *pix2, - l_int32 area1, - l_int32 area2, - l_float32 delx, /* x(1) - x(3) */ - l_float32 dely, /* y(1) - y(3) */ - l_int32 maxdiffw, - l_int32 maxdiffh, - l_int32 *tab, - l_float32 *pscore) -{ -l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; -PIX *pixt; - - PROCNAME("pixCorrelationScoreSimple"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 undefined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 undefined or not 1 bpp", procName, 1); - if (!tab) - return ERROR_INT("tab not defined", procName, 1); - if (!area1 || !area2) - return ERROR_INT("areas must be > 0", procName, 1); - - /* Eliminate based on size difference */ - pixGetDimensions(pix1, &wi, &hi, NULL); - pixGetDimensions(pix2, &wt, &ht, NULL); - delw = L_ABS(wi - wt); - if (delw > maxdiffw) - return 0; - delh = L_ABS(hi - ht); - if (delh > maxdiffh) - return 0; - - /* Round difference to nearest integer */ - if (delx >= 0) - idelx = (l_int32)(delx + 0.5); - else - idelx = (l_int32)(delx - 0.5); - if (dely >= 0) - idely = (l_int32)(dely + 0.5); - else - idely = (l_int32)(dely - 0.5); - - /* pixt = pixAnd(NULL, pix1, pix2), including shift. - * To insure that pixels are ON only within the - * intersection of pix1 and the shifted pix2: - * (1) Start with pixt cleared and equal in size to pix1. - * (2) Blit the shifted pix2 onto pixt. Then all ON pixels - * are within the intersection of pix1 and the shifted pix2. - * (3) AND pix1 with pixt. 
*/ - pixt = pixCreateTemplate(pix1); - pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0); - pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0); - pixCountPixels(pixt, &count, tab); - pixDestroy(&pixt); - - *pscore = (l_float32)count * (l_float32)count / - ((l_float32)area1 * (l_float32)area2); -/* lept_stderr("score = %5.3f, count = %d, area1 = %d, area2 = %d\n", - *pscore, count, area1, area2); */ - return 0; -} - - -/*! - * \brief pixCorrelationScoreShifted() - * - * \param[in] pix1 1 bpp - * \param[in] pix2 1 bpp - * \param[in] area1 number of on pixels in pix1 - * \param[in] area2 number of on pixels in pix2 - * \param[in] delx x translation of pix2 relative to pix1 - * \param[in] dely y translation of pix2 relative to pix1 - * \param[in] tab sum tab for byte - * \param[out] pscore correlation score - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This finds the correlation between two 1 bpp images,
- *          when pix2 is shifted by (delx, dely) with respect
- *          to each other.
- *      (2) This is implemented by starting with a copy of pix1 and
- *          ANDing its pixels with those of a shifted pix2.
- *      (3) Get the pixel counts for area1 and area2 using piCountPixels().
- *      (4) A good estimate for a shift that would maximize the correlation
- *          is to align the centroids (cx1, cy1; cx2, cy2), giving the
- *          relative translations etransx and etransy:
- *             etransx = cx1 - cx2
- *             etransy = cy1 - cy2
- *          Typically delx is chosen to be near etransx; ditto for dely.
- *          This function is used in pixBestCorrelation(), where the
- *          translations delx and dely are varied to find the best alignment.
- *      (5) We do not check the sizes of pix1 and pix2, because they should
- *          be comparable.
- * 
- */ -l_ok -pixCorrelationScoreShifted(PIX *pix1, - PIX *pix2, - l_int32 area1, - l_int32 area2, - l_int32 delx, - l_int32 dely, - l_int32 *tab, - l_float32 *pscore) -{ -l_int32 w1, h1, w2, h2, count; -PIX *pixt; - - PROCNAME("pixCorrelationScoreShifted"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 undefined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 undefined or not 1 bpp", procName, 1); - if (!tab) - return ERROR_INT("tab not defined", procName, 1); - if (!area1 || !area2) - return ERROR_INT("areas must be > 0", procName, 1); - - pixGetDimensions(pix1, &w1, &h1, NULL); - pixGetDimensions(pix2, &w2, &h2, NULL); - - /* To insure that pixels are ON only within the - * intersection of pix1 and the shifted pix2: - * (1) Start with pixt cleared and equal in size to pix1. - * (2) Blit the shifted pix2 onto pixt. Then all ON pixels - * are within the intersection of pix1 and the shifted pix2. - * (3) AND pix1 with pixt. */ - pixt = pixCreateTemplate(pix1); - pixRasterop(pixt, delx, dely, w2, h2, PIX_SRC, pix2, 0, 0); - pixRasterop(pixt, 0, 0, w1, h1, PIX_SRC & PIX_DST, pix1, 0, 0); - pixCountPixels(pixt, &count, tab); - pixDestroy(&pixt); - - *pscore = (l_float32)count * (l_float32)count / - ((l_float32)area1 * (l_float32)area2); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp.h deleted file mode 100644 index 37bfb632..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp.h +++ /dev/null @@ -1,191 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_DEWARP_H -#define LEPTONICA_DEWARP_H - -/*! - * \file dewarp.h - * - *
- *     Data structure to hold arrays and results for generating
- *     horizontal and vertical disparity arrays based on textlines.
- *     Each disparity array is two-dimensional.  The vertical disparity
- *     array gives a vertical displacement, relative to the lowest point
- *     in the textlines.  The horizontal disparty array gives a horizontal
- *     displacement, relative to the minimum values (for even pages)
- *     or maximum values (for odd pages) of the left and right ends of
- *     full textlines.  Horizontal alignment always involves translations
- *     away from the book gutter.
- *
- *     We have intentionally separated the process of building models
- *     from the rendering process that uses the models.  For any page,
- *     the building operation either creates an actual model (that is,
- *     a model with at least the vertical disparity being computed, and
- *     for which the 'success' flag is set) or fails to create a model.
- *     However, at rendering time, a page can have one of two different
- *     types of models.
- *     (1) A valid model is an actual model that meets the rendering
- *         constraints, which are limits on model curvature parameters.
- *         See dewarpaTestForValidModel() for details.
- *         Valid models are identified by dewarpaInsertRefModels(),
- *         which sets the 'vvalid' and 'hvalid' fields.  Only valid
- *         models are used for rendering.
- *     (2) A reference model is used by a page that doesn't have
- *         a valid model, but has a nearby valid model of the same
- *         parity (even/odd page) that it can use.  The range in pages
- *         to search for a valid model is given by the 'maxdist' field.
- *
- *     At the rendering stage, vertical and horizontal disparities are
- *     treated differently.  It is somewhat more robust to generate
- *     vertical disparity models (VDM) than horizontal disparity
- *     models (HDM). A valid VDM is required for any correction to
- *     be made; if a valid VDM is not available, just use the input
- *     image.  Otherwise, assuming it is available, the use of the
- *     HDM is controlled by two fields: 'useboth' and 'check_columns'.
- *       (a) With useboth == 0, we use only the VDM.
- *       (b) With useboth == 1, we require using the VDM and, if a valid
- *           horizontal disparity model (HDM) is available, we also use it.
- *       (c) With check_columns == 1, check for multiple columns and if
- *           true, only use the VDM, even if a valid HDM is available.
- *           Note that 'check_columns' takes precedence over 'useboth'
- *           when there is more than 1 column of text.  By default,
- *           check_columns == 1 (DefaultCheckColumns in dewarp1.c).
- *
- *     The 'maxdist' parameter is input when the dewarpa is created.
- *     The other rendering parameters have default values given in dewarp1.c.
- *     All parameters used by rendering can be set (or reset) using accessors.
- *
- *     After dewarping, use of the VDM will cause all points on each
- *     altered curve to have a y-value equal to the minimum.  Use of
- *     the HDM will cause the left and right edges of the textlines
- *     to be vertically aligned if they had been typeset flush-left
- *     and flush-right, respectively.
- *
- *     The sampled disparity arrays are expanded to full resolution,
- *     using linear interpolation, and this is further expanded
- *     by slope continuation to the right and below if the image
- *     is larger than the full resolution disparity arrays.  Then
- *     the disparity correction can be applied to the input image.
- *     If the input pix are 2x reduced, the expansion from sampled
- *     to full res uses the product of (sampling) * (redfactor).
- *
- *     The most accurate results are produced at full resolution, and
- *     this is generally recommended.
- * 
- */ - - /*! Dewarp version for serialization - *
-     * Note on versioning of the serialization of this data structure:
-     * The dewarping utility and the stored data can be expected to change.
-     * In most situations, the serialized version is ephemeral -- it is
-     * not needed after being used.  No functions will be provided to
-     * convert between different versions.
-     * 
- */ -#define DEWARP_VERSION_NUMBER 4 - -/*! Data structure to hold a number of Dewarp: the arrays of per-page dewarp ptrs, the lists of page numbers that have models, and the build and rendering parameters stored once for the whole set */ -struct L_Dewarpa -{ - l_int32 nalloc; /*!< size of dewarp ptr array */ - l_int32 maxpage; /*!< maximum page number in array */ - struct L_Dewarp **dewarp; /*!< array of ptrs to page dewarp */ - struct L_Dewarp **dewarpcache; /*!< array of ptrs to cached dewarps */ - struct Numa *namodels; /*!< list of page numbers for pages */ - /*!< with page models */ - struct Numa *napages; /*!< list of page numbers with either */ - /*!< page models or ref page models */ - l_int32 redfactor; /*!< reduction factor of input: 1 or 2 */ - l_int32 sampling; /*!< disparity arrays sampling factor */ - l_int32 minlines; /*!< min number of long lines required */ - l_int32 maxdist; /*!< max distance for getting ref page */ - l_int32 max_linecurv; /*!< maximum abs line curvature, */ - /*!< in micro-units */ - l_int32 min_diff_linecurv; /*!< minimum abs diff line */ - /*!< curvature in micro-units */ - l_int32 max_diff_linecurv; /*!< maximum abs diff line */ - /*!< curvature in micro-units */ - l_int32 max_edgeslope; /*!< maximum abs left or right edge */ - /*!< slope, in milli-units */ - l_int32 max_edgecurv; /*!< maximum abs left or right edge */ - /*!< curvature, in micro-units */ - l_int32 max_diff_edgecurv; /*!< maximum abs diff left-right */ - /*!< edge curvature, in micro-units */ - l_int32 useboth; /*!< use both disparity arrays if */ - /*!< available; only vertical otherwise */ - l_int32 check_columns; /*!< if there are multiple columns, */ - /*!< only use the vertical disparity */ - /*!< array */ - l_int32 modelsready; /*!< invalid models have been removed */ - /*!< and refs built against valid set */ -}; -typedef struct L_Dewarpa L_DEWARPA; - - -/*! 
Data structure for a single dewarp: the sampled and full disparity arrays and the measured line and edge parameters for one page */ -struct L_Dewarp -{ - struct L_Dewarpa *dewa; /*!< ptr to parent (not owned) */ - struct Pix *pixs; /*!< source pix, 1 bpp */ - struct FPix *sampvdispar; /*!< sampled vert disparity array */ - struct FPix *samphdispar; /*!< sampled horiz disparity array */ - struct FPix *sampydispar; /*!< sampled slope h-disparity array */ - struct FPix *fullvdispar; /*!< full vert disparity array */ - struct FPix *fullhdispar; /*!< full horiz disparity array */ - struct FPix *fullydispar; /*!< full slope h-disparity array */ - struct Numa *namidys; /*!< sorted y val of midpoint of each line */ - struct Numa *nacurves; /*!< sorted curvature of each line */ - l_int32 w; /*!< width of source image */ - l_int32 h; /*!< height of source image */ - l_int32 pageno; /*!< page number; important for reuse */ - l_int32 sampling; /*!< sampling factor of disparity arrays */ - l_int32 redfactor; /*!< reduction factor of pixs: 1 or 2 */ - l_int32 minlines; /*!< min number of long lines required */ - l_int32 nlines; /*!< number of long lines found */ - l_int32 mincurv; /*!< min line curvature in micro-units */ - l_int32 maxcurv; /*!< max line curvature in micro-units */ - l_int32 leftslope; /*!< left edge slope in milli-units */ - l_int32 rightslope; /*!< right edge slope in milli-units */ - l_int32 leftcurv; /*!< left edge curvature in micro-units */ - l_int32 rightcurv; /*!< right edge curvature in micro-units */ - l_int32 nx; /*!< number of sampling pts in x-dir */ - l_int32 ny; /*!< number of sampling pts in y-dir */ - l_int32 hasref; /*!< 0 if normal; 1 if has a refpage */ - l_int32 refpage; /*!< page with disparity model to use */ - l_int32 vsuccess; /*!< set to 1 if vert disparity builds */ - l_int32 hsuccess; /*!< set to 1 if horiz disparity builds */ - l_int32 ysuccess; /*!< set to 1 if slope disparity builds */ - l_int32 vvalid; /*!< set to 1 if valid vert disparity */ - l_int32 hvalid; /*!< set to 1 if valid horiz disparity */ - l_int32 
skip_horiz; /*!< if 1, skip horiz disparity */ - /*!< correction */ - l_int32 debug; /*!< set to 1 if debug output requested */ -}; -typedef struct L_Dewarp L_DEWARP; - -#endif /* LEPTONICA_DEWARP_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp1.c deleted file mode 100644 index 11633d53..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp1.c +++ /dev/null @@ -1,1701 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file dewarp1.c - *
- *
- *    Basic operations and serialization
- *
- *      Create/destroy dewarp
- *          L_DEWARP          *dewarpCreate()
- *          L_DEWARP          *dewarpCreateRef()
- *          void               dewarpDestroy()
- *
- *      Create/destroy dewarpa
- *          L_DEWARPA         *dewarpaCreate()
- *          L_DEWARPA         *dewarpaCreateFromPixacomp()
- *          void               dewarpaDestroy()
- *          l_int32            dewarpaDestroyDewarp()
- *
- *      Dewarpa insertion/extraction
- *          l_int32            dewarpaInsertDewarp()
- *          static l_int32     dewarpaExtendArraysToSize()
- *          L_DEWARP          *dewarpaGetDewarp()
- *
- *      Setting parameters to control rendering from the model
- *          l_int32            dewarpaSetCurvatures()
- *          l_int32            dewarpaUseBothArrays()
- *          l_int32            dewarpaSetCheckColumns()
- *          l_int32            dewarpaSetMaxDistance()
- *
- *      Dewarp serialized I/O
- *          L_DEWARP          *dewarpRead()
- *          L_DEWARP          *dewarpReadStream()
- *          L_DEWARP          *dewarpReadMem()
- *          l_int32            dewarpWrite()
- *          l_int32            dewarpWriteStream()
- *          l_int32            dewarpWriteMem()
- *
- *      Dewarpa serialized I/O
- *          L_DEWARPA         *dewarpaRead()
- *          L_DEWARPA         *dewarpaReadStream()
- *          L_DEWARPA         *dewarpaReadMem()
- *          l_int32            dewarpaWrite()
- *          l_int32            dewarpaWriteStream()
- *          l_int32            dewarpaWriteMem()
- *
- *
- *  Examples of usage
- *  =================
- *
- *  See dewarpaCreateFromPixacomp() for an example of the basic
- *  operations, starting from a set of 1 bpp images.
- *
- *  Basic functioning to dewarp a specific single page:
- * \code
- *     // Make the Dewarpa for the pages
- *     L_Dewarpa *dewa = dewarpaCreate(1, 30, 1, 15, 50);
- *     dewarpaSetCurvatures(dewa, -1, 5, -1, -1, -1, -1);
- *     dewarpaUseBothArrays(dewa, 1);  // try to use both disparity
- *                                     // arrays for this example
- *
- *     // Do the page: start with a binarized image
- *     Pix *pixb = "binarize"(pixs);
- *     // Initialize a Dewarp for this page (say, page 214)
- *     L_Dewarp *dew = dewarpCreate(pixb, 214);
- *     // Insert in Dewarpa and obtain parameters for building the model
- *     dewarpaInsertDewarp(dewa, dew);
- *     // Do the work
- *     dewarpBuildPageModel(dew, NULL);  // no debugging
- *     // Optionally set rendering parameters
- *     // Apply model to the input pixs
- *     Pix *pixd;
- *     dewarpaApplyDisparity(dewa, 214, pixs, 255, 0, 0, &pixd, NULL);
- *     pixDestroy(&pixb);
- * \endcode
- *
- *  Basic functioning to dewarp many pages:
- * \code
- *     // Make the Dewarpa for the set of pages; use fullres 1 bpp
- *     L_Dewarpa *dewa = dewarpaCreate(10, 30, 1, 15, 50);
- *     // Optionally set rendering parameters
- *     dewarpaSetCurvatures(dewa, -1, 10, -1, -1, -1, -1);
- *     dewarpaUseBothArrays(dewa, 0);  // just use the vertical disparity
- *                                     // array for this example
- *
- *     // Do first page: start with a binarized image
- *     Pix *pixb = "binarize"(pixs);
- *     // Initialize a Dewarp for this page (say, page 1)
- *     L_Dewarp *dew = dewarpCreate(pixb, 1);
- *     // Insert in Dewarpa and obtain parameters for building the model
- *     dewarpaInsertDewarp(dewa, dew);
- *     // Do the work
- *     dewarpBuildPageModel(dew, NULL);  // no debugging
- *     dewarpMinimize(dew);  // remove most heap storage
- *     pixDestroy(&pixb);
- *
- *     // Do the other pages the same way
- *     ...
- *
- *     // Apply models to each page; if the page model is invalid,
- *     // try to use a valid neighboring model.  Note that the call
- *     // to dewarpaInsertRefModels() is optional, because it is called
- *     // by dewarpaApplyDisparity() on the first page it acts on.
- *     dewarpaInsertRefModels(dewa, 0, 1); // use debug flag to get more
- *                         // detailed information about the page models
- *     [For each page, where pixs is the fullres image to be dewarped] {
- *         L_Dewarp *dew = dewarpaGetDewarp(dewa, pageno);
- *         if (dew) {  // disparity model exists
- *             Pix *pixd;
- *             dewarpaApplyDisparity(dewa, pageno, pixs, 255,
- *                                   0, 0, &pixd, NULL);
- *             dewarpMinimize(dew);  // clean out the pix and fpix arrays
- *             // Squirrel pixd away somewhere ...
- *         }
- *     }
- * \endcode
- *
- *  Basic functioning to dewarp a small set of pages, potentially
- *  using models from nearby pages:
- * \code
- *     // (1) Generate a set of binarized images in the vicinity of the
- *     // pages to be dewarped.  We will attempt to compute models
- *     // for pages from 'firstpage' to 'lastpage'.
- *     // Store the binarized images in a compressed array of
- *     // size 'n', where 'n' is the number of images to be stored,
- *     // and where the offset is the first page.
- *     PixaComp *pixac = pixacompCreateInitialized(n, firstpage, NULL,
- *                                                 IFF_TIFF_G4);
- *     for (i = firstpage; i <= lastpage; i++) {
- *         Pix *pixb = "binarize"(pixs);
- *         pixacompReplacePix(pixac, i, pixb, IFF_TIFF_G4);
- *         pixDestroy(&pixb);
- *     }
- *
- *     // (2) Make the Dewarpa for the pages.
- *     L_Dewarpa *dewa =
- *           dewarpaCreateFromPixacomp(pixac, 30, 15, 20);
- *     dewarpaUseBothArrays(dewa, 1);  // try to use both disparity arrays
- *                                     // in this example
- *
- *     // (3) Finally, apply the models.  For page 'firstpage' with image pixs:
- *     L_Dewarp *dew = dewarpaGetDewarp(dewa, firstpage);
- *     if (dew) {  // disparity model exists
- *         Pix *pixd;
- *         dewarpaApplyDisparity(dewa, firstpage, pixs, 255, 0, 0, &pixd, NULL);
- *         dewarpMinimize(dew);
- *     }
- * \endcode
- *
- *  Because in general some pages will not have enough text to build a
- *  model, we fill in for those pages with a reference to the page
- *  model to use.  Both the target page and the reference page must
- *  have the same parity.  We can also choose to use either a partial model
- *  (with only vertical disparity) or the full model of a nearby page.
- *
- *  Minimizing the data in a model by stripping out images,
- *  numas, and full resolution disparity arrays:
- *     dewarpMinimize(dew);
- *  This can be done at any time to save memory.  Serialization does
- *  not use the data that is stripped.
- *
- *  You can apply any model (in a dew), stripped or not, to another image:
- * \code
- *     // For all pages with invalid models, assign the nearest valid
- *     // page model with same parity.
- *     dewarpaInsertRefModels(dewa, 0, 0);
- *     // You can then apply to 'newpix' the page model that was assigned
- *     // to 'pageno', giving the result in pixd:
- *     Pix *pixd;
- *     dewarpaApplyDisparity(dewa, pageno, newpix, 255, 0, 0, &pixd, NULL);
- * \endcode
- *
- *  You can apply the disparity arrays to a deliberately undercropped
- *  image.  Suppose that you undercrop by (left, right, top, bot), so
- *  that the disparity arrays are aligned with their origin at (left, top).
- *  Dewarp the undercropped image with:
- * \code
- *     Pix *pixd;
- *     dewarpaApplyDisparity(dewa, pageno, undercropped_pix, 255,
- *                           left, top, &pixd, NULL);
- * \endcode
- *
- *  Description of the approach to analyzing page image distortion
- *  ==============================================================
- *
- *  When a book page is scanned, there are several possible causes
- *  for the text lines to appear to be curved:
- *   (1) A barrel (fish-eye) effect because the camera is at
- *       a finite distance from the page.  Take the normal from
- *       the camera to the page (the 'optic axis').  Lines on
- *       the page "below" this point will appear to curve upward
- *       (negative curvature); lines "above" this will curve downward.
- *   (2) Radial distortion from the camera lens.  Probably not
- *       a big factor.
- *   (3) Local curvature of the page in to (or out of) the image
- *       plane (which is perpendicular to the optic axis).
- *       This has no effect if the page is flat.
- *
- *  In the following, the optic axis is in the z direction and is
- *  perpendicular to the xy plane; the book is assumed to be aligned
- *  so that y is approximately along the binding.
- *  The goal is to compute the "disparity" field, D(x,y), which
- *  is actually a vector composed of the horizontal and vertical
- *  disparity fields H(x,y) and V(x,y).  Each of these is a local
- *  function that gives the amount each point in the image is
- *  required to move in order to rectify the horizontal and vertical
- *  lines.  It would also be nice to "flatten" the page to compensate
- *  for effect (3), foreshortening due to bending of the page into
- *  the z direction, but that is more difficult.
- *
- *  Effects (1) and (2) can be directly compensated by calibrating
- *  the scene, using a flat page with horizontal and vertical lines.
- *  Then H(x,y) and V(x,y) can be found as two (non-parametric) arrays
- *  of values.  Suppose this has been done.  Then the remaining
- *  distortion is due to (3).
- *
- *  We consider the simple situation where the page bending is independent
- *  of y, and is described by alpha(x), where alpha is the angle between
- *  the normal to the page and the optic axis.  cos(alpha(x)) is the local
- *  compression factor of the page image in the horizontal direction, at x.
- *  Thus, if we know alpha(x), we can compute the disparity H(x) required
- *  to flatten the image by simply integrating 1/cos(alpha), and we could
- *  compute the remaining disparities, H(x,y) and V(x,y), from the
- *  page content, as described below.  Unfortunately, we don't know
- *  alpha.  What do we know?  If there are horizontal text lines
- *  on the page, we can compute the vertical disparity, V(x,y), which
- *  is the local translation required to make the text lines parallel
- *  to the rasters.  If the margins are left and right aligned, we can
- *  also estimate the horizontal disparity, H(x,y), required to have
- *  uniform margins.  All that can be done from the image alone,
- *  assuming we have text lines covering a sufficient part of the page.
- *
- *  What about alpha(x)?  The basic question relating to (3) is this:
- *
- *     Is it possible, using the shape of the text lines alone,
- *     to compute both the vertical and horizontal disparity fields?
- *
- *  The underlying problem is to separate the line curvature effects due
- *  to the camera view from those due to actual bending of the page.
- *  I believe the proper way to do this is to make some measurements
- *  based on the camera setup, which will depend mostly on the distance
- *  of the camera from the page, and to a smaller extent on the location
- *  of the optic axis with respect to the page.
- *
- *  Here is the procedure.  Photograph a page with a fine 2D line grid
- *  several times, each with a different slope near the binding.
- *  This can be done by placing the grid page on books that have
- *  different shapes z(x) near the binding.  For each one you can
- *  measure, near the binding:
- *    (1) ds/dy, the vertical rate of change of slope of the horizontal lines
- *    (2) the local horizontal compression of the vertical lines due
- *        to the page angle dz/dx.
- *  As mentioned above, the local horizontal compression is simply
- *  cos(dz/dx).  But the measurement you can make on an actual book
- *  page is (1).  The difficulty is to generate (2) from (1).
- *
- *  Back to the procedure.  The function in (1), ds/dy, likely needs
- *  to be measured at a few y locations, because the relation
- *  between (1) and (2) may weakly depend on the y-location with
- *  respect to the y-coordinate of the optic axis of the camera.
- *  From these measurements you can determine, for the camera setup
- *  that you have, the local horizontal compression, cos(dz/dx), as a
- *  function of both the vertical location (y) and your measured vertical
- *  derivative of the text line slope there, ds/dy.  Then with
- *  appropriate smoothing of your measured values, you can set up a
- *  horizontal disparity array to correct for the compression due
- *  to dz/dx.
- *
- *  Now consider V(x,0) and V(x,h), the vertical disparity along
- *  the top and bottom of the image.  With a little thought you
- *  can convince yourself that the local foreshortening,
- *  as a function of x, is proportional to the difference
- *  between the slope of V(x,0) and V(x,h).  The horizontal
- *  disparity can then be computed by integrating the local foreshortening
- *  over x.  Integration of the slope of V(x,0) and V(x,h) gives
- *  the vertical disparity itself.  We have to normalize to h, the
- *  height of the page.  So the very simple result is that
- *
- *      H(x) ~ (V(x,0) - V(x,h)) / h         [1]
- *
- *  which is easily computed.  There is a proportionality constant
- *  that depends on the ratio of h to the distance to the camera.
- *  Can we actually believe this for the case where the bending
- *  is independent of y?  I believe the answer is yes,
- *  as long as you first remove the apparent distortion due
- *  to the camera being at a finite distance.
- *
- *  If you know the intersection of the optical axis with the page
- *  and the distance to the camera, and if the page is perpendicular
- *  to the optic axis, you can compute the horizontal and vertical
- *  disparities due to (1) and (2) and remove them.  The resulting
- *  distortion should be entirely due to bending (3), for which
- *  the relation
- *
- *      Hx(x) dx = C * ((Vx(x,0) - Vx(x, h))/h) dx         [2]
- *
- *  holds for each point in x (Hx and Vx are partial derivatives w/rt x).
- *  Integrating over x, and using H(0) = 0, we get the result [1].
- *
- *  I believe this result holds differentially for each value of y, so
- *  that in the case where the bending is not independent of y,
- *  the expression (V(x,0) - V(x,h)) / h goes over to Vy(x,y).  Then
- *
- *     H(x,y) = Integral(0,x) (Vyx(x,y) dx)         [3]
- *
- *  where Vyx() is the partial derivative of V w/rt both x and y.
- *
- *  It would be nice if there were a simple mathematical relation between
- *  the horizontal and vertical disparities for the situation
- *  where the paper bends without stretching or kinking.
- *  I had hoped to get a relation between H and V, such as
- *  Hx(x,y) ~ Vy(x,y), which would imply that H and V are real
- *  and imaginary parts of a complex potential, each of which
- *  satisfies the Laplace equation.  But then the gradients of the
- *  two potentials would be normal, and that does not appear to be the case.
- *  Thus, the questions of proving the relations above (for small bending),
- *  or finding a simpler relation between H and V than those equations,
- *  remain open.  So far, we have only used [1] for the horizontal
- *  disparity H(x).
- *
- *  In the version of the code that follows, we first use text lines
- *  to find V(x,y).  Then, we try to compute H(x,y) that will align
- *  the text vertically on the left and right margins.  This is not
- *  always possible -- sometimes the right margin is not right justified.
- *  By default, we don't require the horizontal disparity to have a
- *  valid page model for dewarping a page, but this requirement can
- *  be forced using dewarpaUseFullModel().
- *
- *  As described above, one can add a y-independent component of
- *  the horizontal disparity H(x) to counter the foreshortening
- *  effect due to the bending of the page near the binding.
- *  This requires widening the image on the side near the binding,
- *  and we do not provide this option here.  However, we do provide
- *  a function that will generate this disparity field:
- *       fpixExtraHorizDisparity()
- *
- *  Here is the basic outline for building the disparity arrays.
- *
- *  (1) Find lines going approximately through the center of the
- *      text in each text line.  Accept only lines that are
- *      close in length to the longest line.
- *  (2) Use these lines to generate a regular and highly subsampled
- *      vertical disparity field V(x,y).
- *  (3) Interpolate this to generate a full resolution vertical
- *      disparity field.
- *  (4) For lines that are sufficiently long, assume they are approximately
- *      left and right-justified, and construct a highly subsampled
- *      horizontal disparity field H(x,y) that will bring them into alignment.
- *  (5) Interpolate this to generate a full resolution horizontal
- *      disparity field.
- *  (6) Apply the vertical dewarping, followed by the horizontal dewarping.
- *
- *  Step (1) is clearly described by the code in pixGetTextlineCenters().
- *
- *  Steps (2) and (3) follow directly from the data in step (1),
- *  and constitute the bulk of the work done in dewarpBuildPageModel().
- *  Virtually all the noise in the data is smoothed out by doing
- *  least-square quadratic fits, first horizontally to the data
- *  points representing the text line centers, and then vertically.
- *  The trick is to sample these lines on a regular grid.
- *  First each horizontal line is sampled at equally spaced
- *  intervals horizontally.  We thus get a set of points,
- *  one in each line, that are vertically aligned, and
- *  the data we represent is the vertical distance of each point
- *  from the min or max value on the curve, depending on the
- *  sign of the curvature component.  Each of these vertically
- *  aligned sets of points constitutes a sampled vertical disparity,
- *  and we do a LS quartic fit to each of them, followed by
- *  vertical sampling at regular intervals.  We now have a subsampled
- *  grid of points, all equally spaced, giving at each point the local
- *  vertical disparity.  Finally, the full resolution vertical disparity
- *  is formed by interpolation.  All the least square fits do a
- *  great job of smoothing everything out, as can be observed by
- *  the contour maps that are generated for the vertical disparity field.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_int32 dewarpaExtendArraysToSize(L_DEWARPA *dewa, l_int32 size); - - /* Parameter values used in dewarpaCreate() */ -static const l_int32 InitialPtrArraySize = 20; /* n'import quoi */ -static const l_int32 MaxPtrArraySize = 10000; -static const l_int32 DefaultArraySampling = 30; -static const l_int32 MinArraySampling = 8; -static const l_int32 DefaultMinLines = 15; -static const l_int32 MinMinLines = 4; -static const l_int32 DefaultMaxRefDist = 16; -static const l_int32 DefaultUseBoth = TRUE; -static const l_int32 DefaultCheckColumns = TRUE; - - /* Parameter values used in dewarpaSetCurvatures() */ -static const l_int32 DefaultMaxLineCurv = 150; -static const l_int32 DefaultMinDiffLineCurv = 0; -static const l_int32 DefaultMaxDiffLineCurv = 170; -static const l_int32 DefaultMaxEdgeCurv = 50; -static const l_int32 DefaultMaxDiffEdgeCurv = 40; -static const l_int32 DefaultMaxEdgeSlope = 80; - -/*----------------------------------------------------------------------* - * Create/destroy Dewarp * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpCreate() - * - * \param[in] pixs 1 bpp - * \param[in] pageno page number - * \return dew or NULL on error - * - *
- * Notes:
- *      (1) The input pixs is either full resolution or 2x reduced.
- *      (2) The page number is typically 0-based.  If scanned from a book,
- *          the even pages are usually on the left.  Disparity arrays
- *          built for even pages should only be applied to even pages.
- * 
- */
-L_DEWARP *
-dewarpCreate(PIX     *pixs,
-             l_int32  pageno)
-{
-L_DEWARP  *dew;
-
-    PROCNAME("dewarpCreate");
-
-    if (!pixs)
-        return (L_DEWARP *)ERROR_PTR("pixs not defined", procName, NULL);
-    if (pixGetDepth(pixs) != 1)
-        return (L_DEWARP *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
-
-        /* The allocation can fail; report it instead of
-         * dereferencing a null pointer below. */
-    dew = (L_DEWARP *)LEPT_CALLOC(1, sizeof(L_DEWARP));
-    if (!dew)
-        return (L_DEWARP *)ERROR_PTR("dew not made", procName, NULL);
-
-        /* Keep a clone of the source; it is released by dewarpDestroy() */
-    dew->pixs = pixClone(pixs);
-    dew->pageno = pageno;
-    dew->w = pixGetWidth(pixs);
-    dew->h = pixGetHeight(pixs);
-    return dew;
-}
-
-
-/*!
- * \brief   dewarpCreateRef()
- *
- * \param[in]    pageno    this page number
- * \param[in]    refpage   page number of dewarp disparity arrays to be used
- * \return  dew or NULL on error
- *
- *
- * Notes:
- *      (1) This specifies which dewarp struct should be used for
- *          the given page.  It is placed in dewarpa for pages
- *          for which no model can be built.
- *      (2) This page and the reference page have the same parity and
- *          the reference page is the closest page with a disparity model
- *          to this page.
- * 
- */
-L_DEWARP *
-dewarpCreateRef(l_int32  pageno,
-                l_int32  refpage)
-{
-L_DEWARP  *dew;
-
-    PROCNAME("dewarpCreateRef");
-
-        /* The allocation can fail; report it instead of
-         * dereferencing a null pointer below. */
-    dew = (L_DEWARP *)LEPT_CALLOC(1, sizeof(L_DEWARP));
-    if (!dew)
-        return (L_DEWARP *)ERROR_PTR("dew not made", procName, NULL);
-
-        /* A ref dewarp carries no arrays of its own; it only records
-         * which page's model is to be used for this page. */
-    dew->pageno = pageno;
-    dew->hasref = 1;
-    dew->refpage = refpage;
-    return dew;
-}
-
-
-/*!
- * \brief   dewarpDestroy()
- *
- * \param[in,out]   pdew   will be set to null before returning
- * \return  void
- */
-void
-dewarpDestroy(L_DEWARP  **pdew)
-{
-L_DEWARP  *dew;
-
-    PROCNAME("dewarpDestroy");
-
-    if (pdew == NULL) {
-        L_WARNING("ptr address is null!\n", procName);
-        return;
-    }
-    if ((dew = *pdew) == NULL)
-        return;
-
-        /* Release every owned image, disparity array and numa.
-         * The parent 'dewa' ptr is not owned and is left alone. */
-    pixDestroy(&dew->pixs);
-    fpixDestroy(&dew->sampvdispar);
-    fpixDestroy(&dew->samphdispar);
-    fpixDestroy(&dew->sampydispar);
-    fpixDestroy(&dew->fullvdispar);
-    fpixDestroy(&dew->fullhdispar);
-    fpixDestroy(&dew->fullydispar);
-    numaDestroy(&dew->namidys);
-    numaDestroy(&dew->nacurves);
-    LEPT_FREE(dew);
-    *pdew = NULL;
-    return;
-}
-
-
-/*----------------------------------------------------------------------*
- *                       Create/destroy Dewarpa                         *
- *----------------------------------------------------------------------*/
-/*!
- * \brief   dewarpaCreate()
- *
- * \param[in]    nptrs      number of dewarp page ptrs; typ. the number of pages
- * \param[in]    sampling   use 0 for default value; the minimum allowed is 8
- * \param[in]    redfactor  of input images: 1 is full res; 2 is 2x reduced
- * \param[in]    minlines   minimum number of lines to accept; use 0 for default
- * \param[in]    maxdist    for locating reference disparity; use -1 for default
- * \return  dewa or NULL on error
- *
- *
- * Notes:
- *      (1) The sampling, minlines and maxdist parameters will be
- *          applied to all images.
- *      (2) The sampling factor is used for generating the disparity arrays
- *          from the input image.  For 2x reduced input, use a sampling
- *          factor that is half the sampling you want on the full resolution
- *          images.
- *      (3) Use %redfactor = 1 for full resolution; 2 for 2x reduction.
- *          All input images must be at one of these two resolutions.
- *      (4) %minlines is the minimum number of nearly full-length lines
- *          required to generate a vertical disparity array.  The default
- *          number is 15.  Use a smaller number to accept a questionable
- *          array, but not smaller than 4.
- *      (5) When a model can't be built for a page, it looks up to %maxdist
- *          in either direction for a valid model with the same page parity.
- *          Use -1 for the default value of %maxdist; use 0 to avoid using
- *          a ref model.
- *      (6) The ptr array is expanded as necessary to accommodate page images.
- * 
- */ -L_DEWARPA * -dewarpaCreate(l_int32 nptrs, - l_int32 sampling, - l_int32 redfactor, - l_int32 minlines, - l_int32 maxdist) -{ -L_DEWARPA *dewa; - - PROCNAME("dewarpaCreate"); - - if (nptrs <= 0) - nptrs = InitialPtrArraySize; - if (nptrs > MaxPtrArraySize) - return (L_DEWARPA *)ERROR_PTR("too many pages", procName, NULL); - if (redfactor != 1 && redfactor != 2) - return (L_DEWARPA *)ERROR_PTR("redfactor not in {1,2}", - procName, NULL); - if (sampling == 0) { - sampling = DefaultArraySampling; - } else if (sampling < MinArraySampling) { - L_WARNING("sampling too small; setting to %d\n", procName, - MinArraySampling); - sampling = MinArraySampling; - } - if (minlines == 0) { - minlines = DefaultMinLines; - } else if (minlines < MinMinLines) { - L_WARNING("minlines too small; setting to %d\n", procName, - MinMinLines); - minlines = DefaultMinLines; - } - if (maxdist < 0) - maxdist = DefaultMaxRefDist; - - dewa = (L_DEWARPA *)LEPT_CALLOC(1, sizeof(L_DEWARPA)); - dewa->dewarp = (L_DEWARP **)LEPT_CALLOC(nptrs, sizeof(L_DEWARPA *)); - dewa->dewarpcache = (L_DEWARP **)LEPT_CALLOC(nptrs, sizeof(L_DEWARPA *)); - if (!dewa->dewarp || !dewa->dewarpcache) { - dewarpaDestroy(&dewa); - return (L_DEWARPA *)ERROR_PTR("dewarp ptrs not made", procName, NULL); - } - dewa->nalloc = nptrs; - dewa->sampling = sampling; - dewa->redfactor = redfactor; - dewa->minlines = minlines; - dewa->maxdist = maxdist; - dewa->max_linecurv = DefaultMaxLineCurv; - dewa->min_diff_linecurv = DefaultMinDiffLineCurv; - dewa->max_diff_linecurv = DefaultMaxDiffLineCurv; - dewa->max_edgeslope = DefaultMaxEdgeSlope; - dewa->max_edgecurv = DefaultMaxEdgeCurv; - dewa->max_diff_edgecurv = DefaultMaxDiffEdgeCurv; - dewa->check_columns = DefaultCheckColumns; - dewa->useboth = DefaultUseBoth; - return dewa; -} - - -/*! 
- * \brief dewarpaCreateFromPixacomp() - * - * \param[in] pixac pixacomp of G4, 1 bpp images; with 1x1x1 placeholders - * \param[in] useboth 0 for only vert disparity; 1 for both vert and horiz - * \param[in] sampling use -1 or 0 for default value; otherwise minimum of 5 - * \param[in] minlines minimum number of lines to accept; e.g., 10 - * \param[in] maxdist for locating reference disparity; use -1 for default - * \return dewa or NULL on error - * - *
- * Notes:
- *      (1) The returned dewa has disparity arrays calculated and
- *          is ready for serialization or for use in dewarping.
- *      (2) The sampling, minlines and maxdist parameters are
- *          applied to all images.  See notes in dewarpaCreate() for details.
- *      (3) The pixac is full.  Placeholders, if any, are w=h=d=1 images,
- *          and the real input images are 1 bpp at full resolution.
- *          They are assumed to be cropped to the actual page regions,
- *          and may be arbitrarily sparse in the array.
- *      (4) The output dewarpa is indexed by the page number.
- *          The offset in the pixac gives the mapping between the
- *          array index in the pixac and the page number.
- *      (5) This adds the ref page models.
- *      (6) This can be used to make models for any desired set of pages.
- *          The direct models are only made for pages with images in
- *          the pixacomp; the ref models are made for pages of the
- *          same parity within %maxdist of the nearest direct model.
- * 
- */ -L_DEWARPA * -dewarpaCreateFromPixacomp(PIXAC *pixac, - l_int32 useboth, - l_int32 sampling, - l_int32 minlines, - l_int32 maxdist) -{ -l_int32 i, nptrs, pageno; -L_DEWARP *dew; -L_DEWARPA *dewa; -PIX *pixt; - - PROCNAME("dewarpaCreateFromPixacomp"); - - if (!pixac) - return (L_DEWARPA *)ERROR_PTR("pixac not defined", procName, NULL); - - nptrs = pixacompGetCount(pixac); - if ((dewa = dewarpaCreate(pixacompGetOffset(pixac) + nptrs, - sampling, 1, minlines, maxdist)) == NULL) - return (L_DEWARPA *)ERROR_PTR("dewa not made", procName, NULL); - dewarpaUseBothArrays(dewa, useboth); - - for (i = 0; i < nptrs; i++) { - pageno = pixacompGetOffset(pixac) + i; /* index into pixacomp */ - pixt = pixacompGetPix(pixac, pageno); - if (pixt && (pixGetWidth(pixt) > 1)) { - dew = dewarpCreate(pixt, pageno); - pixDestroy(&pixt); - if (!dew) { - ERROR_INT("unable to make dew!", procName, 1); - continue; - } - - /* Insert into dewa for this page */ - dewarpaInsertDewarp(dewa, dew); - - /* Build disparity arrays for this page */ - dewarpBuildPageModel(dew, NULL); - if (!dew->vsuccess) { /* will need to use model from nearby page */ - dewarpaDestroyDewarp(dewa, pageno); - L_ERROR("unable to build model for page %d\n", procName, i); - continue; - } - /* Remove all extraneous data */ - dewarpMinimize(dew); - } - pixDestroy(&pixt); - } - dewarpaInsertRefModels(dewa, 0, 0); - - return dewa; -} - - -/*! 
- * \brief dewarpaDestroy() - * - * \param[in,out] pdewa will be set to null before returning - * \return void - */ -void -dewarpaDestroy(L_DEWARPA **pdewa) -{ -l_int32 i; -L_DEWARP *dew; -L_DEWARPA *dewa; - - PROCNAME("dewarpaDestroy"); - - if (pdewa == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - if ((dewa = *pdewa) == NULL) - return; - - for (i = 0; i < dewa->nalloc; i++) { - if ((dew = dewa->dewarp[i]) != NULL) - dewarpDestroy(&dew); - if ((dew = dewa->dewarpcache[i]) != NULL) - dewarpDestroy(&dew); - } - numaDestroy(&dewa->namodels); - numaDestroy(&dewa->napages); - - LEPT_FREE(dewa->dewarp); - LEPT_FREE(dewa->dewarpcache); - LEPT_FREE(dewa); - *pdewa = NULL; - return; -} - - -/*! - * \brief dewarpaDestroyDewarp() - * - * \param[in] dewa - * \param[in] pageno of dew to be destroyed - * \return 0 if OK, 1 on error - */ -l_ok -dewarpaDestroyDewarp(L_DEWARPA *dewa, - l_int32 pageno) -{ -L_DEWARP *dew; - - PROCNAME("dewarpaDestroyDewarp"); - - if (!dewa) - return ERROR_INT("dewa or dew not defined", procName, 1); - if (pageno < 0 || pageno > dewa->maxpage) - return ERROR_INT("page out of bounds", procName, 1); - if ((dew = dewa->dewarp[pageno]) == NULL) - return ERROR_INT("dew not defined", procName, 1); - - dewarpDestroy(&dew); - dewa->dewarp[pageno] = NULL; - return 0; -} - - -/*----------------------------------------------------------------------* - * Dewarpa insertion/extraction * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaInsertDewarp() - * - * \param[in] dewa - * \param[in] dew to be added - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This inserts the dewarp into the array, which now owns it.
- *          It also keeps track of the largest page number stored.
- *          It must be done before the disparity model is built.
- *      (2) Note that this differs from the usual method of filling out
- *          arrays in leptonica, where the arrays are compact and
- *          new elements are typically added to the end.  Here,
- *          the dewarp can be added anywhere, even beyond the initial
- *          allocation.
- * 
- */ -l_ok -dewarpaInsertDewarp(L_DEWARPA *dewa, - L_DEWARP *dew) -{ -l_int32 pageno, n, newsize; -L_DEWARP *prevdew; - - PROCNAME("dewarpaInsertDewarp"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - - dew->dewa = dewa; - pageno = dew->pageno; - if (pageno > MaxPtrArraySize) - return ERROR_INT("too many pages", procName, 1); - if (pageno > dewa->maxpage) - dewa->maxpage = pageno; - dewa->modelsready = 0; /* force re-evaluation at application time */ - - /* Extend ptr array if necessary */ - n = dewa->nalloc; - newsize = n; - if (pageno >= 2 * n) - newsize = 2 * pageno; - else if (pageno >= n) - newsize = 2 * n; - if (newsize > n) - dewarpaExtendArraysToSize(dewa, newsize); - - if ((prevdew = dewarpaGetDewarp(dewa, pageno)) != NULL) - dewarpDestroy(&prevdew); - dewa->dewarp[pageno] = dew; - - dew->sampling = dewa->sampling; - dew->redfactor = dewa->redfactor; - dew->minlines = dewa->minlines; - - /* Get the dimensions of the sampled array. This will be - * stored in an fpix, and the input resolution version is - * guaranteed to be larger than pixs. However, if you - * want to apply the disparity to an image with a width - * w > nx * s - 2 * s + 2 - * you will need to extend the input res fpix. - * And similarly for h. */ - dew->nx = (dew->w + 2 * dew->sampling - 2) / dew->sampling; - dew->ny = (dew->h + 2 * dew->sampling - 2) / dew->sampling; - return 0; -} - - -/*! - * \brief dewarpaExtendArraysToSize() - * - * \param[in] dewa - * \param[in] size new size of dewarpa array - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If necessary, reallocs main and cache dewarpa ptr arrays to %size.
- * 
- */ -static l_int32 -dewarpaExtendArraysToSize(L_DEWARPA *dewa, - l_int32 size) -{ - PROCNAME("dewarpaExtendArraysToSize"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - if (size > dewa->nalloc) { - if ((dewa->dewarp = (L_DEWARP **)reallocNew((void **)&dewa->dewarp, - sizeof(L_DEWARP *) * dewa->nalloc, - size * sizeof(L_DEWARP *))) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - if ((dewa->dewarpcache = - (L_DEWARP **)reallocNew((void **)&dewa->dewarpcache, - sizeof(L_DEWARP *) * dewa->nalloc, - size * sizeof(L_DEWARP *))) == NULL) - return ERROR_INT("new ptr cache array not returned", procName, 1); - dewa->nalloc = size; - } - return 0; -} - - -/*! - * \brief dewarpaGetDewarp() - * - * \param[in] dewa populated with dewarp structs for pages - * \param[in] index into dewa: this is the pageno - * \return dew handle; still owned by dewa, or NULL on error - */ -L_DEWARP * -dewarpaGetDewarp(L_DEWARPA *dewa, - l_int32 index) -{ - PROCNAME("dewarpaGetDewarp"); - - if (!dewa) - return (L_DEWARP *)ERROR_PTR("dewa not defined", procName, NULL); - if (index < 0 || index > dewa->maxpage) { - L_ERROR("index = %d is invalid; max index = %d\n", - procName, index, dewa->maxpage); - return NULL; - } - - return dewa->dewarp[index]; -} - - -/*----------------------------------------------------------------------* - * Setting parameters to control rendering from the model * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaSetCurvatures() - * - * \param[in] dewa - * \param[in] max_linecurv -1 for default - * \param[in] min_diff_linecurv -1 for default; 0 to accept all models - * \param[in] max_diff_linecurv -1 for default - * \param[in] max_edgecurv -1 for default - * \param[in] max_diff_edgecurv -1 for default - * \param[in] max_edgeslope -1 for default - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Approximating the line by a quadratic, the coefficient
- *          of the quadratic term is the curvature, and distance
- *          units are in pixels (of course).  The curvature is very
- *          small, so we multiply by 10^6 and express the constraints
- *          on the model curvatures in micro-units.
- *      (2) This sets five curvature thresholds and a slope threshold:
- *          * the maximum absolute value of the vertical disparity
- *            line curvatures
- *          * the minimum absolute value of the largest difference in
- *            vertical disparity line curvatures (Use a value of 0
- *            to accept all models.)
- *          * the maximum absolute value of the largest difference in
- *            vertical disparity line curvatures
- *          * the maximum absolute value of the left and right edge
- *            curvature for the horizontal disparity
- *          * the maximum absolute value of the difference between
- *            left and right edge curvature for the horizontal disparity
- *          all in micro-units, for dewarping to take place.
- *          Use -1 for default values.
- *      (3) An image with a line curvature less than about 0.00001
- *          has fairly straight textlines.  This is 10 micro-units.
- *      (4) For example, if %max_linecurv == 100, this would prevent dewarping
- *          if any of the lines has a curvature exceeding 100 micro-units.
- *          A model having maximum line curvature larger than about 150
- *          micro-units should probably not be used.
- *      (5) A model having a left or right edge curvature larger than
- *          about 50 micro-units should probably not be used.
- * 
- */ -l_ok -dewarpaSetCurvatures(L_DEWARPA *dewa, - l_int32 max_linecurv, - l_int32 min_diff_linecurv, - l_int32 max_diff_linecurv, - l_int32 max_edgecurv, - l_int32 max_diff_edgecurv, - l_int32 max_edgeslope) -{ - PROCNAME("dewarpaSetCurvatures"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - if (max_linecurv == -1) - dewa->max_linecurv = DefaultMaxLineCurv; - else - dewa->max_linecurv = L_ABS(max_linecurv); - - if (min_diff_linecurv == -1) - dewa->min_diff_linecurv = DefaultMinDiffLineCurv; - else - dewa->min_diff_linecurv = L_ABS(min_diff_linecurv); - - if (max_diff_linecurv == -1) - dewa->max_diff_linecurv = DefaultMaxDiffLineCurv; - else - dewa->max_diff_linecurv = L_ABS(max_diff_linecurv); - - if (max_edgecurv == -1) - dewa->max_edgecurv = DefaultMaxEdgeCurv; - else - dewa->max_edgecurv = L_ABS(max_edgecurv); - - if (max_diff_edgecurv == -1) - dewa->max_diff_edgecurv = DefaultMaxDiffEdgeCurv; - else - dewa->max_diff_edgecurv = L_ABS(max_diff_edgecurv); - - if (max_edgeslope == -1) - dewa->max_edgeslope = DefaultMaxEdgeSlope; - else - dewa->max_edgeslope = L_ABS(max_edgeslope); - - dewa->modelsready = 0; /* force validation */ - return 0; -} - - -/*! - * \brief dewarpaUseBothArrays() - * - * \param[in] dewa - * \param[in] useboth 0 for false, 1 for true - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This sets the useboth field.  If set, this will attempt
- *          to apply both vertical and horizontal disparity arrays.
- *          Note that a model with only a vertical disparity array will
- *          always be valid.
- * 
- */ -l_ok -dewarpaUseBothArrays(L_DEWARPA *dewa, - l_int32 useboth) -{ - PROCNAME("dewarpaUseBothArrays"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - dewa->useboth = useboth; - dewa->modelsready = 0; /* force validation */ - return 0; -} - - -/*! - * \brief dewarpaSetCheckColumns() - * - * \param[in] dewa - * \param[in] check_columns 0 for false, 1 for true - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This sets the 'check_columns" field.  If set, and if
- *          'useboth' is set, this will count the number of text
- *          columns.  If the number is larger than 1, this will
- *          prevent the application of horizontal disparity arrays
- *          if they exist.  Note that the default value of check_columns
- *          if 0 (FALSE).
- *      (2) This field is set to 0 by default.  For horizontal disparity
- *          correction to take place on a single column of text, you must have:
- *           - a valid horizontal disparity array
- *           - useboth = 1 (TRUE)
- *          If there are multiple columns, additionally
- *           - check_columns = 0 (FALSE)
- *
- * 
- */ -l_ok -dewarpaSetCheckColumns(L_DEWARPA *dewa, - l_int32 check_columns) -{ - PROCNAME("dewarpaSetCheckColumns"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - dewa->check_columns = check_columns; - return 0; -} - - -/*! - * \brief dewarpaSetMaxDistance() - * - * \param[in] dewa - * \param[in] maxdist for using ref models - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This sets the maxdist field.
- * 
- */ -l_ok -dewarpaSetMaxDistance(L_DEWARPA *dewa, - l_int32 maxdist) -{ - PROCNAME("dewarpaSetMaxDistance"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - dewa->maxdist = maxdist; - dewa->modelsready = 0; /* force validation */ - return 0; -} - - -/*----------------------------------------------------------------------* - * Dewarp serialized I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpRead() - * - * \param[in] filename - * \return dew, or NULL on error - */ -L_DEWARP * -dewarpRead(const char *filename) -{ -FILE *fp; -L_DEWARP *dew; - - PROCNAME("dewarpRead"); - - if (!filename) - return (L_DEWARP *)ERROR_PTR("filename not defined", procName, NULL); - if ((fp = fopenReadStream(filename)) == NULL) - return (L_DEWARP *)ERROR_PTR("stream not opened", procName, NULL); - - if ((dew = dewarpReadStream(fp)) == NULL) { - fclose(fp); - return (L_DEWARP *)ERROR_PTR("dew not read", procName, NULL); - } - - fclose(fp); - return dew; -} - - -/*! - * \brief dewarpReadStream() - * - * \param[in] fp file stream - * \return dew dewarp, or NULL on error - * - *
- * Notes:
- *      (1) The dewarp struct is stored in minimized format, with only
- *          subsampled disparity arrays.
- *      (2) The sampling and extra horizontal disparity parameters are
- *          stored here.  During generation of the dewarp struct, they
- *          are passed in from the dewarpa.  In readback, it is assumed
- *          that they are (a) the same for each page and (b) the same
- *          as the values used to create the dewarpa.
- * 
- */ -L_DEWARP * -dewarpReadStream(FILE *fp) -{ -l_int32 version, sampling, redfactor, minlines, pageno, hasref, refpage; -l_int32 w, h, nx, ny, vdispar, hdispar, nlines; -l_int32 mincurv, maxcurv, leftslope, rightslope, leftcurv, rightcurv; -L_DEWARP *dew; -FPIX *fpixv, *fpixh; - - PROCNAME("dewarpReadStream"); - - if (!fp) - return (L_DEWARP *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nDewarp Version %d\n", &version) != 1) - return (L_DEWARP *)ERROR_PTR("not a dewarp file", procName, NULL); - if (version != DEWARP_VERSION_NUMBER) - return (L_DEWARP *)ERROR_PTR("invalid dewarp version", procName, NULL); - if (fscanf(fp, "pageno = %d\n", &pageno) != 1) - return (L_DEWARP *)ERROR_PTR("read fail for pageno", procName, NULL); - if (fscanf(fp, "hasref = %d, refpage = %d\n", &hasref, &refpage) != 2) - return (L_DEWARP *)ERROR_PTR("read fail for hasref, refpage", - procName, NULL); - if (fscanf(fp, "sampling = %d, redfactor = %d\n", &sampling, &redfactor) - != 2) - return (L_DEWARP *)ERROR_PTR("read fail for sampling/redfactor", - procName, NULL); - if (fscanf(fp, "nlines = %d, minlines = %d\n", &nlines, &minlines) != 2) - return (L_DEWARP *)ERROR_PTR("read fail for nlines/minlines", - procName, NULL); - if (fscanf(fp, "w = %d, h = %d\n", &w, &h) != 2) - return (L_DEWARP *)ERROR_PTR("read fail for w, h", procName, NULL); - if (fscanf(fp, "nx = %d, ny = %d\n", &nx, &ny) != 2) - return (L_DEWARP *)ERROR_PTR("read fail for nx, ny", procName, NULL); - if (fscanf(fp, "vert_dispar = %d, horiz_dispar = %d\n", &vdispar, &hdispar) - != 2) - return (L_DEWARP *)ERROR_PTR("read fail for flags", procName, NULL); - if (vdispar) { - if (fscanf(fp, "min line curvature = %d, max line curvature = %d\n", - &mincurv, &maxcurv) != 2) - return (L_DEWARP *)ERROR_PTR("read fail for mincurv & maxcurv", - procName, NULL); - } - if (hdispar) { - if (fscanf(fp, "left edge slope = %d, right edge slope = %d\n", - &leftslope, &rightslope) != 2) - return (L_DEWARP 
*)ERROR_PTR("read fail for leftslope & rightslope", - procName, NULL); - if (fscanf(fp, "left edge curvature = %d, right edge curvature = %d\n", - &leftcurv, &rightcurv) != 2) - return (L_DEWARP *)ERROR_PTR("read fail for leftcurv & rightcurv", - procName, NULL); - } - if (vdispar) { - if ((fpixv = fpixReadStream(fp)) == NULL) - return (L_DEWARP *)ERROR_PTR("read fail for vdispar", - procName, NULL); - } - if (hdispar) { - if ((fpixh = fpixReadStream(fp)) == NULL) - return (L_DEWARP *)ERROR_PTR("read fail for hdispar", - procName, NULL); - } - getc(fp); - - dew = (L_DEWARP *)LEPT_CALLOC(1, sizeof(L_DEWARP)); - dew->w = w; - dew->h = h; - dew->pageno = pageno; - dew->sampling = sampling; - dew->redfactor = redfactor; - dew->minlines = minlines; - dew->nlines = nlines; - dew->hasref = hasref; - dew->refpage = refpage; - if (hasref == 0) /* any dew without a ref has an actual model */ - dew->vsuccess = 1; - dew->nx = nx; - dew->ny = ny; - if (vdispar) { - dew->mincurv = mincurv; - dew->maxcurv = maxcurv; - dew->vsuccess = 1; - dew->sampvdispar = fpixv; - } - if (hdispar) { - dew->leftslope = leftslope; - dew->rightslope = rightslope; - dew->leftcurv = leftcurv; - dew->rightcurv = rightcurv; - dew->hsuccess = 1; - dew->samphdispar = fpixh; - } - - return dew; -} - - -/*! - * \brief dewarpReadMem() - * - * \param[in] data serialization of dewarp - * \param[in] size of data in bytes - * \return dew dewarp, or NULL on error - */ -L_DEWARP * -dewarpReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -L_DEWARP *dew; - - PROCNAME("dewarpReadMem"); - - if (!data) - return (L_DEWARP *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (L_DEWARP *)ERROR_PTR("stream not opened", procName, NULL); - - dew = dewarpReadStream(fp); - fclose(fp); - if (!dew) L_ERROR("dew not read\n", procName); - return dew; -} - - -/*! 
- * \brief dewarpWrite() - * - * \param[in] filename - * \param[in] dew - * \return 0 if OK, 1 on error - */ -l_ok -dewarpWrite(const char *filename, - L_DEWARP *dew) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("dewarpWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = dewarpWriteStream(fp, dew); - fclose(fp); - if (ret) - return ERROR_INT("dew not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief dewarpWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] dew - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This should not be written if there is no sampled
- *          vertical disparity array, which means that no model has
- *          been built for this page.
- * 
- */ -l_ok -dewarpWriteStream(FILE *fp, - L_DEWARP *dew) -{ -l_int32 vdispar, hdispar; - - PROCNAME("dewarpWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - - fprintf(fp, "\nDewarp Version %d\n", DEWARP_VERSION_NUMBER); - fprintf(fp, "pageno = %d\n", dew->pageno); - fprintf(fp, "hasref = %d, refpage = %d\n", dew->hasref, dew->refpage); - fprintf(fp, "sampling = %d, redfactor = %d\n", - dew->sampling, dew->redfactor); - fprintf(fp, "nlines = %d, minlines = %d\n", dew->nlines, dew->minlines); - fprintf(fp, "w = %d, h = %d\n", dew->w, dew->h); - fprintf(fp, "nx = %d, ny = %d\n", dew->nx, dew->ny); - vdispar = (dew->sampvdispar) ? 1 : 0; - hdispar = (dew->samphdispar) ? 1 : 0; - fprintf(fp, "vert_dispar = %d, horiz_dispar = %d\n", vdispar, hdispar); - if (vdispar) - fprintf(fp, "min line curvature = %d, max line curvature = %d\n", - dew->mincurv, dew->maxcurv); - if (hdispar) { - fprintf(fp, "left edge slope = %d, right edge slope = %d\n", - dew->leftslope, dew->rightslope); - fprintf(fp, "left edge curvature = %d, right edge curvature = %d\n", - dew->leftcurv, dew->rightcurv); - } - if (vdispar) fpixWriteStream(fp, dew->sampvdispar); - if (hdispar) fpixWriteStream(fp, dew->samphdispar); - fprintf(fp, "\n"); - - if (!vdispar) - L_WARNING("no disparity arrays!\n", procName); - return 0; -} - - -/*! - * \brief dewarpWriteMem() - * - * \param[out] pdata data of serialized dewarp (not ascii) - * \param[out] psize size of returned data - * \param[in] dew - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a dewarp in memory and puts the result in a buffer.
- * 
- */ -l_ok -dewarpWriteMem(l_uint8 **pdata, - size_t *psize, - L_DEWARP *dew) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("dewarpWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = dewarpWriteStream(fp, dew); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = dewarpWriteStream(fp, dew); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*----------------------------------------------------------------------* - * Dewarpa serialized I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaRead() - * - * \param[in] filename - * \return dewa, or NULL on error - */ -L_DEWARPA * -dewarpaRead(const char *filename) -{ -FILE *fp; -L_DEWARPA *dewa; - - PROCNAME("dewarpaRead"); - - if (!filename) - return (L_DEWARPA *)ERROR_PTR("filename not defined", procName, NULL); - if ((fp = fopenReadStream(filename)) == NULL) - return (L_DEWARPA *)ERROR_PTR("stream not opened", procName, NULL); - - if ((dewa = dewarpaReadStream(fp)) == NULL) { - fclose(fp); - return (L_DEWARPA *)ERROR_PTR("dewa not read", procName, NULL); - } - - fclose(fp); - return dewa; -} - - -/*! - * \brief dewarpaReadStream() - * - * \param[in] fp file stream - * \return dewa, or NULL on error - * - *
- * Notes:
- *      (1) The serialized dewarp contains a Numa that gives the
- *          (increasing) page number of the dewarp structs that are
- *          contained.
- *      (2) Reference pages are added in after readback.
- * 
- */ -L_DEWARPA * -dewarpaReadStream(FILE *fp) -{ -l_int32 i, version, ndewarp, maxpage; -l_int32 sampling, redfactor, minlines, maxdist, useboth; -l_int32 max_linecurv, min_diff_linecurv, max_diff_linecurv; -l_int32 max_edgeslope, max_edgecurv, max_diff_edgecurv; -L_DEWARP *dew; -L_DEWARPA *dewa; -NUMA *namodels; - - PROCNAME("dewarpaReadStream"); - - if (!fp) - return (L_DEWARPA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nDewarpa Version %d\n", &version) != 1) - return (L_DEWARPA *)ERROR_PTR("not a dewarpa file", procName, NULL); - if (version != DEWARP_VERSION_NUMBER) - return (L_DEWARPA *)ERROR_PTR("invalid dewarp version", procName, NULL); - - if (fscanf(fp, "ndewarp = %d, maxpage = %d\n", &ndewarp, &maxpage) != 2) - return (L_DEWARPA *)ERROR_PTR("read fail for maxpage+", procName, NULL); - if (fscanf(fp, - "sampling = %d, redfactor = %d, minlines = %d, maxdist = %d\n", - &sampling, &redfactor, &minlines, &maxdist) != 4) - return (L_DEWARPA *)ERROR_PTR("read fail for 4 params", procName, NULL); - if (fscanf(fp, - "max_linecurv = %d, min_diff_linecurv = %d, max_diff_linecurv = %d\n", - &max_linecurv, &min_diff_linecurv, &max_diff_linecurv) != 3) - return (L_DEWARPA *)ERROR_PTR("read fail for linecurv", procName, NULL); - if (fscanf(fp, - "max_edgeslope = %d, max_edgecurv = %d, max_diff_edgecurv = %d\n", - &max_edgeslope, &max_edgecurv, &max_diff_edgecurv) != 3) - return (L_DEWARPA *)ERROR_PTR("read fail for edgecurv", procName, NULL); - if (fscanf(fp, "fullmodel = %d\n", &useboth) != 1) - return (L_DEWARPA *)ERROR_PTR("read fail for useboth", procName, NULL); - - if (ndewarp > MaxPtrArraySize) - return (L_DEWARPA *)ERROR_PTR("too many pages", procName, NULL); - - dewa = dewarpaCreate(maxpage + 1, sampling, redfactor, minlines, maxdist); - dewa->maxpage = maxpage; - dewa->max_linecurv = max_linecurv; - dewa->min_diff_linecurv = min_diff_linecurv; - dewa->max_diff_linecurv = max_diff_linecurv; - dewa->max_edgeslope = max_edgeslope; - 
dewa->max_edgecurv = max_edgecurv; - dewa->max_diff_edgecurv = max_diff_edgecurv; - dewa->useboth = useboth; - namodels = numaCreate(ndewarp); - dewa->namodels = namodels; - for (i = 0; i < ndewarp; i++) { - if ((dew = dewarpReadStream(fp)) == NULL) { - L_ERROR("read fail for dew[%d]\n", procName, i); - dewarpaDestroy(&dewa); - return NULL; - } - dewarpaInsertDewarp(dewa, dew); - numaAddNumber(namodels, dew->pageno); - } - - /* Validate the models and insert reference models */ - dewarpaInsertRefModels(dewa, 0, 0); - return dewa; -} - - -/*! - * \brief dewarpaReadMem() - * - * \param[in] data serialization of dewarpa - * \param[in] size of data in bytes - * \return dewa dewarpa, or NULL on error - */ -L_DEWARPA * -dewarpaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -L_DEWARPA *dewa; - - PROCNAME("dewarpaReadMem"); - - if (!data) - return (L_DEWARPA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (L_DEWARPA *)ERROR_PTR("stream not opened", procName, NULL); - - dewa = dewarpaReadStream(fp); - fclose(fp); - if (!dewa) L_ERROR("dewa not read\n", procName); - return dewa; -} - - -/*! - * \brief dewarpaWrite() - * - * \param[in] filename - * \param[in] dewa - * \return 0 if OK, 1 on error - */ -l_ok -dewarpaWrite(const char *filename, - L_DEWARPA *dewa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("dewarpaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = dewarpaWriteStream(fp, dewa); - fclose(fp); - if (ret) - return ERROR_INT("dewa not written to stream", procName, 1); - return 0; -} - - -/*! 
- * \brief dewarpaWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] dewa - * \return 0 if OK, 1 on error - */ -l_ok -dewarpaWriteStream(FILE *fp, - L_DEWARPA *dewa) -{ -l_int32 ndewarp, i, pageno; - - PROCNAME("dewarpaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - /* Generate the list of page numbers for which a model exists. - * Note that no attempt is made to determine if the model is - * valid, because that determination is associated with - * using the model to remove the warping, which typically - * can happen later, after all the models have been built. */ - dewarpaListPages(dewa); - if (!dewa->namodels) - return ERROR_INT("dewa->namodels not made", procName, 1); - ndewarp = numaGetCount(dewa->namodels); /* with actual page models */ - - fprintf(fp, "\nDewarpa Version %d\n", DEWARP_VERSION_NUMBER); - fprintf(fp, "ndewarp = %d, maxpage = %d\n", ndewarp, dewa->maxpage); - fprintf(fp, "sampling = %d, redfactor = %d, minlines = %d, maxdist = %d\n", - dewa->sampling, dewa->redfactor, dewa->minlines, dewa->maxdist); - fprintf(fp, - "max_linecurv = %d, min_diff_linecurv = %d, max_diff_linecurv = %d\n", - dewa->max_linecurv, dewa->min_diff_linecurv, dewa->max_diff_linecurv); - fprintf(fp, - "max_edgeslope = %d, max_edgecurv = %d, max_diff_edgecurv = %d\n", - dewa->max_edgeslope, dewa->max_edgecurv, dewa->max_diff_edgecurv); - fprintf(fp, "fullmodel = %d\n", dewa->useboth); - for (i = 0; i < ndewarp; i++) { - numaGetIValue(dewa->namodels, i, &pageno); - dewarpWriteStream(fp, dewarpaGetDewarp(dewa, pageno)); - } - - return 0; -} - - -/*! - * \brief dewarpaWriteMem() - * - * \param[out] pdata data of serialized dewarpa (not ascii) - * \param[out] psize size of returned data - * \param[in] dewa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a dewarpa in memory and puts the result in a buffer.
- * 
- */ -l_ok -dewarpaWriteMem(l_uint8 **pdata, - size_t *psize, - L_DEWARPA *dewa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("dewarpaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = dewarpaWriteStream(fp, dewa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = dewarpaWriteStream(fp, dewa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp2.c deleted file mode 100644 index a5f9e3e2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp2.c +++ /dev/null @@ -1,1918 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file dewarp2.c - *
- *
- *    Build the page disparity model
- *
- *      Build basic page disparity model
- *          l_int32            dewarpBuildPageModel()
- *          l_int32            dewarpFindVertDisparity()
- *          l_int32            dewarpFindHorizDisparity()
- *          PTAA              *dewarpGetTextlineCenters()
- *          static PTA        *dewarpGetMeanVerticals()
- *          PTAA              *dewarpRemoveShortLines()
- *          static l_int32     dewarpGetLineEndPoints()
- *          static l_int32     dewarpFilterLineEndPoints()
- *          static PTA        *dewarpRemoveBadEndPoints()
- *          static l_int32     dewarpIsLineCoverageValid()
- *          static l_int32     dewarpQuadraticLSF()
- *
- *      Build disparity model for slope near binding
- *          l_int32            dewarpFindHorizSlopeDisparity()
- *
- *      Build the line disparity model
- *          l_int32            dewarpBuildLineModel()
- *
- *      Query model status
- *          l_int32            dewarpaModelStatus()
- *
- *      Rendering helpers
- *          static l_int32     pixRenderMidYs()
- *          static l_int32     pixRenderHorizEndPoints
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static PTA *dewarpGetMeanVerticals(PIX *pixs, l_int32 x, l_int32 y); -static l_int32 dewarpGetLineEndPoints(l_int32 h, PTAA *ptaa, PTA **pptal, - PTA **pptar); -static l_int32 dewarpFilterLineEndPoints(L_DEWARP *dew, PTA *ptal1, PTA *ptar1, - PTA **pptal2, PTA **pptar2); -static PTA *dewarpRemoveBadEndPoints(l_int32 w, PTA *ptas); -static l_int32 dewarpIsLineCoverageValid(PTAA *ptaa2, l_int32 h, - l_int32 *pntop, l_int32 *pnbot, - l_int32 *pytop, l_int32 *pybot); -static l_int32 dewarpQuadraticLSF(PTA *ptad, l_float32 *pa, l_float32 *pb, - l_float32 *pc, l_float32 *pmederr); -static l_int32 pixRenderMidYs(PIX *pixs, NUMA *namidys, l_int32 linew); -static l_int32 pixRenderHorizEndPoints(PIX *pixs, PTA *ptal, PTA *ptar, - l_uint32 color); - - -#ifndef NO_CONSOLE_IO -#define DEBUG_TEXTLINE_CENTERS 0 /* set this to 1 for debugging */ -#define DEBUG_SHORT_LINES 0 /* ditto */ -#else -#define DEBUG_TEXTLINE_CENTERS 0 /* always must be 0 */ -#define DEBUG_SHORT_LINES 0 /* ditto */ -#endif /* !NO_CONSOLE_IO */ - - /* Special parameter values for reducing horizontal disparity */ -static const l_float32 MinRatioLinesToHeight = 0.45; -static const l_int32 MinLinesForHoriz1 = 10; /* initially */ -static const l_int32 MinLinesForHoriz2 = 3; /* after, in each half */ -static const l_float32 AllowedWidthFract = 0.05; /* no bigger */ - - -/*----------------------------------------------------------------------* - * Build basic page disparity model * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpBuildPageModel() - * - * \param[in] dew - * \param[in] debugfile use NULL to skip writing this - * \return 0 if OK, 1 if unable to build the model or on error - * - *
- * Notes:
- *      (1) This is the basic function that builds the horizontal and
- *          vertical disparity arrays, which allow determination of the
- *          src pixel in the input image corresponding to each
- *          dest pixel in the dewarped image.
- *      (2) Sets vsuccess = 1 if the vertical disparity array builds.
- *          Always attempts to build the horizontal disparity array,
- *          even if it will not be requested (useboth == 0).
- *          Sets hsuccess = 1 if horizontal disparity builds.
- *      (3) The method is as follows:
- *          (a) Estimate the points along the centers of all the
- *              long textlines.  If there are too few lines, no
- *              disparity models are built.
- *          (b) From the vertical deviation of the lines, estimate
- *              the vertical disparity.
- *          (c) From the ends of the lines, estimate the horizontal
- *              disparity, assuming that the text is made of lines
- *              that are close to left and right justified.
- *          (d) One can also compute an additional contribution to the
- *              horizontal disparity, inferred from slopes of the top
- *              and bottom lines.  We do not do this.
- *      (4) In more detail for the vertical disparity:
- *          (a) Fit a LS quadratic to center locations along each line.
- *              This smooths the curves.
- *          (b) Sample each curve at a regular interval, find the y-value
- *              of the mid-point on each curve, and subtract the sampled
- *              curve value from this value.  This is the vertical
- *              disparity at sampled points along each curve.
- *          (c) Fit a LS quadratic to each set of vertically aligned
- *              disparity samples.  This smooths the disparity values
- *              in the vertical direction.  Then resample at the same
- *              regular interval.  We now have a regular grid of smoothed
- *              vertical disparity valuels.
- *      (5) Once the sampled vertical disparity array is found, it can be
- *          interpolated to get a full resolution vertical disparity map.
- *          This can be applied directly to the src image pixels
- *          to dewarp the image in the vertical direction, making
- *          all textlines horizontal.  Likewise, the horizontal
- *          disparity array is used to left- and right-align the
- *          longest textlines.
- * 
- */ -l_ok -dewarpBuildPageModel(L_DEWARP *dew, - const char *debugfile) -{ -l_int32 linecount, ntop, nbot, ytop, ybot, ret; -PIX *pixs, *pix1, *pix2, *pix3; -PTA *pta; -PTAA *ptaa1, *ptaa2; - - PROCNAME("dewarpBuildPageModel"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - - dew->debug = (debugfile) ? 1 : 0; - dew->vsuccess = dew->hsuccess = 0; - pixs = dew->pixs; - if (debugfile) { - lept_rmdir("lept/dewmod"); /* erase previous images */ - lept_mkdir("lept/dewmod"); - pixDisplayWithTitle(pixs, 0, 0, "pixs", 1); - pixWriteDebug("/tmp/lept/dewmod/0010.png", pixs, IFF_PNG); - } - - /* Make initial estimate of centers of textlines */ - ptaa1 = dewarpGetTextlineCenters(pixs, debugfile || DEBUG_TEXTLINE_CENTERS); - if (!ptaa1) { - L_WARNING("textline centers not found; model not built\n", procName); - return 1; - } - if (debugfile) { - pix1 = pixConvertTo32(pixs); - pta = generatePtaFilledCircle(1); - pix2 = pixGenerateFromPta(pta, 5, 5); - pix3 = pixDisplayPtaaPattern(NULL, pix1, ptaa1, pix2, 2, 2); - pixWriteDebug("/tmp/lept/dewmod/0020.png", pix3, IFF_PNG); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - ptaDestroy(&pta); - } - - /* Remove all lines that are not at least 0.8 times the length - * of the longest line. 
*/ - ptaa2 = dewarpRemoveShortLines(pixs, ptaa1, 0.8, - debugfile || DEBUG_SHORT_LINES); - if (debugfile) { - pix1 = pixConvertTo32(pixs); - pta = generatePtaFilledCircle(1); - pix2 = pixGenerateFromPta(pta, 5, 5); - pix3 = pixDisplayPtaaPattern(NULL, pix1, ptaa2, pix2, 2, 2); - pixWriteDebug("/tmp/lept/dewmod/0030.png", pix3, IFF_PNG); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - ptaDestroy(&pta); - } - ptaaDestroy(&ptaa1); - - /* Verify that there are sufficient "long" lines */ - linecount = ptaaGetCount(ptaa2); - if (linecount < dew->minlines) { - ptaaDestroy(&ptaa2); - L_WARNING("linecount %d < min req'd number of lines (%d) for model\n", - procName, linecount, dew->minlines); - return 1; - } - - /* Verify that the lines have a reasonable coverage of the - * vertical extent of the page. */ - if (dewarpIsLineCoverageValid(ptaa2, pixGetHeight(pixs), - &ntop, &nbot, &ytop, &ybot) == FALSE) { - ptaaDestroy(&ptaa2); - L_WARNING("invalid line coverage: ntop = %d, nbot = %d;" - " spanning [%d ... %d] in height %d\n", procName, - ntop, nbot, ytop, ybot, pixGetHeight(pixs)); - return 1; - } - - /* Get the sampled vertical disparity from the textline centers. - * The disparity array will push pixels vertically so that each - * textline is flat and centered at the y-position of the mid-point. */ - if (dewarpFindVertDisparity(dew, ptaa2, 0) != 0) { - L_WARNING("vertical disparity not built\n", procName); - ptaaDestroy(&ptaa2); - return 1; - } - - /* Get the sampled horizontal disparity from the left and right - * edges of the text. The disparity array will expand the image - * linearly outward to align the text edges vertically. - * Do this even if useboth == 0; we still calculate it even - * if we don't plan to use it. 
*/ - if ((ret = dewarpFindHorizDisparity(dew, ptaa2)) == 0) - L_INFO("hsuccess = 1\n", procName); - - /* Debug output */ - if (debugfile) { - dewarpPopulateFullRes(dew, NULL, 0, 0); - pix1 = fpixRenderContours(dew->fullvdispar, 3.0, 0.15); - pixWriteDebug("/tmp/lept/dewmod/0060.png", pix1, IFF_PNG); - pixDisplay(pix1, 1000, 0); - pixDestroy(&pix1); - if (ret == 0) { - pix1 = fpixRenderContours(dew->fullhdispar, 3.0, 0.15); - pixWriteDebug("/tmp/lept/dewmod/0070.png", pix1, IFF_PNG); - pixDisplay(pix1, 1000, 0); - pixDestroy(&pix1); - } - convertFilesToPdf("/tmp/lept/dewmod", NULL, 135, 1.0, 0, 0, - "Dewarp Build Model", debugfile); - lept_stderr("pdf file: %s\n", debugfile); - } - - ptaaDestroy(&ptaa2); - return 0; -} - - -/*! - * \brief dewarpFindVertDisparity() - * - * \param[in] dew - * \param[in] ptaa unsmoothed lines, not vertically ordered - * \param[in] rotflag 0 if using dew->pixs; 1 if rotated by 90 degrees cw - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This starts with points along the centers of textlines.
- *          It does quadratic fitting (and smoothing), first along the
- *          lines and then in the vertical direction, to generate
- *          the sampled vertical disparity map.  This can then be
- *          interpolated to full resolution and used to remove
- *          the vertical line warping.
- *      (2) Use %rotflag == 1 if you are dewarping vertical lines, as
- *          is done in dewarpBuildLineModel().  The usual case is for
- *          %rotflag == 0.
- *      (3) Note that this builds a vertical disparity model (VDM), but
- *          does not check it against constraints for validity.
- *          Constraint checking is done after building the models,
- *          and before inserting reference models.
- *      (4) This sets the vsuccess flag to 1 on success.
- *      (5) Pix debug output goes to /tmp/dewvert/ for collection into
- *          a pdf.  Non-pix debug output goes to /tmp.
- * 
- */ -l_ok -dewarpFindVertDisparity(L_DEWARP *dew, - PTAA *ptaa, - l_int32 rotflag) -{ -l_int32 i, j, nlines, npts, nx, ny, sampling; -l_float32 c0, c1, c2, x, y, midy, val, medval, meddev, minval, maxval; -l_float32 *famidys; -NUMA *nax, *nafit, *nacurve0, *nacurve1, *nacurves; -NUMA *namidy, *namidys, *namidysi; -PIX *pix1, *pix2, *pixcirc, *pixdb; -PTA *pta, *ptad, *ptacirc; -PTAA *ptaa0, *ptaa1, *ptaa2, *ptaa3, *ptaa4, *ptaa5, *ptaat; -FPIX *fpix; - - PROCNAME("dewarpFindVertDisparity"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - dew->vsuccess = 0; - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - - if (dew->debug) L_INFO("finding vertical disparity\n", procName); - - /* Do quadratic fit to smooth each line. A single quadratic - * over the entire width of the line appears to be sufficient. - * Quartics tend to overfit to noise. Each line is thus - * represented by three coefficients: y(x) = c2 * x^2 + c1 * x + c0. - * Using the coefficients, sample each fitted curve uniformly - * across the full width of the image. The result is in ptaa0. */ - sampling = dew->sampling; - nx = (rotflag) ? dew->ny : dew->nx; - ny = (rotflag) ? dew->nx : dew->ny; - nlines = ptaaGetCount(ptaa); - dew->nlines = nlines; - ptaa0 = ptaaCreate(nlines); - nacurve0 = numaCreate(nlines); /* stores curvature coeff c2 */ - pixdb = (rotflag) ? 
pixRotateOrth(dew->pixs, 1) : pixClone(dew->pixs); - for (i = 0; i < nlines; i++) { /* for each line */ - pta = ptaaGetPta(ptaa, i, L_CLONE); - ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL); - numaAddNumber(nacurve0, c2); - ptad = ptaCreate(nx); - for (j = 0; j < nx; j++) { /* uniformly sampled in x */ - x = j * sampling; - applyQuadraticFit(c2, c1, c0, x, &y); - ptaAddPt(ptad, x, y); - } - ptaaAddPta(ptaa0, ptad, L_INSERT); - ptaDestroy(&pta); - } - if (dew->debug) { - lept_mkdir("lept/dewarp"); - lept_mkdir("lept/dewdebug"); - lept_mkdir("lept/dewmod"); - ptaat = ptaaCreate(nlines); - for (i = 0; i < nlines; i++) { - pta = ptaaGetPta(ptaa, i, L_CLONE); - ptaGetArrays(pta, &nax, NULL); - ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit); - ptad = ptaCreateFromNuma(nax, nafit); - ptaaAddPta(ptaat, ptad, L_INSERT); - ptaDestroy(&pta); - numaDestroy(&nax); - numaDestroy(&nafit); - } - pix1 = pixConvertTo32(pixdb); - pta = generatePtaFilledCircle(1); - pixcirc = pixGenerateFromPta(pta, 5, 5); - pix2 = pixDisplayPtaaPattern(NULL, pix1, ptaat, pixcirc, 2, 2); - pixWriteDebug("/tmp/lept/dewmod/0041.png", pix2, IFF_PNG); - pixDestroy(&pix1); - pixDestroy(&pix2); - ptaDestroy(&pta); - pixDestroy(&pixcirc); - ptaaDestroy(&ptaat); - } - - /* Remove lines with outlier curvatures. - * Note that this is just looking for internal consistency in - * the line curvatures. It is not rejecting lines based on - * the magnitude of the curvature. That is done when constraints - * are applied for valid models. 
*/ - numaGetMedianDevFromMedian(nacurve0, &medval, &meddev); - L_INFO("\nPage %d\n", procName, dew->pageno); - L_INFO("Pass 1: Curvature: medval = %f, meddev = %f\n", - procName, medval, meddev); - ptaa1 = ptaaCreate(nlines); - nacurve1 = numaCreate(nlines); - for (i = 0; i < nlines; i++) { /* for each line */ - numaGetFValue(nacurve0, i, &val); - if (L_ABS(val - medval) > 7.0 * meddev) /* TODO: reduce to ~ 3.0 */ - continue; - pta = ptaaGetPta(ptaa0, i, L_CLONE); - ptaaAddPta(ptaa1, pta, L_INSERT); - numaAddNumber(nacurve1, val); - } - nlines = ptaaGetCount(ptaa1); - numaDestroy(&nacurve0); - - /* Save the min and max curvature (in micro-units) */ - numaGetMin(nacurve1, &minval, NULL); - numaGetMax(nacurve1, &maxval, NULL); - dew->mincurv = lept_roundftoi(1000000. * minval); - dew->maxcurv = lept_roundftoi(1000000. * maxval); - L_INFO("Pass 2: Min/max curvature = (%d, %d)\n", procName, - dew->mincurv, dew->maxcurv); - - /* Find and save the y values at the mid-points in each curve. - * If the slope is zero anywhere, it will typically be here. 
*/ - namidy = numaCreate(nlines); - for (i = 0; i < nlines; i++) { - pta = ptaaGetPta(ptaa1, i, L_CLONE); - npts = ptaGetCount(pta); - ptaGetPt(pta, npts / 2, NULL, &midy); - numaAddNumber(namidy, midy); - ptaDestroy(&pta); - } - - /* Sort the lines in ptaa1c by their vertical position, going down */ - namidysi = numaGetSortIndex(namidy, L_SORT_INCREASING); - namidys = numaSortByIndex(namidy, namidysi); - nacurves = numaSortByIndex(nacurve1, namidysi); - numaDestroy(&dew->namidys); /* in case previously made */ - numaDestroy(&dew->nacurves); - dew->namidys = namidys; - dew->nacurves = nacurves; - ptaa2 = ptaaSortByIndex(ptaa1, namidysi); - numaDestroy(&namidy); - numaDestroy(&nacurve1); - numaDestroy(&namidysi); - if (dew->debug) { - numaWriteDebug("/tmp/lept/dewdebug/midys.na", namidys); - numaWriteDebug("/tmp/lept/dewdebug/curves.na", nacurves); - pix1 = pixConvertTo32(pixdb); - ptacirc = generatePtaFilledCircle(5); - pixcirc = pixGenerateFromPta(ptacirc, 11, 11); - srand(3); - pixDisplayPtaaPattern(pix1, pix1, ptaa2, pixcirc, 5, 5); - srand(3); /* use the same colors for text and reference lines */ - pixRenderMidYs(pix1, namidys, 2); - pix2 = (rotflag) ? pixRotateOrth(pix1, 3) : pixClone(pix1); - pixWriteDebug("/tmp/lept/dewmod/0042.png", pix2, IFF_PNG); - pixDisplay(pix2, 0, 0); - ptaDestroy(&ptacirc); - pixDestroy(&pixcirc); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - pixDestroy(&pixdb); - - /* Convert the sampled points in ptaa2 to a sampled disparity with - * with respect to the y value at the mid point in the curve. - * The disparity is the distance the point needs to move; - * plus is downward. 
*/ - ptaa3 = ptaaCreate(nlines); - for (i = 0; i < nlines; i++) { - pta = ptaaGetPta(ptaa2, i, L_CLONE); - numaGetFValue(namidys, i, &midy); - ptad = ptaCreate(nx); - for (j = 0; j < nx; j++) { - ptaGetPt(pta, j, &x, &y); - ptaAddPt(ptad, x, midy - y); - } - ptaaAddPta(ptaa3, ptad, L_INSERT); - ptaDestroy(&pta); - } - if (dew->debug) { - ptaaWriteDebug("/tmp/lept/dewdebug/ptaa3.ptaa", ptaa3, 0); - } - - /* Generate ptaa4 by taking vertical 'columns' from ptaa3. - * We want to fit the vertical disparity on the column to the - * vertical position of the line, which we call 'y' here and - * obtain from namidys. So each pta in ptaa4 is the set of - * vertical disparities down a column of points. The columns - * in ptaa4 are equally spaced in x. */ - ptaa4 = ptaaCreate(nx); - famidys = numaGetFArray(namidys, L_NOCOPY); - for (j = 0; j < nx; j++) { - pta = ptaCreate(nlines); - for (i = 0; i < nlines; i++) { - y = famidys[i]; - ptaaGetPt(ptaa3, i, j, NULL, &val); /* disparity value */ - ptaAddPt(pta, y, val); - } - ptaaAddPta(ptaa4, pta, L_INSERT); - } - if (dew->debug) { - ptaaWriteDebug("/tmp/lept/dewdebug/ptaa4.ptaa", ptaa4, 0); - } - - /* Do quadratic fit vertically on each of the pixel columns - * in ptaa4, for the vertical displacement (which identifies the - * src pixel(s) for each dest pixel) as a function of y (the - * y value of the mid-points for each line). Then generate - * ptaa5 by sampling the fitted vertical displacement on a - * regular grid in the vertical direction. Each pta in ptaa5 - * gives the vertical displacement for regularly sampled y values - * at a fixed x. 
*/ - ptaa5 = ptaaCreate(nx); /* uniformly sampled across full height of image */ - for (j = 0; j < nx; j++) { /* for each column */ - pta = ptaaGetPta(ptaa4, j, L_CLONE); - ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL); - ptad = ptaCreate(ny); - for (i = 0; i < ny; i++) { /* uniformly sampled in y */ - y = i * sampling; - applyQuadraticFit(c2, c1, c0, y, &val); - ptaAddPt(ptad, y, val); - } - ptaaAddPta(ptaa5, ptad, L_INSERT); - ptaDestroy(&pta); - } - if (dew->debug) { - ptaaWriteDebug("/tmp/lept/dewdebug/ptaa5.ptaa", ptaa5, 0); - convertFilesToPdf("/tmp/lept/dewmod", "004", 135, 1.0, 0, 0, - "Dewarp Vert Disparity", - "/tmp/lept/dewarp/vert_disparity.pdf"); - lept_stderr("pdf file: /tmp/lept/dewarp/vert_disparity.pdf\n"); - } - - /* Save the result in a fpix at the specified subsampling */ - fpix = fpixCreate(nx, ny); - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - ptaaGetPt(ptaa5, j, i, NULL, &val); - fpixSetPixel(fpix, j, i, val); - } - } - dew->sampvdispar = fpix; - dew->vsuccess = 1; - - ptaaDestroy(&ptaa0); - ptaaDestroy(&ptaa1); - ptaaDestroy(&ptaa2); - ptaaDestroy(&ptaa3); - ptaaDestroy(&ptaa4); - ptaaDestroy(&ptaa5); - return 0; -} - - -/*! - * \brief dewarpFindHorizDisparity() - * - * \param[in] dew - * \param[in] ptaa unsmoothed lines, not vertically ordered - * \return 0 if OK, 1 if horizontal disparity array is not built, or on error - * - *
- * Notes:
- *      (1) This builds a horizontal disparity model (HDM), but
- *          does not check it against constraints for validity.
- *          Constraint checking is done at rendering time.
- *      (2) Horizontal disparity is not required for a successful model;
- *          only the vertical disparity is required.  This will not be
- *          called if the function to build the vertical disparity fails.
- *      (3) This sets the hsuccess flag to 1 on success.
- *      (4) Internally in ptal1, ptar1, ptal2, ptar2: x and y are reversed,
- *          so the 'y' value is horizontal distance across the image width.
- *      (5) Debug output goes to /tmp/lept/dewmod/ for collection into a pdf.
- * 
- */ -l_ok -dewarpFindHorizDisparity(L_DEWARP *dew, - PTAA *ptaa) -{ -l_int32 i, j, h, nx, ny, sampling, ret; -l_float32 c0, c1, cl0, cl1, cl2, cr0, cr1, cr2; -l_float32 x, y, refl, refr; -l_float32 val, mederr; -NUMA *nald, *nard; -PIX *pix1; -PTA *ptal1, *ptar1; /* left/right end points of lines; initial */ -PTA *ptal2, *ptar2; /* left/right end points; after filtering */ -PTA *ptal3, *ptar3; /* left and right block, fitted, uniform spacing */ -PTA *pta, *ptat, *pta1, *pta2; -PTAA *ptaah; -FPIX *fpix; - - PROCNAME("dewarpFindHorizDisparity"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - dew->hsuccess = 0; - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - - if (dew->debug) L_INFO("finding horizontal disparity\n", procName); - - /* Get the endpoints of the lines, and sort from top to bottom */ - h = pixGetHeight(dew->pixs); - ret = dewarpGetLineEndPoints(h, ptaa, &ptal1, &ptar1); - if (ret) { - L_INFO("Horiz disparity not built\n", procName); - return 1; - } - if (dew->debug) { - lept_mkdir("lept/dewdebug"); - lept_mkdir("lept/dewarp"); - ptaWriteDebug("/tmp/lept/dewdebug/endpts_left1.pta", ptal1, 1); - ptaWriteDebug("/tmp/lept/dewdebug/endpts_right1.pta", ptar1, 1); - } - - /* Filter the points by x-location to prevent 2-column images - * from getting confused about left and right endpoints. We - * require valid left points to not be farther than - * 0.20 * (remaining distance to the right edge of the image) - * to the right of the leftmost endpoint, and similarly for - * the right endpoints. (Note: x and y are reversed in the pta.) - * Also require end points to be near the medians in the - * upper and lower halves. */ - ret = dewarpFilterLineEndPoints(dew, ptal1, ptar1, &ptal2, &ptar2); - ptaDestroy(&ptal1); - ptaDestroy(&ptar1); - if (ret) { - L_INFO("Not enough filtered end points\n", procName); - return 1; - } - - /* Do a quadratic fit to the left and right endpoints of the - * longest lines. 
Each line is represented by 3 coefficients: - * x(y) = c2 * y^2 + c1 * y + c0. - * Using the coefficients, sample each fitted curve uniformly - * along the full height of the image. */ - sampling = dew->sampling; - nx = dew->nx; - ny = dew->ny; - - /* Fit the left side, using quadratic LSF on the set of long - * lines. It is not necessary to use the noisy LSF fit - * function, because we've removed outlier end points by - * selecting the long lines. Then uniformly sample along - * this fitted curve. */ - dewarpQuadraticLSF(ptal2, &cl2, &cl1, &cl0, &mederr); - dew->leftslope = lept_roundftoi(1000. * cl1); /* milli-units */ - dew->leftcurv = lept_roundftoi(1000000. * cl2); /* micro-units */ - L_INFO("Left quad LSF median error = %5.2f\n", procName, mederr); - L_INFO("Left edge slope = %d\n", procName, dew->leftslope); - L_INFO("Left edge curvature = %d\n", procName, dew->leftcurv); - ptal3 = ptaCreate(ny); - for (i = 0; i < ny; i++) { /* uniformly sampled in y */ - y = i * sampling; - applyQuadraticFit(cl2, cl1, cl0, y, &x); - ptaAddPt(ptal3, x, y); - } - - /* Fit the right side in the same way. */ - dewarpQuadraticLSF(ptar2, &cr2, &cr1, &cr0, &mederr); - dew->rightslope = lept_roundftoi(1000.0 * cr1); /* milli-units */ - dew->rightcurv = lept_roundftoi(1000000. 
* cr2); /* micro-units */ - L_INFO("Right quad LSF median error = %5.2f\n", procName, mederr); - L_INFO("Right edge slope = %d\n", procName, dew->rightslope); - L_INFO("Right edge curvature = %d\n", procName, dew->rightcurv); - ptar3 = ptaCreate(ny); - for (i = 0; i < ny; i++) { /* uniformly sampled in y */ - y = i * sampling; - applyQuadraticFit(cr2, cr1, cr0, y, &x); - ptaAddPt(ptar3, x, y); - } - - if (dew->debug) { - PTA *ptalft, *ptarft; - h = pixGetHeight(dew->pixs); - pta1 = ptaCreate(h); - pta2 = ptaCreate(h); - for (i = 0; i < h; i++) { - applyQuadraticFit(cl2, cl1, cl0, i, &x); - ptaAddPt(pta1, x, i); - applyQuadraticFit(cr2, cr1, cr0, i, &x); - ptaAddPt(pta2, x, i); - } - pix1 = pixDisplayPta(NULL, dew->pixs, pta1); - pixDisplayPta(pix1, pix1, pta2); - pixRenderHorizEndPoints(pix1, ptal2, ptar2, 0xff000000); - pixDisplay(pix1, 600, 800); - pixWriteDebug("/tmp/lept/dewmod/0051.png", pix1, IFF_PNG); - pixDestroy(&pix1); - - pix1 = pixDisplayPta(NULL, dew->pixs, pta1); - pixDisplayPta(pix1, pix1, pta2); - ptalft = ptaTranspose(ptal3); - ptarft = ptaTranspose(ptar3); - pixRenderHorizEndPoints(pix1, ptalft, ptarft, 0x0000ff00); - pixDisplay(pix1, 800, 800); - pixWriteDebug("/tmp/lept/dewmod/0052.png", pix1, IFF_PNG); - convertFilesToPdf("/tmp/lept/dewmod", "005", 135, 1.0, 0, 0, - "Dewarp Horiz Disparity", - "/tmp/lept/dewarp/horiz_disparity.pdf"); - lept_stderr("pdf file: /tmp/lept/dewarp/horiz_disparity.pdf\n"); - pixDestroy(&pix1); - ptaDestroy(&pta1); - ptaDestroy(&pta2); - ptaDestroy(&ptalft); - ptaDestroy(&ptarft); - } - - /* Find the x value at the midpoints (in y) of the two vertical lines, - * ptal3 and ptar3. These are the reference values for each of the - * lines. Then use the difference between the these midpoint - * values and the actual x coordinates of the lines to represent - * the horizontal disparity (nald, nard) on the vertical lines - * for the sampled y values. 
*/ - ptaGetPt(ptal3, ny / 2, &refl, NULL); - ptaGetPt(ptar3, ny / 2, &refr, NULL); - nald = numaCreate(ny); - nard = numaCreate(ny); - for (i = 0; i < ny; i++) { - ptaGetPt(ptal3, i, &x, NULL); - numaAddNumber(nald, refl - x); - ptaGetPt(ptar3, i, &x, NULL); - numaAddNumber(nard, refr - x); - } - - /* Now for each pair of sampled values of the two lines (at the - * same value of y), do a linear interpolation to generate - * the horizontal disparity on all sampled points between them. */ - ptaah = ptaaCreate(ny); - for (i = 0; i < ny; i++) { - pta = ptaCreate(2); - numaGetFValue(nald, i, &val); - ptaAddPt(pta, refl, val); - numaGetFValue(nard, i, &val); - ptaAddPt(pta, refr, val); - ptaGetLinearLSF(pta, &c1, &c0, NULL); /* horiz disparity along line */ - ptat = ptaCreate(nx); - for (j = 0; j < nx; j++) { - x = j * sampling; - applyLinearFit(c1, c0, x, &val); - ptaAddPt(ptat, x, val); - } - ptaaAddPta(ptaah, ptat, L_INSERT); - ptaDestroy(&pta); - } - numaDestroy(&nald); - numaDestroy(&nard); - - /* Save the result in a fpix at the specified subsampling */ - fpix = fpixCreate(nx, ny); - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - ptaaGetPt(ptaah, i, j, NULL, &val); - fpixSetPixel(fpix, j, i, val); - } - } - dew->samphdispar = fpix; - dew->hsuccess = 1; - ptaDestroy(&ptal2); - ptaDestroy(&ptar2); - ptaDestroy(&ptal3); - ptaDestroy(&ptar3); - ptaaDestroy(&ptaah); - return 0; -} - - -/*! - * \brief dewarpGetTextlineCenters() - * - * \param[in] pixs 1 bpp - * \param[in] debugflag 1 for debug output - * \return ptaa of center values of textlines - * - *
- * Notes:
- *      (1) This in general does not have a point for each value
- *          of x, because there will be gaps between words.
- *          It doesn't matter because we will fit a quadratic to the
- *          points that we do have.
- * 
- */ -PTAA * -dewarpGetTextlineCenters(PIX *pixs, - l_int32 debugflag) -{ -char buf[64]; -l_int32 i, w, h, bx, by, nsegs, csize1, csize2; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixa1, *pixa2; -PTA *pta; -PTAA *ptaa; - - PROCNAME("dewarpGetTextlineCenters"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PTAA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - - if (debugflag) L_INFO("finding text line centers\n", procName); - - /* Filter to solidify the text lines within the x-height region, - * and to remove most of the ascenders and descenders. - * We start with a small vertical opening to remove noise beyond - * the line that can cause an error in the line end points. - * The small closing (csize1) is used to bridge the gaps between - * letters. The large closing (csize2) bridges the gaps between - * words; using 1/30 of the page width usually suffices. */ - csize1 = L_MAX(15, w / 80); - csize2 = L_MAX(40, w / 30); - snprintf(buf, sizeof(buf), "o1.3 + c%d.1 + o%d.1 + c%d.1", - csize1, csize1, csize2); - pix1 = pixMorphSequence(pixs, buf, 0); - - /* Remove the components (e.g., embedded images) that have - * long vertical runs (>= 50 pixels). You can't use bounding - * boxes because connected component b.b. of lines can be quite - * tall due to slope and curvature. */ - pix2 = pixMorphSequence(pix1, "e1.50", 0); /* seed */ - pixSeedfillBinary(pix2, pix2, pix1, 8); /* tall components */ - pixXor(pix2, pix2, pix1); /* remove tall */ - - if (debugflag) { - lept_mkdir("lept/dewmod"); - pixWriteDebug("/tmp/lept/dewmod/0011.tif", pix1, IFF_TIFF_G4); - pixDisplayWithTitle(pix1, 0, 600, "pix1", 1); - pixWriteDebug("/tmp/lept/dewmod/0012.tif", pix2, IFF_TIFF_G4); - pixDisplayWithTitle(pix2, 0, 800, "pix2", 1); - } - pixDestroy(&pix1); - - /* Get the 8-connected components ... 
*/ - boxa = pixConnComp(pix2, &pixa1, 8); - pixDestroy(&pix2); - boxaDestroy(&boxa); - if (pixaGetCount(pixa1) == 0) { - pixaDestroy(&pixa1); - return NULL; - } - - /* ... and remove the short width and very short height c.c */ - pixa2 = pixaSelectBySize(pixa1, 100, 4, L_SELECT_IF_BOTH, - L_SELECT_IF_GT, NULL); - if ((nsegs = pixaGetCount(pixa2)) == 0) { - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - return NULL; - } - if (debugflag) { - pix2 = pixaDisplay(pixa2, w, h); - pixWriteDebug("/tmp/lept/dewmod/0013.tif", pix2, IFF_TIFF_G4); - pixDisplayWithTitle(pix2, 0, 1000, "pix2", 1); - pixDestroy(&pix2); - } - - /* For each c.c., get the weighted center of each vertical column. - * The result is a set of points going approximately through - * the center of the x-height part of the text line. */ - ptaa = ptaaCreate(nsegs); - for (i = 0; i < nsegs; i++) { - pixaGetBoxGeometry(pixa2, i, &bx, &by, NULL, NULL); - pix2 = pixaGetPix(pixa2, i, L_CLONE); - pta = dewarpGetMeanVerticals(pix2, bx, by); - ptaaAddPta(ptaa, pta, L_INSERT); - pixDestroy(&pix2); - } - if (debugflag) { - pix1 = pixCreateTemplate(pixs); - pix2 = pixDisplayPtaa(pix1, ptaa); - pixWriteDebug("/tmp/lept/dewmod/0014.tif", pix2, IFF_PNG); - pixDisplayWithTitle(pix2, 0, 1200, "pix3", 1); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - return ptaa; -} - - -/*! - * \brief dewarpGetMeanVerticals() - * - * \param[in] pixs 1 bpp, single c.c. 
- * \param[in] x,y location of UL corner of pixs, relative to page image - * \return pta (mean y-values in component for each x-value, - * both translated by (x,y - */ -static PTA * -dewarpGetMeanVerticals(PIX *pixs, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, i, j, wpl, sum, count; -l_uint32 *line, *data; -PTA *pta; - - PROCNAME("pixGetMeanVerticals"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PTA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - pta = ptaCreate(w); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (j = 0; j < w; j++) { - line = data; - sum = count = 0; - for (i = 0; i < h; i++) { - if (GET_DATA_BIT(line, j) == 1) { - sum += i; - count += 1; - } - line += wpl; - } - if (count == 0) continue; - ptaAddPt(pta, x + j, y + (sum / count)); - } - - return pta; -} - - -/*! - * \brief dewarpRemoveShortLines() - * - * \param[in] pixs 1 bpp - * \param[in] ptaas input lines - * \param[in] fract minimum fraction of longest line to keep - * \param[in] debugflag - * \return ptaad containing only lines of sufficient length, - * or NULL on error - */ -PTAA * -dewarpRemoveShortLines(PIX *pixs, - PTAA *ptaas, - l_float32 fract, - l_int32 debugflag) -{ -l_int32 w, n, i, index, maxlen, len; -l_float32 minx, maxx; -NUMA *na, *naindex; -PIX *pix1, *pix2; -PTA *pta; -PTAA *ptaad; - - PROCNAME("dewarpRemoveShortLines"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PTAA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!ptaas) - return (PTAA *)ERROR_PTR("ptaas undefined", procName, NULL); - - pixGetDimensions(pixs, &w, NULL, NULL); - n = ptaaGetCount(ptaas); - ptaad = ptaaCreate(n); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pta = ptaaGetPta(ptaas, i, L_CLONE); - ptaGetRange(pta, &minx, &maxx, NULL, NULL); - numaAddNumber(na, maxx - minx + 1); - ptaDestroy(&pta); - } - - /* Sort by length and find all that are long enough */ - naindex = numaGetSortIndex(na, 
L_SORT_DECREASING); - numaGetIValue(naindex, 0, &index); - numaGetIValue(na, index, &maxlen); - if (maxlen < 0.5 * w) - L_WARNING("lines are relatively short\n", procName); - pta = ptaaGetPta(ptaas, index, L_CLONE); - ptaaAddPta(ptaad, pta, L_INSERT); - for (i = 1; i < n; i++) { - numaGetIValue(naindex, i, &index); - numaGetIValue(na, index, &len); - if (len < fract * maxlen) break; - pta = ptaaGetPta(ptaas, index, L_CLONE); - ptaaAddPta(ptaad, pta, L_INSERT); - } - - if (debugflag) { - pix1 = pixCopy(NULL, pixs); - pix2 = pixDisplayPtaa(pix1, ptaad); - pixDisplayWithTitle(pix2, 0, 200, "pix4", 1); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - numaDestroy(&na); - numaDestroy(&naindex); - return ptaad; -} - - -/*! - * \brief dewarpGetLineEndPoints() - * - * \param[in] h height of pixs - * \param[in] ptaa lines - * \param[out] pptal left end points of each line - * \param[out] pptar right end points of each line - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) We require that the set of end points extends over 45% of the
- *          height of the input image, to insure good coverage and
- *          avoid extrapolating the curvature too far beyond the
- *          actual textlines.  Large extrapolations are particularly
- *          dangerous if used as a reference model.  We also require
- *          at least 10 lines of text.
- *      (2) We sort the lines from top to bottom (sort by x in the ptas).
- *      (3) For fitting the endpoints, x = f(y), we transpose x and y.
- *          Thus all these ptas have x and y swapped!
- * 
- */ -static l_int32 -dewarpGetLineEndPoints(l_int32 h, - PTAA *ptaa, - PTA **pptal, - PTA **pptar) -{ -l_int32 i, n, npt, x, y; -l_float32 miny, maxy, ratio; -PTA *pta, *ptal1, *ptar1; - - PROCNAME("dewarpGetLineEndPoints"); - - if (!pptal || !pptar) - return ERROR_INT("&ptal and &ptar not both defined", procName, 1); - *pptal = *pptar = NULL; - if (!ptaa) - return ERROR_INT("ptaa undefined", procName, 1); - - /* Are there at least 10 lines? */ - n = ptaaGetCount(ptaa); - if (n < MinLinesForHoriz1) { - L_INFO("only %d lines; too few\n", procName, n); - return 1; - } - - /* Extract the line end points, and transpose x and y values */ - ptal1 = ptaCreate(n); - ptar1 = ptaCreate(n); - for (i = 0; i < n; i++) { - pta = ptaaGetPta(ptaa, i, L_CLONE); - ptaGetIPt(pta, 0, &x, &y); - ptaAddPt(ptal1, y, x); /* transpose */ - npt = ptaGetCount(pta); - ptaGetIPt(pta, npt - 1, &x, &y); - ptaAddPt(ptar1, y, x); /* transpose */ - ptaDestroy(&pta); - } - - /* Use the min and max of the y value on the left side. */ - ptaGetRange(ptal1, &miny, &maxy, NULL, NULL); - ratio = (maxy - miny) / (l_float32)h; - if (ratio < MinRatioLinesToHeight) { - L_INFO("ratio lines to height, %f, too small\n", procName, ratio); - ptaDestroy(&ptal1); - ptaDestroy(&ptar1); - return 1; - } - - /* Sort from top to bottom */ - *pptal = ptaSort(ptal1, L_SORT_BY_X, L_SORT_INCREASING, NULL); - *pptar = ptaSort(ptar1, L_SORT_BY_X, L_SORT_INCREASING, NULL); - ptaDestroy(&ptal1); - ptaDestroy(&ptar1); - return 0; -} - - -/*! - * \brief dewarpFilterLineEndPoints() - * - * \param[in] dew - * \param[in] ptal input left end points of each line - * \param[in] ptar input right end points of each line - * \param[out] pptalf filtered left end points - * \param[out] pptarf filtered right end points - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) Avoid confusion with multiple columns by requiring that line
- *          end points be close enough to leftmost and rightmost end points.
- *          Must have at least 8 points on left and right after this step.
- *      (2) Apply second filtering step, find the median positions in
- *          top and bottom halves, and removing end points that are
- *          displaced too much from these in the x direction.
- *          Must have at least 6 points on left and right after this step.
- *      (3) Reminder: x and y in the pta are transposed; think x = f(y).
- * 
- */ -static l_int32 -dewarpFilterLineEndPoints(L_DEWARP *dew, - PTA *ptal, - PTA *ptar, - PTA **pptalf, - PTA **pptarf) -{ -l_int32 w, i, n; -l_float32 ymin, ymax, xvall, xvalr, yvall, yvalr; -PTA *ptal1, *ptar1, *ptal2, *ptar2; - - PROCNAME("dewarpFilterLineEndPoints"); - if (!ptal || !ptar) - return ERROR_INT("ptal or ptar not defined", procName, 1); - *pptalf = *pptarf = NULL; - - /* First filter for lines near left and right margins */ - w = pixGetWidth(dew->pixs); - ptaGetMinMax(ptal, NULL, &ymin, NULL, NULL); - ptaGetMinMax(ptar, NULL, NULL, NULL, &ymax); - n = ptaGetCount(ptal); /* ptar is the same size; at least 10 */ - ptal1 = ptaCreate(n); - ptar1 = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(ptal, i, &xvall, &yvall); - ptaGetPt(ptar, i, &xvalr, &yvalr); - if (yvall < ymin + 0.20 * (w - ymin) && - yvalr > 0.80 * ymax) { - ptaAddPt(ptal1, xvall, yvall); - ptaAddPt(ptar1, xvalr, yvalr); - } - } - if (dew->debug) { - ptaWriteDebug("/tmp/lept/dewdebug/endpts_left2.pta", ptal1, 1); - ptaWriteDebug("/tmp/lept/dewdebug/endpts_right2.pta", ptar1, 1); - } - - n = L_MIN(ptaGetCount(ptal1), ptaGetCount(ptar1)); - if (n < MinLinesForHoriz1 - 2) { - ptaDestroy(&ptal1); - ptaDestroy(&ptar1); - L_INFO("First filter: only %d endpoints; needed 8\n", procName, n); - return 1; - } - - /* Remove outlier points */ - ptal2 = dewarpRemoveBadEndPoints(w, ptal1); - ptar2 = dewarpRemoveBadEndPoints(w, ptar1); - ptaDestroy(&ptal1); - ptaDestroy(&ptar1); - if (!ptal2 || !ptar2) { - ptaDestroy(&ptal2); - ptaDestroy(&ptar2); - L_INFO("Second filter: too few endpoints left after outliers removed\n", - procName); - return 1; - } - if (dew->debug) { - ptaWriteDebug("/tmp/lept/dewdebug/endpts_left3.pta", ptal2, 1); - ptaWriteDebug("/tmp/lept/dewdebug/endpts_right3.pta", ptar2, 1); - } - - *pptalf = ptal2; - *pptarf = ptar2; - return 0; -} - - -/*! 
- * \brief dewarpRemoveBadEndPoints() - * - * \param[in] w width of input image - * \param[in] ptas left or right line end points - * \return ptad filtered left or right end points, or NULL on error. - * - *
- * Notes:
- *      (1) The input set is sorted by line position (x value).
- *          Break into two (upper and lower); for each find the median
- *          horizontal (y value), and remove all points farther than
- *          a fraction of the image width from this.  Make sure each
- *          part still has at least 3 points, and join the two sections
- *          before returning.
- *      (2) Reminder: x and y in the pta are transposed; think x = f(y).
- * 
- */ -static PTA * -dewarpRemoveBadEndPoints(l_int32 w, - PTA *ptas) -{ -l_int32 i, n, nu, nd; -l_float32 rval, xval, yval, delta; -PTA *ptau1, *ptau2, *ptad1, *ptad2; - - PROCNAME("dewarpRemoveBadEndPoints"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - delta = AllowedWidthFract * w; - n = ptaGetCount(ptas); /* will be at least 8 */ - - /* Check the upper half */ - ptau1 = ptaSelectRange(ptas, 0, n / 2); - ptaGetRankValue(ptau1, 0.5, NULL, L_SORT_BY_Y, &rval); - nu = ptaGetCount(ptau1); - ptau2 = ptaCreate(nu); - for (i = 0; i < nu; i++) { - ptaGetPt(ptau1, i, &xval, &yval); /* transposed */ - if (L_ABS(rval - yval) <= delta) - ptaAddPt(ptau2, xval, yval); - } - ptaDestroy(&ptau1); - if (ptaGetCount(ptau2) < MinLinesForHoriz2) { - ptaDestroy(&ptau2); - L_INFO("Second filter: upper set is too small after outliers removed\n", - procName); - return NULL; - } - - /* Check the lower half */ - ptad1 = ptaSelectRange(ptas, n / 2 + 1, -1); - ptaGetRankValue(ptad1, 0.5, NULL, L_SORT_BY_Y, &rval); - nd = ptaGetCount(ptad1); - ptad2 = ptaCreate(nd); - for (i = 0; i < nd; i++) { - ptaGetPt(ptad1, i, &xval, &yval); /* transposed */ - if (L_ABS(rval - yval) <= delta) - ptaAddPt(ptad2, xval, yval); - } - ptaDestroy(&ptad1); - if (ptaGetCount(ptad2) < MinLinesForHoriz2) { - ptaDestroy(&ptau2); - ptaDestroy(&ptad2); - L_INFO("Second filter: lower set is too small after outliers removed\n", - procName); - return NULL; - } - - ptaJoin(ptau2, ptad2, 0, -1); - ptaDestroy(&ptad2); - return ptau2; -} - - -/*! - * \brief dewarpIsLineCoverageValid() - * - * \param[in] ptaa of validated lines - * \param[in] h height of pix - * \param[out] pntop number of lines in top half - * \param[out] pnbot number of lines in bottom half - * \param[out] pytop location of top line - * \param[out] pybot location of bottom line - * \return 1 if coverage is valid, 0 if not or on error. - * - *
- * Notes:
- *      (1) The criterion for valid coverage is:
- *          (a) there must be at least 4 lines in each half (top and bottom)
- *              of the image.
- *          (b) the coverage must be at least 50% of the image height
- * 
- */ -static l_int32 -dewarpIsLineCoverageValid(PTAA *ptaa, - l_int32 h, - l_int32 *pntop, - l_int32 *pnbot, - l_int32 *pytop, - l_int32 *pybot) -{ -l_int32 i, n, iy, both_halves, ntop, nbot, ytop, ybot, nmin; -l_float32 y, fraction; -NUMA *na; - - PROCNAME("dewarpIsLineCoverageValid"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 0); - if ((n = ptaaGetCount(ptaa)) == 0) - return ERROR_INT("ptaa empty", procName, 0); - if (h <= 0) - return ERROR_INT("invalid h", procName, 0); - if (!pntop || !pnbot) - return ERROR_INT("&ntop and &nbot not defined", procName, 0); - if (!pytop || !pybot) - return ERROR_INT("&ytop and &ybot not defined", procName, 0); - - na = numaCreate(n); - for (i = 0; i < n; i++) { - ptaaGetPt(ptaa, i, 0, NULL, &y); - numaAddNumber(na, y); - } - numaSort(na, na, L_SORT_INCREASING); - for (i = 0, ntop = 0; i < n; i++) { - numaGetIValue(na, i, &iy); - if (i == 0) ytop = iy; - if (i == n - 1) ybot = iy; - if (iy < 0.5 * h) - ntop++; - } - numaDestroy(&na); - nbot = n - ntop; - *pntop = ntop; - *pnbot = nbot; - *pytop = ytop; - *pybot = ybot; - nmin = 4; /* minimum number of lines required in each half */ - both_halves = (ntop >= nmin) && (nbot >= nmin); - fraction = (l_float32)(ybot - ytop) / (l_float32)h; - if (both_halves && fraction > 0.50) - return 1; - return 0; -} - - -/*! - * \brief dewarpQuadraticLSF() - * - * \param[in] ptad left or right end points of longest lines - * \param[out] pa coeff a of LSF: y = ax^2 + bx + c - * \param[out] pb coeff b of LSF: y = ax^2 + bx + c - * \param[out] pc coeff c of LSF: y = ax^2 + bx + c - * \param[out] pmederr [optional] median error - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This is used for finding the left or right sides of
- *          the text block, computed as a quadratic curve.
- *          Only the longest lines are input, so there are
- *          no outliers.
- *      (2) The ptas for the end points all have x and y swapped.
- * 
- */ -static l_int32 -dewarpQuadraticLSF(PTA *ptad, - l_float32 *pa, - l_float32 *pb, - l_float32 *pc, - l_float32 *pmederr) -{ -l_int32 i, n; -l_float32 x, y, xp, c0, c1, c2; -NUMA *naerr; - - PROCNAME("dewarpQuadraticLSF"); - - if (pmederr) *pmederr = 0.0; - if (!pa || !pb || !pc) - return ERROR_INT("not all ptrs are defined", procName, 1); - *pa = *pb = *pc = 0.0; - if (!ptad) - return ERROR_INT("ptad not defined", procName, 1); - - /* Fit to the longest lines */ - ptaGetQuadraticLSF(ptad, &c2, &c1, &c0, NULL); - *pa = c2; - *pb = c1; - *pc = c0; - - /* Optionally, find the median error */ - if (pmederr) { - n = ptaGetCount(ptad); - naerr = numaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(ptad, i, &y, &xp); - applyQuadraticFit(c2, c1, c0, y, &x); - numaAddNumber(naerr, L_ABS(x - xp)); - } - numaGetMedian(naerr, pmederr); - numaDestroy(&naerr); - } - return 0; -} - -/*----------------------------------------------------------------------* - * Build disparity model for slope near binding * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpFindHorizSlopeDisparity() - * - * \param[in] dew - * \param[in] pixb 1 bpp, with vert and horiz disparity removed - * \param[in] fractthresh threshold fractional difference in density - * \param[in] parity 0 if even page, 1 if odd page - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) %fractthresh is a threshold on the fractional difference in stroke
- *          density between between left and right sides.  Process this
- *          disparity only if the absolute value of the fractional
- *          difference equals or exceeds this threshold.
- *      (2) %parity indicates where the binding is: on the left for
- *          %parity == 0 and on the right for %parity == 1.
- *      (3) This takes a 1 bpp %pixb where both vertical and horizontal
- *          disparity have been applied, so the text lines are straight and,
- *          more importantly, the line end points are vertically aligned.
- *          It estimates the foreshortening of the characters on the
- *          binding side, and if significant, computes a one-dimensional
- *          horizontal disparity function to compensate.
- *      (4) The first attempt was to use the average width of the
- *          connected components (c.c.) in vertical slices.  This does not work
- *          reliably, because the horizontal compression of the text is
- *          often accompanied by horizontal joining of c.c.
- *      (5) We use the density of vertical strokes, measured by first using
- *          a vertical opening, which improves the signal.  The result
- *          is relatively insensitive to the size of the opening; we use
- *          a 10-pixel opening.  The relative density is measured by
- *          finding the number of c.c. in a full height sliding window
- *          of width 50 pixels, and compute every 25 pixels.  Similar results
- *          are obtained counting c.c. that either intersect the window
- *          or are fully contained within it.
- *      (6) Debug output goes to /tmp/lept/dewmod/ for collection into a pdf.
- * 
- */ -l_ok -dewarpFindHorizSlopeDisparity(L_DEWARP *dew, - PIX *pixb, - l_float32 fractthresh, - l_int32 parity) -{ -l_int32 i, j, x, n1, n2, nb, ne, count, w, h, ival, prev; -l_int32 istart, iend, first, last, x0, x1, nx, ny; -l_float32 fract, delta, sum, aveval, fval, del, denom; -l_float32 ca, cb, cc, cd, ce, y; -BOX *box; -BOXA *boxa1, *boxa2; -GPLOT *gplot; -NUMA *na1, *na2, *na3, *na4, *nasum; -PIX *pix1; -PTA *pta1; -FPIX *fpix; - - PROCNAME("dewarpFindHorizSlopeDisparity"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - if (!dew->vvalid || !dew->hvalid) - return ERROR_INT("invalid vert or horiz disparity model", procName, 1); - if (!pixb || pixGetDepth(pixb) != 1) - return ERROR_INT("pixb not defined or not 1 bpp", procName, 1); - - if (dew->debug) L_INFO("finding slope horizontal disparity\n", procName); - - /* Find the bounding boxes of the vertical strokes; remove noise */ - pix1 = pixMorphSequence(pixb, "o1.10", 0); - pixDisplay(pix1, 100, 100); - boxa1 = pixConnCompBB(pix1, 4); - boxa2 = boxaSelectBySize(boxa1, 0, 5, L_SELECT_HEIGHT, L_SELECT_IF_GT, - NULL); - nb = boxaGetCount(boxa2); - lept_stderr("number of components: %d\n", nb); - boxaDestroy(&boxa1); - - /* Estimate the horizontal density of vertical strokes */ - na1 = numaCreate(0); - numaSetParameters(na1, 0, 25); - pixGetDimensions(pixb, &w, &h, NULL); - for (x = 0; x + 50 < w; x += 25) { - box = boxCreate(x, 0, 50, h); - boxaContainedInBoxCount(boxa2, box, &count); - numaAddNumber(na1, count); - boxDestroy(&box); - } - if (dew->debug) { - lept_mkdir("lept/dew"); - gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/dew/0091", NULL); - lept_mv("/tmp/lept/dew/0091.png", "lept/dewmod", NULL, NULL); - pixWriteDebug("/tmp/lept/dewmod/0090.png", pix1, IFF_PNG); - } - pixDestroy(&pix1); - boxaDestroy(&boxa2); - - /* Find the left and right end local maxima; if the difference - * is small, quit. 
*/ - n1 = numaGetCount(na1); - prev = 0; - istart = 0; - first = 0; - for (i = 0; i < n1; i++) { - numaGetIValue(na1, i, &ival); - if (ival >= prev) { - prev = ival; - continue; - } else { - first = prev; - istart = i - 1; - break; - } - } - prev = 0; - last = 0; - iend = n1 - 1; - for (i = n1 - 1; i >= 0; i--) { - numaGetIValue(na1, i, &ival); - if (ival >= prev) { - prev = ival; - continue; - } else { - last = prev; - iend = i + 1; - break; - } - } - na2 = numaClipToInterval(na1, istart, iend); - numaDestroy(&na1); - n2 = numaGetCount(na2); - delta = (parity == 0) ? last - first : first - last; - denom = L_MAX(1.0, (l_float32)(L_MIN(first, last))); - fract = (l_float32)delta / denom; - if (dew->debug) { - L_INFO("Slope-disparity: first = %d, last = %d, fract = %7.3f\n", - procName, first, last, fract); - gplotSimple1(na2, GPLOT_PNG, "/tmp/lept/dew/0092", NULL); - lept_mv("/tmp/lept/dew/0092.png", "lept/dewmod", NULL, NULL); - } - if (fract < fractthresh) { - L_INFO("Small slope-disparity: first = %d, last = %d, fract = %7.3f\n", - procName, first, last, fract); - numaDestroy(&na2); - return 0; - } - - /* Find the density far from the binding, and normalize to 1. 
*/ - ne = n2 - n2 % 2; - if (parity == 0) - numaGetSumOnInterval(na2, 0, ne / 2 - 1, &sum); - else /* parity == 1 */ - numaGetSumOnInterval(na2, ne / 2, ne - 1, &sum); - denom = L_MAX(1.0, (l_float32)(ne / 2)); - aveval = sum / denom; - na3 = numaMakeConstant(aveval, n2); - numaArithOp(na2, na2, na3, L_ARITH_DIVIDE); - numaDestroy(&na3); - if (dew->debug) { - L_INFO("Average background density: %5.1f\n", procName, aveval); - gplotSimple1(na2, GPLOT_PNG, "/tmp/lept/dew/0093", NULL); - lept_mv("/tmp/lept/dew/0093.png", "lept/dewmod", NULL, NULL); - } - - /* Fit the normalized density curve to a quartic */ - pta1 = numaConvertToPta1(na2); - ptaWriteStream(stderr, pta1, 0); -/* ptaGetQuadraticLSF(pta1, NULL, NULL, NULL, &na3); */ - ptaGetQuarticLSF(pta1, &ca, &cb, &cc, &cd, &ce, &na3); - ptaGetArrays(pta1, &na4, NULL); - if (dew->debug) { - gplot = gplotSimpleXY1(na4, na3, GPLOT_LINES, GPLOT_PNG, - "/tmp/lept/dew/0094", NULL); - gplotDestroy(&gplot); - lept_mv("/tmp/lept/dew/0094.png", "lept/dewmod", NULL, NULL); - } - ptaDestroy(&pta1); - - /* Integrate from the high point down to 1 (or v.v) to get the - * disparity needed to make the density constant. 
*/ - nasum = numaMakeConstant(0, w); /* area under the curve above 1.0 */ - if (parity == 0) { - for (i = n2 - 1; i >= 0; i--) { - numaGetFValue(na3, i, &fval); - if (fval < 1.0) break; - } - numaGetIValue(na4, i + 1, &x0); - numaGetIValue(na4, n2 - 1, &x1); - numaSetParameters(nasum, x0, 1); - sum = 0.0; - for (x = x0; x < x1; x++) { - applyQuarticFit(ca, cb, cc, cd, ce, (l_float32)x, &y); - sum += (y - 1.0); - numaReplaceNumber(nasum, x, sum); - } - for (x = x1; x < w; x++) - numaReplaceNumber(nasum, x, sum); - } else { /* parity == 1 */ - for (i = 0; i < n2; i++) { - numaGetFValue(na3, i, &fval); - if (fval < 1.0) break; - } - numaGetIValue(na4, 0, &x0); - numaGetIValue(na4, i - 1, &x1); - numaSetParameters(nasum, x0, 1); - sum = 0.0; - for (x = x1; x >= x0; x--) { - applyQuarticFit(ca, cb, cc, cd, ce, (l_float32)x, &y); - sum += (y - 1.0); - numaReplaceNumber(nasum, x, sum); - } - for (x = x0; x >= 0; x--) - numaReplaceNumber(nasum, x, sum); - } - - /* Save the result in a fpix at the specified subsampling */ - nx = dew->nx; - ny = dew->ny; - fpix = fpixCreate(nx, ny); - del = (l_float32)w / (l_float32)nx; - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - x = del * j; - numaGetFValue(nasum, x, &fval); - fpixSetPixel(fpix, j, i, fval); - } - } - dew->sampydispar = fpix; - dew->ysuccess = 1; - - numaDestroy(&na2); - numaDestroy(&na3); - numaDestroy(&na4); - numaDestroy(&nasum); - return 0; -} - - -/*----------------------------------------------------------------------* - * Build line disparity model * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpBuildLineModel() - * - * \param[in] dew - * \param[in] opensize size of opening to remove perpendicular lines - * \param[in] debugfile use NULL to skip writing this - * \return 0 if OK, 1 if unable to build the model or on error - * - *
- * Notes:
- *      (1) This builds the horizontal and vertical disparity arrays
- *          for an input of ruled lines, typically for calibration.
- *          In book scanning, you could lay the ruled paper over a page.
- *          Then for that page and several below it, you can use the
- *          disparity correction of the line model to dewarp the pages.
- *      (2) The dew has been initialized with the image of ruled lines.
- *          These lines must be continuous, but we do a small amount
- *          of pre-processing here to insure that.
- *      (3) %opensize is typically about 8.  It must be larger than
- *          the thickness of the lines to be extracted.  This is the
- *          default value, which is applied if %opensize < 3.
- *      (4) Sets vsuccess = 1 and hsuccess = 1 if the vertical and/or
- *          horizontal disparity arrays build.
- *      (5) Similar to dewarpBuildPageModel(), except here the vertical
- *          and horizontal disparity arrays are both built from ruled lines.
- *          See notes there.
- * 
- */ -l_ok -dewarpBuildLineModel(L_DEWARP *dew, - l_int32 opensize, - const char *debugfile) -{ -char buf[64]; -l_int32 i, j, bx, by, ret, nlines; -BOXA *boxa; -PIX *pixs, *pixh, *pixv, *pix, *pix1, *pix2; -PIXA *pixa1, *pixa2; -PTA *pta; -PTAA *ptaa1, *ptaa2; - - PROCNAME("dewarpBuildLineModel"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - if (opensize < 3) { - L_WARNING("opensize should be >= 3; setting to 8\n", procName); - opensize = 8; /* default */ - } - - dew->debug = (debugfile) ? 1 : 0; - dew->vsuccess = dew->hsuccess = 0; - pixs = dew->pixs; - if (debugfile) { - lept_rmdir("lept/dewline"); /* erase previous images */ - lept_mkdir("lept/dewline"); - lept_rmdir("lept/dewmod"); /* erase previous images */ - lept_mkdir("lept/dewmod"); - lept_mkdir("lept/dewarp"); - pixDisplayWithTitle(pixs, 0, 0, "pixs", 1); - pixWriteDebug("/tmp/lept/dewline/001.png", pixs, IFF_PNG); - } - - /* Extract and solidify the horizontal and vertical lines. We use - * the horizontal lines to derive the vertical disparity, and v.v. - * Both disparities are computed using the vertical disparity - * algorithm; the horizontal disparity is found from the - * vertical lines by rotating them clockwise by 90 degrees. - * On the first pass, we compute the horizontal disparity, from - * the vertical lines, by rotating them by 90 degrees (so they - * are horizontal) and computing the vertical disparity on them; - * we rotate the resulting fpix array for the horizontal disparity - * back by -90 degrees. On the second pass, we compute the vertical - * disparity from the horizontal lines in the usual fashion. 
*/ - snprintf(buf, sizeof(buf), "d1.3 + c%d.1 + o%d.1", opensize - 2, opensize); - pixh = pixMorphSequence(pixs, buf, 0); /* horiz */ - snprintf(buf, sizeof(buf), "d3.1 + c1.%d + o1.%d", opensize - 2, opensize); - pix1 = pixMorphSequence(pixs, buf, 0); /* vert */ - pixv = pixRotateOrth(pix1, 1); /* vert rotated to horizontal */ - pixa1 = pixaCreate(2); - pixaAddPix(pixa1, pixv, L_INSERT); /* get horizontal disparity first */ - pixaAddPix(pixa1, pixh, L_INSERT); - pixDestroy(&pix1); - - /*--------------------------------------------------------------*/ - /* Process twice: first for horiz disparity, then for vert */ - /*--------------------------------------------------------------*/ - for (i = 0; i < 2; i++) { - pix = pixaGetPix(pixa1, i, L_CLONE); - pixDisplay(pix, 0, 900); - boxa = pixConnComp(pix, &pixa2, 8); - nlines = boxaGetCount(boxa); - boxaDestroy(&boxa); - if (nlines < dew->minlines) { - L_WARNING("only found %d lines\n", procName, nlines); - pixDestroy(&pix); - pixaDestroy(&pixa1); - continue; - } - - /* Identify the pixels along the skeleton of each line */ - ptaa1 = ptaaCreate(nlines); - for (j = 0; j < nlines; j++) { - pixaGetBoxGeometry(pixa2, j, &bx, &by, NULL, NULL); - pix1 = pixaGetPix(pixa2, j, L_CLONE); - pta = dewarpGetMeanVerticals(pix1, bx, by); - ptaaAddPta(ptaa1, pta, L_INSERT); - pixDestroy(&pix1); - } - pixaDestroy(&pixa2); - if (debugfile) { - pix1 = pixConvertTo32(pix); - pix2 = pixDisplayPtaa(pix1, ptaa1); - snprintf(buf, sizeof(buf), "/tmp/lept/dewline/%03d.png", 2 + 2 * i); - pixWriteDebug(buf, pix2, IFF_PNG); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - /* Remove all lines that are not at least 0.75 times the length - * of the longest line. 
*/ - ptaa2 = dewarpRemoveShortLines(pix, ptaa1, 0.75, DEBUG_SHORT_LINES); - if (debugfile) { - pix1 = pixConvertTo32(pix); - pix2 = pixDisplayPtaa(pix1, ptaa2); - snprintf(buf, sizeof(buf), "/tmp/lept/dewline/%03d.png", 3 + 2 * i); - pixWriteDebug(buf, pix2, IFF_PNG); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - ptaaDestroy(&ptaa1); - nlines = ptaaGetCount(ptaa2); - if (nlines < dew->minlines) { - pixDestroy(&pix); - ptaaDestroy(&ptaa2); - L_WARNING("%d lines: too few to build model\n", procName, nlines); - continue; - } - - /* Get the sampled 'vertical' disparity from the textline - * centers. The disparity array will push pixels vertically - * so that each line is flat and centered at the y-position - * of the mid-point. */ - ret = dewarpFindVertDisparity(dew, ptaa2, 1 - i); - - /* If i == 0, move the result to the horizontal disparity, - * rotating it back by -90 degrees. */ - if (i == 0) { /* horizontal disparity, really */ - if (ret) { - L_WARNING("horizontal disparity not built\n", procName); - } else { - L_INFO("hsuccess = 1\n", procName); - dew->samphdispar = fpixRotateOrth(dew->sampvdispar, 3); - fpixDestroy(&dew->sampvdispar); - if (debugfile) - lept_mv("/tmp/lept/dewarp/vert_disparity.pdf", - "lept/dewarp", "horiz_disparity.pdf", NULL); - } - dew->hsuccess = dew->vsuccess; - dew->vsuccess = 0; - } else { /* i == 1 */ - if (ret) - L_WARNING("vertical disparity not built\n", procName); - else - L_INFO("vsuccess = 1\n", procName); - } - ptaaDestroy(&ptaa2); - pixDestroy(&pix); - } - pixaDestroy(&pixa1); - - /* Debug output */ - if (debugfile) { - if (dew->vsuccess == 1) { - dewarpPopulateFullRes(dew, NULL, 0, 0); - pix1 = fpixRenderContours(dew->fullvdispar, 3.0, 0.15); - pixWriteDebug("/tmp/lept/dewline/006.png", pix1, IFF_PNG); - pixDisplay(pix1, 1000, 0); - pixDestroy(&pix1); - } - if (dew->hsuccess == 1) { - pix1 = fpixRenderContours(dew->fullhdispar, 3.0, 0.15); - pixWriteDebug("/tmp/lept/dewline/007.png", pix1, IFF_PNG); - pixDisplay(pix1, 1000, 
0); - pixDestroy(&pix1); - } - convertFilesToPdf("/tmp/lept/dewline", NULL, 135, 1.0, 0, 0, - "Dewarp Build Line Model", debugfile); - lept_stderr("pdf file: %s\n", debugfile); - } - - return 0; -} - - -/*----------------------------------------------------------------------* - * Query model status * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaModelStatus() - * - * \param[in] dewa - * \param[in] pageno - * \param[out] pvsuccess [optional] 1 on success - * \param[out] phsuccess [optional] 1 on success - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This tests if a model has been built, not if it is valid.
- * 
- */ -l_ok -dewarpaModelStatus(L_DEWARPA *dewa, - l_int32 pageno, - l_int32 *pvsuccess, - l_int32 *phsuccess) -{ -L_DEWARP *dew; - - PROCNAME("dewarpaModelStatus"); - - if (pvsuccess) *pvsuccess = 0; - if (phsuccess) *phsuccess = 0; - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - if ((dew = dewarpaGetDewarp(dewa, pageno)) == NULL) - return ERROR_INT("dew not retrieved", procName, 1); - if (pvsuccess) *pvsuccess = dew->vsuccess; - if (phsuccess) *phsuccess = dew->hsuccess; - return 0; -} - - -/*----------------------------------------------------------------------* - * Rendering helpers * - *----------------------------------------------------------------------*/ -/*! - * \brief pixRenderMidYs() - * - * \param[in] pixs 32 bpp - * \param[in] namidys y location of reference lines for vertical disparity - * \param[in] linew width of rendered line; typ 2 - * \return 0 if OK, 1 on error - */ -static l_int32 -pixRenderMidYs(PIX *pixs, - NUMA *namidys, - l_int32 linew) -{ -l_int32 i, n, w, yval, rval, gval, bval; -PIXCMAP *cmap; - - PROCNAME("pixRenderMidYs"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!namidys) - return ERROR_INT("namidys not defined", procName, 1); - - w = pixGetWidth(pixs); - n = numaGetCount(namidys); - cmap = pixcmapCreateRandom(8, 0, 0); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmap, i % 256, &rval, &gval, &bval); - numaGetIValue(namidys, i, &yval); - pixRenderLineArb(pixs, 0, yval, w, yval, linew, rval, gval, bval); - } - pixcmapDestroy(&cmap); - return 0; -} - - -/*! 
- * \brief pixRenderHorizEndPoints() - * - * \param[in] pixs 32 bpp - * \param[in] ptal left side line end points - * \param[in] ptar right side line end points - * \param[in] color 0xrrggbb00 - * \return 0 if OK, 1 on error - */ -static l_int32 -pixRenderHorizEndPoints(PIX *pixs, - PTA *ptal, - PTA *ptar, - l_uint32 color) -{ -PIX *pixcirc; -PTA *ptalt, *ptart, *ptacirc; - - PROCNAME("pixRenderHorizEndPoints"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!ptal || !ptar) - return ERROR_INT("ptal and ptar not both defined", procName, 1); - - ptacirc = generatePtaFilledCircle(5); - pixcirc = pixGenerateFromPta(ptacirc, 11, 11); - ptalt = ptaTranspose(ptal); - ptart = ptaTranspose(ptar); - - pixDisplayPtaPattern(pixs, pixs, ptalt, pixcirc, 5, 5, color); - pixDisplayPtaPattern(pixs, pixs, ptart, pixcirc, 5, 5, color); - ptaDestroy(&ptacirc); - ptaDestroy(&ptalt); - ptaDestroy(&ptart); - pixDestroy(&pixcirc); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp3.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp3.c deleted file mode 100644 index 1b195c96..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp3.c +++ /dev/null @@ -1,1016 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file dewarp3.c - *
- *
- *    Applying and stripping the page disparity model
- *
- *      Apply disparity array to pix
- *          l_int32            dewarpaApplyDisparity()
- *          static l_int32     dewarpaApplyInit()
- *          static PIX        *pixApplyVertDisparity()
- *          static PIX        *pixApplyHorizDisparity()
- *
- *      Apply disparity array to boxa
- *          l_int32            dewarpaApplyDisparityBoxa()
- *          static BOXA       *boxaApplyDisparity()
- *
- *      Stripping out data and populating full res disparity
- *          l_int32            dewarpMinimize()
- *          l_int32            dewarpPopulateFullRes()
- *
- *      Static functions not presently in use
- *          static FPIX       *fpixSampledDisparity()
- *          static FPIX       *fpixExtraHorizDisparity()
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_int32 dewarpaApplyInit(L_DEWARPA *dewa, l_int32 pageno, PIX *pixs, - l_int32 x, l_int32 y, L_DEWARP **pdew, - const char *debugfile); -static PIX *pixApplyVertDisparity(L_DEWARP *dew, PIX *pixs, l_int32 grayin); -static PIX * pixApplyHorizDisparity(L_DEWARP *dew, PIX *pixs, l_int32 grayin); -static BOXA *boxaApplyDisparity(L_DEWARP *dew, BOXA *boxa, l_int32 direction, - l_int32 mapdir); - -/*----------------------------------------------------------------------* - * Apply warping disparity array to pixa * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaApplyDisparity() - * - * \param[in] dewa - * \param[in] pageno of page model to be used; may be a ref model - * \param[in] pixs image to be modified; can be 1, 8 or 32 bpp - * \param[in] grayin gray value, from 0 to 255, for pixels brought in; - * use -1 to use pixels on the boundary of pixs - * \param[in] x, y origin for generation of disparity arrays - * \param[out] ppixd disparity corrected image - * \param[in] debugfile use NULL to skip writing this - * \return 0 if OK, 1 on error no models or ref models available - * - *
- * Notes:
- *      (1) This applies the disparity arrays to the specified image.
- *      (2) Specify gray color for pixels brought in from the outside:
- *          0 is black, 255 is white.  Use -1 to select pixels from the
- *          boundary of the source image.
- *      (3) If the models and ref models have not been validated, this
- *          will do so by calling dewarpaInsertRefModels().
- *      (4) This works with both stripped and full resolution page models.
- *          If the full res disparity array(s) are missing, they are remade.
- *      (5) The caller must handle errors that are returned because there
- *          are no valid models or ref models for the page -- typically
- *          by using the input pixs.
- *      (6) If there is no model for %pageno, this will use the model for
- *          'refpage' and put the result in the dew for %pageno.
- *      (7) This populates the full resolution disparity arrays if
- *          necessary.  If x and/or y are positive, they are used,
- *          in conjunction with pixs, to determine the required
- *          slope-based extension of the full resolution disparity
- *          arrays in each direction.  When (x,y) == (0,0), all
- *          extension is to the right and down.  Nonzero values of (x,y)
- *          are useful for dewarping when pixs is deliberately undercropped.
- *      (8) Important: when applying disparity to a number of images,
- *          after calling this function and saving the resulting pixd,
- *          you should call dewarpMinimize(dew) on the dew for %pageno.
- *          This will remove pixs and pixd (or their clones) stored in dew,
- *          as well as the full resolution disparity arrays.  Together,
- *          these hold approximately 16 bytes for each pixel in pixs.
- * 
- */ -l_ok -dewarpaApplyDisparity(L_DEWARPA *dewa, - l_int32 pageno, - PIX *pixs, - l_int32 grayin, - l_int32 x, - l_int32 y, - PIX **ppixd, - const char *debugfile) -{ -L_DEWARP *dew1, *dew; -PIX *pixv, *pixh; - - PROCNAME("dewarpaApplyDisparity"); - - /* Initialize the output with the input, so we'll have that - * in case we can't apply the page model. */ - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = pixClone(pixs); - if (grayin > 255) { - L_WARNING("invalid grayin = %d; clipping at 255\n", procName, grayin); - grayin = 255; - } - - /* Find the appropriate dew to use and fully populate its array(s) */ - if (dewarpaApplyInit(dewa, pageno, pixs, x, y, &dew, debugfile)) - return ERROR_INT("no model available", procName, 1); - - /* Correct for vertical disparity and save the result */ - if ((pixv = pixApplyVertDisparity(dew, pixs, grayin)) == NULL) { - dewarpMinimize(dew); - return ERROR_INT("pixv not made", procName, 1); - } - pixDestroy(ppixd); - *ppixd = pixv; - if (debugfile) { - pixDisplayWithTitle(pixv, 300, 0, "pixv", 1); - lept_rmdir("lept/dewapply"); /* remove previous images */ - lept_mkdir("lept/dewapply"); - pixWriteDebug("/tmp/lept/dewapply/001.png", pixs, IFF_PNG); - pixWriteDebug("/tmp/lept/dewapply/002.png", pixv, IFF_PNG); - } - - /* Optionally, correct for horizontal disparity */ - if (dewa->useboth && dew->hsuccess && !dew->skip_horiz) { - if (dew->hvalid == FALSE) { - L_INFO("invalid horiz model for page %d\n", procName, pageno); - } else { - if ((pixh = pixApplyHorizDisparity(dew, pixv, grayin)) != NULL) { - pixDestroy(ppixd); - *ppixd = pixh; - if (debugfile) { - pixDisplayWithTitle(pixh, 600, 0, "pixh", 1); - pixWriteDebug("/tmp/lept/dewapply/003.png", pixh, IFF_PNG); - } - } else { - L_ERROR("horiz disparity failed on page %d\n", - procName, pageno); - } - } - } - - if (debugfile) { - dew1 = dewarpaGetDewarp(dewa, pageno); - dewarpDebug(dew1, "lept/dewapply", 0); - convertFilesToPdf("/tmp/lept/dewapply", NULL, 
250, 1.0, 0, 0, - "Dewarp Apply Disparity", debugfile); - lept_stderr("pdf file: %s\n", debugfile); - } - - /* Get rid of the large full res disparity arrays */ - dewarpMinimize(dew); - - return 0; -} - - -/*! - * \brief dewarpaApplyInit() - * - * \param[in] dewa - * \param[in] pageno of page model to be used; may be a ref model - * \param[in] pixs image to be modified; can be 1, 8 or 32 bpp - * \param[in] x, y origin for generation of disparity arrays - * \param[out] pdew dewarp to be used for this page - * \param[in] debugfile use NULL to skip writing this - * \return 0 if OK, 1 on error no models or ref models available - * - *
- * Notes:
- *      (1) This prepares pixs for being dewarped.  It returns 1 if
- *          no dewarping model exists.
- *      (2) The returned %dew contains the model to be used for this page
- *          image.  The %dew is owned by dewa; do not destroy.
- *      (3) If both the 'useboth' and 'check_columns' fields are true,
- *          this checks for multiple text columns and if found, sets
- *          the 'skip_horiz' field in the %dew for this page.
- * 
- */ -static l_int32 -dewarpaApplyInit(L_DEWARPA *dewa, - l_int32 pageno, - PIX *pixs, - l_int32 x, - l_int32 y, - L_DEWARP **pdew, - const char *debugfile) -{ -l_int32 ncols, debug; -L_DEWARP *dew1, *dew2; -PIX *pix1; - - PROCNAME("dewarpaApplyInit"); - - if (!pdew) - return ERROR_INT("&dew not defined", procName, 1); - *pdew = NULL; - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - if (pageno < 0 || pageno > dewa->maxpage) - return ERROR_INT("invalid pageno", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (x < 0) x = 0; - if (y < 0) y = 0; - debug = (debugfile) ? 1 : 0; - - /* Make sure all models are valid and all refmodels have - * been added to dewa */ - if (dewa->modelsready == FALSE) - dewarpaInsertRefModels(dewa, 0, debug); - - /* Check for the existence of a valid model; we don't expect - * all pages to have them. */ - if ((dew1 = dewarpaGetDewarp(dewa, pageno)) == NULL) { - L_INFO("no valid dew model for page %d\n", procName, pageno); - return 1; - } - - /* Get the page model that we will use and sanity-check that - * it is valid. The ultimate result will be put in dew1->pixd. */ - if (dew1->hasref) /* point to another page with a model */ - dew2 = dewarpaGetDewarp(dewa, dew1->refpage); - else - dew2 = dew1; - if (dew2->vvalid == FALSE) - return ERROR_INT("no model; shouldn't happen", procName, 1); - *pdew = dew2; - - /* If check_columns is TRUE and useboth is TRUE, check for - * multiple columns. If there is more than one column, we - * only apply vertical disparity. 
*/ - if (dewa->useboth && dewa->check_columns) { - pix1 = pixConvertTo1(pixs, 140); - pixCountTextColumns(pix1, 0.3, 0.5, 0.1, &ncols, NULL); - pixDestroy(&pix1); - if (ncols > 1) { - L_INFO("found %d columns; not correcting horiz disparity\n", - procName, ncols); - dew2->skip_horiz = TRUE; - } else { - dew2->skip_horiz = FALSE; - } - } - - /* Generate the full res disparity arrays if they don't exist - * (e.g., if they've been minimized or read from file), or if - * they are too small for the current image. */ - dewarpPopulateFullRes(dew2, pixs, x, y); - return 0; -} - - -/*! - * \brief pixApplyVertDisparity() - * - * \param[in] dew - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] grayin gray value, from 0 to 255, for pixels brought in; - * use -1 to use pixels on the boundary of pixs - * \return pixd modified to remove vertical disparity, or NULL on error - * - *
- * Notes:
- *      (1) This applies the vertical disparity array to the specified
- *          image.  For src pixels above the image, we use the pixels
- *          in the first raster line.
- *      (2) Specify gray color for pixels brought in from the outside:
- *          0 is black, 255 is white.  Use -1 to select pixels from the
- *          boundary of the source image.
- * 
- */ -static PIX * -pixApplyVertDisparity(L_DEWARP *dew, - PIX *pixs, - l_int32 grayin) -{ -l_int32 i, j, w, h, d, fw, fh, wpld, wplf, isrc, val8; -l_uint32 *datad, *lined; -l_float32 *dataf, *linef; -void **lineptrs; -FPIX *fpix; -PIX *pixd; - - PROCNAME("pixApplyVertDisparity"); - - if (!dew) - return (PIX *)ERROR_PTR("dew not defined", procName, NULL); - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pix not 1, 8 or 32 bpp", procName, NULL); - if ((fpix = dew->fullvdispar) == NULL) - return (PIX *)ERROR_PTR("fullvdispar not defined", procName, NULL); - fpixGetDimensions(fpix, &fw, &fh); - if (fw < w || fh < h) { - lept_stderr("fw = %d, w = %d, fh = %d, h = %d\n", fw, w, fh, h); - return (PIX *)ERROR_PTR("invalid fpix size", procName, NULL); - } - - /* Two choices for requested pixels outside pixs: (1) use pixels' - * from the boundary of pixs; use white or light gray pixels. 
*/ - pixd = pixCreateTemplate(pixs); - if (grayin >= 0) - pixSetAllGray(pixd, grayin); - datad = pixGetData(pixd); - dataf = fpixGetData(fpix); - wpld = pixGetWpl(pixd); - wplf = fpixGetWpl(fpix); - if (d == 1) { - lineptrs = pixGetLinePtrs(pixs, NULL); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linef = dataf + i * wplf; - for (j = 0; j < w; j++) { - isrc = (l_int32)(i - linef[j] + 0.5); - if (grayin < 0) /* use value at boundary if outside */ - isrc = L_MIN(L_MAX(isrc, 0), h - 1); - if (isrc >= 0 && isrc < h) { /* remains gray if outside */ - if (GET_DATA_BIT(lineptrs[isrc], j)) - SET_DATA_BIT(lined, j); - } - } - } - } else if (d == 8) { - lineptrs = pixGetLinePtrs(pixs, NULL); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linef = dataf + i * wplf; - for (j = 0; j < w; j++) { - isrc = (l_int32)(i - linef[j] + 0.5); - if (grayin < 0) - isrc = L_MIN(L_MAX(isrc, 0), h - 1); - if (isrc >= 0 && isrc < h) { - val8 = GET_DATA_BYTE(lineptrs[isrc], j); - SET_DATA_BYTE(lined, j, val8); - } - } - } - } else { /* d == 32 */ - lineptrs = pixGetLinePtrs(pixs, NULL); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linef = dataf + i * wplf; - for (j = 0; j < w; j++) { - isrc = (l_int32)(i - linef[j] + 0.5); - if (grayin < 0) - isrc = L_MIN(L_MAX(isrc, 0), h - 1); - if (isrc >= 0 && isrc < h) - lined[j] = GET_DATA_FOUR_BYTES(lineptrs[isrc], j); - } - } - } - - LEPT_FREE(lineptrs); - return pixd; -} - - -/*! - * \brief pixApplyHorizDisparity() - * - * \param[in] dew - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] grayin gray value, from 0 to 255, for pixels brought in; - * use -1 to use pixels on the boundary of pixs - * \return pixd modified to remove horizontal disparity if possible, - * or NULL on error. - * - *
- * Notes:
- *      (1) This applies the horizontal disparity array to the specified
- *          image.
- *      (2) Specify gray color for pixels brought in from the outside:
- *          0 is black, 255 is white.  Use -1 to select pixels from the
- *          boundary of the source image.
- *      (3) The input pixs has already been corrected for vertical disparity.
- *          If the horizontal disparity array doesn't exist, this returns
- *          a clone of %pixs.
- * 
- */ -static PIX * -pixApplyHorizDisparity(L_DEWARP *dew, - PIX *pixs, - l_int32 grayin) -{ -l_int32 i, j, w, h, d, fw, fh, wpls, wpld, wplf, jsrc, val8; -l_uint32 *datas, *lines, *datad, *lined; -l_float32 *dataf, *linef; -FPIX *fpix; -PIX *pixd; - - PROCNAME("pixApplyHorizDisparity"); - - if (!dew) - return (PIX *)ERROR_PTR("dew not defined", procName, pixs); - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pix not 1, 8 or 32 bpp", procName, NULL); - if ((fpix = dew->fullhdispar) == NULL) - return (PIX *)ERROR_PTR("fullhdispar not defined", procName, NULL); - fpixGetDimensions(fpix, &fw, &fh); - if (fw < w || fh < h) { - lept_stderr("fw = %d, w = %d, fh = %d, h = %d\n", fw, w, fh, h); - return (PIX *)ERROR_PTR("invalid fpix size", procName, NULL); - } - - /* Two choices for requested pixels outside pixs: (1) use pixels' - * from the boundary of pixs; use white or light gray pixels. 
*/ - pixd = pixCreateTemplate(pixs); - if (grayin >= 0) - pixSetAllGray(pixd, grayin); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - dataf = fpixGetData(fpix); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - wplf = fpixGetWpl(fpix); - if (d == 1) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - linef = dataf + i * wplf; - for (j = 0; j < w; j++) { - jsrc = (l_int32)(j - linef[j] + 0.5); - if (grayin < 0) /* use value at boundary if outside */ - jsrc = L_MIN(L_MAX(jsrc, 0), w - 1); - if (jsrc >= 0 && jsrc < w) { /* remains gray if outside */ - if (GET_DATA_BIT(lines, jsrc)) - SET_DATA_BIT(lined, j); - } - } - } - } else if (d == 8) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - linef = dataf + i * wplf; - for (j = 0; j < w; j++) { - jsrc = (l_int32)(j - linef[j] + 0.5); - if (grayin < 0) - jsrc = L_MIN(L_MAX(jsrc, 0), w - 1); - if (jsrc >= 0 && jsrc < w) { - val8 = GET_DATA_BYTE(lines, jsrc); - SET_DATA_BYTE(lined, j, val8); - } - } - } - } else { /* d == 32 */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - linef = dataf + i * wplf; - for (j = 0; j < w; j++) { - jsrc = (l_int32)(j - linef[j] + 0.5); - if (grayin < 0) - jsrc = L_MIN(L_MAX(jsrc, 0), w - 1); - if (jsrc >= 0 && jsrc < w) - lined[j] = lines[jsrc]; - } - } - } - - return pixd; -} - - -/*----------------------------------------------------------------------* - * Apply warping disparity array to boxa * - *----------------------------------------------------------------------*/ -/*! 
- * \brief dewarpaApplyDisparityBoxa() - * - * \param[in] dewa - * \param[in] pageno of page model to be used; may be a ref model - * \param[in] pixs initial pix reference; for alignment and debugging - * \param[in] boxas boxa to be mapped - * \param[in] mapdir 1 if mapping forward from original to dewarped; - * 0 if backward - * \param[in] x, y origin for generation of disparity arrays with - * respect to the source region - * \param[out] pboxad disparity corrected boxa - * \param[in] debugfile use NULL to skip writing this - * \return 0 if OK, 1 on error no models or ref models available - * - *
- * Notes:
- *      (1) This applies the disparity arrays in one of two mapping directions
- *          to the specified boxa.  It can be used in the backward direction
- *          to locate a box in the original coordinates that would have
- *          been dewarped to to the specified image.
- *      (2) If there is no model for %pageno, this will use the model for
- *          'refpage' and put the result in the dew for %pageno.
- *      (3) This works with both stripped and full resolution page models.
- *          If the full res disparity array(s) are missing, they are remade.
- *      (4) If an error occurs, a copy of the input boxa is returned.
- * 
- */ -l_ok -dewarpaApplyDisparityBoxa(L_DEWARPA *dewa, - l_int32 pageno, - PIX *pixs, - BOXA *boxas, - l_int32 mapdir, - l_int32 x, - l_int32 y, - BOXA **pboxad, - const char *debugfile) -{ -l_int32 debug_out; -L_DEWARP *dew1, *dew; -BOXA *boxav, *boxah; -PIX *pixv, *pixh; - - PROCNAME("dewarpaApplyDisparityBoxa"); - - /* Initialize the output with the input, so we'll have that - * in case we can't apply the page model. */ - if (!pboxad) - return ERROR_INT("&boxad not defined", procName, 1); - *pboxad = boxaCopy(boxas, L_CLONE); - - /* Find the appropriate dew to use and fully populate its array(s) */ - if (dewarpaApplyInit(dewa, pageno, pixs, x, y, &dew, debugfile)) - return ERROR_INT("no model available", procName, 1); - - /* Correct for vertical disparity and save the result */ - if ((boxav = boxaApplyDisparity(dew, boxas, L_VERT, mapdir)) == NULL) { - dewarpMinimize(dew); - return ERROR_INT("boxa1 not made", procName, 1); - } - boxaDestroy(pboxad); - *pboxad = boxav; - pixv = NULL; - pixh = NULL; - if (debugfile && mapdir != 1) - L_INFO("Reverse map direction; no debug output\n", procName); - debug_out = debugfile && (mapdir == 1); - if (debug_out) { - PIX *pix1; - lept_rmdir("lept/dewboxa"); /* remove previous images */ - lept_mkdir("lept/dewboxa"); - pix1 = pixConvertTo32(pixs); - pixRenderBoxaArb(pix1, boxas, 2, 255, 0, 0); - pixWriteDebug("/tmp/lept/dewboxa/01.png", pix1, IFF_PNG); - pixDestroy(&pix1); - pixv = pixApplyVertDisparity(dew, pixs, 255); - pix1 = pixConvertTo32(pixv); - pixRenderBoxaArb(pix1, boxav, 2, 0, 255, 0); - pixWriteDebug("/tmp/lept/dewboxa/02.png", pix1, IFF_PNG); - pixDestroy(&pix1); - } - - /* Optionally, correct for horizontal disparity */ - if (dewa->useboth && dew->hsuccess && !dew->skip_horiz) { - if (dew->hvalid == FALSE) { - L_INFO("invalid horiz model for page %d\n", procName, pageno); - } else { - boxah = boxaApplyDisparity(dew, boxav, L_HORIZ, mapdir); - if (!boxah) { - L_ERROR("horiz disparity fails on page %d\n", procName, 
pageno); - } else { - boxaDestroy(pboxad); - *pboxad = boxah; - if (debug_out) { - PIX *pix1; - pixh = pixApplyHorizDisparity(dew, pixv, 255); - pix1 = pixConvertTo32(pixh); - pixRenderBoxaArb(pix1, boxah, 2, 0, 0, 255); - pixWriteDebug("/tmp/lept/dewboxa/03.png", pix1, IFF_PNG); - pixDestroy(&pixh); - pixDestroy(&pix1); - } - } - } - } - - if (debug_out) { - pixDestroy(&pixv); - dew1 = dewarpaGetDewarp(dewa, pageno); - dewarpDebug(dew1, "lept/dewapply", 0); - convertFilesToPdf("/tmp/lept/dewboxa", NULL, 135, 1.0, 0, 0, - "Dewarp Apply Disparity Boxa", debugfile); - lept_stderr("Dewarp Apply Disparity Boxa pdf file: %s\n", - debugfile); - } - - /* Get rid of the large full res disparity arrays */ - dewarpMinimize(dew); - - return 0; -} - - -/*! - * \brief boxaApplyDisparity() - * - * \param[in] dew - * \param[in] boxa - * \param[in] direction L_HORIZ or L_VERT - * \param[in] mapdir 1 if mapping forward from original to dewarped; - * 0 if backward - * \return boxad modified by the disparity, or NULL on error - */ -static BOXA * -boxaApplyDisparity(L_DEWARP *dew, - BOXA *boxa, - l_int32 direction, - l_int32 mapdir) -{ -l_int32 x, y, w, h, ib, ip, nbox, wpl; -l_float32 xn, yn; -l_float32 *data, *line; -BOX *boxs, *boxd; -BOXA *boxad; -FPIX *fpix; -PTA *ptas, *ptad; - - PROCNAME("boxaApplyDisparity"); - - if (!dew) - return (BOXA *)ERROR_PTR("dew not defined", procName, NULL); - if (!boxa) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - if (direction == L_VERT) - fpix = dew->fullvdispar; - else if (direction == L_HORIZ) - fpix = dew->fullhdispar; - else - return (BOXA *)ERROR_PTR("invalid direction", procName, NULL); - if (!fpix) - return (BOXA *)ERROR_PTR("full disparity not defined", procName, NULL); - fpixGetDimensions(fpix, &w, &h); - - /* Clip the output to the positive quadrant because all box - * coordinates must be non-negative. 
*/ - data = fpixGetData(fpix); - wpl = fpixGetWpl(fpix); - nbox = boxaGetCount(boxa); - boxad = boxaCreate(nbox); - for (ib = 0; ib < nbox; ib++) { - boxs = boxaGetBox(boxa, ib, L_COPY); - ptas = boxConvertToPta(boxs, 4); - ptad = ptaCreate(4); - for (ip = 0; ip < 4; ip++) { - ptaGetIPt(ptas, ip, &x, &y); - line = data + y * wpl; - if (direction == L_VERT) { - if (mapdir == 0) - yn = y - line[x]; - else - yn = y + line[x]; - yn = L_MAX(0, yn); - ptaAddPt(ptad, x, yn); - } else { /* direction == L_HORIZ */ - if (mapdir == 0) - xn = x - line[x]; - else - xn = x + line[x]; - xn = L_MAX(0, xn); - ptaAddPt(ptad, xn, y); - } - } - boxd = ptaConvertToBox(ptad); - boxaAddBox(boxad, boxd, L_INSERT); - boxDestroy(&boxs); - ptaDestroy(&ptas); - ptaDestroy(&ptad); - } - - return boxad; -} - - -/*----------------------------------------------------------------------* - * Stripping out data and populating full res disparity * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpMinimize() - * - * \param[in] dew - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes all data that is not needed for serialization.
- *          It keeps the subsampled disparity array(s), so the full
- *          resolution arrays can be reconstructed.
- * 
- */ -l_ok -dewarpMinimize(L_DEWARP *dew) -{ -L_DEWARP *dewt; - - PROCNAME("dewarpMinimize"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - - /* If dew is a ref, minimize the actual dewarp */ - if (dew->hasref) - dewt = dewarpaGetDewarp(dew->dewa, dew->refpage); - else - dewt = dew; - if (!dewt) - return ERROR_INT("dewt not found", procName, 1); - - pixDestroy(&dewt->pixs); - fpixDestroy(&dewt->fullvdispar); - fpixDestroy(&dewt->fullhdispar); - numaDestroy(&dewt->namidys); - numaDestroy(&dewt->nacurves); - return 0; -} - - -/*! - * \brief dewarpPopulateFullRes() - * - * \param[in] dew - * \param[in] pix [optional], to give size of actual image - * \param[in] x, y origin for generation of disparity arrays - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the full resolution vertical and horizontal disparity
- *          arrays do not exist, they are built from the subsampled ones.
- *      (2) If pixs is not given, the size of the arrays is determined
- *          by the original image from which the sampled version was
- *          generated.  Any values of (x,y) are ignored.
- *      (3) If pixs is given, the full resolution disparity arrays must
- *          be large enough to accommodate it.
- *          (a) If the arrays do not exist, the value of (x,y) determines
- *              the origin of the full resolution arrays without extension,
- *              relative to pixs.  Thus, (x,y) gives the amount of
- *              slope extension in (left, top).  The (right, bottom)
- *              extension is then determined by the size of pixs and
- *              (x,y); the values should never be < 0.
- *          (b) If the arrays exist and pixs is too large, the existing
- *              full res arrays are destroyed and new ones are made,
- *              again using (x,y) to determine the extension in the
- *              four directions.
- * 
- */ -l_ok -dewarpPopulateFullRes(L_DEWARP *dew, - PIX *pix, - l_int32 x, - l_int32 y) -{ -l_int32 width, height, fw, fh, deltaw, deltah, redfactor; -FPIX *fpixt1, *fpixt2; - - PROCNAME("dewarpPopulateFullRes"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - if (!dew->sampvdispar) - return ERROR_INT("no sampled vert disparity", procName, 1); - if (x < 0) x = 0; - if (y < 0) y = 0; - - /* Establish the target size for the full res arrays */ - if (pix) - pixGetDimensions(pix, &width, &height, NULL); - else { - width = dew->w; - height = dew->h; - } - - /* Destroy the existing arrays if they are too small */ - if (dew->fullvdispar) { - fpixGetDimensions(dew->fullvdispar, &fw, &fh); - if (width > fw || height > fw) - fpixDestroy(&dew->fullvdispar); - } - if (dew->fullhdispar) { - fpixGetDimensions(dew->fullhdispar, &fw, &fh); - if (width > fw || height > fw) - fpixDestroy(&dew->fullhdispar); - } - - /* Find the required width and height expansion deltas */ - deltaw = width - dew->sampling * (dew->nx - 1) + 2; - deltah = height - dew->sampling * (dew->ny - 1) + 2; - redfactor = dew->redfactor; - deltaw = redfactor * L_MAX(0, deltaw); - deltah = redfactor * L_MAX(0, deltah); - - /* Generate the full res vertical array if it doesn't exist, - * extending it as required to make it big enough. Use x,y - * to determine the amounts on each side. */ - if (!dew->fullvdispar) { - fpixt1 = fpixCopy(NULL, dew->sampvdispar); - if (redfactor == 2) - fpixAddMultConstant(fpixt1, 0.0, (l_float32)redfactor); - fpixt2 = fpixScaleByInteger(fpixt1, dew->sampling * redfactor); - fpixDestroy(&fpixt1); - if (deltah == 0 && deltaw == 0) { - dew->fullvdispar = fpixt2; - } - else { - dew->fullvdispar = fpixAddSlopeBorder(fpixt2, x, deltaw - x, - y, deltah - y); - fpixDestroy(&fpixt2); - } - } - - /* Similarly, generate the full res horizontal array if it - * doesn't exist. Do this even if useboth == 1, but - * not if required to skip running horizontal disparity. 
*/ - if (!dew->fullhdispar && dew->samphdispar && !dew->skip_horiz) { - fpixt1 = fpixCopy(NULL, dew->samphdispar); - if (redfactor == 2) - fpixAddMultConstant(fpixt1, 0.0, (l_float32)redfactor); - fpixt2 = fpixScaleByInteger(fpixt1, dew->sampling * redfactor); - fpixDestroy(&fpixt1); - if (deltah == 0 && deltaw == 0) { - dew->fullhdispar = fpixt2; - } - else { - dew->fullhdispar = fpixAddSlopeBorder(fpixt2, x, deltaw - x, - y, deltah - y); - fpixDestroy(&fpixt2); - } - } - - return 0; -} - - -#if 0 -/*----------------------------------------------------------------------* - * Static functions not presently in use * - *----------------------------------------------------------------------*/ -/*! - * \brief fpixSampledDisparity() - * - * \param[in] fpixs full resolution disparity model - * \param[in] sampling sampling factor - * \return fpixd sampled disparity model, or NULL on error - * - *
- * Notes:
- *      (1) This converts full to sampled disparity.
- *      (2) The input array is sampled at the right and top edges, and
- *          at every %sampling pixels horizontally and vertically.
- *      (3) The sampled array may not extend to the right and bottom
- *          pixels in fpixs.  This will occur if fpixs was generated
- *          with slope extension because the image on that page was
- *          larger than normal.  This is fine, because in use the
- *          sampled array will be interpolated back to full resolution
- *          and then extended as required.  So the operations of
- *          sampling and interpolation will be idempotent.
- *      (4) There must be at least 3 sampled points horizontally and
- *          vertically.
- * 
- */ -static FPIX * -fpixSampledDisparity(FPIX *fpixs, - l_int32 sampling) -{ -l_int32 w, h, wd, hd, i, j, is, js; -l_float32 val; -FPIX *fpixd; - - PROCNAME("fpixSampledDisparity"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (sampling < 1) - return (FPIX *)ERROR_PTR("sampling < 1", procName, NULL); - - fpixGetDimensions(fpixs, &w, &h); - wd = 1 + (w + sampling - 2) / sampling; - hd = 1 + (h + sampling - 2) / sampling; - if (wd < 3 || hd < 3) - return (FPIX *)ERROR_PTR("wd < 3 or hd < 3", procName, NULL); - fpixd = fpixCreate(wd, hd); - for (i = 0; i < hd; i++) { - is = sampling * i; - if (is >= h) continue; - for (j = 0; j < wd; j++) { - js = sampling * j; - if (js >= w) continue; - fpixGetPixel(fpixs, js, is, &val); - fpixSetPixel(fpixd, j, i, val); - } - } - - return fpixd; -} - -static const l_float32 DefaultSlopeFactor = 0.1; /* just a guess; fix it */ - -/*! - * \brief fpixExtraHorizDisparity() - * - * \param[in] fpixv vertical disparity model - * \param[in] factor conversion factor for vertical disparity slope; - * use 0 for default - * \param[out] pxwid extra width to be added to dewarped pix - * \return fpixh, or NULL on error - * - *
- * Notes:
- *      (1) This takes the difference in vertical disparity at top
- *          and bottom of the image, and converts it to an assumed
- *          horizontal disparity.  In use, we add this to the
- *          horizontal disparity determined by the left and right
- *          ends of textlines.
- *      (2) Usage:
- *            l_int32 xwid = [extra width to be added to fpix and image]
- *            FPix *fpix = fpixExtraHorizDisparity(dew->fullvdispar, 0, &xwid);
- *            fpixLinearCombination(dew->fullhdispar, dew->fullhdispar,
- *                                  fpix, 1.0, 1.0);
- * 
- */ -static FPIX * -fpixExtraHorizDisparity(FPIX *fpixv, - l_float32 factor, - l_int32 *pxwid) -{ -l_int32 w, h, i, j, fw, wpl, maxloc; -l_float32 val1, val2, vdisp, vdisp0, maxval; -l_float32 *data, *line, *fadiff; -NUMA *nadiff; -FPIX *fpixh; - - PROCNAME("fpixExtraHorizDisparity"); - - if (!fpixv) - return (FPIX *)ERROR_PTR("fpixv not defined", procName, NULL); - if (!pxwid) - return (FPIX *)ERROR_PTR("&xwid not defined", procName, NULL); - if (factor == 0.0) - factor = DefaultSlopeFactor; - - /* Estimate horizontal disparity from the vertical disparity - * difference between the top and bottom, normalized to the - * image height. Add the maximum value to the width of the - * output image, so that all src pixels can be mapped - * into the dest. */ - fpixGetDimensions(fpixv, &w, &h); - nadiff = numaCreate(w); - for (j = 0; j < w; j++) { - fpixGetPixel(fpixv, j, 0, &val1); - fpixGetPixel(fpixv, j, h - 1, &val2); - vdisp = factor * (val2 - val1) / (l_float32)h; - if (j == 0) vdisp0 = vdisp; - vdisp = vdisp0 - vdisp; - numaAddNumber(nadiff, vdisp); - } - numaGetMax(nadiff, &maxval, &maxloc); - *pxwid = (l_int32)(maxval + 0.5); - - fw = w + *pxwid; - fpixh = fpixCreate(fw, h); - data = fpixGetData(fpixh); - wpl = fpixGetWpl(fpixh); - fadiff = numaGetFArray(nadiff, L_NOCOPY); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < fw; j++) { - if (j < maxloc) /* this may not work for even pages */ - line[j] = fadiff[j]; - else /* keep it at the max value the rest of the way across */ - line[j] = maxval; - } - } - - numaDestroy(&nadiff); - return fpixh; -} -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp4.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp4.c deleted file mode 100644 index 728c7415..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dewarp4.c +++ /dev/null @@ -1,1175 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. 
All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file dewarp4.c - *
- *
- *    Single page dewarper
- *
- *    Reference model (book-level, dewarpa) operations and debugging output
- *
- *      Top-level single page dewarper
- *          l_int32            dewarpSinglePage()
- *          l_int32            dewarpSinglePageInit()
- *          l_int32            dewarpSinglePageRun()
- *
- *      Operations on dewarpa
- *          l_int32            dewarpaListPages()
- *          l_int32            dewarpaSetValidModels()
- *          l_int32            dewarpaInsertRefModels()
- *          l_int32            dewarpaStripRefModels()
- *          l_int32            dewarpaRestoreModels()
- *
- *      Dewarp debugging output
- *          l_int32            dewarpaInfo()
- *          l_int32            dewarpaModelStats()
- *          static l_int32     dewarpaTestForValidModel()
- *          l_int32            dewarpaShowArrays()
- *          l_int32            dewarpDebug()
- *          l_int32            dewarpShowResults()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_int32 dewarpaTestForValidModel(L_DEWARPA *dewa, L_DEWARP *dew, - l_int32 notests); - -#ifndef NO_CONSOLE_IO -#define DEBUG_INVALID_MODELS 0 /* set this to 1 for debugging */ -#endif /* !NO_CONSOLE_IO */ - - /* Special parameter value */ -static const l_int32 GrayInValue = 200; - -/*----------------------------------------------------------------------* - * Top-level single page dewarper * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpSinglePage() - * - * \param[in] pixs with text, any depth - * \param[in] thresh for global thresh to 1 bpp; ignore otherwise - * \param[in] adaptive 1 for adaptive thresh; 0 for global threshold - * \param[in] useboth 1 for both horiz and vert; 0 for vertical only - * \param[in] check_columns 1 to skip horizontal if multiple columns; - * 0 otherwise; default is to skip - * \param[out] ppixd dewarped result - * \param[out] pdewa [optional] dewa with single page; NULL to skip - * \param[in] debug 1 for debugging output, 0 otherwise - * \return 0 if OK, 1 on error list of page numbers, or NULL on error - * - *
- * Notes:
- *      (1) Dewarps pixs and returns the result in &pixd.
- *      (2) This uses default values for all model parameters.
- *      (3) If pixs is 1 bpp, the parameters %adaptive and %thresh are ignored.
- *      (4) If it can't build a model, returns a copy of pixs in &pixd.
- * 
- */ -l_ok -dewarpSinglePage(PIX *pixs, - l_int32 thresh, - l_int32 adaptive, - l_int32 useboth, - l_int32 check_columns, - PIX **ppixd, - L_DEWARPA **pdewa, - l_int32 debug) -{ -L_DEWARPA *dewa; -PIX *pixb; - - PROCNAME("dewarpSinglePage"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (pdewa) *pdewa = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - dewarpSinglePageInit(pixs, thresh, adaptive, useboth, - check_columns, &pixb, &dewa); - if (!pixb) { - dewarpaDestroy(&dewa); - return ERROR_INT("pixb not made", procName, 1); - } - - dewarpSinglePageRun(pixs, pixb, dewa, ppixd, debug); - - if (pdewa) - *pdewa = dewa; - else - dewarpaDestroy(&dewa); - pixDestroy(&pixb); - return 0; -} - - -/*! - * \brief dewarpSinglePageInit() - * - * \param[in] pixs with text, any depth - * \param[in] thresh for global thresh to 1 bpp; ignore otherwise - * \param[in] adaptive 1 for adaptive thresh; 0 for global threshold - * \param[in] useboth 1 for both horiz and vert; 0 for vertical only - * \param[in] check_columns 1 to skip horizontal if multiple columns; - * 0 otherwise; default is to skip - * \param[out] ppixb 1 bpp debug image - * \param[out] pdewa initialized dewa - * \return 0 if OK, 1 on error list of page numbers, or NULL on error - * - *
- * Notes:
- *      (1) This binarizes the input pixs if necessary, returning the
- *          binarized image.  It also initializes the dewa to default values
- *          for the model parameters.
- *      (2) If pixs is 1 bpp, the parameters %adaptive and %thresh are ignored.
- *      (3) To change the model parameters, call dewarpaSetCurvatures()
- *          before running dewarpSinglePageRun().  For example:
- *             dewarpSinglePageInit(pixs, 0, 1, 1, 1, &pixb, &dewa);
- *             dewarpaSetCurvatures(dewa, 250, -1, -1, 80, 70, 150);
- *             dewarpSinglePageRun(pixs, pixb, dewa, &pixd, 0);
- *             dewarpaDestroy(&dewa);
- *             pixDestroy(&pixb);
- * 
- */ -l_ok -dewarpSinglePageInit(PIX *pixs, - l_int32 thresh, - l_int32 adaptive, - l_int32 useboth, - l_int32 check_columns, - PIX **ppixb, - L_DEWARPA **pdewa) -{ -PIX *pix1; - - PROCNAME("dewarpSinglePageInit"); - - if (ppixb) *ppixb = NULL; - if (pdewa) *pdewa = NULL; - if (!ppixb || !pdewa) - return ERROR_INT("&pixb and &dewa not both defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - *pdewa = dewarpaCreate(1, 0, 1, 0, -1); - dewarpaUseBothArrays(*pdewa, useboth); - dewarpaSetCheckColumns(*pdewa, check_columns); - - /* Generate a binary image, if necessary */ - if (pixGetDepth(pixs) > 1) { - pix1 = pixConvertTo8(pixs, 0); - if (adaptive) - *ppixb = pixAdaptThresholdToBinary(pix1, NULL, 1.0); - else - *ppixb = pixThresholdToBinary(pix1, thresh); - pixDestroy(&pix1); - } else { - *ppixb = pixClone(pixs); - } - return 0; -} - - -/*! - * \brief dewarpSinglePageRun() - * - * \param[in] pixs any depth - * \param[in] pixb 1 bpp - * \param[in] dewa initialized - * \param[out] ppixd dewarped result - * \param[in] debug 1 for debugging output, 0 otherwise - * \return 0 if OK, 1 on error list of page numbers, or NULL on error - * - *
- * Notes:
- *      (1) Dewarps pixs and returns the result in &pixd.
- *      (2) The 1 bpp version %pixb and %dewa are conveniently generated by
- *          dewarpSinglePageInit().
- *      (3) Non-default model parameters must be set before calling this.
- *      (4) If a model cannot be built, this returns a copy of pixs in &pixd.
- * 
- */ -l_ok -dewarpSinglePageRun(PIX *pixs, - PIX *pixb, - L_DEWARPA *dewa, - PIX **ppixd, - l_int32 debug) -{ -const char *debugfile; -l_int32 vsuccess, ret; -L_DEWARP *dew; - - PROCNAME("dewarpSinglePageRun"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixb) - return ERROR_INT("pixb not defined", procName, 1); - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - if (debug) - lept_mkdir("lept/dewarp"); - - /* Generate the page model */ - dew = dewarpCreate(pixb, 0); - dewarpaInsertDewarp(dewa, dew); - debugfile = (debug) ? "/tmp/lept/dewarp/singlepage_model.pdf" : NULL; - dewarpBuildPageModel(dew, debugfile); - dewarpaModelStatus(dewa, 0, &vsuccess, NULL); - if (vsuccess == 0) { - L_ERROR("failure to build model for vertical disparity\n", procName); - *ppixd = pixCopy(NULL, pixs); - return 0; - } - - /* Apply the page model */ - debugfile = (debug) ? "/tmp/lept/dewarp/singlepage_apply.pdf" : NULL; - ret = dewarpaApplyDisparity(dewa, 0, pixs, 255, 0, 0, ppixd, debugfile); - if (ret) - L_ERROR("invalid model; failure to apply disparity\n", procName); - return 0; -} - - -/*----------------------------------------------------------------------* - * Operations on dewarpa * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaListPages() - * - * \param[in] dewa populated with dewarp structs for pages - * \return 0 if OK, 1 on error list of page numbers, or NULL on error - * - *
- * Notes:
- *      (1) This generates two numas, stored in the dewarpa, that give:
- *          (a) the page number for each dew that has a page model.
- *          (b) the page number for each dew that has either a page
- *              model or a reference model.
- *          It can be called at any time.
- *      (2) It is called by the dewarpa serializer before writing.
- * 
- */ -l_ok -dewarpaListPages(L_DEWARPA *dewa) -{ -l_int32 i; -L_DEWARP *dew; -NUMA *namodels, *napages; - - PROCNAME("dewarpaListPages"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - numaDestroy(&dewa->namodels); - numaDestroy(&dewa->napages); - namodels = numaCreate(dewa->maxpage + 1); - napages = numaCreate(dewa->maxpage + 1); - dewa->namodels = namodels; - dewa->napages = napages; - for (i = 0; i <= dewa->maxpage; i++) { - if ((dew = dewarpaGetDewarp(dewa, i)) != NULL) { - if (dew->hasref == 0) - numaAddNumber(namodels, dew->pageno); - numaAddNumber(napages, dew->pageno); - } - } - return 0; -} - - -/*! - * \brief dewarpaSetValidModels() - * - * \param[in] dewa - * \param[in] notests - * \param[in] debug 1 to output information on invalid page models - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) A valid model must meet the rendering requirements, which
- *          include whether or not a vertical disparity model exists
- *          and conditions on curvatures for vertical and horizontal
- *          disparity models.
- *      (2) If %notests == 1, this ignores the curvature constraints
- *          and assumes that all successfully built models are valid.
- *      (3) This function does not need to be called by the application.
- *          It is called by dewarpaInsertRefModels(), which
- *          will destroy all invalid dewarps.  Consequently, to inspect
- *          an invalid dewarp model, it must be done before calling
- *          dewarpaInsertRefModels().
- * 
- */ -l_ok -dewarpaSetValidModels(L_DEWARPA *dewa, - l_int32 notests, - l_int32 debug) -{ -l_int32 i, n, maxcurv, diffcurv, diffedge; -L_DEWARP *dew; - - PROCNAME("dewarpaSetValidModels"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - n = dewa->maxpage + 1; - for (i = 0; i < n; i++) { - if ((dew = dewarpaGetDewarp(dewa, i)) == NULL) - continue; - - if (debug) { - if (dew->hasref == 1) { - L_INFO("page %d: has only a ref model\n", procName, i); - } else if (dew->vsuccess == 0) { - L_INFO("page %d: no model successfully built\n", - procName, i); - } else if (!notests) { - maxcurv = L_MAX(L_ABS(dew->mincurv), L_ABS(dew->maxcurv)); - diffcurv = dew->maxcurv - dew->mincurv; - if (dewa->useboth && !dew->hsuccess) - L_INFO("page %d: useboth, but no horiz disparity\n", - procName, i); - if (maxcurv > dewa->max_linecurv) - L_INFO("page %d: max curvature %d > max_linecurv\n", - procName, i, diffcurv); - if (diffcurv < dewa->min_diff_linecurv) - L_INFO("page %d: diff curv %d < min_diff_linecurv\n", - procName, i, diffcurv); - if (diffcurv > dewa->max_diff_linecurv) - L_INFO("page %d: abs diff curv %d > max_diff_linecurv\n", - procName, i, diffcurv); - if (dew->hsuccess) { - if (L_ABS(dew->leftslope) > dewa->max_edgeslope) - L_INFO("page %d: abs left slope %d > max_edgeslope\n", - procName, i, dew->leftslope); - if (L_ABS(dew->rightslope) > dewa->max_edgeslope) - L_INFO("page %d: abs right slope %d > max_edgeslope\n", - procName, i, dew->rightslope); - diffedge = L_ABS(dew->leftcurv - dew->rightcurv); - if (L_ABS(dew->leftcurv) > dewa->max_edgecurv) - L_INFO("page %d: left curvature %d > max_edgecurv\n", - procName, i, dew->leftcurv); - if (L_ABS(dew->rightcurv) > dewa->max_edgecurv) - L_INFO("page %d: right curvature %d > max_edgecurv\n", - procName, i, dew->rightcurv); - if (diffedge > dewa->max_diff_edgecurv) - L_INFO("page %d: abs diff left-right curv %d > " - "max_diff_edgecurv\n", procName, i, diffedge); - } - } - } - - 
dewarpaTestForValidModel(dewa, dew, notests); - } - - return 0; -} - - -/*! - * \brief dewarpaInsertRefModels() - * - * \param[in] dewa - * \param[in] notests if 1, ignore curvature constraints on model - * \param[in] debug 1 to output information on invalid page models - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys all dewarp models that are invalid, and then
- *          inserts reference models where possible.
- *      (2) If %notests == 1, this ignores the curvature constraints
- *          and assumes that all successfully built models are valid.
- *      (3) If useboth == 0, it uses the closest valid model within the
- *          distance and parity constraints.  If useboth == 1, it tries
- *          to use the closest allowed hvalid model; if it doesn't find
- *          an hvalid model, it uses the closest valid model.
- *      (4) For all pages without a model, this clears out any existing
- *          invalid and reference dewarps, finds the nearest valid model
- *          with the same parity, and inserts an empty dewarp with the
- *          reference page.
- *      (5) Then if it is requested to use both vertical and horizontal
- *          disparity arrays (useboth == 1), it tries to replace any
- *          hvalid == 0 model or reference with an hvalid == 1 reference.
- *      (6) The distance constraint is that any reference model must
- *          be within maxdist.  Note that with the parity constraint,
- *          no reference models will be used if maxdist < 2.
- *      (7) This function must be called, even if reference models will
- *          not be used.  It should be called after building models on all
- *          available pages, and after setting the rendering parameters.
- *      (8) If the dewa has been serialized, this function is called by
- *          dewarpaRead() when it is read back.  It is also called
- *          any time the rendering parameters are changed.
- *      (9) Note: if this has been called with useboth == 1, and useboth
- *          is reset to 0, you should first call dewarpaRestoreModels()
- *          to bring real models from the cache back to the primary array.
- * 
- */ -l_ok -dewarpaInsertRefModels(L_DEWARPA *dewa, - l_int32 notests, - l_int32 debug) -{ -l_int32 i, j, n, val, min, distdown, distup; -L_DEWARP *dew; -NUMA *na, *nah; - - PROCNAME("dewarpaInsertRefModels"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - if (dewa->maxdist < 2) - L_INFO("maxdist < 2; no ref models can be used\n", procName); - - /* Make an indicator numa for pages with valid models. */ - dewarpaSetValidModels(dewa, notests, debug); - n = dewa->maxpage + 1; - na = numaMakeConstant(0, n); - for (i = 0; i < n; i++) { - dew = dewarpaGetDewarp(dewa, i); - if (dew && dew->vvalid) - numaReplaceNumber(na, i, 1); - } - - /* Remove all existing ref models and restore models from cache */ - dewarpaRestoreModels(dewa); - - /* Move invalid models to the cache, and insert reference dewarps - * for pages that need to borrow a model. - * First, try to find a valid model for each page. */ - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &val); - if (val == 1) continue; /* already has a valid model */ - if ((dew = dewa->dewarp[i]) != NULL) { /* exists but is not valid; */ - dewa->dewarpcache[i] = dew; /* move it to the cache */ - dewa->dewarp[i] = NULL; - } - if (dewa->maxdist < 2) continue; /* can't use a ref model */ - /* Look back for nearest model */ - distdown = distup = dewa->maxdist + 1; - for (j = i - 2; j >= 0 && distdown > dewa->maxdist; j -= 2) { - numaGetIValue(na, j, &val); - if (val == 1) distdown = i - j; - } - /* Look ahead for nearest model */ - for (j = i + 2; j < n && distup > dewa->maxdist; j += 2) { - numaGetIValue(na, j, &val); - if (val == 1) distup = j - i; - } - min = L_MIN(distdown, distup); - if (min > dewa->maxdist) continue; /* no valid model in range */ - if (distdown <= distup) - dewarpaInsertDewarp(dewa, dewarpCreateRef(i, i - distdown)); - else - dewarpaInsertDewarp(dewa, dewarpCreateRef(i, i + distup)); - } - numaDestroy(&na); - - /* If a valid model will do, we're finished. 
*/ - if (dewa->useboth == 0) { - dewa->modelsready = 1; /* validated */ - return 0; - } - - /* The request is useboth == 1. Now try to find an hvalid model */ - nah = numaMakeConstant(0, n); - for (i = 0; i < n; i++) { - dew = dewarpaGetDewarp(dewa, i); - if (dew && dew->hvalid) - numaReplaceNumber(nah, i, 1); - } - for (i = 0; i < n; i++) { - numaGetIValue(nah, i, &val); - if (val == 1) continue; /* already has a hvalid model */ - if (dewa->maxdist < 2) continue; /* can't use a ref model */ - distdown = distup = 100000; - for (j = i - 2; j >= 0; j -= 2) { /* look back for nearest model */ - numaGetIValue(nah, j, &val); - if (val == 1) { - distdown = i - j; - break; - } - } - for (j = i + 2; j < n; j += 2) { /* look ahead for nearest model */ - numaGetIValue(nah, j, &val); - if (val == 1) { - distup = j - i; - break; - } - } - min = L_MIN(distdown, distup); - if (min > dewa->maxdist) continue; /* no hvalid model within range */ - - /* We can replace the existing valid model with an hvalid model. - * If it's not a reference, save it in the cache. */ - if ((dew = dewarpaGetDewarp(dewa, i)) == NULL) { - L_ERROR("dew is null for page %d!\n", procName, i); - } else { - if (dew->hasref == 0) { /* not a ref model */ - dewa->dewarpcache[i] = dew; /* move it to the cache */ - dewa->dewarp[i] = NULL; /* must null the ptr */ - } - } - if (distdown <= distup) /* insert the hvalid ref model */ - dewarpaInsertDewarp(dewa, dewarpCreateRef(i, i - distdown)); - else - dewarpaInsertDewarp(dewa, dewarpCreateRef(i, i + distup)); - } - numaDestroy(&nah); - - dewa->modelsready = 1; /* validated */ - return 0; -} - - -/*! - * \brief dewarpaStripRefModels() - * - * \param[in] dewa populated with dewarp structs for pages - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This examines each dew in a dewarpa, and removes
- *          all that don't have their own page model (i.e., all
- *          that have "references" to nearby pages with valid models).
- *          These references were generated by dewarpaInsertRefModels(dewa).
- * 
- */ -l_ok -dewarpaStripRefModels(L_DEWARPA *dewa) -{ -l_int32 i; -L_DEWARP *dew; - - PROCNAME("dewarpaStripRefModels"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - for (i = 0; i <= dewa->maxpage; i++) { - if ((dew = dewarpaGetDewarp(dewa, i)) != NULL) { - if (dew->hasref) - dewarpDestroy(&dewa->dewarp[i]); - } - } - dewa->modelsready = 0; - - /* Regenerate the page lists */ - dewarpaListPages(dewa); - return 0; -} - - -/*! - * \brief dewarpaRestoreModels() - * - * \param[in] dewa populated with dewarp structs for pages - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This puts all real models (and only real models) in the
- *          primary dewarpa array.  First remove all dewarps that are
- *          only references to other page models.  Then move all models
- *          that had been cached back into the primary dewarp array.
- *      (2) After this is done, we still need to recompute and insert
- *          the reference models before dewa->modelsready is true.
- * 
- */ -l_ok -dewarpaRestoreModels(L_DEWARPA *dewa) -{ -l_int32 i; -L_DEWARP *dew; - - PROCNAME("dewarpaRestoreModels"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - /* Strip out ref models. Then only real models will be in the - * primary dewarp array. */ - dewarpaStripRefModels(dewa); - - /* The cache holds only real models, which are not necessarily valid. */ - for (i = 0; i <= dewa->maxpage; i++) { - if ((dew = dewa->dewarpcache[i]) != NULL) { - if (dewa->dewarp[i]) { - L_ERROR("dew in both cache and main array!: page %d\n", - procName, i); - } else { - dewa->dewarp[i] = dew; - dewa->dewarpcache[i] = NULL; - } - } - } - dewa->modelsready = 0; /* new ref models not yet inserted */ - - /* Regenerate the page lists */ - dewarpaListPages(dewa); - return 0; -} - - -/*----------------------------------------------------------------------* - * Dewarp debugging output * - *----------------------------------------------------------------------*/ -/*! - * \brief dewarpaInfo() - * - * \param[in] fp - * \param[in] dewa - * \return 0 if OK, 1 on error - */ -l_ok -dewarpaInfo(FILE *fp, - L_DEWARPA *dewa) -{ -l_int32 i, n, pageno, nnone, nvsuccess, nvvalid, nhsuccess, nhvalid, nref; -L_DEWARP *dew; - - PROCNAME("dewarpaInfo"); - - if (!fp) - return ERROR_INT("dewa not defined", procName, 1); - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - fprintf(fp, "\nDewarpaInfo: %p\n", dewa); - fprintf(fp, "nalloc = %d, maxpage = %d\n", dewa->nalloc, dewa->maxpage); - fprintf(fp, "sampling = %d, redfactor = %d, minlines = %d\n", - dewa->sampling, dewa->redfactor, dewa->minlines); - fprintf(fp, "maxdist = %d, useboth = %d\n", - dewa->maxdist, dewa->useboth); - - dewarpaModelStats(dewa, &nnone, &nvsuccess, &nvvalid, - &nhsuccess, &nhvalid, &nref); - n = numaGetCount(dewa->napages); - lept_stderr("Total number of pages with a dew = %d\n", n); - lept_stderr("Number of pages without any models = %d\n", nnone); - lept_stderr("Number of pages with a 
vert model = %d\n", nvsuccess); - lept_stderr("Number of pages with a valid vert model = %d\n", nvvalid); - lept_stderr("Number of pages with both models = %d\n", nhsuccess); - lept_stderr("Number of pages with both models valid = %d\n", nhvalid); - lept_stderr("Number of pages with a ref model = %d\n", nref); - - for (i = 0; i < n; i++) { - numaGetIValue(dewa->napages, i, &pageno); - if ((dew = dewarpaGetDewarp(dewa, pageno)) == NULL) - continue; - lept_stderr("Page: %d\n", dew->pageno); - lept_stderr(" hasref = %d, refpage = %d\n", - dew->hasref, dew->refpage); - lept_stderr(" nlines = %d\n", dew->nlines); - lept_stderr(" w = %d, h = %d, nx = %d, ny = %d\n", - dew->w, dew->h, dew->nx, dew->ny); - if (dew->sampvdispar) - lept_stderr(" Vertical disparity builds:\n" - " (min,max,abs-diff) line curvature = (%d,%d,%d)\n", - dew->mincurv, dew->maxcurv, dew->maxcurv - dew->mincurv); - if (dew->samphdispar) - lept_stderr(" Horizontal disparity builds:\n" - " left edge slope = %d, right edge slope = %d\n" - " (left,right,abs-diff) edge curvature = (%d,%d,%d)\n", - dew->leftslope, dew->rightslope, dew->leftcurv, - dew->rightcurv, L_ABS(dew->leftcurv - dew->rightcurv)); - } - return 0; -} - - -/*! - * \brief dewarpaModelStats() - * - * \param[in] dewa - * \param[out] pnnone [optional] number without any model - * \param[out] pnvsuccess [optional] number with a vert model - * \param[out] pnvvalid [optional] number with a valid vert model - * \param[out] pnhsuccess [optional] number with both models - * \param[out] pnhvalid [optional] number with both models valid - * \param[out] pnref [optional] number with a reference model - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) A page without a model has no dew.  It most likely failed to
- *          generate a vertical model, and has not been assigned a ref
- *          model from a neighboring page with a valid vertical model.
- *      (2) A page has vsuccess == 1 if there is at least a model of the
- *          vertical disparity.  The model may be invalid, in which case
- *          dewarpaInsertRefModels() will stash it in the cache and
- *          attempt to replace it by a valid ref model.
- *      (3) A vvvalid model is a vertical disparity model whose parameters
- *          satisfy the constraints given in dewarpaSetValidModels().
- *      (4) A page has hsuccess == 1 if both the vertical and horizontal
- *          disparity arrays have been constructed.
- *      (5) An  hvalid model has vertical and horizontal disparity
- *          models whose parameters satisfy the constraints given
- *          in dewarpaSetValidModels().
- *      (6) A page has a ref model if it failed to generate a valid
- *          model but was assigned a vvalid or hvalid model on another
- *          page (within maxdist) by dewarpaInsertRefModel().
- *      (7) This calls dewarpaTestForValidModel(); it ignores the vvalid
- *          and hvalid fields.
- * 
- */ -l_ok -dewarpaModelStats(L_DEWARPA *dewa, - l_int32 *pnnone, - l_int32 *pnvsuccess, - l_int32 *pnvvalid, - l_int32 *pnhsuccess, - l_int32 *pnhvalid, - l_int32 *pnref) -{ -l_int32 i, n, pageno, nnone, nvsuccess, nvvalid, nhsuccess, nhvalid, nref; -L_DEWARP *dew; - - PROCNAME("dewarpaModelStats"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - - dewarpaListPages(dewa); - n = numaGetCount(dewa->napages); - nnone = nref = nvsuccess = nvvalid = nhsuccess = nhvalid = 0; - for (i = 0; i < n; i++) { - numaGetIValue(dewa->napages, i, &pageno); - dew = dewarpaGetDewarp(dewa, pageno); - if (!dew) { - nnone++; - continue; - } - if (dew->hasref == 1) - nref++; - if (dew->vsuccess == 1) - nvsuccess++; - if (dew->hsuccess == 1) - nhsuccess++; - dewarpaTestForValidModel(dewa, dew, 0); - if (dew->vvalid == 1) - nvvalid++; - if (dew->hvalid == 1) - nhvalid++; - } - - if (pnnone) *pnnone = nnone; - if (pnref) *pnref = nref; - if (pnvsuccess) *pnvsuccess = nvsuccess; - if (pnvvalid) *pnvvalid = nvvalid; - if (pnhsuccess) *pnhsuccess = nhsuccess; - if (pnhvalid) *pnhvalid = nhvalid; - return 0; -} - - -/*! - * \brief dewarpaTestForValidModel() - * - * \param[in] dewa - * \param[in] dew - * \param[in] notests - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Computes validity of vertical (vvalid) model and both
- *          vertical and horizontal (hvalid) models.
- *      (2) If %notests == 1, this ignores the curvature constraints
- *          and assumes that all successfully built models are valid.
- *      (3) This is just about the models, not the rendering process,
- *          so the value of useboth is not considered here.
- * 
- */ -static l_int32 -dewarpaTestForValidModel(L_DEWARPA *dewa, - L_DEWARP *dew, - l_int32 notests) -{ -l_int32 maxcurv, diffcurv, diffedge; - - PROCNAME("dewarpaTestForValidModel"); - - if (!dewa || !dew) - return ERROR_INT("dewa and dew not both defined", procName, 1); - - if (notests) { - dew->vvalid = dew->vsuccess; - dew->hvalid = dew->hsuccess; - return 0; - } - - /* No actual model was built */ - if (dew->vsuccess == 0) return 0; - - /* Was previously found not to have a valid model */ - if (dew->hasref == 1) return 0; - - /* vsuccess == 1; a vertical (line) model exists. - * First test that the vertical curvatures are within allowed - * bounds. Note that all curvatures are signed.*/ - maxcurv = L_MAX(L_ABS(dew->mincurv), L_ABS(dew->maxcurv)); - diffcurv = dew->maxcurv - dew->mincurv; - if (maxcurv <= dewa->max_linecurv && - diffcurv >= dewa->min_diff_linecurv && - diffcurv <= dewa->max_diff_linecurv) { - dew->vvalid = 1; - } else { - L_INFO("invalid vert model for page %d:\n", procName, dew->pageno); -#if DEBUG_INVALID_MODELS - lept_stderr(" max line curv = %d, max allowed = %d\n", - maxcurv, dewa->max_linecurv); - lept_stderr(" diff line curv = %d, max allowed = %d\n", - diffcurv, dewa->max_diff_linecurv); -#endif /* DEBUG_INVALID_MODELS */ - } - - /* If a horizontal (edge) model exists, test for validity. 
*/ - if (dew->hsuccess) { - diffedge = L_ABS(dew->leftcurv - dew->rightcurv); - if (L_ABS(dew->leftslope) <= dewa->max_edgeslope && - L_ABS(dew->rightslope) <= dewa->max_edgeslope && - L_ABS(dew->leftcurv) <= dewa->max_edgecurv && - L_ABS(dew->rightcurv) <= dewa->max_edgecurv && - diffedge <= dewa->max_diff_edgecurv) { - dew->hvalid = 1; - } else { - L_INFO("invalid horiz model for page %d:\n", procName, dew->pageno); -#if DEBUG_INVALID_MODELS - lept_stderr(" left edge slope = %d, max allowed = %d\n", - dew->leftslope, dewa->max_edgeslope); - lept_stderr(" right edge slope = %d, max allowed = %d\n", - dew->rightslope, dewa->max_edgeslope); - lept_stderr(" left edge curv = %d, max allowed = %d\n", - dew->leftcurv, dewa->max_edgecurv); - lept_stderr(" right edge curv = %d, max allowed = %d\n", - dew->rightcurv, dewa->max_edgecurv); - lept_stderr(" diff edge curv = %d, max allowed = %d\n", - diffedge, dewa->max_diff_edgecurv); -#endif /* DEBUG_INVALID_MODELS */ - } - } - - return 0; -} - - -/*! - * \brief dewarpaShowArrays() - * - * \param[in] dewa - * \param[in] scalefact on contour images; typ. 0.5 - * \param[in] first first page model to render - * \param[in] last last page model to render; use 0 to go to end - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates a pdf of contour plots of the disparity arrays.
- *      (2) This only shows actual models; not ref models
- * 
- */ -l_ok -dewarpaShowArrays(L_DEWARPA *dewa, - l_float32 scalefact, - l_int32 first, - l_int32 last) -{ -char buf[256]; -l_int32 i, svd, shd; -L_BMF *bmf; -L_DEWARP *dew; -PIX *pixv, *pixvs, *pixh, *pixhs, *pixt, *pixd; -PIXA *pixa; - - PROCNAME("dewarpaShowArrays"); - - if (!dewa) - return ERROR_INT("dew not defined", procName, 1); - if (first < 0 || first > dewa->maxpage) - return ERROR_INT("first out of bounds", procName, 1); - if (last <= 0 || last > dewa->maxpage) last = dewa->maxpage; - if (last < first) - return ERROR_INT("last < first", procName, 1); - - lept_rmdir("lept/dewarp1"); /* temp directory for contour plots */ - lept_mkdir("lept/dewarp1"); - if ((bmf = bmfCreate(NULL, 8)) == NULL) - L_ERROR("bmf not made; page info not displayed", procName); - - lept_stderr("Generating contour plots\n"); - for (i = first; i <= last; i++) { - if (i && ((i % 10) == 0)) - lept_stderr(" .. %d", i); - dew = dewarpaGetDewarp(dewa, i); - if (!dew) continue; - if (dew->hasref == 1) continue; - svd = shd = 0; - if (dew->sampvdispar) svd = 1; - if (dew->samphdispar) shd = 1; - if (!svd) { - L_ERROR("sampvdispar not made for page %d!\n", procName, i); - continue; - } - - /* Generate contour plots at reduced resolution */ - dewarpPopulateFullRes(dew, NULL, 0, 0); - pixv = fpixRenderContours(dew->fullvdispar, 3.0, 0.15); - pixvs = pixScaleBySampling(pixv, scalefact, scalefact); - pixDestroy(&pixv); - if (shd) { - pixh = fpixRenderContours(dew->fullhdispar, 3.0, 0.15); - pixhs = pixScaleBySampling(pixh, scalefact, scalefact); - pixDestroy(&pixh); - } - dewarpMinimize(dew); - - /* Save side-by-side */ - pixa = pixaCreate(2); - pixaAddPix(pixa, pixvs, L_INSERT); - if (shd) - pixaAddPix(pixa, pixhs, L_INSERT); - pixt = pixaDisplayTiledInRows(pixa, 32, 1500, 1.0, 0, 30, 2); - snprintf(buf, sizeof(buf), "Page %d", i); - pixd = pixAddSingleTextblock(pixt, bmf, buf, 0x0000ff00, - L_ADD_BELOW, NULL); - snprintf(buf, sizeof(buf), "/tmp/lept/dewarp1/arrays_%04d.png", i); - 
pixWriteDebug(buf, pixd, IFF_PNG); - pixaDestroy(&pixa); - pixDestroy(&pixt); - pixDestroy(&pixd); - } - bmfDestroy(&bmf); - lept_stderr("\n"); - - lept_stderr("Generating pdf of contour plots\n"); - convertFilesToPdf("/tmp/lept/dewarp1", "arrays_", 90, 1.0, L_FLATE_ENCODE, - 0, "Disparity arrays", "/tmp/lept/disparity_arrays.pdf"); - lept_stderr("Output written to: /tmp/lept/disparity_arrays.pdf\n"); - return 0; -} - - -/*! - * \brief dewarpDebug() - * - * \param[in] dew - * \param[in] subdirs one or more subdirectories of /tmp; e.g., "dew1" - * \param[in] index to help label output images; e.g., the page number - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Prints dewarp fields and generates disparity array contour images.
- *          The contour images are written to file:
- *                /tmp/[subdirs]/pixv_[index].png
- * 
- */ -l_ok -dewarpDebug(L_DEWARP *dew, - const char *subdirs, - l_int32 index) -{ -char fname[256]; -char *outdir; -l_int32 svd, shd; -PIX *pixv, *pixh; - - PROCNAME("dewarpDebug"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); - if (!subdirs) - return ERROR_INT("subdirs not defined", procName, 1); - - lept_stderr("pageno = %d, hasref = %d, refpage = %d\n", - dew->pageno, dew->hasref, dew->refpage); - lept_stderr("sampling = %d, redfactor = %d, minlines = %d\n", - dew->sampling, dew->redfactor, dew->minlines); - svd = shd = 0; - if (!dew->hasref) { - if (dew->sampvdispar) svd = 1; - if (dew->samphdispar) shd = 1; - lept_stderr("sampv = %d, samph = %d\n", svd, shd); - lept_stderr("w = %d, h = %d\n", dew->w, dew->h); - lept_stderr("nx = %d, ny = %d\n", dew->nx, dew->ny); - lept_stderr("nlines = %d\n", dew->nlines); - if (svd) { - lept_stderr("(min,max,abs-diff) line curvature = (%d,%d,%d)\n", - dew->mincurv, dew->maxcurv, dew->maxcurv - dew->mincurv); - } - if (shd) { - lept_stderr("(left edge slope = %d, right edge slope = %d\n", - dew->leftslope, dew->rightslope); - lept_stderr("(left,right,abs-diff) edge curvature = " - "(%d,%d,%d)\n", dew->leftcurv, dew->rightcurv, - L_ABS(dew->leftcurv - dew->rightcurv)); - } - } - if (!svd && !shd) { - lept_stderr("No disparity arrays\n"); - return 0; - } - - dewarpPopulateFullRes(dew, NULL, 0, 0); - lept_mkdir(subdirs); - outdir = pathJoin("/tmp", subdirs); - if (svd) { - pixv = fpixRenderContours(dew->fullvdispar, 3.0, 0.15); - snprintf(fname, sizeof(fname), "%s/pixv_%d.png", outdir, index); - pixWriteDebug(fname, pixv, IFF_PNG); - pixDestroy(&pixv); - } - if (shd) { - pixh = fpixRenderContours(dew->fullhdispar, 3.0, 0.15); - snprintf(fname, sizeof(fname), "%s/pixh_%d.png", outdir, index); - pixWriteDebug(fname, pixh, IFF_PNG); - pixDestroy(&pixh); - } - LEPT_FREE(outdir); - return 0; -} - - -/*! 
- * \brief dewarpShowResults() - * - * \param[in] dewa - * \param[in] sa of indexed input images - * \param[in] boxa crop boxes for input images; can be null - * \param[in] firstpage - * \param[in] lastpage - * \param[in] pdfout filename - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a pdf of image pairs (before, after) for
- *          the designated set of input pages.
- *      (2) If the boxa exists, its elements are aligned with numbers
- *          in the filenames in %sa.  It is used to crop the input images.
- *          It is assumed that the dewa was generated from the cropped
- *          images.  No undercropping is applied before rendering.
- * 
- */ -l_ok -dewarpShowResults(L_DEWARPA *dewa, - SARRAY *sa, - BOXA *boxa, - l_int32 firstpage, - l_int32 lastpage, - const char *pdfout) -{ -char bufstr[256]; -l_int32 i, modelpage; -L_BMF *bmf; -BOX *box; -L_DEWARP *dew; -PIX *pixs, *pixc, *pixd, *pixt1, *pixt2; -PIXA *pixa; - - PROCNAME("dewarpShowResults"); - - if (!dewa) - return ERROR_INT("dewa not defined", procName, 1); - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pdfout) - return ERROR_INT("pdfout not defined", procName, 1); - if (firstpage > lastpage) - return ERROR_INT("invalid first/last page numbers", procName, 1); - - lept_rmdir("lept/dewarp_pdfout"); - lept_mkdir("lept/dewarp_pdfout"); - bmf = bmfCreate(NULL, 6); - - lept_stderr("Dewarping and generating s/by/s view\n"); - for (i = firstpage; i <= lastpage; i++) { - if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); - pixs = pixReadIndexed(sa, i); - if (boxa) { - box = boxaGetBox(boxa, i, L_CLONE); - pixc = pixClipRectangle(pixs, box, NULL); - boxDestroy(&box); - } - else - pixc = pixClone(pixs); - dew = dewarpaGetDewarp(dewa, i); - pixd = NULL; - if (dew) { - dewarpaApplyDisparity(dewa, dew->pageno, pixc, - GrayInValue, 0, 0, &pixd, NULL); - dewarpMinimize(dew); - } - pixa = pixaCreate(2); - pixaAddPix(pixa, pixc, L_INSERT); - if (pixd) - pixaAddPix(pixa, pixd, L_INSERT); - pixt1 = pixaDisplayTiledAndScaled(pixa, 32, 500, 2, 0, 35, 2); - if (dew) { - modelpage = (dew->hasref) ? 
dew->refpage : dew->pageno; - snprintf(bufstr, sizeof(bufstr), "Page %d; using %d\n", - i, modelpage); - } - else - snprintf(bufstr, sizeof(bufstr), "Page %d; no dewarp\n", i); - pixt2 = pixAddSingleTextblock(pixt1, bmf, bufstr, 0x0000ff00, - L_ADD_BELOW, 0); - snprintf(bufstr, sizeof(bufstr), "/tmp/lept/dewarp_pdfout/%05d", i); - pixWriteDebug(bufstr, pixt2, IFF_JFIF_JPEG); - pixaDestroy(&pixa); - pixDestroy(&pixs); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - } - lept_stderr("\n"); - - lept_stderr("Generating pdf of result\n"); - convertFilesToPdf("/tmp/lept/dewarp_pdfout", NULL, 100, 1.0, L_JPEG_ENCODE, - 0, "Dewarp sequence", pdfout); - lept_stderr("Output written to: %s\n", pdfout); - bmfDestroy(&bmf); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnabasic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnabasic.c deleted file mode 100644 index 9c9dba5f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnabasic.c +++ /dev/null @@ -1,1685 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file dnabasic.c - *
- *
- *      Dna creation, destruction, copy, clone, etc.
- *          L_DNA       *l_dnaCreate()
- *          L_DNA       *l_dnaCreateFromIArray()
- *          L_DNA       *l_dnaCreateFromDArray()
- *          L_DNA       *l_dnaMakeSequence()
- *          void        *l_dnaDestroy()
- *          L_DNA       *l_dnaCopy()
- *          L_DNA       *l_dnaClone()
- *          l_int32      l_dnaEmpty()
- *
- *      Dna: add/remove number and extend array
- *          l_int32      l_dnaAddNumber()
- *          static l_int32  l_dnaExtendArray()
- *          l_int32      l_dnaInsertNumber()
- *          l_int32      l_dnaRemoveNumber()
- *          l_int32      l_dnaReplaceNumber()
- *
- *      Dna accessors
- *          l_int32      l_dnaGetCount()
- *          l_int32      l_dnaSetCount()
- *          l_int32      l_dnaGetIValue()
- *          l_int32      l_dnaGetDValue()
- *          l_int32      l_dnaSetValue()
- *          l_int32      l_dnaShiftValue()
- *          l_int32     *l_dnaGetIArray()
- *          l_float64   *l_dnaGetDArray()
- *          l_int32      l_dnaGetRefcount()
- *          l_int32      l_dnaChangeRefcount()
- *          l_int32      l_dnaGetParameters()
- *          l_int32      l_dnaSetParameters()
- *          l_int32      l_dnaCopyParameters()
- *
- *      Serialize Dna for I/O
- *          L_DNA       *l_dnaRead()
- *          L_DNA       *l_dnaReadStream()
- *          l_int32      l_dnaWrite()
- *          l_int32      l_dnaWriteStream()
- *
- *      Dnaa creation, destruction
- *          L_DNAA      *l_dnaaCreate()
- *          L_DNAA      *l_dnaaCreateFull()
- *          l_int32      l_dnaaTruncate()
- *          void        *l_dnaaDestroy()
- *
- *      Add Dna to Dnaa
- *          l_int32      l_dnaaAddDna()
- *          static l_int32  l_dnaaExtendArray()
- *
- *      Dnaa accessors
- *          l_int32      l_dnaaGetCount()
- *          l_int32      l_dnaaGetDnaCount()
- *          l_int32      l_dnaaGetNumberCount()
- *          L_DNA       *l_dnaaGetDna()
- *          L_DNA       *l_dnaaReplaceDna()
- *          l_int32      l_dnaaGetValue()
- *          l_int32      l_dnaaAddNumber()
- *
- *      Serialize Dnaa for I/O
- *          L_DNAA      *l_dnaaRead()
- *          L_DNAA      *l_dnaaReadStream()
- *          l_int32      l_dnaaWrite()
- *          l_int32      l_dnaaWriteStream()
- *
- *    (1) The Dna is a struct holding an array of doubles.  It can also
- *        be used to store l_int32 values, up to the full precision
- *        of int32.  Always use it whenever integers larger than a
- *        few million need to be stored.
- *
- *    (2) Always use the accessors in this file, never the fields directly.
- *
- *    (3) Storing and retrieving numbers:
- *
- *       * to append a new number to the array, use l_dnaAddNumber().  If
- *         the number is an int, it will will automatically be converted
- *         to l_float64 and stored.
- *
- *       * to reset a value stored in the array, use l_dnaSetValue().
- *
- *       * to increment or decrement a value stored in the array,
- *         use l_dnaShiftValue().
- *
- *       * to obtain a value from the array, use either l_dnaGetIValue()
- *         or l_dnaGetDValue(), depending on whether you are retrieving
- *         an integer or a float64.  This avoids doing an explicit cast,
- *         such as
- *           (a) return a l_float64 and cast it to an l_int32
- *           (b) cast the return directly to (l_float64 *) to
- *               satisfy the function prototype, as in
- *                 l_dnaGetDValue(da, index, (l_float64 *)&ival);   [ugly!]
- *
- *    (4) int <--> double conversions:
- *
- *        Conversions go automatically from l_int32 --> l_float64,
- *        without loss of precision.  You must cast (l_int32)
- *        to go from l_float64 --> l_int32 because you're truncating
- *        to the integer value.
- *
- *    (5) As with other arrays in leptonica, the l_dna has both an allocated
- *        size and a count of the stored numbers.  When you add a number, it
- *        goes on the end of the array, and causes a realloc if the array
- *        is already filled.  However, in situations where you want to
- *        add numbers randomly into an array, such as when you build a
- *        histogram, you must set the count of stored numbers in advance.
- *        This is done with l_dnaSetCount().  If you set a count larger
- *        than the allocated array, it does a realloc to the size requested.
- *
- *    (6) In situations where the data in a l_dna correspond to a function
- *        y(x), the values can be either at equal spacings in x or at
- *        arbitrary spacings.  For the former, we can represent all x values
- *        by two parameters: startx (corresponding to y[0]) and delx
- *        for the change in x for adjacent values y[i] and y[i+1].
- *        startx and delx are initialized to 0.0 and 1.0, rsp.
- *        For arbitrary spacings, we use a second l_dna, and the two
- *        l_dnas are typically denoted dnay and dnax.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxArraySize = 100000000; /* dna */ -static const l_uint32 MaxPtrArraySize = 10000; /* dnaa */ -static const l_int32 InitialArraySize = 50; /*!< n'importe quoi */ - - /* Static functions */ -static l_int32 l_dnaExtendArray(L_DNA *da); -static l_int32 l_dnaaExtendArray(L_DNAA *daa); - -/*--------------------------------------------------------------------------* - * Dna creation, destruction, copy, clone, etc. * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaCreate() - * - * \param[in] n size of number array to be alloc'd; 0 for default - * \return da, or NULL on error - */ -L_DNA * -l_dnaCreate(l_int32 n) -{ -L_DNA *da; - - PROCNAME("l_dnaCreate"); - - if (n <= 0 || n > MaxArraySize) - n = InitialArraySize; - - da = (L_DNA *)LEPT_CALLOC(1, sizeof(L_DNA)); - if ((da->array = (l_float64 *)LEPT_CALLOC(n, sizeof(l_float64))) == NULL) { - l_dnaDestroy(&da); - return (L_DNA *)ERROR_PTR("double array not made", procName, NULL); - } - - da->nalloc = n; - da->n = 0; - da->refcount = 1; - da->startx = 0.0; - da->delx = 1.0; - - return da; -} - - -/*! - * \brief l_dnaCreateFromIArray() - * - * \param[in] iarray integer array - * \param[in] size of the array - * \return da, or NULL on error - * - *
- * Notes:
- *      (1) We can't insert this int array into the l_dna, because a l_dna
- *          takes a double array.  So this just copies the data from the
- *          input array into the l_dna.  The input array continues to be
- *          owned by the caller.
- * 
- */ -L_DNA * -l_dnaCreateFromIArray(l_int32 *iarray, - l_int32 size) -{ -l_int32 i; -L_DNA *da; - - PROCNAME("l_dnaCreateFromIArray"); - - if (!iarray) - return (L_DNA *)ERROR_PTR("iarray not defined", procName, NULL); - if (size <= 0) - return (L_DNA *)ERROR_PTR("size must be > 0", procName, NULL); - - da = l_dnaCreate(size); - for (i = 0; i < size; i++) - l_dnaAddNumber(da, iarray[i]); - - return da; -} - - -/*! - * \brief l_dnaCreateFromDArray() - * - * \param[in] darray float - * \param[in] size of the array - * \param[in] copyflag L_INSERT or L_COPY - * \return da, or NULL on error - * - *
- * Notes:
- *      (1) With L_INSERT, ownership of the input array is transferred
- *          to the returned l_dna, and all %size elements are considered
- *          to be valid.
- * 
- */ -L_DNA * -l_dnaCreateFromDArray(l_float64 *darray, - l_int32 size, - l_int32 copyflag) -{ -l_int32 i; -L_DNA *da; - - PROCNAME("l_dnaCreateFromDArray"); - - if (!darray) - return (L_DNA *)ERROR_PTR("darray not defined", procName, NULL); - if (size <= 0) - return (L_DNA *)ERROR_PTR("size must be > 0", procName, NULL); - if (copyflag != L_INSERT && copyflag != L_COPY) - return (L_DNA *)ERROR_PTR("invalid copyflag", procName, NULL); - - da = l_dnaCreate(size); - if (copyflag == L_INSERT) { - if (da->array) LEPT_FREE(da->array); - da->array = darray; - da->n = size; - } else { /* just copy the contents */ - for (i = 0; i < size; i++) - l_dnaAddNumber(da, darray[i]); - } - - return da; -} - - -/*! - * \brief l_dnaMakeSequence() - * - * \param[in] startval - * \param[in] increment - * \param[in] size of sequence - * \return l_dna of sequence of evenly spaced values, or NULL on error - */ -L_DNA * -l_dnaMakeSequence(l_float64 startval, - l_float64 increment, - l_int32 size) -{ -l_int32 i; -l_float64 val; -L_DNA *da; - - PROCNAME("l_dnaMakeSequence"); - - if ((da = l_dnaCreate(size)) == NULL) - return (L_DNA *)ERROR_PTR("da not made", procName, NULL); - - for (i = 0; i < size; i++) { - val = startval + i * increment; - l_dnaAddNumber(da, val); - } - - return da; -} - - -/*! - * \brief l_dnaDestroy() - * - * \param[in,out] pda will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the l_dna.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -l_dnaDestroy(L_DNA **pda) -{ -L_DNA *da; - - PROCNAME("l_dnaDestroy"); - - if (pda == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - - if ((da = *pda) == NULL) - return; - - /* Decrement the ref count. If it is 0, destroy the l_dna. */ - l_dnaChangeRefcount(da, -1); - if (l_dnaGetRefcount(da) <= 0) { - if (da->array) - LEPT_FREE(da->array); - LEPT_FREE(da); - } - - *pda = NULL; - return; -} - - -/*! - * \brief l_dnaCopy() - * - * \param[in] da - * \return copy of da, or NULL on error - * - *
- * Notes:
- *      (1) This removes unused ptrs above da->n.
- * 
- */ -L_DNA * -l_dnaCopy(L_DNA *da) -{ -l_int32 i; -L_DNA *dac; - - PROCNAME("l_dnaCopy"); - - if (!da) - return (L_DNA *)ERROR_PTR("da not defined", procName, NULL); - - if ((dac = l_dnaCreate(da->n)) == NULL) - return (L_DNA *)ERROR_PTR("dac not made", procName, NULL); - dac->startx = da->startx; - dac->delx = da->delx; - - for (i = 0; i < da->n; i++) - l_dnaAddNumber(dac, da->array[i]); - - return dac; -} - - -/*! - * \brief l_dnaClone() - * - * \param[in] da - * \return ptr to same da, or NULL on error - */ -L_DNA * -l_dnaClone(L_DNA *da) -{ - PROCNAME("l_dnaClone"); - - if (!da) - return (L_DNA *)ERROR_PTR("da not defined", procName, NULL); - - l_dnaChangeRefcount(da, 1); - return da; -} - - -/*! - * \brief l_dnaEmpty() - * - * \param[in] da - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This does not change the allocation of the array.
- *          It just clears the number of stored numbers, so that
- *          the array appears to be empty.
- * 
- */ -l_ok -l_dnaEmpty(L_DNA *da) -{ - PROCNAME("l_dnaEmpty"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - - da->n = 0; - return 0; -} - - - -/*--------------------------------------------------------------------------* - * Dna: add/remove number and extend array * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaAddNumber() - * - * \param[in] da - * \param[in] val float or int to be added; stored as a float - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaAddNumber(L_DNA *da, - l_float64 val) -{ -l_int32 n; - - PROCNAME("l_dnaAddNumber"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - - n = l_dnaGetCount(da); - if (n >= da->nalloc) - l_dnaExtendArray(da); - da->array[n] = val; - da->n++; - return 0; -} - - -/*! - * \brief l_dnaExtendArray() - * - * \param[in] da - * \return 0 if OK, 1 on error - */ -static l_int32 -l_dnaExtendArray(L_DNA *da) -{ - PROCNAME("l_dnaExtendArray"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - - if ((da->array = (l_float64 *)reallocNew((void **)&da->array, - sizeof(l_float64) * da->nalloc, - 2 * sizeof(l_float64) * da->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - da->nalloc *= 2; - return 0; -} - - -/*! - * \brief l_dnaInsertNumber() - * - * \param[in] da - * \param[in] index location in da to insert new value - * \param[in] val float64 or integer to be added - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts da[i] --> da[i + 1] for all i >= %index,
- *          and then inserts %val as da[%index].
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- *
- * 
- */ -l_ok -l_dnaInsertNumber(L_DNA *da, - l_int32 index, - l_float64 val) -{ -l_int32 i, n; - - PROCNAME("l_dnaInsertNumber"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - n = l_dnaGetCount(da); - if (index < 0 || index > n) - return ERROR_INT("index not in {0...n}", procName, 1); - - if (n >= da->nalloc) - l_dnaExtendArray(da); - for (i = n; i > index; i--) - da->array[i] = da->array[i - 1]; - da->array[index] = val; - da->n++; - return 0; -} - - -/*! - * \brief l_dnaRemoveNumber() - * - * \param[in] da - * \param[in] index element to be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts da[i] --> da[i - 1] for all i > %index.
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- * 
- */ -l_ok -l_dnaRemoveNumber(L_DNA *da, - l_int32 index) -{ -l_int32 i, n; - - PROCNAME("l_dnaRemoveNumber"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - n = l_dnaGetCount(da); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - for (i = index + 1; i < n; i++) - da->array[i - 1] = da->array[i]; - da->n--; - return 0; -} - - -/*! - * \brief l_dnaReplaceNumber() - * - * \param[in] da - * \param[in] index element to be replaced - * \param[in] val new value to replace old one - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaReplaceNumber(L_DNA *da, - l_int32 index, - l_float64 val) -{ -l_int32 n; - - PROCNAME("l_dnaReplaceNumber"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - n = l_dnaGetCount(da); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - da->array[index] = val; - return 0; -} - - -/*----------------------------------------------------------------------* - * Dna accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaGetCount() - * - * \param[in] da - * \return count, or 0 if no numbers or on error - */ -l_int32 -l_dnaGetCount(L_DNA *da) -{ - PROCNAME("l_dnaGetCount"); - - if (!da) - return ERROR_INT("da not defined", procName, 0); - return da->n; -} - - -/*! - * \brief l_dnaSetCount() - * - * \param[in] da - * \param[in] newcount - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %newcount <= da->nalloc, this resets da->n.
- *          Using %newcount = 0 is equivalent to l_dnaEmpty().
- *      (2) If %newcount > da->nalloc, this causes a realloc
- *          to a size da->nalloc = %newcount.
- *      (3) All the previously unused values in da are set to 0.0.
- * 
- */ -l_ok -l_dnaSetCount(L_DNA *da, - l_int32 newcount) -{ - PROCNAME("l_dnaSetCount"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - if (newcount > da->nalloc) { - if ((da->array = (l_float64 *)reallocNew((void **)&da->array, - sizeof(l_float64) * da->nalloc, - sizeof(l_float64) * newcount)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - da->nalloc = newcount; - } - da->n = newcount; - return 0; -} - - -/*! - * \brief l_dnaGetDValue() - * - * \param[in] da - * \param[in] index into l_dna - * \param[out] pval double value; 0.0 on error - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Caller may need to check the function return value to
- *          decide if a 0.0 in the returned ival is valid.
- * 
- */ -l_ok -l_dnaGetDValue(L_DNA *da, - l_int32 index, - l_float64 *pval) -{ - PROCNAME("l_dnaGetDValue"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!da) - return ERROR_INT("da not defined", procName, 1); - - if (index < 0 || index >= da->n) - return ERROR_INT("index not valid", procName, 1); - - *pval = da->array[index]; - return 0; -} - - -/*! - * \brief l_dnaGetIValue() - * - * \param[in] da - * \param[in] index into l_dna - * \param[out] pival integer value; 0 on error - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Caller may need to check the function return value to
- *          decide if a 0 in the returned ival is valid.
- * 
- */ -l_ok -l_dnaGetIValue(L_DNA *da, - l_int32 index, - l_int32 *pival) -{ -l_float64 val; - - PROCNAME("l_dnaGetIValue"); - - if (!pival) - return ERROR_INT("&ival not defined", procName, 1); - *pival = 0; - if (!da) - return ERROR_INT("da not defined", procName, 1); - - if (index < 0 || index >= da->n) - return ERROR_INT("index not valid", procName, 1); - - val = da->array[index]; - *pival = (l_int32)(val + L_SIGN(val) * 0.5); - return 0; -} - - -/*! - * \brief l_dnaSetValue() - * - * \param[in] da - * \param[in] index to element to be set - * \param[in] val to set element - * \return 0 if OK; 1 on error - */ -l_ok -l_dnaSetValue(L_DNA *da, - l_int32 index, - l_float64 val) -{ - PROCNAME("l_dnaSetValue"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - if (index < 0 || index >= da->n) - return ERROR_INT("index not valid", procName, 1); - - da->array[index] = val; - return 0; -} - - -/*! - * \brief l_dnaShiftValue() - * - * \param[in] da - * \param[in] index to element to change relative to the current value - * \param[in] diff increment if diff > 0 or decrement if diff < 0 - * \return 0 if OK; 1 on error - */ -l_ok -l_dnaShiftValue(L_DNA *da, - l_int32 index, - l_float64 diff) -{ - PROCNAME("l_dnaShiftValue"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - if (index < 0 || index >= da->n) - return ERROR_INT("index not valid", procName, 1); - - da->array[index] += diff; - return 0; -} - - -/*! - * \brief l_dnaGetIArray() - * - * \param[in] da - * \return a copy of the bare internal array, integerized - * by rounding, or NULL on error - *
- * Notes:
- *      (1) A copy of the array is made, because we need to
- *          generate an integer array from the bare double array.
- *          The caller is responsible for freeing the array.
- *      (2) The array size is determined by the number of stored numbers,
- *          not by the size of the allocated array in the l_dna.
- *      (3) This function is provided to simplify calculations
- *          using the bare internal array, rather than continually
- *          calling accessors on the l_dna.  It is typically used
- *          on an array of size 256.
- * 
- */ -l_int32 * -l_dnaGetIArray(L_DNA *da) -{ -l_int32 i, n, ival; -l_int32 *array; - - PROCNAME("l_dnaGetIArray"); - - if (!da) - return (l_int32 *)ERROR_PTR("da not defined", procName, NULL); - - n = l_dnaGetCount(da); - if ((array = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL) - return (l_int32 *)ERROR_PTR("array not made", procName, NULL); - for (i = 0; i < n; i++) { - l_dnaGetIValue(da, i, &ival); - array[i] = ival; - } - - return array; -} - - -/*! - * \brief l_dnaGetDArray() - * - * \param[in] da - * \param[in] copyflag L_NOCOPY or L_COPY - * \return either the bare internal array or a copy of it, or NULL on error - * - *
- * Notes:
- *      (1) If %copyflag == L_COPY, it makes a copy which the caller
- *          is responsible for freeing.  Otherwise, it operates
- *          directly on the bare array of the l_dna.
- *      (2) Very important: for L_NOCOPY, any writes to the array
- *          will be in the l_dna.  Do not write beyond the size of
- *          the count field, because it will not be accessible
- *          from the l_dna!  If necessary, be sure to set the count
- *          field to a larger number (such as the alloc size)
- *          BEFORE calling this function.  Creating with l_dnaMakeConstant()
- *          is another way to insure full initialization.
- * 
- */ -l_float64 * -l_dnaGetDArray(L_DNA *da, - l_int32 copyflag) -{ -l_int32 i, n; -l_float64 *array; - - PROCNAME("l_dnaGetDArray"); - - if (!da) - return (l_float64 *)ERROR_PTR("da not defined", procName, NULL); - - if (copyflag == L_NOCOPY) { - array = da->array; - } else { /* copyflag == L_COPY */ - n = l_dnaGetCount(da); - if ((array = (l_float64 *)LEPT_CALLOC(n, sizeof(l_float64))) == NULL) - return (l_float64 *)ERROR_PTR("array not made", procName, NULL); - for (i = 0; i < n; i++) - array[i] = da->array[i]; - } - - return array; -} - - -/*! - * \brief l_dnaGetRefCount() - * - * \param[in] da - * \return refcount, or UNDEF on error - */ -l_int32 -l_dnaGetRefcount(L_DNA *da) -{ - PROCNAME("l_dnaGetRefcount"); - - if (!da) - return ERROR_INT("da not defined", procName, UNDEF); - return da->refcount; -} - - -/*! - * \brief l_dnaChangeRefCount() - * - * \param[in] da - * \param[in] delta change to be applied - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaChangeRefcount(L_DNA *da, - l_int32 delta) -{ - PROCNAME("l_dnaChangeRefcount"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - da->refcount += delta; - return 0; -} - - -/*! - * \brief l_dnaGetParameters() - * - * \param[in] da - * \param[out] pstartx [optional] startx - * \param[out] pdelx [optional] delx - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaGetParameters(L_DNA *da, - l_float64 *pstartx, - l_float64 *pdelx) -{ - PROCNAME("l_dnaGetParameters"); - - if (pstartx) *pstartx = 0.0; - if (pdelx) *pdelx = 1.0; - if (!pstartx && !pdelx) - return ERROR_INT("neither &startx nor &delx are defined", procName, 1); - if (!da) - return ERROR_INT("da not defined", procName, 1); - - if (pstartx) *pstartx = da->startx; - if (pdelx) *pdelx = da->delx; - return 0; -} - - -/*! 
- * \brief l_dnaSetParameters() - * - * \param[in] da - * \param[in] startx x value corresponding to da[0] - * \param[in] delx difference in x values for the situation where the - * elements of da correspond to the evaulation of a - * function at equal intervals of size %delx - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaSetParameters(L_DNA *da, - l_float64 startx, - l_float64 delx) -{ - PROCNAME("l_dnaSetParameters"); - - if (!da) - return ERROR_INT("da not defined", procName, 1); - - da->startx = startx; - da->delx = delx; - return 0; -} - - -/*! - * \brief l_dnaCopyParameters() - * - * \param[in] dad destination DNuma - * \param[in] das source DNuma - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaCopyParameters(L_DNA *dad, - L_DNA *das) -{ -l_float64 start, binsize; - - PROCNAME("l_dnaCopyParameters"); - - if (!das || !dad) - return ERROR_INT("das and dad not both defined", procName, 1); - - l_dnaGetParameters(das, &start, &binsize); - l_dnaSetParameters(dad, start, binsize); - return 0; -} - - -/*----------------------------------------------------------------------* - * Serialize Dna for I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaRead() - * - * \param[in] filename - * \return da, or NULL on error - */ -L_DNA * -l_dnaRead(const char *filename) -{ -FILE *fp; -L_DNA *da; - - PROCNAME("l_dnaRead"); - - if (!filename) - return (L_DNA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (L_DNA *)ERROR_PTR("stream not opened", procName, NULL); - da = l_dnaReadStream(fp); - fclose(fp); - if (!da) - return (L_DNA *)ERROR_PTR("da not read", procName, NULL); - return da; -} - - -/*! - * \brief l_dnaReadStream() - * - * \param[in] fp file stream - * \return da, or NULL on error - * - *
- * Notes:
- *      (1) fscanf takes %lf to read a double; fprintf takes %f to write it.
- * 
- */ -L_DNA * -l_dnaReadStream(FILE *fp) -{ -l_int32 i, n, index, ret, version; -l_float64 val, startx, delx; -L_DNA *da; - - PROCNAME("l_dnaReadStream"); - - if (!fp) - return (L_DNA *)ERROR_PTR("stream not defined", procName, NULL); - - ret = fscanf(fp, "\nL_Dna Version %d\n", &version); - if (ret != 1) - return (L_DNA *)ERROR_PTR("not a l_dna file", procName, NULL); - if (version != DNA_VERSION_NUMBER) - return (L_DNA *)ERROR_PTR("invalid l_dna version", procName, NULL); - if (fscanf(fp, "Number of numbers = %d\n", &n) != 1) - return (L_DNA *)ERROR_PTR("invalid number of numbers", procName, NULL); - - if (n > MaxArraySize) { - L_ERROR("n = %d > %d\n", procName, n, MaxArraySize); - return NULL; - } - if ((da = l_dnaCreate(n)) == NULL) - return (L_DNA *)ERROR_PTR("da not made", procName, NULL); - for (i = 0; i < n; i++) { - if (fscanf(fp, " [%d] = %lf\n", &index, &val) != 2) { - l_dnaDestroy(&da); - return (L_DNA *)ERROR_PTR("bad input data", procName, NULL); - } - l_dnaAddNumber(da, val); - } - - /* Optional data */ - if (fscanf(fp, "startx = %lf, delx = %lf\n", &startx, &delx) == 2) - l_dnaSetParameters(da, startx, delx); - return da; -} - - -/*! - * \brief l_dnaWrite() - * - * \param[in] filename - * \param[in] da - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaWrite(const char *filename, - L_DNA *da) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("l_dnaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!da) - return ERROR_INT("da not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = l_dnaWriteStream(fp, da); - fclose(fp); - if (ret) - return ERROR_INT("da not written to stream", procName, 1); - return 0; -} - - -/*! 
- * \brief l_dnaWriteStream() - * - * \param[in] fp file stream - * \param[in] da - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaWriteStream(FILE *fp, - L_DNA *da) -{ -l_int32 i, n; -l_float64 startx, delx; - - PROCNAME("l_dnaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!da) - return ERROR_INT("da not defined", procName, 1); - - n = l_dnaGetCount(da); - fprintf(fp, "\nL_Dna Version %d\n", DNA_VERSION_NUMBER); - fprintf(fp, "Number of numbers = %d\n", n); - for (i = 0; i < n; i++) - fprintf(fp, " [%d] = %f\n", i, da->array[i]); - fprintf(fp, "\n"); - - /* Optional data */ - l_dnaGetParameters(da, &startx, &delx); - if (startx != 0.0 || delx != 1.0) - fprintf(fp, "startx = %f, delx = %f\n", startx, delx); - - return 0; -} - - -/*--------------------------------------------------------------------------* - * Dnaa creation, destruction * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaaCreate() - * - * \param[in] n size of l_dna ptr array to be alloc'd 0 for default - * \return daa, or NULL on error - * - */ -L_DNAA * -l_dnaaCreate(l_int32 n) -{ -L_DNAA *daa; - - PROCNAME("l_dnaaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialArraySize; - - daa = (L_DNAA *)LEPT_CALLOC(1, sizeof(L_DNAA)); - if ((daa->dna = (L_DNA **)LEPT_CALLOC(n, sizeof(L_DNA *))) == NULL) { - l_dnaaDestroy(&daa); - return (L_DNAA *)ERROR_PTR("l_dna ptr array not made", procName, NULL); - } - daa->nalloc = n; - daa->n = 0; - return daa; -} - - -/*! - * \brief l_dnaaCreateFull() - * - * \param[in] nptr size of dna ptr array to be alloc'd - * \param[in] n size of individual dna arrays to be alloc'd 0 for default - * \return daa, or NULL on error - * - *
- * Notes:
- *      (1) This allocates a dnaa and fills the array with allocated dnas.
- *          In use, after calling this function, use
- *              l_dnaaAddNumber(dnaa, index, val);
- *          to add val to the index-th dna in dnaa.
- * 
- */ -L_DNAA * -l_dnaaCreateFull(l_int32 nptr, - l_int32 n) -{ -l_int32 i; -L_DNAA *daa; -L_DNA *da; - - daa = l_dnaaCreate(nptr); - for (i = 0; i < nptr; i++) { - da = l_dnaCreate(n); - l_dnaaAddDna(daa, da, L_INSERT); - } - - return daa; -} - - -/*! - * \brief l_dnaaTruncate() - * - * \param[in] daa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This identifies the largest index containing a dna that
- *          has any numbers within it, destroys all dna beyond that
- *          index, and resets the count.
- * 
- */ -l_ok -l_dnaaTruncate(L_DNAA *daa) -{ -l_int32 i, n, nn; -L_DNA *da; - - PROCNAME("l_dnaaTruncate"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - - n = l_dnaaGetCount(daa); - for (i = n - 1; i >= 0; i--) { - da = l_dnaaGetDna(daa, i, L_CLONE); - if (!da) - continue; - nn = l_dnaGetCount(da); - l_dnaDestroy(&da); /* the clone */ - if (nn == 0) - l_dnaDestroy(&daa->dna[i]); - else - break; - } - daa->n = i + 1; - return 0; -} - - -/*! - * \brief l_dnaaDestroy() - * - * \param[in,out] pdaa will be set to null before returning - * \return void - */ -void -l_dnaaDestroy(L_DNAA **pdaa) -{ -l_int32 i; -L_DNAA *daa; - - PROCNAME("l_dnaaDestroy"); - - if (pdaa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((daa = *pdaa) == NULL) - return; - - for (i = 0; i < daa->n; i++) - l_dnaDestroy(&daa->dna[i]); - LEPT_FREE(daa->dna); - LEPT_FREE(daa); - *pdaa = NULL; - - return; -} - - -/*--------------------------------------------------------------------------* - * Add Dna to Dnaa * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaaAddDna() - * - * \param[in] daa - * \param[in] da to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaaAddDna(L_DNAA *daa, - L_DNA *da, - l_int32 copyflag) -{ -l_int32 n; -L_DNA *dac; - - PROCNAME("l_dnaaAddDna"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - if (!da) - return ERROR_INT("da not defined", procName, 1); - - if (copyflag == L_INSERT) { - dac = da; - } else if (copyflag == L_COPY) { - if ((dac = l_dnaCopy(da)) == NULL) - return ERROR_INT("dac not made", procName, 1); - } else if (copyflag == L_CLONE) { - dac = l_dnaClone(da); - } else { - return ERROR_INT("invalid copyflag", procName, 1); - } - - n = l_dnaaGetCount(daa); - if (n >= daa->nalloc) - l_dnaaExtendArray(daa); - daa->dna[n] = dac; - daa->n++; - return 0; -} - - -/*! 
- * \brief l_dnaaExtendArray() - * - * \param[in] daa - * \return 0 if OK, 1 on error - */ -static l_int32 -l_dnaaExtendArray(L_DNAA *daa) -{ - PROCNAME("l_dnaaExtendArray"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - - if ((daa->dna = (L_DNA **)reallocNew((void **)&daa->dna, - sizeof(L_DNA *) * daa->nalloc, - 2 * sizeof(L_DNA *) * daa->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - daa->nalloc *= 2; - return 0; -} - - -/*----------------------------------------------------------------------* - * DNumaa accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaaGetCount() - * - * \param[in] daa - * \return count number of l_dna, or 0 if no l_dna or on error - */ -l_int32 -l_dnaaGetCount(L_DNAA *daa) -{ - PROCNAME("l_dnaaGetCount"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 0); - return daa->n; -} - - -/*! - * \brief l_dnaaGetDnaCount() - * - * \param[in] daa - * \param[in] index of l_dna in daa - * \return count of numbers in the referenced l_dna, or 0 on error. - */ -l_int32 -l_dnaaGetDnaCount(L_DNAA *daa, - l_int32 index) -{ - PROCNAME("l_dnaaGetDnaCount"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 0); - if (index < 0 || index >= daa->n) - return ERROR_INT("invalid index into daa", procName, 0); - return l_dnaGetCount(daa->dna[index]); -} - - -/*! - * \brief l_dnaaGetNumberCount() - * - * \param[in] daa - * \return count total number of numbers in the l_dnaa, - * or 0 if no numbers or on error - */ -l_int32 -l_dnaaGetNumberCount(L_DNAA *daa) -{ -L_DNA *da; -l_int32 n, sum, i; - - PROCNAME("l_dnaaGetNumberCount"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 0); - - n = l_dnaaGetCount(daa); - for (sum = 0, i = 0; i < n; i++) { - da = l_dnaaGetDna(daa, i, L_CLONE); - sum += l_dnaGetCount(da); - l_dnaDestroy(&da); - } - - return sum; -} - - -/*! 
- * \brief l_dnaaGetDna() - * - * \param[in] daa - * \param[in] index to the index-th l_dna - * \param[in] accessflag L_COPY or L_CLONE - * \return l_dna, or NULL on error - */ -L_DNA * -l_dnaaGetDna(L_DNAA *daa, - l_int32 index, - l_int32 accessflag) -{ - PROCNAME("l_dnaaGetDna"); - - if (!daa) - return (L_DNA *)ERROR_PTR("daa not defined", procName, NULL); - if (index < 0 || index >= daa->n) - return (L_DNA *)ERROR_PTR("index not valid", procName, NULL); - - if (accessflag == L_COPY) - return l_dnaCopy(daa->dna[index]); - else if (accessflag == L_CLONE) - return l_dnaClone(daa->dna[index]); - else - return (L_DNA *)ERROR_PTR("invalid accessflag", procName, NULL); -} - - -/*! - * \brief l_dnaaReplaceDna() - * - * \param[in] daa - * \param[in] index to the index-th l_dna - * \param[in] da insert and replace any existing one - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Any existing l_dna is destroyed, and the input one
- *          is inserted in its place.
- *      (2) If %index is invalid, return 1 (error)
- * 
- */ -l_ok -l_dnaaReplaceDna(L_DNAA *daa, - l_int32 index, - L_DNA *da) -{ -l_int32 n; - - PROCNAME("l_dnaaReplaceDna"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - if (!da) - return ERROR_INT("da not defined", procName, 1); - n = l_dnaaGetCount(daa); - if (index < 0 || index >= n) - return ERROR_INT("index not valid", procName, 1); - - l_dnaDestroy(&daa->dna[index]); - daa->dna[index] = da; - return 0; -} - - -/*! - * \brief l_dnaaGetValue() - * - * \param[in] daa - * \param[in] i index of l_dna within l_dnaa - * \param[in] j index into l_dna - * \param[out] pval double value - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaaGetValue(L_DNAA *daa, - l_int32 i, - l_int32 j, - l_float64 *pval) -{ -l_int32 n; -L_DNA *da; - - PROCNAME("l_dnaaGetValue"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - n = l_dnaaGetCount(daa); - if (i < 0 || i >= n) - return ERROR_INT("invalid index into daa", procName, 1); - da = daa->dna[i]; - if (j < 0 || j >= da->n) - return ERROR_INT("invalid index into da", procName, 1); - *pval = da->array[j]; - return 0; -} - - -/*! - * \brief l_dnaaAddNumber() - * - * \param[in] daa - * \param[in] index of l_dna within l_dnaa - * \param[in] val number to be added; stored as a double - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Adds to an existing l_dna only.
- * 
- */ -l_ok -l_dnaaAddNumber(L_DNAA *daa, - l_int32 index, - l_float64 val) -{ -l_int32 n; -L_DNA *da; - - PROCNAME("l_dnaaAddNumber"); - - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - n = l_dnaaGetCount(daa); - if (index < 0 || index >= n) - return ERROR_INT("invalid index in daa", procName, 1); - - da = l_dnaaGetDna(daa, index, L_CLONE); - l_dnaAddNumber(da, val); - l_dnaDestroy(&da); - return 0; -} - - -/*----------------------------------------------------------------------* - * Serialize Dna for I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaaRead() - * - * \param[in] filename - * \return daa, or NULL on error - */ -L_DNAA * -l_dnaaRead(const char *filename) -{ -FILE *fp; -L_DNAA *daa; - - PROCNAME("l_dnaaRead"); - - if (!filename) - return (L_DNAA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (L_DNAA *)ERROR_PTR("stream not opened", procName, NULL); - daa = l_dnaaReadStream(fp); - fclose(fp); - if (!daa) - return (L_DNAA *)ERROR_PTR("daa not read", procName, NULL); - return daa; -} - - -/*! 
- * \brief l_dnaaReadStream() - * - * \param[in] fp file stream - * \return daa, or NULL on error - */ -L_DNAA * -l_dnaaReadStream(FILE *fp) -{ -l_int32 i, n, index, ret, version; -L_DNA *da; -L_DNAA *daa; - - PROCNAME("l_dnaaReadStream"); - - if (!fp) - return (L_DNAA *)ERROR_PTR("stream not defined", procName, NULL); - - ret = fscanf(fp, "\nL_Dnaa Version %d\n", &version); - if (ret != 1) - return (L_DNAA *)ERROR_PTR("not a l_dna file", procName, NULL); - if (version != DNA_VERSION_NUMBER) - return (L_DNAA *)ERROR_PTR("invalid l_dnaa version", procName, NULL); - if (fscanf(fp, "Number of L_Dna = %d\n\n", &n) != 1) - return (L_DNAA *)ERROR_PTR("invalid number of l_dna", procName, NULL); - - if (n > MaxPtrArraySize) { - L_ERROR("n = %d > %d\n", procName, n, MaxPtrArraySize); - return NULL; - } - if ((daa = l_dnaaCreate(n)) == NULL) - return (L_DNAA *)ERROR_PTR("daa not made", procName, NULL); - - for (i = 0; i < n; i++) { - if (fscanf(fp, "L_Dna[%d]:", &index) != 1) { - l_dnaaDestroy(&daa); - return (L_DNAA *)ERROR_PTR("invalid l_dna header", procName, NULL); - } - if ((da = l_dnaReadStream(fp)) == NULL) { - l_dnaaDestroy(&daa); - return (L_DNAA *)ERROR_PTR("da not made", procName, NULL); - } - l_dnaaAddDna(daa, da, L_INSERT); - } - - return daa; -} - - -/*! - * \brief l_dnaaWrite() - * - * \param[in] filename - * \param[in] daa - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaaWrite(const char *filename, - L_DNAA *daa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("l_dnaaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = l_dnaaWriteStream(fp, daa); - fclose(fp); - if (ret) - return ERROR_INT("daa not written to stream", procName, 1); - return 0; -} - - -/*! 
- * \brief l_dnaaWriteStream() - * - * \param[in] fp file stream - * \param[in] daa - * \return 0 if OK, 1 on error - */ -l_ok -l_dnaaWriteStream(FILE *fp, - L_DNAA *daa) -{ -l_int32 i, n; -L_DNA *da; - - PROCNAME("l_dnaaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!daa) - return ERROR_INT("daa not defined", procName, 1); - - n = l_dnaaGetCount(daa); - fprintf(fp, "\nL_Dnaa Version %d\n", DNA_VERSION_NUMBER); - fprintf(fp, "Number of L_Dna = %d\n\n", n); - for (i = 0; i < n; i++) { - if ((da = l_dnaaGetDna(daa, i, L_CLONE)) == NULL) - return ERROR_INT("da not found", procName, 1); - fprintf(fp, "L_Dna[%d]:", i); - l_dnaWriteStream(fp, da); - l_dnaDestroy(&da); - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnafunc1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnafunc1.c deleted file mode 100644 index aba2528d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnafunc1.c +++ /dev/null @@ -1,408 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file dnafunc1.c - *
- *
- *      Rearrangements
- *          l_int32     *l_dnaJoin()
- *          l_int32     *l_dnaaFlattenToDna()
- *
- *      Conversion between numa and dna
- *          NUMA        *l_dnaConvertToNuma()
- *          L_DNA       *numaConvertToDna()
- *
- *      Set operations using aset (rbtree)
- *          L_DNA       *l_dnaUnionByAset()
- *          L_DNA       *l_dnaRemoveDupsByAset()
- *          L_DNA       *l_dnaIntersectionByAset()
- *          L_ASET      *l_asetCreateFromDna()
- *
- *      Miscellaneous operations
- *          L_DNA       *l_dnaDiffAdjValues()
- *
- *
- * This file contains an implementation on sets of doubles (or integers)
- * that uses an underlying tree (rbtree).  The keys stored in the tree
- * are simply the double array values in the dna.  Use of a DnaHash
- * is typically more efficient, with O(1) in lookup and insertion.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*----------------------------------------------------------------------* - * Rearrangements * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaJoin() - * - * \param[in] dad dest dna; add to this one - * \param[in] das [optional] source dna; add from this one - * \param[in] istart starting index in das - * \param[in] iend ending index in das; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (2) iend < 0 means 'read to the end'
- *      (3) if das == NULL, this is a no-op
- * 
- */ -l_ok -l_dnaJoin(L_DNA *dad, - L_DNA *das, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i; -l_float64 val; - - PROCNAME("l_dnaJoin"); - - if (!dad) - return ERROR_INT("dad not defined", procName, 1); - if (!das) - return 0; - - if (istart < 0) - istart = 0; - n = l_dnaGetCount(das); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - l_dnaGetDValue(das, i, &val); - l_dnaAddNumber(dad, val); - } - - return 0; -} - - -/*! - * \brief l_dnaaFlattenToDna() - * - * \param[in] daa - * \return dad, or NULL on error - * - *
- * Notes:
- *      (1) This 'flattens' the dnaa to a dna, by joining successively
- *          each dna in the dnaa.
- *      (2) It leaves the input dnaa unchanged.
- * 
- */ -L_DNA * -l_dnaaFlattenToDna(L_DNAA *daa) -{ -l_int32 i, nalloc; -L_DNA *da, *dad; -L_DNA **array; - - PROCNAME("l_dnaaFlattenToDna"); - - if (!daa) - return (L_DNA *)ERROR_PTR("daa not defined", procName, NULL); - - nalloc = daa->nalloc; - array = daa->dna; - dad = l_dnaCreate(0); - for (i = 0; i < nalloc; i++) { - da = array[i]; - if (!da) continue; - l_dnaJoin(dad, da, 0, -1); - } - - return dad; -} - - -/*----------------------------------------------------------------------* - * Conversion between numa and dna * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaConvertToNuma() - * - * \param[in] da - * \return na, or NULL on error - */ -NUMA * -l_dnaConvertToNuma(L_DNA *da) -{ -l_int32 i, n; -l_float64 val; -NUMA *na; - - PROCNAME("l_dnaConvertToNuma"); - - if (!da) - return (NUMA *)ERROR_PTR("da not defined", procName, NULL); - - n = l_dnaGetCount(da); - na = numaCreate(n); - for (i = 0; i < n; i++) { - l_dnaGetDValue(da, i, &val); - numaAddNumber(na, val); - } - return na; -} - - -/*! - * \brief numaConvertToDna - * - * \param[in] na - * \return da, or NULL on error - */ -L_DNA * -numaConvertToDna(NUMA *na) -{ -l_int32 i, n; -l_float32 val; -L_DNA *da; - - PROCNAME("numaConvertToDna"); - - if (!na) - return (L_DNA *)ERROR_PTR("na not defined", procName, NULL); - - n = numaGetCount(na); - da = l_dnaCreate(n); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - l_dnaAddNumber(da, val); - } - return da; -} - - -/*----------------------------------------------------------------------* - * Set operations using aset (rbtree) * - *----------------------------------------------------------------------*/ -/*! - * \brief l_dnaUnionByAset() - * - * \param[in] da1, da2 - * \return dad with the union of the set of numbers, or NULL on error - * - *
- * Notes:
- *      (1) See sarrayUnionByAset() for the approach.
- *      (2) Here, the key in building the sorted tree is the number itself.
- *      (3) Operations using an underlying tree are O(nlogn), which is
- *          typically less efficient than hashing, which is O(n).
- * 
- */ -L_DNA * -l_dnaUnionByAset(L_DNA *da1, - L_DNA *da2) -{ -L_DNA *da3, *dad; - - PROCNAME("l_dnaUnionByAset"); - - if (!da1) - return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL); - if (!da2) - return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL); - - /* Join */ - da3 = l_dnaCopy(da1); - l_dnaJoin(da3, da2, 0, -1); - - /* Eliminate duplicates */ - dad = l_dnaRemoveDupsByAset(da3); - l_dnaDestroy(&da3); - return dad; -} - - -/*! - * \brief l_dnaRemoveDupsByAset() - * - * \param[in] das - * \return dad with duplicates removed, or NULL on error - */ -L_DNA * -l_dnaRemoveDupsByAset(L_DNA *das) -{ -l_int32 i, n; -l_float64 val; -L_DNA *dad; -L_ASET *set; -RB_TYPE key; - - PROCNAME("l_dnaRemoveDupsByAset"); - - if (!das) - return (L_DNA *)ERROR_PTR("das not defined", procName, NULL); - - set = l_asetCreate(L_FLOAT_TYPE); - dad = l_dnaCreate(0); - n = l_dnaGetCount(das); - for (i = 0; i < n; i++) { - l_dnaGetDValue(das, i, &val); - key.ftype = val; - if (!l_asetFind(set, key)) { - l_dnaAddNumber(dad, val); - l_asetInsert(set, key); - } - } - - l_asetDestroy(&set); - return dad; -} - - -/*! - * \brief l_dnaIntersectionByAset() - * - * \param[in] da1, da2 - * \return dad with the intersection of the two arrays, or NULL on error - * - *
- * Notes:
- *      (1) See sarrayIntersection() for the approach.
- *      (2) Here, the key in building the sorted tree is the number itself.
- *      (3) Operations using an underlying tree are O(nlogn), which is
- *          typically less efficient than hashing, which is O(n).
- * 
- */ -L_DNA * -l_dnaIntersectionByAset(L_DNA *da1, - L_DNA *da2) -{ -l_int32 n1, n2, i, n; -l_float64 val; -L_ASET *set1, *set2; -RB_TYPE key; -L_DNA *da_small, *da_big, *dad; - - PROCNAME("l_dnaIntersectionByAset"); - - if (!da1) - return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL); - if (!da2) - return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL); - - /* Put the elements of the largest array into a set */ - n1 = l_dnaGetCount(da1); - n2 = l_dnaGetCount(da2); - da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ - da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ - set1 = l_asetCreateFromDna(da_big); - - /* Build up the intersection of floats */ - dad = l_dnaCreate(0); - n = l_dnaGetCount(da_small); - set2 = l_asetCreate(L_FLOAT_TYPE); - for (i = 0; i < n; i++) { - l_dnaGetDValue(da_small, i, &val); - key.ftype = val; - if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { - l_dnaAddNumber(dad, val); - l_asetInsert(set2, key); - } - } - - l_asetDestroy(&set1); - l_asetDestroy(&set2); - return dad; -} - - -/*! - * \brief l_asetCreateFromDna() - * - * \param[in] da source dna - * \return set using the doubles in %da as keys - */ -L_ASET * -l_asetCreateFromDna(L_DNA *da) -{ -l_int32 i, n; -l_float64 val; -L_ASET *set; -RB_TYPE key; - - PROCNAME("l_asetCreateFromDna"); - - if (!da) - return (L_ASET *)ERROR_PTR("da not defined", procName, NULL); - - set = l_asetCreate(L_FLOAT_TYPE); - n = l_dnaGetCount(da); - for (i = 0; i < n; i++) { - l_dnaGetDValue(da, i, &val); - key.ftype = val; - l_asetInsert(set, key); - } - - return set; -} - - -/*----------------------------------------------------------------------* - * Miscellaneous operations * - *----------------------------------------------------------------------*/ -/*! 
- * \brief l_dnaDiffAdjValues() - * - * \param[in] das input l_dna - * \return dad of difference values val[i+1] - val[i], - * or NULL on error - */ -L_DNA * -l_dnaDiffAdjValues(L_DNA *das) -{ -l_int32 i, n, prev, cur; -L_DNA *dad; - - PROCNAME("l_dnaDiffAdjValues"); - - if (!das) - return (L_DNA *)ERROR_PTR("das not defined", procName, NULL); - n = l_dnaGetCount(das); - dad = l_dnaCreate(n - 1); - prev = 0; - for (i = 1; i < n; i++) { - l_dnaGetIValue(das, i, &cur); - l_dnaAddNumber(dad, cur - prev); - prev = cur; - } - return dad; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnahash.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnahash.c deleted file mode 100644 index a1c5a452..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dnahash.c +++ /dev/null @@ -1,593 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file dnahash.c - *
- *
- *      DnaHash creation, destruction
- *          L_DNAHASH   *l_dnaHashCreate()
- *          void         l_dnaHashDestroy()
- *
- *      DnaHash: Accessors and modifiers                      *
- *          l_int32      l_dnaHashGetCount()
- *          l_int32      l_dnaHashGetTotalCount()
- *          L_DNA       *l_dnaHashGetDna()
- *          void         l_dnaHashAdd()
- *
- *      DnaHash: Operations on Dna
- *          L_DNAHASH   *l_dnaHashCreateFromDna()
- *          l_int32      l_dnaRemoveDupsByHash()
- *          l_int32      l_dnaMakeHistoByHash()
- *          L_DNA       *l_dnaIntersectionByHash()
- *          l_int32      l_dnaFindValByHash()
- *
- *    (1) The DnaHash is an array of Dna.  It is useful for fast
- *        storage and lookup for sets and maps.  If the set or map
- *        is on a Dna itself, the hash is a simple function that
- *        maps a double to a l_uint64; otherwise the function will
- *        map a string or a (x,y) point to a l_uint64.  The result of
- *        the map is the "key", which is then used with the mod
- *        function to select which Dna array is to be used.  The
- *        number of arrays in a DnaHash should be a prime number.
- *        If there are N items, we set up the DnaHash array to have
- *        approximately N/20 Dna, so the average size of these arrays
- *        will be about 20 when fully populated.  The number 20 was
- *        found empirically to be in a broad maximum of efficiency.
- *    (2) Note that the word "hash" is overloaded.  There are actually
- *        two hashing steps: the first hashes the object to a l_uint64,
- *        called the "key", and the second uses the mod function to
- *        "hash" the "key" to the index of a particular Dna in the
- *        DnaHash array.
- *    (3) Insertion and lookup time for DnaHash is O(1).  Hash collisions
- *        are easily handled (we expect an average of 20 for each key),
- *        so we can use simple (fast) hash functions: we deal with
- *        collisions by storing an array for each hash key.
- *        This can be contrasted with using rbtree for sets and
- *        maps, where insertion and lookup are O(logN) and hash functions
- *        are slower because they must be good enough (i.e, random
- *        enough with arbitrary input) to avoid collisions.
- *    (4) Hash functions that map points, strings and floats to l_uint64
- *        are given in utils.c.
- *    (5) The use of the DnaHash (and RBTree) with strings and
- *        (x,y) points can be found in string2.c and ptafunc2.c, rsp.
- *        This file has similar hash set functions, using DnaHash on
- *        two input Dna, for removing duplicates and finding the
- *        intersection.  It also uses DnaHash as a hash map to find
- *        a histogram of counts from an input Dna.
- *    (6) Comparisons in running time, between DnaHash and RBTree, for
- *        large sets of strings and points, are given in prog/hashtest.c.
- *    (7) This is a very simple implementation, that expects that you
- *        know approximately (i.e., within a factor of 2 or 3) how many
- *        items are to be stored when you initialize the DnaHash.
- *        (It would be nice to modify the l_dnaHashAdd() function
- *        to increase the number of bins when the average occupation
- *        exceeds 40 or so.)
- *    (8) Useful rule of thumb for hashing collisions:
- *        For a random hashing function (say, from strings to l_uint64),
- *        the probability of a collision increases as N^2 for N much
- *        less than 2^32.  The quadratic behavior switches over to
- *        approaching 1.0 around 2^32, which is the square root of 2^64.
- *        So, for example, if you have 10^7 strings, the probability
- *        of a single collision using an l_uint64 key is on the order of
- *            (10^7/10^9)^2 ~ 10^-4.
- *        For a million strings you don't need to worry about collisons
- *        (~10-6 probability), and for most applications can use the
- *        RBTree (sorting) implementation with confidence.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*--------------------------------------------------------------------------* - * Dna hash: Creation and destruction * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaHashCreate() - * - * \param[in] nbuckets the number of buckets in the hash table, - * which should be prime. - * \param[in] initsize initial size of each allocated dna; 0 for default - * \return ptr to new dnahash, or NULL on error - * - *
- * Notes:
- *      (1) Actual dna are created only as required by l_dnaHashAdd()
- * 
- */ -L_DNAHASH * -l_dnaHashCreate(l_int32 nbuckets, - l_int32 initsize) -{ -L_DNAHASH *dahash; - - PROCNAME("l_dnaHashCreate"); - - if (nbuckets <= 0) - return (L_DNAHASH *)ERROR_PTR("negative hash size", procName, NULL); - dahash = (L_DNAHASH *)LEPT_CALLOC(1, sizeof(L_DNAHASH)); - if ((dahash->dna = (L_DNA **)LEPT_CALLOC(nbuckets, sizeof(L_DNA *))) - == NULL) { - LEPT_FREE(dahash); - return (L_DNAHASH *)ERROR_PTR("dna ptr array not made", procName, NULL); - } - - dahash->nbuckets = nbuckets; - dahash->initsize = initsize; - return dahash; -} - - -/*! - * \brief l_dnaHashDestroy() - * - * \param[in,out] pdahash will be set to null before returning - * \return void - */ -void -l_dnaHashDestroy(L_DNAHASH **pdahash) -{ -L_DNAHASH *dahash; -l_int32 i; - - PROCNAME("l_dnaHashDestroy"); - - if (pdahash == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((dahash = *pdahash) == NULL) - return; - - for (i = 0; i < dahash->nbuckets; i++) - l_dnaDestroy(&dahash->dna[i]); - LEPT_FREE(dahash->dna); - LEPT_FREE(dahash); - *pdahash = NULL; -} - - -/*--------------------------------------------------------------------------* - * Dna hash: Accessors and modifiers * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaHashGetCount() - * - * \param[in] dahash - * \return nbuckets allocated, or 0 on error - */ -l_int32 -l_dnaHashGetCount(L_DNAHASH *dahash) -{ - - PROCNAME("l_dnaHashGetCount"); - - if (!dahash) - return ERROR_INT("dahash not defined", procName, 0); - return dahash->nbuckets; -} - - -/*! 
- * \brief l_dnaHashGetTotalCount() - * - * \param[in] dahash - * \return n number of numbers in all dna, or 0 on error - */ -l_int32 -l_dnaHashGetTotalCount(L_DNAHASH *dahash) -{ -l_int32 i, n; -L_DNA *da; - - PROCNAME("l_dnaHashGetTotalCount"); - - if (!dahash) - return ERROR_INT("dahash not defined", procName, 0); - - for (i = 0, n = 0; i < dahash->nbuckets; i++) { - da = l_dnaHashGetDna(dahash, i, L_NOCOPY); - if (da) - n += l_dnaGetCount(da); - } - - return n; -} - - -/*! - * \brief l_dnaHashGetDna() - * - * \param[in] dahash - * \param[in] key key to be hashed into a bucket number - * \param[in] copyflag L_NOCOPY, L_COPY, L_CLONE - * \return ptr to dna - */ -L_DNA * -l_dnaHashGetDna(L_DNAHASH *dahash, - l_uint64 key, - l_int32 copyflag) -{ -l_int32 bucket; -L_DNA *da; - - PROCNAME("l_dnaHashGetDna"); - - if (!dahash) - return (L_DNA *)ERROR_PTR("dahash not defined", procName, NULL); - bucket = key % dahash->nbuckets; - da = dahash->dna[bucket]; - if (da) { - if (copyflag == L_NOCOPY) - return da; - else if (copyflag == L_COPY) - return l_dnaCopy(da); - else - return l_dnaClone(da); - } - else - return NULL; -} - - -/*! 
- * \brief l_dnaHashAdd() - * - * \param[in] dahash - * \param[in] key key to be hashed into a bucket number - * \param[in] value float value to be appended to the specific dna - * \return 0 if OK; 1 on error - */ -l_ok -l_dnaHashAdd(L_DNAHASH *dahash, - l_uint64 key, - l_float64 value) -{ -l_int32 bucket; -L_DNA *da; - - PROCNAME("l_dnaHashAdd"); - - if (!dahash) - return ERROR_INT("dahash not defined", procName, 1); - bucket = key % dahash->nbuckets; - da = dahash->dna[bucket]; - if (!da) { - if ((da = l_dnaCreate(dahash->initsize)) == NULL) - return ERROR_INT("da not made", procName, 1); - dahash->dna[bucket] = da; - } - l_dnaAddNumber(da, value); - return 0; -} - - -/*--------------------------------------------------------------------------* - * DnaHash: Operations on Dna * - *--------------------------------------------------------------------------*/ -/*! - * \brief l_dnaHashCreateFromDna() - * - * \param[in] da - * \return dahash if OK; 1 on error - * - *
- * Notes:
- *      (1) The values stored in the %dahash are indices into %da;
- *          %dahash has no use without %da.
- * 
- */ -L_DNAHASH * -l_dnaHashCreateFromDna(L_DNA *da) -{ -l_int32 i, n; -l_uint32 nsize; -l_uint64 key; -l_float64 val; -L_DNAHASH *dahash; - - PROCNAME("l_dnaHashCreateFromDna"); - - if (!da) - return (L_DNAHASH *)ERROR_PTR("da not defined", procName, NULL); - - n = l_dnaGetCount(da); - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ - - dahash = l_dnaHashCreate(nsize, 8); - for (i = 0; i < n; i++) { - l_dnaGetDValue(da, i, &val); - l_hashFloat64ToUint64(nsize, val, &key); - l_dnaHashAdd(dahash, key, (l_float64)i); - } - - return dahash; -} - - -/*! - * \brief l_dnaRemoveDupsByHash() - * - * \param[in] das - * \param[out] pdad hash set - * \param[out] pdahash [optional] dnahash used for lookup - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Generates a dna with unique values.
- *      (2) The dnahash is built up with dad to assure uniqueness.
- *          It can be used to find if an element is in the set:
- *              l_dnaFindValByHash(dad, dahash, val, &index)
- * 
- */ -l_ok -l_dnaRemoveDupsByHash(L_DNA *das, - L_DNA **pdad, - L_DNAHASH **pdahash) -{ -l_int32 i, n, index, items; -l_uint32 nsize; -l_uint64 key; -l_float64 val; -L_DNA *dad; -L_DNAHASH *dahash; - - PROCNAME("l_dnaRemoveDupsByHash"); - - if (pdahash) *pdahash = NULL; - if (!pdad) - return ERROR_INT("&dad not defined", procName, 1); - *pdad = NULL; - if (!das) - return ERROR_INT("das not defined", procName, 1); - - n = l_dnaGetCount(das); - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ - dahash = l_dnaHashCreate(nsize, 8); - dad = l_dnaCreate(n); - *pdad = dad; - for (i = 0, items = 0; i < n; i++) { - l_dnaGetDValue(das, i, &val); - l_dnaFindValByHash(dad, dahash, val, &index); - if (index < 0) { /* not found */ - l_hashFloat64ToUint64(nsize, val, &key); - l_dnaHashAdd(dahash, key, (l_float64)items); - l_dnaAddNumber(dad, val); - items++; - } - } - - if (pdahash) - *pdahash = dahash; - else - l_dnaHashDestroy(&dahash); - return 0; -} - - -/*! - * \brief l_dnaMakeHistoByHash() - * - * \param[in] das - * \param[out] pdahash hash map: val --> index - * \param[out] pdav array of values: index --> val - * \param[out] pdac histo array of counts: index --> count - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Generates and returns a dna of occurrences (histogram),
- *          an aligned dna of values, and an associated hashmap.
- *          The hashmap takes %dav and a value, and points into the
- *          histogram in %dac.
- *      (2) The dna of values, %dav, is aligned with the histogram %dac,
- *          and is needed for fast lookup.  It is a hash set, because
- *          the values are unique.
- *      (3) Lookup is simple:
- *              l_dnaFindValByHash(dav, dahash, val, &index);
- *              if (index >= 0)
- *                  l_dnaGetIValue(dac, index, &icount);
- *              else
- *                  icount = 0;
- * 
- */ -l_ok -l_dnaMakeHistoByHash(L_DNA *das, - L_DNAHASH **pdahash, - L_DNA **pdav, - L_DNA **pdac) -{ -l_int32 i, n, nitems, index, count; -l_uint32 nsize; -l_uint64 key; -l_float64 val; -L_DNA *dac, *dav; -L_DNAHASH *dahash; - - PROCNAME("l_dnaMakeHistoByHash"); - - if (pdahash) *pdahash = NULL; - if (pdac) *pdac = NULL; - if (pdav) *pdav = NULL; - if (!pdahash || !pdac || !pdav) - return ERROR_INT("&dahash, &dac, &dav not all defined", procName, 1); - if (!das) - return ERROR_INT("das not defined", procName, 1); - if ((n = l_dnaGetCount(das)) == 0) - return ERROR_INT("no data in das", procName, 1); - - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ - dahash = l_dnaHashCreate(nsize, 8); - dac = l_dnaCreate(n); /* histogram */ - dav = l_dnaCreate(n); /* the values */ - for (i = 0, nitems = 0; i < n; i++) { - l_dnaGetDValue(das, i, &val); - /* Is this value already stored in dav? */ - l_dnaFindValByHash(dav, dahash, val, &index); - if (index >= 0) { /* found */ - l_dnaGetIValue(dac, (l_float64)index, &count); - l_dnaSetValue(dac, (l_float64)index, count + 1); - } else { /* not found */ - l_hashFloat64ToUint64(nsize, val, &key); - l_dnaHashAdd(dahash, key, (l_float64)nitems); - l_dnaAddNumber(dav, val); - l_dnaAddNumber(dac, 1); - nitems++; - } - } - - *pdahash = dahash; - *pdac = dac; - *pdav = dav; - return 0; -} - - -/*! - * \brief l_dnaIntersectionByHash() - * - * \param[in] da1, da2 - * \return dad intersection of the number arrays, or NULL on error - * - *
- * Notes:
- *      (1) This uses the same method for building the intersection set
- *          as ptaIntersectionByHash() and sarrayIntersectionByHash().
- * 
- */ -L_DNA * -l_dnaIntersectionByHash(L_DNA *da1, - L_DNA *da2) -{ -l_int32 n1, n2, nsmall, nbuckets, i, index1, index2; -l_uint32 nsize2; -l_uint64 key; -l_float64 val; -L_DNAHASH *dahash1, *dahash2; -L_DNA *da_small, *da_big, *dad; - - PROCNAME("l_dnaIntersectionByHash"); - - if (!da1) - return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL); - if (!da2) - return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL); - - /* Put the elements of the biggest array into a dnahash */ - n1 = l_dnaGetCount(da1); - n2 = l_dnaGetCount(da2); - da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ - da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ - dahash1 = l_dnaHashCreateFromDna(da_big); - - /* Build up the intersection of numbers. Add to %dad - * if the number is in da_big (using dahash1) but hasn't - * yet been seen in the traversal of da_small (using dahash2). */ - dad = l_dnaCreate(0); - nsmall = l_dnaGetCount(da_small); - findNextLargerPrime(nsmall / 20, &nsize2); /* buckets in hash table */ - dahash2 = l_dnaHashCreate(nsize2, 0); - nbuckets = l_dnaHashGetCount(dahash2); - for (i = 0; i < nsmall; i++) { - l_dnaGetDValue(da_small, i, &val); - l_dnaFindValByHash(da_big, dahash1, val, &index1); - if (index1 >= 0) { /* found */ - l_dnaFindValByHash(da_small, dahash2, val, &index2); - if (index2 == -1) { /* not found */ - l_dnaAddNumber(dad, val); - l_hashFloat64ToUint64(nbuckets, val, &key); - l_dnaHashAdd(dahash2, key, (l_float64)i); - } - } - } - - l_dnaHashDestroy(&dahash1); - l_dnaHashDestroy(&dahash2); - return dad; -} - - -/*! - * \brief l_dnaFindValByHash() - * - * \param[in] da - * \param[in] dahash containing indices into %da - * \param[in] val searching for this number in %da - * \param[out] pindex index into da if found; -1 otherwise - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Algo: hash %val into a key; hash the key to get the dna
- *                in %dahash (that holds indices into %da); traverse
- *                the dna of indices looking for %val in %da.
- * 
- */ -l_ok -l_dnaFindValByHash(L_DNA *da, - L_DNAHASH *dahash, - l_float64 val, - l_int32 *pindex) -{ -l_int32 i, nbuckets, nvals, indexval; -l_float64 vali; -l_uint64 key; -L_DNA *da1; - - PROCNAME("l_dnaFindValByHash"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = -1; - if (!da) - return ERROR_INT("da not defined", procName, 1); - if (!dahash) - return ERROR_INT("dahash not defined", procName, 1); - - nbuckets = l_dnaHashGetCount(dahash); - l_hashFloat64ToUint64(nbuckets, val, &key); - da1 = l_dnaHashGetDna(dahash, key, L_NOCOPY); - if (!da1) return 0; - - /* Run through da1, looking for this %val */ - nvals = l_dnaGetCount(da1); - for (i = 0; i < nvals; i++) { - l_dnaGetIValue(da1, i, &indexval); - l_dnaGetDValue(da, indexval, &vali); - if (val == vali) { - *pindex = indexval; - return 0; - } - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dwacomb.2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dwacomb.2.c deleted file mode 100644 index 4f48897d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dwacomb.2.c +++ /dev/null @@ -1,299 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * Top-level fast binary morphology with auto-generated sels - * - * PIX *pixMorphDwa_2() - * PIX *pixFMorphopGen_2() - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -PIX *pixMorphDwa_2(PIX *pixd, PIX *pixs, l_int32 operation, char *selname); -PIX *pixFMorphopGen_2(PIX *pixd, PIX *pixs, l_int32 operation, char *selname); -l_int32 fmorphopgen_low_2(l_uint32 *datad, l_int32 w, - l_int32 h, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, - l_int32 index); - -static l_int32 NUM_SELS_GENERATED = 76; -static char SEL_NAMES[][80] = { - "sel_comb_4h", - "sel_comb_4v", - "sel_comb_5h", - "sel_comb_5v", - "sel_comb_6h", - "sel_comb_6v", - "sel_comb_7h", - "sel_comb_7v", - "sel_comb_8h", - "sel_comb_8v", - "sel_comb_9h", - "sel_comb_9v", - "sel_comb_10h", - "sel_comb_10v", - "sel_comb_12h", - "sel_comb_12v", - "sel_comb_14h", - "sel_comb_14v", - "sel_comb_15h", - "sel_comb_15v", - "sel_comb_16h", - "sel_comb_16v", - "sel_comb_18h", - "sel_comb_18v", - "sel_comb_20h", - "sel_comb_20v", - "sel_comb_21h", - "sel_comb_21v", - "sel_comb_22h", - "sel_comb_22v", - 
"sel_comb_24h", - "sel_comb_24v", - "sel_comb_25h", - "sel_comb_25v", - "sel_comb_27h", - "sel_comb_27v", - "sel_comb_28h", - "sel_comb_28v", - "sel_comb_30h", - "sel_comb_30v", - "sel_comb_32h", - "sel_comb_32v", - "sel_comb_33h", - "sel_comb_33v", - "sel_comb_35h", - "sel_comb_35v", - "sel_comb_36h", - "sel_comb_36v", - "sel_comb_39h", - "sel_comb_39v", - "sel_comb_40h", - "sel_comb_40v", - "sel_comb_42h", - "sel_comb_42v", - "sel_comb_44h", - "sel_comb_44v", - "sel_comb_45h", - "sel_comb_45v", - "sel_comb_48h", - "sel_comb_48v", - "sel_comb_49h", - "sel_comb_49v", - "sel_comb_50h", - "sel_comb_50v", - "sel_comb_52h", - "sel_comb_52v", - "sel_comb_54h", - "sel_comb_54v", - "sel_comb_55h", - "sel_comb_55v", - "sel_comb_56h", - "sel_comb_56v", - "sel_comb_60h", - "sel_comb_60v", - "sel_comb_63h", - "sel_comb_63v"}; - -/*! - * \brief pixMorphDwa_2() - * - * \param[in] pixd usual 3 choices: null, == pixs, != pixs - * \param[in] pixs 1 bpp - * \param[in] operation L_MORPH_DILATE, L_MORPH_ERODE, - * L_MORPH_OPEN, L_MORPH_CLOSE - * \param[in] sel name - * \return pixd - * - *
- * Notes:
- *      (1) This simply adds a border, calls the appropriate
- *          pixFMorphopGen_*(), and removes the border.
- *          See the notes for that function.
- *      (2) The size of the border depends on the operation
- *          and the boundary conditions.
- * 
- */ -PIX * -pixMorphDwa_2(PIX *pixd, - PIX *pixs, - l_int32 operation, - char *selname) -{ -l_int32 bordercolor, bordersize; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixMorphDwa_2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - /* Set the border size */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - bordersize = 32; - if (bordercolor == 0 && operation == L_MORPH_CLOSE) - bordersize += 32; - - pixt1 = pixAddBorder(pixs, bordersize, 0); - pixt2 = pixFMorphopGen_2(NULL, pixt1, operation, selname); - pixt3 = pixRemoveBorder(pixt2, bordersize); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixCopy(pixd, pixt3); - pixDestroy(&pixt3); - return pixd; -} - - -/*! - * \brief pixFMorphopGen_2() - * - * \param[in] pixd usual 3 choices: null, == pixs, != pixs - * \param[in] pixs 1 bpp - * \param[in] operation L_MORPH_DILATE, L_MORPH_ERODE, - * L_MORPH_OPEN, L_MORPH_CLOSE - * \param[in] sel name - * \return pixd - * - *
- * Notes:
- *      (1) This is a dwa operation, and the Sels must be limited in
- *          size to not more than 31 pixels about the origin.
- *      (2) A border of appropriate size (32 pixels, or 64 pixels
- *          for safe closing with asymmetric b.c.) must be added before
- *          this function is called.
- *      (3) This handles all required setting of the border pixels
- *          before erosion and dilation.
- *      (4) The closing operation is safe; no pixels can be removed
- *          near the boundary.
- * 
- */ -PIX * -pixFMorphopGen_2(PIX *pixd, - PIX *pixs, - l_int32 operation, - char *selname) -{ -l_int32 i, index, found, w, h, wpls, wpld, bordercolor, erodeop, borderop; -l_uint32 *datad, *datas, *datat; -PIX *pixt; - - PROCNAME("pixFMorphopGen_2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - /* Get boundary colors to use */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - if (bordercolor == 1) - erodeop = PIX_SET; - else - erodeop = PIX_CLR; - - found = FALSE; - for (i = 0; i < NUM_SELS_GENERATED; i++) { - if (strcmp(selname, SEL_NAMES[i]) == 0) { - found = TRUE; - index = 2 * i; - break; - } - } - if (found == FALSE) - return (PIX *)ERROR_PTR("sel index not found", procName, pixd); - - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - else /* for in-place or pre-allocated */ - pixResizeImageData(pixd, pixs); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - /* The images must be surrounded, in advance, with a border of - * size 32 pixels (or 64, for closing), that we'll read from. 
- * Fabricate a "proper" image as the subimage within the 32 - * pixel border, having the following parameters: */ - w = pixGetWidth(pixs) - 64; - h = pixGetHeight(pixs) - 64; - datas = pixGetData(pixs) + 32 * wpls + 1; - datad = pixGetData(pixd) + 32 * wpld + 1; - - if (operation == L_MORPH_DILATE || operation == L_MORPH_ERODE) { - borderop = PIX_CLR; - if (operation == L_MORPH_ERODE) { - borderop = erodeop; - index++; - } - if (pixd == pixs) { /* in-place; generate a temp image */ - if ((pixt = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - datat = pixGetData(pixt) + 32 * wpls + 1; - pixSetOrClearBorder(pixt, 32, 32, 32, 32, borderop); - fmorphopgen_low_2(datad, w, h, wpld, datat, wpls, index); - pixDestroy(&pixt); - } - else { /* not in-place */ - pixSetOrClearBorder(pixs, 32, 32, 32, 32, borderop); - fmorphopgen_low_2(datad, w, h, wpld, datas, wpls, index); - } - } - else { /* opening or closing; generate a temp image */ - if ((pixt = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - datat = pixGetData(pixt) + 32 * wpls + 1; - if (operation == L_MORPH_OPEN) { - pixSetOrClearBorder(pixs, 32, 32, 32, 32, erodeop); - fmorphopgen_low_2(datat, w, h, wpls, datas, wpls, index+1); - pixSetOrClearBorder(pixt, 32, 32, 32, 32, PIX_CLR); - fmorphopgen_low_2(datad, w, h, wpld, datat, wpls, index); - } - else { /* closing */ - pixSetOrClearBorder(pixs, 32, 32, 32, 32, PIX_CLR); - fmorphopgen_low_2(datat, w, h, wpls, datas, wpls, index); - pixSetOrClearBorder(pixt, 32, 32, 32, 32, erodeop); - fmorphopgen_low_2(datad, w, h, wpld, datat, wpls, index+1); - } - pixDestroy(&pixt); - } - - return pixd; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dwacomblow.2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dwacomblow.2.c deleted file mode 100644 index e47684ff..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/dwacomblow.2.c +++ /dev/null @@ 
-1,4970 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! 
- * Low-level fast binary morphology with auto-generated sels - * - * Dispatcher: - * l_int32 fmorphopgen_low_2() - * - * Static Low-level: - * void fdilate_2_*() - * void ferode_2_*() - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -static void fdilate_2_0(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_0(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_1(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_1(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_2(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_2(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_4(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_4(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_6(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_6(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_8(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_8(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static 
void fdilate_2_10(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_10(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_11(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_11(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_12(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_12(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_13(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_13(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_14(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_14(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_15(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_15(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_16(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_16(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_17(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_17(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_18(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_18(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_19(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_19(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_20(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_20(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_21(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void 
ferode_2_21(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_22(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_22(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_23(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_23(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_24(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_24(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_25(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_25(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_26(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_26(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_27(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_27(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_28(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_28(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_29(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_29(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_30(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_30(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_31(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_31(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_32(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_32(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void 
fdilate_2_33(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_33(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_34(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_34(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_35(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_35(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_36(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_36(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_37(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_37(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_38(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_38(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_39(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_39(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_40(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_40(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_41(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_41(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_42(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_42(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_43(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_43(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_44(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void 
ferode_2_44(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_45(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_45(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_46(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_46(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_47(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_47(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_48(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_48(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_49(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_49(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_50(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_50(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_51(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_51(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_52(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_52(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_53(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_53(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_54(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_54(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_55(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_55(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void 
fdilate_2_56(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_56(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_57(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_57(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_58(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_58(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_59(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_59(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_60(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_60(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_61(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_61(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_62(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_62(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_63(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_63(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_64(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_64(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_65(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_65(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_66(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_66(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_67(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void 
ferode_2_67(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_68(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_68(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_69(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_69(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_70(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_70(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_71(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_71(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_72(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_72(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_73(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_73(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_74(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_74(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_2_75(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_2_75(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); - - -/*---------------------------------------------------------------------* - * Fast morph dispatcher * - *---------------------------------------------------------------------*/ -/*! 
- * fmorphopgen_low_2() - * - * a dispatcher to appropriate low-level code - */ -l_int32 -fmorphopgen_low_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 index) -{ - - switch (index) - { - case 0: - fdilate_2_0(datad, w, h, wpld, datas, wpls); - break; - case 1: - ferode_2_0(datad, w, h, wpld, datas, wpls); - break; - case 2: - fdilate_2_1(datad, w, h, wpld, datas, wpls); - break; - case 3: - ferode_2_1(datad, w, h, wpld, datas, wpls); - break; - case 4: - fdilate_2_2(datad, w, h, wpld, datas, wpls); - break; - case 5: - ferode_2_2(datad, w, h, wpld, datas, wpls); - break; - case 6: - fdilate_2_3(datad, w, h, wpld, datas, wpls); - break; - case 7: - ferode_2_3(datad, w, h, wpld, datas, wpls); - break; - case 8: - fdilate_2_4(datad, w, h, wpld, datas, wpls); - break; - case 9: - ferode_2_4(datad, w, h, wpld, datas, wpls); - break; - case 10: - fdilate_2_5(datad, w, h, wpld, datas, wpls); - break; - case 11: - ferode_2_5(datad, w, h, wpld, datas, wpls); - break; - case 12: - fdilate_2_6(datad, w, h, wpld, datas, wpls); - break; - case 13: - ferode_2_6(datad, w, h, wpld, datas, wpls); - break; - case 14: - fdilate_2_7(datad, w, h, wpld, datas, wpls); - break; - case 15: - ferode_2_7(datad, w, h, wpld, datas, wpls); - break; - case 16: - fdilate_2_8(datad, w, h, wpld, datas, wpls); - break; - case 17: - ferode_2_8(datad, w, h, wpld, datas, wpls); - break; - case 18: - fdilate_2_9(datad, w, h, wpld, datas, wpls); - break; - case 19: - ferode_2_9(datad, w, h, wpld, datas, wpls); - break; - case 20: - fdilate_2_10(datad, w, h, wpld, datas, wpls); - break; - case 21: - ferode_2_10(datad, w, h, wpld, datas, wpls); - break; - case 22: - fdilate_2_11(datad, w, h, wpld, datas, wpls); - break; - case 23: - ferode_2_11(datad, w, h, wpld, datas, wpls); - break; - case 24: - fdilate_2_12(datad, w, h, wpld, datas, wpls); - break; - case 25: - ferode_2_12(datad, w, h, wpld, datas, wpls); - break; - case 26: - 
fdilate_2_13(datad, w, h, wpld, datas, wpls); - break; - case 27: - ferode_2_13(datad, w, h, wpld, datas, wpls); - break; - case 28: - fdilate_2_14(datad, w, h, wpld, datas, wpls); - break; - case 29: - ferode_2_14(datad, w, h, wpld, datas, wpls); - break; - case 30: - fdilate_2_15(datad, w, h, wpld, datas, wpls); - break; - case 31: - ferode_2_15(datad, w, h, wpld, datas, wpls); - break; - case 32: - fdilate_2_16(datad, w, h, wpld, datas, wpls); - break; - case 33: - ferode_2_16(datad, w, h, wpld, datas, wpls); - break; - case 34: - fdilate_2_17(datad, w, h, wpld, datas, wpls); - break; - case 35: - ferode_2_17(datad, w, h, wpld, datas, wpls); - break; - case 36: - fdilate_2_18(datad, w, h, wpld, datas, wpls); - break; - case 37: - ferode_2_18(datad, w, h, wpld, datas, wpls); - break; - case 38: - fdilate_2_19(datad, w, h, wpld, datas, wpls); - break; - case 39: - ferode_2_19(datad, w, h, wpld, datas, wpls); - break; - case 40: - fdilate_2_20(datad, w, h, wpld, datas, wpls); - break; - case 41: - ferode_2_20(datad, w, h, wpld, datas, wpls); - break; - case 42: - fdilate_2_21(datad, w, h, wpld, datas, wpls); - break; - case 43: - ferode_2_21(datad, w, h, wpld, datas, wpls); - break; - case 44: - fdilate_2_22(datad, w, h, wpld, datas, wpls); - break; - case 45: - ferode_2_22(datad, w, h, wpld, datas, wpls); - break; - case 46: - fdilate_2_23(datad, w, h, wpld, datas, wpls); - break; - case 47: - ferode_2_23(datad, w, h, wpld, datas, wpls); - break; - case 48: - fdilate_2_24(datad, w, h, wpld, datas, wpls); - break; - case 49: - ferode_2_24(datad, w, h, wpld, datas, wpls); - break; - case 50: - fdilate_2_25(datad, w, h, wpld, datas, wpls); - break; - case 51: - ferode_2_25(datad, w, h, wpld, datas, wpls); - break; - case 52: - fdilate_2_26(datad, w, h, wpld, datas, wpls); - break; - case 53: - ferode_2_26(datad, w, h, wpld, datas, wpls); - break; - case 54: - fdilate_2_27(datad, w, h, wpld, datas, wpls); - break; - case 55: - ferode_2_27(datad, w, h, wpld, datas, 
wpls); - break; - case 56: - fdilate_2_28(datad, w, h, wpld, datas, wpls); - break; - case 57: - ferode_2_28(datad, w, h, wpld, datas, wpls); - break; - case 58: - fdilate_2_29(datad, w, h, wpld, datas, wpls); - break; - case 59: - ferode_2_29(datad, w, h, wpld, datas, wpls); - break; - case 60: - fdilate_2_30(datad, w, h, wpld, datas, wpls); - break; - case 61: - ferode_2_30(datad, w, h, wpld, datas, wpls); - break; - case 62: - fdilate_2_31(datad, w, h, wpld, datas, wpls); - break; - case 63: - ferode_2_31(datad, w, h, wpld, datas, wpls); - break; - case 64: - fdilate_2_32(datad, w, h, wpld, datas, wpls); - break; - case 65: - ferode_2_32(datad, w, h, wpld, datas, wpls); - break; - case 66: - fdilate_2_33(datad, w, h, wpld, datas, wpls); - break; - case 67: - ferode_2_33(datad, w, h, wpld, datas, wpls); - break; - case 68: - fdilate_2_34(datad, w, h, wpld, datas, wpls); - break; - case 69: - ferode_2_34(datad, w, h, wpld, datas, wpls); - break; - case 70: - fdilate_2_35(datad, w, h, wpld, datas, wpls); - break; - case 71: - ferode_2_35(datad, w, h, wpld, datas, wpls); - break; - case 72: - fdilate_2_36(datad, w, h, wpld, datas, wpls); - break; - case 73: - ferode_2_36(datad, w, h, wpld, datas, wpls); - break; - case 74: - fdilate_2_37(datad, w, h, wpld, datas, wpls); - break; - case 75: - ferode_2_37(datad, w, h, wpld, datas, wpls); - break; - case 76: - fdilate_2_38(datad, w, h, wpld, datas, wpls); - break; - case 77: - ferode_2_38(datad, w, h, wpld, datas, wpls); - break; - case 78: - fdilate_2_39(datad, w, h, wpld, datas, wpls); - break; - case 79: - ferode_2_39(datad, w, h, wpld, datas, wpls); - break; - case 80: - fdilate_2_40(datad, w, h, wpld, datas, wpls); - break; - case 81: - ferode_2_40(datad, w, h, wpld, datas, wpls); - break; - case 82: - fdilate_2_41(datad, w, h, wpld, datas, wpls); - break; - case 83: - ferode_2_41(datad, w, h, wpld, datas, wpls); - break; - case 84: - fdilate_2_42(datad, w, h, wpld, datas, wpls); - break; - case 85: - 
ferode_2_42(datad, w, h, wpld, datas, wpls); - break; - case 86: - fdilate_2_43(datad, w, h, wpld, datas, wpls); - break; - case 87: - ferode_2_43(datad, w, h, wpld, datas, wpls); - break; - case 88: - fdilate_2_44(datad, w, h, wpld, datas, wpls); - break; - case 89: - ferode_2_44(datad, w, h, wpld, datas, wpls); - break; - case 90: - fdilate_2_45(datad, w, h, wpld, datas, wpls); - break; - case 91: - ferode_2_45(datad, w, h, wpld, datas, wpls); - break; - case 92: - fdilate_2_46(datad, w, h, wpld, datas, wpls); - break; - case 93: - ferode_2_46(datad, w, h, wpld, datas, wpls); - break; - case 94: - fdilate_2_47(datad, w, h, wpld, datas, wpls); - break; - case 95: - ferode_2_47(datad, w, h, wpld, datas, wpls); - break; - case 96: - fdilate_2_48(datad, w, h, wpld, datas, wpls); - break; - case 97: - ferode_2_48(datad, w, h, wpld, datas, wpls); - break; - case 98: - fdilate_2_49(datad, w, h, wpld, datas, wpls); - break; - case 99: - ferode_2_49(datad, w, h, wpld, datas, wpls); - break; - case 100: - fdilate_2_50(datad, w, h, wpld, datas, wpls); - break; - case 101: - ferode_2_50(datad, w, h, wpld, datas, wpls); - break; - case 102: - fdilate_2_51(datad, w, h, wpld, datas, wpls); - break; - case 103: - ferode_2_51(datad, w, h, wpld, datas, wpls); - break; - case 104: - fdilate_2_52(datad, w, h, wpld, datas, wpls); - break; - case 105: - ferode_2_52(datad, w, h, wpld, datas, wpls); - break; - case 106: - fdilate_2_53(datad, w, h, wpld, datas, wpls); - break; - case 107: - ferode_2_53(datad, w, h, wpld, datas, wpls); - break; - case 108: - fdilate_2_54(datad, w, h, wpld, datas, wpls); - break; - case 109: - ferode_2_54(datad, w, h, wpld, datas, wpls); - break; - case 110: - fdilate_2_55(datad, w, h, wpld, datas, wpls); - break; - case 111: - ferode_2_55(datad, w, h, wpld, datas, wpls); - break; - case 112: - fdilate_2_56(datad, w, h, wpld, datas, wpls); - break; - case 113: - ferode_2_56(datad, w, h, wpld, datas, wpls); - break; - case 114: - fdilate_2_57(datad, w, h, 
wpld, datas, wpls); - break; - case 115: - ferode_2_57(datad, w, h, wpld, datas, wpls); - break; - case 116: - fdilate_2_58(datad, w, h, wpld, datas, wpls); - break; - case 117: - ferode_2_58(datad, w, h, wpld, datas, wpls); - break; - case 118: - fdilate_2_59(datad, w, h, wpld, datas, wpls); - break; - case 119: - ferode_2_59(datad, w, h, wpld, datas, wpls); - break; - case 120: - fdilate_2_60(datad, w, h, wpld, datas, wpls); - break; - case 121: - ferode_2_60(datad, w, h, wpld, datas, wpls); - break; - case 122: - fdilate_2_61(datad, w, h, wpld, datas, wpls); - break; - case 123: - ferode_2_61(datad, w, h, wpld, datas, wpls); - break; - case 124: - fdilate_2_62(datad, w, h, wpld, datas, wpls); - break; - case 125: - ferode_2_62(datad, w, h, wpld, datas, wpls); - break; - case 126: - fdilate_2_63(datad, w, h, wpld, datas, wpls); - break; - case 127: - ferode_2_63(datad, w, h, wpld, datas, wpls); - break; - case 128: - fdilate_2_64(datad, w, h, wpld, datas, wpls); - break; - case 129: - ferode_2_64(datad, w, h, wpld, datas, wpls); - break; - case 130: - fdilate_2_65(datad, w, h, wpld, datas, wpls); - break; - case 131: - ferode_2_65(datad, w, h, wpld, datas, wpls); - break; - case 132: - fdilate_2_66(datad, w, h, wpld, datas, wpls); - break; - case 133: - ferode_2_66(datad, w, h, wpld, datas, wpls); - break; - case 134: - fdilate_2_67(datad, w, h, wpld, datas, wpls); - break; - case 135: - ferode_2_67(datad, w, h, wpld, datas, wpls); - break; - case 136: - fdilate_2_68(datad, w, h, wpld, datas, wpls); - break; - case 137: - ferode_2_68(datad, w, h, wpld, datas, wpls); - break; - case 138: - fdilate_2_69(datad, w, h, wpld, datas, wpls); - break; - case 139: - ferode_2_69(datad, w, h, wpld, datas, wpls); - break; - case 140: - fdilate_2_70(datad, w, h, wpld, datas, wpls); - break; - case 141: - ferode_2_70(datad, w, h, wpld, datas, wpls); - break; - case 142: - fdilate_2_71(datad, w, h, wpld, datas, wpls); - break; - case 143: - ferode_2_71(datad, w, h, wpld, datas, 
wpls); - break; - case 144: - fdilate_2_72(datad, w, h, wpld, datas, wpls); - break; - case 145: - ferode_2_72(datad, w, h, wpld, datas, wpls); - break; - case 146: - fdilate_2_73(datad, w, h, wpld, datas, wpls); - break; - case 147: - ferode_2_73(datad, w, h, wpld, datas, wpls); - break; - case 148: - fdilate_2_74(datad, w, h, wpld, datas, wpls); - break; - case 149: - ferode_2_74(datad, w, h, wpld, datas, wpls); - break; - case 150: - fdilate_2_75(datad, w, h, wpld, datas, wpls); - break; - case 151: - ferode_2_75(datad, w, h, wpld, datas, wpls); - break; - } - - return 0; -} - - -/*--------------------------------------------------------------------------* - * Low-level auto-generated static routines * - *--------------------------------------------------------------------------*/ -/* - * N.B. In all the low-level routines, the part of the image - * that is accessed has been clipped by 32 pixels on - * all four sides. This is done in the higher level - * code by redefining w and h smaller and by moving the - * start-of-image pointers up to the beginning of this - * interior rectangle. 
- */ -static void -fdilate_2_0(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)); - } - } -} - -static void -ferode_2_0(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)); - } - } -} - -static void -fdilate_2_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls)) | - (*(sptr - wpls)); - } - } -} - -static void -ferode_2_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls)) & - (*(sptr + wpls)); - } - } -} - -static void -fdilate_2_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, 
*dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -ferode_2_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -fdilate_2_3(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -ferode_2_3(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -fdilate_2_4(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)); - } - } -} - -static void -ferode_2_4(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, 
- l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)); - } - } -} - -static void -fdilate_2_5(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls2)) | - (*(sptr - wpls)); - } - } -} - -static void -ferode_2_5(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls2)) & - (*(sptr + wpls)); - } - } -} - -static void -fdilate_2_6(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -ferode_2_6(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) 
{ - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -fdilate_2_7(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -ferode_2_7(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr); - } - } -} - -static void -fdilate_2_8(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)); - } - } -} - -static void -ferode_2_8(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)); - } - } -} - -static void -fdilate_2_9(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - 
l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls2)) | - (*(sptr - wpls2)); - } - } -} - -static void -ferode_2_9(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls2)) & - (*(sptr + wpls2)); - } - } -} - -static void -fdilate_2_10(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - (*sptr) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)); - } - } -} - -static void -ferode_2_10(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - (*sptr) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)); - } - } -} - -static void -fdilate_2_11(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; 
-l_int32 wpls3; - - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls3)) | - (*sptr) | - (*(sptr - wpls3)); - } - } -} - -static void -ferode_2_11(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; - - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls3)) & - (*sptr) & - (*(sptr + wpls3)); - } - } -} - -static void -fdilate_2_12(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)); - } - } -} - -static void -ferode_2_12(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)); - } - } -} - -static void -fdilate_2_13(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; -l_int32 wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - 
pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls3)) | - (*(sptr - wpls2)); - } - } -} - -static void -ferode_2_13(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; -l_int32 wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls3)) & - (*(sptr + wpls2)); - } - } -} - -static void -fdilate_2_14(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - (*sptr) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)); - } - } -} - -static void -ferode_2_14(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - (*sptr) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)); - } - } -} - -static void -fdilate_2_15(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; - - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* 
proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls4)) | - (*sptr) | - (*(sptr - wpls4)); - } - } -} - -static void -ferode_2_15(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; - - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls4)) & - (*sptr) & - (*(sptr + wpls4)); - } - } -} - -static void -fdilate_2_16(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)); - } - } -} - -static void -ferode_2_16(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)); - } - } -} - -static void -fdilate_2_17(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls4; - - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < 
h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls4)) | - (*(sptr - wpls3)); - } - } -} - -static void -ferode_2_17(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls4; - - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls4)) & - (*(sptr + wpls3)); - } - } -} - -static void -fdilate_2_18(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - (*sptr) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)); - } - } -} - -static void -ferode_2_18(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - (*sptr) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)); - } - } -} - -static void -fdilate_2_19(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; - - wpls5 = 5 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - 
dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls5)) | - (*sptr) | - (*(sptr - wpls5)); - } - } -} - -static void -ferode_2_19(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; - - wpls5 = 5 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls5)) & - (*sptr) & - (*(sptr + wpls5)); - } - } -} - -static void -fdilate_2_20(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)); - } - } -} - -static void -ferode_2_20(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)); - } - } -} - -static void -fdilate_2_21(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; -l_int32 wpls6; - - wpls2 = 2 * wpls; - wpls6 
= 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls6)) | - (*(sptr + wpls2)) | - (*(sptr - wpls2)) | - (*(sptr - wpls6)); - } - } -} - -static void -ferode_2_21(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; -l_int32 wpls6; - - wpls2 = 2 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls6)) & - (*(sptr - wpls2)) & - (*(sptr + wpls2)) & - (*(sptr + wpls6)); - } - } -} - -static void -fdilate_2_22(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - (*sptr) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)); - } - } -} - -static void -ferode_2_22(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - (*sptr) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)); - } - } -} - -static void -fdilate_2_23(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; 
-l_uint32 *sptr, *dptr; -l_int32 wpls6; - - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls6)) | - (*sptr) | - (*(sptr - wpls6)); - } - } -} - -static void -ferode_2_23(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls6; - - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls6)) & - (*sptr) & - (*(sptr + wpls6)); - } - } -} - -static void -fdilate_2_24(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)); - } - } -} - -static void -ferode_2_24(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)); - } - } -} - -static void -fdilate_2_25(l_uint32 *datad, - l_int32 w, - 
l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; -l_int32 wpls3; -l_int32 wpls7; -l_int32 wpls8; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls8)) | - (*(sptr + wpls3)) | - (*(sptr - wpls2)) | - (*(sptr - wpls7)); - } - } -} - -static void -ferode_2_25(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; -l_int32 wpls3; -l_int32 wpls7; -l_int32 wpls8; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls8)) & - (*(sptr - wpls3)) & - (*(sptr + wpls2)) & - (*(sptr + wpls7)); - } - } -} - -static void -fdilate_2_26(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - (*sptr) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)); - } - } -} - -static void -ferode_2_26(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * 
wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - (*sptr) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)); - } - } -} - -static void -fdilate_2_27(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls7; - - wpls7 = 7 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls7)) | - (*sptr) | - (*(sptr - wpls7)); - } - } -} - -static void -ferode_2_27(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls7; - - wpls7 = 7 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls7)) & - (*sptr) & - (*(sptr + wpls7)); - } - } -} - -static void -fdilate_2_28(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)); - } - } -} - -static void -ferode_2_28(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = 
((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)); - } - } -} - -static void -fdilate_2_29(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls6; - - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls6)) | - (*(sptr - wpls5)); - } - } -} - -static void -ferode_2_29(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls6; - - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls6)) & - (*(sptr + wpls5)); - } - } -} - -static void -fdilate_2_30(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)); - } - } -} - -static void -ferode_2_30(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - 
for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)); - } - } -} - -static void -fdilate_2_31(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls9; - - wpls3 = 3 * wpls; - wpls9 = 9 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls9)) | - (*(sptr + wpls3)) | - (*(sptr - wpls3)) | - (*(sptr - wpls9)); - } - } -} - -static void -ferode_2_31(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls9; - - wpls3 = 3 * wpls; - wpls9 = 9 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls9)) & - (*(sptr - wpls3)) & - (*(sptr + wpls3)) & - (*(sptr + wpls9)); - } - } -} - -static void -fdilate_2_32(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - (*sptr) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)); - } - } -} - -static void -ferode_2_32(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 
wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - (*sptr) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)); - } - } -} - -static void -fdilate_2_33(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls10; - - wpls5 = 5 * wpls; - wpls10 = 10 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls10)) | - (*(sptr + wpls5)) | - (*sptr) | - (*(sptr - wpls5)) | - (*(sptr - wpls10)); - } - } -} - -static void -ferode_2_33(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls10; - - wpls5 = 5 * wpls; - wpls10 = 10 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls10)) & - (*(sptr - wpls5)) & - (*sptr) & - (*(sptr + wpls5)) & - (*(sptr + wpls10)); - } - } -} - -static void -fdilate_2_34(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, 
dptr++) { - *dptr = ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - (*sptr) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)); - } - } -} - -static void -ferode_2_34(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - (*sptr) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)); - } - } -} - -static void -fdilate_2_35(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls9; - - wpls9 = 9 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls9)) | - (*sptr) | - (*(sptr - wpls9)); - } - } -} - -static void -ferode_2_35(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls9; - - wpls9 = 9 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls9)) & - (*sptr) & - (*(sptr + wpls9)); - } - } -} - -static void -fdilate_2_36(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 11) | (*(sptr + 1) 
>> 21)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)); - } - } -} - -static void -ferode_2_36(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)); - } - } -} - -static void -fdilate_2_37(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls4; -l_int32 wpls10; -l_int32 wpls11; - - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls11)) | - (*(sptr + wpls4)) | - (*(sptr - wpls3)) | - (*(sptr - wpls10)); - } - } -} - -static void -ferode_2_37(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls4; -l_int32 wpls10; -l_int32 wpls11; - - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls11)) & - (*(sptr - wpls4)) & - (*(sptr + wpls3)) & - (*(sptr + wpls10)); - } - } -} - -static void 
-fdilate_2_38(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - (*sptr) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)); - } - } -} - -static void -ferode_2_38(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - (*sptr) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)); - } - } -} - -static void -fdilate_2_39(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls6; -l_int32 wpls12; - - wpls6 = 6 * wpls; - wpls12 = 12 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls12)) | - (*(sptr + wpls6)) | - (*sptr) | - (*(sptr - wpls6)) | - (*(sptr - wpls12)); - } - } -} - -static void -ferode_2_39(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls6; -l_int32 wpls12; - - wpls6 = 6 * wpls; - wpls12 = 12 * wpls; - pwpls = (l_uint32)(w + 31) / 
32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls12)) & - (*(sptr - wpls6)) & - (*sptr) & - (*(sptr + wpls6)) & - (*(sptr + wpls12)); - } - } -} - -static void -fdilate_2_40(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)); - } - } -} - -static void -ferode_2_40(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)); - } - } -} - -static void -fdilate_2_41(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; -l_int32 wpls12; - - wpls4 = 4 * wpls; - wpls12 = 12 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls12)) | - (*(sptr + wpls4)) | - (*(sptr - wpls4)) | - (*(sptr - wpls12)); - } - } -} - -static void 
-ferode_2_41(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; -l_int32 wpls12; - - wpls4 = 4 * wpls; - wpls12 = 12 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls12)) & - (*(sptr - wpls4)) & - (*(sptr + wpls4)) & - (*(sptr + wpls12)); - } - } -} - -static void -fdilate_2_42(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - (*sptr) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)); - } - } -} - -static void -ferode_2_42(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - (*sptr) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)); - } - } -} - -static void -fdilate_2_43(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls11; - - wpls11 = 11 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls11)) | - (*sptr) | - (*(sptr - 
wpls11)); - } - } -} - -static void -ferode_2_43(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls11; - - wpls11 = 11 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls11)) & - (*sptr) & - (*(sptr + wpls11)); - } - } -} - -static void -fdilate_2_44(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - (*sptr) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)); - } - } -} - -static void -ferode_2_44(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - (*sptr) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)); - } - } -} - -static void -fdilate_2_45(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls7; -l_int32 wpls14; - - wpls7 = 7 * wpls; - wpls14 = 14 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i 
< h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls14)) | - (*(sptr + wpls7)) | - (*sptr) | - (*(sptr - wpls7)) | - (*(sptr - wpls14)); - } - } -} - -static void -ferode_2_45(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls7; -l_int32 wpls14; - - wpls7 = 7 * wpls; - wpls14 = 14 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls14)) & - (*(sptr - wpls7)) & - (*sptr) & - (*(sptr + wpls7)) & - (*(sptr + wpls14)); - } - } -} - -static void -fdilate_2_46(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)); - } - } -} - -static void -ferode_2_46(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) << 3) | (*(sptr + 1) 
>> 29)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)); - } - } -} - -static void -fdilate_2_47(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls9; -l_int32 wpls15; - - wpls3 = 3 * wpls; - wpls9 = 9 * wpls; - wpls15 = 15 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls15)) | - (*(sptr + wpls9)) | - (*(sptr + wpls3)) | - (*(sptr - wpls3)) | - (*(sptr - wpls9)) | - (*(sptr - wpls15)); - } - } -} - -static void -ferode_2_47(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls9; -l_int32 wpls15; - - wpls3 = 3 * wpls; - wpls9 = 9 * wpls; - wpls15 = 15 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls15)) & - (*(sptr - wpls9)) & - (*(sptr - wpls3)) & - (*(sptr + wpls3)) & - (*(sptr + wpls9)) & - (*(sptr + wpls15)); - } - } -} - -static void -fdilate_2_48(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - (*sptr) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)); - } - } -} - -static void -ferode_2_48(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ 
-l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - (*sptr) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)); - } - } -} - -static void -fdilate_2_49(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls13; - - wpls13 = 13 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls13)) | - (*sptr) | - (*(sptr - wpls13)); - } - } -} - -static void -ferode_2_49(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls13; - - wpls13 = 13 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls13)) & - (*sptr) & - (*(sptr + wpls13)); - } - } -} - -static void -fdilate_2_50(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - (*sptr) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)); - } - } -} - -static void -ferode_2_50(l_uint32 *datad, - l_int32 w, - l_int32 h, - 
l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - (*sptr) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)); - } - } -} - -static void -fdilate_2_51(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls8; -l_int32 wpls16; - - wpls8 = 8 * wpls; - wpls16 = 16 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls16)) | - (*(sptr + wpls8)) | - (*sptr) | - (*(sptr - wpls8)) | - (*(sptr - wpls16)); - } - } -} - -static void -ferode_2_51(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls8; -l_int32 wpls16; - - wpls8 = 8 * wpls; - wpls16 = 16 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls16)) & - (*(sptr - wpls8)) & - (*sptr) & - (*(sptr + wpls8)) & - (*(sptr + wpls16)); - } - } -} - -static void -fdilate_2_52(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, 
sptr++, dptr++) { - *dptr = ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)); - } - } -} - -static void -ferode_2_52(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)); - } - } -} - -static void -fdilate_2_53(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls4; -l_int32 wpls10; -l_int32 wpls11; -l_int32 wpls17; -l_int32 wpls18; - - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls18)) | - (*(sptr + wpls11)) | - (*(sptr + wpls4)) | - (*(sptr - wpls3)) | - (*(sptr - wpls10)) | - (*(sptr - wpls17)); - } - } -} - -static void -ferode_2_53(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls3; -l_int32 wpls4; -l_int32 wpls10; -l_int32 wpls11; -l_int32 wpls17; -l_int32 wpls18; - - wpls3 = 
3 * wpls; - wpls4 = 4 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls18)) & - (*(sptr - wpls11)) & - (*(sptr - wpls4)) & - (*(sptr + wpls3)) & - (*(sptr + wpls10)) & - (*(sptr + wpls17)); - } - } -} - -static void -fdilate_2_54(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)); - } - } -} - -static void -ferode_2_54(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)); - } - } -} - -static void -fdilate_2_55(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls6; -l_int32 wpls16; -l_int32 wpls17; - - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 
0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls17)) | - (*(sptr + wpls6)) | - (*(sptr - wpls5)) | - (*(sptr - wpls16)); - } - } -} - -static void -ferode_2_55(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls6; -l_int32 wpls16; -l_int32 wpls17; - - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls17)) & - (*(sptr - wpls6)) & - (*(sptr + wpls5)) & - (*(sptr + wpls16)); - } - } -} - -static void -fdilate_2_56(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - (*sptr) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)); - } - } -} - -static void -ferode_2_56(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - (*sptr) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 
14)); - } - } -} - -static void -fdilate_2_57(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls9; -l_int32 wpls18; - - wpls9 = 9 * wpls; - wpls18 = 18 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls18)) | - (*(sptr + wpls9)) | - (*sptr) | - (*(sptr - wpls9)) | - (*(sptr - wpls18)); - } - } -} - -static void -ferode_2_57(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls9; -l_int32 wpls18; - - wpls9 = 9 * wpls; - wpls18 = 18 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls18)) & - (*(sptr - wpls9)) & - (*sptr) & - (*(sptr + wpls9)) & - (*(sptr + wpls18)); - } - } -} - -static void -fdilate_2_58(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 20) | (*(sptr - 1) << 12)); - } - } -} - -static void -ferode_2_58(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = 
(l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 20) | (*(sptr + 1) >> 12)); - } - } -} - -static void -fdilate_2_59(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; -l_int32 wpls12; -l_int32 wpls20; - - wpls4 = 4 * wpls; - wpls12 = 12 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls20)) | - (*(sptr + wpls12)) | - (*(sptr + wpls4)) | - (*(sptr - wpls4)) | - (*(sptr - wpls12)) | - (*(sptr - wpls20)); - } - } -} - -static void -ferode_2_59(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; -l_int32 wpls12; -l_int32 wpls20; - - wpls4 = 4 * wpls; - wpls12 = 12 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls20)) & - (*(sptr - wpls12)) & - (*(sptr - wpls4)) & - (*(sptr + wpls4)) & - (*(sptr + wpls12)) & - (*(sptr + wpls20)); - } - } -} - -static void -fdilate_2_60(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 
0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 21) | (*(sptr + 1) >> 11)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - (*sptr) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 21) | (*(sptr - 1) << 11)); - } - } -} - -static void -ferode_2_60(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 21) | (*(sptr - 1) << 11)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - (*sptr) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 21) | (*(sptr + 1) >> 11)); - } - } -} - -static void -fdilate_2_61(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls7; -l_int32 wpls14; -l_int32 wpls21; - - wpls7 = 7 * wpls; - wpls14 = 14 * wpls; - wpls21 = 21 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls21)) | - (*(sptr + wpls14)) | - (*(sptr + wpls7)) | - (*sptr) | - (*(sptr - wpls7)) | - (*(sptr - wpls14)) | - (*(sptr - wpls21)); - } - } -} - -static void -ferode_2_61(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls7; -l_int32 wpls14; -l_int32 wpls21; - - wpls7 = 7 * wpls; - wpls14 = 14 * 
wpls; - wpls21 = 21 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls21)) & - (*(sptr - wpls14)) & - (*(sptr - wpls7)) & - (*sptr) & - (*(sptr + wpls7)) & - (*(sptr + wpls14)) & - (*(sptr + wpls21)); - } - } -} - -static void -fdilate_2_62(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - (*sptr) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 20) | (*(sptr - 1) << 12)); - } - } -} - -static void -ferode_2_62(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - (*sptr) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 20) | (*(sptr + 1) >> 12)); - } - } -} - -static void -fdilate_2_63(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls10; -l_int32 wpls20; - - wpls10 = 10 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - 
*dptr = (*(sptr + wpls20)) | - (*(sptr + wpls10)) | - (*sptr) | - (*(sptr - wpls10)) | - (*(sptr - wpls20)); - } - } -} - -static void -ferode_2_63(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls10; -l_int32 wpls20; - - wpls10 = 10 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls20)) & - (*(sptr - wpls10)) & - (*sptr) & - (*(sptr + wpls10)) & - (*(sptr + wpls20)); - } - } -} - -static void -fdilate_2_64(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 19) | (*(sptr - 1) << 13)); - } - } -} - -static void -ferode_2_64(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 19) | (*(sptr + 1) >> 13)); - } - } -} - -static void -fdilate_2_65(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, 
*dptr; -l_int32 wpls6; -l_int32 wpls7; -l_int32 wpls19; -l_int32 wpls20; - - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls20)) | - (*(sptr + wpls7)) | - (*(sptr - wpls6)) | - (*(sptr - wpls19)); - } - } -} - -static void -ferode_2_65(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls6; -l_int32 wpls7; -l_int32 wpls19; -l_int32 wpls20; - - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls20)) & - (*(sptr - wpls7)) & - (*(sptr + wpls6)) & - (*(sptr + wpls19)); - } - } -} - -static void -fdilate_2_66(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 23) | (*(sptr + 1) >> 9)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 22) | (*(sptr - 1) << 10)); - } - } -} - -static void -ferode_2_66(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; 
i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 23) | (*(sptr - 1) << 9)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 22) | (*(sptr + 1) >> 10)); - } - } -} - -static void -fdilate_2_67(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; -l_int32 wpls5; -l_int32 wpls13; -l_int32 wpls14; -l_int32 wpls22; -l_int32 wpls23; - - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls22 = 22 * wpls; - wpls23 = 23 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls23)) | - (*(sptr + wpls14)) | - (*(sptr + wpls5)) | - (*(sptr - wpls4)) | - (*(sptr - wpls13)) | - (*(sptr - wpls22)); - } - } -} - -static void -ferode_2_67(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls4; -l_int32 wpls5; -l_int32 wpls13; -l_int32 wpls14; -l_int32 wpls22; -l_int32 wpls23; - - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls22 = 22 * wpls; - wpls23 = 23 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls23)) & - (*(sptr - wpls14)) & - (*(sptr - wpls5)) & - (*(sptr + wpls4)) & - (*(sptr + wpls13)) & - (*(sptr + wpls22)); - } - } -} - -static void -fdilate_2_68(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - 
l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 22) | (*(sptr + 1) >> 10)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - (*sptr) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 22) | (*(sptr - 1) << 10)); - } - } -} - -static void -ferode_2_68(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 22) | (*(sptr - 1) << 10)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - (*sptr) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 22) | (*(sptr + 1) >> 10)); - } - } -} - -static void -fdilate_2_69(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls11; -l_int32 wpls22; - - wpls11 = 11 * wpls; - wpls22 = 22 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls22)) | - (*(sptr + wpls11)) | - (*sptr) | - (*(sptr - wpls11)) | - (*(sptr - wpls22)); - } - } -} - -static void -ferode_2_69(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls11; -l_int32 wpls22; - - wpls11 = 11 * wpls; - wpls22 = 22 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr 
= datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls22)) & - (*(sptr - wpls11)) & - (*sptr) & - (*(sptr + wpls11)) & - (*(sptr + wpls22)); - } - } -} - -static void -fdilate_2_70(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 24) | (*(sptr + 1) >> 8)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - (*sptr) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 24) | (*(sptr - 1) << 8)); - } - } -} - -static void -ferode_2_70(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 24) | (*(sptr - 1) << 8)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - (*sptr) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 24) | (*(sptr + 1) >> 8)); - } - } -} - -static void -fdilate_2_71(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls8; -l_int32 wpls16; -l_int32 wpls24; - - wpls8 = 8 * wpls; - wpls16 = 16 * wpls; - wpls24 = 24 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j 
< pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls24)) | - (*(sptr + wpls16)) | - (*(sptr + wpls8)) | - (*sptr) | - (*(sptr - wpls8)) | - (*(sptr - wpls16)) | - (*(sptr - wpls24)); - } - } -} - -static void -ferode_2_71(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls8; -l_int32 wpls16; -l_int32 wpls24; - - wpls8 = 8 * wpls; - wpls16 = 16 * wpls; - wpls24 = 24 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls24)) & - (*(sptr - wpls16)) & - (*(sptr - wpls8)) & - (*sptr) & - (*(sptr + wpls8)) & - (*(sptr + wpls16)) & - (*(sptr + wpls24)); - } - } -} - -static void -fdilate_2_72(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 25) | (*(sptr + 1) >> 7)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) | - ((*(sptr) >> 25) | (*(sptr - 1) << 7)); - } - } -} - -static void -ferode_2_72(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 25) | (*(sptr - 1) << 7)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 5) | (*(sptr 
- 1) << 27)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 25) | (*(sptr + 1) >> 7)); - } - } -} - -static void -fdilate_2_73(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls15; -l_int32 wpls25; - - wpls5 = 5 * wpls; - wpls15 = 15 * wpls; - wpls25 = 25 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls25)) | - (*(sptr + wpls15)) | - (*(sptr + wpls5)) | - (*(sptr - wpls5)) | - (*(sptr - wpls15)) | - (*(sptr - wpls25)); - } - } -} - -static void -ferode_2_73(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls5; -l_int32 wpls15; -l_int32 wpls25; - - wpls5 = 5 * wpls; - wpls15 = 15 * wpls; - wpls25 = 25 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls25)) & - (*(sptr - wpls15)) & - (*(sptr - wpls5)) & - (*(sptr + wpls5)) & - (*(sptr + wpls15)) & - (*(sptr + wpls25)); - } - } -} - -static void -fdilate_2_74(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 27) | (*(sptr + 1) >> 5)) | - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - (*sptr) | - ((*(sptr) >> 9) | (*(sptr - 1) << 
23)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) | - ((*(sptr) >> 27) | (*(sptr - 1) << 5)); - } - } -} - -static void -ferode_2_74(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 27) | (*(sptr - 1) << 5)) & - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - (*sptr) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) & - ((*(sptr) << 27) | (*(sptr + 1) >> 5)); - } - } -} - -static void -fdilate_2_75(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls9; -l_int32 wpls18; -l_int32 wpls27; - - wpls9 = 9 * wpls; - wpls18 = 18 * wpls; - wpls27 = 27 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls27)) | - (*(sptr + wpls18)) | - (*(sptr + wpls9)) | - (*sptr) | - (*(sptr - wpls9)) | - (*(sptr - wpls18)) | - (*(sptr - wpls27)); - } - } -} - -static void -ferode_2_75(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls9; -l_int32 wpls18; -l_int32 wpls27; - - wpls9 = 9 * wpls; - wpls18 = 18 * wpls; - wpls27 = 27 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls27)) & - (*(sptr - wpls18)) & - (*(sptr - wpls9)) & - (*sptr) & - 
(*(sptr + wpls9)) & - (*(sptr + wpls18)) & - (*(sptr + wpls27)); - } - } -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/edge.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/edge.c deleted file mode 100644 index 764ef3ed..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/edge.c +++ /dev/null @@ -1,647 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file edge.c - *
- *
- *      Sobel edge detecting filter
- *          PIX      *pixSobelEdgeFilter()
- *
- *      Two-sided edge gradient filter
- *          PIX      *pixTwoSidedEdgeFilter()
- *
- *      Measurement of edge smoothness
- *          l_int32   pixMeasureEdgeSmoothness()
- *          NUMA     *pixGetEdgeProfile()
- *          l_int32   pixGetLastOffPixelInRun()
- *          l_int32   pixGetLastOnPixelInRun()
- *
- *
- *  The Sobel edge detector uses these two simple gradient filters.
- *
- *       1    2    1             1    0   -1
- *       0    0    0             2    0   -2
- *      -1   -2   -1             1    0   -1
- *
- *      (horizontal)             (vertical)
- *
- *  To use both the vertical and horizontal filters, set the orientation
- *  flag to L_ALL_EDGES; this sums the abs. value of their outputs,
- *  clipped to 255.
- *
- *  See comments below for displaying the resulting image with
- *  the edges dark, both for 8 bpp and 1 bpp.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*----------------------------------------------------------------------* - * Sobel edge detecting filter * - *----------------------------------------------------------------------*/ -/*! - * \brief pixSobelEdgeFilter() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] orientflag L_HORIZONTAL_EDGES, L_VERTICAL_EDGES, L_ALL_EDGES - * \return pixd 8 bpp, edges are brighter, or NULL on error - * - *
- * Notes:
- *      (1) Invert pixd to see larger gradients as darker (grayscale).
- *      (2) To generate a binary image of the edges, threshold
- *          the result using pixThresholdToBinary().  If the high
- *          edge values are to be fg (1), invert after running
- *          pixThresholdToBinary().
- *      (3) Label the pixels as follows:
- *              1    4    7
- *              2    5    8
- *              3    6    9
- *          Read the data incrementally across the image and unroll
- *          the loop.
- *      (4) This runs at about 45 Mpix/sec on a 3 GHz processor.
- * 
- */ -PIX * -pixSobelEdgeFilter(PIX *pixs, - l_int32 orientflag) -{ -l_int32 w, h, d, i, j, wplt, wpld, gx, gy, vald; -l_int32 val1, val2, val3, val4, val5, val6, val7, val8, val9; -l_uint32 *datat, *linet, *datad, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixSobelEdgeFilter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (orientflag != L_HORIZONTAL_EDGES && orientflag != L_VERTICAL_EDGES && - orientflag != L_ALL_EDGES) - return (PIX *)ERROR_PTR("invalid orientflag", procName, NULL); - - /* Add 1 pixel (mirrored) to each side of the image. */ - if ((pixt = pixAddMirroredBorder(pixs, 1, 1, 1, 1)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - - /* Compute filter output at each location. */ - pixd = pixCreateTemplate(pixs); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (j == 0) { /* start a new row */ - val1 = GET_DATA_BYTE(linet, j); - val2 = GET_DATA_BYTE(linet + wplt, j); - val3 = GET_DATA_BYTE(linet + 2 * wplt, j); - val4 = GET_DATA_BYTE(linet, j + 1); - val5 = GET_DATA_BYTE(linet + wplt, j + 1); - val6 = GET_DATA_BYTE(linet + 2 * wplt, j + 1); - val7 = GET_DATA_BYTE(linet, j + 2); - val8 = GET_DATA_BYTE(linet + wplt, j + 2); - val9 = GET_DATA_BYTE(linet + 2 * wplt, j + 2); - } else { /* shift right by 1 pixel; update incrementally */ - val1 = val4; - val2 = val5; - val3 = val6; - val4 = val7; - val5 = val8; - val6 = val9; - val7 = GET_DATA_BYTE(linet, j + 2); - val8 = GET_DATA_BYTE(linet + wplt, j + 2); - val9 = GET_DATA_BYTE(linet + 2 * wplt, j + 2); - } - if (orientflag == L_HORIZONTAL_EDGES) - vald = L_ABS(val1 + 2 * val4 + val7 - - val3 - 2 * val6 - val9) >> 3; - else if (orientflag == L_VERTICAL_EDGES) - vald = 
L_ABS(val1 + 2 * val2 + val3 - val7 - - 2 * val8 - val9) >> 3; - else { /* L_ALL_EDGES */ - gx = L_ABS(val1 + 2 * val2 + val3 - val7 - - 2 * val8 - val9) >> 3; - gy = L_ABS(val1 + 2 * val4 + val7 - - val3 - 2 * val6 - val9) >> 3; - vald = L_MIN(255, gx + gy); - } - SET_DATA_BYTE(lined, j, vald); - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Two-sided edge gradient filter * - *----------------------------------------------------------------------*/ -/*! - * \brief pixTwoSidedEdgeFilter() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] orientflag L_HORIZONTAL_EDGES, L_VERTICAL_EDGES - * \return pixd 8 bpp, edges are brighter, or NULL on error - * - *
- * Notes:
- *      (1) For detecting vertical edges, this considers the
- *          difference of the central pixel from those on the left
- *          and right.  For situations where the gradient is the same
- *          sign on both sides, this computes and stores the minimum
- *          (absolute value of the) difference.  The reason for
- *          checking the sign is that we are looking for pixels within
- *          a transition.  By contrast, for single pixel noise, the pixel
- *          value is either larger than or smaller than its neighbors,
- *          so the gradient would change direction on each side.  Horizontal
- *          edges are handled similarly, looking for vertical gradients.
- *      (2) To generate a binary image of the edges, threshold
- *          the result using pixThresholdToBinary().  If the high
- *          edge values are to be fg (1), invert after running
- *          pixThresholdToBinary().
- *      (3) This runs at about 60 Mpix/sec on a 3 GHz processor.
- *          It is about 30% faster than Sobel, and the results are
- *          similar.
- * 
- */ -PIX * -pixTwoSidedEdgeFilter(PIX *pixs, - l_int32 orientflag) -{ -l_int32 w, h, d, i, j, wpls, wpld; -l_int32 cval, rval, bval, val, lgrad, rgrad, tgrad, bgrad; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixd; - - PROCNAME("pixTwoSidedEdgeFilter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (orientflag != L_HORIZONTAL_EDGES && orientflag != L_VERTICAL_EDGES) - return (PIX *)ERROR_PTR("invalid orientflag", procName, NULL); - - pixd = pixCreateTemplate(pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - if (orientflag == L_VERTICAL_EDGES) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - cval = GET_DATA_BYTE(lines, 1); - lgrad = cval - GET_DATA_BYTE(lines, 0); - for (j = 1; j < w - 1; j++) { - rval = GET_DATA_BYTE(lines, j + 1); - rgrad = rval - cval; - if (lgrad * rgrad > 0) { - if (lgrad < 0) - val = -L_MAX(lgrad, rgrad); - else - val = L_MIN(lgrad, rgrad); - SET_DATA_BYTE(lined, j, val); - } - lgrad = rgrad; - cval = rval; - } - } - } - else { /* L_HORIZONTAL_EDGES) */ - for (j = 0; j < w; j++) { - lines = datas + wpls; - cval = GET_DATA_BYTE(lines, j); /* for line 1 */ - tgrad = cval - GET_DATA_BYTE(datas, j); - for (i = 1; i < h - 1; i++) { - lines += wpls; /* for line i + 1 */ - lined = datad + i * wpld; - bval = GET_DATA_BYTE(lines, j); - bgrad = bval - cval; - if (tgrad * bgrad > 0) { - if (tgrad < 0) - val = -L_MAX(tgrad, bgrad); - else - val = L_MIN(tgrad, bgrad); - SET_DATA_BYTE(lined, j, val); - } - tgrad = bgrad; - cval = bval; - } - } - } - - return pixd; -} - - -/*----------------------------------------------------------------------* - * Measurement of edge smoothness * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixMeasureEdgeSmoothness() - * - * \param[in] pixs 1 bpp - * \param[in] side L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT - * \param[in] minjump minimum jump to be counted; >= 1 - * \param[in] minreversal minimum reversal size for new peak or valley - * \param[out] pjpl [optional] jumps/length: number of jumps, - * normalized to length of component side - * \param[out] pjspl [optional] jumpsum/length: sum of all - * sufficiently large jumps, normalized to length - * of component side - * \param[out] prpl [optional] reversals/length: number of - * peak-to-valley or valley-to-peak reversals, - * normalized to length of component side - * \param[in] debugfile [optional] displays constructed edge; use NULL - * for no output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes three measures of smoothness of the edge of a
- *          connected component:
- *            * jumps/length: (jpl) the number of jumps of size >= %minjump,
- *              normalized to the length of the side
- *            * jump sum/length: (jspl) the sum of all jump lengths of
- *              size >= %minjump, normalized to the length of the side
- *            * reversals/length: (rpl) the number of peak <--> valley
- *              reversals, using %minreverse as a minimum deviation of
- *              the peak or valley from its preceding extremum,
- *              normalized to the length of the side
- *      (2) The input pix should be a single connected component, but
- *          this is not required.
- * 
- */ -l_ok -pixMeasureEdgeSmoothness(PIX *pixs, - l_int32 side, - l_int32 minjump, - l_int32 minreversal, - l_float32 *pjpl, - l_float32 *pjspl, - l_float32 *prpl, - const char *debugfile) -{ -l_int32 i, n, val, nval, diff, njumps, jumpsum, nreversal; -NUMA *na, *nae; - - PROCNAME("pixMeasureEdgeSmoothness"); - - if (pjpl) *pjpl = 0.0; - if (pjspl) *pjspl = 0.0; - if (prpl) *prpl = 0.0; - if (!pjpl && !pjspl && !prpl && !debugfile) - return ERROR_INT("no output requested", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (side != L_FROM_LEFT && side != L_FROM_RIGHT && - side != L_FROM_TOP && side != L_FROM_BOT) - return ERROR_INT("invalid side", procName, 1); - if (minjump < 1) - return ERROR_INT("invalid minjump; must be >= 1", procName, 1); - if (minreversal < 1) - return ERROR_INT("invalid minreversal; must be >= 1", procName, 1); - - if ((na = pixGetEdgeProfile(pixs, side, debugfile)) == NULL) - return ERROR_INT("edge profile not made", procName, 1); - if ((n = numaGetCount(na)) < 2) { - numaDestroy(&na); - return 0; - } - - if (pjpl || pjspl) { - jumpsum = 0; - njumps = 0; - numaGetIValue(na, 0, &val); - for (i = 1; i < n; i++) { - numaGetIValue(na, i, &nval); - diff = L_ABS(nval - val); - if (diff >= minjump) { - njumps++; - jumpsum += diff; - } - val = nval; - } - if (pjpl) - *pjpl = (l_float32)njumps / (l_float32)(n - 1); - if (pjspl) - *pjspl = (l_float32)jumpsum / (l_float32)(n - 1); - } - - if (prpl) { - nae = numaFindExtrema(na, minreversal, NULL); - nreversal = numaGetCount(nae) - 1; - *prpl = (l_float32)nreversal / (l_float32)(n - 1); - numaDestroy(&nae); - } - - numaDestroy(&na); - return 0; -} - - -/*! 
- * \brief pixGetEdgeProfile() - * - * \param[in] pixs 1 bpp - * \param[in] side L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT - * \param[in] debugfile [optional] displays constructed edge; use NULL - * for no output - * \return na of fg edge pixel locations, or NULL on error - */ -NUMA * -pixGetEdgeProfile(PIX *pixs, - l_int32 side, - const char *debugfile) -{ -l_int32 x, y, w, h, loc, index, ival; -l_uint32 val; -NUMA *na; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixGetEdgeProfile"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (side != L_FROM_LEFT && side != L_FROM_RIGHT && - side != L_FROM_TOP && side != L_FROM_BOT) - return (NUMA *)ERROR_PTR("invalid side", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (side == L_FROM_LEFT || side == L_FROM_RIGHT) - na = numaCreate(h); - else - na = numaCreate(w); - if (side == L_FROM_LEFT) { - pixGetLastOffPixelInRun(pixs, 0, 0, L_FROM_LEFT, &loc); - loc = (loc == w - 1) ? 0 : loc + 1; /* back to the left edge */ - numaAddNumber(na, loc); - for (y = 1; y < h; y++) { - pixGetPixel(pixs, loc, y, &val); - if (val == 1) { - pixGetLastOnPixelInRun(pixs, loc, y, L_FROM_RIGHT, &loc); - } else { - pixGetLastOffPixelInRun(pixs, loc, y, L_FROM_LEFT, &loc); - loc = (loc == w - 1) ? 0 : loc + 1; - } - numaAddNumber(na, loc); - } - } - else if (side == L_FROM_RIGHT) { - pixGetLastOffPixelInRun(pixs, w - 1, 0, L_FROM_RIGHT, &loc); - loc = (loc == 0) ? w - 1 : loc - 1; /* back to the right edge */ - numaAddNumber(na, loc); - for (y = 1; y < h; y++) { - pixGetPixel(pixs, loc, y, &val); - if (val == 1) { - pixGetLastOnPixelInRun(pixs, loc, y, L_FROM_LEFT, &loc); - } else { - pixGetLastOffPixelInRun(pixs, loc, y, L_FROM_RIGHT, &loc); - loc = (loc == 0) ? w - 1 : loc - 1; - } - numaAddNumber(na, loc); - } - } - else if (side == L_FROM_TOP) { - pixGetLastOffPixelInRun(pixs, 0, 0, L_FROM_TOP, &loc); - loc = (loc == h - 1) ? 
0 : loc + 1; /* back to the top edge */ - numaAddNumber(na, loc); - for (x = 1; x < w; x++) { - pixGetPixel(pixs, x, loc, &val); - if (val == 1) { - pixGetLastOnPixelInRun(pixs, x, loc, L_FROM_BOT, &loc); - } else { - pixGetLastOffPixelInRun(pixs, x, loc, L_FROM_TOP, &loc); - loc = (loc == h - 1) ? 0 : loc + 1; - } - numaAddNumber(na, loc); - } - } - else { /* side == L_FROM_BOT */ - pixGetLastOffPixelInRun(pixs, 0, h - 1, L_FROM_BOT, &loc); - loc = (loc == 0) ? h - 1 : loc - 1; /* back to the bottom edge */ - numaAddNumber(na, loc); - for (x = 1; x < w; x++) { - pixGetPixel(pixs, x, loc, &val); - if (val == 1) { - pixGetLastOnPixelInRun(pixs, x, loc, L_FROM_TOP, &loc); - } else { - pixGetLastOffPixelInRun(pixs, x, loc, L_FROM_BOT, &loc); - loc = (loc == 0) ? h - 1 : loc - 1; - } - numaAddNumber(na, loc); - } - } - - if (debugfile) { - pixt = pixConvertTo8(pixs, TRUE); - cmap = pixGetColormap(pixt); - pixcmapAddColor(cmap, 255, 0, 0); - index = pixcmapGetCount(cmap) - 1; - if (side == L_FROM_LEFT || side == L_FROM_RIGHT) { - for (y = 0; y < h; y++) { - numaGetIValue(na, y, &ival); - pixSetPixel(pixt, ival, y, index); - } - } else { /* L_FROM_TOP or L_FROM_BOT */ - for (x = 0; x < w; x++) { - numaGetIValue(na, x, &ival); - pixSetPixel(pixt, x, ival, index); - } - } - pixWrite(debugfile, pixt, IFF_PNG); - pixDestroy(&pixt); - } - - return na; -} - - -/* - * \brief pixGetLastOffPixelInRun() - * - * \param[in] pixs 1 bpp - * \param[in] x, y starting location - * \param[in] direction L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT - * \param[out] ploc location in scan direction coordinate - * of last OFF pixel found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Search starts from the pixel at (x, y), which is OFF.
- *      (2) It returns the location in the scan direction of the last
- *          pixel in the current run that is OFF.
- *      (3) The interface for these pixel run functions is cleaner when
- *          you ask for the last pixel in the current run, rather than the
- *          first pixel of opposite polarity that is found, because the
- *          current run may go to the edge of the image, in which case
- *          no pixel of opposite polarity is found.
- * 
- */ -l_ok -pixGetLastOffPixelInRun(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 direction, - l_int32 *ploc) -{ -l_int32 loc, w, h; -l_uint32 val; - - PROCNAME("pixGetLastOffPixelInRun"); - - if (!ploc) - return ERROR_INT("&loc not defined", procName, 1); - *ploc = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - if (direction != L_FROM_LEFT && direction != L_FROM_RIGHT && - direction != L_FROM_TOP && direction != L_FROM_BOT) - return ERROR_INT("invalid side", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - if (direction == L_FROM_LEFT) { - for (loc = x; loc < w; loc++) { - pixGetPixel(pixs, loc, y, &val); - if (val == 1) - break; - } - *ploc = loc - 1; - } else if (direction == L_FROM_RIGHT) { - for (loc = x; loc >= 0; loc--) { - pixGetPixel(pixs, loc, y, &val); - if (val == 1) - break; - } - *ploc = loc + 1; - } - else if (direction == L_FROM_TOP) { - for (loc = y; loc < h; loc++) { - pixGetPixel(pixs, x, loc, &val); - if (val == 1) - break; - } - *ploc = loc - 1; - } - else if (direction == L_FROM_BOT) { - for (loc = y; loc >= 0; loc--) { - pixGetPixel(pixs, x, loc, &val); - if (val == 1) - break; - } - *ploc = loc + 1; - } - return 0; -} - - -/* - * \brief pixGetLastOnPixelInRun() - * - * \param[in] pixs 1 bpp - * \param[in] x, y starting location - * \param[in] direction L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT - * \param[out] ploc location in scan direction coordinate - * of first ON pixel found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Search starts from the pixel at (x, y), which is ON.
- *      (2) It returns the location in the scan direction of the last
- *          pixel in the current run that is ON.
- * 
- */ -l_int32 -pixGetLastOnPixelInRun(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 direction, - l_int32 *ploc) -{ -l_int32 loc, w, h; -l_uint32 val; - - PROCNAME("pixLastOnPixelInRun"); - - if (!ploc) - return ERROR_INT("&loc not defined", procName, 1); - *ploc = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - if (direction != L_FROM_LEFT && direction != L_FROM_RIGHT && - direction != L_FROM_TOP && direction != L_FROM_BOT) - return ERROR_INT("invalid side", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - if (direction == L_FROM_LEFT) { - for (loc = x; loc < w; loc++) { - pixGetPixel(pixs, loc, y, &val); - if (val == 0) - break; - } - *ploc = loc - 1; - } else if (direction == L_FROM_RIGHT) { - for (loc = x; loc >= 0; loc--) { - pixGetPixel(pixs, loc, y, &val); - if (val == 0) - break; - } - *ploc = loc + 1; - } - else if (direction == L_FROM_TOP) { - for (loc = y; loc < h; loc++) { - pixGetPixel(pixs, x, loc, &val); - if (val == 0) - break; - } - *ploc = loc - 1; - } - else if (direction == L_FROM_BOT) { - for (loc = y; loc >= 0; loc--) { - pixGetPixel(pixs, x, loc, &val); - if (val == 0) - break; - } - *ploc = loc + 1; - } - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/encoding.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/encoding.c deleted file mode 100644 index 4705bde4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/encoding.c +++ /dev/null @@ -1,652 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - This software is distributed in the hope that it will be - - useful, but with NO WARRANTY OF ANY KIND. - - No author or distributor accepts responsibility to anyone for the - - consequences of using this software, or for whether it serves any - - particular purpose or works at all, unless he or she says so in - - writing. 
Everyone is granted permission to copy, modify and - - redistribute this source code, for commercial or non-commercial - - purposes, with the following restrictions: (1) the origin of this - - source code must not be misrepresented; (2) modified versions must - - be plainly marked as such; and (3) this notice may not be removed - - or altered from any source or modified source distribution. - *====================================================================*/ - -/* - * encodings.c - * - * Base64 - * char *encodeBase64() - * l_uint8 *decodeBase64() - * static l_int32 isBase64() - * static l_int32 *genReverseTab64() - * static void byteConvert3to4() - * static void byteConvert4to3() - * - * Ascii85 - * char *encodeAscii85() - * l_uint8 *decodeAscii85() - * static l_int32 convertChunkToAscii85() - * - * String reformatting for base 64 encoded data - * char *reformatPacked64() - * - * Base64 encoding is useful for encding binary data in a restricted set of - * 64 printable ascii symbols, that includes the 62 alphanumerics and '+' - * and '/'. Notably it does not include quotes, so that base64 encoded - * strings can be used in situations where quotes are used for formatting. - * 64 symbols was chosen because it is the smallest number that can be used - * in 4-for-3 byte encoding of binary data: - * log2(64) / log2(256) = 0.75 = 3/4 - * - * Ascii85 encoding is used in PostScript and some pdf files for - * representing binary data (for example, a compressed image) in printable - * ascii symbols. It has a dictionary of 85 symbols; 85 was chosen because - * it is the smallest number that can be used in 5-for-4 byte encoding - * of binary data (256 possible input values). 
This can be seen from - * the max information content in such a sequence: - * log2(84) / log2(256) = 0.799 < 4/5 - * log2(85) / log2(256) = 0.801 > 4/5 - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Base64 encoding table in string representation */ -static const l_int32 MAX_BASE64_LINE = 72; /* max line length base64 */ -static const char *tablechar64 = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -static l_int32 isBase64(char); -static l_int32 *genReverseTab64(void); -static void byteConvert3to4(l_uint8 *in3, l_uint8 *out4); -static void byteConvert4to3(l_uint8 *in4, l_uint8 *out3); - - /* Ascii85 encoding */ -static const l_int32 MAX_ASCII85_LINE = 64; /* max line length ascii85 */ -static const l_uint32 power85[5] = {1, - 85, - 85 * 85, - 85 * 85 * 85, - 85 * 85 * 85 * 85}; - -static l_int32 convertChunkToAscii85(const l_uint8 *inarray, l_int32 insize, - l_int32 *pindex, char *outbuf, - l_int32 *pnbout); - -/*-------------------------------------------------------------* - * Utility for encoding and decoding data with base64 * - *-------------------------------------------------------------*/ -/*! - * \brief encodeBase64() - * - * \param[in] inarray input binary data - * \param[in] insize number of bytes in input array - * \param[out] poutsize number of bytes in output char array - * \return chara with MAX_BASE64_LINE characters + \n in each line - * - *
- * Notes:
- *      (1) The input character data is unrestricted binary.
- *          The output encoded data consists of the 64 characters
- *          in the base64 set, plus newlines and the pad character '='.
- * 
- */ -char * -encodeBase64(const l_uint8 *inarray, - l_int32 insize, - l_int32 *poutsize) -{ -char *chara; -const l_uint8 *bytea; -l_uint8 array3[3], array4[4]; -l_int32 outsize, i, j, index, linecount; - - PROCNAME("encodeBase64"); - - if (!poutsize) - return (char *)ERROR_PTR("&outsize not defined", procName, NULL); - *poutsize = 0; - if (!inarray) - return (char *)ERROR_PTR("inarray not defined", procName, NULL); - if (insize <= 0) - return (char *)ERROR_PTR("insize not > 0", procName, NULL); - - /* The output array is padded to a multiple of 4 bytes, not - * counting the newlines. We just need to allocate a large - * enough array, and add 4 bytes to make sure it is big enough. */ - outsize = 4 * ((insize + 2) / 3); /* without newlines */ - outsize += outsize / MAX_BASE64_LINE + 4; /* with the newlines */ - if ((chara = (char *)LEPT_CALLOC(outsize, sizeof(char))) == NULL) - return (char *)ERROR_PTR("chara not made", procName, NULL); - - /* Read all the input data, and convert in sets of 3 input - * bytes --> 4 output bytes. */ - i = index = linecount = 0; - bytea = inarray; - while (insize--) { - if (linecount == MAX_BASE64_LINE) { - chara[index++] = '\n'; - linecount = 0; - } - array3[i++] = *bytea++; - if (i == 3) { /* convert 3 to 4 and save */ - byteConvert3to4(array3, array4); - for (j = 0; j < 4; j++) - chara[index++] = tablechar64[array4[j]]; - i = 0; - linecount += 4; - } - } - - /* Suppose 1 or 2 bytes has been read but not yet processed. - * If 1 byte has been read, this will generate 2 bytes of - * output, with 6 bits to the first byte and 2 bits to the second. - * We will add two bytes of '=' for padding. - * If 2 bytes has been read, this will generate 3 bytes of output, - * with 6 bits to the first 2 bytes and 4 bits to the third, and - * we add a fourth padding byte ('='). 
*/ - if (i > 0) { /* left-over 1 or 2 input bytes */ - for (j = i; j < 3; j++) - array3[j] = '\0'; /* zero the remaining input bytes */ - byteConvert3to4(array3, array4); - for (j = 0; j <= i; j++) - chara[index++] = tablechar64[array4[j]]; - for (j = i + 1; j < 4; j++) - chara[index++] = '='; - } - *poutsize = index; - - return chara; -} - - -/*! - * \brief decodeBase64() - * - * \param[in] inarray input encoded char data, with 72 chars/line) - * \param[in] insize number of bytes in input array - * \param[out] poutsize number of bytes in output byte array - * \return bytea decoded byte data, or NULL on error - * - *
- * Notes:
- *      (1) The input character data should have only 66 different characters:
- *          The 64 character set for base64 encoding, plus the pad
- *          character '=' and newlines for formatting with fixed line
- *          lengths.  If there are any other characters, the decoder
- *          will declare the input data to be invalid and return NULL.
- *      (2) The decoder ignores newlines and, for a valid input string,
- *          stops reading input when a pad byte is found.
- * 
- */ -l_uint8 * -decodeBase64(const char *inarray, - l_int32 insize, - l_int32 *poutsize) -{ -char inchar; -l_uint8 *bytea; -l_uint8 array3[3], array4[4]; -l_int32 *rtable64; -l_int32 i, j, outsize, in_index, out_index; - - PROCNAME("decodeBase64"); - - if (!poutsize) - return (l_uint8 *)ERROR_PTR("&outsize not defined", procName, NULL); - *poutsize = 0; - if (!inarray) - return (l_uint8 *)ERROR_PTR("inarray not defined", procName, NULL); - if (insize <= 0) - return (l_uint8 *)ERROR_PTR("insize not > 0", procName, NULL); - - /* Validate the input data */ - for (i = 0; i < insize; i++) { - inchar = inarray[i]; - if (inchar == '\n') continue; - if (isBase64(inchar) == 0 && inchar != '=') - return (l_uint8 *)ERROR_PTR("invalid char in inarray", - procName, NULL); - } - - /* The input array typically is made with a newline every - * MAX_BASE64_LINE input bytes. However, as a printed string, the - * newlines would be stripped. So when we allocate the output - * array, assume the input array is all data, but strip - * out the newlines during decoding. This guarantees that - * the allocated array is large enough. */ - outsize = 3 * ((insize + 3) / 4) + 4; - if ((bytea = (l_uint8 *)LEPT_CALLOC(outsize, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("bytea not made", procName, NULL); - - /* The number of encoded input data bytes is always a multiple of 4. - * Read all the data, until you reach either the end or - * the first pad character '='. The data is processed in - * units of 4 input bytes, generating 3 output decoded bytes - * of binary data. Newlines are ignored. If there are no - * pad bytes, i == 0 at the end of this section. 
*/ - rtable64 = genReverseTab64(); - i = in_index = out_index = 0; - for (in_index = 0; in_index < insize; in_index++) { - inchar = inarray[in_index]; - if (inchar == '\n') continue; - if (inchar == '=') break; - array4[i++] = rtable64[(unsigned char)inchar]; - if (i < 4) { - continue; - } else { /* i == 4; convert 4 to 3 and save */ - byteConvert4to3(array4, array3); - for (j = 0; j < 3; j++) - bytea[out_index++] = array3[j]; - i = 0; - } - } - - /* If i > 0, we ran into pad bytes ('='). If i == 2, there are - * two input pad bytes and one output data byte. If i == 3, - * there is one input pad byte and two output data bytes. */ - if (i > 0) { - for (j = i; j < 4; j++) - array4[j] = '\0'; /* zero the remaining input bytes */ - byteConvert4to3(array4, array3); - for (j = 0; j < i - 1; j++) - bytea[out_index++] = array3[j]; - } - *poutsize = out_index; - - LEPT_FREE(rtable64); - return bytea; -} - - -/*! - * \brief isBase64() - */ -static l_int32 -isBase64(char c) -{ - return (isalnum(((int)c)) || ((c) == '+') || ((c) == '/')) ? 1 : 0; -} - -/*! - * \brief genReverseTab64() - */ -static l_int32 * -genReverseTab64() -{ -l_int32 i; -l_int32 *rtable64; - - rtable64 = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); - for (i = 0; i < 64; i++) { - rtable64[(unsigned char)tablechar64[i]] = i; - } - return rtable64; -} - -/*! - * \brief byteConvert3to4() - */ -static void -byteConvert3to4(l_uint8 *in3, - l_uint8 *out4) -{ - out4[0] = in3[0] >> 2; - out4[1] = ((in3[0] & 0x03) << 4) | (in3[1] >> 4); - out4[2] = ((in3[1] & 0x0f) << 2) | (in3[2] >> 6); - out4[3] = in3[2] & 0x3f; - return; -} - -/*! 
- * \brief byteConvert4to3() - */ -static void -byteConvert4to3(l_uint8 *in4, - l_uint8 *out3) -{ - out3[0] = (in4[0] << 2) | (in4[1] >> 4); - out3[1] = ((in4[1] & 0x0f) << 4) | (in4[2] >> 2); - out3[2] = ((in4[2] & 0x03) << 6) | in4[3]; - return; -} - - -/*-------------------------------------------------------------* - * Utility for encoding and decoding data with ascii85 * - *-------------------------------------------------------------*/ -/*! - * \brief encodeAscii85() - * - * \param[in] inarray input data - * \param[in] insize number of bytes in input array - * \param[out] poutsize number of bytes in output char array - * \return chara with 64 characters + \n in each line - * - *
- * Notes:
- *      (1) Ghostscript has a stack break if the last line of
- *          data only has a '>', so we avoid the problem by
- *          always putting '~>' on the last line.
- * 
- */ -char * -encodeAscii85(const l_uint8 *inarray, - l_int32 insize, - l_int32 *poutsize) -{ -char *chara; -char outbuf[8]; -l_int32 maxsize, i, index, outindex, linecount, nbout, eof; - - PROCNAME("encodeAscii85"); - - if (!poutsize) - return (char *)ERROR_PTR("&outsize not defined", procName, NULL); - *poutsize = 0; - if (!inarray) - return (char *)ERROR_PTR("inarray not defined", procName, NULL); - if (insize <= 0) - return (char *)ERROR_PTR("insize not > 0", procName, NULL); - - /* Accumulate results in char array */ - maxsize = (l_int32)(80. + (insize * 5. / 4.) * - (1. + 2. / MAX_ASCII85_LINE)); - if ((chara = (char *)LEPT_CALLOC(maxsize, sizeof(char))) == NULL) - return (char *)ERROR_PTR("chara not made", procName, NULL); - - linecount = 0; - index = 0; - outindex = 0; - while (1) { - eof = convertChunkToAscii85(inarray, insize, &index, outbuf, &nbout); - for (i = 0; i < nbout; i++) { - chara[outindex++] = outbuf[i]; - linecount++; - if (linecount >= MAX_ASCII85_LINE) { - chara[outindex++] = '\n'; - linecount = 0; - } - } - if (eof == TRUE) { - if (linecount != 0) - chara[outindex++] = '\n'; - chara[outindex++] = '~'; - chara[outindex++] = '>'; - chara[outindex++] = '\n'; - break; - } - } - - *poutsize = outindex; - return chara; -} - - -/*! - * \brief convertChunkToAscii85() - * - * \param[in] inarray input data - * \param[in] insize number of bytes in input array - * \param[out] pindex use and -- ptr - * \param[in] outbuf holds 8 ascii chars; we use no more than 7 - * \param[out] pnbsout number of bytes written to outbuf - * \return boolean for eof 0 if more data, 1 if end of file - * - *
- * Notes:
- *      (1) Attempts to read 4 bytes and write 5.
- *      (2) Writes 1 byte if the value is 0.
- * 
- */ -static l_int32 -convertChunkToAscii85(const l_uint8 *inarray, - l_int32 insize, - l_int32 *pindex, - char *outbuf, - l_int32 *pnbout) -{ -l_uint8 inbyte; -l_uint32 inword, val; -l_int32 eof, index, nread, nbout, i; - - eof = FALSE; - index = *pindex; - nread = L_MIN(4, (insize - index)); - if (insize == index + nread) - eof = TRUE; - *pindex += nread; /* save new index */ - - /* Read input data and save in l_uint32 */ - inword = 0; - for (i = 0; i < nread; i++) { - inbyte = inarray[index + i]; - inword += inbyte << (8 * (3 - i)); - } - -#if 0 - lept_stderr("index = %d, nread = %d\n", index, nread); - lept_stderr("inword = %x\n", inword); - lept_stderr("eof = %d\n", eof); -#endif - - /* Special case: output 1 byte only */ - if (inword == 0) { - outbuf[0] = 'z'; - nbout = 1; - } else { /* output nread + 1 bytes */ - for (i = 4; i >= 4 - nread; i--) { - val = inword / power85[i]; - outbuf[4 - i] = (l_uint8)(val + '!'); - inword -= val * power85[i]; - } - nbout = nread + 1; - } - *pnbout = nbout; - - return eof; -} - - -/*! - * \brief decodeAscii85() - * - * \param[in] inarray ascii85 input data - * \param[in] insize number of bytes in input array - * \param[out] poutsize number of bytes in output l_uint8 array - * \return outarray binary - * - *
- * Notes:
- *      (1) We assume the data is properly encoded, so we do not check
- *          for invalid characters or the final '>' character.
- *      (2) We permit whitespace to be added to the encoding in an
- *          arbitrary way.
- * 
- */ -l_uint8 * -decodeAscii85(const char *inarray, - l_int32 insize, - l_int32 *poutsize) -{ -char inc; -const char *pin; -l_uint8 val; -l_uint8 *outa; -l_int32 maxsize, ocount, bytecount, index; -l_uint32 oword; - - PROCNAME("decodeAscii85"); - - if (!poutsize) - return (l_uint8 *)ERROR_PTR("&outsize not defined", procName, NULL); - *poutsize = 0; - if (!inarray) - return (l_uint8 *)ERROR_PTR("inarray not defined", procName, NULL); - if (insize <= 0) - return (l_uint8 *)ERROR_PTR("insize not > 0", procName, NULL); - - /* Accumulate results in outa */ - maxsize = (l_int32)(80. + (insize * 4. / 5.)); /* plenty big */ - if ((outa = (l_uint8 *)LEPT_CALLOC(maxsize, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("outa not made", procName, NULL); - - pin = inarray; - ocount = 0; /* byte index into outa */ - oword = 0; - for (index = 0, bytecount = 0; index < insize; index++, pin++) { - inc = *pin; - - if (inc == ' ' || inc == '\t' || inc == '\n' || - inc == '\f' || inc == '\r' || inc == '\v') /* ignore white space */ - continue; - - val = inc - '!'; - if (val < 85) { - oword = oword * 85 + val; - if (bytecount < 4) { - bytecount++; - } else { /* we have all 5 input chars for the oword */ - outa[ocount] = (oword >> 24) & 0xff; - outa[ocount + 1] = (oword >> 16) & 0xff; - outa[ocount + 2] = (oword >> 8) & 0xff; - outa[ocount + 3] = oword & 0xff; - ocount += 4; - bytecount = 0; - oword = 0; - } - } else if (inc == 'z' && bytecount == 0) { - outa[ocount] = 0; - outa[ocount + 1] = 0; - outa[ocount + 2] = 0; - outa[ocount + 3] = 0; - ocount += 4; - } else if (inc == '~') { /* end of data */ - L_INFO(" %d extra bytes output\n", procName, bytecount - 1); - switch (bytecount) { - case 0: /* normal eof */ - case 1: /* error */ - break; - case 2: /* 1 extra byte */ - oword = oword * power85[3] + 0xffffff; - outa[ocount] = (oword >> 24) & 0xff; - break; - case 3: /* 2 extra bytes */ - oword = oword * power85[2] + 0xffff; - outa[ocount] = (oword >> 24) & 0xff; - 
outa[ocount + 1] = (oword >> 16) & 0xff; - break; - case 4: /* 3 extra bytes */ - oword = oword * 85 + 0xff; - outa[ocount] = (oword >> 24) & 0xff; - outa[ocount + 1] = (oword >> 16) & 0xff; - outa[ocount + 2] = (oword >> 8) & 0xff; - break; - } - if (bytecount > 1) - ocount += (bytecount - 1); - break; - } - } - *poutsize = ocount; - - return outa; -} - - -/*-------------------------------------------------------------* - * String reformatting for base 64 encoded data * - *-------------------------------------------------------------*/ -/*! - * \brief reformatPacked64() - * - * \param[in] inarray base64 encoded string with newlines - * \param[in] insize number of bytes in input array - * \param[in] leadspace number of spaces in each line before the data - * \param[in] linechars number of bytes of data in each line; multiple of 4 - * \param[in] addquotes 1 to add quotes to each line of data; 0 to skip - * \param[out] poutsize number of bytes in output char array - * \return outarray ascii - * - *
- * Notes:
- *      (1) Each line in the output array has %leadspace space characters,
- *          followed optionally by a double-quote, followed by %linechars
- *          bytes of base64 data, followed optionally by a double-quote,
- *          followed by a newline.
- *      (2) This can be used to convert a base64 encoded string to a
- *          string formatted for inclusion in a C source file.
- * 
- */ -char * -reformatPacked64(const char *inarray, - l_int32 insize, - l_int32 leadspace, - l_int32 linechars, - l_int32 addquotes, - l_int32 *poutsize) -{ -char *flata, *outa; -l_int32 i, j, flatindex, flatsize, outindex, nlines, linewithpad, linecount; - - PROCNAME("reformatPacked64"); - - if (!poutsize) - return (char *)ERROR_PTR("&outsize not defined", procName, NULL); - *poutsize = 0; - if (!inarray) - return (char *)ERROR_PTR("inarray not defined", procName, NULL); - if (insize <= 0) - return (char *)ERROR_PTR("insize not > 0", procName, NULL); - if (leadspace < 0) - return (char *)ERROR_PTR("leadspace must be >= 0", procName, NULL); - if (linechars % 4) - return (char *)ERROR_PTR("linechars % 4 must be 0", procName, NULL); - - /* Remove all white space */ - if ((flata = (char *)LEPT_CALLOC(insize, sizeof(char))) == NULL) - return (char *)ERROR_PTR("flata not made", procName, NULL); - for (i = 0, flatindex = 0; i < insize; i++) { - if (isBase64(inarray[i]) || inarray[i] == '=') - flata[flatindex++] = inarray[i]; - } - - /* Generate output string */ - flatsize = flatindex; - nlines = (flatsize + linechars - 1) / linechars; - linewithpad = leadspace + linechars + 1; /* including newline */ - if (addquotes) linewithpad += 2; - if ((outa = (char *)LEPT_CALLOC((size_t)nlines * linewithpad, - sizeof(char))) == NULL) { - LEPT_FREE(flata); - return (char *)ERROR_PTR("outa not made", procName, NULL); - } - for (j = 0, outindex = 0; j < leadspace; j++) - outa[outindex++] = ' '; - if (addquotes) outa[outindex++] = '"'; - for (i = 0, linecount = 0; i < flatsize; i++) { - if (linecount == linechars) { - if (addquotes) outa[outindex++] = '"'; - outa[outindex++] = '\n'; - for (j = 0; j < leadspace; j++) - outa[outindex++] = ' '; - if (addquotes) outa[outindex++] = '"'; - linecount = 0; - } - outa[outindex++] = flata[i]; - linecount++; - } - if (addquotes) outa[outindex++] = '"'; - *poutsize = outindex; - - LEPT_FREE(flata); - return outa; -} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/endianness.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/endianness.h deleted file mode 100644 index 8cdc060d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/endianness.h +++ /dev/null @@ -1,11 +0,0 @@ -#if !defined (L_BIG_ENDIAN) && !defined (L_LITTLE_ENDIAN) -# if defined (__APPLE_CC__) -# ifdef __BIG_ENDIAN__ -# define L_BIG_ENDIAN -# else -# define L_LITTLE_ENDIAN -# endif -# else -# define L_LITTLE_ENDIAN -# endif -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/enhance.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/enhance.c deleted file mode 100644 index 4033800a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/enhance.c +++ /dev/null @@ -1,2356 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file enhance.c - *
- *
- *      Gamma TRC (tone reproduction curve) mapping
- *           PIX     *pixGammaTRC()
- *           PIX     *pixGammaTRCMasked()
- *           PIX     *pixGammaTRCWithAlpha()
- *           NUMA    *numaGammaTRC()
- *
- *      Contrast enhancement
- *           PIX     *pixContrastTRC()
- *           PIX     *pixContrastTRCMasked()
- *           NUMA    *numaContrastTRC()
- *
- *      Histogram equalization
- *           PIX     *pixEqualizeTRC()
- *           NUMA    *numaEqualizeTRC()
- *
- *      Generic TRC mapper
- *           PIX     *pixTRCMap()
- *           PIX     *pixTRCMapGeneral()
- *
- *      Unsharp-masking
- *           PIX     *pixUnsharpMasking()
- *           PIX     *pixUnsharpMaskingGray()
- *           PIX     *pixUnsharpMaskingFast()
- *           PIX     *pixUnsharpMaskingGrayFast()
- *           PIX     *pixUnsharpMaskingGray1D()
- *           PIX     *pixUnsharpMaskingGray2D()
- *
- *      Hue and saturation modification
- *           PIX     *pixModifyHue()
- *           PIX     *pixModifySaturation()
- *           l_int32  pixMeasureSaturation()
- *           PIX     *pixModifyBrightness()
- *
- *      Color shifting
- *           PIX     *pixMosaicColorShiftRGB()
- *           PIX     *pixColorShiftRGB()
- *
- *      Darken gray (unsaturated) pixels
- *           PIX     *pixDarkenGray()
- *
- *      General multiplicative constant color transform
- *           PIX     *pixMultConstantColor()
- *           PIX     *pixMultMatrixColor()
- *
- *      Edge by bandpass
- *           PIX     *pixHalfEdgeByBandpass()
- *
- *      Gamma correction, contrast enhancement and histogram equalization
- *      apply a simple mapping function to each pixel (or, for color
- *      images, to each sample (i.e., r,g,b) of the pixel).
- *
- *       ~ Gamma correction either lightens the image or darkens
- *         it, depending on whether the gamma factor is greater
- *         or less than 1.0, respectively.
- *
- *       ~ Contrast enhancement darkens the pixels that are already
- *         darker than the middle of the dynamic range (128)
- *         and lightens pixels that are lighter than 128.
- *
- *       ~ Histogram equalization remaps to have the same number
- *         of image pixels at each of 256 intensity values.  This is
- *         a quick and dirty method of adjusting contrast and brightness
- *         to bring out details in both light and dark regions.
- *
- *      Unsharp masking is a more complicated enhancement.
- *      A "high frequency" image, generated by subtracting
- *      the smoothed ("low frequency") part of the image from
- *      itself, has all the energy at the edges.  This "edge image"
- *      has 0 average value.  A fraction of the edge image is
- *      then added to the original, enhancing the differences
- *      between pixel values at edges.  Because we represent
- *      images as l_uint8 arrays, we preserve dynamic range and
- *      handle negative values by doing all the arithmetic on
- *      shifted l_uint16 arrays; the l_uint8 values are recovered
- *      at the end.
- *
- *      Hue and saturation modification work in HSV space.  Because
- *      this is too large for efficient table lookup, each pixel value
- *      is transformed to HSV, modified, and transformed back.
- *      It's not the fastest way to do this, but the method is
- *      easily understood.
- *
- *      Unsharp masking is never in-place, and returns a clone if no
- *      operation is to be performed.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Scales contrast enhancement factor to have a useful range - * between 0.0 and 1.0 */ -static const l_float32 EnhanceScaleFactor = 5.0; - -/*-------------------------------------------------------------* - * Gamma TRC (tone reproduction curve) mapping * - *-------------------------------------------------------------*/ -/*! - * \brief pixGammaTRC() - * - * \param[in] pixd [optional] null or equal to pixs - * \param[in] pixs 8 or 32 bpp; or 2, 4 or 8 bpp with colormap - * \param[in] gamma gamma correction; must be > 0.0 - * \param[in] minval input value that gives 0 for output; can be < 0 - * \param[in] maxval input value that gives 255 for output; can be > 255 - * \return pixd always - * - *
- * Notes:
- *      (1) pixd must either be null or equal to pixs.
- *          For in-place operation, set pixd == pixs:
- *             pixGammaTRC(pixs, pixs, ...);
- *          To get a new image, set pixd == null:
- *             pixd = pixGammaTRC(NULL, pixs, ...);
- *      (2) If pixs is colormapped, the colormap is transformed,
- *          either in-place or in a copy of pixs.
- *      (3) We use a gamma mapping between minval and maxval.
- *      (4) If gamma < 1.0, the image will appear darker;
- *          if gamma > 1.0, the image will appear lighter;
- *      (5) If gamma = 1.0 and minval = 0 and maxval = 255, no
- *          enhancement is performed; return a copy unless in-place,
- *          in which case this is a no-op.
- *      (6) For color images that are not colormapped, the mapping
- *          is applied to each component.
- *      (7) minval and maxval are not restricted to the interval [0, 255].
- *          If minval < 0, an input value of 0 is mapped to a
- *          nonzero output.  This will turn black to gray.
- *          If maxval > 255, an input value of 255 is mapped to
- *          an output value less than 255.  This will turn
- *          white (e.g., in the background) to gray.
- *      (8) Increasing minval darkens the image.
- *      (9) Decreasing maxval bleaches the image.
- *      (10) Simultaneously increasing minval and decreasing maxval
- *           will darken the image and make the colors more intense;
- *           e.g., minval = 50, maxval = 200.
- *      (11) See numaGammaTRC() for further examples of use.
- *      (12) Use pixTRCMapGeneral() if applying different mappings
- *           to each channel in an RGB image.
- * 
- */ -PIX * -pixGammaTRC(PIX *pixd, - PIX *pixs, - l_float32 gamma, - l_int32 minval, - l_int32 maxval) -{ -l_int32 d; -NUMA *nag; -PIXCMAP *cmap; - - PROCNAME("pixGammaTRC"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - if (gamma <= 0.0) { - L_WARNING("gamma must be > 0.0; setting to 1.0\n", procName); - gamma = 1.0; - } - if (minval >= maxval) - return (PIX *)ERROR_PTR("minval not < maxval", procName, pixd); - cmap = pixGetColormap(pixs); - d = pixGetDepth(pixs); - if (!cmap && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, pixd); - - if (gamma == 1.0 && minval == 0 && maxval == 255) /* no-op */ - return pixCopy(pixd, pixs); - - if (!pixd) /* start with a copy if not in-place */ - pixd = pixCopy(NULL, pixs); - - if (cmap) { - pixcmapGammaTRC(pixGetColormap(pixd), gamma, minval, maxval); - return pixd; - } - - /* pixd is 8 or 32 bpp */ - if ((nag = numaGammaTRC(gamma, minval, maxval)) == NULL) - return (PIX *)ERROR_PTR("nag not made", procName, pixd); - pixTRCMap(pixd, NULL, nag); - numaDestroy(&nag); - - return pixd; -} - - -/*! - * \brief pixGammaTRCMasked() - * - * \param[in] pixd [optional] null or equal to pixs - * \param[in] pixs 8 or 32 bpp; not colormapped - * \param[in] pixm [optional] null or 1 bpp - * \param[in] gamma gamma correction; must be > 0.0 - * \param[in] minval input value that gives 0 for output; can be < 0 - * \param[in] maxval input value that gives 255 for output; can be > 255 - * \return pixd always - * - *
- * Notes:
- *      (1) Same as pixGammaTRC() except mapping is optionally over
- *          a subset of pixels described by pixm.
- *      (2) Masking does not work for colormapped images.
- *      (3) See pixGammaTRC() for details on how to use the parameters.
- * 
- */ -PIX * -pixGammaTRCMasked(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_float32 gamma, - l_int32 minval, - l_int32 maxval) -{ -l_int32 d; -NUMA *nag; - - PROCNAME("pixGammaTRCMasked"); - - if (!pixm) - return pixGammaTRC(pixd, pixs, gamma, minval, maxval); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("invalid: pixs has a colormap", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, pixd); - if (minval >= maxval) - return (PIX *)ERROR_PTR("minval not < maxval", procName, pixd); - if (gamma <= 0.0) { - L_WARNING("gamma must be > 0.0; setting to 1.0\n", procName); - gamma = 1.0; - } - - if (gamma == 1.0 && minval == 0 && maxval == 255) - return pixCopy(pixd, pixs); - - if (!pixd) /* start with a copy if not in-place */ - pixd = pixCopy(NULL, pixs); - - if ((nag = numaGammaTRC(gamma, minval, maxval)) == NULL) - return (PIX *)ERROR_PTR("nag not made", procName, pixd); - pixTRCMap(pixd, pixm, nag); - numaDestroy(&nag); - - return pixd; -} - - -/*! - * \brief pixGammaTRCWithAlpha() - * - * \param[in] pixd [optional] null or equal to pixs - * \param[in] pixs 32 bpp - * \param[in] gamma gamma correction; must be > 0.0 - * \param[in] minval input value that gives 0 for output; can be < 0 - * \param[in] maxval input value that gives 255 for output; can be > 255 - * \return pixd always - * - *
- * Notes:
- *      (1) See usage notes in pixGammaTRC().
- *      (2) This version saves the alpha channel.  It is only valid
- *          for 32 bpp (no colormap), and is a bit slower.
- * 
- */ -PIX * -pixGammaTRCWithAlpha(PIX *pixd, - PIX *pixs, - l_float32 gamma, - l_int32 minval, - l_int32 maxval) -{ -NUMA *nag; -PIX *pixalpha; - - PROCNAME("pixGammaTRCWithAlpha"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - if (gamma <= 0.0) { - L_WARNING("gamma must be > 0.0; setting to 1.0\n", procName); - gamma = 1.0; - } - if (minval >= maxval) - return (PIX *)ERROR_PTR("minval not < maxval", procName, pixd); - - if (gamma == 1.0 && minval == 0 && maxval == 255) - return pixCopy(pixd, pixs); - if (!pixd) /* start with a copy if not in-place */ - pixd = pixCopy(NULL, pixs); - - pixalpha = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); /* save */ - if ((nag = numaGammaTRC(gamma, minval, maxval)) == NULL) - return (PIX *)ERROR_PTR("nag not made", procName, pixd); - pixTRCMap(pixd, NULL, nag); - pixSetRGBComponent(pixd, pixalpha, L_ALPHA_CHANNEL); /* restore */ - pixSetSpp(pixd, 4); - - numaDestroy(&nag); - pixDestroy(&pixalpha); - return pixd; -} - - -/*! - * \brief numaGammaTRC() - * - * \param[in] gamma gamma factor; must be > 0.0 - * \param[in] minval input value that gives 0 for output - * \param[in] maxval input value that gives 255 for output - * \return na, or NULL on error - * - *
- * Notes:
- *      (1) The map is returned as a numa; values are clipped to [0, 255].
- *      (2) To force all intensities into a range within fraction delta
- *          of white, use: minval = -256 * (1 - delta) / delta
- *                         maxval = 255
- *      (3) To force all intensities into a range within fraction delta
- *          of black, use: minval = 0
- *                         maxval = 256 * (1 - delta) / delta
- * 
- */ -NUMA * -numaGammaTRC(l_float32 gamma, - l_int32 minval, - l_int32 maxval) -{ -l_int32 i, val; -l_float32 x, invgamma; -NUMA *na; - - PROCNAME("numaGammaTRC"); - - if (minval >= maxval) - return (NUMA *)ERROR_PTR("minval not < maxval", procName, NULL); - if (gamma <= 0.0) { - L_WARNING("gamma must be > 0.0; setting to 1.0\n", procName); - gamma = 1.0; - } - - invgamma = 1. / gamma; - na = numaCreate(256); - for (i = 0; i < minval; i++) - numaAddNumber(na, 0); - for (i = minval; i <= maxval; i++) { - if (i < 0) continue; - if (i > 255) continue; - x = (l_float32)(i - minval) / (l_float32)(maxval - minval); - val = (l_int32)(255. * powf(x, invgamma) + 0.5); - val = L_MAX(val, 0); - val = L_MIN(val, 255); - numaAddNumber(na, val); - } - for (i = maxval + 1; i < 256; i++) - numaAddNumber(na, 255); - - return na; -} - - -/*-------------------------------------------------------------* - * Contrast enhancement * - *-------------------------------------------------------------*/ -/*! - * \brief pixContrastTRC() - * - * \param[in] pixd [optional] null or equal to pixs - * \param[in] pixs 8 or 32 bpp; or 2, 4 or 8 bpp with colormap - * \param[in] factor 0.0 is no enhancement - * \return pixd always - * - *
- * Notes:
- *      (1) pixd must either be null or equal to pixs.
- *          For in-place operation, set pixd == pixs:
- *             pixContrastTRC(pixs, pixs, ...);
- *          To get a new image, set pixd == null:
- *             pixd = pixContrastTRC(NULL, pixs, ...);
- *      (2) If pixs is colormapped, the colormap is transformed,
- *          either in-place or in a copy of pixs.
- *      (3) Contrast is enhanced by mapping each color component
- *          using an atan function with maximum slope at 127.
- *          Pixels below 127 are lowered in intensity and pixels
- *          above 127 are increased.
- *      (4) The useful range for the contrast factor is scaled to
- *          be in (0.0 to 1.0), but larger values can also be used.
- *      (5) If factor == 0.0, no enhancement is performed; return a copy
- *          unless in-place, in which case this is a no-op.
- *      (6) For color images that are not colormapped, the mapping
- *          is applied to each component.
- * 
- */ -PIX * -pixContrastTRC(PIX *pixd, - PIX *pixs, - l_float32 factor) -{ -l_int32 d; -NUMA *nac; -PIXCMAP *cmap; - - PROCNAME("pixContrastTRC"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - if (factor < 0.0) { - L_WARNING("factor must be >= 0.0; using 0.0\n", procName); - factor = 0.0; - } - if (factor == 0.0) - return pixCopy(pixd, pixs); - - cmap = pixGetColormap(pixs); - d = pixGetDepth(pixs); - if (!cmap && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, pixd); - - if (!pixd) /* start with a copy if not in-place */ - pixd = pixCopy(NULL, pixs); - - if (cmap) { - pixcmapContrastTRC(pixGetColormap(pixd), factor); - return pixd; - } - - /* pixd is 8 or 32 bpp */ - if ((nac = numaContrastTRC(factor)) == NULL) - return (PIX *)ERROR_PTR("nac not made", procName, pixd); - pixTRCMap(pixd, NULL, nac); - numaDestroy(&nac); - - return pixd; -} - - -/*! - * \brief pixContrastTRCMasked() - * - * \param[in] pixd [optional] null or equal to pixs - * \param[in] pixs 8 or 32 bpp; or 2, 4 or 8 bpp with colormap - * \param[in] pixm [optional] null or 1 bpp - * \param[in] factor 0.0 is no enhancement - * \return pixd always - * - *
- * Notes:
- *      (1) Same as pixContrastTRC() except mapping is optionally over
- *          a subset of pixels described by pixm.
- *      (2) Masking does not work for colormapped images.
- *      (3) See pixContrastTRC() for details on how to use the parameters.
- * 
- */ -PIX * -pixContrastTRCMasked(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_float32 factor) -{ -l_int32 d; -NUMA *nac; - - PROCNAME("pixContrastTRCMasked"); - - if (!pixm) - return pixContrastTRC(pixd, pixs, factor); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("invalid: pixs has a colormap", procName, pixd); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, pixd); - - if (factor < 0.0) { - L_WARNING("factor must be >= 0.0; using 0.0\n", procName); - factor = 0.0; - } - if (factor == 0.0) - return pixCopy(pixd, pixs); - - if (!pixd) /* start with a copy if not in-place */ - pixd = pixCopy(NULL, pixs); - - if ((nac = numaContrastTRC(factor)) == NULL) - return (PIX *)ERROR_PTR("nac not made", procName, pixd); - pixTRCMap(pixd, pixm, nac); - numaDestroy(&nac); - - return pixd; -} - - -/*! - * \brief numaContrastTRC() - * - * \param[in] factor generally between 0.0 [no enhancement] - * and 1.0, but can be larger than 1.0 - * \return na, or NULL on error - * - *
- * Notes:
- *      (1) The mapping is monotonic increasing, where 0 is mapped
- *          to 0 and 255 is mapped to 255.
- *      (2) As 'factor' is increased from 0.0 (where the mapping is linear),
- *          the map gets closer to its limit as a step function that
- *          jumps from 0 to 255 at the center (input value = 127).
- * 
- */ -NUMA * -numaContrastTRC(l_float32 factor) -{ -l_int32 i, val; -l_float64 x, ymax, ymin, dely, scale; -NUMA *na; - - PROCNAME("numaContrastTRC"); - - if (factor < 0.0) { - L_WARNING("factor must be >= 0.0; using 0.0; no enhancement\n", - procName); - factor = 0.0; - } - if (factor == 0.0) - return numaMakeSequence(0, 1, 256); /* linear map */ - - scale = EnhanceScaleFactor; - ymax = atan((l_float64)(1.0 * factor * scale)); - ymin = atan((l_float64)(-127. * factor * scale / 128.)); - dely = ymax - ymin; - na = numaCreate(256); - for (i = 0; i < 256; i++) { - x = (l_float64)i; - val = (l_int32)((255. / dely) * - (-ymin + atan((l_float64)(factor * scale * (x - 127.) / 128.))) + - 0.5); - numaAddNumber(na, val); - } - - return na; -} - - -/*-------------------------------------------------------------* - * Histogram equalization * - *-------------------------------------------------------------*/ -/*! - * \brief pixEqualizeTRC() - * - * \param[in] pixd [optional] null or equal to pixs - * \param[in] pixs 8 bpp gray, 32 bpp rgb, or colormapped - * \param[in] fract fraction of equalization movement of pixel values - * \param[in] factor subsampling factor; integer >= 1 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) pixd must either be null or equal to pixs.
- *          For in-place operation, set pixd == pixs:
- *             pixEqualizeTRC(pixs, pixs, ...);
- *          To get a new image, set pixd == null:
- *             pixd = pixEqualizeTRC(NULL, pixs, ...);
- *      (2) In histogram equalization, a tone reproduction curve
- *          mapping is used to make the number of pixels at each
- *          intensity equal.
- *      (3) If fract == 0.0, no equalization is performed; return a copy
- *          unless in-place, in which case this is a no-op.
- *          If fract == 1.0, equalization is complete.
- *      (4) Set the subsampling factor > 1 to reduce the amount of computation.
- *      (5) If pixs is colormapped, the colormap is removed and
- *          converted to rgb or grayscale.
- *      (6) If pixs has color, equalization is done in each channel
- *          separately.
- *      (7) Note that even if there is a colormap, we can get an
- *          in-place operation because the intermediate image pixt
- *          is copied back to pixs (which for in-place is the same
- *          as pixd).
- * 
- */ -PIX * -pixEqualizeTRC(PIX *pixd, - PIX *pixs, - l_float32 fract, - l_int32 factor) -{ -l_int32 d; -NUMA *na; -PIX *pixt, *pix8; -PIXCMAP *cmap; - - PROCNAME("pixEqualizeTRC"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - cmap = pixGetColormap(pixs); - d = pixGetDepth(pixs); - if (d != 8 && d != 32 && !cmap) - return (PIX *)ERROR_PTR("pixs not 8/32 bpp or cmapped", procName, NULL); - if (fract < 0.0 || fract > 1.0) - return (PIX *)ERROR_PTR("fract not in [0.0 ... 1.0]", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("sampling factor < 1", procName, NULL); - - if (fract == 0.0) - return pixCopy(pixd, pixs); - - /* If there is a colormap, remove it. */ - if (cmap) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pixt = pixClone(pixs); - - /* Make a copy if necessary */ - pixd = pixCopy(pixd, pixt); - pixDestroy(&pixt); - - d = pixGetDepth(pixd); - if (d == 8) { - na = numaEqualizeTRC(pixd, fract, factor); - pixTRCMap(pixd, NULL, na); - numaDestroy(&na); - } else { /* 32 bpp */ - pix8 = pixGetRGBComponent(pixd, COLOR_RED); - na = numaEqualizeTRC(pix8, fract, factor); - pixTRCMap(pix8, NULL, na); - pixSetRGBComponent(pixd, pix8, COLOR_RED); - numaDestroy(&na); - pixDestroy(&pix8); - pix8 = pixGetRGBComponent(pixd, COLOR_GREEN); - na = numaEqualizeTRC(pix8, fract, factor); - pixTRCMap(pix8, NULL, na); - pixSetRGBComponent(pixd, pix8, COLOR_GREEN); - numaDestroy(&na); - pixDestroy(&pix8); - pix8 = pixGetRGBComponent(pixd, COLOR_BLUE); - na = numaEqualizeTRC(pix8, fract, factor); - pixTRCMap(pix8, NULL, na); - pixSetRGBComponent(pixd, pix8, COLOR_BLUE); - numaDestroy(&na); - pixDestroy(&pix8); - } - - return pixd; -} - - -/*! 
- * \brief numaEqualizeTRC() - * - * \param[in] pix 8 bpp, no colormap - * \param[in] fract fraction of equalization movement of pixel values - * \param[in] factor subsampling factor; integer >= 1 - * \return nad, or NULL on error - * - *
- * Notes:
- *      (1) If fract == 0.0, no equalization will be performed.
- *          If fract == 1.0, equalization is complete.
- *      (2) Set the subsampling factor > 1 to reduce the amount of computation.
- *      (3) The map is returned as a numa with 256 values, specifying
- *          the equalized value (array value) for every input value
- *          (the array index).
- * 
- */ -NUMA * -numaEqualizeTRC(PIX *pix, - l_float32 fract, - l_int32 factor) -{ -l_int32 iin, iout, itarg; -l_float32 val, sum; -NUMA *nah, *nasum, *nad; - - PROCNAME("numaEqualizeTRC"); - - if (!pix) - return (NUMA *)ERROR_PTR("pix not defined", procName, NULL); - if (pixGetDepth(pix) != 8) - return (NUMA *)ERROR_PTR("pix not 8 bpp", procName, NULL); - if (fract < 0.0 || fract > 1.0) - return (NUMA *)ERROR_PTR("fract not in [0.0 ... 1.0]", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling factor < 1", procName, NULL); - - if (fract == 0.0) - L_WARNING("fract = 0.0; no equalization requested\n", procName); - - if ((nah = pixGetGrayHistogram(pix, factor)) == NULL) - return (NUMA *)ERROR_PTR("histogram not made", procName, NULL); - numaGetSum(nah, &sum); - nasum = numaGetPartialSums(nah); - - nad = numaCreate(256); - for (iin = 0; iin < 256; iin++) { - numaGetFValue(nasum, iin, &val); - itarg = (l_int32)(255. * val / sum + 0.5); - iout = iin + (l_int32)(fract * (itarg - iin)); - iout = L_MIN(iout, 255); /* to be safe */ - numaAddNumber(nad, iout); - } - - numaDestroy(&nah); - numaDestroy(&nasum); - return nad; -} - - -/*-------------------------------------------------------------* - * Generic TRC mapping * - *-------------------------------------------------------------*/ -/*! - * \brief pixTRCMap() - * - * \param[in] pixs 8 grayscale or 32 bpp rgb; not colormapped - * \param[in] pixm [optional] 1 bpp mask - * \param[in] na mapping array - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This operation is in-place on pixs.
- *      (2) For 32 bpp, this applies the same map to each of the r,g,b
- *          components.
- *      (3) The mapping array is of size 256, and it maps the input
- *          index into values in the range [0, 255].
- *      (4) If defined, the optional 1 bpp mask pixm has its origin
- *          aligned with pixs, and the map function is applied only
- *          to pixels in pixs under the fg of pixm.
- *      (5) For 32 bpp, this does not save the alpha channel.
- * 
- */ -l_int32 -pixTRCMap(PIX *pixs, - PIX *pixm, - NUMA *na) -{ -l_int32 w, h, d, wm, hm, wpl, wplm, i, j, sval8, dval8; -l_uint32 sval32, dval32; -l_uint32 *data, *datam, *line, *linem, *tab; - - PROCNAME("pixTRCMap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (numaGetCount(na) != 256) - return ERROR_INT("na not of size 256", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32) - return ERROR_INT("pixs not 8 or 32 bpp", procName, 1); - if (pixm) { - if (pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - } - - tab = (l_uint32 *)numaGetIArray(na); /* get the array for efficiency */ - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - if (!pixm) { - if (d == 8) { - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - sval8 = GET_DATA_BYTE(line, j); - dval8 = tab[sval8]; - SET_DATA_BYTE(line, j, dval8); - } - } - } else { /* d == 32 */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - sval32 = *(line + j); - dval32 = - tab[(sval32 >> L_RED_SHIFT) & 0xff] << L_RED_SHIFT | - tab[(sval32 >> L_GREEN_SHIFT) & 0xff] << L_GREEN_SHIFT | - tab[(sval32 >> L_BLUE_SHIFT) & 0xff] << L_BLUE_SHIFT; - *(line + j) = dval32; - } - } - } - } else { - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - pixGetDimensions(pixm, &wm, &hm, NULL); - if (d == 8) { - for (i = 0; i < h; i++) { - if (i >= hm) - break; - line = data + i * wpl; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if (j >= wm) - break; - if (GET_DATA_BIT(linem, j) == 0) - continue; - sval8 = GET_DATA_BYTE(line, j); - dval8 = tab[sval8]; - SET_DATA_BYTE(line, j, dval8); - } - } - } else { /* d == 32 */ - for (i = 0; i < h; i++) { - if (i >= hm) - break; - line = data + i * wpl; - linem = datam + i * wplm; - for (j = 0; j < w; 
j++) { - if (j >= wm) - break; - if (GET_DATA_BIT(linem, j) == 0) - continue; - sval32 = *(line + j); - dval32 = - tab[(sval32 >> L_RED_SHIFT) & 0xff] << L_RED_SHIFT | - tab[(sval32 >> L_GREEN_SHIFT) & 0xff] << L_GREEN_SHIFT | - tab[(sval32 >> L_BLUE_SHIFT) & 0xff] << L_BLUE_SHIFT; - *(line + j) = dval32; - } - } - } - } - - LEPT_FREE(tab); - return 0; -} - - -/*! - * \brief pixTRCMapGeneral() - * - * \param[in] pixs 32 bpp rgb; not colormapped - * \param[in] pixm [optional] 1 bpp mask - * \param[in] nar, nag, nab mapping arrays - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This operation is in-place on %pixs.
- *      (2) Each of the r,g,b mapping arrays is of size 256. They map the
- *          input value for that color component into values in the
- *          range [0, 255].
- *      (3) In the special case where the r, g and b mapping arrays are
- *          all the same, call pixTRCMap() instead.
- *      (4) If defined, the optional 1 bpp mask %pixm has its origin
- *          aligned with %pixs, and the map function is applied only
- *          to pixels in %pixs under the fg of pixm.
- *      (5) The alpha channel is not saved.
- * 
- */ -l_int32 -pixTRCMapGeneral(PIX *pixs, - PIX *pixm, - NUMA *nar, - NUMA *nag, - NUMA *nab) -{ -l_int32 w, h, wm, hm, wpl, wplm, i, j; -l_uint32 sval32, dval32; -l_uint32 *data, *datam, *line, *linem, *tabr, *tabg, *tabb; - - PROCNAME("pixTRCMapGeneral"); - - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm defined and not 1 bpp", procName, 1); - if (!nar || !nag || !nab) - return ERROR_INT("na{r,g,b} not all defined", procName, 1); - if (numaGetCount(nar) != 256 || numaGetCount(nag) != 256 || - numaGetCount(nab) != 256) - return ERROR_INT("na{r,g,b} not all of size 256", procName, 1); - - /* Get the arrays for efficiency */ - tabr = (l_uint32 *)numaGetIArray(nar); - tabg = (l_uint32 *)numaGetIArray(nag); - tabb = (l_uint32 *)numaGetIArray(nab); - pixGetDimensions(pixs, &w, &h, NULL); - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - if (!pixm) { - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - sval32 = *(line + j); - dval32 = - tabr[(sval32 >> L_RED_SHIFT) & 0xff] << L_RED_SHIFT | - tabg[(sval32 >> L_GREEN_SHIFT) & 0xff] << L_GREEN_SHIFT | - tabb[(sval32 >> L_BLUE_SHIFT) & 0xff] << L_BLUE_SHIFT; - *(line + j) = dval32; - } - } - } else { - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - pixGetDimensions(pixm, &wm, &hm, NULL); - for (i = 0; i < h; i++) { - if (i >= hm) - break; - line = data + i * wpl; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if (j >= wm) - break; - if (GET_DATA_BIT(linem, j) == 0) - continue; - sval32 = *(line + j); - dval32 = - tabr[(sval32 >> L_RED_SHIFT) & 0xff] << L_RED_SHIFT | - tabg[(sval32 >> L_GREEN_SHIFT) & 0xff] << L_GREEN_SHIFT | - tabb[(sval32 >> L_BLUE_SHIFT) & 0xff] << L_BLUE_SHIFT; - *(line + j) = dval32; - } - } - } - - LEPT_FREE(tabr); - LEPT_FREE(tabg); - LEPT_FREE(tabb); - return 0; -} - - - 
-/*-----------------------------------------------------------------------* - * Unsharp masking * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixUnsharpMasking() - * - * \param[in] pixs all depths except 1 bpp; with or without colormaps - * \param[in] halfwidth "half-width" of smoothing filter - * \param[in] fract fraction of edge added back into image - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) We use symmetric smoothing filters of odd dimension,
- *          typically use sizes of 3, 5, 7, etc.  The %halfwidth parameter
- *          for these is (size - 1)/2; i.e., 1, 2, 3, etc.
- *      (2) The fract parameter is typically taken in the
- *          range:  0.2 < fract < 0.7
- *      (3) Returns a clone if no sharpening is requested.
- * 
- */ -PIX * -pixUnsharpMasking(PIX *pixs, - l_int32 halfwidth, - l_float32 fract) -{ -l_int32 d; -PIX *pixt, *pixd, *pixr, *pixrs, *pixg, *pixgs, *pixb, *pixbs; - - PROCNAME("pixUnsharpMasking"); - - if (!pixs || (pixGetDepth(pixs) == 1)) - return (PIX *)ERROR_PTR("pixs not defined or 1 bpp", procName, NULL); - if (fract <= 0.0 || halfwidth <= 0) { - L_WARNING("no sharpening requested; clone returned\n", procName); - return pixClone(pixs); - } - - if (halfwidth == 1 || halfwidth == 2) - return pixUnsharpMaskingFast(pixs, halfwidth, fract, L_BOTH_DIRECTIONS); - - /* Remove colormap; clone if possible; result is either 8 or 32 bpp */ - if ((pixt = pixConvertTo8Or32(pixs, L_CLONE, 0)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - - /* Sharpen */ - d = pixGetDepth(pixt); - if (d == 8) { - pixd = pixUnsharpMaskingGray(pixt, halfwidth, fract); - } else { /* d == 32 */ - pixr = pixGetRGBComponent(pixs, COLOR_RED); - pixrs = pixUnsharpMaskingGray(pixr, halfwidth, fract); - pixDestroy(&pixr); - pixg = pixGetRGBComponent(pixs, COLOR_GREEN); - pixgs = pixUnsharpMaskingGray(pixg, halfwidth, fract); - pixDestroy(&pixg); - pixb = pixGetRGBComponent(pixs, COLOR_BLUE); - pixbs = pixUnsharpMaskingGray(pixb, halfwidth, fract); - pixDestroy(&pixb); - pixd = pixCreateRGBImage(pixrs, pixgs, pixbs); - pixDestroy(&pixrs); - pixDestroy(&pixgs); - pixDestroy(&pixbs); - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 1.0, 1.0); - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixUnsharpMaskingGray() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] halfwidth "half-width" of smoothing filter - * \param[in] fract fraction of edge added back into image - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) We use symmetric smoothing filters of odd dimension,
- *          typically use sizes of 3, 5, 7, etc.  The %halfwidth parameter
- *          for these is (size - 1)/2; i.e., 1, 2, 3, etc.
- *      (2) The fract parameter is typically taken in the range:
- *          0.2 < fract < 0.7
- *      (3) Returns a clone if no sharpening is requested.
- * 
- */ -PIX * -pixUnsharpMaskingGray(PIX *pixs, - l_int32 halfwidth, - l_float32 fract) -{ -l_int32 w, h, d; -PIX *pixc, *pixd; -PIXACC *pixacc; - - PROCNAME("pixUnsharpMaskingGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 || pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs not 8 bpp or has cmap", procName, NULL); - if (fract <= 0.0 || halfwidth <= 0) { - L_WARNING("no sharpening requested; clone returned\n", procName); - return pixClone(pixs); - } - if (halfwidth == 1 || halfwidth == 2) - return pixUnsharpMaskingGrayFast(pixs, halfwidth, fract, - L_BOTH_DIRECTIONS); - - if ((pixc = pixBlockconvGray(pixs, NULL, halfwidth, halfwidth)) == NULL) - return (PIX *)ERROR_PTR("pixc not made", procName, NULL); - - /* Steps: - * (1) edge image is pixs - pixc (this is highpass part) - * (2) multiply edge image by fract - * (3) add fraction of edge to pixs - * - * To show how this is done with both interfaces to arithmetic - * on integer Pix, here is the implementation in the lower-level - * function calls: - * pixt = pixInitAccumulate(w, h, 0x10000000)) == NULL) - * pixAccumulate(pixt, pixs, L_ARITH_ADD); - * pixAccumulate(pixt, pixc, L_ARITH_SUBTRACT); - * pixMultConstAccumulate(pixt, fract, 0x10000000); - * pixAccumulate(pixt, pixs, L_ARITH_ADD); - * pixd = pixFinalAccumulate(pixt, 0x10000000, 8)) == NULL) - * pixDestroy(&pixt); - * - * The code below does the same thing using the Pixacc accumulator, - * hiding the details of the offset that is needed for subtraction. - */ - pixacc = pixaccCreate(w, h, 1); - pixaccAdd(pixacc, pixs); - pixaccSubtract(pixacc, pixc); - pixaccMultConst(pixacc, fract); - pixaccAdd(pixacc, pixs); - pixd = pixaccFinal(pixacc, 8); - pixaccDestroy(&pixacc); - - pixDestroy(&pixc); - return pixd; -} - - -/*! 
- * \brief pixUnsharpMaskingFast() - * - * \param[in] pixs all depths except 1 bpp; with or without colormaps - * \param[in] halfwidth "half-width" of smoothing filter; 1 and 2 only - * \param[in] fract fraction of high frequency added to image - * \param[in] direction L_HORIZ, L_VERT, L_BOTH_DIRECTIONS - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The fast version uses separable 1-D filters directly on
- *          the input image.  The halfwidth is either 1 (full width = 3)
- *          or 2 (full width = 5).
- *      (2) The fract parameter is typically taken in the
- *            range:  0.2 < fract < 0.7
- *      (3) To skip horizontal sharpening, use %fracth = 0.0; ditto for %fractv
- *      (4) For one dimensional filtering (as an example):
- *          For %halfwidth = 1, the low-pass filter is
- *              L:    1/3    1/3   1/3
- *          and the high-pass filter is
- *              H = I - L:   -1/3   2/3   -1/3
- *          For %halfwidth = 2, the low-pass filter is
- *              L:    1/5    1/5   1/5    1/5    1/5
- *          and the high-pass filter is
- *              H = I - L:   -1/5  -1/5   4/5  -1/5   -1/5
- *          The new sharpened pixel value is found by adding some fraction
- *          of the high-pass filter value (which sums to 0) to the
- *          initial pixel value:
- *              N = I + fract * H
- *      (5) For 2D, the sharpening filter is not separable, because the
- *          vertical filter depends on the horizontal location relative
- *          to the filter origin, and v.v.   So we either do the full
- *          2D filter (for %halfwidth == 1) or do the low-pass
- *          convolution separably and then compose with the original pix.
- *      (6) Returns a clone if no sharpening is requested.
- * 
- */ -PIX * -pixUnsharpMaskingFast(PIX *pixs, - l_int32 halfwidth, - l_float32 fract, - l_int32 direction) -{ -l_int32 d; -PIX *pixt, *pixd, *pixr, *pixrs, *pixg, *pixgs, *pixb, *pixbs; - - PROCNAME("pixUnsharpMaskingFast"); - - if (!pixs || (pixGetDepth(pixs) == 1)) - return (PIX *)ERROR_PTR("pixs not defined or 1 bpp", procName, NULL); - if (fract <= 0.0 || halfwidth <= 0) { - L_WARNING("no sharpening requested; clone returned\n", procName); - return pixClone(pixs); - } - if (halfwidth != 1 && halfwidth != 2) - return (PIX *)ERROR_PTR("halfwidth must be 1 or 2", procName, NULL); - if (direction != L_HORIZ && direction != L_VERT && - direction != L_BOTH_DIRECTIONS) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - - /* Remove colormap; clone if possible; result is either 8 or 32 bpp */ - if ((pixt = pixConvertTo8Or32(pixs, L_CLONE, 0)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - - /* Sharpen */ - d = pixGetDepth(pixt); - if (d == 8) { - pixd = pixUnsharpMaskingGrayFast(pixt, halfwidth, fract, direction); - } else { /* d == 32 */ - pixr = pixGetRGBComponent(pixs, COLOR_RED); - pixrs = pixUnsharpMaskingGrayFast(pixr, halfwidth, fract, direction); - pixDestroy(&pixr); - pixg = pixGetRGBComponent(pixs, COLOR_GREEN); - pixgs = pixUnsharpMaskingGrayFast(pixg, halfwidth, fract, direction); - pixDestroy(&pixg); - pixb = pixGetRGBComponent(pixs, COLOR_BLUE); - pixbs = pixUnsharpMaskingGrayFast(pixb, halfwidth, fract, direction); - pixDestroy(&pixb); - pixd = pixCreateRGBImage(pixrs, pixgs, pixbs); - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 1.0, 1.0); - pixDestroy(&pixrs); - pixDestroy(&pixgs); - pixDestroy(&pixbs); - } - - pixDestroy(&pixt); - return pixd; -} - - - -/*! 
- * \brief pixUnsharpMaskingGrayFast() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] halfwidth "half-width" of smoothing filter: 1 or 2 - * \param[in] fract fraction of high frequency added to image - * \param[in] direction L_HORIZ, L_VERT, L_BOTH_DIRECTIONS - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) For usage and explanation of the algorithm, see notes
- *          in pixUnsharpMaskingFast().
- *      (2) Returns a clone if no sharpening is requested.
- * 
- */ -PIX * -pixUnsharpMaskingGrayFast(PIX *pixs, - l_int32 halfwidth, - l_float32 fract, - l_int32 direction) -{ -PIX *pixd; - - PROCNAME("pixUnsharpMaskingGrayFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8 || pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs not 8 bpp or has cmap", procName, NULL); - if (fract <= 0.0 || halfwidth <= 0) { - L_WARNING("no sharpening requested; clone returned\n", procName); - return pixClone(pixs); - } - if (halfwidth != 1 && halfwidth != 2) - return (PIX *)ERROR_PTR("halfwidth must be 1 or 2", procName, NULL); - if (direction != L_HORIZ && direction != L_VERT && - direction != L_BOTH_DIRECTIONS) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - - if (direction != L_BOTH_DIRECTIONS) - pixd = pixUnsharpMaskingGray1D(pixs, halfwidth, fract, direction); - else /* 2D sharpening */ - pixd = pixUnsharpMaskingGray2D(pixs, halfwidth, fract); - - return pixd; -} - - -/*! - * \brief pixUnsharpMaskingGray1D() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] halfwidth "half-width" of smoothing filter: 1 or 2 - * \param[in] fract fraction of high frequency added to image - * \param[in] direction filtering direction; use L_HORIZ or L_VERT - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) For usage and explanation of the algorithm, see notes
- *          in pixUnsharpMaskingFast().
- *      (2) Returns a clone if no sharpening is requested.
- * 
- */ -PIX * -pixUnsharpMaskingGray1D(PIX *pixs, - l_int32 halfwidth, - l_float32 fract, - l_int32 direction) -{ -l_int32 w, h, d, wpls, wpld, i, j, ival; -l_uint32 *datas, *datad; -l_uint32 *lines, *lines0, *lines1, *lines2, *lines3, *lines4, *lined; -l_float32 val, a[5]; -PIX *pixd; - - PROCNAME("pixUnsharpMaskingGray1D"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 || pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs not 8 bpp or has cmap", procName, NULL); - if (fract <= 0.0 || halfwidth <= 0) { - L_WARNING("no sharpening requested; clone returned\n", procName); - return pixClone(pixs); - } - if (halfwidth != 1 && halfwidth != 2) - return (PIX *)ERROR_PTR("halfwidth must be 1 or 2", procName, NULL); - - /* Initialize pixd with pixels from pixs that will not be - * set when computing the sharpened values. */ - pixd = pixCopyBorder(NULL, pixs, halfwidth, halfwidth, - halfwidth, halfwidth); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - if (halfwidth == 1) { - a[0] = -fract / 3.0; - a[1] = 1.0 + fract * 2.0 / 3.0; - a[2] = a[0]; - } else { /* halfwidth == 2 */ - a[0] = -fract / 5.0; - a[1] = a[0]; - a[2] = 1.0 + fract * 4.0 / 5.0; - a[3] = a[0]; - a[4] = a[0]; - } - - if (direction == L_HORIZ) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (halfwidth == 1) { - for (j = 1; j < w - 1; j++) { - val = a[0] * GET_DATA_BYTE(lines, j - 1) + - a[1] * GET_DATA_BYTE(lines, j) + - a[2] * GET_DATA_BYTE(lines, j + 1); - ival = (l_int32)val; - ival = L_MAX(0, ival); - ival = L_MIN(255, ival); - SET_DATA_BYTE(lined, j, ival); - } - } else { /* halfwidth == 2 */ - for (j = 2; j < w - 2; j++) { - val = a[0] * GET_DATA_BYTE(lines, j - 2) + - a[1] * GET_DATA_BYTE(lines, j - 1) + - a[2] * GET_DATA_BYTE(lines, j) + - a[3] * GET_DATA_BYTE(lines, j + 1) + - a[4] * GET_DATA_BYTE(lines, 
j + 2); - ival = (l_int32)val; - ival = L_MAX(0, ival); - ival = L_MIN(255, ival); - SET_DATA_BYTE(lined, j, ival); - } - } - } - } else { /* direction == L_VERT */ - if (halfwidth == 1) { - for (i = 1; i < h - 1; i++) { - lines0 = datas + (i - 1) * wpls; - lines1 = datas + i * wpls; - lines2 = datas + (i + 1) * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = a[0] * GET_DATA_BYTE(lines0, j) + - a[1] * GET_DATA_BYTE(lines1, j) + - a[2] * GET_DATA_BYTE(lines2, j); - ival = (l_int32)val; - ival = L_MAX(0, ival); - ival = L_MIN(255, ival); - SET_DATA_BYTE(lined, j, ival); - } - } - } else { /* halfwidth == 2 */ - for (i = 2; i < h - 2; i++) { - lines0 = datas + (i - 2) * wpls; - lines1 = datas + (i - 1) * wpls; - lines2 = datas + i * wpls; - lines3 = datas + (i + 1) * wpls; - lines4 = datas + (i + 2) * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = a[0] * GET_DATA_BYTE(lines0, j) + - a[1] * GET_DATA_BYTE(lines1, j) + - a[2] * GET_DATA_BYTE(lines2, j) + - a[3] * GET_DATA_BYTE(lines3, j) + - a[4] * GET_DATA_BYTE(lines4, j); - ival = (l_int32)val; - ival = L_MAX(0, ival); - ival = L_MIN(255, ival); - SET_DATA_BYTE(lined, j, ival); - } - } - } - } - - return pixd; -} - - -/*! - * \brief pixUnsharpMaskingGray2D() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] halfwidth "half-width" of smoothing filter: 1 or 2 - * \param[in] fract fraction of high frequency added to image - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is for %halfwidth == 1, 2.
- *      (2) The lowpass filter is implemented separably.
- *      (3) Returns a clone if no sharpening is requested.
- * 
- */ -PIX * -pixUnsharpMaskingGray2D(PIX *pixs, - l_int32 halfwidth, - l_float32 fract) -{ -l_int32 w, h, d, wpls, wpld, wplf, i, j, ival, sval; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 val, norm; -l_float32 *dataf, *linef, *linef0, *linef1, *linef2, *linef3, *linef4; -PIX *pixd; -FPIX *fpix; - - PROCNAME("pixUnsharpMaskingGray2D"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 || pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs not 8 bpp or has cmap", procName, NULL); - if (fract <= 0.0 || halfwidth <= 0) { - L_WARNING("no sharpening requested; clone returned\n", procName); - return pixClone(pixs); - } - if (halfwidth != 1 && halfwidth != 2) - return (PIX *)ERROR_PTR("halfwidth must be 1 or 2", procName, NULL); - - if ((pixd = pixCopyBorder(NULL, pixs, halfwidth, halfwidth, - halfwidth, halfwidth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - /* Do the low pass separably. Store the result of horizontal - * smoothing in an intermediate fpix. 
*/ - if ((fpix = fpixCreate(w, h)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("fpix not made", procName, NULL); - } - dataf = fpixGetData(fpix); - wplf = fpixGetWpl(fpix); - if (halfwidth == 1) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linef = dataf + i * wplf; - for (j = 1; j < w - 1; j++) { - val = GET_DATA_BYTE(lines, j - 1) + - GET_DATA_BYTE(lines, j) + - GET_DATA_BYTE(lines, j + 1); - linef[j] = val; - } - } - } else { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linef = dataf + i * wplf; - for (j = 2; j < w - 2; j++) { - val = GET_DATA_BYTE(lines, j - 2) + - GET_DATA_BYTE(lines, j - 1) + - GET_DATA_BYTE(lines, j) + - GET_DATA_BYTE(lines, j + 1) + - GET_DATA_BYTE(lines, j + 2); - linef[j] = val; - } - } - } - - /* Do vertical smoothing to finish the low-pass filter. - * At each pixel, if L is the lowpass value, I is the - * src pixel value and f is the fraction of highpass to - * be added to I, then the highpass filter value is - * H = I - L - * and the new sharpened value is - * N = I + f * H. 
*/ - if (halfwidth == 1) { - for (i = 1; i < h - 1; i++) { - linef0 = dataf + (i - 1) * wplf; - linef1 = dataf + i * wplf; - linef2 = dataf + (i + 1) * wplf; - lined = datad + i * wpld; - lines = datas + i * wpls; - norm = 1.0 / 9.0; - for (j = 1; j < w - 1; j++) { - val = norm * (linef0[j] + linef1[j] + - linef2[j]); /* L: lowpass filter value */ - sval = GET_DATA_BYTE(lines, j); /* I: source pixel */ - ival = (l_int32)(sval + fract * (sval - val) + 0.5); - ival = L_MAX(0, ival); - ival = L_MIN(255, ival); - SET_DATA_BYTE(lined, j, ival); - } - } - } else { - for (i = 2; i < h - 2; i++) { - linef0 = dataf + (i - 2) * wplf; - linef1 = dataf + (i - 1) * wplf; - linef2 = dataf + i * wplf; - linef3 = dataf + (i + 1) * wplf; - linef4 = dataf + (i + 2) * wplf; - lined = datad + i * wpld; - lines = datas + i * wpls; - norm = 1.0 / 25.0; - for (j = 2; j < w - 2; j++) { - val = norm * (linef0[j] + linef1[j] + linef2[j] + linef3[j] + - linef4[j]); /* L: lowpass filter value */ - sval = GET_DATA_BYTE(lines, j); /* I: source pixel */ - ival = (l_int32)(sval + fract * (sval - val) + 0.5); - ival = L_MAX(0, ival); - ival = L_MIN(255, ival); - SET_DATA_BYTE(lined, j, ival); - } - } - } - - fpixDestroy(&fpix); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Hue and saturation modification * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixModifyHue() - * - * \param[in] pixd [optional] can be null or equal to pixs - * \param[in] pixs 32 bpp rgb - * \param[in] fract between -1.0 and 1.0 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) pixd must either be null or equal to pixs.
- *          For in-place operation, set pixd == pixs:
- *             pixEqualizeTRC(pixs, pixs, ...);
- *          To get a new image, set pixd == null:
- *             pixd = pixEqualizeTRC(NULL, pixs, ...);
- *      (1) Use fract > 0.0 to increase hue value; < 0.0 to decrease it.
- *          1.0 (or -1.0) represents a 360 degree rotation; i.e., no change.
- *      (2) If no modification is requested (fract = -1.0 or 0 or 1.0),
- *          return a copy unless in-place, in which case this is a no-op.
- *      (3) See discussion of color-modification methods, in coloring.c.
- * 
- */ -PIX * -pixModifyHue(PIX *pixd, - PIX *pixs, - l_float32 fract) -{ -l_int32 w, h, d, i, j, wpl, delhue; -l_int32 rval, gval, bval, hval, sval, vval; -l_uint32 *data, *line; - - PROCNAME("pixModifyHue"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs colormapped", procName, NULL); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd not null or pixs", procName, pixd); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (L_ABS(fract) > 1.0) - return (PIX *)ERROR_PTR("fract not in [-1.0 ... 1.0]", procName, NULL); - - pixd = pixCopy(pixd, pixs); - - delhue = (l_int32)(240 * fract); - if (delhue == 0 || delhue == 240 || delhue == -240) { - L_WARNING("no change requested in hue\n", procName); - return pixd; - } - if (delhue < 0) - delhue += 240; - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - convertRGBToHSV(rval, gval, bval, &hval, &sval, &vval); - hval = (hval + delhue) % 240; - convertHSVToRGB(hval, sval, vval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, line + j); - } - } - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 1.0, 1.0); - - return pixd; -} - - -/*! - * \brief pixModifySaturation() - * - * \param[in] pixd [optional] can be null, existing or equal to pixs - * \param[in] pixs 32 bpp rgb - * \param[in] fract between -1.0 and 1.0 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) If fract > 0.0, it gives the fraction that the pixel
- *          saturation is moved from its initial value toward 255.
- *          If fract < 0.0, it gives the fraction that the pixel
- *          saturation is moved from its initial value toward 0.
- *          The limiting values for fract = -1.0 (1.0) thus set the
- *          saturation to 0 (255).
- *      (2) If fract = 0, no modification is requested; return a copy
- *          unless in-place, in which case this is a no-op.
- *      (3) See discussion of color-modification methods, in coloring.c.
- * 
- */ -PIX * -pixModifySaturation(PIX *pixd, - PIX *pixs, - l_float32 fract) -{ -l_int32 w, h, d, i, j, wpl; -l_int32 rval, gval, bval, hval, sval, vval; -l_uint32 *data, *line; - - PROCNAME("pixModifySaturation"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (L_ABS(fract) > 1.0) - return (PIX *)ERROR_PTR("fract not in [-1.0 ... 1.0]", procName, NULL); - - pixd = pixCopy(pixd, pixs); - if (fract == 0.0) { - L_WARNING("no change requested in saturation\n", procName); - return pixd; - } - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - convertRGBToHSV(rval, gval, bval, &hval, &sval, &vval); - if (fract < 0.0) - sval = (l_int32)(sval * (1.0 + fract)); - else - sval = (l_int32)(sval + fract * (255 - sval)); - convertHSVToRGB(hval, sval, vval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, line + j); - } - } - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 1.0, 1.0); - - return pixd; -} - - -/*! 
- * \brief pixMeasureSaturation() - * - * \param[in] pixs 32 bpp rgb - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] psat average saturation - * \return 0 if OK, 1 on error - */ -l_int32 -pixMeasureSaturation(PIX *pixs, - l_int32 factor, - l_float32 *psat) -{ -l_int32 w, h, d, i, j, wpl, sum, count; -l_int32 rval, gval, bval, hval, sval, vval; -l_uint32 *data, *line; - - PROCNAME("pixMeasureSaturation"); - - if (!psat) - return ERROR_INT("pixs not defined", procName, 1); - *psat = 0.0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("subsampling factor < 1", procName, 1); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0, sum = 0, count = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - extractRGBValues(line[j], &rval, &gval, &bval); - convertRGBToHSV(rval, gval, bval, &hval, &sval, &vval); - sum += sval; - count++; - } - } - - if (count > 0) - *psat = (l_float32)sum / (l_float32)count; - return 0; -} - - -/*! - * \brief pixModifyBrightness() - * - * \param[in] pixd [optional] can be null, existing or equal to pixs - * \param[in] pixs 32 bpp rgb - * \param[in] fract between -1.0 and 1.0 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) If fract > 0.0, it gives the fraction that the v-parameter,
- *          which is max(r,g,b), is moved from its initial value toward 255.
- *          If fract < 0.0, it gives the fraction that the v-parameter
- *          is moved from its initial value toward 0.
- *          The limiting values for fract = -1.0 (1.0) thus set the
- *          v-parameter to 0 (255).
- *      (2) If fract = 0, no modification is requested; return a copy
- *          unless in-place, in which case this is a no-op.
- *      (3) See discussion of color-modification methods, in coloring.c.
- * 
- */ -PIX * -pixModifyBrightness(PIX *pixd, - PIX *pixs, - l_float32 fract) -{ -l_int32 w, h, d, i, j, wpl; -l_int32 rval, gval, bval, hval, sval, vval; -l_uint32 *data, *line; - - PROCNAME("pixModifyBrightness"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (L_ABS(fract) > 1.0) - return (PIX *)ERROR_PTR("fract not in [-1.0 ... 1.0]", procName, NULL); - - pixd = pixCopy(pixd, pixs); - if (fract == 0.0) { - L_WARNING("no change requested in brightness\n", procName); - return pixd; - } - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - extractRGBValues(line[j], &rval, &gval, &bval); - convertRGBToHSV(rval, gval, bval, &hval, &sval, &vval); - if (fract > 0.0) - vval = (l_int32)(vval + fract * (255.0 - vval)); - else - vval = (l_int32)(vval * (1.0 + fract)); - convertHSVToRGB(hval, sval, vval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, line + j); - } - } - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 1.0, 1.0); - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Color shifting * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixMosaicColorShiftRGB() - * - * \param[in] pixs 32 bpp rgb - * \param[in] roff center offset of red component - * \param[in] goff center offset of green component - * \param[in] boff center offset of blue component - * \param[in] delta increments from center offsets [0.0 - 0.1]; - * use 0.0 to get the default (0.04) - * \param[in] nincr number of increments in each (positive and negative) - * direction; use 0 to get the default (2). - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This generates a mosaic view of the effect of shifting the RGB
- *          components.  See pixColorShiftRGB() for details on the shifting.
- *      (2) The offsets (%roff, %goff, %boff) set the color center point,
- *          and the deviations from this are shown separately for deltas
- *          in r, g and b.  For each component, we show 2 * %nincr + 1
- *          images.
- *      (3) Usage: color prints differ from the original due to three factors:
- *          illumination, calibration of the camera in acquisition,
- *          and calibration of the printer.  This function can be used
- *          to iteratively match a color print to the original.  On each
- *          iteration, the center offsets are set to the best match so
- *          far, and the %delta increments are typically reduced.
- * 
- */ -PIX * -pixMosaicColorShiftRGB(PIX *pixs, - l_float32 roff, - l_float32 goff, - l_float32 boff, - l_float32 delta, - l_int32 nincr) -{ -char buf[64]; -l_int32 i; -l_float32 del; -L_BMF *bmf; -PIX *pix1, *pix2, *pix3; -PIXA *pixa; - - PROCNAME("pixMosaicColorShiftRGB"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not rgb", procName, NULL); - if (roff < -1.0 || roff > 1.0) - return (PIX *)ERROR_PTR("roff not in [-1.0, 1.0]", procName, NULL); - if (goff < -1.0 || goff > 1.0) - return (PIX *)ERROR_PTR("goff not in [-1.0, 1.0]", procName, NULL); - if (boff < -1.0 || boff > 1.0) - return (PIX *)ERROR_PTR("boff not in [-1.0, 1.0]", procName, NULL); - if (delta < 0.0 || delta > 0.1) - return (PIX *)ERROR_PTR("delta not in [0.0, 0.1]", procName, NULL); - if (delta == 0.0) delta = 0.04; - if (nincr < 0 || nincr > 6) - return (PIX *)ERROR_PTR("nincr not in [0, 6]", procName, NULL); - if (nincr == 0) nincr = 2; - - pixa = pixaCreate(3 * (2 * nincr + 1)); - bmf = bmfCreate(NULL, 8); - pix1 = pixScaleToSize(pixs, 400, 0); - for (i = 0, del = - nincr * delta; i < 2 * nincr + 1; i++, del += delta) { - pix2 = pixColorShiftRGB(pix1, roff + del, goff, boff); - snprintf(buf, sizeof(buf), "%4.2f, %4.2f, %4.2f", - roff + del, goff, boff); - pix3 = pixAddSingleTextblock(pix2, bmf, buf, 0xff000000, - L_ADD_BELOW, 0); - pixaAddPix(pixa, pix3, L_INSERT); - pixDestroy(&pix2); - } - for (i = 0, del = - nincr * delta; i < 2 * nincr + 1; i++, del += delta) { - pix2 = pixColorShiftRGB(pix1, roff, goff + del, boff); - snprintf(buf, sizeof(buf), "%4.2f, %4.2f, %4.2f", - roff, goff + del, boff); - pix3 = pixAddSingleTextblock(pix2, bmf, buf, 0xff000000, - L_ADD_BELOW, 0); - pixaAddPix(pixa, pix3, L_INSERT); - pixDestroy(&pix2); - } - for (i = 0, del = - nincr * delta; i < 2 * nincr + 1; i++, del += delta) { - pix2 = pixColorShiftRGB(pix1, roff, goff, boff + del); - snprintf(buf, sizeof(buf), "%4.2f, %4.2f, %4.2f", - roff, goff, boff + del); - pix3 = 
pixAddSingleTextblock(pix2, bmf, buf, 0xff000000, - L_ADD_BELOW, 0); - pixaAddPix(pixa, pix3, L_INSERT); - pixDestroy(&pix2); - } - pixDestroy(&pix1); - - pix1 = pixaDisplayTiledAndScaled(pixa, 32, 300, 2 * nincr + 1, 0, 30, 2); - pixaDestroy(&pixa); - bmfDestroy(&bmf); - return pix1; -} - - -/*! - * \brief pixColorShiftRGB() - * - * \param[in] pixs 32 bpp rgb - * \param[in] rfract fractional shift in red component - * \param[in] gfract fractional shift in green component - * \param[in] bfract fractional shift in blue component - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This allows independent fractional shifts of the r,g and b
- *          components.  A positive shift pushes to saturation (255);
- *          a negative shift pushes toward 0 (black).
- *      (2) The effect can be imagined using a color wheel that consists
- *          (for our purposes) of these 6 colors, separated by 60 degrees:
- *             red, magenta, blue, cyan, green, yellow
- *      (3) So, for example, a negative shift of the blue component
- *          (bfract < 0) could be accompanied by positive shifts
- *          of red and green to make an image more yellow.
- *      (4) Examples of limiting cases:
- *            rfract = 1 ==> r = 255
- *            rfract = -1 ==> r = 0
- * 
- */ -PIX * -pixColorShiftRGB(PIX *pixs, - l_float32 rfract, - l_float32 gfract, - l_float32 bfract) -{ -l_int32 w, h, i, j, wpls, wpld, rval, gval, bval; -l_int32 *rlut, *glut, *blut; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 fi; -PIX *pixd; - - PROCNAME("pixColorShiftRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (rfract < -1.0 || rfract > 1.0) - return (PIX *)ERROR_PTR("rfract not in [-1.0, 1.0]", procName, NULL); - if (gfract < -1.0 || gfract > 1.0) - return (PIX *)ERROR_PTR("gfract not in [-1.0, 1.0]", procName, NULL); - if (bfract < -1.0 || bfract > 1.0) - return (PIX *)ERROR_PTR("bfract not in [-1.0, 1.0]", procName, NULL); - if (rfract == 0.0 && gfract == 0.0 && bfract == 0.0) - return pixCopy(NULL, pixs); - - rlut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - glut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - blut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) { - fi = i; - if (rfract >= 0) { - rlut[i] = (l_int32)(fi + (255.0 - fi) * rfract); - } else { - rlut[i] = (l_int32)(fi * (1.0 + rfract)); - } - if (gfract >= 0) { - glut[i] = (l_int32)(fi + (255.0 - fi) * gfract); - } else { - glut[i] = (l_int32)(fi * (1.0 + gfract)); - } - if (bfract >= 0) { - blut[i] = (l_int32)(fi + (255.0 - fi) * bfract); - } else { - blut[i] = (l_int32)(fi * (1.0 + bfract)); - } - } - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreate(w, h, 32); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - composeRGBPixel(rlut[rval], glut[gval], blut[bval], lined + j); - } - } - - LEPT_FREE(rlut); - LEPT_FREE(glut); - LEPT_FREE(blut); - return pixd; -} - 
-/*-----------------------------------------------------------------------* - * Darken gray (unsaturated) pixels - *-----------------------------------------------------------------------*/ -/*! - * \brief pixDarkenGray() - * - * \param[in] pixd [optional] can be null or equal to pixs - * \param[in] pixs 32 bpp rgb - * \param[in] thresh pixels with max component >= %thresh are unchanged - * \param[in] satlimit pixels with saturation >= %satlimit are unchanged - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This darkens gray pixels, by a fraction (sat/%satlimit), where
- *          the saturation, sat, is the component difference (max - min).
- *          The pixel value is unchanged if sat >= %satlimit.  A typical
- *          value of %satlimit might be 40; the larger the value, the
- *          more that pixels with a smaller saturation will be darkened.
- *      (2) Pixels with max component >= %thresh are unchanged. This can be
- *          used to prevent bright pixels with low saturation from being
- *          darkened.  Setting thresh == 0 is a no-op; setting %thresh == 255
- *          causes the darkening to be applied to all pixels.
- *      (3) This function is useful to enhance pixels relative to a
- *          gray background.
- *      (4) A related function that builds a 1 bpp mask over the gray
- *          pixels is pixMaskOverGrayPixels().
- * 
- */ -PIX * -pixDarkenGray(PIX *pixd, - PIX *pixs, - l_int32 thresh, - l_int32 satlimit) -{ -l_int32 w, h, i, j, wpls, wpld; -l_int32 rval, gval, bval, minrg, min, maxrg, max, sat; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 ratio; - - PROCNAME("pixDarkenGray"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (thresh < 0 || thresh > 255) - return (PIX *)ERROR_PTR("invalid thresh", procName, NULL); - if (satlimit < 1) - return (PIX *)ERROR_PTR("invalid satlimit", procName, NULL); - if (pixd && (pixs != pixd)) - return (PIX *)ERROR_PTR("not new or in-place", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - minrg = L_MIN(rval, gval); - min = L_MIN(minrg, bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - sat = max - min; - if (max >= thresh || sat >= satlimit) - continue; - ratio = (l_float32)sat / (l_float32)satlimit; - composeRGBPixel((l_int32)(ratio * rval), (l_int32)(ratio * gval), - (l_int32)(ratio * bval), &lined[j]); - } - } - return pixd; -} - - -/*-----------------------------------------------------------------------* - * General multiplicative constant color transform * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixMultConstantColor() - * - * \param[in] pixs colormapped or rgb - * \param[in] rfact red multiplicative factor - * \param[in] gfact green multiplicative factor - * \param[in] bfact blue multiplicative factor - * \return pixd colormapped or rgb, with colors scaled, or NULL on error - * - *
- * Notes:
- *      (1) rfact, gfact and bfact can only have non-negative values.
- *          They can be greater than 1.0.  All transformed component
- *          values are clipped to the interval [0, 255].
- *      (2) For multiplication with a general 3x3 matrix of constants,
- *          use pixMultMatrixColor().
- * 
- */ -PIX * -pixMultConstantColor(PIX *pixs, - l_float32 rfact, - l_float32 gfact, - l_float32 bfact) -{ -l_int32 i, j, w, h, d, wpls, wpld; -l_int32 ncolors, rval, gval, bval, nrval, ngval, nbval; -l_uint32 nval; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixMultConstantColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - rfact = L_MAX(0.0, rfact); - gfact = L_MAX(0.0, gfact); - bfact = L_MAX(0.0, bfact); - - if (cmap) { - if ((pixd = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixGetColormap(pixd); - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - nrval = (l_int32)(rfact * rval); - ngval = (l_int32)(gfact * gval); - nbval = (l_int32)(bfact * bval); - nrval = L_MIN(255, nrval); - ngval = L_MIN(255, ngval); - nbval = L_MIN(255, nbval); - pixcmapResetColor(cmap, i, nrval, ngval, nbval); - } - return pixd; - } - - if ((pixd = pixCreateTemplateNoInit(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - nrval = (l_int32)(rfact * rval); - ngval = (l_int32)(gfact * gval); - nbval = (l_int32)(bfact * bval); - nrval = L_MIN(255, nrval); - ngval = L_MIN(255, ngval); - nbval = L_MIN(255, nbval); - composeRGBPixel(nrval, ngval, nbval, &nval); - *(lined + j) = nval; - } - } - - return pixd; -} - - -/*! 
- * \brief pixMultMatrixColor() - * - * \param[in] pixs colormapped or rgb - * \param[in] kel kernel 3x3 matrix of floats - * \return pixd colormapped or rgb, or NULL on error - * - *
- * Notes:
- *      (1) The kernel is a data structure used mostly for floating point
- *          convolution.  Here it is a 3x3 matrix of floats that are used
- *          to transform the pixel values by matrix multiplication:
- *            nrval = a[0,0] * rval + a[0,1] * gval + a[0,2] * bval
- *            ngval = a[1,0] * rval + a[1,1] * gval + a[1,2] * bval
- *            nbval = a[2,0] * rval + a[2,1] * gval + a[2,2] * bval
- *      (2) The matrix can be generated in several ways.
- *          See kernel.c for details.  Here are two of them:
- *            (a) kel = kernelCreate(3, 3);
- *                kernelSetElement(kel, 0, 0, val00);
- *                kernelSetElement(kel, 0, 1, val01);
- *                ...
- *            (b) from a static string; e.g.,:
- *                const char *kdata = " 0.6  0.3 -0.2 "
- *                                    " 0.1  1.2  0.4 "
- *                                    " -0.4 0.2  0.9 ";
- *                kel = kernelCreateFromString(3, 3, 0, 0, kdata);
- *      (3) For the special case where the matrix is diagonal, it is easier
- *          to use pixMultConstantColor().
- *      (4) Matrix entries can have positive and negative values, and can
- *          be larger than 1.0.  All transformed component values
- *          are clipped to [0, 255].
- * 
- */ -PIX * -pixMultMatrixColor(PIX *pixs, - L_KERNEL *kel) -{ -l_int32 i, j, index, kw, kh, w, h, d, wpls, wpld; -l_int32 ncolors, rval, gval, bval, nrval, ngval, nbval; -l_uint32 nval; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 v[9]; /* use linear array for convenience */ -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixMultMatrixColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!kel) - return (PIX *)ERROR_PTR("kel not defined", procName, NULL); - kernelGetParameters(kel, &kw, &kh, NULL, NULL); - if (kw != 3 || kh != 3) - return (PIX *)ERROR_PTR("matrix not 3x3", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (!cmap && d != 32) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - - for (i = 0, index = 0; i < 3; i++) - for (j = 0; j < 3; j++, index++) - kernelGetElement(kel, i, j, v + index); - - if (cmap) { - if ((pixd = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixGetColormap(pixd); - ncolors = pixcmapGetCount(cmap); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - nrval = (l_int32)(v[0] * rval + v[1] * gval + v[2] * bval); - ngval = (l_int32)(v[3] * rval + v[4] * gval + v[5] * bval); - nbval = (l_int32)(v[6] * rval + v[7] * gval + v[8] * bval); - nrval = L_MAX(0, L_MIN(255, nrval)); - ngval = L_MAX(0, L_MIN(255, ngval)); - nbval = L_MAX(0, L_MIN(255, nbval)); - pixcmapResetColor(cmap, i, nrval, ngval, nbval); - } - return pixd; - } - - if ((pixd = pixCreateTemplateNoInit(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - nrval = (l_int32)(v[0] * rval + v[1] * gval 
+ v[2] * bval); - ngval = (l_int32)(v[3] * rval + v[4] * gval + v[5] * bval); - nbval = (l_int32)(v[6] * rval + v[7] * gval + v[8] * bval); - nrval = L_MAX(0, L_MIN(255, nrval)); - ngval = L_MAX(0, L_MIN(255, ngval)); - nbval = L_MAX(0, L_MIN(255, nbval)); - composeRGBPixel(nrval, ngval, nbval, &nval); - *(lined + j) = nval; - } - } - - return pixd; -} - - -/*-------------------------------------------------------------* - * Half-edge by bandpass * - *-------------------------------------------------------------*/ -/*! - * \brief pixHalfEdgeByBandpass() - * - * \param[in] pixs 8 bpp gray or 32 bpp rgb - * \param[in] sm1h, sm1v "half-widths" of smoothing filter sm1 - * \param[in] sm2h, sm2v "half-widths" of smoothing filter sm2; - * require sm2 != sm1 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) We use symmetric smoothing filters of odd dimension,
- *          typically use 3, 5, 7, etc.  The smoothing parameters
- *          for these are 1, 2, 3, etc.  The filter size is related
- *          to the smoothing parameter by
- *               size = 2 * smoothing + 1
- *      (2) Because we take the difference of two lowpass filters,
- *          this is actually a bandpass filter.
- *      (3) We allow both filters to be anisotropic.
- *      (4) Consider either the h or v component of the 2 filters.
- *          Depending on whether sm1 > sm2 or sm2 > sm1, we get
- *          different halves of the smoothed gradients (or "edges").
- *          This difference of smoothed signals looks more like
- *          a second derivative of a transition, which we rectify
- *          by not allowing the signal to go below zero.  If sm1 < sm2,
- *          the sm2 transition is broader, so the difference between
- *          sm1 and sm2 signals is positive on the upper half of
- *          the transition.  Likewise, if sm1 > sm2, the sm1 - sm2
- *          signal difference is positive on the lower half of
- *          the transition.
- * 
- */ -PIX * -pixHalfEdgeByBandpass(PIX *pixs, - l_int32 sm1h, - l_int32 sm1v, - l_int32 sm2h, - l_int32 sm2v) -{ -l_int32 d; -PIX *pixg, *pixacc, *pixc1, *pixc2; - - PROCNAME("pixHalfEdgeByBandpass"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (sm1h == sm2h && sm1v == sm2v) - return (PIX *)ERROR_PTR("sm2 = sm1", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (d == 32) - pixg = pixConvertRGBToLuminance(pixs); - else /* d == 8 */ - pixg = pixClone(pixs); - - /* Make a convolution accumulator and use it twice */ - if ((pixacc = pixBlockconvAccum(pixg)) == NULL) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("pixacc not made", procName, NULL); - } - if ((pixc1 = pixBlockconvGray(pixg, pixacc, sm1h, sm1v)) == NULL) { - pixDestroy(&pixg); - pixDestroy(&pixacc); - return (PIX *)ERROR_PTR("pixc1 not made", procName, NULL); - } - pixc2 = pixBlockconvGray(pixg, pixacc, sm2h, sm2v); - pixDestroy(&pixg); - pixDestroy(&pixacc); - if (!pixc2) { - pixDestroy(&pixc1); - return (PIX *)ERROR_PTR("pixc2 not made", procName, NULL); - } - - /* Compute the half-edge using pixc1 - pixc2. */ - pixSubtractGray(pixc1, pixc1, pixc2); - pixDestroy(&pixc2); - return pixc1; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/environ.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/environ.h deleted file mode 100644 index 5edb182e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/environ.h +++ /dev/null @@ -1,560 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_ENVIRON_H -#define LEPTONICA_ENVIRON_H - -/*------------------------------------------------------------------------* - * Defines and includes differ for Unix and Windows. Also for Windows, * - * differentiate between conditionals based on platform and compiler. 
* - * For platforms: * - * _WIN32 => Windows, 32- or 64-bit * - * _WIN64 => Windows, 64-bit only * - * __CYGWIN__ => Cygwin * - * For compilers: * - * __GNUC__ => gcc * - * _MSC_VER => msvc * - *------------------------------------------------------------------------*/ - -/* MS VC++ does not provide stdint.h, so define the missing types here */ - - -#ifndef _MSC_VER -#include - -#else -/* Note that _WIN32 is defined for both 32 and 64 bit applications, - whereas _WIN64 is defined only for the latter */ - -#ifdef _WIN64 -typedef __int64 intptr_t; -typedef unsigned __int64 uintptr_t; -#else -typedef int intptr_t; -typedef unsigned int uintptr_t; -#endif - -/* VC++6 doesn't seem to have powf, expf. */ -#if (_MSC_VER < 1400) -#define powf(x, y) (float)pow((double)(x), (double)(y)) -#define expf(x) (float)exp((double)(x)) -#endif - -#endif /* _MSC_VER */ - -/* Windows specifics */ -#ifdef _WIN32 - /* DLL EXPORTS and IMPORTS */ - #if defined(LIBLEPT_EXPORTS) - #define LEPT_DLL __declspec(dllexport) - #elif defined(LIBLEPT_IMPORTS) - #define LEPT_DLL __declspec(dllimport) - #else - #define LEPT_DLL - #endif -#else /* non-Windows specifics */ - #include - #define LEPT_DLL -#endif /* _WIN32 */ - -typedef intptr_t l_intptr_t; -typedef uintptr_t l_uintptr_t; - - -/*--------------------------------------------------------------------* - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!* - * USER CONFIGURABLE * - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!* - * Environment variables with I/O libraries * - * Manual Configuration Only: NOT AUTO_CONF * - *--------------------------------------------------------------------*/ -/* - * Leptonica provides interfaces to link to several external image - * I/O libraries, plus zlib. Setting any of these to 0 here causes - * non-functioning stubs to be linked. 
- */ -#if !defined(HAVE_CONFIG_H) && !defined(ANDROID_BUILD) && !defined(OS_IOS) - - #if !defined(HAVE_LIBJPEG) - #define HAVE_LIBJPEG 1 - #endif - #if !defined(HAVE_LIBTIFF) - #define HAVE_LIBTIFF 0 - #endif - #if !defined(HAVE_LIBPNG) - #define HAVE_LIBPNG 1 - #endif - #if !defined(HAVE_LIBZ) - #define HAVE_LIBZ 1 - #endif - #if !defined(HAVE_LIBGIF) - #define HAVE_LIBGIF 0 - #endif - #if !defined(HAVE_LIBUNGIF) - #define HAVE_LIBUNGIF 0 - #endif - #if !defined(HAVE_LIBWEBP) - #define HAVE_LIBWEBP 0 - #endif - #if !defined(HAVE_LIBWEBP_ANIM) - #define HAVE_LIBWEBP_ANIM 0 - #endif - #if !defined(HAVE_LIBJP2K) - #define HAVE_LIBJP2K 0 - #endif - - - /*-----------------------------------------------------------------------* - * Leptonica supports OpenJPEG 2.0+. If you have a version of openjpeg * - * (HAVE_LIBJP2K == 1) that is >= 2.0, set the path to the openjpeg.h * - * header in angle brackets here. * - *-----------------------------------------------------------------------*/ - #define LIBJP2K_HEADER - -#endif /* ! HAVE_CONFIG_H etc. */ - -/*--------------------------------------------------------------------* - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!* - * USER CONFIGURABLE * - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!* - * Environ variables for image I/O without external libraries * - *--------------------------------------------------------------------*/ -/* - * Leptonica supplies I/O support without using external libraries for: - * * image read/write for bmp, pnm - * * header read for jp2k - * * image wrapping write for pdf and ps. - * Setting any of these to 0 causes non-functioning stubs to be linked. - */ -#define USE_BMPIO 1 -#define USE_PNMIO 1 -#define USE_JP2KHEADER 1 -#define USE_PDFIO 1 -#define USE_PSIO 1 - - -/*-------------------------------------------------------------------------* - * On linux systems, you can do I/O between Pix and memory. 
Specifically, - * you can compress (write compressed data to memory from a Pix) and - * uncompress (read from compressed data in memory to a Pix). - * For jpeg, png, jp2k, gif, pnm and bmp, these use the non-posix GNU - * functions fmemopen() and open_memstream(). These functions are not - * available on other systems. - * To use these functions in linux, you must define HAVE_FMEMOPEN to 1. - * To use them on MacOS, which does not support these functions, set it to 0. - *-------------------------------------------------------------------------*/ -#if !defined(HAVE_CONFIG_H) && !defined(ANDROID_BUILD) && !defined(OS_IOS) && \ - !defined(_WIN32) -#define HAVE_FMEMOPEN 1 -#endif /* ! HAVE_CONFIG_H etc. */ - -/*-------------------------------------------------------------------------* - * fstatat() is defined by POSIX, but some systems do not support it. * - * One example is older macOS systems (pre-10.10). * - * Play it safe and set the default value to 0. * - *-------------------------------------------------------------------------*/ -#if !defined(HAVE_CONFIG_H) -#define HAVE_FSTATAT 0 -#endif /* ! HAVE_CONFIG_H */ - -/*--------------------------------------------------------------------* - * It is desirable on Windows to have all temp files written to the same - * subdirectory of the Windows directory, because files under - * persist after reboot, and the regression tests write a lot of files. - * We write all test files to /tmp/lept or subdirectories of /tmp/lept. 
- * Windows temp files are specified as in unix, but have the translation - * /tmp/lept/xxx --> /lept/xxx - *--------------------------------------------------------------------*/ - - -/*--------------------------------------------------------------------* - * Built-in types * - *--------------------------------------------------------------------*/ -typedef int l_ok; /*!< return type 0 if OK, 1 on error */ -typedef signed char l_int8; /*!< signed 8-bit value */ -typedef unsigned char l_uint8; /*!< unsigned 8-bit value */ -typedef short l_int16; /*!< signed 16-bit value */ -typedef unsigned short l_uint16; /*!< unsigned 16-bit value */ -typedef int l_int32; /*!< signed 32-bit value */ -typedef unsigned int l_uint32; /*!< unsigned 32-bit value */ -typedef float l_float32; /*!< 32-bit floating point value */ -typedef double l_float64; /*!< 64-bit floating point value */ -#ifdef COMPILER_MSVC -typedef __int64 l_int64; /*!< signed 64-bit value */ -typedef unsigned __int64 l_uint64; /*!< unsigned 64-bit value */ -#else -typedef long long l_int64; /*!< signed 64-bit value */ -typedef unsigned long long l_uint64; /*!< unsigned 64-bit value */ -#endif /* COMPILER_MSVC */ - - -/*-------------------------------------------------------------------------* - * For security, the library is distributed in a configuration that does * - * not permit (1) forking with 'system', which is used for displaying * - * images and generating gnuplots, and (2) writing files with specified * - * compiled-in file names. All such writes are with functions such as * - * pixWriteDebug() where the "Debug" is appended to the usual name. * - * Whether the "Debug" version defaults to the standard version or is a * - * no-op depends on the value of this global variable. The default value * - * of LeptDebugOK is 0, and it is set in writefile.c. This value can be * - * over-ridden, for development and debugging, by setLeptDebugOK(). 
* - *-------------------------------------------------------------------------*/ -LEPT_DLL extern l_int32 LeptDebugOK; /* default is 0 */ - - -/*------------------------------------------------------------------------* - * Standard macros * - *------------------------------------------------------------------------*/ -#ifndef L_MIN -/*! Minimum of %x and %y */ -#define L_MIN(x, y) (((x) < (y)) ? (x) : (y)) -#endif - -#ifndef L_MAX -/*! Maximum of %x and %y */ -#define L_MAX(x, y) (((x) > (y)) ? (x) : (y)) -#endif - -#ifndef L_ABS -/*! Absolute value of %x */ -#define L_ABS(x) (((x) < 0) ? (-1 * (x)) : (x)) -#endif - -#ifndef L_SIGN -/*! Sign of %x */ -#define L_SIGN(x) (((x) < 0) ? -1 : 1) -#endif - -#ifndef UNDEF -/*! Undefined value */ -#define UNDEF -1 -#endif - -#ifndef NULL -/*! NULL value */ -#define NULL 0 -#endif - -#ifndef TRUE -/*! True value */ -#define TRUE 1 -#endif - -#ifndef FALSE -/*! False value */ -#define FALSE 0 -#endif - - -/*--------------------------------------------------------------------* - * Environment variables for endian dependence * - *--------------------------------------------------------------------*/ -/* - * To control conditional compilation, one of two variables - * - * L_LITTLE_ENDIAN (e.g., for Intel X86) - * L_BIG_ENDIAN (e.g., for Sun SPARC, Mac Power PC) - * - * is defined when the GCC compiler is invoked. - * All code should compile properly for both hardware architectures. - */ - - -/*------------------------------------------------------------------------* - * Simple search state variables * - *------------------------------------------------------------------------*/ -/*! Search State */ -enum { - L_NOT_FOUND = 0, - L_FOUND = 1 -}; - - -/*------------------------------------------------------------------------* - * Path separator conversion * - *------------------------------------------------------------------------*/ -/*! 
Path Separators */ -enum { - UNIX_PATH_SEPCHAR = 0, - WIN_PATH_SEPCHAR = 1 -}; - - -/*------------------------------------------------------------------------* - * Timing structs * - *------------------------------------------------------------------------*/ -typedef void *L_TIMER; - -/*! Timing struct */ -struct L_WallTimer { - l_int32 start_sec; - l_int32 start_usec; - l_int32 stop_sec; - l_int32 stop_usec; -}; -typedef struct L_WallTimer L_WALLTIMER; - - -/*------------------------------------------------------------------------* - * Standard memory allocation * - * * - * These specify the memory management functions that are used * - * on all heap data except for Pix. Memory management for Pix * - * also defaults to malloc and free. See pix1.c for details. * - *------------------------------------------------------------------------*/ -#define LEPT_MALLOC(blocksize) malloc(blocksize) -#define LEPT_CALLOC(numelem, elemsize) calloc(numelem, elemsize) -#define LEPT_REALLOC(ptr, blocksize) realloc(ptr, blocksize) -#define LEPT_FREE(ptr) free(ptr) - - -/*------------------------------------------------------------------------* - * Control printing of error, warning, and info messages * - * * - * Leptonica never sends output to stdout. By default, all messages * - * go to stderr. However, we provide a mechanism for runtime * - * redirection of output, using a custom stderr handler defined * - * by the user. See utils1.c for details and examples. * - * * - * To omit all messages to stderr, simply define NO_CONSOLE_IO on the * - * command line. For finer grained control, we have a mechanism * - * based on the message severity level. The following assumes that * - * NO_CONSOLE_IO is not defined. * - * * - * Messages are printed if the message severity is greater than or equal * - * to the current severity threshold. 
The current severity threshold * - * is the greater of the compile-time severity, which is the minimum * - * severity that can be reported, and the run-time severity, which is * - * the severity threshold at the moment. * - * * - * The compile-time threshold determines which messages are compiled * - * into the library for potential printing. Messages below the * - * compile-time threshold are omitted and can never be printed. The * - * default compile-time threshold is L_SEVERITY_INFO, but this may be * - * overridden by defining MINIMUM_SEVERITY to the desired enumeration * - * identifier on the compiler command line. Defining NO_CONSOLE_IO on * - * the command line is the same as setting MINIMUM_SEVERITY to * - * L_SEVERITY_NONE. * - * * - * The run-time threshold determines which messages are printed during * - * library execution. It defaults to the compile-time threshold but * - * may be changed either statically by defining DEFAULT_SEVERITY to * - * the desired enumeration identifier on the compiler command line, or * - * dynamically by calling setMsgSeverity() to specify a new threshold. * - * The run-time threshold may also be set from the value of the * - * environment variable LEPT_MSG_SEVERITY by calling setMsgSeverity() * - * and specifying L_SEVERITY_EXTERNAL. * - * * - * In effect, the compile-time threshold setting says, "Generate code * - * to permit messages of equal or greater severity than this to be * - * printed, if desired," whereas the run-time threshold setting says, * - * "Print messages that have an equal or greater severity than this." * - *------------------------------------------------------------------------*/ - - /*! Control printing of error, warning and info messages */ -/*! 
Message Control */ -enum { - L_SEVERITY_EXTERNAL = 0, /* Get the severity from the environment */ - L_SEVERITY_ALL = 1, /* Lowest severity: print all messages */ - L_SEVERITY_DEBUG = 2, /* Print debugging and higher messages */ - L_SEVERITY_INFO = 3, /* Print informational and higher messages */ - L_SEVERITY_WARNING = 4, /* Print warning and higher messages */ - L_SEVERITY_ERROR = 5, /* Print error and higher messages */ - L_SEVERITY_NONE = 6 /* Highest severity: print no messages */ -}; - -/* No message less than the compile-time threshold will ever be - * reported, regardless of the current run-time threshold. This allows - * selection of the set of messages to include in the library. For - * example, setting the threshold to L_SEVERITY_WARNING eliminates all - * informational messages from the library. With that setting, both - * warning and error messages would be printed unless setMsgSeverity() - * was called, or DEFAULT_SEVERITY was redefined, to set the run-time - * severity to L_SEVERITY_ERROR. In that case, only error messages - * would be printed. - * - * This mechanism makes the library smaller and faster, by eliminating - * undesired message reporting and the associated run-time overhead for - * message threshold checking, because code for messages whose severity - * is lower than MINIMUM_SEVERITY won't be generated. - * - * A production library might typically permit ERROR messages to be - * generated, and a development library might permit DEBUG and higher. - * The actual messages printed (as opposed to generated) would depend - * on the current run-time severity threshold. - * - * This is a complex mechanism and a few examples may help. - * (1) No output permitted under any circumstances. - * Use: -DNO_CONSOLE_IO or -DMINIMUM_SEVERITY=6 - * (2) Suppose you want to only allow error messages, and you don't - * want to permit info or warning messages at runtime. 
- * Use: -DMINIMUM_SEVERITY=5 - * (3) Suppose you want to only allow error messages by default, - * but you will permit this to be over-ridden at runtime. - * Use: -DDEFAULT_SEVERITY=5 - * and to allow info and warning override: - * setMsgSeverity(L_SEVERITY_INFO); - */ - -#ifdef NO_CONSOLE_IO - #undef MINIMUM_SEVERITY - #undef DEFAULT_SEVERITY - - #define MINIMUM_SEVERITY L_SEVERITY_NONE /*!< Compile-time default */ - #define DEFAULT_SEVERITY L_SEVERITY_NONE /*!< Run-time default */ - -#else - #ifndef MINIMUM_SEVERITY - #define MINIMUM_SEVERITY L_SEVERITY_INFO /*!< Compile-time default */ - #endif - - #ifndef DEFAULT_SEVERITY - #define DEFAULT_SEVERITY MINIMUM_SEVERITY /*!< Run-time default */ - #endif -#endif - - -/*! The run-time message severity threshold is defined in utils1.c. */ -LEPT_DLL extern l_int32 LeptMsgSeverity; - -/* - *
- *  Usage
- *  =====
- *  Messages are of two types.
- *
- *  (1) The messages
- *      ERROR_INT(a,b,c)       : returns l_int32
- *      ERROR_FLOAT(a,b,c)     : returns l_float32
- *      ERROR_PTR(a,b,c)       : returns void*
- *  are used to return from functions and take a fixed set of parameters:
- *      a : 
- *      b : procName
- *      c : 
- *  where procName is the name of the local variable naming the function.
- *
- *  (2) The purely informational L_* messages
- *      L_ERROR(a,...)
- *      L_WARNING(a,...)
- *      L_INFO(a,...)
- *  do not take a return value, but they take at least two parameters:
- *      a  :   with optional format conversions
- *      v1 : procName    (this must be included as the first vararg)
- *      v2, ... :  optional varargs to match format converters in the message
- *
- *  To return an error from a function that returns void, use:
- *      L_ERROR(, procName, [...])
- *      return;
- *
- *  Implementation details
- *  ======================
- *  Messages are defined with the IF_SEV macro.  The first parameter is
- *  the message severity, the second is the function to call if the
- *  message is to be printed, and the third is the return value if the
- *  message is to be suppressed.  For example, we might have an
- *  informational message defined as:
- *
- *    IF_SEV(L_SEVERITY_INFO, fprintf(.......), 0)
- *
- *  The macro expands into a conditional.  Because the first comparison
- *  is between two constants, an optimizing compiler will remove either
- *  the comparison (if it's true) or the entire macro expansion (if it
- *  is false).  This means that there is no run-time overhead for
- *  messages whose severity falls below the minimum specified at compile
- *  time, and for others the overhead is one (not two) comparisons.
- *
- *  The L_nnn() macros below do not return a value, but because the
- *  conditional operator requires one for the false condition, we
- *  specify a void expression.
- * 
- */ - -#ifdef NO_CONSOLE_IO - - #define PROCNAME(name) - #define ERROR_INT(a, b, c) ((l_int32)(c)) - #define ERROR_FLOAT(a, b, c) ((l_float32)(c)) - #define ERROR_PTR(a, b, c) ((void *)(c)) - #define L_ERROR(a, ...) - #define L_WARNING(a, ...) - #define L_INFO(a, ...) - -#else - - #define PROCNAME(name) static const char procName[] = name - #define IF_SEV(l, t, f) \ - ((l) >= MINIMUM_SEVERITY && (l) >= LeptMsgSeverity ? (t) : (f)) - - #define ERROR_INT(a, b, c) \ - IF_SEV(L_SEVERITY_ERROR, returnErrorInt((a), (b), (c)), (l_int32)(c)) - #define ERROR_FLOAT(a, b, c) \ - IF_SEV(L_SEVERITY_ERROR, returnErrorFloat((a), (b), (c)), (l_float32)(c)) - #define ERROR_PTR(a, b, c) \ - IF_SEV(L_SEVERITY_ERROR, returnErrorPtr((a), (b), (c)), (void *)(c)) - - #define L_ERROR(a, ...) \ - IF_SEV(L_SEVERITY_ERROR, \ - (void)lept_stderr("Error in %s: " a, __VA_ARGS__), \ - (void)0) - #define L_WARNING(a, ...) \ - IF_SEV(L_SEVERITY_WARNING, \ - (void)lept_stderr("Warning in %s: " a, __VA_ARGS__), \ - (void)0) - #define L_INFO(a, ...) \ - IF_SEV(L_SEVERITY_INFO, \ - (void)lept_stderr("Info in %s: " a, __VA_ARGS__), \ - (void)0) - -#if 0 /* Alternative method for controlling L_* message output */ - #define L_ERROR(a, ...) \ - { if (L_SEVERITY_ERROR >= MINIMUM_SEVERITY && \ - L_SEVERITY_ERROR >= LeptMsgSeverity) \ - lept_stderr("Error in %s: " a, __VA_ARGS__) \ - } - #define L_WARNING(a, ...) \ - { if (L_SEVERITY_WARNING >= MINIMUM_SEVERITY && \ - L_SEVERITY_WARNING >= LeptMsgSeverity) \ - lept_stderr("Warning in %s: " a, __VA_ARGS__) \ - } - #define L_INFO(a, ...) 
\ - { if (L_SEVERITY_INFO >= MINIMUM_SEVERITY && \ - L_SEVERITY_INFO >= LeptMsgSeverity) \ - lept_stderr("Info in %s: " a, __VA_ARGS__) \ - } -#endif - -#endif /* NO_CONSOLE_IO */ - - -/*------------------------------------------------------------------------* - * snprintf() renamed in MSVC (pre-VS2015) * - *------------------------------------------------------------------------*/ -#if defined _MSC_VER && _MSC_VER < 1900 -#define snprintf(buf, size, ...) _snprintf_s(buf, size, _TRUNCATE, __VA_ARGS__) -#endif - - -#endif /* LEPTONICA_ENVIRON_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtauto.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtauto.c deleted file mode 100644 index 8f38b0fe..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtauto.c +++ /dev/null @@ -1,821 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file fhmtauto.c - *
- *
- *    Main function calls:
- *       l_int32             fhmtautogen()
- *       l_int32             fhmtautogen1()
- *       l_int32             fhmtautogen2()
- *
- *    Static helpers:
- *       static SARRAY      *sarrayMakeWplsCode()
- *       static SARRAY      *sarrayMakeInnerLoopDWACode()
- *       static char        *makeBarrelshiftString()
- *
- *    This automatically generates dwa code for the hit-miss transform.
- *    Here's a road map for how it all works.
- *
- *    (1) You generate an array (a SELA) of hit-miss transform SELs.
- *        This can be done in several ways, including
- *           (a) calling the function selaAddHitMiss() for
- *               pre-compiled SELs
- *           (b) generating the SELA in code in line
- *           (c) reading in a SELA from file, using selaRead()
- *               or various other formats.
- *
- *    (2) You call fhmtautogen1() and fhmtautogen2() on this SELA.
- *        This uses the text files hmttemplate1.txt and
- *        hmttemplate2.txt for building up the source code.  See the file
- *        prog/fhmtautogen.c for an example of how this is done.
- *        The output is written to files named fhmtgen.*.c
- *        and fhmtgenlow.*.c, where "*" is an integer that you
- *        input to this function.  That integer labels both
- *        the output files, as well as all the functions that
- *        are generated.  That way, using different integers,
- *        you can invoke fhmtautogen() any number of times
- *        to get functions that all have different names so that
- *        they can be linked into one program.
- *
- *    (3) You copy the generated source code back to your src
- *        directory for compilation.  Put their names in the
- *        Makefile, regnerate the prototypes, and recompile
- *        the libraries.  Look at the Makefile to see how I've
- *        included fhmtgen.1.c and fhmtgenlow.1.c.  These files
- *        provide the high-level interfaces for the hmt, and
- *        the low-level interfaces to do the actual work.
- *
- *    (4) In an application, you now use this interface.  Again
- *        for the example files generated, using integer "1":
- *
- *           PIX   *pixHMTDwa_1(PIX *pixd, PIX *pixs, const char *selname);
- *
- *              or
- *
- *           PIX   *pixFHMTGen_1(PIX *pixd, PIX *pixs, const char *selname);
- *
- *        where the selname is one of the set that were defined
- *        as the name field of sels.  This set is listed at the
- *        beginning of the file fhmtgen.1.c.
- *        As an example, see the file prog/fmtauto_reg.c, which
- *        verifies the correctness of the implementation by
- *        comparing the dwa result with that of full-image
- *        rasterops.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -#define OUTROOT "fhmtgen" -#define TEMPLATE1 "hmttemplate1.txt" -#define TEMPLATE2 "hmttemplate2.txt" - -#define PROTOARGS "(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);" - -static const l_int32 L_BUF_SIZE = 512; - -static char * makeBarrelshiftString(l_int32 delx, l_int32 dely, l_int32 type); -static SARRAY * sarrayMakeInnerLoopDWACode(SEL *sel, l_int32 nhits, l_int32 nmisses); -static SARRAY * sarrayMakeWplsCode(SEL *sel); - -static char wpldecls[][60] = { - "l_int32 wpls2;", - "l_int32 wpls2, wpls3;", - "l_int32 wpls2, wpls3, wpls4;", - "l_int32 wpls5;", - "l_int32 wpls5, wpls6;", - "l_int32 wpls5, wpls6, wpls7;", - "l_int32 wpls5, wpls6, wpls7, wpls8;", - "l_int32 wpls9;", - "l_int32 wpls9, wpls10;", - "l_int32 wpls9, wpls10, wpls11;", - "l_int32 wpls9, wpls10, wpls11, wpls12;", - "l_int32 wpls13;", - "l_int32 wpls13, wpls14;", - "l_int32 wpls13, wpls14, wpls15;", - "l_int32 wpls13, wpls14, wpls15, wpls16;", - "l_int32 wpls17;", - "l_int32 wpls17, wpls18;", - "l_int32 wpls17, wpls18, wpls19;", - "l_int32 wpls17, wpls18, wpls19, wpls20;", - "l_int32 wpls21;", - "l_int32 wpls21, wpls22;", - "l_int32 wpls21, wpls22, wpls23;", - "l_int32 wpls21, wpls22, wpls23, wpls24;", - "l_int32 wpls25;", - "l_int32 wpls25, wpls26;", - "l_int32 wpls25, wpls26, wpls27;", - "l_int32 wpls25, wpls26, wpls27, wpls28;", - "l_int32 wpls29;", - "l_int32 wpls29, wpls30;", - "l_int32 wpls29, wpls30, wpls31;"}; - -static char wpldefs[][24] = { - " wpls2 = 2 * wpls;", - " wpls3 = 3 * wpls;", - " wpls4 = 4 * wpls;", - " wpls5 = 5 * wpls;", - " wpls6 = 6 * wpls;", - " wpls7 = 7 * wpls;", - " wpls8 = 8 * wpls;", - " wpls9 = 9 * wpls;", - " wpls10 = 10 * wpls;", - " wpls11 = 11 * wpls;", - " wpls12 = 12 * wpls;", - " wpls13 = 13 * wpls;", - " wpls14 = 14 * wpls;", - " wpls15 = 15 * wpls;", - " wpls16 = 16 * wpls;", - " wpls17 = 17 * wpls;", - " wpls18 = 18 * wpls;", - " 
wpls19 = 19 * wpls;", - " wpls20 = 20 * wpls;", - " wpls21 = 21 * wpls;", - " wpls22 = 22 * wpls;", - " wpls23 = 23 * wpls;", - " wpls24 = 24 * wpls;", - " wpls25 = 25 * wpls;", - " wpls26 = 26 * wpls;", - " wpls27 = 27 * wpls;", - " wpls28 = 28 * wpls;", - " wpls29 = 29 * wpls;", - " wpls30 = 30 * wpls;", - " wpls31 = 31 * wpls;"}; - -static char wplstrp[][10] = {"+ wpls", "+ wpls2", "+ wpls3", "+ wpls4", - "+ wpls5", "+ wpls6", "+ wpls7", "+ wpls8", - "+ wpls9", "+ wpls10", "+ wpls11", "+ wpls12", - "+ wpls13", "+ wpls14", "+ wpls15", "+ wpls16", - "+ wpls17", "+ wpls18", "+ wpls19", "+ wpls20", - "+ wpls21", "+ wpls22", "+ wpls23", "+ wpls24", - "+ wpls25", "+ wpls26", "+ wpls27", "+ wpls28", - "+ wpls29", "+ wpls30", "+ wpls31"}; - -static char wplstrm[][10] = {"- wpls", "- wpls2", "- wpls3", "- wpls4", - "- wpls5", "- wpls6", "- wpls7", "- wpls8", - "- wpls9", "- wpls10", "- wpls11", "- wpls12", - "- wpls13", "- wpls14", "- wpls15", "- wpls16", - "- wpls17", "- wpls18", "- wpls19", "- wpls20", - "- wpls21", "- wpls22", "- wpls23", "- wpls24", - "- wpls25", "- wpls26", "- wpls27", "- wpls28", - "- wpls29", "- wpls30", "- wpls31"}; - - -/*! - * \brief fhmtautogen() - * - * \param[in] sela - * \param[in] fileindex - * \param[in] filename [optional]; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This function generates all the code for implementing
- *          dwa morphological operations using all the sels in the sela.
- *      (2) See fhmtautogen1() and fhmtautogen2() for details.
- * 
- */ -l_ok -fhmtautogen(SELA *sela, - l_int32 fileindex, - const char *filename) -{ -l_int32 ret1, ret2; - - PROCNAME("fhmtautogen"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - ret1 = fhmtautogen1(sela, fileindex, filename); - ret2 = fhmtautogen2(sela, fileindex, filename); - if (ret1 || ret2) - return ERROR_INT("code generation problem", procName, 1); - return 0; -} - - -/*! - * \brief fhmtautogen1() - * - * \param[in] sela array - * \param[in] fileindex - * \param[in] filename [optional]; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This function uses hmttemplate1.txt to create a
- *          top-level file that contains two functions that carry
- *          out the hit-miss transform for any of the sels in
- *          the input sela.
- *      (2) The fileindex parameter is inserted into the output
- *          filename, as described below.
- *      (3) If filename == NULL, the output file is fhmtgen.[n].c,
- *          where [n] is equal to the 'fileindex' parameter.
- *      (4) If filename != NULL, the output file is [filename].[n].c.
- *      (5) Each sel must have at least one hit.  A sel with only misses
- *          generates code that will abort the operation if it is called.
- * 
- */ -l_ok -fhmtautogen1(SELA *sela, - l_int32 fileindex, - const char *filename) -{ -char *filestr; -char *str_proto1, *str_proto2, *str_proto3; -char *str_doc1, *str_doc2, *str_doc3, *str_doc4; -char *str_def1, *str_def2, *str_proc1, *str_proc2; -char *str_dwa1, *str_low_dt, *str_low_ds; -char bigbuf[L_BUF_SIZE]; -l_int32 i, nsels, nbytes, actstart, end, newstart; -size_t size; -SARRAY *sa1, *sa2, *sa3; - - PROCNAME("fhmtautogen1"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - if (fileindex < 0) - fileindex = 0; - if ((nsels = selaGetCount(sela)) == 0) - return ERROR_INT("no sels in sela", procName, 1); - - /* Make array of textlines from from hmttemplate1.txt */ - if ((filestr = (char *)l_binaryRead(TEMPLATE1, &size)) == NULL) - return ERROR_INT("filestr not made", procName, 1); - sa2 = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - if (!sa2) - return ERROR_INT("sa2 not made", procName, 1); - - /* Make array of sel names */ - sa1 = selaGetSelnames(sela); - - /* Make strings containing function call names */ - sprintf(bigbuf, "PIX *pixHMTDwa_%d(PIX *pixd, PIX *pixs, " - "const char *selname);", fileindex); - str_proto1 = stringNew(bigbuf); - sprintf(bigbuf, "PIX *pixFHMTGen_%d(PIX *pixd, PIX *pixs, " - "const char *selname);", fileindex); - str_proto2 = stringNew(bigbuf); - sprintf(bigbuf, "l_int32 fhmtgen_low_%d(l_uint32 *datad, l_int32 w,\n" - " l_int32 h, l_int32 wpld,\n" - " l_uint32 *datas, l_int32 wpls,\n" - " l_int32 index);", fileindex); - str_proto3 = stringNew(bigbuf); - sprintf(bigbuf, " * PIX *pixHMTDwa_%d()", fileindex); - str_doc1 = stringNew(bigbuf); - sprintf(bigbuf, " * PIX *pixFHMTGen_%d()", fileindex); - str_doc2 = stringNew(bigbuf); - sprintf(bigbuf, " * \\brief pixHMTDwa_%d()", fileindex); - str_doc3 = stringNew(bigbuf); - sprintf(bigbuf, " * \\brief pixFHMTGen_%d()", fileindex); - str_doc4 = stringNew(bigbuf); - sprintf(bigbuf, "pixHMTDwa_%d(PIX *pixd,", fileindex); - str_def1 = stringNew(bigbuf); - 
sprintf(bigbuf, "pixFHMTGen_%d(PIX *pixd,", fileindex); - str_def2 = stringNew(bigbuf); - sprintf(bigbuf, " PROCNAME(\"pixHMTDwa_%d\");", fileindex); - str_proc1 = stringNew(bigbuf); - sprintf(bigbuf, " PROCNAME(\"pixFHMTGen_%d\");", fileindex); - str_proc2 = stringNew(bigbuf); - sprintf(bigbuf, " pixt2 = pixFHMTGen_%d(NULL, pixt1, selname);", - fileindex); - str_dwa1 = stringNew(bigbuf); - sprintf(bigbuf, - " fhmtgen_low_%d(datad, w, h, wpld, datat, wpls, index);", - fileindex); - str_low_dt = stringNew(bigbuf); - sprintf(bigbuf, - " fhmtgen_low_%d(datad, w, h, wpld, datas, wpls, index);", - fileindex); - str_low_ds = stringNew(bigbuf); - - /* Make the output sa */ - sa3 = sarrayCreate(0); - - /* Copyright notice and info header */ - sarrayParseRange(sa2, 0, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Insert function names as documentation */ - sarrayAddString(sa3, str_doc1, L_INSERT); - sarrayAddString(sa3, str_doc2, L_INSERT); - - /* Add '#include's */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Insert function prototypes */ - sarrayAddString(sa3, str_proto1, L_INSERT); - sarrayAddString(sa3, str_proto2, L_INSERT); - sarrayAddString(sa3, str_proto3, L_INSERT); - - /* Add static globals */ - sprintf(bigbuf, "\nstatic l_int32 NUM_SELS_GENERATED = %d;", nsels); - sarrayAddString(sa3, bigbuf, L_COPY); - sprintf(bigbuf, "static char SEL_NAMES[][80] = {"); - sarrayAddString(sa3, bigbuf, L_COPY); - for (i = 0; i < nsels - 1; i++) { - sprintf(bigbuf, " \"%s\",", - sarrayGetString(sa1, i, L_NOCOPY)); - sarrayAddString(sa3, bigbuf, L_COPY); - } - sprintf(bigbuf, " \"%s\"};", - sarrayGetString(sa1, i, L_NOCOPY)); - sarrayAddString(sa3, bigbuf, L_COPY); - - /* Start pixHMTDwa_*() function description */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, 
str_doc3, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Finish pixHMTDwa_*() function definition */ - sarrayAddString(sa3, str_def1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_proc1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_dwa1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Start pixFHMTGen_*() function description */ - sarrayAddString(sa3, str_doc4, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Finish pixFHMTGen_*() function description */ - sarrayAddString(sa3, str_def2, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_proc2, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_dt, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_ds, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - filestr = sarrayToString(sa3, 1); - nbytes = strlen(filestr); - if (filename) - snprintf(bigbuf, L_BUF_SIZE, "%s.%d.c", filename, fileindex); - else - sprintf(bigbuf, "%s.%d.c", OUTROOT, fileindex); - l_binaryWrite(bigbuf, "w", filestr, nbytes); - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - LEPT_FREE(filestr); - return 0; -} - - -/*! 
- * \brief fhmtautogen2() - * - * \param[in] sela array - * \param[in] fileindex - * \param[in] filename [optional]; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This function uses hmttemplate2.txt to create a
- *          low-level file that contains the low-level functions for
- *          implementing the hit-miss transform for every sel
- *          in the input sela.
- *      (2) The fileindex parameter is inserted into the output
- *          filename, as described below.
- *      (3) If filename == NULL, the output file is fhmtgenlow.[n].c,
- *          where [n] is equal to the %fileindex parameter.
- *      (4) If filename != NULL, the output file is [filename]low.[n].c.
- * 
- */ -l_ok -fhmtautogen2(SELA *sela, - l_int32 fileindex, - const char *filename) -{ -char *filestr, *fname, *linestr; -char *str_doc1, *str_doc2, *str_doc3, *str_def1; -char bigbuf[L_BUF_SIZE]; -char breakstring[] = " break;"; -char staticstring[] = "static void"; -l_int32 i, k, l, nsels, nbytes, nhits, nmisses; -l_int32 actstart, end, newstart; -l_int32 argstart, argend, loopstart, loopend, finalstart, finalend; -size_t size; -SARRAY *sa1, *sa2, *sa3, *sa4, *sa5, *sa6; -SEL *sel; - - PROCNAME("fhmtautogen2"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - if (fileindex < 0) - fileindex = 0; - if ((nsels = selaGetCount(sela)) == 0) - return ERROR_INT("no sels in sela", procName, 1); - - /* Make the array of textlines from hmttemplate2.txt */ - if ((filestr = (char *)l_binaryRead(TEMPLATE2, &size)) == NULL) - return ERROR_INT("filestr not made", procName, 1); - sa1 = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - if (!sa1) - return ERROR_INT("sa1 not made", procName, 1); - - /* Make the array of static function names */ - if ((sa2 = sarrayCreate(nsels)) == NULL) { - sarrayDestroy(&sa1); - return ERROR_INT("sa2 not made", procName, 1); - } - for (i = 0; i < nsels; i++) { - sprintf(bigbuf, "fhmt_%d_%d", fileindex, i); - sarrayAddString(sa2, bigbuf, L_COPY); - } - - /* Make the static prototype strings */ - sa3 = sarrayCreate(2 * nsels); /* should be ok */ - for (i = 0; i < nsels; i++) { - fname = sarrayGetString(sa2, i, L_NOCOPY); - sprintf(bigbuf, "static void %s%s", fname, PROTOARGS); - sarrayAddString(sa3, bigbuf, L_COPY); - } - - /* Make strings containing function names */ - sprintf(bigbuf, " * l_int32 fhmtgen_low_%d()", - fileindex); - str_doc1 = stringNew(bigbuf); - sprintf(bigbuf, " * void fhmt_%d_*()", fileindex); - str_doc2 = stringNew(bigbuf); - - /* Output to this sa */ - sa4 = sarrayCreate(0); - - /* Copyright notice and info header */ - sarrayParseRange(sa1, 0, &actstart, &end, &newstart, "--", 0); - 
sarrayAppendRange(sa4, sa1, actstart, end); - - /* Insert function names as documentation */ - sarrayAddString(sa4, str_doc1, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - sarrayAddString(sa4, str_doc2, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Insert static protos */ - for (i = 0; i < nsels; i++) { - if ((linestr = sarrayGetString(sa3, i, L_COPY)) == NULL) { - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - sarrayDestroy(&sa4); - return ERROR_INT("linestr not retrieved", procName, 1); - } - sarrayAddString(sa4, linestr, L_INSERT); - } - - /* Make more strings containing function names */ - sprintf(bigbuf, " * fhmtgen_low_%d()", fileindex); - str_doc3 = stringNew(bigbuf); - sprintf(bigbuf, "fhmtgen_low_%d(l_uint32 *datad,", fileindex); - str_def1 = stringNew(bigbuf); - - /* Insert function header */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - sarrayAddString(sa4, str_doc3, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - sarrayAddString(sa4, str_def1, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Generate and insert the dispatcher code */ - for (i = 0; i < nsels; i++) { - sprintf(bigbuf, " case %d:", i); - sarrayAddString(sa4, bigbuf, L_COPY); - sprintf(bigbuf, " %s(datad, w, h, wpld, datas, wpls);", - sarrayGetString(sa2, i, L_NOCOPY)); - sarrayAddString(sa4, bigbuf, L_COPY); - sarrayAddString(sa4, breakstring, L_COPY); - } - - /* Finish the dispatcher and introduce the low-level code */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Get the range 
for the args common to all functions */ - sarrayParseRange(sa1, newstart, &argstart, &argend, &newstart, "--", 0); - - /* Get the range for the loop code common to all functions */ - sarrayParseRange(sa1, newstart, &loopstart, &loopend, &newstart, "--", 0); - - /* Get the range for the ending code common to all functions */ - sarrayParseRange(sa1, newstart, &finalstart, &finalend, &newstart, "--", 0); - - /* Do all the static functions */ - for (i = 0; i < nsels; i++) { - /* Generate the function header and add the common args */ - sarrayAddString(sa4, staticstring, L_COPY); - fname = sarrayGetString(sa2, i, L_NOCOPY); - sprintf(bigbuf, "%s(l_uint32 *datad,", fname); - sarrayAddString(sa4, bigbuf, L_COPY); - sarrayAppendRange(sa4, sa1, argstart, argend); - - /* Declare and define wplsN args, as necessary */ - if ((sel = selaGetSel(sela, i)) == NULL) { - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - sarrayDestroy(&sa4); - return ERROR_INT("sel not returned", procName, 1); - } - sa5 = sarrayMakeWplsCode(sel); - sarrayJoin(sa4, sa5); - sarrayDestroy(&sa5); - - /* Make sure sel has at least one hit */ - nhits = 0; - nmisses = 0; - for (k = 0; k < sel->sy; k++) { - for (l = 0; l < sel->sx; l++) { - if (sel->data[k][l] == 1) - nhits++; - else if (sel->data[k][l] == 2) - nmisses++; - } - } - if (nhits == 0) { - linestr = stringNew(" " - "lept_stderr(\"Error in HMT: no hits in sel!\\n\");\n}\n\n"); - sarrayAddString(sa4, linestr, L_INSERT); - continue; - } - - /* Add the function loop code */ - sarrayAppendRange(sa4, sa1, loopstart, loopend); - - /* Insert barrel-op code for *dptr */ - if ((sa6 = sarrayMakeInnerLoopDWACode(sel, nhits, nmisses)) == NULL) { - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - sarrayDestroy(&sa4); - return ERROR_INT("sa6 not made", procName, 1); - } - sarrayJoin(sa4, sa6); - sarrayDestroy(&sa6); - - /* Finish the function code */ - sarrayAppendRange(sa4, sa1, finalstart, finalend); - } - - /* Output to 
file */ - filestr = sarrayToString(sa4, 1); - nbytes = strlen(filestr); - if (filename) - snprintf(bigbuf, L_BUF_SIZE, "%slow.%d.c", filename, fileindex); - else - sprintf(bigbuf, "%slow.%d.c", OUTROOT, fileindex); - l_binaryWrite(bigbuf, "w", filestr, nbytes); - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - sarrayDestroy(&sa4); - LEPT_FREE(filestr); - return 0; -} - - - -/*--------------------------------------------------------------------------* - * Helper code for sel * - *--------------------------------------------------------------------------*/ -/*! - * \brief sarrayMakeWplsCode() - */ -static SARRAY * -sarrayMakeWplsCode(SEL *sel) -{ -char emptystring[] = ""; -l_int32 i, j, ymax, dely; -SARRAY *sa; - - PROCNAME("sarrayMakeWplsCode"); - - if (!sel) - return (SARRAY *)ERROR_PTR("sel not defined", procName, NULL); - - ymax = 0; - for (i = 0; i < sel->sy; i++) { - for (j = 0; j < sel->sx; j++) { - if (sel->data[i][j] == 1 || sel->data[i][j] == 2) { - dely = L_ABS(i - sel->cy); - ymax = L_MAX(ymax, dely); - } - } - } - if (ymax > 31) { - L_WARNING("ymax > 31; truncating to 31\n", procName); - ymax = 31; - } - - sa = sarrayCreate(0); - - /* Declarations */ - if (ymax > 4) - sarrayAddString(sa, wpldecls[2], L_COPY); - if (ymax > 8) - sarrayAddString(sa, wpldecls[6], L_COPY); - if (ymax > 12) - sarrayAddString(sa, wpldecls[10], L_COPY); - if (ymax > 16) - sarrayAddString(sa, wpldecls[14], L_COPY); - if (ymax > 20) - sarrayAddString(sa, wpldecls[18], L_COPY); - if (ymax > 24) - sarrayAddString(sa, wpldecls[22], L_COPY); - if (ymax > 28) - sarrayAddString(sa, wpldecls[26], L_COPY); - if (ymax > 1) - sarrayAddString(sa, wpldecls[ymax - 2], L_COPY); - - sarrayAddString(sa, emptystring, L_COPY); - - /* Definitions */ - for (i = 2; i <= ymax; i++) - sarrayAddString(sa, wpldefs[i - 2], L_COPY); - - return sa; -} - - -/*! 
- * \brief sarrayMakeInnerLoopDWACode() - */ -static SARRAY * -sarrayMakeInnerLoopDWACode(SEL *sel, - l_int32 nhits, - l_int32 nmisses) -{ -char *string; -char land[] = "&"; -char bigbuf[L_BUF_SIZE]; -l_int32 i, j, ntot, nfound, type, delx, dely; -SARRAY *sa; - - PROCNAME("sarrayMakeInnerLoopDWACode"); - - if (!sel) - return (SARRAY *)ERROR_PTR("sel not defined", procName, NULL); - - sa = sarrayCreate(0); - ntot = nhits + nmisses; - nfound = 0; - for (i = 0; i < sel->sy; i++) { - for (j = 0; j < sel->sx; j++) { - type = sel->data[i][j]; - if (type == SEL_HIT || type == SEL_MISS) { - nfound++; - dely = i - sel->cy; - delx = j - sel->cx; - if ((string = makeBarrelshiftString(delx, dely, type)) - == NULL) { - L_WARNING("barrel shift string not made\n", procName); - continue; - } - if (ntot == 1) /* just one item */ - sprintf(bigbuf, " *dptr = %s;", string); - else if (nfound == 1) - sprintf(bigbuf, " *dptr = %s %s", string, land); - else if (nfound < ntot) - sprintf(bigbuf, " %s %s", string, land); - else /* nfound == ntot */ - sprintf(bigbuf, " %s;", string); - sarrayAddString(sa, bigbuf, L_COPY); - LEPT_FREE(string); - } - } - } - - return sa; -} - - -/*! 
- * \brief makeBarrelshiftString() - */ -static char * -makeBarrelshiftString(l_int32 delx, /* j - cx */ - l_int32 dely, /* i - cy */ - l_int32 type) /* SEL_HIT or SEL_MISS */ -{ -l_int32 absx, absy; -char bigbuf[L_BUF_SIZE]; - - PROCNAME("makeBarrelshiftString"); - - if (delx < -31 || delx > 31) - return (char *)ERROR_PTR("delx out of bounds", procName, NULL); - if (dely < -31 || dely > 31) - return (char *)ERROR_PTR("dely out of bounds", procName, NULL); - absx = L_ABS(delx); - absy = L_ABS(dely); - - if (type == SEL_HIT) { - if ((delx == 0) && (dely == 0)) - sprintf(bigbuf, "(*sptr)"); - else if ((delx == 0) && (dely < 0)) - sprintf(bigbuf, "(*(sptr %s))", wplstrm[absy - 1]); - else if ((delx == 0) && (dely > 0)) - sprintf(bigbuf, "(*(sptr %s))", wplstrp[absy - 1]); - else if ((delx < 0) && (dely == 0)) - sprintf(bigbuf, "((*(sptr) >> %d) | (*(sptr - 1) << %d))", - absx, 32 - absx); - else if ((delx > 0) && (dely == 0)) - sprintf(bigbuf, "((*(sptr) << %d) | (*(sptr + 1) >> %d))", - absx, 32 - absx); - else if ((delx < 0) && (dely < 0)) - sprintf(bigbuf, "((*(sptr %s) >> %d) | (*(sptr %s - 1) << %d))", - wplstrm[absy - 1], absx, wplstrm[absy - 1], 32 - absx); - else if ((delx > 0) && (dely < 0)) - sprintf(bigbuf, "((*(sptr %s) << %d) | (*(sptr %s + 1) >> %d))", - wplstrm[absy - 1], absx, wplstrm[absy - 1], 32 - absx); - else if ((delx < 0) && (dely > 0)) - sprintf(bigbuf, "((*(sptr %s) >> %d) | (*(sptr %s - 1) << %d))", - wplstrp[absy - 1], absx, wplstrp[absy - 1], 32 - absx); - else /* ((delx > 0) && (dely > 0)) */ - sprintf(bigbuf, "((*(sptr %s) << %d) | (*(sptr %s + 1) >> %d))", - wplstrp[absy - 1], absx, wplstrp[absy - 1], 32 - absx); - } else { /* type == SEL_MISS */ - if ((delx == 0) && (dely == 0)) - sprintf(bigbuf, "(~*sptr)"); - else if ((delx == 0) && (dely < 0)) - sprintf(bigbuf, "(~*(sptr %s))", wplstrm[absy - 1]); - else if ((delx == 0) && (dely > 0)) - sprintf(bigbuf, "(~*(sptr %s))", wplstrp[absy - 1]); - else if ((delx < 0) && (dely == 0)) - 
sprintf(bigbuf, "((~*(sptr) >> %d) | (~*(sptr - 1) << %d))", - absx, 32 - absx); - else if ((delx > 0) && (dely == 0)) - sprintf(bigbuf, "((~*(sptr) << %d) | (~*(sptr + 1) >> %d))", - absx, 32 - absx); - else if ((delx < 0) && (dely < 0)) - sprintf(bigbuf, "((~*(sptr %s) >> %d) | (~*(sptr %s - 1) << %d))", - wplstrm[absy - 1], absx, wplstrm[absy - 1], 32 - absx); - else if ((delx > 0) && (dely < 0)) - sprintf(bigbuf, "((~*(sptr %s) << %d) | (~*(sptr %s + 1) >> %d))", - wplstrm[absy - 1], absx, wplstrm[absy - 1], 32 - absx); - else if ((delx < 0) && (dely > 0)) - sprintf(bigbuf, "((~*(sptr %s) >> %d) | (~*(sptr %s - 1) << %d))", - wplstrp[absy - 1], absx, wplstrp[absy - 1], 32 - absx); - else /* ((delx > 0) && (dely > 0)) */ - sprintf(bigbuf, "((~*(sptr %s) << %d) | (~*(sptr %s + 1) >> %d))", - wplstrp[absy - 1], absx, wplstrp[absy - 1], 32 - absx); - } - - return stringNew(bigbuf); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtgen.1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtgen.1.c deleted file mode 100644 index 8a1fcab8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtgen.1.c +++ /dev/null @@ -1,177 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * Top-level fast hit-miss transform with auto-generated sels - * - * PIX *pixHMTDwa_1() - * PIX *pixFHMTGen_1() - */ - -#include -#include "allheaders.h" - -PIX *pixHMTDwa_1(PIX *pixd, PIX *pixs, const char *selname); -PIX *pixFHMTGen_1(PIX *pixd, PIX *pixs, const char *selname); -l_int32 fhmtgen_low_1(l_uint32 *datad, l_int32 w, - l_int32 h, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, - l_int32 index); - -static l_int32 NUM_SELS_GENERATED = 10; -static char SEL_NAMES[][80] = { - "sel_3hm", - "sel_3de", - "sel_3ue", - "sel_3re", - "sel_3le", - "sel_sl1", - "sel_ulc", - "sel_urc", - "sel_llc", - "sel_lrc"}; - -/*! - * \brief pixHMTDwa_1() - * - * \param[in] pixd usual 3 choices: null, == pixs, != pixs - * \param[in] pixs 1 bpp - * \param[in] sel name - * \return pixd - * - *
- * Notes:
- *      (1) This simply adds a 32 pixel border, calls the appropriate
- *          pixFHMTGen_*(), and removes the border.
- *          See notes below for that function.
- * 
- */ -PIX * -pixHMTDwa_1(PIX *pixd, - PIX *pixs, - const char *selname) -{ -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixHMTDwa_1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - pixt1 = pixAddBorder(pixs, 32, 0); - pixt2 = pixFHMTGen_1(NULL, pixt1, selname); - pixt3 = pixRemoveBorder(pixt2, 32); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixCopy(pixd, pixt3); - pixDestroy(&pixt3); - return pixd; -} - - -/*! - * \brief pixFHMTGen_1() - * - * \param[in] pixd usual 3 choices: null, == pixs, != pixs - * \param[in] pixs 1 bpp - * \param[in] sel name - * \return pixd - * - *
- * Notes:
- *      (1) This is a dwa implementation of the hit-miss transform
- *          on pixs by the sel.
- *      (2) The sel must be limited in size to not more than 31 pixels
- *          about the origin.  It must have at least one hit, and it
- *          can have any number of misses.
- *      (3) This handles all required setting of the border pixels
- *          before erosion and dilation.
- * 
- */ -PIX * -pixFHMTGen_1(PIX *pixd, - PIX *pixs, - const char *selname) -{ -l_int32 i, index, found, w, h, wpls, wpld; -l_uint32 *datad, *datas, *datat; -PIX *pixt; - - PROCNAME("pixFHMTGen_1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - found = FALSE; - for (i = 0; i < NUM_SELS_GENERATED; i++) { - if (strcmp(selname, SEL_NAMES[i]) == 0) { - found = TRUE; - index = i; - break; - } - } - if (found == FALSE) - return (PIX *)ERROR_PTR("sel index not found", procName, pixd); - - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - else /* for in-place or pre-allocated */ - pixResizeImageData(pixd, pixs); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - /* The images must be surrounded with 32 additional border - * pixels, that we'll read from. We fabricate a "proper" - * image as the subimage within the border, having the - * following parameters: */ - w = pixGetWidth(pixs) - 64; - h = pixGetHeight(pixs) - 64; - datas = pixGetData(pixs) + 32 * wpls + 1; - datad = pixGetData(pixd) + 32 * wpld + 1; - - if (pixd == pixs) { /* need temp image if in-place */ - if ((pixt = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - datat = pixGetData(pixt) + 32 * wpls + 1; - fhmtgen_low_1(datad, w, h, wpld, datat, wpls, index); - pixDestroy(&pixt); - } - else { /* not in-place */ - fhmtgen_low_1(datad, w, h, wpld, datas, wpls, index); - } - - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtgenlow.1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtgenlow.1.c deleted file mode 100644 index b1c863cf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fhmtgenlow.1.c +++ /dev/null @@ -1,445 +0,0 @@ -/*====================================================================* - - 
Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! 
- * Low-level fast hit-miss transform with auto-generated sels - * - * Dispatcher: - * l_int32 fhmtgen_low_1() - * - * Static Low-level: - * void fhmt_1_*() - */ - -#include "allheaders.h" - -static void fhmt_1_0(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_1(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_2(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_4(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_6(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_8(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fhmt_1_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); - - -/*---------------------------------------------------------------------* - * Fast hmt dispatcher * - *---------------------------------------------------------------------*/ -/*! 
- * fhmtgen_low_1() - * - * a dispatcher to appropriate low-level code - */ -l_int32 -fhmtgen_low_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 index) -{ - - switch (index) - { - case 0: - fhmt_1_0(datad, w, h, wpld, datas, wpls); - break; - case 1: - fhmt_1_1(datad, w, h, wpld, datas, wpls); - break; - case 2: - fhmt_1_2(datad, w, h, wpld, datas, wpls); - break; - case 3: - fhmt_1_3(datad, w, h, wpld, datas, wpls); - break; - case 4: - fhmt_1_4(datad, w, h, wpld, datas, wpls); - break; - case 5: - fhmt_1_5(datad, w, h, wpld, datas, wpls); - break; - case 6: - fhmt_1_6(datad, w, h, wpld, datas, wpls); - break; - case 7: - fhmt_1_7(datad, w, h, wpld, datas, wpls); - break; - case 8: - fhmt_1_8(datad, w, h, wpld, datas, wpls); - break; - case 9: - fhmt_1_9(datad, w, h, wpld, datas, wpls); - break; - } - - return 0; -} - - -/*--------------------------------------------------------------------------* - * Low-level auto-generated static routines * - *--------------------------------------------------------------------------*/ -/* - * N.B. In all the low-level routines, the part of the image - * that is accessed has been clipped by 32 pixels on - * all four sides. This is done in the higher level - * code by redefining w and h smaller and by moving the - * start-of-image pointers up to the beginning of this - * interior rectangle. 
- */ -static void -fhmt_1_0(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - (~*(sptr - wpls)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - (*sptr) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - (~*(sptr + wpls)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)); - } - } -} - -static void -fhmt_1_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - (~*(sptr + wpls)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)); - } - } -} - -static void -fhmt_1_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - (~*(sptr - wpls)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr 
+ 1) >> 31)); - } - } -} - -static void -fhmt_1_3(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - (*sptr) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - (*(sptr + wpls)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)); - } - } -} - -static void -fhmt_1_4(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - (*(sptr - wpls)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - (*sptr) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - (*(sptr + wpls)); - } - } -} - -static void -fhmt_1_5(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls6) << 1) | (~*(sptr - wpls6 + 1) >> 31)) & - ((*(sptr - wpls6) << 3) | (*(sptr - wpls6 + 1) >> 29)) & - (~*(sptr - wpls2)) & - ((*(sptr - wpls2) << 2) | (*(sptr - wpls2 + 1) >> 30)) & - ((~*(sptr + wpls2) >> 1) | 
(~*(sptr + wpls2 - 1) << 31)) & - ((*(sptr + wpls2) << 1) | (*(sptr + wpls2 + 1) >> 31)) & - ((~*(sptr + wpls6) >> 2) | (~*(sptr + wpls6 - 1) << 30)) & - (*(sptr + wpls6)); - } - } -} - -static void -fhmt_1_6(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - (~*(sptr - wpls)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - ((~*(sptr - wpls) << 2) | (~*(sptr - wpls + 1) >> 30)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) & - ((*(sptr + wpls) << 2) | (*(sptr + wpls + 1) >> 30)) & - ((~*(sptr + wpls2) >> 1) | (~*(sptr + wpls2 - 1) << 31)) & - (*(sptr + wpls2)) & - ((*(sptr + wpls2) << 1) | (*(sptr + wpls2 + 1) >> 31)) & - ((*(sptr + wpls2) << 2) | (*(sptr + wpls2 + 1) >> 30)); - } - } -} - -static void -fhmt_1_7(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls) >> 2) | (~*(sptr - wpls - 1) << 30)) & - ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - (~*(sptr - wpls)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((*(sptr + wpls) >> 2) | 
(*(sptr + wpls - 1) << 30)) & - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)) & - ((*(sptr + wpls2) >> 2) | (*(sptr + wpls2 - 1) << 30)) & - ((*(sptr + wpls2) >> 1) | (*(sptr + wpls2 - 1) << 31)) & - (*(sptr + wpls2)) & - ((~*(sptr + wpls2) << 1) | (~*(sptr + wpls2 + 1) >> 31)); - } - } -} - -static void -fhmt_1_8(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls2) >> 1) | (~*(sptr - wpls2 - 1) << 31)) & - (*(sptr - wpls2)) & - ((*(sptr - wpls2) << 1) | (*(sptr - wpls2 + 1) >> 31)) & - ((*(sptr - wpls2) << 2) | (*(sptr - wpls2 + 1) >> 30)) & - ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) & - ((*(sptr - wpls) << 2) | (*(sptr - wpls + 1) >> 30)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - (~*(sptr + wpls)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)) & - ((~*(sptr + wpls) << 2) | (~*(sptr + wpls + 1) >> 30)); - } - } -} - -static void -fhmt_1_9(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls2) >> 2) | (*(sptr - wpls2 - 1) << 30)) & - ((*(sptr - wpls2) >> 1) | (*(sptr - wpls2 - 1) << 31)) & - (*(sptr - wpls2)) & - 
((~*(sptr - wpls2) << 1) | (~*(sptr - wpls2 + 1) >> 31)) & - ((*(sptr - wpls) >> 2) | (*(sptr - wpls - 1) << 30)) & - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((~*(sptr + wpls) >> 2) | (~*(sptr + wpls - 1) << 30)) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - (~*(sptr + wpls)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)); - } - } -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/finditalic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/finditalic.c deleted file mode 100644 index 54589075..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/finditalic.c +++ /dev/null @@ -1,240 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * \file finditalic.c - *
- *
- *      l_int32   pixItalicWords()
- *
- *    Locate italic words.  This is an example of the use of
- *    hit-miss binary morphology with binary reconstruction
- *    (filling from a seed into a mask).
- *
- *    To see how this works, run with prog/italic.png.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* --------------------------------------------------------------- * - * These hit-miss sels match the slanted edge of italic characters * - * --------------------------------------------------------------- */ -static const char *str_ital1 = " o x" - " " - " " - " " - " o x " - " " - " C " - " " - " o x " - " " - " " - " " - "o x "; - -static const char *str_ital2 = " o x" - " " - " " - " o x " - " C " - " " - " o x " - " " - " " - "o x "; - - /* ------------------------------------------------------------- * - * This sel removes noise that is not oriented as a slanted edge * - * ------------------------------------------------------------- */ -static const char *str_ital3 = " x" - "Cx" - "x " - "x "; - -/*! - * \brief pixItalicWords() - * - * \param[in] pixs 1 bpp - * \param[in] boxaw [optional] word bounding boxes; can be NULL - * \param[in] pixw [optional] word box mask; can be NULL - * \param[out] pboxa boxa of italic words - * \param[in] debugflag 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) You can input the bounding boxes for the words in one of
- *          two forms: as bounding boxes (%boxaw) or as a word mask with
- *          the word bounding boxes filled (%pixw).  For example,
- *          to compute %pixw, you can use pixWordMaskByDilation().
- *      (2) Alternatively, you can set both of these inputs to NULL,
- *          in which case the word mask is generated here.  This is
- *          done by dilating and closing the input image to connect
- *          letters within a word, while leaving the words separated.
- *          The parameters are chosen under the assumption that the
- *          input is 10 to 12 pt text, scanned at about 300 ppi.
- *      (3) sel_ital1 and sel_ital2 detect the right edges that are
- *          nearly vertical, at approximately the angle of italic
- *          strokes.  We use the right edge to avoid getting seeds
- *          from lower-case 'y'.  The typical italic slant has a smaller
- *          angle with the vertical than the 'W', so in most cases we
- *          will not trigger on the slanted lines in the 'W'.
- *      (4) Note that sel_ital2 is shorter than sel_ital1.  It is
- *          more appropriate for a typical font scanned at 200 ppi.
- * 
- */ -l_ok -pixItalicWords(PIX *pixs, - BOXA *boxaw, - PIX *pixw, - BOXA **pboxa, - l_int32 debugflag) -{ -char opstring[32]; -l_int32 size; -BOXA *boxa; -PIX *pixsd, *pixm, *pixd; -SEL *sel_ital1, *sel_ital2, *sel_ital3; - - PROCNAME("pixItalicWords"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pboxa) - return ERROR_INT("&boxa not defined", procName, 1); - if (boxaw && pixw) - return ERROR_INT("both boxaw and pixw are defined", procName, 1); - - sel_ital1 = selCreateFromString(str_ital1, 13, 6, NULL); - sel_ital2 = selCreateFromString(str_ital2, 10, 6, NULL); - sel_ital3 = selCreateFromString(str_ital3, 4, 2, NULL); - - /* Make the italic seed: extract with HMT; remove noise. - * The noise removal close/open is important to exclude - * situations where a small slanted line accidentally - * matches sel_ital1. */ - pixsd = pixHMT(NULL, pixs, sel_ital1); - pixClose(pixsd, pixsd, sel_ital3); - pixOpen(pixsd, pixsd, sel_ital3); - - /* Make the word mask. Use input boxes or mask if given. */ - size = 0; /* init */ - if (boxaw) { - pixm = pixCreateTemplate(pixs); - pixMaskBoxa(pixm, pixm, boxaw, L_SET_PIXELS); - } else if (pixw) { - pixm = pixClone(pixw); - } else { - pixWordMaskByDilation(pixs, NULL, &size, NULL); - L_INFO("dilation size = %d\n", procName, size); - snprintf(opstring, sizeof(opstring), "d1.5 + c%d.1", size); - pixm = pixMorphSequence(pixs, opstring, 0); - } - - /* Binary reconstruction to fill in those word mask - * components for which there is at least one seed pixel. 
*/ - pixd = pixSeedfillBinary(NULL, pixsd, pixm, 8); - boxa = pixConnComp(pixd, NULL, 8); - *pboxa = boxa; - - if (debugflag) { - /* Save results at at 2x reduction */ - lept_mkdir("lept/ital"); - l_int32 res, upper; - BOXA *boxat; - GPLOT *gplot; - NUMA *na; - PIXA *pixa1; - PIX *pix1, *pix2, *pix3; - pixa1 = pixaCreate(0); - boxat = pixConnComp(pixm, NULL, 8); - boxaWriteDebug("/tmp/lept/ital/ital.ba", boxat); - pixaAddPix(pixa1, pixs, L_COPY); /* orig */ - pixaAddPix(pixa1, pixsd, L_COPY); /* seed */ - pix1 = pixConvertTo32(pixm); - pixRenderBoxaArb(pix1, boxat, 3, 255, 0, 0); - pixaAddPix(pixa1, pix1, L_INSERT); /* mask + outline */ - pixaAddPix(pixa1, pixd, L_COPY); /* ital mask */ - pix1 = pixConvertTo32(pixs); - pixRenderBoxaArb(pix1, boxa, 3, 255, 0, 0); - pixaAddPix(pixa1, pix1, L_INSERT); /* orig + outline */ - pix1 = pixCreateTemplate(pixs); - pix2 = pixSetBlackOrWhiteBoxa(pix1, boxa, L_SET_BLACK); - pixCopy(pix1, pixs); - pix3 = pixDilateBrick(NULL, pixs, 3, 3); - pixCombineMasked(pix1, pix3, pix2); - pixaAddPix(pixa1, pix1, L_INSERT); /* ital bolded */ - pixDestroy(&pix2); - pixDestroy(&pix3); - pix2 = pixaDisplayTiledInColumns(pixa1, 1, 0.5, 20, 2); - pixWriteDebug("/tmp/lept/ital/ital.png", pix2, IFF_PNG); - pixDestroy(&pix2); - - /* Assuming the image represents 6 inches of actual page width, - * the pixs resolution is approximately - * (width of pixs in pixels) / 6 - * and the images have been saved at half this resolution. */ - res = pixGetWidth(pixs) / 12; - L_INFO("resolution = %d\n", procName, res); - l_pdfSetDateAndVersion(0); - pixaConvertToPdf(pixa1, res, 1.0, L_FLATE_ENCODE, 75, "Italic Finder", - "/tmp/lept/ital/ital.pdf"); - l_pdfSetDateAndVersion(1); - pixaDestroy(&pixa1); - boxaDestroy(&boxat); - - /* Plot histogram of horizontal white run sizes. A small - * initial vertical dilation removes most runs that are neither - * inter-character nor inter-word. 
The larger first peak is - * from inter-character runs, and the smaller second peak is - * from inter-word runs. */ - pix1 = pixDilateBrick(NULL, pixs, 1, 15); - upper = L_MAX(30, 3 * size); - na = pixRunHistogramMorph(pix1, L_RUN_OFF, L_HORIZ, upper); - pixDestroy(&pix1); - gplot = gplotCreate("/tmp/lept/ital/runhisto", GPLOT_PNG, - "Histogram of horizontal runs of white pixels, vs length", - "run length", "number of runs"); - gplotAddPlot(gplot, NULL, na, GPLOT_LINES, "plot1"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - numaDestroy(&na); - } - - selDestroy(&sel_ital1); - selDestroy(&sel_ital2); - selDestroy(&sel_ital3); - pixDestroy(&pixsd); - pixDestroy(&pixm); - pixDestroy(&pixd); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/flipdetect.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/flipdetect.c deleted file mode 100644 index 49f14f78..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/flipdetect.c +++ /dev/null @@ -1,1167 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file flipdetect.c - *
- *
- *      High-level interface for detection and correction
- *          l_int32      pixOrientCorrect()
- *
- *      Page orientation detection (pure rotation by 90 degree increments):
- *          l_int32      pixOrientDetect()
- *          l_int32      makeOrientDecision()
- *          l_int32      pixUpDownDetect()
- *          l_int32      pixUpDownDetectGeneral()
- *          l_int32      pixOrientDetectDwa()
- *          l_int32      pixUpDownDetectDwa()
- *          l_int32      pixUpDownDetectGeneralDwa()
- *
- *      Page mirror detection (flip 180 degrees about line in plane of image):
- *          l_int32      pixMirrorDetect()
- *          l_int32      pixMirrorDetectDwa()
- *
- *      Static debug helper
- *          void         pixDebugFlipDetect()
- *
- *  ===================================================================
- *
- *  Page transformation detection:
- *
- *  Once a page is deskewed, there are 8 possible states that it
- *  can be in, shown symbolically below.  Suppose state 0 is correct.
- *
- *      0: correct     1          2          3
- *      +------+   +------+   +------+   +------+
- *      | **** |   | *    |   | **** |   |    * |
- *      | *    |   | *    |   |    * |   |    * |
- *      | *    |   | **** |   |    * |   | **** |
- *      +------+   +------+   +------+   +------+
- *
- *         4          5          6          7
- *      +-----+    +-----+    +-----+    +-----+
- *      | *** |    |   * |    | *** |    | *   |
- *      |   * |    |   * |    | *   |    | *   |
- *      |   * |    |   * |    | *   |    | *   |
- *      |   * |    | *** |    | *   |    | *** |
- *      +-----+    +-----+    +-----+    +-----+
- *
- *  Each of the other seven can be derived from state 0 by applying some
- *  combination of a 90 degree clockwise rotation, a flip about
- *  a horizontal line, and a flip about a vertical line,
- *  all abbreviated as:
- *      R = Rotation (about a line perpendicular to the image)
- *      H = Horizontal flip (about a vertical line in the plane of the image)
- *      V = Vertical flip (about a horizontal line in the plane of the image)
- *
- *  We get these transformations:
- *      RHV
- *      000  -> 0
- *      001  -> 1
- *      010  -> 2
- *      011  -> 3
- *      100  -> 4
- *      101  -> 5
- *      110  -> 6
- *      111  -> 7
- *
- *  Note that in four of these, the sum of H and V is 1 (odd).
- *  For these four, we have a change in parity (handedness) of
- *  the image, and the transformation cannot be performed by
- *  rotation about a vertical line out of the page.   Under
- *  rotation R, the set of 8 transformations decomposes into
- *  two subgroups linking {0, 3, 4, 7} and {1, 2, 5, 6} independently.
- *
- *  pixOrientDetect*() tests for a pure rotation (0, 90, 180, 270 degrees).
- *  It doesn't change parity.
- *
- *  pixMirrorDetect*() tests for a horizontal flip about the vertical axis.
- *  It changes parity.
- *
- *  The landscape/portrait rotation can be detected in two ways:
- *
- *    (1) Compute the deskew confidence for an image segment,
- *        both as is and rotated 90 degrees  (see skew.c).
- *
- *    (2) Compute the ascender/descender signal for the image,
- *        both as is and rotated 90 degrees  (implemented here).
- *
- *  The ascender/descender signal is useful for determining text
- *  orientation in Roman alphabets because the incidence of letters
- *  with straight-line ascenders (b, d, h, k, l, 't') outnumber
- *  those with descenders ('g', p, q).  The letters 't' and 'g'
- *  will respond variably to the filter, depending on the type face.
- *
- *  What about the mirror image situations?  These aren't common
- *  unless you're dealing with film, for example.
- *  But you can reliably test if the image has undergone a
- *  parity-changing flip once about some axis in the plane
- *  of the image, using pixMirrorDetect*().  This works ostensibly by
- *  counting the number of characters with ascenders that
- *  stick out to the left and right of the ascender.  Characters
- *  that are not mirror flipped are more likely to extend to the
- *  right (b, h, k) than to the left (d).  Of course, that is for
- *  text that is rightside-up.  So before you apply the mirror
- *  test, it is necessary to insure that the text has the ascenders
- *  going up, and not down or to the left or right.  But here's
- *  what *really* happens.  It turns out that the pre-filtering before
- *  the hit-miss transform (HMT) is crucial, and surprisingly, when
- *  the pre-filtering is chosen to generate a large signal, the majority
- *  of the signal comes from open regions of common lower-case
- *  letters such as 'e', 'c' and 'f'.
- *
- *  All operations are given in two implementations whose results are
- *  identical: rasterop morphology and dwa morphology.  The dwa
- *  implementations are between 2x and 3x faster.
- *
- *  The set of operations you actually use depends on your prior knowledge:
- *
- *  (1) If the page is known to be either rightside-up or upside-down, use
- *      either pixOrientDetect*() with pleftconf = NULL, or
- *      pixUpDownDetect*().   [The '*' refers to either the rasterop
- *      or dwa versions.]
- *
- *  (2) If any of the four orientations are possible, use pixOrientDetect*().
- *
- *  (3) If the text is horizontal and rightside-up, the only remaining
- *      degree of freedom is a left-right mirror flip: use
- *      pixMirrorDetect*().
- *
- *  (4) If you have a relatively large amount of numbers on the page,
- *      us the slower pixUpDownDetectGeneral().
- *
- *  We summarize the full orientation and mirror flip detection process:
- *
- *  (1) First determine which of the four 90 degree rotations
- *      causes the text to be rightside-up.  This can be done
- *      with either skew confidence or the pixOrientDetect*()
- *      signals.  For the latter, see the table for pixOrientDetect().
- *
- *  (2) Then, with ascenders pointing up, apply pixMirrorDetect*().
- *      In the normal situation the confidence confidence will be
- *      large and positive.  However, if mirror flipped, the
- *      confidence will be large and negative.
- *
- *  A high-level interface, pixOrientCorrect() combines the detection
- *  of the orientation with the rotation decision and the rotation itself.
- *
- *  Finally, use can be made of programs such as exiftool and convert to
- *  read exif camera orientation data in jpeg files and conditionally rotate.
- *  Here is an example shell script, made by Dan9er:
- *  ==================================================================
- *  #!/bin/sh
- *  #   orientByExif.sh
- *  #   Dependencies: exiftool (exiflib) and convert (ImageMagick)
- *  #   Note: if there is no exif orientation data in the jpeg file,
- *  #         this simply copies the input file.
- *  #
- *  if [[ -z $(command -v exiftool) || -z $(command -v convert) ]]; then
- *      echo "You need to install dependencies; e.g.:"
- *      echo "   sudo apt install libimage-exiftool-perl"
- *      echo "   sudo apt install imagemagick"
- *      exit 1
- *  fi
- *  if [[ $# != 2 ]]; then
- *      echo "Syntax: orientByExif infile outfile"
- *      exit 2
- *  fi
- *  if [[ ${1: -4} != ".jpg" ]]; then
- *      echo "File is not a jpeg"
- *      exit 3
- *  fi
- *  if [[ $(exiftool -s3 -n -Orientation "$1") = 1 ]]; then
- *      echo "Image is already upright"
- *      exit 0
- *  fi
- *  convert "$1" -auto-orient "$2"
- *  echo "Done"
- *  exit 0
- *  ==================================================================
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Sels for pixOrientDetect() and pixMirrorDetect() */ -static const char *textsel1 = "x oo " - "x oOo " - "x o " - "x " - "xxxxxx"; - -static const char *textsel2 = " oo x" - " oOo x" - " o x" - " x" - "xxxxxx"; - -static const char *textsel3 = "xxxxxx" - "x " - "x o " - "x oOo " - "x oo "; - -static const char *textsel4 = "xxxxxx" - " x" - " o x" - " oOo x" - " oo x"; - - /* Parameters for determining orientation */ -static const l_int32 DefaultMinUpDownCount = 70; -static const l_float32 DefaultMinUpDownConf = 8.0; -static const l_float32 DefaultMinUpDownRatio = 2.5; - - /* Parameters for determining mirror flip */ -static const l_int32 DefaultMinMirrorFlipCount = 100; -static const l_float32 DefaultMinMirrorFlipConf = 5.0; - - /* Static debug function */ -static void pixDebugFlipDetect(const char *filename, PIX *pixs, - PIX *pixhm, l_int32 enable); - - -/*----------------------------------------------------------------* - * High-level interface for detection and correction * - *----------------------------------------------------------------*/ -/*! - * \brief pixOrientCorrect() - * - * \param[in] pixs 1 bpp, deskewed, English text, 150 - 300 ppi - * \param[in] minupconf minimum value for which a decision can be made - * \param[in] minratio minimum conf ratio required for a decision - * \param[out] pupconf [optional] ; use NULL to skip - * \param[out] pleftconf [optional] ; use NULL to skip - * \param[out] protation [optional] ; use NULL to skip - * \param[in] debug 1 for debug output; 0 otherwise - * \return pixd may be rotated by 90, 180 or 270; null on error - * - *
- * Notes:
- *      (1) Simple top-level function to detect if Roman text is in
- *          reading orientation, and to rotate the image accordingly if not.
- *      (2) Returns a copy if no rotation is needed.
- *      (3) See notes for pixOrientDetect() and pixOrientDecision().
- *          Use 0.0 for default values for %minupconf and %minratio
- *      (4) Optional output of intermediate confidence results and
- *          the rotation performed on pixs.
- * 
- */ -PIX * -pixOrientCorrect(PIX *pixs, - l_float32 minupconf, - l_float32 minratio, - l_float32 *pupconf, - l_float32 *pleftconf, - l_int32 *protation, - l_int32 debug) -{ -l_int32 orient; -l_float32 upconf, leftconf; -PIX *pix1; - - PROCNAME("pixOrientCorrect"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - /* Get confidences for orientation */ - pixUpDownDetectDwa(pixs, &upconf, 0, debug); - pix1 = pixRotate90(pixs, 1); - pixUpDownDetectDwa(pix1, &leftconf, 0, debug); - pixDestroy(&pix1); - if (pupconf) *pupconf = upconf; - if (pleftconf) *pleftconf = leftconf; - - /* Decide what to do */ - makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug); - - /* Do it */ - switch (orient) - { - case L_TEXT_ORIENT_UNKNOWN: - L_INFO("text orientation not determined; no rotation\n", procName); - if (protation) *protation = 0; - return pixCopy(NULL, pixs); - break; - case L_TEXT_ORIENT_UP: - L_INFO("text is oriented up; no rotation\n", procName); - if (protation) *protation = 0; - return pixCopy(NULL, pixs); - break; - case L_TEXT_ORIENT_LEFT: - L_INFO("landscape; text oriented left; 90 cw rotation\n", procName); - if (protation) *protation = 90; - return pixRotateOrth(pixs, 1); - break; - case L_TEXT_ORIENT_DOWN: - L_INFO("text oriented down; 180 cw rotation\n", procName); - if (protation) *protation = 180; - return pixRotateOrth(pixs, 2); - break; - case L_TEXT_ORIENT_RIGHT: - L_INFO("landscape; text oriented right; 270 cw rotation\n", procName); - if (protation) *protation = 270; - return pixRotateOrth(pixs, 3); - break; - default: - L_ERROR("invalid orient flag!\n", procName); - return pixCopy(NULL, pixs); - } -} - - -/*----------------------------------------------------------------* - * Orientation detection (four 90 degree angles) * - * Rasterop implementation * - *----------------------------------------------------------------*/ -/*! 
- * \brief pixOrientDetect() - * - * \param[in] pixs 1 bpp, deskewed, English text, 150 - 300 ppi - * \param[out] pupconf [optional] ; may be NULL - * \param[out] pleftconf [optional] ; may be NULL - * \param[in] mincount min number of up + down; use 0 for default - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See "Measuring document image skew and orientation"
- *          Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari
- *          IS&T/SPIE EI'95, Conference 2422: Document Recognition II
- *          pp 302-316, Feb 6-7, 1995, San Jose, CA
- *      (2) upconf is the normalized difference between up ascenders
- *          and down ascenders.  The image is analyzed without rotation
- *          for being rightside-up or upside-down.  Set &upconf to null
- *          to skip this operation.
- *      (3) leftconf is the normalized difference between up ascenders
- *          and down ascenders in the image after it has been
- *          rotated 90 degrees clockwise.  With that rotation, ascenders
- *          projecting to the left in the source image will project up
- *          in the rotated image.  We compute this by rotating 90 degrees
- *          clockwise and testing for up and down ascenders.  Set
- *          &leftconf to null to skip this operation.
- *      (4) Note that upconf and leftconf are not linear measures of
- *          confidence, e.g., in a range between 0 and 100.  They
- *          measure how far you are out on the tail of a (presumably)
- *          normal distribution.  For example, a confidence of 10 means
- *          that it is nearly certain that the difference did not
- *          happen at random.  However, these values must be interpreted
- *          cautiously, taking into consideration the estimated prior
- *          for a particular orientation or mirror flip.   The up-down
- *          signal is very strong if applied to text with ascenders
- *          up and down, and relatively weak for text at 90 degrees,
- *          but even at 90 degrees, the difference can look significant.
- *          For example, suppose the ascenders are oriented horizontally,
- *          but the test is done vertically.  Then upconf can
- *          be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be
- *          upside-down.  However, if instead the test were done
- *          horizontally, leftconf will be very much larger
- *          (in absolute value), giving the correct orientation.
- *      (5) If you compute both upconf and leftconf, and there is
- *          sufficient signal, the following table determines the
- *          cw angle necessary to rotate pixs so that the text is
- *          rightside-up:
- *             0 deg :           upconf >> 1,    abs(upconf) >> abs(leftconf)
- *             90 deg :          leftconf >> 1,  abs(leftconf) >> abs(upconf)
- *             180 deg :         upconf << -1,   abs(upconf) >> abs(leftconf)
- *             270 deg :         leftconf << -1, abs(leftconf) >> abs(upconf)
- *      (6) One should probably not interpret the direction unless
- *          there are a sufficient number of counts for both orientations,
- *          in which case neither upconf nor leftconf will be 0.0.
- *      (7) This algorithm will fail on some images, such as tables,
- *          where most of the characters are numbers and appear as
- *          uppercase, but there are some repeated words that give a
- *          biased signal.  It may be advisable to run a table detector
- *          first (e.g., pixDecideIfTable()), and not run the orientation
- *          detector if it is a table.
- *      (8) Uses rasterop implementation of HMT.
- * 
- */ -l_ok -pixOrientDetect(PIX *pixs, - l_float32 *pupconf, - l_float32 *pleftconf, - l_int32 mincount, - l_int32 debug) -{ -PIX *pix1; - - PROCNAME("pixOrientDetect"); - - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!pupconf && !pleftconf) - return ERROR_INT("nothing to do", procName, 1); - if (mincount == 0) - mincount = DefaultMinUpDownCount; - - if (pupconf) - pixUpDownDetect(pixs, pupconf, mincount, debug); - if (pleftconf) { - pix1 = pixRotate90(pixs, 1); - pixUpDownDetect(pix1, pleftconf, mincount, debug); - pixDestroy(&pix1); - } - - return 0; -} - - -/*! - * \brief makeOrientDecision() - * - * \param[in] upconf nonzero - * \param[in] leftconf nonzero - * \param[in] minupconf minimum value for which a decision can be made - * \param[in] minratio minimum conf ratio required for a decision - * \param[out] porient text orientation enum {0,1,2,3,4} - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This can be run after pixOrientDetect()
- *      (2) Both upconf and leftconf must be nonzero; otherwise the
- *          orientation cannot be determined.
- *      (3) The abs values of the input confidences are compared to
- *          minupconf.
- *      (4) The abs value of the largest of (upconf/leftconf) and
- *          (leftconf/upconf) is compared with minratio.
- *      (5) Input 0.0 for the default values for minupconf and minratio.
- *      (6) The return value of orient is interpreted thus:
- *            L_TEXT_ORIENT_UNKNOWN:  not enough evidence to determine
- *            L_TEXT_ORIENT_UP:       text rightside-up
- *            L_TEXT_ORIENT_LEFT:     landscape, text up facing left
- *            L_TEXT_ORIENT_DOWN:     text upside-down
- *            L_TEXT_ORIENT_RIGHT:    landscape, text up facing right
- * 
- */ -l_ok -makeOrientDecision(l_float32 upconf, - l_float32 leftconf, - l_float32 minupconf, - l_float32 minratio, - l_int32 *porient, - l_int32 debug) -{ -l_float32 absupconf, absleftconf; - - PROCNAME("makeOrientDecision"); - - if (!porient) - return ERROR_INT("&orient not defined", procName, 1); - *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */ - if (upconf == 0.0 || leftconf == 0.0) { - L_INFO("not enough confidence to get orientation\n", procName); - return 0; - } - - if (minupconf == 0.0) - minupconf = DefaultMinUpDownConf; - if (minratio == 0.0) - minratio = DefaultMinUpDownRatio; - absupconf = L_ABS(upconf); - absleftconf = L_ABS(leftconf); - - /* Here are the four possible orientation decisions, based - * on satisfaction of two threshold constraints. */ - if (upconf > minupconf && absupconf > minratio * absleftconf) - *porient = L_TEXT_ORIENT_UP; - else if (leftconf > minupconf && absleftconf > minratio * absupconf) - *porient = L_TEXT_ORIENT_LEFT; - else if (upconf < -minupconf && absupconf > minratio * absleftconf) - *porient = L_TEXT_ORIENT_DOWN; - else if (leftconf < -minupconf && absleftconf > minratio * absupconf) - *porient = L_TEXT_ORIENT_RIGHT; - - if (debug) { - lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf); - if (*porient == L_TEXT_ORIENT_UNKNOWN) - lept_stderr("Confidence is low; no determination is made\n"); - else if (*porient == L_TEXT_ORIENT_UP) - lept_stderr("Text is rightside-up\n"); - else if (*porient == L_TEXT_ORIENT_LEFT) - lept_stderr("Text is rotated 90 deg ccw\n"); - else if (*porient == L_TEXT_ORIENT_DOWN) - lept_stderr("Text is upside-down\n"); - else /* *porient == L_TEXT_ORIENT_RIGHT */ - lept_stderr("Text is rotated 90 deg cw\n"); - } - - return 0; -} - - -/*! 
- * \brief pixUpDownDetect() - * - * \param[in] pixs 1 bpp, deskewed, English text, 150 - 300 ppi - * \param[out] pconf confidence that text is rightside-up - * \param[in] mincount min number of up + down; use 0 for default - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Special (typical, slightly faster) case, where the pixels
- *          identified through the HMT (hit-miss transform) are not
- *          clipped by a truncated word mask pixm.  See pixOrientDetect()
- *          and pixUpDownDetectGeneral() for details.
- *      (2) The returned confidence is the normalized difference
- *          between the number of detected up and down ascenders,
- *          assuming that the text is either rightside-up or upside-down
- *          and not rotated at a 90 degree angle.
- * 
- */ -l_ok -pixUpDownDetect(PIX *pixs, - l_float32 *pconf, - l_int32 mincount, - l_int32 debug) -{ - return pixUpDownDetectGeneral(pixs, pconf, mincount, 0, debug); -} - - -/*! - * \brief pixUpDownDetectGeneral() - * - * \param[in] pixs 1 bpp, deskewed, English text, 150 - 300 ppi - * \param[out] pconf confidence that text is rightside-up - * \param[in] mincount min number of up + down; use 0 for default - * \param[in] npixels number of pixels removed from each side of word box - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixOrientDetect() for other details.
- *      (2) %conf is the normalized difference between the number of
- *          detected up and down ascenders, assuming that the text
- *          is either rightside-up or upside-down and not rotated
- *          at a 90 degree angle.
- *      (3) The typical mode of operation is %npixels == 0.
- *          If %npixels > 0, this removes HMT matches at the
- *          beginning and ending of "words."  This is useful for
- *          pages that may have mostly digits, because if npixels == 0,
- *          leading "1" and "3" digits can register as having
- *          ascenders or descenders, and "7" digits can match descenders.
- *          Consequently, a page image of only digits may register
- *          as being upside-down.
- *      (4) We want to count the number of instances found using the HMT.
- *          An expensive way to do this would be to count the
- *          number of connected components.  A cheap way is to do a rank
- *          reduction cascade that reduces each component to a single
- *          pixel, and results (after two or three 2x reductions)
- *          in one pixel for each of the original components.
- *          After the reduction, you have a much smaller pix over
- *          which to count pixels.  We do only 2 reductions, because
- *          this function is designed to work for input pix between
- *          150 and 300 ppi, and an 8x reduction on a 150 ppi image
- *          is going too far -- components will get merged.
- * 
- */ -l_ok -pixUpDownDetectGeneral(PIX *pixs, - l_float32 *pconf, - l_int32 mincount, - l_int32 npixels, - l_int32 debug) -{ -l_int32 countup, countdown, nmax; -l_float32 nup, ndown; -PIX *pix0, *pix1, *pix2, *pix3, *pixm; -SEL *sel1, *sel2, *sel3, *sel4; - - PROCNAME("pixUpDownDetectGeneral"); - - if (!pconf) - return ERROR_INT("&conf not defined", procName, 1); - *pconf = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (mincount == 0) - mincount = DefaultMinUpDownCount; - if (npixels < 0) - npixels = 0; - - if (debug) { - lept_mkdir("lept/orient"); - } - - sel1 = selCreateFromString(textsel1, 5, 6, NULL); - sel2 = selCreateFromString(textsel2, 5, 6, NULL); - sel3 = selCreateFromString(textsel3, 5, 6, NULL); - sel4 = selCreateFromString(textsel4, 5, 6, NULL); - - /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). - * This closes holes in x-height characters and joins them at - * the x-height. There is more noise in the descender detection - * from this, but it works fairly well. */ - pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0); - - /* Optionally, make a mask of the word bounding boxes, shortening - * each of them by a fixed amount at each end. */ - pixm = NULL; - if (npixels > 0) { - l_int32 i, nbox, x, y, w, h; - BOX *box; - BOXA *boxa; - pix1 = pixMorphSequence(pix0, "o10.1", 0); - boxa = pixConnComp(pix1, NULL, 8); - pixm = pixCreateTemplate(pix1); - pixDestroy(&pix1); - nbox = boxaGetCount(boxa); - for (i = 0; i < nbox; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - if (w > 2 * npixels) - pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, - PIX_SET, NULL, 0, 0); - boxDestroy(&box); - } - boxaDestroy(&boxa); - } - - /* Find the ascenders and optionally filter with pixm. - * For an explanation of the procedure used for counting the result - * of the HMT, see comments at the beginning of this function. 
*/ - pix1 = pixHMT(NULL, pix0, sel1); - pix2 = pixHMT(NULL, pix0, sel2); - pixOr(pix1, pix1, pix2); - if (pixm) - pixAnd(pix1, pix1, pixm); - pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - pixCountPixels(pix3, &countup, NULL); - pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - /* Find the ascenders and optionally filter with pixm. */ - pix1 = pixHMT(NULL, pix0, sel3); - pix2 = pixHMT(NULL, pix0, sel4); - pixOr(pix1, pix1, pix2); - if (pixm) - pixAnd(pix1, pix1, pixm); - pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - pixCountPixels(pix3, &countdown, NULL); - pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - /* Evaluate statistically, generating a confidence that is - * related to the probability with a gaussian distribution. */ - nup = (l_float32)(countup); - ndown = (l_float32)(countdown); - nmax = L_MAX(countup, countdown); - if (nmax > mincount) - *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); - - if (debug) { - if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG); - lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", - nup, ndown, *pconf); - if (*pconf > DefaultMinUpDownConf) - lept_stderr("Text is rightside-up\n"); - if (*pconf < -DefaultMinUpDownConf) - lept_stderr("Text is upside-down\n"); - } - - pixDestroy(&pix0); - pixDestroy(&pixm); - selDestroy(&sel1); - selDestroy(&sel2); - selDestroy(&sel3); - selDestroy(&sel4); - return 0; -} - - -/*----------------------------------------------------------------* - * Orientation detection (four 90 degree angles) * - * DWA implementation * - *----------------------------------------------------------------*/ -/*! 
- * \brief pixOrientDetectDwa() - * - * \param[in] pixs 1 bpp, deskewed, English text - * \param[out] pupconf [optional] ; may be NULL - * \param[out] pleftconf [optional] ; may be NULL - * \param[in] mincount min number of up + down; use 0 for default - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Same interface as for pixOrientDetect().  See notes
- *          there for usage.
- *      (2) Uses auto-gen'd code for the Sels defined at the
- *          top of this file, with some renaming of functions.
- *          The auto-gen'd code is in fliphmtgen.c, and can
- *          be generated by a simple executable; see prog/flipselgen.c.
- *      (3) This runs about 2.5 times faster than the pixOrientDetect().
- * 
- */ -l_ok -pixOrientDetectDwa(PIX *pixs, - l_float32 *pupconf, - l_float32 *pleftconf, - l_int32 mincount, - l_int32 debug) -{ -PIX *pix1; - - PROCNAME("pixOrientDetectDwa"); - - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!pupconf && !pleftconf) - return ERROR_INT("nothing to do", procName, 1); - if (mincount == 0) - mincount = DefaultMinUpDownCount; - - if (pupconf) - pixUpDownDetectDwa(pixs, pupconf, mincount, debug); - if (pleftconf) { - pix1 = pixRotate90(pixs, 1); - pixUpDownDetectDwa(pix1, pleftconf, mincount, debug); - pixDestroy(&pix1); - } - - return 0; -} - - -/*! - * \brief pixUpDownDetectDwa() - * - * \param[in] pixs 1 bpp, deskewed, English text, 150 - 300 ppi - * \param[out] pconf confidence that text is rightside-up - * \param[in] mincount min number of up + down; use 0 for default - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Faster (DWA) version of pixUpDownDetect().
- *      (2) This is a special case (but typical and slightly faster) of
- *          pixUpDownDetectGeneralDwa(), where the pixels identified
- *          through the HMT (hit-miss transform) are not clipped by
- *          a truncated word mask pixm.  See pixUpDownDetectGeneral()
- *          for usage and other details.
- *      (3) The returned confidence is the normalized difference
- *          between the number of detected up and down ascenders,
- *          assuming that the text is either rightside-up or upside-down
- *          and not rotated at a 90 degree angle.
- * 
- */ -l_ok -pixUpDownDetectDwa(PIX *pixs, - l_float32 *pconf, - l_int32 mincount, - l_int32 debug) -{ - return pixUpDownDetectGeneralDwa(pixs, pconf, mincount, 0, debug); -} - - -/*! - * \brief pixUpDownDetectGeneralDwa() - * - * \param[in] pixs 1 bpp, deskewed, English text - * \param[out] pconf confidence that text is rightside-up - * \param[in] mincount min number of up + down; use 0 for default - * \param[in] npixels number of pixels removed from each side of word box - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See the notes in pixUpDownDetectGeneral() for usage.
- * 
- */ -l_ok -pixUpDownDetectGeneralDwa(PIX *pixs, - l_float32 *pconf, - l_int32 mincount, - l_int32 npixels, - l_int32 debug) -{ -char flipsel1[] = "flipsel1"; -char flipsel2[] = "flipsel2"; -char flipsel3[] = "flipsel3"; -char flipsel4[] = "flipsel4"; -l_int32 countup, countdown, nmax; -l_float32 nup, ndown; -PIX *pixt, *pix0, *pix1, *pix2, *pix3, *pixm; - - PROCNAME("pixUpDownDetectGeneralDwa"); - - if (!pconf) - return ERROR_INT("&conf not defined", procName, 1); - *pconf = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (mincount == 0) - mincount = DefaultMinUpDownCount; - if (npixels < 0) - npixels = 0; - - /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). - * This closes holes in x-height characters and joins them at - * the x-height. There is more noise in the descender detection - * from this, but it works fairly well. */ - pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0); - - /* Be sure to add the border before the flip DWA operations! */ - pix0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER, - ADDED_BORDER, ADDED_BORDER, 0); - pixDestroy(&pixt); - - /* Optionally, make a mask of the word bounding boxes, shortening - * each of them by a fixed amount at each end. */ - pixm = NULL; - if (npixels > 0) { - l_int32 i, nbox, x, y, w, h; - BOX *box; - BOXA *boxa; - pix1 = pixMorphSequenceDwa(pix0, "o10.1", 0); - boxa = pixConnComp(pix1, NULL, 8); - pixm = pixCreateTemplate(pix1); - pixDestroy(&pix1); - nbox = boxaGetCount(boxa); - for (i = 0; i < nbox; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - if (w > 2 * npixels) - pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, - PIX_SET, NULL, 0, 0); - boxDestroy(&box); - } - boxaDestroy(&boxa); - } - - /* Find the ascenders and optionally filter with pixm. 
- * For an explanation of the procedure used for counting the result - * of the HMT, see comments in pixUpDownDetectGeneral(). */ - pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1); - pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2); - pixOr(pix1, pix1, pix2); - if (pixm) - pixAnd(pix1, pix1, pixm); - pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - pixCountPixels(pix3, &countup, NULL); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - /* Find the ascenders and optionally filter with pixm. */ - pix1 = pixFlipFHMTGen(NULL, pix0, flipsel3); - pix2 = pixFlipFHMTGen(NULL, pix0, flipsel4); - pixOr(pix1, pix1, pix2); - if (pixm) - pixAnd(pix1, pix1, pixm); - pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - pixCountPixels(pix3, &countdown, NULL); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - - /* Evaluate statistically, generating a confidence that is - * related to the probability with a gaussian distribution. */ - nup = (l_float32)(countup); - ndown = (l_float32)(countdown); - nmax = L_MAX(countup, countdown); - if (nmax > mincount) - *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); - - if (debug) { - if (pixm) { - lept_mkdir("lept/orient"); - pixWriteDebug("/tmp/lept/orient/pixm2.png", pixm, IFF_PNG); - } - lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", - nup, ndown, *pconf); - if (*pconf > DefaultMinUpDownConf) - lept_stderr("Text is rightside-up\n"); - if (*pconf < -DefaultMinUpDownConf) - lept_stderr("Text is upside-down\n"); - } - - pixDestroy(&pix0); - pixDestroy(&pixm); - return 0; -} - - - -/*----------------------------------------------------------------* - * Left-right mirror detection * - * Rasterop implementation * - *----------------------------------------------------------------*/ -/*! 
- * \brief pixMirrorDetect() - * - * \param[in] pixs 1 bpp, deskewed, English text - * \param[out] pconf confidence that text is not LR mirror reversed - * \param[in] mincount min number of left + right; use 0 for default - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For this test, it is necessary that the text is horizontally
- *          oriented, with ascenders going up.
- *      (2) conf is the normalized difference between the number of
- *          right and left facing characters with ascenders.
- *          Left-facing are {d}; right-facing are {b, h, k}.
- *          At least that was the expectation.  In practice, we can
- *          really just say that it is the normalized difference in
- *          hits using two specific hit-miss filters, textsel1 and textsel2,
- *          after the image has been suitably pre-filtered so that
- *          these filters are effective.  See (4) for what's really happening.
- *      (3) A large positive conf value indicates normal text, whereas
- *          a large negative conf value means the page is mirror reversed.
- *      (4) The implementation is a bit tricky.  The general idea is
- *          to fill the x-height part of characters, but not the space
- *          between them, before doing the HMT.  This is done by
- *          finding pixels added using two different operations -- a
- *          horizontal close and a vertical dilation -- and adding
- *          the intersection of these sets to the original.  It turns
- *          out that the original intuition about the signal was largely
- *          in error: much of the signal for right-facing characters
- *          comes from the lower part of common x-height characters, like
- *          the e and c, that remain open after these operations.
- *          So it's important that the operations to close the x-height
- *          parts of the characters are purposely weakened sufficiently
- *          to allow these characters to remain open.  The wonders
- *          of morphology!
- * 
- */ -l_ok -pixMirrorDetect(PIX *pixs, - l_float32 *pconf, - l_int32 mincount, - l_int32 debug) -{ -l_int32 count1, count2, nmax; -l_float32 nleft, nright; -PIX *pix0, *pix1, *pix2, *pix3; -SEL *sel1, *sel2; - - PROCNAME("pixMirrorDetect"); - - if (!pconf) - return ERROR_INT("&conf not defined", procName, 1); - *pconf = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (mincount == 0) - mincount = DefaultMinMirrorFlipCount; - - if (debug) { - lept_mkdir("lept/orient"); - } - - sel1 = selCreateFromString(textsel1, 5, 6, NULL); - sel2 = selCreateFromString(textsel2, 5, 6, NULL); - - /* Fill x-height characters but not space between them, sort of. */ - pix3 = pixMorphCompSequence(pixs, "d1.30", 0); - pixXor(pix3, pix3, pixs); - pix0 = pixMorphCompSequence(pixs, "c15.1", 0); - pixXor(pix0, pix0, pixs); - pixAnd(pix0, pix0, pix3); - pixOr(pix0, pix0, pixs); - pixDestroy(&pix3); - - /* Filter the right-facing characters. */ - pix1 = pixHMT(NULL, pix0, sel1); - pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - pixCountPixels(pix3, &count1, NULL); - pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug); - pixDestroy(&pix1); - pixDestroy(&pix3); - - /* Filter the left-facing characters. */ - pix2 = pixHMT(NULL, pix0, sel2); - pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0); - pixCountPixels(pix3, &count2, NULL); - pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug); - pixDestroy(&pix2); - pixDestroy(&pix3); - - nright = (l_float32)count1; - nleft = (l_float32)count2; - nmax = L_MAX(count1, count2); - pixDestroy(&pix0); - selDestroy(&sel1); - selDestroy(&sel2); - - if (nmax > mincount) - *pconf = 2. 
* ((nright - nleft) / sqrt(nright + nleft)); - - if (debug) { - lept_stderr("nright = %f, nleft = %f\n", nright, nleft); - if (*pconf > DefaultMinMirrorFlipConf) - lept_stderr("Text is not mirror reversed\n"); - if (*pconf < -DefaultMinMirrorFlipConf) - lept_stderr("Text is mirror reversed\n"); - } - - return 0; -} - - -/*----------------------------------------------------------------* - * Left-right mirror detection * - * DWA implementation * - *----------------------------------------------------------------*/ -/*! - * \brief pixMirrorDetectDwa() - * - * \param[in] pixs 1 bpp, deskewed, English text - * \param[out] pconf confidence that text is not LR mirror reversed - * \param[in] mincount min number of left + right; use 0 for default - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We assume the text is horizontally oriented, with
- *          ascenders going up.
- *      (2) See notes in pixMirrorDetect().
- * 
- */ -l_ok -pixMirrorDetectDwa(PIX *pixs, - l_float32 *pconf, - l_int32 mincount, - l_int32 debug) -{ -char flipsel1[] = "flipsel1"; -char flipsel2[] = "flipsel2"; -l_int32 count1, count2, nmax; -l_float32 nleft, nright; -PIX *pix0, *pix1, *pix2, *pix3; - - PROCNAME("pixMirrorDetectDwa"); - - if (!pconf) - return ERROR_INT("&conf not defined", procName, 1); - *pconf = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (mincount == 0) - mincount = DefaultMinMirrorFlipCount; - - /* Fill x-height characters but not space between them, sort of. */ - pix3 = pixMorphSequenceDwa(pixs, "d1.30", 0); - pixXor(pix3, pix3, pixs); - pix0 = pixMorphSequenceDwa(pixs, "c15.1", 0); - pixXor(pix0, pix0, pixs); - pixAnd(pix0, pix0, pix3); - pixOr(pix3, pix0, pixs); - pixDestroy(&pix0); - pix0 = pixAddBorderGeneral(pix3, ADDED_BORDER, ADDED_BORDER, - ADDED_BORDER, ADDED_BORDER, 0); - pixDestroy(&pix3); - - /* Filter the right-facing characters. */ - pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1); - pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - pixCountPixels(pix3, &count1, NULL); - pixDestroy(&pix1); - pixDestroy(&pix3); - - /* Filter the left-facing characters. */ - pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2); - pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0); - pixCountPixels(pix3, &count2, NULL); - pixDestroy(&pix2); - pixDestroy(&pix3); - - pixDestroy(&pix0); - nright = (l_float32)count1; - nleft = (l_float32)count2; - nmax = L_MAX(count1, count2); - - if (nmax > mincount) - *pconf = 2. 
* ((nright - nleft) / sqrt(nright + nleft)); - - if (debug) { - lept_stderr("nright = %f, nleft = %f\n", nright, nleft); - if (*pconf > DefaultMinMirrorFlipConf) - lept_stderr("Text is not mirror reversed\n"); - if (*pconf < -DefaultMinMirrorFlipConf) - lept_stderr("Text is mirror reversed\n"); - } - - return 0; -} - - -/*----------------------------------------------------------------* - * Static debug helper * - *----------------------------------------------------------------*/ -/* - * \brief pixDebugFlipDetect() - * - * \param[in] filename for output debug file - * \param[in] pixs input to pix*Detect - * \param[in] pixhm hit-miss result from ascenders or descenders - * \param[in] enable 1 to enable this function; 0 to disable - * \return void - */ -static void -pixDebugFlipDetect(const char *filename, - PIX *pixs, - PIX *pixhm, - l_int32 enable) -{ -PIX *pixt, *pixthm; - - if (!enable) return; - - /* Display with red dot at counted locations */ - pixt = pixConvert1To4Cmap(pixs); - pixthm = pixMorphSequence(pixhm, "d5.5", 0); - pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0); - - pixWriteDebug(filename, pixt, IFF_PNG); - pixDestroy(&pixthm); - pixDestroy(&pixt); - return; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fliphmtgen.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fliphmtgen.c deleted file mode 100644 index 2d76edad..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fliphmtgen.c +++ /dev/null @@ -1,360 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * fliphmtgen.c - * - * DWA implementation of hit-miss transforms with auto-generated sels - * for pixOrientDetectDwa() and pixUpDownDetectDwa() in flipdetect.c - * - * PIX *pixFlipFHMTGen() - * static l_int32 flipfhmtgen_low() -- dispatcher - * static void fhmt_1_0() - * static void fhmt_1_1() - * static void fhmt_1_2() - * static void fhmt_1_3() - * - * The code (rearranged) was generated by prog/flipselgen.c - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_int32 NUM_SELS_GENERATED = 4; -static char SEL_NAMES[][10] = {"flipsel1", - "flipsel2", - "flipsel3", - "flipsel4"}; - -static l_int32 flipfhmtgen_low(l_uint32 *, l_int32, l_int32, l_int32, - l_uint32 *, l_int32, l_int32); - -static void fhmt_1_0(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, - l_int32); -static void fhmt_1_1(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, - 
l_int32); -static void fhmt_1_2(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, - l_int32); -static void fhmt_1_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, - l_int32); - -/*---------------------------------------------------------------------* - * Top-level hmt functions * - *---------------------------------------------------------------------*/ -/* - * pixFlipFHMTGen() - * - * Input: pixd (usual 3 choices: null, == pixs, != pixs) - * pixs - * sel name (one of four defined in SEL_NAMES[]) - * Return: pixd - * - * Notes: - * Action: hit-miss transform on pixs by the sel - * N.B.: the sel must have at least one hit, and it - * can have any number of misses. - */ -PIX * -pixFlipFHMTGen(PIX *pixd, - PIX *pixs, - const char *selname) -{ -l_int32 i, index, found, w, h, wpls, wpld; -l_uint32 *datad, *datas, *datat; -PIX *pixt; - - PROCNAME("pixFlipFHMTGen"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - found = FALSE; - for (i = 0; i < NUM_SELS_GENERATED; i++) { - if (strcmp(selname, SEL_NAMES[i]) == 0) { - found = TRUE; - index = i; - break; - } - } - if (found == FALSE) - return (PIX *)ERROR_PTR("sel index not found", procName, pixd); - - if (pixd) { - if (!pixSizesEqual(pixs, pixd)) - return (PIX *)ERROR_PTR("sizes not equal", procName, pixd); - } else { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - /* The images must be surrounded with ADDED_BORDER white pixels, - * that we'll read from. 
We fabricate a "proper" - * image as the subimage within the border, having the - * following parameters: */ - w = pixGetWidth(pixs) - 2 * ADDED_BORDER; - h = pixGetHeight(pixs) - 2 * ADDED_BORDER; - datas = pixGetData(pixs) + ADDED_BORDER * wpls + ADDED_BORDER / 32; - datad = pixGetData(pixd) + ADDED_BORDER * wpld + ADDED_BORDER / 32; - - if (pixd == pixs) { /* need temp image if in-place */ - if ((pixt = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - datat = pixGetData(pixt) + ADDED_BORDER * wpls + ADDED_BORDER / 32; - flipfhmtgen_low(datad, w, h, wpld, datat, wpls, index); - pixDestroy(&pixt); - } else { /* simple and not in-place */ - flipfhmtgen_low(datad, w, h, wpld, datas, wpls, index); - } - - return pixd; -} - - -/*---------------------------------------------------------------------* - * Fast hmt dispatcher * - *---------------------------------------------------------------------*/ -/* - * flipfhmtgen_low() - * - * A dispatcher to appropriate low-level code for flip hmt ops - */ -static l_int32 -flipfhmtgen_low(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 index) -{ - - switch (index) - { - case 0: - fhmt_1_0(datad, w, h, wpld, datas, wpls); - break; - case 1: - fhmt_1_1(datad, w, h, wpld, datas, wpls); - break; - case 2: - fhmt_1_2(datad, w, h, wpld, datas, wpls); - break; - case 3: - fhmt_1_3(datad, w, h, wpld, datas, wpls); - break; - } - - return 0; -} - - -/*--------------------------------------------------------------------------* - * Low-level auto-generated hmt routines * - *--------------------------------------------------------------------------*/ -/* - * N.B. in all the low-level routines, the part of the image - * that is accessed has been clipped by ADDED_BORDER pixels - * on all four sides. 
This is done in the higher level - * code by redefining w and h smaller and by moving the - * start-of-image pointers up to the beginning of this - * interior rectangle. - */ - -static void -fhmt_1_0(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls) >> 3) | (*(sptr - wpls - 1) << 29)) & - (~*(sptr - wpls)) & - ((~*(sptr - wpls) << 1) | (~*(sptr - wpls + 1) >> 31)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - (~*sptr) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((*(sptr + wpls) >> 3) | (*(sptr + wpls - 1) << 29)) & - (~*(sptr + wpls)) & - ((*(sptr + wpls2) >> 3) | (*(sptr + wpls2 - 1) << 29)) & - ((*(sptr + wpls3) >> 3) | (*(sptr + wpls3 - 1) << 29)) & - ((*(sptr + wpls3) >> 2) | (*(sptr + wpls3 - 1) << 30)) & - ((*(sptr + wpls3) >> 1) | (*(sptr + wpls3 - 1) << 31)) & - (*(sptr + wpls3)) & - ((*(sptr + wpls3) << 1) | (*(sptr + wpls3 + 1) >> 31)) & - ((*(sptr + wpls3) << 2) | (*(sptr + wpls3 + 1) >> 30)); - } - } -} - - -static void -fhmt_1_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((~*(sptr - wpls) >> 1) | (~*(sptr - wpls - 1) << 31)) & - (~*(sptr - wpls)) & - ((*(sptr - wpls) << 3) | (*(sptr - wpls + 1) >> 29)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - 
(~*sptr) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - (~*(sptr + wpls)) & - ((*(sptr + wpls) << 3) | (*(sptr + wpls + 1) >> 29)) & - ((*(sptr + wpls2) << 3) | (*(sptr + wpls2 + 1) >> 29)) & - ((*(sptr + wpls3) >> 2) | (*(sptr + wpls3 - 1) << 30)) & - ((*(sptr + wpls3) >> 1) | (*(sptr + wpls3 - 1) << 31)) & - (*(sptr + wpls3)) & - ((*(sptr + wpls3) << 1) | (*(sptr + wpls3 + 1) >> 31)) & - ((*(sptr + wpls3) << 2) | (*(sptr + wpls3 + 1) >> 30)) & - ((*(sptr + wpls3) << 3) | (*(sptr + wpls3 + 1) >> 29)); - } - } -} - - -static void -fhmt_1_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls3) >> 3) | (*(sptr - wpls3 - 1) << 29)) & - ((*(sptr - wpls3) >> 2) | (*(sptr - wpls3 - 1) << 30)) & - ((*(sptr - wpls3) >> 1) | (*(sptr - wpls3 - 1) << 31)) & - (*(sptr - wpls3)) & - ((*(sptr - wpls3) << 1) | (*(sptr - wpls3 + 1) >> 31)) & - ((*(sptr - wpls3) << 2) | (*(sptr - wpls3 + 1) >> 30)) & - ((*(sptr - wpls2) >> 3) | (*(sptr - wpls2 - 1) << 29)) & - ((*(sptr - wpls) >> 3) | (*(sptr - wpls - 1) << 29)) & - (~*(sptr - wpls)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - (~*sptr) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((*(sptr + wpls) >> 3) | (*(sptr + wpls - 1) << 29)) & - (~*(sptr + wpls)) & - ((~*(sptr + wpls) << 1) | (~*(sptr + wpls + 1) >> 31)); - } - } -} - - -static void -fhmt_1_3(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; 
- pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls3) >> 2) | (*(sptr - wpls3 - 1) << 30)) & - ((*(sptr - wpls3) >> 1) | (*(sptr - wpls3 - 1) << 31)) & - (*(sptr - wpls3)) & - ((*(sptr - wpls3) << 1) | (*(sptr - wpls3 + 1) >> 31)) & - ((*(sptr - wpls3) << 2) | (*(sptr - wpls3 + 1) >> 30)) & - ((*(sptr - wpls3) << 3) | (*(sptr - wpls3 + 1) >> 29)) & - ((*(sptr - wpls2) << 3) | (*(sptr - wpls2 + 1) >> 29)) & - (~*(sptr - wpls)) & - ((*(sptr - wpls) << 3) | (*(sptr - wpls + 1) >> 29)) & - ((~*(sptr) >> 1) | (~*(sptr - 1) << 31)) & - (~*sptr) & - ((~*(sptr) << 1) | (~*(sptr + 1) >> 31)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((~*(sptr + wpls) >> 1) | (~*(sptr + wpls - 1) << 31)) & - (~*(sptr + wpls)) & - ((*(sptr + wpls) << 3) | (*(sptr + wpls + 1) >> 29)); - } - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphauto.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphauto.c deleted file mode 100644 index a63b0b7b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphauto.c +++ /dev/null @@ -1,877 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file fmorphauto.c - *
- *
- *    Main function calls:
- *       l_int32             fmorphautogen()
- *       l_int32             fmorphautogen1()
- *       l_int32             fmorphautogen2()
- *
- *    Static helpers:
- *       static SARRAY      *sarrayMakeWplsCode()
- *       static SARRAY      *sarrayMakeInnerLoopDWACode()
- *       static char        *makeBarrelshiftString()
- *
- *
- *    This automatically generates dwa code for erosion and dilation.
- *    Here's a road map for how it all works.
- *
- *    (1) You generate an array (a SELA) of structuring elements (SELs).
- *        This can be done in several ways, including
- *           (a) calling the function selaAddBasic() for
- *               pre-compiled SELs
- *           (b) generating the SELA in code in line
- *           (c) reading in a SELA from file, using selaRead() or
- *               various other formats.
- *
- *    (2) You call fmorphautogen1() and fmorphautogen2() on this SELA.
- *        These use the text files morphtemplate1.txt and
- *        morphtemplate2.txt for building up the source code.  See the file
- *        prog/fmorphautogen.c for an example of how this is done.
- *        The output is written to files named fmorphgen.*.c
- *        and fmorphgenlow.*.c, where "*" is an integer that you
- *        input to this function.  That integer labels both
- *        the output files, as well as all the functions that
- *        are generated.  That way, using different integers,
- *        you can invoke fmorphautogen() any number of times
- *        to get functions that all have different names so that
- *        they can be linked into one program.
- *
- *    (3) You copy the generated source files back to your src
- *        directory for compilation.  Put their names in the
- *        Makefile, regenerate the prototypes, and recompile
- *        the library.  Look at the Makefile to see how I've
- *        included morphgen.1.c and fmorphgenlow.1.c.  These files
- *        provide the high-level interfaces for erosion, dilation,
- *        opening and closing, and the low-level interfaces to
- *        do the actual work, for all 58 SELs in the SEL array.
- *
- *    (4) In an application, you now use this interface.  Again
- *        for the example files in the library, using integer "1":
- *
- *            PIX   *pixMorphDwa_1(PIX *pixd, PIX, *pixs,
- *                                 l_int32 operation, char *selname);
- *
- *                 or
- *
- *            PIX   *pixFMorphopGen_1(PIX *pixd, PIX *pixs,
- *                                    l_int32 operation, char *selname);
- *
- *        where the operation is one of {L_MORPH_DILATE, L_MORPH_ERODE.
- *        L_MORPH_OPEN, L_MORPH_CLOSE}, and the selname is one
- *        of the set that were defined as the name field of sels.
- *        This set is listed at the beginning of the file fmorphgen.1.c.
- *        For examples of use, see the file prog/binmorph_reg1.c, which
- *        verifies the consistency of the various implementations by
- *        comparing the dwa result with that of full-image rasterops.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -#define OUTROOT "fmorphgen" -#define TEMPLATE1 "morphtemplate1.txt" -#define TEMPLATE2 "morphtemplate2.txt" - -#define PROTOARGS "(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);" - -static const l_int32 L_BUF_SIZE = 512; - -static char * makeBarrelshiftString(l_int32 delx, l_int32 dely); -static SARRAY * sarrayMakeInnerLoopDWACode(SEL *sel, l_int32 index); -static SARRAY * sarrayMakeWplsCode(SEL *sel); - -static char wpldecls[][53] = { - "l_int32 wpls2;", - "l_int32 wpls2, wpls3;", - "l_int32 wpls2, wpls3, wpls4;", - "l_int32 wpls5;", - "l_int32 wpls5, wpls6;", - "l_int32 wpls5, wpls6, wpls7;", - "l_int32 wpls5, wpls6, wpls7, wpls8;", - "l_int32 wpls9;", - "l_int32 wpls9, wpls10;", - "l_int32 wpls9, wpls10, wpls11;", - "l_int32 wpls9, wpls10, wpls11, wpls12;", - "l_int32 wpls13;", - "l_int32 wpls13, wpls14;", - "l_int32 wpls13, wpls14, wpls15;", - "l_int32 wpls13, wpls14, wpls15, wpls16;", - "l_int32 wpls17;", - "l_int32 wpls17, wpls18;", - "l_int32 wpls17, wpls18, wpls19;", - "l_int32 wpls17, wpls18, wpls19, wpls20;", - "l_int32 wpls21;", - "l_int32 wpls21, wpls22;", - "l_int32 wpls21, wpls22, wpls23;", - "l_int32 wpls21, wpls22, wpls23, wpls24;", - "l_int32 wpls25;", - "l_int32 wpls25, wpls26;", - "l_int32 wpls25, wpls26, wpls27;", - "l_int32 wpls25, wpls26, wpls27, wpls28;", - "l_int32 wpls29;", - "l_int32 wpls29, wpls30;", - "l_int32 wpls29, wpls30, wpls31;"}; - -static char wplgendecls[][30] = { - "l_int32 wpls2;", - "l_int32 wpls3;", - "l_int32 wpls4;", - "l_int32 wpls5;", - "l_int32 wpls6;", - "l_int32 wpls7;", - "l_int32 wpls8;", - "l_int32 wpls9;", - "l_int32 wpls10;", - "l_int32 wpls11;", - "l_int32 wpls12;", - "l_int32 wpls13;", - "l_int32 wpls14;", - "l_int32 wpls15;", - "l_int32 wpls16;", - "l_int32 wpls17;", - "l_int32 wpls18;", - "l_int32 wpls19;", - "l_int32 wpls20;", - "l_int32 wpls21;", - "l_int32 wpls22;", - "l_int32 wpls23;", 
- "l_int32 wpls24;", - "l_int32 wpls25;", - "l_int32 wpls26;", - "l_int32 wpls27;", - "l_int32 wpls28;", - "l_int32 wpls29;", - "l_int32 wpls30;", - "l_int32 wpls31;"}; - -static char wpldefs[][25] = { - " wpls2 = 2 * wpls;", - " wpls3 = 3 * wpls;", - " wpls4 = 4 * wpls;", - " wpls5 = 5 * wpls;", - " wpls6 = 6 * wpls;", - " wpls7 = 7 * wpls;", - " wpls8 = 8 * wpls;", - " wpls9 = 9 * wpls;", - " wpls10 = 10 * wpls;", - " wpls11 = 11 * wpls;", - " wpls12 = 12 * wpls;", - " wpls13 = 13 * wpls;", - " wpls14 = 14 * wpls;", - " wpls15 = 15 * wpls;", - " wpls16 = 16 * wpls;", - " wpls17 = 17 * wpls;", - " wpls18 = 18 * wpls;", - " wpls19 = 19 * wpls;", - " wpls20 = 20 * wpls;", - " wpls21 = 21 * wpls;", - " wpls22 = 22 * wpls;", - " wpls23 = 23 * wpls;", - " wpls24 = 24 * wpls;", - " wpls25 = 25 * wpls;", - " wpls26 = 26 * wpls;", - " wpls27 = 27 * wpls;", - " wpls28 = 28 * wpls;", - " wpls29 = 29 * wpls;", - " wpls30 = 30 * wpls;", - " wpls31 = 31 * wpls;"}; - -static char wplstrp[][10] = {"+ wpls", "+ wpls2", "+ wpls3", "+ wpls4", - "+ wpls5", "+ wpls6", "+ wpls7", "+ wpls8", - "+ wpls9", "+ wpls10", "+ wpls11", "+ wpls12", - "+ wpls13", "+ wpls14", "+ wpls15", "+ wpls16", - "+ wpls17", "+ wpls18", "+ wpls19", "+ wpls20", - "+ wpls21", "+ wpls22", "+ wpls23", "+ wpls24", - "+ wpls25", "+ wpls26", "+ wpls27", "+ wpls28", - "+ wpls29", "+ wpls30", "+ wpls31"}; - -static char wplstrm[][10] = {"- wpls", "- wpls2", "- wpls3", "- wpls4", - "- wpls5", "- wpls6", "- wpls7", "- wpls8", - "- wpls9", "- wpls10", "- wpls11", "- wpls12", - "- wpls13", "- wpls14", "- wpls15", "- wpls16", - "- wpls17", "- wpls18", "- wpls19", "- wpls20", - "- wpls21", "- wpls22", "- wpls23", "- wpls24", - "- wpls25", "- wpls26", "- wpls27", "- wpls28", - "- wpls29", "- wpls30", "- wpls31"}; - - -/*! - * \brief fmorphautogen() - * - * \param[in] sela - * \param[in] fileindex - * \param[in] filename [optional]; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This function generates all the code for implementing
- *          dwa morphological operations using all the sels in the sela.
- *      (2) See fmorphautogen1() and fmorphautogen2() for details.
- * 
- */ -l_ok -fmorphautogen(SELA *sela, - l_int32 fileindex, - const char *filename) -{ -l_int32 ret1, ret2; - - PROCNAME("fmorphautogen"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - ret1 = fmorphautogen1(sela, fileindex, filename); - ret2 = fmorphautogen2(sela, fileindex, filename); - if (ret1 || ret2) - return ERROR_INT("code generation problem", procName, 1); - return 0; -} - - -/*! - * \brief fmorphautogen1() - * - * \param[in] sela - * \param[in] fileindex - * \param[in] filename [optional]; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This function uses morphtemplate1.txt to create a
- *          top-level file that contains two functions.  These
- *          functions will carry out dilation, erosion,
- *          opening or closing for any of the sels in the input sela.
- *      (2) The fileindex parameter is inserted into the output
- *          filename, as described below.
- *      (3) If filename == NULL, the output file is fmorphgen.[n].c,
- *          where [n] is equal to the %fileindex parameter.
- *      (4) If filename != NULL, the output file is [%filename].[n].c.
- * 
- */ -l_ok -fmorphautogen1(SELA *sela, - l_int32 fileindex, - const char *filename) -{ -char *filestr; -char *str_proto1, *str_proto2, *str_proto3; -char *str_doc1, *str_doc2, *str_doc3, *str_doc4; -char *str_def1, *str_def2, *str_proc1, *str_proc2; -char *str_dwa1, *str_low_dt, *str_low_ds, *str_low_ts; -char *str_low_tsp1, *str_low_dtp1; -char bigbuf[L_BUF_SIZE]; -l_int32 i, nsels, nbytes, actstart, end, newstart; -size_t size; -SARRAY *sa1, *sa2, *sa3; - - PROCNAME("fmorphautogen1"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - if (fileindex < 0) - fileindex = 0; - if ((nsels = selaGetCount(sela)) == 0) - return ERROR_INT("no sels in sela", procName, 1); - - /* Make array of textlines from morphtemplate1.txt */ - if ((filestr = (char *)l_binaryRead(TEMPLATE1, &size)) == NULL) - return ERROR_INT("filestr not made", procName, 1); - sa2 = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - if (!sa2) - return ERROR_INT("sa2 not made", procName, 1); - - /* Make array of sel names */ - sa1 = selaGetSelnames(sela); - - /* Make strings containing function call names */ - sprintf(bigbuf, "PIX *pixMorphDwa_%d(PIX *pixd, PIX *pixs, " - "l_int32 operation, char *selname);", fileindex); - str_proto1 = stringNew(bigbuf); - sprintf(bigbuf, "PIX *pixFMorphopGen_%d(PIX *pixd, PIX *pixs, " - "l_int32 operation, char *selname);", fileindex); - str_proto2 = stringNew(bigbuf); - sprintf(bigbuf, "l_int32 fmorphopgen_low_%d(l_uint32 *datad, l_int32 w,\n" - " l_int32 h, l_int32 wpld,\n" - " l_uint32 *datas, l_int32 wpls,\n" - " l_int32 index);", fileindex); - str_proto3 = stringNew(bigbuf); - sprintf(bigbuf, " * PIX *pixMorphDwa_%d()", fileindex); - str_doc1 = stringNew(bigbuf); - sprintf(bigbuf, " * PIX *pixFMorphopGen_%d()", fileindex); - str_doc2 = stringNew(bigbuf); - sprintf(bigbuf, " * \\brief pixMorphDwa_%d()", fileindex); - str_doc3 = stringNew(bigbuf); - sprintf(bigbuf, " * \\brief pixFMorphopGen_%d()", fileindex); - str_doc4 = 
stringNew(bigbuf); - sprintf(bigbuf, "pixMorphDwa_%d(PIX *pixd,", fileindex); - str_def1 = stringNew(bigbuf); - sprintf(bigbuf, "pixFMorphopGen_%d(PIX *pixd,", fileindex); - str_def2 = stringNew(bigbuf); - sprintf(bigbuf, " PROCNAME(\"pixMorphDwa_%d\");", fileindex); - str_proc1 = stringNew(bigbuf); - sprintf(bigbuf, " PROCNAME(\"pixFMorphopGen_%d\");", fileindex); - str_proc2 = stringNew(bigbuf); - sprintf(bigbuf, - " pixt2 = pixFMorphopGen_%d(NULL, pixt1, operation, selname);", - fileindex); - str_dwa1 = stringNew(bigbuf); - sprintf(bigbuf, - " fmorphopgen_low_%d(datad, w, h, wpld, datat, wpls, index);", - fileindex); - str_low_dt = stringNew(bigbuf); - sprintf(bigbuf, - " fmorphopgen_low_%d(datad, w, h, wpld, datas, wpls, index);", - fileindex); - str_low_ds = stringNew(bigbuf); - sprintf(bigbuf, - " fmorphopgen_low_%d(datat, w, h, wpls, datas, wpls, index+1);", - fileindex); - str_low_tsp1 = stringNew(bigbuf); - sprintf(bigbuf, - " fmorphopgen_low_%d(datat, w, h, wpls, datas, wpls, index);", - fileindex); - str_low_ts = stringNew(bigbuf); - sprintf(bigbuf, - " fmorphopgen_low_%d(datad, w, h, wpld, datat, wpls, index+1);", - fileindex); - str_low_dtp1 = stringNew(bigbuf); - - /* Make the output sa */ - sa3 = sarrayCreate(0); - - /* Copyright notice and info header */ - sarrayParseRange(sa2, 0, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Insert function names as documentation */ - sarrayAddString(sa3, str_doc1, L_INSERT); - sarrayAddString(sa3, str_doc2, L_INSERT); - - /* Add '#include's */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Insert function prototypes */ - sarrayAddString(sa3, str_proto1, L_INSERT); - sarrayAddString(sa3, str_proto2, L_INSERT); - sarrayAddString(sa3, str_proto3, L_INSERT); - - /* Add static globals */ - sprintf(bigbuf, "\nstatic l_int32 NUM_SELS_GENERATED = %d;", nsels); - sarrayAddString(sa3, bigbuf, L_COPY); - 
sprintf(bigbuf, "static char SEL_NAMES[][80] = {"); - sarrayAddString(sa3, bigbuf, L_COPY); - for (i = 0; i < nsels - 1; i++) { - sprintf(bigbuf, " \"%s\",", - sarrayGetString(sa1, i, L_NOCOPY)); - sarrayAddString(sa3, bigbuf, L_COPY); - } - sprintf(bigbuf, " \"%s\"};", - sarrayGetString(sa1, i, L_NOCOPY)); - sarrayAddString(sa3, bigbuf, L_COPY); - - /* Start pixMorphDwa_*() function description */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_doc3, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Finish pixMorphDwa_*() function definition */ - sarrayAddString(sa3, str_def1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_proc1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_dwa1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Start pixFMorphopGen_*() function description */ - sarrayAddString(sa3, str_doc4, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Finish pixFMorphopGen_*() function definition */ - sarrayAddString(sa3, str_def2, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_proc2, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_dt, L_COPY); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - 
sarrayAddString(sa3, str_low_ds, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_tsp1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_dt, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_ts, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - sarrayAddString(sa3, str_low_dtp1, L_INSERT); - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Output to file */ - filestr = sarrayToString(sa3, 1); - nbytes = strlen(filestr); - if (filename) - snprintf(bigbuf, L_BUF_SIZE, "%s.%d.c", filename, fileindex); - else - sprintf(bigbuf, "%s.%d.c", OUTROOT, fileindex); - l_binaryWrite(bigbuf, "w", filestr, nbytes); - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - LEPT_FREE(filestr); - return 0; -} - - -/* - * fmorphautogen2() - * - * Input: sela - * fileindex - * filename (; can be null) - * Return: 0 if OK; 1 on error - * - * Notes: - * (1) This function uses morphtemplate2.txt to create a - * low-level file that contains the low-level functions for - * implementing dilation and erosion for every sel - * in the input sela. - * (2) The fileindex parameter is inserted into the output - * filename, as described below. - * (3) If filename == NULL, the output file is fmorphgenlow.[n].c, - * where [n] is equal to the 'fileindex' parameter. - * (4) If filename != NULL, the output file is [filename]low.[n].c. 
- */ -l_int32 -fmorphautogen2(SELA *sela, - l_int32 fileindex, - const char *filename) -{ -char *filestr, *linestr, *fname; -char *str_doc1, *str_doc2, *str_doc3, *str_doc4, *str_def1; -char bigbuf[L_BUF_SIZE]; -char breakstring[] = " break;"; -char staticstring[] = "static void"; -l_int32 i, nsels, nbytes, actstart, end, newstart; -l_int32 argstart, argend, loopstart, loopend, finalstart, finalend; -size_t size; -SARRAY *sa1, *sa2, *sa3, *sa4, *sa5, *sa6; -SEL *sel; - - PROCNAME("fmorphautogen2"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - if (fileindex < 0) - fileindex = 0; - if ((nsels = selaGetCount(sela)) == 0) - return ERROR_INT("no sels in sela", procName, 1); - - /* Make the array of textlines from morphtemplate2.txt */ - if ((filestr = (char *)l_binaryRead(TEMPLATE2, &size)) == NULL) - return ERROR_INT("filestr not made", procName, 1); - sa1 = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - if (!sa1) - return ERROR_INT("sa1 not made", procName, 1); - - /* Make the array of static function names */ - if ((sa2 = sarrayCreate(2 * nsels)) == NULL) { - sarrayDestroy(&sa1); - return ERROR_INT("sa2 not made", procName, 1); - } - for (i = 0; i < nsels; i++) { - sprintf(bigbuf, "fdilate_%d_%d", fileindex, i); - sarrayAddString(sa2, bigbuf, L_COPY); - sprintf(bigbuf, "ferode_%d_%d", fileindex, i); - sarrayAddString(sa2, bigbuf, L_COPY); - } - - /* Make the static prototype strings */ - sa3 = sarrayCreate(2 * nsels); /* should be ok */ - for (i = 0; i < 2 * nsels; i++) { - fname = sarrayGetString(sa2, i, L_NOCOPY); - sprintf(bigbuf, "static void %s%s", fname, PROTOARGS); - sarrayAddString(sa3, bigbuf, L_COPY); - } - - /* Make strings containing function names */ - sprintf(bigbuf, " * l_int32 fmorphopgen_low_%d()", - fileindex); - str_doc1 = stringNew(bigbuf); - sprintf(bigbuf, " * void fdilate_%d_*()", fileindex); - str_doc2 = stringNew(bigbuf); - sprintf(bigbuf, " * void ferode_%d_*()", fileindex); - str_doc3 = 
stringNew(bigbuf); - - /* Output to this sa */ - sa4 = sarrayCreate(0); - - /* Copyright notice and info header */ - sarrayParseRange(sa1, 0, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Insert function names as documentation */ - sarrayAddString(sa4, str_doc1, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - sarrayAddString(sa4, str_doc2, L_INSERT); - sarrayAddString(sa4, str_doc3, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Insert static protos */ - for (i = 0; i < 2 * nsels; i++) { - if ((linestr = sarrayGetString(sa3, i, L_COPY)) == NULL) { - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - sarrayDestroy(&sa4); - return ERROR_INT("linestr not retrieved", procName, 1); - } - sarrayAddString(sa4, linestr, L_INSERT); - } - - /* More strings with function names */ - sprintf(bigbuf, " * fmorphopgen_low_%d()", fileindex); - str_doc4 = stringNew(bigbuf); - sprintf(bigbuf, "fmorphopgen_low_%d(l_uint32 *datad,", fileindex); - str_def1 = stringNew(bigbuf); - - /* Insert function header */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - sarrayAddString(sa4, str_doc4, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - sarrayAddString(sa4, str_def1, L_INSERT); - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Generate and insert the dispatcher code */ - for (i = 0; i < 2 * nsels; i++) { - sprintf(bigbuf, " case %d:", i); - sarrayAddString(sa4, bigbuf, L_COPY); - sprintf(bigbuf, " %s(datad, w, h, wpld, datas, wpls);", - sarrayGetString(sa2, i, L_NOCOPY)); - sarrayAddString(sa4, bigbuf, L_COPY); - sarrayAddString(sa4, 
breakstring, L_COPY); - } - - /* Finish the dispatcher and introduce the low-level code */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa4, sa1, actstart, end); - - /* Get the range for the args common to all functions */ - sarrayParseRange(sa1, newstart, &argstart, &argend, &newstart, "--", 0); - - /* Get the range for the loop code common to all functions */ - sarrayParseRange(sa1, newstart, &loopstart, &loopend, &newstart, "--", 0); - - /* Get the range for the ending code common to all functions */ - sarrayParseRange(sa1, newstart, &finalstart, &finalend, &newstart, "--", 0); - - /* Do all the static functions */ - for (i = 0; i < 2 * nsels; i++) { - /* Generate the function header and add the common args */ - sarrayAddString(sa4, staticstring, L_COPY); - fname = sarrayGetString(sa2, i, L_NOCOPY); - sprintf(bigbuf, "%s(l_uint32 *datad,", fname); - sarrayAddString(sa4, bigbuf, L_COPY); - sarrayAppendRange(sa4, sa1, argstart, argend); - - /* Declare and define wplsN args, as necessary */ - if ((sel = selaGetSel(sela, i/2)) == NULL) { - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - sarrayDestroy(&sa4); - return ERROR_INT("sel not returned", procName, 1); - } - sa5 = sarrayMakeWplsCode(sel); - sarrayJoin(sa4, sa5); - sarrayDestroy(&sa5); - - /* Add the function loop code */ - sarrayAppendRange(sa4, sa1, loopstart, loopend); - - /* Insert barrel-op code for *dptr */ - sa6 = sarrayMakeInnerLoopDWACode(sel, i); - sarrayJoin(sa4, sa6); - sarrayDestroy(&sa6); - - /* Finish the function code */ - sarrayAppendRange(sa4, sa1, finalstart, finalend); - } - - /* Output to file */ - filestr = sarrayToString(sa4, 1); - nbytes = strlen(filestr); - if (filename) - snprintf(bigbuf, L_BUF_SIZE, "%slow.%d.c", filename, fileindex); - else - sprintf(bigbuf, "%slow.%d.c", OUTROOT, fileindex); - l_binaryWrite(bigbuf, "w", filestr, nbytes); - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - 
sarrayDestroy(&sa4); - LEPT_FREE(filestr); - return 0; -} - - -/*--------------------------------------------------------------------------* - * Helper code for sel * - *--------------------------------------------------------------------------*/ -/*! - * \brief sarrayMakeWplsCode() - */ -static SARRAY * -sarrayMakeWplsCode(SEL *sel) -{ -char emptystring[] = ""; -l_int32 i, j, ymax, dely, allvshifts; -l_int32 vshift[32]; -SARRAY *sa; - - PROCNAME("sarrayMakeWplsCode"); - - if (!sel) - return (SARRAY *)ERROR_PTR("sel not defined", procName, NULL); - - for (i = 0; i < 32; i++) - vshift[i] = 0; - ymax = 0; - for (i = 0; i < sel->sy; i++) { - for (j = 0; j < sel->sx; j++) { - if (sel->data[i][j] == 1) { - dely = L_ABS(i - sel->cy); - if (dely < 32) - vshift[dely] = 1; - ymax = L_MAX(ymax, dely); - } - } - } - if (ymax > 31) { - L_WARNING("ymax > 31; truncating to 31\n", procName); - ymax = 31; - } - - /* Test if this is a vertical brick */ - allvshifts = TRUE; - for (i = 0; i < ymax; i++) { - if (vshift[i] == 0) { - allvshifts = FALSE; - break; - } - } - - sa = sarrayCreate(0); - - /* Add declarations */ - if (allvshifts == TRUE) { /* packs them as well as possible */ - if (ymax > 4) - sarrayAddString(sa, wpldecls[2], L_COPY); - if (ymax > 8) - sarrayAddString(sa, wpldecls[6], L_COPY); - if (ymax > 12) - sarrayAddString(sa, wpldecls[10], L_COPY); - if (ymax > 16) - sarrayAddString(sa, wpldecls[14], L_COPY); - if (ymax > 20) - sarrayAddString(sa, wpldecls[18], L_COPY); - if (ymax > 24) - sarrayAddString(sa, wpldecls[22], L_COPY); - if (ymax > 28) - sarrayAddString(sa, wpldecls[26], L_COPY); - if (ymax > 1) - sarrayAddString(sa, wpldecls[ymax - 2], L_COPY); - } else { /* puts them one/line */ - for (i = 2; i <= ymax; i++) { - if (vshift[i]) - sarrayAddString(sa, wplgendecls[i - 2], L_COPY); - } - } - - sarrayAddString(sa, emptystring, L_COPY); - - /* Add definitions */ - for (i = 2; i <= ymax; i++) { - if (vshift[i]) - sarrayAddString(sa, wpldefs[i - 2], L_COPY); - } - - 
return sa; -} - - -/*! - * \brief sarrayMakeInnerLoopDWACode() - */ -static SARRAY * -sarrayMakeInnerLoopDWACode(SEL *sel, - l_int32 index) -{ -char *tstr, *string; -char logicalor[] = "|"; -char logicaland[] = "&"; -char bigbuf[L_BUF_SIZE]; -l_int32 i, j, optype, count, nfound, delx, dely; -SARRAY *sa; - - PROCNAME("sarrayMakeInnerLoopDWACode"); - - if (!sel) - return (SARRAY *)ERROR_PTR("sel not defined", procName, NULL); - - if (index % 2 == 0) { - optype = L_MORPH_DILATE; - tstr = logicalor; - } else { - optype = L_MORPH_ERODE; - tstr = logicaland; - } - - count = 0; - for (i = 0; i < sel->sy; i++) { - for (j = 0; j < sel->sx; j++) { - if (sel->data[i][j] == 1) - count++; - } - } - - sa = sarrayCreate(0); - if (count == 0) { - L_WARNING("no hits in Sel %d\n", procName, index); - return sa; /* no code inside! */ - } - - nfound = 0; - for (i = 0; i < sel->sy; i++) { - for (j = 0; j < sel->sx; j++) { - if (sel->data[i][j] == 1) { - nfound++; - if (optype == L_MORPH_DILATE) { - dely = sel->cy - i; - delx = sel->cx - j; - } else { /* optype == L_MORPH_ERODE */ - dely = i - sel->cy; - delx = j - sel->cx; - } - if ((string = makeBarrelshiftString(delx, dely)) == NULL) { - L_WARNING("barrel shift string not made\n", procName); - continue; - } - if (count == 1) /* just one item */ - sprintf(bigbuf, " *dptr = %s;", string); - else if (nfound == 1) - sprintf(bigbuf, " *dptr = %s %s", string, tstr); - else if (nfound < count) - sprintf(bigbuf, " %s %s", string, tstr); - else /* nfound == count */ - sprintf(bigbuf, " %s;", string); - sarrayAddString(sa, bigbuf, L_COPY); - LEPT_FREE(string); - } - } - } - - return sa; -} - - -/*! 
- * \brief makeBarrelshiftString() - */ -static char * -makeBarrelshiftString(l_int32 delx, /* j - cx */ - l_int32 dely) /* i - cy */ -{ -l_int32 absx, absy; -char bigbuf[L_BUF_SIZE]; - - PROCNAME("makeBarrelshiftString"); - - if (delx < -31 || delx > 31) - return (char *)ERROR_PTR("delx out of bounds", procName, NULL); - if (dely < -31 || dely > 31) - return (char *)ERROR_PTR("dely out of bounds", procName, NULL); - absx = L_ABS(delx); - absy = L_ABS(dely); - - if ((delx == 0) && (dely == 0)) - sprintf(bigbuf, "(*sptr)"); - else if ((delx == 0) && (dely < 0)) - sprintf(bigbuf, "(*(sptr %s))", wplstrm[absy - 1]); - else if ((delx == 0) && (dely > 0)) - sprintf(bigbuf, "(*(sptr %s))", wplstrp[absy - 1]); - else if ((delx < 0) && (dely == 0)) - sprintf(bigbuf, "((*(sptr) >> %d) | (*(sptr - 1) << %d))", - absx, 32 - absx); - else if ((delx > 0) && (dely == 0)) - sprintf(bigbuf, "((*(sptr) << %d) | (*(sptr + 1) >> %d))", - absx, 32 - absx); - else if ((delx < 0) && (dely < 0)) - sprintf(bigbuf, "((*(sptr %s) >> %d) | (*(sptr %s - 1) << %d))", - wplstrm[absy - 1], absx, wplstrm[absy - 1], 32 - absx); - else if ((delx > 0) && (dely < 0)) - sprintf(bigbuf, "((*(sptr %s) << %d) | (*(sptr %s + 1) >> %d))", - wplstrm[absy - 1], absx, wplstrm[absy - 1], 32 - absx); - else if ((delx < 0) && (dely > 0)) - sprintf(bigbuf, "((*(sptr %s) >> %d) | (*(sptr %s - 1) << %d))", - wplstrp[absy - 1], absx, wplstrp[absy - 1], 32 - absx); - else /* ((delx > 0) && (dely > 0)) */ - sprintf(bigbuf, "((*(sptr %s) << %d) | (*(sptr %s + 1) >> %d))", - wplstrp[absy - 1], absx, wplstrp[absy - 1], 32 - absx); - - return stringNew(bigbuf); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphgen.1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphgen.1.c deleted file mode 100644 index f0bf9aec..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphgen.1.c +++ /dev/null @@ -1,277 +0,0 @@ 
-/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! 
- * Top-level fast binary morphology with auto-generated sels - * - * PIX *pixMorphDwa_1() - * PIX *pixFMorphopGen_1() - */ - -#include -#include "allheaders.h" - -PIX *pixMorphDwa_1(PIX *pixd, PIX *pixs, l_int32 operation, char *selname); -PIX *pixFMorphopGen_1(PIX *pixd, PIX *pixs, l_int32 operation, char *selname); -l_int32 fmorphopgen_low_1(l_uint32 *datad, l_int32 w, - l_int32 h, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, - l_int32 index); - -static l_int32 NUM_SELS_GENERATED = 58; -static char SEL_NAMES[][80] = { - "sel_2h", - "sel_3h", - "sel_4h", - "sel_5h", - "sel_6h", - "sel_7h", - "sel_8h", - "sel_9h", - "sel_10h", - "sel_11h", - "sel_12h", - "sel_13h", - "sel_14h", - "sel_15h", - "sel_20h", - "sel_21h", - "sel_25h", - "sel_30h", - "sel_31h", - "sel_35h", - "sel_40h", - "sel_41h", - "sel_45h", - "sel_50h", - "sel_51h", - "sel_2v", - "sel_3v", - "sel_4v", - "sel_5v", - "sel_6v", - "sel_7v", - "sel_8v", - "sel_9v", - "sel_10v", - "sel_11v", - "sel_12v", - "sel_13v", - "sel_14v", - "sel_15v", - "sel_20v", - "sel_21v", - "sel_25v", - "sel_30v", - "sel_31v", - "sel_35v", - "sel_40v", - "sel_41v", - "sel_45v", - "sel_50v", - "sel_51v", - "sel_2", - "sel_3", - "sel_4", - "sel_5", - "sel_2dp", - "sel_2dm", - "sel_5dp", - "sel_5dm"}; - -/*! - * \brief pixMorphDwa_1() - * - * \param[in] pixd usual 3 choices: null, == pixs, != pixs - * \param[in] pixs 1 bpp - * \param[in] operation L_MORPH_DILATE, L_MORPH_ERODE, - * L_MORPH_OPEN, L_MORPH_CLOSE - * \param[in] sel name - * \return pixd - * - *
- * Notes:
- *      (1) This simply adds a border, calls the appropriate
- *          pixFMorphopGen_*(), and removes the border.
- *          See the notes for that function.
- *      (2) The size of the border depends on the operation
- *          and the boundary conditions.
- * 
- */ -PIX * -pixMorphDwa_1(PIX *pixd, - PIX *pixs, - l_int32 operation, - char *selname) -{ -l_int32 bordercolor, bordersize; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixMorphDwa_1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - /* Set the border size */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - bordersize = 32; - if (bordercolor == 0 && operation == L_MORPH_CLOSE) - bordersize += 32; - - pixt1 = pixAddBorder(pixs, bordersize, 0); - pixt2 = pixFMorphopGen_1(NULL, pixt1, operation, selname); - pixt3 = pixRemoveBorder(pixt2, bordersize); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixCopy(pixd, pixt3); - pixDestroy(&pixt3); - return pixd; -} - - -/*! - * \brief pixFMorphopGen_1() - * - * \param[in] pixd usual 3 choices: null, == pixs, != pixs - * \param[in] pixs 1 bpp - * \param[in] operation L_MORPH_DILATE, L_MORPH_ERODE, - * L_MORPH_OPEN, L_MORPH_CLOSE - * \param[in] sel name - * \return pixd - * - *
- * Notes:
- *      (1) This is a dwa operation, and the Sels must be limited in
- *          size to not more than 31 pixels about the origin.
- *      (2) A border of appropriate size (32 pixels, or 64 pixels
- *          for safe closing with asymmetric b.c.) must be added before
- *          this function is called.
- *      (3) This handles all required setting of the border pixels
- *          before erosion and dilation.
- *      (4) The closing operation is safe; no pixels can be removed
- *          near the boundary.
- * 
- */ -PIX * -pixFMorphopGen_1(PIX *pixd, - PIX *pixs, - l_int32 operation, - char *selname) -{ -l_int32 i, index, found, w, h, wpls, wpld, bordercolor, erodeop, borderop; -l_uint32 *datad, *datas, *datat; -PIX *pixt; - - PROCNAME("pixFMorphopGen_1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, pixd); - - /* Get boundary colors to use */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - if (bordercolor == 1) - erodeop = PIX_SET; - else - erodeop = PIX_CLR; - - found = FALSE; - for (i = 0; i < NUM_SELS_GENERATED; i++) { - if (strcmp(selname, SEL_NAMES[i]) == 0) { - found = TRUE; - index = 2 * i; - break; - } - } - if (found == FALSE) - return (PIX *)ERROR_PTR("sel index not found", procName, pixd); - - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - else /* for in-place or pre-allocated */ - pixResizeImageData(pixd, pixs); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - /* The images must be surrounded, in advance, with a border of - * size 32 pixels (or 64, for closing), that we'll read from. 
- * Fabricate a "proper" image as the subimage within the 32 - * pixel border, having the following parameters: */ - w = pixGetWidth(pixs) - 64; - h = pixGetHeight(pixs) - 64; - datas = pixGetData(pixs) + 32 * wpls + 1; - datad = pixGetData(pixd) + 32 * wpld + 1; - - if (operation == L_MORPH_DILATE || operation == L_MORPH_ERODE) { - borderop = PIX_CLR; - if (operation == L_MORPH_ERODE) { - borderop = erodeop; - index++; - } - if (pixd == pixs) { /* in-place; generate a temp image */ - if ((pixt = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - datat = pixGetData(pixt) + 32 * wpls + 1; - pixSetOrClearBorder(pixt, 32, 32, 32, 32, borderop); - fmorphopgen_low_1(datad, w, h, wpld, datat, wpls, index); - pixDestroy(&pixt); - } - else { /* not in-place */ - pixSetOrClearBorder(pixs, 32, 32, 32, 32, borderop); - fmorphopgen_low_1(datad, w, h, wpld, datas, wpls, index); - } - } - else { /* opening or closing; generate a temp image */ - if ((pixt = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - datat = pixGetData(pixt) + 32 * wpls + 1; - if (operation == L_MORPH_OPEN) { - pixSetOrClearBorder(pixs, 32, 32, 32, 32, erodeop); - fmorphopgen_low_1(datat, w, h, wpls, datas, wpls, index+1); - pixSetOrClearBorder(pixt, 32, 32, 32, 32, PIX_CLR); - fmorphopgen_low_1(datad, w, h, wpld, datat, wpls, index); - } - else { /* closing */ - pixSetOrClearBorder(pixs, 32, 32, 32, 32, PIX_CLR); - fmorphopgen_low_1(datat, w, h, wpls, datas, wpls, index); - pixSetOrClearBorder(pixt, 32, 32, 32, 32, erodeop); - fmorphopgen_low_1(datad, w, h, wpld, datat, wpls, index+1); - } - pixDestroy(&pixt); - } - - return pixd; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphgenlow.1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphgenlow.1.c deleted file mode 100644 index dd43da2e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fmorphgenlow.1.c +++ /dev/null 
@@ -1,5862 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! 
- * Low-level fast binary morphology with auto-generated sels - * - * Dispatcher: - * l_int32 fmorphopgen_low_1() - * - * Static Low-level: - * void fdilate_1_*() - * void ferode_1_*() - */ - -#include "allheaders.h" - -static void fdilate_1_0(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_0(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_1(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_1(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_2(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_2(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_4(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_4(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_6(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_6(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_8(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_8(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_10(l_uint32 *, l_int32, l_int32, l_int32, 
l_uint32 *, l_int32); -static void ferode_1_10(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_11(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_11(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_12(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_12(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_13(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_13(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_14(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_14(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_15(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_15(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_16(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_16(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_17(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_17(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_18(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_18(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_19(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_19(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_20(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_20(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_21(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_21(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 
*, l_int32); -static void fdilate_1_22(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_22(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_23(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_23(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_24(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_24(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_25(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_25(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_26(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_26(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_27(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_27(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_28(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_28(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_29(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_29(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_30(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_30(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_31(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_31(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_32(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_32(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_33(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, 
l_int32); -static void ferode_1_33(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_34(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_34(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_35(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_35(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_36(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_36(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_37(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_37(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_38(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_38(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_39(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_39(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_40(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_40(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_41(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_41(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_42(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_42(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_43(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_43(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_44(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_44(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); 
-static void fdilate_1_45(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_45(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_46(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_46(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_47(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_47(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_48(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_48(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_49(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_49(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_50(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_50(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_51(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_51(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_52(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_52(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_53(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_53(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_54(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_54(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_55(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_55(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_56(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static 
void ferode_1_56(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void fdilate_1_57(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); -static void ferode_1_57(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32); - - -/*---------------------------------------------------------------------* - * Fast morph dispatcher * - *---------------------------------------------------------------------*/ -/*! - * fmorphopgen_low_1() - * - * a dispatcher to appropriate low-level code - */ -l_int32 -fmorphopgen_low_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 index) -{ - - switch (index) - { - case 0: - fdilate_1_0(datad, w, h, wpld, datas, wpls); - break; - case 1: - ferode_1_0(datad, w, h, wpld, datas, wpls); - break; - case 2: - fdilate_1_1(datad, w, h, wpld, datas, wpls); - break; - case 3: - ferode_1_1(datad, w, h, wpld, datas, wpls); - break; - case 4: - fdilate_1_2(datad, w, h, wpld, datas, wpls); - break; - case 5: - ferode_1_2(datad, w, h, wpld, datas, wpls); - break; - case 6: - fdilate_1_3(datad, w, h, wpld, datas, wpls); - break; - case 7: - ferode_1_3(datad, w, h, wpld, datas, wpls); - break; - case 8: - fdilate_1_4(datad, w, h, wpld, datas, wpls); - break; - case 9: - ferode_1_4(datad, w, h, wpld, datas, wpls); - break; - case 10: - fdilate_1_5(datad, w, h, wpld, datas, wpls); - break; - case 11: - ferode_1_5(datad, w, h, wpld, datas, wpls); - break; - case 12: - fdilate_1_6(datad, w, h, wpld, datas, wpls); - break; - case 13: - ferode_1_6(datad, w, h, wpld, datas, wpls); - break; - case 14: - fdilate_1_7(datad, w, h, wpld, datas, wpls); - break; - case 15: - ferode_1_7(datad, w, h, wpld, datas, wpls); - break; - case 16: - fdilate_1_8(datad, w, h, wpld, datas, wpls); - break; - case 17: - ferode_1_8(datad, w, h, wpld, datas, wpls); - break; - case 18: - fdilate_1_9(datad, w, h, wpld, datas, wpls); - break; - case 19: - ferode_1_9(datad, w, h, wpld, datas, wpls); - 
break; - case 20: - fdilate_1_10(datad, w, h, wpld, datas, wpls); - break; - case 21: - ferode_1_10(datad, w, h, wpld, datas, wpls); - break; - case 22: - fdilate_1_11(datad, w, h, wpld, datas, wpls); - break; - case 23: - ferode_1_11(datad, w, h, wpld, datas, wpls); - break; - case 24: - fdilate_1_12(datad, w, h, wpld, datas, wpls); - break; - case 25: - ferode_1_12(datad, w, h, wpld, datas, wpls); - break; - case 26: - fdilate_1_13(datad, w, h, wpld, datas, wpls); - break; - case 27: - ferode_1_13(datad, w, h, wpld, datas, wpls); - break; - case 28: - fdilate_1_14(datad, w, h, wpld, datas, wpls); - break; - case 29: - ferode_1_14(datad, w, h, wpld, datas, wpls); - break; - case 30: - fdilate_1_15(datad, w, h, wpld, datas, wpls); - break; - case 31: - ferode_1_15(datad, w, h, wpld, datas, wpls); - break; - case 32: - fdilate_1_16(datad, w, h, wpld, datas, wpls); - break; - case 33: - ferode_1_16(datad, w, h, wpld, datas, wpls); - break; - case 34: - fdilate_1_17(datad, w, h, wpld, datas, wpls); - break; - case 35: - ferode_1_17(datad, w, h, wpld, datas, wpls); - break; - case 36: - fdilate_1_18(datad, w, h, wpld, datas, wpls); - break; - case 37: - ferode_1_18(datad, w, h, wpld, datas, wpls); - break; - case 38: - fdilate_1_19(datad, w, h, wpld, datas, wpls); - break; - case 39: - ferode_1_19(datad, w, h, wpld, datas, wpls); - break; - case 40: - fdilate_1_20(datad, w, h, wpld, datas, wpls); - break; - case 41: - ferode_1_20(datad, w, h, wpld, datas, wpls); - break; - case 42: - fdilate_1_21(datad, w, h, wpld, datas, wpls); - break; - case 43: - ferode_1_21(datad, w, h, wpld, datas, wpls); - break; - case 44: - fdilate_1_22(datad, w, h, wpld, datas, wpls); - break; - case 45: - ferode_1_22(datad, w, h, wpld, datas, wpls); - break; - case 46: - fdilate_1_23(datad, w, h, wpld, datas, wpls); - break; - case 47: - ferode_1_23(datad, w, h, wpld, datas, wpls); - break; - case 48: - fdilate_1_24(datad, w, h, wpld, datas, wpls); - break; - case 49: - ferode_1_24(datad, w, 
h, wpld, datas, wpls); - break; - case 50: - fdilate_1_25(datad, w, h, wpld, datas, wpls); - break; - case 51: - ferode_1_25(datad, w, h, wpld, datas, wpls); - break; - case 52: - fdilate_1_26(datad, w, h, wpld, datas, wpls); - break; - case 53: - ferode_1_26(datad, w, h, wpld, datas, wpls); - break; - case 54: - fdilate_1_27(datad, w, h, wpld, datas, wpls); - break; - case 55: - ferode_1_27(datad, w, h, wpld, datas, wpls); - break; - case 56: - fdilate_1_28(datad, w, h, wpld, datas, wpls); - break; - case 57: - ferode_1_28(datad, w, h, wpld, datas, wpls); - break; - case 58: - fdilate_1_29(datad, w, h, wpld, datas, wpls); - break; - case 59: - ferode_1_29(datad, w, h, wpld, datas, wpls); - break; - case 60: - fdilate_1_30(datad, w, h, wpld, datas, wpls); - break; - case 61: - ferode_1_30(datad, w, h, wpld, datas, wpls); - break; - case 62: - fdilate_1_31(datad, w, h, wpld, datas, wpls); - break; - case 63: - ferode_1_31(datad, w, h, wpld, datas, wpls); - break; - case 64: - fdilate_1_32(datad, w, h, wpld, datas, wpls); - break; - case 65: - ferode_1_32(datad, w, h, wpld, datas, wpls); - break; - case 66: - fdilate_1_33(datad, w, h, wpld, datas, wpls); - break; - case 67: - ferode_1_33(datad, w, h, wpld, datas, wpls); - break; - case 68: - fdilate_1_34(datad, w, h, wpld, datas, wpls); - break; - case 69: - ferode_1_34(datad, w, h, wpld, datas, wpls); - break; - case 70: - fdilate_1_35(datad, w, h, wpld, datas, wpls); - break; - case 71: - ferode_1_35(datad, w, h, wpld, datas, wpls); - break; - case 72: - fdilate_1_36(datad, w, h, wpld, datas, wpls); - break; - case 73: - ferode_1_36(datad, w, h, wpld, datas, wpls); - break; - case 74: - fdilate_1_37(datad, w, h, wpld, datas, wpls); - break; - case 75: - ferode_1_37(datad, w, h, wpld, datas, wpls); - break; - case 76: - fdilate_1_38(datad, w, h, wpld, datas, wpls); - break; - case 77: - ferode_1_38(datad, w, h, wpld, datas, wpls); - break; - case 78: - fdilate_1_39(datad, w, h, wpld, datas, wpls); - break; - case 
79: - ferode_1_39(datad, w, h, wpld, datas, wpls); - break; - case 80: - fdilate_1_40(datad, w, h, wpld, datas, wpls); - break; - case 81: - ferode_1_40(datad, w, h, wpld, datas, wpls); - break; - case 82: - fdilate_1_41(datad, w, h, wpld, datas, wpls); - break; - case 83: - ferode_1_41(datad, w, h, wpld, datas, wpls); - break; - case 84: - fdilate_1_42(datad, w, h, wpld, datas, wpls); - break; - case 85: - ferode_1_42(datad, w, h, wpld, datas, wpls); - break; - case 86: - fdilate_1_43(datad, w, h, wpld, datas, wpls); - break; - case 87: - ferode_1_43(datad, w, h, wpld, datas, wpls); - break; - case 88: - fdilate_1_44(datad, w, h, wpld, datas, wpls); - break; - case 89: - ferode_1_44(datad, w, h, wpld, datas, wpls); - break; - case 90: - fdilate_1_45(datad, w, h, wpld, datas, wpls); - break; - case 91: - ferode_1_45(datad, w, h, wpld, datas, wpls); - break; - case 92: - fdilate_1_46(datad, w, h, wpld, datas, wpls); - break; - case 93: - ferode_1_46(datad, w, h, wpld, datas, wpls); - break; - case 94: - fdilate_1_47(datad, w, h, wpld, datas, wpls); - break; - case 95: - ferode_1_47(datad, w, h, wpld, datas, wpls); - break; - case 96: - fdilate_1_48(datad, w, h, wpld, datas, wpls); - break; - case 97: - ferode_1_48(datad, w, h, wpld, datas, wpls); - break; - case 98: - fdilate_1_49(datad, w, h, wpld, datas, wpls); - break; - case 99: - ferode_1_49(datad, w, h, wpld, datas, wpls); - break; - case 100: - fdilate_1_50(datad, w, h, wpld, datas, wpls); - break; - case 101: - ferode_1_50(datad, w, h, wpld, datas, wpls); - break; - case 102: - fdilate_1_51(datad, w, h, wpld, datas, wpls); - break; - case 103: - ferode_1_51(datad, w, h, wpld, datas, wpls); - break; - case 104: - fdilate_1_52(datad, w, h, wpld, datas, wpls); - break; - case 105: - ferode_1_52(datad, w, h, wpld, datas, wpls); - break; - case 106: - fdilate_1_53(datad, w, h, wpld, datas, wpls); - break; - case 107: - ferode_1_53(datad, w, h, wpld, datas, wpls); - break; - case 108: - fdilate_1_54(datad, w, h, 
wpld, datas, wpls); - break; - case 109: - ferode_1_54(datad, w, h, wpld, datas, wpls); - break; - case 110: - fdilate_1_55(datad, w, h, wpld, datas, wpls); - break; - case 111: - ferode_1_55(datad, w, h, wpld, datas, wpls); - break; - case 112: - fdilate_1_56(datad, w, h, wpld, datas, wpls); - break; - case 113: - ferode_1_56(datad, w, h, wpld, datas, wpls); - break; - case 114: - fdilate_1_57(datad, w, h, wpld, datas, wpls); - break; - case 115: - ferode_1_57(datad, w, h, wpld, datas, wpls); - break; - } - - return 0; -} - - -/*--------------------------------------------------------------------------* - * Low-level auto-generated static routines * - *--------------------------------------------------------------------------*/ -/* - * N.B. In all the low-level routines, the part of the image - * that is accessed has been clipped by 32 pixels on - * all four sides. This is done in the higher level - * code by redefining w and h smaller and by moving the - * start-of-image pointers up to the beginning of this - * interior rectangle. 
- */ -static void -fdilate_1_0(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr); - } - } -} - -static void -ferode_1_0(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr); - } - } -} - -static void -fdilate_1_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)); - } - } -} - -static void -ferode_1_1(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)); - } - } -} - -static void -fdilate_1_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ 
-l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)); - } - } -} - -static void -ferode_1_2(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)); - } - } -} - -static void -fdilate_1_3(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)); - } - } -} - -static void -ferode_1_3(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - 
((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)); - } - } -} - -static void -fdilate_1_4(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)); - } - } -} - -static void -ferode_1_4(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)); - } - } -} - -static void -fdilate_1_5(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)); - } - } -} - -static 
void -ferode_1_5(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)); - } - } -} - -static void -fdilate_1_6(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)); - } - } -} - -static void -ferode_1_6(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 
30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)); - } - } -} - -static void -fdilate_1_7(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)); - } - } -} - -static void -ferode_1_7(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)); - } - } -} - -static void -fdilate_1_8(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | 
(*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)); - } - } -} - -static void -ferode_1_8(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)); - } - } -} - -static void -fdilate_1_9(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)); - } - } -} - -static void 
-ferode_1_9(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)); - } - } -} - -static void -fdilate_1_10(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)); - } - } -} - -static void -ferode_1_10(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr 
= datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)); - } - } -} - -static void -fdilate_1_11(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)); - } - } -} - -static void -ferode_1_11(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) 
& - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)); - } - } -} - -static void -fdilate_1_12(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)); - } - } -} - -static void -ferode_1_12(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 
1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)); - } - } -} - -static void -fdilate_1_13(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)); - } - } -} - -static void -ferode_1_13(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) 
| (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)); - } - } -} - -static void -fdilate_1_14(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)); - } - } -} - -static void -ferode_1_14(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - 
((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)); - } - } -} - -static void -fdilate_1_15(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 
23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)); - } - } -} - -static void -ferode_1_15(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)); - } - } -} - -static void -fdilate_1_16(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 
1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)); - } - } -} - -static void -ferode_1_16(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) 
| (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)); - } - } -} - -static void -fdilate_1_17(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)); - } - } -} - -static void -ferode_1_17(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ 
-l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)); - } - } -} - -static void -fdilate_1_18(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 
18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)); - } - } -} - -static void -ferode_1_18(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | 
(*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)); - } - } -} - -static void -fdilate_1_19(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - 
((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)); - } - } -} - -static void -ferode_1_19(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | 
(*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)); - } - } -} - -static void -fdilate_1_20(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) | - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - 
((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) | - ((*(sptr) >> 19) | (*(sptr - 1) << 13)); - } - } -} - -static void -ferode_1_20(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) & - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) 
& - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) & - ((*(sptr) << 19) | (*(sptr + 1) >> 13)); - } - } -} - -static void -fdilate_1_21(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) | - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | 
(*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) | - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) | - ((*(sptr) >> 20) | (*(sptr - 1) << 12)); - } - } -} - -static void -ferode_1_21(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) & - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - 
((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) & - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) & - ((*(sptr) << 20) | (*(sptr + 1) >> 12)); - } - } -} - -static void -fdilate_1_22(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 22) | (*(sptr + 1) >> 10)) | - ((*(sptr) << 21) | (*(sptr + 1) >> 11)) | - ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) | - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 
1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) | - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) | - ((*(sptr) >> 20) | (*(sptr - 1) << 12)) | - ((*(sptr) >> 21) | (*(sptr - 1) << 11)) | - ((*(sptr) >> 22) | (*(sptr - 1) << 10)); - } - } -} - -static void -ferode_1_22(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 22) | (*(sptr - 1) << 10)) & - ((*(sptr) >> 
21) | (*(sptr - 1) << 11)) & - ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) & - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) & - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) & - ((*(sptr) << 20) | (*(sptr + 1) >> 12)) & - ((*(sptr) << 21) | (*(sptr + 1) >> 11)) & - ((*(sptr) << 22) | (*(sptr + 1) >> 10)); - } - } -} - -static void -fdilate_1_23(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 
wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 25) | (*(sptr + 1) >> 7)) | - ((*(sptr) << 24) | (*(sptr + 1) >> 8)) | - ((*(sptr) << 23) | (*(sptr + 1) >> 9)) | - ((*(sptr) << 22) | (*(sptr + 1) >> 10)) | - ((*(sptr) << 21) | (*(sptr + 1) >> 11)) | - ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) | - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | 
(*(sptr - 1) << 17)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) | - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) | - ((*(sptr) >> 20) | (*(sptr - 1) << 12)) | - ((*(sptr) >> 21) | (*(sptr - 1) << 11)) | - ((*(sptr) >> 22) | (*(sptr - 1) << 10)) | - ((*(sptr) >> 23) | (*(sptr - 1) << 9)) | - ((*(sptr) >> 24) | (*(sptr - 1) << 8)); - } - } -} - -static void -ferode_1_23(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 25) | (*(sptr - 1) << 7)) & - ((*(sptr) >> 24) | (*(sptr - 1) << 8)) & - ((*(sptr) >> 23) | (*(sptr - 1) << 9)) & - ((*(sptr) >> 22) | (*(sptr - 1) << 10)) & - ((*(sptr) >> 21) | (*(sptr - 1) << 11)) & - ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) & - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - 
((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) & - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) & - ((*(sptr) << 20) | (*(sptr + 1) >> 12)) & - ((*(sptr) << 21) | (*(sptr + 1) >> 11)) & - ((*(sptr) << 22) | (*(sptr + 1) >> 10)) & - ((*(sptr) << 23) | (*(sptr + 1) >> 9)) & - ((*(sptr) << 24) | (*(sptr + 1) >> 8)); - } - } -} - -static void -fdilate_1_24(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 25) | (*(sptr + 1) >> 7)) | - ((*(sptr) << 24) | (*(sptr + 1) >> 8)) | - ((*(sptr) << 23) | (*(sptr + 1) >> 9)) | - ((*(sptr) << 22) | (*(sptr + 1) >> 10)) | - ((*(sptr) << 21) | (*(sptr + 1) >> 11)) | - ((*(sptr) << 20) | (*(sptr + 1) >> 12)) | - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) | - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) | - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) | - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) | - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) | - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) | - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) | - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) | - ((*(sptr) << 11) | (*(sptr + 1) >> 
21)) | - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) | - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) | - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) | - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) | - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) | - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) | - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) | - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) | - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) | - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) | - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) | - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) | - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) | - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) | - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) | - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) | - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) | - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) | - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) | - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) | - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) | - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) | - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) | - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) | - ((*(sptr) >> 20) | (*(sptr - 1) << 12)) | - ((*(sptr) >> 21) | (*(sptr - 1) << 11)) | - ((*(sptr) >> 22) | (*(sptr - 1) << 10)) | - ((*(sptr) >> 23) | (*(sptr - 1) << 9)) | - ((*(sptr) >> 24) | (*(sptr - 1) << 8)) | - ((*(sptr) >> 25) | (*(sptr - 1) << 7)); - } - } -} - -static void -ferode_1_24(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 25) | (*(sptr - 1) << 7)) & - ((*(sptr) >> 24) | 
(*(sptr - 1) << 8)) & - ((*(sptr) >> 23) | (*(sptr - 1) << 9)) & - ((*(sptr) >> 22) | (*(sptr - 1) << 10)) & - ((*(sptr) >> 21) | (*(sptr - 1) << 11)) & - ((*(sptr) >> 20) | (*(sptr - 1) << 12)) & - ((*(sptr) >> 19) | (*(sptr - 1) << 13)) & - ((*(sptr) >> 18) | (*(sptr - 1) << 14)) & - ((*(sptr) >> 17) | (*(sptr - 1) << 15)) & - ((*(sptr) >> 16) | (*(sptr - 1) << 16)) & - ((*(sptr) >> 15) | (*(sptr - 1) << 17)) & - ((*(sptr) >> 14) | (*(sptr - 1) << 18)) & - ((*(sptr) >> 13) | (*(sptr - 1) << 19)) & - ((*(sptr) >> 12) | (*(sptr - 1) << 20)) & - ((*(sptr) >> 11) | (*(sptr - 1) << 21)) & - ((*(sptr) >> 10) | (*(sptr - 1) << 22)) & - ((*(sptr) >> 9) | (*(sptr - 1) << 23)) & - ((*(sptr) >> 8) | (*(sptr - 1) << 24)) & - ((*(sptr) >> 7) | (*(sptr - 1) << 25)) & - ((*(sptr) >> 6) | (*(sptr - 1) << 26)) & - ((*(sptr) >> 5) | (*(sptr - 1) << 27)) & - ((*(sptr) >> 4) | (*(sptr - 1) << 28)) & - ((*(sptr) >> 3) | (*(sptr - 1) << 29)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr) << 3) | (*(sptr + 1) >> 29)) & - ((*(sptr) << 4) | (*(sptr + 1) >> 28)) & - ((*(sptr) << 5) | (*(sptr + 1) >> 27)) & - ((*(sptr) << 6) | (*(sptr + 1) >> 26)) & - ((*(sptr) << 7) | (*(sptr + 1) >> 25)) & - ((*(sptr) << 8) | (*(sptr + 1) >> 24)) & - ((*(sptr) << 9) | (*(sptr + 1) >> 23)) & - ((*(sptr) << 10) | (*(sptr + 1) >> 22)) & - ((*(sptr) << 11) | (*(sptr + 1) >> 21)) & - ((*(sptr) << 12) | (*(sptr + 1) >> 20)) & - ((*(sptr) << 13) | (*(sptr + 1) >> 19)) & - ((*(sptr) << 14) | (*(sptr + 1) >> 18)) & - ((*(sptr) << 15) | (*(sptr + 1) >> 17)) & - ((*(sptr) << 16) | (*(sptr + 1) >> 16)) & - ((*(sptr) << 17) | (*(sptr + 1) >> 15)) & - ((*(sptr) << 18) | (*(sptr + 1) >> 14)) & - ((*(sptr) << 19) | (*(sptr + 1) >> 13)) & - ((*(sptr) << 20) | (*(sptr + 1) >> 12)) & - ((*(sptr) << 21) | (*(sptr + 1) >> 11)) & - ((*(sptr) << 22) | (*(sptr + 1) >> 10)) & - 
((*(sptr) << 23) | (*(sptr + 1) >> 9)) & - ((*(sptr) << 24) | (*(sptr + 1) >> 8)) & - ((*(sptr) << 25) | (*(sptr + 1) >> 7)); - } - } -} - -static void -fdilate_1_25(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls)) | - (*sptr); - } - } -} - -static void -ferode_1_25(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls)) & - (*sptr); - } - } -} - -static void -fdilate_1_26(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)); - } - } -} - -static void -ferode_1_26(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)); - } - } -} - -static void -fdilate_1_27(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) 
-{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)); - } - } -} - -static void -ferode_1_27(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)); - } - } -} - -static void -fdilate_1_28(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)); - } - } -} - -static void -ferode_1_28(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)); - } - } -} - -static void -fdilate_1_29(l_uint32 *datad, - l_int32 w, - 
l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)); - } - } -} - -static void -ferode_1_29(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)); - } - } -} - -static void -fdilate_1_30(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)); - } - } -} - -static void -ferode_1_30(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i 
= 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)); - } - } -} - -static void -fdilate_1_31(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)); - } - } -} - -static void -ferode_1_31(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)); - } - } -} - -static void -fdilate_1_32(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 
0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)); - } - } -} - -static void -ferode_1_32(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)); - } - } -} - -static void -fdilate_1_33(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)); - } - } -} - -static void -ferode_1_33(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src 
*/ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)); - } - } -} - -static void -fdilate_1_34(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)); - } - } -} - -static void -ferode_1_34(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)); - } - } -} - -static void -fdilate_1_35(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; 
-l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)); - } - } -} - -static void -ferode_1_35(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)); - } - } -} - -static void -fdilate_1_36(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr 
+ wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)); - } - } -} - -static void -ferode_1_36(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)); - } - } -} - -static void -fdilate_1_37(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)); - } - } -} - -static void -ferode_1_37(l_uint32 
*datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)); - } - } -} - -static void -fdilate_1_38(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)); - } - } -} - -static void -ferode_1_38(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7; - - wpls2 = 2 * wpls; - wpls3 = 3 * 
wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)); - } - } -} - -static void -fdilate_1_39(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)); - } - } -} - -static void -ferode_1_39(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10; - - wpls2 = 2 * wpls; 
- wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)); - } - } -} - -static void -fdilate_1_40(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)); - } - } -} - -static void -ferode_1_40(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 
wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)); - } - } -} - -static void -fdilate_1_41(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - 
(*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)); - } - } -} - -static void -ferode_1_41(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)); - } - } -} - -static void -fdilate_1_42(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - 
wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)); - } - } -} - -static void -ferode_1_42(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - 
wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)); - } - } -} - -static void -fdilate_1_43(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - 
(*(sptr - wpls14)) | - (*(sptr - wpls15)); - } - } -} - -static void -ferode_1_43(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)); - } - } -} - -static void -fdilate_1_44(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 
* wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls17)) | - (*(sptr + wpls16)) | - (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)) | - (*(sptr - wpls15)) | - (*(sptr - wpls16)) | - (*(sptr - wpls17)); - } - } -} - -static void -ferode_1_44(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { 
- sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls17)) & - (*(sptr - wpls16)) & - (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)) & - (*(sptr + wpls16)) & - (*(sptr + wpls17)); - } - } -} - -static void -fdilate_1_45(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls20)) | - (*(sptr + wpls19)) | - (*(sptr + wpls18)) | - (*(sptr + wpls17)) | - (*(sptr + wpls16)) | - (*(sptr + 
wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)) | - (*(sptr - wpls15)) | - (*(sptr - wpls16)) | - (*(sptr - wpls17)) | - (*(sptr - wpls18)) | - (*(sptr - wpls19)); - } - } -} - -static void -ferode_1_45(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls20)) & - (*(sptr - wpls19)) & - (*(sptr - wpls18)) & - (*(sptr - wpls17)) & - (*(sptr - wpls16)) & - (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - 
(*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)) & - (*(sptr + wpls16)) & - (*(sptr + wpls17)) & - (*(sptr + wpls18)) & - (*(sptr + wpls19)); - } - } -} - -static void -fdilate_1_46(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls20)) | - (*(sptr + wpls19)) | - (*(sptr + wpls18)) | - (*(sptr + wpls17)) | - (*(sptr + wpls16)) | - (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) 
| - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)) | - (*(sptr - wpls15)) | - (*(sptr - wpls16)) | - (*(sptr - wpls17)) | - (*(sptr - wpls18)) | - (*(sptr - wpls19)) | - (*(sptr - wpls20)); - } - } -} - -static void -ferode_1_46(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls20)) & - (*(sptr - wpls19)) & - (*(sptr - wpls18)) & - (*(sptr - wpls17)) & - (*(sptr - wpls16)) & - (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - 
(*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)) & - (*(sptr + wpls16)) & - (*(sptr + wpls17)) & - (*(sptr + wpls18)) & - (*(sptr + wpls19)) & - (*(sptr + wpls20)); - } - } -} - -static void -fdilate_1_47(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; -l_int32 wpls21, wpls22; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - wpls21 = 21 * wpls; - wpls22 = 22 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls22)) | - (*(sptr + wpls21)) | - (*(sptr + wpls20)) | - (*(sptr + wpls19)) | - (*(sptr + wpls18)) | - (*(sptr + wpls17)) | - (*(sptr + wpls16)) | - (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - 
wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)) | - (*(sptr - wpls15)) | - (*(sptr - wpls16)) | - (*(sptr - wpls17)) | - (*(sptr - wpls18)) | - (*(sptr - wpls19)) | - (*(sptr - wpls20)) | - (*(sptr - wpls21)) | - (*(sptr - wpls22)); - } - } -} - -static void -ferode_1_47(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; -l_int32 wpls21, wpls22; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - wpls21 = 21 * wpls; - wpls22 = 22 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls22)) & - (*(sptr - wpls21)) & - (*(sptr - wpls20)) & - (*(sptr - wpls19)) & - (*(sptr - wpls18)) & - (*(sptr - wpls17)) & - (*(sptr - wpls16)) & - (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - 
wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)) & - (*(sptr + wpls16)) & - (*(sptr + wpls17)) & - (*(sptr + wpls18)) & - (*(sptr + wpls19)) & - (*(sptr + wpls20)) & - (*(sptr + wpls21)) & - (*(sptr + wpls22)); - } - } -} - -static void -fdilate_1_48(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; -l_int32 wpls21, wpls22, wpls23, wpls24; -l_int32 wpls25; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - wpls21 = 21 * wpls; - wpls22 = 22 * wpls; - wpls23 = 23 * wpls; - wpls24 = 24 * wpls; - wpls25 = 25 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls25)) | - (*(sptr + wpls24)) | - (*(sptr + wpls23)) | - (*(sptr + wpls22)) | - (*(sptr + wpls21)) | - (*(sptr + wpls20)) | - (*(sptr + wpls19)) | - (*(sptr + wpls18)) | - (*(sptr + wpls17)) | - (*(sptr + wpls16)) | - (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr 
+ wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)) | - (*(sptr - wpls15)) | - (*(sptr - wpls16)) | - (*(sptr - wpls17)) | - (*(sptr - wpls18)) | - (*(sptr - wpls19)) | - (*(sptr - wpls20)) | - (*(sptr - wpls21)) | - (*(sptr - wpls22)) | - (*(sptr - wpls23)) | - (*(sptr - wpls24)); - } - } -} - -static void -ferode_1_48(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; -l_int32 wpls21, wpls22, wpls23, wpls24; -l_int32 wpls25; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - wpls21 = 21 * wpls; - wpls22 = 22 * wpls; - wpls23 = 23 * wpls; - wpls24 = 24 * wpls; - wpls25 = 25 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls25)) & - (*(sptr - 
wpls24)) & - (*(sptr - wpls23)) & - (*(sptr - wpls22)) & - (*(sptr - wpls21)) & - (*(sptr - wpls20)) & - (*(sptr - wpls19)) & - (*(sptr - wpls18)) & - (*(sptr - wpls17)) & - (*(sptr - wpls16)) & - (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)) & - (*(sptr + wpls16)) & - (*(sptr + wpls17)) & - (*(sptr + wpls18)) & - (*(sptr + wpls19)) & - (*(sptr + wpls20)) & - (*(sptr + wpls21)) & - (*(sptr + wpls22)) & - (*(sptr + wpls23)) & - (*(sptr + wpls24)); - } - } -} - -static void -fdilate_1_49(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; -l_int32 wpls21, wpls22, wpls23, wpls24; -l_int32 wpls25; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - wpls21 = 21 * wpls; - wpls22 = 22 * wpls; - wpls23 = 23 * wpls; - 
wpls24 = 24 * wpls; - wpls25 = 25 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr + wpls25)) | - (*(sptr + wpls24)) | - (*(sptr + wpls23)) | - (*(sptr + wpls22)) | - (*(sptr + wpls21)) | - (*(sptr + wpls20)) | - (*(sptr + wpls19)) | - (*(sptr + wpls18)) | - (*(sptr + wpls17)) | - (*(sptr + wpls16)) | - (*(sptr + wpls15)) | - (*(sptr + wpls14)) | - (*(sptr + wpls13)) | - (*(sptr + wpls12)) | - (*(sptr + wpls11)) | - (*(sptr + wpls10)) | - (*(sptr + wpls9)) | - (*(sptr + wpls8)) | - (*(sptr + wpls7)) | - (*(sptr + wpls6)) | - (*(sptr + wpls5)) | - (*(sptr + wpls4)) | - (*(sptr + wpls3)) | - (*(sptr + wpls2)) | - (*(sptr + wpls)) | - (*sptr) | - (*(sptr - wpls)) | - (*(sptr - wpls2)) | - (*(sptr - wpls3)) | - (*(sptr - wpls4)) | - (*(sptr - wpls5)) | - (*(sptr - wpls6)) | - (*(sptr - wpls7)) | - (*(sptr - wpls8)) | - (*(sptr - wpls9)) | - (*(sptr - wpls10)) | - (*(sptr - wpls11)) | - (*(sptr - wpls12)) | - (*(sptr - wpls13)) | - (*(sptr - wpls14)) | - (*(sptr - wpls15)) | - (*(sptr - wpls16)) | - (*(sptr - wpls17)) | - (*(sptr - wpls18)) | - (*(sptr - wpls19)) | - (*(sptr - wpls20)) | - (*(sptr - wpls21)) | - (*(sptr - wpls22)) | - (*(sptr - wpls23)) | - (*(sptr - wpls24)) | - (*(sptr - wpls25)); - } - } -} - -static void -ferode_1_49(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2, wpls3, wpls4; -l_int32 wpls5, wpls6, wpls7, wpls8; -l_int32 wpls9, wpls10, wpls11, wpls12; -l_int32 wpls13, wpls14, wpls15, wpls16; -l_int32 wpls17, wpls18, wpls19, wpls20; -l_int32 wpls21, wpls22, wpls23, wpls24; -l_int32 wpls25; - - wpls2 = 2 * wpls; - wpls3 = 3 * wpls; - wpls4 = 4 * wpls; - wpls5 = 5 * wpls; - wpls6 = 6 * wpls; - wpls7 = 7 * wpls; - wpls8 = 8 * wpls; - wpls9 = 9 * wpls; - wpls10 = 10 * 
wpls; - wpls11 = 11 * wpls; - wpls12 = 12 * wpls; - wpls13 = 13 * wpls; - wpls14 = 14 * wpls; - wpls15 = 15 * wpls; - wpls16 = 16 * wpls; - wpls17 = 17 * wpls; - wpls18 = 18 * wpls; - wpls19 = 19 * wpls; - wpls20 = 20 * wpls; - wpls21 = 21 * wpls; - wpls22 = 22 * wpls; - wpls23 = 23 * wpls; - wpls24 = 24 * wpls; - wpls25 = 25 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*(sptr - wpls25)) & - (*(sptr - wpls24)) & - (*(sptr - wpls23)) & - (*(sptr - wpls22)) & - (*(sptr - wpls21)) & - (*(sptr - wpls20)) & - (*(sptr - wpls19)) & - (*(sptr - wpls18)) & - (*(sptr - wpls17)) & - (*(sptr - wpls16)) & - (*(sptr - wpls15)) & - (*(sptr - wpls14)) & - (*(sptr - wpls13)) & - (*(sptr - wpls12)) & - (*(sptr - wpls11)) & - (*(sptr - wpls10)) & - (*(sptr - wpls9)) & - (*(sptr - wpls8)) & - (*(sptr - wpls7)) & - (*(sptr - wpls6)) & - (*(sptr - wpls5)) & - (*(sptr - wpls4)) & - (*(sptr - wpls3)) & - (*(sptr - wpls2)) & - (*(sptr - wpls)) & - (*sptr) & - (*(sptr + wpls)) & - (*(sptr + wpls2)) & - (*(sptr + wpls3)) & - (*(sptr + wpls4)) & - (*(sptr + wpls5)) & - (*(sptr + wpls6)) & - (*(sptr + wpls7)) & - (*(sptr + wpls8)) & - (*(sptr + wpls9)) & - (*(sptr + wpls10)) & - (*(sptr + wpls11)) & - (*(sptr + wpls12)) & - (*(sptr + wpls13)) & - (*(sptr + wpls14)) & - (*(sptr + wpls15)) & - (*(sptr + wpls16)) & - (*(sptr + wpls17)) & - (*(sptr + wpls18)) & - (*(sptr + wpls19)) & - (*(sptr + wpls20)) & - (*(sptr + wpls21)) & - (*(sptr + wpls22)) & - (*(sptr + wpls23)) & - (*(sptr + wpls24)) & - (*(sptr + wpls25)); - } - } -} - -static void -fdilate_1_50(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = 
datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) | - (*(sptr + wpls)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr); - } - } -} - -static void -ferode_1_50(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) & - (*(sptr - wpls)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr); - } - } -} - -static void -fdilate_1_51(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) | - (*(sptr + wpls)) | - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) | - (*(sptr - wpls)) | - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)); - } - } -} - -static void -ferode_1_51(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) & - (*(sptr - wpls)) & - ((*(sptr - wpls) << 1) | (*(sptr - 
wpls + 1) >> 31)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) & - (*(sptr + wpls)) & - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)); - } - } -} - -static void -fdilate_1_52(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr + wpls2) << 2) | (*(sptr + wpls2 + 1) >> 30)) | - ((*(sptr + wpls2) << 1) | (*(sptr + wpls2 + 1) >> 31)) | - (*(sptr + wpls2)) | - ((*(sptr + wpls2) >> 1) | (*(sptr + wpls2 - 1) << 31)) | - ((*(sptr + wpls) << 2) | (*(sptr + wpls + 1) >> 30)) | - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) | - (*(sptr + wpls)) | - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr - wpls) << 2) | (*(sptr - wpls + 1) >> 30)) | - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) | - (*(sptr - wpls)) | - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)); - } - } -} - -static void -ferode_1_52(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls2) >> 2) | (*(sptr - wpls2 - 1) << 30)) & - ((*(sptr - wpls2) >> 1) | (*(sptr - wpls2 - 1) << 31)) & - (*(sptr - wpls2)) & - ((*(sptr - wpls2) << 1) | 
(*(sptr - wpls2 + 1) >> 31)) & - ((*(sptr - wpls) >> 2) | (*(sptr - wpls - 1) << 30)) & - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) & - (*(sptr - wpls)) & - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr + wpls) >> 2) | (*(sptr + wpls - 1) << 30)) & - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) & - (*(sptr + wpls)) & - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)); - } - } -} - -static void -fdilate_1_53(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr + wpls2) << 2) | (*(sptr + wpls2 + 1) >> 30)) | - ((*(sptr + wpls2) << 1) | (*(sptr + wpls2 + 1) >> 31)) | - (*(sptr + wpls2)) | - ((*(sptr + wpls2) >> 1) | (*(sptr + wpls2 - 1) << 31)) | - ((*(sptr + wpls2) >> 2) | (*(sptr + wpls2 - 1) << 30)) | - ((*(sptr + wpls) << 2) | (*(sptr + wpls + 1) >> 30)) | - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) | - (*(sptr + wpls)) | - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) | - ((*(sptr + wpls) >> 2) | (*(sptr + wpls - 1) << 30)) | - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) | - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) | - (*sptr) | - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) | - ((*(sptr - wpls) << 2) | (*(sptr - wpls + 1) >> 30)) | - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) | - (*(sptr - wpls)) | - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) | - ((*(sptr - wpls) >> 2) | (*(sptr - wpls - 1) << 30)) | - ((*(sptr - wpls2) << 2) | (*(sptr - wpls2 + 1) >> 30)) | - ((*(sptr - 
wpls2) << 1) | (*(sptr - wpls2 + 1) >> 31)) | - (*(sptr - wpls2)) | - ((*(sptr - wpls2) >> 1) | (*(sptr - wpls2 - 1) << 31)) | - ((*(sptr - wpls2) >> 2) | (*(sptr - wpls2 - 1) << 30)); - } - } -} - -static void -ferode_1_53(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls2) >> 2) | (*(sptr - wpls2 - 1) << 30)) & - ((*(sptr - wpls2) >> 1) | (*(sptr - wpls2 - 1) << 31)) & - (*(sptr - wpls2)) & - ((*(sptr - wpls2) << 1) | (*(sptr - wpls2 + 1) >> 31)) & - ((*(sptr - wpls2) << 2) | (*(sptr - wpls2 + 1) >> 30)) & - ((*(sptr - wpls) >> 2) | (*(sptr - wpls - 1) << 30)) & - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) & - (*(sptr - wpls)) & - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) & - ((*(sptr - wpls) << 2) | (*(sptr - wpls + 1) >> 30)) & - ((*(sptr) >> 2) | (*(sptr - 1) << 30)) & - ((*(sptr) >> 1) | (*(sptr - 1) << 31)) & - (*sptr) & - ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - ((*(sptr) << 2) | (*(sptr + 1) >> 30)) & - ((*(sptr + wpls) >> 2) | (*(sptr + wpls - 1) << 30)) & - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) & - (*(sptr + wpls)) & - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) & - ((*(sptr + wpls) << 2) | (*(sptr + wpls + 1) >> 30)) & - ((*(sptr + wpls2) >> 2) | (*(sptr + wpls2 - 1) << 30)) & - ((*(sptr + wpls2) >> 1) | (*(sptr + wpls2 - 1) << 31)) & - (*(sptr + wpls2)) & - ((*(sptr + wpls2) << 1) | (*(sptr + wpls2 + 1) >> 31)) & - ((*(sptr + wpls2) << 2) | (*(sptr + wpls2 + 1) >> 30)); - } - } -} - -static void -fdilate_1_54(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, 
*dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) >> 1) | (*(sptr - 1) << 31)) | - (*(sptr - wpls)); - } - } -} - -static void -ferode_1_54(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr) << 1) | (*(sptr + 1) >> 31)) & - (*(sptr + wpls)); - } - } -} - -static void -fdilate_1_55(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr) | - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)); - } - } -} - -static void -ferode_1_55(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; - - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = (*sptr) & - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)); - } - } -} - -static void -fdilate_1_56(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr 
= datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr + wpls2) >> 2) | (*(sptr + wpls2 - 1) << 30)) | - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) | - (*sptr) | - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) | - ((*(sptr - wpls2) << 2) | (*(sptr - wpls2 + 1) >> 30)); - } - } -} - -static void -ferode_1_56(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls2) << 2) | (*(sptr - wpls2 + 1) >> 30)) & - ((*(sptr - wpls) << 1) | (*(sptr - wpls + 1) >> 31)) & - (*sptr) & - ((*(sptr + wpls) >> 1) | (*(sptr + wpls - 1) << 31)) & - ((*(sptr + wpls2) >> 2) | (*(sptr + wpls2 - 1) << 30)); - } - } -} - -static void -fdilate_1_57(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i * wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr + wpls2) << 2) | (*(sptr + wpls2 + 1) >> 30)) | - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) | - (*sptr) | - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) | - ((*(sptr - wpls2) >> 2) | (*(sptr - wpls2 - 1) << 30)); - } - } -} - -static void -ferode_1_57(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls) -{ -l_int32 i; -l_int32 j, pwpls; -l_uint32 *sptr, *dptr; -l_int32 wpls2; - - wpls2 = 2 * wpls; - pwpls = (l_uint32)(w + 31) / 32; /* proper wpl of src */ - - for (i = 0; i < h; i++) { - sptr = datas + i 
* wpls; - dptr = datad + i * wpld; - for (j = 0; j < pwpls; j++, sptr++, dptr++) { - *dptr = ((*(sptr - wpls2) >> 2) | (*(sptr - wpls2 - 1) << 30)) & - ((*(sptr - wpls) >> 1) | (*(sptr - wpls - 1) << 31)) & - (*sptr) & - ((*(sptr + wpls) << 1) | (*(sptr + wpls + 1) >> 31)) & - ((*(sptr + wpls2) << 2) | (*(sptr + wpls2 + 1) >> 30)); - } - } -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fpix1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fpix1.c deleted file mode 100644 index f9691d89..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fpix1.c +++ /dev/null @@ -1,2342 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file fpix1.c - *
- *
- *    ---------------------------------------------------
- *    This file has these FPix, FPixa and DPix utilities:
- *         - creation and destruction
- *         - accessors
- *         - serialization and deserialization
- *    ---------------------------------------------------
- *
- *    FPix Create/copy/destroy
- *          FPIX          *fpixCreate()
- *          FPIX          *fpixCreateTemplate()
- *          FPIX          *fpixClone()
- *          FPIX          *fpixCopy()
- *          l_int32        fpixResizeImageData()
- *          void           fpixDestroy()
- *
- *    FPix accessors
- *          l_int32        fpixGetDimensions()
- *          l_int32        fpixSetDimensions()
- *          l_int32        fpixGetWpl()
- *          l_int32        fpixSetWpl()
- *          l_int32        fpixGetRefcount()
- *          l_int32        fpixChangeRefcount()
- *          l_int32        fpixGetResolution()
- *          l_int32        fpixSetResolution()
- *          l_int32        fpixCopyResolution()
- *          l_float32     *fpixGetData()
- *          l_int32        fpixSetData()
- *          l_int32        fpixGetPixel()
- *          l_int32        fpixSetPixel()
- *
- *    FPixa Create/copy/destroy
- *          FPIXA         *fpixaCreate()
- *          FPIXA         *fpixaCopy()
- *          void           fpixaDestroy()
- *
- *    FPixa addition
- *          l_int32        fpixaAddFPix()
- *          static l_int32 fpixaExtendArray()
- *          static l_int32 fpixaExtendArrayToSize()
- *
- *    FPixa accessors
- *          l_int32        fpixaGetCount()
- *          l_int32        fpixaChangeRefcount()
- *          FPIX          *fpixaGetFPix()
- *          l_int32        fpixaGetFPixDimensions()
- *          l_float32     *fpixaGetData()
- *          l_int32        fpixaGetPixel()
- *          l_int32        fpixaSetPixel()
- *
- *    DPix Create/copy/destroy
- *          DPIX          *dpixCreate()
- *          DPIX          *dpixCreateTemplate()
- *          DPIX          *dpixClone()
- *          DPIX          *dpixCopy()
- *          l_int32        dpixResizeImageData()
- *          void           dpixDestroy()
- *
- *    DPix accessors
- *          l_int32        dpixGetDimensions()
- *          l_int32        dpixSetDimensions()
- *          l_int32        dpixGetWpl()
- *          l_int32        dpixSetWpl()
- *          l_int32        dpixGetRefcount()
- *          l_int32        dpixChangeRefcount()
- *          l_int32        dpixGetResolution()
- *          l_int32        dpixSetResolution()
- *          l_int32        dpixCopyResolution()
- *          l_float64     *dpixGetData()
- *          l_int32        dpixSetData()
- *          l_int32        dpixGetPixel()
- *          l_int32        dpixSetPixel()
- *
- *    FPix serialized I/O
- *          FPIX          *fpixRead()
- *          FPIX          *fpixReadStream()
- *          FPIX          *fpixReadMem()
- *          l_int32        fpixWrite()
- *          l_int32        fpixWriteStream()
- *          l_int32        fpixWriteMem()
- *          FPIX          *fpixEndianByteSwap()
- *
- *    DPix serialized I/O
- *          DPIX          *dpixRead()
- *          DPIX          *dpixReadStream()
- *          DPIX          *dpixReadMem()
- *          l_int32        dpixWrite()
- *          l_int32        dpixWriteStream()
- *          l_int32        dpixWriteMem()
- *          DPIX          *dpixEndianByteSwap()
- *
- *    Print FPix (subsampled, for debugging)
- *          l_int32        fpixPrintStream()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 100000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - - /* Static functions */ -static l_int32 fpixaExtendArray(FPIXA *fpixa); -static l_int32 fpixaExtendArrayToSize(FPIXA *fpixa, l_int32 size); - -/*--------------------------------------------------------------------* - * FPix Create/copy/destroy * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixCreate() - * - * \param[in] width, height - * \return fpixd with data allocated and initialized to 0, or NULL on error - * - *
- * Notes:
- *      (1) Makes a FPix of specified size, with the data array
- *          allocated and initialized to 0.
- *      (2) The number of pixels must be less than 2^29.
- * 
- */ -FPIX * -fpixCreate(l_int32 width, - l_int32 height) -{ -l_float32 *data; -l_uint64 npix64; -FPIX *fpixd; - - PROCNAME("fpixCreate"); - - if (width <= 0) - return (FPIX *)ERROR_PTR("width must be > 0", procName, NULL); - if (height <= 0) - return (FPIX *)ERROR_PTR("height must be > 0", procName, NULL); - - /* Avoid overflow in malloc arg, malicious or otherwise */ - npix64 = (l_uint64)width * (l_uint64)height; /* # of 4-byte pixels */ - if (npix64 >= (1LL << 29)) { - L_ERROR("requested w = %d, h = %d\n", procName, width, height); - return (FPIX *)ERROR_PTR("requested bytes >= 2^31", procName, NULL); - } - - fpixd = (FPIX *)LEPT_CALLOC(1, sizeof(FPIX)); - fpixSetDimensions(fpixd, width, height); - fpixSetWpl(fpixd, width); /* 4-byte words */ - fpixd->refcount = 1; - - data = (l_float32 *)LEPT_CALLOC((size_t)width * height, sizeof(l_float32)); - if (!data) { - fpixDestroy(&fpixd); - return (FPIX *)ERROR_PTR("calloc fail for data", procName, NULL); - } - fpixSetData(fpixd, data); - return fpixd; -} - - -/*! - * \brief fpixCreateTemplate() - * - * \param[in] fpixs - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) Makes a FPix of the same size as the input FPix, with the
- *          data array allocated and initialized to 0.
- *      (2) Copies the resolution.
- * 
- */ -FPIX * -fpixCreateTemplate(FPIX *fpixs) -{ -l_int32 w, h; -FPIX *fpixd; - - PROCNAME("fpixCreateTemplate"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixGetDimensions(fpixs, &w, &h); - if ((fpixd = fpixCreate(w, h)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - fpixCopyResolution(fpixd, fpixs); - return fpixd; -} - - -/*! - * \brief fpixClone() - * - * \param[in] fpix - * \return same fpix ptr, or NULL on error - * - *
- * Notes:
- *      (1) See pixClone() for definition and usage.
- * 
- */ -FPIX * -fpixClone(FPIX *fpix) -{ - PROCNAME("fpixClone"); - - if (!fpix) - return (FPIX *)ERROR_PTR("fpix not defined", procName, NULL); - fpixChangeRefcount(fpix, 1); - - return fpix; -} - - -/*! - * \brief fpixCopy() - * - * \param[in] fpixd [optional] can be null, or equal to fpixs, - * or different from fpixs - * \param[in] fpixs - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) There are three cases:
- *            (a) fpixd == null  (makes a new fpix; refcount = 1)
- *            (b) fpixd == fpixs  (no-op)
- *            (c) fpixd != fpixs  (data copy; no change in refcount)
- *          If the refcount of fpixd > 1, case (c) will side-effect
- *          these handles.
- *      (2) The general pattern of use is:
- *             fpixd = fpixCopy(fpixd, fpixs);
- *          This will work for all three cases.
- *          For clarity when the case is known, you can use:
- *            (a) fpixd = fpixCopy(NULL, fpixs);
- *            (c) fpixCopy(fpixd, fpixs);
- *      (3) For case (c), we check if fpixs and fpixd are the same size.
- *          If so, the data is copied directly.
- *          Otherwise, the data is reallocated to the correct size
- *          and the copy proceeds.  The refcount of fpixd is unchanged.
- *      (4) This operation, like all others that may involve a pre-existing
- *          fpixd, will side-effect any existing clones of fpixd.
- * 
- */ -FPIX * -fpixCopy(FPIX *fpixd, /* can be null */ - FPIX *fpixs) -{ -l_int32 w, h, bytes; -l_float32 *datas, *datad; - - PROCNAME("fpixCopy"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (fpixs == fpixd) - return fpixd; - - /* Total bytes in image data */ - fpixGetDimensions(fpixs, &w, &h); - bytes = 4 * w * h; - - /* If we're making a new fpix ... */ - if (!fpixd) { - if ((fpixd = fpixCreateTemplate(fpixs)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - datas = fpixGetData(fpixs); - datad = fpixGetData(fpixd); - memcpy(datad, datas, bytes); - return fpixd; - } - - /* Reallocate image data if sizes are different */ - fpixResizeImageData(fpixd, fpixs); - - /* Copy data */ - fpixCopyResolution(fpixd, fpixs); - datas = fpixGetData(fpixs); - datad = fpixGetData(fpixd); - memcpy(datad, datas, bytes); - return fpixd; -} - - -/*! - * \brief fpixResizeImageData() - * - * \param[in] fpixd, fpixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the data sizes differ, this destroys the existing
- *          data in fpixd and allocates a new, uninitialized, data array
- *          of the same size as the data in fpixs.  Otherwise, this
- *          doesn't do anything.
- * 
- */ -l_ok -fpixResizeImageData(FPIX *fpixd, - FPIX *fpixs) -{ -l_int32 ws, hs, wd, hd, bytes; -l_float32 *data; - - PROCNAME("fpixResizeImageData"); - - if (!fpixs) - return ERROR_INT("fpixs not defined", procName, 1); - if (!fpixd) - return ERROR_INT("fpixd not defined", procName, 1); - - fpixGetDimensions(fpixs, &ws, &hs); - fpixGetDimensions(fpixd, &wd, &hd); - if (ws == wd && hs == hd) /* nothing to do */ - return 0; - - fpixSetDimensions(fpixd, ws, hs); - fpixSetWpl(fpixd, ws); - bytes = 4 * ws * hs; - data = fpixGetData(fpixd); - if (data) LEPT_FREE(data); - if ((data = (l_float32 *)LEPT_MALLOC(bytes)) == NULL) - return ERROR_INT("LEPT_MALLOC fail for data", procName, 1); - fpixSetData(fpixd, data); - return 0; -} - - -/*! - * \brief fpixDestroy() - * - * \param[in,out] pfpix will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the fpix.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -fpixDestroy(FPIX **pfpix) -{ -l_float32 *data; -FPIX *fpix; - - PROCNAME("fpixDestroy"); - - if (!pfpix) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((fpix = *pfpix) == NULL) - return; - - /* Decrement the ref count. If it is 0, destroy the fpix. */ - fpixChangeRefcount(fpix, -1); - if (fpixGetRefcount(fpix) <= 0) { - if ((data = fpixGetData(fpix)) != NULL) - LEPT_FREE(data); - LEPT_FREE(fpix); - } - - *pfpix = NULL; - return; -} - - -/*--------------------------------------------------------------------* - * FPix Accessors * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixGetDimensions() - * - * \param[in] fpix - * \param[out] pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -fpixGetDimensions(FPIX *fpix, - l_int32 *pw, - l_int32 *ph) -{ - PROCNAME("fpixGetDimensions"); - - if (!pw && !ph) - return ERROR_INT("no return val requested", procName, 1); - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - if (pw) *pw = fpix->w; - if (ph) *ph = fpix->h; - return 0; -} - - -/*! - * \brief fpixSetDimensions() - * - * \param[in] fpix - * \param[in] w, h - * \return 0 if OK, 1 on error - */ -l_ok -fpixSetDimensions(FPIX *fpix, - l_int32 w, - l_int32 h) -{ - PROCNAME("fpixSetDimensions"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - fpix->w = w; - fpix->h = h; - return 0; -} - - -/*! - * \brief fpixGetWpl() - * - * \param[in] fpix - * \return wpl, or UNDEF on error - */ -l_int32 -fpixGetWpl(FPIX *fpix) -{ - PROCNAME("fpixGetWpl"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, UNDEF); - return fpix->wpl; -} - - -/*! 
- * \brief fpixSetWpl() - * - * \param[in] fpix - * \param[in] wpl - * \return 0 if OK, 1 on error - */ -l_ok -fpixSetWpl(FPIX *fpix, - l_int32 wpl) -{ - PROCNAME("fpixSetWpl"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpix->wpl = wpl; - return 0; -} - - -/*! - * \brief fpixGetRefcount() - * - * \param[in] fpix - * \return refcount, or UNDEF on error - */ -l_int32 -fpixGetRefcount(FPIX *fpix) -{ - PROCNAME("fpixGetRefcount"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, UNDEF); - return fpix->refcount; -} - - -/*! - * \brief fpixChangeRefcount() - * - * \param[in] fpix - * \param[in] delta - * \return 0 if OK, 1 on error - */ -l_ok -fpixChangeRefcount(FPIX *fpix, - l_int32 delta) -{ - PROCNAME("fpixChangeRefcount"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpix->refcount += delta; - return 0; -} - - -/*! - * \brief fpixGetResolution() - * - * \param[in] fpix - * \param[out] pxres, pyres [optional] x and y resolution - * \return 0 if OK, 1 on error - */ -l_ok -fpixGetResolution(FPIX *fpix, - l_int32 *pxres, - l_int32 *pyres) -{ - PROCNAME("fpixGetResolution"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - if (pxres) *pxres = fpix->xres; - if (pyres) *pyres = fpix->yres; - return 0; -} - - -/*! - * \brief fpixSetResolution() - * - * \param[in] fpix - * \param[in] xres, yres x and y resolution - * \return 0 if OK, 1 on error - */ -l_ok -fpixSetResolution(FPIX *fpix, - l_int32 xres, - l_int32 yres) -{ - PROCNAME("fpixSetResolution"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpix->xres = xres; - fpix->yres = yres; - return 0; -} - - -/*! 
- * \brief fpixCopyResolution() - * - * \param[in] fpixd, fpixs - * \return 0 if OK, 1 on error - */ -l_ok -fpixCopyResolution(FPIX *fpixd, - FPIX *fpixs) -{ -l_int32 xres, yres; - PROCNAME("fpixCopyResolution"); - - if (!fpixs || !fpixd) - return ERROR_INT("fpixs and fpixd not both defined", procName, 1); - - fpixGetResolution(fpixs, &xres, &yres); - fpixSetResolution(fpixd, xres, yres); - return 0; -} - - -/*! - * \brief fpixGetData() - * - * \param[in] fpix - * \return ptr to fpix data, or NULL on error - */ -l_float32 * -fpixGetData(FPIX *fpix) -{ - PROCNAME("fpixGetData"); - - if (!fpix) - return (l_float32 *)ERROR_PTR("fpix not defined", procName, NULL); - return fpix->data; -} - - -/*! - * \brief fpixSetData() - * - * \param[in] fpix - * \param[in] data - * \return 0 if OK, 1 on error - */ -l_ok -fpixSetData(FPIX *fpix, - l_float32 *data) -{ - PROCNAME("fpixSetData"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpix->data = data; - return 0; -} - - -/*! - * \brief fpixGetPixel() - * - * \param[in] fpix - * \param[in] x,y pixel coords - * \param[out] pval pixel value - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0.0 in %pval. To avoid spamming output, it fails silently. - */ -l_ok -fpixGetPixel(FPIX *fpix, - l_int32 x, - l_int32 y, - l_float32 *pval) -{ -l_int32 w, h; - - PROCNAME("fpixGetPixel"); - - if (!pval) - return ERROR_INT("pval not defined", procName, 1); - *pval = 0.0; - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpixGetDimensions(fpix, &w, &h); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - *pval = *(fpix->data + y * w + x); - return 0; -} - - -/*! - * \brief fpixSetPixel() - * - * \param[in] fpix - * \param[in] x,y pixel coords - * \param[in] val pixel value - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0.0 in %pval. 
To avoid spamming output, it fails silently. - */ -l_ok -fpixSetPixel(FPIX *fpix, - l_int32 x, - l_int32 y, - l_float32 val) -{ -l_int32 w, h; - - PROCNAME("fpixSetPixel"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpixGetDimensions(fpix, &w, &h); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - *(fpix->data + y * w + x) = val; - return 0; -} - - -/*--------------------------------------------------------------------* - * FPixa Create/copy/destroy * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixaCreate() - * - * \param[in] n initial number of ptrs - * \return fpixa, or NULL on error - */ -FPIXA * -fpixaCreate(l_int32 n) -{ -FPIXA *fpixa; - - PROCNAME("fpixaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - fpixa = (FPIXA *)LEPT_CALLOC(1, sizeof(FPIXA)); - fpixa->n = 0; - fpixa->nalloc = n; - fpixa->refcount = 1; - if ((fpixa->fpix = (FPIX **)LEPT_CALLOC(n, sizeof(FPIX *))) == NULL) { - fpixaDestroy(&fpixa); - return (FPIXA *)ERROR_PTR("fpixa ptrs not made", procName, NULL); - } - - return fpixa; -} - - -/*! - * \brief fpixaCopy() - * - * \param[in] fpixa - * \param[in] copyflag L_COPY, L_CLODE or L_COPY_CLONE - * \return new fpixa, or NULL on error - * - *
- * Notes:
- *      copyflag may be one of
- *        ~ L_COPY makes a new fpixa and copies each fpix
- *        ~ L_CLONE gives a new ref-counted handle to the input fpixa
- *        ~ L_COPY_CLONE makes a new fpixa with clones of all fpix
- * 
- */ -FPIXA * -fpixaCopy(FPIXA *fpixa, - l_int32 copyflag) -{ -l_int32 i; -FPIX *fpixc; -FPIXA *fpixac; - - PROCNAME("fpixaCopy"); - - if (!fpixa) - return (FPIXA *)ERROR_PTR("fpixa not defined", procName, NULL); - - if (copyflag == L_CLONE) { - fpixaChangeRefcount(fpixa, 1); - return fpixa; - } - - if (copyflag != L_COPY && copyflag != L_COPY_CLONE) - return (FPIXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - if ((fpixac = fpixaCreate(fpixa->n)) == NULL) - return (FPIXA *)ERROR_PTR("fpixac not made", procName, NULL); - for (i = 0; i < fpixa->n; i++) { - if (copyflag == L_COPY) - fpixc = fpixaGetFPix(fpixa, i, L_COPY); - else /* copy-clone */ - fpixc = fpixaGetFPix(fpixa, i, L_CLONE); - fpixaAddFPix(fpixac, fpixc, L_INSERT); - } - - return fpixac; -} - - -/*! - * \brief fpixaDestroy() - * - * \param[in,out] pfpixa will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the fpixa.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -fpixaDestroy(FPIXA **pfpixa) -{ -l_int32 i; -FPIXA *fpixa; - - PROCNAME("fpixaDestroy"); - - if (pfpixa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((fpixa = *pfpixa) == NULL) - return; - - /* Decrement the refcount. If it is 0, destroy the pixa. */ - fpixaChangeRefcount(fpixa, -1); - if (fpixa->refcount <= 0) { - for (i = 0; i < fpixa->n; i++) - fpixDestroy(&fpixa->fpix[i]); - LEPT_FREE(fpixa->fpix); - LEPT_FREE(fpixa); - } - - *pfpixa = NULL; - return; -} - - -/*--------------------------------------------------------------------* - * FPixa addition * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixaAddFPix() - * - * \param[in] fpixa - * \param[in] fpix to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK; 1 on error - */ -l_ok -fpixaAddFPix(FPIXA *fpixa, - FPIX *fpix, - l_int32 copyflag) -{ -l_int32 n; -FPIX *fpixc; - - PROCNAME("fpixaAddFPix"); - - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - if (copyflag == L_INSERT) - fpixc = fpix; - else if (copyflag == L_COPY) - fpixc = fpixCopy(NULL, fpix); - else if (copyflag == L_CLONE) - fpixc = fpixClone(fpix); - else - return ERROR_INT("invalid copyflag", procName, 1); - if (!fpixc) - return ERROR_INT("fpixc not made", procName, 1); - - n = fpixaGetCount(fpixa); - if (n >= fpixa->nalloc) - fpixaExtendArray(fpixa); - fpixa->fpix[n] = fpixc; - fpixa->n++; - - return 0; -} - - -/*! - * \brief fpixaExtendArray() - * - * \param[in] fpixa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Doubles the size of the fpixa ptr array.
- * 
- */ -static l_int32 -fpixaExtendArray(FPIXA *fpixa) -{ - PROCNAME("fpixaExtendArray"); - - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - - return fpixaExtendArrayToSize(fpixa, 2 * fpixa->nalloc); -} - - -/*! - * \brief fpixaExtendArrayToSize() - * - * \param[in] fpixa - * \param[in] size new ptr array size - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If necessary, reallocs new fpixa ptrs array to %size.
- * 
- */ -static l_int32 -fpixaExtendArrayToSize(FPIXA *fpixa, - l_int32 size) -{ - PROCNAME("fpixaExtendArrayToSize"); - - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - - if (size > fpixa->nalloc) { - if ((fpixa->fpix = (FPIX **)reallocNew((void **)&fpixa->fpix, - sizeof(FPIX *) * fpixa->nalloc, - size * sizeof(FPIX *))) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - fpixa->nalloc = size; - } - return 0; -} - - -/*--------------------------------------------------------------------* - * FPixa accessors * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixaGetCount() - * - * \param[in] fpixa - * \return count, or 0 if no pixa - */ -l_int32 -fpixaGetCount(FPIXA *fpixa) -{ - PROCNAME("fpixaGetCount"); - - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 0); - - return fpixa->n; -} - - -/*! - * \brief fpixaChangeRefcount() - * - * \param[in] fpixa - * \param[in] delta - * \return 0 if OK, 1 on error - */ -l_ok -fpixaChangeRefcount(FPIXA *fpixa, - l_int32 delta) -{ - PROCNAME("fpixaChangeRefcount"); - - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - - fpixa->refcount += delta; - return 0; -} - - -/*! - * \brief fpixaGetFPix() - * - * \param[in] fpixa - * \param[in] index to the index-th fpix - * \param[in] accesstype L_COPY or L_CLONE - * \return fpix, or NULL on error - */ -FPIX * -fpixaGetFPix(FPIXA *fpixa, - l_int32 index, - l_int32 accesstype) -{ - PROCNAME("fpixaGetFPix"); - - if (!fpixa) - return (FPIX *)ERROR_PTR("fpixa not defined", procName, NULL); - if (index < 0 || index >= fpixa->n) - return (FPIX *)ERROR_PTR("index not valid", procName, NULL); - - if (accesstype == L_COPY) - return fpixCopy(NULL, fpixa->fpix[index]); - else if (accesstype == L_CLONE) - return fpixClone(fpixa->fpix[index]); - else - return (FPIX *)ERROR_PTR("invalid accesstype", procName, NULL); -} - - -/*! 
- * \brief fpixaGetFPixDimensions() - * - * \param[in] fpixa - * \param[in] index to the index-th box - * \param[out] pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -fpixaGetFPixDimensions(FPIXA *fpixa, - l_int32 index, - l_int32 *pw, - l_int32 *ph) -{ -FPIX *fpix; - - PROCNAME("fpixaGetFPixDimensions"); - - if (!pw && !ph) - return ERROR_INT("no return val requested", procName, 1); - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - if (index < 0 || index >= fpixa->n) - return ERROR_INT("index not valid", procName, 1); - - if ((fpix = fpixaGetFPix(fpixa, index, L_CLONE)) == NULL) - return ERROR_INT("fpix not found!", procName, 1); - fpixGetDimensions(fpix, pw, ph); - fpixDestroy(&fpix); - return 0; -} - - -/*! - * \brief fpixaGetData() - * - * \param[in] fpixa - * \param[in] index into fpixa array - * \return data not a copy, or NULL on error - */ -l_float32 * -fpixaGetData(FPIXA *fpixa, - l_int32 index) -{ -l_int32 n; -l_float32 *data; -FPIX *fpix; - - PROCNAME("fpixaGetData"); - - if (!fpixa) - return (l_float32 *)ERROR_PTR("fpixa not defined", procName, NULL); - n = fpixaGetCount(fpixa); - if (index < 0 || index >= n) - return (l_float32 *)ERROR_PTR("invalid index", procName, NULL); - - fpix = fpixaGetFPix(fpixa, index, L_CLONE); - data = fpixGetData(fpix); - fpixDestroy(&fpix); - return data; -} - - -/*! 
- * \brief fpixaGetPixel() - * - * \param[in] fpixa - * \param[in] index into fpixa array - * \param[in] x,y pixel coords - * \param[out] pval pixel value - * \return 0 if OK; 1 on error - */ -l_ok -fpixaGetPixel(FPIXA *fpixa, - l_int32 index, - l_int32 x, - l_int32 y, - l_float32 *pval) -{ -l_int32 n, ret; -FPIX *fpix; - - PROCNAME("fpixaGetPixel"); - - if (!pval) - return ERROR_INT("pval not defined", procName, 1); - *pval = 0.0; - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - n = fpixaGetCount(fpixa); - if (index < 0 || index >= n) - return ERROR_INT("invalid index into fpixa", procName, 1); - - fpix = fpixaGetFPix(fpixa, index, L_CLONE); - ret = fpixGetPixel(fpix, x, y, pval); - fpixDestroy(&fpix); - return ret; -} - - -/*! - * \brief fpixaSetPixel() - * - * \param[in] fpixa - * \param[in] index into fpixa array - * \param[in] x,y pixel coords - * \param[in] val pixel value - * \return 0 if OK; 1 on error - */ -l_ok -fpixaSetPixel(FPIXA *fpixa, - l_int32 index, - l_int32 x, - l_int32 y, - l_float32 val) -{ -l_int32 n, ret; -FPIX *fpix; - - PROCNAME("fpixaSetPixel"); - - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - n = fpixaGetCount(fpixa); - if (index < 0 || index >= n) - return ERROR_INT("invalid index into fpixa", procName, 1); - - fpix = fpixaGetFPix(fpixa, index, L_CLONE); - ret = fpixSetPixel(fpix, x, y, val); - fpixDestroy(&fpix); - return ret; -} - - -/*--------------------------------------------------------------------* - * DPix Create/copy/destroy * - *--------------------------------------------------------------------*/ -/*! - * \brief dpixCreate() - * - * \param[in] width, height - * \return dpix with data allocated and initialized to 0, or NULL on error - * - *
- * Notes:
- *      (1) Makes a DPix of specified size, with the data array
- *          allocated and initialized to 0.
- *      (2) The number of pixels must be less than 2^28.
- * 
- */ -DPIX * -dpixCreate(l_int32 width, - l_int32 height) -{ -l_float64 *data; -l_uint64 npix64; -DPIX *dpix; - - PROCNAME("dpixCreate"); - - if (width <= 0) - return (DPIX *)ERROR_PTR("width must be > 0", procName, NULL); - if (height <= 0) - return (DPIX *)ERROR_PTR("height must be > 0", procName, NULL); - - /* Avoid overflow in malloc arg, malicious or otherwise */ - npix64 = (l_uint64)width * (l_uint64)height; /* # of 8 byte pixels */ - if (npix64 >= (1LL << 28)) { - L_ERROR("requested w = %d, h = %d\n", procName, width, height); - return (DPIX *)ERROR_PTR("requested bytes >= 2^31", procName, NULL); - } - - dpix = (DPIX *)LEPT_CALLOC(1, sizeof(DPIX)); - dpixSetDimensions(dpix, width, height); - dpixSetWpl(dpix, width); /* 8 byte words */ - dpix->refcount = 1; - - data = (l_float64 *)LEPT_CALLOC((size_t)width * height, sizeof(l_float64)); - if (!data) { - dpixDestroy(&dpix); - return (DPIX *)ERROR_PTR("calloc fail for data", procName, NULL); - } - dpixSetData(dpix, data); - return dpix; -} - - -/*! - * \brief dpixCreateTemplate() - * - * \param[in] dpixs - * \return dpixd, or NULL on error - * - *
- * Notes:
- *      (1) Makes a DPix of the same size as the input DPix, with the
- *          data array allocated and initialized to 0.
- *      (2) Copies the resolution.
- * 
- */ -DPIX * -dpixCreateTemplate(DPIX *dpixs) -{ -l_int32 w, h; -DPIX *dpixd; - - PROCNAME("dpixCreateTemplate"); - - if (!dpixs) - return (DPIX *)ERROR_PTR("dpixs not defined", procName, NULL); - - dpixGetDimensions(dpixs, &w, &h); - dpixd = dpixCreate(w, h); - dpixCopyResolution(dpixd, dpixs); - return dpixd; -} - - -/*! - * \brief dpixClone() - * - * \param[in] dpix - * \return same dpix ptr, or NULL on error - * - *
- * Notes:
- *      (1) See pixClone() for definition and usage.
- * 
- */ -DPIX * -dpixClone(DPIX *dpix) -{ - PROCNAME("dpixClone"); - - if (!dpix) - return (DPIX *)ERROR_PTR("dpix not defined", procName, NULL); - dpixChangeRefcount(dpix, 1); - - return dpix; -} - - -/*! - * \brief dpixCopy() - * - * \param[in] dpixd [optional] can be null, or equal to dpixs, - * or different from dpixs - * \param[in] dpixs - * \return dpixd, or NULL on error - * - *
- * Notes:
- *      (1) There are three cases:
- *            (a) dpixd == null  (makes a new dpix; refcount = 1)
- *            (b) dpixd == dpixs  (no-op)
- *            (c) dpixd != dpixs  (data copy; no change in refcount)
- *          If the refcount of dpixd > 1, case (c) will side-effect
- *          these handles.
- *      (2) The general pattern of use is:
- *             dpixd = dpixCopy(dpixd, dpixs);
- *          This will work for all three cases.
- *          For clarity when the case is known, you can use:
- *            (a) dpixd = dpixCopy(NULL, dpixs);
- *            (c) dpixCopy(dpixd, dpixs);
- *      (3) For case (c), we check if dpixs and dpixd are the same size.
- *          If so, the data is copied directly.
- *          Otherwise, the data is reallocated to the correct size
- *          and the copy proceeds.  The refcount of dpixd is unchanged.
- *      (4) This operation, like all others that may involve a pre-existing
- *          dpixd, will side-effect any existing clones of dpixd.
- * 
- */ -DPIX * -dpixCopy(DPIX *dpixd, /* can be null */ - DPIX *dpixs) -{ -l_int32 w, h, bytes; -l_float64 *datas, *datad; - - PROCNAME("dpixCopy"); - - if (!dpixs) - return (DPIX *)ERROR_PTR("dpixs not defined", procName, NULL); - if (dpixs == dpixd) - return dpixd; - - /* Total bytes in image data */ - dpixGetDimensions(dpixs, &w, &h); - bytes = 8 * w * h; - - /* If we're making a new dpix ... */ - if (!dpixd) { - if ((dpixd = dpixCreateTemplate(dpixs)) == NULL) - return (DPIX *)ERROR_PTR("dpixd not made", procName, NULL); - datas = dpixGetData(dpixs); - datad = dpixGetData(dpixd); - memcpy(datad, datas, bytes); - return dpixd; - } - - /* Reallocate image data if sizes are different */ - dpixResizeImageData(dpixd, dpixs); - - /* Copy data */ - dpixCopyResolution(dpixd, dpixs); - datas = dpixGetData(dpixs); - datad = dpixGetData(dpixd); - memcpy(datad, datas, bytes); - return dpixd; -} - - -/*! - * \brief dpixResizeImageData() - * - * \param[in] dpixd, dpixs - * \return 0 if OK, 1 on error - */ -l_ok -dpixResizeImageData(DPIX *dpixd, - DPIX *dpixs) -{ -l_int32 ws, hs, wd, hd, bytes; -l_float64 *data; - - PROCNAME("dpixResizeImageData"); - - if (!dpixs) - return ERROR_INT("dpixs not defined", procName, 1); - if (!dpixd) - return ERROR_INT("dpixd not defined", procName, 1); - - dpixGetDimensions(dpixs, &ws, &hs); - dpixGetDimensions(dpixd, &wd, &hd); - if (ws == wd && hs == hd) /* nothing to do */ - return 0; - - dpixSetDimensions(dpixd, ws, hs); - dpixSetWpl(dpixd, ws); /* 8 byte words */ - bytes = 8 * ws * hs; - data = dpixGetData(dpixd); - if (data) LEPT_FREE(data); - if ((data = (l_float64 *)LEPT_MALLOC(bytes)) == NULL) - return ERROR_INT("LEPT_MALLOC fail for data", procName, 1); - dpixSetData(dpixd, data); - return 0; -} - - -/*! - * \brief dpixDestroy() - * - * \param[in,out] pdpix will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the dpix.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -dpixDestroy(DPIX **pdpix) -{ -l_float64 *data; -DPIX *dpix; - - PROCNAME("dpixDestroy"); - - if (!pdpix) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((dpix = *pdpix) == NULL) - return; - - /* Decrement the ref count. If it is 0, destroy the dpix. */ - dpixChangeRefcount(dpix, -1); - if (dpixGetRefcount(dpix) <= 0) { - if ((data = dpixGetData(dpix)) != NULL) - LEPT_FREE(data); - LEPT_FREE(dpix); - } - - *pdpix = NULL; - return; -} - - -/*--------------------------------------------------------------------* - * DPix Accessors * - *--------------------------------------------------------------------*/ -/*! - * \brief dpixGetDimensions() - * - * \param[in] dpix - * \param[out] pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -dpixGetDimensions(DPIX *dpix, - l_int32 *pw, - l_int32 *ph) -{ - PROCNAME("dpixGetDimensions"); - - if (!pw && !ph) - return ERROR_INT("no return val requested", procName, 1); - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - if (pw) *pw = dpix->w; - if (ph) *ph = dpix->h; - return 0; -} - - -/*! - * \brief dpixSetDimensions() - * - * \param[in] dpix - * \param[in] w, h - * \return 0 if OK, 1 on error - */ -l_ok -dpixSetDimensions(DPIX *dpix, - l_int32 w, - l_int32 h) -{ - PROCNAME("dpixSetDimensions"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - dpix->w = w; - dpix->h = h; - return 0; -} - - -/*! - * \brief dpixGetWpl() - * - * \param[in] dpix - * \return wpl, or UNDEF on error - */ -l_int32 -dpixGetWpl(DPIX *dpix) -{ - PROCNAME("dpixGetWpl"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - return dpix->wpl; -} - - -/*! 
- * \brief dpixSetWpl() - * - * \param[in] dpix - * \param[in] wpl - * \return 0 if OK, 1 on error - */ -l_ok -dpixSetWpl(DPIX *dpix, - l_int32 wpl) -{ - PROCNAME("dpixSetWpl"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpix->wpl = wpl; - return 0; -} - - -/*! - * \brief dpixGetRefcount() - * - * \param[in] dpix - * \return refcount, or UNDEF on error - */ -l_int32 -dpixGetRefcount(DPIX *dpix) -{ - PROCNAME("dpixGetRefcount"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, UNDEF); - return dpix->refcount; -} - - -/*! - * \brief dpixChangeRefcount() - * - * \param[in] dpix - * \param[in] delta - * \return 0 if OK, 1 on error - */ -l_ok -dpixChangeRefcount(DPIX *dpix, - l_int32 delta) -{ - PROCNAME("dpixChangeRefcount"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpix->refcount += delta; - return 0; -} - - -/*! - * \brief dpixGetResolution() - * - * \param[in] dpix - * \param[out] pxres, pyres [optional] x and y resolution - * \return 0 if OK, 1 on error - */ -l_ok -dpixGetResolution(DPIX *dpix, - l_int32 *pxres, - l_int32 *pyres) -{ - PROCNAME("dpixGetResolution"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - if (pxres) *pxres = dpix->xres; - if (pyres) *pyres = dpix->yres; - return 0; -} - - -/*! - * \brief dpixSetResolution() - * - * \param[in] dpix - * \param[in] xres, yres x and y resolution - * \return 0 if OK, 1 on error - */ -l_ok -dpixSetResolution(DPIX *dpix, - l_int32 xres, - l_int32 yres) -{ - PROCNAME("dpixSetResolution"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpix->xres = xres; - dpix->yres = yres; - return 0; -} - - -/*! 
- * \brief dpixCopyResolution() - * - * \param[in] dpixd, dpixs - * \return 0 if OK, 1 on error - */ -l_ok -dpixCopyResolution(DPIX *dpixd, - DPIX *dpixs) -{ -l_int32 xres, yres; - PROCNAME("dpixCopyResolution"); - - if (!dpixs || !dpixd) - return ERROR_INT("dpixs and dpixd not both defined", procName, 1); - - dpixGetResolution(dpixs, &xres, &yres); - dpixSetResolution(dpixd, xres, yres); - return 0; -} - - -/*! - * \brief dpixGetData() - * - * \param[in] dpix - * \return ptr to dpix data, or NULL on error - */ -l_float64 * -dpixGetData(DPIX *dpix) -{ - PROCNAME("dpixGetData"); - - if (!dpix) - return (l_float64 *)ERROR_PTR("dpix not defined", procName, NULL); - return dpix->data; -} - - -/*! - * \brief dpixSetData() - * - * \param[in] dpix - * \param[in] data - * \return 0 if OK, 1 on error - */ -l_ok -dpixSetData(DPIX *dpix, - l_float64 *data) -{ - PROCNAME("dpixSetData"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpix->data = data; - return 0; -} - - -/*! - * \brief dpixGetPixel() - * - * \param[in] dpix - * \param[in] x,y pixel coords - * \param[out] pval pixel value - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0.0 in %pval. To avoid spamming output, it fails silently. - */ -l_ok -dpixGetPixel(DPIX *dpix, - l_int32 x, - l_int32 y, - l_float64 *pval) -{ -l_int32 w, h; - - PROCNAME("dpixGetPixel"); - - if (!pval) - return ERROR_INT("pval not defined", procName, 1); - *pval = 0.0; - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpixGetDimensions(dpix, &w, &h); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - *pval = *(dpix->data + y * w + x); - return 0; -} - - -/*! - * \brief dpixSetPixel() - * - * \param[in] dpix - * \param[in] x,y pixel coords - * \param[in] val pixel value - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0.0 in %pval. 
To avoid spamming output, it fails silently. - */ -l_ok -dpixSetPixel(DPIX *dpix, - l_int32 x, - l_int32 y, - l_float64 val) -{ -l_int32 w, h; - - PROCNAME("dpixSetPixel"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpixGetDimensions(dpix, &w, &h); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - *(dpix->data + y * w + x) = val; - return 0; -} - - -/*--------------------------------------------------------------------* - * FPix serialized I/O * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixRead() - * - * \param[in] filename - * \return fpix, or NULL on error - */ -FPIX * -fpixRead(const char *filename) -{ -FILE *fp; -FPIX *fpix; - - PROCNAME("fpixRead"); - - if (!filename) - return (FPIX *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (FPIX *)ERROR_PTR("stream not opened", procName, NULL); - fpix = fpixReadStream(fp); - fclose(fp); - if (!fpix) - return (FPIX *)ERROR_PTR("fpix not read", procName, NULL); - return fpix; -} - - -/*! - * \brief fpixReadStream() - * - * \param[in] fp file stream - * \return fpix, or NULL on error - */ -FPIX * -fpixReadStream(FILE *fp) -{ -char buf[256]; -l_int32 w, h, nbytes, xres, yres, version; -l_float32 *data; -FPIX *fpix; - - PROCNAME("fpixReadStream"); - - if (!fp) - return (FPIX *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nFPix Version %d\n", &version) != 1) - return (FPIX *)ERROR_PTR("not a fpix file", procName, NULL); - if (version != FPIX_VERSION_NUMBER) - return (FPIX *)ERROR_PTR("invalid fpix version", procName, NULL); - if (fscanf(fp, "w = %d, h = %d, nbytes = %d\n", &w, &h, &nbytes) != 3) - return (FPIX *)ERROR_PTR("read fail for data size", procName, NULL); - - /* Use fgets() and sscanf(); not fscanf(), for the last - * bit of header data before the float data. 
The reason is - * that fscanf throws away white space, and if the float data - * happens to begin with ascii character(s) that are white - * space, it will swallow them and all will be lost! */ - if (fgets(buf, sizeof(buf), fp) == NULL) - return (FPIX *)ERROR_PTR("fgets read fail", procName, NULL); - if (sscanf(buf, "xres = %d, yres = %d\n", &xres, &yres) != 2) - return (FPIX *)ERROR_PTR("read fail for xres, yres", procName, NULL); - - if ((fpix = fpixCreate(w, h)) == NULL) - return (FPIX *)ERROR_PTR("fpix not made", procName, NULL); - fpixSetResolution(fpix, xres, yres); - data = fpixGetData(fpix); - if (fread(data, 1, nbytes, fp) != nbytes) { - fpixDestroy(&fpix); - return (FPIX *)ERROR_PTR("read error for nbytes", procName, NULL); - } - fgetc(fp); /* ending nl */ - - /* Convert to little-endian if necessary */ - fpixEndianByteSwap(fpix, fpix); - return fpix; -} - - -/*! - * \brief fpixReadMem() - * - * \param[in] data of serialized fpix - * \param[in] size of data in bytes - * \return fpix, or NULL on error - */ -FPIX * -fpixReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -FPIX *fpix; - - PROCNAME("fpixReadMem"); - - if (!data) - return (FPIX *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (FPIX *)ERROR_PTR("stream not opened", procName, NULL); - - fpix = fpixReadStream(fp); - fclose(fp); - if (!fpix) L_ERROR("fpix not read\n", procName); - return fpix; -} - - -/*! 
- * \brief fpixWrite() - * - * \param[in] filename - * \param[in] fpix - * \return 0 if OK, 1 on error - */ -l_ok -fpixWrite(const char *filename, - FPIX *fpix) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("fpixWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = fpixWriteStream(fp, fpix); - fclose(fp); - if (ret) - return ERROR_INT("fpix not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief fpixWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] fpix - * \return 0 if OK, 1 on error - */ -l_ok -fpixWriteStream(FILE *fp, - FPIX *fpix) -{ -l_int32 w, h, xres, yres; -l_uint32 nbytes; -l_float32 *data; -FPIX *fpixt; - - PROCNAME("fpixWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - /* Convert to little-endian if necessary */ - fpixt = fpixEndianByteSwap(NULL, fpix); - - fpixGetDimensions(fpixt, &w, &h); - data = fpixGetData(fpixt); - nbytes = sizeof(l_float32) * w * h; - fpixGetResolution(fpixt, &xres, &yres); - fprintf(fp, "\nFPix Version %d\n", FPIX_VERSION_NUMBER); - fprintf(fp, "w = %d, h = %d, nbytes = %u\n", w, h, nbytes); - fprintf(fp, "xres = %d, yres = %d\n", xres, yres); - fwrite(data, 1, nbytes, fp); - fprintf(fp, "\n"); - - fpixDestroy(&fpixt); - return 0; -} - - -/*! - * \brief fpixWriteMem() - * - * \param[out] pdata data of serialized fpix - * \param[out] psize size of returned data - * \param[in] fpix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a fpix in memory and puts the result in a buffer.
- * 
- */ -l_ok -fpixWriteMem(l_uint8 **pdata, - size_t *psize, - FPIX *fpix) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("fpixWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = fpixWriteStream(fp, fpix); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = fpixWriteStream(fp, fpix); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*! - * \brief fpixEndianByteSwap() - * - * \param[in] fpixd can be equal to fpixs or NULL - * \param[in] fpixs - * \return fpixd always - * - *
- * Notes:
- *      (1) On big-endian hardware, this does byte-swapping on each of
- *          the 4-byte floats in the fpix data.  On little-endians,
- *          the data is unchanged.  This is used for serialization
- *          of fpix; the data is serialized in little-endian byte
- *          order because most hardware is little-endian.
- *      (2) The operation can be either in-place or, if fpixd == NULL,
- *          a new fpix is made.  If not in-place, caller must catch
- *          the returned pointer.
- * 
- */ -FPIX * -fpixEndianByteSwap(FPIX *fpixd, - FPIX *fpixs) -{ - PROCNAME("fpixEndianByteSwap"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, fpixd); - if (fpixd && (fpixs != fpixd)) - return (FPIX *)ERROR_PTR("fpixd != fpixs", procName, fpixd); - -#ifdef L_BIG_ENDIAN - { - l_uint32 *data; - l_int32 i, j, w, h; - l_uint32 word; - - fpixGetDimensions(fpixs, &w, &h); - fpixd = fpixCopy(fpixd, fpixs); /* no copy if fpixd == fpixs */ - - data = (l_uint32 *)fpixGetData(fpixd); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, data++) { - word = *data; - *data = (word >> 24) | - ((word >> 8) & 0x0000ff00) | - ((word << 8) & 0x00ff0000) | - (word << 24); - } - } - return fpixd; - } -#else /* L_LITTLE_ENDIAN */ - - if (fpixd) - return fpixd; /* no-op */ - else - return fpixClone(fpixs); - -#endif /* L_BIG_ENDIAN */ -} - - -/*--------------------------------------------------------------------* - * DPix serialized I/O * - *--------------------------------------------------------------------*/ -/*! - * \brief dpixRead() - * - * \param[in] filename - * \return dpix, or NULL on error - */ -DPIX * -dpixRead(const char *filename) -{ -FILE *fp; -DPIX *dpix; - - PROCNAME("dpixRead"); - - if (!filename) - return (DPIX *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (DPIX *)ERROR_PTR("stream not opened", procName, NULL); - dpix = dpixReadStream(fp); - fclose(fp); - if (!dpix) - return (DPIX *)ERROR_PTR("dpix not read", procName, NULL); - return dpix; -} - - -/*! 
- * \brief dpixReadStream() - * - * \param[in] fp file stream - * \return dpix, or NULL on error - */ -DPIX * -dpixReadStream(FILE *fp) -{ -char buf[256]; -l_int32 w, h, nbytes, version, xres, yres; -l_float64 *data; -DPIX *dpix; - - PROCNAME("dpixReadStream"); - - if (!fp) - return (DPIX *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nDPix Version %d\n", &version) != 1) - return (DPIX *)ERROR_PTR("not a dpix file", procName, NULL); - if (version != DPIX_VERSION_NUMBER) - return (DPIX *)ERROR_PTR("invalid dpix version", procName, NULL); - if (fscanf(fp, "w = %d, h = %d, nbytes = %d\n", &w, &h, &nbytes) != 3) - return (DPIX *)ERROR_PTR("read fail for data size", procName, NULL); - - /* Use fgets() and sscanf(); not fscanf(), for the last - * bit of header data before the float data. The reason is - * that fscanf throws away white space, and if the float data - * happens to begin with ascii character(s) that are white - * space, it will swallow them and all will be lost! */ - if (fgets(buf, sizeof(buf), fp) == NULL) - return (DPIX *)ERROR_PTR("fgets read fail", procName, NULL); - if (sscanf(buf, "xres = %d, yres = %d\n", &xres, &yres) != 2) - return (DPIX *)ERROR_PTR("read fail for xres, yres", procName, NULL); - - if ((dpix = dpixCreate(w, h)) == NULL) - return (DPIX *)ERROR_PTR("dpix not made", procName, NULL); - dpixSetResolution(dpix, xres, yres); - data = dpixGetData(dpix); - if (fread(data, 1, nbytes, fp) != nbytes) { - dpixDestroy(&dpix); - return (DPIX *)ERROR_PTR("read error for nbytes", procName, NULL); - } - fgetc(fp); /* ending nl */ - - /* Convert to little-endian if necessary */ - dpixEndianByteSwap(dpix, dpix); - return dpix; -} - - -/*! 
- * \brief dpixReadMem() - * - * \param[in] data of serialized dpix - * \param[in] size of data in bytes - * \return dpix, or NULL on error - */ -DPIX * -dpixReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -DPIX *dpix; - - PROCNAME("dpixReadMem"); - - if (!data) - return (DPIX *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (DPIX *)ERROR_PTR("stream not opened", procName, NULL); - - dpix = dpixReadStream(fp); - fclose(fp); - if (!dpix) L_ERROR("dpix not read\n", procName); - return dpix; -} - - -/*! - * \brief dpixWrite() - * - * \param[in] filename - * \param[in] dpix - * \return 0 if OK, 1 on error - */ -l_ok -dpixWrite(const char *filename, - DPIX *dpix) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("dpixWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = dpixWriteStream(fp, dpix); - fclose(fp); - if (ret) - return ERROR_INT("dpix not written to stream", procName, 1); - return 0; -} - - -/*! 
- * \brief dpixWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] dpix - * \return 0 if OK, 1 on error - */ -l_ok -dpixWriteStream(FILE *fp, - DPIX *dpix) -{ -l_int32 w, h, xres, yres; -l_uint32 nbytes; -l_float64 *data; -DPIX *dpixt; - - PROCNAME("dpixWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - /* Convert to little-endian if necessary */ - dpixt = dpixEndianByteSwap(NULL, dpix); - - dpixGetDimensions(dpixt, &w, &h); - dpixGetResolution(dpixt, &xres, &yres); - data = dpixGetData(dpixt); - nbytes = sizeof(l_float64) * w * h; - fprintf(fp, "\nDPix Version %d\n", DPIX_VERSION_NUMBER); - fprintf(fp, "w = %d, h = %d, nbytes = %u\n", w, h, nbytes); - fprintf(fp, "xres = %d, yres = %d\n", xres, yres); - fwrite(data, 1, nbytes, fp); - fprintf(fp, "\n"); - - dpixDestroy(&dpixt); - return 0; -} - - -/*! - * \brief dpixWriteMem() - * - * \param[out] pdata data of serialized dpix - * \param[out] psize size of returned data - * \param[in] dpix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a dpix in memory and puts the result in a buffer.
- * 
- */ -l_ok -dpixWriteMem(l_uint8 **pdata, - size_t *psize, - DPIX *dpix) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("dpixWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = dpixWriteStream(fp, dpix); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = dpixWriteStream(fp, dpix); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*! - * \brief dpixEndianByteSwap() - * - * \param[in] dpixd can be equal to dpixs or NULL - * \param[in] dpixs - * \return dpixd always - * - *
- * Notes:
- *      (1) On big-endian hardware, this does byte-swapping on each of
- *          the 4-byte words in the dpix data.  On little-endians,
- *          the data is unchanged.  This is used for serialization
- *          of dpix; the data is serialized in little-endian byte
- *          order because most hardware is little-endian.
- *      (2) The operation can be either in-place or, if dpixd == NULL,
- *          a new dpix is made.  If not in-place, caller must catch
- *          the returned pointer.
- * 
- */ -DPIX * -dpixEndianByteSwap(DPIX *dpixd, - DPIX *dpixs) -{ - PROCNAME("dpixEndianByteSwap"); - - if (!dpixs) - return (DPIX *)ERROR_PTR("dpixs not defined", procName, dpixd); - if (dpixd && (dpixs != dpixd)) - return (DPIX *)ERROR_PTR("dpixd != dpixs", procName, dpixd); - -#ifdef L_BIG_ENDIAN - { - l_uint32 *data; - l_int32 i, j, w, h; - l_uint32 word; - - dpixGetDimensions(dpixs, &w, &h); - dpixd = dpixCopy(dpixd, dpixs); /* no copy if dpixd == dpixs */ - - data = (l_uint32 *)dpixGetData(dpixd); - for (i = 0; i < h; i++) { - for (j = 0; j < 2 * w; j++, data++) { - word = *data; - *data = (word >> 24) | - ((word >> 8) & 0x0000ff00) | - ((word << 8) & 0x00ff0000) | - (word << 24); - } - } - return dpixd; - } -#else /* L_LITTLE_ENDIAN */ - - if (dpixd) - return dpixd; /* no-op */ - else - return dpixClone(dpixs); - -#endif /* L_BIG_ENDIAN */ -} - - -/*--------------------------------------------------------------------* - * Print FPix (subsampled, for debugging) * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixPrintStream() - * - * \param[in] fp file stream - * \param[in] fpix - * \param[in] factor for subsampling - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Subsampled printout of fpix for debugging.
- * 
- */ -l_ok -fpixPrintStream(FILE *fp, - FPIX *fpix, - l_int32 factor) -{ -l_int32 i, j, w, h, count; -l_float32 val; - - PROCNAME("fpixPrintStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor < 1f", procName, 1); - - fpixGetDimensions(fpix, &w, &h); - fprintf(fp, "\nFPix: w = %d, h = %d\n", w, h); - for (i = 0; i < h; i += factor) { - for (count = 0, j = 0; j < w; j += factor, count++) { - fpixGetPixel(fpix, j, i, &val); - fprintf(fp, "val[%d, %d] = %f ", i, j, val); - if ((count + 1) % 3 == 0) fprintf(fp, "\n"); - } - if (count % 3) fprintf(fp, "\n"); - } - fprintf(fp, "\n"); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fpix2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fpix2.c deleted file mode 100644 index befe30eb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/fpix2.c +++ /dev/null @@ -1,2471 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file fpix2.c - *
- *
- *    ------------------------------------------
- *    This file has these FPix utilities:
- *       ~ interconversions with pix, fpix, dpix
- *       ~ min and max values
- *       ~ integer scaling
- *       ~ arithmetic operations
- *       ~ set all
- *       ~ border functions
- *       ~ simple rasterop (source --> dest)
- *       ~ geometric transforms
- *    ------------------------------------------
- *
- *    Interconversions between Pix, FPix and DPix
- *          FPIX          *pixConvertToFPix()
- *          DPIX          *pixConvertToDPix()
- *          PIX           *fpixConvertToPix()
- *          PIX           *fpixDisplayMaxDynamicRange()  [useful for debugging]
- *          DPIX          *fpixConvertToDPix()
- *          PIX           *dpixConvertToPix()
- *          FPIX          *dpixConvertToFPix()
- *
- *    Min/max value
- *          l_int32        fpixGetMin()
- *          l_int32        fpixGetMax()
- *          l_int32        dpixGetMin()
- *          l_int32        dpixGetMax()
- *
- *    Integer scaling
- *          FPIX          *fpixScaleByInteger()
- *          DPIX          *dpixScaleByInteger()
- *
- *    Arithmetic operations
- *          FPIX          *fpixLinearCombination()
- *          l_int32        fpixAddMultConstant()
- *          DPIX          *dpixLinearCombination()
- *          l_int32        dpixAddMultConstant()
- *
- *    Set all
- *          l_int32        fpixSetAllArbitrary()
- *          l_int32        dpixSetAllArbitrary()
- *
- *    FPix border functions
- *          FPIX          *fpixAddBorder()
- *          FPIX          *fpixRemoveBorder()
- *          FPIX          *fpixAddMirroredBorder()
- *          FPIX          *fpixAddContinuedBorder()
- *          FPIX          *fpixAddSlopeBorder()
- *
- *    FPix simple rasterop
- *          l_int32        fpixRasterop()
- *
- *    FPix rotation by multiples of 90 degrees
- *          FPIX          *fpixRotateOrth()
- *          FPIX          *fpixRotate180()
- *          FPIX          *fpixRotate90()
- *          FPIX          *fpixFlipLR()
- *          FPIX          *fpixFlipTB()
- *
- *    FPix affine and projective interpolated transforms
- *          FPIX          *fpixAffinePta()
- *          FPIX          *fpixAffine()
- *          FPIX          *fpixProjectivePta()
- *          FPIX          *fpixProjective()
- *          l_int32        linearInterpolatePixelFloat()
- *
- *    Thresholding to 1 bpp Pix
- *          PIX           *fpixThresholdToPix()
- *
- *    Generate function from components
- *          FPIX          *pixComponentFunction()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*--------------------------------------------------------------------* - * FPix <--> Pix conversions * - *--------------------------------------------------------------------*/ -/*! - * \brief pixConvertToFPix() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] ncomps number of components: 3 for RGB, 1 otherwise - * \return fpix, or NULL on error - * - *
- * Notes:
- *      (1) If colormapped, remove to grayscale.
- *      (2) If 32 bpp and %ncomps == 3, this is RGB; convert to luminance.
- *          In all other cases the src image is treated as having a single
- *          component of pixel values.
- * 
- */ -FPIX * -pixConvertToFPix(PIX *pixs, - l_int32 ncomps) -{ -l_int32 w, h, d, i, j, val, wplt, wpld; -l_uint32 uval; -l_uint32 *datat, *linet; -l_float32 *datad, *lined; -PIX *pixt; -FPIX *fpixd; - - PROCNAME("pixConvertToFPix"); - - if (!pixs) - return (FPIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Convert to a single component */ - if (pixGetColormap(pixs)) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else if (pixGetDepth(pixs) == 32 && ncomps == 3) - pixt = pixConvertRGBToLuminance(pixs); - else - pixt = pixClone(pixs); - pixGetDimensions(pixt, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) { - pixDestroy(&pixt); - return (FPIX *)ERROR_PTR("invalid depth", procName, NULL); - } - - if ((fpixd = fpixCreate(w, h)) == NULL) { - pixDestroy(&pixt); - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - } - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - datad = fpixGetData(fpixd); - wpld = fpixGetWpl(fpixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - if (d == 1) { - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(linet, j); - lined[j] = (l_float32)val; - } - } else if (d == 2) { - for (j = 0; j < w; j++) { - val = GET_DATA_DIBIT(linet, j); - lined[j] = (l_float32)val; - } - } else if (d == 4) { - for (j = 0; j < w; j++) { - val = GET_DATA_QBIT(linet, j); - lined[j] = (l_float32)val; - } - } else if (d == 8) { - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(linet, j); - lined[j] = (l_float32)val; - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - val = GET_DATA_TWO_BYTES(linet, j); - lined[j] = (l_float32)val; - } - } else { /* d == 32 */ - for (j = 0; j < w; j++) { - uval = GET_DATA_FOUR_BYTES(linet, j); - lined[j] = (l_float32)uval; - } - } - } - - pixDestroy(&pixt); - return fpixd; -} - - -/*! 
- * \brief pixConvertToDPix() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] ncomps number of components: 3 for RGB, 1 otherwise - * \return dpix, or NULL on error - * - *
- * Notes:
- *      (1) If colormapped, remove to grayscale.
- *      (2) If 32 bpp and %ncomps == 3, this is RGB; convert to luminance.
- *          In all other cases the src image is treated as having a single
- *          component of pixel values.
- * 
- */ -DPIX * -pixConvertToDPix(PIX *pixs, - l_int32 ncomps) -{ -l_int32 w, h, d, i, j, val, wplt, wpld; -l_uint32 uval; -l_uint32 *datat, *linet; -l_float64 *datad, *lined; -PIX *pixt; -DPIX *dpixd; - - PROCNAME("pixConvertToDPix"); - - if (!pixs) - return (DPIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Convert to a single component */ - if (pixGetColormap(pixs)) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else if (pixGetDepth(pixs) == 32 && ncomps == 3) - pixt = pixConvertRGBToLuminance(pixs); - else - pixt = pixClone(pixs); - pixGetDimensions(pixt, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) { - pixDestroy(&pixt); - return (DPIX *)ERROR_PTR("invalid depth", procName, NULL); - } - - if ((dpixd = dpixCreate(w, h)) == NULL) { - pixDestroy(&pixt); - return (DPIX *)ERROR_PTR("dpixd not made", procName, NULL); - } - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - datad = dpixGetData(dpixd); - wpld = dpixGetWpl(dpixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - if (d == 1) { - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(linet, j); - lined[j] = (l_float64)val; - } - } else if (d == 2) { - for (j = 0; j < w; j++) { - val = GET_DATA_DIBIT(linet, j); - lined[j] = (l_float64)val; - } - } else if (d == 4) { - for (j = 0; j < w; j++) { - val = GET_DATA_QBIT(linet, j); - lined[j] = (l_float64)val; - } - } else if (d == 8) { - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(linet, j); - lined[j] = (l_float64)val; - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - val = GET_DATA_TWO_BYTES(linet, j); - lined[j] = (l_float64)val; - } - } else { /* d == 32 */ - for (j = 0; j < w; j++) { - uval = GET_DATA_FOUR_BYTES(linet, j); - lined[j] = (l_float64)uval; - } - } - } - - pixDestroy(&pixt); - return dpixd; -} - - -/*! 
- * \brief fpixConvertToPix() - * - * \param[in] fpixs - * \param[in] outdepth 0, 8, 16 or 32 bpp - * \param[in] negvals L_CLIP_TO_ZERO, L_TAKE_ABSVAL - * \param[in] errorflag 1 to output error stats; 0 otherwise - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Use %outdepth = 0 to programmatically determine the
- *          output depth.  If no values are greater than 255,
- *          it will set outdepth = 8; otherwise to 16 or 32.
- *      (2) Because we are converting a float to an unsigned int
- *          with a specified dynamic range (8, 16 or 32 bits), errors
- *          can occur.  If errorflag == TRUE, output the number
- *          of values out of range, both negative and positive.
- *      (3) If a pixel value is positive and out of range, clip to
- *          the maximum value represented at the outdepth of 8, 16
- *          or 32 bits.
- * 
- */ -PIX * -fpixConvertToPix(FPIX *fpixs, - l_int32 outdepth, - l_int32 negvals, - l_int32 errorflag) -{ -l_int32 w, h, i, j, wpls, wpld; -l_uint32 vald, maxval; -l_float32 val; -l_float32 *datas, *lines; -l_uint32 *datad, *lined; -PIX *pixd; - - PROCNAME("fpixConvertToPix"); - - if (!fpixs) - return (PIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (negvals != L_CLIP_TO_ZERO && negvals != L_TAKE_ABSVAL) - return (PIX *)ERROR_PTR("invalid negvals", procName, NULL); - if (outdepth != 0 && outdepth != 8 && outdepth != 16 && outdepth != 32) - return (PIX *)ERROR_PTR("outdepth not in {0,8,16,32}", procName, NULL); - - fpixGetDimensions(fpixs, &w, &h); - datas = fpixGetData(fpixs); - wpls = fpixGetWpl(fpixs); - - /* Adaptive determination of output depth */ - if (outdepth == 0) { - outdepth = 8; - for (i = 0; i < h && outdepth < 32; i++) { - lines = datas + i * wpls; - for (j = 0; j < w && outdepth < 32; j++) { - if (lines[j] > 65535.5) - outdepth = 32; - else if (lines[j] > 255.5) - outdepth = 16; - } - } - } - if (outdepth == 8) - maxval = 0xff; - else if (outdepth == 16) - maxval = 0xffff; - else /* outdepth == 32 */ - maxval = 0xffffffff; - - /* Gather statistics if %errorflag = TRUE */ - if (errorflag) { - l_int32 negs = 0; - l_int32 overvals = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val = lines[j]; - if (val < 0.0) - negs++; - else if (val > maxval) - overvals++; - } - } - if (negs > 0) - L_ERROR("Number of negative values: %d\n", procName, negs); - if (overvals > 0) - L_ERROR("Number of too-large values: %d\n", procName, overvals); - } - - /* Make the pix and convert the data */ - if ((pixd = pixCreate(w, h, outdepth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j]; - if (val >= 0.0) - vald = 
(l_uint32)(val + 0.5); - else if (negvals == L_CLIP_TO_ZERO) /* and val < 0.0 */ - vald = 0; - else - vald = (l_uint32)(-val + 0.5); - if (vald > maxval) - vald = maxval; - - if (outdepth == 8) - SET_DATA_BYTE(lined, j, vald); - else if (outdepth == 16) - SET_DATA_TWO_BYTES(lined, j, vald); - else /* outdepth == 32 */ - SET_DATA_FOUR_BYTES(lined, j, vald); - } - } - - return pixd; -} - - -/*! - * \brief fpixDisplayMaxDynamicRange() - * - * \param[in] fpixs - * \return pixd 8 bpp, or NULL on error - */ -PIX * -fpixDisplayMaxDynamicRange(FPIX *fpixs) -{ -l_uint8 dval; -l_int32 i, j, w, h, wpls, wpld; -l_float32 factor, sval, maxval; -l_float32 *lines, *datas; -l_uint32 *lined, *datad; -PIX *pixd; - - PROCNAME("fpixDisplayMaxDynamicRange"); - - if (!fpixs) - return (PIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixGetDimensions(fpixs, &w, &h); - datas = fpixGetData(fpixs); - wpls = fpixGetWpl(fpixs); - - maxval = 0.0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - sval = *(lines + j); - if (sval > maxval) - maxval = sval; - } - } - - pixd = pixCreate(w, h, 8); - if (maxval == 0.0) - return pixd; /* all pixels are 0 */ - - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - factor = 255. / maxval; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = *(lines + j); - if (sval < 0.0) sval = 0.0; - dval = (l_uint8)(factor * sval + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - - return pixd; -} - - -/*! 
- * \brief fpixConvertToDPix() - * - * \param[in] fpix - * \return dpix, or NULL on error - */ -DPIX * -fpixConvertToDPix(FPIX *fpix) -{ -l_int32 w, h, i, j, wpls, wpld; -l_float32 val; -l_float32 *datas, *lines; -l_float64 *datad, *lined; -DPIX *dpix; - - PROCNAME("fpixConvertToDPix"); - - if (!fpix) - return (DPIX *)ERROR_PTR("fpix not defined", procName, NULL); - - fpixGetDimensions(fpix, &w, &h); - if ((dpix = dpixCreate(w, h)) == NULL) - return (DPIX *)ERROR_PTR("dpix not made", procName, NULL); - - datas = fpixGetData(fpix); - datad = dpixGetData(dpix); - wpls = fpixGetWpl(fpix); - wpld = dpixGetWpl(dpix); /* 8 byte words */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j]; - lined[j] = val; - } - } - - return dpix; -} - - -/*! - * \brief dpixConvertToPix() - * - * \param[in] dpixs - * \param[in] outdepth 0, 8, 16 or 32 bpp - * \param[in] negvals L_CLIP_TO_ZERO, L_TAKE_ABSVAL - * \param[in] errorflag 1 to output error stats; 0 otherwise - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Use %outdepth = 0 to programmatically determine the
- *          output depth.  If no values are greater than 255,
- *          it will set outdepth = 8; otherwise to 16 or 32.
- *      (2) Because we are converting a float to an unsigned int
- *          with a specified dynamic range (8, 16 or 32 bits), errors
- *          can occur.  If errorflag == TRUE, output the number
- *          of values out of range, both negative and positive.
- *      (3) If a pixel value is positive and out of range, clip to
- *          the maximum value represented at the outdepth of 8, 16
- *          or 32 bits.
- * 
- */ -PIX * -dpixConvertToPix(DPIX *dpixs, - l_int32 outdepth, - l_int32 negvals, - l_int32 errorflag) -{ -l_int32 w, h, i, j, wpls, wpld, maxval; -l_uint32 vald; -l_float64 val; -l_float64 *datas, *lines; -l_uint32 *datad, *lined; -PIX *pixd; - - PROCNAME("dpixConvertToPix"); - - if (!dpixs) - return (PIX *)ERROR_PTR("dpixs not defined", procName, NULL); - if (negvals != L_CLIP_TO_ZERO && negvals != L_TAKE_ABSVAL) - return (PIX *)ERROR_PTR("invalid negvals", procName, NULL); - if (outdepth != 0 && outdepth != 8 && outdepth != 16 && outdepth != 32) - return (PIX *)ERROR_PTR("outdepth not in {0,8,16,32}", procName, NULL); - - dpixGetDimensions(dpixs, &w, &h); - datas = dpixGetData(dpixs); - wpls = dpixGetWpl(dpixs); - - /* Adaptive determination of output depth */ - if (outdepth == 0) { - outdepth = 8; - for (i = 0; i < h && outdepth < 32; i++) { - lines = datas + i * wpls; - for (j = 0; j < w && outdepth < 32; j++) { - if (lines[j] > 65535.5) - outdepth = 32; - else if (lines[j] > 255.5) - outdepth = 16; - } - } - } - maxval = 0xff; - if (outdepth == 16) - maxval = 0xffff; - else /* outdepth == 32 */ - maxval = 0xffffffff; - - /* Gather statistics if %errorflag = TRUE */ - if (errorflag) { - l_int32 negs = 0; - l_int32 overvals = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val = lines[j]; - if (val < 0.0) - negs++; - else if (val > maxval) - overvals++; - } - } - if (negs > 0) - L_ERROR("Number of negative values: %d\n", procName, negs); - if (overvals > 0) - L_ERROR("Number of too-large values: %d\n", procName, overvals); - } - - /* Make the pix and convert the data */ - if ((pixd = pixCreate(w, h, outdepth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j]; - if (val >= 0.0) { - vald = (l_uint32)(val + 0.5); - } else { 
/* val < 0.0 */ - if (negvals == L_CLIP_TO_ZERO) - vald = 0; - else - vald = (l_uint32)(-val + 0.5); - } - if (vald > maxval) - vald = maxval; - if (outdepth == 8) - SET_DATA_BYTE(lined, j, vald); - else if (outdepth == 16) - SET_DATA_TWO_BYTES(lined, j, vald); - else /* outdepth == 32 */ - SET_DATA_FOUR_BYTES(lined, j, vald); - } - } - - return pixd; -} - - -/*! - * \brief dpixConvertToFPix() - * - * \param[in] dpix - * \return fpix, or NULL on error - */ -FPIX * -dpixConvertToFPix(DPIX *dpix) -{ -l_int32 w, h, i, j, wpls, wpld; -l_float64 val; -l_float32 *datad, *lined; -l_float64 *datas, *lines; -FPIX *fpix; - - PROCNAME("dpixConvertToFPix"); - - if (!dpix) - return (FPIX *)ERROR_PTR("dpix not defined", procName, NULL); - - dpixGetDimensions(dpix, &w, &h); - if ((fpix = fpixCreate(w, h)) == NULL) - return (FPIX *)ERROR_PTR("fpix not made", procName, NULL); - - datas = dpixGetData(dpix); - datad = fpixGetData(fpix); - wpls = dpixGetWpl(dpix); /* 8 byte words */ - wpld = fpixGetWpl(fpix); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j]; - lined[j] = (l_float32)val; - } - } - - return fpix; -} - - - -/*--------------------------------------------------------------------* - * Min/max value * - *--------------------------------------------------------------------*/ -/*! 
- * \brief fpixGetMin() - * - * \param[in] fpix - * \param[out] pminval [optional] min value - * \param[out] pxminloc [optional] x location of min - * \param[out] pyminloc [optional] y location of min - * \return 0 if OK; 1 on error - */ -l_ok -fpixGetMin(FPIX *fpix, - l_float32 *pminval, - l_int32 *pxminloc, - l_int32 *pyminloc) -{ -l_int32 i, j, w, h, wpl, xminloc, yminloc; -l_float32 *data, *line; -l_float32 minval; - - PROCNAME("fpixGetMin"); - - if (!pminval && !pxminloc && !pyminloc) - return ERROR_INT("no return val requested", procName, 1); - if (pminval) *pminval = 0.0; - if (pxminloc) *pxminloc = 0; - if (pyminloc) *pyminloc = 0; - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - minval = +1.0e20; - xminloc = 0; - yminloc = 0; - fpixGetDimensions(fpix, &w, &h); - data = fpixGetData(fpix); - wpl = fpixGetWpl(fpix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - if (line[j] < minval) { - minval = line[j]; - xminloc = j; - yminloc = i; - } - } - } - - if (pminval) *pminval = minval; - if (pxminloc) *pxminloc = xminloc; - if (pyminloc) *pyminloc = yminloc; - return 0; -} - - -/*! 
- * \brief fpixGetMax() - * - * \param[in] fpix - * \param[out] pmaxval [optional] max value - * \param[out] pxmaxloc [optional] x location of max - * \param[out] pymaxloc [optional] y location of max - * \return 0 if OK; 1 on error - */ -l_ok -fpixGetMax(FPIX *fpix, - l_float32 *pmaxval, - l_int32 *pxmaxloc, - l_int32 *pymaxloc) -{ -l_int32 i, j, w, h, wpl, xmaxloc, ymaxloc; -l_float32 *data, *line; -l_float32 maxval; - - PROCNAME("fpixGetMax"); - - if (!pmaxval && !pxmaxloc && !pymaxloc) - return ERROR_INT("no return val requested", procName, 1); - if (pmaxval) *pmaxval = 0.0; - if (pxmaxloc) *pxmaxloc = 0; - if (pymaxloc) *pymaxloc = 0; - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - maxval = -1.0e20; - xmaxloc = 0; - ymaxloc = 0; - fpixGetDimensions(fpix, &w, &h); - data = fpixGetData(fpix); - wpl = fpixGetWpl(fpix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - if (line[j] > maxval) { - maxval = line[j]; - xmaxloc = j; - ymaxloc = i; - } - } - } - - if (pmaxval) *pmaxval = maxval; - if (pxmaxloc) *pxmaxloc = xmaxloc; - if (pymaxloc) *pymaxloc = ymaxloc; - return 0; -} - - -/*! 
- * \brief dpixGetMin() - * - * \param[in] dpix - * \param[out] pminval [optional] min value - * \param[out] pxminloc [optional] x location of min - * \param[out] pyminloc [optional] y location of min - * \return 0 if OK; 1 on error - */ -l_ok -dpixGetMin(DPIX *dpix, - l_float64 *pminval, - l_int32 *pxminloc, - l_int32 *pyminloc) -{ -l_int32 i, j, w, h, wpl, xminloc, yminloc; -l_float64 *data, *line; -l_float64 minval; - - PROCNAME("dpixGetMin"); - - if (!pminval && !pxminloc && !pyminloc) - return ERROR_INT("no return val requested", procName, 1); - if (pminval) *pminval = 0.0; - if (pxminloc) *pxminloc = 0; - if (pyminloc) *pyminloc = 0; - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - minval = +1.0e300; - xminloc = 0; - yminloc = 0; - dpixGetDimensions(dpix, &w, &h); - data = dpixGetData(dpix); - wpl = dpixGetWpl(dpix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - if (line[j] < minval) { - minval = line[j]; - xminloc = j; - yminloc = i; - } - } - } - - if (pminval) *pminval = minval; - if (pxminloc) *pxminloc = xminloc; - if (pyminloc) *pyminloc = yminloc; - return 0; -} - - -/*! 
- * \brief dpixGetMax() - * - * \param[in] dpix - * \param[out] pmaxval [optional] max value - * \param[out] pxmaxloc [optional] x location of max - * \param[out] pymaxloc [optional] y location of max - * \return 0 if OK; 1 on error - */ -l_ok -dpixGetMax(DPIX *dpix, - l_float64 *pmaxval, - l_int32 *pxmaxloc, - l_int32 *pymaxloc) -{ -l_int32 i, j, w, h, wpl, xmaxloc, ymaxloc; -l_float64 *data, *line; -l_float64 maxval; - - PROCNAME("dpixGetMax"); - - if (!pmaxval && !pxmaxloc && !pymaxloc) - return ERROR_INT("no return val requested", procName, 1); - if (pmaxval) *pmaxval = 0.0; - if (pxmaxloc) *pxmaxloc = 0; - if (pymaxloc) *pymaxloc = 0; - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - maxval = -1.0e20; - xmaxloc = 0; - ymaxloc = 0; - dpixGetDimensions(dpix, &w, &h); - data = dpixGetData(dpix); - wpl = dpixGetWpl(dpix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - if (line[j] > maxval) { - maxval = line[j]; - xmaxloc = j; - ymaxloc = i; - } - } - } - - if (pmaxval) *pmaxval = maxval; - if (pxmaxloc) *pxmaxloc = xmaxloc; - if (pymaxloc) *pymaxloc = ymaxloc; - return 0; -} - - -/*--------------------------------------------------------------------* - * Special integer scaling * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixScaleByInteger() - * - * \param[in] fpixs typically low resolution - * \param[in] factor integer scaling factor - * \return fpixd interpolated result, or NULL on error - * - *
- * Notes:
- *      (1) The width wd of fpixd is related to ws of fpixs by:
- *              wd = factor * (ws - 1) + 1   (and ditto for the height)
- *          We avoid special-casing boundary pixels in the interpolation
- *          by constructing fpixd by inserting (factor - 1) interpolated
- *          pixels between each pixel in fpixs.  Then
- *               wd = ws + (ws - 1) * (factor - 1)    (same as above)
- *          This also has the advantage that if we subsample by %factor,
- *          throwing out all the interpolated pixels, we regain the
- *          original low resolution fpix.
- * 
- */ -FPIX * -fpixScaleByInteger(FPIX *fpixs, - l_int32 factor) -{ -l_int32 i, j, k, m, ws, hs, wd, hd, wpls, wpld; -l_float32 val0, val1, val2, val3; -l_float32 *datas, *datad, *lines, *lined, *fract; -FPIX *fpixd; - - PROCNAME("fpixScaleByInteger"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixGetDimensions(fpixs, &ws, &hs); - wd = factor * (ws - 1) + 1; - hd = factor * (hs - 1) + 1; - fpixd = fpixCreate(wd, hd); - datas = fpixGetData(fpixs); - datad = fpixGetData(fpixd); - wpls = fpixGetWpl(fpixs); - wpld = fpixGetWpl(fpixd); - fract = (l_float32 *)LEPT_CALLOC(factor, sizeof(l_float32)); - for (i = 0; i < factor; i++) - fract[i] = i / (l_float32)factor; - for (i = 0; i < hs - 1; i++) { - lines = datas + i * wpls; - for (j = 0; j < ws - 1; j++) { - val0 = lines[j]; - val1 = lines[j + 1]; - val2 = lines[wpls + j]; - val3 = lines[wpls + j + 1]; - for (k = 0; k < factor; k++) { /* rows of sub-block */ - lined = datad + (i * factor + k) * wpld; - for (m = 0; m < factor; m++) { /* cols of sub-block */ - lined[j * factor + m] = - val0 * (1.0 - fract[m]) * (1.0 - fract[k]) + - val1 * fract[m] * (1.0 - fract[k]) + - val2 * (1.0 - fract[m]) * fract[k] + - val3 * fract[m] * fract[k]; - } - } - } - } - - /* Do the right-most column of fpixd, skipping LR corner */ - for (i = 0; i < hs - 1; i++) { - lines = datas + i * wpls; - val0 = lines[ws - 1]; - val1 = lines[wpls + ws - 1]; - for (k = 0; k < factor; k++) { - lined = datad + (i * factor + k) * wpld; - lined[wd - 1] = val0 * (1.0 - fract[k]) + val1 * fract[k]; - } - } - - /* Do the bottom-most row of fpixd */ - lines = datas + (hs - 1) * wpls; - lined = datad + (hd - 1) * wpld; - for (j = 0; j < ws - 1; j++) { - val0 = lines[j]; - val1 = lines[j + 1]; - for (m = 0; m < factor; m++) - lined[j * factor + m] = val0 * (1.0 - fract[m]) + val1 * fract[m]; - lined[wd - 1] = lines[ws - 1]; /* LR corner */ - } - - LEPT_FREE(fract); - return fpixd; -} - - -/*! 
- * \brief dpixScaleByInteger() - * - * \param[in] dpixs typically low resolution - * \param[in] factor integer scaling factor - * \return dpixd interpolated result, or NULL on error - * - *
- * Notes:
- *      (1) The width wd of dpixd is related to ws of dpixs by:
- *              wd = factor * (ws - 1) + 1   (and ditto for the height)
- *          We avoid special-casing boundary pixels in the interpolation
- *          by constructing fpixd by inserting (factor - 1) interpolated
- *          pixels between each pixel in fpixs.  Then
- *               wd = ws + (ws - 1) * (factor - 1)    (same as above)
- *          This also has the advantage that if we subsample by %factor,
- *          throwing out all the interpolated pixels, we regain the
- *          original low resolution dpix.
- * 
- */ -DPIX * -dpixScaleByInteger(DPIX *dpixs, - l_int32 factor) -{ -l_int32 i, j, k, m, ws, hs, wd, hd, wpls, wpld; -l_float64 val0, val1, val2, val3; -l_float64 *datas, *datad, *lines, *lined, *fract; -DPIX *dpixd; - - PROCNAME("dpixScaleByInteger"); - - if (!dpixs) - return (DPIX *)ERROR_PTR("dpixs not defined", procName, NULL); - - dpixGetDimensions(dpixs, &ws, &hs); - wd = factor * (ws - 1) + 1; - hd = factor * (hs - 1) + 1; - dpixd = dpixCreate(wd, hd); - datas = dpixGetData(dpixs); - datad = dpixGetData(dpixd); - wpls = dpixGetWpl(dpixs); - wpld = dpixGetWpl(dpixd); - fract = (l_float64 *)LEPT_CALLOC(factor, sizeof(l_float64)); - for (i = 0; i < factor; i++) - fract[i] = i / (l_float64)factor; - for (i = 0; i < hs - 1; i++) { - lines = datas + i * wpls; - for (j = 0; j < ws - 1; j++) { - val0 = lines[j]; - val1 = lines[j + 1]; - val2 = lines[wpls + j]; - val3 = lines[wpls + j + 1]; - for (k = 0; k < factor; k++) { /* rows of sub-block */ - lined = datad + (i * factor + k) * wpld; - for (m = 0; m < factor; m++) { /* cols of sub-block */ - lined[j * factor + m] = - val0 * (1.0 - fract[m]) * (1.0 - fract[k]) + - val1 * fract[m] * (1.0 - fract[k]) + - val2 * (1.0 - fract[m]) * fract[k] + - val3 * fract[m] * fract[k]; - } - } - } - } - - /* Do the right-most column of dpixd, skipping LR corner */ - for (i = 0; i < hs - 1; i++) { - lines = datas + i * wpls; - val0 = lines[ws - 1]; - val1 = lines[wpls + ws - 1]; - for (k = 0; k < factor; k++) { - lined = datad + (i * factor + k) * wpld; - lined[wd - 1] = val0 * (1.0 - fract[k]) + val1 * fract[k]; - } - } - - /* Do the bottom-most row of dpixd */ - lines = datas + (hs - 1) * wpls; - lined = datad + (hd - 1) * wpld; - for (j = 0; j < ws - 1; j++) { - val0 = lines[j]; - val1 = lines[j + 1]; - for (m = 0; m < factor; m++) - lined[j * factor + m] = val0 * (1.0 - fract[m]) + val1 * fract[m]; - lined[wd - 1] = lines[ws - 1]; /* LR corner */ - } - - LEPT_FREE(fract); - return dpixd; -} - - 
-/*--------------------------------------------------------------------* - * Arithmetic operations * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixLinearCombination() - * - * \param[in] fpixd [optional] this can be null, equal to fpixs1, or - * different from fpixs1 - * \param[in] fpixs1 can be equal to fpixd - * \param[in] fpixs2 - * \param[in] a, b multiplication factors on fpixs1 and fpixs2, rsp. - * \return fpixd always - * - *
- * Notes:
- *      (1) Computes pixelwise linear combination: a * src1 + b * src2
- *      (2) Alignment is to UL corner.
- *      (3) There are 3 cases.  The result can go to a new dest,
- *          in-place to fpixs1, or to an existing input dest:
- *          * fpixd == null:   (src1 + src2) --> new fpixd
- *          * fpixd == fpixs1:  (src1 + src2) --> src1  (in-place)
- *          * fpixd != fpixs1: (src1 + src2) --> input fpixd
- *      (4) fpixs2 must be different from both fpixd and fpixs1.
- * 
- */ -FPIX * -fpixLinearCombination(FPIX *fpixd, - FPIX *fpixs1, - FPIX *fpixs2, - l_float32 a, - l_float32 b) -{ -l_int32 i, j, ws, hs, w, h, wpls, wpld; -l_float32 *datas, *datad, *lines, *lined; - - PROCNAME("fpixLinearCombination"); - - if (!fpixs1) - return (FPIX *)ERROR_PTR("fpixs1 not defined", procName, fpixd); - if (!fpixs2) - return (FPIX *)ERROR_PTR("fpixs2 not defined", procName, fpixd); - if (fpixs1 == fpixs2) - return (FPIX *)ERROR_PTR("fpixs1 == fpixs2", procName, fpixd); - if (fpixs2 == fpixd) - return (FPIX *)ERROR_PTR("fpixs2 == fpixd", procName, fpixd); - - if (fpixs1 != fpixd) - fpixd = fpixCopy(fpixd, fpixs1); - - datas = fpixGetData(fpixs2); - datad = fpixGetData(fpixd); - wpls = fpixGetWpl(fpixs2); - wpld = fpixGetWpl(fpixd); - fpixGetDimensions(fpixs2, &ws, &hs); - fpixGetDimensions(fpixd, &w, &h); - w = L_MIN(ws, w); - h = L_MIN(hs, h); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) - lined[j] = a * lined[j] + b * lines[j]; - } - - return fpixd; -} - - -/*! - * \brief fpixAddMultConstant() - * - * \param[in] fpix - * \param[in] addc use 0.0 to skip the operation - * \param[in] multc use 1.0 to skip the operation - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) It can be used to multiply each pixel by a constant,
- *          and also to add a constant to each pixel.  Multiplication
- *          is done first.
- * 
- */ -l_ok -fpixAddMultConstant(FPIX *fpix, - l_float32 addc, - l_float32 multc) -{ -l_int32 i, j, w, h, wpl; -l_float32 *line, *data; - - PROCNAME("fpixAddMultConstant"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - if (addc == 0.0 && multc == 1.0) - return 0; - - fpixGetDimensions(fpix, &w, &h); - data = fpixGetData(fpix); - wpl = fpixGetWpl(fpix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (addc == 0.0) { - for (j = 0; j < w; j++) - line[j] *= multc; - } else if (multc == 1.0) { - for (j = 0; j < w; j++) - line[j] += addc; - } else { - for (j = 0; j < w; j++) { - line[j] = multc * line[j] + addc; - } - } - } - - return 0; -} - - -/*! - * \brief dpixLinearCombination() - * - * \param[in] dpixd [optional] this can be null, equal to dpixs1, or - * different from dpixs1 - * \param[in] dpixs1 can be equal to dpixd - * \param[in] dpixs2 - * \param[in] a, b multiplication factors on dpixs1 and dpixs2, rsp. - * \return dpixd always - * - *
- * Notes:
- *      (1) Computes pixelwise linear combination: a * src1 + b * src2
- *      (2) Alignment is to UL corner.
- *      (3) There are 3 cases.  The result can go to a new dest,
- *          in-place to dpixs1, or to an existing input dest:
- *          * dpixd == null:   (src1 + src2) --> new dpixd
- *          * dpixd == dpixs1:  (src1 + src2) --> src1  (in-place)
- *          * dpixd != dpixs1: (src1 + src2) --> input dpixd
- *      (4) dpixs2 must be different from both dpixd and dpixs1.
- * 
- */ -DPIX * -dpixLinearCombination(DPIX *dpixd, - DPIX *dpixs1, - DPIX *dpixs2, - l_float32 a, - l_float32 b) -{ -l_int32 i, j, ws, hs, w, h, wpls, wpld; -l_float64 *datas, *datad, *lines, *lined; - - PROCNAME("dpixLinearCombination"); - - if (!dpixs1) - return (DPIX *)ERROR_PTR("dpixs1 not defined", procName, dpixd); - if (!dpixs2) - return (DPIX *)ERROR_PTR("dpixs2 not defined", procName, dpixd); - if (dpixs1 == dpixs2) - return (DPIX *)ERROR_PTR("dpixs1 == dpixs2", procName, dpixd); - if (dpixs2 == dpixd) - return (DPIX *)ERROR_PTR("dpixs2 == dpixd", procName, dpixd); - - if (dpixs1 != dpixd) - dpixd = dpixCopy(dpixd, dpixs1); - - datas = dpixGetData(dpixs2); - datad = dpixGetData(dpixd); - wpls = dpixGetWpl(dpixs2); - wpld = dpixGetWpl(dpixd); - dpixGetDimensions(dpixs2, &ws, &hs); - dpixGetDimensions(dpixd, &w, &h); - w = L_MIN(ws, w); - h = L_MIN(hs, h); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) - lined[j] = a * lined[j] + b * lines[j]; - } - - return dpixd; -} - - -/*! - * \brief dpixAddMultConstant() - * - * \param[in] dpix - * \param[in] addc use 0.0 to skip the operation - * \param[in] multc use 1.0 to skip the operation - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) It can be used to multiply each pixel by a constant,
- *          and also to add a constant to each pixel.  Multiplication
- *          is done first.
- * 
- */ -l_ok -dpixAddMultConstant(DPIX *dpix, - l_float64 addc, - l_float64 multc) -{ -l_int32 i, j, w, h, wpl; -l_float64 *line, *data; - - PROCNAME("dpixAddMultConstant"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - if (addc == 0.0 && multc == 1.0) - return 0; - - dpixGetDimensions(dpix, &w, &h); - data = dpixGetData(dpix); - wpl = dpixGetWpl(dpix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (addc == 0.0) { - for (j = 0; j < w; j++) - line[j] *= multc; - } else if (multc == 1.0) { - for (j = 0; j < w; j++) - line[j] += addc; - } else { - for (j = 0; j < w; j++) - line[j] = multc * line[j] + addc; - } - } - - return 0; -} - - -/*--------------------------------------------------------------------* - * Set all * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixSetAllArbitrary() - * - * \param[in] fpix - * \param[in] inval to set at each pixel - * \return 0 if OK, 1 on error - */ -l_ok -fpixSetAllArbitrary(FPIX *fpix, - l_float32 inval) -{ -l_int32 i, j, w, h; -l_float32 *data, *line; - - PROCNAME("fpixSetAllArbitrary"); - - if (!fpix) - return ERROR_INT("fpix not defined", procName, 1); - - fpixGetDimensions(fpix, &w, &h); - data = fpixGetData(fpix); - for (i = 0; i < h; i++) { - line = data + i * w; - for (j = 0; j < w; j++) - *(line + j) = inval; - } - - return 0; -} - - -/*! 
- * \brief dpixSetAllArbitrary() - * - * \param[in] dpix - * \param[in] inval to set at each pixel - * \return 0 if OK, 1 on error - */ -l_ok -dpixSetAllArbitrary(DPIX *dpix, - l_float64 inval) -{ -l_int32 i, j, w, h; -l_float64 *data, *line; - - PROCNAME("dpixSetAllArbitrary"); - - if (!dpix) - return ERROR_INT("dpix not defined", procName, 1); - - dpixGetDimensions(dpix, &w, &h); - data = dpixGetData(dpix); - for (i = 0; i < h; i++) { - line = data + i * w; - for (j = 0; j < w; j++) - *(line + j) = inval; - } - - return 0; -} - - -/*--------------------------------------------------------------------* - * Border functions * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixAddBorder() - * - * \param[in] fpixs - * \param[in] left, right, top, bot pixels on each side to be added - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) Adds border of '0' 32-bit pixels
- * 
- */ -FPIX * -fpixAddBorder(FPIX *fpixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 ws, hs, wd, hd; -FPIX *fpixd; - - PROCNAME("fpixAddBorder"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - if (left <= 0 && right <= 0 && top <= 0 && bot <= 0) - return fpixCopy(NULL, fpixs); - fpixGetDimensions(fpixs, &ws, &hs); - wd = ws + left + right; - hd = hs + top + bot; - if ((fpixd = fpixCreate(wd, hd)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - - fpixCopyResolution(fpixd, fpixs); - fpixRasterop(fpixd, left, top, ws, hs, fpixs, 0, 0); - return fpixd; -} - - -/*! - * \brief fpixRemoveBorder() - * - * \param[in] fpixs - * \param[in] left, right, top, bot pixels on each side to be removed - * \return fpixd, or NULL on error - */ -FPIX * -fpixRemoveBorder(FPIX *fpixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 ws, hs, wd, hd; -FPIX *fpixd; - - PROCNAME("fpixRemoveBorder"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - if (left <= 0 && right <= 0 && top <= 0 && bot <= 0) - return fpixCopy(NULL, fpixs); - fpixGetDimensions(fpixs, &ws, &hs); - wd = ws - left - right; - hd = hs - top - bot; - if (wd <= 0 || hd <= 0) - return (FPIX *)ERROR_PTR("width & height not both > 0", procName, NULL); - if ((fpixd = fpixCreate(wd, hd)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - - fpixCopyResolution(fpixd, fpixs); - fpixRasterop(fpixd, 0, 0, wd, hd, fpixs, left, top); - return fpixd; -} - - - -/*! - * \brief fpixAddMirroredBorder() - * - * \param[in] fpixs - * \param[in] left, right, top, bot pixels on each side to be added - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) See pixAddMirroredBorder() for situations of usage.
- * 
- */ -FPIX * -fpixAddMirroredBorder(FPIX *fpixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 i, j, w, h; -FPIX *fpixd; - - PROCNAME("fpixAddMirroredBorder"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixd = fpixAddBorder(fpixs, left, right, top, bot); - fpixGetDimensions(fpixs, &w, &h); - for (j = 0; j < left; j++) - fpixRasterop(fpixd, left - 1 - j, top, 1, h, - fpixd, left + j, top); - for (j = 0; j < right; j++) - fpixRasterop(fpixd, left + w + j, top, 1, h, - fpixd, left + w - 1 - j, top); - for (i = 0; i < top; i++) - fpixRasterop(fpixd, 0, top - 1 - i, left + w + right, 1, - fpixd, 0, top + i); - for (i = 0; i < bot; i++) - fpixRasterop(fpixd, 0, top + h + i, left + w + right, 1, - fpixd, 0, top + h - 1 - i); - - return fpixd; -} - - -/*! - * \brief fpixAddContinuedBorder() - * - * \param[in] fpixs - * \param[in] left, right, top, bot pixels on each side to be added - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This adds pixels on each side whose values are equal to
- *          the value on the closest boundary pixel.
- * 
- */ -FPIX * -fpixAddContinuedBorder(FPIX *fpixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 i, j, w, h; -FPIX *fpixd; - - PROCNAME("fpixAddContinuedBorder"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixd = fpixAddBorder(fpixs, left, right, top, bot); - fpixGetDimensions(fpixs, &w, &h); - for (j = 0; j < left; j++) - fpixRasterop(fpixd, j, top, 1, h, fpixd, left, top); - for (j = 0; j < right; j++) - fpixRasterop(fpixd, left + w + j, top, 1, h, fpixd, left + w - 1, top); - for (i = 0; i < top; i++) - fpixRasterop(fpixd, 0, i, left + w + right, 1, fpixd, 0, top); - for (i = 0; i < bot; i++) - fpixRasterop(fpixd, 0, top + h + i, left + w + right, 1, - fpixd, 0, top + h - 1); - - return fpixd; -} - - -/*! - * \brief fpixAddSlopeBorder() - * - * \param[in] fpixs - * \param[in] left, right, top, bot pixels on each side to be added - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This adds pixels on each side whose values have a normal
- *          derivative equal to the normal derivative at the boundary
- *          of fpixs.
- * 
- */ -FPIX * -fpixAddSlopeBorder(FPIX *fpixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 i, j, w, h, fullw, fullh; -l_float32 val1, val2, del; -FPIX *fpixd; - - PROCNAME("fpixAddSlopeBorder"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixd = fpixAddBorder(fpixs, left, right, top, bot); - fpixGetDimensions(fpixs, &w, &h); - - /* Left */ - for (i = top; i < top + h; i++) { - fpixGetPixel(fpixd, left, i, &val1); - fpixGetPixel(fpixd, left + 1, i, &val2); - del = val1 - val2; - for (j = 0; j < left; j++) - fpixSetPixel(fpixd, j, i, val1 + del * (left - j)); - } - - /* Right */ - fullw = left + w + right; - for (i = top; i < top + h; i++) { - fpixGetPixel(fpixd, left + w - 1, i, &val1); - fpixGetPixel(fpixd, left + w - 2, i, &val2); - del = val1 - val2; - for (j = left + w; j < fullw; j++) - fpixSetPixel(fpixd, j, i, val1 + del * (j - left - w + 1)); - } - - /* Top */ - for (j = 0; j < fullw; j++) { - fpixGetPixel(fpixd, j, top, &val1); - fpixGetPixel(fpixd, j, top + 1, &val2); - del = val1 - val2; - for (i = 0; i < top; i++) - fpixSetPixel(fpixd, j, i, val1 + del * (top - i)); - } - - /* Bottom */ - fullh = top + h + bot; - for (j = 0; j < fullw; j++) { - fpixGetPixel(fpixd, j, top + h - 1, &val1); - fpixGetPixel(fpixd, j, top + h - 2, &val2); - del = val1 - val2; - for (i = top + h; i < fullh; i++) - fpixSetPixel(fpixd, j, i, val1 + del * (i - top - h + 1)); - } - - return fpixd; -} - - -/*--------------------------------------------------------------------* - * Simple rasterop * - *--------------------------------------------------------------------*/ -/*! 
- * \brief fpixRasterop() - * - * \param[in] fpixd dest fpix - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] fpixs src fpix - * \param[in] sx x val of UL corner of src rectangle - * \param[in] sy y val of UL corner of src rectangle - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) This is similar in structure to pixRasterop(), except
- *          it only allows copying from the source into the destination.
- *          For that reason, no op code is necessary.  Additionally,
- *          all pixels are 32 bit words (float values), which makes
- *          the copy very simple.
- *      (2) Clipping of both src and dest fpix are done automatically.
- *      (3) This allows in-place copying, without checking to see if
- *          the result is valid:  use for in-place with caution!
- * 
- */ -l_ok -fpixRasterop(FPIX *fpixd, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - FPIX *fpixs, - l_int32 sx, - l_int32 sy) -{ -l_int32 fsw, fsh, fdw, fdh, dhangw, shangw, dhangh, shangh; -l_int32 i, j, wpls, wpld; -l_float32 *datas, *datad, *lines, *lined; - - PROCNAME("fpixRasterop"); - - if (!fpixs) - return ERROR_INT("fpixs not defined", procName, 1); - if (!fpixd) - return ERROR_INT("fpixd not defined", procName, 1); - - /* -------------------------------------------------------- * - * Clip to maximum rectangle with both src and dest * - * -------------------------------------------------------- */ - fpixGetDimensions(fpixs, &fsw, &fsh); - fpixGetDimensions(fpixd, &fdw, &fdh); - - /* First clip horizontally (sx, dx, dw) */ - if (dx < 0) { - sx -= dx; /* increase sx */ - dw += dx; /* reduce dw */ - dx = 0; - } - if (sx < 0) { - dx -= sx; /* increase dx */ - dw += sx; /* reduce dw */ - sx = 0; - } - dhangw = dx + dw - fdw; /* rect overhang of dest to right */ - if (dhangw > 0) - dw -= dhangw; /* reduce dw */ - shangw = sx + dw - fsw; /* rect overhang of src to right */ - if (shangw > 0) - dw -= shangw; /* reduce dw */ - - /* Then clip vertically (sy, dy, dh) */ - if (dy < 0) { - sy -= dy; /* increase sy */ - dh += dy; /* reduce dh */ - dy = 0; - } - if (sy < 0) { - dy -= sy; /* increase dy */ - dh += sy; /* reduce dh */ - sy = 0; - } - dhangh = dy + dh - fdh; /* rect overhang of dest below */ - if (dhangh > 0) - dh -= dhangh; /* reduce dh */ - shangh = sy + dh - fsh; /* rect overhang of src below */ - if (shangh > 0) - dh -= shangh; /* reduce dh */ - - /* if clipped entirely, quit */ - if ((dw <= 0) || (dh <= 0)) - return 0; - - /* -------------------------------------------------------- * - * Copy block of data * - * -------------------------------------------------------- */ - datas = fpixGetData(fpixs); - datad = fpixGetData(fpixd); - wpls = fpixGetWpl(fpixs); - wpld = fpixGetWpl(fpixd); - datas += sy * wpls + sx; /* at UL corner of block */ - 
datad += dy * wpld + dx; /* at UL corner of block */ - for (i = 0; i < dh; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < dw; j++) { - *lined = *lines; - lines++; - lined++; - } - } - - return 0; -} - - -/*--------------------------------------------------------------------* - * Rotation by multiples of 90 degrees * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixRotateOrth() - * - * \param[in] fpixs - * \param[in] quads 0-3; number of 90 degree cw rotations - * \return fpixd, or NULL on error - */ -FPIX * -fpixRotateOrth(FPIX *fpixs, - l_int32 quads) -{ - PROCNAME("fpixRotateOrth"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (quads < 0 || quads > 3) - return (FPIX *)ERROR_PTR("quads not in {0,1,2,3}", procName, NULL); - - if (quads == 0) - return fpixCopy(NULL, fpixs); - else if (quads == 1) - return fpixRotate90(fpixs, 1); - else if (quads == 2) - return fpixRotate180(NULL, fpixs); - else /* quads == 3 */ - return fpixRotate90(fpixs, -1); -} - - -/*! - * \brief fpixRotate180() - * - * \param[in] fpixd [optional] can be null, equal to fpixs, - * or different from fpixs - * \param[in] fpixs - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a 180 rotation of the image about the center,
- *          which is equivalent to a left-right flip about a vertical
- *          line through the image center, followed by a top-bottom
- *          flip about a horizontal line through the image center.
- *      (2) There are 3 cases for input:
- *          (a) fpixd == null (creates a new fpixd)
- *          (b) fpixd == fpixs (in-place operation)
- *          (c) fpixd != fpixs (existing fpixd)
- *      (3) For clarity, use these three patterns, respectively:
- *          (a) fpixd = fpixRotate180(NULL, fpixs);
- *          (b) fpixRotate180(fpixs, fpixs);
- *          (c) fpixRotate180(fpixd, fpixs);
- * 
- */ -FPIX * -fpixRotate180(FPIX *fpixd, - FPIX *fpixs) -{ - PROCNAME("fpixRotate180"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - /* Prepare pixd for in-place operation */ - if ((fpixd = fpixCopy(fpixd, fpixs)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - - fpixFlipLR(fpixd, fpixd); - fpixFlipTB(fpixd, fpixd); - return fpixd; -} - - -/*! - * \brief fpixRotate90() - * - * \param[in] fpixs - * \param[in] direction 1 = clockwise; -1 = counter-clockwise - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a 90 degree rotation of the image about the center,
- *          either cw or ccw, returning a new pix.
- *      (2) The direction must be either 1 (cw) or -1 (ccw).
- * 
- */ -FPIX * -fpixRotate90(FPIX *fpixs, - l_int32 direction) -{ -l_int32 i, j, wd, hd, wpls, wpld; -l_float32 *datas, *datad, *lines, *lined; -FPIX *fpixd; - - PROCNAME("fpixRotate90"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (direction != 1 && direction != -1) - return (FPIX *)ERROR_PTR("invalid direction", procName, NULL); - - fpixGetDimensions(fpixs, &hd, &wd); - if ((fpixd = fpixCreate(wd, hd)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - fpixCopyResolution(fpixd, fpixs); - - datas = fpixGetData(fpixs); - wpls = fpixGetWpl(fpixs); - datad = fpixGetData(fpixd); - wpld = fpixGetWpl(fpixd); - if (direction == 1) { /* clockwise */ - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas + (wd - 1) * wpls; - for (j = 0; j < wd; j++) { - lined[j] = lines[i]; - lines -= wpls; - } - } - } else { /* ccw */ - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas; - for (j = 0; j < wd; j++) { - lined[j] = lines[hd - 1 - i]; - lines += wpls; - } - } - } - - return fpixd; -} - - -/*! - * \brief pixFlipLR() - * - * \param[in] fpixd [optional] can be null, equal to fpixs, - * or different from fpixs - * \param[in] fpixs - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a left-right flip of the image, which is
- *          equivalent to a rotation out of the plane about a
- *          vertical line through the image center.
- *      (2) There are 3 cases for input:
- *          (a) fpixd == null (creates a new fpixd)
- *          (b) fpixd == fpixs (in-place operation)
- *          (c) fpixd != fpixs (existing fpixd)
- *      (3) For clarity, use these three patterns, respectively:
- *          (a) fpixd = fpixFlipLR(NULL, fpixs);
- *          (b) fpixFlipLR(fpixs, fpixs);
- *          (c) fpixFlipLR(fpixd, fpixs);
- *      (4) If an existing fpixd is not the same size as fpixs, the
- *          image data will be reallocated.
- * 
- */ -FPIX * -fpixFlipLR(FPIX *fpixd, - FPIX *fpixs) -{ -l_int32 i, j, w, h, wpl, bpl; -l_float32 *line, *data, *buffer; - - PROCNAME("fpixFlipLR"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - fpixGetDimensions(fpixs, &w, &h); - - /* Prepare fpixd for in-place operation */ - if ((fpixd = fpixCopy(fpixd, fpixs)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - - data = fpixGetData(fpixd); - wpl = fpixGetWpl(fpixd); /* 4-byte words */ - bpl = 4 * wpl; - if ((buffer = (l_float32 *)LEPT_CALLOC(wpl, sizeof(l_float32))) == NULL) { - fpixDestroy(&fpixd); - return (FPIX *)ERROR_PTR("buffer not made", procName, NULL); - } - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, line, bpl); - for (j = 0; j < w; j++) - line[j] = buffer[w - 1 - j]; - } - LEPT_FREE(buffer); - return fpixd; -} - - -/*! - * \brief fpixFlipTB() - * - * \param[in] fpixd [optional] can be null, equal to fpixs, - * or different from fpixs - * \param[in] fpixs - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a top-bottom flip of the image, which is
- *          equivalent to a rotation out of the plane about a
- *          horizontal line through the image center.
- *      (2) There are 3 cases for input:
- *          (a) fpixd == null (creates a new fpixd)
- *          (b) fpixd == fpixs (in-place operation)
- *          (c) fpixd != fpixs (existing fpixd)
- *      (3) For clarity, use these three patterns, respectively:
- *          (a) fpixd = fpixFlipTB(NULL, fpixs);
- *          (b) fpixFlipTB(fpixs, fpixs);
- *          (c) fpixFlipTB(fpixd, fpixs);
- *      (4) If an existing fpixd is not the same size as fpixs, the
- *          image data will be reallocated.
- * 
- */ -FPIX * -fpixFlipTB(FPIX *fpixd, - FPIX *fpixs) -{ -l_int32 i, k, h, h2, wpl, bpl; -l_float32 *linet, *lineb, *data, *buffer; - - PROCNAME("fpixFlipTB"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - - /* Prepare fpixd for in-place operation */ - if ((fpixd = fpixCopy(fpixd, fpixs)) == NULL) - return (FPIX *)ERROR_PTR("fpixd not made", procName, NULL); - - data = fpixGetData(fpixd); - wpl = fpixGetWpl(fpixd); - fpixGetDimensions(fpixd, NULL, &h); - if ((buffer = (l_float32 *)LEPT_CALLOC(wpl, sizeof(l_float32))) == NULL) { - fpixDestroy(&fpixd); - return (FPIX *)ERROR_PTR("buffer not made", procName, NULL); - } - h2 = h / 2; - bpl = 4 * wpl; - for (i = 0, k = h - 1; i < h2; i++, k--) { - linet = data + i * wpl; - lineb = data + k * wpl; - memcpy(buffer, linet, bpl); - memcpy(linet, lineb, bpl); - memcpy(lineb, buffer, bpl); - } - LEPT_FREE(buffer); - return fpixd; -} - - -/*--------------------------------------------------------------------* - * Affine and projective interpolated transforms * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixAffinePta() - * - * \param[in] fpixs 8 bpp - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] border size of extension with constant normal derivative - * \param[in] inval value brought in; typ. 0 - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) If %border > 0, all four sides are extended by that distance,
- *          and removed after the transformation is finished.  Pixels
- *          that would be brought in to the trimmed result from outside
- *          the extended region are assigned %inval.  The purpose of
- *          extending the image is to avoid such assignments.
- *      (2) On the other hand, you may want to give all pixels that
- *          are brought in from outside fpixs a specific value.  In that
- *          case, set %border == 0.
- * 
- */ -FPIX * -fpixAffinePta(FPIX *fpixs, - PTA *ptad, - PTA *ptas, - l_int32 border, - l_float32 inval) -{ -l_float32 *vc; -PTA *ptas2, *ptad2; -FPIX *fpixs2, *fpixd, *fpixd2; - - PROCNAME("fpixAffinePta"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (!ptas) - return (FPIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (FPIX *)ERROR_PTR("ptad not defined", procName, NULL); - - /* If a border is to be added, also translate the ptas */ - if (border > 0) { - ptas2 = ptaTransform(ptas, border, border, 1.0, 1.0); - ptad2 = ptaTransform(ptad, border, border, 1.0, 1.0); - fpixs2 = fpixAddSlopeBorder(fpixs, border, border, border, border); - } else { - ptas2 = ptaClone(ptas); - ptad2 = ptaClone(ptad); - fpixs2 = fpixClone(fpixs); - } - - /* Get backwards transform from dest to src, and apply it */ - getAffineXformCoeffs(ptad2, ptas2, &vc); - fpixd2 = fpixAffine(fpixs2, vc, inval); - fpixDestroy(&fpixs2); - ptaDestroy(&ptas2); - ptaDestroy(&ptad2); - LEPT_FREE(vc); - - if (border == 0) - return fpixd2; - - /* Remove the added border */ - fpixd = fpixRemoveBorder(fpixd2, border, border, border, border); - fpixDestroy(&fpixd2); - return fpixd; -} - - -/*! - * \brief fpixAffine() - * - * \param[in] fpixs 8 bpp - * \param[in] vc vector of 8 coefficients for projective transformation - * \param[in] inval value brought in; typ. 
0 - * \return fpixd, or NULL on error - */ -FPIX * -fpixAffine(FPIX *fpixs, - l_float32 *vc, - l_float32 inval) -{ -l_int32 i, j, w, h, wpld; -l_float32 val; -l_float32 *datas, *datad, *lined; -l_float32 x, y; -FPIX *fpixd; - - PROCNAME("fpixAffine"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - fpixGetDimensions(fpixs, &w, &h); - if (!vc) - return (FPIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = fpixGetData(fpixs); - fpixd = fpixCreateTemplate(fpixs); - fpixSetAllArbitrary(fpixd, inval); - datad = fpixGetData(fpixd); - wpld = fpixGetWpl(fpixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - affineXformPt(vc, j, i, &x, &y); - linearInterpolatePixelFloat(datas, w, h, x, y, inval, &val); - *(lined + j) = val; - } - } - - return fpixd; -} - - -/*! - * \brief fpixProjectivePta() - * - * \param[in] fpixs 8 bpp - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] border size of extension with constant normal derivative - * \param[in] inval value brought in; typ. 0 - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) If %border > 0, all four sides are extended by that distance,
- *          and removed after the transformation is finished.  Pixels
- *          that would be brought in to the trimmed result from outside
- *          the extended region are assigned %inval.  The purpose of
- *          extending the image is to avoid such assignments.
- *      (2) On the other hand, you may want to give all pixels that
- *          are brought in from outside fpixs a specific value.  In that
- *          case, set %border == 0.
- * 
- */ -FPIX * -fpixProjectivePta(FPIX *fpixs, - PTA *ptad, - PTA *ptas, - l_int32 border, - l_float32 inval) -{ -l_float32 *vc; -PTA *ptas2, *ptad2; -FPIX *fpixs2, *fpixd, *fpixd2; - - PROCNAME("fpixProjectivePta"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (!ptas) - return (FPIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (FPIX *)ERROR_PTR("ptad not defined", procName, NULL); - - /* If a border is to be added, also translate the ptas */ - if (border > 0) { - ptas2 = ptaTransform(ptas, border, border, 1.0, 1.0); - ptad2 = ptaTransform(ptad, border, border, 1.0, 1.0); - fpixs2 = fpixAddSlopeBorder(fpixs, border, border, border, border); - } else { - ptas2 = ptaClone(ptas); - ptad2 = ptaClone(ptad); - fpixs2 = fpixClone(fpixs); - } - - /* Get backwards transform from dest to src, and apply it */ - getProjectiveXformCoeffs(ptad2, ptas2, &vc); - fpixd2 = fpixProjective(fpixs2, vc, inval); - fpixDestroy(&fpixs2); - ptaDestroy(&ptas2); - ptaDestroy(&ptad2); - LEPT_FREE(vc); - - if (border == 0) - return fpixd2; - - /* Remove the added border */ - fpixd = fpixRemoveBorder(fpixd2, border, border, border, border); - fpixDestroy(&fpixd2); - return fpixd; -} - - -/*! - * \brief fpixProjective() - * - * \param[in] fpixs 8 bpp - * \param[in] vc vector of 8 coefficients for projective transform - * \param[in] inval value brought in; typ. 
0 - * \return fpixd, or NULL on error - */ -FPIX * -fpixProjective(FPIX *fpixs, - l_float32 *vc, - l_float32 inval) -{ -l_int32 i, j, w, h, wpld; -l_float32 val; -l_float32 *datas, *datad, *lined; -l_float32 x, y; -FPIX *fpixd; - - PROCNAME("fpixProjective"); - - if (!fpixs) - return (FPIX *)ERROR_PTR("fpixs not defined", procName, NULL); - fpixGetDimensions(fpixs, &w, &h); - if (!vc) - return (FPIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = fpixGetData(fpixs); - fpixd = fpixCreateTemplate(fpixs); - fpixSetAllArbitrary(fpixd, inval); - datad = fpixGetData(fpixd); - wpld = fpixGetWpl(fpixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - projectiveXformPt(vc, j, i, &x, &y); - linearInterpolatePixelFloat(datas, w, h, x, y, inval, &val); - *(lined + j) = val; - } - } - - return fpixd; -} - - -/*! - * \brief linearInterpolatePixelFloat() - * - * \param[in] datas ptr to beginning of float image data - * \param[in] w, h dimensions of image - * \param[in] x, y floating pt location for evaluation - * \param[in] inval float value brought in from the outside when the - * input x,y location is outside the image - * \param[out] pval interpolated float value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a standard linear interpolation function.  It is
- *          equivalent to area weighting on each component, and
- *          avoids "jaggies" when rendering sharp edges.
- * 
- */ -l_ok -linearInterpolatePixelFloat(l_float32 *datas, - l_int32 w, - l_int32 h, - l_float32 x, - l_float32 y, - l_float32 inval, - l_float32 *pval) -{ -l_int32 xpm, ypm, xp, yp, xf, yf; -l_float32 v00, v01, v10, v11; -l_float32 *lines; - - PROCNAME("linearInterpolatePixelFloat"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = inval; - if (!datas) - return ERROR_INT("datas not defined", procName, 1); - - /* Skip if off the edge */ - if (x < 0.0 || y < 0.0 || x > w - 2.0 || y > h - 2.0) - return 0; - - xpm = (l_int32)(16.0 * x + 0.5); - ypm = (l_int32)(16.0 * y + 0.5); - xp = xpm >> 4; - yp = ypm >> 4; - xf = xpm & 0x0f; - yf = ypm & 0x0f; - -#if DEBUG - if (xf < 0 || yf < 0) - lept_stderr("xp = %d, yp = %d, xf = %d, yf = %d\n", xp, yp, xf, yf); -#endif /* DEBUG */ - - /* Interpolate by area weighting. */ - lines = datas + yp * w; - v00 = (16.0 - xf) * (16.0 - yf) * (*(lines + xp)); - v10 = xf * (16.0 - yf) * (*(lines + xp + 1)); - v01 = (16.0 - xf) * yf * (*(lines + w + xp)); - v11 = (l_float32)(xf) * yf * (*(lines + w + xp + 1)); - *pval = (v00 + v01 + v10 + v11) / 256.0; - return 0; -} - - -/*--------------------------------------------------------------------* - * Thresholding to 1 bpp Pix * - *--------------------------------------------------------------------*/ -/*! - * \brief fpixThresholdToPix() - * - * \param[in] fpix - * \param[in] thresh - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) For all values of fpix that are <= thresh, sets the pixel
- *          in pixd to 1.
- * 
- */ -PIX * -fpixThresholdToPix(FPIX *fpix, - l_float32 thresh) -{ -l_int32 i, j, w, h, wpls, wpld; -l_float32 *datas, *lines; -l_uint32 *datad, *lined; -PIX *pixd; - - PROCNAME("fpixThresholdToPix"); - - if (!fpix) - return (PIX *)ERROR_PTR("fpix not defined", procName, NULL); - - fpixGetDimensions(fpix, &w, &h); - datas = fpixGetData(fpix); - wpls = fpixGetWpl(fpix); - pixd = pixCreate(w, h, 1); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (lines[j] <= thresh) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*--------------------------------------------------------------------* - * Generate function from components * - *--------------------------------------------------------------------*/ -/*! - * \brief pixComponentFunction() - * - * \param[in] pix 32 bpp rgb - * \param[in] rnum, gnum, bnum coefficients for numerator - * \param[in] rdenom, gdenom, bdenom coefficients for denominator - * \return fpixd, or NULL on error - * - *
- * Notes:
- *      (1) This stores a function of the component values of each
- *          input pixel in %fpixd.
- *      (2) The function is a ratio of linear combinations of component values.
- *          There are two special cases for denominator coefficients:
- *          (a) The denominator is 1.0: input 0 for all denominator coefficients
- *          (b) Only one component is used in the denominator: input 1.0
- *              for that denominator component and 0.0 for the other two.
- *      (3) If the denominator is 0, multiply by an arbitrary number that
- *          is much larger than 1.  Choose 256 "arbitrarily".
- *
- * 
- */ -FPIX * -pixComponentFunction(PIX *pix, - l_float32 rnum, - l_float32 gnum, - l_float32 bnum, - l_float32 rdenom, - l_float32 gdenom, - l_float32 bdenom) -{ -l_int32 i, j, w, h, wpls, wpld, rval, gval, bval, zerodenom, onedenom; -l_float32 fnum, fdenom; -l_uint32 *datas, *lines; -l_float32 *datad, *lined, *recip; -FPIX *fpixd; - - PROCNAME("pixComponentFunction"); - - if (!pix || pixGetDepth(pix) != 32) - return (FPIX *)ERROR_PTR("pix undefined or not 32 bpp", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - datas = pixGetData(pix); - wpls = pixGetWpl(pix); - fpixd = fpixCreate(w, h); - datad = fpixGetData(fpixd); - wpld = fpixGetWpl(fpixd); - zerodenom = (rdenom == 0.0 && gdenom == 0.0 && bdenom == 0.0) ? 1: 0; - onedenom = ((rdenom == 1.0 && gdenom == 0.0 && bdenom == 0.0) || - (rdenom == 0.0 && gdenom == 1.0 && bdenom == 0.0) || - (rdenom == 0.0 && gdenom == 0.0 && bdenom == 1.0)) ? 1 : 0; - recip = NULL; - if (onedenom) { - recip = (l_float32 *)LEPT_CALLOC(256, sizeof(l_float32)); - recip[0] = 256; /* arbitrary large number */ - for (i = 1; i < 256; i++) - recip[i] = 1.0 / (l_float32)i; - } - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (zerodenom) { - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - lined[j] = rnum * rval + gnum * gval + bnum * bval; - } - } else if (onedenom && rdenom == 1.0) { - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - lined[j] - = recip[rval] * (rnum * rval + gnum * gval + bnum * bval); - } - } else if (onedenom && gdenom == 1.0) { - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - lined[j] - = recip[gval] * (rnum * rval + gnum * gval + bnum * bval); - } - } else if (onedenom && bdenom == 1.0) { - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - lined[j] - = recip[bval] * (rnum * rval + gnum * gval + bnum * bval); - } - } else { /* general case */ - for (j = 
0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - fnum = rnum * rval + gnum * gval + bnum * bval; - fdenom = rdenom * rval + gdenom * gval + bdenom * bval; - lined[j] = (fdenom == 0) ? 256.0 * fnum : fnum / fdenom; - } - } - } - - LEPT_FREE(recip); - return fpixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gifio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gifio.c deleted file mode 100644 index 2131826a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gifio.c +++ /dev/null @@ -1,675 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file gifio.c - *
- *
- *    Reading gif
- *          PIX            *pixReadStreamGif()
- *          PIX            *pixReadMemGif()
- *          static l_int32  gifReadFunc()
- *          static PIX     *gifToPix()
- *
- *    Writing gif
- *          l_int32         pixWriteStreamGif()
- *          l_int32         pixWriteMemGif()
- *          static l_int32  gifWriteFunc()
- *          static l_int32  pixToGif()
- *
- *    The initial version of this module was generously contribued by
- *    Antony Dovgal.
- *
- *    The functions that read and write from pix to gif-compressed memory,
- *    using gif internal functions DGifOpen() and EGifOpen() that are
- *    available in 5.1 and later, were contributed by Tobias Peirick.
- *
- *    Version information:
- *
- *    (1) This supports the gif library, version 5.1 or later, for which
- *        gif read-from-mem and write-to-mem allow these operations
- *        without writing temporary files.
- *    (2) There has never been a gif stream interface.  For versions
- *        before 5.1, it was necessary to use a file descriptor, and to
- *        generate a file stream from the low-level descriptor.  With the
- *        memory interface in 5.1 that can be used on all platforms, it
- *        is no longer necessary to use any API code with file descriptors.
- *    (3) The public interface changed with 5.0 and with 5.1, and we
- *        no longer support 4.6.1 and 5.0.
- *    (4) Version 5.1.2 came out on Jan 7, 2016.  Leptonica cannot
- *        successfully read gif files that it writes with this version;
- *        DGifSlurp() gets an internal error from an uninitialized array
- *        and returns failure.  The problem was fixed in 5.1.3.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* --------------------------------------------------------------------*/ -#if HAVE_LIBGIF || HAVE_LIBUNGIF /* defined in environ.h */ -/* --------------------------------------------------------------------*/ - -#include "gif_lib.h" - - /* Interface that enables low-level GIF support for reading from memory */ -static PIX * gifToPix(GifFileType *gif); - /* Interface that enables low-level GIF support for writing to memory */ -static l_int32 pixToGif(PIX *pix, GifFileType *gif); - - /*! For in-memory decoding of GIF; 5.1+ */ -typedef struct GifReadBuffer -{ - size_t size; /*!< size of buffer */ - size_t pos; /*!< position relative to beginning of buffer */ - const l_uint8 *cdata; /*!< data in the buffer */ -} GifReadBuffer; - - /*! Low-level callback for in-memory decoding */ -static l_int32 gifReadFunc(GifFileType *gif, GifByteType *dest, - l_int32 bytesToRead); - /*! Low-level callback for in-memory encoding */ -static l_int32 gifWriteFunc(GifFileType *gif, const GifByteType *src, - l_int32 bytesToWrite); - - -/*---------------------------------------------------------------------* - * Reading gif * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixReadStreamGif() - * - * \param[in] fp file stream opened for reading - * \return pix, or NULL on error - */ -PIX * -pixReadStreamGif(FILE *fp) -{ -l_uint8 *filedata; -size_t filesize; -PIX *pix; - - PROCNAME("pixReadStreamGif"); - - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - - /* Read data into memory from file */ - rewind(fp); - if ((filedata = l_binaryReadStream(fp, &filesize)) == NULL) - return (PIX *)ERROR_PTR("filedata not read", procName, NULL); - - /* Uncompress from memory */ - pix = pixReadMemGif(filedata, filesize); - LEPT_FREE(filedata); - if (!pix) - L_ERROR("failed to read gif from file data\n", procName); - return pix; -} - - -/*! - * \brief pixReadMemGif() - * - * \param[in] cdata const; gif-encoded - * \param[in] size bytes data - * \return pix, or NULL on error - * - *
- * Notes:
- *     (1) For libgif version >= 5.1, this uses the DGifOpen() buffer
- *         interface.  No temp files are required.
- *     (2) For libgif version < 5.1, it was necessary to write the compressed
- *         data to file and read it back, and we couldn't use the GNU
- *         runtime extension fmemopen() because libgif doesn't have a file
- *         stream interface.
- * 
- */ -PIX * -pixReadMemGif(const l_uint8 *cdata, - size_t size) -{ -GifFileType *gif; -GifReadBuffer buffer; - - PROCNAME("pixReadMemGif"); - - /* 5.1+ and not 5.1.2 */ -#if (GIFLIB_MAJOR < 5 || (GIFLIB_MAJOR == 5 && GIFLIB_MINOR == 0)) - L_ERROR("Require giflib-5.1 or later\n", procName); - return NULL; -#endif /* < 5.1 */ -#if GIFLIB_MAJOR == 5 && GIFLIB_MINOR == 1 && GIFLIB_RELEASE == 2 /* 5.1.2 */ - L_ERROR("Can't use giflib-5.1.2; suggest 5.1.3 or later\n", procName); - return NULL; -#endif /* 5.1.2 */ - - if (!cdata) - return (PIX *)ERROR_PTR("cdata not defined", procName, NULL); - - buffer.cdata = cdata; - buffer.size = size; - buffer.pos = 0; - if ((gif = DGifOpen((void*)&buffer, gifReadFunc, NULL)) == NULL) - return (PIX *)ERROR_PTR("could not open gif stream from memory", - procName, NULL); - - return gifToPix(gif); -} - - -static l_int32 -gifReadFunc(GifFileType *gif, - GifByteType *dest, - l_int32 bytesToRead) -{ -GifReadBuffer *buffer; -l_int32 bytesRead; - - PROCNAME("gifReadFunc"); - - if ((buffer = (GifReadBuffer*)gif->UserData) == NULL) - return ERROR_INT("UserData not set", procName, -1); - - if(buffer->pos >= buffer->size || bytesToRead > buffer->size) - return -1; - - bytesRead = (buffer->pos < buffer->size - bytesToRead) - ? bytesToRead : buffer->size - buffer->pos; - memcpy(dest, buffer->cdata + buffer->pos, bytesRead); - buffer->pos += bytesRead; - return bytesRead; -} - - -/*! - * \brief gifToPix() - * - * \param[in] gif opened gif stream - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This decodes the pix from the compressed gif stream and
- *          closes the stream.
- *      (2) It is static so that the stream is not exposed to clients.
- * 
- */ -static PIX * -gifToPix(GifFileType *gif) -{ -l_int32 wpl, i, j, w, h, d, cindex, ncolors; -l_int32 rval, gval, bval; -l_uint32 *data, *line; -PIX *pixd; -PIXCMAP *cmap; -ColorMapObject *gif_cmap; -SavedImage si; -int giferr; - - PROCNAME("gifToPix"); - - /* Read all the data, but use only the first image found */ - if (DGifSlurp(gif) != GIF_OK) { - DGifCloseFile(gif, &giferr); - return (PIX *)ERROR_PTR("failed to read GIF data", procName, NULL); - } - - if (gif->SavedImages == NULL) { - DGifCloseFile(gif, &giferr); - return (PIX *)ERROR_PTR("no images found in GIF", procName, NULL); - } - - si = gif->SavedImages[0]; - w = si.ImageDesc.Width; - h = si.ImageDesc.Height; - if (w <= 0 || h <= 0) { - DGifCloseFile(gif, &giferr); - return (PIX *)ERROR_PTR("invalid image dimensions", procName, NULL); - } - - if (si.RasterBits == NULL) { - DGifCloseFile(gif, &giferr); - return (PIX *)ERROR_PTR("no raster data in GIF", procName, NULL); - } - - if (si.ImageDesc.ColorMap) { - /* private cmap for this image */ - gif_cmap = si.ImageDesc.ColorMap; - } else if (gif->SColorMap) { - /* global cmap for whole picture */ - gif_cmap = gif->SColorMap; - } else { - /* don't know where to take cmap from */ - DGifCloseFile(gif, &giferr); - return (PIX *)ERROR_PTR("color map is missing", procName, NULL); - } - - ncolors = gif_cmap->ColorCount; - if (ncolors <= 2) - d = 1; - else if (ncolors <= 4) - d = 2; - else if (ncolors <= 16) - d = 4; - else - d = 8; - if ((cmap = pixcmapCreate(d)) == NULL) { - DGifCloseFile(gif, &giferr); - return (PIX *)ERROR_PTR("cmap creation failed", procName, NULL); - } - - for (cindex = 0; cindex < ncolors; cindex++) { - rval = gif_cmap->Colors[cindex].Red; - gval = gif_cmap->Colors[cindex].Green; - bval = gif_cmap->Colors[cindex].Blue; - pixcmapAddColor(cmap, rval, gval, bval); - } - - if ((pixd = pixCreate(w, h, d)) == NULL) { - DGifCloseFile(gif, &giferr); - pixcmapDestroy(&cmap); - return (PIX *)ERROR_PTR("failed to allocate pixd", procName, NULL); - } 
- pixSetInputFormat(pixd, IFF_GIF); - pixSetColormap(pixd, cmap); - - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (d == 1) { - for (j = 0; j < w; j++) { - if (si.RasterBits[i * w + j]) - SET_DATA_BIT(line, j); - } - } else if (d == 2) { - for (j = 0; j < w; j++) - SET_DATA_DIBIT(line, j, si.RasterBits[i * w + j]); - } else if (d == 4) { - for (j = 0; j < w; j++) - SET_DATA_QBIT(line, j, si.RasterBits[i * w + j]); - } else { /* d == 8 */ - for (j = 0; j < w; j++) - SET_DATA_BYTE(line, j, si.RasterBits[i * w + j]); - } - } - - /* Versions before 5.0 required un-interlacing to restore - * the raster lines to normal order if the image - * had been interlaced (for viewing in a browser): - if (gif->Image.Interlace) { - PIX *pixdi = pixUninterlaceGIF(pixd); - pixTransferAllData(pixd, &pixdi, 0, 0); - } - * This is no longer required. */ - - DGifCloseFile(gif, &giferr); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Writing gif * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWriteStreamGif() - * - * \param[in] fp file stream opened for writing - * \param[in] pix 1, 2, 4, 8, 16 or 32 bpp - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) All output gif have colormaps.  If the pix is 32 bpp rgb,
- *          this quantizes the colors and writes out 8 bpp.
- *          If the pix is 16 bpp grayscale, it converts to 8 bpp first.
- * 
- */ -l_ok -pixWriteStreamGif(FILE *fp, - PIX *pix) -{ -l_uint8 *filedata; -size_t filebytes, nbytes; - - PROCNAME("pixWriteStreamGif"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixSetPadBits(pix, 0); - if (pixWriteMemGif(&filedata, &filebytes, pix) != 0) { - LEPT_FREE(filedata); - return ERROR_INT("failure to gif encode pix", procName, 1); - } - - rewind(fp); - nbytes = fwrite(filedata, 1, filebytes, fp); - LEPT_FREE(filedata); - if (nbytes != filebytes) - return ERROR_INT("write error", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteMemGif() - * - * \param[out] pdata data of gif compressed image - * \param[out] psize size of returned data - * \param[in] pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See comments in pixReadMemGif()
- * 
- */ -l_ok -pixWriteMemGif(l_uint8 **pdata, - size_t *psize, - PIX *pix) -{ -int giferr; -l_int32 result; -GifFileType *gif; -L_BBUFFER *buffer; - - PROCNAME("pixWriteMemGif"); - - /* 5.1+ and not 5.1.2 */ -#if (GIFLIB_MAJOR < 5 || (GIFLIB_MAJOR == 5 && GIFLIB_MINOR == 0)) - L_ERROR("Require giflib-5.1 or later\n", procName); - return 1; -#endif /* < 5.1 */ -#if GIFLIB_MAJOR == 5 && GIFLIB_MINOR == 1 && GIFLIB_RELEASE == 2 /* 5.1.2 */ - L_ERROR("Can't use giflib-5.1.2; suggest 5.1.3 or later\n", procName); - return 1; -#endif /* 5.1.2 */ - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - *pdata = NULL; - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - *psize = 0; - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - - if ((buffer = bbufferCreate(NULL, 0)) == NULL) - return ERROR_INT("failed to create buffer", procName, 1); - - if ((gif = EGifOpen((void*)buffer, gifWriteFunc, NULL)) == NULL) { - bbufferDestroy(&buffer); - return ERROR_INT("failed to create GIF image handle", procName, 1); - } - - result = pixToGif(pix, gif); - EGifCloseFile(gif, &giferr); - - if (result == 0) { - *pdata = bbufferDestroyAndSaveData(&buffer, psize); - } else { - bbufferDestroy(&buffer); - } - return result; -} - - -static l_int32 -gifWriteFunc(GifFileType *gif, - const GifByteType *src, - l_int32 bytesToWrite) -{ -L_BBUFFER *buffer; - - PROCNAME("gifWriteFunc"); - - if ((buffer = (L_BBUFFER*)gif->UserData) == NULL) - return ERROR_INT("UserData not set", procName, -1); - - if(bbufferRead(buffer, (l_uint8*)src, bytesToWrite) == 0) - return bytesToWrite; - return 0; -} - - -/*! - * \brief pixToGif() - * - * \param[in] pix 1, 2, 4, 8, 16 or 32 bpp - * \param[in] gif opened gif stream - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This encodes the pix to the gif stream. The stream is not
- *          closed by this function.
- *      (2) It is static to make this function private.
- * 
- */ -static l_int32 -pixToGif(PIX *pix, - GifFileType *gif) -{ -char *text; -l_int32 wpl, i, j, w, h, d, ncolor, rval, gval, bval; -l_int32 gif_ncolor = 0; -l_uint32 *data, *line; -PIX *pixd; -PIXCMAP *cmap; -ColorMapObject *gif_cmap; -GifByteType *gif_line; - - PROCNAME("pixToGif"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!gif) - return ERROR_INT("gif not defined", procName, 1); - - d = pixGetDepth(pix); - if (d == 32) { - pixd = pixConvertRGBToColormap(pix, 1); - } else if (d > 1) { - pixd = pixConvertTo8(pix, TRUE); - } else { /* d == 1; make sure there's a colormap */ - pixd = pixClone(pix); - if (!pixGetColormap(pixd)) { - cmap = pixcmapCreate(1); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixSetColormap(pixd, cmap); - } - } - - if (!pixd) - return ERROR_INT("failed to convert image to indexed", procName, 1); - d = pixGetDepth(pixd); - - if ((cmap = pixGetColormap(pixd)) == NULL) { - pixDestroy(&pixd); - return ERROR_INT("cmap is missing", procName, 1); - } - - /* 'Round' the number of gif colors up to a power of 2 */ - ncolor = pixcmapGetCount(cmap); - for (i = 0; i <= 8; i++) { - if ((1 << i) >= ncolor) { - gif_ncolor = (1 << i); - break; - } - } - if (gif_ncolor < 1) { - pixDestroy(&pixd); - return ERROR_INT("number of colors is invalid", procName, 1); - } - - /* Save the cmap colors in a gif_cmap */ - if ((gif_cmap = GifMakeMapObject(gif_ncolor, NULL)) == NULL) { - pixDestroy(&pixd); - return ERROR_INT("failed to create GIF color map", procName, 1); - } - for (i = 0; i < gif_ncolor; i++) { - rval = gval = bval = 0; - if (ncolor > 0) { - if (pixcmapGetColor(cmap, i, &rval, &gval, &bval) != 0) { - pixDestroy(&pixd); - GifFreeMapObject(gif_cmap); - return ERROR_INT("failed to get color from color map", - procName, 1); - } - ncolor--; - } - gif_cmap->Colors[i].Red = rval; - gif_cmap->Colors[i].Green = gval; - gif_cmap->Colors[i].Blue = bval; - } - - pixGetDimensions(pixd, &w, &h, NULL); - if 
(EGifPutScreenDesc(gif, w, h, gif_cmap->BitsPerPixel, 0, gif_cmap) - != GIF_OK) { - pixDestroy(&pixd); - GifFreeMapObject(gif_cmap); - return ERROR_INT("failed to write screen description", procName, 1); - } - GifFreeMapObject(gif_cmap); /* not needed after this point */ - - if (EGifPutImageDesc(gif, 0, 0, w, h, FALSE, NULL) != GIF_OK) { - pixDestroy(&pixd); - return ERROR_INT("failed to image screen description", procName, 1); - } - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - if (d != 1 && d != 2 && d != 4 && d != 8) { - pixDestroy(&pixd); - return ERROR_INT("image depth is not in {1, 2, 4, 8}", procName, 1); - } - - if ((gif_line = (GifByteType *)LEPT_CALLOC(sizeof(GifByteType), w)) - == NULL) { - pixDestroy(&pixd); - return ERROR_INT("mem alloc fail for data line", procName, 1); - } - - for (i = 0; i < h; i++) { - line = data + i * wpl; - /* Gif's way of setting the raster line up for compression */ - for (j = 0; j < w; j++) { - switch(d) - { - case 8: - gif_line[j] = GET_DATA_BYTE(line, j); - break; - case 4: - gif_line[j] = GET_DATA_QBIT(line, j); - break; - case 2: - gif_line[j] = GET_DATA_DIBIT(line, j); - break; - case 1: - gif_line[j] = GET_DATA_BIT(line, j); - break; - } - } - - /* Compress and save the line */ - if (EGifPutLine(gif, gif_line, w) != GIF_OK) { - LEPT_FREE(gif_line); - pixDestroy(&pixd); - return ERROR_INT("failed to write data line into GIF", procName, 1); - } - } - - /* Write a text comment. This must be placed after writing the - * data (!!) Note that because libgif does not provide a function - * for reading comments from file, you will need another way - * to read comments. 
*/ - if ((text = pixGetText(pix)) != NULL) { - if (EGifPutComment(gif, text) != GIF_OK) - L_WARNING("gif comment not written\n", procName); - } - - LEPT_FREE(gif_line); - pixDestroy(&pixd); - return 0; -} - - -#if 0 -/*---------------------------------------------------------------------* - * Removing interlacing (reference only; not used) * - *---------------------------------------------------------------------*/ - /* GIF supports 4-way interlacing by raster lines. - * Before 5.0, it was necessary for leptonica to restore interlaced - * data to normal raster order when reading to a pix. With 5.0, - * the de-interlacing is done by the library read function. - * It is here only as a reference. */ -static const l_int32 InterlacedOffset[] = {0, 4, 2, 1}; -static const l_int32 InterlacedJumps[] = {8, 8, 4, 2}; - -static PIX * -pixUninterlaceGIF(PIX *pixs) -{ -l_int32 w, h, d, wpl, j, k, srow, drow; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixUninterlaceGIF"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &d); - wpl = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - for (k = 0, srow = 0; k < 4; k++) { - for (drow = InterlacedOffset[k]; drow < h; - drow += InterlacedJumps[k], srow++) { - lines = datas + srow * wpl; - lined = datad + drow * wpl; - for (j = 0; j < w; j++) - memcpy(lined, lines, 4 * wpl); - } - } - - return pixd; -} -#endif - - -/* -----------------------------------------------------------------*/ -#endif /* HAVE_LIBGIF || HAVE_LIBUNGIF */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gifiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gifiostub.c deleted file mode 100644 index 20129b9c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gifiostub.c +++ /dev/null @@ -1,72 +0,0 @@ -/*====================================================================* - - 
Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file gifiostub.c - *
- *
- *     Stubs for gifio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* -----------------------------------------------------------------*/ -#if (!HAVE_LIBGIF) && (!HAVE_LIBUNGIF) /* defined in environ.h */ -/* -----------------------------------------------------------------*/ - -PIX * pixReadStreamGif(FILE *fp) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamGif", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemGif(const l_uint8 *cdata, size_t size) -{ - return (PIX *)ERROR_PTR("function not present", "pixReadMemGif", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamGif(FILE *fp, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteStreamGif", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemGif(l_uint8 **pdata, size_t *psize, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteMemGif", 1); -} - -/* -----------------------------------------------------------------*/ -#endif /* !HAVE_LIBGIF && !HAVE_LIBUNGIF */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gplot.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gplot.c deleted file mode 100644 index f0089bd0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gplot.c +++ /dev/null @@ -1,1372 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file gplot.c - *
- *
- *     Basic plotting functions
- *          GPLOT      *gplotCreate()
- *          void        gplotDestroy()
- *          l_int32     gplotAddPlot()
- *          l_int32     gplotSetScaling()
- *          PIX        *gplotMakeOutputPix()
- *          l_int32     gplotMakeOutput()
- *          l_int32     gplotGenCommandFile()
- *          l_int32     gplotGenDataFiles()
- *
- *     Quick, one-line plots
- *          l_int32     gplotSimple1()
- *          l_int32     gplotSimple2()
- *          l_int32     gplotSimpleN()
- *          PIX        *gplotSimplePix1()
- *          PIX        *gplotSimplePix2()
- *          PIX        *gplotSimplePixN()
- *          GPLOT      *gplotSimpleXY1()
- *          GPLOT      *gplotSimpleXY2()
- *          GPLOT      *gplotSimpleXYN()
- *          PIX        *gplotGeneralPix1()
- *          PIX        *gplotGeneralPix2()
- *          PIX        *gplotGeneralPixN()
- *
- *     Serialize for I/O
- *          GPLOT      *gplotRead()
- *          l_int32     gplotWrite()
- *
- *
- *     Utility for programmatic plotting using gnuplot 4.6 or later
- *     Enabled:
- *         ~ output to png (color), ps and eps (mono), latex (mono)
- *         ~ optional title for plot
- *         ~ optional x and y axis labels
- *         ~ multiple plots on one frame
- *         ~ optional label for each plot on the frame
- *         ~ optional log scaling on either or both axes
- *         ~ choice of 5 plot styles for each array of input data
- *         ~ choice of 2 plot modes, either using one input array
- *           (Y vs index) or two input arrays (Y vs X).  For functions
- *           that take two arrays, the first mode (Y vs index) is
- *           employed if the first array is NULL.
- *
- *     General usage:
- *         gplotCreate() initializes for plotting
- *         gplotAddPlot() for each plot on the frame
- *         gplotMakeOutput() to generate all output files and run gnuplot
- *         gplotDestroy() to clean up
- *
- *     Example of use:
- *         gplot = gplotCreate("tempskew", GPLOT_PNG, "Skew score vs angle",
- *                    "angle (deg)", "score");
- *         gplotAddPlot(gplot, natheta, nascore1, GPLOT_LINES, "plot 1");
- *         gplotAddPlot(gplot, natheta, nascore2, GPLOT_POINTS, "plot 2");
- *         gplotSetScaling(gplot, GPLOT_LOG_SCALE_Y);
- *         gplotMakeOutput(gplot);
- *         gplotDestroy(&gplot);
- *
- *     Example usage of one-line plot generators:
- *
- *         -- Simple plots --
- *         Specify the root of output files, the output format,
- *         and the title (optional), but not the x and y coordinate labels
- *         or the plot labels.  The plotstyle defaults to GPLOT_LINES.
- *            gplotSimple2(na1, na2, GPLOT_PNG, "/tmp/lept/histo/gray",
- *                         "gray histogram");
- *         Multiple plots can be generated using gplotSimpleN().
- *
- *         -- Simple plots with more options --
- *         Specify the root of output files, the plotstyle, the output format,
- *         and optionally the title, but not the x and y coordinate labels
- *         or the plot labels.
- *            gplotSimpleXY1(na1, na2, GPLOT_LINES, GPLOT_PNG,
- *                           "/tmp/lept/histo/gray", "gray histogram");
- *         Multiple plots can be generated using gplotSimpleXYN().
- *
- *         -- Simple plots returning a pix --
- *         Specify only the title (optional).  The plotstyle defaults
- *         GPLOT_LINES and the output format is GPLOT_PNG..
- *         You can't specify the x and y coordinate lables or the plot label.
- *         The rootname of the generated files is determined internally.
- *            Pix *pix = gplotSimplePix2(na1, na2, "gray histogram");
- *         Multiple plots can be generated using gplotSimplePixN().
- *
- *         -- General plots returning a pix --
- *         Specify the root of the output files, the plotstyle, and optionally
- *         the title and axis labels.  This does not allow the individual
- *         plots to have plot labels, or to use different plotstyles
- *         for each plot.
- *            Pix *pix = gplotGeneralPix2(na1, na2, "/tmp/lept/histo/gray",
- *                                   GPLOT_LINES, "gray histogram",
- *                                   "pix value", "num pixels");
- *         Multiple plots can be generated using gplotGeneralPixN().
- *
- *     Note for output to GPLOT_LATEX:
- *         This creates latex output of the plot, named .tex.
- *         It needs to be placed in a latex file .tex
- *         that precedes the plot output with, at a minimum:
- *           \documentclass{article}
- *           \begin{document}
- *         and ends with
- *           \end{document}
- *         You can then generate a dvi file .dvi using
- *           latex .tex
- *         and a PostScript file .ps from that using
- *           dvips -o .ps .dvi
- *
- *     N.B. To generate plots, it is necessary to have gnuplot installed on
- *          your Unix system, or wgnuplot on Windows.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#ifdef __APPLE__ -#include "TargetConditionals.h" -#endif - -static const l_int32 Bufsize = 512; /* hardcoded below in fscanf */ - -const char *gplotstylenames[] = {"with lines", - "with points", - "with impulses", - "with linespoints", - "with dots"}; -const char *gplotfileoutputs[] = {"", - "PNG", - "PS", - "EPS", - "LATEX", - "PNM"}; - - -/*-----------------------------------------------------------------* - * Basic Plotting Functions * - *-----------------------------------------------------------------*/ -/*! - * \brief gplotCreate() - * - * \param[in] rootname root for all output files - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] title [optional] overall title - * \param[in] xlabel [optional] x axis label - * \param[in] ylabel [optional] y axis label - * \return gplot, or NULL on error - * - *
- * Notes:
- *      (1) This initializes the plot.
- *      (2) The 'title', 'xlabel' and 'ylabel' strings can have spaces,
- *          double quotes and backquotes, but not single quotes.
- * 
- */ -GPLOT * -gplotCreate(const char *rootname, - l_int32 outformat, - const char *title, - const char *xlabel, - const char *ylabel) -{ -char *newroot; -char buf[Bufsize]; -l_int32 badchar; -GPLOT *gplot; - - PROCNAME("gplotCreate"); - - if (!rootname) - return (GPLOT *)ERROR_PTR("rootname not defined", procName, NULL); - if (outformat != GPLOT_PNG && outformat != GPLOT_PS && - outformat != GPLOT_EPS && outformat != GPLOT_LATEX && - outformat != GPLOT_PNM) - return (GPLOT *)ERROR_PTR("outformat invalid", procName, NULL); - stringCheckForChars(rootname, "`;&|><\"?*$()", &badchar); - if (badchar) /* danger of command injection */ - return (GPLOT *)ERROR_PTR("invalid rootname", procName, NULL); - -#if !defined(HAVE_LIBPNG) - if (outformat == GPLOT_PNG) { - L_WARNING("png library missing; output pnm format\n", procName); - outformat = GPLOT_PNM; - } -#endif - - gplot = (GPLOT *)LEPT_CALLOC(1, sizeof(GPLOT)); - gplot->cmddata = sarrayCreate(0); - gplot->datanames = sarrayCreate(0); - gplot->plotdata = sarrayCreate(0); - gplot->plotlabels = sarrayCreate(0); - gplot->plotstyles = numaCreate(0); - - /* Save title, labels, rootname, outformat, cmdname, outname */ - newroot = genPathname(rootname, NULL); - gplot->rootname = newroot; - gplot->outformat = outformat; - snprintf(buf, Bufsize, "%s.cmd", rootname); - gplot->cmdname = stringNew(buf); - if (outformat == GPLOT_PNG) - snprintf(buf, Bufsize, "%s.png", newroot); - else if (outformat == GPLOT_PS) - snprintf(buf, Bufsize, "%s.ps", newroot); - else if (outformat == GPLOT_EPS) - snprintf(buf, Bufsize, "%s.eps", newroot); - else if (outformat == GPLOT_LATEX) - snprintf(buf, Bufsize, "%s.tex", newroot); - else if (outformat == GPLOT_PNM) - snprintf(buf, Bufsize, "%s.pnm", newroot); - gplot->outname = stringNew(buf); - if (title) gplot->title = stringNew(title); - if (xlabel) gplot->xlabel = stringNew(xlabel); - if (ylabel) gplot->ylabel = stringNew(ylabel); - - return gplot; -} - - -/*! 
- * \brief gplotDestroy() - * - * \param[in,out] pgplot will be set to null before returning - */ -void -gplotDestroy(GPLOT **pgplot) -{ -GPLOT *gplot; - - PROCNAME("gplotDestroy"); - - if (pgplot == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((gplot = *pgplot) == NULL) - return; - - LEPT_FREE(gplot->rootname); - LEPT_FREE(gplot->cmdname); - sarrayDestroy(&gplot->cmddata); - sarrayDestroy(&gplot->datanames); - sarrayDestroy(&gplot->plotdata); - sarrayDestroy(&gplot->plotlabels); - numaDestroy(&gplot->plotstyles); - LEPT_FREE(gplot->outname); - if (gplot->title) - LEPT_FREE(gplot->title); - if (gplot->xlabel) - LEPT_FREE(gplot->xlabel); - if (gplot->ylabel) - LEPT_FREE(gplot->ylabel); - - LEPT_FREE(gplot); - *pgplot = NULL; - return; -} - - -/*! - * \brief gplotAddPlot() - * - * \param[in] gplot - * \param[in] nax [optional] numa: set to null for Y_VS_I; - * required for Y_VS_X - * \param[in] nay numa; required for both Y_VS_I and Y_VS_X - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] plotlabel [optional] label for individual plot - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) There are 2 options for (x,y) values:
- *            o  To plot an array vs a linear function of the
- *               index, set %nax = NULL.
- *            o  To plot one array vs another, use both %nax and %nay.
- *      (2) If %nax is NULL, the x value corresponding to the i-th
- *          value of %nay is found from the startx and delx fields
- *          in %nay:
- *               x = startx + i * delx
- *          These are set with numaSetParameters().  Their default
- *          values are startx = 0.0, delx = 1.0.
- *      (3) If %nax is defined, it must be the same size as %nay, and
- *          must have at least one number.
- *      (4) The 'plotlabel' string can have spaces, double
- *          quotes and backquotes, but not single quotes.
- * 
- */ -l_ok -gplotAddPlot(GPLOT *gplot, - NUMA *nax, - NUMA *nay, - l_int32 plotstyle, - const char *plotlabel) -{ -char buf[Bufsize]; -char emptystring[] = ""; -char *datastr, *title; -l_int32 n, i; -l_float32 valx, valy, startx, delx; -SARRAY *sa; - - PROCNAME("gplotAddPlot"); - - if (!gplot) - return ERROR_INT("gplot not defined", procName, 1); - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return ERROR_INT("invalid plotstyle", procName, 1); - - if ((n = numaGetCount(nay)) == 0) - return ERROR_INT("no points to plot", procName, 1); - if (nax && (n != numaGetCount(nax))) - return ERROR_INT("nax and nay sizes differ", procName, 1); - if (n == 1 && plotstyle == GPLOT_LINES) { - L_INFO("only 1 pt; changing style to points\n", procName); - plotstyle = GPLOT_POINTS; - } - - /* Save plotstyle and plotlabel */ - numaGetParameters(nay, &startx, &delx); - numaAddNumber(gplot->plotstyles, plotstyle); - if (plotlabel) { - title = stringNew(plotlabel); - sarrayAddString(gplot->plotlabels, title, L_INSERT); - } else { - sarrayAddString(gplot->plotlabels, emptystring, L_COPY); - } - - /* Generate and save data filename */ - gplot->nplots++; - snprintf(buf, Bufsize, "%s.data.%d", gplot->rootname, gplot->nplots); - sarrayAddString(gplot->datanames, buf, L_COPY); - - /* Generate data and save as a string */ - sa = sarrayCreate(n); - for (i = 0; i < n; i++) { - if (nax) - numaGetFValue(nax, i, &valx); - else - valx = startx + i * delx; - numaGetFValue(nay, i, &valy); - snprintf(buf, Bufsize, "%f %f\n", valx, valy); - sarrayAddString(sa, buf, L_COPY); - } - datastr = sarrayToString(sa, 0); - sarrayAddString(gplot->plotdata, datastr, L_INSERT); - sarrayDestroy(&sa); - - return 0; -} - - -/*! - * \brief gplotSetScaling() - * - * \param[in] gplot - * \param[in] scaling GPLOT_LINEAR_SCALE, GPLOT_LOG_SCALE_X, - * GPLOT_LOG_SCALE_Y, GPLOT_LOG_SCALE_X_Y - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) By default, the x and y axis scaling is linear.
- *      (2) Call this function to set semi-log or log-log scaling.
- * 
- */ -l_ok -gplotSetScaling(GPLOT *gplot, - l_int32 scaling) -{ - PROCNAME("gplotSetScaling"); - - if (!gplot) - return ERROR_INT("gplot not defined", procName, 1); - if (scaling != GPLOT_LINEAR_SCALE && - scaling != GPLOT_LOG_SCALE_X && - scaling != GPLOT_LOG_SCALE_Y && - scaling != GPLOT_LOG_SCALE_X_Y) - return ERROR_INT("invalid gplot scaling", procName, 1); - gplot->scaling = scaling; - return 0; -} - - -/*! - * \brief gplotMakeOutputPix() - * - * \param[in] gplot - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This wraps gplotMakeOutput(), and returns a pix.
- *          See gplotMakeOutput() for details.
- *      (2) The gplot output format must be an image (png or pnm).
- * 
- */ -PIX * -gplotMakeOutputPix(GPLOT *gplot) -{ - PROCNAME("gplotMakeOutputPix"); - - if (!gplot) - return (PIX *)ERROR_PTR("gplot not defined", procName, NULL); - if (gplot->outformat != GPLOT_PNG && gplot->outformat != GPLOT_PNM) - return (PIX *)ERROR_PTR("output format not an image", procName, NULL); - - if (gplotMakeOutput(gplot)) - return (PIX *)ERROR_PTR("plot output not made", procName, NULL); - return pixRead(gplot->outname); -} - - -/*! - * \brief gplotMakeOutput() - * - * \param[in] gplot - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This uses gplot and the new arrays to add a plot
- *          to the output, by writing a new data file and appending
- *          the appropriate plot commands to the command file.
- *      (2) Along with gplotMakeOutputPix(), these are the only functions
- *          in this file that requires the gnuplot executable to
- *          actually generate the plot.
- *      (3) The command file name for unix is canonical (i.e., directory /tmp)
- *          but the temp filename paths in the command file must be correct.
- *      (4) The gnuplot program for windows is wgnuplot.exe.
- * 
- */ -l_ok -gplotMakeOutput(GPLOT *gplot) -{ -#if WINAPI_FAMILY_APP || TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE - return ERROR_INT("gplot not defined", procName, 1); -#else -char buf[Bufsize]; -char *cmdname; - - PROCNAME("gplotMakeOutput"); - - if (!gplot) - return ERROR_INT("gplot not defined", procName, 1); - - if (!LeptDebugOK) { - L_INFO("running gnuplot is disabled; " - "use setLeptDebugOK(1) to enable\n", procName); - return 0; - } - -#ifdef OS_IOS /* iOS 11 does not support system() */ - return ERROR_INT("iOS 11 does not support system()", procName, 0); -#endif /* OS_IOS */ - - gplotGenCommandFile(gplot); - gplotGenDataFiles(gplot); - cmdname = genPathname(gplot->cmdname, NULL); - -#ifndef _WIN32 - snprintf(buf, Bufsize, "gnuplot %s", cmdname); -#else - snprintf(buf, Bufsize, "wgnuplot %s", cmdname); -#endif /* _WIN32 */ - - callSystemDebug(buf); /* gnuplot || wgnuplot */ - LEPT_FREE(cmdname); - return 0; -#endif -} - - -/*! - * \brief gplotGenCommandFile() - * - * \param[in] gplot - * \return 0 if OK, 1 on error - */ -l_ok -gplotGenCommandFile(GPLOT *gplot) -{ -char buf[Bufsize]; -char *cmdstr, *plotlabel, *dataname; -l_int32 i, plotstyle, nplots; -FILE *fp; - - PROCNAME("gplotGenCommandFile"); - - if (!gplot) - return ERROR_INT("gplot not defined", procName, 1); - - /* Remove any previous command data */ - sarrayClear(gplot->cmddata); - - /* Generate command data instructions */ - if (gplot->title) { /* set title */ - snprintf(buf, Bufsize, "set title '%s'", gplot->title); - sarrayAddString(gplot->cmddata, buf, L_COPY); - } - if (gplot->xlabel) { /* set xlabel */ - snprintf(buf, Bufsize, "set xlabel '%s'", gplot->xlabel); - sarrayAddString(gplot->cmddata, buf, L_COPY); - } - if (gplot->ylabel) { /* set ylabel */ - snprintf(buf, Bufsize, "set ylabel '%s'", gplot->ylabel); - sarrayAddString(gplot->cmddata, buf, L_COPY); - } - - /* Set terminal type and output */ - if (gplot->outformat == GPLOT_PNG) { - snprintf(buf, Bufsize, "set terminal png; set output 
'%s'", - gplot->outname); - } else if (gplot->outformat == GPLOT_PS) { - snprintf(buf, Bufsize, "set terminal postscript; set output '%s'", - gplot->outname); - } else if (gplot->outformat == GPLOT_EPS) { - snprintf(buf, Bufsize, "set terminal postscript eps; set output '%s'", - gplot->outname); - } else if (gplot->outformat == GPLOT_LATEX) { - snprintf(buf, Bufsize, "set terminal latex; set output '%s'", - gplot->outname); - } else if (gplot->outformat == GPLOT_PNM) { - snprintf(buf, Bufsize, "set terminal pbm color; set output '%s'", - gplot->outname); - } - sarrayAddString(gplot->cmddata, buf, L_COPY); - - if (gplot->scaling == GPLOT_LOG_SCALE_X || - gplot->scaling == GPLOT_LOG_SCALE_X_Y) { - snprintf(buf, Bufsize, "set logscale x"); - sarrayAddString(gplot->cmddata, buf, L_COPY); - } - if (gplot->scaling == GPLOT_LOG_SCALE_Y || - gplot->scaling == GPLOT_LOG_SCALE_X_Y) { - snprintf(buf, Bufsize, "set logscale y"); - sarrayAddString(gplot->cmddata, buf, L_COPY); - } - - nplots = sarrayGetCount(gplot->datanames); - for (i = 0; i < nplots; i++) { - plotlabel = sarrayGetString(gplot->plotlabels, i, L_NOCOPY); - dataname = sarrayGetString(gplot->datanames, i, L_NOCOPY); - numaGetIValue(gplot->plotstyles, i, &plotstyle); - if (nplots == 1) { - snprintf(buf, Bufsize, "plot '%s' title '%s' %s", - dataname, plotlabel, gplotstylenames[plotstyle]); - } else { - if (i == 0) - snprintf(buf, Bufsize, "plot '%s' title '%s' %s, \\", - dataname, plotlabel, gplotstylenames[plotstyle]); - else if (i < nplots - 1) - snprintf(buf, Bufsize, " '%s' title '%s' %s, \\", - dataname, plotlabel, gplotstylenames[plotstyle]); - else - snprintf(buf, Bufsize, " '%s' title '%s' %s", - dataname, plotlabel, gplotstylenames[plotstyle]); - } - sarrayAddString(gplot->cmddata, buf, L_COPY); - } - - /* Write command data to file */ - cmdstr = sarrayToString(gplot->cmddata, 1); - if ((fp = fopenWriteStream(gplot->cmdname, "w")) == NULL) { - LEPT_FREE(cmdstr); - return ERROR_INT("cmd stream not opened", 
procName, 1); - } - fwrite(cmdstr, 1, strlen(cmdstr), fp); - fclose(fp); - LEPT_FREE(cmdstr); - return 0; -} - - -/*! - * \brief gplotGenDataFiles() - * - * \param[in] gplot - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pathnames in the gplot command file are actual pathnames,
- *          which can be in temp directories.  Consequently, they must not be
- *          rewritten by calling fopenWriteStream(), and we use fopen().
- * 
- */ -l_ok -gplotGenDataFiles(GPLOT *gplot) -{ -char *plotdata, *dataname; -l_int32 i, nplots; -FILE *fp; - - PROCNAME("gplotGenDataFiles"); - - if (!gplot) - return ERROR_INT("gplot not defined", procName, 1); - - nplots = sarrayGetCount(gplot->datanames); - for (i = 0; i < nplots; i++) { - plotdata = sarrayGetString(gplot->plotdata, i, L_NOCOPY); - dataname = sarrayGetString(gplot->datanames, i, L_NOCOPY); - if ((fp = fopen(dataname, "w")) == NULL) - return ERROR_INT("datafile stream not opened", procName, 1); - fwrite(plotdata, 1, strlen(plotdata), fp); - fclose(fp); - } - - return 0; -} - - -/*-----------------------------------------------------------------* - * Quick one-line plots * - *-----------------------------------------------------------------*/ -/*! - * \brief gplotSimple1() - * - * \param[in] na numa; plot Y_VS_I - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] outroot root of output files - * \param[in] title [optional], can be NULL - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a line plot of a numa, where the array value
- *          is plotted vs the array index.  The plot is generated
- *          in the specified output format; the title  is optional.
- *      (2) When calling these simple plot functions more than once, use
- *          different %outroot to avoid overwriting the output files.
- * 
- */ -l_ok -gplotSimple1(NUMA *na, - l_int32 outformat, - const char *outroot, - const char *title) -{ -GPLOT *gplot; - - PROCNAME("gplotSimple1"); - - gplot = gplotSimpleXY1(NULL, na, GPLOT_LINES, outformat, outroot, title); - if (!gplot) - return ERROR_INT("failed to generate plot", procName, 1); - gplotDestroy(&gplot); - return 0; -} - - -/*! - * \brief gplotSimple2() - * - * \param[in] na1 numa; plot with Y_VS_I - * \param[in] na2 ditto - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] outroot root of output files - * \param[in] title [optional] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a line plot of two numa, where the array values
- *          are each plotted vs the array index.  The plot is generated
- *          in the specified output format; the title  is optional.
- *      (2) When calling these simple plot functions more than once, use
- *          different %outroot to avoid overwriting the output files.
- * 
- */ -l_ok -gplotSimple2(NUMA *na1, - NUMA *na2, - l_int32 outformat, - const char *outroot, - const char *title) -{ -GPLOT *gplot; - - PROCNAME("gplotSimple2"); - - gplot = gplotSimpleXY2(NULL, na1, na2, GPLOT_LINES, - outformat, outroot, title); - if (!gplot) - return ERROR_INT("failed to generate plot", procName, 1); - gplotDestroy(&gplot); - return 0; -} - - -/*! - * \brief gplotSimpleN() - * - * \param[in] naa numaa; plot Y_VS_I for each numa - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] outroot root of output files - * \param[in] title [optional] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a line plot of all numas in a numaa (array of numa),
- *          where the array values are each plotted vs the array index.
- *          The plot is generated in the specified output format;
- *          the title  is optional.
- *      (2) When calling these simple plot functions more than once, use
- *          different %outroot to avoid overwriting the output files.
- * 
- */ -l_ok -gplotSimpleN(NUMAA *naa, - l_int32 outformat, - const char *outroot, - const char *title) -{ -GPLOT *gplot; - - PROCNAME("gplotSimpleN"); - - gplot = gplotSimpleXYN(NULL, naa, GPLOT_LINES, outformat, outroot, title); - if (!gplot) - return ERROR_INT("failed to generate plot", procName, 1); - gplotDestroy(&gplot); - return 0; -} - - -/*! - * \brief gplotSimplePix1() - * - * \param[in] na numa; plot Y_VS_I - * \param[in] title [optional], can be NULL - * \return pix of plot, or null on error - * - *
- * Notes:
- *      (1) This generates a line plot of a numa as a pix, where the array
- *          value is plotted vs the array index.  The title is optional.
- *      (2) The temporary plot file is a png; its name is generated internally
- *          and stored in gplot.
- * 
- */ -PIX * -gplotSimplePix1(NUMA *na, - const char *title) -{ -char buf[64]; -static l_int32 index; -GPLOT *gplot; -PIX *pix; - - PROCNAME("gplotSimplePix1"); - - if (!na) - return (PIX *)ERROR_PTR("na not defined", procName, NULL); - - lept_mkdir("lept/gplot/pix"); - snprintf(buf, sizeof(buf), "/tmp/lept/gplot/pix1.%d", index++); - gplot = gplotSimpleXY1(NULL, na, GPLOT_LINES, GPLOT_PNG, buf, title); - if (!gplot) - return (PIX *)ERROR_PTR("failed to generate plot", procName, NULL); - pix = pixRead(gplot->outname); - gplotDestroy(&gplot); - if (!pix) - return (PIX *)ERROR_PTR("failed to generate plot", procName, NULL); - return pix; -} - - -/*! - * \brief gplotSimplePix2() - * - * \param[in] na1 numa; plot with Y_VS_I - * \param[in] na2 ditto - * \param[in] title [optional], can be NULL - * \return pix of plot, or null on error - * - *
- * Notes:
- *      (1) This generates a pix with line plots of two numa, where each of
- *          two arrays is plotted vs the array index.  the title is optional.
- *      (2) The temporary plot file is a png; its name is generated internally
- *          and stored in gplot.
- * 
- */ -PIX * -gplotSimplePix2(NUMA *na1, - NUMA *na2, - const char *title) -{ -char buf[64]; -static l_int32 index; -GPLOT *gplot; -PIX *pix; - - PROCNAME("gplotSimplePix2"); - - if (!na1 || !na2) - return (PIX *)ERROR_PTR("both na1, na2 not defined", procName, NULL); - - lept_mkdir("lept/gplot/pix"); - snprintf(buf, sizeof(buf), "/tmp/lept/gplot/pix2.%d", index++); - gplot = gplotSimpleXY2(NULL, na1, na2, GPLOT_LINES, GPLOT_PNG, buf, title); - if (!gplot) - return (PIX *)ERROR_PTR("failed to generate plot", procName, NULL); - pix = pixRead(gplot->outname); - gplotDestroy(&gplot); - if (!pix) - return (PIX *)ERROR_PTR("failed to generate plot", procName, NULL); - return pix; -} - - -/*! - * \brief gplotSimplePixN() - * - * \param[in] naa numaa; plot Y_VS_I for each numa - * \param[in] title [optional], can be NULL - * \return pix of plot, or null on error - * - *
- * Notes:
- *      (1) This generates a pix with an arbitrary number of line plots,
- *          each coming from a numa in %naa.  Each array value is plotted
- *          vs the array index.  The title is optional.
- *      (2) The temporary plot file is a png; its name is generated internally
- *          and stored in gplot.
- * 
- */ -PIX * -gplotSimplePixN(NUMAA *naa, - const char *title) -{ -char buf[64]; -static l_int32 index; -GPLOT *gplot; -PIX *pix; - - PROCNAME("gplotSimplePixN"); - - if (!naa) - return (PIX *)ERROR_PTR("naa not defined", procName, NULL); - - lept_mkdir("lept/gplot/pix"); - snprintf(buf, sizeof(buf), "/tmp/lept/gplot/pixN.%d", index++); - gplot = gplotSimpleXYN(NULL, naa, GPLOT_LINES, GPLOT_PNG, buf, title); - if (!gplot) - return (PIX *)ERROR_PTR("failed to generate plot", procName, NULL); - pix = pixRead(gplot->outname); - gplotDestroy(&gplot); - if (!pix) - return (PIX *)ERROR_PTR("failed to generate plot", procName, NULL); - return pix; -} - - -/*! - * \brief gplotSimpleXY1() - * - * \param[in] nax [optional] - * \param[in] nay [required] - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] outroot root of output files - * \param[in] title [optional], can be NULL - * \return gplot or null on error - * - *
- * Notes:
- *      (1) This generates a plot of a %nay vs %nax, generated in
- *          the specified output format.  The title is optional.
- *      (2) Use 0 for default plotstyle (lines).
- *      (3) %nax is optional.  If NULL, %nay is plotted against
- *          the array index.
- *      (4) When calling these simple plot functions more than once, use
- *          different %outroot to avoid overwriting the output files.
- *      (5) The returned gplot must be destroyed by the caller.
- * 
- */ -GPLOT * -gplotSimpleXY1(NUMA *nax, - NUMA *nay, - l_int32 plotstyle, - l_int32 outformat, - const char *outroot, - const char *title) -{ -GPLOT *gplot; - - PROCNAME("gplotSimpleXY1"); - - if (!nay) - return (GPLOT *)ERROR_PTR("nay not defined", procName, NULL); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return (GPLOT *)ERROR_PTR("invalid plotstyle", procName, NULL); - if (outformat != GPLOT_PNG && outformat != GPLOT_PS && - outformat != GPLOT_EPS && outformat != GPLOT_LATEX && - outformat != GPLOT_PNM) - return (GPLOT *)ERROR_PTR("invalid outformat", procName, NULL); - if (!outroot) - return (GPLOT *)ERROR_PTR("outroot not specified", procName, NULL); - - if ((gplot = gplotCreate(outroot, outformat, title, NULL, NULL)) == 0) - return (GPLOT *)ERROR_PTR("gplot not made", procName, NULL); - gplotAddPlot(gplot, nax, nay, plotstyle, NULL); - gplotMakeOutput(gplot); - return gplot; -} - - -/*! - * \brief gplotSimpleXY2() - * - * \param[in] nax [optional], can be NULL - * \param[in] nay1 - * \param[in] nay2 - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] outroot root of output files - * \param[in] title [optional] - * \return gplot or null on error - * - *
- * Notes:
- *      (1) This generates plots of %nay1 and %nay2 against %nax, generated
- *          in the specified output format.  The title is optional.
- *      (2) Use 0 for default plotstyle (lines).
- *      (3) %nax is optional.  If NULL, %nay1 and %nay2 are plotted
- *          against the array index.
- *      (4) When calling these simple plot functions more than once, use
- *          different %outroot to avoid overwriting the output files.
- *      (5) The returned gplot must be destroyed by the caller.
- * 
- */ -GPLOT * -gplotSimpleXY2(NUMA *nax, - NUMA *nay1, - NUMA *nay2, - l_int32 plotstyle, - l_int32 outformat, - const char *outroot, - const char *title) -{ -GPLOT *gplot; - - PROCNAME("gplotSimpleXY2"); - - if (!nay1 || !nay2) - return (GPLOT *)ERROR_PTR("nay1 and nay2 not both defined", - procName, NULL); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return (GPLOT *)ERROR_PTR("invalid plotstyle", procName, NULL); - if (outformat != GPLOT_PNG && outformat != GPLOT_PS && - outformat != GPLOT_EPS && outformat != GPLOT_LATEX && - outformat != GPLOT_PNM) - return (GPLOT *)ERROR_PTR("invalid outformat", procName, NULL); - if (!outroot) - return (GPLOT *)ERROR_PTR("outroot not specified", procName, NULL); - - if ((gplot = gplotCreate(outroot, outformat, title, NULL, NULL)) == 0) - return (GPLOT *)ERROR_PTR("gplot not made", procName, NULL); - gplotAddPlot(gplot, nax, nay1, plotstyle, NULL); - gplotAddPlot(gplot, nax, nay2, plotstyle, NULL); - gplotMakeOutput(gplot); - return gplot; -} - - -/*! - * \brief gplotSimpleXYN() - * - * \param[in] nax [optional]; can be NULL - * \param[in] naay numaa of arrays to plot against %nax - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, - * GPLOT_LATEX, GPLOT_PNM - * \param[in] outroot root of output files - * \param[in] title [optional] - * \return gplot or null on error - * - *
- * Notes:
- *      (1) This generates plots of each Numa in %naa against %nax,
- *          generated in the specified output format.  The title is optional.
- *      (2) Use 0 for default plotstyle (lines).
- *      (3) %nax is optional.  If NULL, each Numa array is plotted against
- *          the array index.
- *      (4) When calling these simple plot functions more than once, use
- *          different %outroot to avoid overwriting the output files.
- *      (5) The returned gplot must be destroyed by the caller.
- * 
- */ -GPLOT * -gplotSimpleXYN(NUMA *nax, - NUMAA *naay, - l_int32 plotstyle, - l_int32 outformat, - const char *outroot, - const char *title) -{ -l_int32 i, n; -GPLOT *gplot; -NUMA *nay; - - PROCNAME("gplotSimpleXYN"); - - if (!naay) - return (GPLOT *)ERROR_PTR("naay not defined", procName, NULL); - if ((n = numaaGetCount(naay)) == 0) - return (GPLOT *)ERROR_PTR("no numa in array", procName, NULL); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return (GPLOT *)ERROR_PTR("invalid plotstyle", procName, NULL); - if (outformat != GPLOT_PNG && outformat != GPLOT_PS && - outformat != GPLOT_EPS && outformat != GPLOT_LATEX && - outformat != GPLOT_PNM) - return (GPLOT *)ERROR_PTR("invalid outformat", procName, NULL); - if (!outroot) - return (GPLOT *)ERROR_PTR("outroot not specified", procName, NULL); - - if ((gplot = gplotCreate(outroot, outformat, title, NULL, NULL)) == 0) - return (GPLOT *)ERROR_PTR("gplot not made", procName, NULL); - for (i = 0; i < n; i++) { - nay = numaaGetNuma(naay, i, L_CLONE); - gplotAddPlot(gplot, nax, nay, plotstyle, NULL); - numaDestroy(&nay); - } - gplotMakeOutput(gplot); - return gplot; -} - - -/*! - * \brief gplotGeneralPix1() - * - * \param[in] na data array - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] rootname root for all output files - * \param[in] title [optional] overall title - * \param[in] xlabel [optional] x axis label - * \param[in] ylabel [optional] y axis label - * \return pix of plot, or NULL on error - * - *
- * Notes:
- *      (1) The 'title', 'xlabel' and 'ylabel' strings can have spaces,
- *          double quotes and backquotes, but not single quotes.
- * 
- */ -PIX * -gplotGeneralPix1(NUMA *na, - l_int32 plotstyle, - const char *rootname, - const char *title, - const char *xlabel, - const char *ylabel) -{ -GPLOT *gplot; -PIX *pix; - - PROCNAME("gplotGeneralPix1"); - - if (!na) - return (PIX *)ERROR_PTR("na not defined", procName, NULL); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return (PIX *)ERROR_PTR("invalid plotstyle", procName, NULL); - if (!rootname) - return (PIX *)ERROR_PTR("rootname not defined", procName, NULL); - - gplot = gplotCreate(rootname, GPLOT_PNG, title, xlabel, ylabel); - if (!gplot) - return (PIX *)ERROR_PTR("gplot not made", procName, NULL); - gplotAddPlot(gplot, NULL, na, plotstyle, NULL); - pix = gplotMakeOutputPix(gplot); - gplotDestroy(&gplot); - return pix; -} - - -/*! - * \brief gplotGeneralPix2() - * - * \param[in] na1 x-axis data array - * \param[in] na2 y-axis data array - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] rootname root for all output files - * \param[in] title [optional] overall title - * \param[in] xlabel [optional] x axis label - * \param[in] ylabel [optional] y axis label - * \return pix of plot, or NULL on error - * - *
- * Notes:
- *      (1) The 'title', 'xlabel' and 'ylabel' strings can have spaces,
- *          double quotes and backquotes, but not single quotes.
- * 
- */ -PIX * -gplotGeneralPix2(NUMA *na1, - NUMA *na2, - l_int32 plotstyle, - const char *rootname, - const char *title, - const char *xlabel, - const char *ylabel) -{ -GPLOT *gplot; -PIX *pix; - - PROCNAME("gplotGeneralPix2"); - - if (!na1) - return (PIX *)ERROR_PTR("na1 not defined", procName, NULL); - if (!na2) - return (PIX *)ERROR_PTR("na2 not defined", procName, NULL); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return (PIX *)ERROR_PTR("invalid plotstyle", procName, NULL); - if (!rootname) - return (PIX *)ERROR_PTR("rootname not defined", procName, NULL); - - gplot = gplotCreate(rootname, GPLOT_PNG, title, xlabel, ylabel); - if (!gplot) - return (PIX *)ERROR_PTR("gplot not made", procName, NULL); - gplotAddPlot(gplot, na1, na2, plotstyle, NULL); - pix = gplotMakeOutputPix(gplot); - gplotDestroy(&gplot); - return pix; -} - - -/*! - * \brief gplotGeneralPixN() - * - * \param[in] nax x-axis data array - * \param[in] naay array of y-axis data arrays - * \param[in] plotstyle GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES, - * GPLOT_LINESPOINTS, GPLOT_DOTS - * \param[in] rootname root for all output files - * \param[in] title [optional] overall title - * \param[in] xlabel [optional] x axis label - * \param[in] ylabel [optional] y axis label - * \return pix of plot, or NULL on error - * - *
- * Notes:
- *      (1) The 'title', 'xlabel' and 'ylabel' strings can have spaces,
- *          double quotes and backquotes, but not single quotes.
- * 
- */ -PIX * -gplotGeneralPixN(NUMA *nax, - NUMAA *naay, - l_int32 plotstyle, - const char *rootname, - const char *title, - const char *xlabel, - const char *ylabel) -{ -l_int32 i, n; -GPLOT *gplot; -NUMA *nay; -PIX *pix; - - PROCNAME("gplotGeneralPixN"); - - if (!nax) - return (PIX *)ERROR_PTR("nax not defined", procName, NULL); - if (!naay) - return (PIX *)ERROR_PTR("naay not defined", procName, NULL); - if ((n = numaaGetCount(naay)) == 0) - return (PIX *)ERROR_PTR("no numa in array", procName, NULL); - if (plotstyle < 0 || plotstyle >= NUM_GPLOT_STYLES) - return (PIX *)ERROR_PTR("invalid plotstyle", procName, NULL); - if (!rootname) - return (PIX *)ERROR_PTR("rootname not defined", procName, NULL); - - gplot = gplotCreate(rootname, GPLOT_PNG, title, xlabel, ylabel); - if (!gplot) - return (PIX *)ERROR_PTR("gplot not made", procName, NULL); - for (i = 0; i < n; i++) { - nay = numaaGetNuma(naay, i, L_CLONE); - gplotAddPlot(gplot, nax, nay, plotstyle, NULL); - numaDestroy(&nay); - } - pix = gplotMakeOutputPix(gplot); - gplotDestroy(&gplot); - return pix; -} - - -/*-----------------------------------------------------------------* - * Serialize for I/O * - *-----------------------------------------------------------------*/ -/*! 
- * \brief gplotRead() - * - * \param[in] filename - * \return gplot, or NULL on error - */ -GPLOT * -gplotRead(const char *filename) -{ -char buf[Bufsize]; -char *rootname, *title, *xlabel, *ylabel, *ignores; -l_int32 outformat, ret, version, ignore; -FILE *fp; -GPLOT *gplot; - - PROCNAME("gplotRead"); - - if (!filename) - return (GPLOT *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (GPLOT *)ERROR_PTR("stream not opened", procName, NULL); - - ret = fscanf(fp, "Gplot Version %d\n", &version); - if (ret != 1) { - fclose(fp); - return (GPLOT *)ERROR_PTR("not a gplot file", procName, NULL); - } - if (version != GPLOT_VERSION_NUMBER) { - fclose(fp); - return (GPLOT *)ERROR_PTR("invalid gplot version", procName, NULL); - } - - ignore = fscanf(fp, "Rootname: %511s\n", buf); /* Bufsize - 1 */ - rootname = stringNew(buf); - ignore = fscanf(fp, "Output format: %d\n", &outformat); - ignores = fgets(buf, Bufsize, fp); /* Title: ... */ - title = stringNew(buf + 7); - title[strlen(title) - 1] = '\0'; - ignores = fgets(buf, Bufsize, fp); /* X axis label: ... */ - xlabel = stringNew(buf + 14); - xlabel[strlen(xlabel) - 1] = '\0'; - ignores = fgets(buf, Bufsize, fp); /* Y axis label: ... 
*/ - ylabel = stringNew(buf + 14); - ylabel[strlen(ylabel) - 1] = '\0'; - - gplot = gplotCreate(rootname, outformat, title, xlabel, ylabel); - LEPT_FREE(rootname); - LEPT_FREE(title); - LEPT_FREE(xlabel); - LEPT_FREE(ylabel); - if (!gplot) { - fclose(fp); - return (GPLOT *)ERROR_PTR("gplot not made", procName, NULL); - } - sarrayDestroy(&gplot->cmddata); - sarrayDestroy(&gplot->datanames); - sarrayDestroy(&gplot->plotdata); - sarrayDestroy(&gplot->plotlabels); - numaDestroy(&gplot->plotstyles); - - ignore = fscanf(fp, "Commandfile name: %511s\n", buf); /* Bufsize - 1 */ - stringReplace(&gplot->cmdname, buf); - ignore = fscanf(fp, "\nCommandfile data:"); - gplot->cmddata = sarrayReadStream(fp); - ignore = fscanf(fp, "\nDatafile names:"); - gplot->datanames = sarrayReadStream(fp); - ignore = fscanf(fp, "\nPlot data:"); - gplot->plotdata = sarrayReadStream(fp); - ignore = fscanf(fp, "\nPlot titles:"); - gplot->plotlabels = sarrayReadStream(fp); - ignore = fscanf(fp, "\nPlot styles:"); - gplot->plotstyles = numaReadStream(fp); - - ignore = fscanf(fp, "Number of plots: %d\n", &gplot->nplots); - ignore = fscanf(fp, "Output file name: %511s\n", buf); - stringReplace(&gplot->outname, buf); - ignore = fscanf(fp, "Axis scaling: %d\n", &gplot->scaling); - - fclose(fp); - return gplot; -} - - -/*! 
- * \brief gplotWrite() - * - * \param[in] filename - * \param[in] gplot - * \return 0 if OK; 1 on error - */ -l_ok -gplotWrite(const char *filename, - GPLOT *gplot) -{ -FILE *fp; - - PROCNAME("gplotWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!gplot) - return ERROR_INT("gplot not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - - fprintf(fp, "Gplot Version %d\n", GPLOT_VERSION_NUMBER); - fprintf(fp, "Rootname: %s\n", gplot->rootname); - fprintf(fp, "Output format: %d\n", gplot->outformat); - fprintf(fp, "Title: %s\n", gplot->title); - fprintf(fp, "X axis label: %s\n", gplot->xlabel); - fprintf(fp, "Y axis label: %s\n", gplot->ylabel); - - fprintf(fp, "Commandfile name: %s\n", gplot->cmdname); - fprintf(fp, "\nCommandfile data:"); - sarrayWriteStream(fp, gplot->cmddata); - fprintf(fp, "\nDatafile names:"); - sarrayWriteStream(fp, gplot->datanames); - fprintf(fp, "\nPlot data:"); - sarrayWriteStream(fp, gplot->plotdata); - fprintf(fp, "\nPlot titles:"); - sarrayWriteStream(fp, gplot->plotlabels); - fprintf(fp, "\nPlot styles:"); - numaWriteStderr(gplot->plotstyles); - - fprintf(fp, "Number of plots: %d\n", gplot->nplots); - fprintf(fp, "Output file name: %s\n", gplot->outname); - fprintf(fp, "Axis scaling: %d\n", gplot->scaling); - - fclose(fp); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gplot.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gplot.h deleted file mode 100644 index d2e4f7e5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/gplot.h +++ /dev/null @@ -1,96 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_GPLOT_H -#define LEPTONICA_GPLOT_H - -/*! - * \file gplot.h - * - *
- *   Data structures and parameters for generating gnuplot files
- *
- *   We used to support X11 output, but recent versions of gnuplot do not
- *   support the X11 terminal.  To get display to your screen, use
- *   GPLOT_PNG output; e.g.,
- *       gplotSimple1(na, GPLOT_PNG, "/tmp/someroot", ...);
- *       l_fileDisplay("/tmp/someroot.png", ...);
- * 
- */ - -#define GPLOT_VERSION_NUMBER 1 - -#define NUM_GPLOT_STYLES 5 -enum GPLOT_STYLE { - GPLOT_LINES = 0, - GPLOT_POINTS = 1, - GPLOT_IMPULSES = 2, - GPLOT_LINESPOINTS = 3, - GPLOT_DOTS = 4 -}; - -#define NUM_GPLOT_OUTPUTS 6 -enum GPLOT_OUTPUT { - GPLOT_NONE = 0, - GPLOT_PNG = 1, - GPLOT_PS = 2, - GPLOT_EPS = 3, - GPLOT_LATEX = 4, - GPLOT_PNM = 5, -}; - -enum GPLOT_SCALING { - GPLOT_LINEAR_SCALE = 0, /*!< default */ - GPLOT_LOG_SCALE_X = 1, - GPLOT_LOG_SCALE_Y = 2, - GPLOT_LOG_SCALE_X_Y = 3 -}; - -extern const char *gplotstylenames[]; /*!< used in gnuplot cmd file */ -extern const char *gplotfileoutputs[]; /*!< used in simple file input */ - -/*! Data structure for generating gnuplot files */ -struct GPlot -{ - char *rootname; /*!< for cmd, data, output */ - char *cmdname; /*!< command file name */ - struct Sarray *cmddata; /*!< command file contents */ - struct Sarray *datanames; /*!< data file names */ - struct Sarray *plotdata; /*!< plot data (1 string/file) */ - struct Sarray *plotlabels; /*!< label for each individual plot */ - struct Numa *plotstyles; /*!< plot style for individual plots */ - l_int32 nplots; /*!< current number of plots */ - char *outname; /*!< output file name */ - l_int32 outformat; /*!< GPLOT_OUTPUT values */ - l_int32 scaling; /*!< GPLOT_SCALING values */ - char *title; /*!< optional */ - char *xlabel; /*!< optional x axis label */ - char *ylabel; /*!< optional y axis label */ -}; -typedef struct GPlot GPLOT; - - -#endif /* LEPTONICA_GPLOT_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/graphics.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/graphics.c deleted file mode 100644 index 91057499..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/graphics.c +++ /dev/null @@ -1,2903 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file graphics.c - *
- *
- *      Pta generation for arbitrary shapes built with lines
- *          PTA        *generatePtaLine()
- *          PTA        *generatePtaWideLine()
- *          PTA        *generatePtaBox()
- *          PTA        *generatePtaBoxa()
- *          PTA        *generatePtaHashBox()
- *          PTA        *generatePtaHashBoxa()
- *          PTAA       *generatePtaaBoxa()
- *          PTAA       *generatePtaaHashBoxa()
- *          PTA        *generatePtaPolyline()
- *          PTA        *generatePtaGrid()
- *          PTA        *convertPtaLineTo4cc()
- *          PTA        *generatePtaFilledCircle()
- *          PTA        *generatePtaFilledSquare()
- *          PTA        *generatePtaLineFromPt()
- *          l_int32     locatePtRadially()
- *
- *      Rendering function plots directly on images
- *          l_int32     pixRenderPlotFromNuma()
- *          l_int32     pixRenderPlotFromNumaGen()
- *          PTA        *makePlotPtaFromNuma()
- *          PTA        *makePlotPtaFromNumaGen()
- *
- *      Pta rendering
- *          l_int32     pixRenderPta()
- *          l_int32     pixRenderPtaArb()
- *          l_int32     pixRenderPtaBlend()
- *
- *      Rendering of arbitrary shapes built with lines
- *          l_int32     pixRenderLine()
- *          l_int32     pixRenderLineArb()
- *          l_int32     pixRenderLineBlend()
- *
- *          l_int32     pixRenderBox()
- *          l_int32     pixRenderBoxArb()
- *          l_int32     pixRenderBoxBlend()
- *
- *          l_int32     pixRenderBoxa()
- *          l_int32     pixRenderBoxaArb()
- *          l_int32     pixRenderBoxaBlend()
- *
- *          l_int32     pixRenderHashBox()
- *          l_int32     pixRenderHashBoxArb()
- *          l_int32     pixRenderHashBoxBlend()
- *          l_int32     pixRenderHashMaskArb()
- *
- *          l_int32     pixRenderHashBoxa()
- *          l_int32     pixRenderHashBoxaArb()
- *          l_int32     pixRenderHashBoxaBlend()
- *
- *          l_int32     pixRenderPolyline()
- *          l_int32     pixRenderPolylineArb()
- *          l_int32     pixRenderPolylineBlend()
- *
- *          l_int32     pixRenderGrid()
- *
- *          l_int32     pixRenderRandomCmapPtaa()
- *
- *      Rendering and filling of polygons
- *          PIX        *pixRenderPolygon()
- *          PIX        *pixFillPolygon()
- *
- *      Contour rendering on grayscale images
- *          PIX        *pixRenderContours()
- *          PIX        *fpixAutoRenderContours()
- *          PIX        *fpixRenderContours()
- *
- *      Boundary pt generation on 1 bpp images
- *          PTA        *pixGeneratePtaBoundary()
- *
- *  The line rendering functions are relatively crude, but they
- *  get the job done for most simple situations.  We use the pta
- *  (array of points) as an intermediate data structure.  For example,
- *  to render a line we first generate a pta.
- *
- *  Some rendering functions come in sets of three.  For example
- *       pixRenderLine() -- render on 1 bpp pix
- *       pixRenderLineArb() -- render on 32 bpp pix with arbitrary (r,g,b)
- *       pixRenderLineBlend() -- render on 32 bpp pix, blending the
- *               (r,g,b) graphic object with the underlying rgb pixels.
- *
- *  There are also procedures for plotting a function, computed
- *  from the row or column pixels, directly on the image.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/*------------------------------------------------------------------* - * Pta generation for arbitrary shapes built with lines * - *------------------------------------------------------------------*/ -/*! - * \brief generatePtaLine() - * - * \param[in] x1, y1 end point 1 - * \param[in] x2, y2 end point 2 - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) Uses Bresenham line drawing, which results in an 8-connected line.
- * 
- */ -PTA * -generatePtaLine(l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2) -{ -l_int32 npts, diff, getyofx, sign, i, x, y; -l_float32 slope; -PTA *pta; - - PROCNAME("generatePtaLine"); - - /* Generate line parameters */ - if (x1 == x2 && y1 == y2) { /* same point */ - getyofx = TRUE; - npts = 1; - } else if (L_ABS(x2 - x1) >= L_ABS(y2 - y1)) { - getyofx = TRUE; - npts = L_ABS(x2 - x1) + 1; - diff = x2 - x1; - sign = L_SIGN(x2 - x1); - slope = (l_float32)(sign * (y2 - y1)) / (l_float32)diff; - } else { - getyofx = FALSE; - npts = L_ABS(y2 - y1) + 1; - diff = y2 - y1; - sign = L_SIGN(y2 - y1); - slope = (l_float32)(sign * (x2 - x1)) / (l_float32)diff; - } - - if ((pta = ptaCreate(npts)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - - if (npts == 1) { /* degenerate case */ - ptaAddPt(pta, x1, y1); - return pta; - } - - /* Generate the set of points */ - if (getyofx) { /* y = y(x) */ - for (i = 0; i < npts; i++) { - x = x1 + sign * i; - y = (l_int32)(y1 + (l_float32)i * slope + 0.5); - ptaAddPt(pta, x, y); - } - } else { /* x = x(y) */ - for (i = 0; i < npts; i++) { - x = (l_int32)(x1 + (l_float32)i * slope + 0.5); - y = y1 + sign * i; - ptaAddPt(pta, x, y); - } - } - - return pta; -} - - -/*! 
- * \brief generatePtaWideLine() - * - * \param[in] x1, y1 end point 1 - * \param[in] x2, y2 end point 2 - * \param[in] width - * \return ptaj, or NULL on error - */ -PTA * -generatePtaWideLine(l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 width) -{ -l_int32 i, x1a, x2a, y1a, y2a; -PTA *pta, *ptaj; - - PROCNAME("generatePtaWideLine"); - - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((ptaj = generatePtaLine(x1, y1, x2, y2)) == NULL) - return (PTA *)ERROR_PTR("ptaj not made", procName, NULL); - if (width == 1) - return ptaj; - - /* width > 1; estimate line direction & join */ - if (L_ABS(x1 - x2) > L_ABS(y1 - y2)) { /* "horizontal" line */ - for (i = 1; i < width; i++) { - if ((i & 1) == 1) { /* place above */ - y1a = y1 - (i + 1) / 2; - y2a = y2 - (i + 1) / 2; - } else { /* place below */ - y1a = y1 + (i + 1) / 2; - y2a = y2 + (i + 1) / 2; - } - if ((pta = generatePtaLine(x1, y1a, x2, y2a)) != NULL) { - ptaJoin(ptaj, pta, 0, -1); - ptaDestroy(&pta); - } - } - } else { /* "vertical" line */ - for (i = 1; i < width; i++) { - if ((i & 1) == 1) { /* place to left */ - x1a = x1 - (i + 1) / 2; - x2a = x2 - (i + 1) / 2; - } else { /* place to right */ - x1a = x1 + (i + 1) / 2; - x2a = x2 + (i + 1) / 2; - } - if ((pta = generatePtaLine(x1a, y1, x2a, y2)) != NULL) { - ptaJoin(ptaj, pta, 0, -1); - ptaDestroy(&pta); - } - } - } - - return ptaj; -} - - -/*! - * \brief generatePtaBox() - * - * \param[in] box - * \param[in] width of line - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) Because the box is constructed so that we don't have any
- *          overlapping lines, there is no need to remove duplicates.
- * 
- */ -PTA * -generatePtaBox(BOX *box, - l_int32 width) -{ -l_int32 x, y, w, h; -PTA *ptad, *pta; - - PROCNAME("generatePtaBox"); - - if (!box) - return (PTA *)ERROR_PTR("box not defined", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - /* Generate line points and add them to the pta. */ - boxGetGeometry(box, &x, &y, &w, &h); - if (w == 0 || h == 0) - return (PTA *)ERROR_PTR("box has w = 0 or h = 0", procName, NULL); - ptad = ptaCreate(0); - if ((width & 1) == 1) { /* odd width */ - pta = generatePtaWideLine(x - width / 2, y, - x + w - 1 + width / 2, y, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - pta = generatePtaWideLine(x + w - 1, y + 1 + width / 2, - x + w - 1, y + h - 2 - width / 2, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - pta = generatePtaWideLine(x + w - 1 + width / 2, y + h - 1, - x - width / 2, y + h - 1, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - pta = generatePtaWideLine(x, y + h - 2 - width / 2, - x, y + 1 + width / 2, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } else { /* even width */ - pta = generatePtaWideLine(x - width / 2, y, - x + w - 2 + width / 2, y, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - pta = generatePtaWideLine(x + w - 1, y + 0 + width / 2, - x + w - 1, y + h - 2 - width / 2, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - pta = generatePtaWideLine(x + w - 2 + width / 2, y + h - 1, - x - width / 2, y + h - 1, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - pta = generatePtaWideLine(x, y + h - 2 - width / 2, - x, y + 0 + width / 2, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } - - return ptad; -} - - -/*! - * \brief generatePtaBoxa() - * - * \param[in] boxa - * \param[in] width - * \param[in] removedups 1 to remove, 0 to leave - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) If %boxa has overlapping boxes, and if blending will
- *          be used to give a transparent effect, transparency
- *          artifacts at line intersections can be removed using
- *          %removedups = 1.
- * 
- */ -PTA * -generatePtaBoxa(BOXA *boxa, - l_int32 width, - l_int32 removedups) -{ -l_int32 i, n; -BOX *box; -PTA *ptad, *ptat, *pta; - - PROCNAME("generatePtaBoxa"); - - if (!boxa) - return (PTA *)ERROR_PTR("boxa not defined", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - n = boxaGetCount(boxa); - ptat = ptaCreate(0); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pta = generatePtaBox(box, width); - ptaJoin(ptat, pta, 0, -1); - ptaDestroy(&pta); - boxDestroy(&box); - } - - if (removedups) - ptad = ptaRemoveDupsByAset(ptat); - else - ptad = ptaClone(ptat); - - ptaDestroy(&ptat); - return ptad; -} - - -/*! - * \brief generatePtaHashBox() - * - * \param[in] box - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width of line - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) The orientation takes on one of 4 orientations (horiz, vertical,
- *          slope +1, slope -1).
- *      (2) The full outline is also drawn if %outline = 1.
- * 
- */ -PTA * -generatePtaHashBox(BOX *box, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline) -{ -l_int32 bx, by, bh, bw, x, y, x1, y1, x2, y2, i, n, npts; -PTA *ptad, *pta; - - PROCNAME("generatePtaHashBox"); - - if (!box) - return (PTA *)ERROR_PTR("box not defined", procName, NULL); - if (spacing <= 1) - return (PTA *)ERROR_PTR("spacing not > 1", procName, NULL); - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return (PTA *)ERROR_PTR("invalid line orientation", procName, NULL); - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (bw == 0 || bh == 0) - return (PTA *)ERROR_PTR("box has bw = 0 or bh = 0", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - /* Generate line points and add them to the pta. */ - ptad = ptaCreate(0); - if (outline) { - pta = generatePtaBox(box, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } - if (orient == L_HORIZONTAL_LINE) { - n = 1 + bh / spacing; - for (i = 0; i < n; i++) { - y = by + (i * (bh - 1)) / (n - 1); - pta = generatePtaWideLine(bx, y, bx + bw - 1, y, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } - } else if (orient == L_VERTICAL_LINE) { - n = 1 + bw / spacing; - for (i = 0; i < n; i++) { - x = bx + (i * (bw - 1)) / (n - 1); - pta = generatePtaWideLine(x, by, x, by + bh - 1, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } - } else if (orient == L_POS_SLOPE_LINE) { - n = 2 + (l_int32)((bw + bh) / (1.4 * spacing)); - for (i = 0; i < n; i++) { - x = (l_int32)(bx + (i + 0.5) * 1.4 * spacing); - boxIntersectByLine(box, x, by - 1, 1.0, &x1, &y1, &x2, &y2, &npts); - if (npts == 2) { - pta = generatePtaWideLine(x1, y1, x2, y2, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } - } - } else { /* orient == L_NEG_SLOPE_LINE */ - n = 2 + (l_int32)((bw + bh) / (1.4 * spacing)); - for (i = 0; i < n; i++) { - x = (l_int32)(bx - bh + (i 
+ 0.5) * 1.4 * spacing); - boxIntersectByLine(box, x, by - 1, -1.0, &x1, &y1, &x2, &y2, &npts); - if (npts == 2) { - pta = generatePtaWideLine(x1, y1, x2, y2, width); - ptaJoin(ptad, pta, 0, -1); - ptaDestroy(&pta); - } - } - } - - return ptad; -} - - -/*! - * \brief generatePtaHashBoxa() - * - * \param[in] boxa - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width of line - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, ... - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \param[in] removedups 1 to remove, 0 to leave - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) The orientation takes on one of 4 orientations (horiz, vertical,
- *          slope +1, slope -1).
- *      (2) The full outline is also drawn if %outline = 1.
- *      (3) If the boxa has overlapping boxes, and if blending will
- *          be used to give a transparent effect, transparency
- *          artifacts at line intersections can be removed using
- *          %removedups = 1.
- * 
- */ -PTA * -generatePtaHashBoxa(BOXA *boxa, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 removedups) -{ -l_int32 i, n; -BOX *box; -PTA *ptad, *ptat, *pta; - - PROCNAME("generatePtaHashBoxa"); - - if (!boxa) - return (PTA *)ERROR_PTR("boxa not defined", procName, NULL); - if (spacing <= 1) - return (PTA *)ERROR_PTR("spacing not > 1", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return (PTA *)ERROR_PTR("invalid line orientation", procName, NULL); - - n = boxaGetCount(boxa); - ptat = ptaCreate(0); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pta = generatePtaHashBox(box, spacing, width, orient, outline); - ptaJoin(ptat, pta, 0, -1); - ptaDestroy(&pta); - boxDestroy(&box); - } - - if (removedups) - ptad = ptaRemoveDupsByAset(ptat); - else - ptad = ptaClone(ptat); - - ptaDestroy(&ptat); - return ptad; -} - - -/*! - * \brief generatePtaaBoxa() - * - * \param[in] boxa - * \return ptaa, or NULL on error - * - *
- * Notes:
- *      (1) This generates a pta of the four corners for each box in
- *          the boxa.
- *      (2) Each of these pta can be rendered onto a pix with random colors,
- *          by using pixRenderRandomCmapPtaa() with closeflag = 1.
- * 
- */ -PTAA * -generatePtaaBoxa(BOXA *boxa) -{ -l_int32 i, n, x, y, w, h; -BOX *box; -PTA *pta; -PTAA *ptaa; - - PROCNAME("generatePtaaBoxa"); - - if (!boxa) - return (PTAA *)ERROR_PTR("boxa not defined", procName, NULL); - - n = boxaGetCount(boxa); - ptaa = ptaaCreate(n); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - pta = ptaCreate(4); - ptaAddPt(pta, x, y); - ptaAddPt(pta, x + w - 1, y); - ptaAddPt(pta, x + w - 1, y + h - 1); - ptaAddPt(pta, x, y + h - 1); - ptaaAddPta(ptaa, pta, L_INSERT); - boxDestroy(&box); - } - - return ptaa; -} - - -/*! - * \brief generatePtaaHashBoxa() - * - * \param[in] boxa - * \param[in] spacing spacing between hash lines; must be > 1 - * \param[in] width hash line width - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \return ptaa, or NULL on error - * - *
- * Notes:
- *      (1) The orientation takes on one of 4 orientations (horiz, vertical,
- *          slope +1, slope -1).
- *      (2) The full outline is also drawn if %outline = 1.
- *      (3) Each of these pta can be rendered onto a pix with random colors,
- *          by using pixRenderRandomCmapPtaa() with closeflag = 1.
- *
- * 
- */ -PTAA * -generatePtaaHashBoxa(BOXA *boxa, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline) -{ -l_int32 i, n; -BOX *box; -PTA *pta; -PTAA *ptaa; - - PROCNAME("generatePtaaHashBoxa"); - - if (!boxa) - return (PTAA *)ERROR_PTR("boxa not defined", procName, NULL); - if (spacing <= 1) - return (PTAA *)ERROR_PTR("spacing not > 1", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return (PTAA *)ERROR_PTR("invalid line orientation", procName, NULL); - - n = boxaGetCount(boxa); - ptaa = ptaaCreate(n); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pta = generatePtaHashBox(box, spacing, width, orient, outline); - ptaaAddPta(ptaa, pta, L_INSERT); - boxDestroy(&box); - } - - return ptaa; -} - - -/*! - * \brief generatePtaPolyline() - * - * \param[in] ptas vertices of polyline - * \param[in] width - * \param[in] closeflag 1 to close the contour; 0 otherwise - * \param[in] removedups 1 to remove, 0 to leave - * \return ptad, or NULL on error - */ -PTA * -generatePtaPolyline(PTA *ptas, - l_int32 width, - l_int32 closeflag, - l_int32 removedups) -{ -l_int32 i, n, x1, y1, x2, y2; -PTA *ptad, *ptat, *pta; - - PROCNAME("generatePtaPolyline"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - n = ptaGetCount(ptas); - ptat = ptaCreate(0); - if (n < 2) /* nothing to do */ - return ptat; - - ptaGetIPt(ptas, 0, &x1, &y1); - for (i = 1; i < n; i++) { - ptaGetIPt(ptas, i, &x2, &y2); - pta = generatePtaWideLine(x1, y1, x2, y2, width); - ptaJoin(ptat, pta, 0, -1); - ptaDestroy(&pta); - x1 = x2; - y1 = y2; - } - - if (closeflag) { - ptaGetIPt(ptas, 0, &x2, &y2); - pta = generatePtaWideLine(x1, y1, x2, y2, width); - ptaJoin(ptat, pta, 0, -1); - 
ptaDestroy(&pta); - } - - if (removedups) - ptad = ptaRemoveDupsByAset(ptat); - else - ptad = ptaClone(ptat); - - ptaDestroy(&ptat); - return ptad; -} - - -/*! - * \brief generatePtaGrid() - * - * \param[in] w, h of region where grid will be displayed - * \param[in] nx, ny number of rectangles in each direction in grid - * \param[in] width of rendered lines - * \return ptad, or NULL on error - */ -PTA * -generatePtaGrid(l_int32 w, - l_int32 h, - l_int32 nx, - l_int32 ny, - l_int32 width) -{ -l_int32 i, j, bx, by, x1, x2, y1, y2; -BOX *box; -BOXA *boxa; -PTA *pta; - - PROCNAME("generatePtaGrid"); - - if (nx < 1 || ny < 1) - return (PTA *)ERROR_PTR("nx and ny must be > 0", procName, NULL); - if (w < 2 * nx || h < 2 * ny) - return (PTA *)ERROR_PTR("w and/or h too small", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - boxa = boxaCreate(nx * ny); - bx = (w + nx - 1) / nx; - by = (h + ny - 1) / ny; - for (i = 0; i < ny; i++) { - y1 = by * i; - y2 = L_MIN(y1 + by, h - 1); - for (j = 0; j < nx; j++) { - x1 = bx * j; - x2 = L_MIN(x1 + bx, w - 1); - box = boxCreate(x1, y1, x2 - x1 + 1, y2 - y1 + 1); - boxaAddBox(boxa, box, L_INSERT); - } - } - - pta = generatePtaBoxa(boxa, width, 1); - boxaDestroy(&boxa); - return pta; -} - - -/*! - * \brief convertPtaLineTo4cc() - * - * \param[in] ptas 8-connected line of points - * \return ptad 4-connected line, or NULL on error - * - *
- * Notes:
- *      (1) When a polyline is generated with width = 1, the resulting
- *          line is not 4-connected in general.  This function adds
- *          points as necessary to convert the line to 4-cconnected.
- *          It is useful when rendering 1 bpp on a pix.
- *      (2) Do not use this for lines generated with width > 1.
- * 
- */ -PTA * -convertPtaLineTo4cc(PTA *ptas) -{ -l_int32 i, n, x, y, xp, yp; -PTA *ptad; - - PROCNAME("convertPtaLineTo4cc"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - n = ptaGetCount(ptas); - ptad = ptaCreate(n); - ptaGetIPt(ptas, 0, &xp, &yp); - ptaAddPt(ptad, xp, yp); - for (i = 1; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - if (x != xp && y != yp) /* diagonal */ - ptaAddPt(ptad, x, yp); - ptaAddPt(ptad, x, y); - xp = x; - yp = y; - } - - return ptad; -} - - -/*! - * \brief generatePtaFilledCircle() - * - * \param[in] radius - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) The circle is has diameter = 2 * radius + 1.
- *      (2) It is located with the center of the circle at the
- *          point (%radius, %radius).
- *      (3) Consequently, it typically must be translated if
- *          it is to represent a set of pixels in an image.
- * 
- */ -PTA * -generatePtaFilledCircle(l_int32 radius) -{ -l_int32 x, y; -l_float32 radthresh, sqdist; -PTA *pta; - - PROCNAME("generatePtaFilledCircle"); - - if (radius < 1) - return (PTA *)ERROR_PTR("radius must be >= 1", procName, NULL); - - pta = ptaCreate(0); - radthresh = (radius + 0.5) * (radius + 0.5); - for (y = 0; y <= 2 * radius; y++) { - for (x = 0; x <= 2 * radius; x++) { - sqdist = (l_float32)((y - radius) * (y - radius) + - (x - radius) * (x - radius)); - if (sqdist <= radthresh) - ptaAddPt(pta, x, y); - } - } - - return pta; -} - - -/*! - * \brief generatePtaFilledSquare() - * - * \param[in] side - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) The center of the square can be chosen to be at
- *          (side / 2, side / 2).  It must be translated by this amount
- *          when used for replication.
- * 
- */ -PTA * -generatePtaFilledSquare(l_int32 side) -{ -l_int32 x, y; -PTA *pta; - - PROCNAME("generatePtaFilledSquare"); - if (side < 1) - return (PTA *)ERROR_PTR("side must be > 0", procName, NULL); - - pta = ptaCreate(0); - for (y = 0; y < side; y++) - for (x = 0; x < side; x++) - ptaAddPt(pta, x, y); - - return pta; -} - - -/*! - * \brief generatePtaLineFromPt() - * - * \param[in] x, y point of origination - * \param[in] length of line, including starting point - * \param[in] radang angle in radians, CW from horizontal - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) %length of the line is 1 greater than the distance
- *          used in locatePtRadially().  Example: a distance of 1
- *          gives rise to a length of 2.
- * 
- */ -PTA * -generatePtaLineFromPt(l_int32 x, - l_int32 y, - l_float64 length, - l_float64 radang) -{ -l_int32 x2, y2; /* the point at the other end of the line */ - - x2 = x + (l_int32)((length - 1.0) * cos(radang)); - y2 = y + (l_int32)((length - 1.0) * sin(radang)); - return generatePtaLine(x, y, x2, y2); -} - - -/*! - * \brief locatePtRadially() - * - * \param[in] xr, yr reference point - * \param[in] radang angle in radians, CW from horizontal - * \param[in] dist distance of point from reference point along - * line given by the specified angle - * \param[out] px, py location of point - * \return 0 if OK, 1 on error - */ -l_ok -locatePtRadially(l_int32 xr, - l_int32 yr, - l_float64 dist, - l_float64 radang, - l_float64 *px, - l_float64 *py) -{ - PROCNAME("locatePtRadially"); - - if (!px || !py) - return ERROR_INT("&x and &y not both defined", procName, 1); - - *px = xr + dist * cos(radang); - *py = yr + dist * sin(radang); - return 0; -} - - -/*------------------------------------------------------------------* - * Rendering function plots directly on images * - *------------------------------------------------------------------*/ -/*! - * \brief pixRenderPlotFromNuma() - * - * \param[in,out] ppix any type; replaced if not 32 bpp rgb - * \param[in] na to be plotted - * \param[in] plotloc location of plot: L_PLOT_AT_TOP, etc - * \param[in] linewidth width of "line" that is drawn; between 1 and 7 - * \param[in] max maximum excursion in pixels from baseline - * \param[in] color plot color: 0xrrggbb00 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Simplified interface for plotting row or column aligned data
- *          on a pix.
- *      (2) This replaces %pix with a 32 bpp rgb version if it is not
- *          already 32 bpp.  It then draws the plot on the pix.
- *      (3) See makePlotPtaFromNumaGen() for more details.
- * 
- */ -l_ok -pixRenderPlotFromNuma(PIX **ppix, - NUMA *na, - l_int32 plotloc, - l_int32 linewidth, - l_int32 max, - l_uint32 color) -{ -l_int32 w, h, size, rval, gval, bval; -PIX *pix1; -PTA *pta; - - PROCNAME("pixRenderPlotFromNuma"); - - if (!ppix) - return ERROR_INT("&pix not defined", procName, 1); - if (*ppix == NULL) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(*ppix, &w, &h, NULL); - size = (plotloc == L_PLOT_AT_TOP || plotloc == L_PLOT_AT_MID_HORIZ || - plotloc == L_PLOT_AT_BOT) ? h : w; - pta = makePlotPtaFromNuma(na, size, plotloc, linewidth, max); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - - if (pixGetDepth(*ppix) != 32) { - pix1 = pixConvertTo32(*ppix); - pixDestroy(ppix); - *ppix = pix1; - } - extractRGBValues(color, &rval, &gval, &bval); - pixRenderPtaArb(*ppix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief makePlotPtaFromNuma() - * - * \param[in] na - * \param[in] size pix height for horizontal plot; pix width - * for vertical plot - * \param[in] plotloc location of plot: L_PLOT_AT_TOP, etc - * \param[in] linewidth width of "line" that is drawn; between 1 and 7 - * \param[in] max maximum excursion in pixels from baseline - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) This generates points from %numa representing y(x) or x(y)
- *          with respect to a pix.  A horizontal plot y(x) is drawn for
- *          a function of column position, and a vertical plot is drawn
- *          for a function x(y) of row position.  The baseline is located
- *          so that all plot points will fit in the pix.
- *      (2) See makePlotPtaFromNumaGen() for more details.
- * 
- */ -PTA * -makePlotPtaFromNuma(NUMA *na, - l_int32 size, - l_int32 plotloc, - l_int32 linewidth, - l_int32 max) -{ -l_int32 orient, refpos; - - PROCNAME("makePlotPtaFromNuma"); - - if (!na) - return (PTA *)ERROR_PTR("na not defined", procName, NULL); - if (plotloc == L_PLOT_AT_TOP || plotloc == L_PLOT_AT_MID_HORIZ || - plotloc == L_PLOT_AT_BOT) { - orient = L_HORIZONTAL_LINE; - } else if (plotloc == L_PLOT_AT_LEFT || plotloc == L_PLOT_AT_MID_VERT || - plotloc == L_PLOT_AT_RIGHT) { - orient = L_VERTICAL_LINE; - } else { - return (PTA *)ERROR_PTR("invalid plotloc", procName, NULL); - } - - if (plotloc == L_PLOT_AT_LEFT || plotloc == L_PLOT_AT_TOP) - refpos = max; - else if (plotloc == L_PLOT_AT_MID_VERT || plotloc == L_PLOT_AT_MID_HORIZ) - refpos = size / 2; - else /* L_PLOT_AT_RIGHT || L_PLOT_AT_BOT */ - refpos = size - max - 1; - - return makePlotPtaFromNumaGen(na, orient, linewidth, refpos, max, 1); -} - - -/*! - * \brief pixRenderPlotFromNumaGen() - * - * \param[in,out] ppix any type; replaced if not 32 bpp rgb - * \param[in] na to be plotted - * \param[in] orient L_HORIZONTAL_LINE, L_VERTICAL_LINE - * \param[in] linewidth width of "line" that is drawn; between 1 and 7 - * \param[in] refpos reference position: y for horizontal; - * x for vertical - * \param[in] max maximum excursion in pixels from baseline - * \param[in] drawref 1 to draw the reference line and its normal - * \param[in] color plot color: 0xrrggbb00 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) General interface for plotting row or column aligned data
- *          on a pix.
- *      (2) This replaces %pix with a 32 bpp rgb version if it is not
- *          already 32 bpp.  It then draws the plot on the pix.
- *      (3) See makePlotPtaFromNumaGen() for other input parameters.
- * 
- */ -l_ok -pixRenderPlotFromNumaGen(PIX **ppix, - NUMA *na, - l_int32 orient, - l_int32 linewidth, - l_int32 refpos, - l_int32 max, - l_int32 drawref, - l_uint32 color) -{ -l_int32 rval, gval, bval; -PIX *pix1; -PTA *pta; - - PROCNAME("pixRenderPlotFromNumaGen"); - - if (!ppix) - return ERROR_INT("&pix not defined", procName, 1); - if (*ppix == NULL) - return ERROR_INT("pix not defined", procName, 1); - - pta = makePlotPtaFromNumaGen(na, orient, linewidth, refpos, max, drawref); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - - if (pixGetDepth(*ppix) != 32) { - pix1 = pixConvertTo32(*ppix); - pixDestroy(ppix); - *ppix = pix1; - } - extractRGBValues(color, &rval, &gval, &bval); - pixRenderPtaArb(*ppix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief makePlotPtaFromNumaGen() - * - * \param[in] na - * \param[in] orient L_HORIZONTAL_LINE, L_VERTICAL_LINE - * \param[in] linewidth width of "line" that is drawn; between 1 and 7 - * \param[in] refpos reference position: y for horizontal; - * x for vertical - * \param[in] max maximum excursion in pixels from baseline - * \param[in] drawref 1 to draw the reference line and its normal - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) This generates points from %numa representing y(x) or x(y)
- *          with respect to a pix.  For y(x), we draw a horizontal line
- *          at the reference position and a vertical line at the edge; then
- *          we draw the values of %numa, scaled so that the maximum
- *          excursion from the reference position is %max pixels.
- *      (2) The start and delx parameters of %numa are used to refer
- *          its values to the raster lines (L_VERTICAL_LINE) or columns
- *          (L_HORIZONTAL_LINE).
- *      (3) The linewidth is chosen in the interval [1 ... 7].
- *      (4) %refpos should be chosen so the plot is entirely within the pix
- *          that it will be painted onto.
- *      (5) This would typically be used to plot, in place, a function
- *          computed along pixel rows or columns.
- * 
- */ -PTA * -makePlotPtaFromNumaGen(NUMA *na, - l_int32 orient, - l_int32 linewidth, - l_int32 refpos, - l_int32 max, - l_int32 drawref) -{ -l_int32 i, n, maxw, maxh; -l_float32 minval, maxval, absval, val, scale, start, del; -PTA *pta1, *pta2, *ptad; - - PROCNAME("makePlotPtaFromNumaGen"); - - if (!na) - return (PTA *)ERROR_PTR("na not defined", procName, NULL); - if (orient != L_HORIZONTAL_LINE && orient != L_VERTICAL_LINE) - return (PTA *)ERROR_PTR("invalid orient", procName, NULL); - if (linewidth < 1) { - L_WARNING("linewidth < 1; setting to 1\n", procName); - linewidth = 1; - } - if (linewidth > 7) { - L_WARNING("linewidth > 7; setting to 7\n", procName); - linewidth = 7; - } - - numaGetMin(na, &minval, NULL); - numaGetMax(na, &maxval, NULL); - absval = L_MAX(L_ABS(minval), L_ABS(maxval)); - scale = (l_float32)max / (l_float32)absval; - n = numaGetCount(na); - numaGetParameters(na, &start, &del); - - /* Generate the plot points */ - pta1 = ptaCreate(n); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - if (orient == L_HORIZONTAL_LINE) { - ptaAddPt(pta1, start + i * del, refpos + scale * val); - maxw = (del >= 0) ? start + n * del + linewidth - : start + linewidth; - maxh = refpos + max + linewidth; - } else { /* vertical line */ - ptaAddPt(pta1, refpos + scale * val, start + i * del); - maxw = refpos + max + linewidth; - maxh = (del >= 0) ? 
start + n * del + linewidth - : start + linewidth; - } - } - - /* Optionally, widen the plot */ - if (linewidth > 1) { - if (linewidth % 2 == 0) /* even linewidth; use side of a square */ - pta2 = generatePtaFilledSquare(linewidth); - else /* odd linewidth; use radius of a circle */ - pta2 = generatePtaFilledCircle(linewidth / 2); - ptad = ptaReplicatePattern(pta1, NULL, pta2, linewidth / 2, - linewidth / 2, maxw, maxh); - ptaDestroy(&pta2); - } else { - ptad = ptaClone(pta1); - } - ptaDestroy(&pta1); - - /* Optionally, add the reference lines */ - if (drawref) { - if (orient == L_HORIZONTAL_LINE) { - pta1 = generatePtaLine(start, refpos, start + n * del, refpos); - ptaJoin(ptad, pta1, 0, -1); - ptaDestroy(&pta1); - pta1 = generatePtaLine(start, refpos - max, - start, refpos + max); - ptaJoin(ptad, pta1, 0, -1); - } else { /* vertical line */ - pta1 = generatePtaLine(refpos, start, refpos, start + n * del); - ptaJoin(ptad, pta1, 0, -1); - ptaDestroy(&pta1); - pta1 = generatePtaLine(refpos - max, start, - refpos + max, start); - ptaJoin(ptad, pta1, 0, -1); - } - ptaDestroy(&pta1); - } - - return ptad; -} - - -/*------------------------------------------------------------------* - * Pta generation for arbitrary shapes built with lines * - *------------------------------------------------------------------*/ -/*! - * \brief pixRenderPta() - * - * \param[in] pix any depth, not cmapped - * \param[in] pta arbitrary set of points - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) L_SET_PIXELS puts all image bits in each pixel to 1
- *          (black for 1 bpp; white for depth > 1)
- *      (2) L_CLEAR_PIXELS puts all image bits in each pixel to 0
- *          (white for 1 bpp; black for depth > 1)
- *      (3) L_FLIP_PIXELS reverses all image bits in each pixel
- *      (4) This function clips the rendering to the pix.  It performs
- *          clipping for functions such as pixRenderLine(),
- *          pixRenderBox() and pixRenderBoxa(), that call pixRenderPta().
- * 
- */ -l_ok -pixRenderPta(PIX *pix, - PTA *pta, - l_int32 op) -{ -l_int32 i, n, x, y, w, h, d, maxval; - - PROCNAME("pixRenderPta"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (pixGetColormap(pix)) - return ERROR_INT("pix is colormapped", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - maxval = 1; - if (op == L_SET_PIXELS) { - switch (d) - { - case 2: - maxval = 0x3; - break; - case 4: - maxval = 0xf; - break; - case 8: - maxval = 0xff; - break; - case 16: - maxval = 0xffff; - break; - case 32: - maxval = 0xffffffff; - break; - } - } - - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w) - continue; - if (y < 0 || y >= h) - continue; - switch (op) - { - case L_SET_PIXELS: - pixSetPixel(pix, x, y, maxval); - break; - case L_CLEAR_PIXELS: - pixClearPixel(pix, x, y); - break; - case L_FLIP_PIXELS: - pixFlipPixel(pix, x, y); - break; - default: - break; - } - } - - return 0; -} - - -/*! - * \brief pixRenderPtaArb() - * - * \param[in] pix any depth, cmapped ok - * \param[in] pta arbitrary set of points - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %pix is colormapped, render this color (or the nearest
- *          color if the cmap is full) on each pixel.
- *      (2) The rgb components have the standard dynamic range [0 ... 255]
- *      (3) If pix is not colormapped, do the best job you can using
- *          the input colors:
- *          ~ d = 1: set the pixels
- *          ~ d = 2, 4, 8: average the input rgb value
- *          ~ d = 32: use the input rgb value
- *      (4) This function clips the rendering to %pix.
- * 
- */ -l_ok -pixRenderPtaArb(PIX *pix, - PTA *pta, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval) -{ -l_int32 i, n, x, y, w, h, d, index; -l_uint8 val; -l_uint32 val32; -PIXCMAP *cmap; - - PROCNAME("pixRenderPtaArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - d = pixGetDepth(pix); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 32) - return ERROR_INT("depth not in {1,2,4,8,32}", procName, 1); - - if (d == 1) { - pixRenderPta(pix, pta, L_SET_PIXELS); - return 0; - } - - cmap = pixGetColormap(pix); - pixGetDimensions(pix, &w, &h, &d); - if (cmap) { - pixcmapAddNearestColor(cmap, rval, gval, bval, &index); - } else { - if (d == 2) - val = (rval + gval + bval) / (3 * 64); - else if (d == 4) - val = (rval + gval + bval) / (3 * 16); - else if (d == 8) - val = (rval + gval + bval) / 3; - else /* d == 32 */ - composeRGBPixel(rval, gval, bval, &val32); - } - - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w) - continue; - if (y < 0 || y >= h) - continue; - if (cmap) - pixSetPixel(pix, x, y, index); - else if (d == 32) - pixSetPixel(pix, x, y, val32); - else - pixSetPixel(pix, x, y, val); - } - - return 0; -} - - -/*! - * \brief pixRenderPtaBlend() - * - * \param[in] pix 32 bpp rgb - * \param[in] pta arbitrary set of points - * \param[in] rval, gval, bval - * \param[in] fract - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function clips the rendering to %pix.
- * 
- */ -l_ok -pixRenderPtaBlend(PIX *pix, - PTA *pta, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval, - l_float32 fract) -{ -l_int32 i, n, x, y, w, h; -l_uint8 nrval, ngval, nbval; -l_uint32 val32; -l_float32 frval, fgval, fbval; - - PROCNAME("pixRenderPtaBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (pixGetDepth(pix) != 32) - return ERROR_INT("depth not 32 bpp", procName, 1); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", procName); - fract = 0.5; - } - - pixGetDimensions(pix, &w, &h, NULL); - n = ptaGetCount(pta); - frval = fract * rval; - fgval = fract * gval; - fbval = fract * bval; - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w) - continue; - if (y < 0 || y >= h) - continue; - pixGetPixel(pix, x, y, &val32); - nrval = GET_DATA_BYTE(&val32, COLOR_RED); - nrval = (l_uint8)((1. - fract) * nrval + frval); - ngval = GET_DATA_BYTE(&val32, COLOR_GREEN); - ngval = (l_uint8)((1. - fract) * ngval + fgval); - nbval = GET_DATA_BYTE(&val32, COLOR_BLUE); - nbval = (l_uint8)((1. - fract) * nbval + fbval); - composeRGBPixel(nrval, ngval, nbval, &val32); - pixSetPixel(pix, x, y, val32); - } - - return 0; -} - - -/*------------------------------------------------------------------* - * Rendering of arbitrary shapes built with lines * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixRenderLine() - * - * \param[in] pix any depth, not cmapped - * \param[in] x1, y1 - * \param[in] x2, y2 - * \param[in] width thickness of line - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderLine(PIX *pix, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 width, - l_int32 op) -{ -PTA *pta; - - PROCNAME("pixRenderLine"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (width < 1) { - L_WARNING("width must be > 0; setting to 1\n", procName); - width = 1; - } - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - if ((pta = generatePtaWideLine(x1, y1, x2, y2, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPta(pix, pta, op); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderLineArb() - * - * \param[in] pix any depth, cmapped ok - * \param[in] x1, y1 - * \param[in] x2, y2 - * \param[in] width thickness of line - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderLineArb(PIX *pix, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval) -{ -PTA *pta; - - PROCNAME("pixRenderLineArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (width < 1) { - L_WARNING("width must be > 0; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaWideLine(x1, y1, x2, y2, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderLineBlend() - * - * \param[in] pix 32 bpp rgb - * \param[in] x1, y1 - * \param[in] x2, y2 - * \param[in] width thickness of line - * \param[in] rval, gval, bval - * \param[in] fract - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderLineBlend(PIX *pix, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval, - l_float32 fract) -{ -PTA *pta; - - PROCNAME("pixRenderLineBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (width < 1) { - L_WARNING("width must be > 0; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaWideLine(x1, y1, x2, y2, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaBlend(pix, pta, rval, gval, bval, fract); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderBox() - * - * \param[in] pix any depth, not cmapped - * \param[in] box - * \param[in] width thickness of box lines - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderBox(PIX *pix, - BOX *box, - l_int32 width, - l_int32 op) -{ -PTA *pta; - - PROCNAME("pixRenderBox"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - if ((pta = generatePtaBox(box, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPta(pix, pta, op); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderBoxArb() - * - * \param[in] pix any depth, cmapped ok - * \param[in] box - * \param[in] width thickness of box lines - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderBoxArb(PIX *pix, - BOX *box, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval) -{ -PTA *pta; - - PROCNAME("pixRenderBoxArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaBox(box, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderBoxBlend() - * - * \param[in] pix 32 bpp rgb - * \param[in] box - * \param[in] width thickness of box lines - * \param[in] rval, gval, bval - * \param[in] fract in [0.0 - 1.0]: 1.0 is no transparency; - * 0.0 is complete transparency (no effect) - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderBoxBlend(PIX *pix, - BOX *box, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval, - l_float32 fract) -{ -PTA *pta; - - PROCNAME("pixRenderBoxBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaBox(box, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaBlend(pix, pta, rval, gval, bval, fract); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderBoxa() - * - * \param[in] pix any depth, not cmapped - * \param[in] boxa - * \param[in] width thickness of line - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderBoxa(PIX *pix, - BOXA *boxa, - l_int32 width, - l_int32 op) -{ -PTA *pta; - - PROCNAME("pixRenderBoxa"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - if ((pta = generatePtaBoxa(boxa, width, 0)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPta(pix, pta, op); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderBoxaArb() - * - * \param[in] pix any depth; colormapped is ok - * \param[in] boxa - * \param[in] width thickness of line - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderBoxaArb(PIX *pix, - BOXA *boxa, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval) -{ -PTA *pta; - - PROCNAME("pixRenderBoxaArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaBoxa(boxa, width, 0)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderBoxaBlend() - * - * \param[in] pix 32 bpp rgb - * \param[in] boxa - * \param[in] width thickness of line - * \param[in] rval, gval, bval - * \param[in] fract in [0.0 - 1.0]: 1.0 is no transparency; - * 0.0 is complete transparency (no effect) - * \param[in] removedups 1 to remove; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderBoxaBlend(PIX *pix, - BOXA *boxa, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval, - l_float32 fract, - l_int32 removedups) -{ -PTA *pta; - - PROCNAME("pixRenderBoxaBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaBoxa(boxa, width, removedups)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaBlend(pix, pta, rval, gval, bval, fract); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderHashBox() - * - * \param[in] pix any depth, not cmapped - * \param[in] box - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, ... 
- * \param[in] outline 0 to skip drawing box outline - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderHashBox(PIX *pix, - BOX *box, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 op) -{ -PTA *pta; - - PROCNAME("pixRenderHashBox"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - pta = generatePtaHashBox(box, spacing, width, orient, outline); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - pixRenderPta(pix, pta, op); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderHashBoxArb() - * - * \param[in] pix any depth; cmapped ok - * \param[in] box - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, ... 
- * \param[in] outline 0 to skip drawing box outline - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderHashBoxArb(PIX *pix, - BOX *box, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -PTA *pta; - - PROCNAME("pixRenderHashBoxArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - - pta = generatePtaHashBox(box, spacing, width, orient, outline); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderHashBoxBlend() - * - * \param[in] pix 32 bpp - * \param[in] box - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, ... 
- * \param[in] outline 0 to skip drawing box outline - * \param[in] rval, gval, bval - * \param[in] fract in [0.0 - 1.0]: 1.0 is no transparency; - * 0.0 is complete transparency (no effect) - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderHashBoxBlend(PIX *pix, - BOX *box, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_float32 fract) -{ -PTA *pta; - - PROCNAME("pixRenderHashBoxBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - - pta = generatePtaHashBox(box, spacing, width, orient, outline); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaBlend(pix, pta, rval, gval, bval, fract); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderHashMaskArb() - * - * \param[in] pix any depth; cmapped ok - * \param[in] pixm 1 bpp clipping mask for hash marks - * \param[in] x,y UL corner of %pixm with respect to %pix - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - *
- * Notes:
- *      (1) This is an in-place operation that renders hash lines
- *          through a mask %pixm onto %pix.  The mask origin is
- *          translated by (%x,%y) relative to the origin of %pix.
- * 
- */ -l_ok -pixRenderHashMaskArb(PIX *pix, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 w, h; -BOX *box1, *box2; -PIX *pix1; -PTA *pta1, *pta2; - - PROCNAME("pixRenderHashMaskArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!pixm || pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not defined or not 1 bpp", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - - /* Get the points for masked hash lines */ - pixGetDimensions(pixm, &w, &h, NULL); - box1 = boxCreate(0, 0, w, h); - pta1 = generatePtaHashBox(box1, spacing, width, orient, outline); - pta2 = ptaCropToMask(pta1, pixm); - boxDestroy(&box1); - ptaDestroy(&pta1); - - /* Clip out the region and apply the hash lines */ - box2 = boxCreate(x, y, w, h); - pix1 = pixClipRectangle(pix, box2, NULL); - pixRenderPtaArb(pix1, pta2, rval, gval, bval); - ptaDestroy(&pta2); - boxDestroy(&box2); - - /* Rasterop the altered rectangle back in place */ - pixRasterop(pix, x, y, w, h, PIX_SRC, pix1, 0, 0); - pixDestroy(&pix1); - return 0; -} - - -/*! 
- * \brief pixRenderHashBoxa() - * - * \param[in] pix any depth, not cmapped - * \param[in] boxa - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderHashBoxa(PIX *pix, - BOXA *boxa, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 op) - { -PTA *pta; - - PROCNAME("pixRenderHashBoxa"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - pta = generatePtaHashBoxa(boxa, spacing, width, orient, outline, 1); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - pixRenderPta(pix, pta, op); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderHashBoxaArb() - * - * \param[in] pix any depth; cmapped ok - * \param[in] box - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderHashBoxaArb(PIX *pix, - BOXA *boxa, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -PTA *pta; - - PROCNAME("pixRenderHashBoxArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - - pta = generatePtaHashBoxa(boxa, spacing, width, orient, outline, 1); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderHashBoxaBlend() - * - * \param[in] pix 32 bpp rgb - * \param[in] boxa - * \param[in] spacing spacing between lines; must be > 1 - * \param[in] width thickness of box and hash lines - * \param[in] orient orientation of lines: L_HORIZONTAL_LINE, - * L_POS_SLOPE_LINE, L_VERTICAL_LINE, - * L_NEG_SLOPE_LINE - * \param[in] outline 0 to skip drawing box outline - * \param[in] rval, gval, bval - * \param[in] fract in [0.0 - 1.0]: 1.0 is no transparency; - * 0.0 is complete transparency (no effect) - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderHashBoxaBlend(PIX *pix, - BOXA *boxa, - l_int32 spacing, - l_int32 width, - l_int32 orient, - l_int32 outline, - l_int32 rval, - l_int32 gval, - l_int32 bval, - l_float32 fract) -{ -PTA *pta; - - PROCNAME("pixRenderHashBoxaBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (spacing <= 1) - return ERROR_INT("spacing not > 1", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (orient != L_HORIZONTAL_LINE && orient != L_POS_SLOPE_LINE && - orient != L_VERTICAL_LINE && orient != L_NEG_SLOPE_LINE) - return ERROR_INT("invalid line orientation", procName, 1); - - pta = generatePtaHashBoxa(boxa, spacing, width, orient, outline, 1); - if (!pta) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaBlend(pix, pta, rval, gval, bval, fract); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderPolyline() - * - * \param[in] pix any depth, not cmapped - * \param[in] ptas - * \param[in] width thickness of line - * \param[in] op one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS - * \param[in] closeflag 1 to close the contour; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      This renders a closed contour.
- * 
- */ -l_ok -pixRenderPolyline(PIX *pix, - PTA *ptas, - l_int32 width, - l_int32 op, - l_int32 closeflag) -{ -PTA *pta; - - PROCNAME("pixRenderPolyline"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS) - return ERROR_INT("invalid op", procName, 1); - - if ((pta = generatePtaPolyline(ptas, width, closeflag, 0)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPta(pix, pta, op); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderPolylineArb() - * - * \param[in] pix any depth; cmapped ok - * \param[in] ptas - * \param[in] width thickness of line - * \param[in] rval, gval, bval - * \param[in] closeflag 1 to close the contour; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      This renders a closed contour.
- * 
- */ -l_ok -pixRenderPolylineArb(PIX *pix, - PTA *ptas, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval, - l_int32 closeflag) -{ -PTA *pta; - - PROCNAME("pixRenderPolylineArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaPolyline(ptas, width, closeflag, 0)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderPolylineBlend() - * - * \param[in] pix 32 bpp rgb - * \param[in] ptas - * \param[in] width thickness of line - * \param[in] rval, gval, bval - * \param[in] fract in [0.0 - 1.0]: 1.0 is no transparency; - * 0.0 is complete transparency (no effect) - * \param[in] closeflag 1 to close the contour; 0 otherwise - * \param[in] removedups 1 to remove; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderPolylineBlend(PIX *pix, - PTA *ptas, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval, - l_float32 fract, - l_int32 closeflag, - l_int32 removedups) -{ -PTA *pta; - - PROCNAME("pixRenderPolylineBlend"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - if ((pta = generatePtaPolyline(ptas, width, closeflag, removedups)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaBlend(pix, pta, rval, gval, bval, fract); - ptaDestroy(&pta); - return 0; -} - - -/*! 
- * \brief pixRenderGridArb() - * - * \param[in] pix any depth, cmapped ok - * \param[in] nx, ny number of rectangles in each direction - * \param[in] width thickness of grid lines - * \param[in] rval, gval, bval - * \return 0 if OK, 1 on error - */ -l_ok -pixRenderGridArb(PIX *pix, - l_int32 nx, - l_int32 ny, - l_int32 width, - l_uint8 rval, - l_uint8 gval, - l_uint8 bval) -{ -l_int32 w, h; -PTA *pta; - - PROCNAME("pixRenderGridArb"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (nx < 1 || ny < 1) - return ERROR_INT("nx, ny must be > 0", procName, 1); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - pixGetDimensions(pix, &w, &h, NULL); - if ((pta = generatePtaGrid(w, h, nx, ny, width)) == NULL) - return ERROR_INT("pta not made", procName, 1); - pixRenderPtaArb(pix, pta, rval, gval, bval); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief pixRenderRandomCmapPtaa() - * - * \param[in] pix 1, 2, 4, 8, 16, 32 bpp - * \param[in] ptaa - * \param[in] polyflag 1 to interpret each Pta as a polyline; - * 0 to simply render the Pta as a set of pixels - * \param[in] width thickness of line; use only for polyline - * \param[in] closeflag 1 to close the contour; 0 otherwise; - * use only for polyline mode - * \return pixd cmapped, 8 bpp or NULL on error - * - *
- * Notes:
- *      (1) This is a debugging routine, that displays a set of
- *          pixels, selected by the set of Ptas in a Ptaa,
- *          in a random color in a pix.
- *      (2) If %polyflag == 1, each Pta is considered to be a polyline,
- *          and is rendered using %width and %closeflag.  Each polyline
- *          is rendered in a random color.
- *      (3) If %polyflag == 0, all points in each Pta are rendered in a
- *          random color.  The %width and %closeflag parameters are ignored.
- *      (4) The output pix is 8 bpp and colormapped.  Up to 254
- *          different, randomly selected colors, can be used.
- *      (5) The rendered pixels replace the input pixels.  They will
- *          be clipped silently to the input pix.
- * 
- */ -PIX * -pixRenderRandomCmapPtaa(PIX *pix, - PTAA *ptaa, - l_int32 polyflag, - l_int32 width, - l_int32 closeflag) -{ -l_int32 i, n, index, rval, gval, bval; -PIXCMAP *cmap; -PTA *pta, *ptat; -PIX *pixd; - - PROCNAME("pixRenderRandomCmapPtaa"); - - if (!pix) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - if (!ptaa) - return (PIX *)ERROR_PTR("ptaa not defined", procName, NULL); - if (polyflag != 0 && width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - pixd = pixConvertTo8(pix, FALSE); - cmap = pixcmapCreateRandom(8, 1, 1); - pixSetColormap(pixd, cmap); - - if ((n = ptaaGetCount(ptaa)) == 0) - return pixd; - - for (i = 0; i < n; i++) { - index = 1 + (i % 254); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - pta = ptaaGetPta(ptaa, i, L_CLONE); - if (polyflag) - ptat = generatePtaPolyline(pta, width, closeflag, 0); - else - ptat = ptaClone(pta); - pixRenderPtaArb(pixd, ptat, rval, gval, bval); - ptaDestroy(&pta); - ptaDestroy(&ptat); - } - - return pixd; -} - - - -/*------------------------------------------------------------------* - * Rendering and filling of polygons * - *------------------------------------------------------------------*/ -/*! - * \brief pixRenderPolygon() - * - * \param[in] ptas of vertices, none repeated - * \param[in] width of polygon outline - * \param[out] pxmin [optional] min x value of input pts - * \param[out] pymin [optional] min y value of input pts - * \return pix 1 bpp, with outline generated, or NULL on error - * - *
- * Notes:
- *      (1) The pix is the minimum size required to contain the origin
- *          and the polygon.  For example, the max x value of the input
- *          points is w - 1, where w is the pix width.
- *      (2) The rendered line is 4-connected, so that an interior or
- *          exterior 8-c.c. flood fill operation works properly.
- * 
- */ -PIX * -pixRenderPolygon(PTA *ptas, - l_int32 width, - l_int32 *pxmin, - l_int32 *pymin) -{ -l_float32 fxmin, fxmax, fymin, fymax; -PIX *pixd; -PTA *pta1, *pta2; - - PROCNAME("pixRenderPolygon"); - - if (pxmin) *pxmin = 0; - if (pymin) *pymin = 0; - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - - /* Generate a 4-connected polygon line */ - if ((pta1 = generatePtaPolyline(ptas, width, 1, 0)) == NULL) - return (PIX *)ERROR_PTR("pta1 not made", procName, NULL); - if (width < 2) - pta2 = convertPtaLineTo4cc(pta1); - else - pta2 = ptaClone(pta1); - - /* Render onto a minimum-sized pix */ - ptaGetRange(pta2, &fxmin, &fxmax, &fymin, &fymax); - if (pxmin) *pxmin = (l_int32)(fxmin + 0.5); - if (pymin) *pymin = (l_int32)(fymin + 0.5); - pixd = pixCreate((l_int32)(fxmax + 0.5) + 1, (l_int32)(fymax + 0.5) + 1, 1); - pixRenderPolyline(pixd, pta2, width, L_SET_PIXELS, 1); - ptaDestroy(&pta1); - ptaDestroy(&pta2); - return pixd; -} - - -/*! - * \brief pixFillPolygon() - * - * \param[in] pixs 1 bpp, with 4-connected polygon outline - * \param[in] pta vertices of the polygon - * \param[in] xmin, ymin min values of vertices of polygon - * \return pixd with outline filled, or NULL on error - * - *
- * Notes:
- *      (1) This fills the interior of the polygon, returning a
- *          new pix.  It works for both convex and non-convex polygons.
- *      (2) To generate a filled polygon from %pta:
- *            PIX *pixt = pixRenderPolygon(pta, 1, &xmin, &ymin);
- *            PIX *pixd = pixFillPolygon(pixt, pta, xmin, ymin);
- *            pixDestroy(&pixt);
- * 
- */ -PIX * -pixFillPolygon(PIX *pixs, - PTA *pta, - l_int32 xmin, - l_int32 ymin) -{ -l_int32 w, h, i, n, inside, found; -l_int32 *xstart, *xend; -PIX *pixi, *pixd; - - PROCNAME("pixFillPolygon"); - - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!pta) - return (PIX *)ERROR_PTR("pta not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - xstart = (l_int32 *)LEPT_CALLOC(w / 2, sizeof(l_int32)); - xend = (l_int32 *)LEPT_CALLOC(w / 2, sizeof(l_int32)); - - /* Find a raster with 2 or more black runs. The first background - * pixel after the end of the first run is likely to be inside - * the polygon, and can be used as a seed pixel. */ - found = FALSE; - for (i = ymin + 1; i < h; i++) { - pixFindHorizontalRuns(pixs, i, xstart, xend, &n); - if (n > 1) { - ptaPtInsidePolygon(pta, xend[0] + 1, i, &inside); - if (inside) { - found = TRUE; - break; - } - } - } - if (!found) { - L_WARNING("nothing found to fill\n", procName); - LEPT_FREE(xstart); - LEPT_FREE(xend); - return 0; - } - - /* Place the seed pixel in the output image */ - pixd = pixCreateTemplate(pixs); - pixSetPixel(pixd, xend[0] + 1, i, 1); - - /* Invert pixs to make a filling mask, and fill from the seed */ - pixi = pixInvert(NULL, pixs); - pixSeedfillBinary(pixd, pixd, pixi, 4); - - /* Add the pixels of the original polygon outline */ - pixOr(pixd, pixd, pixs); - - pixDestroy(&pixi); - LEPT_FREE(xstart); - LEPT_FREE(xend); - return pixd; -} - - -/*------------------------------------------------------------------* - * Contour rendering on grayscale images * - *------------------------------------------------------------------*/ -/*! - * \brief pixRenderContours() - * - * \param[in] pixs 8 or 16 bpp; no colormap - * \param[in] startval value of lowest contour; must be in [0 ... 
maxval] - * \param[in] incr increment to next contour; must be > 0 - * \param[in] outdepth either 1 or depth of pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The output can be either 1 bpp, showing just the contour
- *          lines, or a copy of the input pixs with the contour lines
- *          superposed.
- * 
- */ -PIX * -pixRenderContours(PIX *pixs, - l_int32 startval, - l_int32 incr, - l_int32 outdepth) -{ -l_int32 w, h, d, maxval, wpls, wpld, i, j, val, test; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixRenderContours"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 16) - return (PIX *)ERROR_PTR("pixs not 8 or 16 bpp", procName, NULL); - if (outdepth != 1 && outdepth != d) { - L_WARNING("invalid outdepth; setting to 1\n", procName); - outdepth = 1; - } - maxval = (1 << d) - 1; - if (startval < 0 || startval > maxval) - return (PIX *)ERROR_PTR("startval not in [0 ... maxval]", - procName, NULL); - if (incr < 1) - return (PIX *)ERROR_PTR("incr < 1", procName, NULL); - - if (outdepth == d) - pixd = pixCopy(NULL, pixs); - else - pixd = pixCreate(w, h, 1); - - pixCopyResolution(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - switch (d) - { - case 8: - if (outdepth == 1) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - if (val < startval) - continue; - test = (val - startval) % incr; - if (!test) - SET_DATA_BIT(lined, j); - } - } - } else { /* outdepth == d */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - if (val < startval) - continue; - test = (val - startval) % incr; - if (!test) - SET_DATA_BYTE(lined, j, 0); - } - } - } - break; - - case 16: - if (outdepth == 1) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_TWO_BYTES(lines, j); - if (val < startval) - continue; - test = (val - startval) % incr; - if (!test) 
- SET_DATA_BIT(lined, j); - } - } - } else { /* outdepth == d */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_TWO_BYTES(lines, j); - if (val < startval) - continue; - test = (val - startval) % incr; - if (!test) - SET_DATA_TWO_BYTES(lined, j, 0); - } - } - } - break; - - default: - return (PIX *)ERROR_PTR("pixs not 8 or 16 bpp", procName, NULL); - } - - return pixd; -} - - -/*! - * \brief fpixAutoRenderContours() - * - * \param[in] fpix - * \param[in] ncontours in [2 ... 500]; typically about 50 - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) The increment is set to get approximately %ncontours.
- *      (2) The proximity to the target value for contour display
- *          is set to 0.15.
- *      (3) Negative values are rendered in red; positive values as black.
- * 
- */ -PIX * -fpixAutoRenderContours(FPIX *fpix, - l_int32 ncontours) -{ -l_float32 minval, maxval, incr; - - PROCNAME("fpixAutoRenderContours"); - - if (!fpix) - return (PIX *)ERROR_PTR("fpix not defined", procName, NULL); - if (ncontours < 2 || ncontours > 500) - return (PIX *)ERROR_PTR("ncontours < 2 or > 500", procName, NULL); - - fpixGetMin(fpix, &minval, NULL, NULL); - fpixGetMax(fpix, &maxval, NULL, NULL); - if (minval == maxval) - return (PIX *)ERROR_PTR("all values in fpix are equal", procName, NULL); - incr = (maxval - minval) / ((l_float32)ncontours - 1); - return fpixRenderContours(fpix, incr, 0.15); -} - - -/*! - * \brief fpixRenderContours() - * - * \param[in] fpixs - * \param[in] incr increment between contours; must be > 0.0 - * \param[in] proxim required proximity to target value; default 0.15 - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Values are displayed when val/incr is within +-proxim
- *          to an integer.  The default value is 0.15; smaller values
- *          result in thinner contour lines.
- *      (2) Negative values are rendered in red; positive values as black.
- * 
- */ -PIX * -fpixRenderContours(FPIX *fpixs, - l_float32 incr, - l_float32 proxim) -{ -l_int32 i, j, w, h, wpls, wpld; -l_float32 val, invincr, finter, above, below, diff; -l_uint32 *datad, *lined; -l_float32 *datas, *lines; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("fpixRenderContours"); - - if (!fpixs) - return (PIX *)ERROR_PTR("fpixs not defined", procName, NULL); - if (incr <= 0.0) - return (PIX *)ERROR_PTR("incr <= 0.0", procName, NULL); - if (proxim <= 0.0) - proxim = 0.15; /* default */ - - fpixGetDimensions(fpixs, &w, &h); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(8); - pixSetColormap(pixd, cmap); - pixcmapAddColor(cmap, 255, 255, 255); /* white */ - pixcmapAddColor(cmap, 0, 0, 0); /* black */ - pixcmapAddColor(cmap, 255, 0, 0); /* red */ - - datas = fpixGetData(fpixs); - wpls = fpixGetWpl(fpixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - invincr = 1.0 / incr; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j]; - finter = invincr * val; - above = finter - floorf(finter); - below = ceilf(finter) - finter; - diff = L_MIN(above, below); - if (diff <= proxim) { - if (val < 0.0) - SET_DATA_BYTE(lined, j, 2); - else - SET_DATA_BYTE(lined, j, 1); - } - } - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Boundary pt generation on 1 bpp images * - *------------------------------------------------------------------*/ -/*! - * \brief pixGeneratePtaBoundary() - * - * \param[in] pixs 1 bpp - * \param[in] width of boundary line - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) Similar to ptaGetBoundaryPixels(), except here:
- *          * we only get pixels in the foreground
- *          * we can have a "line" width greater than 1 pixel.
- *      (2) Once generated, this can be applied to a random 1 bpp image
- *          to add a color boundary as follows:
- *             Pta *pta = pixGeneratePtaBoundary(pixs, width);
- *             Pix *pix1 = pixConvert1To8Cmap(pixs);
- *             pixRenderPtaArb(pix1, pta, rval, gval, bval);
- * 
- */ -PTA * -pixGeneratePtaBoundary(PIX *pixs, - l_int32 width) -{ -PIX *pix1; -PTA *pta; - - PROCNAME("pixGeneratePtaBoundary"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PTA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (width < 1) { - L_WARNING("width < 1; setting to 1\n", procName); - width = 1; - } - - pix1 = pixErodeBrick(NULL, pixs, 2 * width + 1, 2 * width + 1); - pixXor(pix1, pix1, pixs); - pta = ptaGetPixelsFromPix(pix1, NULL); - pixDestroy(&pix1); - return pta; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/graymorph.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/graymorph.c deleted file mode 100644 index 1d7440ce..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/graymorph.c +++ /dev/null @@ -1,1376 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file graymorph.c - *
- *
- *      Top-level grayscale morphological operations (van Herk / Gil-Werman)
- *            PIX           *pixErodeGray()
- *            PIX           *pixDilateGray()
- *            PIX           *pixOpenGray()
- *            PIX           *pixCloseGray()
- *
- *      Special operations for 1x3, 3x1 and 3x3 Sels  (direct)
- *            PIX           *pixErodeGray3()
- *            static PIX    *pixErodeGray3h()
- *            static PIX    *pixErodeGray3v()
- *            PIX           *pixDilateGray3()
- *            static PIX    *pixDilateGray3h()
- *            static PIX    *pixDilateGray3v()
- *            PIX           *pixOpenGray3()
- *            PIX           *pixCloseGray3()
- *
- *      Low-level grayscale morphological operations
- *            static void    dilateGrayLow()
- *            static void    erodeGrayLow()
- *
- *
- *      Method: Algorithm by van Herk and Gil and Werman, 1992
- *
- *      Measured speed of the vH/G-W implementation is about 1 output
- *      pixel per 120 PIII clock cycles, for a horizontal or vertical
- *      erosion or dilation.  The computation time doubles for opening
- *      or closing, or for a square SE, as expected, and is independent
- *      of the size of the SE.
- *
- *      A faster implementation can be made directly for brick Sels
- *      of maximum size 3.  We unroll the computation for sets of 8 bytes.
- *      It needs to be called explicitly; the general functions do not
- *      default for the size 3 brick Sels.
- *
- *      We use the van Herk/Gil-Werman (vHGW) algorithm, [van Herk,
- *      Patt. Recog. Let. 13, pp. 517-521, 1992; Gil and Werman,
- *      IEEE Trans PAMI 15(5), pp. 504-507, 1993.]
- *      This was the first grayscale morphology
- *      algorithm to compute dilation and erosion with
- *      complexity independent of the size of the structuring
- *      element.  It is simple and elegant, and surprising that
- *      it was discovered as recently as 1992.  It works for
- *      SEs composed of horizontal and/or vertical lines.  The
- *      general case requires finding the Min or Max over an
- *      arbitrary set of pixels, and this requires a number of
- *      pixel comparisons equal to the SE "size" at each pixel
- *      in the image.  The vHGW algorithm requires not
- *      more than 3 comparisons at each point.  The algorithm has been
- *      recently refined by Gil and Kimmel ("Efficient Dilation
- *      Erosion, Opening and Closing Algorithms", in "Mathematical
- *      Morphology and its Applications to Image and Signal Processing",
- *      the proceedings of the International Symposium on Mathematical
- *      Morphology, Palo Alto, CA, June 2000, Kluwer Academic
- *      Publishers, pp. 301-310).  They bring this number down below
- *      1.5 comparisons per output pixel but at a cost of significantly
- *      increased complexity, so I don't bother with that here.
- *
- *      In brief, the method is as follows.  We evaluate the dilation
- *      in groups of "size" pixels, equal to the size of the SE.
- *      For horizontal, we start at x = "size"/2 and go
- *      (w - 2 * ("size"/2))/"size" steps.  This means that
- *      we don't evaluate the first 0.5 * "size" pixels and, worst
- *      case, the last 1.5 * "size" pixels.  Thus we embed the
- *      image in a larger image with these augmented dimensions, where
- *      the new border pixels are appropriately initialized (0 for
- *      dilation; 255 for erosion), and remove the boundary at the end.
- *      (For vertical, use h instead of w.)   Then for each group
- *      of "size" pixels, we form an array of length 2 * "size" + 1,
- *      consisting of backward and forward partial maxima (for
- *      dilation) or minima (for erosion).  This represents a
- *      jumping window computed from the source image, over which
- *      the SE will slide.  The center of the array gets the source
- *      pixel at the center of the SE.  Call this the center pixel
- *      of the window.  Array values to left of center get
- *      the maxima(minima) of the pixels from the center
- *      one and going to the left an equal distance.  Array
- *      values to the right of center get the maxima(minima) to
- *      the pixels from the center one and going to the right
- *      an equal distance.  These are computed sequentially starting
- *      from the center one.  The SE (of length "size") can slide over this
- *      window (of length 2 * "size + 1) at "size" different places.
- *      At each place, the maxima(minima) of the values in the window
- *      that correspond to the end points of the SE give the extremal
- *      values over that interval, and these are stored at the dest
- *      pixel corresponding to the SE center.  A picture is worth
- *      at least this many words, so if this isn't clear, see the
- *      leptonica documentation on grayscale morphology.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Special static operations for 3x1, 1x3 and 3x3 structuring elements */ -static PIX *pixErodeGray3h(PIX *pixs); -static PIX *pixErodeGray3v(PIX *pixs); -static PIX *pixDilateGray3h(PIX *pixs); -static PIX *pixDilateGray3v(PIX *pixs); - - /* Low-level gray morphological operations */ -static void dilateGrayLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_int32 size, l_int32 direction, l_uint8 *buffer, - l_uint8 *maxarray); -static void erodeGrayLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_int32 size, l_int32 direction, l_uint8 *buffer, - l_uint8 *minarray); - -/*-----------------------------------------------------------------* - * Top-level grayscale morphological operations * - *-----------------------------------------------------------------*/ -/*! - * \brief pixErodeGray() - * - * \param[in] pixs - * \param[in] hsize of Sel; must be odd; origin implicitly in center - * \param[in] vsize ditto - * \return pixd - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) If hsize = vsize = 1, just returns a copy.
- * 
- */ -PIX * -pixErodeGray(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_uint8 *buffer, *minarray; -l_int32 w, h, wplb, wplt; -l_int32 leftpix, rightpix, toppix, bottompix, maxsize; -l_uint32 *datab, *datat; -PIX *pixb, *pixt, *pixd; - - PROCNAME("pixErodeGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL); - if ((hsize & 1) == 0 ) { - L_WARNING("horiz sel size must be odd; increasing by 1\n", procName); - hsize++; - } - if ((vsize & 1) == 0 ) { - L_WARNING("vert sel size must be odd; increasing by 1\n", procName); - vsize++; - } - - pixb = pixt = pixd = NULL; - buffer = minarray = NULL; - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - if (vsize == 1) { /* horizontal sel */ - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = 0; - bottompix = 0; - } else if (hsize == 1) { /* vertical sel */ - leftpix = 0; - rightpix = 0; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } else { - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } - - pixb = pixAddBorderGeneral(pixs, leftpix, rightpix, toppix, bottompix, 255); - pixt = pixCreateTemplate(pixb); - if (!pixb || !pixt) { - L_ERROR("pixb and pixt not made\n", procName); - goto cleanup; - } - - pixGetDimensions(pixt, &w, &h, NULL); - datab = pixGetData(pixb); - datat = pixGetData(pixt); - wplb = pixGetWpl(pixb); - wplt = pixGetWpl(pixt); - - buffer = (l_uint8 *)LEPT_CALLOC(L_MAX(w, h), sizeof(l_uint8)); - maxsize = L_MAX(hsize, vsize); - minarray = (l_uint8 *)LEPT_CALLOC(2 * maxsize, sizeof(l_uint8)); - if (!buffer || !minarray) { - L_ERROR("buffer and minarray not made\n", procName); - goto cleanup; - } - - if (vsize == 1) { - erodeGrayLow(datat, w, h, wplt, datab, wplb, 
hsize, L_HORIZ, - buffer, minarray); - } else if (hsize == 1) { - erodeGrayLow(datat, w, h, wplt, datab, wplb, vsize, L_VERT, - buffer, minarray); - } else { - erodeGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, minarray); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_SET); - erodeGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, minarray); - pixDestroy(&pixt); - pixt = pixClone(pixb); - } - - pixd = pixRemoveBorderGeneral(pixt, leftpix, rightpix, toppix, bottompix); - if (!pixd) - L_ERROR("pixd not made\n", procName); - -cleanup: - LEPT_FREE(buffer); - LEPT_FREE(minarray); - pixDestroy(&pixb); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixDilateGray() - * - * \param[in] pixs - * \param[in] hsize of Sel; must be odd; origin implicitly in center - * \param[in] vsize ditto - * \return pixd - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) If hsize = vsize = 1, just returns a copy.
- * 
- */ -PIX * -pixDilateGray(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_uint8 *buffer, *maxarray; -l_int32 w, h, wplb, wplt; -l_int32 leftpix, rightpix, toppix, bottompix, maxsize; -l_uint32 *datab, *datat; -PIX *pixb, *pixt, *pixd; - - PROCNAME("pixDilateGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL); - if ((hsize & 1) == 0 ) { - L_WARNING("horiz sel size must be odd; increasing by 1\n", procName); - hsize++; - } - if ((vsize & 1) == 0 ) { - L_WARNING("vert sel size must be odd; increasing by 1\n", procName); - vsize++; - } - - pixb = pixt = pixd = NULL; - buffer = maxarray = NULL; - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - if (vsize == 1) { /* horizontal sel */ - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = 0; - bottompix = 0; - } else if (hsize == 1) { /* vertical sel */ - leftpix = 0; - rightpix = 0; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } else { - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } - - pixb = pixAddBorderGeneral(pixs, leftpix, rightpix, toppix, bottompix, 0); - pixt = pixCreateTemplate(pixb); - if (!pixb || !pixt) { - L_ERROR("pixb and pixt not made\n", procName); - goto cleanup; - } - - pixGetDimensions(pixt, &w, &h, NULL); - datab = pixGetData(pixb); - datat = pixGetData(pixt); - wplb = pixGetWpl(pixb); - wplt = pixGetWpl(pixt); - - buffer = (l_uint8 *)LEPT_CALLOC(L_MAX(w, h), sizeof(l_uint8)); - maxsize = L_MAX(hsize, vsize); - maxarray = (l_uint8 *)LEPT_CALLOC(2 * maxsize, sizeof(l_uint8)); - if (!buffer || !maxarray) { - L_ERROR("buffer and maxarray not made\n", procName); - goto cleanup; - } - - if (vsize == 1) { - dilateGrayLow(datat, w, h, wplt, datab, wplb, 
hsize, L_HORIZ, - buffer, maxarray); - } else if (hsize == 1) { - dilateGrayLow(datat, w, h, wplt, datab, wplb, vsize, L_VERT, - buffer, maxarray); - } else { - dilateGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, maxarray); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_CLR); - dilateGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, maxarray); - pixDestroy(&pixt); - pixt = pixClone(pixb); - } - - pixd = pixRemoveBorderGeneral(pixt, leftpix, rightpix, toppix, bottompix); - if (!pixd) - L_ERROR("pixd not made\n", procName); - -cleanup: - LEPT_FREE(buffer); - LEPT_FREE(maxarray); - pixDestroy(&pixb); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixOpenGray() - * - * \param[in] pixs - * \param[in] hsize of Sel; must be odd; origin implicitly in center - * \param[in] vsize ditto - * \return pixd - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) If hsize = vsize = 1, just returns a copy.
- * 
- */ -PIX * -pixOpenGray(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_uint8 *buffer; -l_uint8 *array; /* used to find either min or max in interval */ -l_int32 w, h, wplb, wplt; -l_int32 leftpix, rightpix, toppix, bottompix, maxsize; -l_uint32 *datab, *datat; -PIX *pixb, *pixt, *pixd; - - PROCNAME("pixOpenGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL); - if ((hsize & 1) == 0 ) { - L_WARNING("horiz sel size must be odd; increasing by 1\n", procName); - hsize++; - } - if ((vsize & 1) == 0 ) { - L_WARNING("vert sel size must be odd; increasing by 1\n", procName); - vsize++; - } - - pixb = pixt = pixd = NULL; - buffer = array = NULL; - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - if (vsize == 1) { /* horizontal sel */ - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = 0; - bottompix = 0; - } else if (hsize == 1) { /* vertical sel */ - leftpix = 0; - rightpix = 0; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } else { - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } - - pixb = pixAddBorderGeneral(pixs, leftpix, rightpix, toppix, bottompix, 255); - pixt = pixCreateTemplate(pixb); - if (!pixb || !pixt) { - L_ERROR("pixb and pixt not made\n", procName); - goto cleanup; - } - - pixGetDimensions(pixt, &w, &h, NULL); - datab = pixGetData(pixb); - datat = pixGetData(pixt); - wplb = pixGetWpl(pixb); - wplt = pixGetWpl(pixt); - - buffer = (l_uint8 *)LEPT_CALLOC(L_MAX(w, h), sizeof(l_uint8)); - maxsize = L_MAX(hsize, vsize); - array = (l_uint8 *)LEPT_CALLOC(2 * maxsize, sizeof(l_uint8)); - if (!buffer || !array) { - L_ERROR("buffer and array not made\n", procName); - goto cleanup; - } - - if (vsize == 1) { - 
erodeGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_CLR); - dilateGrayLow(datab, w, h, wplb, datat, wplt, hsize, L_HORIZ, - buffer, array); - } - else if (hsize == 1) { - erodeGrayLow(datat, w, h, wplt, datab, wplb, vsize, L_VERT, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_CLR); - dilateGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, array); - } else { - erodeGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_SET); - erodeGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, array); - pixSetOrClearBorder(pixb, leftpix, rightpix, toppix, bottompix, - PIX_CLR); - dilateGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_CLR); - dilateGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, array); - } - - pixd = pixRemoveBorderGeneral(pixb, leftpix, rightpix, toppix, bottompix); - if (!pixd) - L_ERROR("pixd not made\n", procName); - -cleanup: - LEPT_FREE(buffer); - LEPT_FREE(array); - pixDestroy(&pixb); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixCloseGray() - * - * \param[in] pixs - * \param[in] hsize of Sel; must be odd; origin implicitly in center - * \param[in] vsize ditto - * \return pixd - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) If hsize = vsize = 1, just returns a copy.
- * 
- */ -PIX * -pixCloseGray(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_uint8 *buffer; -l_uint8 *array; /* used to find either min or max in interval */ -l_int32 w, h, wplb, wplt; -l_int32 leftpix, rightpix, toppix, bottompix, maxsize; -l_uint32 *datab, *datat; -PIX *pixb, *pixt, *pixd; - - PROCNAME("pixCloseGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL); - if ((hsize & 1) == 0 ) { - L_WARNING("horiz sel size must be odd; increasing by 1\n", procName); - hsize++; - } - if ((vsize & 1) == 0 ) { - L_WARNING("vert sel size must be odd; increasing by 1\n", procName); - vsize++; - } - - pixb = pixt = pixd = NULL; - buffer = array = NULL; - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - if (vsize == 1) { /* horizontal sel */ - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = 0; - bottompix = 0; - } else if (hsize == 1) { /* vertical sel */ - leftpix = 0; - rightpix = 0; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } else { - leftpix = (hsize + 1) / 2; - rightpix = (3 * hsize + 1) / 2; - toppix = (vsize + 1) / 2; - bottompix = (3 * vsize + 1) / 2; - } - - pixb = pixAddBorderGeneral(pixs, leftpix, rightpix, toppix, bottompix, 0); - pixt = pixCreateTemplate(pixb); - if (!pixb || !pixt) { - L_ERROR("pixb and pixt not made\n", procName); - goto cleanup; - } - - pixGetDimensions(pixt, &w, &h, NULL); - datab = pixGetData(pixb); - datat = pixGetData(pixt); - wplb = pixGetWpl(pixb); - wplt = pixGetWpl(pixt); - - buffer = (l_uint8 *)LEPT_CALLOC(L_MAX(w, h), sizeof(l_uint8)); - maxsize = L_MAX(hsize, vsize); - array = (l_uint8 *)LEPT_CALLOC(2 * maxsize, sizeof(l_uint8)); - if (!buffer || !array) { - L_ERROR("buffer and array not made\n", procName); - goto cleanup; - } - - if (vsize == 1) { - 
dilateGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_SET); - erodeGrayLow(datab, w, h, wplb, datat, wplt, hsize, L_HORIZ, - buffer, array); - } else if (hsize == 1) { - dilateGrayLow(datat, w, h, wplt, datab, wplb, vsize, L_VERT, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_SET); - erodeGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, array); - } else { - dilateGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_CLR); - dilateGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, array); - pixSetOrClearBorder(pixb, leftpix, rightpix, toppix, bottompix, - PIX_SET); - erodeGrayLow(datat, w, h, wplt, datab, wplb, hsize, L_HORIZ, - buffer, array); - pixSetOrClearBorder(pixt, leftpix, rightpix, toppix, bottompix, - PIX_SET); - erodeGrayLow(datab, w, h, wplb, datat, wplt, vsize, L_VERT, - buffer, array); - } - - pixd = pixRemoveBorderGeneral(pixb, leftpix, rightpix, toppix, bottompix); - if (!pixd) - L_ERROR("pixd not made\n", procName); - -cleanup: - LEPT_FREE(buffer); - LEPT_FREE(array); - pixDestroy(&pixb); - pixDestroy(&pixt); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Special operations for 1x3, 3x1 and 3x3 Sels * - *-----------------------------------------------------------------*/ -/*! - * \brief pixErodeGray3() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] hsize 1 or 3 - * \param[in] vsize 1 or 3 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits)
- *      (2) If hsize = vsize = 1, just returns a copy.
- *      (3) It would be nice not to add a border, but it is required
- *          if we want the same results as from the general case.
- *          We add 4 bytes on the left to speed up the copying, and
- *          8 bytes at the right and bottom to allow unrolling of
- *          the computation of 8 pixels.
- * 
- */ -PIX * -pixErodeGray3(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt, *pixb, *pixbd, *pixd; - - PROCNAME("pixErodeGray3"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pix has colormap", procName, NULL); - if ((hsize != 1 && hsize != 3) || - (vsize != 1 && vsize != 3)) - return (PIX *)ERROR_PTR("invalid size: must be 1 or 3", procName, NULL); - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - pixb = pixAddBorderGeneral(pixs, 4, 8, 2, 8, 255); - - if (vsize == 1) - pixbd = pixErodeGray3h(pixb); - else if (hsize == 1) - pixbd = pixErodeGray3v(pixb); - else { /* vize == hsize == 3 */ - pixt = pixErodeGray3h(pixb); - pixbd = pixErodeGray3v(pixt); - pixDestroy(&pixt); - } - - pixd = pixRemoveBorderGeneral(pixbd, 4, 8, 2, 8); - pixDestroy(&pixb); - pixDestroy(&pixbd); - return pixd; -} - - -/*! - * \brief pixErodeGray3h() - * - * \param[in] pixs 8 bpp, not cmapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for horizontal 3x1 brick Sel;
- *          also used as the first step for the 3x3 brick Sel.
- * 
- */ -static PIX * -pixErodeGray3h(PIX *pixs) -{ -l_uint32 *datas, *datad, *lines, *lined; -l_int32 w, h, wpl, i, j; -l_int32 val0, val1, val2, val3, val4, val5, val6, val7, val8, val9, minval; -PIX *pixd; - - PROCNAME("pixErodeGray3h"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpl; - lined = datad + i * wpl; - for (j = 1; j < w - 8; j += 8) { - val0 = GET_DATA_BYTE(lines, j - 1); - val1 = GET_DATA_BYTE(lines, j); - val2 = GET_DATA_BYTE(lines, j + 1); - val3 = GET_DATA_BYTE(lines, j + 2); - val4 = GET_DATA_BYTE(lines, j + 3); - val5 = GET_DATA_BYTE(lines, j + 4); - val6 = GET_DATA_BYTE(lines, j + 5); - val7 = GET_DATA_BYTE(lines, j + 6); - val8 = GET_DATA_BYTE(lines, j + 7); - val9 = GET_DATA_BYTE(lines, j + 8); - minval = L_MIN(val1, val2); - SET_DATA_BYTE(lined, j, L_MIN(val0, minval)); - SET_DATA_BYTE(lined, j + 1, L_MIN(minval, val3)); - minval = L_MIN(val3, val4); - SET_DATA_BYTE(lined, j + 2, L_MIN(val2, minval)); - SET_DATA_BYTE(lined, j + 3, L_MIN(minval, val5)); - minval = L_MIN(val5, val6); - SET_DATA_BYTE(lined, j + 4, L_MIN(val4, minval)); - SET_DATA_BYTE(lined, j + 5, L_MIN(minval, val7)); - minval = L_MIN(val7, val8); - SET_DATA_BYTE(lined, j + 6, L_MIN(val6, minval)); - SET_DATA_BYTE(lined, j + 7, L_MIN(minval, val9)); - } - } - return pixd; -} - - -/*! - * \brief pixErodeGray3v() - * - * \param[in] pixs 8 bpp, not cmapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for vertical 1x3 brick Sel;
- *          also used as the second step for the 3x3 brick Sel.
- *      (2) Surprisingly, this is faster than setting up the
- *          lineptrs array and accessing into it; e.g.,
- *              val4 = GET_DATA_BYTE(lines8[i + 3], j);
- * 
- */ -static PIX * -pixErodeGray3v(PIX *pixs) -{ -l_uint32 *datas, *datad, *linesi, *linedi; -l_int32 w, h, wpl, i, j; -l_int32 val0, val1, val2, val3, val4, val5, val6, val7, val8, val9, minval; -PIX *pixd; - - PROCNAME("pixErodeGray3v"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpl = pixGetWpl(pixs); - for (j = 0; j < w; j++) { - for (i = 1; i < h - 8; i += 8) { - linesi = datas + i * wpl; - linedi = datad + i * wpl; - val0 = GET_DATA_BYTE(linesi - wpl, j); - val1 = GET_DATA_BYTE(linesi, j); - val2 = GET_DATA_BYTE(linesi + wpl, j); - val3 = GET_DATA_BYTE(linesi + 2 * wpl, j); - val4 = GET_DATA_BYTE(linesi + 3 * wpl, j); - val5 = GET_DATA_BYTE(linesi + 4 * wpl, j); - val6 = GET_DATA_BYTE(linesi + 5 * wpl, j); - val7 = GET_DATA_BYTE(linesi + 6 * wpl, j); - val8 = GET_DATA_BYTE(linesi + 7 * wpl, j); - val9 = GET_DATA_BYTE(linesi + 8 * wpl, j); - minval = L_MIN(val1, val2); - SET_DATA_BYTE(linedi, j, L_MIN(val0, minval)); - SET_DATA_BYTE(linedi + wpl, j, L_MIN(minval, val3)); - minval = L_MIN(val3, val4); - SET_DATA_BYTE(linedi + 2 * wpl, j, L_MIN(val2, minval)); - SET_DATA_BYTE(linedi + 3 * wpl, j, L_MIN(minval, val5)); - minval = L_MIN(val5, val6); - SET_DATA_BYTE(linedi + 4 * wpl, j, L_MIN(val4, minval)); - SET_DATA_BYTE(linedi + 5 * wpl, j, L_MIN(minval, val7)); - minval = L_MIN(val7, val8); - SET_DATA_BYTE(linedi + 6 * wpl, j, L_MIN(val6, minval)); - SET_DATA_BYTE(linedi + 7 * wpl, j, L_MIN(minval, val9)); - } - } - return pixd; -} - - -/*! - * \brief pixDilateGray3() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] hsize 1 or 3 - * \param[in] vsize 1 or 3 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits)
- *      (2) If hsize = vsize = 1, just returns a copy.
- * 
- */ -PIX * -pixDilateGray3(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt, *pixb, *pixbd, *pixd; - - PROCNAME("pixDilateGray3"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pix has colormap", procName, NULL); - if ((hsize != 1 && hsize != 3) || - (vsize != 1 && vsize != 3)) - return (PIX *)ERROR_PTR("invalid size: must be 1 or 3", procName, NULL); - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - pixb = pixAddBorderGeneral(pixs, 4, 8, 2, 8, 0); - - if (vsize == 1) - pixbd = pixDilateGray3h(pixb); - else if (hsize == 1) - pixbd = pixDilateGray3v(pixb); - else { /* vize == hsize == 3 */ - pixt = pixDilateGray3h(pixb); - pixbd = pixDilateGray3v(pixt); - pixDestroy(&pixt); - } - - pixd = pixRemoveBorderGeneral(pixbd, 4, 8, 2, 8); - pixDestroy(&pixb); - pixDestroy(&pixbd); - return pixd; -} - - -/*! - * \brief pixDilateGray3h() - * - * \param[in] pixs 8 bpp, not cmapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for horizontal 3x1 brick Sel;
- *          also used as the first step for the 3x3 brick Sel.
- * 
- */ -static PIX * -pixDilateGray3h(PIX *pixs) -{ -l_uint32 *datas, *datad, *lines, *lined; -l_int32 w, h, wpl, i, j; -l_int32 val0, val1, val2, val3, val4, val5, val6, val7, val8, val9, maxval; -PIX *pixd; - - PROCNAME("pixDilateGray3h"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - lines = datas + i * wpl; - lined = datad + i * wpl; - for (j = 1; j < w - 8; j += 8) { - val0 = GET_DATA_BYTE(lines, j - 1); - val1 = GET_DATA_BYTE(lines, j); - val2 = GET_DATA_BYTE(lines, j + 1); - val3 = GET_DATA_BYTE(lines, j + 2); - val4 = GET_DATA_BYTE(lines, j + 3); - val5 = GET_DATA_BYTE(lines, j + 4); - val6 = GET_DATA_BYTE(lines, j + 5); - val7 = GET_DATA_BYTE(lines, j + 6); - val8 = GET_DATA_BYTE(lines, j + 7); - val9 = GET_DATA_BYTE(lines, j + 8); - maxval = L_MAX(val1, val2); - SET_DATA_BYTE(lined, j, L_MAX(val0, maxval)); - SET_DATA_BYTE(lined, j + 1, L_MAX(maxval, val3)); - maxval = L_MAX(val3, val4); - SET_DATA_BYTE(lined, j + 2, L_MAX(val2, maxval)); - SET_DATA_BYTE(lined, j + 3, L_MAX(maxval, val5)); - maxval = L_MAX(val5, val6); - SET_DATA_BYTE(lined, j + 4, L_MAX(val4, maxval)); - SET_DATA_BYTE(lined, j + 5, L_MAX(maxval, val7)); - maxval = L_MAX(val7, val8); - SET_DATA_BYTE(lined, j + 6, L_MAX(val6, maxval)); - SET_DATA_BYTE(lined, j + 7, L_MAX(maxval, val9)); - } - } - return pixd; -} - - -/*! - * \brief pixDilateGray3v() - * - * \param[in] pixs 8 bpp, not cmapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for vertical 1x3 brick Sel;
- *          also used as the second step for the 3x3 brick Sel.
- * 
- */ -static PIX * -pixDilateGray3v(PIX *pixs) -{ -l_uint32 *datas, *datad, *linesi, *linedi; -l_int32 w, h, wpl, i, j; -l_int32 val0, val1, val2, val3, val4, val5, val6, val7, val8, val9, maxval; -PIX *pixd; - - PROCNAME("pixDilateGray3v"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpl = pixGetWpl(pixs); - for (j = 0; j < w; j++) { - for (i = 1; i < h - 8; i += 8) { - linesi = datas + i * wpl; - linedi = datad + i * wpl; - val0 = GET_DATA_BYTE(linesi - wpl, j); - val1 = GET_DATA_BYTE(linesi, j); - val2 = GET_DATA_BYTE(linesi + wpl, j); - val3 = GET_DATA_BYTE(linesi + 2 * wpl, j); - val4 = GET_DATA_BYTE(linesi + 3 * wpl, j); - val5 = GET_DATA_BYTE(linesi + 4 * wpl, j); - val6 = GET_DATA_BYTE(linesi + 5 * wpl, j); - val7 = GET_DATA_BYTE(linesi + 6 * wpl, j); - val8 = GET_DATA_BYTE(linesi + 7 * wpl, j); - val9 = GET_DATA_BYTE(linesi + 8 * wpl, j); - maxval = L_MAX(val1, val2); - SET_DATA_BYTE(linedi, j, L_MAX(val0, maxval)); - SET_DATA_BYTE(linedi + wpl, j, L_MAX(maxval, val3)); - maxval = L_MAX(val3, val4); - SET_DATA_BYTE(linedi + 2 * wpl, j, L_MAX(val2, maxval)); - SET_DATA_BYTE(linedi + 3 * wpl, j, L_MAX(maxval, val5)); - maxval = L_MAX(val5, val6); - SET_DATA_BYTE(linedi + 4 * wpl, j, L_MAX(val4, maxval)); - SET_DATA_BYTE(linedi + 5 * wpl, j, L_MAX(maxval, val7)); - maxval = L_MAX(val7, val8); - SET_DATA_BYTE(linedi + 6 * wpl, j, L_MAX(val6, maxval)); - SET_DATA_BYTE(linedi + 7 * wpl, j, L_MAX(maxval, val9)); - } - } - return pixd; -} - - -/*! - * \brief pixOpenGray3() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] hsize 1 or 3 - * \param[in] vsize 1 or 3 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits)
- *      (2) If hsize = vsize = 1, just returns a copy.
- *      (3) It would be nice not to add a border, but it is required
- *          to get the same results as for the general case.
- * 
- */ -PIX * -pixOpenGray3(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt, *pixb, *pixbd, *pixd; - - PROCNAME("pixOpenGray3"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pix has colormap", procName, NULL); - if ((hsize != 1 && hsize != 3) || - (vsize != 1 && vsize != 3)) - return (PIX *)ERROR_PTR("invalid size: must be 1 or 3", procName, NULL); - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - pixb = pixAddBorderGeneral(pixs, 4, 8, 2, 8, 255); /* set to max */ - - if (vsize == 1) { - pixt = pixErodeGray3h(pixb); - pixSetBorderVal(pixt, 4, 8, 2, 8, 0); /* set to min */ - pixbd = pixDilateGray3h(pixt); - pixDestroy(&pixt); - } else if (hsize == 1) { - pixt = pixErodeGray3v(pixb); - pixSetBorderVal(pixt, 4, 8, 2, 8, 0); - pixbd = pixDilateGray3v(pixt); - pixDestroy(&pixt); - } else { /* vize == hsize == 3 */ - pixt = pixErodeGray3h(pixb); - pixbd = pixErodeGray3v(pixt); - pixDestroy(&pixt); - pixSetBorderVal(pixbd, 4, 8, 2, 8, 0); - pixt = pixDilateGray3h(pixbd); - pixDestroy(&pixbd); - pixbd = pixDilateGray3v(pixt); - pixDestroy(&pixt); - } - - pixd = pixRemoveBorderGeneral(pixbd, 4, 8, 2, 8); - pixDestroy(&pixb); - pixDestroy(&pixbd); - return pixd; -} - - -/*! - * \brief pixCloseGray3() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] hsize 1 or 3 - * \param[in] vsize 1 or 3 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits)
- *      (2) If hsize = vsize = 1, just returns a copy.
- * 
- */ -PIX * -pixCloseGray3(PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt, *pixb, *pixbd, *pixd; - - PROCNAME("pixCloseGray3"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pix has colormap", procName, NULL); - if ((hsize != 1 && hsize != 3) || - (vsize != 1 && vsize != 3)) - return (PIX *)ERROR_PTR("invalid size: must be 1 or 3", procName, NULL); - - if (hsize == 1 && vsize == 1) - return pixCopy(NULL, pixs); - - pixb = pixAddBorderGeneral(pixs, 4, 8, 2, 8, 0); /* set to min */ - - if (vsize == 1) { - pixt = pixDilateGray3h(pixb); - pixSetBorderVal(pixt, 4, 8, 2, 8, 255); /* set to max */ - pixbd = pixErodeGray3h(pixt); - pixDestroy(&pixt); - } else if (hsize == 1) { - pixt = pixDilateGray3v(pixb); - pixSetBorderVal(pixt, 4, 8, 2, 8, 255); - pixbd = pixErodeGray3v(pixt); - pixDestroy(&pixt); - } else { /* vize == hsize == 3 */ - pixt = pixDilateGray3h(pixb); - pixbd = pixDilateGray3v(pixt); - pixDestroy(&pixt); - pixSetBorderVal(pixbd, 4, 8, 2, 8, 255); - pixt = pixErodeGray3h(pixbd); - pixDestroy(&pixbd); - pixbd = pixErodeGray3v(pixt); - pixDestroy(&pixt); - } - - pixd = pixRemoveBorderGeneral(pixbd, 4, 8, 2, 8); - pixDestroy(&pixb); - pixDestroy(&pixbd); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Low-level gray morphological operations * - *-----------------------------------------------------------------*/ -/*! 
- * \brief dilateGrayLow() - * - * \param[in] datad 8 bpp dsst image - * \param[in] w, h dimensions of src and dest - * \param[in] wpld words/line of dest - * \param[in] datas 8 bpp src image - * \param[in] wpls words/line of src - * \param[in] size full length of SEL; restricted to odd numbers - * \param[in] direction L_HORIZ or L_VERT - * \param[in] buffer holds full line or column of src image pixels - * \param[in] maxarray array of dimension 2*size+1 - * \return void - * - *
- * Notes:
- *        (1) To eliminate border effects on the actual image, these images
- *            are prepared with an additional border of dimensions:
- *               leftpix = 0.5 * size
- *               rightpix = 1.5 * size
- *               toppix = 0.5 * size
- *               bottompix = 1.5 * size
- *            and we initialize the src border pixels to 0.
- *            This allows full processing over the actual image; at
- *            the end the border is removed.
- *        (2) Uses algorithm of van Herk, Gil and Werman
- * 
- */ -static void -dilateGrayLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 size, - l_int32 direction, - l_uint8 *buffer, - l_uint8 *maxarray) -{ -l_int32 i, j, k; -l_int32 hsize, nsteps, startmax, startx, starty; -l_uint8 maxval; -l_uint32 *lines, *lined; - - if (direction == L_HORIZ) { - hsize = size / 2; - nsteps = (w - 2 * hsize) / size; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - - /* fill buffer with pixels in byte order */ - for (j = 0; j < w; j++) - buffer[j] = GET_DATA_BYTE(lines, j); - - for (j = 0; j < nsteps; j++) { - /* refill the minarray */ - startmax = (j + 1) * size - 1; - maxarray[size - 1] = buffer[startmax]; - for (k = 1; k < size; k++) { - maxarray[size - 1 - k] = - L_MAX(maxarray[size - k], buffer[startmax - k]); - maxarray[size - 1 + k] = - L_MAX(maxarray[size + k - 2], buffer[startmax + k]); - } - - /* compute dilation values */ - startx = hsize + j * size; - SET_DATA_BYTE(lined, startx, maxarray[0]); - SET_DATA_BYTE(lined, startx + size - 1, maxarray[2 * size - 2]); - for (k = 1; k < size - 1; k++) { - maxval = L_MAX(maxarray[k], maxarray[k + size - 1]); - SET_DATA_BYTE(lined, startx + k, maxval); - } - } - } - } else { /* direction == L_VERT */ - hsize = size / 2; - nsteps = (h - 2 * hsize) / size; - for (j = 0; j < w; j++) { - /* fill buffer with pixels in byte order */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - buffer[i] = GET_DATA_BYTE(lines, j); - } - - for (i = 0; i < nsteps; i++) { - /* refill the minarray */ - startmax = (i + 1) * size - 1; - maxarray[size - 1] = buffer[startmax]; - for (k = 1; k < size; k++) { - maxarray[size - 1 - k] = - L_MAX(maxarray[size - k], buffer[startmax - k]); - maxarray[size - 1 + k] = - L_MAX(maxarray[size + k - 2], buffer[startmax + k]); - } - - /* compute dilation values */ - starty = hsize + i * size; - lined = datad + starty * wpld; - SET_DATA_BYTE(lined, j, maxarray[0]); - 
SET_DATA_BYTE(lined + (size - 1) * wpld, j, - maxarray[2 * size - 2]); - for (k = 1; k < size - 1; k++) { - maxval = L_MAX(maxarray[k], maxarray[k + size - 1]); - SET_DATA_BYTE(lined + wpld * k, j, maxval); - } - } - } - } - - return; -} - - -/*! - * \brief erodeGrayLow() - * - * \param[in] datad 8 bpp dsst image - * \param[in] w, h dimensions of src and dest - * \param[in] wpld words/line of dest - * \param[in] datas 8 bpp src image - * \param[in] wpls words/line of src - * \param[in] size full length of SEL; restricted to odd numbers - * \param[in] direction L_HORIZ or L_VERT - * \param[in] buffer holds full line or column of src image pixels - * \param[in] minarray array of dimension 2*size+1 - * \return void - * - *
- * Notes:
- *        (1) See notes in dilateGrayLow()
- * 
- */ -static void -erodeGrayLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 size, - l_int32 direction, - l_uint8 *buffer, - l_uint8 *minarray) -{ -l_int32 i, j, k; -l_int32 hsize, nsteps, startmin, startx, starty; -l_uint8 minval; -l_uint32 *lines, *lined; - - if (direction == L_HORIZ) { - hsize = size / 2; - nsteps = (w - 2 * hsize) / size; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - - /* fill buffer with pixels in byte order */ - for (j = 0; j < w; j++) - buffer[j] = GET_DATA_BYTE(lines, j); - - for (j = 0; j < nsteps; j++) { - /* refill the minarray */ - startmin = (j + 1) * size - 1; - minarray[size - 1] = buffer[startmin]; - for (k = 1; k < size; k++) { - minarray[size - 1 - k] = - L_MIN(minarray[size - k], buffer[startmin - k]); - minarray[size - 1 + k] = - L_MIN(minarray[size + k - 2], buffer[startmin + k]); - } - - /* compute erosion values */ - startx = hsize + j * size; - SET_DATA_BYTE(lined, startx, minarray[0]); - SET_DATA_BYTE(lined, startx + size - 1, minarray[2 * size - 2]); - for (k = 1; k < size - 1; k++) { - minval = L_MIN(minarray[k], minarray[k + size - 1]); - SET_DATA_BYTE(lined, startx + k, minval); - } - } - } - } else { /* direction == L_VERT */ - hsize = size / 2; - nsteps = (h - 2 * hsize) / size; - for (j = 0; j < w; j++) { - /* fill buffer with pixels in byte order */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - buffer[i] = GET_DATA_BYTE(lines, j); - } - - for (i = 0; i < nsteps; i++) { - /* refill the minarray */ - startmin = (i + 1) * size - 1; - minarray[size - 1] = buffer[startmin]; - for (k = 1; k < size; k++) { - minarray[size - 1 - k] = - L_MIN(minarray[size - k], buffer[startmin - k]); - minarray[size - 1 + k] = - L_MIN(minarray[size + k - 2], buffer[startmin + k]); - } - - /* compute erosion values */ - starty = hsize + i * size; - lined = datad + starty * wpld; - SET_DATA_BYTE(lined, j, minarray[0]); - 
SET_DATA_BYTE(lined + (size - 1) * wpld, j, - minarray[2 * size - 2]); - for (k = 1; k < size - 1; k++) { - minval = L_MIN(minarray[k], minarray[k + size - 1]); - SET_DATA_BYTE(lined + wpld * k, j, minval); - } - } - } - } - - return; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/grayquant.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/grayquant.c deleted file mode 100644 index e9d1b9fa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/grayquant.c +++ /dev/null @@ -1,2912 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file grayquant.c - *
- *
- *      Thresholding from 8 bpp to 1 bpp
- *
- *          Floyd-Steinberg dithering to binary
- *              PIX         *pixDitherToBinary()
- *              PIX         *pixDitherToBinarySpec()
- *              static void  ditherToBinaryLow()
- *              void         ditherToBinaryLineLow()
- *
- *          Simple (pixelwise) binarization with fixed threshold
- *              PIX         *pixThresholdToBinary()
- *              static void  thresholdToBinaryLow()
- *              void         thresholdToBinaryLineLow()
- *
- *          Binarization with variable threshold
- *              PIX         *pixVarThresholdToBinary()
- *
- *          Binarization by adaptive mapping
- *              PIX         *pixAdaptThresholdToBinary()
- *              PIX         *pixAdaptThresholdToBinaryGen()
- *
- *          Generate a binary mask from pixels of particular values
- *              PIX         *pixGenerateMaskByValue()
- *              PIX         *pixGenerateMaskByBand()
- *
- *      Thresholding from 8 bpp to 2 bpp
- *
- *          Floyd-Steinberg-like dithering to 2 bpp
- *              PIX         *pixDitherTo2bpp()
- *              PIX         *pixDitherTo2bppSpec()
- *              static void  ditherTo2bppLow()
- *              static void  ditherTo2bppLineLow()
- *              static l_int32  make8To2DitherTables()
- *
- *          Simple (pixelwise) thresholding to 2 bpp with optional cmap
- *              PIX         *pixThresholdTo2bpp()
- *              static void  thresholdTo2bppLow()
- *
- *      Simple (pixelwise) thresholding from 8 bpp to 4 bpp
- *              PIX         *pixThresholdTo4bpp()
- *              static void  thresholdTo4bppLow()
- *
- *      Simple (pixelwise) quantization on 8 bpp grayscale
- *              PIX         *pixThresholdOn8bpp()
- *
- *      Arbitrary (pixelwise) thresholding from 8 bpp to 2, 4 or 8 bpp
- *              PIX         *pixThresholdGrayArb()
- *
- *      Quantization tables for linear thresholds of grayscale images
- *              l_int32     *makeGrayQuantIndexTable()
- *              static l_int32  *makeGrayQuantTargetTable()
- *
- *      Quantization table for arbitrary thresholding of grayscale images
- *              l_int32      makeGrayQuantTableArb()
- *              static l_int32   makeGrayQuantColormapArb()
- *
- *      Thresholding from 32 bpp rgb to 1 bpp
- *      (really color quantization, but it's better placed in this file)
- *              PIX         *pixGenerateMaskByBand32()
- *              PIX         *pixGenerateMaskByDiscr32()
- *
- *      Histogram-based grayscale quantization
- *              PIX         *pixGrayQuantFromHisto()
- *              static l_int32  numaFillCmapFromHisto()
- *
- *      Color quantize grayscale image using existing colormap
- *              PIX         *pixGrayQuantFromCmap()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -static void ditherToBinaryLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_uint32 *bufs1, l_uint32 *bufs2, - l_int32 lowerclip, l_int32 upperclip); -static void thresholdToBinaryLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 d, - l_int32 wpls, l_int32 thresh); -static void ditherTo2bppLow(l_uint32 *datad, l_int32 w, l_int32 h, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, l_uint32 *bufs1, - l_uint32 *bufs2, l_int32 *tabval, l_int32 *tab38, - l_int32 *tab14); -static void ditherTo2bppLineLow(l_uint32 *lined, l_int32 w, l_uint32 *bufs1, - l_uint32 *bufs2, l_int32 *tabval, - l_int32 *tab38, l_int32 *tab14, - l_int32 lastlineflag); -static l_int32 make8To2DitherTables(l_int32 **ptabval, l_int32 **ptab38, - l_int32 **ptab14, l_int32 cliptoblack, - l_int32 cliptowhite); -static void thresholdTo2bppLow(l_uint32 *datad, l_int32 h, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, l_int32 *tab); -static void thresholdTo4bppLow(l_uint32 *datad, l_int32 h, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, l_int32 *tab); -static l_int32 *makeGrayQuantTargetTable(l_int32 nlevels, l_int32 depth); -static l_int32 makeGrayQuantColormapArb(PIX *pixs, l_int32 *tab, - l_int32 outdepth, PIXCMAP **pcmap); -static l_int32 numaFillCmapFromHisto(NUMA *na, PIXCMAP *cmap, - l_float32 minfract, l_int32 maxsize, - l_int32 **plut); - -#ifndef NO_CONSOLE_IO -#define DEBUG_UNROLLING 0 -#endif /* ~NO_CONSOLE_IO */ - -/*------------------------------------------------------------------* - * Binarization by Floyd-Steinberg dithering * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixDitherToBinary() - * - * \param[in] pixs - * \return pixd dithered binary, or NULL on error - * - * The Floyd-Steinberg error diffusion dithering algorithm - * binarizes an 8 bpp grayscale image to a threshold of 128. - * If a pixel has a value above 127, it is binarized to white - * and the excess below 255 is subtracted from three - * neighboring pixels in the fractions 3/8 to i, j+1, - * 3/8 to i+1, j) and 1/4 to (i+1,j+1, truncating to 0 - * if necessary. Likewise, if it the pixel has a value - * below 128, it is binarized to black and the excess above 0 - * is added to the neighboring pixels, truncating to 255 if necessary. - * - * This function differs from straight dithering in that it allows - * clipping of grayscale to 0 or 255 if the values are - * sufficiently close, without distribution of the excess. - * This uses default values to specify the range of lower - * and upper values near 0 and 255, rsp that are clipped - * to black and white without propagating the excess. - * Not propagating the excess has the effect of reducing the - * snake patterns in parts of the image that are nearly black or white; - * however, it also prevents the attempt to reproduce gray for those values. - * - * The implementation is straightforward. It uses a pair of - * line buffers to avoid changing pixs. It is about the same speed - * as pixDitherToBinaryLUT(), which uses three LUTs. - */ -PIX * -pixDitherToBinary(PIX *pixs) -{ - PROCNAME("pixDitherToBinary"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("must be 8 bpp for dithering", procName, NULL); - - return pixDitherToBinarySpec(pixs, DEFAULT_CLIP_LOWER_1, - DEFAULT_CLIP_UPPER_1); -} - - -/*! 
- * \brief pixDitherToBinarySpec() - * - * \param[in] pixs - * \param[in] lowerclip lower clip distance to black; use 0 for default - * \param[in] upperclip upper clip distance to white; use 0 for default - * \return pixd dithered binary, or NULL on error - * - *
- * Notes:
- *      (1) See comments above in pixDitherToBinary() for details.
- *      (2) The input parameters lowerclip and upperclip specify the range
- *          of lower and upper values (near 0 and 255, rsp) that are
- *          clipped to black and white without propagating the excess.
- *          For that reason, lowerclip and upperclip should be small numbers.
- * 
- */ -PIX * -pixDitherToBinarySpec(PIX *pixs, - l_int32 lowerclip, - l_int32 upperclip) -{ -l_int32 w, h, d, wplt, wpld; -l_uint32 *datat, *datad; -l_uint32 *bufs1, *bufs2; -PIX *pixt, *pixd; - - PROCNAME("pixDitherToBinarySpec"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("must be 8 bpp for dithering", procName, NULL); - if (lowerclip < 0 || lowerclip > 255) - return (PIX *)ERROR_PTR("invalid value for lowerclip", procName, NULL); - if (upperclip < 0 || upperclip > 255) - return (PIX *)ERROR_PTR("invalid value for upperclip", procName, NULL); - - if ((pixd = pixCreate(w, h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Remove colormap if it exists */ - if ((pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - } - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - /* Two line buffers, 1 for current line and 2 for next line */ - bufs1 = (l_uint32 *)LEPT_CALLOC(wplt, sizeof(l_uint32)); - bufs2 = (l_uint32 *)LEPT_CALLOC(wplt, sizeof(l_uint32)); - if (!bufs1 || !bufs2) { - LEPT_FREE(bufs1); - LEPT_FREE(bufs2); - pixDestroy(&pixd); - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("bufs1, bufs2 not both made", procName, NULL); - } - - ditherToBinaryLow(datad, w, h, wpld, datat, wplt, bufs1, bufs2, - lowerclip, upperclip); - - LEPT_FREE(bufs1); - LEPT_FREE(bufs2); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief ditherToBinaryLow() - * - * See comments in pixDitherToBinary() in binarize.c - */ -static void -ditherToBinaryLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_uint32 *bufs1, - l_uint32 *bufs2, - l_int32 lowerclip, - l_int32 upperclip) -{ -l_int32 i; -l_uint32 *lined; - - /* do all lines except last line */ - memcpy(bufs2, datas, 4 * wpls); /* prime the buffer */ - for (i = 0; i < h - 1; i++) { - memcpy(bufs1, bufs2, 4 * wpls); - memcpy(bufs2, datas + (i + 1) * wpls, 4 * wpls); - lined = datad + i * wpld; - ditherToBinaryLineLow(lined, w, bufs1, bufs2, lowerclip, upperclip, 0); - } - - /* do last line */ - memcpy(bufs1, bufs2, 4 * wpls); - lined = datad + (h - 1) * wpld; - ditherToBinaryLineLow(lined, w, bufs1, bufs2, lowerclip, upperclip, 1); -} - - -/*! - * \brief ditherToBinaryLineLow() - * - * \param[in] lined ptr to beginning of dest line - * \param[in] w width of image in pixels - * \param[in] bufs1 buffer of current source line - * \param[in] bufs2 buffer of next source line - * \param[in] lowerclip lower clip distance to black - * \param[in] upperclip upper clip distance to white - * \param[in] lastlineflag 0 if not last dest line, 1 if last dest line - * \return void - * - * Dispatches FS error diffusion dithering for - * a single line of the image. If lastlineflag == 0, - * both source buffers are used; otherwise, only bufs1 - * is used. We use source buffers because the error - * is propagated into them, and we don't want to change - * the input src image. - * - * We break dithering out line by line to make it - * easier to combine functions like interpolative - * scaling and error diffusion dithering, as such a - * combination of operations obviates the need to - * generate a 2x grayscale image as an intermediary. 
- */ -void -ditherToBinaryLineLow(l_uint32 *lined, - l_int32 w, - l_uint32 *bufs1, - l_uint32 *bufs2, - l_int32 lowerclip, - l_int32 upperclip, - l_int32 lastlineflag) -{ -l_int32 j; -l_int32 oval, eval; -l_uint8 fval1, fval2, rval, bval, dval; - - if (lastlineflag == 0) { - for (j = 0; j < w - 1; j++) { - oval = GET_DATA_BYTE(bufs1, j); - if (oval > 127) { /* binarize to OFF */ - if ((eval = 255 - oval) > upperclip) { - /* subtract from neighbors */ - fval1 = (3 * eval) / 8; - fval2 = eval / 4; - rval = GET_DATA_BYTE(bufs1, j + 1); - rval = L_MAX(0, rval - fval1); - SET_DATA_BYTE(bufs1, j + 1, rval); - bval = GET_DATA_BYTE(bufs2, j); - bval = L_MAX(0, bval - fval1); - SET_DATA_BYTE(bufs2, j, bval); - dval = GET_DATA_BYTE(bufs2, j + 1); - dval = L_MAX(0, dval - fval2); - SET_DATA_BYTE(bufs2, j + 1, dval); - } - } else { /* oval <= 127; binarize to ON */ - SET_DATA_BIT(lined, j); /* ON pixel */ - if (oval > lowerclip) { - /* add to neighbors */ - fval1 = (3 * oval) / 8; - fval2 = oval / 4; - rval = GET_DATA_BYTE(bufs1, j + 1); - rval = L_MIN(255, rval + fval1); - SET_DATA_BYTE(bufs1, j + 1, rval); - bval = GET_DATA_BYTE(bufs2, j); - bval = L_MIN(255, bval + fval1); - SET_DATA_BYTE(bufs2, j, bval); - dval = GET_DATA_BYTE(bufs2, j + 1); - dval = L_MIN(255, dval + fval2); - SET_DATA_BYTE(bufs2, j + 1, dval); - } - } - } - - /* do last column: j = w - 1 */ - oval = GET_DATA_BYTE(bufs1, j); - if (oval > 127) { /* binarize to OFF */ - if ((eval = 255 - oval) > upperclip) { - /* subtract from neighbors */ - fval1 = (3 * eval) / 8; - bval = GET_DATA_BYTE(bufs2, j); - bval = L_MAX(0, bval - fval1); - SET_DATA_BYTE(bufs2, j, bval); - } - } else { /*oval <= 127; binarize to ON */ - SET_DATA_BIT(lined, j); /* ON pixel */ - if (oval > lowerclip) { - /* add to neighbors */ - fval1 = (3 * oval) / 8; - bval = GET_DATA_BYTE(bufs2, j); - bval = L_MIN(255, bval + fval1); - SET_DATA_BYTE(bufs2, j, bval); - } - } - } else { /* lastlineflag == 1 */ - for (j = 0; j < w - 1; j++) { - oval 
= GET_DATA_BYTE(bufs1, j); - if (oval > 127) { /* binarize to OFF */ - if ((eval = 255 - oval) > upperclip) { - /* subtract from neighbors */ - fval1 = (3 * eval) / 8; - rval = GET_DATA_BYTE(bufs1, j + 1); - rval = L_MAX(0, rval - fval1); - SET_DATA_BYTE(bufs1, j + 1, rval); - } - } else { /* oval <= 127; binarize to ON */ - SET_DATA_BIT(lined, j); /* ON pixel */ - if (oval > lowerclip) { - /* add to neighbors */ - fval1 = (3 * oval) / 8; - rval = GET_DATA_BYTE(bufs1, j + 1); - rval = L_MIN(255, rval + fval1); - SET_DATA_BYTE(bufs1, j + 1, rval); - } - } - } - - /* do last pixel: (i, j) = (h - 1, w - 1) */ - oval = GET_DATA_BYTE(bufs1, j); - if (oval < 128) - SET_DATA_BIT(lined, j); /* ON pixel */ - } -} - - -/*------------------------------------------------------------------* - * Simple (pixelwise) binarization with fixed threshold * - *------------------------------------------------------------------*/ -/*! - * \brief pixThresholdToBinary() - * - * \param[in] pixs 4 or 8 bpp - * \param[in] thresh threshold value - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) If the source pixel is less than the threshold value,
- *          the dest will be 1; otherwise, it will be 0.
- *      (2) For example, for 8 bpp src pix, if %thresh == 256, the dest
- *          1 bpp pix is all ones (fg), and if %thresh == 0, the dest
- *          pix is all zeros (bg).
- *
- * 
- */ -PIX * -pixThresholdToBinary(PIX *pixs, - l_int32 thresh) -{ -l_int32 d, w, h, wplt, wpld; -l_uint32 *datat, *datad; -PIX *pixt, *pixd; - - PROCNAME("pixThresholdToBinary"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 4 && d != 8) - return (PIX *)ERROR_PTR("pixs must be 4 or 8 bpp", procName, NULL); - if (thresh < 0) - return (PIX *)ERROR_PTR("thresh must be non-negative", procName, NULL); - if (d == 4 && thresh > 16) - return (PIX *)ERROR_PTR("4 bpp thresh not in {0-16}", procName, NULL); - if (d == 8 && thresh > 256) - return (PIX *)ERROR_PTR("8 bpp thresh not in {0-256}", procName, NULL); - - if ((pixd = pixCreate(w, h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Remove colormap if it exists. If there is a colormap, - * pixt will be 8 bpp regardless of the depth of pixs. */ - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - if (pixGetColormap(pixs) && d == 4) { /* promoted to 8 bpp */ - d = 8; - thresh *= 16; - } - - thresholdToBinaryLow(datad, w, h, wpld, datat, d, wplt, thresh); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief thresholdToBinaryLow() - * - * If the source pixel is less than thresh, - * the dest will be 1; otherwise, it will be 0 - */ -static void -thresholdToBinaryLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 d, - l_int32 wpls, - l_int32 thresh) -{ -l_int32 i; -l_uint32 *lines, *lined; - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - thresholdToBinaryLineLow(lined, w, lines, d, thresh); - } -} - - -/* - * thresholdToBinaryLineLow() - * - */ -void -thresholdToBinaryLineLow(l_uint32 *lined, - l_int32 w, - l_uint32 *lines, - l_int32 d, - l_int32 thresh) -{ -l_int32 j, k, gval, scount, dcount; -l_uint32 sword, dword; - - PROCNAME("thresholdToBinaryLineLow"); - - switch (d) - { - case 4: - /* Unrolled as 4 source words, 1 dest word */ - for (j = 0, scount = 0, dcount = 0; j + 31 < w; j += 32) { - dword = 0; - for (k = 0; k < 4; k++) { - sword = lines[scount++]; - dword <<= 8; - gval = (sword >> 28) & 0xf; - /* Trick used here and below: if gval < thresh then - * gval - thresh < 0, so its high-order bit is 1, and - * ((gval - thresh) >> 31) & 1 == 1; likewise, if - * gval >= thresh, then ((gval - thresh) >> 31) & 1 == 0 - * Doing it this way avoids a random (and thus easily - * mispredicted) branch on each pixel. 
*/ - dword |= ((gval - thresh) >> 24) & 128; - gval = (sword >> 24) & 0xf; - dword |= ((gval - thresh) >> 25) & 64; - gval = (sword >> 20) & 0xf; - dword |= ((gval - thresh) >> 26) & 32; - gval = (sword >> 16) & 0xf; - dword |= ((gval - thresh) >> 27) & 16; - gval = (sword >> 12) & 0xf; - dword |= ((gval - thresh) >> 28) & 8; - gval = (sword >> 8) & 0xf; - dword |= ((gval - thresh) >> 29) & 4; - gval = (sword >> 4) & 0xf; - dword |= ((gval - thresh) >> 30) & 2; - gval = sword & 0xf; - dword |= ((gval - thresh) >> 31) & 1; - } - lined[dcount++] = dword; - } - - if (j < w) { - dword = 0; - for (; j < w; j++) { - if ((j & 7) == 0) { - sword = lines[scount++]; - } - gval = (sword >> 28) & 0xf; - sword <<= 4; - dword |= (((gval - thresh) >> 31) & 1) << (31 - (j & 31)); - } - lined[dcount] = dword; - } -#if DEBUG_UNROLLING -#define CHECK_BIT(a, b, c) if (GET_DATA_BIT(a, b) != c) { \ - lept_stderr("Error: mismatch at %d/%d(%d), %d vs %d\n", \ - j, w, d, GET_DATA_BIT(a, b), c); } - for (j = 0; j < w; j++) { - gval = GET_DATA_QBIT(lines, j); - CHECK_BIT(lined, j, gval < thresh ? 
1 : 0); - } -#endif - break; - case 8: - /* Unrolled as 8 source words, 1 dest word */ - for (j = 0, scount = 0, dcount = 0; j + 31 < w; j += 32) { - dword = 0; - for (k = 0; k < 8; k++) { - sword = lines[scount++]; - dword <<= 4; - gval = (sword >> 24) & 0xff; - dword |= ((gval - thresh) >> 28) & 8; - gval = (sword >> 16) & 0xff; - dword |= ((gval - thresh) >> 29) & 4; - gval = (sword >> 8) & 0xff; - dword |= ((gval - thresh) >> 30) & 2; - gval = sword & 0xff; - dword |= ((gval - thresh) >> 31) & 1; - } - lined[dcount++] = dword; - } - - if (j < w) { - dword = 0; - for (; j < w; j++) { - if ((j & 3) == 0) { - sword = lines[scount++]; - } - gval = (sword >> 24) & 0xff; - sword <<= 8; - dword |= (l_uint64)(((gval - thresh) >> 31) & 1) - << (31 - (j & 31)); - } - lined[dcount] = dword; - } -#if DEBUG_UNROLLING - for (j = 0; j < w; j++) { - gval = GET_DATA_BYTE(lines, j); - CHECK_BIT(lined, j, gval < thresh ? 1 : 0); - } -#undef CHECK_BIT -#endif - break; - default: - L_ERROR("src depth not 4 or 8 bpp\n", procName); - break; - } -} - - -/*------------------------------------------------------------------* - * Binarization with variable threshold * - *------------------------------------------------------------------*/ -/*! - * \brief pixVarThresholdToBinary() - * - * \param[in] pixs 8 bpp - * \param[in] pixg 8 bpp; contains threshold values for each pixel - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) If the pixel in pixs is less than the corresponding pixel
- *          in pixg, the dest will be 1; otherwise it will be 0.
- * 
- */ -PIX * -pixVarThresholdToBinary(PIX *pixs, - PIX *pixg) -{ -l_int32 i, j, vals, valg, w, h, d, wpls, wplg, wpld; -l_uint32 *datas, *datag, *datad, *lines, *lineg, *lined; -PIX *pixd; - - PROCNAME("pixVarThresholdToBinary"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixg) - return (PIX *)ERROR_PTR("pixg not defined", procName, NULL); - if (!pixSizesEqual(pixs, pixg)) - return (PIX *)ERROR_PTR("pix sizes not equal", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lineg = datag + i * wplg; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - valg = GET_DATA_BYTE(lineg, j); - if (vals < valg) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Binarization by adaptive mapping * - *------------------------------------------------------------------*/ -/*! - * \brief pixAdaptThresholdToBinary() - * - * \param[in] pixs 8 bpp - * \param[in] pixm [optional] 1 bpp image mask; can be null - * \param[in] gamma gamma correction; must be > 0.0; typically ~1.0 - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a simple convenience function for doing adaptive
- *          thresholding on a grayscale image with variable background.
- *          It uses default parameters appropriate for typical text images.
- *      (2) %pixm is a 1 bpp mask over "image" regions, which are not
- *          expected to have a white background.  The mask inhibits
- *          background finding under the fg pixels of the mask.  For
- *          images with both text and image, the image regions would
- *          be binarized (or quantized) by a different set of operations.
- *      (3) As %gamma is increased, the foreground pixels are reduced.
- *      (4) Under the covers:  The default background value for normalization
- *          is 200, so we choose 170 for 'maxval' in pixGammaTRC.  Likewise,
- *          the default foreground threshold for normalization is 60,
- *          so we choose 50 for 'minval' in pixGammaTRC.  Because
- *          170 was mapped to 255, choosing 200 for the threshold is
- *          quite safe for avoiding speckle noise from the background.
- * 
- */ -PIX * -pixAdaptThresholdToBinary(PIX *pixs, - PIX *pixm, - l_float32 gamma) -{ - PROCNAME("pixAdaptThresholdToBinary"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - return pixAdaptThresholdToBinaryGen(pixs, pixm, gamma, 50, 170, 200); -} - - -/*! - * \brief pixAdaptThresholdToBinaryGen() - * - * \param[in] pixs 8 bpp - * \param[in] pixm [optional] 1 bpp image mask; can be null - * \param[in] gamma gamma correction; must be > 0.0; typically ~1.0 - * \param[in] blackval dark value to set to black (0) - * \param[in] whiteval light value to set to white (255) - * \param[in] thresh final threshold for binarization - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a convenience function for doing adaptive thresholding
- *          on a grayscale image with variable background.  Also see notes
- *          in pixAdaptThresholdToBinary().
- *      (2) Reducing %gamma increases the foreground (text) pixels.
- *          Use a low value (e.g., 0.5) for images with light text.
- *      (3) For normal images, see default args in pixAdaptThresholdToBinary().
- *          For images with very light text, these values are appropriate:
- *             gamma     ~0.5
- *             blackval  ~70
- *             whiteval  ~190
- *             thresh    ~200
- * 
- */ -PIX * -pixAdaptThresholdToBinaryGen(PIX *pixs, - PIX *pixm, - l_float32 gamma, - l_int32 blackval, - l_int32 whiteval, - l_int32 thresh) -{ -PIX *pix1, *pixd; - - PROCNAME("pixAdaptThresholdToBinaryGen"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - pix1 = pixBackgroundNormSimple(pixs, pixm, NULL); - pixGammaTRC(pix1, pix1, gamma, blackval, whiteval); - pixd = pixThresholdToBinary(pix1, thresh); - pixDestroy(&pix1); - return pixd; -} - - -/*--------------------------------------------------------------------* - * Generate a binary mask from pixels of particular value(s) * - *--------------------------------------------------------------------*/ -/*! - * \brief pixGenerateMaskByValue() - * - * \param[in] pixs 2, 4 or 8 bpp, or colormapped - * \param[in] val of pixels for which we set 1 in dest - * \param[in] usecmap 1 to retain cmap values; 0 to convert to gray - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) %val is the pixel value that we are selecting.  It can be
- *          either a gray value or a colormap index.
- *      (2) If pixs is colormapped, %usecmap determines if the colormap
- *          index values are used, or if the colormap is removed to gray and
- *          the gray values are used.  For the latter, it generates
- *          an approximate grayscale value for each pixel, and then looks
- *          for gray pixels with the value %val.
- * 
- */ -PIX * -pixGenerateMaskByValue(PIX *pixs, - l_int32 val, - l_int32 usecmap) -{ -l_int32 i, j, w, h, d, wplg, wpld; -l_uint32 *datag, *datad, *lineg, *lined; -PIX *pixg, *pixd; - - PROCNAME("pixGenerateMaskByValue"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("not 2, 4 or 8 bpp", procName, NULL); - - if (!usecmap && pixGetColormap(pixs)) - pixg = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixg = pixClone(pixs); - pixGetDimensions(pixg, &w, &h, &d); - if (d == 8 && (val < 0 || val > 255)) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("val out of 8 bpp range", procName, NULL); - } - if (d == 4 && (val < 0 || val > 15)) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("val out of 4 bpp range", procName, NULL); - } - if (d == 2 && (val < 0 || val > 3)) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("val out of 2 bpp range", procName, NULL); - } - - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixg); - pixCopyInputFormat(pixd, pixs); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lineg = datag + i * wplg; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (d == 8) { - if (GET_DATA_BYTE(lineg, j) == val) - SET_DATA_BIT(lined, j); - } else if (d == 4) { - if (GET_DATA_QBIT(lineg, j) == val) - SET_DATA_BIT(lined, j); - } else { /* d == 2 */ - if (GET_DATA_DIBIT(lineg, j) == val) - SET_DATA_BIT(lined, j); - } - } - } - - pixDestroy(&pixg); - return pixd; -} - - -/*! 
- * \brief pixGenerateMaskByBand() - * - * \param[in] pixs 2, 4 or 8 bpp, or colormapped - * \param[in] lower, upper two pixel values from which a range, either - * between (inband) or outside of (!inband), - * determines which pixels in pixs cause us to - * set a 1 in the dest mask - * \param[in] inband 1 for finding pixels in [lower, upper]; - * 0 for finding pixels in - * [0, lower) union (upper, 255] - * \param[in] usecmap 1 to retain cmap values; 0 to convert to gray - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Generates a 1 bpp mask pixd, the same size as pixs, where
- *          the fg pixels in the mask are those either within the specified
- *          band (for inband == 1) or outside the specified band
- *          (for inband == 0).
- *      (2) If pixs is colormapped, %usecmap determines if the colormap
- *          values are used, or if the colormap is removed to gray and
- *          the gray values are used.  For the latter, it generates
- *          an approximate grayscale value for each pixel, and then looks
- *          for gray pixels with the value %val.
- * 
- */ -PIX * -pixGenerateMaskByBand(PIX *pixs, - l_int32 lower, - l_int32 upper, - l_int32 inband, - l_int32 usecmap) -{ -l_int32 i, j, w, h, d, wplg, wpld, val; -l_uint32 *datag, *datad, *lineg, *lined; -PIX *pixg, *pixd; - - PROCNAME("pixGenerateMaskByBand"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("not 2, 4 or 8 bpp", procName, NULL); - if (lower < 0 || lower > upper) - return (PIX *)ERROR_PTR("lower < 0 or lower > upper!", procName, NULL); - - if (!usecmap && pixGetColormap(pixs)) - pixg = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixg = pixClone(pixs); - pixGetDimensions(pixg, &w, &h, &d); - if (d == 8 && upper > 255) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("d == 8 and upper > 255", procName, NULL); - } - if (d == 4 && upper > 15) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("d == 4 and upper > 15", procName, NULL); - } - if (d == 2 && upper > 3) { - pixDestroy(&pixg); - return (PIX *)ERROR_PTR("d == 2 and upper > 3", procName, NULL); - } - - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixg); - pixCopyInputFormat(pixd, pixs); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lineg = datag + i * wplg; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (d == 8) - val = GET_DATA_BYTE(lineg, j); - else if (d == 4) - val = GET_DATA_QBIT(lineg, j); - else /* d == 2 */ - val = GET_DATA_DIBIT(lineg, j); - if (inband) { - if (val >= lower && val <= upper) - SET_DATA_BIT(lined, j); - } else { /* out of band */ - if (val < lower || val > upper) - SET_DATA_BIT(lined, j); - } - } - } - - pixDestroy(&pixg); - return pixd; -} - - -/*------------------------------------------------------------------* - * Thresholding to 2 bpp by dithering * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixDitherTo2bpp() - * - * \param[in] pixs 8 bpp - * \param[in] cmapflag 1 to generate a colormap - * \return pixd dithered 2 bpp, or NULL on error - * - * An analog of the Floyd-Steinberg error diffusion dithering - * algorithm is used to "dibitize" an 8 bpp grayscale image - * to 2 bpp, using equally spaced gray values of 0, 85, 170, and 255, - * which are served by thresholds of 43, 128 and 213. - * If cmapflag == 1, the colormap values are set to 0, 85, 170 and 255. - * If a pixel has a value between 0 and 42, it is dibitized - * to 0, and the excess above 0 is added to the - * three neighboring pixels, in the fractions 3/8 to i, j+1, - * 3/8 to i+1, j) and 1/4 to (i+1, j+1, truncating to 255 if - * necessary. If a pixel has a value between 43 and 127, it is - * dibitized to 1, and the excess above 85 is added to the three - * neighboring pixels as before. If the value is below 85, the - * excess is subtracted. With a value between 128 - * and 212, it is dibitized to 2, with the excess on either side - * of 170 distributed as before. Finally, with a value between - * 213 and 255, it is dibitized to 3, with the excess below 255 - * subtracted from the neighbors. We always truncate to 0 or 255. - * The details can be seen in the lookup table generation. - * - * This function differs from straight dithering in that it allows - * clipping of grayscale to 0 or 255 if the values are - * sufficiently close, without distribution of the excess. - * This uses default values from pix.h to specify the range of lower - * and upper values near 0 and 255, rsp that are clipped to black - * and white without propagating the excess. - * Not propagating the excess has the effect of reducing the snake - * patterns in parts of the image that are nearly black or white; - * however, it also prevents any attempt to reproduce gray for those values. - * - * The implementation uses 3 lookup tables for simplicity, and - * a pair of line buffers to avoid modifying pixs. 
- */ -PIX * -pixDitherTo2bpp(PIX *pixs, - l_int32 cmapflag) -{ - PROCNAME("pixDitherTo2bpp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("must be 8 bpp for dithering", procName, NULL); - - return pixDitherTo2bppSpec(pixs, DEFAULT_CLIP_LOWER_2, - DEFAULT_CLIP_UPPER_2, cmapflag); -} - - -/*! - * \brief pixDitherTo2bppSpec() - * - * \param[in] pixs 8 bpp - * \param[in] lowerclip lower clip distance to black; use 0 for default - * \param[in] upperclip upper clip distance to white; use 0 for default - * \param[in] cmapflag 1 to generate a colormap - * \return pixd dithered 2 bpp, or NULL on error - * - *
- * Notes:
- *      (1) See comments above in pixDitherTo2bpp() for details.
- *      (2) The input parameters lowerclip and upperclip specify the range
- *          of lower and upper values (near 0 and 255, rsp) that are
- *          clipped to black and white without propagating the excess.
- *          For that reason, lowerclip and upperclip should be small numbers.
- * 
- */ -PIX * -pixDitherTo2bppSpec(PIX *pixs, - l_int32 lowerclip, - l_int32 upperclip, - l_int32 cmapflag) -{ -l_int32 w, h, d, wplt, wpld; -l_int32 *tabval, *tab38, *tab14; -l_uint32 *datat, *datad; -l_uint32 *bufs1, *bufs2; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixDitherTo2bppSpec"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("must be 8 bpp for dithering", procName, NULL); - if (lowerclip < 0 || lowerclip > 255) - return (PIX *)ERROR_PTR("invalid value for lowerclip", procName, NULL); - if (upperclip < 0 || upperclip > 255) - return (PIX *)ERROR_PTR("invalid value for upperclip", procName, NULL); - - if ((pixd = pixCreate(w, h, 2)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* If there is a colormap, remove it */ - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - /* Two line buffers, 1 for current line and 2 for next line */ - bufs1 = (l_uint32 *)LEPT_CALLOC(wplt, sizeof(l_uint32)); - bufs2 = (l_uint32 *)LEPT_CALLOC(wplt, sizeof(l_uint32)); - if (!bufs1 || !bufs2) { - LEPT_FREE(bufs1); - LEPT_FREE(bufs2); - pixDestroy(&pixd); - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("bufs1, bufs2 not both made", procName, NULL); - } - - /* 3 lookup tables: 2-bit value, (3/8)excess, and (1/4)excess */ - make8To2DitherTables(&tabval, &tab38, &tab14, lowerclip, upperclip); - - ditherTo2bppLow(datad, w, h, wpld, datat, wplt, bufs1, bufs2, - tabval, tab38, tab14); - - if (cmapflag) { - cmap = pixcmapCreateLinear(2, 4); - pixSetColormap(pixd, cmap); - } - - LEPT_FREE(bufs1); - LEPT_FREE(bufs2); - LEPT_FREE(tabval); - LEPT_FREE(tab38); - LEPT_FREE(tab14); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief ditherTo2bppLow() - * - * Low-level function for doing Floyd-Steinberg error diffusion - * dithering from 8 bpp (datas) to 2 bpp (datad). Two source - * line buffers, bufs1 and bufs2, are provided, along with three - * 256-entry lookup tables: tabval gives the output pixel value, - * tab38 gives the extra (plus or minus) transferred to the pixels - * directly to the left and below, and tab14 gives the extra - * transferred to the diagonal below. The choice of 3/8 and 1/4 - * is traditional but arbitrary when you use a lookup table; the - * only constraint is that the sum is 1. See other comments - * below and in grayquant.c. - */ -static void -ditherTo2bppLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_uint32 *bufs1, - l_uint32 *bufs2, - l_int32 *tabval, - l_int32 *tab38, - l_int32 *tab14) -{ -l_int32 i; -l_uint32 *lined; - - /* do all lines except last line */ - memcpy(bufs2, datas, 4 * wpls); /* prime the buffer */ - for (i = 0; i < h - 1; i++) { - memcpy(bufs1, bufs2, 4 * wpls); - memcpy(bufs2, datas + (i + 1) * wpls, 4 * wpls); - lined = datad + i * wpld; - ditherTo2bppLineLow(lined, w, bufs1, bufs2, tabval, tab38, tab14, 0); - } - - /* do last line */ - memcpy(bufs1, bufs2, 4 * wpls); - lined = datad + (h - 1) * wpld; - ditherTo2bppLineLow(lined, w, bufs1, bufs2, tabval, tab38, tab14, 1); -} - - -/*! - * \brief ditherTo2bppLineLow() - * - * \param[in] lined ptr to beginning of dest line - * \param[in] w width of image in pixels - * \param[in] bufs1 buffer of current source line - * \param[in] bufs2 buffer of next source line - * \param[in] tabval value to assign for current pixel - * \param[in] tab38 excess value to give to neighboring 3/8 pixels - * \param[in] tab14 excess value to give to neighboring 1/4 pixel - * \param[in] lastlineflag 0 if not last dest line, 1 if last dest line - * \return void - * - * Dispatches error diffusion dithering for - * a single line of the image. 
If lastlineflag == 0, - * both source buffers are used; otherwise, only bufs1 - * is used. We use source buffers because the error - * is propagated into them, and we don't want to change - * the input src image. - * - * We break dithering out line by line to make it - * easier to combine functions like interpolative - * scaling and error diffusion dithering, as such a - * combination of operations obviates the need to - * generate a 2x grayscale image as an intermediary. - */ -static void -ditherTo2bppLineLow(l_uint32 *lined, - l_int32 w, - l_uint32 *bufs1, - l_uint32 *bufs2, - l_int32 *tabval, - l_int32 *tab38, - l_int32 *tab14, - l_int32 lastlineflag) -{ -l_int32 j; -l_int32 oval, tab38val, tab14val; -l_uint8 rval, bval, dval; - - if (lastlineflag == 0) { - for (j = 0; j < w - 1; j++) { - oval = GET_DATA_BYTE(bufs1, j); - SET_DATA_DIBIT(lined, j, tabval[oval]); - rval = GET_DATA_BYTE(bufs1, j + 1); - bval = GET_DATA_BYTE(bufs2, j); - dval = GET_DATA_BYTE(bufs2, j + 1); - tab38val = tab38[oval]; - tab14val = tab14[oval]; - if (tab38val < 0) { - rval = L_MAX(0, rval + tab38val); - bval = L_MAX(0, bval + tab38val); - dval = L_MAX(0, dval + tab14val); - } else { - rval = L_MIN(255, rval + tab38val); - bval = L_MIN(255, bval + tab38val); - dval = L_MIN(255, dval + tab14val); - } - SET_DATA_BYTE(bufs1, j + 1, rval); - SET_DATA_BYTE(bufs2, j, bval); - SET_DATA_BYTE(bufs2, j + 1, dval); - } - - /* do last column: j = w - 1 */ - oval = GET_DATA_BYTE(bufs1, j); - SET_DATA_DIBIT(lined, j, tabval[oval]); - bval = GET_DATA_BYTE(bufs2, j); - tab38val = tab38[oval]; - if (tab38val < 0) - bval = L_MAX(0, bval + tab38val); - else - bval = L_MIN(255, bval + tab38val); - SET_DATA_BYTE(bufs2, j, bval); - } else { /* lastlineflag == 1 */ - for (j = 0; j < w - 1; j++) { - oval = GET_DATA_BYTE(bufs1, j); - SET_DATA_DIBIT(lined, j, tabval[oval]); - rval = GET_DATA_BYTE(bufs1, j + 1); - tab38val = tab38[oval]; - if (tab38val < 0) - rval = L_MAX(0, rval + tab38val); - else - rval = 
L_MIN(255, rval + tab38val); - SET_DATA_BYTE(bufs1, j + 1, rval); - } - - /* do last pixel: (i, j) = (h - 1, w - 1) */ - oval = GET_DATA_BYTE(bufs1, j); - SET_DATA_DIBIT(lined, j, tabval[oval]); - } -} - - -/*! - * \brief make8To2DitherTables() - * - * \param[out] ptabval value assigned to output pixel; 0, 1, 2 or 3 - * \param[out] ptab38 amount propagated to pixels left and below - * \param[out] ptab14 amount propagated to pixel to left and down - * \param[in] cliptoblack values near 0 where the excess is not propagated - * \param[in] cliptowhite values near 255 where the deficit is not propagated - * - * \return 0 if OK, 1 on error - */ -static l_int32 -make8To2DitherTables(l_int32 **ptabval, - l_int32 **ptab38, - l_int32 **ptab14, - l_int32 cliptoblack, - l_int32 cliptowhite) -{ -l_int32 i; -l_int32 *tabval, *tab38, *tab14; - - /* 3 lookup tables: 2-bit value, (3/8)excess, and (1/4)excess */ - tabval = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - tab38 = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - tab14 = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - *ptabval = tabval; - *ptab38 = tab38; - *ptab14 = tab14; - - for (i = 0; i < 256; i++) { - if (i <= cliptoblack) { - tabval[i] = 0; - tab38[i] = 0; - tab14[i] = 0; - } else if (i < 43) { - tabval[i] = 0; - tab38[i] = (3 * i + 4) / 8; - tab14[i] = (i + 2) / 4; - } else if (i < 85) { - tabval[i] = 1; - tab38[i] = (3 * (i - 85) - 4) / 8; - tab14[i] = ((i - 85) - 2) / 4; - } else if (i < 128) { - tabval[i] = 1; - tab38[i] = (3 * (i - 85) + 4) / 8; - tab14[i] = ((i - 85) + 2) / 4; - } else if (i < 170) { - tabval[i] = 2; - tab38[i] = (3 * (i - 170) - 4) / 8; - tab14[i] = ((i - 170) - 2) / 4; - } else if (i < 213) { - tabval[i] = 2; - tab38[i] = (3 * (i - 170) + 4) / 8; - tab14[i] = ((i - 170) + 2) / 4; - } else if (i < 255 - cliptowhite) { - tabval[i] = 3; - tab38[i] = (3 * (i - 255) - 4) / 8; - tab14[i] = ((i - 255) - 2) / 4; - } else { /* i >= 255 - cliptowhite */ - tabval[i] = 3; - tab38[i] = 0; - tab14[i] = 
0; - } - } - - return 0; -} - - -/*--------------------------------------------------------------------* - * Simple (pixelwise) thresholding to 2 bpp with optional colormap * - *--------------------------------------------------------------------*/ -/*! - * \brief pixThresholdTo2bpp() - * - * \param[in] pixs 8 bpp - * \param[in] nlevels equally spaced; must be between 2 and 4 - * \param[in] cmapflag 1 to build colormap; 0 otherwise - * \return pixd 2 bpp, optionally with colormap, or NULL on error - * - *
- * Notes:
- *      (1) Valid values for nlevels is the set {2, 3, 4}.
- *      (2) Any colormap on the input pixs is removed to 8 bpp grayscale.
- *      (3) This function is typically invoked with cmapflag == 1.
- *          In the situation where no colormap is desired, nlevels is
- *          ignored and pixs is thresholded to 4 levels.
- *      (4) The target output colors are equally spaced, with the
- *          darkest at 0 and the lightest at 255.  The thresholds are
- *          chosen halfway between adjacent output values.  A table
- *          is built that specifies the mapping from src to dest.
- *      (5) If cmapflag == 1, a colormap of size 'nlevels' is made,
- *          and the pixel values in pixs are replaced by their
- *          appropriate color indices.  The number of holdouts,
- *          4 - nlevels, will be between 0 and 2.
- *      (6) If you don't want the thresholding to be equally spaced,
- *          either first transform the 8 bpp src using pixGammaTRC().
- *          or, if cmapflag == 1, after calling this function you can use
- *          pixcmapResetColor() to change any individual colors.
- *      (7) If a colormap is generated, it will specify (to display
- *          programs) exactly how each level is to be represented in RGB
- *          space.  When representing text, 3 levels is far better than
- *          2 because of the antialiasing of the single gray level,
- *          and 4 levels (black, white and 2 gray levels) is getting
- *          close to the perceptual quality of a (nearly continuous)
- *          grayscale image.  With 2 bpp, you can set up a colormap
- *          and allocate from 2 to 4 levels to represent antialiased text.
- *          Any left over colormap entries can be used for coloring regions.
- *          For the same number of levels, the file size of a 2 bpp image
- *          is about 10% smaller than that of a 4 bpp result for the same
- *          number of levels.  For both 2 bpp and 4 bpp, using 4 levels you
- *          get compression far better than that of jpeg, because the
- *          quantization to 4 levels will remove the jpeg ringing in the
- *          background near character edges.
- * 
- */ -PIX * -pixThresholdTo2bpp(PIX *pixs, - l_int32 nlevels, - l_int32 cmapflag) -{ -l_int32 *qtab; -l_int32 w, h, d, wplt, wpld; -l_uint32 *datat, *datad; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixThresholdTo2bpp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (nlevels < 2 || nlevels > 4) - return (PIX *)ERROR_PTR("nlevels not in {2, 3, 4}", procName, NULL); - - if ((pixd = pixCreate(w, h, 2)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - if (cmapflag) { /* hold out (4 - nlevels) cmap entries */ - cmap = pixcmapCreateLinear(2, nlevels); - pixSetColormap(pixd, cmap); - } - - /* If there is a colormap in the src, remove it */ - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - /* Make the appropriate table */ - if (cmapflag) - qtab = makeGrayQuantIndexTable(nlevels); - else - qtab = makeGrayQuantTargetTable(4, 2); - - thresholdTo2bppLow(datad, h, wpld, datat, wplt, qtab); - - LEPT_FREE(qtab); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief thresholdTo2bppLow() - * - * Low-level function for thresholding from 8 bpp (datas) to - * 2 bpp (datad), using thresholds implicitly defined through %tab, - * a 256-entry lookup table that gives a 2-bit output value - * for each possible input. - * - * For each line, unroll the loop so that for each 32 bit src word, - * representing four consecutive 8-bit pixels, we compose one byte - * of output consisiting of four 2-bit pixels. 
- */ -static void -thresholdTo2bppLow(l_uint32 *datad, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 *tab) -{ -l_uint8 sval1, sval2, sval3, sval4, dval; -l_int32 i, j, k; -l_uint32 *lines, *lined; - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wpls; j++) { - k = 4 * j; - sval1 = GET_DATA_BYTE(lines, k); - sval2 = GET_DATA_BYTE(lines, k + 1); - sval3 = GET_DATA_BYTE(lines, k + 2); - sval4 = GET_DATA_BYTE(lines, k + 3); - dval = (tab[sval1] << 6) | (tab[sval2] << 4) | - (tab[sval3] << 2) | tab[sval4]; - SET_DATA_BYTE(lined, j, dval); - } - } -} - - -/*----------------------------------------------------------------------* - * Simple (pixelwise) thresholding to 4 bpp * - *----------------------------------------------------------------------*/ -/*! - * \brief pixThresholdTo4bpp() - * - * \param[in] pixs 8 bpp, can have colormap - * \param[in] nlevels equally spaced; must be between 2 and 16 - * \param[in] cmapflag 1 to build colormap; 0 otherwise - * \return pixd 4 bpp, optionally with colormap, or NULL on error - * - *
- * Notes:
- *      (1) Valid values for nlevels is the set {2, ... 16}.
- *      (2) Any colormap on the input pixs is removed to 8 bpp grayscale.
- *      (3) This function is typically invoked with cmapflag == 1.
- *          In the situation where no colormap is desired, nlevels is
- *          ignored and pixs is thresholded to 16 levels.
- *      (4) The target output colors are equally spaced, with the
- *          darkest at 0 and the lightest at 255.  The thresholds are
- *          chosen halfway between adjacent output values.  A table
- *          is built that specifies the mapping from src to dest.
- *      (5) If cmapflag == 1, a colormap of size 'nlevels' is made,
- *          and the pixel values in pixs are replaced by their
- *          appropriate color indices.  The number of holdouts,
- *          16 - nlevels, will be between 0 and 14.
- *      (6) If you don't want the thresholding to be equally spaced,
- *          either first transform the 8 bpp src using pixGammaTRC().
- *          or, if cmapflag == 1, after calling this function you can use
- *          pixcmapResetColor() to change any individual colors.
- *      (7) If a colormap is generated, it will specify, to display
- *          programs, exactly how each level is to be represented in RGB
- *          space.  When representing text, 3 levels is far better than
- *          2 because of the antialiasing of the single gray level,
- *          and 4 levels (black, white and 2 gray levels) is getting
- *          close to the perceptual quality of a (nearly continuous)
- *          grayscale image.  Therefore, with 4 bpp, you can set up a
- *          colormap, allocate a relatively small fraction of the 16
- *          possible values to represent antialiased text, and use the
- *          other colormap entries for other things, such as coloring
- *          text or background.  Two other reasons for using a small number
- *          of gray values for antialiased text are (1) PNG compression
- *          gets worse as the number of levels that are used is increased,
- *          and (2) using a small number of levels will filter out most of
- *          the jpeg ringing that is typically introduced near sharp edges
- *          of text.  This filtering is partly responsible for the improved
- *          compression.
- * 
- */ -PIX * -pixThresholdTo4bpp(PIX *pixs, - l_int32 nlevels, - l_int32 cmapflag) -{ -l_int32 *qtab; -l_int32 w, h, d, wplt, wpld; -l_uint32 *datat, *datad; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixThresholdTo4bpp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (nlevels < 2 || nlevels > 16) - return (PIX *)ERROR_PTR("nlevels not in [2,...,16]", procName, NULL); - - if ((pixd = pixCreate(w, h, 4)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - if (cmapflag) { /* hold out (16 - nlevels) cmap entries */ - cmap = pixcmapCreateLinear(4, nlevels); - pixSetColormap(pixd, cmap); - } - - /* If there is a colormap in the src, remove it */ - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - /* Make the appropriate table */ - if (cmapflag) - qtab = makeGrayQuantIndexTable(nlevels); - else - qtab = makeGrayQuantTargetTable(16, 4); - - thresholdTo4bppLow(datad, h, wpld, datat, wplt, qtab); - - LEPT_FREE(qtab); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief thresholdTo4bppLow() - * - * Low-level function for thresholding from 8 bpp (datas) to - * 4 bpp (datad), using thresholds implicitly defined through %tab, - * a 256-entry lookup table that gives a 4-bit output value - * for each possible input. - * - * For each line, unroll the loop so that for each 32 bit src word, - * representing four consecutive 8-bit pixels, we compose two bytes - * of output consisiting of four 4-bit pixels. 
- */ -static void -thresholdTo4bppLow(l_uint32 *datad, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 *tab) -{ -l_uint8 sval1, sval2, sval3, sval4; -l_uint16 dval; -l_int32 i, j, k; -l_uint32 *lines, *lined; - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wpls; j++) { - k = 4 * j; - sval1 = GET_DATA_BYTE(lines, k); - sval2 = GET_DATA_BYTE(lines, k + 1); - sval3 = GET_DATA_BYTE(lines, k + 2); - sval4 = GET_DATA_BYTE(lines, k + 3); - dval = (tab[sval1] << 12) | (tab[sval2] << 8) | - (tab[sval3] << 4) | tab[sval4]; - SET_DATA_TWO_BYTES(lined, j, dval); - } - } -} - - -/*----------------------------------------------------------------------* - * Simple (pixelwise) thresholding on 8 bpp with optional colormap * - *----------------------------------------------------------------------*/ -/*! - * \brief pixThresholdOn8bpp() - * - * \param[in] pixs 8 bpp, can have colormap - * \param[in] nlevels equally spaced; must be between 2 and 256 - * \param[in] cmapflag 1 to build colormap; 0 otherwise - * \return pixd 8 bpp, optionally with colormap, or NULL on error - * - *
- * Notes:
- *      (1) Valid values for nlevels is the set {2,...,256}.
- *      (2) Any colormap on the input pixs is removed to 8 bpp grayscale.
- *      (3) If cmapflag == 1, a colormap of size 'nlevels' is made,
- *          and the pixel values in pixs are replaced by their
- *          appropriate color indices.  Otherwise, the pixel values
- *          are the actual thresholded (i.e., quantized) grayscale values.
- *      (4) If you don't want the thresholding to be equally spaced,
- *          first transform the input 8 bpp src using pixGammaTRC().
- * 
- */ -PIX * -pixThresholdOn8bpp(PIX *pixs, - l_int32 nlevels, - l_int32 cmapflag) -{ -l_int32 *qtab; /* quantization table */ -l_int32 i, j, w, h, wpld, val, newval; -l_uint32 *datad, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixThresholdOn8bpp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (nlevels < 2 || nlevels > 256) - return (PIX *)ERROR_PTR("nlevels not in [2,...,256]", procName, NULL); - - /* Get a new pixd; if there is a colormap in the src, remove it */ - if (pixGetColormap(pixs)) - pixd = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixd = pixCopy(NULL, pixs); - - if (cmapflag) { /* hold out (256 - nlevels) cmap entries */ - cmap = pixcmapCreateLinear(8, nlevels); - pixSetColormap(pixd, cmap); - } - - if (cmapflag) - qtab = makeGrayQuantIndexTable(nlevels); - else - qtab = makeGrayQuantTargetTable(nlevels, 8); - - pixGetDimensions(pixd, &w, &h, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lined, j); - newval = qtab[val]; - SET_DATA_BYTE(lined, j, newval); - } - } - - LEPT_FREE(qtab); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Arbitrary (pixelwise) thresholding from 8 bpp to 2, 4 or 8 bpp * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixThresholdGrayArb() - * - * \param[in] pixs 8 bpp grayscale; can have colormap - * \param[in] edgevals string giving edge value of each bin - * \param[in] outdepth 0, 2, 4 or 8 bpp; 0 is default for min depth - * \param[in] use_average 1 if use the average pixel value in colormap - * \param[in] setblack 1 if darkest color is set to black - * \param[in] setwhite 1 if lightest color is set to white - * \return pixd 2, 4 or 8 bpp quantized image with colormap, - * or NULL on error - * - *
- * Notes:
- *      (1) This function allows exact specification of the quantization bins.
- *          The string %edgevals is a space-separated set of values
- *          specifying the dividing points between output quantization bins.
- *          These threshold values are assigned to the bin with higher
- *          values, so that each of them is the smallest value in their bin.
- *      (2) The output image (pixd) depth is specified by %outdepth.  The
- *          number of bins is the number of edgevals + 1.  The
- *          relation between outdepth and the number of bins is:
- *               outdepth = 2       nbins <= 4
- *               outdepth = 4       nbins <= 16
- *               outdepth = 8       nbins <= 256
- *          With %outdepth == 0, the minimum required depth for the
- *          given number of bins is used.
- *          The output pixd has a colormap.
- *      (3) The last 3 args determine the specific values that go into
- *          the colormap.
- *      (4) For %use_average:
- *            ~ if TRUE, the average value of pixels falling in the bin is
- *              chosen as the representative gray value.  Otherwise,
- *            ~ if FALSE, the central value of each bin is chosen as
- *              the representative value.
- *          The colormap holds the representative value.
- *      (5) For %setblack, if TRUE the darkest color is set to (0,0,0).
- *      (6) For %setwhite, if TRUE the lightest color is set to (255,255,255).
- *      (7) An alternative to using this function to quantize to
- *          unequally-spaced bins is to first transform the 8 bpp pixs
- *          using pixGammaTRC(), and follow this with pixThresholdTo4bpp().
- * 
- */ -PIX * -pixThresholdGrayArb(PIX *pixs, - const char *edgevals, - l_int32 outdepth, - l_int32 use_average, - l_int32 setblack, - l_int32 setwhite) -{ -l_int32 *qtab; -l_int32 w, h, d, i, j, n, wplt, wpld, val, newval; -l_uint32 *datat, *datad, *linet, *lined; -NUMA *na; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixThresholdGrayArb"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (!edgevals) - return (PIX *)ERROR_PTR("edgevals not defined", procName, NULL); - if (outdepth != 0 && outdepth != 2 && outdepth != 4 && outdepth != 8) - return (PIX *)ERROR_PTR("invalid outdepth", procName, NULL); - - /* Parse and sort (if required) the bin edge values */ - na = parseStringForNumbers(edgevals, " \t\n,"); - n = numaGetCount(na); - if (n > 255) { - numaDestroy(&na); - return (PIX *)ERROR_PTR("more than 256 levels", procName, NULL); - } - if (outdepth == 0) { - if (n <= 3) - outdepth = 2; - else if (n <= 15) - outdepth = 4; - else - outdepth = 8; - } else if (n + 1 > (1 << outdepth)) { - L_WARNING("outdepth too small; setting to 8 bpp\n", procName); - outdepth = 8; - } - numaSort(na, na, L_SORT_INCREASING); - - /* Make the quantization LUT and the colormap */ - makeGrayQuantTableArb(na, outdepth, &qtab, &cmap); - if (use_average) { /* use the average value in each bin */ - pixcmapDestroy(&cmap); - makeGrayQuantColormapArb(pixs, qtab, outdepth, &cmap); - } - pixcmapSetBlackAndWhite(cmap, setblack, setwhite); - numaDestroy(&na); - - if ((pixd = pixCreate(w, h, outdepth)) == NULL) { - LEPT_FREE(qtab); - pixcmapDestroy(&cmap); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixSetColormap(pixd, cmap); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* If there is a colormap in the src, remove it */ - pixt = 
pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - if (outdepth == 2) { - thresholdTo2bppLow(datad, h, wpld, datat, wplt, qtab); - } else if (outdepth == 4) { - thresholdTo4bppLow(datad, h, wpld, datat, wplt, qtab); - } else { - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linet = datat + i * wplt; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(linet, j); - newval = qtab[val]; - SET_DATA_BYTE(lined, j, newval); - } - } - } - - LEPT_FREE(qtab); - pixDestroy(&pixt); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Quantization tables for linear thresholds of grayscale images * - *----------------------------------------------------------------------*/ -/*! - * \brief makeGrayQuantIndexTable() - * - * \param[in] nlevels number of output levels - * \return table maps input gray level to colormap index, - * or NULL on error - *
- * Notes:
- *      (1) 'nlevels' is some number between 2 and 256 (typically 8 or less).
- *      (2) The table is typically used for quantizing 2, 4 and 8 bpp
- *          grayscale src pix, and generating a colormapped dest pix.
- * 
- */ -l_int32 * -makeGrayQuantIndexTable(l_int32 nlevels) -{ -l_int32 *tab; -l_int32 i, j, thresh; - - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) { - for (j = 0; j < nlevels; j++) { - thresh = 255 * (2 * j + 1) / (2 * nlevels - 2); - if (i <= thresh) { - tab[i] = j; -/* lept_stderr("tab[%d] = %d\n", i, j); */ - break; - } - } - } - return tab; -} - - -/*! - * \brief makeGrayQuantTargetTable() - * - * \param[in] nlevels number of output levels - * \param[in] depth of dest pix, in bpp; 2, 4 or 8 bpp - * \return table maps input gray level to thresholded gray level, - * or NULL on error - * - *
- * Notes:
- *      (1) nlevels is some number between 2 and 2^(depth)
- *      (2) The table is used in two similar ways:
- *           ~ for 8 bpp, it quantizes to a given number of target levels
- *           ~ for 2 and 4 bpp, it thresholds to appropriate target values
- *             that will use the full dynamic range of the dest pix.
- *      (3) For depth = 8, the number of thresholds chosen is
- *          ('nlevels' - 1), and the 'nlevels' values stored in the
- *          table are at the two at the extreme ends, (0, 255), plus
- *          plus ('nlevels' - 2) values chosen at equal intervals between.
- *          For example, for depth = 8 and 'nlevels' = 3, the two
- *          threshold values are 3f and bf, and the three target pixel
- *          values are 0, 7f and ff.
- *      (4) For depth < 8, we ignore nlevels, and always use the maximum
- *          number of levels, which is 2^(depth).
- *          If you want nlevels < the maximum number, you should always
- *          use a colormap.
- * 
- */ -static l_int32 * -makeGrayQuantTargetTable(l_int32 nlevels, - l_int32 depth) -{ -l_int32 *tab; -l_int32 i, j, thresh, maxval, quantval; - - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - maxval = (1 << depth) - 1; - if (depth < 8) - nlevels = 1 << depth; - for (i = 0; i < 256; i++) { - for (j = 0; j < nlevels; j++) { - thresh = 255 * (2 * j + 1) / (2 * nlevels - 2); - if (i <= thresh) { - quantval = maxval * j / (nlevels - 1); - tab[i] = quantval; -/* lept_stderr("tab[%d] = %d\n", i, tab[i]); */ - break; - } - } - } - return tab; -} - - -/*----------------------------------------------------------------------* - * Quantization table for arbitrary thresholding of grayscale images * - *----------------------------------------------------------------------*/ -/*! - * \brief makeGrayQuantTableArb() - * - * \param[in] na numa of bin boundaries - * \param[in] outdepth of colormap: 1, 2, 4 or 8 - * \param[out] ptab table mapping input gray level to cmap index - * \param[out] pcmap colormap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The number of bins is the count of %na + 1.
- *      (2) The bin boundaries in na must be sorted in increasing order.
- *      (3) The table is an inverse colormap: it maps input gray level
- *          to colormap index (the bin number).
- *      (4) The colormap generated here has quantized values at the
- *          center of each bin.  If you want to use the average gray
- *          value of pixels within the bin, discard the colormap and
- *          compute it using makeGrayQuantColormapArb().
- *      (5) Returns an error if there are not enough levels in the
- *          output colormap for the number of bins.  The number
- *          of bins must not exceed 2^outdepth.
- * 
- */ -l_ok -makeGrayQuantTableArb(NUMA *na, - l_int32 outdepth, - l_int32 **ptab, - PIXCMAP **pcmap) -{ -l_int32 i, j, n, jstart, ave, val; -l_int32 *tab; -PIXCMAP *cmap; - - PROCNAME("makeGrayQuantTableArb"); - - if (!ptab) - return ERROR_INT("&tab not defined", procName, 1); - *ptab = NULL; - if (!pcmap) - return ERROR_INT("&cmap not defined", procName, 1); - *pcmap = NULL; - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (n + 1 > (1 << outdepth)) - return ERROR_INT("more bins than cmap levels", procName, 1); - - if ((cmap = pixcmapCreate(outdepth)) == NULL) - return ERROR_INT("cmap not made", procName, 1); - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - *ptab = tab; - *pcmap = cmap; - - /* First n bins */ - jstart = 0; - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &val); - ave = (jstart + val) / 2; - pixcmapAddColor(cmap, ave, ave, ave); - for (j = jstart; j < val; j++) - tab[j] = i; - jstart = val; - } - - /* Last bin */ - ave = (jstart + 255) / 2; - pixcmapAddColor(cmap, ave, ave, ave); - for (j = jstart; j < 256; j++) - tab[j] = n; - - return 0; -} - - -/*! - * \brief makeGrayQuantColormapArb() - * - * \param[in] pixs 8 bpp - * \param[in] tab table mapping input gray level to cmap index - * \param[in] outdepth of colormap: 1, 2, 4 or 8 - * \param[out] pcmap colormap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The table is a 256-entry inverse colormap: it maps input gray
- *          level to colormap index (the bin number).  It is computed
- *          using makeGrayQuantTableArb().
- *      (2) The colormap generated here has quantized values at the
- *          average gray value of the pixels that are in each bin.
- *      (3) Returns an error if there are not enough levels in the
- *          output colormap for the number of bins.  The number
- *          of bins must not exceed 2^outdepth.
- * 
- */ -static l_int32 -makeGrayQuantColormapArb(PIX *pixs, - l_int32 *tab, - l_int32 outdepth, - PIXCMAP **pcmap) -{ -l_int32 i, j, index, w, h, d, nbins, wpl, factor, val; -l_int32 *bincount, *binave, *binstart; -l_uint32 *line, *data; - - PROCNAME("makeGrayQuantColormapArb"); - - if (!pcmap) - return ERROR_INT("&cmap not defined", procName, 1); - *pcmap = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return ERROR_INT("pixs not 8 bpp", procName, 1); - if (!tab) - return ERROR_INT("tab not defined", procName, 1); - nbins = tab[255] + 1; - if (nbins > (1 << outdepth)) - return ERROR_INT("more bins than cmap levels", procName, 1); - - /* Find the count and weighted count for each bin */ - if ((bincount = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32))) == NULL) - return ERROR_INT("calloc fail for bincount", procName, 1); - if ((binave = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32))) == NULL) { - LEPT_FREE(bincount); - return ERROR_INT("calloc fail for binave", procName, 1); - } - factor = (l_int32)(sqrt((l_float64)(w * h) / 30000.) + 0.5); - factor = L_MAX(1, factor); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - val = GET_DATA_BYTE(line, j); - bincount[tab[val]]++; - binave[tab[val]] += val; - } - } - - /* Find the smallest gray values in each bin */ - binstart = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32)); - for (i = 1, index = 1; i < 256; i++) { - if (tab[i] < index) continue; - if (tab[i] == index) - binstart[index++] = i; - } - - /* Get the averages. If there are no samples in a bin, use - * the center value of the bin. 
*/ - *pcmap = pixcmapCreate(outdepth); - for (i = 0; i < nbins; i++) { - if (bincount[i]) { - val = binave[i] / bincount[i]; - } else { /* no samples in the bin */ - if (i < nbins - 1) - val = (binstart[i] + binstart[i + 1]) / 2; - else /* last bin */ - val = (binstart[i] + 255) / 2; - } - pixcmapAddColor(*pcmap, val, val, val); - } - - LEPT_FREE(bincount); - LEPT_FREE(binave); - LEPT_FREE(binstart); - return 0; -} - - -/*--------------------------------------------------------------------* - * Thresholding from 32 bpp rgb to 1 bpp * - *--------------------------------------------------------------------*/ -/*! - * \brief pixGenerateMaskByBand32() - * - * \param[in] pixs 32 bpp - * \param[in] refval reference rgb value - * \param[in] delm max amount below the ref value for any component - * \param[in] delp max amount above the ref value for any component - * \param[in] fractm fractional amount below ref value for all components - * \param[in] fractp fractional amount above ref value for all components - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Generates a 1 bpp mask pixd, the same size as pixs, where
- *          the fg pixels in the mask within a band of rgb values
- *          surrounding %refval.  The band can be chosen in two ways
- *          for each component:
- *          (a) Use (%delm, %delp) to specify how many levels down and up
- *          (b) Use (%fractm, %fractp) to specify the fractional
- *              distance toward 0 and 255, respectively.
- *          Note that %delm and %delp must be in [0 ... 255], whereas
- *          %fractm and %fractp must be in [0.0 - 1.0].
- *      (2) Either (%delm, %delp) or (%fractm, %fractp) can be used.
- *          Set each value in the other pair to 0.
- * 
- */ -PIX * -pixGenerateMaskByBand32(PIX *pixs, - l_uint32 refval, - l_int32 delm, - l_int32 delp, - l_float32 fractm, - l_float32 fractp) -{ -l_int32 i, j, w, h, d, wpls, wpld; -l_int32 rref, gref, bref, rval, gval, bval; -l_int32 rmin, gmin, bmin, rmax, gmax, bmax; -l_uint32 pixel; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixGenerateMaskByBand32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("not 32 bpp", procName, NULL); - if (delm < 0 || delp < 0) - return (PIX *)ERROR_PTR("delm and delp must be >= 0", procName, NULL); - if (fractm < 0.0 || fractm > 1.0 || fractp < 0.0 || fractp > 1.0) - return (PIX *)ERROR_PTR("fractm and/or fractp invalid", procName, NULL); - - extractRGBValues(refval, &rref, &gref, &bref); - if (fractm == 0.0 && fractp == 0.0) { - rmin = rref - delm; - gmin = gref - delm; - bmin = bref - delm; - rmax = rref + delm; - gmax = gref + delm; - bmax = bref + delm; - } else if (delm == 0 && delp == 0) { - rmin = (l_int32)((1.0 - fractm) * rref); - gmin = (l_int32)((1.0 - fractm) * gref); - bmin = (l_int32)((1.0 - fractm) * bref); - rmax = rref + (l_int32)(fractp * (255 - rref)); - gmax = gref + (l_int32)(fractp * (255 - gref)); - bmax = bref + (l_int32)(fractp * (255 - bref)); - } else { - L_ERROR("bad input: either (delm, delp) or (fractm, fractp) " - "must be 0\n", procName); - return NULL; - } - - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = lines[j]; - rval = (pixel >> L_RED_SHIFT) & 0xff; - if (rval < rmin || rval > rmax) - continue; - gval = (pixel >> L_GREEN_SHIFT) & 0xff; - if (gval < gmin || gval > gmax) - continue; - bval = 
(pixel >> L_BLUE_SHIFT) & 0xff; - if (bval < bmin || bval > bmax) - continue; - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*! - * \brief pixGenerateMaskByDiscr32() - * - * \param[in] pixs 32 bpp - * \param[in] refval1 reference rgb value - * \param[in] refval2 reference rgb value - * \param[in] distflag L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Generates a 1 bpp mask pixd, the same size as pixs, where
- *          the fg pixels in the mask are those where the pixel in pixs
- *          is "closer" to refval1 than to refval2.
- *      (2) "Closer" can be defined in several ways, such as:
- *            ~ manhattan distance (L1)
- *            ~ euclidean distance (L2)
- *            ~ majority vote of the individual components
- *          Here, we have a choice of L1 or L2.
- * 
- */ -PIX * -pixGenerateMaskByDiscr32(PIX *pixs, - l_uint32 refval1, - l_uint32 refval2, - l_int32 distflag) -{ -l_int32 i, j, w, h, d, wpls, wpld; -l_int32 rref1, gref1, bref1, rref2, gref2, bref2, rval, gval, bval; -l_uint32 pixel, dist1, dist2; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixGenerateMaskByDiscr32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("not 32 bpp", procName, NULL); - if (distflag != L_MANHATTAN_DISTANCE && distflag != L_EUCLIDEAN_DISTANCE) - return (PIX *)ERROR_PTR("invalid distflag", procName, NULL); - - extractRGBValues(refval1, &rref1, &gref1, &bref1); - extractRGBValues(refval2, &rref2, &gref2, &bref2); - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - pixel = lines[j]; - extractRGBValues(pixel, &rval, &gval, &bval); - if (distflag == L_MANHATTAN_DISTANCE) { - dist1 = L_ABS(rref1 - rval); - dist2 = L_ABS(rref2 - rval); - dist1 += L_ABS(gref1 - gval); - dist2 += L_ABS(gref2 - gval); - dist1 += L_ABS(bref1 - bval); - dist2 += L_ABS(bref2 - bval); - } else { - dist1 = (rref1 - rval) * (rref1 - rval); - dist2 = (rref2 - rval) * (rref2 - rval); - dist1 += (gref1 - gval) * (gref1 - gval); - dist2 += (gref2 - gval) * (gref2 - gval); - dist1 += (bref1 - bval) * (bref1 - bval); - dist2 += (bref2 - bval) * (bref2 - bval); - } - if (dist1 < dist2) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*----------------------------------------------------------------------* - * Histogram-based grayscale quantization * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixGrayQuantFromHisto() - * - * \param[in] pixd [optional] quantized pix with cmap; can be null - * \param[in] pixs 8 bpp gray input pix; not cmapped - * \param[in] pixm [optional] mask over pixels in pixs to quantize - * \param[in] minfract minimum fraction of pixels in a set of adjacent - * histo bins that causes the set to be automatically - * set aside as a color in the colormap; must be - * at least 0.01 - * \param[in] maxsize maximum number of adjacent bins allowed to represent - * a color, regardless of the population of pixels - * in the bins; must be at least 2 - * \return pixd 8 bpp, cmapped, or NULL on error - * - *
- * Notes:
- *      (1) This is useful for quantizing images with relatively few
- *          colors, but which may have both color and gray pixels.
- *          If there are color pixels, it is assumed that an input
- *          rgb image has been color quantized first so that:
- *            ~ pixd has a colormap describing the color pixels
- *            ~ pixm is a mask over the non-color pixels in pixd
- *            ~ the colormap in pixd, and the color pixels in pixd,
- *              have been repacked to go from 0 to n-1 (n colors)
- *          If there are no color pixels, pixd and pixm are both null,
- *          and all pixels in pixs are quantized to gray.
- *      (2) A 256-entry histogram is built of the gray values in pixs.
- *          If pixm exists, the pixels contributing to the histogram are
- *          restricted to the fg of pixm.  A colormap and LUT are generated
- *          from this histogram.  We break up the array into a set
- *          of intervals, each one constituting a color in the colormap:
- *          An interval is identified by summing histogram bins until
- *          either the sum equals or exceeds the %minfract of the total
- *          number of pixels, or the span itself equals or exceeds %maxsize.
- *          The color of each bin is always an average of the pixels
- *          that constitute it.
- *      (3) Note that we do not specify the number of gray colors in
- *          the colormap.  Instead, we specify two parameters that
- *          describe the accuracy of the color assignments; this and
- *          the actual image determine the number of resulting colors.
- *      (4) If a mask exists and it is not the same size as pixs, make
- *          a new mask the same size as pixs, with the original mask
- *          aligned at the UL corners.  Set all additional pixels
- *          in the (larger) new mask set to 1, causing those pixels
- *          in pixd to be set as gray.
- *      (5) We estimate the total number of colors (color plus gray);
- *          if it exceeds 255, return null.
- * 
- */ -PIX * -pixGrayQuantFromHisto(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_float32 minfract, - l_int32 maxsize) -{ -l_int32 w, h, wd, hd, wm, hm, wpls, wplm, wpld; -l_int32 nc, nestim, i, j, vals, vald; -l_int32 *lut; -l_uint32 *datas, *datam, *datad, *lines, *linem, *lined; -NUMA *na; -PIX *pixmr; /* resized mask */ -PIXCMAP *cmap; - - PROCNAME("pixGrayQuantFromHisto"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (minfract < 0.01) { - L_WARNING("minfract < 0.01; setting to 0.05\n", procName); - minfract = 0.05; - } - if (maxsize < 2) { - L_WARNING("maxsize < 2; setting to 10\n", procName); - maxsize = 10; - } - if ((pixd && !pixm) || (!pixd && pixm)) - return (PIX *)ERROR_PTR("(pixd,pixm) not defined together", - procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixd) { - if (pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm not 1 bpp", procName, NULL); - if ((cmap = pixGetColormap(pixd)) == NULL) - return (PIX *)ERROR_PTR("pixd not cmapped", procName, NULL); - pixGetDimensions(pixd, &wd, &hd, NULL); - if (w != wd || h != hd) - return (PIX *)ERROR_PTR("pixs, pixd sizes differ", procName, NULL); - nc = pixcmapGetCount(cmap); - nestim = nc + (l_int32)(1.5 * 255 / maxsize); - lept_stderr( "nestim = %d\n", nestim); - if (nestim > 255) { - L_ERROR("Estimate %d colors!\n", procName, nestim); - return (PIX *)ERROR_PTR("probably too many colors", procName, NULL); - } - pixGetDimensions(pixm, &wm, &hm, NULL); - if (w != wm || h != hm) { /* resize the mask */ - L_WARNING("mask and dest sizes not equal\n", procName); - pixmr = pixCreateNoInit(w, h, 1); - pixRasterop(pixmr, 0, 0, wm, hm, PIX_SRC, pixm, 0, 0); - pixRasterop(pixmr, wm, 0, w - wm, h, PIX_SET, NULL, 0, 0); - pixRasterop(pixmr, 0, hm, wm, h - hm, PIX_SET, NULL, 0, 0); - } else { - pixmr = pixClone(pixm); - } - } else { - pixd = pixCreateTemplate(pixs); - cmap = pixcmapCreate(8); - pixSetColormap(pixd, cmap); - } - 
pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Use original mask, if it exists, to select gray pixels */ - na = pixGetGrayHistogramMasked(pixs, pixm, 0, 0, 1); - - /* Fill out the cmap with gray colors, and generate the lut - * for pixel assignment. Issue a warning on failure. */ - if (numaFillCmapFromHisto(na, cmap, minfract, maxsize, &lut)) - L_ERROR("ran out of colors in cmap!\n", procName); - numaDestroy(&na); - - /* Assign the gray pixels to their cmap indices */ - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - if (!pixm) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - vald = lut[vals]; - SET_DATA_BYTE(lined, j, vald); - } - } - LEPT_FREE(lut); - return pixd; - } - - datam = pixGetData(pixmr); - wplm = pixGetWpl(pixmr); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (!GET_DATA_BIT(linem, j)) - continue; - vals = GET_DATA_BYTE(lines, j); - vald = lut[vals]; - SET_DATA_BYTE(lined, j, vald); - } - } - pixDestroy(&pixmr); - LEPT_FREE(lut); - return pixd; -} - - -/*! - * \brief numaFillCmapFromHisto() - * - * \param[in] na histogram of gray values - * \param[in] cmap 8 bpp cmap, possibly initialized with color value - * \param[in] minfract minimum fraction of pixels in a set of adjacent - * histo bins that causes the set to be automatically - * set aside as a color in the colormap; must be - * at least 0.01 - * \param[in] maxsize maximum number of adjacent bins allowed to represent - * a color, regardless of the population of pixels - * in the bins; must be at least 2 - * \param[out] plut lookup table from gray value to colormap index - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This static function must be called from pixGrayQuantFromHisto()
- * 
- */ -static l_int32 -numaFillCmapFromHisto(NUMA *na, - PIXCMAP *cmap, - l_float32 minfract, - l_int32 maxsize, - l_int32 **plut) -{ -l_int32 mincount, index, sum, wtsum, span, istart, i, val, ret; -l_int32 *iahisto, *lut; -l_float32 total; - - PROCNAME("numaFillCmapFromHisto"); - - if (!plut) - return ERROR_INT("&lut not defined", procName, 1); - *plut = NULL; - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - - numaGetSum(na, &total); - mincount = (l_int32)(minfract * total); - iahisto = numaGetIArray(na); - lut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - *plut = lut; - index = pixcmapGetCount(cmap); /* start with number of colors - * already reserved */ - - /* March through, associating colors with sets of adjacent - * gray levels. During the process, the LUT that gives - * the colormap index for each gray level is computed. - * To complete a color, either the total count must equal - * or exceed %mincount, or the current span of colors must - * equal or exceed %maxsize. An empty span is not converted - * into a color; it is simply ignored. When a span is completed for a - * color, the weighted color in the span is added to the colormap. 
*/ - sum = 0; - wtsum = 0; - istart = 0; - ret = 0; - for (i = 0; i < 256; i++) { - lut[i] = index; - sum += iahisto[i]; - wtsum += i * iahisto[i]; - span = i - istart + 1; - if (sum < mincount && span < maxsize) - continue; - - if (sum == 0) { /* empty span; don't save */ - istart = i + 1; - continue; - } - - /* Found new color; sum > 0 */ - val = (l_int32)((l_float32)wtsum / (l_float32)sum + 0.5); - ret = pixcmapAddColor(cmap, val, val, val); - istart = i + 1; - sum = 0; - wtsum = 0; - index++; - } - if (istart < 256 && sum > 0) { /* last one */ - span = 256 - istart; - val = (l_int32)((l_float32)wtsum / (l_float32)sum + 0.5); - ret = pixcmapAddColor(cmap, val, val, val); - } - - LEPT_FREE(iahisto); - return ret; -} - - -/*----------------------------------------------------------------------* - * Color quantize grayscale image using existing colormap * - *----------------------------------------------------------------------*/ -/*! - * \brief pixGrayQuantFromCmap() - * - * \param[in] pixs 8 bpp grayscale without cmap - * \param[in] cmap to quantize to; of dest pix - * \param[in] mindepth minimum depth of pixd: can be 2, 4 or 8 bpp - * \return pixd 2, 4 or 8 bpp, colormapped, or NULL on error - * - *
- * Notes:
- *      (1) In use, pixs is an 8 bpp grayscale image without a colormap.
- *          If there is an existing colormap, a warning is issued and
- *          a copy of the input pixs is returned.
- * 
- */ -PIX * -pixGrayQuantFromCmap(PIX *pixs, - PIXCMAP *cmap, - l_int32 mindepth) -{ -l_int32 i, j, index, w, h, d, depth, wpls, wpld; -l_int32 hascolor, vals, vald; -l_int32 *tab; -l_uint32 *datas, *datad, *lines, *lined; -PIXCMAP *cmapd; -PIX *pixd; - - PROCNAME("pixGrayQuantFromCmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) != NULL) { - L_WARNING("pixs already has a colormap; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (!cmap) - return (PIX *)ERROR_PTR("cmap not defined", procName, NULL); - if (mindepth != 2 && mindepth != 4 && mindepth != 8) - return (PIX *)ERROR_PTR("invalid mindepth", procName, NULL); - - /* Make sure the colormap is gray */ - pixcmapHasColor(cmap, &hascolor); - if (hascolor) { - L_WARNING("Converting colormap colors to gray\n", procName); - cmapd = pixcmapColorToGray(cmap, 0.3, 0.5, 0.2); - } else { - cmapd = pixcmapCopy(cmap); - } - - /* Make LUT into colormap */ - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) { - pixcmapGetNearestGrayIndex(cmapd, i, &index); - tab[i] = index; - } - - pixcmapGetMinDepth(cmap, &depth); - depth = L_MAX(depth, mindepth); - pixd = pixCreate(w, h, depth); - pixSetColormap(pixd, cmapd); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - vald = tab[vals]; - if (depth == 2) - SET_DATA_DIBIT(lined, j, vald); - else if (depth == 4) - SET_DATA_QBIT(lined, j, vald); - else /* depth == 8 */ - SET_DATA_BYTE(lined, j, vald); - } - } - - LEPT_FREE(tab); - return pixd; -} - - -#if 0 /* Documentation */ 
-/*--------------------------------------------------------------------* - * Implementation of binarization by dithering using LUTs * - * It is archived here. * - *--------------------------------------------------------------------*/ -/*! - * \brief pixDitherToBinaryLUT() - * - * \param[in] pixs - * \param[in] lowerclip lower clip distance to black; use -1 for default - * \param[in] upperclip upper clip distance to white; use -1 for default - * \return pixd dithered binary, or NULL on error - * - * We don't need two implementations of Floyd-Steinberg dithering, - * and this one with LUTs is a little more complicated than - * pixDitherToBinary(). It uses three lookup tables to generate the - * output pixel value and the excess or deficit carried over to the - * neighboring pixels. It's here for pedagogical reasons only. - */ -PIX * -pixDitherToBinaryLUT(PIX *pixs, - l_int32 lowerclip, - l_int32 upperclip) -{ -l_int32 w, h, d, wplt, wpld; -l_int32 *tabval, *tab38, *tab14; -l_uint32 *datat, *datad; -l_uint32 *bufs1, *bufs2; -PIX *pixt, *pixd; - - PROCNAME("pixDitherToBinaryLUT"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("must be 8 bpp for dithering", procName, NULL); - if (lowerclip < 0) - lowerclip = DEFAULT_CLIP_LOWER_1; - if (upperclip < 0) - upperclip = DEFAULT_CLIP_UPPER_1; - - if ((pixd = pixCreate(w, h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Remove colormap if it exists */ - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - /* Two line buffers, 1 for current line and 2 for next line */ - bufs1 = (l_uint32 *)LEPT_CALLOC(wplt, sizeof(l_uint32)); - bufs2 = (l_uint32 *)LEPT_CALLOC(wplt, sizeof(l_uint32)); - if (!bufs1 || 
!bufs2) { - LEPT_FREE(bufs1); - LEPT_FREE(bufs2); - pixDestroy(&pixd); - pixDestroy(&pixt); - return (PIX *)ERROR_PTR("bufs1, bufs2 not both made", procName, NULL); - } - - /* 3 lookup tables: 1-bit value, (3/8)excess, and (1/4)excess */ - make8To1DitherTables(&tabval, &tab38, &tab14, lowerclip, upperclip); - - ditherToBinaryLUTLow(datad, w, h, wpld, datat, wplt, bufs1, bufs2, - tabval, tab38, tab14); - - LEPT_FREE(bufs1); - LEPT_FREE(bufs2); - LEPT_FREE(tabval); - LEPT_FREE(tab38); - LEPT_FREE(tab14); - pixDestroy(&pixt); - return pixd; -} - -/*! - * \brief ditherToBinaryLUTLow() - * - * Low-level function for doing Floyd-Steinberg error diffusion - * dithering from 8 bpp (datas) to 1 bpp (datad). Two source - * line buffers, bufs1 and bufs2, are provided, along with three - * 256-entry lookup tables: tabval gives the output pixel value, - * tab38 gives the extra (plus or minus) transferred to the pixels - * directly to the left and below, and tab14 gives the extra - * transferred to the diagonal below. The choice of 3/8 and 1/4 - * is traditional but arbitrary when you use a lookup table; the - * only constraint is that the sum is 1. See other comments below. - */ -void -ditherToBinaryLUTLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_uint32 *bufs1, - l_uint32 *bufs2, - l_int32 *tabval, - l_int32 *tab38, - l_int32 *tab14) -{ -l_int32 i; -l_uint32 *lined; - - /* do all lines except last line */ - memcpy(bufs2, datas, 4 * wpls); /* prime the buffer */ - for (i = 0; i < h - 1; i++) { - memcpy(bufs1, bufs2, 4 * wpls); - memcpy(bufs2, datas + (i + 1) * wpls, 4 * wpls); - lined = datad + i * wpld; - ditherToBinaryLineLUTLow(lined, w, bufs1, bufs2, - tabval, tab38, tab14, 0); - } - - /* do last line */ - memcpy(bufs1, bufs2, 4 * wpls); - lined = datad + (h - 1) * wpld; - ditherToBinaryLineLUTLow(lined, w, bufs1, bufs2, tabval, tab38, tab14, 1); - return; -} - -/*! 
- * \brief ditherToBinaryLineLUTLow() - * - * \param[in] lined ptr to beginning of dest line - * \param[in] w width of image in pixels - * \param[in] bufs1 buffer of current source line - * \param[in] bufs2 buffer of next source line - * \param[in] tabval value to assign for current pixel - * \param[in] tab38 excess value to give to neighboring 3/8 pixels - * \param[in] tab14 excess value to give to neighboring 1/4 pixel - * \param[in] lastlineflag 0 if not last dest line, 1 if last dest line - * \return void - */ -void -ditherToBinaryLineLUTLow(l_uint32 *lined, - l_int32 w, - l_uint32 *bufs1, - l_uint32 *bufs2, - l_int32 *tabval, - l_int32 *tab38, - l_int32 *tab14, - l_int32 lastlineflag) -{ -l_int32 j; -l_int32 oval, tab38val, tab14val; -l_uint8 rval, bval, dval; - - if (lastlineflag == 0) { - for (j = 0; j < w - 1; j++) { - oval = GET_DATA_BYTE(bufs1, j); - if (tabval[oval]) - SET_DATA_BIT(lined, j); - rval = GET_DATA_BYTE(bufs1, j + 1); - bval = GET_DATA_BYTE(bufs2, j); - dval = GET_DATA_BYTE(bufs2, j + 1); - tab38val = tab38[oval]; - if (tab38val == 0) - continue; - tab14val = tab14[oval]; - if (tab38val < 0) { - rval = L_MAX(0, rval + tab38val); - bval = L_MAX(0, bval + tab38val); - dval = L_MAX(0, dval + tab14val); - } else { - rval = L_MIN(255, rval + tab38val); - bval = L_MIN(255, bval + tab38val); - dval = L_MIN(255, dval + tab14val); - } - SET_DATA_BYTE(bufs1, j + 1, rval); - SET_DATA_BYTE(bufs2, j, bval); - SET_DATA_BYTE(bufs2, j + 1, dval); - } - - /* do last column: j = w - 1 */ - oval = GET_DATA_BYTE(bufs1, j); - if (tabval[oval]) - SET_DATA_BIT(lined, j); - bval = GET_DATA_BYTE(bufs2, j); - tab38val = tab38[oval]; - if (tab38val < 0) { - bval = L_MAX(0, bval + tab38val); - SET_DATA_BYTE(bufs2, j, bval); - } else if (tab38val > 0 ) { - bval = L_MIN(255, bval + tab38val); - SET_DATA_BYTE(bufs2, j, bval); - } - } else { /* lastlineflag == 1 */ - for (j = 0; j < w - 1; j++) { - oval = GET_DATA_BYTE(bufs1, j); - if (tabval[oval]) - SET_DATA_BIT(lined, 
j); - rval = GET_DATA_BYTE(bufs1, j + 1); - tab38val = tab38[oval]; - if (tab38val == 0) - continue; - if (tab38val < 0) - rval = L_MAX(0, rval + tab38val); - else - rval = L_MIN(255, rval + tab38val); - SET_DATA_BYTE(bufs1, j + 1, rval); - } - - /* do last pixel: (i, j) = (h - 1, w - 1) */ - oval = GET_DATA_BYTE(bufs1, j); - if (tabval[oval]) - SET_DATA_BIT(lined, j); - } - - return; -} - -/*! - * \brief make8To1DitherTables() - * - * \param[out] ptabval value assigned to output pixel; 0 or 1 - * \param[out] ptab38 amount propagated to pixels left and below - * \param[out] ptab14 amount propagated to pixel to left and down - * \param[in] lowerclip values near 0 where the excess is not propagated - * \param[in] upperclip values near 255 where the deficit is not propagated - * - * \return 0 if OK, 1 on error - */ -l_ok -make8To1DitherTables(l_int32 **ptabval, - l_int32 **ptab38, - l_int32 **ptab14, - l_int32 lowerclip, - l_int32 upperclip) -{ -l_int32 i; -l_int32 *tabval, *tab38, *tab14; - - PROCNAME("make8To1DitherTables"); - - if (ptabval) *ptabval = NULL; - if (ptab38) *ptab38 = NULL; - if (ptab14) *ptab14 = NULL; - if (!ptabval || !ptab38 || !ptab14) - return ERROR_INT("table ptrs not all defined", procName, 1); - - /* 3 lookup tables: 1-bit value, (3/8)excess, and (1/4)excess */ - tabval = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - tab38 = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - tab14 = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - if (!tabval || !tab38 || !tab14) - return ERROR_INT("calloc failure to make small table", procName, 1); - *ptabval = tabval; - *ptab38 = tab38; - *ptab14 = tab14; - - for (i = 0; i < 256; i++) { - if (i <= lowerclip) { - tabval[i] = 1; - tab38[i] = 0; - tab14[i] = 0; - } else if (i < 128) { - tabval[i] = 1; - tab38[i] = (3 * i + 4) / 8; - tab14[i] = (i + 2) / 4; - } else if (i < 255 - upperclip) { - tabval[i] = 0; - tab38[i] = (3 * (i - 255) + 4) / 8; - tab14[i] = ((i - 255) + 2) / 4; - } else { /* i >= 255 - upperclip 
*/ - tabval[i] = 0; - tab38[i] = 0; - tab14[i] = 0; - } - } - - return 0; -} -#endif /* Documentation */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/heap.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/heap.c deleted file mode 100644 index 9b6738e5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/heap.c +++ /dev/null @@ -1,589 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file heap.c - *
- *
- *      Create/Destroy L_Heap
- *          L_HEAP         *lheapCreate()
- *          void            lheapDestroy()
- *
- *      Operations to add/remove to/from the heap
- *          l_int32         lheapAdd()
- *          static l_int32  lheapExtendArray()
- *          void           *lheapRemove()
- *
- *      Other accessors
- *          l_int32         lheapGetCount()
- *          void           *lheapGetElement()
- *
- *      Heap sort
- *          l_int32         lheapSort()
- *          l_int32         lheapSortStrictOrder()
- *
- *      Low-level heap operations
- *          static l_int32  lheapSwapUp()
- *          static l_int32  lheapSwapDown()
- *
- *      Debug output
- *          l_int32         lheapPrint()
- *
- *    The L_Heap is useful for implementing a priority queue that is sorted
- *    on a key in each element of the heap.  The heap is an array
- *    of nearly arbitrary structs, with an l_float32 as the first field.
- *    This field is the key on which the heap is sorted.
- *
- *    Internally, we keep track of the heap size, n.  The item at the
- *    root of the heap is at the head of the array.  Items are removed
- *    from the head of the array and added to the end of the array.
- *    When an item is removed from the head, the item at the end
- *    of the array is moved to the head.  When items are either
- *    added or removed, it is usually necessary to swap array items
- *    to restore the heap order.  It is guaranteed that the number
- *    of swaps does not exceed log(n).
- *
- *    --------------------------  N.B.  ------------------------------
- *    The items on the heap (or, equivalently, in the array) are cast
- *    to void*.  Their key is a l_float32, and it is REQUIRED that the
- *    key be the first field in the struct.  That allows us to get the
- *    key by simply dereferencing the struct.  Alternatively, we could
- *    choose (but don't) to pass an application-specific comparison
- *    function into the heap operation functions.
- *    --------------------------  N.B.  ------------------------------
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 100000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - -#define SWAP_ITEMS(i, j) { void *tempitem = lh->array[(i)]; \ - lh->array[(i)] = lh->array[(j)]; \ - lh->array[(j)] = tempitem; } - - /* Static functions */ -static l_int32 lheapExtendArray(L_HEAP *lh); -static l_ok lheapSwapUp(L_HEAP *lh, l_int32 index); -static l_ok lheapSwapDown(L_HEAP *lh); - - -/*--------------------------------------------------------------------------* - * L_Heap create/destroy * - *--------------------------------------------------------------------------*/ -/*! - * \brief lheapCreate() - * - * \param[in] n size of ptr array to be alloc'd; use 0 for default - * \param[in] direction L_SORT_INCREASING, L_SORT_DECREASING - * \return lheap, or NULL on error - */ -L_HEAP * -lheapCreate(l_int32 n, - l_int32 direction) -{ -L_HEAP *lh; - - PROCNAME("lheapCreate"); - - if (n < InitialPtrArraySize || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - /* Allocate ptr array and initialize counters. */ - lh = (L_HEAP *)LEPT_CALLOC(1, sizeof(L_HEAP)); - if ((lh->array = (void **)LEPT_CALLOC(n, sizeof(void *))) == NULL) { - lheapDestroy(&lh, FALSE); - return (L_HEAP *)ERROR_PTR("ptr array not made", procName, NULL); - } - lh->nalloc = n; - lh->n = 0; - lh->direction = direction; - return lh; -} - - -/*! - * \brief lheapDestroy() - * - * \param[in,out] plh will be set to null before returning - * \param[in] freeflag TRUE to free each remaining struct in the array - * \return void - * - *
- * Notes:
- *      (1) Use %freeflag == TRUE when the items in the array can be
- *          simply destroyed using free.  If those items require their
- *          own destroy function, they must be destroyed before
- *          calling this function, and then this function is called
- *          with %freeflag == FALSE.
- *      (2) To destroy the lheap, we destroy the ptr array, then
- *          the lheap, and then null the contents of the input ptr.
- * 
- */ -void -lheapDestroy(L_HEAP **plh, - l_int32 freeflag) -{ -l_int32 i; -L_HEAP *lh; - - PROCNAME("lheapDestroy"); - - if (plh == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - if ((lh = *plh) == NULL) - return; - - if (freeflag) { /* free each struct in the array */ - for (i = 0; i < lh->n; i++) - LEPT_FREE(lh->array[i]); - } else if (lh->n > 0) { /* freeflag == FALSE but elements exist on array */ - L_WARNING("memory leak of %d items in lheap!\n", procName, lh->n); - } - - if (lh->array) - LEPT_FREE(lh->array); - LEPT_FREE(lh); - *plh = NULL; - - return; -} - -/*--------------------------------------------------------------------------* - * Operations to add/remove to/from the heap * - *--------------------------------------------------------------------------*/ -/*! - * \brief lheapAdd() - * - * \param[in] lh heap - * \param[in] item to be added to the tail of the heap - * \return 0 if OK, 1 on error - */ -l_ok -lheapAdd(L_HEAP *lh, - void *item) -{ - PROCNAME("lheapAdd"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - if (!item) - return ERROR_INT("item not defined", procName, 1); - - /* If necessary, expand the allocated array by a factor of 2 */ - if (lh->n >= lh->nalloc) - lheapExtendArray(lh); - - /* Add the item */ - lh->array[lh->n] = item; - lh->n++; - - /* Restore the heap */ - lheapSwapUp(lh, lh->n - 1); - return 0; -} - - -/*! - * \brief lheapExtendArray() - * - * \param[in] lh heap - * \return 0 if OK, 1 on error - */ -static l_int32 -lheapExtendArray(L_HEAP *lh) -{ - PROCNAME("lheapExtendArray"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - - if ((lh->array = (void **)reallocNew((void **)&lh->array, - sizeof(void *) * lh->nalloc, - 2 * sizeof(void *) * lh->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - lh->nalloc = 2 * lh->nalloc; - return 0; -} - - -/*! 
- * \brief lheapRemove() - * - * \param[in] lh heap - * \return ptr to item popped from the root of the heap, - * or NULL if the heap is empty or on error - */ -void * -lheapRemove(L_HEAP *lh) -{ -void *item; - - PROCNAME("lheapRemove"); - - if (!lh) - return (void *)ERROR_PTR("lh not defined", procName, NULL); - - if (lh->n == 0) - return NULL; - - item = lh->array[0]; - lh->array[0] = lh->array[lh->n - 1]; /* move last to the head */ - lh->array[lh->n - 1] = NULL; /* set ptr to null */ - lh->n--; - - lheapSwapDown(lh); /* restore the heap */ - return item; -} - - -/*--------------------------------------------------------------------------* - * Other accessors * - *--------------------------------------------------------------------------*/ -/*! - * \brief lheapGetCount() - * - * \param[in] lh heap - * \return count, or 0 on error - */ -l_int32 -lheapGetCount(L_HEAP *lh) -{ - PROCNAME("lheapGetCount"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 0); - - return lh->n; -} - - -/*! - * \brief lheapGetElement() - * - * \param[in] lh heap - * \param[in] index into the internal heap array - * \return ptr to the element at array[index], or NULL on error - * - *
- * Notes:
- *      (1) This is useful for retrieving an arbitrary element in the
- *          heap array without disturbing the heap.  It allows all the
- *          elements on the heap to be queried in linear time; for
- *          example, to find the min or max of some value.
- *      (2) The retrieved element is owned by the heap.  Do not destroy it.
- * 
- */ -void * -lheapGetElement(L_HEAP *lh, - l_int32 index) -{ - PROCNAME("lheapGetElement"); - - if (!lh) - return ERROR_PTR("lh not defined", procName, NULL); - if (index < 0 || index >= lh->n) - return ERROR_PTR("invalid index", procName, NULL); - - return (void *)lh->array[index]; -} - - -/*--------------------------------------------------------------------------* - * Heap sort * - *--------------------------------------------------------------------------*/ -/*! - * \brief lheapSort() - * - * \param[in] lh heap, with internal array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This sorts an array into heap order.  If the heap is already
- *          in heap order for the direction given, this has no effect.
- * 
- */ -l_ok -lheapSort(L_HEAP *lh) -{ -l_int32 i; - - PROCNAME("lheapSort"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - - for (i = 0; i < lh->n; i++) - lheapSwapUp(lh, i); - - return 0; -} - - -/*! - * \brief lheapSortStrictOrder() - * - * \param[in] lh heap, with internal array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This sorts a heap into strict order.
- *      (2) For each element, starting at the end of the array and
- *          working forward, the element is swapped with the head
- *          element and then allowed to swap down onto a heap of
- *          size reduced by one.  The result is that the heap is
- *          reversed but in strict order.  The array elements are
- *          then reversed to put it in the original order.
- * 
- */ -l_ok -lheapSortStrictOrder(L_HEAP *lh) -{ -l_int32 i, index, size; - - PROCNAME("lheapSortStrictOrder"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - - /* Start from a sorted heap */ - lheapSort(lh); - - size = lh->n; /* save the actual size */ - for (i = 0; i < size; i++) { - index = size - i; - SWAP_ITEMS(0, index - 1); - lh->n--; /* reduce the apparent heap size by 1 */ - lheapSwapDown(lh); - } - lh->n = size; /* restore the size */ - - for (i = 0; i < size / 2; i++) /* reverse */ - SWAP_ITEMS(i, size - i - 1); - - return 0; -} - - -/*--------------------------------------------------------------------------* - * Low-level heap operations * - *--------------------------------------------------------------------------*/ -/*! - * \brief lheapSwapUp() - * - * \param[in] lh heap - * \param[in] index of array corresponding to node to be swapped up - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is called after a new item is put on the heap, at the
- *          bottom of a complete tree.
- *      (2) To regain the heap order, we let it bubble up,
- *          iteratively swapping with its parent, until it either
- *          reaches the root of the heap or it finds a parent that
- *          is in the correct position already vis-a-vis the child.
- * 
- */ -static l_ok -lheapSwapUp(L_HEAP *lh, - l_int32 index) -{ -l_int32 ip; /* index to heap for parent; 1 larger than array index */ -l_int32 ic; /* index into heap for child */ -l_float32 valp, valc; - - PROCNAME("lheapSwapUp"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - if (index < 0 || index >= lh->n) - return ERROR_INT("invalid index", procName, 1); - - ic = index + 1; /* index into heap: add 1 to array index */ - if (lh->direction == L_SORT_INCREASING) { - while (1) { - if (ic == 1) /* root of heap */ - break; - ip = ic / 2; - valc = *(l_float32 *)(lh->array[ic - 1]); - valp = *(l_float32 *)(lh->array[ip - 1]); - if (valp <= valc) - break; - SWAP_ITEMS(ip - 1, ic - 1); - ic = ip; - } - } else { /* lh->direction == L_SORT_DECREASING */ - while (1) { - if (ic == 1) /* root of heap */ - break; - ip = ic / 2; - valc = *(l_float32 *)(lh->array[ic - 1]); - valp = *(l_float32 *)(lh->array[ip - 1]); - if (valp >= valc) - break; - SWAP_ITEMS(ip - 1, ic - 1); - ic = ip; - } - } - return 0; -} - - -/*! - * \brief lheapSwapDown() - * - * \param[in] lh heap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is called after an item has been popped off the
- *          root of the heap, and the last item in the heap has
- *          been placed at the root.
- *      (2) To regain the heap order, we let it bubble down,
- *          iteratively swapping with one of its children.  For a
- *          decreasing sort, it swaps with the largest child; for
- *          an increasing sort, the smallest.  This continues until
- *          it either reaches the lowest level in the heap, or the
- *          parent finds that neither child should swap with it
- *          (e.g., for a decreasing heap, the parent is larger
- *          than or equal to both children).
- * 
- */ -static l_ok -lheapSwapDown(L_HEAP *lh) -{ -l_int32 ip; /* index to heap for parent; 1 larger than array index */ -l_int32 icr, icl; /* index into heap for left/right children */ -l_float32 valp, valcl, valcr; - - PROCNAME("lheapSwapDown"); - - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - if (lheapGetCount(lh) < 1) - return 0; - - ip = 1; /* index into top of heap: corresponds to array[0] */ - if (lh->direction == L_SORT_INCREASING) { - while (1) { - icl = 2 * ip; - if (icl > lh->n) - break; - valp = *(l_float32 *)(lh->array[ip - 1]); - valcl = *(l_float32 *)(lh->array[icl - 1]); - icr = icl + 1; - if (icr > lh->n) { /* only a left child; no iters below */ - if (valp > valcl) - SWAP_ITEMS(ip - 1, icl - 1); - break; - } else { /* both children exist; swap with the smallest if bigger */ - valcr = *(l_float32 *)(lh->array[icr - 1]); - if (valp <= valcl && valp <= valcr) /* smaller than both */ - break; - if (valcl <= valcr) { /* left smaller; swap */ - SWAP_ITEMS(ip - 1, icl - 1); - ip = icl; - } else { /* right smaller; swap */ - SWAP_ITEMS(ip - 1, icr - 1); - ip = icr; - } - } - } - } else { /* lh->direction == L_SORT_DECREASING */ - while (1) { - icl = 2 * ip; - if (icl > lh->n) - break; - valp = *(l_float32 *)(lh->array[ip - 1]); - valcl = *(l_float32 *)(lh->array[icl - 1]); - icr = icl + 1; - if (icr > lh->n) { /* only a left child; no iters below */ - if (valp < valcl) - SWAP_ITEMS(ip - 1, icl - 1); - break; - } else { /* both children exist; swap with the biggest if smaller */ - valcr = *(l_float32 *)(lh->array[icr - 1]); - if (valp >= valcl && valp >= valcr) /* bigger than both */ - break; - if (valcl >= valcr) { /* left bigger; swap */ - SWAP_ITEMS(ip - 1, icl - 1); - ip = icl; - } else { /* right bigger; swap */ - SWAP_ITEMS(ip - 1, icr - 1); - ip = icr; - } - } - } - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Debug output * - 
*---------------------------------------------------------------------*/ -/*! - * \brief lheapPrint() - * - * \param[in] fp file stream - * \param[in] lh heap - * \return 0 if OK; 1 on error - */ -l_ok -lheapPrint(FILE *fp, - L_HEAP *lh) -{ -l_int32 i; - - PROCNAME("lheapPrint"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!lh) - return ERROR_INT("lh not defined", procName, 1); - - fprintf(fp, "\n L_Heap: nalloc = %d, n = %d, array = %p\n", - lh->nalloc, lh->n, lh->array); - for (i = 0; i < lh->n; i++) - fprintf(fp, "keyval[%d] = %f\n", i, *(l_float32 *)lh->array[i]); - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/heap.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/heap.h deleted file mode 100644 index d39b06b9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/heap.h +++ /dev/null @@ -1,87 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_HEAP_H -#define LEPTONICA_HEAP_H - -/*! - * \file heap.h - * - *
- *      Expandable priority queue configured as a heap for arbitrary void* data
- *
- *      The L_Heap is used to implement a priority queue.  The elements
- *      in the heap are ordered in either increasing or decreasing key value.
- *      The key is a float field 'keyval' that is required to be
- *      the first field in each element of the queue.
- *
- *      The heap is a simple binary tree with the following constraints:
- *         - the key of each node is >= the keys of its two children
- *           (for L_SORT_DECREASING; <= for L_SORT_INCREASING)
- *         - the tree is complete, meaning that each level (1, 2, 4, ...)
- *           is filled and the last level is filled from left to right
- *
- *      The tree structure is implicit in the queue array, with the
- *      array elements numbered as a breadth-first search of the tree
- *      from left to right.  It is thus guaranteed that the largest
- *      (or smallest) key belongs to the first element in the array.
- *
- *      Heap sort is used to sort the array.  Once an array has been
- *      sorted as a heap, it is convenient to use it as a priority queue,
- *      because the min (or max) elements are always at the root of
- *      the tree (element 0), and once removed, the heap can be
- *      resorted in not more than log[n] steps, where n is the number
- *      of elements on the heap.  Likewise, if an arbitrary element is
- *      added to the end of the array A, the sorted heap can be restored
- *      in not more than log[n] steps.
- *
- *      A L_Heap differs from a L_Queue in that the elements in the former
- *      are sorted by a key.  Internally, the array is maintained
- *      as a queue, with a pointer to the end of the array.  The
- *      head of the array always remains at array[0].  The array is
- *      maintained (sorted) as a heap.  When an item is removed from
- *      the head, the last item takes its place (thus reducing the
- *      array length by 1), and this is followed by array element
- *      swaps to restore the heap property.   When an item is added,
- *      it goes at the end of the array, and is swapped up to restore
- *      the heap.  If the ptr array is full, adding another item causes
- *      the ptr array size to double.
- *
- *      For further implementation details, see heap.c.
- * 
- */ - -/*! Heap of arbitrary void* data */ -struct L_Heap -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - l_int32 n; /*!< number of elements stored in the heap */ - void **array; /*!< ptr array */ - l_int32 direction; /*!< L_SORT_INCREASING or L_SORT_DECREASING */ -}; -typedef struct L_Heap L_HEAP; - - -#endif /* LEPTONICA_HEAP_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/imageio.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/imageio.h deleted file mode 100644 index e0117482..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/imageio.h +++ /dev/null @@ -1,244 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file imageio.h - * - *
- *  General features of image I/O in leptonica
- *
- *  At present, there are 9 file formats for images that can be read
- *  and written:
- *      png (requires libpng, libz)
- *      jpeg (requires libjpeg)
- *      tiff (requires libtiff, libz)
- *      gif (requires libgif)
- *      webp (requires libwebp)
- *      jp2 (requires libopenjp2)
- *      bmp (no library required)
- *      pnm (no library required)
- *      spix (no library required)
- *  Additionally, there are two file formats for writing (only) images:
- *      PostScript (requires libpng, libz, libjpeg, libtiff)
- *      pdf (requires libpng, libz, libjpeg, libtiff)
- *
- *  For all 9 read/write formats, leptonica provides interconversion
- *  between pix (with raster data) and formatted image data:
- *      Conversion from pix (typically compression):
- *          pixWrite():        pix --> file
- *          pixWriteStream():  pix --> filestream (aka FILE*)
- *          pixWriteMem():     pix --> memory buffer
- *      Conversion to pix (typically decompression):
- *          pixRead():         file --> pix
- *          pixReadStream():   filestream --> pix
- *          pixReadMem():      memory buffer --> pix
- *
- *  Conversions for which the image data is not compressed are:
- *     * uncompressed tiff   (IFF_TIFF)
- *     * bmp
- *     * pnm
- *     * spix (fast serialization that copies the pix raster data)
- *
- *  The image header (metadata) information can be read from either
- *  the compressed file or a memory buffer, for all 9 formats.
- * 
- */ - -#ifndef LEPTONICA_IMAGEIO_H -#define LEPTONICA_IMAGEIO_H - -/* --------------------------------------------------------------- * - * Image file format types * - * --------------------------------------------------------------- */ -/* - * The IFF_DEFAULT flag is used to write the file out in the - * same (input) file format that the pix was read from. If the pix - * was not read from file, the input format field will be - * IFF_UNKNOWN and the output file format will be chosen to - * be compressed and lossless; namely, IFF_TIFF_G4 for d = 1 - * and IFF_PNG for everything else. - * - * In the future, new format types that have defined extensions - * will be added before IFF_DEFAULT, and will be kept in sync with - * the file format extensions in writefile.c. The positions of - * file formats before IFF_DEFAULT will remain invariant. - */ - -/*! Image Formats */ -enum { - IFF_UNKNOWN = 0, - IFF_BMP = 1, - IFF_JFIF_JPEG = 2, - IFF_PNG = 3, - IFF_TIFF = 4, - IFF_TIFF_PACKBITS = 5, - IFF_TIFF_RLE = 6, - IFF_TIFF_G3 = 7, - IFF_TIFF_G4 = 8, - IFF_TIFF_LZW = 9, - IFF_TIFF_ZIP = 10, - IFF_PNM = 11, - IFF_PS = 12, - IFF_GIF = 13, - IFF_JP2 = 14, - IFF_WEBP = 15, - IFF_LPDF = 16, - IFF_TIFF_JPEG = 17, - IFF_DEFAULT = 18, - IFF_SPIX = 19 -}; - -/* Convenient macro for checking requested tiff output */ -#define L_FORMAT_IS_TIFF(f) ((f) == IFF_TIFF || (f) == IFF_TIFF_PACKBITS || \ - (f) == IFF_TIFF_RLE || (f) == IFF_TIFF_G3 || \ - (f) == IFF_TIFF_G4 || (f) == IFF_TIFF_LZW || \ - (f) == IFF_TIFF_ZIP || (f) == IFF_TIFF_JPEG) - - -/* --------------------------------------------------------------- * - * Format header ids * - * --------------------------------------------------------------- */ - -/*! 
Header Ids */ -enum { - BMP_ID = 0x4d42, /*!< BM - for bitmaps */ - TIFF_BIGEND_ID = 0x4d4d, /*!< MM - for 'motorola' */ - TIFF_LITTLEEND_ID = 0x4949 /*!< II - for 'intel' */ -}; - - -/* --------------------------------------------------------------- * - * Hinting bit flags in jpeg reader * - * --------------------------------------------------------------- */ - -/*! Jpeg Hints */ -enum { - L_JPEG_READ_LUMINANCE = 1, /*!< only want luminance data; no chroma */ - L_JPEG_FAIL_ON_BAD_DATA = 2 /*!< don't return possibly damaged pix */ -}; - - -/* --------------------------------------------------------------- * - * Pdf formatted encoding types * - * --------------------------------------------------------------- */ - -/*! Pdf Encoding */ -enum { - L_DEFAULT_ENCODE = 0, /*!< use default encoding based on image */ - L_JPEG_ENCODE = 1, /*!< use dct encoding: 8 and 32 bpp, no cmap */ - L_G4_ENCODE = 2, /*!< use ccitt g4 fax encoding: 1 bpp */ - L_FLATE_ENCODE = 3, /*!< use flate encoding: any depth, cmap ok */ - L_JP2K_ENCODE = 4 /*!< use jp2k encoding: 8 and 32 bpp, no cmap */ -}; - - -/* --------------------------------------------------------------- * - * Compressed image data * - * --------------------------------------------------------------- */ -/* - * In use, either datacomp or data85 will be produced, depending - * on whether the data needs to be ascii85 encoded. PostScript - * requires ascii85 encoding; pdf does not. - * - * For the colormap (flate compression only), PostScript uses ascii85 - * encoding and pdf uses a bracketed array of space-separated - * hex-encoded rgb triples. Only tiff g4 (type == L_G4_ENCODE) uses - * the minisblack field. - */ - -/*! 
Compressed image data */ -struct L_Compressed_Data -{ - l_int32 type; /*!< encoding type: L_JPEG_ENCODE, etc */ - l_uint8 *datacomp; /*!< gzipped raster data */ - size_t nbytescomp; /*!< number of compressed bytes */ - char *data85; /*!< ascii85-encoded gzipped raster data */ - size_t nbytes85; /*!< number of ascii85 encoded bytes */ - char *cmapdata85; /*!< ascii85-encoded uncompressed cmap */ - char *cmapdatahex; /*!< hex pdf array for the cmap */ - l_int32 ncolors; /*!< number of colors in cmap */ - l_int32 w; /*!< image width */ - l_int32 h; /*!< image height */ - l_int32 bps; /*!< bits/sample; typ. 1, 2, 4 or 8 */ - l_int32 spp; /*!< samples/pixel; typ. 1 or 3 */ - l_int32 minisblack; /*!< tiff g4 photometry */ - l_int32 predictor; /*!< flate data has PNG predictors */ - size_t nbytes; /*!< number of uncompressed raster bytes */ - l_int32 res; /*!< resolution (ppi) */ -}; -typedef struct L_Compressed_Data L_COMP_DATA; - - -/* ------------------------------------------------------------------------- * - * Pdf multi image flags * - * ------------------------------------------------------------------------- */ - -/*! Pdf MultiImage */ -enum { - L_FIRST_IMAGE = 1, /*!< first image to be used */ - L_NEXT_IMAGE = 2, /*!< intermediate image; not first or last */ - L_LAST_IMAGE = 3 /*!< last image to be used */ -}; - - -/* ------------------------------------------------------------------------- * - * Intermediate pdf generation data * - * ------------------------------------------------------------------------- */ -/* - * This accumulates data for generating a pdf of a single page consisting - * of an arbitrary number of images. - * - * None of the strings have a trailing newline. - */ - -/*! 
Intermediate pdf generation data */ -struct L_Pdf_Data -{ - char *title; /*!< optional title for pdf */ - l_int32 n; /*!< number of images */ - l_int32 ncmap; /*!< number of colormaps */ - struct L_Ptra *cida; /*!< array of compressed image data */ - char *id; /*!< %PDF-1.2 id string */ - char *obj1; /*!< catalog string */ - char *obj2; /*!< metadata string */ - char *obj3; /*!< pages string */ - char *obj4; /*!< page string (variable data) */ - char *obj5; /*!< content string (variable data) */ - char *poststream; /*!< post-binary-stream string */ - char *trailer; /*!< trailer string (variable data) */ - struct Pta *xy; /*!< store (xpt, ypt) array */ - struct Pta *wh; /*!< store (wpt, hpt) array */ - struct Box *mediabox; /*!< bounding region for all images */ - struct Sarray *saprex; /*!< pre-binary-stream xobject strings */ - struct Sarray *sacmap; /*!< colormap pdf object strings */ - struct L_Dna *objsize; /*!< sizes of each pdf string object */ - struct L_Dna *objloc; /*!< location of each pdf string object */ - l_int32 xrefloc; /*!< location of xref */ -}; -typedef struct L_Pdf_Data L_PDF_DATA; - - -#endif /* LEPTONICA_IMAGEIO_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jbclass.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jbclass.c deleted file mode 100644 index cd23a038..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jbclass.c +++ /dev/null @@ -1,2572 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * jbclass.c - * - * These are functions for unsupervised classification of - * collections of connected components -- either characters or - * words -- in binary images. They can be used as image - * processing steps in jbig2 compression. 
- * - * Initialization - * - * JBCLASSER *jbRankHausInit() [rank hausdorff encoder] - * JBCLASSER *jbCorrelationInit() [correlation encoder] - * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto] - * static JBCLASSER *jbCorrelationInitInternal() - * - * Classify the pages - * - * l_int32 jbAddPages() - * l_int32 jbAddPage() - * l_int32 jbAddPageComponents() - * - * Rank hausdorff classifier - * - * l_int32 jbClassifyRankHaus() - * l_int32 pixHaustest() - * l_int32 pixRankHaustest() - * - * Binary correlation classifier - * - * l_int32 jbClassifyCorrelation() - * - * Determine the image components we start with - * - * l_int32 jbGetComponents() - * l_int32 pixWordMaskByDilation() - * l_int32 pixWordBoxesByDilation() - * - * Build grayscale composites (templates) - * - * PIXA *jbAccumulateComposites - * PIXA *jbTemplatesFromComposites - * - * Utility functions for Classer - * - * JBCLASSER *jbClasserCreate() - * void jbClasserDestroy() - * - * Utility functions for Data - * - * JBDATA *jbDataSave() - * void jbDataDestroy() - * l_int32 jbDataWrite() - * JBDATA *jbDataRead() - * PIXA *jbDataRender() - * l_int32 jbGetULCorners() - * l_int32 jbGetLLCorners() - * - * Static helpers - * - * static JBFINDCTX *findSimilarSizedTemplatesInit() - * static l_int32 findSimilarSizedTemplatesNext() - * static void findSimilarSizedTemplatesDestroy() - * static l_int32 finalPositioningForAlignment() - * - * Note: this is NOT an implementation of the JPEG jbig2 - * proposed standard encoder, the specifications for which - * can be found at http://www.jpeg.org/jbigpt2.html. - * (See below for a full implementation.) - * It is an implementation of the lower-level part of an encoder that: - * - * (1) identifies connected components that are going to be used - * (2) puts them in similarity classes (this is an unsupervised - * classifier), and - * (3) stores the result in a simple file format (2 files, - * one for templates and one for page/coordinate/template-index - * quartets). 
- * - * An actual implementation of the official jbig2 encoder could - * start with parts (1) and (2), and would then compress the quartets - * according to the standards requirements (e.g., Huffman or - * arithmetic coding of coordinate differences and image templates). - * - * The low-level part of the encoder provided here has the - * following useful features: - * - * ~ It is accurate in the identification of templates - * and classes because it uses a windowed hausdorff - * distance metric. - * ~ It is accurate in the placement of the connected - * components, doing a two step process of first aligning - * the the centroids of the template with those of each instance, - * and then making a further correction of up to +- 1 pixel - * in each direction to best align the templates. - * ~ It is fast because it uses a morphologically based - * matching algorithm to implement the hausdorff criterion, - * and it selects the patterns that are possible matches - * based on their size. - * - * We provide two different matching functions, one using Hausdorff - * distance and one using a simple image correlation. - * The Hausdorff method sometimes produces better results for the - * same number of classes, because it gives a relatively small - * effective weight to foreground pixels near the boundary, - * and a relatively large weight to foreground pixels that are - * not near the boundary. By effectively ignoring these boundary - * pixels, Hausdorff weighting corresponds better to the expected - * probabilities of the pixel values in a scanned image, where the - * variations in instances of the same printed character are much - * more likely to be in pixels near the boundary. By contrast, - * the correlation method gives equal weight to all foreground pixels. - * - * For best results, use the correlation method. 
Correlation takes - * the number of fg pixels in the AND of instance and template, - * divided by the product of the number of fg pixels in instance - * and template. It compares this with a threshold that, in - * general, depends on the fractional coverage of the template. - * For heavy text, the threshold is raised above that for light - * text, By using both these parameters (basic threshold and - * adjustment factor for text weight), one has more flexibility - * and can arrive at the fewest substitution errors, although - * this comes at the price of more templates. - * - * The strict Hausdorff scoring is not a rank weighting, because a - * single pixel beyond the given distance will cause a match - * failure. A rank Hausdorff is more robust to non-boundary noise, - * but it is also more susceptible to confusing components that - * should be in different classes. For implementing a jbig2 - * application for visually lossless binary image compression, - * you have two choices: - * - * (1) use a 3x3 structuring element (size = 3) and a strict - * Hausdorff comparison (rank = 1.0 in the rank Hausdorff - * function). This will result in a minimal number of classes, - * but confusion of small characters, such as italic and - * non-italic lower-case 'o', can still occur. - * (2) use the correlation method with a threshold of 0.85 - * and a weighting factor of about 0.7. This will result in - * a larger number of classes, but should not be confused - * either by similar small characters or by extremely - * thick sans serif characters, such as in prog/cootoots.png. - * - * As mentioned above, if visual substitution errors must be - * avoided, you should use the correlation method. 
- * - * We provide executables that show how to do the encoding: - * prog/jbrankhaus.c - * prog/jbcorrelation.c - * - * The basic flow for correlation classification goes as follows, - * where specific choices have been made for parameters (Hausdorff - * is the same except for initialization): - * - * // Initialize and save data in the classer - * JBCLASSER *classer = - * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7); - * SARRAY *safiles = getSortedPathnamesInDirectory(directory, - * NULL, 0, 0); - * jbAddPages(classer, safiles); - * - * // Save the data in a data structure for serialization, - * // and write it into two files. - * JBDATA *data = jbDataSave(classer); - * jbDataWrite(rootname, data); - * - * // Reconstruct (render) the pages from the encoded data. - * PIXA *pixa = jbDataRender(data, FALSE); - * - * Adam Langley has built a jbig2 standards-compliant encoder, the - * first one to appear in open source. You can get this encoder at: - * http://www.imperialviolet.org/jbig2.html - * - * It uses arithmetic encoding throughout. It encodes binary images - * losslessly with a single arithmetic coding over the full image. - * It also does both lossy and lossless encoding from connected - * components, using leptonica to generate the templates representing - * each cluster. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -static const l_int32 L_BUF_SIZE = 512; - - /* For jbClassifyRankHaus(): size of border added around - * pix of each c.c., to allow further processing. 
This - * should be at least the sum of the MAX_DIFF_HEIGHT - * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */ -static const l_int32 JB_ADDED_PIXELS = 6; - - /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore(): - * choose these to be 2 or greater */ -static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */ -static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */ - - /* In initialization, you have the option to discard components - * (cc, characters or words) that have either width or height larger - * than a given size. This is convenient for jbDataSave(), because - * the components are placed onto a regular lattice with cell - * dimension equal to the maximum component size. The default - * values are given here. If you want to save all components, - * use a sufficiently large set of dimensions. */ -static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */ -static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */ -static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */ -static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */ - - /* This stores the state of a state machine which fetches - * similar sized templates */ -struct JbFindTemplatesState -{ - JBCLASSER *classer; /* classer */ - l_int32 w; /* desired width */ - l_int32 h; /* desired height */ - l_int32 i; /* index into two_by_two step array */ - L_DNA *dna; /* current number array */ - l_int32 n; /* current element of dna */ -}; -typedef struct JbFindTemplatesState JBFINDCTX; - - /* Static initialization function */ -static JBCLASSER * jbCorrelationInitInternal(l_int32 components, - l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, - l_float32 weightfactor, l_int32 keep_components); - - /* Static helper functions */ -static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs); -static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context); -static void 
findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext); -static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, - l_int32 idelx, l_int32 idely, PIX *pixt, - l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy); - -#ifndef NO_CONSOLE_IO -#define DEBUG_CORRELATION_SCORE 0 -#endif /* ~NO_CONSOLE_IO */ - -/*----------------------------------------------------------------------* - * Initialization * - *----------------------------------------------------------------------*/ -/*! - * \brief jbRankHausInit() - * - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \param[in] maxwidth of component; use 0 for default - * \param[in] maxheight of component; use 0 for default - * \param[in] size of square structuring element; 2, representing - * 2x2 sel, is necessary for reasonable accuracy of - * small components; combine this with rank ~ 0.97 - * to avoid undue class expansion - * \param[in] rank rank val of match, each way; in [0.5 - 1.0]; - * when using size = 2, 0.97 is a reasonable value - * \return jbclasser if OK; NULL on error - */ -JBCLASSER * -jbRankHausInit(l_int32 components, - l_int32 maxwidth, - l_int32 maxheight, - l_int32 size, - l_float32 rank) -{ -JBCLASSER *classer; - - PROCNAME("jbRankHausInit"); - - if (components != JB_CONN_COMPS && components != JB_CHARACTERS && - components != JB_WORDS) - return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); - if (size < 1 || size > 10) - return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL); - if (rank < 0.5 || rank > 1.0) - return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL); - if (maxwidth == 0) { - if (components == JB_CONN_COMPS) - maxwidth = MAX_CONN_COMP_WIDTH; - else if (components == JB_CHARACTERS) - maxwidth = MAX_CHAR_COMP_WIDTH; - else /* JB_WORDS */ - maxwidth = MAX_WORD_COMP_WIDTH; - } - if (maxheight == 0) - maxheight = MAX_COMP_HEIGHT; - - if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL) - return (JBCLASSER 
*)ERROR_PTR("classer not made", procName, NULL); - classer->maxwidth = maxwidth; - classer->maxheight = maxheight; - classer->sizehaus = size; - classer->rankhaus = rank; - classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */ - classer->keep_pixaa = 1; /* keep all components in pixaa */ - return classer; -} - - -/*! - * \brief jbCorrelationInit() - * - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \param[in] maxwidth of component; use 0 for default - * \param[in] maxheight of component; use 0 for default - * \param[in] thresh value for correlation score: in [0.4 - 0.98] - * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0] - * \return jbclasser if OK; NULL on error - * - *
- * Notes:
- *      (1) For scanned text, suggested input values are:
- *            thresh ~ [0.8 - 0.85]
- *            weightfactor ~ [0.5 - 0.6]
- *      (2) For electronically generated fonts (e.g., rasterized pdf),
- *          a very high thresh (e.g., 0.95) will not cause a significant
- *          increase in the number of classes.
- * 
- */ -JBCLASSER * -jbCorrelationInit(l_int32 components, - l_int32 maxwidth, - l_int32 maxheight, - l_float32 thresh, - l_float32 weightfactor) -{ - return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, - weightfactor, 1); -} - -/*! - * \brief jbCorrelationInitWithoutComponents() - * - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \param[in] maxwidth of component; use 0 for default - * \param[in] maxheight of component; use 0 for default - * \param[in] thresh value for correlation score: in [0.4 - 0.98] - * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0] - * \return jbclasser if OK; NULL on error - * - *
- * Notes:
- *      Acts the same as jbCorrelationInit(), but the resulting
- *      object doesn't keep a list of all the components.
- * 
- */ -JBCLASSER * -jbCorrelationInitWithoutComponents(l_int32 components, - l_int32 maxwidth, - l_int32 maxheight, - l_float32 thresh, - l_float32 weightfactor) -{ - return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, - weightfactor, 0); -} - - -static JBCLASSER * -jbCorrelationInitInternal(l_int32 components, - l_int32 maxwidth, - l_int32 maxheight, - l_float32 thresh, - l_float32 weightfactor, - l_int32 keep_components) -{ -JBCLASSER *classer; - - PROCNAME("jbCorrelationInitInternal"); - - if (components != JB_CONN_COMPS && components != JB_CHARACTERS && - components != JB_WORDS) - return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); - if (thresh < 0.4 || thresh > 0.98) - return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]", - procName, NULL); - if (weightfactor < 0.0 || weightfactor > 1.0) - return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]", - procName, NULL); - if (maxwidth == 0) { - if (components == JB_CONN_COMPS) - maxwidth = MAX_CONN_COMP_WIDTH; - else if (components == JB_CHARACTERS) - maxwidth = MAX_CHAR_COMP_WIDTH; - else /* JB_WORDS */ - maxwidth = MAX_WORD_COMP_WIDTH; - } - if (maxheight == 0) - maxheight = MAX_COMP_HEIGHT; - - - if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL) - return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); - classer->maxwidth = maxwidth; - classer->maxheight = maxheight; - classer->thresh = thresh; - classer->weightfactor = weightfactor; - classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */ - classer->keep_pixaa = keep_components; - return classer; -} - - -/*----------------------------------------------------------------------* - * Classify the pages * - *----------------------------------------------------------------------*/ -/*! - * \brief jbAddPages() - * - * \param[in] jbclasser - * \param[in] safiles of page image file names - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) jbclasser makes a copy of the array of file names.
- *      (2) The caller is still responsible for destroying the input array.
- * 
- */ -l_ok -jbAddPages(JBCLASSER *classer, - SARRAY *safiles) -{ -l_int32 i, nfiles; -char *fname; -PIX *pix; - - PROCNAME("jbAddPages"); - - if (!classer) - return ERROR_INT("classer not defined", procName, 1); - if (!safiles) - return ERROR_INT("safiles not defined", procName, 1); - - classer->safiles = sarrayCopy(safiles); - nfiles = sarrayGetCount(safiles); - for (i = 0; i < nfiles; i++) { - fname = sarrayGetString(safiles, i, L_NOCOPY); - if ((pix = pixRead(fname)) == NULL) { - L_WARNING("image file %d not read\n", procName, i); - continue; - } - if (pixGetDepth(pix) != 1) { - L_WARNING("image file %d not 1 bpp\n", procName, i); - continue; - } - jbAddPage(classer, pix); - pixDestroy(&pix); - } - - return 0; -} - - -/*! - * \brief jbAddPage() - * - * \param[in] jbclasser - * \param[in] pixs input page - * \return 0 if OK; 1 on error - */ -l_ok -jbAddPage(JBCLASSER *classer, - PIX *pixs) -{ -BOXA *boxas; -PIXA *pixas; - - PROCNAME("jbAddPage"); - - if (!classer) - return ERROR_INT("classer not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - classer->w = pixGetWidth(pixs); - classer->h = pixGetHeight(pixs); - - /* Get the appropriate components and their bounding boxes */ - if (jbGetComponents(pixs, classer->components, classer->maxwidth, - classer->maxheight, &boxas, &pixas)) { - return ERROR_INT("components not made", procName, 1); - } - - jbAddPageComponents(classer, pixs, boxas, pixas); - boxaDestroy(&boxas); - pixaDestroy(&pixas); - return 0; -} - - -/*! - * \brief jbAddPageComponents() - * - * \param[in] jbclasser - * \param[in] pixs input page - * \param[in] boxas b.b. of components for this page - * \param[in] pixas components for this page - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If there are no components on the page, we don't require input
- *          of empty boxas or pixas, although that's the typical situation.
- * 
- */ -l_ok -jbAddPageComponents(JBCLASSER *classer, - PIX *pixs, - BOXA *boxas, - PIXA *pixas) -{ -l_int32 n; - - PROCNAME("jbAddPageComponents"); - - if (!classer) - return ERROR_INT("classer not defined", procName, 1); - if (!pixs) - return ERROR_INT("pix not defined", procName, 1); - - /* Test for no components on the current page. Always update the - * number of pages processed, even if nothing is on it. */ - if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) { - classer->npages++; - return 0; - } - - /* Get classes. For hausdorff, it uses a specified size of - * structuring element and specified rank. For correlation, - * it uses a specified threshold. */ - if (classer->method == JB_RANKHAUS) { - if (jbClassifyRankHaus(classer, boxas, pixas)) - return ERROR_INT("rankhaus classification failed", procName, 1); - } else { /* classer->method == JB_CORRELATION */ - if (jbClassifyCorrelation(classer, boxas, pixas)) - return ERROR_INT("correlation classification failed", procName, 1); - } - - /* Find the global UL corners, adjusted for each instance so - * that the class template and instance will have their - * centroids in the same place. Then the template can be - * used to replace the instance. */ - if (jbGetULCorners(classer, pixs, boxas)) - return ERROR_INT("UL corners not found", procName, 1); - - /* Update total component counts and number of pages processed. */ - n = boxaGetCount(boxas); - classer->baseindex += n; - numaAddNumber(classer->nacomps, n); - classer->npages++; - return 0; -} - - -/*----------------------------------------------------------------------* - * Classification using windowed rank hausdorff metric * - *----------------------------------------------------------------------*/ -/*! 
- * \brief jbClassifyRankHaus() - * - * \param[in] jbclasser - * \param[in] boxa new components for classification - * \param[in] pixas new components for classification - * \return 0 if OK; 1 on error - */ -l_ok -jbClassifyRankHaus(JBCLASSER *classer, - BOXA *boxa, - PIXA *pixas) -{ -l_int32 n, nt, i, wt, ht, iclass, size, found, testval; -l_int32 npages, area1, area3; -l_int32 *tab8; -l_float32 rank, x1, y1, x2, y2; -BOX *box; -NUMA *naclass, *napage; -NUMA *nafg; /* fg area of all instances */ -NUMA *nafgt; /* fg area of all templates */ -JBFINDCTX *findcontext; -L_DNAHASH *dahash; -PIX *pix, *pix1, *pix2, *pix3, *pix4; -PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd; -PIXAA *pixaa; -PTA *pta, *ptac, *ptact; -SEL *sel; - - PROCNAME("jbClassifyRankHaus"); - - if (!classer) - return ERROR_INT("classer not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (!pixas) - return ERROR_INT("pixas not defined", procName, 1); - if ((n = pixaGetCount(pixas)) == 0) - return ERROR_INT("pixas is empty", procName, 1); - if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */ - return ERROR_INT("fg counting failed", procName, 1); - - npages = classer->npages; - size = classer->sizehaus; - sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT); - - /* Generate the bordered pixa, with and without dilation. - * pixa1 and pixa2 contain all the input components. */ - pixa1 = pixaCreate(n); - pixa2 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixas, i, L_CLONE); - pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, - JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); - pix2 = pixDilate(NULL, pix1, sel); - pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */ - pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */ - pixDestroy(&pix); - } - - /* Get the centroids of all the bordered images. - * These are relative to the UL corner of each (bordered) pix. 
*/ - pta = pixaCentroids(pixa1); /* centroids for this page; use here */ - ptac = classer->ptac; /* holds centroids of components up to this page */ - ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */ - ptact = classer->ptact; /* holds centroids of templates */ - - /* Use these to save the class and page of each component. */ - naclass = classer->naclass; - napage = classer->napage; - - /* Store the unbordered pix in a pixaa, in a hierarchical - * set of arrays. There is one pixa for each class, - * and the pix in each pixa are all the instances found - * of that class. This is actually more than one would need - * for a jbig2 encoder, but there are two reasons to keep - * them around: (1) the set of instances for each class - * can be used to make an improved binary (or, better, - * a grayscale) template, rather than simply using the first - * one in the set; (2) we can investigate the failures - * of the classifier. This pixaa grows as we process - * successive pages. */ - pixaa = classer->pixaa; - - /* arrays to store class exemplars (templates) */ - pixat = classer->pixat; /* un-dilated */ - pixatd = classer->pixatd; /* dilated */ - - /* Fill up the pixaa tree with the template exemplars as - * the first pix in each pixa. As we add each pix, - * we also add the associated box to the pixa. - * We also keep track of the centroid of each pix, - * and use the difference between centroids (of the - * pix with the exemplar we are checking it with) - * to align the two when checking that the Hausdorff - * distance does not exceed a threshold. - * The threshold is set by the Sel used for dilating. - * For example, a 3x3 brick, sel_3, corresponds to a - * Hausdorff distance of 1. In general, for an NxN brick, - * with N odd, corresponds to a Hausdorff distance of (N - 1)/2. - * It turns out that we actually need to use a sel of size 2x2 - * to avoid small bad components when there is a halftone image - * from which components can be chosen. 
- * The larger the Sel you use, the fewer the number of classes, - * and the greater the likelihood of putting semantically - * different objects in the same class. For simplicity, - * we do this separately for the case of rank == 1.0 (exact - * match within the Hausdorff distance) and rank < 1.0. */ - rank = classer->rankhaus; - dahash = classer->dahash; - if (rank == 1.0) { - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa1, i, L_CLONE); - pix2 = pixaGetPix(pixa2, i, L_CLONE); - ptaGetPt(pta, i, &x1, &y1); - nt = pixaGetCount(pixat); /* number of templates */ - found = FALSE; - findcontext = findSimilarSizedTemplatesInit(classer, pix1); - while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { - /* Find score for this template */ - pix3 = pixaGetPix(pixat, iclass, L_CLONE); - pix4 = pixaGetPix(pixatd, iclass, L_CLONE); - ptaGetPt(ptact, iclass, &x2, &y2); - testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, - MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT); - pixDestroy(&pix3); - pixDestroy(&pix4); - if (testval == 1) { - found = TRUE; - numaAddNumber(naclass, iclass); - numaAddNumber(napage, npages); - if (classer->keep_pixaa) { - pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); - pix = pixaGetPix(pixas, i, L_CLONE); - pixaAddPix(pixa, pix, L_INSERT); - box = boxaGetBox(boxa, i, L_CLONE); - pixaAddBox(pixa, box, L_INSERT); - pixaDestroy(&pixa); - } - break; - } - } - findSimilarSizedTemplatesDestroy(&findcontext); - if (found == FALSE) { /* new class */ - numaAddNumber(naclass, nt); - numaAddNumber(napage, npages); - pixa = pixaCreate(0); - pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ - pixaAddPix(pixa, pix, L_INSERT); - wt = pixGetWidth(pix); - ht = pixGetHeight(pix); - l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); - box = boxaGetBox(boxa, i, L_CLONE); - pixaAddBox(pixa, box, L_INSERT); - pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ - ptaAddPt(ptact, x1, y1); - pixaAddPix(pixat, pix1, L_INSERT); /* bordered template 
*/ - pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */ - } else { /* don't save them */ - pixDestroy(&pix1); - pixDestroy(&pix2); - } - } - } else { /* rank < 1.0 */ - nafgt = classer->nafgt; - tab8 = makePixelSumTab8(); - for (i = 0; i < n; i++) { /* all instances on this page */ - pix1 = pixaGetPix(pixa1, i, L_CLONE); - numaGetIValue(nafg, i, &area1); - pix2 = pixaGetPix(pixa2, i, L_CLONE); - ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */ - nt = pixaGetCount(pixat); /* number of templates */ - found = FALSE; - findcontext = findSimilarSizedTemplatesInit(classer, pix1); - while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { - /* Find score for this template */ - pix3 = pixaGetPix(pixat, iclass, L_CLONE); - numaGetIValue(nafgt, iclass, &area3); - pix4 = pixaGetPix(pixatd, iclass, L_CLONE); - ptaGetPt(ptact, iclass, &x2, &y2); - testval = pixRankHaustest(pix1, pix2, pix3, pix4, - x1 - x2, y1 - y2, - MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, - area1, area3, rank, tab8); - pixDestroy(&pix3); - pixDestroy(&pix4); - if (testval == 1) { /* greedy match; take the first */ - found = TRUE; - numaAddNumber(naclass, iclass); - numaAddNumber(napage, npages); - if (classer->keep_pixaa) { - pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); - pix = pixaGetPix(pixas, i, L_CLONE); - pixaAddPix(pixa, pix, L_INSERT); - box = boxaGetBox(boxa, i, L_CLONE); - pixaAddBox(pixa, box, L_INSERT); - pixaDestroy(&pixa); - } - break; - } - } - findSimilarSizedTemplatesDestroy(&findcontext); - if (found == FALSE) { /* new class */ - numaAddNumber(naclass, nt); - numaAddNumber(napage, npages); - pixa = pixaCreate(0); - pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ - pixaAddPix(pixa, pix, L_INSERT); - wt = pixGetWidth(pix); - ht = pixGetHeight(pix); - l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); - box = boxaGetBox(boxa, i, L_CLONE); - pixaAddBox(pixa, box, L_INSERT); - pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ - ptaAddPt(ptact, x1, 
y1); - pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ - pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */ - numaAddNumber(nafgt, area1); - } else { /* don't save them */ - pixDestroy(&pix1); - pixDestroy(&pix2); - } - } - LEPT_FREE(tab8); - } - classer->nclass = pixaGetCount(pixat); - - numaDestroy(&nafg); - ptaDestroy(&pta); - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - selDestroy(&sel); - return 0; -} - - -/*! - * \brief pixHaustest() - * - * \param[in] pix1 new pix, not dilated - * \param[in] pix2 new pix, dilated - * \param[in] pix3 exemplar pix, not dilated - * \param[in] pix4 exemplar pix, dilated - * \param[in] delx x comp of centroid difference - * \param[in] dely y comp of centroid difference - * \param[in] maxdiffw max width difference of pix1 and pix2 - * \param[in] maxdiffh max height difference of pix1 and pix2 - * \return 0 FALSE) if no match, 1 (TRUE if the new - * pix is in the same class as the exemplar. - * - *
- * Notes:
- *  We check first that the two pix are roughly
- *  the same size.  Only if they meet that criterion do
- *  we compare the bitmaps.  The Hausdorff is a 2-way
- *  check.  The centroid difference is used to align the two
- *  images to the nearest integer for each of the checks.
- *  These check that the dilated image of one contains
- *  ALL the pixels of the undilated image of the other.
- *  Checks are done in both direction.  A single pixel not
- *  contained in either direction results in failure of the test.
- * 
- */ -l_int32 -pixHaustest(PIX *pix1, - PIX *pix2, - PIX *pix3, - PIX *pix4, - l_float32 delx, /* x(1) - x(3) */ - l_float32 dely, /* y(1) - y(3) */ - l_int32 maxdiffw, - l_int32 maxdiffh) -{ -l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; -PIX *pixt; - - /* Eliminate possible matches based on size difference */ - wi = pixGetWidth(pix1); - hi = pixGetHeight(pix1); - wt = pixGetWidth(pix3); - ht = pixGetHeight(pix3); - delw = L_ABS(wi - wt); - if (delw > maxdiffw) - return FALSE; - delh = L_ABS(hi - ht); - if (delh > maxdiffh) - return FALSE; - - /* Round difference in centroid location to nearest integer; - * use this as a shift when doing the matching. */ - if (delx >= 0) - idelx = (l_int32)(delx + 0.5); - else - idelx = (l_int32)(delx - 0.5); - if (dely >= 0) - idely = (l_int32)(dely + 0.5); - else - idely = (l_int32)(dely - 0.5); - - /* Do 1-direction hausdorff, checking that every pixel in pix1 - * is within a dilation distance of some pixel in pix3. Namely, - * that pix4 entirely covers pix1: - * pixt = pixSubtract(NULL, pix1, pix4), including shift - * where pixt has no ON pixels. */ - pixt = pixCreateTemplate(pix1); - pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); - pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), - pix4, 0, 0); - pixZero(pixt, &boolmatch); - if (boolmatch == 0) { - pixDestroy(&pixt); - return FALSE; - } - - /* Do 1-direction hausdorff, checking that every pixel in pix3 - * is within a dilation distance of some pixel in pix1. Namely, - * that pix2 entirely covers pix3: - * pixSubtract(pixt, pix3, pix2), including shift - * where pixt has no ON pixels. */ - pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); - pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); - pixZero(pixt, &boolmatch); - pixDestroy(&pixt); - return boolmatch; -} - - -/*! 
- * \brief pixRankHaustest() - * - * \param[in] pix1 new pix, not dilated - * \param[in] pix2 new pix, dilated - * \param[in] pix3 exemplar pix, not dilated - * \param[in] pix4 exemplar pix, dilated - * \param[in] delx x comp of centroid difference - * \param[in] dely y comp of centroid difference - * \param[in] maxdiffw max width difference of pix1 and pix2 - * \param[in] maxdiffh max height difference of pix1 and pix2 - * \param[in] area1 fg pixels in pix1 - * \param[in] area3 fg pixels in pix3 - * \param[in] rank rank value of test, each way - * \param[in] tab8 table of pixel sums for byte - * \return 0 FALSE) if no match, 1 (TRUE if the new - * pix is in the same class as the exemplar. - * - *
- * Notes:
- *  We check first that the two pix are roughly
- *  the same size.  Only if they meet that criterion do
- *  we compare the bitmaps.  We convert the rank value to
- *  a number of pixels by multiplying the rank fraction by the number
- *  of pixels in the undilated image.  The Hausdorff is a 2-way
- *  check.  The centroid difference is used to align the two
- *  images to the nearest integer for each of the checks.
- *  The rank hausdorff checks that the dilated image of one
- *  contains the rank fraction of the pixels of the undilated
- *  image of the other.   Checks are done in both direction.
- *  Failure of the test in either direction results in failure
- *  of the test.
- * 
- */ -l_int32 -pixRankHaustest(PIX *pix1, - PIX *pix2, - PIX *pix3, - PIX *pix4, - l_float32 delx, /* x(1) - x(3) */ - l_float32 dely, /* y(1) - y(3) */ - l_int32 maxdiffw, - l_int32 maxdiffh, - l_int32 area1, - l_int32 area3, - l_float32 rank, - l_int32 *tab8) -{ -l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; -l_int32 thresh1, thresh3; -PIX *pixt; - - /* Eliminate possible matches based on size difference */ - wi = pixGetWidth(pix1); - hi = pixGetHeight(pix1); - wt = pixGetWidth(pix3); - ht = pixGetHeight(pix3); - delw = L_ABS(wi - wt); - if (delw > maxdiffw) - return FALSE; - delh = L_ABS(hi - ht); - if (delh > maxdiffh) - return FALSE; - - /* Upper bounds in remaining pixels for allowable match */ - thresh1 = (l_int32)(area1 * (1. - rank) + 0.5); - thresh3 = (l_int32)(area3 * (1. - rank) + 0.5); - - /* Round difference in centroid location to nearest integer; - * use this as a shift when doing the matching. */ - if (delx >= 0) - idelx = (l_int32)(delx + 0.5); - else - idelx = (l_int32)(delx - 0.5); - if (dely >= 0) - idely = (l_int32)(dely + 0.5); - else - idely = (l_int32)(dely - 0.5); - - /* Do 1-direction hausdorff, checking that every pixel in pix1 - * is within a dilation distance of some pixel in pix3. Namely, - * that pix4 entirely covers pix1: - * pixt = pixSubtract(NULL, pix1, pix4), including shift - * where pixt has no ON pixels. */ - pixt = pixCreateTemplate(pix1); - pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); - pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), - pix4, 0, 0); - pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8); - if (boolmatch == 1) { /* above thresh1 */ - pixDestroy(&pixt); - return FALSE; - } - - /* Do 1-direction hausdorff, checking that every pixel in pix3 - * is within a dilation distance of some pixel in pix1. Namely, - * that pix2 entirely covers pix3: - * pixSubtract(pixt, pix3, pix2), including shift - * where pixt has no ON pixels. 
*/ - pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); - pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); - pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8); - pixDestroy(&pixt); - if (boolmatch == 1) /* above thresh3 */ - return FALSE; - else - return TRUE; -} - - -/*----------------------------------------------------------------------* - * Classification using windowed correlation score * - *----------------------------------------------------------------------*/ -/*! - * \brief jbClassifyCorrelation() - * - * \param[in] jbclasser - * \param[in] boxa new components for classification - * \param[in] pixas new components for classification - * \return 0 if OK; 1 on error - */ -l_ok -jbClassifyCorrelation(JBCLASSER *classer, - BOXA *boxa, - PIXA *pixas) -{ -l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages, - overthreshold; -l_int32 *sumtab, *centtab; -l_uint32 *row, word; -l_float32 x1, y1, x2, y2, xsum, ysum; -l_float32 thresh, weight, threshold; -BOX *box; -NUMA *naclass, *napage; -NUMA *nafgt; /* fg area of all templates */ -NUMA *naarea; /* w * h area of all templates */ -JBFINDCTX *findcontext; -L_DNAHASH *dahash; -PIX *pix, *pix1, *pix2; -PIXA *pixa, *pixa1, *pixat; -PIXAA *pixaa; -PTA *pta, *ptac, *ptact; -l_int32 *pixcts; /* pixel counts of each pixa */ -l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */ -l_int32 x, y, rowcount, downcount, wpl; -l_uint8 byte; - - PROCNAME("jbClassifyCorrelation"); - - if (!classer) - return ERROR_INT("classer not found", procName, 1); - if (!boxa) - return ERROR_INT("boxa not found", procName, 1); - if (!pixas) - return ERROR_INT("pixas not found", procName, 1); - - npages = classer->npages; - - /* Generate the bordered pixa, which contains all the the - * input components. This will not be saved. 
*/ - if ((n = pixaGetCount(pixas)) == 0) { - L_WARNING("pixas is empty\n", procName); - return 0; - } - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixas, i, L_CLONE); - pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, - JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); - pixaAddPix(pixa1, pix1, L_INSERT); - pixDestroy(&pix); - } - - /* Use these to save the class and page of each component. */ - naclass = classer->naclass; - napage = classer->napage; - - /* Get the number of fg pixels in each component. */ - nafgt = classer->nafgt; /* holds fg areas of the templates */ - sumtab = makePixelSumTab8(); - - pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts)); - pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts)); - centtab = makePixelCentroidTab8(); - - /* Count the "1" pixels in each row of the pix in pixa1; this - * allows pixCorrelationScoreThresholded to abort early if a match - * is impossible. This loop merges three calculations: the total - * number of "1" pixels, the number of "1" pixels in each row, and - * the centroid. The centroids are relative to the UL corner of - * each (bordered) pix. The pixrowcts[i][y] are the total number - * of fg pixels in pixa[i] below row y. 
*/ - pta = ptaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa1, i, L_CLONE); - pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix), - sizeof(**pixrowcts)); - xsum = 0; - ysum = 0; - wpl = pixGetWpl(pix); - row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl; - downcount = 0; - for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) { - pixrowcts[i][y] = downcount; - rowcount = 0; - for (x = 0; x < wpl; x++) { - word = row[x]; - byte = word & 0xff; - rowcount += sumtab[byte]; - xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte]; - byte = (word >> 8) & 0xff; - rowcount += sumtab[byte]; - xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte]; - byte = (word >> 16) & 0xff; - rowcount += sumtab[byte]; - xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte]; - byte = (word >> 24) & 0xff; - rowcount += sumtab[byte]; - xsum += centtab[byte] + x * 32 * sumtab[byte]; - } - downcount += rowcount; - ysum += rowcount * y; - } - pixcts[i] = downcount; - if (downcount > 0) { - ptaAddPt(pta, - xsum / (l_float32)downcount, ysum / (l_float32)downcount); - } else { /* no pixels; shouldn't happen */ - L_ERROR("downcount == 0 !\n", procName); - ptaAddPt(pta, pixGetWidth(pix) / 2, pixGetHeight(pix) / 2); - } - pixDestroy(&pix); - } - - ptac = classer->ptac; /* holds centroids of components up to this page */ - ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */ - ptact = classer->ptact; /* holds centroids of templates */ - - /* Store the unbordered pix in a pixaa, in a hierarchical - * set of arrays. There is one pixa for each class, - * and the pix in each pixa are all the instances found - * of that class. 
This is actually more than one would need - * for a jbig2 encoder, but there are two reasons to keep - * them around: (1) the set of instances for each class - * can be used to make an improved binary (or, better, - * a grayscale) template, rather than simply using the first - * one in the set; (2) we can investigate the failures - * of the classifier. This pixaa grows as we process - * successive pages. */ - pixaa = classer->pixaa; - - /* Array to store class exemplars */ - pixat = classer->pixat; - - /* Fill up the pixaa tree with the template exemplars as - * the first pix in each pixa. As we add each pix, - * we also add the associated box to the pixa. - * We also keep track of the centroid of each pix, - * and use the difference between centroids (of the - * pix with the exemplar we are checking it with) - * to align the two when checking that the correlation - * score exceeds a threshold. The correlation score - * is given by the square of the area of the AND - * between aligned instance and template, divided by - * the product of areas of each image. For identical - * template and instance, the score is 1.0. - * If the threshold is too small, non-equivalent instances - * will be placed in the same class; if too large, there will - * be an unnecessary division of classes representing the - * same character. The weightfactor adds in some of the - * difference (1.0 - thresh), depending on the heaviness - * of the template (measured as the fraction of fg pixels). 
*/ - thresh = classer->thresh; - weight = classer->weightfactor; - naarea = classer->naarea; - dahash = classer->dahash; - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa1, i, L_CLONE); - area1 = pixcts[i]; - ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */ - nt = pixaGetCount(pixat); - found = FALSE; - findcontext = findSimilarSizedTemplatesInit(classer, pix1); - while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { - /* Get the template */ - pix2 = pixaGetPix(pixat, iclass, L_CLONE); - numaGetIValue(nafgt, iclass, &area2); - ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */ - - /* Find threshold for this template */ - if (weight > 0.0) { - numaGetIValue(naarea, iclass, &area); - threshold = thresh + (1. - thresh) * weight * area2 / area; - } else { - threshold = thresh; - } - - /* Find score for this template */ - overthreshold = pixCorrelationScoreThresholded(pix1, pix2, - area1, area2, x1 - x2, y1 - y2, - MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, - sumtab, pixrowcts[i], threshold); -#if DEBUG_CORRELATION_SCORE - { - l_float32 score, testscore; - l_int32 count, testcount; - pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2, - MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, - sumtab, &score); - - pixCorrelationScoreSimple(pix1, pix2, area1, area2, - x1 - x2, y1 - y2, MAX_DIFF_WIDTH, - MAX_DIFF_HEIGHT, sumtab, &testscore); - count = (l_int32)rint(sqrt(score * area1 * area2)); - testcount = (l_int32)rint(sqrt(testscore * area1 * area2)); - if ((score >= threshold) != (testscore >= threshold)) { - lept_stderr("Correlation score mismatch: " - "%d(%g,%d) vs %d(%g,%d) (%g)\n", - count, score, score >= threshold, - testcount, testscore, testscore >= threshold, - score - testscore); - } - - if ((score >= threshold) != overthreshold) { - lept_stderr("Mismatch between correlation/threshold " - "comparison: %g(%g,%d) >= %g(%g) vs %s\n", - score, score*area1*area2, count, threshold, - threshold*area1*area2, - (overthreshold ? 
"true" : "false")); - } - } -#endif /* DEBUG_CORRELATION_SCORE */ - pixDestroy(&pix2); - - if (overthreshold) { /* greedy match */ - found = TRUE; - numaAddNumber(naclass, iclass); - numaAddNumber(napage, npages); - if (classer->keep_pixaa) { - /* We are keeping a record of all components */ - pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); - pix = pixaGetPix(pixas, i, L_CLONE); - pixaAddPix(pixa, pix, L_INSERT); - box = boxaGetBox(boxa, i, L_CLONE); - pixaAddBox(pixa, box, L_INSERT); - pixaDestroy(&pixa); - } - break; - } - } - findSimilarSizedTemplatesDestroy(&findcontext); - if (found == FALSE) { /* new class */ - numaAddNumber(naclass, nt); - numaAddNumber(napage, npages); - pixa = pixaCreate(0); - pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ - pixaAddPix(pixa, pix, L_INSERT); - wt = pixGetWidth(pix); - ht = pixGetHeight(pix); - l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); - box = boxaGetBox(boxa, i, L_CLONE); - pixaAddBox(pixa, box, L_INSERT); - pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ - ptaAddPt(ptact, x1, y1); - numaAddNumber(nafgt, area1); - pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ - area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) * - (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS); - numaAddNumber(naarea, area); - } else { /* don't save it */ - pixDestroy(&pix1); - } - } - classer->nclass = pixaGetCount(pixat); - - LEPT_FREE(pixcts); - LEPT_FREE(centtab); - for (i = 0; i < n; i++) { - LEPT_FREE(pixrowcts[i]); - } - LEPT_FREE(pixrowcts); - - LEPT_FREE(sumtab); - ptaDestroy(&pta); - pixaDestroy(&pixa1); - return 0; -} - - -/*----------------------------------------------------------------------* - * Determine the image components we start with * - *----------------------------------------------------------------------*/ -/*! 
- * \brief jbGetComponents() - * - * \param[in] pixs 1 bpp - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \param[in] maxwidth of saved components; larger are discarded - * \param[in] maxheight of saved components; larger are discarded - * \param[out] ppboxa b.b. of component items - * \param[out] pppixa component items - * \return 0 if OK, 1 on error - */ -l_ok -jbGetComponents(PIX *pixs, - l_int32 components, - l_int32 maxwidth, - l_int32 maxheight, - BOXA **pboxad, - PIXA **ppixad) -{ -l_int32 empty, res, redfactor; -BOXA *boxa; -PIX *pix1, *pix2, *pix3; -PIXA *pixa, *pixat; - - PROCNAME("jbGetComponents"); - - if (!pboxad) - return ERROR_INT("&boxad not defined", procName, 1); - *pboxad = NULL; - if (!ppixad) - return ERROR_INT("&pixad not defined", procName, 1); - *ppixad = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (components != JB_CONN_COMPS && components != JB_CHARACTERS && - components != JB_WORDS) - return ERROR_INT("invalid components", procName, 1); - - pixZero(pixs, &empty); - if (empty) { - *pboxad = boxaCreate(0); - *ppixad = pixaCreate(0); - return 0; - } - - /* If required, preprocess input pixs. The method for both - * characters and words is to generate a connected component - * mask over the units that we want to aggregrate, which are, - * in general, sets of related connected components in pixs. - * For characters, we want to include the dots with - * 'i', 'j' and '!', so we do a small vertical closing to - * generate the mask. For words, we make a mask over all - * characters in each word. This is a bit more tricky, because - * the spacing between words is difficult to predict a priori, - * and words can be typeset with variable spacing that can - * in some cases be barely larger than the space between - * characters. The first step is to generate the mask and - * identify each of its connected components. 
*/ - if (components == JB_CONN_COMPS) { /* no preprocessing */ - boxa = pixConnComp(pixs, &pixa, 8); - } else if (components == JB_CHARACTERS) { - pix1 = pixMorphSequence(pixs, "c1.6", 0); - boxa = pixConnComp(pix1, &pixat, 8); - pixa = pixaClipToPix(pixat, pixs); - pixDestroy(&pix1); - pixaDestroy(&pixat); - } else { /* components == JB_WORDS */ - - /* Do the operations at about 150 ppi resolution. - * It is much faster at 75 ppi, but the results are - * more accurate at 150 ppi. This will segment the - * words in body text. It can be expected that relatively - * infrequent words in a larger font will be split. */ - res = pixGetXRes(pixs); - if (res <= 200) { - redfactor = 1; - pix1 = pixClone(pixs); - } else if (res <= 400) { - redfactor = 2; - pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); - } else { - redfactor = 4; - pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); - } - - /* Estimate the word mask, at approximately 150 ppi. - * This has both very large and very small components left in. */ - pixWordMaskByDilation(pix1, &pix2, NULL, NULL); - - /* Expand the optimally dilated word mask to full res. */ - pix3 = pixExpandReplicate(pix2, redfactor); - - /* Pull out the pixels in pixs corresponding to the mask - * components in pix3. Note that above we used threshold - * levels in the reduction of 1 to insure that the resulting - * mask fully covers the input pixs. The downside of using - * a threshold of 1 is that very close characters from adjacent - * lines can be joined. But with a level of 2 or greater, - * it is necessary to use a seedfill, followed by a pixOr(): - * pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8); - * pixOr(pix3, pix3, pixt4); - * to insure that the mask coverage is complete over pixs. */ - boxa = pixConnComp(pix3, &pixat, 4); - pixa = pixaClipToPix(pixat, pixs); - pixaDestroy(&pixat); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - } - - /* Remove large components, and save the results. 
*/ - *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH, - L_SELECT_IF_LTE, NULL); - *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH, - L_SELECT_IF_LTE, NULL); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - - return 0; -} - - -/*! - * \brief pixWordMaskByDilation() - * - * \param[in] pixs 1 bpp; typ. at 75 to 150 ppi - * \param[out] pmask [optional] dilated word mask - * \param[out] psize [optional] size of good horizontal dilation - * \param[out] pixadb [optional] debug: pixa of intermediate steps - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This gives an estimate of the word masks.  See
- *          pixWordBoxesByDilation() for further filtering of the word boxes.
- *      (2) The resolution should be between 75 and 150 ppi, and the optimal
- *          dilation will be between 3 and 10.
- *      (3) A good size for dilating to get word masks is optionally returned.
- *      (4) Typically, the number of c.c. reduced with each successive
- *          dilation (stored in nadiff) decreases quickly to a minimum
- *          (where the characters in a word are joined), and then
- *          increases again as the smaller number of words are joined.
- *          For the typical case, you can then look for this minimum
- *          and dilate to get the word mask.  However, there are many
- *          cases where the function is not so simple. For example, if the
- *          pix has been upscaled 2x, the nadiff function oscillates, with
- *          every other value being zero!  And for some images it tails
- *          off without a clear minimum to indicate where to break.
- *          So a more simple and robust method is to find the dilation
- *          where the initial number of c.c. has been reduced by some
- *          fraction (we use a 70% reduction).
- * 
- */ -l_ok -pixWordMaskByDilation(PIX *pixs, - PIX **ppixm, - l_int32 *psize, - PIXA *pixadb) -{ -l_int32 i, n, ndil, maxdiff, diff, ibest; -l_int32 check, count, total, xres; -l_int32 ncc[13]; /* max dilation + 1 */ -l_int32 *diffa; -BOXA *boxa; -NUMA *nacc, *nadiff; -PIX *pix1, *pix2; - - PROCNAME("pixWordMaskByDilation"); - - if (ppixm) *ppixm = NULL; - if (psize) *psize = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - if (!ppixm && !psize) - return ERROR_INT("no output requested", procName, 1); - - /* Find a good dilation to create the word mask, by successively - * increasing dilation size and counting the connected components. */ - pix1 = pixCopy(NULL, pixs); - ndil = 12; /* appropriate for 75 to 150 ppi */ - nacc = numaCreate(ndil + 1); - nadiff = numaCreate(ndil + 1); - for (i = 0; i <= ndil; i++) { - if (i == 0) /* first one not dilated */ - pix2 = pixCopy(NULL, pix1); - else /* successive dilation by sel_2h */ - pix2 = pixMorphSequence(pix1, "d2.1", 0); - boxa = pixConnCompBB(pix2, 4); - ncc[i] = boxaGetCount(boxa); - numaAddNumber(nacc, ncc[i]); - if (i == 0) total = ncc[0]; - if (i > 0) { - diff = ncc[i - 1] - ncc[i]; - numaAddNumber(nadiff, diff); - } - pixDestroy(&pix1); - pix1 = pix2; - boxaDestroy(&boxa); - } - pixDestroy(&pix1); - - /* Find the dilation at which the c.c. count has reduced - * to 30% of the initial value. Although 30% seems high, - * it seems better to use this but add one to ibest. 
*/ - diffa = numaGetIArray(nadiff); - n = numaGetCount(nadiff); - maxdiff = 0; - check = TRUE; - ibest = 2; - for (i = 1; i < n; i++) { - numaGetIValue(nacc, i, &count); - if (check && count < 0.3 * total) { - ibest = i + 1; - check = FALSE; - } - diff = diffa[i]; - if (diff > maxdiff) - maxdiff = diff; - } - LEPT_FREE(diffa); - - /* Add small compensation for higher resolution */ - xres = pixGetXRes(pixs); - if (xres == 0) xres = 150; - if (xres > 110) ibest++; - if (ibest < 2) { - L_INFO("setting ibest to minimum allowed value of 2\n", procName); - ibest = 2; - } - - if (pixadb) { - lept_mkdir("lept/jb"); - {GPLOT *gplot; - NUMA *naseq; - PIX *pix3, *pix4; - L_INFO("Best dilation: %d\n", procName, L_MAX(3, ibest + 1)); - naseq = numaMakeSequence(1, 1, numaGetCount(nacc)); - pix3 = gplotGeneralPix2(naseq, nacc, GPLOT_LINES, - "/tmp/lept/jb/numcc", - "Number of cc vs. horizontal dilation", - "Sel horiz", "Number of cc"); - pixaAddPix(pixadb, pix3, L_INSERT); - numaDestroy(&naseq); - naseq = numaMakeSequence(1, 1, numaGetCount(nadiff)); - pix3 = gplotGeneralPix2(naseq, nadiff, GPLOT_LINES, - "/tmp/lept/jb/diffcc", - "Diff count of cc vs. horizontal dilation", - "Sel horiz", "Diff in cc"); - pixaAddPix(pixadb, pix3, L_INSERT); - numaDestroy(&naseq); - pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1); - pix4 = pixScaleToSize(pix3, 600, 0); - pixaAddPix(pixadb, pix4, L_INSERT); - pixDestroy(&pix3); - } - } - - if (psize) *psize = ibest + 1; - if (ppixm) - *ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1); - - numaDestroy(&nacc); - numaDestroy(&nadiff); - return 0; -} - - -/*! - * \brief pixWordBoxesByDilation() - * - * \param[in] pixs 1 bpp; typ. 
75 - 200 ppi - * \param[in] minwidth saved components; smaller are discarded - * \param[in] minheight saved components; smaller are discarded - * \param[in] maxwidth saved components; larger are discarded - * \param[in] maxheight saved components; larger are discarded - * \param[out] pboxa of dilated word mask - * \param[out] psize [optional] size of good horizontal dilation - * \param[out] pixadb [optional] debug: pixa of intermediate steps - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Returns a pruned set of word boxes.
- *      (2) See pixWordMaskByDilation().
- * 
- */ -l_ok -pixWordBoxesByDilation(PIX *pixs, - l_int32 minwidth, - l_int32 minheight, - l_int32 maxwidth, - l_int32 maxheight, - BOXA **pboxa, - l_int32 *psize, - PIXA *pixadb) -{ -BOXA *boxa1, *boxa2; -PIX *pix1, *pix2; - - PROCNAME("pixWordBoxesByDilation"); - - if (psize) *psize = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - if (!pboxa) - return ERROR_INT("&boxa not defined", procName, 1); - *pboxa = NULL; - - /* Make a first estimate of the word mask */ - if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb)) - return ERROR_INT("pixWordMaskByDilation() failed", procName, 1); - - /* Prune the word mask. Get the bounding boxes of the words. - * Remove the small ones, which can be due to punctuation - * that was not joined to a word. Also remove the large ones, - * which are not likely to be words. */ - boxa1 = pixConnComp(pix1, NULL, 8); - boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH, - L_SELECT_IF_GTE, NULL); - *pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH, - L_SELECT_IF_LTE, NULL); - if (pixadb) { - pix2 = pixUnpackBinary(pixs, 32, 1); - pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0); - pixaAddPix(pixadb, pix2, L_INSERT); - pix2 = pixUnpackBinary(pixs, 32, 1); - pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0); - pixaAddPix(pixadb, pix2, L_INSERT); - } - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - pixDestroy(&pix1); - return 0; -} - - -/*----------------------------------------------------------------------* - * Build grayscale composites (templates) * - *----------------------------------------------------------------------*/ -/*! 
- * \brief jbAccumulateComposites() - * - * \param[in] pixaa one pixa for each class - * \param[out] ppna number of samples used to build each composite - * \param[out] pptat centroids of bordered composites - * \return pixad accumulated sum of samples in each class, or NULL on error - * - */ -PIXA * -jbAccumulateComposites(PIXAA *pixaa, - NUMA **pna, - PTA **pptat) -{ -l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff; -l_float32 x, y, xave, yave; -NUMA *na; -PIX *pix, *pixt1, *pixt2, *pixsum; -PIXA *pixa, *pixad; -PTA *ptat, *pta; - - PROCNAME("jbAccumulateComposites"); - - if (!pptat) - return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL); - *pptat = NULL; - if (!pna) - return (PIXA *)ERROR_PTR("&na not defined", procName, NULL); - *pna = NULL; - if (!pixaa) - return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL); - - n = pixaaGetCount(pixaa, NULL); - if ((ptat = ptaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("ptat not made", procName, NULL); - *pptat = ptat; - pixad = pixaCreate(n); - na = numaCreate(n); - *pna = na; - - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(pixaa, i, L_CLONE); - nt = pixaGetCount(pixa); - numaAddNumber(na, nt); - if (nt == 0) { - L_WARNING("empty pixa found!\n", procName); - pixaDestroy(&pixa); - continue; - } - pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh); - pix = pixaGetPix(pixa, 0, L_CLONE); - d = pixGetDepth(pix); - pixDestroy(&pix); - pixt1 = pixCreate(maxw, maxh, d); - pixsum = pixInitAccumulate(maxw, maxh, 0); - pta = pixaCentroids(pixa); - - /* Find the average value of the centroids ... 
*/ - xave = yave = 0; - for (j = 0; j < nt; j++) { - ptaGetPt(pta, j, &x, &y); - xave += x; - yave += y; - } - xave = xave / (l_float32)nt; - yave = yave / (l_float32)nt; - - /* and place all centroids at their average value */ - for (j = 0; j < nt; j++) { - pixt2 = pixaGetPix(pixa, j, L_CLONE); - ptaGetPt(pta, j, &x, &y); - xdiff = (l_int32)(x - xave); - ydiff = (l_int32)(y - yave); - pixClearAll(pixt1); - pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC, - pixt2, 0, 0); - pixAccumulate(pixsum, pixt1, L_ARITH_ADD); - pixDestroy(&pixt2); - } - pixaAddPix(pixad, pixsum, L_INSERT); - ptaAddPt(ptat, xave, yave); - - pixaDestroy(&pixa); - pixDestroy(&pixt1); - ptaDestroy(&pta); - } - - return pixad; -} - - -/*! - * \brief jbTemplatesFromComposites() - * - * \param[in] pixac one pix of composites for each class - * \param[in] na number of samples used for each class composite - * \return pixad 8 bpp templates for each class, or NULL on error - * - */ -PIXA * -jbTemplatesFromComposites(PIXA *pixac, - NUMA *na) -{ -l_int32 n, i; -l_float32 nt; /* number of samples in the composite; always an integer */ -l_float32 factor; -PIX *pixsum; /* accumulated composite */ -PIX *pixd; -PIXA *pixad; - - PROCNAME("jbTemplatesFromComposites"); - - if (!pixac) - return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL); - if (!na) - return (PIXA *)ERROR_PTR("na not defined", procName, NULL); - - n = pixaGetCount(pixac); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */ - numaGetFValue(na, i, &nt); - factor = 255. / nt; - pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */ - pixd = pixFinalAccumulate(pixsum, 0, 8); - pixaAddPix(pixad, pixd, L_INSERT); - pixDestroy(&pixsum); - } - - return pixad; -} - - - -/*----------------------------------------------------------------------* - * jbig2 utility routines * - *----------------------------------------------------------------------*/ -/*! 
- * \brief jbClasserCreate() - * - * \param[in] method JB_RANKHAUS, JB_CORRELATION - * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS - * \return jbclasser, or NULL on error - */ -JBCLASSER * -jbClasserCreate(l_int32 method, - l_int32 components) -{ -JBCLASSER *classer; - - PROCNAME("jbClasserCreate"); - - if (method != JB_RANKHAUS && method != JB_CORRELATION) - return (JBCLASSER *)ERROR_PTR("invalid method", procName, NULL); - if (components != JB_CONN_COMPS && components != JB_CHARACTERS && - components != JB_WORDS) - return (JBCLASSER *)ERROR_PTR("invalid component", procName, NULL); - - classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER)); - classer->method = method; - classer->components = components; - classer->nacomps = numaCreate(0); - classer->pixaa = pixaaCreate(0); - classer->pixat = pixaCreate(0); - classer->pixatd = pixaCreate(0); - classer->nafgt = numaCreate(0); - classer->naarea = numaCreate(0); - classer->ptac = ptaCreate(0); - classer->ptact = ptaCreate(0); - classer->naclass = numaCreate(0); - classer->napage = numaCreate(0); - classer->ptaul = ptaCreate(0); - return classer; -} - - -/* - * \brief jbClasserDestroy() - * - * \param[in,out] pclasser will be set to null before returning - * \return void - */ -void -jbClasserDestroy(JBCLASSER **pclasser) -{ -JBCLASSER *classer; - - if (!pclasser) - return; - if ((classer = *pclasser) == NULL) - return; - - sarrayDestroy(&classer->safiles); - numaDestroy(&classer->nacomps); - pixaaDestroy(&classer->pixaa); - pixaDestroy(&classer->pixat); - pixaDestroy(&classer->pixatd); - l_dnaHashDestroy(&classer->dahash); - numaDestroy(&classer->nafgt); - numaDestroy(&classer->naarea); - ptaDestroy(&classer->ptac); - ptaDestroy(&classer->ptact); - numaDestroy(&classer->naclass); - numaDestroy(&classer->napage); - ptaDestroy(&classer->ptaul); - ptaDestroy(&classer->ptall); - LEPT_FREE(classer); - *pclasser = NULL; - return; -} - - -/*! 
- * \brief jbDataSave() - * - * \param[in] jbclasser - * \param[in] latticew cell width used to store each connected - * component in the composite - * \param[in] latticeh ditto for cell height - * \return jbdata, or NULL on error - * - *
- * Notes:
- *      (1) This routine stores the jbig2-type data required for
- *          generating a lossy jbig2 version of the image.
- *          It can be losslessly written to (and read from) two files.
- *      (2) It generates and stores the mosaic of templates.
- *      (3) It clones the Numa and Pta arrays, so these must all
- *          be destroyed by the caller.
- *      (4) Input 0 to use the default values for latticew and/or latticeh,
- * 
- */ -JBDATA * -jbDataSave(JBCLASSER *classer) -{ -l_int32 maxw, maxh; -JBDATA *data; -PIX *pix; - - PROCNAME("jbDataSave"); - - if (!classer) - return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL); - - /* Write the templates into an array. */ - pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh); - pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1, - NULL, NULL); - if (!pix) - return (JBDATA *)ERROR_PTR("data not made", procName, NULL); - - data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA)); - data->pix = pix; - data->npages = classer->npages; - data->w = classer->w; - data->h = classer->h; - data->nclass = classer->nclass; - data->latticew = maxw + 1; - data->latticeh = maxh + 1; - data->naclass = numaClone(classer->naclass); - data->napage = numaClone(classer->napage); - data->ptaul = ptaClone(classer->ptaul); - return data; -} - - -/* - * \brief jbDataDestroy() - * - * \param[in,out] pdata will be set to null before returning - * \return void - */ -void -jbDataDestroy(JBDATA **pdata) -{ -JBDATA *data; - - if (!pdata) - return; - if ((data = *pdata) == NULL) - return; - - pixDestroy(&data->pix); - numaDestroy(&data->naclass); - numaDestroy(&data->napage); - ptaDestroy(&data->ptaul); - LEPT_FREE(data); - *pdata = NULL; - return; -} - - -/*! - * \brief jbDataWrite() - * - * \param[in] rootname for output files; everything but the extension - * \param[in] jbdata - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serialization function that writes data in jbdata to file.
- * 
- */ -l_ok -jbDataWrite(const char *rootout, - JBDATA *jbdata) -{ -char buf[L_BUF_SIZE]; -l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage; -NUMA *naclass, *napage; -PTA *ptaul; -PIX *pixt; -FILE *fp; - - PROCNAME("jbDataWrite"); - - if (!rootout) - return ERROR_INT("no rootout", procName, 1); - if (!jbdata) - return ERROR_INT("no jbdata", procName, 1); - - npages = jbdata->npages; - w = jbdata->w; - h = jbdata->h; - pixt = jbdata->pix; - nclass = jbdata->nclass; - cellw = jbdata->latticew; - cellh = jbdata->latticeh; - naclass = jbdata->naclass; - napage = jbdata->napage; - ptaul = jbdata->ptaul; - - snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT); - pixWrite(buf, pixt, IFF_PNG); - - snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT); - if ((fp = fopenWriteStream(buf, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ncomp = ptaGetCount(ptaul); - fprintf(fp, "jb data file\n"); - fprintf(fp, "num pages = %d\n", npages); - fprintf(fp, "page size: w = %d, h = %d\n", w, h); - fprintf(fp, "num components = %d\n", ncomp); - fprintf(fp, "num classes = %d\n", nclass); - fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh); - for (i = 0; i < ncomp; i++) { - numaGetIValue(napage, i, &ipage); - numaGetIValue(naclass, i, &iclass); - ptaGetIPt(ptaul, i, &x, &y); - fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y); - } - fclose(fp); - - return 0; -} - - -/*! 
- * \brief jbDataRead() - * - * \param[in] rootname for template and data files - * \return jbdata, or NULL on error - */ -JBDATA * -jbDataRead(const char *rootname) -{ -char fname[L_BUF_SIZE]; -char *linestr; -l_uint8 *data; -l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage; -l_int32 npages, nclass, ncomp, ninit; -size_t size; -JBDATA *jbdata; -NUMA *naclass, *napage; -PIX *pixs; -PTA *ptaul; -SARRAY *sa; - - PROCNAME("jbDataRead"); - - if (!rootname) - return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL); - - snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT); - if ((pixs = pixRead(fname)) == NULL) - return (JBDATA *)ERROR_PTR("pix not read", procName, NULL); - - snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT); - if ((data = l_binaryRead(fname, &size)) == NULL) { - pixDestroy(&pixs); - return (JBDATA *)ERROR_PTR("data not read", procName, NULL); - } - - if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) { - pixDestroy(&pixs); - LEPT_FREE(data); - return (JBDATA *)ERROR_PTR("sa not made", procName, NULL); - } - nsa = sarrayGetCount(sa); /* number of cc + 6 */ - linestr = sarrayGetString(sa, 0, L_NOCOPY); - if (strcmp(linestr, "jb data file") != 0) { - pixDestroy(&pixs); - LEPT_FREE(data); - sarrayDestroy(&sa); - return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL); - } - linestr = sarrayGetString(sa, 1, L_NOCOPY); - sscanf(linestr, "num pages = %d", &npages); - linestr = sarrayGetString(sa, 2, L_NOCOPY); - sscanf(linestr, "page size: w = %d, h = %d", &w, &h); - linestr = sarrayGetString(sa, 3, L_NOCOPY); - sscanf(linestr, "num components = %d", &ncomp); - linestr = sarrayGetString(sa, 4, L_NOCOPY); - sscanf(linestr, "num classes = %d\n", &nclass); - linestr = sarrayGetString(sa, 5, L_NOCOPY); - sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh); - -#if 1 - lept_stderr("num pages = %d\n", npages); - lept_stderr("page size: w = %d, h = %d\n", w, h); - lept_stderr("num 
components = %d\n", ncomp); - lept_stderr("num classes = %d\n", nclass); - lept_stderr("template lattice size: w = %d, h = %d\n", cellw, cellh); -#endif - - ninit = ncomp; - if (ncomp > 1000000) { /* fuzz protection */ - L_WARNING("ncomp > 1M\n", procName); - ninit = 1000000; - } - naclass = numaCreate(ninit); - napage = numaCreate(ninit); - ptaul = ptaCreate(ninit); - for (i = 6; i < nsa; i++) { - linestr = sarrayGetString(sa, i, L_NOCOPY); - sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y); - numaAddNumber(napage, ipage); - numaAddNumber(naclass, iclass); - ptaAddPt(ptaul, x, y); - } - - jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA)); - jbdata->pix = pixs; - jbdata->npages = npages; - jbdata->w = w; - jbdata->h = h; - jbdata->nclass = nclass; - jbdata->latticew = cellw; - jbdata->latticeh = cellh; - jbdata->naclass = naclass; - jbdata->napage = napage; - jbdata->ptaul = ptaul; - - LEPT_FREE(data); - sarrayDestroy(&sa); - return jbdata; -} - - -/*! - * \brief jbDataRender() - * - * \param[in] jbdata - * \param[in] debugflag if TRUE, writes into 2 bpp pix and adds - * component outlines in color - * \return pixa reconstruction of original images, using templates or - * NULL on error - */ -PIXA * -jbDataRender(JBDATA *data, - l_int32 debugflag) -{ -l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage; -l_int32 npages, nclass, ncomp, wp, hp; -BOX *box; -NUMA *naclass, *napage; -PIX *pixt, *pixt2, *pix, *pixd; -PIXA *pixat; /* pixa of templates */ -PIXA *pixad; /* pixa of output images */ -PIXCMAP *cmap; -PTA *ptaul; - - PROCNAME("jbDataRender"); - - if (!data) - return (PIXA *)ERROR_PTR("data not defined", procName, NULL); - - npages = data->npages; - w = data->w; - h = data->h; - pixt = data->pix; - nclass = data->nclass; - cellw = data->latticew; - cellh = data->latticeh; - naclass = data->naclass; - napage = data->napage; - ptaul = data->ptaul; - ncomp = numaGetCount(naclass); - - /* Reconstruct the original set of images from the templates - * and the 
data associated with each component. First, - * generate the output pixa as a set of empty pix. */ - if ((pixad = pixaCreate(npages)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - for (i = 0; i < npages; i++) { - if (debugflag == FALSE) { - pix = pixCreate(w, h, 1); - } else { - pix = pixCreate(w, h, 2); - cmap = pixcmapCreate(2); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */ - pixSetColormap(pix, cmap); - } - pixaAddPix(pixad, pix, L_INSERT); - } - - /* Put the class templates into a pixa. */ - if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) { - pixaDestroy(&pixad); - return (PIXA *)ERROR_PTR("pixat not made", procName, NULL); - } - - /* Place each component in the right location on its page. */ - for (i = 0; i < ncomp; i++) { - numaGetIValue(napage, i, &ipage); - numaGetIValue(naclass, i, &iclass); - pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */ - wp = pixGetWidth(pix); - hp = pixGetHeight(pix); - ptaGetIPt(ptaul, i, &x, &y); - pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */ - if (debugflag == FALSE) { - pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0); - } else { - pixt2 = pixConvert1To2Cmap(pix); - pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0); - box = boxCreate(x, y, wp, hp); - pixRenderBoxArb(pixd, box, 1, 255, 0, 0); - pixDestroy(&pixt2); - boxDestroy(&box); - } - pixDestroy(&pix); /* the clone only */ - pixDestroy(&pixd); /* the clone only */ - } - - pixaDestroy(&pixat); - return pixad; -} - - -/*! - * \brief jbGetULCorners() - * - * \param[in] jbclasser - * \param[in] pixs full res image - * \param[in] boxa of c.c. bounding rectangles for this page - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes the ptaul field, which has the global UL corners,
- *          adjusted for each specific component, so that each component
- *          can be replaced by the template for its class and have the
- *          centroid in the template in the same position as the
- *          centroid of the original connected component.  It is important
- *          that this be done properly to avoid a wavy baseline in the
- *          result.
- *      (2) The array fields ptac and ptact give the centroids of
- *          those components relative to the UL corner of each component.
- *          Here, we compute the difference in each component, round to
- *          nearest integer, and correct the box->x and box->y by
- *          the appropriate integral difference.
- *      (3) The templates and stored instances are all bordered.
- * 
- */ -l_ok -jbGetULCorners(JBCLASSER *classer, - PIX *pixs, - BOXA *boxa) -{ -l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy; -l_int32 *sumtab; -l_float32 x1, x2, y1, y2, delx, dely; -BOX *box; -NUMA *naclass; -PIX *pixt; -PTA *ptac, *ptact, *ptaul; - - PROCNAME("jbGetULCorners"); - - if (!classer) - return ERROR_INT("classer not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - - n = boxaGetCount(boxa); - ptaul = classer->ptaul; - naclass = classer->naclass; - ptac = classer->ptac; - ptact = classer->ptact; - baseindex = classer->baseindex; /* num components before this page */ - sumtab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - index = baseindex + i; - ptaGetPt(ptac, index, &x1, &y1); - numaGetIValue(naclass, index, &iclass); - ptaGetPt(ptact, iclass, &x2, &y2); - delx = x2 - x1; - dely = y2 - y1; - if (delx >= 0) - idelx = (l_int32)(delx + 0.5); - else - idelx = (l_int32)(delx - 0.5); - if (dely >= 0) - idely = (l_int32)(dely + 0.5); - else - idely = (l_int32)(dely - 0.5); - if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) { - LEPT_FREE(sumtab); - return ERROR_INT("box not found", procName, 1); - } - boxGetGeometry(box, &x, &y, NULL, NULL); - - /* Get final increments dx and dy for best alignment */ - pixt = pixaGetPix(classer->pixat, iclass, L_CLONE); - finalPositioningForAlignment(pixs, x, y, idelx, idely, - pixt, sumtab, &dx, &dy); -/* if (i % 20 == 0) - lept_stderr("dx = %d, dy = %d\n", dx, dy); */ - ptaAddPt(ptaul, x - idelx + dx, y - idely + dy); - boxDestroy(&box); - pixDestroy(&pixt); - } - - LEPT_FREE(sumtab); - return 0; -} - - -/*! - * \brief jbGetLLCorners() - * - * \param[in] jbclasser - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes the ptall field, which has the global LL corners,
- *          adjusted for each specific component, so that each component
- *          can be replaced by the template for its class and have the
- *          centroid in the template in the same position as the
- *          centroid of the original connected component. It is important
- *          that this be done properly to avoid a wavy baseline in the result.
- *      (2) It is computed here from the corresponding UL corners, where
- *          the input templates and stored instances are all bordered.
- *          This should be done after all pages have been processed.
- *      (3) For proper substitution, the templates whose LL corners are
- *          placed in these locations must be UN-bordered.
- *          This is available for a realistic jbig2 encoder, which would
- *          (1) encode each template without a border, and (2) encode
- *          the position using the LL corner (rather than the UL
- *          corner) because the difference between y-values
- *          of successive instances is typically close to zero.
- * 
- */ -l_ok -jbGetLLCorners(JBCLASSER *classer) -{ -l_int32 i, iclass, n, x1, y1, h; -NUMA *naclass; -PIX *pix; -PIXA *pixat; -PTA *ptaul, *ptall; - - PROCNAME("jbGetLLCorners"); - - if (!classer) - return ERROR_INT("classer not defined", procName, 1); - - ptaul = classer->ptaul; - naclass = classer->naclass; - pixat = classer->pixat; - - ptaDestroy(&classer->ptall); - n = ptaGetCount(ptaul); - ptall = ptaCreate(n); - classer->ptall = ptall; - - /* If the templates were bordered, we would add h - 1 to the UL - * corner y-value. However, because the templates to be used - * here have their borders removed, and the borders are - * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS - * to the UL corner y-value. */ - for (i = 0; i < n; i++) { - ptaGetIPt(ptaul, i, &x1, &y1); - numaGetIValue(naclass, i, &iclass); - pix = pixaGetPix(pixat, iclass, L_CLONE); - h = pixGetHeight(pix); - ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS); - pixDestroy(&pix); - } - - return 0; -} - - -/*----------------------------------------------------------------------* - * Static helpers * - *----------------------------------------------------------------------*/ -/* When looking for similar matches we check templates whose size is +/- 2 in - * each direction. This involves 25 possible sizes. This array contains the - * offsets for each of those positions in a spiral pattern. There are 25 pairs - * of numbers in this array: even positions are x values. */ -static int two_by_two_walk[50] = { - 0, 0, - 0, 1, - -1, 0, - 0, -1, - 1, 0, - -1, 1, - 1, 1, - -1, -1, - 1, -1, - 0, -2, - 2, 0, - 0, 2, - -2, 0, - -1, -2, - 1, -2, - 2, -1, - 2, 1, - 1, 2, - -1, 2, - -2, 1, - -2, -1, - -2, -2, - 2, -2, - 2, 2, - -2, 2}; - - -/*! 
- * \brief findSimilarSizedTemplatesInit() - * - * \param[in] classer - * \param[in] pixs instance to be matched - * \return Allocated context to be used with findSimilar* - */ -static JBFINDCTX * -findSimilarSizedTemplatesInit(JBCLASSER *classer, - PIX *pixs) -{ -JBFINDCTX *state; - - state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX)); - state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS; - state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS; - state->classer = classer; - return state; -} - - -static void -findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate) -{ -JBFINDCTX *state; - - PROCNAME("findSimilarSizedTemplatesDestroy"); - - if (pstate == NULL) { - L_WARNING("ptr address is null\n", procName); - return; - } - if ((state = *pstate) == NULL) - return; - - l_dnaDestroy(&state->dna); - LEPT_FREE(state); - *pstate = NULL; - return; -} - - -/*! - * \brief findSimilarSizedTemplatesNext() - * - * \param[in] state from findSimilarSizedTemplatesInit - * \return next template number, or -1 when finished - * - * We have a dna hash table that maps template area to a list of template - * numbers with that area. We wish to find similar sized templates, - * so we first look for templates with the same width and height, and - * then with width + 1, etc. This walk is guided by the - * two_by_two_walk array, above. - * - * We don't want to have to collect the whole list of templates first, - * because we hope to find a well-matching template quickly. So we - * keep the context for this walk in an explictit state structure, - * and this function acts like a generator. 
- */ -static l_int32 -findSimilarSizedTemplatesNext(JBFINDCTX *state) -{ -l_int32 desiredh, desiredw, size, templ; -PIX *pixt; - - while(1) { /* Continue the walk over step 'i' */ - if (state->i >= 25) { /* all done; didn't find a good match */ - return -1; - } - - desiredw = state->w + two_by_two_walk[2 * state->i]; - desiredh = state->h + two_by_two_walk[2 * state->i + 1]; - if (desiredh < 1 || desiredw < 1) { /* invalid size */ - state->i++; - continue; - } - - if (!state->dna) { - /* We have yet to start walking the array for the step 'i' */ - state->dna = l_dnaHashGetDna(state->classer->dahash, - (l_uint64)desiredh * desiredw, L_CLONE); - if (!state->dna) { /* nothing there */ - state->i++; - continue; - } - - state->n = 0; /* OK, we got a dna. */ - } - - /* Continue working on this dna */ - size = l_dnaGetCount(state->dna); - for ( ; state->n < size; ) { - templ = (l_int32)(state->dna->array[state->n++] + 0.5); - pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE); - if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw && - pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) { - pixDestroy(&pixt); - return templ; - } - pixDestroy(&pixt); - } - - /* Exhausted the dna (no match found); take another step and - * try again. */ - state->i++; - l_dnaDestroy(&state->dna); - continue; - } -} - - -/*! 
- * \brief finalPositioningForAlignment() - * - * \param[in] pixs input page image - * \param[in] x, y location of UL corner of bb of component in pixs - * \param[in] idelx, idely compensation to match centroids of component - * and template - * \param[in] pixt template, with JB_ADDED_PIXELS of padding - * on all sides - * \param[in] sumtab for summing fg pixels in an image - * \param[in] pdx, pdy return delta on position for best match; each - * one is in the set {-1, 0, 1} - * \return 0 if OK, 1 on error - * - */ -static l_int32 -finalPositioningForAlignment(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 idelx, - l_int32 idely, - PIX *pixt, - l_int32 *sumtab, - l_int32 *pdx, - l_int32 *pdy) -{ -l_int32 w, h, i, j, minx, miny, count, mincount; -PIX *pixi; /* clipped from source pixs */ -PIX *pixr; /* temporary storage */ -BOX *box; - - PROCNAME("finalPositioningForAlignment"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixt) - return ERROR_INT("pixt not defined", procName, 1); - if (!pdx || !pdy) - return ERROR_INT("&dx and &dy not both defined", procName, 1); - if (!sumtab) - return ERROR_INT("sumtab not defined", procName, 1); - *pdx = *pdy = 0; - - /* Use JB_ADDED_PIXELS pixels padding on each side */ - pixGetDimensions(pixt, &w, &h, NULL); - box = boxCreate(x - idelx - JB_ADDED_PIXELS, - y - idely - JB_ADDED_PIXELS, w, h); - pixi = pixClipRectangle(pixs, box, NULL); - boxDestroy(&box); - if (!pixi) - return ERROR_INT("pixi not made", procName, 1); - - pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1); - mincount = 0x7fffffff; - for (i = -1; i <= 1; i++) { - for (j = -1; j <= 1; j++) { - pixCopy(pixr, pixi); - pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0); - pixCountPixels(pixr, &count, sumtab); - if (count < mincount) { - minx = j; - miny = i; - mincount = count; - } - } - } - pixDestroy(&pixi); - pixDestroy(&pixr); - - *pdx = minx; - *pdy = miny; - return 0; -} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jbclass.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jbclass.h deleted file mode 100644 index 62aad60a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jbclass.h +++ /dev/null @@ -1,142 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_JBCLASS_H -#define LEPTONICA_JBCLASS_H - -/*! - * \file jbclass.h - * - * JbClasser - * JbData - */ - - - /*! - *
-     * The JbClasser struct holds all the data accumulated during the
-     * classification process that can be used for a compressed
-     * jbig2-type representation of a set of images.  This is created
-     * in an initialization process and added to as the selected components
-     * on each successive page are analyzed.
-     * 
- */ -struct JbClasser -{ - struct Sarray *safiles; /*!< input page image file names */ - l_int32 method; /*!< JB_RANKHAUS, JB_CORRELATION */ - l_int32 components; /*!< JB_CONN_COMPS, JB_CHARACTERS or */ - /*!< JB_WORDS */ - l_int32 maxwidth; /*!< max component width allowed */ - l_int32 maxheight; /*!< max component height allowed */ - l_int32 npages; /*!< number of pages already processed */ - l_int32 baseindex; /*!< number components already processed */ - /*!< on fully processed pages */ - struct Numa *nacomps; /*!< number of components on each page */ - l_int32 sizehaus; /*!< size of square struct elem for haus */ - l_float32 rankhaus; /*!< rank val of haus match, each way */ - l_float32 thresh; /*!< thresh value for correlation score */ - l_float32 weightfactor; /*!< corrects thresh value for heaver */ - /*!< components; use 0 for no correction */ - struct Numa *naarea; /*!< w * h of each template, without */ - /*!< extra border pixels */ - l_int32 w; /*!< max width of original src images */ - l_int32 h; /*!< max height of original src images */ - l_int32 nclass; /*!< current number of classes */ - l_int32 keep_pixaa; /*!< If zero, pixaa isn't filled */ - struct Pixaa *pixaa; /*!< instances for each class; unbordered */ - struct Pixa *pixat; /*!< templates for each class; bordered */ - /*!< and not dilated */ - struct Pixa *pixatd; /*!< templates for each class; bordered */ - /*!< and dilated */ - struct L_DnaHash *dahash; /*!< Hash table to find templates by size */ - struct Numa *nafgt; /*!< fg areas of undilated templates; */ - /*!< only used for rank < 1.0 */ - struct Pta *ptac; /*!< centroids of all bordered cc */ - struct Pta *ptact; /*!< centroids of all bordered template cc */ - struct Numa *naclass; /*!< array of class ids for each component */ - struct Numa *napage; /*!< array of page nums for each component */ - struct Pta *ptaul; /*!< array of UL corners at which the */ - /*!< template is to be placed for each */ - /*!< component */ - struct Pta 
*ptall; /*!< similar to ptaul, but for LL corners */ -}; -typedef struct JbClasser JBCLASSER; - - - /*! - *
-     * The JbData struct holds all the data required for
-     * the compressed jbig-type representation of a set of images.
-     * The data can be written to file, read back, and used
-     * to regenerate an approximate version of the original,
-     * which differs in two ways from the original:
-     *   (1) It uses a template image for each c.c. instead of the
-     *       original instance, for each occurrence on each page.
-     *   (2) It discards components with either a height or width larger
-     *       than the maximuma, given here by the lattice dimensions
-     *       used for storing the templates.
-     * 
- */ -struct JbData -{ - struct Pix *pix; /*!< template composite for all classes */ - l_int32 npages; /*!< number of pages */ - l_int32 w; /*!< max width of original page images */ - l_int32 h; /*!< max height of original page images */ - l_int32 nclass; /*!< number of classes */ - l_int32 latticew; /*!< lattice width for template composite */ - l_int32 latticeh; /*!< lattice height for template composite */ - struct Numa *naclass; /*!< array of class ids for each component */ - struct Numa *napage; /*!< array of page nums for each component */ - struct Pta *ptaul; /*!< array of UL corners at which the */ - /*!< template is to be placed for each */ - /*!< component */ -}; -typedef struct JbData JBDATA; - - -/*! JB Classifier */ -enum { - JB_RANKHAUS = 0, - JB_CORRELATION = 1 -}; - - /*! For jbGetComponents(): type of component to extract from images */ -/*! JB Component */ -enum { - JB_CONN_COMPS = 0, - JB_CHARACTERS = 1, - JB_WORDS = 2 -}; - - /*! These parameters are used for naming the two files - * in which the jbig2-like compressed data is stored. */ -#define JB_TEMPLATE_EXT ".templates.png" -#define JB_DATA_EXT ".data" - - -#endif /* LEPTONICA_JBCLASS_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kheader.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kheader.c deleted file mode 100644 index 9fb328b8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kheader.c +++ /dev/null @@ -1,316 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file jp2kheader.c - *
- *
- *      Read header
- *          l_int32          readHeaderJp2k()
- *          l_int32          freadHeaderJp2k()
- *          l_int32          readHeaderMemJp2k()
- *          l_int32          fgetJp2kResolution()
- *
- *  Note: these function read image metadata from a jp2k file, without
- *  using any jp2k libraries.
- *
- *  To read and write jp2k data, using the OpenJPEG library
- *  (http://www.openjpeg.org), see jpegio.c.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -#ifndef NO_CONSOLE_IO -#define DEBUG_IHDR 0 -#endif /* ~NO_CONSOLE_IO */ - -/* --------------------------------------------*/ -#if USE_JP2KHEADER /* defined in environ.h */ -/* --------------------------------------------*/ - - /* a sanity check on the size read from file */ -static const l_int32 MAX_JP2K_WIDTH = 100000; -static const l_int32 MAX_JP2K_HEIGHT = 100000; - -/*--------------------------------------------------------------------* - * Stream interface * - *--------------------------------------------------------------------*/ -/*! - * \brief readHeaderJp2k() - * - * \param[in] filename - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderJp2k(const char *filename, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderJp2k"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = freadHeaderJp2k(fp, pw, ph, pbps, pspp); - fclose(fp); - return ret; -} - - -/*! 
- * \brief freadHeaderJp2k() - * - * \param[in] fp file stream opened for read - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \return 0 if OK, 1 on error - */ -l_ok -freadHeaderJp2k(FILE *fp, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp) -{ -l_uint8 buf[80]; /* just need the first 80 bytes */ -l_int32 nread, ret; - - PROCNAME("freadHeaderJp2k"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - - rewind(fp); - nread = fread(buf, 1, sizeof(buf), fp); - if (nread != sizeof(buf)) - return ERROR_INT("read failure", procName, 1); - - ret = readHeaderMemJp2k(buf, sizeof(buf), pw, ph, pbps, pspp); - rewind(fp); - return ret; -} - - -/*! - * \brief readHeaderMemJp2k() - * - * \param[in] data - * \param[in] size at least 80 - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The ISO/IEC reference for jpeg2000 is
- *               http://www.jpeg.org/public/15444-1annexi.pdf
- *          and the file format syntax begins at page 127.
- *      (2) The Image Header Box begins with 'ihdr' = 0x69686472 in
- *          big-endian order.  This typically, but not always, starts
- *          byte 44, with the big-endian data fields beginning at byte 48:
- *               h:    4 bytes
- *               w:    4 bytes
- *               spp:  2 bytes
- *               bps:  1 byte   (contains bps - 1)
- * 
- */ -l_ok -readHeaderMemJp2k(const l_uint8 *data, - size_t size, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp) -{ -l_int32 format, val, w, h, bps, spp, loc, found, windex; -l_uint8 ihdr[4] = {0x69, 0x68, 0x64, 0x72}; /* 'ihdr' */ - - PROCNAME("readHeaderMemJp2k"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (size < 80) - return ERROR_INT("size < 80", procName, 1); - findFileFormatBuffer(data, &format); - if (format != IFF_JP2) - return ERROR_INT("not jp2 file", procName, 1); - - /* Search for beginning of the Image Header Box: 'ihdr' */ - arrayFindSequence(data, size, ihdr, 4, &loc, &found); - if (!found) - return ERROR_INT("image parameters not found", procName, 1); -#if DEBUG_IHDR - if (loc != 44) - L_INFO("Beginning of ihdr is at byte %d\n", procName, loc); -#endif /* DEBUG_IHDR */ - - windex = loc / 4 + 1; - if (4 * (windex + 2) + 2 >= size) - return ERROR_INT("image parameters end are outside of header", - procName, 1); - val = *((l_uint32 *)data + windex); - h = convertOnLittleEnd32(val); - val = *((l_uint32 *)data + windex + 1); - w = convertOnLittleEnd32(val); - val = *((l_uint16 *)data + 2 * (windex + 2)); - spp = convertOnLittleEnd16(val); - bps = *(data + 4 * (windex + 2) + 2) + 1; - if (w < 1 || h < 1) - return ERROR_INT("w and h must both be > 0", procName, 1); - if (w > MAX_JP2K_WIDTH || h > MAX_JP2K_HEIGHT) - return ERROR_INT("unrealistically large sizes", procName, 1); - if (spp != 1 && spp != 3 && spp != 4) - return ERROR_INT("spp must be in 1, 3 or 4", procName, 1); - if (bps != 8 && bps != 16) - return ERROR_INT("bps must be 8 or 16", procName, 1); - if (pw) *pw = w; - if (ph) *ph = h; - if (pspp) *pspp = spp; - if (pbps) *pbps = bps; - return 0; -} - - -/* - * fgetJp2kResolution() - * - * Input: fp (file stream opened for read) - * &xres, &yres ( resolution in ppi) - * Return: 0 if found; 1 if not found or on error 
- * - * Notes: - * (1) If the capture resolution field is not set, this is not an error; - * the returned resolution values are 0 (designating 'unknown'). - * (2) Side-effect: this rewinds the stream. - * (3) The capture resolution box is optional in the jp2 spec, and - * it is usually not written. - * (4) The big-endian data fields that follow the 4 bytes of 'resc' are: - * ynum: 2 bytes - * ydenom: 2 bytes - * xnum: 2 bytes - * xdenom: 2 bytes - * yexp: 1 byte - * xexp: 1 byte - */ -l_int32 -fgetJp2kResolution(FILE *fp, - l_int32 *pxres, - l_int32 *pyres) -{ -l_uint8 xexp, yexp; -l_uint8 *data; -l_uint16 xnum, ynum, xdenom, ydenom; /* these jp2k fields are 2-byte */ -l_int32 loc, found; -l_uint8 resc[4] = {0x72, 0x65, 0x73, 0x63}; /* 'resc' */ -size_t nbytes; -l_float64 xres, yres, maxres; - - PROCNAME("fgetJp2kResolution"); - - if (pxres) *pxres = 0; - if (pyres) *pyres = 0; - if (!pxres || !pyres) - return ERROR_INT("&xres and &yres not both defined", procName, 1); - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - - rewind(fp); - data = l_binaryReadStream(fp, &nbytes); - rewind(fp); - - /* Search for the start of the first capture resolution box: 'resc' */ - arrayFindSequence(data, nbytes, resc, 4, &loc, &found); - if (!found) { - L_WARNING("image resolution not found\n", procName); - LEPT_FREE(data); - return 1; - } - if (nbytes < 80 || loc >= nbytes - 13) { - L_WARNING("image resolution found without enough space\n", procName); - LEPT_FREE(data); - return 1; - } - - /* Extract the fields and calculate the resolution in pixels/meter. - * See section 1.5.3.7.1 of JPEG 2000 ISO/IEC 15444-1 spec. 
*/ - ynum = data[loc + 5] << 8 | data[loc + 4]; - ynum = convertOnLittleEnd16(ynum); - ydenom = data[loc + 7] << 8 | data[loc + 6]; - ydenom = convertOnLittleEnd16(ydenom); - xnum = data[loc + 9] << 8 | data[loc + 8]; - xnum = convertOnLittleEnd16(xnum); - xdenom = data[loc + 11] << 8 | data[loc + 10]; - xdenom = convertOnLittleEnd16(xdenom); - yexp = data[loc + 12]; - xexp = data[loc + 13]; - yres = ((l_float64)ynum / (l_float64)ydenom) * pow(10.0, (l_float64)yexp); - xres = ((l_float64)xnum / (l_float64)xdenom) * pow(10.0, (l_float64)xexp); - - /* Convert from pixels/meter to ppi */ - yres *= (300.0 / 11811.0); - xres *= (300.0 / 11811.0); - - /* Sanity check for bad data */ - maxres = 100000.0; /* ppi */ - if (xres > maxres || yres > maxres) { - L_WARNING("ridiculously large resolution\n", procName); - } else { - *pyres = (l_int32)(yres + 0.5); - *pxres = (l_int32)(xres + 0.5); - } - - LEPT_FREE(data); - return 0; -} - -/* --------------------------------------------*/ -#endif /* USE_JP2KHEADER */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kheaderstub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kheaderstub.c deleted file mode 100644 index 41756c63..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kheaderstub.c +++ /dev/null @@ -1,75 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file jp2kheaderstub.c - *
- *
- *     Stubs for jp2kheader.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_JP2KHEADER /* defined in environ.h */ -/* --------------------------------------------*/ - -l_ok readHeaderJp2k(const char *filename, l_int32 *pw, l_int32 *ph, - l_int32 *pbps, l_int32 *pspp) -{ - return ERROR_INT("function not present", "readHeaderJp2k", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok freadHeaderJp2k(FILE *fp, l_int32 *pw, l_int32 *ph, - l_int32 *pbps, l_int32 *pspp) -{ - return ERROR_INT("function not present", "freadHeaderJp2k", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderMemJp2k(const l_uint8 *cdata, size_t size, l_int32 *pw, - l_int32 *ph, l_int32 *pbps, l_int32 *pspp) -{ - return ERROR_INT("function not present", "readHeaderMemJp2k", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_int32 fgetJp2kResolution(FILE *fp, l_int32 *pxres, l_int32 *pyres) -{ - return ERROR_INT("function not present", "fgetJp2kResolution", 1); -} - -/* --------------------------------------------*/ -#endif /* !USE_JP2KHEADER */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kio.c deleted file mode 100644 index 69d56128..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kio.c +++ /dev/null @@ -1,949 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file jp2kio.c - *
- *
- *    Read jp2k from file
- *          PIX                *pixReadJp2k()  [special top level]
- *          PIX                *pixReadStreamJp2k()
- *
- *    Write jp2k to file
- *          l_int32             pixWriteJp2k()  [special top level]
- *          l_int32             pixWriteStreamJp2k()
- *          static opj_image_t *pixConvertToOpjImage()
- *
- *    Read/write to memory
- *          PIX                *pixReadMemJp2k()
- *          l_int32             pixWriteMemJp2k()
- *
- *    Static functions from opj 2.0 to retain file stream interface
- *          static opj_stream_t  *opjCreateStream()
- *          [other static helpers]
- *
- *    Based on the OpenJPEG distribution:
- *        http://www.openjpeg.org/
- *    The ISO/IEC reference for jpeg2000 is:
- *        http://www.jpeg.org/public/15444-1annexi.pdf
- *
- *    Compressing to memory and decompressing from memory
- *    ---------------------------------------------------
- *    On systems like windows without fmemopen() and open_memstream(),
- *    we write data to a temp file and read it back for operations
- *    between pix and compressed-data, such as pixReadMemJp2k() and
- *    pixWriteMemJp2k().
- *
- *    Pdf can accept jp2k compressed strings directly
- *    -----------------------------------------------
- *    Transcoding (with the uncompress/compress cycle) is not required
- *    to wrap images that have already been compressed with jp2k in pdf,
- *    because the pdf format for jp2k includes the full string of the
- *    jp2k compressed images.  This is also true for jpeg compressed
- *    strings.
- *
- *    N.B.
- *    * This is based on the most recent openjpeg release: 2.1.
- *    * The openjpeg interface was massively changed from 1.X.  The debian
- *      distribution is way back at 1.3.  We have inquired but are unable
- *      to determine if or when a debian distribution will be built for 2.1.
- *    * For version 2.1, the openjpeg.h file is installed in an
- *      openjpeg-2.1 subdirectory, which is hard to support.
- *    * In openjpeg-2.1, reading is slow compared to jpeg or webp,
- *      and writing is very slow compared to jpeg or webp.  This is expected
- *      to improve significantly in future versions.
- *    * Reading and writing jp2k are supported here for 2.1.
- *      The high-level interface to openjpeg continues to change.
- *      From 2.0 to 2.1, the ability to interface to a C file stream
- *      was removed permanently.  Leptonica supports both file stream
- *      and memory buffer interfaces for every image I/O library, and
- *      it requires the libraries to support at least one of these.
- *      However, openjpeg-2.1 provides neither, so we have brought
- *      several static functions over from openjpeg-2.0 in order to
- *      retain the file stream interface.  See our static function
- *      opjCreateStream().
- *    * Specifying a quality factor for jpeg2000 requires caution.  Unlike
- *      jpeg and webp, which have a sensible scale that goes from 0 (very poor)
- *      to 100 (nearly lossless), kakadu and openjpeg use idiosyncratic and
- *      non-intuitive numbers.  kakadu uses "rate/distortion" numbers in
- *      a narrow range around 50,000; openjpeg (and our write interface)
- *      use SNR.  The visually apparent artifacts introduced by compression
- *      are strongly content-dependent and vary in a highly non-linear
- *      way with SNR.  We take SNR = 34 as default, roughly similar in
- *      quality to jpeg's default standard of 75.  For document images,
- *      SNR = 25 is very poor, whereas SNR = 45 is nearly lossless.  If you
- *      use the latter, you will pay dearly in the size of the compressed file.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if HAVE_LIBJP2K /* defined in environ.h */ -/* --------------------------------------------*/ - - /* Leptonica supports versions 2.0 and newer */ -#ifdef LIBJP2K_HEADER -#include LIBJP2K_HEADER -#else -#include -#endif - - /* 2.0 didn't define OPJ_VERSION_MINOR. */ -#ifndef OPJ_VERSION_MINOR -#define OPJ_VERSION_MINOR 0 -#endif - - /* Static generator of opj_stream from file stream. - * In 2.0.1, this functionality is provided by - * opj_stream_create_default_file_stream(), - * but it was removed in 2.1.0. Because we must have either - * a file stream or a memory interface to the compressed data, - * it is necessary to recreate the stream interface here. */ -static opj_stream_t *opjCreateStream(FILE *fp, l_int32 is_read); - - /* Static converter pix --> opj_image. Used for compressing pix, - * because the codec works on data stored in their raster format. */ -static opj_image_t *pixConvertToOpjImage(PIX *pix); - -/*---------------------------------------------------------------------* - * Callback event handlers * - *---------------------------------------------------------------------*/ -static void error_callback(const char *msg, void *client_data) { - (void)client_data; - fprintf(stdout, "[ERROR] %s", msg); -} - -static void warning_callback(const char *msg, void *client_data) { - (void)client_data; - fprintf(stdout, "[WARNING] %s", msg); -} - -static void info_callback(const char *msg, void *client_data) { - (void)client_data; - fprintf(stdout, "[INFO] %s", msg); -} - - -/*---------------------------------------------------------------------* - * Read jp2k from file (special function) * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixReadJp2k() - * - * \param[in] filename - * \param[in] reduction scaling factor: 1, 2, 4, 8, 16 - * \param[in] box [optional] for extracting a subregion, can be null - * \param[in] hint a bitwise OR of L_JP2K_* values; 0 for default - * \param[in] debug output callback messages, etc - * \return pix 8 or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a special function for reading jp2k files.
- *          The high-level pixReadStream() uses default values:
- *             %reduction = 1
- *             %box = NULL
- *      (2) This decodes at either full resolution or at a reduction by
- *          a power of 2.  The default value %reduction == 1 gives a full
- *          resolution image.  Use %reduction > 1 to get a reduced image.
- *          The actual values of %reduction that can be used on an image
- *          depend on the number of resolution levels chosen when the
- *          image was compressed.  Typical values might be 1, 2, 4, 8 and 16.
- *          Using a value representing a reduction level that was not
- *          stored when the file was written will fail with the message:
- *          "failed to read the header".
- *      (3) Use %box to decode only a part of the image.  The box is defined
- *          at full resolution.  It is reduced internally by %reduction,
- *          and clipping to the right and bottom of the image is automatic.
- *      (4) We presently only handle images with 8 bits/sample (bps).
- *          If the image has 16 bps, the read will fail.
- *      (5) There are 4 possible values of samples/pixel (spp).
- *          The values in brackets give the pixel values in the Pix:
- *           spp = 1  ==>  grayscale           [8 bpp grayscale]
- *           spp = 2  ==>  grayscale + alpha   [32 bpp rgba]
- *           spp = 3  ==>  rgb                 [32 bpp rgb]
- *           spp = 4  ==>  rgba                [32 bpp rgba]
- *      (6) The %hint parameter is reserved for future use.
- * 
- */ -PIX * -pixReadJp2k(const char *filename, - l_uint32 reduction, - BOX *box, - l_int32 hint, - l_int32 debug) -{ -FILE *fp; -PIX *pix; - - PROCNAME("pixReadJp2k"); - - if (!filename) - return (PIX *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIX *)ERROR_PTR("image file not found", procName, NULL); - pix = pixReadStreamJp2k(fp, reduction, box, hint, debug); - fclose(fp); - - if (!pix) - return (PIX *)ERROR_PTR("image not returned", procName, NULL); - return pix; -} - - -/*! - * \brief pixReadStreamJp2k() - * - * \param[in] fp file stream - * \param[in] reduction scaling factor: 1, 2, 4, 8 - * \param[in] box [optional] for extracting a subregion, can be null - * \param[in] hint a bitwise OR of L_JP2K_* values; 0 for default - * \param[in] debug output callback messages, etc - * \return pix 8 or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) See pixReadJp2k() for usage.
- * 
- */ -PIX * -pixReadStreamJp2k(FILE *fp, - l_uint32 reduction, - BOX *box, - l_int32 hint, - l_int32 debug) -{ -const char *opjVersion; -l_int32 i, j, index, bx, by, bw, bh, val, rval, gval, bval, aval; -l_int32 w, h, wpl, bps, spp, xres, yres, reduce, prec, colorspace; -l_uint32 pixel; -l_uint32 *data, *line; -opj_dparameters_t parameters; /* decompression parameters */ -opj_image_t *image = NULL; -opj_codec_t *l_codec = NULL; /* handle to decompressor */ -opj_stream_t *l_stream = NULL; /* opj stream */ -PIX *pix = NULL; - - PROCNAME("pixReadStreamJp2k"); - - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - - opjVersion = opj_version(); - if (opjVersion[0] != '2') { - L_ERROR("version is %s; must be 2.0 or higher\n", procName, opjVersion); - return NULL; - } - if ((opjVersion[2] - 0x30) != OPJ_VERSION_MINOR) { - L_ERROR("version %s: differs from minor = %d\n", - procName, opjVersion, OPJ_VERSION_MINOR); - return NULL; - } - - /* Get the resolution and the bits/sample */ - rewind(fp); - fgetJp2kResolution(fp, &xres, &yres); - freadHeaderJp2k(fp, NULL, NULL, &bps, NULL); - rewind(fp); - - if (bps > 8) { - L_ERROR("found %d bps; can only handle 8 bps\n", procName, bps); - return NULL; - } - - /* Set decoding parameters to default values */ - opj_set_default_decoder_parameters(¶meters); - - /* Find and set the reduce parameter, which is log2(reduction). - * Valid reductions are powers of 2, and are determined when the - * compressed string is made. A request for an invalid reduction - * will cause an error in opj_read_header(), and no image will - * be returned. 
*/ - for (reduce = 0; (1L << reduce) < reduction; reduce++) { } - if ((1L << reduce) != reduction) { - L_ERROR("invalid reduction %d; not power of 2\n", procName, reduction); - return NULL; - } - parameters.cp_reduce = reduce; - - /* Get a decoder handle */ - if ((l_codec = opj_create_decompress(OPJ_CODEC_JP2)) == NULL) { - L_ERROR("failed to make the codec\n", procName); - return NULL; - } - - /* Catch and report events using callbacks */ - if (debug) { - opj_set_info_handler(l_codec, info_callback, NULL); - opj_set_warning_handler(l_codec, warning_callback, NULL); - opj_set_error_handler(l_codec, error_callback, NULL); - } - - /* Setup the decoding parameters using user parameters */ - if (!opj_setup_decoder(l_codec, ¶meters)){ - L_ERROR("failed to set up decoder\n", procName); - opj_destroy_codec(l_codec); - return NULL; - } - - /* Open decompression 'stream'. In 2.0, we could call this: - * opj_stream_create_default_file_stream(fp, 1) - * but the file stream interface was removed in 2.1. 
*/ - if ((l_stream = opjCreateStream(fp, 1)) == NULL) { - L_ERROR("failed to open the stream\n", procName); - opj_destroy_codec(l_codec); - return NULL; - } - - /* Read the main header of the codestream and, if necessary, - * the JP2 boxes */ - if(!opj_read_header(l_stream, l_codec, &image)){ - L_ERROR("failed to read the header\n", procName); - opj_stream_destroy(l_stream); - opj_destroy_codec(l_codec); - opj_image_destroy(image); - return NULL; - } - - /* Set up to decode a rectangular region */ - if (box) { - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (!opj_set_decode_area(l_codec, image, bx, by, - bx + bw, by + bh)) { - L_ERROR("failed to set the region for decoding\n", procName); - opj_stream_destroy(l_stream); - opj_destroy_codec(l_codec); - opj_image_destroy(image); - return NULL; - } - } - - /* Get the decoded image */ - if (!(opj_decode(l_codec, l_stream, image) && - opj_end_decompress(l_codec, l_stream))) { - L_ERROR("failed to decode the image\n", procName); - opj_destroy_codec(l_codec); - opj_stream_destroy(l_stream); - opj_image_destroy(image); - return NULL; - } - - /* Finished with the byte stream and the codec */ - opj_stream_destroy(l_stream); - opj_destroy_codec(l_codec); - - /* Get the image parameters */ - spp = image->numcomps; - w = image->comps[0].w; - h = image->comps[0].h; - prec = image->comps[0].prec; - if (prec != bps) - L_WARNING("precision %d != bps %d!\n", procName, prec, bps); - if (debug) { - L_INFO("w = %d, h = %d, bps = %d, spp = %d\n", - procName, w, h, bps, spp); - colorspace = image->color_space; - if (colorspace == OPJ_CLRSPC_SRGB) - L_INFO("colorspace is sRGB\n", procName); - else if (colorspace == OPJ_CLRSPC_GRAY) - L_INFO("colorspace is grayscale\n", procName); - else if (colorspace == OPJ_CLRSPC_SYCC) - L_INFO("colorspace is YUV\n", procName); - } - - /* Convert the image to a pix */ - if (spp == 1) - pix = pixCreate(w, h, 8); - else - pix = pixCreate(w, h, 32); - pixSetInputFormat(pix, IFF_JP2); - pixSetResolution(pix, 
xres, yres); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - index = 0; - if (spp == 1) { - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - val = image->comps[0].data[index]; - SET_DATA_BYTE(line, j, val); - index++; - } - } - } else if (spp == 2) { /* convert to RGBA */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - val = image->comps[0].data[index]; - aval = image->comps[1].data[index]; - composeRGBAPixel(val, val, val, aval, &pixel); - line[j] = pixel; - index++; - } - } - } else if (spp >= 3) { - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - rval = image->comps[0].data[index]; - gval = image->comps[1].data[index]; - bval = image->comps[2].data[index]; - if (spp == 3) { - composeRGBPixel(rval, gval, bval, &pixel); - } else { /* spp == 4 */ - aval = image->comps[3].data[index]; - composeRGBAPixel(rval, gval, bval, aval, &pixel); - } - line[j] = pixel; - index++; - } - } - } - - /* Free the opj image data structure */ - opj_image_destroy(image); - - return pix; -} - - -/*---------------------------------------------------------------------* - * Write jp2k to file * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWriteJp2k() - * - * \param[in] filename - * \param[in] pix any depth, cmap is OK - * \param[in] quality SNR > 0; 0 for default (34); 100 for lossless - * \param[in] nlevels resolution levels; <= 10; default = 5 - * \param[in] hint a bitwise OR of L_JP2K_* values; 0 for default - * \param[in] debug output callback messages, etc - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The %quality parameter is the SNR.  The useful range is narrow:
- *             SNR < 27  (terrible quality)
- *             SNR = 34  (default; approximately equivalent to jpeg quality 75)
- *             SNR = 40  (very high quality)
- *             SNR = 45  (nearly lossless)
- *          Use 0 for default; 100 for lossless.
- *      (2) The %nlevels parameter is the number of resolution levels
- *          to be written.  For example, with nlevels == 5, images with
- *          reduction factors of 1, 2, 4, 8 and 16 are encoded, and retrieval
- *          is done at the level requested when reading.  For default,
- *          use either 5 or 0.
- *      (3) The %hint parameter is not yet in use.
- *      (4) For now, we only support 1 "layer" for quality.
- * 
- */ -l_ok -pixWriteJp2k(const char *filename, - PIX *pix, - l_int32 quality, - l_int32 nlevels, - l_int32 hint, - l_int32 debug) -{ -FILE *fp; - - PROCNAME("pixWriteJp2k"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - - if (pixWriteStreamJp2k(fp, pix, quality, nlevels, hint, debug)) { - fclose(fp); - return ERROR_INT("pix not written to stream", procName, 1); - } - - fclose(fp); - return 0; -} - - -/*! - * \brief pixWriteStreamJp2k() - * - * \param[in] fp file stream - * \param[in] pix any depth, cmap is OK - * \param[in] quality SNR > 0; 0 for default (34); 100 for lossless - * \param[in] nlevels <= 10 - * \param[in] hint a bitwise OR of L_JP2K_* values; 0 for default - * \param[in] debug output callback messages, etc - * \return 0 if OK, 1 on error - *
- * Notes:
- *      (1) See pixWriteJp2k() for usage.
- *      (2) For an encoder with more encoding options, see, e.g.,
- *    https://github.com/OpenJPEG/openjpeg/blob/master/tests/test_tile_encoder.c
- * 
- */ -l_ok -pixWriteStreamJp2k(FILE *fp, - PIX *pix, - l_int32 quality, - l_int32 nlevels, - l_int32 hint, - l_int32 debug) -{ -l_int32 w, h, d, success; -l_float32 snr; -const char *opjVersion; -PIX *pixs; -opj_cparameters_t parameters; /* compression parameters */ -opj_stream_t *l_stream = NULL; -opj_codec_t* l_codec = NULL;; -opj_image_t *image = NULL; - - PROCNAME("pixWriteStreamJp2k"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - snr = (l_float32)quality; - if (snr <= 0) snr = 34.0; /* default */ - if (snr < 27) - L_WARNING("SNR = %d < 27; very low\n", procName, (l_int32)snr); - if (snr == 100) snr = 0; /* for lossless */ - if (snr > 45) { - L_WARNING("SNR > 45; using lossless encoding\n", procName); - snr = 0; - } - - if (nlevels <= 0) nlevels = 5; /* default */ - if (nlevels > 10) { - L_WARNING("nlevels = %d > 10; setting to 10\n", procName, nlevels); - nlevels = 10; - } - - opjVersion = opj_version(); - if (opjVersion[0] != '2') { - L_ERROR("version is %s; must be 2.0 or higher\n", procName, opjVersion); - return 1; - } - if ((opjVersion[2] - 0x30) != OPJ_VERSION_MINOR) { - L_ERROR("version %s: differs from minor = %d\n", - procName, opjVersion, OPJ_VERSION_MINOR); - return 1; - } - - /* Remove colormap if it exists; result is 8 or 32 bpp */ - pixGetDimensions(pix, &w, &h, &d); - if (d == 24) { - pixs = pixConvert24To32(pix); - } else if (d == 32) { - pixs = pixClone(pix); - } else if (pixGetColormap(pix) == NULL) { - pixs = pixConvertTo8(pix, 0); - } else { /* colormap */ - L_INFO("removing colormap; may be better to compress losslessly\n", - procName); - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - } - - /* Convert to opj image format. */ - pixSetPadBits(pixs, 0); - image = pixConvertToOpjImage(pixs); - pixDestroy(&pixs); - - /* Set encoding parameters to default values. - * We use one layer with the input SNR. 
*/ - opj_set_default_encoder_parameters(¶meters); - parameters.cp_fixed_quality = 1; - parameters.cp_disto_alloc = 0; - parameters.cp_fixed_alloc = 0; - parameters.tcp_distoratio[0] = snr; - parameters.tcp_numlayers = 1; - parameters.numresolution = nlevels + 1; - - /* Create comment for codestream */ - if (parameters.cp_comment == NULL) { - const char comment1[] = "Created by Leptonica, version "; - const char comment2[] = "; using OpenJPEG, version "; - size_t len1 = strlen(comment1); - size_t len2 = strlen(comment2); - char *version1 = getLeptonicaVersion(); - const char *version2 = opj_version(); - len1 += len2 + strlen(version1) + strlen(version2) + 1; - parameters.cp_comment = (char *)LEPT_MALLOC(len1); - snprintf(parameters.cp_comment, len1, "%s%s%s%s", comment1, version1, - comment2, version2); - LEPT_FREE(version1); - } - - /* Get the encoder handle */ - if ((l_codec = opj_create_compress(OPJ_CODEC_JP2)) == NULL) { - opj_image_destroy(image); - LEPT_FREE(parameters.cp_comment); - return ERROR_INT("failed to get the encoder handle\n", procName, 1); - } - - /* Catch and report events using callbacks */ - if (debug) { - opj_set_info_handler(l_codec, info_callback, NULL); - opj_set_warning_handler(l_codec, warning_callback, NULL); - opj_set_error_handler(l_codec, error_callback, NULL); - } - - /* Set up the encoder */ - if (!opj_setup_encoder(l_codec, ¶meters, image)) { - opj_destroy_codec(l_codec); - opj_image_destroy(image); - LEPT_FREE(parameters.cp_comment); - return ERROR_INT("failed to set up the encoder\n", procName, 1); - } - - /* Open a compression stream for writing. In 2.0 we could use this: - * opj_stream_create_default_file_stream(fp, 0) - * but the file stream interface was removed in 2.1. 
*/ - rewind(fp); - if ((l_stream = opjCreateStream(fp, 0)) == NULL) { - opj_destroy_codec(l_codec); - opj_image_destroy(image); - LEPT_FREE(parameters.cp_comment); - return ERROR_INT("failed to open l_stream\n", procName, 1); - } - - /* Encode the image */ - if (!opj_start_compress(l_codec, image, l_stream)) { - opj_stream_destroy(l_stream); - opj_destroy_codec(l_codec); - opj_image_destroy(image); - LEPT_FREE(parameters.cp_comment); - return ERROR_INT("opj_start_compress failed\n", procName, 1); - } - if (!opj_encode(l_codec, l_stream)) { - opj_stream_destroy(l_stream); - opj_destroy_codec(l_codec); - opj_image_destroy(image); - LEPT_FREE(parameters.cp_comment); - return ERROR_INT("opj_encode failed\n", procName, 1); - } - success = opj_end_compress(l_codec, l_stream); - - /* Clean up */ - opj_stream_destroy(l_stream); - opj_destroy_codec(l_codec); - opj_image_destroy(image); - LEPT_FREE(parameters.cp_comment); - if (success) - return 0; - else - return ERROR_INT("opj_end_compress failed\n", procName, 1); -} - - -/*! - * \brief pixConvertToOpjImage() - * - * \param[in] pix 8 or 32 bpp - * \return opj_image, or NULL on error - * - *
- * Notes:
- *      (1) Input pix is 8 bpp grayscale, 32 bpp rgb, or 32 bpp rgba.
- *      (2) Gray + alpha pix are all represented as rgba.
- * 
- */ -static opj_image_t * -pixConvertToOpjImage(PIX *pix) -{ -l_int32 i, j, k, w, h, d, spp, wpl; -OPJ_COLOR_SPACE colorspace; -l_int32 *ir = NULL; -l_int32 *ig = NULL; -l_int32 *ib = NULL; -l_int32 *ia = NULL; -l_uint32 *line, *data; -opj_image_t *image; -opj_image_cmptparm_t cmptparm[4]; - - PROCNAME("pixConvertToOpjImage"); - - if (!pix) - return (opj_image_t *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - if (d != 8 && d != 32) { - L_ERROR("invalid depth: %d\n", procName, d); - return NULL; - } - - /* Allocate the opj_image. */ - spp = pixGetSpp(pix); - memset(&cmptparm[0], 0, 4 * sizeof(opj_image_cmptparm_t)); - for (i = 0; i < spp; i++) { - cmptparm[i].prec = 8; - cmptparm[i].bpp = 8; - cmptparm[i].sgnd = 0; - cmptparm[i].dx = 1; - cmptparm[i].dy = 1; - cmptparm[i].w = w; - cmptparm[i].h = h; - } - colorspace = (spp == 1) ? OPJ_CLRSPC_GRAY : OPJ_CLRSPC_SRGB; - if ((image = opj_image_create(spp, &cmptparm[0], colorspace)) == NULL) - return (opj_image_t *)ERROR_PTR("image not made", procName, NULL); - image->x0 = 0; - image->y0 = 0; - image->x1 = w; - image->y1 = h; - - /* Set the component pointers */ - ir = image->comps[0].data; - if (spp > 1) { - ig = image->comps[1].data; - ib = image->comps[2].data; - } - if(spp == 4) - ia = image->comps[3].data; - - /* Transfer the data from the pix */ - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = 0, k = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++, k++) { - if (spp == 1) { - ir[k] = GET_DATA_BYTE(line, j); - } else if (spp > 1) { - ir[k] = GET_DATA_BYTE(line + j, COLOR_RED); - ig[k] = GET_DATA_BYTE(line + j, COLOR_GREEN); - ib[k] = GET_DATA_BYTE(line + j, COLOR_BLUE); - } - if (spp == 4) - ia[k] = GET_DATA_BYTE(line + j, L_ALPHA_CHANNEL); - } - } - - return image; -} - - -/*---------------------------------------------------------------------* - * Read/write to memory * - *---------------------------------------------------------------------*/ 
-/*! - * \brief pixReadMemJp2k() - * - * \param[in] data const; jpeg-encoded - * \param[in] size of data - * \param[in] reduction scaling factor: 1, 2, 4, 8 - * \param[in] box [optional] for extracting a subregion, can be null - * \param[in] hint a bitwise OR of L_JP2K_* values; 0 for default - * \param[in] debug output callback messages, etc - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This crashes when reading through the fmemopen cookie.
- *          Until we can fix this, we use the file-based work-around.
- *          And fixing this may take some time, because the basic
- *          stream interface is no longer supported in openjpeg.
- *      (2) See pixReadJp2k() for usage.
- * 
- */ -PIX * -pixReadMemJp2k(const l_uint8 *data, - size_t size, - l_uint32 reduction, - BOX *box, - l_int32 hint, - l_int32 debug) -{ -FILE *fp; -PIX *pix; - - PROCNAME("pixReadMemJp2k"); - - if (!data) - return (PIX *)ERROR_PTR("data not defined", procName, NULL); - - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIX *)ERROR_PTR("stream not opened", procName, NULL); - pix = pixReadStreamJp2k(fp, reduction, box, hint, debug); - fclose(fp); - if (!pix) L_ERROR("pix not read\n", procName); - return pix; -} - - -/*! - * \brief pixWriteMemJp2k() - * - * \param[out] pdata data of jpeg compressed image - * \param[out] psize size of returned data - * \param[in] pix 8 or 32 bpp - * \param[in] quality SNR > 0; 0 for default (34); 100 for lossless - * \param[in] nlevels 0 for default - * \param[in] hint a bitwise OR of L_JP2K_* values; 0 for default - * \param[in] debug output callback messages, etc - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteJp2k() for usage.  This version writes to
- *          memory instead of to a file stream.
- * 
- */ -l_ok -pixWriteMemJp2k(l_uint8 **pdata, - size_t *psize, - PIX *pix, - l_int32 quality, - l_int32 nlevels, - l_int32 hint, - l_int32 debug) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixWriteMemJp2k"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixWriteStreamJp2k(fp, pix, quality, nlevels, hint, debug); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixWriteStreamJp2k(fp, pix, quality, nlevels, hint, debug); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*---------------------------------------------------------------------* - * Static functions from opj 2.0 to retain file stream interface * - *---------------------------------------------------------------------*/ -static l_uint64 -opj_get_user_data_length(FILE *fp) { - OPJ_OFF_T length = 0; - fseek(fp, 0, SEEK_END); - length = (OPJ_OFF_T)ftell(fp); - fseek(fp, 0, SEEK_SET); - return (l_uint64)length; -} - -static OPJ_SIZE_T -opj_read_from_file(void *p_buffer, OPJ_SIZE_T p_nb_bytes, FILE *fp) { - OPJ_SIZE_T l_nb_read = fread(p_buffer, 1, p_nb_bytes, fp); - return l_nb_read ? 
l_nb_read : (OPJ_SIZE_T) - 1; -} - -static OPJ_SIZE_T -opj_write_from_file(void *p_buffer, OPJ_SIZE_T p_nb_bytes, FILE *fp) -{ - return fwrite(p_buffer, 1, p_nb_bytes, fp); -} - -static OPJ_OFF_T -opj_skip_from_file(OPJ_OFF_T offset, FILE *fp) { - if (fseek(fp, offset, SEEK_CUR)) { - return -1; - } - return offset; -} - -static l_int32 -opj_seek_from_file(OPJ_OFF_T offset, FILE *fp) { - if (fseek(fp, offset, SEEK_SET)) { - return 0; - } - return 1; -} - - /* Static generator of opj_stream from file stream */ -static opj_stream_t * -opjCreateStream(FILE *fp, - l_int32 is_read_stream) -{ -opj_stream_t *l_stream; - - PROCNAME("opjCreateStream"); - - if (!fp) - return (opj_stream_t *)ERROR_PTR("fp not defined", procName, NULL); - - l_stream = opj_stream_create(OPJ_J2K_STREAM_CHUNK_SIZE, is_read_stream); - if (!l_stream) - return (opj_stream_t *)ERROR_PTR("stream not made", procName, NULL); - -#if OPJ_VERSION_MINOR == 0 - opj_stream_set_user_data(l_stream, fp); -#else - opj_stream_set_user_data(l_stream, fp, - (opj_stream_free_user_data_fn)NULL); -#endif - opj_stream_set_user_data_length(l_stream, opj_get_user_data_length(fp)); - opj_stream_set_read_function(l_stream, - (opj_stream_read_fn)opj_read_from_file); - opj_stream_set_write_function(l_stream, - (opj_stream_write_fn)opj_write_from_file); - opj_stream_set_skip_function(l_stream, - (opj_stream_skip_fn)opj_skip_from_file); - opj_stream_set_seek_function(l_stream, - (opj_stream_seek_fn)opj_seek_from_file); - - return l_stream; -} - -/* --------------------------------------------*/ -#endif /* HAVE_LIBJP2K */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kiostub.c deleted file mode 100644 index f8340677..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jp2kiostub.c +++ /dev/null @@ -1,98 +0,0 @@ 
-/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file jp2kiostub.c - *
- *
- *     Stubs for jp2kio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !HAVE_LIBJP2K /* defined in environ.h */ -/* --------------------------------------------*/ - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadJp2k(const char *filename, l_uint32 reduction, BOX *box, - l_int32 hint, l_int32 debug) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadJp2k", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadStreamJp2k(FILE *fp, l_uint32 reduction, BOX *box, - l_int32 hint, l_int32 debug) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamJp2k", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteJp2k(const char *filename, PIX *pix, l_int32 quality, - l_int32 nlevels, l_int32 hint, l_int32 debug) -{ - return ERROR_INT("function not present", "pixWriteJp2k", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamJp2k(FILE *fp, PIX *pix, l_int32 quality, - l_int32 nlevels, l_int32 hint, l_int32 debug) -{ - return ERROR_INT("function not present", "pixWriteStreamJp2k", 1); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemJp2k(const l_uint8 *data, size_t size, l_uint32 reduction, - BOX *box, l_int32 hint, l_int32 debug) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadMemJp2k", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemJp2k(l_uint8 **pdata, size_t *psize, PIX *pix, - l_int32 quality, l_int32 nlevels, l_int32 hint, - l_int32 debug) -{ - return ERROR_INT("function not present", "pixWriteMemJp2k", 1); -} - -/* ----------------------------------------------------------------------*/ - -/* --------------------------------------------*/ 
-#endif /* !HAVE_LIBJP2K */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jpegio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jpegio.c deleted file mode 100644 index 1e58aa7b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jpegio.c +++ /dev/null @@ -1,1304 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file jpegio.c - *
- *
- *    Read jpeg from file
- *          PIX             *pixReadJpeg()  [special top level]
- *          PIX             *pixReadStreamJpeg()
- *
- *    Read jpeg metadata from file
- *          l_int32          readHeaderJpeg()
- *          l_int32          freadHeaderJpeg()
- *          l_int32          fgetJpegResolution()
- *          l_int32          fgetJpegComment()
- *
- *    Write jpeg to file
- *          l_int32          pixWriteJpeg()  [special top level]
- *          l_int32          pixWriteStreamJpeg()
- *
- *    Read/write to memory
- *          PIX             *pixReadMemJpeg()
- *          l_int32          readHeaderMemJpeg()
- *          l_int32          readResolutionMemJpeg()
- *          l_int32          pixWriteMemJpeg()
- *
- *    Setting special flag for chroma sampling on write
- *          l_int32          pixSetChromaSampling()
- *
- *    Static system helpers
- *          static void      jpeg_error_catch_all_1()
- *          static void      jpeg_error_catch_all_2()
- *          static l_uint8   jpeg_getc()
- *          static l_int32   jpeg_comment_callback()
- *
- *    Documentation: libjpeg.doc can be found, along with all
- *    source code, at ftp://ftp.uu.net/graphics/jpeg
- *    Download and untar the file:  jpegsrc.v6b.tar.gz
- *    A good paper on jpeg can also be found there: wallace.ps.gz
- *
- *    The functions in libjpeg make it very simple to compress
- *    and decompress images.  On input (decompression from file),
- *    3 component color images can be read into either an 8 bpp Pix
- *    with a colormap or a 32 bpp Pix with RGB components.  For output
- *    (compression to file), all color Pix, whether 8 bpp with a
- *    colormap or 32 bpp, are written compressed as a set of three
- *    8 bpp (rgb) images.
- *
- *    Low-level error handling
- *    ------------------------
- *    The default behavior of the jpeg library is to call exit.
- *    This is often undesirable, and the caller should make the
- *    decision when to abort a process.  To prevent the jpeg library
- *    from calling exit(), setjmp() has been inserted into all
- *    readers and writers, and the cinfo struct has been set up so that
- *    the low-level jpeg library will call a special error handler
- *    that doesn't exit, instead of the default function error_exit().
- *
- *    To avoid race conditions and make these functions thread-safe in
- *    the rare situation where calls to two threads are simultaneously
- *    failing on bad jpegs, we insert a local copy of the jmp_buf struct
- *    into the cinfo.client_data field, and use this on longjmp.
- *    For extracting the jpeg comment, we have the added complication
- *    that the client_data field must also return the jpeg comment,
- *    and we use a different error handler.
- *
- *    How to avoid subsampling the chroma channels
- *    --------------------------------------------
- *    When writing, you can avoid subsampling the U,V (chroma)
- *    channels.  This gives higher quality for the color, which is
- *    important for some situations.  The default subsampling is 2x2 on
- *    both channels.  Before writing, call pixSetChromaSampling(pix, 0)
- *    to prevent chroma subsampling.
- *
- *    How to extract just the luminance channel in reading RGB
- *    --------------------------------------------------------
- *    For higher resolution and faster decoding of an RGB image, you
- *    can extract just the 8 bpp luminance channel, using pixReadJpeg(),
- *    where you use L_JPEG_READ_LUMINANCE for the %hint arg.
- *
- *    How to fail to read if the data is corrupted
- *    ---------------------------------------------
- *    By default, if the low-level jpeg library functions do not abort,
- *    a pix will be returned, even if the data is corrupted and warnings
- *    are issued.  In order to be most likely to fail to read when there
- *    is data corruption, use L_JPEG_FAIL_ON_BAD_DATA in the %hint arg.
- *
- *    Compressing to memory and decompressing from memory
- *    ---------------------------------------------------
- *    On systems like windows without fmemopen() and open_memstream(),
- *    we write data to a temp file and read it back for operations
- *    between pix and compressed-data, such as pixReadMemJpeg() and
- *    pixWriteMemJpeg().
- *
- *    Vestigial code: parsing the jpeg file for header metadata
- *    ---------------------------------------------------------
- *    For extracting header metadata, we previously parsed the file, looking
- *    for specific markers.  This is error-prone because of non-standard
- *    jpeg files, and we now use readHeaderJpeg() and readHeaderMemJpeg().
- *    The vestigial code is retained in jpegio_notused.c to help you
- *    understand a bit about how to parse jpeg markers.  It is not compiled
- *    into the library.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if HAVE_LIBJPEG /* defined in environ.h */ -/* --------------------------------------------*/ - -#include - - /* jconfig.h makes the error of setting - * #define HAVE_STDLIB_H - * which conflicts with config_auto.h (where it is set to 1) and results - * for some gcc compiler versions in a warning. The conflict is harmless - * but we suppress it by undefining the variable. */ -#undef HAVE_STDLIB_H -#include "jpeglib.h" - -static void jpeg_error_catch_all_1(j_common_ptr cinfo); -static void jpeg_error_catch_all_2(j_common_ptr cinfo); -static l_uint8 jpeg_getc(j_decompress_ptr cinfo); - - /* Note: 'boolean' is defined in jmorecfg.h. We use it explicitly - * here because for windows where __MINGW32__ is defined, - * the prototype for jpeg_comment_callback() is given as - * returning a boolean. */ -static boolean jpeg_comment_callback(j_decompress_ptr cinfo); - - /* This is saved in the client_data field of cinfo, and used both - * to retrieve the comment from its callback and to handle - * exceptions with a longjmp. */ -struct callback_data { - jmp_buf jmpbuf; - l_uint8 *comment; -}; - -#ifndef NO_CONSOLE_IO -#define DEBUG_INFO 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*---------------------------------------------------------------------* - * Read jpeg from file (special function) * - *---------------------------------------------------------------------*/ -/*! - * \brief pixReadJpeg() - * - * \param[in] filename - * \param[in] cmapflag 0 for no colormap in returned pix; - * 1 to return an 8 bpp cmapped pix if spp = 3 or 4 - * \param[in] reduction scaling factor: 1, 2, 4 or 8 - * \param[out] pnwarn [optional] number of warnings about - * corrupted data - * \param[in] hint a bitwise OR of L_JPEG_* values; 0 for default - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This is a special function for reading jpeg files.
- *      (2) Use this if you want the jpeg library to create
- *          an 8 bpp colormapped image.
- *      (3) Images reduced by factors of 2, 4 or 8 can be returned
- *          significantly faster than full resolution images.
- *      (4) If the jpeg data is bad, the jpeg library will continue
- *          silently, or return warnings, or attempt to exit.  Depending
- *          on the severity of the data corruption, there are two possible
- *          outcomes:
- *          (a) a possibly damaged pix can be generated, along with zero
- *              or more warnings, or
- *          (b) the library will attempt to exit (caught by our error
- *              handler) and no pix will be returned.
- *          If a pix is generated with at least one warning of data
- *          corruption, and if L_JPEG_FAIL_ON_BAD_DATA is included in %hint,
- *          no pix will be returned.
- *      (5) The possible hint values are given in the enum in imageio.h:
- *            * L_JPEG_READ_LUMINANCE
- *            * L_JPEG_FAIL_ON_BAD_DATA
- *          Default (0) is to do neither.
- * 
- */ -PIX * -pixReadJpeg(const char *filename, - l_int32 cmapflag, - l_int32 reduction, - l_int32 *pnwarn, - l_int32 hint) -{ -l_int32 ret; -l_uint8 *comment; -FILE *fp; -PIX *pix; - - PROCNAME("pixReadJpeg"); - - if (pnwarn) *pnwarn = 0; - if (!filename) - return (PIX *)ERROR_PTR("filename not defined", procName, NULL); - if (cmapflag != 0 && cmapflag != 1) - cmapflag = 0; /* default */ - if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) - return (PIX *)ERROR_PTR("reduction not in {1,2,4,8}", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIX *)ERROR_PTR("image file not found", procName, NULL); - pix = pixReadStreamJpeg(fp, cmapflag, reduction, pnwarn, hint); - if (pix) { - ret = fgetJpegComment(fp, &comment); - if (!ret && comment) - pixSetText(pix, (char *)comment); - LEPT_FREE(comment); - } - fclose(fp); - - if (!pix) - return (PIX *)ERROR_PTR("image not returned", procName, NULL); - return pix; -} - - -/*! - * \brief pixReadStreamJpeg() - * - * \param[in] fp file stream - * \param[in] cmapflag 0 for no colormap in returned pix; - * 1 to return an 8 bpp cmapped pix if spp = 3 or 4 - * \param[in] reduction scaling factor: 1, 2, 4 or 8 - * \param[out] pnwarn [optional] number of warnings - * \param[in] hint a bitwise OR of L_JPEG_* values; 0 for default - * \return pix, or NULL on error - * - * Usage: see pixReadJpeg - *
- * Notes:
- *      (1) The jpeg comment, if it exists, is not stored in the pix.
- * 
- */ -PIX * -pixReadStreamJpeg(FILE *fp, - l_int32 cmapflag, - l_int32 reduction, - l_int32 *pnwarn, - l_int32 hint) -{ -l_int32 cyan, yellow, magenta, black, nwarn; -l_int32 i, j, k, rval, gval, bval; -l_int32 w, h, wpl, spp, ncolors, cindex, ycck, cmyk; -l_uint32 *data; -l_uint32 *line, *ppixel; -JSAMPROW rowbuffer; -PIX *pix; -PIXCMAP *cmap; -struct jpeg_decompress_struct cinfo; -struct jpeg_error_mgr jerr; -jmp_buf jmpbuf; /* must be local to the function */ - - PROCNAME("pixReadStreamJpeg"); - - if (pnwarn) *pnwarn = 0; - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - if (cmapflag != 0 && cmapflag != 1) - cmapflag = 0; /* default */ - if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) - return (PIX *)ERROR_PTR("reduction not in {1,2,4,8}", procName, NULL); - - if (BITS_IN_JSAMPLE != 8) /* set in jmorecfg.h */ - return (PIX *)ERROR_PTR("BITS_IN_JSAMPLE != 8", procName, NULL); - - rewind(fp); - pix = NULL; - rowbuffer = NULL; - - /* Modify the jpeg error handling to catch fatal errors */ - cinfo.err = jpeg_std_error(&jerr); - jerr.error_exit = jpeg_error_catch_all_1; - cinfo.client_data = (void *)&jmpbuf; - if (setjmp(jmpbuf)) { - pixDestroy(&pix); - LEPT_FREE(rowbuffer); - return (PIX *)ERROR_PTR("internal jpeg error", procName, NULL); - } - - /* Initialize jpeg structs for decompression */ - jpeg_create_decompress(&cinfo); - jpeg_stdio_src(&cinfo, fp); - jpeg_read_header(&cinfo, TRUE); - cinfo.scale_denom = reduction; - cinfo.scale_num = 1; - jpeg_calc_output_dimensions(&cinfo); - if (hint & L_JPEG_READ_LUMINANCE) { - cinfo.out_color_space = JCS_GRAYSCALE; - spp = 1; - L_INFO("reading luminance channel only\n", procName); - } else { - spp = cinfo.out_color_components; - } - - /* Allocate the image and a row buffer */ - w = cinfo.output_width; - h = cinfo.output_height; - ycck = (cinfo.jpeg_color_space == JCS_YCCK && spp == 4 && cmapflag == 0); - cmyk = (cinfo.jpeg_color_space == JCS_CMYK && spp == 4 && cmapflag 
== 0); - if (spp != 1 && spp != 3 && !ycck && !cmyk) { - jpeg_destroy_decompress(&cinfo); - return (PIX *)ERROR_PTR("spp must be 1 or 3, or YCCK or CMYK", - procName, NULL); - } - if ((spp == 3 && cmapflag == 0) || ycck || cmyk) { /* rgb or 4 bpp color */ - rowbuffer = (JSAMPROW)LEPT_CALLOC(sizeof(JSAMPLE), (size_t)spp * w); - pix = pixCreate(w, h, 32); - } else { /* 8 bpp gray or colormapped */ - rowbuffer = (JSAMPROW)LEPT_CALLOC(sizeof(JSAMPLE), w); - pix = pixCreate(w, h, 8); - } - pixSetInputFormat(pix, IFF_JFIF_JPEG); - if (!rowbuffer || !pix) { - LEPT_FREE(rowbuffer); - pixDestroy(&pix); - jpeg_destroy_decompress(&cinfo); - return (PIX *)ERROR_PTR("rowbuffer or pix not made", procName, NULL); - } - - /* Initialize decompression. Set up a colormap for color - * quantization if requested. */ - if (spp == 1) { /* Grayscale or colormapped */ - jpeg_start_decompress(&cinfo); - } else { /* Color; spp == 3 or YCCK or CMYK */ - if (cmapflag == 0) { /* 24 bit color in 32 bit pix or YCCK/CMYK */ - cinfo.quantize_colors = FALSE; - jpeg_start_decompress(&cinfo); - } else { /* Color quantize to 8 bits */ - cinfo.quantize_colors = TRUE; - cinfo.desired_number_of_colors = 256; - jpeg_start_decompress(&cinfo); - - /* Construct a pix cmap */ - cmap = pixcmapCreate(8); - ncolors = cinfo.actual_number_of_colors; - for (cindex = 0; cindex < ncolors; cindex++) { - rval = cinfo.colormap[0][cindex]; - gval = cinfo.colormap[1][cindex]; - bval = cinfo.colormap[2][cindex]; - pixcmapAddColor(cmap, rval, gval, bval); - } - pixSetColormap(pix, cmap); - } - } - wpl = pixGetWpl(pix); - data = pixGetData(pix); - - /* Decompress. Unfortunately, we cannot use the return value - * from jpeg_read_scanlines() to determine if there was a problem - * with the data; it always appears to return 1. We can only - * tell from the warnings during decoding, such as "premature - * end of data segment". The default behavior is to return an - * image even if there are warnings. 
However, by setting the - * hint to have the same bit flag as L_JPEG_FAIL_ON_BAD_DATA, - * no image will be returned if there are any warnings. */ - for (i = 0; i < h; i++) { - if (jpeg_read_scanlines(&cinfo, &rowbuffer, (JDIMENSION)1) == 0) { - L_ERROR("read error at scanline %d\n", procName, i); - pixDestroy(&pix); - jpeg_destroy_decompress(&cinfo); - LEPT_FREE(rowbuffer); - return (PIX *)ERROR_PTR("bad data", procName, NULL); - } - - /* -- 24 bit color -- */ - if ((spp == 3 && cmapflag == 0) || ycck || cmyk) { - ppixel = data + i * wpl; - if (spp == 3) { - for (j = k = 0; j < w; j++) { - SET_DATA_BYTE(ppixel, COLOR_RED, rowbuffer[k++]); - SET_DATA_BYTE(ppixel, COLOR_GREEN, rowbuffer[k++]); - SET_DATA_BYTE(ppixel, COLOR_BLUE, rowbuffer[k++]); - ppixel++; - } - } else { - /* This is a conversion from CMYK -> RGB that ignores - color profiles, and is invoked when the image header - claims to be in CMYK or YCCK colorspace. If in YCCK, - libjpeg may be doing YCCK -> CMYK under the hood. - To understand why the colors need to be inverted on - read-in for the Adobe marker, see the "Special - color spaces" section of "Using the IJG JPEG - Library" by Thomas G. Lane: - http://www.jpegcameras.com/libjpeg/libjpeg-3.html#ss3.1 - The non-Adobe conversion is equivalent to: - rval = black - black * cyan / 255 - ... - The Adobe conversion is equivalent to: - rval = black - black * (255 - cyan) / 255 - ... - Note that cyan is the complement to red, and we - are subtracting the complement color (weighted - by black) from black. For Adobe conversions, - where they've already inverted the CMY but not - the K, we have to invert again. The results - must be clipped to [0 ... 255]. 
*/ - for (j = k = 0; j < w; j++) { - cyan = rowbuffer[k++]; - magenta = rowbuffer[k++]; - yellow = rowbuffer[k++]; - black = rowbuffer[k++]; - if (cinfo.saw_Adobe_marker) { - rval = (black * cyan) / 255; - gval = (black * magenta) / 255; - bval = (black * yellow) / 255; - } else { - rval = black * (255 - cyan) / 255; - gval = black * (255 - magenta) / 255; - bval = black * (255 - yellow) / 255; - } - rval = L_MIN(L_MAX(rval, 0), 255); - gval = L_MIN(L_MAX(gval, 0), 255); - bval = L_MIN(L_MAX(bval, 0), 255); - composeRGBPixel(rval, gval, bval, ppixel); - ppixel++; - } - } - } else { /* 8 bpp grayscale or colormapped pix */ - line = data + i * wpl; - for (j = 0; j < w; j++) - SET_DATA_BYTE(line, j, rowbuffer[j]); - } - } - - nwarn = cinfo.err->num_warnings; - if (pnwarn) *pnwarn = nwarn; - - /* If the pixel density is neither 1 nor 2, it may not be defined. - * In that case, don't set the resolution. */ - if (cinfo.density_unit == 1) { /* pixels per inch */ - pixSetXRes(pix, cinfo.X_density); - pixSetYRes(pix, cinfo.Y_density); - } else if (cinfo.density_unit == 2) { /* pixels per centimeter */ - pixSetXRes(pix, (l_int32)((l_float32)cinfo.X_density * 2.54 + 0.5)); - pixSetYRes(pix, (l_int32)((l_float32)cinfo.Y_density * 2.54 + 0.5)); - } - - if (cinfo.output_components != spp) - lept_stderr("output spp = %d, spp = %d\n", - cinfo.output_components, spp); - - jpeg_finish_decompress(&cinfo); - jpeg_destroy_decompress(&cinfo); - LEPT_FREE(rowbuffer); - - if (nwarn > 0) { - if (hint & L_JPEG_FAIL_ON_BAD_DATA) { - L_ERROR("fail with %d warning(s) of bad data\n", procName, nwarn); - pixDestroy(&pix); - } else { - L_WARNING("%d warning(s) of bad data\n", procName, nwarn); - } - } - - return pix; -} - - -/*---------------------------------------------------------------------* - * Read jpeg metadata from file * - *---------------------------------------------------------------------*/ -/*! 
- * \brief readHeaderJpeg() - * - * \param[in] filename - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pspp [optional] samples/pixel - * \param[out] pycck [optional] 1 if ycck color space; 0 otherwise - * \param[out] pcmyk [optional] 1 if cmyk color space; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderJpeg(const char *filename, - l_int32 *pw, - l_int32 *ph, - l_int32 *pspp, - l_int32 *pycck, - l_int32 *pcmyk) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderJpeg"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pspp) *pspp = 0; - if (pycck) *pycck = 0; - if (pcmyk) *pcmyk = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pw && !ph && !pspp && !pycck && !pcmyk) - return ERROR_INT("no results requested", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = freadHeaderJpeg(fp, pw, ph, pspp, pycck, pcmyk); - fclose(fp); - return ret; -} - - -/*! 
- * \brief freadHeaderJpeg() - * - * \param[in] fp file stream - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pspp [optional] samples/pixel - * \param[out] pycck [optional] 1 if ycck color space; 0 otherwise - * \param[out] pcmyk [optional] 1 if cmyk color space; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -freadHeaderJpeg(FILE *fp, - l_int32 *pw, - l_int32 *ph, - l_int32 *pspp, - l_int32 *pycck, - l_int32 *pcmyk) -{ -l_int32 spp, w, h; -struct jpeg_decompress_struct cinfo; -struct jpeg_error_mgr jerr; -jmp_buf jmpbuf; /* must be local to the function */ - - PROCNAME("freadHeaderJpeg"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pspp) *pspp = 0; - if (pycck) *pycck = 0; - if (pcmyk) *pcmyk = 0; - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pw && !ph && !pspp && !pycck && !pcmyk) - return ERROR_INT("no results requested", procName, 1); - - rewind(fp); - - /* Modify the jpeg error handling to catch fatal errors */ - cinfo.err = jpeg_std_error(&jerr); - cinfo.client_data = (void *)&jmpbuf; - jerr.error_exit = jpeg_error_catch_all_1; - if (setjmp(jmpbuf)) - return ERROR_INT("internal jpeg error", procName, 1); - - /* Initialize the jpeg structs for reading the header */ - jpeg_create_decompress(&cinfo); - jpeg_stdio_src(&cinfo, fp); - jpeg_read_header(&cinfo, TRUE); - jpeg_calc_output_dimensions(&cinfo); - spp = cinfo.out_color_components; - w = cinfo.output_width; - h = cinfo.output_height; - if (w < 1 || h < 1 || spp < 1 || spp > 4) { - jpeg_destroy_decompress(&cinfo); - rewind(fp); - return ERROR_INT("bad jpeg image parameters", procName, 1); - } - - if (pspp) *pspp = spp; - if (pw) *pw = cinfo.output_width; - if (ph) *ph = cinfo.output_height; - if (pycck) *pycck = - (cinfo.jpeg_color_space == JCS_YCCK && spp == 4); - if (pcmyk) *pcmyk = - (cinfo.jpeg_color_space == JCS_CMYK && spp == 4); - - jpeg_destroy_decompress(&cinfo); - rewind(fp); - return 0; -} - - -/* - * \brief fgetJpegResolution() - * 
- * \param[in] fp file stream - * \param[out] pxres, pyres resolutions - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If neither resolution field is set, this is not an error;
- *          the returned resolution values are 0 (designating 'unknown').
- *      (2) Side-effect: this rewinds the stream.
- * 
- */ -l_int32 -fgetJpegResolution(FILE *fp, - l_int32 *pxres, - l_int32 *pyres) -{ -struct jpeg_decompress_struct cinfo; -struct jpeg_error_mgr jerr; -jmp_buf jmpbuf; /* must be local to the function */ - - PROCNAME("fgetJpegResolution"); - - if (pxres) *pxres = 0; - if (pyres) *pyres = 0; - if (!pxres || !pyres) - return ERROR_INT("&xres and &yres not both defined", procName, 1); - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - - rewind(fp); - - /* Modify the jpeg error handling to catch fatal errors */ - cinfo.err = jpeg_std_error(&jerr); - cinfo.client_data = (void *)&jmpbuf; - jerr.error_exit = jpeg_error_catch_all_1; - if (setjmp(jmpbuf)) - return ERROR_INT("internal jpeg error", procName, 1); - - /* Initialize the jpeg structs for reading the header */ - jpeg_create_decompress(&cinfo); - jpeg_stdio_src(&cinfo, fp); - jpeg_read_header(&cinfo, TRUE); - - /* It is common for the input resolution to be omitted from the - * jpeg file. If density_unit is not 1 or 2, simply return 0. */ - if (cinfo.density_unit == 1) { /* pixels/inch */ - *pxres = cinfo.X_density; - *pyres = cinfo.Y_density; - } else if (cinfo.density_unit == 2) { /* pixels/cm */ - *pxres = (l_int32)((l_float32)cinfo.X_density * 2.54 + 0.5); - *pyres = (l_int32)((l_float32)cinfo.Y_density * 2.54 + 0.5); - } - - jpeg_destroy_decompress(&cinfo); - rewind(fp); - return 0; -} - - -/* - * \brief fgetJpegComment() - * - * \param[in] fp file stream opened for read - * \param[out] pcomment comment - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Side-effect: this rewinds the stream.
- * 
- */ -l_int32 -fgetJpegComment(FILE *fp, - l_uint8 **pcomment) -{ -struct jpeg_decompress_struct cinfo; -struct jpeg_error_mgr jerr; -struct callback_data cb_data; /* contains local jmp_buf */ - - PROCNAME("fgetJpegComment"); - - if (!pcomment) - return ERROR_INT("&comment not defined", procName, 1); - *pcomment = NULL; - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - - rewind(fp); - - /* Modify the jpeg error handling to catch fatal errors */ - cinfo.err = jpeg_std_error(&jerr); - jerr.error_exit = jpeg_error_catch_all_2; - cb_data.comment = NULL; - cinfo.client_data = (void *)&cb_data; - if (setjmp(cb_data.jmpbuf)) { - LEPT_FREE(cb_data.comment); - return ERROR_INT("internal jpeg error", procName, 1); - } - - /* Initialize the jpeg structs for reading the header */ - jpeg_create_decompress(&cinfo); - jpeg_set_marker_processor(&cinfo, JPEG_COM, jpeg_comment_callback); - jpeg_stdio_src(&cinfo, fp); - jpeg_read_header(&cinfo, TRUE); - - /* Save the result */ - *pcomment = cb_data.comment; - jpeg_destroy_decompress(&cinfo); - rewind(fp); - return 0; -} - - -/*---------------------------------------------------------------------* - * Writing Jpeg * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixWriteJpeg() - * - * \param[in] filename - * \param[in] pix any depth; cmap is OK - * \param[in] quality 1 - 100; 75 is default - * \param[in] progressive 0 for baseline sequential; 1 for progressive - * \return 0 if OK; 1 on error - */ -l_ok -pixWriteJpeg(const char *filename, - PIX *pix, - l_int32 quality, - l_int32 progressive) -{ -FILE *fp; - - PROCNAME("pixWriteJpeg"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - - if (pixWriteStreamJpeg(fp, pix, quality, progressive)) { - fclose(fp); - return ERROR_INT("pix not written to stream", procName, 1); - } - - fclose(fp); - return 0; -} - - -/*! - * \brief pixWriteStreamJpeg() - * - * \param[in] fp file stream - * \param[in] pixs any depth; cmap is OK - * \param[in] quality 1 - 100; 75 is default value; 0 is also default - * \param[in] progressive 0 for baseline sequential; 1 for progressive - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Progressive encoding gives better compression, at the
- *          expense of slower encoding and decoding.
- *      (2) Standard chroma subsampling is 2x2 on both the U and V
- *          channels.  For highest quality, use no subsampling; this
- *          option is set by pixSetChromaSampling(pix, 0).
- *      (3) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16
- *          and 32 bpp.  However, it is possible, and in some cases desirable,
- *          to write out a jpeg file using an rgb pix that has 24 bpp.
- *          This can be created by appending the raster data for a 24 bpp
- *          image (with proper scanline padding) directly to a 24 bpp
- *          pix that was created without a data array.
- *      (4) There are two compression paths in this function:
- *          * Grayscale image, no colormap: compress as 8 bpp image.
- *          * rgb full color image: copy each line into the color
- *            line buffer, and compress as three 8 bpp images.
- *      (5) Under the covers, the jpeg library transforms rgb to a
- *          luminance-chromaticity triple, each component of which is
- *          also 8 bits, and compresses that.  It uses 2 Huffman tables,
- *          a higher resolution one (with more quantization levels)
- *          for luminosity and a lower resolution one for the chromas.
- * 
- */ -l_ok -pixWriteStreamJpeg(FILE *fp, - PIX *pixs, - l_int32 quality, - l_int32 progressive) -{ -l_int32 xres, yres; -l_int32 i, j, k; -l_int32 w, h, d, wpl, spp, colorflag, rowsamples; -l_uint32 *ppixel, *line, *data; -JSAMPROW rowbuffer; -PIX *pix; -struct jpeg_compress_struct cinfo; -struct jpeg_error_mgr jerr; -char *text; -jmp_buf jmpbuf; /* must be local to the function */ - - PROCNAME("pixWriteStreamJpeg"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (quality <= 0) quality = 75; /* default */ - if (quality > 100) { - L_ERROR("invalid jpeg quality; setting to 75\n", procName); - quality = 75; - } - - /* If necessary, convert the pix so that it can be jpeg compressed. - * The colormap is removed based on the source, so if the colormap - * has only gray colors, the image will be compressed with spp = 1. */ - pixGetDimensions(pixs, &w, &h, &d); - pix = NULL; - if (pixGetColormap(pixs) != NULL) { - L_INFO("removing colormap; may be better to compress losslessly\n", - procName); - pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - } else if (d >= 8 && d != 16) { /* normal case; no rewrite */ - pix = pixClone(pixs); - } else if (d < 8 || d == 16) { - L_INFO("converting from %d to 8 bpp\n", procName, d); - pix = pixConvertTo8(pixs, 0); /* 8 bpp, no cmap */ - } else { - L_ERROR("unknown pix type with d = %d and no cmap\n", procName, d); - return 1; - } - if (!pix) - return ERROR_INT("pix not made", procName, 1); - pixSetPadBits(pix, 0); - - rewind(fp); - rowbuffer = NULL; - - /* Modify the jpeg error handling to catch fatal errors */ - cinfo.err = jpeg_std_error(&jerr); - cinfo.client_data = (void *)&jmpbuf; - jerr.error_exit = jpeg_error_catch_all_1; - if (setjmp(jmpbuf)) { - LEPT_FREE(rowbuffer); - pixDestroy(&pix); - return ERROR_INT("internal jpeg error", procName, 1); - } - - /* Initialize the jpeg structs for compression */ - jpeg_create_compress(&cinfo); - 
jpeg_stdio_dest(&cinfo, fp); - cinfo.image_width = w; - cinfo.image_height = h; - - /* Set the color space and number of components */ - d = pixGetDepth(pix); - if (d == 8) { - colorflag = 0; /* 8 bpp grayscale; no cmap */ - cinfo.input_components = 1; - cinfo.in_color_space = JCS_GRAYSCALE; - } else { /* d == 32 || d == 24 */ - colorflag = 1; /* rgb */ - cinfo.input_components = 3; - cinfo.in_color_space = JCS_RGB; - } - - jpeg_set_defaults(&cinfo); - - /* Setting optimize_coding to TRUE seems to improve compression - * by approx 2-4 percent, and increases comp time by approx 20%. */ - cinfo.optimize_coding = FALSE; - - /* Set resolution in pixels/in (density_unit: 1 = in, 2 = cm) */ - xres = pixGetXRes(pix); - yres = pixGetYRes(pix); - if ((xres != 0) && (yres != 0)) { - cinfo.density_unit = 1; /* designates pixels per inch */ - cinfo.X_density = xres; - cinfo.Y_density = yres; - } - - /* Set the quality and progressive parameters */ - jpeg_set_quality(&cinfo, quality, TRUE); - if (progressive) - jpeg_simple_progression(&cinfo); - - /* Set the chroma subsampling parameters. This is done in - * YUV color space. The Y (intensity) channel is never subsampled. - * The standard subsampling is 2x2 on both the U and V channels. - * Notation on this is confusing. For a nice illustrations, see - * http://en.wikipedia.org/wiki/Chroma_subsampling - * The standard subsampling is written as 4:2:0. - * We allow high quality where there is no subsampling on the - * chroma channels: denoted as 4:4:4. */ - if (pixs->special == L_NO_CHROMA_SAMPLING_JPEG) { - cinfo.comp_info[0].h_samp_factor = 1; - cinfo.comp_info[0].v_samp_factor = 1; - cinfo.comp_info[1].h_samp_factor = 1; - cinfo.comp_info[1].v_samp_factor = 1; - cinfo.comp_info[2].h_samp_factor = 1; - cinfo.comp_info[2].v_samp_factor = 1; - } - - jpeg_start_compress(&cinfo, TRUE); - - /* Cap the text the length limit, 65533, for JPEG_COM payload. - * Just to be safe, subtract 100 to cover the Adobe name space. 
*/ - if ((text = pixGetText(pix)) != NULL) { - if (strlen(text) > 65433) { - L_WARNING("text is %zu bytes; clipping to 65433\n", - procName, strlen(text)); - text[65433] = '\0'; - } - jpeg_write_marker(&cinfo, JPEG_COM, (const JOCTET *)text, strlen(text)); - } - - /* Allocate row buffer */ - spp = cinfo.input_components; - rowsamples = spp * w; - if ((rowbuffer = (JSAMPROW)LEPT_CALLOC(sizeof(JSAMPLE), rowsamples)) - == NULL) { - pixDestroy(&pix); - return ERROR_INT("calloc fail for rowbuffer", procName, 1); - } - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (colorflag == 0) { /* 8 bpp gray */ - for (j = 0; j < w; j++) - rowbuffer[j] = GET_DATA_BYTE(line, j); - } else { /* colorflag == 1 */ - if (d == 24) { /* See note 3 above; special case of 24 bpp rgb */ - jpeg_write_scanlines(&cinfo, (JSAMPROW *)&line, 1); - } else { /* standard 32 bpp rgb */ - ppixel = line; - for (j = k = 0; j < w; j++) { - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_RED); - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_GREEN); - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_BLUE); - ppixel++; - } - } - } - if (d != 24) - jpeg_write_scanlines(&cinfo, &rowbuffer, 1); - } - jpeg_finish_compress(&cinfo); - - pixDestroy(&pix); - LEPT_FREE(rowbuffer); - jpeg_destroy_compress(&cinfo); - return 0; -} - - -/*---------------------------------------------------------------------* - * Read/write to memory * - *---------------------------------------------------------------------*/ - -/*! 
- * \brief pixReadMemJpeg() - * - * \param[in] data const; jpeg-encoded - * \param[in] size of data - * \param[in] cmflag colormap flag 0 means return RGB image if color; - * 1 means create a colormap and return - * an 8 bpp colormapped image if color - * \param[in] reduction scaling factor: 1, 2, 4 or 8 - * \param[out] pnwarn [optional] number of warnings - * \param[in] hint a bitwise OR of L_JPEG_* values; 0 for default - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) The %size byte of %data must be a null character.
- *      (2) The only hint flag so far is L_JPEG_READ_LUMINANCE,
- *          given in the enum in imageio.h.
- *      (3) See pixReadJpeg() for usage.
- * 
- */ -PIX * -pixReadMemJpeg(const l_uint8 *data, - size_t size, - l_int32 cmflag, - l_int32 reduction, - l_int32 *pnwarn, - l_int32 hint) -{ -l_int32 ret; -l_uint8 *comment; -FILE *fp; -PIX *pix; - - PROCNAME("pixReadMemJpeg"); - - if (pnwarn) *pnwarn = 0; - if (!data) - return (PIX *)ERROR_PTR("data not defined", procName, NULL); - - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIX *)ERROR_PTR("stream not opened", procName, NULL); - pix = pixReadStreamJpeg(fp, cmflag, reduction, pnwarn, hint); - if (pix) { - ret = fgetJpegComment(fp, &comment); - if (!ret && comment) { - pixSetText(pix, (char *)comment); - LEPT_FREE(comment); - } - } - fclose(fp); - if (!pix) L_ERROR("pix not read\n", procName); - return pix; -} - - -/*! - * \brief readHeaderMemJpeg() - * - * \param[in] data const; jpeg-encoded - * \param[in] size of data - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pspp [optional] samples/pixel - * \param[out] pycck [optional] 1 if ycck color space; 0 otherwise - * \param[out] pcmyk [optional] 1 if cmyk color space; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderMemJpeg(const l_uint8 *data, - size_t size, - l_int32 *pw, - l_int32 *ph, - l_int32 *pspp, - l_int32 *pycck, - l_int32 *pcmyk) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderMemJpeg"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pspp) *pspp = 0; - if (pycck) *pycck = 0; - if (pcmyk) *pcmyk = 0; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (!pw && !ph && !pspp && !pycck && !pcmyk) - return ERROR_INT("no results requested", procName, 1); - - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = freadHeaderJpeg(fp, pw, ph, pspp, pycck, pcmyk); - fclose(fp); - return ret; -} - - -/*! 
- * \brief readResolutionMemJpeg() - * - * \param[in] data const; jpeg-encoded - * \param[in] size of data - * \param[out] pxres [optional] - * \param[out] pyres [optional] - * \return 0 if OK, 1 on error - */ -l_ok -readResolutionMemJpeg(const l_uint8 *data, - size_t size, - l_int32 *pxres, - l_int32 *pyres) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readResolutionMemJpeg"); - - if (pxres) *pxres = 0; - if (pyres) *pyres = 0; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (!pxres && !pyres) - return ERROR_INT("no results requested", procName, 1); - - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = fgetJpegResolution(fp, pxres, pyres); - fclose(fp); - return ret; -} - - -/*! - * \brief pixWriteMemJpeg() - * - * \param[out] pdata data of jpeg compressed image - * \param[out] psize size of returned data - * \param[in] pix any depth; cmap is OK - * \param[in] quality 1 - 100; 75 is default value; 0 is also default - * \param[in] progressive 0 for baseline sequential; 1 for progressive - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteStreamJpeg() for usage.  This version writes to
- *          memory instead of to a file stream.
- * 
- */ -l_ok -pixWriteMemJpeg(l_uint8 **pdata, - size_t *psize, - PIX *pix, - l_int32 quality, - l_int32 progressive) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixWriteMemJpeg"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixWriteStreamJpeg(fp, pix, quality, progressive); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixWriteStreamJpeg(fp, pix, quality, progressive); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*---------------------------------------------------------------------* - * Setting special flag for chroma sampling on write * - *---------------------------------------------------------------------*/ -/*! - * \brief pixSetChromaSampling() - * - * \param[in] pix - * \param[in] sampling 1 for subsampling; 0 for no subsampling - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The default is for 2x2 chroma subsampling because the files are
- *          considerably smaller and the appearance is typically satisfactory.
- *          To get full resolution output in the chroma channels for
- *          jpeg writing, call this with %sampling == 0.
- * 
- */ -l_ok -pixSetChromaSampling(PIX *pix, - l_int32 sampling) -{ - PROCNAME("pixSetChromaSampling"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1 ); - if (sampling) - pixSetSpecial(pix, 0); /* default */ - else - pixSetSpecial(pix, L_NO_CHROMA_SAMPLING_JPEG); - return 0; -} - - -/*---------------------------------------------------------------------* - * Static system helpers * - *---------------------------------------------------------------------*/ -/*! - * \brief jpeg_error_catch_all_1() - * - * Notes: - * (1) The default jpeg error_exit() kills the process, but we - * never want a call to leptonica to kill a process. If you - * do want this behavior, remove the calls to these error handlers. - * (2) This is used where cinfo->client_data holds only jmpbuf. - */ -static void -jpeg_error_catch_all_1(j_common_ptr cinfo) -{ - jmp_buf *pjmpbuf = (jmp_buf *)cinfo->client_data; - (*cinfo->err->output_message) (cinfo); - jpeg_destroy(cinfo); - longjmp(*pjmpbuf, 1); - return; -} - -/*! - * \brief jpeg_error_catch_all_2() - * - * Notes: - * (1) This is used where cinfo->client_data needs to hold both - * the jmpbuf and the jpeg comment data. - * (2) On error, the comment data will be freed by the caller. - */ -static void -jpeg_error_catch_all_2(j_common_ptr cinfo) -{ -struct callback_data *pcb_data; - - pcb_data = (struct callback_data *)cinfo->client_data; - (*cinfo->err->output_message) (cinfo); - jpeg_destroy(cinfo); - longjmp(pcb_data->jmpbuf, 1); - return; -} - -/* This function was borrowed from libjpeg */ -static l_uint8 -jpeg_getc(j_decompress_ptr cinfo) -{ -struct jpeg_source_mgr *datasrc; - - datasrc = cinfo->src; - if (datasrc->bytes_in_buffer == 0) { - if (! (*datasrc->fill_input_buffer) (cinfo)) { - return 0; - } - } - datasrc->bytes_in_buffer--; - return GETJOCTET(*datasrc->next_input_byte++); -} - -/*! - * \brief jpeg_comment_callback() - * - * Notes: - * (1) This is used to read the jpeg comment (JPEG_COM). 
- * See the note above the declaration for why it returns - * a "boolean". - */ -static boolean -jpeg_comment_callback(j_decompress_ptr cinfo) -{ -l_int32 length, i; -l_uint8 *comment; -struct callback_data *pcb_data; - - /* Get the size of the comment */ - length = jpeg_getc(cinfo) << 8; - length += jpeg_getc(cinfo); - length -= 2; - if (length <= 0) - return 1; - - /* Extract the comment from the file */ - if ((comment = (l_uint8 *)LEPT_CALLOC(length + 1, sizeof(l_uint8))) == NULL) - return 0; - for (i = 0; i < length; i++) - comment[i] = jpeg_getc(cinfo); - - /* Save the comment and return */ - pcb_data = (struct callback_data *)cinfo->client_data; - if (pcb_data->comment) { /* clear before overwriting previous comment */ - LEPT_FREE(pcb_data->comment); - pcb_data->comment = NULL; - } - pcb_data->comment = comment; - return 1; -} - -/* --------------------------------------------*/ -#endif /* HAVE_LIBJPEG */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jpegiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jpegiostub.c deleted file mode 100644 index 7fa18142..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/jpegiostub.c +++ /dev/null @@ -1,151 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file jpegiostub.c - *
- *
- *     Stubs for jpegio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !HAVE_LIBJPEG /* defined in environ.h */ -/* --------------------------------------------*/ - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadJpeg(const char *filename, l_int32 cmflag, l_int32 reduction, - l_int32 *pnwarn, l_int32 hint) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadJpeg", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadStreamJpeg(FILE *fp, l_int32 cmflag, l_int32 reduction, - l_int32 *pnwarn, l_int32 hint) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamJpeg", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderJpeg(const char *filename, l_int32 *pw, l_int32 *ph, - l_int32 *pspp, l_int32 *pycck, l_int32 *pcmyk) -{ - return ERROR_INT("function not present", "readHeaderJpeg", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok freadHeaderJpeg(FILE *fp, l_int32 *pw, l_int32 *ph, - l_int32 *pspp, l_int32 *pycck, l_int32 *pcmyk) -{ - return ERROR_INT("function not present", "freadHeaderJpeg", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_int32 fgetJpegResolution(FILE *fp, l_int32 *pxres, l_int32 *pyres) -{ - return ERROR_INT("function not present", "fgetJpegResolution", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_int32 fgetJpegComment(FILE *fp, l_uint8 **pcomment) -{ - return ERROR_INT("function not present", "fgetJpegComment", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteJpeg(const char *filename, PIX *pix, l_int32 quality, - l_int32 progressive) -{ - return ERROR_INT("function not present", "pixWriteJpeg", 1); -} - -/* 
----------------------------------------------------------------------*/ - -l_ok pixWriteStreamJpeg(FILE *fp, PIX *pix, l_int32 quality, - l_int32 progressive) -{ - return ERROR_INT("function not present", "pixWriteStreamJpeg", 1); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemJpeg(const l_uint8 *cdata, size_t size, l_int32 cmflag, - l_int32 reduction, l_int32 *pnwarn, l_int32 hint) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadMemJpeg", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderMemJpeg(const l_uint8 *cdata, size_t size, - l_int32 *pw, l_int32 *ph, l_int32 *pspp, - l_int32 *pycck, l_int32 *pcmyk) -{ - return ERROR_INT("function not present", "readHeaderMemJpeg", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readResolutionMemJpeg(const l_uint8 *data, size_t size, - l_int32 *pxres, l_int32 *pyres) -{ - return ERROR_INT("function not present", "readResolutionMemJpeg", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemJpeg(l_uint8 **pdata, size_t *psize, PIX *pix, - l_int32 quality, l_int32 progressive) -{ - return ERROR_INT("function not present", "pixWriteMemJpeg", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixSetChromaSampling(PIX *pix, l_int32 sampling) -{ - return ERROR_INT("function not present", "pixSetChromaSampling", 1); -} - -/* ----------------------------------------------------------------------*/ - -/* --------------------------------------------*/ -#endif /* !HAVE_LIBJPEG */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/kernel.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/kernel.c deleted file mode 100644 index ffb16cef..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/kernel.c +++ /dev/null @@ -1,1288 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file kernel.c - *
- *
- *      Basic operations on kernels for image convolution
- *
- *         Create/destroy/copy
- *            L_KERNEL   *kernelCreate()
- *            void        kernelDestroy()
- *            L_KERNEL   *kernelCopy()
- *
- *         Accessors:
- *            l_int32     kernelGetElement()
- *            l_int32     kernelSetElement()
- *            l_int32     kernelGetParameters()
- *            l_int32     kernelSetOrigin()
- *            l_int32     kernelGetSum()
- *            l_int32     kernelGetMinMax()
- *
- *         Normalize/invert
- *            L_KERNEL   *kernelNormalize()
- *            L_KERNEL   *kernelInvert()
- *
- *         Helper function
- *            l_float32 **create2dFloatArray()
- *
- *         Serialized I/O
- *            L_KERNEL   *kernelRead()
- *            L_KERNEL   *kernelReadStream()
- *            l_int32     kernelWrite()
- *            l_int32     kernelWriteStream()
- *
- *         Making a kernel from a compiled string
- *            L_KERNEL   *kernelCreateFromString()
- *
- *         Making a kernel from a simple file format
- *            L_KERNEL   *kernelCreateFromFile()
- *
- *         Making a kernel from a Pix
- *            L_KERNEL   *kernelCreateFromPix()
- *
- *         Display a kernel in a pix
- *            PIX        *kernelDisplayInPix()
- *
- *         Parse string to extract numbers
- *            NUMA       *parseStringForNumbers()
- *
- *      Simple parametric kernels
- *            L_KERNEL   *makeFlatKernel()
- *            L_KERNEL   *makeGaussianKernel()
- *            L_KERNEL   *makeGaussianKernelSep()
- *            L_KERNEL   *makeDoGKernel()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Array size must be > 0 and not larger than this */ -static const l_uint32 MaxArraySize = 100000; - -/*------------------------------------------------------------------------* - * Create / Destroy * - *------------------------------------------------------------------------*/ -/*! - * \brief kernelCreate() - * - * \param[in] height, width - * \return kernel, or NULL on error - * - *
- * Notes:
- *      (1) kernelCreate() initializes all values to 0.
- *      (2) After this call, (cy,cx) and nonzero data values must be
- *          assigned.
- *      (2) The number of kernel elements must be less than 2^29.
- * 
- */ -L_KERNEL * -kernelCreate(l_int32 height, - l_int32 width) -{ -l_uint64 size64; -L_KERNEL *kel; - - PROCNAME("kernelCreate"); - - if (width <= 0) - return (L_KERNEL *)ERROR_PTR("width must be > 0", procName, NULL); - if (height <= 0) - return (L_KERNEL *)ERROR_PTR("height must be > 0", procName, NULL); - - /* Avoid overflow in malloc arg */ - size64 = (l_uint64)width * (l_uint64)height; - if (size64 >= (1LL << 29)) { - L_ERROR("requested width = %d, height = %d\n", procName, width, height); - return (L_KERNEL *)ERROR_PTR("size >= 2^29", procName, NULL); - } - - kel = (L_KERNEL *)LEPT_CALLOC(1, sizeof(L_KERNEL)); - kel->sy = height; - kel->sx = width; - if ((kel->data = create2dFloatArray(height, width)) == NULL) { - LEPT_FREE(kel); - return (L_KERNEL *)ERROR_PTR("data not allocated", procName, NULL); - } - return kel; -} - - -/*! - * \brief kernelDestroy() - * - * \param[in,out] pkel will be set to null before returning - * \return void - */ -void -kernelDestroy(L_KERNEL **pkel) -{ -l_int32 i; -L_KERNEL *kel; - - PROCNAME("kernelDestroy"); - - if (pkel == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - if ((kel = *pkel) == NULL) - return; - - for (i = 0; i < kel->sy; i++) - LEPT_FREE(kel->data[i]); - LEPT_FREE(kel->data); - LEPT_FREE(kel); - - *pkel = NULL; - return; -} - - -/*! 
- * \brief kernelCopy() - * - * \param[in] kels source kernel - * \return keld copy of kels, or NULL on error - */ -L_KERNEL * -kernelCopy(L_KERNEL *kels) -{ -l_int32 i, j, sx, sy, cx, cy; -L_KERNEL *keld; - - PROCNAME("kernelCopy"); - - if (!kels) - return (L_KERNEL *)ERROR_PTR("kels not defined", procName, NULL); - - kernelGetParameters(kels, &sy, &sx, &cy, &cx); - if ((keld = kernelCreate(sy, sx)) == NULL) - return (L_KERNEL *)ERROR_PTR("keld not made", procName, NULL); - keld->cy = cy; - keld->cx = cx; - for (i = 0; i < sy; i++) - for (j = 0; j < sx; j++) - keld->data[i][j] = kels->data[i][j]; - - return keld; -} - - -/*----------------------------------------------------------------------* - * Accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelGetElement() - * - * \param[in] kel - * \param[in] row - * \param[in] col - * \param[out] pval - * \return 0 if OK; 1 on error - */ -l_ok -kernelGetElement(L_KERNEL *kel, - l_int32 row, - l_int32 col, - l_float32 *pval) -{ - PROCNAME("kernelGetElement"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!kel) - return ERROR_INT("kernel not defined", procName, 1); - if (row < 0 || row >= kel->sy) - return ERROR_INT("kernel row out of bounds", procName, 1); - if (col < 0 || col >= kel->sx) - return ERROR_INT("kernel col out of bounds", procName, 1); - - *pval = kel->data[row][col]; - return 0; -} - - -/*! 
- * \brief kernelSetElement() - * - * \param[in] kel kernel - * \param[in] row - * \param[in] col - * \param[in] val - * \return 0 if OK; 1 on error - */ -l_ok -kernelSetElement(L_KERNEL *kel, - l_int32 row, - l_int32 col, - l_float32 val) -{ - PROCNAME("kernelSetElement"); - - if (!kel) - return ERROR_INT("kel not defined", procName, 1); - if (row < 0 || row >= kel->sy) - return ERROR_INT("kernel row out of bounds", procName, 1); - if (col < 0 || col >= kel->sx) - return ERROR_INT("kernel col out of bounds", procName, 1); - - kel->data[row][col] = val; - return 0; -} - - -/*! - * \brief kernelGetParameters() - * - * \param[in] kel kernel - * \param[out] psy, psx, pcy, pcx [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -kernelGetParameters(L_KERNEL *kel, - l_int32 *psy, - l_int32 *psx, - l_int32 *pcy, - l_int32 *pcx) -{ - PROCNAME("kernelGetParameters"); - - if (psy) *psy = 0; - if (psx) *psx = 0; - if (pcy) *pcy = 0; - if (pcx) *pcx = 0; - if (!kel) - return ERROR_INT("kernel not defined", procName, 1); - if (psy) *psy = kel->sy; - if (psx) *psx = kel->sx; - if (pcy) *pcy = kel->cy; - if (pcx) *pcx = kel->cx; - return 0; -} - - -/*! - * \brief kernelSetOrigin() - * - * \param[in] kel kernel - * \param[in] cy, cx - * \return 0 if OK; 1 on error - */ -l_ok -kernelSetOrigin(L_KERNEL *kel, - l_int32 cy, - l_int32 cx) -{ - PROCNAME("kernelSetOrigin"); - - if (!kel) - return ERROR_INT("kel not defined", procName, 1); - kel->cy = cy; - kel->cx = cx; - return 0; -} - - -/*! 
- * \brief kernelGetSum() - * - * \param[in] kel kernel - * \param[out] psum sum of all kernel values - * \return 0 if OK, 1 on error - */ -l_ok -kernelGetSum(L_KERNEL *kel, - l_float32 *psum) -{ -l_int32 sx, sy, i, j; - - PROCNAME("kernelGetSum"); - - if (!psum) - return ERROR_INT("&sum not defined", procName, 1); - *psum = 0.0; - if (!kel) - return ERROR_INT("kernel not defined", procName, 1); - - kernelGetParameters(kel, &sy, &sx, NULL, NULL); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - *psum += kel->data[i][j]; - } - } - return 0; -} - - -/*! - * \brief kernelGetMinMax() - * - * \param[in] kel kernel - * \param[out] pmin [optional] minimum value - * \param[out] pmax [optional] maximum value - * \return 0 if OK, 1 on error - */ -l_ok -kernelGetMinMax(L_KERNEL *kel, - l_float32 *pmin, - l_float32 *pmax) -{ -l_int32 sx, sy, i, j; -l_float32 val, minval, maxval; - - PROCNAME("kernelGetMinmax"); - - if (!pmin && !pmax) - return ERROR_INT("neither &min nor &max defined", procName, 1); - if (pmin) *pmin = 0.0; - if (pmax) *pmax = 0.0; - if (!kel) - return ERROR_INT("kernel not defined", procName, 1); - - kernelGetParameters(kel, &sy, &sx, NULL, NULL); - minval = 10000000.0; - maxval = -10000000.0; - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - val = kel->data[i][j]; - if (val < minval) - minval = val; - if (val > maxval) - maxval = val; - } - } - if (pmin) - *pmin = minval; - if (pmax) - *pmax = maxval; - - return 0; -} - - -/*----------------------------------------------------------------------* - * Normalize/Invert * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelNormalize() - * - * \param[in] kels source kel, to be normalized - * \param[in] normsum desired sum of elements in keld - * \return keld normalized version of kels, or NULL on error - * or if sum of elements is very close to 0) - * - *
- * Notes:
- *      (1) If the sum of kernel elements is close to 0, do not
- *          try to calculate the normalized kernel.  Instead,
- *          return a copy of the input kernel, with a warning.
- * 
- */ -L_KERNEL * -kernelNormalize(L_KERNEL *kels, - l_float32 normsum) -{ -l_int32 i, j, sx, sy, cx, cy; -l_float32 sum, factor; -L_KERNEL *keld; - - PROCNAME("kernelNormalize"); - - if (!kels) - return (L_KERNEL *)ERROR_PTR("kels not defined", procName, NULL); - - kernelGetSum(kels, &sum); - if (L_ABS(sum) < 0.00001) { - L_WARNING("null sum; not normalizing; returning a copy\n", procName); - return kernelCopy(kels); - } - - kernelGetParameters(kels, &sy, &sx, &cy, &cx); - if ((keld = kernelCreate(sy, sx)) == NULL) - return (L_KERNEL *)ERROR_PTR("keld not made", procName, NULL); - keld->cy = cy; - keld->cx = cx; - - factor = normsum / sum; - for (i = 0; i < sy; i++) - for (j = 0; j < sx; j++) - keld->data[i][j] = factor * kels->data[i][j]; - - return keld; -} - - -/*! - * \brief kernelInvert() - * - * \param[in] kels source kel, to be inverted - * \return keld spatially inverted, about the origin, or NULL on error - * - *
- * Notes:
- *      (1) For convolution, the kernel is spatially inverted before
- *          a "correlation" operation is done between the kernel and the image.
- * 
- */ -L_KERNEL * -kernelInvert(L_KERNEL *kels) -{ -l_int32 i, j, sx, sy, cx, cy; -L_KERNEL *keld; - - PROCNAME("kernelInvert"); - - if (!kels) - return (L_KERNEL *)ERROR_PTR("kels not defined", procName, NULL); - - kernelGetParameters(kels, &sy, &sx, &cy, &cx); - if ((keld = kernelCreate(sy, sx)) == NULL) - return (L_KERNEL *)ERROR_PTR("keld not made", procName, NULL); - keld->cy = sy - 1 - cy; - keld->cx = sx - 1 - cx; - - for (i = 0; i < sy; i++) - for (j = 0; j < sx; j++) - keld->data[i][j] = kels->data[sy - 1 - i][sx - 1 - j]; - - return keld; -} - - -/*----------------------------------------------------------------------* - * Helper function * - *----------------------------------------------------------------------*/ -/*! - * \brief create2dFloatArray() - * - * \param[in] sy rows == height - * \param[in] sx columns == width - * \return doubly indexed array i.e., an array of sy row pointers, - * each of which points to an array of sx floats - * - *
- * Notes:
- *      (1) The array[%sy][%sx] is indexed in standard "matrix notation",
- *          with the row index first.
- *      (2) The caller kernelCreate() limits the size to < 2^29 pixels.
- * 
- */ -l_float32 ** -create2dFloatArray(l_int32 sy, - l_int32 sx) -{ -l_int32 i; -l_float32 **array; - - PROCNAME("create2dFloatArray"); - - if (sx <= 0 || sx > MaxArraySize) - return (l_float32 **)ERROR_PTR("sx out of bounds", procName, NULL); - if (sy <= 0 || sy > MaxArraySize) - return (l_float32 **)ERROR_PTR("sy out of bounds", procName, NULL); - - if ((array = (l_float32 **)LEPT_CALLOC(sy, sizeof(l_float32 *))) == NULL) - return (l_float32 **)ERROR_PTR("ptr array not made", procName, NULL); - for (i = 0; i < sy; i++) - array[i] = (l_float32 *)LEPT_CALLOC(sx, sizeof(l_float32)); - return array; -} - - -/*----------------------------------------------------------------------* - * Kernel serialized I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelRead() - * - * \param[in] fname filename - * \return kernel, or NULL on error - */ -L_KERNEL * -kernelRead(const char *fname) -{ -FILE *fp; -L_KERNEL *kel; - - PROCNAME("kernelRead"); - - if (!fname) - return (L_KERNEL *)ERROR_PTR("fname not defined", procName, NULL); - - if ((fp = fopenReadStream(fname)) == NULL) - return (L_KERNEL *)ERROR_PTR("stream not opened", procName, NULL); - if ((kel = kernelReadStream(fp)) == NULL) { - fclose(fp); - return (L_KERNEL *)ERROR_PTR("kel not returned", procName, NULL); - } - fclose(fp); - - return kel; -} - - -/*! 
- * \brief kernelReadStream() - * - * \param[in] fp file stream - * \return kernel, or NULL on error - */ -L_KERNEL * -kernelReadStream(FILE *fp) -{ -l_int32 sy, sx, cy, cx, i, j, ret, version, ignore; -L_KERNEL *kel; - - PROCNAME("kernelReadStream"); - - if (!fp) - return (L_KERNEL *)ERROR_PTR("stream not defined", procName, NULL); - - ret = fscanf(fp, " Kernel Version %d\n", &version); - if (ret != 1) - return (L_KERNEL *)ERROR_PTR("not a kernel file", procName, NULL); - if (version != KERNEL_VERSION_NUMBER) - return (L_KERNEL *)ERROR_PTR("invalid kernel version", procName, NULL); - - if (fscanf(fp, " sy = %d, sx = %d, cy = %d, cx = %d\n", - &sy, &sx, &cy, &cx) != 4) - return (L_KERNEL *)ERROR_PTR("dimensions not read", procName, NULL); - if (sx > MaxArraySize || sy > MaxArraySize) { - L_ERROR("sx = %d or sy = %d > %d\n", procName, sx, sy, MaxArraySize); - return NULL; - } - if ((kel = kernelCreate(sy, sx)) == NULL) - return (L_KERNEL *)ERROR_PTR("kel not made", procName, NULL); - kernelSetOrigin(kel, cy, cx); - - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) - ignore = fscanf(fp, "%15f", &kel->data[i][j]); - ignore = fscanf(fp, "\n"); - } - ignore = fscanf(fp, "\n"); - - return kel; -} - - -/*! - * \brief kernelWrite() - * - * \param[in] fname output file - * \param[in] kel kernel - * \return 0 if OK, 1 on error - */ -l_ok -kernelWrite(const char *fname, - L_KERNEL *kel) -{ -FILE *fp; - - PROCNAME("kernelWrite"); - - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - if (!kel) - return ERROR_INT("kel not defined", procName, 1); - - if ((fp = fopenWriteStream(fname, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - kernelWriteStream(fp, kel); - fclose(fp); - - return 0; -} - - -/*! 
- * \brief kernelWriteStream() - * - * \param[in] fp file stream - * \param[in] kel - * \return 0 if OK, 1 on error - */ -l_ok -kernelWriteStream(FILE *fp, - L_KERNEL *kel) -{ -l_int32 sx, sy, cx, cy, i, j; - - PROCNAME("kernelWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!kel) - return ERROR_INT("kel not defined", procName, 1); - kernelGetParameters(kel, &sy, &sx, &cy, &cx); - - fprintf(fp, " Kernel Version %d\n", KERNEL_VERSION_NUMBER); - fprintf(fp, " sy = %d, sx = %d, cy = %d, cx = %d\n", sy, sx, cy, cx); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) - fprintf(fp, "%15.4f", kel->data[i][j]); - fprintf(fp, "\n"); - } - fprintf(fp, "\n"); - - return 0; -} - - -/*----------------------------------------------------------------------* - * Making a kernel from a compiled string * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelCreateFromString() - * - * \param[in] h, w height, width - * \param[in] cy, cx origin - * \param[in] kdata - * \return kernel of the given size, or NULL on error - * - *
- * Notes:
- *      (1) The data is an array of chars, in row-major order, giving
- *          space separated integers in the range [-255 ... 255].
- *      (2) The only other formatting limitation is that you must
- *          leave space between the last number in each row and
- *          the double-quote.  If possible, it's also nice to have each
- *          line in the string represent a line in the kernel; e.g.,
- *              static const char *kdata =
- *                  " 20   50   20 "
- *                  " 70  140   70 "
- *                  " 20   50   20 ";
- * 
- */ -L_KERNEL * -kernelCreateFromString(l_int32 h, - l_int32 w, - l_int32 cy, - l_int32 cx, - const char *kdata) -{ -l_int32 n, i, j, index; -l_float32 val; -L_KERNEL *kel; -NUMA *na; - - PROCNAME("kernelCreateFromString"); - - if (h < 1) - return (L_KERNEL *)ERROR_PTR("height must be > 0", procName, NULL); - if (w < 1) - return (L_KERNEL *)ERROR_PTR("width must be > 0", procName, NULL); - if (cy < 0 || cy >= h) - return (L_KERNEL *)ERROR_PTR("cy invalid", procName, NULL); - if (cx < 0 || cx >= w) - return (L_KERNEL *)ERROR_PTR("cx invalid", procName, NULL); - - kel = kernelCreate(h, w); - kernelSetOrigin(kel, cy, cx); - na = parseStringForNumbers(kdata, " \t\n"); - n = numaGetCount(na); - if (n != w * h) { - kernelDestroy(&kel); - numaDestroy(&na); - lept_stderr("w = %d, h = %d, num ints = %d\n", w, h, n); - return (L_KERNEL *)ERROR_PTR("invalid integer data", procName, NULL); - } - - index = 0; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - numaGetFValue(na, index, &val); - kernelSetElement(kel, i, j, val); - index++; - } - } - - numaDestroy(&na); - return kel; -} - - -/*----------------------------------------------------------------------* - * Making a kernel from a simple file format * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelCreateFromFile() - * - * \param[in] filename - * \return kernel, or NULL on error - * - *
- * Notes:
- *      (1) The file contains, in the following order:
- *           ~ Any number of comment lines starting with '#' are ignored
- *           ~ The height and width of the kernel
- *           ~ The y and x values of the kernel origin
- *           ~ The kernel data, formatted as lines of numbers (integers
- *             or floats) for the kernel values in row-major order,
- *             and with no other punctuation.
- *             (Note: this differs from kernelCreateFromString(),
- *             where each line must begin and end with a double-quote
- *             to tell the compiler it's part of a string.)
- *           ~ The kernel specification ends when a blank line,
- *             a comment line, or the end of file is reached.
- *      (2) All lines must be left-justified.
- *      (3) See kernelCreateFromString() for a description of the string
- *          format for the kernel data.  As an example, here are the lines
- *          of a valid kernel description file  In the file, all lines
- *          are left-justified:
- * \code
- *                    # small 3x3 kernel
- *                    3 3
- *                    1 1
- *                    25.5   51    24.3
- *                    70.2  146.3  73.4
- *                    20     50.9  18.4
- * \endcode
- * 
- */ -L_KERNEL * -kernelCreateFromFile(const char *filename) -{ -char *filestr, *line; -l_int32 nlines, i, j, first, index, w, h, cx, cy, n; -l_float32 val; -size_t size; -NUMA *na, *nat; -SARRAY *sa; -L_KERNEL *kel; - - PROCNAME("kernelCreateFromFile"); - - if (!filename) - return (L_KERNEL *)ERROR_PTR("filename not defined", procName, NULL); - - if ((filestr = (char *)l_binaryRead(filename, &size)) == NULL) - return (L_KERNEL *)ERROR_PTR("file not found", procName, NULL); - if (size == 0) { - LEPT_FREE(filestr); - return (L_KERNEL *)ERROR_PTR("file is empty", procName, NULL); - } - - sa = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - nlines = sarrayGetCount(sa); - - /* Find the first data line. */ - for (i = 0, first = 0; i < nlines; i++) { - line = sarrayGetString(sa, i, L_NOCOPY); - if (line[0] != '#') { - first = i; - break; - } - } - - /* Find the kernel dimensions and origin location. */ - line = sarrayGetString(sa, first, L_NOCOPY); - if (sscanf(line, "%d %d", &h, &w) != 2) { - sarrayDestroy(&sa); - return (L_KERNEL *)ERROR_PTR("error reading h,w", procName, NULL); - } - if (h > MaxArraySize || w > MaxArraySize) { - L_ERROR("h = %d or w = %d > %d\n", procName, h, w, MaxArraySize); - sarrayDestroy(&sa); - return NULL; - } - line = sarrayGetString(sa, first + 1, L_NOCOPY); - if (sscanf(line, "%d %d", &cy, &cx) != 2) { - sarrayDestroy(&sa); - return (L_KERNEL *)ERROR_PTR("error reading cy,cx", procName, NULL); - } - - /* Extract the data. This ends when we reach eof, or when we - * encounter a line of data that is either a null string or - * contains just a newline. 
*/ - na = numaCreate(0); - for (i = first + 2; i < nlines; i++) { - line = sarrayGetString(sa, i, L_NOCOPY); - if (line[0] == '\0' || line[0] == '\n' || line[0] == '#') - break; - nat = parseStringForNumbers(line, " \t\n"); - numaJoin(na, nat, 0, -1); - numaDestroy(&nat); - } - sarrayDestroy(&sa); - - n = numaGetCount(na); - if (n != w * h) { - numaDestroy(&na); - lept_stderr("w = %d, h = %d, num ints = %d\n", w, h, n); - return (L_KERNEL *)ERROR_PTR("invalid integer data", procName, NULL); - } - - kel = kernelCreate(h, w); - kernelSetOrigin(kel, cy, cx); - index = 0; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - numaGetFValue(na, index, &val); - kernelSetElement(kel, i, j, val); - index++; - } - } - - numaDestroy(&na); - return kel; -} - - -/*----------------------------------------------------------------------* - * Making a kernel from a Pix * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelCreateFromPix() - * - * \param[in] pix - * \param[in] cy, cx origin of kernel - * \return kernel, or NULL on error - * - *
- * Notes:
- *      (1) The origin must be positive and within the dimensions of the pix.
- * 
- */ -L_KERNEL * -kernelCreateFromPix(PIX *pix, - l_int32 cy, - l_int32 cx) -{ -l_int32 i, j, w, h, d; -l_uint32 val; -L_KERNEL *kel; - - PROCNAME("kernelCreateFromPix"); - - if (!pix) - return (L_KERNEL *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - if (d != 8) - return (L_KERNEL *)ERROR_PTR("pix not 8 bpp", procName, NULL); - if (cy < 0 || cx < 0 || cy >= h || cx >= w) - return (L_KERNEL *)ERROR_PTR("(cy, cx) invalid", procName, NULL); - - kel = kernelCreate(h, w); - kernelSetOrigin(kel, cy, cx); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pix, j, i, &val); - kernelSetElement(kel, i, j, (l_float32)val); - } - } - - return kel; -} - - -/*----------------------------------------------------------------------* - * Display a kernel in a pix * - *----------------------------------------------------------------------*/ -/*! - * \brief kernelDisplayInPix() - * - * \param[in] kel kernel - * \param[in] size of grid interiors; odd; either 1 or a minimum size - * of 17 is enforced - * \param[in] gthick grid thickness; either 0 or a minimum size of 2 - * is enforced - * \return pix display of kernel, or NULL on error - * - *
- * Notes:
- *      (1) This gives a visual representation of a kernel.
- *      (2) There are two modes of display:
- *          (a) Grid lines of minimum width 2, surrounding regions
- *              representing kernel elements of minimum size 17,
- *              with a "plus" mark at the kernel origin, or
- *          (b) A pix without grid lines and using 1 pixel per kernel element.
- *      (3) For both cases, the kernel absolute value is displayed,
- *          normalized such that the maximum absolute value is 255.
- *      (4) Large 2D separable kernels should be used for convolution
- *          with two 1D kernels.  However, for the bilateral filter,
- *          the computation time is independent of the size of the
- *          2D content kernel.
- * 
- */ -PIX * -kernelDisplayInPix(L_KERNEL *kel, - l_int32 size, - l_int32 gthick) -{ -l_int32 i, j, w, h, sx, sy, cx, cy, width, x0, y0; -l_int32 normval; -l_float32 minval, maxval, max, val, norm; -PIX *pixd, *pixt0, *pixt1; - - PROCNAME("kernelDisplayInPix"); - - if (!kel) - return (PIX *)ERROR_PTR("kernel not defined", procName, NULL); - - /* Normalize the max value to be 255 for display */ - kernelGetParameters(kel, &sy, &sx, &cy, &cx); - kernelGetMinMax(kel, &minval, &maxval); - max = L_MAX(maxval, -minval); - if (max == 0.0) - return (PIX *)ERROR_PTR("kernel elements all 0.0", procName, NULL); - norm = 255. / (l_float32)max; - - /* Handle the 1 element/pixel case; typically with large kernels */ - if (size == 1 && gthick == 0) { - pixd = pixCreate(sx, sy, 8); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - kernelGetElement(kel, i, j, &val); - normval = (l_int32)(norm * L_ABS(val)); - pixSetPixel(pixd, j, i, normval); - } - } - return pixd; - } - - /* Enforce the constraints for the grid line version */ - if (size < 17) { - L_WARNING("size < 17; setting to 17\n", procName); - size = 17; - } - if (size % 2 == 0) - size++; - if (gthick < 2) { - L_WARNING("grid thickness < 2; setting to 2\n", procName); - gthick = 2; - } - - w = size * sx + gthick * (sx + 1); - h = size * sy + gthick * (sy + 1); - pixd = pixCreate(w, h, 8); - - /* Generate grid lines */ - for (i = 0; i <= sy; i++) - pixRenderLine(pixd, 0, gthick / 2 + i * (size + gthick), - w - 1, gthick / 2 + i * (size + gthick), - gthick, L_SET_PIXELS); - for (j = 0; j <= sx; j++) - pixRenderLine(pixd, gthick / 2 + j * (size + gthick), 0, - gthick / 2 + j * (size + gthick), h - 1, - gthick, L_SET_PIXELS); - - /* Generate mask for each element */ - pixt0 = pixCreate(size, size, 1); - pixSetAll(pixt0); - - /* Generate crossed lines for origin pattern */ - pixt1 = pixCreate(size, size, 1); - width = size / 8; - pixRenderLine(pixt1, size / 2, (l_int32)(0.12 * size), - size / 2, (l_int32)(0.88 * size), - 
width, L_SET_PIXELS); - pixRenderLine(pixt1, (l_int32)(0.15 * size), size / 2, - (l_int32)(0.85 * size), size / 2, - width, L_FLIP_PIXELS); - pixRasterop(pixt1, size / 2 - width, size / 2 - width, - 2 * width, 2 * width, PIX_NOT(PIX_DST), NULL, 0, 0); - - /* Paste the patterns in */ - y0 = gthick; - for (i = 0; i < sy; i++) { - x0 = gthick; - for (j = 0; j < sx; j++) { - kernelGetElement(kel, i, j, &val); - normval = (l_int32)(norm * L_ABS(val)); - pixSetMaskedGeneral(pixd, pixt0, normval, x0, y0); - if (i == cy && j == cx) - pixPaintThroughMask(pixd, pixt1, x0, y0, 255 - normval); - x0 += size + gthick; - } - y0 += size + gthick; - } - - pixDestroy(&pixt0); - pixDestroy(&pixt1); - return pixd; -} - - -/*------------------------------------------------------------------------* - * Parse string to extract numbers * - *------------------------------------------------------------------------*/ -/*! - * \brief parseStringForNumbers() - * - * \param[in] str string containing numbers; not changed - * \param[in] seps string of characters that can be used between ints - * \return numa of numbers found, or NULL on error - * - *
- * Notes:
- *     (1) The numbers can be ints or floats.
- * 
- */ -NUMA * -parseStringForNumbers(const char *str, - const char *seps) -{ -char *newstr, *head; -char *tail = NULL; -l_float32 val; -NUMA *na; - - PROCNAME("parseStringForNumbers"); - - if (!str) - return (NUMA *)ERROR_PTR("str not defined", procName, NULL); - - newstr = stringNew(str); /* to enforce const-ness of str */ - na = numaCreate(0); - head = strtokSafe(newstr, seps, &tail); - val = atof(head); - numaAddNumber(na, val); - LEPT_FREE(head); - while ((head = strtokSafe(NULL, seps, &tail)) != NULL) { - val = atof(head); - numaAddNumber(na, val); - LEPT_FREE(head); - } - - LEPT_FREE(newstr); - return na; -} - - -/*------------------------------------------------------------------------* - * Simple parametric kernels * - *------------------------------------------------------------------------*/ -/*! - * \brief makeFlatKernel() - * - * \param[in] height, width - * \param[in] cy, cx origin of kernel - * \return kernel, or NULL on error - * - *
- * Notes:
- *      (1) This is the same low-pass filtering kernel that is used
- *          in the block convolution functions.
- *      (2) The kernel origin (%cy, %cx) is typically placed as near
- *          the center of the kernel as possible.  If height and
- *          width are odd, then using %cy = height / 2 and
- *          %cx = width / 2 places the origin at the exact center.
- *      (3) This returns a normalized kernel.
- * 
- */ -L_KERNEL * -makeFlatKernel(l_int32 height, - l_int32 width, - l_int32 cy, - l_int32 cx) -{ -l_int32 i, j; -l_float32 normval; -L_KERNEL *kel; - - PROCNAME("makeFlatKernel"); - - if ((kel = kernelCreate(height, width)) == NULL) - return (L_KERNEL *)ERROR_PTR("kel not made", procName, NULL); - kernelSetOrigin(kel, cy, cx); - normval = 1.0 / (l_float32)(height * width); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - kernelSetElement(kel, i, j, normval); - } - } - - return kel; -} - - -/*! - * \brief makeGaussianKernel() - * - * \param[in] halfh sy = 2 * halfh + 1 - * \param[in] halfw sx = 2 * halfw + 1 - * \param[in] stdev standard deviation - * \param[in] max value at (cx,cy) - * \return kernel, or NULL on error - * - *
- * Notes:
- *      (1) The kernel size (sx, sy) = (2 * %halfw + 1, 2 * %halfh + 1)
- *      (2) The kernel center (cx, cy) = (%halfw, %halfh).
- *      (3) %halfw and %halfh are typically equal, and
- *          are typically several times larger than the standard deviation.
- *      (4) If pixConvolve() is invoked with normalization (the sum of
- *          kernel elements = 1.0), use 1.0 for max (or any number that's
- *          not too small or too large).
- * 
- */ -L_KERNEL * -makeGaussianKernel(l_int32 halfh, - l_int32 halfw, - l_float32 stdev, - l_float32 max) -{ -l_int32 sx, sy, i, j; -l_float32 val; -L_KERNEL *kel; - - PROCNAME("makeGaussianKernel"); - - sx = 2 * halfw + 1; - sy = 2 * halfh + 1; - if ((kel = kernelCreate(sy, sx)) == NULL) - return (L_KERNEL *)ERROR_PTR("kel not made", procName, NULL); - kernelSetOrigin(kel, halfh, halfw); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - val = expf(-(l_float32)((i - halfh) * (i - halfh) + - (j - halfw) * (j - halfw)) / - (2. * stdev * stdev)); - kernelSetElement(kel, i, j, max * val); - } - } - - return kel; -} - - -/*! - * \brief makeGaussianKernelSep() - * - * \param[in] halfh sy = 2 * halfh + 1 - * \param[in] halfw sx = 2 * halfw + 1 - * \param[in] stdev standard deviation - * \param[in] max value at (cx,cy) - * \param[out] pkelx x part of kernel - * \param[out] pkely y part of kernel - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See makeGaussianKernel() for description of input parameters.
- *      (2) These kernels are constructed so that the result of both
- *          normalized and un-normalized convolution will be the same
- *          as when convolving with pixConvolve() using the full kernel.
- *      (3) The trick for the un-normalized convolution is to have the
- *          product of the two kernel elemets at (cx,cy) be equal to %max,
- *          not max**2.  That's why %max for kely is 1.0.  If instead
- *          we use sqrt(%max) for both, the results are slightly less
- *          accurate, when compared to using the full kernel in
- *          makeGaussianKernel().
- * 
- */ -l_ok -makeGaussianKernelSep(l_int32 halfh, - l_int32 halfw, - l_float32 stdev, - l_float32 max, - L_KERNEL **pkelx, - L_KERNEL **pkely) -{ - PROCNAME("makeGaussianKernelSep"); - - if (!pkelx || !pkely) - return ERROR_INT("&kelx and &kely not defined", procName, 1); - - *pkelx = makeGaussianKernel(0, halfw, stdev, max); - *pkely = makeGaussianKernel(halfh, 0, stdev, 1.0); - return 0; -} - - -/*! - * \brief makeDoGKernel() - * - * \param[in] halfh sy = 2 * halfh + 1 - * \param[in] halfw sx = 2 * halfw + 1 - * \param[in] stdev standard deviation of narrower gaussian - * \param[in] ratio of stdev for wide filter to stdev for narrow one - * \return kernel, or NULL on error - * - *
- * Notes:
- *      (1) The DoG (difference of gaussians) is a wavelet mother
- *          function with null total sum.  By subtracting two blurred
- *          versions of the image, it acts as a bandpass filter for
- *          frequencies passed by the narrow gaussian but stopped
- *          by the wide one.See:
- *               http://en.wikipedia.org/wiki/Difference_of_Gaussians
- *      (2) The kernel size (sx, sy) = (2 * halfw + 1, 2 * halfh + 1).
- *      (3) The kernel center (cx, cy) = (halfw, halfh).
- *      (4) %halfw and %halfh are typically equal, and are typically
- *          several times larger than the standard deviation.
- *      (5) %ratio is the ratio of standard deviations of the wide
- *          to narrow gaussian.  It must be >= 1.0; 1.0 is a no-op.
- *      (6) Because the kernel is a null sum, it must be invoked without
- *          normalization in pixConvolve().
- * 
- */ -L_KERNEL * -makeDoGKernel(l_int32 halfh, - l_int32 halfw, - l_float32 stdev, - l_float32 ratio) -{ -l_int32 sx, sy, i, j; -l_float32 pi, squaredist, highnorm, lownorm, val; -L_KERNEL *kel; - - PROCNAME("makeDoGKernel"); - - sx = 2 * halfw + 1; - sy = 2 * halfh + 1; - if ((kel = kernelCreate(sy, sx)) == NULL) - return (L_KERNEL *)ERROR_PTR("kel not made", procName, NULL); - kernelSetOrigin(kel, halfh, halfw); - - pi = 3.1415926535; - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - squaredist = (l_float32)((i - halfh) * (i - halfh) + - (j - halfw) * (j - halfw)); - highnorm = 1. / (2 * stdev * stdev); - lownorm = highnorm / (ratio * ratio); - val = (highnorm / pi) * expf(-(highnorm * squaredist)) - - (lownorm / pi) * expf(-(lownorm * squaredist)); - kernelSetElement(kel, i, j, val); - } - } - - return kel; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/leptwin.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/leptwin.c deleted file mode 100644 index 72643a0b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/leptwin.c +++ /dev/null @@ -1,368 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file leptwin.c - *
- *
- *    This file contains Leptonica routines needed only on Microsoft Windows
- *
- *    Currently it only contains one public function
- *    (based on dibsectn.c by jmh, 03-30-98):
- *
- *      HBITMAP    pixGetWindowsHBITMAP(PIX *pix)
- * 
- */ - -#ifdef _WIN32 -#include -#include -#include "allheaders.h" -#include "leptwin.h" - -/* Macro to determine the number of bytes per line in the DIB bits. - * This accounts for DWORD alignment by adding 31 bits, - * then dividing by 32, then rounding up to the next highest - * count of 4-bytes. Then, we multiply by 4 to get the total byte count. */ -#define BYTESPERLINE(Width, BPP) ((l_int32)((((DWORD)(Width) * (DWORD)(BPP) + 31) >> 5)) << 2) - - -/* ********************************************************************** - DWORD DSImageBitsSize(LPBITMAPINFO pbmi) - - PARAMETERS: - LPBITMAPINFO - pointer to a BITMAPINFO describing a DIB - - RETURNS: - DWORD - the size, in bytes, of the DIB's image bits - - REMARKS: - Calculates and returns the size, in bytes, of the image bits for - the DIB described by the BITMAPINFO. -********************************************************************** */ -static DWORD -DSImageBitsSize(LPBITMAPINFO pbmi) -{ - switch(pbmi->bmiHeader.biCompression) - { - case BI_RLE8: /* wrong if haven't called DSCreateDIBSection or - * CreateDIBSection with this pbmi */ - case BI_RLE4: - return pbmi->bmiHeader.biSizeImage; - break; - default: /* should not have to use "default" */ - case BI_RGB: - case BI_BITFIELDS: - return BYTESPERLINE(pbmi->bmiHeader.biWidth, \ - pbmi->bmiHeader.biBitCount * pbmi->bmiHeader.biPlanes) * - pbmi->bmiHeader.biHeight; - break; - } - return 0; -} - -/* ********************************************************************** - DWORD ImageBitsSize(HBITMAP hbitmap) - - PARAMETERS: - HBITMAP - hbitmap - - RETURNS: - DWORD - the size, in bytes, of the HBITMAP's image bits - - REMARKS: - Calculates and returns the size, in bytes, of the image bits for - the DIB described by the HBITMAP. 
-********************************************************************** */ -static DWORD -ImageBitsSize(HBITMAP hBitmap) -{ - DIBSECTION ds; - - GetObject(hBitmap, sizeof(DIBSECTION), &ds); - switch( ds.dsBmih.biCompression ) - { - case BI_RLE8: /* wrong if haven't called DSCreateDIBSection or - * CreateDIBSection with this pbmi */ - case BI_RLE4: - return ds.dsBmih.biSizeImage; - break; - default: /* should not have to use "default" */ - case BI_RGB: - case BI_BITFIELDS: - return BYTESPERLINE(ds.dsBmih.biWidth, \ - ds.dsBmih.biBitCount * ds.dsBmih.biPlanes) * - ds.dsBmih.biHeight; - break; - } - return 0; -} - -/*! - * \brief setColormap(LPBITMAPINFO pbmi, PIXCMAP *cmap) - * - * \param[in] pbmi pointer to a BITMAPINFO describing a DIB - * \param[in] cmap leptonica colormap - * \return number of colors in cmap - */ -static int -setColormap(LPBITMAPINFO pbmi, - PIXCMAP *cmap) -{ -l_int32 i, nColors, rval, gval, bval; - - nColors = pixcmapGetCount(cmap); - for (i = 0; i < nColors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - pbmi->bmiColors[i].rgbRed = rval; - pbmi->bmiColors[i].rgbGreen = gval; - pbmi->bmiColors[i].rgbBlue = bval; - pbmi->bmiColors[i].rgbReserved = 0; - } - pbmi->bmiHeader.biClrUsed = nColors; - return nColors; -} - -/* ********************************************************************** - HBITMAP DSCreateBitmapInfo(l_int32 width, l_int32 height, l_int32 depth, - PIXCMAP *cmap) - - PARAMETERS: - l_int32 width - Desired width of the DIBSection - l_int32 height - Desired height of the DIBSection - l_int32 depth - Desired bit-depth of the DIBSection - PIXCMAP cmap - leptonica colormap for depths < 16 - - RETURNS: - LPBITMAPINFO - a ptr to BITMAPINFO of the desired size and bit-depth - NULL on failure - - REMARKS: - Creates a BITMAPINFO based on the criteria passed in as parameters. 
- -********************************************************************** */ -static LPBITMAPINFO -DSCreateBitmapInfo(l_int32 width, - l_int32 height, - l_int32 depth, - PIXCMAP *cmap) -{ -l_int32 nInfoSize; -LPBITMAPINFO pbmi; -LPDWORD pMasks; - - nInfoSize = sizeof(BITMAPINFOHEADER); - if( depth <= 8 ) - nInfoSize += sizeof(RGBQUAD) * (1 << depth); - if((depth == 16) || (depth == 32)) - nInfoSize += (3 * sizeof(DWORD)); - - /* Create the header big enough to contain color table and - * bitmasks if needed. */ - pbmi = (LPBITMAPINFO)malloc(nInfoSize); - if (!pbmi) - return NULL; - - ZeroMemory(pbmi, nInfoSize); - pbmi->bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - pbmi->bmiHeader.biWidth = width; - pbmi->bmiHeader.biHeight = height; - pbmi->bmiHeader.biPlanes = 1; - pbmi->bmiHeader.biBitCount = depth; - - /* override below for 16 and 32 bpp */ - pbmi->bmiHeader.biCompression = BI_RGB; - - /* ?? not sure if this is right? */ - pbmi->bmiHeader.biSizeImage = DSImageBitsSize(pbmi); - - pbmi->bmiHeader.biXPelsPerMeter = 0; - pbmi->bmiHeader.biYPelsPerMeter = 0; - pbmi->bmiHeader.biClrUsed = 0; /* override below */ - pbmi->bmiHeader.biClrImportant = 0; - - switch(depth) - { - case 24: - /* 24bpp requires no special handling */ - break; - case 16: - /* if it's 16bpp, fill in the masks and override the - * compression. These are the default masks -- you - * could change them if needed. 
*/ - pMasks = (LPDWORD)(pbmi->bmiColors); - pMasks[0] = 0x00007c00; - pMasks[1] = 0x000003e0; - pMasks[2] = 0x0000001f; - pbmi->bmiHeader.biCompression = BI_BITFIELDS; - break; - case 32: - /* if it's 32 bpp, fill in the masks and override - * the compression */ - pMasks = (LPDWORD)(pbmi->bmiColors); - /*pMasks[0] = 0x00ff0000; */ - /*pMasks[1] = 0x0000ff00; */ - /*pMasks[2] = 0x000000ff; */ - pMasks[0] = 0xff000000; - pMasks[1] = 0x00ff0000; - pMasks[2] = 0x0000ff00; - - pbmi->bmiHeader.biCompression = BI_BITFIELDS; - break; - case 8: - case 4: - case 1: - setColormap(pbmi, cmap); - break; - } - return pbmi; -} - -/* ********************************************************************** - HBITMAP DSCreateDIBSection(l_int32 width, l_int32 height, l_int32 depth, - PIXCMAP *cmap) - - PARAMETERS: - l_int32 width - Desired width of the DIBSection - l_int32 height - Desired height of the DIBSection - l_int32 depth - Desired bit-depth of the DIBSection - PIXCMAP cmap - leptonica colormap for depths < 16 - - RETURNS: - HBITMAP - a DIBSection HBITMAP of the desired size and bit-depth - NULL on failure - - REMARKS: - Creates a DIBSection based on the criteria passed in as parameters. - -********************************************************************** */ -static HBITMAP -DSCreateDIBSection(l_int32 width, - l_int32 height, - l_int32 depth, - PIXCMAP *cmap) -{ -HBITMAP hBitmap; -l_int32 nInfoSize; -LPBITMAPINFO pbmi; -HDC hRefDC; -LPBYTE pBits; - - pbmi = DSCreateBitmapInfo (width, height, depth, cmap); - if (!pbmi) - return NULL; - - hRefDC = GetDC(NULL); - hBitmap = CreateDIBSection(hRefDC, pbmi, DIB_RGB_COLORS, - (void **) &pBits, NULL, 0); - nInfoSize = GetLastError(); - ReleaseDC(NULL, hRefDC); - free(pbmi); - - return hBitmap; -} - - -/*! - * \brief pixGetWindowsHBITMAP() - * - * \param[in] pix - * \return Windows hBitmap, or NULL on error - * - *
- * Notes:
- *      (1) It's the responsibility of the caller to destroy the
- *          returned hBitmap with a call to DeleteObject (or with
- *          something that eventually calls DeleteObject).
- * 
- */ -HBITMAP -pixGetWindowsHBITMAP(PIX *pix) -{ -l_int32 width, height, depth; -l_uint32 *data; -HBITMAP hBitmap = NULL; -BITMAP bm; -DWORD imageBitsSize; -PIX *pixt = NULL; -PIXCMAP *cmap; - - PROCNAME("pixGetWindowsHBITMAP"); - if (!pix) - return (HBITMAP)ERROR_PTR("pix not defined", procName, NULL); - - pixGetDimensions(pix, &width, &height, &depth); - cmap = pixGetColormap(pix); - - if (depth == 24) depth = 32; - if (depth == 2) { - pixt = pixConvert2To8(pix, 0, 85, 170, 255, TRUE); - if (!pixt) - return (HBITMAP)ERROR_PTR("unable to convert pix from 2bpp to 8bpp", - procName, NULL); - depth = pixGetDepth(pixt); - cmap = pixGetColormap(pixt); - } - - if (depth < 16) { - if (!cmap) - cmap = pixcmapCreateLinear(depth, 1< - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -LEPT_DLL extern HBITMAP pixGetWindowsHBITMAP( PIX *pixs ); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* LEPTONICA_LEPTWIN_H */ -#endif /* _WIN32 */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/libversions.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/libversions.c deleted file mode 100644 index 004d52a2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/libversions.c +++ /dev/null @@ -1,204 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file libversions.c - *
- *
- *       Image library version number
- *           char      *getImagelibVersions()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -#if HAVE_LIBGIF -#include "gif_lib.h" -#endif - -#if HAVE_LIBJPEG -/* jpeglib.h includes jconfig.h, which makes the error of setting - * #define HAVE_STDLIB_H - * which conflicts with config_auto.h (where it is set to 1) and results - * for some gcc compiler versions in a warning. The conflict is harmless - * but we suppress it by undefining the variable. */ -#undef HAVE_STDLIB_H -#include "jpeglib.h" -#include "jerror.h" -#endif - -#if HAVE_LIBPNG -#include "png.h" -#endif - -#if HAVE_LIBTIFF -#include "tiffio.h" -#endif - -#if HAVE_LIBZ -#include "zlib.h" -#endif - -#if HAVE_LIBWEBP -#include "webp/encode.h" -#endif - -#if HAVE_LIBJP2K -#ifdef LIBJP2K_HEADER -#include LIBJP2K_HEADER -#else -#include -#endif -#endif - - -/*---------------------------------------------------------------------* - * Image Library Version number * - *---------------------------------------------------------------------*/ -/*! - * \brief getImagelibVersions() - * - *
- * Notes:
- *      (1) This returns a string of version numbers; e.g.,
- *            libgif 5.0.3
- *            libjpeg 8b (libjpeg-turbo 1.3.0)
- *            libpng 1.4.3
- *            libtiff 3.9.5
- *            zlib 1.2.5
- *            libwebp 0.3.0
- *            libopenjp2 2.1.0
- *      (2) The caller must free the memory.
- * 
- */ -char * -getImagelibVersions(void) -{ -char buf[128]; -l_int32 first = TRUE; -char *versionNumP; -char *nextTokenP; -char *versionStrP = NULL; - -#if HAVE_LIBGIF - first = FALSE; - stringJoinIP(&versionStrP, "libgif "); - #ifdef GIFLIB_MAJOR - snprintf(buf, sizeof(buf), "%d.%d.%d", GIFLIB_MAJOR, GIFLIB_MINOR, - GIFLIB_RELEASE); - #else - stringCopy(buf, "4.1.6(?)", sizeof(buf)); - #endif - stringJoinIP(&versionStrP, buf); -#endif /* HAVE_LIBGIF */ - -#if HAVE_LIBJPEG - { - struct jpeg_compress_struct cinfo; - struct jpeg_error_mgr err; - char buffer[JMSG_LENGTH_MAX]; - cinfo.err = jpeg_std_error(&err); - err.msg_code = JMSG_VERSION; - (*err.format_message) ((j_common_ptr ) &cinfo, buffer); - - if (!first) stringJoinIP(&versionStrP, " : "); - first = FALSE; - stringJoinIP(&versionStrP, "libjpeg "); - versionNumP = strtokSafe(buffer, " ", &nextTokenP); - stringJoinIP(&versionStrP, versionNumP); - LEPT_FREE(versionNumP); - - #if defined(LIBJPEG_TURBO_VERSION) - /* To stringify the result of expansion of a macro argument, - * you must use two levels of macros. 
See: - * https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */ - #define l_xstr(s) l_str(s) - #define l_str(s) #s - snprintf(buf, sizeof(buf), " (libjpeg-turbo %s)", - l_xstr(LIBJPEG_TURBO_VERSION)); - stringJoinIP(&versionStrP, buf); - #endif /* LIBJPEG_TURBO_VERSION */ - } -#endif /* HAVE_LIBJPEG */ - -#if HAVE_LIBPNG - if (!first) stringJoinIP(&versionStrP, " : "); - first = FALSE; - stringJoinIP(&versionStrP, "libpng "); - stringJoinIP(&versionStrP, png_get_libpng_ver(NULL)); -#endif /* HAVE_LIBPNG */ - -#if HAVE_LIBTIFF - if (!first) stringJoinIP(&versionStrP, " : "); - first = FALSE; - stringJoinIP(&versionStrP, "libtiff "); - versionNumP = strtokSafe((char *)TIFFGetVersion(), " \n", &nextTokenP); - LEPT_FREE(versionNumP); - versionNumP = strtokSafe(NULL, " \n", &nextTokenP); - LEPT_FREE(versionNumP); - versionNumP = strtokSafe(NULL, " \n", &nextTokenP); - stringJoinIP(&versionStrP, versionNumP); - LEPT_FREE(versionNumP); -#endif /* HAVE_LIBTIFF */ - -#if HAVE_LIBZ - if (!first) stringJoinIP(&versionStrP, " : "); - first = FALSE; - stringJoinIP(&versionStrP, "zlib "); - stringJoinIP(&versionStrP, zlibVersion()); -#endif /* HAVE_LIBZ */ - -#if HAVE_LIBWEBP - { - l_int32 val; - char buf[32]; - if (!first) stringJoinIP(&versionStrP, " : "); - first = FALSE; - stringJoinIP(&versionStrP, "libwebp "); - val = WebPGetEncoderVersion(); - snprintf(buf, sizeof(buf), "%d.%d.%d", val >> 16, (val >> 8) & 0xff, - val & 0xff); - stringJoinIP(&versionStrP, buf); - } -#endif /* HAVE_LIBWEBP */ - -#if HAVE_LIBJP2K - { - const char *version; - if (!first) stringJoinIP(&versionStrP, " : "); - first = FALSE; - stringJoinIP(&versionStrP, "libopenjp2 "); - version = opj_version(); - stringJoinIP(&versionStrP, version); - } -#endif /* HAVE_LIBJP2K */ - - return versionStrP; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/list.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/list.c deleted file mode 100644 index 9063af6e..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/list.c +++ /dev/null @@ -1,815 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file list.c - *
- *
- *      Inserting and removing elements
- *
- *           void      listDestroy()
- *           DLLIST   *listAddToHead()
- *           l_int32   listAddToTail()
- *           l_int32   listInsertBefore()
- *           l_int32   listInsertAfter()
- *           void     *listRemoveElement()
- *           void     *listRemoveFromHead()
- *           void     *listRemoveFromTail()
- *
- *      Other list operations
- *
- *           DLLIST   *listFindElement()
- *           DLLIST   *listFindTail()
- *           l_int32   listGetCount()
- *           l_int32   listReverse()
- *           DLLIST   *listJoin()
- *
- *      Lists are much harder to handle than arrays.  There is
- *      more overhead for the programmer, both cognitive and
- *      codewise, and more likelihood that an error can be made.
- *      For that reason, lists should only be used when it is
- *      inefficient to use arrays, such as when elements are
- *      routinely inserted or deleted from inside arrays whose
- *      average size is greater than about 10.
- *
- *      A list of data structures can be implemented in a number
- *      of ways.  The two most popular are:
- *
- *         (1) The list can be composed of a linked list of
- *             pointer cells ("cons cells"), where the data structures
- *             are hung off the cells.  This is more difficult
- *             to use because you have to keep track of both
- *             your hanging data and the cell structures.
- *             It requires 3 pointers for every data structure
- *             that is put in a list.  There is no problem
- *             cloning (using reference counts) for structures that
- *             are put in such a list.  We implement lists by this
- *             method here.
- *
- *         (2) The list pointers can be inserted directly into
- *             the data structures.  This is easy to implement
- *             and easier to use, but it adds 2 ptrs of overhead
- *             to every data structure in which the ptrs are embedded.
- *             It also requires special care not to put the ptrs
- *             in any data that is cloned with a reference count;
- *             else your lists will break.
- *
- *      Writing C code that uses list pointers explicitly to make
- *      and alter lists is difficult and prone to error.
- *      Consequently, a generic list utility that handles lists
- *      of arbitrary objects and doesn't force the programmer to
- *      touch the "next" and "prev" pointers, is quite useful.
- *      Such functions are provided here.   However, the usual
- *      situation requires traversing a list and applying some
- *      function to one or more of the list elements.  Macros
- *      for traversing the list are, in general, necessary, to
- *      achieve the goal of invisibly handling all "next" and "prev"
- *      pointers in generic lists.  We provide macros for
- *      traversing a list in both forward and reverse directions.
- *
- *      Because of the typing in C, implementation of a general
- *      list utility requires casting.  If macros are used, the
- *      casting can be done implicitly; otherwise, using functions,
- *      some of the casts must be explicit.  Fortunately, this
- *      can be implemented with void* so the programmer using
- *      the library will not have to make any casts!  (Unless you
- *      compile with g++, in which case the rules on implicit
- *      conversion are more strict.)
- *
- *      For example, to add an arbitrary data structure foo to the
- *      tail of a list, use
- *             listAddToTail(&head, &tail, pfoo);
- *      where head and tail are list cell ptrs and pfoo is
- *      a pointer to the foo object.
- *      And to remove an arbitrary data structure foo from a
- *      list, when you know the list cell element it is hanging from,
- *      use
- *             pfoo = listRemoveElement(&head, elem)
- *      where head and elem are list cell ptrs and pfoo is a pointer
- *      to the foo object.  No casts are required for foo in
- *      either direction in ANSI C.  (However, casts are
- *      required for ANSI C++).
- *
- *      We use lists that are composed of doubly-linked
- *      cells with data structures hanging off the cells.
- *      We use doubly-linked cells to simplify insertion
- *      and deletion, and to allow operations to proceed in either
- *      direction along the list.  With doubly-linked lists,
- *      it is tempting to make them circular, by setting head->prev
- *      to the tail of the list and tail->next to the head.
- *      The circular list costs nothing extra in storage, and
- *      allows operations to proceed from either end of the list
- *      with equal speed.  However, the circular link adds
- *      cognitive overhead for the application programmer in
- *      general, and it greatly complicates list traversal when
- *      arbitrary list elements can be added or removed as you
- *      move through.  It can be done, but in the spirit of
- *      simplicity, we avoid the temptation.  The price to be paid
- *      is the extra cost to find the tail of a list -- a full
- *      traversal -- before the tail can be used.  This is a
- *      cheap price to pay to avoid major headaches and buggy code.
- *
- *      When you are only applying some function to each element
- *      in a list, you can go either forwards or backwards.
- *      To run through a list forwards, use:
- * \code
- *          for (elem = head; elem; elem = nextelem) {
- *              nextelem = elem->next;   (in case we destroy elem)
- *              data>
- *          }
- * \endcode
- *      To run through a list backwards, find the tail and use:
- *
- *          for (elem = tail; elem; elem = prevelem) {
- #              prevelem = elem->prev;  (in case we destroy elem)
- *              data>
- *          }
- *
- *      Even though these patterns are very simple, they are so common
- *      that we've provided macros for them in list.h.  Using the
- *      macros, this becomes:
- * \code
- *          L_BEGIN_LIST_FORWARD(head, elem)
- *              data>
- *          L_END_LIST
- *
- *          L_BEGIN_LIST_REVERSE(tail, elem)
- *              data>
- *          L_END_LIST
- * \endcode
- *      Note again that with macros, the application programmer does
- *      not need to refer explicitly to next and prev fields.  Also,
- *      in the reverse case, note that we do not explicitly
- *      show the head of the list.  However, the head of the list
- *      is always in scope, and functions can be called within the
- *      iterator that change the head.
- *
- *      Some special cases are simpler.  For example, when
- *      removing all items from the head of the list, you can use
- * \code
- *          while (head) {
- *              obj = listRemoveFromHead(&head);
- *              
- *          }
- * \endcode
- *      Removing successive elements from the tail is equally simple:
- * \code
- *          while (tail) {
- *              obj = listRemoveFromTail(&head, &tail);
- *              
- *          }
- * \endcode
- *      When removing an arbitrary element from a list, use
- * \code
- *              obj = listRemoveElement(&head, elem);
- * \endcode
- *      All the listRemove*() functions hand you the object,
- *      destroy the list cell to which it was attached, and
- *      reset the list pointers if necessary.
- *
- *      Several other list operations, that do not involve
- *      inserting or removing objects, are also provided.
- *      The function listFindElement() locates a list pointer
- *      by matching the object hanging on it to a given
- *      object.  The function listFindTail() gets a handle
- *      to the tail list ptr, allowing backwards traversals of
- *      the list.  listGetCount() gives the number of elements
- *      in a list.  Functions that reverse a list and concatenate
- *      two lists are also provided.
- *
- *      These functions can be modified for efficiency in the
- *      situation where there is a large amount of creation and
- *      destruction of list cells.  If millions of cells are
- *      made and destroyed, but a relatively small number are
- *      around at any time, the list cells can be stored for
- *      later re-use in a stack (see the generic stack functions
- *      in stack.c).
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*---------------------------------------------------------------------* - * Inserting and removing elements * - *---------------------------------------------------------------------*/ -/*! - * \brief listDestroy() - * - * \param[in,out] phead head of list; will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) This only destroys the cons cells.  Before destroying
- *          the list, it is necessary to remove all data and set the
- *          data pointers in each cons cell to NULL.
- *      (2) listDestroy() will give a warning message for each data
- *          ptr that is not NULL.
- * 
- */ -void -listDestroy(DLLIST **phead) -{ -DLLIST *elem, *next, *head; - - PROCNAME("listDestroy"); - - if (phead == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((head = *phead) == NULL) - return; - - for (elem = head; elem; elem = next) { - if (elem->data) - L_WARNING("list data ptr is not null\n", procName); - next = elem->next; - LEPT_FREE(elem); - } - *phead = NULL; - return; -} - - -/*! - * \brief listAddToHead() - * - * \param[in,out] phead [optional] input head - * \param[in] data void* ptr, to be added - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This makes a new cell, attaches %data, and adds the
- *          cell to the head of the list.
- *      (2) When consing from NULL, be sure to initialize head to NULL
- *          before calling this function.
- * 
- */ -l_ok -listAddToHead(DLLIST **phead, - void *data) -{ -DLLIST *cell, *head; - - PROCNAME("listAddToHead"); - - if (!phead) - return ERROR_INT("&head not defined", procName, 1); - head = *phead; - if (!data) - return ERROR_INT("data not defined", procName, 1); - - cell = (DLLIST *)LEPT_CALLOC(1, sizeof(DLLIST)); - cell->data = data; - if (!head) { /* start the list; initialize the ptrs */ - cell->prev = NULL; - cell->next = NULL; - } else { - cell->prev = NULL; - cell->next = head; - head->prev = cell; - } - *phead = cell; - return 0; -} - - -/*! - * \brief listAddToTail() - * - * \param[in,out] phead [may be updated], can be NULL - * \param[in,out] ptail [updated], can be NULL - * \param[in] data void* ptr, to be hung on tail cons cell - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This makes a new cell, attaches %data, and adds the
- *          cell to the tail of the list.
- *      (2) &head is input to allow the list to be "cons'd" up from NULL.
- *      (3) &tail is input to allow the tail to be updated
- *          for efficient sequential operation with this function.
- *      (4) We assume that if *phead and/or *ptail are not NULL,
- *          then they are valid addresses.  Therefore:
- *           (a) when consing from NULL, be sure to initialize both
- *               head and tail to NULL.
- *           (b) when tail == NULL for an existing list, the tail
- *               will be found and updated.
- * 
- */ -l_ok -listAddToTail(DLLIST **phead, - DLLIST **ptail, - void *data) -{ -DLLIST *cell, *head, *tail; - - PROCNAME("listAddToTail"); - - if (!phead) - return ERROR_INT("&head not defined", procName, 1); - head = *phead; - if (!ptail) - return ERROR_INT("&tail not defined", procName, 1); - if (!data) - return ERROR_INT("data not defined", procName, 1); - - cell = (DLLIST *)LEPT_CALLOC(1, sizeof(DLLIST)); - cell->data = data; - if (!head) { /* Start the list and initialize the ptrs. *ptail - * should also have been initialized to NULL */ - cell->prev = NULL; - cell->next = NULL; - *phead = cell; - *ptail = cell; - } else { - if ((tail = *ptail) == NULL) - tail = listFindTail(head); - cell->prev = tail; - cell->next = NULL; - tail->next = cell; - *ptail = cell; - } - - return 0; -} - - -/*! - * \brief listInsertBefore() - * - * \param[in,out] phead [optional] input head - * \param[in] elem list element to be inserted in front of; - * must be NULL if head is NULL - * \param[in] data void* address, to be added - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This can be called on a null list, in which case both
- *          head and elem must be null.
- *      (2) If you are searching through a list, looking for a condition
- *          to add an element, you can do something like this:
- * \code
- *            L_BEGIN_LIST_FORWARD(head, elem)
- *                
- *                listInsertBefore(&head, elem, data);
- *            L_END_LIST
- * \endcode
- * 
- */ -l_ok -listInsertBefore(DLLIST **phead, - DLLIST *elem, - void *data) -{ -DLLIST *cell, *head; - - PROCNAME("listInsertBefore"); - - if (!phead) - return ERROR_INT("&head not defined", procName, 1); - head = *phead; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if ((!head && elem) || (head && !elem)) - return ERROR_INT("head and elem not consistent", procName, 1); - - /* New cell to insert */ - cell = (DLLIST *)LEPT_CALLOC(1, sizeof(DLLIST)); - cell->data = data; - if (!head) { /* start the list; initialize the ptrs */ - cell->prev = NULL; - cell->next = NULL; - *phead = cell; - } else if (head == elem) { /* insert before head of list */ - cell->prev = NULL; - cell->next = head; - head->prev = cell; - *phead = cell; - } else { /* insert before elem and after head of list */ - cell->prev = elem->prev; - cell->next = elem; - elem->prev->next = cell; - elem->prev = cell; - } - return 0; -} - - -/*! - * \brief listInsertAfter() - * - * \param[in,out] phead [optional] input head - * \param[in] elem list element to be inserted after; - * must be NULL if head is NULL - * \param[in] data void* ptr, to be added - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This can be called on a null list, in which case both
- *          head and elem must be null.  The head is included
- *          in the call to allow "consing" up from NULL.
- *      (2) If you are searching through a list, looking for a condition
- *          to add an element, you can do something like this:
- * \code
- *            L_BEGIN_LIST_FORWARD(head, elem)
- *                
- *                listInsertAfter(&head, elem, data);
- *            L_END_LIST
- * \endcode
- * 
- */ -l_ok -listInsertAfter(DLLIST **phead, - DLLIST *elem, - void *data) -{ -DLLIST *cell, *head; - - PROCNAME("listInsertAfter"); - - if (!phead) - return ERROR_INT("&head not defined", procName, 1); - head = *phead; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if ((!head && elem) || (head && !elem)) - return ERROR_INT("head and elem not consistent", procName, 1); - - /* New cell to insert */ - cell = (DLLIST *)LEPT_CALLOC(1, sizeof(DLLIST)); - cell->data = data; - if (!head) { /* start the list; initialize the ptrs */ - cell->prev = NULL; - cell->next = NULL; - *phead = cell; - } else if (elem->next == NULL) { /* insert after last */ - cell->prev = elem; - cell->next = NULL; - elem->next = cell; - } else { /* insert after elem and before the end */ - cell->prev = elem; - cell->next = elem->next; - elem->next->prev = cell; - elem->next = cell; - } - return 0; -} - - -/*! - * \brief listRemoveElement() - * - * \param[in,out] phead input head; can be changed - * \param[in] elem list element to be removed - * \return data void* struct on cell - * - *
- * Notes:
- *      (1) in ANSI C, it is not necessary to cast return to actual type; e.g.,
- *             pix = listRemoveElement(&head, elem);
- *          but in ANSI C++, it is necessary to do the cast:
- *             pix = (Pix *)listRemoveElement(&head, elem);
- * 
- */ -void * -listRemoveElement(DLLIST **phead, - DLLIST *elem) -{ -void *data; -DLLIST *head; - - PROCNAME("listRemoveElement"); - - if (!phead) - return (void *)ERROR_PTR("&head not defined", procName, NULL); - head = *phead; - if (!head) - return (void *)ERROR_PTR("head not defined", procName, NULL); - if (!elem) - return (void *)ERROR_PTR("elem not defined", procName, NULL); - - data = elem->data; - - if (head->next == NULL) { /* only one */ - if (elem != head) - return (void *)ERROR_PTR("elem must be head", procName, NULL); - *phead = NULL; - } else if (head == elem) { /* first one */ - elem->next->prev = NULL; - *phead = elem->next; - } else if (elem->next == NULL) { /* last one */ - elem->prev->next = NULL; - } else { /* neither the first nor the last one */ - elem->next->prev = elem->prev; - elem->prev->next = elem->next; - } - - LEPT_FREE(elem); - return data; -} - - -/*! - * \brief listRemoveFromHead() - * - * \param[in,out] phead head of list; updated - * \return data void* struct on cell, or NULL on error - * - *
- * Notes:
- *      (1) in ANSI C, it is not necessary to cast return to actual type; e.g.,
- *            pix = listRemoveFromHead(&head);
- *          but in ANSI C++, it is necessary to do the cast; e.g.,
- *            pix = (Pix *)listRemoveFromHead(&head);
- * 
- */ -void * -listRemoveFromHead(DLLIST **phead) -{ -DLLIST *head; -void *data; - - PROCNAME("listRemoveFromHead"); - - if (!phead) - return (void *)ERROR_PTR("&head not defined", procName, NULL); - if ((head = *phead) == NULL) - return (void *)ERROR_PTR("head not defined", procName, NULL); - - if (head->next == NULL) { /* only one */ - *phead = NULL; - } else { - head->next->prev = NULL; - *phead = head->next; - } - - data = head->data; - LEPT_FREE(head); - return data; -} - - -/*! - * \brief listRemoveFromTail() - * - * \param[in,out] phead list head must NOT be NULL; may be changed - * \param[in,out] ptail list tail may be NULL; always updated - * \return data void* struct on cell or NULL on error - * - *
- * Notes:
- *      (1) We include &head so that it can be set to NULL if
- *          if the only element in the list is removed.
- *      (2) The function is relying on the fact that if tail is
- *          not NULL, then is is a valid address.  You can use
- *          this function with tail == NULL for an existing list, in
- *          which case  the tail is found and updated, and the
- *          removed element is returned.
- *      (3) In ANSI C, it is not necessary to cast return to actual type; e.g.,
- *            pix = listRemoveFromTail(&head, &tail);
- *          but in ANSI C++, it is necessary to do the cast; e.g.,
- *            pix = (Pix *)listRemoveFromTail(&head, &tail);
- * 
- */ -void * -listRemoveFromTail(DLLIST **phead, - DLLIST **ptail) -{ -DLLIST *head, *tail; -void *data; - - PROCNAME("listRemoveFromTail"); - - if (!phead) - return (void *)ERROR_PTR("&head not defined", procName, NULL); - if ((head = *phead) == NULL) - return (void *)ERROR_PTR("head not defined", procName, NULL); - if (!ptail) - return (void *)ERROR_PTR("&tail not defined", procName, NULL); - if ((tail = *ptail) == NULL) - tail = listFindTail(head); - - if (head->next == NULL) { /* only one */ - *phead = NULL; - *ptail = NULL; - } else { - tail->prev->next = NULL; - *ptail = tail->prev; - } - - data = tail->data; - LEPT_FREE(tail); - return data; -} - - - -/*---------------------------------------------------------------------* - * Other list operations * - *---------------------------------------------------------------------*/ -/*! - * \brief listFindElement() - * - * \param[in] head list head - * \param[in] data void* address, to be searched for - * \return cell the containing cell, or NULL if not found or on error - * - *
- * Notes:
- *      (1) This returns a ptr to the cell, which is still embedded in
- *          the list.
- *      (2) This handle and the attached data have not been copied or
- *          reference counted, so they must not be destroyed.  This
- *          violates our basic rule that every handle returned from a
- *          function is owned by that function and must be destroyed,
- *          but if rules aren't there to be broken, why have them?
- * 
- */ -DLLIST * -listFindElement(DLLIST *head, - void *data) -{ -DLLIST *cell; - - PROCNAME("listFindElement"); - - if (!head) - return (DLLIST *)ERROR_PTR("head not defined", procName, NULL); - if (!data) - return (DLLIST *)ERROR_PTR("data not defined", procName, NULL); - - for (cell = head; cell; cell = cell->next) { - if (cell->data == data) - return cell; - } - - return NULL; -} - - -/*! - * \brief listFindTail() - * - * \param[in] head - * \return tail, or NULL on error - */ -DLLIST * -listFindTail(DLLIST *head) -{ -DLLIST *cell; - - PROCNAME("listFindTail"); - - if (!head) - return (DLLIST *)ERROR_PTR("head not defined", procName, NULL); - - for (cell = head; cell; cell = cell->next) { - if (cell->next == NULL) - return cell; - } - - return (DLLIST *)ERROR_PTR("tail not found !!", procName, NULL); -} - - -/*! - * \brief listGetCount() - * - * \param[in] head of list - * \return number of elements; 0 if no list or on error - */ -l_int32 -listGetCount(DLLIST *head) -{ -l_int32 count; -DLLIST *elem; - - PROCNAME("listGetCount"); - - if (!head) - return ERROR_INT("head not defined", procName, 0); - - count = 0; - for (elem = head; elem; elem = elem->next) - count++; - - return count; -} - - -/*! - * \brief listReverse() - * - * \param[in,out] phead list head; may be changed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This reverses the list in-place.
- * 
- */ -l_ok -listReverse(DLLIST **phead) -{ -void *obj; /* whatever */ -DLLIST *head, *rhead; - - PROCNAME("listReverse"); - - if (!phead) - return ERROR_INT("&head not defined", procName, 1); - if ((head = *phead) == NULL) - return ERROR_INT("head not defined", procName, 1); - - rhead = NULL; - while (head) { - obj = listRemoveFromHead(&head); - listAddToHead(&rhead, obj); - } - - *phead = rhead; - return 0; -} - - -/*! - * \brief listJoin() - * - * \param[in,out] phead1 head of first list; may be changed - * \param[in,out] phead2 head of second list; to be nulled - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The concatenated list is returned with head1 as the new head.
- *      (2) Both input ptrs must exist, though either can have the value NULL.
- * 
- */ -l_ok -listJoin(DLLIST **phead1, - DLLIST **phead2) -{ -void *obj; -DLLIST *head1, *head2, *tail1; - - PROCNAME("listJoin"); - - if (!phead1) - return ERROR_INT("&head1 not defined", procName, 1); - if (!phead2) - return ERROR_INT("&head2 not defined", procName, 1); - - /* If no list2, just return list1 unchanged */ - if ((head2 = *phead2) == NULL) - return 0; - - /* If no list1, just return list2 */ - if ((head1 = *phead1) == NULL) { - *phead1 = head2; - *phead2 = NULL; - return 0; - } - - /* General case for concatenation into list 1 */ - tail1 = listFindTail(head1); - while (head2) { - obj = listRemoveFromHead(&head2); - listAddToTail(&head1, &tail1, obj); - } - *phead2 = NULL; - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/list.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/list.h deleted file mode 100644 index d207e79f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/list.h +++ /dev/null @@ -1,90 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -#ifndef LEPTONICA_LIST_H -#define LEPTONICA_LIST_H - -/*! - * \file list.h - * - *
- *       Cell for double-linked lists
- *
- *       This allows composition of a list of cells with
- *           prev, next and data pointers.  Generic data
- *           structures hang on the list cell data pointers.
- *
- *       The list is not circular because that would add much
- *           complexity in traversing the list under general
- *           conditions where list cells can be added and removed.
- *           The only disadvantage of not having the head point to
- *           the last cell is that the list must be traversed to
- *           find its tail.  However, this traversal is fast, and
- *           the listRemoveFromTail() function updates the tail
- *           so there is no searching overhead with repeated use.
- *
- *       The list macros are used to run through a list, and their
- *       use is encouraged.  They are invoked, e.g., as
- *
- *             DLLIST  *head, *elem;
- *             ...
- *             L_BEGIN_LIST_FORWARD(head, elem)
- *                 data >
- *             L_END_LIST
- * 
- */ - -struct DoubleLinkedList -{ - struct DoubleLinkedList *prev; - struct DoubleLinkedList *next; - void *data; -}; -typedef struct DoubleLinkedList DLLIST; - - - /*! Simple list traverse macro - forward */ -#define L_BEGIN_LIST_FORWARD(head, element) \ - { \ - DLLIST *_leptvar_nextelem_; \ - for ((element) = (head); (element); (element) = _leptvar_nextelem_) { \ - _leptvar_nextelem_ = (element)->next; - - - /*! Simple list traverse macro - reverse */ -#define L_BEGIN_LIST_REVERSE(tail, element) \ - { \ - DLLIST *_leptvar_prevelem_; \ - for ((element) = (tail); (element); (element) = _leptvar_prevelem_) { \ - _leptvar_prevelem_ = (element)->prev; - - - /*! Simple list traverse macro - end of a list traverse */ -#define L_END_LIST }} - - -#endif /* LEPTONICA_LIST_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/map.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/map.c deleted file mode 100644 index 7817aa80..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/map.c +++ /dev/null @@ -1,264 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file map.c - *
- *
- *  This is an interface for map and set functions, based on using
- *  red-black binary search trees.  Because these trees are sorted,
- *  they are O(nlogn) to build.  They allow logn insertion, find
- *  and deletion of elements.
- *
- *  Both the map and set are ordered by key value, with unique keys.
- *  For the map, the elements are key/value pairs.
- *  For the set we only store unique, ordered keys, and the value
- *  (set to 0 in the implementation) is ignored.
- *
- *  The keys for the map and set can be any of the three types in the
- *  l_rbtree_keytype enum.  The values stored can be any of the four
- *  types in the rb_type union.
- *
- *  In-order forward and reverse iterators are provided for maps and sets.
- *  To forward iterate over the map for any type of key (in this example,
- *  uint32), extracting integer values:
- *
- *      L_AMAP  *m = l_amapCreate(L_UINT_TYPE);
- *      [add elements to the map ...]
- *      L_AMAP_NODE  *n = l_amapGetFirst(m);
- *      while (n) {
- *          l_int32 val = n->value.itype;
- *          // do something ...
- *          n = l_amapGetNext(n);
- *      }
- *
- *  If the nodes are deleted during the iteration:
- *
- *      L_AMAP  *m = l_amapCreate(L_UINT_TYPE);
- *      [add elements to the map ...]
- *      L_AMAP_NODE  *n = l_amapGetFirst(m);
- *      L_AMAP_NODE  *nn;
- *      while (n) {
- *          nn = l_amapGetNext(n);
- *          l_int32 val = n->value.itype;
- *          l_uint32 key = n->key.utype;
- *          // do something ...
- *          l_amapDelete(m, n->key);
- *          n = nn;
- *      }
- *
- *  See prog/maptest.c and prog/settest.c for more examples of usage.
- *
- *  Interface to (a) map using a general key and storing general values
- *           L_AMAP        *l_amapCreate()
- *           RB_TYPE       *l_amapFind()
- *           void           l_amapInsert()
- *           void           l_amapDelete()
- *           void           l_amapDestroy()
- *           L_AMAP_NODE   *l_amapGetFirst()
- *           L_AMAP_NODE   *l_amapGetNext()
- *           L_AMAP_NODE   *l_amapGetLast()
- *           L_AMAP_NODE   *l_amapGetPrev()
- *           l_int32        l_amapSize()
- *
- *  Interface to (a) set using a general key
- *           L_ASET        *l_asetCreate()
- *           RB_TYPE       *l_asetFind()
- *           void           l_asetInsert()
- *           void           l_asetDelete()
- *           void           l_asetDestroy()
- *           L_ASET_NODE   *l_asetGetFirst()
- *           L_ASET_NODE   *l_asetGetNext()
- *           L_ASET_NODE   *l_asetGetLast()
- *           L_ASET_NODE   *l_asetGetPrev()
- *           l_int32        l_asetSize()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* ------------------------------------------------------------- * - * Interface to Map * - * ------------------------------------------------------------- */ -L_AMAP * -l_amapCreate(l_int32 keytype) -{ - PROCNAME("l_amapCreate"); - - if (keytype != L_INT_TYPE && keytype != L_UINT_TYPE && - keytype != L_FLOAT_TYPE) - return (L_AMAP *)ERROR_PTR("invalid keytype", procName, NULL); - - L_AMAP *m = (L_AMAP *)LEPT_CALLOC(1, sizeof(L_AMAP)); - m->keytype = keytype; - return m; -} - -RB_TYPE * -l_amapFind(L_AMAP *m, - RB_TYPE key) -{ - return l_rbtreeLookup(m, key); -} - -void -l_amapInsert(L_AMAP *m, - RB_TYPE key, - RB_TYPE value) -{ - l_rbtreeInsert(m, key, value); -} - -void -l_amapDelete(L_AMAP *m, - RB_TYPE key) -{ - l_rbtreeDelete(m, key); -} - -void -l_amapDestroy(L_AMAP **pm) -{ - l_rbtreeDestroy(pm); -} - -L_AMAP_NODE * -l_amapGetFirst(L_AMAP *m) -{ - return l_rbtreeGetFirst(m); -} - -L_AMAP_NODE * -l_amapGetNext(L_AMAP_NODE *n) -{ - return l_rbtreeGetNext(n); -} - -L_AMAP_NODE * -l_amapGetLast(L_AMAP *m) -{ - return l_rbtreeGetLast(m); -} - -L_AMAP_NODE * -l_amapGetPrev(L_AMAP_NODE *n) -{ - return l_rbtreeGetPrev(n); -} - -l_int32 -l_amapSize(L_AMAP *m) -{ - return l_rbtreeGetCount(m); -} - - -/* ------------------------------------------------------------- * - * Interface to Set * - * ------------------------------------------------------------- */ -L_ASET * -l_asetCreate(l_int32 keytype) -{ - PROCNAME("l_asetCreate"); - - if (keytype != L_INT_TYPE && keytype != L_UINT_TYPE && - keytype != L_FLOAT_TYPE) - return (L_ASET *)ERROR_PTR("invalid keytype", procName, NULL); - - L_ASET *s = (L_ASET *)LEPT_CALLOC(1, sizeof(L_ASET)); - s->keytype = keytype; - return s; -} - -/* - * l_asetFind() - * - * This returns NULL if not found, non-null if it is. In the latter - * case, the value stored in the returned pointer has no significance. 
- */ -RB_TYPE * -l_asetFind(L_ASET *s, - RB_TYPE key) -{ - return l_rbtreeLookup(s, key); -} - -void -l_asetInsert(L_ASET *s, - RB_TYPE key) -{ -RB_TYPE value; - - value.itype = 0; /* meaningless */ - l_rbtreeInsert(s, key, value); -} - -void -l_asetDelete(L_ASET *s, - RB_TYPE key) -{ - l_rbtreeDelete(s, key); -} - -void -l_asetDestroy(L_ASET **ps) -{ - l_rbtreeDestroy(ps); -} - -L_ASET_NODE * -l_asetGetFirst(L_ASET *s) -{ - return l_rbtreeGetFirst(s); -} - -L_ASET_NODE * -l_asetGetNext(L_ASET_NODE *n) -{ - return l_rbtreeGetNext(n); -} - -L_ASET_NODE * -l_asetGetLast(L_ASET *s) -{ - return l_rbtreeGetLast(s); -} - -L_ASET_NODE * -l_asetGetPrev(L_ASET_NODE *n) -{ - return l_rbtreeGetPrev(n); -} - -l_int32 -l_asetSize(L_ASET *s) -{ - return l_rbtreeGetCount(s); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/maze.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/maze.c deleted file mode 100644 index 87e396c3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/maze.c +++ /dev/null @@ -1,909 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file maze.c - *
- *
- *      This is a game with a pedagogical slant.  A maze is represented
- *      by a binary image.  The ON pixels (fg) are walls.  The goal is
- *      to navigate on OFF pixels (bg), using Manhattan steps
- *      (N, S, E, W), between arbitrary start and end positions.
- *      The problem is thus to find the shortest route between two points
- *      in a binary image that are 4-connected in the bg.  This is done
- *      with a breadth-first search, implemented with a queue.
- *      We also use a queue of pointers to generate the maze (image).
- *
- *          PIX             *generateBinaryMaze()
- *          static MAZEEL   *mazeelCreate()
- *
- *          PIX             *pixSearchBinaryMaze()
- *          static l_int32   localSearchForBackground()
- *
- *      Generalizing a maze to a grayscale image, the search is
- *      now for the "shortest" or least cost path, for some given
- *      cost function.
- *
- *          PIX             *pixSearchGrayMaze()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#ifdef _WIN32 -#include -#include -#endif /* _WIN32 */ -#include "allheaders.h" - -static const l_int32 MinMazeWidth = 50; -static const l_int32 MinMazeHeight = 50; - -static const l_float32 DefaultWallProbability = 0.65; -static const l_float32 DefaultAnisotropyRatio = 0.25; - -enum { /* direction from parent to newly created element */ - START_LOC = 0, - DIR_NORTH = 1, - DIR_SOUTH = 2, - DIR_WEST = 3, - DIR_EAST = 4 -}; - -struct MazeElement { - l_float32 distance; - l_int32 x; - l_int32 y; - l_uint32 val; /* value of maze pixel at this location */ - l_int32 dir; /* direction from parent to child */ -}; -typedef struct MazeElement MAZEEL; - - -static MAZEEL *mazeelCreate(l_int32 x, l_int32 y, l_int32 dir); -static l_int32 localSearchForBackground(PIX *pix, l_int32 *px, - l_int32 *py, l_int32 maxrad); - -#ifndef NO_CONSOLE_IO -#define DEBUG_PATH 0 -#define DEBUG_MAZE 0 -#endif /* ~NO_CONSOLE_IO */ - -/*---------------------------------------------------------------------* - * Binary maze generation as cellular automaton * - *---------------------------------------------------------------------*/ -/*! - * \brief generateBinaryMaze() - * - * \param[in] w, h size of maze - * \param[in] xi, yi initial location - * \param[in] wallps probability that a pixel to the side is ON - * \param[in] ranis ratio of prob that pixel in forward direction - * is a wall to the probability that pixel in - * side directions is a wall - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) We have two input probability factors that determine the
- *          density of walls and average length of straight passages.
- *          When ranis < 1.0, you are more likely to generate a wall
- *          to the side than going forward.  Enter 0.0 for either if
- *          you want to use the default values.
- *      (2) This is a type of percolation problem, and exhibits
- *          different phases for different parameters wallps and ranis.
- *          For larger values of these parameters, regions in the maze
- *          are not explored because the maze generator walls them
- *          off and cannot get through.  The boundary between the
- *          two phases in this two-dimensional parameter space goes
- *          near these values:
- *                wallps       ranis
- *                0.35         1.00
- *                0.40         0.85
- *                0.45         0.70
- *                0.50         0.50
- *                0.55         0.40
- *                0.60         0.30
- *                0.65         0.25
- *                0.70         0.19
- *                0.75         0.15
- *                0.80         0.11
- *      (3) Because there is a considerable amount of overhead in calling
- *          pixGetPixel() and pixSetPixel(), this function can be sped
- *          up with little effort using raster line pointers and the
- *          GET_DATA* and SET_DATA* macros.
- * 
- */ -PIX * -generateBinaryMaze(l_int32 w, - l_int32 h, - l_int32 xi, - l_int32 yi, - l_float32 wallps, - l_float32 ranis) -{ -l_int32 x, y, dir; -l_uint32 val; -l_float32 frand, wallpf, testp; -MAZEEL *el, *elp; -PIX *pixd; /* the destination maze */ -PIX *pixm; /* for bookkeeping, to indicate pixels already visited */ -L_QUEUE *lq; - - /* On Windows, seeding is apparently necessary to get decent mazes. - * Windows rand() returns a value up to 2^15 - 1, whereas unix - * rand() returns a value up to 2^31 - 1. Therefore the generated - * mazes will differ on the two platforms. */ -#ifdef _WIN32 - srand(28*333); -#endif /* _WIN32 */ - - if (w < MinMazeWidth) - w = MinMazeWidth; - if (h < MinMazeHeight) - h = MinMazeHeight; - if (xi <= 0 || xi >= w) - xi = w / 6; - if (yi <= 0 || yi >= h) - yi = h / 5; - if (wallps < 0.05 || wallps > 0.95) - wallps = DefaultWallProbability; - if (ranis < 0.05 || ranis > 1.0) - ranis = DefaultAnisotropyRatio; - wallpf = wallps * ranis; - -#if DEBUG_MAZE - lept_stderr("(w, h) = (%d, %d), (xi, yi) = (%d, %d)\n", w, h, xi, yi); - lept_stderr("Using: prob(wall) = %7.4f, anisotropy factor = %7.4f\n", - wallps, ranis); -#endif /* DEBUG_MAZE */ - - /* These are initialized to OFF */ - pixd = pixCreate(w, h, 1); - pixm = pixCreate(w, h, 1); - - lq = lqueueCreate(0); - - /* Prime the queue with the first pixel; it is OFF */ - el = mazeelCreate(xi, yi, START_LOC); - pixSetPixel(pixm, xi, yi, 1); /* mark visited */ - lqueueAdd(lq, el); - - /* While we're at it ... 
*/ - while (lqueueGetCount(lq) > 0) { - elp = (MAZEEL *)lqueueRemove(lq); - x = elp->x; - y = elp->y; - dir = elp->dir; - if (x > 0) { /* check west */ - pixGetPixel(pixm, x - 1, y, &val); - if (val == 0) { /* not yet visited */ - pixSetPixel(pixm, x - 1, y, 1); /* mark visited */ - frand = (l_float32)rand() / (l_float32)RAND_MAX; - testp = wallps; - if (dir == DIR_WEST) - testp = wallpf; - if (frand <= testp) { /* make it a wall */ - pixSetPixel(pixd, x - 1, y, 1); - } else { /* not a wall */ - el = mazeelCreate(x - 1, y, DIR_WEST); - lqueueAdd(lq, el); - } - } - } - if (y > 0) { /* check north */ - pixGetPixel(pixm, x, y - 1, &val); - if (val == 0) { /* not yet visited */ - pixSetPixel(pixm, x, y - 1, 1); /* mark visited */ - frand = (l_float32)rand() / (l_float32)RAND_MAX; - testp = wallps; - if (dir == DIR_NORTH) - testp = wallpf; - if (frand <= testp) { /* make it a wall */ - pixSetPixel(pixd, x, y - 1, 1); - } else { /* not a wall */ - el = mazeelCreate(x, y - 1, DIR_NORTH); - lqueueAdd(lq, el); - } - } - } - if (x < w - 1) { /* check east */ - pixGetPixel(pixm, x + 1, y, &val); - if (val == 0) { /* not yet visited */ - pixSetPixel(pixm, x + 1, y, 1); /* mark visited */ - frand = (l_float32)rand() / (l_float32)RAND_MAX; - testp = wallps; - if (dir == DIR_EAST) - testp = wallpf; - if (frand <= testp) { /* make it a wall */ - pixSetPixel(pixd, x + 1, y, 1); - } else { /* not a wall */ - el = mazeelCreate(x + 1, y, DIR_EAST); - lqueueAdd(lq, el); - } - } - } - if (y < h - 1) { /* check south */ - pixGetPixel(pixm, x, y + 1, &val); - if (val == 0) { /* not yet visited */ - pixSetPixel(pixm, x, y + 1, 1); /* mark visited */ - frand = (l_float32)rand() / (l_float32)RAND_MAX; - testp = wallps; - if (dir == DIR_SOUTH) - testp = wallpf; - if (frand <= testp) { /* make it a wall */ - pixSetPixel(pixd, x, y + 1, 1); - } else { /* not a wall */ - el = mazeelCreate(x, y + 1, DIR_SOUTH); - lqueueAdd(lq, el); - } - } - } - LEPT_FREE(elp); - } - - lqueueDestroy(&lq, TRUE); - 
pixDestroy(&pixm); - return pixd; -} - - -static MAZEEL * -mazeelCreate(l_int32 x, - l_int32 y, - l_int32 dir) -{ -MAZEEL *el; - - el = (MAZEEL *)LEPT_CALLOC(1, sizeof(MAZEEL)); - el->x = x; - el->y = y; - el->dir = dir; - return el; -} - - -/*---------------------------------------------------------------------* - * Binary maze search * - *---------------------------------------------------------------------*/ -/*! - * \brief pixSearchBinaryMaze() - * - * \param[in] pixs 1 bpp, maze - * \param[in] xi, yi beginning point; use same initial point - * that was used to generate the maze - * \param[in] xf, yf end point, or close to it - * \param[out] ppixd [optional] maze with path illustrated, or - * if no path possible, the part of the maze - * that was searched - * \return pta shortest path, or NULL if either no path - * exists or on error - * - *
- * Notes:
- *      (1) Because of the overhead in calling pixGetPixel() and
- *          pixSetPixel(), we have used raster line pointers and the
- *          GET_DATA* and SET_DATA* macros for many of the pix accesses.
- *      (2) Commentary:
- *            The goal is to find the shortest path between beginning and
- *          end points, without going through walls, and there are many
- *          ways to solve this problem.
- *            We use a queue to implement a breadth-first search.  Two auxiliary
- *          "image" data structures can be used: one to mark the visited
- *          pixels and one to give the direction to the parent for each
- *          visited pixel.  The first structure is used to avoid putting
- *          pixels on the queue more than once, and the second is used
- *          for retracing back to the origin, like the breadcrumbs in
- *          Hansel and Gretel.  Each pixel taken off the queue is destroyed
- *          after it is used to locate the allowed neighbors.  In fact,
- *          only one distance image is required, if you initialize it
- *          to some value that signifies "not yet visited."  (We use
- *          a binary image for marking visited pixels because it is clearer.)
- *          This method for a simple search of a binary maze is implemented in
- *          pixSearchBinaryMaze().
- *            An alternative method would store the (manhattan) distance
- *          from the start point with each pixel on the queue.  The children
- *          of each pixel get a distance one larger than the parent.  These
- *          values can be stored in an auxiliary distance map image
- *          that is constructed simultaneously with the search.  Once the
- *          end point is reached, the distance map is used to backtrack
- *          along a minimum path.  There may be several equal length
- *          minimum paths, any one of which can be chosen this way.
- * 
- */ -PTA * -pixSearchBinaryMaze(PIX *pixs, - l_int32 xi, - l_int32 yi, - l_int32 xf, - l_int32 yf, - PIX **ppixd) -{ -l_int32 i, j, x, y, w, h, d, found; -l_uint32 val, rpixel, gpixel, bpixel; -void **lines1, **linem1, **linep8, **lined32; -MAZEEL *el, *elp; -PIX *pixd; /* the shortest path written on the maze image */ -PIX *pixm; /* for bookkeeping, to indicate pixels already visited */ -PIX *pixp; /* for bookkeeping, to indicate direction to parent */ -L_QUEUE *lq; -PTA *pta; - - PROCNAME("pixSearchBinaryMaze"); - - if (ppixd) *ppixd = NULL; - if (!pixs) - return (PTA *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return (PTA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (xi <= 0 || xi >= w) - return (PTA *)ERROR_PTR("xi not valid", procName, NULL); - if (yi <= 0 || yi >= h) - return (PTA *)ERROR_PTR("yi not valid", procName, NULL); - pixGetPixel(pixs, xi, yi, &val); - if (val != 0) - return (PTA *)ERROR_PTR("(xi,yi) not bg pixel", procName, NULL); - pixd = NULL; - pta = NULL; - - /* Find a bg pixel near input point (xf, yf) */ - localSearchForBackground(pixs, &xf, &yf, 5); - -#if DEBUG_MAZE - lept_stderr("(xi, yi) = (%d, %d), (xf, yf) = (%d, %d)\n", - xi, yi, xf, yf); -#endif /* DEBUG_MAZE */ - - pixm = pixCreate(w, h, 1); /* initialized to OFF */ - pixp = pixCreate(w, h, 8); /* direction to parent stored as enum val */ - lines1 = pixGetLinePtrs(pixs, NULL); - linem1 = pixGetLinePtrs(pixm, NULL); - linep8 = pixGetLinePtrs(pixp, NULL); - - lq = lqueueCreate(0); - - /* Prime the queue with the first pixel; it is OFF */ - el = mazeelCreate(xi, yi, 0); /* don't need direction here */ - pixSetPixel(pixm, xi, yi, 1); /* mark visited */ - lqueueAdd(lq, el); - - /* Fill up the pix storing directions to parents, - * stopping when we hit the point (xf, yf) */ - found = FALSE; - while (lqueueGetCount(lq) > 0) { - elp = (MAZEEL *)lqueueRemove(lq); - x = elp->x; - y = elp->y; - if (x == xf && y == yf) { - found = 
TRUE; - LEPT_FREE(elp); - break; - } - - if (x > 0) { /* check to west */ - val = GET_DATA_BIT(linem1[y], x - 1); - if (val == 0) { /* not yet visited */ - SET_DATA_BIT(linem1[y], x - 1); /* mark visited */ - val = GET_DATA_BIT(lines1[y], x - 1); - if (val == 0) { /* bg, not a wall */ - SET_DATA_BYTE(linep8[y], x - 1, DIR_EAST); /* parent E */ - el = mazeelCreate(x - 1, y, 0); - lqueueAdd(lq, el); - } - } - } - if (y > 0) { /* check north */ - val = GET_DATA_BIT(linem1[y - 1], x); - if (val == 0) { /* not yet visited */ - SET_DATA_BIT(linem1[y - 1], x); /* mark visited */ - val = GET_DATA_BIT(lines1[y - 1], x); - if (val == 0) { /* bg, not a wall */ - SET_DATA_BYTE(linep8[y - 1], x, DIR_SOUTH); /* parent S */ - el = mazeelCreate(x, y - 1, 0); - lqueueAdd(lq, el); - } - } - } - if (x < w - 1) { /* check east */ - val = GET_DATA_BIT(linem1[y], x + 1); - if (val == 0) { /* not yet visited */ - SET_DATA_BIT(linem1[y], x + 1); /* mark visited */ - val = GET_DATA_BIT(lines1[y], x + 1); - if (val == 0) { /* bg, not a wall */ - SET_DATA_BYTE(linep8[y], x + 1, DIR_WEST); /* parent W */ - el = mazeelCreate(x + 1, y, 0); - lqueueAdd(lq, el); - } - } - } - if (y < h - 1) { /* check south */ - val = GET_DATA_BIT(linem1[y + 1], x); - if (val == 0) { /* not yet visited */ - SET_DATA_BIT(linem1[y + 1], x); /* mark visited */ - val = GET_DATA_BIT(lines1[y + 1], x); - if (val == 0) { /* bg, not a wall */ - SET_DATA_BYTE(linep8[y + 1], x, DIR_NORTH); /* parent N */ - el = mazeelCreate(x, y + 1, 0); - lqueueAdd(lq, el); - } - } - } - LEPT_FREE(elp); - } - - lqueueDestroy(&lq, TRUE); - pixDestroy(&pixm); - LEPT_FREE(linem1); - - if (ppixd) { - pixd = pixUnpackBinary(pixs, 32, 1); - *ppixd = pixd; - } - composeRGBPixel(255, 0, 0, &rpixel); /* start point */ - composeRGBPixel(0, 255, 0, &gpixel); - composeRGBPixel(0, 0, 255, &bpixel); /* end point */ - - if (found) { - L_INFO(" Path found\n", procName); - pta = ptaCreate(0); - x = xf; - y = yf; - while (1) { - ptaAddPt(pta, x, y); - if 
(x == xi && y == yi) - break; - if (pixd) /* write 'gpixel' onto the path */ - pixSetPixel(pixd, x, y, gpixel); - pixGetPixel(pixp, x, y, &val); - if (val == DIR_NORTH) - y--; - else if (val == DIR_SOUTH) - y++; - else if (val == DIR_EAST) - x++; - else if (val == DIR_WEST) - x--; - } - } else { - L_INFO(" No path found\n", procName); - if (pixd) { /* paint all visited locations */ - lined32 = pixGetLinePtrs(pixd, NULL); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (GET_DATA_BYTE(linep8[i], j) != 0) - SET_DATA_FOUR_BYTES(lined32[i], j, gpixel); - } - } - LEPT_FREE(lined32); - } - } - if (pixd) { - pixSetPixel(pixd, xi, yi, rpixel); - pixSetPixel(pixd, xf, yf, bpixel); - } - - pixDestroy(&pixp); - LEPT_FREE(lines1); - LEPT_FREE(linep8); - return pta; -} - - -/*! - * \brief localSearchForBackground() - * - * \param[in] pix - * \param[out] px, py starting position for search; return found position - * \param[in] maxrad max distance to search from starting location - * \return 0 if bg pixel found; 1 if not found - */ -static l_int32 -localSearchForBackground(PIX *pix, - l_int32 *px, - l_int32 *py, - l_int32 maxrad) -{ -l_int32 x, y, w, h, r, i, j; -l_uint32 val; - - x = *px; - y = *py; - pixGetPixel(pix, x, y, &val); - if (val == 0) return 0; - - /* For each value of r, restrict the search to the boundary - * pixels in a square centered on (x,y), clipping to the - * image boundaries if necessary. 
*/ - pixGetDimensions(pix, &w, &h, NULL); - for (r = 1; r < maxrad; r++) { - for (i = -r; i <= r; i++) { - if (y + i < 0 || y + i >= h) - continue; - for (j = -r; j <= r; j++) { - if (x + j < 0 || x + j >= w) - continue; - if (L_ABS(i) != r && L_ABS(j) != r) /* not on "r ring" */ - continue; - pixGetPixel(pix, x + j, y + i, &val); - if (val == 0) { - *px = x + j; - *py = y + i; - return 0; - } - } - } - } - return 1; -} - - - -/*---------------------------------------------------------------------* - * Gray maze search * - *---------------------------------------------------------------------*/ -/*! - * \brief pixSearchGrayMaze() - * - * \param[in] pixs 1 bpp, maze - * \param[in] xi, yi beginning point; use same initial point - * that was used to generate the maze - * \param[in] xf, yf end point, or close to it - * \param[out] ppixd [optional] maze with path illustrated, or - * if no path possible, the part of the maze - * that was searched - * \return pta shortest path, or NULL if either no path - * exists or on error - * - * Commentary: - * Consider first a slight generalization of the binary maze - * search problem. Suppose that you can go through walls, - * but the cost is higher say, an increment of 3 to go into - * a wall pixel rather than 1? You're still trying to find - * the shortest path. One way to do this is with an ordered - * queue, and a simple way to visualize an ordered queue is as - * a set of stacks, each stack being marked with the distance - * of each pixel in the stack from the start. We place the - * start pixel in stack 0, pop it, and process its 4 children. - * Each pixel is given a distance that is incremented from that - * of its parent 0 in this case, depending on if it is a wall - * pixel or not. That value may be recorded on a distance map, - * according to the algorithm below. For children of the first - * pixel, those not on a wall go in stack 1, and wall - * children go in stack 3. 
Stack 0 being emptied, the process - * then continues with pixels being popped from stack 1. - * Here is the algorithm for each child pixel. The pixel's - * distance value, were it to be placed on a stack, is compared - * with the value for it that is on the distance map. There - * are three possible cases: - * 1 If the pixel has not yet been registered, it is pushed - * on its stack and the distance is written to the map. - * 2 If it has previously been registered with a higher distance, - * the distance on the map is relaxed to that of the - * current pixel, which is then placed on its stack. - * 3 If it has previously been registered with an equal - * or lower value, the pixel is discarded. - * The pixels are popped and processed successively from - * stack 1, and when stack 1 is empty, popping starts on stack 2. - * This continues until the destination pixel is popped off - * a stack. The minimum path is then derived from the distance map, - * going back from the end point as before. This is just Dijkstra's - * algorithm for a directed graph; here, the underlying graph - * consisting of the pixels and four edges connecting each pixel - * to its 4-neighbor is a special case of a directed graph, where - * each edge is bi-directional. The implementation of this generalized - * maze search is left as an exercise to the reader. - * - * Let's generalize a bit further. Suppose the "maze" is just - * a grayscale image -- think of it as an elevation map. The cost - * of moving on this surface depends on the height, or the gradient, - * or whatever you want. All that is required is that the cost - * is specified and non-negative on each link between adjacent - * pixels. Now the problem becomes: find the least cost path - * moving on this surface between two specified end points. 
- * For example, if the cost across an edge between two pixels - * depends on the "gradient", you can use: - * cost = 1 + L_ABSdeltaV - * where deltaV is the difference in value between two adjacent - * pixels. If the costs are all integers, we can still use an array - * of stacks to avoid ordering the queue e.g., by using a heap sort. - * This is a neat problem, because you don't even have to build a - * maze -- you can can use it on any grayscale image! - * - * Rather than using an array of stacks, a more practical - * approach is to implement with a priority queue, which is - * a queue that is sorted so that the elements with the largest - * or smallest key values always come off first. The - * priority queue is efficiently implemented as a heap, and - * this is how we do it. Suppose you run the algorithm - * using a priority queue, doing the bookkeeping with an - * auxiliary image data structure that saves the distance of - * each pixel put on the queue as before, according to the method - * described above. We implement it as a 2-way choice by - * initializing the distance array to a large value and putting - * a pixel on the queue if its distance is less than the value - * found on the array. When you finally pop the end pixel from - * the queue, you're done, and you can trace the path backward, - * either always going downhill or using an auxiliary image to - * give you the direction to go at each step. This is implemented - * here in searchGrayMaze. - * - * Do we really have to use a sorted queue? Can we solve this - * generalized maze with an unsorted queue of pixels? Or even - * an unsorted stack, doing a depth-first search (DFS)? - * Consider a different algorithm for this generalized maze, where - * we travel again breadth first, but this time use a single, - * unsorted queue. An auxiliary image is used as before to - * store the distances and to determine if pixels get pushed - * on the stack or dropped. 
As before, we must allow pixels - * to be revisited, with relaxation of the distance if a shorter - * path arrives later. As a result, we will in general have - * multiple instances of the same pixel on the stack with different - * distances. However, because the queue is not ordered, some of - * these pixels will be popped when another instance with a lower - * distance is still on the stack. Here, we're just popping them - * in the order they go on, rather than setting up a priority - * based on minimum distance. Thus, unlike the priority queue, - * when a pixel is popped we have to check the distance map to - * see if a pixel with a lower distance has been put on the queue, - * and, if so, we discard the pixel we just popped. So the - * "while" loop looks like this: - * ~ pop a pixel from the queue - * ~ check its distance against the distance stored in the - * distance map; if larger, discard - * ~ otherwise, for each of its neighbors: - * ~ compute its distance from the start pixel - * ~ compare this distance with that on the distance map: - * ~ if the distance map value higher, relax the distance - * and push the pixel on the queue - * ~ if the distance map value is lower, discard the pixel - * - * How does this loop terminate? Before, with an ordered queue, - * it terminates when you pop the end pixel. But with an unordered - * queue or stack, the first time you hit the end pixel, the - * distance is not guaranteed to be correct, because the pixels - * along the shortest path may not have yet been visited and relaxed. - * Because the shortest path can theoretically go anywhere, - * we must keep going. How do we know when to stop? Dijkstra - * uses an ordered queue to systematically remove nodes from - * further consideration. Each time a pixel is popped, we're - * done with it; it's "finalized" in the Dijkstra sense because - * we know the shortest path to it. 
However, with an unordered - * queue, the brute force answer is: stop when the queue - * or stack is empty, because then every pixel in the image - * has been assigned its minimum "distance" from the start pixel. - * - * This is similar to the situation when you use a stack for the - * simpler uniform-step problem: with breadth-first search BFS - * the pixels on the queue are automatically ordered, so you are - * done when you locate the end pixel as a neighbor of a popped pixel; - * whereas depth-first search DFS, using a stack, requires, - * in general, a search of every accessible pixel. Further, if - * a pixel is revisited with a smaller distance, that distance is - * recorded and the pixel is put on the stack again. - * - * But surely, you ask, can't we stop sooner? What if the - * start and end pixels are very close to each other? - * OK, suppose they are, and you have very high walls and a - * long snaking level path that is actually the minimum cost. - * That long path can wind back and forth across the entire - * maze many times before ending up at the end point, which - * could be just over a wall from the start. With the unordered - * queue, you very quickly get a high distance for the end - * pixel, which will be relaxed to the minimum distance only - * after all the pixels of the path have been visited and placed - * on the queue, multiple times for many of them. So that's the - * price for not ordering the queue! 
- */ -PTA * -pixSearchGrayMaze(PIX *pixs, - l_int32 xi, - l_int32 yi, - l_int32 xf, - l_int32 yf, - PIX **ppixd) -{ -l_int32 x, y, w, h, d; -l_uint32 val, valr, vals, rpixel, gpixel, bpixel; -void **lines8, **liner32, **linep8; -l_int32 cost, dist, distparent, sival, sivals; -MAZEEL *el, *elp; -PIX *pixd; /* optionally plot the path on this RGB version of pixs */ -PIX *pixr; /* for bookkeeping, to indicate the minimum distance */ - /* to pixels already visited */ -PIX *pixp; /* for bookkeeping, to indicate direction to parent */ -L_HEAP *lh; -PTA *pta; - - PROCNAME("pixSearchGrayMaze"); - - if (ppixd) *ppixd = NULL; - if (!pixs) - return (PTA *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PTA *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (xi <= 0 || xi >= w) - return (PTA *)ERROR_PTR("xi not valid", procName, NULL); - if (yi <= 0 || yi >= h) - return (PTA *)ERROR_PTR("yi not valid", procName, NULL); - pixd = NULL; - pta = NULL; - - /* Allocate stuff */ - pixr = pixCreate(w, h, 32); - pixSetAll(pixr); /* initialize to max value */ - pixp = pixCreate(w, h, 8); /* direction to parent stored as enum val */ - lines8 = pixGetLinePtrs(pixs, NULL); - linep8 = pixGetLinePtrs(pixp, NULL); - liner32 = pixGetLinePtrs(pixr, NULL); - lh = lheapCreate(0, L_SORT_INCREASING); /* always remove closest pixels */ - - /* Prime the heap with the first pixel */ - pixGetPixel(pixs, xi, yi, &val); - el = mazeelCreate(xi, yi, 0); /* don't need direction here */ - el->distance = 0; - pixGetPixel(pixs, xi, yi, &val); - el->val = val; - pixSetPixel(pixr, xi, yi, 0); /* distance is 0 */ - lheapAdd(lh, el); - - /* Breadth-first search with priority queue (implemented by - a heap), labeling direction to parents in pixp and minimum - distance to visited pixels in pixr. Stop when we pull - the destination point (xf, yf) off the queue. 
*/ - while (lheapGetCount(lh) > 0) { - elp = (MAZEEL *)lheapRemove(lh); - if (!elp) { - L_ERROR("heap broken!!\n", procName); - goto cleanup_stuff; - } - x = elp->x; - y = elp->y; - if (x == xf && y == yf) { /* exit condition */ - LEPT_FREE(elp); - break; - } - distparent = (l_int32)elp->distance; - val = elp->val; - sival = val; - - if (x > 0) { /* check to west */ - vals = GET_DATA_BYTE(lines8[y], x - 1); - valr = GET_DATA_FOUR_BYTES(liner32[y], x - 1); - sivals = (l_int32)vals; - cost = 1 + L_ABS(sivals - sival); /* cost to move to this pixel */ - dist = distparent + cost; - if (dist < valr) { /* shortest path so far to this pixel */ - SET_DATA_FOUR_BYTES(liner32[y], x - 1, dist); /* new dist */ - SET_DATA_BYTE(linep8[y], x - 1, DIR_EAST); /* parent to E */ - el = mazeelCreate(x - 1, y, 0); - el->val = vals; - el->distance = dist; - lheapAdd(lh, el); - } - } - if (y > 0) { /* check north */ - vals = GET_DATA_BYTE(lines8[y - 1], x); - valr = GET_DATA_FOUR_BYTES(liner32[y - 1], x); - sivals = (l_int32)vals; - cost = 1 + L_ABS(sivals - sival); /* cost to move to this pixel */ - dist = distparent + cost; - if (dist < valr) { /* shortest path so far to this pixel */ - SET_DATA_FOUR_BYTES(liner32[y - 1], x, dist); /* new dist */ - SET_DATA_BYTE(linep8[y - 1], x, DIR_SOUTH); /* parent to S */ - el = mazeelCreate(x, y - 1, 0); - el->val = vals; - el->distance = dist; - lheapAdd(lh, el); - } - } - if (x < w - 1) { /* check east */ - vals = GET_DATA_BYTE(lines8[y], x + 1); - valr = GET_DATA_FOUR_BYTES(liner32[y], x + 1); - sivals = (l_int32)vals; - cost = 1 + L_ABS(sivals - sival); /* cost to move to this pixel */ - dist = distparent + cost; - if (dist < valr) { /* shortest path so far to this pixel */ - SET_DATA_FOUR_BYTES(liner32[y], x + 1, dist); /* new dist */ - SET_DATA_BYTE(linep8[y], x + 1, DIR_WEST); /* parent to W */ - el = mazeelCreate(x + 1, y, 0); - el->val = vals; - el->distance = dist; - lheapAdd(lh, el); - } - } - if (y < h - 1) { /* check south */ - vals = 
GET_DATA_BYTE(lines8[y + 1], x); - valr = GET_DATA_FOUR_BYTES(liner32[y + 1], x); - sivals = (l_int32)vals; - cost = 1 + L_ABS(sivals - sival); /* cost to move to this pixel */ - dist = distparent + cost; - if (dist < valr) { /* shortest path so far to this pixel */ - SET_DATA_FOUR_BYTES(liner32[y + 1], x, dist); /* new dist */ - SET_DATA_BYTE(linep8[y + 1], x, DIR_NORTH); /* parent to N */ - el = mazeelCreate(x, y + 1, 0); - el->val = vals; - el->distance = dist; - lheapAdd(lh, el); - } - } - LEPT_FREE(elp); - } - - lheapDestroy(&lh, TRUE); - - if (ppixd) { - pixd = pixConvert8To32(pixs); - *ppixd = pixd; - } - composeRGBPixel(255, 0, 0, &rpixel); /* start point */ - composeRGBPixel(0, 255, 0, &gpixel); - composeRGBPixel(0, 0, 255, &bpixel); /* end point */ - - x = xf; - y = yf; - pta = ptaCreate(0); - while (1) { /* write path onto pixd */ - ptaAddPt(pta, x, y); - if (x == xi && y == yi) - break; - if (pixd) - pixSetPixel(pixd, x, y, gpixel); - pixGetPixel(pixp, x, y, &val); - if (val == DIR_NORTH) - y--; - else if (val == DIR_SOUTH) - y++; - else if (val == DIR_EAST) - x++; - else if (val == DIR_WEST) - x--; - pixGetPixel(pixr, x, y, &val); - -#if DEBUG_PATH - lept_stderr("(x,y) = (%d, %d); dist = %d\n", x, y, val); -#endif /* DEBUG_PATH */ - - } - if (pixd) { - pixSetPixel(pixd, xi, yi, rpixel); - pixSetPixel(pixd, xf, yf, bpixel); - } - -cleanup_stuff: - lheapDestroy(&lh, TRUE); - pixDestroy(&pixp); - pixDestroy(&pixr); - LEPT_FREE(lines8); - LEPT_FREE(linep8); - LEPT_FREE(liner32); - return pta; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morph.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morph.c deleted file mode 100644 index 019ba3cc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morph.c +++ /dev/null @@ -1,1915 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file morph.c - *
- *
- *     Generic binary morphological ops implemented with rasterop
- *         PIX     *pixDilate()
- *         PIX     *pixErode()
- *         PIX     *pixHMT()
- *         PIX     *pixOpen()
- *         PIX     *pixClose()
- *         PIX     *pixCloseSafe()
- *         PIX     *pixOpenGeneralized()
- *         PIX     *pixCloseGeneralized()
- *
- *     Binary morphological (raster) ops with brick Sels
- *         PIX     *pixDilateBrick()
- *         PIX     *pixErodeBrick()
- *         PIX     *pixOpenBrick()
- *         PIX     *pixCloseBrick()
- *         PIX     *pixCloseSafeBrick()
- *
- *     Binary composed morphological (raster) ops with brick Sels
- *         l_int32  selectComposableSels()
- *         l_int32  selectComposableSizes()
- *         PIX     *pixDilateCompBrick()
- *         PIX     *pixErodeCompBrick()
- *         PIX     *pixOpenCompBrick()
- *         PIX     *pixCloseCompBrick()
- *         PIX     *pixCloseSafeCompBrick()
- *
- *     Functions associated with boundary conditions
- *         void     resetMorphBoundaryCondition()
- *         l_int32  getMorphBorderPixelColor()
- *
- *     Static helpers for arg processing
- *         static PIX     *processMorphArgs1()
- *         static PIX     *processMorphArgs2()
- *
- *  You are provided with many simple ways to do binary morphology.
- *  In particular, if you are using brick Sels, there are six
- *  convenient methods, all specially tailored for separable operations
- *  on brick Sels.  A "brick" Sel is a Sel that is a rectangle
- *  of solid SEL_HITs with the origin at or near the center.
- *  Note that a brick Sel can have one dimension of size 1.
- *  This is very common.  All the brick Sel operations are
- *  separable, meaning the operation is done first in the horizontal
- *  direction and then in the vertical direction.  If one of the
- *  dimensions is 1, this is a special case where the operation is
- *  only performed in the other direction.
- *
- *  These six brick Sel methods are enumerated as follows:
- *
- *  (1) Brick Sels: pix*Brick(), where * = {Dilate, Erode, Open, Close}.
- *      These are separable rasterop implementations.  The Sels are
- *      automatically generated, used, and destroyed at the end.
- *      You can get the result as a new Pix, in-place back into the src Pix,
- *      or written to another existing Pix.
- *
- *  (2) Brick Sels: pix*CompBrick(), where * = {Dilate, Erode, Open, Close}.
- *      These are separable, 2-way composite, rasterop implementations.
- *      The Sels are automatically generated, used, and destroyed at the end.
- *      You can get the result as a new Pix, in-place back into the src Pix,
- *      or written to another existing Pix.  For large Sels, these are
- *      considerably faster than the corresponding pix*Brick() functions.
- *      N.B.:  The size of the Sels that are actually used are typically
- *      close to, but not exactly equal to, the size input to the function.
- *
- *  (3) Brick Sels: pix*BrickDwa(), where * = {Dilate, Erode, Open, Close}.
- *      These are separable dwa (destination word accumulation)
- *      implementations.  They use auto-gen'd dwa code.  You can get
- *      the result as a new Pix, in-place back into the src Pix,
- *      or written to another existing Pix.  This is typically
- *      about 3x faster than the analogous rasterop pix*Brick()
- *      function, but it has the limitation that the Sel size must
- *      be less than 63.  This is pre-set to work on a number
- *      of pre-generated Sels.  If you want to use other Sels, the
- *      code can be auto-gen'd for them; see the instructions in morphdwa.c.
- *
- *  (4) Same as (1), but you run it through pixMorphSequence(), with
- *      the sequence string either compiled in or generated using snprintf.
- *      All intermediate images and Sels are created, used and destroyed.
- *      You always get the result as a new Pix.  For example, you can
- *      specify a separable 11 x 17 brick opening as "o11.17",
- *      or you can specify the horizontal and vertical operations
- *      explicitly as "o11.1 + o1.11".  See morphseq.c for details.
- *
- *  (5) Same as (2), but you run it through pixMorphCompSequence(), with
- *      the sequence string either compiled in or generated using snprintf.
- *      All intermediate images and Sels are created, used and destroyed.
- *      You always get the result as a new Pix.  See morphseq.c for details.
- *
- *  (6) Same as (3), but you run it through pixMorphSequenceDwa(), with
- *      the sequence string either compiled in or generated using snprintf.
- *      All intermediate images and Sels are created, used and destroyed.
- *      You always get the result as a new Pix.  See morphseq.c for details.
- *
- *  If you are using Sels that are not bricks, you have two choices:
- *      (a) simplest: use the basic rasterop implementations (pixDilate(), ...)
- *      (b) fastest: generate the destination word accumumlation (dwa)
- *          code for your Sels and compile it with the library.
- *
- *      For an example, see flipdetect.c, which gives implementations
- *      using hit-miss Sels with both the rasterop and dwa versions.
- *      For the latter, the dwa code resides in fliphmtgen.c, and it
- *      was generated by prog/flipselgen.c.  Both the rasterop and dwa
- *      implementations are tested by prog/fliptest.c.
- *
- *  A global constant MORPH_BC is used to set the boundary conditions
- *  for rasterop-based binary morphology.  MORPH_BC, in morph.c,
- *  is set by default to ASYMMETRIC_MORPH_BC for a non-symmetric
- *  convention for boundary pixels in dilation and erosion:
- *      All pixels outside the image are assumed to be OFF
- *      for both dilation and erosion.
- *  To use a symmetric definition, see comments in pixErode()
- *  and reset MORPH_BC to SYMMETRIC_MORPH_BC, using
- *  resetMorphBoundaryCondition().
- *
- *  Boundary artifacts are possible in closing when the non-symmetric
- *  boundary conditions are used, because foreground pixels very close
- *  to the edge can be removed.  This can be avoided by using either
- *  the symmetric boundary conditions or the function pixCloseSafe(),
- *  which adds a border before the operation and removes it afterwards.
- *
- *  The hit-miss transform (HMT) is the bit-and of 2 erosions:
- *     (erosion of the src by the hits)  &  (erosion of the bit-inverted
- *                                           src by the misses)
- *
- *  The 'generalized opening' is an HMT followed by a dilation that uses
- *  only the hits of the hit-miss Sel.
- *  The 'generalized closing' is a dilation (again, with the hits
- *  of a hit-miss Sel), followed by the HMT.
- *  Both of these 'generalized' functions are idempotent.
- *
- *  These functions are extensively tested in prog/binmorph1_reg.c,
- *  prog/binmorph2_reg.c, and prog/binmorph3_reg.c.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Global constant; initialized here; must be declared extern - * in other files to access it directly. However, in most - * cases that is not necessary, because it can be reset - * using resetMorphBoundaryCondition(). */ -LEPT_DLL l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC; - - /* We accept this cost in extra rasterops for decomposing exactly. */ -static const l_int32 ACCEPTABLE_COST = 5; - - /* Static helpers for arg processing */ -static PIX * processMorphArgs1(PIX *pixd, PIX *pixs, SEL *sel, PIX **ppixt); -static PIX * processMorphArgs2(PIX *pixd, PIX *pixs, SEL *sel); - - -/*-----------------------------------------------------------------* - * Generic binary morphological ops implemented with rasterop * - *-----------------------------------------------------------------*/ -/*! - * \brief pixDilate() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) This dilates src using hits in Sel.
- *      (2) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (3) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixDilate(NULL, pixs, ...);
- *          (b) pixDilate(pixs, pixs, ...);
- *          (c) pixDilate(pixd, pixs, ...);
- *      (4) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixDilate(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -l_int32 i, j, w, h, sx, sy, cx, cy, seldata; -PIX *pixt; - - PROCNAME("pixDilate"); - - if ((pixd = processMorphArgs1(pixd, pixs, sel, &pixt)) == NULL) - return (PIX *)ERROR_PTR("processMorphArgs1 failed", procName, pixd); - - pixGetDimensions(pixs, &w, &h, NULL); - selGetParameters(sel, &sy, &sx, &cy, &cx); - pixClearAll(pixd); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - seldata = sel->data[i][j]; - if (seldata == 1) { /* src | dst */ - pixRasterop(pixd, j - cx, i - cy, w, h, PIX_SRC | PIX_DST, - pixt, 0, 0); - } - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixErode() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) This erodes src using hits in Sel.
- *      (2) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (3) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixErode(NULL, pixs, ...);
- *          (b) pixErode(pixs, pixs, ...);
- *          (c) pixErode(pixd, pixs, ...);
- *      (4) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixErode(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -l_int32 i, j, w, h, sx, sy, cx, cy, seldata; -l_int32 xp, yp, xn, yn; -PIX *pixt; - - PROCNAME("pixErode"); - - if ((pixd = processMorphArgs1(pixd, pixs, sel, &pixt)) == NULL) - return (PIX *)ERROR_PTR("processMorphArgs1 failed", procName, pixd); - - pixGetDimensions(pixs, &w, &h, NULL); - selGetParameters(sel, &sy, &sx, &cy, &cx); - pixSetAll(pixd); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - seldata = sel->data[i][j]; - if (seldata == 1) { /* src & dst */ - pixRasterop(pixd, cx - j, cy - i, w, h, PIX_SRC & PIX_DST, - pixt, 0, 0); - } - } - } - - /* Clear near edges. We do this for the asymmetric boundary - * condition convention that implements erosion assuming all - * pixels surrounding the image are OFF. If you use a - * use a symmetric b.c. convention, where the erosion is - * implemented assuming pixels surrounding the image - * are ON, these operations are omitted. */ - if (MORPH_BC == ASYMMETRIC_MORPH_BC) { - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - if (xp > 0) - pixRasterop(pixd, 0, 0, xp, h, PIX_CLR, NULL, 0, 0); - if (xn > 0) - pixRasterop(pixd, w - xn, 0, xn, h, PIX_CLR, NULL, 0, 0); - if (yp > 0) - pixRasterop(pixd, 0, 0, w, yp, PIX_CLR, NULL, 0, 0); - if (yn > 0) - pixRasterop(pixd, 0, h - yn, w, yn, PIX_CLR, NULL, 0, 0); - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixHMT() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) The hit-miss transform erodes the src, using both hits
- *          and misses in the Sel.  It ANDs the shifted src for hits
- *          and ANDs the inverted shifted src for misses.
- *      (2) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (3) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixHMT(NULL, pixs, ...);
- *          (b) pixHMT(pixs, pixs, ...);
- *          (c) pixHMT(pixd, pixs, ...);
- *      (4) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixHMT(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -l_int32 i, j, w, h, sx, sy, cx, cy, firstrasterop, seldata; -l_int32 xp, yp, xn, yn; -PIX *pixt; - - PROCNAME("pixHMT"); - - if ((pixd = processMorphArgs1(pixd, pixs, sel, &pixt)) == NULL) - return (PIX *)ERROR_PTR("processMorphArgs1 failed", procName, pixd); - - pixGetDimensions(pixs, &w, &h, NULL); - selGetParameters(sel, &sy, &sx, &cy, &cx); - firstrasterop = TRUE; - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - seldata = sel->data[i][j]; - if (seldata == 1) { /* hit */ - if (firstrasterop == TRUE) { /* src only */ - pixClearAll(pixd); - pixRasterop(pixd, cx - j, cy - i, w, h, PIX_SRC, - pixt, 0, 0); - firstrasterop = FALSE; - } else { /* src & dst */ - pixRasterop(pixd, cx - j, cy - i, w, h, PIX_SRC & PIX_DST, - pixt, 0, 0); - } - } else if (seldata == 2) { /* miss */ - if (firstrasterop == TRUE) { /* ~src only */ - pixSetAll(pixd); - pixRasterop(pixd, cx - j, cy - i, w, h, PIX_NOT(PIX_SRC), - pixt, 0, 0); - firstrasterop = FALSE; - } else { /* ~src & dst */ - pixRasterop(pixd, cx - j, cy - i, w, h, - PIX_NOT(PIX_SRC) & PIX_DST, - pixt, 0, 0); - } - } - } - } - - /* Clear near edges */ - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - if (xp > 0) - pixRasterop(pixd, 0, 0, xp, h, PIX_CLR, NULL, 0, 0); - if (xn > 0) - pixRasterop(pixd, w - xn, 0, xn, h, PIX_CLR, NULL, 0, 0); - if (yp > 0) - pixRasterop(pixd, 0, 0, w, yp, PIX_CLR, NULL, 0, 0); - if (yn > 0) - pixRasterop(pixd, 0, h - yn, w, yn, PIX_CLR, NULL, 0, 0); - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixOpen() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) Generic morphological opening, using hits in the Sel.
- *      (2) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (3) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixOpen(NULL, pixs, ...);
- *          (b) pixOpen(pixs, pixs, ...);
- *          (c) pixOpen(pixd, pixs, ...);
- *      (4) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixOpen(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -PIX *pixt; - - PROCNAME("pixOpen"); - - if ((pixd = processMorphArgs2(pixd, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixd not returned", procName, pixd); - - if ((pixt = pixErode(NULL, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - pixDilate(pixd, pixt, sel); - pixDestroy(&pixt); - - return pixd; -} - - -/*! - * \brief pixClose() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) Generic morphological closing, using hits in the Sel.
- *      (2) This implementation is a strict dual of the opening if
- *          symmetric boundary conditions are used (see notes at top
- *          of this file).
- *      (3) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (4) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixClose(NULL, pixs, ...);
- *          (b) pixClose(pixs, pixs, ...);
- *          (c) pixClose(pixd, pixs, ...);
- *      (5) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixClose(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -PIX *pixt; - - PROCNAME("pixClose"); - - if ((pixd = processMorphArgs2(pixd, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixd not returned", procName, pixd); - - if ((pixt = pixDilate(NULL, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - pixErode(pixd, pixt, sel); - pixDestroy(&pixt); - - return pixd; -} - - -/*! - * \brief pixCloseSafe() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) Generic morphological closing, using hits in the Sel.
- *      (2) If non-symmetric boundary conditions are used, this
- *          function adds a border of OFF pixels that is of
- *          sufficient size to avoid losing pixels from the dilation,
- *          and it removes the border after the operation is finished.
- *          It thus enforces a correct extensive result for closing.
- *      (3) If symmetric b.c. are used, it is not necessary to add
- *          and remove this border.
- *      (4) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (5) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseSafe(NULL, pixs, ...);
- *          (b) pixCloseSafe(pixs, pixs, ...);
- *          (c) pixCloseSafe(pixd, pixs, ...);
- *      (6) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixCloseSafe(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -l_int32 xp, yp, xn, yn, xmax, xbord; -PIX *pixt1, *pixt2; - - PROCNAME("pixCloseSafe"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!sel) - return (PIX *)ERROR_PTR("sel not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - - /* Symmetric b.c. handles correctly without added pixels */ - if (MORPH_BC == SYMMETRIC_MORPH_BC) - return pixClose(pixd, pixs, sel); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - xmax = L_MAX(xp, xn); - xbord = 32 * ((xmax + 31) / 32); /* full 32 bit words */ - - if ((pixt1 = pixAddBorderGeneral(pixs, xbord, xbord, yp, yn, 0)) == NULL) - return (PIX *)ERROR_PTR("pixt1 not made", procName, pixd); - pixClose(pixt1, pixt1, sel); - if ((pixt2 = pixRemoveBorderGeneral(pixt1, xbord, xbord, yp, yn)) == NULL) - return (PIX *)ERROR_PTR("pixt2 not made", procName, pixd); - pixDestroy(&pixt1); - - if (!pixd) - return pixt2; - - pixCopy(pixd, pixt2); - pixDestroy(&pixt2); - return pixd; -} - - -/*! - * \brief pixOpenGeneralized() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) Generalized morphological opening, using both hits and
- *          misses in the Sel.
- *      (2) This does a hit-miss transform, followed by a dilation
- *          using the hits.
- *      (3) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (4) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixOpenGeneralized(NULL, pixs, ...);
- *          (b) pixOpenGeneralized(pixs, pixs, ...);
- *          (c) pixOpenGeneralized(pixd, pixs, ...);
- *      (5) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixOpenGeneralized(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -PIX *pixt; - - PROCNAME("pixOpenGeneralized"); - - if ((pixd = processMorphArgs2(pixd, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixd not returned", procName, pixd); - - if ((pixt = pixHMT(NULL, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - pixDilate(pixd, pixt, sel); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixCloseGeneralized() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \return pixd - * - *
- * Notes:
- *      (1) Generalized morphological closing, using both hits and
- *          misses in the Sel.
- *      (2) This does a dilation using the hits, followed by a
- *          hit-miss transform.
- *      (3) This operation is a dual of the generalized opening.
- *      (4) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (5) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseGeneralized(NULL, pixs, ...);
- *          (b) pixCloseGeneralized(pixs, pixs, ...);
- *          (c) pixCloseGeneralized(pixd, pixs, ...);
- *      (6) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixCloseGeneralized(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -PIX *pixt; - - PROCNAME("pixCloseGeneralized"); - - if ((pixd = processMorphArgs2(pixd, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixd not returned", procName, pixd); - - if ((pixt = pixDilate(NULL, pixs, sel)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - pixHMT(pixd, pixt, sel); - pixDestroy(&pixt); - - return pixd; -} - - -/*-----------------------------------------------------------------* - * Binary morphological (raster) ops with brick Sels * - *-----------------------------------------------------------------*/ -/*! - * \brief pixDilateBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do separably if both hsize and vsize are > 1.
- *      (4) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (5) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixDilateBrick(NULL, pixs, ...);
- *          (b) pixDilateBrick(pixs, pixs, ...);
- *          (c) pixDilateBrick(pixd, pixs, ...);
- *      (6) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixDilateBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *sel, *selh, *selv; - - PROCNAME("pixDilateBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize == 1 || vsize == 1) { /* no intermediate result */ - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - if (!sel) - return (PIX *)ERROR_PTR("sel not made", procName, pixd); - pixd = pixDilate(pixd, pixs, sel); - selDestroy(&sel); - } else { - if ((selh = selCreateBrick(1, hsize, 0, hsize / 2, SEL_HIT)) == NULL) - return (PIX *)ERROR_PTR("selh not made", procName, pixd); - if ((selv = selCreateBrick(vsize, 1, vsize / 2, 0, SEL_HIT)) == NULL) { - selDestroy(&selh); - return (PIX *)ERROR_PTR("selv not made", procName, pixd); - } - pixt = pixDilate(NULL, pixs, selh); - pixd = pixDilate(pixd, pixt, selv); - pixDestroy(&pixt); - selDestroy(&selh); - selDestroy(&selv); - } - - return pixd; -} - - -/*! - * \brief pixErodeBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do separably if both hsize and vsize are > 1.
- *      (4) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (5) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixErodeBrick(NULL, pixs, ...);
- *          (b) pixErodeBrick(pixs, pixs, ...);
- *          (c) pixErodeBrick(pixd, pixs, ...);
- *      (6) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixErodeBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *sel, *selh, *selv; - - PROCNAME("pixErodeBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize == 1 || vsize == 1) { /* no intermediate result */ - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - if (!sel) - return (PIX *)ERROR_PTR("sel not made", procName, pixd); - pixd = pixErode(pixd, pixs, sel); - selDestroy(&sel); - } else { - if ((selh = selCreateBrick(1, hsize, 0, hsize / 2, SEL_HIT)) == NULL) - return (PIX *)ERROR_PTR("selh not made", procName, pixd); - if ((selv = selCreateBrick(vsize, 1, vsize / 2, 0, SEL_HIT)) == NULL) { - selDestroy(&selh); - return (PIX *)ERROR_PTR("selv not made", procName, pixd); - } - pixt = pixErode(NULL, pixs, selh); - pixd = pixErode(pixd, pixt, selv); - pixDestroy(&pixt); - selDestroy(&selh); - selDestroy(&selv); - } - - return pixd; -} - - -/*! - * \brief pixOpenBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do separably if both hsize and vsize are > 1.
- *      (4) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (5) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixOpenBrick(NULL, pixs, ...);
- *          (b) pixOpenBrick(pixs, pixs, ...);
- *          (c) pixOpenBrick(pixd, pixs, ...);
- *      (6) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixOpenBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *sel, *selh, *selv; - - PROCNAME("pixOpenBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize == 1 || vsize == 1) { /* no intermediate result */ - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - if (!sel) - return (PIX *)ERROR_PTR("sel not made", procName, pixd); - pixd = pixOpen(pixd, pixs, sel); - selDestroy(&sel); - } else { /* do separably */ - if ((selh = selCreateBrick(1, hsize, 0, hsize / 2, SEL_HIT)) == NULL) - return (PIX *)ERROR_PTR("selh not made", procName, pixd); - if ((selv = selCreateBrick(vsize, 1, vsize / 2, 0, SEL_HIT)) == NULL) { - selDestroy(&selh); - return (PIX *)ERROR_PTR("selv not made", procName, pixd); - } - pixt = pixErode(NULL, pixs, selh); - pixd = pixErode(pixd, pixt, selv); - pixDilate(pixt, pixd, selh); - pixDilate(pixd, pixt, selv); - pixDestroy(&pixt); - selDestroy(&selh); - selDestroy(&selv); - } - - return pixd; -} - - -/*! - * \brief pixCloseBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do separably if both hsize and vsize are > 1.
- *      (4) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (5) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseBrick(NULL, pixs, ...);
- *          (b) pixCloseBrick(pixs, pixs, ...);
- *          (c) pixCloseBrick(pixd, pixs, ...);
- *      (6) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixCloseBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *sel, *selh, *selv; - - PROCNAME("pixCloseBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize == 1 || vsize == 1) { /* no intermediate result */ - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - if (!sel) - return (PIX *)ERROR_PTR("sel not made", procName, pixd); - pixd = pixClose(pixd, pixs, sel); - selDestroy(&sel); - } else { /* do separably */ - if ((selh = selCreateBrick(1, hsize, 0, hsize / 2, SEL_HIT)) == NULL) - return (PIX *)ERROR_PTR("selh not made", procName, pixd); - if ((selv = selCreateBrick(vsize, 1, vsize / 2, 0, SEL_HIT)) == NULL) { - selDestroy(&selh); - return (PIX *)ERROR_PTR("selv not made", procName, pixd); - } - pixt = pixDilate(NULL, pixs, selh); - pixd = pixDilate(pixd, pixt, selv); - pixErode(pixt, pixd, selh); - pixErode(pixd, pixt, selv); - pixDestroy(&pixt); - selDestroy(&selh); - selDestroy(&selv); - } - - return pixd; -} - - -/*! - * \brief pixCloseSafeBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do separably if both hsize and vsize are > 1.
- *      (4) Safe closing adds a border of 0 pixels, of sufficient size so
- *          that all pixels in input image are processed within
- *          32-bit words in the expanded image.  As a result, there is
- *          no special processing for pixels near the boundary, and there
- *          are no boundary effects.  The border is removed at the end.
- *      (5) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (6) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseBrick(NULL, pixs, ...);
- *          (b) pixCloseBrick(pixs, pixs, ...);
- *          (c) pixCloseBrick(pixd, pixs, ...);
- *      (7) The size of the result is determined by pixs.
- * 
- */ -PIX * -pixCloseSafeBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 maxtrans, bordsize; -PIX *pixsb, *pixt, *pixdb; -SEL *sel, *selh, *selv; - - PROCNAME("pixCloseSafeBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - /* Symmetric b.c. handles correctly without added pixels */ - if (MORPH_BC == SYMMETRIC_MORPH_BC) - return pixCloseBrick(pixd, pixs, hsize, vsize); - - maxtrans = L_MAX(hsize / 2, vsize / 2); - bordsize = 32 * ((maxtrans + 31) / 32); /* full 32 bit words */ - pixsb = pixAddBorder(pixs, bordsize, 0); - - if (hsize == 1 || vsize == 1) { /* no intermediate result */ - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - if (!sel) { - pixDestroy(&pixsb); - return (PIX *)ERROR_PTR("sel not made", procName, pixd); - } - pixdb = pixClose(NULL, pixsb, sel); - selDestroy(&sel); - } else { /* do separably */ - selh = selCreateBrick(1, hsize, 0, hsize / 2, SEL_HIT); - selv = selCreateBrick(vsize, 1, vsize / 2, 0, SEL_HIT); - if (!selh || !selv) { - selDestroy(&selh); - selDestroy(&selh); - pixDestroy(&pixsb); - return (PIX *)ERROR_PTR("selh and selv not both made", - procName, pixd); - } - pixt = pixDilate(NULL, pixsb, selh); - pixdb = pixDilate(NULL, pixt, selv); - pixErode(pixt, pixdb, selh); - pixErode(pixdb, pixt, selv); - pixDestroy(&pixt); - selDestroy(&selh); - selDestroy(&selv); - } - - pixt = pixRemoveBorder(pixdb, bordsize); - pixDestroy(&pixsb); - pixDestroy(&pixdb); - - if (!pixd) { - pixd = pixt; - } else { - pixCopy(pixd, pixt); - pixDestroy(&pixt); - } - return pixd; -} - - -/*-----------------------------------------------------------------* - * Binary composed morphological (raster) ops with brick Sels * - 
*-----------------------------------------------------------------*/ -/* \brief selectComposableSels() - * - * \param[in] size of composed sel - * \param[in] direction L_HORIZ, L_VERT - * \param[out] psel1 [optional] contiguous sel; can be null - * \param[out] psel2 [optional] comb sel; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) When using composable Sels, where the original Sel is
- *          decomposed into two, the best you can do in terms
- *          of reducing the computation is by a factor:
- *
- *               2 * sqrt(size) / size
- *
- *          In practice, you get quite close to this.  E.g.,
- *
- *             Sel size     |   Optimum reduction factor
- *             --------         ------------------------
- *                36        |          1/3
- *                64        |          1/4
- *               144        |          1/6
- *               256        |          1/8
- * 
- */ -l_int32 -selectComposableSels(l_int32 size, - l_int32 direction, - SEL **psel1, - SEL **psel2) -{ -l_int32 factor1, factor2; - - PROCNAME("selectComposableSels"); - - if (!psel1 && !psel2) - return ERROR_INT("neither &sel1 nor &sel2 are defined", procName, 1); - if (psel1) *psel1 = NULL; - if (psel2) *psel2 = NULL; - if (size < 1 || size > 10000) - return ERROR_INT("size < 1 or size > 10000", procName, 1); - if (direction != L_HORIZ && direction != L_VERT) - return ERROR_INT("invalid direction", procName, 1); - - if (selectComposableSizes(size, &factor1, &factor2)) - return ERROR_INT("factors not found", procName, 1); - - if (psel1) { - if (direction == L_HORIZ) - *psel1 = selCreateBrick(1, factor1, 0, factor1 / 2, SEL_HIT); - else - *psel1 = selCreateBrick(factor1, 1, factor1 / 2 , 0, SEL_HIT); - } - if (psel2) - *psel2 = selCreateComb(factor1, factor2, direction); - return 0; -} - - -/*! - * \brief selectComposableSizes() - * - * \param[in] size of sel to be decomposed - * \param[out] pfactor1 larger factor - * \param[out] pfactor2 smaller factor - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This works for Sel sizes up to 10000, which seems sufficient.
- *      (2) The composable sel size is typically within +- 1 of
- *          the requested size.  Up to size = 300, the maximum difference
- *          is +- 2.
- *      (3) We choose an overall cost function where the penalty for
- *          the size difference between input and actual is 4 times
- *          the penalty for additional rasterops.
- *      (4) Returned values: factor1 >= factor2
- *          If size > 1, then factor1 > 1.
- * 
- */ -l_ok -selectComposableSizes(l_int32 size, - l_int32 *pfactor1, - l_int32 *pfactor2) -{ -l_int32 i, midval, val1, val2m, val2p; -l_int32 index, prodm, prodp; -l_int32 mincost, totcost, rastcostm, rastcostp, diffm, diffp; -l_int32 lowval[256]; -l_int32 hival[256]; -l_int32 rastcost[256]; /* excess in sum of sizes (extra rasterops) */ -l_int32 diff[256]; /* diff between product (sel size) and input size */ - - PROCNAME("selectComposableSizes"); - - if (size < 1 || size > 10000) - return ERROR_INT("size < 1 or size > 10000", procName, 1); - if (!pfactor1 || !pfactor2) - return ERROR_INT("&factor1 or &factor2 not defined", procName, 1); - - midval = (l_int32)(sqrt((l_float64)size) + 0.001); - if (midval * midval == size) { - *pfactor1 = *pfactor2 = midval; - return 0; - } - - /* Set up arrays. For each val1, optimize for lowest diff, - * and save the rastcost, the diff, and the two factors. */ - for (val1 = midval + 1, i = 0; val1 > 0; val1--, i++) { - val2m = size / val1; - val2p = val2m + 1; - prodm = val1 * val2m; - prodp = val1 * val2p; - rastcostm = val1 + val2m - 2 * midval; - rastcostp = val1 + val2p - 2 * midval; - diffm = L_ABS(size - prodm); - diffp = L_ABS(size - prodp); - if (diffm <= diffp) { - lowval[i] = L_MIN(val1, val2m); - hival[i] = L_MAX(val1, val2m); - rastcost[i] = rastcostm; - diff[i] = diffm; - } else { - lowval[i] = L_MIN(val1, val2p); - hival[i] = L_MAX(val1, val2p); - rastcost[i] = rastcostp; - diff[i] = diffp; - } - } - - /* Choose the optimum factors; use cost ratio 4 on diff */ - mincost = 10000; - index = 1; /* unimportant initial value */ - for (i = 0; i < midval + 1; i++) { - if (diff[i] == 0 && rastcost[i] < ACCEPTABLE_COST) { - *pfactor1 = hival[i]; - *pfactor2 = lowval[i]; - return 0; - } - totcost = 4 * diff[i] + rastcost[i]; - if (totcost < mincost) { - mincost = totcost; - index = i; - } - } - *pfactor1 = hival[index]; - *pfactor2 = lowval[index]; - - return 0; -} - - -/*! 
- * \brief pixDilateCompBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do compositely for each dimension > 1.
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (6) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixDilateCompBrick(NULL, pixs, ...);
- *          (b) pixDilateCompBrick(pixs, pixs, ...);
- *          (c) pixDilateCompBrick(pixd, pixs, ...);
- *      (7) The dimensions of the resulting image are determined by pixs.
- *      (8) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *          but not necessarily equal to it.  It attempts to optimize:
- *             (a) for consistency with the input values: the product
- *                 of terms is close to the input size
- *             (b) for efficiency of the operation: the sum of the
- *                 terms is small; ideally about twice the square
- *                 root of the input size.
- *          So, for example, if the input hsize = 37, which is
- *          a prime number, the decomposer will break this into two
- *          terms, 6 and 6, so that the net result is a dilation
- *          with hsize = 36.
- * 
- */ -PIX * -pixDilateCompBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pix1, *pix2, *pix3; -SEL *selh1 = NULL; -SEL *selh2 = NULL; -SEL *selv1 = NULL; -SEL *selv2 = NULL; - - PROCNAME("pixDilateCompBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize > 1) { - if (selectComposableSels(hsize, L_HORIZ, &selh1, &selh2)) { - selDestroy(&selh1); - selDestroy(&selh2); - return (PIX *)ERROR_PTR("horiz sels not made", procName, pixd); - } - } - if (vsize > 1) { - if (selectComposableSels(vsize, L_VERT, &selv1, &selv2)) { - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return (PIX *)ERROR_PTR("vert sels not made", procName, pixd); - } - } - - pix1 = pixAddBorder(pixs, 32, 0); - if (vsize == 1) { - pix2 = pixDilate(NULL, pix1, selh1); - pix3 = pixDilate(NULL, pix2, selh2); - } else if (hsize == 1) { - pix2 = pixDilate(NULL, pix1, selv1); - pix3 = pixDilate(NULL, pix2, selv2); - } else { - pix2 = pixDilate(NULL, pix1, selh1); - pix3 = pixDilate(NULL, pix2, selh2); - pixDilate(pix2, pix3, selv1); - pixDilate(pix3, pix2, selv2); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - - pix1 = pixRemoveBorder(pix3, 32); - pixDestroy(&pix3); - if (!pixd) - return pix1; - pixCopy(pixd, pix1); - pixDestroy(&pix1); - return pixd; -} - - -/*! - * \brief pixErodeCompBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do compositely for each dimension > 1.
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (6) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixErodeCompBrick(NULL, pixs, ...);
- *          (b) pixErodeCompBrick(pixs, pixs, ...);
- *          (c) pixErodeCompBrick(pixd, pixs, ...);
- *      (7) The dimensions of the resulting image are determined by pixs.
- *      (8) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *          but not necessarily equal to it.  It attempts to optimize:
- *             (a) for consistency with the input values: the product
- *                 of terms is close to the input size
- *             (b) for efficiency of the operation: the sum of the
- *                 terms is small; ideally about twice the square
- *                 root of the input size.
- *          So, for example, if the input hsize = 37, which is
- *          a prime number, the decomposer will break this into two
- *          terms, 6 and 6, so that the net result is a dilation
- *          with hsize = 36.
- * 
- */ -PIX * -pixErodeCompBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *selh1 = NULL; -SEL *selh2 = NULL; -SEL *selv1 = NULL; -SEL *selv2 = NULL; - - PROCNAME("pixErodeCompBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize > 1) { - if (selectComposableSels(hsize, L_HORIZ, &selh1, &selh2)) { - selDestroy(&selh1); - selDestroy(&selh2); - return (PIX *)ERROR_PTR("horiz sels not made", procName, pixd); - } - } - if (vsize > 1) { - if (selectComposableSels(vsize, L_VERT, &selv1, &selv2)) { - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return (PIX *)ERROR_PTR("vert sels not made", procName, pixd); - } - } - - if (vsize == 1) { - pixt = pixErode(NULL, pixs, selh1); - pixd = pixErode(pixd, pixt, selh2); - } else if (hsize == 1) { - pixt = pixErode(NULL, pixs, selv1); - pixd = pixErode(pixd, pixt, selv2); - } else { - pixt = pixErode(NULL, pixs, selh1); - pixd = pixErode(pixd, pixt, selh2); - pixErode(pixt, pixd, selv1); - pixErode(pixd, pixt, selv2); - } - pixDestroy(&pixt); - - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return pixd; -} - - -/*! - * \brief pixOpenCompBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do compositely for each dimension > 1.
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (6) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixOpenCompBrick(NULL, pixs, ...);
- *          (b) pixOpenCompBrick(pixs, pixs, ...);
- *          (c) pixOpenCompBrick(pixd, pixs, ...);
- *      (7) The dimensions of the resulting image are determined by pixs.
- *      (8) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *          but not necessarily equal to it.  It attempts to optimize:
- *             (a) for consistency with the input values: the product
- *                 of terms is close to the input size
- *             (b) for efficiency of the operation: the sum of the
- *                 terms is small; ideally about twice the square
- *                 root of the input size.
- *          So, for example, if the input hsize = 37, which is
- *          a prime number, the decomposer will break this into two
- *          terms, 6 and 6, so that the net result is a dilation
- *          with hsize = 36.
- * 
- */ -PIX * -pixOpenCompBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *selh1 = NULL; -SEL *selh2 = NULL; -SEL *selv1 = NULL; -SEL *selv2 = NULL; - - PROCNAME("pixOpenCompBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize > 1) { - if (selectComposableSels(hsize, L_HORIZ, &selh1, &selh2)) { - selDestroy(&selh1); - selDestroy(&selh2); - return (PIX *)ERROR_PTR("horiz sels not made", procName, pixd); - } - } - if (vsize > 1) { - if (selectComposableSels(vsize, L_VERT, &selv1, &selv2)) { - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return (PIX *)ERROR_PTR("vert sels not made", procName, pixd); - } - } - - if (vsize == 1) { - pixt = pixErode(NULL, pixs, selh1); - pixd = pixErode(pixd, pixt, selh2); - pixDilate(pixt, pixd, selh1); - pixDilate(pixd, pixt, selh2); - } else if (hsize == 1) { - pixt = pixErode(NULL, pixs, selv1); - pixd = pixErode(pixd, pixt, selv2); - pixDilate(pixt, pixd, selv1); - pixDilate(pixd, pixt, selv2); - } else { /* do separably */ - pixt = pixErode(NULL, pixs, selh1); - pixd = pixErode(pixd, pixt, selh2); - pixErode(pixt, pixd, selv1); - pixErode(pixd, pixt, selv2); - pixDilate(pixt, pixd, selh1); - pixDilate(pixd, pixt, selh2); - pixDilate(pixt, pixd, selv1); - pixDilate(pixd, pixt, selv2); - } - pixDestroy(&pixt); - - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return pixd; -} - - -/*! 
- * \brief pixCloseCompBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do compositely for each dimension > 1.
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (6) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseCompBrick(NULL, pixs, ...);
- *          (b) pixCloseCompBrick(pixs, pixs, ...);
- *          (c) pixCloseCompBrick(pixd, pixs, ...);
- *      (7) The dimensions of the resulting image are determined by pixs.
- *      (8) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *          but not necessarily equal to it.  It attempts to optimize:
- *             (a) for consistency with the input values: the product
- *                 of terms is close to the input size
- *             (b) for efficiency of the operation: the sum of the
- *                 terms is small; ideally about twice the square
- *                 root of the input size.
- *          So, for example, if the input hsize = 37, which is
- *          a prime number, the decomposer will break this into two
- *          terms, 6 and 6, so that the net result is a dilation
- *          with hsize = 36.
- * 
- */ -PIX * -pixCloseCompBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; -SEL *selh1 = NULL; -SEL *selh2 = NULL; -SEL *selv1 = NULL; -SEL *selv2 = NULL; - - PROCNAME("pixCloseCompBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - if (hsize > 1) { - if (selectComposableSels(hsize, L_HORIZ, &selh1, &selh2)) { - selDestroy(&selh1); - selDestroy(&selh2); - return (PIX *)ERROR_PTR("horiz sels not made", procName, pixd); - } - } - if (vsize > 1) { - if (selectComposableSels(vsize, L_VERT, &selv1, &selv2)) { - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return (PIX *)ERROR_PTR("vert sels not made", procName, pixd); - } - } - - if (vsize == 1) { - pixt = pixDilate(NULL, pixs, selh1); - pixd = pixDilate(pixd, pixt, selh2); - pixErode(pixt, pixd, selh1); - pixErode(pixd, pixt, selh2); - } else if (hsize == 1) { - pixt = pixDilate(NULL, pixs, selv1); - pixd = pixDilate(pixd, pixt, selv2); - pixErode(pixt, pixd, selv1); - pixErode(pixd, pixt, selv2); - } else { /* do separably */ - pixt = pixDilate(NULL, pixs, selh1); - pixd = pixDilate(pixd, pixt, selh2); - pixDilate(pixt, pixd, selv1); - pixDilate(pixd, pixt, selv2); - pixErode(pixt, pixd, selh1); - pixErode(pixd, pixt, selh2); - pixErode(pixt, pixd, selv1); - pixErode(pixd, pixt, selv2); - } - pixDestroy(&pixt); - - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return pixd; -} - - -/*! 
- * \brief pixCloseSafeCompBrick() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) The origin is at (x, y) = (hsize/2, vsize/2)
- *      (3) Do compositely for each dimension > 1.
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) Safe closing adds a border of 0 pixels, of sufficient size so
- *          that all pixels in input image are processed within
- *          32-bit words in the expanded image.  As a result, there is
- *          no special processing for pixels near the boundary, and there
- *          are no boundary effects.  The border is removed at the end.
- *      (6) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (7) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseSafeCompBrick(NULL, pixs, ...);
- *          (b) pixCloseSafeCompBrick(pixs, pixs, ...);
- *          (c) pixCloseSafeCompBrick(pixd, pixs, ...);
- *      (8) The dimensions of the resulting image are determined by pixs.
- *      (9) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *          but not necessarily equal to it.  It attempts to optimize:
- *             (a) for consistency with the input values: the product
- *                 of terms is close to the input size
- *             (b) for efficiency of the operation: the sum of the
- *                 terms is small; ideally about twice the square
- *                 root of the input size.
- *          So, for example, if the input hsize = 37, which is
- *          a prime number, the decomposer will break this into two
- *          terms, 6 and 6, so that the net result is a dilation
- *          with hsize = 36.
- * 
- */ -PIX * -pixCloseSafeCompBrick(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 maxtrans, bordsize; -PIX *pixsb, *pixt, *pixdb; -SEL *selh1 = NULL; -SEL *selh2 = NULL; -SEL *selv1 = NULL; -SEL *selv2 = NULL; - - PROCNAME("pixCloseSafeCompBrick"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - /* Symmetric b.c. handles correctly without added pixels */ - if (MORPH_BC == SYMMETRIC_MORPH_BC) - return pixCloseCompBrick(pixd, pixs, hsize, vsize); - - if (hsize > 1) { - if (selectComposableSels(hsize, L_HORIZ, &selh1, &selh2)) { - selDestroy(&selh1); - selDestroy(&selh2); - return (PIX *)ERROR_PTR("horiz sels not made", procName, pixd); - } - } - if (vsize > 1) { - if (selectComposableSels(vsize, L_VERT, &selv1, &selv2)) { - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return (PIX *)ERROR_PTR("vert sels not made", procName, pixd); - } - } - - maxtrans = L_MAX(hsize / 2, vsize / 2); - bordsize = 32 * ((maxtrans + 31) / 32); /* full 32 bit words */ - pixsb = pixAddBorder(pixs, bordsize, 0); - - if (vsize == 1) { - pixt = pixDilate(NULL, pixsb, selh1); - pixdb = pixDilate(NULL, pixt, selh2); - pixErode(pixt, pixdb, selh1); - pixErode(pixdb, pixt, selh2); - } else if (hsize == 1) { - pixt = pixDilate(NULL, pixsb, selv1); - pixdb = pixDilate(NULL, pixt, selv2); - pixErode(pixt, pixdb, selv1); - pixErode(pixdb, pixt, selv2); - } else { /* do separably */ - pixt = pixDilate(NULL, pixsb, selh1); - pixdb = pixDilate(NULL, pixt, selh2); - pixDilate(pixt, pixdb, selv1); - pixDilate(pixdb, pixt, selv2); - pixErode(pixt, pixdb, selh1); - pixErode(pixdb, pixt, selh2); - pixErode(pixt, pixdb, selv1); - pixErode(pixdb, pixt, selv2); - 
} - pixDestroy(&pixt); - - pixt = pixRemoveBorder(pixdb, bordsize); - pixDestroy(&pixsb); - pixDestroy(&pixdb); - - if (!pixd) { - pixd = pixt; - } else { - pixCopy(pixd, pixt); - pixDestroy(&pixt); - } - - selDestroy(&selh1); - selDestroy(&selh2); - selDestroy(&selv1); - selDestroy(&selv2); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Functions associated with boundary conditions * - *-----------------------------------------------------------------*/ -/*! - * \brief resetMorphBoundaryCondition() - * - * \param[in] bc SYMMETRIC_MORPH_BC, ASYMMETRIC_MORPH_BC - * \return void - */ -void -resetMorphBoundaryCondition(l_int32 bc) -{ - PROCNAME("resetMorphBoundaryCondition"); - - if (bc != SYMMETRIC_MORPH_BC && bc != ASYMMETRIC_MORPH_BC) { - L_WARNING("invalid bc; using asymmetric\n", procName); - bc = ASYMMETRIC_MORPH_BC; - } - MORPH_BC = bc; - return; -} - - -/*! - * \brief getMorphBorderPixelColor() - * - * \param[in] type L_MORPH_DILATE, L_MORPH_ERODE - * \param[in] depth of pix - * \return color of border pixels for this operation - */ -l_uint32 -getMorphBorderPixelColor(l_int32 type, - l_int32 depth) -{ - PROCNAME("getMorphBorderPixelColor"); - - if (type != L_MORPH_DILATE && type != L_MORPH_ERODE) - return ERROR_INT("invalid type", procName, 0); - if (depth != 1 && depth != 2 && depth != 4 && depth != 8 && - depth != 16 && depth != 32) - return ERROR_INT("invalid depth", procName, 0); - - if (MORPH_BC == ASYMMETRIC_MORPH_BC || type == L_MORPH_DILATE) - return 0; - - /* Symmetric & erosion */ - if (depth < 32) - return ((1 << depth) - 1); - else /* depth == 32 */ - return 0xffffff00; -} - - -/*-----------------------------------------------------------------* - * Static helpers for arg processing * - *-----------------------------------------------------------------*/ -/*! 
- * \brief processMorphArgs1() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] sel - * \param[out] ppixt copy or clone of %pixs - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) This is used for generic erosion, dilation and HMT.
- * 
- */ -static PIX * -processMorphArgs1(PIX *pixd, - PIX *pixs, - SEL *sel, - PIX **ppixt) -{ -l_int32 sx, sy; - - PROCNAME("processMorphArgs1"); - - if (!ppixt) - return (PIX *)ERROR_PTR("&pixt not defined", procName, pixd); - *ppixt = NULL; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!sel) - return (PIX *)ERROR_PTR("sel not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - - selGetParameters(sel, &sx, &sy, NULL, NULL); - if (sx == 0 || sy == 0) - return (PIX *)ERROR_PTR("sel of size 0", procName, pixd); - - /* We require pixd to exist and to be the same size as pixs. - * Further, pixt must be a copy (or clone) of pixs. */ - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - *ppixt = pixClone(pixs); - } else { - pixResizeImageData(pixd, pixs); - if (pixd == pixs) { /* in-place; must make a copy of pixs */ - if ((*ppixt = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - } else { - *ppixt = pixClone(pixs); - } - } - return pixd; -} - - -/*! - * \brief processMorphArgs2() - * - * This is used for generic openings and closings. 
- */ -static PIX * -processMorphArgs2(PIX *pixd, - PIX *pixs, - SEL *sel) -{ -l_int32 sx, sy; - - PROCNAME("processMorphArgs2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!sel) - return (PIX *)ERROR_PTR("sel not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - - selGetParameters(sel, &sx, &sy, NULL, NULL); - if (sx == 0 || sy == 0) - return (PIX *)ERROR_PTR("sel of size 0", procName, pixd); - - if (!pixd) - return pixCreateTemplate(pixs); - pixResizeImageData(pixd, pixs); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morph.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morph.h deleted file mode 100644 index d17723fb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morph.h +++ /dev/null @@ -1,248 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_MORPH_H -#define LEPTONICA_MORPH_H - -/*! - * \file morph.h - * - *
- *  Contains the following structs:
- *      struct Sel
- *      struct Sela
- *      struct Kernel
- *
- *  Contains definitions for:
- *      morphological b.c. flags
- *      structuring element types
- *      runlength flags for granulometry
- *      direction flags for grayscale morphology
- *      morphological operation flags
- *      standard border size
- *      grayscale intensity scaling flags
- *      morphological tophat flags
- *      arithmetic and logical operator flags
- *      grayscale morphology selection flags
- *      distance function b.c. flags
- *      image comparison flags
- *      color content flags
- * 
- */ - -/*-------------------------------------------------------------------------* - * Sel and Sel array * - *-------------------------------------------------------------------------*/ -#define SEL_VERSION_NUMBER 1 - -/*! Selection */ -struct Sel -{ - l_int32 sy; /*!< sel height */ - l_int32 sx; /*!< sel width */ - l_int32 cy; /*!< y location of sel origin */ - l_int32 cx; /*!< x location of sel origin */ - l_int32 **data; /*!< {0,1,2}; data[i][j] in [row][col] order */ - char *name; /*!< used to find sel by name */ -}; -typedef struct Sel SEL; - -/*! Array of Sel */ -struct Sela -{ - l_int32 n; /*!< number of sel actually stored */ - l_int32 nalloc; /*!< size of allocated ptr array */ - struct Sel **sel; /*!< sel ptr array */ -}; -typedef struct Sela SELA; - - -/*-------------------------------------------------------------------------* - * Kernel * - *-------------------------------------------------------------------------*/ -#define KERNEL_VERSION_NUMBER 2 - -/*! Kernel */ -struct L_Kernel -{ - l_int32 sy; /*!< kernel height */ - l_int32 sx; /*!< kernel width */ - l_int32 cy; /*!< y location of kernel origin */ - l_int32 cx; /*!< x location of kernel origin */ - l_float32 **data; /*!< data[i][j] in [row][col] order */ -}; -typedef struct L_Kernel L_KERNEL; - - -/*-------------------------------------------------------------------------* - * Morphological boundary condition flags * - * * - * Two types of boundary condition for erosion. * - * The global variable MORPH_BC takes on one of these two values. * - * See notes in morph.c for usage. * - *-------------------------------------------------------------------------*/ - -/*! Morph Boundary */ -enum { - SYMMETRIC_MORPH_BC = 0, - ASYMMETRIC_MORPH_BC = 1 -}; - -/*-------------------------------------------------------------------------* - * Structuring element vals * - *-------------------------------------------------------------------------*/ - -/*! 
SEL Vals */ -enum { - SEL_DONT_CARE = 0, - SEL_HIT = 1, - SEL_MISS = 2 -}; - -/*-------------------------------------------------------------------------* - * Runlength flags for granulometry * - *-------------------------------------------------------------------------*/ - -/*! Runlength Polarity */ -enum { - L_RUN_OFF = 0, - L_RUN_ON = 1 -}; - -/*-------------------------------------------------------------------------* - * Direction flags for grayscale morphology, granulometry, * - * composable Sels, convolution, etc. * - *-------------------------------------------------------------------------*/ - -/*! Direction Flags */ -enum { - L_HORIZ = 1, - L_VERT = 2, - L_BOTH_DIRECTIONS = 3 -}; - -/*-------------------------------------------------------------------------* - * Morphological operation flags * - *-------------------------------------------------------------------------*/ - -/*! Morph Operator */ -enum { - L_MORPH_DILATE = 1, - L_MORPH_ERODE = 2, - L_MORPH_OPEN = 3, - L_MORPH_CLOSE = 4, - L_MORPH_HMT = 5 -}; - -/*-------------------------------------------------------------------------* - * Grayscale intensity scaling flags * - *-------------------------------------------------------------------------*/ - -/*! Pixel Value Scaling */ -enum { - L_LINEAR_SCALE = 1, - L_LOG_SCALE = 2 -}; - -/*-------------------------------------------------------------------------* - * Morphological tophat flags * - *-------------------------------------------------------------------------*/ - -/*! Morph Tophat */ -enum { - L_TOPHAT_WHITE = 0, - L_TOPHAT_BLACK = 1 -}; - -/*-------------------------------------------------------------------------* - * Arithmetic and logical operator flags * - * (use on grayscale images and Numas) * - *-------------------------------------------------------------------------*/ - -/*! 
ArithLogical Ops */ -enum { - L_ARITH_ADD = 1, - L_ARITH_SUBTRACT = 2, - L_ARITH_MULTIPLY = 3, /* on numas only */ - L_ARITH_DIVIDE = 4, /* on numas only */ - L_UNION = 5, /* on numas only */ - L_INTERSECTION = 6, /* on numas only */ - L_SUBTRACTION = 7, /* on numas only */ - L_EXCLUSIVE_OR = 8 /* on numas only */ -}; - -/*-------------------------------------------------------------------------* - * Min/max selection flags * - *-------------------------------------------------------------------------*/ - -/*! MinMax Selection */ -enum { - L_CHOOSE_MIN = 1, /* useful in a downscaling "erosion" */ - L_CHOOSE_MAX = 2, /* useful in a downscaling "dilation" */ - L_CHOOSE_MAXDIFF = 3, /* useful in a downscaling contrast */ - L_CHOOSE_MIN_BOOST = 4, /* use a modification of the min value */ - L_CHOOSE_MAX_BOOST = 5 /* use a modification of the max value */ -}; - -/*-------------------------------------------------------------------------* - * Exterior value b.c. for distance function flags * - *-------------------------------------------------------------------------*/ - -/*! Exterior Value */ -enum { - L_BOUNDARY_BG = 1, /* assume bg outside image */ - L_BOUNDARY_FG = 2 /* assume fg outside image */ -}; - -/*-------------------------------------------------------------------------* - * Image comparison flags * - *-------------------------------------------------------------------------*/ - -/*! Image Comparison */ -enum { - L_COMPARE_XOR = 1, - L_COMPARE_SUBTRACT = 2, - L_COMPARE_ABS_DIFF = 3 -}; - -/*-------------------------------------------------------------------------* - * Color content flags * - *-------------------------------------------------------------------------*/ - -/*! 
Color Content */ -enum { - L_MAX_DIFF_FROM_AVERAGE_2 = 1, - L_MAX_MIN_DIFF_FROM_2 = 2, - L_MAX_DIFF = 3 -}; - -/*-------------------------------------------------------------------------* - * Standard size of border added around images for special processing * - *-------------------------------------------------------------------------*/ -static const l_int32 ADDED_BORDER = 32; /*!< pixels, not bits */ - - -#endif /* LEPTONICA_MORPH_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphapp.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphapp.c deleted file mode 100644 index 8ee41b0f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphapp.c +++ /dev/null @@ -1,1636 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file morphapp.c - *
- *
- *      These are some useful and/or interesting composite
- *      image processing operations, of the type that are often
- *      useful in applications.  Most are morphological in
- *      nature.
- *
- *      Extraction of boundary pixels
- *            PIX       *pixExtractBoundary()
- *
- *      Selective morph sequence operation under mask
- *            PIX       *pixMorphSequenceMasked()
- *
- *      Selective morph sequence operation on each component
- *            PIX       *pixMorphSequenceByComponent()
- *            PIXA      *pixaMorphSequenceByComponent()
- *
- *      Selective morph sequence operation on each region
- *            PIX       *pixMorphSequenceByRegion()
- *            PIXA      *pixaMorphSequenceByRegion()
- *
- *      Union and intersection of parallel composite operations
- *            PIX       *pixUnionOfMorphOps()
- *            PIX       *pixIntersectionOfMorphOps()
- *
- *      Selective connected component filling
- *            PIX       *pixSelectiveConnCompFill()
- *
- *      Removal of matched patterns
- *            PIX       *pixRemoveMatchedPattern()
- *
- *      Display of matched patterns
- *            PIX       *pixDisplayMatchedPattern()
- *
- *      Extension of pixa by iterative erosion or dilation (and by scaling)
- *            PIXA      *pixaExtendByMorph()
- *            PIXA      *pixaExtendByScaling()
- *
- *      Iterative morphological seed filling (don't use for real work)
- *            PIX       *pixSeedfillMorph()
- *
- *      Granulometry on binary images
- *            NUMA      *pixRunHistogramMorph()
- *
- *      Composite operations on grayscale images
- *            PIX       *pixTophat()
- *            PIX       *pixHDome()
- *            PIX       *pixFastTophat()
- *            PIX       *pixMorphGradient()
- *
- *      Centroid of component
- *            PTA       *pixaCentroids()
- *            l_int32    pixCentroid()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -#define SWAP(x, y) {temp = (x); (x) = (y); (y) = temp;} - -/*-----------------------------------------------------------------* - * Extraction of boundary pixels * - *-----------------------------------------------------------------*/ -/*! - * \brief pixExtractBoundary() - * - * \param[in] pixs 1 bpp - * \param[in] type 0 for background pixels; 1 for foreground pixels - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Extracts the fg or bg boundary pixels for each component.
- *          Components are assumed to end at the boundary of pixs.
- * 
- */ -PIX * -pixExtractBoundary(PIX *pixs, - l_int32 type) -{ -PIX *pixd; - - PROCNAME("pixExtractBoundary"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if (type == 0) - pixd = pixDilateBrick(NULL, pixs, 3, 3); - else - pixd = pixErodeBrick(NULL, pixs, 3, 3); - pixXor(pixd, pixd, pixs); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Selective morph sequence operation under mask * - *-----------------------------------------------------------------*/ -/*! - * \brief pixMorphSequenceMasked() - * - * \param[in] pixs 1 bpp - * \param[in] pixm [optional] 1 bpp mask - * \param[in] sequence string specifying sequence of operations - * \param[in] dispsep horizontal separation in pixels between - * successive displays; use zero to suppress display - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This applies the morph sequence to the image, but only allows
- *          changes in pixs for pixels under the background of pixm.
- *      (5) If pixm is NULL, this is just pixMorphSequence().
- * 
- */ -PIX * -pixMorphSequenceMasked(PIX *pixs, - PIX *pixm, - const char *sequence, - l_int32 dispsep) -{ -PIX *pixd; - - PROCNAME("pixMorphSequenceMasked"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - pixd = pixMorphSequence(pixs, sequence, dispsep); - pixCombineMasked(pixd, pixs, pixm); /* restore src pixels under mask fg */ - return pixd; -} - - -/*-----------------------------------------------------------------* - * Morph sequence operation on each component * - *-----------------------------------------------------------------*/ -/*! - * \brief pixMorphSequenceByComponent() - * - * \param[in] pixs 1 bpp - * \param[in] sequence string specifying sequence - * \param[in] connectivity 4 or 8 - * \param[in] minw min width to consider; use 0 or 1 for any width - * \param[in] minh min height to consider; use 0 or 1 for any height - * \param[out] pboxa [optional] return boxa of c.c. in pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See pixMorphSequence() for composing operation sequences.
- *      (2) This operates separately on each c.c. in the input pix.
- *      (3) The dilation does NOT increase the c.c. size; it is clipped
- *          to the size of the original c.c.   This is necessary to
- *          keep the c.c. independent after the operation.
- *      (4) You can specify that the width and/or height must equal
- *          or exceed a minimum size for the operation to take place.
- *      (5) Use NULL for boxa to avoid returning the boxa.
- * 
- */ -PIX * -pixMorphSequenceByComponent(PIX *pixs, - const char *sequence, - l_int32 connectivity, - l_int32 minw, - l_int32 minh, - BOXA **pboxa) -{ -l_int32 n, i, x, y, w, h; -BOXA *boxa; -PIX *pix, *pixd; -PIXA *pixas, *pixad; - - PROCNAME("pixMorphSequenceByComponent"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - if (minw <= 0) minw = 1; - if (minh <= 0) minh = 1; - - /* Get the c.c. */ - if ((boxa = pixConnComp(pixs, &pixas, connectivity)) == NULL) - return (PIX *)ERROR_PTR("boxa not made", procName, NULL); - - /* Operate on each c.c. independently */ - pixad = pixaMorphSequenceByComponent(pixas, sequence, minw, minh); - pixaDestroy(&pixas); - boxaDestroy(&boxa); - if (!pixad) - return (PIX *)ERROR_PTR("pixad not made", procName, NULL); - - /* Display the result out into pixd */ - pixd = pixCreateTemplate(pixs); - n = pixaGetCount(pixad); - for (i = 0; i < n; i++) { - pixaGetBoxGeometry(pixad, i, &x, &y, &w, &h); - pix = pixaGetPix(pixad, i, L_CLONE); - pixRasterop(pixd, x, y, w, h, PIX_PAINT, pix, 0, 0); - pixDestroy(&pix); - } - - if (pboxa) - *pboxa = pixaGetBoxa(pixad, L_CLONE); - pixaDestroy(&pixad); - return pixd; -} - - -/*! - * \brief pixaMorphSequenceByComponent() - * - * \param[in] pixas of 1 bpp pix - * \param[in] sequence string specifying sequence - * \param[in] minw min width to consider; use 0 or 1 for any width - * \param[in] minh min height to consider; use 0 or 1 for any height - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) See pixMorphSequence() for composing operation sequences.
- *      (2) This operates separately on each c.c. in the input pixa.
- *      (3) You can specify that the width and/or height must equal
- *          or exceed a minimum size for the operation to take place.
- *      (4) The input pixa should have a boxa giving the locations
- *          of the pix components.
- * 
- */ -PIXA * -pixaMorphSequenceByComponent(PIXA *pixas, - const char *sequence, - l_int32 minw, - l_int32 minh) -{ -l_int32 n, i, w, h, d; -BOX *box; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaMorphSequenceByComponent"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if ((n = pixaGetCount(pixas)) == 0) - return (PIXA *)ERROR_PTR("no pix in pixas", procName, NULL); - if (n != pixaGetBoxaCount(pixas)) - L_WARNING("boxa size != n\n", procName); - pixaGetPixDimensions(pixas, 0, NULL, NULL, &d); - if (d != 1) - return (PIXA *)ERROR_PTR("depth not 1 bpp", procName, NULL); - - if (!sequence) - return (PIXA *)ERROR_PTR("sequence not defined", procName, NULL); - if (minw <= 0) minw = 1; - if (minh <= 0) minh = 1; - - if ((pixad = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - for (i = 0; i < n; i++) { - pixaGetPixDimensions(pixas, i, &w, &h, NULL); - if (w >= minw && h >= minh) { - if ((pix1 = pixaGetPix(pixas, i, L_CLONE)) == NULL) { - pixaDestroy(&pixad); - return (PIXA *)ERROR_PTR("pix1 not found", procName, NULL); - } - if ((pix2 = pixMorphCompSequence(pix1, sequence, 0)) == NULL) { - pixaDestroy(&pixad); - return (PIXA *)ERROR_PTR("pix2 not made", procName, NULL); - } - pixaAddPix(pixad, pix2, L_INSERT); - box = pixaGetBox(pixas, i, L_COPY); - pixaAddBox(pixad, box, L_INSERT); - pixDestroy(&pix1); - } - } - - return pixad; -} - - -/*-----------------------------------------------------------------* - * Morph sequence operation on each region * - *-----------------------------------------------------------------*/ -/*! 
- * \brief pixMorphSequenceByRegion() - * - * \param[in] pixs 1 bpp - * \param[in] pixm mask specifying regions - * \param[in] sequence string specifying sequence - * \param[in] connectivity 4 or 8, used on mask - * \param[in] minw min width to consider; use 0 or 1 for any width - * \param[in] minh min height to consider; use 0 or 1 for any height - * \param[out] pboxa [optional] return boxa of c.c. in pixm - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See pixMorphCompSequence() for composing operation sequences.
- *      (2) This operates separately on the region in pixs corresponding
- *          to each c.c. in the mask pixm.  It differs from
- *          pixMorphSequenceByComponent() in that the latter does not have
- *          a pixm (mask), but instead operates independently on each
- *          component in pixs.
- *      (3) Dilation will NOT increase the region size; the result
- *          is clipped to the size of the mask region.  This is necessary
- *          to make regions independent after the operation.
- *      (4) You can specify that the width and/or height of a region must
- *          equal or exceed a minimum size for the operation to take place.
- *      (5) Use NULL for %pboxa to avoid returning the boxa.
- * 
- */ -PIX * -pixMorphSequenceByRegion(PIX *pixs, - PIX *pixm, - const char *sequence, - l_int32 connectivity, - l_int32 minw, - l_int32 minh, - BOXA **pboxa) -{ -l_int32 n, i, x, y, w, h; -BOXA *boxa; -PIX *pix, *pixd; -PIXA *pixam, *pixad; - - PROCNAME("pixMorphSequenceByRegion"); - - if (pboxa) *pboxa = NULL; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixm) - return (PIX *)ERROR_PTR("pixm not defined", procName, NULL); - if (pixGetDepth(pixs) != 1 || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixs and pixm not both 1 bpp", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - if (minw <= 0) minw = 1; - if (minh <= 0) minh = 1; - - /* Get the c.c. of the mask */ - if ((boxa = pixConnComp(pixm, &pixam, connectivity)) == NULL) - return (PIX *)ERROR_PTR("boxa not made", procName, NULL); - - /* Operate on each region in pixs independently */ - pixad = pixaMorphSequenceByRegion(pixs, pixam, sequence, minw, minh); - pixaDestroy(&pixam); - boxaDestroy(&boxa); - if (!pixad) - return (PIX *)ERROR_PTR("pixad not made", procName, NULL); - - /* Display the result out into pixd */ - pixd = pixCreateTemplate(pixs); - n = pixaGetCount(pixad); - for (i = 0; i < n; i++) { - pixaGetBoxGeometry(pixad, i, &x, &y, &w, &h); - pix = pixaGetPix(pixad, i, L_CLONE); - pixRasterop(pixd, x, y, w, h, PIX_PAINT, pix, 0, 0); - pixDestroy(&pix); - } - - if (pboxa) - *pboxa = pixaGetBoxa(pixad, L_CLONE); - pixaDestroy(&pixad); - return pixd; -} - - -/*! - * \brief pixaMorphSequenceByRegion() - * - * \param[in] pixs 1 bpp - * \param[in] pixam of 1 bpp mask elements - * \param[in] sequence string specifying sequence - * \param[in] minw min width to consider; use 0 or 1 for any width - * \param[in] minh min height to consider; use 0 or 1 for any height - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) See pixMorphSequence() for composing operation sequences.
- *      (2) This operates separately on each region in the input pixs
- *          defined by the components in pixam.
- *      (3) You can specify that the width and/or height of a mask
- *          component must equal or exceed a minimum size for the
- *          operation to take place.
- *      (4) The input pixam should have a boxa giving the locations
- *          of the regions in pixs.
- * 
- */ -PIXA * -pixaMorphSequenceByRegion(PIX *pixs, - PIXA *pixam, - const char *sequence, - l_int32 minw, - l_int32 minh) -{ -l_int32 n, i, w, h, same, maxd, fullpa, fullba; -BOX *box; -PIX *pix1, *pix2, *pix3; -PIXA *pixad; - - PROCNAME("pixaMorphSequenceByRegion"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIXA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (!sequence) - return (PIXA *)ERROR_PTR("sequence not defined", procName, NULL); - if (!pixam) - return (PIXA *)ERROR_PTR("pixam not defined", procName, NULL); - pixaVerifyDepth(pixam, &same, &maxd); - if (maxd != 1) - return (PIXA *)ERROR_PTR("mask depth not 1 bpp", procName, NULL); - pixaIsFull(pixam, &fullpa, &fullba); - if (!fullpa || !fullba) - return (PIXA *)ERROR_PTR("missing comps in pixam", procName, NULL); - n = pixaGetCount(pixam); - if (minw <= 0) minw = 1; - if (minh <= 0) minh = 1; - - if ((pixad = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - - /* Use the rectangle to remove the appropriate part of pixs; - * then AND with the mask component to get the actual fg - * of pixs that is under the mask component. 
*/ - for (i = 0; i < n; i++) { - pixaGetPixDimensions(pixam, i, &w, &h, NULL); - if (w >= minw && h >= minh) { - pix1 = pixaGetPix(pixam, i, L_CLONE); - box = pixaGetBox(pixam, i, L_COPY); - pix2 = pixClipRectangle(pixs, box, NULL); - pixAnd(pix2, pix2, pix1); - pix3 = pixMorphCompSequence(pix2, sequence, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (!pix3) { - boxDestroy(&box); - pixaDestroy(&pixad); - L_ERROR("pix3 not made in iter %d; aborting\n", procName, i); - break; - } - pixaAddPix(pixad, pix3, L_INSERT); - pixaAddBox(pixad, box, L_INSERT); - } - } - - return pixad; -} - - -/*-----------------------------------------------------------------* - * Union and intersection of parallel composite operations * - *-----------------------------------------------------------------*/ -/*! - * \brief pixUnionOfMorphOps() - * - * \param[in] pixs 1 bpp - * \param[in] sela - * \param[in] type L_MORPH_DILATE, etc. - * \return pixd union of the specified morphological operation - * on pixs for each Sel in the Sela, or NULL on error - */ -PIX * -pixUnionOfMorphOps(PIX *pixs, - SELA *sela, - l_int32 type) -{ -l_int32 n, i; -PIX *pixt, *pixd; -SEL *sel; - - PROCNAME("pixUnionOfMorphOps"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!sela) - return (PIX *)ERROR_PTR("sela not defined", procName, NULL); - n = selaGetCount(sela); - if (n == 0) - return (PIX *)ERROR_PTR("no sels in sela", procName, NULL); - if (type != L_MORPH_DILATE && type != L_MORPH_ERODE && - type != L_MORPH_OPEN && type != L_MORPH_CLOSE && - type != L_MORPH_HMT) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - pixd = pixCreateTemplate(pixs); - for (i = 0; i < n; i++) { - sel = selaGetSel(sela, i); - if (type == L_MORPH_DILATE) - pixt = pixDilate(NULL, pixs, sel); - else if (type == L_MORPH_ERODE) - pixt = pixErode(NULL, pixs, sel); - else if (type == L_MORPH_OPEN) - pixt = pixOpen(NULL, pixs, sel); - else if (type 
== L_MORPH_CLOSE) - pixt = pixClose(NULL, pixs, sel); - else /* type == L_MORPH_HMT */ - pixt = pixHMT(NULL, pixs, sel); - pixOr(pixd, pixd, pixt); - pixDestroy(&pixt); - } - - return pixd; -} - - -/*! - * \brief pixIntersectionOfMorphOps() - * - * \param[in] pixs 1 bpp - * \param[in] sela - * \param[in] type L_MORPH_DILATE, etc. - * \return pixd intersection of the specified morphological operation - * on pixs for each Sel in the Sela, or NULL on error - */ -PIX * -pixIntersectionOfMorphOps(PIX *pixs, - SELA *sela, - l_int32 type) -{ -l_int32 n, i; -PIX *pixt, *pixd; -SEL *sel; - - PROCNAME("pixIntersectionOfMorphOps"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!sela) - return (PIX *)ERROR_PTR("sela not defined", procName, NULL); - n = selaGetCount(sela); - if (n == 0) - return (PIX *)ERROR_PTR("no sels in sela", procName, NULL); - if (type != L_MORPH_DILATE && type != L_MORPH_ERODE && - type != L_MORPH_OPEN && type != L_MORPH_CLOSE && - type != L_MORPH_HMT) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixSetAll(pixd); - for (i = 0; i < n; i++) { - sel = selaGetSel(sela, i); - if (type == L_MORPH_DILATE) - pixt = pixDilate(NULL, pixs, sel); - else if (type == L_MORPH_ERODE) - pixt = pixErode(NULL, pixs, sel); - else if (type == L_MORPH_OPEN) - pixt = pixOpen(NULL, pixs, sel); - else if (type == L_MORPH_CLOSE) - pixt = pixClose(NULL, pixs, sel); - else /* type == L_MORPH_HMT */ - pixt = pixHMT(NULL, pixs, sel); - pixAnd(pixd, pixd, pixt); - pixDestroy(&pixt); - } - - return pixd; -} - - - -/*-----------------------------------------------------------------* - * Selective connected component filling * - *-----------------------------------------------------------------*/ -/*! 
- * \brief pixSelectiveConnCompFill() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity 4 or 8 - * \param[in] minw min width to consider; use 0 or 1 for any width - * \param[in] minh min height to consider; use 0 or 1 for any height - * \return pix with holes filled in selected c.c., or NULL on error - */ -PIX * -pixSelectiveConnCompFill(PIX *pixs, - l_int32 connectivity, - l_int32 minw, - l_int32 minh) -{ -l_int32 n, i, x, y, w, h; -BOXA *boxa; -PIX *pix1, *pix2, *pixd; -PIXA *pixa; - - PROCNAME("pixSelectiveConnCompFill"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (minw <= 0) minw = 1; - if (minh <= 0) minh = 1; - - if ((boxa = pixConnComp(pixs, &pixa, connectivity)) == NULL) - return (PIX *)ERROR_PTR("boxa not made", procName, NULL); - n = boxaGetCount(boxa); - pixd = pixCopy(NULL, pixs); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - if (w >= minw && h >= minh) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - if ((pix2 = pixHolesByFilling(pix1, 12 - connectivity)) == NULL) { - L_ERROR("pix2 not made in iter %d\n", procName, i); - pixDestroy(&pix1); - continue; - } - pixRasterop(pixd, x, y, w, h, PIX_PAINT, pix2, 0, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - } - pixaDestroy(&pixa); - boxaDestroy(&boxa); - - return pixd; -} - - -/*-----------------------------------------------------------------* - * Removal of matched patterns * - *-----------------------------------------------------------------*/ -/*! 
- * \brief pixRemoveMatchedPattern() - * - * \param[in] pixs input image, 1 bpp - * \param[in] pixp pattern to be removed from image, 1 bpp - * \param[in] pixe image after erosion by Sel that approximates pixp - * \param[in] x0, y0 center of Sel - * \param[in] dsize number of pixels on each side by which pixp is - * dilated before being subtracted from pixs; - * valid values are {0, 1, 2, 3, 4} - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *    (1) This is in-place.
- *    (2) You can use various functions in selgen to create a Sel
- *        that is used to generate pixe from pixs.
- *    (3) This function is applied after pixe has been computed.
- *        It finds the centroid of each c.c., and subtracts
- *        (the appropriately dilated version of) pixp, with the center
- *        of the Sel used to align pixp with pixs.
- * 
- */ -l_ok -pixRemoveMatchedPattern(PIX *pixs, - PIX *pixp, - PIX *pixe, - l_int32 x0, - l_int32 y0, - l_int32 dsize) -{ -l_int32 i, nc, x, y, w, h, xb, yb; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixa; -PTA *pta; -SEL *sel; - - PROCNAME("pixRemoveMatchedPattern"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixp) - return ERROR_INT("pixp not defined", procName, 1); - if (!pixe) - return ERROR_INT("pixe not defined", procName, 1); - if (pixGetDepth(pixs) != 1 || pixGetDepth(pixp) != 1 || - pixGetDepth(pixe) != 1) - return ERROR_INT("all input pix not 1 bpp", procName, 1); - if (dsize < 0 || dsize > 4) - return ERROR_INT("dsize not in {0,1,2,3,4}", procName, 1); - - /* Find the connected components and their centroids */ - boxa = pixConnComp(pixe, &pixa, 8); - if ((nc = boxaGetCount(boxa)) == 0) { - L_WARNING("no matched patterns\n", procName); - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return 0; - } - pta = pixaCentroids(pixa); - pixaDestroy(&pixa); - - /* Optionally dilate the pattern, first adding a border that - * is large enough to accommodate the dilated pixels */ - sel = NULL; - if (dsize > 0) { - sel = selCreateBrick(2 * dsize + 1, 2 * dsize + 1, dsize, dsize, - SEL_HIT); - pix1 = pixAddBorder(pixp, dsize, 0); - pix2 = pixDilate(NULL, pix1, sel); - selDestroy(&sel); - pixDestroy(&pix1); - } else { - pix2 = pixClone(pixp); - } - - /* Subtract out each dilated pattern. The centroid of each - * component is located at: - * (box->x + x, box->y + y) - * and the 'center' of the pattern used in making pixe is located at - * (x0 + dsize, (y0 + dsize) - * relative to the UL corner of the pattern. The center of the - * pattern is placed at the center of the component. 
*/ - pixGetDimensions(pix2, &w, &h, NULL); - for (i = 0; i < nc; i++) { - ptaGetIPt(pta, i, &x, &y); - boxaGetBoxGeometry(boxa, i, &xb, &yb, NULL, NULL); - pixRasterop(pixs, xb + x - x0 - dsize, yb + y - y0 - dsize, - w, h, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); - } - - boxaDestroy(&boxa); - ptaDestroy(&pta); - pixDestroy(&pix2); - return 0; -} - - -/*-----------------------------------------------------------------* - * Display of matched patterns * - *-----------------------------------------------------------------*/ -/*! - * \brief pixDisplayMatchedPattern() - * - * \param[in] pixs input image, 1 bpp - * \param[in] pixp pattern to be removed from image, 1 bpp - * \param[in] pixe image after erosion by Sel that approximates pixp - * \param[in] x0, y0 center of Sel - * \param[in] color to paint the matched patterns; 0xrrggbb00 - * \param[in] scale reduction factor for output pixd - * \param[in] nlevels if scale < 1.0, threshold to this number of levels - * \return pixd 8 bpp, colormapped, or NULL on error - * - *
- * Notes:
- *    (1) A 4 bpp colormapped image is generated.
- *    (2) If scale <= 1.0, do scale to gray for the output, and threshold
- *        to nlevels of gray.
- *    (3) You can use various functions in selgen to create a Sel
- *        that will generate pixe from pixs.
- *    (4) This function is applied after pixe has been computed.
- *        It finds the centroid of each c.c., and colors the output
- *        pixels using pixp (appropriately aligned) as a stencil.
- *        Alignment is done using the origin of the Sel and the
- *        centroid of the eroded image to place the stencil pixp.
- * 
- */ -PIX * -pixDisplayMatchedPattern(PIX *pixs, - PIX *pixp, - PIX *pixe, - l_int32 x0, - l_int32 y0, - l_uint32 color, - l_float32 scale, - l_int32 nlevels) -{ -l_int32 i, nc, xb, yb, x, y, xi, yi, rval, gval, bval; -BOXA *boxa; -PIX *pixd, *pixt, *pixps; -PIXA *pixa; -PTA *pta; -PIXCMAP *cmap; - - PROCNAME("pixDisplayMatchedPattern"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixp) - return (PIX *)ERROR_PTR("pixp not defined", procName, NULL); - if (!pixe) - return (PIX *)ERROR_PTR("pixe not defined", procName, NULL); - if (pixGetDepth(pixs) != 1 || pixGetDepth(pixp) != 1 || - pixGetDepth(pixe) != 1) - return (PIX *)ERROR_PTR("all input pix not 1 bpp", procName, NULL); - if (scale > 1.0 || scale <= 0.0) { - L_WARNING("scale > 1.0 or < 0.0; setting to 1.0\n", procName); - scale = 1.0; - } - - /* Find the connected components and their centroids */ - boxa = pixConnComp(pixe, &pixa, 8); - if ((nc = boxaGetCount(boxa)) == 0) { - L_WARNING("no matched patterns\n", procName); - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return 0; - } - pta = pixaCentroids(pixa); - - extractRGBValues(color, &rval, &gval, &bval); - if (scale == 1.0) { /* output 4 bpp at full resolution */ - pixd = pixConvert1To4(NULL, pixs, 0, 1); - cmap = pixcmapCreate(4); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixSetColormap(pixd, cmap); - - /* Paint through pixp for each match location. The centroid of each - * component in pixe is located at: - * (box->x + x, box->y + y) - * and the 'center' of the pattern used in making pixe is located at - * (x0, y0) - * relative to the UL corner of the pattern. The center of the - * pattern is placed at the center of the component. 
*/ - for (i = 0; i < nc; i++) { - ptaGetIPt(pta, i, &x, &y); - boxaGetBoxGeometry(boxa, i, &xb, &yb, NULL, NULL); - pixSetMaskedCmap(pixd, pixp, xb + x - x0, yb + y - y0, - rval, gval, bval); - } - } else { /* output 4 bpp downscaled */ - pixt = pixScaleToGray(pixs, scale); - pixd = pixThresholdTo4bpp(pixt, nlevels, 1); - pixps = pixScaleBySampling(pixp, scale, scale); - - for (i = 0; i < nc; i++) { - ptaGetIPt(pta, i, &x, &y); - boxaGetBoxGeometry(boxa, i, &xb, &yb, NULL, NULL); - xi = (l_int32)(scale * (xb + x - x0)); - yi = (l_int32)(scale * (yb + y - y0)); - pixSetMaskedCmap(pixd, pixps, xi, yi, rval, gval, bval); - } - pixDestroy(&pixt); - pixDestroy(&pixps); - } - - boxaDestroy(&boxa); - pixaDestroy(&pixa); - ptaDestroy(&pta); - return pixd; -} - - -/*------------------------------------------------------------------------* - * Extension of pixa by iterative erosion or dilation (and by scaling) * - *------------------------------------------------------------------------*/ -/*! - * \brief pixaExtendByMorph() - * - * \param[in] pixas - * \param[in] type L_MORPH_DILATE, L_MORPH_ERODE - * \param[in] niters - * \param[in] sel used for dilation, erosion; uses 2x2 if null - * \param[in] include 1 to include a copy of the input pixas in pixad; - * 0 to omit - * \return pixad with derived pix, using all iterations, or NULL on error - * - *
- * Notes:
- *    (1) This dilates or erodes every pix in %pixas, iteratively,
- *        using the input Sel (or, if null, a 2x2 Sel by default),
- *        and puts the results in %pixad.
- *    (2) If %niters <= 0, this is a no-op; it returns a clone of pixas.
- *    (3) If %include == 1, the output %pixad contains all the pix
- *        in %pixas.  Otherwise, it doesn't, but pixaJoin() can be
- *        used later to join pixas with pixad.
- * 
- */ -PIXA * -pixaExtendByMorph(PIXA *pixas, - l_int32 type, - l_int32 niters, - SEL *sel, - l_int32 include) -{ -l_int32 maxdepth, i, j, n; -PIX *pix0, *pix1, *pix2; -SEL *selt; -PIXA *pixad; - - PROCNAME("pixaExtendByMorph"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas undefined", procName, NULL); - if (niters <= 0) { - L_INFO("niters = %d; nothing to do\n", procName, niters); - return pixaCopy(pixas, L_CLONE); - } - if (type != L_MORPH_DILATE && type != L_MORPH_ERODE) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - pixaGetDepthInfo(pixas, &maxdepth, NULL); - if (maxdepth > 1) - return (PIXA *)ERROR_PTR("some pix have bpp > 1", procName, NULL); - - if (!sel) - selt = selCreateBrick(2, 2, 0, 0, SEL_HIT); /* default */ - else - selt = selCopy(sel); - n = pixaGetCount(pixas); - pixad = pixaCreate(n * niters); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - if (include) pixaAddPix(pixad, pix1, L_COPY); - pix0 = pix1; /* need to keep the handle to destroy the clone */ - for (j = 0; j < niters; j++) { - if (type == L_MORPH_DILATE) { - pix2 = pixDilate(NULL, pix1, selt); - } else { /* L_MORPH_ERODE */ - pix2 = pixErode(NULL, pix1, selt); - } - pixaAddPix(pixad, pix2, L_INSERT); - pix1 = pix2; /* owned by pixad; do not destroy */ - } - pixDestroy(&pix0); - } - - selDestroy(&selt); - return pixad; -} - - -/*! - * \brief pixaExtendByScaling() - * - * \param[in] pixas - * \param[in] nasc numa of scaling factors - * \param[in] type L_HORIZ, L_VERT, L_BOTH_DIRECTIONS - * \param[in] include 1 to include a copy of the input pixas in pixad; - * 0 to omit - * \return pixad with derived pix, using all scalings, or NULL on error - * - *
- * Notes:
- *    (1) This scales every pix in %pixas by each factor in %nasc.
- *        and puts the results in %pixad.
- *    (2) If %include == 1, the output %pixad contains all the pix
- *        in %pixas.  Otherwise, it doesn't, but pixaJoin() can be
- *        used later to join pixas with pixad.
- * 
- */ -PIXA * -pixaExtendByScaling(PIXA *pixas, - NUMA *nasc, - l_int32 type, - l_int32 include) -{ -l_int32 i, j, n, nsc, w, h, scalew, scaleh; -l_float32 scalefact; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaExtendByScaling"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas undefined", procName, NULL); - if (!nasc || numaGetCount(nasc) == 0) - return (PIXA *)ERROR_PTR("nasc undefined or empty", procName, NULL); - if (type != L_HORIZ && type != L_VERT && type != L_BOTH_DIRECTIONS) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - - n = pixaGetCount(pixas); - nsc = numaGetCount(nasc); - if ((pixad = pixaCreate(n * (nsc + 1))) == NULL) { - L_ERROR("pixad not made: n = %d, nsc = %d\n", procName, n, nsc); - return NULL; - } - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - if (include) pixaAddPix(pixad, pix1, L_COPY); - pixGetDimensions(pix1, &w, &h, NULL); - for (j = 0; j < nsc; j++) { - numaGetFValue(nasc, j, &scalefact); - scalew = w; - scaleh = h; - if (type == L_HORIZ || type == L_BOTH_DIRECTIONS) - scalew = w * scalefact; - if (type == L_VERT || type == L_BOTH_DIRECTIONS) - scaleh = h * scalefact; - pix2 = pixScaleToSize(pix1, scalew, scaleh); - pixaAddPix(pixad, pix2, L_INSERT); - } - pixDestroy(&pix1); - } - return pixad; -} - - -/*-----------------------------------------------------------------* - * Iterative morphological seed filling * - *-----------------------------------------------------------------*/ -/*! - * \brief pixSeedfillMorph() - * - * \param[in] pixs seed - * \param[in] pixm mask - * \param[in] maxiters use 0 to go to completion - * \param[in] connectivity 4 or 8 - * \return pixd after filling into the mask or NULL on error - * - *
- * Notes:
- *    (1) This is in general a very inefficient method for filling
- *        from a seed into a mask.  Use it for a small number of iterations,
- *        but if you expect more than a few iterations, use
- *        pixSeedfillBinary().
- *    (2) We use a 3x3 brick SEL for 8-cc filling and a 3x3 plus SEL for 4-cc.
- * 
- */ -PIX * -pixSeedfillMorph(PIX *pixs, - PIX *pixm, - l_int32 maxiters, - l_int32 connectivity) -{ -l_int32 same, i; -PIX *pixt, *pixd, *temp; -SEL *sel_3; - - PROCNAME("pixSeedfillMorph"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!pixm) - return (PIX *)ERROR_PTR("mask pix not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not in {4,8}", procName, NULL); - if (maxiters <= 0) maxiters = 1000; - if (pixSizesEqual(pixs, pixm) == 0) - return (PIX *)ERROR_PTR("pix sizes unequal", procName, NULL); - - if ((sel_3 = selCreateBrick(3, 3, 1, 1, SEL_HIT)) == NULL) - return (PIX *)ERROR_PTR("sel_3 not made", procName, NULL); - if (connectivity == 4) { /* remove corner hits to make a '+' */ - selSetElement(sel_3, 0, 0, SEL_DONT_CARE); - selSetElement(sel_3, 2, 2, SEL_DONT_CARE); - selSetElement(sel_3, 2, 0, SEL_DONT_CARE); - selSetElement(sel_3, 0, 2, SEL_DONT_CARE); - } - - pixt = pixCopy(NULL, pixs); - pixd = pixCreateTemplate(pixs); - for (i = 1; i <= maxiters; i++) { - pixDilate(pixd, pixt, sel_3); - pixAnd(pixd, pixd, pixm); - pixEqual(pixd, pixt, &same); - if (same || i == maxiters) - break; - else - SWAP(pixt, pixd); - } - lept_stderr(" Num iters in binary reconstruction = %d\n", i); - - pixDestroy(&pixt); - selDestroy(&sel_3); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Granulometry on binary images * - *-----------------------------------------------------------------*/ -/*! 
- * \brief pixRunHistogramMorph() - * - * \param[in] pixs 1 bpp - * \param[in] runtype L_RUN_OFF, L_RUN_ON - * \param[in] direction L_HORIZ, L_VERT - * \param[in] maxsize size of largest runlength counted - * \return numa of run-lengths - */ -NUMA * -pixRunHistogramMorph(PIX *pixs, - l_int32 runtype, - l_int32 direction, - l_int32 maxsize) -{ -l_int32 count, i, size; -l_float32 val; -NUMA *na, *nah; -PIX *pix1, *pix2, *pix3; -SEL *sel_2a; - - PROCNAME("pixRunHistogramMorph"); - - if (!pixs) - return (NUMA *)ERROR_PTR("seed pix not defined", procName, NULL); - if (runtype != L_RUN_OFF && runtype != L_RUN_ON) - return (NUMA *)ERROR_PTR("invalid run type", procName, NULL); - if (direction != L_HORIZ && direction != L_VERT) - return (NUMA *)ERROR_PTR("direction not in {L_HORIZ, L_VERT}", - procName, NULL); - if (pixGetDepth(pixs) != 1) - return (NUMA *)ERROR_PTR("pixs must be binary", procName, NULL); - - if (direction == L_HORIZ) - sel_2a = selCreateBrick(1, 2, 0, 0, SEL_HIT); - else /* direction == L_VERT */ - sel_2a = selCreateBrick(2, 1, 0, 0, SEL_HIT); - if (!sel_2a) - return (NUMA *)ERROR_PTR("sel_2a not made", procName, NULL); - - if (runtype == L_RUN_OFF) { - if ((pix1 = pixCopy(NULL, pixs)) == NULL) { - selDestroy(&sel_2a); - return (NUMA *)ERROR_PTR("pix1 not made", procName, NULL); - } - pixInvert(pix1, pix1); - } else { /* runtype == L_RUN_ON */ - pix1 = pixClone(pixs); - } - - /* Get pixel counts at different stages of erosion */ - na = numaCreate(0); - pix2 = pixCreateTemplate(pixs); - pix3 = pixCreateTemplate(pixs); - pixCountPixels(pix1, &count, NULL); - numaAddNumber(na, count); - pixErode(pix2, pix1, sel_2a); - pixCountPixels(pix2, &count, NULL); - numaAddNumber(na, count); - for (i = 0; i < maxsize / 2; i++) { - pixErode(pix3, pix2, sel_2a); - pixCountPixels(pix3, &count, NULL); - numaAddNumber(na, count); - pixErode(pix2, pix3, sel_2a); - pixCountPixels(pix2, &count, NULL); - numaAddNumber(na, count); - } - - /* Compute length histogram */ - size = 
numaGetCount(na); - nah = numaCreate(size); - numaAddNumber(nah, 0); /* number at length 0 */ - for (i = 1; i < size - 1; i++) { - val = na->array[i+1] - 2 * na->array[i] + na->array[i-1]; - numaAddNumber(nah, val); - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - selDestroy(&sel_2a); - numaDestroy(&na); - return nah; -} - - -/*-----------------------------------------------------------------* - * Composite operations on grayscale images * - *-----------------------------------------------------------------*/ -/*! - * \brief pixTophat() - * - * \param[in] pixs 1 bpp - * \param[in] hsize of Sel; must be odd; origin implicitly in center - * \param[in] vsize ditto - * \param[in] type L_TOPHAT_WHITE: image - opening - * L_TOPHAT_BLACK: closing - image - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Sel is a brick with all elements being hits
- *      (2) If hsize = vsize = 1, returns an image with all 0 data.
- *      (3) The L_TOPHAT_WHITE flag emphasizes small bright regions,
- *          whereas the L_TOPHAT_BLACK flag emphasizes small dark regions.
- *          The L_TOPHAT_WHITE tophat can be accomplished by doing a
- *          L_TOPHAT_BLACK tophat on the inverse, or v.v.
- * 
- */ -PIX * -pixTophat(PIX *pixs, - l_int32 hsize, - l_int32 vsize, - l_int32 type) -{ -PIX *pixt, *pixd; - - PROCNAME("pixTophat"); - - if (!pixs) - return (PIX *)ERROR_PTR("seed pix not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL); - if ((hsize & 1) == 0 ) { - L_WARNING("horiz sel size must be odd; increasing by 1\n", procName); - hsize++; - } - if ((vsize & 1) == 0 ) { - L_WARNING("vert sel size must be odd; increasing by 1\n", procName); - vsize++; - } - if (type != L_TOPHAT_WHITE && type != L_TOPHAT_BLACK) - return (PIX *)ERROR_PTR("type must be L_TOPHAT_BLACK or L_TOPHAT_WHITE", - procName, NULL); - - if (hsize == 1 && vsize == 1) - return pixCreateTemplate(pixs); - - switch (type) - { - case L_TOPHAT_WHITE: - if ((pixt = pixOpenGray(pixs, hsize, vsize)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixSubtractGray(NULL, pixs, pixt); - pixDestroy(&pixt); - break; - case L_TOPHAT_BLACK: - if ((pixd = pixCloseGray(pixs, hsize, vsize)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSubtractGray(pixd, pixd, pixs); - break; - default: - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - } - - return pixd; -} - - -/*! - * \brief pixHDome() - * - * \param[in] pixs 8 bpp, filling mask - * \param[in] height of seed below the filling maskhdome; must be >= 0 - * \param[in] connectivity 4 or 8 - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) It is more efficient to use a connectivity of 4 for the fill.
- *      (2) This fills bumps to some level, and extracts the unfilled
- *          part of the bump.  To extract the troughs of basins, first
- *          invert pixs and then apply pixHDome().
- *      (3) It is useful to compare the HDome operation with the TopHat.
- *          The latter extracts peaks or valleys that have a width
- *          not exceeding the size of the structuring element used
- *          in the opening or closing, rsp.  The height of the peak is
- *          irrelevant.  By contrast, for the HDome, the gray seedfill
- *          is used to extract all peaks that have a height not exceeding
- *          a given value, regardless of their width!
- *      (4) Slightly more precisely, suppose you set 'height' = 40.
- *          Then all bumps in pixs with a height greater than or equal
- *          to 40 become, in pixd, bumps with a max value of exactly 40.
- *          All shorter bumps have a max value in pixd equal to the height
- *          of the bump.
- *      (5) The method: the filling mask, pixs, is the image whose peaks
- *          are to be extracted.  The height of a peak is the distance
- *          between the top of the peak and the highest "leak" to the
- *          outside -- think of a sombrero, where the leak occurs
- *          at the highest point on the rim.
- *            (a) Generate a seed, pixd, by subtracting some value, p, from
- *                each pixel in the filling mask, pixs.  The value p is
- *                the 'height' input to this function.
- *            (b) Fill in pixd starting with this seed, clipping by pixs,
- *                in the way described in seedfillGrayLow().  The filling
- *                stops before the peaks in pixs are filled.
- *                For peaks that have a height > p, pixd is filled to
- *                the level equal to the (top-of-the-peak - p).
- *                For peaks of height < p, the peak is left unfilled
- *                from its highest saddle point (the leak to the outside).
- *            (c) Subtract the filled seed (pixd) from the filling mask (pixs).
- *          Note that in this procedure, everything is done starting
- *          with the filling mask, pixs.
- *      (6) For segmentation, the resulting image, pixd, can be thresholded
- *          and used as a seed for another filling operation.
- * 
- */ -PIX * -pixHDome(PIX *pixs, - l_int32 height, - l_int32 connectivity) -{ -PIX *pixsd, *pixd; - - PROCNAME("pixHDome"); - - if (!pixs) - return (PIX *)ERROR_PTR("src pix not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (height < 0) - return (PIX *)ERROR_PTR("height not >= 0", procName, NULL); - if (height == 0) - return pixCreateTemplate(pixs); - - if ((pixsd = pixCopy(NULL, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixsd not made", procName, NULL); - pixAddConstantGray(pixsd, -height); - pixSeedfillGray(pixsd, pixs, connectivity); - pixd = pixSubtractGray(NULL, pixs, pixsd); - pixDestroy(&pixsd); - return pixd; -} - - -/*! - * \brief pixFastTophat() - * - * \param[in] pixs 8 bpp - * \param[in] xsize width of max/min op, smoothing; any integer >= 1 - * \param[in] ysize height of max/min op, smoothing; any integer >= 1 - * \param[in] type L_TOPHAT_WHITE: image - min - * L_TOPHAT_BLACK: max - image - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Don't be fooled. This is NOT a tophat.  It is a tophat-like
- *          operation, where the result is similar to what you'd get
- *          if you used an erosion instead of an opening, or a dilation
- *          instead of a closing.
- *      (2) Instead of opening or closing at full resolution, it does
- *          a fast downscale/minmax operation, then a quick small smoothing
- *          at low res, a replicative expansion of the "background"
- *          to full res, and finally a removal of the background level
- *          from the input image.  The smoothing step may not be important.
- *      (3) It does not remove noise as well as a tophat, but it is
- *          5 to 10 times faster.
- *          If you need the preciseness of the tophat, don't use this.
- *      (4) The L_TOPHAT_WHITE flag emphasizes small bright regions,
- *          whereas the L_TOPHAT_BLACK flag emphasizes small dark regions.
- * 
- */ -PIX * -pixFastTophat(PIX *pixs, - l_int32 xsize, - l_int32 ysize, - l_int32 type) -{ -PIX *pix1, *pix2, *pix3, *pixd; - - PROCNAME("pixFastTophat"); - - if (!pixs) - return (PIX *)ERROR_PTR("seed pix not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (xsize < 1 || ysize < 1) - return (PIX *)ERROR_PTR("size < 1", procName, NULL); - if (type != L_TOPHAT_WHITE && type != L_TOPHAT_BLACK) - return (PIX *)ERROR_PTR("type must be L_TOPHAT_BLACK or L_TOPHAT_WHITE", - procName, NULL); - - if (xsize == 1 && ysize == 1) - return pixCreateTemplate(pixs); - - switch (type) - { - case L_TOPHAT_WHITE: - if ((pix1 = pixScaleGrayMinMax(pixs, xsize, ysize, L_CHOOSE_MIN)) - == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - pix2 = pixBlockconv(pix1, 1, 1); /* small smoothing */ - pix3 = pixScaleBySampling(pix2, xsize, ysize); - pixd = pixSubtractGray(NULL, pixs, pix3); - pixDestroy(&pix3); - break; - case L_TOPHAT_BLACK: - if ((pix1 = pixScaleGrayMinMax(pixs, xsize, ysize, L_CHOOSE_MAX)) - == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - pix2 = pixBlockconv(pix1, 1, 1); /* small smoothing */ - pixd = pixScaleBySampling(pix2, xsize, ysize); - pixSubtractGray(pixd, pixd, pixs); - break; - default: - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} - - -/*! - * \brief pixMorphGradient() - * - * \param[in] pixs 8 bpp - * \param[in] hsize sel width; must be odd; origin implicitly in center - * \param[in] vsize sel height - * \param[in] smoothing half-width of convolution smoothing filter. - * The width is (2 * smoothing + 1, so 0 is no-op. 
- * \return pixd, or NULL on error - */ -PIX * -pixMorphGradient(PIX *pixs, - l_int32 hsize, - l_int32 vsize, - l_int32 smoothing) -{ -PIX *pixg, *pixd; - - PROCNAME("pixMorphGradient"); - - if (!pixs) - return (PIX *)ERROR_PTR("seed pix not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize or vsize < 1", procName, NULL); - if ((hsize & 1) == 0 ) { - L_WARNING("horiz sel size must be odd; increasing by 1\n", procName); - hsize++; - } - if ((vsize & 1) == 0 ) { - L_WARNING("vert sel size must be odd; increasing by 1\n", procName); - vsize++; - } - - /* Optionally smooth first to remove noise. - * If smoothing is 0, just get a copy */ - pixg = pixBlockconvGray(pixs, NULL, smoothing, smoothing); - - /* This gives approximately the gradient of a transition */ - pixd = pixDilateGray(pixg, hsize, vsize); - pixSubtractGray(pixd, pixd, pixg); - pixDestroy(&pixg); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Centroid of component * - *-----------------------------------------------------------------*/ -/*! - * \brief pixaCentroids() - * - * \param[in] pixa of components; 1 or 8 bpp - * \return pta of centroids relative to the UL corner of - * each pix, or NULL on error - * - *
- * Notes:
- *      (1) An error message is returned if any pix has something other
- *          than 1 bpp or 8 bpp depth, and the centroid from that pix
- *          is saved as (0, 0).
- * 
- */ -PTA * -pixaCentroids(PIXA *pixa) -{ -l_int32 i, n; -l_int32 *centtab = NULL; -l_int32 *sumtab = NULL; -l_float32 x, y; -PIX *pix; -PTA *pta; - - PROCNAME("pixaCentroids"); - - if (!pixa) - return (PTA *)ERROR_PTR("pixa not defined", procName, NULL); - if ((n = pixaGetCount(pixa)) == 0) - return (PTA *)ERROR_PTR("no pix in pixa", procName, NULL); - - if ((pta = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("pta not defined", procName, NULL); - centtab = makePixelCentroidTab8(); - sumtab = makePixelSumTab8(); - - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - if (pixCentroid(pix, centtab, sumtab, &x, &y) == 1) - L_ERROR("centroid failure for pix %d\n", procName, i); - pixDestroy(&pix); - ptaAddPt(pta, x, y); - } - - LEPT_FREE(centtab); - LEPT_FREE(sumtab); - return pta; -} - - -/*! - * \brief pixCentroid() - * - * \param[in] pix 1 or 8 bpp - * \param[in] centtab [optional] table for finding centroids; can be null - * \param[in] sumtab [optional] table for finding pixel sums; can be null - * \param[out] pxave x coordinate of centroid, relative to the UL corner - * of the pix - * \param[out] pyave y coordinate of centroid, relative to the UL corner - * of the pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The sum and centroid tables are only used for 1 bpp.
- *      (2) Any table not passed in will be made internally and destroyed
- *          after use.
- * 
- */ -l_ok -pixCentroid(PIX *pix, - l_int32 *centtab, - l_int32 *sumtab, - l_float32 *pxave, - l_float32 *pyave) -{ -l_int32 w, h, d, i, j, wpl, pixsum, rowsum, val; -l_float32 xsum, ysum; -l_uint32 *data, *line; -l_uint32 word; -l_uint8 byte; -l_int32 *ctab, *stab; - - PROCNAME("pixCentroid"); - - if (!pxave || !pyave) - return ERROR_INT("&pxave and &pyave not defined", procName, 1); - *pxave = *pyave = 0.0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 1 && d != 8) - return ERROR_INT("pix not 1 or 8 bpp", procName, 1); - - ctab = centtab; - stab = sumtab; - if (d == 1) { - pixSetPadBits(pix, 0); - if (!centtab) - ctab = makePixelCentroidTab8(); - if (!sumtab) - stab = makePixelSumTab8(); - } - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - xsum = ysum = 0.0; - pixsum = 0; - if (d == 1) { - for (i = 0; i < h; i++) { - /* The body of this loop computes the sum of the set - * (1) bits on this row, weighted by their distance - * from the left edge of pix, and accumulates that into - * xsum; it accumulates their distance from the top - * edge of pix into ysum, and their total count into - * pixsum. 
It's equivalent to - * for (j = 0; j < w; j++) { - * if (GET_DATA_BIT(line, j)) { - * xsum += j; - * ysum += i; - * pixsum++; - * } - * } - */ - line = data + wpl * i; - rowsum = 0; - for (j = 0; j < wpl; j++) { - word = line[j]; - if (word) { - byte = word & 0xff; - rowsum += stab[byte]; - xsum += ctab[byte] + (j * 32 + 24) * stab[byte]; - byte = (word >> 8) & 0xff; - rowsum += stab[byte]; - xsum += ctab[byte] + (j * 32 + 16) * stab[byte]; - byte = (word >> 16) & 0xff; - rowsum += stab[byte]; - xsum += ctab[byte] + (j * 32 + 8) * stab[byte]; - byte = (word >> 24) & 0xff; - rowsum += stab[byte]; - xsum += ctab[byte] + j * 32 * stab[byte]; - } - } - pixsum += rowsum; - ysum += rowsum * i; - } - if (pixsum == 0) { - L_WARNING("no ON pixels in pix\n", procName); - } else { - *pxave = xsum / (l_float32)pixsum; - *pyave = ysum / (l_float32)pixsum; - } - } else { /* d == 8 */ - for (i = 0; i < h; i++) { - line = data + wpl * i; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(line, j); - xsum += val * j; - ysum += val * i; - pixsum += val; - } - } - if (pixsum == 0) { - L_WARNING("all pixels are 0\n", procName); - } else { - *pxave = xsum / (l_float32)pixsum; - *pyave = ysum / (l_float32)pixsum; - } - } - - if (!centtab) LEPT_FREE(ctab); - if (!sumtab) LEPT_FREE(stab); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphdwa.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphdwa.c deleted file mode 100644 index b362599e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphdwa.c +++ /dev/null @@ -1,1599 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file morphdwa.c - *
- *
- *    Binary morphological (dwa) ops with brick Sels
- *         PIX     *pixDilateBrickDwa()
- *         PIX     *pixErodeBrickDwa()
- *         PIX     *pixOpenBrickDwa()
- *         PIX     *pixCloseBrickDwa()
- *
- *    Binary composite morphological (dwa) ops with brick Sels
- *         PIX     *pixDilateCompBrickDwa()
- *         PIX     *pixErodeCompBrickDwa()
- *         PIX     *pixOpenCompBrickDwa()
- *         PIX     *pixCloseCompBrickDwa()
- *
- *    Binary extended composite morphological (dwa) ops with brick Sels
- *         PIX     *pixDilateCompBrickExtendDwa()
- *         PIX     *pixErodeCompBrickExtendDwa()
- *         PIX     *pixOpenCompBrickExtendDwa()
- *         PIX     *pixCloseCompBrickExtendDwa()
- *         l_int32  getExtendedCompositeParameters()
- *
- *    These are higher-level interfaces for dwa morphology with brick Sels.
- *    Because many morphological operations are performed using
- *    separable brick Sels, it is useful to have a simple interface
- *    for this.
- *
- *    We have included all 58 of the brick Sels that are generated
- *    by selaAddBasic().  These are sufficient for all the decomposable
- *    bricks up to size 63, which is the limit for dwa Sels with
- *    origins at the center of the Sel.
- *
- *    All three sets can be used as the basic interface for general
- *    brick operations.  Here are the internal calling sequences:
- *
- *      (1) If you try to apply a non-decomposable operation, such as
- *          pixErodeBrickDwa(), with a Sel size that doesn't exist,
- *          this calls a decomposable operation, pixErodeCompBrickDwa(),
- *          instead.  This can differ in linear Sel size by up to
- *          2 pixels from the request.
- *
- *      (2) If either Sel brick dimension is greater than 63, the extended
- *          composite function is called.
- *
- *      (3) The extended composite function calls the composite function
- *          a number of times with size 63, and once with size < 63.
- *          Because each operation with a size of 63 is done compositely
- *          with 7 x 9 (exactly 63), the net result is correct in
- *          length to within 2 pixels.
- *
- *    For composite operations, both using a comb and extended (beyond 63),
- *    horizontal and vertical operations are composed separately
- *    and sequentially.
- *
- *    We have also included use of all the 76 comb Sels that are generated
- *    by selaAddDwaCombs().  The generated code is in dwacomb.2.c
- *    and dwacomblow.2.c.  These are used for the composite dwa
- *    brick operations.
- *
- *    The non-composite brick operations, such as pixDilateBrickDwa(),
- *    will call the associated composite operation in situations where
- *    the requisite brick Sel has not been compiled into fmorphgen*.1.c.
- *
- *    If you want to use brick Sels that are not represented in the
- *    basic set of 58, you must generate the dwa code to implement them.
- *    You have three choices for how to use these:
- *
- *    (1) Add both the new Sels and the dwa code to the library:
- *        ~ For simplicity, add your new brick Sels to those defined
- *          in selaAddBasic().
- *        ~ Recompile the library.
- *        ~ Make prog/fmorphautogen.
- *        ~ Run prog/fmorphautogen, to generate new versions of the
- *          dwa code in fmorphgen.1.c and fmorphgenlow.1.c.
- *        ~ Copy these two files to src.
- *        ~ Recompile the library again.
- *        ~ Use the new brick Sels in your program and compile it.
- *
- *    (2) Make both the new Sels and dwa code outside the library,
- *        and link it directly to an executable:
- *        ~ Write a function to generate the new Sels in a Sela, and call
- *          fmorphautogen(sela, , filename) to generate the code.
- *        ~ Compile your program that uses the newly generated function
- *          pixMorphDwa_(), and link to the two new C files.
- *
- *    (3) Make the new Sels in the library and use the dwa code outside it:
- *        ~ Add code in the library to generate your new brick Sels.
- *          (It is suggested that you NOT add these Sels to the
- *          selaAddBasic() function; write a new function that generates
- *          a new Sela.)
- *        ~ Recompile the library.
- *        ~ Write a small program that generates the Sela and calls
- *          fmorphautogen(sela, , filename) to generate the code.
- *        ~ Compile your program that uses the newly generated function
- *          pixMorphDwa_(), and link to the two new C files.
- *       As an example of this approach, see prog/dwamorph*_reg.c:
- *        ~ added selaAddDwaLinear() to sel2.c
- *        ~ wrote dwamorph1_reg.c, to generate the dwa code.
- *        ~ compiled and linked the generated code with the application,
- *          dwamorph2_reg.c.  (Note: because this was a regression test,
- *          dwamorph1_reg also builds and runs the application program.)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -#ifndef NO_CONSOLE_IO -#define DEBUG_SEL_LOOKUP 0 -#endif /* ~NO_CONSOLE_IO */ - -/*-----------------------------------------------------------------* - * Binary morphological (dwa) ops with brick Sels * - *-----------------------------------------------------------------*/ -/*! - * \brief pixDilateBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) These implement 2D brick Sels, using linear Sels generated
- *          with selaAddBasic().
- *      (2) A brick Sel has hits for all elements.
- *      (3) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (6) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (7) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixDilateBrickDwa(NULL, pixs, ...);
- *          (b) pixDilateBrickDwa(pixs, pixs, ...);
- *          (c) pixDilateBrickDwa(pixd, pixs, ...);
- *      (8) The size of pixd is determined by pixs.
- *      (9) If either linear Sel is not found, this calls
- *          the appropriate decomposible function.
- * 
- */ -PIX * -pixDilateBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 found; -char *selnameh, *selnamev; -SELA *sela; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixDilateBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - sela = selaAddBasic(NULL); - found = TRUE; - selnameh = selnamev = NULL; - if (hsize > 1) { - selnameh = selaGetBrickName(sela, hsize, 1); - if (!selnameh) found = FALSE; - } - if (vsize > 1) { - selnamev = selaGetBrickName(sela, 1, vsize); - if (!selnamev) found = FALSE; - } - selaDestroy(&sela); - if (!found) { - L_INFO("Calling the decomposable dwa function\n", procName); - if (selnameh) LEPT_FREE(selnameh); - if (selnamev) LEPT_FREE(selnamev); - return pixDilateCompBrickDwa(pixd, pixs, hsize, vsize); - } - - if (vsize == 1) { - pixt2 = pixMorphDwa_1(NULL, pixs, L_MORPH_DILATE, selnameh); - LEPT_FREE(selnameh); - } else if (hsize == 1) { - pixt2 = pixMorphDwa_1(NULL, pixs, L_MORPH_DILATE, selnamev); - LEPT_FREE(selnamev); - } else { - pixt1 = pixAddBorder(pixs, 32, 0); - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh); - pixFMorphopGen_1(pixt1, pixt3, L_MORPH_DILATE, selnamev); - pixt2 = pixRemoveBorder(pixt1, 32); - pixDestroy(&pixt1); - pixDestroy(&pixt3); - LEPT_FREE(selnameh); - LEPT_FREE(selnamev); - } - - if (!pixd) - return pixt2; - - pixTransferAllData(pixd, &pixt2, 0, 0); - return pixd; -} - - -/*! - * \brief pixErodeBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) These implement 2D brick Sels, using linear Sels generated
- *          with selaAddBasic().
- *      (2) A brick Sel has hits for all elements.
- *      (3) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (6) Note that we must always set or clear the border pixels
- *          before each operation, depending on the the b.c.
- *          (symmetric or asymmetric).
- *      (7) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (8) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixErodeBrickDwa(NULL, pixs, ...);
- *          (b) pixErodeBrickDwa(pixs, pixs, ...);
- *          (c) pixErodeBrickDwa(pixd, pixs, ...);
- *      (9) The size of the result is determined by pixs.
- *      (10) If either linear Sel is not found, this calls
- *           the appropriate decomposible function.
- * 
- */ -PIX * -pixErodeBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 found; -char *selnameh, *selnamev; -SELA *sela; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixErodeBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - sela = selaAddBasic(NULL); - found = TRUE; - selnameh = selnamev = NULL; - if (hsize > 1) { - selnameh = selaGetBrickName(sela, hsize, 1); - if (!selnameh) found = FALSE; - } - if (vsize > 1) { - selnamev = selaGetBrickName(sela, 1, vsize); - if (!selnamev) found = FALSE; - } - selaDestroy(&sela); - if (!found) { - L_INFO("Calling the decomposable dwa function\n", procName); - if (selnameh) LEPT_FREE(selnameh); - if (selnamev) LEPT_FREE(selnamev); - return pixErodeCompBrickDwa(pixd, pixs, hsize, vsize); - } - - if (vsize == 1) { - pixt2 = pixMorphDwa_1(NULL, pixs, L_MORPH_ERODE, selnameh); - LEPT_FREE(selnameh); - } else if (hsize == 1) { - pixt2 = pixMorphDwa_1(NULL, pixs, L_MORPH_ERODE, selnamev); - LEPT_FREE(selnamev); - } else { - pixt1 = pixAddBorder(pixs, 32, 0); - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh); - pixFMorphopGen_1(pixt1, pixt3, L_MORPH_ERODE, selnamev); - pixt2 = pixRemoveBorder(pixt1, 32); - pixDestroy(&pixt1); - pixDestroy(&pixt3); - LEPT_FREE(selnameh); - LEPT_FREE(selnamev); - } - - if (!pixd) - return pixt2; - - pixTransferAllData(pixd, &pixt2, 0, 0); - return pixd; -} - - -/*! - * \brief pixOpenBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) These implement 2D brick Sels, using linear Sels generated
- *          with selaAddBasic().
- *      (2) A brick Sel has hits for all elements.
- *      (3) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (4) Do separably if both hsize and vsize are > 1.
- *      (5) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (6) Note that we must always set or clear the border pixels
- *          before each operation, depending on the the b.c.
- *          (symmetric or asymmetric).
- *      (7) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (8) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixOpenBrickDwa(NULL, pixs, ...);
- *          (b) pixOpenBrickDwa(pixs, pixs, ...);
- *          (c) pixOpenBrickDwa(pixd, pixs, ...);
- *      (9) The size of the result is determined by pixs.
- *      (10) If either linear Sel is not found, this calls
- *           the appropriate decomposible function.
- * 
- */ -PIX * -pixOpenBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 found; -char *selnameh, *selnamev; -SELA *sela; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixOpenBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - sela = selaAddBasic(NULL); - found = TRUE; - selnameh = selnamev = NULL; - if (hsize > 1) { - selnameh = selaGetBrickName(sela, hsize, 1); - if (!selnameh) found = FALSE; - } - if (vsize > 1) { - selnamev = selaGetBrickName(sela, 1, vsize); - if (!selnamev) found = FALSE; - } - selaDestroy(&sela); - if (!found) { - L_INFO("Calling the decomposable dwa function\n", procName); - if (selnameh) LEPT_FREE(selnameh); - if (selnamev) LEPT_FREE(selnamev); - return pixOpenCompBrickDwa(pixd, pixs, hsize, vsize); - } - - pixt1 = pixAddBorder(pixs, 32, 0); - if (vsize == 1) { /* horizontal only */ - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_OPEN, selnameh); - LEPT_FREE(selnameh); - } else if (hsize == 1) { /* vertical only */ - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_OPEN, selnamev); - LEPT_FREE(selnamev); - } else { /* do separable */ - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_ERODE, selnamev); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnameh); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_DILATE, selnamev); - LEPT_FREE(selnameh); - LEPT_FREE(selnamev); - pixDestroy(&pixt3); - } - pixt3 = pixRemoveBorder(pixt2, 32); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixTransferAllData(pixd, &pixt3, 0, 0); - return pixd; -} - - -/*! 
- * \brief pixCloseBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) This is a 'safe' closing; we add an extra border of 32 OFF
- *          pixels for the standard asymmetric b.c.
- *      (2) These implement 2D brick Sels, using linear Sels generated
- *          with selaAddBasic().
- *      (3) A brick Sel has hits for all elements.
- *      (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (5) Do separably if both hsize and vsize are > 1.
- *      (6) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (7) Note that we must always set or clear the border pixels
- *          before each operation, depending on the the b.c.
- *          (symmetric or asymmetric).
- *      (8) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (9) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseBrickDwa(NULL, pixs, ...);
- *          (b) pixCloseBrickDwa(pixs, pixs, ...);
- *          (c) pixCloseBrickDwa(pixd, pixs, ...);
- *      (10) The size of the result is determined by pixs.
- *      (11) If either linear Sel is not found, this calls
- *           the appropriate decomposible function.
- * 
- */ -PIX * -pixCloseBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 bordercolor, bordersize, found; -char *selnameh, *selnamev; -SELA *sela; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixCloseBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - sela = selaAddBasic(NULL); - found = TRUE; - selnameh = selnamev = NULL; - if (hsize > 1) { - selnameh = selaGetBrickName(sela, hsize, 1); - if (!selnameh) found = FALSE; - } - if (vsize > 1) { - selnamev = selaGetBrickName(sela, 1, vsize); - if (!selnamev) found = FALSE; - } - selaDestroy(&sela); - if (!found) { - L_INFO("Calling the decomposable dwa function\n", procName); - if (selnameh) LEPT_FREE(selnameh); - if (selnamev) LEPT_FREE(selnamev); - return pixCloseCompBrickDwa(pixd, pixs, hsize, vsize); - } - - /* For "safe closing" with ASYMMETRIC_MORPH_BC, we always need - * an extra 32 OFF pixels around the image (in addition to - * the 32 added pixels for all dwa operations), whereas with - * SYMMETRIC_MORPH_BC this is not necessary. */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - if (bordercolor == 0) /* asymmetric b.c. */ - bordersize = 64; - else /* symmetric b.c. 
*/ - bordersize = 32; - pixt1 = pixAddBorder(pixs, bordersize, 0); - - if (vsize == 1) { /* horizontal only */ - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_CLOSE, selnameh); - LEPT_FREE(selnameh); - } else if (hsize == 1) { /* vertical only */ - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_CLOSE, selnamev); - LEPT_FREE(selnamev); - } else { /* do separable */ - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnamev); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_ERODE, selnameh); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_ERODE, selnamev); - LEPT_FREE(selnameh); - LEPT_FREE(selnamev); - pixDestroy(&pixt3); - } - pixt3 = pixRemoveBorder(pixt2, bordersize); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixTransferAllData(pixd, &pixt3, 0, 0); - return pixd; -} - - -/*-----------------------------------------------------------------* - * Binary composite morphological (dwa) ops with brick Sels * - *-----------------------------------------------------------------*/ -/*! - * \brief pixDilateCompBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) These implement a separable composite dilation with 2D brick Sels.
- *      (2) For efficiency, it may decompose each linear morphological
- *          operation into two (brick + comb).
- *      (3) A brick Sel has hits for all elements.
- *      (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (5) Do separably if both hsize and vsize are > 1.
- *      (6) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (7) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (8) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixDilateCompBrickDwa(NULL, pixs, ...);
- *          (b) pixDilateCompBrickDwa(pixs, pixs, ...);
- *          (c) pixDilateCompBrickDwa(pixd, pixs, ...);
- *      (9) The size of pixd is determined by pixs.
- *      (10) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *           but not necessarily equal to it.  It attempts to optimize:
- *              (a) for consistency with the input values: the product
- *                  of terms is close to the input size
- *              (b) for efficiency of the operation: the sum of the
- *                  terms is small; ideally about twice the square
- *                   root of the input size.
- *           So, for example, if the input hsize = 37, which is
- *           a prime number, the decomposer will break this into two
- *           terms, 6 and 6, so that the net result is a dilation
- *           with hsize = 36.
- * 
- */ -PIX * -pixDilateCompBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -char *selnameh1, *selnameh2, *selnamev1, *selnamev2; -l_int32 hsize1, hsize2, vsize1, vsize2; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixDilateCompBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - if (hsize > 63 || vsize > 63) - return pixDilateCompBrickExtendDwa(pixd, pixs, hsize, vsize); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - hsize1 = hsize2 = vsize1 = vsize2 = 1; - selnameh1 = selnameh2 = selnamev1 = selnamev2 = NULL; - if (hsize > 1) - getCompositeParameters(hsize, &hsize1, &hsize2, &selnameh1, - &selnameh2, NULL, NULL); - if (vsize > 1) - getCompositeParameters(vsize, &vsize1, &vsize2, NULL, NULL, - &selnamev1, &selnamev2); - -#if DEBUG_SEL_LOOKUP - lept_stderr("nameh1=%s, nameh2=%s, namev1=%s, namev2=%s\n", - selnameh1, selnameh2, selnamev1, selnamev2); - lept_stderr("hsize1=%d, hsize2=%d, vsize1=%d, vsize2=%d\n", - hsize1, hsize2, vsize1, vsize2); -#endif /* DEBUG_SEL_LOOKUP */ - - pixt1 = pixAddBorder(pixs, 64, 0); - if (vsize == 1) { - if (hsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_DILATE, selnameh2); - pixDestroy(&pixt3); - } - } else if (hsize == 1) { - if (vsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnamev1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnamev1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_DILATE, selnamev2); - pixDestroy(&pixt3); - } - } else { /* vsize and hsize both > 1 */ - if (hsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, 
selnameh1); - } else { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt3 = pixFMorphopGen_2(NULL, pixt2, L_MORPH_DILATE, selnameh2); - pixDestroy(&pixt2); - } - if (vsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnamev1); - } else { - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnamev1); - pixFMorphopGen_2(pixt2, pixt2, L_MORPH_DILATE, selnamev2); - } - pixDestroy(&pixt3); - } - pixDestroy(&pixt1); - pixt1 = pixRemoveBorder(pixt2, 64); - pixDestroy(&pixt2); - if (selnameh1) LEPT_FREE(selnameh1); - if (selnameh2) LEPT_FREE(selnameh2); - if (selnamev1) LEPT_FREE(selnamev1); - if (selnamev2) LEPT_FREE(selnamev2); - - if (!pixd) - return pixt1; - - pixTransferAllData(pixd, &pixt1, 0, 0); - return pixd; -} - - -/*! - * \brief pixErodeCompBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) These implement a separable composite erosion with 2D brick Sels.
- *      (2) For efficiency, it may decompose each linear morphological
- *          operation into two (brick + comb).
- *      (3) A brick Sel has hits for all elements.
- *      (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (5) Do separably if both hsize and vsize are > 1.
- *      (6) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (7) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (8) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixErodeCompBrickDwa(NULL, pixs, ...);
- *          (b) pixErodeCompBrickDwa(pixs, pixs, ...);
- *          (c) pixErodeCompBrickDwa(pixd, pixs, ...);
- *      (9) The size of pixd is determined by pixs.
- *      (10) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *           but not necessarily equal to it.  It attempts to optimize:
- *              (a) for consistency with the input values: the product
- *                  of terms is close to the input size
- *              (b) for efficiency of the operation: the sum of the
- *                  terms is small; ideally about twice the square
- *                   root of the input size.
- *           So, for example, if the input hsize = 37, which is
- *           a prime number, the decomposer will break this into two
- *           terms, 6 and 6, so that the net result is a dilation
- *           with hsize = 36.
- * 
- */ -PIX * -pixErodeCompBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -char *selnameh1, *selnameh2, *selnamev1, *selnamev2; -l_int32 hsize1, hsize2, vsize1, vsize2, bordercolor; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixErodeCompBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - if (hsize > 63 || vsize > 63) - return pixErodeCompBrickExtendDwa(pixd, pixs, hsize, vsize); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - hsize1 = hsize2 = vsize1 = vsize2 = 1; - selnameh1 = selnameh2 = selnamev1 = selnamev2 = NULL; - if (hsize > 1) - getCompositeParameters(hsize, &hsize1, &hsize2, &selnameh1, - &selnameh2, NULL, NULL); - if (vsize > 1) - getCompositeParameters(vsize, &vsize1, &vsize2, NULL, NULL, - &selnamev1, &selnamev2); - - /* For symmetric b.c., bordercolor == 1 for erosion */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - pixt1 = pixAddBorder(pixs, 64, bordercolor); - - if (vsize == 1) { - if (hsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_ERODE, selnameh2); - pixDestroy(&pixt3); - } - } else if (hsize == 1) { - if (vsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnamev1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnamev1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_ERODE, selnamev2); - pixDestroy(&pixt3); - } - } else { /* vsize and hsize both > 1 */ - if (hsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - } else { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt3 = pixFMorphopGen_2(NULL, pixt2, 
L_MORPH_ERODE, selnameh2); - pixDestroy(&pixt2); - } - if (vsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_ERODE, selnamev1); - } else { - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_ERODE, selnamev1); - pixFMorphopGen_2(pixt2, pixt2, L_MORPH_ERODE, selnamev2); - } - pixDestroy(&pixt3); - } - pixDestroy(&pixt1); - pixt1 = pixRemoveBorder(pixt2, 64); - pixDestroy(&pixt2); - if (selnameh1) LEPT_FREE(selnameh1); - if (selnameh2) LEPT_FREE(selnameh2); - if (selnamev1) LEPT_FREE(selnamev1); - if (selnamev2) LEPT_FREE(selnamev2); - - if (!pixd) - return pixt1; - - pixTransferAllData(pixd, &pixt1, 0, 0); - return pixd; -} - - -/*! - * \brief pixOpenCompBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) These implement a separable composite opening with 2D brick Sels.
- *      (2) For efficiency, it may decompose each linear morphological
- *          operation into two (brick + comb).
- *      (3) A brick Sel has hits for all elements.
- *      (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (5) Do separably if both hsize and vsize are > 1.
- *      (6) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (7) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (8) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixOpenCompBrickDwa(NULL, pixs, ...);
- *          (b) pixOpenCompBrickDwa(pixs, pixs, ...);
- *          (c) pixOpenCompBrickDwa(pixd, pixs, ...);
- *      (9) The size of pixd is determined by pixs.
- *      (10) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *           but not necessarily equal to it.  It attempts to optimize:
- *              (a) for consistency with the input values: the product
- *                  of terms is close to the input size
- *              (b) for efficiency of the operation: the sum of the
- *                  terms is small; ideally about twice the square
- *                   root of the input size.
- *           So, for example, if the input hsize = 37, which is
- *           a prime number, the decomposer will break this into two
- *           terms, 6 and 6, so that the net result is a dilation
- *           with hsize = 36.
- * 
- */ -PIX * -pixOpenCompBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -char *selnameh1, *selnameh2, *selnamev1, *selnamev2; -l_int32 hsize1, hsize2, vsize1, vsize2, bordercolor; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixOpenCompBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - if (hsize > 63 || vsize > 63) - return pixOpenCompBrickExtendDwa(pixd, pixs, hsize, vsize); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - hsize1 = hsize2 = vsize1 = vsize2 = 1; - selnameh1 = selnameh2 = selnamev1 = selnamev2 = NULL; - if (hsize > 1) - getCompositeParameters(hsize, &hsize1, &hsize2, &selnameh1, - &selnameh2, NULL, NULL); - if (vsize > 1) - getCompositeParameters(vsize, &vsize1, &vsize2, NULL, NULL, - &selnamev1, &selnamev2); - - /* For symmetric b.c., initialize erosion with bordercolor == 1 */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - pixt1 = pixAddBorder(pixs, 64, bordercolor); - - if (vsize == 1) { - if (hsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - if (bordercolor == 1) - pixSetOrClearBorder(pixt3, 64, 64, 64, 64, PIX_CLR); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnameh1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_ERODE, selnameh2); - if (bordercolor == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_CLR); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnameh1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_DILATE, selnameh2); - } - } else if (hsize == 1) { - if (vsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnamev1); - if (bordercolor == 1) - pixSetOrClearBorder(pixt3, 64, 64, 64, 64, PIX_CLR); - pixt2 = 
pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnamev1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnamev1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_ERODE, selnamev2); - if (bordercolor == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_CLR); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_DILATE, selnamev2); - } - } else { /* vsize and hsize both > 1 */ - if (hsize2 == 1 && vsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_ERODE, selnamev1); - if (bordercolor == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_CLR); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnameh1); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_DILATE, selnamev1); - } else if (vsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_ERODE, selnameh2); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_ERODE, selnamev1); - if (bordercolor == 1) - pixSetOrClearBorder(pixt3, 64, 64, 64, 64, PIX_CLR); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_DILATE, selnameh1); - pixFMorphopGen_2(pixt3, pixt2, L_MORPH_DILATE, selnameh2); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_DILATE, selnamev1); - } else if (hsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_ERODE, selnamev1); - pixFMorphopGen_2(pixt3, pixt2, L_MORPH_ERODE, selnamev2); - if (bordercolor == 1) - pixSetOrClearBorder(pixt3, 64, 64, 64, 64, PIX_CLR); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_DILATE, selnameh1); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_DILATE, selnamev2); - } else { /* both directions are combed */ - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_ERODE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_ERODE, selnameh2); - pixFMorphopGen_1(pixt3, pixt2, 
L_MORPH_ERODE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_ERODE, selnamev2); - if (bordercolor == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_CLR); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnameh1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_DILATE, selnameh2); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_DILATE, selnamev2); - } - } - pixDestroy(&pixt3); - - pixDestroy(&pixt1); - pixt1 = pixRemoveBorder(pixt2, 64); - pixDestroy(&pixt2); - if (selnameh1) LEPT_FREE(selnameh1); - if (selnameh2) LEPT_FREE(selnameh2); - if (selnamev1) LEPT_FREE(selnamev1); - if (selnamev2) LEPT_FREE(selnamev2); - - if (!pixd) - return pixt1; - - pixTransferAllData(pixd, &pixt1, 0, 0); - return pixd; -} - - -/*! - * \brief pixCloseCompBrickDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) This implements a separable composite safe closing with 2D
- *          brick Sels.
- *      (2) For efficiency, it may decompose each linear morphological
- *          operation into two (brick + comb).
- *      (3) A brick Sel has hits for all elements.
- *      (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2)
- *      (5) Do separably if both hsize and vsize are > 1.
- *      (6) It is necessary that both horizontal and vertical Sels
- *          of the input size are defined in the basic sela.
- *      (7) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (8) For clarity, if the case is known, use these patterns:
- *          (a) pixd = pixCloseCompBrickDwa(NULL, pixs, ...);
- *          (b) pixCloseCompBrickDwa(pixs, pixs, ...);
- *          (c) pixCloseCompBrickDwa(pixd, pixs, ...);
- *      (9) The size of pixd is determined by pixs.
- *      (10) CAUTION: both hsize and vsize are being decomposed.
- *          The decomposer chooses a product of sizes (call them
- *          'terms') for each that is close to the input size,
- *           but not necessarily equal to it.  It attempts to optimize:
- *              (a) for consistency with the input values: the product
- *                  of terms is close to the input size
- *              (b) for efficiency of the operation: the sum of the
- *                  terms is small; ideally about twice the square
- *                   root of the input size.
- *           So, for example, if the input hsize = 37, which is
- *           a prime number, the decomposer will break this into two
- *           terms, 6 and 6, so that the net result is a dilation
- *           with hsize = 36.
- * 
- */ -PIX * -pixCloseCompBrickDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -char *selnameh1, *selnameh2, *selnamev1, *selnamev2; -l_int32 hsize1, hsize2, vsize1, vsize2, setborder; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixCloseCompBrickDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - if (hsize > 63 || vsize > 63) - return pixCloseCompBrickExtendDwa(pixd, pixs, hsize, vsize); - - if (hsize == 1 && vsize == 1) - return pixCopy(pixd, pixs); - - hsize1 = hsize2 = vsize1 = vsize2 = 1; - selnameh1 = selnameh2 = selnamev1 = selnamev2 = NULL; - if (hsize > 1) - getCompositeParameters(hsize, &hsize1, &hsize2, &selnameh1, - &selnameh2, NULL, NULL); - if (vsize > 1) - getCompositeParameters(vsize, &vsize1, &vsize2, NULL, NULL, - &selnamev1, &selnamev2); - - pixt3 = NULL; - /* For symmetric b.c., PIX_SET border for erosions */ - setborder = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - pixt1 = pixAddBorder(pixs, 64, 0); - - if (vsize == 1) { - if (hsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_CLOSE, selnameh1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_DILATE, selnameh2); - if (setborder == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_SET); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_ERODE, selnameh1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_ERODE, selnameh2); - } - } else if (hsize == 1) { - if (vsize2 == 1) { - pixt2 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_CLOSE, selnamev1); - } else { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnamev1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_DILATE, selnamev2); - if (setborder == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_SET); - pixFMorphopGen_1(pixt3, 
pixt2, L_MORPH_ERODE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_ERODE, selnamev2); - } - } else { /* vsize and hsize both > 1 */ - if (hsize2 == 1 && vsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnamev1); - if (setborder == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_SET); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_ERODE, selnameh1); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_ERODE, selnamev1); - } else if (vsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_DILATE, selnameh2); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnamev1); - if (setborder == 1) - pixSetOrClearBorder(pixt3, 64, 64, 64, 64, PIX_SET); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_ERODE, selnameh1); - pixFMorphopGen_2(pixt3, pixt2, L_MORPH_ERODE, selnameh2); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_ERODE, selnamev1); - } else if (hsize2 == 1) { - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt2 = pixFMorphopGen_1(NULL, pixt3, L_MORPH_DILATE, selnamev1); - pixFMorphopGen_2(pixt3, pixt2, L_MORPH_DILATE, selnamev2); - if (setborder == 1) - pixSetOrClearBorder(pixt3, 64, 64, 64, 64, PIX_SET); - pixFMorphopGen_1(pixt2, pixt3, L_MORPH_ERODE, selnameh1); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_ERODE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_ERODE, selnamev2); - } else { /* both directions are combed */ - pixt3 = pixFMorphopGen_1(NULL, pixt1, L_MORPH_DILATE, selnameh1); - pixt2 = pixFMorphopGen_2(NULL, pixt3, L_MORPH_DILATE, selnameh2); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_DILATE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_DILATE, selnamev2); - if (setborder == 1) - pixSetOrClearBorder(pixt2, 64, 64, 64, 64, PIX_SET); - pixFMorphopGen_1(pixt3, pixt2, L_MORPH_ERODE, selnameh1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_ERODE, selnameh2); - pixFMorphopGen_1(pixt3, 
pixt2, L_MORPH_ERODE, selnamev1); - pixFMorphopGen_2(pixt2, pixt3, L_MORPH_ERODE, selnamev2); - } - } - pixDestroy(&pixt3); - - pixDestroy(&pixt1); - pixt1 = pixRemoveBorder(pixt2, 64); - pixDestroy(&pixt2); - if (selnameh1) LEPT_FREE(selnameh1); - if (selnameh2) LEPT_FREE(selnameh2); - if (selnamev1) LEPT_FREE(selnamev1); - if (selnamev2) LEPT_FREE(selnamev2); - - if (!pixd) - return pixt1; - - pixTransferAllData(pixd, &pixt1, 0, 0); - return pixd; -} - - -/*--------------------------------------------------------------------------* - * Binary expanded composite morphological (dwa) ops with brick Sels * - *--------------------------------------------------------------------------*/ -/*! - * \brief pixDilateCompBrickExtendDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) Ankur Jain suggested and implemented extending the composite
- *          DWA operations beyond the 63 pixel limit.  This is a
- *          simplified and approximate implementation of the extension.
- *          This allows arbitrary Dwa morph operations using brick Sels,
- *          by decomposing the horizontal and vertical dilations into
- *          a sequence of 63-element dilations plus a dilation of size
- *          between 3 and 62.
- *      (2) The 63-element dilations are exact, whereas the extra dilation
- *          is approximate, because the underlying decomposition is
- *          in pixDilateCompBrickDwa().  See there for further details.
- *      (3) There are three cases:
- *          (a) pixd == null   (result into new pixd)
- *          (b) pixd == pixs   (in-place; writes result back to pixs)
- *          (c) pixd != pixs   (puts result into existing pixd)
- *      (4) There is no need to call this directly:  pixDilateCompBrickDwa()
- *          calls this function if either brick dimension exceeds 63.
- * 
- */ -PIX * -pixDilateCompBrickExtendDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 i, nops, nh, extrah, nv, extrav; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixDilateCompBrickExtendDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize < 64 && vsize < 64) - return pixDilateCompBrickDwa(pixd, pixs, hsize, vsize); - - if (hsize > 63) - getExtendedCompositeParameters(hsize, &nh, &extrah, NULL); - if (vsize > 63) - getExtendedCompositeParameters(vsize, &nv, &extrav, NULL); - - /* Horizontal dilation first: pixs --> pixt2. Do not alter pixs. */ - pixt1 = pixCreateTemplate(pixs); /* temp image */ - if (hsize == 1) { - pixt2 = pixClone(pixs); - } else if (hsize < 64) { - pixt2 = pixDilateCompBrickDwa(NULL, pixs, hsize, 1); - } else if (hsize == 64) { /* approximate */ - pixt2 = pixDilateCompBrickDwa(NULL, pixs, 63, 1); - } else { - nops = (extrah < 3) ? nh : nh + 1; - if (nops & 1) { /* odd */ - if (extrah > 2) - pixt2 = pixDilateCompBrickDwa(NULL, pixs, extrah, 1); - else - pixt2 = pixDilateCompBrickDwa(NULL, pixs, 63, 1); - for (i = 0; i < nops / 2; i++) { - pixDilateCompBrickDwa(pixt1, pixt2, 63, 1); - pixDilateCompBrickDwa(pixt2, pixt1, 63, 1); - } - } else { /* nops even */ - if (extrah > 2) { - pixDilateCompBrickDwa(pixt1, pixs, extrah, 1); - pixt2 = pixDilateCompBrickDwa(NULL, pixt1, 63, 1); - } else { /* they're all 63s */ - pixDilateCompBrickDwa(pixt1, pixs, 63, 1); - pixt2 = pixDilateCompBrickDwa(NULL, pixt1, 63, 1); - } - for (i = 0; i < nops / 2 - 1; i++) { - pixDilateCompBrickDwa(pixt1, pixt2, 63, 1); - pixDilateCompBrickDwa(pixt2, pixt1, 63, 1); - } - } - } - - /* Vertical dilation: pixt2 --> pixt3. 
*/ - if (vsize == 1) { - pixt3 = pixClone(pixt2); - } else if (vsize < 64) { - pixt3 = pixDilateCompBrickDwa(NULL, pixt2, 1, vsize); - } else if (vsize == 64) { /* approximate */ - pixt3 = pixDilateCompBrickDwa(NULL, pixt2, 1, 63); - } else { - nops = (extrav < 3) ? nv : nv + 1; - if (nops & 1) { /* odd */ - if (extrav > 2) - pixt3 = pixDilateCompBrickDwa(NULL, pixt2, 1, extrav); - else - pixt3 = pixDilateCompBrickDwa(NULL, pixt2, 1, 63); - for (i = 0; i < nops / 2; i++) { - pixDilateCompBrickDwa(pixt1, pixt3, 1, 63); - pixDilateCompBrickDwa(pixt3, pixt1, 1, 63); - } - } else { /* nops even */ - if (extrav > 2) { - pixDilateCompBrickDwa(pixt1, pixt2, 1, extrav); - pixt3 = pixDilateCompBrickDwa(NULL, pixt1, 1, 63); - } else { /* they're all 63s */ - pixDilateCompBrickDwa(pixt1, pixt2, 1, 63); - pixt3 = pixDilateCompBrickDwa(NULL, pixt1, 1, 63); - } - for (i = 0; i < nops / 2 - 1; i++) { - pixDilateCompBrickDwa(pixt1, pixt3, 1, 63); - pixDilateCompBrickDwa(pixt3, pixt1, 1, 63); - } - } - } - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixTransferAllData(pixd, &pixt3, 0, 0); - return pixd; -} - - -/*! - * \brief pixErodeCompBrickExtendDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      (1) See pixDilateCompBrickExtendDwa() for usage.
- *      (2) There is no need to call this directly:  pixErodeCompBrickDwa()
- *          calls this function if either brick dimension exceeds 63.
- * 
- */ -PIX * -pixErodeCompBrickExtendDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 i, nops, nh, extrah, nv, extrav; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixErodeCompBrickExtendDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - if (hsize < 64 && vsize < 64) - return pixErodeCompBrickDwa(pixd, pixs, hsize, vsize); - - if (hsize > 63) - getExtendedCompositeParameters(hsize, &nh, &extrah, NULL); - if (vsize > 63) - getExtendedCompositeParameters(vsize, &nv, &extrav, NULL); - - /* Horizontal erosion first: pixs --> pixt2. Do not alter pixs. */ - pixt1 = pixCreateTemplate(pixs); /* temp image */ - if (hsize == 1) { - pixt2 = pixClone(pixs); - } else if (hsize < 64) { - pixt2 = pixErodeCompBrickDwa(NULL, pixs, hsize, 1); - } else if (hsize == 64) { /* approximate */ - pixt2 = pixErodeCompBrickDwa(NULL, pixs, 63, 1); - } else { - nops = (extrah < 3) ? nh : nh + 1; - if (nops & 1) { /* odd */ - if (extrah > 2) - pixt2 = pixErodeCompBrickDwa(NULL, pixs, extrah, 1); - else - pixt2 = pixErodeCompBrickDwa(NULL, pixs, 63, 1); - for (i = 0; i < nops / 2; i++) { - pixErodeCompBrickDwa(pixt1, pixt2, 63, 1); - pixErodeCompBrickDwa(pixt2, pixt1, 63, 1); - } - } else { /* nops even */ - if (extrah > 2) { - pixErodeCompBrickDwa(pixt1, pixs, extrah, 1); - pixt2 = pixErodeCompBrickDwa(NULL, pixt1, 63, 1); - } else { /* they're all 63s */ - pixErodeCompBrickDwa(pixt1, pixs, 63, 1); - pixt2 = pixErodeCompBrickDwa(NULL, pixt1, 63, 1); - } - for (i = 0; i < nops / 2 - 1; i++) { - pixErodeCompBrickDwa(pixt1, pixt2, 63, 1); - pixErodeCompBrickDwa(pixt2, pixt1, 63, 1); - } - } - } - - /* Vertical erosion: pixt2 --> pixt3. 
*/ - if (vsize == 1) { - pixt3 = pixClone(pixt2); - } else if (vsize < 64) { - pixt3 = pixErodeCompBrickDwa(NULL, pixt2, 1, vsize); - } else if (vsize == 64) { /* approximate */ - pixt3 = pixErodeCompBrickDwa(NULL, pixt2, 1, 63); - } else { - nops = (extrav < 3) ? nv : nv + 1; - if (nops & 1) { /* odd */ - if (extrav > 2) - pixt3 = pixErodeCompBrickDwa(NULL, pixt2, 1, extrav); - else - pixt3 = pixErodeCompBrickDwa(NULL, pixt2, 1, 63); - for (i = 0; i < nops / 2; i++) { - pixErodeCompBrickDwa(pixt1, pixt3, 1, 63); - pixErodeCompBrickDwa(pixt3, pixt1, 1, 63); - } - } else { /* nops even */ - if (extrav > 2) { - pixErodeCompBrickDwa(pixt1, pixt2, 1, extrav); - pixt3 = pixErodeCompBrickDwa(NULL, pixt1, 1, 63); - } else { /* they're all 63s */ - pixErodeCompBrickDwa(pixt1, pixt2, 1, 63); - pixt3 = pixErodeCompBrickDwa(NULL, pixt1, 1, 63); - } - for (i = 0; i < nops / 2 - 1; i++) { - pixErodeCompBrickDwa(pixt1, pixt3, 1, 63); - pixErodeCompBrickDwa(pixt3, pixt1, 1, 63); - } - } - } - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixTransferAllData(pixd, &pixt3, 0, 0); - return pixd; -} - - -/*! - * \brief pixOpenCompBrickExtendDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      1) There are three cases:
- *          a) pixd == null   (result into new pixd
- *          b) pixd == pixs   (in-place; writes result back to pixs
- *          c) pixd != pixs   (puts result into existing pixd
- *      2) There is no need to call this directly:  pixOpenCompBrickDwa(
- *          calls this function if either brick dimension exceeds 63.
- * 
- */ -PIX * -pixOpenCompBrickExtendDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -PIX *pixt; - - PROCNAME("pixOpenCompBrickExtendDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - pixt = pixErodeCompBrickExtendDwa(NULL, pixs, hsize, vsize); - pixd = pixDilateCompBrickExtendDwa(pixd, pixt, hsize, vsize); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixCloseCompBrickExtendDwa() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs 1 bpp - * \param[in] hsize width of brick Sel - * \param[in] vsize height of brick Sel - * \return pixd - * - *
- * Notes:
- *      1) There are three cases:
- *          a) pixd == null   (result into new pixd
- *          b) pixd == pixs   (in-place; writes result back to pixs
- *          c) pixd != pixs   (puts result into existing pixd
- *      2) There is no need to call this directly:  pixCloseCompBrickDwa(
- *          calls this function if either brick dimension exceeds 63.
- * 
- */ -PIX * -pixCloseCompBrickExtendDwa(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 bordercolor, borderx, bordery; -PIX *pixt1, *pixt2, *pixt3; - - PROCNAME("pixCloseCompBrickExtendDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - if (hsize < 1 || vsize < 1) - return (PIX *)ERROR_PTR("hsize and vsize not >= 1", procName, pixd); - - /* For "safe closing" with ASYMMETRIC_MORPH_BC, we always need - * an extra 32 OFF pixels around the image (in addition to - * the 32 added pixels for all dwa operations), whereas with - * SYMMETRIC_MORPH_BC this is not necessary. */ - bordercolor = getMorphBorderPixelColor(L_MORPH_ERODE, 1); - if (bordercolor == 0) { /* asymmetric b.c. */ - borderx = 32 + (hsize / 64) * 32; - bordery = 32 + (vsize / 64) * 32; - } else { /* symmetric b.c. */ - borderx = bordery = 32; - } - pixt1 = pixAddBorderGeneral(pixs, borderx, borderx, bordery, bordery, 0); - - pixt2 = pixDilateCompBrickExtendDwa(NULL, pixt1, hsize, vsize); - pixErodeCompBrickExtendDwa(pixt1, pixt2, hsize, vsize); - - pixt3 = pixRemoveBorderGeneral(pixt1, borderx, borderx, bordery, bordery); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - - if (!pixd) - return pixt3; - - pixTransferAllData(pixd, &pixt3, 0, 0); - return pixd; -} - - -/*! - * \brief getExtendedCompositeParameters() - * - * \param[in] size of linear Sel - * \param[out] pn number of 63 wide convolutions - * \param[out] pextra size of extra Sel - * \param[out] pactualsize [optional] actual size used in operation - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The DWA implementation allows Sels to be used with hits
- *          up to 31 pixels from the origin, either horizontally or
- *          vertically.  Larger Sels can be used if decomposed into
- *          a set of operations with Sels not exceeding 63 pixels
- *          in either width or height (and with the origin as close
- *          to the center of the Sel as possible).
- *      (2) This returns the decomposition of a linear Sel of length
- *          %size into a set of %n Sels of length 63 plus an extra
- *          Sel of length %extra.
- *      (3) For notation, let w == %size, n == %n, and e == %extra.
- *          We have 1 < e < 63.
- *
- *          Then if w < 64, we have n = 0 and e = w.
- *          The general formula for w > 63 is:
- *             w = 63 + (n - 1) * 62 + (e - 1)
- *
- *          Where did this come from?  Each successive convolution with
- *          a Sel of length L adds a total length (L - 1) to w.
- *          This accounts for using 62 for each additional Sel of size 63,
- *          and using (e - 1) for the additional Sel of size e.
- *
- *          Solving for n and e for w > 63:
- *             n = 1 + Int((w - 63) / 62)
- *             e = w - 63 - (n - 1) * 62 + 1
- *
- *          The extra part is decomposed into two factors f1 and f2,
- *          and the actual size of the extra part is
- *             e' = f1 * f2
- *          Then the actual width is:
- *             w' = 63 + (n - 1) * 62 + f1 * f2 - 1
- * 
- */ -l_ok -getExtendedCompositeParameters(l_int32 size, - l_int32 *pn, - l_int32 *pextra, - l_int32 *pactualsize) -{ -l_int32 n, extra, fact1, fact2; - - PROCNAME("getExtendedCompositeParameters"); - - if (!pn || !pextra) - return ERROR_INT("&n and &extra not both defined", procName, 1); - - if (size <= 63) { - n = 0; - extra = L_MIN(1, size); - } else { /* size > 63 */ - n = 1 + (l_int32)((size - 63) / 62); - extra = size - 63 - (n - 1) * 62 + 1; - } - - if (pactualsize) { - selectComposableSizes(extra, &fact1, &fact2); - *pactualsize = 63 + (n - 1) * 62 + fact1 * fact2 - 1; - } - - *pn = n; - *pextra = extra; - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphseq.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphseq.c deleted file mode 100644 index e98bb3b9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/morphseq.c +++ /dev/null @@ -1,1243 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file morphseq.c - *
- *
- *      Run a sequence of binary rasterop morphological operations
- *            PIX     *pixMorphSequence()
- *
- *      Run a sequence of binary composite rasterop morphological operations
- *            PIX     *pixMorphCompSequence()
- *
- *      Run a sequence of binary dwa morphological operations
- *            PIX     *pixMorphSequenceDwa()
- *
- *      Run a sequence of binary composite dwa morphological operations
- *            PIX     *pixMorphCompSequenceDwa()
- *
- *      Parser verifier for binary morphological operations
- *            l_int32  morphSequenceVerify()
- *
- *      Run a sequence of grayscale morphological operations
- *            PIX     *pixGrayMorphSequence()
- *
- *      Run a sequence of color morphological operations
- *            PIX     *pixColorMorphSequence()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*-------------------------------------------------------------------------* - * Run a sequence of binary rasterop morphological operations * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixMorphSequence() - * - * \param[in] pixs - * \param[in] sequence string specifying sequence - * \param[in] dispsep controls debug display results in the sequence: - * 0: no output - * > 0: gives horizontal separation in pixels between - * successive displays - * < 0: pdf output; abs(dispsep) is used for naming - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does rasterop morphology on binary images.
- *      (2) This runs a pipeline of operations; no branching is allowed.
- *      (3) This only uses brick Sels, which are created on the fly.
- *          In the future this will be generalized to extract Sels from
- *          a Sela by name.
- *      (4) A new image is always produced; the input image is not changed.
- *      (5) This contains an interpreter, allowing sequences to be
- *          generated and run.
- *      (6) The format of the sequence string is defined below.
- *      (7) In addition to morphological operations, rank order reduction
- *          and replicated expansion allow operations to take place
- *          downscaled by a power of 2.
- *      (8) Intermediate results can optionally be displayed.
- *      (9) Thanks to Dar-Shyang Lee, who had the idea for this and
- *          built the first implementation.
- *      (10) The sequence string is formatted as follows:
- *            ~ An arbitrary number of operations,  each separated
- *              by a '+' character.  White space is ignored.
- *            ~ Each operation begins with a case-independent character
- *              specifying the operation:
- *                 d or D  (dilation)
- *                 e or E  (erosion)
- *                 o or O  (opening)
- *                 c or C  (closing)
- *                 r or R  (rank binary reduction)
- *                 x or X  (replicative binary expansion)
- *                 b or B  (add a border of 0 pixels of this size)
- *            ~ The args to the morphological operations are bricks of hits,
- *              and are formatted as a.b, where a and b are horizontal and
- *              vertical dimensions, rsp.
- *            ~ The args to the reduction are a sequence of up to 4 integers,
- *              each from 1 to 4.
- *            ~ The arg to the expansion is a power of two, in the set
- *              {2, 4, 8, 16}.
- *      (11) An example valid sequence is:
- *               "b32 + o1.3 + C3.1 + r23 + e2.2 + D3.2 + X4"
- *           In this example, the following operation sequence is carried out:
- *             * b32: Add a 32 pixel border around the input image
- *             * o1.3: Opening with vert sel of length 3 (e.g., 1 x 3)
- *             * C3.1: Closing with horiz sel of length 3  (e.g., 3 x 1)
- *             * r23: Two successive 2x2 reductions with rank 2 in the first
- *                    and rank 3 in the second.  The result is a 4x reduced pix.
- *             * e2.2: Erosion with a 2x2 sel (origin will be at x,y: 0,0)
- *             * d3.2: Dilation with a 3x2 sel (origin will be at x,y: 1,0)
- *             * X4: 4x replicative expansion, back to original resolution
- *      (12) The safe closing is used.  However, if you implement a
- *           closing as separable dilations followed by separable erosions,
- *           it will not be safe.  For that situation, you need to add
- *           a sufficiently large border as the first operation in
- *           the sequence.  This will be removed automatically at the
- *           end.  There are two cautions:
- *              ~ When computing what is sufficient, remember that if
- *                reductions are carried out, the border is also reduced.
- *              ~ The border is removed at the end, so if a border is
- *                added at the beginning, the result must be at the
- *                same resolution as the input!
- * 
- */ -PIX * -pixMorphSequence(PIX *pixs, - const char *sequence, - l_int32 dispsep) -{ -char *rawop, *op; -char fname[256]; -l_int32 nops, i, j, nred, fact, w, h, x, border, pdfout; -l_int32 level[4]; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixMorphSequence"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - /* Split sequence into individual operations */ - sa = sarrayCreate(0); - sarraySplitString(sa, sequence, "+"); - nops = sarrayGetCount(sa); - pdfout = (dispsep < 0) ? 1 : 0; - if (!morphSequenceVerify(sa)) { - sarrayDestroy(&sa); - return (PIX *)ERROR_PTR("sequence not valid", procName, NULL); - } - - /* Parse and operate */ - pixa = NULL; - if (pdfout) { - pixa = pixaCreate(0); - pixaAddPix(pixa, pixs, L_CLONE); - } - border = 0; - pix1 = pixCopy(NULL, pixs); - pix2 = NULL; - x = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixDilateBrick(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'e': - case 'E': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixErodeBrick(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'o': - case 'O': - sscanf(&op[1], "%d.%d", &w, &h); - pixOpenBrick(pix1, pix1, w, h); - break; - case 'c': - case 'C': - sscanf(&op[1], "%d.%d", &w, &h); - pixCloseSafeBrick(pix1, pix1, w, h); - break; - case 'r': - case 'R': - nred = strlen(op) - 1; - for (j = 0; j < nred; j++) - level[j] = op[j + 1] - '0'; - for (j = nred; j < 4; j++) - level[j] = 0; - pix2 = pixReduceRankBinaryCascade(pix1, level[0], level[1], - level[2], level[3]); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'x': - case 'X': - sscanf(&op[1], "%d", &fact); - pix2 = pixExpandReplicate(pix1, fact); - pixSwapAndDestroy(&pix1, &pix2); - break; - 
case 'b': - case 'B': - sscanf(&op[1], "%d", &border); - pix2 = pixAddBorder(pix1, border, 0); - pixSwapAndDestroy(&pix1, &pix2); - break; - default: - /* All invalid ops are caught in the first pass */ - break; - } - LEPT_FREE(op); - - /* Debug output */ - if (dispsep > 0) { - pixDisplay(pix1, x, 0); - x += dispsep; - } - if (pdfout) - pixaAddPix(pixa, pix1, L_COPY); - } - if (border > 0) { - pix2 = pixRemoveBorder(pix1, border); - pixSwapAndDestroy(&pix1, &pix2); - } - - if (pdfout) { - snprintf(fname, sizeof(fname), "/tmp/lept/seq_output_%d.pdf", - L_ABS(dispsep)); - pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); - pixaDestroy(&pixa); - } - - sarrayDestroy(&sa); - return pix1; -} - - -/*-------------------------------------------------------------------------* - * Run a sequence of binary composite rasterop morphological operations * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixMorphCompSequence() - * - * \param[in] pixs - * \param[in] sequence string specifying sequence - * \param[in] dispsep controls debug display of results in the sequence: - * 0: no output - * > 0: gives horizontal separation in pixels between - * successive displays - * < 0: pdf output; abs(dispsep) is used for naming - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does rasterop morphology on binary images, using composite
- *          operations for extra speed on large Sels.
- *      (2) Safe closing is used atomically.  However, if you implement a
- *          closing as a sequence with a dilation followed by an
- *          erosion, it will not be safe, and to ensure that you have
- *          no boundary effects you must add a border in advance and
- *          remove it at the end.
- *      (3) For other usage details, see the notes for pixMorphSequence().
- *      (4) The sequence string is formatted as follows:
- *            ~ An arbitrary number of operations,  each separated
- *              by a '+' character.  White space is ignored.
- *            ~ Each operation begins with a case-independent character
- *              specifying the operation:
- *                 d or D  (dilation)
- *                 e or E  (erosion)
- *                 o or O  (opening)
- *                 c or C  (closing)
- *                 r or R  (rank binary reduction)
- *                 x or X  (replicative binary expansion)
- *                 b or B  (add a border of 0 pixels of this size)
- *            ~ The args to the morphological operations are bricks of hits,
- *              and are formatted as a.b, where a and b are horizontal and
- *              vertical dimensions, rsp.
- *            ~ The args to the reduction are a sequence of up to 4 integers,
- *              each from 1 to 4.
- *            ~ The arg to the expansion is a power of two, in the set
- *              {2, 4, 8, 16}.
- * 
- */ -PIX * -pixMorphCompSequence(PIX *pixs, - const char *sequence, - l_int32 dispsep) -{ -char *rawop, *op; -char fname[256]; -l_int32 nops, i, j, nred, fact, w, h, x, border, pdfout; -l_int32 level[4]; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixMorphCompSequence"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - /* Split sequence into individual operations */ - sa = sarrayCreate(0); - sarraySplitString(sa, sequence, "+"); - nops = sarrayGetCount(sa); - pdfout = (dispsep < 0) ? 1 : 0; - - if (!morphSequenceVerify(sa)) { - sarrayDestroy(&sa); - return (PIX *)ERROR_PTR("sequence not valid", procName, NULL); - } - - /* Parse and operate */ - pixa = NULL; - if (pdfout) { - pixa = pixaCreate(0); - pixaAddPix(pixa, pixs, L_CLONE); - } - border = 0; - pix1 = pixCopy(NULL, pixs); - pix2 = NULL; - x = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixDilateCompBrick(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'e': - case 'E': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixErodeCompBrick(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'o': - case 'O': - sscanf(&op[1], "%d.%d", &w, &h); - pixOpenCompBrick(pix1, pix1, w, h); - break; - case 'c': - case 'C': - sscanf(&op[1], "%d.%d", &w, &h); - pixCloseSafeCompBrick(pix1, pix1, w, h); - break; - case 'r': - case 'R': - nred = strlen(op) - 1; - for (j = 0; j < nred; j++) - level[j] = op[j + 1] - '0'; - for (j = nred; j < 4; j++) - level[j] = 0; - pix2 = pixReduceRankBinaryCascade(pix1, level[0], level[1], - level[2], level[3]); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'x': - case 'X': - sscanf(&op[1], "%d", &fact); - pix2 = pixExpandReplicate(pix1, fact); - 
pixSwapAndDestroy(&pix1, &pix2); - break; - case 'b': - case 'B': - sscanf(&op[1], "%d", &border); - pix2 = pixAddBorder(pix1, border, 0); - pixSwapAndDestroy(&pix1, &pix2); - break; - default: - /* All invalid ops are caught in the first pass */ - break; - } - LEPT_FREE(op); - - /* Debug output */ - if (dispsep > 0) { - pixDisplay(pix1, x, 0); - x += dispsep; - } - if (pdfout) - pixaAddPix(pixa, pix1, L_COPY); - } - if (border > 0) { - pix2 = pixRemoveBorder(pix1, border); - pixSwapAndDestroy(&pix1, &pix2); - } - - if (pdfout) { - snprintf(fname, sizeof(fname), "/tmp/lept/seq_output_%d.pdf", - L_ABS(dispsep)); - pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); - pixaDestroy(&pixa); - } - - sarrayDestroy(&sa); - return pix1; -} - - -/*-------------------------------------------------------------------------* - * Run a sequence of binary dwa morphological operations * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixMorphSequenceDwa() - * - * \param[in] pixs - * \param[in] sequence string specifying sequence - * \param[in] dispsep controls debug display of results in the sequence: - * 0: no output - * > 0: gives horizontal separation in pixels between - * successive displays - * < 0: pdf output; abs(dispsep) is used for naming - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does dwa morphology on binary images.
- *      (2) This runs a pipeline of operations; no branching is allowed.
- *      (3) This only uses brick Sels that have been pre-compiled with
- *          dwa code.
- *      (4) A new image is always produced; the input image is not changed.
- *      (5) This contains an interpreter, allowing sequences to be
- *          generated and run.
- *      (6) See pixMorphSequence() for further information about usage.
- * 
- */ -PIX * -pixMorphSequenceDwa(PIX *pixs, - const char *sequence, - l_int32 dispsep) -{ -char *rawop, *op; -char fname[256]; -l_int32 nops, i, j, nred, fact, w, h, x, border, pdfout; -l_int32 level[4]; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixMorphSequenceDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - /* Split sequence into individual operations */ - sa = sarrayCreate(0); - sarraySplitString(sa, sequence, "+"); - nops = sarrayGetCount(sa); - pdfout = (dispsep < 0) ? 1 : 0; - - if (!morphSequenceVerify(sa)) { - sarrayDestroy(&sa); - return (PIX *)ERROR_PTR("sequence not valid", procName, NULL); - } - - /* Parse and operate */ - pixa = NULL; - if (pdfout) { - pixa = pixaCreate(0); - pixaAddPix(pixa, pixs, L_CLONE); - } - border = 0; - pix1 = pixCopy(NULL, pixs); - pix2 = NULL; - x = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixDilateBrickDwa(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'e': - case 'E': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixErodeBrickDwa(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'o': - case 'O': - sscanf(&op[1], "%d.%d", &w, &h); - pixOpenBrickDwa(pix1, pix1, w, h); - break; - case 'c': - case 'C': - sscanf(&op[1], "%d.%d", &w, &h); - pixCloseBrickDwa(pix1, pix1, w, h); - break; - case 'r': - case 'R': - nred = strlen(op) - 1; - for (j = 0; j < nred; j++) - level[j] = op[j + 1] - '0'; - for (j = nred; j < 4; j++) - level[j] = 0; - pix2 = pixReduceRankBinaryCascade(pix1, level[0], level[1], - level[2], level[3]); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'x': - case 'X': - sscanf(&op[1], "%d", &fact); - pix2 = pixExpandReplicate(pix1, fact); - pixSwapAndDestroy(&pix1, 
&pix2); - break; - case 'b': - case 'B': - sscanf(&op[1], "%d", &border); - pix2 = pixAddBorder(pix1, border, 0); - pixSwapAndDestroy(&pix1, &pix2); - break; - default: - /* All invalid ops are caught in the first pass */ - break; - } - LEPT_FREE(op); - - /* Debug output */ - if (dispsep > 0) { - pixDisplay(pix1, x, 0); - x += dispsep; - } - if (pdfout) - pixaAddPix(pixa, pix1, L_COPY); - } - if (border > 0) { - pix2 = pixRemoveBorder(pix1, border); - pixSwapAndDestroy(&pix1, &pix2); - } - - if (pdfout) { - snprintf(fname, sizeof(fname), "/tmp/lept/seq_output_%d.pdf", - L_ABS(dispsep)); - pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); - pixaDestroy(&pixa); - } - - sarrayDestroy(&sa); - return pix1; -} - - -/*-------------------------------------------------------------------------* - * Run a sequence of binary composite dwa morphological operations * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixMorphCompSequenceDwa() - * - * \param[in] pixs - * \param[in] sequence string specifying sequence - * \param[in] dispsep controls debug display of results in the sequence: - * 0: no output - * > 0: gives horizontal separation in pixels between - * successive displays - * < 0: pdf output; abs(dispsep) is used for naming - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does dwa morphology on binary images, using brick Sels.
- *      (2) This runs a pipeline of operations; no branching is allowed.
- *      (3) It implements all brick Sels that have dimensions up to 63
- *          on each side, using a composite (linear + comb) when useful.
- *      (4) A new image is always produced; the input image is not changed.
- *      (5) This contains an interpreter, allowing sequences to be
- *          generated and run.
- *      (6) See pixMorphSequence() for further information about usage.
- * 
- */ -PIX * -pixMorphCompSequenceDwa(PIX *pixs, - const char *sequence, - l_int32 dispsep) -{ -char *rawop, *op; -char fname[256]; -l_int32 nops, i, j, nred, fact, w, h, x, border, pdfout; -l_int32 level[4]; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixMorphCompSequenceDwa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - /* Split sequence into individual operations */ - sa = sarrayCreate(0); - sarraySplitString(sa, sequence, "+"); - nops = sarrayGetCount(sa); - pdfout = (dispsep < 0) ? 1 : 0; - - if (!morphSequenceVerify(sa)) { - sarrayDestroy(&sa); - return (PIX *)ERROR_PTR("sequence not valid", procName, NULL); - } - - /* Parse and operate */ - pixa = NULL; - if (pdfout) { - pixa = pixaCreate(0); - pixaAddPix(pixa, pixs, L_CLONE); - } - border = 0; - pix1 = pixCopy(NULL, pixs); - pix2 = NULL; - x = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixDilateCompBrickDwa(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'e': - case 'E': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixErodeCompBrickDwa(NULL, pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'o': - case 'O': - sscanf(&op[1], "%d.%d", &w, &h); - pixOpenCompBrickDwa(pix1, pix1, w, h); - break; - case 'c': - case 'C': - sscanf(&op[1], "%d.%d", &w, &h); - pixCloseCompBrickDwa(pix1, pix1, w, h); - break; - case 'r': - case 'R': - nred = strlen(op) - 1; - for (j = 0; j < nred; j++) - level[j] = op[j + 1] - '0'; - for (j = nred; j < 4; j++) - level[j] = 0; - pix2 = pixReduceRankBinaryCascade(pix1, level[0], level[1], - level[2], level[3]); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'x': - case 'X': - sscanf(&op[1], "%d", &fact); - pix2 = pixExpandReplicate(pix1, fact); - 
pixSwapAndDestroy(&pix1, &pix2); - break; - case 'b': - case 'B': - sscanf(&op[1], "%d", &border); - pix2 = pixAddBorder(pix1, border, 0); - pixSwapAndDestroy(&pix1, &pix2); - break; - default: - /* All invalid ops are caught in the first pass */ - break; - } - LEPT_FREE(op); - - /* Debug output */ - if (dispsep > 0) { - pixDisplay(pix1, x, 0); - x += dispsep; - } - if (pdfout) - pixaAddPix(pixa, pix1, L_COPY); - } - if (border > 0) { - pix2 = pixRemoveBorder(pix1, border); - pixSwapAndDestroy(&pix1, &pix2); - } - - if (pdfout) { - snprintf(fname, sizeof(fname), "/tmp/lept/seq_output_%d.pdf", - L_ABS(dispsep)); - pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); - pixaDestroy(&pixa); - } - - sarrayDestroy(&sa); - return pix1; -} - - -/*-------------------------------------------------------------------------* - * Parser verifier for binary morphological operations * - *-------------------------------------------------------------------------*/ -/*! - * \brief morphSequenceVerify() - * - * \param[in] sa string array of operation sequence - * \return TRUE if valid; FALSE otherwise or on error - * - *
- * Notes:
- *      (1) This does verification of valid binary morphological
- *          operation sequences.
- *      (2) See pixMorphSequence() for notes on valid operations
- *          in the sequence.
- * 
- */ -l_int32 -morphSequenceVerify(SARRAY *sa) -{ -char *rawop, *op; -l_int32 nops, i, j, nred, fact, valid, w, h, netred, border; -l_int32 level[4]; -l_int32 intlogbase2[5] = {1, 2, 3, 0, 4}; /* of arg/4 */ - - PROCNAME("morphSequenceVerify"); - - if (!sa) - return ERROR_INT("sa not defined", procName, FALSE); - - nops = sarrayGetCount(sa); - valid = TRUE; - netred = 0; - border = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - case 'e': - case 'E': - case 'o': - case 'O': - case 'c': - case 'C': - if (sscanf(&op[1], "%d.%d", &w, &h) != 2) { - lept_stderr("*** op: %s invalid\n", op); - valid = FALSE; - break; - } - if (w <= 0 || h <= 0) { - lept_stderr("*** op: %s; w = %d, h = %d; must both be > 0\n", - op, w, h); - valid = FALSE; - break; - } -/* lept_stderr("op = %s; w = %d, h = %d\n", op, w, h); */ - break; - case 'r': - case 'R': - nred = strlen(op) - 1; - netred += nred; - if (nred < 1 || nred > 4) { - lept_stderr( - "*** op = %s; num reduct = %d; must be in {1,2,3,4}\n", - op, nred); - valid = FALSE; - break; - } - for (j = 0; j < nred; j++) { - level[j] = op[j + 1] - '0'; - if (level[j] < 1 || level[j] > 4) { - lept_stderr("*** op = %s; level[%d] = %d is invalid\n", - op, j, level[j]); - valid = FALSE; - break; - } - } - if (!valid) - break; -/* lept_stderr("op = %s", op); */ - for (j = 0; j < nred; j++) { - level[j] = op[j + 1] - '0'; -/* lept_stderr(", level[%d] = %d", j, level[j]); */ - } -/* lept_stderr("\n"); */ - break; - case 'x': - case 'X': - if (sscanf(&op[1], "%d", &fact) != 1) { - lept_stderr("*** op: %s; fact invalid\n", op); - valid = FALSE; - break; - } - if (fact != 2 && fact != 4 && fact != 8 && fact != 16) { - lept_stderr("*** op = %s; invalid fact = %d\n", op, fact); - valid = FALSE; - break; - } - netred -= intlogbase2[fact / 4]; -/* lept_stderr("op = %s; fact = %d\n", op, fact); */ - break; - case 'b': - case 'B': - if 
(sscanf(&op[1], "%d", &fact) != 1) { - lept_stderr("*** op: %s; fact invalid\n", op); - valid = FALSE; - break; - } - if (i > 0) { - lept_stderr("*** op = %s; must be first op\n", op); - valid = FALSE; - break; - } - if (fact < 1) { - lept_stderr("*** op = %s; invalid fact = %d\n", op, fact); - valid = FALSE; - break; - } - border = fact; -/* lept_stderr("op = %s; fact = %d\n", op, fact); */ - break; - default: - lept_stderr("*** nonexistent op = %s\n", op); - valid = FALSE; - } - LEPT_FREE(op); - } - - if (border != 0 && netred != 0) { - lept_stderr("*** op = %s; border added but net reduction not 0\n", op); - valid = FALSE; - } - return valid; -} - - -/*-----------------------------------------------------------------* - * Run a sequence of grayscale morphological operations * - *-----------------------------------------------------------------*/ -/*! - * \brief pixGrayMorphSequence() - * - * \param[in] pixs - * \param[in] sequence string specifying sequence - * \param[in] dispsep controls debug display of results in the sequence: - * 0: no output - * > 0: gives horizontal separation in pixels between - * successive displays - * < 0: pdf output; abs(dispsep) is used for naming - * \param[in] dispy if dispsep > 0, this gives the y-value of the - * UL corner for display; otherwise it is ignored - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This works on 8 bpp grayscale images.
- *      (2) This runs a pipeline of operations; no branching is allowed.
- *      (3) This only uses brick SELs.
- *      (4) A new image is always produced; the input image is not changed.
- *      (5) This contains an interpreter, allowing sequences to be
- *          generated and run.
- *      (6) The format of the sequence string is defined below.
- *      (7) In addition to morphological operations, the composite
- *          morph/subtract tophat can be performed.
- *      (8) Sel sizes (width, height) must each be odd numbers.
- *      (9) Intermediate results can optionally be displayed
- *      (10) The sequence string is formatted as follows:
- *            ~ An arbitrary number of operations,  each separated
- *              by a '+' character.  White space is ignored.
- *            ~ Each operation begins with a case-independent character
- *              specifying the operation:
- *                 d or D  (dilation)
- *                 e or E  (erosion)
- *                 o or O  (opening)
- *                 c or C  (closing)
- *                 t or T  (tophat)
- *            ~ The args to the morphological operations are bricks of hits,
- *              and are formatted as a.b, where a and b are horizontal and
- *              vertical dimensions, rsp. (each must be an odd number)
- *            ~ The args to the tophat are w or W (for white tophat)
- *              or b or B (for black tophat), followed by a.b as for
- *              the dilation, erosion, opening and closing.
- *           Example valid sequences are:
- *             "c5.3 + o7.5"
- *             "c9.9 + tw9.9"
- * 
- */ -PIX * -pixGrayMorphSequence(PIX *pixs, - const char *sequence, - l_int32 dispsep, - l_int32 dispy) -{ -char *rawop, *op; -char fname[256]; -l_int32 nops, i, valid, w, h, x, pdfout; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixGrayMorphSequence"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - /* Split sequence into individual operations */ - sa = sarrayCreate(0); - sarraySplitString(sa, sequence, "+"); - nops = sarrayGetCount(sa); - pdfout = (dispsep < 0) ? 1 : 0; - - /* Verify that the operation sequence is valid */ - valid = TRUE; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - case 'e': - case 'E': - case 'o': - case 'O': - case 'c': - case 'C': - if (sscanf(&op[1], "%d.%d", &w, &h) != 2) { - lept_stderr("*** op: %s invalid\n", op); - valid = FALSE; - break; - } - if (w < 1 || (w & 1) == 0 || h < 1 || (h & 1) == 0 ) { - lept_stderr("*** op: %s; w = %d, h = %d; must both be odd\n", - op, w, h); - valid = FALSE; - break; - } -/* lept_stderr("op = %s; w = %d, h = %d\n", op, w, h); */ - break; - case 't': - case 'T': - if (op[1] != 'w' && op[1] != 'W' && - op[1] != 'b' && op[1] != 'B') { - lept_stderr( - "*** op = %s; arg %c must be 'w' or 'b'\n", op, op[1]); - valid = FALSE; - break; - } - sscanf(&op[2], "%d.%d", &w, &h); - if (w < 1 || (w & 1) == 0 || h < 1 || (h & 1) == 0 ) { - lept_stderr("*** op: %s; w = %d, h = %d; must both be odd\n", - op, w, h); - valid = FALSE; - break; - } -/* lept_stderr("op = %s", op); */ - break; - default: - lept_stderr("*** nonexistent op = %s\n", op); - valid = FALSE; - } - LEPT_FREE(op); - } - if (!valid) { - sarrayDestroy(&sa); - return (PIX *)ERROR_PTR("sequence invalid", procName, NULL); - } - - /* Parse and operate */ - pixa = NULL; - if (pdfout) { - pixa = pixaCreate(0); - 
pixaAddPix(pixa, pixs, L_CLONE); - } - pix1 = pixCopy(NULL, pixs); - pix2 = NULL; - x = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixDilateGray(pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'e': - case 'E': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixErodeGray(pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'o': - case 'O': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixOpenGray(pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'c': - case 'C': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixCloseGray(pix1, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 't': - case 'T': - sscanf(&op[2], "%d.%d", &w, &h); - if (op[1] == 'w' || op[1] == 'W') - pix2 = pixTophat(pix1, w, h, L_TOPHAT_WHITE); - else /* 'b' or 'B' */ - pix2 = pixTophat(pix1, w, h, L_TOPHAT_BLACK); - pixSwapAndDestroy(&pix1, &pix2); - break; - default: - /* All invalid ops are caught in the first pass */ - break; - } - LEPT_FREE(op); - - /* Debug output */ - if (dispsep > 0) { - pixDisplay(pix1, x, dispy); - x += dispsep; - } - if (pdfout) - pixaAddPix(pixa, pix1, L_COPY); - } - - if (pdfout) { - snprintf(fname, sizeof(fname), "/tmp/lept/seq_output_%d.pdf", - L_ABS(dispsep)); - pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); - pixaDestroy(&pixa); - } - - sarrayDestroy(&sa); - return pix1; -} - - -/*-----------------------------------------------------------------* - * Run a sequence of color morphological operations * - *-----------------------------------------------------------------*/ -/*! 
- * \brief pixColorMorphSequence() - * - * \param[in] pixs - * \param[in] sequence string specifying sequence - * \param[in] dispsep controls debug display of results in the sequence: - * 0: no output - * > 0: gives horizontal separation in pixels between - * successive displays - * < 0: pdf output; abs(dispsep) is used for naming - * \param[in] dispy if dispsep > 0, this gives the y-value of the - * UL corner for display; otherwise it is ignored - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This works on 32 bpp rgb images.
- *      (2) Each component is processed separately.
- *      (3) This runs a pipeline of operations; no branching is allowed.
- *      (4) This only uses brick SELs.
- *      (5) A new image is always produced; the input image is not changed.
- *      (6) This contains an interpreter, allowing sequences to be
- *          generated and run.
- *      (7) Sel sizes (width, height) must each be odd numbers.
- *      (8) The format of the sequence string is defined below.
- *      (9) Intermediate results can optionally be displayed.
- *      (10) The sequence string is formatted as follows:
- *            ~ An arbitrary number of operations,  each separated
- *              by a '+' character.  White space is ignored.
- *            ~ Each operation begins with a case-independent character
- *              specifying the operation:
- *                 d or D  (dilation)
- *                 e or E  (erosion)
- *                 o or O  (opening)
- *                 c or C  (closing)
- *            ~ The args to the morphological operations are bricks of hits,
- *              and are formatted as a.b, where a and b are horizontal and
- *              vertical dimensions, rsp. (each must be an odd number)
- *           Example valid sequences are:
- *             "c5.3 + o7.5"
- *             "D9.1"
- * 
- */ -PIX * -pixColorMorphSequence(PIX *pixs, - const char *sequence, - l_int32 dispsep, - l_int32 dispy) -{ -char *rawop, *op; -char fname[256]; -l_int32 nops, i, valid, w, h, x, pdfout; -PIX *pix1, *pix2; -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixColorMorphSequence"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sequence) - return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); - - /* Split sequence into individual operations */ - sa = sarrayCreate(0); - sarraySplitString(sa, sequence, "+"); - nops = sarrayGetCount(sa); - pdfout = (dispsep < 0) ? 1 : 0; - - /* Verify that the operation sequence is valid */ - valid = TRUE; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - case 'e': - case 'E': - case 'o': - case 'O': - case 'c': - case 'C': - if (sscanf(&op[1], "%d.%d", &w, &h) != 2) { - lept_stderr("*** op: %s invalid\n", op); - valid = FALSE; - break; - } - if (w < 1 || (w & 1) == 0 || h < 1 || (h & 1) == 0 ) { - lept_stderr("*** op: %s; w = %d, h = %d; must both be odd\n", - op, w, h); - valid = FALSE; - break; - } -/* lept_stderr("op = %s; w = %d, h = %d\n", op, w, h); */ - break; - default: - lept_stderr("*** nonexistent op = %s\n", op); - valid = FALSE; - } - LEPT_FREE(op); - } - if (!valid) { - sarrayDestroy(&sa); - return (PIX *)ERROR_PTR("sequence invalid", procName, NULL); - } - - /* Parse and operate */ - pixa = NULL; - if (pdfout) { - pixa = pixaCreate(0); - pixaAddPix(pixa, pixs, L_CLONE); - } - pix1 = pixCopy(NULL, pixs); - pix2 = NULL; - x = 0; - for (i = 0; i < nops; i++) { - rawop = sarrayGetString(sa, i, L_NOCOPY); - op = stringRemoveChars(rawop, " \n\t"); - switch (op[0]) - { - case 'd': - case 'D': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixColorMorph(pix1, L_MORPH_DILATE, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'e': - case 'E': - sscanf(&op[1], "%d.%d", &w, 
&h); - pix2 = pixColorMorph(pix1, L_MORPH_ERODE, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'o': - case 'O': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixColorMorph(pix1, L_MORPH_OPEN, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - case 'c': - case 'C': - sscanf(&op[1], "%d.%d", &w, &h); - pix2 = pixColorMorph(pix1, L_MORPH_CLOSE, w, h); - pixSwapAndDestroy(&pix1, &pix2); - break; - default: - /* All invalid ops are caught in the first pass */ - break; - } - LEPT_FREE(op); - - /* Debug output */ - if (dispsep > 0) { - pixDisplay(pix1, x, dispy); - x += dispsep; - } - if (pdfout) - pixaAddPix(pixa, pix1, L_COPY); - } - - if (pdfout) { - snprintf(fname, sizeof(fname), "/tmp/lept/seq_output_%d.pdf", - L_ABS(dispsep)); - pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); - pixaDestroy(&pixa); - } - - sarrayDestroy(&sa); - return pix1; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numabasic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numabasic.c deleted file mode 100644 index 2e2bc11a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numabasic.c +++ /dev/null @@ -1,2061 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file numabasic.c - *
- *
- *      Numa creation, destruction, copy, clone, etc.
- *          NUMA        *numaCreate()
- *          NUMA        *numaCreateFromIArray()
- *          NUMA        *numaCreateFromFArray()
- *          NUMA        *numaCreateFromString()
- *          void        *numaDestroy()
- *          NUMA        *numaCopy()
- *          NUMA        *numaClone()
- *          l_int32      numaEmpty()
- *
- *      Add/remove number (float or integer)
- *          l_int32      numaAddNumber()
- *          static l_int32  numaExtendArray()
- *          l_int32      numaInsertNumber()
- *          l_int32      numaRemoveNumber()
- *          l_int32      numaReplaceNumber()
- *
- *      Numa accessors
- *          l_int32      numaGetCount()
- *          l_int32      numaSetCount()
- *          l_int32      numaGetIValue()
- *          l_int32      numaGetFValue()
- *          l_int32      numaSetValue()
- *          l_int32      numaShiftValue()
- *          l_int32     *numaGetIArray()
- *          l_float32   *numaGetFArray()
- *          l_int32      numaGetRefcount()
- *          l_int32      numaChangeRefcount()
- *          l_int32      numaGetParameters()
- *          l_int32      numaSetParameters()
- *          l_int32      numaCopyParameters()
- *
- *      Convert to string array
- *          SARRAY      *numaConvertToSarray()
- *
- *      Serialize numa for I/O
- *          NUMA        *numaRead()
- *          NUMA        *numaReadStream()
- *          NUMA        *numaReadMem()
- *          l_int32      numaWriteDebug()
- *          l_int32      numaWrite()
- *          l_int32      numaWriteStream()
- *          l_int32      numaWriteStderr()
- *          l_int32      numaWriteMem()
- *
- *      Numaa creation, destruction, truncation
- *          NUMAA       *numaaCreate()
- *          NUMAA       *numaaCreateFull()
- *          NUMAA       *numaaTruncate()
- *          void        *numaaDestroy()
- *
- *      Add Numa to Numaa
- *          l_int32      numaaAddNuma()
- *          static l_int32   numaaExtendArray()
- *
- *      Numaa accessors
- *          l_int32      numaaGetCount()
- *          l_int32      numaaGetNumaCount()
- *          l_int32      numaaGetNumberCount()
- *          NUMA       **numaaGetPtrArray()
- *          NUMA        *numaaGetNuma()
- *          NUMA        *numaaReplaceNuma()
- *          l_int32      numaaGetValue()
- *          l_int32      numaaAddNumber()
- *
- *      Serialize numaa for I/O
- *          NUMAA       *numaaRead()
- *          NUMAA       *numaaReadStream()
- *          NUMAA       *numaaReadMem()
- *          l_int32      numaaWrite()
- *          l_int32      numaaWriteStream()
- *          l_int32      numaaWriteMem()
- *
- *    (1) The Numa is a struct holding an array of floats.  It can also
- *        be used to store l_int32 values, with some loss of precision
- *        for floats larger than about 10 million.  Use the L_Dna instead
- *        if integers larger than a few million need to be stored.
- *
- *    (2) Always use the accessors in this file, never the fields directly.
- *
- *    (3) Storing and retrieving numbers:
- *
- *       * to append a new number to the array, use numaAddNumber().  If
- *         the number is an int, it will will automatically be converted
- *         to l_float32 and stored.
- *
- *       * to reset a value stored in the array, use numaSetValue().
- *
- *       * to increment or decrement a value stored in the array,
- *         use numaShiftValue().
- *
- *       * to obtain a value from the array, use either numaGetIValue()
- *         or numaGetFValue(), depending on whether you are retrieving
- *         an integer or a float.  This avoids doing an explicit cast,
- *         such as
- *           (a) return a l_float32 and cast it to an l_int32
- *           (b) cast the return directly to (l_float32 *) to
- *               satisfy the function prototype, as in
- *                 numaGetFValue(na, index, (l_float32 *)&ival);   [ugly!]
- *
- *    (4) int <--> float conversions:
- *
- *        Tradition dictates that type conversions go automatically from
- *        l_int32 --> l_float32, even though it is possible to lose
- *        precision for large integers, whereas you must cast (l_int32)
- *        to go from l_float32 --> l_int32 because you're truncating
- *        to the integer value.
- *
- *    (5) As with other arrays in leptonica, the numa has both an allocated
- *        size and a count of the stored numbers.  When you add a number, it
- *        goes on the end of the array, and causes a realloc if the array
- *        is already filled.  However, in situations where you want to
- *        add numbers randomly into an array, such as when you build a
- *        histogram, you must set the count of stored numbers in advance.
- *        This is done with numaSetCount().  If you set a count larger
- *        than the allocated array, it does a realloc to the size requested.
- *
- *    (6) In situations where the data in a numa correspond to a function
- *        y(x), the values can be either at equal spacings in x or at
- *        arbitrary spacings.  For the former, we can represent all x values
- *        by two parameters: startx (corresponding to y[0]) and delx
- *        for the change in x for adjacent values y[i] and y[i+1].
- *        startx and delx are initialized to 0.0 and 1.0, rsp.
- *        For arbitrary spacings, we use a second numa, and the two
- *        numas are typically denoted nay and nax.
- *
- *    (7) The numa is also the basic struct used for histograms.  Every numa
- *        has startx and delx fields, initialized to 0.0 and 1.0, that can
- *        be used to represent the "x" value for the location of the
- *        first bin and the bin width, respectively.  Accessors are the
- *        numa*Parameters() functions.  All functions that make numa
- *        histograms must set these fields properly, and many functions
- *        that use numa histograms rely on the correctness of these values.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxArraySize = 100000000; /* for numa */ -static const l_uint32 MaxPtrArraySize = 10000; /* for numaa */ -static const l_int32 InitialArraySize = 50; /*!< n'importe quoi */ - - /* Static functions */ -static l_int32 numaExtendArray(NUMA *na); -static l_int32 numaaExtendArray(NUMAA *naa); - -/*--------------------------------------------------------------------------* - * Numa creation, destruction, copy, clone, etc. * - *--------------------------------------------------------------------------*/ -/*! - * \brief numaCreate() - * - * \param[in] n size of number array to be alloc'd 0 for default - * \return na, or NULL on error - */ -NUMA * -numaCreate(l_int32 n) -{ -NUMA *na; - - PROCNAME("numaCreate"); - - if (n <= 0 || n > MaxArraySize) - n = InitialArraySize; - - na = (NUMA *)LEPT_CALLOC(1, sizeof(NUMA)); - if ((na->array = (l_float32 *)LEPT_CALLOC(n, sizeof(l_float32))) == NULL) { - numaDestroy(&na); - return (NUMA *)ERROR_PTR("number array not made", procName, NULL); - } - - na->nalloc = n; - na->n = 0; - na->refcount = 1; - na->startx = 0.0; - na->delx = 1.0; - return na; -} - - -/*! - * \brief numaCreateFromIArray() - * - * \param[in] iarray integer array - * \param[in] size of the array - * \return na, or NULL on error - * - *
- * Notes:
- *      (1) We can't insert this int array into the numa, because a numa
- *          takes a float array.  So this just copies the data from the
- *          input array into the numa.  The input array continues to be
- *          owned by the caller.
- * 
- */ -NUMA * -numaCreateFromIArray(l_int32 *iarray, - l_int32 size) -{ -l_int32 i; -NUMA *na; - - PROCNAME("numaCreateFromIArray"); - - if (!iarray) - return (NUMA *)ERROR_PTR("iarray not defined", procName, NULL); - if (size <= 0) - return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL); - - na = numaCreate(size); - for (i = 0; i < size; i++) - numaAddNumber(na, iarray[i]); - - return na; -} - - -/*! - * \brief numaCreateFromFArray() - * - * \param[in] farray float array - * \param[in] size of the array - * \param[in] copyflag L_INSERT or L_COPY - * \return na, or NULL on error - * - *
- * Notes:
- *      (1) With L_INSERT, ownership of the input array is transferred
- *          to the returned numa, and all %size elements are considered
- *          to be valid.
- * 
- */ -NUMA * -numaCreateFromFArray(l_float32 *farray, - l_int32 size, - l_int32 copyflag) -{ -l_int32 i; -NUMA *na; - - PROCNAME("numaCreateFromFArray"); - - if (!farray) - return (NUMA *)ERROR_PTR("farray not defined", procName, NULL); - if (size <= 0) - return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL); - if (copyflag != L_INSERT && copyflag != L_COPY) - return (NUMA *)ERROR_PTR("invalid copyflag", procName, NULL); - - na = numaCreate(size); - if (copyflag == L_INSERT) { - if (na->array) LEPT_FREE(na->array); - na->array = farray; - na->n = size; - } else { /* just copy the contents */ - for (i = 0; i < size; i++) - numaAddNumber(na, farray[i]); - } - - return na; -} - - -/*! - * \brief numaCreateFromString() - * - * \param[in] str string of comma-separated numbers - * \return na, or NULL on error - * - *
- * Notes:
- *      (1) The numbers can be ints or floats; they will be interpreted
- *          and stored as floats.  To use them as integers (e.g., for
- *          indexing into arrays), use numaGetIValue(...).
- * 
- */ -NUMA * -numaCreateFromString(const char *str) -{ -char *substr; -l_int32 i, n, nerrors; -l_float32 val; -NUMA *na; -SARRAY *sa; - - PROCNAME("numaCreateFromString"); - - if (!str || (strlen(str) == 0)) - return (NUMA *)ERROR_PTR("str not defined or empty", procName, NULL); - - sa = sarrayCreate(0); - sarraySplitString(sa, str, ","); - n = sarrayGetCount(sa); - na = numaCreate(n); - nerrors = 0; - for (i = 0; i < n; i++) { - substr = sarrayGetString(sa, i, L_NOCOPY); - if (sscanf(substr, "%f", &val) != 1) { - L_ERROR("substr %d not float\n", procName, i); - nerrors++; - } else { - numaAddNumber(na, val); - } - } - - sarrayDestroy(&sa); - if (nerrors > 0) { - numaDestroy(&na); - return (NUMA *)ERROR_PTR("non-floats in string", procName, NULL); - } - - return na; -} - - -/*! - * \brief numaDestroy() - * - * \param[in,out] pna numa to be destroyed and nulled if it exists - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the numa.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -numaDestroy(NUMA **pna) -{ -NUMA *na; - - PROCNAME("numaDestroy"); - - if (pna == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - - if ((na = *pna) == NULL) - return; - - /* Decrement the ref count. If it is 0, destroy the numa. */ - numaChangeRefcount(na, -1); - if (numaGetRefcount(na) <= 0) { - if (na->array) - LEPT_FREE(na->array); - LEPT_FREE(na); - } - - *pna = NULL; - return; -} - - -/*! - * \brief numaCopy() - * - * \param[in] na - * \return copy of numa, or NULL on error - */ -NUMA * -numaCopy(NUMA *na) -{ -l_int32 i; -NUMA *cna; - - PROCNAME("numaCopy"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - - if ((cna = numaCreate(na->nalloc)) == NULL) - return (NUMA *)ERROR_PTR("cna not made", procName, NULL); - cna->startx = na->startx; - cna->delx = na->delx; - - for (i = 0; i < na->n; i++) - numaAddNumber(cna, na->array[i]); - - return cna; -} - - -/*! - * \brief numaClone() - * - * \param[in] na - * \return ptr to same numa, or NULL on error - */ -NUMA * -numaClone(NUMA *na) -{ - PROCNAME("numaClone"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - - numaChangeRefcount(na, 1); - return na; -} - - -/*! - * \brief numaEmpty() - * - * \param[in] na - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This does not change the allocation of the array.
- *          It just clears the number of stored numbers, so that
- *          the array appears to be empty.
- * 
- */ -l_ok -numaEmpty(NUMA *na) -{ - PROCNAME("numaEmpty"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - - na->n = 0; - return 0; -} - - - -/*--------------------------------------------------------------------------* - * Number array: add number and extend array * - *--------------------------------------------------------------------------*/ -/*! - * \brief numaAddNumber() - * - * \param[in] na - * \param[in] val float or int to be added; stored as a float - * \return 0 if OK, 1 on error - */ -l_ok -numaAddNumber(NUMA *na, - l_float32 val) -{ -l_int32 n; - - PROCNAME("numaAddNumber"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - - n = numaGetCount(na); - if (n >= na->nalloc) - numaExtendArray(na); - na->array[n] = val; - na->n++; - return 0; -} - - -/*! - * \brief numaExtendArray() - * - * \param[in] na - * \return 0 if OK, 1 on error - */ -static l_int32 -numaExtendArray(NUMA *na) -{ - PROCNAME("numaExtendArray"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if ((na->array = (l_float32 *)reallocNew((void **)&na->array, - sizeof(l_float32) * na->nalloc, - 2 * sizeof(l_float32) * na->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - na->nalloc *= 2; - return 0; -} - - -/*! - * \brief numaInsertNumber() - * - * \param[in] na - * \param[in] index location in na to insert new value - * \param[in] val float32 or integer to be added - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts na[i] --> na[i + 1] for all i >= index,
- *          and then inserts val as na[index].
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- *
- * 
- */ -l_ok -numaInsertNumber(NUMA *na, - l_int32 index, - l_float32 val) -{ -l_int32 i, n; - - PROCNAME("numaInsertNumber"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (index < 0 || index > n) - return ERROR_INT("index not in {0...n}", procName, 1); - - if (n >= na->nalloc) - numaExtendArray(na); - for (i = n; i > index; i--) - na->array[i] = na->array[i - 1]; - na->array[index] = val; - na->n++; - return 0; -} - - -/*! - * \brief numaRemoveNumber() - * - * \param[in] na - * \param[in] index element to be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts na[i] --> na[i - 1] for all i > index.
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- * 
- */ -l_ok -numaRemoveNumber(NUMA *na, - l_int32 index) -{ -l_int32 i, n; - - PROCNAME("numaRemoveNumber"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - for (i = index + 1; i < n; i++) - na->array[i - 1] = na->array[i]; - na->n--; - return 0; -} - - -/*! - * \brief numaReplaceNumber() - * - * \param[in] na - * \param[in] index element to be replaced - * \param[in] val new value to replace old one - * \return 0 if OK, 1 on error - */ -l_ok -numaReplaceNumber(NUMA *na, - l_int32 index, - l_float32 val) -{ -l_int32 n; - - PROCNAME("numaReplaceNumber"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - na->array[index] = val; - return 0; -} - - -/*----------------------------------------------------------------------* - * Numa accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief numaGetCount() - * - * \param[in] na - * \return count, or 0 if no numbers or on error - */ -l_int32 -numaGetCount(NUMA *na) -{ - PROCNAME("numaGetCount"); - - if (!na) - return ERROR_INT("na not defined", procName, 0); - return na->n; -} - - -/*! - * \brief numaSetCount() - * - * \param[in] na - * \param[in] newcount - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If newcount <= na->nalloc, this resets na->n.
- *          Using newcount = 0 is equivalent to numaEmpty().
- *      (2) If newcount > na->nalloc, this causes a realloc
- *          to a size na->nalloc = newcount.
- *      (3) All the previously unused values in na are set to 0.0.
- * 
- */ -l_ok -numaSetCount(NUMA *na, - l_int32 newcount) -{ - PROCNAME("numaSetCount"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (newcount > na->nalloc) { - if ((na->array = (l_float32 *)reallocNew((void **)&na->array, - sizeof(l_float32) * na->nalloc, - sizeof(l_float32) * newcount)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - na->nalloc = newcount; - } - na->n = newcount; - return 0; -} - - -/*! - * \brief numaGetFValue() - * - * \param[in] na - * \param[in] index into numa - * \param[out] pval float value; set to 0.0 on error - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Caller may need to check the function return value to
- *          decide if a 0.0 in the returned ival is valid.
- * 
- */ -l_ok -numaGetFValue(NUMA *na, - l_int32 index, - l_float32 *pval) -{ - PROCNAME("numaGetFValue"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if (index < 0 || index >= na->n) - return ERROR_INT("index not valid", procName, 1); - - *pval = na->array[index]; - return 0; -} - - -/*! - * \brief numaGetIValue() - * - * \param[in] na - * \param[in] index into numa - * \param[out] pival integer value; set to 0 on error - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Caller may need to check the function return value to
- *          decide if a 0 in the returned ival is valid.
- * 
- */ -l_ok -numaGetIValue(NUMA *na, - l_int32 index, - l_int32 *pival) -{ -l_float32 val; - - PROCNAME("numaGetIValue"); - - if (!pival) - return ERROR_INT("&ival not defined", procName, 1); - *pival = 0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if (index < 0 || index >= na->n) - return ERROR_INT("index not valid", procName, 1); - - val = na->array[index]; - *pival = (l_int32)(val + L_SIGN(val) * 0.5); - return 0; -} - - -/*! - * \brief numaSetValue() - * - * \param[in] na - * \param[in] index to element to be set - * \param[in] val to set - * \return 0 if OK; 1 on error - */ -l_ok -numaSetValue(NUMA *na, - l_int32 index, - l_float32 val) -{ - PROCNAME("numaSetValue"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (index < 0 || index >= na->n) - return ERROR_INT("index not valid", procName, 1); - - na->array[index] = val; - return 0; -} - - -/*! - * \brief numaShiftValue() - * - * \param[in] na - * \param[in] index to element to change relative to the current value - * \param[in] diff increment if diff > 0 or decrement if diff < 0 - * \return 0 if OK; 1 on error - */ -l_ok -numaShiftValue(NUMA *na, - l_int32 index, - l_float32 diff) -{ - PROCNAME("numaShiftValue"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (index < 0 || index >= na->n) - return ERROR_INT("index not valid", procName, 1); - - na->array[index] += diff; - return 0; -} - - -/*! - * \brief numaGetIArray() - * - * \param[in] na - * \return a copy of the bare internal array, integerized - * by rounding, or NULL on error - *
- * Notes:
- *      (1) A copy of the array is always made, because we need to
- *          generate an integer array from the bare float array.
- *          The caller is responsible for freeing the array.
- *      (2) The array size is determined by the number of stored numbers,
- *          not by the size of the allocated array in the Numa.
- *      (3) This function is provided to simplify calculations
- *          using the bare internal array, rather than continually
- *          calling accessors on the numa.  It is typically used
- *          on an array of size 256.
- * 
- */ -l_int32 * -numaGetIArray(NUMA *na) -{ -l_int32 i, n, ival; -l_int32 *array; - - PROCNAME("numaGetIArray"); - - if (!na) - return (l_int32 *)ERROR_PTR("na not defined", procName, NULL); - - n = numaGetCount(na); - if ((array = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL) - return (l_int32 *)ERROR_PTR("array not made", procName, NULL); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - array[i] = ival; - } - - return array; -} - - -/*! - * \brief numaGetFArray() - * - * \param[in] na - * \param[in] copyflag L_NOCOPY or L_COPY - * \return either the bare internal array or a copy of it, - * or NULL on error - * - *
- * Notes:
- *      (1) If copyflag == L_COPY, it makes a copy which the caller
- *          is responsible for freeing.  Otherwise, it operates
- *          directly on the bare array of the numa.
- *      (2) Very important: for L_NOCOPY, any writes to the array
- *          will be in the numa.  Do not write beyond the size of
- *          the count field, because it will not be accessible
- *          from the numa!  If necessary, be sure to set the count
- *          field to a larger number (such as the alloc size)
- *          BEFORE calling this function.  Creating with numaMakeConstant()
- *          is another way to insure full initialization.
- * 
- */ -l_float32 * -numaGetFArray(NUMA *na, - l_int32 copyflag) -{ -l_int32 i, n; -l_float32 *array; - - PROCNAME("numaGetFArray"); - - if (!na) - return (l_float32 *)ERROR_PTR("na not defined", procName, NULL); - - if (copyflag == L_NOCOPY) { - array = na->array; - } else { /* copyflag == L_COPY */ - n = numaGetCount(na); - if ((array = (l_float32 *)LEPT_CALLOC(n, sizeof(l_float32))) == NULL) - return (l_float32 *)ERROR_PTR("array not made", procName, NULL); - for (i = 0; i < n; i++) - array[i] = na->array[i]; - } - - return array; -} - - -/*! - * \brief numaGetRefCount() - * - * \param[in] na - * \return refcount, or UNDEF on error - */ -l_int32 -numaGetRefcount(NUMA *na) -{ - PROCNAME("numaGetRefcount"); - - if (!na) - return ERROR_INT("na not defined", procName, UNDEF); - return na->refcount; -} - - -/*! - * \brief numaChangeRefCount() - * - * \param[in] na - * \param[in] delta change to be applied - * \return 0 if OK, 1 on error - */ -l_ok -numaChangeRefcount(NUMA *na, - l_int32 delta) -{ - PROCNAME("numaChangeRefcount"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - na->refcount += delta; - return 0; -} - - -/*! - * \brief numaGetParameters() - * - * \param[in] na - * \param[out] pstartx [optional] startx - * \param[out] pdelx [optional] delx - * \return 0 if OK, 1 on error - */ -l_ok -numaGetParameters(NUMA *na, - l_float32 *pstartx, - l_float32 *pdelx) -{ - PROCNAME("numaGetParameters"); - - if (!pdelx && !pstartx) - return ERROR_INT("no return val requested", procName, 1); - if (pstartx) *pstartx = 0.0; - if (pdelx) *pdelx = 1.0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if (pstartx) *pstartx = na->startx; - if (pdelx) *pdelx = na->delx; - return 0; -} - - -/*! 
- * \brief numaSetParameters() - * - * \param[in] na - * \param[in] startx x value corresponding to na[0] - * \param[in] delx difference in x values for the situation where the - * elements of na correspond to the evaulation of a - * function at equal intervals of size %delx - * \return 0 if OK, 1 on error - */ -l_ok -numaSetParameters(NUMA *na, - l_float32 startx, - l_float32 delx) -{ - PROCNAME("numaSetParameters"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - - na->startx = startx; - na->delx = delx; - return 0; -} - - -/*! - * \brief numaCopyParameters() - * - * \param[in] nad destination Numa - * \param[in] nas source Numa - * \return 0 if OK, 1 on error - */ -l_ok -numaCopyParameters(NUMA *nad, - NUMA *nas) -{ -l_float32 start, binsize; - - PROCNAME("numaCopyParameters"); - - if (!nas || !nad) - return ERROR_INT("nas and nad not both defined", procName, 1); - - numaGetParameters(nas, &start, &binsize); - numaSetParameters(nad, start, binsize); - return 0; -} - - -/*----------------------------------------------------------------------* - * Convert to string array * - *----------------------------------------------------------------------*/ -/*! - * \brief numaConvertToSarray() - * - * \param[in] na - * \param[in] size1 size of conversion field - * \param[in] size2 for float conversion: size of field to the right - * of the decimal point - * \param[in] addzeros for integer conversion: to add lead zeros - * \param[in] type L_INTEGER_VALUE, L_FLOAT_VALUE - * \return a sarray of the float values converted to strings - * representing either integer or float values; or NULL on error. - * - *
- * Notes:
- *      (1) For integer conversion, size2 is ignored.
- *          For float conversion, addzeroes is ignored.
- * 
- */ -SARRAY * -numaConvertToSarray(NUMA *na, - l_int32 size1, - l_int32 size2, - l_int32 addzeros, - l_int32 type) -{ -char fmt[32], strbuf[64]; -l_int32 i, n, ival; -l_float32 fval; -SARRAY *sa; - - PROCNAME("numaConvertToSarray"); - - if (!na) - return (SARRAY *)ERROR_PTR("na not defined", procName, NULL); - if (type != L_INTEGER_VALUE && type != L_FLOAT_VALUE) - return (SARRAY *)ERROR_PTR("invalid type", procName, NULL); - - if (type == L_INTEGER_VALUE) { - if (addzeros) - snprintf(fmt, sizeof(fmt), "%%0%dd", size1); - else - snprintf(fmt, sizeof(fmt), "%%%dd", size1); - } else { /* L_FLOAT_VALUE */ - snprintf(fmt, sizeof(fmt), "%%%d.%df", size1, size2); - } - - n = numaGetCount(na); - if ((sa = sarrayCreate(n)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - - for (i = 0; i < n; i++) { - if (type == L_INTEGER_VALUE) { - numaGetIValue(na, i, &ival); - snprintf(strbuf, sizeof(strbuf), fmt, ival); - } else { /* L_FLOAT_VALUE */ - numaGetFValue(na, i, &fval); - snprintf(strbuf, sizeof(strbuf), fmt, fval); - } - sarrayAddString(sa, strbuf, L_COPY); - } - - return sa; -} - - -/*----------------------------------------------------------------------* - * Serialize numa for I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief numaRead() - * - * \param[in] filename - * \return na, or NULL on error - */ -NUMA * -numaRead(const char *filename) -{ -FILE *fp; -NUMA *na; - - PROCNAME("numaRead"); - - if (!filename) - return (NUMA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (NUMA *)ERROR_PTR("stream not opened", procName, NULL); - na = numaReadStream(fp); - fclose(fp); - if (!na) - return (NUMA *)ERROR_PTR("na not read", procName, NULL); - return na; -} - - -/*! 
- * \brief numaReadStream() - * - * \param[in] fp file stream - * \return numa, or NULL on error - */ -NUMA * -numaReadStream(FILE *fp) -{ -l_int32 i, n, index, ret, version; -l_float32 val, startx, delx; -NUMA *na; - - PROCNAME("numaReadStream"); - - if (!fp) - return (NUMA *)ERROR_PTR("stream not defined", procName, NULL); - - ret = fscanf(fp, "\nNuma Version %d\n", &version); - if (ret != 1) - return (NUMA *)ERROR_PTR("not a numa file", procName, NULL); - if (version != NUMA_VERSION_NUMBER) - return (NUMA *)ERROR_PTR("invalid numa version", procName, NULL); - if (fscanf(fp, "Number of numbers = %d\n", &n) != 1) - return (NUMA *)ERROR_PTR("invalid number of numbers", procName, NULL); - - if (n > MaxArraySize) { - L_ERROR("n = %d > %d\n", procName, n, MaxArraySize); - return NULL; - } - if ((na = numaCreate(n)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - - for (i = 0; i < n; i++) { - if (fscanf(fp, " [%d] = %f\n", &index, &val) != 2) { - numaDestroy(&na); - return (NUMA *)ERROR_PTR("bad input data", procName, NULL); - } - numaAddNumber(na, val); - } - - /* Optional data */ - if (fscanf(fp, "startx = %f, delx = %f\n", &startx, &delx) == 2) - numaSetParameters(na, startx, delx); - - return na; -} - - -/*! - * \brief numaReadMem() - * - * \param[in] data numa serialization; in ascii - * \param[in] size of data; can use strlen to get it - * \return na, or NULL on error - */ -NUMA * -numaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -NUMA *na; - - PROCNAME("numaReadMem"); - - if (!data) - return (NUMA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (NUMA *)ERROR_PTR("stream not opened", procName, NULL); - - na = numaReadStream(fp); - fclose(fp); - if (!na) L_ERROR("numa not read\n", procName); - return na; -} - - -/*! - * \brief numaWriteDebug() - * - * \param[in] filename - * \param[in] na - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Debug version, intended for use in the library when writing
- *          to files in a temp directory with names that are compiled in.
- *          This is used instead of numaWrite() for all such library calls.
- *      (2) The global variable LeptDebugOK defaults to 0, and can be set
- *          or cleared by the function setLeptDebugOK().
- * 
- */ -l_ok -numaWriteDebug(const char *filename, - NUMA *na) -{ - PROCNAME("numaWriteDebug"); - - if (LeptDebugOK) { - return numaWrite(filename, na); - } else { - L_INFO("write to named temp file %s is disabled\n", procName, filename); - return 0; - } -} - - -/*! - * \brief numaWrite() - * - * \param[in] filename - * \param[in] na - * \return 0 if OK, 1 on error - */ -l_ok -numaWrite(const char *filename, - NUMA *na) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("numaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = numaWriteStream(fp, na); - fclose(fp); - if (ret) - return ERROR_INT("na not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief numaWriteStream() - * - * \param[in] fp file stream; use NULL to write to stderr - * \param[in] na - * \return 0 if OK, 1 on error - */ -l_ok -numaWriteStream(FILE *fp, - NUMA *na) -{ -l_int32 i, n; -l_float32 startx, delx; - - PROCNAME("numaWriteStream"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (!fp) - return numaWriteStderr(na); - - n = numaGetCount(na); - fprintf(fp, "\nNuma Version %d\n", NUMA_VERSION_NUMBER); - fprintf(fp, "Number of numbers = %d\n", n); - for (i = 0; i < n; i++) - fprintf(fp, " [%d] = %f\n", i, na->array[i]); - fprintf(fp, "\n"); - - /* Optional data */ - numaGetParameters(na, &startx, &delx); - if (startx != 0.0 || delx != 1.0) - fprintf(fp, "startx = %f, delx = %f\n", startx, delx); - - return 0; -} - - -/*! 
- * \brief numaWriteStderr() - * - * \param[in] na - * \return 0 if OK, 1 on error - */ -l_ok -numaWriteStderr(NUMA *na) -{ -l_int32 i, n; -l_float32 startx, delx; - - PROCNAME("numaWriteStderr"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - - n = numaGetCount(na); - lept_stderr("\nNuma Version %d\n", NUMA_VERSION_NUMBER); - lept_stderr("Number of numbers = %d\n", n); - for (i = 0; i < n; i++) - lept_stderr(" [%d] = %f\n", i, na->array[i]); - lept_stderr("\n"); - - /* Optional data */ - numaGetParameters(na, &startx, &delx); - if (startx != 0.0 || delx != 1.0) - lept_stderr("startx = %f, delx = %f\n", startx, delx); - - return 0; -} - - -/*! - * \brief numaWriteMem() - * - * \param[out] pdata data of serialized numa; ascii - * \param[out] psize size of returned data - * \param[in] na - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a numa in memory and puts the result in a buffer.
- * 
- */ -l_ok -numaWriteMem(l_uint8 **pdata, - size_t *psize, - NUMA *na) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("numaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = numaWriteStream(fp, na); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = numaWriteStream(fp, na); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*--------------------------------------------------------------------------* - * Numaa creation, destruction * - *--------------------------------------------------------------------------*/ -/*! - * \brief numaaCreate() - * - * \param[in] n size of numa ptr array to be alloc'd 0 for default - * \return naa, or NULL on error - * - */ -NUMAA * -numaaCreate(l_int32 n) -{ -NUMAA *naa; - - PROCNAME("numaaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialArraySize; - - naa = (NUMAA *)LEPT_CALLOC(1, sizeof(NUMAA)); - if ((naa->numa = (NUMA **)LEPT_CALLOC(n, sizeof(NUMA *))) == NULL) { - numaaDestroy(&naa); - return (NUMAA *)ERROR_PTR("numa ptr array not made", procName, NULL); - } - - naa->nalloc = n; - naa->n = 0; - return naa; -} - - -/*! - * \brief numaaCreateFull() - * - * \param[in] nptr size of numa ptr array to be alloc'd - * \param[in] n size of individual numa arrays to be allocated - * to 0 for default - * \return naa, or NULL on error - * - *
- * Notes:
- *      (1) This allocates numaa and fills the array with allocated numas.
- *          In use, after calling this function, use
- *              numaaAddNumber(naa, index, val);
- *          to add val to the index-th numa in naa.
- * 
- */ -NUMAA * -numaaCreateFull(l_int32 nptr, - l_int32 n) -{ -l_int32 i; -NUMAA *naa; -NUMA *na; - - naa = numaaCreate(nptr); - for (i = 0; i < nptr; i++) { - na = numaCreate(n); - numaaAddNuma(naa, na, L_INSERT); - } - - return naa; -} - - -/*! - * \brief numaaTruncate() - * - * \param[in] naa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This identifies the largest index containing a numa that
- *          has any numbers within it, destroys all numa beyond that
- *          index, and resets the count.
- * 
- */ -l_ok -numaaTruncate(NUMAA *naa) -{ -l_int32 i, n, nn; -NUMA *na; - - PROCNAME("numaaTruncate"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - - n = numaaGetCount(naa); - for (i = n - 1; i >= 0; i--) { - na = numaaGetNuma(naa, i, L_CLONE); - if (!na) - continue; - nn = numaGetCount(na); - numaDestroy(&na); - if (nn == 0) - numaDestroy(&naa->numa[i]); - else - break; - } - naa->n = i + 1; - return 0; -} - - -/*! - * \brief numaaDestroy() - * - * \param[in,out] pnaa to be destroyed and nulled, if it exists - * \return void - */ -void -numaaDestroy(NUMAA **pnaa) -{ -l_int32 i; -NUMAA *naa; - - PROCNAME("numaaDestroy"); - - if (pnaa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((naa = *pnaa) == NULL) - return; - - for (i = 0; i < naa->n; i++) - numaDestroy(&naa->numa[i]); - LEPT_FREE(naa->numa); - LEPT_FREE(naa); - *pnaa = NULL; - - return; -} - - - -/*--------------------------------------------------------------------------* - * Add Numa to Numaa * - *--------------------------------------------------------------------------*/ -/*! - * \brief numaaAddNuma() - * - * \param[in] naa - * \param[in] na to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - */ -l_ok -numaaAddNuma(NUMAA *naa, - NUMA *na, - l_int32 copyflag) -{ -l_int32 n; -NUMA *nac; - - PROCNAME("numaaAddNuma"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if (copyflag == L_INSERT) { - nac = na; - } else if (copyflag == L_COPY) { - if ((nac = numaCopy(na)) == NULL) - return ERROR_INT("nac not made", procName, 1); - } else if (copyflag == L_CLONE) { - nac = numaClone(na); - } else { - return ERROR_INT("invalid copyflag", procName, 1); - } - - n = numaaGetCount(naa); - if (n >= naa->nalloc) - numaaExtendArray(naa); - naa->numa[n] = nac; - naa->n++; - return 0; -} - - -/*! 
- * \brief numaaExtendArray() - * - * \param[in] naa - * \return 0 if OK, 1 on error - */ -static l_int32 -numaaExtendArray(NUMAA *naa) -{ - PROCNAME("numaaExtendArray"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - - if ((naa->numa = (NUMA **)reallocNew((void **)&naa->numa, - sizeof(NUMA *) * naa->nalloc, - 2 * sizeof(NUMA *) * naa->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - naa->nalloc *= 2; - return 0; -} - - -/*----------------------------------------------------------------------* - * Numaa accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief numaaGetCount() - * - * \param[in] naa - * \return count number of numa, or 0 if no numa or on error - */ -l_int32 -numaaGetCount(NUMAA *naa) -{ - PROCNAME("numaaGetCount"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 0); - return naa->n; -} - - -/*! - * \brief numaaGetNumaCount() - * - * \param[in] naa - * \param[in] index of numa in naa - * \return count of numbers in the referenced numa, or 0 on error. - */ -l_int32 -numaaGetNumaCount(NUMAA *naa, - l_int32 index) -{ - PROCNAME("numaaGetNumaCount"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 0); - if (index < 0 || index >= naa->n) - return ERROR_INT("invalid index into naa", procName, 0); - return numaGetCount(naa->numa[index]); -} - - -/*! - * \brief numaaGetNumberCount() - * - * \param[in] naa - * \return count total number of numbers in the numaa, - * or 0 if no numbers or on error - */ -l_int32 -numaaGetNumberCount(NUMAA *naa) -{ -NUMA *na; -l_int32 n, sum, i; - - PROCNAME("numaaGetNumberCount"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 0); - - n = numaaGetCount(naa); - for (sum = 0, i = 0; i < n; i++) { - na = numaaGetNuma(naa, i, L_CLONE); - sum += numaGetCount(na); - numaDestroy(&na); - } - - return sum; -} - - -/*! 
- * \brief numaaGetPtrArray() - * - * \param[in] naa - * \return the internal array of ptrs to Numa, or NULL on error - * - *
- * Notes:
- *      (1) This function is convenient for doing direct manipulation on
- *          a fixed size array of Numas.  To do this, it sets the count
- *          to the full size of the allocated array of Numa ptrs.
- *          The originating Numaa owns this array: DO NOT free it!
- *      (2) Intended usage:
- *            Numaa *naa = numaaCreate(n);
- *            Numa **array = numaaGetPtrArray(naa);
- *             ...  [manipulate Numas directly on the array]
- *            numaaDestroy(&naa);
- *      (3) Cautions:
- *           ~ Do not free this array; it is owned by tne Numaa.
- *           ~ Do not call any functions on the Numaa, other than
- *             numaaDestroy() when you're finished with the array.
- *             Adding a Numa will force a resize, destroying the ptr array.
- *           ~ Do not address the array outside its allocated size.
- *             With the bare array, there are no protections.  If the
- *             allocated size is n, array[n] is an error.
- * 
- */ -NUMA ** -numaaGetPtrArray(NUMAA *naa) -{ - PROCNAME("numaaGetPtrArray"); - - if (!naa) - return (NUMA **)ERROR_PTR("naa not defined", procName, NULL); - - naa->n = naa->nalloc; - return naa->numa; -} - - -/*! - * \brief numaaGetNuma() - * - * \param[in] naa - * \param[in] index to the index-th numa - * \param[in] accessflag L_COPY or L_CLONE - * \return numa, or NULL on error - */ -NUMA * -numaaGetNuma(NUMAA *naa, - l_int32 index, - l_int32 accessflag) -{ - PROCNAME("numaaGetNuma"); - - if (!naa) - return (NUMA *)ERROR_PTR("naa not defined", procName, NULL); - if (index < 0 || index >= naa->n) - return (NUMA *)ERROR_PTR("index not valid", procName, NULL); - - if (accessflag == L_COPY) - return numaCopy(naa->numa[index]); - else if (accessflag == L_CLONE) - return numaClone(naa->numa[index]); - else - return (NUMA *)ERROR_PTR("invalid accessflag", procName, NULL); -} - - -/*! - * \brief numaaReplaceNuma() - * - * \param[in] naa - * \param[in] index to the index-th numa - * \param[in] na insert and replace any existing one - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Any existing numa is destroyed, and the input one
- *          is inserted in its place.
- *      (2) If the index is invalid, return 1 (error)
- * 
- */ -l_ok -numaaReplaceNuma(NUMAA *naa, - l_int32 index, - NUMA *na) -{ -l_int32 n; - - PROCNAME("numaaReplaceNuma"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaaGetCount(naa); - if (index < 0 || index >= n) - return ERROR_INT("index not valid", procName, 1); - - numaDestroy(&naa->numa[index]); - naa->numa[index] = na; - return 0; -} - - -/*! - * \brief numaaGetValue() - * - * \param[in] naa - * \param[in] i index of numa within numaa - * \param[in] j index into numa - * \param[out] pfval [optional] float value - * \param[out] pival [optional] int value - * \return 0 if OK, 1 on error - */ -l_ok -numaaGetValue(NUMAA *naa, - l_int32 i, - l_int32 j, - l_float32 *pfval, - l_int32 *pival) -{ -l_int32 n; -NUMA *na; - - PROCNAME("numaaGetValue"); - - if (!pfval && !pival) - return ERROR_INT("no return val requested", procName, 1); - if (pfval) *pfval = 0.0; - if (pival) *pival = 0; - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - n = numaaGetCount(naa); - if (i < 0 || i >= n) - return ERROR_INT("invalid index into naa", procName, 1); - na = naa->numa[i]; - if (j < 0 || j >= na->n) - return ERROR_INT("invalid index into na", procName, 1); - if (pfval) *pfval = na->array[j]; - if (pival) *pival = (l_int32)(na->array[j]); - return 0; -} - - -/*! - * \brief numaaAddNumber() - * - * \param[in] naa - * \param[in] index of numa within numaa - * \param[in] val float or int to be added; stored as a float - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Adds to an existing numa only.
- * 
- */ -l_ok -numaaAddNumber(NUMAA *naa, - l_int32 index, - l_float32 val) -{ -l_int32 n; -NUMA *na; - - PROCNAME("numaaAddNumber"); - - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - n = numaaGetCount(naa); - if (index < 0 || index >= n) - return ERROR_INT("invalid index in naa", procName, 1); - - na = numaaGetNuma(naa, index, L_CLONE); - numaAddNumber(na, val); - numaDestroy(&na); - return 0; -} - - -/*----------------------------------------------------------------------* - * Serialize numaa for I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief numaaRead() - * - * \param[in] filename - * \return naa, or NULL on error - */ -NUMAA * -numaaRead(const char *filename) -{ -FILE *fp; -NUMAA *naa; - - PROCNAME("numaaRead"); - - if (!filename) - return (NUMAA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (NUMAA *)ERROR_PTR("stream not opened", procName, NULL); - naa = numaaReadStream(fp); - fclose(fp); - if (!naa) - return (NUMAA *)ERROR_PTR("naa not read", procName, NULL); - return naa; -} - - -/*! 
- * \brief numaaReadStream() - * - * \param[in] fp file stream - * \return naa, or NULL on error - */ -NUMAA * -numaaReadStream(FILE *fp) -{ -l_int32 i, n, index, ret, version; -NUMA *na; -NUMAA *naa; - - PROCNAME("numaaReadStream"); - - if (!fp) - return (NUMAA *)ERROR_PTR("stream not defined", procName, NULL); - - ret = fscanf(fp, "\nNumaa Version %d\n", &version); - if (ret != 1) - return (NUMAA *)ERROR_PTR("not a numa file", procName, NULL); - if (version != NUMA_VERSION_NUMBER) - return (NUMAA *)ERROR_PTR("invalid numaa version", procName, NULL); - if (fscanf(fp, "Number of numa = %d\n\n", &n) != 1) - return (NUMAA *)ERROR_PTR("invalid number of numa", procName, NULL); - - if (n > MaxPtrArraySize) { - L_ERROR("n = %d > %d\n", procName, n, MaxPtrArraySize); - return NULL; - } - if ((naa = numaaCreate(n)) == NULL) - return (NUMAA *)ERROR_PTR("naa not made", procName, NULL); - - for (i = 0; i < n; i++) { - if (fscanf(fp, "Numa[%d]:", &index) != 1) { - numaaDestroy(&naa); - return (NUMAA *)ERROR_PTR("invalid numa header", procName, NULL); - } - if ((na = numaReadStream(fp)) == NULL) { - numaaDestroy(&naa); - return (NUMAA *)ERROR_PTR("na not made", procName, NULL); - } - numaaAddNuma(naa, na, L_INSERT); - } - - return naa; -} - - -/*! - * \brief numaaReadMem() - * - * \param[in] data numaa serialization; in ascii - * \param[in] size of data; can use strlen to get it - * \return naa, or NULL on error - */ -NUMAA * -numaaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -NUMAA *naa; - - PROCNAME("numaaReadMem"); - - if (!data) - return (NUMAA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (NUMAA *)ERROR_PTR("stream not opened", procName, NULL); - - naa = numaaReadStream(fp); - fclose(fp); - if (!naa) L_ERROR("naa not read\n", procName); - return naa; -} - - -/*! 
- * \brief numaaWrite() - * - * \param[in] filename - * \param[in] naa - * \return 0 if OK, 1 on error - */ -l_ok -numaaWrite(const char *filename, - NUMAA *naa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("numaaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = numaaWriteStream(fp, naa); - fclose(fp); - if (ret) - return ERROR_INT("naa not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief numaaWriteStream() - * - * \param[in] fp file stream - * \param[in] naa - * \return 0 if OK, 1 on error - */ -l_ok -numaaWriteStream(FILE *fp, - NUMAA *naa) -{ -l_int32 i, n; -NUMA *na; - - PROCNAME("numaaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - - n = numaaGetCount(naa); - fprintf(fp, "\nNumaa Version %d\n", NUMA_VERSION_NUMBER); - fprintf(fp, "Number of numa = %d\n\n", n); - for (i = 0; i < n; i++) { - if ((na = numaaGetNuma(naa, i, L_CLONE)) == NULL) - return ERROR_INT("na not found", procName, 1); - fprintf(fp, "Numa[%d]:", i); - numaWriteStream(fp, na); - numaDestroy(&na); - } - - return 0; -} - - -/*! - * \brief numaaWriteMem() - * - * \param[out] pdata data of serialized numaa; ascii - * \param[out] psize size of returned data - * \param[in] naa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a numaa in memory and puts the result in a buffer.
- * 
- */ -l_ok -numaaWriteMem(l_uint8 **pdata, - size_t *psize, - NUMAA *naa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("numaaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = numaaWriteStream(fp, naa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = numaaWriteStream(fp, naa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numafunc1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numafunc1.c deleted file mode 100644 index 18f848b1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numafunc1.c +++ /dev/null @@ -1,3491 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file numafunc1.c - *
- *
- *      --------------------------------------
- *      This file has these Numa utilities:
- *         - arithmetic operations
- *         - simple data analysis
- *         - generation of special sequences
- *         - permutations
- *         - interpolation
- *         - sorting
- *         - data analysis requiring sorting
- *         - joins and rearrangements
- *      --------------------------------------
- *
- *      Arithmetic and logic
- *          NUMA        *numaArithOp()
- *          NUMA        *numaLogicalOp()
- *          NUMA        *numaInvert()
- *          l_int32      numaSimilar()
- *          l_int32      numaAddToNumber()
- *
- *      Simple extractions
- *          l_int32      numaGetMin()
- *          l_int32      numaGetMax()
- *          l_int32      numaGetSum()
- *          NUMA        *numaGetPartialSums()
- *          l_int32      numaGetSumOnInterval()
- *          l_int32      numaHasOnlyIntegers()
- *          NUMA        *numaSubsample()
- *          NUMA        *numaMakeDelta()
- *          NUMA        *numaMakeSequence()
- *          NUMA        *numaMakeConstant()
- *          NUMA        *numaMakeAbsValue()
- *          NUMA        *numaAddBorder()
- *          NUMA        *numaAddSpecifiedBorder()
- *          NUMA        *numaRemoveBorder()
- *          l_int32      numaCountNonzeroRuns()
- *          l_int32      numaGetNonzeroRange()
- *          l_int32      numaGetCountRelativeToZero()
- *          NUMA        *numaClipToInterval()
- *          NUMA        *numaMakeThresholdIndicator()
- *          NUMA        *numaUniformSampling()
- *          NUMA        *numaReverse()
- *
- *      Signal feature extraction
- *          NUMA        *numaLowPassIntervals()
- *          NUMA        *numaThresholdEdges()
- *          NUMA        *numaGetSpanValues()
- *          NUMA        *numaGetEdgeValues()
- *
- *      Interpolation
- *          l_int32      numaInterpolateEqxVal()
- *          l_int32      numaInterpolateEqxInterval()
- *          l_int32      numaInterpolateArbxVal()
- *          l_int32      numaInterpolateArbxInterval()
- *
- *      Functions requiring interpolation
- *          l_int32      numaFitMax()
- *          l_int32      numaDifferentiateInterval()
- *          l_int32      numaIntegrateInterval()
- *
- *      Sorting
- *          NUMA        *numaSortGeneral()
- *          NUMA        *numaSortAutoSelect()
- *          NUMA        *numaSortIndexAutoSelect()
- *          l_int32      numaChooseSortType()
- *          NUMA        *numaSort()
- *          NUMA        *numaBinSort()
- *          NUMA        *numaGetSortIndex()
- *          NUMA        *numaGetBinSortIndex()
- *          NUMA        *numaSortByIndex()
- *          l_int32      numaIsSorted()
- *          l_int32      numaSortPair()
- *          NUMA        *numaInvertMap()
- *
- *      Random permutation
- *          NUMA        *numaPseudorandomSequence()
- *          NUMA        *numaRandomPermutation()
- *
- *      Functions requiring sorting
- *          l_int32      numaGetRankValue()
- *          l_int32      numaGetMedian()
- *          l_int32      numaGetBinnedMedian()
- *          l_int32      numaGetMeanDevFromMedian()
- *          l_int32      numaGetMedianDevFromMedian()
- *          l_int32      numaGetMode()
- *
- *      Rearrangements
- *          l_int32      numaJoin()
- *          l_int32      numaaJoin()
- *          NUMA        *numaaFlattenToNuma()
- *
- *    Things to remember when using the Numa:
- *
- *    (1) The numa is a struct, not an array.  Always use accessors
- *        (see numabasic.c), never the fields directly.
- *
- *    (2) The number array holds l_float32 values.  It can also
- *        be used to store l_int32 values.  See numabasic.c for
- *        details on using the accessors.
- *
- *    (3) If you use numaCreate(), no numbers are stored and the size is 0.
- *        You have to add numbers to increase the size.
- *        If you want to start with a numa of a fixed size, with each
- *        entry initialized to the same value, use numaMakeConstant().
- *
- *    (4) Occasionally, in the comments we denote the i-th element of a
- *        numa by na[i].  This is conceptual only -- the numa is not an array!
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*----------------------------------------------------------------------* - * Arithmetic and logical ops on Numas * - *----------------------------------------------------------------------*/ -/*! - * \brief numaArithOp() - * - * \param[in] nad [optional] can be null or equal to na1 (in-place - * \param[in] na1 - * \param[in] na2 - * \param[in] op L_ARITH_ADD, L_ARITH_SUBTRACT, - * L_ARITH_MULTIPLY, L_ARITH_DIVIDE - * \return nad always: operation applied to na1 and na2 - * - *
- * Notes:
- *      (1) The sizes of na1 and na2 must be equal.
- *      (2) nad can only null or equal to na1.
- *      (3) To add a constant to a numa, or to multipy a numa by
- *          a constant, use numaTransform().
- * 
- */ -NUMA * -numaArithOp(NUMA *nad, - NUMA *na1, - NUMA *na2, - l_int32 op) -{ -l_int32 i, n; -l_float32 val1, val2; - - PROCNAME("numaArithOp"); - - if (!na1 || !na2) - return (NUMA *)ERROR_PTR("na1, na2 not both defined", procName, nad); - n = numaGetCount(na1); - if (n != numaGetCount(na2)) - return (NUMA *)ERROR_PTR("na1, na2 sizes differ", procName, nad); - if (nad && nad != na1) - return (NUMA *)ERROR_PTR("nad defined but not in-place", procName, nad); - if (op != L_ARITH_ADD && op != L_ARITH_SUBTRACT && - op != L_ARITH_MULTIPLY && op != L_ARITH_DIVIDE) - return (NUMA *)ERROR_PTR("invalid op", procName, nad); - if (op == L_ARITH_DIVIDE) { - for (i = 0; i < n; i++) { - numaGetFValue(na2, i, &val2); - if (val2 == 0.0) - return (NUMA *)ERROR_PTR("na2 has 0 element", procName, nad); - } - } - - /* If nad is not identical to na1, make it an identical copy */ - if (!nad) - nad = numaCopy(na1); - - for (i = 0; i < n; i++) { - numaGetFValue(nad, i, &val1); - numaGetFValue(na2, i, &val2); - switch (op) { - case L_ARITH_ADD: - numaSetValue(nad, i, val1 + val2); - break; - case L_ARITH_SUBTRACT: - numaSetValue(nad, i, val1 - val2); - break; - case L_ARITH_MULTIPLY: - numaSetValue(nad, i, val1 * val2); - break; - case L_ARITH_DIVIDE: - numaSetValue(nad, i, val1 / val2); - break; - default: - lept_stderr(" Unknown arith op: %d\n", op); - return nad; - } - } - - return nad; -} - - -/*! - * \brief numaLogicalOp() - * - * \param[in] nad [optional] can be null or equal to na1 (in-place - * \param[in] na1 - * \param[in] na2 - * \param[in] op L_UNION, L_INTERSECTION, L_SUBTRACTION, L_EXCLUSIVE_OR - * \return nad always: operation applied to na1 and na2 - * - *
- * Notes:
- *      (1) The sizes of na1 and na2 must be equal.
- *      (2) nad can only be null or equal to na1.
- *      (3) This is intended for use with indicator arrays (0s and 1s).
- *          Input data is extracted as integers (0 == false, anything
- *          else == true); output results are 0 and 1.
- *      (4) L_SUBTRACTION is subtraction of val2 from val1.  For bit logical
- *          arithmetic this is (val1 & ~val2), but because these values
- *          are integers, we use (val1 && !val2).
- * 
- */ -NUMA * -numaLogicalOp(NUMA *nad, - NUMA *na1, - NUMA *na2, - l_int32 op) -{ -l_int32 i, n, val1, val2, val; - - PROCNAME("numaLogicalOp"); - - if (!na1 || !na2) - return (NUMA *)ERROR_PTR("na1, na2 not both defined", procName, nad); - n = numaGetCount(na1); - if (n != numaGetCount(na2)) - return (NUMA *)ERROR_PTR("na1, na2 sizes differ", procName, nad); - if (nad && nad != na1) - return (NUMA *)ERROR_PTR("nad defined; not in-place", procName, nad); - if (op != L_UNION && op != L_INTERSECTION && - op != L_SUBTRACTION && op != L_EXCLUSIVE_OR) - return (NUMA *)ERROR_PTR("invalid op", procName, nad); - - /* If nad is not identical to na1, make it an identical copy */ - if (!nad) - nad = numaCopy(na1); - - for (i = 0; i < n; i++) { - numaGetIValue(nad, i, &val1); - numaGetIValue(na2, i, &val2); - val1 = (val1 == 0) ? 0 : 1; - val2 = (val2 == 0) ? 0 : 1; - switch (op) { - case L_UNION: - val = (val1 || val2) ? 1 : 0; - numaSetValue(nad, i, val); - break; - case L_INTERSECTION: - val = (val1 && val2) ? 1 : 0; - numaSetValue(nad, i, val); - break; - case L_SUBTRACTION: - val = (val1 && !val2) ? 1 : 0; - numaSetValue(nad, i, val); - break; - case L_EXCLUSIVE_OR: - val = (val1 != val2) ? 1 : 0; - numaSetValue(nad, i, val); - break; - default: - lept_stderr(" Unknown logical op: %d\n", op); - return nad; - } - } - - return nad; -} - - -/*! - * \brief numaInvert() - * - * \param[in] nad [optional] can be null or equal to nas (in-place - * \param[in] nas - * \return nad always: 'inverts' nas - * - *
- * Notes:
- *      (1) This is intended for use with indicator arrays (0s and 1s).
- *          It gives a boolean-type output, taking the input as
- *          an integer and inverting it:
- *              0              -->  1
- *              anything else  -->   0
- * 
- */ -NUMA * -numaInvert(NUMA *nad, - NUMA *nas) -{ -l_int32 i, n, val; - - PROCNAME("numaInvert"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, nad); - if (nad && nad != nas) - return (NUMA *)ERROR_PTR("nad defined; not in-place", procName, nad); - - if (!nad) - nad = numaCopy(nas); - n = numaGetCount(nad); - for (i = 0; i < n; i++) { - numaGetIValue(nad, i, &val); - if (!val) - val = 1; - else - val = 0; - numaSetValue(nad, i, val); - } - - return nad; -} - - -/*! - * \brief numaSimilar() - * - * \param[in] na1 - * \param[in] na2 - * \param[in] maxdiff use 0.0 for exact equality - * \param[out] psimilar 1 if similar; 0 if different - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Float values can differ slightly due to roundoff and
- *          accumulated errors.  Using %maxdiff > 0.0 allows similar
- *          arrays to be identified.
- * 
-*/ -l_int32 -numaSimilar(NUMA *na1, - NUMA *na2, - l_float32 maxdiff, - l_int32 *psimilar) -{ -l_int32 i, n; -l_float32 val1, val2; - - PROCNAME("numaSimilar"); - - if (!psimilar) - return ERROR_INT("&similar not defined", procName, 1); - *psimilar = 0; - if (!na1 || !na2) - return ERROR_INT("na1 and na2 not both defined", procName, 1); - maxdiff = L_ABS(maxdiff); - - n = numaGetCount(na1); - if (n != numaGetCount(na2)) return 0; - - for (i = 0; i < n; i++) { - numaGetFValue(na1, i, &val1); - numaGetFValue(na2, i, &val2); - if (L_ABS(val1 - val2) > maxdiff) return 0; - } - - *psimilar = 1; - return 0; -} - - -/*! - * \brief numaAddToNumber() - * - * \param[in] na source numa - * \param[in] index element to be changed - * \param[in] val new value to be added - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is useful for accumulating sums, regardless of the index
- *          order in which the values are made available.
- *      (2) Before use, the numa has to be filled up to %index.  This would
- *          typically be used by creating the numa with the full sized
- *          array, initialized to 0.0, using numaMakeConstant().
- * 
- */ -l_ok -numaAddToNumber(NUMA *na, - l_int32 index, - l_float32 val) -{ -l_int32 n; - - PROCNAME("numaAddToNumber"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - na->array[index] += val; - return 0; -} - - -/*----------------------------------------------------------------------* - * Simple extractions * - *----------------------------------------------------------------------*/ -/*! - * \brief numaGetMin() - * - * \param[in] na source numa - * \param[out] pminval [optional] min value - * \param[out] piminloc [optional] index of min location - * \return 0 if OK; 1 on error - */ -l_ok -numaGetMin(NUMA *na, - l_float32 *pminval, - l_int32 *piminloc) -{ -l_int32 i, n, iminloc; -l_float32 val, minval; - - PROCNAME("numaGetMin"); - - if (!pminval && !piminloc) - return ERROR_INT("nothing to do", procName, 1); - if (pminval) *pminval = 0.0; - if (piminloc) *piminloc = 0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - minval = +1000000000.; - iminloc = 0; - n = numaGetCount(na); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - if (val < minval) { - minval = val; - iminloc = i; - } - } - - if (pminval) *pminval = minval; - if (piminloc) *piminloc = iminloc; - return 0; -} - - -/*! 
- * \brief numaGetMax() - * - * \param[in] na source numa - * \param[out] pmaxval [optional] max value - * \param[out] pimaxloc [optional] index of max location - * \return 0 if OK; 1 on error - */ -l_ok -numaGetMax(NUMA *na, - l_float32 *pmaxval, - l_int32 *pimaxloc) -{ -l_int32 i, n, imaxloc; -l_float32 val, maxval; - - PROCNAME("numaGetMax"); - - if (!pmaxval && !pimaxloc) - return ERROR_INT("nothing to do", procName, 1); - if (pmaxval) *pmaxval = 0.0; - if (pimaxloc) *pimaxloc = 0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - maxval = -1000000000.; - imaxloc = 0; - n = numaGetCount(na); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - if (val > maxval) { - maxval = val; - imaxloc = i; - } - } - - if (pmaxval) *pmaxval = maxval; - if (pimaxloc) *pimaxloc = imaxloc; - return 0; -} - - -/*! - * \brief numaGetSum() - * - * \param[in] na source numa - * \param[out] psum sum of values - * \return 0 if OK, 1 on error - */ -l_ok -numaGetSum(NUMA *na, - l_float32 *psum) -{ -l_int32 i, n; -l_float32 val, sum; - - PROCNAME("numaGetSum"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (!psum) - return ERROR_INT("&sum not defined", procName, 1); - - sum = 0.0; - n = numaGetCount(na); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - sum += val; - } - *psum = sum; - return 0; -} - - -/*! - * \brief numaGetPartialSums() - * - * \param[in] na source numa - * \return nasum, or NULL on error - * - *
- * Notes:
- *      (1) nasum[i] is the sum for all j <= i of na[j].
- *          So nasum[0] = na[0].
- *      (2) If you want to generate a rank function, where rank[0] - 0.0,
- *          insert a 0.0 at the beginning of the nasum array.
- * 
- */ -NUMA * -numaGetPartialSums(NUMA *na) -{ -l_int32 i, n; -l_float32 val, sum; -NUMA *nasum; - - PROCNAME("numaGetPartialSums"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - - n = numaGetCount(na); - nasum = numaCreate(n); - sum = 0.0; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - sum += val; - numaAddNumber(nasum, sum); - } - return nasum; -} - - -/*! - * \brief numaGetSumOnInterval() - * - * \param[in] na source numa - * \param[in] first beginning index - * \param[in] last final index - * \param[out] psum sum of values in the index interval range - * \return 0 if OK, 1 on error - */ -l_ok -numaGetSumOnInterval(NUMA *na, - l_int32 first, - l_int32 last, - l_float32 *psum) -{ -l_int32 i, n, truelast; -l_float32 val, sum; - - PROCNAME("numaGetSumOnInterval"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (!psum) - return ERROR_INT("&sum not defined", procName, 1); - *psum = 0.0; - - sum = 0.0; - n = numaGetCount(na); - if (first >= n) /* not an error */ - return 0; - truelast = L_MIN(last, n - 1); - - for (i = first; i <= truelast; i++) { - numaGetFValue(na, i, &val); - sum += val; - } - *psum = sum; - return 0; -} - - -/*! - * \brief numaHasOnlyIntegers() - * - * \param[in] na source numa - * \param[in] maxsamples maximum number of samples to check - * \param[out] pallints 1 if all sampled values are ints; else 0 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Set %maxsamples == 0 to check every integer in na.  Otherwise,
- *          this samples no more than %maxsamples.
- * 
- */ -l_ok -numaHasOnlyIntegers(NUMA *na, - l_int32 maxsamples, - l_int32 *pallints) -{ -l_int32 i, n, incr; -l_float32 val; - - PROCNAME("numaHasOnlyIntegers"); - - if (!pallints) - return ERROR_INT("&allints not defined", procName, 1); - *pallints = TRUE; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - if ((n = numaGetCount(na)) == 0) - return ERROR_INT("na empty", procName, 1); - if (maxsamples <= 0) - incr = 1; - else - incr = (l_int32)((n + maxsamples - 1) / maxsamples); - for (i = 0; i < n; i += incr) { - numaGetFValue(na, i, &val); - if (val != (l_int32)val) { - *pallints = FALSE; - return 0; - } - } - - return 0; -} - - -/*! - * \brief numaSubsample() - * - * \param[in] nas - * \param[in] subfactor subsample factor, >= 1 - * \return nad evenly sampled values from nas, or NULL on error - */ -NUMA * -numaSubsample(NUMA *nas, - l_int32 subfactor) -{ -l_int32 i, n; -l_float32 val; -NUMA *nad; - - PROCNAME("numaSubsample"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (subfactor < 1) - return (NUMA *)ERROR_PTR("subfactor < 1", procName, NULL); - - nad = numaCreate(0); - n = numaGetCount(nas); - for (i = 0; i < n; i++) { - if (i % subfactor != 0) continue; - numaGetFValue(nas, i, &val); - numaAddNumber(nad, val); - } - - return nad; -} - - -/*! - * \brief numaMakeDelta() - * - * \param[in] nas input numa - * \return numa of difference values val[i+1] - val[i], - * or NULL on error - */ -NUMA * -numaMakeDelta(NUMA *nas) -{ -l_int32 i, n, prev, cur; -NUMA *nad; - - PROCNAME("numaMakeDelta"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - nad = numaCreate(n - 1); - prev = 0; - for (i = 1; i < n; i++) { - numaGetIValue(nas, i, &cur); - numaAddNumber(nad, cur - prev); - prev = cur; - } - return nad; -} - - -/*! 
- * \brief numaMakeSequence() - * - * \param[in] startval - * \param[in] increment - * \param[in] size of sequence - * \return numa of sequence of evenly spaced values, or NULL on error - */ -NUMA * -numaMakeSequence(l_float32 startval, - l_float32 increment, - l_int32 size) -{ -l_int32 i; -l_float32 val; -NUMA *na; - - PROCNAME("numaMakeSequence"); - - if ((na = numaCreate(size)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - - for (i = 0; i < size; i++) { - val = startval + i * increment; - numaAddNumber(na, val); - } - - return na; -} - - -/*! - * \brief numaMakeConstant() - * - * \param[in] val - * \param[in] size of numa - * \return numa of given size with all entries equal to 'val', - * or NULL on error - */ -NUMA * -numaMakeConstant(l_float32 val, - l_int32 size) -{ - return numaMakeSequence(val, 0.0, size); -} - - -/*! - * \brief numaMakeAbsValue() - * - * \param[in] nad can be null for new array, or the same as nas for inplace - * \param[in] nas input numa - * \return nad with all numbers being the absval of the input, - * or NULL on error - */ -NUMA * -numaMakeAbsValue(NUMA *nad, - NUMA *nas) -{ -l_int32 i, n; -l_float32 val; - - PROCNAME("numaMakeAbsValue"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (nad && nad != nas) - return (NUMA *)ERROR_PTR("nad and not in-place", procName, NULL); - - if (!nad) - nad = numaCopy(nas); - n = numaGetCount(nad); - for (i = 0; i < n; i++) { - val = nad->array[i]; - nad->array[i] = L_ABS(val); - } - - return nad; -} - - -/*! 
- * \brief numaAddBorder() - * - * \param[in] nas - * \param[in] left number of elements to add before the start - * \param[in] right number of elements to add after the end - * \param[in] val initialize border elements - * \return nad with added elements at left and right, or NULL on error - */ -NUMA * -numaAddBorder(NUMA *nas, - l_int32 left, - l_int32 right, - l_float32 val) -{ -l_int32 i, n, len; -l_float32 startx, delx; -l_float32 *fas, *fad; -NUMA *nad; - - PROCNAME("numaAddBorder"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (left < 0) left = 0; - if (right < 0) right = 0; - if (left == 0 && right == 0) - return numaCopy(nas); - - n = numaGetCount(nas); - len = n + left + right; - nad = numaMakeConstant(val, len); - numaGetParameters(nas, &startx, &delx); - numaSetParameters(nad, startx - delx * left, delx); - fas = numaGetFArray(nas, L_NOCOPY); - fad = numaGetFArray(nad, L_NOCOPY); - for (i = 0; i < n; i++) - fad[left + i] = fas[i]; - - return nad; -} - - -/*! 
- * \brief numaAddSpecifiedBorder() - * - * \param[in] nas - * \param[in] left number of elements to add before the start - * \param[in] right number of elements to add after the end - * \param[in] type L_CONTINUED_BORDER, L_MIRRORED_BORDER - * \return nad with added elements at left and right, or NULL on error - */ -NUMA * -numaAddSpecifiedBorder(NUMA *nas, - l_int32 left, - l_int32 right, - l_int32 type) -{ -l_int32 i, n; -l_float32 *fa; -NUMA *nad; - - PROCNAME("numaAddSpecifiedBorder"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (left < 0) left = 0; - if (right < 0) right = 0; - if (left == 0 && right == 0) - return numaCopy(nas); - if (type != L_CONTINUED_BORDER && type != L_MIRRORED_BORDER) - return (NUMA *)ERROR_PTR("invalid type", procName, NULL); - n = numaGetCount(nas); - if (type == L_MIRRORED_BORDER && (left > n || right > n)) - return (NUMA *)ERROR_PTR("border too large", procName, NULL); - - nad = numaAddBorder(nas, left, right, 0); - n = numaGetCount(nad); - fa = numaGetFArray(nad, L_NOCOPY); - if (type == L_CONTINUED_BORDER) { - for (i = 0; i < left; i++) - fa[i] = fa[left]; - for (i = n - right; i < n; i++) - fa[i] = fa[n - right - 1]; - } else { /* type == L_MIRRORED_BORDER */ - for (i = 0; i < left; i++) - fa[i] = fa[2 * left - 1 - i]; - for (i = 0; i < right; i++) - fa[n - right + i] = fa[n - right - i - 1]; - } - - return nad; -} - - -/*! 
- * \brief numaRemoveBorder() - * - * \param[in] nas - * \param[in] left number of elements to remove from the start - * \param[in] right number of elements to remove up to the end - * \return nad with removed elements at left and right, or NULL on error - */ -NUMA * -numaRemoveBorder(NUMA *nas, - l_int32 left, - l_int32 right) -{ -l_int32 i, n, len; -l_float32 startx, delx; -l_float32 *fas, *fad; -NUMA *nad; - - PROCNAME("numaRemoveBorder"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (left < 0) left = 0; - if (right < 0) right = 0; - if (left == 0 && right == 0) - return numaCopy(nas); - - n = numaGetCount(nas); - if ((len = n - left - right) < 0) - return (NUMA *)ERROR_PTR("len < 0 after removal", procName, NULL); - nad = numaMakeConstant(0, len); - numaGetParameters(nas, &startx, &delx); - numaSetParameters(nad, startx + delx * left, delx); - fas = numaGetFArray(nas, L_NOCOPY); - fad = numaGetFArray(nad, L_NOCOPY); - for (i = 0; i < len; i++) - fad[i] = fas[left + i]; - - return nad; -} - - -/*! - * \brief numaCountNonzeroRuns() - * - * \param[in] na e.g., of pixel counts in rows or columns - * \param[out] pcount number of nonzero runs - * \return 0 if OK, 1 on error - */ -l_ok -numaCountNonzeroRuns(NUMA *na, - l_int32 *pcount) -{ -l_int32 n, i, val, count, inrun; - - PROCNAME("numaCountNonzeroRuns"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - count = 0; - inrun = FALSE; - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &val); - if (!inrun && val > 0) { - count++; - inrun = TRUE; - } else if (inrun && val == 0) { - inrun = FALSE; - } - } - *pcount = count; - return 0; -} - - -/*! 
- * \brief numaGetNonzeroRange() - * - * \param[in] na source numa - * \param[in] eps largest value considered to be zero - * \param[out] pfirst, plast interval of array indices - * where values are nonzero - * \return 0 if OK, 1 on error or if no nonzero range is found. - */ -l_ok -numaGetNonzeroRange(NUMA *na, - l_float32 eps, - l_int32 *pfirst, - l_int32 *plast) -{ -l_int32 n, i, found; -l_float32 val; - - PROCNAME("numaGetNonzeroRange"); - - if (pfirst) *pfirst = 0; - if (plast) *plast = 0; - if (!pfirst || !plast) - return ERROR_INT("pfirst and plast not both defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - found = FALSE; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - if (val > eps) { - found = TRUE; - break; - } - } - if (!found) { - *pfirst = n - 1; - *plast = 0; - return 1; - } - - *pfirst = i; - for (i = n - 1; i >= 0; i--) { - numaGetFValue(na, i, &val); - if (val > eps) - break; - } - *plast = i; - return 0; -} - - -/*! - * \brief numaGetCountRelativeToZero() - * - * \param[in] na source numa - * \param[in] type L_LESS_THAN_ZERO, L_EQUAL_TO_ZERO, L_GREATER_THAN_ZERO - * \param[out] pcount count of values of given type - * \return 0 if OK, 1 on error - */ -l_ok -numaGetCountRelativeToZero(NUMA *na, - l_int32 type, - l_int32 *pcount) -{ -l_int32 n, i, count; -l_float32 val; - - PROCNAME("numaGetCountRelativeToZero"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - for (i = 0, count = 0; i < n; i++) { - numaGetFValue(na, i, &val); - if (type == L_LESS_THAN_ZERO && val < 0.0) - count++; - else if (type == L_EQUAL_TO_ZERO && val == 0.0) - count++; - else if (type == L_GREATER_THAN_ZERO && val > 0.0) - count++; - } - - *pcount = count; - return 0; -} - - -/*! 
- * \brief numaClipToInterval() - * - * \param[in] nas - * \param[in] first, last clipping interval - * \return numa with the same values as the input, but clipped - * to the specified interval - * - *
- * Notes:
- *        If you want the indices of the array values to be unchanged,
- *        use first = 0.
- *  Usage:
- *        This is useful to clip a histogram that has a few nonzero
- *        values to its nonzero range.
- * 
- */ -NUMA * -numaClipToInterval(NUMA *nas, - l_int32 first, - l_int32 last) -{ -l_int32 n, i, truelast; -l_float32 val, startx, delx; -NUMA *nad; - - PROCNAME("numaClipToInterval"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (first > last) - return (NUMA *)ERROR_PTR("range not valid", procName, NULL); - - n = numaGetCount(nas); - if (first >= n) - return (NUMA *)ERROR_PTR("no elements in range", procName, NULL); - truelast = L_MIN(last, n - 1); - if ((nad = numaCreate(truelast - first + 1)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - for (i = first; i <= truelast; i++) { - numaGetFValue(nas, i, &val); - numaAddNumber(nad, val); - } - numaGetParameters(nas, &startx, &delx); - numaSetParameters(nad, startx + first * delx, delx); - return nad; -} - - -/*! - * \brief numaMakeThresholdIndicator() - * - * \param[in] nas input numa - * \param[in] thresh threshold value - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return nad : indicator array: values are 0 and 1 - * - *
- * Notes:
- *      (1) For each element in nas, if the constraint given by 'type'
- *          correctly specifies its relation to thresh, a value of 1
- *          is recorded in nad.
- * 
- */ -NUMA * -numaMakeThresholdIndicator(NUMA *nas, - l_float32 thresh, - l_int32 type) -{ -l_int32 n, i, ival; -l_float32 fval; -NUMA *nai; - - PROCNAME("numaMakeThresholdIndicator"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - nai = numaCreate(n); - for (i = 0; i < n; i++) { - numaGetFValue(nas, i, &fval); - ival = 0; - switch (type) - { - case L_SELECT_IF_LT: - if (fval < thresh) ival = 1; - break; - case L_SELECT_IF_GT: - if (fval > thresh) ival = 1; - break; - case L_SELECT_IF_LTE: - if (fval <= thresh) ival = 1; - break; - case L_SELECT_IF_GTE: - if (fval >= thresh) ival = 1; - break; - default: - numaDestroy(&nai); - return (NUMA *)ERROR_PTR("invalid type", procName, NULL); - } - numaAddNumber(nai, ival); - } - - return nai; -} - - -/*! - * \brief numaUniformSampling() - * - * \param[in] nas input numa - * \param[in] nsamp number of samples - * \return nad : resampled array, or NULL on error - * - *
- * Notes:
- *      (1) This resamples the values in the array, using %nsamp
- *          equal divisions.
- * 
- */ -NUMA * -numaUniformSampling(NUMA *nas, - l_int32 nsamp) -{ -l_int32 n, i, j, ileft, iright; -l_float32 left, right, binsize, lfract, rfract, sum, startx, delx; -l_float32 *array; -NUMA *nad; - - PROCNAME("numaUniformSampling"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (nsamp <= 0) - return (NUMA *)ERROR_PTR("nsamp must be > 0", procName, NULL); - - n = numaGetCount(nas); - nad = numaCreate(nsamp); - array = numaGetFArray(nas, L_NOCOPY); - binsize = (l_float32)n / (l_float32)nsamp; - numaGetParameters(nas, &startx, &delx); - numaSetParameters(nad, startx, binsize * delx); - left = 0.0; - for (i = 0; i < nsamp; i++) { - sum = 0.0; - right = left + binsize; - ileft = (l_int32)left; - lfract = 1.0 - left + ileft; - if (lfract >= 1.0) /* on left bin boundary */ - lfract = 0.0; - iright = (l_int32)right; - rfract = right - iright; - iright = L_MIN(iright, n - 1); - if (ileft == iright) { /* both are within the same original sample */ - sum += (lfract + rfract - 1.0) * array[ileft]; - } else { - if (lfract > 0.0001) /* left fraction */ - sum += lfract * array[ileft]; - if (rfract > 0.0001) /* right fraction */ - sum += rfract * array[iright]; - for (j = ileft + 1; j < iright; j++) /* entire pixels */ - sum += array[j]; - } - - numaAddNumber(nad, sum); - left = right; - } - return nad; -} - - -/*! - * \brief numaReverse() - * - * \param[in] nad [optional] can be null or equal to nas - * \param[in] nas input numa - * \return nad : reversed, or NULL on error - * - *
- * Notes:
- *      (1) Usage:
- *            numaReverse(nas, nas);   // in-place
- *            nad = numaReverse(NULL, nas);  // makes a new one
- * 
- */ -NUMA * -numaReverse(NUMA *nad, - NUMA *nas) -{ -l_int32 n, i; -l_float32 val1, val2; - - PROCNAME("numaReverse"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (nad && nas != nad) - return (NUMA *)ERROR_PTR("nad defined but != nas", procName, NULL); - - n = numaGetCount(nas); - if (nad) { /* in-place */ - for (i = 0; i < n / 2; i++) { - numaGetFValue(nad, i, &val1); - numaGetFValue(nad, n - i - 1, &val2); - numaSetValue(nad, i, val2); - numaSetValue(nad, n - i - 1, val1); - } - } else { - nad = numaCreate(n); - for (i = n - 1; i >= 0; i--) { - numaGetFValue(nas, i, &val1); - numaAddNumber(nad, val1); - } - } - - /* Reverse the startx and delx fields */ - nad->startx = nas->startx + (n - 1) * nas->delx; - nad->delx = -nas->delx; - return nad; -} - - -/*----------------------------------------------------------------------* - * Signal feature extraction * - *----------------------------------------------------------------------*/ -/*! - * \brief numaLowPassIntervals() - * - * \param[in] nas input numa - * \param[in] thresh threshold fraction of max; in [0.0 ... 1.0] - * \param[in] maxn for normalizing; set maxn = 0.0 to use the max in nas - * \return nad : interval abscissa pairs, or NULL on error - * - *
- * Notes:
- *      (1) For each interval where the value is less than a specified
- *          fraction of the maximum, this records the left and right "x"
- *          value.
- * 
- */ -NUMA * -numaLowPassIntervals(NUMA *nas, - l_float32 thresh, - l_float32 maxn) -{ -l_int32 n, i, inrun; -l_float32 maxval, threshval, fval, startx, delx, x0, x1; -NUMA *nad; - - PROCNAME("numaLowPassIntervals"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (thresh < 0.0 || thresh > 1.0) - return (NUMA *)ERROR_PTR("invalid thresh", procName, NULL); - - /* The input threshold is a fraction of the max. - * The first entry in nad is the value of the max. */ - n = numaGetCount(nas); - if (maxn == 0.0) - numaGetMax(nas, &maxval, NULL); - else - maxval = maxn; - numaGetParameters(nas, &startx, &delx); - threshval = thresh * maxval; - nad = numaCreate(0); - numaAddNumber(nad, maxval); - - /* Write pairs of pts (x0, x1) for the intervals */ - inrun = FALSE; - for (i = 0; i < n; i++) { - numaGetFValue(nas, i, &fval); - if (fval < threshval && inrun == FALSE) { /* start a new run */ - inrun = TRUE; - x0 = startx + i * delx; - } else if (fval > threshval && inrun == TRUE) { /* end the run */ - inrun = FALSE; - x1 = startx + i * delx; - numaAddNumber(nad, x0); - numaAddNumber(nad, x1); - } - } - if (inrun == TRUE) { /* must end the last run */ - x1 = startx + (n - 1) * delx; - numaAddNumber(nad, x0); - numaAddNumber(nad, x1); - } - - return nad; -} - - -/*! - * \brief numaThresholdEdges() - * - * \param[in] nas input numa - * \param[in] thresh1 low threshold as fraction of max; in [0.0 ... 1.0] - * \param[in] thresh2 high threshold as fraction of max; in [0.0 ... 1.0] - * \param[in] maxn for normalizing; set maxn = 0.0 to use the max in nas - * \return nad edge interval triplets, or NULL on error - * - *
- * Notes:
- *      (1) For each edge interval, where where the value is less
- *          than %thresh1 on one side, greater than %thresh2 on
- *          the other, and between these thresholds throughout the
- *          interval, this records a triplet of values: the
- *          'left' and 'right' edges, and either +1 or -1, depending
- *          on whether the edge is rising or falling.
- *      (2) No assumption is made about the value outside the array,
- *          so if the value at the array edge is between the threshold
- *          values, it is not considered part of an edge.  We start
- *          looking for edge intervals only after leaving the thresholded
- *          band.
- * 
- */ -NUMA * -numaThresholdEdges(NUMA *nas, - l_float32 thresh1, - l_float32 thresh2, - l_float32 maxn) -{ -l_int32 n, i, istart, inband, output, sign; -l_int32 startbelow, below, above, belowlast, abovelast; -l_float32 maxval, threshval1, threshval2, fval, startx, delx, x0, x1; -NUMA *nad; - - PROCNAME("numaThresholdEdges"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (thresh1 < 0.0 || thresh1 > 1.0 || thresh2 < 0.0 || thresh2 > 1.0) - return (NUMA *)ERROR_PTR("invalid thresholds", procName, NULL); - if (thresh2 < thresh1) - return (NUMA *)ERROR_PTR("thresh2 < thresh1", procName, NULL); - - /* The input thresholds are fractions of the max. - * The first entry in nad is the value of the max used - * here for normalization. */ - n = numaGetCount(nas); - if (maxn == 0.0) - numaGetMax(nas, &maxval, NULL); - else - maxval = maxn; - numaGetMax(nas, &maxval, NULL); - numaGetParameters(nas, &startx, &delx); - threshval1 = thresh1 * maxval; - threshval2 = thresh2 * maxval; - nad = numaCreate(0); - numaAddNumber(nad, maxval); - - /* Write triplets of pts (x0, x1, sign) for the edges. - * First make sure we start search from outside the band. - * Only one of {belowlast, abovelast} is true. */ - for (i = 0; i < n; i++) { - istart = i; - numaGetFValue(nas, i, &fval); - belowlast = (fval < threshval1) ? TRUE : FALSE; - abovelast = (fval > threshval2) ? TRUE : FALSE; - if (belowlast == TRUE || abovelast == TRUE) - break; - } - if (istart == n) /* no intervals found */ - return nad; - - /* x0 and x1 can only be set from outside the edge. - * They are the values just before entering the band, - * and just after entering the band. We can jump through - * the band, in which case they differ by one index in nas. */ - inband = FALSE; - startbelow = belowlast; /* one of these is true */ - output = FALSE; - x0 = startx + istart * delx; - for (i = istart + 1; i < n; i++) { - numaGetFValue(nas, i, &fval); - below = (fval < threshval1) ? 
TRUE : FALSE; - above = (fval > threshval2) ? TRUE : FALSE; - if (!inband && belowlast && above) { /* full jump up */ - x1 = startx + i * delx; - sign = 1; - startbelow = FALSE; /* for the next transition */ - output = TRUE; - } else if (!inband && abovelast && below) { /* full jump down */ - x1 = startx + i * delx; - sign = -1; - startbelow = TRUE; /* for the next transition */ - output = TRUE; - } else if (inband && startbelow && above) { /* exit rising; success */ - x1 = startx + i * delx; - sign = 1; - inband = FALSE; - startbelow = FALSE; /* for the next transition */ - output = TRUE; - } else if (inband && !startbelow && below) { - /* exit falling; success */ - x1 = startx + i * delx; - sign = -1; - inband = FALSE; - startbelow = TRUE; /* for the next transition */ - output = TRUE; - } else if (inband && !startbelow && above) { /* exit rising; failure */ - x0 = startx + i * delx; - inband = FALSE; - } else if (inband && startbelow && below) { /* exit falling; failure */ - x0 = startx + i * delx; - inband = FALSE; - } else if (!inband && !above && !below) { /* enter */ - inband = TRUE; - startbelow = belowlast; - } else if (!inband && (above || below)) { /* outside and remaining */ - x0 = startx + i * delx; /* update position */ - } - belowlast = below; - abovelast = above; - if (output) { /* we have exited; save new x0 */ - numaAddNumber(nad, x0); - numaAddNumber(nad, x1); - numaAddNumber(nad, sign); - output = FALSE; - x0 = startx + i * delx; - } - } - - return nad; -} - - -/*! 
- * \brief numaGetSpanValues() - * - * \param[in] na numa that is output of numaLowPassIntervals() - * \param[in] span span number, zero-based - * \param[out] pstart [optional] location of start of transition - * \param[out] pend [optional] location of end of transition - * \return 0 if OK, 1 on error - */ -l_int32 -numaGetSpanValues(NUMA *na, - l_int32 span, - l_int32 *pstart, - l_int32 *pend) -{ -l_int32 n, nspans; - - PROCNAME("numaGetSpanValues"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (n % 2 != 1) - return ERROR_INT("n is not odd", procName, 1); - nspans = n / 2; - if (nspans < 0 || span >= nspans) - return ERROR_INT("invalid span", procName, 1); - - if (pstart) numaGetIValue(na, 2 * span + 1, pstart); - if (pend) numaGetIValue(na, 2 * span + 2, pend); - return 0; -} - - -/*! - * \brief numaGetEdgeValues() - * - * \param[in] na numa that is output of numaThresholdEdges() - * \param[in] edge edge number, zero-based - * \param[out] pstart [optional] location of start of transition - * \param[out] pend [optional] location of end of transition - * \param[out] psign [optional] transition sign: +1 is rising, - * -1 is falling - * \return 0 if OK, 1 on error - */ -l_int32 -numaGetEdgeValues(NUMA *na, - l_int32 edge, - l_int32 *pstart, - l_int32 *pend, - l_int32 *psign) -{ -l_int32 n, nedges; - - PROCNAME("numaGetEdgeValues"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = numaGetCount(na); - if (n % 3 != 1) - return ERROR_INT("n % 3 is not 1", procName, 1); - nedges = (n - 1) / 3; - if (edge < 0 || edge >= nedges) - return ERROR_INT("invalid edge", procName, 1); - - if (pstart) numaGetIValue(na, 3 * edge + 1, pstart); - if (pend) numaGetIValue(na, 3 * edge + 2, pend); - if (psign) numaGetIValue(na, 3 * edge + 3, psign); - return 0; -} - - -/*----------------------------------------------------------------------* - * Interpolation * - 
*----------------------------------------------------------------------*/ -/*! - * \brief numaInterpolateEqxVal() - * - * \param[in] startx xval corresponding to first element in array - * \param[in] deltax x increment between array elements - * \param[in] nay numa of ordinate values, assumed equally spaced - * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP - * \param[in] xval - * \param[out] pyval interpolated value - * \return 0 if OK, 1 on error e.g., if xval is outside range - * - *
- * Notes:
- *      (1) Considering nay as a function of x, the x values
- *          are equally spaced
- *      (2) Caller should check for valid return.
- *
- *  For linear Lagrangian interpolation (through 2 data pts):
- *         y(x) = y1(x-x2)/(x1-x2) + y2(x-x1)/(x2-x1)
- *
- *  For quadratic Lagrangian interpolation (through 3 data pts):
- *         y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) +
- *                y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) +
- *                y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
- *
- * 
- */ -l_ok -numaInterpolateEqxVal(l_float32 startx, - l_float32 deltax, - NUMA *nay, - l_int32 type, - l_float32 xval, - l_float32 *pyval) -{ -l_int32 i, n, i1, i2, i3; -l_float32 x1, x2, x3, fy1, fy2, fy3, d1, d2, d3, del, fi, maxx; -l_float32 *fa; - - PROCNAME("numaInterpolateEqxVal"); - - if (!pyval) - return ERROR_INT("&yval not defined", procName, 1); - *pyval = 0.0; - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (deltax <= 0.0) - return ERROR_INT("deltax not > 0", procName, 1); - if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP) - return ERROR_INT("invalid interp type", procName, 1); - n = numaGetCount(nay); - if (n < 2) - return ERROR_INT("not enough points", procName, 1); - if (type == L_QUADRATIC_INTERP && n == 2) { - type = L_LINEAR_INTERP; - L_WARNING("only 2 points; using linear interp\n", procName); - } - maxx = startx + deltax * (n - 1); - if (xval < startx || xval > maxx) - return ERROR_INT("xval is out of bounds", procName, 1); - - fa = numaGetFArray(nay, L_NOCOPY); - fi = (xval - startx) / deltax; - i = (l_int32)fi; - del = fi - i; - if (del == 0.0) { /* no interpolation required */ - *pyval = fa[i]; - return 0; - } - - if (type == L_LINEAR_INTERP) { - *pyval = fa[i] + del * (fa[i + 1] - fa[i]); - return 0; - } - - /* Quadratic interpolation */ - d1 = d3 = 0.5 / (deltax * deltax); - d2 = -2. * d1; - if (i == 0) { - i1 = i; - i2 = i + 1; - i3 = i + 2; - } else { - i1 = i - 1; - i2 = i; - i3 = i + 1; - } - x1 = startx + i1 * deltax; - x2 = startx + i2 * deltax; - x3 = startx + i3 * deltax; - fy1 = d1 * fa[i1]; - fy2 = d2 * fa[i2]; - fy3 = d3 * fa[i3]; - *pyval = fy1 * (xval - x2) * (xval - x3) + - fy2 * (xval - x1) * (xval - x3) + - fy3 * (xval - x1) * (xval - x2); - return 0; -} - - -/*! 
- * \brief numaInterpolateArbxVal() - * - * \param[in] nax numa of abscissa values - * \param[in] nay numa of ordinate values, corresponding to nax - * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP - * \param[in] xval - * \param[out] pyval interpolated value - * \return 0 if OK, 1 on error e.g., if xval is outside range - * - *
- * Notes:
- *      (1) The values in nax must be sorted in increasing order.
- *          If, additionally, they are equally spaced, you can use
- *          numaInterpolateEqxVal().
- *      (2) Caller should check for valid return.
- *      (3) Uses lagrangian interpolation.  See numaInterpolateEqxVal()
- *          for formulas.
- * 
- */ -l_ok -numaInterpolateArbxVal(NUMA *nax, - NUMA *nay, - l_int32 type, - l_float32 xval, - l_float32 *pyval) -{ -l_int32 i, im, nx, ny, i1, i2, i3; -l_float32 delu, dell, fract, d1, d2, d3; -l_float32 minx, maxx; -l_float32 *fax, *fay; - - PROCNAME("numaInterpolateArbxVal"); - - if (!pyval) - return ERROR_INT("&yval not defined", procName, 1); - *pyval = 0.0; - if (!nax) - return ERROR_INT("nax not defined", procName, 1); - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP) - return ERROR_INT("invalid interp type", procName, 1); - ny = numaGetCount(nay); - nx = numaGetCount(nax); - if (nx != ny) - return ERROR_INT("nax and nay not same size arrays", procName, 1); - if (ny < 2) - return ERROR_INT("not enough points", procName, 1); - if (type == L_QUADRATIC_INTERP && ny == 2) { - type = L_LINEAR_INTERP; - L_WARNING("only 2 points; using linear interp\n", procName); - } - numaGetFValue(nax, 0, &minx); - numaGetFValue(nax, nx - 1, &maxx); - if (xval < minx || xval > maxx) - return ERROR_INT("xval is out of bounds", procName, 1); - - fax = numaGetFArray(nax, L_NOCOPY); - fay = numaGetFArray(nay, L_NOCOPY); - - /* Linear search for interval. We are guaranteed - * to either return or break out of the loop. 
- * In addition, we are assured that fax[i] - fax[im] > 0.0 */ - if (xval == fax[0]) { - *pyval = fay[0]; - return 0; - } - im = 0; - dell = 0.0; - for (i = 1; i < nx; i++) { - delu = fax[i] - xval; - if (delu >= 0.0) { /* we've passed it */ - if (delu == 0.0) { - *pyval = fay[i]; - return 0; - } - im = i - 1; - dell = xval - fax[im]; /* >= 0 */ - break; - } - } - fract = dell / (fax[i] - fax[im]); - - if (type == L_LINEAR_INTERP) { - *pyval = fay[i] + fract * (fay[i + 1] - fay[i]); - return 0; - } - - /* Quadratic interpolation */ - if (im == 0) { - i1 = im; - i2 = im + 1; - i3 = im + 2; - } else { - i1 = im - 1; - i2 = im; - i3 = im + 1; - } - d1 = (fax[i1] - fax[i2]) * (fax[i1] - fax[i3]); - d2 = (fax[i2] - fax[i1]) * (fax[i2] - fax[i3]); - d3 = (fax[i3] - fax[i1]) * (fax[i3] - fax[i2]); - *pyval = fay[i1] * (xval - fax[i2]) * (xval - fax[i3]) / d1 + - fay[i2] * (xval - fax[i1]) * (xval - fax[i3]) / d2 + - fay[i3] * (xval - fax[i1]) * (xval - fax[i2]) / d3; - return 0; -} - - -/*! - * \brief numaInterpolateEqxInterval() - * - * \param[in] startx xval corresponding to first element in nas - * \param[in] deltax x increment between array elements in nas - * \param[in] nasy numa of ordinate values, assumed equally spaced - * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP - * \param[in] x0 start value of interval - * \param[in] x1 end value of interval - * \param[in] npts number of points to evaluate function in interval - * \param[out] pnax [optional] array of x values in interval - * \param[out] pnay array of y values in interval - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Considering nasy as a function of x, the x values
- *          are equally spaced.
- *      (2) This creates nay (and optionally nax) of interpolated
- *          values over the specified interval (x0, x1).
- *      (3) If the interval (x0, x1) lies partially outside the array
- *          nasy (as interpreted by startx and deltax), it is an
- *          error and returns 1.
- *      (4) Note that deltax is the intrinsic x-increment for the input
- *          array nasy, whereas delx is the intrinsic x-increment for the
- *          output interpolated array nay.
- * 
- */ -l_ok -numaInterpolateEqxInterval(l_float32 startx, - l_float32 deltax, - NUMA *nasy, - l_int32 type, - l_float32 x0, - l_float32 x1, - l_int32 npts, - NUMA **pnax, - NUMA **pnay) -{ -l_int32 i, n; -l_float32 x, yval, maxx, delx; -NUMA *nax, *nay; - - PROCNAME("numaInterpolateEqxInterval"); - - if (pnax) *pnax = NULL; - if (!pnay) - return ERROR_INT("&nay not defined", procName, 1); - *pnay = NULL; - if (!nasy) - return ERROR_INT("nasy not defined", procName, 1); - if (deltax <= 0.0) - return ERROR_INT("deltax not > 0", procName, 1); - if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP) - return ERROR_INT("invalid interp type", procName, 1); - n = numaGetCount(nasy); - if (type == L_QUADRATIC_INTERP && n == 2) { - type = L_LINEAR_INTERP; - L_WARNING("only 2 points; using linear interp\n", procName); - } - maxx = startx + deltax * (n - 1); - if (x0 < startx || x1 > maxx || x1 <= x0) - return ERROR_INT("[x0 ... x1] is not valid", procName, 1); - if (npts < 3) - return ERROR_INT("npts < 3", procName, 1); - delx = (x1 - x0) / (l_float32)(npts - 1); /* delx is for output nay */ - - if ((nay = numaCreate(npts)) == NULL) - return ERROR_INT("nay not made", procName, 1); - numaSetParameters(nay, x0, delx); - *pnay = nay; - if (pnax) { - nax = numaCreate(npts); - *pnax = nax; - } - - for (i = 0; i < npts; i++) { - x = x0 + i * delx; - if (pnax) - numaAddNumber(nax, x); - numaInterpolateEqxVal(startx, deltax, nasy, type, x, &yval); - numaAddNumber(nay, yval); - } - - return 0; -} - - -/*! 
- * \brief numaInterpolateArbxInterval() - * - * \param[in] nax numa of abscissa values - * \param[in] nay numa of ordinate values, corresponding to nax - * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP - * \param[in] x0 start value of interval - * \param[in] x1 end value of interval - * \param[in] npts number of points to evaluate function in interval - * \param[out] pnadx [optional] array of x values in interval - * \param[out] pnady array of y values in interval - * \return 0 if OK, 1 on error e.g., if x0 or x1 is outside range - * - *
- * Notes:
- *      (1) The values in nax must be sorted in increasing order.
- *          If they are not sorted, we do it here, and complain.
- *      (2) If the values in nax are equally spaced, you can use
- *          numaInterpolateEqxInterval().
- *      (3) Caller should check for valid return.
- *      (4) We don't call numaInterpolateArbxVal() for each output
- *          point, because that requires an O(n) search for
- *          each point.  Instead, we do a single O(n) pass through
- *          nax, saving the indices to be used for each output yval.
- *      (5) Uses lagrangian interpolation.  See numaInterpolateEqxVal()
- *          for formulas.
- * 
- */ -l_ok -numaInterpolateArbxInterval(NUMA *nax, - NUMA *nay, - l_int32 type, - l_float32 x0, - l_float32 x1, - l_int32 npts, - NUMA **pnadx, - NUMA **pnady) -{ -l_int32 i, im, j, nx, ny, i1, i2, i3, sorted; -l_int32 *index; -l_float32 del, xval, yval, excess, fract, minx, maxx, d1, d2, d3; -l_float32 *fax, *fay; -NUMA *nasx, *nasy, *nadx, *nady; - - PROCNAME("numaInterpolateArbxInterval"); - - if (pnadx) *pnadx = NULL; - if (!pnady) - return ERROR_INT("&nady not defined", procName, 1); - *pnady = NULL; - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (!nax) - return ERROR_INT("nax not defined", procName, 1); - if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP) - return ERROR_INT("invalid interp type", procName, 1); - if (x0 > x1) - return ERROR_INT("x0 > x1", procName, 1); - ny = numaGetCount(nay); - nx = numaGetCount(nax); - if (nx != ny) - return ERROR_INT("nax and nay not same size arrays", procName, 1); - if (ny < 2) - return ERROR_INT("not enough points", procName, 1); - if (type == L_QUADRATIC_INTERP && ny == 2) { - type = L_LINEAR_INTERP; - L_WARNING("only 2 points; using linear interp\n", procName); - } - numaGetMin(nax, &minx, NULL); - numaGetMax(nax, &maxx, NULL); - if (x0 < minx || x1 > maxx) - return ERROR_INT("xval is out of bounds", procName, 1); - - /* Make sure that nax is sorted in increasing order */ - numaIsSorted(nax, L_SORT_INCREASING, &sorted); - if (!sorted) { - L_WARNING("we are sorting nax in increasing order\n", procName); - numaSortPair(nax, nay, L_SORT_INCREASING, &nasx, &nasy); - } else { - nasx = numaClone(nax); - nasy = numaClone(nay); - } - - fax = numaGetFArray(nasx, L_NOCOPY); - fay = numaGetFArray(nasy, L_NOCOPY); - - /* Get array of indices into fax for interpolated locations */ - if ((index = (l_int32 *)LEPT_CALLOC(npts, sizeof(l_int32))) == NULL) { - numaDestroy(&nasx); - numaDestroy(&nasy); - return ERROR_INT("ind not made", procName, 1); - } - del = (x1 - x0) / (npts - 1.0); - for (i = 0, j = 0; 
j < nx && i < npts; i++) { - xval = x0 + i * del; - while (j < nx - 1 && xval > fax[j]) - j++; - if (xval == fax[j]) - index[i] = L_MIN(j, nx - 1); - else /* the index of fax[] is just below xval */ - index[i] = L_MAX(j - 1, 0); - } - - /* For each point to be interpolated, get the y value */ - nady = numaCreate(npts); - *pnady = nady; - if (pnadx) { - nadx = numaCreate(npts); - *pnadx = nadx; - } - for (i = 0; i < npts; i++) { - xval = x0 + i * del; - if (pnadx) - numaAddNumber(nadx, xval); - im = index[i]; - excess = xval - fax[im]; - if (excess == 0.0) { - numaAddNumber(nady, fay[im]); - continue; - } - fract = excess / (fax[im + 1] - fax[im]); - - if (type == L_LINEAR_INTERP) { - yval = fay[im] + fract * (fay[im + 1] - fay[im]); - numaAddNumber(nady, yval); - continue; - } - - /* Quadratic interpolation */ - if (im == 0) { - i1 = im; - i2 = im + 1; - i3 = im + 2; - } else { - i1 = im - 1; - i2 = im; - i3 = im + 1; - } - d1 = (fax[i1] - fax[i2]) * (fax[i1] - fax[i3]); - d2 = (fax[i2] - fax[i1]) * (fax[i2] - fax[i3]); - d3 = (fax[i3] - fax[i1]) * (fax[i3] - fax[i2]); - yval = fay[i1] * (xval - fax[i2]) * (xval - fax[i3]) / d1 + - fay[i2] * (xval - fax[i1]) * (xval - fax[i3]) / d2 + - fay[i3] * (xval - fax[i1]) * (xval - fax[i2]) / d3; - numaAddNumber(nady, yval); - } - - LEPT_FREE(index); - numaDestroy(&nasx); - numaDestroy(&nasy); - return 0; -} - - -/*----------------------------------------------------------------------* - * Functions requiring interpolation * - *----------------------------------------------------------------------*/ -/*! - * \brief numaFitMax() - * - * \param[in] na numa of ordinate values, to fit a max to - * \param[out] pmaxval max value - * \param[in] naloc [optional] associated numa of abscissa values - * \param[out] pmaxloc abscissa value that gives max value in na; - * if naloc == null, this is given as an interpolated - * index value - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *        If %naloc is given, there is no requirement that the
- *        data points are evenly spaced.  Lagrangian interpolation
- *        handles that.  The only requirement is that the
- *        data points are ordered so that the values in naloc
- *        are either increasing or decreasing.  We test to make
- *        sure that the sizes of na and naloc are equal, and it
- *        is assumed that the correspondences %na[i] as a function
- *        of %naloc[i] are properly arranged for all i.
- *
- *  The formula for Lagrangian interpolation through 3 data pts is:
- *       y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) +
- *              y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) +
- *              y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
- *
- *  Then the derivative, using the constants (c1,c2,c3) defined below,
- *  is set to 0:
- *       y'(x) = 2x(c1+c2+c3) - c1(x2+x3) - c2(x1+x3) - c3(x1+x2) = 0
- * 
- */ -l_ok -numaFitMax(NUMA *na, - l_float32 *pmaxval, - NUMA *naloc, - l_float32 *pmaxloc) -{ -l_float32 val; -l_float32 smaxval; /* start value of maximum sample, before interpolating */ -l_int32 n, imaxloc; -l_float32 x1, x2, x3, y1, y2, y3, c1, c2, c3, a, b, xmax, ymax; - - PROCNAME("numaFitMax"); - - if (pmaxval) *pmaxval = 0.0; - if (pmaxloc) *pmaxloc = 0.0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (!pmaxval) - return ERROR_INT("&maxval not defined", procName, 1); - if (!pmaxloc) - return ERROR_INT("&maxloc not defined", procName, 1); - - n = numaGetCount(na); - if (naloc) { - if (n != numaGetCount(naloc)) - return ERROR_INT("na and naloc of unequal size", procName, 1); - } - numaGetMax(na, &smaxval, &imaxloc); - - /* Simple case: max is at end point */ - if (imaxloc == 0 || imaxloc == n - 1) { - *pmaxval = smaxval; - if (naloc) { - numaGetFValue(naloc, imaxloc, &val); - *pmaxloc = val; - } else { - *pmaxloc = imaxloc; - } - return 0; - } - - /* Interior point; use quadratic interpolation */ - y2 = smaxval; - numaGetFValue(na, imaxloc - 1, &val); - y1 = val; - numaGetFValue(na, imaxloc + 1, &val); - y3 = val; - if (naloc) { - numaGetFValue(naloc, imaxloc - 1, &val); - x1 = val; - numaGetFValue(naloc, imaxloc, &val); - x2 = val; - numaGetFValue(naloc, imaxloc + 1, &val); - x3 = val; - } else { - x1 = imaxloc - 1; - x2 = imaxloc; - x3 = imaxloc + 1; - } - - /* Can't interpolate; just use the max val in na - * and the corresponding one in naloc */ - if (x1 == x2 || x1 == x3 || x2 == x3) { - *pmaxval = y2; - *pmaxloc = x2; - return 0; - } - - /* Use lagrangian interpolation; set dy/dx = 0 */ - c1 = y1 / ((x1 - x2) * (x1 - x3)); - c2 = y2 / ((x2 - x1) * (x2 - x3)); - c3 = y3 / ((x3 - x1) * (x3 - x2)); - a = c1 + c2 + c3; - b = c1 * (x2 + x3) + c2 * (x1 + x3) + c3 * (x1 + x2); - xmax = b / (2 * a); - ymax = c1 * (xmax - x2) * (xmax - x3) + - c2 * (xmax - x1) * (xmax - x3) + - c3 * (xmax - x1) * (xmax - x2); - *pmaxval = ymax; - *pmaxloc = 
xmax; - - return 0; -} - - -/*! - * \brief numaDifferentiateInterval() - * - * \param[in] nax numa of abscissa values - * \param[in] nay numa of ordinate values, corresponding to nax - * \param[in] x0 start value of interval - * \param[in] x1 end value of interval - * \param[in] npts number of points to evaluate function in interval - * \param[out] pnadx [optional] array of x values in interval - * \param[out] pnady array of derivatives in interval - * \return 0 if OK, 1 on error e.g., if x0 or x1 is outside range - * - *
- * Notes:
- *      (1) The values in nax must be sorted in increasing order.
- *          If they are not sorted, it is done in the interpolation
- *          step, and a warning is issued.
- *      (2) Caller should check for valid return.
- * 
- */ -l_ok -numaDifferentiateInterval(NUMA *nax, - NUMA *nay, - l_float32 x0, - l_float32 x1, - l_int32 npts, - NUMA **pnadx, - NUMA **pnady) -{ -l_int32 i, nx, ny; -l_float32 minx, maxx, der, invdel; -l_float32 *fay; -NUMA *nady, *naiy; - - PROCNAME("numaDifferentiateInterval"); - - if (pnadx) *pnadx = NULL; - if (!pnady) - return ERROR_INT("&nady not defined", procName, 1); - *pnady = NULL; - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (!nax) - return ERROR_INT("nax not defined", procName, 1); - if (x0 > x1) - return ERROR_INT("x0 > x1", procName, 1); - ny = numaGetCount(nay); - nx = numaGetCount(nax); - if (nx != ny) - return ERROR_INT("nax and nay not same size arrays", procName, 1); - if (ny < 2) - return ERROR_INT("not enough points", procName, 1); - numaGetMin(nax, &minx, NULL); - numaGetMax(nax, &maxx, NULL); - if (x0 < minx || x1 > maxx) - return ERROR_INT("xval is out of bounds", procName, 1); - if (npts < 2) - return ERROR_INT("npts < 2", procName, 1); - - /* Generate interpolated array over specified interval */ - if (numaInterpolateArbxInterval(nax, nay, L_LINEAR_INTERP, x0, x1, - npts, pnadx, &naiy)) - return ERROR_INT("interpolation failed", procName, 1); - - nady = numaCreate(npts); - *pnady = nady; - invdel = 0.5 * ((l_float32)npts - 1.0) / (x1 - x0); - fay = numaGetFArray(naiy, L_NOCOPY); - - /* Compute and save derivatives */ - der = 0.5 * invdel * (fay[1] - fay[0]); - numaAddNumber(nady, der); - for (i = 1; i < npts - 1; i++) { - der = invdel * (fay[i + 1] - fay[i - 1]); - numaAddNumber(nady, der); - } - der = 0.5 * invdel * (fay[npts - 1] - fay[npts - 2]); - numaAddNumber(nady, der); - - numaDestroy(&naiy); - return 0; -} - - -/*! 
- * \brief numaIntegrateInterval() - * - * \param[in] nax numa of abscissa values - * \param[in] nay numa of ordinate values, corresponding to nax - * \param[in] x0 start value of interval - * \param[in] x1 end value of interval - * \param[in] npts number of points to evaluate function in interval - * \param[out] psum integral of function over interval - * \return 0 if OK, 1 on error e.g., if x0 or x1 is outside range - * - *
- * Notes:
- *      (1) The values in nax must be sorted in increasing order.
- *          If they are not sorted, it is done in the interpolation
- *          step, and a warning is issued.
- *      (2) Caller should check for valid return.
- * 
- */ -l_ok -numaIntegrateInterval(NUMA *nax, - NUMA *nay, - l_float32 x0, - l_float32 x1, - l_int32 npts, - l_float32 *psum) -{ -l_int32 i, nx, ny; -l_float32 minx, maxx, sum, del; -l_float32 *fay; -NUMA *naiy; - - PROCNAME("numaIntegrateInterval"); - - if (!psum) - return ERROR_INT("&sum not defined", procName, 1); - *psum = 0.0; - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (!nax) - return ERROR_INT("nax not defined", procName, 1); - if (x0 > x1) - return ERROR_INT("x0 > x1", procName, 1); - if (npts < 2) - return ERROR_INT("npts < 2", procName, 1); - ny = numaGetCount(nay); - nx = numaGetCount(nax); - if (nx != ny) - return ERROR_INT("nax and nay not same size arrays", procName, 1); - if (ny < 2) - return ERROR_INT("not enough points", procName, 1); - numaGetMin(nax, &minx, NULL); - numaGetMax(nax, &maxx, NULL); - if (x0 < minx || x1 > maxx) - return ERROR_INT("xval is out of bounds", procName, 1); - - /* Generate interpolated array over specified interval */ - if (numaInterpolateArbxInterval(nax, nay, L_LINEAR_INTERP, x0, x1, - npts, NULL, &naiy)) - return ERROR_INT("interpolation failed", procName, 1); - - del = (x1 - x0) / ((l_float32)npts - 1.0); - fay = numaGetFArray(naiy, L_NOCOPY); - - /* Compute integral (simple trapezoid) */ - sum = 0.5 * (fay[0] + fay[npts - 1]); - for (i = 1; i < npts - 1; i++) - sum += fay[i]; - *psum = del * sum; - - numaDestroy(&naiy); - return 0; -} - - -/*----------------------------------------------------------------------* - * Sorting * - *----------------------------------------------------------------------*/ -/*! 
- * \brief numaSortGeneral() - * - * \param[in] na source numa - * \param[out] pnasort [optional] sorted numa - * \param[out] pnaindex [optional] index of elements in na associated - * with each element of nasort - * \param[out] pnainvert [optional] index of elements in nasort associated - * with each element of na - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \param[in] sorttype L_SHELL_SORT or L_BIN_SORT - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Sorting can be confusing.  Here's an array of five values with
- *          the results shown for the 3 output arrays.
- *
- *          na      nasort   naindex   nainvert
- *          -----------------------------------
- *          3         9         2         3
- *          4         6         3         2
- *          9         4         1         0
- *          6         3         0         1
- *          1         1         4         4
- *
- *          Note that naindex is a LUT into na for the sorted array values,
- *          and nainvert directly gives the sorted index values for the
- *          input array.  It is useful to view naindex is as a map:
- *                 0  -->  2
- *                 1  -->  3
- *                 2  -->  1
- *                 3  -->  0
- *                 4  -->  4
- *          and nainvert, the inverse of this map:
- *                 0  -->  3
- *                 1  -->  2
- *                 2  -->  0
- *                 3  -->  1
- *                 4  -->  4
- *
- *          We can write these relations symbolically as:
- *              nasort[i] = na[naindex[i]]
- *              na[i] = nasort[nainvert[i]]
- * 
- */ -l_ok -numaSortGeneral(NUMA *na, - NUMA **pnasort, - NUMA **pnaindex, - NUMA **pnainvert, - l_int32 sortorder, - l_int32 sorttype) -{ -NUMA *naindex; - - PROCNAME("numaSortGeneral"); - - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return ERROR_INT("invalid sort order", procName, 1); - if (sorttype != L_SHELL_SORT && sorttype != L_BIN_SORT) - return ERROR_INT("invalid sort type", procName, 1); - if (!pnasort && !pnaindex && !pnainvert) - return ERROR_INT("nothing to do", procName, 1); - if (pnasort) *pnasort = NULL; - if (pnaindex) *pnaindex = NULL; - if (pnainvert) *pnainvert = NULL; - - if (sorttype == L_SHELL_SORT) - naindex = numaGetSortIndex(na, sortorder); - else /* sorttype == L_BIN_SORT */ - naindex = numaGetBinSortIndex(na, sortorder); - - if (pnasort) - *pnasort = numaSortByIndex(na, naindex); - if (pnainvert) - *pnainvert = numaInvertMap(naindex); - if (pnaindex) - *pnaindex = naindex; - else - numaDestroy(&naindex); - return 0; -} - - -/*! - * \brief numaSortAutoSelect() - * - * \param[in] nas input numa - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return naout output sorted numa, or NULL on error - * - *
- * Notes:
- *      (1) This does either a shell sort or a bin sort, depending on
- *          the number of elements in nas and the dynamic range.
- * 
- */ -NUMA * -numaSortAutoSelect(NUMA *nas, - l_int32 sortorder) -{ -l_int32 type; - - PROCNAME("numaSortAutoSelect"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (NUMA *)ERROR_PTR("invalid sort order", procName, NULL); - - type = numaChooseSortType(nas); - if (type == L_SHELL_SORT) - return numaSort(NULL, nas, sortorder); - else if (type == L_BIN_SORT) - return numaBinSort(nas, sortorder); - else - return (NUMA *)ERROR_PTR("invalid sort type", procName, NULL); -} - - -/*! - * \brief numaSortIndexAutoSelect() - * - * \param[in] nas - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return nad indices of nas, sorted by value in nas, or NULL on error - * - *
- * Notes:
- *      (1) This does either a shell sort or a bin sort, depending on
- *          the number of elements in nas and the dynamic range.
- * 
- */ -NUMA * -numaSortIndexAutoSelect(NUMA *nas, - l_int32 sortorder) -{ -l_int32 type; - - PROCNAME("numaSortIndexAutoSelect"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (NUMA *)ERROR_PTR("invalid sort order", procName, NULL); - - type = numaChooseSortType(nas); - if (type == L_SHELL_SORT) - return numaGetSortIndex(nas, sortorder); - else if (type == L_BIN_SORT) - return numaGetBinSortIndex(nas, sortorder); - else - return (NUMA *)ERROR_PTR("invalid sort type", procName, NULL); -} - - -/*! - * \brief numaChooseSortType() - * - * \param[in] nas to be sorted - * \return sorttype L_SHELL_SORT or L_BIN_SORT, or UNDEF on error. - * - *
- * Notes:
- *      (1) This selects either a shell sort or a bin sort, depending on
- *          the number of elements in nas and the dynamic range.
- *      (2) If there are negative values in nas, it selects shell sort.
- * 
- */ -l_int32 -numaChooseSortType(NUMA *nas) -{ -l_int32 n, type; -l_float32 minval, maxval; - - PROCNAME("numaChooseSortType"); - - if (!nas) - return ERROR_INT("nas not defined", procName, UNDEF); - - numaGetMin(nas, &minval, NULL); - n = numaGetCount(nas); - - /* Very small histogram; use shell sort */ - if (minval < 0.0 || n < 200) { - L_INFO("Shell sort chosen\n", procName); - return L_SHELL_SORT; - } - - /* Need to compare nlog(n) with maxval. The factor of 0.003 - * was determined by comparing times for different histogram - * sizes and maxval. It is very small because binsort is fast - * and shell sort gets slow for large n. */ - numaGetMax(nas, &maxval, NULL); - if (n * log((l_float32)n) < 0.003 * maxval) { - type = L_SHELL_SORT; - L_INFO("Shell sort chosen\n", procName); - } else { - type = L_BIN_SORT; - L_INFO("Bin sort chosen\n", procName); - } - return type; -} - - -/*! - * \brief numaSort() - * - * \param[in] naout output numa; can be NULL or equal to nain - * \param[in] nain input numa - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return naout output sorted numa, or NULL on error - * - *
- * Notes:
- *      (1) Set naout = nain for in-place; otherwise, set naout = NULL.
- *      (2) Source: Shell sort, modified from K&R, 2nd edition, p.62.
- *          Slow but simple O(n logn) sort.
- * 
- */ -NUMA * -numaSort(NUMA *naout, - NUMA *nain, - l_int32 sortorder) -{ -l_int32 i, n, gap, j; -l_float32 tmp; -l_float32 *array; - - PROCNAME("numaSort"); - - if (!nain) - return (NUMA *)ERROR_PTR("nain not defined", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (NUMA *)ERROR_PTR("invalid sort order", procName, NULL); - - /* Make naout if necessary; otherwise do in-place */ - if (!naout) - naout = numaCopy(nain); - else if (nain != naout) - return (NUMA *)ERROR_PTR("invalid: not in-place", procName, NULL); - array = naout->array; /* operate directly on the array */ - n = numaGetCount(naout); - - /* Shell sort */ - for (gap = n/2; gap > 0; gap = gap / 2) { - for (i = gap; i < n; i++) { - for (j = i - gap; j >= 0; j -= gap) { - if ((sortorder == L_SORT_INCREASING && - array[j] > array[j + gap]) || - (sortorder == L_SORT_DECREASING && - array[j] < array[j + gap])) - { - tmp = array[j]; - array[j] = array[j + gap]; - array[j + gap] = tmp; - } - } - } - } - - return naout; -} - - -/*! - * \brief numaBinSort() - * - * \param[in] nas of non-negative integers with a max that is - * typically less than 50,000 - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return na sorted, or NULL on error - * - *
- * Notes:
- *      (1) Because this uses a bin sort with buckets of size 1, it
- *          is not appropriate for sorting either small arrays or
- *          arrays containing very large integer values.  For such
- *          arrays, use a standard general sort function like
- *          numaSort().
- * 
- */ -NUMA * -numaBinSort(NUMA *nas, - l_int32 sortorder) -{ -NUMA *nat, *nad; - - PROCNAME("numaBinSort"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (NUMA *)ERROR_PTR("invalid sort order", procName, NULL); - - nat = numaGetBinSortIndex(nas, sortorder); - nad = numaSortByIndex(nas, nat); - numaDestroy(&nat); - return nad; -} - - -/*! - * \brief numaGetSortIndex() - * - * \param[in] na source numa - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return na giving an array of indices that would sort - * the input array, or NULL on error - */ -NUMA * -numaGetSortIndex(NUMA *na, - l_int32 sortorder) -{ -l_int32 i, n, gap, j; -l_float32 tmp; -l_float32 *array; /* copy of input array */ -l_float32 *iarray; /* array of indices */ -NUMA *naisort; - - PROCNAME("numaGetSortIndex"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (NUMA *)ERROR_PTR("invalid sortorder", procName, NULL); - - n = numaGetCount(na); - if ((array = numaGetFArray(na, L_COPY)) == NULL) - return (NUMA *)ERROR_PTR("array not made", procName, NULL); - if ((iarray = (l_float32 *)LEPT_CALLOC(n, sizeof(l_float32))) == NULL) { - LEPT_FREE(array); - return (NUMA *)ERROR_PTR("iarray not made", procName, NULL); - } - for (i = 0; i < n; i++) - iarray[i] = i; - - /* Shell sort */ - for (gap = n/2; gap > 0; gap = gap / 2) { - for (i = gap; i < n; i++) { - for (j = i - gap; j >= 0; j -= gap) { - if ((sortorder == L_SORT_INCREASING && - array[j] > array[j + gap]) || - (sortorder == L_SORT_DECREASING && - array[j] < array[j + gap])) - { - tmp = array[j]; - array[j] = array[j + gap]; - array[j + gap] = tmp; - tmp = iarray[j]; - iarray[j] = iarray[j + gap]; - iarray[j + gap] = tmp; - } - } - } - } - - naisort = numaCreate(n); - for (i = 0; i < n; i++) - numaAddNumber(naisort, 
iarray[i]); - - LEPT_FREE(array); - LEPT_FREE(iarray); - return naisort; -} - - -/*! - * \brief numaGetBinSortIndex() - * - * \param[in] nas of non-negative integers with a max that is - * typically less than 1,000,000 - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return na sorted, or NULL on error - * - *
- * Notes:
- *      (1) This creates an array (or lookup table) that contains
- *          the sorted position of the elements in the input Numa.
- *      (2) Because it uses a bin sort with buckets of size 1, it
- *          is not appropriate for sorting either small arrays or
- *          arrays containing very large integer values.  For such
- *          arrays, use a standard general sort function like
- *          numaGetSortIndex().
- * 
- */ -NUMA * -numaGetBinSortIndex(NUMA *nas, - l_int32 sortorder) -{ -l_int32 i, n, isize, ival, imax; -l_float32 size; -NUMA *na, *nai, *nad; -L_PTRA *paindex; - - PROCNAME("numaGetBinSortIndex"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (NUMA *)ERROR_PTR("invalid sort order", procName, NULL); - - /* Set up a ptra holding numa at indices for which there - * are values in nas. Suppose nas has the value 230 at index - * 7355. A numa holding the index 7355 is created and stored - * at the ptra index 230. If there is another value of 230 - * in nas, its index is added to the same numa (at index 230 - * in the ptra). When finished, the ptra can be scanned for numa, - * and the original indices in the nas can be read out. In this - * way, the ptra effectively sorts the input numbers in the nas. */ - numaGetMax(nas, &size, NULL); - isize = (l_int32)size; - if (isize > 1000000) - L_WARNING("large array: %d elements\n", procName, isize); - paindex = ptraCreate(isize + 1); - n = numaGetCount(nas); - for (i = 0; i < n; i++) { - numaGetIValue(nas, i, &ival); - nai = (NUMA *)ptraGetPtrToItem(paindex, ival); - if (!nai) { /* make it; no shifting will occur */ - nai = numaCreate(1); - ptraInsert(paindex, ival, nai, L_MIN_DOWNSHIFT); - } - numaAddNumber(nai, i); - } - - /* Sort by scanning the ptra, extracting numas and pulling - * the (index into nas) numbers out of each numa, taken - * successively in requested order. 
*/ - ptraGetMaxIndex(paindex, &imax); - nad = numaCreate(0); - if (sortorder == L_SORT_INCREASING) { - for (i = 0; i <= imax; i++) { - na = (NUMA *)ptraRemove(paindex, i, L_NO_COMPACTION); - if (!na) continue; - numaJoin(nad, na, 0, -1); - numaDestroy(&na); - } - } else { /* L_SORT_DECREASING */ - for (i = imax; i >= 0; i--) { - na = (NUMA *)ptraRemoveLast(paindex); - if (!na) break; /* they've all been removed */ - numaJoin(nad, na, 0, -1); - numaDestroy(&na); - } - } - - ptraDestroy(&paindex, FALSE, FALSE); - return nad; -} - - -/*! - * \brief numaSortByIndex() - * - * \param[in] nas - * \param[in] naindex na that maps from the new numa to the input numa - * \return nad sorted, or NULL on error - */ -NUMA * -numaSortByIndex(NUMA *nas, - NUMA *naindex) -{ -l_int32 i, n, index; -l_float32 val; -NUMA *nad; - - PROCNAME("numaSortByIndex"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (!naindex) - return (NUMA *)ERROR_PTR("naindex not defined", procName, NULL); - - n = numaGetCount(nas); - nad = numaCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(naindex, i, &index); - numaGetFValue(nas, index, &val); - numaAddNumber(nad, val); - } - - return nad; -} - - -/*! - * \brief numaIsSorted() - * - * \param[in] nas - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \param[out] psorted 1 if sorted; 0 if not - * \return 1 if OK; 0 on error - * - *
- * Notes:
- *      (1) This is a quick O(n) test if nas is sorted.  It is useful
- *          in situations where the array is likely to be already
- *          sorted, and a sort operation can be avoided.
- * 
- */ -l_int32 -numaIsSorted(NUMA *nas, - l_int32 sortorder, - l_int32 *psorted) -{ -l_int32 i, n; -l_float32 prevval, val; - - PROCNAME("numaIsSorted"); - - if (!psorted) - return ERROR_INT("&sorted not defined", procName, 1); - *psorted = FALSE; - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return ERROR_INT("invalid sortorder", procName, 1); - - n = numaGetCount(nas); - numaGetFValue(nas, 0, &prevval); - for (i = 1; i < n; i++) { - numaGetFValue(nas, i, &val); - if ((sortorder == L_SORT_INCREASING && val < prevval) || - (sortorder == L_SORT_DECREASING && val > prevval)) - return 0; - } - - *psorted = TRUE; - return 0; -} - - -/*! - * \brief numaSortPair() - * - * \param[in] nax, nay input arrays - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \param[out] pnasx sorted - * \param[out] pnasy sorted exactly in order of nasx - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function sorts the two input arrays, nax and nay,
- *          together, using nax as the key for sorting.
- * 
- */ -l_ok -numaSortPair(NUMA *nax, - NUMA *nay, - l_int32 sortorder, - NUMA **pnasx, - NUMA **pnasy) -{ -l_int32 sorted; -NUMA *naindex; - - PROCNAME("numaSortPair"); - - if (pnasx) *pnasx = NULL; - if (pnasy) *pnasy = NULL; - if (!pnasx || !pnasy) - return ERROR_INT("&nasx and/or &nasy not defined", procName, 1); - if (!nax) - return ERROR_INT("nax not defined", procName, 1); - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return ERROR_INT("invalid sortorder", procName, 1); - - numaIsSorted(nax, sortorder, &sorted); - if (sorted == TRUE) { - *pnasx = numaCopy(nax); - *pnasy = numaCopy(nay); - } else { - naindex = numaGetSortIndex(nax, sortorder); - *pnasx = numaSortByIndex(nax, naindex); - *pnasy = numaSortByIndex(nay, naindex); - numaDestroy(&naindex); - } - - return 0; -} - - -/*! - * \brief numaInvertMap() - * - * \param[in] nas - * \return nad the inverted map, or NULL on error or if not invertible - * - *
- * Notes:
- *      (1) This requires that nas contain each integer from 0 to n-1.
- *          The array is typically an index array into a sort or permutation
- *          of another array.
- * 
- */ -NUMA * -numaInvertMap(NUMA *nas) -{ -l_int32 i, n, val, error; -l_int32 *test; -NUMA *nad; - - PROCNAME("numaInvertMap"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - - n = numaGetCount(nas); - nad = numaMakeConstant(0.0, n); - test = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32)); - error = 0; - for (i = 0; i < n; i++) { - numaGetIValue(nas, i, &val); - if (val >= n) { - error = 1; - break; - } - numaReplaceNumber(nad, val, i); - if (test[val] == 0) { - test[val] = 1; - } else { - error = 1; - break; - } - } - - LEPT_FREE(test); - if (error) { - numaDestroy(&nad); - return (NUMA *)ERROR_PTR("nas not invertible", procName, NULL); - } - - return nad; -} - - -/*----------------------------------------------------------------------* - * Random permutation * - *----------------------------------------------------------------------*/ -/*! - * \brief numaPseudorandomSequence() - * - * \param[in] size of sequence - * \param[in] seed for random number generation - * \return na pseudorandom on {0,...,size - 1}, or NULL on error - * - *
- * Notes:
- *      (1) This uses the Durstenfeld shuffle.
- *          See: http://en.wikipedia.org/wiki/Fisher–Yates_shuffle.
- *          Result is a pseudorandom permutation of the sequence of integers
- *          from 0 to size - 1.
- * 
- */ -NUMA * -numaPseudorandomSequence(l_int32 size, - l_int32 seed) -{ -l_int32 i, index, temp; -l_int32 *array; -NUMA *na; - - PROCNAME("numaPseudorandomSequence"); - - if (size <= 0) - return (NUMA *)ERROR_PTR("size <= 0", procName, NULL); - - if ((array = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32))) == NULL) - return (NUMA *)ERROR_PTR("array not made", procName, NULL); - for (i = 0; i < size; i++) - array[i] = i; - srand(seed); - for (i = size - 1; i > 0; i--) { - index = (l_int32)((i + 1) * ((l_float64)rand() / (l_float64)RAND_MAX)); - index = L_MIN(index, i); - temp = array[i]; - array[i] = array[index]; - array[index] = temp; - } - - na = numaCreateFromIArray(array, size); - LEPT_FREE(array); - return na; -} - - -/*! - * \brief numaRandomPermutation() - * - * \param[in] nas input array - * \param[in] seed for random number generation - * \return nas randomly shuffled array, or NULL on error - */ -NUMA * -numaRandomPermutation(NUMA *nas, - l_int32 seed) -{ -l_int32 i, index, size; -l_float32 val; -NUMA *naindex, *nad; - - PROCNAME("numaRandomPermutation"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - - size = numaGetCount(nas); - naindex = numaPseudorandomSequence(size, seed); - nad = numaCreate(size); - for (i = 0; i < size; i++) { - numaGetIValue(naindex, i, &index); - numaGetFValue(nas, index, &val); - numaAddNumber(nad, val); - } - - numaDestroy(&naindex); - return nad; -} - - -/*----------------------------------------------------------------------* - * Functions requiring sorting * - *----------------------------------------------------------------------*/ -/*! - * \brief numaGetRankValue() - * - * \param[in] na source numa - * \param[in] fract use 0.0 for smallest, 1.0 for largest - * \param[in] nasort [optional] increasing sorted version of na - * \param[in] usebins 0 for general sort; 1 for bin sort - * \param[out] pval rank val - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Computes the rank value of a number in the %na, which is
- *          the number that is a fraction %fract from the small
- *          end of the sorted version of %na.
- *      (2) If you do this multiple times for different rank values,
- *          sort the array in advance and use that for %nasort;
- *          if you're only calling this once, input %nasort == NULL.
- *      (3) If %usebins == 1, this uses a bin sorting method.
- *          Use this only where:
- *           * the numbers are non-negative integers
- *           * there are over 100 numbers
- *           * the maximum value is less than about 50,000
- *      (4) The advantage of using a bin sort is that it is O(n),
- *          instead of O(nlogn) for general sort routines.
- * 
- */ -l_ok -numaGetRankValue(NUMA *na, - l_float32 fract, - NUMA *nasort, - l_int32 usebins, - l_float32 *pval) -{ -l_int32 n, index; -NUMA *nas; - - PROCNAME("numaGetRankValue"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; /* init */ - if (!na) - return ERROR_INT("na not defined", procName, 1); - if ((n = numaGetCount(na)) == 0) - return ERROR_INT("na empty", procName, 1); - if (fract < 0.0 || fract > 1.0) - return ERROR_INT("fract not in [0.0 ... 1.0]", procName, 1); - - if (nasort) { - nas = nasort; - } else { - if (usebins == 0) - nas = numaSort(NULL, na, L_SORT_INCREASING); - else - nas = numaBinSort(na, L_SORT_INCREASING); - if (!nas) - return ERROR_INT("nas not made", procName, 1); - } - index = (l_int32)(fract * (l_float32)(n - 1) + 0.5); - numaGetFValue(nas, index, pval); - - if (!nasort) numaDestroy(&nas); - return 0; -} - - -/*! - * \brief numaGetMedian() - * - * \param[in] na source numa - * \param[out] pval median value - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Computes the median value of the numbers in the numa, by
- *          sorting and finding the middle value in the sorted array.
- * 
- */ -l_ok -numaGetMedian(NUMA *na, - l_float32 *pval) -{ - PROCNAME("numaGetMedian"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; /* init */ - if (!na || numaGetCount(na) == 0) - return ERROR_INT("na not defined or empty", procName, 1); - - return numaGetRankValue(na, 0.5, NULL, 0, pval); -} - - -/*! - * \brief numaGetBinnedMedian() - * - * \param[in] na source numa - * \param[out] pval integer median value - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Computes the median value of the numbers in the numa,
- *          using bin sort and finding the middle value in the sorted array.
- *      (2) See numaGetRankValue() for conditions on na for which
- *          this should be used.  Otherwise, use numaGetMedian().
- * 
- */ -l_ok -numaGetBinnedMedian(NUMA *na, - l_int32 *pval) -{ -l_int32 ret; -l_float32 fval; - - PROCNAME("numaGetBinnedMedian"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; /* init */ - if (!na || numaGetCount(na) == 0) - return ERROR_INT("na not defined or empty", procName, 1); - - ret = numaGetRankValue(na, 0.5, NULL, 1, &fval); - *pval = lept_roundftoi(fval); - return ret; -} - - -/*! - * \brief numaGetMeanDevFromMedian() - * - * \param[in] na source numa - * \param[in] med median value - * \param[out] pdev average absolute value deviation from median value - * \return 0 if OK; 1 on error - */ -l_ok -numaGetMeanDevFromMedian(NUMA *na, - l_float32 med, - l_float32 *pdev) -{ -l_int32 i, n; -l_float32 val, dev; - - PROCNAME("numaGetMeanDevFromMedian"); - - if (!pdev) - return ERROR_INT("&dev not defined", procName, 1); - *pdev = 0.0; /* init */ - if (!na) - return ERROR_INT("na not defined", procName, 1); - if ((n = numaGetCount(na)) == 0) - return ERROR_INT("na is empty", procName, 1); - - dev = 0.0; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - dev += L_ABS(val - med); - } - *pdev = dev / (l_float32)n; - return 0; -} - - -/*! - * \brief numaGetMedianDevFromMedian() - * - * \param[in] na source numa - * \param[out] pmed [optional] median value - * \param[out] pdev median deviation from median val - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Finds the median of the absolute value of the deviation from
- *          the median value in the array.  Why take the absolute value?
- *          Consider the case where you have values equally distributed
- *          about both sides of a median value.  Without taking the absolute
- *          value of the differences, you will get 0 for the deviation,
- *          and this is not useful.
- * 
- */ -l_ok -numaGetMedianDevFromMedian(NUMA *na, - l_float32 *pmed, - l_float32 *pdev) -{ -l_int32 n, i; -l_float32 val, med; -NUMA *nadev; - - PROCNAME("numaGetMedianDevFromMedian"); - - if (pmed) *pmed = 0.0; - if (!pdev) - return ERROR_INT("&dev not defined", procName, 1); - *pdev = 0.0; - if (!na || numaGetCount(na) == 0) - return ERROR_INT("na not defined or empty", procName, 1); - - numaGetMedian(na, &med); - if (pmed) *pmed = med; - n = numaGetCount(na); - nadev = numaCreate(n); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - numaAddNumber(nadev, L_ABS(val - med)); - } - numaGetMedian(nadev, pdev); - - numaDestroy(&nadev); - return 0; -} - - -/*! - * \brief numaGetMode() - * - * \param[in] na source numa - * \param[out] pval mode val - * \param[out] pcount [optional] mode count - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Computes the mode value of the numbers in the numa, by
- *          sorting and finding the value of the number with the
- *          largest count.
- *      (2) Optionally, also returns that count.
- * 
- */ -l_ok -numaGetMode(NUMA *na, - l_float32 *pval, - l_int32 *pcount) -{ -l_int32 i, n, maxcount, prevcount; -l_float32 val, maxval, prevval; -l_float32 *array; -NUMA *nasort; - - PROCNAME("numaGetMode"); - - if (pcount) *pcount = 0; - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - if ((n = numaGetCount(na)) == 0) - return ERROR_INT("na is empty", procName, 1); - - if ((nasort = numaSort(NULL, na, L_SORT_DECREASING)) == NULL) - return ERROR_INT("nas not made", procName, 1); - array = numaGetFArray(nasort, L_NOCOPY); - - /* Initialize with array[0] */ - prevval = array[0]; - prevcount = 1; - maxval = prevval; - maxcount = prevcount; - - /* Scan the sorted array, aggregating duplicates */ - for (i = 1; i < n; i++) { - val = array[i]; - if (val == prevval) { - prevcount++; - } else { /* new value */ - if (prevcount > maxcount) { /* new max */ - maxcount = prevcount; - maxval = prevval; - } - prevval = val; - prevcount = 1; - } - } - - /* Was the mode the last run of elements? */ - if (prevcount > maxcount) { - maxcount = prevcount; - maxval = prevval; - } - - *pval = maxval; - if (pcount) - *pcount = maxcount; - - numaDestroy(&nasort); - return 0; -} - - -/*----------------------------------------------------------------------* - * Rearrangements * - *----------------------------------------------------------------------*/ -/*! - * \brief numaJoin() - * - * \param[in] nad dest numa; add to this one - * \param[in] nas [optional] source numa; add from this one - * \param[in] istart starting index in nas - * \param[in] iend ending index in nas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (2) iend < 0 means 'read to the end'
- *      (3) if nas == NULL, this is a no-op
- * 
- */ -l_ok -numaJoin(NUMA *nad, - NUMA *nas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i; -l_float32 val; - - PROCNAME("numaJoin"); - - if (!nad) - return ERROR_INT("nad not defined", procName, 1); - if (!nas) - return 0; - - if (istart < 0) - istart = 0; - n = numaGetCount(nas); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - numaGetFValue(nas, i, &val); - numaAddNumber(nad, val); - } - - return 0; -} - - -/*! - * \brief numaaJoin() - * - * \param[in] naad dest naa; add to this one - * \param[in] naas [optional] source naa; add from this one - * \param[in] istart starting index in nas - * \param[in] iend ending index in naas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (2) iend < 0 means 'read to the end'
- *      (3) if naas == NULL, this is a no-op
- * 
- */ -l_ok -numaaJoin(NUMAA *naad, - NUMAA *naas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i; -NUMA *na; - - PROCNAME("numaaJoin"); - - if (!naad) - return ERROR_INT("naad not defined", procName, 1); - if (!naas) - return 0; - - if (istart < 0) - istart = 0; - n = numaaGetCount(naas); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - na = numaaGetNuma(naas, i, L_CLONE); - numaaAddNuma(naad, na, L_INSERT); - } - - return 0; -} - - -/*! - * \brief numaaFlattenToNuma() - * - * \param[in] naa - * \return numa, or NULL on error - * - *
- * Notes:
- *      (1) This 'flattens' the Numaa to a Numa, by joining successively
- *          each Numa in the Numaa.
- *      (2) It doesn't make any assumptions about the location of the
- *          Numas in the Numaa array, unlike most Numaa functions.
- *      (3) It leaves the input Numaa unchanged.
- * 
- */ -NUMA * -numaaFlattenToNuma(NUMAA *naa) -{ -l_int32 i, nalloc; -NUMA *na, *nad; -NUMA **array; - - PROCNAME("numaaFlattenToNuma"); - - if (!naa) - return (NUMA *)ERROR_PTR("naa not defined", procName, NULL); - - nalloc = naa->nalloc; - array = numaaGetPtrArray(naa); - nad = numaCreate(0); - for (i = 0; i < nalloc; i++) { - na = array[i]; - if (!na) continue; - numaJoin(nad, na, 0, -1); - } - - return nad; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numafunc2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numafunc2.c deleted file mode 100644 index cf84aeff..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/numafunc2.c +++ /dev/null @@ -1,3247 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file numafunc2.c - *
- *
- *      --------------------------------------
- *      This file has these Numa utilities:
- *         - morphological operations
- *         - arithmetic transforms
- *         - windowed statistical operations
- *         - histogram extraction
- *         - histogram comparison
- *         - extrema finding
- *         - frequency and crossing analysis
- *      --------------------------------------
-
- *      Morphological (min/max) operations
- *          NUMA        *numaErode()
- *          NUMA        *numaDilate()
- *          NUMA        *numaOpen()
- *          NUMA        *numaClose()
- *
- *      Other transforms
- *          NUMA        *numaTransform()
- *          l_int32      numaSimpleStats()
- *          l_int32      numaWindowedStats()
- *          NUMA        *numaWindowedMean()
- *          NUMA        *numaWindowedMeanSquare()
- *          l_int32      numaWindowedVariance()
- *          NUMA        *numaWindowedMedian()
- *          NUMA        *numaConvertToInt()
- *
- *      Histogram generation and statistics
- *          NUMA        *numaMakeHistogram()
- *          NUMA        *numaMakeHistogramAuto()
- *          NUMA        *numaMakeHistogramClipped()
- *          NUMA        *numaRebinHistogram()
- *          NUMA        *numaNormalizeHistogram()
- *          l_int32      numaGetStatsUsingHistogram()
- *          l_int32      numaGetHistogramStats()
- *          l_int32      numaGetHistogramStatsOnInterval()
- *          l_int32      numaMakeRankFromHistogram()
- *          l_int32      numaHistogramGetRankFromVal()
- *          l_int32      numaHistogramGetValFromRank()
- *          l_int32      numaDiscretizeRankAndIntensity()
- *          l_int32      numaGetRankBinValues()
- *
- *      Splitting a distribution
- *          l_int32      numaSplitDistribution()
- *
- *      Comparing histograms
- *          l_int32      grayHistogramsToEMD()
- *          l_int32      numaEarthMoverDistance()
- *          l_int32      grayInterHistogramStats()
- *
- *      Extrema finding
- *          NUMA        *numaFindPeaks()
- *          NUMA        *numaFindExtrema()
- *          NUMA        *numaFindLocForThreshold()
- *          l_int32     *numaCountReversals()
- *
- *      Threshold crossings and frequency analysis
- *          l_int32      numaSelectCrossingThreshold()
- *          NUMA        *numaCrossingsByThreshold()
- *          NUMA        *numaCrossingsByPeaks()
- *          NUMA        *numaEvalBestHaarParameters()
- *          l_int32      numaEvalHaarSum()
- *
- *      Generating numbers in a range under constraints
- *          NUMA        *genConstrainedNumaInRange()
- *
- *    Things to remember when using the Numa:
- *
- *    (1) The numa is a struct, not an array.  Always use accessors
- *        (see numabasic.c), never the fields directly.
- *
- *    (2) The number array holds l_float32 values.  It can also
- *        be used to store l_int32 values.  See numabasic.c for
- *        details on using the accessors.  Integers larger than
- *        about 10M will lose accuracy due on retrieval due to round-off.
- *        For large integers, use the dna (array of l_float64) instead.
- *
- *    (3) Occasionally, in the comments we denote the i-th element of a
- *        numa by na[i].  This is conceptual only -- the numa is not an array!
- *
- *    Some general comments on histograms:
- *
- *    (1) Histograms are the generic statistical representation of
- *        the data about some attribute.  Typically they're not
- *        normalized -- they simply give the number of occurrences
- *        within each range of values of the attribute.  This range
- *        of values is referred to as a 'bucket'.  For example,
- *        the histogram could specify how many connected components
- *        are found for each value of their width; in that case,
- *        the bucket size is 1.
- *
- *    (2) In leptonica, all buckets have the same size.  Histograms
- *        are therefore specified by a numa of occurrences, along
- *        with two other numbers: the 'value' associated with the
- *        occupants of the first bucket and the size (i.e., 'width')
- *        of each bucket.  These two numbers then allow us to calculate
- *        the value associated with the occupants of each bucket.
- *        These numbers are fields in the numa, initialized to
- *        a startx value of 0.0 and a binsize of 1.0.  Accessors for
- *        these fields are functions numa*Parameters().  All histograms
- *        must have these two numbers properly set.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* bin sizes in numaMakeHistogram() */ -static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\ - 2000, 5000, 10000, 20000, 50000, 100000, 200000,\ - 500000, 1000000, 2000000, 5000000, 10000000,\ - 200000000, 50000000, 100000000}; -static const l_int32 NBinSizes = 24; - - -#ifndef NO_CONSOLE_IO -#define DEBUG_HISTO 0 -#define DEBUG_CROSSINGS 0 -#define DEBUG_FREQUENCY 0 -#endif /* ~NO_CONSOLE_IO */ - -/*----------------------------------------------------------------------* - * Morphological operations * - *----------------------------------------------------------------------*/ -/*! - * \brief numaErode() - * - * \param[in] nas - * \param[in] size of sel; greater than 0, odd. The origin - * is implicitly in the center. - * \return nad eroded, or NULL on error - * - *
- * Notes:
- *      (1) The structuring element (sel) is linear, all "hits"
- *      (2) If size == 1, this returns a copy
- *      (3) General comment.  The morphological operations are equivalent
- *          to those that would be performed on a 1-dimensional fpix.
- *          However, because we have not implemented morphological
- *          operations on fpix, we do this here.  Because it is only
- *          1 dimensional, there is no reason to use the more
- *          complicated van Herk/Gil-Werman algorithm, and we do it
- *          by brute force.
- * 
- */ -NUMA * -numaErode(NUMA *nas, - l_int32 size) -{ -l_int32 i, j, n, hsize, len; -l_float32 minval; -l_float32 *fa, *fas, *fad; -NUMA *nad; - - PROCNAME("numaErode"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (size <= 0) - return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL); - if ((size & 1) == 0 ) { - L_WARNING("sel size must be odd; increasing by 1\n", procName); - size++; - } - - if (size == 1) - return numaCopy(nas); - - /* Make a source fa (fas) that has an added (size / 2) boundary - * on left and right, contains a copy of nas in the interior region - * (between 'size' and 'size + n', and has large values - * inserted in the boundary (because it is an erosion). */ - n = numaGetCount(nas); - hsize = size / 2; - len = n + 2 * hsize; - if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL) - return (NUMA *)ERROR_PTR("fas not made", procName, NULL); - for (i = 0; i < hsize; i++) - fas[i] = 1.0e37; - for (i = hsize + n; i < len; i++) - fas[i] = 1.0e37; - fa = numaGetFArray(nas, L_NOCOPY); - for (i = 0; i < n; i++) - fas[hsize + i] = fa[i]; - - nad = numaMakeConstant(0, n); - numaCopyParameters(nad, nas); - fad = numaGetFArray(nad, L_NOCOPY); - for (i = 0; i < n; i++) { - minval = 1.0e37; /* start big */ - for (j = 0; j < size; j++) - minval = L_MIN(minval, fas[i + j]); - fad[i] = minval; - } - - LEPT_FREE(fas); - return nad; -} - - -/*! - * \brief numaDilate() - * - * \param[in] nas - * \param[in] size of sel; greater than 0, odd. The origin - * is implicitly in the center. - * \return nad dilated, or NULL on error - * - *
- * Notes:
- *      (1) The structuring element (sel) is linear, all "hits"
- *      (2) If size == 1, this returns a copy
- * 
- */ -NUMA * -numaDilate(NUMA *nas, - l_int32 size) -{ -l_int32 i, j, n, hsize, len; -l_float32 maxval; -l_float32 *fa, *fas, *fad; -NUMA *nad; - - PROCNAME("numaDilate"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (size <= 0) - return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL); - if ((size & 1) == 0 ) { - L_WARNING("sel size must be odd; increasing by 1\n", procName); - size++; - } - - if (size == 1) - return numaCopy(nas); - - /* Make a source fa (fas) that has an added (size / 2) boundary - * on left and right, contains a copy of nas in the interior region - * (between 'size' and 'size + n', and has small values - * inserted in the boundary (because it is a dilation). */ - n = numaGetCount(nas); - hsize = size / 2; - len = n + 2 * hsize; - if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL) - return (NUMA *)ERROR_PTR("fas not made", procName, NULL); - for (i = 0; i < hsize; i++) - fas[i] = -1.0e37; - for (i = hsize + n; i < len; i++) - fas[i] = -1.0e37; - fa = numaGetFArray(nas, L_NOCOPY); - for (i = 0; i < n; i++) - fas[hsize + i] = fa[i]; - - nad = numaMakeConstant(0, n); - numaCopyParameters(nad, nas); - fad = numaGetFArray(nad, L_NOCOPY); - for (i = 0; i < n; i++) { - maxval = -1.0e37; /* start small */ - for (j = 0; j < size; j++) - maxval = L_MAX(maxval, fas[i + j]); - fad[i] = maxval; - } - - LEPT_FREE(fas); - return nad; -} - - -/*! - * \brief numaOpen() - * - * \param[in] nas - * \param[in] size of sel; greater than 0, odd. The origin - * is implicitly in the center. - * \return nad opened, or NULL on error - * - *
- * Notes:
- *      (1) The structuring element (sel) is linear, all "hits"
- *      (2) If size == 1, this returns a copy
- * 
- */ -NUMA * -numaOpen(NUMA *nas, - l_int32 size) -{ -NUMA *nat, *nad; - - PROCNAME("numaOpen"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (size <= 0) - return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL); - if ((size & 1) == 0 ) { - L_WARNING("sel size must be odd; increasing by 1\n", procName); - size++; - } - - if (size == 1) - return numaCopy(nas); - - nat = numaErode(nas, size); - nad = numaDilate(nat, size); - numaDestroy(&nat); - return nad; -} - - -/*! - * \brief numaClose() - * - * \param[in] nas - * \param[in] size of sel; greater than 0, odd. The origin - * is implicitly in the center. - * \return nad closed, or NULL on error - * - *
- * Notes:
- *      (1) The structuring element (sel) is linear, all "hits"
- *      (2) If size == 1, this returns a copy
- *      (3) We add a border before doing this operation, for the same
- *          reason that we add a border to a pix before doing a safe closing.
- *          Without the border, a small component near the border gets
- *          clipped at the border on dilation, and can be entirely removed
- *          by the following erosion, violating the basic extensivity
- *          property of closing.
- * 
- */ -NUMA * -numaClose(NUMA *nas, - l_int32 size) -{ -NUMA *nab, *nat1, *nat2, *nad; - - PROCNAME("numaClose"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (size <= 0) - return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL); - if ((size & 1) == 0 ) { - L_WARNING("sel size must be odd; increasing by 1\n", procName); - size++; - } - - if (size == 1) - return numaCopy(nas); - - nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */ - nat1 = numaDilate(nab, size); - nat2 = numaErode(nat1, size); - nad = numaRemoveBorder(nat2, size, size); - numaDestroy(&nab); - numaDestroy(&nat1); - numaDestroy(&nat2); - return nad; -} - - -/*----------------------------------------------------------------------* - * Other transforms * - *----------------------------------------------------------------------*/ -/*! - * \brief numaTransform() - * - * \param[in] nas - * \param[in] shift add this to each number - * \param[in] scale multiply each number by this - * \return nad with all values shifted and scaled, or NULL on error - * - *
- * Notes:
- *      (1) Each number is shifted before scaling.
- * 
- */ -NUMA * -numaTransform(NUMA *nas, - l_float32 shift, - l_float32 scale) -{ -l_int32 i, n; -l_float32 val; -NUMA *nad; - - PROCNAME("numaTransform"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - if ((nad = numaCreate(n)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - numaCopyParameters(nad, nas); - for (i = 0; i < n; i++) { - numaGetFValue(nas, i, &val); - val = scale * (val + shift); - numaAddNumber(nad, val); - } - return nad; -} - - -/*! - * \brief numaSimpleStats() - * - * \param[in] na input numa - * \param[in] first first element to use - * \param[in] last last element to use; -1 to go to the end - * \param[out] pmean [optional] mean value - * \param[out] pvar [optional] variance - * \param[out] prvar [optional] rms deviation from the mean - * \return 0 if OK, 1 on error - */ -l_ok -numaSimpleStats(NUMA *na, - l_int32 first, - l_int32 last, - l_float32 *pmean, - l_float32 *pvar, - l_float32 *prvar) -{ -l_int32 i, n, ni; -l_float32 sum, sumsq, val, mean, var; - - PROCNAME("numaSimpleStats"); - - if (pmean) *pmean = 0.0; - if (pvar) *pvar = 0.0; - if (prvar) *prvar = 0.0; - if (!pmean && !pvar && !prvar) - return ERROR_INT("nothing requested", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - if ((n = numaGetCount(na)) == 0) - return ERROR_INT("na is empty", procName, 1); - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) - return ERROR_INT("invalid first", procName, 1); - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) - return ERROR_INT("first > last\n", procName, 1); - ni = last - first + 1; - sum = sumsq = 0.0; - for (i = first; i <= last; i++) { - numaGetFValue(na, i, &val); - sum += val; - sumsq += val * val; - } - - mean = sum / ni; - if (pmean) - *pmean = mean; - if (pvar || prvar) { - var = sumsq / ni - mean * mean; - 
if (pvar) *pvar = var; - if (prvar) *prvar = sqrtf(var); - } - - return 0; -} - - -/*! - * \brief numaWindowedStats() - * - * \param[in] nas input numa - * \param[in] wc half width of the window - * \param[out] pnam [optional] mean value in window - * \param[out] pnams [optional] mean square value in window - * \param[out] pnav [optional] variance in window - * \param[out] pnarv [optional] rms deviation from the mean - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a high-level convenience function for calculating
- *          any or all of these derived arrays.
- *      (2) These statistical measures over the values in the
- *          rectangular window are:
- *            ~ average value: [x]  (nam)
- *            ~ average squared value: [x*x] (nams)
- *            ~ variance: [(x - [x])*(x - [x])] = [x*x] - [x]*[x]  (nav)
- *            ~ square-root of variance: (narv)
- *          where the brackets [ .. ] indicate that the average value is
- *          to be taken over the window.
- *      (3) Note that the variance is just the mean square difference from
- *          the mean value; and the square root of the variance is the
- *          root mean square difference from the mean, sometimes also
- *          called the 'standard deviation'.
- *      (4) Internally, use mirrored borders to handle values near the
- *          end of each array.
- * 
- */ -l_ok -numaWindowedStats(NUMA *nas, - l_int32 wc, - NUMA **pnam, - NUMA **pnams, - NUMA **pnav, - NUMA **pnarv) -{ -NUMA *nam, *nams; - - PROCNAME("numaWindowedStats"); - - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if (2 * wc + 1 > numaGetCount(nas)) - L_WARNING("filter wider than input array!\n", procName); - - if (!pnav && !pnarv) { - if (pnam) *pnam = numaWindowedMean(nas, wc); - if (pnams) *pnams = numaWindowedMeanSquare(nas, wc); - return 0; - } - - nam = numaWindowedMean(nas, wc); - nams = numaWindowedMeanSquare(nas, wc); - numaWindowedVariance(nam, nams, pnav, pnarv); - if (pnam) - *pnam = nam; - else - numaDestroy(&nam); - if (pnams) - *pnams = nams; - else - numaDestroy(&nams); - return 0; -} - - -/*! - * \brief numaWindowedMean() - * - * \param[in] nas - * \param[in] wc half width of the convolution window - * \return nad after low-pass filtering, or NULL on error - * - *
- * Notes:
- *      (1) This is a convolution.  The window has width = 2 * %wc + 1.
- *      (2) We add a mirrored border of size %wc to each end of the array.
- * 
- */ -NUMA * -numaWindowedMean(NUMA *nas, - l_int32 wc) -{ -l_int32 i, n, n1, width; -l_float32 sum, norm; -l_float32 *fa1, *fad, *suma; -NUMA *na1, *nad; - - PROCNAME("numaWindowedMean"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - width = 2 * wc + 1; /* filter width */ - if (width > n) - L_WARNING("filter wider than input array!\n", procName); - - na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER); - n1 = n + 2 * wc; - fa1 = numaGetFArray(na1, L_NOCOPY); - nad = numaMakeConstant(0, n); - fad = numaGetFArray(nad, L_NOCOPY); - - /* Make sum array; note the indexing */ - if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) { - numaDestroy(&na1); - numaDestroy(&nad); - return (NUMA *)ERROR_PTR("suma not made", procName, NULL); - } - sum = 0.0; - suma[0] = 0.0; - for (i = 0; i < n1; i++) { - sum += fa1[i]; - suma[i + 1] = sum; - } - - norm = 1. / (2 * wc + 1); - for (i = 0; i < n; i++) - fad[i] = norm * (suma[width + i] - suma[i]); - - LEPT_FREE(suma); - numaDestroy(&na1); - return nad; -} - - -/*! - * \brief numaWindowedMeanSquare() - * - * \param[in] nas - * \param[in] wc half width of the window - * \return nad containing windowed mean square values, or NULL on error - * - *
- * Notes:
- *      (1) The window has width = 2 * %wc + 1.
- *      (2) We add a mirrored border of size %wc to each end of the array.
- * 
- */ -NUMA * -numaWindowedMeanSquare(NUMA *nas, - l_int32 wc) -{ -l_int32 i, n, n1, width; -l_float32 sum, norm; -l_float32 *fa1, *fad, *suma; -NUMA *na1, *nad; - - PROCNAME("numaWindowedMeanSquare"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - width = 2 * wc + 1; /* filter width */ - if (width > n) - L_WARNING("filter wider than input array!\n", procName); - - na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER); - n1 = n + 2 * wc; - fa1 = numaGetFArray(na1, L_NOCOPY); - nad = numaMakeConstant(0, n); - fad = numaGetFArray(nad, L_NOCOPY); - - /* Make sum array; note the indexing */ - if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) { - numaDestroy(&na1); - numaDestroy(&nad); - return (NUMA *)ERROR_PTR("suma not made", procName, NULL); - } - sum = 0.0; - suma[0] = 0.0; - for (i = 0; i < n1; i++) { - sum += fa1[i] * fa1[i]; - suma[i + 1] = sum; - } - - norm = 1. / (2 * wc + 1); - for (i = 0; i < n; i++) - fad[i] = norm * (suma[width + i] - suma[i]); - - LEPT_FREE(suma); - numaDestroy(&na1); - return nad; -} - - -/*! - * \brief numaWindowedVariance() - * - * \param[in] nam windowed mean values - * \param[in] nams windowed mean square values - * \param[out] pnav [optional] numa of variance -- the ms deviation - * from the mean - * \param[out] pnarv [optional] numa of rms deviation from the mean - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The numas of windowed mean and mean square are precomputed,
- *          using numaWindowedMean() and numaWindowedMeanSquare().
- *      (2) Either or both of the variance and square-root of variance
- *          are returned, where the variance is the average over the
- *          window of the mean square difference of the pixel value
- *          from the mean:
- *                [(x - [x])*(x - [x])] = [x*x] - [x]*[x]
- * 
- */ -l_ok -numaWindowedVariance(NUMA *nam, - NUMA *nams, - NUMA **pnav, - NUMA **pnarv) -{ -l_int32 i, nm, nms; -l_float32 var; -l_float32 *fam, *fams, *fav, *farv; -NUMA *nav, *narv; /* variance and square root of variance */ - - PROCNAME("numaWindowedVariance"); - - if (pnav) *pnav = NULL; - if (pnarv) *pnarv = NULL; - if (!pnav && !pnarv) - return ERROR_INT("neither &nav nor &narv are defined", procName, 1); - if (!nam) - return ERROR_INT("nam not defined", procName, 1); - if (!nams) - return ERROR_INT("nams not defined", procName, 1); - nm = numaGetCount(nam); - nms = numaGetCount(nams); - if (nm != nms) - return ERROR_INT("sizes of nam and nams differ", procName, 1); - - if (pnav) { - nav = numaMakeConstant(0, nm); - *pnav = nav; - fav = numaGetFArray(nav, L_NOCOPY); - } - if (pnarv) { - narv = numaMakeConstant(0, nm); - *pnarv = narv; - farv = numaGetFArray(narv, L_NOCOPY); - } - fam = numaGetFArray(nam, L_NOCOPY); - fams = numaGetFArray(nams, L_NOCOPY); - - for (i = 0; i < nm; i++) { - var = fams[i] - fam[i] * fam[i]; - if (pnav) - fav[i] = var; - if (pnarv) - farv[i] = sqrtf(var); - } - - return 0; -} - - -/*! - * \brief numaWindowedMedian() - * - * \param[in] nas - * \param[in] halfwin half width of window over which the median is found - * \return nad after windowed median filtering, or NULL on error - * - *
- * Notes:
- *      (1) The requested window has width = 2 * %halfwin + 1.
- *      (2) If the input nas has less then 3 elements, return a copy.
- *      (3) If the filter is too small (%halfwin <= 0), return a copy.
- *      (4) If the filter is too large, it is reduced in size.
- *      (5) We add a mirrored border of size %halfwin to each end of
- *          the array to simplify the calculation by avoiding end-effects.
- * 
- */ -NUMA * -numaWindowedMedian(NUMA *nas, - l_int32 halfwin) -{ -l_int32 i, n; -l_float32 medval; -NUMA *na1, *na2, *nad; - - PROCNAME("numaWindowedMedian"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if ((n = numaGetCount(nas)) < 3) - return numaCopy(nas); - if (halfwin <= 0) { - L_ERROR("filter too small; returning a copy\n", procName); - return numaCopy(nas); - } - - if (halfwin > (n - 1) / 2) { - halfwin = (n - 1) / 2; - L_INFO("reducing filter to halfwin = %d\n", procName, halfwin); - } - - /* Add a border to both ends */ - na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER); - - /* Get the median value at the center of each window, corresponding - * to locations in the input nas. */ - nad = numaCreate(n); - for (i = 0; i < n; i++) { - na2 = numaClipToInterval(na1, i, i + 2 * halfwin); - numaGetMedian(na2, &medval); - numaAddNumber(nad, medval); - numaDestroy(&na2); - } - - numaDestroy(&na1); - return nad; -} - - -/*! - * \brief numaConvertToInt() - * - * \param[in] nas source numa - * \return na with all values rounded to nearest integer, or - * NULL on error - */ -NUMA * -numaConvertToInt(NUMA *nas) -{ -l_int32 i, n, ival; -NUMA *nad; - - PROCNAME("numaConvertToInt"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - - n = numaGetCount(nas); - if ((nad = numaCreate(n)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - numaCopyParameters(nad, nas); - for (i = 0; i < n; i++) { - numaGetIValue(nas, i, &ival); - numaAddNumber(nad, ival); - } - return nad; -} - - -/*----------------------------------------------------------------------* - * Histogram generation and statistics * - *----------------------------------------------------------------------*/ -/*! 
- * \brief numaMakeHistogram() - * - * \param[in] na - * \param[in] maxbins max number of histogram bins - * \param[out] pbinsize size of histogram bins - * \param[out] pbinstart [optional] start val of minimum bin; - * input NULL to force start at 0 - * \return na consisiting of histogram of integerized values, - * or NULL on error. - * - *
- * Notes:
- *      (1) This simple interface is designed for integer data.
- *          The bins are of integer width and start on integer boundaries,
- *          so the results on float data will not have high precision.
- *      (2) Specify the max number of input bins.   Then %binsize,
- *          the size of bins necessary to accommodate the input data,
- *          is returned.  It is one of the sequence:
- *                {1, 2, 5, 10, 20, 50, ...}.
- *      (3) If &binstart is given, all values are accommodated,
- *          and the min value of the starting bin is returned.
- *          Otherwise, all negative values are discarded and
- *          the histogram bins start at 0.
- * 
- */ -NUMA * -numaMakeHistogram(NUMA *na, - l_int32 maxbins, - l_int32 *pbinsize, - l_int32 *pbinstart) -{ -l_int32 i, n, ival, hval; -l_int32 iminval, imaxval, range, binsize, nbins, ibin; -l_float32 val, ratio; -NUMA *nai, *nahist; - - PROCNAME("numaMakeHistogram"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - if (!pbinsize) - return (NUMA *)ERROR_PTR("&binsize not defined", procName, NULL); - - /* Determine input range */ - numaGetMin(na, &val, NULL); - iminval = (l_int32)(val + 0.5); - numaGetMax(na, &val, NULL); - imaxval = (l_int32)(val + 0.5); - if (pbinstart == NULL) { /* clip negative vals; start from 0 */ - iminval = 0; - if (imaxval < 0) - return (NUMA *)ERROR_PTR("all values < 0", procName, NULL); - } - - /* Determine binsize */ - range = imaxval - iminval + 1; - if (range > maxbins - 1) { - ratio = (l_float64)range / (l_float64)maxbins; - binsize = 0; - for (i = 0; i < NBinSizes; i++) { - if (ratio < BinSizeArray[i]) { - binsize = BinSizeArray[i]; - break; - } - } - if (binsize == 0) - return (NUMA *)ERROR_PTR("numbers too large", procName, NULL); - } else { - binsize = 1; - } - *pbinsize = binsize; - nbins = 1 + range / binsize; /* +1 seems to be sufficient */ - - /* Redetermine iminval */ - if (pbinstart && binsize > 1) { - if (iminval >= 0) - iminval = binsize * (iminval / binsize); - else - iminval = binsize * ((iminval - binsize + 1) / binsize); - } - if (pbinstart) - *pbinstart = iminval; - -#if DEBUG_HISTO - lept_stderr(" imaxval = %d, range = %d, nbins = %d\n", - imaxval, range, nbins); -#endif /* DEBUG_HISTO */ - - /* Use integerized data for input */ - if ((nai = numaConvertToInt(na)) == NULL) - return (NUMA *)ERROR_PTR("nai not made", procName, NULL); - n = numaGetCount(nai); - - /* Make histogram, converting value in input array - * into a bin number for this histogram array. 
*/ - if ((nahist = numaCreate(nbins)) == NULL) { - numaDestroy(&nai); - return (NUMA *)ERROR_PTR("nahist not made", procName, NULL); - } - numaSetCount(nahist, nbins); - numaSetParameters(nahist, iminval, binsize); - for (i = 0; i < n; i++) { - numaGetIValue(nai, i, &ival); - ibin = (ival - iminval) / binsize; - if (ibin >= 0 && ibin < nbins) { - numaGetIValue(nahist, ibin, &hval); - numaSetValue(nahist, ibin, hval + 1.0); - } - } - - numaDestroy(&nai); - return nahist; -} - - -/*! - * \brief numaMakeHistogramAuto() - * - * \param[in] na numa of floats; these may be integers - * \param[in] maxbins max number of histogram bins; >= 1 - * \return na consisiting of histogram of quantized float values, - * or NULL on error. - * - *
- * Notes:
- *      (1) This simple interface is designed for accurate binning
- *          of both integer and float data.
- *      (2) If the array data is integers, and the range of integers
- *          is smaller than %maxbins, they are binned as they fall,
- *          with binsize = 1.
- *      (3) If the range of data, (maxval - minval), is larger than
- *          %maxbins, or if the data is floats, they are binned into
- *          exactly %maxbins bins.
- *      (4) Unlike numaMakeHistogram(), these bins in general have
- *          non-integer location and width, even for integer data.
- * 
- */ -NUMA * -numaMakeHistogramAuto(NUMA *na, - l_int32 maxbins) -{ -l_int32 i, n, imin, imax, irange, ibin, ival, allints; -l_float32 minval, maxval, range, binsize, fval; -NUMA *nah; - - PROCNAME("numaMakeHistogramAuto"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - maxbins = L_MAX(1, maxbins); - - /* Determine input range */ - numaGetMin(na, &minval, NULL); - numaGetMax(na, &maxval, NULL); - - /* Determine if values are all integers */ - n = numaGetCount(na); - numaHasOnlyIntegers(na, maxbins, &allints); - - /* Do simple integer binning if possible */ - if (allints && (maxval - minval < maxbins)) { - imin = (l_int32)minval; - imax = (l_int32)maxval; - irange = imax - imin + 1; - nah = numaCreate(irange); - numaSetCount(nah, irange); /* init */ - numaSetParameters(nah, minval, 1.0); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - ibin = ival - imin; - numaGetIValue(nah, ibin, &ival); - numaSetValue(nah, ibin, ival + 1.0); - } - - return nah; - } - - /* Do float binning, even if the data is integers. */ - range = maxval - minval; - binsize = range / (l_float32)maxbins; - if (range == 0.0) { - nah = numaCreate(1); - numaSetParameters(nah, minval, binsize); - numaAddNumber(nah, n); - return nah; - } - nah = numaCreate(maxbins); - numaSetCount(nah, maxbins); - numaSetParameters(nah, minval, binsize); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &fval); - ibin = (l_int32)((fval - minval) / binsize); - ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */ - numaGetIValue(nah, ibin, &ival); - numaSetValue(nah, ibin, ival + 1.0); - } - - return nah; -} - - -/*! - * \brief numaMakeHistogramClipped() - * - * \param[in] na - * \param[in] binsize typically 1.0 - * \param[in] maxsize of histogram ordinate - * \return na histogram of bins of size %binsize, starting with - * the na[0] (x = 0.0 and going up to a maximum of - * x = %maxsize, by increments of %binsize), or NULL on error - * - *
- * Notes:
- *      (1) This simple function generates a histogram of values
- *          from na, discarding all values < 0.0 or greater than
- *          min(%maxsize, maxval), where maxval is the maximum value in na.
- *          The histogram data is put in bins of size delx = %binsize,
- *          starting at x = 0.0.  We use as many bins as are
- *          needed to hold the data.
- * 
- */ -NUMA * -numaMakeHistogramClipped(NUMA *na, - l_float32 binsize, - l_float32 maxsize) -{ -l_int32 i, n, nbins, ival, ibin; -l_float32 val, maxval; -NUMA *nad; - - PROCNAME("numaMakeHistogramClipped"); - - if (!na) - return (NUMA *)ERROR_PTR("na not defined", procName, NULL); - if (binsize <= 0.0) - return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL); - if (binsize > maxsize) - binsize = maxsize; /* just one bin */ - - numaGetMax(na, &maxval, NULL); - n = numaGetCount(na); - maxsize = L_MIN(maxsize, maxval); - nbins = (l_int32)(maxsize / binsize) + 1; - -/* lept_stderr("maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */ - - if ((nad = numaCreate(nbins)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - numaSetParameters(nad, 0.0, binsize); - numaSetCount(nad, nbins); /* interpret zeroes in bins as data */ - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - ibin = (l_int32)(val / binsize); - if (ibin >= 0 && ibin < nbins) { - numaGetIValue(nad, ibin, &ival); - numaSetValue(nad, ibin, ival + 1.0); - } - } - - return nad; -} - - -/*! 
- * \brief numaRebinHistogram() - * - * \param[in] nas input histogram - * \param[in] newsize number of old bins contained in each new bin - * \return nad more coarsely re-binned histogram, or NULL on error - */ -NUMA * -numaRebinHistogram(NUMA *nas, - l_int32 newsize) -{ -l_int32 i, j, ns, nd, index, count, val; -l_float32 start, oldsize; -NUMA *nad; - - PROCNAME("numaRebinHistogram"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (newsize <= 1) - return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL); - if ((ns = numaGetCount(nas)) == 0) - return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL); - - nd = (ns + newsize - 1) / newsize; - if ((nad = numaCreate(nd)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - numaGetParameters(nad, &start, &oldsize); - numaSetParameters(nad, start, oldsize * newsize); - - for (i = 0; i < nd; i++) { /* new bins */ - count = 0; - index = i * newsize; - for (j = 0; j < newsize; j++) { - if (index < ns) { - numaGetIValue(nas, index, &val); - count += val; - index++; - } - } - numaAddNumber(nad, count); - } - - return nad; -} - - -/*! 
- * \brief numaNormalizeHistogram() - * - * \param[in] nas input histogram - * \param[in] tsum target sum of all numbers in dest histogram; e.g., use - * %tsum= 1.0 if this represents a probability distribution - * \return nad normalized histogram, or NULL on error - */ -NUMA * -numaNormalizeHistogram(NUMA *nas, - l_float32 tsum) -{ -l_int32 i, ns; -l_float32 sum, factor, fval; -NUMA *nad; - - PROCNAME("numaNormalizeHistogram"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (tsum <= 0.0) - return (NUMA *)ERROR_PTR("tsum must be > 0.0", procName, NULL); - if ((ns = numaGetCount(nas)) == 0) - return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL); - - numaGetSum(nas, &sum); - factor = tsum / sum; - - if ((nad = numaCreate(ns)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - numaCopyParameters(nad, nas); - - for (i = 0; i < ns; i++) { - numaGetFValue(nas, i, &fval); - fval *= factor; - numaAddNumber(nad, fval); - } - - return nad; -} - - -/*! - * \brief numaGetStatsUsingHistogram() - * - * \param[in] na an arbitrary set of numbers; not ordered and not - * a histogram - * \param[in] maxbins the maximum number of bins to be allowed in - * the histogram; use an integer larger than the - * largest number in %na for consecutive integer bins - * \param[out] pmin [optional] min value of set - * \param[out] pmax [optional] max value of set - * \param[out] pmean [optional] mean value of set - * \param[out] pvariance [optional] variance - * \param[out] pmedian [optional] median value of set - * \param[in] rank in [0.0 ... 1.0]; median has a rank 0.5; - * ignored if &rval == NULL - * \param[out] prval [optional] value in na corresponding to %rank - * \param[out] phisto [optional] Numa histogram; use NULL to prevent - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a simple interface for gathering statistics
- *          from a numa, where a histogram is used 'under the covers'
- *          to avoid sorting if a rank value is requested.  In that case,
- *          by using a histogram we are trading speed for accuracy, because
- *          the values in %na are quantized to the center of a set of bins.
- *      (2) If the median, other rank value, or histogram are not requested,
- *          the calculation is all performed on the input Numa.
- *      (3) The variance is the average of the square of the
- *          difference from the mean.  The median is the value in na
- *          with rank 0.5.
- *      (4) There are two situations where this gives rank results with
- *          accuracy comparable to computing stastics directly on the input
- *          data, without binning into a histogram:
- *           (a) the data is integers and the range of data is less than
- *               %maxbins, and
- *           (b) the data is floats and the range is small compared to
- *               %maxbins, so that the binsize is much less than 1.
- *      (5) If a histogram is used and the numbers in the Numa extend
- *          over a large range, you can limit the required storage by
- *          specifying the maximum number of bins in the histogram.
- *          Use %maxbins == 0 to force the bin size to be 1.
- *      (6) This optionally returns the median and one arbitrary rank value.
- *          If you need several rank values, return the histogram and use
- *               numaHistogramGetValFromRank(nah, rank, &rval)
- *          multiple times.
- * 
- */ -l_ok -numaGetStatsUsingHistogram(NUMA *na, - l_int32 maxbins, - l_float32 *pmin, - l_float32 *pmax, - l_float32 *pmean, - l_float32 *pvariance, - l_float32 *pmedian, - l_float32 rank, - l_float32 *prval, - NUMA **phisto) -{ -l_int32 i, n; -l_float32 minval, maxval, fval, mean, sum; -NUMA *nah; - - PROCNAME("numaGetStatsUsingHistogram"); - - if (pmin) *pmin = 0.0; - if (pmax) *pmax = 0.0; - if (pmean) *pmean = 0.0; - if (pvariance) *pvariance = 0.0; - if (pmedian) *pmedian = 0.0; - if (prval) *prval = 0.0; - if (phisto) *phisto = NULL; - if (!na) - return ERROR_INT("na not defined", procName, 1); - if ((n = numaGetCount(na)) == 0) - return ERROR_INT("numa is empty", procName, 1); - - numaGetMin(na, &minval, NULL); - numaGetMax(na, &maxval, NULL); - if (pmin) *pmin = minval; - if (pmax) *pmax = maxval; - if (pmean || pvariance) { - sum = 0.0; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &fval); - sum += fval; - } - mean = sum / (l_float32)n; - if (pmean) *pmean = mean; - } - if (pvariance) { - sum = 0.0; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &fval); - sum += fval * fval; - } - *pvariance = sum / (l_float32)n - mean * mean; - } - - if (!pmedian && !prval && !phisto) - return 0; - - nah = numaMakeHistogramAuto(na, maxbins); - if (pmedian) - numaHistogramGetValFromRank(nah, 0.5, pmedian); - if (prval) - numaHistogramGetValFromRank(nah, rank, prval); - if (phisto) - *phisto = nah; - else - numaDestroy(&nah); - return 0; -} - - -/*! - * \brief numaGetHistogramStats() - * - * \param[in] nahisto histogram: y(x(i)), i = 0 ... 
nbins - 1 - * \param[in] startx x value of first bin: x(0) - * \param[in] deltax x increment between bins; the bin size; x(1) - x(0) - * \param[out] pxmean [optional] mean value of histogram - * \param[out] pxmedian [optional] median value of histogram - * \param[out] pxmode [optional] mode value of histogram: - * xmode = x(imode), where y(xmode) >= y(x(i)) for - * all i != imode - * \param[out] pxvariance [optional] variance of x - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the histogram represents the relation y(x), the
- *          computed values that are returned are the x values.
- *          These are NOT the bucket indices i; they are related to the
- *          bucket indices by
- *                x(i) = startx + i * deltax
- * 
- */ -l_ok -numaGetHistogramStats(NUMA *nahisto, - l_float32 startx, - l_float32 deltax, - l_float32 *pxmean, - l_float32 *pxmedian, - l_float32 *pxmode, - l_float32 *pxvariance) -{ - PROCNAME("numaGetHistogramStats"); - - if (pxmean) *pxmean = 0.0; - if (pxmedian) *pxmedian = 0.0; - if (pxmode) *pxmode = 0.0; - if (pxvariance) *pxvariance = 0.0; - if (!nahisto) - return ERROR_INT("nahisto not defined", procName, 1); - - return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1, - pxmean, pxmedian, pxmode, - pxvariance); -} - - -/*! - * \brief numaGetHistogramStatsOnInterval() - * - * \param[in] nahisto histogram: y(x(i)), i = 0 ... nbins - 1 - * \param[in] startx x value of first bin: x(0) - * \param[in] deltax x increment between bins; the bin size; x(1) - x(0) - * \param[in] ifirst first bin to use for collecting stats - * \param[in] ilast last bin for collecting stats; -1 to go to the end - * \param[out] pxmean [optional] mean value of histogram - * \param[out] pxmedian [optional] median value of histogram - * \param[out] pxmode [optional] mode value of histogram: - * xmode = x(imode), where y(xmode) >= y(x(i)) for - * all i != imode - * \param[out] pxvariance [optional] variance of x - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the histogram represents the relation y(x), the
- *          computed values that are returned are the x values.
- *          These are NOT the bucket indices i; they are related to the
- *          bucket indices by
- *                x(i) = startx + i * deltax
- * 
- */ -l_ok -numaGetHistogramStatsOnInterval(NUMA *nahisto, - l_float32 startx, - l_float32 deltax, - l_int32 ifirst, - l_int32 ilast, - l_float32 *pxmean, - l_float32 *pxmedian, - l_float32 *pxmode, - l_float32 *pxvariance) -{ -l_int32 i, n, imax; -l_float32 sum, sumval, halfsum, moment, var, x, y, ymax; - - PROCNAME("numaGetHistogramStatsOnInterval"); - - if (pxmean) *pxmean = 0.0; - if (pxmedian) *pxmedian = 0.0; - if (pxmode) *pxmode = 0.0; - if (pxvariance) *pxvariance = 0.0; - if (!nahisto) - return ERROR_INT("nahisto not defined", procName, 1); - if (!pxmean && !pxmedian && !pxmode && !pxvariance) - return ERROR_INT("nothing to compute", procName, 1); - - n = numaGetCount(nahisto); - ifirst = L_MAX(0, ifirst); - if (ilast < 0) ilast = n - 1; - if (ifirst >= n) - return ERROR_INT("invalid ifirst", procName, 1); - if (ilast >= n) { - L_WARNING("ilast = %d is beyond max index = %d; adjusting\n", - procName, ilast, n - 1); - ilast = n - 1; - } - if (ifirst > ilast) - return ERROR_INT("ifirst > ilast", procName, 1); - for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) { - x = startx + i * deltax; - numaGetFValue(nahisto, i, &y); - sum += y; - moment += x * y; - var += x * x * y; - } - if (sum == 0.0) { - L_INFO("sum is 0\n", procName); - return 0; - } - - if (pxmean) - *pxmean = moment / sum; - if (pxvariance) - *pxvariance = var / sum - moment * moment / (sum * sum); - - if (pxmedian) { - halfsum = sum / 2.0; - for (sumval = 0.0, i = ifirst; i <= ilast; i++) { - numaGetFValue(nahisto, i, &y); - sumval += y; - if (sumval >= halfsum) { - *pxmedian = startx + i * deltax; - break; - } - } - } - - if (pxmode) { - imax = -1; - ymax = -1.0e10; - for (i = ifirst; i <= ilast; i++) { - numaGetFValue(nahisto, i, &y); - if (y > ymax) { - ymax = y; - imax = i; - } - } - *pxmode = startx + imax * deltax; - } - - return 0; -} - - -/*! 
- * \brief numaMakeRankFromHistogram() - * - * \param[in] startx xval corresponding to first element in nay - * \param[in] deltax x increment between array elements in nay - * \param[in] nasy input histogram, assumed equally spaced - * \param[in] npts number of points to evaluate rank function - * \param[out] pnax [optional] array of x values in range - * \param[out] pnay rank array of specified npts - * \return 0 if OK, 1 on error - */ -l_ok -numaMakeRankFromHistogram(l_float32 startx, - l_float32 deltax, - NUMA *nasy, - l_int32 npts, - NUMA **pnax, - NUMA **pnay) -{ -l_int32 i, n; -l_float32 sum, fval; -NUMA *nan, *nar; - - PROCNAME("numaMakeRankFromHistogram"); - - if (pnax) *pnax = NULL; - if (!pnay) - return ERROR_INT("&nay not defined", procName, 1); - *pnay = NULL; - if (!nasy) - return ERROR_INT("nasy not defined", procName, 1); - if ((n = numaGetCount(nasy)) == 0) - return ERROR_INT("no bins in nas", procName, 1); - - /* Normalize and generate the rank array corresponding to - * the binned histogram. */ - nan = numaNormalizeHistogram(nasy, 1.0); - nar = numaCreate(n + 1); /* rank numa corresponding to nan */ - sum = 0.0; - numaAddNumber(nar, sum); /* first element is 0.0 */ - for (i = 0; i < n; i++) { - numaGetFValue(nan, i, &fval); - sum += fval; - numaAddNumber(nar, sum); - } - - /* Compute rank array on full range with specified - * number of points and correspondence to x-values. */ - numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP, - startx, startx + n * deltax, npts, - pnax, pnay); - numaDestroy(&nan); - numaDestroy(&nar); - return 0; -} - - -/*! - * \brief numaHistogramGetRankFromVal() - * - * \param[in] na histogram - * \param[in] rval value of input sample for which we want the rank - * \param[out] prank fraction of total samples below rval - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If we think of the histogram as a function y(x), normalized
- *          to 1, for a given input value of x, this computes the
- *          rank of x, which is the integral of y(x) from the start
- *          value of x to the input value.
- *      (2) This function only makes sense when applied to a Numa that
- *          is a histogram.  The values in the histogram can be ints and
- *          floats, and are computed as floats.  The rank is returned
- *          as a float between 0.0 and 1.0.
- *      (3) The numa parameters startx and binsize are used to
- *          compute x from the Numa index i.
- * 
- */ -l_ok -numaHistogramGetRankFromVal(NUMA *na, - l_float32 rval, - l_float32 *prank) -{ -l_int32 i, ibinval, n; -l_float32 startval, binsize, binval, maxval, fractval, total, sum, val; - - PROCNAME("numaHistogramGetRankFromVal"); - - if (!prank) - return ERROR_INT("prank not defined", procName, 1); - *prank = 0.0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - numaGetParameters(na, &startval, &binsize); - n = numaGetCount(na); - if (rval < startval) - return 0; - maxval = startval + n * binsize; - if (rval > maxval) { - *prank = 1.0; - return 0; - } - - binval = (rval - startval) / binsize; - ibinval = (l_int32)binval; - if (ibinval >= n) { - *prank = 1.0; - return 0; - } - fractval = binval - (l_float32)ibinval; - - sum = 0.0; - for (i = 0; i < ibinval; i++) { - numaGetFValue(na, i, &val); - sum += val; - } - numaGetFValue(na, ibinval, &val); - sum += fractval * val; - numaGetSum(na, &total); - *prank = sum / total; - -/* lept_stderr("binval = %7.3f, rank = %7.3f\n", binval, *prank); */ - - return 0; -} - - -/*! - * \brief numaHistogramGetValFromRank() - * - * \param[in] na histogram - * \param[in] rank fraction of total samples - * \param[out] prval approx. to the bin value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If we think of the histogram as a function y(x), this returns
- *          the value x such that the integral of y(x) from the start
- *          value to x gives the fraction 'rank' of the integral
- *          of y(x) over all bins.
- *      (2) This function only makes sense when applied to a Numa that
- *          is a histogram.  The values in the histogram can be ints and
- *          floats, and are computed as floats.  The val is returned
- *          as a float, even though the buckets are of integer width.
- *      (3) The numa parameters startx and binsize are used to
- *          compute x from the Numa index i.
- * 
- */ -l_ok -numaHistogramGetValFromRank(NUMA *na, - l_float32 rank, - l_float32 *prval) -{ -l_int32 i, n; -l_float32 startval, binsize, rankcount, total, sum, fract, val; - - PROCNAME("numaHistogramGetValFromRank"); - - if (!prval) - return ERROR_INT("prval not defined", procName, 1); - *prval = 0.0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (rank < 0.0) { - L_WARNING("rank < 0; setting to 0.0\n", procName); - rank = 0.0; - } - if (rank > 1.0) { - L_WARNING("rank > 1.0; setting to 1.0\n", procName); - rank = 1.0; - } - - n = numaGetCount(na); - numaGetParameters(na, &startval, &binsize); - numaGetSum(na, &total); - rankcount = rank * total; /* count that corresponds to rank */ - sum = 0.0; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - if (sum + val >= rankcount) - break; - sum += val; - } - if (val <= 0.0) /* can == 0 if rank == 0.0 */ - fract = 0.0; - else /* sum + fract * val = rankcount */ - fract = (rankcount - sum) / val; - - /* The use of the fraction of a bin allows a simple calculation - * for the histogram value at the given rank. */ - *prval = startval + binsize * ((l_float32)i + fract); - -/* lept_stderr("rank = %7.3f, val = %7.3f\n", rank, *prval); */ - - return 0; -} - - -/*! - * \brief numaDiscretizeRankAndIntensity() - * - * \param[in] na normalized histo of probability density vs intensity - * \param[in] nbins number of bins at which the rank is divided - * \param[out] pnarbin [optional] rank bin value vs intensity - * \param[out] pnam [optional] median intensity in a bin vs rank bin - * value, with %nbins of discretized rank values - * \param[out] pnar [optional] rank vs intensity; this is - * a cumulative norm histogram - * \param[out] pnabb [optional] intensity at the right bin boundary - * vs rank bin - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We are inverting the rank(intensity) function to get
- *          the intensity(rank) function at %nbins equally spaced
- *          values of rank between 0.0 and 1.0.  We save integer values
- *          for the intensity.
- *      (2) We are using the word "intensity" to describe the type of
- *          array values, but any array of non-negative numbers will work.
- *      (3) The output arrays give the following mappings, where the
- *          input is a normalized histogram of array values:
- *             array values     -->  rank bin number  (narbin)
- *             rank bin number  -->  median array value in bin (nam)
- *             array values     -->  cumulative norm = rank  (nar)
- *             rank bin number  -->  array value at right bin edge (nabb)
- * 
- */ -l_ok -numaDiscretizeRankAndIntensity(NUMA *na, - l_int32 nbins, - NUMA **pnarbin, - NUMA **pnam, - NUMA **pnar, - NUMA **pnabb) -{ -NUMA *nar; /* rank value as function of intensity */ -NUMA *nam; /* median intensity in the rank bins */ -NUMA *nabb; /* rank bin right boundaries (in intensity) */ -NUMA *narbin; /* binned rank value as a function of intensity */ -l_int32 i, j, npts, start, midfound, mcount, rightedge; -l_float32 sum, midrank, endrank, val; - - PROCNAME("numaDiscretizeRankAndIntensity"); - - if (pnarbin) *pnarbin = NULL; - if (pnam) *pnam = NULL; - if (pnar) *pnar = NULL; - if (pnabb) *pnabb = NULL; - if (!pnarbin && !pnam && !pnar && !pnabb) - return ERROR_INT("no output requested", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (nbins < 2) - return ERROR_INT("nbins must be > 1", procName, 1); - - /* Get cumulative normalized histogram (rank vs intensity value). - * For a normalized histogram from an 8 bpp grayscale image - * as input, we have 256 bins and 257 points in the - * cumulative (rank) histogram. */ - npts = numaGetCount(na); - if ((nar = numaCreate(npts + 1)) == NULL) - return ERROR_INT("nar not made", procName, 1); - sum = 0.0; - numaAddNumber(nar, sum); /* left side of first bin */ - for (i = 0; i < npts; i++) { - numaGetFValue(na, i, &val); - sum += val; - numaAddNumber(nar, sum); - } - - nam = numaCreate(nbins); - narbin = numaCreate(npts); - nabb = numaCreate(nbins); - if (!nam || !narbin || !nabb) { - numaDestroy(&nar); - numaDestroy(&nam); - numaDestroy(&narbin); - numaDestroy(&nabb); - return ERROR_INT("numa not made", procName, 1); - } - - /* We find the intensity value at the right edge of each of - * the rank bins. We also find the median intensity in the bin, - * where approximately half the samples are lower and half are - * higher. This can be considered as a simple approximation - * for the average intensity in the bin. 
*/ - start = 0; /* index in nar */ - mcount = 0; /* count of median values in rank bins; not to exceed nbins */ - for (i = 0; i < nbins; i++) { - midrank = (l_float32)(i + 0.5) / (l_float32)(nbins); - endrank = (l_float32)(i + 1.0) / (l_float32)(nbins); - endrank = L_MAX(0.0, L_MIN(endrank - 0.001, 1.0)); - midfound = FALSE; - for (j = start; j < npts; j++) { /* scan up for each bin value */ - numaGetFValue(nar, j, &val); - /* Use (j == npts - 1) tests in case all weight is at top end */ - if ((!midfound && val >= midrank) || - (mcount < nbins && j == npts - 1)) { - midfound = TRUE; - numaAddNumber(nam, j); - mcount++; - } - if ((val >= endrank) || (j == npts - 1)) { - numaAddNumber(nabb, j); - if (val == endrank) - start = j; - else - start = j - 1; - break; - } - } - } - numaSetValue(nabb, nbins - 1, npts - 1); /* extend to max */ - - /* Error checking: did we get data in all bins? */ - if (mcount != nbins) - L_WARNING("found data for %d bins; should be %d\n", - procName, mcount, nbins); - - /* Generate LUT that maps from intensity to bin number */ - start = 0; - for (i = 0; i < nbins; i++) { - numaGetIValue(nabb, i, &rightedge); - for (j = start; j < npts; j++) { - if (j <= rightedge) - numaAddNumber(narbin, i); - if (j > rightedge) { - start = j; - break; - } - if (j == npts - 1) { /* we're done */ - start = j + 1; - break; - } - } - } - - if (pnarbin) - *pnarbin = narbin; - else - numaDestroy(&narbin); - if (pnam) - *pnam = nam; - else - numaDestroy(&nam); - if (pnar) - *pnar = nar; - else - numaDestroy(&nar); - if (pnabb) - *pnabb = nabb; - else - numaDestroy(&nabb); - return 0; -} - - -/*! - * \brief numaGetRankBinValues() - * - * \param[in] na an array of values - * \param[in] nbins number of bins at which the rank is divided - * \param[out] pnarbin [optional] rank bin value vs array value - * \param[out] pnam [optional] median intensity in a bin vs rank bin - * value, with %nbins of discretized rank values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Simple interface for getting a binned rank representation
- *          of an input array of values.  This returns two mappings:
- *             array value     -->  rank bin number  (narbin)
- *             rank bin number -->  median array value in each rank bin (nam)
- * 
- */ -l_ok -numaGetRankBinValues(NUMA *na, - l_int32 nbins, - NUMA **pnarbin, - NUMA **pnam) -{ -NUMA *nah, *nan; /* histo and normalized histo */ -l_int32 maxbins, discardval; -l_float32 maxval, delx; - - PROCNAME("numaGetRankBinValues"); - - if (pnarbin) *pnarbin = NULL; - if (pnam) *pnam = NULL; - if (!pnarbin && !pnam) - return ERROR_INT("no output requested", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (numaGetCount(na) == 0) - return ERROR_INT("na is empty", procName, 1); - if (nbins < 2) - return ERROR_INT("nbins must be > 1", procName, 1); - - /* Get normalized histogram */ - numaGetMax(na, &maxval, NULL); - maxbins = L_MIN(100002, (l_int32)maxval + 2); - nah = numaMakeHistogram(na, maxbins, &discardval, NULL); - nan = numaNormalizeHistogram(nah, 1.0); - - /* Warn if there is a scale change. This shouldn't happen - * unless the max value is above 100000. */ - numaGetParameters(nan, NULL, &delx); - if (delx > 1.0) - L_WARNING("scale change: delx = %6.2f\n", procName, delx); - - /* Rank bin the results */ - numaDiscretizeRankAndIntensity(nan, nbins, pnarbin, pnam, NULL, NULL); - numaDestroy(&nah); - numaDestroy(&nan); - return 0; -} - - -/*----------------------------------------------------------------------* - * Splitting a distribution * - *----------------------------------------------------------------------*/ -/*! - * \brief numaSplitDistribution() - * - * \param[in] na histogram - * \param[in] scorefract fraction of the max score, used to determine - * range over which the histogram min is searched - * \param[out] psplitindex [optional] index for splitting - * \param[out] pave1 [optional] average of lower distribution - * \param[out] pave2 [optional] average of upper distribution - * \param[out] pnum1 [optional] population of lower distribution - * \param[out] pnum2 [optional] population of upper distribution - * \param[out] pnascore [optional] for debugging; otherwise use NULL - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function is intended to be used on a distribution of
- *          values that represent two sets, such as a histogram of
- *          pixel values for an image with a fg and bg, and the goal
- *          is to determine the averages of the two sets and the
- *          best splitting point.
- *      (2) The Otsu method finds a split point that divides the distribution
- *          into two parts by maximizing a score function that is the
- *          product of two terms:
- *            (a) the square of the difference of centroids, (ave1 - ave2)^2
- *            (b) fract1 * (1 - fract1)
- *          where fract1 is the fraction in the lower distribution.
- *      (3) This works well for images where the fg and bg are
- *          each relatively homogeneous and well-separated in color.
- *          However, if the actual fg and bg sets are very different
- *          in size, and the bg is highly varied, as can occur in some
- *          scanned document images, this will bias the split point
- *          into the larger "bump" (i.e., toward the point where the
- *          (b) term reaches its maximum of 0.25 at fract1 = 0.5.
- *          To avoid this, we define a range of values near the
- *          maximum of the score function, and choose the value within
- *          this range such that the histogram itself has a minimum value.
- *          The range is determined by scorefract: we include all abscissa
- *          values to the left and right of the value that maximizes the
- *          score, such that the score stays above (1 - scorefract) * maxscore.
- *          The intuition behind this modification is to try to find
- *          a split point that both has a high variance score and is
- *          at or near a minimum in the histogram, so that the histogram
- *          slope is small at the split point.
- *      (4) We normalize the score so that if the two distributions
- *          were of equal size and at opposite ends of the numa, the
- *          score would be 1.0.
- * 
- */ -l_ok -numaSplitDistribution(NUMA *na, - l_float32 scorefract, - l_int32 *psplitindex, - l_float32 *pave1, - l_float32 *pave2, - l_float32 *pnum1, - l_float32 *pnum2, - NUMA **pnascore) -{ -l_int32 i, n, bestsplit, minrange, maxrange, maxindex; -l_float32 ave1, ave2, ave1prev, ave2prev; -l_float32 num1, num2, num1prev, num2prev; -l_float32 val, minval, sum, fract1; -l_float32 norm, score, minscore, maxscore; -NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2; - - PROCNAME("numaSplitDistribution"); - - if (psplitindex) *psplitindex = 0; - if (pave1) *pave1 = 0.0; - if (pave2) *pave2 = 0.0; - if (pnum1) *pnum1 = 0.0; - if (pnum2) *pnum2 = 0.0; - if (pnascore) *pnascore = NULL; - if (!na) - return ERROR_INT("na not defined", procName, 1); - - n = numaGetCount(na); - if (n <= 1) - return ERROR_INT("n = 1 in histogram", procName, 1); - numaGetSum(na, &sum); - if (sum <= 0.0) - return ERROR_INT("sum <= 0.0", procName, 1); - norm = 4.0 / ((l_float32)(n - 1) * (n - 1)); - ave1prev = 0.0; - numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL); - num1prev = 0.0; - num2prev = sum; - maxindex = n / 2; /* initialize with something */ - - /* Split the histogram with [0 ... i] in the lower part - * and [i+1 ... n-1] in upper part. First, compute an otsu - * score for each possible splitting. */ - if ((nascore = numaCreate(n)) == NULL) - return ERROR_INT("nascore not made", procName, 1); - naave1 = (pave1) ? numaCreate(n) : NULL; - naave2 = (pave2) ? numaCreate(n) : NULL; - nanum1 = (pnum1) ? numaCreate(n) : NULL; - nanum2 = (pnum2) ? 
numaCreate(n) : NULL; - maxscore = 0.0; - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - num1 = num1prev + val; - if (num1 == 0) - ave1 = ave1prev; - else - ave1 = (num1prev * ave1prev + i * val) / num1; - num2 = num2prev - val; - if (num2 == 0) - ave2 = ave2prev; - else - ave2 = (num2prev * ave2prev - i * val) / num2; - fract1 = num1 / sum; - score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1); - numaAddNumber(nascore, score); - if (pave1) numaAddNumber(naave1, ave1); - if (pave2) numaAddNumber(naave2, ave2); - if (pnum1) numaAddNumber(nanum1, num1); - if (pnum2) numaAddNumber(nanum2, num2); - if (score > maxscore) { - maxscore = score; - maxindex = i; - } - num1prev = num1; - num2prev = num2; - ave1prev = ave1; - ave2prev = ave2; - } - - /* Next, for all contiguous scores within a specified fraction - * of the max, choose the split point as the value with the - * minimum in the histogram. */ - minscore = (1. - scorefract) * maxscore; - for (i = maxindex - 1; i >= 0; i--) { - numaGetFValue(nascore, i, &val); - if (val < minscore) - break; - } - minrange = i + 1; - for (i = maxindex + 1; i < n; i++) { - numaGetFValue(nascore, i, &val); - if (val < minscore) - break; - } - maxrange = i - 1; - numaGetFValue(na, minrange, &minval); - bestsplit = minrange; - for (i = minrange + 1; i <= maxrange; i++) { - numaGetFValue(na, i, &val); - if (val < minval) { - minval = val; - bestsplit = i; - } - } - - /* Add one to the bestsplit value to get the threshold value, - * because when we take a threshold, as in pixThresholdToBinary(), - * we always choose the set with values below the threshold. 
*/ - bestsplit = L_MIN(255, bestsplit + 1); - - if (psplitindex) *psplitindex = bestsplit; - if (pave1) numaGetFValue(naave1, bestsplit, pave1); - if (pave2) numaGetFValue(naave2, bestsplit, pave2); - if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1); - if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2); - - if (pnascore) { /* debug mode */ - lept_stderr("minrange = %d, maxrange = %d\n", minrange, maxrange); - lept_stderr("minval = %10.0f\n", minval); - gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore", - "Score for split distribution"); - *pnascore = nascore; - } else { - numaDestroy(&nascore); - } - - if (pave1) numaDestroy(&naave1); - if (pave2) numaDestroy(&naave2); - if (pnum1) numaDestroy(&nanum1); - if (pnum2) numaDestroy(&nanum2); - return 0; -} - - -/*----------------------------------------------------------------------* - * Comparing histograms * - *----------------------------------------------------------------------*/ -/*! - * \brief grayHistogramsToEMD() - * - * \param[in] naa1, naa2 two numaa, each with one or more 256-element - * histograms - * \param[out] pnad nad of EM distances for each histogram - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) The two numaas must be the same size and have corresponding
- *         256-element histograms.  Pairs do not need to be normalized
- *         to the same sum.
- *     (2) This is typically used on two sets of histograms from
- *         corresponding tiles of two images.  The similarity of two
- *         images can be found with the scoring function used in
- *         pixCompareGrayByHisto():
- *             score S = 1.0 - k * D, where
- *                 k is a constant, say in the range 5-10
- *                 D = EMD
- *             for each tile; for multiple tiles, take the Min(S) over
- *             the set of tiles to be the final score.
- * 
- */ -l_ok -grayHistogramsToEMD(NUMAA *naa1, - NUMAA *naa2, - NUMA **pnad) -{ -l_int32 i, n, nt; -l_float32 dist; -NUMA *na1, *na2, *nad; - - PROCNAME("grayHistogramsToEMD"); - - if (!pnad) - return ERROR_INT("&nad not defined", procName, 1); - *pnad = NULL; - if (!naa1 || !naa2) - return ERROR_INT("na1 and na2 not both defined", procName, 1); - n = numaaGetCount(naa1); - if (n != numaaGetCount(naa2)) - return ERROR_INT("naa1 and naa2 numa counts differ", procName, 1); - nt = numaaGetNumberCount(naa1); - if (nt != numaaGetNumberCount(naa2)) - return ERROR_INT("naa1 and naa2 number counts differ", procName, 1); - if (256 * n != nt) /* good enough check */ - return ERROR_INT("na sizes must be 256", procName, 1); - - nad = numaCreate(n); - *pnad = nad; - for (i = 0; i < n; i++) { - na1 = numaaGetNuma(naa1, i, L_CLONE); - na2 = numaaGetNuma(naa2, i, L_CLONE); - numaEarthMoverDistance(na1, na2, &dist); - numaAddNumber(nad, dist / 255.); /* normalize to [0.0 - 1.0] */ - numaDestroy(&na1); - numaDestroy(&na2); - } - return 0; -} - - -/*! - * \brief numaEarthMoverDistance() - * - * \param[in] na1, na2 two numas of the same size, typically histograms - * \param[out] pdist earthmover distance - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) The two numas must have the same size.  They do not need to be
- *         normalized to the same sum before applying the function.
- *     (2) For a 1D discrete function, the implementation of the EMD
- *         is trivial.  Just keep filling or emptying buckets in one numa
- *         to match the amount in the other, moving sequentially along
- *         both arrays.
- *     (3) We divide the sum of the absolute value of everything moved
- *         (by 1 unit at a time) by the sum of the numa (amount of "earth")
- *         to get the average distance that the "earth" was moved.
- *         This is the value returned here.
- *     (4) The caller can do a further normalization, by the number of
- *         buckets (minus 1), to get the EM distance as a fraction of
- *         the maximum possible distance, which is n-1.  This fraction
- *         is 1.0 for the situation where all the 'earth' in the first
- *         array is at one end, and all in the second array is at the
- *         other end.
- * 
- */ -l_ok -numaEarthMoverDistance(NUMA *na1, - NUMA *na2, - l_float32 *pdist) -{ -l_int32 n, norm, i; -l_float32 sum1, sum2, diff, total; -l_float32 *array1, *array3; -NUMA *na3; - - PROCNAME("numaEarthMoverDistance"); - - if (!pdist) - return ERROR_INT("&dist not defined", procName, 1); - *pdist = 0.0; - if (!na1 || !na2) - return ERROR_INT("na1 and na2 not both defined", procName, 1); - n = numaGetCount(na1); - if (n != numaGetCount(na2)) - return ERROR_INT("na1 and na2 have different size", procName, 1); - - /* Generate na3; normalize to na1 if necessary */ - numaGetSum(na1, &sum1); - numaGetSum(na2, &sum2); - norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0; - if (!norm) - na3 = numaTransform(na2, 0, sum1 / sum2); - else - na3 = numaCopy(na2); - array1 = numaGetFArray(na1, L_NOCOPY); - array3 = numaGetFArray(na3, L_NOCOPY); - - /* Move earth in n3 from array elements, to match n1 */ - total = 0; - for (i = 1; i < n; i++) { - diff = array1[i - 1] - array3[i - 1]; - array3[i] -= diff; - total += L_ABS(diff); - } - *pdist = total / sum1; - - numaDestroy(&na3); - return 0; -} - - -/*! - * \brief grayInterHistogramStats() - * - * \param[in] naa numaa with two or more 256-element histograms - * \param[in] wc half-width of the smoothing window - * \param[out] pnam [optional] mean values - * \param[out] pnams [optional] mean square values - * \param[out] pnav [optional] variances - * \param[out] pnarv [optional] rms deviations from the mean - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) The %naa has two or more 256-element numa histograms, which
- *         are to be compared value-wise at each of the 256 gray levels.
- *         The result are stats (mean, mean square, variance, root variance)
- *         aggregated across the set of histograms, and each is output
- *         as a 256 entry numa.  Think of these histograms as a matrix,
- *         where each histogram is one row of the array.  The stats are
- *         then aggregated column-wise, between the histograms.
- *     (2) These stats are:
- *            ~ average value:   (nam)
- *            ~ average squared value:  (nams)
- *            ~ variance: <(v - )*(v - )> =  - *  (nav)
- *            ~ square-root of variance: (narv)
- *         where the brackets < .. > indicate that the average value is
- *         to be taken over each column of the array.
- *     (3) The input histograms are optionally smoothed before these
- *         statistical operations.
- *     (4) The input histograms are normalized to a sum of 10000.  By
- *         doing this, the resulting numbers are independent of the
- *         number of samples used in building the individual histograms.
- *     (5) A typical application is on a set of histograms from tiles
- *         of an image, to distinguish between text/tables and photo
- *         regions.  If the tiles are much larger than the text line
- *         spacing, text/table regions typically have smaller variance
- *         across tiles than photo regions.  For this application, it
- *         may be useful to ignore values near white, which are large for
- *         text and would magnify the variance due to variations in
- *         illumination.  However, because the variance of a drawing or
- *         a light photo can be similar to that of grayscale text, this
- *         function is only a discriminator between darker photos/drawings
- *         and light photos/text/line-graphics.
- * 
- */ -l_ok -grayInterHistogramStats(NUMAA *naa, - l_int32 wc, - NUMA **pnam, - NUMA **pnams, - NUMA **pnav, - NUMA **pnarv) -{ -l_int32 i, j, n, nn; -l_float32 **arrays; -l_float32 mean, var, rvar; -NUMA *na1, *na2, *na3, *na4; - - PROCNAME("grayInterHistogramStats"); - - if (pnam) *pnam = NULL; - if (pnams) *pnams = NULL; - if (pnav) *pnav = NULL; - if (pnarv) *pnarv = NULL; - if (!pnam && !pnams && !pnav && !pnarv) - return ERROR_INT("nothing requested", procName, 1); - if (!naa) - return ERROR_INT("naa not defined", procName, 1); - n = numaaGetCount(naa); - for (i = 0; i < n; i++) { - nn = numaaGetNumaCount(naa, i); - if (nn != 256) { - L_ERROR("%d numbers in numa[%d]\n", procName, nn, i); - return 1; - } - } - - if (pnam) *pnam = numaCreate(256); - if (pnams) *pnams = numaCreate(256); - if (pnav) *pnav = numaCreate(256); - if (pnarv) *pnarv = numaCreate(256); - - /* First, use mean smoothing, normalize each histogram, - * and save all results in a 2D matrix. */ - arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *)); - for (i = 0; i < n; i++) { - na1 = numaaGetNuma(naa, i, L_CLONE); - na2 = numaWindowedMean(na1, wc); - na3 = numaNormalizeHistogram(na2, 10000.); - arrays[i] = numaGetFArray(na3, L_COPY); - numaDestroy(&na1); - numaDestroy(&na2); - numaDestroy(&na3); - } - - /* Get stats between histograms */ - for (j = 0; j < 256; j++) { - na4 = numaCreate(n); - for (i = 0; i < n; i++) { - numaAddNumber(na4, arrays[i][j]); - } - numaSimpleStats(na4, 0, -1, &mean, &var, &rvar); - if (pnam) numaAddNumber(*pnam, mean); - if (pnams) numaAddNumber(*pnams, mean * mean); - if (pnav) numaAddNumber(*pnav, var); - if (pnarv) numaAddNumber(*pnarv, rvar); - numaDestroy(&na4); - } - - for (i = 0; i < n; i++) - LEPT_FREE(arrays[i]); - LEPT_FREE(arrays); - return 0; -} - - -/*----------------------------------------------------------------------* - * Extrema finding * - *----------------------------------------------------------------------*/ -/*! 
- * \brief numaFindPeaks() - * - * \param[in] nas source numa - * \param[in] nmax max number of peaks to be found - * \param[in] fract1 min fraction of peak value - * \param[in] fract2 min slope - * \return peak na, or NULL on error. - * - *
- * Notes:
- *     (1) The returned na consists of sets of four numbers representing
- *         the peak, in the following order:
- *            left edge; peak center; right edge; normalized peak area
- * 
- */ -NUMA * -numaFindPeaks(NUMA *nas, - l_int32 nmax, - l_float32 fract1, - l_float32 fract2) -{ -l_int32 i, k, n, maxloc, lloc, rloc; -l_float32 fmaxval, sum, total, newtotal, val, lastval; -l_float32 peakfract; -NUMA *na, *napeak; - - PROCNAME("numaFindPeaks"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - numaGetSum(nas, &total); - - /* We munge this copy */ - if ((na = numaCopy(nas)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - if ((napeak = numaCreate(4 * nmax)) == NULL) { - numaDestroy(&na); - return (NUMA *)ERROR_PTR("napeak not made", procName, NULL); - } - - for (k = 0; k < nmax; k++) { - numaGetSum(na, &newtotal); - if (newtotal == 0.0) /* sanity check */ - break; - numaGetMax(na, &fmaxval, &maxloc); - sum = fmaxval; - lastval = fmaxval; - lloc = 0; - for (i = maxloc - 1; i >= 0; --i) { - numaGetFValue(na, i, &val); - if (val == 0.0) { - lloc = i + 1; - break; - } - if (val > fract1 * fmaxval) { - sum += val; - lastval = val; - continue; - } - if (lastval - val > fract2 * lastval) { - sum += val; - lastval = val; - continue; - } - lloc = i; - break; - } - lastval = fmaxval; - rloc = n - 1; - for (i = maxloc + 1; i < n; ++i) { - numaGetFValue(na, i, &val); - if (val == 0.0) { - rloc = i - 1; - break; - } - if (val > fract1 * fmaxval) { - sum += val; - lastval = val; - continue; - } - if (lastval - val > fract2 * lastval) { - sum += val; - lastval = val; - continue; - } - rloc = i; - break; - } - peakfract = sum / total; - numaAddNumber(napeak, lloc); - numaAddNumber(napeak, maxloc); - numaAddNumber(napeak, rloc); - numaAddNumber(napeak, peakfract); - - for (i = lloc; i <= rloc; i++) - numaSetValue(na, i, 0.0); - } - - numaDestroy(&na); - return napeak; -} - - -/*! 
- * \brief numaFindExtrema() - * - * \param[in] nas input values - * \param[in] delta relative amount to resolve peaks and valleys - * \param[out] pnav [optional] values of extrema - * \return nad (locations of extrema, or NULL on error - * - *
- * Notes:
- *      (1) This returns a sequence of extrema (peaks and valleys).
- *      (2) The algorithm is analogous to that for determining
- *          mountain peaks.  Suppose we have a local peak, with
- *          bumps on the side.  Under what conditions can we consider
- *          those 'bumps' to be actual peaks?  The answer: if the
- *          bump is separated from the peak by a saddle that is at
- *          least 500 feet below the bump.
- *      (3) Operationally, suppose we are trying to identify a peak.
- *          We have a previous valley, and also the largest value that
- *          we have seen since that valley.  We can identify this as
- *          a peak if we find a value that is delta BELOW it.  When
- *          we find such a value, label the peak, use the current
- *          value to label the starting point for the search for
- *          a valley, and do the same operation in reverse.  Namely,
- *          keep track of the lowest point seen, and look for a value
- *          that is delta ABOVE it.  Once found, the lowest point is
- *          labeled the valley, and continue, looking for the next peak.
- * 
- */ -NUMA * -numaFindExtrema(NUMA *nas, - l_float32 delta, - NUMA **pnav) -{ -l_int32 i, n, found, loc, direction; -l_float32 startval, val, maxval, minval; -NUMA *nav, *nad; - - PROCNAME("numaFindExtrema"); - - if (pnav) *pnav = NULL; - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (delta < 0.0) - return (NUMA *)ERROR_PTR("delta < 0", procName, NULL); - - n = numaGetCount(nas); - nad = numaCreate(0); - nav = NULL; - if (pnav) { - nav = numaCreate(0); - *pnav = nav; - } - - /* We don't know if we'll find a peak or valley first, - * but use the first element of nas as the reference point. - * Break when we deviate by 'delta' from the first point. */ - numaGetFValue(nas, 0, &startval); - found = FALSE; - for (i = 1; i < n; i++) { - numaGetFValue(nas, i, &val); - if (L_ABS(val - startval) >= delta) { - found = TRUE; - break; - } - } - - if (!found) - return nad; /* it's empty */ - - /* Are we looking for a peak or a valley? */ - if (val > startval) { /* peak */ - direction = 1; - maxval = val; - } else { - direction = -1; - minval = val; - } - loc = i; - - /* Sweep through the rest of the array, recording alternating - * peak/valley extrema. 
*/ - for (i = i + 1; i < n; i++) { - numaGetFValue(nas, i, &val); - if (direction == 1 && val > maxval ) { /* new local max */ - maxval = val; - loc = i; - } else if (direction == -1 && val < minval ) { /* new local min */ - minval = val; - loc = i; - } else if (direction == 1 && (maxval - val >= delta)) { - numaAddNumber(nad, loc); /* save the current max location */ - if (nav) numaAddNumber(nav, maxval); - direction = -1; /* reverse: start looking for a min */ - minval = val; - loc = i; /* current min location */ - } else if (direction == -1 && (val - minval >= delta)) { - numaAddNumber(nad, loc); /* save the current min location */ - if (nav) numaAddNumber(nav, minval); - direction = 1; /* reverse: start looking for a max */ - maxval = val; - loc = i; /* current max location */ - } - } - - /* Save the final extremum */ -/* numaAddNumber(nad, loc); */ - return nad; -} - - -/*! - * \brief numaFindLocForThreshold() - * - * \param[in] nas input histogram - * \param[in] skip distance to skip to check for false min; 0 for default - * \param[out] pthresh threshold value - * \param[out] pfract [optional] fraction below or at threshold - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This finds a good place to set a threshold for a histogram
- *          of values that has two peaks.  The peaks can differ greatly
- *          in area underneath them.  The number of buckets in the
- *          histogram is expected to be 256 (e.g, from an 8 bpp gray image).
- *      (2) The input histogram should have been smoothed with a window
- *          to avoid false peak and valley detection due to noise.  For
- *          example, see pixThresholdByHisto().
- *      (3) A skip value can be input to determine the look-ahead distance
- *          to ignore a false peak on the descent from the first peak.
- *          Input 0 to use the default value (it assumes a histo size of 256).
- *      (4) Optionally, the fractional area under the first peak can
- *          be returned.
- * 
- */ -l_ok -numaFindLocForThreshold(NUMA *na, - l_int32 skip, - l_int32 *pthresh, - l_float32 *pfract) -{ -l_int32 i, n, start, found, index, minloc; -l_float32 val, pval, startval, jval, minval, sum, partsum; -l_float32 *fa; - - PROCNAME("numaFindLocForThreshold"); - - if (pfract) *pfract = 0.0; - if (!pthresh) - return ERROR_INT("&thresh not defined", procName, 1); - *pthresh = 0; - if (!na) - return ERROR_INT("na not defined", procName, 1); - if (skip <= 0) skip = 20; - - /* Look for the top of the first peak */ - n = numaGetCount(na); - fa = numaGetFArray(na, L_NOCOPY); - pval = fa[0]; - for (i = 1; i < n; i++) { - val = fa[i]; - index = L_MIN(i + skip, n - 1); - jval = fa[index]; - if (val < pval && jval < pval) /* near the top if not there */ - break; - pval = val; - } - - /* Look for the low point in the valley */ - start = i; - pval = fa[start]; - found = FALSE; /* signal for passing the min between peaks */ - for (i = start + 1; i < n; i++) { - val = fa[i]; - if (val <= pval) { /* going down */ - pval = val; - } else { /* going up */ - index = L_MIN(i + skip, n - 1); - jval = fa[index]; /* junp ahead 20 */ - if (val > jval) { /* still going down; jump ahead */ - pval = jval; - i = index; - } else { /* really going up; passed the min */ - found = TRUE; - break; - } - } - } - - /* Find the location of the minimum in the interval */ - minloc = index; /* likely passed the min; look backward */ - minval = fa[index]; - for (i = index - 1; i > index - skip; i--) { - if (fa[i] < minval) { - minval = fa[i]; - minloc = i; - } - } - *pthresh = minloc; - - /* Find the fraction under the first peak */ - if (pfract) { - numaGetSumOnInterval(na, 0, minloc, &partsum); - numaGetSum(na, &sum); - if (sum > 0.0) - *pfract = partsum / sum; - } - return 0; -} - - -/*! 
- * \brief numaCountReversals() - * - * \param[in] nas input values - * \param[in] minreversal relative amount to resolve peaks and valleys - * \param[out] pnr [optional] number of reversals - * \param[out] prd [optional] reversal density: reversals/length - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input numa is can be generated from pixExtractAlongLine().
- *          If so, the x parameters can be used to find the reversal
- *          frequency along a line.
- *      (2) If the input numa was generated from a 1 bpp pix, the
- *          values will be 0 and 1.  Use %minreversal == 1 to get
- *          the number of pixel flips.  If the only values are 0 and 1,
- *          but %minreversal > 1, set the reversal count to 0 and
- *          issue a warning.
- * 
- */ -l_ok -numaCountReversals(NUMA *nas, - l_float32 minreversal, - l_int32 *pnr, - l_float32 *prd) -{ -l_int32 i, n, nr, ival, binvals; -l_int32 *ia; -l_float32 fval, delx, len; -NUMA *nat; - - PROCNAME("numaCountReversals"); - - if (pnr) *pnr = 0; - if (prd) *prd = 0.0; - if (!pnr && !prd) - return ERROR_INT("neither &nr nor &rd are defined", procName, 1); - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if ((n = numaGetCount(nas)) == 0) { - L_INFO("nas is empty\n", procName); - return 0; - } - if (minreversal < 0.0) - return ERROR_INT("minreversal < 0", procName, 1); - - /* Decide if the only values are 0 and 1 */ - binvals = TRUE; - for (i = 0; i < n; i++) { - numaGetFValue(nas, i, &fval); - if (fval != 0.0 && fval != 1.0) { - binvals = FALSE; - break; - } - } - - nr = 0; - if (binvals) { - if (minreversal > 1.0) { - L_WARNING("binary values but minreversal > 1\n", procName); - } else { - ia = numaGetIArray(nas); - ival = ia[0]; - for (i = 1; i < n; i++) { - if (ia[i] != ival) { - nr++; - ival = ia[i]; - } - } - LEPT_FREE(ia); - } - } else { - nat = numaFindExtrema(nas, minreversal, NULL); - nr = numaGetCount(nat); - numaDestroy(&nat); - } - if (pnr) *pnr = nr; - if (prd) { - numaGetParameters(nas, NULL, &delx); - len = delx * n; - *prd = (l_float32)nr / len; - } - - return 0; -} - - -/*----------------------------------------------------------------------* - * Threshold crossings and frequency analysis * - *----------------------------------------------------------------------*/ -/*! - * \brief numaSelectCrossingThreshold() - * - * \param[in] nax [optional] numa of abscissa values; can be NULL - * \param[in] nay signal - * \param[in] estthresh estimated pixel threshold for crossing: - * e.g., for images, white <--> black; typ. ~120 - * \param[out] pbestthresh robust estimate of threshold to use - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) When a valid threshold is used, the number of crossings is
- *         a maximum, because none are missed.  If no threshold intersects
- *         all the crossings, the crossings must be determined with
- *         numaCrossingsByPeaks().
- *     (2) %estthresh is an input estimate of the threshold that should
- *         be used.  We compute the crossings with 41 thresholds
- *         (20 below and 20 above).  There is a range in which the
- *         number of crossings is a maximum.  Return a threshold
- *         in the center of this stable plateau of crossings.
- *         This can then be used with numaCrossingsByThreshold()
- *         to get a good estimate of crossing locations.
- * 
- */ -l_ok -numaSelectCrossingThreshold(NUMA *nax, - NUMA *nay, - l_float32 estthresh, - l_float32 *pbestthresh) -{ -l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen; -l_int32 val, maxval, nmax, count; -l_float32 thresh, fmaxval, fmodeval; -NUMA *nat, *nac; - - PROCNAME("numaSelectCrossingThreshold"); - - if (!pbestthresh) - return ERROR_INT("&bestthresh not defined", procName, 1); - *pbestthresh = 0.0; - if (!nay) - return ERROR_INT("nay not defined", procName, 1); - - /* Compute the number of crossings for different thresholds */ - nat = numaCreate(41); - for (i = 0; i < 41; i++) { - thresh = estthresh - 80.0 + 4.0 * i; - nac = numaCrossingsByThreshold(nax, nay, thresh); - numaAddNumber(nat, numaGetCount(nac)); - numaDestroy(&nac); - } - - /* Find the center of the plateau of max crossings, which - * extends from thresh[istart] to thresh[iend]. */ - numaGetMax(nat, &fmaxval, NULL); - maxval = (l_int32)fmaxval; - nmax = 0; - for (i = 0; i < 41; i++) { - numaGetIValue(nat, i, &val); - if (val == maxval) - nmax++; - } - if (nmax < 3) { /* likely accidental max; try the mode */ - numaGetMode(nat, &fmodeval, &count); - if (count > nmax && fmodeval > 0.5 * fmaxval) - maxval = (l_int32)fmodeval; /* use the mode */ - } - - inrun = FALSE; - iend = 40; - maxrunlen = 0, maxstart = 0, maxend = 0; - for (i = 0; i < 41; i++) { - numaGetIValue(nat, i, &val); - if (val == maxval) { - if (!inrun) { - istart = i; - inrun = TRUE; - } - continue; - } - if (inrun && (val != maxval)) { - iend = i - 1; - runlen = iend - istart + 1; - inrun = FALSE; - if (runlen > maxrunlen) { - maxstart = istart; - maxend = iend; - maxrunlen = runlen; - } - } - } - if (inrun) { - runlen = i - istart; - if (runlen > maxrunlen) { - maxstart = istart; - maxend = i - 1; - maxrunlen = runlen; - } - } - - *pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend); - -#if DEBUG_CROSSINGS - lept_stderr("\nCrossings attain a maximum at %d thresholds, between:\n" - " thresh[%d] = 
%5.1f and thresh[%d] = %5.1f\n", - nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart, - maxend, estthresh - 80.0 + 4.0 * maxend); - lept_stderr("The best choice: %5.1f\n", *pbestthresh); - lept_stderr("Number of crossings at the 41 thresholds:"); - numaWriteStderr(nat); -#endif /* DEBUG_CROSSINGS */ - - numaDestroy(&nat); - return 0; -} - - -/*! - * \brief numaCrossingsByThreshold() - * - * \param[in] nax [optional] numa of abscissa values; can be NULL - * \param[in] nay numa of ordinate values, corresponding to nax - * \param[in] thresh threshold value for nay - * \return nad abscissa pts at threshold, or NULL on error - * - *
- * Notes:
- *      (1) If nax == NULL, we use startx and delx from nay to compute
- *          the crossing values in nad.
- * 
- */ -NUMA * -numaCrossingsByThreshold(NUMA *nax, - NUMA *nay, - l_float32 thresh) -{ -l_int32 i, n; -l_float32 startx, delx; -l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract; -NUMA *nad; - - PROCNAME("numaCrossingsByThreshold"); - - if (!nay) - return (NUMA *)ERROR_PTR("nay not defined", procName, NULL); - n = numaGetCount(nay); - - if (nax && (numaGetCount(nax) != n)) - return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL); - - nad = numaCreate(0); - numaGetFValue(nay, 0, &yval1); - numaGetParameters(nay, &startx, &delx); - if (nax) - numaGetFValue(nax, 0, &xval1); - else - xval1 = startx; - for (i = 1; i < n; i++) { - numaGetFValue(nay, i, &yval2); - if (nax) - numaGetFValue(nax, i, &xval2); - else - xval2 = startx + i * delx; - delta1 = yval1 - thresh; - delta2 = yval2 - thresh; - if (delta1 == 0.0) { - numaAddNumber(nad, xval1); - } else if (delta2 == 0.0) { - numaAddNumber(nad, xval2); - } else if (delta1 * delta2 < 0.0) { /* crossing */ - fract = L_ABS(delta1) / L_ABS(yval1 - yval2); - crossval = xval1 + fract * (xval2 - xval1); - numaAddNumber(nad, crossval); - } - xval1 = xval2; - yval1 = yval2; - } - - return nad; -} - - -/*! - * \brief numaCrossingsByPeaks() - * - * \param[in] nax [optional] numa of abscissa values - * \param[in] nay numa of ordinate values, corresponding to nax - * \param[in] delta parameter used to identify when a new peak can be found - * \return nad abscissa pts at threshold, or NULL on error - * - *
- * Notes:
- *      (1) If nax == NULL, we use startx and delx from nay to compute
- *          the crossing values in nad.
- * 
- */ -NUMA * -numaCrossingsByPeaks(NUMA *nax, - NUMA *nay, - l_float32 delta) -{ -l_int32 i, j, n, np, previndex, curindex; -l_float32 startx, delx; -l_float32 xval1, xval2, yval1, yval2, delta1, delta2; -l_float32 prevval, curval, thresh, crossval, fract; -NUMA *nap, *nad; - - PROCNAME("numaCrossingsByPeaks"); - - if (!nay) - return (NUMA *)ERROR_PTR("nay not defined", procName, NULL); - - n = numaGetCount(nay); - if (nax && (numaGetCount(nax) != n)) - return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL); - - /* Find the extrema. Also add last point in nay to get - * the last transition (from the last peak to the end). - * The number of crossings is 1 more than the number of extrema. */ - nap = numaFindExtrema(nay, delta, NULL); - numaAddNumber(nap, n - 1); - np = numaGetCount(nap); - L_INFO("Number of crossings: %d\n", procName, np); - - /* Do all computation in index units of nax or the delx of nay */ - nad = numaCreate(np); /* output crossing locations, in nax units */ - previndex = 0; /* prime the search with 1st point */ - numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */ - numaGetParameters(nay, &startx, &delx); - for (i = 0; i < np; i++) { - numaGetIValue(nap, i, &curindex); - numaGetFValue(nay, curindex, &curval); - thresh = (prevval + curval) / 2.0; - if (nax) - numaGetFValue(nax, previndex, &xval1); - else - xval1 = startx + previndex * delx; - numaGetFValue(nay, previndex, &yval1); - for (j = previndex + 1; j <= curindex; j++) { - if (nax) - numaGetFValue(nax, j, &xval2); - else - xval2 = startx + j * delx; - numaGetFValue(nay, j, &yval2); - delta1 = yval1 - thresh; - delta2 = yval2 - thresh; - if (delta1 == 0.0) { - numaAddNumber(nad, xval1); - break; - } else if (delta2 == 0.0) { - numaAddNumber(nad, xval2); - break; - } else if (delta1 * delta2 < 0.0) { /* crossing */ - fract = L_ABS(delta1) / L_ABS(yval1 - yval2); - crossval = xval1 + fract * (xval2 - xval1); - numaAddNumber(nad, crossval); - break; - } - xval1 = 
xval2; - yval1 = yval2; - } - previndex = curindex; - prevval = curval; - } - - numaDestroy(&nap); - return nad; -} - - -/*! - * \brief numaEvalBestHaarParameters() - * - * \param[in] nas numa of non-negative signal values - * \param[in] relweight relative weight of (-1 comb) / (+1 comb) - * contributions to the 'convolution'. In effect, - * the convolution kernel is a comb consisting of - * alternating +1 and -weight. - * \param[in] nwidth number of widths to consider - * \param[in] nshift number of shifts to consider for each width - * \param[in] minwidth smallest width to consider - * \param[in] maxwidth largest width to consider - * \param[out] pbestwidth width giving largest score - * \param[out] pbestshift shift giving largest score - * \param[out] pbestscore [optional] convolution with "Haar"-like comb - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a linear sweep of widths, evaluating at %nshift
- *          shifts for each width, computing the score from a convolution
- *          with a long comb, and finding the (width, shift) pair that
- *          gives the maximum score.  The best width is the "half-wavelength"
- *          of the signal.
- *      (2) The convolving function is a comb of alternating values
- *          +1 and -1 * relweight, separated by the width and phased by
- *          the shift.  This is similar to a Haar transform, except
- *          there the convolution is performed with a square wave.
- *      (3) The function is useful for finding the line spacing
- *          and strength of line signal from pixel sum projections.
- *      (4) The score is normalized to the size of nas divided by
- *          the number of half-widths.  For image applications, the input is
- *          typically an array of pixel projections, so one should
- *          normalize by dividing the score by the image width in the
- *          pixel projection direction.
- * 
- */ -l_ok -numaEvalBestHaarParameters(NUMA *nas, - l_float32 relweight, - l_int32 nwidth, - l_int32 nshift, - l_float32 minwidth, - l_float32 maxwidth, - l_float32 *pbestwidth, - l_float32 *pbestshift, - l_float32 *pbestscore) -{ -l_int32 i, j; -l_float32 delwidth, delshift, width, shift, score; -l_float32 bestwidth, bestshift, bestscore; - - PROCNAME("numaEvalBestHaarParameters"); - - if (pbestscore) *pbestscore = 0.0; - if (pbestwidth) *pbestwidth = 0.0; - if (pbestshift) *pbestshift = 0.0; - if (!pbestwidth || !pbestshift) - return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1); - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - - bestscore = bestwidth = bestshift = 0.0; - delwidth = (maxwidth - minwidth) / (nwidth - 1.0); - for (i = 0; i < nwidth; i++) { - width = minwidth + delwidth * i; - delshift = width / (l_float32)(nshift); - for (j = 0; j < nshift; j++) { - shift = j * delshift; - numaEvalHaarSum(nas, width, shift, relweight, &score); - if (score > bestscore) { - bestscore = score; - bestwidth = width; - bestshift = shift; -#if DEBUG_FREQUENCY - lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n", - width, shift, score); -#endif /* DEBUG_FREQUENCY */ - } - } - } - - *pbestwidth = bestwidth; - *pbestshift = bestshift; - if (pbestscore) - *pbestscore = bestscore; - return 0; -} - - -/*! - * \brief numaEvalHaarSum() - * - * \param[in] nas numa of non-negative signal values - * \param[in] width distance between +1 and -1 in convolution comb - * \param[in] shift phase of the comb: location of first +1 - * \param[in] relweight relative weight of (-1 comb) / (+1 comb) - * contributions to the 'convolution'. In effect, - * the convolution kernel is a comb consisting of - * alternating +1 and -weight. - * \param[out] pscore convolution with "Haar"-like comb - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a convolution with a comb of alternating values
- *          +1 and -relweight, separated by the width and phased by the shift.
- *          This is similar to a Haar transform, except that for Haar,
- *            (1) the convolution kernel is symmetric about 0, so the
- *                relweight is 1.0, and
- *            (2) the convolution is performed with a square wave.
- *      (2) The score is normalized to the size of nas divided by
- *          twice the "width".  For image applications, the input is
- *          typically an array of pixel projections, so one should
- *          normalize by dividing the score by the image width in the
- *          pixel projection direction.
- *      (3) To get a Haar-like result, use relweight = 1.0.  For detecting
- *          signals where you expect every other sample to be close to
- *          zero, as with barcodes or filtered text lines, you can
- *          use relweight > 1.0.
- * 
- */ -l_ok -numaEvalHaarSum(NUMA *nas, - l_float32 width, - l_float32 shift, - l_float32 relweight, - l_float32 *pscore) -{ -l_int32 i, n, nsamp, index; -l_float32 score, weight, val; - - PROCNAME("numaEvalHaarSum"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = 0.0; - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if ((n = numaGetCount(nas)) < 2 * width) - return ERROR_INT("nas size too small", procName, 1); - - score = 0.0; - nsamp = (l_int32)((n - shift) / width); - for (i = 0; i < nsamp; i++) { - index = (l_int32)(shift + i * width); - weight = (i % 2) ? 1.0 : -1.0 * relweight; - numaGetFValue(nas, index, &val); - score += weight * val; - } - - *pscore = 2.0 * width * score / (l_float32)n; - return 0; -} - - -/*----------------------------------------------------------------------* - * Generating numbers in a range under constraints * - *----------------------------------------------------------------------*/ -/*! - * \brief genConstrainedNumaInRange() - * - * \param[in] first first number to choose; >= 0 - * \param[in] last biggest possible number to reach; >= first - * \param[in] nmax maximum number of numbers to select; > 0 - * \param[in] use_pairs 1 = select pairs of adjacent numbers; - * 0 = select individual numbers - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) Selection is made uniformly in the range.  This can be used
- *         to select pages distributed as uniformly as possible
- *         through a book, where you are constrained to:
- *          ~ choose between [first, ... biggest],
- *          ~ choose no more than nmax numbers, and
- *         and you have the option of requiring pairs of adjacent numbers.
- * 
- */ -NUMA * -genConstrainedNumaInRange(l_int32 first, - l_int32 last, - l_int32 nmax, - l_int32 use_pairs) -{ -l_int32 i, nsets, val; -l_float32 delta; -NUMA *na; - - PROCNAME("genConstrainedNumaInRange"); - - first = L_MAX(0, first); - if (last < first) - return (NUMA *)ERROR_PTR("last < first!", procName, NULL); - if (nmax < 1) - return (NUMA *)ERROR_PTR("nmax < 1!", procName, NULL); - - nsets = L_MIN(nmax, last - first + 1); - if (use_pairs == 1) - nsets = nsets / 2; - if (nsets == 0) - return (NUMA *)ERROR_PTR("nsets == 0", procName, NULL); - - /* Select delta so that selection covers the full range if possible */ - if (nsets == 1) { - delta = 0.0; - } else { - if (use_pairs == 0) - delta = (l_float32)(last - first) / (nsets - 1); - else - delta = (l_float32)(last - first - 1) / (nsets - 1); - } - - na = numaCreate(nsets); - for (i = 0; i < nsets; i++) { - val = (l_int32)(first + i * delta + 0.5); - numaAddNumber(na, val); - if (use_pairs == 1) - numaAddNumber(na, val + 1); - } - - return na; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pageseg.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pageseg.c deleted file mode 100644 index 74fcd08f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pageseg.c +++ /dev/null @@ -1,2466 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pageseg.c - *
- *
- *      Top level page segmentation
- *          l_int32   pixGetRegionsBinary()
- *
- *      Halftone region extraction
- *          PIX      *pixGenHalftoneMask()    **Deprecated wrapper**
- *          PIX      *pixGenerateHalftoneMask()
-
- *
- *      Textline extraction
- *          PIX      *pixGenTextlineMask()
- *
- *      Textblock extraction
- *          PIX      *pixGenTextblockMask()
- *
- *      Location of page foreground
- *          PIX      *pixFindPageForeground()
- *
- *      Extraction of characters from image with only text
- *          l_int32   pixSplitIntoCharacters()
- *          BOXA     *pixSplitComponentWithProfile()
- *
- *      Extraction of lines of text
- *          PIXA     *pixExtractTextlines()
- *          PIXA     *pixExtractRawTextlines()
- *
- *      How many text columns
- *          l_int32   pixCountTextColumns()
- *
- *      Decision: text vs photo
- *          l_int32   pixDecideIfText()
- *          l_int32   pixFindThreshFgExtent()
- *
- *      Decision: table vs text
- *          l_int32   pixDecideIfTable()
- *          Pix      *pixPrepare1bpp()
- *
- *      Estimate the grayscale background value
- *          l_int32   pixEstimateBackground()
- *
- *      Largest white or black rectangles in an image
- *          l_int32   pixFindLargeRectangles()
- *          l_int32   pixFindLargestRectangle()
- *
- *      Generate rectangle inside connected component
- *          BOX      *pixFindRectangleInCC()
- *
- *      Automatic photoinvert for OCR
- *          PIX      *pixAutoPhotoinvert()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" -#include "math.h" - - /* These functions are not intended to work on very low-res images */ -static const l_int32 MinWidth = 100; -static const l_int32 MinHeight = 100; - -/*------------------------------------------------------------------* - * Top level page segmentation * - *------------------------------------------------------------------*/ -/*! - * \brief pixGetRegionsBinary() - * - * \param[in] pixs 1 bpp, assumed to be 300 to 400 ppi - * \param[out] ppixhm [optional] halftone mask - * \param[out] ppixtm [optional] textline mask - * \param[out] ppixtb [optional] textblock mask - * \param[in] pixadb input for collecting debug pix; use NULL to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is best to deskew the image before segmenting.
- *      (2) Passing in %pixadb enables debug output.
- * 
- */ -l_ok -pixGetRegionsBinary(PIX *pixs, - PIX **ppixhm, - PIX **ppixtm, - PIX **ppixtb, - PIXA *pixadb) -{ -l_int32 w, h, htfound, tlfound; -PIX *pixr, *pix1, *pix2; -PIX *pixtext; /* text pixels only */ -PIX *pixhm2; /* halftone mask; 2x reduction */ -PIX *pixhm; /* halftone mask; */ -PIX *pixtm2; /* textline mask; 2x reduction */ -PIX *pixtm; /* textline mask */ -PIX *pixvws; /* vertical white space mask */ -PIX *pixtb2; /* textblock mask; 2x reduction */ -PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ -PIX *pixtb; /* textblock mask */ - - PROCNAME("pixGetRegionsBinary"); - - if (ppixhm) *ppixhm = NULL; - if (ppixtm) *ppixtm = NULL; - if (ppixtb) *ppixtb = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinWidth || h < MinHeight) { - L_ERROR("pix too small: w = %d, h = %d\n", procName, w, h); - return 1; - } - - /* 2x reduce, to 150 -200 ppi */ - pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); - if (pixadb) pixaAddPix(pixadb, pixr, L_COPY); - - /* Get the halftone mask */ - pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb); - - /* Get the textline mask from the text pixels */ - pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb); - - /* Get the textblock mask from the textline mask */ - pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb); - pixDestroy(&pixr); - pixDestroy(&pixtext); - pixDestroy(&pixvws); - - /* Remove small components from the mask, where a small - * component is defined as one with both width and height < 60 */ - pixtbf2 = NULL; - if (pixtb2) { - pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, - L_SELECT_IF_GTE, NULL); - pixDestroy(&pixtb2); - if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY); - } - - /* Expand all masks to full resolution, and do filling or - * small dilations for better coverage. 
*/ - pixhm = pixExpandReplicate(pixhm2, 2); - pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); - pixOr(pixhm, pixhm, pix1); - pixDestroy(&pixhm2); - pixDestroy(&pix1); - if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); - - pix1 = pixExpandReplicate(pixtm2, 2); - pixtm = pixDilateBrick(NULL, pix1, 3, 3); - pixDestroy(&pixtm2); - pixDestroy(&pix1); - if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY); - - if (pixtbf2) { - pix1 = pixExpandReplicate(pixtbf2, 2); - pixtb = pixDilateBrick(NULL, pix1, 3, 3); - pixDestroy(&pixtbf2); - pixDestroy(&pix1); - if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY); - } else { - pixtb = pixCreateTemplate(pixs); /* empty mask */ - } - - /* Debug: identify objects that are neither text nor halftone image */ - if (pixadb) { - pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ - pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */ - pixaAddPix(pixadb, pix2, L_INSERT); - pixDestroy(&pix1); - } - - /* Debug: display textline components with random colors */ - if (pixadb) { - l_int32 w, h; - BOXA *boxa; - PIXA *pixa; - boxa = pixConnComp(pixtm, &pixa, 8); - pixGetDimensions(pixtm, &w, &h, NULL); - pix1 = pixaDisplayRandomCmap(pixa, w, h); - pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); - pixaAddPix(pixadb, pix1, L_INSERT); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - } - - /* Debug: identify the outlines of each textblock */ - if (pixadb) { - PIXCMAP *cmap; - PTAA *ptaa; - ptaa = pixGetOuterBordersPtaa(pixtb); - lept_mkdir("lept/pageseg"); - ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1); - pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); - cmap = pixGetColormap(pix1); - pixcmapResetColor(cmap, 0, 130, 130, 130); - pixaAddPix(pixadb, pix1, L_INSERT); - ptaaDestroy(&ptaa); - } - - /* Debug: get b.b. 
for all mask components */ - if (pixadb) { - BOXA *bahm, *batm, *batb; - bahm = pixConnComp(pixhm, NULL, 4); - batm = pixConnComp(pixtm, NULL, 4); - batb = pixConnComp(pixtb, NULL, 4); - boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm); - boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm); - boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb); - boxaDestroy(&bahm); - boxaDestroy(&batm); - boxaDestroy(&batb); - } - if (pixadb) { - pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation", - "/tmp/lept/pageseg/debug.pdf"); - L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", procName); - } - - if (ppixhm) - *ppixhm = pixhm; - else - pixDestroy(&pixhm); - if (ppixtm) - *ppixtm = pixtm; - else - pixDestroy(&pixtm); - if (ppixtb) - *ppixtb = pixtb; - else - pixDestroy(&pixtb); - - return 0; -} - - -/*------------------------------------------------------------------* - * Halftone region extraction * - *------------------------------------------------------------------*/ -/*! - * \brief pixGenHalftoneMask() - * - *
- * Deprecated:
- *   This wrapper avoids an ABI change with tesseract 3.0.4.
- *   It should be removed when we no longer need to support 3.0.4.
- *   The debug parameter is ignored (assumed 0).
- * 
- */ -PIX * -pixGenHalftoneMask(PIX *pixs, - PIX **ppixtext, - l_int32 *phtfound, - l_int32 debug) -{ - return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL); -} - - -/*! - * \brief pixGenerateHalftoneMask() - * - * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi - * \param[out] ppixtext [optional] text part of pixs - * \param[out] phtfound [optional] 1 if the mask is not empty - * \param[in] pixadb input for collecting debug pix; use NULL to skip - * \return pixd halftone mask, or NULL on error - * - *
- * Notes:
- *      (1) This is not intended to work on small thumbnails.  The
- *          dimensions of pixs must be at least MinWidth x MinHeight.
- * 
- */ -PIX * -pixGenerateHalftoneMask(PIX *pixs, - PIX **ppixtext, - l_int32 *phtfound, - PIXA *pixadb) -{ -l_int32 w, h, empty; -PIX *pix1, *pix2, *pixhs, *pixhm, *pixd; - - PROCNAME("pixGenerateHalftoneMask"); - - if (ppixtext) *ppixtext = NULL; - if (phtfound) *phtfound = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinWidth || h < MinHeight) { - L_ERROR("pix too small: w = %d, h = %d\n", procName, w, h); - return NULL; - } - - /* Compute seed for halftone parts at 8x reduction */ - pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0); - pix2 = pixOpenBrick(NULL, pix1, 5, 5); - pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */ - pixDestroy(&pix1); - pixDestroy(&pix2); - if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY); - - /* Compute mask for connected regions */ - pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); - if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); - - /* Fill seed into mask to get halftone mask */ - pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); - if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); - -#if 0 - pixOpenBrick(pixd, pixd, 9, 9); -#endif - - /* Check if mask is empty */ - pixZero(pixd, &empty); - if (phtfound && !empty) - *phtfound = 1; - - /* Optionally, get all pixels that are not under the halftone mask */ - if (ppixtext) { - if (empty) - *ppixtext = pixCopy(NULL, pixs); - else - *ppixtext = pixSubtract(NULL, pixs, pixd); - if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY); - } - - pixDestroy(&pixhs); - pixDestroy(&pixhm); - return pixd; -} - - -/*------------------------------------------------------------------* - * Textline extraction * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixGenTextlineMask() - * - * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi - * \param[out] ppixvws vertical whitespace mask - * \param[out] ptlfound [optional] 1 if the mask is not empty - * \param[in] pixadb input for collecting debug pix; use NULL to skip - * \return pixd textline mask, or NULL on error - * - *
- * Notes:
- *      (1) The input pixs should be deskewed.
- *      (2) pixs should have no halftone pixels.
- *      (3) This is not intended to work on small thumbnails.  The
- *          dimensions of pixs must be at least MinWidth x MinHeight.
- *      (4) Both the input image and the returned textline mask
- *          are at the same resolution.
- * 
- */ -PIX * -pixGenTextlineMask(PIX *pixs, - PIX **ppixvws, - l_int32 *ptlfound, - PIXA *pixadb) -{ -l_int32 w, h, empty; -PIX *pix1, *pix2, *pixvws, *pixd; - - PROCNAME("pixGenTextlineMask"); - - if (ptlfound) *ptlfound = 0; - if (!ppixvws) - return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL); - *ppixvws = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinWidth || h < MinHeight) { - L_ERROR("pix too small: w = %d, h = %d\n", procName, w, h); - return NULL; - } - - /* First we need a vertical whitespace mask. Invert the image. */ - pix1 = pixInvert(NULL, pixs); - - /* The whitespace mask will break textlines where there - * is a large amount of white space below or above. - * This can be prevented by identifying regions of the - * inverted image that have large horizontal extent (bigger than - * the separation between columns) and significant - * vertical extent (bigger than the separation between - * textlines), and subtracting this from the bg. */ - pix2 = pixMorphCompSequence(pix1, "o80.60", 0); - pixSubtract(pix1, pix1, pix2); - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - pixDestroy(&pix2); - - /* Identify vertical whitespace by opening the remaining bg. - * o5.1 removes thin vertical bg lines and o1.200 extracts - * long vertical bg lines. 
*/ - pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0); - *ppixvws = pixvws; - if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY); - pixDestroy(&pix1); - - /* Three steps to getting text line mask: - * (1) close the characters and words in the textlines - * (2) open the vertical whitespace corridors back up - * (3) small opening to remove noise */ - pix1 = pixMorphSequence(pixs, "c30.1", 0); - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - pixd = pixSubtract(NULL, pix1, pixvws); - pixOpenBrick(pixd, pixd, 3, 3); - if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); - pixDestroy(&pix1); - - /* Check if text line mask is empty */ - if (ptlfound) { - pixZero(pixd, &empty); - if (!empty) - *ptlfound = 1; - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Textblock extraction * - *------------------------------------------------------------------*/ -/*! - * \brief pixGenTextblockMask() - * - * \param[in] pixs 1 bpp, textline mask, assumed to be 150 to 200 ppi - * \param[in] pixvws vertical white space mask - * \param[in] pixadb input for collecting debug pix; use NULL to skip - * \return pixd textblock mask, or NULL if empty or on error - * - *
- * Notes:
- *      (1) Both the input masks (textline and vertical white space) and
- *          the returned textblock mask are at the same resolution.
- *      (2) This is not intended to work on small thumbnails.  The
- *          dimensions of pixs must be at least MinWidth x MinHeight.
- *      (3) The result is somewhat noisy, in that small "blocks" of
- *          text may be included.  These can be removed by post-processing,
- *          using, e.g.,
- *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
- *                             L_SELECT_IF_GTE, NULL);
- * 
- */ -PIX * -pixGenTextblockMask(PIX *pixs, - PIX *pixvws, - PIXA *pixadb) -{ -l_int32 w, h, empty; -PIX *pix1, *pix2, *pix3, *pixd; - - PROCNAME("pixGenTextblockMask"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinWidth || h < MinHeight) { - L_ERROR("pix too small: w = %d, h = %d\n", procName, w, h); - return NULL; - } - if (!pixvws) - return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL); - - /* Join pixels vertically to make a textblock mask */ - pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); - pixZero(pix1, &empty); - if (empty) { - pixDestroy(&pix1); - L_INFO("no fg pixels in textblock mask\n", procName); - return NULL; - } - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - - /* Solidify the textblock mask and remove noise: - * (1) For each cc, close the blocks and dilate slightly - * to form a solid mask. - * (2) Small horizontal closing between components. - * (3) Open the white space between columns, again. - * (4) Remove small components. */ - pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL); - pixCloseSafeBrick(pix2, pix2, 10, 1); - if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); - pix3 = pixSubtract(NULL, pix2, pixvws); - if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); - pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH, - L_SELECT_IF_GTE, NULL); - if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - return pixd; -} - - -/*------------------------------------------------------------------* - * Location of page foreground * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixFindPageForeground() - * - * \param[in] pixs full resolution (any type or depth - * \param[in] threshold for binarization; typically about 128 - * \param[in] mindist min distance of text from border to allow - * cleaning near border; at 2x reduction, this - * should be larger than 50; typically about 70 - * \param[in] erasedist when conditions are satisfied, erase anything - * within this distance of the edge; - * typically 20-30 at 2x reduction - * \param[in] showmorph debug: set to a negative integer to show steps - * in generating masks; this is typically used - * for debugging region extraction - * \param[in] pixac debug: allocate outside and pass this in to - * accumulate results of each call to this function, - * which can be displayed in a mosaic or a pdf. - * \return box region including foreground, with some pixel noise - * removed, or NULL if not found - * - *
- * Notes:
- *      (1) This doesn't simply crop to the fg.  It attempts to remove
- *          pixel noise and junk at the edge of the image before cropping.
- *          The input %threshold is used if pixs is not 1 bpp.
- *      (2) This is not intended to work on small thumbnails.  The
- *          dimensions of pixs must be at least MinWidth x MinHeight.
- *      (3) Debug: set showmorph to display the intermediate image in
- *          the morphological operations on this page.
- *      (4) Debug: to get pdf output of results when called repeatedly,
- *          call with an existing pixac, which will add an image of this page,
- *          with the fg outlined.  If no foreground is found, there is
- *          no output for this page image.
- * 
- */ -BOX * -pixFindPageForeground(PIX *pixs, - l_int32 threshold, - l_int32 mindist, - l_int32 erasedist, - l_int32 showmorph, - PIXAC *pixac) -{ -l_int32 flag, nbox, intersects; -l_int32 w, h, bx, by, bw, bh, left, right, top, bottom; -PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2; -BOX *box, *boxfg, *boxin, *boxd; -BOXA *ba1, *ba2; - - PROCNAME("pixFindPageForeground"); - - if (!pixs) - return (BOX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (w < MinWidth || h < MinHeight) { - L_ERROR("pix too small: w = %d, h = %d\n", procName, w, h); - return NULL; - } - - /* Binarize, downscale by 0.5, remove the noise to generate a seed, - * and do a seedfill back from the seed into those 8-connected - * components of the binarized image for which there was at least - * one seed pixel. Also clear out any components that are within - * 10 pixels of the edge at 2x reduction. */ - flag = (showmorph) ? 100 : 0; - pixb = pixConvertTo1(pixs, threshold); - pixb2 = pixScale(pixb, 0.5, 0.5); - pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag); - pix1 = pixMorphSequence(pixb2, "o50.1", 0); - pixOr(pixseed, pixseed, pix1); - pixDestroy(&pix1); - pix1 = pixMorphSequence(pixb2, "o1.50", 0); - pixOr(pixseed, pixseed, pix1); - pixDestroy(&pix1); - pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8); - pixSetOrClearBorder(pixsf, 10, 10, 10, 10, PIX_SET); - pixm = pixRemoveBorderConnComps(pixsf, 8); - - /* Now, where is the main block of text? We want to remove noise near - * the edge of the image, but to do that, we have to be convinced that - * (1) there is noise and (2) it is far enough from the text block - * and close enough to the edge. For each edge, if the block - * is more than mindist from that edge, then clean 'erasedist' - * pixels from the edge. 
*/ - pix1 = pixMorphSequence(pixm, "c50.50", flag); - ba1 = pixConnComp(pix1, NULL, 8); - ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); - pixGetDimensions(pix1, &w, &h, NULL); - nbox = boxaGetCount(ba2); - if (nbox > 1) { - box = boxaGetBox(ba2, 0, L_CLONE); - boxGetGeometry(box, &bx, &by, &bw, &bh); - left = (bx > mindist) ? erasedist : 0; - right = (w - bx - bw > mindist) ? erasedist : 0; - top = (by > mindist) ? erasedist : 0; - bottom = (h - by - bh > mindist) ? erasedist : 0; - pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR); - boxDestroy(&box); - } - pixDestroy(&pix1); - boxaDestroy(&ba1); - boxaDestroy(&ba2); - - /* Locate the foreground region; don't bother cropping */ - pixClipToForeground(pixm, NULL, &boxfg); - - /* Sanity check the fg region. Make sure it's not confined - * to a thin boundary on the left and right sides of the image, - * in which case it is likely to be noise. */ - if (boxfg) { - boxin = boxCreate(0.1 * w, 0, 0.8 * w, h); - boxIntersects(boxfg, boxin, &intersects); - boxDestroy(&boxin); - if (!intersects) boxDestroy(&boxfg); - } - - boxd = NULL; - if (boxfg) { - boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */ - boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0); - - /* Save the debug image showing the box for this page */ - if (pixac) { - pixg2 = pixConvert1To4Cmap(pixb); - pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0); - pixacompAddPix(pixac, pixg2, IFF_DEFAULT); - pixDestroy(&pixg2); - } - } - - pixDestroy(&pixb); - pixDestroy(&pixb2); - pixDestroy(&pixseed); - pixDestroy(&pixsf); - pixDestroy(&pixm); - boxDestroy(&boxfg); - return boxd; -} - - -/*------------------------------------------------------------------* - * Extraction of characters from image with only text * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixSplitIntoCharacters() - * - * \param[in] pixs 1 bpp, contains only deskewed text - * \param[in] minw min component width for initial filtering; typ. 4 - * \param[in] minh min component height for initial filtering; typ. 4 - * \param[out] pboxa [optional] character bounding boxes - * \param[out] ppixa [optional] character images - * \param[out] ppixdebug [optional] showing splittings - * - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a simple function that attempts to find split points
- *          based on vertical pixel profiles.
- *      (2) It should be given an image that has an arbitrary number
- *          of text characters.
- *      (3) The returned pixa includes the boxes from which the
- *          (possibly split) components are extracted.
- * 
- */ -l_ok -pixSplitIntoCharacters(PIX *pixs, - l_int32 minw, - l_int32 minh, - BOXA **pboxa, - PIXA **ppixa, - PIX **ppixdebug) -{ -l_int32 ncomp, i, xoff, yoff; -BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad; -BOXAA *baa; -PIX *pix, *pix1, *pix2, *pixdb; -PIXA *pixa1, *pixadb; - - PROCNAME("pixSplitIntoCharacters"); - - if (pboxa) *pboxa = NULL; - if (ppixa) *ppixa = NULL; - if (ppixdebug) *ppixdebug = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - /* Remove the small stuff */ - pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH, - L_SELECT_IF_GT, NULL); - - /* Small vertical close for consolidation */ - pix2 = pixMorphSequence(pix1, "c1.10", 0); - pixDestroy(&pix1); - - /* Get the 8-connected components */ - boxa1 = pixConnComp(pix2, &pixa1, 8); - pixDestroy(&pix2); - boxaDestroy(&boxa1); - - /* Split the components if obvious */ - ncomp = pixaGetCount(pixa1); - boxa2 = boxaCreate(ncomp); - pixadb = (ppixdebug) ? 
pixaCreate(ncomp) : NULL; - for (i = 0; i < ncomp; i++) { - pix = pixaGetPix(pixa1, i, L_CLONE); - if (ppixdebug) { - boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb); - if (pixdb) - pixaAddPix(pixadb, pixdb, L_INSERT); - } else { - boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL); - } - pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL); - boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0); - boxaJoin(boxa2, boxat2, 0, -1); - pixDestroy(&pix); - boxaDestroy(&boxat1); - boxaDestroy(&boxat2); - } - pixaDestroy(&pixa1); - - /* Generate the debug image */ - if (ppixdebug) { - if (pixaGetCount(pixadb) > 0) { - *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500, - 1.0, 0, 20, 1); - } - pixaDestroy(&pixadb); - } - - /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */ - baa = boxaSort2d(boxa2, NULL, 0, 0, 5); - boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE); - boxaaDestroy(&baa); - boxaDestroy(&boxa2); - - /* Optionally extract the pieces from the input image */ - if (ppixa) - *ppixa = pixClipRectangles(pixs, boxad); - if (pboxa) - *pboxa = boxad; - else - boxaDestroy(&boxad); - return 0; -} - - -/*! - * \brief pixSplitComponentWithProfile() - * - * \param[in] pixs 1 bpp, exactly one connected component - * \param[in] delta distance used in extrema finding in a numa; typ. 10 - * \param[in] mindel minimum required difference between profile - * minimum and profile values +2 and -2 away; typ. 7 - * \param[out] ppixdebug [optional] debug image of splitting - * \return boxa of c.c. after splitting, or NULL on error - * - *
- * Notes:
- *      (1) This will split the most obvious cases of touching characters.
- *          The split points it is searching for are narrow and deep
- *          minimima in the vertical pixel projection profile, after a
- *          large vertical closing has been applied to the component.
- * 
- */ -BOXA * -pixSplitComponentWithProfile(PIX *pixs, - l_int32 delta, - l_int32 mindel, - PIX **ppixdebug) -{ -l_int32 w, h, n2, i, firstmin, xmin, xshift; -l_int32 nmin, nleft, nright, nsplit, isplit, ncomp; -l_int32 *array1, *array2; -BOX *box; -BOXA *boxad; -NUMA *na1, *na2, *nasplit; -PIX *pix1, *pixdb; - - PROCNAME("pixSplitComponentsWithProfile"); - - if (ppixdebug) *ppixdebug = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - - /* Closing to consolidate characters vertically */ - pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100); - - /* Get extrema of column projections */ - boxad = boxaCreate(2); - na1 = pixCountPixelsByColumn(pix1); /* w elements */ - pixDestroy(&pix1); - na2 = numaFindExtrema(na1, delta, NULL); - n2 = numaGetCount(na2); - if (n2 < 3) { /* no split possible */ - box = boxCreate(0, 0, w, h); - boxaAddBox(boxad, box, L_INSERT); - numaDestroy(&na1); - numaDestroy(&na2); - return boxad; - } - - /* Look for sufficiently deep and narrow minima. - * All minima of of interest must be surrounded by max on each - * side. firstmin is the index of first possible minimum. */ - array1 = numaGetIArray(na1); - array2 = numaGetIArray(na2); - if (ppixdebug) numaWriteStderr(na2); - firstmin = (array1[array2[0]] > array1[array2[1]]) ? 
1 : 2; - nasplit = numaCreate(n2); /* will hold split locations */ - for (i = firstmin; i < n2 - 1; i+= 2) { - xmin = array2[i]; - nmin = array1[xmin]; - if (xmin + 2 >= w) break; /* no more splits possible */ - nleft = array1[xmin - 2]; - nright = array1[xmin + 2]; - if (ppixdebug) { - lept_stderr( - "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n", - xmin, w, nleft, nmin, nright); - } - if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */ - numaAddNumber(nasplit, xmin); - } - nsplit = numaGetCount(nasplit); - -#if 0 - if (ppixdebug && nsplit > 0) { - lept_mkdir("lept/split"); - gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL); - } -#endif - - numaDestroy(&na1); - numaDestroy(&na2); - LEPT_FREE(array1); - LEPT_FREE(array2); - - if (nsplit == 0) { /* no splitting */ - numaDestroy(&nasplit); - box = boxCreate(0, 0, w, h); - boxaAddBox(boxad, box, L_INSERT); - return boxad; - } - - /* Use split points to generate b.b. after splitting */ - for (i = 0, xshift = 0; i < nsplit; i++) { - numaGetIValue(nasplit, i, &isplit); - box = boxCreate(xshift, 0, isplit - xshift, h); - boxaAddBox(boxad, box, L_INSERT); - xshift = isplit + 1; - } - box = boxCreate(xshift, 0, w - xshift, h); - boxaAddBox(boxad, box, L_INSERT); - numaDestroy(&nasplit); - - if (ppixdebug) { - pixdb = pixConvertTo32(pixs); - ncomp = boxaGetCount(boxad); - for (i = 0; i < ncomp; i++) { - box = boxaGetBox(boxad, i, L_CLONE); - pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5); - boxDestroy(&box); - } - *ppixdebug = pixdb; - } - - return boxad; -} - - -/*------------------------------------------------------------------* - * Extraction of lines of text * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixExtractTextlines() - * - * \param[in] pixs any depth, assumed to have nearly horizontal text - * \param[in] maxw, maxh initial filtering: remove any components in pixs - * with components larger than maxw or maxh - * \param[in] minw, minh final filtering: remove extracted 'lines' - * with sizes smaller than minw or minh; use - * 0 for default. - * \param[in] adjw, adjh final adjustment of boxes representing each - * text line. If > 0, these increase the box - * size at each edge by this amount. - * \param[in] pixadb pixa for saving intermediate steps; NULL to omit - * \return pixa of textline images, including bounding boxes, or - * NULL on error - * - *
- * Notes:
- *      (1) This function assumes that textline fragments have sufficient
- *          vertical separation and small enough skew so that a
- *          horizontal dilation sufficient to join words will not join
- *          textlines.  It does not guarantee that horizontally adjacent
- *          textline fragments on the same line will be joined.
- *      (2) For images with multiple columns, it attempts to avoid joining
- *          textlines across the space between columns.  If that is not
- *          a concern, you can also use pixExtractRawTextlines(),
- *          which will join them with alacrity.
- *      (3) This first removes components from pixs that are either
- *          wide (> %maxw) or tall (> %maxh).
- *      (4) A final filtering operation removes small components, such
- *          that width < %minw or height < %minh.
- *      (5) For reasonable accuracy, the resolution of pixs should be
- *          at least 100 ppi.  For reasonable efficiency, the resolution
- *          should not exceed 600 ppi.
- *      (6) This can be used to determine if some region of a scanned
- *          image is horizontal text.
- *      (7) As an example, for a pix with resolution 300 ppi, a reasonable
- *          set of parameters is:
- *             pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
- *          The defaults minw and minh for 300 ppi are about 36 and 20,
- *          so the same result is obtained with:
- *             pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
- *      (8) The output pixa is composed of subimages, one for each textline,
- *          and the boxa in the pixa tells where in %pixs each textline goes.
- * 
- */ -PIXA * -pixExtractTextlines(PIX *pixs, - l_int32 maxw, - l_int32 maxh, - l_int32 minw, - l_int32 minh, - l_int32 adjw, - l_int32 adjh, - PIXA *pixadb) -{ -char buf[64]; -l_int32 res, csize, empty; -BOXA *boxa1, *boxa2, *boxa3; -PIX *pix1, *pix2, *pix3; -PIXA *pixa1, *pixa2, *pixa3; - - PROCNAME("pixExtractTextlines"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Binarize carefully, if necessary */ - if (pixGetDepth(pixs) > 1) { - pix2 = pixConvertTo8(pixs, FALSE); - pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); - pix1 = pixThresholdToBinary(pix3, 150); - pixDestroy(&pix2); - pixDestroy(&pix3); - } else { - pix1 = pixClone(pixs); - } - pixZero(pix1, &empty); - if (empty) { - pixDestroy(&pix1); - L_INFO("no fg pixels in input image\n", procName); - return NULL; - } - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - - /* Remove any very tall or very wide connected components */ - pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, - L_SELECT_IF_LT, NULL); - if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); - pixDestroy(&pix1); - - /* Filter to solidify the text lines within the x-height region. - * The closing (csize) bridges gaps between words. The opening - * removes isolated bridges between textlines. */ - if ((res = pixGetXRes(pixs)) == 0) { - L_INFO("Resolution is not set: setting to 300 ppi\n", procName); - res = 300; - } - csize = L_MIN(120., 60.0 * res / 300.0); - snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3); - pix3 = pixMorphCompSequence(pix2, buf, 0); - if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); - - /* Extract the connected components. These should be dilated lines */ - boxa1 = pixConnComp(pix3, &pixa1, 4); - if (pixadb) { - pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); - pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - /* Set minw, minh if default is requested */ - minw = (minw != 0) ? 
minw : (l_int32)(0.12 * res); - minh = (minh != 0) ? minh : (l_int32)(0.07 * res); - - /* Remove line components that are too small */ - pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH, - L_SELECT_IF_GTE, NULL); - if (pixadb) { - pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); - pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = pixConvertTo32(pix2); - pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - /* Selectively AND with the version before dilation, and save */ - boxa2 = pixaGetBoxa(pixa2, L_CLONE); - boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); - pixa3 = pixClipRectangles(pix2, boxa3); - if (pixadb) { - pix1 = pixaDisplayRandomCmap(pixa3, 0, 0); - pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - pixDestroy(&pix2); - pixDestroy(&pix3); - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - return pixa3; -} - - -/*! - * \brief pixExtractRawTextlines() - * - * \param[in] pixs any depth, assumed to have nearly horizontal text - * \param[in] maxw, maxh initial filtering: remove any components in pixs - * with components larger than maxw or maxh; - * use 0 for default values. - * \param[in] adjw, adjh final adjustment of boxes representing each - * text line. If > 0, these increase the box - * size at each edge by this amount. - * \param[in] pixadb pixa for saving intermediate steps; NULL to omit - * \return pixa of textline images, including bounding boxes, or - * NULL on error - * - *
- * Notes:
- *      (1) This function assumes that textlines have sufficient
- *          vertical separation and small enough skew so that a
- *          horizontal dilation sufficient to join words will not join
- *          textlines.  It aggressively joins textlines across multiple
- *          columns, so if that is not desired, you must either (a) make
- *          sure that %pixs is a single column of text or (b) use instead
- *          pixExtractTextlines(), which is more conservative
- *          about joining text fragments that have vertical overlap.
- *      (2) This first removes components from pixs that are either
- *          very wide (> %maxw) or very tall (> %maxh).
- *      (3) For reasonable accuracy, the resolution of pixs should be
- *          at least 100 ppi.  For reasonable efficiency, the resolution
- *          should not exceed 600 ppi.
- *      (4) This can be used to determine if some region of a scanned
- *          image is horizontal text.
- *      (5) As an example, for a pix with resolution 300 ppi, a reasonable
- *          set of parameters is:
- *             pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
- *      (6) The output pixa is composed of subimages, one for each textline,
- *          and the boxa in the pixa tells where in %pixs each textline goes.
- * 
- */ -PIXA * -pixExtractRawTextlines(PIX *pixs, - l_int32 maxw, - l_int32 maxh, - l_int32 adjw, - l_int32 adjh, - PIXA *pixadb) -{ -char buf[64]; -l_int32 res, csize, empty; -BOXA *boxa1, *boxa2, *boxa3; -BOXAA *baa1; -PIX *pix1, *pix2, *pix3; -PIXA *pixa1, *pixa2; - - PROCNAME("pixExtractRawTextlines"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Set maxw, maxh if default is requested */ - if ((res = pixGetXRes(pixs)) == 0) { - L_INFO("Resolution is not set: setting to 300 ppi\n", procName); - res = 300; - } - maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res); - maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res); - - /* Binarize carefully, if necessary */ - if (pixGetDepth(pixs) > 1) { - pix2 = pixConvertTo8(pixs, FALSE); - pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); - pix1 = pixThresholdToBinary(pix3, 150); - pixDestroy(&pix2); - pixDestroy(&pix3); - } else { - pix1 = pixClone(pixs); - } - pixZero(pix1, &empty); - if (empty) { - pixDestroy(&pix1); - L_INFO("no fg pixels in input image\n", procName); - return NULL; - } - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - - /* Remove any very tall or very wide connected components */ - pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, - L_SELECT_IF_LT, NULL); - if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); - pixDestroy(&pix1); - - /* Filter to solidify the text lines within the x-height region. - * The closing (csize) bridges gaps between words. */ - csize = L_MIN(120., 60.0 * res / 300.0); - snprintf(buf, sizeof(buf), "c%d.1", csize); - pix3 = pixMorphCompSequence(pix2, buf, 0); - if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); - - /* Extract the connected components. 
These should be dilated lines */ - boxa1 = pixConnComp(pix3, &pixa1, 4); - if (pixadb) { - pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); - pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - /* Do a 2-d sort, and generate a bounding box for each set of text - * line segments that is aligned horizontally (i.e., has vertical - * overlap) into a box representing a single text line. */ - baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5); - boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2); - if (pixadb) { - pix1 = pixConvertTo32(pix2); - pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - /* Optionally adjust the sides of each text line box, and then - * use the boxes to generate a pixa of the text lines. */ - boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); - pixa2 = pixClipRectangles(pix2, boxa3); - if (pixadb) { - pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); - pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); - pixaAddPix(pixadb, pix1, L_INSERT); - } - - pixDestroy(&pix2); - pixDestroy(&pix3); - pixaDestroy(&pixa1); - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - boxaaDestroy(&baa1); - return pixa2; -} - - -/*------------------------------------------------------------------* - * How many text columns * - *------------------------------------------------------------------*/ -/*! - * \brief pixCountTextColumns() - * - * \param[in] pixs 1 bpp - * \param[in] deltafract fraction of (max - min) to be used in the delta - * for extrema finding; typ 0.3 - * \param[in] peakfract fraction of (max - min) to be used to threshold - * the peak value; typ. 0.5 - * \param[in] clipfract fraction of image dimension removed on each side; - * typ. 
0.1, which leaves w and h reduced by 0.8 - * \param[out] pncols number of columns; -1 if not determined - * \param[in] pixadb [optional] pre-allocated, for showing - * intermediate computation; use null to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is assumed that pixs has the correct resolution set.
- *          If the resolution is 0, we set to 300 and issue a warning.
- *      (2) If necessary, the image is scaled to between 37 and 75 ppi;
- *          most of the processing is done at this resolution.
- *      (3) If no text is found (essentially a blank page),
- *          this returns ncols = 0.
- *      (4) For debug output, input a pre-allocated pixa.
- * 
- */ -l_ok -pixCountTextColumns(PIX *pixs, - l_float32 deltafract, - l_float32 peakfract, - l_float32 clipfract, - l_int32 *pncols, - PIXA *pixadb) -{ -l_int32 w, h, res, i, n, npeak; -l_float32 scalefact, redfact, minval, maxval, val4, val5, fract; -BOX *box; -NUMA *na1, *na2, *na3, *na4, *na5; -PIX *pix1, *pix2, *pix3, *pix4, *pix5; - - PROCNAME("pixCountTextColumns"); - - if (!pncols) - return ERROR_INT("&ncols not defined", procName, 1); - *pncols = -1; /* init */ - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (deltafract < 0.15 || deltafract > 0.75) - L_WARNING("deltafract not in [0.15 ... 0.75]\n", procName); - if (peakfract < 0.25 || peakfract > 0.9) - L_WARNING("peakfract not in [0.25 ... 0.9]\n", procName); - if (clipfract < 0.0 || clipfract >= 0.5) - return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", procName, 1); - if (pixadb) pixaAddPix(pixadb, pixs, L_COPY); - - /* Scale to between 37.5 and 75 ppi */ - if ((res = pixGetXRes(pixs)) == 0) { - L_WARNING("resolution undefined; set to 300\n", procName); - pixSetResolution(pixs, 300, 300); - res = 300; - } - if (res < 37) { - L_WARNING("resolution %d very low\n", procName, res); - scalefact = 37.5 / res; - pix1 = pixScale(pixs, scalefact, scalefact); - } else { - redfact = (l_float32)res / 37.5; - if (redfact < 2.0) - pix1 = pixClone(pixs); - else if (redfact < 4.0) - pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); - else if (redfact < 8.0) - pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0); - else if (redfact < 16.0) - pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0); - else - pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2); - } - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - - /* Crop inner 80% of image */ - pixGetDimensions(pix1, &w, &h, NULL); - box = boxCreate(clipfract * w, clipfract * h, - (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h); - pix2 = pixClipRectangle(pix1, box, NULL); - pixGetDimensions(pix2, &w, &h, 
NULL); - boxDestroy(&box); - if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); - - /* Deskew */ - pix3 = pixDeskew(pix2, 0); - if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); - - /* Close to increase column counts for text */ - pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21); - if (pixadb) pixaAddPix(pixadb, pix4, L_COPY); - pixInvert(pix4, pix4); - na1 = pixCountByColumn(pix4, NULL); - - if (pixadb) { - gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL); - pix5 = pixRead("/tmp/lept/plot.png"); - pixaAddPix(pixadb, pix5, L_INSERT); - } - - /* Analyze the column counts. na4 gives the locations of - * the extrema in normalized units (0.0 to 1.0) across the - * cropped image. na5 gives the magnitude of the - * extrema, normalized to the dynamic range. The peaks - * are values that are at least peakfract of (max - min). */ - numaGetMax(na1, &maxval, NULL); - numaGetMin(na1, &minval, NULL); - fract = (l_float32)(maxval - minval) / h; /* is there much at all? */ - if (fract < 0.05) { - L_INFO("very little content on page; 0 text columns\n", procName); - *pncols = 0; - } else { - na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3); - na4 = numaTransform(na2, 0, 1.0 / w); - na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval)); - n = numaGetCount(na4); - for (i = 0, npeak = 0; i < n; i++) { - numaGetFValue(na4, i, &val4); - numaGetFValue(na5, i, &val5); - if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) { - npeak++; - L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", procName, val4, val5); - } - } - *pncols = npeak + 1; - numaDestroy(&na2); - numaDestroy(&na3); - numaDestroy(&na4); - numaDestroy(&na5); - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - numaDestroy(&na1); - return 0; -} - - -/*------------------------------------------------------------------* - * Decision text vs photo * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixDecideIfText() - * - * \param[in] pixs any depth - * \param[in] box [optional] if null, use entire pixs - * \param[out] pistext 1 if text; 0 if photo; -1 if not determined or empty - * \param[in] pixadb [optional] pre-allocated, for showing intermediate - * computation; use NULL to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is assumed that pixs has the correct resolution set.
- *          If the resolution is 0, we set to 300 and issue a warning.
- *      (2) If necessary, the image is scaled to 300 ppi; most of the
- *          processing is done at this resolution.
- *      (3) Text is assumed to be in horizontal lines.
- *      (4) Because thin vertical lines are removed before filtering for
- *          text lines, this should identify tables as text.
- *      (5) If %box is null and pixs contains both text lines and line art,
- *          this function might return %istext == true.
- *      (6) If the input pixs is empty, or for some other reason the
- *          result can not be determined, return -1.
- *      (7) For debug output, input a pre-allocated pixa.
- * 
- */ -l_ok -pixDecideIfText(PIX *pixs, - BOX *box, - l_int32 *pistext, - PIXA *pixadb) -{ -l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp; -l_float32 ratio1, ratio2; -L_BMF *bmf; -BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5; -PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7; -PIXA *pixa1; -SEL *sel1; - - PROCNAME("pixDecideIfText"); - - if (!pistext) - return ERROR_INT("&istext not defined", procName, 1); - *pistext = -1; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Crop, convert to 1 bpp, 300 ppi */ - if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL) - return ERROR_INT("pix1 not made", procName, 1); - - pixZero(pix1, &empty); - if (empty) { - pixDestroy(&pix1); - L_INFO("pix is empty\n", procName); - return 0; - } - w = pixGetWidth(pix1); - - /* Identify and remove tall, thin vertical lines (as found in tables) - * that are up to 9 pixels wide. Make a hit-miss sel with an - * 81 pixel vertical set of hits and with 3 pairs of misses that - * are 10 pixels apart horizontally. It is necessary to use a - * hit-miss transform; if we only opened with a vertical line of - * hits, we would remove solid regions of pixels that are not - * text or vertical lines. 
*/ - pix2 = pixCreate(11, 81, 1); - for (i = 0; i < 81; i++) - pixSetPixel(pix2, 5, i, 1); - sel1 = selCreateFromPix(pix2, 40, 5, NULL); - selSetElement(sel1, 20, 0, SEL_MISS); - selSetElement(sel1, 20, 10, SEL_MISS); - selSetElement(sel1, 40, 0, SEL_MISS); - selSetElement(sel1, 40, 10, SEL_MISS); - selSetElement(sel1, 60, 0, SEL_MISS); - selSetElement(sel1, 60, 10, SEL_MISS); - pix3 = pixHMT(NULL, pix1, sel1); - pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000); - pix5 = pixXor(NULL, pix1, pix4); - pixDestroy(&pix2); - selDestroy(&sel1); - - /* Convert the text lines to separate long horizontal components */ - pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0); - - /* Estimate the distance to the bottom of the significant region */ - if (box) { /* use full height */ - pixGetDimensions(pix6, NULL, &h, NULL); - } else { /* use height of region that has text lines */ - pixFindThreshFgExtent(pix6, 400, NULL, &h); - } - - if (pixadb) { - bmf = bmfCreate(NULL, 6); - pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary", - 0x0000ff00, L_ADD_BELOW); - pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line", - 0x0000ff00, L_ADD_BELOW); - pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill", - 0x0000ff00, L_ADD_BELOW); - pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor", - 0x0000ff00, L_ADD_BELOW); - pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components", - 0x0000ff00, L_ADD_BELOW); - } - - /* Extract the connected components */ - if (pixadb) { - boxa1 = pixConnComp(pix6, &pixa1, 8); - pix7 = pixaDisplayRandomCmap(pixa1, 0, 0); - pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255); - pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components", - 0x0000ff00, L_ADD_BELOW); - pixDestroy(&pix7); - pixaDestroy(&pixa1); - bmfDestroy(&bmf); - } else { - boxa1 = pixConnComp(pix6, NULL, 8); - } - - /* Analyze the connected components. 
The following conditions - * at 300 ppi must be satisfied if the image is text: - * (1) There are no components that are wider than 400 pixels and - * taller than 175 pixels. - * (2) The second longest component is at least 60% of the - * (possibly cropped) image width. This catches images - * that don't have any significant content. - * (3) Of the components that are at least 40% of the length - * of the longest (n2), at least 80% of them must not exceed - * 60 pixels in height. - * (4) The number of those long, thin components (n3) must - * equal or exceed a minimum that scales linearly with the - * image height. - * Most images that are not text fail more than one of these - * conditions. */ - boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL); - boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */ - boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH, - L_SELECT_IF_GTE, NULL); - boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT, - L_SELECT_IF_LTE, NULL); - boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH, - L_SELECT_IF_GT, NULL); - big_comp = (boxaGetCount(boxa5) == 0) ? 
0 : 1; - n1 = boxaGetCount(boxa1); - n2 = boxaGetCount(boxa3); - n3 = boxaGetCount(boxa4); - ratio1 = (l_float32)maxw / (l_float32)w; - ratio2 = (l_float32)n3 / (l_float32)n2; - minlines = L_MAX(2, h / 125); - if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines) - *pistext = 0; - else - *pistext = 1; - if (pixadb) { - if (*pistext == 1) { - L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, " - "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " - "big_comp = %d\n", procName, n1, n2, n3, minlines, - maxw, ratio1, h, big_comp); - } else { - L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, " - "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " - "big_comp = %d\n", procName, n1, n2, n3, minlines, - maxw, ratio1, h, big_comp); - } - } - - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - boxaDestroy(&boxa4); - boxaDestroy(&boxa5); - pixDestroy(&pix1); - pixDestroy(&pix3); - pixDestroy(&pix4); - pixDestroy(&pix5); - pixDestroy(&pix6); - return 0; -} - - -/*! 
- * \brief pixFindThreshFgExtent() - * - * \param[in] pixs 1 bpp - * \param[in] thresh threshold number of pixels in row - * \param[out] ptop [optional] location of top of region - * \param[out] pbot [optional] location of bottom of region - * \return 0 if OK, 1 on error - */ -l_ok -pixFindThreshFgExtent(PIX *pixs, - l_int32 thresh, - l_int32 *ptop, - l_int32 *pbot) -{ -l_int32 i, n; -l_int32 *array; -NUMA *na; - - PROCNAME("pixFindThreshFgExtent"); - - if (ptop) *ptop = 0; - if (pbot) *pbot = 0; - if (!ptop && !pbot) - return ERROR_INT("nothing to determine", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - na = pixCountPixelsByRow(pixs, NULL); - n = numaGetCount(na); - array = numaGetIArray(na); - if (ptop) { - for (i = 0; i < n; i++) { - if (array[i] >= thresh) { - *ptop = i; - break; - } - } - } - if (pbot) { - for (i = n - 1; i >= 0; i--) { - if (array[i] >= thresh) { - *pbot = i; - break; - } - } - } - LEPT_FREE(array); - numaDestroy(&na); - return 0; -} - - -/*------------------------------------------------------------------* - * Decision: table vs text * - *------------------------------------------------------------------*/ -/*! - * \brief pixDecideIfTable() - * - * \param[in] pixs any depth, any resolution >= 75 ppi - * \param[in] box [optional] if null, use entire pixs - * \param[in] orient L_PORTRAIT_MODE, L_LANDSCAPE_MODE - * \param[out] pscore 0 - 4; -1 if not determined - * \param[in] pixadb [optional] pre-allocated, for showing intermediate - * computation; use NULL to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is assumed that pixs has the correct resolution set.
- *          If the resolution is 0, we assume it is 300 ppi and issue a warning.
- *      (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
- *          clockwise before being analyzed.
- *      (3) The interpretation of the returned score:
- *            -1     undetermined
- *             0     no table
- *             1     unlikely to have a table
- *             2     likely to have a table
- *             3     even more likely to have a table
- *             4     extremely likely to have a table
- *          * Setting the condition for finding a table at score >= 2 works
- *            well, except for false positives on kanji and landscape text.
- *          * These false positives can be removed by setting the condition
- *            at score >= 3, but recall is lowered because it will not find
- *            tables without either horizontal or vertical lines.
- *      (4) Most of the processing takes place at 75 ppi.
- *      (5) Internally, three numbers are determined, for horizontal and
- *          vertical fg lines, and for vertical bg lines.  From these,
- *          four tests are made to decide if there is a table occupying
- *          a significant part of the image.
- *      (6) Images have arbitrary content and would be likely to trigger
- *          this detector, so they are checked for first, and if found,
- *          return with a 0 (no table) score.
- *      (7) Musical scores (tablature) are likely to trigger the detector.
- *      (8) Tables of content with more than 2 columns are likely to
- *          trigger the detector.
- *      (9) For debug output, input a pre-allocated pixa.
- * 
- */ -l_ok -pixDecideIfTable(PIX *pixs, - BOX *box, - l_int32 orient, - l_int32 *pscore, - PIXA *pixadb) -{ -l_int32 empty, nhb, nvb, nvw, score, htfound; -PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; - - PROCNAME("pixDecideIfTable"); - - if (!pscore) - return ERROR_INT("&score not defined", procName, 1); - *pscore = -1; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Check if there is an image region. First convert to 1 bpp - * at 175 ppi. If an image is found, assume there is no table. */ - pix1 = pixPrepare1bpp(pixs, box, 0.1, 175); - pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL); - if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (htfound) { - *pscore = 0; - L_INFO("pix has an image region\n", procName); - return 0; - } - - /* Crop, convert to 1 bpp, 75 ppi */ - if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL) - return ERROR_INT("pix1 not made", procName, 1); - - pixZero(pix1, &empty); - if (empty) { - *pscore = 0; - pixDestroy(&pix1); - L_INFO("pix is empty\n", procName); - return 0; - } - - /* The 2x2 dilation on 75 ppi makes these two approaches very similar: - * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution - * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); - * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution - * pix2 = pixDilateBrick(NULL, pix1, 2, 2); - * But (2) is more efficient if the input image to pixPrepare1bpp() - * is not at 300 ppi. */ - pix2 = pixDilateBrick(NULL, pix1, 2, 2); - - /* Deskew both horizontally and vertically; rotate by 90 - * degrees if in landscape mode. 
*/ - pix3 = pixDeskewBoth(pix2, 1); - if (pixadb) { - pixaAddPix(pixadb, pix2, L_COPY); - pixaAddPix(pixadb, pix3, L_COPY); - } - if (orient == L_LANDSCAPE_MODE) - pix4 = pixRotate90(pix3, 1); - else - pix4 = pixClone(pix3); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pix1 = pixClone(pix4); - pixDestroy(&pix4); - - /* Look for horizontal and vertical lines */ - pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0); - pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8); - pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0); - pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8); - pix6 = pixOr(NULL, pix3, pix5); - if (pixadb) { - pixaAddPix(pixadb, pix2, L_COPY); - pixaAddPix(pixadb, pix4, L_COPY); - pixaAddPix(pixadb, pix3, L_COPY); - pixaAddPix(pixadb, pix5, L_COPY); - pixaAddPix(pixadb, pix6, L_COPY); - } - pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */ - pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */ - - /* Remove the lines */ - pixSubtract(pix1, pix1, pix6); - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - - /* Remove noise pixels */ - pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0); - if (pixadb) pixaAddPix(pixadb, pix7, L_COPY); - - /* Look for vertical white space. Invert to convert white bg - * to fg. Use a single rank-1 2x reduction, which closes small - * fg holes, for the final processing at 37.5 ppi. - * The vertical opening is then about 3 inches on a 300 ppi image. 
- * We also remove vertical whitespace that is less than 5 pixels - * wide at this resolution (about 0.1 inches) */ - pixInvert(pix7, pix7); - pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0); - pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH, - L_SELECT_IF_GTE, NULL); - pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */ - if (pixadb) { - pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT); - pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT); - } - - /* Require at least 2 of the following 4 conditions for a table. - * Some tables do not have black (fg) lines, and for those we - * require more than 6 long vertical whitespace (bg) lines. */ - score = 0; - if (nhb > 1) score++; - if (nvb > 2) score++; - if (nvw > 3) score++; - if (nvw > 6) score++; - *pscore = score; - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - pixDestroy(&pix5); - pixDestroy(&pix6); - pixDestroy(&pix7); - pixDestroy(&pix8); - pixDestroy(&pix9); - return 0; -} - - -/*! - * \brief pixPrepare1bpp() - * - * \param[in] pixs any depth - * \param[in] box [optional] if null, use entire pixs - * \param[in] cropfract fraction to be removed from the boundary; - * use 0.0 to retain the entire image - * \param[in] outres desired resolution of output image; if the - * input image resolution is not set, assume - * 300 ppi; use 0 to skip scaling. - * \return pixd if OK, NULL on error - * - *
- * Notes:
- *      (1) This handles some common pre-processing operations,
- *          where the page segmentation algorithm takes a 1 bpp image.
- * 
- */ -PIX * -pixPrepare1bpp(PIX *pixs, - BOX *box, - l_float32 cropfract, - l_int32 outres) -{ -l_int32 w, h, res; -l_float32 factor; -BOX *box1; -PIX *pix1, *pix2, *pix3, *pix4, *pix5; - - PROCNAME("pixPrepare1bpp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Crop the image. If no box is given, use %cropfract to remove - * pixels near the image boundary; this helps avoid false - * negatives from noise that is often found there. */ - if (box) { - pix1 = pixClipRectangle(pixs, box, NULL); - } else { - pixGetDimensions(pixs, &w, &h, NULL); - box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h), - (l_int32)((1.0 - 2 * cropfract) * w), - (l_int32)((1.0 - 2 * cropfract) * h)); - pix1 = pixClipRectangle(pixs, box1, NULL); - boxDestroy(&box1); - } - - /* Convert to 1 bpp with adaptive background cleaning */ - if (pixGetDepth(pixs) > 1) { - pix2 = pixConvertTo8(pix1, 0); - pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (!pix3) { - L_INFO("pix cleaning failed\n", procName); - return NULL; - } - pix4 = pixThresholdToBinary(pix3, 200); - pixDestroy(&pix3); - } else { - pix4 = pixClone(pix1); - pixDestroy(&pix1); - } - - /* Scale the image to the requested output resolution; - do not scale if %outres <= 0 */ - if (outres <= 0) - return pix4; - if ((res = pixGetXRes(pixs)) == 0) { - L_WARNING("Resolution is not set: using 300 ppi\n", procName); - res = 300; - } - if (res != outres) { - factor = (l_float32)outres / (l_float32)res; - pix5 = pixScale(pix4, factor, factor); - } else { - pix5 = pixClone(pix4); - } - pixDestroy(&pix4); - return pix5; -} - - -/*------------------------------------------------------------------* - * Estimate the grayscale background value * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixEstimateBackground() - * - * \param[in] pixs 8 bpp, with or without colormap - * \param[in] darkthresh pixels below this value are never considered - * part of the background; typ. 70; use 0 to skip - * \param[in] edgecrop fraction of half-width on each side, and of - * half-height at top and bottom, that are cropped - * \param[out] pbg estimated background, or 0 on error - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Caller should check that return bg value is > 0.
- * 
- */ -l_ok -pixEstimateBackground(PIX *pixs, - l_int32 darkthresh, - l_float32 edgecrop, - l_int32 *pbg) -{ -l_int32 w, h, sampling; -l_float32 fbg; -BOX *box; -PIX *pix1, *pix2, *pixm; - - PROCNAME("pixEstimateBackground"); - - if (!pbg) - return ERROR_INT("&bg not defined", procName, 1); - *pbg = 0; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (darkthresh > 128) - L_WARNING("darkthresh unusually large\n", procName); - if (edgecrop < 0.0 || edgecrop >= 1.0) - return ERROR_INT("edgecrop not in [0.0 ... 1.0)", procName, 1); - - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - pixGetDimensions(pix1, &w, &h, NULL); - - /* Optionally crop inner part of image */ - if (edgecrop > 0.0) { - box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h, - (1.0 - edgecrop) * w, (1.0 - edgecrop) * h); - pix2 = pixClipRectangle(pix1, box, NULL); - boxDestroy(&box); - } else { - pix2 = pixClone(pix1); - } - - /* We will use no more than 50K samples */ - sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5)); - - /* Optionally make a mask over all pixels lighter than %darkthresh */ - pixm = NULL; - if (darkthresh > 0) { - pixm = pixThresholdToBinary(pix2, darkthresh); - pixInvert(pixm, pixm); - } - - pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL); - *pbg = (l_int32)(fbg + 0.5); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pixm); - return 0; -} - - -/*---------------------------------------------------------------------* - * Largest white or black rectangles in an image * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixFindLargeRectangles() - * - * \param[in] pixs 1 bpp - * \param[in] polarity 0 within background, 1 within foreground - * \param[in] nrect number of rectangles to be found - * \param[out] pboxa largest rectangles, sorted by decreasing area - * \param[in,out] ppixdb optional return output with rectangles drawn on it - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a greedy search to find the largest rectangles,
- *          either black or white and without overlaps, in %pix.
- *      (2) See pixFindLargestRectangle(), which is called multiple
- *          times, for details.  On each call, the largest rectangle
- *          found is painted, so that none of its pixels can be
- *          used later, before calling it again.
- *      (3) This function is surprisingly fast.  Although
- *          pixFindLargestRectangle() runs at about 50 MPix/sec, when it
- *          is run multiple times by pixFindLargeRectangles(), it processes
- *          at 150 - 250 MPix/sec, and the time is approximately linear
- *          in %nrect.  For example, for a 1 MPix image, searching for
- *          the largest 50 boxes takes about 0.2 seconds.
- * 
- */ -l_ok -pixFindLargeRectangles(PIX *pixs, - l_int32 polarity, - l_int32 nrect, - BOXA **pboxa, - PIX **ppixdb) -{ -l_int32 i, op, bx, by, bw, bh; -BOX *box; -BOXA *boxa; -PIX *pix; - - PROCNAME("pixFindLargeRectangles"); - - if (ppixdb) *ppixdb = NULL; - if (!pboxa) - return ERROR_INT("&boxa not defined", procName, 1); - *pboxa = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (polarity != 0 && polarity != 1) - return ERROR_INT("invalid polarity", procName, 1); - if (nrect > 1000) { - L_WARNING("large num rectangles = %d requested; using 1000\n", - procName, nrect); - nrect = 1000; - } - - pix = pixCopy(NULL, pixs); - boxa = boxaCreate(nrect); - *pboxa = boxa; - - /* Sequentially find largest rectangle and fill with opposite color */ - for (i = 0; i < nrect; i++) { - if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) { - boxDestroy(&box); - L_ERROR("failure in pixFindLargestRectangle\n", procName); - break; - } - boxaAddBox(boxa, box, L_INSERT); - op = (polarity == 0) ? PIX_SET : PIX_CLR; - boxGetGeometry(box, &bx, &by, &bw, &bh); - pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0); - } - - if (ppixdb) - *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3); - - pixDestroy(&pix); - return 0; -} - - -/*! - * \brief pixFindLargestRectangle() - * - * \param[in] pixs 1 bpp - * \param[in] polarity 0 within background, 1 within foreground - * \param[out] pbox largest area rectangle - * \param[in,out] ppixdb optional return output with rectangle drawn on it - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a simple and elegant solution to a problem in
- *          computational geometry that at first appears to be quite
- *          difficult: what is the largest rectangle that can be
- *          placed in the image, covering only pixels of one polarity
- *          (bg or fg)?  The solution is O(n), where n is the number
- *          of pixels in the image, and it requires nothing more than
- *          using a simple recursion relation in a single sweep of the image.
- *      (2) In a sweep from UL to LR with left-to-right being the fast
- *          direction, calculate the largest white rectangle at (x, y),
- *          using previously calculated values at pixels #1 and #2:
- *             #1:    (x, y - 1)
- *             #2:    (x - 1, y)
- *          We also need the most recent "black" pixels that were seen
- *          in the current row and column.
- *          Consider the largest area.  There are only two possibilities:
- *             (a)  Min(w(1), horizdist) * (h(1) + 1)
- *             (b)  Min(h(2), vertdist) * (w(2) + 1)
- *          where
- *             horizdist: the distance from the rightmost "black" pixel seen
- *                        in the current row across to the current pixel
- *             vertdist: the distance from the lowest "black" pixel seen
- *                       in the current column down to the current pixel
- *          and we choose the Max of (a) and (b).
- *      (3) To convince yourself that these recursion relations are correct,
- *          it helps to draw the maximum rectangles at #1 and #2.
- *          Then for #1, you try to extend the rectangle down one line,
- *          so that the height is h(1) + 1.  Do you get the full
- *          width of #1, w(1)?  It depends on where the black pixels are
- *          in the current row.  You know the final width is bounded by w(1)
- *          and w(2) + 1, but the actual value depends on the distribution
- *          of black pixels in the current row that are at a distance
- *          from the current pixel that is between these limits.
- *          We call that value "horizdist", and the area is then given
- *          by the expression (a) above.  Using similar reasoning for #2,
- *          where you attempt to extend the rectangle to the right
- *          by 1 pixel, you arrive at (b).  The largest rectangle is
- *          then found by taking the Max.
- * 
- */ -l_ok -pixFindLargestRectangle(PIX *pixs, - l_int32 polarity, - BOX **pbox, - PIX **ppixdb) -{ -l_int32 i, j, w, h, d, wpls, val; -l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2; -l_int32 xmax, ymax; /* LR corner of the largest rectangle */ -l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg; -l_int32 *lowestfg; -l_uint32 *datas, *lines; -l_uint32 **linew, **lineh; -BOX *box; -PIX *pixw, *pixh; /* keeps the width and height for the largest */ - /* rectangles whose LR corner is located there. */ - - PROCNAME("pixFindLargestRectangle"); - - if (ppixdb) *ppixdb = NULL; - if (!pbox) - return ERROR_INT("&box not defined", procName, 1); - *pbox = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1) - return ERROR_INT("pixs not 1 bpp", procName, 1); - if (polarity != 0 && polarity != 1) - return ERROR_INT("invalid polarity", procName, 1); - - /* Initialize lowest "fg" seen so far for each column */ - lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); - for (i = 0; i < w; i++) - lowestfg[i] = -1; - - /* The combination (val ^ polarity) is the color for which we - * are searching for the maximum rectangle. For polarity == 0, - * we search in the bg (white). */ - pixw = pixCreate(w, h, 32); /* stores width */ - pixh = pixCreate(w, h, 32); /* stores height */ - linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL); - lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - maxarea = xmax = ymax = wmax = hmax = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - prevfg = -1; - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(lines, j); - if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. 
*/ - if (i == 0 && j == 0) { - wp = hp = 1; - } else if (i == 0) { - wp = linew[i][j - 1] + 1; - hp = 1; - } else if (j == 0) { - wp = 1; - hp = lineh[i - 1][j] + 1; - } else { - /* Expand #1 prev rectangle down */ - w1 = linew[i - 1][j]; - h1 = lineh[i - 1][j]; - horizdist = j - prevfg; - wmin = L_MIN(w1, horizdist); /* width of new rectangle */ - area1 = wmin * (h1 + 1); - - /* Expand #2 prev rectangle to right */ - w2 = linew[i][j - 1]; - h2 = lineh[i][j - 1]; - vertdist = i - lowestfg[j]; - hmin = L_MIN(h2, vertdist); /* height of new rectangle */ - area2 = hmin * (w2 + 1); - - if (area1 > area2) { - wp = wmin; - hp = h1 + 1; - } else { - wp = w2 + 1; - hp = hmin; - } - } - } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */ - prevfg = j; - lowestfg[j] = i; - wp = hp = 0; - } - linew[i][j] = wp; - lineh[i][j] = hp; - if (wp * hp > maxarea) { - maxarea = wp * hp; - xmax = j; - ymax = i; - wmax = wp; - hmax = hp; - } - } - } - - /* Translate from LR corner to Box coords (UL corner, w, h) */ - box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax); - *pbox = box; - - if (ppixdb) { - *ppixdb = pixConvertTo8(pixs, TRUE); - pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0); - } - - LEPT_FREE(linew); - LEPT_FREE(lineh); - LEPT_FREE(lowestfg); - pixDestroy(&pixw); - pixDestroy(&pixh); - return 0; -} - - -/*---------------------------------------------------------------------* - * Generate rectangle inside connected component * - *---------------------------------------------------------------------*/ -/*! - * \brief pixFindRectangleInCC() - * - * \param[in] pixs 1 bpp, with sufficient closings to make the fg be - * a single c.c. that is a convex hull - * \param[in] boxs [optional] if NULL, %pixs should be a minimum - * container of a single c.c. 
- * \param[in] fract first and all consecutive lines found must be at - * least this fraction of the fast scan dimension - * \param[in] dir L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of - * fast scan - * \param[in] select L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, - * L_LARGEST_AREA, L_SMALEST_AREA - * \param[in] debug if 1, generates output pdf showing intermediate - * computation and final result - * \return box of included rectangle, or NULL on error - * - *
- * Notes:
- *      (1) Computation is similar to pixFindLargestRectangle(), but allows
- *          a different set of results to choose from.
- *      (2) Select the fast scan direction.  Then, scanning in the slow
- *          direction, finds the longest run of ON pixels in the fast
- *          scan direction and look for the first first run that is longer
- *          than %fract of the dimension.  Continues until a shorter run
- *          is found.  This generates a box of ON pixels fitting into the c.c.
- *      (3) Do this from both slow scan directions and use %select to get
- *          a resulting box from these two.
- *      (4) The extracted rectangle is not necessarily the largest that
- *          can fit in the c.c.  To get that, use pixFindLargestRectangle().
- */
-BOX *
-pixFindRectangleInCC(PIX       *pixs,
-                     BOX       *boxs,
-                     l_float32  fract,
-                     l_int32    dir,
-                     l_int32    select,
-                     l_int32    debug)
-{
-l_int32  x, y, i, j, w, h, w1, h1, w2, h2, found, res;
-l_int32  xfirst, xlast, xstart, yfirst, ylast, length;
-BOX     *box1, *box2, *box3, *box4, *box5;
-PIX     *pix1, *pix2, *pixdb1, *pixdb2;
-PIXA    *pixadb;
-
-    PROCNAME("pixFindRectangleInCC");
-
-    if (!pixs || pixGetDepth(pixs) != 1)
-        return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
-    if (fract <= 0.0 || fract > 1.0)
-        return (BOX *)ERROR_PTR("invalid fraction", procName, NULL);
-    if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
-        return (BOX *)ERROR_PTR("invalid scan direction", procName, NULL);
-    if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
-        select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
-        return (BOX *)ERROR_PTR("invalid select", procName, NULL);
-
-        /* Extract the c.c. if necessary */
-    x = y = 0;
-    if (boxs) {
-        pix1 = pixClipRectangle(pixs, boxs, NULL);
-        boxGetGeometry(boxs, &x, &y, NULL, NULL);
-    } else {
-        pix1 = pixClone(pixs);
-    }
-
-        /* All fast scans are horizontal; rotate 90 deg cw if necessary */
-    if (dir == L_SCAN_VERTICAL)
-        pix2 = pixRotate90(pix1, 1);
-    else  /* L_SCAN_HORIZONTAL */
-        pix2 = pixClone(pix1);
-    pixGetDimensions(pix2, &w, &h, NULL);
-
-    pixadb = (debug) ? pixaCreate(0) : NULL;
-    pixdb1 = NULL;
-    if (pixadb) {
-        lept_mkdir("lept/rect");
-        pixaAddPix(pixadb, pix1, L_CLONE);
-        pixdb1 = pixConvertTo32(pix2);
-    }
-    pixDestroy(&pix1);
-
-        /* Scanning down, find the first scanline with a long enough run.
-         * That run goes from (xfirst, yfirst) to (xlast, yfirst).  */
-    found = FALSE;
-    for (i = 0; i < h; i++) {
-        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
-        if (length >= (l_int32)(fract * w + 0.5)) {
-            yfirst = i;
-            xfirst = xstart;
-            xlast = xfirst + length - 1;
-            found = TRUE;
-            break;
-        }
-    }
-    if (!found) {
-        L_WARNING("no run of sufficient size was found\n", procName);
-        pixDestroy(&pix2);
-        pixDestroy(&pixdb1);
-        pixaDestroy(&pixadb);
-        return NULL;
-    }
-
-         /* Continue down until the condition fails */
-    w1 = xlast - xfirst + 1;
-    h1 = h - yfirst;  /* initialize */
-    for (i = yfirst + 1; i < h; i++) {
-        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
-        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
-            i == h - 1) {
-            ylast = i - 1;
-            h1 = ylast - yfirst + 1;
-            break;
-        }
-    }
-    box1 = boxCreate(xfirst, yfirst, w1, h1);
-
-        /* Scanning up, find the first scanline with a long enough run.
-         * That run goes from (xfirst, ylast) to (xlast, ylast).  */
-    for (i = h - 1; i >= 0; i--) {
-        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
-        if (length >= (l_int32)(fract * w + 0.5)) {
-            ylast = i;
-            xfirst = xstart;
-            xlast = xfirst + length - 1;
-            break;
-        }
-    }
-
-         /* Continue up until the condition fails */
-    w2 = xlast - xfirst + 1;
-    h2 = ylast + 1;  /* initialize */
-    for (i = ylast - 1; i >= 0; i--) {
-        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
-        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
-            i == 0) {
-            yfirst = i + 1;
-            h2 = ylast - yfirst + 1;
-            break;
-        }
-    }
-    box2 = boxCreate(xfirst, yfirst, w2, h2);
-    pixDestroy(&pix2);
-
-    if (pixadb) {
-        pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
-        pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
-        pixaAddPix(pixadb, pixdb1, L_INSERT);
-    }
-
-        /* Select the final result from the two boxes */
-    if (select == L_GEOMETRIC_UNION)
-        box3 = boxBoundingRegion(box1, box2);
-    else if (select == L_GEOMETRIC_INTERSECTION)
-        box3 = boxOverlapRegion(box1, box2);
-    else if (select == L_LARGEST_AREA)
-        box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
-    else  /* select == L_SMALLEST_AREA) */
-        box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
-    boxDestroy(&box1);
-    boxDestroy(&box2);
-
-        /* Rotate the box 90 degrees ccw if necessary */
-    box4 = NULL;
-    if (box3) {
-        if (dir == L_SCAN_VERTICAL)
-            box4 = boxRotateOrth(box3, w, h, 3);
-        else
-            box4 = boxCopy(box3);
-    }
-
-        /* Transform back to global coordinates if %boxs exists */
-    box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
-    boxDestroy(&box3);
-    boxDestroy(&box4);
-
-        /* Debug output */
-    if (pixadb) {
-        pixdb1 = pixConvertTo8(pixs, 0);
-        pixAddConstantGray(pixdb1, 190);
-        pixdb2 = pixConvertTo32(pixdb1);
-        if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
-        pixaAddPix(pixadb, pixdb2, L_INSERT);
-        res = pixGetXRes(pixs);
-        L_INFO("Writing debug files to /tmp/lept/rect/\n", procName);
-        pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
-                        "/tmp/lept/rect/fitrect.pdf");
-        pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
-        pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
-        pixDestroy(&pix1);
-        pixDestroy(&pixdb1);
-        pixaDestroy(&pixadb);
-    }
-
-    return box5;
-}
-
-/*------------------------------------------------------------------*
- *                    Automatic photoinvert for OCR                 *
- *------------------------------------------------------------------*/
-/*!
- * \brief   pixAutoPhotoinvert()
- *
- * \param[in]    pixs       any depth, colormap ok
- * \param[in]    thresh     binarization threshold; use 0 for default
- * \param[out]   ppixm      [optional] image regions to be inverted
- * \param[out]   pixadb     [optional] debug; input NULL to skip
- * \return  pixd   1 bpp image to be sent to OCR, or NULL on error
- *
- * 
- * Notes:
- *      (1) A 1 bpp image is returned, where pixels in image regions are
- *          photo-inverted.
- *      (2) If there is light text with a dark background, this will
- *          identify the region and photoinvert the pixels there if
- *          there are at least 60% fg pixels in the region.
- *      (3) For debug output, input a (typically empty) %pixadb.
- * 
- */ -PIX * -pixAutoPhotoinvert(PIX *pixs, - l_int32 thresh, - PIX **ppixm, - PIXA *pixadb) -{ -l_int32 i, n, empty, x, y, w, h; -l_float32 fgfract; -BOX *box1; -BOXA *boxa1; -PIX *pix1, *pix2, *pix3, *pix4, *pix5; -PIXA *pixa1; - - PROCNAME("pixAutoPhotoinvert"); - - if (ppixm) *ppixm = NULL; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (thresh == 0) thresh = 128; - - if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); - - /* Make the halftone mask to identify region for photo-inversion */ - pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb); - pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* clean it up */ - if (pixadb) { - pixaAddPix(pixadb, pix2, L_CLONE); - pixaAddPix(pixadb, pix3, L_COPY); - } - pixDestroy(&pix2); - pixZero(pix3, &empty); - if (empty) { - pixDestroy(&pix3); - return pix1; - } - - /* Examine each component and validate the inversion. - * Require at least 60% of pixels under each component to be FG. 
*/ - boxa1 = pixConnCompBB(pix3, 8); - n = boxaGetCount(boxa1); - for (i = 0; i < n; i++) { - box1 = boxaGetBox(boxa1, i, L_COPY); - pix5 = pixClipRectangle(pix1, box1, NULL); - pixForegroundFraction(pix5, &fgfract); - if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract); - if (fgfract < 0.6) { /* erase from the mask */ - boxGetGeometry(box1, &x, &y, &w, &h); - pixRasterop(pix3, x, y, w, h, PIX_CLR, NULL, 0, 0); - } - pixDestroy(&pix5); - boxDestroy(&box1); - } - boxaDestroy(&boxa1); - pixZero(pix3, &empty); - if (empty) { - pixDestroy(&pix3); - return pix1; - } - - /* Combine pixels of the photo-inverted pix with the binarized input */ - pix4 = pixInvert(NULL, pix1); - pixCombineMasked(pix1, pix4, pix3); - - if (pixadb) { - pixaAddPix(pixadb, pix4, L_CLONE); - pixaAddPix(pixadb, pix1, L_COPY); - } - pixDestroy(&pix4); - if (ppixm) - *ppixm = pix3; - else - pixDestroy(&pix3); - return pix1; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/paintcmap.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/paintcmap.c deleted file mode 100644 index b0f27982..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/paintcmap.c +++ /dev/null @@ -1,765 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file paintcmap.c - *
- *
- *      These in-place functions paint onto colormap images.
- *
- *      Repaint selected pixels in region
- *           l_int32     pixSetSelectCmap()
- *
- *      Repaint non-white pixels in region
- *           l_int32     pixColorGrayRegionsCmap()
- *           l_int32     pixColorGrayCmap()
- *           l_int32     pixColorGrayMaskedCmap()
- *           l_int32     addColorizedGrayToCmap()
- *
- *      Repaint selected pixels through mask
- *           l_int32     pixSetSelectMaskedCmap()
- *
- *      Repaint all pixels through mask
- *           l_int32     pixSetMaskedCmap()
- *
- *
- *  The 'set select' functions condition the setting on a specific
- *  pixel value (i.e., index into the colormap) of the underyling
- *  Pix that is being modified.  The same conditioning is used in
- *  pixBlendCmap().
- *
- *  The pixColorGrayCmap() function sets all truly gray (r = g = b) pixels,
- *  with the exception of either black or white pixels, to a new color.
- *
- *  The pixSetSelectMaskedCmap() function conditions pixel painting
- *  on both a specific pixel value and location within the fg mask.
- *  By contrast, pixSetMaskedCmap() sets all pixels under the
- *  mask foreground, without considering the initial pixel values.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*-------------------------------------------------------------* - * Repaint selected pixels in region * - *-------------------------------------------------------------*/ -/*! - * \brief pixSetSelectCmap() - * - * \param[in] pixs 1, 2, 4 or 8 bpp, with colormap - * \param[in] box [optional] region to set color; can be NULL - * \param[in] sindex colormap index of pixels to be changed - * \param[in] rval, gval, bval new color to paint - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) It sets all pixels in region that have the color specified
- *          by the colormap index %sindex to the new color.
- *      (3) %sindex must be in the existing colormap; otherwise an
- *          error is returned.
- *      (4) If the new color exists in the colormap, it is used;
- *          otherwise, it is added to the colormap.  If it cannot be
- *          added because the colormap is full, an error is returned.
- *      (5) If %box is NULL, applies function to the entire image; otherwise,
- *          clips the operation to the intersection of the box and pix.
- *      (6) An example of use would be to set to a specific color all
- *          the light (background) pixels within a certain region of
- *          a 3-level 2 bpp image, while leaving light pixels outside
- *          this region unchanged.
- * 
- */ -l_ok -pixSetSelectCmap(PIX *pixs, - BOX *box, - l_int32 sindex, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, j, w, h, d, n, x1, y1, x2, y2, bw, bh, val, wpls; -l_int32 index; /* of new color to be set */ -l_uint32 *lines, *datas; -PIXCMAP *cmap; - - PROCNAME("pixSetSelectCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap", procName, 1); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8) - return ERROR_INT("depth not in {1,2,4,8}", procName, 1); - - /* Add new color if necessary; get index of this color in cmap */ - n = pixcmapGetCount(cmap); - if (sindex >= n) - return ERROR_INT("sindex too large; no cmap entry", procName, 1); - if (pixcmapGetIndex(cmap, rval, gval, bval, &index)) { /* not found */ - if (pixcmapAddColor(cmap, rval, gval, bval)) - return ERROR_INT("error adding cmap entry", procName, 1); - else - index = n; /* we've added one color */ - } - - /* Determine the region of substitution */ - pixGetDimensions(pixs, &w, &h, NULL); - if (!box) { - x1 = y1 = 0; - x2 = w; - y2 = h; - } else { - boxGetGeometry(box, &x1, &y1, &bw, &bh); - x2 = x1 + bw - 1; - y2 = y1 + bh - 1; - } - - /* Replace pixel value sindex by index in the region */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - for (i = y1; i <= y2; i++) { - if (i < 0 || i >= h) /* clip */ - continue; - lines = datas + i * wpls; - for (j = x1; j <= x2; j++) { - if (j < 0 || j >= w) /* clip */ - continue; - switch (d) { - case 1: - val = GET_DATA_BIT(lines, j); - if (val == sindex) { - if (index == 0) - CLEAR_DATA_BIT(lines, j); - else - SET_DATA_BIT(lines, j); - } - break; - case 2: - val = GET_DATA_DIBIT(lines, j); - if (val == sindex) - SET_DATA_DIBIT(lines, j, index); - break; - case 4: - val = GET_DATA_QBIT(lines, j); - if (val == sindex) - SET_DATA_QBIT(lines, j, index); - break; - case 8: - val = GET_DATA_BYTE(lines, j); - if (val == sindex) - 
SET_DATA_BYTE(lines, j, index); - break; - default: - return ERROR_INT("depth not in {1,2,4,8}", procName, 1); - } - } - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Repaint gray pixels in region * - *-------------------------------------------------------------*/ -/*! - * \brief pixColorGrayRegionsCmap() - * - * \param[in] pixs 8 bpp, with colormap - * \param[in] boxa of regions in which to apply color - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] rval, gval, bval target color - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) If %type == L_PAINT_LIGHT, it colorizes non-black pixels,
- *          preserving antialiasing.
- *          If %type == L_PAINT_DARK, it colorizes non-white pixels,
- *          preserving antialiasing.  See pixColorGrayCmap() for details.
- *      (3) This can also be called through pixColorGrayRegions().
- *      (4) This increases the colormap size by the number of
- *          different gray (non-black or non-white) colors in the
- *          selected regions of pixs.  If there is not enough room in
- *          the colormap for this expansion, it returns 1 (error),
- *          and the caller should check the return value.
- *      (5) Because two boxes in %boxa can overlap, pixels that
- *          are colorized in the first box must be excluded in the
- *          second because their value exceeds the size of the map.
- * 
- */ -l_ok -pixColorGrayRegionsCmap(PIX *pixs, - BOXA *boxa, - l_int32 type, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, j, k, w, h, n, nc, x1, y1, x2, y2, bw, bh, wpl; -l_int32 val, nval; -l_int32 *map; -l_uint32 *line, *data; -BOX *box; -NUMA *na; -PIXCMAP *cmap; - - PROCNAME("pixColorGrayRegionsCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap", procName, 1); - if (pixGetDepth(pixs) != 8) - return ERROR_INT("depth not 8 bpp", procName, 1); - if (type != L_PAINT_DARK && type != L_PAINT_LIGHT) - return ERROR_INT("invalid type", procName, 1); - - nc = pixcmapGetCount(cmap); - if (addColorizedGrayToCmap(cmap, type, rval, gval, bval, &na)) - return ERROR_INT("no room; cmap full", procName, 1); - map = numaGetIArray(na); - numaDestroy(&na); - if (!map) - return ERROR_INT("map not made", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - n = boxaGetCount(boxa); - for (k = 0; k < n; k++) { - box = boxaGetBox(boxa, k, L_CLONE); - boxGetGeometry(box, &x1, &y1, &bw, &bh); - x2 = x1 + bw - 1; - y2 = y1 + bh - 1; - - /* Remap gray pixels in the region */ - for (i = y1; i <= y2; i++) { - if (i < 0 || i >= h) /* clip */ - continue; - line = data + i * wpl; - for (j = x1; j <= x2; j++) { - if (j < 0 || j >= w) /* clip */ - continue; - val = GET_DATA_BYTE(line, j); - if (val >= nc) continue; /* from overlapping b.b. */ - nval = map[val]; - if (nval != 256) - SET_DATA_BYTE(line, j, nval); - } - } - boxDestroy(&box); - } - - LEPT_FREE(map); - return 0; -} - - -/*! - * \brief pixColorGrayCmap() - * - * \param[in] pixs 2, 4 or 8 bpp, with colormap - * \param[in] box [optional] region to set color; can be NULL - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] rval, gval, bval target color - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) If %type == L_PAINT_LIGHT, it colorizes non-black pixels,
- *          preserving antialiasing.
- *          If %type == L_PAINT_DARK, it colorizes non-white pixels,
- *          preserving antialiasing.
- *      (3) %box gives the region to apply color; if NULL, this
- *          colorizes the entire image.
- *      (4) If the cmap is only 2 or 4 bpp, pixs is converted in-place
- *          to an 8 bpp cmap.  A 1 bpp cmap is not a valid input pix.
- *      (5) This can also be called through pixColorGray().
- *      (6) This operation increases the colormap size by the number of
- *          different gray (non-black or non-white) colors in the
- *          input colormap.  If there is not enough room in the colormap
- *          for this expansion, it returns 1 (error), and the caller
- *          should check the return value.
- *      (7) Using the darkness of each original pixel in the rect,
- *          it generates a new color (based on the input rgb values).
- *          If %type == L_PAINT_LIGHT, the new color is a (generally)
- *          darken-to-black version of the input rgb color, where the
- *          amount of darkening increases with the darkness of the
- *          original pixel color.
- *          If %type == L_PAINT_DARK, the new color is a (generally)
- *          faded-to-white version of the input rgb color, where the
- *          amount of fading increases with the brightness of the
- *          original pixel color.
- * 
- */ -l_ok -pixColorGrayCmap(PIX *pixs, - BOX *box, - l_int32 type, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 w, h, d, ret; -PIX *pixt; -BOXA *boxa; -PIXCMAP *cmap; - - PROCNAME("pixColorGrayCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return ERROR_INT("depth not in {2, 4, 8}", procName, 1); - if (type != L_PAINT_DARK && type != L_PAINT_LIGHT) - return ERROR_INT("invalid type", procName, 1); - - /* If 2 bpp or 4 bpp, convert in-place to 8 bpp. */ - if (d == 2 || d == 4) { - pixt = pixConvertTo8(pixs, 1); - pixTransferAllData(pixs, &pixt, 0, 0); - } - - /* If box == NULL, color the entire image */ - boxa = boxaCreate(1); - if (box) { - boxaAddBox(boxa, box, L_COPY); - } else { - box = boxCreate(0, 0, w, h); - boxaAddBox(boxa, box, L_INSERT); - } - ret = pixColorGrayRegionsCmap(pixs, boxa, type, rval, gval, bval); - - boxaDestroy(&boxa); - return ret; -} - - -/*! - * \brief pixColorGrayMaskedCmap() - * - * \param[in] pixs 8 bpp, with colormap - * \param[in] pixm 1 bpp mask, through which to apply color - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] rval, gval, bval target color - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) If %type == L_PAINT_LIGHT, it colorizes non-black pixels,
- *          preserving antialiasing.
- *          If %type == L_PAINT_DARK, it colorizes non-white pixels,
- *          preserving antialiasing.  See pixColorGrayCmap() for details.
- *      (3) This increases the colormap size by the number of
- *          different gray (non-black or non-white) colors in the
- *          input colormap.  If there is not enough room in the colormap
- *          for this expansion, it returns 1 (error).
- * 
- */ -l_ok -pixColorGrayMaskedCmap(PIX *pixs, - PIX *pixm, - l_int32 type, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, j, w, h, wm, hm, wmin, hmin, wpl, wplm; -l_int32 val, nval; -l_int32 *map; -l_uint32 *line, *data, *linem, *datam; -NUMA *na; -PIXCMAP *cmap; - - PROCNAME("pixColorGrayMaskedCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixm || pixGetDepth(pixm) != 1) - return ERROR_INT("pixm undefined or not 1 bpp", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap", procName, 1); - if (pixGetDepth(pixs) != 8) - return ERROR_INT("depth not 8 bpp", procName, 1); - if (type != L_PAINT_DARK && type != L_PAINT_LIGHT) - return ERROR_INT("invalid type", procName, 1); - - if (addColorizedGrayToCmap(cmap, type, rval, gval, bval, &na)) - return ERROR_INT("no room; cmap full", procName, 1); - map = numaGetIArray(na); - numaDestroy(&na); - if (!map) - return ERROR_INT("map not made", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - pixGetDimensions(pixm, &wm, &hm, NULL); - if (wm != w) - L_WARNING("wm = %d differs from w = %d\n", procName, wm, w); - if (hm != h) - L_WARNING("hm = %d differs from h = %d\n", procName, hm, h); - wmin = L_MIN(w, wm); - hmin = L_MIN(h, hm); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - - /* Remap gray pixels in the region */ - for (i = 0; i < hmin; i++) { - line = data + i * wpl; - linem = datam + i * wplm; - for (j = 0; j < wmin; j++) { - if (GET_DATA_BIT(linem, j) == 0) - continue; - val = GET_DATA_BYTE(line, j); - nval = map[val]; - if (nval != 256) - SET_DATA_BYTE(line, j, nval); - } - } - - LEPT_FREE(map); - return 0; -} - - -/*! 
- * \brief addColorizedGrayToCmap() - * - * \param[in] cmap from 2 or 4 bpp pix - * \param[in] type L_PAINT_LIGHT, L_PAINT_DARK - * \param[in] rval, gval, bval target color - * \param[out] pna [optional] table for mapping new cmap entries - * \return 0 if OK; 1 on error; 2 if new colors will not fit in cmap. - * - *
- * Notes:
- *      (1) If %type == L_PAINT_LIGHT, it colorizes non-black pixels,
- *          preserving antialiasing.
- *          If %type == L_PAINT_DARK, it colorizes non-white pixels,
- *          preserving antialiasing.
- *      (2) This increases the colormap size by the number of
- *          different gray (non-black or non-white) colors in the
- *          input colormap.  If there is not enough room in the colormap
- *          for this expansion, it returns 1 (treated as a warning);
- *          the caller should check the return value.
- *      (3) This can be used to determine if the new colors will fit in
- *          the cmap, using null for &na.  Returns 0 if they fit; 2 if
- *          they don't fit.
- *      (4) The mapping table contains, for each gray color found, the
- *          index of the corresponding colorized pixel.  Non-gray
- *          pixels are assigned the invalid index 256.
- *      (5) See pixColorGrayCmap() for usage.
- * 
- */ -l_ok -addColorizedGrayToCmap(PIXCMAP *cmap, - l_int32 type, - l_int32 rval, - l_int32 gval, - l_int32 bval, - NUMA **pna) -{ -l_int32 i, n, erval, egval, ebval, nrval, ngval, nbval, newindex; -NUMA *na; - - PROCNAME("addColorizedGrayToCmap"); - - if (pna) *pna = NULL; - if (!cmap) - return ERROR_INT("cmap not defined", procName, 1); - if (type != L_PAINT_DARK && type != L_PAINT_LIGHT) - return ERROR_INT("invalid type", procName, 1); - - n = pixcmapGetCount(cmap); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pixcmapGetColor(cmap, i, &erval, &egval, &ebval); - if (type == L_PAINT_LIGHT) { - if (erval == egval && erval == ebval && erval != 0) { - nrval = (l_int32)(rval * (l_float32)erval / 255.); - ngval = (l_int32)(gval * (l_float32)egval / 255.); - nbval = (l_int32)(bval * (l_float32)ebval / 255.); - if (pixcmapAddNewColor(cmap, nrval, ngval, nbval, &newindex)) { - numaDestroy(&na); - L_WARNING("no room; colormap full\n", procName); - return 2; - } - numaAddNumber(na, newindex); - } else { - numaAddNumber(na, 256); /* invalid number; not gray */ - } - } else { /* L_PAINT_DARK */ - if (erval == egval && erval == ebval && erval != 255) { - nrval = rval + - (l_int32)((255. - rval) * (l_float32)erval / 255.); - ngval = gval + - (l_int32)((255. - gval) * (l_float32)egval / 255.); - nbval = bval + - (l_int32)((255. - bval) * (l_float32)ebval / 255.); - if (pixcmapAddNewColor(cmap, nrval, ngval, nbval, &newindex)) { - numaDestroy(&na); - L_WARNING("no room; colormap full\n", procName); - return 2; - } - numaAddNumber(na, newindex); - } else { - numaAddNumber(na, 256); /* invalid number; not gray */ - } - } - } - - if (pna) - *pna = na; - else - numaDestroy(&na); - return 0; -} - - -/*-------------------------------------------------------------* - * Repaint selected pixels through mask * - *-------------------------------------------------------------*/ -/*! 
- * \brief pixSetSelectMaskedCmap() - * - * \param[in] pixs 2, 4 or 8 bpp, with colormap - * \param[in] pixm [optional] 1 bpp mask; no-op if NULL - * \param[in] x, y UL corner of mask relative to pixs - * \param[in] sindex cmap index of pixels in pixs to be changed - * \param[in] rval, gval, bval new color to substitute - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) This paints through the fg of pixm and replaces all pixels
- *          in pixs that have the value %sindex with the new color.
- *      (3) If pixm == NULL, a warning is given.
- *      (4) %sindex must be in the existing colormap; otherwise an
- *          error is returned.
- *      (5) If the new color exists in the colormap, it is used;
- *          otherwise, it is added to the colormap.  If the colormap
- *          is full, an error is returned.
- * 
- */ -l_ok -pixSetSelectMaskedCmap(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 sindex, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 i, j, w, h, d, n, wm, hm, wpls, wplm, val; -l_int32 index; /* of new color to be set */ -l_uint32 *lines, *linem, *datas, *datam; -PIXCMAP *cmap; - - PROCNAME("pixSetSelectMaskedCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap", procName, 1); - if (!pixm) { - L_WARNING("no mask; nothing to do\n", procName); - return 0; - } - - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8) - return ERROR_INT("depth not in {2, 4, 8}", procName, 1); - - /* add new color if necessary; get index of this color in cmap */ - n = pixcmapGetCount(cmap); - if (sindex >= n) - return ERROR_INT("sindex too large; no cmap entry", procName, 1); - if (pixcmapGetIndex(cmap, rval, gval, bval, &index)) { /* not found */ - if (pixcmapAddColor(cmap, rval, gval, bval)) - return ERROR_INT("error adding cmap entry", procName, 1); - else - index = n; /* we've added one color */ - } - - /* replace pixel value sindex by index when fg pixel in pixmc - * overlays it */ - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wm = pixGetWidth(pixm); - hm = pixGetHeight(pixm); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - for (i = 0; i < hm; i++) { - if (i + y < 0 || i + y >= h) continue; - lines = datas + (y + i) * wpls; - linem = datam + i * wplm; - for (j = 0; j < wm; j++) { - if (j + x < 0 || j + x >= w) continue; - if (GET_DATA_BIT(linem, j)) { - switch (d) { - case 2: - val = GET_DATA_DIBIT(lines, x + j); - if (val == sindex) - SET_DATA_DIBIT(lines, x + j, index); - break; - case 4: - val = GET_DATA_QBIT(lines, x + j); - if (val == sindex) - SET_DATA_QBIT(lines, x + j, index); - break; - case 8: - val = GET_DATA_BYTE(lines, x + j); - if (val == sindex) - SET_DATA_BYTE(lines, x + j, 
index); - break; - default: - return ERROR_INT("depth not in {1,2,4,8}", procName, 1); - } - } - } - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Repaint all pixels through mask * - *-------------------------------------------------------------*/ -/*! - * \brief pixSetMaskedCmap() - * - * \param[in] pixs 2, 4 or 8 bpp, colormapped - * \param[in] pixm [optional] 1 bpp mask; no-op if NULL - * \param[in] x, y origin of pixm relative to pixs; - * can be negative - * \param[in] rval, gval, bval new color to set at each masked pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) It paints a single color through the mask (as a stencil).
- *      (3) The mask origin is placed at (%x,%y) on %pixs, and the
- *          operation is clipped to the intersection of the mask and pixs.
- *      (4) If %pixm == NULL, a warning is given.
- *      (5) Typically, %pixm is a small binary mask located somewhere
- *          on the larger %pixs.
- *      (6) If the color is in the colormap, it is used.  Otherwise,
- *          it is added if possible; an error is returned if the
- *          colormap is already full.
- * 
- */ -l_ok -pixSetMaskedCmap(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 w, h, d, wpl, wm, hm, wplm; -l_int32 i, j, index; -l_uint32 *data, *datam, *line, *linem; -PIXCMAP *cmap; - - PROCNAME("pixSetMaskedCmap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if ((cmap = pixGetColormap(pixs)) == NULL) - return ERROR_INT("no colormap in pixs", procName, 1); - if (!pixm) { - L_WARNING("no mask; nothing to do\n", procName); - return 0; - } - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8) - return ERROR_INT("depth not in {2,4,8}", procName, 1); - if (pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - - /* Add new color if necessary; store in 'index' */ - if (pixcmapGetIndex(cmap, rval, gval, bval, &index)) { /* not found */ - if (pixcmapAddColor(cmap, rval, gval, bval)) - return ERROR_INT("no room in cmap", procName, 1); - index = pixcmapGetCount(cmap) - 1; - } - - pixGetDimensions(pixs, &w, &h, NULL); - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - pixGetDimensions(pixm, &wm, &hm, NULL); - wplm = pixGetWpl(pixm); - datam = pixGetData(pixm); - for (i = 0; i < hm; i++) { - if (i + y < 0 || i + y >= h) continue; - line = data + (i + y) * wpl; - linem = datam + i * wplm; - for (j = 0; j < wm; j++) { - if (j + x < 0 || j + x >= w) continue; - if (GET_DATA_BIT(linem, j)) { /* paint color */ - switch (d) { - case 2: - SET_DATA_DIBIT(line, j + x, index); - break; - case 4: - SET_DATA_QBIT(line, j + x, index); - break; - case 8: - SET_DATA_BYTE(line, j + x, index); - break; - default: - return ERROR_INT("depth not in {2,4,8}", procName, 1); - } - } - } - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/parseprotos.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/parseprotos.c deleted file mode 100644 index bc1a8969..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/parseprotos.c +++ /dev/null 
@@ -1,978 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * \file parseprotos.c - *
- *
- *       char             *parseForProtos()
- *
- *    Static helpers
- *       static l_int32    getNextNonCommentLine()
- *       static l_int32    getNextNonBlankLine()
- *       static l_int32    getNextNonDoubleSlashLine()
- *       static l_int32    searchForProtoSignature()
- *       static char      *captureProtoSignature()
- *       static char      *cleanProtoSignature()
- *       static l_int32    skipToEndOfFunction()
- *       static l_int32    skipToMatchingBrace()
- *       static l_int32    skipToSemicolon()
- *       static l_int32    getOffsetForCharacter()
- *       static l_int32    getOffsetForMatchingRP()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_int32 Bufsize = 2048; /* max token size */ - -static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext); -static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext); -static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start, - l_int32 *pnext); -static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin, - l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex, - l_int32 *pfound); -static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop, - l_int32 charindex); -static char * cleanProtoSignature(char *str); -static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start, - l_int32 charindex, l_int32 *pnext); -static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start, - l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex); -static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start, - l_int32 charindex, l_int32 *pnext); -static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar, - l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset); -static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start, - l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp, - l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset); - - -/* - * \brief parseForProtos() - * - * \param[in] filein output of cpp - * \param[in] prestring [optional] string that prefaces each decl; - * use NULL to omit - * \return parsestr string of function prototypes, or NULL on error - * - *
- * Notes:
- *      (1) We parse the output of cpp:
- *              cpp -ansi 
- *          Three plans were attempted, with success on the third.
- *      (2) Plan 1.  A cursory examination of the cpp output indicated that
- *          every function was preceded by a cpp comment statement.
- *          So we just need to look at statements beginning after comments.
- *          Unfortunately, this is NOT the case.  Some functions start
- *          without cpp comment lines, typically when there are no
- *          comments in the source that immediately precede the function.
- *      (3) Plan 2.  Consider the keywords in the language that start
- *          parts of the cpp file.  Some, like 'enum', 'union' and
- *          'struct', are followed after a while by '{', and eventually
- *          end with '}, plus an optional token and a final ';'.
- *          Others, like 'extern', 'static' and 'typedef', are never
- *          the beginnings of global function definitions.   Function
- *          prototypes have one or more sets of '(' followed eventually
- *          by a ')', and end with ';'.  But function definitions have
- *          tokens, followed by '(', more tokens, ')' and then
- *          immediately a '{'.  We would generate a prototype from this
- *          by adding a ';' to all tokens up to the ')'.  So we use
- *          these special tokens to decide what we are parsing.  And
- *          whenever a function definition is found and the prototype
- *          extracted, we skip through the rest of the function
- *          past the corresponding '}'.  This token ends a line, and
- *          is often on a line of its own.  But as it turns out,
- *          the only keyword we need to consider is 'static'.
- *      (4) Plan 3.  Consider the parentheses and braces for various
- *          declarations.  A struct, enum, or union has a pair of
- *          braces followed by a semicolon.  With the exception of an
- *          __attribute__ declaration for a struct, they cannot have parentheses
- *          before the left brace, but a struct can have lots of parentheses
- *          within the brace set.  A function prototype has no braces.
- *          A function declaration can have sets of left and right
- *          parentheses, but these are followed by a left brace.
- *          So plan 3 looks at the way parentheses and braces are
- *          organized.  Once the beginning of a function definition
- *          is found, the prototype is extracted and we search for
- *          the ending right brace.
- *      (5) To find the ending right brace, it is necessary to do some
- *          careful parsing.  For example, in this file, we have
- *          left and right braces as characters, and these must not
- *          be counted.  Somewhat more tricky, the file fhmtauto.c
- *          generates code, and includes a right brace in a string.
- *          So we must not include braces that are in strings.  But how
- *          do we know if something is inside a string?  Keep state,
- *          starting with not-inside, and every time you hit a double quote
- *          that is not escaped, toggle the condition.  Any brace
- *          found in the state of being within a string is ignored.
- *      (6) When a prototype is extracted, it is put in a canonical
- *          form (i.e., cleaned up).  Finally, we check that it is
- *          not static and save it.  (If static, it is ignored).
- *      (7) The %prestring for unix is NULL; it is included here so that
- *          you can use Microsoft's declaration for importing or
- *          exporting to a dll.  See environ.h for examples of use.
- *          Here, we set: %prestring = "LEPT_DLL ".  Note in particular
- *          the space character that will separate 'LEPT_DLL' from
- *          the standard unix prototype that follows.
- * 
- */ -char * -parseForProtos(const char *filein, - const char *prestring) -{ -char *strdata, *str, *newstr, *parsestr, *secondword; -l_int32 start, next, stop, charindex, found; -size_t nbytes; -SARRAY *sa, *saout, *satest; - - PROCNAME("parseForProtos"); - - if (!filein) - return (char *)ERROR_PTR("filein not defined", procName, NULL); - - /* Read in the cpp output into memory, one string for each - * line in the file, omitting blank lines. */ - strdata = (char *)l_binaryRead(filein, &nbytes); - sa = sarrayCreateLinesFromString(strdata, 0); - - saout = sarrayCreate(0); - next = 0; - while (1) { /* repeat after each non-static prototype is extracted */ - searchForProtoSignature(sa, next, &start, &stop, &charindex, &found); - if (!found) - break; -/* lept_stderr(" start = %d, stop = %d, charindex = %d\n", - start, stop, charindex); */ - str = captureProtoSignature(sa, start, stop, charindex); - - /* Make sure that the signature found by cpp does not begin with - * static, extern or typedef. We get 'extern' declarations - * from header files, and with some versions of cpp running on - * #include we get something of the form: - * extern ... (( ... )) ... ( ... ) { ... - * For this, the 1st '(' is the lp, the 2nd ')' is the rp, - * and there is a lot of garbage between the rp and the lp. - * It is easiest to simply reject any signature that starts - * with 'extern'. Note also that an 'extern' token has been - * prepended to each prototype, so the 'static' or - * 'extern' keywords we are looking for, if they exist, - * would be the second word. We also have a typedef in - * bmpio.c that has the form: - * typedef struct __attribute__((....)) { ...} ... ; - * This is avoided by blacklisting 'typedef' along with 'extern' - * and 'static'. 
*/ - satest = sarrayCreateWordsFromString(str); - secondword = sarrayGetString(satest, 1, L_NOCOPY); - if (strcmp(secondword, "static") && /* not static */ - strcmp(secondword, "extern") && /* not extern */ - strcmp(secondword, "typedef")) { /* not typedef */ - if (prestring) { /* prepend it to the prototype */ - newstr = stringJoin(prestring, str); - sarrayAddString(saout, newstr, L_INSERT); - LEPT_FREE(str); - } else { - sarrayAddString(saout, str, L_INSERT); - } - } else { - LEPT_FREE(str); - } - sarrayDestroy(&satest); - - skipToEndOfFunction(sa, stop, charindex, &next); - if (next == -1) break; - } - - /* Flatten into a string with newlines between prototypes */ - parsestr = sarrayToString(saout, 1); - LEPT_FREE(strdata); - sarrayDestroy(&sa); - sarrayDestroy(&saout); - - return parsestr; -} - - -/* - * \brief getNextNonCommentLine() - * - * \param[in] sa output from cpp, by line) - * \param[in] start starting index to search) - * \param[out] pnext index of first uncommented line after the start line - * \return 0 if OK, o on error - * - *
- * Notes:
- *      (1) Skips over all consecutive comment lines, beginning at 'start'
- *      (2) If all lines to the end are '#' comments, return next = -1
- * 
- */ -static l_int32 -getNextNonCommentLine(SARRAY *sa, - l_int32 start, - l_int32 *pnext) -{ -char *str; -l_int32 i, n; - - PROCNAME("getNextNonCommentLine"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pnext) - return ERROR_INT("&pnext not defined", procName, 1); - - /* Init for situation where this line and all following are comments */ - *pnext = -1; - - n = sarrayGetCount(sa); - for (i = start; i < n; i++) { - if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) - return ERROR_INT("str not returned; shouldn't happen", procName, 1); - if (str[0] != '#') { - *pnext = i; - return 0; - } - } - - return 0; -} - - -/* - * \brief getNextNonBlankLine() - * - * \param[in] sa output from cpp, by line - * \param[in] start starting index to search - * \param[out] pnext index of first nonblank line after the start line - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Skips over all consecutive blank lines, beginning at 'start'
- *      (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
- *      (3) If all lines to the end are blank, return next = -1
- * 
- */ -static l_int32 -getNextNonBlankLine(SARRAY *sa, - l_int32 start, - l_int32 *pnext) -{ -char *str; -l_int32 i, j, n, len; - - PROCNAME("getNextNonBlankLine"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pnext) - return ERROR_INT("&pnext not defined", procName, 1); - - /* Init for situation where this line and all following are blank */ - *pnext = -1; - - n = sarrayGetCount(sa); - for (i = start; i < n; i++) { - if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) - return ERROR_INT("str not returned; shouldn't happen", procName, 1); - len = strlen(str); - for (j = 0; j < len; j++) { - if (str[j] != ' ' && str[j] != '\t' - && str[j] != '\n' && str[j] != '\r') { /* non-blank */ - *pnext = i; - return 0; - } - } - } - - return 0; -} - - -/* - * \brief getNextNonDoubleSlashLine() - * - * \param[in] sa output from cpp, by line - * \param[in] start starting index to search - * \param[out] pnext index of first uncommented line after the start line - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Skips over all consecutive '//' lines, beginning at 'start'
- *      (2) If all lines to the end start with '//', return next = -1
- * 
- */ -static l_int32 -getNextNonDoubleSlashLine(SARRAY *sa, - l_int32 start, - l_int32 *pnext) -{ -char *str; -l_int32 i, n, len; - - PROCNAME("getNextNonDoubleSlashLine"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pnext) - return ERROR_INT("&pnext not defined", procName, 1); - - /* Init for situation where this line and all following - * start with '//' */ - *pnext = -1; - - n = sarrayGetCount(sa); - for (i = start; i < n; i++) { - if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) - return ERROR_INT("str not returned; shouldn't happen", procName, 1); - len = strlen(str); - if (len < 2 || str[0] != '/' || str[1] != '/') { - *pnext = i; - return 0; - } - } - - return 0; -} - - -/* - * \brief searchForProtoSignature() - * - * \param[in] sa output from cpp, by line - * \param[in] begin beginning index to search - * \param[out] pstart starting index for function definition - * \param[out] pstop index of line on which proto is completed - * \param[out] pcharindex char index of completing ')' character - * \param[out] pfound 1 if valid signature is found; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If this returns found == 0, it means that there are no
- *          more function definitions in the file.  Caller must check
- *          this value and exit the loop over the entire cpp file.
- *      (2) This follows plan 3 (see above).  We skip comment and blank
- *          lines at the beginning.  Then we don't check for keywords.
- *          Instead, find the relative locations of the first occurrences
- *          of these four tokens: left parenthesis (lp), right
- *          parenthesis (rp), left brace (lb) and semicolon (sc).
- *      (3) The signature of a function definition looks like this:
- *               .... '(' .... ')' '{'
- *          where the lp and rp must both precede the lb, with only
- *          whitespace between the rp and the lb.  The '....'
- *          are sets of tokens that have no braces.
- *      (4) If a function definition is found, this returns found = 1,
- *          with 'start' being the first line of the definition and
- *          'charindex' being the position of the ')' in line 'stop'
- *          at the end of the arg list.
- * 
- */ -static l_int32 -searchForProtoSignature(SARRAY *sa, - l_int32 begin, - l_int32 *pstart, - l_int32 *pstop, - l_int32 *pcharindex, - l_int32 *pfound) -{ -l_int32 next, rbline, rbindex, scline; -l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc; -l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc; -l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc; - - PROCNAME("searchForProtoSignature"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pstart) - return ERROR_INT("&start not defined", procName, 1); - if (!pstop) - return ERROR_INT("&stop not defined", procName, 1); - if (!pcharindex) - return ERROR_INT("&charindex not defined", procName, 1); - if (!pfound) - return ERROR_INT("&found not defined", procName, 1); - - *pfound = FALSE; - - while (1) { - - /* Skip over sequential '#' comment lines */ - getNextNonCommentLine(sa, begin, &next); - if (next == -1) return 0; - if (next != begin) { - begin = next; - continue; - } - - /* Skip over sequential blank lines */ - getNextNonBlankLine(sa, begin, &next); - if (next == -1) return 0; - if (next != begin) { - begin = next; - continue; - } - - /* Skip over sequential lines starting with '//' */ - getNextNonDoubleSlashLine(sa, begin, &next); - if (next == -1) return 0; - if (next != begin) { - begin = next; - continue; - } - - /* Search for specific character sequence patterns; namely - * a lp, a matching rp, a lb and a semicolon. - * Abort the search if no lp is found. */ - getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp, - &toffsetlp); - if (soffsetlp == -1) - break; - getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp, - &soffsetrp, &boffsetrp, &toffsetrp); - getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb, - &toffsetlb); - getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc, - &toffsetsc); - - /* We've found a lp. Now weed out the case where a matching - * rp and a lb are not both found. 
*/ - if (soffsetrp == -1 || soffsetlb == -1) - break; - - /* Check if a left brace occurs before a left parenthesis; - * if so, skip it */ - if (toffsetlb < toffsetlp) { - skipToMatchingBrace(sa, next + soffsetlb, boffsetlb, - &rbline, &rbindex); - skipToSemicolon(sa, rbline, rbindex, &scline); - begin = scline + 1; - continue; - } - - /* Check if a semicolon occurs before a left brace or - * a left parenthesis; if so, skip it */ - if ((soffsetsc != -1) && - (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) { - skipToSemicolon(sa, next, 0, &scline); - begin = scline + 1; - continue; - } - - /* OK, it should be a function definition. We haven't - * checked that there is only white space between the - * rp and lb, but we've only seen problems with two - * extern inlines in sys/stat.h, and this is handled - * later by eliminating any prototype beginning with 'extern'. */ - *pstart = next; - *pstop = next + soffsetrp; - *pcharindex = boffsetrp; - *pfound = TRUE; - break; - } - - return 0; -} - - -/* - * \brief captureProtoSignature() - * - * \param[in] sa output from cpp, by line - * \param[in] start starting index to search; never a comment line - * \param[in] stop index of line on which pattern is completed - * \param[in] charindex char index of completing ')' character - * \return cleanstr prototype string, or NULL on error - * - *
- * Notes:
- *      (1) Return all characters, ending with a ';' after the ')'
- * 
- */ -static char * -captureProtoSignature(SARRAY *sa, - l_int32 start, - l_int32 stop, - l_int32 charindex) -{ -char *str, *newstr, *protostr, *cleanstr; -SARRAY *sap; -l_int32 i; - - PROCNAME("captureProtoSignature"); - - if (!sa) - return (char *)ERROR_PTR("sa not defined", procName, NULL); - - sap = sarrayCreate(0); - for (i = start; i < stop; i++) { - str = sarrayGetString(sa, i, L_COPY); - sarrayAddString(sap, str, L_INSERT); - } - str = sarrayGetString(sa, stop, L_COPY); - str[charindex + 1] = '\0'; - newstr = stringJoin(str, ";"); - sarrayAddString(sap, newstr, L_INSERT); - LEPT_FREE(str); - protostr = sarrayToString(sap, 2); - sarrayDestroy(&sap); - cleanstr = cleanProtoSignature(protostr); - LEPT_FREE(protostr); - - return cleanstr; -} - - -/* - * \brief cleanProtoSignature() - * - * \param[in] instr input prototype string - * \return cleanstr clean prototype string, or NULL on error - * - *
- * Notes:
- *      (1) Adds 'extern' at beginning and regularizes spaces
- *          between tokens.
- * 
- */ -static char * -cleanProtoSignature(char *instr) -{ -char *str, *cleanstr; -char buf[Bufsize]; -char externstring[] = "extern"; -l_int32 i, j, nwords, nchars, index, len; -SARRAY *sa, *saout; - - PROCNAME("cleanProtoSignature"); - - if (!instr) - return (char *)ERROR_PTR("instr not defined", procName, NULL); - - sa = sarrayCreateWordsFromString(instr); - nwords = sarrayGetCount(sa); - saout = sarrayCreate(0); - sarrayAddString(saout, externstring, L_COPY); - for (i = 0; i < nwords; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - nchars = strlen(str); - index = 0; - for (j = 0; j < nchars; j++) { - if (index > Bufsize - 6) { - sarrayDestroy(&sa); - sarrayDestroy(&saout); - return (char *)ERROR_PTR("token too large", procName, NULL); - } - if (str[j] == '(') { - buf[index++] = ' '; - buf[index++] = '('; - buf[index++] = ' '; - } else if (str[j] == ')') { - buf[index++] = ' '; - buf[index++] = ')'; - } else { - buf[index++] = str[j]; - } - } - buf[index] = '\0'; - sarrayAddString(saout, buf, L_COPY); - } - - /* Flatten to a prototype string with spaces added after - * each word, and remove the last space */ - cleanstr = sarrayToString(saout, 2); - len = strlen(cleanstr); - cleanstr[len - 1] = '\0'; - - sarrayDestroy(&sa); - sarrayDestroy(&saout); - return cleanstr; -} - - -/* - * \brief skipToEndOfFunction() - * - * \param[in] sa output from cpp, by line - * \param[in] start index of starting line with left bracket to search - * \param[in] lbindex starting char index for left bracket - * \param[out] pnext index of line following the ending '}' for function - * \return 0 if OK, 1 on error - */ -static l_int32 -skipToEndOfFunction(SARRAY *sa, - l_int32 start, - l_int32 lbindex, - l_int32 *pnext) -{ -l_int32 end, rbindex; -l_int32 soffsetlb, boffsetlb, toffsetlb; - - PROCNAME("skipToEndOfFunction"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pnext) - return ERROR_INT("&next not defined", procName, 1); - - getOffsetForCharacter(sa, 
start, '{', &soffsetlb, &boffsetlb, - &toffsetlb); - skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex); - if (end == -1) { /* shouldn't happen! */ - *pnext = -1; - return 1; - } - - *pnext = end + 1; - return 0; -} - - -/* - * \brief skipToMatchingBrace() - * - * \param[in] sa output from cpp, by line - * \param[in] start index of starting line with left bracket to search - * \param[in] lbindex starting char index for left bracket - * \param[out] pstop index of line with the matching right bracket - * \param[out] prbindex char index of matching right bracket - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the matching right brace is not found, returns
- *          stop = -1.  This shouldn't happen.
- * 
- */ -static l_int32 -skipToMatchingBrace(SARRAY *sa, - l_int32 start, - l_int32 lbindex, - l_int32 *pstop, - l_int32 *prbindex) -{ -char *str; -l_int32 i, j, jstart, n, sumbrace, found, instring, nchars; - - PROCNAME("skipToMatchingBrace"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pstop) - return ERROR_INT("&stop not defined", procName, 1); - if (!prbindex) - return ERROR_INT("&rbindex not defined", procName, 1); - - instring = 0; /* init to FALSE; toggle on double quotes */ - *pstop = -1; - n = sarrayGetCount(sa); - sumbrace = 1; - found = FALSE; - for (i = start; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - jstart = 0; - if (i == start) - jstart = lbindex + 1; - nchars = strlen(str); - for (j = jstart; j < nchars; j++) { - /* Toggle the instring state every time you encounter - * a double quote that is NOT escaped. */ - if (j == jstart && str[j] == '\"') - instring = 1 - instring; - if (j > jstart && str[j] == '\"' && str[j-1] != '\\') - instring = 1 - instring; - /* Record the braces if they are neither a literal character - * nor within a string. */ - if (str[j] == '{' && str[j+1] != '\'' && !instring) { - sumbrace++; - } else if (str[j] == '}' && str[j+1] != '\'' && !instring) { - sumbrace--; - if (sumbrace == 0) { - found = TRUE; - *prbindex = j; - break; - } - } - } - if (found) { - *pstop = i; - return 0; - } - } - - return ERROR_INT("matching right brace not found", procName, 1); -} - - -/* - * \brief skipToSemicolon() - * - * \param[in] sa output from cpp, by line - * \param[in] start index of starting line to search - * \param[in] charindex starting char index for search - * \param[out] pnext index of line containing the next ';' - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the semicolon isn't found, returns next = -1.
- *          This shouldn't happen.
- *      (2) This is only used in contexts where the semicolon is
- *          not within a string.
- * 
- */ -static l_int32 -skipToSemicolon(SARRAY *sa, - l_int32 start, - l_int32 charindex, - l_int32 *pnext) -{ -char *str; -l_int32 i, j, n, jstart, nchars, found; - - PROCNAME("skipToSemicolon"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pnext) - return ERROR_INT("&next not defined", procName, 1); - - *pnext = -1; - n = sarrayGetCount(sa); - found = FALSE; - for (i = start; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - jstart = 0; - if (i == start) - jstart = charindex + 1; - nchars = strlen(str); - for (j = jstart; j < nchars; j++) { - if (str[j] == ';') { - found = TRUE;; - break; - } - } - if (found) { - *pnext = i; - return 0; - } - } - - return ERROR_INT("semicolon not found", procName, 1); -} - - -/* - * \brief getOffsetForCharacter() - * - * \param[in] sa output from cpp, by line - * \param[in] start starting index in sa to search; - * never a comment line - * \param[in] tchar we are searching for the first instance of this - * \param[out] psoffset offset in strings from start index - * \param[out] pboffset offset in bytes within string in which - * the character is first found - * \param[out] ptoffset offset in total bytes from beginning of string - * indexed by 'start' to the location where - * the character is first found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We are searching for the first instance of 'tchar', starting
- *          at the beginning of the string indexed by start.
- *      (2) If the character is not found, soffset is returned as -1,
- *          and the other offsets are set to very large numbers.  The
- *          caller must check the value of soffset.
- *      (3) This is only used in contexts where it is not necessary to
- *          consider if the character is inside a string.
- * 
- */ -static l_int32 -getOffsetForCharacter(SARRAY *sa, - l_int32 start, - char tchar, - l_int32 *psoffset, - l_int32 *pboffset, - l_int32 *ptoffset) -{ -char *str; -l_int32 i, j, n, nchars, totchars, found; - - PROCNAME("getOffsetForCharacter"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!psoffset) - return ERROR_INT("&soffset not defined", procName, 1); - if (!pboffset) - return ERROR_INT("&boffset not defined", procName, 1); - if (!ptoffset) - return ERROR_INT("&toffset not defined", procName, 1); - - *psoffset = -1; /* init to not found */ - *pboffset = 100000000; - *ptoffset = 100000000; - - n = sarrayGetCount(sa); - found = FALSE; - totchars = 0; - for (i = start; i < n; i++) { - if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) - return ERROR_INT("str not returned; shouldn't happen", procName, 1); - nchars = strlen(str); - for (j = 0; j < nchars; j++) { - if (str[j] == tchar) { - found = TRUE; - break; - } - } - if (found) - break; - totchars += nchars; - } - - if (found) { - *psoffset = i - start; - *pboffset = j; - *ptoffset = totchars + j; - } - - return 0; -} - - -/* - * \brief getOffsetForMatchingRP() - * - * \param[in] sa output from cpp, by line - * \param[in] start starting index in sa to search; - * never a comment line - * \param[in] soffsetlp string offset to first LP - * \param[in] boffsetlp byte offset within string to first LP - * \param[in] toffsetlp total byte offset to first LP - * \param[out] psoffset offset in strings from start index - * \param[out] pboffset offset in bytes within string in which - * the matching RP is found - * \param[out] ptoffset offset in total bytes from beginning of string - * indexed by 'start' to the location where - * the matching RP is found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We are searching for the matching right parenthesis (RP) that
- *          corresponds to the first LP found beginning at the string
- *          indexed by start.
- *      (2) If the matching RP is not found, soffset is returned as -1,
- *          and the other offsets are set to very large numbers.  The
- *          caller must check the value of soffset.
- *      (3) This is only used in contexts where it is not necessary to
- *          consider if the character is inside a string.
- *      (4) We must do this because although most arg lists have a single
- *          left and right parenthesis, it is possible to construct
- *          more complicated prototype declarations, such as those
- *          where functions are passed in.  The C++ rules for prototypes
- *          are strict, and require that for functions passed in as args,
- *          the function name arg be placed in parenthesis, as well
- *          as its arg list, thus incurring two extra levels of parentheses.
- * 
- */ -static l_int32 -getOffsetForMatchingRP(SARRAY *sa, - l_int32 start, - l_int32 soffsetlp, - l_int32 boffsetlp, - l_int32 toffsetlp, - l_int32 *psoffset, - l_int32 *pboffset, - l_int32 *ptoffset) -{ -char *str; -l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found; - - PROCNAME("getOffsetForMatchingRP"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!psoffset) - return ERROR_INT("&soffset not defined", procName, 1); - if (!pboffset) - return ERROR_INT("&boffset not defined", procName, 1); - if (!ptoffset) - return ERROR_INT("&toffset not defined", procName, 1); - - *psoffset = -1; /* init to not found */ - *pboffset = 100000000; - *ptoffset = 100000000; - - n = sarrayGetCount(sa); - found = FALSE; - totchars = toffsetlp; - leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */ - firstline = start + soffsetlp; - for (i = firstline; i < n; i++) { - if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) - return ERROR_INT("str not returned; shouldn't happen", procName, 1); - nchars = strlen(str); - jstart = 0; - if (i == firstline) - jstart = boffsetlp + 1; - for (j = jstart; j < nchars; j++) { - if (str[j] == '(') - leftmatch++; - else if (str[j] == ')') - leftmatch--; - if (leftmatch == 0) { - found = TRUE; - break; - } - } - if (found) - break; - if (i == firstline) - totchars += nchars - boffsetlp; - else - totchars += nchars; - } - - if (found) { - *psoffset = i - start; - *pboffset = j; - *ptoffset = totchars + j; - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/partify.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/partify.c deleted file mode 100644 index 4625b84a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/partify.c +++ /dev/null @@ -1,315 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file partify.c - *
- *
- *     Top level
- *         l_int32          partifyFiles()
- *         l_int32          partifyPixac()
- *
- *     Helpers
- *         static BOXA     *pixLocateStaveSets()
- *         static l_int32   boxaRemoveVGaps()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Static helplers */ -static BOXA *pixLocateStaveSets(PIX *pixs, l_int32 pageno, PIXA *pixadb); -static l_ok boxaRemoveVGaps(BOXA *boxa); - -/*---------------------------------------------------------------------* - * Top level * - *---------------------------------------------------------------------*/ -/*! - * \brief partifyFiles() - * - * \param[in] dirname directory of files - * \param[in] substr required filename substring; use NULL for all files - * \param[in] nparts number of parts to generate (counting from top) - * \param[in] outroot root name of output pdf files - * \param[in] debugfile [optional] set to NULL for no debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) All page images are compressed in png format into a pixacomp.
- *      (2) Each page image is deskewed, binarized at 300 ppi,
- *          partified into %nparts, and saved in a set of pixacomps
- *          in tiff-g4 format.
- *      (3) Each partified pixacomp is rendered into a set of page images,
- *          and output as a pdf.
- * 
- */ -l_ok -partifyFiles(const char *dirname, - const char *substr, - l_int32 nparts, - const char *outroot, - const char *debugfile) -{ -PIXA *pixadb; -PIXAC *pixac; - - PROCNAME("partifyFiles"); - - if (!dirname) - return ERROR_INT("dirname not defined", procName, 1); - if (nparts < 0 || nparts > 10) - return ERROR_INT("nparts not in [1 ... 10]", procName, 1); - if (!outroot || outroot[0] == '\n') - return ERROR_INT("outroot undefined or empty", procName, 1); - - pixadb = (debugfile) ? pixaCreate(0) : NULL; - pixac = pixacompCreateFromFiles(dirname, substr, IFF_PNG); - partifyPixac(pixac, nparts, outroot, pixadb); - if (pixadb) { - L_INFO("writing debug output to %s\n", procName, debugfile); - pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, - "Partify Debug", debugfile); - } - pixacompDestroy(&pixac); - pixaDestroy(&pixadb); - return 0; -} - - -/*! - * \brief partifyPixac() - * - * \param[in] pixac with at least one image - * \param[in] nparts number of parts to generate (counting from top) - * \param[in] outroot root name of output pdf files - * \param[in] pixadb [optional] debug pixa; can be NULL - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See partifyPixac().
- *      (2) If the image files do not have a resolution, 300 ppi is assumed.
- * 
- */ -l_ok -partifyPixac(PIXAC *pixac, - l_int32 nparts, - const char *outroot, - PIXA *pixadb) -{ -char buf[512]; -l_int32 i, j, pageno, res, npage, nbox, icount, line; -l_float32 factor; -L_BMF *bmf; -BOX *box1, *box2; -BOXA *boxa1, *boxa2, *boxa3; -PIX *pix1, *pix2, *pix3, *pix4, *pix5; -PIXAC **pixaca; - - PROCNAME("partifyPixac"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - if ((npage = pixacompGetCount(pixac)) == 0) - return ERROR_INT("pixac is empty", procName, 1); - if (nparts < 1 || nparts > 10) - return ERROR_INT("nparts not in [1 ... 10]", procName, 1); - if (!outroot || outroot[0] == '\n') - return ERROR_INT("outroot undefined or empty", procName, 1); - - /* Initialize the output array for each of the nparts */ - pixaca = (PIXAC **)LEPT_CALLOC(nparts, sizeof(PIXAC *)); - for (i = 0; i < nparts; i++) - pixaca[i] = pixacompCreate(0); - - /* Process each page */ - line = 1; - bmf = bmfCreate(NULL, 10); - for (pageno = 0; pageno < npage; pageno++) { - if ((pix1 = pixacompGetPix(pixac, pageno)) == NULL) { - L_ERROR("pix for page %d not found\n", procName, pageno); - continue; - } - - /* Scale, binarize and deskew */ - res = pixGetXRes(pix1); - if (res == 0 || res == 300 || res > 600) { - pix2 = pixClone(pix1); - } else { - factor = 300.0 / (l_float32)res; - if (factor > 3) - L_WARNING("resolution is very low\n", procName); - pix2 = pixScale(pix1, factor, factor); - } - pix3 = pixConvertTo1Adaptive(pix2); - pix4 = pixDeskew(pix3, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - if (!pix4) { - L_ERROR("pix for page %d not deskewed\n", procName, pageno); - continue; - } - pix1 = pixClone(pix4); /* rename */ - pixDestroy(&pix4); - - /* Find the stave sets at 4x reduction */ - boxa1 = pixLocateStaveSets(pix1, pageno, pixadb); - - /* Break each stave set into the separate staves (parts). 
- * A typical set will have more than one part, but if one of - * the parts is a keyboard, it will usually have two staves - * (also called a Grand Staff), composed of treble and - * bass staves. For example, a classical violin sonata - * could have a staff for the violin and two staves for - * the piano. We would set nparts == 2, and extract both - * of the piano staves as the piano part. */ - nbox = boxaGetCount(boxa1); - lept_stderr("number of boxes in page %d: %d\n", pageno, nbox); - for (i = 0; i < nbox; i++, line++) { - snprintf(buf, sizeof(buf), "%d", line); - box1 = boxaGetBox(boxa1, i, L_COPY); - pix2 = pixClipRectangle(pix1, box1, NULL); - pix3 = pixMorphSequence(pix2, "d1.20 + o50.1 + o1.30", 0); - boxa2 = pixConnCompBB(pix3, 8); - boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); - boxaRemoveVGaps(boxa3); - icount = boxaGetCount(boxa3); - if (icount < nparts) - L_WARNING("nparts requested = %d, but only found %d\n", - procName, nparts, icount); - for (j = 0; j < icount && j < nparts; j++) { - box2 = boxaGetBox(boxa3, j, L_COPY); - if (j == nparts - 1) /* extend the box to the bottom */ - boxSetSideLocations(box2, -1, -1, -1, - pixGetHeight(pix1) - 1); - pix4 = pixClipRectangle(pix2, box2, NULL); - pix5 = pixAddTextlines(pix4, bmf, buf, 1, L_ADD_LEFT); - pixacompAddPix(pixaca[j], pix5, IFF_TIFF_G4); - boxDestroy(&box2); - pixDestroy(&pix4); - pixDestroy(&pix5); - } - boxaDestroy(&boxa2); - boxaDestroy(&boxa3); - boxDestroy(&box1); - pixDestroy(&pix2); - pixDestroy(&pix3); - } - boxaDestroy(&boxa1); - pixDestroy(&pix1); - } - - /* Output separate pdfs for each part */ - for (i = 0; i < nparts; i++) { - snprintf(buf, sizeof(buf), "%s-%d.pdf", outroot, i); - L_INFO("writing part %d: %s\n", procName, i, buf); - pixacompConvertToPdf(pixaca[i], 300, 1.0, L_G4_ENCODE, 0, NULL, buf); - pixacompDestroy(&pixaca[i]); - } - LEPT_FREE(pixaca); - bmfDestroy(&bmf); - return 0; -} - - -/* - * \brief pixLocateStaveSets() - * - * \param[in] pixs 1 bpp, 300 
ppi, deskewed - * \param[in] pageno page number; used for debug output - * \param[in] pixadb [optional] debug pixa; can be NULL - * \return boxa containing the stave sets at full resolution - */ -static BOXA * -pixLocateStaveSets(PIX *pixs, - l_int32 pageno, - PIXA *pixadb) -{ -BOXA *boxa1, *boxa2, *boxa3, *boxa4; -PIX *pix1, *pix2; - - /* Find the stave sets at 4x reduction */ - pix1 = pixMorphSequence(pixs, "r11", 0); - boxa1 = pixConnCompBB(pix1, 8); - boxa2 = boxaSelectByArea(boxa1, 15000, L_SELECT_IF_GT, NULL); - boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); - if (pixadb) { - pix2 = pixConvertTo32(pix1); - pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); - pixaAddPix(pixadb, pix2, L_INSERT); - pixDisplay(pix2, 100 * pageno, 100); - } - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - - boxaRemoveVGaps(boxa3); - if (pixadb) { - pix2 = pixConvertTo32(pix1); - pixRenderBoxaArb(pix2, boxa3, 2, 0, 255, 0); - pixaAddPix(pixadb, pix2, L_INSERT); - pixDisplay(pix2, 100 * pageno, 600); - } - boxa4 = boxaTransform(boxa3, 0, 0, 4.0, 4.0); /* back to full res */ - boxaDestroy(&boxa3); - pixDestroy(&pix1); - return boxa4; -} - - -/* - * \brief boxaRemoveVGaps() - * - * \param[in] boxa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The boxes in %boxa are aligned vertically.  Move the horizontal
- *          edges vertically to remove the gaps between boxes.
- * 
- */ -static l_ok -boxaRemoveVGaps(BOXA *boxa) -{ -l_int32 nbox, i, y1, h1, y2, h2, delta; - - nbox = boxaGetCount(boxa); - for (i = 0; i < nbox - 1; i++) { - boxaGetBoxGeometry(boxa, i, NULL, &y1, NULL, &h1); - boxaGetBoxGeometry(boxa, i + 1, NULL, &y2, NULL, &h2); - delta = (y2 - y1 - h1) / 2; - boxaAdjustBoxSides(boxa, i, 0, 0, 0, delta); - boxaAdjustBoxSides(boxa, i + 1, 0, 0, -delta, 0); - } - boxaAdjustBoxSides(boxa, nbox - 1, 0, 0, 0, delta); /* bot of last */ - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/partition.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/partition.c deleted file mode 100644 index 3d4c74b9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/partition.c +++ /dev/null @@ -1,662 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file partition.c - *
- *
- *      Whitespace block extraction
- *          BOXA            *boxaGetWhiteblocks()
- *
- *      Helpers
- *          static PARTEL   *partelCreate()
- *          static void      partelDestroy()
- *          static l_int32   partelSetSize()
- *          static BOXA     *boxaGenerateSubboxes()
- *          static BOX      *boxaSelectPivotBox()
- *          static l_int32   boxaCheckIfOverlapIsSmall()
- *          BOXA            *boxaPruneSortedOnOverlap()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*! Partition element */ -struct PartitionElement { - l_float32 size; /* sorting key */ - BOX *box; /* region of the element */ - BOXA *boxa; /* set of intersecting boxes */ -}; -typedef struct PartitionElement PARTEL; - -static PARTEL * partelCreate(BOX *box); -static void partelDestroy(PARTEL **ppartel); -static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag); -static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, - l_float32 fract); -static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, - l_float32 fract); -static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, - l_float32 maxoverlap); - -static const l_int32 DefaultMaxPops = 20000; - - -#ifndef NO_CONSOLE_IO -#define OUTPUT_HEAP_STATS 0 -#endif /* ~NO_CONSOLE_IO */ - -/*------------------------------------------------------------------* - * Whitespace block extraction * - *------------------------------------------------------------------*/ -/*! - * \brief boxaGetWhiteblocks() - * - * \param[in] boxas typ. 
a set of bounding boxes of fg components - * \param[in] box initial region; typically including all boxes - * in boxas; if null, it computes the region to - * include all boxes in boxas - * \param[in] sortflag L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, - * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, - * L_SORT_BY_PERIMETER, L_SORT_BY_AREA - * \param[in] maxboxes max number of output whitespace boxes; e.g., 100 - * \param[in] maxoverlap maximum fractional overlap of a box by any - * of the larger boxes; e.g., 0.2 - * \param[in] maxperim maximum half-perimeter, in pixels, for which - * pivot is selected by proximity to box centroid; - * e.g., 200 - * \param[in] fract fraction of box diagonal that is an acceptable - * distance from the box centroid to select - * the pivot; e.g., 0.2 - * \param[in] maxpops max number of pops from the heap; use 0 as default - * \return boxa of sorted whitespace boxes, or NULL on error - * - *
- * Notes:
- *      (1) This uses the elegant Breuel algorithm, found in "Two
- *          Geometric Algorithms for Layout Analysis", 2002,
- *          url: "citeseer.ist.psu.edu/breuel02two.html".
- *          It starts with the bounding boxes (b.b.) of the connected
- *          components (c.c.) in a region, along with the rectangle
- *          representing that region.  It repeatedly divides the
- *          rectangle into four maximal rectangles that exclude a
- *          pivot rectangle, sorting them in a priority queue
- *          according to one of the six sort flags.  It returns a boxa
- *          of the "largest" set that have no intersection with boxes
- *          from the input boxas.
- *      (2) If box == NULL, the initial region is the minimal region
- *          that includes the origin and every box in boxas.
- *      (3) maxboxes is the maximum number of whitespace boxes that will
- *          be returned.  The actual number will depend on the image
- *          and the values chosen for maxoverlap and maxpops.  In many
- *          cases, the actual number will be 'maxboxes'.
- *      (4) maxoverlap allows pruning of whitespace boxes depending on
- *          the overlap.  To avoid all pruning, use maxoverlap = 1.0.
- *          To select only boxes that have no overlap with each other
- *          (maximal pruning), choose maxoverlap = 0.0.
- *          Otherwise, no box can have more than the 'maxoverlap' fraction
- *          of its area overlapped by any larger (in the sense of the
- *          sortflag) box.
- *      (5) Choose maxperim (actually, maximum half-perimeter) to
- *          represent a c.c. that is small enough so that you don't care
- *          about the white space that could be inside of it.  For all such
- *          c.c., the pivot for 'quadfurcation' of a rectangle is selected
- *          as having a reasonable proximity to the rectangle centroid.
- *      (6) Use fract in the range [0.0 ... 1.0].  Set fract = 0.0
- *          to choose the small box nearest the centroid as the pivot.
- *          If you choose fract > 0.0, it is suggested that you call
- *          boxaPermuteRandom() first, to permute the boxes (see usage below).
- *          This should reduce the search time for each of the pivot boxes.
- *      (7) Choose maxpops to be the maximum number of rectangles that
- *          are popped from the heap.  This is an indirect way to limit the
- *          execution time.  Use 0 for default (a fairly large number).
- *          At any time, you can expect the heap to contain about
- *          2.5 times as many boxes as have been popped off.
- *      (8) The output result is a sorted set of overlapping
- *          boxes, constrained by 'maxboxes', 'maxoverlap' and 'maxpops'.
- *      (9) The main defect of the method is that it abstracts out the
- *          actual components, retaining only the b.b. for analysis.
- *          Consider a component with a large b.b.  If this is chosen
- *          as a pivot, all white space inside is immediately taken
- *          out of consideration.  Furthermore, even if it is never chosen
- *          as a pivot, as the partitioning continues, at no time will
- *          any of the whitespace inside this component be part of a
- *          rectangle with zero overlapping boxes.  Thus, the interiors
- *          of all boxes are necessarily excluded from the union of
- *          the returned whitespace boxes.
- *     (10) It should be noted that the algorithm puts a large number
- *          of partels on the queue.  Setting a limit of X partels to
- *          remove from the queue, one typically finds that there will be
- *          several times that number (say, 2X - 3X) left on the queue.
- *          For an efficient algorithm to find the largest white or
- *          or black rectangles, without permitting them to overlap,
- *          see pixFindLargeRectangles().
- *     (11) USAGE: One way to accommodate to this weakness is to remove such
- *          large b.b. before starting the computation.  For example,
- *          if 'box' is an input image region containing 'boxa' b.b. of c.c.:
- *
- *                   // Faster pivot choosing
- *               boxaPermuteRandom(boxa, boxa);
- *
- *                   // Remove anything either large width or height
- *               boxat = boxaSelectBySize(boxa, maxwidth, maxheight,
- *                                        L_SELECT_IF_BOTH, L_SELECT_IF_LT,
- *                                        NULL);
- *
- *               boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes,
- *                                          maxoverlap, maxperim, fract,
- *                                          maxpops);
- *
- *          The result will be rectangular regions of "white space" that
- *          extend into (and often through) the excluded components.
- *     (11) As a simple example, suppose you wish to find the columns on a page.
- *          First exclude large c.c. that may block the columns, and then call:
- *
- *               boxad = boxaGetWhiteblocks(boxa, box, L_SORT_BY_HEIGHT,
- *                                          20, 0.15, 200, 0.2, 2000);
- *
- *          to get the 20 tallest boxes with no more than 0.15 overlap
- *          between a box and any of the taller ones, and avoiding the
- *          use of any c.c. with a b.b. half perimeter greater than 200
- *          as a pivot.
- * 
- */ -BOXA * -boxaGetWhiteblocks(BOXA *boxas, - BOX *box, - l_int32 sortflag, - l_int32 maxboxes, - l_float32 maxoverlap, - l_int32 maxperim, - l_float32 fract, - l_int32 maxpops) -{ -l_int32 i, w, h, n, nsub, npush, npop; -BOX *boxsub; -BOXA *boxa, *boxa4, *boxasub, *boxad; -PARTEL *partel; -L_HEAP *lh; - - PROCNAME("boxaGetWhiteblocks"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT && - sortflag != L_SORT_BY_MIN_DIMENSION && - sortflag != L_SORT_BY_MAX_DIMENSION && - sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA) - return (BOXA *)ERROR_PTR("invalid sort flag", procName, NULL); - if (maxboxes < 1) { - maxboxes = 1; - L_WARNING("setting maxboxes = 1\n", procName); - } - if (maxoverlap < 0.0 || maxoverlap > 1.0) - return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL); - if (maxpops == 0) - maxpops = DefaultMaxPops; - - if (!box) { - boxaGetExtent(boxas, &w, &h, NULL); - box = boxCreate(0, 0, w, h); - } - - /* Prime the heap */ - lh = lheapCreate(20, L_SORT_DECREASING); - partel = partelCreate(box); - partel->boxa = boxaCopy(boxas, L_CLONE); - partelSetSize(partel, sortflag); - lheapAdd(lh, partel); - - npush = 1; - npop = 0; - boxad = boxaCreate(0); - while (1) { - if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */ - break; - - npop++; /* How many boxes have we retrieved from the queue? */ - if (npop > maxpops) { - partelDestroy(&partel); - break; - } - - /* Extract the contents */ - boxa = boxaCopy(partel->boxa, L_CLONE); - box = boxClone(partel->box); - partelDestroy(&partel); - - /* Can we output this one? 
*/ - n = boxaGetCount(boxa); - if (n == 0) { - if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0) - boxaAddBox(boxad, box, L_INSERT); - else - boxDestroy(&box); - boxaDestroy(&boxa); - if (boxaGetCount(boxad) >= maxboxes) /* we're done */ - break; - continue; - } - - - /* Generate up to 4 subboxes and put them on the heap */ - boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract); - boxDestroy(&box); - nsub = boxaGetCount(boxa4); - for (i = 0; i < nsub; i++) { - boxsub = boxaGetBox(boxa4, i, L_CLONE); - boxasub = boxaIntersectsBox(boxa, boxsub); - partel = partelCreate(boxsub); - partel->boxa = boxasub; - partelSetSize(partel, sortflag); - lheapAdd(lh, partel); - boxDestroy(&boxsub); - } - npush += nsub; /* How many boxes have we put on the queue? */ - -/* boxaWriteStderr(boxa4); */ - - boxaDestroy(&boxa4); - boxaDestroy(&boxa); - } - -#if OUTPUT_HEAP_STATS - lept_stderr("Heap statistics:\n"); - lept_stderr(" Number of boxes pushed: %d\n", npush); - lept_stderr(" Number of boxes popped: %d\n", npop); - lept_stderr(" Number of boxes on heap: %d\n", lheapGetCount(lh)); -#endif /* OUTPUT_HEAP_STATS */ - - /* Clean up the heap */ - while ((partel = (PARTEL *)lheapRemove(lh)) != NULL) - partelDestroy(&partel); - lheapDestroy(&lh, FALSE); - - return boxad; -} - - -/*------------------------------------------------------------------* - * Helpers * - *------------------------------------------------------------------*/ -/*! - * \brief partelCreate() - * - * \param[in] box region; inserts a copy - * \return partel, or NULL on error - */ -static PARTEL * -partelCreate(BOX *box) -{ -PARTEL *partel; - - partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL)); - partel->box = boxCopy(box); - return partel; -} - - -/*! 
- * \brief partelDestroy() - * - * \param[in,out] ppartel contents will be set to null before returning - * \return void - */ -static void -partelDestroy(PARTEL **ppartel) -{ -PARTEL *partel; - - PROCNAME("partelDestroy"); - - if (ppartel == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((partel = *ppartel) == NULL) - return; - - boxDestroy(&partel->box); - boxaDestroy(&partel->boxa); - LEPT_FREE(partel); - *ppartel = NULL; - return; -} - - -/*! - * \brief partelSetSize() - * - * \param[in] partel - * \param[in] sortflag L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, - * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, - * L_SORT_BY_PERIMETER, L_SORT_BY_AREA - * \return 0 if OK, 1 on error - */ -static l_int32 -partelSetSize(PARTEL *partel, - l_int32 sortflag) -{ -l_int32 w, h; - - PROCNAME("partelSetSize"); - - if (!partel) - return ERROR_INT("partel not defined", procName, 1); - - boxGetGeometry(partel->box, NULL, NULL, &w, &h); - if (sortflag == L_SORT_BY_WIDTH) - partel->size = (l_float32)w; - else if (sortflag == L_SORT_BY_HEIGHT) - partel->size = (l_float32)h; - else if (sortflag == L_SORT_BY_MIN_DIMENSION) - partel->size = (l_float32)L_MIN(w, h); - else if (sortflag == L_SORT_BY_MAX_DIMENSION) - partel->size = (l_float32)L_MAX(w, h); - else if (sortflag == L_SORT_BY_PERIMETER) - partel->size = (l_float32)(w + h); - else if (sortflag == L_SORT_BY_AREA) - partel->size = (l_float32)(w * h); - else - return ERROR_INT("invalid sortflag", procName, 1); - return 0; -} - - -/*! 
- * \brief boxaGenerateSubboxes() - * - * \param[in] box region to be split into up to four overlapping - * subregions - * \param[in] boxa boxes of rectangles intersecting the box - * \param[in] maxperim maximum half-perimeter for which pivot - * is selected by proximity to box centroid - * \param[in] fract fraction of box diagonal that is an acceptable - * distance from the box centroid to select the pivot - * \return boxa of four or less overlapping subrectangles of - * the box, or NULL on error - */ -static BOXA * -boxaGenerateSubboxes(BOX *box, - BOXA *boxa, - l_int32 maxperim, - l_float32 fract) -{ -l_int32 x, y, w, h, xp, yp, wp, hp; -BOX *boxp; /* pivot box */ -BOX *boxsub; -BOXA *boxa4; - - PROCNAME("boxaGenerateSubboxes"); - - if (!box) - return (BOXA *)ERROR_PTR("box not defined", procName, NULL); - if (!boxa) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - - boxa4 = boxaCreate(4); - boxp = boxaSelectPivotBox(box, boxa, maxperim, fract); - boxGetGeometry(box, &x, &y, &w, &h); - boxGetGeometry(boxp, &xp, &yp, &wp, &hp); - boxDestroy(&boxp); - if (xp > x) { /* left sub-box */ - boxsub = boxCreate(x, y, xp - x, h); - boxaAddBox(boxa4, boxsub, L_INSERT); - } - if (yp > y) { /* top sub-box */ - boxsub = boxCreate(x, y, w, yp - y); - boxaAddBox(boxa4, boxsub, L_INSERT); - } - if (xp + wp < x + w) { /* right sub-box */ - boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h); - boxaAddBox(boxa4, boxsub, L_INSERT); - } - if (yp + hp < y + h) { /* bottom sub-box */ - boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp); - boxaAddBox(boxa4, boxsub, L_INSERT); - } - - return boxa4; -} - - -/*! 
- * \brief boxaSelectPivotBox() - * - * \param[in] box containing box; to be split by the pivot box - * \param[in] boxa boxes of rectangles, from which 1 is to be chosen - * \param[in] maxperim maximum half-perimeter for which pivot - * is selected by proximity to box centroid - * \param[in] fract fraction of box diagonal that is an acceptable - * distance from the box centroid to select the pivot - * \return box pivot box for subdivision into 4 rectangles, - * or NULL on error - * - *
- * Notes:
- *      (1) This is a tricky piece that wasn't discussed in the
- *          Breuel's 2002 paper.
- *      (2) Selects a box from boxa whose centroid is reasonably close to
- *          the centroid of the containing box (xc, yc) and whose
- *          half-perimeter does not exceed the maxperim value.
- *      (3) If there are no boxes in the boxa that are small enough,
- *          then it selects the smallest of the larger boxes,
- *          without reference to its location in the containing box.
- *      (4) If a small box has a centroid at a distance from the
- *          centroid of the containing box that is not more than
- *          the fraction 'fract' of the diagonal of the containing
- *          box, that box is chosen as the pivot, terminating the
- *          search for the nearest small box.
- *      (5) Use fract in the range [0.0 ... 1.0].  Set fract = 0.0
- *          to choose the small box nearest the centroid.
- *      (6) Choose maxperim to represent a connected component that is
- *          small enough so that you don't care about the white space
- *          that could be inside of it.
- * 
- */ -static BOX * -boxaSelectPivotBox(BOX *box, - BOXA *boxa, - l_int32 maxperim, - l_float32 fract) -{ -l_int32 i, n, bw, bh, w, h; -l_int32 smallfound, minindex, perim, minsize; -l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy; -BOX *boxt; - - PROCNAME("boxaSelectPivotBox"); - - if (!box) - return (BOX *)ERROR_PTR("box not defined", procName, NULL); - if (!boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - n = boxaGetCount(boxa); - if (n == 0) - return (BOX *)ERROR_PTR("no boxes in boxa", procName, NULL); - if (fract < 0.0 || fract > 1.0) { - L_WARNING("fract out of bounds; using 0.0\n", procName); - fract = 0.0; - } - - boxGetGeometry(box, NULL, NULL, &w, &h); - boxGetCenter(box, &x, &y); - threshdist = fract * (w * w + h * h); - mindist = 1000000000.; - minindex = 0; - smallfound = FALSE; - for (i = 0; i < n; i++) { - boxt = boxaGetBox(boxa, i, L_CLONE); - boxGetGeometry(boxt, NULL, NULL, &bw, &bh); - boxGetCenter(boxt, &cx, &cy); - boxDestroy(&boxt); - if (bw + bh > maxperim) - continue; - smallfound = TRUE; - delx = cx - x; - dely = cy - y; - dist = delx * delx + dely * dely; - if (dist <= threshdist) - return boxaGetBox(boxa, i, L_COPY); - if (dist < mindist) { - minindex = i; - mindist = dist; - } - } - - /* If there are small boxes but none are within 'fract' of the - * centroid, return the nearest one. */ - if (smallfound == TRUE) - return boxaGetBox(boxa, minindex, L_COPY); - - /* No small boxes; return the smallest of the large boxes */ - minsize = 1000000000; - minindex = 0; - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh); - perim = bw + bh; - if (perim < minsize) { - minsize = perim; - minindex = i; - } - } - return boxaGetBox(boxa, minindex, L_COPY); -} - - -/*! 
- * \brief boxCheckIfOverlapIsBig() - * - * \param[in] box to be tested - * \param[in] boxa of boxes already stored - * \param[in] maxoverlap maximum fractional overlap of the input box - * by any of the boxes in boxa - * \return 0 if box has small overlap with every box in boxa; - * 1 otherwise or on error - */ -static l_int32 -boxCheckIfOverlapIsBig(BOX *box, - BOXA *boxa, - l_float32 maxoverlap) -{ -l_int32 i, n, bigoverlap; -l_float32 fract; -BOX *boxt; - - PROCNAME("boxCheckIfOverlapIsBig"); - - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (maxoverlap < 0.0 || maxoverlap > 1.0) - return ERROR_INT("invalid maxoverlap", procName, 1); - - n = boxaGetCount(boxa); - if (n == 0 || maxoverlap == 1.0) - return 0; - - bigoverlap = 0; - for (i = 0; i < n; i++) { - boxt = boxaGetBox(boxa, i, L_CLONE); - boxOverlapFraction(boxt, box, &fract); - boxDestroy(&boxt); - if (fract > maxoverlap) { - bigoverlap = 1; - break; - } - } - - return bigoverlap; -} - - -/*! - * \brief boxaPruneSortedOnOverlap() - * - * \param[in] boxas sorted by size in decreasing order - * \param[in] maxoverlap maximum fractional overlap of a box by any - * of the larger boxes - * \return boxad pruned, or NULL on error - * - *
- * Notes:
- *      (1) This selectively removes smaller boxes when they are overlapped
- *          by any larger box by more than the input 'maxoverlap' fraction.
- *      (2) To avoid all pruning, use maxoverlap = 1.0.  To select only
- *          boxes that have no overlap with each other (maximal pruning),
- *          set maxoverlap = 0.0.
- *      (3) If there are no boxes in boxas, returns an empty boxa.
- * 
- */ -BOXA * -boxaPruneSortedOnOverlap(BOXA *boxas, - l_float32 maxoverlap) -{ -l_int32 i, j, n, remove; -l_float32 fract; -BOX *box1, *box2; -BOXA *boxad; - - PROCNAME("boxaPruneSortedOnOverlap"); - - if (!boxas) - return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); - if (maxoverlap < 0.0 || maxoverlap > 1.0) - return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL); - - n = boxaGetCount(boxas); - if (n == 0 || maxoverlap == 1.0) - return boxaCopy(boxas, L_COPY); - - boxad = boxaCreate(0); - box2 = boxaGetBox(boxas, 0, L_COPY); - boxaAddBox(boxad, box2, L_INSERT); - for (j = 1; j < n; j++) { /* prune on j */ - box2 = boxaGetBox(boxas, j, L_COPY); - remove = FALSE; - for (i = 0; i < j; i++) { /* test on i */ - box1 = boxaGetBox(boxas, i, L_CLONE); - boxOverlapFraction(box1, box2, &fract); - boxDestroy(&box1); - if (fract > maxoverlap) { - remove = TRUE; - break; - } - } - if (remove == TRUE) - boxDestroy(&box2); - else - boxaAddBox(boxad, box2, L_INSERT); - } - - return boxad; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio1.c deleted file mode 100644 index c95f2a38..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio1.c +++ /dev/null @@ -1,2255 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pdfio1.c - *
- *
- *    Higher-level operations for generating pdf from images.
- *    Use poppler's pdfimages to invert the process, extracting
- *    raster images from pdf.
- *
- *    |=============================================================|
- *    |                        Important notes                      |
- *    |=============================================================|
- *    | Some of these functions require I/O libraries such as       |
- *    | libtiff, libjpeg, libpng, libz and libopenjp2.  If you do   |
- *    | not have these libraries, some calls will fail.  For        |
- *    | example, if you do not have libopenjp2, you cannot write a  |
- *    | pdf where transcoding is required to incorporate a          |
- *    | jp2k image.                                                 |
- *    |                                                             |
- *    | You can manually deactivate all pdf writing by setting      |
- *    | this in environ.h:                                          |
- *    | \code                                                       |
- *    |      #define  USE_PDFIO     0                               |
- *    | \endcode                                                    |
- *    | This will link the stub file pdfiostub.c.                   |
- *    |=============================================================|
- *
- *     Set 1. These functions convert a set of image files
- *     to a multi-page pdf file, with one image on each page.
- *     All images are rendered at the same (input) resolution.
- *     The images can be specified as being in a directory, or they
- *     can be in an sarray.  The output pdf can be either a file
- *     or an array of bytes in memory.
- *
- *     Set 2. These functions are a special case of set 1, where
- *     no scaling or change in quality is required.  For jpeg and jp2k
- *     images, the bytes in each file can be directly incorporated
- *     into the output pdf, and the wrapping up of multiple image
- *     files is very fast.  For non-interlaced png, the data bytes
- *     including the predictors can also be written directly into the
- *     flate pdf data.  For other image formats (e.g., tiff-g4),
- *     transcoding is required, where the image data is first decompressed
- *     and then the G4 or Flate (gzip) encodings are generated.
- *
- *     Set 3. These functions convert a set of images in memory
- *     to a multi-page pdf, with one image on each page.  The pdf
- *     output can be either a file or an array of bytes in memory.
- *
- *     Set 4. These functions implement a pdf output "device driver"
- *     for wrapping (encoding) any number of images on a single page
- *     in pdf.  The input can be either an image file or a Pix;
- *     the pdf output can be either a file or an array of bytes in memory.
- *
- *     Set 5. These "segmented" functions take a set of image
- *     files, along with optional segmentation information, and
- *     generate a multi-page pdf file, where each page consists
- *     in general of a mixed raster pdf of image and non-image regions.
- *     The segmentation information for each page can be input as
- *     either a mask over the image parts, or as a Boxa of those
- *     regions.
- *
- *     Set 6. These "segmented" functions convert an image and
- *     an optional Boxa of image regions into a mixed raster pdf file
- *     for the page.  The input image can be either a file or a Pix.
- *
- *     Set 7. These functions take a set of single-page pdf files
- *     and concatenates it into a multi-page pdf.  The input can be
- *     a set of either single page pdf files or pdf 'strings' in memory.
- *     The output can be either a file or an array of bytes in memory.
- *
- *     The images in the pdf file can be rendered using a pdf viewer,
- *     such as evince, gv, xpdf or acroread.
- *
- *     Reference on the pdf file format:
- *         http://www.adobe.com/devnet/pdf/pdf_reference_archive.html
- *
- *     1. Convert specified image files to pdf (one image file per page)
- *          l_int32             convertFilesToPdf()
- *          l_int32             saConvertFilesToPdf()
- *          l_int32             saConvertFilesToPdfData()
- *          l_int32             selectDefaultPdfEncoding()
- *
- *     2. Convert specified image files to pdf without scaling
- *          l_int32             convertUnscaledFilesToPdf()
- *          l_int32             saConvertUnscaledFilesToPdf()
- *          l_int32             saConvertUnscaledFilesToPdfData()
- *          l_int32             convertUnscaledToPdfData()
- *
- *     3. Convert multiple images to pdf (one image per page)
- *          l_int32             pixaConvertToPdf()
- *          l_int32             pixaConvertToPdfData()
- *
- *     4. Single page, multi-image converters
- *          l_int32             convertToPdf()
- *          l_int32             convertImageDataToPdf()
- *          l_int32             convertToPdfData()
- *          l_int32             convertImageDataToPdfData()
- *          l_int32             pixConvertToPdf()
- *          l_int32             pixWriteStreamPdf()
- *          l_int32             pixWriteMemPdf()
- *
- *     5. Segmented multi-page, multi-image converter
- *          l_int32             convertSegmentedFilesToPdf()
- *          BOXAA              *convertNumberedMasksToBoxaa()
- *
- *     6. Segmented single page, multi-image converters
- *          l_int32             convertToPdfSegmented()
- *          l_int32             pixConvertToPdfSegmented()
- *          l_int32             convertToPdfDataSegmented()
- *          l_int32             pixConvertToPdfDataSegmented()
- *
- *     7. Multipage concatenation
- *          l_int32             concatenatePdf()
- *          l_int32             saConcatenatePdf()
- *          l_int32             ptraConcatenatePdf()
- *          l_int32             concatenatePdfToData()
- *          l_int32             saConcatenatePdfToData()
- *
- *     The top-level multi-image functions can be visualized as follows:
- *          Output pdf data to file:
- *             convertToPdf()  and  convertImageDataToPdf()
- *                     --> pixConvertToPdf()
- *                           --> pixConvertToPdfData()
- *
- *          Output pdf data to array in memory:
- *             convertToPdfData()  and  convertImageDataToPdfData()
- *                     --> pixConvertToPdfData()
- *
- *     The top-level segmented image functions can be visualized as follows:
- *          Output pdf data to file:
- *             convertToPdfSegmented()
- *                     --> pixConvertToPdfSegmented()
- *                           --> pixConvertToPdfDataSegmented()
- *
- *          Output pdf data to array in memory:
- *             convertToPdfDataSegmented()
- *                     --> pixConvertToPdfDataSegmented()
- *
- *     For multi-page concatenation, there are three different types of input
- *        (1) directory and optional filename filter
- *        (2) sarray of filenames
- *        (3) ptra of byte arrays of pdf data
- *     and two types of output for the concatenated pdf data
- *        (1) filename
- *        (2) data array and size
- *     High-level interfaces are given for each of the six combinations.
- *
- *     Note: When wrapping small images into pdf, it is useful to give
- *     them a relatively low resolution value, to avoid rounding errors
- *     when rendering the images.  For example, if you want an image
- *     of width w pixels to be 5 inches wide on a screen, choose a
- *     resolution w/5.
- *
- *     The very fast functions in section (2) require neither transcoding
- *     nor parsing of the compressed jpeg file.  With three types of image
- *     compression, the compressed strings can be incorporated into
- *     the pdf data without decompression and re-encoding: jpeg, jp2k
- *     and png.  The DCTDecode and JPXDecode filters can handle the
- *     entire jpeg and jp2k encoded string as a byte array in the pdf file.
- *     The FlateDecode filter can handle the png compressed image data,
- *     including predictors that occur as the first byte in each
- *     raster line, but it is necessary to store only the png IDAT chunk
- *     data in the pdf array.  The alternative for wrapping png images
- *     is to transcode them: uncompress into a raster (a pix) and then
- *     gzip the raster data.  This typically results in a larger pdf file
- *     because it doesn't use the two-dimensional png predictor.
- *     Colormaps, which are found in png PLTE chunks, must always be
- *     pulled out and included separately in the pdf.  For CCITT-G4
- *     compression, you can not simply include a tiff G4 file -- you must
- *     either parse it and extract the G4 compressed data within it,
- *     or uncompress to a raster and G4 compress again.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if USE_PDFIO /* defined in environ.h */ - /* --------------------------------------------*/ - - /* Typical scan resolution in ppi (pixels/inch) */ -static const l_int32 DefaultInputRes = 300; - -/*---------------------------------------------------------------------* - * Convert specified image files to pdf (one image file per page) * - *---------------------------------------------------------------------*/ -/*! - * \brief convertFilesToPdf() - * - * \param[in] dirname directory name containing images - * \param[in] substr [optional] substring filter on filenames; - * can be NULL - * \param[in] res input resolution of all images - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE or - * L_DEFAULT_ENCODE for default) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] title [optional] pdf title; if null, taken from - * the first image filename - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %substr is not NULL, only image filenames that contain
- *          the substring can be used.  If %substr == NULL, all files
- *          in the directory are used.
- *      (2) The files in the directory, after optional filtering by
- *          the substring, are lexically sorted in increasing order
- *          before concatenation.
- *      (3) The scalefactor is applied to each image before encoding.
- *          If you enter a value <= 0.0, it will be set to 1.0.
- *      (4) Specifying one of the four encoding types for %type forces
- *          all images to be compressed with that type.  Use 0 to have
- *          the type determined for each image based on depth and whether
- *          or not it has a colormap.
- * 
- */ -l_ok -convertFilesToPdf(const char *dirname, - const char *substr, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - const char *fileout) -{ -l_int32 ret; -SARRAY *sa; - - PROCNAME("convertFilesToPdf"); - - if (!dirname) - return ERROR_INT("dirname not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return ERROR_INT("sa not made", procName, 1); - ret = saConvertFilesToPdf(sa, res, scalefactor, type, quality, - title, fileout); - sarrayDestroy(&sa); - return ret; -} - - -/*! - * \brief saConvertFilesToPdf() - * - * \param[in] sa string array of pathnames for images - * \param[in] res input resolution of all images - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE or - * L_DEFAULT_ENCODE for default) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] title [optional] pdf title; if null, taken from - * the first image filename - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See convertFilesToPdf().
- * 
- */ -l_ok -saConvertFilesToPdf(SARRAY *sa, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("saConvertFilesToPdf"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - ret = saConvertFilesToPdfData(sa, res, scalefactor, type, quality, - title, &data, &nbytes); - if (ret) { - if (data) LEPT_FREE(data); - return ERROR_INT("pdf data not made", procName, 1); - } - - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - if (ret) - L_ERROR("pdf data not written to file\n", procName); - return ret; -} - - -/*! - * \brief saConvertFilesToPdfData() - * - * \param[in] sa string array of pathnames for images - * \param[in] res input resolution of all images - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE or - * L_DEFAULT_ENCODE for default) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] title [optional] pdf title; if null, taken from - * the first image filename - * \param[out] pdata output pdf data (of all images - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See convertFilesToPdf().
- * 
- */ -l_ok -saConvertFilesToPdfData(SARRAY *sa, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -char *fname; -const char *pdftitle; -l_uint8 *imdata; -l_int32 i, n, ret, pagetype, npages, scaledres; -size_t imbytes; -L_BYTEA *ba; -PIX *pixs, *pix; -L_PTRA *pa_data; - - PROCNAME("saConvertFilesToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (scalefactor <= 0.0) scalefactor = 1.0; - if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && - type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { - type = L_DEFAULT_ENCODE; - } - - /* Generate all the encoded pdf strings */ - n = sarrayGetCount(sa); - pa_data = ptraCreate(n); - pdftitle = NULL; - for (i = 0; i < n; i++) { - if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); - fname = sarrayGetString(sa, i, L_NOCOPY); - if ((pixs = pixRead(fname)) == NULL) { - L_ERROR("image not readable from file %s\n", procName, fname); - continue; - } - if (!pdftitle) - pdftitle = (title) ? 
title : fname; - if (scalefactor != 1.0) - pix = pixScale(pixs, scalefactor, scalefactor); - else - pix = pixClone(pixs); - pixDestroy(&pixs); - scaledres = (l_int32)(res * scalefactor); - - /* Select the encoding type */ - if (type != L_DEFAULT_ENCODE) { - pagetype = type; - } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { - pixDestroy(&pix); - L_ERROR("encoding type selection failed for file %s\n", - procName, fname); - continue; - } - - ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, - 0, 0, scaledres, pdftitle, NULL, 0); - pixDestroy(&pix); - if (ret) { - LEPT_FREE(imdata); - L_ERROR("pdf encoding failed for %s\n", procName, fname); - continue; - } - ba = l_byteaInitFromMem(imdata, imbytes); - LEPT_FREE(imdata); - ptraAdd(pa_data, ba); - } - ptraGetActualCount(pa_data, &npages); - if (npages == 0) { - L_ERROR("no pdf files made\n", procName); - ptraDestroy(&pa_data, FALSE, FALSE); - return 1; - } - - /* Concatenate them */ - lept_stderr("\nconcatenating ... "); - ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); - lept_stderr("done\n"); - - ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */ - for (i = 0; i < npages; i++) { - ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&ba); - } - ptraDestroy(&pa_data, FALSE, FALSE); - return ret; -} - - -/*! - * \brief selectDefaultPdfEncoding() - * - * \param[in] pix - * \param[out] ptype L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This attempts to choose an encoding for the pix that results
- *          in the smallest file, assuming that if jpeg encoded, it will
- *          use quality = 75.  The decision is approximate, in that
- *          (a) all colormapped images will be losslessly encoded with
- *          gzip (flate), and (b) an image with less than about 20 colors
- *          is likely to be smaller if flate encoded than if encoded
- *          as a jpeg (dct).  For example, an image made by pixScaleToGray3()
- *          will have 10 colors, and flate encoding will give about
- *          twice the compression as jpeg with quality = 75.
- * 
- */ -l_ok -selectDefaultPdfEncoding(PIX *pix, - l_int32 *ptype) -{ -l_int32 w, h, d, factor, ncolors; -PIXCMAP *cmap; - - PROCNAME("selectDefaultPdfEncoding"); - - if (!ptype) - return ERROR_INT("&type not defined", procName, 1); - *ptype = L_FLATE_ENCODE; /* default universal encoding */ - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - cmap = pixGetColormap(pix); - if (d == 8 && !cmap) { - factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 20000.)); - pixNumColors(pix, factor, &ncolors); - if (ncolors < 20) - *ptype = L_FLATE_ENCODE; - else - *ptype = L_JPEG_ENCODE; - } else if (d == 1) { - *ptype = L_G4_ENCODE; - } else if (cmap || d == 2 || d == 4) { - *ptype = L_FLATE_ENCODE; - } else if (d == 8 || d == 32) { - *ptype = L_JPEG_ENCODE; - } else { - return ERROR_INT("type selection failure", procName, 1); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Convert specified image files to pdf without scaling * - *---------------------------------------------------------------------*/ -/*! - * \brief convertUnscaledFilesToPdf() - * - * \param[in] dirname directory name containing images - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] title [optional] pdf title; if null, taken from the first - * image filename - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %substr is not NULL, only image filenames that contain
- *          the substring can be used.  If %substr == NULL, all files
- *          in the directory are used.
- *      (2) The files in the directory, after optional filtering by
- *          the substring, are lexically sorted in increasing order
- *          before concatenation.
- *      (3) This is very fast for jpeg, jp2k and some png files, because
- *          the compressed data is wrapped up and concatenated.  For tiffg4
- *          and other types of png, the images must be read and recompressed.
- * 
- */ -l_ok -convertUnscaledFilesToPdf(const char *dirname, - const char *substr, - const char *title, - const char *fileout) -{ -l_int32 ret; -SARRAY *sa; - - PROCNAME("convertUnscaledFilesToPdf"); - - if (!dirname) - return ERROR_INT("dirname not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return ERROR_INT("sa not made", procName, 1); - ret = saConvertUnscaledFilesToPdf(sa, title, fileout); - sarrayDestroy(&sa); - return ret; -} - - -/*! - * \brief saConvertUnscaledFilesToPdf() - * - * \param[in] sa string array of pathnames for images - * \param[in] title [optional] pdf title; if null, taken from the first - * image filename - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See convertUnscaledFilesToPdf().
- * 
- */ -l_ok -saConvertUnscaledFilesToPdf(SARRAY *sa, - const char *title, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("saConvertUnscaledFilesToPdf"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - ret = saConvertUnscaledFilesToPdfData(sa, title, &data, &nbytes); - if (ret) { - if (data) LEPT_FREE(data); - return ERROR_INT("pdf data not made", procName, 1); - } - - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - if (ret) - L_ERROR("pdf data not written to file\n", procName); - return ret; -} - - -/*! - * \brief saConvertUnscaledFilesToPdfData() - * - * \param[in] sa string array of pathnames for image files - * \param[in] title [optional] pdf title; if null, taken from the first - * image filename - * \param[out] pdata output pdf data (of all images) - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is very fast for jpeg, jp2k and some png files, because
- *          the compressed data is wrapped up and concatenated.  For tiffg4
- *          and other types of png, the images must be read and recompressed.
- * 
- */ -l_ok -saConvertUnscaledFilesToPdfData(SARRAY *sa, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -char *fname; -l_uint8 *imdata; -l_int32 i, n, ret, npages; -size_t imbytes; -L_BYTEA *ba; -L_PTRA *pa_data; - - PROCNAME("saConvertUnscaledFilesToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - /* Generate all the encoded pdf strings */ - n = sarrayGetCount(sa); - pa_data = ptraCreate(n); - for (i = 0; i < n; i++) { - if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); - fname = sarrayGetString(sa, i, L_NOCOPY); - - /* Generate the pdf data */ - if (convertUnscaledToPdfData(fname, title, &imdata, &imbytes)) - continue; - - /* ... and add it to the array of single page data */ - ba = l_byteaInitFromMem(imdata, imbytes); - if (imdata) LEPT_FREE(imdata); - ptraAdd(pa_data, ba); - } - ptraGetActualCount(pa_data, &npages); - if (npages == 0) { - L_ERROR("no pdf files made\n", procName); - ptraDestroy(&pa_data, FALSE, FALSE); - return 1; - } - - /* Concatenate to generate a multipage pdf */ - lept_stderr("\nconcatenating ... "); - ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); - lept_stderr("done\n"); - - /* Clean up */ - ptraGetActualCount(pa_data, &npages); /* maybe failed to read some files */ - for (i = 0; i < npages; i++) { - ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&ba); - } - ptraDestroy(&pa_data, FALSE, FALSE); - return ret; -} - - -/*! - * \brief convertUnscaledToPdfData() - * - * \param[in] fname of image file in all formats - * \param[in] title [optional] pdf title; can be NULL - * \param[out] pdata output pdf data for image - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is very fast for jpeg, jp2k and some png files, because
- *          the compressed data is wrapped up and concatenated.  For tiffg4
- *          and other types of png, the images must be read and recompressed.
- * 
- */ -l_ok -convertUnscaledToPdfData(const char *fname, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -const char *pdftitle = NULL; -char *tail = NULL; -l_int32 format; -L_COMP_DATA *cid; - - PROCNAME("convertUnscaledToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - - findFileFormat(fname, &format); - if (format == IFF_UNKNOWN) { - L_WARNING("file %s format is unknown; skip\n", procName, fname); - return 1; - } - if (format == IFF_PS || format == IFF_LPDF) { - L_WARNING("file %s format is %d; skip\n", procName, fname, format); - return 1; - } - - /* Generate the image data required for pdf generation, always - * in binary (not ascii85) coding. Note that jpeg, jp2k and - * some png files are not transcoded. */ - l_generateCIDataForPdf(fname, NULL, 0, &cid); - if (!cid) { - L_ERROR("file %s format is %d; unreadable\n", procName, fname, format); - return 1; - } - - /* If %title == NULL, use the tail of %fname. */ - if (title) { - pdftitle = title; - } else { - splitPathAtDirectory(fname, NULL, &tail); - pdftitle = tail; - } - - /* Generate the pdf string for this page (image). This destroys - * the cid by attaching it to an lpd and destroying the lpd. */ - cidConvertToPdfData(cid, pdftitle, pdata, pnbytes); - LEPT_FREE(tail); - return 0; -} - - -/*---------------------------------------------------------------------* - * Convert multiple images to pdf (one image per page) * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixaConvertToPdf() - * - * \param[in] pixa containing images all at the same resolution - * \param[in] res override the resolution of each input image, - * in ppi; use 0 to respect the resolution - * embedded in the input images - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE, or - * L_DEFAULT_ENCODE for default) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] title [optional] pdf title - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
- *          colormap and many colors, or 32 bpp; FLATE for anything else.
- *      (2) The scalefactor must be > 0.0; otherwise it is set to 1.0.
- *      (3) Specifying one of the three encoding types for %type forces
- *          all images to be compressed with that type.  Use 0 to have
- *          the type determined for each image based on depth and whether
- *          or not it has a colormap.
- * 
- */ -l_ok -pixaConvertToPdf(PIXA *pixa, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("pixaConvertToPdf"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - ret = pixaConvertToPdfData(pixa, res, scalefactor, type, quality, - title, &data, &nbytes); - if (ret) { - LEPT_FREE(data); - return ERROR_INT("conversion to pdf failed", procName, 1); - } - - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - if (ret) - L_ERROR("pdf data not written to file\n", procName); - return ret; -} - - -/*! - * \brief pixaConvertToPdfData() - * - * \param[in] pixa containing images all at the same resolution - * \param[in] res input resolution of all images - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE, or - * L_DEFAULT_ENCODE for default) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] title [optional] pdf title - * \param[out] pdata output pdf data of all images - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixaConvertToPdf().
- * 
- */ -l_ok -pixaConvertToPdfData(PIXA *pixa, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_uint8 *imdata; -l_int32 i, n, ret, scaledres, pagetype; -size_t imbytes; -L_BYTEA *ba; -PIX *pixs, *pix; -L_PTRA *pa_data; - - PROCNAME("pixaConvertToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (scalefactor <= 0.0) scalefactor = 1.0; - if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE && - type != L_G4_ENCODE && type != L_FLATE_ENCODE && - type != L_JP2K_ENCODE) { - L_WARNING("invalid compression type; using per-page default\n", - procName); - type = L_DEFAULT_ENCODE; - } - - /* Generate all the encoded pdf strings */ - n = pixaGetCount(pixa); - pa_data = ptraCreate(n); - for (i = 0; i < n; i++) { - if ((pixs = pixaGetPix(pixa, i, L_CLONE)) == NULL) { - L_ERROR("pix[%d] not retrieved\n", procName, i); - continue; - } - if (scalefactor != 1.0) - pix = pixScale(pixs, scalefactor, scalefactor); - else - pix = pixClone(pixs); - pixDestroy(&pixs); - scaledres = (l_int32)(res * scalefactor); - - /* Select the encoding type */ - if (type != L_DEFAULT_ENCODE) { - pagetype = type; - } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { - L_ERROR("encoding type selection failed for pix[%d]\n", - procName, i); - pixDestroy(&pix); - continue; - } - - ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, - 0, 0, scaledres, title, NULL, 0); - pixDestroy(&pix); - if (ret) { - LEPT_FREE(imdata); - L_ERROR("pdf encoding failed for pix[%d]\n", procName, i); - continue; - } - ba = l_byteaInitFromMem(imdata, imbytes); - LEPT_FREE(imdata); - ptraAdd(pa_data, ba); - } - ptraGetActualCount(pa_data, &n); - if (n == 0) { - L_ERROR("no pdf files made\n", procName); - 
ptraDestroy(&pa_data, FALSE, FALSE); - return 1; - } - - /* Concatenate them */ - ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); - - ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */ - for (i = 0; i < n; i++) { - ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&ba); - } - ptraDestroy(&pa_data, FALSE, FALSE); - return ret; -} - - -/*---------------------------------------------------------------------* - * Single page, multi-image converters * - *---------------------------------------------------------------------*/ -/*! - * \brief convertToPdf() - * - * \param[in] filein input image file -- any format - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, or L_JP2K_ENCODE) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] fileout output pdf file; only required on last - * image on page - * \param[in] x, y location of lower-left corner of image, - * in pixels, relative to the PostScript origin - * (0,0) at the lower-left corner of the page - * \param[in] res override the resolution of the input image, - * in ppi; use 0 to respect the resolution - * embedded in the input images - * \param[in] title [optional] pdf title; if null, taken from filein - * \param[in,out] plpd ptr to lpd, which is created on the first - * invocation and returned until last image is - * processed, at which time it is destroyed - * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, - * L_LAST_IMAGE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) To wrap only one image in pdf, input %plpd = NULL, and
- *          the value of %position will be ignored:
- *            convertToPdf(...  type, quality, x, y, res, NULL, 0);
- *      (2) To wrap multiple images on a single pdf page, this is called
- *          once for each successive image.  Do it this way:
- *            L_PDF_DATA   *lpd;
- *            convertToPdf(...  type, quality, x, y, res, &lpd, L_FIRST_IMAGE);
- *            convertToPdf(...  type, quality, x, y, res, &lpd, L_NEXT_IMAGE);
- *            ...
- *            convertToPdf(...  type, quality, x, y, res, &lpd, L_LAST_IMAGE);
- *          This will write the result to the value of %fileout specified
- *          in the first call; succeeding values of %fileout are ignored.
- *          On the last call: the pdf data bytes are computed and written
- *          to %fileout, lpd is destroyed internally, and the returned
- *          value of lpd is null.  So the client has nothing to clean up.
- *      (3) (a) Set %res == 0 to respect the resolution embedded in the
- *              image file.  If no resolution is embedded, it will be set
- *              to the default value.
- *          (b) Set %res to some other value to override the file resolution.
- *      (4) (a) If the input %res and the resolution of the output device
- *              are equal, the image will be "displayed" at the same size
- *              as the original.
- *          (b) If the input %res is 72, the output device will render
- *              the image at 1 pt/pixel.
- *          (c) Some possible choices for the default input pix resolution are:
- *                 72 ppi     Render pix on any output device at one pt/pixel
- *                 96 ppi     Windows default for generated display images
- *                300 ppi     Typical default for scanned images.
- *              We choose 300, which is sensible for rendering page images.
- *              However,  images come from a variety of sources, and
- *              some are explicitly created for viewing on a display.
- * 
- */ -l_ok -convertToPdf(const char *filein, - l_int32 type, - l_int32 quality, - const char *fileout, - l_int32 x, - l_int32 y, - l_int32 res, - const char *title, - L_PDF_DATA **plpd, - l_int32 position) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("convertToPdf"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!plpd || (position == L_LAST_IMAGE)) { - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - } - - if (convertToPdfData(filein, type, quality, &data, &nbytes, x, y, - res, title, plpd, position)) - return ERROR_INT("pdf data not made", procName, 1); - - if (!plpd || (position == L_LAST_IMAGE)) { - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - if (ret) - return ERROR_INT("pdf data not written to file", procName, 1); - } - - return 0; -} - - -/*! - * \brief convertImageDataToPdf() - * - * \param[in] imdata array of formatted image data; e.g., png, jpeg - * \param[in] size size of image data - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, or L_JP2K_ENCODE) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] fileout output pdf file; only required on last - * image on page - * \param[in] x, y location of lower-left corner of image, - * in pixels, relative to the PostScript origin - * (0,0) at the lower-left corner of the page - * \param[in] res override the resolution of the input image, - * in ppi; use 0 to respect the resolution - * embedded in the input images - * \param[in] title [optional] pdf title - * \param[in,out] plpd ptr to lpd, which is created on the first - * invocation and returned until last image is - * processed, at which time it is destroyed - * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, - * L_LAST_IMAGE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %res == 0 and the input resolution field is 0,
- *          this will use DefaultInputRes.
- *      (2) See comments in convertToPdf().
- * 
- */ -l_ok -convertImageDataToPdf(l_uint8 *imdata, - size_t size, - l_int32 type, - l_int32 quality, - const char *fileout, - l_int32 x, - l_int32 y, - l_int32 res, - const char *title, - L_PDF_DATA **plpd, - l_int32 position) -{ -l_int32 ret; -PIX *pix; - - PROCNAME("convertImageDataToPdf"); - - if (!imdata) - return ERROR_INT("image data not defined", procName, 1); - if (!plpd || (position == L_LAST_IMAGE)) { - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - } - - if ((pix = pixReadMem(imdata, size)) == NULL) - return ERROR_INT("pix not read", procName, 1); - if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && - type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { - selectDefaultPdfEncoding(pix, &type); - } - ret = pixConvertToPdf(pix, type, quality, fileout, x, y, res, - title, plpd, position); - pixDestroy(&pix); - return ret; -} - - -/*! - * \brief convertToPdfData() - * - * \param[in] filein input image file -- any format - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, or L_JP2K_ENCODE) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[out] pdata pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \param[in] x, y location of lower-left corner of image, - * in pixels, relative to the PostScript origin - * (0,0) at the lower-left corner of the page - * \param[in] res override the resolution of the input image, - * in ppi; use 0 to respect the resolution - * embedded in the input images - * \param[in] title [optional] pdf title; if null, use filein - * \param[in,out] plpd ptr to lpd, which is created on the first - * invocation and returned until last image is - * processed, at which time it is destroyed - * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, - * L_LAST_IMAGE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %res == 0 and the input resolution field is 0,
- *          this will use DefaultInputRes.
- *      (2) See comments in convertToPdf().
- * 
- */ -l_ok -convertToPdfData(const char *filein, - l_int32 type, - l_int32 quality, - l_uint8 **pdata, - size_t *pnbytes, - l_int32 x, - l_int32 y, - l_int32 res, - const char *title, - L_PDF_DATA **plpd, - l_int32 position) -{ -PIX *pix; - - PROCNAME("convertToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - - if ((pix = pixRead(filein)) == NULL) - return ERROR_INT("pix not made", procName, 1); - - pixConvertToPdfData(pix, type, quality, pdata, pnbytes, - x, y, res, (title) ? title : filein, plpd, position); - pixDestroy(&pix); - return 0; -} - - -/*! - * \brief convertImageDataToPdfData() - * - * \param[in] imdata array of formatted image data; e.g., png, jpeg - * \param[in] size size of image data - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, or L_JP2K_ENCODE) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[out] pdata pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \param[in] x, y location of lower-left corner of image, - * in pixels, relative to the PostScript origin - * (0,0) at the lower-left corner of the page - * \param[in] res override the resolution of the input image, - * in ppi; use 0 to respect the resolution - * embedded in the input images - * \param[in] title [optional] pdf title - * \param[out] plpd ptr to lpd, which is created on the first - * invocation and returned until last image is - * processed, at which time it is destroyed - * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, - * L_LAST_IMAGE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %res == 0 and the input resolution field is 0,
- *          this will use DefaultInputRes.
- *      (2) See comments in convertToPdf().
- * 
- */ -l_ok -convertImageDataToPdfData(l_uint8 *imdata, - size_t size, - l_int32 type, - l_int32 quality, - l_uint8 **pdata, - size_t *pnbytes, - l_int32 x, - l_int32 y, - l_int32 res, - const char *title, - L_PDF_DATA **plpd, - l_int32 position) -{ -l_int32 ret; -PIX *pix; - - PROCNAME("convertImageDataToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!imdata) - return ERROR_INT("image data not defined", procName, 1); - if (plpd) { /* part of multi-page invocation */ - if (position == L_FIRST_IMAGE) - *plpd = NULL; - } - - if ((pix = pixReadMem(imdata, size)) == NULL) - return ERROR_INT("pix not read", procName, 1); - if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && - type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { - selectDefaultPdfEncoding(pix, &type); - } - ret = pixConvertToPdfData(pix, type, quality, pdata, pnbytes, - x, y, res, title, plpd, position); - pixDestroy(&pix); - return ret; -} - - -/*! 
- * \brief pixConvertToPdf() - * - * \param[in] pix - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE) - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[in] fileout output pdf file; only required on last - * image on page - * \param[in] x, y location of lower-left corner of image, - * in pixels, relative to the PostScript origin - * (0,0) at the lower-left corner of the page - * \param[in] res override the resolution of the input image, - * in ppi; use 0 to respect the resolution - * embedded in the input images - * \param[in] title [optional] pdf title - * \param[in,out] plpd ptr to lpd, which is created on the first - * invocation and returned until last image is - * processed, at which time it is destroyed - * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, - * L_LAST_IMAGE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %res == 0 and the input resolution field is 0,
- *          this will use DefaultInputRes.
- *      (2) This only writes data to fileout if it is the last
- *          image to be written on the page.
- *      (3) See comments in convertToPdf().
- * 
- */ -l_ok -pixConvertToPdf(PIX *pix, - l_int32 type, - l_int32 quality, - const char *fileout, - l_int32 x, - l_int32 y, - l_int32 res, - const char *title, - L_PDF_DATA **plpd, - l_int32 position) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("pixConvertToPdf"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!plpd || (position == L_LAST_IMAGE)) { - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - } - - if (pixConvertToPdfData(pix, type, quality, &data, &nbytes, - x, y, res, title, plpd, position)) { - LEPT_FREE(data); - return ERROR_INT("pdf data not made", procName, 1); - } - - if (!plpd || (position == L_LAST_IMAGE)) { - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - if (ret) - return ERROR_INT("pdf data not written to file", procName, 1); - } - return 0; -} - - -/*! - * \brief pixWriteStreamPdf() - * - * \param[in] fp file stream opened for writing - * \param[in] pix all depths, cmap OK - * \param[in] res override the resolution of the input image, in ppi; - * use 0 to respect the resolution embedded in the input - * \param[in] title [optional] pdf title; taken from the first image - * placed on a page; e.g., an input image filename - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is the simplest interface for writing a single image
- *          with pdf encoding to a stream.  It uses G4 encoding for 1 bpp,
- *          JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE
- *          encoding for everything else.
- * 
- */ -l_ok -pixWriteStreamPdf(FILE *fp, - PIX *pix, - l_int32 res, - const char *title) -{ -l_uint8 *data; -size_t nbytes, nbytes_written; - - PROCNAME("pixWriteStreamPdf"); - - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if (pixWriteMemPdf(&data, &nbytes, pix, res, title) != 0) { - LEPT_FREE(data); - return ERROR_INT("pdf data not made", procName, 1); - } - - nbytes_written = fwrite(data, 1, nbytes, fp); - LEPT_FREE(data); - if (nbytes != nbytes_written) - return ERROR_INT("failure writing pdf data to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteMemPdf() - * - * \param[out] pdata pdf as byte array - * \param[out] pnbytes number of bytes in pdf array - * \param[in] pix all depths, cmap OK - * \param[in] res override the resolution of the input image, in ppi; - * use 0 to respect the res embedded in the input - * \param[in] title [optional] pdf title; taken from the first image - * placed on a page; e.g., an input image filename - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is the simplest interface for writing a single image
- *          with pdf encoding to memory.  It uses G4 encoding for 1 bpp,
- *          and makes a guess whether to use JPEG or FLATE encoding for
- *          everything else.
- * 
- */ -l_ok -pixWriteMemPdf(l_uint8 **pdata, - size_t *pnbytes, - PIX *pix, - l_int32 res, - const char *title) -{ -l_int32 ret, type; - - PROCNAME("pixWriteMemPdf"); - - if (pdata) *pdata = NULL; - if (pnbytes) *pnbytes = 0; - if (!pdata || !pnbytes) - return ERROR_INT("&data or &nbytes not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - selectDefaultPdfEncoding(pix, &type); - ret = pixConvertToPdfData(pix, type, 75, pdata, pnbytes, - 0, 0, res, title, NULL, 0); - if (ret) - return ERROR_INT("pdf data not made", procName, 1); - return 0; -} - - -/*---------------------------------------------------------------------* - * Segmented multi-page, multi-image converter * - *---------------------------------------------------------------------*/ -/*! - * \brief convertSegmentedFilesToPdf() - * - * \param[in] dirname directory name containing images - * \param[in] substr [optional] substring filter on filenames; - * can be NULL - * \param[in] res input resolution of all images - * \param[in] type compression type for non-image regions; the - * image regions are always compressed with - * L_JPEG_ENCODE - * \param[in] thresh used for converting gray --> 1 bpp with - * L_G4_ENCODE - * \param[in] baa [optional] boxaa of image regions - * \param[in] quality used for JPEG only; 0 for default (75) - * \param[in] scalefactor scaling factor applied to each image region - * \param[in] title [optional] pdf title; if null, taken from - * the first image filename - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %substr is not NULL, only image filenames that contain
- *          the substring can be used.  If %substr == NULL, all files
- *          in the directory are used.
- *      (2) The files in the directory, after optional filtering by
- *          the substring, are lexically sorted in increasing order
- *          before concatenation.
- *      (3) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
- *          colormap and many colors, or 32 bpp; FLATE for anything else.
- *      (4) The boxaa, if it exists, contains one boxa of "image regions"
- *          for each image file.  The boxa must be aligned with the
- *          sorted set of images.
- *      (5) The scalefactor is applied to each image region.  It is
- *          typically < 1.0, to save bytes in the final pdf, because
- *          the resolution is often not critical in non-text regions.
- *      (6) If the non-image regions have pixel depth > 1 and the encoding
- *          type is G4, they are automatically scaled up by 2x and
- *          thresholded.  Otherwise, no scaling is performed on them.
- *      (7) Note that this function can be used to generate multipage
- *          G4 compressed pdf from any input, by using %boxaa == NULL
- *          and %type == L_G4_ENCODE.
- * 
- */ -l_ok -convertSegmentedFilesToPdf(const char *dirname, - const char *substr, - l_int32 res, - l_int32 type, - l_int32 thresh, - BOXAA *baa, - l_int32 quality, - l_float32 scalefactor, - const char *title, - const char *fileout) -{ -char *fname; -l_uint8 *imdata, *data; -l_int32 i, npages, nboxa, nboxes, ret; -size_t imbytes, databytes; -BOXA *boxa; -L_BYTEA *ba; -L_PTRA *pa_data; -SARRAY *sa; - - PROCNAME("convertSegmentedFilesToPdf"); - - if (!dirname) - return ERROR_INT("dirname not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((sa = getNumberedPathnamesInDirectory(dirname, substr, 0, 0, 10000)) - == NULL) - return ERROR_INT("sa not made", procName, 1); - - npages = sarrayGetCount(sa); - /* If necessary, extend the boxaa, which is page-aligned with - * the image files, to be as large as the set of images. */ - if (baa) { - nboxa = boxaaGetCount(baa); - if (nboxa < npages) { - boxa = boxaCreate(1); - boxaaExtendWithInit(baa, npages, boxa); - boxaDestroy(&boxa); - } - } - - /* Generate and save all the encoded pdf strings */ - pa_data = ptraCreate(npages); - for (i = 0; i < npages; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - if (!strcmp(fname, "")) continue; - boxa = NULL; - if (baa) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - nboxes = boxaGetCount(boxa); - if (nboxes == 0) - boxaDestroy(&boxa); - } - ret = convertToPdfDataSegmented(fname, res, type, thresh, boxa, - quality, scalefactor, title, - &imdata, &imbytes); - boxaDestroy(&boxa); /* safe; in case nboxes > 0 */ - if (ret) { - L_ERROR("pdf encoding failed for %s\n", procName, fname); - continue; - } - ba = l_byteaInitFromMem(imdata, imbytes); - if (imdata) LEPT_FREE(imdata); - ptraAdd(pa_data, ba); - } - sarrayDestroy(&sa); - - ptraGetActualCount(pa_data, &npages); - if (npages == 0) { - L_ERROR("no pdf files made\n", procName); - ptraDestroy(&pa_data, FALSE, FALSE); - return 1; - } - - /* Concatenate */ - ret = 
ptraConcatenatePdfToData(pa_data, NULL, &data, &databytes); - - /* Clean up */ - ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */ - for (i = 0; i < npages; i++) { - ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&ba); - } - ptraDestroy(&pa_data, FALSE, FALSE); - - if (ret) { - if (data) LEPT_FREE(data); - return ERROR_INT("pdf data not made", procName, 1); - } - - ret = l_binaryWrite(fileout, "w", data, databytes); - LEPT_FREE(data); - if (ret) - L_ERROR("pdf data not written to file\n", procName); - return ret; -} - - -/*! - * \brief convertNumberedMasksToBoxaa() - * - * \param[in] dirname directory name containing mask images - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] numpre number of characters in name before number - * \param[in] numpost number of characters in name after number, up - * to a dot before an extension - * \return boxaa of mask regions, or NULL on error - * - *
- * Notes:
- *      (1) This is conveniently used to generate the input boxaa
- *          for convertSegmentedFilesToPdf().  It guarantees that the
- *          boxa will be aligned with the page images, even if some
- *          of the boxa are empty.
- * 
- */ -BOXAA * -convertNumberedMasksToBoxaa(const char *dirname, - const char *substr, - l_int32 numpre, - l_int32 numpost) -{ -char *fname; -l_int32 i, n; -BOXA *boxa; -BOXAA *baa; -PIX *pix; -SARRAY *sa; - - PROCNAME("convertNumberedMasksToBoxaa"); - - if (!dirname) - return (BOXAA *)ERROR_PTR("dirname not defined", procName, NULL); - - if ((sa = getNumberedPathnamesInDirectory(dirname, substr, numpre, - numpost, 10000)) == NULL) - return (BOXAA *)ERROR_PTR("sa not made", procName, NULL); - - /* Generate and save all the encoded pdf strings */ - n = sarrayGetCount(sa); - baa = boxaaCreate(n); - boxa = boxaCreate(1); - boxaaInitFull(baa, boxa); - boxaDestroy(&boxa); - for (i = 0; i < n; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - if (!strcmp(fname, "")) continue; - if ((pix = pixRead(fname)) == NULL) { - L_WARNING("invalid image on page %d\n", procName, i); - continue; - } - boxa = pixConnComp(pix, NULL, 8); - boxaaReplaceBoxa(baa, i, boxa); - pixDestroy(&pix); - } - - sarrayDestroy(&sa); - return baa; -} - - -/*---------------------------------------------------------------------* - * Segmented single page, multi-image converters * - *---------------------------------------------------------------------*/ -/*! - * \brief convertToPdfSegmented() - * - * \param[in] filein input image file -- any format - * \param[in] res input image resolution; typ. 300 ppi; - * use 0 for default - * \param[in] type compression type for non-image regions; image - * regions are always compressed with L_JPEG_ENCODE - * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE - * \param[in] boxa [optional] of image regions; can be null - * \param[in] quality used for jpeg image regions; 0 for default - * \param[in] scalefactor used for jpeg regions; must be <= 1.0 - * \param[in] title [optional] pdf title; typically taken from the - * input file for the pix - * \param[in] fileout output pdf file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there are no image regions, set %boxa == NULL;
- *          %quality and %scalefactor are ignored.
- *      (2) Typically, %scalefactor is < 1.0, because the image regions
- *          can be rendered at a lower resolution (for better compression)
- *          than the text regions.  If %scalefactor == 0, we use 1.0.
- *          If the input image is 1 bpp and scalefactor < 1.0, we
- *          use scaleToGray() to downsample the image regions to gray
- *          before compressing them.
- *      (3) If the compression type for non-image regions is L_G4_ENCODE
- *          and bpp > 1, the image is upscaled 2x and thresholded
- *          to 1 bpp.  That is the only situation where %thresh is used.
- *      (4) The parameter %quality is only used for image regions.
- *          If %type == L_JPEG_ENCODE, default jpeg quality (75) is
- *          used for the non-image regions.
- *      (5) Processing matrix for non-image regions.
- *
- *          Input           G4              JPEG                FLATE
- *          ----------|---------------------------------------------------
- *          1 bpp     |  1x, 1 bpp       1x flate, 1 bpp     1x, 1 bpp
- *                    |
- *          cmap      |  2x, 1 bpp       1x flate, cmap      1x, cmap
- *                    |
- *          2,4 bpp   |  2x, 1 bpp       1x flate            1x, 2,4 bpp
- *          no cmap   |                  2,4 bpp
- *                    |
- *          8,32 bpp  |  2x, 1 bpp       1x (jpeg)           1x, 8,32 bpp
- *          no cmap   |                  8,32 bpp
- *
- *          Summary:
- *          (a) if G4 is requested, G4 is used, with 2x upscaling
- *              for all cases except 1 bpp.
- *          (b) if JPEG is requested, use flate encoding for all cases
- *              except 8 bpp without cmap and 32 bpp (rgb).
- *          (c) if FLATE is requested, use flate with no transformation
- *              of the raster data.
- *      (6) Calling options/sequence for these functions:
- *              file  -->  file      (convertToPdfSegmented)
- *                  pix  -->  file      (pixConvertToPdfSegmented)
- *                      pix  -->  data      (pixConvertToPdfDataSegmented)
- *              file  -->  data      (convertToPdfDataSegmented)
- *                      pix  -->  data      (pixConvertToPdfDataSegmented)
- * 
- */ -l_ok -convertToPdfSegmented(const char *filein, - l_int32 res, - l_int32 type, - l_int32 thresh, - BOXA *boxa, - l_int32 quality, - l_float32 scalefactor, - const char *title, - const char *fileout) -{ -l_int32 ret; -PIX *pixs; - - PROCNAME("convertToPdfSegmented"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && - type != L_FLATE_ENCODE) - return ERROR_INT("invalid conversion type", procName, 1); - if (boxa && scalefactor > 1.0) { - L_WARNING("setting scalefactor to 1.0\n", procName); - scalefactor = 1.0; - } - - if ((pixs = pixRead(filein)) == NULL) - return ERROR_INT("pixs not made", procName, 1); - - ret = pixConvertToPdfSegmented(pixs, res, type, thresh, boxa, quality, - scalefactor, (title) ? title : filein, - fileout); - pixDestroy(&pixs); - return ret; -} - - -/*! - * \brief pixConvertToPdfSegmented() - * - * \param[in] pixs any depth, cmap OK - * \param[in] res input image resolution; typ. 300 ppi; - * use 0 for default - * \param[in] type compression type for non-image regions; image - * regions are always compressed with L_JPEG_ENCODE - * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE - * \param[in] boxa [optional] of image regions; can be null - * \param[in] quality used for jpeg image regions; 0 for default - * \param[in] scalefactor used for jpeg regions; must be <= 1.0 - * \param[in] title [optional] pdf title; typically taken from the - * input file for the pix - * \param[in] fileout output pdf file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See convertToPdfSegmented() for details.
- * 
- */ -l_ok -pixConvertToPdfSegmented(PIX *pixs, - l_int32 res, - l_int32 type, - l_int32 thresh, - BOXA *boxa, - l_int32 quality, - l_float32 scalefactor, - const char *title, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("pixConvertToPdfSegmented"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && - type != L_FLATE_ENCODE) - return ERROR_INT("invalid conversion type", procName, 1); - if (boxa && scalefactor > 1.0) { - L_WARNING("setting scalefactor to 1.0\n", procName); - scalefactor = 1.0; - } - - ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, quality, - scalefactor, title, &data, &nbytes); - if (ret) - return ERROR_INT("pdf generation failure", procName, 1); - - ret = l_binaryWrite(fileout, "w", data, nbytes); - if (data) LEPT_FREE(data); - return ret; -} - - -/*! - * \brief convertToPdfDataSegmented() - * - * \param[in] filein input image file -- any format - * \param[in] res input image resolution; typ. 300 ppi; - * use 0 for default - * \param[in] type compression type for non-image regions; image - * regions are always compressed with L_JPEG_ENCODE - * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE - * \param[in] boxa [optional] image regions; can be null - * \param[in] quality used for jpeg image regions; 0 for default - * \param[in] scalefactor used for jpeg regions; must be <= 1.0 - * \param[in] title [optional] pdf title; if null, uses filein - * \param[out] pdata pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there are no image regions, set %boxa == NULL;
- *          %quality and %scalefactor are ignored.
- *      (2) Typically, %scalefactor is < 1.0.  The image regions are
- * 
- */ -l_ok -convertToPdfDataSegmented(const char *filein, - l_int32 res, - l_int32 type, - l_int32 thresh, - BOXA *boxa, - l_int32 quality, - l_float32 scalefactor, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_int32 ret; -PIX *pixs; - - PROCNAME("convertToPdfDataSegmented"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && - type != L_FLATE_ENCODE) - return ERROR_INT("invalid conversion type", procName, 1); - if (boxa && scalefactor > 1.0) { - L_WARNING("setting scalefactor to 1.0\n", procName); - scalefactor = 1.0; - } - - if ((pixs = pixRead(filein)) == NULL) - return ERROR_INT("pixs not made", procName, 1); - - ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, - quality, scalefactor, - (title) ? title : filein, - pdata, pnbytes); - pixDestroy(&pixs); - return ret; -} - - -/*! - * \brief pixConvertToPdfDataSegmented() - * - * \param[in] pixs any depth, cmap OK - * \param[in] res input image resolution; typ. 300 ppi; - * use 0 for default - * \param[in] type compression type for non-image regions; image - * regions are always compressed with L_JPEG_ENCODE - * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE - * \param[in] boxa [optional] of image regions; can be null - * \param[in] quality used for jpeg image regions; 0 for default - * \param[in] scalefactor used for jpeg regions; must be <= 1.0 - * \param[in] title [optional] pdf title; typically taken from the - * input file for the pix - * \param[out] pdata pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See convertToPdfSegmented() for details.
- * 
- */ -l_ok -pixConvertToPdfDataSegmented(PIX *pixs, - l_int32 res, - l_int32 type, - l_int32 thresh, - BOXA *boxa, - l_int32 quality, - l_float32 scalefactor, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_int32 i, nbox, seq, bx, by, bw, bh, upscale; -l_float32 scale; -BOX *box, *boxc, *box2; -PIX *pix, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6; -PIXCMAP *cmap; -L_PDF_DATA *lpd; - - PROCNAME("pixConvertToPdfDataSegmented"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && - type != L_FLATE_ENCODE) - return ERROR_INT("invalid conversion type", procName, 1); - if (boxa && (scalefactor <= 0.0 || scalefactor > 1.0)) { - L_WARNING("setting scalefactor to 1.0\n", procName); - scalefactor = 1.0; - } - - /* Adjust scalefactor so that the product with res gives an integer */ - if (res <= 0) - res = DefaultInputRes; - scale = (l_float32)((l_int32)(scalefactor * res + 0.5)) / (l_float32)res; - cmap = pixGetColormap(pixs); - - /* Simple case: single image to be encoded */ - if (!boxa || boxaGetCount(boxa) == 0) { - if (pixGetDepth(pixs) > 1 && type == L_G4_ENCODE) { - if (cmap) - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixt1 = pixConvertTo8(pixs, FALSE); - pixt2 = pixScaleGray2xLIThresh(pixt1, thresh); - pixConvertToPdfData(pixt2, type, quality, pdata, pnbytes, - 0, 0, 2 * res, title, NULL, 0); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - } else { - pixConvertToPdfData(pixs, type, quality, pdata, pnbytes, - 0, 0, res, title, NULL, 0); - } - return 0; - } - - /* Multiple images to be encoded. If %type == L_G4_ENCODE, - * jpeg encode a version of pixs that is blanked in the non-image - * regions, and paint the scaled non-image part onto it through a mask. 
- * Otherwise, we must put the non-image part down first and - * then render all the image regions separately on top of it, - * at their own resolution. */ - pixt1 = pixSetBlackOrWhiteBoxa(pixs, boxa, L_SET_WHITE); /* non-image */ - nbox = boxaGetCount(boxa); - if (type == L_G4_ENCODE) { - pixt2 = pixCreateTemplate(pixs); /* only image regions */ - pixSetBlackOrWhite(pixt2, L_SET_WHITE); - for (i = 0; i < nbox; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pix = pixClipRectangle(pixs, box, &boxc); - boxGetGeometry(boxc, &bx, &by, &bw, &bh); - pixRasterop(pixt2, bx, by, bw, bh, PIX_SRC, pix, 0, 0); - pixDestroy(&pix); - boxDestroy(&box); - boxDestroy(&boxc); - } - pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC); - if (pixGetDepth(pixt3) == 1) - pixt4 = pixScaleToGray(pixt3, scale); - else - pixt4 = pixScale(pixt3, scale, scale); - pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes, - 0, 0, (l_int32)(scale * res), title, - &lpd, L_FIRST_IMAGE); - - if (pixGetDepth(pixt1) == 1) { - pixt5 = pixClone(pixt1); - upscale = 1; - } else { - pixt6 = pixConvertTo8(pixt1, 0); - pixt5 = pixScaleGray2xLIThresh(pixt6, thresh); - pixDestroy(&pixt6); - upscale = 2; - } - pixConvertToPdfData(pixt5, L_G4_ENCODE, quality, pdata, pnbytes, - 0, 0, upscale * res, title, &lpd, L_LAST_IMAGE); - pixDestroy(&pixt2); - pixDestroy(&pixt3); - pixDestroy(&pixt4); - pixDestroy(&pixt5); - } else { - /* Put the non-image part down first. This is the full - size of the page, so we can use it to find the page - height in pixels, which is required for determining - the LL corner of the image relative to the LL corner - of the page. 
*/ - pixConvertToPdfData(pixt1, type, quality, pdata, pnbytes, 0, 0, - res, title, &lpd, L_FIRST_IMAGE); - for (i = 0; i < nbox; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pixt2 = pixClipRectangle(pixs, box, &boxc); - pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC); - if (pixGetDepth(pixt3) == 1) - pixt4 = pixScaleToGray(pixt3, scale); - else - pixt4 = pixScale(pixt3, scale, scale); - box2 = boxTransform(boxc, 0, 0, scale, scale); - boxGetGeometry(box2, &bx, &by, NULL, &bh); - seq = (i == nbox - 1) ? L_LAST_IMAGE : L_NEXT_IMAGE; - pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes, - bx, by, (l_int32)(scale * res), title, - &lpd, seq); - pixDestroy(&pixt2); - pixDestroy(&pixt3); - pixDestroy(&pixt4); - boxDestroy(&box); - boxDestroy(&boxc); - boxDestroy(&box2); - } - } - - pixDestroy(&pixt1); - return 0; -} - - -/*---------------------------------------------------------------------* - * Multi-page concatenation * - *---------------------------------------------------------------------*/ -/*! - * \brief concatenatePdf() - * - * \param[in] dirname directory name containing single-page pdf files - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] fileout concatenated pdf file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only works with leptonica-formatted single-page pdf files.
- *      (2) If %substr is not NULL, only filenames that contain
- *          the substring can be returned.  If %substr == NULL,
- *          none of the filenames are filtered out.
- *      (3) The files in the directory, after optional filtering by
- *          the substring, are lexically sorted in increasing order
- *          before concatenation.
- * 
- */ -l_ok -concatenatePdf(const char *dirname, - const char *substr, - const char *fileout) -{ -l_int32 ret; -SARRAY *sa; - - PROCNAME("concatenatePdf"); - - if (!dirname) - return ERROR_INT("dirname not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return ERROR_INT("sa not made", procName, 1); - ret = saConcatenatePdf(sa, fileout); - sarrayDestroy(&sa); - return ret; -} - - -/*! - * \brief saConcatenatePdf() - * - * \param[in] sa string array of pathnames for single-page pdf files - * \param[in] fileout concatenated pdf file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only works with leptonica-formatted single-page pdf files.
- * 
- */ -l_ok -saConcatenatePdf(SARRAY *sa, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("saConcatenatePdf"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - ret = saConcatenatePdfToData(sa, &data, &nbytes); - if (ret) - return ERROR_INT("pdf data not made", procName, 1); - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - return ret; -} - - -/*! - * \brief ptraConcatenatePdf() - * - * \param[in] pa array of pdf strings, each for a single-page pdf file - * \param[in] fileout concatenated pdf file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only works with leptonica-formatted single-page pdf files.
- * 
- */ -l_ok -ptraConcatenatePdf(L_PTRA *pa, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("ptraConcatenatePdf"); - - if (!pa) - return ERROR_INT("pa not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - ret = ptraConcatenatePdfToData(pa, NULL, &data, &nbytes); - if (ret) - return ERROR_INT("pdf data not made", procName, 1); - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - return ret; -} - - -/*! - * \brief concatenatePdfToData() - * - * \param[in] dirname directory name containing single-page pdf files - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[out] pdata concatenated pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only works with leptonica-formatted single-page pdf files.
- *      (2) If %substr is not NULL, only filenames that contain
- *          the substring can be returned.  If %substr == NULL,
- *          none of the filenames are filtered out.
- *      (3) The files in the directory, after optional filtering by
- *          the substring, are lexically sorted in increasing order
- *          before concatenation.
- * 
- */ -l_ok -concatenatePdfToData(const char *dirname, - const char *substr, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_int32 ret; -SARRAY *sa; - - PROCNAME("concatenatePdfToData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!dirname) - return ERROR_INT("dirname not defined", procName, 1); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return ERROR_INT("sa not made", procName, 1); - ret = saConcatenatePdfToData(sa, pdata, pnbytes); - sarrayDestroy(&sa); - return ret; -} - - -/*! - * \brief saConcatenatePdfToData() - * - * \param[in] sa string array of pathnames for single-page pdf files - * \param[out] pdata concatenated pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only works with leptonica-formatted single-page pdf files.
- * 
- */ -l_ok -saConcatenatePdfToData(SARRAY *sa, - l_uint8 **pdata, - size_t *pnbytes) -{ -char *fname; -l_int32 i, npages, ret; -L_BYTEA *bas; -L_PTRA *pa_data; /* input pdf data for each page */ - - PROCNAME("saConcatenatePdfToData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - /* Read the pdf files into memory */ - if ((npages = sarrayGetCount(sa)) == 0) - return ERROR_INT("no filenames found", procName, 1); - pa_data = ptraCreate(npages); - for (i = 0; i < npages; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - bas = l_byteaInitFromFile(fname); - ptraAdd(pa_data, bas); - } - - ret = ptraConcatenatePdfToData(pa_data, sa, pdata, pnbytes); - - /* Cleanup: some pages could have been removed */ - ptraGetActualCount(pa_data, &npages); - for (i = 0; i < npages; i++) { - bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&bas); - } - ptraDestroy(&pa_data, FALSE, FALSE); - return ret; -} - -/* --------------------------------------------*/ -#endif /* USE_PDFIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio1stub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio1stub.c deleted file mode 100644 index 78cf1158..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio1stub.c +++ /dev/null @@ -1,309 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pdfio1stub.c - *
- *
- *     Stubs for pdfio1.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_PDFIO /* defined in environ.h */ -/* --------------------------------------------*/ - -/* ----------------------------------------------------------------------*/ - -l_ok convertFilesToPdf(const char *dirname, const char *substr, - l_int32 res, l_float32 scalefactor, - l_int32 type, l_int32 quality, - const char *title, const char *fileout) -{ - return ERROR_INT("function not present", "convertFilesToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok saConvertFilesToPdf(SARRAY *sa, l_int32 res, l_float32 scalefactor, - l_int32 type, l_int32 quality, - const char *title, const char *fileout) -{ - return ERROR_INT("function not present", "saConvertFilesToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok saConvertFilesToPdfData(SARRAY *sa, l_int32 res, - l_float32 scalefactor, l_int32 type, - l_int32 quality, const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "saConvertFilesToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok selectDefaultPdfEncoding(PIX *pix, l_int32 *ptype) -{ - return ERROR_INT("function not present", "selectDefaultPdfEncoding", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertUnscaledFilesToPdf(const char *dirname, const char *substr, - const char *title, const char *fileout) -{ - return ERROR_INT("function not present", "convertUnscaledFilesToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok saConvertUnscaledFilesToPdf(SARRAY *sa, const char *title, - const char *fileout) -{ - return ERROR_INT("function not present", "saConvertUnscaledFilesToPdf", 1); -} - -/* 
----------------------------------------------------------------------*/ - -l_ok saConvertUnscaledFilesToPdfData(SARRAY *sa, const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", - "saConvertUnscaledFilesToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertUnscaledToPdfData(const char *fname, const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "convertUnscaledToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaConvertToPdf(PIXA *pixa, l_int32 res, l_float32 scalefactor, - l_int32 type, l_int32 quality, - const char *title, const char *fileout) -{ - return ERROR_INT("function not present", "pixaConvertToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaConvertToPdfData(PIXA *pixa, l_int32 res, l_float32 scalefactor, - l_int32 type, l_int32 quality, const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "pixaConvertToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertToPdf(const char *filein, - l_int32 type, l_int32 quality, - const char *fileout, - l_int32 x, l_int32 y, l_int32 res, - const char *title, - L_PDF_DATA **plpd, l_int32 position) -{ - return ERROR_INT("function not present", "convertToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertImageDataToPdf(l_uint8 *imdata, size_t size, - l_int32 type, l_int32 quality, - const char *fileout, - l_int32 x, l_int32 y, l_int32 res, - const char *title, - L_PDF_DATA **plpd, l_int32 position) -{ - return ERROR_INT("function not present", "convertImageDataToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertToPdfData(const char *filein, - 
l_int32 type, l_int32 quality, - l_uint8 **pdata, size_t *pnbytes, - l_int32 x, l_int32 y, l_int32 res, - const char *title, - L_PDF_DATA **plpd, l_int32 position) -{ - return ERROR_INT("function not present", "convertToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertImageDataToPdfData(l_uint8 *imdata, size_t size, - l_int32 type, l_int32 quality, - l_uint8 **pdata, size_t *pnbytes, - l_int32 x, l_int32 y, l_int32 res, - const char *title, - L_PDF_DATA **plpd, l_int32 position) -{ - return ERROR_INT("function not present", "convertImageDataToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixConvertToPdf(PIX *pix, l_int32 type, l_int32 quality, - const char *fileout, - l_int32 x, l_int32 y, l_int32 res, - const char *title, - L_PDF_DATA **plpd, l_int32 position) -{ - return ERROR_INT("function not present", "pixConvertToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamPdf(FILE *fp, PIX *pix, l_int32 res, const char *title) -{ - return ERROR_INT("function not present", "pixWriteStreamPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemPdf(l_uint8 **pdata, size_t *pnbytes, PIX *pix, - l_int32 res, const char *title) -{ - return ERROR_INT("function not present", "pixWriteMemPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertSegmentedFilesToPdf(const char *dirname, const char *substr, - l_int32 res, l_int32 type, l_int32 thresh, - BOXAA *baa, l_int32 quality, - l_float32 scalefactor, const char *title, - const char *fileout) -{ - return ERROR_INT("function not present", "convertSegmentedFilesToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -BOXAA * convertNumberedMasksToBoxaa(const char *dirname, const char *substr, - l_int32 
numpre, l_int32 numpost) -{ - return (BOXAA *)ERROR_PTR("function not present", - "convertNumberedMasksToBoxaa", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertToPdfSegmented(const char *filein, l_int32 res, l_int32 type, - l_int32 thresh, BOXA *boxa, l_int32 quality, - l_float32 scalefactor, const char *title, - const char *fileout) -{ - return ERROR_INT("function not present", "convertToPdfSegmented", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixConvertToPdfSegmented(PIX *pixs, l_int32 res, l_int32 type, - l_int32 thresh, BOXA *boxa, l_int32 quality, - l_float32 scalefactor, const char *title, - const char *fileout) -{ - return ERROR_INT("function not present", "pixConvertToPdfSegmented", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertToPdfDataSegmented(const char *filein, l_int32 res, - l_int32 type, l_int32 thresh, BOXA *boxa, - l_int32 quality, l_float32 scalefactor, - const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "convertToPdfDataSegmented", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixConvertToPdfDataSegmented(PIX *pixs, l_int32 res, l_int32 type, - l_int32 thresh, BOXA *boxa, - l_int32 quality, l_float32 scalefactor, - const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "pixConvertToPdfDataSegmented", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok concatenatePdf(const char *dirname, const char *substr, - const char *fileout) -{ - return ERROR_INT("function not present", "concatenatePdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok saConcatenatePdf(SARRAY *sa, const char *fileout) -{ - return ERROR_INT("function not present", "saConcatenatePdf", 1); 
-} - -/* ----------------------------------------------------------------------*/ - -l_ok ptraConcatenatePdf(L_PTRA *pa, const char *fileout) -{ - return ERROR_INT("function not present", "ptraConcatenatePdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok concatenatePdfToData(const char *dirname, const char *substr, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "concatenatePdfToData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok saConcatenatePdfToData(SARRAY *sa, l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "saConcatenatePdfToData", 1); -} - -/* ----------------------------------------------------------------------*/ - -/* --------------------------------------------*/ -#endif /* !USE_PDFIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio2.c deleted file mode 100644 index 82407993..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio2.c +++ /dev/null @@ -1,2589 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pdfio2.c - *
- *
- *    Lower-level operations for generating pdf.
- *
- *     Intermediate function for single page, multi-image conversion
- *          l_int32              pixConvertToPdfData()
- *
- *     Intermediate function for generating multipage pdf output
- *          l_int32              ptraConcatenatePdfToData()
- *
- *     Convert tiff multipage to pdf file
- *          l_int32              convertTiffMultipageToPdf()
- *
- *     Low-level CID-based operations
- *
- *       Without transcoding
- *          l_int32              l_generateCIDataForPdf()
- *          L_COMP_DATA         *l_generateFlateDataPdf()
- *          L_COMP_DATA         *l_generateJpegData()
- *          L_COMP_DATA         *l_generateJpegDataMem()
- *          static L_COMP_DATA  *l_generateJp2kData()
- *
- *       With transcoding
- *          l_int32              l_generateCIData()
- *          l_int32              pixGenerateCIData()
- *          L_COMP_DATA         *l_generateFlateData()
- *          static L_COMP_DATA  *pixGenerateFlateData()
- *          static L_COMP_DATA  *pixGenerateJpegData()
- *          static L_COMP_DATA  *pixGenerateJp2kData()
- *          static L_COMP_DATA  *pixGenerateG4Data()
- *          L_COMP_DATA         *l_generateG4Data()
- *
- *       Other
- *          l_int32              cidConvertToPdfData()
- *          void                 l_CIDataDestroy()
- *
- *     Helper functions for generating the output pdf string
- *          static l_int32       l_generatePdf()
- *          static void          generateFixedStringsPdf()
- *          static char         *generateEscapeString()
- *          static void          generateMediaboxPdf()
- *          static l_int32       generatePageStringPdf()
- *          static l_int32       generateContentStringPdf()
- *          static l_int32       generatePreXStringsPdf()
- *          static l_int32       generateColormapStringsPdf()
- *          static void          generateTrailerPdf()
- *          static l_int32       makeTrailerStringPdf()
- *          static l_int32       generateOutputDataPdf()
- *
- *     Helper functions for generating multipage pdf output
- *          static l_int32       parseTrailerPdf()
- *          static char         *generatePagesObjStringPdf()
- *          static L_BYTEA      *substituteObjectNumbers()
- *
- *     Create/destroy/access pdf data
- *          static L_PDF_DATA   *pdfdataCreate()
- *          static void          pdfdataDestroy()
- *          static L_COMP_DATA  *pdfdataGetCid()
- *
- *     Set flags for special modes
- *          void                 l_pdfSetG4ImageMask()
- *          void                 l_pdfSetDateAndVersion()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if USE_PDFIO /* defined in environ.h */ - /* --------------------------------------------*/ - - /* Typical scan resolution in ppi (pixels/inch) */ -static const l_int32 DefaultInputRes = 300; - - /* Static helpers */ -static L_COMP_DATA *l_generateJp2kData(const char *fname); -static L_COMP_DATA *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag); -static L_COMP_DATA *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag, - l_int32 quality); -static L_COMP_DATA *pixGenerateJp2kData(PIX *pixs, l_int32 quality); -static L_COMP_DATA *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag); - -static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes, - L_PDF_DATA *lpd); -static void generateFixedStringsPdf(L_PDF_DATA *lpd); -static char *generateEscapeString(const char *str); -static void generateMediaboxPdf(L_PDF_DATA *lpd); -static l_int32 generatePageStringPdf(L_PDF_DATA *lpd); -static l_int32 generateContentStringPdf(L_PDF_DATA *lpd); -static l_int32 generatePreXStringsPdf(L_PDF_DATA *lpd); -static l_int32 generateColormapStringsPdf(L_PDF_DATA *lpd); -static void generateTrailerPdf(L_PDF_DATA *lpd); -static char *makeTrailerStringPdf(L_DNA *daloc); -static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes, - L_PDF_DATA *lpd); - -static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda); -static char *generatePagesObjStringPdf(NUMA *napage); -static L_BYTEA *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs); - -static L_PDF_DATA *pdfdataCreate(const char *title); -static void pdfdataDestroy(L_PDF_DATA **plpd); -static L_COMP_DATA *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index); - - -/* ---------------- Defaults for rendering options ----------------- */ - /* Output G4 as writing through image mask; this is the default */ -static l_int32 var_WRITE_G4_IMAGE_MASK = 1; - /* Write date/time 
and lib version into pdf; this is the default */ -static l_int32 var_WRITE_DATE_AND_VERSION = 1; - -#define L_SMALLBUF 256 -#define L_BIGBUF 2048 /* must be able to hold hex colormap */ - - -#ifndef NO_CONSOLE_IO -#define DEBUG_MULTIPAGE 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*---------------------------------------------------------------------* - * Intermediate function for generating multipage pdf output * - *---------------------------------------------------------------------*/ -/*! - * \brief pixConvertToPdfData() - * - * \param[in] pix all depths; cmap OK - * \param[in] type L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, - * L_JP2K_ENCODE - * \param[in] quality for jpeg: 1-100; 0 for default (75) - * for jp2k: 27-45; 0 for default (34) - * \param[out] pdata pdf array - * \param[out] pnbytes number of bytes in pdf array - * \param[in] x, y location of lower-left corner of image, in pixels, - * relative to the PostScript origin (0,0) at - * the lower-left corner of the page) - * \param[in] res override the resolution of the input image, in ppi; - * use 0 to respect resolution embedded in the input - * \param[in] title [optional] pdf title; can be null - * \param[in,out] plpd ptr to lpd; created on the first invocation and - * returned until last image is processed - * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, - * L_LAST_IMAGE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %res == 0 and the input resolution field is 0,
- *          this will use DefaultInputRes.
- *      (2) This only writes %data if it is the last image to be
- *          written on the page.
- *      (3) See comments in convertToPdf().
- * 
- */ -l_ok -pixConvertToPdfData(PIX *pix, - l_int32 type, - l_int32 quality, - l_uint8 **pdata, - size_t *pnbytes, - l_int32 x, - l_int32 y, - l_int32 res, - const char *title, - L_PDF_DATA **plpd, - l_int32 position) -{ -l_int32 pixres, w, h, ret; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid = NULL; -L_PDF_DATA *lpd = NULL; - - PROCNAME("pixConvertToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && - type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { - selectDefaultPdfEncoding(pix, &type); - } - if (plpd) { /* part of multi-page invocation */ - if (position == L_FIRST_IMAGE) - *plpd = NULL; - } - - /* Generate the compressed image data. It must NOT - * be ascii85 encoded. */ - pixGenerateCIData(pix, type, quality, 0, &cid); - if (!cid) - return ERROR_INT("cid not made", procName, 1); - - /* Get media box in pts. Guess the input image resolution - * based on the input parameter %res, the resolution data in - * the pix, and the size of the image. */ - pixres = cid->res; - w = cid->w; - h = cid->h; - if (res <= 0.0) { - if (pixres > 0) - res = pixres; - else - res = DefaultInputRes; - } - xpt = x * 72. / res; - ypt = y * 72. / res; - wpt = w * 72. / res; - hpt = h * 72. 
/ res; - - /* Set up lpd */ - if (!plpd) { /* single image */ - if ((lpd = pdfdataCreate(title)) == NULL) - return ERROR_INT("lpd not made", procName, 1); - } else if (position == L_FIRST_IMAGE) { /* first of multiple images */ - if ((lpd = pdfdataCreate(title)) == NULL) - return ERROR_INT("lpd not made", procName, 1); - *plpd = lpd; - } else { /* not the first of multiple images */ - lpd = *plpd; - } - - /* Add the data to the lpd */ - ptraAdd(lpd->cida, cid); - lpd->n++; - ptaAddPt(lpd->xy, xpt, ypt); - ptaAddPt(lpd->wh, wpt, hpt); - - /* If a single image or the last of multiple images, - * generate the pdf and destroy the lpd */ - if (!plpd || (position == L_LAST_IMAGE)) { - ret = l_generatePdf(pdata, pnbytes, lpd); - pdfdataDestroy(&lpd); - if (plpd) *plpd = NULL; - if (ret) - return ERROR_INT("pdf output not made", procName, 1); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Intermediate function for generating multipage pdf output * - *---------------------------------------------------------------------*/ -/*! - * \brief ptraConcatenatePdfToData() - * - * \param[in] pa_data ptra array of pdf strings, each for a - * single-page pdf file - * \param[in] sa [optional] string array of pathnames for - * input pdf files; can be null - * \param[out] pdata concatenated pdf data in memory - * \param[out] pnbytes number of bytes in pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This only works with leptonica-formatted single-page pdf files.
- *          pdf files generated by other programs will have unpredictable
- *          (and usually bad) results.  The requirements for each pdf file:
- *            (a) The Catalog and Info objects are the first two.
- *            (b) Object 3 is Pages
- *            (c) Object 4 is Page
- *            (d) The remaining objects are Contents, XObjects, and ColorSpace
- *      (2) We remove trailers from each page, and append the full trailer
- *          for all pages at the end.
- *      (3) For all but the first file, remove the ID and the first 3
- *          objects (catalog, info, pages), so that each subsequent
- *          file has only objects of these classes:
- *              Page, Contents, XObject, ColorSpace (Indexed RGB).
- *          For those objects, we substitute these refs to objects
- *          in the local file:
- *              Page:  Parent(object 3), Contents, XObject(typically multiple)
- *              XObject:  [ColorSpace if indexed]
- *          The Pages object on the first page (object 3) has a Kids array
- *          of references to all the Page objects, with a Count equal
- *          to the number of pages.  Each Page object refers back to
- *          this parent.
- * 
- */ -l_ok -ptraConcatenatePdfToData(L_PTRA *pa_data, - SARRAY *sa, - l_uint8 **pdata, - size_t *pnbytes) -{ -char *fname, *str_pages, *str_trailer; -l_uint8 *pdfdata, *data; -l_int32 i, j, index, nobj, npages; -l_int32 *sizes, *locs; -size_t size; -L_BYTEA *bas, *bad, *bat1, *bat2; -L_DNA *da_locs, *da_sizes, *da_outlocs, *da; -L_DNAA *daa_locs; /* object locations on each page */ -NUMA *na_objs, *napage; -NUMAA *naa_objs; /* object mapping numbers to new values */ - - PROCNAME("ptraConcatenatePdfToData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pa_data) - return ERROR_INT("pa_data not defined", procName, 1); - - /* Parse the files and find the object locations. - * Remove file data that cannot be parsed. */ - ptraGetActualCount(pa_data, &npages); - daa_locs = l_dnaaCreate(npages); - for (i = 0; i < npages; i++) { - bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i); - if (parseTrailerPdf(bas, &da_locs) != 0) { - bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&bas); - if (sa) { - fname = sarrayGetString(sa, i, L_NOCOPY); - L_ERROR("can't parse file %s; skipping\n", procName, fname); - } else { - L_ERROR("can't parse file %d; skipping\n", procName, i); - } - } else { - l_dnaaAddDna(daa_locs, da_locs, L_INSERT); - } - } - - /* Recompute npages in case some of the files were not pdf */ - ptraCompactArray(pa_data); - ptraGetActualCount(pa_data, &npages); - if (npages == 0) { - l_dnaaDestroy(&daa_locs); - return ERROR_INT("no parsable pdf files found", procName, 1); - } - - /* Find the mapping from initial to final object numbers */ - naa_objs = numaaCreate(npages); /* stores final object numbers */ - napage = numaCreate(npages); /* stores "Page" object numbers */ - index = 0; - for (i = 0; i < npages; i++) { - da = l_dnaaGetDna(daa_locs, i, L_CLONE); - nobj = l_dnaGetCount(da); - if (i == 0) { - 
numaAddNumber(napage, 4); /* object 4 on first page */ - na_objs = numaMakeSequence(0.0, 1.0, nobj - 1); - index = nobj - 1; - } else { /* skip the first 3 objects in each file */ - numaAddNumber(napage, index); /* Page object is first we add */ - na_objs = numaMakeConstant(0.0, nobj - 1); - numaReplaceNumber(na_objs, 3, 3); /* refers to parent of all */ - for (j = 4; j < nobj - 1; j++) - numaSetValue(na_objs, j, index++); - } - numaaAddNuma(naa_objs, na_objs, L_INSERT); - l_dnaDestroy(&da); - } - - /* Make the Pages object (#3) */ - str_pages = generatePagesObjStringPdf(napage); - - /* Build the output */ - bad = l_byteaCreate(5000); - da_outlocs = l_dnaCreate(0); /* locations of all output objects */ - for (i = 0; i < npages; i++) { - bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i); - pdfdata = l_byteaGetData(bas, &size); - da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE); /* locs on this page */ - na_objs = numaaGetNuma(naa_objs, i, L_CLONE); /* obj # on this page */ - nobj = l_dnaGetCount(da_locs) - 1; - da_sizes = l_dnaDiffAdjValues(da_locs); /* object sizes on this page */ - sizes = l_dnaGetIArray(da_sizes); - locs = l_dnaGetIArray(da_locs); - if (i == 0) { - l_byteaAppendData(bad, pdfdata, sizes[0]); - l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]); - l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]); - l_byteaAppendString(bad, str_pages); - for (j = 0; j < 4; j++) - l_dnaAddNumber(da_outlocs, locs[j]); - } - for (j = 4; j < nobj; j++) { - l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad)); - bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]); - bat2 = substituteObjectNumbers(bat1, na_objs); - data = l_byteaGetData(bat2, &size); - l_byteaAppendData(bad, data, size); - l_byteaDestroy(&bat1); - l_byteaDestroy(&bat2); - } - if (i == npages - 1) /* last one */ - l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad)); - LEPT_FREE(sizes); - LEPT_FREE(locs); - l_dnaDestroy(&da_locs); - numaDestroy(&na_objs); - l_dnaDestroy(&da_sizes); - } - - /* Add the trailer */ - 
str_trailer = makeTrailerStringPdf(da_outlocs); - l_byteaAppendString(bad, str_trailer); - - /* Transfer the output data */ - *pdata = l_byteaCopyData(bad, pnbytes); - l_byteaDestroy(&bad); - -#if DEBUG_MULTIPAGE - lept_stderr("******** object mapper **********"); - numaaWriteStream(stderr, naa_objs); - - lept_stderr("******** Page object numbers ***********"); - numaWriteStderr(napage); - - lept_stderr("******** Pages object ***********\n"); - lept_stderr("%s\n", str_pages); -#endif /* DEBUG_MULTIPAGE */ - - numaDestroy(&napage); - numaaDestroy(&naa_objs); - l_dnaDestroy(&da_outlocs); - l_dnaaDestroy(&daa_locs); - LEPT_FREE(str_pages); - LEPT_FREE(str_trailer); - return 0; -} - - -/*---------------------------------------------------------------------* - * Convert tiff multipage to pdf file * - *---------------------------------------------------------------------*/ -/*! - * \brief convertTiffMultipageToPdf() - * - * \param[in] filein (tiff) - * \param[in] fileout (pdf) - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) A multipage tiff file can also be converted to PS, using
- *          convertTiffMultipageToPS()
- * 
- */ -l_ok -convertTiffMultipageToPdf(const char *filein, - const char *fileout) -{ -l_int32 istiff; -PIXA *pixa; -FILE *fp; - - PROCNAME("convertTiffMultipageToPdf"); - - if ((fp = fopenReadStream(filein)) == NULL) - return ERROR_INT("file not found", procName, 1); - istiff = fileFormatIsTiff(fp); - fclose(fp); - if (!istiff) - return ERROR_INT("file not tiff format", procName, 1); - - pixa = pixaReadMultipageTiff(filein); - pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout); - pixaDestroy(&pixa); - return 0; -} - - -/*---------------------------------------------------------------------* - * Low-level CID-based operations * - *---------------------------------------------------------------------*/ -/*! - * \brief l_generateCIDataForPdf() - * - * \param[in] fname [optional] can be null - * \param[in] pix [optional] can be null - * \param[in] quality for jpeg if transcoded: 1-100; 0 for default (75) - * for jp2k if transcoded: 27-45; 0 for default (34) - * \param[out] pcid compressed data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) You must set either filename or pix.
- *      (2) Given an image file and optionally a pix raster of that data,
- *          this provides a CID that is compatible with PDF, preferably
- *          without transcoding.
- *      (3) The pix is included for efficiency, in case transcoding
- *          is required and the pix is available to the caller.
- *      (4) We don't try to open files named "stdin" or "-" for Tesseract
- *          compatibility reasons. We may remove this restriction
- *          in the future.
- * 
- */ -l_ok -l_generateCIDataForPdf(const char *fname, - PIX *pix, - l_int32 quality, - L_COMP_DATA **pcid) -{ -l_int32 format, type; -L_COMP_DATA *cid; -PIX *pixt; - - PROCNAME("l_generateCIDataForPdf"); - - if (!pcid) - return ERROR_INT("&cid not defined", procName, 1); - *pcid = cid = NULL; - if (!fname && !pix) - return ERROR_INT("neither fname nor pix are defined", procName, 1); - - /* If a compressed file is given that is not 'stdin', see if we - * can generate the pdf output without transcoding. */ - if (fname && strcmp(fname, "-") != 0 && strcmp(fname, "stdin") != 0) { - findFileFormat(fname, &format); - if (format == IFF_UNKNOWN) - L_WARNING("file %s format is unknown\n", procName, fname); - if (format == IFF_PS || format == IFF_LPDF) { - L_ERROR("file %s is unsupported format %d\n", - procName, fname, format); - return 1; - } - if (format == IFF_JFIF_JPEG) { - cid = l_generateJpegData(fname, 0); - } else if (format == IFF_JP2) { - cid = l_generateJp2kData(fname); - } else if (format == IFF_PNG) { - cid = l_generateFlateDataPdf(fname, pix); - } - - } - - /* Otherwise, use the pix to generate the pdf output */ - if (!cid) { - if (!pix) - pixt = pixRead(fname); - else - pixt = pixClone(pix); - if (!pixt) - return ERROR_INT("pixt not made", procName, 1); - if (selectDefaultPdfEncoding(pixt, &type)) { - pixDestroy(&pixt); - return 1; - } - pixGenerateCIData(pixt, type, quality, 0, &cid); - pixDestroy(&pixt); - } - if (!cid) { - L_ERROR("totally kerflummoxed\n", procName); - return 1; - } - *pcid = cid; - return 0; -} - - -/*! - * \brief l_generateFlateDataPdf() - * - * \param[in] fname preferably png - * \param[in] pixs [optional] can be null - * \return cid containing png data, or NULL on error - * - *
- * Notes:
- *      (1) If you hand this a png file, you are going to get
- *          png predictors embedded in the flate data. So it has
- *          come to this. http://xkcd.com/1022/
- *      (2) Exception: if the png is interlaced or if it is RGBA,
- *          it will be transcoded.
- *      (3) If transcoding is required, this will not have to read from
- *          file if you also input a pix.
- * 
- */ -L_COMP_DATA * -l_generateFlateDataPdf(const char *fname, - PIX *pixs) -{ -l_uint8 *pngcomp = NULL; /* entire PNG compressed file */ -l_uint8 *datacomp = NULL; /* gzipped raster data */ -l_uint8 *cmapdata = NULL; /* uncompressed colormap */ -char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */ -l_uint32 i, j, n; -l_int32 format, interlaced; -l_int32 ncolors; /* in colormap */ -l_int32 bps; /* bits/sample: usually 8 */ -l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */ -l_int32 w, h, cmapflag; -l_int32 xres, yres; -size_t nbytescomp = 0, nbytespng = 0; -FILE *fp; -L_COMP_DATA *cid; -PIX *pix; -PIXCMAP *cmap = NULL; - - PROCNAME("l_generateFlateDataPdf"); - - if (!fname) - return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL); - - findFileFormat(fname, &format); - spp = 0; /* init to spp != 4 if not png */ - interlaced = 0; /* initialize to no interlacing */ - bps = 0; /* initialize to a nonsense value */ - if (format == IFF_PNG) { - isPngInterlaced(fname, &interlaced); - if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL)) - return (L_COMP_DATA *)ERROR_PTR("bad png input", procName, NULL); - } - - /* PDF is capable of inlining some types of PNG files, but not all - of them. We need to transcode anything with interlacing, an - alpha channel, or 1 bpp (which would otherwise be photo-inverted). - - Be careful with spp. Any PNG image file with an alpha - channel is converted on reading to RGBA (spp == 4). This - includes the (gray + alpha) format with spp == 2. You - will get different results if you look at spp via - readHeaderPng() versus pixGetSpp() */ - if (format != IFF_PNG || interlaced || bps == 1 || spp == 4 || spp == 2) { - if (!pixs) - pix = pixRead(fname); - else - pix = pixClone(pixs); - if (!pix) - return (L_COMP_DATA *)ERROR_PTR("pix not made", procName, NULL); - cid = pixGenerateFlateData(pix, 0); - pixDestroy(&pix); - return cid; - } - - /* It's png. Generate the pdf data without transcoding. 
- * Implementation by Jeff Breidenbach. - * First, read the metadata */ - if ((fp = fopenReadStream(fname)) == NULL) - return (L_COMP_DATA *)ERROR_PTR("stream not opened", procName, NULL); - freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag); - fgetPngResolution(fp, &xres, &yres); - fclose(fp); - - /* We get pdf corruption when inlining the data from 16 bpp png. */ - if (bps == 16) - return l_generateFlateData(fname, 0); - - /* Read the entire png file */ - if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL) - return (L_COMP_DATA *)ERROR_PTR("unable to read file", - procName, NULL); - - /* Extract flate data, copying portions of it to memory, including - * the predictor information in a byte at the beginning of each - * raster line. The flate data makes up the vast majority of - * the png file, so after extraction we expect datacomp to - * be nearly full (i.e., nbytescomp will be only slightly less - * than nbytespng). Also extract the colormap if present. */ - if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) { - LEPT_FREE(pngcomp); - return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory", - procName, NULL); - } - - /* Parse the png file. Each chunk consists of: - * length: 4 bytes - * name: 4 bytes (e.g., "IDAT") - * data: n bytes - * CRC: 4 bytes - * Start at the beginning of the data section of the first chunk, - * byte 16, because the png file begins with 8 bytes of header, - * followed by the first 8 bytes of the first chunk - * (length and name). On each loop, increment by 12 bytes to - * skip over the CRC, length and name of the next chunk. 
*/ - for (i = 16; i < nbytespng; i += 12) { /* do each successive chunk */ - /* Get the chunk length */ - n = pngcomp[i - 8] << 24; - n += pngcomp[i - 7] << 16; - n += pngcomp[i - 6] << 8; - n += pngcomp[i - 5] << 0; - if (n >= nbytespng - i) { /* "n + i" can overflow */ - LEPT_FREE(pngcomp); - LEPT_FREE(datacomp); - pixcmapDestroy(&cmap); - L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", procName, - i, n, nbytespng); - return NULL; - } - - /* Is it a data chunk? */ - if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) { - memcpy(datacomp + nbytescomp, pngcomp + i, n); - nbytescomp += n; - } - - /* Is it a palette chunk? */ - if (cmapflag && !cmap && - memcmp(pngcomp + i - 4, "PLTE", 4) == 0) { - if ((n / 3) > (1 << bps)) { - LEPT_FREE(pngcomp); - LEPT_FREE(datacomp); - pixcmapDestroy(&cmap); - L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n", - procName, i, n, (1 << bps)); - return NULL; - } - cmap = pixcmapCreate(bps); - for (j = i; j < i + n; j += 3) { - pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1], - pngcomp[j + 2]); - } - } - i += n; /* move to the end of the data chunk */ - } - LEPT_FREE(pngcomp); - - if (nbytescomp == 0) { - LEPT_FREE(datacomp); - pixcmapDestroy(&cmap); - return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", procName, NULL); - } - - /* Extract and encode the colormap data as hexascii */ - ncolors = 0; - if (cmap) { - pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata); - pixcmapDestroy(&cmap); - if (!cmapdata) { - LEPT_FREE(datacomp); - return (L_COMP_DATA *)ERROR_PTR("cmapdata not made", - procName, NULL); - } - cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors); - LEPT_FREE(cmapdata); - } - - /* Note that this is the only situation where the predictor - * field of the CID is set to 1. Adobe's predictor values on - * p. 
76 of pdf_reference_1-7.pdf give 1 for no predictor and - * 10-14 for inline predictors, the specifics of which are - * ignored by the pdf interpreter, which just needs to know that - * the first byte on each compressed scanline is some predictor - * whose type can be inferred from the byte itself. */ - cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA)); - cid->datacomp = datacomp; - cid->type = L_FLATE_ENCODE; - cid->cmapdatahex = cmapdatahex; - cid->nbytescomp = nbytescomp; - cid->ncolors = ncolors; - cid->predictor = TRUE; - cid->w = w; - cid->h = h; - cid->bps = bps; - cid->spp = spp; - cid->res = xres; - return cid; -} - - -/*! - * \brief l_generateJpegData() - * - * \param[in] fname of jpeg file - * \param[in] ascii85flag 0 for jpeg; 1 for ascii85-encoded jpeg - * \return cid containing jpeg data, or NULL on error - * - *
- * Notes:
- *      (1) Set ascii85flag:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- *               (not permitted in pdf)
- *      (2) Do not free the data.  l_generateJpegDataMem() will free
- *          the data if the data is invalid, or if it does not use
- *          ascii encoding.
- * 
- */ -L_COMP_DATA * -l_generateJpegData(const char *fname, - l_int32 ascii85flag) -{ -l_uint8 *data = NULL; -size_t nbytes; - - PROCNAME("l_generateJpegData"); - - if (!fname) - return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL); - - /* The returned jpeg data in memory is the entire jpeg file, - * which starts with ffd8 and ends with ffd9 */ - if ((data = l_binaryRead(fname, &nbytes)) == NULL) - return (L_COMP_DATA *)ERROR_PTR("data not extracted", procName, NULL); - - return l_generateJpegDataMem(data, nbytes, ascii85flag); -} - - -/*! - * \brief l_generateJpegDataMem() - * - * \param[in] data of jpeg file - * \param[in] nbytes of jpeg file - * \param[in] ascii85flag 0 for jpeg; 1 for ascii85-encoded jpeg - * \return cid containing jpeg data, or NULL on error - * - *
- * Notes:
- *      (1) See l_generateJpegData().
- * 
- */ -L_COMP_DATA * -l_generateJpegDataMem(l_uint8 *data, - size_t nbytes, - l_int32 ascii85flag) -{ -char *data85 = NULL; /* ascii85 encoded jpeg compressed file */ -l_int32 w, h, xres, yres, bps, spp; -l_int32 nbytes85; -L_COMP_DATA *cid; - - PROCNAME("l_generateJpegDataMem"); - - if (!data) - return (L_COMP_DATA *)ERROR_PTR("data not defined", procName, NULL); - - /* Read the metadata */ - if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) { - LEPT_FREE(data); - return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", procName, NULL); - } - bps = 8; - readResolutionMemJpeg(data, nbytes, &xres, &yres); - - /* Optionally, encode the compressed data */ - if (ascii85flag == 1) { - data85 = encodeAscii85(data, nbytes, &nbytes85); - LEPT_FREE(data); - if (!data85) - return (L_COMP_DATA *)ERROR_PTR("data85 not made", procName, NULL); - else - data85[nbytes85 - 1] = '\0'; /* remove the newline */ - } - - cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA)); - if (ascii85flag == 0) { - cid->datacomp = data; - } else { /* ascii85 */ - cid->data85 = data85; - cid->nbytes85 = nbytes85; - } - cid->type = L_JPEG_ENCODE; - cid->nbytescomp = nbytes; - cid->w = w; - cid->h = h; - cid->bps = bps; - cid->spp = spp; - cid->res = xres; - return cid; -} - - -/*! - * \brief l_generateJp2kData() - * - * \param[in] fname of jp2k file - * \return cid containing jp2k data, or NULL on error - * - *
- * Notes:
- *      (1) This is only called after the file is verified to be jp2k.
- * 
- */ -static L_COMP_DATA * -l_generateJp2kData(const char *fname) -{ -l_int32 w, h, bps, spp, xres, yres; -size_t nbytes; -L_COMP_DATA *cid; -FILE *fp; - - PROCNAME("l_generateJp2kData"); - - if (!fname) - return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL); - - if (readHeaderJp2k(fname, &w, &h, &bps, &spp)) - return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", procName, NULL); - - if ((cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA))) == NULL) - return (L_COMP_DATA *)ERROR_PTR("cid not made", procName, NULL); - - /* The returned jp2k data in memory is the entire jp2k file */ - if ((cid->datacomp = l_binaryRead(fname, &nbytes)) == NULL) { - l_CIDataDestroy(&cid); - return (L_COMP_DATA *)ERROR_PTR("data not extracted", procName, NULL); - } - - xres = yres = 0; - if ((fp = fopenReadStream(fname)) != NULL) { - fgetJp2kResolution(fp, &xres, &yres); - fclose(fp); - } - cid->type = L_JP2K_ENCODE; - cid->nbytescomp = nbytes; - cid->w = w; - cid->h = h; - cid->bps = bps; - cid->spp = spp; - cid->res = xres; - return cid; -} - - -/*! - * \brief l_generateCIData() - * - * \param[in] fname - * \param[in] type L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, - * L_JP2K_ENCODE - * \param[in] quality for jpeg if transcoded: 1-100; 0 for default (75) - * for jp2k if transcoded: 27-45; 0 for default (34) - * \param[in] ascii85 0 for binary; 1 for ascii85-encoded - * \param[out] pcid compressed data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This can be used for both PostScript and pdf.
- *      (1) Set ascii85:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- *      (2) This attempts to compress according to the requested type.
- *          If this can't be done, it falls back to ordinary flate encoding.
- *      (3) This differs from l_generateCIDataPdf(), which determines
- *          the format and attempts to generate the CID without transcoding.
- * 
- */ -l_ok -l_generateCIData(const char *fname, - l_int32 type, - l_int32 quality, - l_int32 ascii85, - L_COMP_DATA **pcid) -{ -l_int32 format, d, bps, spp, iscmap; -L_COMP_DATA *cid; -PIX *pix; - - PROCNAME("l_generateCIData"); - - if (!pcid) - return ERROR_INT("&cid not defined", procName, 1); - *pcid = NULL; - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && - type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) - return ERROR_INT("invalid conversion type", procName, 1); - if (ascii85 != 0 && ascii85 != 1) - return ERROR_INT("invalid ascii85", procName, 1); - - /* Sanity check on requested encoding */ - pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap); - d = bps * spp; - if (d == 24) d = 32; - if (iscmap && type != L_FLATE_ENCODE) { - L_WARNING("pixs has cmap; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } else if (d < 8 && type == L_JPEG_ENCODE) { - L_WARNING("pixs has < 8 bpp; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } else if (d < 8 && type == L_JP2K_ENCODE) { - L_WARNING("pixs has < 8 bpp; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } else if (d > 1 && type == L_G4_ENCODE) { - L_WARNING("pixs has > 1 bpp; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } - - if (type == L_JPEG_ENCODE) { - if (format == IFF_JFIF_JPEG) { /* do not transcode */ - cid = l_generateJpegData(fname, ascii85); - } else { - if ((pix = pixRead(fname)) == NULL) - return ERROR_INT("pix not returned", procName, 1); - cid = pixGenerateJpegData(pix, ascii85, quality); - pixDestroy(&pix); - } - if (!cid) - return ERROR_INT("jpeg data not made", procName, 1); - } else if (type == L_JP2K_ENCODE) { - if (format == IFF_JP2) { /* do not transcode */ - cid = l_generateJp2kData(fname); - } else { - if ((pix = pixRead(fname)) == NULL) - return ERROR_INT("pix not returned", procName, 1); - cid = pixGenerateJp2kData(pix, quality); - 
pixDestroy(&pix); - } - if (!cid) - return ERROR_INT("jp2k data not made", procName, 1); - } else if (type == L_G4_ENCODE) { - if ((cid = l_generateG4Data(fname, ascii85)) == NULL) - return ERROR_INT("g4 data not made", procName, 1); - } else if (type == L_FLATE_ENCODE) { - if ((cid = l_generateFlateData(fname, ascii85)) == NULL) - return ERROR_INT("flate data not made", procName, 1); - } else { - return ERROR_INT("invalid conversion type", procName, 1); - } - *pcid = cid; - - return 0; -} - - -/*! - * \brief pixGenerateCIData() - * - * \param[in] pixs 8 or 32 bpp, no colormap - * \param[in] type L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or - * L_JP2K_ENCODE - * \param[in] quality for jpeg if transcoded: 1-100; 0 for default (75) - * for jp2k if transcoded: 27-45; 0 for default (34) - * \param[in] ascii85 0 for binary; 1 for ascii85-encoded - * \param[out] pcid compressed data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Set ascii85:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- * 
- */ -l_ok -pixGenerateCIData(PIX *pixs, - l_int32 type, - l_int32 quality, - l_int32 ascii85, - L_COMP_DATA **pcid) -{ -l_int32 d; -PIXCMAP *cmap; - - PROCNAME("pixGenerateCIData"); - - if (!pcid) - return ERROR_INT("&cid not defined", procName, 1); - *pcid = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && - type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { - selectDefaultPdfEncoding(pixs, &type); - } - if (ascii85 != 0 && ascii85 != 1) - return ERROR_INT("invalid ascii85", procName, 1); - - /* Conditionally modify the encoding type if libz is - * available and the requested library is missing. */ -#if defined(HAVE_LIBZ) -# if !defined(HAVE_LIBJPEG) - if (type == L_JPEG_ENCODE) { - L_WARNING("no libjpeg; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } -# endif /* !defined(HAVE_LIBJPEG) */ -# if !defined(HAVE_LIBJP2K) - if (type == L_JP2K_ENCODE) { - L_WARNING("no libjp2k; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } -# endif /* !defined(HAVE_LIBJP2K) */ -# if !defined(HAVE_LIBTIFF) - if (type == L_G4_ENCODE) { - L_WARNING("no libtiff; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } -# endif /* !defined(HAVE_LIBTIFF) */ -#endif /* defined(HAVE_LIBZ) */ - - /* Sanity check on requested encoding */ - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (cmap && type != L_FLATE_ENCODE) { - L_WARNING("pixs has cmap; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } else if (d < 8 && (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE)) { - L_WARNING("pixs has < 8 bpp; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } else if (d > 1 && type == L_G4_ENCODE) { - L_WARNING("pixs has > 1 bpp; using flate encoding\n", procName); - type = L_FLATE_ENCODE; - } - - if (type == L_JPEG_ENCODE) { - if ((*pcid = pixGenerateJpegData(pixs, ascii85, quality)) == NULL) - return ERROR_INT("jpeg data not made", procName, 1); - 
} else if (type == L_JP2K_ENCODE) { - if ((*pcid = pixGenerateJp2kData(pixs, quality)) == NULL) - return ERROR_INT("jp2k data not made", procName, 1); - } else if (type == L_G4_ENCODE) { - if ((*pcid = pixGenerateG4Data(pixs, ascii85)) == NULL) - return ERROR_INT("g4 data not made", procName, 1); - } else { /* type == L_FLATE_ENCODE */ - if ((*pcid = pixGenerateFlateData(pixs, ascii85)) == NULL) - return ERROR_INT("flate data not made", procName, 1); - } - return 0; -} - - -/*! - * \brief l_generateFlateData() - * - * \param[in] fname - * \param[in] ascii85flag 0 for gzipped; 1 for ascii85-encoded gzipped - * \return cid flate compressed image data, or NULL on error - * - *
- * Notes:
- *      (1) The input image is converted to one of these 4 types:
- *           ~ 1 bpp
- *           ~ 8 bpp, no colormap
- *           ~ 8 bpp, colormap
- *           ~ 32 bpp rgb
- *      (2) Set ascii85flag:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- * 
- */ -L_COMP_DATA * -l_generateFlateData(const char *fname, - l_int32 ascii85flag) -{ -L_COMP_DATA *cid; -PIX *pixs; - - PROCNAME("l_generateFlateData"); - - if (!fname) - return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL); - - if ((pixs = pixRead(fname)) == NULL) - return (L_COMP_DATA *)ERROR_PTR("pixs not made", procName, NULL); - cid = pixGenerateFlateData(pixs, ascii85flag); - pixDestroy(&pixs); - return cid; -} - - -/*! - * \brief pixGenerateFlateData() - * - * \param[in] pixs - * \param[in] ascii85flag 0 for gzipped; 1 for ascii85-encoded gzipped - * \return cid flate compressed image data, or NULL on error - * - *
- * Notes:
- *     (1) If called with an RGBA pix (spp == 4), the alpha channel
- *         will be removed, projecting a white backgrouond through
- *         any transparency.
- *     (2) If called with a colormapped pix, any transparency in the
- *         alpha component in the colormap will be ignored, as it is
- *         for all leptonica operations on colormapped pix.
- * 
- */ -static L_COMP_DATA * -pixGenerateFlateData(PIX *pixs, - l_int32 ascii85flag) -{ -l_uint8 *data = NULL; /* uncompressed raster data in required format */ -l_uint8 *datacomp = NULL; /* gzipped raster data */ -char *data85 = NULL; /* ascii85 encoded gzipped raster data */ -l_uint8 *cmapdata = NULL; /* uncompressed colormap */ -char *cmapdata85 = NULL; /* ascii85 encoded uncompressed colormap */ -char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */ -l_int32 ncolors; /* in colormap; not used if cmapdata85 is null */ -l_int32 bps; /* bits/sample: usually 8 */ -l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb */ -l_int32 w, h, d, cmapflag; -l_int32 ncmapbytes85 = 0; -l_int32 nbytes85 = 0; -size_t nbytes, nbytescomp; -L_COMP_DATA *cid; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixGenerateFlateData"); - - if (!pixs) - return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Convert the image to one of these 4 types: - * 1 bpp - * 8 bpp, no colormap - * 8 bpp, colormap - * 32 bpp rgb */ - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - cmapflag = (cmap) ? 1 : 0; - if (d == 2 || d == 4 || d == 16) { - pixt = pixConvertTo8(pixs, cmapflag); - cmap = pixGetColormap(pixt); - d = pixGetDepth(pixt); - } else if (d == 32 && pixGetSpp(pixs) == 4) { /* remove alpha */ - pixt = pixAlphaBlendUniform(pixs, 0xffffff00); - } else { - pixt = pixClone(pixs); - } - spp = (d == 32) ? 3 : 1; - bps = (d == 32) ? 
8 : d; - - /* Extract and encode the colormap data as both ascii85 and hexascii */ - ncolors = 0; - if (cmap) { - pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata); - if (!cmapdata) { - pixDestroy(&pixt); - return (L_COMP_DATA *)ERROR_PTR("cmapdata not made", - procName, NULL); - } - - cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85); - cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors); - LEPT_FREE(cmapdata); - } - - /* Extract and compress the raster data */ - pixGetRasterData(pixt, &data, &nbytes); - pixDestroy(&pixt); - datacomp = zlibCompress(data, nbytes, &nbytescomp); - LEPT_FREE(data); - if (!datacomp) { - LEPT_FREE(cmapdata85); - LEPT_FREE(cmapdatahex); - return (L_COMP_DATA *)ERROR_PTR("datacomp not made", procName, NULL); - } - - /* Optionally, encode the compressed data */ - if (ascii85flag == 1) { - data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85); - LEPT_FREE(datacomp); - if (!data85) { - LEPT_FREE(cmapdata85); - LEPT_FREE(cmapdatahex); - return (L_COMP_DATA *)ERROR_PTR("data85 not made", procName, NULL); - } else { - data85[nbytes85 - 1] = '\0'; /* remove the newline */ - } - } - - cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA)); - if (ascii85flag == 0) { - cid->datacomp = datacomp; - } else { /* ascii85 */ - cid->data85 = data85; - cid->nbytes85 = nbytes85; - } - cid->type = L_FLATE_ENCODE; - cid->cmapdatahex = cmapdatahex; - cid->cmapdata85 = cmapdata85; - cid->nbytescomp = nbytescomp; - cid->ncolors = ncolors; - cid->w = w; - cid->h = h; - cid->bps = bps; - cid->spp = spp; - cid->res = pixGetXRes(pixs); - cid->nbytes = nbytes; /* only for debugging */ - return cid; -} - - -/*! - * \brief pixGenerateJpegData() - * - * \param[in] pixs 8 or 32 bpp, no colormap - * \param[in] ascii85flag 0 for jpeg; 1 for ascii85-encoded jpeg - * \param[in] quality 0 for default, which is 75 - * \return cid jpeg compressed data, or NULL on error - * - *
- * Notes:
- *      (1) Set ascii85flag:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- * 
- */ -static L_COMP_DATA * -pixGenerateJpegData(PIX *pixs, - l_int32 ascii85flag, - l_int32 quality) -{ -l_int32 d; -char *fname; -L_COMP_DATA *cid; - - PROCNAME("pixGenerateJpegData"); - - if (!pixs) - return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - - /* Compress to a temp jpeg file */ - fname = l_makeTempFilename(); - if (pixWriteJpeg(fname, pixs, quality, 0)) { - LEPT_FREE(fname); - return NULL; - } - - /* Generate the data */ - cid = l_generateJpegData(fname, ascii85flag); - if (lept_rmfile(fname) != 0) - L_ERROR("temp file %s was not deleted\n", procName, fname); - LEPT_FREE(fname); - return cid; -} - - -/*! - * \brief pixGenerateJp2kData() - * - * \param[in] pixs 8 or 32 bpp, no colormap - * \param[in] quality 0 for default, which is 34 - * \return cid jp2k compressed data, or NULL on error - * - *
- * Notes:
- *      (1) The quality can be set between 27 (very poor) and 45
- *          (nearly perfect).  Use 0 for default (34). Use 100 for lossless,
- *          but this is very expensive and not recommended.
- * 
- */ -static L_COMP_DATA * -pixGenerateJp2kData(PIX *pixs, - l_int32 quality) -{ -l_int32 d; -char *fname; -L_COMP_DATA *cid; - - PROCNAME("pixGenerateJp2kData"); - - if (!pixs) - return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - - /* Compress to a temp jp2k file */ - fname = l_makeTempFilename(); - if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) { - LEPT_FREE(fname); - return NULL; - } - - /* Generate the data */ - cid = l_generateJp2kData(fname); - if (lept_rmfile(fname) != 0) - L_ERROR("temp file %s was not deleted\n", procName, fname); - LEPT_FREE(fname); - return cid; -} - - -/*! - * \brief pixGenerateG4Data() - * - * \param[in] pixs 1 bpp - * \param[in] ascii85flag 0 for gzipped; 1 for ascii85-encoded gzipped - * \return cid g4 compressed image data, or NULL on error - * - *
- * Notes:
- *      (1) Set ascii85flag:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- * 
- */ -static L_COMP_DATA * -pixGenerateG4Data(PIX *pixs, - l_int32 ascii85flag) -{ -char *fname; -L_COMP_DATA *cid; - - PROCNAME("pixGenerateG4Data"); - - if (!pixs) - return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - /* Compress to a temp tiff g4 file */ - fname = l_makeTempFilename(); - if (pixWrite(fname, pixs, IFF_TIFF_G4)) { - LEPT_FREE(fname); - return NULL; - } - - cid = l_generateG4Data(fname, ascii85flag); - if (lept_rmfile(fname) != 0) - L_ERROR("temp file %s was not deleted\n", procName, fname); - LEPT_FREE(fname); - return cid; -} - - -/*! - * \brief l_generateG4Data() - * - * \param[in] fname of g4 compressed file - * \param[in] ascii85flag 0 for g4 compressed; 1 for ascii85-encoded g4 - * \return cid g4 compressed image data, or NULL on error - * - *
- * Notes:
- *      (1) Set ascii85flag:
- *           ~ 0 for binary data (not permitted in PostScript)
- *           ~ 1 for ascii85 (5 for 4) encoded binary data
- *             (not permitted in pdf)
- * 
- */ -L_COMP_DATA * -l_generateG4Data(const char *fname, - l_int32 ascii85flag) -{ -l_uint8 *datacomp = NULL; /* g4 compressed raster data */ -char *data85 = NULL; /* ascii85 encoded g4 compressed data */ -l_int32 w, h, xres, yres; -l_int32 minisblack; /* TRUE or FALSE */ -l_int32 nbytes85; -size_t nbytescomp; -L_COMP_DATA *cid; -FILE *fp; - - PROCNAME("l_generateG4Data"); - - if (!fname) - return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL); - - /* Read the resolution */ - if ((fp = fopenReadStream(fname)) == NULL) - return (L_COMP_DATA *)ERROR_PTR("stream not opened", procName, NULL); - getTiffResolution(fp, &xres, &yres); - fclose(fp); - - /* The returned ccitt g4 data in memory is the block of - * bytes in the tiff file, starting after 8 bytes and - * ending before the directory. */ - if (extractG4DataFromFile(fname, &datacomp, &nbytescomp, - &w, &h, &minisblack)) { - return (L_COMP_DATA *)ERROR_PTR("datacomp not extracted", - procName, NULL); - } - - /* Optionally, encode the compressed data */ - if (ascii85flag == 1) { - data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85); - LEPT_FREE(datacomp); - if (!data85) - return (L_COMP_DATA *)ERROR_PTR("data85 not made", procName, NULL); - else - data85[nbytes85 - 1] = '\0'; /* remove the newline */ - } - - cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA)); - if (ascii85flag == 0) { - cid->datacomp = datacomp; - } else { /* ascii85 */ - cid->data85 = data85; - cid->nbytes85 = nbytes85; - } - cid->type = L_G4_ENCODE; - cid->nbytescomp = nbytescomp; - cid->w = w; - cid->h = h; - cid->bps = 1; - cid->spp = 1; - cid->minisblack = minisblack; - cid->res = xres; - return cid; -} - - -/*! - * \brief cidConvertToPdfData() - * - * \param[in] cid compressed image data - * \param[in] title [optional] pdf title; can be NULL - * \param[out] pdata output pdf data for image - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Caller must not destroy the cid.  It is absorbed in the
- *          lpd and destroyed by this function.
- * 
- */ -l_ok -cidConvertToPdfData(L_COMP_DATA *cid, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_int32 res, ret; -l_float32 wpt, hpt; -L_PDF_DATA *lpd = NULL; - - PROCNAME("cidConvertToPdfData"); - - if (!pdata || !pnbytes) - return ERROR_INT("&data and &nbytes not both defined", procName, 1); - *pdata = NULL; - *pnbytes = 0; - if (!cid) - return ERROR_INT("cid not defined", procName, 1); - - /* Get media box parameters, in pts */ - res = cid->res; - if (res <= 0) - res = DefaultInputRes; - wpt = cid->w * 72. / res; - hpt = cid->h * 72. / res; - - /* Set up the pdf data struct (lpd) */ - if ((lpd = pdfdataCreate(title)) == NULL) - return ERROR_INT("lpd not made", procName, 1); - ptraAdd(lpd->cida, cid); - lpd->n++; - ptaAddPt(lpd->xy, 0, 0); /* xpt = ypt = 0 */ - ptaAddPt(lpd->wh, wpt, hpt); - - /* Generate the pdf string and destroy the lpd */ - ret = l_generatePdf(pdata, pnbytes, lpd); - pdfdataDestroy(&lpd); - if (ret) - return ERROR_INT("pdf output not made", procName, 1); - return 0; -} - - -/*! - * \brief l_CIDataDestroy() - * - * \param[in,out] pcid will be set to null before returning - * \return void - */ -void -l_CIDataDestroy(L_COMP_DATA **pcid) -{ -L_COMP_DATA *cid; - - PROCNAME("l_CIDataDestroy"); - - if (pcid == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - if ((cid = *pcid) == NULL) - return; - - if (cid->datacomp) LEPT_FREE(cid->datacomp); - if (cid->data85) LEPT_FREE(cid->data85); - if (cid->cmapdata85) LEPT_FREE(cid->cmapdata85); - if (cid->cmapdatahex) LEPT_FREE(cid->cmapdatahex); - LEPT_FREE(cid); - *pcid = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * Helper functions for generating the output pdf string * - *---------------------------------------------------------------------*/ -/*! 
- * \brief l_generatePdf() - * - * \param[out] pdata pdf array - * \param[out] pnbytes number of bytes in pdf array - * \param[in] lpd all the required input image data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) On error, no data is returned.
- *      (2) The objects are:
- *            1: Catalog
- *            2: Info
- *            3: Pages
- *            4: Page
- *            5: Contents  (rendering command)
- *            6 to 6+n-1: n XObjects
- *            6+n to 6+n+m-1: m colormaps
- * 
- */ -static l_int32 -l_generatePdf(l_uint8 **pdata, - size_t *pnbytes, - L_PDF_DATA *lpd) -{ - PROCNAME("l_generatePdf"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!lpd) - return ERROR_INT("lpd not defined", procName, 1); - - generateFixedStringsPdf(lpd); - generateMediaboxPdf(lpd); - generatePageStringPdf(lpd); - generateContentStringPdf(lpd); - generatePreXStringsPdf(lpd); - generateColormapStringsPdf(lpd); - generateTrailerPdf(lpd); - return generateOutputDataPdf(pdata, pnbytes, lpd); -} - - -static void -generateFixedStringsPdf(L_PDF_DATA *lpd) -{ -char buf[L_SMALLBUF]; -char *version, *datestr; -SARRAY *sa; - - PROCNAME("generateFixedStringsPdf"); - - /* Accumulate data for the header and objects 1-3 */ - lpd->id = stringNew("%PDF-1.5\n"); - l_dnaAddNumber(lpd->objsize, strlen(lpd->id)); - - lpd->obj1 = stringNew("1 0 obj\n" - "<<\n" - "/Type /Catalog\n" - "/Pages 3 0 R\n" - ">>\n" - "endobj\n"); - l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1)); - - sa = sarrayCreate(0); - sarrayAddString(sa, "2 0 obj\n" - "<<\n", L_COPY); - if (var_WRITE_DATE_AND_VERSION) { - datestr = l_getFormattedDate(); - snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr); - sarrayAddString(sa, buf, L_COPY); - LEPT_FREE(datestr); - version = getLeptonicaVersion(); - snprintf(buf, sizeof(buf), - "/Producer (leptonica: %s)\n", version); - LEPT_FREE(version); - } else { - snprintf(buf, sizeof(buf), "/Producer (leptonica)\n"); - } - sarrayAddString(sa, buf, L_COPY); - if (lpd->title) { - char *hexstr; - if ((hexstr = generateEscapeString(lpd->title)) != NULL) { - snprintf(buf, sizeof(buf), "/Title %s\n", hexstr); - sarrayAddString(sa, buf, L_COPY); - } else { - L_ERROR("title string is not ascii\n", procName); - } - LEPT_FREE(hexstr); - } - sarrayAddString(sa, ">>\n" - "endobj\n", L_COPY); - lpd->obj2 = sarrayToString(sa, 0); - 
l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2)); - sarrayDestroy(&sa); - - lpd->obj3 = stringNew("3 0 obj\n" - "<<\n" - "/Type /Pages\n" - "/Kids [ 4 0 R ]\n" - "/Count 1\n" - ">>\n"); - l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3)); - - /* Do the post-datastream string */ - lpd->poststream = stringNew("\n" - "endstream\n" - "endobj\n"); - return; -} - - -/*! - * \brief generateEscapeString() - * - * \param[in] str input string - * \return hex escape string, or null on error - * - *
- * Notes:
- *      (1) If the input string is not ascii, returns null.
- *      (2) This takes an input ascii string and generates a hex
- *          ascii output string with 4 bytes out for each byte in.
- *          The feff code at the beginning tells the pdf interpreter
- *          that the data is to be interpreted as big-endian, 4 bytes
- *          at a time.  For ascii, the first two bytes are 0 and the
- *          last two bytes are less than 0x80.
- * 
- */ -static char * -generateEscapeString(const char *str) -{ -char smallbuf[8]; -char *buffer; -l_int32 i, nchar, buflen; - - PROCNAME("generateEscapeString"); - - if (!str) - return (char *)ERROR_PTR("str not defined", procName, NULL); - nchar = strlen(str); - for (i = 0; i < nchar; i++) { - if (str[i] < 0) - return (char *)ERROR_PTR("str not all ascii", procName, NULL); - } - - buflen = 4 * nchar + 10; - buffer = (char *)LEPT_CALLOC(buflen, sizeof(char)); - stringCat(buffer, buflen, ""); - return buffer; -} - - -static void -generateMediaboxPdf(L_PDF_DATA *lpd) -{ -l_int32 i; -l_float32 xpt, ypt, wpt, hpt, maxx, maxy; - - /* First get the full extent of all the images. - * This is the mediabox, in pts. */ - maxx = maxy = 0; - for (i = 0; i < lpd->n; i++) { - ptaGetPt(lpd->xy, i, &xpt, &ypt); - ptaGetPt(lpd->wh, i, &wpt, &hpt); - maxx = L_MAX(maxx, xpt + wpt); - maxy = L_MAX(maxy, ypt + hpt); - } - - lpd->mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5), - (l_int32)(maxy + 0.5)); - - /* ypt is in standard image coordinates: the location of - * the UL image corner with respect to the UL media box corner. - * Rewrite each ypt for PostScript coordinates: the location of - * the LL image corner with respect to the LL media box corner. */ - for (i = 0; i < lpd->n; i++) { - ptaGetPt(lpd->xy, i, &xpt, &ypt); - ptaGetPt(lpd->wh, i, &wpt, &hpt); - ptaSetPt(lpd->xy, i, xpt, maxy - ypt - hpt); - } - - return; -} - - -static l_int32 -generatePageStringPdf(L_PDF_DATA *lpd) -{ -char *buf; -char *xstr; -l_int32 bufsize, i, wpt, hpt; -SARRAY *sa; - - PROCNAME("generatePageStringPdf"); - - /* Allocate 1000 bytes for the boilerplate text, and - * 50 bytes for each reference to an image in the - * ProcSet array. 
*/ - bufsize = 1000 + 50 * lpd->n; - if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL) - return ERROR_INT("calloc fail for buf", procName, 1); - - boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt); - sa = sarrayCreate(lpd->n); - for (i = 0; i < lpd->n; i++) { - snprintf(buf, bufsize, "/Im%d %d 0 R ", i + 1, 6 + i); - sarrayAddString(sa, buf, L_COPY); - } - xstr = sarrayToString(sa, 0); - sarrayDestroy(&sa); - if (!xstr) { - LEPT_FREE(buf); - return ERROR_INT("xstr not made", procName, 1); - } - - snprintf(buf, bufsize, "4 0 obj\n" - "<<\n" - "/Type /Page\n" - "/Parent 3 0 R\n" - "/MediaBox [%d %d %d %d]\n" - "/Contents 5 0 R\n" - "/Resources\n" - "<<\n" - "/XObject << %s >>\n" - "/ProcSet [ /ImageB /ImageI /ImageC ]\n" - ">>\n" - ">>\n" - "endobj\n", - 0, 0, wpt, hpt, xstr); - - lpd->obj4 = stringNew(buf); - l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4)); - sarrayDestroy(&sa); - LEPT_FREE(buf); - LEPT_FREE(xstr); - return 0; -} - - -static l_int32 -generateContentStringPdf(L_PDF_DATA *lpd) -{ -char *buf; -char *cstr; -l_int32 i, bufsize; -l_float32 xpt, ypt, wpt, hpt; -SARRAY *sa; - - PROCNAME("generateContentStringPdf"); - - bufsize = 1000 + 200 * lpd->n; - if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL) - return ERROR_INT("calloc fail for buf", procName, 1); - - sa = sarrayCreate(lpd->n); - for (i = 0; i < lpd->n; i++) { - ptaGetPt(lpd->xy, i, &xpt, &ypt); - ptaGetPt(lpd->wh, i, &wpt, &hpt); - snprintf(buf, bufsize, - "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n", - wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1); - sarrayAddString(sa, buf, L_COPY); - } - cstr = sarrayToString(sa, 0); - sarrayDestroy(&sa); - if (!cstr) { - LEPT_FREE(buf); - return ERROR_INT("cstr not made", procName, 1); - } - - snprintf(buf, bufsize, "5 0 obj\n" - "<< /Length %d >>\n" - "stream\n" - "%s" - "endstream\n" - "endobj\n", - (l_int32)strlen(cstr), cstr); - - lpd->obj5 = stringNew(buf); - l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5)); - sarrayDestroy(&sa); 
- LEPT_FREE(buf); - LEPT_FREE(cstr); - return 0; -} - - -static l_int32 -generatePreXStringsPdf(L_PDF_DATA *lpd) -{ -char buff[256]; -char buf[L_BIGBUF]; -char *cstr, *bstr, *fstr, *pstr, *xstr; -l_int32 i, cmindex; -L_COMP_DATA *cid; -SARRAY *sa; - - PROCNAME("generatePreXStringsPdf"); - - sa = lpd->saprex; - cmindex = 6 + lpd->n; /* starting value */ - for (i = 0; i < lpd->n; i++) { - pstr = cstr = NULL; - if ((cid = pdfdataGetCid(lpd, i)) == NULL) - return ERROR_INT("cid not found", procName, 1); - - if (cid->type == L_G4_ENCODE) { - if (var_WRITE_G4_IMAGE_MASK) { - cstr = stringNew("/ImageMask true\n" - "/ColorSpace /DeviceGray"); - } else { - cstr = stringNew("/ColorSpace /DeviceGray"); - } - bstr = stringNew("/BitsPerComponent 1\n" - "/Interpolate true"); - snprintf(buff, sizeof(buff), - "/Filter /CCITTFaxDecode\n" - "/DecodeParms\n" - "<<\n" - "/K -1\n" - "/Columns %d\n" - ">>", cid->w); - fstr = stringNew(buff); - } else if (cid->type == L_JPEG_ENCODE) { - if (cid->spp == 1) - cstr = stringNew("/ColorSpace /DeviceGray"); - else if (cid->spp == 3) - cstr = stringNew("/ColorSpace /DeviceRGB"); - else if (cid->spp == 4) /* pdf supports cmyk */ - cstr = stringNew("/ColorSpace /DeviceCMYK"); - else - L_ERROR("in jpeg: spp != 1, 3 or 4\n", procName); - bstr = stringNew("/BitsPerComponent 8"); - fstr = stringNew("/Filter /DCTDecode"); - } else if (cid->type == L_JP2K_ENCODE) { - if (cid->spp == 1) - cstr = stringNew("/ColorSpace /DeviceGray"); - else if (cid->spp == 3) - cstr = stringNew("/ColorSpace /DeviceRGB"); - else - L_ERROR("in jp2k: spp != 1 && spp != 3\n", procName); - bstr = stringNew("/BitsPerComponent 8"); - fstr = stringNew("/Filter /JPXDecode"); - } else { /* type == L_FLATE_ENCODE */ - if (cid->ncolors > 0) { /* cmapped */ - snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++); - cstr = stringNew(buff); - } else { - if (cid->spp == 1 && cid->bps == 1) - cstr = stringNew("/ColorSpace /DeviceGray\n" - "/Decode [1 0]"); - else if (cid->spp == 
1) /* 8 bpp */ - cstr = stringNew("/ColorSpace /DeviceGray"); - else if (cid->spp == 3) - cstr = stringNew("/ColorSpace /DeviceRGB"); - else - L_ERROR("unknown colorspace: spp = %d\n", - procName, cid->spp); - } - snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps); - bstr = stringNew(buff); - fstr = stringNew("/Filter /FlateDecode"); - if (cid->predictor == TRUE) { - snprintf(buff, sizeof(buff), - "/DecodeParms\n" - "<<\n" - " /Columns %d\n" - " /Predictor 14\n" - " /Colors %d\n" - " /BitsPerComponent %d\n" - ">>\n", cid->w, cid->spp, cid->bps); - pstr = stringNew(buff); - } - } - if (!pstr) /* no decode parameters */ - pstr = stringNew(""); - - snprintf(buf, sizeof(buf), - "%d 0 obj\n" - "<<\n" - "/Length %zu\n" - "/Subtype /Image\n" - "%s\n" /* colorspace */ - "/Width %d\n" - "/Height %d\n" - "%s\n" /* bits/component */ - "%s\n" /* filter */ - "%s" /* decode parms; can be empty */ - ">>\n" - "stream\n", - 6 + i, cid->nbytescomp, cstr, - cid->w, cid->h, bstr, fstr, pstr); - xstr = stringNew(buf); - sarrayAddString(sa, xstr, L_INSERT); - l_dnaAddNumber(lpd->objsize, - strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream)); - LEPT_FREE(cstr); - LEPT_FREE(bstr); - LEPT_FREE(fstr); - LEPT_FREE(pstr); - } - - return 0; -} - - -static l_int32 -generateColormapStringsPdf(L_PDF_DATA *lpd) -{ -char buf[L_BIGBUF]; -char *cmstr; -l_int32 i, cmindex, ncmap; -L_COMP_DATA *cid; -SARRAY *sa; - - PROCNAME("generateColormapStringsPdf"); - - /* In our canonical format, we have 5 objects, followed - * by n XObjects, followed by m colormaps, so the index of - * the first colormap object is 6 + n. 
*/ - sa = lpd->sacmap; - cmindex = 6 + lpd->n; /* starting value */ - ncmap = 0; - for (i = 0; i < lpd->n; i++) { - if ((cid = pdfdataGetCid(lpd, i)) == NULL) - return ERROR_INT("cid not found", procName, 1); - if (cid->ncolors == 0) continue; - - ncmap++; - snprintf(buf, sizeof(buf), "%d 0 obj\n" - "[ /Indexed /DeviceRGB\n" - "%d\n" - "%s\n" - "]\n" - "endobj\n", - cmindex, cid->ncolors - 1, cid->cmapdatahex); - cmindex++; - cmstr = stringNew(buf); - l_dnaAddNumber(lpd->objsize, strlen(cmstr)); - sarrayAddString(sa, cmstr, L_INSERT); - } - - lpd->ncmap = ncmap; - return 0; -} - - -static void -generateTrailerPdf(L_PDF_DATA *lpd) -{ -l_int32 i, n, size, linestart; -L_DNA *daloc, *dasize; - - /* Let nobj be the number of numbered objects. These numbered - * objects are indexed by their pdf number in arrays naloc[] - * and nasize[]. The 0th object is the 9 byte header. Then - * the number of objects in nasize, which includes the header, - * is n = nobj + 1. The array naloc[] has n + 1 elements, - * because it includes as the last element the starting - * location of xref. The indexing of these objects, their - * starting locations and sizes are: - * - * Object number Starting location Size - * ------------- ----------------- -------------- - * 0 daloc[0] = 0 dasize[0] = 9 - * 1 daloc[1] = 9 dasize[1] = 49 - * n daloc[n] dasize[n] - * xref daloc[n+1] - * - * We first generate daloc. 
- */ - dasize = lpd->objsize; - daloc = lpd->objloc; - linestart = 0; - l_dnaAddNumber(daloc, linestart); /* header */ - n = l_dnaGetCount(dasize); - for (i = 0; i < n; i++) { - l_dnaGetIValue(dasize, i, &size); - linestart += size; - l_dnaAddNumber(daloc, linestart); - } - l_dnaGetIValue(daloc, n, &lpd->xrefloc); /* save it */ - - /* Now make the actual trailer string */ - lpd->trailer = makeTrailerStringPdf(daloc); -} - - -static char * -makeTrailerStringPdf(L_DNA *daloc) -{ -char *outstr; -char buf[L_BIGBUF]; -l_int32 i, n, linestart, xrefloc; -SARRAY *sa; - - PROCNAME("makeTrailerStringPdf"); - - if (!daloc) - return (char *)ERROR_PTR("daloc not defined", procName, NULL); - n = l_dnaGetCount(daloc) - 1; /* numbered objects + 1 (yes, +1) */ - - sa = sarrayCreate(0); - snprintf(buf, sizeof(buf), "xref\n" - "0 %d\n" - "0000000000 65535 f \n", n); - sarrayAddString(sa, buf, L_COPY); - for (i = 1; i < n; i++) { - l_dnaGetIValue(daloc, i, &linestart); - snprintf(buf, sizeof(buf), "%010d 00000 n \n", linestart); - sarrayAddString(sa, buf, L_COPY); - } - - l_dnaGetIValue(daloc, n, &xrefloc); - snprintf(buf, sizeof(buf), "trailer\n" - "<<\n" - "/Size %d\n" - "/Root 1 0 R\n" - "/Info 2 0 R\n" - ">>\n" - "startxref\n" - "%d\n" - "%%%%EOF\n", n, xrefloc); - sarrayAddString(sa, buf, L_COPY); - outstr = sarrayToString(sa, 0); - sarrayDestroy(&sa); - return outstr; -} - - -/*! - * \brief generateOutputDataPdf() - * - * \param[out] pdata pdf data array - * \param[out] pnbytes size of pdf data array - * \param[in] lpd input data used to make pdf - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Only called from l_generatePdf().  On error, no data is returned.
- * 
- */ -static l_int32 -generateOutputDataPdf(l_uint8 **pdata, - size_t *pnbytes, - L_PDF_DATA *lpd) -{ -char *str; -l_uint8 *data; -l_int32 nimages, i, len; -l_int32 *sizes, *locs; -size_t nbytes; -L_COMP_DATA *cid; - - PROCNAME("generateOutputDataPdf"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - nbytes = lpd->xrefloc + strlen(lpd->trailer); - *pnbytes = nbytes; - if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL) - return ERROR_INT("calloc fail for data", procName, 1); - *pdata = data; - - sizes = l_dnaGetIArray(lpd->objsize); - locs = l_dnaGetIArray(lpd->objloc); - memcpy(data, lpd->id, sizes[0]); - memcpy(data + locs[1], lpd->obj1, sizes[1]); - memcpy(data + locs[2], lpd->obj2, sizes[2]); - memcpy(data + locs[3], lpd->obj3, sizes[3]); - memcpy(data + locs[4], lpd->obj4, sizes[4]); - memcpy(data + locs[5], lpd->obj5, sizes[5]); - - /* Each image has 3 parts: variable preamble, the compressed - * data stream, and the fixed poststream. 
*/ - nimages = lpd->n; - for (i = 0; i < nimages; i++) { - if ((cid = pdfdataGetCid(lpd, i)) == NULL) { /* should not happen */ - LEPT_FREE(sizes); - LEPT_FREE(locs); - return ERROR_INT("cid not found", procName, 1); - } - str = sarrayGetString(lpd->saprex, i, L_NOCOPY); - len = strlen(str); - memcpy(data + locs[6 + i], str, len); - memcpy(data + locs[6 + i] + len, - cid->datacomp, cid->nbytescomp); - memcpy(data + locs[6 + i] + len + cid->nbytescomp, - lpd->poststream, strlen(lpd->poststream)); - } - - /* Each colormap is simply a stored string */ - for (i = 0; i < lpd->ncmap; i++) { - str = sarrayGetString(lpd->sacmap, i, L_NOCOPY); - memcpy(data + locs[6 + nimages + i], str, strlen(str)); - } - - /* And finally the trailer */ - memcpy(data + lpd->xrefloc, lpd->trailer, strlen(lpd->trailer)); - LEPT_FREE(sizes); - LEPT_FREE(locs); - return 0; -} - - -/*---------------------------------------------------------------------* - * Helper functions for generating multipage pdf output * - *---------------------------------------------------------------------*/ -/*! 
- * \brief parseTrailerPdf() - * - * \param[in] bas lba of a pdf file - * \param[out] pda byte locations of the beginning of each object - * \return 0 if OK, 1 on error - */ -static l_int32 -parseTrailerPdf(L_BYTEA *bas, - L_DNA **pda) -{ -char *str; -l_uint8 nl = '\n'; -l_uint8 *data; -l_int32 i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok; -size_t size; -L_DNA *da, *daobj, *daxref; -SARRAY *sa; - - PROCNAME("parseTrailerPdf"); - - if (!pda) - return ERROR_INT("&da not defined", procName, 1); - *pda = NULL; - if (!bas) - return ERROR_INT("bas not defined", procName, 1); - data = l_byteaGetData(bas, &size); - if (memcmp(data, "%PDF-1.", 7) != 0) - return ERROR_INT("PDF header signature not found", procName, 1); - - /* Search for "startxref" starting 50 bytes from the EOF */ - start = 0; - if (size > 50) - start = size - 50; - arrayFindSequence(data + start, size - start, - (l_uint8 *)"startxref\n", 10, &loc, &found); - if (!found) - return ERROR_INT("startxref not found!", procName, 1); - if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1) - return ERROR_INT("xrefloc not found!", procName, 1); - if (xrefloc < 0 || xrefloc >= size) - return ERROR_INT("invalid xrefloc!", procName, 1); - sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0); - str = sarrayGetString(sa, 1, L_NOCOPY); - if ((sscanf(str, "0 %d", &nobj)) != 1) { - sarrayDestroy(&sa); - return ERROR_INT("nobj not found", procName, 1); - } - - /* Get starting locations. The numa index is the - * object number. loc[0] is the ID; loc[nobj + 1] is xrefloc. 
*/ - da = l_dnaCreate(nobj + 1); - *pda = da; - for (i = 0; i < nobj; i++) { - str = sarrayGetString(sa, i + 2, L_NOCOPY); - sscanf(str, "%d", &startloc); - l_dnaAddNumber(da, startloc); - } - l_dnaAddNumber(da, xrefloc); - -#if DEBUG_MULTIPAGE - lept_stderr("************** Trailer string ************\n"); - lept_stderr("xrefloc = %d", xrefloc); - sarrayWriteStream(stderr, sa); - - lept_stderr("************** Object locations ************"); - l_dnaWriteStream(stderr, da); -#endif /* DEBUG_MULTIPAGE */ - sarrayDestroy(&sa); - - /* Verify correct parsing */ - trailer_ok = TRUE; - for (i = 1; i < nobj; i++) { - l_dnaGetIValue(da, i, &startloc); - if ((sscanf((char *)(data + startloc), "%d 0 obj", &objno)) != 1) { - L_ERROR("bad trailer for object %d\n", procName, i); - trailer_ok = FALSE; - break; - } - } - - /* If the trailer is broken, reconstruct the correct obj locations */ - if (!trailer_ok) { - L_INFO("rebuilding pdf trailer\n", procName); - l_dnaEmpty(da); - l_dnaAddNumber(da, 0); - l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj); - nobj = l_dnaGetCount(daobj); - for (i = 0; i < nobj; i++) { - l_dnaGetIValue(daobj, i, &loc); - for (j = loc - 1; j > 0; j--) { - if (data[j] == nl) - break; - } - l_dnaAddNumber(da, j + 1); - } - l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref); - l_dnaGetIValue(daxref, 0, &loc); - l_dnaAddNumber(da, loc); - l_dnaDestroy(&daobj); - l_dnaDestroy(&daxref); - } - - return 0; -} - - -static char * -generatePagesObjStringPdf(NUMA *napage) -{ -char *str; -char *buf; -l_int32 i, n, index, bufsize; -SARRAY *sa; - - PROCNAME("generatePagesObjStringPdf"); - - if (!napage) - return (char *)ERROR_PTR("napage not defined", procName, NULL); - - n = numaGetCount(napage); - bufsize = 100 + 16 * n; /* large enough to hold the output string */ - buf = (char *)LEPT_CALLOC(bufsize, sizeof(char)); - sa = sarrayCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(napage, i, &index); - snprintf(buf, bufsize, " %d 0 R ", 
index); - sarrayAddString(sa, buf, L_COPY); - } - - str = sarrayToString(sa, 0); - snprintf(buf, bufsize - 1, "3 0 obj\n" - "<<\n" - "/Type /Pages\n" - "/Kids [%s]\n" - "/Count %d\n" - ">>\n", str, n); - sarrayDestroy(&sa); - LEPT_FREE(str); - return buf; -} - - -/*! - * \brief substituteObjectNumbers() - * - * \param[in] bas lba of a pdf object - * \param[in] na_objs object number mapping array - * \return bad lba of rewritten pdf for the object - * - *
- * Notes:
- *      (1) Interpret the first set of bytes as the object number,
- *          map to the new number, and write it out.
- *      (2) Find all occurrences of this 4-byte sequence: " 0 R"
- *      (3) Find the location and value of the integer preceding this,
- *          and map it to the new value.
- *      (4) Rewrite the object with new object numbers.
- * 
- */ -static L_BYTEA * -substituteObjectNumbers(L_BYTEA *bas, - NUMA *na_objs) -{ -l_uint8 space = ' '; -l_uint8 *datas; -l_uint8 buf[32]; /* only needs to hold one integer in ascii format */ -l_int32 start, nrepl, i, j, objin, objout, found; -l_int32 *objs, *matches; -size_t size; -L_BYTEA *bad; -L_DNA *da_match; - - datas = l_byteaGetData(bas, &size); - bad = l_byteaCreate(100); - objs = numaGetIArray(na_objs); /* object number mapper */ - - /* Substitute the object number on the first line */ - sscanf((char *)datas, "%d", &objin); - objout = objs[objin]; - snprintf((char *)buf, 32, "%d", objout); - l_byteaAppendString(bad, (char *)buf); - - /* Find the set of matching locations for object references */ - arrayFindSequence(datas, size, &space, 1, &start, &found); - da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4); - if (!da_match) { - l_byteaAppendData(bad, datas + start, size - start); - LEPT_FREE(objs); - return bad; - } - - /* Substitute all the object reference numbers */ - nrepl = l_dnaGetCount(da_match); - matches = l_dnaGetIArray(da_match); - for (i = 0; i < nrepl; i++) { - /* Find the first space before the object number */ - for (j = matches[i] - 1; j > 0; j--) { - if (datas[j] == space) - break; - } - /* Copy bytes from 'start' up to the object number */ - l_byteaAppendData(bad, datas + start, j - start + 1); - sscanf((char *)(datas + j + 1), "%d", &objin); - objout = objs[objin]; - snprintf((char *)buf, 32, "%d", objout); - l_byteaAppendString(bad, (char *)buf); - start = matches[i]; - } - l_byteaAppendData(bad, datas + start, size - start); - - LEPT_FREE(objs); - LEPT_FREE(matches); - l_dnaDestroy(&da_match); - return bad; -} - - -/*---------------------------------------------------------------------* - * Create/destroy/access pdf data * - *---------------------------------------------------------------------*/ -static L_PDF_DATA * -pdfdataCreate(const char *title) -{ -L_PDF_DATA *lpd; - - lpd = (L_PDF_DATA *)LEPT_CALLOC(1, 
sizeof(L_PDF_DATA)); - if (title) lpd->title = stringNew(title); - lpd->cida = ptraCreate(10); - lpd->xy = ptaCreate(10); - lpd->wh = ptaCreate(10); - lpd->saprex = sarrayCreate(10); - lpd->sacmap = sarrayCreate(10); - lpd->objsize = l_dnaCreate(20); - lpd->objloc = l_dnaCreate(20); - return lpd; -} - -static void -pdfdataDestroy(L_PDF_DATA **plpd) -{ -l_int32 i; -L_COMP_DATA *cid; -L_PDF_DATA *lpd; - - PROCNAME("pdfdataDestroy"); - - if (plpd== NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - if ((lpd = *plpd) == NULL) - return; - - if (lpd->title) LEPT_FREE(lpd->title); - for (i = 0; i < lpd->n; i++) { - cid = (L_COMP_DATA *)ptraRemove(lpd->cida, i, L_NO_COMPACTION); - l_CIDataDestroy(&cid); - } - - ptraDestroy(&lpd->cida, 0, 0); - if (lpd->id) LEPT_FREE(lpd->id); - if (lpd->obj1) LEPT_FREE(lpd->obj1); - if (lpd->obj2) LEPT_FREE(lpd->obj2); - if (lpd->obj3) LEPT_FREE(lpd->obj3); - if (lpd->obj4) LEPT_FREE(lpd->obj4); - if (lpd->obj5) LEPT_FREE(lpd->obj5); - if (lpd->poststream) LEPT_FREE(lpd->poststream); - if (lpd->trailer) LEPT_FREE(lpd->trailer); - if (lpd->xy) ptaDestroy(&lpd->xy); - if (lpd->wh) ptaDestroy(&lpd->wh); - if (lpd->mediabox) boxDestroy(&lpd->mediabox); - if (lpd->saprex) sarrayDestroy(&lpd->saprex); - if (lpd->sacmap) sarrayDestroy(&lpd->sacmap); - if (lpd->objsize) l_dnaDestroy(&lpd->objsize); - if (lpd->objloc) l_dnaDestroy(&lpd->objloc); - LEPT_FREE(lpd); - *plpd = NULL; - return; -} - - -static L_COMP_DATA * -pdfdataGetCid(L_PDF_DATA *lpd, - l_int32 index) -{ - PROCNAME("pdfdataGetCid"); - - if (!lpd) - return (L_COMP_DATA *)ERROR_PTR("lpd not defined", procName, NULL); - if (index < 0 || index >= lpd->n) - return (L_COMP_DATA *)ERROR_PTR("invalid image index", procName, NULL); - - return (L_COMP_DATA *)ptraGetPtrToItem(lpd->cida, index); -} - - -/*---------------------------------------------------------------------* - * Set flags for special modes * - 
*---------------------------------------------------------------------*/ -/*! - * \brief l_pdfSetG4ImageMask() - * - * \param[in] flag 1 for writing g4 data as fg only through a mask; - * 0 for writing fg and bg - * \return void - * - *
- * Notes:
- *      (1) The default is for writing only the fg (through the mask).
- *          That way when you write a 1 bpp image, the bg is transparent,
- *          so any previously written image remains visible behind it.
- * 
- */ -void -l_pdfSetG4ImageMask(l_int32 flag) -{ - var_WRITE_G4_IMAGE_MASK = flag; -} - - -/*! - * \brief l_pdfSetDateAndVersion() - * - * \param[in] flag 1 for writing date/time and leptonica version; - * 0 for omitting this from the metadata - * \return void - * - *
- * Notes:
- *      (1) The default is for writing this data.  For regression tests
- *          that compare output against golden files, it is useful to omit.
- * 
- */ -void -l_pdfSetDateAndVersion(l_int32 flag) -{ - var_WRITE_DATE_AND_VERSION = flag; -} - -/* --------------------------------------------*/ -#endif /* USE_PDFIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio2stub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio2stub.c deleted file mode 100644 index cb297b12..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pdfio2stub.c +++ /dev/null @@ -1,172 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file pdfio2stub.c - *
- *
- *     Stubs for pdfio2.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_PDFIO /* defined in environ.h */ -/* --------------------------------------------*/ - -/* ----------------------------------------------------------------------*/ - -l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, - l_uint8 **pdata, size_t *pnbytes, - l_int32 x, l_int32 y, l_int32 res, - const char *title, - L_PDF_DATA **plpd, l_int32 position) -{ - return ERROR_INT("function not present", "pixConvertToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "ptraConcatenatePdfToData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertTiffMultipageToPdf(const char *filein, const char *fileout) -{ - return ERROR_INT("function not present", "convertTiffMultipageToPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, - L_COMP_DATA **pcid) -{ - return ERROR_INT("function not present", "l_generateCIDataForPdf", 1); -} - -/* ----------------------------------------------------------------------*/ - -L_COMP_DATA * l_generateFlateDataPdf(const char *fname, PIX *pix) -{ - return (L_COMP_DATA *)ERROR_PTR("function not present", - "l_generateFlateDataPdf", NULL); -} - -/* ----------------------------------------------------------------------*/ - -L_COMP_DATA * l_generateJpegData(const char *fname, l_int32 ascii85flag) -{ - return (L_COMP_DATA *)ERROR_PTR("function not present", - "l_generateJpegData", NULL); -} - -/* ----------------------------------------------------------------------*/ - -L_COMP_DATA * l_generateJpegDataMem(l_uint8 *data, size_t nbytes, - 
l_int32 ascii85flag) -{ - return (L_COMP_DATA *)ERROR_PTR("function not present", - "l_generateJpegDataMem", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok l_generateCIData(const char *fname, l_int32 type, l_int32 quality, - l_int32 ascii85, L_COMP_DATA **pcid) -{ - return ERROR_INT("function not present", "l_generateCIData", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixGenerateCIData(PIX *pixs, l_int32 type, l_int32 quality, - l_int32 ascii85, L_COMP_DATA **pcid) -{ - return ERROR_INT("function not present", "pixGenerateCIData", 1); -} - -/* ----------------------------------------------------------------------*/ - -L_COMP_DATA * l_generateFlateData(const char *fname, l_int32 ascii85flag) -{ - return (L_COMP_DATA *)ERROR_PTR("function not present", - "l_generateFlateData", NULL); -} - -/* ----------------------------------------------------------------------*/ - -L_COMP_DATA * l_generateG4Data(const char *fname, l_int32 ascii85flag) -{ - return (L_COMP_DATA *)ERROR_PTR("function not present", - "l_generateG4Data", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, - l_uint8 **pdata, size_t *pnbytes) -{ - return ERROR_INT("function not present", "cidConvertToPdfData", 1); -} - -/* ----------------------------------------------------------------------*/ - -void l_CIDataDestroy(L_COMP_DATA **pcid) -{ - L_ERROR("function not present\n", "l_CIDataDestroy"); - return; -} - -/* ----------------------------------------------------------------------*/ - -void l_pdfSetG4ImageMask(l_int32 flag) -{ - L_ERROR("function not present\n", "l_pdfSetG4ImageMask"); - return; -} - -/* ----------------------------------------------------------------------*/ - -void l_pdfSetDateAndVersion(l_int32 flag) -{ - L_ERROR("function not present\n", "l_pdfSetDateAndVersion"); - return; -} - -/* 
----------------------------------------------------------------------*/ - -/* --------------------------------------------*/ -#endif /* !USE_PDFIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix.h deleted file mode 100644 index 597b8dbd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix.h +++ /dev/null @@ -1,1342 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -#ifndef LEPTONICA_PIX_H -#define LEPTONICA_PIX_H - -/*! - * \file pix.h - * - *
- *   Valid image types in leptonica:
- *       Pix: 1 bpp, with and without colormap
- *       Pix: 2 bpp, with and without colormap
- *       Pix: 4 bpp, with and without colormap
- *       Pix: 8 bpp, with and without colormap
- *       Pix: 16 bpp (1 spp)
- *       Pix: 32 bpp (rgb, 3 spp)
- *       Pix: 32 bpp (rgba, 4 spp)
- *       FPix: 32 bpp float
- *       DPix: 64 bpp double
- *       Notes:
- *          (1) The only valid Pix image type with alpha is rgba.
- *              In particular, the alpha component is not used in
- *              cmapped images.
- *          (2) PixComp can hold any Pix with IFF_PNG encoding.
- *
- *   Contents:
- *
- *   (1) This file defines most of the image-related structs used in leptonica:
- *         struct Pix
- *         struct PixColormap
- *         struct RGBA_Quad
- *         struct Pixa
- *         struct Pixaa
- *         struct Box
- *         struct Boxa
- *         struct Boxaa
- *         struct Pta
- *         struct Ptaa
- *         struct Pixacc
- *         struct PixTiling
- *         struct FPix
- *         struct FPixa
- *         struct DPix
- *         struct PixComp
- *         struct PixaComp
- *
- *   (2) This file has definitions for:
- *         Colors for RGBA
- *         Colors for drawing boxes
- *         Perceptual color weights
- *         Colormap conversion flags
- *         Rasterop bit flags
- *         Structure access flags (for insert, copy, clone, copy-clone)
- *         Sorting flags (by type and direction)
- *         Blending flags
- *         Graphics pixel setting flags
- *         Size and location filter flags
- *         Color component selection flags
- *         16-bit conversion flags
- *         Rotation and shear flags
- *         Affine transform order flags
- *         Grayscale filling flags
- *         Flags for setting to white or black
- *         Flags for getting white or black pixel value
- *         Flags for 8 and 16 bit pixel sums
- *         Dithering flags
- *         Distance flags
- *         Value flags
- *         Statistical measures
- *         Set selection flags
- *         Text orientation flags
- *         Edge orientation flags
- *         Line orientation flags
- *         Image orientation flags
- *         Scan direction flags
- *         Box size adjustment flags
- *         Flags for modifying box boundaries using a second box
- *         Handling overlapping bounding boxes in boxa
- *         Selecting or making a box from two (intersecting) boxes
- *         Flags for replacing invalid boxes
- *         Flags for box corners and center
- *         Horizontal warp
- *         Pixel selection for resampling
- *         Thinning flags
- *         Runlength flags
- *         Edge filter flags
- *         Subpixel color component ordering in LCD display
- *         HSV histogram flags
- *         Region flags (inclusion, exclusion)
- *         Flags for adding text to a pix
- *         Flags for plotting on a pix
- *         Flags for making simple masks
- *         Flags for selecting display program
- *         Flags in the 'special' pix field for non-default operations
- *         Handling negative values in conversion to unsigned int
- *         Relative to zero flags
- *         Flags for adding or removing trailing slash from string
- *
- *   (3) This file has typedefs for the pix allocator and deallocator functions
- *         alloc_fn()
- *         dealloc_fn().
- * 
- */ - - -/*-------------------------------------------------------------------------* - * Basic Pix * - *-------------------------------------------------------------------------*/ - /* The 'special' field is by default 0, but it can hold integers - * that direct non-default actions, e.g., in png and jpeg I/O. */ - -/*! Basic Pix */ -struct Pix -{ - l_uint32 w; /*!< width in pixels */ - l_uint32 h; /*!< height in pixels */ - l_uint32 d; /*!< depth in bits (bpp) */ - l_uint32 spp; /*!< number of samples per pixel */ - l_uint32 wpl; /*!< 32-bit words/line */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - l_int32 xres; /*!< image res (ppi) in x direction */ - /*!< (use 0 if unknown) */ - l_int32 yres; /*!< image res (ppi) in y direction */ - /*!< (use 0 if unknown) */ - l_int32 informat; /*!< input file format, IFF_* */ - l_int32 special; /*!< special instructions for I/O, etc */ - char *text; /*!< text string associated with pix */ - struct PixColormap *colormap; /*!< colormap (may be null) */ - l_uint32 *data; /*!< the image data */ -}; -typedef struct Pix PIX; - -/*! Colormap of a Pix */ -struct PixColormap -{ - void *array; /*!< colormap table (array of RGBA_QUAD) */ - l_int32 depth; /*!< of pix (1, 2, 4 or 8 bpp) */ - l_int32 nalloc; /*!< number of color entries allocated */ - l_int32 n; /*!< number of color entries used */ -}; -typedef struct PixColormap PIXCMAP; - - - /*! Colormap table entry (after the BMP version). - * Note that the BMP format stores the colormap table exactly - * as it appears here, with color samples being stored sequentially, - * in the order (b,g,r,a). 
*/ -struct RGBA_Quad -{ - l_uint8 blue; /*!< blue value */ - l_uint8 green; /*!< green value */ - l_uint8 red; /*!< red value */ - l_uint8 alpha; /*!< alpha value */ -}; -typedef struct RGBA_Quad RGBA_QUAD; - - -/*-------------------------------------------------------------------------* - * Colors for 32 RGBA * - *-------------------------------------------------------------------------*/ -/*
- *  Notes:
- *      (1) These are the byte indices for colors in 32 bpp images.
- *          They are used through the GET/SET_DATA_BYTE accessors.
- *          The 4th byte, typically known as the "alpha channel" and used
- *          for blending, is used to a small extent in leptonica.
- *      (2) Do not change these values!  If you redefine them, functions
- *          that have the shifts hardcoded for efficiency and conciseness
- *          (instead of using the constants below) will break.  These
- *          functions are labelled with "***"  next to their names at
- *          the top of the files in which they are defined.
- *      (3) The shifts to extract the red, green, blue and alpha components
- *          from a 32 bit pixel are defined here.
- * 
- */ - -/*! RGBA Color */ -enum { - COLOR_RED = 0, /*!< red color index in RGBA_QUAD */ - COLOR_GREEN = 1, /*!< green color index in RGBA_QUAD */ - COLOR_BLUE = 2, /*!< blue color index in RGBA_QUAD */ - L_ALPHA_CHANNEL = 3 /*!< alpha value index in RGBA_QUAD */ -}; - -static const l_int32 L_RED_SHIFT = - 8 * (sizeof(l_uint32) - 1 - COLOR_RED); /* 24 */ -static const l_int32 L_GREEN_SHIFT = - 8 * (sizeof(l_uint32) - 1 - COLOR_GREEN); /* 16 */ -static const l_int32 L_BLUE_SHIFT = - 8 * (sizeof(l_uint32) - 1 - COLOR_BLUE); /* 8 */ -static const l_int32 L_ALPHA_SHIFT = - 8 * (sizeof(l_uint32) - 1 - L_ALPHA_CHANNEL); /* 0 */ - - -/*-------------------------------------------------------------------------* - * Colors for drawing boxes * - *-------------------------------------------------------------------------*/ -/*! Box Color */ -enum { - L_DRAW_RED = 0, /*!< draw in red */ - L_DRAW_GREEN = 1, /*!< draw in green */ - L_DRAW_BLUE = 2, /*!< draw in blue */ - L_DRAW_SPECIFIED = 3, /*!< draw specified color */ - L_DRAW_RGB = 4, /*!< draw as sequence of r,g,b */ - L_DRAW_RANDOM = 5 /*!< draw randomly chosen colors */ -}; - - -/*-------------------------------------------------------------------------* - * Perceptual color weights * - *-------------------------------------------------------------------------*/ -/*
- *  Notes:
- *      (1) These perceptual weighting factors are ad-hoc, but they do
- *          add up to 1.  Unlike, for example, the weighting factors for
- *          converting RGB to luminance, or more specifically to Y in the
- *          YUV colorspace.  Those numbers come from the
- *          International Telecommunications Union, via ITU-R.
- * 
- */ -static const l_float32 L_RED_WEIGHT = 0.3f; /*!< Percept. weight for red */ -static const l_float32 L_GREEN_WEIGHT = 0.5f; /*!< Percept. weight for green */ -static const l_float32 L_BLUE_WEIGHT = 0.2f; /*!< Percept. weight for blue */ - - -/*-------------------------------------------------------------------------* - * Flags for colormap conversion * - *-------------------------------------------------------------------------*/ -/*! Cmap Conversion */ -enum { - REMOVE_CMAP_TO_BINARY = 0, /*!< remove colormap for conv to 1 bpp */ - REMOVE_CMAP_TO_GRAYSCALE = 1, /*!< remove colormap for conv to 8 bpp */ - REMOVE_CMAP_TO_FULL_COLOR = 2, /*!< remove colormap for conv to 32 bpp */ - REMOVE_CMAP_WITH_ALPHA = 3, /*!< remove colormap and alpha */ - REMOVE_CMAP_BASED_ON_SRC = 4 /*!< remove depending on src format */ -}; - - -/*------------------------------------------------------------------------* - *! - *
- * The following operation bit flags have been modified from
- * Sun's pixrect.h.
- *
- * The 'op' in 'rasterop' is represented by an integer
- * composed with Boolean functions using the set of five integers
- * given below.  The integers, and the op codes resulting from
- * boolean expressions on them, need only be in the range from 0 to 15.
- * The function is applied on a per-pixel basis.
- *
- * Examples: the op code representing ORing the src and dest
- * is computed using the bit OR, as PIX_SRC | PIX_DST;  the op
- * code representing XORing src and dest is found from
- * PIX_SRC ^ PIX_DST;  the op code representing ANDing src and dest
- * is found from PIX_SRC & PIX_DST.  Note that
- * PIX_NOT(PIX_CLR) = PIX_SET, and v.v., as they must be.
- *
- * We use the following set of definitions:
- *
- *      #define   PIX_SRC      0xc
- *      #define   PIX_DST      0xa
- *      #define   PIX_NOT(op)  (op) ^ 0xf
- *      #define   PIX_CLR      0x0
- *      #define   PIX_SET      0xf
- *
- * These definitions differ from Sun's, in that Sun left-shifted
- * each value by 1 pixel, and used the least significant bit as a
- * flag for the "pseudo-operation" of clipping.  We don't need
- * this bit, because it is both efficient and safe ALWAYS to clip
- * the rectangles to the src and dest images, which is what we do.
- * See the notes in rop.h on the general choice of these bit flags.
- *
- * [If for some reason you need compatibility with Sun's xview package,
- * you can adopt the original Sun definitions to avoid redefinition conflicts:
- *
- *      #define   PIX_SRC      (0xc << 1)
- *      #define   PIX_DST      (0xa << 1)
- *      #define   PIX_NOT(op)  ((op) ^ 0x1e)
- *      #define   PIX_CLR      (0x0 << 1)
- *      #define   PIX_SET      (0xf << 1)
- * ]
- *
- * We have, for reference, the following 16 unique op flags:
- *
- *      PIX_CLR                           0000             0x0
- *      PIX_SET                           1111             0xf
- *      PIX_SRC                           1100             0xc
- *      PIX_DST                           1010             0xa
- *      PIX_NOT(PIX_SRC)                  0011             0x3
- *      PIX_NOT(PIX_DST)                  0101             0x5
- *      PIX_SRC | PIX_DST                 1110             0xe
- *      PIX_SRC & PIX_DST                 1000             0x8
- *      PIX_SRC ^ PIX_DST                 0110             0x6
- *      PIX_NOT(PIX_SRC) | PIX_DST        1011             0xb
- *      PIX_NOT(PIX_SRC) & PIX_DST        0010             0x2
- *      PIX_SRC | PIX_NOT(PIX_DST)        1101             0xd
- *      PIX_SRC & PIX_NOT(PIX_DST)        0100             0x4
- *      PIX_NOT(PIX_SRC | PIX_DST)        0001             0x1
- *      PIX_NOT(PIX_SRC & PIX_DST)        0111             0x7
- *      PIX_NOT(PIX_SRC ^ PIX_DST)        1001             0x9
- *
- * 
- *-------------------------------------------------------------------------*/ - -#define PIX_SRC (0xc) /*!< use source pixels */ -#define PIX_DST (0xa) /*!< use destination pixels */ -#define PIX_NOT(op) ((op) ^ 0x0f) /*!< invert operation %op */ -#define PIX_CLR (0x0) /*!< clear pixels */ -#define PIX_SET (0xf) /*!< set pixels */ - -#define PIX_PAINT (PIX_SRC | PIX_DST) /*!< paint = src | dst */ -#define PIX_MASK (PIX_SRC & PIX_DST) /*!< mask = src & dst */ -#define PIX_SUBTRACT (PIX_DST & PIX_NOT(PIX_SRC)) /*!< subtract = */ - /*!< src & !dst */ -#define PIX_XOR (PIX_SRC ^ PIX_DST) /*!< xor = src ^ dst */ - - -/*-------------------------------------------------------------------------* - *
- *   Important Notes:
- *
- *       (1) The image data is stored in a single contiguous
- *           array of l_uint32, into which the pixels are packed.
- *           By "packed" we mean that there are no unused bits
- *           between pixels, except for end-of-line padding to
- *           satisfy item (2) below.
- *
- *       (2) Every image raster line begins on a 32-bit word
- *           boundary within this array.
- *
- *       (3) Pix image data is stored in 32-bit units, with the
- *           pixels ordered from left to right in the image being
- *           stored in order from the MSB to LSB within the word,
- *           for both big-endian and little-endian machines.
- *           This is the natural ordering for big-endian machines,
- *           as successive bytes are stored and fetched progressively
- *           to the right.  However, for little-endians, when storing
- *           we re-order the bytes from this byte stream order, and
- *           reshuffle again for byte access on 32-bit entities.
- *           So if the bytes come in sequence from left to right, we
- *           store them on little-endians in byte order:
- *                3 2 1 0 7 6 5 4 ...
- *           This MSB to LSB ordering allows left and right shift
- *           operations on 32 bit words to move the pixels properly.
- *
- *       (4) We use 32 bit pixels for both RGB and RGBA color images.
- *           The A (alpha) byte is ignored in most leptonica functions
- *           operating on color images.  Within each 4 byte pixel, the
- *           color samples are ordered from MSB to LSB, as follows:
- *
- *                |  MSB  |  2nd MSB  |  3rd MSB  |  LSB  |
- *                   red      green       blue      alpha
- *                    0         1           2         3   (big-endian)
- *                    3         2           1         0   (little-endian)
- *
- *           Because we use MSB to LSB ordering within the 32-bit word,
- *           the individual 8-bit samples can be accessed with
- *           GET_DATA_BYTE and SET_DATA_BYTE macros, using the
- *           (implicitly big-ending) ordering
- *                 red:    byte 0  (MSB)
- *                 green:  byte 1  (2nd MSB)
- *                 blue:   byte 2  (3rd MSB)
- *                 alpha:  byte 3  (LSB)
- *
- *           The specific color assignment is made in this file,
- *           through the definitions of COLOR_RED, etc.  Then the R, G
- *           B and A sample values can be retrieved using
- *                 redval = GET_DATA_BYTE(&pixel, COLOR_RED);
- *                 greenval = GET_DATA_BYTE(&pixel, COLOR_GREEN);
- *                 blueval = GET_DATA_BYTE(&pixel, COLOR_BLUE);
- *                 alphaval = GET_DATA_BYTE(&pixel, L_ALPHA_CHANNEL);
- *           and they can be set with
- *                 SET_DATA_BYTE(&pixel, COLOR_RED, redval);
- *                 SET_DATA_BYTE(&pixel, COLOR_GREEN, greenval);
- *                 SET_DATA_BYTE(&pixel, COLOR_BLUE, blueval);
- *                 SET_DATA_BYTE(&pixel, L_ALPHA_CHANNEL, alphaval);
- *
- *           More efficiently, these components can be extracted directly
- *           by shifting and masking, explicitly using the values in
- *           L_RED_SHIFT, etc.:
- *                 (pixel32 >> L_RED_SHIFT) & 0xff;         (red)
- *                 (pixel32 >> L_GREEN_SHIFT) & 0xff;       (green)
- *                 (pixel32 >> L_BLUE_SHIFT) & 0xff;        (blue)
- *                 (pixel32 >> L_ALPHA_SHIFT) & 0xff;       (alpha)
- *           The functions extractRGBValues() and extractRGBAValues() are
- *           provided to do this.  Likewise, the pixels can be set
- *           directly by shifting, using composeRGBPixel() and
- *           composeRGBAPixel().
- *
- *           All these operations work properly on both big- and little-endians.
- *
- *       (5) A reference count is held within each pix, giving the
- *           number of ptrs to the pix.  When a pixClone() call
- *           is made, the ref count is increased by 1, and
- *           when a pixDestroy() call is made, the reference count
- *           of the pix is decremented.  The pix is only destroyed
- *           when the reference count goes to zero.
- *
- *       (6) The version numbers (below) are used in the serialization
- *           of these data structures.  They are placed in the files,
- *           and rarely (if ever) change.  Provision is currently made for
- *           backward compatibility in reading from boxaa version 2.
- *
- *       (7) The serialization dependencies are as follows:
- *               pixaa  :  pixa  :  boxa
- *               boxaa  :  boxa
- *           So, for example, pixaa and boxaa can be changed without
- *           forcing a change in pixa or boxa.  However, if pixa is
- *           changed, it forces a change in pixaa, and if boxa is
- *           changed, if forces a change in the other three.
- *           We define four version numbers:
- *               PIXAA_VERSION_NUMBER
- *               PIXA_VERSION_NUMBER
- *               BOXAA_VERSION_NUMBER
- *               BOXA_VERSION_NUMBER
- * 
- *-------------------------------------------------------------------------*/ - - - -/*-------------------------------------------------------------------------* - * Array of pix * - *-------------------------------------------------------------------------*/ - - /* Serialization for primary data structures */ -#define PIXAA_VERSION_NUMBER 2 /*!< Version for Pixaa serialization */ -#define PIXA_VERSION_NUMBER 2 /*!< Version for Pixa serialization */ -#define BOXA_VERSION_NUMBER 2 /*!< Version for Boxa serialization */ -#define BOXAA_VERSION_NUMBER 3 /*!< Version for Boxaa serialization */ - -/*! Array of pix */ -struct Pixa -{ - l_int32 n; /*!< number of Pix in ptr array */ - l_int32 nalloc; /*!< number of Pix ptrs allocated */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - struct Pix **pix; /*!< the array of ptrs to pix */ - struct Boxa *boxa; /*!< array of boxes */ -}; -typedef struct Pixa PIXA; - -/*! Array of arrays of pix */ -struct Pixaa -{ - l_int32 n; /*!< number of Pixa in ptr array */ - l_int32 nalloc; /*!< number of Pixa ptrs allocated */ - struct Pixa **pixa; /*!< array of ptrs to pixa */ - struct Boxa *boxa; /*!< array of boxes */ -}; -typedef struct Pixaa PIXAA; - - -/*-------------------------------------------------------------------------* - * Basic rectangle and rectangle arrays * - *-------------------------------------------------------------------------*/ - -/*! Basic rectangle */ -struct Box -{ - l_int32 x; /*!< left coordinate */ - l_int32 y; /*!< top coordinate */ - l_int32 w; /*!< box width */ - l_int32 h; /*!< box height */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ -}; -typedef struct Box BOX; - -/*! Array of Box */ -struct Boxa -{ - l_int32 n; /*!< number of box in ptr array */ - l_int32 nalloc; /*!< number of box ptrs allocated */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - struct Box **box; /*!< box ptr array */ -}; -typedef struct Boxa BOXA; - -/*! 
Array of Boxa */ -struct Boxaa -{ - l_int32 n; /*!< number of boxa in ptr array */ - l_int32 nalloc; /*!< number of boxa ptrs allocated */ - struct Boxa **boxa; /*!< boxa ptr array */ -}; -typedef struct Boxaa BOXAA; - - -/*-------------------------------------------------------------------------* - * Array of points * - *-------------------------------------------------------------------------*/ -#define PTA_VERSION_NUMBER 1 /*!< Version for Pta serialization */ - -/*! Array of points */ -struct Pta -{ - l_int32 n; /*!< actual number of pts */ - l_int32 nalloc; /*!< size of allocated arrays */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - l_float32 *x, *y; /*!< arrays of floats */ -}; -typedef struct Pta PTA; - - -/*-------------------------------------------------------------------------* - * Array of Pta * - *-------------------------------------------------------------------------*/ - -/*! Array of Pta */ -struct Ptaa -{ - l_int32 n; /*!< number of pta in ptr array */ - l_int32 nalloc; /*!< number of pta ptrs allocated */ - struct Pta **pta; /*!< pta ptr array */ -}; -typedef struct Ptaa PTAA; - - -/*-------------------------------------------------------------------------* - * Pix accumulator container * - *-------------------------------------------------------------------------*/ - -/*! Pix accumulator container */ -struct Pixacc -{ - l_int32 w; /*!< array width */ - l_int32 h; /*!< array height */ - l_int32 offset; /*!< used to allow negative */ - /*!< intermediate results */ - struct Pix *pix; /*!< the 32 bit accumulator pix */ -}; -typedef struct Pixacc PIXACC; - - -/*-------------------------------------------------------------------------* - * Pix tiling * - *-------------------------------------------------------------------------*/ - -/*! 
Pix tiling */ -struct PixTiling -{ - struct Pix *pix; /*!< input pix (a clone) */ - l_int32 nx; /*!< number of tiles horizontally */ - l_int32 ny; /*!< number of tiles vertically */ - l_int32 w; /*!< tile width */ - l_int32 h; /*!< tile height */ - l_int32 xoverlap; /*!< overlap on left and right */ - l_int32 yoverlap; /*!< overlap on top and bottom */ - l_int32 strip; /*!< strip for paint; default is TRUE */ -}; -typedef struct PixTiling PIXTILING; - - -/*-------------------------------------------------------------------------* - * FPix: pix with float array * - *-------------------------------------------------------------------------*/ -#define FPIX_VERSION_NUMBER 2 /*!< Version for FPix serialization */ - -/*! Pix with float array */ -struct FPix -{ - l_int32 w; /*!< width in pixels */ - l_int32 h; /*!< height in pixels */ - l_int32 wpl; /*!< 32-bit words/line */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - l_int32 xres; /*!< image res (ppi) in x direction */ - /*!< (use 0 if unknown) */ - l_int32 yres; /*!< image res (ppi) in y direction */ - /*!< (use 0 if unknown) */ - l_float32 *data; /*!< the float image data */ -}; -typedef struct FPix FPIX; - -/*! Array of FPix */ -struct FPixa -{ - l_int32 n; /*!< number of fpix in ptr array */ - l_int32 nalloc; /*!< number of fpix ptrs allocated */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - struct FPix **fpix; /*!< the array of ptrs to fpix */ -}; -typedef struct FPixa FPIXA; - - -/*-------------------------------------------------------------------------* - * DPix: pix with double array * - *-------------------------------------------------------------------------*/ -#define DPIX_VERSION_NUMBER 2 /*!< Version for DPix serialization */ - -/*! 
Pix with double array */ -struct DPix -{ - l_int32 w; /*!< width in pixels */ - l_int32 h; /*!< height in pixels */ - l_int32 wpl; /*!< 32-bit words/line */ - l_uint32 refcount; /*!< reference count (1 if no clones) */ - l_int32 xres; /*!< image res (ppi) in x direction */ - /*!< (use 0 if unknown) */ - l_int32 yres; /*!< image res (ppi) in y direction */ - /*!< (use 0 if unknown) */ - l_float64 *data; /*!< the double image data */ -}; -typedef struct DPix DPIX; - - -/*-------------------------------------------------------------------------* - * PixComp: compressed pix * - *-------------------------------------------------------------------------*/ - -/*! Compressed Pix */ -struct PixComp -{ - l_int32 w; /*!< width in pixels */ - l_int32 h; /*!< height in pixels */ - l_int32 d; /*!< depth in bits */ - l_int32 xres; /*!< image res (ppi) in x direction */ - /*!< (use 0 if unknown) */ - l_int32 yres; /*!< image res (ppi) in y direction */ - /*!< (use 0 if unknown) */ - l_int32 comptype; /*!< compressed format (IFF_TIFF_G4, */ - /*!< IFF_PNG, IFF_JFIF_JPEG) */ - char *text; /*!< text string associated with pix */ - l_int32 cmapflag; /*!< flag (1 for cmap, 0 otherwise) */ - l_uint8 *data; /*!< the compressed image data */ - size_t size; /*!< size of the data array */ -}; -typedef struct PixComp PIXC; - - -/*-------------------------------------------------------------------------* - * PixaComp: array of compressed pix * - *-------------------------------------------------------------------------*/ -#define PIXACOMP_VERSION_NUMBER 2 /*!< Version for PixaComp serialization */ - -/*! 
Array of compressed pix */ -struct PixaComp -{ - l_int32 n; /*!< number of PixComp in ptr array */ - l_int32 nalloc; /*!< number of PixComp ptrs allocated */ - l_int32 offset; /*!< indexing offset into ptr array */ - struct PixComp **pixc; /*!< the array of ptrs to PixComp */ - struct Boxa *boxa; /*!< array of boxes */ -}; -typedef struct PixaComp PIXAC; - - -/*-------------------------------------------------------------------------* - * Access and storage flags * - *-------------------------------------------------------------------------*/ -/* - *
- *  For Pix, Box, Pta and Numa, there are 3 standard methods for handling
- *  the retrieval or insertion of a struct:
- *     (1) direct insertion (Don't do this if there is another handle
- *                           somewhere to this same struct!)
- *     (2) copy (Always safe, sets up a refcount of 1 on the new object.
- *               Can be undesirable if very large, such as an image or
- *               an array of images.)
- *     (3) clone (Makes another handle to the same struct, and bumps the
- *                refcount up by 1.  OK to use except in two situations:
- *                (a) You change data through one of the handles but don't
- *                    want those changes to be seen by the other handle.
- *                (b) The application is multi-threaded.  Because the clone
- *                    operation is not atomic (e.g., locked with a mutex),
- *                    it is possible to end up with an incorrect ref count,
- *                    causing either a memory leak or a crash.
- *
- *  For Pixa and Boxa, which are structs that hold an array of clonable
- *  structs, there is an additional method:
- *     (4) copy-clone (Makes a new higher-level struct with a refcount
- *                     of 1, but clones all the structs in the array.)
- *
- *  Unlike the other structs, when retrieving a string from an Sarray,
- *  you are allowed to get a handle without a copy or clone (i.e., the
- *  string is not owned by the handle).  You must not either free the string
- *  or insert it in some other struct that would own it.  Specifically,
- *  for an Sarray, the copyflag for retrieval is either:
- *         L_COPY or L_NOCOPY
- *  and for insertion, the copyflag is either:
- *         L_COPY or one of {L_INSERT , L_NOCOPY} (the latter are equivalent
- *                                                 for insertion))
- *  Typical patterns are:
- *  (1) Reference a string in an Sarray with L_NOCOPY and insert a copy
- *      of it in another Sarray with L_COPY.
- *  (2) Copy a string from an Sarray with L_COPY and insert it in
- *      another Sarray with L_INSERT (or L_NOCOPY).
- *  In both cases, a copy is made and both Sarrays own their instance
- *  of that string.
- * 
- */ - -/*! Object Access */ -enum { - L_NOCOPY = 0, /*!< do not copy the object; do not delete the ptr */ - L_INSERT = L_NOCOPY, /*!< stuff it in; do not copy or clone */ - L_COPY = 1, /*!< make/use a copy of the object */ - L_CLONE = 2, /*!< make/use clone (ref count) of the object */ - L_COPY_CLONE = 3 /*!< make a new array object (e.g., pixa) and fill */ - /*!< the array with clones (e.g., pix) */ -}; - - -/*----------------------------------------------------------------------------* - * Sort flags * - *----------------------------------------------------------------------------*/ -/*! Sort Mode */ -enum { - L_SHELL_SORT = 1, /*!< use shell sort */ - L_BIN_SORT = 2 /*!< use bin sort */ -}; - -/*! Sort Order */ -enum { - L_SORT_INCREASING = 1, /*!< sort in increasing order */ - L_SORT_DECREASING = 2 /*!< sort in decreasing order */ -}; - -/*! Sort Type */ -enum { - L_SORT_BY_X = 1, /*!< sort box or c.c. by left edge location */ - L_SORT_BY_Y = 2, /*!< sort box or c.c. by top edge location */ - L_SORT_BY_RIGHT = 3, /*!< sort box or c.c. by right edge location */ - L_SORT_BY_BOT = 4, /*!< sort box or c.c. by bot edge location */ - L_SORT_BY_WIDTH = 5, /*!< sort box or c.c. by width */ - L_SORT_BY_HEIGHT = 6, /*!< sort box or c.c. by height */ - L_SORT_BY_MIN_DIMENSION = 7, /*!< sort box or c.c. by min dimension */ - L_SORT_BY_MAX_DIMENSION = 8, /*!< sort box or c.c. by max dimension */ - L_SORT_BY_PERIMETER = 9, /*!< sort box or c.c. by perimeter */ - L_SORT_BY_AREA = 10, /*!< sort box or c.c. by area */ - L_SORT_BY_ASPECT_RATIO = 11 /*!< sort box or c.c. by width/height ratio */ -}; - - -/*---------------------------------------------------------------------------* - * Blend flags * - *---------------------------------------------------------------------------*/ -/*! 
Blend Types */ -enum { - L_BLEND_WITH_INVERSE = 1, /*!< add some of src inverse to itself */ - L_BLEND_TO_WHITE = 2, /*!< shift src colors towards white */ - L_BLEND_TO_BLACK = 3, /*!< shift src colors towards black */ - L_BLEND_GRAY = 4, /*!< blend src directly with blender */ - L_BLEND_GRAY_WITH_INVERSE = 5 /*!< add amount of src inverse to itself, */ - /*!< based on blender pix value */ -}; - -/*! Paint Selection */ -enum { - L_PAINT_LIGHT = 1, /*!< colorize non-black pixels */ - L_PAINT_DARK = 2 /*!< colorize non-white pixels */ -}; - - -/*-------------------------------------------------------------------------* - * Graphics pixel setting * - *-------------------------------------------------------------------------*/ -/*! Pixel Setting */ -enum { - L_SET_PIXELS = 1, /*!< set all bits in each pixel to 1 */ - L_CLEAR_PIXELS = 2, /*!< set all bits in each pixel to 0 */ - L_FLIP_PIXELS = 3 /*!< flip all bits in each pixel */ -}; - - -/*-------------------------------------------------------------------------* - * Size and location filter flags * - *-------------------------------------------------------------------------*/ -/*! Size Comparison */ -enum { - L_SELECT_IF_LT = 1, /*!< save if value is less than threshold */ - L_SELECT_IF_GT = 2, /*!< save if value is more than threshold */ - L_SELECT_IF_LTE = 3, /*!< save if value is <= to the threshold */ - L_SELECT_IF_GTE = 4 /*!< save if value is >= to the threshold */ -}; - -/*! Size Selection */ -enum { - L_SELECT_BY_WIDTH = 1, /*!< select by width; 1 bpp */ - L_SELECT_BY_HEIGHT = 2, /*!< select by height; 1 bpp */ - L_SELECT_BY_MAX_DIMENSION = 3, /*!< select by max of width and */ - /*!< height; 1 bpp */ - L_SELECT_BY_AREA = 4, /*!< select by foreground area; 1 bpp */ - L_SELECT_BY_PERIMETER = 5 /*!< select by perimeter; 1 bpp */ -}; - -/*! 
Location Filter */ -enum { - L_SELECT_WIDTH = 1, /*!< width must satisfy constraint */ - L_SELECT_HEIGHT = 2, /*!< height must satisfy constraint */ - L_SELECT_XVAL = 3, /*!< x value must satisfy constraint */ - L_SELECT_YVAL = 4, /*!< y value must satisfy constraint */ - L_SELECT_IF_EITHER = 5, /*!< either width or height (or xval */ - /*!< or yval) can satisfy constraint */ - L_SELECT_IF_BOTH = 6 /*!< both width and height (or xval */ - /*!< and yval must satisfy constraint */ -}; - -/*! Boxa Check */ -enum { - L_CHECK_WIDTH = 1, /*!< check and possibly modify width */ - L_CHECK_HEIGHT = 2, /*!< check and possibly modify height */ - L_CHECK_BOTH = 3 /*!< check and possibly modify both */ -}; - - -/*-------------------------------------------------------------------------* - * Color component selection flags * - *-------------------------------------------------------------------------*/ -/*! Color Selection */ -enum { - L_SELECT_RED = 1, /*!< use red component */ - L_SELECT_GREEN = 2, /*!< use green component */ - L_SELECT_BLUE = 3, /*!< use blue component */ - L_SELECT_MIN = 4, /*!< use min color component */ - L_SELECT_MAX = 5, /*!< use max color component */ - L_SELECT_AVERAGE = 6, /*!< use average of color components */ - L_SELECT_HUE = 7, /*!< use hue value (in HSV color space) */ - L_SELECT_SATURATION = 8 /*!< use saturation value (in HSV space) */ -}; - - -/*-------------------------------------------------------------------------* - * 16-bit conversion flags * - *-------------------------------------------------------------------------*/ -/*! 
16-bit Conversion */ -enum { - L_LS_BYTE = 1, /*!< use LSB */ - L_MS_BYTE = 2, /*!< use MSB */ - L_AUTO_BYTE = 3, /*!< use LSB if max(val) < 256; else MSB */ - L_CLIP_TO_FF = 4, /*!< use max(val, 255) */ - L_LS_TWO_BYTES = 5, /*!< use two LSB */ - L_MS_TWO_BYTES = 6, /*!< use two MSB */ - L_CLIP_TO_FFFF = 7 /*!< use max(val, 65535) */ -}; - - -/*-------------------------------------------------------------------------* - * Rotate and shear flags * - *-------------------------------------------------------------------------*/ -/*! Rotation Type */ -enum { - L_ROTATE_AREA_MAP = 1, /*!< use area map rotation, if possible */ - L_ROTATE_SHEAR = 2, /*!< use shear rotation */ - L_ROTATE_SAMPLING = 3 /*!< use sampling */ -}; - -/*! Background Color */ -enum { - L_BRING_IN_WHITE = 1, /*!< bring in white pixels from the outside */ - L_BRING_IN_BLACK = 2 /*!< bring in black pixels from the outside */ -}; - -/*! Shear Point */ -enum { - L_SHEAR_ABOUT_CORNER = 1, /*!< shear image about UL corner */ - L_SHEAR_ABOUT_CENTER = 2 /*!< shear image about center */ -}; - - -/*-------------------------------------------------------------------------* - * Affine transform order flags * - *-------------------------------------------------------------------------*/ -/*! Affine Transform Order */ -enum { - L_TR_SC_RO = 1, /*!< translate, scale, rotate */ - L_SC_RO_TR = 2, /*!< scale, rotate, translate */ - L_RO_TR_SC = 3, /*!< rotate, translate, scale */ - L_TR_RO_SC = 4, /*!< translate, rotate, scale */ - L_RO_SC_TR = 5, /*!< rotate, scale, translate */ - L_SC_TR_RO = 6 /*!< scale, translate, rotate */ -}; - - -/*-------------------------------------------------------------------------* - * Grayscale filling flags * - *-------------------------------------------------------------------------*/ -/*! 
Grayscale Fill */ -enum { - L_FILL_WHITE = 1, /*!< fill white pixels (e.g, in fg map) */ - L_FILL_BLACK = 2 /*!< fill black pixels (e.g., in bg map) */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for setting to white or black * - *-------------------------------------------------------------------------*/ -/*! BlackWhite Set */ -enum { - L_SET_WHITE = 1, /*!< set pixels to white */ - L_SET_BLACK = 2 /*!< set pixels to black */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for getting white or black value * - *-------------------------------------------------------------------------*/ -/*! BlackWhite Get */ -enum { - L_GET_WHITE_VAL = 1, /*!< get white pixel value */ - L_GET_BLACK_VAL = 2 /*!< get black pixel value */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for 8 bit and 16 bit pixel sums * - *-------------------------------------------------------------------------*/ -/*! BlackWhite Sum */ -enum { - L_WHITE_IS_MAX = 1, /*!< white pixels are 0xff or 0xffff; black are 0 */ - L_BLACK_IS_MAX = 2 /*!< black pixels are 0xff or 0xffff; white are 0 */ -}; - - -/*-------------------------------------------------------------------------* - * Dither parameters * - * If within this grayscale distance from black or white, * - * do not propagate excess or deficit to neighboring pixels. * - *-------------------------------------------------------------------------*/ -/*! 
Dither Distance */ -enum { - DEFAULT_CLIP_LOWER_1 = 10, /*!< dist to black with no prop; 1 bpp */ - DEFAULT_CLIP_UPPER_1 = 10, /*!< dist to black with no prop; 1 bpp */ - DEFAULT_CLIP_LOWER_2 = 5, /*!< dist to black with no prop; 2 bpp */ - DEFAULT_CLIP_UPPER_2 = 5 /*!< dist to black with no prop; 2 bpp */ -}; - - -/*-------------------------------------------------------------------------* - * Distance type flags * - *-------------------------------------------------------------------------*/ -/*! Distance Type */ -enum { - L_MANHATTAN_DISTANCE = 1, /*!< L1 distance (e.g., in color space) */ - L_EUCLIDEAN_DISTANCE = 2 /*!< L2 distance */ -}; - - -/*-------------------------------------------------------------------------* - * Distance Value flags * - *-------------------------------------------------------------------------*/ -/*! Distance Value */ -enum { - L_NEGATIVE = 1, /*!< values < 0 */ - L_NON_NEGATIVE = 2, /*!< values >= 0 */ - L_POSITIVE = 3, /*!< values > 0 */ - L_NON_POSITIVE = 4, /*!< values <= 0 */ - L_ZERO = 5, /*!< values = 0 */ - L_ALL = 6 /*!< all values */ -}; - - -/*-------------------------------------------------------------------------* - * Statistical measures * - *-------------------------------------------------------------------------*/ -/*! Stats Type */ -enum { - L_MEAN_ABSVAL = 1, /*!< average of abs values */ - L_MEDIAN_VAL = 2, /*!< median value of set */ - L_MODE_VAL = 3, /*!< mode value of set */ - L_MODE_COUNT = 4, /*!< mode count of set */ - L_ROOT_MEAN_SQUARE = 5, /*!< rms of values */ - L_STANDARD_DEVIATION = 6, /*!< standard deviation from mean */ - L_VARIANCE = 7 /*!< variance of values */ -}; - - -/*-------------------------------------------------------------------------* - * Set index selection flags * - *-------------------------------------------------------------------------*/ -/*! 
Index Selection */ -enum { - L_CHOOSE_CONSECUTIVE = 1, /*!< select 'n' consecutive */ - L_CHOOSE_SKIP_BY = 2 /*!< select at intervals of 'n' */ -}; - - -/*-------------------------------------------------------------------------* - * Text orientation flags * - *-------------------------------------------------------------------------*/ -/*! Text Orientation */ -enum { - L_TEXT_ORIENT_UNKNOWN = 0, /*!< low confidence on text orientation */ - L_TEXT_ORIENT_UP = 1, /*!< portrait, text rightside-up */ - L_TEXT_ORIENT_LEFT = 2, /*!< landscape, text up to left */ - L_TEXT_ORIENT_DOWN = 3, /*!< portrait, text upside-down */ - L_TEXT_ORIENT_RIGHT = 4 /*!< landscape, text up to right */ -}; - - -/*-------------------------------------------------------------------------* - * Edge orientation flags * - *-------------------------------------------------------------------------*/ -/*! Edge Orientation */ -enum { - L_HORIZONTAL_EDGES = 0, /*!< filters for horizontal edges */ - L_VERTICAL_EDGES = 1, /*!< filters for vertical edges */ - L_ALL_EDGES = 2 /*!< filters for all edges */ -}; - - -/*-------------------------------------------------------------------------* - * Line orientation flags * - *-------------------------------------------------------------------------*/ -/*! Line Orientation */ -enum { - L_HORIZONTAL_LINE = 0, /*!< horizontal line */ - L_POS_SLOPE_LINE = 1, /*!< 45 degree line with positive slope */ - L_VERTICAL_LINE = 2, /*!< vertical line */ - L_NEG_SLOPE_LINE = 3, /*!< 45 degree line with negative slope */ - L_OBLIQUE_LINE = 4 /*!< neither horizontal nor vertical */ -}; - - -/*-------------------------------------------------------------------------* - * Image orientation flags * - *-------------------------------------------------------------------------*/ -/*! 
Image Orientation */ -enum { - L_PORTRAIT_MODE = 0, /*!< typical: page is viewed with height > width */ - L_LANDSCAPE_MODE = 1 /*!< page is viewed at 90 deg to portrait mode */ -}; - - -/*-------------------------------------------------------------------------* - * Scan direction flags * - *-------------------------------------------------------------------------*/ -/*! Scan Direction */ -enum { - L_FROM_LEFT = 0, /*!< scan from left */ - L_FROM_RIGHT = 1, /*!< scan from right */ - L_FROM_TOP = 2, /*!< scan from top */ - L_FROM_BOT = 3, /*!< scan from bottom */ - L_SCAN_NEGATIVE = 4, /*!< scan in negative direction */ - L_SCAN_POSITIVE = 5, /*!< scan in positive direction */ - L_SCAN_BOTH = 6, /*!< scan in both directions */ - L_SCAN_HORIZONTAL = 7, /*!< horizontal scan (direction unimportant) */ - L_SCAN_VERTICAL = 8 /*!< vertical scan (direction unimportant) */ -}; - - -/*-------------------------------------------------------------------------* - * Box size adjustment and location flags * - *-------------------------------------------------------------------------*/ -/*! 
Box Adjustment */ -enum { - L_ADJUST_SKIP = 0, /*!< do not adjust */ - L_ADJUST_LEFT = 1, /*!< adjust left edge */ - L_ADJUST_RIGHT = 2, /*!< adjust right edge */ - L_ADJUST_LEFT_AND_RIGHT = 3, /*!< adjust both left and right edges */ - L_ADJUST_TOP = 4, /*!< adjust top edge */ - L_ADJUST_BOT = 5, /*!< adjust bottom edge */ - L_ADJUST_TOP_AND_BOT = 6, /*!< adjust both top and bottom edges */ - L_ADJUST_CHOOSE_MIN = 7, /*!< choose the min median value */ - L_ADJUST_CHOOSE_MAX = 8, /*!< choose the max median value */ - L_SET_LEFT = 9, /*!< set left side to a given value */ - L_SET_RIGHT = 10, /*!< set right side to a given value */ - L_SET_TOP = 11, /*!< set top side to a given value */ - L_SET_BOT = 12, /*!< set bottom side to a given value */ - L_GET_LEFT = 13, /*!< get left side location */ - L_GET_RIGHT = 14, /*!< get right side location */ - L_GET_TOP = 15, /*!< get top side location */ - L_GET_BOT = 16 /*!< get bottom side location */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for modifying box boundaries using a second box * - *-------------------------------------------------------------------------*/ -/*! Box Boundary Mod */ -enum { - L_USE_MINSIZE = 1, /*!< use boundaries giving min size */ - L_USE_MAXSIZE = 2, /*!< use boundaries giving max size */ - L_SUB_ON_LOC_DIFF = 3, /*!< modify boundary if big location diff */ - L_SUB_ON_SIZE_DIFF = 4, /*!< modify boundary if big size diff */ - L_USE_CAPPED_MIN = 5, /*!< modify boundary with capped min */ - L_USE_CAPPED_MAX = 6 /*!< modify boundary with capped max */ -}; - - -/*-------------------------------------------------------------------------* - * Handling overlapping bounding boxes in boxa * - *-------------------------------------------------------------------------*/ -/*! 
Box Overlap Mod */ -enum { - L_COMBINE = 1, /*!< resize to bounding region; remove smaller */ - L_REMOVE_SMALL = 2 /*!< only remove smaller */ -}; - - -/*-------------------------------------------------------------------------* - * Selecting or making a box from two (intersecting) boxes * - *-------------------------------------------------------------------------*/ -/*! Box Combine or Select */ -enum { - L_GEOMETRIC_UNION = 1, /*!< use union of two boxes */ - L_GEOMETRIC_INTERSECTION = 2, /*!< use intersection of two boxes */ - L_LARGEST_AREA = 3, /*!< use box with largest area */ - L_SMALLEST_AREA = 4 /*!< use box with smallest area */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for replacing invalid boxes * - *-------------------------------------------------------------------------*/ -/*! Box Replacement */ -enum { - L_USE_ALL_BOXES = 1, /*!< consider all boxes in the sequence */ - L_USE_SAME_PARITY_BOXES = 2 /*!< consider boxes with the same parity */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for box corners and center * - *-------------------------------------------------------------------------*/ -/*! Box Corners and Center */ -enum { - L_UPPER_LEFT = 1, /*!< UL corner */ - L_UPPER_RIGHT = 2, /*!< UR corner */ - L_LOWER_LEFT = 3, /*!< LL corner */ - L_LOWER_RIGHT = 4, /*!< LR corner */ - L_BOX_CENTER = 5 /*!< center */ -}; - - -/*-------------------------------------------------------------------------* - * Horizontal warp * - *-------------------------------------------------------------------------*/ -/*! Horiz Warp Stretch */ -enum { - L_WARP_TO_LEFT = 1, /*!< increasing stretch or contraction to left */ - L_WARP_TO_RIGHT = 2 /*!< increasing stretch or contraction to right */ -}; - -/*! 
Horiz Warp Mode */ -enum { - L_LINEAR_WARP = 1, /*!< stretch or contraction grows linearly */ - L_QUADRATIC_WARP = 2 /*!< stretch or contraction grows quadratically */ -}; - - -/*-------------------------------------------------------------------------* - * Pixel selection for resampling * - *-------------------------------------------------------------------------*/ -/*! Pixel Selection */ -enum { - L_INTERPOLATED = 1, /*!< linear interpolation from src pixels */ - L_SAMPLED = 2 /*!< nearest src pixel sampling only */ -}; - - -/*-------------------------------------------------------------------------* - * Thinning flags * - *-------------------------------------------------------------------------*/ -/*! Thinning Polarity */ -enum { - L_THIN_FG = 1, /*!< thin foreground of 1 bpp image */ - L_THIN_BG = 2 /*!< thin background of 1 bpp image */ -}; - - -/*-------------------------------------------------------------------------* - * Runlength flags * - *-------------------------------------------------------------------------*/ -/*! Runlength Direction */ -enum { - L_HORIZONTAL_RUNS = 0, /*!< determine runlengths of horizontal runs */ - L_VERTICAL_RUNS = 1 /*!< determine runlengths of vertical runs */ -}; - - -/*-------------------------------------------------------------------------* - * Edge filter flags * - *-------------------------------------------------------------------------*/ -/*! Edge Filter */ -enum { - L_SOBEL_EDGE = 1, /*!< Sobel edge filter */ - L_TWO_SIDED_EDGE = 2 /*!< Two-sided edge filter */ -}; - - -/*-------------------------------------------------------------------------* - * Subpixel color component ordering in LCD display * - *-------------------------------------------------------------------------*/ -/*! 
Subpixel Color Order */ -enum { - L_SUBPIXEL_ORDER_RGB = 1, /*!< sensor order left-to-right RGB */ - L_SUBPIXEL_ORDER_BGR = 2, /*!< sensor order left-to-right BGR */ - L_SUBPIXEL_ORDER_VRGB = 3, /*!< sensor order top-to-bottom RGB */ - L_SUBPIXEL_ORDER_VBGR = 4 /*!< sensor order top-to-bottom BGR */ -}; - - -/*-------------------------------------------------------------------------* - * HSV histogram flags * - *-------------------------------------------------------------------------*/ -/*! HSV Histogram */ -enum { - L_HS_HISTO = 1, /*!< Use hue-saturation histogram */ - L_HV_HISTO = 2, /*!< Use hue-value histogram */ - L_SV_HISTO = 3 /*!< Use saturation-value histogram */ -}; - - -/*-------------------------------------------------------------------------* - * HSV Region flags (inclusion, exclusion) * - *-------------------------------------------------------------------------*/ -/*! HSV Region */ -enum { - L_INCLUDE_REGION = 1, /*!< Use pixels with specified HSV region */ - L_EXCLUDE_REGION = 2 /*!< Use pixels outside HSV region */ -}; - - -/*-------------------------------------------------------------------------* - * Location flags for adding text to a pix * - *-------------------------------------------------------------------------*/ -/*! Add Text Location */ -enum { - L_ADD_ABOVE = 1, /*!< Add text above the image */ - L_ADD_BELOW = 2, /*!< Add text below the image */ - L_ADD_LEFT = 3, /*!< Add text to the left of the image */ - L_ADD_RIGHT = 4, /*!< Add text to the right of the image */ - L_ADD_AT_TOP = 5, /*!< Add text over the top of the image */ - L_ADD_AT_BOT = 6, /*!< Add text over the bottom of the image */ - L_ADD_AT_LEFT = 7, /*!< Add text over left side of the image */ - L_ADD_AT_RIGHT = 8 /*!< Add text over right side of the image */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for plotting on a pix * - *-------------------------------------------------------------------------*/ -/*! 
Pix Plot */ -enum { - L_PLOT_AT_TOP = 1, /*!< Plot horizontally at top */ - L_PLOT_AT_MID_HORIZ = 2, /*!< Plot horizontally at middle */ - L_PLOT_AT_BOT = 3, /*!< Plot horizontally at bottom */ - L_PLOT_AT_LEFT = 4, /*!< Plot vertically at left */ - L_PLOT_AT_MID_VERT = 5, /*!< Plot vertically at middle */ - L_PLOT_AT_RIGHT = 6 /*!< Plot vertically at right */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for making simple masks * - *-------------------------------------------------------------------------*/ -/*! Mask Generation */ -enum { - L_USE_INNER = 1, /*!< Select the interior part */ - L_USE_OUTER = 2 /*!< Select the outer part (e.g., a frame) */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for selecting display program * - *-------------------------------------------------------------------------*/ -/*! Display Program */ -enum { - L_DISPLAY_WITH_XZGV = 1, /*!< Use xzgv with pixDisplay() */ - L_DISPLAY_WITH_XLI = 2, /*!< Use xli with pixDisplay() */ - L_DISPLAY_WITH_XV = 3, /*!< Use xv with pixDisplay() */ - L_DISPLAY_WITH_IV = 4, /*!< Use irfvanview (win) with pixDisplay() */ - L_DISPLAY_WITH_OPEN = 5 /*!< Use open (apple) with pixDisplay() */ -}; - -/*-------------------------------------------------------------------------* - * Flag(s) used in the 'special' pix field for non-default operations * - * - 0 is default for chroma sampling in jpeg * - * - 10-19 are used for zlib compression in png write * - * - 4 and 8 are used for specifying connectivity in labelling * - *-------------------------------------------------------------------------*/ -/*! 
Flags used in Pix::special */ -enum { - L_NO_CHROMA_SAMPLING_JPEG = 1 /*!< Write full resolution chroma */ -}; - - -/*-------------------------------------------------------------------------* - * Handling negative values in conversion to unsigned int * - *-------------------------------------------------------------------------*/ -/*! Negative Value */ -enum { - L_CLIP_TO_ZERO = 1, /*!< Clip negative values to 0 */ - L_TAKE_ABSVAL = 2 /*!< Convert to positive using L_ABS() */ -}; - - -/*-------------------------------------------------------------------------* - * Relative to zero flags * - *-------------------------------------------------------------------------*/ -/*! Relative To Zero */ -enum { - L_LESS_THAN_ZERO = 1, /*!< Choose values less than zero */ - L_EQUAL_TO_ZERO = 2, /*!< Choose values equal to zero */ - L_GREATER_THAN_ZERO = 3 /*!< Choose values greater than zero */ -}; - - -/*-------------------------------------------------------------------------* - * Flags for adding or removing trailing slash from string * - *-------------------------------------------------------------------------*/ -/*! Trailing Slash */ -enum { - L_ADD_TRAIL_SLASH = 1, /*!< Add trailing slash to string */ - L_REMOVE_TRAIL_SLASH = 2 /*!< Remove trailing slash from string */ -}; - - -/*-------------------------------------------------------------------------* - * Pix allocator and deallocator function types * - *-------------------------------------------------------------------------*/ -/*! Allocator function type */ -typedef void *(*alloc_fn)(size_t); - -/*! 
Deallocator function type */ -typedef void (*dealloc_fn)(void *); - - -#endif /* LEPTONICA_PIX_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix1.c deleted file mode 100644 index 527d42fe..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix1.c +++ /dev/null @@ -1,1930 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pix1.c - *
- *
- *    The pixN.c {N = 1,2,3,4,5} files are sorted by the type of operation.
- *    The primary functions in these files are:
- *
- *        pix1.c: constructors, destructors and field accessors
- *        pix2.c: pixel poking of image, pad and border pixels
- *        pix3.c: masking and logical ops, counting, mirrored tiling
- *        pix4.c: histograms, statistics, fg/bg estimation
- *        pix5.c: property measurements, rectangle extraction
- *
- *
- *    This file has the basic constructors, destructors and field accessors
- *
- *    Pix memory management (allows custom allocator and deallocator)
- *          static void  *pix_malloc()
- *          static void   pix_free()
- *          void          setPixMemoryManager()
- *
- *    Pix creation
- *          PIX          *pixCreate()
- *          PIX          *pixCreateNoInit()
- *          PIX          *pixCreateTemplate()
- *          PIX          *pixCreateTemplateNoInit()
- *          PIX          *pixCreateWithCmap()
- *          PIX          *pixCreateHeader()
- *          PIX          *pixClone()
- *
- *    Pix destruction
- *          void          pixDestroy()
- *          static void   pixFree()
- *
- *    Pix copy
- *          PIX          *pixCopy()
- *          l_int32       pixResizeImageData()
- *          l_int32       pixCopyColormap()
- *          l_int32       pixSizesEqual()
- *          l_int32       pixTransferAllData()
- *          l_int32       pixSwapAndDestroy()
- *
- *    Pix accessors
- *          l_int32       pixGetWidth()
- *          l_int32       pixSetWidth()
- *          l_int32       pixGetHeight()
- *          l_int32       pixSetHeight()
- *          l_int32       pixGetDepth()
- *          l_int32       pixSetDepth()
- *          l_int32       pixGetDimensions()
- *          l_int32       pixSetDimensions()
- *          l_int32       pixCopyDimensions()
- *          l_int32       pixGetSpp()
- *          l_int32       pixSetSpp()
- *          l_int32       pixCopySpp()
- *          l_int32       pixGetWpl()
- *          l_int32       pixSetWpl()
- *          l_int32       pixGetRefcount()
- *          l_int32       pixChangeRefcount()
- *          l_uint32      pixGetXRes()
- *          l_int32       pixSetXRes()
- *          l_uint32      pixGetYRes()
- *          l_int32       pixSetYRes()
- *          l_int32       pixGetResolution()
- *          l_int32       pixSetResolution()
- *          l_int32       pixCopyResolution()
- *          l_int32       pixScaleResolution()
- *          l_int32       pixGetInputFormat()
- *          l_int32       pixSetInputFormat()
- *          l_int32       pixCopyInputFormat()
- *          l_int32       pixSetSpecial()
- *          char         *pixGetText()
- *          l_int32       pixSetText()
- *          l_int32       pixAddText()
- *          l_int32       pixCopyText()
- *          PIXCMAP      *pixGetColormap()
- *          l_int32       pixSetColormap()
- *          l_int32       pixDestroyColormap()
- *          l_uint32     *pixGetData()
- *          l_int32       pixSetData()
- *          l_uint32     *pixExtractData()
- *          l_int32       pixFreeData()
- *
- *    Pix line ptrs
- *          void        **pixGetLinePtrs()
- *
- *    Pix debug
- *          l_int32       pixPrintStreamInfo()
- *
- *
- *  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- *      Important notes on direct management of pix image data
- *  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- *
- *  Custom allocator and deallocator
- *  --------------------------------
- *
- *  At the lowest level, you can specify the function that does the
- *  allocation and deallocation of the data field in the pix.
- *  By default, this is malloc and free.  However, by calling
- *  setPixMemoryManager(), custom functions can be substituted.
- *  When using this, keep two things in mind:
- *
- *   (1) Call setPixMemoryManager() before any pix have been allocated
- *   (2) Destroy all pix as usual, in order to prevent leaks.
- *
- *  In pixalloc.c, we provide an example custom allocator and deallocator.
- *  To use it, you must call pmsCreate() before any pix have been allocated
- *  and pmsDestroy() at the end after all pix have been destroyed.
- *
- *
- *  Direct manipulation of the pix data field
- *  -----------------------------------------
- *
- *  Memory management of the (image) data field in the pix is
- *  handled differently from that in the colormap or text fields.
- *  For colormap and text, the functions pixSetColormap() and
- *  pixSetText() remove the existing heap data and insert the
- *  new data.  For the image data, pixSetData() just reassigns the
- *  data field; any existing data will be lost if there isn't
- *  another handle for it.
- *
- *  Why is pixSetData() limited in this way?  Because the image
- *  data can be very large, we need flexible ways to handle it,
- *  particularly when you want to re-use the data in a different
- *  context without making a copy.  Here are some different
- *  things you might want to do:
- *
- *  (1) Use pixCopy(pixd, pixs) where pixd is not the same size
- *      as pixs.  This will remove the data in pixd, allocate a
- *      new data field in pixd, and copy the data from pixs, leaving
- *      pixs unchanged.
- *
- *  (2) Use pixTransferAllData(pixd, &pixs, ...) to transfer the
- *      data from pixs to pixd without making a copy of it.  If
- *      pixs is not cloned, this will do the transfer and destroy pixs.
- *      But if the refcount of pixs is greater than 1, it just copies
- *      the data and decrements the ref count.
- *
- *  (3) Use pixSwapAndDestroy(pixd, &pixs) to replace pixs by an
- *      existing pixd.  This is similar to pixTransferAllData(), but
- *      simpler, in that it never makes any copies and if pixs is
- *      cloned, the other references are not changed by this operation.
- *
- *  (4) Use pixExtractData() to extract the image data from the pix
- *      without copying if possible.  This could be used, for example,
- *      to convert from a pix to some other data structure with minimal
- *      heap allocation.  After the data is extracated, the pixels can
- *      be munged and used in another context.  However, the danger
- *      here is that the pix might have a refcount > 1, in which case
- *      a copy of the data must be made and the input pix left unchanged.
- *      If there are no clones, the image data can be extracted without
- *      a copy, and the data ptr in the pix must be nulled before
- *      destroying it because the pix will no longer 'own' the data.
- *
- *  We have provided accessors and functions here that should be
- *  sufficient so that you can do anything you want without
- *  explicitly referencing any of the pix member fields.
- *
- *  However, to avoid memory smashes and leaks when doing special operations
- *  on the pix data field, look carefully at the behavior of the image
- *  data accessors and keep in mind that when you invoke pixDestroy(),
- *  the pix considers itself the owner of all its heap data.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static void pixFree(PIX *pix); - - -/*-------------------------------------------------------------------------* - * Pix Memory Management * - * * - * These functions give you the freedom to specify at compile or run * - * time the allocator and deallocator to be used for pix. It has no * - * effect on memory management for other data structs, which are * - * controlled by the #defines in environ.h. Likewise, the #defines * - * in environ.h have no effect on the pix memory management. * - * The default functions are malloc and free. Use setPixMemoryManager() * - * to specify other functions to use. * - *-------------------------------------------------------------------------*/ - -/*! Pix memory manager */ - /* - *
-     * Notes:
-     *      (1) The allocator and deallocator function types,
-     *          alloc_fn and dealloc_fn, are defined in pix.h.
-     * 
- */ -struct PixMemoryManager -{ - alloc_fn allocator; - dealloc_fn deallocator; -}; - -/*! Default Pix memory manager */ -static struct PixMemoryManager pix_mem_manager = { - &malloc, - &free -}; - -static void * -pix_malloc(size_t size) -{ -#ifndef _MSC_VER - return (*pix_mem_manager.allocator)(size); -#else /* _MSC_VER */ - /* Under MSVC++, pix_mem_manager is initialized after a call - * to pix_malloc. Just ignore the custom allocator feature. */ - return malloc(size); -#endif /* _MSC_VER */ -} - -static void -pix_free(void *ptr) -{ -#ifndef _MSC_VER - (*pix_mem_manager.deallocator)(ptr); - return; -#else /* _MSC_VER */ - /* Under MSVC++, pix_mem_manager is initialized after a call - * to pix_malloc. Just ignore the custom allocator feature. */ - free(ptr); - return; -#endif /* _MSC_VER */ -} - -/*! - * \brief setPixMemoryManager() - * - * \param[in] allocator [optional] use NULL to skip - * \param[in] deallocator [optional] use NULL to skip - * \return void - * - *
- * Notes:
- *      (1) Use this to change the alloc and/or dealloc functions;
- *          e.g., setPixMemoryManager(my_malloc, my_free).
- *      (2) The C99 standard (section 6.7.5.3, par. 8) says:
- *            A declaration of a parameter as "function returning type"
- *            shall be adjusted to "pointer to function returning type"
- *          so that it can be in either of these two forms:
- *            (a) type (function-ptr(type, ...))
- *            (b) type ((*function-ptr)(type, ...))
- *          because form (a) is implictly converted to form (b), as in the
- *          definition of struct PixMemoryManager above.  So, for example,
- *          we should be able to declare either of these:
- *            (a) void *(allocator(size_t))
- *            (b) void *((*allocator)(size_t))
- *          However, MSVC++ only accepts the second version.
- * 
- */ -void -setPixMemoryManager(alloc_fn allocator, - dealloc_fn deallocator) -{ - if (allocator) pix_mem_manager.allocator = allocator; - if (deallocator) pix_mem_manager.deallocator = deallocator; - return; -} - - -/*--------------------------------------------------------------------* - * Pix Creation * - *--------------------------------------------------------------------*/ -/*! - * \brief pixCreate() - * - * \param[in] width, height, depth - * \return pixd with data allocated and initialized to 0, - * or NULL on error - */ -PIX * -pixCreate(l_int32 width, - l_int32 height, - l_int32 depth) -{ -PIX *pixd; - - PROCNAME("pixCreate"); - - if ((pixd = pixCreateNoInit(width, height, depth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - memset(pixd->data, 0, 4LL * pixd->wpl * pixd->h); - return pixd; -} - - -/*! - * \brief pixCreateNoInit() - * - * \param[in] width, height, depth - * \return pixd with data allocated but not initialized, - * or NULL on error - * - *
- * Notes:
- *      (1) Must set pad bits to avoid reading uninitialized data, because
- *          some optimized routines (e.g., pixConnComp()) read from pad bits.
- * 
- */ -PIX * -pixCreateNoInit(l_int32 width, - l_int32 height, - l_int32 depth) -{ -l_int32 wpl; -PIX *pixd; -l_uint32 *data; - - PROCNAME("pixCreateNoInit"); - if ((pixd = pixCreateHeader(width, height, depth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - wpl = pixGetWpl(pixd); - if ((data = (l_uint32 *)pix_malloc(4LL * wpl * height)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("pix_malloc fail for data", procName, NULL); - } - pixSetData(pixd, data); - pixSetPadBits(pixd, 0); - return pixd; -} - - -/*! - * \brief pixCreateTemplate() - * - * \param[in] pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Makes a Pix of the same size as the input Pix, with the
- *          data array allocated and initialized to 0.
- *      (2) Copies the other fields, including colormap if it exists.
- * 
- */ -PIX * -pixCreateTemplate(const PIX *pixs) -{ -PIX *pixd; - - PROCNAME("pixCreateTemplate"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if ((pixd = pixCreateTemplateNoInit(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - memset(pixd->data, 0, 4LL * pixd->wpl * pixd->h); - return pixd; -} - - -/*! - * \brief pixCreateTemplateNoInit() - * - * \param[in] pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Makes a Pix of the same size as the input Pix, with
- *          the data array allocated but not initialized to 0.
- *      (2) Copies the other fields, including colormap if it exists.
- * 
- */ -PIX * -pixCreateTemplateNoInit(const PIX *pixs) -{ -l_int32 w, h, d; -PIX *pixd; - - PROCNAME("pixCreateTemplateNoInit"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &d); - if ((pixd = pixCreateNoInit(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopySpp(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixCreateWithCmap() - * - * \param[in] width - * \param[in] height - * \param[in] depth 2, 4 or 8 bpp - * \param[in] initcolor L_SET_BLACK, L_SET_WHITE - * \return pixd with the initialization color assigned to all pixels, - * or NULL on error. - * - *
- * Notes:
- *      (1) Creates a pix with a cmap, initialized to value 0.
- *      (2) Initializes the pix black or white by adding that color
- *          to the cmap at index 0.
- * 
- */ -PIX * -pixCreateWithCmap(l_int32 width, - l_int32 height, - l_int32 depth, - l_int32 initcolor) -{ -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixCreateWithCmap"); - - if (depth != 2 && depth != 4 && depth != 8) - return (PIX *)ERROR_PTR("depth not 2, 4 or 8 bpp", procName, NULL); - - if ((pix = pixCreate(width, height, depth)) == NULL) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - cmap = pixcmapCreate(depth); - pixSetColormap(pix, cmap); - if (initcolor == L_SET_BLACK) - pixcmapAddColor(cmap, 0, 0, 0); - else /* L_SET_WHITE */ - pixcmapAddColor(cmap, 255, 255, 255); - return pix; -} - - -/*! - * \brief pixCreateHeader() - * - * \param[in] width, height, depth - * \return pixd with no data allocated, or NULL on error - * - *
- * Notes:
- *      (1) It is assumed that all 32 bit pix have 3 spp.  If there is
- *          a valid alpha channel, this will be set to 4 spp later.
- *      (2) All pixCreate*() functions call pixCreateHeader().
-            If the number of bytes to be allocated is larger than the
- *          maximum value in an int32, we can get overflow, resulting
- *          in a smaller amount of memory actually being allocated.
- *          Later, an attempt to access memory that wasn't allocated will
- *          cause a crash.  So to avoid crashing a program (or worse)
- *          with bad (or malicious) input, we limit the requested
- *          allocation of image data in a typesafe way.
- * 
- */ -PIX * -pixCreateHeader(l_int32 width, - l_int32 height, - l_int32 depth) -{ -l_int32 wpl; -l_uint64 wpl64, bignum; -PIX *pixd; - - PROCNAME("pixCreateHeader"); - - if ((depth != 1) && (depth != 2) && (depth != 4) && (depth != 8) - && (depth != 16) && (depth != 24) && (depth != 32)) - return (PIX *)ERROR_PTR("depth must be {1, 2, 4, 8, 16, 24, 32}", - procName, NULL); - if (width <= 0) - return (PIX *)ERROR_PTR("width must be > 0", procName, NULL); - if (height <= 0) - return (PIX *)ERROR_PTR("height must be > 0", procName, NULL); - - /* Avoid overflow in malloc, malicious or otherwise */ - wpl64 = ((l_uint64)width * (l_uint64)depth + 31) / 32; - if (wpl64 > ((1LL << 29) - 1)) { - L_ERROR("requested w = %d, h = %d, d = %d\n", - procName, width, height, depth); - return (PIX *)ERROR_PTR("wpl >= 2^29", procName, NULL); - } - wpl = (l_int32)wpl64; - bignum = 4LL * wpl * height; /* number of bytes to be requested */ - if (bignum > ((1LL << 31) - 1)) { - L_ERROR("requested w = %d, h = %d, d = %d\n", - procName, width, height, depth); - return (PIX *)ERROR_PTR("requested bytes >= 2^31", procName, NULL); - } - - pixd = (PIX *)LEPT_CALLOC(1, sizeof(PIX)); - pixSetWidth(pixd, width); - pixSetHeight(pixd, height); - pixSetDepth(pixd, depth); - pixSetWpl(pixd, wpl); - if (depth == 24 || depth == 32) - pixSetSpp(pixd, 3); - else - pixSetSpp(pixd, 1); - pixd->refcount = 1; - pixd->informat = IFF_UNKNOWN; - return pixd; -} - - -/*! - * \brief pixClone() - * - * \param[in] pixs - * \return same pix ptr, or NULL on error - * - *
- * Notes:
- *      (1) A "clone" is simply a handle (ptr) to an existing pix.
- *          It is implemented because (a) images can be large and
- *          hence expensive to copy, and (b) extra handles to a data
- *          structure need to be made with a simple policy to avoid
- *          both double frees and memory leaks.  Pix are reference
- *          counted.  The side effect of pixClone() is an increase
- *          by 1 in the ref count.
- *      (2) The protocol to be used is:
- *          (a) Whenever you want a new handle to an existing image,
- *              call pixClone(), which just bumps a ref count.
- *          (b) Always call pixDestroy() on all handles.  This
- *              decrements the ref count, nulls the handle, and
- *              only destroys the pix when pixDestroy() has been
- *              called on all handles.
- * 
- */ -PIX * -pixClone(PIX *pixs) -{ - PROCNAME("pixClone"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixChangeRefcount(pixs, 1); - - return pixs; -} - - -/*--------------------------------------------------------------------* - * Pix Destruction * - *--------------------------------------------------------------------*/ -/*! - * \brief pixDestroy() - * - * \param[in,out] ppix will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the pix.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -pixDestroy(PIX **ppix) -{ -PIX *pix; - - PROCNAME("pixDestroy"); - - if (!ppix) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((pix = *ppix) == NULL) - return; - pixFree(pix); - *ppix = NULL; - return; -} - - -/*! - * \brief pixFree() - * - * \param[in] pix - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the pix.
- * 
- */ -static void -pixFree(PIX *pix) -{ -l_uint32 *data; -char *text; - - if (!pix) return; - - pixChangeRefcount(pix, -1); - if (pixGetRefcount(pix) <= 0) { - if ((data = pixGetData(pix)) != NULL) - pix_free(data); - if ((text = pixGetText(pix)) != NULL) - LEPT_FREE(text); - pixDestroyColormap(pix); - LEPT_FREE(pix); - } - return; -} - - -/*-------------------------------------------------------------------------* - * Pix Copy * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixCopy() - * - * \param[in] pixd [optional] can be null, equal to pixs, - * different from pixs - * \param[in] pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) There are three cases:
- *            (a) pixd == null  (makes a new pix; refcount = 1)
- *            (b) pixd == pixs  (no-op)
- *            (c) pixd != pixs  (data copy; no change in refcount)
- *          If the refcount of pixd > 1, case (c) will side-effect
- *          these handles.
- *      (2) The general pattern of use is:
- *             pixd = pixCopy(pixd, pixs);
- *          This will work for all three cases.
- *          For clarity when the case is known, you can use:
- *            (a) pixd = pixCopy(NULL, pixs);
- *            (c) pixCopy(pixd, pixs);
- *      (3) For case (c), we check if pixs and pixd are the same
- *          size (w,h,d).  If so, the data is copied directly.
- *          Otherwise, the data is reallocated to the correct size
- *          and the copy proceeds.  The refcount of pixd is unchanged.
- *      (4) This operation, like all others that may involve a pre-existing
- *          pixd, will side-effect any existing clones of pixd.
- * 
- */ -PIX * -pixCopy(PIX *pixd, /* can be null */ - const PIX *pixs) -{ -l_int32 bytes; - - PROCNAME("pixCopy"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixs == pixd) - return pixd; - - /* Total bytes in image data */ - bytes = 4 * pixGetWpl(pixs) * pixGetHeight(pixs); - - /* If we're making a new pix ... */ - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - memcpy(pixd->data, pixs->data, bytes); - return pixd; - } - - /* Reallocate image data if sizes are different. If this fails, - * pixd hasn't been changed. But we want to signal that the copy - * failed, so return NULL. This will cause a memory leak if the - * return ptr is assigned to pixd, but that is preferred to proceeding - * with an incorrect pixd, and in any event this use case of - * pixCopy() -- reallocating into an existing pix -- is infrequent. */ - if (pixResizeImageData(pixd, pixs) == 1) - return (PIX *)ERROR_PTR("reallocation of data failed", procName, NULL); - - /* Copy non-image data fields */ - pixCopyColormap(pixd, pixs); - pixCopySpp(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixCopyText(pixd, pixs); - - /* Copy image data */ - memcpy(pixd->data, pixs->data, bytes); - return pixd; -} - - -/*! - * \brief pixResizeImageData() - * - * \param[in] pixd gets new uninitialized buffer for image data - * \param[in] pixs determines the size of the buffer; not changed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the sizes of data in pixs and pixd are unequal, this
- *          frees the existing image data in pixd and allocates
- *          an uninitialized buffer that will hold the required amount
- *          of image data in pixs.  The image data from pixs is not
- *          copied into the new buffer.
- *      (2) On failure to allocate, pixd is unchanged.
- * 
- */ -l_ok -pixResizeImageData(PIX *pixd, - const PIX *pixs) -{ -l_int32 w, h, d, wpl, bytes; -l_uint32 *data; - - PROCNAME("pixResizeImageData"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - - if (pixSizesEqual(pixs, pixd)) /* nothing to do */ - return 0; - - /* Make sure we can copy the data */ - pixGetDimensions(pixs, &w, &h, &d); - wpl = pixGetWpl(pixs); - bytes = 4 * wpl * h; - if ((data = (l_uint32 *)pix_malloc(bytes)) == NULL) - return ERROR_INT("pix_malloc fail for data", procName, 1); - - /* OK, do it */ - pixSetWidth(pixd, w); - pixSetHeight(pixd, h); - pixSetDepth(pixd, d); - pixSetWpl(pixd, wpl); - pixFreeData(pixd); /* free any existing image data */ - pixSetData(pixd, data); /* set the uninitialized memory buffer */ - pixCopyResolution(pixd, pixs); - return 0; -} - - -/*! - * \brief pixCopyColormap() - * - * \param[in] pixd - * \param[in] pixs copies the colormap to %pixd - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys the colormap in pixd, unless the operation is a no-op
- * 
- */ -l_ok -pixCopyColormap(PIX *pixd, - const PIX *pixs) -{ -l_int32 valid; -const PIXCMAP *cmaps; -PIXCMAP *cmapd; - - PROCNAME("pixCopyColormap"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixs == pixd) - return 0; /* no-op */ - - pixDestroyColormap(pixd); - if ((cmaps = pixs->colormap) == NULL) /* not an error */ - return 0; - pixcmapIsValid(cmaps, &valid); - if (!valid) - return ERROR_INT("cmap not valid", procName, 1); - - if ((cmapd = pixcmapCopy(cmaps)) == NULL) - return ERROR_INT("cmapd not made", procName, 1); - pixSetColormap(pixd, cmapd); - return 0; -} - - -/*! - * \brief pixSizesEqual() - * - * \param[in] pix1, pix2 - * \return 1 if the two pix have same {h, w, d}; 0 otherwise. - */ -l_int32 -pixSizesEqual(const PIX *pix1, - const PIX *pix2) -{ - PROCNAME("pixSizesEqual"); - - if (!pix1 || !pix2) - return ERROR_INT("pix1 and pix2 not both defined", procName, 0); - - if (pix1 == pix2) - return 1; - - if ((pixGetWidth(pix1) != pixGetWidth(pix2)) || - (pixGetHeight(pix1) != pixGetHeight(pix2)) || - (pixGetDepth(pix1) != pixGetDepth(pix2))) - return 0; - else - return 1; -} - - -/*! - * \brief pixTransferAllData() - * - * \param[in] pixd must be different from pixs - * \param[in,out] ppixs will be nulled if refcount goes to 0 - * \param[in] copytext 1 to copy the text field; 0 to skip - * \param[in] copyformat 1 to copy the informat field; 0 to skip - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a complete data transfer from pixs to pixd,
- *          followed by the destruction of pixs (refcount permitting).
- *      (2) If the refcount of pixs is 1, pixs is destroyed.  Otherwise,
- *          the data in pixs is copied (rather than transferred) to pixd.
- *      (3) This operation, like all others with a pre-existing pixd,
- *          will side-effect any existing clones of pixd.  The pixd
- *          refcount does not change.
- *      (4) When might you use this?  Suppose you have an in-place Pix
- *          function (returning void) with the typical signature:
- *              void function-inplace(PIX *pix, ...)
- *          where "..." are non-pointer input parameters, and suppose
- *          further that you sometimes want to return an arbitrary Pix
- *          in place of the input Pix.  There are two ways you can do this:
- *          (a) The straightforward way is to change the function
- *              signature to take the address of the Pix ptr:
- * \code
- *                  void function-inplace(PIX **ppix, ...) {
- *                      PIX *pixt = function-makenew(*ppix);
- *                      pixDestroy(ppix);
- *                      *ppix = pixt;
- *                      return;
- *                  }
- * \endcode
- *              Here, the input and returned pix are different, as viewed
- *              by the calling function, and the inplace function is
- *              expected to destroy the input pix to avoid a memory leak.
- *          (b) Keep the signature the same and use pixTransferAllData()
- *              to return the new Pix in the input Pix struct:
- * \code
- *                  void function-inplace(PIX *pix, ...) {
- *                      PIX *pixt = function-makenew(pix);
- *                      pixTransferAllData(pix, &pixt, 0, 0);
- *                               // pixDestroy() is called on pixt
- *                      return;
- *                  }
- * \endcode
- *              Here, the input and returned pix are the same, as viewed
- *              by the calling function, and the inplace function must
- *              never destroy the input pix, because the calling function
- *              maintains an unchanged handle to it.
- * 
- */ -l_ok -pixTransferAllData(PIX *pixd, - PIX **ppixs, - l_int32 copytext, - l_int32 copyformat) -{ -l_int32 nbytes; -PIX *pixs; - - PROCNAME("pixTransferAllData"); - - if (!ppixs) - return ERROR_INT("&pixs not defined", procName, 1); - if ((pixs = *ppixs) == NULL) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixs == pixd) /* no-op */ - return ERROR_INT("pixd == pixs", procName, 1); - - if (pixGetRefcount(pixs) == 1) { /* transfer the data, cmap, text */ - pixFreeData(pixd); /* dealloc any existing data */ - pixSetData(pixd, pixGetData(pixs)); /* transfer new data from pixs */ - pixs->data = NULL; /* pixs no longer owns data */ - pixSetColormap(pixd, pixGetColormap(pixs)); /* frees old; sets new */ - pixs->colormap = NULL; /* pixs no longer owns colormap */ - if (copytext) { - pixSetText(pixd, pixGetText(pixs)); - pixSetText(pixs, NULL); - } - } else { /* preserve pixs by making a copy of the data, cmap, text */ - pixResizeImageData(pixd, pixs); - nbytes = 4 * pixGetWpl(pixs) * pixGetHeight(pixs); - memcpy(pixGetData(pixd), pixGetData(pixs), nbytes); - pixCopyColormap(pixd, pixs); - if (copytext) - pixCopyText(pixd, pixs); - } - - pixCopySpp(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyDimensions(pixd, pixs); - if (copyformat) - pixCopyInputFormat(pixd, pixs); - - /* This will destroy pixs if data was transferred; - * otherwise, it just decrements its refcount. */ - pixDestroy(ppixs); - return 0; -} - - -/*! - * \brief pixSwapAndDestroy() - * - * \param[out] ppixd [optional] input pixd can be null, - * and it must be different from pixs - * \param[in,out] ppixs will be nulled after the swap - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Simple operation to change the handle name safely.
- *          After this operation, the original image in pixd has
- *          been destroyed, pixd points to what was pixs, and
- *          the input pixs ptr has been nulled.
- *      (2) This works safely whether or not pixs and pixd are cloned.
- *          If pixs is cloned, the other handles still point to
- *          the original image, with the ref count reduced by 1.
- *      (3) Usage example:
- * \code
- *            Pix *pix1 = pixRead("...");
- *            Pix *pix2 = function(pix1, ...);
- *            pixSwapAndDestroy(&pix1, &pix2);
- *            pixDestroy(&pix1);  // holds what was in pix2
- * \endcode
- *          Example with clones ([] shows ref count of image generated
- *                               by the function):
- * \code
- *            Pix *pixs = pixRead("...");
- *            Pix *pix1 = pixClone(pixs);
- *            Pix *pix2 = function(pix1, ...);   [1]
- *            Pix *pix3 = pixClone(pix2);   [1] --> [2]
- *            pixSwapAndDestroy(&pix1, &pix2);
- *            pixDestroy(&pixs);  // still holds read image
- *            pixDestroy(&pix1);  // holds what was in pix2  [2] --> [1]
- *            pixDestroy(&pix3);  // holds what was in pix2  [1] --> [0]
- * \endcode
- * 
- */ -l_ok -pixSwapAndDestroy(PIX **ppixd, - PIX **ppixs) -{ - PROCNAME("pixSwapAndDestroy"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - if (!ppixs) - return ERROR_INT("&pixs not defined", procName, 1); - if (*ppixs == NULL) - return ERROR_INT("pixs not defined", procName, 1); - if (ppixs == ppixd) /* no-op */ - return ERROR_INT("&pixd == &pixs", procName, 1); - - pixDestroy(ppixd); - *ppixd = pixClone(*ppixs); - pixDestroy(ppixs); - return 0; -} - - -/*--------------------------------------------------------------------* - * Accessors * - *--------------------------------------------------------------------*/ -l_int32 -pixGetWidth(const PIX *pix) -{ - PROCNAME("pixGetWidth"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - - return pix->w; -} - - -l_int32 -pixSetWidth(PIX *pix, - l_int32 width) -{ - PROCNAME("pixSetWidth"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (width < 0) { - pix->w = 0; - return ERROR_INT("width must be >= 0", procName, 1); - } - - pix->w = width; - return 0; -} - - -l_int32 -pixGetHeight(const PIX *pix) -{ - PROCNAME("pixGetHeight"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - - return pix->h; -} - - -l_int32 -pixSetHeight(PIX *pix, - l_int32 height) -{ - PROCNAME("pixSetHeight"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (height < 0) { - pix->h = 0; - return ERROR_INT("h must be >= 0", procName, 1); - } - - pix->h = height; - return 0; -} - - -l_int32 -pixGetDepth(const PIX *pix) -{ - PROCNAME("pixGetDepth"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - - return pix->d; -} - - -l_int32 -pixSetDepth(PIX *pix, - l_int32 depth) -{ - PROCNAME("pixSetDepth"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (depth < 1) - return ERROR_INT("d must be >= 1", procName, 1); - - pix->d = depth; - return 0; -} - - -/*! 
- * \brief pixGetDimensions() - * - * \param[in] pix - * \param[out] pw, ph, pd [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixGetDimensions(const PIX *pix, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd) -{ - PROCNAME("pixGetDimensions"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pd) *pd = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (pw) *pw = pix->w; - if (ph) *ph = pix->h; - if (pd) *pd = pix->d; - return 0; -} - - -/*! - * \brief pixSetDimensions() - * - * \param[in] pix - * \param[in] w, h, d use 0 to skip the setting for any of these - * \return 0 if OK, 1 on error - */ -l_ok -pixSetDimensions(PIX *pix, - l_int32 w, - l_int32 h, - l_int32 d) -{ - PROCNAME("pixSetDimensions"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (w > 0) pixSetWidth(pix, w); - if (h > 0) pixSetHeight(pix, h); - if (d > 0) pixSetDepth(pix, d); - return 0; -} - - -/*! - * \brief pixCopyDimensions() - * - * \param[in] pixd - * \param[in] pixs - * \return 0 if OK, 1 on error - */ -l_ok -pixCopyDimensions(PIX *pixd, - const PIX *pixs) -{ - PROCNAME("pixCopyDimensions"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixs == pixd) - return 0; /* no-op */ - - pixSetWidth(pixd, pixGetWidth(pixs)); - pixSetHeight(pixd, pixGetHeight(pixs)); - pixSetDepth(pixd, pixGetDepth(pixs)); - pixSetWpl(pixd, pixGetWpl(pixs)); - return 0; -} - - -l_int32 -pixGetSpp(const PIX *pix) -{ - PROCNAME("pixGetSpp"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - - return pix->spp; -} - - -/* - * \brief pixSetSpp() - * - * \param[in] pix - * \param[in] spp 1, 3 or 4 samples - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For a 32 bpp pix, this can be used to ignore the
- *          alpha sample (spp == 3) or to use it (spp == 4).
- *          For example, to write a spp == 4 image without the alpha
- *          sample (as an rgb pix), call pixSetSpp(pix, 3) and
- *          then write it out as a png.
- * 
- */ -l_int32 -pixSetSpp(PIX *pix, - l_int32 spp) -{ - PROCNAME("pixSetSpp"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (spp < 1) - return ERROR_INT("spp must be >= 1", procName, 1); - - pix->spp = spp; - return 0; -} - - -/*! - * \brief pixCopySpp() - * - * \param[in] pixd - * \param[in] pixs - * \return 0 if OK, 1 on error - */ -l_ok -pixCopySpp(PIX *pixd, - const PIX *pixs) -{ - PROCNAME("pixCopySpp"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixs == pixd) - return 0; /* no-op */ - - pixSetSpp(pixd, pixGetSpp(pixs)); - return 0; -} - - -l_int32 -pixGetWpl(const PIX *pix) -{ - PROCNAME("pixGetWpl"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - return pix->wpl; -} - - -l_int32 -pixSetWpl(PIX *pix, - l_int32 wpl) -{ - PROCNAME("pixSetWpl"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pix->wpl = wpl; - return 0; -} - - -l_int32 -pixGetRefcount(const PIX *pix) -{ - PROCNAME("pixGetRefcount"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - return pix->refcount; -} - - -l_int32 -pixChangeRefcount(PIX *pix, - l_int32 delta) -{ - PROCNAME("pixChangeRefcount"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pix->refcount += delta; - return 0; -} - - -l_int32 -pixGetXRes(const PIX *pix) -{ - PROCNAME("pixGetXRes"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - return pix->xres; -} - - -l_int32 -pixSetXRes(PIX *pix, - l_int32 res) -{ - PROCNAME("pixSetXRes"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pix->xres = res; - return 0; -} - - -l_int32 -pixGetYRes(const PIX *pix) -{ - PROCNAME("pixGetYRes"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - return pix->yres; -} - - -l_int32 -pixSetYRes(PIX *pix, - l_int32 res) -{ - PROCNAME("pixSetYRes"); - - if (!pix) - return ERROR_INT("pix not 
defined", procName, 1); - - pix->yres = res; - return 0; -} - - -/*! - * \brief pixGetResolution() - * - * \param[in] pix - * \param[out] pxres, pyres [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixGetResolution(const PIX *pix, - l_int32 *pxres, - l_int32 *pyres) -{ - PROCNAME("pixGetResolution"); - - if (pxres) *pxres = 0; - if (pyres) *pyres = 0; - if (!pxres && !pyres) - return ERROR_INT("no output requested", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (pxres) *pxres = pix->xres; - if (pyres) *pyres = pix->yres; - return 0; -} - - -/*! - * \brief pixSetResolution() - * - * \param[in] pix - * \param[in] xres, yres use 0 to skip setting a value for either of these - * \return 0 if OK, 1 on error - */ -l_ok -pixSetResolution(PIX *pix, - l_int32 xres, - l_int32 yres) -{ - PROCNAME("pixSetResolution"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (xres > 0) pix->xres = xres; - if (yres > 0) pix->yres = yres; - return 0; -} - - -l_int32 -pixCopyResolution(PIX *pixd, - const PIX *pixs) -{ - PROCNAME("pixCopyResolution"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixs == pixd) - return 0; /* no-op */ - - pixSetXRes(pixd, pixGetXRes(pixs)); - pixSetYRes(pixd, pixGetYRes(pixs)); - return 0; -} - - -l_int32 -pixScaleResolution(PIX *pix, - l_float32 xscale, - l_float32 yscale) -{ -l_float64 xres, yres; -l_float64 maxres = 100000000.0; - - PROCNAME("pixScaleResolution"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (xscale <= 0 || yscale <= 0) - return ERROR_INT("invalid scaling ratio", procName, 1); - - xres = (l_float64)xscale * (l_float32)(pix->xres) + 0.5; - yres = (l_float64)yscale * (l_float32)(pix->yres) + 0.5; - pix->xres = (l_uint32)L_MIN(xres, maxres); - pix->yres = (l_uint32)L_MIN(yres, maxres); - return 0; -} - - -l_int32 -pixGetInputFormat(const 
PIX *pix) -{ - PROCNAME("pixGetInputFormat"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - return pix->informat; -} - - -l_int32 -pixSetInputFormat(PIX *pix, - l_int32 informat) -{ - PROCNAME("pixSetInputFormat"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pix->informat = informat; - return 0; -} - - -l_int32 -pixCopyInputFormat(PIX *pixd, - const PIX *pixs) -{ - PROCNAME("pixCopyInputFormat"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixs == pixd) - return 0; /* no-op */ - - pixSetInputFormat(pixd, pixGetInputFormat(pixs)); - return 0; -} - - -l_int32 -pixSetSpecial(PIX *pix, - l_int32 special) -{ - PROCNAME("pixSetSpecial"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pix->special = special; - return 0; -} - - -/*! - * \brief pixGetText() - * - * \param[in] pix - * \return ptr to existing text string - * - *
- * Notes:
- *      (1) The text string belongs to the pix:
- *          * the caller must NOT free it
- *          * it must not be used after the pix is destroyed
- * 
- */ -char * -pixGetText(PIX *pix) -{ - PROCNAME("pixGetText"); - - if (!pix) - return (char *)ERROR_PTR("pix not defined", procName, NULL); - return pix->text; -} - - -/*! - * \brief pixSetText() - * - * \param[in] pix - * \param[in] textstring can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This removes any existing textstring and puts a copy of
- *          the input textstring there.
- * 
- */ -l_ok -pixSetText(PIX *pix, - const char *textstring) -{ - PROCNAME("pixSetText"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - stringReplace(&pix->text, textstring); - return 0; -} - - -/*! - * \brief pixAddText() - * - * \param[in] pix - * \param[in] textstring can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This adds the new textstring to any existing text.
- *      (2) Either or both the existing text and the new text
- *          string can be null.
- * 
- */ -l_ok -pixAddText(PIX *pix, - const char *textstring) -{ -char *newstring; - - PROCNAME("pixAddText"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - newstring = stringJoin(pixGetText(pix), textstring); - stringReplace(&pix->text, newstring); - LEPT_FREE(newstring); - return 0; -} - - -l_int32 -pixCopyText(PIX *pixd, - const PIX *pixs) -{ - PROCNAME("pixCopyText"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixs == pixd) - return 0; /* no-op */ - - pixSetText(pixd, pixs->text); - return 0; -} - - -PIXCMAP * -pixGetColormap(PIX *pix) -{ - PROCNAME("pixGetColormap"); - - if (!pix) - return (PIXCMAP *)ERROR_PTR("pix not defined", procName, NULL); - return pix->colormap; -} - - -/*! - * \brief pixSetColormap() - * - * \param[in] pix - * \param[in] colormap to be assigned - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) Unlike with the pix data field, pixSetColormap() destroys
- *          any existing colormap before assigning the new one.
- *          Because colormaps are not ref counted, it is important that
- *          the new colormap does not belong to any other pix.
- * 
- */ -l_ok -pixSetColormap(PIX *pix, - PIXCMAP *colormap) -{ - PROCNAME("pixSetColormap"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixDestroyColormap(pix); - pix->colormap = colormap; - return 0; -} - - -/*! - * \brief pixDestroyColormap() - * - * \param[in] pix - * \return 0 if OK, 1 on error - */ -l_ok -pixDestroyColormap(PIX *pix) -{ -PIXCMAP *cmap; - - PROCNAME("pixDestroyColormap"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if ((cmap = pix->colormap) != NULL) { - pixcmapDestroy(&cmap); - pix->colormap = NULL; - } - return 0; -} - - -/*! - * \brief pixGetData() - * - * \param[in] pix - * \return ptr to image data - * - *
- * Notes:
- *      (1) This gives a new handle for the data.  The data is still
- *          owned by the pix, so do not call LEPT_FREE() on it.
- * 
- */ -l_uint32 * -pixGetData(PIX *pix) -{ - PROCNAME("pixGetData"); - - if (!pix) - return (l_uint32 *)ERROR_PTR("pix not defined", procName, NULL); - return pix->data; -} - - -/*! - * \brief pixSetData() - * - * \param[in] pix - * \param[in] data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does not free any existing data.  To free existing
- *          data, use pixFreeData() before pixSetData().
- * 
- */ -l_int32 -pixSetData(PIX *pix, - l_uint32 *data) -{ - PROCNAME("pixSetData"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pix->data = data; - return 0; -} - - -/*! - * \brief pixExtractData() - * - * \param[in] pix - * \return ptr to data, or null on error - * - *
- * Notes:
- *      (1) This extracts the pix image data for use in another context.
- *          The caller still needs to use pixDestroy() on the input pix.
- *      (2) If refcount == 1, the data is extracted and the
- *          pix->data ptr is set to NULL.
- *      (3) If refcount > 1, this simply returns a copy of the data,
- *          using the pix allocator, and leaving the input pix unchanged.
- * 
- */ -l_uint32 * -pixExtractData(PIX *pixs) -{ -l_int32 count, bytes; -l_uint32 *data, *datas; - - PROCNAME("pixExtractData"); - - if (!pixs) - return (l_uint32 *)ERROR_PTR("pixs not defined", procName, NULL); - - count = pixGetRefcount(pixs); - if (count == 1) { /* extract */ - data = pixGetData(pixs); - pixSetData(pixs, NULL); - } else { /* refcount > 1; copy */ - bytes = 4 * pixGetWpl(pixs) * pixGetHeight(pixs); - datas = pixGetData(pixs); - if ((data = (l_uint32 *)pix_malloc(bytes)) == NULL) - return (l_uint32 *)ERROR_PTR("data not made", procName, NULL); - memcpy(data, datas, bytes); - } - - return data; -} - - -/*! - * \brief pixFreeData() - * - * \param[in] pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This frees the data and sets the pix data ptr to null.
- *          It should be used before pixSetData() in the situation where
- *          you want to free any existing data before doing
- *          a subsequent assignment with pixSetData().
- * 
- */ -l_int32 -pixFreeData(PIX *pix) -{ -l_uint32 *data; - - PROCNAME("pixFreeData"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if ((data = pixGetData(pix)) != NULL) { - pix_free(data); - pix->data = NULL; - } - return 0; -} - - -/*--------------------------------------------------------------------* - * Pix line ptrs * - *--------------------------------------------------------------------*/ -/*! - * \brief pixGetLinePtrs() - * - * \param[in] pix - * \param[out] psize [optional] array size, which is the pix height - * \return array of line ptrs, or NULL on error - * - *
- * Notes:
- *      (1) This is intended to be used for fast random pixel access.
- *          For example, for an 8 bpp image,
- *              val = GET_DATA_BYTE(lines8[i], j);
- *          is equivalent to, but much faster than,
- *              pixGetPixel(pix, j, i, &val);
- *      (2) How much faster?  For 1 bpp, it's from 6 to 10x faster.
- *          For 8 bpp, it's an amazing 30x faster.  So if you are
- *          doing random access over a substantial part of the image,
- *          use this line ptr array.
- *      (3) When random access is used in conjunction with a stack,
- *          queue or heap, the overall computation time depends on
- *          the operations performed on each struct that is popped
- *          or pushed, and whether we are using a priority queue (O(logn))
- *          or a queue or stack (O(1)).  For example, for maze search,
- *          the overall ratio of time for line ptrs vs. pixGet/Set* is
- *             Maze type     Type                   Time ratio
- *               binary      queue                     0.4
- *               gray        heap (priority queue)     0.6
- *      (4) Because this returns a void** and the accessors take void*,
- *          the compiler cannot check the pointer types.  It is
- *          strongly recommended that you adopt a naming scheme for
- *          the returned ptr arrays that indicates the pixel depth.
- *          (This follows the original intent of Simonyi's "Hungarian"
- *          application notation, where naming is used proactively
- *          to make errors visibly obvious.)  By doing this, you can
- *          tell by inspection if the correct accessor is used.
- *          For example, for an 8 bpp pixg:
- *              void **lineg8 = pixGetLinePtrs(pixg, NULL);
- *              val = GET_DATA_BYTE(lineg8[i], j);  // fast access; BYTE, 8
- *              ...
- *              LEPT_FREE(lineg8);  // don't forget this
- *      (5) These are convenient for accessing bytes sequentially in an
- *          8 bpp grayscale image.  People who write image processing code
- *          on 8 bpp images are accustomed to grabbing pixels directly out
- *          of the raster array.  Note that for little endians, you first
- *          need to reverse the byte order in each 32-bit word.
- *          Here's a typical usage pattern:
- *              pixEndianByteSwap(pix);   // always safe; no-op on big-endians
- *              l_uint8 **lineptrs = (l_uint8 **)pixGetLinePtrs(pix, NULL);
- *              pixGetDimensions(pix, &w, &h, NULL);
- *              for (i = 0; i < h; i++) {
- *                  l_uint8 *line = lineptrs[i];
- *                  for (j = 0; j < w; j++) {
- *                      val = line[j];
- *                      ...
- *                  }
- *              }
- *              pixEndianByteSwap(pix);  // restore big-endian order
- *              LEPT_FREE(lineptrs);
- *          This can be done even more simply as follows:
- *              l_uint8 **lineptrs = pixSetupByteProcessing(pix, &w, &h);
- *              for (i = 0; i < h; i++) {
- *                  l_uint8 *line = lineptrs[i];
- *                  for (j = 0; j < w; j++) {
- *                      val = line[j];
- *                      ...
- *                  }
- *              }
- *              pixCleanupByteProcessing(pix, lineptrs);
- * 
- */ -void ** -pixGetLinePtrs(PIX *pix, - l_int32 *psize) -{ -l_int32 i, h, wpl; -l_uint32 *data; -void **lines; - - PROCNAME("pixGetLinePtrs"); - - if (psize) *psize = 0; - if (!pix) - return (void **)ERROR_PTR("pix not defined", procName, NULL); - - h = pixGetHeight(pix); - if (psize) *psize = h; - if ((lines = (void **)LEPT_CALLOC(h, sizeof(void *))) == NULL) - return (void **)ERROR_PTR("lines not made", procName, NULL); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - for (i = 0; i < h; i++) - lines[i] = (void *)(data + i * wpl); - - return lines; -} - - -/*--------------------------------------------------------------------* - * Print output for debugging * - *--------------------------------------------------------------------*/ -extern const char *ImageFileFormatExtensions[]; - -/*! - * \brief pixPrintStreamInfo() - * - * \param[in] fp file stream - * \param[in] pix - * \param[in] text [optional] identifying string; can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixPrintStreamInfo(FILE *fp, - const PIX *pix, - const char *text) -{ -l_int32 informat; -const PIXCMAP *cmap; - - PROCNAME("pixPrintStreamInfo"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if (text) - fprintf(fp, " Pix Info for %s:\n", text); - fprintf(fp, " width = %d, height = %d, depth = %d, spp = %d\n", - pixGetWidth(pix), pixGetHeight(pix), pixGetDepth(pix), - pixGetSpp(pix)); - fprintf(fp, " wpl = %d, data = %p, refcount = %d\n", - pixGetWpl(pix), pix->data, pixGetRefcount(pix)); - fprintf(fp, " xres = %d, yres = %d\n", pixGetXRes(pix), pixGetYRes(pix)); - if ((cmap = pix->colormap) != NULL) - pixcmapWriteStream(fp, cmap); - else - fprintf(fp, " no colormap\n"); - informat = pixGetInputFormat(pix); - fprintf(fp, " input format: %d (%s)\n", informat, - ImageFileFormatExtensions[informat]); - if (pix->text != NULL) - fprintf(fp, " text: %s\n", pix->text); - - return 0; -} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix2.c deleted file mode 100644 index ce2f39ce..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix2.c +++ /dev/null @@ -1,3506 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pix2.c - *
- *
- *    This file has these basic operations:
- *
- *      (1) Get and set: individual pixels, full image, rectangular region,
- *          pad pixels, border pixels, and color components for RGB
- *      (2) Add and remove border pixels
- *      (3) Endian byte swaps
- *      (4) Simple method for byte-processing images (instead of words)
- *
- *      Pixel poking
- *           l_int32     pixGetPixel()
- *           l_int32     pixSetPixel()
- *           l_int32     pixGetRGBPixel()
- *           l_int32     pixSetRGBPixel()
- *           l_int32     pixSetCmapPixel()
- *           l_int32     pixGetRandomPixel()
- *           l_int32     pixClearPixel()
- *           l_int32     pixFlipPixel()
- *           void        setPixelLow()
- *
- *      Find black or white value
- *           l_int32     pixGetBlackOrWhiteVal()
- *
- *      Full image clear/set/set-to-arbitrary-value
- *           l_int32     pixClearAll()
- *           l_int32     pixSetAll()
- *           l_int32     pixSetAllGray()
- *           l_int32     pixSetAllArbitrary()
- *           l_int32     pixSetBlackOrWhite()
- *           l_int32     pixSetComponentArbitrary()
- *
- *      Rectangular region clear/set/set-to-arbitrary-value/blend
- *           l_int32     pixClearInRect()
- *           l_int32     pixSetInRect()
- *           l_int32     pixSetInRectArbitrary()
- *           l_int32     pixBlendInRect()
- *
- *      Set pad bits
- *           l_int32     pixSetPadBits()
- *           l_int32     pixSetPadBitsBand()
- *
- *      Assign border pixels
- *           l_int32     pixSetOrClearBorder()
- *           l_int32     pixSetBorderVal()
- *           l_int32     pixSetBorderRingVal()
- *           l_int32     pixSetMirroredBorder()
- *           PIX        *pixCopyBorder()
- *
- *      Add and remove border
- *           PIX        *pixAddBorder()
- *           PIX        *pixAddBlackOrWhiteBorder()
- *           PIX        *pixAddBorderGeneral()
- *           PIX        *pixRemoveBorder()
- *           PIX        *pixRemoveBorderGeneral()
- *           PIX        *pixRemoveBorderToSize()
- *           PIX        *pixAddMirroredBorder()
- *           PIX        *pixAddRepeatedBorder()
- *           PIX        *pixAddMixedBorder()
- *           PIX        *pixAddContinuedBorder()
- *
- *      Helper functions using alpha
- *           l_int32     pixShiftAndTransferAlpha()
- *           PIX        *pixDisplayLayersRGBA()
- *
- *      Color sample setting and extraction
- *           PIX        *pixCreateRGBImage()
- *           PIX        *pixGetRGBComponent()
- *           l_int32     pixSetRGBComponent()
- *           PIX        *pixGetRGBComponentCmap()
- *           l_int32     pixCopyRGBComponent()
- *           l_int32     composeRGBPixel()
- *           l_int32     composeRGBAPixel()
- *           void        extractRGBValues()
- *           void        extractRGBAValues()
- *           l_int32     extractMinMaxComponent()
- *           l_int32     pixGetRGBLine()
- *
- *      Raster line pixel setter
- *           l_int32     setLineDataVal()
- *
- *      Conversion between big and little endians
- *           PIX        *pixEndianByteSwapNew()
- *           l_int32     pixEndianByteSwap()
- *           l_int32     lineEndianByteSwap()
- *           PIX        *pixEndianTwoByteSwapNew()
- *           l_int32     pixEndianTwoByteSwap()
- *
- *      Extract raster data as binary string
- *           l_int32     pixGetRasterData()
- *
- *      Test alpha component opaqueness
- *           l_int32     pixAlphaIsOpaque
- *
- *      Setup helpers for 8 bpp byte processing
- *           l_uint8   **pixSetupByteProcessing()
- *           l_int32     pixCleanupByteProcessing()
- *
- *      Setting parameters for antialias masking with alpha transforms
- *           void        l_setAlphaMaskBorder()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_uint32 rmask32[] = {0x0, - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, - 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, - 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, - 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, - 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff}; - - /* This is a global that determines the default 8 bpp alpha mask values - * for rings at distance 1 and 2 from the border. Declare extern - * to use. To change the values, use l_setAlphaMaskBorder(). */ -LEPT_DLL l_float32 AlphaMaskBorderVals[2] = {0.0, 0.5}; - - -#ifndef NO_CONSOLE_IO -#define DEBUG_SERIALIZE 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-------------------------------------------------------------* - * Pixel poking * - *-------------------------------------------------------------*/ -/*! - * \brief pixGetPixel() - * - * \param[in] pix - * \param[in] x,y pixel coords - * \param[out] pval pixel value - * \return 0 if OK; 1 or 2 on error - * - *
- * Notes:
- *      (1) This returns the value in the data array.  If the pix is
- *          colormapped, it returns the colormap index, not the rgb value.
- *      (2) Because of the function overhead and the parameter checking,
- *          this is much slower than using the GET_DATA_*() macros directly.
- *          Speed on a 1 Mpixel RGB image, using a 3 GHz machine:
- *            * pixGet/pixSet: ~25 Mpix/sec
- *            * GET_DATA/SET_DATA: ~350 MPix/sec
- *          If speed is important and you're doing random access into
- *          the pix, use pixGetLinePtrs() and the array access macros.
- *      (3) If the point is outside the image, this returns an error (2),
- *          with 0 in %pval.  To avoid spamming output, it fails silently.
- * 
- */ -l_ok -pixGetPixel(PIX *pix, - l_int32 x, - l_int32 y, - l_uint32 *pval) -{ -l_int32 w, h, d, wpl, val; -l_uint32 *line, *data; - - PROCNAME("pixGetPixel"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - line = data + y * wpl; - switch (d) - { - case 1: - val = GET_DATA_BIT(line, x); - break; - case 2: - val = GET_DATA_DIBIT(line, x); - break; - case 4: - val = GET_DATA_QBIT(line, x); - break; - case 8: - val = GET_DATA_BYTE(line, x); - break; - case 16: - val = GET_DATA_TWO_BYTES(line, x); - break; - case 32: - val = line[x]; - break; - default: - return ERROR_INT("depth must be in {1,2,4,8,16,32} bpp", procName, 1); - } - - *pval = val; - return 0; -} - - -/*! - * \brief pixSetPixel() - * - * \param[in] pix - * \param[in] x,y pixel coords - * \param[in] val value to be inserted - * \return 0 if OK; 1 or 2 on error - * - *
- * Notes:
- *      (1) Warning: the input value is not checked for overflow with respect
- *          the the depth of %pix, and the sign bit (if any) is ignored.
- *          * For d == 1, %val > 0 sets the bit on.
- *          * For d == 2, 4, 8 and 16, %val is masked to the maximum allowable
- *            pixel value, and any (invalid) higher order bits are discarded.
- *      (2) See pixGetPixel() for information on performance.
- *      (3) If the point is outside the image, this returns an error (2),
- *          with 0 in %pval.  To avoid spamming output, it fails silently.
- * 
- */ -l_ok -pixSetPixel(PIX *pix, - l_int32 x, - l_int32 y, - l_uint32 val) -{ -l_int32 w, h, d, wpl; -l_uint32 *line, *data; - - PROCNAME("pixSetPixel"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - line = data + y * wpl; - switch (d) - { - case 1: - if (val) - SET_DATA_BIT(line, x); - else - CLEAR_DATA_BIT(line, x); - break; - case 2: - SET_DATA_DIBIT(line, x, val); - break; - case 4: - SET_DATA_QBIT(line, x, val); - break; - case 8: - SET_DATA_BYTE(line, x, val); - break; - case 16: - SET_DATA_TWO_BYTES(line, x, val); - break; - case 32: - line[x] = val; - break; - default: - return ERROR_INT("depth must be in {1,2,4,8,16,32} bpp", procName, 1); - } - - return 0; -} - - -/*! - * \brief pixGetRGBPixel() - * - * \param[in] pix 32 bpp rgb, not colormapped - * \param[in] x,y pixel coords - * \param[out] prval [optional] red component - * \param[out] pgval [optional] green component - * \param[out] pbval [optional] blue component - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0 in %pval. To avoid spamming output, it fails silently. 
- */ -l_ok -pixGetRGBPixel(PIX *pix, - l_int32 x, - l_int32 y, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ -l_int32 w, h, d, wpl; -l_uint32 *data, *ppixel; - - PROCNAME("pixGetRGBPixel"); - - if (prval) *prval = 0; - if (pgval) *pgval = 0; - if (pbval) *pbval = 0; - if (!prval && !pgval && !pbval) - return ERROR_INT("no output requested", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 32) - return ERROR_INT("pix not 32 bpp", procName, 1); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - ppixel = data + y * wpl + x; - if (prval) *prval = GET_DATA_BYTE(ppixel, COLOR_RED); - if (pgval) *pgval = GET_DATA_BYTE(ppixel, COLOR_GREEN); - if (pbval) *pbval = GET_DATA_BYTE(ppixel, COLOR_BLUE); - return 0; -} - - -/*! - * \brief pixSetRGBPixel() - * - * \param[in] pix 32 bpp rgb - * \param[in] x,y pixel coords - * \param[in] rval red component - * \param[in] gval green component - * \param[in] bval blue component - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * and to avoid spamming output, it fails silently. - */ -l_ok -pixSetRGBPixel(PIX *pix, - l_int32 x, - l_int32 y, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 w, h, d, wpl; -l_uint32 pixel; -l_uint32 *data, *line; - - PROCNAME("pixSetRGBPixel"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 32) - return ERROR_INT("pix not 32 bpp", procName, 1); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - line = data + y * wpl; - composeRGBPixel(rval, gval, bval, &pixel); - *(line + x) = pixel; - return 0; -} - - -/*! 
- * \brief pixSetCmapPixel() - * - * \param[in] pix 2, 4 or 8 bpp, colormapped - * \param[in] x,y pixel coords - * \param[in] rval red component - * \param[in] gval green component - * \param[in] bval blue component - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * and to avoid spamming output, it fails silently. - * (2) - If the color already exists, use it. - * - If the color does not exist in the colormap, it is added - * if possible. - * - If there is not room in the colormap for the new color: - * * if d < 8, return 2 with a warning. - * * if d == 8, find and use the nearest color. - * (3) Note that this operation scales with the number of colors - * in the colormap, and therefore can be very expensive if an - * attempt is made to set many pixels. (In that case, it should - * be implemented with a map:rgb-->index for efficiency.) - * This is best used with very small images. - */ -l_ok -pixSetCmapPixel(PIX *pix, - l_int32 x, - l_int32 y, - l_int32 rval, - l_int32 gval, - l_int32 bval) -{ -l_int32 w, h, d, index; -PIXCMAP *cmap; - - PROCNAME("pixSetCmapPixel"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if ((cmap = pixGetColormap(pix)) == NULL) - return ERROR_INT("pix is not colormapped", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return ERROR_INT("pix depth not 2, 4 or 8", procName, 1); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - if (d == 8) { /* always add */ - pixcmapAddNearestColor(cmap, rval, gval, bval, &index); - } else { /* d < 8 */ - if (pixcmapAddNewColor(cmap, rval, gval, bval, &index) == 2) - return ERROR_INT("colormap is full", procName, 2); - } - pixSetPixel(pix, x, y, index); - return 0; -} - - -/*! 
- * \brief pixGetRandomPixel() - * - * \param[in] pix any depth; can be colormapped - * \param[out] pval [optional] pixel value - * \param[out] px [optional] x coordinate chosen; can be null - * \param[out] py [optional] y coordinate chosen; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If the pix is colormapped, it returns the rgb value.
- * 
- */ -l_ok -pixGetRandomPixel(PIX *pix, - l_uint32 *pval, - l_int32 *px, - l_int32 *py) -{ -l_int32 w, h, x, y, rval, gval, bval; -l_uint32 val; -PIXCMAP *cmap; - - PROCNAME("pixGetRandomPixel"); - - if (pval) *pval = 0; - if (px) *px = 0; - if (py) *py = 0; - if (!pval && !px && !py) - return ERROR_INT("no output requested", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, NULL); - x = rand() % w; - y = rand() % h; - if (px) *px = x; - if (py) *py = y; - if (pval) { - pixGetPixel(pix, x, y, &val); - if ((cmap = pixGetColormap(pix)) != NULL) { - pixcmapGetColor(cmap, val, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, pval); - } else { - *pval = val; - } - } - - return 0; -} - - -/*! - * \brief pixClearPixel() - * - * \param[in] pix any depth; warning if colormapped - * \param[in] x,y pixel coords - * \return 0 if OK; 1 or 2 on error. - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0 in %pval. To avoid spamming output, it fails silently. - */ -l_ok -pixClearPixel(PIX *pix, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, d, wpl; -l_uint32 *line, *data; - - PROCNAME("pixClearPixel"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (pixGetColormap(pix)) - L_WARNING("cmapped: setting to 0 may not be intended\n", procName); - pixGetDimensions(pix, &w, &h, &d); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - line = data + y * wpl; - switch (d) - { - case 1: - CLEAR_DATA_BIT(line, x); - break; - case 2: - CLEAR_DATA_DIBIT(line, x); - break; - case 4: - CLEAR_DATA_QBIT(line, x); - break; - case 8: - SET_DATA_BYTE(line, x, 0); - break; - case 16: - SET_DATA_TWO_BYTES(line, x, 0); - break; - case 32: - line[x] = 0; - break; - default: - return ERROR_INT("depth must be in {1,2,4,8,16,32} bpp", procName, 1); - } - - return 0; -} - - -/*! 
- * \brief pixFlipPixel() - * - * \param[in] pix any depth, warning if colormapped - * \param[in] x,y pixel coords - * \return 0 if OK; 1 or 2 on error - * - * Notes: - * (1) If the point is outside the image, this returns an error (2), - * with 0 in %pval. To avoid spamming output, it fails silently. - */ -l_ok -pixFlipPixel(PIX *pix, - l_int32 x, - l_int32 y) -{ -l_int32 w, h, d, wpl; -l_uint32 val; -l_uint32 *line, *data; - - PROCNAME("pixFlipPixel"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (pixGetColormap(pix)) - L_WARNING("cmapped: setting to 0 may not be intended\n", procName); - pixGetDimensions(pix, &w, &h, &d); - if (x < 0 || x >= w || y < 0 || y >= h) - return 2; - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - line = data + y * wpl; - switch (d) - { - case 1: - val = GET_DATA_BIT(line, x); - if (val) - CLEAR_DATA_BIT(line, x); - else - SET_DATA_BIT(line, x); - break; - case 2: - val = GET_DATA_DIBIT(line, x); - val ^= 0x3; - SET_DATA_DIBIT(line, x, val); - break; - case 4: - val = GET_DATA_QBIT(line, x); - val ^= 0xf; - SET_DATA_QBIT(line, x, val); - break; - case 8: - val = GET_DATA_BYTE(line, x); - val ^= 0xff; - SET_DATA_BYTE(line, x, val); - break; - case 16: - val = GET_DATA_TWO_BYTES(line, x); - val ^= 0xffff; - SET_DATA_TWO_BYTES(line, x, val); - break; - case 32: - val = line[x] ^ 0xffffffff; - line[x] = val; - break; - default: - return ERROR_INT("depth must be in {1,2,4,8,16,32} bpp", procName, 1); - } - - return 0; -} - - -/*! - * \brief setPixelLow() - * - * \param[in] line ptr to beginning of line, - * \param[in] x pixel location in line - * \param[in] depth bpp - * \param[in] val to be inserted - * \return void - * - *
- * Notes:
- *      (1) Caution: input variables are not checked!
- * 
- */ -void -setPixelLow(l_uint32 *line, - l_int32 x, - l_int32 depth, - l_uint32 val) -{ - switch (depth) - { - case 1: - if (val) - SET_DATA_BIT(line, x); - else - CLEAR_DATA_BIT(line, x); - break; - case 2: - SET_DATA_DIBIT(line, x, val); - break; - case 4: - SET_DATA_QBIT(line, x, val); - break; - case 8: - SET_DATA_BYTE(line, x, val); - break; - case 16: - SET_DATA_TWO_BYTES(line, x, val); - break; - case 32: - line[x] = val; - break; - default: - lept_stderr("illegal depth in setPixelLow()\n"); - } - - return; -} - - -/*-------------------------------------------------------------* - * Find black or white value * - *-------------------------------------------------------------*/ -/*! - * \brief pixGetBlackOrWhiteVal() - * - * \param[in] pixs all depths; cmap ok - * \param[in] op L_GET_BLACK_VAL, L_GET_WHITE_VAL - * \param[out] pval pixel value - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Side effect.  For a colormapped image, if the requested
- *          color is not present and there is room to add it in the cmap,
- *          it is added and the new index is returned.  If there is no room,
- *          the index of the closest color in intensity is returned.
- * 
- */ -l_ok -pixGetBlackOrWhiteVal(PIX *pixs, - l_int32 op, - l_uint32 *pval) -{ -l_int32 d, val; -PIXCMAP *cmap; - - PROCNAME("pixGetBlackOrWhiteVal"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (op != L_GET_BLACK_VAL && op != L_GET_WHITE_VAL) - return ERROR_INT("invalid op", procName, 1); - - cmap = pixGetColormap(pixs); - d = pixGetDepth(pixs); - if (!cmap) { - if ((d == 1 && op == L_GET_WHITE_VAL) || - (d > 1 && op == L_GET_BLACK_VAL)) { /* min val */ - val = 0; - } else { /* max val */ - val = (d == 32) ? 0xffffff00 : (1 << d) - 1; - } - } else { /* handle colormap */ - if (op == L_GET_BLACK_VAL) - pixcmapAddBlackOrWhite(cmap, 0, &val); - else /* L_GET_WHITE_VAL */ - pixcmapAddBlackOrWhite(cmap, 1, &val); - } - *pval = val; - - return 0; -} - - -/*-------------------------------------------------------------* - * Full image clear/set/set-to-arbitrary-value/invert * - *-------------------------------------------------------------*/ -/*! - * \brief pixClearAll() - * - * \param[in] pix all depths; use cmapped with caution - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Clears all data to 0.  For 1 bpp, this is white; for grayscale
- *          or color, this is black.
- *      (2) Caution: for colormapped pix, this sets the color to the first
- *          one in the colormap.  Be sure that this is the intended color!
- * 
- */ -l_ok -pixClearAll(PIX *pix) -{ - PROCNAME("pixClearAll"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix), - PIX_CLR, NULL, 0, 0); - return 0; -} - - -/*! - * \brief pixSetAll() - * - * \param[in] pix all depths; use cmapped with caution - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Sets all data to 1.  For 1 bpp, this is black; for grayscale
- *          or color, this is white.
- *      (2) Caution: for colormapped pix, this sets the pixel value to the
- *          maximum value supported by the colormap: 2^d - 1.  However, this
- *          color may not be defined, because the colormap may not be full.
- * 
- */ -l_ok -pixSetAll(PIX *pix) -{ -l_int32 n; -PIXCMAP *cmap; - - PROCNAME("pixSetAll"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if ((cmap = pixGetColormap(pix)) != NULL) { - n = pixcmapGetCount(cmap); - if (n < cmap->nalloc) /* cmap is not full */ - return ERROR_INT("cmap entry does not exist", procName, 1); - } - - pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix), - PIX_SET, NULL, 0, 0); - return 0; -} - - -/*! - * \brief pixSetAllGray() - * - * \param[in] pix all depths, cmap ok - * \param[in] grayval in range 0 ... 255 - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) N.B.  For all images, %grayval == 0 represents black and
- *          %grayval == 255 represents white.
- *      (2) For depth < 8, we do our best to approximate the gray level.
- *          For 1 bpp images, any %grayval < 128 is black; >= 128 is white.
- *          For 32 bpp images, each r,g,b component is set to %grayval,
- *          and the alpha component is preserved.
- *      (3) If pix is colormapped, it adds the gray value, replicated in
- *          all components, to the colormap if it's not there and there
- *          is room.  If the colormap is full, it finds the closest color in
- *          L2 distance of components.  This index is written to all pixels.
- * 
- */ -l_ok -pixSetAllGray(PIX *pix, - l_int32 grayval) -{ -l_int32 d, spp, index; -l_uint32 val32; -PIX *alpha; -PIXCMAP *cmap; - - PROCNAME("pixSetAllGray"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (grayval < 0) { - L_WARNING("grayval < 0; setting to 0\n", procName); - grayval = 0; - } else if (grayval > 255) { - L_WARNING("grayval > 255; setting to 255\n", procName); - grayval = 255; - } - - /* Handle the colormap case */ - cmap = pixGetColormap(pix); - if (cmap) { - pixcmapAddNearestColor(cmap, grayval, grayval, grayval, &index); - pixSetAllArbitrary(pix, index); - return 0; - } - - /* Non-cmapped */ - d = pixGetDepth(pix); - spp = pixGetSpp(pix); - if (d == 1) { - if (grayval < 128) /* black */ - pixSetAll(pix); - else - pixClearAll(pix); /* white */ - } else if (d < 8) { - grayval >>= 8 - d; - pixSetAllArbitrary(pix, grayval); - } else if (d == 8) { - pixSetAllArbitrary(pix, grayval); - } else if (d == 16) { - grayval |= (grayval << 8); - pixSetAllArbitrary(pix, grayval); - } else if (d == 32 && spp == 3) { - composeRGBPixel(grayval, grayval, grayval, &val32); - pixSetAllArbitrary(pix, val32); - } else if (d == 32 && spp == 4) { - alpha = pixGetRGBComponent(pix, L_ALPHA_CHANNEL); - composeRGBPixel(grayval, grayval, grayval, &val32); - pixSetAllArbitrary(pix, val32); - pixSetRGBComponent(pix, alpha, L_ALPHA_CHANNEL); - pixDestroy(&alpha); - } else { - L_ERROR("invalid depth: %d\n", procName, d); - return 1; - } - - return 0; -} - - -/*! - * \brief pixSetAllArbitrary() - * - * \param[in] pix all depths; use cmapped with caution - * \param[in] val value to set all pixels - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Caution 1!  For colormapped pix, %val is used as an index
- *          into a colormap.  Be sure that index refers to the intended color.
- *          If the color is not in the colormap, you should first add it
- *          and then call this function.
- *      (2) Caution 2!  For 32 bpp pix, the interpretation of the LSB
- *          of %val depends on whether spp == 3 (RGB) or spp == 4 (RGBA).
- *          For RGB, the LSB is ignored in image transformations.
- *          For RGBA, the LSB is interpreted as the alpha (transparency)
- *          component; full transparency has alpha == 0x0, whereas
- *          full opacity has alpha = 0xff.  An RGBA image with full
- *          opacity behaves like an RGB image.
- *      (3) As an example of (2), suppose you want to initialize a 32 bpp
- *          pix with partial opacity, say 0xee337788.  If the pix is 3 spp,
- *          the 0x88 alpha component will be ignored and may be changed
- *          in subsequent processing.  However, if the pix is 4 spp, the
- *          alpha component will be retained and used. The function
- *          pixCreate(w, h, 32) makes an RGB image by default, and
- *          pixSetSpp(pix, 4) can be used to promote an RGB image to RGBA.
- * 
- */ -l_ok -pixSetAllArbitrary(PIX *pix, - l_uint32 val) -{ -l_int32 n, i, j, w, h, d, wpl, npix; -l_uint32 maxval, wordval; -l_uint32 *data, *line; -PIXCMAP *cmap; - - PROCNAME("pixSetAllArbitrary"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - /* If colormapped, make sure that val is less than the size - * of the cmap array. */ - if ((cmap = pixGetColormap(pix)) != NULL) { - n = pixcmapGetCount(cmap); - if (val >= n) { - L_WARNING("index not in colormap; using last color\n", procName); - val = n - 1; - } - } - - /* Make sure val isn't too large for the pixel depth. - * If it is too large, set the pixel color to white. */ - pixGetDimensions(pix, &w, &h, &d); - if (d < 32) { - maxval = (1 << d) - 1; - if (val > maxval) { - L_WARNING("val = %d too large for depth; using maxval = %d\n", - procName, val, maxval); - val = maxval; - } - } - - /* Set up word to tile with */ - wordval = 0; - npix = 32 / d; /* number of pixels per 32 bit word */ - for (j = 0; j < npix; j++) - wordval |= (val << (j * d)); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < wpl; j++) { - *(line + j) = wordval; - } - } - return 0; -} - - -/*! - * \brief pixSetBlackOrWhite() - * - * \param[in] pixs all depths; cmap ok - * \param[in] op L_SET_BLACK, L_SET_WHITE - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Function for setting all pixels in an image to either black
- *          or white.
- *      (2) If pixs is colormapped, it adds black or white to the
- *          colormap if it's not there and there is room.  If the colormap
- *          is full, it finds the closest color in intensity.
- *          This index is written to all pixels.
- * 
- */ -l_ok -pixSetBlackOrWhite(PIX *pixs, - l_int32 op) -{ -l_int32 d, index; -PIXCMAP *cmap; - - PROCNAME("pixSetBlackOrWhite"); - - if (!pixs) - return ERROR_INT("pix not defined", procName, 1); - if (op != L_SET_BLACK && op != L_SET_WHITE) - return ERROR_INT("invalid op", procName, 1); - - cmap = pixGetColormap(pixs); - d = pixGetDepth(pixs); - if (!cmap) { - if ((d == 1 && op == L_SET_BLACK) || (d > 1 && op == L_SET_WHITE)) - pixSetAll(pixs); - else - pixClearAll(pixs); - } else { /* handle colormap */ - if (op == L_SET_BLACK) - pixcmapAddBlackOrWhite(cmap, 0, &index); - else /* L_SET_WHITE */ - pixcmapAddBlackOrWhite(cmap, 1, &index); - pixSetAllArbitrary(pixs, index); - } - - return 0; -} - - -/*! - * \brief pixSetComponentArbitrary() - * - * \param[in] pix 32 bpp - * \param[in] comp COLOR_RED, COLOR_GREEN, COLOR_BLUE, L_ALPHA_CHANNEL - * \param[in] val value to set this component - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) For example, this can be used to set the alpha component to opaque:
- *              pixSetComponentArbitrary(pix, L_ALPHA_CHANNEL, 255)
- * 
- */ -l_ok -pixSetComponentArbitrary(PIX *pix, - l_int32 comp, - l_int32 val) -{ -l_int32 i, nwords; -l_uint32 mask1, mask2; -l_uint32 *data; - - PROCNAME("pixSetComponentArbitrary"); - - if (!pix || pixGetDepth(pix) != 32) - return ERROR_INT("pix not defined or not 32 bpp", procName, 1); - if (comp != COLOR_RED && comp != COLOR_GREEN && comp != COLOR_BLUE && - comp != L_ALPHA_CHANNEL) - return ERROR_INT("invalid component", procName, 1); - if (val < 0 || val > 255) - return ERROR_INT("val not in [0 ... 255]", procName, 1); - - mask1 = ~(255 << (8 * (3 - comp))); - mask2 = val << (8 * (3 - comp)); - nwords = pixGetHeight(pix) * pixGetWpl(pix); - data = pixGetData(pix); - for (i = 0; i < nwords; i++) { - data[i] &= mask1; /* clear out the component */ - data[i] |= mask2; /* insert the new component value */ - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Rectangular region clear/set/set-to-arbitrary-value * - *-------------------------------------------------------------*/ -/*! - * \brief pixClearInRect() - * - * \param[in] pix all depths; can be cmapped - * \param[in] box in which all pixels will be cleared - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Clears all data in rect to 0.  For 1 bpp, this is white;
- *          for grayscale or color, this is black.
- *      (2) Caution: for colormapped pix, this sets the color to the first
- *          one in the colormap.  Be sure that this is the intended color!
- * 
- */ -l_ok -pixClearInRect(PIX *pix, - BOX *box) -{ -l_int32 x, y, w, h; - - PROCNAME("pixClearInRect"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - - boxGetGeometry(box, &x, &y, &w, &h); - pixRasterop(pix, x, y, w, h, PIX_CLR, NULL, 0, 0); - return 0; -} - - -/*! - * \brief pixSetInRect() - * - * \param[in] pix all depths, can be cmapped - * \param[in] box in which all pixels will be set - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Sets all data in rect to 1.  For 1 bpp, this is black;
- *          for grayscale or color, this is white.
- *      (2) Caution: for colormapped pix, this sets the pixel value to the
- *          maximum value supported by the colormap: 2^d - 1.  However, this
- *          color may not be defined, because the colormap may not be full.
- * 
- */ -l_ok -pixSetInRect(PIX *pix, - BOX *box) -{ -l_int32 n, x, y, w, h; -PIXCMAP *cmap; - - PROCNAME("pixSetInRect"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if ((cmap = pixGetColormap(pix)) != NULL) { - n = pixcmapGetCount(cmap); - if (n < cmap->nalloc) /* cmap is not full */ - return ERROR_INT("cmap entry does not exist", procName, 1); - } - - boxGetGeometry(box, &x, &y, &w, &h); - pixRasterop(pix, x, y, w, h, PIX_SET, NULL, 0, 0); - return 0; -} - - -/*! - * \brief pixSetInRectArbitrary() - * - * \param[in] pix all depths; can be cmapped - * \param[in] box in which all pixels will be set to val - * \param[in] val value to set all pixels - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) For colormapped pix, be sure the value is the intended
- *          one in the colormap.
- *      (2) Caution: for colormapped pix, this sets each pixel in the
- *          rect to the color at the index equal to val.  Be sure that
- *          this index exists in the colormap and that it is the intended one!
- * 
- */ -l_ok -pixSetInRectArbitrary(PIX *pix, - BOX *box, - l_uint32 val) -{ -l_int32 n, x, y, xstart, xend, ystart, yend, bw, bh, w, h, d, wpl, maxval; -l_uint32 *data, *line; -BOX *boxc; -PIXCMAP *cmap; - - PROCNAME("pixSetInRectArbitrary"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d !=8 && d != 16 && d != 32) - return ERROR_INT("depth must be in {1,2,4,8,16,32} bpp", procName, 1); - if ((cmap = pixGetColormap(pix)) != NULL) { - n = pixcmapGetCount(cmap); - if (val >= n) { - L_WARNING("index not in colormap; using last color\n", procName); - val = n - 1; - } - } - - maxval = (d == 32) ? 0xffffff00 : (1 << d) - 1; - if (val > maxval) val = maxval; - - /* Handle the simple cases: the min and max values */ - if (val == 0) { - pixClearInRect(pix, box); - return 0; - } - if (d == 1 || - (d == 2 && val == 3) || - (d == 4 && val == 0xf) || - (d == 8 && val == 0xff) || - (d == 16 && val == 0xffff) || - (d == 32 && ((val ^ 0xffffff00) >> 8 == 0))) { - pixSetInRect(pix, box); - return 0; - } - - /* Find the overlap of box with the input pix */ - if ((boxc = boxClipToRectangle(box, w, h)) == NULL) - return ERROR_INT("no overlap of box with image", procName, 1); - boxGetGeometry(boxc, &xstart, &ystart, &bw, &bh); - xend = xstart + bw - 1; - yend = ystart + bh - 1; - boxDestroy(&boxc); - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - for (y = ystart; y <= yend; y++) { - line = data + y * wpl; - for (x = xstart; x <= xend; x++) { - switch(d) - { - case 2: - SET_DATA_DIBIT(line, x, val); - break; - case 4: - SET_DATA_QBIT(line, x, val); - break; - case 8: - SET_DATA_BYTE(line, x, val); - break; - case 16: - SET_DATA_TWO_BYTES(line, x, val); - break; - case 32: - line[x] = val; - break; - default: - return ERROR_INT("depth not 2|4|8|16|32 bpp", procName, 1); - } - } - } - - return 0; -} - - -/*! 
- * \brief pixBlendInRect() - * - * \param[in] pixs 32 bpp rgb - * \param[in] box [optional] in which all pixels will be blended - * \param[in] val blend value; 0xrrggbb00 - * \param[in] fract fraction of color to be blended with each pixel in pixs - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is an in-place function.  It blends the input color %val
- *          with the pixels in pixs in the specified rectangle.
- *          If no rectangle is specified, it blends over the entire image.
- * 
- */ -l_ok -pixBlendInRect(PIX *pixs, - BOX *box, - l_uint32 val, - l_float32 fract) -{ -l_int32 i, j, bx, by, bw, bh, w, h, wpls; -l_int32 prval, pgval, pbval, rval, gval, bval; -l_uint32 val32; -l_uint32 *datas, *lines; - - PROCNAME("pixBlendInRect"); - - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - - extractRGBValues(val, &rval, &gval, &bval); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (!box) { - for (i = 0; i < h; i++) { /* scan over box */ - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val32 = *(lines + j); - extractRGBValues(val32, &prval, &pgval, &pbval); - prval = (l_int32)((1. - fract) * prval + fract * rval); - pgval = (l_int32)((1. - fract) * pgval + fract * gval); - pbval = (l_int32)((1. - fract) * pbval + fract * bval); - composeRGBPixel(prval, pgval, pbval, &val32); - *(lines + j) = val32; - } - } - return 0; - } - - boxGetGeometry(box, &bx, &by, &bw, &bh); - for (i = 0; i < bh; i++) { /* scan over box */ - if (by + i < 0 || by + i >= h) continue; - lines = datas + (by + i) * wpls; - for (j = 0; j < bw; j++) { - if (bx + j < 0 || bx + j >= w) continue; - val32 = *(lines + bx + j); - extractRGBValues(val32, &prval, &pgval, &pbval); - prval = (l_int32)((1. - fract) * prval + fract * rval); - pgval = (l_int32)((1. - fract) * pgval + fract * gval); - pbval = (l_int32)((1. - fract) * pbval + fract * bval); - composeRGBPixel(prval, pgval, pbval, &val32); - *(lines + bx + j) = val32; - } - } - return 0; -} - - -/*-------------------------------------------------------------* - * Set pad bits * - *-------------------------------------------------------------*/ -/*! - * \brief pixSetPadBits() - * - * \param[in] pix 1, 2, 4, 8, 16, 32 bpp - * \param[in] val 0 or 1 - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The pad bits are the bits that expand each scanline to a
- *          multiple of 32 bits.  They are usually not used in
- *          image processing operations.  When boundary conditions
- *          are important, as in seedfill, they must be set properly.
- *      (2) This sets the value of the pad bits (if any) in the last
- *          32-bit word in each scanline.
- *      (3) For 32 bpp pix, there are no pad bits, so this is a no-op.
- *      (4) When writing formatted output, such as tiff, png or jpeg,
- *          the pad bits have no effect on the raster image that is
- *          generated by reading back from the file.  However, in some
- *          cases, the compressed file itself will depend on the pad
- *          bits.  This is seen, for example, in Windows with 2 and 4 bpp
- *          tiff-compressed images that have pad bits on each scanline.
- *          It is sometimes convenient to use a golden file with a
- *          byte-by-byte check to verify invariance.  Consequently,
- *          and because setting the pad bits is cheap, the pad bits are
- *          set to 0 before writing these compressed files.
- * 
- */ -l_ok -pixSetPadBits(PIX *pix, - l_int32 val) -{ -l_int32 i, w, h, d, wpl, endbits, fullwords; -l_uint32 mask; -l_uint32 *data, *pword; - - PROCNAME("pixSetPadBits"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - if (d == 32) /* no padding exists for 32 bpp */ - return 0; - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - endbits = 32 - (((l_int64)w * d) % 32); - if (endbits == 32) /* no partial word */ - return 0; - fullwords = (1LL * w * d) / 32; - mask = rmask32[endbits]; - if (val == 0) - mask = ~mask; - - for (i = 0; i < h; i++) { - pword = data + i * wpl + fullwords; - if (val == 0) /* clear */ - *pword = *pword & mask; - else /* set */ - *pword = *pword | mask; - } - - return 0; -} - - -/*! - * \brief pixSetPadBitsBand() - * - * \param[in] pix 1, 2, 4, 8, 16, 32 bpp - * \param[in] by starting y value of band - * \param[in] bh height of band - * \param[in] val 0 or 1 - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The pad bits are the bits that expand each scanline to a
- *          multiple of 32 bits.  They are usually not used in
- *          image processing operations.  When boundary conditions
- *          are important, as in seedfill, they must be set properly.
- *      (2) This sets the value of the pad bits (if any) in the last
- *          32-bit word in each scanline, within the specified
- *          band of raster lines.
- *      (3) For 32 bpp pix, there are no pad bits, so this is a no-op.
- * 
- */ -l_ok -pixSetPadBitsBand(PIX *pix, - l_int32 by, - l_int32 bh, - l_int32 val) -{ -l_int32 i, w, h, d, wpl, endbits, fullwords; -l_uint32 mask; -l_uint32 *data, *pword; - - PROCNAME("pixSetPadBitsBand"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - if (d == 32) /* no padding exists for 32 bpp */ - return 0; - - if (by < 0) - by = 0; - if (by >= h) - return ERROR_INT("start y not in image", procName, 1); - if (by + bh > h) - bh = h - by; - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - endbits = 32 - (((l_int64)w * d) % 32); - if (endbits == 32) /* no partial word */ - return 0; - fullwords = (l_int64)w * d / 32; - - mask = rmask32[endbits]; - if (val == 0) - mask = ~mask; - - for (i = by; i < by + bh; i++) { - pword = data + i * wpl + fullwords; - if (val == 0) /* clear */ - *pword = *pword & mask; - else /* set */ - *pword = *pword | mask; - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Set border pixels * - *-------------------------------------------------------------*/ -/*! - * \brief pixSetOrClearBorder() - * - * \param[in] pixs all depths - * \param[in] left, right, top, bot amount to set or clear - * \param[in] op operation PIX_SET or PIX_CLR - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The border region is defined to be the region in the
- *          image within a specific distance of each edge.  Here, we
- *          allow the pixels within a specified distance of each
- *          edge to be set independently.  This either sets or
- *          clears all pixels in the border region.
- *      (2) For binary images, use PIX_SET for black and PIX_CLR for white.
- *      (3) For grayscale or color images, use PIX_SET for white
- *          and PIX_CLR for black.
- * 
- */ -l_ok -pixSetOrClearBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot, - l_int32 op) -{ -l_int32 w, h; - - PROCNAME("pixSetOrClearBorder"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (op != PIX_SET && op != PIX_CLR) - return ERROR_INT("op must be PIX_SET or PIX_CLR", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - pixRasterop(pixs, 0, 0, left, h, op, NULL, 0, 0); - pixRasterop(pixs, w - right, 0, right, h, op, NULL, 0, 0); - pixRasterop(pixs, 0, 0, w, top, op, NULL, 0, 0); - pixRasterop(pixs, 0, h - bot, w, bot, op, NULL, 0, 0); - - return 0; -} - - -/*! - * \brief pixSetBorderVal() - * - * \param[in] pixs 8, 16 or 32 bpp - * \param[in] left, right, top, bot amount to set - * \param[in] val value to set at each border pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The border region is defined to be the region in the
- *          image within a specific distance of each edge.  Here, we
- *          allow the pixels within a specified distance of each
- *          edge to be set independently.  This sets the pixels
- *          in the border region to the given input value.
- *      (2) For efficiency, use pixSetOrClearBorder() if
- *          you're setting the border to either black or white.
- *      (3) If d != 32, the input value should be masked off
- *          to the appropriate number of least significant bits.
- *      (4) The code is easily generalized for 2 or 4 bpp.
- * 
- */ -l_ok -pixSetBorderVal(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot, - l_uint32 val) -{ -l_int32 w, h, d, wpls, i, j, bstart, rstart; -l_uint32 *datas, *lines; - - PROCNAME("pixSetBorderVal"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 16 && d != 32) - return ERROR_INT("depth must be 8, 16 or 32 bpp", procName, 1); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (d == 8) { - val &= 0xff; - for (i = 0; i < top; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) - SET_DATA_BYTE(lines, j, val); - } - rstart = w - right; - bstart = h - bot; - for (i = top; i < bstart; i++) { - lines = datas + i * wpls; - for (j = 0; j < left; j++) - SET_DATA_BYTE(lines, j, val); - for (j = rstart; j < w; j++) - SET_DATA_BYTE(lines, j, val); - } - for (i = bstart; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) - SET_DATA_BYTE(lines, j, val); - } - } else if (d == 16) { - val &= 0xffff; - for (i = 0; i < top; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) - SET_DATA_TWO_BYTES(lines, j, val); - } - rstart = w - right; - bstart = h - bot; - for (i = top; i < bstart; i++) { - lines = datas + i * wpls; - for (j = 0; j < left; j++) - SET_DATA_TWO_BYTES(lines, j, val); - for (j = rstart; j < w; j++) - SET_DATA_TWO_BYTES(lines, j, val); - } - for (i = bstart; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) - SET_DATA_TWO_BYTES(lines, j, val); - } - } else { /* d == 32 */ - for (i = 0; i < top; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) - *(lines + j) = val; - } - rstart = w - right; - bstart = h - bot; - for (i = top; i < bstart; i++) { - lines = datas + i * wpls; - for (j = 0; j < left; j++) - *(lines + j) = val; - for (j = rstart; j < w; j++) - *(lines + j) = val; - } - for (i = bstart; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) - *(lines + j) = val; - } - } 
- - return 0; -} - - -/*! - * \brief pixSetBorderRingVal() - * - * \param[in] pixs any depth; cmap OK - * \param[in] dist distance from outside; must be > 0; first ring is 1 - * \param[in] val value to set at each border pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The rings are single-pixel-wide rectangular sets of
- *          pixels at a given distance from the edge of the pix.
- *          This sets all pixels in a given ring to a value.
- * 
- */ -l_ok -pixSetBorderRingVal(PIX *pixs, - l_int32 dist, - l_uint32 val) -{ -l_int32 w, h, d, i, j, xend, yend; - - PROCNAME("pixSetBorderRingVal"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (dist < 1) - return ERROR_INT("dist must be > 0", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (w < 2 * dist + 1 || h < 2 * dist + 1) - return ERROR_INT("ring doesn't exist", procName, 1); - if (d < 32 && (val >= (1 << d))) - return ERROR_INT("invalid pixel value", procName, 1); - - xend = w - dist; - yend = h - dist; - for (j = dist - 1; j <= xend; j++) - pixSetPixel(pixs, j, dist - 1, val); - for (j = dist - 1; j <= xend; j++) - pixSetPixel(pixs, j, yend, val); - for (i = dist - 1; i <= yend; i++) - pixSetPixel(pixs, dist - 1, i, val); - for (i = dist - 1; i <= yend; i++) - pixSetPixel(pixs, xend, i, val); - - return 0; -} - - -/*! - * \brief pixSetMirroredBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels to set - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This applies what is effectively mirror boundary conditions
- *          to a border region in the image.  It is in-place.
- *      (2) This is useful for setting pixels near the border to a
- *          value representative of the near pixels to the interior.
- *      (3) The general pixRasterop() is used for an in-place operation here
- *          because there is no overlap between the src and dest rectangles.
- * 
- */ -l_ok -pixSetMirroredBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 i, j, w, h; - - PROCNAME("pixSetMirroredBorder"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - for (j = 0; j < left; j++) - pixRasterop(pixs, left - 1 - j, top, 1, h - top - bot, PIX_SRC, - pixs, left + j, top); - for (j = 0; j < right; j++) - pixRasterop(pixs, w - right + j, top, 1, h - top - bot, PIX_SRC, - pixs, w - right - 1 - j, top); - for (i = 0; i < top; i++) - pixRasterop(pixs, 0, top - 1 - i, w, 1, PIX_SRC, - pixs, 0, top + i); - for (i = 0; i < bot; i++) - pixRasterop(pixs, 0, h - bot + i, w, 1, PIX_SRC, - pixs, 0, h - bot - 1 - i); - - return 0; -} - - -/*! - * \brief pixCopyBorder() - * - * \param[in] pixd all depths; colormap ok; can be NULL - * \param[in] pixs same depth and size as pixd - * \param[in] left, right, top, bot number of pixels to copy - * \return pixd, or NULL on error if pixd is not defined - * - *
- * Notes:
- *      (1) pixd can be null, but otherwise it must be the same size
- *          and depth as pixs.  Always returns pixd.
- *      (2) This is useful in situations where by setting a few border
- *          pixels we can avoid having to copy all pixels in pixs into
- *          pixd as an initialization step for some operation.
- *          Nevertheless, for safety, if making a new pixd, all the
- *          non-border pixels are initialized to 0.
- * 
- */ -PIX * -pixCopyBorder(PIX *pixd, - PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 w, h; - - PROCNAME("pixCopyBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - - if (pixd) { - if (pixd == pixs) { - L_WARNING("same: nothing to do\n", procName); - return pixd; - } else if (!pixSizesEqual(pixs, pixd)) { - return (PIX *)ERROR_PTR("pixs and pixd sizes differ", - procName, pixd); - } - } else { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, pixd); - } - - pixGetDimensions(pixs, &w, &h, NULL); - pixRasterop(pixd, 0, 0, left, h, PIX_SRC, pixs, 0, 0); - pixRasterop(pixd, w - right, 0, right, h, PIX_SRC, pixs, w - right, 0); - pixRasterop(pixd, 0, 0, w, top, PIX_SRC, pixs, 0, 0); - pixRasterop(pixd, 0, h - bot, w, bot, PIX_SRC, pixs, 0, h - bot); - return pixd; -} - - - -/*-------------------------------------------------------------* - * Add and remove border * - *-------------------------------------------------------------*/ -/*! - * \brief pixAddBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] npix number of pixels to be added to each side - * \param[in] val value of added border pixels - * \return pixd with the added exterior pixels, or NULL on error - * - *
- * Notes:
- *      (1) See pixGetBlackOrWhiteVal() for values of black and white pixels.
- * 
- */ -PIX * -pixAddBorder(PIX *pixs, - l_int32 npix, - l_uint32 val) -{ - PROCNAME("pixAddBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (npix == 0) - return pixClone(pixs); - return pixAddBorderGeneral(pixs, npix, npix, npix, npix, val); -} - - -/*! - * \brief pixAddBlackOrWhiteBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels added - * \param[in] op L_GET_BLACK_VAL, L_GET_WHITE_VAL - * \return pixd with the added exterior pixels, or NULL on error - * - *
- * Notes:
- *      (1) See pixGetBlackOrWhiteVal() for possible side effect (adding
- *          a color to a colormap).
- *      (2) The only complication is that pixs may have a colormap.
- *          There are two ways to add the black or white border:
- *          (a) As done here (simplest, most efficient)
- *          (b) l_int32 ws, hs, d;
- *              pixGetDimensions(pixs, &ws, &hs, &d);
- *              Pix *pixd = pixCreate(ws + left + right, hs + top + bot, d);
- *              PixColormap *cmap = pixGetColormap(pixs);
- *              if (cmap != NULL)
- *                  pixSetColormap(pixd, pixcmapCopy(cmap));
- *              pixSetBlackOrWhite(pixd, L_SET_WHITE);  // uses cmap
- *              pixRasterop(pixd, left, top, ws, hs, PIX_SET, pixs, 0, 0);
- * 
- */ -PIX * -pixAddBlackOrWhiteBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot, - l_int32 op) -{ -l_uint32 val; - - PROCNAME("pixAddBlackOrWhiteBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (op != L_GET_BLACK_VAL && op != L_GET_WHITE_VAL) - return (PIX *)ERROR_PTR("invalid op", procName, NULL); - - pixGetBlackOrWhiteVal(pixs, op, &val); - return pixAddBorderGeneral(pixs, left, right, top, bot, val); -} - - -/*! - * \brief pixAddBorderGeneral() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels added - * \param[in] val value of added border pixels - * \return pixd with the added exterior pixels, or NULL on error - * - *
- * Notes:
- *      (1) For binary images:
- *             white:  val = 0
- *             black:  val = 1
- *          For grayscale images:
- *             white:  val = 2 ** d - 1
- *             black:  val = 0
- *          For rgb color images:
- *             white:  val = 0xffffff00
- *             black:  val = 0
- *          For colormapped images, set val to the appropriate colormap index.
- *      (2) If the added border is either black or white, you can use
- *             pixAddBlackOrWhiteBorder()
- *          The black and white values for all images can be found with
- *             pixGetBlackOrWhiteVal()
- *          which, if pixs is cmapped, may add an entry to the colormap.
- *          Alternatively, if pixs has a colormap, you can find the index
- *          of the pixel whose intensity is closest to white or black:
- *             white: pixcmapGetRankIntensity(cmap, 1.0, &index);
- *             black: pixcmapGetRankIntensity(cmap, 0.0, &index);
- *          and use that for val.
- * 
- */ -PIX * -pixAddBorderGeneral(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot, - l_uint32 val) -{ -l_int32 ws, hs, wd, hd, d, maxval, op; -PIX *pixd; - - PROCNAME("pixAddBorderGeneral"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (left < 0 || right < 0 || top < 0 || bot < 0) - return (PIX *)ERROR_PTR("negative border added!", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, &d); - wd = ws + left + right; - hd = hs + top + bot; - if ((pixd = pixCreateNoInit(wd, hd, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - - /* Set the new border pixels */ - maxval = (d == 32) ? 0xffffff00 : (1 << d) - 1; - op = UNDEF; - if (val == 0) - op = PIX_CLR; - else if (val >= maxval) - op = PIX_SET; - if (op == UNDEF) { - pixSetAllArbitrary(pixd, val); - } else { /* just set or clear the border pixels */ - pixRasterop(pixd, 0, 0, left, hd, op, NULL, 0, 0); - pixRasterop(pixd, wd - right, 0, right, hd, op, NULL, 0, 0); - pixRasterop(pixd, 0, 0, wd, top, op, NULL, 0, 0); - pixRasterop(pixd, 0, hd - bot, wd, bot, op, NULL, 0, 0); - } - - /* Copy pixs into the interior */ - pixRasterop(pixd, left, top, ws, hs, PIX_SRC, pixs, 0, 0); - return pixd; -} - - -/*! - * \brief pixRemoveBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] npix number to be removed from each of the 4 sides - * \return pixd with pixels removed around border, or NULL on error - */ -PIX * -pixRemoveBorder(PIX *pixs, - l_int32 npix) -{ - PROCNAME("pixRemoveBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (npix == 0) - return pixClone(pixs); - return pixRemoveBorderGeneral(pixs, npix, npix, npix, npix); -} - - -/*! 
- * \brief pixRemoveBorderGeneral() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels removed - * \return pixd with pixels removed around border, or NULL on error - */ -PIX * -pixRemoveBorderGeneral(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 ws, hs, wd, hd, d; -PIX *pixd; - - PROCNAME("pixRemoveBorderGeneral"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (left < 0 || right < 0 || top < 0 || bot < 0) - return (PIX *)ERROR_PTR("negative border removed!", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, &d); - wd = ws - left - right; - hd = hs - top - bot; - if (wd <= 0) - return (PIX *)ERROR_PTR("width must be > 0", procName, NULL); - if (hd <= 0) - return (PIX *)ERROR_PTR("height must be > 0", procName, NULL); - if ((pixd = pixCreateNoInit(wd, hd, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopySpp(pixd, pixs); - pixCopyColormap(pixd, pixs); - - pixRasterop(pixd, 0, 0, wd, hd, PIX_SRC, pixs, left, top); - if (pixGetDepth(pixs) == 32 && pixGetSpp(pixs) == 4) - pixShiftAndTransferAlpha(pixd, pixs, -left, -top); - return pixd; -} - - -/*! - * \brief pixRemoveBorderToSize() - * - * \param[in] pixs all depths; colormap ok - * \param[in] wd target width; use 0 if only removing from height - * \param[in] hd target height; use 0 if only removing from width - * \return pixd with pixels removed around border, or NULL on error - * - *
- * Notes:
- *      (1) Removes pixels as evenly as possible from the sides of the
- *          image, leaving the central part.
- *      (2) Returns clone if no pixels requested removed, or the target
- *          sizes are larger than the image.
- * 
- */ -PIX * -pixRemoveBorderToSize(PIX *pixs, - l_int32 wd, - l_int32 hd) -{ -l_int32 w, h, top, bot, left, right, delta; - - PROCNAME("pixRemoveBorderToSize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((wd <= 0 || wd >= w) && (hd <= 0 || hd >= h)) - return pixClone(pixs); - - left = right = (w - wd) / 2; - delta = w - 2 * left - wd; - right += delta; - top = bot = (h - hd) / 2; - delta = h - hd - 2 * top; - bot += delta; - if (wd <= 0 || wd > w) - left = right = 0; - else if (hd <= 0 || hd > h) - top = bot = 0; - - return pixRemoveBorderGeneral(pixs, left, right, top, bot); -} - - -/*! - * \brief pixAddMirroredBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels added - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This applies what is effectively mirror boundary conditions.
- *          For the added border pixels in pixd, the pixels in pixs
- *          near the border are mirror-copied into the border region.
- *      (2) This is useful for avoiding special operations near
- *          boundaries when doing image processing operations
- *          such as rank filters and convolution.  In use, one first
- *          adds mirrored pixels to each side of the image.  The number
- *          of pixels added on each side is half the filter dimension.
- *          Then the image processing operations proceed over a
- *          region equal to the size of the original image, and
- *          write directly into a dest pix of the same size as pixs.
- *      (3) The general pixRasterop() is used for an in-place operation here
- *          because there is no overlap between the src and dest rectangles.
- * 
- */ -PIX * -pixAddMirroredBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 i, j, w, h; -PIX *pixd; - - PROCNAME("pixAddMirroredBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (left > w || right > w || top > h || bot > h) - return (PIX *)ERROR_PTR("border too large", procName, NULL); - - /* Set pixels on left, right, top and bottom, in that order */ - pixd = pixAddBorderGeneral(pixs, left, right, top, bot, 0); - for (j = 0; j < left; j++) - pixRasterop(pixd, left - 1 - j, top, 1, h, PIX_SRC, - pixd, left + j, top); - for (j = 0; j < right; j++) - pixRasterop(pixd, left + w + j, top, 1, h, PIX_SRC, - pixd, left + w - 1 - j, top); - for (i = 0; i < top; i++) - pixRasterop(pixd, 0, top - 1 - i, left + w + right, 1, PIX_SRC, - pixd, 0, top + i); - for (i = 0; i < bot; i++) - pixRasterop(pixd, 0, top + h + i, left + w + right, 1, PIX_SRC, - pixd, 0, top + h - 1 - i); - - return pixd; -} - - -/*! - * \brief pixAddRepeatedBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels added - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This applies a repeated border, as if the central part of
- *          the image is tiled over the plane.  So, for example, the
- *          pixels in the left border come from the right side of the image.
- *      (2) The general pixRasterop() is used for an in-place operation here
- *          because there is no overlap between the src and dest rectangles.
- * 
- */ -PIX * -pixAddRepeatedBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 w, h; -PIX *pixd; - - PROCNAME("pixAddRepeatedBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (left > w || right > w || top > h || bot > h) - return (PIX *)ERROR_PTR("border too large", procName, NULL); - - pixd = pixAddBorderGeneral(pixs, left, right, top, bot, 0); - - /* Set pixels on left, right, top and bottom, in that order */ - pixRasterop(pixd, 0, top, left, h, PIX_SRC, pixd, w, top); - pixRasterop(pixd, left + w, top, right, h, PIX_SRC, pixd, left, top); - pixRasterop(pixd, 0, 0, left + w + right, top, PIX_SRC, pixd, 0, h); - pixRasterop(pixd, 0, top + h, left + w + right, bot, PIX_SRC, pixd, 0, top); - - return pixd; -} - - -/*! - * \brief pixAddMixedBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot number of pixels added - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This applies mirrored boundary conditions horizontally
- *          and repeated b.c. vertically.
- *      (2) It is specifically used for avoiding special operations
- *          near boundaries when convolving a hue-saturation histogram
- *          with a given window size.  The repeated b.c. are used
- *          vertically for hue, and the mirrored b.c. are used
- *          horizontally for saturation.  The number of pixels added
- *          on each side is approximately (but not quite) half the
- *          filter dimension.  The image processing operations can
- *          then proceed over a region equal to the size of the original
- *          image, and write directly into a dest pix of the same
- *          size as pixs.
- *      (3) The general pixRasterop() can be used for an in-place
- *          operation here because there is no overlap between the
- *          src and dest rectangles.
- * 
- */ -PIX * -pixAddMixedBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 j, w, h; -PIX *pixd; - - PROCNAME("pixAddMixedBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (left > w || right > w || top > h || bot > h) - return (PIX *)ERROR_PTR("border too large", procName, NULL); - - /* Set mirrored pixels on left and right; - * then set repeated pixels on top and bottom. */ - pixd = pixAddBorderGeneral(pixs, left, right, top, bot, 0); - for (j = 0; j < left; j++) - pixRasterop(pixd, left - 1 - j, top, 1, h, PIX_SRC, - pixd, left + j, top); - for (j = 0; j < right; j++) - pixRasterop(pixd, left + w + j, top, 1, h, PIX_SRC, - pixd, left + w - 1 - j, top); - pixRasterop(pixd, 0, 0, left + w + right, top, PIX_SRC, pixd, 0, h); - pixRasterop(pixd, 0, top + h, left + w + right, bot, PIX_SRC, pixd, 0, top); - - return pixd; -} - - -/*! - * \brief pixAddContinuedBorder() - * - * \param[in] pixs all depths; colormap ok - * \param[in] left, right, top, bot pixels on each side to be added - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This adds pixels on each side whose values are equal to
- *          the value on the closest boundary pixel.
- * 
- */ -PIX * -pixAddContinuedBorder(PIX *pixs, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot) -{ -l_int32 i, j, w, h; -PIX *pixd; - - PROCNAME("pixAddContinuedBorder"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixd = pixAddBorderGeneral(pixs, left, right, top, bot, 0); - pixGetDimensions(pixs, &w, &h, NULL); - for (j = 0; j < left; j++) - pixRasterop(pixd, j, top, 1, h, PIX_SRC, pixd, left, top); - for (j = 0; j < right; j++) - pixRasterop(pixd, left + w + j, top, 1, h, - PIX_SRC, pixd, left + w - 1, top); - for (i = 0; i < top; i++) - pixRasterop(pixd, 0, i, left + w + right, 1, PIX_SRC, pixd, 0, top); - for (i = 0; i < bot; i++) - pixRasterop(pixd, 0, top + h + i, left + w + right, 1, - PIX_SRC, pixd, 0, top + h - 1); - - return pixd; -} - - -/*-------------------------------------------------------------------* - * Helper functions using alpha * - *-------------------------------------------------------------------*/ -/*! - * \brief pixShiftAndTransferAlpha() - * - * \param[in] pixd 32 bpp - * \param[in] pixs 32 bpp - * \param[in] shiftx, shifty - * \return 0 if OK; 1 on error - */ -l_ok -pixShiftAndTransferAlpha(PIX *pixd, - PIX *pixs, - l_float32 shiftx, - l_float32 shifty) -{ -l_int32 w, h; -PIX *pix1, *pix2; - - PROCNAME("pixShiftAndTransferAlpha"); - - if (!pixs || !pixd) - return ERROR_INT("pixs and pixd not both defined", procName, 1); - if (pixGetDepth(pixs) != 32 || pixGetSpp(pixs) != 4) - return ERROR_INT("pixs not 32 bpp and 4 spp", procName, 1); - if (pixGetDepth(pixd) != 32) - return ERROR_INT("pixd not 32 bpp", procName, 1); - - if (shiftx == 0 && shifty == 0) { - pixCopyRGBComponent(pixd, pixs, L_ALPHA_CHANNEL); - return 0; - } - - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pixGetDimensions(pixd, &w, &h, NULL); - pix2 = pixCreate(w, h, 8); - pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, -shiftx, -shifty); - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - 
pixDestroy(&pix2); - return 0; -} - - -/*! - * \brief pixDisplayLayersRGBA() - * - * \param[in] pixs cmap or 32 bpp rgba - * \param[in] val 32 bit unsigned color to use as background - * \param[in] maxw max output image width; 0 for no scaling - * \return pixd showing various image views, or NULL on error - * - *
- * Notes:
- *      (1) Use %val == 0xffffff00 for white background.
- *      (2) Three views are given:
- *           ~ the image with a fully opaque alpha
- *           ~ the alpha layer
- *           ~ the image as it would appear with a white background.
- * 
- */ -PIX * -pixDisplayLayersRGBA(PIX *pixs, - l_uint32 val, - l_int32 maxw) -{ -l_int32 w, width; -l_float32 scalefact; -PIX *pix1, *pix2, *pixd; -PIXA *pixa; -PIXCMAP *cmap; - - PROCNAME("pixDisplayLayersRGBA"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - cmap = pixGetColormap(pixs); - if (!cmap && !(pixGetDepth(pixs) == 32 && pixGetSpp(pixs) == 4)) - return (PIX *)ERROR_PTR("pixs not cmap and not 32 bpp rgba", - procName, NULL); - if ((w = pixGetWidth(pixs)) == 0) - return (PIX *)ERROR_PTR("pixs width 0 !!", procName, NULL); - - if (cmap) - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_WITH_ALPHA); - else - pix1 = pixCopy(NULL, pixs); - - /* Scale if necessary so the output width is not larger than maxw */ - scalefact = (maxw == 0) ? 1.0 : L_MIN(1.0, (l_float32)(maxw) / w); - width = (l_int32)(scalefact * w); - - pixa = pixaCreate(3); - pixSetSpp(pix1, 3); - pixaAddPix(pixa, pix1, L_INSERT); /* show the rgb values */ - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixConvertTo32(pix1); - pixaAddPix(pixa, pix2, L_INSERT); /* show the alpha channel */ - pixDestroy(&pix1); - pix1 = pixAlphaBlendUniform(pixs, (val & 0xffffff00)); - pixaAddPix(pixa, pix1, L_INSERT); /* with %val color bg showing */ - pixd = pixaDisplayTiledInRows(pixa, 32, width, scalefact, 0, 25, 2); - pixaDestroy(&pixa); - return pixd; -} - - -/*-------------------------------------------------------------* - * Color sample setting and extraction * - *-------------------------------------------------------------*/ -/*! - * \brief pixCreateRGBImage() - * - * \param[in] pixr 8 bpp red pix - * \param[in] pixg 8 bpp green pix - * \param[in] pixb 8 bpp blue pix - * \return 32 bpp pix, interleaved with 4 samples/pixel, - * or NULL on error - * - *
- * Notes:
- *      (1) the 4th byte, sometimes called the "alpha channel",
- *          and which is often used for blending between different
- *          images, is left with 0 value.
- *      (2) see Note (4) in pix.h for details on storage of
- *          8-bit samples within each 32-bit word.
- *      (3) This implementation, setting the r, g and b components
- *          sequentially, is much faster than setting them in parallel
- *          by constructing an RGB dest pixel and writing it to dest.
- *          The reason is there are many more cache misses when reading
- *          from 3 input images simultaneously.
- * 
- */ -PIX * -pixCreateRGBImage(PIX *pixr, - PIX *pixg, - PIX *pixb) -{ -l_int32 wr, wg, wb, hr, hg, hb, dr, dg, db; -PIX *pixd; - - PROCNAME("pixCreateRGBImage"); - - if (!pixr) - return (PIX *)ERROR_PTR("pixr not defined", procName, NULL); - if (!pixg) - return (PIX *)ERROR_PTR("pixg not defined", procName, NULL); - if (!pixb) - return (PIX *)ERROR_PTR("pixb not defined", procName, NULL); - pixGetDimensions(pixr, &wr, &hr, &dr); - pixGetDimensions(pixg, &wg, &hg, &dg); - pixGetDimensions(pixb, &wb, &hb, &db); - if (dr != 8 || dg != 8 || db != 8) - return (PIX *)ERROR_PTR("input pix not all 8 bpp", procName, NULL); - if (wr != wg || wr != wb) - return (PIX *)ERROR_PTR("widths not the same", procName, NULL); - if (hr != hg || hr != hb) - return (PIX *)ERROR_PTR("heights not the same", procName, NULL); - - if ((pixd = pixCreate(wr, hr, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixr); - pixSetRGBComponent(pixd, pixr, COLOR_RED); - pixSetRGBComponent(pixd, pixg, COLOR_GREEN); - pixSetRGBComponent(pixd, pixb, COLOR_BLUE); - - return pixd; -} - - -/*! - * \brief pixGetRGBComponent() - * - * \param[in] pixs 32 bpp, or colormapped - * \param[in] comp one of {COLOR_RED, COLOR_GREEN, COLOR_BLUE, - * L_ALPHA_CHANNEL} - * \return pixd the selected 8 bpp component image of the - * input 32 bpp image or NULL on error - * - *
- * Notes:
- *      (1) Three calls to this function generate the r, g and b 8 bpp
- *          component images.  This is much faster than generating the
- *          three images in parallel, by extracting a src pixel and setting
- *          the pixels of each component image from it.  The reason is
- *          there are many more cache misses when writing to three
- *          output images simultaneously.
- * 
- */ -PIX * -pixGetRGBComponent(PIX *pixs, - l_int32 comp) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *lines, *lined; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixGetRGBComponent"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return pixGetRGBComponentCmap(pixs, comp); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (comp != COLOR_RED && comp != COLOR_GREEN && - comp != COLOR_BLUE && comp != L_ALPHA_CHANNEL) - return (PIX *)ERROR_PTR("invalid comp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines + j, comp); - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*! - * \brief pixSetRGBComponent() - * - * \param[in] pixd 32 bpp - * \param[in] pixs 8 bpp - * \param[in] comp one of the set: {COLOR_RED, COLOR_GREEN, - * COLOR_BLUE, L_ALPHA_CHANNEL} - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This places the 8 bpp pixel in pixs into the
- *          specified component (properly interleaved) in pixd,
- *      (2) The two images are registered to the UL corner; the sizes
- *          need not be the same, but a warning is issued if they differ.
- * 
- */ -l_ok -pixSetRGBComponent(PIX *pixd, - PIX *pixs, - l_int32 comp) -{ -l_uint8 srcbyte; -l_int32 i, j, w, h, ws, hs, wd, hd; -l_int32 wpls, wpld; -l_uint32 *lines, *lined; -l_uint32 *datas, *datad; - - PROCNAME("pixSetRGBComponent"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixd) != 32) - return ERROR_INT("pixd not 32 bpp", procName, 1); - if (pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not 8 bpp", procName, 1); - if (comp != COLOR_RED && comp != COLOR_GREEN && - comp != COLOR_BLUE && comp != L_ALPHA_CHANNEL) - return ERROR_INT("invalid comp", procName, 1); - pixGetDimensions(pixs, &ws, &hs, NULL); - pixGetDimensions(pixd, &wd, &hd, NULL); - if (ws != wd || hs != hd) - L_WARNING("images sizes not equal\n", procName); - w = L_MIN(ws, wd); - h = L_MIN(hs, hd); - if (comp == L_ALPHA_CHANNEL) - pixSetSpp(pixd, 4); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - srcbyte = GET_DATA_BYTE(lines, j); - SET_DATA_BYTE(lined + j, comp, srcbyte); - } - } - - return 0; -} - - -/*! - * \brief pixGetRGBComponentCmap() - * - * \param[in] pixs colormapped - * \param[in] comp one of the set: {COLOR_RED, COLOR_GREEN, COLOR_BLUE} - * \return pixd the selected 8 bpp component image of the - * input cmapped image, or NULL on error - * - *
- * Notes:
- *      (1) In leptonica, we do not support alpha in colormaps.
- * 
- */ -PIX * -pixGetRGBComponentCmap(PIX *pixs, - l_int32 comp) -{ -l_int32 i, j, w, h, val, index; -l_int32 wplc, wpld; -l_uint32 *linec, *lined; -l_uint32 *datac, *datad; -PIX *pixc, *pixd; -PIXCMAP *cmap; -RGBA_QUAD *cta; - - PROCNAME("pixGetRGBComponentCmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if ((cmap = pixGetColormap(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixs not cmapped", procName, NULL); - if (comp == L_ALPHA_CHANNEL) - return (PIX *)ERROR_PTR("alpha in cmaps not supported", procName, NULL); - if (comp != COLOR_RED && comp != COLOR_GREEN && comp != COLOR_BLUE) - return (PIX *)ERROR_PTR("invalid comp", procName, NULL); - - /* If not 8 bpp, make a cmapped 8 bpp pix */ - if (pixGetDepth(pixs) == 8) - pixc = pixClone(pixs); - else - pixc = pixConvertTo8(pixs, TRUE); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreateNoInit(w, h, 8)) == NULL) { - pixDestroy(&pixc); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - wplc = pixGetWpl(pixc); - wpld = pixGetWpl(pixd); - datac = pixGetData(pixc); - datad = pixGetData(pixd); - cta = (RGBA_QUAD *)cmap->array; - - for (i = 0; i < h; i++) { - linec = datac + i * wplc; - lined = datad + i * wpld; - if (comp == COLOR_RED) { - for (j = 0; j < w; j++) { - index = GET_DATA_BYTE(linec, j); - val = cta[index].red; - SET_DATA_BYTE(lined, j, val); - } - } else if (comp == COLOR_GREEN) { - for (j = 0; j < w; j++) { - index = GET_DATA_BYTE(linec, j); - val = cta[index].green; - SET_DATA_BYTE(lined, j, val); - } - } else if (comp == COLOR_BLUE) { - for (j = 0; j < w; j++) { - index = GET_DATA_BYTE(linec, j); - val = cta[index].blue; - SET_DATA_BYTE(lined, j, val); - } - } - } - - pixDestroy(&pixc); - return pixd; -} - - -/*! 
- * \brief pixCopyRGBComponent() - * - * \param[in] pixd 32 bpp - * \param[in] pixs 32 bpp - * \param[in] comp one of the set: {COLOR_RED, COLOR_GREEN, - * COLOR_BLUE, L_ALPHA_CHANNEL} - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The two images are registered to the UL corner.  The sizes
- *          are usually the same, and a warning is issued if they differ.
- * 
- */ -l_ok -pixCopyRGBComponent(PIX *pixd, - PIX *pixs, - l_int32 comp) -{ -l_int32 i, j, w, h, ws, hs, wd, hd, val; -l_int32 wpls, wpld; -l_uint32 *lines, *lined; -l_uint32 *datas, *datad; - - PROCNAME("pixCopyRGBComponent"); - - if (!pixd && pixGetDepth(pixd) != 32) - return ERROR_INT("pixd not defined or not 32 bpp", procName, 1); - if (!pixs && pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (comp != COLOR_RED && comp != COLOR_GREEN && - comp != COLOR_BLUE && comp != L_ALPHA_CHANNEL) - return ERROR_INT("invalid component", procName, 1); - pixGetDimensions(pixs, &ws, &hs, NULL); - pixGetDimensions(pixd, &wd, &hd, NULL); - if (ws != wd || hs != hd) - L_WARNING("images sizes not equal\n", procName); - w = L_MIN(ws, wd); - h = L_MIN(hs, hd); - if (comp == L_ALPHA_CHANNEL) - pixSetSpp(pixd, 4); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines + j, comp); - SET_DATA_BYTE(lined + j, comp, val); - } - } - return 0; -} - - -/*! - * \brief composeRGBPixel() - * - * \param[in] rval, gval, bval - * \param[out] ppixel 32-bit pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) All channels are 8 bits: the input values must be between
- *          0 and 255.  For speed, this is not enforced by masking
- *          with 0xff before shifting.
- *      (2) A slower implementation uses macros:
- *            SET_DATA_BYTE(ppixel, COLOR_RED, rval);
- *            SET_DATA_BYTE(ppixel, COLOR_GREEN, gval);
- *            SET_DATA_BYTE(ppixel, COLOR_BLUE, bval);
- * 
- */ -l_ok -composeRGBPixel(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_uint32 *ppixel) -{ - PROCNAME("composeRGBPixel"); - - if (!ppixel) - return ERROR_INT("&pixel not defined", procName, 1); - - *ppixel = ((l_uint32)rval << L_RED_SHIFT) | - ((l_uint32)gval << L_GREEN_SHIFT) | - ((l_uint32)bval << L_BLUE_SHIFT); - return 0; -} - - -/*! - * \brief composeRGBAPixel() - * - * \param[in] rval, gval, bval, aval - * \param[out] ppixel 32-bit pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) All channels are 8 bits: the input values must be between
- *          0 and 255.  For speed, this is not enforced by masking
- *          with 0xff before shifting.
- * 
- */ -l_ok -composeRGBAPixel(l_int32 rval, - l_int32 gval, - l_int32 bval, - l_int32 aval, - l_uint32 *ppixel) -{ - PROCNAME("composeRGBAPixel"); - - if (!ppixel) - return ERROR_INT("&pixel not defined", procName, 1); - - *ppixel = ((l_uint32)rval << L_RED_SHIFT) | - ((l_uint32)gval << L_GREEN_SHIFT) | - ((l_uint32)bval << L_BLUE_SHIFT) | - aval; - return 0; -} - - -/*! - * \brief extractRGBValues() - * - * \param[in] pixel 32 bit - * \param[out] prval [optional] red component - * \param[out] pgval [optional] green component - * \param[out] pbval [optional] blue component - * \return void - * - *
- * Notes:
- *      (1) A slower implementation uses macros:
- *             *prval = GET_DATA_BYTE(&pixel, COLOR_RED);
- *             *pgval = GET_DATA_BYTE(&pixel, COLOR_GREEN);
- *             *pbval = GET_DATA_BYTE(&pixel, COLOR_BLUE);
- * 
- */ -void -extractRGBValues(l_uint32 pixel, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval) -{ - if (prval) *prval = (pixel >> L_RED_SHIFT) & 0xff; - if (pgval) *pgval = (pixel >> L_GREEN_SHIFT) & 0xff; - if (pbval) *pbval = (pixel >> L_BLUE_SHIFT) & 0xff; - return; -} - - -/*! - * \brief extractRGBAValues() - * - * \param[in] pixel 32 bit - * \param[out] prval [optional] red component - * \param[out] pgval [optional] green component - * \param[out] pbval [optional] blue component - * \param[out] paval [optional] alpha component - * \return void - */ -void -extractRGBAValues(l_uint32 pixel, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval, - l_int32 *paval) -{ - if (prval) *prval = (pixel >> L_RED_SHIFT) & 0xff; - if (pgval) *pgval = (pixel >> L_GREEN_SHIFT) & 0xff; - if (pbval) *pbval = (pixel >> L_BLUE_SHIFT) & 0xff; - if (paval) *paval = (pixel >> L_ALPHA_SHIFT) & 0xff; - return; -} - - -/*! - * \brief extractMinMaxComponent() - * - * \param[in] pixel 32 bpp RGB - * \param[in] type L_CHOOSE_MIN or L_CHOOSE_MAX - * \return component in range [0 ... 255], or NULL on error - */ -l_int32 -extractMinMaxComponent(l_uint32 pixel, - l_int32 type) -{ -l_int32 rval, gval, bval, val; - - extractRGBValues(pixel, &rval, &gval, &bval); - if (type == L_CHOOSE_MIN) { - val = L_MIN(rval, gval); - val = L_MIN(val, bval); - } else { /* type == L_CHOOSE_MAX */ - val = L_MAX(rval, gval); - val = L_MAX(val, bval); - } - return val; -} - - -/*! - * \brief pixGetRGBLine() - * - * \param[in] pixs 32 bpp - * \param[in] row - * \param[in] bufr array of red samples; size w bytes - * \param[in] bufg array of green samples; size w bytes - * \param[in] bufb array of blue samples; size w bytes - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This puts rgb components from the input line in pixs
- *          into the given buffers.
- * 
- */ -l_ok -pixGetRGBLine(PIX *pixs, - l_int32 row, - l_uint8 *bufr, - l_uint8 *bufg, - l_uint8 *bufb) -{ -l_uint32 *lines; -l_int32 j, w, h; -l_int32 wpls; - - PROCNAME("pixGetRGBLine"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (!bufr || !bufg || !bufb) - return ERROR_INT("buffer not defined", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - if (row < 0 || row >= h) - return ERROR_INT("row out of bounds", procName, 1); - wpls = pixGetWpl(pixs); - lines = pixGetData(pixs) + row * wpls; - - for (j = 0; j < w; j++) { - bufr[j] = GET_DATA_BYTE(lines + j, COLOR_RED); - bufg[j] = GET_DATA_BYTE(lines + j, COLOR_GREEN); - bufb[j] = GET_DATA_BYTE(lines + j, COLOR_BLUE); - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Raster line pixel setter * - *-------------------------------------------------------------*/ -/*! - * \brief setLineDataVal() - * - * \param[in] line ptr to first word in raster line data - * \param[in] j index of pixels into the raster line - * \param[in] d depth of the pixel - * \param[in] val pixel value to be set - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a convenience function to set a pixel value in a
- *          raster line where the depth of the image can have different
- *          values (1, 2, 4, 8, 16 or 32).
- * 
- */ -l_ok -setLineDataVal(l_uint32 *line, - l_int32 j, - l_int32 d, - l_uint32 val) -{ - PROCNAME("setLineDataVal"); - - if (!line) - return ERROR_INT("line not defined", procName, 1); - if (j < 0) - return ERROR_INT("j must be >= 0", procName, 1); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return ERROR_INT("invalid d", procName, 1); - - if (d == 1) - SET_DATA_BIT_VAL(line, j, val); - else if (d == 2) - SET_DATA_DIBIT(line, j, val); - else if (d == 4) - SET_DATA_QBIT(line, j, val); - else if (d == 8) - SET_DATA_BYTE(line, j, val); - else if (d == 16) - SET_DATA_TWO_BYTES(line, j, val); - else /* d == 32 */ - *(line + j) = val; - return 0; -} - - -/*-------------------------------------------------------------* - * Pixel endian conversion * - *-------------------------------------------------------------*/ -/*! - * \brief pixEndianByteSwapNew() - * - * \param[in] pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is used to convert the data in a pix to a
- *          serialized byte buffer in raster order, and, for RGB,
- *          in order RGBA.  This requires flipping bytes within
- *          each 32-bit word for little-endian platforms, because the
- *          words have a MSB-to-the-left rule, whereas byte raster-order
- *          requires the left-most byte in each word to be byte 0.
- *          For big-endians, no swap is necessary, so this returns a clone.
- *      (2) Unlike pixEndianByteSwap(), which swaps the bytes in-place,
- *          this returns a new pix (or a clone).  We provide this
- *          because often when serialization is done, the source
- *          pix needs to be restored to canonical little-endian order,
- *          and this requires a second byte swap.  In such a situation,
- *          it is twice as fast to make a new pix in big-endian order,
- *          use it, and destroy it.
- * 
- */ -PIX * -pixEndianByteSwapNew(PIX *pixs) -{ -l_uint32 *datas, *datad; -l_int32 i, j, h, wpl; -l_uint32 word; -PIX *pixd; - - PROCNAME("pixEndianByteSwapNew"); - -#ifdef L_BIG_ENDIAN - - return pixClone(pixs); - -#else /* L_LITTLE_ENDIAN */ - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - datas = pixGetData(pixs); - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - for (j = 0; j < wpl; j++, datas++, datad++) { - word = *datas; - *datad = (word >> 24) | - ((word >> 8) & 0x0000ff00) | - ((word << 8) & 0x00ff0000) | - (word << 24); - } - } - - return pixd; - -#endif /* L_BIG_ENDIAN */ - -} - - -/*! - * \brief pixEndianByteSwap() - * - * \param[in] pixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used on little-endian platforms to swap
- *          the bytes within a word; bytes 0 and 3 are swapped,
- *          and bytes 1 and 2 are swapped.
- *      (2) This is required for little-endians in situations
- *          where we convert from a serialized byte order that is
- *          in raster order, as one typically has in file formats,
- *          to one with MSB-to-the-left in each 32-bit word, or v.v.
- *          See pix.h for a description of the canonical format
- *          (MSB-to-the left) that is used for both little-endian
- *          and big-endian platforms.   For big-endians, the
- *          MSB-to-the-left word order has the bytes in raster
- *          order when serialized, so no byte flipping is required.
- * 
- */ -l_ok -pixEndianByteSwap(PIX *pixs) -{ -l_uint32 *data; -l_int32 i, j, h, wpl; -l_uint32 word; - - PROCNAME("pixEndianByteSwap"); - -#ifdef L_BIG_ENDIAN - - return 0; - -#else /* L_LITTLE_ENDIAN */ - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - for (i = 0; i < h; i++) { - for (j = 0; j < wpl; j++, data++) { - word = *data; - *data = (word >> 24) | - ((word >> 8) & 0x0000ff00) | - ((word << 8) & 0x00ff0000) | - (word << 24); - } - } - - return 0; - -#endif /* L_BIG_ENDIAN */ - -} - - -/*! - * \brief lineEndianByteSwap() - * - * \param[in] datad dest byte array data, reordered on little-endians - * \param[in] datas a src line of pix data) - * \param[in] wpl number of 32 bit words in the line - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used on little-endian platforms to swap
- *          the bytes within each word in the line of image data.
- *          Bytes 0 <==> 3 and 1 <==> 2 are swapped in the dest
- *          byte array data8d, relative to the pix data in datas.
- *      (2) The bytes represent 8 bit pixel values.  They are swapped
- *          for little endians so that when the dest array datad
- *          is addressed by bytes, the pixels are chosen sequentially
- *          from left to right in the image.
- * 
- */ -l_int32 -lineEndianByteSwap(l_uint32 *datad, - l_uint32 *datas, - l_int32 wpl) -{ -l_int32 j; -l_uint32 word; - - PROCNAME("lineEndianByteSwap"); - - if (!datad || !datas) - return ERROR_INT("datad and datas not both defined", procName, 1); - -#ifdef L_BIG_ENDIAN - - memcpy(datad, datas, 4 * wpl); - return 0; - -#else /* L_LITTLE_ENDIAN */ - - for (j = 0; j < wpl; j++, datas++, datad++) { - word = *datas; - *datad = (word >> 24) | - ((word >> 8) & 0x0000ff00) | - ((word << 8) & 0x00ff0000) | - (word << 24); - } - return 0; - -#endif /* L_BIG_ENDIAN */ - -} - - -/*! - * \brief pixEndianTwoByteSwapNew() - * - * \param[in] pixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used on little-endian platforms to swap the
- *          2-byte entities within a 32-bit word.
- *      (2) This is equivalent to a full byte swap, as performed
- *          by pixEndianByteSwap(), followed by byte swaps in
- *          each of the 16-bit entities separately.
- *      (3) Unlike pixEndianTwoByteSwap(), which swaps the shorts in-place,
- *          this returns a new pix (or a clone).  We provide this
- *          to avoid having to swap twice in situations where the input
- *          pix must be restored to canonical little-endian order.
- * 
- */ -PIX * -pixEndianTwoByteSwapNew(PIX *pixs) -{ -l_uint32 *datas, *datad; -l_int32 i, j, h, wpl; -l_uint32 word; -PIX *pixd; - - PROCNAME("pixEndianTwoByteSwapNew"); - -#ifdef L_BIG_ENDIAN - - return pixClone(pixs); - -#else /* L_LITTLE_ENDIAN */ - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - datas = pixGetData(pixs); - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - for (i = 0; i < h; i++) { - for (j = 0; j < wpl; j++, datas++, datad++) { - word = *datas; - *datad = (word << 16) | (word >> 16); - } - } - - return pixd; - -#endif /* L_BIG_ENDIAN */ - -} - - -/*! - * \brief pixEndianTwoByteSwap() - * - * \param[in] pixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used on little-endian platforms to swap the
- *          2-byte entities within a 32-bit word.
- *      (2) This is equivalent to a full byte swap, as performed
- *          by pixEndianByteSwap(), followed by byte swaps in
- *          each of the 16-bit entities separately.
- * 
- */ -l_ok -pixEndianTwoByteSwap(PIX *pixs) -{ -l_uint32 *data; -l_int32 i, j, h, wpl; -l_uint32 word; - - PROCNAME("pixEndianTwoByteSwap"); - -#ifdef L_BIG_ENDIAN - - return 0; - -#else /* L_LITTLE_ENDIAN */ - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - for (i = 0; i < h; i++) { - for (j = 0; j < wpl; j++, data++) { - word = *data; - *data = (word << 16) | (word >> 16); - } - } - - return 0; - -#endif /* L_BIG_ENDIAN */ - -} - - -/*-------------------------------------------------------------* - * Extract raster data as binary string * - *-------------------------------------------------------------*/ -/*! - * \brief pixGetRasterData() - * - * \param[in] pixs 1, 8, 32 bpp - * \param[out] pdata raster data in memory - * \param[out] pnbytes number of bytes in data string - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns the raster data as a byte string, padded to the
- *          byte.  For 1 bpp, the first pixel is the MSbit in the first byte.
- *          For rgb, the bytes are in (rgb) order.  This is the format
- *          required for flate encoding of pixels in a PostScript file.
- * 
- */ -l_ok -pixGetRasterData(PIX *pixs, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_int32 w, h, d, wpl, i, j, rval, gval, bval; -l_int32 databpl; /* bytes for each raster line in returned data */ -l_uint8 *line, *data; /* packed data in returned array */ -l_uint32 *rline, *rdata; /* data in pix raster */ - - PROCNAME("pixGetRasterData"); - - if (pdata) *pdata = NULL; - if (pnbytes) *pnbytes = 0; - if (!pdata || !pnbytes) - return ERROR_INT("&data and &nbytes not both defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return ERROR_INT("depth not in {1,2,4,8,16,32}", procName, 1); - rdata = pixGetData(pixs); - wpl = pixGetWpl(pixs); - if (d == 1) - databpl = (w + 7) / 8; - else if (d == 2) - databpl = (w + 3) / 4; - else if (d == 4) - databpl = (w + 1) / 2; - else if (d == 8 || d == 16) - databpl = w * (d / 8); - else /* d == 32 bpp rgb */ - databpl = 3 * w; - if ((data = (l_uint8 *)LEPT_CALLOC((size_t)databpl * h, sizeof(l_uint8))) - == NULL) - return ERROR_INT("data not allocated", procName, 1); - *pdata = data; - *pnbytes = (size_t)databpl * h; - - for (i = 0; i < h; i++) { - rline = rdata + i * wpl; - line = data + i * databpl; - if (d <= 8) { - for (j = 0; j < databpl; j++) - line[j] = GET_DATA_BYTE(rline, j); - } else if (d == 16) { - for (j = 0; j < w; j++) - line[2 * j] = GET_DATA_TWO_BYTES(rline, j); - } else { /* d == 32 bpp rgb */ - for (j = 0; j < w; j++) { - extractRGBValues(rline[j], &rval, &gval, &bval); - *(line + 3 * j) = rval; - *(line + 3 * j + 1) = gval; - *(line + 3 * j + 2) = bval; - } - } - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Test alpha component opaqueness * - *-------------------------------------------------------------*/ -/*! 
- * \brief pixAlphaIsOpaque() - * - * \param[in] pix 32 bpp, spp == 4 - * \param[out] popaque 1 if spp == 4 and all alpha component - * values are 255 (opaque); 0 otherwise - * \return 0 if OK, 1 on error - * Notes: - * 1) On error, opaque is returned as 0 (FALSE). - */ -l_ok -pixAlphaIsOpaque(PIX *pix, - l_int32 *popaque) -{ -l_int32 w, h, wpl, i, j, alpha; -l_uint32 *data, *line; - - PROCNAME("pixAlphaIsOpaque"); - - if (!popaque) - return ERROR_INT("&opaque not defined", procName, 1); - *popaque = FALSE; - if (!pix) - return ERROR_INT("&pix not defined", procName, 1); - if (pixGetDepth(pix) != 32) - return ERROR_INT("&pix not 32 bpp", procName, 1); - if (pixGetSpp(pix) != 4) - return ERROR_INT("&pix not 4 spp", procName, 1); - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - pixGetDimensions(pix, &w, &h, NULL); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - alpha = GET_DATA_BYTE(line + j, L_ALPHA_CHANNEL); - if (alpha ^ 0xff) /* not opaque */ - return 0; - } - } - - *popaque = TRUE; - return 0; -} - - -/*-------------------------------------------------------------* - * Setup helpers for 8 bpp byte processing * - *-------------------------------------------------------------*/ -/*! - * \brief pixSetupByteProcessing() - * - * \param[in] pix 8 bpp, no colormap - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \return line ptr array, or NULL on error - * - *
- * Notes:
- *      (1) This is a simple helper for processing 8 bpp images with
- *          direct byte access.  It can swap byte order within each word.
- *      (2) After processing, you must call pixCleanupByteProcessing(),
- *          which frees the lineptr array and restores byte order.
- *      (3) Usage:
- *              l_uint8 **lineptrs = pixSetupByteProcessing(pix, &w, &h);
- *              for (i = 0; i < h; i++) {
- *                  l_uint8 *line = lineptrs[i];
- *                  for (j = 0; j < w; j++) {
- *                      val = line[j];
- *                      ...
- *                  }
- *              }
- *              pixCleanupByteProcessing(pix, lineptrs);
- * 
- */ -l_uint8 ** -pixSetupByteProcessing(PIX *pix, - l_int32 *pw, - l_int32 *ph) -{ -l_int32 w, h; - - PROCNAME("pixSetupByteProcessing"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!pix || pixGetDepth(pix) != 8) - return (l_uint8 **)ERROR_PTR("pix not defined or not 8 bpp", - procName, NULL); - pixGetDimensions(pix, &w, &h, NULL); - if (pw) *pw = w; - if (ph) *ph = h; - if (pixGetColormap(pix)) - return (l_uint8 **)ERROR_PTR("pix has colormap", procName, NULL); - - pixEndianByteSwap(pix); - return (l_uint8 **)pixGetLinePtrs(pix, NULL); -} - - -/*! - * \brief pixCleanupByteProcessing() - * - * \param[in] pix 8 bpp, no colormap - * \param[in] lineptrs ptrs to the beginning of each raster line of data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This must be called after processing that was initiated
- *          by pixSetupByteProcessing() has finished.
- * 
- */ -l_ok -pixCleanupByteProcessing(PIX *pix, - l_uint8 **lineptrs) -{ - PROCNAME("pixCleanupByteProcessing"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!lineptrs) - return ERROR_INT("lineptrs not defined", procName, 1); - - pixEndianByteSwap(pix); - LEPT_FREE(lineptrs); - return 0; -} - - -/*------------------------------------------------------------------------* - * Setting parameters for antialias masking with alpha transforms * - *------------------------------------------------------------------------*/ -/*! - * \brief l_setAlphaMaskBorder() - * - * \param[in] val1, val2 in [0.0 ... 1.0] - * \return void - * - *
- * Notes:
- *      (1) This sets the opacity values used to generate the two outer
- *          boundary rings in the alpha mask associated with geometric
- *          transforms such as pixRotateWithAlpha().
- *      (2) The default values are val1 = 0.0 (completely transparent
- *          in the outermost ring) and val2 = 0.5 (half transparent
- *          in the second ring).  When the image is blended, this
- *          completely removes the outer ring (shrinking the image by
- *          2 in each direction), and alpha-blends with 0.5 the second ring.
- *          Using val1 = 0.25 and val2 = 0.75 gives a slightly more
- *          blurred border, with no perceptual difference at screen resolution.
- *      (3) The actual mask values are found by multiplying these
- *          normalized opacity values by 255.
- * 
- */ -void -l_setAlphaMaskBorder(l_float32 val1, - l_float32 val2) -{ - val1 = L_MAX(0.0, L_MIN(1.0, val1)); - val2 = L_MAX(0.0, L_MIN(1.0, val2)); - AlphaMaskBorderVals[0] = val1; - AlphaMaskBorderVals[1] = val2; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix3.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix3.c deleted file mode 100644 index 20ab924c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix3.c +++ /dev/null @@ -1,3717 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file pix3.c - *
- *
- *    This file has these operations:
- *
- *      (1) Mask-directed operations
- *      (2) Full-image bit-logical operations
- *      (3) Foreground pixel counting operations on 1 bpp images
- *      (4) Average and variance of pixel values
- *      (5) Mirrored tiling of a smaller image
- *
- *
- *    Masked operations
- *           l_int32     pixSetMasked()
- *           l_int32     pixSetMaskedGeneral()
- *           l_int32     pixCombineMasked()
- *           l_int32     pixCombineMaskedGeneral()
- *           l_int32     pixPaintThroughMask()
- *           l_int32     pixCopyWithBoxa()  -- this is boxa-directed
- *           PIX        *pixPaintSelfThroughMask()
- *           PIX        *pixMakeMaskFromVal()
- *           PIX        *pixMakeMaskFromLUT()
- *           PIX        *pixMakeArbMaskFromRGB()
- *           PIX        *pixSetUnderTransparency()
- *           PIX        *pixMakeAlphaFromMask()
- *           l_int32     pixGetColorNearMaskBoundary()
- *           PIX        *pixDisplaySelectedPixels()  -- for debugging
- *
- *    One and two-image boolean operations on arbitrary depth images
- *           PIX        *pixInvert()
- *           PIX        *pixOr()
- *           PIX        *pixAnd()
- *           PIX        *pixXor()
- *           PIX        *pixSubtract()
- *
- *    Foreground pixel counting in 1 bpp images
- *           l_int32     pixZero()
- *           l_int32     pixForegroundFraction()
- *           NUMA       *pixaCountPixels()
- *           l_int32     pixCountPixels()
- *           l_int32     pixCountPixelsInRect()
- *           NUMA       *pixCountByRow()
- *           NUMA       *pixCountByColumn()
- *           NUMA       *pixCountPixelsByRow()
- *           NUMA       *pixCountPixelsByColumn()
- *           l_int32     pixCountPixelsInRow()
- *           NUMA       *pixGetMomentByColumn()
- *           l_int32     pixThresholdPixelSum()
- *           l_int32    *makePixelSumTab8()
- *           l_int32    *makePixelCentroidTab8()
- *
- *    Average of pixel values in gray images
- *           NUMA       *pixAverageByRow()
- *           NUMA       *pixAverageByColumn()
- *           l_int32     pixAverageInRect()
- *
- *    Average of pixel values in RGB images
- *           l_int32     pixAverageInRectRGB()
- *
- *    Variance of pixel values in gray images
- *           NUMA       *pixVarianceByRow()
- *           NUMA       *pixVarianceByColumn()
- *           l_int32     pixVarianceInRect()
- *
- *    Average of absolute value of pixel differences in gray images
- *           NUMA       *pixAbsDiffByRow()
- *           NUMA       *pixAbsDiffByColumn()
- *           l_int32     pixAbsDiffInRect()
- *           l_int32     pixAbsDiffOnLine()
- *
- *    Count of pixels with specific value
- *           l_int32     pixCountArbInRect()
- *
- *    Mirrored tiling
- *           PIX        *pixMirroredTiling()
- *
- *    Representative tile near but outside region
- *           l_int32     pixFindRepCloseTile()
- *
- *    Static helper function
- *           static BOXA    *findTileRegionsForSearch()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -static BOXA *findTileRegionsForSearch(BOX *box, l_int32 w, l_int32 h, - l_int32 searchdir, l_int32 mindist, - l_int32 tsize, l_int32 ntiles); - -#ifndef NO_CONSOLE_IO -#define EQUAL_SIZE_WARNING 0 -#endif /* ~NO_CONSOLE_IO */ - -/*-------------------------------------------------------------* - * Masked operations * - *-------------------------------------------------------------*/ -/*! - * \brief pixSetMasked() - * - * \param[in] pixd 1, 2, 4, 8, 16 or 32 bpp; or colormapped - * \param[in] pixm [optional] 1 bpp mask; no operation if NULL - * \param[in] val value to set at each masked pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) In-place operation.
- *      (2) NOTE: For cmapped images, this calls pixSetMaskedCmap().
- *          %val must be the 32-bit color representation of the RGB pixel.
- *          It is not the index into the colormap!
- *      (2) If pixm == NULL, a warning is given.
- *      (3) This is an implicitly aligned operation, where the UL
- *          corners of pixd and pixm coincide.  A warning is
- *          issued if the two image sizes differ significantly,
- *          but the operation proceeds.
- *      (4) Each pixel in pixd that co-locates with an ON pixel
- *          in pixm is set to the specified input value.
- *          Other pixels in pixd are not changed.
- *      (5) You can visualize this as painting the color through
- *          the mask, as a stencil.
- *      (6) If you do not want to have the UL corners aligned,
- *          use the function pixSetMaskedGeneral(), which requires
- *          you to input the UL corner of pixm relative to pixd.
- *      (7) Implementation details: see comments in pixPaintThroughMask()
- *          for when we use rasterop to do the painting.
- * 
- */ -l_ok -pixSetMasked(PIX *pixd, - PIX *pixm, - l_uint32 val) -{ -l_int32 wd, hd, wm, hm, w, h, d, wpld, wplm; -l_int32 i, j, rval, gval, bval; -l_uint32 *datad, *datam, *lined, *linem; - - PROCNAME("pixSetMasked"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixm) { - L_WARNING("no mask; nothing to do\n", procName); - return 0; - } - if (pixGetColormap(pixd)) { - extractRGBValues(val, &rval, &gval, &bval); - return pixSetMaskedCmap(pixd, pixm, 0, 0, rval, gval, bval); - } - - if (pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - d = pixGetDepth(pixd); - if (d == 1) - val &= 1; - else if (d == 2) - val &= 3; - else if (d == 4) - val &= 0x0f; - else if (d == 8) - val &= 0xff; - else if (d == 16) - val &= 0xffff; - else if (d != 32) - return ERROR_INT("pixd not 1, 2, 4, 8, 16 or 32 bpp", procName, 1); - pixGetDimensions(pixm, &wm, &hm, NULL); - - /* If d == 1, use rasterop; it's about 25x faster */ - if (d == 1) { - if (val == 0) { - PIX *pixmi = pixInvert(NULL, pixm); - pixRasterop(pixd, 0, 0, wm, hm, PIX_MASK, pixmi, 0, 0); - pixDestroy(&pixmi); - } else { /* val == 1 */ - pixRasterop(pixd, 0, 0, wm, hm, PIX_PAINT, pixm, 0, 0); - } - return 0; - } - - /* For d < 32, use rasterop for val == 0 (black); ~3x faster. */ - if (d < 32 && val == 0) { - PIX *pixmd = pixUnpackBinary(pixm, d, 1); - pixRasterop(pixd, 0, 0, wm, hm, PIX_MASK, pixmd, 0, 0); - pixDestroy(&pixmd); - return 0; - } - - /* For d < 32, use rasterop for val == maxval (white); ~3x faster. 
*/ - if (d < 32 && val == ((1 << d) - 1)) { - PIX *pixmd = pixUnpackBinary(pixm, d, 0); - pixRasterop(pixd, 0, 0, wm, hm, PIX_PAINT, pixmd, 0, 0); - pixDestroy(&pixmd); - return 0; - } - - pixGetDimensions(pixd, &wd, &hd, &d); - w = L_MIN(wd, wm); - h = L_MIN(hd, hm); - if (L_ABS(wd - wm) > 7 || L_ABS(hd - hm) > 7) /* allow a small tolerance */ - L_WARNING("pixd and pixm sizes differ\n", procName); - - datad = pixGetData(pixd); - datam = pixGetData(pixm); - wpld = pixGetWpl(pixd); - wplm = pixGetWpl(pixm); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if (GET_DATA_BIT(linem, j)) { - switch(d) - { - case 2: - SET_DATA_DIBIT(lined, j, val); - break; - case 4: - SET_DATA_QBIT(lined, j, val); - break; - case 8: - SET_DATA_BYTE(lined, j, val); - break; - case 16: - SET_DATA_TWO_BYTES(lined, j, val); - break; - case 32: - *(lined + j) = val; - break; - default: - return ERROR_INT("shouldn't get here", procName, 1); - } - } - } - } - - return 0; -} - - -/*! - * \brief pixSetMaskedGeneral() - * - * \param[in] pixd 8, 16 or 32 bpp - * \param[in] pixm [optional] 1 bpp mask; no operation if null - * \param[in] val value to set at each masked pixel - * \param[in] x, y location of UL corner of pixm relative to pixd; - * can be negative - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is an in-place operation.
- *      (2) Alignment is explicit.  If you want the UL corners of
- *          the two images to be aligned, use pixSetMasked().
- *      (3) A typical use would be painting through the foreground
- *          of a small binary mask pixm, located somewhere on a
- *          larger pixd.  Other pixels in pixd are not changed.
- *      (4) You can visualize this as painting the color through
- *          the mask, as a stencil.
- *      (5) This uses rasterop to handle clipping and different depths of pixd.
- *      (6) If pixd has a colormap, you should call pixPaintThroughMask().
- *      (7) Why is this function here, if pixPaintThroughMask() does the
- *          same thing, and does it more generally?  I've retained it here
- *          to show how one can paint through a mask using only full
- *          image rasterops, rather than pixel peeking in pixm and poking
- *          in pixd.  It's somewhat baroque, but I found it amusing.
- * 
- */ -l_ok -pixSetMaskedGeneral(PIX *pixd, - PIX *pixm, - l_uint32 val, - l_int32 x, - l_int32 y) -{ -l_int32 wm, hm, d; -PIX *pixmu, *pixc; - - PROCNAME("pixSetMaskedGeneral"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixm) /* nothing to do */ - return 0; - - d = pixGetDepth(pixd); - if (d != 8 && d != 16 && d != 32) - return ERROR_INT("pixd not 8, 16 or 32 bpp", procName, 1); - if (pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - - /* Unpack binary to depth d, with inversion: 1 --> 0, 0 --> 0xff... */ - if ((pixmu = pixUnpackBinary(pixm, d, 1)) == NULL) - return ERROR_INT("pixmu not made", procName, 1); - - /* Clear stenciled pixels in pixd */ - pixGetDimensions(pixm, &wm, &hm, NULL); - pixRasterop(pixd, x, y, wm, hm, PIX_SRC & PIX_DST, pixmu, 0, 0); - - /* Generate image with requisite color */ - if ((pixc = pixCreateTemplate(pixmu)) == NULL) { - pixDestroy(&pixmu); - return ERROR_INT("pixc not made", procName, 1); - } - pixSetAllArbitrary(pixc, val); - - /* Invert stencil mask, and paint color color into stencil */ - pixInvert(pixmu, pixmu); - pixAnd(pixmu, pixmu, pixc); - - /* Finally, repaint stenciled pixels, with val, in pixd */ - pixRasterop(pixd, x, y, wm, hm, PIX_SRC | PIX_DST, pixmu, 0, 0); - - pixDestroy(&pixmu); - pixDestroy(&pixc); - return 0; -} - - -/*! - * \brief pixCombineMasked() - * - * \param[in] pixd 1 bpp, 8 bpp gray or 32 bpp rgb; no cmap - * \param[in] pixs 1 bpp, 8 bpp gray or 32 bpp rgb; no cmap - * \param[in] pixm [optional] 1 bpp mask; no operation if NULL - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) In-place operation; pixd is changed.
- *      (2) This sets each pixel in pixd that co-locates with an ON
- *          pixel in pixm to the corresponding value of pixs.
- *      (3) pixs and pixd must be the same depth and not colormapped.
- *      (4) All three input pix are aligned at the UL corner, and the
- *          operation is clipped to the intersection of all three images.
- *      (5) If pixm == NULL, it's a no-op.
- *      (6) Implementation: see notes in pixCombineMaskedGeneral().
- *          For 8 bpp selective masking, you might guess that it
- *          would be faster to generate an 8 bpp version of pixm,
- *          using pixConvert1To8(pixm, 0, 255), and then use a
- *          general combine operation
- *               d = (d & ~m) | (s & m)
- *          on a word-by-word basis.  Not always.  The word-by-word
- *          combine takes a time that is independent of the mask data.
- *          If the mask is relatively sparse, the byte-check method
- *          is actually faster!
- * 
- */ -l_ok -pixCombineMasked(PIX *pixd, - PIX *pixs, - PIX *pixm) -{ -l_int32 w, h, d, ws, hs, ds, wm, hm, dm, wmin, hmin; -l_int32 wpl, wpls, wplm, i, j, val; -l_uint32 *data, *datas, *datam, *line, *lines, *linem; -PIX *pixt; - - PROCNAME("pixCombineMasked"); - - if (!pixm) /* nothing to do */ - return 0; - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixd, &w, &h, &d); - pixGetDimensions(pixs, &ws, &hs, &ds); - pixGetDimensions(pixm, &wm, &hm, &dm); - if (d != ds) - return ERROR_INT("pixs and pixd depths differ", procName, 1); - if (dm != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (d != 1 && d != 8 && d != 32) - return ERROR_INT("pixd not 1, 8 or 32 bpp", procName, 1); - if (pixGetColormap(pixd) || pixGetColormap(pixs)) - return ERROR_INT("pixs and/or pixd is cmapped", procName, 1); - - /* For d = 1, use rasterop. pixt is the part from pixs, under - * the fg of pixm, that is to be combined with pixd. We also - * use pixt to remove all fg of pixd that is under the fg of pixm. - * Then pixt and pixd are combined by ORing. 
*/ - wmin = L_MIN(w, L_MIN(ws, wm)); - hmin = L_MIN(h, L_MIN(hs, hm)); - if (d == 1) { - pixt = pixAnd(NULL, pixs, pixm); - pixRasterop(pixd, 0, 0, wmin, hmin, PIX_DST & PIX_NOT(PIX_SRC), - pixm, 0, 0); - pixRasterop(pixd, 0, 0, wmin, hmin, PIX_SRC | PIX_DST, pixt, 0, 0); - pixDestroy(&pixt); - return 0; - } - - data = pixGetData(pixd); - datas = pixGetData(pixs); - datam = pixGetData(pixm); - wpl = pixGetWpl(pixd); - wpls = pixGetWpl(pixs); - wplm = pixGetWpl(pixm); - if (d == 8) { - for (i = 0; i < hmin; i++) { - line = data + i * wpl; - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < wmin; j++) { - if (GET_DATA_BIT(linem, j)) { - val = GET_DATA_BYTE(lines, j); - SET_DATA_BYTE(line, j, val); - } - } - } - } else { /* d == 32 */ - for (i = 0; i < hmin; i++) { - line = data + i * wpl; - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < wmin; j++) { - if (GET_DATA_BIT(linem, j)) - line[j] = lines[j]; - } - } - } - - return 0; -} - - -/*! - * \brief pixCombineMaskedGeneral() - * - * \param[in] pixd 1 bpp, 8 bpp gray or 32 bpp rgb - * \param[in] pixs 1 bpp, 8 bpp gray or 32 bpp rgb - * \param[in] pixm [optional] 1 bpp mask - * \param[in] x, y origin of pixs and pixm relative to pixd; can be negative - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) In-place operation; pixd is changed.
- *      (2) This is a generalized version of pixCombinedMasked(), where
- *          the source and mask can be placed at the same (arbitrary)
- *          location relative to pixd.
- *      (3) pixs and pixd must be the same depth and not colormapped.
- *      (4) The UL corners of both pixs and pixm are aligned with
- *          the point (x, y) of pixd, and the operation is clipped to
- *          the intersection of all three images.
- *      (5) If pixm == NULL, it's a no-op.
- *      (6) Implementation.  There are two ways to do these.  In the first,
- *          we use rasterop, ORing the part of pixs under the mask
- *          with pixd (which has been appropriately cleared there first).
- *          In the second, the mask is used one pixel at a time to
- *          selectively replace pixels of pixd with those of pixs.
- *          Here, we use rasterop for 1 bpp and pixel-wise replacement
- *          for 8 and 32 bpp.  To use rasterop for 8 bpp, for example,
- *          we must first generate an 8 bpp version of the mask.
- *          The code is simple:
- *
- *             Pix *pixm8 = pixConvert1To8(NULL, pixm, 0, 255);
- *             Pix *pixt = pixAnd(NULL, pixs, pixm8);
- *             pixRasterop(pixd, x, y, wmin, hmin, PIX_DST & PIX_NOT(PIX_SRC),
- *                         pixm8, 0, 0);
- *             pixRasterop(pixd, x, y, wmin, hmin, PIX_SRC | PIX_DST,
- *                         pixt, 0, 0);
- *             pixDestroy(&pixt);
- *             pixDestroy(&pixm8);
- * 
- */ -l_ok -pixCombineMaskedGeneral(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y) -{ -l_int32 d, w, h, ws, hs, ds, wm, hm, dm, wmin, hmin; -l_int32 wpl, wpls, wplm, i, j, val; -l_uint32 *data, *datas, *datam, *line, *lines, *linem; -PIX *pixt; - - PROCNAME("pixCombineMaskedGeneral"); - - if (!pixm) /* nothing to do */ - return 0; - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixd, &w, &h, &d); - pixGetDimensions(pixs, &ws, &hs, &ds); - pixGetDimensions(pixm, &wm, &hm, &dm); - if (d != ds) - return ERROR_INT("pixs and pixd depths differ", procName, 1); - if (dm != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (d != 1 && d != 8 && d != 32) - return ERROR_INT("pixd not 1, 8 or 32 bpp", procName, 1); - if (pixGetColormap(pixd) || pixGetColormap(pixs)) - return ERROR_INT("pixs and/or pixd is cmapped", procName, 1); - - /* For d = 1, use rasterop. pixt is the part from pixs, under - * the fg of pixm, that is to be combined with pixd. We also - * use pixt to remove all fg of pixd that is under the fg of pixm. - * Then pixt and pixd are combined by ORing. 
*/ - wmin = L_MIN(ws, wm); - hmin = L_MIN(hs, hm); - if (d == 1) { - pixt = pixAnd(NULL, pixs, pixm); - pixRasterop(pixd, x, y, wmin, hmin, PIX_DST & PIX_NOT(PIX_SRC), - pixm, 0, 0); - pixRasterop(pixd, x, y, wmin, hmin, PIX_SRC | PIX_DST, pixt, 0, 0); - pixDestroy(&pixt); - return 0; - } - - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wplm = pixGetWpl(pixm); - datam = pixGetData(pixm); - - for (i = 0; i < hmin; i++) { - if (y + i < 0 || y + i >= h) continue; - line = data + (y + i) * wpl; - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < wmin; j++) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - switch (d) - { - case 8: - val = GET_DATA_BYTE(lines, j); - SET_DATA_BYTE(line, x + j, val); - break; - case 32: - *(line + x + j) = *(lines + j); - break; - default: - return ERROR_INT("shouldn't get here", procName, 1); - } - } - } - } - - return 0; -} - - -/*! - * \brief pixPaintThroughMask() - * - * \param[in] pixd 1, 2, 4, 8, 16 or 32 bpp; or colormapped - * \param[in] pixm [optional] 1 bpp mask - * \param[in] x, y origin of pixm relative to pixd; can be negative - * \param[in] val pixel value to set at each masked pixel - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) In-place operation.  Calls pixSetMaskedCmap() for colormapped
- *          images.
- *      (2) For 1, 2, 4, 8 and 16 bpp gray, we take the appropriate
- *          number of least significant bits of val.
- *      (3) If pixm == NULL, it's a no-op.
- *      (4) The mask origin is placed at (x,y) on pixd, and the
- *          operation is clipped to the intersection of rectangles.
- *      (5) For rgb, the components in val are in the canonical locations,
- *          with red in location COLOR_RED, etc.
- *      (6) Implementation detail 1:
- *          For painting with val == 0 or val == maxval, you can use rasterop.
- *          If val == 0, invert the mask so that it's 0 over the region
- *          into which you want to write, and use PIX_SRC & PIX_DST to
- *          clear those pixels.  To write with val = maxval (all 1's),
- *          use PIX_SRC | PIX_DST to set all bits under the mask.
- *      (7) Implementation detail 2:
- *          The rasterop trick can be used for depth > 1 as well.
- *          For val == 0, generate the mask for depth d from the binary
- *          mask using
- *              pixmd = pixUnpackBinary(pixm, d, 1);
- *          and use pixRasterop() with PIX_MASK.  For val == maxval,
- *              pixmd = pixUnpackBinary(pixm, d, 0);
- *          and use pixRasterop() with PIX_PAINT.
- *          But note that if d == 32 bpp, it is about 3x faster to use
- *          the general implementation (not pixRasterop()).
- *      (8) Implementation detail 3:
- *          It might be expected that the switch in the inner loop will
- *          cause large branching delays and should be avoided.
- *          This is not the case, because the entrance is always the
- *          same and the compiler can correctly predict the jump.
- * 
- */ -l_ok -pixPaintThroughMask(PIX *pixd, - PIX *pixm, - l_int32 x, - l_int32 y, - l_uint32 val) -{ -l_int32 d, w, h, wm, hm, wpl, wplm, i, j, rval, gval, bval; -l_uint32 *data, *datam, *line, *linem; - - PROCNAME("pixPaintThroughMask"); - - if (!pixm) /* nothing to do */ - return 0; - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixGetColormap(pixd)) { - extractRGBValues(val, &rval, &gval, &bval); - return pixSetMaskedCmap(pixd, pixm, x, y, rval, gval, bval); - } - - if (pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - d = pixGetDepth(pixd); - if (d == 1) - val &= 1; - else if (d == 2) - val &= 3; - else if (d == 4) - val &= 0x0f; - else if (d == 8) - val &= 0xff; - else if (d == 16) - val &= 0xffff; - else if (d != 32) - return ERROR_INT("pixd not 1, 2, 4, 8, 16 or 32 bpp", procName, 1); - pixGetDimensions(pixm, &wm, &hm, NULL); - - /* If d == 1, use rasterop; it's about 25x faster. */ - if (d == 1) { - if (val == 0) { - PIX *pixmi = pixInvert(NULL, pixm); - pixRasterop(pixd, x, y, wm, hm, PIX_MASK, pixmi, 0, 0); - pixDestroy(&pixmi); - } else { /* val == 1 */ - pixRasterop(pixd, x, y, wm, hm, PIX_PAINT, pixm, 0, 0); - } - return 0; - } - - /* For d < 32, use rasterop if val == 0 (black); ~3x faster. */ - if (d < 32 && val == 0) { - PIX *pixmd = pixUnpackBinary(pixm, d, 1); - pixRasterop(pixd, x, y, wm, hm, PIX_MASK, pixmd, 0, 0); - pixDestroy(&pixmd); - return 0; - } - - /* For d < 32, use rasterop if val == maxval (white); ~3x faster. 
*/ - if (d < 32 && val == ((1 << d) - 1)) { - PIX *pixmd = pixUnpackBinary(pixm, d, 0); - pixRasterop(pixd, x, y, wm, hm, PIX_PAINT, pixmd, 0, 0); - pixDestroy(&pixmd); - return 0; - } - - /* All other cases */ - pixGetDimensions(pixd, &w, &h, NULL); - wpl = pixGetWpl(pixd); - data = pixGetData(pixd); - wplm = pixGetWpl(pixm); - datam = pixGetData(pixm); - for (i = 0; i < hm; i++) { - if (y + i < 0 || y + i >= h) continue; - line = data + (y + i) * wpl; - linem = datam + i * wplm; - for (j = 0; j < wm; j++) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - switch (d) - { - case 2: - SET_DATA_DIBIT(line, x + j, val); - break; - case 4: - SET_DATA_QBIT(line, x + j, val); - break; - case 8: - SET_DATA_BYTE(line, x + j, val); - break; - case 16: - SET_DATA_TWO_BYTES(line, x + j, val); - break; - case 32: - *(line + x + j) = val; - break; - default: - return ERROR_INT("shouldn't get here", procName, 1); - } - } - } - } - - return 0; -} - - -/*! - * \brief pixCopyWithBoxa() - * - * \param[in] pixs all depths; cmap ok - * \param[in] boxa e.g., from components of a photomask - * \param[in] background L_SET_WHITE or L_SET_BLACK - * \return pixd or NULL on error - * - *
- * Notes:
- *      (1) Pixels from pixs are copied ("blitted") through each box into pixd.
- *      (2) Pixels not copied are preset to either white or black.
- *      (3) This fast and simple implementation can use rasterop because
- *          each region to be copied is rectangular.
- *      (4) A much slower implemention that doesn't use rasterop would make
- *          a 1 bpp mask from the boxa and then copy, pixel by pixel,
- *          through the mask:
- *             pixGetDimensions(pixs, &w, &h, NULL);
- *             pixm = pixCreate(w, h, 1);
- *             pixm = pixMaskBoxa(pixm, pixm, boxa);
- *             pixd = pixCreateTemplate(pixs);
- *             pixSetBlackOrWhite(pixd, background);
- *             pixCombineMasked(pixd, pixs, pixm);
- *             pixDestroy(&pixm);
- * 
- */ -PIX * -pixCopyWithBoxa(PIX *pixs, - BOXA *boxa, - l_int32 background) -{ -l_int32 i, n, x, y, w, h; -PIX *pixd; - - PROCNAME("pixCopyWithBoxa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIX *)ERROR_PTR("boxa not defined", procName, NULL); - if (background != L_SET_WHITE && background != L_SET_BLACK) - return (PIX *)ERROR_PTR("invalid background", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixSetBlackOrWhite(pixd, background); - n = boxaGetCount(boxa); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - pixRasterop(pixd, x, y, w, h, PIX_SRC, pixs, x, y); - } - return pixd; -} - - -/*! - * \brief pixPaintSelfThroughMask() - * - * \param[in] pixd 8 bpp gray or 32 bpp rgb; not colormapped - * \param[in] pixm 1 bpp mask - * \param[in] x, y origin of pixm relative to pixd; must not be negative - * \param[in] searchdir L_HORIZ, L_VERT or L_BOTH_DIRECTIONS - * \param[in] mindist min distance of nearest tile edge to box; >= 0 - * \param[in] tilesize requested size for tiling; may be reduced - * \param[in] ntiles number of tiles tested in each row/column - * \param[in] distblend distance outside the fg used for blending with pixs - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) In-place operation; pixd is changed.
- *      (2) If pixm == NULL, it's a no-op.
- *      (3) The mask origin is placed at (x,y) on pixd, and the
- *          operation is clipped to the intersection of pixd and the
- *          fg of the mask.
- *      (4) %tsize is the the requested size for tiling.  The actual
- *          actual size for each c.c. will be bounded by the minimum
- *          dimension of the c.c.
- *      (5) For %mindist, %searchdir and %ntiles, see pixFindRepCloseTile().
- *          They determine the set of possible tiles that can be used
- *          to build a larger mirrored tile to paint onto pixd through
- *          the c.c. of pixm.
- *      (6) %distblend is used for alpha blending.  It is only applied
- *          if there is exactly one c.c. in the mask.  Use distblend == 0
- *          to skip blending and just paint through the 1 bpp mask.
- *      (7) To apply blending to more than 1 component, call this function
- *          repeatedly with %pixm, %x and %y representing one component of
- *          the mask each time.  This would be done as follows, for an
- *          underlying image pixs and mask pixm of components to fill:
- *              Boxa *boxa = pixConnComp(pixm, &pixa, 8);
- *              n = boxaGetCount(boxa);
- *              for (i = 0; i < n; i++) {
- *                  Pix *pix = pixaGetPix(pixa, i, L_CLONE);
- *                  Box *box = pixaGetBox(pixa, i, L_CLONE);
- *                  boxGetGeometry(box, &bx, &by, &bw, &bh);
- *                  pixPaintSelfThroughMask(pixs, pix, bx, by, searchdir,
- *                                     mindist, tilesize, ntiles, distblend);
- *                  pixDestroy(&pix);
- *                  boxDestroy(&box);
- *              }
- *              pixaDestroy(&pixa);
- *              boxaDestroy(&boxa);
- *      (8) If no tiles can be found, this falls back to estimating the
- *          color near the boundary of the region to be textured.
- *      (9) This can be used to replace the pixels in some regions of
- *          an image by selected neighboring pixels.  The mask represents
- *          the pixels to be replaced.  For each connected component in
- *          the mask, this function selects up to two tiles of neighboring
- *          pixels to be used for replacement of pixels represented by
- *          the component (i.e., under the FG of that component in the mask).
- *          After selection, mirror replication is used to generate an
- *          image that is large enough to cover the component.  Alpha
- *          blending can also be used outside of the component, but near the
- *          edge, to blur the transition between painted and original pixels.
- * 
- */ -l_ok -pixPaintSelfThroughMask(PIX *pixd, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 searchdir, - l_int32 mindist, - l_int32 tilesize, - l_int32 ntiles, - l_int32 distblend) -{ -l_int32 w, h, d, wm, hm, dm, i, n, bx, by, bw, bh, edgeblend, retval, minside; -l_uint32 pixval; -BOX *box, *boxv, *boxh; -BOXA *boxa; -PIX *pixf, *pixv, *pixh, *pix1, *pix2, *pix3, *pix4, *pix5; -PIXA *pixa; - - PROCNAME("pixPaintSelfThroughMask"); - - if (!pixm) /* nothing to do */ - return 0; - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (pixGetColormap(pixd) != NULL) - return ERROR_INT("pixd has colormap", procName, 1); - pixGetDimensions(pixd, &w, &h, &d); - if (d != 8 && d != 32) - return ERROR_INT("pixd not 8 or 32 bpp", procName, 1); - pixGetDimensions(pixm, &wm, &hm, &dm); - if (dm != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (x < 0 || y < 0) - return ERROR_INT("x and y must be non-negative", procName, 1); - if (searchdir != L_HORIZ && searchdir != L_VERT && - searchdir != L_BOTH_DIRECTIONS) - return ERROR_INT("invalid searchdir", procName, 1); - if (tilesize < 2) - return ERROR_INT("tilesize must be >= 2", procName, 1); - if (distblend < 0) - return ERROR_INT("distblend must be >= 0", procName, 1); - - /* Embed mask in full sized mask */ - if (wm < w || hm < h) { - pixf = pixCreate(w, h, 1); - pixRasterop(pixf, x, y, wm, hm, PIX_SRC, pixm, 0, 0); - } else { - pixf = pixCopy(NULL, pixm); - } - - /* Get connected components of mask */ - boxa = pixConnComp(pixf, &pixa, 8); - if ((n = pixaGetCount(pixa)) == 0) { - L_WARNING("no fg in mask\n", procName); - pixDestroy(&pixf); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - return 1; - } - boxaDestroy(&boxa); - - /* For each c.c., generate one or two representative tiles for - * texturizing and apply through the mask. The input 'tilesize' - * is the requested value. 
Note that if there is exactly one - * component, and blending at the edge is requested, an alpha mask - * is generated, which is larger than the bounding box of the c.c. */ - edgeblend = (n == 1 && distblend > 0) ? 1 : 0; - if (distblend > 0 && n > 1) - L_WARNING("%d components; can not blend at edges\n", procName, n); - retval = 0; - for (i = 0; i < n; i++) { - if (edgeblend) { - pix1 = pixMakeAlphaFromMask(pixf, distblend, &box); - } else { - pix1 = pixaGetPix(pixa, i, L_CLONE); - box = pixaGetBox(pixa, i, L_CLONE); - } - boxGetGeometry(box, &bx, &by, &bw, &bh); - minside = L_MIN(bw, bh); - - boxh = boxv = NULL; - if (searchdir == L_HORIZ || searchdir == L_BOTH_DIRECTIONS) { - pixFindRepCloseTile(pixd, box, L_HORIZ, mindist, - L_MIN(minside, tilesize), ntiles, &boxh, 0); - } - if (searchdir == L_VERT || searchdir == L_BOTH_DIRECTIONS) { - pixFindRepCloseTile(pixd, box, L_VERT, mindist, - L_MIN(minside, tilesize), ntiles, &boxv, 0); - } - if (!boxh && !boxv) { - L_WARNING("tile region not selected; paint color near boundary\n", - procName); - pixDestroy(&pix1); - pix1 = pixaGetPix(pixa, i, L_CLONE); - pixaGetBoxGeometry(pixa, i, &bx, &by, NULL, NULL); - retval = pixGetColorNearMaskBoundary(pixd, pixm, box, distblend, - &pixval, 0); - pixSetMaskedGeneral(pixd, pix1, pixval, bx, by); - pixDestroy(&pix1); - boxDestroy(&box); - continue; - } - - /* Extract the selected squares from pixd */ - pixh = (boxh) ? pixClipRectangle(pixd, boxh, NULL) : NULL; - pixv = (boxv) ? pixClipRectangle(pixd, boxv, NULL) : NULL; - if (pixh && pixv) - pix2 = pixBlend(pixh, pixv, 0, 0, 0.5); - else if (pixh) - pix2 = pixClone(pixh); - else /* pixv */ - pix2 = pixClone(pixv); - pixDestroy(&pixh); - pixDestroy(&pixv); - boxDestroy(&boxh); - boxDestroy(&boxv); - - /* Generate an image the size of the b.b. of the c.c., - * possibly extended by the blending distance, which - * is then either painted through the c.c. mask or - * blended using the alpha mask for that c.c. 
*/ - pix3 = pixMirroredTiling(pix2, bw, bh); - if (edgeblend) { - pix4 = pixClipRectangle(pixd, box, NULL); - pix5 = pixBlendWithGrayMask(pix4, pix3, pix1, 0, 0); - pixRasterop(pixd, bx, by, bw, bh, PIX_SRC, pix5, 0, 0); - pixDestroy(&pix4); - pixDestroy(&pix5); - } else { - pixCombineMaskedGeneral(pixd, pix3, pix1, bx, by); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - boxDestroy(&box); - } - - pixaDestroy(&pixa); - pixDestroy(&pixf); - return retval; -} - - -/*! - * \brief pixMakeMaskFromVal() - * - * \param[in] pixs 2, 4 or 8 bpp; can be colormapped - * \param[in] val pixel value - * \return pixd 1 bpp mask, or NULL on error - * - *
- * Notes:
- *      (1) This generates a 1 bpp mask image, where a 1 is written in
- *          the mask for each pixel in pixs that has a value %val.
- *      (2) If no pixels have the value, an empty mask is generated.
- * 
- */ -PIX * -pixMakeMaskFromVal(PIX *pixs, - l_int32 val) -{ -l_int32 w, h, d, i, j, sval, wpls, wpld; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixMakeMaskFromVal"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("pix not 2, 4 or 8 bpp", procName, NULL); - - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (d == 2) - sval = GET_DATA_DIBIT(lines, j); - else if (d == 4) - sval = GET_DATA_QBIT(lines, j); - else /* d == 8 */ - sval = GET_DATA_BYTE(lines, j); - if (sval == val) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*! - * \brief pixMakeMaskFromLUT() - * - * \param[in] pixs 2, 4 or 8 bpp; can be colormapped - * \param[in] tab 256-entry LUT; 1 means to write to mask - * \return pixd 1 bpp mask, or NULL on error - * - *
- * Notes:
- *      (1) This generates a 1 bpp mask image, where a 1 is written in
- *          the mask for each pixel in pixs that has a value corresponding
- *          to a 1 in the LUT.
- *      (2) The LUT should be of size 256.
- * 
- */ -PIX * -pixMakeMaskFromLUT(PIX *pixs, - l_int32 *tab) -{ -l_int32 w, h, d, i, j, val, wpls, wpld; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixMakeMaskFromLUT"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!tab) - return (PIX *)ERROR_PTR("tab not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("pix not 2, 4 or 8 bpp", procName, NULL); - - pixd = pixCreate(w, h, 1); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (d == 2) - val = GET_DATA_DIBIT(lines, j); - else if (d == 4) - val = GET_DATA_QBIT(lines, j); - else /* d == 8 */ - val = GET_DATA_BYTE(lines, j); - if (tab[val] == 1) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*! - * \brief pixMakeArbMaskFromRGB() - * - * \param[in] pixs 32 bpp RGB - * \param[in] rc, gc, bc arithmetic factors; can be negative - * \param[in] thresh lower threshold on weighted sum of components - * \return pixd 1 bpp mask, or NULL on error - * - *
- * Notes:
- *      (1) This generates a 1 bpp mask image, where a 1 is written in
- *          the mask for each pixel in pixs that satisfies
- *               rc * rval + gc * gval + bc * bval > thresh
- *          where rval is the red component, etc.
- *      (2) Unlike with pixConvertToGray(), there are no constraints
- *          on the color coefficients, which can be negative.  For
- *          example, a mask that discriminates against red and in favor
- *          of blue will have rc < 0.0 and bc > 0.0.
- *      (3) To make the result independent of intensity (the 'V' in HSV),
- *          select coefficients so that %thresh = 0.  Then the result
- *          is not changed when all components are multiplied by the
- *          same constant (as long as nothing saturates).  This can be
- *          useful if, for example, the illumination is not uniform.
- * 
- */ -PIX * -pixMakeArbMaskFromRGB(PIX *pixs, - l_float32 rc, - l_float32 gc, - l_float32 bc, - l_float32 thresh) -{ -PIX *pix1, *pix2; - - PROCNAME("pixMakeArbMaskFromRGB"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (thresh >= 255.0) thresh = 254.0; /* avoid 8 bit overflow */ - - if ((pix1 = pixConvertRGBToGrayArb(pixs, rc, gc, bc)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - pix2 = pixThresholdToBinary(pix1, thresh + 1); - pixInvert(pix2, pix2); - pixDestroy(&pix1); - return pix2; -} - - -/*! - * \brief pixSetUnderTransparency() - * - * \param[in] pixs 32 bpp rgba - * \param[in] val 32 bit unsigned color to use where alpha == 0 - * \param[in] debug displays layers of pixs - * \return pixd 32 bpp rgba, or NULL on error - * - *
- * Notes:
- *      (1) This sets the r, g and b components under every fully
- *          transparent alpha component to %val.  The alpha components
- *          are unchanged.
- *      (2) Full transparency is denoted by alpha == 0.  Setting
- *          all pixels to a constant %val where alpha is transparent
- *          can improve compressibility by reducing the entropy.
- *      (3) The visual result depends on how the image is displayed.
- *          (a) For display devices that respect the use of the alpha
- *              layer, this will not affect the appearance.
- *          (b) For typical leptonica operations, alpha is ignored,
- *              so there will be a change in appearance because this
- *              resets the rgb values in the fully transparent region.
- *      (4) pixRead() and pixWrite() will, by default, read and write
- *          4-component (rgba) pix in png format.  To ignore the alpha
- *          component after reading, or omit it on writing, pixSetSpp(..., 3).
- *      (5) Here are some examples:
- *          * To convert all fully transparent pixels in a 4 component
- *            (rgba) png file to white:
- *              pixs = pixRead();
- *              pixd = pixSetUnderTransparency(pixs, 0xffffff00, 0);
- *          * To write pixd with the alpha component:
- *              pixWrite(, pixd, IFF_PNG);
- *          * To write and rgba image without the alpha component, first do:
- *              pixSetSpp(pixd, 3);
- *            If you later want to use the alpha, spp must be reset to 4.
- *          * (fancier) To remove the alpha by blending the image over
- *            a white background:
- *              pixRemoveAlpha()
- *            This changes all pixel values where the alpha component is
- *            not opaque (255).
- *      (6) Caution.  rgb images in leptonica typically have value 0 in
- *          the alpha channel, which is fully transparent.  If spp for
- *          such an image were changed from 3 to 4, the image becomes
- *          fully transparent, and this function will set each pixel to %val.
- *          If you really want to set every pixel to the same value,
- *          use pixSetAllArbitrary().
- *      (7) This is useful for compressing an RGBA image where the part
- *          of the image that is fully transparent is random junk; compression
- *          is typically improved by setting that region to a constant.
- *          For rendering as a 3 component RGB image over a uniform
- *          background of arbitrary color, use pixAlphaBlendUniform().
- * 
- */ -PIX * -pixSetUnderTransparency(PIX *pixs, - l_uint32 val, - l_int32 debug) -{ -PIX *pixg, *pixm, *pixt, *pixd; - - PROCNAME("pixSetUnderTransparency"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not defined or not 32 bpp", - procName, NULL); - - if (pixGetSpp(pixs) != 4) { - L_WARNING("no alpha channel; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - /* Make a mask from the alpha component with ON pixels - * wherever the alpha component is fully transparent (0). - * The hard way: - * l_int32 *lut = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - * lut[0] = 1; - * pixg = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - * pixm = pixMakeMaskFromLUT(pixg, lut); - * LEPT_FREE(lut); - * But there's an easier way to set pixels in a mask where - * the alpha component is 0 ... */ - pixg = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pixm = pixThresholdToBinary(pixg, 1); - - if (debug) { - pixt = pixDisplayLayersRGBA(pixs, 0xffffff00, 600); - pixDisplay(pixt, 0, 0); - pixDestroy(&pixt); - } - - pixd = pixCopy(NULL, pixs); - pixSetMasked(pixd, pixm, (val & 0xffffff00)); - pixDestroy(&pixg); - pixDestroy(&pixm); - return pixd; -} - - -/*! - * \brief pixMakeAlphaFromMask() - * - * \param[in] pixs 1 bpp - * \param[in] dist blending distance; typically 10 - 30 - * \param[out] pbox [optional] use NULL to get the full size - * \return pixd (8 bpp gray, or NULL on error - * - *
- * Notes:
- *      (1) This generates a 8 bpp alpha layer that is opaque (256)
- *          over the FG of pixs, and goes transparent linearly away
- *          from the FG pixels, decaying to 0 (transparent) is an
- *          8-connected distance given by %dist.  If %dist == 0,
- *          this does a simple conversion from 1 to 8 bpp.
- *      (2) If &box == NULL, this returns an alpha mask that is the
- *          full size of pixs.  Otherwise, the returned mask pixd covers
- *          just the FG pixels of pixs, expanded by %dist in each
- *          direction (if possible), and the returned box gives the
- *          location of the returned mask relative to pixs.
- *      (3) This is useful for painting through a mask and allowing
- *          blending of the painted image with an underlying image
- *          in the mask background for pixels near foreground mask pixels.
- *          For example, with an underlying rgb image pix1, an overlaying
- *          image rgb pix2, binary mask pixm, and dist > 0, this
- *          blending is achieved with:
- *              pix3 = pixMakeAlphaFromMask(pixm, dist, &box);
- *              boxGetGeometry(box, &x, &y, NULL, NULL);
- *              pix4 = pixBlendWithGrayMask(pix1, pix2, pix3, x, y);
- * 
- */ -PIX * -pixMakeAlphaFromMask(PIX *pixs, - l_int32 dist, - BOX **pbox) -{ -l_int32 w, h; -BOX *box1, *box2; -PIX *pix1, *pixd; - - PROCNAME("pixMakeAlphaFromMask"); - - if (pbox) *pbox = NULL; - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (dist < 0) - return (PIX *)ERROR_PTR("dist must be >= 0", procName, NULL); - - /* If requested, extract just the region to be affected by the mask */ - if (pbox) { - pixClipToForeground(pixs, NULL, &box1); - if (!box1) { - L_WARNING("no ON pixels in mask\n", procName); - return pixCreateTemplate(pixs); /* all background (0) */ - } - - boxAdjustSides(box1, box1, -dist, dist, -dist, dist); - pixGetDimensions(pixs, &w, &h, NULL); - box2 = boxClipToRectangle(box1, w, h); - *pbox = box2; - pix1 = pixClipRectangle(pixs, box2, NULL); - boxDestroy(&box1); - } else { - pix1 = pixCopy(NULL, pixs); - } - - if (dist == 0) { - pixd = pixConvert1To8(NULL, pix1, 0, 255); - pixDestroy(&pix1); - return pixd; - } - - /* Blur the boundary of the input mask */ - pixInvert(pix1, pix1); - pixd = pixDistanceFunction(pix1, 8, 8, L_BOUNDARY_FG); - pixMultConstantGray(pixd, 256.0 / dist); - pixInvert(pixd, pixd); - pixDestroy(&pix1); - return pixd; -} - - -/*! - * \brief pixGetColorNearMaskBoundary() - * - * \param[in] pixs 32 bpp rgb - * \param[in] pixm 1 bpp mask, full image - * \param[in] box region of mask; typically b.b. of a component - * \param[in] dist distance into BG from mask boundary to use - * \param[out] pval average pixel value - * \param[in] debug 1 to output mask images - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This finds the average color in a set of pixels that are
- *          roughly a distance %dist from the c.c. boundary and in the
- *          background of the mask image.
- * 
- */ -l_ok -pixGetColorNearMaskBoundary(PIX *pixs, - PIX *pixm, - BOX *box, - l_int32 dist, - l_uint32 *pval, - l_int32 debug) -{ -char op[64]; -l_int32 empty, bx, by; -l_float32 rval, gval, bval; -BOX *box1, *box2; -PIX *pix1, *pix2, *pix3; - - PROCNAME("pixGetColorNearMaskBoundary"); - - if (!pval) - return ERROR_INT("&pval not defined", procName, 1); - *pval = 0xffffff00; /* white */ - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs undefined or not 32 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 1) - return ERROR_INT("pixm undefined or not 1 bpp", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (dist < 0) - return ERROR_INT("dist must be >= 0", procName, 1); - - /* Clip mask piece, expanded beyond %box by (%dist + 5) on each side. - * box1 is the region requested; box2 is the actual region retrieved, - * which is clipped to %pixm */ - box1 = boxAdjustSides(NULL, box, -dist - 5, dist + 5, -dist - 5, dist + 5); - pix1 = pixClipRectangle(pixm, box1, &box2); - - /* Expand FG by %dist into the BG */ - if (dist == 0) { - pix2 = pixCopy(NULL, pix1); - } else { - snprintf(op, sizeof(op), "d%d.%d", 2 * dist, 2 * dist); - pix2 = pixMorphSequence(pix1, op, 0); - } - - /* Expand again by 5 pixels on all sides (dilate 11x11) and XOR, - * getting the annulus of FG pixels between %dist and %dist + 5 */ - pix3 = pixCopy(NULL, pix2); - pixDilateBrick(pix3, pix3, 11, 11); - pixXor(pix3, pix3, pix2); - pixZero(pix3, &empty); - if (!empty) { - /* Scan the same region in %pixs, to get average under FG in pix3 */ - boxGetGeometry(box2, &bx, &by, NULL, NULL); - pixGetAverageMaskedRGB(pixs, pix3, bx, by, 1, L_MEAN_ABSVAL, - &rval, &gval, &bval); - composeRGBPixel((l_int32)(rval + 0.5), (l_int32)(gval + 0.5), - (l_int32)(bval + 0.5), pval); - } else { - L_WARNING("no pixels found\n", procName); - } - - if (debug) { - lept_rmdir("masknear"); /* erase previous images */ - lept_mkdir("masknear"); - 
pixWriteDebug("/tmp/masknear/input.png", pix1, IFF_PNG); - pixWriteDebug("/tmp/masknear/adjusted.png", pix2, IFF_PNG); - pixWriteDebug("/tmp/masknear/outerfive.png", pix3, IFF_PNG); - lept_stderr("Input box; with adjusted sides; clipped\n"); - boxPrintStreamInfo(stderr, box); - boxPrintStreamInfo(stderr, box1); - boxPrintStreamInfo(stderr, box2); - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - boxDestroy(&box1); - boxDestroy(&box2); - return 0; -} - - -/*! - * \brief pixDisplaySelectedPixels() - * - * \param[in] pixs [optional] any depth - * \param[in] pixm 1 bpp mask, aligned UL corner with %pixs - * \param[in] sel [optional] pattern to paint at each pixel in pixm - * \param[in] val rgb rendering of pattern - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) For every fg pixel in %pixm, this paints the pattern in %sel
- *          in color %val on a copy of %pixs.
- *      (2) The implementation is to dilate %pixm by %sel, and then
- *          paint through the dilated mask onto %pixs.
- *      (3) If %pixs == NULL, it paints on a white image.
- *      (4) If %sel == NULL, it paints only the pixels in the input %pixm.
- *      (5) This visualization would typically be used in debugging.
- * 
- */ -PIX * -pixDisplaySelectedPixels(PIX *pixs, - PIX *pixm, - SEL *sel, - l_uint32 val) -{ -l_int32 w, h; -PIX *pix1, *pix2; - - PROCNAME("pixDisplaySelectedPixels"); - - if (!pixm || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm undefined or not 1 bpp", procName, NULL); - - if (pixs) { - pix1 = pixConvertTo32(pixs); - } else { - pixGetDimensions(pixm, &w, &h, NULL); - pix1 = pixCreate(w, h, 32); - pixSetAll(pix1); - } - - if (sel) - pix2 = pixDilate(NULL, pixm, sel); - else - pix2 = pixClone(pixm); - pixSetMasked(pix1, pix2, val); - pixDestroy(&pix2); - return pix1; -} - - -/*-------------------------------------------------------------* - * One and two-image boolean ops on arbitrary depth images * - *-------------------------------------------------------------*/ -/*! - * \brief pixInvert() - * - * \param[in] pixd [optional]; this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This inverts pixs, for all pixel depths.
- *      (2) There are 3 cases:
- *           (a) pixd == null,   ~src --> new pixd
- *           (b) pixd == pixs,   ~src --> src  (in-place)
- *           (c) pixd != pixs,   ~src --> input pixd
- *      (3) For clarity, if the case is known, use these patterns:
- *           (a) pixd = pixInvert(NULL, pixs);
- *           (b) pixInvert(pixs, pixs);
- *           (c) pixInvert(pixd, pixs);
- * 
- */ -PIX * -pixInvert(PIX *pixd, - PIX *pixs) -{ - PROCNAME("pixInvert"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Prepare pixd for in-place operation */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - pixRasterop(pixd, 0, 0, pixGetWidth(pixd), pixGetHeight(pixd), - PIX_NOT(PIX_DST), NULL, 0, 0); /* invert pixd */ - - return pixd; -} - - -/*! - * \brief pixOr() - * - * \param[in] pixd [optional]; this can be null, equal to pixs1, - * different from pixs1 - * \param[in] pixs1 can be == pixd - * \param[in] pixs2 must be != pixd - * \return pixd always - * - *
- * Notes:
- *      (1) This gives the union of two images with equal depth,
- *          aligning them to the the UL corner.  pixs1 and pixs2
- *          need not have the same width and height.
- *      (2) There are 3 cases:
- *            (a) pixd == null,   (src1 | src2) --> new pixd
- *            (b) pixd == pixs1,  (src1 | src2) --> src1  (in-place)
- *            (c) pixd != pixs1,  (src1 | src2) --> input pixd
- *      (3) For clarity, if the case is known, use these patterns:
- *            (a) pixd = pixOr(NULL, pixs1, pixs2);
- *            (b) pixOr(pixs1, pixs1, pixs2);
- *            (c) pixOr(pixd, pixs1, pixs2);
- *      (4) The size of the result is determined by pixs1.
- *      (5) The depths of pixs1 and pixs2 must be equal.
- *      (6) Note carefully that the order of pixs1 and pixs2 only matters
- *          for the in-place case.  For in-place, you must have
- *          pixd == pixs1.  Setting pixd == pixs2 gives an incorrect
- *          result: the copy puts pixs1 image data in pixs2, and
- *          the rasterop is then between pixs2 and pixs2 (a no-op).
- * 
- */ -PIX * -pixOr(PIX *pixd, - PIX *pixs1, - PIX *pixs2) -{ - PROCNAME("pixOr"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixd == pixs2) - return (PIX *)ERROR_PTR("cannot have pixs2 == pixd", procName, pixd); - if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); - -#if EQUAL_SIZE_WARNING - if (!pixSizesEqual(pixs1, pixs2)) - L_WARNING("pixs1 and pixs2 not equal sizes\n", procName); -#endif /* EQUAL_SIZE_WARNING */ - - /* Prepare pixd to be a copy of pixs1 */ - if ((pixd = pixCopy(pixd, pixs1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, pixd); - - /* src1 | src2 --> dest */ - pixRasterop(pixd, 0, 0, pixGetWidth(pixd), pixGetHeight(pixd), - PIX_SRC | PIX_DST, pixs2, 0, 0); - - return pixd; -} - - -/*! - * \brief pixAnd() - * - * \param[in] pixd [optional]; this can be null, equal to pixs1, - * different from pixs1 - * \param[in] pixs1 can be == pixd - * \param[in] pixs2 must be != pixd - * \return pixd always - * - *
- * Notes:
- *      (1) This gives the intersection of two images with equal depth,
- *          aligning them to the the UL corner.  pixs1 and pixs2
- *          need not have the same width and height.
- *      (2) There are 3 cases:
- *            (a) pixd == null,   (src1 & src2) --> new pixd
- *            (b) pixd == pixs1,  (src1 & src2) --> src1  (in-place)
- *            (c) pixd != pixs1,  (src1 & src2) --> input pixd
- *      (3) For clarity, if the case is known, use these patterns:
- *            (a) pixd = pixAnd(NULL, pixs1, pixs2);
- *            (b) pixAnd(pixs1, pixs1, pixs2);
- *            (c) pixAnd(pixd, pixs1, pixs2);
- *      (4) The size of the result is determined by pixs1.
- *      (5) The depths of pixs1 and pixs2 must be equal.
- *      (6) Note carefully that the order of pixs1 and pixs2 only matters
- *          for the in-place case.  For in-place, you must have
- *          pixd == pixs1.  Setting pixd == pixs2 gives an incorrect
- *          result: the copy puts pixs1 image data in pixs2, and
- *          the rasterop is then between pixs2 and pixs2 (a no-op).
- * 
- */ -PIX * -pixAnd(PIX *pixd, - PIX *pixs1, - PIX *pixs2) -{ - PROCNAME("pixAnd"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixd == pixs2) - return (PIX *)ERROR_PTR("cannot have pixs2 == pixd", procName, pixd); - if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); - -#if EQUAL_SIZE_WARNING - if (!pixSizesEqual(pixs1, pixs2)) - L_WARNING("pixs1 and pixs2 not equal sizes\n", procName); -#endif /* EQUAL_SIZE_WARNING */ - - /* Prepare pixd to be a copy of pixs1 */ - if ((pixd = pixCopy(pixd, pixs1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, pixd); - - /* src1 & src2 --> dest */ - pixRasterop(pixd, 0, 0, pixGetWidth(pixd), pixGetHeight(pixd), - PIX_SRC & PIX_DST, pixs2, 0, 0); - - return pixd; -} - - -/*! - * \brief pixXor() - * - * \param[in] pixd [optional]; this can be null, equal to pixs1, - * different from pixs1 - * \param[in] pixs1 can be == pixd - * \param[in] pixs2 must be != pixd - * \return pixd always - * - *
- * Notes:
- *      (1) This gives the XOR of two images with equal depth,
- *          aligning them to the the UL corner.  pixs1 and pixs2
- *          need not have the same width and height.
- *      (2) There are 3 cases:
- *            (a) pixd == null,   (src1 ^ src2) --> new pixd
- *            (b) pixd == pixs1,  (src1 ^ src2) --> src1  (in-place)
- *            (c) pixd != pixs1,  (src1 ^ src2) --> input pixd
- *      (3) For clarity, if the case is known, use these patterns:
- *            (a) pixd = pixXor(NULL, pixs1, pixs2);
- *            (b) pixXor(pixs1, pixs1, pixs2);
- *            (c) pixXor(pixd, pixs1, pixs2);
- *      (4) The size of the result is determined by pixs1.
- *      (5) The depths of pixs1 and pixs2 must be equal.
- *      (6) Note carefully that the order of pixs1 and pixs2 only matters
- *          for the in-place case.  For in-place, you must have
- *          pixd == pixs1.  Setting pixd == pixs2 gives an incorrect
- *          result: the copy puts pixs1 image data in pixs2, and
- *          the rasterop is then between pixs2 and pixs2 (a no-op).
- * 
- */ -PIX * -pixXor(PIX *pixd, - PIX *pixs1, - PIX *pixs2) -{ - PROCNAME("pixXor"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixd == pixs2) - return (PIX *)ERROR_PTR("cannot have pixs2 == pixd", procName, pixd); - if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); - -#if EQUAL_SIZE_WARNING - if (!pixSizesEqual(pixs1, pixs2)) - L_WARNING("pixs1 and pixs2 not equal sizes\n", procName); -#endif /* EQUAL_SIZE_WARNING */ - - /* Prepare pixd to be a copy of pixs1 */ - if ((pixd = pixCopy(pixd, pixs1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, pixd); - - /* src1 ^ src2 --> dest */ - pixRasterop(pixd, 0, 0, pixGetWidth(pixd), pixGetHeight(pixd), - PIX_SRC ^ PIX_DST, pixs2, 0, 0); - - return pixd; -} - - -/*! - * \brief pixSubtract() - * - * \param[in] pixd [optional]; this can be null, equal to pixs1, - * equal to pixs2, or different from both pixs1 and pixs2 - * \param[in] pixs1 can be == pixd - * \param[in] pixs2 can be == pixd - * \return pixd always - * - *
- * Notes:
- *      (1) This gives the set subtraction of two images with equal depth,
- *          aligning them to the the UL corner.  pixs1 and pixs2
- *          need not have the same width and height.
- *      (2) Source pixs2 is always subtracted from source pixs1.
- *          The result is
- *                  pixs1 \ pixs2 = pixs1 & (~pixs2)
- *      (3) There are 4 cases:
- *            (a) pixd == null,   (src1 - src2) --> new pixd
- *            (b) pixd == pixs1,  (src1 - src2) --> src1  (in-place)
- *            (c) pixd == pixs2,  (src1 - src2) --> src2  (in-place)
- *            (d) pixd != pixs1 && pixd != pixs2),
- *                                 (src1 - src2) --> input pixd
- *      (4) For clarity, if the case is known, use these patterns:
- *            (a) pixd = pixSubtract(NULL, pixs1, pixs2);
- *            (b) pixSubtract(pixs1, pixs1, pixs2);
- *            (c) pixSubtract(pixs2, pixs1, pixs2);
- *            (d) pixSubtract(pixd, pixs1, pixs2);
- *      (5) The size of the result is determined by pixs1.
- *      (6) The depths of pixs1 and pixs2 must be equal.
- * 
- */ -PIX * -pixSubtract(PIX *pixd, - PIX *pixs1, - PIX *pixs2) -{ -l_int32 w, h; - - PROCNAME("pixSubtract"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); - -#if EQUAL_SIZE_WARNING - if (!pixSizesEqual(pixs1, pixs2)) - L_WARNING("pixs1 and pixs2 not equal sizes\n", procName); -#endif /* EQUAL_SIZE_WARNING */ - - pixGetDimensions(pixs1, &w, &h, NULL); - if (!pixd) { - pixd = pixCopy(NULL, pixs1); - pixRasterop(pixd, 0, 0, w, h, PIX_DST & PIX_NOT(PIX_SRC), - pixs2, 0, 0); /* src1 & (~src2) */ - } else if (pixd == pixs1) { - pixRasterop(pixd, 0, 0, w, h, PIX_DST & PIX_NOT(PIX_SRC), - pixs2, 0, 0); /* src1 & (~src2) */ - } else if (pixd == pixs2) { - pixRasterop(pixd, 0, 0, w, h, PIX_NOT(PIX_DST) & PIX_SRC, - pixs1, 0, 0); /* src1 & (~src2) */ - } else { /* pixd != pixs1 && pixd != pixs2 */ - pixCopy(pixd, pixs1); /* sizes pixd to pixs1 if unequal */ - pixRasterop(pixd, 0, 0, w, h, PIX_DST & PIX_NOT(PIX_SRC), - pixs2, 0, 0); /* src1 & (~src2) */ - } - - return pixd; -} - - -/*-------------------------------------------------------------* - * Pixel counting * - *-------------------------------------------------------------*/ -/*! - * \brief pixZero() - * - * \param[in] pix all depths; colormap OK - * \param[out] pempty 1 if all bits in image data field are 0; 0 otherwise - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) For a binary image, if there are no fg (black) pixels, empty = 1.
- *      (2) For a grayscale image, if all pixels are black (0), empty = 1.
- *      (3) For an RGB image, if all 4 components in every pixel is 0,
- *          empty = 1.
- *      (4) For a colormapped image, pixel values are 0.  The colormap
- *          is ignored.
- * 
- */ -l_ok -pixZero(PIX *pix, - l_int32 *pempty) -{ -l_int32 w, h, wpl, i, j, fullwords, endbits; -l_uint32 endmask; -l_uint32 *data, *line; - - PROCNAME("pixZero"); - - if (!pempty) - return ERROR_INT("&empty not defined", procName, 1); - *pempty = 1; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - w = pixGetWidth(pix) * pixGetDepth(pix); /* in bits */ - h = pixGetHeight(pix); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - fullwords = w / 32; - endbits = w & 31; - endmask = (endbits == 0) ? 0 : (0xffffffffU << (32 - endbits)); - - for (i = 0; i < h; i++) { - line = data + wpl * i; - for (j = 0; j < fullwords; j++) - if (*line++) { - *pempty = 0; - return 0; - } - if (endbits) { - if (*line & endmask) { - *pempty = 0; - return 0; - } - } - } - - return 0; -} - - -/*! - * \brief pixForegroundFraction() - * - * \param[in] pix 1 bpp - * \param[out] pfract fraction of ON pixels - * \return 0 if OK; 1 on error - */ -l_ok -pixForegroundFraction(PIX *pix, - l_float32 *pfract) -{ -l_int32 w, h, count; - - PROCNAME("pixForegroundFraction"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!pix || pixGetDepth(pix) != 1) - return ERROR_INT("pix not defined or not 1 bpp", procName, 1); - - pixCountPixels(pix, &count, NULL); - pixGetDimensions(pix, &w, &h, NULL); - *pfract = (l_float32)count / (l_float32)(w * h); - return 0; -} - - -/*! 
- * \brief pixaCountPixels() - * - * \param[in] pixa array of 1 bpp pix - * \return na of ON pixels in each pix, or NULL on error - */ -NUMA * -pixaCountPixels(PIXA *pixa) -{ -l_int32 d, i, n, count; -l_int32 *tab; -NUMA *na; -PIX *pix; - - PROCNAME("pixaCountPixels"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pix not defined", procName, NULL); - - if ((n = pixaGetCount(pixa)) == 0) - return numaCreate(1); - - pix = pixaGetPix(pixa, 0, L_CLONE); - d = pixGetDepth(pix); - pixDestroy(&pix); - if (d != 1) - return (NUMA *)ERROR_PTR("pixa not 1 bpp", procName, NULL); - - if ((na = numaCreate(n)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - tab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixCountPixels(pix, &count, tab); - numaAddNumber(na, count); - pixDestroy(&pix); - } - - LEPT_FREE(tab); - return na; -} - - -/*! - * \brief pixCountPixels() - * - * \param[in] pixs 1 bpp - * \param[out] pcount count of ON pixels - * \param[in] tab8 [optional] 8-bit pixel lookup table - * \return 0 if OK; 1 on error - */ -l_ok -pixCountPixels(PIX *pixs, - l_int32 *pcount, - l_int32 *tab8) -{ -l_uint32 endmask; -l_int32 w, h, wpl, i, j; -l_int32 fullwords, endbits, sum; -l_int32 *tab; -l_uint32 *data; - - PROCNAME("pixCountPixels"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - tab = (tab8) ? tab8 : makePixelSumTab8(); - pixGetDimensions(pixs, &w, &h, NULL); - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - fullwords = w >> 5; - endbits = w & 31; - endmask = (endbits == 0) ? 
0 : (0xffffffffU << (32 - endbits)); - - sum = 0; - for (i = 0; i < h; i++, data += wpl) { - for (j = 0; j < fullwords; j++) { - l_uint32 word = data[j]; - if (word) { - sum += tab[word & 0xff] + - tab[(word >> 8) & 0xff] + - tab[(word >> 16) & 0xff] + - tab[(word >> 24) & 0xff]; - } - } - if (endbits) { - l_uint32 word = data[j] & endmask; - if (word) { - sum += tab[word & 0xff] + - tab[(word >> 8) & 0xff] + - tab[(word >> 16) & 0xff] + - tab[(word >> 24) & 0xff]; - } - } - } - *pcount = sum; - - if (!tab8) LEPT_FREE(tab); - return 0; -} - - -/*! - * \brief pixCountPixelsInRect() - * - * \param[in] pixs 1 bpp - * \param[in] box (can be null) - * \param[out] pcount count of ON pixels - * \param[in] tab8 [optional] 8-bit pixel lookup table - * \return 0 if OK; 1 on error - */ -l_ok -pixCountPixelsInRect(PIX *pixs, - BOX *box, - l_int32 *pcount, - l_int32 *tab8) -{ -l_int32 bx, by, bw, bh; -PIX *pix1; - - PROCNAME("pixCountPixelsInRect"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - if (box) { - boxGetGeometry(box, &bx, &by, &bw, &bh); - pix1 = pixCreate(bw, bh, 1); - pixRasterop(pix1, 0, 0, bw, bh, PIX_SRC, pixs, bx, by); - pixCountPixels(pix1, pcount, tab8); - pixDestroy(&pix1); - } else { - pixCountPixels(pixs, pcount, tab8); - } - - return 0; -} - - -/*! - * \brief pixCountByRow() - * - * \param[in] pix 1 bpp - * \param[in] box [optional] clipping box for count; can be null - * \return na of number of ON pixels by row, or NULL on error - * - *
- * Notes:
- *      (1) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- * 
- */ -NUMA * -pixCountByRow(PIX *pix, - BOX *box) -{ -l_int32 i, j, w, h, wpl, count, xstart, xend, ystart, yend, bw, bh; -l_uint32 *line, *data; -NUMA *na; - - PROCNAME("pixCountByRow"); - - if (!pix || pixGetDepth(pix) != 1) - return (NUMA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - if (!box) - return pixCountPixelsByRow(pix, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - - if ((na = numaCreate(bh)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, ystart, 1); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = ystart; i < yend; i++) { - count = 0; - line = data + i * wpl; - for (j = xstart; j < xend; j++) { - if (GET_DATA_BIT(line, j)) - count++; - } - numaAddNumber(na, count); - } - - return na; -} - - -/*! - * \brief pixCountByColumn() - * - * \param[in] pix 1 bpp - * \param[in] box [optional] clipping box for count; can be null - * \return na of number of ON pixels by column, or NULL on error - * - *
- * Notes:
- *      (1) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- * 
- */ -NUMA * -pixCountByColumn(PIX *pix, - BOX *box) -{ -l_int32 i, j, w, h, wpl, count, xstart, xend, ystart, yend, bw, bh; -l_uint32 *line, *data; -NUMA *na; - - PROCNAME("pixCountByColumn"); - - if (!pix || pixGetDepth(pix) != 1) - return (NUMA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - if (!box) - return pixCountPixelsByColumn(pix); - - pixGetDimensions(pix, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - - if ((na = numaCreate(bw)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, xstart, 1); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (j = xstart; j < xend; j++) { - count = 0; - for (i = ystart; i < yend; i++) { - line = data + i * wpl; - if (GET_DATA_BIT(line, j)) - count++; - } - numaAddNumber(na, count); - } - - return na; -} - - -/*! - * \brief pixCountPixelsByRow() - * - * \param[in] pix 1 bpp - * \param[in] tab8 [optional] 8-bit pixel lookup table - * \return na of counts, or NULL on error - */ -NUMA * -pixCountPixelsByRow(PIX *pix, - l_int32 *tab8) -{ -l_int32 h, i, count; -l_int32 *tab; -NUMA *na; - - PROCNAME("pixCountPixelsByRow"); - - if (!pix || pixGetDepth(pix) != 1) - return (NUMA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - - h = pixGetHeight(pix); - if ((na = numaCreate(h)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - - tab = (tab8) ? tab8 : makePixelSumTab8(); - for (i = 0; i < h; i++) { - pixCountPixelsInRow(pix, i, &count, tab); - numaAddNumber(na, count); - } - - if (!tab8) LEPT_FREE(tab); - return na; -} - - -/*! 
- * \brief pixCountPixelsByColumn() - * - * \param[in] pix 1 bpp - * \return na of counts in each column, or NULL on error - */ -NUMA * -pixCountPixelsByColumn(PIX *pix) -{ -l_int32 i, j, w, h, wpl; -l_uint32 *line, *data; -l_float32 *array; -NUMA *na; - - PROCNAME("pixCountPixelsByColumn"); - - if (!pix || pixGetDepth(pix) != 1) - return (NUMA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - if ((na = numaCreate(w)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, w); - array = numaGetFArray(na, L_NOCOPY); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = 0; i < h; i++) { - line = data + wpl * i; - for (j = 0; j < w; j++) { - if (GET_DATA_BIT(line, j)) - array[j] += 1.0; - } - } - - return na; -} - - -/*! - * \brief pixCountPixelsInRow() - * - * \param[in] pix 1 bpp - * \param[in] row number - * \param[out] pcount sum of ON pixels in raster line - * \param[in] tab8 [optional] 8-bit pixel lookup table - * \return 0 if OK; 1 on error - */ -l_ok -pixCountPixelsInRow(PIX *pix, - l_int32 row, - l_int32 *pcount, - l_int32 *tab8) -{ -l_uint32 word, endmask; -l_int32 j, w, h, wpl; -l_int32 fullwords, endbits, sum; -l_int32 *tab; -l_uint32 *line; - - PROCNAME("pixCountPixelsInRow"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!pix || pixGetDepth(pix) != 1) - return ERROR_INT("pix not defined or not 1 bpp", procName, 1); - - pixGetDimensions(pix, &w, &h, NULL); - if (row < 0 || row >= h) - return ERROR_INT("row out of bounds", procName, 1); - wpl = pixGetWpl(pix); - line = pixGetData(pix) + row * wpl; - fullwords = w >> 5; - endbits = w & 31; - endmask = (endbits == 0) ? 0 : (0xffffffffU << (32 - endbits)); - - tab = (tab8) ? 
tab8 : makePixelSumTab8(); - sum = 0; - for (j = 0; j < fullwords; j++) { - word = line[j]; - if (word) { - sum += tab[word & 0xff] + - tab[(word >> 8) & 0xff] + - tab[(word >> 16) & 0xff] + - tab[(word >> 24) & 0xff]; - } - } - if (endbits) { - word = line[j] & endmask; - if (word) { - sum += tab[word & 0xff] + - tab[(word >> 8) & 0xff] + - tab[(word >> 16) & 0xff] + - tab[(word >> 24) & 0xff]; - } - } - *pcount = sum; - - if (!tab8) LEPT_FREE(tab); - return 0; -} - - -/*! - * \brief pixGetMomentByColumn() - * - * \param[in] pix 1 bpp - * \param[in] order of moment, either 1 or 2 - * \return na of first moment of fg pixels, by column, or NULL on error - */ -NUMA * -pixGetMomentByColumn(PIX *pix, - l_int32 order) -{ -l_int32 i, j, w, h, wpl; -l_uint32 *line, *data; -l_float32 *array; -NUMA *na; - - PROCNAME("pixGetMomentByColumn"); - - if (!pix || pixGetDepth(pix) != 1) - return (NUMA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - if (order != 1 && order != 2) - return (NUMA *)ERROR_PTR("order of moment not 1 or 2", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - if ((na = numaCreate(w)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, w); - array = numaGetFArray(na, L_NOCOPY); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = 0; i < h; i++) { - line = data + wpl * i; - for (j = 0; j < w; j++) { - if (GET_DATA_BIT(line, j)) { - if (order == 1) - array[j] += i; - else /* order == 2 */ - array[j] += i * i; - } - } - } - - return na; -} - - -/*! - * \brief pixThresholdPixelSum() - * - * \param[in] pix 1 bpp - * \param[in] thresh threshold - * \param[out] pabove 1 if above threshold; - * 0 if equal to or less than threshold - * \param[in] tab8 [optional] 8-bit pixel lookup table - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This sums the ON pixels and returns immediately if the count
- *          goes above threshold.  It is therefore more efficient
- *          for matching images (by running this function on the xor of
- *          the 2 images) than using pixCountPixels(), which counts all
- *          pixels before returning.
- * 
- */ -l_ok -pixThresholdPixelSum(PIX *pix, - l_int32 thresh, - l_int32 *pabove, - l_int32 *tab8) -{ -l_uint32 word, endmask; -l_int32 *tab; -l_int32 w, h, wpl, i, j; -l_int32 fullwords, endbits, sum; -l_uint32 *line, *data; - - PROCNAME("pixThresholdPixelSum"); - - if (!pabove) - return ERROR_INT("&above not defined", procName, 1); - *pabove = 0; - if (!pix || pixGetDepth(pix) != 1) - return ERROR_INT("pix not defined or not 1 bpp", procName, 1); - - tab = (tab8) ? tab8 : makePixelSumTab8(); - pixGetDimensions(pix, &w, &h, NULL); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - fullwords = w >> 5; - endbits = w & 31; - endmask = 0xffffffff << (32 - endbits); - - sum = 0; - for (i = 0; i < h; i++) { - line = data + wpl * i; - for (j = 0; j < fullwords; j++) { - word = line[j]; - if (word) { - sum += tab[word & 0xff] + - tab[(word >> 8) & 0xff] + - tab[(word >> 16) & 0xff] + - tab[(word >> 24) & 0xff]; - } - } - if (endbits) { - word = line[j] & endmask; - if (word) { - sum += tab[word & 0xff] + - tab[(word >> 8) & 0xff] + - tab[(word >> 16) & 0xff] + - tab[(word >> 24) & 0xff]; - } - } - if (sum > thresh) { - *pabove = 1; - if (!tab8) LEPT_FREE(tab); - return 0; - } - } - - if (!tab8) LEPT_FREE(tab); - return 0; -} - - -/*! - * \brief makePixelSumTab8() - * - * \return table of 256 l_int32. - * - *
- * Notes:
- *      (1) This table of integers gives the number of 1 bits
- *          in the 8 bit index.
- * 
- */ -l_int32 * -makePixelSumTab8(void) -{ -l_uint8 byte; -l_int32 i; -l_int32 *tab; - - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) { - byte = (l_uint8)i; - tab[i] = (byte & 0x1) + - ((byte >> 1) & 0x1) + - ((byte >> 2) & 0x1) + - ((byte >> 3) & 0x1) + - ((byte >> 4) & 0x1) + - ((byte >> 5) & 0x1) + - ((byte >> 6) & 0x1) + - ((byte >> 7) & 0x1); - } - return tab; -} - - -/*! - * \brief makePixelCentroidTab8() - * - * \return table of 256 l_int32. - * - *
- * Notes:
- *      (1) This table of integers gives the centroid weight of the 1 bits
- *          in the 8 bit index.  In other words, if sumtab is obtained by
- *          makePixelSumTab8, and centroidtab is obtained by
- *          makePixelCentroidTab8, then, for 1 <= i <= 255,
- *          centroidtab[i] / (float)sumtab[i]
- *          is the centroid of the 1 bits in the 8-bit index i, where the
- *          MSB is considered to have position 0 and the LSB is considered
- *          to have position 7.
- * 
- */ -l_int32 * -makePixelCentroidTab8(void) -{ -l_int32 i; -l_int32 *tab; - - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - tab[0] = 0; - tab[1] = 7; - for (i = 2; i < 4; i++) { - tab[i] = tab[i - 2] + 6; - } - for (i = 4; i < 8; i++) { - tab[i] = tab[i - 4] + 5; - } - for (i = 8; i < 16; i++) { - tab[i] = tab[i - 8] + 4; - } - for (i = 16; i < 32; i++) { - tab[i] = tab[i - 16] + 3; - } - for (i = 32; i < 64; i++) { - tab[i] = tab[i - 32] + 2; - } - for (i = 64; i < 128; i++) { - tab[i] = tab[i - 64] + 1; - } - for (i = 128; i < 256; i++) { - tab[i] = tab[i - 128]; - } - return tab; -} - - -/*-------------------------------------------------------------* - * Average of pixel values in gray images * - *-------------------------------------------------------------*/ -/*! - * \brief pixAverageByRow() - * - * \param[in] pix 8 or 16 bpp; no colormap - * \param[in] box [optional] clipping box for sum; can be null - * \param[in] type L_WHITE_IS_MAX, L_BLACK_IS_MAX - * \return na of pixel averages by row, or NULL on error - * - *
- * Notes:
- *      (1) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- *      (2) If type == L_BLACK_IS_MAX, black pixels get the maximum
- *          value (0xff for 8 bpp, 0xffff for 16 bpp) and white get 0.
- * 
- */ -NUMA * -pixAverageByRow(PIX *pix, - BOX *box, - l_int32 type) -{ -l_int32 i, j, w, h, d, wpl, xstart, xend, ystart, yend, bw, bh; -l_uint32 *line, *data; -l_float64 norm, sum; -NUMA *na; - - PROCNAME("pixAverageByRow"); - - if (!pix) - return (NUMA *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - if (d != 8 && d != 16) - return (NUMA *)ERROR_PTR("pix not 8 or 16 bpp", procName, NULL); - if (type != L_WHITE_IS_MAX && type != L_BLACK_IS_MAX) - return (NUMA *)ERROR_PTR("invalid type", procName, NULL); - if (pixGetColormap(pix) != NULL) - return (NUMA *)ERROR_PTR("pix colormapped", procName, NULL); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - - norm = 1. / (l_float32)bw; - if ((na = numaCreate(bh)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, ystart, 1); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = ystart; i < yend; i++) { - sum = 0.0; - line = data + i * wpl; - if (d == 8) { - for (j = xstart; j < xend; j++) - sum += GET_DATA_BYTE(line, j); - if (type == L_BLACK_IS_MAX) - sum = bw * 255 - sum; - } else { /* d == 16 */ - for (j = xstart; j < xend; j++) - sum += GET_DATA_TWO_BYTES(line, j); - if (type == L_BLACK_IS_MAX) - sum = bw * 0xffff - sum; - } - numaAddNumber(na, (l_float32)(norm * sum)); - } - - return na; -} - - -/*! - * \brief pixAverageByColumn() - * - * \param[in] pix 8 or 16 bpp; no colormap - * \param[in] box [optional] clipping box for sum; can be null - * \param[in] type L_WHITE_IS_MAX, L_BLACK_IS_MAX - * \return na of pixel averages by column, or NULL on error - * - *
- * Notes:
- *      (1) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- *      (2) If type == L_BLACK_IS_MAX, black pixels get the maximum
- *          value (0xff for 8 bpp, 0xffff for 16 bpp) and white get 0.
- * 
- */ -NUMA * -pixAverageByColumn(PIX *pix, - BOX *box, - l_int32 type) -{ -l_int32 i, j, w, h, d, wpl, xstart, xend, ystart, yend, bw, bh; -l_uint32 *line, *data; -l_float32 norm, sum; -NUMA *na; - - PROCNAME("pixAverageByColumn"); - - if (!pix) - return (NUMA *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - - if (d != 8 && d != 16) - return (NUMA *)ERROR_PTR("pix not 8 or 16 bpp", procName, NULL); - if (type != L_WHITE_IS_MAX && type != L_BLACK_IS_MAX) - return (NUMA *)ERROR_PTR("invalid type", procName, NULL); - if (pixGetColormap(pix) != NULL) - return (NUMA *)ERROR_PTR("pix colormapped", procName, NULL); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - - if ((na = numaCreate(bw)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, xstart, 1); - norm = 1. / (l_float32)bh; - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (j = xstart; j < xend; j++) { - sum = 0.0; - if (d == 8) { - for (i = ystart; i < yend; i++) { - line = data + i * wpl; - sum += GET_DATA_BYTE(line, j); - } - if (type == L_BLACK_IS_MAX) - sum = bh * 255 - sum; - } else { /* d == 16 */ - for (i = ystart; i < yend; i++) { - line = data + i * wpl; - sum += GET_DATA_TWO_BYTES(line, j); - } - if (type == L_BLACK_IS_MAX) - sum = bh * 0xffff - sum; - } - numaAddNumber(na, (l_float32)(norm * sum)); - } - - return na; -} - - -/*! 
- * \brief pixAverageInRect() - * - * \param[in] pixs 1, 2, 4, 8 bpp; not cmapped - * \param[in] pixm [optional] 1 bpp mask; if null, use all pixels - * \param[in] box [optional] if null, use entire image - * \param[in] minval ignore values less than this - * \param[in] maxval ignore values greater than this - * \param[in] subsamp subsample factor: integer; use 1 for all pixels - * \param[out] pave average of pixel values under consideration - * \return 0 if OK; 1 on error; 2 if all pixels are filtered out - * - *
- * Notes:
- *      (1) The average is computed with 4 optional filters: a rectangle,
- *          a mask, a contiguous set of range values, and subsampling.
- *          In practice you might use only one or two of these.
- *      (2) The mask %pixm is a blocking mask: only count pixels in the bg.
- *          If it exists, alignment is assumed at UL corner and computation
- *          is over the minimum intersection of %pixs and %pixm.
- *          If you want the average of pixels under the mask fg, invert it.
- *      (3) Set the range limits %minval = 0 and %maxval = 255 to use
- *          all non-masked pixels (regardless of value) in the average.
- *      (4) If no pixels are used in the averaging, the returned average
- *          value is 0 and the function returns 2.  This is not an error,
- *          but it says to disregard the returned average value.
- *      (5) For example, to average all pixels in a given clipping rect %box,
- *              pixAverageInRect(pixs, NULL, box, 0, 255, 1, &aveval);
- * 
- */ -l_ok -pixAverageInRect(PIX *pixs, - PIX *pixm, - BOX *box, - l_int32 minval, - l_int32 maxval, - l_int32 subsamp, - l_float32 *pave) -{ -l_int32 w, h, d, wpls, wm, hm, dm, wplm, val, count; -l_int32 i, j, xstart, xend, ystart, yend; -l_uint32 *datas, *datam, *lines, *linem; -l_float32 ave; -l_float64 sum; - - PROCNAME("pixAverageInRect"); - - if (!pave) - return ERROR_INT("&ave not defined", procName, 1); - *pave = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetColormap(pixs) != NULL) - return ERROR_INT("pixs is colormapped", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8) - return ERROR_INT("pixs not 1, 2, 4 or 8 bpp", procName, 1); - if (pixm) { - pixGetDimensions(pixm, &wm, &hm, &dm); - if (dm != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - w = L_MIN(w, wm); - h = L_MIN(h, hm); - } - if (subsamp < 1) - return ERROR_INT("subsamp must be >= 1", procName, 1); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - NULL, NULL) == 1) - return ERROR_INT("invalid clipping box", procName, 1); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (pixm) { - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - } - sum = 0.0; - count = 0; - for (i = ystart; i < yend; i += subsamp) { - lines = datas + i * wpls; - if (pixm) - linem = datam + i * wplm; - for (j = xstart; j < xend; j += subsamp) { - if (pixm && (GET_DATA_BIT(linem, j) == 1)) - continue; - if (d == 1) - val = GET_DATA_BIT(lines, j); - else if (d == 2) - val = GET_DATA_DIBIT(lines, j); - else if (d == 4) - val = GET_DATA_QBIT(lines, j); - else /* d == 8 */ - val = GET_DATA_BYTE(lines, j); - if (val >= minval && val <= maxval) { - sum += val; - count++; - } - } - } - - if (count == 0) - return 2; /* not an error; don't use the average value (0.0) */ - *pave = sum / (l_float32)count; - return 0; -} - - -/*-------------------------------------------------------------* - * Average of pixel 
values in RGB images * - *-------------------------------------------------------------*/ -/*! - * \brief pixAverageInRectRGB() - * - * \param[in] pixs rgb; not cmapped - * \param[in] pixm [optional] 1 bpp mask; if null, use all pixels - * \param[in] box [optional] if null, use entire image - * \param[in] subsamp subsample factor: integer; use 1 for all pixels - * \param[out] pave average color of pixel values under consideration, - * in format 0xrrggbb00. - * \return 0 if OK; 1 on error; 2 if all pixels are filtered out - * - *
- * Notes:
- *      (1) The average is computed with 3 optional filters: a rectangle,
- *          a mask, and subsampling.
- *          In practice you might use only one or two of these.
- *      (2) The mask %pixm is a blocking mask: only count pixels in the bg.
- *          If it exists, alignment is assumed at UL corner and computation
- *          is over the minimum intersection of %pixs and %pixm.
- *          If you want the average of pixels under the mask fg, invert it.
- *      (3) If no pixels are used in the averaging, the returned average
- *          value is 0 and the function returns 2.  This is not an error,
- *          but it says to disregard the returned average value.
- *      (4) For example, to average all pixels in a given clipping rect %box,
- *              pixAverageInRectRGB(pixs, NULL, box, 1, &aveval);
- * 
- */ -l_ok -pixAverageInRectRGB(PIX *pixs, - PIX *pixm, - BOX *box, - l_int32 subsamp, - l_uint32 *pave) -{ -l_int32 w, h, wpls, wm, hm, dm, wplm, i, j, xstart, xend, ystart, yend; -l_int32 rval, gval, bval, rave, gave, bave, count; -l_uint32 *datas, *datam, *lines, *linem; -l_uint32 pixel; -l_float64 rsum, gsum, bsum; - - PROCNAME("pixAverageInRectRGB"); - - if (!pave) - return ERROR_INT("&ave not defined", procName, 1); - *pave = 0; - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs undefined or not 32 bpp", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixm) { - pixGetDimensions(pixm, &wm, &hm, &dm); - if (dm != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - w = L_MIN(w, wm); - h = L_MIN(h, hm); - } - if (subsamp < 1) - return ERROR_INT("subsamp must be >= 1", procName, 1); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - NULL, NULL) == 1) - return ERROR_INT("invalid clipping box", procName, 1); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (pixm) { - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - } - rsum = gsum = bsum = 0.0; - count = 0; - for (i = ystart; i < yend; i += subsamp) { - lines = datas + i * wpls; - if (pixm) - linem = datam + i * wplm; - for (j = xstart; j < xend; j += subsamp) { - if (pixm && (GET_DATA_BIT(linem, j) == 1)) - continue; - pixel = *(lines + j); - extractRGBValues(pixel, &rval, &gval, &bval); - rsum += rval; - gsum += gval; - bsum += bval; - count++; - } - } - - if (count == 0) - return 2; /* not an error */ - rave = (l_uint32)(rsum / (l_float64)count); - gave = (l_uint32)(gsum / (l_float64)count); - bave = (l_uint32)(bsum / (l_float64)count); - composeRGBPixel(rave, gave, bave, pave); - return 0; -} - - -/*------------------------------------------------------------------* - * Variance of pixel values in gray images * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixVarianceByRow() - * - * \param[in] pix 8 or 16 bpp; no colormap - * \param[in] box [optional] clipping box for variance; can be null - * \return na of rmsdev by row, or NULL on error - * - *
- * Notes:
- *      (1) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- *      (2) We are actually computing the RMS deviation in each row.
- *          This is the square root of the variance.
- * 
- */ -NUMA * -pixVarianceByRow(PIX *pix, - BOX *box) -{ -l_int32 i, j, w, h, d, wpl, xstart, xend, ystart, yend, bw, bh, val; -l_uint32 *line, *data; -l_float64 sum1, sum2, norm, ave, var, rootvar; -NUMA *na; - - PROCNAME("pixVarianceByRow"); - - if (!pix) - return (NUMA *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - if (d != 8 && d != 16) - return (NUMA *)ERROR_PTR("pix not 8 or 16 bpp", procName, NULL); - if (pixGetColormap(pix) != NULL) - return (NUMA *)ERROR_PTR("pix colormapped", procName, NULL); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - - if ((na = numaCreate(bh)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, ystart, 1); - norm = 1. / (l_float32)bw; - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = ystart; i < yend; i++) { - sum1 = sum2 = 0.0; - line = data + i * wpl; - for (j = xstart; j < xend; j++) { - if (d == 8) - val = GET_DATA_BYTE(line, j); - else /* d == 16 */ - val = GET_DATA_TWO_BYTES(line, j); - sum1 += val; - sum2 += (l_float64)(val) * val; - } - ave = norm * sum1; - var = norm * sum2 - ave * ave; - rootvar = sqrt(var); - numaAddNumber(na, (l_float32)rootvar); - } - - return na; -} - - -/*! - * \brief pixVarianceByColumn() - * - * \param[in] pix 8 or 16 bpp; no colormap - * \param[in] box [optional] clipping box for variance; can be null - * \return na of rmsdev by column, or NULL on error - * - *
- * Notes:
- *      (1) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- *      (2) We are actually computing the RMS deviation in each row.
- *          This is the square root of the variance.
- * 
- */ -NUMA * -pixVarianceByColumn(PIX *pix, - BOX *box) -{ -l_int32 i, j, w, h, d, wpl, xstart, xend, ystart, yend, bw, bh, val; -l_uint32 *line, *data; -l_float64 sum1, sum2, norm, ave, var, rootvar; -NUMA *na; - - PROCNAME("pixVarianceByColumn"); - - if (!pix) - return (NUMA *)ERROR_PTR("pix not defined", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - if (d != 8 && d != 16) - return (NUMA *)ERROR_PTR("pix not 8 or 16 bpp", procName, NULL); - if (pixGetColormap(pix) != NULL) - return (NUMA *)ERROR_PTR("pix colormapped", procName, NULL); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - - if ((na = numaCreate(bw)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, xstart, 1); - norm = 1. / (l_float32)bh; - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (j = xstart; j < xend; j++) { - sum1 = sum2 = 0.0; - for (i = ystart; i < yend; i++) { - line = data + wpl * i; - if (d == 8) - val = GET_DATA_BYTE(line, j); - else /* d == 16 */ - val = GET_DATA_TWO_BYTES(line, j); - sum1 += val; - sum2 += (l_float64)(val) * val; - } - ave = norm * sum1; - var = norm * sum2 - ave * ave; - rootvar = sqrt(var); - numaAddNumber(na, (l_float32)rootvar); - } - - return na; -} - - -/*! 
- * \brief pixVarianceInRect() - * - * \param[in] pix 1, 2, 4, 8 bpp; not cmapped - * \param[in] box [optional] if null, use entire image - * \param[out] prootvar sqrt variance of pixel values in region - * \return 0 if OK; 1 on error - */ -l_ok -pixVarianceInRect(PIX *pix, - BOX *box, - l_float32 *prootvar) -{ -l_int32 w, h, d, wpl, i, j, xstart, xend, ystart, yend, bw, bh, val; -l_uint32 *data, *line; -l_float64 sum1, sum2, norm, ave, var; - - PROCNAME("pixVarianceInRect"); - - if (!prootvar) - return ERROR_INT("&rootvar not defined", procName, 1); - *prootvar = 0.0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8) - return ERROR_INT("pix not 1, 2, 4 or 8 bpp", procName, 1); - if (pixGetColormap(pix) != NULL) - return ERROR_INT("pix is colormapped", procName, 1); - - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return ERROR_INT("invalid clipping box", procName, 1); - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - sum1 = sum2 = 0.0; - for (i = ystart; i < yend; i++) { - line = data + i * wpl; - for (j = xstart; j < xend; j++) { - if (d == 1) { - val = GET_DATA_BIT(line, j); - sum1 += val; - sum2 += (l_float64)(val) * val; - } else if (d == 2) { - val = GET_DATA_DIBIT(line, j); - sum1 += val; - sum2 += (l_float64)(val) * val; - } else if (d == 4) { - val = GET_DATA_QBIT(line, j); - sum1 += val; - sum2 += (l_float64)(val) * val; - } else { /* d == 8 */ - val = GET_DATA_BYTE(line, j); - sum1 += val; - sum2 += (l_float64)(val) * val; - } - } - } - norm = 1.0 / ((l_float64)(bw) * bh); - ave = norm * sum1; - var = norm * sum2 - ave * ave; - *prootvar = (l_float32)sqrt(var); - return 0; -} - - -/*---------------------------------------------------------------------* - * Average of absolute value of pixel differences in gray images * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixAbsDiffByRow() - * - * \param[in] pix 8 bpp; no colormap - * \param[in] box [optional] clipping box for region; can be null - * \return na of abs val pixel difference averages by row, or NULL on error - * - *
- * Notes:
- *      (1) This is an average over differences of adjacent pixels along
- *          each row.
- *      (2) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- * 
- */ -NUMA * -pixAbsDiffByRow(PIX *pix, - BOX *box) -{ -l_int32 i, j, w, h, wpl, xstart, xend, ystart, yend, bw, bh, val0, val1; -l_uint32 *line, *data; -l_float64 norm, sum; -NUMA *na; - - PROCNAME("pixAbsDiffByRow"); - - if (!pix || pixGetDepth(pix) != 8) - return (NUMA *)ERROR_PTR("pix undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pix) != NULL) - return (NUMA *)ERROR_PTR("pix colormapped", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - if (bw < 2) - return (NUMA *)ERROR_PTR("row width must be >= 2", procName, NULL); - - norm = 1. / (l_float32)(bw - 1); - if ((na = numaCreate(bh)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, ystart, 1); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = ystart; i < yend; i++) { - sum = 0.0; - line = data + i * wpl; - val0 = GET_DATA_BYTE(line, xstart); - for (j = xstart + 1; j < xend; j++) { - val1 = GET_DATA_BYTE(line, j); - sum += L_ABS(val1 - val0); - val0 = val1; - } - numaAddNumber(na, (l_float32)(norm * sum)); - } - - return na; -} - - -/*! - * \brief pixAbsDiffByColumn() - * - * \param[in] pix 8 bpp; no colormap - * \param[in] box [optional] clipping box for region; can be null - * \return na of abs val pixel difference averages by column, - * or NULL on error - * - *
- * Notes:
- *      (1) This is an average over differences of adjacent pixels along
- *          each column.
- *      (2) To resample for a bin size different from 1, use
- *          numaUniformSampling() on the result of this function.
- * 
- */ -NUMA * -pixAbsDiffByColumn(PIX *pix, - BOX *box) -{ -l_int32 i, j, w, h, wpl, xstart, xend, ystart, yend, bw, bh, val0, val1; -l_uint32 *line, *data; -l_float64 norm, sum; -NUMA *na; - - PROCNAME("pixAbsDiffByColumn"); - - if (!pix || pixGetDepth(pix) != 8) - return (NUMA *)ERROR_PTR("pix undefined or not 8 bpp", procName, NULL); - if (pixGetColormap(pix) != NULL) - return (NUMA *)ERROR_PTR("pix colormapped", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return (NUMA *)ERROR_PTR("invalid clipping box", procName, NULL); - if (bh < 2) - return (NUMA *)ERROR_PTR("column height must be >= 2", procName, NULL); - - norm = 1. / (l_float32)(bh - 1); - if ((na = numaCreate(bw)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetParameters(na, xstart, 1); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (j = xstart; j < xend; j++) { - sum = 0.0; - line = data + ystart * wpl; - val0 = GET_DATA_BYTE(line, j); - for (i = ystart + 1; i < yend; i++) { - line = data + i * wpl; - val1 = GET_DATA_BYTE(line, j); - sum += L_ABS(val1 - val0); - val0 = val1; - } - numaAddNumber(na, (l_float32)(norm * sum)); - } - - return na; -} - - -/*! - * \brief pixAbsDiffInRect() - * - * \param[in] pix 8 bpp; not cmapped - * \param[in] box [optional] if null, use entire image - * \param[in] dir differences along L_HORIZONTAL_LINE or L_VERTICAL_LINE - * \param[out] pabsdiff average of abs diff pixel values in region - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This gives the average over the abs val of differences of
- *          adjacent pixels values, along either each
- *             row:     dir == L_HORIZONTAL_LINE
- *             column:  dir == L_VERTICAL_LINE
- * 
- */ -l_ok -pixAbsDiffInRect(PIX *pix, - BOX *box, - l_int32 dir, - l_float32 *pabsdiff) -{ -l_int32 w, h, wpl, i, j, xstart, xend, ystart, yend, bw, bh, val0, val1; -l_uint32 *data, *line; -l_float64 norm, sum; - - PROCNAME("pixAbsDiffInRect"); - - if (!pabsdiff) - return ERROR_INT("&absdiff not defined", procName, 1); - *pabsdiff = 0.0; - if (!pix || pixGetDepth(pix) != 8) - return ERROR_INT("pix undefined or not 8 bpp", procName, 1); - if (dir != L_HORIZONTAL_LINE && dir != L_VERTICAL_LINE) - return ERROR_INT("invalid direction", procName, 1); - if (pixGetColormap(pix) != NULL) - return ERROR_INT("pix is colormapped", procName, 1); - - pixGetDimensions(pix, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return ERROR_INT("invalid clipping box", procName, 1); - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - if (dir == L_HORIZONTAL_LINE) { - norm = 1. / (l_float32)(bh * (bw - 1)); - sum = 0.0; - for (i = ystart; i < yend; i++) { - line = data + i * wpl; - val0 = GET_DATA_BYTE(line, xstart); - for (j = xstart + 1; j < xend; j++) { - val1 = GET_DATA_BYTE(line, j); - sum += L_ABS(val1 - val0); - val0 = val1; - } - } - } else { /* vertical line */ - norm = 1. / (l_float32)(bw * (bh - 1)); - sum = 0.0; - for (j = xstart; j < xend; j++) { - line = data + ystart * wpl; - val0 = GET_DATA_BYTE(line, j); - for (i = ystart + 1; i < yend; i++) { - line = data + i * wpl; - val1 = GET_DATA_BYTE(line, j); - sum += L_ABS(val1 - val0); - val0 = val1; - } - } - } - *pabsdiff = (l_float32)(norm * sum); - return 0; -} - - -/*! - * \brief pixAbsDiffOnLine() - * - * \param[in] pix 8 bpp; not cmapped - * \param[in] x1, y1 first point; x1 <= x2, y1 <= y2 - * \param[in] x2, y2 first point - * \param[out] pabsdiff average of abs diff pixel values on line - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This gives the average over the abs val of differences of
- *          adjacent pixels values, along a line that is either horizontal
- *          or vertical.
- *      (2) If horizontal, require x1 < x2; if vertical, require y1 < y2.
- * 
- */ -l_ok -pixAbsDiffOnLine(PIX *pix, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_float32 *pabsdiff) -{ -l_int32 w, h, i, j, dir, size, sum; -l_uint32 val0, val1; - - PROCNAME("pixAbsDiffOnLine"); - - if (!pabsdiff) - return ERROR_INT("&absdiff not defined", procName, 1); - *pabsdiff = 0.0; - if (!pix || pixGetDepth(pix) != 8) - return ERROR_INT("pix undefined or not 8 bpp", procName, 1); - if (y1 == y2) { - dir = L_HORIZONTAL_LINE; - } else if (x1 == x2) { - dir = L_VERTICAL_LINE; - } else { - return ERROR_INT("line is neither horiz nor vert", procName, 1); - } - if (pixGetColormap(pix) != NULL) - return ERROR_INT("pix is colormapped", procName, 1); - - pixGetDimensions(pix, &w, &h, NULL); - sum = 0; - if (dir == L_HORIZONTAL_LINE) { - x1 = L_MAX(x1, 0); - x2 = L_MIN(x2, w - 1); - if (x1 >= x2) - return ERROR_INT("x1 >= x2", procName, 1); - size = x2 - x1; - pixGetPixel(pix, x1, y1, &val0); - for (j = x1 + 1; j <= x2; j++) { - pixGetPixel(pix, j, y1, &val1); - sum += L_ABS((l_int32)val1 - (l_int32)val0); - val0 = val1; - } - } else { /* vertical */ - y1 = L_MAX(y1, 0); - y2 = L_MIN(y2, h - 1); - if (y1 >= y2) - return ERROR_INT("y1 >= y2", procName, 1); - size = y2 - y1; - pixGetPixel(pix, x1, y1, &val0); - for (i = y1 + 1; i <= y2; i++) { - pixGetPixel(pix, x1, i, &val1); - sum += L_ABS((l_int32)val1 - (l_int32)val0); - val0 = val1; - } - } - *pabsdiff = (l_float32)sum / (l_float32)size; - return 0; -} - - -/*-------------------------------------------------------------* - * Count of pixels with specific value * - *-------------------------------------------------------------*/ -/*! - * \brief pixCountArbInRect() - * - * \param[in] pixs 8 bpp, or colormapped - * \param[in] box [optional] over which count is made; - * use entire image if NULL - * \param[in] val pixel value to count - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] pcount count; estimate it if factor > 1 - * \return na histogram, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is cmapped, %val is compared to the colormap index;
- *          otherwise, %val is compared to the grayscale value.
- *      (2) Set the subsampling %factor > 1 to reduce the amount of computation.
- *          If %factor > 1, multiply the count by %factor * %factor.
- * 
- */ -l_int32 -pixCountArbInRect(PIX *pixs, - BOX *box, - l_int32 val, - l_int32 factor, - l_int32 *pcount) -{ -l_int32 i, j, bx, by, bw, bh, w, h, wpl, pixval; -l_uint32 *data, *line; - - PROCNAME("pixCountArbInRect"); - - if (!pcount) - return ERROR_INT("&count not defined", procName, 1); - *pcount = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 8 && !pixGetColormap(pixs)) - return ERROR_INT("pixs neither 8 bpp nor colormapped", - procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor < 1", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - - if (!box) { - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - pixval = GET_DATA_BYTE(line, j); - if (pixval == val) (*pcount)++; - } - } - } else { - boxGetGeometry(box, &bx, &by, &bw, &bh); - for (i = 0; i < bh; i += factor) { - if (by + i < 0 || by + i >= h) continue; - line = data + (by + i) * wpl; - for (j = 0; j < bw; j += factor) { - if (bx + j < 0 || bx + j >= w) continue; - pixval = GET_DATA_BYTE(line, bx + j); - if (pixval == val) (*pcount)++; - } - } - } - - if (factor > 1) /* assume pixel color is randomly distributed */ - *pcount = *pcount * factor * factor; - return 0; -} - - -/*-------------------------------------------------------------* - * Mirrored tiling of a smaller image * - *-------------------------------------------------------------*/ -/*! - * \brief pixMirroredTiling() - * - * \param[in] pixs 8 or 32 bpp, small tile; to be replicated - * \param[in] w, h dimensions of output pix - * \return pixd usually larger pix, mirror-tiled with pixs, - * or NULL on error - * - *
- * Notes:
- *      (1) This uses mirrored tiling, where each row alternates
- *          with LR flips and every column alternates with TB
- *          flips, such that the result is a tiling with identical
- *          2 x 2 tiles, each of which is composed of these transforms:
- *                  -----------------
- *                  | 1    |  LR    |
- *                  -----------------
- *                  | TB   |  LR/TB |
- *                  -----------------
- * 
- */ -PIX * -pixMirroredTiling(PIX *pixs, - l_int32 w, - l_int32 h) -{ -l_int32 wt, ht, d, i, j, nx, ny; -PIX *pixd, *pixsfx, *pixsfy, *pixsfxy, *pix; - - PROCNAME("pixMirroredTiling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &wt, &ht, &d); - if (wt <= 0 || ht <= 0) - return (PIX *)ERROR_PTR("pixs size illegal", procName, NULL); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 32 bpp", procName, NULL); - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopySpp(pixd, pixs); - - nx = (w + wt - 1) / wt; - ny = (h + ht - 1) / ht; - pixsfx = pixFlipLR(NULL, pixs); - pixsfy = pixFlipTB(NULL, pixs); - pixsfxy = pixFlipTB(NULL, pixsfx); - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - pix = pixs; - if ((i & 1) && !(j & 1)) - pix = pixsfy; - else if (!(i & 1) && (j & 1)) - pix = pixsfx; - else if ((i & 1) && (j & 1)) - pix = pixsfxy; - pixRasterop(pixd, j * wt, i * ht, wt, ht, PIX_SRC, pix, 0, 0); - } - } - - pixDestroy(&pixsfx); - pixDestroy(&pixsfy); - pixDestroy(&pixsfxy); - return pixd; -} - - -/*! - * \brief pixFindRepCloseTile() - * - * \param[in] pixs 32 bpp rgb - * \param[in] box region of pixs to search around - * \param[in] searchdir L_HORIZ or L_VERT; direction to search - * \param[in] mindist min distance of selected tile edge from box; >= 0 - * \param[in] tsize tile size; > 1; even; typically ~50 - * \param[in] ntiles number of tiles tested in each row/column - * \param[out] pboxtile region of best tile - * \param[in] debug 1 for debug output - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This looks for one or two square tiles with conforming median
- *          intensity and low variance, that is outside but near the input box.
- *      (2) %mindist specifies the gap between the box and the
- *          potential tiles.  The tiles are given an overlap of 50%.
- *          %ntiles specifies the number of tiles that are tested
- *          beyond %mindist for each row or column.
- *      (3) For example, if %mindist = 20, %tilesize = 50 and %ntiles = 3,
- *          a horizontal search to the right will have 3 tiles in each row,
- *          with left edges at 20, 45 and 70 from the right edge of the
- *          input %box.  The number of rows of tiles is determined by
- *          the height of %box and %tsize, with the 50% overlap..
- * 
- */ -l_ok -pixFindRepCloseTile(PIX *pixs, - BOX *box, - l_int32 searchdir, - l_int32 mindist, - l_int32 tsize, - l_int32 ntiles, - BOX **pboxtile, - l_int32 debug) -{ -l_int32 w, h, i, n, bestindex; -l_float32 var_of_mean, median_of_mean, median_of_stdev, mean_val, stdev_val; -l_float32 mindels, bestdelm, delm, dels, mean, stdev; -BOXA *boxa; -NUMA *namean, *nastdev; -PIX *pix, *pixg; -PIXA *pixa; - - PROCNAME("pixFindRepCloseTile"); - - if (!pboxtile) - return ERROR_INT("&boxtile not defined", procName, 1); - *pboxtile = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (searchdir != L_HORIZ && searchdir != L_VERT) - return ERROR_INT("invalid searchdir", procName, 1); - if (mindist < 0) - return ERROR_INT("mindist must be >= 0", procName, 1); - if (tsize < 2) - return ERROR_INT("tsize must be > 1", procName, 1); - if (ntiles > 7) { - L_WARNING("ntiles = %d; larger than suggested max of 7\n", - procName, ntiles); - } - - /* Locate tile regions */ - pixGetDimensions(pixs, &w, &h, NULL); - boxa = findTileRegionsForSearch(box, w, h, searchdir, mindist, - tsize, ntiles); - if (!boxa) - return ERROR_INT("no tiles found", procName, 1); - - /* Generate the tiles and the mean and stdev of intensity */ - pixa = pixClipRectangles(pixs, boxa); - n = pixaGetCount(pixa); - namean = numaCreate(n); - nastdev = numaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixg = pixConvertRGBToGray(pix, 0.33, 0.34, 0.33); - pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_MEAN_ABSVAL, &mean); - pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_STANDARD_DEVIATION, &stdev); - numaAddNumber(namean, mean); - numaAddNumber(nastdev, stdev); - pixDestroy(&pix); - pixDestroy(&pixg); - } - - /* Find the median and variance of the averages. 
We require - * the best tile to have a mean pixel intensity within a standard - * deviation of the median of mean intensities, and choose the - * tile in that set with the smallest stdev of pixel intensities - * (as a proxy for the tile with least visible structure). - * The median of the stdev is used, for debugging, as a normalizing - * factor for the stdev of intensities within a tile. */ - numaGetStatsUsingHistogram(namean, 256, NULL, NULL, NULL, &var_of_mean, - &median_of_mean, 0.0, NULL, NULL); - numaGetStatsUsingHistogram(nastdev, 256, NULL, NULL, NULL, NULL, - &median_of_stdev, 0.0, NULL, NULL); - mindels = 1000.0; - bestdelm = 1000.0; - bestindex = 0; - for (i = 0; i < n; i++) { - numaGetFValue(namean, i, &mean_val); - numaGetFValue(nastdev, i, &stdev_val); - if (var_of_mean == 0.0) { /* uniform color; any box will do */ - delm = 0.0; /* any value < 1.01 */ - dels = 1.0; /* n'importe quoi */ - } else { - delm = L_ABS(mean_val - median_of_mean) / sqrt(var_of_mean); - dels = stdev_val / median_of_stdev; - } - if (delm < 1.01) { - if (dels < mindels) { - if (debug) { - lept_stderr("i = %d, mean = %7.3f, delm = %7.3f," - " stdev = %7.3f, dels = %7.3f\n", - i, mean_val, delm, stdev_val, dels); - } - mindels = dels; - bestdelm = delm; - bestindex = i; - } - } - } - *pboxtile = boxaGetBox(boxa, bestindex, L_COPY); - - if (debug) { - L_INFO("median of mean = %7.3f\n", procName, median_of_mean); - L_INFO("standard dev of mean = %7.3f\n", procName, sqrt(var_of_mean)); - L_INFO("median of stdev = %7.3f\n", procName, median_of_stdev); - L_INFO("best tile: index = %d\n", procName, bestindex); - L_INFO("delta from median in units of stdev = %5.3f\n", - procName, bestdelm); - L_INFO("stdev as fraction of median stdev = %5.3f\n", - procName, mindels); - } - - numaDestroy(&namean); - numaDestroy(&nastdev); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - return 0; -} - - -/*! 
- * \brief findTileRegionsForSearch() - * - * \param[in] box region of Pix to search around - * \param[in] w, h dimensions of Pix - * \param[in] searchdir L_HORIZ or L_VERT; direction to search - * \param[in] mindist min distance of selected tile edge from box; >= 0 - * \param[in] tsize tile size; > 1; even; typically ~50 - * \param[in] ntiles number of tiles tested in each row/column - * \return boxa if OK, or NULL on error - * - *
- * Notes:
- *      (1) See calling function pixfindRepCloseTile().
- * 
- */ -static BOXA * -findTileRegionsForSearch(BOX *box, - l_int32 w, - l_int32 h, - l_int32 searchdir, - l_int32 mindist, - l_int32 tsize, - l_int32 ntiles) -{ -l_int32 bx, by, bw, bh, left, right, top, bot, i, j, nrows, ncols; -l_int32 x0, y0, x, y, w_avail, w_needed, h_avail, h_needed, t_avail; -BOX *box1; -BOXA *boxa; - - PROCNAME("findTileRegionsForSearch"); - - if (!box) - return (BOXA *)ERROR_PTR("box not defined", procName, NULL); - if (ntiles == 0) - return (BOXA *)ERROR_PTR("no tiles requested", procName, NULL); - - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (searchdir == L_HORIZ) { - /* Find the tile parameters for the search. Note that the - * tiles are overlapping by 50% in each direction. */ - left = bx; /* distance to left of box */ - right = w - bx - bw + 1; /* distance to right of box */ - w_avail = L_MAX(left, right) - mindist; - if (tsize & 1) tsize++; /* be sure it's even */ - if (w_avail < tsize) { - L_ERROR("tsize = %d, w_avail = %d\n", procName, tsize, w_avail); - return NULL; - } - w_needed = tsize + (ntiles - 1) * (tsize / 2); - if (w_needed > w_avail) { - t_avail = 1 + 2 * (w_avail - tsize) / tsize; - L_WARNING("ntiles = %d; room for only %d\n", procName, - ntiles, t_avail); - ntiles = t_avail; - w_needed = tsize + (ntiles - 1) * (tsize / 2); - } - nrows = L_MAX(1, 1 + 2 * (bh - tsize) / tsize); - - /* Generate the tile regions to search */ - boxa = boxaCreate(0); - if (left > right) /* search to left */ - x0 = bx - w_needed; - else /* search to right */ - x0 = bx + bw + mindist; - for (i = 0; i < nrows; i++) { - y = by + i * tsize / 2; - for (j = 0; j < ntiles; j++) { - x = x0 + j * tsize / 2; - box1 = boxCreate(x, y, tsize, tsize); - boxaAddBox(boxa, box1, L_INSERT); - } - } - } else { /* L_VERT */ - /* Find the tile parameters for the search */ - top = by; /* distance above box */ - bot = h - by - bh + 1; /* distance below box */ - h_avail = L_MAX(top, bot) - mindist; - if (h_avail < tsize) { - L_ERROR("tsize = %d, h_avail = %d\n", 
procName, tsize, h_avail); - return NULL; - } - h_needed = tsize + (ntiles - 1) * (tsize / 2); - if (h_needed > h_avail) { - t_avail = 1 + 2 * (h_avail - tsize) / tsize; - L_WARNING("ntiles = %d; room for only %d\n", procName, - ntiles, t_avail); - ntiles = t_avail; - h_needed = tsize + (ntiles - 1) * (tsize / 2); - } - ncols = L_MAX(1, 1 + 2 * (bw - tsize) / tsize); - - /* Generate the tile regions to search */ - boxa = boxaCreate(0); - if (top > bot) /* search above */ - y0 = by - h_needed; - else /* search below */ - y0 = by + bh + mindist; - for (j = 0; j < ncols; j++) { - x = bx + j * tsize / 2; - for (i = 0; i < ntiles; i++) { - y = y0 + i * tsize / 2; - box1 = boxCreate(x, y, tsize, tsize); - boxaAddBox(boxa, box1, L_INSERT); - } - } - } - return boxa; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix4.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix4.c deleted file mode 100644 index eb40bd9c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix4.c +++ /dev/null @@ -1,3460 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pix4.c - *
- *
- *    This file has these operations:
- *
- *      (1) Pixel histograms
- *      (2) Pixel row/column statistics
- *      (3) Foreground/background estimation
- *
- *    Pixel histogram, rank val, averaging and min/max
- *           NUMA       *pixGetGrayHistogram()
- *           NUMA       *pixGetGrayHistogramMasked()
- *           NUMA       *pixGetGrayHistogramInRect()
- *           NUMAA      *pixGetGrayHistogramTiled()
- *           l_int32     pixGetColorHistogram()
- *           l_int32     pixGetColorHistogramMasked()
- *           NUMA       *pixGetCmapHistogram()
- *           NUMA       *pixGetCmapHistogramMasked()
- *           NUMA       *pixGetCmapHistogramInRect()
- *           l_int32     pixCountRGBColors()
- *           L_AMAP     *pixGetColorAmapHistogram()
- *           l_int32     amapGetCountForColor()
- *           l_int32     pixGetRankValue()
- *           l_int32     pixGetRankValueMaskedRGB()
- *           l_int32     pixGetRankValueMasked()
- *           l_int32     pixGetPixelAverage()
- *           l_int32     pixGetPixelStats()
- *           l_int32     pixGetAverageMaskedRGB()
- *           l_int32     pixGetAverageMasked()
- *           l_int32     pixGetAverageTiledRGB()
- *           PIX        *pixGetAverageTiled()
- *           NUMA       *pixRowStats()
- *           NUMA       *pixColumnStats()
- *           l_int32     pixGetRangeValues()
- *           l_int32     pixGetExtremeValue()
- *           l_int32     pixGetMaxValueInRect()
- *           l_int32     pixGetBinnedComponentRange()
- *           l_int32     pixGetRankColorArray()
- *           l_int32     pixGetBinnedColor()
- *           PIX        *pixDisplayColorArray()
- *           PIX        *pixRankBinByStrip()
- *
- *    Pixelwise aligned statistics
- *           PIX        *pixaGetAlignedStats()
- *           l_int32     pixaExtractColumnFromEachPix()
- *           l_int32     pixGetRowStats()
- *           l_int32     pixGetColumnStats()
- *           l_int32     pixSetPixelColumn()
- *
- *    Foreground/background estimation
- *           l_int32     pixThresholdForFgBg()
- *           l_int32     pixSplitDistributionFgBg()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - -/*------------------------------------------------------------------* - * Pixel histogram and averaging * - *------------------------------------------------------------------*/ -/*! - * \brief pixGetGrayHistogram() - * - * \param[in] pixs 1, 2, 4, 8, 16 bpp; can be colormapped - * \param[in] factor subsampling factor; integer >= 1 - * \return na histogram, or NULL on error - * - *
- * Notes:
- *      (1) If pixs has a colormap, it is converted to 8 bpp gray.
- *          If you want a histogram of the colormap indices, use
- *          pixGetCmapHistogram().
- *      (2) If pixs does not have a colormap, the output histogram is
- *          of size 2^d, where d is the depth of pixs.
- *      (3) Set the subsampling factor > 1 to reduce the amount of computation.
- * 
- */ -NUMA * -pixGetGrayHistogram(PIX *pixs, - l_int32 factor) -{ -l_int32 i, j, w, h, d, wpl, val, size, count; -l_uint32 *data, *line; -l_float32 *array; -NUMA *na; -PIX *pixg; - - PROCNAME("pixGetGrayHistogram"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d > 16) - return (NUMA *)ERROR_PTR("depth not in {1,2,4,8,16}", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - - if (pixGetColormap(pixs)) - pixg = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixg = pixClone(pixs); - - pixGetDimensions(pixg, &w, &h, &d); - size = 1 << d; - if ((na = numaCreate(size)) == NULL) { - pixDestroy(&pixg); - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - } - numaSetCount(na, size); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - - if (d == 1) { /* special case */ - pixCountPixels(pixg, &count, NULL); - array[0] = w * h - count; - array[1] = count; - pixDestroy(&pixg); - return na; - } - - wpl = pixGetWpl(pixg); - data = pixGetData(pixg); - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - if (d == 2) { - for (j = 0; j < w; j += factor) { - val = GET_DATA_DIBIT(line, j); - array[val] += 1.0; - } - } else if (d == 4) { - for (j = 0; j < w; j += factor) { - val = GET_DATA_QBIT(line, j); - array[val] += 1.0; - } - } else if (d == 8) { - for (j = 0; j < w; j += factor) { - val = GET_DATA_BYTE(line, j); - array[val] += 1.0; - } - } else { /* d == 16 */ - for (j = 0; j < w; j += factor) { - val = GET_DATA_TWO_BYTES(line, j); - array[val] += 1.0; - } - } - } - - pixDestroy(&pixg); - return na; -} - - -/*! 
- * \brief pixGetGrayHistogramMasked() - * - * \param[in] pixs 8 bpp, or colormapped - * \param[in] pixm [optional] 1 bpp mask over which histogram is - * to be computed; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0; these values are ignored if pixm is null - * \param[in] factor subsampling factor; integer >= 1 - * \return na histogram, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is cmapped, it is converted to 8 bpp gray.
- *          If you want a histogram of the colormap indices, use
- *          pixGetCmapHistogramMasked().
- *      (2) This always returns a 256-value histogram of pixel values.
- *      (3) Set the subsampling factor > 1 to reduce the amount of computation.
- *      (4) Clipping of pixm (if it exists) to pixs is done in the inner loop.
- *      (5) Input x,y are ignored unless pixm exists.
- * 
- */ -NUMA * -pixGetGrayHistogramMasked(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor) -{ -l_int32 i, j, w, h, wm, hm, dm, wplg, wplm, val; -l_uint32 *datag, *datam, *lineg, *linem; -l_float32 *array; -NUMA *na; -PIX *pixg; - - PROCNAME("pixGetGrayHistogramMasked"); - - if (!pixm) - return pixGetGrayHistogram(pixs, factor); - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8 && !pixGetColormap(pixs)) - return (NUMA *)ERROR_PTR("pixs neither 8 bpp nor colormapped", - procName, NULL); - pixGetDimensions(pixm, &wm, &hm, &dm); - if (dm != 1) - return (NUMA *)ERROR_PTR("pixm not 1 bpp", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - - if ((na = numaCreate(256)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, 256); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - - if (pixGetColormap(pixs)) - pixg = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixg = pixClone(pixs); - pixGetDimensions(pixg, &w, &h, NULL); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - - /* Generate the histogram */ - for (i = 0; i < hm; i += factor) { - if (y + i < 0 || y + i >= h) continue; - lineg = datag + (y + i) * wplg; - linem = datam + i * wplm; - for (j = 0; j < wm; j += factor) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - val = GET_DATA_BYTE(lineg, x + j); - array[val] += 1.0; - } - } - } - - pixDestroy(&pixg); - return na; -} - - -/*! - * \brief pixGetGrayHistogramInRect() - * - * \param[in] pixs 8 bpp, or colormapped - * \param[in] box [optional] over which histogram is to be computed; - * use full image if NULL - * \param[in] factor subsampling factor; integer >= 1 - * \return na histogram, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is cmapped, it is converted to 8 bpp gray.
- *          If you want a histogram of the colormap indices, use
- *          pixGetCmapHistogramInRect().
- *      (2) This always returns a 256-value histogram of pixel values.
- *      (3) Set the subsampling %factor > 1 to reduce the amount of computation.
- * 
- */ -NUMA * -pixGetGrayHistogramInRect(PIX *pixs, - BOX *box, - l_int32 factor) -{ -l_int32 i, j, bx, by, bw, bh, w, h, wplg, val; -l_uint32 *datag, *lineg; -l_float32 *array; -NUMA *na; -PIX *pixg; - - PROCNAME("pixGetGrayHistogramInRect"); - - if (!box) - return pixGetGrayHistogram(pixs, factor); - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8 && !pixGetColormap(pixs)) - return (NUMA *)ERROR_PTR("pixs neither 8 bpp nor colormapped", - procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - - if ((na = numaCreate(256)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, 256); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - - if (pixGetColormap(pixs)) - pixg = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixg = pixClone(pixs); - pixGetDimensions(pixg, &w, &h, NULL); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - boxGetGeometry(box, &bx, &by, &bw, &bh); - - /* Generate the histogram */ - for (i = 0; i < bh; i += factor) { - if (by + i < 0 || by + i >= h) continue; - lineg = datag + (by + i) * wplg; - for (j = 0; j < bw; j += factor) { - if (bx + j < 0 || bx + j >= w) continue; - val = GET_DATA_BYTE(lineg, bx + j); - array[val] += 1.0; - } - } - - pixDestroy(&pixg); - return na; -} - - -/*! - * \brief pixGetGrayHistogramTiled() - * - * \param[in] pixs any depth, colormap OK - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] nx, ny tiling; >= 1; typically small - * \return naa set of histograms, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is cmapped, it is converted to 8 bpp gray.
- *      (2) This returns a set of 256-value histograms of pixel values.
- *      (3) Set the subsampling factor > 1 to reduce the amount of computation.
- * 
- */ -NUMAA * -pixGetGrayHistogramTiled(PIX *pixs, - l_int32 factor, - l_int32 nx, - l_int32 ny) -{ -l_int32 i, n; -NUMA *na; -NUMAA *naa; -PIX *pix1, *pix2; -PIXA *pixa; - - PROCNAME("pixGetGrayHistogramTiled"); - - if (!pixs) - return (NUMAA *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (NUMAA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - if (nx < 1 || ny < 1) - return (NUMAA *)ERROR_PTR("nx and ny must both be > 0", procName, NULL); - - n = nx * ny; - if ((naa = numaaCreate(n)) == NULL) - return (NUMAA *)ERROR_PTR("naa not made", procName, NULL); - - pix1 = pixConvertTo8(pixs, FALSE); - pixa = pixaSplitPix(pix1, nx, ny, 0, 0); - for (i = 0; i < n; i++) { - pix2 = pixaGetPix(pixa, i, L_CLONE); - na = pixGetGrayHistogram(pix2, factor); - numaaAddNuma(naa, na, L_INSERT); - pixDestroy(&pix2); - } - - pixDestroy(&pix1); - pixaDestroy(&pixa); - return naa; -} - - -/*! - * \brief pixGetColorHistogram() - * - * \param[in] pixs rgb or colormapped - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] pnar red histogram - * \param[out] pnag green histogram - * \param[out] pnab blue histogram - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a set of three 256 entry histograms,
- *          one for each color component (r,g,b).
- *      (2) Set the subsampling %factor > 1 to reduce the amount of computation.
- * 
- */ -l_ok -pixGetColorHistogram(PIX *pixs, - l_int32 factor, - NUMA **pnar, - NUMA **pnag, - NUMA **pnab) -{ -l_int32 i, j, w, h, d, wpl, index, rval, gval, bval; -l_uint32 *data, *line; -l_float32 *rarray, *garray, *barray; -NUMA *nar, *nag, *nab; -PIXCMAP *cmap; - - PROCNAME("pixGetColorHistogram"); - - if (pnar) *pnar = NULL; - if (pnag) *pnag = NULL; - if (pnab) *pnab = NULL; - if (!pnar || !pnag || !pnab) - return ERROR_INT("&nar, &nag, &nab not all defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (cmap && (d != 2 && d != 4 && d != 8)) - return ERROR_INT("colormap and not 2, 4, or 8 bpp", procName, 1); - if (!cmap && d != 32) - return ERROR_INT("no colormap and not rgb", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - - /* Set up the histogram arrays */ - nar = numaCreate(256); - nag = numaCreate(256); - nab = numaCreate(256); - numaSetCount(nar, 256); - numaSetCount(nag, 256); - numaSetCount(nab, 256); - rarray = numaGetFArray(nar, L_NOCOPY); - garray = numaGetFArray(nag, L_NOCOPY); - barray = numaGetFArray(nab, L_NOCOPY); - *pnar = nar; - *pnag = nag; - *pnab = nab; - - /* Generate the color histograms */ - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - if (cmap) { - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - if (d == 8) - index = GET_DATA_BYTE(line, j); - else if (d == 4) - index = GET_DATA_QBIT(line, j); - else /* 2 bpp */ - index = GET_DATA_DIBIT(line, j); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - rarray[rval] += 1.0; - garray[gval] += 1.0; - barray[bval] += 1.0; - } - } - } else { /* 32 bpp rgb */ - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - extractRGBValues(line[j], &rval, &gval, &bval); - rarray[rval] += 1.0; - garray[gval] += 1.0; - barray[bval] += 1.0; - } - } - 
} - - return 0; -} - - -/*! - * \brief pixGetColorHistogramMasked() - * - * \param[in] pixs 32 bpp rgb, or colormapped - * \param[in] pixm [optional] 1 bpp mask over which histogram is - * to be computed; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0; these values are ignored if pixm is null - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] pnar red histogram - * \param[out] pnag green histogram - * \param[out] pnab blue histogram - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a set of three 256 entry histograms,
- *      (2) Set the subsampling %factor > 1 to reduce the amount of computation.
- *      (3) Clipping of pixm (if it exists) to pixs is done in the inner loop.
- *      (4) Input x,y are ignored unless pixm exists.
- * 
- */ -l_ok -pixGetColorHistogramMasked(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor, - NUMA **pnar, - NUMA **pnag, - NUMA **pnab) -{ -l_int32 i, j, w, h, d, wm, hm, dm, wpls, wplm, index, rval, gval, bval; -l_uint32 *datas, *datam, *lines, *linem; -l_float32 *rarray, *garray, *barray; -NUMA *nar, *nag, *nab; -PIXCMAP *cmap; - - PROCNAME("pixGetColorHistogramMasked"); - - if (!pixm) - return pixGetColorHistogram(pixs, factor, pnar, pnag, pnab); - - if (pnar) *pnar = NULL; - if (pnag) *pnag = NULL; - if (pnab) *pnab = NULL; - if (!pnar || !pnag || !pnab) - return ERROR_INT("&nar, &nag, &nab not all defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (cmap && (d != 2 && d != 4 && d != 8)) - return ERROR_INT("colormap and not 2, 4, or 8 bpp", procName, 1); - if (!cmap && d != 32) - return ERROR_INT("no colormap and not rgb", procName, 1); - pixGetDimensions(pixm, &wm, &hm, &dm); - if (dm != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - - /* Set up the histogram arrays */ - nar = numaCreate(256); - nag = numaCreate(256); - nab = numaCreate(256); - numaSetCount(nar, 256); - numaSetCount(nag, 256); - numaSetCount(nab, 256); - rarray = numaGetFArray(nar, L_NOCOPY); - garray = numaGetFArray(nag, L_NOCOPY); - barray = numaGetFArray(nab, L_NOCOPY); - *pnar = nar; - *pnag = nag; - *pnab = nab; - - /* Generate the color histograms */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - if (cmap) { - for (i = 0; i < hm; i += factor) { - if (y + i < 0 || y + i >= h) continue; - lines = datas + (y + i) * wpls; - linem = datam + i * wplm; - for (j = 0; j < wm; j += factor) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - if (d == 8) - index = GET_DATA_BYTE(lines, x + j); - 
else if (d == 4) - index = GET_DATA_QBIT(lines, x + j); - else /* 2 bpp */ - index = GET_DATA_DIBIT(lines, x + j); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - rarray[rval] += 1.0; - garray[gval] += 1.0; - barray[bval] += 1.0; - } - } - } - } else { /* 32 bpp rgb */ - for (i = 0; i < hm; i += factor) { - if (y + i < 0 || y + i >= h) continue; - lines = datas + (y + i) * wpls; - linem = datam + i * wplm; - for (j = 0; j < wm; j += factor) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - extractRGBValues(lines[x + j], &rval, &gval, &bval); - rarray[rval] += 1.0; - garray[gval] += 1.0; - barray[bval] += 1.0; - } - } - } - } - - return 0; -} - - -/*! - * \brief pixGetCmapHistogram() - * - * \param[in] pixs colormapped: d = 2, 4 or 8 - * \param[in] factor subsampling factor; integer >= 1 - * \return na histogram of cmap indices, or NULL on error - * - *
- * Notes:
- *      (1) This generates a histogram of colormap pixel indices,
- *          and is of size 2^d.
- *      (2) Set the subsampling %factor > 1 to reduce the amount of computation.
- * 
- */ -NUMA * -pixGetCmapHistogram(PIX *pixs, - l_int32 factor) -{ -l_int32 i, j, w, h, d, wpl, val, size; -l_uint32 *data, *line; -l_float32 *array; -NUMA *na; - - PROCNAME("pixGetCmapHistogram"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) == NULL) - return (NUMA *)ERROR_PTR("pixs not cmapped", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return (NUMA *)ERROR_PTR("d not 2, 4 or 8", procName, NULL); - - size = 1 << d; - if ((na = numaCreate(size)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, size); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - if (d == 8) - val = GET_DATA_BYTE(line, j); - else if (d == 4) - val = GET_DATA_QBIT(line, j); - else /* d == 2 */ - val = GET_DATA_DIBIT(line, j); - array[val] += 1.0; - } - } - - return na; -} - - -/*! - * \brief pixGetCmapHistogramMasked() - * - * \param[in] pixs colormapped: d = 2, 4 or 8 - * \param[in] pixm [optional] 1 bpp mask over which histogram is - * to be computed; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0; these values are ignored if pixm is null - * \param[in] factor subsampling factor; integer >= 1 - * \return na histogram, or NULL on error - * - *
- * Notes:
- *      (1) This generates a histogram of colormap pixel indices,
- *          and is of size 2^d.
- *      (2) Set the subsampling %factor > 1 to reduce the amount of computation.
- *      (3) Clipping of pixm to pixs is done in the inner loop.
- * 
- */ -NUMA * -pixGetCmapHistogramMasked(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor) -{ -l_int32 i, j, w, h, d, wm, hm, dm, wpls, wplm, val, size; -l_uint32 *datas, *datam, *lines, *linem; -l_float32 *array; -NUMA *na; - - PROCNAME("pixGetCmapHistogramMasked"); - - if (!pixm) - return pixGetCmapHistogram(pixs, factor); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) == NULL) - return (NUMA *)ERROR_PTR("pixs not cmapped", procName, NULL); - pixGetDimensions(pixm, &wm, &hm, &dm); - if (dm != 1) - return (NUMA *)ERROR_PTR("pixm not 1 bpp", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return (NUMA *)ERROR_PTR("d not 2, 4 or 8", procName, NULL); - - size = 1 << d; - if ((na = numaCreate(size)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, size); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - - for (i = 0; i < hm; i += factor) { - if (y + i < 0 || y + i >= h) continue; - lines = datas + (y + i) * wpls; - linem = datam + i * wplm; - for (j = 0; j < wm; j += factor) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - if (d == 8) - val = GET_DATA_BYTE(lines, x + j); - else if (d == 4) - val = GET_DATA_QBIT(lines, x + j); - else /* d == 2 */ - val = GET_DATA_DIBIT(lines, x + j); - array[val] += 1.0; - } - } - } - - return na; -} - - -/*! - * \brief pixGetCmapHistogramInRect() - * - * \param[in] pixs colormapped: d = 2, 4 or 8 - * \param[in] box [optional] over which histogram is to be computed; - * use full image if NULL - * \param[in] factor subsampling factor; integer >= 1 - * \return na histogram, or NULL on error - * - *
- * Notes:
- *      (1) This generates a histogram of colormap pixel indices,
- *          and is of size 2^d.
- *      (2) Set the subsampling %factor > 1 to reduce the amount of computation.
- *      (3) Clipping to the box is done in the inner loop.
- * 
- */ -NUMA * -pixGetCmapHistogramInRect(PIX *pixs, - BOX *box, - l_int32 factor) -{ -l_int32 i, j, bx, by, bw, bh, w, h, d, wpls, val, size; -l_uint32 *datas, *lines; -l_float32 *array; -NUMA *na; - - PROCNAME("pixGetCmapHistogramInRect"); - - if (!box) - return pixGetCmapHistogram(pixs, factor); - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) == NULL) - return (NUMA *)ERROR_PTR("pixs not cmapped", procName, NULL); - if (factor < 1) - return (NUMA *)ERROR_PTR("sampling must be >= 1", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 2 && d != 4 && d != 8) - return (NUMA *)ERROR_PTR("d not 2, 4 or 8", procName, NULL); - - size = 1 << d; - if ((na = numaCreate(size)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - numaSetCount(na, size); /* all initialized to 0.0 */ - array = numaGetFArray(na, L_NOCOPY); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - boxGetGeometry(box, &bx, &by, &bw, &bh); - - for (i = 0; i < bh; i += factor) { - if (by + i < 0 || by + i >= h) continue; - lines = datas + (by + i) * wpls; - for (j = 0; j < bw; j += factor) { - if (bx + j < 0 || bx + j >= w) continue; - if (d == 8) - val = GET_DATA_BYTE(lines, bx + j); - else if (d == 4) - val = GET_DATA_QBIT(lines, bx + j); - else /* d == 2 */ - val = GET_DATA_DIBIT(lines, bx + j); - array[val] += 1.0; - } - } - - return na; -} - - -/*! - * \brief pixCountRGBColors() - * - * \param[in] pixs rgb or rgba - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] pncolors number of colors found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %factor == 1, this gives the exact number of colors.
- * 
- */ -l_ok -pixCountRGBColors(PIX *pixs, - l_int32 factor, - l_int32 *pncolors) -{ -L_AMAP *amap; - - PROCNAME("pixCountRGBColors"); - - if (!pncolors) - return ERROR_INT("&ncolors not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (factor <= 0) - return ERROR_INT("factor must be > 0", procName, 1); - amap = pixGetColorAmapHistogram(pixs, factor); - *pncolors = l_amapSize(amap); - l_amapDestroy(&amap); - return 0; -} - - -/*! - * \brief pixGetColorAmapHistogram() - * - * \param[in] pixs rgb or rgba - * \param[in] factor subsampling factor; integer >= 1 - * \return amap, or NULL on error - * - *
- * Notes:
- *      (1) This generates an ordered map from pixel value to histogram count.
- *      (2) Use amapGetCountForColor() to use the map to look up a count.
- * 
- */ -L_AMAP * -pixGetColorAmapHistogram(PIX *pixs, - l_int32 factor) -{ -l_int32 i, j, w, h, wpl; -l_uint32 *data, *line; -L_AMAP *amap; -RB_TYPE key, value; -RB_TYPE *pval; - - PROCNAME("pixGetColorAmapHistogram"); - - if (!pixs) - return (L_AMAP *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (L_AMAP *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (factor <= 0) - return (L_AMAP *)ERROR_PTR("factor must be > 0", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - amap = l_amapCreate(L_UINT_TYPE); - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - key.utype = line[j]; - pval = l_amapFind(amap, key); - if (!pval) - value.itype = 1; - else - value.itype = 1 + pval->itype; - l_amapInsert(amap, key, value); - } - } - - return amap; -} - - -/*! - * \brief amapGetCountForColor() - * - * \param[in] amap map from pixel value to count - * \param[in] val rgb or rgba pixel value - * \return count, or -1 on error - * - *
- * Notes:
- *      (1) The ordered map is made by pixGetColorAmapHistogram().
- * 
- */ -l_int32 -amapGetCountForColor(L_AMAP *amap, - l_uint32 val) -{ -RB_TYPE key; -RB_TYPE *pval; - - PROCNAME("amapGetCountForColor"); - - if (!amap) - return ERROR_INT("amap not defined", procName, -1); - - key.utype = val; - pval = l_amapFind(amap, key); - return (pval) ? pval->itype : 0; -} - - -/*! - * \brief pixGetRankValue() - * - * \param[in] pixs 8 bpp, 32 bpp or colormapped - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] rank between 0.0 and 1.0; 1.0 is brightest, 0.0 is darkest - * \param[out] pvalue pixel value corresponding to input rank - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Simple function to get rank values of an image.
- *          For a color image, the median value (rank = 0.5) can be
- *          used to linearly remap the colors based on the median
- *          of a target image, using pixLinearMapToTargetColor().
- * 
- */ -l_ok -pixGetRankValue(PIX *pixs, - l_int32 factor, - l_float32 rank, - l_uint32 *pvalue) -{ -l_int32 d; -l_float32 val, rval, gval, bval; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixGetRankValue"); - - if (!pvalue) - return ERROR_INT("&value not defined", procName, 1); - *pvalue = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (d != 8 && d != 32 && !cmap) - return ERROR_INT("pixs not 8 or 32 bpp, or cmapped", procName, 1); - if (cmap) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pixt = pixClone(pixs); - d = pixGetDepth(pixt); - - if (d == 8) { - pixGetRankValueMasked(pixt, NULL, 0, 0, factor, rank, &val, NULL); - *pvalue = lept_roundftoi(val); - } else { - pixGetRankValueMaskedRGB(pixt, NULL, 0, 0, factor, rank, - &rval, &gval, &bval); - composeRGBPixel(lept_roundftoi(rval), lept_roundftoi(gval), - lept_roundftoi(bval), pvalue); - } - - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief pixGetRankValueMaskedRGB() - * - * \param[in] pixs 32 bpp - * \param[in] pixm [optional] 1 bpp mask over which rank val is to be taken; - * use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0; these values are ignored if pixm is null - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] rank between 0.0 and 1.0; 1.0 is brightest, 0.0 is darkest - * \param[out] prval [optional] red component val for input rank - * \param[out] pgval [optional] green component val for input rank - * \param[out] pbval [optional] blue component val for input rank - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Computes the rank component values of pixels in pixs that
- *          are under the fg of the optional mask.  If the mask is null, it
- *          computes the average of the pixels in pixs.
- *      (2) Set the subsampling %factor > 1 to reduce the amount of
- *          computation.
- *      (4) Input x,y are ignored unless pixm exists.
- *      (5) The rank must be in [0.0 ... 1.0], where the brightest pixel
- *          has rank 1.0.  For the median pixel value, use 0.5.
- * 
- */ -l_ok -pixGetRankValueMaskedRGB(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor, - l_float32 rank, - l_float32 *prval, - l_float32 *pgval, - l_float32 *pbval) -{ -l_float32 scale; -PIX *pixmt, *pixt; - - PROCNAME("pixGetRankValueMaskedRGB"); - - if (prval) *prval = 0.0; - if (pgval) *pgval = 0.0; - if (pbval) *pbval = 0.0; - if (!prval && !pgval && !pbval) - return ERROR_INT("no results requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (rank < 0.0 || rank > 1.0) - return ERROR_INT("rank not in [0.0 ... 1.0]", procName, 1); - - pixmt = NULL; - if (pixm) { - scale = 1.0 / (l_float32)factor; - pixmt = pixScale(pixm, scale, scale); - } - if (prval) { - pixt = pixScaleRGBToGrayFast(pixs, factor, COLOR_RED); - pixGetRankValueMasked(pixt, pixmt, x / factor, y / factor, - factor, rank, prval, NULL); - pixDestroy(&pixt); - } - if (pgval) { - pixt = pixScaleRGBToGrayFast(pixs, factor, COLOR_GREEN); - pixGetRankValueMasked(pixt, pixmt, x / factor, y / factor, - factor, rank, pgval, NULL); - pixDestroy(&pixt); - } - if (pbval) { - pixt = pixScaleRGBToGrayFast(pixs, factor, COLOR_BLUE); - pixGetRankValueMasked(pixt, pixmt, x / factor, y / factor, - factor, rank, pbval, NULL); - pixDestroy(&pixt); - } - pixDestroy(&pixmt); - return 0; -} - - -/*! 
- * \brief pixGetRankValueMasked() - * - * \param[in] pixs 8 bpp, or colormapped - * \param[in] pixm [optional] 1 bpp mask, over which the rank val - * is to be taken; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0; these values are ignored if pixm is null - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] rank between 0.0 and 1.0; 1.0 is brightest, 0.0 is darkest - * \param[out] pval pixel value corresponding to input rank - * \param[out] pna [optional] of histogram - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Computes the rank value of pixels in pixs that are under
- *          the fg of the optional mask.  If the mask is null, it
- *          computes the average of the pixels in pixs.
- *      (2) Set the subsampling %factor > 1 to reduce the amount of
- *          computation.
- *      (3) Clipping of pixm (if it exists) to pixs is done in the inner loop.
- *      (4) Input x,y are ignored unless pixm exists.
- *      (5) The rank must be in [0.0 ... 1.0], where the brightest pixel
- *          has rank 1.0.  For the median pixel value, use 0.5.
- *      (6) The histogram can optionally be returned, so that other rank
- *          values can be extracted without recomputing the histogram.
- *          In that case, just use
- *              numaHistogramGetValFromRank(na, rank, &val);
- *          on the returned Numa for additional rank values.
- * 
- */ -l_ok -pixGetRankValueMasked(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor, - l_float32 rank, - l_float32 *pval, - NUMA **pna) -{ -NUMA *na; - - PROCNAME("pixGetRankValueMasked"); - - if (pna) *pna = NULL; - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 8 && !pixGetColormap(pixs)) - return ERROR_INT("pixs neither 8 bpp nor colormapped", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (rank < 0.0 || rank > 1.0) - return ERROR_INT("rank not in [0.0 ... 1.0]", procName, 1); - - if ((na = pixGetGrayHistogramMasked(pixs, pixm, x, y, factor)) == NULL) - return ERROR_INT("na not made", procName, 1); - numaHistogramGetValFromRank(na, rank, pval); - if (pna) - *pna = na; - else - numaDestroy(&na); - - return 0; -} - - -/*! - * \brief pixGetPixelAverage() - * - * \param[in] pixs 8 or 32 bpp, or colormapped - * \param[in] pixm [optional] 1 bpp mask over which average is - * to be taken; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0 - * \param[in] factor subsampling factor; >= 1 - * \param[out] pval average pixel value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For rgb pix, this is a more direct computation of the
- *          average value of the pixels in %pixs that are under the
- *          mask %pixm. It is faster than pixGetPixelStats(), which
- *          calls pixGetAverageMaskedRGB() and has the overhead of
- *          generating a temporary pix of each of the three components;
- *          this can take most of the time if %factor > 1.
- *      (2) If %pixm is null, this gives the average value of all
- *          pixels in %pixs.  The returned value is an integer.
- *      (3) For color %pixs, the returned pixel value is in the standard
- *          uint32 RGBA packing.
- *      (4) Clipping of pixm (if it exists) to pixs is done in the inner loop.
- *      (5) Input x,y are ignored if %pixm does not exist.
- *      (6) For general averaging of 1, 2, 4 or 8 bpp grayscale, use
- *          pixAverageInRect().
- * 
- */ -l_ok -pixGetPixelAverage(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor, - l_uint32 *pval) -{ -l_int32 i, j, w, h, d, wm, hm, wpl1, wplm, val, rval, gval, bval, count; -l_uint32 *data1, *datam, *line1, *linem; -l_float64 sum, rsum, gsum, bsum; -PIX *pix1; - - PROCNAME("pixGetPixelAverage"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - d = pixGetDepth(pixs); - if (d != 32 && !pixGetColormap(pixs)) - return ERROR_INT("pixs not rgb or colormapped", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - - if (pixGetColormap(pixs)) - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pix1 = pixClone(pixs); - pixGetDimensions(pix1, &w, &h, &d); - if (d == 1) { - pixDestroy(&pix1); - return ERROR_INT("pix1 is just 1 bpp", procName, 1); - } - data1 = pixGetData(pix1); - wpl1 = pixGetWpl(pix1); - - sum = rsum = gsum = bsum = 0.0; - count = 0; - if (!pixm) { - for (i = 0; i < h; i += factor) { - line1 = data1 + i * wpl1; - for (j = 0; j < w; j += factor) { - if (d == 8) { - val = GET_DATA_BYTE(line1, j); - sum += val; - } else { /* rgb */ - extractRGBValues(*(line1 + j), &rval, &gval, &bval); - rsum += rval; - gsum += gval; - bsum += bval; - } - count++; - } - } - } else { /* masked */ - pixGetDimensions(pixm, &wm, &hm, NULL); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - for (i = 0; i < hm; i += factor) { - if (y + i < 0 || y + i >= h) continue; - line1 = data1 + (y + i) * wpl1; - linem = datam + i * wplm; - for (j = 0; j < wm; j += factor) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - if (d == 8) { - val = GET_DATA_BYTE(line1, x + j); - sum += val; - } else { /* rgb */ - extractRGBValues(*(line1 + x + j), &rval, &gval, &bval); - rsum += rval; - gsum += gval; 
- bsum += bval; - } - count++; - } - } - } - } - - pixDestroy(&pix1); - if (count == 0) - return ERROR_INT("no pixels sampled", procName, 1); - if (d == 8) { - *pval = (l_uint32)(sum / (l_float64)count); - } else { /* d == 32 */ - rval = (l_uint32)(rsum / (l_float64)count); - gval = (l_uint32)(gsum / (l_float64)count); - bval = (l_uint32)(bsum / (l_float64)count); - composeRGBPixel(rval, gval, bval, pval); - } - - return 0; -} - - -/*! - * \brief pixGetPixelStats() - * - * \param[in] pixs 8 bpp, 32 bpp or colormapped - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] type L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, - * L_STANDARD_DEVIATION, L_VARIANCE - * \param[out] pvalue pixel value corresponding to input type - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Simple function to get one of four statistical values of an image.
- *      (2) It does not take a mask: it uses the entire image.
- *      (3) To get the average pixel value of an RGB image, suggest using
- *          pixGetPixelAverage(), which is considerably faster.
- * 
- */ -l_ok -pixGetPixelStats(PIX *pixs, - l_int32 factor, - l_int32 type, - l_uint32 *pvalue) -{ -l_int32 d; -l_float32 val, rval, gval, bval; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixGetPixelStats"); - - if (!pvalue) - return ERROR_INT("&value not defined", procName, 1); - *pvalue = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (d != 8 && d != 32 && !cmap) - return ERROR_INT("pixs not 8 or 32 bpp, or cmapped", procName, 1); - if (cmap) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pixt = pixClone(pixs); - d = pixGetDepth(pixt); - - if (d == 8) { - pixGetAverageMasked(pixt, NULL, 0, 0, factor, type, &val); - *pvalue = lept_roundftoi(val); - } else { - pixGetAverageMaskedRGB(pixt, NULL, 0, 0, factor, type, - &rval, &gval, &bval); - composeRGBPixel(lept_roundftoi(rval), lept_roundftoi(gval), - lept_roundftoi(bval), pvalue); - } - - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief pixGetAverageMaskedRGB() - * - * \param[in] pixs 32 bpp, or colormapped - * \param[in] pixm [optional] 1 bpp mask over which average is - * to be taken; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0 - * \param[in] factor subsampling factor; >= 1 - * \param[in] type L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, - * L_STANDARD_DEVIATION, L_VARIANCE - * \param[out] prval [optional] measured red value of given 'type' - * \param[out] pgval [optional] measured green value of given 'type' - * \param[out] pbval [optional] measured blue value of given 'type' - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For usage, see pixGetAverageMasked().
- *      (2) If there is a colormap, it is removed before the 8 bpp
- *          component images are extracted.
- *      (3) A better name for this would be: pixGetPixelStatsRGB()
- * 
- */ -l_ok -pixGetAverageMaskedRGB(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor, - l_int32 type, - l_float32 *prval, - l_float32 *pgval, - l_float32 *pbval) -{ -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixGetAverageMaskedRGB"); - - if (prval) *prval = 0.0; - if (pgval) *pgval = 0.0; - if (pbval) *pbval = 0.0; - if (!prval && !pgval && !pbval) - return ERROR_INT("no values requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - cmap = pixGetColormap(pixs); - if (pixGetDepth(pixs) != 32 && !cmap) - return ERROR_INT("pixs neither 32 bpp nor colormapped", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE && - type != L_STANDARD_DEVIATION && type != L_VARIANCE) - return ERROR_INT("invalid measure type", procName, 1); - - if (prval) { - if (cmap) - pixt = pixGetRGBComponentCmap(pixs, COLOR_RED); - else - pixt = pixGetRGBComponent(pixs, COLOR_RED); - pixGetAverageMasked(pixt, pixm, x, y, factor, type, prval); - pixDestroy(&pixt); - } - if (pgval) { - if (cmap) - pixt = pixGetRGBComponentCmap(pixs, COLOR_GREEN); - else - pixt = pixGetRGBComponent(pixs, COLOR_GREEN); - pixGetAverageMasked(pixt, pixm, x, y, factor, type, pgval); - pixDestroy(&pixt); - } - if (pbval) { - if (cmap) - pixt = pixGetRGBComponentCmap(pixs, COLOR_BLUE); - else - pixt = pixGetRGBComponent(pixs, COLOR_BLUE); - pixGetAverageMasked(pixt, pixm, x, y, factor, type, pbval); - pixDestroy(&pixt); - } - - return 0; -} - - -/*! 
- * \brief pixGetAverageMasked() - * - * \param[in] pixs 8 or 16 bpp, or colormapped - * \param[in] pixm [optional] 1 bpp mask over which average is - * to be taken; use all pixels if null - * \param[in] x, y UL corner of pixm relative to the UL corner of pixs; - * can be < 0 - * \param[in] factor subsampling factor; >= 1 - * \param[in] type L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, - * L_STANDARD_DEVIATION, L_VARIANCE - * \param[out] pval measured value of given 'type' - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Use L_MEAN_ABSVAL to get the average value of pixels in pixs
- *          that are under the fg of the optional mask.  If the mask
- *          is null, it finds the average of the pixels in pixs.
- *      (2) Likewise, use L_ROOT_MEAN_SQUARE to get the rms value of
- *          pixels in pixs, either masked or not; L_STANDARD_DEVIATION
- *          to get the standard deviation from the mean of the pixels;
- *          L_VARIANCE to get the average squared difference from the
- *          expected value.  The variance is the square of the stdev.
- *          For the standard deviation, we use
- *              sqrt([([x] - x)]^2) = sqrt([x^2] - [x]^2)
- *      (3) Set the subsampling %factor > 1 to reduce the amount of
- *          computation.
- *      (4) Clipping of pixm (if it exists) to pixs is done in the inner loop.
- *      (5) Input x,y are ignored unless pixm exists.
- *      (6) A better name for this would be: pixGetPixelStatsGray()
- * 
- */ -l_ok -pixGetAverageMasked(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_int32 factor, - l_int32 type, - l_float32 *pval) -{ -l_int32 i, j, w, h, d, wm, hm, wplg, wplm, val, count; -l_uint32 *datag, *datam, *lineg, *linem; -l_float64 sumave, summs, ave, meansq, var; -PIX *pixg; - - PROCNAME("pixGetAverageMasked"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - d = pixGetDepth(pixs); - if (d != 8 && d != 16 && !pixGetColormap(pixs)) - return ERROR_INT("pixs not 8 or 16 bpp or colormapped", procName, 1); - if (pixm && pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not 1 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE && - type != L_STANDARD_DEVIATION && type != L_VARIANCE) - return ERROR_INT("invalid measure type", procName, 1); - - if (pixGetColormap(pixs)) - pixg = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixg = pixClone(pixs); - pixGetDimensions(pixg, &w, &h, &d); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - - sumave = summs = 0.0; - count = 0; - if (!pixm) { - for (i = 0; i < h; i += factor) { - lineg = datag + i * wplg; - for (j = 0; j < w; j += factor) { - if (d == 8) - val = GET_DATA_BYTE(lineg, j); - else /* d == 16 */ - val = GET_DATA_TWO_BYTES(lineg, j); - if (type != L_ROOT_MEAN_SQUARE) - sumave += val; - if (type != L_MEAN_ABSVAL) - summs += (l_float64)(val) * val; - count++; - } - } - } else { - pixGetDimensions(pixm, &wm, &hm, NULL); - datam = pixGetData(pixm); - wplm = pixGetWpl(pixm); - for (i = 0; i < hm; i += factor) { - if (y + i < 0 || y + i >= h) continue; - lineg = datag + (y + i) * wplg; - linem = datam + i * wplm; - for (j = 0; j < wm; j += factor) { - if (x + j < 0 || x + j >= w) continue; - if (GET_DATA_BIT(linem, j)) { - if (d == 8) - val = GET_DATA_BYTE(lineg, x + j); - else /* d == 
16 */ - val = GET_DATA_TWO_BYTES(lineg, x + j); - if (type != L_ROOT_MEAN_SQUARE) - sumave += val; - if (type != L_MEAN_ABSVAL) - summs += (l_float64)(val) * val; - count++; - } - } - } - } - - pixDestroy(&pixg); - if (count == 0) - return ERROR_INT("no pixels sampled", procName, 1); - ave = sumave / (l_float64)count; - meansq = summs / (l_float64)count; - var = meansq - ave * ave; - if (type == L_MEAN_ABSVAL) - *pval = (l_float32)ave; - else if (type == L_ROOT_MEAN_SQUARE) - *pval = (l_float32)sqrt(meansq); - else if (type == L_STANDARD_DEVIATION) - *pval = (l_float32)sqrt(var); - else /* type == L_VARIANCE */ - *pval = (l_float32)var; - - return 0; -} - - -/*! - * \brief pixGetAverageTiledRGB() - * - * \param[in] pixs 32 bpp, or colormapped - * \param[in] sx, sy tile size; must be at least 2 x 2 - * \param[in] type L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, L_STANDARD_DEVIATION - * \param[out] ppixr [optional] tiled 'average' of red component - * \param[out] ppixg [optional] tiled 'average' of green component - * \param[out] ppixb [optional] tiled 'average' of blue component - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For usage, see pixGetAverageTiled().
- *      (2) If there is a colormap, it is removed before the 8 bpp
- *          component images are extracted.
- * 
- */ -l_ok -pixGetAverageTiledRGB(PIX *pixs, - l_int32 sx, - l_int32 sy, - l_int32 type, - PIX **ppixr, - PIX **ppixg, - PIX **ppixb) -{ -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixGetAverageTiledRGB"); - - if (ppixr) *ppixr = NULL; - if (ppixg) *ppixg = NULL; - if (ppixb) *ppixb = NULL; - if (!ppixr && !ppixg && !ppixb) - return ERROR_INT("no data requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - cmap = pixGetColormap(pixs); - if (pixGetDepth(pixs) != 32 && !cmap) - return ERROR_INT("pixs neither 32 bpp nor colormapped", procName, 1); - if (sx < 2 || sy < 2) - return ERROR_INT("sx and sy not both > 1", procName, 1); - if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE && - type != L_STANDARD_DEVIATION) - return ERROR_INT("invalid measure type", procName, 1); - - if (ppixr) { - if (cmap) - pixt = pixGetRGBComponentCmap(pixs, COLOR_RED); - else - pixt = pixGetRGBComponent(pixs, COLOR_RED); - *ppixr = pixGetAverageTiled(pixt, sx, sy, type); - pixDestroy(&pixt); - } - if (ppixg) { - if (cmap) - pixt = pixGetRGBComponentCmap(pixs, COLOR_GREEN); - else - pixt = pixGetRGBComponent(pixs, COLOR_GREEN); - *ppixg = pixGetAverageTiled(pixt, sx, sy, type); - pixDestroy(&pixt); - } - if (ppixb) { - if (cmap) - pixt = pixGetRGBComponentCmap(pixs, COLOR_BLUE); - else - pixt = pixGetRGBComponent(pixs, COLOR_BLUE); - *ppixb = pixGetAverageTiled(pixt, sx, sy, type); - pixDestroy(&pixt); - } - - return 0; -} - - -/*! - * \brief pixGetAverageTiled() - * - * \param[in] pixs 8 bpp, or colormapped - * \param[in] sx, sy tile size; must be at least 2 x 2 - * \param[in] type L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, L_STANDARD_DEVIATION - * \return pixd average values in each tile, or NULL on error - * - *
- * Notes:
- *      (1) Only computes for tiles that are entirely contained in pixs.
- *      (2) Use L_MEAN_ABSVAL to get the average abs value within the tile;
- *          L_ROOT_MEAN_SQUARE to get the rms value within each tile;
- *          L_STANDARD_DEVIATION to get the standard dev. from the average
- *          within each tile.
- *      (3) If colormapped, converts to 8 bpp gray.
- * 
- */ -PIX * -pixGetAverageTiled(PIX *pixs, - l_int32 sx, - l_int32 sy, - l_int32 type) -{ -l_int32 i, j, k, m, w, h, wd, hd, d, pos, wplt, wpld, valt; -l_uint32 *datat, *datad, *linet, *lined, *startt; -l_float64 sumave, summs, ave, meansq, normfact; -PIX *pixt, *pixd; - - PROCNAME("pixGetAverageTiled"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not 8 bpp or cmapped", procName, NULL); - if (sx < 2 || sy < 2) - return (PIX *)ERROR_PTR("sx and sy not both > 1", procName, NULL); - wd = w / sx; - hd = h / sy; - if (wd < 1 || hd < 1) - return (PIX *)ERROR_PTR("wd or hd == 0", procName, NULL); - if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE && - type != L_STANDARD_DEVIATION) - return (PIX *)ERROR_PTR("invalid measure type", procName, NULL); - - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - pixd = pixCreate(wd, hd, 8); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - normfact = 1. 
/ (l_float64)(sx * sy); - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - linet = datat + i * sy * wplt; - for (j = 0; j < wd; j++) { - if (type == L_MEAN_ABSVAL || type == L_STANDARD_DEVIATION) { - sumave = 0.0; - for (k = 0; k < sy; k++) { - startt = linet + k * wplt; - for (m = 0; m < sx; m++) { - pos = j * sx + m; - valt = GET_DATA_BYTE(startt, pos); - sumave += valt; - } - } - ave = normfact * sumave; - } - if (type == L_ROOT_MEAN_SQUARE || type == L_STANDARD_DEVIATION) { - summs = 0.0; - for (k = 0; k < sy; k++) { - startt = linet + k * wplt; - for (m = 0; m < sx; m++) { - pos = j * sx + m; - valt = GET_DATA_BYTE(startt, pos); - summs += (l_float64)(valt) * valt; - } - } - meansq = normfact * summs; - } - if (type == L_MEAN_ABSVAL) - valt = (l_int32)(ave + 0.5); - else if (type == L_ROOT_MEAN_SQUARE) - valt = (l_int32)(sqrt(meansq) + 0.5); - else /* type == L_STANDARD_DEVIATION */ - valt = (l_int32)(sqrt(meansq - ave * ave) + 0.5); - SET_DATA_BYTE(lined, j, valt); - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixRowStats() - * - * \param[in] pixs 8 bpp; not cmapped - * \param[in] box [optional] clipping box; can be null - * \param[out] pnamean [optional] numa of mean values - * \param[out] pnamedian [optional] numa of median values - * \param[out] pnamode [optional] numa of mode intensity values - * \param[out] pnamodecount [optional] numa of mode counts - * \param[out] pnavar [optional] numa of variance - * \param[out] pnarootvar [optional] numa of square root of variance - * \return na numa of requested statistic for each row, or NULL on error - * - *
- * Notes:
- *      (1) This computes numas that represent column vectors of statistics,
- *          with each of its values derived from the corresponding row of a Pix.
- *      (2) Use NULL on input to prevent computation of any of the 5 numas.
- *      (3) Other functions that compute pixel row statistics are:
- *             pixCountPixelsByRow()
- *             pixAverageByRow()
- *             pixVarianceByRow()
- *             pixGetRowStats()
- * 
- */ -l_int32 -pixRowStats(PIX *pixs, - BOX *box, - NUMA **pnamean, - NUMA **pnamedian, - NUMA **pnamode, - NUMA **pnamodecount, - NUMA **pnavar, - NUMA **pnarootvar) -{ -l_int32 i, j, k, w, h, val, wpls, sum, sumsq, target, max, modeval; -l_int32 xstart, xend, ystart, yend, bw, bh; -l_int32 *histo; -l_uint32 *lines, *datas; -l_float32 norm; -l_float32 *famean, *fameansq, *favar, *farootvar; -l_float32 *famedian, *famode, *famodecount; - - PROCNAME("pixRowStats"); - - if (pnamean) *pnamean = NULL; - if (pnamedian) *pnamedian = NULL; - if (pnamode) *pnamode = NULL; - if (pnamodecount) *pnamodecount = NULL; - if (pnavar) *pnavar = NULL; - if (pnarootvar) *pnarootvar = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - famean = fameansq = favar = farootvar = NULL; - famedian = famode = famodecount = NULL; - - pixGetDimensions(pixs, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return ERROR_INT("invalid clipping box", procName, 1); - - /* We need the mean for variance and root variance */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (pnamean || pnavar || pnarootvar) { - norm = 1. 
/ (l_float32)bw; - famean = (l_float32 *)LEPT_CALLOC(bh, sizeof(l_float32)); - fameansq = (l_float32 *)LEPT_CALLOC(bh, sizeof(l_float32)); - if (pnavar || pnarootvar) { - favar = (l_float32 *)LEPT_CALLOC(bh, sizeof(l_float32)); - if (pnarootvar) - farootvar = (l_float32 *)LEPT_CALLOC(bh, sizeof(l_float32)); - } - for (i = ystart; i < yend; i++) { - sum = sumsq = 0; - lines = datas + i * wpls; - for (j = xstart; j < xend; j++) { - val = GET_DATA_BYTE(lines, j); - sum += val; - sumsq += val * val; - } - famean[i] = norm * sum; - fameansq[i] = norm * sumsq; - if (pnavar || pnarootvar) { - favar[i] = fameansq[i] - famean[i] * famean[i]; - if (pnarootvar) - farootvar[i] = sqrtf(favar[i]); - } - } - LEPT_FREE(fameansq); - if (pnamean) - *pnamean = numaCreateFromFArray(famean, bh, L_INSERT); - else - LEPT_FREE(famean); - if (pnavar) - *pnavar = numaCreateFromFArray(favar, bh, L_INSERT); - else - LEPT_FREE(favar); - if (pnarootvar) - *pnarootvar = numaCreateFromFArray(farootvar, bh, L_INSERT); - } - - /* We need a histogram to find the median and/or mode values */ - if (pnamedian || pnamode || pnamodecount) { - histo = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - if (pnamedian) { - *pnamedian = numaMakeConstant(0, bh); - famedian = numaGetFArray(*pnamedian, L_NOCOPY); - } - if (pnamode) { - *pnamode = numaMakeConstant(0, bh); - famode = numaGetFArray(*pnamode, L_NOCOPY); - } - if (pnamodecount) { - *pnamodecount = numaMakeConstant(0, bh); - famodecount = numaGetFArray(*pnamodecount, L_NOCOPY); - } - for (i = ystart; i < yend; i++) { - lines = datas + i * wpls; - memset(histo, 0, 1024); - for (j = xstart; j < xend; j++) { - val = GET_DATA_BYTE(lines, j); - histo[val]++; - } - - if (pnamedian) { - sum = 0; - target = (bw + 1) / 2; - for (k = 0; k < 256; k++) { - sum += histo[k]; - if (sum >= target) { - famedian[i] = k; - break; - } - } - } - - if (pnamode || pnamodecount) { - max = 0; - modeval = 0; - for (k = 0; k < 256; k++) { - if (histo[k] > max) { - max = histo[k]; 
- modeval = k; - } - } - if (pnamode) - famode[i] = modeval; - if (pnamodecount) - famodecount[i] = max; - } - } - LEPT_FREE(histo); - } - - return 0; -} - - -/*! - * \brief pixColumnStats() - * - * \param[in] pixs 8 bpp; not cmapped - * \param[in] box [optional] clipping box; can be null - * \param[out] pnamean [optional] numa of mean values - * \param[out] pnamedian [optional] numa of median values - * \param[out] pnamode [optional] numa of mode intensity values - * \param[out] pnamodecount [optional] numa of mode counts - * \param[out] pnavar [optional] numa of variance - * \param[out] pnarootvar [optional] numa of square root of variance - * \return na numa of requested statistic for each column, - * or NULL on error - * - *
- * Notes:
- *      (1) This computes numas that represent row vectors of statistics,
- *          with each of its values derived from the corresponding col of a Pix.
- *      (2) Use NULL on input to prevent computation of any of the 5 numas.
- *      (3) Other functions that compute pixel column statistics are:
- *             pixCountPixelsByColumn()
- *             pixAverageByColumn()
- *             pixVarianceByColumn()
- *             pixGetColumnStats()
- * 
- */ -l_int32 -pixColumnStats(PIX *pixs, - BOX *box, - NUMA **pnamean, - NUMA **pnamedian, - NUMA **pnamode, - NUMA **pnamodecount, - NUMA **pnavar, - NUMA **pnarootvar) -{ -l_int32 i, j, k, w, h, val, wpls, sum, sumsq, target, max, modeval; -l_int32 xstart, xend, ystart, yend, bw, bh; -l_int32 *histo; -l_uint32 *lines, *datas; -l_float32 norm; -l_float32 *famean, *fameansq, *favar, *farootvar; -l_float32 *famedian, *famode, *famodecount; - - PROCNAME("pixColumnStats"); - - if (pnamean) *pnamean = NULL; - if (pnamedian) *pnamedian = NULL; - if (pnamode) *pnamode = NULL; - if (pnamodecount) *pnamodecount = NULL; - if (pnavar) *pnavar = NULL; - if (pnarootvar) *pnarootvar = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - famean = fameansq = favar = farootvar = NULL; - famedian = famode = famodecount = NULL; - - pixGetDimensions(pixs, &w, &h, NULL); - if (boxClipToRectangleParams(box, w, h, &xstart, &ystart, &xend, ¥d, - &bw, &bh) == 1) - return ERROR_INT("invalid clipping box", procName, 1); - - /* We need the mean for variance and root variance */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (pnamean || pnavar || pnarootvar) { - norm = 1. 
/ (l_float32)bh; - famean = (l_float32 *)LEPT_CALLOC(bw, sizeof(l_float32)); - fameansq = (l_float32 *)LEPT_CALLOC(bw, sizeof(l_float32)); - if (pnavar || pnarootvar) { - favar = (l_float32 *)LEPT_CALLOC(bw, sizeof(l_float32)); - if (pnarootvar) - farootvar = (l_float32 *)LEPT_CALLOC(bw, sizeof(l_float32)); - } - for (j = xstart; j < xend; j++) { - sum = sumsq = 0; - for (i = ystart, lines = datas; i < yend; lines += wpls, i++) { - val = GET_DATA_BYTE(lines, j); - sum += val; - sumsq += val * val; - } - famean[j] = norm * sum; - fameansq[j] = norm * sumsq; - if (pnavar || pnarootvar) { - favar[j] = fameansq[j] - famean[j] * famean[j]; - if (pnarootvar) - farootvar[j] = sqrtf(favar[j]); - } - } - LEPT_FREE(fameansq); - if (pnamean) - *pnamean = numaCreateFromFArray(famean, bw, L_INSERT); - else - LEPT_FREE(famean); - if (pnavar) - *pnavar = numaCreateFromFArray(favar, bw, L_INSERT); - else - LEPT_FREE(favar); - if (pnarootvar) - *pnarootvar = numaCreateFromFArray(farootvar, bw, L_INSERT); - } - - /* We need a histogram to find the median and/or mode values */ - if (pnamedian || pnamode || pnamodecount) { - histo = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - if (pnamedian) { - *pnamedian = numaMakeConstant(0, bw); - famedian = numaGetFArray(*pnamedian, L_NOCOPY); - } - if (pnamode) { - *pnamode = numaMakeConstant(0, bw); - famode = numaGetFArray(*pnamode, L_NOCOPY); - } - if (pnamodecount) { - *pnamodecount = numaMakeConstant(0, bw); - famodecount = numaGetFArray(*pnamodecount, L_NOCOPY); - } - for (j = xstart; j < xend; j++) { - memset(histo, 0, 1024); - for (i = ystart, lines = datas; i < yend; lines += wpls, i++) { - val = GET_DATA_BYTE(lines, j); - histo[val]++; - } - - if (pnamedian) { - sum = 0; - target = (bh + 1) / 2; - for (k = 0; k < 256; k++) { - sum += histo[k]; - if (sum >= target) { - famedian[j] = k; - break; - } - } - } - - if (pnamode || pnamodecount) { - max = 0; - modeval = 0; - for (k = 0; k < 256; k++) { - if (histo[k] > max) { - max = 
histo[k]; - modeval = k; - } - } - if (pnamode) - famode[j] = modeval; - if (pnamodecount) - famodecount[j] = max; - } - } - LEPT_FREE(histo); - } - - return 0; -} - - -/*! - * \brief pixGetRangeValues() - * - * \param[in] pixs 8 bpp grayscale, 32 bpp rgb, or colormapped - * \param[in] factor subsampling factor; >= 1; ignored if colormapped - * \param[in] color L_SELECT_RED, L_SELECT_GREEN or L_SELECT_BLUE - * \param[out] pminval [optional] minimum value of component - * \param[out] pmaxval [optional] maximum value of component - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If pixs is 8 bpp grayscale, the color selection type is ignored.
- * 
- */ -l_ok -pixGetRangeValues(PIX *pixs, - l_int32 factor, - l_int32 color, - l_int32 *pminval, - l_int32 *pmaxval) -{ -l_int32 d; -PIXCMAP *cmap; - - PROCNAME("pixGetRangeValues"); - - if (pminval) *pminval = 0; - if (pmaxval) *pmaxval = 0; - if (!pminval && !pmaxval) - return ERROR_INT("no result requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - cmap = pixGetColormap(pixs); - if (cmap) - return pixcmapGetRangeValues(cmap, color, pminval, pmaxval, - NULL, NULL); - - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return ERROR_INT("pixs not 8 or 32 bpp", procName, 1); - - if (d == 8) { - pixGetExtremeValue(pixs, factor, L_SELECT_MIN, - NULL, NULL, NULL, pminval); - pixGetExtremeValue(pixs, factor, L_SELECT_MAX, - NULL, NULL, NULL, pmaxval); - } else if (color == L_SELECT_RED) { - pixGetExtremeValue(pixs, factor, L_SELECT_MIN, - pminval, NULL, NULL, NULL); - pixGetExtremeValue(pixs, factor, L_SELECT_MAX, - pmaxval, NULL, NULL, NULL); - } else if (color == L_SELECT_GREEN) { - pixGetExtremeValue(pixs, factor, L_SELECT_MIN, - NULL, pminval, NULL, NULL); - pixGetExtremeValue(pixs, factor, L_SELECT_MAX, - NULL, pmaxval, NULL, NULL); - } else if (color == L_SELECT_BLUE) { - pixGetExtremeValue(pixs, factor, L_SELECT_MIN, - NULL, NULL, pminval, NULL); - pixGetExtremeValue(pixs, factor, L_SELECT_MAX, - NULL, NULL, pmaxval, NULL); - } else { - return ERROR_INT("invalid color", procName, 1); - } - - return 0; -} - - -/*! 
- * \brief pixGetExtremeValue() - * - * \param[in] pixs 8 bpp grayscale, 32 bpp rgb, or colormapped - * \param[in] factor subsampling factor; >= 1; ignored if colormapped - * \param[in] type L_SELECT_MIN or L_SELECT_MAX - * \param[out] prval [optional] red component - * \param[out] pgval [optional] green component - * \param[out] pbval [optional] blue component - * \param[out] pgrayval [optional] min or max gray value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If pixs is grayscale, the result is returned in &grayval.
- *          Otherwise, if there is a colormap or d == 32,
- *          each requested color component is returned.  At least
- *          one color component (address) must be input.
- * 
- */ -l_ok -pixGetExtremeValue(PIX *pixs, - l_int32 factor, - l_int32 type, - l_int32 *prval, - l_int32 *pgval, - l_int32 *pbval, - l_int32 *pgrayval) -{ -l_int32 i, j, w, h, d, wpl; -l_int32 val, extval, rval, gval, bval, extrval, extgval, extbval; -l_uint32 pixel; -l_uint32 *data, *line; -PIXCMAP *cmap; - - PROCNAME("pixGetExtremeValue"); - - if (prval) *prval = -1; - if (pgval) *pgval = -1; - if (pbval) *pbval = -1; - if (pgrayval) *pgrayval = -1; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (type != L_SELECT_MIN && type != L_SELECT_MAX) - return ERROR_INT("invalid type", procName, 1); - - cmap = pixGetColormap(pixs); - if (cmap) { - if (type == L_SELECT_MIN) { - if (prval) pixcmapGetRangeValues(cmap, L_SELECT_RED, prval, NULL, - NULL, NULL); - if (pgval) pixcmapGetRangeValues(cmap, L_SELECT_GREEN, pgval, NULL, - NULL, NULL); - if (pbval) pixcmapGetRangeValues(cmap, L_SELECT_BLUE, pbval, NULL, - NULL, NULL); - } else { /* type == L_SELECT_MAX */ - if (prval) pixcmapGetRangeValues(cmap, L_SELECT_RED, NULL, prval, - NULL, NULL); - if (pgval) pixcmapGetRangeValues(cmap, L_SELECT_GREEN, NULL, pgval, - NULL, NULL); - if (pbval) pixcmapGetRangeValues(cmap, L_SELECT_BLUE, NULL, pbval, - NULL, NULL); - } - return 0; - } - - pixGetDimensions(pixs, &w, &h, &d); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (d != 8 && d != 32) - return ERROR_INT("pixs not 8 or 32 bpp", procName, 1); - if (d == 8 && !pgrayval) - return ERROR_INT("can't return result in grayval", procName, 1); - if (d == 32 && !prval && !pgval && !pbval) - return ERROR_INT("can't return result in r/g/b-val", procName, 1); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - if (d == 8) { - if (type == L_SELECT_MIN) - extval = 100000; - else /* get max */ - extval = -1; - - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - val = GET_DATA_BYTE(line, j); - if ((type == L_SELECT_MIN && val < 
extval) || - (type == L_SELECT_MAX && val > extval)) - extval = val; - } - } - *pgrayval = extval; - return 0; - } - - /* 32 bpp rgb */ - if (type == L_SELECT_MIN) { - extrval = 100000; - extgval = 100000; - extbval = 100000; - } else { - extrval = -1; - extgval = -1; - extbval = -1; - } - for (i = 0; i < h; i += factor) { - line = data + i * wpl; - for (j = 0; j < w; j += factor) { - pixel = line[j]; - if (prval) { - rval = (pixel >> L_RED_SHIFT) & 0xff; - if ((type == L_SELECT_MIN && rval < extrval) || - (type == L_SELECT_MAX && rval > extrval)) - extrval = rval; - } - if (pgval) { - gval = (pixel >> L_GREEN_SHIFT) & 0xff; - if ((type == L_SELECT_MIN && gval < extgval) || - (type == L_SELECT_MAX && gval > extgval)) - extgval = gval; - } - if (pbval) { - bval = (pixel >> L_BLUE_SHIFT) & 0xff; - if ((type == L_SELECT_MIN && bval < extbval) || - (type == L_SELECT_MAX && bval > extbval)) - extbval = bval; - } - } - } - if (prval) *prval = extrval; - if (pgval) *pgval = extgval; - if (pbval) *pbval = extbval; - return 0; -} - - -/*! - * \brief pixGetMaxValueInRect() - * - * \param[in] pixs 8, 16 or 32 bpp grayscale; no color space components - * \param[in] box [optional] region; set box = NULL to use entire pixs - * \param[out] pmaxval [optional] max value in region - * \param[out] pxmax [optional] x location of max value - * \param[out] pymax [optional] y location of max value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This can be used to find the maximum and its location
- *          in a 2-dimensional histogram, where the x and y directions
- *          represent two color components (e.g., saturation and hue).
- *      (2) Note that here a 32 bpp pixs has pixel values that are simply
- *          numbers.  They are not 8 bpp components in a colorspace.
- * 
- */ -l_ok -pixGetMaxValueInRect(PIX *pixs, - BOX *box, - l_uint32 *pmaxval, - l_int32 *pxmax, - l_int32 *pymax) -{ -l_int32 i, j, w, h, d, wpl, bw, bh; -l_int32 xstart, ystart, xend, yend, xmax, ymax; -l_uint32 val, maxval; -l_uint32 *data, *line; - - PROCNAME("pixGetMaxValueInRect"); - - if (pmaxval) *pmaxval = 0; - if (pxmax) *pxmax = 0; - if (pymax) *pymax = 0; - if (!pmaxval && !pxmax && !pymax) - return ERROR_INT("no data requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetColormap(pixs) != NULL) - return ERROR_INT("pixs has colormap", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 16 && d != 32) - return ERROR_INT("pixs not 8, 16 or 32 bpp", procName, 1); - - xstart = ystart = 0; - xend = w - 1; - yend = h - 1; - if (box) { - boxGetGeometry(box, &xstart, &ystart, &bw, &bh); - xend = xstart + bw - 1; - yend = ystart + bh - 1; - } - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - maxval = 0; - xmax = ymax = 0; - for (i = ystart; i <= yend; i++) { - line = data + i * wpl; - for (j = xstart; j <= xend; j++) { - if (d == 8) - val = GET_DATA_BYTE(line, j); - else if (d == 16) - val = GET_DATA_TWO_BYTES(line, j); - else /* d == 32 */ - val = line[j]; - if (val > maxval) { - maxval = val; - xmax = j; - ymax = i; - } - } - } - if (maxval == 0) { /* no counts; pick the center of the rectangle */ - xmax = (xstart + xend) / 2; - ymax = (ystart + yend) / 2; - } - - if (pmaxval) *pmaxval = maxval; - if (pxmax) *pxmax = xmax; - if (pymax) *pymax = ymax; - return 0; -} - - -/*! 
- * \brief pixGetBinnedComponentRange() - * - * \param[in] pixs 32 bpp rgb - * \param[in] nbins number of equal population bins; must be > 1 - * \param[in] factor subsampling factor; >= 1 - * \param[in] color L_SELECT_RED, L_SELECT_GREEN or L_SELECT_BLUE - * \param[out] pminval [optional] minimum value of component - * \param[out] pmaxval [optional] maximum value of component - * \param[out] pcarray [optional] color array of bins - * \param[in] fontsize [optional] 0 for no debug; for debug, valid set - * is {4,6,8,10,12,14,16,18,20}. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns the min and max average values of the
- *          selected color component in the set of rank bins,
- *          where the ranking is done using the specified component.
- * 
- */ -l_ok -pixGetBinnedComponentRange(PIX *pixs, - l_int32 nbins, - l_int32 factor, - l_int32 color, - l_int32 *pminval, - l_int32 *pmaxval, - l_uint32 **pcarray, - l_int32 fontsize) -{ -l_int32 i, minval, maxval, rval, gval, bval; -l_uint32 *carray; -PIX *pixt; - - PROCNAME("pixGetBinnedComponentRange"); - - if (pminval) *pminval = 0; - if (pmaxval) *pmaxval = 0; - if (pcarray) *pcarray = NULL; - if (!pminval && !pmaxval) - return ERROR_INT("no result requested", procName, 1); - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (color != L_SELECT_RED && color != L_SELECT_GREEN && - color != L_SELECT_BLUE) - return ERROR_INT("invalid color", procName, 1); - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) - return ERROR_INT("invalid fontsize", procName, 1); - - pixGetRankColorArray(pixs, nbins, color, factor, &carray, NULL, 0); - if (fontsize > 0) { - for (i = 0; i < nbins; i++) - L_INFO("c[%d] = %x\n", procName, i, carray[i]); - pixt = pixDisplayColorArray(carray, nbins, 200, 5, fontsize); - pixDisplay(pixt, 100, 100); - pixDestroy(&pixt); - } - - extractRGBValues(carray[0], &rval, &gval, &bval); - minval = rval; - if (color == L_SELECT_GREEN) - minval = gval; - else if (color == L_SELECT_BLUE) - minval = bval; - extractRGBValues(carray[nbins - 1], &rval, &gval, &bval); - maxval = rval; - if (color == L_SELECT_GREEN) - maxval = gval; - else if (color == L_SELECT_BLUE) - maxval = bval; - - if (pminval) *pminval = minval; - if (pmaxval) *pmaxval = maxval; - if (pcarray) - *pcarray = carray; - else - LEPT_FREE(carray); - return 0; -} - - -/*! 
- * \brief pixGetRankColorArray() - * - * \param[in] pixs 32 bpp or cmapped - * \param[in] nbins number of equal population bins; must be > 1 - * \param[in] type color selection flag - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] pcarray array of colors, ranked by intensity - * \param[in] pixadb [optional] debug: caller passes this in. - * Use to display color squares and to - * capture plots of color components - * \param[in] fontsize [optional] debug: only used if pixadb exists. - * Valid set is {4,6,8,10,12,14,16,18,20}. - * fontsize == 6 is typical. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The color selection flag is one of: L_SELECT_RED, L_SELECT_GREEN,
- *          L_SELECT_BLUE, L_SELECT_MIN, L_SELECT_MAX, L_SELECT_AVERAGE,
- *          L_SELECT_HUE, L_SELECT_SATURATION.
- *      (2) Then it finds the histogram of the selected color type in each
- *          RGB pixel.  For each of the %nbins sets of pixels,
- *          ordered by this color type value, find the average RGB color,
- *          and return this as a "rank color" array.  The output array
- *          has %nbins colors.
- *      (3) Set the subsampling factor > 1 to reduce the amount of
- *          computation.  Typically you want at least 10,000 pixels
- *          for reasonable statistics.
- *      (4) The rank color as a function of rank can then be found from
- *             rankint = (l_int32)(rank * (nbins - 1) + 0.5);
- *             extractRGBValues(array[rankint], &rval, &gval, &bval);
- *          where the rank is in [0.0 ... 1.0].
- *          This function is meant to be simple and approximate.
- *      (5) Compare this with pixGetBinnedColor(), which generates equal
- *          width intensity bins and finds the average color in each bin.
- * 
- */ -l_ok -pixGetRankColorArray(PIX *pixs, - l_int32 nbins, - l_int32 type, - l_int32 factor, - l_uint32 **pcarray, - PIXA *pixadb, - l_int32 fontsize) -{ -l_int32 ret; -l_uint32 *array; -NUMA *na, *nan, *narbin; -PIX *pix1, *pixc, *pixg, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixGetRankColorArray"); - - if (!pcarray) - return ERROR_INT("&carray not defined", procName, 1); - *pcarray = NULL; - if (factor < 1) - return ERROR_INT("sampling factor must be >= 1", procName, 1); - if (nbins < 2) - return ERROR_INT("nbins must be at least 2", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - cmap = pixGetColormap(pixs); - if (pixGetDepth(pixs) != 32 && !cmap) - return ERROR_INT("pixs neither 32 bpp nor cmapped", procName, 1); - if (type != L_SELECT_RED && type != L_SELECT_GREEN && - type != L_SELECT_BLUE && type != L_SELECT_MIN && - type != L_SELECT_MAX && type != L_SELECT_AVERAGE && - type != L_SELECT_HUE && type != L_SELECT_SATURATION) - return ERROR_INT("invalid type", procName, 1); - if (pixadb) { - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) { - L_WARNING("invalid fontsize %d; setting to 6\n", procName, - fontsize); - fontsize = 6; - } - } - - /* Downscale by factor and remove colormap if it exists */ - pix1 = pixScaleByIntSampling(pixs, factor); - if (cmap) - pixc = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc = pixClone(pix1); - pixDestroy(&pix1); - - /* Get normalized histogram of the selected component */ - if (type == L_SELECT_RED) - pixg = pixGetRGBComponent(pixc, COLOR_RED); - else if (type == L_SELECT_GREEN) - pixg = pixGetRGBComponent(pixc, COLOR_GREEN); - else if (type == L_SELECT_BLUE) - pixg = pixGetRGBComponent(pixc, COLOR_BLUE); - else if (type == L_SELECT_MIN) - pixg = pixConvertRGBToGrayMinMax(pixc, L_CHOOSE_MIN); - else if (type == L_SELECT_MAX) - pixg = pixConvertRGBToGrayMinMax(pixc, L_CHOOSE_MAX); - else if (type == L_SELECT_AVERAGE) - pixg = pixConvertRGBToGray(pixc, 
0.34, 0.33, 0.33); - else if (type == L_SELECT_HUE) - pixg = pixConvertRGBToHue(pixc); - else /* L_SELECT_SATURATION */ - pixg = pixConvertRGBToSaturation(pixc); - if ((na = pixGetGrayHistogram(pixg, 1)) == NULL) { - pixDestroy(&pixc); - pixDestroy(&pixg); - return ERROR_INT("na not made", procName, 1); - } - nan = numaNormalizeHistogram(na, 1.0); - - /* Get the following arrays: - * (1) nar: cumulative normalized histogram (rank vs intensity value). - * With 256 intensity values, we have 257 rank values. - * (2) nai: "average" intensity as function of rank bin, for - * %nbins equally spaced in rank between 0.0 and 1.0. - * (3) narbin: bin number of discretized rank as a function of - * intensity. This is the 'inverse' of nai. - * (4) nabb: intensity value of the right bin boundary, for each - * of the %nbins discretized rank bins. */ - if (!pixadb) { - numaDiscretizeRankAndIntensity(nan, nbins, &narbin, NULL, NULL, NULL); - } else { - NUMA *nai, *nar, *nabb; - numaDiscretizeRankAndIntensity(nan, nbins, &narbin, &nai, &nar, &nabb); - lept_mkdir("lept/regout"); - pix1 = gplotSimplePix1(nan, "Normalized Histogram"); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = gplotSimplePix1(nar, "Cumulative Histogram"); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = gplotSimplePix1(nai, "Intensity vs. rank bin"); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = gplotSimplePix1(narbin, "LUT: rank bin vs. Intensity"); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = gplotSimplePix1(nabb, "Intensity of right edge vs. rank bin"); - pixaAddPix(pixadb, pix1, L_INSERT); - numaDestroy(&nai); - numaDestroy(&nar); - numaDestroy(&nabb); - } - - /* Get the average color in each bin for pixels whose grayscale - * values fall in the bin range. %narbin is the LUT that - * determines the bin number from the grayscale version of - * the image. Because this mapping may not be unique, - * some bins may not be represented in the LUT. 
In use, to get fair - * allocation into all the bins, bin population is monitored - * as pixels are accumulated, and when bins fill up, - * pixels are required to overflow into succeeding bins. */ - pixGetBinnedColor(pixc, pixg, 1, nbins, narbin, pcarray, pixadb); - ret = 0; - if ((array = *pcarray) == NULL) { - L_ERROR("color array not returned\n", procName); - ret = 1; - } - if (array && pixadb) { - pixd = pixDisplayColorArray(array, nbins, 200, 5, fontsize); - pixWriteDebug("/tmp/lept/regout/rankhisto.png", pixd, IFF_PNG); - pixDestroy(&pixd); - } - - pixDestroy(&pixc); - pixDestroy(&pixg); - numaDestroy(&na); - numaDestroy(&nan); - numaDestroy(&narbin); - return ret; -} - - -/*! - * \brief pixGetBinnedColor() - * - * \param[in] pixs 32 bpp - * \param[in] pixg 8 bpp grayscale version of pixs - * \param[in] factor sampling factor along pixel counting direction - * \param[in] nbins number of intensity bins - * \param[in] nalut LUT for mapping from intensity to bin number - * \param[out] pcarray array of average color values in each bin - * \param[in] pixadb [optional] debug: caller passes this in. - * Use to display color squares and to - * capture plots of color components - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This takes a color image, a grayscale (intensity) version,
- *          a LUT from intensity to bin number, and the number of bins.
- *          It computes the average color for pixels whose intensity
- *          is in each bin.  This is returned as an array of l_uint32
- *          colors in our standard RGBA ordering.
- *      (2) This function generates equal width intensity bins and
- *          finds the average color in each bin.  Compare this with
- *          pixGetRankColorArray(), which rank orders the pixels
- *          by the value of the selected component in each pixel,
- *          sets up bins with equal population (not intensity width!),
- *          and gets the average color in each bin.
- * 
- */ -l_ok -pixGetBinnedColor(PIX *pixs, - PIX *pixg, - l_int32 factor, - l_int32 nbins, - NUMA *nalut, - l_uint32 **pcarray, - PIXA *pixadb) -{ -l_int32 i, j, w, h, wpls, wplg, grayval, bin, rval, gval, bval, success; -l_int32 npts, avepts, maxpts; -l_uint32 *datas, *datag, *lines, *lineg, *carray; -l_float64 norm; -l_float64 *rarray, *garray, *barray, *narray; -PIX *pix1; - - PROCNAME("pixGetBinnedColor"); - - if (!pcarray) - return ERROR_INT("&carray not defined", procName, 1); - *pcarray = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixg) - return ERROR_INT("pixg not defined", procName, 1); - if (!nalut) - return ERROR_INT("nalut not defined", procName, 1); - if (factor < 1) { - L_WARNING("sampling factor less than 1; setting to 1\n", procName); - factor = 1; - } - - /* Find the color for each rank bin. Note that we can have - * multiple bins filled with pixels having the same gray value. - * Therefore, because in general the mapping from gray value - * to bin number is not unique, if a bin fills up (actually, - * we allow it to slightly overfill), we roll the excess - * over to the next bin, etc. 
*/ - pixGetDimensions(pixs, &w, &h, NULL); - npts = (w + factor - 1) * (h + factor - 1) / (factor * factor); - avepts = (npts + nbins - 1) / nbins; /* average number of pts in a bin */ - maxpts = (l_int32)((1.0 + 0.5 / (l_float32)nbins) * avepts); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - rarray = (l_float64 *)LEPT_CALLOC(nbins, sizeof(l_float64)); - garray = (l_float64 *)LEPT_CALLOC(nbins, sizeof(l_float64)); - barray = (l_float64 *)LEPT_CALLOC(nbins, sizeof(l_float64)); - narray = (l_float64 *)LEPT_CALLOC(nbins, sizeof(l_float64)); - for (i = 0; i < h; i += factor) { - lines = datas + i * wpls; - lineg = datag + i * wplg; - for (j = 0; j < w; j += factor) { - grayval = GET_DATA_BYTE(lineg, j); - numaGetIValue(nalut, grayval, &bin); - extractRGBValues(lines[j], &rval, &gval, &bval); - while (narray[bin] >= maxpts && bin < nbins - 1) - bin++; - rarray[bin] += rval; - garray[bin] += gval; - barray[bin] += bval; - narray[bin] += 1.0; /* count samples in each bin */ - } - } - - for (i = 0; i < nbins; i++) { - norm = 1. / narray[i]; - rarray[i] *= norm; - garray[i] *= norm; - barray[i] *= norm; -/* lept_stderr("narray[%d] = %f\n", i, narray[i]); */ - } - - if (pixadb) { - NUMA *nared, *nagreen, *nablue; - nared = numaCreate(nbins); - nagreen = numaCreate(nbins); - nablue = numaCreate(nbins); - for (i = 0; i < nbins; i++) { - numaAddNumber(nared, rarray[i]); - numaAddNumber(nagreen, garray[i]); - numaAddNumber(nablue, barray[i]); - } - lept_mkdir("lept/regout"); - pix1 = gplotSimplePix1(nared, "Average red val vs. rank bin"); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = gplotSimplePix1(nagreen, "Average green val vs. rank bin"); - pixaAddPix(pixadb, pix1, L_INSERT); - pix1 = gplotSimplePix1(nablue, "Average blue val vs. 
rank bin"); - pixaAddPix(pixadb, pix1, L_INSERT); - numaDestroy(&nared); - numaDestroy(&nagreen); - numaDestroy(&nablue); - } - - /* Save colors for all bins in a single array */ - success = TRUE; - if ((carray = (l_uint32 *)LEPT_CALLOC(nbins, sizeof(l_uint32))) == NULL) { - success = FALSE; - L_ERROR("carray not made\n", procName); - goto cleanup_arrays; - } - *pcarray = carray; - for (i = 0; i < nbins; i++) { - rval = (l_int32)(rarray[i] + 0.5); - gval = (l_int32)(garray[i] + 0.5); - bval = (l_int32)(barray[i] + 0.5); - composeRGBPixel(rval, gval, bval, carray + i); - } - -cleanup_arrays: - LEPT_FREE(rarray); - LEPT_FREE(garray); - LEPT_FREE(barray); - LEPT_FREE(narray); - return (success) ? 0 : 1; -} - - -/*! - * \brief pixDisplayColorArray() - * - * \param[in] carray array of colors: 0xrrggbb00 - * \param[in] ncolors size of array - * \param[in] side size of each color square; suggest 200 - * \param[in] ncols number of columns in output color matrix - * \param[in] fontsize to label each square with text. Valid set is - * {4,6,8,10,12,14,16,18,20}. Use 0 to disable. - * \return pixd color array, or NULL on error - */ -PIX * -pixDisplayColorArray(l_uint32 *carray, - l_int32 ncolors, - l_int32 side, - l_int32 ncols, - l_int32 fontsize) -{ -char textstr[256]; -l_int32 i, rval, gval, bval; -L_BMF *bmf; -PIX *pix1, *pix2, *pix3, *pix4; -PIXA *pixa; - - PROCNAME("pixDisplayColorArray"); - - if (!carray) - return (PIX *)ERROR_PTR("carray not defined", procName, NULL); - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) - return (PIX *)ERROR_PTR("invalid fontsize", procName, NULL); - - bmf = (fontsize == 0) ? 
NULL : bmfCreate(NULL, fontsize); - pixa = pixaCreate(ncolors); - for (i = 0; i < ncolors; i++) { - pix1 = pixCreate(side, side, 32); - pixSetAllArbitrary(pix1, carray[i]); - pix2 = pixAddBorder(pix1, 2, 1); - if (bmf) { - extractRGBValues(carray[i], &rval, &gval, &bval); - snprintf(textstr, sizeof(textstr), - "%d: (%d %d %d)", i, rval, gval, bval); - pix3 = pixAddSingleTextblock(pix2, bmf, textstr, 0xff000000, - L_ADD_BELOW, NULL); - } else { - pix3 = pixClone(pix2); - } - pixaAddPix(pixa, pix3, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - pix4 = pixaDisplayTiledInColumns(pixa, ncols, 1.0, 20, 2); - pixaDestroy(&pixa); - bmfDestroy(&bmf); - return pix4; -} - - -/*! - * \brief pixRankBinByStrip() - * - * \param[in] pixs 32 bpp or cmapped - * \param[in] direction L_SCAN_HORIZONTAL or L_SCAN_VERTICAL - * \param[in] size of strips in scan direction - * \param[in] nbins number of equal population bins; must be > 1 - * \param[in] type color selection flag - * \return pixd result, or NULL on error - * - *
- * Notes:
- *      (1) This generates a pix where each column represents a strip of
- *          the input image.  If %direction == L_SCAN_HORIZONTAL, the
- *          input impage is tiled into vertical strips of width %size,
- *          where %size is a compromise between getting better spatial
- *          columnwise resolution (small %size) and getting better
- *          columnwise statistical information (larger %size).  Likewise
- *          with rows of the image if %direction == L_SCAN_VERTICAL.
- *      (2) For L_HORIZONTAL_SCAN, the output pix contains rank binned
- *          median colors in each column that correspond to a vertical
- *          strip of width %size in the input image.
- *      (3) The color selection flag is one of: L_SELECT_RED, L_SELECT_GREEN,
- *          L_SELECT_BLUE, L_SELECT_MIN, L_SELECT_MAX, L_SELECT_AVERAGE.
- *          It determines how the rank ordering is done.
- *      (4) Typical input values might be %size = 5, %nbins = 10.
- * 
- */ -PIX * -pixRankBinByStrip(PIX *pixs, - l_int32 direction, - l_int32 size, - l_int32 nbins, - l_int32 type) -{ -l_int32 i, j, w, h, nstrips; -l_uint32 *array; -BOXA *boxa; -PIX *pix1, *pix2, *pixd; -PIXA *pixa; -PIXCMAP *cmap; - - PROCNAME("pixRankBinByStrip"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - cmap = pixGetColormap(pixs); - if (pixGetDepth(pixs) != 32 && !cmap) - return (PIX *)ERROR_PTR("pixs neither 32 bpp nor cmapped", - procName, NULL); - if (direction != L_SCAN_HORIZONTAL && direction != L_SCAN_VERTICAL) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (size < 1) - return (PIX *)ERROR_PTR("size < 1", procName, NULL); - if (nbins < 2) - return (PIX *)ERROR_PTR("nbins must be at least 2", procName, NULL); - if (type != L_SELECT_RED && type != L_SELECT_GREEN && - type != L_SELECT_BLUE && type != L_SELECT_MIN && - type != L_SELECT_MAX && type != L_SELECT_AVERAGE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - /* Downscale by factor and remove colormap if it exists */ - if (cmap) - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - else - pix1 = pixClone(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - - pixd = NULL; - boxa = makeMosaicStrips(w, h, direction, size); - pixa = pixClipRectangles(pix1, boxa); - nstrips = pixaGetCount(pixa); - if (direction == L_SCAN_HORIZONTAL) { - pixd = pixCreate(nstrips, nbins, 32); - for (i = 0; i < nstrips; i++) { - pix2 = pixaGetPix(pixa, i, L_CLONE); - pixGetRankColorArray(pix2, nbins, type, 1, &array, NULL, 0); - for (j = 0; j < nbins; j++) - pixSetPixel(pixd, i, j, array[j]); - LEPT_FREE(array); - pixDestroy(&pix2); - } - } else { /* L_SCAN_VERTICAL */ - pixd = pixCreate(nbins, nstrips, 32); - for (i = 0; i < nstrips; i++) { - pix2 = pixaGetPix(pixa, i, L_CLONE); - pixGetRankColorArray(pix2, nbins, type, 1, &array, NULL, 0); - for (j = 0; j < nbins; j++) - pixSetPixel(pixd, j, i, array[j]); - LEPT_FREE(array); - pixDestroy(&pix2); - 
} - } - pixDestroy(&pix1); - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return pixd; -} - - - -/*-------------------------------------------------------------* - * Pixelwise aligned statistics * - *-------------------------------------------------------------*/ -/*! - * \brief pixaGetAlignedStats() - * - * \param[in] pixa of identically sized, 8 bpp pix; not cmapped - * \param[in] type L_MEAN_ABSVAL, L_MEDIAN_VAL, L_MODE_VAL, L_MODE_COUNT - * \param[in] nbins of histogram for median and mode; ignored for mean - * \param[in] thresh on histogram for mode val; ignored for all other types - * \return pix with pixelwise aligned stats, or NULL on error. - * - *
- * Notes:
- *      (1) Each pixel in the returned pix represents an average
- *          (or median, or mode) over the corresponding pixels in each
- *          pix in the pixa.
- *      (2) The %thresh parameter works with L_MODE_VAL only, and
- *          sets a minimum occupancy of the mode bin.
- *          If the occupancy of the mode bin is less than %thresh, the
- *          mode value is returned as 0.  To always return the actual
- *          mode value, set %thresh = 0.  See pixGetRowStats().
- * 
- */ -PIX * -pixaGetAlignedStats(PIXA *pixa, - l_int32 type, - l_int32 nbins, - l_int32 thresh) -{ -l_int32 j, n, w, h, d; -l_float32 *colvect; -PIX *pixt, *pixd; - - PROCNAME("pixaGetAlignedStats"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if (type != L_MEAN_ABSVAL && type != L_MEDIAN_VAL && - type != L_MODE_VAL && type != L_MODE_COUNT) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - n = pixaGetCount(pixa); - if (n == 0) - return (PIX *)ERROR_PTR("no pix in pixa", procName, NULL); - pixaGetPixDimensions(pixa, 0, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pix not 8 bpp", procName, NULL); - - pixd = pixCreate(w, h, 8); - pixt = pixCreate(n, h, 8); - colvect = (l_float32 *)LEPT_CALLOC(h, sizeof(l_float32)); - for (j = 0; j < w; j++) { - pixaExtractColumnFromEachPix(pixa, j, pixt); - pixGetRowStats(pixt, type, nbins, thresh, colvect); - pixSetPixelColumn(pixd, j, colvect); - } - - LEPT_FREE(colvect); - pixDestroy(&pixt); - return pixd; -} - - -/*! 
- * \brief pixaExtractColumnFromEachPix() - * - * \param[in] pixa of identically sized, 8 bpp; not cmapped - * \param[in] col column index - * \param[in] pixd pix into which each column is inserted - * \return 0 if OK, 1 on error - */ -l_ok -pixaExtractColumnFromEachPix(PIXA *pixa, - l_int32 col, - PIX *pixd) -{ -l_int32 i, k, n, w, h, ht, val, wplt, wpld; -l_uint32 *datad, *datat; -PIX *pixt; - - PROCNAME("pixaExtractColumnFromEachPix"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!pixd || pixGetDepth(pixd) != 8) - return ERROR_INT("pixd not defined or not 8 bpp", procName, 1); - n = pixaGetCount(pixa); - pixGetDimensions(pixd, &w, &h, NULL); - if (n != w) - return ERROR_INT("pix width != n", procName, 1); - pixt = pixaGetPix(pixa, 0, L_CLONE); - wplt = pixGetWpl(pixt); - pixGetDimensions(pixt, NULL, &ht, NULL); - pixDestroy(&pixt); - if (h != ht) - return ERROR_INT("pixd height != column height", procName, 1); - - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (k = 0; k < n; k++) { - pixt = pixaGetPix(pixa, k, L_CLONE); - datat = pixGetData(pixt); - for (i = 0; i < h; i++) { - val = GET_DATA_BYTE(datat, col); - SET_DATA_BYTE(datad + i * wpld, k, val); - datat += wplt; - } - pixDestroy(&pixt); - } - - return 0; -} - - -/*! - * \brief pixGetRowStats() - * - * \param[in] pixs 8 bpp; not cmapped - * \param[in] type L_MEAN_ABSVAL, L_MEDIAN_VAL, L_MODE_VAL, L_MODE_COUNT - * \param[in] nbins of histogram for median and mode; ignored for mean - * \param[in] thresh on histogram for mode; ignored for mean and median - * \param[in] colvect vector of results gathered across the rows of pixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes a column vector of statistics using each
- *          row of a Pix.  The result is put in %colvect.
- *      (2) The %thresh parameter works with L_MODE_VAL only, and
- *          sets a minimum occupancy of the mode bin.
- *          If the occupancy of the mode bin is less than %thresh, the
- *          mode value is returned as 0.  To always return the actual
- *          mode value, set %thresh = 0.
- *      (3) What is the meaning of this %thresh parameter?
- *          For each row, the total count in the histogram is w, the
- *          image width.  So %thresh, relative to w, gives a measure
- *          of the ratio of the bin width to the width of the distribution.
- *          The larger %thresh, the narrower the distribution must be
- *          for the mode value to be returned (instead of returning 0).
- *      (4) If the Pix consists of a set of corresponding columns,
- *          one for each Pix in a Pixa, the width of the Pix is the
- *          number of Pix in the Pixa and the column vector can
- *          be stored as a column in a Pix of the same size as
- *          each Pix in the Pixa.
- * 
- */ -l_ok -pixGetRowStats(PIX *pixs, - l_int32 type, - l_int32 nbins, - l_int32 thresh, - l_float32 *colvect) -{ -l_int32 i, j, k, w, h, val, wpls, sum, target, max, modeval; -l_int32 *histo, *gray2bin, *bin2gray; -l_uint32 *lines, *datas; - - PROCNAME("pixGetRowStats"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!colvect) - return ERROR_INT("colvect not defined", procName, 1); - if (type != L_MEAN_ABSVAL && type != L_MEDIAN_VAL && - type != L_MODE_VAL && type != L_MODE_COUNT) - return ERROR_INT("invalid type", procName, 1); - if (type != L_MEAN_ABSVAL && (nbins < 1 || nbins > 256)) - return ERROR_INT("invalid nbins", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (type == L_MEAN_ABSVAL) { - for (i = 0; i < h; i++) { - sum = 0; - lines = datas + i * wpls; - for (j = 0; j < w; j++) - sum += GET_DATA_BYTE(lines, j); - colvect[i] = (l_float32)sum / (l_float32)w; - } - return 0; - } - - /* We need a histogram; binwidth ~ 256 / nbins */ - histo = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32)); - gray2bin = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - bin2gray = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32)); - for (i = 0; i < 256; i++) /* gray value --> histo bin */ - gray2bin[i] = (i * nbins) / 256; - for (i = 0; i < nbins; i++) /* histo bin --> gray value */ - bin2gray[i] = (i * 256 + 128) / nbins; - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (k = 0; k < nbins; k++) - histo[k] = 0; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - histo[gray2bin[val]]++; - } - - if (type == L_MEDIAN_VAL) { - sum = 0; - target = (w + 1) / 2; - for (k = 0; k < nbins; k++) { - sum += histo[k]; - if (sum >= target) { - colvect[i] = bin2gray[k]; - break; - } - } - } else if (type == L_MODE_VAL) { - max = 0; - modeval = 0; - for (k = 0; k < nbins; k++) { - if (histo[k] > max) { - max = histo[k]; - modeval = k; - } - } - 
if (max < thresh) - colvect[i] = 0; - else - colvect[i] = bin2gray[modeval]; - } else { /* type == L_MODE_COUNT */ - max = 0; - for (k = 0; k < nbins; k++) { - if (histo[k] > max) - max = histo[k]; - } - colvect[i] = max; - } - } - - LEPT_FREE(histo); - LEPT_FREE(gray2bin); - LEPT_FREE(bin2gray); - return 0; -} - - -/*! - * \brief pixGetColumnStats() - * - * \param[in] pixs 8 bpp; not cmapped - * \param[in] type L_MEAN_ABSVAL, L_MEDIAN_VAL, L_MODE_VAL, L_MODE_COUNT - * \param[in] nbins of histogram for median and mode; ignored for mean - * \param[in] thresh on histogram for mode val; ignored for all other types - * \param[in] rowvect vector of results gathered down the columns of pixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes a row vector of statistics using each
- *          column of a Pix.  The result is put in %rowvect.
- *      (2) The %thresh parameter works with L_MODE_VAL only, and
- *          sets a minimum occupancy of the mode bin.
- *          If the occupancy of the mode bin is less than %thresh, the
- *          mode value is returned as 0.  To always return the actual
- *          mode value, set %thresh = 0.
- *      (3) What is the meaning of this %thresh parameter?
- *          For each column, the total count in the histogram is h, the
- *          image height.  So %thresh, relative to h, gives a measure
- *          of the ratio of the bin width to the width of the distribution.
- *          The larger %thresh, the narrower the distribution must be
- *          for the mode value to be returned (instead of returning 0).
- * 
- */ -l_ok -pixGetColumnStats(PIX *pixs, - l_int32 type, - l_int32 nbins, - l_int32 thresh, - l_float32 *rowvect) -{ -l_int32 i, j, k, w, h, val, wpls, sum, target, max, modeval; -l_int32 *histo, *gray2bin, *bin2gray; -l_uint32 *datas; - - PROCNAME("pixGetColumnStats"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!rowvect) - return ERROR_INT("rowvect not defined", procName, 1); - if (type != L_MEAN_ABSVAL && type != L_MEDIAN_VAL && - type != L_MODE_VAL && type != L_MODE_COUNT) - return ERROR_INT("invalid type", procName, 1); - if (type != L_MEAN_ABSVAL && (nbins < 1 || nbins > 256)) - return ERROR_INT("invalid nbins", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (type == L_MEAN_ABSVAL) { - for (j = 0; j < w; j++) { - sum = 0; - for (i = 0; i < h; i++) - sum += GET_DATA_BYTE(datas + i * wpls, j); - rowvect[j] = (l_float32)sum / (l_float32)h; - } - return 0; - } - - /* We need a histogram; binwidth ~ 256 / nbins */ - histo = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32)); - gray2bin = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - bin2gray = (l_int32 *)LEPT_CALLOC(nbins, sizeof(l_int32)); - for (i = 0; i < 256; i++) /* gray value --> histo bin */ - gray2bin[i] = (i * nbins) / 256; - for (i = 0; i < nbins; i++) /* histo bin --> gray value */ - bin2gray[i] = (i * 256 + 128) / nbins; - - for (j = 0; j < w; j++) { - for (i = 0; i < h; i++) { - val = GET_DATA_BYTE(datas + i * wpls, j); - histo[gray2bin[val]]++; - } - - if (type == L_MEDIAN_VAL) { - sum = 0; - target = (h + 1) / 2; - for (k = 0; k < nbins; k++) { - sum += histo[k]; - if (sum >= target) { - rowvect[j] = bin2gray[k]; - break; - } - } - } else if (type == L_MODE_VAL) { - max = 0; - modeval = 0; - for (k = 0; k < nbins; k++) { - if (histo[k] > max) { - max = histo[k]; - modeval = k; - } - } - if (max < thresh) - rowvect[j] = 0; - else - rowvect[j] = bin2gray[modeval]; - } 
else { /* type == L_MODE_COUNT */ - max = 0; - for (k = 0; k < nbins; k++) { - if (histo[k] > max) - max = histo[k]; - } - rowvect[j] = max; - } - for (k = 0; k < nbins; k++) - histo[k] = 0; - } - - LEPT_FREE(histo); - LEPT_FREE(gray2bin); - LEPT_FREE(bin2gray); - return 0; -} - - -/*! - * \brief pixSetPixelColumn() - * - * \param[in] pix 8 bpp; not cmapped - * \param[in] col column index - * \param[in] colvect vector of floats - * \return 0 if OK, 1 on error - */ -l_ok -pixSetPixelColumn(PIX *pix, - l_int32 col, - l_float32 *colvect) -{ -l_int32 i, w, h, wpl; -l_uint32 *data; - - PROCNAME("pixSetCPixelColumn"); - - if (!pix || pixGetDepth(pix) != 8) - return ERROR_INT("pix not defined or not 8 bpp", procName, 1); - if (!colvect) - return ERROR_INT("colvect not defined", procName, 1); - pixGetDimensions(pix, &w, &h, NULL); - if (col < 0 || col > w) - return ERROR_INT("invalid col", procName, 1); - - data = pixGetData(pix); - wpl = pixGetWpl(pix); - for (i = 0; i < h; i++) - SET_DATA_BYTE(data + i * wpl, col, (l_int32)colvect[i]); - - return 0; -} - - -/*-------------------------------------------------------------* - * Foreground/background estimation * - *-------------------------------------------------------------*/ -/*! 
- * \brief pixThresholdForFgBg() - * - * \param[in] pixs any depth; cmapped ok - * \param[in] factor subsampling factor; integer >= 1 - * \param[in] thresh threshold for generating foreground mask - * \param[out] pfgval [optional] average foreground value - * \param[out] pbgval [optional] average background value - * \return 0 if OK, 1 on error - */ -l_ok -pixThresholdForFgBg(PIX *pixs, - l_int32 factor, - l_int32 thresh, - l_int32 *pfgval, - l_int32 *pbgval) -{ -l_float32 fval; -PIX *pixg, *pixm; - - PROCNAME("pixThresholdForFgBg"); - - if (pfgval) *pfgval = 0; - if (pbgval) *pbgval = 0; - if (!pfgval && !pbgval) - return ERROR_INT("no data requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Generate a subsampled 8 bpp version and a mask over the fg */ - pixg = pixConvertTo8BySampling(pixs, factor, 0); - pixm = pixThresholdToBinary(pixg, thresh); - - if (pfgval) { - pixGetAverageMasked(pixg, pixm, 0, 0, 1, L_MEAN_ABSVAL, &fval); - *pfgval = (l_int32)(fval + 0.5); - } - - if (pbgval) { - pixInvert(pixm, pixm); - pixGetAverageMasked(pixg, pixm, 0, 0, 1, L_MEAN_ABSVAL, &fval); - *pbgval = (l_int32)(fval + 0.5); - } - - pixDestroy(&pixg); - pixDestroy(&pixm); - return 0; -} - - -/*! - * \brief pixSplitDistributionFgBg() - * - * \param[in] pixs any depth; cmapped ok - * \param[in] scorefract fraction of the max score, used to determine - * the range over which the histogram min is searched - * \param[in] factor subsampling factor; integer >= 1 - * \param[out] pthresh [optional] best threshold for separating - * \param[out] pfgval [optional] average foreground value - * \param[out] pbgval [optional] average background value - * \param[out] ppixdb [optional] plot of distribution and split point - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See numaSplitDistribution() for details on the underlying
- *          method of choosing a threshold.
- * 
- */ -l_ok -pixSplitDistributionFgBg(PIX *pixs, - l_float32 scorefract, - l_int32 factor, - l_int32 *pthresh, - l_int32 *pfgval, - l_int32 *pbgval, - PIX **ppixdb) -{ -char buf[256]; -l_int32 thresh; -l_float32 avefg, avebg, maxnum; -GPLOT *gplot; -NUMA *na, *nascore, *nax, *nay; -PIX *pixg; - - PROCNAME("pixSplitDistributionFgBg"); - - if (pthresh) *pthresh = 0; - if (pfgval) *pfgval = 0; - if (pbgval) *pbgval = 0; - if (ppixdb) *ppixdb = NULL; - if (!pthresh && !pfgval && !pbgval) - return ERROR_INT("no data requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Generate a subsampled 8 bpp version */ - pixg = pixConvertTo8BySampling(pixs, factor, 0); - - /* Make the fg/bg estimates */ - na = pixGetGrayHistogram(pixg, 1); - if (ppixdb) { - numaSplitDistribution(na, scorefract, &thresh, &avefg, &avebg, - NULL, NULL, &nascore); - numaDestroy(&nascore); - } else { - numaSplitDistribution(na, scorefract, &thresh, &avefg, &avebg, - NULL, NULL, NULL); - } - - if (pthresh) *pthresh = thresh; - if (pfgval) *pfgval = (l_int32)(avefg + 0.5); - if (pbgval) *pbgval = (l_int32)(avebg + 0.5); - - if (ppixdb) { - lept_mkdir("lept/redout"); - gplot = gplotCreate("/tmp/lept/redout/histplot", GPLOT_PNG, "Histogram", - "Grayscale value", "Number of pixels"); - gplotAddPlot(gplot, NULL, na, GPLOT_LINES, NULL); - nax = numaMakeConstant(thresh, 2); - numaGetMax(na, &maxnum, NULL); - nay = numaMakeConstant(0, 2); - numaReplaceNumber(nay, 1, (l_int32)(0.5 * maxnum)); - snprintf(buf, sizeof(buf), "score fract = %3.1f", scorefract); - gplotAddPlot(gplot, nax, nay, GPLOT_LINES, buf); - *ppixdb = gplotMakeOutputPix(gplot); - gplotDestroy(&gplot); - numaDestroy(&nax); - numaDestroy(&nay); - } - - pixDestroy(&pixg); - numaDestroy(&na); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix5.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix5.c deleted file mode 100644 index 7b12ead8..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pix5.c +++ /dev/null @@ -1,3157 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pix5.c - *
- *
- *    This file has these operations:
- *
- *      (1) Measurement of 1 bpp image properties
- *      (2) Extract rectangular regions
- *      (3) Clip to foreground
- *      (4) Extract pixel averages, reversals and variance along lines
- *      (5) Rank row and column transforms
- *
- *    Measurement of properties
- *           l_int32     pixaFindDimensions()
- *           l_int32     pixFindAreaPerimRatio()
- *           NUMA       *pixaFindPerimToAreaRatio()
- *           l_int32     pixFindPerimToAreaRatio()
- *           NUMA       *pixaFindPerimSizeRatio()
- *           l_int32     pixFindPerimSizeRatio()
- *           NUMA       *pixaFindAreaFraction()
- *           l_int32     pixFindAreaFraction()
- *           NUMA       *pixaFindAreaFractionMasked()
- *           l_int32     pixFindAreaFractionMasked()
- *           NUMA       *pixaFindWidthHeightRatio()
- *           NUMA       *pixaFindWidthHeightProduct()
- *           l_int32     pixFindOverlapFraction()
- *           BOXA       *pixFindRectangleComps()
- *           l_int32     pixConformsToRectangle()
- *
- *    Extract rectangular region
- *           PIXA       *pixClipRectangles()
- *           PIX        *pixClipRectangle()
- *           PIX        *pixClipMasked()
- *           l_int32     pixCropToMatch()
- *           PIX        *pixCropToSize()
- *           PIX        *pixResizeToMatch()
- *
- *    Select a connected component by size
- *           PIX        *pixSelectComponentBySize()
- *           PIX        *pixFilterComponentBySize()
- *
- *    Make special masks
- *           PIX        *pixMakeSymmetricMask()
- *           PIX        *pixMakeFrameMask()
- *
- *    Generate a covering of rectangles over connected components
- *           PIX        * pixMakeCoveringOfRectangles()
- *
- *    Fraction of Fg pixels under a mask
- *           l_int32     pixFractionFgInMask()
- *
- *    Clip to foreground
- *           PIX        *pixClipToForeground()
- *           l_int32     pixTestClipToForeground()
- *           l_int32     pixClipBoxToForeground()
- *           l_int32     pixScanForForeground()
- *           l_int32     pixClipBoxToEdges()
- *           l_int32     pixScanForEdge()
- *
- *    Extract pixel averages and reversals along lines
- *           NUMA       *pixExtractOnLine()
- *           l_float32   pixAverageOnLine()
- *           NUMA       *pixAverageIntensityProfile()
- *           NUMA       *pixReversalProfile()
- *
- *    Extract windowed variance along a line
- *           NUMA       *pixWindowedVarianceOnLine()
- *
- *    Extract min/max of pixel values near lines
- *           l_int32     pixMinMaxNearLine()
- *
- *    Rank row and column transforms
- *           PIX        *pixRankRowTransform()
- *           PIX        *pixRankColumnTransform()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -static const l_uint32 rmask32[] = {0x0, - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, - 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, - 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, - 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, - 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff}; - -#ifndef NO_CONSOLE_IO -#define DEBUG_EDGES 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-------------------------------------------------------------* - * Measurement of properties * - *-------------------------------------------------------------*/ -/*! - * \brief pixaFindDimensions() - * - * \param[in] pixa - * \param[out] pnaw [optional] numa of pix widths - * \param[out] pnah [optional] numa of pix heights - * \return 0 if OK, 1 on error - */ -l_ok -pixaFindDimensions(PIXA *pixa, - NUMA **pnaw, - NUMA **pnah) -{ -l_int32 i, n, w, h; -PIX *pixt; - - PROCNAME("pixaFindDimensions"); - - if (pnaw) *pnaw = NULL; - if (pnah) *pnah = NULL; - if (!pnaw && !pnah) - return ERROR_INT("no output requested", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - if (pnaw) *pnaw = numaCreate(n); - if (pnah) *pnah = numaCreate(n); - for (i = 0; i < n; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixGetDimensions(pixt, &w, &h, NULL); - if (pnaw) - numaAddNumber(*pnaw, w); - if (pnah) - numaAddNumber(*pnah, h); - pixDestroy(&pixt); - } - return 0; -} - - -/*! - * \brief pixFindAreaPerimRatio() - * - * \param[in] pixs 1 bpp - * \param[in] tab [optional] pixel sum table, can be NULL - * \param[out] pfract area/perimeter ratio - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The area is the number of fg pixels that are not on the
- *          boundary (i.e., are not 8-connected to a bg pixel), and the
- *          perimeter is the number of fg boundary pixels.  Returns
- *          0.0 if there are no fg pixels.
- *      (2) This function is retained because clients are using it.
- * 
- */ -l_ok -pixFindAreaPerimRatio(PIX *pixs, - l_int32 *tab, - l_float32 *pfract) -{ -l_int32 *tab8; -l_int32 nfg, nbound; -PIX *pixt; - - PROCNAME("pixFindAreaPerimRatio"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - if (!tab) - tab8 = makePixelSumTab8(); - else - tab8 = tab; - - pixt = pixErodeBrick(NULL, pixs, 3, 3); - pixCountPixels(pixt, &nfg, tab8); - if (nfg == 0) { - pixDestroy(&pixt); - if (!tab) LEPT_FREE(tab8); - return 0; - } - pixXor(pixt, pixt, pixs); - pixCountPixels(pixt, &nbound, tab8); - *pfract = (l_float32)nfg / (l_float32)nbound; - pixDestroy(&pixt); - - if (!tab) LEPT_FREE(tab8); - return 0; -} - - -/*! - * \brief pixaFindPerimToAreaRatio() - * - * \param[in] pixa of 1 bpp pix - * \return na of perimeter/arear ratio for each pix, or NULL on error - * - *
- * Notes:
- *      (1) This is typically used for a pixa consisting of
- *          1 bpp connected components.
- * 
- */ -NUMA * -pixaFindPerimToAreaRatio(PIXA *pixa) -{ -l_int32 i, n; -l_int32 *tab; -l_float32 fract; -NUMA *na; -PIX *pixt; - - PROCNAME("pixaFindPerimToAreaRatio"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - tab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixFindPerimToAreaRatio(pixt, tab, &fract); - numaAddNumber(na, fract); - pixDestroy(&pixt); - } - LEPT_FREE(tab); - return na; -} - - -/*! - * \brief pixFindPerimToAreaRatio() - * - * \param[in] pixs 1 bpp - * \param[in] tab [optional] pixel sum table, can be NULL - * \param[out] pfract perimeter/area ratio - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The perimeter is the number of fg boundary pixels, and the
- *          area is the number of fg pixels.  This returns 0.0 if
- *          there are no fg pixels.
- *      (2) Unlike pixFindAreaPerimRatio(), this uses the full set of
- *          fg pixels for the area, and the ratio is taken in the opposite
- *          order.
- *      (3) This is typically used for a single connected component.
- *          This always has a value <= 1.0, and if the average distance
- *          of a fg pixel from the nearest bg pixel is d, this has
- *          a value ~1/d.
- * 
- */ -l_ok -pixFindPerimToAreaRatio(PIX *pixs, - l_int32 *tab, - l_float32 *pfract) -{ -l_int32 *tab8; -l_int32 nfg, nbound; -PIX *pixt; - - PROCNAME("pixFindPerimToAreaRatio"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - if (!tab) - tab8 = makePixelSumTab8(); - else - tab8 = tab; - - pixCountPixels(pixs, &nfg, tab8); - if (nfg == 0) { - if (!tab) LEPT_FREE(tab8); - return 0; - } - pixt = pixErodeBrick(NULL, pixs, 3, 3); - pixXor(pixt, pixt, pixs); - pixCountPixels(pixt, &nbound, tab8); - *pfract = (l_float32)nbound / (l_float32)nfg; - pixDestroy(&pixt); - - if (!tab) LEPT_FREE(tab8); - return 0; -} - - -/*! - * \brief pixaFindPerimSizeRatio() - * - * \param[in] pixa of 1 bpp pix - * \return na of fg perimeter/(2*(w+h)) ratio for each pix, - * or NULL on error - * - *
- * Notes:
- *      (1) This is typically used for a pixa consisting of
- *          1 bpp connected components.
- *      (2) This has a minimum value for a circle of pi/4; a value for
- *          a rectangle component of approx. 1.0; and a value much larger
- *          than 1.0 for a component with a highly irregular boundary.
- * 
- */ -NUMA * -pixaFindPerimSizeRatio(PIXA *pixa) -{ -l_int32 i, n; -l_int32 *tab; -l_float32 ratio; -NUMA *na; -PIX *pixt; - - PROCNAME("pixaFindPerimSizeRatio"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - tab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixFindPerimSizeRatio(pixt, tab, &ratio); - numaAddNumber(na, ratio); - pixDestroy(&pixt); - } - LEPT_FREE(tab); - return na; -} - - -/*! - * \brief pixFindPerimSizeRatio() - * - * \param[in] pixs 1 bpp - * \param[in] tab [optional] pixel sum table, can be NULL - * \param[out] pratio perimeter/size ratio - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We take the 'size' as twice the sum of the width and
- *          height of pixs, and the perimeter is the number of fg
- *          boundary pixels.  We use the fg pixels of the boundary
- *          because the pix may be clipped to the boundary, so an
- *          erosion is required to count all boundary pixels.
- *      (2) This has a large value for dendritic, fractal-like components
- *          with highly irregular boundaries.
- *      (3) This is typically used for a single connected component.
- *          It has a value of about 1.0 for rectangular components with
- *          relatively smooth boundaries.
- * 
- */ -l_ok -pixFindPerimSizeRatio(PIX *pixs, - l_int32 *tab, - l_float32 *pratio) -{ -l_int32 *tab8; -l_int32 w, h, nbound; -PIX *pixt; - - PROCNAME("pixFindPerimSizeRatio"); - - if (!pratio) - return ERROR_INT("&ratio not defined", procName, 1); - *pratio = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - if (!tab) - tab8 = makePixelSumTab8(); - else - tab8 = tab; - - pixt = pixErodeBrick(NULL, pixs, 3, 3); - pixXor(pixt, pixt, pixs); - pixCountPixels(pixt, &nbound, tab8); - pixGetDimensions(pixs, &w, &h, NULL); - *pratio = (0.5 * nbound) / (l_float32)(w + h); - pixDestroy(&pixt); - - if (!tab) LEPT_FREE(tab8); - return 0; -} - - -/*! - * \brief pixaFindAreaFraction() - * - * \param[in] pixa of 1 bpp pix - * \return na of area fractions for each pix, or NULL on error - * - *
- * Notes:
- *      (1) This is typically used for a pixa consisting of
- *          1 bpp connected components.
- * 
- */ -NUMA * -pixaFindAreaFraction(PIXA *pixa) -{ -l_int32 i, n; -l_int32 *tab; -l_float32 fract; -NUMA *na; -PIX *pixt; - - PROCNAME("pixaFindAreaFraction"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - tab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixFindAreaFraction(pixt, tab, &fract); - numaAddNumber(na, fract); - pixDestroy(&pixt); - } - LEPT_FREE(tab); - return na; -} - - -/*! - * \brief pixFindAreaFraction() - * - * \param[in] pixs 1 bpp - * \param[in] tab [optional] pixel sum table, can be NULL - * \param[out] pfract fg area/size ratio - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This finds the ratio of the number of fg pixels to the
- *          size of the pix (w * h).  It is typically used for a
- *          single connected component.
- * 
- */ -l_ok -pixFindAreaFraction(PIX *pixs, - l_int32 *tab, - l_float32 *pfract) -{ -l_int32 w, h, sum; -l_int32 *tab8; - - PROCNAME("pixFindAreaFraction"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - if (!tab) - tab8 = makePixelSumTab8(); - else - tab8 = tab; - pixGetDimensions(pixs, &w, &h, NULL); - pixCountPixels(pixs, &sum, tab8); - *pfract = (l_float32)sum / (l_float32)(w * h); - - if (!tab) LEPT_FREE(tab8); - return 0; -} - - -/*! - * \brief pixaFindAreaFractionMasked() - * - * \param[in] pixa of 1 bpp pix - * \param[in] pixm mask image - * \param[in] debug 1 for output, 0 to suppress - * \return na of ratio masked/total fractions for each pix, - * or NULL on error - * - *
- * Notes:
- *      (1) This is typically used for a pixa consisting of
- *          1 bpp connected components, which has an associated
- *          boxa giving the location of the components relative
- *          to the mask origin.
- *      (2) The debug flag displays in green and red the masked and
- *          unmasked parts of the image from which pixa was derived.
- * 
- */ -NUMA * -pixaFindAreaFractionMasked(PIXA *pixa, - PIX *pixm, - l_int32 debug) -{ -l_int32 i, n, full; -l_int32 *tab; -l_float32 fract; -BOX *box; -NUMA *na; -PIX *pix; - - PROCNAME("pixaFindAreaFractionMasked"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - if (!pixm || pixGetDepth(pixm) != 1) - return (NUMA *)ERROR_PTR("pixm undefined or not 1 bpp", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - tab = makePixelSumTab8(); - pixaIsFull(pixa, NULL, &full); /* check boxa */ - box = NULL; - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - if (full) - box = pixaGetBox(pixa, i, L_CLONE); - pixFindAreaFractionMasked(pix, box, pixm, tab, &fract); - numaAddNumber(na, fract); - boxDestroy(&box); - pixDestroy(&pix); - } - LEPT_FREE(tab); - - if (debug) { - l_int32 w, h; - PIX *pix1, *pix2; - pixGetDimensions(pixm, &w, &h, NULL); - pix1 = pixaDisplay(pixa, w, h); /* recover original image */ - pix2 = pixCreate(w, h, 8); /* make an 8 bpp white image ... */ - pixSetColormap(pix2, pixcmapCreate(8)); /* that's cmapped ... */ - pixSetBlackOrWhite(pix2, L_SET_WHITE); /* and init to white */ - pixSetMaskedCmap(pix2, pix1, 0, 0, 255, 0, 0); /* color all fg red */ - pixRasterop(pix1, 0, 0, w, h, PIX_MASK, pixm, 0, 0); - pixSetMaskedCmap(pix2, pix1, 0, 0, 0, 255, 0); /* turn masked green */ - pixDisplay(pix2, 100, 100); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - return na; -} - - -/*! - * \brief pixFindAreaFractionMasked() - * - * \param[in] pixs 1 bpp, typically a single component - * \param[in] box [optional] for pixs relative to pixm - * \param[in] pixm 1 bpp mask, typically over the entire image from - * which the component pixs was extracted - * \param[in] tab [optional] pixel sum table, can be NULL - * \param[out] pfract fg area/size ratio - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This finds the ratio of the number of masked fg pixels
- *          in pixs to the total number of fg pixels in pixs.
- *          It is typically used for a single connected component.
- *          If there are no fg pixels, this returns a ratio of 0.0.
- *      (2) The box gives the location of the pix relative to that
- *          of the UL corner of the mask.  Therefore, the rasterop
- *          is performed with the pix translated to its location
- *          (x, y) in the mask before ANDing.
- *          If box == NULL, the UL corners of pixs and pixm are aligned.
- * 
- */ -l_ok -pixFindAreaFractionMasked(PIX *pixs, - BOX *box, - PIX *pixm, - l_int32 *tab, - l_float32 *pfract) -{ -l_int32 x, y, w, h, sum, masksum; -l_int32 *tab8; -PIX *pix1; - - PROCNAME("pixFindAreaFractionMasked"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not defined or not 1 bpp", procName, 1); - - if (!tab) - tab8 = makePixelSumTab8(); - else - tab8 = tab; - x = y = 0; - if (box) - boxGetGeometry(box, &x, &y, NULL, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - - pix1 = pixCopy(NULL, pixs); - pixRasterop(pix1, 0, 0, w, h, PIX_MASK, pixm, x, y); - pixCountPixels(pixs, &sum, tab8); - if (sum == 0) { - pixDestroy(&pix1); - if (!tab) LEPT_FREE(tab8); - return 0; - } - pixCountPixels(pix1, &masksum, tab8); - *pfract = (l_float32)masksum / (l_float32)sum; - - if (!tab) LEPT_FREE(tab8); - pixDestroy(&pix1); - return 0; -} - - -/*! - * \brief pixaFindWidthHeightRatio() - * - * \param[in] pixa of 1 bpp pix - * \return na of width/height ratios for each pix, or NULL on error - * - *
- * Notes:
- *      (1) This is typically used for a pixa consisting of
- *          1 bpp connected components.
- * 
- */ -NUMA * -pixaFindWidthHeightRatio(PIXA *pixa) -{ -l_int32 i, n, w, h; -NUMA *na; -PIX *pixt; - - PROCNAME("pixaFindWidthHeightRatio"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixGetDimensions(pixt, &w, &h, NULL); - numaAddNumber(na, (l_float32)w / (l_float32)h); - pixDestroy(&pixt); - } - return na; -} - - -/*! - * \brief pixaFindWidthHeightProduct() - * - * \param[in] pixa of 1 bpp pix - * \return na of width*height products for each pix, or NULL on error - * - *
- * Notes:
- *      (1) This is typically used for a pixa consisting of
- *          1 bpp connected components.
- * 
- */ -NUMA * -pixaFindWidthHeightProduct(PIXA *pixa) -{ -l_int32 i, n, w, h; -NUMA *na; -PIX *pixt; - - PROCNAME("pixaFindWidthHeightProduct"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixGetDimensions(pixt, &w, &h, NULL); - numaAddNumber(na, w * h); - pixDestroy(&pixt); - } - return na; -} - - -/*! - * \brief pixFindOverlapFraction() - * - * \param[in] pixs1, pixs2 1 bpp - * \param[in] x2, y2 location in pixs1 of UL corner of pixs2 - * \param[in] tab [optional] pixel sum table, can be null - * \param[out] pratio ratio fg intersection to fg union - * \param[out] pnoverlap [optional] number of overlapping pixels - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The UL corner of pixs2 is placed at (x2, y2) in pixs1.
- *      (2) This measure is similar to the correlation.
- * 
- */ -l_ok -pixFindOverlapFraction(PIX *pixs1, - PIX *pixs2, - l_int32 x2, - l_int32 y2, - l_int32 *tab, - l_float32 *pratio, - l_int32 *pnoverlap) -{ -l_int32 *tab8; -l_int32 w, h, nintersect, nunion; -PIX *pixt; - - PROCNAME("pixFindOverlapFraction"); - - if (pnoverlap) *pnoverlap = 0; - if (!pratio) - return ERROR_INT("&ratio not defined", procName, 1); - *pratio = 0.0; - if (!pixs1 || pixGetDepth(pixs1) != 1) - return ERROR_INT("pixs1 not defined or not 1 bpp", procName, 1); - if (!pixs2 || pixGetDepth(pixs2) != 1) - return ERROR_INT("pixs2 not defined or not 1 bpp", procName, 1); - - if (!tab) - tab8 = makePixelSumTab8(); - else - tab8 = tab; - - pixGetDimensions(pixs2, &w, &h, NULL); - pixt = pixCopy(NULL, pixs1); - pixRasterop(pixt, x2, y2, w, h, PIX_MASK, pixs2, 0, 0); /* AND */ - pixCountPixels(pixt, &nintersect, tab8); - if (pnoverlap) - *pnoverlap = nintersect; - pixCopy(pixt, pixs1); - pixRasterop(pixt, x2, y2, w, h, PIX_PAINT, pixs2, 0, 0); /* OR */ - pixCountPixels(pixt, &nunion, tab8); - if (!tab) LEPT_FREE(tab8); - pixDestroy(&pixt); - - if (nunion > 0) - *pratio = (l_float32)nintersect / (l_float32)nunion; - return 0; -} - - -/*! - * \brief pixFindRectangleComps() - * - * \param[in] pixs 1 bpp - * \param[in] dist max distance allowed between bounding box - * and nearest foreground pixel within it - * \param[in] minw, minh minimum size in each direction as a requirement - * for a conforming rectangle - * \return boxa of components that conform, or NULL on error - * - *
- * Notes:
- *      (1) This applies the function pixConformsToRectangle() to
- *          each 8-c.c. in pixs, and returns a boxa containing the
- *          regions of all components that are conforming.
- *      (2) Conforming components must satisfy both the size constraint
- *          given by %minsize and the slop in conforming to a rectangle
- *          determined by %dist.
- * 
- */ -BOXA * -pixFindRectangleComps(PIX *pixs, - l_int32 dist, - l_int32 minw, - l_int32 minh) -{ -l_int32 w, h, i, n, conforms; -BOX *box; -BOXA *boxa, *boxad; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixFindRectangleComps"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (dist < 0) - return (BOXA *)ERROR_PTR("dist must be >= 0", procName, NULL); - if (minw <= 2 * dist && minh <= 2 * dist) - return (BOXA *)ERROR_PTR("invalid parameters", procName, NULL); - - boxa = pixConnComp(pixs, &pixa, 8); - boxad = boxaCreate(0); - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixGetDimensions(pix, &w, &h, NULL); - if (w < minw || h < minh) { - pixDestroy(&pix); - continue; - } - pixConformsToRectangle(pix, NULL, dist, &conforms); - if (conforms) { - box = boxaGetBox(boxa, i, L_COPY); - boxaAddBox(boxad, box, L_INSERT); - } - pixDestroy(&pix); - } - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return boxad; -} - - -/*! - * \brief pixConformsToRectangle() - * - * \param[in] pixs 1 bpp - * \param[in] box [optional] if null, use the entire pixs - * \param[in] dist max distance allowed between bounding box and - * nearest foreground pixel within it - * \param[out] pconforms 0 (false) if not conforming; - * 1 (true) if conforming - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) There are several ways to test if a connected component has
- *          an essentially rectangular boundary, such as:
- *           a. Fraction of fill into the bounding box
- *           b. Max-min distance of fg pixel from periphery of bounding box
- *           c. Max depth of bg intrusions into component within bounding box
- *          The weakness of (a) is that it is highly sensitive to holes
- *          within the c.c.  The weakness of (b) is that it can have
- *          arbitrarily large intrusions into the c.c.  Method (c) tests
- *          the integrity of the outer boundary of the c.c., with respect
- *          to the enclosing bounding box, so we use it.
- *      (2) This tests if the connected component within the box conforms
- *          to the box at all points on the periphery within %dist.
- *          Inside, at a distance from the box boundary that is greater
- *          than %dist, we don't care about the pixels in the c.c.
- *      (3) We can think of the conforming condition as follows:
- *          No pixel inside a distance %dist from the boundary
- *          can connect to the boundary through a path through the bg.
- *          To implement this, we need to do a flood fill.  We can go
- *          either from inside toward the boundary, or the other direction.
- *          It's easiest to fill from the boundary, and then verify that
- *          there are no filled pixels farther than %dist from the boundary.
- * 
- */ -l_ok -pixConformsToRectangle(PIX *pixs, - BOX *box, - l_int32 dist, - l_int32 *pconforms) -{ -l_int32 w, h, empty; -PIX *pix1, *pix2; - - PROCNAME("pixConformsToRectangle"); - - if (!pconforms) - return ERROR_INT("&conforms not defined", procName, 1); - *pconforms = 0; - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (dist < 0) - return ERROR_INT("dist must be >= 0", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (w <= 2 * dist || h <= 2 * dist) { - L_WARNING("automatic conformation: distance too large\n", procName); - *pconforms = 1; - return 0; - } - - /* Extract the region, if necessary */ - if (box) - pix1 = pixClipRectangle(pixs, box, NULL); - else - pix1 = pixCopy(NULL, pixs); - - /* Invert and fill from the boundary into the interior. - * Because we're considering the connected component in an - * 8-connected sense, we do the background filling as 4 c.c. */ - pixInvert(pix1, pix1); - pix2 = pixExtractBorderConnComps(pix1, 4); - - /* Mask out all pixels within a distance %dist from the box - * boundary. Any remaining pixels are from filling that goes - * more than %dist from the boundary. If no pixels remain, - * the component conforms to the bounding rectangle within - * a distance %dist. */ - pixSetOrClearBorder(pix2, dist, dist, dist, dist, PIX_CLR); - pixZero(pix2, &empty); - pixDestroy(&pix1); - pixDestroy(&pix2); - *pconforms = (empty) ? 1 : 0; - return 0; -} - - -/*-----------------------------------------------------------------------* - * Extract rectangular region * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixClipRectangles() - * - * \param[in] pixs - * \param[in] boxa requested clipping regions - * \return pixa consisting of requested regions, or NULL on error - * - *
- * Notes:
- *     (1) The returned pixa includes the actual regions clipped out from
- *         the input pixs.
- * 
- */ -PIXA * -pixClipRectangles(PIX *pixs, - BOXA *boxa) -{ -l_int32 i, n; -BOX *box, *boxc; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixClipRectangles"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIXA *)ERROR_PTR("boxa not defined", procName, NULL); - - n = boxaGetCount(boxa); - pixa = pixaCreate(n); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pix = pixClipRectangle(pixs, box, &boxc); - pixaAddPix(pixa, pix, L_INSERT); - pixaAddBox(pixa, boxc, L_INSERT); - boxDestroy(&box); - } - - return pixa; -} - - -/*! - * \brief pixClipRectangle() - * - * \param[in] pixs - * \param[in] box requested clipping region; const - * \param[out] pboxc [optional] actual box of clipped region - * \return clipped pix, or NULL on error or if rectangle - * doesn't intersect pixs - * - *
- * Notes:
- *
- *  This should be simple, but there are choices to be made.
- *  The box is defined relative to the pix coordinates.  However,
- *  if the box is not contained within the pix, we have two choices:
- *
- *      (1) clip the box to the pix
- *      (2) make a new pix equal to the full box dimensions,
- *          but let rasterop do the clipping and positioning
- *          of the src with respect to the dest
- *
- *  Choice (2) immediately brings up the problem of what pixel values
- *  to use that were not taken from the src.  For example, on a grayscale
- *  image, do you want the pixels not taken from the src to be black
- *  or white or something else?  To implement choice 2, one needs to
- *  specify the color of these extra pixels.
- *
- *  So we adopt (1), and clip the box first, if necessary,
- *  before making the dest pix and doing the rasterop.  But there
- *  is another issue to consider.  If you want to paste the
- *  clipped pix back into pixs, it must be properly aligned, and
- *  it is necessary to use the clipped box for alignment.
- *  Accordingly, this function has a third (optional) argument, which is
- *  the input box clipped to the src pix.
- * 
- */ -PIX * -pixClipRectangle(PIX *pixs, - BOX *box, - BOX **pboxc) -{ -l_int32 w, h, d, bx, by, bw, bh; -BOX *boxc; -PIX *pixd; - - PROCNAME("pixClipRectangle"); - - if (pboxc) *pboxc = NULL; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!box) - return (PIX *)ERROR_PTR("box not defined", procName, NULL); - - /* Clip the input box to the pix */ - pixGetDimensions(pixs, &w, &h, &d); - if ((boxc = boxClipToRectangle(box, w, h)) == NULL) { - L_WARNING("box doesn't overlap pix\n", procName); - return NULL; - } - boxGetGeometry(boxc, &bx, &by, &bw, &bh); - - /* Extract the block */ - if ((pixd = pixCreate(bw, bh, d)) == NULL) { - boxDestroy(&boxc); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixRasterop(pixd, 0, 0, bw, bh, PIX_SRC, pixs, bx, by); - - if (pboxc) - *pboxc = boxc; - else - boxDestroy(&boxc); - - return pixd; -} - - -/*! - * \brief pixClipMasked() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp; colormap ok - * \param[in] pixm clipping mask, 1 bpp - * \param[in] x, y origin of clipping mask relative to pixs - * \param[in] outval val to use for pixels that are outside the mask - * \return pixd, clipped pix or NULL on error or if pixm doesn't - * intersect pixs - * - *
- * Notes:
- *      (1) If pixs has a colormap, it is preserved in pixd.
- *      (2) The depth of pixd is the same as that of pixs.
- *      (3) If the depth of pixs is 1, use %outval = 0 for white background
- *          and 1 for black; otherwise, use the max value for white
- *          and 0 for black.  If pixs has a colormap, the max value for
- *          %outval is 0xffffffff; otherwise, it is 2^d - 1.
- *      (4) When using 1 bpp pixs, this is a simple clip and
- *          blend operation.  For example, if both pix1 and pix2 are
- *          black text on white background, and you want to OR the
- *          fg on the two images, let pixm be the inverse of pix2.
- *          Then the operation takes all of pix1 that's in the bg of
- *          pix2, and for the remainder (which are the pixels
- *          corresponding to the fg of the pix2), paint them black
- *          (1) in pix1.  The function call looks like
- *             pixClipMasked(pix2, pixInvert(pix1, pix1), x, y, 1);
- * 
- */ -PIX * -pixClipMasked(PIX *pixs, - PIX *pixm, - l_int32 x, - l_int32 y, - l_uint32 outval) -{ -l_int32 wm, hm, index, rval, gval, bval; -l_uint32 pixel; -BOX *box; -PIX *pixmi, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixClipMasked"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixm || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm undefined or not 1 bpp", procName, NULL); - - /* Clip out the region specified by pixm and (x,y) */ - pixGetDimensions(pixm, &wm, &hm, NULL); - box = boxCreate(x, y, wm, hm); - pixd = pixClipRectangle(pixs, box, NULL); - - /* Paint 'outval' (or something close to it if cmapped) through - * the pixels not masked by pixm */ - cmap = pixGetColormap(pixd); - pixmi = pixInvert(NULL, pixm); - if (cmap) { - extractRGBValues(outval, &rval, &gval, &bval); - pixcmapGetNearestIndex(cmap, rval, gval, bval, &index); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, &pixel); - pixPaintThroughMask(pixd, pixmi, 0, 0, pixel); - } else { - pixPaintThroughMask(pixd, pixmi, 0, 0, outval); - } - - boxDestroy(&box); - pixDestroy(&pixmi); - return pixd; -} - - -/*! - * \brief pixCropToMatch() - * - * \param[in] pixs1 any depth, colormap OK - * \param[in] pixs2 any depth, colormap OK - * \param[out] ppixd1 may be a clone - * \param[out] ppixd2 may be a clone - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This resizes pixs1 and/or pixs2 by cropping at the right
- *          and bottom, so that they're the same size.
- *      (2) If a pix doesn't need to be cropped, a clone is returned.
- *      (3) Note: the images are implicitly aligned to the UL corner.
- * 
- */ -l_ok -pixCropToMatch(PIX *pixs1, - PIX *pixs2, - PIX **ppixd1, - PIX **ppixd2) -{ -l_int32 w1, h1, w2, h2, w, h; - - PROCNAME("pixCropToMatch"); - - if (!ppixd1 || !ppixd2) - return ERROR_INT("&pixd1 and &pixd2 not both defined", procName, 1); - *ppixd1 = *ppixd2 = NULL; - if (!pixs1 || !pixs2) - return ERROR_INT("pixs1 and pixs2 not defined", procName, 1); - - pixGetDimensions(pixs1, &w1, &h1, NULL); - pixGetDimensions(pixs2, &w2, &h2, NULL); - w = L_MIN(w1, w2); - h = L_MIN(h1, h2); - - *ppixd1 = pixCropToSize(pixs1, w, h); - *ppixd2 = pixCropToSize(pixs2, w, h); - if (*ppixd1 == NULL || *ppixd2 == NULL) - return ERROR_INT("cropped image failure", procName, 1); - return 0; -} - - -/*! - * \brief pixCropToSize() - * - * \param[in] pixs any depth, colormap OK - * \param[in] w, h max dimensions of cropped image - * \return pixd cropped if necessary or NULL on error. - * - *
- * Notes:
- *      (1) If either w or h is smaller than the corresponding dimension
- *          of pixs, this returns a cropped image; otherwise it returns
- *          a clone of pixs.
- * 
- */ -PIX * -pixCropToSize(PIX *pixs, - l_int32 w, - l_int32 h) -{ -l_int32 ws, hs, wd, hd, d; -PIX *pixd; - - PROCNAME("pixCropToSize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, &d); - if (ws <= w && hs <= h) /* no cropping necessary */ - return pixClone(pixs); - - wd = L_MIN(ws, w); - hd = L_MIN(hs, h); - if ((pixd = pixCreate(wd, hd, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixRasterop(pixd, 0, 0, wd, hd, PIX_SRC, pixs, 0, 0); - return pixd; -} - - -/*! - * \brief pixResizeToMatch() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp; colormap ok - * \param[in] pixt can be null; we use only the size - * \param[in] w, h ignored if pixt is defined - * \return pixd resized to match or NULL on error - * - *
- * Notes:
- *      (1) This resizes pixs to make pixd, without scaling, by either
- *          cropping or extending separately in both width and height.
- *          Extension is done by replicating the last row or column.
- *          This is useful in a situation where, due to scaling
- *          operations, two images that are expected to be the
- *          same size can differ slightly in each dimension.
- *      (2) You can use either an existing pixt or specify
- *          both %w and %h.  If pixt is defined, the values
- *          in %w and %h are ignored.
- *      (3) If pixt is larger than pixs (or if w and/or d is larger
- *          than the dimension of pixs, replicate the outer row and
- *          column of pixels in pixs into pixd.
- * 
- */ -PIX * -pixResizeToMatch(PIX *pixs, - PIX *pixt, - l_int32 w, - l_int32 h) -{ -l_int32 i, j, ws, hs, d; -PIX *pixd; - - PROCNAME("pixResizeToMatch"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!pixt && (w <= 0 || h <= 0)) - return (PIX *)ERROR_PTR("both w and h not > 0", procName, NULL); - - if (pixt) /* redefine w, h */ - pixGetDimensions(pixt, &w, &h, NULL); - pixGetDimensions(pixs, &ws, &hs, &d); - if (ws == w && hs == h) - return pixCopy(NULL, pixs); - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixRasterop(pixd, 0, 0, ws, hs, PIX_SRC, pixs, 0, 0); - if (ws >= w && hs >= h) - return pixd; - - /* Replicate the last column and then the last row */ - if (ws < w) { - for (j = ws; j < w; j++) - pixRasterop(pixd, j, 0, 1, h, PIX_SRC, pixd, ws - 1, 0); - } - if (hs < h) { - for (i = hs; i < h; i++) - pixRasterop(pixd, 0, i, w, 1, PIX_SRC, pixd, 0, hs - 1); - } - - return pixd; -} - - -/*---------------------------------------------------------------------* - * Select a connected component by size * - *---------------------------------------------------------------------*/ -/*! - * \brief pixSelectComponentBySize() - * - * \param[in] pixs 1 bpp - * \param[in] rankorder in decreasing size: 0 for largest. - * \param[in] type L_SELECT_BY_WIDTH, L_SELECT_BY_HEIGHT, - * L_SELECT_BY_MAX_DIMENSION, - * L_SELECT_BY_AREA, L_SELECT_BY_PERIMETER - * \param[in] connectivity 4 or 8 - * \param[out] pbox [optional] location of returned component - * \return pix of rank order connected component, or NULL on error. - * - *
- * Notes:
- *      (1) This selects the Nth largest connected component, based on
- *          the selection type and connectivity.
- *      (2) Note that %rankorder is an integer.  Use %rankorder = 0 for
- *          the largest component and %rankorder = -1 for the smallest.
- *          If %rankorder >= number of components, select the smallest.
- */
-PIX *
-pixSelectComponentBySize(PIX     *pixs,
-                         l_int32  rankorder,
-                         l_int32  type,
-                         l_int32  connectivity,
-                         BOX    **pbox)
-{
-l_int32  n, empty, sorttype, index;
-BOXA    *boxa1;
-NUMA    *naindex;
-PIX     *pixd;
-PIXA    *pixa1, *pixa2;
-
-    PROCNAME("pixSelectComponentBySize");
-
-    if (pbox) *pbox = NULL;
-    if (!pixs || pixGetDepth(pixs) != 1)
-        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
-    if (type == L_SELECT_BY_WIDTH)
-        sorttype = L_SORT_BY_WIDTH;
-    else if (type == L_SELECT_BY_HEIGHT)
-        sorttype = L_SORT_BY_HEIGHT;
-    else if (type == L_SELECT_BY_MAX_DIMENSION)
-        sorttype = L_SORT_BY_MAX_DIMENSION;
-    else if (type == L_SELECT_BY_AREA)
-        sorttype = L_SORT_BY_AREA;
-    else if (type == L_SELECT_BY_PERIMETER)
-        sorttype = L_SORT_BY_PERIMETER;
-    else
-        return (PIX *)ERROR_PTR("invalid selection type", procName, NULL);
-    if (connectivity != 4 && connectivity != 8)
-        return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL);
-    pixZero(pixs, &empty);
-    if (empty)
-        return (PIX *)ERROR_PTR("no foreground pixels", procName, NULL);
-
-    boxa1 = pixConnComp(pixs, &pixa1, connectivity);
-    n = boxaGetCount(boxa1);
-    if (rankorder < 0 || rankorder >= n)
-        rankorder = n - 1;  /* smallest */
-    pixa2 = pixaSort(pixa1, sorttype, L_SORT_DECREASING, &naindex, L_CLONE);
-    pixd = pixaGetPix(pixa2, rankorder, L_COPY);
-    if (pbox) {
-        numaGetIValue(naindex, rankorder, &index);
-        *pbox = boxaGetBox(boxa1, index, L_COPY);
-    }
-
-    numaDestroy(&naindex);
-    boxaDestroy(&boxa1);
-    pixaDestroy(&pixa1);
-    pixaDestroy(&pixa2);
-    return pixd;
-}
-
-
-/*!
- * \brief   pixFilterComponentBySize()
- *
- * \param[in]    pixs          1 bpp
- * \param[in]    rankorder     in decreasing size: 0 for largest.
- * \param[in]    type          L_SELECT_BY_WIDTH, L_SELECT_BY_HEIGHT,
- *                             L_SELECT_BY_MAX_DIMENSION,
- *                             L_SELECT_BY_AREA, L_SELECT_BY_PERIMETER
- * \param[in]    connectivity  4 or 8
- * \param[out]   pbox          [optional] location of returned component
- * \return  pix with all other components removed, or NULL on error.
- *
- * 
- * Notes:
- *      (1) See notes in pixSelectComponentBySize().
- *      (2) This returns a copy of %pixs, with all components removed
- *          except for the selected one.
- */
-PIX *
-pixFilterComponentBySize(PIX     *pixs,
-                         l_int32  rankorder,
-                         l_int32  type,
-                         l_int32  connectivity,
-                         BOX    **pbox)
-{
-l_int32  x, y, w, h;
-BOX     *box;
-PIX     *pix1, *pix2;
-
-    PROCNAME("pixFilterComponentBySize");
-
-    if (!pixs || pixGetDepth(pixs) != 1)
-        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
-
-    pix1 = pixSelectComponentBySize(pixs, rankorder, type, connectivity, &box);
-    if (!pix1) {
-        boxDestroy(&box);
-        return (PIX *)ERROR_PTR("pix1 not made", procName, NULL);
-    }
-
-        /* Put the selected component in a new pix at the same
-         * location as it had in %pixs */
-    boxGetGeometry(box, &x, &y, &w, &h);
-    pix2 = pixCreateTemplate(pixs);
-    pixRasterop(pix2, x, y, w, h, PIX_SRC, pix1, 0, 0);
-    if (pbox)
-        *pbox = box;
-    else
-        boxDestroy(&box);
-    pixDestroy(&pix1);
-    return pix2;
-}
-
-
-/*---------------------------------------------------------------------*
- *                         Make special masks                          *
- *---------------------------------------------------------------------*/
-/*!
- * \brief   pixMakeSymmetricMask()
- *
- * \param[in]    w, h    dimensions of output 1 bpp pix
- * \param[in]    hf      horizontal fraction of half-width
- * \param[in]    vf      vertical fraction of half-height
- * \param[in]    type    L_USE_INNER, L_USE_OUTER
- * \return  pixd 1 bpp, or NULL on error.
- *
- * 
- * Notes:
- *      (1) This is a convenience function for generating masks with
- *          horizontal and vertical reflection symmetry, over either
- *          the inner or outer parts of an image.
- *      (2) Using L_USE_INNER to generate a mask over the inner part
- *          of the image, the mask is a solid rectangle, and the fractions
- *          describe the distance between the boundary of the image and
- *          the rectangle boundary.  For example, with hf == vf == 0.0,
- *          the mask covers the full image.
- *      (3) Using L_USE_OUTER to generate a mask over an outer frame
- *          of the image, the mask touches the boundary of the image,
- *          and the fractions describe the location of the inner
- *          boundary of the frame.  For example, with hf == vf == 1.0,
- *          the inner boundary is at the center of the image, so the
- *          mask covers the full image.
- *      (4) More examples:
- *           * mask covering the inner 70%: hf = vf = 0.3, type = L_USE_INNER
- *           * frame covering the outer 30%: hf = vf = 0.3, type = L_USE_OUTER
- * 
- */ -PIX * -pixMakeSymmetricMask(l_int32 w, - l_int32 h, - l_float32 hf, - l_float32 vf, - l_int32 type) -{ - PROCNAME("pixMakeSymmetricMask"); - - if (w <= 0 || h <= 0) - return (PIX *)ERROR_PTR("mask size 0", procName, NULL); - if (hf < 0.0 || hf > 1.0) - return (PIX *)ERROR_PTR("invalid horiz fractions", procName, NULL); - if (vf < 0.0 || vf > 1.0) - return (PIX *)ERROR_PTR("invalid vert fractions", procName, NULL); - - if (type == L_USE_INNER) - return pixMakeFrameMask(w, h, hf, 1.0, vf, 1.0); - else if (type == L_USE_OUTER) - return pixMakeFrameMask(w, h, 0.0, hf, 0.0, vf); - else - return (PIX *)ERROR_PTR("invalid type", procName, NULL); -} - - -/*! - * \brief pixMakeFrameMask() - * - * \param[in] w, h dimensions of output 1 bpp pix - * \param[in] hf1 horizontal fraction of half-width at outer frame bdry - * \param[in] hf2 horizontal fraction of half-width at inner frame bdry - * \param[in] vf1 vertical fraction of half-width at outer frame bdry - * \param[in] vf2 vertical fraction of half-width at inner frame bdry - * \return pixd 1 bpp, or NULL on error. - * - *
- * Notes:
- *      (1) This makes an arbitrary 1-component mask with a centered fg
- *          frame, which can have both an inner and an outer boundary.
- *          All input fractional distances are measured from the image
- *          border to the frame boundary, in units of the image half-width
- *          for hf1 and hf2 and the image half-height for vf1 and vf2.
- *          The distances to the outer frame boundary are given by hf1
- *          and vf1; to the inner frame boundary, by hf2 and vf2.
- *          Input fractions are thus in [0.0 ... 1.0], with hf1 <= hf2
- *          and vf1 <= vf2.  Horizontal and vertical frame widths are
- *          thus independently specified.
- *      (2) Special cases:
- *           * full fg mask: hf1 = vf1 = 0.0, hf2 = vf2 = 1.0.
- *           * empty fg (zero width) mask: set  hf1 = hf2  and vf1 = vf2.
- *           * fg rectangle with no hole: set hf2 = vf2 = 1.0.
- *           * frame touching outer boundary: set hf1 = vf1 = 0.0.
- *      (3) The vertical thickness of the horizontal mask parts
- *          is 0.5 * (vf2 - vf1) * h.  The horizontal thickness of the
- *          vertical mask parts is 0.5 * (hf2 - hf1) * w.
- * 
- */ -PIX * -pixMakeFrameMask(l_int32 w, - l_int32 h, - l_float32 hf1, - l_float32 hf2, - l_float32 vf1, - l_float32 vf2) -{ -l_int32 h1, h2, v1, v2; -PIX *pixd; - - PROCNAME("pixMakeFrameMask"); - - if (w <= 0 || h <= 0) - return (PIX *)ERROR_PTR("mask size 0", procName, NULL); - if (hf1 < 0.0 || hf1 > 1.0 || hf2 < 0.0 || hf2 > 1.0) - return (PIX *)ERROR_PTR("invalid horiz fractions", procName, NULL); - if (vf1 < 0.0 || vf1 > 1.0 || vf2 < 0.0 || vf2 > 1.0) - return (PIX *)ERROR_PTR("invalid vert fractions", procName, NULL); - if (hf1 > hf2 || vf1 > vf2) - return (PIX *)ERROR_PTR("invalid relative sizes", procName, NULL); - - pixd = pixCreate(w, h, 1); - - /* Special cases */ - if (hf1 == 0.0 && vf1 == 0.0 && hf2 == 1.0 && vf2 == 1.0) { /* full */ - pixSetAll(pixd); - return pixd; - } - if (hf1 == hf2 && vf1 == vf2) { /* empty */ - return pixd; - } - - /* General case */ - h1 = 0.5 * hf1 * w; - h2 = 0.5 * hf2 * w; - v1 = 0.5 * vf1 * h; - v2 = 0.5 * vf2 * h; - pixRasterop(pixd, h1, v1, w - 2 * h1, h - 2 * v1, PIX_SET, NULL, 0, 0); - if (hf2 < 1.0 && vf2 < 1.0) - pixRasterop(pixd, h2, v2, w - 2 * h2, h - 2 * v2, PIX_CLR, NULL, 0, 0); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Generate a covering of rectangles over connected components * - *---------------------------------------------------------------------*/ -/*! - * \brief pixMakeCoveringOfRectangles() - * - * \param[in] pixs 1 bpp - * \param[in] maxiters max iterations: use 0 to iterate to completion - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This iteratively finds the bounding boxes of the connected
- *          components and generates a mask from them.  Two iterations
- *          should suffice for most situations.
- *      (2) Returns an empty pix if %pixs is empty.
- *      (3) If there are many small components in proximity, it may
- *          be useful to merge them with a morphological closing before
- *          calling this one.
- * 
- */ -PIX * -pixMakeCoveringOfRectangles(PIX *pixs, - l_int32 maxiters) -{ -l_int32 empty, same, niters; -BOXA *boxa; -PIX *pix1, *pix2; - - PROCNAME("pixMakeCoveringOfRectangles"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (maxiters < 0) - return (PIX *)ERROR_PTR("maxiters must be >= 0", procName, NULL); - if (maxiters == 0) maxiters = 50; /* ridiculously large number */ - - pixZero(pixs, &empty); - pix1 = pixCreateTemplate(pixs); - if (empty) return pix1; - - /* Do first iteration */ - boxa = pixConnCompBB(pixs, 8); - pixMaskBoxa(pix1, pix1, boxa, L_SET_PIXELS); - boxaDestroy(&boxa); - if (maxiters == 1) return pix1; - - niters = 1; - while (niters < maxiters) { /* continue to add pixels to pix1 */ - niters++; - boxa = pixConnCompBB(pix1, 8); - pix2 = pixCopy(NULL, pix1); - pixMaskBoxa(pix1, pix1, boxa, L_SET_PIXELS); - boxaDestroy(&boxa); - pixEqual(pix1, pix2, &same); - pixDestroy(&pix2); - if (same) { - L_INFO("%d iterations\n", procName, niters - 1); - return pix1; - } - } - L_INFO("maxiters = %d reached\n", procName, niters); - return pix1; -} - - -/*---------------------------------------------------------------------* - * Fraction of Fg pixels under a mask * - *---------------------------------------------------------------------*/ -/*! - * \brief pixFractionFgInMask() - * - * \param[in] pix1 1 bpp - * \param[in] pix2 1 bpp - * \param[out] pfract fraction of fg pixels in 1 that are - * aligned with the fg of 2 - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This gives the fraction of fg pixels in pix1 that are in
- *          the intersection (i.e., under the fg) of pix2:
- *          |1 & 2|/|1|, where |...| means the number of fg pixels.
- *          Note that this is different from the situation where
- *          pix1 and pix2 are reversed.
- *      (2) Both pix1 and pix2 are registered to the UL corners.  A warning
- *          is issued if pix1 and pix2 have different sizes.
- *      (3) This can also be used to find the fraction of fg pixels in pix1
- *          that are NOT under the fg of pix2: 1.0 - |1 & 2|/|1|
- *      (4) If pix1 or pix2 are empty, this returns %fract = 0.0.
- *      (5) For example, pix2 could be a frame around the outside of the
- *          image, made from pixMakeFrameMask().
- * 
- */ -l_ok -pixFractionFgInMask(PIX *pix1, - PIX *pix2, - l_float32 *pfract) -{ -l_int32 w1, h1, w2, h2, empty, count1, count3; -PIX *pix3; - - PROCNAME("pixFractionFgInMask"); - - if (!pfract) - return ERROR_INT("&fract not defined", procName, 1); - *pfract = 0.0; - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1); - - pixGetDimensions(pix1, &w1, &h1, NULL); - pixGetDimensions(pix2, &w2, &h2, NULL); - if (w1 != w2 || h1 != h2) { - L_INFO("sizes unequal: (w1,w2) = (%d,%d), (h1,h2) = (%d,%d)\n", - procName, w1, w2, h1, h2); - } - pixZero(pix1, &empty); - if (empty) return 0; - pixZero(pix2, &empty); - if (empty) return 0; - - pix3 = pixCopy(NULL, pix1); - pixAnd(pix3, pix3, pix2); - pixCountPixels(pix1, &count1, NULL); /* |1| */ - pixCountPixels(pix3, &count3, NULL); /* |1 & 2| */ - *pfract = (l_float32)count3 / (l_float32)count1; - pixDestroy(&pix3); - return 0; -} - - -/*---------------------------------------------------------------------* - * Clip to Foreground * - *---------------------------------------------------------------------*/ -/*! - * \brief pixClipToForeground() - * - * \param[in] pixs 1 bpp - * \param[out] ppixd [optional] clipped pix returned - * \param[out] pbox [optional] bounding box - * \return 0 if OK; 1 on error or if there are no fg pixels - * - *
- * Notes:
- *      (1) At least one of {&pixd, &box} must be specified.
- *      (2) If there are no fg pixels, the returned ptrs are null.
- * 
- */ -l_ok -pixClipToForeground(PIX *pixs, - PIX **ppixd, - BOX **pbox) -{ -l_int32 w, h, wpl, nfullwords, extra, i, j; -l_int32 minx, miny, maxx, maxy; -l_uint32 result, mask; -l_uint32 *data, *line; -BOX *box; - - PROCNAME("pixClipToForeground"); - - if (ppixd) *ppixd = NULL; - if (pbox) *pbox = NULL; - if (!ppixd && !pbox) - return ERROR_INT("no output requested", procName, 1); - if (!pixs || (pixGetDepth(pixs) != 1)) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - nfullwords = w / 32; - extra = w & 31; - mask = ~rmask32[32 - extra]; - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - - result = 0; - for (i = 0, miny = 0; i < h; i++, miny++) { - line = data + i * wpl; - for (j = 0; j < nfullwords; j++) - result |= line[j]; - if (extra) - result |= (line[j] & mask); - if (result) - break; - } - if (miny == h) /* no ON pixels */ - return 1; - - result = 0; - for (i = h - 1, maxy = h - 1; i >= 0; i--, maxy--) { - line = data + i * wpl; - for (j = 0; j < nfullwords; j++) - result |= line[j]; - if (extra) - result |= (line[j] & mask); - if (result) - break; - } - - minx = 0; - for (j = 0, minx = 0; j < w; j++, minx++) { - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (GET_DATA_BIT(line, j)) - goto minx_found; - } - } - -minx_found: - for (j = w - 1, maxx = w - 1; j >= 0; j--, maxx--) { - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (GET_DATA_BIT(line, j)) - goto maxx_found; - } - } - -maxx_found: - box = boxCreate(minx, miny, maxx - minx + 1, maxy - miny + 1); - - if (ppixd) - *ppixd = pixClipRectangle(pixs, box, NULL); - if (pbox) - *pbox = box; - else - boxDestroy(&box); - - return 0; -} - - -/*! - * \brief pixTestClipToForeground() - * - * \param[in] pixs 1 bpp - * \param[out] pcanclip 1 if fg does not extend to all four edges - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is a lightweight test to determine if a 1 bpp image
- *          can be further cropped without loss of fg pixels.
- *          If it cannot, canclip is set to 0.
- *      (2) It does not test for the existence of any fg pixels.
- *          If there are no fg pixels, it will return %canclip = 1.
- *          Check the output of the subsequent call to pixClipToForeground().
- * 
- */ -l_ok -pixTestClipToForeground(PIX *pixs, - l_int32 *pcanclip) -{ -l_int32 i, j, w, h, wpl, found; -l_uint32 *data, *line; - - PROCNAME("pixTestClipToForeground"); - - if (!pcanclip) - return ERROR_INT("&canclip not defined", procName, 1); - *pcanclip = 0; - if (!pixs || (pixGetDepth(pixs) != 1)) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - /* Check top and bottom raster lines */ - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - found = FALSE; - for (j = 0; found == FALSE && j < w; j++) - found = GET_DATA_BIT(data, j); - if (!found) { - *pcanclip = 1; - return 0; - } - - line = data + (h - 1) * wpl; - found = FALSE; - for (j = 0; found == FALSE && j < w; j++) - found = GET_DATA_BIT(data, j); - if (!found) { - *pcanclip = 1; - return 0; - } - - /* Check left and right edges */ - found = FALSE; - for (i = 0, line = data; found == FALSE && i < h; line += wpl, i++) - found = GET_DATA_BIT(line, 0); - if (!found) { - *pcanclip = 1; - return 0; - } - - found = FALSE; - for (i = 0, line = data; found == FALSE && i < h; line += wpl, i++) - found = GET_DATA_BIT(line, w - 1); - if (!found) - *pcanclip = 1; - - return 0; /* fg pixels found on all edges */ -} - - -/*! - * \brief pixClipBoxToForeground() - * - * \param[in] pixs 1 bpp - * \param[in] boxs [optional] use full image if null - * \param[out] ppixd [optional] clipped pix returned - * \param[out] pboxd [optional] bounding box - * \return 0 if OK; 1 on error or if there are no fg pixels - * - *
- * Notes:
- *      (1) At least one of {&pixd, &boxd} must be specified.
- *      (2) If there are no fg pixels, the returned ptrs are null.
- *      (3) Do not use &pixs for the 3rd arg or &boxs for the 4th arg;
- *          this will leak memory.
- * 
- */ -l_ok -pixClipBoxToForeground(PIX *pixs, - BOX *boxs, - PIX **ppixd, - BOX **pboxd) -{ -l_int32 w, h, bx, by, bw, bh, cbw, cbh, left, right, top, bottom; -BOX *boxt, *boxd; - - PROCNAME("pixClipBoxToForeground"); - - if (ppixd) *ppixd = NULL; - if (pboxd) *pboxd = NULL; - if (!ppixd && !pboxd) - return ERROR_INT("no output requested", procName, 1); - if (!pixs || (pixGetDepth(pixs) != 1)) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - if (!boxs) - return pixClipToForeground(pixs, ppixd, pboxd); - - pixGetDimensions(pixs, &w, &h, NULL); - boxGetGeometry(boxs, &bx, &by, &bw, &bh); - cbw = L_MIN(bw, w - bx); - cbh = L_MIN(bh, h - by); - if (cbw < 0 || cbh < 0) - return ERROR_INT("box not within image", procName, 1); - boxt = boxCreate(bx, by, cbw, cbh); - - if (pixScanForForeground(pixs, boxt, L_FROM_LEFT, &left)) { - boxDestroy(&boxt); - return 1; - } - pixScanForForeground(pixs, boxt, L_FROM_RIGHT, &right); - pixScanForForeground(pixs, boxt, L_FROM_TOP, &top); - pixScanForForeground(pixs, boxt, L_FROM_BOT, &bottom); - - boxd = boxCreate(left, top, right - left + 1, bottom - top + 1); - if (ppixd) - *ppixd = pixClipRectangle(pixs, boxd, NULL); - if (pboxd) - *pboxd = boxd; - else - boxDestroy(&boxd); - - boxDestroy(&boxt); - return 0; -} - - -/*! - * \brief pixScanForForeground() - * - * \param[in] pixs 1 bpp - * \param[in] box [optional] within which the search is conducted - * \param[in] scanflag direction of scan; e.g., L_FROM_LEFT - * \param[out] ploc location in scan direction of first black pixel - * \return 0 if OK; 1 on error or if no fg pixels are found - * - *
- * Notes:
- *      (1) If there are no fg pixels, the position is set to 0.
- *          Caller must check the return value!
- *      (2) Use %box == NULL to scan from edge of pixs
- * 
- */ -l_ok -pixScanForForeground(PIX *pixs, - BOX *box, - l_int32 scanflag, - l_int32 *ploc) -{ -l_int32 bx, by, bw, bh, x, xstart, xend, y, ystart, yend, wpl; -l_uint32 *data, *line; -BOX *boxt; - - PROCNAME("pixScanForForeground"); - - if (!ploc) - return ERROR_INT("&loc not defined", procName, 1); - *ploc = 0; - if (!pixs || (pixGetDepth(pixs) != 1)) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - /* Clip box to pixs if it exists */ - pixGetDimensions(pixs, &bw, &bh, NULL); - if (box) { - if ((boxt = boxClipToRectangle(box, bw, bh)) == NULL) - return ERROR_INT("invalid box", procName, 1); - boxGetGeometry(boxt, &bx, &by, &bw, &bh); - boxDestroy(&boxt); - } else { - bx = by = 0; - } - xstart = bx; - ystart = by; - xend = bx + bw - 1; - yend = by + bh - 1; - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - if (scanflag == L_FROM_LEFT) { - for (x = xstart; x <= xend; x++) { - for (y = ystart; y <= yend; y++) { - line = data + y * wpl; - if (GET_DATA_BIT(line, x)) { - *ploc = x; - return 0; - } - } - } - } else if (scanflag == L_FROM_RIGHT) { - for (x = xend; x >= xstart; x--) { - for (y = ystart; y <= yend; y++) { - line = data + y * wpl; - if (GET_DATA_BIT(line, x)) { - *ploc = x; - return 0; - } - } - } - } else if (scanflag == L_FROM_TOP) { - for (y = ystart; y <= yend; y++) { - line = data + y * wpl; - for (x = xstart; x <= xend; x++) { - if (GET_DATA_BIT(line, x)) { - *ploc = y; - return 0; - } - } - } - } else if (scanflag == L_FROM_BOT) { - for (y = yend; y >= ystart; y--) { - line = data + y * wpl; - for (x = xstart; x <= xend; x++) { - if (GET_DATA_BIT(line, x)) { - *ploc = y; - return 0; - } - } - } - } else { - return ERROR_INT("invalid scanflag", procName, 1); - } - - return 1; /* no fg found */ -} - - -/*! 
- * \brief pixClipBoxToEdges() - * - * \param[in] pixs 1 bpp - * \param[in] boxs [optional] ; use full image if null - * \param[in] lowthresh threshold to choose clipping location - * \param[in] highthresh threshold required to find an edge - * \param[in] maxwidth max allowed width between low and high thresh locs - * \param[in] factor sampling factor along pixel counting direction - * \param[out] ppixd [optional] clipped pix returned - * \param[out] pboxd [optional] bounding box - * \return 0 if OK; 1 on error or if a fg edge is not found from - * all four sides. - * - *
- * Notes:
- *      (1) At least one of {&pixd, &boxd} must be specified.
- *      (2) If there are no fg pixels, the returned ptrs are null.
- *      (3) This function attempts to locate rectangular "image" regions
- *          of high-density fg pixels, that have well-defined edges
- *          on the four sides.
- *      (4) Edges are searched for on each side, iterating in order
- *          from left, right, top and bottom.  As each new edge is
- *          found, the search box is resized to use that location.
- *          Once an edge is found, it is held.  If no more edges
- *          are found in one iteration, the search fails.
- *      (5) See pixScanForEdge() for usage of the thresholds and %maxwidth.
- *      (6) The thresholds must be at least 1, and the low threshold
- *          cannot be larger than the high threshold.
- *      (7) If the low and high thresholds are both 1, this is equivalent
- *          to pixClipBoxToForeground().
- * 
- */ -l_ok -pixClipBoxToEdges(PIX *pixs, - BOX *boxs, - l_int32 lowthresh, - l_int32 highthresh, - l_int32 maxwidth, - l_int32 factor, - PIX **ppixd, - BOX **pboxd) -{ -l_int32 w, h, bx, by, bw, bh, cbw, cbh, left, right, top, bottom; -l_int32 lfound, rfound, tfound, bfound, change; -BOX *boxt, *boxd; - - PROCNAME("pixClipBoxToEdges"); - - if (ppixd) *ppixd = NULL; - if (pboxd) *pboxd = NULL; - if (!ppixd && !pboxd) - return ERROR_INT("no output requested", procName, 1); - if (!pixs || (pixGetDepth(pixs) != 1)) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (lowthresh < 1 || highthresh < 1 || - lowthresh > highthresh || maxwidth < 1) - return ERROR_INT("invalid thresholds", procName, 1); - factor = L_MIN(1, factor); - - if (lowthresh == 1 && highthresh == 1) - return pixClipBoxToForeground(pixs, boxs, ppixd, pboxd); - - pixGetDimensions(pixs, &w, &h, NULL); - if (boxs) { - boxGetGeometry(boxs, &bx, &by, &bw, &bh); - cbw = L_MIN(bw, w - bx); - cbh = L_MIN(bh, h - by); - if (cbw < 0 || cbh < 0) - return ERROR_INT("box not within image", procName, 1); - boxt = boxCreate(bx, by, cbw, cbh); - } else { - boxt = boxCreate(0, 0, w, h); - } - - lfound = rfound = tfound = bfound = 0; - while (!lfound || !rfound || !tfound || !bfound) { - change = 0; - if (!lfound) { - if (!pixScanForEdge(pixs, boxt, lowthresh, highthresh, maxwidth, - factor, L_FROM_LEFT, &left)) { - lfound = 1; - change = 1; - boxRelocateOneSide(boxt, boxt, left, L_FROM_LEFT); - } - } - if (!rfound) { - if (!pixScanForEdge(pixs, boxt, lowthresh, highthresh, maxwidth, - factor, L_FROM_RIGHT, &right)) { - rfound = 1; - change = 1; - boxRelocateOneSide(boxt, boxt, right, L_FROM_RIGHT); - } - } - if (!tfound) { - if (!pixScanForEdge(pixs, boxt, lowthresh, highthresh, maxwidth, - factor, L_FROM_TOP, &top)) { - tfound = 1; - change = 1; - boxRelocateOneSide(boxt, boxt, top, L_FROM_TOP); - } - } - if (!bfound) { - if (!pixScanForEdge(pixs, boxt, lowthresh, highthresh, maxwidth, - factor, 
L_FROM_BOT, &bottom)) { - bfound = 1; - change = 1; - boxRelocateOneSide(boxt, boxt, bottom, L_FROM_BOT); - } - } - -#if DEBUG_EDGES - lept_stderr("iter: %d %d %d %d\n", lfound, rfound, tfound, bfound); -#endif /* DEBUG_EDGES */ - - if (change == 0) break; - } - boxDestroy(&boxt); - - if (change == 0) - return ERROR_INT("not all edges found", procName, 1); - - boxd = boxCreate(left, top, right - left + 1, bottom - top + 1); - if (ppixd) - *ppixd = pixClipRectangle(pixs, boxd, NULL); - if (pboxd) - *pboxd = boxd; - else - boxDestroy(&boxd); - - return 0; -} - - -/*! - * \brief pixScanForEdge() - * - * \param[in] pixs 1 bpp - * \param[in] box [optional] within which the search is conducted - * \param[in] lowthresh threshold to choose clipping location - * \param[in] highthresh threshold required to find an edge - * \param[in] maxwidth max allowed width between low and high thresh locs - * \param[in] factor sampling factor along pixel counting direction - * \param[in] scanflag direction of scan; e.g., L_FROM_LEFT - * \param[out] ploc location in scan direction of first black pixel - * \return 0 if OK; 1 on error or if the edge is not found - * - *
- * Notes:
- *      (1) If there are no fg pixels, the position is set to 0.
- *          Caller must check the return value!
- *      (2) Use %box == NULL to scan from edge of pixs
- *      (3) As the scan progresses, the location where the sum of
- *          pixels equals or excees %lowthresh is noted (loc).  The
- *          scan is stopped when the sum of pixels equals or exceeds
- *          %highthresh.  If the scan distance between loc and that
- *          point does not exceed %maxwidth, an edge is found and
- *          its position is taken to be loc.  %maxwidth implicitly
- *          sets a minimum on the required gradient of the edge.
- *      (4) The thresholds must be at least 1, and the low threshold
- *          cannot be larger than the high threshold.
- * 
- */ -l_ok -pixScanForEdge(PIX *pixs, - BOX *box, - l_int32 lowthresh, - l_int32 highthresh, - l_int32 maxwidth, - l_int32 factor, - l_int32 scanflag, - l_int32 *ploc) -{ -l_int32 bx, by, bw, bh, foundmin, loc, sum, wpl; -l_int32 x, xstart, xend, y, ystart, yend; -l_uint32 *data, *line; -BOX *boxt; - - PROCNAME("pixScanForEdge"); - - if (!ploc) - return ERROR_INT("&ploc not defined", procName, 1); - *ploc = 0; - if (!pixs || (pixGetDepth(pixs) != 1)) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (lowthresh < 1 || highthresh < 1 || - lowthresh > highthresh || maxwidth < 1) - return ERROR_INT("invalid thresholds", procName, 1); - factor = L_MIN(1, factor); - - /* Clip box to pixs if it exists */ - pixGetDimensions(pixs, &bw, &bh, NULL); - if (box) { - if ((boxt = boxClipToRectangle(box, bw, bh)) == NULL) - return ERROR_INT("invalid box", procName, 1); - boxGetGeometry(boxt, &bx, &by, &bw, &bh); - boxDestroy(&boxt); - } else { - bx = by = 0; - } - xstart = bx; - ystart = by; - xend = bx + bw - 1; - yend = by + bh - 1; - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - foundmin = 0; - if (scanflag == L_FROM_LEFT) { - for (x = xstart; x <= xend; x++) { - sum = 0; - for (y = ystart; y <= yend; y += factor) { - line = data + y * wpl; - if (GET_DATA_BIT(line, x)) - sum++; - } - if (!foundmin && sum < lowthresh) - continue; - if (!foundmin) { /* save the loc of the beginning of the edge */ - foundmin = 1; - loc = x; - } - if (sum >= highthresh) { -#if DEBUG_EDGES - lept_stderr("Left: x = %d, loc = %d\n", x, loc); -#endif /* DEBUG_EDGES */ - if (x - loc < maxwidth) { - *ploc = loc; - return 0; - } else { - return 1; - } - } - } - } else if (scanflag == L_FROM_RIGHT) { - for (x = xend; x >= xstart; x--) { - sum = 0; - for (y = ystart; y <= yend; y += factor) { - line = data + y * wpl; - if (GET_DATA_BIT(line, x)) - sum++; - } - if (!foundmin && sum < lowthresh) - continue; - if (!foundmin) { - foundmin = 1; - loc = x; - } - if (sum >= highthresh) 
{ -#if DEBUG_EDGES - lept_stderr("Right: x = %d, loc = %d\n", x, loc); -#endif /* DEBUG_EDGES */ - if (loc - x < maxwidth) { - *ploc = loc; - return 0; - } else { - return 1; - } - } - } - } else if (scanflag == L_FROM_TOP) { - for (y = ystart; y <= yend; y++) { - sum = 0; - line = data + y * wpl; - for (x = xstart; x <= xend; x += factor) { - if (GET_DATA_BIT(line, x)) - sum++; - } - if (!foundmin && sum < lowthresh) - continue; - if (!foundmin) { - foundmin = 1; - loc = y; - } - if (sum >= highthresh) { -#if DEBUG_EDGES - lept_stderr("Top: y = %d, loc = %d\n", y, loc); -#endif /* DEBUG_EDGES */ - if (y - loc < maxwidth) { - *ploc = loc; - return 0; - } else { - return 1; - } - } - } - } else if (scanflag == L_FROM_BOT) { - for (y = yend; y >= ystart; y--) { - sum = 0; - line = data + y * wpl; - for (x = xstart; x <= xend; x += factor) { - if (GET_DATA_BIT(line, x)) - sum++; - } - if (!foundmin && sum < lowthresh) - continue; - if (!foundmin) { - foundmin = 1; - loc = y; - } - if (sum >= highthresh) { -#if DEBUG_EDGES - lept_stderr("Bottom: y = %d, loc = %d\n", y, loc); -#endif /* DEBUG_EDGES */ - if (loc - y < maxwidth) { - *ploc = loc; - return 0; - } else { - return 1; - } - } - } - } else { - return ERROR_INT("invalid scanflag", procName, 1); - } - - return 1; /* edge not found */ -} - - -/*---------------------------------------------------------------------* - * Extract pixel averages and reversals along lines * - *---------------------------------------------------------------------*/ -/*! - * \brief pixExtractOnLine() - * - * \param[in] pixs 1 bpp or 8 bpp; no colormap - * \param[in] x1, y1 one end point for line - * \param[in] x2, y2 another end pt for line - * \param[in] factor sampling; >= 1 - * \return na of pixel values along line, or NULL on error. - * - *
- * Notes:
- *      (1) Input end points are clipped to the pix.
- *      (2) If the line is either horizontal, or closer to horizontal
- *          than to vertical, the points will be extracted from left
- *          to right in the pix.  Likewise, if the line is vertical,
- *          or closer to vertical than to horizontal, the points will
- *          be extracted from top to bottom.
- *      (3) Can be used with numaCountReverals(), for example, to
- *          characterize the intensity smoothness along a line.
- * 
- */ -NUMA * -pixExtractOnLine(PIX *pixs, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 factor) -{ -l_int32 i, w, h, d, xmin, ymin, xmax, ymax, npts, direction; -l_uint32 val; -l_float32 x, y; -l_float64 slope; -NUMA *na; -PTA *pta; - - PROCNAME("pixExtractOnLine"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8) - return (NUMA *)ERROR_PTR("d not 1 or 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (NUMA *)ERROR_PTR("pixs has a colormap", procName, NULL); - if (factor < 1) { - L_WARNING("factor must be >= 1; setting to 1\n", procName); - factor = 1; - } - - /* Clip line to the image */ - x1 = L_MAX(0, L_MIN(x1, w - 1)); - x2 = L_MAX(0, L_MIN(x2, w - 1)); - y1 = L_MAX(0, L_MIN(y1, h - 1)); - y2 = L_MAX(0, L_MIN(y2, h - 1)); - - if (x1 == x2 && y1 == y2) { - pixGetPixel(pixs, x1, y1, &val); - na = numaCreate(1); - numaAddNumber(na, val); - return na; - } - - if (y1 == y2) - direction = L_HORIZONTAL_LINE; - else if (x1 == x2) - direction = L_VERTICAL_LINE; - else - direction = L_OBLIQUE_LINE; - - na = numaCreate(0); - if (direction == L_HORIZONTAL_LINE) { /* plot against x */ - xmin = L_MIN(x1, x2); - xmax = L_MAX(x1, x2); - numaSetParameters(na, xmin, factor); - for (i = xmin; i <= xmax; i += factor) { - pixGetPixel(pixs, i, y1, &val); - numaAddNumber(na, val); - } - } else if (direction == L_VERTICAL_LINE) { /* plot against y */ - ymin = L_MIN(y1, y2); - ymax = L_MAX(y1, y2); - numaSetParameters(na, ymin, factor); - for (i = ymin; i <= ymax; i += factor) { - pixGetPixel(pixs, x1, i, &val); - numaAddNumber(na, val); - } - } else { /* direction == L_OBLIQUE_LINE */ - slope = (l_float64)((y2 - y1) / (x2 - x1)); - if (L_ABS(slope) < 1.0) { /* quasi-horizontal */ - xmin = L_MIN(x1, x2); - xmax = L_MAX(x1, x2); - ymin = (xmin == x1) ? y1 : y2; /* pt that goes with xmin */ - ymax = (ymin == y1) ? 
y2 : y1; /* pt that goes with xmax */ - pta = generatePtaLine(xmin, ymin, xmax, ymax); - numaSetParameters(na, xmin, (l_float32)factor); - } else { /* quasi-vertical */ - ymin = L_MIN(y1, y2); - ymax = L_MAX(y1, y2); - xmin = (ymin == y1) ? x1 : x2; /* pt that goes with ymin */ - xmax = (xmin == x1) ? x2 : x1; /* pt that goes with ymax */ - pta = generatePtaLine(xmin, ymin, xmax, ymax); - numaSetParameters(na, ymin, (l_float32)factor); - } - npts = ptaGetCount(pta); - for (i = 0; i < npts; i += factor) { - ptaGetPt(pta, i, &x, &y); - pixGetPixel(pixs, (l_int32)x, (l_int32)y, &val); - numaAddNumber(na, val); - } - -#if 0 /* debugging */ - pixPlotAlongPta(pixs, pta, GPLOT_PNG, NULL); -#endif - - ptaDestroy(&pta); - } - - return na; -} - - -/*! - * \brief pixAverageOnLine() - * - * \param[in] pixs 1 bpp or 8 bpp; no colormap - * \param[in] x1, y1 starting pt for line - * \param[in] x2, y2 end pt for line - * \param[in] factor sampling; >= 1 - * \return average of pixel values along line, or NULL on error. - * - *
- * Notes:
- *      (1) The line must be either horizontal or vertical, so either
- *          y1 == y2 (horizontal) or x1 == x2 (vertical).
- *      (2) If horizontal, x1 must be <= x2.
- *          If vertical, y1 must be <= y2.
- *          characterize the intensity smoothness along a line.
- *      (3) Input end points are clipped to the pix.
- * 
- */ -l_float32 -pixAverageOnLine(PIX *pixs, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 factor) -{ -l_int32 i, j, w, h, d, direction, count, wpl; -l_uint32 *data, *line; -l_float32 sum; - - PROCNAME("pixAverageOnLine"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8) - return ERROR_INT("d not 1 or 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs has a colormap", procName, 1); - if (x1 > x2 || y1 > y2) - return ERROR_INT("x1 > x2 or y1 > y2", procName, 1); - - if (y1 == y2) { - x1 = L_MAX(0, x1); - x2 = L_MIN(w - 1, x2); - y1 = L_MAX(0, L_MIN(y1, h - 1)); - direction = L_HORIZONTAL_LINE; - } else if (x1 == x2) { - y1 = L_MAX(0, y1); - y2 = L_MIN(h - 1, y2); - x1 = L_MAX(0, L_MIN(x1, w - 1)); - direction = L_VERTICAL_LINE; - } else { - return ERROR_INT("line neither horiz nor vert", procName, 1); - } - - if (factor < 1) { - L_WARNING("factor must be >= 1; setting to 1\n", procName); - factor = 1; - } - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - sum = 0; - count = 0; - if (direction == L_HORIZONTAL_LINE) { - line = data + y1 * wpl; - for (j = x1, count = 0; j <= x2; count++, j += factor) { - if (d == 1) - sum += GET_DATA_BIT(line, j); - else /* d == 8 */ - sum += GET_DATA_BYTE(line, j); - } - } else if (direction == L_VERTICAL_LINE) { - for (i = y1, count = 0; i <= y2; count++, i += factor) { - line = data + i * wpl; - if (d == 1) - sum += GET_DATA_BIT(line, x1); - else /* d == 8 */ - sum += GET_DATA_BYTE(line, x1); - } - } - - return sum / (l_float32)count; -} - - -/*! 
- * \brief pixAverageIntensityProfile() - * - * \param[in] pixs any depth; colormap OK - * \param[in] fract fraction of image width or height to be used - * \param[in] dir averaging direction: L_HORIZONTAL_LINE or - * L_VERTICAL_LINE - * \param[in] first, last span of rows or columns to measure - * \param[in] factor1 sampling along fast scan direction; >= 1 - * \param[in] factor2 sampling along slow scan direction; >= 1 - * \return na of reversal profile, or NULL on error. - * - *
- * Notes:
- *      (1) If d != 1 bpp, colormaps are removed and the result
- *          is converted to 8 bpp.
- *      (2) If %dir == L_HORIZONTAL_LINE, the intensity is averaged
- *          along each horizontal raster line (sampled by %factor1),
- *          and the profile is the array of these averages in the
- *          vertical direction between %first and %last raster lines,
- *          and sampled by %factor2.
- *      (3) If %dir == L_VERTICAL_LINE, the intensity is averaged
- *          along each vertical line (sampled by %factor1),
- *          and the profile is the array of these averages in the
- *          horizontal direction between %first and %last columns,
- *          and sampled by %factor2.
- *      (4) The averages are measured over the central %fract of the image.
- *          Use %fract == 1.0 to average across the entire width or height.
- * 
- */ -NUMA * -pixAverageIntensityProfile(PIX *pixs, - l_float32 fract, - l_int32 dir, - l_int32 first, - l_int32 last, - l_int32 factor1, - l_int32 factor2) -{ -l_int32 i, j, w, h, d, start, end; -l_float32 ave; -NUMA *nad; -PIX *pixr, *pixg; - - PROCNAME("pixAverageIntensityProfile"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (fract < 0.0 || fract > 1.0) - return (NUMA *)ERROR_PTR("fract < 0.0 or > 1.0", procName, NULL); - if (dir != L_HORIZONTAL_LINE && dir != L_VERTICAL_LINE) - return (NUMA *)ERROR_PTR("invalid direction", procName, NULL); - if (first < 0) first = 0; - if (last < first) - return (NUMA *)ERROR_PTR("last must be >= first", procName, NULL); - if (factor1 < 1) { - L_WARNING("factor1 must be >= 1; setting to 1\n", procName); - factor1 = 1; - } - if (factor2 < 1) { - L_WARNING("factor2 must be >= 1; setting to 1\n", procName); - factor2 = 1; - } - - /* Use 1 or 8 bpp, without colormap */ - if (pixGetColormap(pixs)) - pixr = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixr = pixClone(pixs); - pixGetDimensions(pixr, &w, &h, &d); - if (d == 1) - pixg = pixClone(pixr); - else - pixg = pixConvertTo8(pixr, 0); - - nad = numaCreate(0); /* output: samples in slow scan direction */ - numaSetParameters(nad, 0, factor2); - if (dir == L_HORIZONTAL_LINE) { - start = (l_int32)(0.5 * (1.0 - fract) * (l_float32)w); - end = w - start; - if (last > h - 1) { - L_WARNING("last > h - 1; clipping\n", procName); - last = h - 1; - } - for (i = first; i <= last; i += factor2) { - ave = pixAverageOnLine(pixg, start, i, end, i, factor1); - numaAddNumber(nad, ave); - } - } else if (dir == L_VERTICAL_LINE) { - start = (l_int32)(0.5 * (1.0 - fract) * (l_float32)h); - end = h - start; - if (last > w - 1) { - L_WARNING("last > w - 1; clipping\n", procName); - last = w - 1; - } - for (j = first; j <= last; j += factor2) { - ave = pixAverageOnLine(pixg, j, start, j, end, factor1); - numaAddNumber(nad, ave); - } - } - - 
pixDestroy(&pixr); - pixDestroy(&pixg); - return nad; -} - - -/*! - * \brief pixReversalProfile() - * - * \param[in] pixs any depth; colormap OK - * \param[in] fract fraction of image width or height to be used - * \param[in] dir profile direction: L_HORIZONTAL_LINE or - * L_VERTICAL_LINE - * \param[in] first, last span of rows or columns to measure - * \param[in] minreversal minimum change in intensity to trigger a reversal - * \param[in] factor1 sampling along raster line (fast scan); >= 1 - * \param[in] factor2 sampling of raster lines (slow scan); >= 1 - * \return na of reversal profile, or NULL on error. - * - *
- * Notes:
- *      (1) If d != 1 bpp, colormaps are removed and the result
- *          is converted to 8 bpp.
- *      (2) If %dir == L_HORIZONTAL_LINE, the the reversals are counted
- *          along each horizontal raster line (sampled by %factor1),
- *          and the profile is the array of these sums in the
- *          vertical direction between %first and %last raster lines,
- *          and sampled by %factor2.
- *      (3) If %dir == L_VERTICAL_LINE, the the reversals are counted
- *          along each vertical column (sampled by %factor1),
- *          and the profile is the array of these sums in the
- *          horizontal direction between %first and %last columns,
- *          and sampled by %factor2.
- *      (4) For each row or column, the reversals are summed over the
- *          central %fract of the image.  Use %fract == 1.0 to sum
- *          across the entire width (of row) or height (of column).
- *      (5) %minreversal is the relative change in intensity that is
- *          required to resolve peaks and valleys.  A typical number for
- *          locating text in 8 bpp might be 50.  For 1 bpp, minreversal
- *          must be 1.
- *      (6) The reversal profile is simply the number of reversals
- *          in a row or column, vs the row or column index.
- * 
- */ -NUMA * -pixReversalProfile(PIX *pixs, - l_float32 fract, - l_int32 dir, - l_int32 first, - l_int32 last, - l_int32 minreversal, - l_int32 factor1, - l_int32 factor2) -{ -l_int32 i, j, w, h, d, start, end, nr; -NUMA *naline, *nad; -PIX *pixr, *pixg; - - PROCNAME("pixReversalProfile"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (fract < 0.0 || fract > 1.0) - return (NUMA *)ERROR_PTR("fract < 0.0 or > 1.0", procName, NULL); - if (dir != L_HORIZONTAL_LINE && dir != L_VERTICAL_LINE) - return (NUMA *)ERROR_PTR("invalid direction", procName, NULL); - if (first < 0) first = 0; - if (last < first) - return (NUMA *)ERROR_PTR("last must be >= first", procName, NULL); - if (factor1 < 1) { - L_WARNING("factor1 must be >= 1; setting to 1\n", procName); - factor1 = 1; - } - if (factor2 < 1) { - L_WARNING("factor2 must be >= 1; setting to 1\n", procName); - factor2 = 1; - } - - /* Use 1 or 8 bpp, without colormap */ - if (pixGetColormap(pixs)) - pixr = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixr = pixClone(pixs); - pixGetDimensions(pixr, &w, &h, &d); - if (d == 1) { - pixg = pixClone(pixr); - minreversal = 1; /* enforce this */ - } else { - pixg = pixConvertTo8(pixr, 0); - } - - nad = numaCreate(0); /* output: samples in slow scan direction */ - numaSetParameters(nad, 0, factor2); - if (dir == L_HORIZONTAL_LINE) { - start = (l_int32)(0.5 * (1.0 - fract) * (l_float32)w); - end = w - start; - if (last > h - 1) { - L_WARNING("last > h - 1; clipping\n", procName); - last = h - 1; - } - for (i = first; i <= last; i += factor2) { - naline = pixExtractOnLine(pixg, start, i, end, i, factor1); - numaCountReversals(naline, minreversal, &nr, NULL); - numaAddNumber(nad, nr); - numaDestroy(&naline); - } - } else if (dir == L_VERTICAL_LINE) { - start = (l_int32)(0.5 * (1.0 - fract) * (l_float32)h); - end = h - start; - if (last > w - 1) { - L_WARNING("last > w - 1; clipping\n", procName); - last = w - 1; - } - for (j = first; j 
<= last; j += factor2) { - naline = pixExtractOnLine(pixg, j, start, j, end, factor1); - numaCountReversals(naline, minreversal, &nr, NULL); - numaAddNumber(nad, nr); - numaDestroy(&naline); - } - } - - pixDestroy(&pixr); - pixDestroy(&pixg); - return nad; -} - - -/*---------------------------------------------------------------------* - * Extract windowed variance along a line * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWindowedVarianceOnLine() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] dir L_HORIZONTAL_LINE or L_VERTICAL_LINE - * \param[in] loc location of the constant coordinate for the line - * \param[in] c1, c2 end point coordinates for the line - * \param[in] size window size; must be > 1 - * \param[out] pnad windowed square root of variance - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The returned variance array traverses the line starting
- *          from the smallest coordinate, min(c1,c2).
- *      (2) Line end points are clipped to pixs.
- *      (3) The reference point for the variance calculation is the center of
- *          the window.  Therefore, the numa start parameter from
- *          pixExtractOnLine() is incremented by %size/2,
- *          to align the variance values with the pixel coordinate.
- *      (4) The square root of the variance is the RMS deviation from the mean.
- * 
- */ -l_ok -pixWindowedVarianceOnLine(PIX *pixs, - l_int32 dir, - l_int32 loc, - l_int32 c1, - l_int32 c2, - l_int32 size, - NUMA **pnad) -{ -l_int32 i, j, w, h, cmin, cmax, maxloc, n, x, y; -l_uint32 val; -l_float32 norm, rootvar; -l_float32 *array; -l_float64 sum1, sum2, ave, var; -NUMA *na1, *nad; -PTA *pta; - - PROCNAME("pixWindowedVarianceOnLine"); - - if (!pnad) - return ERROR_INT("&nad not defined", procName, 1); - *pnad = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8bpp", procName, 1); - if (size < 2) - return ERROR_INT("window size must be > 1", procName, 1); - if (dir != L_HORIZONTAL_LINE && dir != L_VERTICAL_LINE) - return ERROR_INT("invalid direction", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - maxloc = (dir == L_HORIZONTAL_LINE) ? h - 1 : w - 1; - if (loc < 0 || loc > maxloc) - return ERROR_INT("invalid line position", procName, 1); - - /* Clip line to the image */ - cmin = L_MIN(c1, c2); - cmax = L_MAX(c1, c2); - maxloc = (dir == L_HORIZONTAL_LINE) ? 
w - 1 : h - 1; - cmin = L_MAX(0, L_MIN(cmin, maxloc)); - cmax = L_MAX(0, L_MIN(cmax, maxloc)); - n = cmax - cmin + 1; - - /* Generate pta along the line */ - pta = ptaCreate(n); - if (dir == L_HORIZONTAL_LINE) { - for (i = cmin; i <= cmax; i++) - ptaAddPt(pta, i, loc); - } else { /* vertical line */ - for (i = cmin; i <= cmax; i++) - ptaAddPt(pta, loc, i); - } - - /* Get numa of pixel values on the line */ - na1 = numaCreate(n); - numaSetParameters(na1, cmin, 1); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - pixGetPixel(pixs, x, y, &val); - numaAddNumber(na1, val); - } - array = numaGetFArray(na1, L_NOCOPY); - ptaDestroy(&pta); - - /* Compute root variance on overlapping windows */ - nad = numaCreate(n); - *pnad = nad; - numaSetParameters(nad, cmin + size / 2, 1); - norm = 1.0 / (l_float32)size; - for (i = 0; i < n - size; i++) { /* along the line */ - sum1 = sum2 = 0; - for (j = 0; j < size; j++) { /* over the window */ - val = array[i + j]; - sum1 += val; - sum2 += (l_float64)(val) * val; - } - ave = norm * sum1; - var = norm * sum2 - ave * ave; - rootvar = (l_float32)sqrt(var); - numaAddNumber(nad, rootvar); - } - - numaDestroy(&na1); - return 0; -} - - -/*---------------------------------------------------------------------* - * Extract min/max of pixel values near lines * - *---------------------------------------------------------------------*/ -/*! - * \brief pixMinMaxNearLine() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] x1, y1 starting pt for line - * \param[in] x2, y2 end pt for line - * \param[in] dist distance to search from line in each direction - * \param[in] direction L_SCAN_NEGATIVE, L_SCAN_POSITIVE, L_SCAN_BOTH - * \param[out] pnamin [optional] minimum values - * \param[out] pnamax [optional] maximum values - * \param[out] pminave [optional] average of minimum values - * \param[out] pmaxave [optional] average of maximum values - * \return 0 if OK; 1 on error or if there are no sampled points - * within the image. - * - *
- * Notes:
- *      (1) If the line is more horizontal than vertical, the values
- *          are computed for [x1, x2], and the pixels are taken
- *          below and/or above the local y-value.  Otherwise, the
- *          values are computed for [y1, y2] and the pixels are taken
- *          to the left and/or right of the local x value.
- *      (2) %direction specifies which side (or both sides) of the
- *          line are scanned for min and max values.
- *      (3) There are two ways to tell if the returned values of min
- *          and max averages are valid: the returned values cannot be
- *          negative and the function must return 0.
- *      (4) All accessed pixels are clipped to the pix.
- * 
- */ -l_ok -pixMinMaxNearLine(PIX *pixs, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2, - l_int32 dist, - l_int32 direction, - NUMA **pnamin, - NUMA **pnamax, - l_float32 *pminave, - l_float32 *pmaxave) -{ -l_int32 i, j, w, h, d, x, y, n, dir, found, minval, maxval, negloc, posloc; -l_uint32 val; -l_float32 sum; -NUMA *namin, *namax; -PTA *pta; - - PROCNAME("pixMinMaxNearLine"); - - if (pnamin) *pnamin = NULL; - if (pnamax) *pnamax = NULL; - if (pminave) *pminave = UNDEF; - if (pmaxave) *pmaxave = UNDEF; - if (!pnamin && !pnamax && !pminave && !pmaxave) - return ERROR_INT("no output requested", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 || pixGetColormap(pixs)) - return ERROR_INT("pixs not 8 bpp or has colormap", procName, 1); - dist = L_ABS(dist); - if (direction != L_SCAN_NEGATIVE && direction != L_SCAN_POSITIVE && - direction != L_SCAN_BOTH) - return ERROR_INT("invalid direction", procName, 1); - - pta = generatePtaLine(x1, y1, x2, y2); - n = ptaGetCount(pta); - dir = (L_ABS(x1 - x2) == n - 1) ? 
L_HORIZ : L_VERT; - namin = numaCreate(n); - namax = numaCreate(n); - negloc = -dist; - posloc = dist; - if (direction == L_SCAN_NEGATIVE) - posloc = 0; - else if (direction == L_SCAN_POSITIVE) - negloc = 0; - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - minval = 255; - maxval = 0; - found = FALSE; - if (dir == L_HORIZ) { - if (x < 0 || x >= w) continue; - for (j = negloc; j <= posloc; j++) { - if (y + j < 0 || y + j >= h) continue; - pixGetPixel(pixs, x, y + j, &val); - found = TRUE; - if (val < minval) minval = val; - if (val > maxval) maxval = val; - } - } else { /* dir == L_VERT */ - if (y < 0 || y >= h) continue; - for (j = negloc; j <= posloc; j++) { - if (x + j < 0 || x + j >= w) continue; - pixGetPixel(pixs, x + j, y, &val); - found = TRUE; - if (val < minval) minval = val; - if (val > maxval) maxval = val; - } - } - if (found) { - numaAddNumber(namin, minval); - numaAddNumber(namax, maxval); - } - } - - n = numaGetCount(namin); - if (n == 0) { - numaDestroy(&namin); - numaDestroy(&namax); - ptaDestroy(&pta); - return ERROR_INT("no output from this line", procName, 1); - } - - if (pminave) { - numaGetSum(namin, &sum); - *pminave = sum / n; - } - if (pmaxave) { - numaGetSum(namax, &sum); - *pmaxave = sum / n; - } - if (pnamin) - *pnamin = namin; - else - numaDestroy(&namin); - if (pnamax) - *pnamax = namax; - else - numaDestroy(&namax); - ptaDestroy(&pta); - return 0; -} - - -/*---------------------------------------------------------------------* - * Rank row and column transforms * - *---------------------------------------------------------------------*/ -/*! - * \brief pixRankRowTransform() - * - * \param[in] pixs 8 bpp; no colormap - * \return pixd with pixels sorted in each row, from - * min to max value - * - *
- * Notes:
- *     (1) The time is O(n) in the number of pixels and runs about
- *         100 Mpixels/sec on a 3 GHz machine.
- * 
- */ -PIX * -pixRankRowTransform(PIX *pixs) -{ -l_int32 i, j, k, m, w, h, wpl, val; -l_int32 histo[256]; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixRankRowTransform"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has a colormap", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreateTemplate(pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - memset(histo, 0, 1024); - lines = datas + i * wpl; - lined = datad + i * wpl; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - histo[val]++; - } - for (m = 0, j = 0; m < 256; m++) { - for (k = 0; k < histo[m]; k++, j++) - SET_DATA_BYTE(lined, j, m); - } - } - - return pixd; -} - - -/*! - * \brief pixRankColumnTransform() - * - * \param[in] pixs 8 bpp; no colormap - * \return pixd with pixels sorted in each column, from - * min to max value - * - *
- * Notes:
- *     (1) The time is O(n) in the number of pixels and runs about
- *         50 Mpixels/sec on a 3 GHz machine.
- * 
- */ -PIX * -pixRankColumnTransform(PIX *pixs) -{ -l_int32 i, j, k, m, w, h, val; -l_int32 histo[256]; -void **lines8, **lined8; -PIX *pixd; - - PROCNAME("pixRankColumnTransform"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has a colormap", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreateTemplate(pixs); - lines8 = pixGetLinePtrs(pixs, NULL); - lined8 = pixGetLinePtrs(pixd, NULL); - for (j = 0; j < w; j++) { - memset(histo, 0, 1024); - for (i = 0; i < h; i++) { - val = GET_DATA_BYTE(lines8[i], j); - histo[val]++; - } - for (m = 0, i = 0; m < 256; m++) { - for (k = 0; k < histo[m]; k++, i++) - SET_DATA_BYTE(lined8[i], j, m); - } - } - - LEPT_FREE(lines8); - LEPT_FREE(lined8); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixabasic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixabasic.c deleted file mode 100644 index 68123337..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixabasic.c +++ /dev/null @@ -1,3233 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixabasic.c - *
- *
- *      Pixa creation, destruction, copying
- *           PIXA     *pixaCreate()
- *           PIXA     *pixaCreateFromPix()
- *           PIXA     *pixaCreateFromBoxa()
- *           PIXA     *pixaSplitPix()
- *           void      pixaDestroy()
- *           PIXA     *pixaCopy()
- *
- *      Pixa addition
- *           l_int32   pixaAddPix()
- *           l_int32   pixaAddBox()
- *           static l_int32   pixaExtendArray()
- *           l_int32   pixaExtendArrayToSize()
- *
- *      Pixa accessors
- *           l_int32   pixaGetCount()
- *           l_int32   pixaChangeRefcount()
- *           PIX      *pixaGetPix()
- *           l_int32   pixaGetPixDimensions()
- *           BOXA     *pixaGetBoxa()
- *           l_int32   pixaGetBoxaCount()
- *           BOX      *pixaGetBox()
- *           l_int32   pixaGetBoxGeometry()
- *           l_int32   pixaSetBoxa()
- *           PIX     **pixaGetPixArray()
- *           l_int32   pixaVerifyDepth()
- *           l_int32   pixaVerifyDimensions()
- *           l_int32   pixaIsFull()
- *           l_int32   pixaCountText()
- *           l_int32   pixaSetText()
- *           void   ***pixaGetLinePtrs()
- *
- *      Pixa output info
- *           l_int32   pixaWriteStreamInfo()
- *
- *      Pixa array modifiers
- *           l_int32   pixaReplacePix()
- *           l_int32   pixaInsertPix()
- *           l_int32   pixaRemovePix()
- *           l_int32   pixaRemovePixAndSave()
- *           l_int32   pixaRemoveSelected()
- *           l_int32   pixaInitFull()
- *           l_int32   pixaClear()
- *
- *      Pixa and Pixaa combination
- *           l_int32   pixaJoin()
- *           PIXA     *pixaInterleave()
- *           l_int32   pixaaJoin()
- *
- *      Pixaa creation, destruction
- *           PIXAA    *pixaaCreate()
- *           PIXAA    *pixaaCreateFromPixa()
- *           void      pixaaDestroy()
- *
- *      Pixaa addition
- *           l_int32   pixaaAddPixa()
- *           l_int32   pixaaExtendArray()
- *           l_int32   pixaaAddPix()
- *           l_int32   pixaaAddBox()
- *
- *      Pixaa accessors
- *           l_int32   pixaaGetCount()
- *           PIXA     *pixaaGetPixa()
- *           BOXA     *pixaaGetBoxa()
- *           PIX      *pixaaGetPix()
- *           l_int32   pixaaVerifyDepth()
- *           l_int32   pixaaVerifyDimensions()
- *           l_int32   pixaaIsFull()
- *
- *      Pixaa array modifiers
- *           l_int32   pixaaInitFull()
- *           l_int32   pixaaReplacePixa()
- *           l_int32   pixaaClear()
- *           l_int32   pixaaTruncate()
- *
- *      Pixa serialized I/O  (requires png support)
- *           PIXA     *pixaRead()
- *           PIXA     *pixaReadStream()
- *           PIXA     *pixaReadMem()
- *           l_int32   pixaWriteDebug()
- *           l_int32   pixaWrite()
- *           l_int32   pixaWriteStream()
- *           l_int32   pixaWriteMem()
- *           PIXA     *pixaReadBoth()
- *
- *      Pixaa serialized I/O  (requires png support)
- *           PIXAA    *pixaaReadFromFiles()
- *           PIXAA    *pixaaRead()
- *           PIXAA    *pixaaReadStream()
- *           PIXAA    *pixaaReadMem()
- *           l_int32   pixaaWrite()
- *           l_int32   pixaaWriteStream()
- *           l_int32   pixaaWriteMem()
- *
- *
- *   Important note on reference counting:
- *     Reference counting for the Pixa is analogous to that for the Boxa.
- *     See pix.h for details.   pixaCopy() provides three possible modes
- *     of copy.  The basic rule is that however a Pixa is obtained
- *     (e.g., from pixaCreate*(), pixaCopy(), or a Pixaa accessor),
- *     it is necessary to call pixaDestroy() on it.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 100000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - - /* Static functions */ -static l_int32 pixaExtendArray(PIXA *pixa); - -/*---------------------------------------------------------------------* - * Pixa creation, destruction, copy * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaCreate() - * - * \param[in] n initial number of ptrs - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) This creates an empty boxa.
- * 
- */ -PIXA * -pixaCreate(l_int32 n) -{ -PIXA *pixa; - - PROCNAME("pixaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - pixa = (PIXA *)LEPT_CALLOC(1, sizeof(PIXA)); - pixa->n = 0; - pixa->nalloc = n; - pixa->refcount = 1; - pixa->pix = (PIX **)LEPT_CALLOC(n, sizeof(PIX *)); - pixa->boxa = boxaCreate(n); - if (!pixa->pix || !pixa->boxa) { - pixaDestroy(&pixa); - return (PIXA *)ERROR_PTR("pix or boxa not made", procName, NULL); - } - return pixa; -} - - -/*! - * \brief pixaCreateFromPix() - * - * \param[in] pixs with individual components on a lattice - * \param[in] n number of components - * \param[in] cellw width of each cell - * \param[in] cellh height of each cell - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) For bpp = 1, we truncate each retrieved pix to the ON
- *          pixels, which we assume for now start at (0,0)
- * 
- */ -PIXA * -pixaCreateFromPix(PIX *pixs, - l_int32 n, - l_int32 cellw, - l_int32 cellh) -{ -l_int32 w, h, d, nw, nh, i, j, index; -PIX *pix1, *pix2; -PIXA *pixa; - - PROCNAME("pixaCreateFromPix"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (n <= 0) - return (PIXA *)ERROR_PTR("n must be > 0", procName, NULL); - - if ((pixa = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixa not made", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if ((pix1 = pixCreate(cellw, cellh, d)) == NULL) { - pixaDestroy(&pixa); - return (PIXA *)ERROR_PTR("pix1 not made", procName, NULL); - } - - nw = (w + cellw - 1) / cellw; - nh = (h + cellh - 1) / cellh; - for (i = 0, index = 0; i < nh; i++) { - for (j = 0; j < nw && index < n; j++, index++) { - pixRasterop(pix1, 0, 0, cellw, cellh, PIX_SRC, pixs, - j * cellw, i * cellh); - if (d == 1 && !pixClipToForeground(pix1, &pix2, NULL)) - pixaAddPix(pixa, pix2, L_INSERT); - else - pixaAddPix(pixa, pix1, L_COPY); - } - } - - pixDestroy(&pix1); - return pixa; -} - - -/*! - * \brief pixaCreateFromBoxa() - * - * \param[in] pixs - * \param[in] boxa - * \param[in] start first box to use - * \param[in] num number of boxes; use 0 to go to the end - * \param[out] pcropwarn [optional] TRUE if the boxa extent - * is larger than pixs. - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) This simply extracts from pixs the region corresponding to each
- *          box in the boxa.  To extract all the regions, set both %start
- *          and %num to 0.
- *      (2) The 5th arg is optional.  If the extent of the boxa exceeds the
- *          size of the pixa, so that some boxes are either clipped
- *          or entirely outside the pix, a warning is returned as TRUE.
- *      (3) pixad will have only the properly clipped elements, and
- *          the internal boxa will be correct.
- * 
- */ -PIXA * -pixaCreateFromBoxa(PIX *pixs, - BOXA *boxa, - l_int32 start, - l_int32 num, - l_int32 *pcropwarn) -{ -l_int32 i, n, end, w, h, wbox, hbox, cropwarn; -BOX *box, *boxc; -PIX *pixd; -PIXA *pixad; - - PROCNAME("pixaCreateFromBoxa"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa) - return (PIXA *)ERROR_PTR("boxa not defined", procName, NULL); - if (num < 0) - return (PIXA *)ERROR_PTR("num must be >= 0", procName, NULL); - - n = boxaGetCount(boxa); - end = (num == 0) ? n - 1 : L_MIN(start + num - 1, n - 1); - if ((pixad = pixaCreate(end - start + 1)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - - boxaGetExtent(boxa, &wbox, &hbox, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - cropwarn = FALSE; - if (wbox > w || hbox > h) - cropwarn = TRUE; - if (pcropwarn) - *pcropwarn = cropwarn; - - for (i = start; i <= end; i++) { - box = boxaGetBox(boxa, i, L_COPY); - if (cropwarn) { /* if box is outside pixs, pixd is NULL */ - pixd = pixClipRectangle(pixs, box, &boxc); /* may be NULL */ - if (pixd) { - pixaAddPix(pixad, pixd, L_INSERT); - pixaAddBox(pixad, boxc, L_INSERT); - } - boxDestroy(&box); - } else { - pixd = pixClipRectangle(pixs, box, NULL); - pixaAddPix(pixad, pixd, L_INSERT); - pixaAddBox(pixad, box, L_INSERT); - } - } - - return pixad; -} - - -/*! - * \brief pixaSplitPix() - * - * \param[in] pixs with individual components on a lattice - * \param[in] nx number of mosaic cells horizontally - * \param[in] ny number of mosaic cells vertically - * \param[in] borderwidth of added border on all sides - * \param[in] bordercolor in our RGBA format: 0xrrggbbaa - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) This is a variant on pixaCreateFromPix(), where we
- *          simply divide the image up into (approximately) equal
- *          subunits.  If you want the subimages to have essentially
- *          the same aspect ratio as the input pix, use nx = ny.
- *      (2) If borderwidth is 0, we ignore the input bordercolor and
- *          redefine it to white.
- *      (3) The bordercolor is always used to initialize each tiled pix,
- *          so that if the src is clipped, the unblitted part will
- *          be this color.  This avoids 1 pixel wide black stripes at the
- *          left and lower edges.
- * 
- */ -PIXA * -pixaSplitPix(PIX *pixs, - l_int32 nx, - l_int32 ny, - l_int32 borderwidth, - l_uint32 bordercolor) -{ -l_int32 w, h, d, cellw, cellh, i, j; -PIX *pix1; -PIXA *pixa; - - PROCNAME("pixaSplitPix"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (nx <= 0 || ny <= 0) - return (PIXA *)ERROR_PTR("nx and ny must be > 0", procName, NULL); - borderwidth = L_MAX(0, borderwidth); - - if ((pixa = pixaCreate(nx * ny)) == NULL) - return (PIXA *)ERROR_PTR("pixa not made", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - cellw = (w + nx - 1) / nx; /* round up */ - cellh = (h + ny - 1) / ny; - - for (i = 0; i < ny; i++) { - for (j = 0; j < nx; j++) { - if ((pix1 = pixCreate(cellw + 2 * borderwidth, - cellh + 2 * borderwidth, d)) == NULL) { - pixaDestroy(&pixa); - return (PIXA *)ERROR_PTR("pix1 not made", procName, NULL); - } - pixCopyColormap(pix1, pixs); - if (borderwidth == 0) { /* initialize full image to white */ - if (d == 1) - pixClearAll(pix1); - else - pixSetAll(pix1); - } else { - pixSetAllArbitrary(pix1, bordercolor); - } - pixRasterop(pix1, borderwidth, borderwidth, cellw, cellh, - PIX_SRC, pixs, j * cellw, i * cellh); - pixaAddPix(pixa, pix1, L_INSERT); - } - } - - return pixa; -} - - -/*! - * \brief pixaDestroy() - * - * \param[in,out] ppixa use ptr address so it will be nulled - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the pixa.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -pixaDestroy(PIXA **ppixa) -{ -l_int32 i; -PIXA *pixa; - - PROCNAME("pixaDestroy"); - - if (ppixa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((pixa = *ppixa) == NULL) - return; - - /* Decrement the refcount. If it is 0, destroy the pixa. */ - pixaChangeRefcount(pixa, -1); - if (pixa->refcount <= 0) { - for (i = 0; i < pixa->n; i++) - pixDestroy(&pixa->pix[i]); - LEPT_FREE(pixa->pix); - boxaDestroy(&pixa->boxa); - LEPT_FREE(pixa); - } - - *ppixa = NULL; - return; -} - - -/*! - * \brief pixaCopy() - * - * \param[in] pixa - * \param[in] copyflag see pix.h for details: - * L_COPY makes a new pixa and copies each pix and each box; - * L_CLONE gives a new ref-counted handle to the input pixa; - * L_COPY_CLONE makes a new pixa and inserts clones of - * all pix and boxes - * \return new pixa, or NULL on error - */ -PIXA * -pixaCopy(PIXA *pixa, - l_int32 copyflag) -{ -l_int32 i, nb; -BOX *boxc; -PIX *pixc; -PIXA *pixac; - - PROCNAME("pixaCopy"); - - if (!pixa) - return (PIXA *)ERROR_PTR("pixa not defined", procName, NULL); - - if (copyflag == L_CLONE) { - pixaChangeRefcount(pixa, 1); - return pixa; - } - - if (copyflag != L_COPY && copyflag != L_COPY_CLONE) - return (PIXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - if ((pixac = pixaCreate(pixa->n)) == NULL) - return (PIXA *)ERROR_PTR("pixac not made", procName, NULL); - nb = pixaGetBoxaCount(pixa); - for (i = 0; i < pixa->n; i++) { - if (copyflag == L_COPY) { - pixc = pixaGetPix(pixa, i, L_COPY); - if (i < nb) boxc = pixaGetBox(pixa, i, L_COPY); - } else { /* copy-clone */ - pixc = pixaGetPix(pixa, i, L_CLONE); - if (i < nb) boxc = pixaGetBox(pixa, i, L_CLONE); - } - pixaAddPix(pixac, pixc, L_INSERT); - if (i < nb) pixaAddBox(pixac, boxc, L_INSERT); - } - - return pixac; -} - - - -/*---------------------------------------------------------------------* - * Pixa addition * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixaAddPix() - * - * \param[in] pixa - * \param[in] pix to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK; 1 on error - */ -l_ok -pixaAddPix(PIXA *pixa, - PIX *pix, - l_int32 copyflag) -{ -l_int32 n; -PIX *pixc; - - PROCNAME("pixaAddPix"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if (copyflag == L_INSERT) - pixc = pix; - else if (copyflag == L_COPY) - pixc = pixCopy(NULL, pix); - else if (copyflag == L_CLONE) - pixc = pixClone(pix); - else - return ERROR_INT("invalid copyflag", procName, 1); - if (!pixc) - return ERROR_INT("pixc not made", procName, 1); - - n = pixaGetCount(pixa); - if (n >= pixa->nalloc) - pixaExtendArray(pixa); - pixa->pix[n] = pixc; - pixa->n++; - - return 0; -} - - -/*! - * \brief pixaAddBox() - * - * \param[in] pixa - * \param[in] box - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - */ -l_ok -pixaAddBox(PIXA *pixa, - BOX *box, - l_int32 copyflag) -{ - PROCNAME("pixaAddBox"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY && copyflag != L_CLONE) - return ERROR_INT("invalid copyflag", procName, 1); - - boxaAddBox(pixa->boxa, box, copyflag); - return 0; -} - - -/*! - * \brief pixaExtendArray() - * - * \param[in] pixa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Doubles the size of the pixa and boxa ptr arrays.
- * 
- */ -static l_int32 -pixaExtendArray(PIXA *pixa) -{ - PROCNAME("pixaExtendArray"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - return pixaExtendArrayToSize(pixa, 2 * pixa->nalloc); -} - - -/*! - * \brief pixaExtendArrayToSize() - * - * \param[in] pixa - * \param[in] size - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If necessary, reallocs new pixa and boxa ptrs arrays to %size.
- *          The pixa and boxa ptr arrays must always be equal in size.
- * 
- */ -l_ok -pixaExtendArrayToSize(PIXA *pixa, - l_int32 size) -{ - PROCNAME("pixaExtendArrayToSize"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - if (size > pixa->nalloc) { - if ((pixa->pix = (PIX **)reallocNew((void **)&pixa->pix, - sizeof(PIX *) * pixa->nalloc, - size * sizeof(PIX *))) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - pixa->nalloc = size; - } - return boxaExtendArrayToSize(pixa->boxa, size); -} - - -/*---------------------------------------------------------------------* - * Pixa accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaGetCount() - * - * \param[in] pixa - * \return count, or 0 if no pixa - */ -l_int32 -pixaGetCount(PIXA *pixa) -{ - PROCNAME("pixaGetCount"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 0); - - return pixa->n; -} - - -/*! - * \brief pixaChangeRefcount() - * - * \param[in] pixa - * \param[in] delta - * \return 0 if OK, 1 on error - */ -l_ok -pixaChangeRefcount(PIXA *pixa, - l_int32 delta) -{ - PROCNAME("pixaChangeRefcount"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - pixa->refcount += delta; - return 0; -} - - -/*! 
- * \brief pixaGetPix() - * - * \param[in] pixa - * \param[in] index to the index-th pix - * \param[in] accesstype L_COPY or L_CLONE - * \return pix, or NULL on error - */ -PIX * -pixaGetPix(PIXA *pixa, - l_int32 index, - l_int32 accesstype) -{ -PIX *pix; - - PROCNAME("pixaGetPix"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if (index < 0 || index >= pixa->n) - return (PIX *)ERROR_PTR("index not valid", procName, NULL); - if ((pix = pixa->pix[index]) == NULL) { - L_ERROR("no pix at pixa[%d]\n", procName, index); - return (PIX *)ERROR_PTR("pix not found!", procName, NULL); - } - - if (accesstype == L_COPY) - return pixCopy(NULL, pix); - else if (accesstype == L_CLONE) - return pixClone(pix); - else - return (PIX *)ERROR_PTR("invalid accesstype", procName, NULL); -} - - -/*! - * \brief pixaGetPixDimensions() - * - * \param[in] pixa - * \param[in] index to the index-th box - * \param[out] pw, ph, pd [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixaGetPixDimensions(PIXA *pixa, - l_int32 index, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd) -{ -PIX *pix; - - PROCNAME("pixaGetPixDimensions"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pd) *pd = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (index < 0 || index >= pixa->n) - return ERROR_INT("index not valid", procName, 1); - - if ((pix = pixaGetPix(pixa, index, L_CLONE)) == NULL) - return ERROR_INT("pix not found!", procName, 1); - pixGetDimensions(pix, pw, ph, pd); - pixDestroy(&pix); - return 0; -} - - -/*! 
- * \brief pixaGetBoxa() - * - * \param[in] pixa - * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE - * \return boxa, or NULL on error - */ -BOXA * -pixaGetBoxa(PIXA *pixa, - l_int32 accesstype) -{ - PROCNAME("pixaGetBoxa"); - - if (!pixa) - return (BOXA *)ERROR_PTR("pixa not defined", procName, NULL); - if (!pixa->boxa) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE && - accesstype != L_COPY_CLONE) - return (BOXA *)ERROR_PTR("invalid accesstype", procName, NULL); - - return boxaCopy(pixa->boxa, accesstype); -} - - -/*! - * \brief pixaGetBoxaCount() - * - * \param[in] pixa - * \return count, or 0 on error - */ -l_int32 -pixaGetBoxaCount(PIXA *pixa) -{ - PROCNAME("pixaGetBoxaCount"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 0); - - return boxaGetCount(pixa->boxa); -} - - -/*! - * \brief pixaGetBox() - * - * \param[in] pixa - * \param[in] index to the index-th pix - * \param[in] accesstype L_COPY or L_CLONE - * \return box if null, not automatically an error, or NULL on error - * - *
- * Notes:
- *      (1) There is always a boxa with a pixa, and it is initialized so
- *          that each box ptr is NULL.
- *      (2) In general, we expect that there is either a box associated
- *          with each pix, or no boxes at all in the boxa.
- *      (3) Having no boxes is thus not an automatic error.  Whether it
- *          is an actual error is determined by the calling program.
- *          If the caller expects to get a box, it is an error; see, e.g.,
- *          pixaGetBoxGeometry().
- * 
- */ -BOX * -pixaGetBox(PIXA *pixa, - l_int32 index, - l_int32 accesstype) -{ -BOX *box; - - PROCNAME("pixaGetBox"); - - if (!pixa) - return (BOX *)ERROR_PTR("pixa not defined", procName, NULL); - if (!pixa->boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - if (index < 0 || index >= pixa->boxa->n) - return (BOX *)ERROR_PTR("index not valid", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE) - return (BOX *)ERROR_PTR("invalid accesstype", procName, NULL); - - box = pixa->boxa->box[index]; - if (box) { - if (accesstype == L_COPY) - return boxCopy(box); - else /* accesstype == L_CLONE */ - return boxClone(box); - } else { - return NULL; - } -} - - -/*! - * \brief pixaGetBoxGeometry() - * - * \param[in] pixa - * \param[in] index to the index-th box - * \param[out] px, py, pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixaGetBoxGeometry(PIXA *pixa, - l_int32 index, - l_int32 *px, - l_int32 *py, - l_int32 *pw, - l_int32 *ph) -{ -BOX *box; - - PROCNAME("pixaGetBoxGeometry"); - - if (px) *px = 0; - if (py) *py = 0; - if (pw) *pw = 0; - if (ph) *ph = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (index < 0 || index >= pixa->n) - return ERROR_INT("index not valid", procName, 1); - - if ((box = pixaGetBox(pixa, index, L_CLONE)) == NULL) - return ERROR_INT("box not found!", procName, 1); - boxGetGeometry(box, px, py, pw, ph); - boxDestroy(&box); - return 0; -} - - -/*! - * \brief pixaSetBoxa() - * - * \param[in] pixa - * \param[in] boxa - * \param[in] accesstype L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys the existing boxa in the pixa.
- * 
- */ -l_ok -pixaSetBoxa(PIXA *pixa, - BOXA *boxa, - l_int32 accesstype) -{ - PROCNAME("pixaSetBoxa"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!boxa) - return ERROR_INT("boxa not defined", procName, 1); - if (accesstype != L_INSERT && accesstype != L_COPY && - accesstype != L_CLONE) - return ERROR_INT("invalid access type", procName, 1); - - boxaDestroy(&pixa->boxa); - if (accesstype == L_INSERT) - pixa->boxa = boxa; - else - pixa->boxa = boxaCopy(boxa, accesstype); - - return 0; -} - - -/*! - * \brief pixaGetPixArray() - * - * \param[in] pixa - * \return pix array, or NULL on error - * - *
- * Notes:
- *      (1) This returns a ptr to the actual array.  The array is
- *          owned by the pixa, so it must not be destroyed.
- *      (2) The caller should always check if the return value is NULL
- *          before accessing any of the pix ptrs in this array!
- * 
- */ -PIX ** -pixaGetPixArray(PIXA *pixa) -{ - PROCNAME("pixaGetPixArray"); - - if (!pixa) - return (PIX **)ERROR_PTR("pixa not defined", procName, NULL); - - return pixa->pix; -} - - -/*! - * \brief pixaVerifyDepth() - * - * \param[in] pixa - * \param[out] psame 1 if depth is the same for all pix; 0 otherwise - * \param[out] pmaxd [optional] max depth of all pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is considered to be an error if there are no pix.
- * 
- */ -l_ok -pixaVerifyDepth(PIXA *pixa, - l_int32 *psame, - l_int32 *pmaxd) -{ -l_int32 i, n, d, maxd, same; - - PROCNAME("pixaVerifyDepth"); - - if (pmaxd) *pmaxd = 0; - if (!psame) - return ERROR_INT("psame not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if ((n = pixaGetCount(pixa)) == 0) - return ERROR_INT("no pix in pixa", procName, 1); - - same = 1; - pixaGetPixDimensions(pixa, 0, NULL, NULL, &maxd); - for (i = 1; i < n; i++) { - if (pixaGetPixDimensions(pixa, i, NULL, NULL, &d)) - return ERROR_INT("pix depth not found", procName, 1); - maxd = L_MAX(maxd, d); - if (d != maxd) - same = 0; - } - *psame = same; - if (pmaxd) *pmaxd = maxd; - return 0; -} - - -/*! - * \brief pixaVerifyDimensions() - * - * \param[in] pixa - * \param[out] psame 1 if dimensions are the same for all pix; 0 otherwise - * \param[out] pmaxw [optional] max width of all pix - * \param[out] pmaxh [optional] max height of all pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is considered to be an error if there are no pix.
- * 
- */ -l_ok -pixaVerifyDimensions(PIXA *pixa, - l_int32 *psame, - l_int32 *pmaxw, - l_int32 *pmaxh) -{ -l_int32 i, n, w, h, maxw, maxh, same; - - PROCNAME("pixaVerifyDimensions"); - - if (pmaxw) *pmaxw = 0; - if (pmaxh) *pmaxh = 0; - if (!psame) - return ERROR_INT("psame not defined", procName, 1); - *psame = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if ((n = pixaGetCount(pixa)) == 0) - return ERROR_INT("no pix in pixa", procName, 1); - - same = 1; - pixaGetPixDimensions(pixa, 0, &maxw, &maxh, NULL); - for (i = 1; i < n; i++) { - if (pixaGetPixDimensions(pixa, i, &w, &h, NULL)) - return ERROR_INT("pix dimensions not found", procName, 1); - maxw = L_MAX(maxw, w); - maxh = L_MAX(maxh, h); - if (w != maxw || h != maxh) - same = 0; - } - *psame = same; - if (pmaxw) *pmaxw = maxw; - if (pmaxh) *pmaxh = maxh; - return 0; -} - - -/*! - * \brief pixaIsFull() - * - * \param[in] pixa - * \param[out] pfullpa [optional] 1 if pixa is full - * \param[out] pfullba [optional] 1 if boxa is full - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) A pixa is "full" if the array of pix is fully
- *          occupied from index 0 to index (pixa->n - 1).
- * 
- */ -l_ok -pixaIsFull(PIXA *pixa, - l_int32 *pfullpa, - l_int32 *pfullba) -{ -l_int32 i, n, full; -BOXA *boxa; -PIX *pix; - - PROCNAME("pixaIsFull"); - - if (pfullpa) *pfullpa = 0; - if (pfullba) *pfullba = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - if (pfullpa) { - full = 1; - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) { - full = 0; - break; - } - pixDestroy(&pix); - } - *pfullpa = full; - } - if (pfullba) { - boxa = pixaGetBoxa(pixa, L_CLONE); - boxaIsFull(boxa, pfullba); - boxaDestroy(&boxa); - } - return 0; -} - - -/*! - * \brief pixaCountText() - * - * \param[in] pixa - * \param[out] pntext number of pix with non-empty text strings - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) All pix have non-empty text strings if the returned value %ntext
- *          equals the pixa count.
- * 
- */ -l_ok -pixaCountText(PIXA *pixa, - l_int32 *pntext) -{ -char *text; -l_int32 i, n; -PIX *pix; - - PROCNAME("pixaCountText"); - - if (!pntext) - return ERROR_INT("&ntext not defined", procName, 1); - *pntext = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) - continue; - text = pixGetText(pix); - if (text && strlen(text) > 0) - (*pntext)++; - pixDestroy(&pix); - } - - return 0; -} - - -/*! - * \brief pixaSetText() - * - * \param[in] pixa - * \param[in] text [optional] single text string, to insert in each pix - * \param[in] sa [optional] array of text strings, to insert in each pix - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) To clear all the text fields, use %sa == NULL and %text == NULL.
- *      (2) To set all the text fields to the same value %text, use %sa = NULL.
- *      (3) If %sa is defined, we ignore %text and use it; %sa must have
- *          the same count as %pixa.
- * 
- */ -l_ok -pixaSetText(PIXA *pixa, - const char *text, - SARRAY *sa) -{ -char *str; -l_int32 i, n; -PIX *pix; - - PROCNAME("pixaSetText"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - if (sa && (sarrayGetCount(sa) != n)) - return ERROR_INT("pixa and sa sizes differ", procName, 1); - - if (!sa) { - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) - continue; - pixSetText(pix, text); - pixDestroy(&pix); - } - return 0; - } - - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) - continue; - str = sarrayGetString(sa, i, L_NOCOPY); - pixSetText(pix, str); - pixDestroy(&pix); - } - - return 0; -} - - -/*! - * \brief pixaGetLinePtrs() - * - * \param[in] pixa of pix that all have the same depth - * \param[out] psize [optional] number of pix in the pixa - * \return array of array of line ptrs, or NULL on error - * - *
- * Notes:
- *      (1) See pixGetLinePtrs() for details.
- *      (2) It is best if all pix in the pixa are the same size.
- *          The size of each line ptr array is equal to the height
- *          of the pix that it refers to.
- *      (3) This is an array of arrays.  To destroy it:
- *            for (i = 0; i < size; i++)
- *                LEPT_FREE(lineset[i]);
- *            LEPT_FREE(lineset);
- * 
- */ -void *** -pixaGetLinePtrs(PIXA *pixa, - l_int32 *psize) -{ -l_int32 i, n, same; -void **lineptrs; -void ***lineset; -PIX *pix; - - PROCNAME("pixaGetLinePtrs"); - - if (psize) *psize = 0; - if (!pixa) - return (void ***)ERROR_PTR("pixa not defined", procName, NULL); - pixaVerifyDepth(pixa, &same, NULL); - if (!same) - return (void ***)ERROR_PTR("pixa not all same depth", procName, NULL); - n = pixaGetCount(pixa); - if (psize) *psize = n; - if ((lineset = (void ***)LEPT_CALLOC(n, sizeof(void **))) == NULL) - return (void ***)ERROR_PTR("lineset not made", procName, NULL); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - lineptrs = pixGetLinePtrs(pix, NULL); - lineset[i] = lineptrs; - pixDestroy(&pix); - } - - return lineset; -} - - -/*---------------------------------------------------------------------* - * Pixa output info * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaWriteStreamInfo() - * - * \param[in] fp file stream - * \param[in] pixa - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) For each pix in the pixa, write out the pix dimensions, spp,
- *          text string (if it exists), and cmap info.
- * 
- */ -l_ok -pixaWriteStreamInfo(FILE *fp, - PIXA *pixa) -{ -char *text; -l_int32 i, n, w, h, d, spp, count, hastext; -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixaWriteStreamInfo"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) { - fprintf(fp, "%d: no pix at this index\n", i); - continue; - } - pixGetDimensions(pix, &w, &h, &d); - spp = pixGetSpp(pix); - text = pixGetText(pix); - hastext = (text && strlen(text) > 0); - if ((cmap = pixGetColormap(pix)) != NULL) - count = pixcmapGetCount(cmap); - fprintf(fp, "Pix %d: w = %d, h = %d, d = %d, spp = %d", - i, w, h, d, spp); - if (cmap) fprintf(fp, ", cmap(%d colors)", count); - if (hastext) fprintf(fp, ", text = %s", text); - fprintf(fp, "\n"); - pixDestroy(&pix); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixa array modifiers * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaReplacePix() - * - * \param[in] pixa - * \param[in] index to the index-th pix - * \param[in] pix insert to replace existing one - * \param[in] box [optional] insert to replace existing - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place replacement of one pix.
- *      (2) The previous pix at that location is destroyed.
- * 
- */ -l_ok -pixaReplacePix(PIXA *pixa, - l_int32 index, - PIX *pix, - BOX *box) -{ -BOXA *boxa; - - PROCNAME("pixaReplacePix"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (index < 0 || index >= pixa->n) - return ERROR_INT("index not valid", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixDestroy(&(pixa->pix[index])); - pixa->pix[index] = pix; - - if (box) { - boxa = pixa->boxa; - if (index > boxa->n) - return ERROR_INT("boxa index not valid", procName, 1); - boxaReplaceBox(boxa, index, box); - } - - return 0; -} - - -/*! - * \brief pixaInsertPix() - * - * \param[in] pixa - * \param[in] index at which pix is to be inserted - * \param[in] pixs new pix to be inserted - * \param[in] box [optional] new box to be inserted - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts pixa[i] --> pixa[i + 1] for all i >= index,
- *          and then inserts at pixa[index].
- *      (2) To insert at the beginning of the array, set index = 0.
- *      (3) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- *      (4) To append a pix to a pixa, it's easier to use pixaAddPix().
- * 
- */ -l_ok -pixaInsertPix(PIXA *pixa, - l_int32 index, - PIX *pixs, - BOX *box) -{ -l_int32 i, n; - - PROCNAME("pixaInsertPix"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - n = pixaGetCount(pixa); - if (index < 0 || index > n) - return ERROR_INT("index not in {0...n}", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - if (n >= pixa->nalloc) { /* extend both ptr arrays */ - pixaExtendArray(pixa); - boxaExtendArray(pixa->boxa); - } - pixa->n++; - for (i = n; i > index; i--) - pixa->pix[i] = pixa->pix[i - 1]; - pixa->pix[index] = pixs; - - /* Optionally, insert the box */ - if (box) - boxaInsertBox(pixa->boxa, index, box); - - return 0; -} - - -/*! - * \brief pixaRemovePix() - * - * \param[in] pixa - * \param[in] index of pix to be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts pixa[i] --> pixa[i - 1] for all i > index.
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- *      (3) The corresponding box is removed as well, if it exists.
- * 
- */ -l_ok -pixaRemovePix(PIXA *pixa, - l_int32 index) -{ -l_int32 i, n, nbox; -BOXA *boxa; -PIX **array; - - PROCNAME("pixaRemovePix"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - n = pixaGetCount(pixa); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - /* Remove the pix */ - array = pixa->pix; - pixDestroy(&array[index]); - for (i = index + 1; i < n; i++) - array[i - 1] = array[i]; - array[n - 1] = NULL; - pixa->n--; - - /* Remove the box if it exists */ - boxa = pixa->boxa; - nbox = boxaGetCount(boxa); - if (index < nbox) - boxaRemoveBox(boxa, index); - - return 0; -} - - -/*! - * \brief pixaRemovePixAndSave() - * - * \param[in] pixa - * \param[in] index of pix to be removed - * \param[out] ppix [optional] removed pix - * \param[out] pbox [optional] removed box - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts pixa[i] --> pixa[i - 1] for all i > index.
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- *      (3) The corresponding box is removed as well, if it exists.
- *      (4) The removed pix and box can either be retained or destroyed.
- * 
- */ -l_ok -pixaRemovePixAndSave(PIXA *pixa, - l_int32 index, - PIX **ppix, - BOX **pbox) -{ -l_int32 i, n, nbox; -BOXA *boxa; -PIX **array; - - PROCNAME("pixaRemovePixAndSave"); - - if (ppix) *ppix = NULL; - if (pbox) *pbox = NULL; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - n = pixaGetCount(pixa); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - /* Remove the pix */ - array = pixa->pix; - if (ppix) - *ppix = pixaGetPix(pixa, index, L_CLONE); - pixDestroy(&array[index]); - for (i = index + 1; i < n; i++) - array[i - 1] = array[i]; - array[n - 1] = NULL; - pixa->n--; - - /* Remove the box if it exists */ - boxa = pixa->boxa; - nbox = boxaGetCount(boxa); - if (index < nbox) - boxaRemoveBoxAndSave(boxa, index, pbox); - - return 0; -} - - -/*! - * \brief pixaRemoveSelected() - * - * \param[in] pixa - * \param[in] naindex numa of indices of pix to be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This gives error messages for invalid indices
- * 
- */ -l_ok -pixaRemoveSelected(PIXA *pixa, - NUMA *naindex) -{ -l_int32 i, n, index; -NUMA *na1; - - PROCNAME("pixaRemoveSelected"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!naindex) - return ERROR_INT("naindex not defined", procName, 1); - if ((n = numaGetCount(naindex)) == 0) - return ERROR_INT("naindex is empty", procName, 1); - - /* Remove from highest indices first */ - na1 = numaSort(NULL, naindex, L_SORT_DECREASING); - for (i = 0; i < n; i++) { - numaGetIValue(na1, i, &index); - pixaRemovePix(pixa, index); - } - numaDestroy(&na1); - return 0; -} - - -/*! - * \brief pixaInitFull() - * - * \param[in] pixa typically empty - * \param[in] pix [optional] to be replicated to the entire pixa ptr array - * \param[in] box [optional] to be replicated to the entire boxa ptr array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This initializes a pixa by filling up the entire pix ptr array
- *          with copies of %pix.  If %pix == NULL, we use a tiny placeholder
- *          pix (w = h = d = 1).  Any existing pix are destroyed.
- *          It also optionally fills the boxa with copies of %box.
- *          After this operation, the numbers of pix and (optionally)
- *          boxes are equal to the number of allocated ptrs.
- *      (2) Note that we use pixaReplacePix() instead of pixaInsertPix().
- *          They both have the same effect when inserting into a NULL ptr
- *          in the pixa ptr array:
- *      (3) If the boxa is not initialized (i.e., filled with boxes),
- *          later insertion of boxes will cause an error, because the
- *          'n' field is 0.
- *      (4) Example usage.  This function is useful to prepare for a
- *          random insertion (or replacement) of pix into a pixa.
- *          To randomly insert pix into a pixa, without boxes, up to
- *          some index "max":
- *             Pixa *pixa = pixaCreate(max);
- *             pixaInitFull(pixa, NULL, NULL);
- *          An existing pixa with a smaller ptr array can also be reused:
- *             pixaExtendArrayToSize(pixa, max);
- *             pixaInitFull(pixa, NULL, NULL);
- *          The initialization allows the pixa to always be properly
- *          filled, even if all pix (and boxes) are not later replaced.
- * 
- */ -l_ok -pixaInitFull(PIXA *pixa, - PIX *pix, - BOX *box) -{ -l_int32 i, n; -PIX *pix1; - - PROCNAME("pixaInitFull"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixa->nalloc; - pixa->n = n; - for (i = 0; i < n; i++) { - if (pix) - pix1 = pixCopy(NULL, pix); - else - pix1 = pixCreate(1, 1, 1); - pixaReplacePix(pixa, i, pix1, NULL); - } - if (box) - boxaInitFull(pixa->boxa, box); - - return 0; -} - - -/*! - * \brief pixaClear() - * - * \param[in] pixa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys all pix in the pixa, as well as
- *          all boxes in the boxa.  The ptrs in the pix ptr array
- *          are all null'd.  The number of allocated pix, n, is set to 0.
- * 
- */ -l_ok -pixaClear(PIXA *pixa) -{ -l_int32 i, n; - - PROCNAME("pixaClear"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) - pixDestroy(&pixa->pix[i]); - pixa->n = 0; - return boxaClear(pixa->boxa); -} - - -/*---------------------------------------------------------------------* - * Pixa and Pixaa combination * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaJoin() - * - * \param[in] pixad dest pixa; add to this one - * \param[in] pixas [optional] source pixa; add from this one - * \param[in] istart starting index in pixas - * \param[in] iend ending index in pixas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This appends a clone of each indicated pix in pixas to pixad
- *      (2) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (3) iend < 0 means 'read to the end'
- *      (4) If pixas is NULL or contains no pix, this is a no-op.
- * 
- */ -l_ok -pixaJoin(PIXA *pixad, - PIXA *pixas, - l_int32 istart, - l_int32 iend) -{ -l_int32 i, n, nb; -BOXA *boxas, *boxad; -PIX *pix; - - PROCNAME("pixaJoin"); - - if (!pixad) - return ERROR_INT("pixad not defined", procName, 1); - if (!pixas || ((n = pixaGetCount(pixas)) == 0)) - return 0; - - if (istart < 0) - istart = 0; - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - pix = pixaGetPix(pixas, i, L_CLONE); - pixaAddPix(pixad, pix, L_INSERT); - } - - boxas = pixaGetBoxa(pixas, L_CLONE); - boxad = pixaGetBoxa(pixad, L_CLONE); - nb = pixaGetBoxaCount(pixas); - iend = L_MIN(iend, nb - 1); - boxaJoin(boxad, boxas, istart, iend); - boxaDestroy(&boxas); /* just the clones */ - boxaDestroy(&boxad); - return 0; -} - - -/*! - * \brief pixaInterleave() - * - * \param[in] pixa1 first src pixa - * \param[in] pixa2 second src pixa - * \param[in] copyflag L_CLONE, L_COPY - * \return pixa interleaved from sources, or NULL on error. - * - *
- * Notes:
- *      (1) %copyflag determines if the pix are copied or cloned.
- *          The boxes, if they exist, are copied.
- *      (2) If the two pixa have different sizes, a warning is issued,
- *          and the number of pairs returned is the minimum size.
- * 
- */ -PIXA * -pixaInterleave(PIXA *pixa1, - PIXA *pixa2, - l_int32 copyflag) -{ -l_int32 i, n1, n2, n, nb1, nb2; -BOX *box; -PIX *pix; -PIXA *pixad; - - PROCNAME("pixaInterleave"); - - if (!pixa1) - return (PIXA *)ERROR_PTR("pixa1 not defined", procName, NULL); - if (!pixa2) - return (PIXA *)ERROR_PTR("pixa2 not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (PIXA *)ERROR_PTR("invalid copyflag", procName, NULL); - n1 = pixaGetCount(pixa1); - n2 = pixaGetCount(pixa2); - n = L_MIN(n1, n2); - if (n == 0) - return (PIXA *)ERROR_PTR("at least one input pixa is empty", - procName, NULL); - if (n1 != n2) - L_WARNING("counts differ: %d != %d\n", procName, n1, n2); - - pixad = pixaCreate(2 * n); - nb1 = pixaGetBoxaCount(pixa1); - nb2 = pixaGetBoxaCount(pixa2); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa1, i, copyflag); - pixaAddPix(pixad, pix, L_INSERT); - if (i < nb1) { - box = pixaGetBox(pixa1, i, L_COPY); - pixaAddBox(pixad, box, L_INSERT); - } - pix = pixaGetPix(pixa2, i, copyflag); - pixaAddPix(pixad, pix, L_INSERT); - if (i < nb2) { - box = pixaGetBox(pixa2, i, L_COPY); - pixaAddBox(pixad, box, L_INSERT); - } - } - - return pixad; -} - - -/*! - * \brief pixaaJoin() - * - * \param[in] paad dest pixaa; add to this one - * \param[in] paas [optional] source pixaa; add from this one - * \param[in] istart starting index in pixaas - * \param[in] iend ending index in pixaas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This appends a clone of each indicated pixa in paas to pixaad
- *      (2) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (3) iend < 0 means 'read to the end'
- * 
- */ -l_ok -pixaaJoin(PIXAA *paad, - PIXAA *paas, - l_int32 istart, - l_int32 iend) -{ -l_int32 i, n; -PIXA *pixa; - - PROCNAME("pixaaJoin"); - - if (!paad) - return ERROR_INT("pixaad not defined", procName, 1); - if (!paas) - return 0; - - if (istart < 0) - istart = 0; - n = pixaaGetCount(paas, NULL); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - pixa = pixaaGetPixa(paas, i, L_CLONE); - pixaaAddPixa(paad, pixa, L_INSERT); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixaa creation and destruction * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaCreate() - * - * \param[in] n initial number of pixa ptrs - * \return paa, or NULL on error - * - *
- * Notes:
- *      (1) A pixaa provides a 2-level hierarchy of images.
- *          A common use is for segmentation masks, which are
- *          inexpensive to store in png format.
- *      (2) For example, suppose you want a mask for each textline
- *          in a two-column page.  The textline masks for each column
- *          can be represented by a pixa, of which there are 2 in the pixaa.
- *          The boxes for the textline mask components within a column
- *          can have their origin referred to the column rather than the page.
- *          Then the boxa field can be used to represent the two box (regions)
- *          for the columns, and the (x,y) components of each box can
- *          be used to get the absolute position of the textlines on
- *          the page.
- * 
- */ -PIXAA * -pixaaCreate(l_int32 n) -{ -PIXAA *paa; - - PROCNAME("pixaaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - paa = (PIXAA *)LEPT_CALLOC(1, sizeof(PIXAA)); - paa->n = 0; - paa->nalloc = n; - if ((paa->pixa = (PIXA **)LEPT_CALLOC(n, sizeof(PIXA *))) == NULL) { - pixaaDestroy(&paa); - return (PIXAA *)ERROR_PTR("pixa ptrs not made", procName, NULL); - } - paa->boxa = boxaCreate(n); - - return paa; -} - - -/*! - * \brief pixaaCreateFromPixa() - * - * \param[in] pixa - * \param[in] n number specifying subdivision of pixa - * \param[in] type L_CHOOSE_CONSECUTIVE, L_CHOOSE_SKIP_BY - * \param[in] copyflag L_CLONE, L_COPY - * \return paa, or NULL on error - * - *
- * Notes:
- *      (1) This subdivides a pixa into a set of smaller pixa that
- *          are accumulated into a pixaa.
- *      (2) If type == L_CHOOSE_CONSECUTIVE, the first 'n' pix are
- *          put in a pixa and added to pixaa, then the next 'n', etc.
- *          If type == L_CHOOSE_SKIP_BY, the first pixa is made by
- *          aggregating pix[0], pix[n], pix[2*n], etc.
- *      (3) The copyflag specifies if each new pix is a copy or a clone.
- * 
- */ -PIXAA * -pixaaCreateFromPixa(PIXA *pixa, - l_int32 n, - l_int32 type, - l_int32 copyflag) -{ -l_int32 count, i, j, npixa; -PIX *pix; -PIXA *pixat; -PIXAA *paa; - - PROCNAME("pixaaCreateFromPixa"); - - if (!pixa) - return (PIXAA *)ERROR_PTR("pixa not defined", procName, NULL); - count = pixaGetCount(pixa); - if (count == 0) - return (PIXAA *)ERROR_PTR("no pix in pixa", procName, NULL); - if (n <= 0) - return (PIXAA *)ERROR_PTR("n must be > 0", procName, NULL); - if (type != L_CHOOSE_CONSECUTIVE && type != L_CHOOSE_SKIP_BY) - return (PIXAA *)ERROR_PTR("invalid type", procName, NULL); - if (copyflag != L_CLONE && copyflag != L_COPY) - return (PIXAA *)ERROR_PTR("invalid copyflag", procName, NULL); - - if (type == L_CHOOSE_CONSECUTIVE) - npixa = (count + n - 1) / n; - else /* L_CHOOSE_SKIP_BY */ - npixa = L_MIN(n, count); - paa = pixaaCreate(npixa); - if (type == L_CHOOSE_CONSECUTIVE) { - for (i = 0; i < count; i++) { - if (i % n == 0) - pixat = pixaCreate(n); - pix = pixaGetPix(pixa, i, copyflag); - pixaAddPix(pixat, pix, L_INSERT); - if (i % n == n - 1) - pixaaAddPixa(paa, pixat, L_INSERT); - } - if (i % n != 0) - pixaaAddPixa(paa, pixat, L_INSERT); - } else { /* L_CHOOSE_SKIP_BY */ - for (i = 0; i < npixa; i++) { - pixat = pixaCreate(count / npixa + 1); - for (j = i; j < count; j += n) { - pix = pixaGetPix(pixa, j, copyflag); - pixaAddPix(pixat, pix, L_INSERT); - } - pixaaAddPixa(paa, pixat, L_INSERT); - } - } - - return paa; -} - - -/*! 
- * \brief pixaaDestroy() - * - * \param[in,out] ppaa use ptr address so it will be nulled - * \return void - */ -void -pixaaDestroy(PIXAA **ppaa) -{ -l_int32 i; -PIXAA *paa; - - PROCNAME("pixaaDestroy"); - - if (ppaa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((paa = *ppaa) == NULL) - return; - - for (i = 0; i < paa->n; i++) - pixaDestroy(&paa->pixa[i]); - LEPT_FREE(paa->pixa); - boxaDestroy(&paa->boxa); - - LEPT_FREE(paa); - *ppaa = NULL; - - return; -} - - -/*---------------------------------------------------------------------* - * Pixaa addition * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaAddPixa() - * - * \param[in] paa - * \param[in] pixa to be added - * \param[in] copyflag: - * L_INSERT inserts the pixa directly; - * L_COPY makes a new pixa and copies each pix and each box; - * L_CLONE gives a new handle to the input pixa; - * L_COPY_CLONE makes a new pixa and inserts clones of - * all pix and boxes - * \return 0 if OK; 1 on error - */ -l_ok -pixaaAddPixa(PIXAA *paa, - PIXA *pixa, - l_int32 copyflag) -{ -l_int32 n; -PIXA *pixac; - - PROCNAME("pixaaAddPixa"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY && - copyflag != L_CLONE && copyflag != L_COPY_CLONE) - return ERROR_INT("invalid copyflag", procName, 1); - - if (copyflag == L_INSERT) { - pixac = pixa; - } else { - if ((pixac = pixaCopy(pixa, copyflag)) == NULL) - return ERROR_INT("pixac not made", procName, 1); - } - - n = pixaaGetCount(paa, NULL); - if (n >= paa->nalloc) - pixaaExtendArray(paa); - paa->pixa[n] = pixac; - paa->n++; - - return 0; -} - - -/*! 
- * \brief pixaaExtendArray() - * - * \param[in] paa - * \return 0 if OK; 1 on error - */ -l_ok -pixaaExtendArray(PIXAA *paa) -{ - PROCNAME("pixaaExtendArray"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - - if ((paa->pixa = (PIXA **)reallocNew((void **)&paa->pixa, - sizeof(PIXA *) * paa->nalloc, - 2 * sizeof(PIXA *) * paa->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - paa->nalloc = 2 * paa->nalloc; - return 0; -} - - -/*! - * \brief pixaaAddPix() - * - * \param[in] paa input paa - * \param[in] index index of pixa in paa - * \param[in] pix to be added - * \param[in] box [optional] to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK; 1 on error - */ -l_ok -pixaaAddPix(PIXAA *paa, - l_int32 index, - PIX *pix, - BOX *box, - l_int32 copyflag) -{ -PIXA *pixa; - - PROCNAME("pixaaAddPix"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if ((pixa = pixaaGetPixa(paa, index, L_CLONE)) == NULL) - return ERROR_INT("pixa not found", procName, 1); - pixaAddPix(pixa, pix, copyflag); - if (box) pixaAddBox(pixa, box, copyflag); - pixaDestroy(&pixa); - return 0; -} - - -/*! - * \brief pixaaAddBox() - * - * \param[in] paa - * \param[in] box - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The box can be used, for example, to hold the support region
- *          of a pixa that is being added to the pixaa.
- * 
- */ -l_ok -pixaaAddBox(PIXAA *paa, - BOX *box, - l_int32 copyflag) -{ - PROCNAME("pixaaAddBox"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY && copyflag != L_CLONE) - return ERROR_INT("invalid copyflag", procName, 1); - - boxaAddBox(paa->boxa, box, copyflag); - return 0; -} - - - -/*---------------------------------------------------------------------* - * Pixaa accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaGetCount() - * - * \param[in] paa - * \param[out] pna [optional] number of pix in each pixa - * \return count, or 0 if no pixaa - * - *
- * Notes:
- *      (1) If paa is empty, a returned na will also be empty.
- * 
- */ -l_int32 -pixaaGetCount(PIXAA *paa, - NUMA **pna) -{ -l_int32 i, n; -NUMA *na; -PIXA *pixa; - - PROCNAME("pixaaGetCount"); - - if (pna) *pna = NULL; - if (!paa) - return ERROR_INT("paa not defined", procName, 0); - - n = paa->n; - if (pna) { - if ((na = numaCreate(n)) == NULL) - return ERROR_INT("na not made", procName, 0); - *pna = na; - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - numaAddNumber(na, pixaGetCount(pixa)); - pixaDestroy(&pixa); - } - } - return n; -} - - -/*! - * \brief pixaaGetPixa() - * - * \param[in] paa - * \param[in] index to the index-th pixa - * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) L_COPY makes a new pixa with a copy of every pix
- *      (2) L_CLONE just makes a new reference to the pixa,
- *          and bumps the counter.  You would use this, for example,
- *          when you need to extract some data from a pix within a
- *          pixa within a pixaa.
- *      (3) L_COPY_CLONE makes a new pixa with a clone of every pix
- *          and box
- *      (4) In all cases, you must invoke pixaDestroy() on the returned pixa
- * 
- */ -PIXA * -pixaaGetPixa(PIXAA *paa, - l_int32 index, - l_int32 accesstype) -{ -PIXA *pixa; - - PROCNAME("pixaaGetPixa"); - - if (!paa) - return (PIXA *)ERROR_PTR("paa not defined", procName, NULL); - if (index < 0 || index >= paa->n) - return (PIXA *)ERROR_PTR("index not valid", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE && - accesstype != L_COPY_CLONE) - return (PIXA *)ERROR_PTR("invalid accesstype", procName, NULL); - - if ((pixa = paa->pixa[index]) == NULL) { /* shouldn't happen! */ - L_ERROR("missing pixa[%d]\n", procName, index); - return (PIXA *)ERROR_PTR("pixa not found at index", procName, NULL); - } - return pixaCopy(pixa, accesstype); -} - - -/*! - * \brief pixaaGetBoxa() - * - * \param[in] paa - * \param[in] accesstype L_COPY, L_CLONE - * \return boxa, or NULL on error - * - *
- * Notes:
- *      (1) L_COPY returns a copy; L_CLONE returns a new reference to the boxa.
- *      (2) In both cases, invoke boxaDestroy() on the returned boxa.
- * 
- */ -BOXA * -pixaaGetBoxa(PIXAA *paa, - l_int32 accesstype) -{ - PROCNAME("pixaaGetBoxa"); - - if (!paa) - return (BOXA *)ERROR_PTR("paa not defined", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE) - return (BOXA *)ERROR_PTR("invalid access type", procName, NULL); - - return boxaCopy(paa->boxa, accesstype); -} - - -/*! - * \brief pixaaGetPix() - * - * \param[in] paa - * \param[in] index index into the pixa array in the pixaa - * \param[in] ipix index into the pix array in the pixa - * \param[in] accessflag L_COPY or L_CLONE - * \return pix, or NULL on error - */ -PIX * -pixaaGetPix(PIXAA *paa, - l_int32 index, - l_int32 ipix, - l_int32 accessflag) -{ -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaaGetPix"); - - if ((pixa = pixaaGetPixa(paa, index, L_CLONE)) == NULL) - return (PIX *)ERROR_PTR("pixa not retrieved", procName, NULL); - if ((pix = pixaGetPix(pixa, ipix, accessflag)) == NULL) - L_ERROR("pix not retrieved\n", procName); - pixaDestroy(&pixa); - return pix; -} - - -/*! - * \brief pixaaVerifyDepth() - * - * \param[in] paa - * \param[out] psame 1 if all pix have the same depth; 0 otherwise - * \param[out] pmaxd [optional] max depth of all pix in pixaa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) It is considered to be an error if any pixa have no pix.
- * 
- */ -l_ok -pixaaVerifyDepth(PIXAA *paa, - l_int32 *psame, - l_int32 *pmaxd) -{ -l_int32 i, n, d, maxd, same, samed; -PIXA *pixa; - - PROCNAME("pixaaVerifyDepth"); - - if (pmaxd) *pmaxd = 0; - if (!psame) - return ERROR_INT("psame not defined", procName, 1); - *psame = 0; - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if ((n = pixaaGetCount(paa, NULL)) == 0) - return ERROR_INT("no pixa in paa", procName, 1); - - pixa = pixaaGetPixa(paa, 0, L_CLONE); - pixaVerifyDepth(pixa, &same, &maxd); /* init same, maxd with first pixa */ - pixaDestroy(&pixa); - for (i = 1; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - pixaVerifyDepth(pixa, &samed, &d); - pixaDestroy(&pixa); - maxd = L_MAX(maxd, d); - if (!samed || maxd != d) - same = 0; - } - *psame = same; - if (pmaxd) *pmaxd = maxd; - return 0; -} - - -/*! - * \brief pixaaVerifyDimensions() - * - * \param[in] paa - * \param[out] psame 1 if all pix have the same depth; 0 otherwise - * \param[out] pmaxw [optional] max width of all pix in pixaa - * \param[out] pmaxh [optional] max height of all pix in pixaa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) It is considered to be an error if any pixa have no pix.
- * 
- */ -l_ok -pixaaVerifyDimensions(PIXAA *paa, - l_int32 *psame, - l_int32 *pmaxw, - l_int32 *pmaxh) -{ -l_int32 i, n, w, h, maxw, maxh, same, same2; -PIXA *pixa; - - PROCNAME("pixaaVerifyDimensions"); - - if (pmaxw) *pmaxw = 0; - if (pmaxh) *pmaxh = 0; - if (!psame) - return ERROR_INT("psame not defined", procName, 1); - *psame = 0; - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if ((n = pixaaGetCount(paa, NULL)) == 0) - return ERROR_INT("no pixa in paa", procName, 1); - - /* Init same; init maxw and maxh from first pixa */ - pixa = pixaaGetPixa(paa, 0, L_CLONE); - pixaVerifyDimensions(pixa, &same, &maxw, &maxh); - pixaDestroy(&pixa); - - for (i = 1; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - pixaVerifyDimensions(pixa, &same2, &w, &h); - pixaDestroy(&pixa); - maxw = L_MAX(maxw, w); - maxh = L_MAX(maxh, h); - if (!same2 || maxw != w || maxh != h) - same = 0; - } - *psame = same; - if (pmaxw) *pmaxw = maxw; - if (pmaxh) *pmaxh = maxh; - return 0; -} - - -/*! - * \brief pixaaIsFull() - * - * \param[in] paa - * \param[out] pfull 1 if all pixa in the paa have full pix arrays - * \return return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Does not require boxa associated with each pixa to be full.
- * 
- */ -l_int32 -pixaaIsFull(PIXAA *paa, - l_int32 *pfull) -{ -l_int32 i, n, full; -PIXA *pixa; - - PROCNAME("pixaaIsFull"); - - if (!pfull) - return ERROR_INT("&full not defined", procName, 0); - *pfull = 0; - if (!paa) - return ERROR_INT("paa not defined", procName, 0); - - n = pixaaGetCount(paa, NULL); - full = 1; - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - pixaIsFull(pixa, &full, NULL); - pixaDestroy(&pixa); - if (!full) break; - } - *pfull = full; - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixaa array modifiers * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaInitFull() - * - * \param[in] paa typically empty - * \param[in] pixa to be replicated into the entire pixa ptr array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This initializes a pixaa by filling up the entire pixa ptr array
- *          with copies of %pixa.  Any existing pixa are destroyed.
- *      (2) Example usage.  This function is useful to prepare for a
- *          random insertion (or replacement) of pixa into a pixaa.
- *          To randomly insert pixa into a pixaa, up to some index "max":
- *             Pixaa *paa = pixaaCreate(max);
- *             Pixa *pixa = pixaCreate(1);  // if you want little memory
- *             pixaaInitFull(paa, pixa);  // copy it to entire array
- *             pixaDestroy(&pixa);  // no longer needed
- *          The initialization allows the pixaa to always be properly filled.
- * 
- */ -l_ok -pixaaInitFull(PIXAA *paa, - PIXA *pixa) -{ -l_int32 i, n; -PIXA *pixat; - - PROCNAME("pixaaInitFull"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = paa->nalloc; - paa->n = n; - for (i = 0; i < n; i++) { - pixat = pixaCopy(pixa, L_COPY); - pixaaReplacePixa(paa, i, pixat); - } - - return 0; -} - - -/*! - * \brief pixaaReplacePixa() - * - * \param[in] paa - * \param[in] index to the index-th pixa - * \param[in] pixa insert to replace existing one - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This allows random insertion of a pixa into a pixaa, with
- *          destruction of any existing pixa at that location.
- *          The input pixa is now owned by the pixaa.
- *      (2) No other pixa in the array are affected.
- *      (3) The index must be within the allowed set.
- * 
- */ -l_ok -pixaaReplacePixa(PIXAA *paa, - l_int32 index, - PIXA *pixa) -{ - - PROCNAME("pixaaReplacePixa"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if (index < 0 || index >= paa->n) - return ERROR_INT("index not valid", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - pixaDestroy(&(paa->pixa[index])); - paa->pixa[index] = pixa; - return 0; -} - - -/*! - * \brief pixaaClear() - * - * \param[in] paa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys all pixa in the pixaa, and nulls the ptrs
- *          in the pixa ptr array.
- * 
- */ -l_ok -pixaaClear(PIXAA *paa) -{ -l_int32 i, n; - - PROCNAME("pixaClear"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - - n = pixaaGetCount(paa, NULL); - for (i = 0; i < n; i++) - pixaDestroy(&paa->pixa[i]); - paa->n = 0; - return 0; -} - - -/*! - * \brief pixaaTruncate() - * - * \param[in] paa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This identifies the largest index containing a pixa that
- *          has any pix within it, destroys all pixa above that index,
- *          and resets the count.
- * 
- */ -l_ok -pixaaTruncate(PIXAA *paa) -{ -l_int32 i, n, np; -PIXA *pixa; - - PROCNAME("pixaaTruncate"); - - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - - n = pixaaGetCount(paa, NULL); - for (i = n - 1; i >= 0; i--) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - if (!pixa) { - paa->n--; - continue; - } - np = pixaGetCount(pixa); - pixaDestroy(&pixa); - if (np == 0) { - pixaDestroy(&paa->pixa[i]); - paa->n--; - } else { - break; - } - } - return 0; -} - - - -/*---------------------------------------------------------------------* - * Pixa serialized I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaRead() - * - * \param[in] filename - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -PIXA * -pixaRead(const char *filename) -{ -FILE *fp; -PIXA *pixa; - - PROCNAME("pixaRead"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return (PIXA *)ERROR_PTR("no libpng: can't read data", procName, NULL); -#endif /* !HAVE_LIBPNG */ - - if (!filename) - return (PIXA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIXA *)ERROR_PTR("stream not opened", procName, NULL); - pixa = pixaReadStream(fp); - fclose(fp); - if (!pixa) - return (PIXA *)ERROR_PTR("pixa not read", procName, NULL); - return pixa; -} - - -/*! - * \brief pixaReadStream() - * - * \param[in] fp file stream - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -PIXA * -pixaReadStream(FILE *fp) -{ -l_int32 n, i, xres, yres, version; -l_int32 ignore; -BOXA *boxa; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaReadStream"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return (PIXA *)ERROR_PTR("no libpng: can't read data", procName, NULL); -#endif /* !HAVE_LIBPNG */ - - if (!fp) - return (PIXA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nPixa Version %d\n", &version) != 1) - return (PIXA *)ERROR_PTR("not a pixa file", procName, NULL); - if (version != PIXA_VERSION_NUMBER) - return (PIXA *)ERROR_PTR("invalid pixa version", procName, NULL); - if (fscanf(fp, "Number of pix = %d\n", &n) != 1) - return (PIXA *)ERROR_PTR("not a pixa file", procName, NULL); - - if ((boxa = boxaReadStream(fp)) == NULL) - return (PIXA *)ERROR_PTR("boxa not made", procName, NULL); - if ((pixa = pixaCreate(n)) == NULL) { - boxaDestroy(&boxa); - return (PIXA *)ERROR_PTR("pixa not made", procName, NULL); - } - boxaDestroy(&pixa->boxa); - pixa->boxa = boxa; - - for (i = 0; i < n; i++) { - if ((fscanf(fp, " pix[%d]: xres = %d, yres = %d\n", - &ignore, &xres, &yres)) != 3) { - pixaDestroy(&pixa); - return (PIXA *)ERROR_PTR("res reading error", procName, NULL); - } - if ((pix = pixReadStreamPng(fp)) == NULL) { - pixaDestroy(&pixa); - return (PIXA *)ERROR_PTR("pix not read", procName, NULL); - } - pixSetXRes(pix, xres); - pixSetYRes(pix, yres); - pixaAddPix(pixa, pix, L_INSERT); - } - return pixa; -} - - -/*! 
- * \brief pixaReadMem() - * - * \param[in] data of serialized pixa - * \param[in] size of data in bytes - * \return pixa, or NULL on error - */ -PIXA * -pixaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PIXA *pixa; - - PROCNAME("pixaReadMem"); - - if (!data) - return (PIXA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIXA *)ERROR_PTR("stream not opened", procName, NULL); - - pixa = pixaReadStream(fp); - fclose(fp); - if (!pixa) L_ERROR("pixa not read\n", procName); - return pixa; -} - - -/*! - * \brief pixaWriteDebug() - * - * \param[in] fname - * \param[in] pixa - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Debug version, intended for use in the library when writing
- *          to files in a temp directory with names that are compiled in.
- *          This is used instead of pixaWrite() for all such library calls.
- *      (2) The global variable LeptDebugOK defaults to 0, and can be set
- *          or cleared by the function setLeptDebugOK().
- * 
- */ -l_ok -pixaWriteDebug(const char *fname, - PIXA *pixa) -{ - PROCNAME("pixaWriteDebug"); - - if (LeptDebugOK) { - return pixaWrite(fname, pixa); - } else { - L_INFO("write to named temp file %s is disabled\n", procName, fname); - return 0; - } -} - - -/*! - * \brief pixaWrite() - * - * \param[in] filename - * \param[in] pixa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -l_ok -pixaWrite(const char *filename, - PIXA *pixa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixaWrite"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return ERROR_INT("no libpng: can't write data", procName, 1); -#endif /* !HAVE_LIBPNG */ - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixaWriteStream(fp, pixa); - fclose(fp); - if (ret) - return ERROR_INT("pixa not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixaWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] pixa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -l_ok -pixaWriteStream(FILE *fp, - PIXA *pixa) -{ -l_int32 n, i; -PIX *pix; - - PROCNAME("pixaWriteStream"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return ERROR_INT("no libpng: can't write data", procName, 1); -#endif /* !HAVE_LIBPNG */ - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - fprintf(fp, "\nPixa Version %d\n", PIXA_VERSION_NUMBER); - fprintf(fp, "Number of pix = %d\n", n); - boxaWriteStream(fp, pixa->boxa); - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) - return ERROR_INT("pix not found", procName, 1); - fprintf(fp, " pix[%d]: xres = %d, yres = %d\n", - i, pix->xres, pix->yres); - pixWriteStreamPng(fp, pix, 0.0); - pixDestroy(&pix); - } - return 0; -} - - -/*! - * \brief pixaWriteMem() - * - * \param[out] pdata data of serialized pixa - * \param[out] psize size of returned data - * \param[in] pixa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a pixa in memory and puts the result in a buffer.
- * 
- */ -l_ok -pixaWriteMem(l_uint8 **pdata, - size_t *psize, - PIXA *pixa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixaWriteStream(fp, pixa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixaWriteStream(fp, pixa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*! - * \brief pixaReadBoth() - * - * \param[in] filename - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) This reads serialized files of either a pixa or a pixacomp,
- *          and returns a pixa in memory.  It requires png and jpeg libraries.
- * 
- */ -PIXA * -pixaReadBoth(const char *filename) -{ -char buf[32]; -char *sname; -PIXA *pixa; -PIXAC *pac; - - PROCNAME("pixaReadBoth"); - - if (!filename) - return (PIXA *)ERROR_PTR("filename not defined", procName, NULL); - - l_getStructStrFromFile(filename, L_STR_NAME, &sname); - if (!sname) - return (PIXA *)ERROR_PTR("struct name not found", procName, NULL); - snprintf(buf, sizeof(buf), "%s", sname); - LEPT_FREE(sname); - - if (strcmp(buf, "Pixacomp") == 0) { - if ((pac = pixacompRead(filename)) == NULL) - return (PIXA *)ERROR_PTR("pac not made", procName, NULL); - pixa = pixaCreateFromPixacomp(pac, L_COPY); - pixacompDestroy(&pac); - } else if (strcmp(buf, "Pixa") == 0) { - if ((pixa = pixaRead(filename)) == NULL) - return (PIXA *)ERROR_PTR("pixa not made", procName, NULL); - } else { - return (PIXA *)ERROR_PTR("invalid file type", procName, NULL); - } - return pixa; -} - - -/*---------------------------------------------------------------------* - * Pixaa serialized I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaReadFromFiles() - * - * \param[in] dirname directory - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] first 0-based - * \param[in] nfiles use 0 for everything from %first to the end - * \return paa, or NULL on error or if no pixa files are found. - * - *
- * Notes:
- *      (1) The files must be serialized pixa files (e.g., *.pa)
- *          If some files cannot be read, warnings are issued.
- *      (2) Use %substr to filter filenames in the directory.  If
- *          %substr == NULL, this takes all files.
- *      (3) After filtering, use %first and %nfiles to select
- *          a contiguous set of files, that have been lexically
- *          sorted in increasing order.
- * 
- */ -PIXAA * -pixaaReadFromFiles(const char *dirname, - const char *substr, - l_int32 first, - l_int32 nfiles) -{ -char *fname; -l_int32 i, n; -PIXA *pixa; -PIXAA *paa; -SARRAY *sa; - - PROCNAME("pixaaReadFromFiles"); - - if (!dirname) - return (PIXAA *)ERROR_PTR("dirname not defined", procName, NULL); - - sa = getSortedPathnamesInDirectory(dirname, substr, first, nfiles); - if (!sa || ((n = sarrayGetCount(sa)) == 0)) { - sarrayDestroy(&sa); - return (PIXAA *)ERROR_PTR("no pixa files found", procName, NULL); - } - - paa = pixaaCreate(n); - for (i = 0; i < n; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - if ((pixa = pixaRead(fname)) == NULL) { - L_ERROR("pixa not read for %d-th file", procName, i); - continue; - } - pixaaAddPixa(paa, pixa, L_INSERT); - } - - sarrayDestroy(&sa); - return paa; -} - - -/*! - * \brief pixaaRead() - * - * \param[in] filename - * \return paa, or NULL on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -PIXAA * -pixaaRead(const char *filename) -{ -FILE *fp; -PIXAA *paa; - - PROCNAME("pixaaRead"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return (PIXAA *)ERROR_PTR("no libpng: can't read data", procName, NULL); -#endif /* !HAVE_LIBPNG */ - - if (!filename) - return (PIXAA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIXAA *)ERROR_PTR("stream not opened", procName, NULL); - paa = pixaaReadStream(fp); - fclose(fp); - if (!paa) - return (PIXAA *)ERROR_PTR("paa not read", procName, NULL); - return paa; -} - - -/*! - * \brief pixaaReadStream() - * - * \param[in] fp file stream - * \return paa, or NULL on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -PIXAA * -pixaaReadStream(FILE *fp) -{ -l_int32 n, i, version; -l_int32 ignore; -BOXA *boxa; -PIXA *pixa; -PIXAA *paa; - - PROCNAME("pixaaReadStream"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return (PIXAA *)ERROR_PTR("no libpng: can't read data", procName, NULL); -#endif /* !HAVE_LIBPNG */ - - if (!fp) - return (PIXAA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nPixaa Version %d\n", &version) != 1) - return (PIXAA *)ERROR_PTR("not a pixaa file", procName, NULL); - if (version != PIXAA_VERSION_NUMBER) - return (PIXAA *)ERROR_PTR("invalid pixaa version", procName, NULL); - if (fscanf(fp, "Number of pixa = %d\n", &n) != 1) - return (PIXAA *)ERROR_PTR("not a pixaa file", procName, NULL); - - if ((paa = pixaaCreate(n)) == NULL) - return (PIXAA *)ERROR_PTR("paa not made", procName, NULL); - if ((boxa = boxaReadStream(fp)) == NULL) { - pixaaDestroy(&paa); - return (PIXAA *)ERROR_PTR("boxa not made", procName, NULL); - } - boxaDestroy(&paa->boxa); - paa->boxa = boxa; - - for (i = 0; i < n; i++) { - if ((fscanf(fp, "\n\n --------------- pixa[%d] ---------------\n", - &ignore)) != 1) { - pixaaDestroy(&paa); - return (PIXAA *)ERROR_PTR("text reading", procName, NULL); - } - if ((pixa = pixaReadStream(fp)) == NULL) { - pixaaDestroy(&paa); - return (PIXAA *)ERROR_PTR("pixa not read", procName, NULL); - } - pixaaAddPixa(paa, pixa, L_INSERT); - } - - return paa; -} - - -/*! 
- * \brief pixaaReadMem() - * - * \param[in] data of serialized pixaa - * \param[in] size of data in bytes - * \return paa, or NULL on error - */ -PIXAA * -pixaaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PIXAA *paa; - - PROCNAME("paaReadMem"); - - if (!data) - return (PIXAA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIXAA *)ERROR_PTR("stream not opened", procName, NULL); - - paa = pixaaReadStream(fp); - fclose(fp); - if (!paa) L_ERROR("paa not read\n", procName); - return paa; -} - - -/*! - * \brief pixaaWrite() - * - * \param[in] filename - * \param[in] paa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -l_ok -pixaaWrite(const char *filename, - PIXAA *paa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixaaWrite"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return ERROR_INT("no libpng: can't read data", procName, 1); -#endif /* !HAVE_LIBPNG */ - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixaaWriteStream(fp, paa); - fclose(fp); - if (ret) - return ERROR_INT("paa not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixaaWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] paa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pix are stored in the file as png.
- *          If the png library is not linked, this will fail.
- * 
- */ -l_ok -pixaaWriteStream(FILE *fp, - PIXAA *paa) -{ -l_int32 n, i; -PIXA *pixa; - - PROCNAME("pixaaWriteStream"); - -#if !HAVE_LIBPNG /* defined in environ.h and config_auto.h */ - return ERROR_INT("no libpng: can't read data", procName, 1); -#endif /* !HAVE_LIBPNG */ - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - - n = pixaaGetCount(paa, NULL); - fprintf(fp, "\nPixaa Version %d\n", PIXAA_VERSION_NUMBER); - fprintf(fp, "Number of pixa = %d\n", n); - boxaWriteStream(fp, paa->boxa); - for (i = 0; i < n; i++) { - if ((pixa = pixaaGetPixa(paa, i, L_CLONE)) == NULL) - return ERROR_INT("pixa not found", procName, 1); - fprintf(fp, "\n\n --------------- pixa[%d] ---------------\n", i); - pixaWriteStream(fp, pixa); - pixaDestroy(&pixa); - } - return 0; -} - - -/*! - * \brief pixaaWriteMem() - * - * \param[out] pdata data of serialized pixaa - * \param[out] psize size of returned data - * \param[in] paa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a pixaa in memory and puts the result in a buffer.
- * 
- */ -l_ok -pixaaWriteMem(l_uint8 **pdata, - size_t *psize, - PIXAA *paa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixaaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixaaWriteStream(fp, paa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixaaWriteStream(fp, paa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixacc.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixacc.c deleted file mode 100644 index fbf54ba8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixacc.c +++ /dev/null @@ -1,356 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixacc.c - *
- *
- *      Pixacc creation, destruction
- *           PIXACC   *pixaccCreate()
- *           PIXACC   *pixaccCreateFromPix()
- *           void      pixaccDestroy()
- *
- *      Pixacc finalization
- *           PIX      *pixaccFinal()
- *
- *      Pixacc accessors
- *           PIX      *pixaccGetPix()
- *           l_int32   pixaccGetOffset()
- *
- *      Pixacc accumulators
- *           l_int32   pixaccAdd()
- *           l_int32   pixaccSubtract()
- *           l_int32   pixaccMultConst()
- *           l_int32   pixaccMultConstAccumulate()
- *
- *  This is a simple interface for some of the pixel arithmetic operations
- *  in pixarith.c.  These are easy to code up, but not as fast as
- *  hand-coded functions that do arithmetic on corresponding pixels.
- *
- *  Suppose you want to make a linear combination of pix1 and pix2:
- *     pixd = 0.4 * pix1 + 0.6 * pix2
- *  where pix1 and pix2 are the same size and have depth 'd'.  Then:
- *     Pixacc *pacc = pixaccCreateFromPix(pix1, 0);  // first; addition only
- *     pixaccMultConst(pacc, 0.4);
- *     pixaccMultConstAccumulate(pacc, pix2, 0.6);  // Add in 0.6 of the second
- *     pixd = pixaccFinal(pacc, d);  // Get the result
- *     pixaccDestroy(&pacc);
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*---------------------------------------------------------------------* - * Pixacc creation, destruction * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaccCreate() - * - * \param[in] w, h of 32 bpp internal Pix - * \param[in] negflag 0 if only positive numbers are involved; - * 1 if there will be negative numbers - * \return pixacc, or NULL on error - * - *
- * Notes:
- *      (1) Use %negflag = 1 for safety if any negative numbers are going
- *          to be used in the chain of operations.  Negative numbers
- *          arise, e.g., by subtracting a pix, or by adding a pix
- *          that has been pre-multiplied by a negative number.
- *      (2) Initializes the internal 32 bpp pix, similarly to the
- *          initialization in pixInitAccumulate().
- * 
- */ -PIXACC * -pixaccCreate(l_int32 w, - l_int32 h, - l_int32 negflag) -{ -PIXACC *pixacc; - - PROCNAME("pixaccCreate"); - - if ((pixacc = (PIXACC *)LEPT_CALLOC(1, sizeof(PIXACC))) == NULL) - return (PIXACC *)ERROR_PTR("pixacc not made", procName, NULL); - pixacc->w = w; - pixacc->h = h; - - if ((pixacc->pix = pixCreate(w, h, 32)) == NULL) { - pixaccDestroy(&pixacc); - return (PIXACC *)ERROR_PTR("pix not made", procName, NULL); - } - - if (negflag) { - pixacc->offset = 0x40000000; - pixSetAllArbitrary(pixacc->pix, pixacc->offset); - } - - return pixacc; -} - - -/*! - * \brief pixaccCreateFromPix() - * - * \param[in] pix - * \param[in] negflag 0 if only positive numbers are involved; - * 1 if there will be negative numbers - * \return pixacc, or NULL on error - * - *
- * Notes:
- *      (1) See pixaccCreate()
- * 
- */ -PIXACC * -pixaccCreateFromPix(PIX *pix, - l_int32 negflag) -{ -l_int32 w, h; -PIXACC *pixacc; - - PROCNAME("pixaccCreateFromPix"); - - if (!pix) - return (PIXACC *)ERROR_PTR("pix not defined", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - pixacc = pixaccCreate(w, h, negflag); - pixaccAdd(pixacc, pix); - return pixacc; -} - - -/*! - * \brief pixaccDestroy() - * - * \param[in,out] ppixacc will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Always nulls the input ptr.
- * 
- */ -void -pixaccDestroy(PIXACC **ppixacc) -{ -PIXACC *pixacc; - - PROCNAME("pixaccDestroy"); - - if (ppixacc == NULL) { - L_WARNING("ptr address is NULL!", procName); - return; - } - - if ((pixacc = *ppixacc) == NULL) - return; - - pixDestroy(&pixacc->pix); - LEPT_FREE(pixacc); - *ppixacc = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * Pixacc finalization * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaccFinal() - * - * \param[in] pixacc - * \param[in] outdepth 8, 16 or 32 bpp - * \return pixd 8, 16 or 32 bpp, or NULL on error - */ -PIX * -pixaccFinal(PIXACC *pixacc, - l_int32 outdepth) -{ - PROCNAME("pixaccFinal"); - - if (!pixacc) - return (PIX *)ERROR_PTR("pixacc not defined", procName, NULL); - - return pixFinalAccumulate(pixaccGetPix(pixacc), pixaccGetOffset(pixacc), - outdepth); -} - - -/*---------------------------------------------------------------------* - * Pixacc accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaccGetPix() - * - * \param[in] pixacc - * \return pix, or NULL on error - */ -PIX * -pixaccGetPix(PIXACC *pixacc) -{ - PROCNAME("pixaccGetPix"); - - if (!pixacc) - return (PIX *)ERROR_PTR("pixacc not defined", procName, NULL); - return pixacc->pix; -} - - -/*! - * \brief pixaccGetOffset() - * - * \param[in] pixacc - * \return offset, or -1 on error - */ -l_int32 -pixaccGetOffset(PIXACC *pixacc) -{ - PROCNAME("pixaccGetOffset"); - - if (!pixacc) - return ERROR_INT("pixacc not defined", procName, -1); - return pixacc->offset; -} - - -/*---------------------------------------------------------------------* - * Pixacc accumulators * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixaccAdd() - * - * \param[in] pixacc - * \param[in] pix to be added - * \return 0 if OK, 1 on error - */ -l_ok -pixaccAdd(PIXACC *pixacc, - PIX *pix) -{ - PROCNAME("pixaccAdd"); - - if (!pixacc) - return ERROR_INT("pixacc not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixAccumulate(pixaccGetPix(pixacc), pix, L_ARITH_ADD); - return 0; -} - - -/*! - * \brief pixaccSubtract() - * - * \param[in] pixacc - * \param[in] pix to be subtracted - * \return 0 if OK, 1 on error - */ -l_ok -pixaccSubtract(PIXACC *pixacc, - PIX *pix) -{ - PROCNAME("pixaccSubtract"); - - if (!pixacc) - return ERROR_INT("pixacc not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixAccumulate(pixaccGetPix(pixacc), pix, L_ARITH_SUBTRACT); - return 0; -} - - -/*! - * \brief pixaccMultConst() - * - * \param[in] pixacc - * \param[in] factor - * \return 0 if OK, 1 on error - */ -l_ok -pixaccMultConst(PIXACC *pixacc, - l_float32 factor) -{ - PROCNAME("pixaccMultConst"); - - if (!pixacc) - return ERROR_INT("pixacc not defined", procName, 1); - pixMultConstAccumulate(pixaccGetPix(pixacc), factor, - pixaccGetOffset(pixacc)); - return 0; -} - - -/*! - * \brief pixaccMultConstAccumulate() - * - * \param[in] pixacc - * \param[in] pix - * \param[in] factor - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This creates a temp pix that is %pix multiplied by the
- *          constant %factor.  It then adds that into %pixacc.
- * 
- */ -l_ok -pixaccMultConstAccumulate(PIXACC *pixacc, - PIX *pix, - l_float32 factor) -{ -l_int32 w, h, d, negflag; -PIX *pixt; -PIXACC *pacct; - - PROCNAME("pixaccMultConstAccumulate"); - - if (!pixacc) - return ERROR_INT("pixacc not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if (factor == 0.0) return 0; - - pixGetDimensions(pix, &w, &h, &d); - negflag = (factor > 0.0) ? 0 : 1; - pacct = pixaccCreate(w, h, negflag); - pixaccAdd(pacct, pix); - pixaccMultConst(pacct, factor); - pixt = pixaccFinal(pacct, d); - pixaccAdd(pixacc, pixt); - - pixaccDestroy(&pacct); - pixDestroy(&pixt); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixafunc1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixafunc1.c deleted file mode 100644 index 7c9547be..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixafunc1.c +++ /dev/null @@ -1,2975 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixafunc1.c - *
- *
- *      Filters
- *           PIX      *pixSelectBySize()
- *           PIXA     *pixaSelectBySize()
- *           NUMA     *pixaMakeSizeIndicator()
- *
- *           PIX      *pixSelectByPerimToAreaRatio()
- *           PIXA     *pixaSelectByPerimToAreaRatio()
- *           PIX      *pixSelectByPerimSizeRatio()
- *           PIXA     *pixaSelectByPerimSizeRatio()
- *           PIX      *pixSelectByAreaFraction()
- *           PIXA     *pixaSelectByAreaFraction()
- *           PIX      *pixSelectByWidthHeightRatio()
- *           PIXA     *pixaSelectByWidthHeightRatio()
- *           PIXA     *pixaSelectByNumConnComp()
- *
- *           PIXA     *pixaSelectWithIndicator()
- *           l_int32   pixRemoveWithIndicator()
- *           l_int32   pixAddWithIndicator()
- *           PIXA     *pixaSelectWithString()
- *           PIX      *pixaRenderComponent()
- *
- *      Sort functions
- *           PIXA     *pixaSort()
- *           PIXA     *pixaBinSort()
- *           PIXA     *pixaSortByIndex()
- *           PIXAA    *pixaSort2dByIndex()
- *
- *      Pixa and Pixaa range selection
- *           PIXA     *pixaSelectRange()
- *           PIXAA    *pixaaSelectRange()
- *
- *      Pixa and Pixaa scaling
- *           PIXAA    *pixaaScaleToSize()
- *           PIXAA    *pixaaScaleToSizeVar()
- *           PIXA     *pixaScaleToSize()
- *           PIXA     *pixaScaleToSizeRel()
- *           PIXA     *pixaScale()
- *           PIXA     *pixaScaleBySampling()
- *
- *      Pixa rotation and translation
- *           PIXA     *pixaRotate()
- *           PIXA     *pixaRotateOrth()
- *           PIXA     *pixaTranslate()
- *
- *      Miscellaneous
- *           PIXA     *pixaAddBorderGeneral()
- *           PIXA     *pixaaFlattenToPixa()
- *           l_int32   pixaaSizeRange()
- *           l_int32   pixaSizeRange()
- *           PIXA     *pixaClipToPix()
- *           PIXA     *pixaClipToForeground()
- *           l_int32   pixaGetRenderingDepth()
- *           l_int32   pixaHasColor()
- *           l_int32   pixaAnyColormaps()
- *           l_int32   pixaGetDepthInfo()
- *           PIXA     *pixaConvertToSameDepth()
- *           l_int32   pixaEqual()
- *           l_int32   pixaSetFullSizeBoxa()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* For more than this number of c.c. in a binarized image of - * semi-perimeter (w + h) about 5000 or less, the O(n) binsort - * is faster than the O(nlogn) shellsort. */ -static const l_int32 MinCompsForBinSort = 200; - - /* Don't rotate any angle smaller than this */ -static const l_float32 MinAngleToRotate = 0.001; /* radians; ~0.06 deg */ - -/*---------------------------------------------------------------------* - * Filters * - *---------------------------------------------------------------------*/ -/* - * These filters work on the connected components of 1 bpp images. - * They are typically used on pixa that have been generated from a Pix - * using pixConnComp(), so that the corresponding Boxa is available. - * - * The filters remove or retain c.c. based on these properties: - * (a) size [pixaFindDimensions()] - * (b) area-to-perimeter ratio [pixaFindAreaPerimRatio()] - * (c) foreground area as a fraction of bounding box area (w * h) - * [pixaFindForegroundArea()] - * (d) number of foreground pixels [pixaCountPixels()] - * (e) width/height aspect ratio [pixFindWidthHeightRatio()] - * - * We provide two different high-level interfaces: - * (1) Functions that use one of the filters on either - * a pix or the pixa of components. - * (2) A general method that generates numas of indicator functions, - * logically combines them, and efficiently removes or adds - * the selected components. - * - * For interface (1), the filtering is performed with a single function call. - * This is the easiest way to do simple filtering. These functions - * are named pixSelectBy*() and pixaSelectBy*(), where the '*' is one of: - * Size - * PerimToAreaRatio - * PerimSizeRatio - * AreaFraction - * WidthHeightRatio - * - * For more complicated filtering, use the general method (2). 
- * The numa indicator functions for a pixa are generated by these functions: - * pixaFindDimensions() - * pixaFindPerimToAreaRatio() - * pixaFindPerimSizeRatio() - * pixaFindAreaFraction() - * pixaCountPixels() - * pixaFindWidthHeightRatio() - * pixaFindWidthHeightProduct() - * - * Here is an illustration using the general method. Suppose you want - * all 8-connected components that have a height greater than 40 pixels, - * a width not more than 30 pixels, between 150 and 300 fg pixels, - * and a perimeter-to-size ratio between 1.2 and 2.0. - * - * // Generate the pixa of 8 cc pieces. - * boxa = pixConnComp(pixs, &pixa, 8); - * - * // Extract the data we need about each component. - * pixaFindDimensions(pixa, &naw, &nah); - * nas = pixaCountPixels(pixa); - * nar = pixaFindPerimSizeRatio(pixa); - * - * // Build the indicator arrays for the set of components, - * // based on thresholds and selection criteria. - * na1 = numaMakeThresholdIndicator(nah, 40, L_SELECT_IF_GT); - * na2 = numaMakeThresholdIndicator(naw, 30, L_SELECT_IF_LTE); - * na3 = numaMakeThresholdIndicator(nas, 150, L_SELECT_IF_GTE); - * na4 = numaMakeThresholdIndicator(nas, 300, L_SELECT_IF_LTE); - * na5 = numaMakeThresholdIndicator(nar, 1.2, L_SELECT_IF_GTE); - * na6 = numaMakeThresholdIndicator(nar, 2.0, L_SELECT_IF_LTE); - * - * // Combine the indicator arrays logically to find - * // the components that will be retained. - * nad = numaLogicalOp(NULL, na1, na2, L_INTERSECTION); - * numaLogicalOp(nad, nad, na3, L_INTERSECTION); - * numaLogicalOp(nad, nad, na4, L_INTERSECTION); - * numaLogicalOp(nad, nad, na5, L_INTERSECTION); - * numaLogicalOp(nad, nad, na6, L_INTERSECTION); - * - * // Invert to get the components that will be removed. - * numaInvert(nad, nad); - * - * // Remove the components, in-place. - * pixRemoveWithIndicator(pixs, pixa, nad); - */ - - -/*! 
- * \brief pixSelectBySize() - * - * \param[in] pixs 1 bpp - * \param[in] width, height threshold dimensions - * \param[in] connectivity 4 or 8 - * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT, - * L_SELECT_IF_EITHER, L_SELECT_IF_BOTH - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 otherwise - * \return filtered pixd, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) If unchanged, returns a copy of pixs.  Otherwise,
- *          returns a new pix with the filtered components.
- *      (3) If the selection type is L_SELECT_WIDTH, the input
- *          height is ignored, and v.v.
- *      (4) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -PIX * -pixSelectBySize(PIX *pixs, - l_int32 width, - l_int32 height, - l_int32 connectivity, - l_int32 type, - l_int32 relation, - l_int32 *pchanged) -{ -l_int32 w, h, empty, changed, count; -BOXA *boxa; -PIX *pixd; -PIXA *pixas, *pixad; - - PROCNAME("pixSelectBySize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT && - type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (PIX *)ERROR_PTR("invalid relation", procName, NULL); - if (pchanged) *pchanged = FALSE; - - /* Check if any components exist */ - pixZero(pixs, &empty); - if (empty) - return pixCopy(NULL, pixs); - - /* Identify and select the components */ - boxa = pixConnComp(pixs, &pixas, connectivity); - pixad = pixaSelectBySize(pixas, width, height, type, relation, &changed); - boxaDestroy(&boxa); - pixaDestroy(&pixas); - - if (!changed) { - pixaDestroy(&pixad); - return pixCopy(NULL, pixs); - } - - /* Render the result */ - if (pchanged) *pchanged = TRUE; - pixGetDimensions(pixs, &w, &h, NULL); - count = pixaGetCount(pixad); - if (count == 0) { /* return empty pix */ - pixd = pixCreateTemplate(pixs); - } else { - pixd = pixaDisplay(pixad, w, h); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - } - pixaDestroy(&pixad); - return pixd; -} - - -/*! 
- * \brief pixaSelectBySize() - * - * \param[in] pixas - * \param[in] width, height threshold dimensions - * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT, - * L_SELECT_IF_EITHER, L_SELECT_IF_BOTH - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 otherwise - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) If the selection type is L_SELECT_WIDTH, the input
- *          height is ignored, and v.v.
- *      (4) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -PIXA * -pixaSelectBySize(PIXA *pixas, - l_int32 width, - l_int32 height, - l_int32 type, - l_int32 relation, - l_int32 *pchanged) -{ -NUMA *na; -PIXA *pixad; - - PROCNAME("pixaSelectBySize"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT && - type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (PIXA *)ERROR_PTR("invalid relation", procName, NULL); - - /* Compute the indicator array for saving components */ - na = pixaMakeSizeIndicator(pixas, width, height, type, relation); - - /* Filter to get output */ - pixad = pixaSelectWithIndicator(pixas, na, pchanged); - - numaDestroy(&na); - return pixad; -} - - -/*! - * \brief pixaMakeSizeIndicator() - * - * \param[in] pixa - * \param[in] width, height threshold dimensions - * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT, - * L_SELECT_IF_EITHER, L_SELECT_IF_BOTH - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return na indicator array, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) If the selection type is L_SELECT_WIDTH, the input
- *          height is ignored, and v.v.
- *      (3) To keep small components, use relation = L_SELECT_IF_LT or
- *          L_SELECT_IF_LTE.
- *          To keep large components, use relation = L_SELECT_IF_GT or
- *          L_SELECT_IF_GTE.
- * 
- */ -NUMA * -pixaMakeSizeIndicator(PIXA *pixa, - l_int32 width, - l_int32 height, - l_int32 type, - l_int32 relation) -{ -l_int32 i, n, w, h, ival; -NUMA *na; - - PROCNAME("pixaMakeSizeIndicator"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT && - type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH) - return (NUMA *)ERROR_PTR("invalid type", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (NUMA *)ERROR_PTR("invalid relation", procName, NULL); - - n = pixaGetCount(pixa); - na = numaCreate(n); - for (i = 0; i < n; i++) { - ival = 0; - pixaGetPixDimensions(pixa, i, &w, &h, NULL); - switch (type) - { - case L_SELECT_WIDTH: - if ((relation == L_SELECT_IF_LT && w < width) || - (relation == L_SELECT_IF_GT && w > width) || - (relation == L_SELECT_IF_LTE && w <= width) || - (relation == L_SELECT_IF_GTE && w >= width)) - ival = 1; - break; - case L_SELECT_HEIGHT: - if ((relation == L_SELECT_IF_LT && h < height) || - (relation == L_SELECT_IF_GT && h > height) || - (relation == L_SELECT_IF_LTE && h <= height) || - (relation == L_SELECT_IF_GTE && h >= height)) - ival = 1; - break; - case L_SELECT_IF_EITHER: - if (((relation == L_SELECT_IF_LT) && (w < width || h < height)) || - ((relation == L_SELECT_IF_GT) && (w > width || h > height)) || - ((relation == L_SELECT_IF_LTE) && (w <= width || h <= height)) || - ((relation == L_SELECT_IF_GTE) && (w >= width || h >= height))) - ival = 1; - break; - case L_SELECT_IF_BOTH: - if (((relation == L_SELECT_IF_LT) && (w < width && h < height)) || - ((relation == L_SELECT_IF_GT) && (w > width && h > height)) || - ((relation == L_SELECT_IF_LTE) && (w <= width && h <= height)) || - ((relation == L_SELECT_IF_GTE) && (w >= width && h >= height))) - ival = 1; - break; - default: - L_WARNING("can't get here!\n", procName); - break; - } - 
numaAddNumber(na, ival); - } - - return na; -} - - -/*! - * \brief pixSelectByPerimToAreaRatio() - * - * \param[in] pixs 1 bpp - * \param[in] thresh threshold ratio of fg boundary to fg pixels - * \param[in] connectivity 4 or 8 - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) If unchanged, returns a copy of pixs.  Otherwise,
- *          returns a new pix with the filtered components.
- *      (3) This filters "thick" components, where a thick component
- *          is defined to have a ratio of boundary to interior pixels
- *          that is smaller than a given threshold value.
- *      (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save the thicker
- *          components, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
- * 
- */ -PIX * -pixSelectByPerimToAreaRatio(PIX *pixs, - l_float32 thresh, - l_int32 connectivity, - l_int32 type, - l_int32 *pchanged) -{ -l_int32 w, h, empty, changed, count; -BOXA *boxa; -PIX *pixd; -PIXA *pixas, *pixad; - - PROCNAME("pixSelectByPerimToAreaRatio"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (pchanged) *pchanged = FALSE; - - /* Check if any components exist */ - pixZero(pixs, &empty); - if (empty) - return pixCopy(NULL, pixs); - - /* Filter thin components */ - boxa = pixConnComp(pixs, &pixas, connectivity); - pixad = pixaSelectByPerimToAreaRatio(pixas, thresh, type, &changed); - boxaDestroy(&boxa); - pixaDestroy(&pixas); - - if (!changed) { - pixaDestroy(&pixad); - return pixCopy(NULL, pixs); - } - - /* Render the result */ - if (pchanged) *pchanged = TRUE; - pixGetDimensions(pixs, &w, &h, NULL); - count = pixaGetCount(pixad); - if (count == 0) { /* return empty pix */ - pixd = pixCreateTemplate(pixs); - } else { - pixd = pixaDisplay(pixad, w, h); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - } - pixaDestroy(&pixad); - return pixd; -} - - -/*! - * \brief pixaSelectByPerimToAreaRatio() - * - * \param[in] pixas - * \param[in] thresh threshold ratio of fg boundary to fg pixels - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa clone if no components are removed.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) See pixSelectByPerimToAreaRatio().
- * 
- */ -PIXA * -pixaSelectByPerimToAreaRatio(PIXA *pixas, - l_float32 thresh, - l_int32 type, - l_int32 *pchanged) -{ -NUMA *na, *nai; -PIXA *pixad; - - PROCNAME("pixaSelectByPerimToAreaRatio"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - - /* Compute component ratios. */ - na = pixaFindPerimToAreaRatio(pixas); - - /* Generate indicator array for elements to be saved. */ - nai = numaMakeThresholdIndicator(na, thresh, type); - numaDestroy(&na); - - /* Filter to get output */ - pixad = pixaSelectWithIndicator(pixas, nai, pchanged); - - numaDestroy(&nai); - return pixad; -} - - -/*! - * \brief pixSelectByPerimSizeRatio() - * - * \param[in] pixs 1 bpp - * \param[in] thresh threshold ratio of fg boundary to fg pixels - * \param[in] connectivity 4 or 8 - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the size of the
- *          components that are kept.
- *      (2) If unchanged, returns a copy of pixs.  Otherwise,
- *          returns a new pix with the filtered components.
- *      (3) This filters components with smooth vs. dendritic shape, using
- *          the ratio of the fg boundary pixels to the circumference of
- *          the bounding box, and comparing it to a threshold value.
- *      (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save the smooth
- *          boundary components, and L_SELECT_IF_GT or L_SELECT_IF_GTE
- *          to remove them.
- * 
- */ -PIX * -pixSelectByPerimSizeRatio(PIX *pixs, - l_float32 thresh, - l_int32 connectivity, - l_int32 type, - l_int32 *pchanged) -{ -l_int32 w, h, empty, changed, count; -BOXA *boxa; -PIX *pixd; -PIXA *pixas, *pixad; - - PROCNAME("pixSelectByPerimSizeRatio"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (pchanged) *pchanged = FALSE; - - /* Check if any components exist */ - pixZero(pixs, &empty); - if (empty) - return pixCopy(NULL, pixs); - - /* Filter thin components */ - boxa = pixConnComp(pixs, &pixas, connectivity); - pixad = pixaSelectByPerimSizeRatio(pixas, thresh, type, &changed); - boxaDestroy(&boxa); - pixaDestroy(&pixas); - - if (!changed) { - pixaDestroy(&pixad); - return pixCopy(NULL, pixs); - } - - /* Render the result */ - if (pchanged) *pchanged = TRUE; - pixGetDimensions(pixs, &w, &h, NULL); - count = pixaGetCount(pixad); - if (count == 0) { /* return empty pix */ - pixd = pixCreateTemplate(pixs); - } else { - pixd = pixaDisplay(pixad, w, h); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - } - pixaDestroy(&pixad); - return pixd; -} - - -/*! - * \brief pixaSelectByPerimSizeRatio() - * - * \param[in] pixas - * \param[in] thresh threshold ratio of fg boundary to b.b. circumference - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa clone if no components are removed.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) See pixSelectByPerimSizeRatio().
- * 
- */ -PIXA * -pixaSelectByPerimSizeRatio(PIXA *pixas, - l_float32 thresh, - l_int32 type, - l_int32 *pchanged) -{ -NUMA *na, *nai; -PIXA *pixad; - - PROCNAME("pixaSelectByPerimSizeRatio"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - - /* Compute component ratios. */ - na = pixaFindPerimSizeRatio(pixas); - - /* Generate indicator array for elements to be saved. */ - nai = numaMakeThresholdIndicator(na, thresh, type); - numaDestroy(&na); - - /* Filter to get output */ - pixad = pixaSelectWithIndicator(pixas, nai, pchanged); - - numaDestroy(&nai); - return pixad; -} - - -/*! - * \brief pixSelectByAreaFraction() - * - * \param[in] pixs 1 bpp - * \param[in] thresh threshold ratio of fg pixels to (w * h) - * \param[in] connectivity 4 or 8 - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the amount of foreground
- *          coverage of the components that are kept.
- *      (2) If unchanged, returns a copy of pixs.  Otherwise,
- *          returns a new pix with the filtered components.
- *      (3) This filters components based on the fraction of fg pixels
- *          of the component in its bounding box.
- *      (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components
- *          with less than the threshold fraction of foreground, and
- *          L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
- * 
- */ -PIX * -pixSelectByAreaFraction(PIX *pixs, - l_float32 thresh, - l_int32 connectivity, - l_int32 type, - l_int32 *pchanged) -{ -l_int32 w, h, empty, changed, count; -BOXA *boxa; -PIX *pixd; -PIXA *pixas, *pixad; - - PROCNAME("pixSelectByAreaFraction"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (pchanged) *pchanged = FALSE; - - /* Check if any components exist */ - pixZero(pixs, &empty); - if (empty) - return pixCopy(NULL, pixs); - - /* Filter components */ - boxa = pixConnComp(pixs, &pixas, connectivity); - pixad = pixaSelectByAreaFraction(pixas, thresh, type, &changed); - boxaDestroy(&boxa); - pixaDestroy(&pixas); - - if (!changed) { - pixaDestroy(&pixad); - return pixCopy(NULL, pixs); - } - - /* Render the result */ - if (pchanged) *pchanged = TRUE; - pixGetDimensions(pixs, &w, &h, NULL); - count = pixaGetCount(pixad); - if (count == 0) { /* return empty pix */ - pixd = pixCreateTemplate(pixs); - } else { - pixd = pixaDisplay(pixad, w, h); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - } - pixaDestroy(&pixad); - return pixd; -} - - -/*! - * \brief pixaSelectByAreaFraction() - * - * \param[in] pixas - * \param[in] thresh threshold ratio of fg pixels to (w * h) - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa clone if no components are removed.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) This filters components based on the fraction of fg pixels
- *          of the component in its bounding box.
- *      (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components
- *          with less than the threshold fraction of foreground, and
- *          L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
- * 
- */ -PIXA * -pixaSelectByAreaFraction(PIXA *pixas, - l_float32 thresh, - l_int32 type, - l_int32 *pchanged) -{ -NUMA *na, *nai; -PIXA *pixad; - - PROCNAME("pixaSelectByAreaFraction"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - - /* Compute component ratios. */ - na = pixaFindAreaFraction(pixas); - - /* Generate indicator array for elements to be saved. */ - nai = numaMakeThresholdIndicator(na, thresh, type); - numaDestroy(&na); - - /* Filter to get output */ - pixad = pixaSelectWithIndicator(pixas, nai, pchanged); - - numaDestroy(&nai); - return pixad; -} - - -/*! - * \brief pixSelectByWidthHeightRatio() - * - * \param[in] pixs 1 bpp - * \param[in] thresh threshold ratio of width/height - * \param[in] connectivity 4 or 8 - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The args specify constraints on the width-to-height ratio
- *          for components that are kept.
- *      (2) If unchanged, returns a copy of pixs.  Otherwise,
- *          returns a new pix with the filtered components.
- *      (3) This filters components based on the width-to-height ratios.
- *      (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components
- *          with less than the threshold ratio, and
- *          L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
- * 
- */ -PIX * -pixSelectByWidthHeightRatio(PIX *pixs, - l_float32 thresh, - l_int32 connectivity, - l_int32 type, - l_int32 *pchanged) -{ -l_int32 w, h, empty, changed, count; -BOXA *boxa; -PIX *pixd; -PIXA *pixas, *pixad; - - PROCNAME("pixSelectByWidthHeightRatio"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (pchanged) *pchanged = FALSE; - - /* Check if any components exist */ - pixZero(pixs, &empty); - if (empty) - return pixCopy(NULL, pixs); - - /* Filter components */ - boxa = pixConnComp(pixs, &pixas, connectivity); - pixad = pixaSelectByWidthHeightRatio(pixas, thresh, type, &changed); - boxaDestroy(&boxa); - pixaDestroy(&pixas); - - if (!changed) { - pixaDestroy(&pixad); - return pixCopy(NULL, pixs); - } - - /* Render the result */ - if (pchanged) *pchanged = TRUE; - pixGetDimensions(pixs, &w, &h, NULL); - count = pixaGetCount(pixad); - if (count == 0) { /* return empty pix */ - pixd = pixCreateTemplate(pixs); - } else { - pixd = pixaDisplay(pixad, w, h); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - } - pixaDestroy(&pixad); - return pixd; -} - - -/*! - * \brief pixaSelectByWidthHeightRatio() - * - * \param[in] pixas - * \param[in] thresh threshold ratio of width/height - * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa clone if no components are removed.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) This filters components based on the width-to-height ratio
- *          of each pix.
- *      (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components
- *          with less than the threshold ratio, and
- *          L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
- * 
- */ -PIXA * -pixaSelectByWidthHeightRatio(PIXA *pixas, - l_float32 thresh, - l_int32 type, - l_int32 *pchanged) -{ -NUMA *na, *nai; -PIXA *pixad; - - PROCNAME("pixaSelectByWidthHeightRatio"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_SELECT_IF_LT && type != L_SELECT_IF_GT && - type != L_SELECT_IF_LTE && type != L_SELECT_IF_GTE) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - - /* Compute component ratios. */ - na = pixaFindWidthHeightRatio(pixas); - - /* Generate indicator array for elements to be saved. */ - nai = numaMakeThresholdIndicator(na, thresh, type); - numaDestroy(&na); - - /* Filter to get output */ - pixad = pixaSelectWithIndicator(pixas, nai, pchanged); - - numaDestroy(&nai); - return pixad; -} - - -/*! - * \brief pixaSelectByNumConnComp() - * - * \param[in] pixas - * \param[in] nmin minimum number of components - * \param[in] nmax maximum number of components - * \param[in] connectivity 4 or 8 - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa clone if no components are removed.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) This filters by the number of connected components in
- *          a given range.
- * 
- */ -PIXA * -pixaSelectByNumConnComp(PIXA *pixas, - l_int32 nmin, - l_int32 nmax, - l_int32 connectivity, - l_int32 *pchanged) -{ -l_int32 n, i, count; -NUMA *na; -PIX *pix; -PIXA *pixad; - - PROCNAME("pixaSelectByNumConnComp"); - - if (pchanged) *pchanged = 0; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (nmin > nmax) - return (PIXA *)ERROR_PTR("nmin > nmax", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIXA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - /* Get indicator array based on number of c.c. */ - n = pixaGetCount(pixas); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixas, i, L_CLONE); - pixCountConnComp(pix, connectivity, &count); - if (count >= nmin && count <= nmax) - numaAddNumber(na, 1); - else - numaAddNumber(na, 0); - pixDestroy(&pix); - } - - /* Filter to get output */ - pixad = pixaSelectWithIndicator(pixas, na, pchanged); - numaDestroy(&na); - return pixad; -} - - -/*! - * \brief pixaSelectWithIndicator() - * - * \param[in] pixas - * \param[in] na indicator numa - * \param[out] pchanged [optional] 1 if changed; 0 if clone returned - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa clone if no components are removed.
- *      (2) Uses pix and box clones in the new pixa.
- *      (3) The indicator numa has values 0 (ignore) and 1 (accept).
- *      (4) If the source boxa is not fully populated, it is left
- *          empty in the dest pixa.
- * 
- */ -PIXA * -pixaSelectWithIndicator(PIXA *pixas, - NUMA *na, - l_int32 *pchanged) -{ -l_int32 i, n, nbox, ival, nsave; -BOX *box; -PIX *pix1; -PIXA *pixad; - - PROCNAME("pixaSelectWithIndicator"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!na) - return (PIXA *)ERROR_PTR("na not defined", procName, NULL); - - nsave = 0; - n = numaGetCount(na); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - if (ival == 1) nsave++; - } - - if (nsave == n) { - if (pchanged) *pchanged = FALSE; - return pixaCopy(pixas, L_CLONE); - } - if (pchanged) *pchanged = TRUE; - pixad = pixaCreate(nsave); - nbox = pixaGetBoxaCount(pixas); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - if (ival == 0) continue; - pix1 = pixaGetPix(pixas, i, L_CLONE); - pixaAddPix(pixad, pix1, L_INSERT); - if (nbox == n) { /* fully populated boxa */ - box = pixaGetBox(pixas, i, L_CLONE); - pixaAddBox(pixad, box, L_INSERT); - } - } - - return pixad; -} - - -/*! - * \brief pixRemoveWithIndicator() - * - * \param[in] pixs 1 bpp pix from which components are removed; in-place - * \param[in] pixa of connected components in pixs - * \param[in] na numa indicator: remove components corresponding to 1s - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This complements pixAddWithIndicator().   Here, the selected
- *          components are set subtracted from pixs.
- * 
- */ -l_ok -pixRemoveWithIndicator(PIX *pixs, - PIXA *pixa, - NUMA *na) -{ -l_int32 i, n, ival, x, y, w, h; -BOX *box; -PIX *pix; - - PROCNAME("pixRemoveWithIndicator"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = pixaGetCount(pixa); - if (n != numaGetCount(na)) - return ERROR_INT("pixa and na sizes not equal", procName, 1); - - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - if (ival == 1) { - pix = pixaGetPix(pixa, i, L_CLONE); - box = pixaGetBox(pixa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - pixRasterop(pixs, x, y, w, h, PIX_DST & PIX_NOT(PIX_SRC), - pix, 0, 0); - boxDestroy(&box); - pixDestroy(&pix); - } - } - - return 0; -} - - -/*! - * \brief pixAddWithIndicator() - * - * \param[in] pixs 1 bpp pix from which components are added; in-place - * \param[in] pixa of connected components, some of which will be put - * into pixs - * \param[in] na numa indicator: add components corresponding to 1s - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This complements pixRemoveWithIndicator().   Here, the selected
- *          components are added to pixs.
- * 
- */ -l_ok -pixAddWithIndicator(PIX *pixs, - PIXA *pixa, - NUMA *na) -{ -l_int32 i, n, ival, x, y, w, h; -BOX *box; -PIX *pix; - - PROCNAME("pixAddWithIndicator"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!na) - return ERROR_INT("na not defined", procName, 1); - n = pixaGetCount(pixa); - if (n != numaGetCount(na)) - return ERROR_INT("pixa and na sizes not equal", procName, 1); - - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &ival); - if (ival == 1) { - pix = pixaGetPix(pixa, i, L_CLONE); - box = pixaGetBox(pixa, i, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - pixRasterop(pixs, x, y, w, h, PIX_SRC | PIX_DST, pix, 0, 0); - boxDestroy(&box); - pixDestroy(&pix); - } - } - - return 0; -} - - -/*! - * \brief pixaSelectWithString() - * - * \param[in] pixas - * \param[in] str string of indices into pixa, giving the pix to - * be selected - * \param[out] perror [optional] 1 if any indices are invalid; - * 0 if all indices are valid - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Returns a pixa with copies of selected pix.
- *      (2) Associated boxes are also copied, if fully populated.
- * 
- */ -PIXA * -pixaSelectWithString(PIXA *pixas, - const char *str, - l_int32 *perror) -{ -l_int32 i, nval, npix, nbox, val, imaxval; -l_float32 maxval; -BOX *box; -NUMA *na; -PIX *pix1; -PIXA *pixad; - - PROCNAME("pixaSelectWithString"); - - if (perror) *perror = 0; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!str) - return (PIXA *)ERROR_PTR("str not defined", procName, NULL); - - if ((na = numaCreateFromString(str)) == NULL) - return (PIXA *)ERROR_PTR("na not made", procName, NULL); - if ((nval = numaGetCount(na)) == 0) { - numaDestroy(&na); - return (PIXA *)ERROR_PTR("no indices found", procName, NULL); - } - numaGetMax(na, &maxval, NULL); - imaxval = (l_int32)(maxval + 0.1); - nbox = pixaGetBoxaCount(pixas); - npix = pixaGetCount(pixas); - if (imaxval >= npix) { - if (perror) *perror = 1; - L_ERROR("max index = %d, size of pixa = %d\n", procName, imaxval, npix); - } - - pixad = pixaCreate(nval); - for (i = 0; i < nval; i++) { - numaGetIValue(na, i, &val); - if (val < 0 || val >= npix) { - L_ERROR("index %d out of range of pix\n", procName, val); - continue; - } - pix1 = pixaGetPix(pixas, val, L_COPY); - pixaAddPix(pixad, pix1, L_INSERT); - if (nbox == npix) { /* fully populated boxa */ - box = pixaGetBox(pixas, val, L_COPY); - pixaAddBox(pixad, box, L_INSERT); - } - } - numaDestroy(&na); - return pixad; -} - - -/*! - * \brief pixaRenderComponent() - * - * \param[in] pixs [optional] 1 bpp pix - * \param[in] pixa of 1 bpp connected components, one of which will - * be rendered in pixs, with its origin determined - * by the associated box. - * \param[in] index of component to be rendered - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) If pixs is null, this generates an empty pix of a size determined
- *          by union of the component bounding boxes, and including the origin.
- *      (2) The selected component is blitted into pixs.
- * 
- */ -PIX * -pixaRenderComponent(PIX *pixs, - PIXA *pixa, - l_int32 index) -{ -l_int32 n, x, y, w, h, same, maxd; -BOX *box; -BOXA *boxa; -PIX *pix; - - PROCNAME("pixaRenderComponent"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, pixs); - n = pixaGetCount(pixa); - if (index < 0 || index >= n) - return (PIX *)ERROR_PTR("invalid index", procName, pixs); - if (pixs && (pixGetDepth(pixs) != 1)) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixs); - pixaVerifyDepth(pixa, &same, &maxd); - if (maxd > 1) - return (PIX *)ERROR_PTR("not all pix with d == 1", procName, pixs); - - boxa = pixaGetBoxa(pixa, L_CLONE); - if (!pixs) { - boxaGetExtent(boxa, &w, &h, NULL); - pixs = pixCreate(w, h, 1); - } - - pix = pixaGetPix(pixa, index, L_CLONE); - box = boxaGetBox(boxa, index, L_CLONE); - boxGetGeometry(box, &x, &y, &w, &h); - pixRasterop(pixs, x, y, w, h, PIX_SRC | PIX_DST, pix, 0, 0); - boxDestroy(&box); - pixDestroy(&pix); - boxaDestroy(&boxa); - - return pixs; -} - - -/*---------------------------------------------------------------------* - * Sort functions * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaSort() - * - * \param[in] pixas - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, - * L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION, - * L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER, - * L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO - * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING - * \param[out] pnaindex [optional] index of sorted order into - * original array - * \param[in] copyflag L_COPY, L_CLONE - * \return pixad sorted version of pixas, or NULL on error - * - *
- * Notes:
- *      (1) This sorts based on the data in the boxa.  If the boxa
- *          count is not the same as the pixa count, this returns an error.
- *      (2) If the boxa is empty, it makes one corresponding to the
- *          dimensions of each pix, which allows meaningful sorting on
- *          all types except x and y.
- *      (3) The copyflag refers to the pix and box copies that are
- *          inserted into the sorted pixa.  These are either L_COPY
- *          or L_CLONE.
- * 
- */ -PIXA * -pixaSort(PIXA *pixas, - l_int32 sorttype, - l_int32 sortorder, - NUMA **pnaindex, - l_int32 copyflag) -{ -l_int32 i, n, nb, x, y, w, h; -BOXA *boxa; -NUMA *na, *naindex; -PIXA *pixad; - - PROCNAME("pixaSort"); - - if (pnaindex) *pnaindex = NULL; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && - sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && - sorttype != L_SORT_BY_MIN_DIMENSION && - sorttype != L_SORT_BY_MAX_DIMENSION && - sorttype != L_SORT_BY_PERIMETER && - sorttype != L_SORT_BY_AREA && - sorttype != L_SORT_BY_ASPECT_RATIO) - return (PIXA *)ERROR_PTR("invalid sort type", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (PIXA *)ERROR_PTR("invalid sort order", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (PIXA *)ERROR_PTR("invalid copy flag", procName, NULL); - - /* Check the pixa and boxa counts. Make a boxa if required. 
*/ - if ((n = pixaGetCount(pixas)) == 0) { - L_INFO("no pix in pixa\n", procName); - return pixaCopy(pixas, copyflag); - } - if ((boxa = pixas->boxa) == NULL) /* not owned; do not destroy */ - return (PIXA *)ERROR_PTR("boxa not found!", procName, NULL); - nb = boxaGetCount(boxa); - if (nb == 0) { - pixaSetFullSizeBoxa(pixas); - nb = n; - boxa = pixas->boxa; /* not owned */ - if (sorttype == L_SORT_BY_X || sorttype == L_SORT_BY_Y) - L_WARNING("sort by x or y where all values are 0\n", procName); - } - if (nb != n) - return (PIXA *)ERROR_PTR("boxa and pixa counts differ", procName, NULL); - - /* Use O(n) binsort if possible */ - if (n > MinCompsForBinSort && - ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) || - (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) || - (sorttype == L_SORT_BY_PERIMETER))) - return pixaBinSort(pixas, sorttype, sortorder, pnaindex, copyflag); - - /* Build up numa of specific data */ - if ((na = numaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("na not made", procName, NULL); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - switch (sorttype) - { - case L_SORT_BY_X: - numaAddNumber(na, x); - break; - case L_SORT_BY_Y: - numaAddNumber(na, y); - break; - case L_SORT_BY_WIDTH: - numaAddNumber(na, w); - break; - case L_SORT_BY_HEIGHT: - numaAddNumber(na, h); - break; - case L_SORT_BY_MIN_DIMENSION: - numaAddNumber(na, L_MIN(w, h)); - break; - case L_SORT_BY_MAX_DIMENSION: - numaAddNumber(na, L_MAX(w, h)); - break; - case L_SORT_BY_PERIMETER: - numaAddNumber(na, w + h); - break; - case L_SORT_BY_AREA: - numaAddNumber(na, w * h); - break; - case L_SORT_BY_ASPECT_RATIO: - numaAddNumber(na, (l_float32)w / (l_float32)h); - break; - default: - L_WARNING("invalid sort type\n", procName); - } - } - - /* Get the sort index for data array */ - naindex = numaGetSortIndex(na, sortorder); - numaDestroy(&na); - if (!naindex) - return (PIXA *)ERROR_PTR("naindex not made", procName, NULL); - - /* Build up 
sorted pixa using sort index */ - if ((pixad = pixaSortByIndex(pixas, naindex, copyflag)) == NULL) { - numaDestroy(&naindex); - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - } - - if (pnaindex) - *pnaindex = naindex; - else - numaDestroy(&naindex); - return pixad; -} - - -/*! - * \brief pixaBinSort() - * - * \param[in] pixas - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, - * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER - * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING - * \param[out] pnaindex [optional] index of sorted order into - * original array - * \param[in] copyflag L_COPY, L_CLONE - * \return pixad sorted version of pixas, or NULL on error - * - *
- * Notes:
- *      (1) This sorts based on the data in the boxa.  If the boxa
- *          count is not the same as the pixa count, this returns an error.
- *      (2) The copyflag refers to the pix and box copies that are
- *          inserted into the sorted pixa.  These are either L_COPY
- *          or L_CLONE.
- *      (3) For a large number of boxes (say, greater than 1000), this
- *          O(n) binsort is much faster than the O(nlogn) shellsort.
- *          For 5000 components, this is over 20x faster than boxaSort().
- *      (4) Consequently, pixaSort() calls this function if it will
- *          likely go much faster.
- * 
- */ -PIXA * -pixaBinSort(PIXA *pixas, - l_int32 sorttype, - l_int32 sortorder, - NUMA **pnaindex, - l_int32 copyflag) -{ -l_int32 i, n, x, y, w, h; -BOXA *boxa; -NUMA *na, *naindex; -PIXA *pixad; - - PROCNAME("pixaBinSort"); - - if (pnaindex) *pnaindex = NULL; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && - sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && - sorttype != L_SORT_BY_PERIMETER) - return (PIXA *)ERROR_PTR("invalid sort type", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (PIXA *)ERROR_PTR("invalid sort order", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (PIXA *)ERROR_PTR("invalid copy flag", procName, NULL); - - /* Verify that the pixa and its boxa have the same count */ - if ((boxa = pixas->boxa) == NULL) /* not owned; do not destroy */ - return (PIXA *)ERROR_PTR("boxa not found", procName, NULL); - n = pixaGetCount(pixas); - if (boxaGetCount(boxa) != n) - return (PIXA *)ERROR_PTR("boxa and pixa counts differ", procName, NULL); - - /* Generate Numa of appropriate box dimensions */ - if ((na = numaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("na not made", procName, NULL); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - switch (sorttype) - { - case L_SORT_BY_X: - numaAddNumber(na, x); - break; - case L_SORT_BY_Y: - numaAddNumber(na, y); - break; - case L_SORT_BY_WIDTH: - numaAddNumber(na, w); - break; - case L_SORT_BY_HEIGHT: - numaAddNumber(na, h); - break; - case L_SORT_BY_PERIMETER: - numaAddNumber(na, w + h); - break; - default: - L_WARNING("invalid sort type\n", procName); - } - } - - /* Get the sort index for data array */ - naindex = numaGetBinSortIndex(na, sortorder); - numaDestroy(&na); - if (!naindex) - return (PIXA *)ERROR_PTR("naindex not made", procName, NULL); - - /* Build up sorted pixa using sort index */ - if ((pixad = 
pixaSortByIndex(pixas, naindex, copyflag)) == NULL) { - numaDestroy(&naindex); - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - } - - if (pnaindex) - *pnaindex = naindex; - else - numaDestroy(&naindex); - return pixad; -} - - -/*! - * \brief pixaSortByIndex() - * - * \param[in] pixas - * \param[in] naindex na that maps from the new pixa to the input pixa - * \param[in] copyflag L_COPY, L_CLONE - * \return pixad sorted, or NULL on error - */ -PIXA * -pixaSortByIndex(PIXA *pixas, - NUMA *naindex, - l_int32 copyflag) -{ -l_int32 i, n, index; -BOX *box; -PIX *pix; -PIXA *pixad; - - PROCNAME("pixaSortByIndex"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!naindex) - return (PIXA *)ERROR_PTR("naindex not defined", procName, NULL); - if (copyflag != L_CLONE && copyflag != L_COPY) - return (PIXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(naindex, i, &index); - pix = pixaGetPix(pixas, index, copyflag); - box = pixaGetBox(pixas, index, copyflag); - pixaAddPix(pixad, pix, L_INSERT); - pixaAddBox(pixad, box, L_INSERT); - } - - return pixad; -} - - -/*! 
- * \brief pixaSort2dByIndex() - * - * \param[in] pixas - * \param[in] naa numaa that maps from the new pixaa to the input pixas - * \param[in] copyflag L_CLONE or L_COPY - * \return paa sorted, or NULL on error - */ -PIXAA * -pixaSort2dByIndex(PIXA *pixas, - NUMAA *naa, - l_int32 copyflag) -{ -l_int32 pixtot, ntot, i, j, n, nn, index; -BOX *box; -NUMA *na; -PIX *pix; -PIXA *pixa; -PIXAA *paa; - - PROCNAME("pixaSort2dByIndex"); - - if (!pixas) - return (PIXAA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!naa) - return (PIXAA *)ERROR_PTR("naindex not defined", procName, NULL); - - /* Check counts */ - ntot = numaaGetNumberCount(naa); - pixtot = pixaGetCount(pixas); - if (ntot != pixtot) - return (PIXAA *)ERROR_PTR("element count mismatch", procName, NULL); - - n = numaaGetCount(naa); - paa = pixaaCreate(n); - for (i = 0; i < n; i++) { - na = numaaGetNuma(naa, i, L_CLONE); - nn = numaGetCount(na); - pixa = pixaCreate(nn); - for (j = 0; j < nn; j++) { - numaGetIValue(na, j, &index); - pix = pixaGetPix(pixas, index, copyflag); - box = pixaGetBox(pixas, index, copyflag); - pixaAddPix(pixa, pix, L_INSERT); - pixaAddBox(pixa, box, L_INSERT); - } - pixaaAddPixa(paa, pixa, L_INSERT); - numaDestroy(&na); - } - - return paa; -} - - -/*---------------------------------------------------------------------* - * Pixa and Pixaa range selection * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaSelectRange() - * - * \param[in] pixas - * \param[in] first use 0 to select from the beginning - * \param[in] last use -1 to select to the end - * \param[in] copyflag L_COPY, L_CLONE - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) The copyflag specifies what we do with each pix from pixas.
- *          Specifically, L_CLONE inserts a clone into pixad of each
- *          selected pix from pixas.
- * 
- */ -PIXA * -pixaSelectRange(PIXA *pixas, - l_int32 first, - l_int32 last, - l_int32 copyflag) -{ -l_int32 n, npix, i; -PIX *pix; -PIXA *pixad; - - PROCNAME("pixaSelectRange"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (PIXA *)ERROR_PTR("invalid copyflag", procName, NULL); - n = pixaGetCount(pixas); - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) - return (PIXA *)ERROR_PTR("invalid first", procName, NULL); - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) - return (PIXA *)ERROR_PTR("first > last", procName, NULL); - - npix = last - first + 1; - pixad = pixaCreate(npix); - for (i = first; i <= last; i++) { - pix = pixaGetPix(pixas, i, copyflag); - pixaAddPix(pixad, pix, L_INSERT); - } - return pixad; -} - - -/*! - * \brief pixaaSelectRange() - * - * \param[in] paas - * \param[in] first use 0 to select from the beginning - * \param[in] last use -1 to select to the end - * \param[in] copyflag L_COPY, L_CLONE - * \return paad, or NULL on error - * - *
- * Notes:
- *      (1) The copyflag specifies what we do with each pixa from paas.
- *          Specifically, L_CLONE inserts a clone into paad of each
- *          selected pixa from paas.
- * 
- */ -PIXAA * -pixaaSelectRange(PIXAA *paas, - l_int32 first, - l_int32 last, - l_int32 copyflag) -{ -l_int32 n, npixa, i; -PIXA *pixa; -PIXAA *paad; - - PROCNAME("pixaaSelectRange"); - - if (!paas) - return (PIXAA *)ERROR_PTR("paas not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (PIXAA *)ERROR_PTR("invalid copyflag", procName, NULL); - n = pixaaGetCount(paas, NULL); - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) - return (PIXAA *)ERROR_PTR("invalid first", procName, NULL); - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) - return (PIXAA *)ERROR_PTR("first > last", procName, NULL); - - npixa = last - first + 1; - paad = pixaaCreate(npixa); - for (i = first; i <= last; i++) { - pixa = pixaaGetPixa(paas, i, copyflag); - pixaaAddPixa(paad, pixa, L_INSERT); - } - return paad; -} - - -/*---------------------------------------------------------------------* - * Pixa and Pixaa scaling * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaScaleToSize() - * - * \param[in] paas - * \param[in] wd target width; use 0 if using height as target - * \param[in] hd target height; use 0 if using width as target - * \return paad, or NULL on error - * - *
- * Notes:
- *      (1) This guarantees that each output scaled image has the
- *          dimension(s) you specify.
- *           ~ To specify the width with isotropic scaling, set %hd = 0.
- *           ~ To specify the height with isotropic scaling, set %wd = 0.
- *           ~ If both %wd and %hd are specified, the image is scaled
- *             (in general, anisotropically) to that size.
- *           ~ It is an error to set both %wd and %hd to 0.
- * 
- */ -PIXAA * -pixaaScaleToSize(PIXAA *paas, - l_int32 wd, - l_int32 hd) -{ -l_int32 n, i; -PIXA *pixa1, *pixa2; -PIXAA *paad; - - PROCNAME("pixaaScaleToSize"); - - if (!paas) - return (PIXAA *)ERROR_PTR("paas not defined", procName, NULL); - if (wd <= 0 && hd <= 0) - return (PIXAA *)ERROR_PTR("neither wd nor hd > 0", procName, NULL); - - n = pixaaGetCount(paas, NULL); - paad = pixaaCreate(n); - for (i = 0; i < n; i++) { - pixa1 = pixaaGetPixa(paas, i, L_CLONE); - pixa2 = pixaScaleToSize(pixa1, wd, hd); - pixaaAddPixa(paad, pixa2, L_INSERT); - pixaDestroy(&pixa1); - } - return paad; -} - - -/*! - * \brief pixaaScaleToSizeVar() - * - * \param[in] paas - * \param[in] nawd [optional] target widths; use NULL if using height - * \param[in] nahd [optional] target height; use NULL if using width - * \return paad, or NULL on error - * - *
- * Notes:
- *      (1) This guarantees that the scaled images in each pixa have the
- *          dimension(s) you specify in the numas.
- *           ~ To specify the width with isotropic scaling, set %nahd = NULL.
- *           ~ To specify the height with isotropic scaling, set %nawd = NULL.
- *           ~ If both %nawd and %nahd are specified, the image is scaled
- *             (in general, anisotropically) to that size.
- *           ~ It is an error to set both %nawd and %nahd to NULL.
- *      (2) If either nawd and/or nahd is defined, it must have the same
- *          count as the number of pixa in paas.
- * 
- */ -PIXAA * -pixaaScaleToSizeVar(PIXAA *paas, - NUMA *nawd, - NUMA *nahd) -{ -l_int32 n, i, wd, hd; -PIXA *pixa1, *pixa2; -PIXAA *paad; - - PROCNAME("pixaaScaleToSizeVar"); - - if (!paas) - return (PIXAA *)ERROR_PTR("paas not defined", procName, NULL); - if (!nawd && !nahd) - return (PIXAA *)ERROR_PTR("!nawd && !nahd", procName, NULL); - - n = pixaaGetCount(paas, NULL); - if (nawd && (n != numaGetCount(nawd))) - return (PIXAA *)ERROR_PTR("nawd wrong size", procName, NULL); - if (nahd && (n != numaGetCount(nahd))) - return (PIXAA *)ERROR_PTR("nahd wrong size", procName, NULL); - paad = pixaaCreate(n); - for (i = 0; i < n; i++) { - wd = hd = 0; - if (nawd) numaGetIValue(nawd, i, &wd); - if (nahd) numaGetIValue(nahd, i, &hd); - pixa1 = pixaaGetPixa(paas, i, L_CLONE); - pixa2 = pixaScaleToSize(pixa1, wd, hd); - pixaaAddPixa(paad, pixa2, L_INSERT); - pixaDestroy(&pixa1); - } - return paad; -} - - -/*! - * \brief pixaScaleToSize() - * - * \param[in] pixas - * \param[in] wd target width; use 0 if using height as target - * \param[in] hd target height; use 0 if using width as target - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) See pixaaScaleToSize()
- * 
- */ -PIXA * -pixaScaleToSize(PIXA *pixas, - l_int32 wd, - l_int32 hd) -{ -l_int32 n, i; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaScaleToSize"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - if (wd <= 0 && hd <= 0) /* no scaling requested */ - return pixaCopy(pixas, L_CLONE); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixScaleToSize(pix1, wd, hd); - pixCopyText(pix2, pix1); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - return pixad; -} - - -/*! - * \brief pixaScaleToSizeRel() - * - * \param[in] pixas - * \param[in] delw change in width, in pixels; 0 means no change - * \param[in] delh change in height, in pixels; 0 means no change - * return pixad, or NULL on error - * - *
- * Notes:
- *      (1) If a requested change in a pix is not possible because
- *          either the requested width or height is <= 0, issue a
- *          warning and return a copy.
- * 
- */ -PIXA * -pixaScaleToSizeRel(PIXA *pixas, - l_int32 delw, - l_int32 delh) -{ -l_int32 n, i; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaScaleToSizeRel"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixScaleToSizeRel(pix1, delw, delh); - if (pix2) { - pixaAddPix(pixad, pix2, L_INSERT); - } else { - L_WARNING("relative scale to size failed; use a copy\n", procName); - pixaAddPix(pixad, pix1, L_COPY); - } - pixDestroy(&pix1); - } - return pixad; -} - - -/*! - * \brief pixaScale() - * - * \param[in] pixas - * \param[in] scalex - * \param[in] scaley - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) If pixas has a full boxes, it is scaled as well.
- * 
- */ -PIXA * -pixaScale(PIXA *pixas, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 i, n, nb; -BOXA *boxa1, *boxa2; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaScale"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIXA *)ERROR_PTR("invalid scaling parameters", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixScale(pix1, scalex, scaley); - pixCopyText(pix2, pix1); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - boxa1 = pixaGetBoxa(pixas, L_CLONE); - nb = boxaGetCount(boxa1); - if (nb == n) { - boxa2 = boxaTransform(boxa1, 0, 0, scalex, scaley); - pixaSetBoxa(pixad, boxa2, L_INSERT); - } - boxaDestroy(&boxa1); - return pixad; -} - - -/*! - * \brief pixaScaleBySampling() - * - * \param[in] pixas - * \param[in] scalex - * \param[in] scaley - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) If pixas has a full boxes, it is scaled as well.
- * 
- */ -PIXA * -pixaScaleBySampling(PIXA *pixas, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 i, n, nb; -BOXA *boxa1, *boxa2; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaScaleBySampling"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIXA *)ERROR_PTR("invalid scaling parameters", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixScaleBySampling(pix1, scalex, scaley); - pixCopyText(pix2, pix1); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - boxa1 = pixaGetBoxa(pixas, L_CLONE); - nb = boxaGetCount(boxa1); - if (nb == n) { - boxa2 = boxaTransform(boxa1, 0, 0, scalex, scaley); - pixaSetBoxa(pixad, boxa2, L_INSERT); - } - boxaDestroy(&boxa1); - return pixad; -} - - -/*---------------------------------------------------------------------* - * Pixa rotation and translation * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaRotate() - * - * \param[in] pixas 1, 2, 4, 8, 32 bpp rgb - * \param[in] angle rotation angle in radians; clockwise is positive - * \param[in] type L_ROTATE_AREA_MAP, L_ROTATE_SHEAR, L_ROTATE_SAMPLING - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \param[in] width original width; use 0 to avoid embedding - * \param[in] height original height; use 0 to avoid embedding - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Each pix is rotated about its center.  See pixRotate() for details.
- *      (2) The boxa array is copied.  Why is it not rotated?
- *          If a boxa exists, the array of boxes is in 1-to-1
- *          correspondence with the array of pix, and each box typically
- *          represents the location of the pix relative to an image from
- *          which it has been extracted.  Like the pix, we could rotate
- *          each box around its center, and then generate a box that
- *          contains all four corners, as is done in boxaRotate(), but
- *          this seems unnecessary.
- * 
- */ -PIXA * -pixaRotate(PIXA *pixas, - l_float32 angle, - l_int32 type, - l_int32 incolor, - l_int32 width, - l_int32 height) -{ -l_int32 i, n; -BOXA *boxa; -PIX *pixs, *pixd; -PIXA *pixad; - - PROCNAME("pixaRotate"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (type != L_ROTATE_SHEAR && type != L_ROTATE_AREA_MAP && - type != L_ROTATE_SAMPLING) - return (PIXA *)ERROR_PTR("invalid type", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIXA *)ERROR_PTR("invalid incolor", procName, NULL); - if (L_ABS(angle) < MinAngleToRotate) - return pixaCopy(pixas, L_COPY); - - n = pixaGetCount(pixas); - if ((pixad = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - boxa = pixaGetBoxa(pixad, L_COPY); - pixaSetBoxa(pixad, boxa, L_INSERT); - for (i = 0; i < n; i++) { - if ((pixs = pixaGetPix(pixas, i, L_CLONE)) == NULL) { - pixaDestroy(&pixad); - return (PIXA *)ERROR_PTR("pixs not found", procName, NULL); - } - pixd = pixRotate(pixs, angle, type, incolor, width, height); - pixaAddPix(pixad, pixd, L_INSERT); - pixDestroy(&pixs); - } - - return pixad; -} - - -/*! - * \brief pixaRotateOrth() - * - * \param[in] pixas - * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; - * all rotations are clockwise - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) Rotates each pix in the pixa.  Rotates and saves the boxes in
- *          the boxa if the boxa is full.
- * 
- */ -PIXA * -pixaRotateOrth(PIXA *pixas, - l_int32 rotation) -{ -l_int32 i, n, nb, w, h; -BOX *boxs, *boxd; -PIX *pixs, *pixd; -PIXA *pixad; - - PROCNAME("pixaRotateOrth"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (rotation < 0 || rotation > 3) - return (PIXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); - if (rotation == 0) - return pixaCopy(pixas, L_COPY); - - n = pixaGetCount(pixas); - nb = pixaGetBoxaCount(pixas); - if ((pixad = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((pixs = pixaGetPix(pixas, i, L_CLONE)) == NULL) { - pixaDestroy(&pixad); - return (PIXA *)ERROR_PTR("pixs not found", procName, NULL); - } - pixd = pixRotateOrth(pixs, rotation); - pixaAddPix(pixad, pixd, L_INSERT); - if (n == nb) { - boxs = pixaGetBox(pixas, i, L_COPY); - pixGetDimensions(pixs, &w, &h, NULL); - boxd = boxRotateOrth(boxs, w, h, rotation); - pixaAddBox(pixad, boxd, L_INSERT); - boxDestroy(&boxs); - } - pixDestroy(&pixs); - } - - return pixad; -} - - -/*! - * \brief pixaTranslate() - * - * \param[in] pixas - * \param[in] hshift horizontal shift; hshift > 0 is to right - * \param[in] vshift vertical shift; vshift > 0 is down - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixad, or NULL on error. 
- */ -PIXA * -pixaTranslate(PIXA *pixas, - l_int32 hshift, - l_int32 vshift, - l_int32 incolor) -{ -l_int32 i, n, nb; -BOXA *boxas, *boxad; -PIX *pixs, *pixd; -PIXA *pixad; - - PROCNAME("pixaTranslate"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (hshift == 0 && vshift == 0) - return pixaCopy(pixas, L_COPY); - - n = pixaGetCount(pixas); - nb = pixaGetBoxaCount(pixas); - if ((pixad = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((pixs = pixaGetPix(pixas, i, L_CLONE)) == NULL) { - pixaDestroy(&pixad); - return (PIXA *)ERROR_PTR("pixs not found", procName, NULL); - } - pixd = pixTranslate(NULL, pixs, hshift, vshift, incolor); - pixaAddPix(pixad, pixd, L_INSERT); - pixDestroy(&pixs); - } - if (n == nb) { - boxas = pixaGetBoxa(pixas, L_CLONE); - boxad = boxaTransform(boxas, hshift, vshift, 1.0, 1.0); - pixaSetBoxa(pixad, boxad, L_INSERT); - boxaDestroy(&boxas); - } - - return pixad; -} - - -/*---------------------------------------------------------------------* - * Miscellaneous functions * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaAddBorderGeneral() - * - * \param[in] pixad can be null or equal to pixas - * \param[in] pixas containing pix of all depths; colormap ok - * \param[in] left, right, top, bot number of pixels added - * \param[in] val value of added border pixels - * \return pixad with border added to each pix, including on error - * - *
- * Notes:
- *      (1) For binary images:
- *             white:  val = 0
- *             black:  val = 1
- *          For grayscale images:
- *             white:  val = 2 ** d - 1
- *             black:  val = 0
- *          For rgb color images:
- *             white:  val = 0xffffff00
- *             black:  val = 0
- *          For colormapped images, use 'index' found this way:
- *             white: pixcmapGetRankIntensity(cmap, 1.0, &index);
- *             black: pixcmapGetRankIntensity(cmap, 0.0, &index);
- *      (2) For in-place replacement of each pix with a bordered version,
- *          use %pixad = %pixas.  To make a new pixa, use %pixad = NULL.
- *      (3) In both cases, the boxa has sides adjusted as if it were
- *          expanded by the border.
- * 
- */ -PIXA * -pixaAddBorderGeneral(PIXA *pixad, - PIXA *pixas, - l_int32 left, - l_int32 right, - l_int32 top, - l_int32 bot, - l_uint32 val) -{ -l_int32 i, n, nbox; -BOX *box; -BOXA *boxad; -PIX *pixs, *pixd; - - PROCNAME("pixaAddBorderGeneral"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, pixad); - if (left < 0 || right < 0 || top < 0 || bot < 0) - return (PIXA *)ERROR_PTR("negative border added!", procName, pixad); - if (pixad && (pixad != pixas)) - return (PIXA *)ERROR_PTR("pixad defined but != pixas", procName, pixad); - - n = pixaGetCount(pixas); - if (!pixad) - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pixs = pixaGetPix(pixas, i, L_CLONE); - pixd = pixAddBorderGeneral(pixs, left, right, top, bot, val); - if (pixad == pixas) /* replace */ - pixaReplacePix(pixad, i, pixd, NULL); - else - pixaAddPix(pixad, pixd, L_INSERT); - pixDestroy(&pixs); - } - - nbox = pixaGetBoxaCount(pixas); - boxad = pixaGetBoxa(pixad, L_CLONE); - for (i = 0; i < nbox; i++) { - if ((box = pixaGetBox(pixas, i, L_COPY)) == NULL) { - L_WARNING("box %d not found\n", procName, i); - break; - } - boxAdjustSides(box, box, -left, right, -top, bot); - if (pixad == pixas) /* replace */ - boxaReplaceBox(boxad, i, box); - else - boxaAddBox(boxad, box, L_INSERT); - } - boxaDestroy(&boxad); - - return pixad; -} - - -/*! - * \brief pixaaFlattenToPixa() - * - * \param[in] paa - * \param[out] pnaindex [optional] the pixa index in the pixaa - * \param[in] copyflag L_COPY or L_CLONE - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) This 'flattens' the pixaa to a pixa, taking the pix in
- *          order in the first pixa, then the second, etc.
- *      (2) If &naindex is defined, we generate a Numa that gives, for
- *          each pix in the pixaa, the index of the pixa to which it belongs.
- * 
- */ -PIXA * -pixaaFlattenToPixa(PIXAA *paa, - NUMA **pnaindex, - l_int32 copyflag) -{ -l_int32 i, j, m, mb, n; -BOX *box; -NUMA *naindex; -PIX *pix; -PIXA *pixa, *pixat; - - PROCNAME("pixaaFlattenToPixa"); - - if (pnaindex) *pnaindex = NULL; - if (!paa) - return (PIXA *)ERROR_PTR("paa not defined", procName, NULL); - if (copyflag != L_COPY && copyflag != L_CLONE) - return (PIXA *)ERROR_PTR("invalid copyflag", procName, NULL); - - if (pnaindex) { - naindex = numaCreate(0); - *pnaindex = naindex; - } - - n = pixaaGetCount(paa, NULL); - pixa = pixaCreate(n); - for (i = 0; i < n; i++) { - pixat = pixaaGetPixa(paa, i, L_CLONE); - m = pixaGetCount(pixat); - mb = pixaGetBoxaCount(pixat); - for (j = 0; j < m; j++) { - pix = pixaGetPix(pixat, j, copyflag); - pixaAddPix(pixa, pix, L_INSERT); - if (j < mb) { - box = pixaGetBox(pixat, j, copyflag); - pixaAddBox(pixa, box, L_INSERT); - } - if (pnaindex) - numaAddNumber(naindex, i); /* save 'row' number */ - } - pixaDestroy(&pixat); - } - - return pixa; -} - - -/*! 
- * \brief pixaaSizeRange() - * - * \param[in] paa - * \param[out] pminw, pminh, pmaxw, pmaxh [optional] range of - * dimensions of all boxes - * \return 0 if OK, 1 on error - */ -l_ok -pixaaSizeRange(PIXAA *paa, - l_int32 *pminw, - l_int32 *pminh, - l_int32 *pmaxw, - l_int32 *pmaxh) -{ -l_int32 minw, minh, maxw, maxh, minpw, minph, maxpw, maxph, i, n; -PIXA *pixa; - - PROCNAME("pixaaSizeRange"); - - if (pminw) *pminw = 0; - if (pminh) *pminh = 0; - if (pmaxw) *pmaxw = 0; - if (pmaxh) *pmaxh = 0; - if (!paa) - return ERROR_INT("paa not defined", procName, 1); - if (!pminw && !pmaxw && !pminh && !pmaxh) - return ERROR_INT("no data can be returned", procName, 1); - - minw = minh = 100000000; - maxw = maxh = 0; - n = pixaaGetCount(paa, NULL); - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - pixaSizeRange(pixa, &minpw, &minph, &maxpw, &maxph); - if (minpw < minw) - minw = minpw; - if (minph < minh) - minh = minph; - if (maxpw > maxw) - maxw = maxpw; - if (maxph > maxh) - maxh = maxph; - pixaDestroy(&pixa); - } - - if (pminw) *pminw = minw; - if (pminh) *pminh = minh; - if (pmaxw) *pmaxw = maxw; - if (pmaxh) *pmaxh = maxh; - return 0; -} - - -/*! 
- * \brief pixaSizeRange() - * - * \param[in] pixa - * \param[out] pminw, pminh, pmaxw, pmaxh [optional] range of - * dimensions of pix in the array - * \return 0 if OK, 1 on error - */ -l_ok -pixaSizeRange(PIXA *pixa, - l_int32 *pminw, - l_int32 *pminh, - l_int32 *pmaxw, - l_int32 *pmaxh) -{ -l_int32 minw, minh, maxw, maxh, i, n, w, h; -PIX *pix; - - PROCNAME("pixaSizeRange"); - - if (pminw) *pminw = 0; - if (pminh) *pminh = 0; - if (pmaxw) *pmaxw = 0; - if (pmaxh) *pmaxh = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!pminw && !pmaxw && !pminh && !pmaxh) - return ERROR_INT("no data can be returned", procName, 1); - - minw = minh = 1000000; - maxw = maxh = 0; - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - w = pixGetWidth(pix); - h = pixGetHeight(pix); - if (w < minw) - minw = w; - if (h < minh) - minh = h; - if (w > maxw) - maxw = w; - if (h > maxh) - maxh = h; - pixDestroy(&pix); - } - - if (pminw) *pminw = minw; - if (pminh) *pminh = minh; - if (pmaxw) *pmaxw = maxw; - if (pmaxh) *pmaxh = maxh; - - return 0; -} - - -/*! - * \brief pixaClipToPix() - * - * \param[in] pixas - * \param[in] pixs - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) This is intended for use in situations where pixas
- *          was originally generated from the input pixs.
- *      (2) Returns a pixad where each pix in pixas is ANDed
- *          with its associated region of the input pixs.  This
- *          region is specified by the the box that is associated
- *          with the pix.
- *      (3) In a typical application of this function, pixas has
- *          a set of region masks, so this generates a pixa of
- *          the parts of pixs that correspond to each region
- *          mask component, along with the bounding box for
- *          the region.
- * 
- */ -PIXA * -pixaClipToPix(PIXA *pixas, - PIX *pixs) -{ -l_int32 i, n; -BOX *box; -PIX *pix, *pixc; -PIXA *pixad; - - PROCNAME("pixaClipToPix"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - - n = pixaGetCount(pixas); - if ((pixad = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); - - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixas, i, L_CLONE); - box = pixaGetBox(pixas, i, L_COPY); - pixc = pixClipRectangle(pixs, box, NULL); - pixAnd(pixc, pixc, pix); - pixaAddPix(pixad, pixc, L_INSERT); - pixaAddBox(pixad, box, L_INSERT); - pixDestroy(&pix); - } - - return pixad; -} - - -/*! - * \brief pixaClipToForeground() - * - * \param[in] pixas - * \param[out] ppixad [optional] pixa of clipped pix returned - * \param[out] pboxa [optional] clipping boxes returned - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) At least one of [&pixd, &boxa] must be specified.
- *      (2) Any pix with no fg pixels is skipped.
- *      (3) See pixClipToForeground().
- * 
- */ -l_ok -pixaClipToForeground(PIXA *pixas, - PIXA **ppixad, - BOXA **pboxa) -{ -l_int32 i, n; -BOX *box1; -PIX *pix1, *pix2; - - PROCNAME("pixaClipToForeground"); - - if (ppixad) *ppixad = NULL; - if (pboxa) *pboxa = NULL; - if (!pixas) - return ERROR_INT("pixas not defined", procName, 1); - if (!ppixad && !pboxa) - return ERROR_INT("no output requested", procName, 1); - - n = pixaGetCount(pixas); - if (ppixad) *ppixad = pixaCreate(n); - if (pboxa) *pboxa = boxaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pixClipToForeground(pix1, &pix2, &box1); - pixDestroy(&pix1); - if (ppixad) - pixaAddPix(*ppixad, pix2, L_INSERT); - else - pixDestroy(&pix2); - if (pboxa) - boxaAddBox(*pboxa, box1, L_INSERT); - else - boxDestroy(&box1); - } - - return 0; -} - - -/*! - * \brief pixaGetRenderingDepth() - * - * \param[in] pixa - * \param[out] pdepth depth required to render if all colormaps are removed - * \return 0 if OK; 1 on error - */ -l_ok -pixaGetRenderingDepth(PIXA *pixa, - l_int32 *pdepth) -{ -l_int32 hascolor, maxdepth; - - PROCNAME("pixaGetRenderingDepth"); - - if (!pdepth) - return ERROR_INT("&depth not defined", procName, 1); - *pdepth = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - pixaHasColor(pixa, &hascolor); - if (hascolor) { - *pdepth = 32; - return 0; - } - - pixaGetDepthInfo(pixa, &maxdepth, NULL); - if (maxdepth == 1) - *pdepth = 1; - else /* 2, 4, 8 or 16 */ - *pdepth = 8; - return 0; -} - - -/*! 
- * \brief pixaHasColor() - * - * \param[in] pixa - * \param[out] phascolor 1 if any pix is rgb or has a colormap with color; - * 0 otherwise - * \return 0 if OK; 1 on error - */ -l_ok -pixaHasColor(PIXA *pixa, - l_int32 *phascolor) -{ -l_int32 i, n, hascolor, d; -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixaHasColor"); - - if (!phascolor) - return ERROR_INT("&hascolor not defined", procName, 1); - *phascolor = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - hascolor = 0; - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - if ((cmap = pixGetColormap(pix)) != NULL) - pixcmapHasColor(cmap, &hascolor); - d = pixGetDepth(pix); - pixDestroy(&pix); - if (d == 32 || hascolor == 1) { - *phascolor = 1; - break; - } - } - - return 0; -} - - -/*! - * \brief pixaAnyColormaps() - * - * \param[in] pixa - * \param[out] phascmap 1 if any pix has a colormap; 0 otherwise - * \return 0 if OK; 1 on error - */ -l_ok -pixaAnyColormaps(PIXA *pixa, - l_int32 *phascmap) -{ -l_int32 i, n; -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixaAnyColormaps"); - - if (!phascmap) - return ERROR_INT("&hascmap not defined", procName, 1); - *phascmap = 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - cmap = pixGetColormap(pix); - pixDestroy(&pix); - if (cmap) { - *phascmap = 1; - return 0; - } - } - - return 0; -} - - -/*! 
- * \brief pixaGetDepthInfo() - * - * \param[in] pixa - * \param[out] pmaxdepth [optional] max pixel depth of pix in pixa - * \param[out] psame [optional] true if all depths are equal - * \return 0 if OK; 1 on error - */ -l_ok -pixaGetDepthInfo(PIXA *pixa, - l_int32 *pmaxdepth, - l_int32 *psame) -{ -l_int32 i, n, d, d0; -l_int32 maxd, same; /* depth info */ - - PROCNAME("pixaGetDepthInfo"); - - if (pmaxdepth) *pmaxdepth = 0; - if (psame) *psame = TRUE; - if (!pmaxdepth && !psame) return 0; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if ((n = pixaGetCount(pixa)) == 0) - return ERROR_INT("pixa is empty", procName, 1); - - same = TRUE; - maxd = 0; - for (i = 0; i < n; i++) { - pixaGetPixDimensions(pixa, i, NULL, NULL, &d); - if (i == 0) - d0 = d; - else if (d != d0) - same = FALSE; - if (d > maxd) maxd = d; - } - - if (pmaxdepth) *pmaxdepth = maxd; - if (psame) *psame = same; - return 0; -} - - -/*! - * \brief pixaConvertToSameDepth() - * - * \param[in] pixas - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) If any pix has a colormap, they are all converted to rgb.
- *          Otherwise, they are all converted to the maximum depth of
- *          all the pix.
- *      (2) This can be used to allow lossless rendering onto a single pix.
- * 
- */ -PIXA * -pixaConvertToSameDepth(PIXA *pixas) -{ -l_int32 i, n, same, hascmap, maxdepth; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixa1, *pixad; - - PROCNAME("pixaConvertToSameDepth"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - /* Remove colormaps to rgb */ - if ((n = pixaGetCount(pixas)) == 0) - return (PIXA *)ERROR_PTR("no components", procName, NULL); - pixaAnyColormaps(pixas, &hascmap); - if (hascmap) { - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - } else { - pixa1 = pixaCopy(pixas, L_CLONE); - } - - pixaGetDepthInfo(pixa1, &maxdepth, &same); - if (!same) { /* at least one pix has depth < maxdepth */ - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa1, i, L_CLONE); - if (maxdepth <= 8) - pix2 = pixConvertTo8(pix1, 0); - else - pix2 = pixConvertTo32(pix1); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - } else { - pixad = pixaCopy(pixa1, L_CLONE); - } - - boxa = pixaGetBoxa(pixas, L_COPY); - pixaSetBoxa(pixad, boxa, L_INSERT); - pixaDestroy(&pixa1); - return pixad; -} - - -/*! - * \brief pixaEqual() - * - * \param[in] pixa1 - * \param[in] pixa2 - * \param[in] maxdist - * \param[out] pnaindex [optional] index array of correspondences - * \param[out] psame 1 if equal; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The two pixa are the "same" if they contain the same
- *          boxa and the same ordered set of pix.  However, if they
- *          have boxa, the pix in each pixa can differ in ordering
- *          by an amount given by the parameter %maxdist.  If they
- *          don't have a boxa, the %maxdist parameter is ignored,
- *          and the ordering of the pix must be identical.
- *      (2) This applies only to boxa geometry, pixels and ordering;
- *          other fields in the pix are ignored.
- *      (3) naindex[i] gives the position of the box in pixa2 that
- *          corresponds to box i in pixa1.  It is only returned if the
- *          pixa have boxa and the boxa are equal.
- *      (4) In situations where the ordering is very different, so that
- *          a large %maxdist is required for "equality", this should be
- *          implemented with a hash function for efficiency.
- * 
- */ -l_ok -pixaEqual(PIXA *pixa1, - PIXA *pixa2, - l_int32 maxdist, - NUMA **pnaindex, - l_int32 *psame) -{ -l_int32 i, j, n, empty1, empty2, same, sameboxa; -BOXA *boxa1, *boxa2; -NUMA *na; -PIX *pix1, *pix2; - - PROCNAME("pixaEqual"); - - if (pnaindex) *pnaindex = NULL; - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - sameboxa = 0; - na = NULL; - if (!pixa1 || !pixa2) - return ERROR_INT("pixa1 and pixa2 not both defined", procName, 1); - n = pixaGetCount(pixa1); - if (n != pixaGetCount(pixa2)) - return 0; - - /* If there are no boxes, strict ordering of the pix in each - * pixa is required. */ - boxa1 = pixaGetBoxa(pixa1, L_CLONE); - boxa2 = pixaGetBoxa(pixa2, L_CLONE); - empty1 = (boxaGetCount(boxa1) == 0) ? 1 : 0; - empty2 = (boxaGetCount(boxa2) == 0) ? 1 : 0; - if (!empty1 && !empty2) { - boxaEqual(boxa1, boxa2, maxdist, &na, &sameboxa); - if (!sameboxa) { - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - numaDestroy(&na); - return 0; - } - } - boxaDestroy(&boxa1); - boxaDestroy(&boxa2); - if ((!empty1 && empty2) || (empty1 && !empty2)) - return 0; - - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa1, i, L_CLONE); - if (na) - numaGetIValue(na, i, &j); - else - j = i; - pix2 = pixaGetPix(pixa2, j, L_CLONE); - pixEqual(pix1, pix2, &same); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (!same) { - numaDestroy(&na); - return 0; - } - } - - *psame = 1; - if (pnaindex) - *pnaindex = na; - else - numaDestroy(&na); - return 0; -} - - -/*! - * \brief pixaSetFullSizeBoxa() - * - * \param[in] pixa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Replaces the existing boxa.  Each box gives the dimensions
- *          of the corresponding pix.  This is needed for functions
- *          like pixaSort() that sort based on the boxes.
- * 
- */ -l_ok -pixaSetFullSizeBoxa(PIXA *pixa) -{ -l_int32 i, n, w, h; -BOX *box; -BOXA *boxa; -PIX *pix; - - PROCNAME("pixaSetFullSizeBoxa"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if ((n = pixaGetCount(pixa)) == 0) { - L_INFO("pixa contains no pix\n", procName); - return 0; - } - - boxa = boxaCreate(n); - pixaSetBoxa(pixa, boxa, L_INSERT); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixGetDimensions(pix, &w, &h, NULL); - box = boxCreate(0, 0, w, h); - boxaAddBox(boxa, box, L_INSERT); - pixDestroy(&pix); - } - return 0; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixafunc2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixafunc2.c deleted file mode 100644 index a362cfe5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixafunc2.c +++ /dev/null @@ -1,2610 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixafunc2.c - *
- *
- *      Pixa display (render into a pix)
- *           PIX      *pixaDisplay()
- *           PIX      *pixaDisplayRandomCmap()
- *           PIX      *pixaDisplayLinearly()
- *           PIX      *pixaDisplayOnLattice()
- *           PIX      *pixaDisplayUnsplit()
- *           PIX      *pixaDisplayTiled()
- *           PIX      *pixaDisplayTiledInRows()
- *           PIX      *pixaDisplayTiledInColumns()
- *           PIX      *pixaDisplayTiledAndScaled()
- *           PIX      *pixaDisplayTiledWithText()
- *           PIX      *pixaDisplayTiledByIndex()
- *
- *      Pixaa display (render into a pix)
- *           PIX      *pixaaDisplay()
- *           PIX      *pixaaDisplayByPixa()
- *           PIXA     *pixaaDisplayTiledAndScaled()
- *
- *      Conversion of all pix to specified type (e.g., depth)
- *           PIXA     *pixaConvertTo1()
- *           PIXA     *pixaConvertTo8()
- *           PIXA     *pixaConvertTo8Colormap()
- *           PIXA     *pixaConvertTo32()
- *
- *      Pixa constrained selection and pdf generation
- *           PIXA     *pixaConstrainedSelect()
- *           l_int32   pixaSelectToPdf()
- *
- *      Generate pixa from tiled images
- *           PIXA     *pixaMakeFromTiledPixa()
- *           PIXA     *pixaMakeFromTiledPix()
- *           l_int32   pixGetTileCount()
- *
- *      Pixa display into multiple tiles
- *           PIXA     *pixaDisplayMultiTiled()
- *
- *      Split pixa into files
- *           l_int32   pixaSplitIntoFiles()
- *
- *      Tile N-Up
- *           l_int32   convertToNUpFiles()
- *           PIXA     *convertToNUpPixa()
- *           PIXA     *pixaConvertToNUpPixa()
- *
- *      Render two pixa side-by-side for comparison                   *
- *           l_int32   pixaCompareInPdf()
- *
- *  We give twelve pixaDisplay*() methods for tiling a pixa in a pix.
- *  Some work for 1 bpp input; others for any input depth.
- *  Some give an output depth that depends on the input depth;
- *  others give a different output depth or allow you to choose it.
- *  Some use a boxes to determine where each pix goes; others tile
- *  onto a regular lattice; others tile onto an irregular lattice;
- *  one uses an associated index array to determine which column
- *  each pix goes into.
- *
- *  Here is a brief description of what the pixa display functions do.
- *
- *    pixaDisplay()
- *        This uses the boxes in the pixa to lay out each pix.  This
- *        can be used to reconstruct a pix that has been broken into
- *        components, if the boxes represents the positions of the
- *        components in the original image.
- *    pixaDisplayRandomCmap()
- *        This also uses the boxes to lay out each pix.  However, it creates
- *        a colormapped dest, where each 1 bpp pix is given a randomly
- *        generated color (up to 256 are used).
- *    pixaDisplayLinearly()
- *        This puts each pix, sequentially, in a line, either horizontally
- *        or vertically.
- *    pixaDisplayOnLattice()
- *        This puts each pix, sequentially, onto a regular lattice,
- *        omitting any pix that are too big for the lattice size.
- *        This is useful, for example, to store bitmapped fonts,
- *        where all the characters are stored in a single image.
- *    pixaDisplayUnsplit()
- *        This lays out a mosaic of tiles (the pix in the pixa) that
- *        are all of equal size.  (Don't use this for unequal sized pix!)
- *        For example, it can be used to invert the action of
- *        pixaSplitPix().
- *    pixaDisplayTiled()
- *        Like pixaDisplayOnLattice(), this places each pix on a regular
- *        lattice, but here the lattice size is determined by the
- *        largest component, and no components are omitted.  This is
- *        dangerous if there are thousands of small components and
- *        one or more very large one, because the size of the resulting
- *        pix can be huge!
- *    pixaDisplayTiledInRows()
- *        This puts each pix down in a series of rows, where the upper
- *        edges of each pix in a row are aligned and there is a uniform
- *        spacing between the pix.  The height of each row is determined
- *        by the tallest pix that was put in the row.  This function
- *        is a reasonably efficient way to pack the subimages.
- *        A boxa of the locations of each input pix is stored in the output.
- *    pixaDisplayTiledInColumns()
- *        This puts each pix down in a series of rows, each row having
- *        a specified number of pix.  The upper edges of each pix in a
- *        row are aligned and there is a uniform spacing between the pix.
- *        The height of each row is determined by the tallest pix that
- *        was put in the row.  A boxa of the locations of each input
- *        pix is stored in the output.
- *    pixaDisplayTiledAndScaled()
- *        This scales each pix to a given width and output depth, and then
- *        tiles them in rows with a given number placed in each row.
- *        This is useful for presenting a sequence of images that can be
- *        at different resolutions, but which are derived from the same
- *        initial image.
- *    pixaDisplayTiledWithText()
- *        This is a version of pixaDisplayTiledInRows() that prints, below
- *        each pix, the text in the pix text field.  It renders a pixa
- *        to an image with white background that does not exceed a
- *        given value in width.
- *    pixaDisplayTiledByIndex()
- *        This scales each pix to a given width and output depth,
- *        and then tiles them in columns corresponding to the value
- *        in an associated numa.  All pix with the same index value are
- *        rendered in the same column.  Text in the pix text field are
- *        rendered below the pix.
- *
- *  To render mosaics of images in a pixaa, display functions are
- *  provided that handle situations where the images are all scaled to
- *  the same size, or the number of images on each row needs to vary.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include /* for sqrt() */ -#include "allheaders.h" - -/*---------------------------------------------------------------------* - * Pixa Display * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaDisplay() - * - * \param[in] pixa - * \param[in] w, h if set to 0, the size is determined from the - * bounding box of the components in pixa - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This uses the boxes to place each pix in the rendered composite.
- *      (2) Set w = h = 0 to use the b.b. of the components to determine
- *          the size of the returned pix.
- *      (3) Uses the first pix in pixa to determine the depth.
- *      (4) The background is written "white".  On 1 bpp, each successive
- *          pix is "painted" (adding foreground), whereas for grayscale
- *          or color each successive pix is blitted with just the src.
- *      (5) If the pixa is empty, returns an empty 1 bpp pix.
- * 
- */ -PIX * -pixaDisplay(PIXA *pixa, - l_int32 w, - l_int32 h) -{ -l_int32 i, n, d, xb, yb, wb, hb, res; -BOXA *boxa; -PIX *pix1, *pixd; - - PROCNAME("pixaDisplay"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - - n = pixaGetCount(pixa); - if (n == 0 && w == 0 && h == 0) - return (PIX *)ERROR_PTR("no components; no size", procName, NULL); - if (n == 0) { - L_WARNING("no components; returning empty 1 bpp pix\n", procName); - return pixCreate(w, h, 1); - } - - /* If w and h not input, determine the minimum size required - * to contain the origin and all c.c. */ - if (w == 0 || h == 0) { - boxa = pixaGetBoxa(pixa, L_CLONE); - boxaGetExtent(boxa, &w, &h, NULL); - boxaDestroy(&boxa); - if (w == 0 || h == 0) - return (PIX *)ERROR_PTR("no associated boxa", procName, NULL); - } - - /* Use the first pix in pixa to determine depth and resolution */ - pix1 = pixaGetPix(pixa, 0, L_CLONE); - d = pixGetDepth(pix1); - res = pixGetXRes(pix1); - pixDestroy(&pix1); - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSetResolution(pixd, res, res); - if (d > 1) - pixSetAll(pixd); - for (i = 0; i < n; i++) { - if (pixaGetBoxGeometry(pixa, i, &xb, &yb, &wb, &hb)) { - L_WARNING("no box found!\n", procName); - continue; - } - pix1 = pixaGetPix(pixa, i, L_CLONE); - if (d == 1) - pixRasterop(pixd, xb, yb, wb, hb, PIX_PAINT, pix1, 0, 0); - else - pixRasterop(pixd, xb, yb, wb, hb, PIX_SRC, pix1, 0, 0); - pixDestroy(&pix1); - } - - return pixd; -} - - -/*! - * \brief pixaDisplayRandomCmap() - * - * \param[in] pixa 1 bpp regions, with boxa delineating those regions - * \param[in] w, h if set to 0, the size is determined from the - * bounding box of the components in pixa - * \return pix 8 bpp, cmapped, with random colors assigned to each region, - * or NULL on error. - * - *
- * Notes:
- *      (1) This uses the boxes to place each pix in the rendered composite.
- *          The fg of each pix in %pixa, such as a single connected
- *          component or a line of text, is given a random color.
- *      (2) By default, the background color is black (cmap index 0).
- *          This can be changed by pixcmapResetColor()
- * 
- */ -PIX * -pixaDisplayRandomCmap(PIXA *pixa, - l_int32 w, - l_int32 h) -{ -l_int32 i, n, same, maxd, index, xb, yb, wb, hb, res; -BOXA *boxa; -PIX *pixs, *pix1, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixaDisplayRandomCmap"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - pixaVerifyDepth(pixa, &same, &maxd); - if (maxd > 1) - return (PIX *)ERROR_PTR("not all components are 1 bpp", procName, NULL); - - /* If w and h are not input, determine the minimum size required - * to contain the origin and all c.c. */ - if (w == 0 || h == 0) { - boxa = pixaGetBoxa(pixa, L_CLONE); - boxaGetExtent(boxa, &w, &h, NULL); - boxaDestroy(&boxa); - } - - /* Set up an 8 bpp dest pix, with a colormap with 254 random colors */ - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreateRandom(8, 1, 1); - pixSetColormap(pixd, cmap); - - /* Color each component and blit it in */ - for (i = 0; i < n; i++) { - index = 1 + (i % 254); - pixaGetBoxGeometry(pixa, i, &xb, &yb, &wb, &hb); - pixs = pixaGetPix(pixa, i, L_CLONE); - if (i == 0) res = pixGetXRes(pixs); - pix1 = pixConvert1To8(NULL, pixs, 0, index); - pixRasterop(pixd, xb, yb, wb, hb, PIX_PAINT, pix1, 0, 0); - pixDestroy(&pixs); - pixDestroy(&pix1); - } - - pixSetResolution(pixd, res, res); - return pixd; -} - - -/*! 
- * \brief pixaDisplayLinearly() - * - * \param[in] pixas - * \param[in] direction L_HORIZ or L_VERT - * \param[in] scalefactor applied to every pix; use 1.0 for no scaling - * \param[in] background 0 for white, 1 for black; this is the color - * of the spacing between the images - * \param[in] spacing between images, and on outside - * \param[in] border width of black border added to each image; - * use 0 for no border - * \param[out] pboxa [optional] location of images in output pix - * \return pix of composite images, or NULL on error - * - *
- * Notes:
- *      (1) This puts each pix, sequentially, in a line, either horizontally
- *          or vertically.
- *      (2) If any pix has a colormap, all pix are rendered in rgb.
- *      (3) The boxa gives the location of each image.
- * 
- */ -PIX * -pixaDisplayLinearly(PIXA *pixas, - l_int32 direction, - l_float32 scalefactor, - l_int32 background, /* not used */ - l_int32 spacing, - l_int32 border, - BOXA **pboxa) -{ -l_int32 i, n, x, y, w, h, size, depth, bordval; -BOX *box; -PIX *pix1, *pix2, *pix3, *pixd; -PIXA *pixa1, *pixa2; - - PROCNAME("pixaDisplayLinearly"); - - if (pboxa) *pboxa = NULL; - if (!pixas) - return (PIX *)ERROR_PTR("pixas not defined", procName, NULL); - if (direction != L_HORIZ && direction != L_VERT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - - /* Make sure all pix are at the same depth */ - pixa1 = pixaConvertToSameDepth(pixas); - pixaGetDepthInfo(pixa1, &depth, NULL); - - /* Scale and add border if requested */ - n = pixaGetCount(pixa1); - pixa2 = pixaCreate(n); - bordval = (depth == 1) ? 1 : 0; - size = (n - 1) * spacing; - x = y = 0; - for (i = 0; i < n; i++) { - if ((pix1 = pixaGetPix(pixa1, i, L_CLONE)) == NULL) { - L_WARNING("missing pix at index %d\n", procName, i); - continue; - } - - if (scalefactor != 1.0) - pix2 = pixScale(pix1, scalefactor, scalefactor); - else - pix2 = pixClone(pix1); - if (border) - pix3 = pixAddBorder(pix2, border, bordval); - else - pix3 = pixClone(pix2); - - pixGetDimensions(pix3, &w, &h, NULL); - box = boxCreate(x, y, w, h); - if (direction == L_HORIZ) { - size += w; - x += w + spacing; - } else { /* vertical */ - size += h; - y += h + spacing; - } - pixaAddPix(pixa2, pix3, L_INSERT); - pixaAddBox(pixa2, box, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - pixd = pixaDisplay(pixa2, 0, 0); - - if (pboxa) - *pboxa = pixaGetBoxa(pixa2, L_COPY); - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - return pixd; -} - - -/*! 
- * \brief pixaDisplayOnLattice() - * - * \param[in] pixa - * \param[in] cellw lattice cell width - * \param[in] cellh lattice cell height - * \param[out] pncols [optional] number of columns in output lattice - * \param[out] pboxa [optional] location of images in lattice - * \return pix of composite images, or NULL on error - * - *
- * Notes:
- *      (1) This places each pix on sequentially on a regular lattice
- *          in the rendered composite.  If a pix is too large to fit in the
- *          allocated lattice space, it is not rendered.
- *      (2) If any pix has a colormap, all pix are rendered in rgb.
- *      (3) This is useful when putting bitmaps of components,
- *          such as characters, into a single image.
- *      (4) Save the number of tiled images in the text field of the pix,
- *          in the format: n = %d.  This survives write/read into png files,
- *          for example.
- *      (5) The boxa gives the location of each image.  The UL corner
- *          of each image is on a lattice cell corner.  Omitted images
- *          (due to size) are assigned an invalid width and height of 0.
- * 
- */ -PIX * -pixaDisplayOnLattice(PIXA *pixa, - l_int32 cellw, - l_int32 cellh, - l_int32 *pncols, - BOXA **pboxa) -{ -char buf[16]; -l_int32 n, nw, nh, w, h, d, wt, ht, res, samedepth; -l_int32 index, i, j, hascmap; -BOX *box; -BOXA *boxa; -PIX *pix1, *pix2, *pixd; -PIXA *pixa1; - - PROCNAME("pixaDisplayOnLattice"); - - if (pncols) *pncols = 0; - if (pboxa) *pboxa = NULL; - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - - /* If any pix have colormaps, or if the depths differ, generate rgb */ - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - pixaAnyColormaps(pixa, &hascmap); - pixaVerifyDepth(pixa, &samedepth, NULL); - if (hascmap || !samedepth) { - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - } else { - pixa1 = pixaCopy(pixa, L_CLONE); - } - - /* Have number of rows and columns approximately equal */ - nw = (l_int32)sqrt((l_float64)n); - nh = (n + nw - 1) / nw; - w = cellw * nw; - h = cellh * nh; - - /* Use the first pix to determine output depth and resolution */ - pix1 = pixaGetPix(pixa1, 0, L_CLONE); - d = pixGetDepth(pix1); - res = pixGetXRes(pix1); - pixDestroy(&pix1); - if ((pixd = pixCreate(w, h, d)) == NULL) { - pixaDestroy(&pixa1); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixSetBlackOrWhite(pixd, L_SET_WHITE); - pixSetResolution(pixd, res, res); - boxa = boxaCreate(n); - - /* Tile the output */ - index = 0; - for (i = 0; i < nh; i++) { - for (j = 0; j < nw && index < n; j++, index++) { - pix1 = pixaGetPix(pixa1, index, L_CLONE); - pixGetDimensions(pix1, &wt, &ht, NULL); - if (wt > cellw || ht > cellh) { - L_INFO("pix(%d) omitted; size %dx%x\n", procName, index, - wt, ht); - box = boxCreate(0, 0, 0, 0); - boxaAddBox(boxa, box, L_INSERT); - pixDestroy(&pix1); - continue; - } - pixRasterop(pixd, j * cellw, i * cellh, 
wt, ht, - PIX_SRC, pix1, 0, 0); - box = boxCreate(j * cellw, i * cellh, wt, ht); - boxaAddBox(boxa, box, L_INSERT); - pixDestroy(&pix1); - } - } - - /* Save the number of tiles in the text field */ - snprintf(buf, sizeof(buf), "n = %d", boxaGetCount(boxa)); - pixSetText(pixd, buf); - - if (pncols) *pncols = nw; - if (pboxa) - *pboxa = boxa; - else - boxaDestroy(&boxa); - pixaDestroy(&pixa1); - return pixd; -} - - -/*! - * \brief pixaDisplayUnsplit() - * - * \param[in] pixa - * \param[in] nx number of mosaic cells horizontally - * \param[in] ny number of mosaic cells vertically - * \param[in] borderwidth of added border on all sides - * \param[in] bordercolor in our RGBA format: 0xrrggbbaa - * \return pix of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This is a logical inverse of pixaSplitPix().  It
- *          constructs a pix from a mosaic of tiles, all of equal size.
- *      (2) For added generality, a border of arbitrary color can
- *          be added to each of the tiles.
- *      (3) In use, pixa will typically have either been generated
- *          from pixaSplitPix() or will derived from a pixa that
- *          was so generated.
- *      (4) All pix in the pixa must be of equal depth, and, if
- *          colormapped, have the same colormap.
- * 
- */ -PIX * -pixaDisplayUnsplit(PIXA *pixa, - l_int32 nx, - l_int32 ny, - l_int32 borderwidth, - l_uint32 bordercolor) -{ -l_int32 w, h, d, wt, ht; -l_int32 i, j, k, x, y, n; -PIX *pix1, *pixd; - - PROCNAME("pixaDisplayUnsplit"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if (nx <= 0 || ny <= 0) - return (PIX *)ERROR_PTR("nx and ny must be > 0", procName, NULL); - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - if (n != nx * ny) - return (PIX *)ERROR_PTR("n != nx * ny", procName, NULL); - borderwidth = L_MAX(0, borderwidth); - - pixaGetPixDimensions(pixa, 0, &wt, &ht, &d); - w = nx * (wt + 2 * borderwidth); - h = ny * (ht + 2 * borderwidth); - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pix1 = pixaGetPix(pixa, 0, L_CLONE); - pixCopyColormap(pixd, pix1); - pixDestroy(&pix1); - if (borderwidth > 0) - pixSetAllArbitrary(pixd, bordercolor); - - y = borderwidth; - for (i = 0, k = 0; i < ny; i++) { - x = borderwidth; - for (j = 0; j < nx; j++, k++) { - pix1 = pixaGetPix(pixa, k, L_CLONE); - pixRasterop(pixd, x, y, wt, ht, PIX_SRC, pix1, 0, 0); - pixDestroy(&pix1); - x += wt + 2 * borderwidth; - } - y += ht + 2 * borderwidth; - } - - return pixd; -} - - -/*! - * \brief pixaDisplayTiled() - * - * \param[in] pixa - * \param[in] maxwidth of output image - * \param[in] background 0 for white, 1 for black - * \param[in] spacing - * \return pix of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This renders a pixa to a single image of width not to
- *          exceed maxwidth, with background color either white or black,
- *          and with each subimage spaced on a regular lattice.
- *      (2) The lattice size is determined from the largest width and height,
- *          separately, of all pix in the pixa.
- *      (3) All pix in the pixa must be of equal depth.
- *      (4) If any pix has a colormap, all pix are rendered in rgb.
- *      (5) Careful: because no components are omitted, this is
- *          dangerous if there are thousands of small components and
- *          one or more very large one, because the size of the
- *          resulting pix can be huge!
- * 
- */ -PIX * -pixaDisplayTiled(PIXA *pixa, - l_int32 maxwidth, - l_int32 background, - l_int32 spacing) -{ -l_int32 wmax, hmax, wd, hd, d, hascmap, res, same; -l_int32 i, j, n, ni, ncols, nrows; -l_int32 ystart, xstart, wt, ht; -PIX *pix1, *pix2, *pixd; -PIXA *pixa1; - - PROCNAME("pixaDisplayTiled"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - - /* If any pix have colormaps, generate rgb */ - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - pixaAnyColormaps(pixa, &hascmap); - if (hascmap) { - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - } else { - pixa1 = pixaCopy(pixa, L_CLONE); - } - - /* Find the max dimensions and depth subimages */ - pixaGetDepthInfo(pixa1, &d, &same); - if (!same) { - pixaDestroy(&pixa1); - return (PIX *)ERROR_PTR("depths not equal", procName, NULL); - } - pixaSizeRange(pixa1, NULL, NULL, &wmax, &hmax); - - /* Get the number of rows and columns and the output image size */ - spacing = L_MAX(spacing, 0); - ncols = (l_int32)((l_float32)(maxwidth - spacing) / - (l_float32)(wmax + spacing)); - ncols = L_MAX(ncols, 1); - nrows = (n + ncols - 1) / ncols; - wd = wmax * ncols + spacing * (ncols + 1); - hd = hmax * nrows + spacing * (nrows + 1); - if ((pixd = pixCreate(wd, hd, d)) == NULL) { - pixaDestroy(&pixa1); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - - /* Reset the background color if necessary */ - if ((background == 1 && d == 1) || (background == 0 && d != 1)) - pixSetAll(pixd); - - /* Blit the images to the dest */ - for (i = 0, ni = 0; i < nrows; i++) { - ystart = spacing + i * (hmax + spacing); - for (j = 0; j < ncols && ni < n; j++, ni++) { - xstart = spacing + j * (wmax + spacing); - pix1 = pixaGetPix(pixa1, ni, L_CLONE); - if (ni == 0) res = pixGetXRes(pix1); - pixGetDimensions(pix1, &wt, &ht, 
NULL); - pixRasterop(pixd, xstart, ystart, wt, ht, PIX_SRC, pix1, 0, 0); - pixDestroy(&pix1); - } - } - pixSetResolution(pixd, res, res); - - pixaDestroy(&pixa1); - return pixd; -} - - -/*! - * \brief pixaDisplayTiledInRows() - * - * \param[in] pixa - * \param[in] outdepth output depth: 1, 8 or 32 bpp - * \param[in] maxwidth of output image - * \param[in] scalefactor applied to every pix; use 1.0 for no scaling - * \param[in] background 0 for white, 1 for black; this is the color - * of the spacing between the images - * \param[in] spacing between images, and on outside - * \param[in] border width of black border added to each image; - * use 0 for no border - * \return pixd of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This renders a pixa to a single image of width not to
- *          exceed maxwidth, with background color either white or black,
- *          and with each row tiled such that the top of each pix is
- *          aligned and separated by 'spacing' from the next one.
- *          A black border can be added to each pix.
- *      (2) All pix are converted to outdepth; existing colormaps are removed.
- *      (3) This does a reasonably spacewise-efficient job of laying
- *          out the individual pix images into a tiled composite.
- *      (4) A serialized boxa giving the location in pixd of each input
- *          pix (without added border) is stored in the text string of pixd.
- *          This allows, e.g., regeneration of a pixa from pixd, using
- *          pixaCreateFromBoxa().  If there is no scaling and the depth of
- *          each input pix in the pixa is the same, this tiling operation
- *          can be inverted using the boxa (except for loss of text in
- *          each of the input pix):
- *            pix1 = pixaDisplayTiledInRows(pixa1, 1, 1500, 1.0, 0, 30, 0);
- *            char *boxatxt = pixGetText(pix1);
- *            boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt));
- *            pixa2 = pixaCreateFromBoxa(pix1, boxa1, 0, 0, NULL);
- * 
- */ -PIX * -pixaDisplayTiledInRows(PIXA *pixa, - l_int32 outdepth, - l_int32 maxwidth, - l_float32 scalefactor, - l_int32 background, - l_int32 spacing, - l_int32 border) -{ -l_int32 h; /* cumulative height over all the rows */ -l_int32 w; /* cumulative height in the current row */ -l_int32 bordval, wtry, wt, ht; -l_int32 irow; /* index of current pix in current row */ -l_int32 wmaxrow; /* width of the largest row */ -l_int32 maxh; /* max height in row */ -l_int32 i, j, index, n, x, y, nrows, ninrow, res; -size_t size; -l_uint8 *data; -BOXA *boxa; -NUMA *nainrow; /* number of pix in the row */ -NUMA *namaxh; /* height of max pix in the row */ -PIX *pix, *pixn, *pix1, *pixd; -PIXA *pixan; - - PROCNAME("pixaDisplayTiledInRows"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if (outdepth != 1 && outdepth != 8 && outdepth != 32) - return (PIX *)ERROR_PTR("outdepth not in {1, 8, 32}", procName, NULL); - if (border < 0) - border = 0; - if (scalefactor <= 0.0) scalefactor = 1.0; - - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - - /* Normalize depths, scale, remove colormaps; optionally add border */ - pixan = pixaCreate(n); - bordval = (outdepth == 1) ? 
1 : 0; - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) - continue; - - if (outdepth == 1) - pixn = pixConvertTo1(pix, 128); - else if (outdepth == 8) - pixn = pixConvertTo8(pix, FALSE); - else /* outdepth == 32 */ - pixn = pixConvertTo32(pix); - pixDestroy(&pix); - - if (scalefactor != 1.0) - pix1 = pixScale(pixn, scalefactor, scalefactor); - else - pix1 = pixClone(pixn); - if (border) - pixd = pixAddBorder(pix1, border, bordval); - else - pixd = pixClone(pix1); - pixDestroy(&pixn); - pixDestroy(&pix1); - - pixaAddPix(pixan, pixd, L_INSERT); - } - if (pixaGetCount(pixan) != n) { - n = pixaGetCount(pixan); - L_WARNING("only got %d components\n", procName, n); - if (n == 0) { - pixaDestroy(&pixan); - return (PIX *)ERROR_PTR("no components", procName, NULL); - } - } - - /* Compute parameters for layout */ - nainrow = numaCreate(0); - namaxh = numaCreate(0); - wmaxrow = 0; - w = h = spacing; - maxh = 0; /* max height in row */ - for (i = 0, irow = 0; i < n; i++, irow++) { - pixaGetPixDimensions(pixan, i, &wt, &ht, NULL); - wtry = w + wt + spacing; - if (wtry > maxwidth) { /* end the current row and start next one */ - numaAddNumber(nainrow, irow); - numaAddNumber(namaxh, maxh); - wmaxrow = L_MAX(wmaxrow, w); - h += maxh + spacing; - irow = 0; - w = wt + 2 * spacing; - maxh = ht; - } else { - w = wtry; - maxh = L_MAX(maxh, ht); - } - } - - /* Enter the parameters for the last row */ - numaAddNumber(nainrow, irow); - numaAddNumber(namaxh, maxh); - wmaxrow = L_MAX(wmaxrow, w); - h += maxh + spacing; - - if ((pixd = pixCreate(wmaxrow, h, outdepth)) == NULL) { - numaDestroy(&nainrow); - numaDestroy(&namaxh); - pixaDestroy(&pixan); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - - /* Reset the background color if necessary */ - if ((background == 1 && outdepth == 1) || - (background == 0 && outdepth != 1)) - pixSetAll(pixd); - - /* Blit the images to the dest, and save the boxa identifying - * the image regions that do not 
include the borders. */ - nrows = numaGetCount(nainrow); - y = spacing; - boxa = boxaCreate(n); - for (i = 0, index = 0; i < nrows; i++) { /* over rows */ - numaGetIValue(nainrow, i, &ninrow); - numaGetIValue(namaxh, i, &maxh); - x = spacing; - for (j = 0; j < ninrow; j++, index++) { /* over pix in row */ - pix = pixaGetPix(pixan, index, L_CLONE); - if (index == 0) { - res = pixGetXRes(pix); - pixSetResolution(pixd, res, res); - } - pixGetDimensions(pix, &wt, &ht, NULL); - boxaAddBox(boxa, boxCreate(x + border, y + border, - wt - 2 * border, ht - 2 *border), L_INSERT); - pixRasterop(pixd, x, y, wt, ht, PIX_SRC, pix, 0, 0); - pixDestroy(&pix); - x += wt + spacing; - } - y += maxh + spacing; - } - boxaWriteMem(&data, &size, boxa); - pixSetText(pixd, (char *)data); /* data is ascii */ - LEPT_FREE(data); - boxaDestroy(&boxa); - - numaDestroy(&nainrow); - numaDestroy(&namaxh); - pixaDestroy(&pixan); - return pixd; -} - - -/*! - * \brief pixaDisplayTiledInColumns() - * - * \param[in] pixas - * \param[in] nx number of columns in output image - * \param[in] scalefactor applied to every pix; use 1.0 for no scaling - * \param[in] spacing between images, and on outside - * \param[in] border width of black border added to each image; - * use 0 for no border - * \return pixd of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This renders a pixa to a single image with &nx columns of
- *          subimages.  The background color is white, and each row
- *          is tiled such that the top of each pix is aligned and
- *          each pix is separated by 'spacing' from the next one.
- *          A black border can be added to each pix.
- *      (2) The output depth is determined by the largest depth
- *          required by the pix in the pixa.  Colormaps are removed.
- *      (3) A serialized boxa giving the location in pixd of each input
- *          pix (without added border) is stored in the text string of pixd.
- *          This allows, e.g., regeneration of a pixa from pixd, using
- *          pixaCreateFromBoxa().  If there is no scaling and the depth of
- *          each input pix in the pixa is the same, this tiling operation
- *          can be inverted using the boxa (except for loss of text in
- *          each of the input pix):
- *            pix1 = pixaDisplayTiledInColumns(pixa1, 3, 1.0, 0, 30, 2);
- *            char *boxatxt = pixGetText(pix1);
- *            boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt));
- *            pixa2 = pixaCreateFromBoxa(pix1, boxa1, NULL);
- * 
- */ -PIX * -pixaDisplayTiledInColumns(PIXA *pixas, - l_int32 nx, - l_float32 scalefactor, - l_int32 spacing, - l_int32 border) -{ -l_int32 i, j, index, n, x, y, nrows, wb, hb, w, h, maxd, maxh, bordval, res; -size_t size; -l_uint8 *data; -BOX *box; -BOXA *boxa; -PIX *pix1, *pix2, *pix3, *pixd; -PIXA *pixa1, *pixa2; - - PROCNAME("pixaDisplayTiledInColumns"); - - if (!pixas) - return (PIX *)ERROR_PTR("pixas not defined", procName, NULL); - if (border < 0) - border = 0; - if (scalefactor <= 0.0) scalefactor = 1.0; - - if ((n = pixaGetCount(pixas)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - - /* Convert to same depth, if necessary */ - pixa1 = pixaConvertToSameDepth(pixas); - pixaGetDepthInfo(pixa1, &maxd, NULL); - - /* Scale and optionally add border */ - pixa2 = pixaCreate(n); - bordval = (maxd == 1) ? 1 : 0; - for (i = 0; i < n; i++) { - if ((pix1 = pixaGetPix(pixa1, i, L_CLONE)) == NULL) - continue; - if (scalefactor != 1.0) - pix2 = pixScale(pix1, scalefactor, scalefactor); - else - pix2 = pixClone(pix1); - if (border) - pix3 = pixAddBorder(pix2, border, bordval); - else - pix3 = pixClone(pix2); - if (i == 0) res = pixGetXRes(pix3); - pixaAddPix(pixa2, pix3, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - pixaDestroy(&pixa1); - if (pixaGetCount(pixa2) != n) { - n = pixaGetCount(pixa2); - L_WARNING("only got %d components\n", procName, n); - if (n == 0) { - pixaDestroy(&pixa2); - return (PIX *)ERROR_PTR("no components", procName, NULL); - } - } - - /* Compute layout parameters and save as a boxa */ - boxa = boxaCreate(n); - nrows = (n + nx - 1) / nx; - y = spacing; - for (i = 0, index = 0; i < nrows; i++) { - x = spacing; - maxh = 0; - for (j = 0; j < nx && index < n; j++) { - pixaGetPixDimensions(pixa2, index, &wb, &hb, NULL); - box = boxCreate(x, y, wb, hb); - boxaAddBox(boxa, box, L_INSERT); - maxh = L_MAX(maxh, hb + spacing); - x += wb + spacing; - index++; - } - y += maxh; - } - pixaSetBoxa(pixa2, boxa, L_INSERT); - - /* 
Render the output pix */ - boxaGetExtent(boxa, &w, &h, NULL); - pixd = pixaDisplay(pixa2, w + spacing, h + spacing); - pixSetResolution(pixd, res, res); - - /* Save the boxa in the text field of the output pix */ - boxaWriteMem(&data, &size, boxa); - pixSetText(pixd, (char *)data); /* data is ascii */ - LEPT_FREE(data); - - pixaDestroy(&pixa2); - return pixd; -} - - -/*! - * \brief pixaDisplayTiledAndScaled() - * - * \param[in] pixa - * \param[in] outdepth output depth: 1, 8 or 32 bpp - * \param[in] tilewidth each pix is scaled to this width - * \param[in] ncols number of tiles in each row - * \param[in] background 0 for white, 1 for black; this is the color - * of the spacing between the images - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \return pix of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This can be used to tile a number of renderings of
- *          an image that are at different scales and depths.
- *      (2) Each image, after scaling and optionally adding the
- *          black border, has width 'tilewidth'.  Thus, the border does
- *          not affect the spacing between the image tiles.  The
- *          maximum allowed border width is tilewidth / 5.
- * 
- */ -PIX * -pixaDisplayTiledAndScaled(PIXA *pixa, - l_int32 outdepth, - l_int32 tilewidth, - l_int32 ncols, - l_int32 background, - l_int32 spacing, - l_int32 border) -{ -l_int32 x, y, w, h, wd, hd, d, res; -l_int32 i, n, nrows, maxht, ninrow, irow, bordval; -l_int32 *rowht; -l_float32 scalefact; -PIX *pix, *pixn, *pix1, *pixb, *pixd; -PIXA *pixan; - - PROCNAME("pixaDisplayTiledAndScaled"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if (outdepth != 1 && outdepth != 8 && outdepth != 32) - return (PIX *)ERROR_PTR("outdepth not in {1, 8, 32}", procName, NULL); - if (ncols <= 0) - return (PIX *)ERROR_PTR("ncols must be > 0", procName, NULL); - if (border < 0 || border > tilewidth / 5) - border = 0; - - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - - /* Normalize scale and depth for each pix; optionally add border */ - pixan = pixaCreate(n); - bordval = (outdepth == 1) ? 1 : 0; - for (i = 0; i < n; i++) { - if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) - continue; - - pixGetDimensions(pix, &w, &h, &d); - scalefact = (l_float32)(tilewidth - 2 * border) / (l_float32)w; - if (d == 1 && outdepth > 1 && scalefact < 1.0) - pix1 = pixScaleToGray(pix, scalefact); - else - pix1 = pixScale(pix, scalefact, scalefact); - - if (outdepth == 1) - pixn = pixConvertTo1(pix1, 128); - else if (outdepth == 8) - pixn = pixConvertTo8(pix1, FALSE); - else /* outdepth == 32 */ - pixn = pixConvertTo32(pix1); - pixDestroy(&pix1); - - if (border) - pixb = pixAddBorder(pixn, border, bordval); - else - pixb = pixClone(pixn); - - pixaAddPix(pixan, pixb, L_INSERT); - pixDestroy(&pix); - pixDestroy(&pixn); - } - if ((n = pixaGetCount(pixan)) == 0) { /* should not have changed! 
*/ - pixaDestroy(&pixan); - return (PIX *)ERROR_PTR("no components", procName, NULL); - } - - /* Determine the size of each row and of pixd */ - wd = tilewidth * ncols + spacing * (ncols + 1); - nrows = (n + ncols - 1) / ncols; - if ((rowht = (l_int32 *)LEPT_CALLOC(nrows, sizeof(l_int32))) == NULL) { - pixaDestroy(&pixan); - return (PIX *)ERROR_PTR("rowht array not made", procName, NULL); - } - maxht = 0; - ninrow = 0; - irow = 0; - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixan, i, L_CLONE); - ninrow++; - pixGetDimensions(pix, &w, &h, NULL); - maxht = L_MAX(h, maxht); - if (ninrow == ncols) { - rowht[irow] = maxht; - maxht = ninrow = 0; /* reset */ - irow++; - } - pixDestroy(&pix); - } - if (ninrow > 0) { /* last fencepost */ - rowht[irow] = maxht; - irow++; /* total number of rows */ - } - nrows = irow; - hd = spacing * (nrows + 1); - for (i = 0; i < nrows; i++) - hd += rowht[i]; - - pixd = pixCreate(wd, hd, outdepth); - if ((background == 1 && outdepth == 1) || - (background == 0 && outdepth != 1)) - pixSetAll(pixd); - - /* Now blit images to pixd */ - x = y = spacing; - irow = 0; - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixan, i, L_CLONE); - if (i == 0) { - res = pixGetXRes(pix); - pixSetResolution(pixd, res, res); - } - pixGetDimensions(pix, &w, &h, NULL); - if (i && ((i % ncols) == 0)) { /* start new row */ - x = spacing; - y += spacing + rowht[irow]; - irow++; - } - pixRasterop(pixd, x, y, w, h, PIX_SRC, pix, 0, 0); - x += tilewidth + spacing; - pixDestroy(&pix); - } - - pixaDestroy(&pixan); - LEPT_FREE(rowht); - return pixd; -} - - -/*! - * \brief pixaDisplayTiledWithText() - * - * \param[in] pixa - * \param[in] maxwidth of output image - * \param[in] scalefactor applied to every pix; use 1.0 for no scaling - * \param[in] spacing between images, and on outside - * \param[in] border width of black border added to each image; - * use 0 for no border - * \param[in] fontsize 4, 6, ... 
20 - * \param[in] textcolor 0xrrggbb00 - * \return pixd of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This is a version of pixaDisplayTiledInRows() that prints, below
- *          each pix, the text in the pix text field.  Up to 127 chars
- *          of text in the pix text field are rendered below each pix.
- *      (2) It renders a pixa to a single image of width not to
- *          exceed %maxwidth, with white background color, with each row
- *          tiled such that the top of each pix is aligned and separated
- *          by %spacing from the next one.
- *      (3) All pix are converted to 32 bpp.
- *      (4) This does a reasonably spacewise-efficient job of laying
- *          out the individual pix images into a tiled composite.
- * 
- */ -PIX * -pixaDisplayTiledWithText(PIXA *pixa, - l_int32 maxwidth, - l_float32 scalefactor, - l_int32 spacing, - l_int32 border, - l_int32 fontsize, - l_uint32 textcolor) -{ -char buf[128]; -char *textstr; -l_int32 i, n, maxw; -L_BMF *bmf; -PIX *pix1, *pix2, *pix3, *pix4, *pixd; -PIXA *pixad; - - PROCNAME("pixaDisplayTiledWithText"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - if (maxwidth <= 0) - return (PIX *)ERROR_PTR("invalid maxwidth", procName, NULL); - if (border < 0) - border = 0; - if (scalefactor <= 0.0) { - L_WARNING("invalid scalefactor; setting to 1.0\n", procName); - scalefactor = 1.0; - } - if (fontsize < 4 || fontsize > 20 || (fontsize & 1)) { - l_int32 fsize = L_MAX(L_MIN(fontsize, 20), 4); - if (fsize & 1) fsize--; - L_WARNING("changed fontsize from %d to %d\n", procName, - fontsize, fsize); - fontsize = fsize; - } - - /* Be sure the width can accommodate a single column of images */ - pixaSizeRange(pixa, NULL, NULL, &maxw, NULL); - maxwidth = L_MAX(maxwidth, scalefactor * (maxw + 2 * spacing + 2 * border)); - - bmf = bmfCreate(NULL, fontsize); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pix3 = pixAddBorderGeneral(pix2, spacing, spacing, spacing, - spacing, 0xffffff00); - textstr = pixGetText(pix1); - if (textstr && strlen(textstr) > 0) { - snprintf(buf, sizeof(buf), "%s", textstr); - pix4 = pixAddSingleTextblock(pix3, bmf, buf, textcolor, - L_ADD_BELOW, NULL); - } else { - pix4 = pixClone(pix3); - } - pixaAddPix(pixad, pix4, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - } - bmfDestroy(&bmf); - - pixd = pixaDisplayTiledInRows(pixad, 32, maxwidth, scalefactor, - 0, 10, border); - pixaDestroy(&pixad); - return pixd; -} - - -/*! 
- * \brief pixaDisplayTiledByIndex() - * - * \param[in] pixa - * \param[in] na numa with indices corresponding to the pix in pixa - * \param[in] width each pix is scaled to this width - * \param[in] spacing between images, and on outside - * \param[in] border width of black border added to each image; - * use 0 for no border - * \param[in] fontsize 4, 6, ... 20 - * \param[in] textcolor 0xrrggbb00 - * \return pixd of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This renders a pixa to a single image with white
- *          background color, where the pix are placed in columns
- *          given by the index value in the numa.  Each pix
- *          is separated by %spacing from the adjacent ones, and
- *          an optional border is placed around them.
- *      (2) Up to 127 chars of text in the pix text field are rendered
- *          below each pix.  Use newlines in the text field to write
- *          the text in multiple lines that fit within the pix width.
- *      (3) To avoid having empty columns, if there are N different
- *          index values, they should be in [0 ... N-1].
- *      (4) All pix are converted to 32 bpp.
- * 
- */ -PIX * -pixaDisplayTiledByIndex(PIXA *pixa, - NUMA *na, - l_int32 width, - l_int32 spacing, - l_int32 border, - l_int32 fontsize, - l_uint32 textcolor) -{ -char buf[128]; -char *textstr; -l_int32 i, n, x, y, w, h, yval, index; -l_float32 maxindex; -L_BMF *bmf; -BOX *box; -NUMA *nay; /* top of the next pix to add in that column */ -PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pixd; -PIXA *pixad; - - PROCNAME("pixaDisplayTiledByIndex"); - - if (!pixa) - return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); - if (!na) - return (PIX *)ERROR_PTR("na not defined", procName, NULL); - if ((n = pixaGetCount(pixa)) == 0) - return (PIX *)ERROR_PTR("no pixa components", procName, NULL); - if (n != numaGetCount(na)) - return (PIX *)ERROR_PTR("pixa and na counts differ", procName, NULL); - if (width <= 0) - return (PIX *)ERROR_PTR("invalid width", procName, NULL); - if (width < 20) - L_WARNING("very small width: %d\n", procName, width); - if (border < 0) - border = 0; - if (fontsize < 4 || fontsize > 20 || (fontsize & 1)) { - l_int32 fsize = L_MAX(L_MIN(fontsize, 20), 4); - if (fsize & 1) fsize--; - L_WARNING("changed fontsize from %d to %d\n", procName, - fontsize, fsize); - fontsize = fsize; - } - - /* The pix will be rendered in the order they occupy in pixa. 
*/ - bmf = bmfCreate(NULL, fontsize); - pixad = pixaCreate(n); - numaGetMax(na, &maxindex, NULL); - nay = numaMakeConstant(spacing, lept_roundftoi(maxindex) + 1); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &index); - numaGetIValue(nay, index, &yval); - pix1 = pixaGetPix(pixa, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pix3 = pixScaleToSize(pix2, width, 0); - pix4 = pixAddBorderGeneral(pix3, border, border, border, border, 0); - textstr = pixGetText(pix1); - if (textstr && strlen(textstr) > 0) { - snprintf(buf, sizeof(buf), "%s", textstr); - pix5 = pixAddTextlines(pix4, bmf, textstr, textcolor, L_ADD_BELOW); - } else { - pix5 = pixClone(pix4); - } - pixaAddPix(pixad, pix5, L_INSERT); - x = spacing + border + index * (2 * border + width + spacing); - y = yval; - pixGetDimensions(pix5, &w, &h, NULL); - yval += h + spacing; - numaSetValue(nay, index, yval); - box = boxCreate(x, y, w, h); - pixaAddBox(pixad, box, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - } - numaDestroy(&nay); - bmfDestroy(&bmf); - - pixd = pixaDisplay(pixad, 0, 0); - pixaDestroy(&pixad); - return pixd; -} - - - -/*---------------------------------------------------------------------* - * Pixaa Display * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaaDisplay() - * - * \param[in] paa - * \param[in] w, h if set to 0, the size is determined from the - * bounding box of the components in pixa - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) Each pix of the paa is displayed at the location given by
- *          its box, translated by the box of the containing pixa
- *          if it exists.
- * 
- */ -PIX * -pixaaDisplay(PIXAA *paa, - l_int32 w, - l_int32 h) -{ -l_int32 i, j, n, nbox, na, d, wmax, hmax, x, y, xb, yb, wb, hb; -BOXA *boxa1; /* top-level boxa */ -BOXA *boxa; -PIX *pix1, *pixd; -PIXA *pixa; - - PROCNAME("pixaaDisplay"); - - if (!paa) - return (PIX *)ERROR_PTR("paa not defined", procName, NULL); - - n = pixaaGetCount(paa, NULL); - if (n == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - - /* If w and h not input, determine the minimum size required - * to contain the origin and all c.c. */ - boxa1 = pixaaGetBoxa(paa, L_CLONE); - nbox = boxaGetCount(boxa1); - if (w == 0 || h == 0) { - if (nbox == n) { - boxaGetExtent(boxa1, &w, &h, NULL); - } else { /* have to use the lower-level boxa for each pixa */ - wmax = hmax = 0; - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - boxa = pixaGetBoxa(pixa, L_CLONE); - boxaGetExtent(boxa, &w, &h, NULL); - wmax = L_MAX(wmax, w); - hmax = L_MAX(hmax, h); - pixaDestroy(&pixa); - boxaDestroy(&boxa); - } - w = wmax; - h = hmax; - } - } - - /* Get depth from first pix */ - pixa = pixaaGetPixa(paa, 0, L_CLONE); - pix1 = pixaGetPix(pixa, 0, L_CLONE); - d = pixGetDepth(pix1); - pixaDestroy(&pixa); - pixDestroy(&pix1); - - if ((pixd = pixCreate(w, h, d)) == NULL) { - boxaDestroy(&boxa1); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - - x = y = 0; - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - if (nbox == n) - boxaGetBoxGeometry(boxa1, i, &x, &y, NULL, NULL); - na = pixaGetCount(pixa); - for (j = 0; j < na; j++) { - pixaGetBoxGeometry(pixa, j, &xb, &yb, &wb, &hb); - pix1 = pixaGetPix(pixa, j, L_CLONE); - pixRasterop(pixd, x + xb, y + yb, wb, hb, PIX_PAINT, pix1, 0, 0); - pixDestroy(&pix1); - } - pixaDestroy(&pixa); - } - boxaDestroy(&boxa1); - - return pixd; -} - - -/*! 
- * \brief pixaaDisplayByPixa() - * - * \param[in] paa - * \param[in] maxnx maximum number of columns for rendering each pixa - * \param[in] scalefactor applied to every pix; use 1.0 for no scaling - * \param[in] hspacing between images on a row (in the pixa) - * \param[in] vspacing between tiles rows, each corresponding to a pixa - * \param[in] border width of black border added to each image; - * use 0 for no border - * \return pixd of images in %paa, tiled by pixa in row-major order - * - *
- * Notes:
- *      (1) This renders a pixaa into a single image.  The pix from each pixa
- *          are rendered on a row.  If the number of pix in the pixa is
- *          larger than %maxnx, the pix will be rendered into more than 1 row.
- *          To insure that each pixa is rendered into one row, use %maxnx
- *          at least as large as the max number of pix in the pixa.
- *      (2) Each row is tiled such that the top of each pix is aligned and
- *          each pix is separated by %hspacing from the next one.
- *          A black border can be added to each pix.
- *      (3) The resulting pix from each row are then rendered vertically,
- *          separated by %vspacing from each other.
- *      (4) The output depth is determined by the largest depth of all
- *          the pix in %paa. Colormaps are removed.
- * 
- */ -PIX * -pixaaDisplayByPixa(PIXAA *paa, - l_int32 maxnx, - l_float32 scalefactor, - l_int32 hspacing, - l_int32 vspacing, - l_int32 border) -{ -l_int32 i, n, vs; -PIX *pix1, *pix2; -PIXA *pixa1, *pixa2; - - PROCNAME("pixaaDisplayByPixa"); - - if (!paa) - return (PIX *)ERROR_PTR("paa not defined", procName, NULL); - if (scalefactor <= 0.0) scalefactor = 1.0; - if (hspacing < 0) hspacing = 0; - if (vspacing < 0) vspacing = 0; - if (border < 0) border = 0; - - if ((n = pixaaGetCount(paa, NULL)) == 0) - return (PIX *)ERROR_PTR("no components", procName, NULL); - - /* Vertical spacing of amount %hspacing is also added at this step */ - pixa2 = pixaCreate(0); - for (i = 0; i < n; i++) { - pixa1 = pixaaGetPixa(paa, i, L_CLONE); - pix1 = pixaDisplayTiledInColumns(pixa1, maxnx, scalefactor, - hspacing, border); - pixaAddPix(pixa2, pix1, L_INSERT); - pixaDestroy(&pixa1); - } - - vs = vspacing - 2 * hspacing; - pix2 = pixaDisplayTiledInColumns(pixa2, 1, scalefactor, vs, 0); - pixaDestroy(&pixa2); - return pix2; -} - - -/*! - * \brief pixaaDisplayTiledAndScaled() - * - * \param[in] paa - * \param[in] outdepth output depth: 1, 8 or 32 bpp - * \param[in] tilewidth each pix is scaled to this width - * \param[in] ncols number of tiles in each row - * \param[in] background 0 for white, 1 for black; this is the color - * of the spacing between the images - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \return pixa of tiled images, one image for each pixa in - * the paa, or NULL on error - * - *
- * Notes:
- *      (1) For each pixa, this generates from all the pix a
- *          tiled/scaled output pix, and puts it in the output pixa.
- *      (2) See comments in pixaDisplayTiledAndScaled().
- * 
- */ -PIXA * -pixaaDisplayTiledAndScaled(PIXAA *paa, - l_int32 outdepth, - l_int32 tilewidth, - l_int32 ncols, - l_int32 background, - l_int32 spacing, - l_int32 border) -{ -l_int32 i, n; -PIX *pix; -PIXA *pixa, *pixad; - - PROCNAME("pixaaDisplayTiledAndScaled"); - - if (!paa) - return (PIXA *)ERROR_PTR("paa not defined", procName, NULL); - if (outdepth != 1 && outdepth != 8 && outdepth != 32) - return (PIXA *)ERROR_PTR("outdepth not in {1, 8, 32}", procName, NULL); - if (ncols <= 0) - return (PIXA *)ERROR_PTR("ncols must be > 0", procName, NULL); - if (border < 0 || border > tilewidth / 5) - border = 0; - - if ((n = pixaaGetCount(paa, NULL)) == 0) - return (PIXA *)ERROR_PTR("no components", procName, NULL); - - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - pix = pixaDisplayTiledAndScaled(pixa, outdepth, tilewidth, ncols, - background, spacing, border); - pixaAddPix(pixad, pix, L_INSERT); - pixaDestroy(&pixa); - } - - return pixad; -} - - -/*---------------------------------------------------------------------* - * Conversion of all pix to specified type (e.g., depth) * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaConvertTo1() - * - * \param[in] pixas - * \param[in] thresh threshold for final binarization from 8 bpp gray - * \return pixad, or NULL on error - */ -PIXA * -pixaConvertTo1(PIXA *pixas, - l_int32 thresh) -{ -l_int32 i, n; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaConvertTo1"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixConvertTo1(pix1, thresh); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - boxa = pixaGetBoxa(pixas, L_COPY); - pixaSetBoxa(pixad, boxa, L_INSERT); - return pixad; -} - - -/*! 
- * \brief pixaConvertTo8() - * - * \param[in] pixas - * \param[in] cmapflag 1 to give pixd a colormap; 0 otherwise - * \return pixad each pix is 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) See notes for pixConvertTo8(), applied to each pix in pixas.
- * 
- */ -PIXA * -pixaConvertTo8(PIXA *pixas, - l_int32 cmapflag) -{ -l_int32 i, n; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaConvertTo8"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixConvertTo8(pix1, cmapflag); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - boxa = pixaGetBoxa(pixas, L_COPY); - pixaSetBoxa(pixad, boxa, L_INSERT); - return pixad; -} - - -/*! - * \brief pixaConvertTo8Colormap() - * - * \param[in] pixas - * \param[in] dither 1 to dither if necessary; 0 otherwise - * \return pixad each pix is 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) See notes for pixConvertTo8Colormap(), applied to each pix in pixas.
- * 
- */ -PIXA * -pixaConvertTo8Colormap(PIXA *pixas, - l_int32 dither) -{ -l_int32 i, n; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaConvertTo8Colormap"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixConvertTo8Colormap(pix1, dither); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - boxa = pixaGetBoxa(pixas, L_COPY); - pixaSetBoxa(pixad, boxa, L_INSERT); - return pixad; -} - - -/*! - * \brief pixaConvertTo32() - * - * \param[in] pixas - * \return pixad 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) See notes for pixConvertTo32(), applied to each pix in pixas.
- *      (2) This can be used to allow 1 bpp pix in a pixa to be displayed
- *          with color.
- * 
- */ -PIXA * -pixaConvertTo32(PIXA *pixas) -{ -l_int32 i, n; -BOXA *boxa; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaConvertTo32"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - boxa = pixaGetBoxa(pixas, L_COPY); - pixaSetBoxa(pixad, boxa, L_INSERT); - return pixad; -} - - -/*---------------------------------------------------------------------* - * Pixa constrained selection * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaConstrainedSelect() - * - * \param[in] pixas - * \param[in] first first index to choose; >= 0 - * \param[in] last biggest possible index to reach; - * use -1 to go to the end; otherwise, last >= first - * \param[in] nmax maximum number of pix to select; > 0 - * \param[in] use_pairs 1 = select pairs of adjacent pix; - * 0 = select individual pix - * \param[in] copyflag L_COPY, L_CLONE - * \return pixad if OK, NULL on error - * - *
- * Notes:
- *     (1) See notes in genConstrainedNumaInRange() for how selection
- *         is made.
- *     (2) This returns a selection of the pix in the input pixa.
- *     (3) Use copyflag == L_COPY if you don't want changes in the pix
- *         in the returned pixa to affect those in the input pixa.
- * 
- */ -PIXA * -pixaConstrainedSelect(PIXA *pixas, - l_int32 first, - l_int32 last, - l_int32 nmax, - l_int32 use_pairs, - l_int32 copyflag) -{ -l_int32 i, n, nselect, index; -NUMA *na; -PIX *pix1; -PIXA *pixad; - - PROCNAME("pixaConstrainedSelect"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - n = pixaGetCount(pixas); - first = L_MAX(0, first); - last = (last < 0) ? n - 1 : L_MIN(n - 1, last); - if (last < first) - return (PIXA *)ERROR_PTR("last < first!", procName, NULL); - if (nmax < 1) - return (PIXA *)ERROR_PTR("nmax < 1!", procName, NULL); - - na = genConstrainedNumaInRange(first, last, nmax, use_pairs); - nselect = numaGetCount(na); - pixad = pixaCreate(nselect); - for (i = 0; i < nselect; i++) { - numaGetIValue(na, i, &index); - pix1 = pixaGetPix(pixas, index, copyflag); - pixaAddPix(pixad, pix1, L_INSERT); - } - numaDestroy(&na); - return pixad; -} - - -/*! - * \brief pixaSelectToPdf() - * - * \param[in] pixas - * \param[in] first first index to choose; >= 0 - * \param[in] last biggest possible index to reach; - * use -1 to go to the end; otherwise, last >= first - * \param[in] res override the resolution of each input image, in ppi; - * use 0 to respect the resolution embedded in the input - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, or 0 for default - * \param[in] quality used for JPEG only; 0 for default (75) - * \param[in] color of numbers added to each image (e.g., 0xff000000) - * \param[in] fontsize to print number below each image. The valid set - * is {4,6,8,10,12,14,16,18,20}. Use 0 to disable. - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This writes a pdf of the selected images from %pixas, one to
- *          a page.  They are optionally scaled and annotated with the
- *          index printed to the left of the image.
- *      (2) If the input images are 1 bpp and you want the numbers to be
- *          in color, first promote each pix to 8 bpp with a colormap:
- *                pixa1 = pixaConvertTo8(pixas, 1);
- *          and then call this function with the specified color
- * 
- */ -l_ok -pixaSelectToPdf(PIXA *pixas, - l_int32 first, - l_int32 last, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - l_uint32 color, - l_int32 fontsize, - const char *fileout) -{ -l_int32 n; -L_BMF *bmf; -NUMA *na; -PIXA *pixa1, *pixa2; - - PROCNAME("pixaSelectToPdf"); - - if (!pixas) - return ERROR_INT("pixas not defined", procName, 1); - if (type < 0 || type > L_FLATE_ENCODE) { - L_WARNING("invalid compression type; using default\n", procName); - type = 0; - } - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - /* Select from given range */ - n = pixaGetCount(pixas); - first = L_MAX(0, first); - last = (last < 0) ? n - 1 : L_MIN(n - 1, last); - if (first > last) { - L_ERROR("first = %d > last = %d\n", procName, first, last); - return 1; - } - pixa1 = pixaSelectRange(pixas, first, last, L_CLONE); - - /* Optionally add index numbers */ - bmf = (fontsize <= 0) ? NULL : bmfCreate(NULL, fontsize); - if (bmf) { - na = numaMakeSequence(first, 1.0, last - first + 1); - pixa2 = pixaAddTextNumber(pixa1, bmf, na, color, L_ADD_LEFT); - numaDestroy(&na); - } else { - pixa2 = pixaCopy(pixa1, L_CLONE); - } - pixaDestroy(&pixa1); - bmfDestroy(&bmf); - - pixaConvertToPdf(pixa2, res, scalefactor, type, quality, NULL, fileout); - pixaDestroy(&pixa2); - return 0; -} - - -/*---------------------------------------------------------------------* - * Generate pixa from tiled images * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaMakeFromTiledPixa() - * - * \param[in] pixas of mosaiced templates, one for each digit - * \param[in] w width of samples (use 0 for default = 20) - * \param[in] h height of samples (use 0 for default = 30) - * \param[in] nsamp number of requested samples (use 0 for default = 100) - * \return pixa of individual, scaled templates, or NULL on error - * - *
- * Notes:
- *      (1) This converts from a compressed representation of 1 bpp digit
- *          templates to a pixa where each pix has a single labeled template.
- *      (2) The mosaics hold 100 templates each, and the number of templates
- *          %nsamp selected for each digit can be between 1 and 100.
- *      (3) Each mosaic has the number of images written in the text field,
- *          and the i-th pix contains samples of the i-th digit.  That value
- *          is written into the text field of each template in the output.
- * 
- */ -PIXA * -pixaMakeFromTiledPixa(PIXA *pixas, - l_int32 w, - l_int32 h, - l_int32 nsamp) -{ -char buf[8]; -l_int32 ntiles, i; -PIX *pix1; -PIXA *pixad, *pixa1; - - PROCNAME("pixaMakeFromTiledPixa"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (nsamp > 1000) - return (PIXA *)ERROR_PTR("nsamp too large; typ. 100", procName, NULL); - - if (w <= 0) w = 20; - if (h <= 0) h = 30; - if (nsamp <= 0) nsamp = 100; - - /* pixas has 10 pix of mosaic'd digits. Each of these images - * must be extracted into a pixa of templates, where each template - * is labeled with the digit value, and then selectively - * concatenated into an output pixa. */ - pixad = pixaCreate(10 * nsamp); - for (i = 0; i < 10; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pixGetTileCount(pix1, &ntiles); - if (nsamp > ntiles) - L_WARNING("requested %d; only %d tiles\n", procName, nsamp, ntiles); - pixa1 = pixaMakeFromTiledPix(pix1, w, h, 0, nsamp, NULL); - snprintf(buf, sizeof(buf), "%d", i); - pixaSetText(pixa1, buf, NULL); - pixaJoin(pixad, pixa1, 0, -1); - pixaDestroy(&pixa1); - pixDestroy(&pix1); - } - return pixad; -} - - -/*! - * \brief pixaMakeFromTiledPix() - * - * \param[in] pixs any depth; colormap OK - * \param[in] w width of each tile - * \param[in] h height of each tile - * \param[in] start first tile to use - * \param[in] num number of tiles; use 0 to go to the end - * \param[in] boxa [optional] location of rectangular regions - * to be extracted - * \return pixa if OK, NULL on error - * - *
- * Notes:
- *      (1) Operations that generate a pix by tiling from a pixa, and
- *          the inverse that generate a pixa from tiles of a pix,
- *          are useful.  One such pair is pixaDisplayUnsplit() and
- *          pixaSplitPix().  This function is a very simple one that
- *          generates a pixa from tiles of a pix. There are two cases:
- *            - the tiles can all be the same size (the inverse of
- *              pixaDisplayOnLattice(), or
- *            - the tiles can differ in size, where there is an
- *              associated boxa (the inverse of pixaCreateFromBoxa().
- *      (2) If all tiles are the same size, %w by %h, use %boxa = NULL.
- *          If the tiles differ in size, use %boxa to extract the
- *          individual images (%w and %h are then ignored).
- *      (3) If the pix was made by pixaDisplayOnLattice(), the number
- *          of tiled images is written into the text field, in the format
- *               n = .
- *      (4) Typical usage: a set of character templates all scaled to
- *          the same size can be stored on a lattice of that size in
- *          a pix, and this function can regenerate the pixa.  If the
- *          templates differ in size, a boxa generated when the tiled
- *          pix was made can be used to indicate the location of
- *          the templates.
- * 
- */ -PIXA * -pixaMakeFromTiledPix(PIX *pixs, - l_int32 w, - l_int32 h, - l_int32 start, - l_int32 num, - BOXA *boxa) -{ -l_int32 i, j, k, ws, hs, d, nx, ny, n, n_isvalid, ntiles, nmax; -PIX *pix1; -PIXA *pixa1; -PIXCMAP *cmap; - - PROCNAME("pixaMakeFromTiledPix"); - - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (!boxa && (w <= 0 || h <= 0)) - return (PIXA *)ERROR_PTR("w and h must be > 0", procName, NULL); - - if (boxa) /* general case */ - return pixaCreateFromBoxa(pixs, boxa, start, num, NULL); - - /* All tiles are the same size */ - pixGetDimensions(pixs, &ws, &hs, &d); - nx = ws / w; - ny = hs / h; - if (nx < 1 || ny < 1) - return (PIXA *)ERROR_PTR("invalid dimensions", procName, NULL); - if (nx * w != ws || ny * h != hs) - L_WARNING("some tiles will be clipped\n", procName); - - /* Check the text field of the pix. It may tell how many - * tiles hold valid data. If a valid value is not found, - * assume all (nx * ny) tiles are valid. */ - pixGetTileCount(pixs, &n); - n_isvalid = (n <= nx * ny && n > nx * (ny - 1)) ? TRUE : FALSE; - ntiles = (n_isvalid) ? n : nx * ny; - nmax = ntiles - start; /* max available from start */ - num = (num == 0) ? nmax : L_MIN(num, nmax); - - /* Extract the tiles */ - if ((pixa1 = pixaCreate(num)) == NULL) { - return (PIXA *)ERROR_PTR("pixa1 not made", procName, NULL); - } - cmap = pixGetColormap(pixs); - for (i = 0, k = 0; i < ny; i++) { - for (j = 0; j < nx; j++, k++) { - if (k < start) continue; - if (k >= start + num) break; - pix1 = pixCreate(w, h, d); - if (cmap) pixSetColormap(pix1, pixcmapCopy(cmap)); - pixRasterop(pix1, 0, 0, w, h, PIX_SRC, pixs, j * w, i * h); - pixaAddPix(pixa1, pix1, L_INSERT); - } - } - return pixa1; -} - - -/*! - * \brief pixGetTileCount() - * - * \param[in] pix - * \param[out] *pn number embedded in pix text field - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If the pix was made by pixaDisplayOnLattice(), the number
- *          of tiled images is written into the text field, in the format
- *               n = .
- *      (2) This returns 0 if the data is not in the text field, or on error.
- * 
- */ -l_ok -pixGetTileCount(PIX *pix, - l_int32 *pn) -{ -char *text; -l_int32 n; - - PROCNAME("pixGetTileCount"); - - if (!pn) - return ERROR_INT("&n not defined", procName, 1); - *pn = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - text = pixGetText(pix); - if (text && strlen(text) > 4) { - if (sscanf(text, "n = %d", &n) == 1) - *pn = n; - } - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixa display into multiple tiles * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaDisplayMultiTiled() - * - * \param[in] pixas - * \param[in] nx, ny in [1, ... 50], tiling factors in each direction - * \param[in] maxw, maxh max sizes to keep - * \param[in] scalefactor scale each image by this - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \return pixad if OK, NULL on error - * - *
- * Notes:
- *      (1) Each set of %nx * %ny images is optionally scaled and saved
- *          into a new pix, and then aggregated.
- *      (2) Set %maxw = %maxh = 0 if you want to include all pix from %pixs.
- *      (3) This is useful for generating a pdf from the output pixa, where
- *          each page is a tile of (%nx * %ny) images from the input pixa.
- * 
- */ -PIXA * -pixaDisplayMultiTiled(PIXA *pixas, - l_int32 nx, - l_int32 ny, - l_int32 maxw, - l_int32 maxh, - l_float32 scalefactor, - l_int32 spacing, - l_int32 border) -{ -l_int32 n, i, j, ntile, nout, index; -PIX *pix1, *pix2; -PIXA *pixa1, *pixa2, *pixad; - - PROCNAME("pixaDisplayMultiTiled"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (nx < 1 || ny < 1 || nx > 50 || ny > 50) - return (PIXA *)ERROR_PTR("invalid tiling factor(s)", procName, NULL); - if ((n = pixaGetCount(pixas)) == 0) - return (PIXA *)ERROR_PTR("pixas is empty", procName, NULL); - - /* Filter out large ones if requested */ - if (maxw == 0 && maxh == 0) { - pixa1 = pixaCopy(pixas, L_CLONE); - } else { - maxw = (maxw == 0) ? 1000000 : maxw; - maxh = (maxh == 0) ? 1000000 : maxh; - pixa1 = pixaSelectBySize(pixas, maxw, maxh, L_SELECT_IF_BOTH, - L_SELECT_IF_LTE, NULL); - n = pixaGetCount(pixa1); - } - - ntile = nx * ny; - nout = L_MAX(1, (n + ntile - 1) / ntile); - pixad = pixaCreate(nout); - for (i = 0, index = 0; i < nout; i++) { /* over tiles */ - pixa2 = pixaCreate(ntile); - for (j = 0; j < ntile && index < n; j++, index++) { - pix1 = pixaGetPix(pixa1, index, L_COPY); - pixaAddPix(pixa2, pix1, L_INSERT); - } - pix2 = pixaDisplayTiledInColumns(pixa2, nx, scalefactor, spacing, - border); - pixaAddPix(pixad, pix2, L_INSERT); - pixaDestroy(&pixa2); - } - pixaDestroy(&pixa1); - - return pixad; -} - - -/*---------------------------------------------------------------------* - * Split pixa into files * - *---------------------------------------------------------------------*/ -/*! 
- * \brief pixaSplitIntoFiles() - * - * \param[in] pixas - * \param[in] nsplit split pixas into this number of pixa; >= 2 - * \param[in] scale scalefactor applied to each pix - * \param[in] outwidth the maxwidth parameter of tiled images - * for write_pix - * \param[in] write_pixa 1 to write the split pixa as separate files - * \param[in] write_pix 1 to write tiled images of the split pixa - * \param[in] write_pdf 1 to write pdfs of the split pixa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For each requested output, %nsplit files are written into
- *          directory /tmp/lept/split/.
- *      (2) This is useful when a pixa is so large that the images
- *          are not conveniently displayed as a single tiled image at
- *          full resolution.
- * 
- */ -l_ok -pixaSplitIntoFiles(PIXA *pixas, - l_int32 nsplit, - l_float32 scale, - l_int32 outwidth, - l_int32 write_pixa, - l_int32 write_pix, - l_int32 write_pdf) -{ -char buf[64]; -l_int32 i, j, index, n, nt; -PIX *pix1, *pix2; -PIXA *pixa1; - - PROCNAME("pixaSplitIntoFiles"); - - if (!pixas) - return ERROR_INT("pixas not defined", procName, 1); - if (nsplit <= 1) - return ERROR_INT("nsplit must be >= 2", procName, 1); - if ((nt = pixaGetCount(pixas)) == 0) - return ERROR_INT("pixas is empty", procName, 1); - if (!write_pixa && !write_pix && !write_pdf) - return ERROR_INT("no output is requested", procName, 1); - - lept_mkdir("lept/split"); - n = (nt + nsplit - 1) / nsplit; - lept_stderr("nt = %d, n = %d, nsplit = %d\n", nt, n, nsplit); - for (i = 0, index = 0; i < nsplit; i++) { - pixa1 = pixaCreate(n); - for (j = 0; j < n && index < nt; j++, index++) { - pix1 = pixaGetPix(pixas, index, L_CLONE); - pix2 = pixScale(pix1, scale, scale); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - if (write_pixa) { - snprintf(buf, sizeof(buf), "/tmp/lept/split/split%d.pa", i + 1); - pixaWriteDebug(buf, pixa1); - } - if (write_pix) { - snprintf(buf, sizeof(buf), "/tmp/lept/split/split%d.tif", i + 1); - pix1 = pixaDisplayTiledInRows(pixa1, 1, outwidth, 1.0, 0, 20, 2); - pixWriteDebug(buf, pix1, IFF_TIFF_G4); - pixDestroy(&pix1); - } - if (write_pdf) { - snprintf(buf, sizeof(buf), "/tmp/lept/split/split%d.pdf", i + 1); - pixaConvertToPdf(pixa1, 0, 1.0, L_G4_ENCODE, 0, buf, buf); - } - pixaDestroy(&pixa1); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Tile N-Up * - *---------------------------------------------------------------------*/ -/*! - * \brief convertToNUpFiles() - * - * \param[in] dir full path to directory of images - * \param[in] substr [optional] can be null - * \param[in] nx, ny in [1, ... 
50], tiling factors in each direction - * \param[in] tw target width, in pixels; must be >= 20 - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \param[in] fontsize to print tail of filename with image. Valid set is - * {4,6,8,10,12,14,16,18,20}. Use 0 to disable. - * \param[in] outdir subdirectory of /tmp to put N-up tiled images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Each set of %nx * %ny images is scaled and tiled into a single
- *          image, that is written out to %outdir.
- *      (2) All images in each %nx * %ny set are scaled to the same
- *          width, %tw.  This is typically used when all images are
- *          roughly the same size.
- *      (3) This is useful for generating a pdf from the set of input
- *          files, where each page is a tile of (%nx * %ny) input images.
- *          Typical values for %nx and %ny are in the range [2 ... 5].
- *      (4) If %fontsize != 0, each image has the tail of its filename
- *          rendered below it.
- * 
- */ -l_ok -convertToNUpFiles(const char *dir, - const char *substr, - l_int32 nx, - l_int32 ny, - l_int32 tw, - l_int32 spacing, - l_int32 border, - l_int32 fontsize, - const char *outdir) -{ -l_int32 d, format; -char rootpath[256]; -PIXA *pixa; - - PROCNAME("convertToNUpFiles"); - - if (!dir) - return ERROR_INT("dir not defined", procName, 1); - if (nx < 1 || ny < 1 || nx > 50 || ny > 50) - return ERROR_INT("invalid tiling N-factor", procName, 1); - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) - return ERROR_INT("invalid fontsize", procName, 1); - if (!outdir) - return ERROR_INT("outdir not defined", procName, 1); - - pixa = convertToNUpPixa(dir, substr, nx, ny, tw, spacing, border, - fontsize); - if (!pixa) - return ERROR_INT("pixa not made", procName, 1); - - lept_rmdir(outdir); - lept_mkdir(outdir); - pixaGetRenderingDepth(pixa, &d); - format = (d == 1) ? IFF_TIFF_G4 : IFF_JFIF_JPEG; - makeTempDirname(rootpath, 256, outdir); - modifyTrailingSlash(rootpath, 256, L_ADD_TRAIL_SLASH); - pixaWriteFiles(rootpath, pixa, format); - pixaDestroy(&pixa); - return 0; -} - - -/*! - * \brief convertToNUpPixa() - * - * \param[in] dir full path to directory of images - * \param[in] substr [optional] can be null - * \param[in] nx, ny in [1, ... 50], tiling factors in each direction - * \param[in] tw target width, in pixels; must be >= 20 - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \param[in] fontsize to print tail of filename with image. Valid set is - * {4,6,8,10,12,14,16,18,20}. Use 0 to disable. - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) See notes for convertToNUpFiles()
- * 
- */ -PIXA * -convertToNUpPixa(const char *dir, - const char *substr, - l_int32 nx, - l_int32 ny, - l_int32 tw, - l_int32 spacing, - l_int32 border, - l_int32 fontsize) -{ -l_int32 i, n; -char *fname, *tail; -PIXA *pixa1, *pixa2; -SARRAY *sa1, *sa2; - - PROCNAME("convertToNUpPixa"); - - if (!dir) - return (PIXA *)ERROR_PTR("dir not defined", procName, NULL); - if (nx < 1 || ny < 1 || nx > 50 || ny > 50) - return (PIXA *)ERROR_PTR("invalid tiling N-factor", procName, NULL); - if (tw < 20) - return (PIXA *)ERROR_PTR("tw must be >= 20", procName, NULL); - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) - return (PIXA *)ERROR_PTR("invalid fontsize", procName, NULL); - - sa1 = getSortedPathnamesInDirectory(dir, substr, 0, 0); - pixa1 = pixaReadFilesSA(sa1); - n = sarrayGetCount(sa1); - sa2 = sarrayCreate(n); - for (i = 0; i < n; i++) { - fname = sarrayGetString(sa1, i, L_NOCOPY); - splitPathAtDirectory(fname, NULL, &tail); - sarrayAddString(sa2, tail, L_INSERT); - } - sarrayDestroy(&sa1); - pixa2 = pixaConvertToNUpPixa(pixa1, sa2, nx, ny, tw, spacing, - border, fontsize); - pixaDestroy(&pixa1); - sarrayDestroy(&sa2); - return pixa2; -} - - -/*! - * \brief pixaConvertToNUpPixa() - * - * \param[in] pixas - * \param[in] sa [optional] array of strings associated with each pix - * \param[in] nx, ny in [1, ... 50], tiling factors in each direction - * \param[in] tw target width, in pixels; must be >= 20 - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \param[in] fontsize to print string with each image. Valid set is - * {4,6,8,10,12,14,16,18,20}. Use 0 to disable. - * \return pixad, or NULL on error - * - *
- * Notes:
- *      (1) This takes an input pixa and an optional array of strings, and
- *          generates a pixa of NUp tiles from the input, labeled with
- *          the strings if they exist and %fontsize != 0.
- *      (2) See notes for convertToNUpFiles()
- * 
- */ -PIXA * -pixaConvertToNUpPixa(PIXA *pixas, - SARRAY *sa, - l_int32 nx, - l_int32 ny, - l_int32 tw, - l_int32 spacing, - l_int32 border, - l_int32 fontsize) -{ -l_int32 i, j, k, nt, n2, nout, d; -char *str; -L_BMF *bmf; -PIX *pix1, *pix2, *pix3, *pix4; -PIXA *pixa1, *pixad; - - PROCNAME("pixaConvertToNUpPixa"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (nx < 1 || ny < 1 || nx > 50 || ny > 50) - return (PIXA *)ERROR_PTR("invalid tiling N-factor", procName, NULL); - if (tw < 20) - return (PIXA *)ERROR_PTR("tw must be >= 20", procName, NULL); - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) - return (PIXA *)ERROR_PTR("invalid fontsize", procName, NULL); - - nt = pixaGetCount(pixas); - if (sa && (sarrayGetCount(sa) != nt)) { - L_WARNING("pixa size %d not equal to sarray size %d\n", procName, - nt, sarrayGetCount(sa)); - } - - n2 = nx * ny; - nout = (nt + n2 - 1) / n2; - pixad = pixaCreate(nout); - bmf = (fontsize == 0) ? NULL : bmfCreate(NULL, fontsize); - for (i = 0, j = 0; i < nout; i++) { - pixa1 = pixaCreate(n2); - for (k = 0; k < n2 && j < nt; j++, k++) { - pix1 = pixaGetPix(pixas, j, L_CLONE); - pix2 = pixScaleToSize(pix1, tw, 0); /* all images have width tw */ - if (bmf && sa) { - str = sarrayGetString(sa, j, L_NOCOPY); - pix3 = pixAddTextlines(pix2, bmf, str, 0xff000000, - L_ADD_BELOW); - } else { - pix3 = pixClone(pix2); - } - pixaAddPix(pixa1, pix3, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - if (pixaGetCount(pixa1) == 0) { /* probably won't happen */ - pixaDestroy(&pixa1); - continue; - } - - /* Add 2 * border to image width to prevent scaling */ - pixaGetRenderingDepth(pixa1, &d); - pix4 = pixaDisplayTiledAndScaled(pixa1, d, tw + 2 * border, nx, 0, - spacing, border); - pixaAddPix(pixad, pix4, L_INSERT); - pixaDestroy(&pixa1); - } - - bmfDestroy(&bmf); - return pixad; -} - - -/*---------------------------------------------------------------------* - * Render two pixa 
side-by-side for comparison * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaCompareInPdf() - * - * \param[in] pixa1 - * \param[in] pixa2 - * \param[in] nx, ny in [1, ... 20], tiling factors in each direction - * \param[in] tw target width, in pixels; must be >= 20 - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image - * and on each pair; use 0 for no border - * \param[in] fontsize to print index of each pair of images. Valid set - * is {4,6,8,10,12,14,16,18,20}. Use 0 to disable. - * \param[in] fileout output pdf file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This takes two pixa and renders them interleaved, side-by-side
- *          in a pdf.  A warning is issued if the input pixa arrays
- *          have different lengths.
- *      (2) %nx and %ny specify how many side-by-side pairs are displayed
- *          on each pdf page.  For example, if %nx = 1 and %ny = 2, then
- *          two pairs are shown, one above the other, on each page.
- *      (3) The input pix are scaled to a target width of %tw, and
- *          then paired with optional %spacing between and optional
- *          black border of width %border.
- *      (4) After a pixa is generated of these tiled images, it is
- *          written to %fileout as a pdf.
- *      (5) Typical numbers for the input parameters are:
- *            %nx = small integer (1 - 4)
- *            %ny = 2 * %nx
- *            %tw = 200 - 500 pixels
- *            %spacing = 10
- *            %border = 2
- *            %fontsize = 10
- *      (6) If %fontsize != 0, the index of the pix pair in their pixa
- *          is printed out below each pair.
- * 
- */ -l_ok -pixaCompareInPdf(PIXA *pixa1, - PIXA *pixa2, - l_int32 nx, - l_int32 ny, - l_int32 tw, - l_int32 spacing, - l_int32 border, - l_int32 fontsize, - const char *fileout) -{ -l_int32 n1, n2, npairs; -PIXA *pixa3, *pixa4, *pixa5; -SARRAY *sa; - - PROCNAME("pixaCompareInPdf"); - - if (!pixa1 || !pixa2) - return ERROR_INT("pixa1 and pixa2 not both defined", procName, 1); - if (nx < 1 || ny < 1 || nx > 20 || ny > 20) - return ERROR_INT("invalid tiling factors", procName, 1); - if (tw < 20) - return ERROR_INT("invalid tw; tw must be >= 20", procName, 1); - if (fontsize < 0 || fontsize > 20 || fontsize & 1 || fontsize == 2) - return ERROR_INT("invalid fontsize", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - n1 = pixaGetCount(pixa1); - n2 = pixaGetCount(pixa2); - if (n1 == 0 || n2 == 0) - return ERROR_INT("at least one pixa is empty", procName, 1); - if (n1 != n2) - L_WARNING("sizes (%d, %d) differ; using the minimum in interleave\n", - procName, n1, n2); - - /* Interleave the input pixa */ - if ((pixa3 = pixaInterleave(pixa1, pixa2, L_CLONE)) == NULL) - return ERROR_INT("pixa3 not made", procName, 1); - - /* Scale the images if necessary and pair them up side/by/side */ - pixa4 = pixaConvertToNUpPixa(pixa3, NULL, 2, 1, tw, spacing, border, 0); - pixaDestroy(&pixa3); - - /* Label the pairs and mosaic into pages without further scaling */ - npairs = pixaGetCount(pixa4); - sa = (fontsize > 0) ? 
sarrayGenerateIntegers(npairs) : NULL; - pixa5 = pixaConvertToNUpPixa(pixa4, sa, nx, ny, - 2 * tw + 4 * border + spacing, - spacing, border, fontsize); - pixaDestroy(&pixa4); - sarrayDestroy(&sa); - - /* Output as pdf without scaling */ - pixaConvertToPdf(pixa5, 0, 1.0, 0, 0, NULL, fileout); - pixaDestroy(&pixa5); - return 0; -} - - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixalloc.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixalloc.c deleted file mode 100644 index 72e86ce6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixalloc.c +++ /dev/null @@ -1,536 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixalloc.c - *
- *
- *      Custom memory storage with allocator and deallocator
- *
- *          l_int32       pmsCreate()
- *          void          pmsDestroy()
- *          void         *pmsCustomAlloc()
- *          void          pmsCustomDealloc()
- *          void         *pmsGetAlloc()
- *          l_int32       pmsGetLevelForAlloc()
- *          l_int32       pmsGetLevelForDealloc()
- *          void          pmsLogInfo()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*-------------------------------------------------------------------------* - * Pix Memory Storage * - * * - * This is a simple utility for handling pix memory storage. It is * - * enabled by setting the PixMemoryManager allocators to the functions * - * that are defined here * - * pmsCustomAlloc() * - * pmsCustomDealloc() * - * Use pmsCreate() at the beginning to do the pre-allocation, and * - * pmsDestroy() at the end to clean it up. * - *-------------------------------------------------------------------------*/ -/* - * In the following, the "memory" refers to the image data - * field that is used within the pix. The memory store is a - * continuous block of memory, that is logically divided into - * smaller "chunks" starting with a set at a minimum size, and - * followed by sets of increasing size that are a power of 2 larger - * than the minimum size. You must specify the number of chunks - * of each size. - * - * A requested data chunk, if it exists, is borrowed from the memory - * storage, and returned after use. If the chunk is too small, or - * too large, or if all chunks in the appropriate size range are - * in use, the memory is allocated dynamically and freed after use. - * - * There are four parameters that determine the use of pre-allocated memory: - * - * minsize: any requested chunk smaller than this is allocated - * dynamically and destroyed after use. No preallocated - * memory is used. - * smallest: the size of the smallest pre-allocated memory chunk. - * nlevels: the number of different sizes of data chunks, each a - * power of 2 larger than 'smallest'. - * numalloc: a Numa of size 'nlevels' containing the number of data - * chunks for each size that are in the memory store. 
- * - * As an example, suppose: - * minsize = 0.5MB - * smallest = 1.0MB - * nlevels = 4 - * numalloc = {10, 5, 5, 5} - * Then the total amount of allocated memory (in MB) is - * 10 * 1 + 5 * 2 + 5 * 4 + 5 * 8 = 80 MB - * Any pix requiring less than 0.5 MB or more than 8 MB of memory will - * not come from the memory store. Instead, it will be dynamically - * allocated and freed after use. - * - * How is this implemented? - * - * At setup, the full data block size is computed and allocated. - * The addresses of the individual chunks are found, and the pointers - * are stored in a set of Ptra (generic pointer arrays), using one Ptra - * for each of the sizes of the chunks. When returning a chunk after - * use, it is necessary to determine from the address which size level - * (ptra) the chunk belongs to. This is done by comparing the address - * of the associated chunk. - * - * In the event that memory chunks need to be dynamically allocated, - * either (1) because they are too small or too large for the memory - * store or (2) because all the pix of that size (i.e., in the - * appropriate level) in the memory store are in use, the - * addresses generated will be outside the pre-allocated block. - * After use they won't be returned to a ptra; instead the deallocator - * will free them. - */ - -/*! 
Pix memory storage */ -struct PixMemoryStore -{ - struct L_Ptraa *paa; /*!< Holds ptrs to allocated memory */ - size_t minsize; /*!< Pix smaller than this (in bytes) */ - /*!< are allocated dynamically */ - size_t smallest; /*!< Smallest mem (in bytes) alloc'd */ - size_t largest; /*!< Larest mem (in bytes) alloc'd */ - size_t nbytes; /*!< Size of allocated block w/ all chunks */ - l_int32 nlevels; /*!< Num of power-of-2 sizes pre-alloc'd */ - size_t *sizes; /*!< Mem sizes at each power-of-2 level */ - l_int32 *allocarray; /*!< Number of mem alloc'd at each size */ - l_uint32 *baseptr; /*!< ptr to allocated array */ - l_uint32 *maxptr; /*!< ptr just beyond allocated memory */ - l_uint32 **firstptr; /*!< array of ptrs to first chunk in size */ - l_int32 *memused; /*!< log: total # of pix used (by level) */ - l_int32 *meminuse; /*!< log: # of pix in use (by level) */ - l_int32 *memmax; /*!< log: max # of pix in use (by level) */ - l_int32 *memempty; /*!< log: # of pix alloc'd because */ - /*!< the store was empty (by level) */ - char *logfile; /*!< log: set to null if no logging */ -}; -typedef struct PixMemoryStore L_PIX_MEM_STORE; - -static L_PIX_MEM_STORE *CustomPMS = NULL; - - -/*! - * \brief pmsCreate() - * - * \param[in] minsize of data chunk that can be supplied by pms - * \param[in] smallest bytes of the smallest pre-allocated data chunk. - * \param[in] numalloc array with the number of data chunks for each - * size that are in the memory store - * \param[in] logfile use for debugging; null otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This computes the size of the block of memory required
- *          and allocates it.  Each chunk starts on a 32-bit word boundary.
- *          The chunk sizes are in powers of 2, starting at %smallest,
- *          and the number of levels and chunks at each level is
- *          specified by %numalloc.
- *      (2) This is intended to manage the image data for a small number
- *          of relatively large pix.  The system malloc is expected to
- *          handle very large numbers of small chunks efficiently.
- *      (3) Important: set the allocators and call this function
- *          before any pix have been allocated.  Destroy all the pix
- *          in the normal way before calling pmsDestroy().
- *      (4) The pms struct is stored in a static global, so this function
- *          is not thread-safe.  When used, there must be only one thread
- *          per process.
- * 
- */ -l_ok -pmsCreate(size_t minsize, - size_t smallest, - NUMA *numalloc, - const char *logfile) -{ -l_int32 nlevels, i, j, nbytes; -l_int32 *alloca; -l_float32 nchunks; -l_uint32 *baseptr, *data; -l_uint32 **firstptr; -size_t *sizes; -L_PIX_MEM_STORE *pms; -L_PTRA *pa; -L_PTRAA *paa; - - PROCNAME("createPMS"); - - if (!numalloc) - return ERROR_INT("numalloc not defined", procName, 1); - numaGetSum(numalloc, &nchunks); - if (nchunks > 1000.0) - L_WARNING("There are %.0f chunks\n", procName, nchunks); - - pms = (L_PIX_MEM_STORE *)LEPT_CALLOC(1, sizeof(L_PIX_MEM_STORE)); - CustomPMS = pms; - - /* Make sure that minsize and smallest are multiples of 32 bit words */ - if (minsize % 4 != 0) - minsize -= minsize % 4; - pms->minsize = minsize; - nlevels = numaGetCount(numalloc); - pms->nlevels = nlevels; - - if ((sizes = (size_t *)LEPT_CALLOC(nlevels, sizeof(size_t))) == NULL) - return ERROR_INT("sizes not made", procName, 1); - pms->sizes = sizes; - if (smallest % 4 != 0) - smallest += 4 - (smallest % 4); - pms->smallest = smallest; - for (i = 0; i < nlevels; i++) - sizes[i] = smallest * (1 << i); - pms->largest = sizes[nlevels - 1]; - - alloca = numaGetIArray(numalloc); - pms->allocarray = alloca; - if ((paa = ptraaCreate(nlevels)) == NULL) - return ERROR_INT("paa not made", procName, 1); - pms->paa = paa; - - for (i = 0, nbytes = 0; i < nlevels; i++) - nbytes += alloca[i] * sizes[i]; - pms->nbytes = nbytes; - - if ((baseptr = (l_uint32 *)LEPT_CALLOC(nbytes / 4, sizeof(l_uint32))) - == NULL) - return ERROR_INT("calloc fail for baseptr", procName, 1); - pms->baseptr = baseptr; - pms->maxptr = baseptr + nbytes / 4; /* just beyond the memory store */ - if ((firstptr = (l_uint32 **)LEPT_CALLOC(nlevels, sizeof(l_uint32 *))) - == NULL) - return ERROR_INT("calloc fail for firstptr", procName, 1); - pms->firstptr = firstptr; - - data = baseptr; - for (i = 0; i < nlevels; i++) { - if ((pa = ptraCreate(alloca[i])) == NULL) - return ERROR_INT("pa not made", procName, 1); - 
ptraaInsertPtra(paa, i, pa); - firstptr[i] = data; - for (j = 0; j < alloca[i]; j++) { - ptraAdd(pa, data); - data += sizes[i] / 4; - } - } - - if (logfile) { - pms->memused = (l_int32 *)LEPT_CALLOC(nlevels, sizeof(l_int32)); - pms->meminuse = (l_int32 *)LEPT_CALLOC(nlevels, sizeof(l_int32)); - pms->memmax = (l_int32 *)LEPT_CALLOC(nlevels, sizeof(l_int32)); - pms->memempty = (l_int32 *)LEPT_CALLOC(nlevels, sizeof(l_int32)); - pms->logfile = stringNew(logfile); - } - - return 0; -} - - -/*! - * \brief pmsDestroy() - * - *
- * Notes:
- *      (1) Important: call this function at the end of the program, after
- *          the last pix has been destroyed.
- * 
- */ -void -pmsDestroy(void) -{ -L_PIX_MEM_STORE *pms; - - if ((pms = CustomPMS) == NULL) - return; - - ptraaDestroy(&pms->paa, FALSE, FALSE); /* don't touch the ptrs */ - LEPT_FREE(pms->baseptr); /* free the memory */ - - if (pms->logfile) { - pmsLogInfo(); - LEPT_FREE(pms->logfile); - LEPT_FREE(pms->memused); - LEPT_FREE(pms->meminuse); - LEPT_FREE(pms->memmax); - LEPT_FREE(pms->memempty); - } - - LEPT_FREE(pms->sizes); - LEPT_FREE(pms->allocarray); - LEPT_FREE(pms->firstptr); - LEPT_FREE(pms); - CustomPMS = NULL; - return; -} - - -/*! - * \brief pmsCustomAlloc() - * - * \param[in] nbytes min number of bytes in the chunk to be retrieved - * \return data ptr to chunk - * - *
- * Notes:
- *      (1) This attempts to find a suitable pre-allocated chunk.
- *          If not found, it dynamically allocates the chunk.
- *      (2) If logging is turned on, the allocations that are not taken
- *          from the memory store, and are at least as large as the
- *          minimum size the store can handle, are logged to file.
- * 
- */ -void * -pmsCustomAlloc(size_t nbytes) -{ -l_int32 level; -void *data; -L_PIX_MEM_STORE *pms; -L_PTRA *pa; - - PROCNAME("pmsCustomAlloc"); - - if ((pms = CustomPMS) == NULL) - return (void *)ERROR_PTR("pms not defined", procName, NULL); - - pmsGetLevelForAlloc(nbytes, &level); - - if (level < 0) { /* size range invalid; must alloc */ - if ((data = pmsGetAlloc(nbytes)) == NULL) - return (void *)ERROR_PTR("data not made", procName, NULL); - } else { /* get from store */ - pa = ptraaGetPtra(pms->paa, level, L_HANDLE_ONLY); - data = ptraRemoveLast(pa); - if (data && pms->logfile) { - pms->memused[level]++; - pms->meminuse[level]++; - if (pms->meminuse[level] > pms->memmax[level]) - pms->memmax[level]++; - } - if (!data) { /* none left at this level */ - data = pmsGetAlloc(nbytes); - if (pms->logfile) - pms->memempty[level]++; - } - } - - return data; -} - - -/*! - * \brief pmsCustomDealloc() - * - * \param[in] data to be freed or returned to the storage - * \return void - */ -void -pmsCustomDealloc(void *data) -{ -l_int32 level; -L_PIX_MEM_STORE *pms; -L_PTRA *pa; - - PROCNAME("pmsCustomDealloc"); - - if ((pms = CustomPMS) == NULL) { - L_ERROR("pms not defined\n", procName); - return; - } - - if (pmsGetLevelForDealloc(data, &level) == 1) { - L_ERROR("level not found\n", procName); - return; - } - - if (level < 0) { /* no logging; just free the data */ - LEPT_FREE(data); - } else { /* return the data to the store */ - pa = ptraaGetPtra(pms->paa, level, L_HANDLE_ONLY); - ptraAdd(pa, data); - if (pms->logfile) - pms->meminuse[level]--; - } - - return; -} - - -/*! - * \brief pmsGetAlloc() - * - * \param[in] nbytes - * \return data - * - *
- * Notes:
- *      (1) This is called when a request for pix data cannot be
- *          obtained from the preallocated memory store.  After use it
- *          is freed like normal memory.
- *      (2) If logging is on, only write out allocs that are as large as
- *          the minimum size handled by the memory store.
- *      (3) size_t is %lu on 64 bit platforms and %u on 32 bit platforms.
- *          The C99 platform-independent format specifier for size_t is %zu.
- *          Windows since at least VC-2015 is conforming; we can now use %zu.
- * 
- */ -void * -pmsGetAlloc(size_t nbytes) -{ -void *data; -FILE *fp; -L_PIX_MEM_STORE *pms; - - PROCNAME("pmsGetAlloc"); - - if ((pms = CustomPMS) == NULL) - return (void *)ERROR_PTR("pms not defined", procName, NULL); - - if ((data = (void *)LEPT_CALLOC(nbytes, sizeof(char))) == NULL) - return (void *)ERROR_PTR("data not made", procName, NULL); - if (pms->logfile && nbytes >= pms->smallest) { - fp = fopenWriteStream(pms->logfile, "a"); - fprintf(fp, "Alloc %zu bytes at %p\n", nbytes, data); - fclose(fp); - } - - return data; -} - - -/*! - * \brief pmsGetLevelForAlloc() - * - * \param[in] nbytes min number of bytes in the chunk to be retrieved - * \param[out] plevel -1 if either too small or too large - * \return 0 if OK, 1 on error - */ -l_ok -pmsGetLevelForAlloc(size_t nbytes, - l_int32 *plevel) -{ -l_int32 i; -l_float64 ratio; -L_PIX_MEM_STORE *pms; - - PROCNAME("pmsGetLevelForAlloc"); - - if (!plevel) - return ERROR_INT("&level not defined", procName, 1); - *plevel = -1; - if ((pms = CustomPMS) == NULL) - return ERROR_INT("pms not defined", procName, 1); - - if (nbytes < pms->minsize || nbytes > pms->largest) - return 0; /* -1 */ - - ratio = (l_float64)nbytes / (l_float64)(pms->smallest); - for (i = 0; i < pms->nlevels; i++) { - if (ratio <= 1.0) - break; - ratio /= 2.0; - } - *plevel = i; - - return 0; -} - - -/*! 
- * \brief pmsGetLevelForDealloc() - * - * \param[in] data ptr to memory chunk - * \param[out] plevel level in memory store; -1 if allocated - * outside the store - * \return 0 if OK, 1 on error - */ -l_ok -pmsGetLevelForDealloc(void *data, - l_int32 *plevel) -{ -l_int32 i; -l_uint32 *first; -L_PIX_MEM_STORE *pms; - - PROCNAME("pmsGetLevelForDealloc"); - - if (!plevel) - return ERROR_INT("&level not defined", procName, 1); - *plevel = -1; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if ((pms = CustomPMS) == NULL) - return ERROR_INT("pms not defined", procName, 1); - - if (data < (void *)pms->baseptr || data >= (void *)pms->maxptr) - return 0; /* -1 */ - - for (i = 1; i < pms->nlevels; i++) { - first = pms->firstptr[i]; - if (data < (void *)first) - break; - } - *plevel = i - 1; - - return 0; -} - - -/*! - * \brief pmsLogInfo() - */ -void -pmsLogInfo(void) -{ -l_int32 i; -L_PIX_MEM_STORE *pms; - - if ((pms = CustomPMS) == NULL) - return; - - lept_stderr("Total number of pix used at each level\n"); - for (i = 0; i < pms->nlevels; i++) - lept_stderr(" Level %d (%zu bytes): %d\n", i, - pms->sizes[i], pms->memused[i]); - - lept_stderr("Max number of pix in use at any time in each level\n"); - for (i = 0; i < pms->nlevels; i++) - lept_stderr(" Level %d (%zu bytes): %d\n", i, - pms->sizes[i], pms->memmax[i]); - - lept_stderr("Number of pix alloc'd because none were available\n"); - for (i = 0; i < pms->nlevels; i++) - lept_stderr(" Level %d (%zu bytes): %d\n", i, - pms->sizes[i], pms->memempty[i]); - - return; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixarith.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixarith.c deleted file mode 100644 index df75e6a6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixarith.c +++ /dev/null @@ -1,1629 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixarith.c - *
- *
- *      One-image grayscale arithmetic operations (8, 16, 32 bpp)
- *           l_int32     pixAddConstantGray()
- *           l_int32     pixMultConstantGray()
- *
- *      Two-image grayscale arithmetic operations (8, 16, 32 bpp)
- *           PIX        *pixAddGray()
- *           PIX        *pixSubtractGray()
- *           PIX        *pixMultiplyGray()
- *
- *      Grayscale threshold operation (8, 16, 32 bpp)
- *           PIX        *pixThresholdToValue()
- *
- *      Image accumulator arithmetic operations
- *           PIX        *pixInitAccumulate()
- *           PIX        *pixFinalAccumulate()
- *           PIX        *pixFinalAccumulateThreshold()
- *           l_int32     pixAccumulate()
- *           l_int32     pixMultConstAccumulate()
- *
- *      Absolute value of difference
- *           PIX        *pixAbsDifference()
- *
- *      Sum of color images
- *           PIX        *pixAddRGB()
- *
- *      Two-image min and max operations (8 and 16 bpp)
- *           PIX        *pixMinOrMax()
- *
- *      Scale pix for maximum dynamic range
- *           PIX        *pixMaxDynamicRange()
- *           PIX        *pixMaxDynamicRangeRGB()
- *
- *      RGB pixel value scaling
- *           l_uint32    linearScaleRGBVal()
- *           l_uint32    logScaleRGBVal()
- *
- *      Log base2 lookup
- *           l_float32  *makeLogBase2Tab()
- *           l_float32   getLogBase2()
- *
- *      The image accumulator operations are used when you expect
- *      overflow from 8 bits on intermediate results.  For example,
- *      you might want a tophat contrast operator which is
- *         3*I - opening(I,S) - closing(I,S)
- *      To use these operations, first use the init to generate
- *      a 16 bpp image, use the accumulate to add or subtract 8 bpp
- *      images from that, or the multiply constant to multiply
- *      by a small constant (much less than 256 -- we don't want
- *      overflow from the 16 bit images!), and when you're finished
- *      use final to bring the result back to 8 bpp, clipped
- *      if necessary.  There is also a divide function, which
- *      can be used to divide one image by another, scaling the
- *      result for maximum dynamic range, and giving back the
- *      8 bpp result.
- *
- *      A simpler interface to the arithmetic operations is
- *      provided in pixacc.c.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/*-------------------------------------------------------------* - * One-image grayscale arithmetic operations * - *-------------------------------------------------------------*/ -/*! - * \brief pixAddConstantGray() - * - * \param[in] pixs 8, 16 or 32 bpp - * \param[in] val amount to add to each pixel - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place operation.
- *      (2) No clipping for 32 bpp.
- *      (3) For 8 and 16 bpp, if val > 0 the result is clipped
- *          to 0xff and 0xffff, rsp.
- *      (4) For 8 and 16 bpp, if val < 0 the result is clipped to 0.
- * 
- */ -l_ok -pixAddConstantGray(PIX *pixs, - l_int32 val) -{ -l_int32 i, j, w, h, d, wpl, pval; -l_uint32 *data, *line; - - PROCNAME("pixAddConstantGray"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 16 && d != 32) - return ERROR_INT("pixs not 8, 16 or 32 bpp", procName, 1); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (d == 8) { - if (val < 0) { - for (j = 0; j < w; j++) { - pval = GET_DATA_BYTE(line, j); - pval = L_MAX(0, pval + val); - SET_DATA_BYTE(line, j, pval); - } - } else { /* val >= 0 */ - for (j = 0; j < w; j++) { - pval = GET_DATA_BYTE(line, j); - pval = L_MIN(255, pval + val); - SET_DATA_BYTE(line, j, pval); - } - } - } else if (d == 16) { - if (val < 0) { - for (j = 0; j < w; j++) { - pval = GET_DATA_TWO_BYTES(line, j); - pval = L_MAX(0, pval + val); - SET_DATA_TWO_BYTES(line, j, pval); - } - } else { /* val >= 0 */ - for (j = 0; j < w; j++) { - pval = GET_DATA_TWO_BYTES(line, j); - pval = L_MIN(0xffff, pval + val); - SET_DATA_TWO_BYTES(line, j, pval); - } - } - } else { /* d == 32; no check for overflow (< 0 or > 0xffffffff) */ - for (j = 0; j < w; j++) - *(line + j) += val; - } - } - - return 0; -} - - -/*! - * \brief pixMultConstantGray() - * - * \param[in] pixs 8, 16 or 32 bpp - * \param[in] val >= 0.0; amount to multiply by each pixel - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place operation; val must be >= 0.
- *      (2) No clipping for 32 bpp.
- *      (3) For 8 and 16 bpp, the result is clipped to 0xff and 0xffff, rsp.
- * 
- */ -l_ok -pixMultConstantGray(PIX *pixs, - l_float32 val) -{ -l_int32 i, j, w, h, d, wpl, pval; -l_uint32 upval; -l_uint32 *data, *line; - - PROCNAME("pixMultConstantGray"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 16 && d != 32) - return ERROR_INT("pixs not 8, 16 or 32 bpp", procName, 1); - if (val < 0.0) - return ERROR_INT("val < 0.0", procName, 1); - - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (d == 8) { - for (j = 0; j < w; j++) { - pval = GET_DATA_BYTE(line, j); - pval = (l_int32)(val * pval); - pval = L_MIN(255, pval); - SET_DATA_BYTE(line, j, pval); - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - pval = GET_DATA_TWO_BYTES(line, j); - pval = (l_int32)(val * pval); - pval = L_MIN(0xffff, pval); - SET_DATA_TWO_BYTES(line, j, pval); - } - } else { /* d == 32; no clipping */ - for (j = 0; j < w; j++) { - upval = *(line + j); - upval = (l_uint32)(val * upval); - *(line + j) = upval; - } - } - } - - return 0; -} - - -/*-------------------------------------------------------------* - * Two-image grayscale arithmetic ops * - *-------------------------------------------------------------*/ -/*! - * \brief pixAddGray() - * - * \param[in] pixd [optional]; this can be null, equal to pixs1, or - * different from pixs1 - * \param[in] pixs1 can be equal to pixd - * \param[in] pixs2 - * \return pixd always - * - *
- * Notes:
- *      (1) Arithmetic addition of two 8, 16 or 32 bpp images.
- *      (2) For 8 and 16 bpp, we do explicit clipping to 0xff and 0xffff,
- *          respectively.
- *      (3) Alignment is to UL corner.
- *      (4) There are 3 cases.  The result can go to a new dest,
- *          in-place to pixs1, or to an existing input dest:
- *          * pixd == null:   (src1 + src2) --> new pixd
- *          * pixd == pixs1:  (src1 + src2) --> src1  (in-place)
- *          * pixd != pixs1:  (src1 + src2) --> input pixd
- *      (5) pixs2 must be different from both pixd and pixs1.
- * 
- */ -PIX * -pixAddGray(PIX *pixd, - PIX *pixs1, - PIX *pixs2) -{ -l_int32 i, j, d, ws, hs, w, h, wpls, wpld, val, sum; -l_uint32 *datas, *datad, *lines, *lined; - - PROCNAME("pixAddGray"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixs2 == pixs1) - return (PIX *)ERROR_PTR("pixs2 and pixs1 must differ", procName, pixd); - if (pixs2 == pixd) - return (PIX *)ERROR_PTR("pixs2 and pixd must differ", procName, pixd); - d = pixGetDepth(pixs1); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pix are not 8, 16 or 32 bpp", procName, pixd); - if (pixGetDepth(pixs2) != d) - return (PIX *)ERROR_PTR("depths differ (pixs1, pixs2)", procName, pixd); - if (pixd && (pixGetDepth(pixd) != d)) - return (PIX *)ERROR_PTR("depths differ (pixs1, pixd)", procName, pixd); - - if (!pixSizesEqual(pixs1, pixs2)) - L_WARNING("pixs1 and pixs2 not equal in size\n", procName); - if (pixd && !pixSizesEqual(pixs1, pixd)) - L_WARNING("pixs1 and pixd not equal in size\n", procName); - - if (pixs1 != pixd) - pixd = pixCopy(pixd, pixs1); - - /* pixd + pixs2 ==> pixd */ - datas = pixGetData(pixs2); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs2); - wpld = pixGetWpl(pixd); - pixGetDimensions(pixs2, &ws, &hs, NULL); - pixGetDimensions(pixd, &w, &h, NULL); - w = L_MIN(ws, w); - h = L_MIN(hs, h); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - lines = datas + i * wpls; - if (d == 8) { - for (j = 0; j < w; j++) { - sum = GET_DATA_BYTE(lines, j) + GET_DATA_BYTE(lined, j); - val = L_MIN(sum, 255); - SET_DATA_BYTE(lined, j, val); - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - sum = GET_DATA_TWO_BYTES(lines, j) - + GET_DATA_TWO_BYTES(lined, j); - val = L_MIN(sum, 0xffff); - SET_DATA_TWO_BYTES(lined, j, val); - } - } else { /* d == 32; no clipping */ - for (j = 0; j < w; j++) - *(lined + j) += *(lines + j); - } - } - - return pixd; -} - - -/*! 
- * \brief pixSubtractGray() - * - * \param[in] pixd [optional]; this can be null, equal to pixs1, or - * different from pixs1 - * \param[in] pixs1 can be equal to pixd - * \param[in] pixs2 - * \return pixd always - * - *
- * Notes:
- *      (1) Arithmetic subtraction of two 8, 16 or 32 bpp images.
- *      (2) Source pixs2 is always subtracted from source pixs1.
- *      (3) Do explicit clipping to 0.
- *      (4) Alignment is to UL corner.
- *      (5) There are 3 cases.  The result can go to a new dest,
- *          in-place to pixs1, or to an existing input dest:
- *          (a) pixd == null   (src1 - src2) --> new pixd
- *          (b) pixd == pixs1  (src1 - src2) --> src1  (in-place)
- *          (d) pixd != pixs1  (src1 - src2) --> input pixd
- *      (6) pixs2 must be different from both pixd and pixs1.
- * 
- */ -PIX * -pixSubtractGray(PIX *pixd, - PIX *pixs1, - PIX *pixs2) -{ -l_int32 i, j, w, h, ws, hs, d, wpls, wpld, val, diff; -l_uint32 *datas, *datad, *lines, *lined; - - PROCNAME("pixSubtractGray"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixs2 == pixs1) - return (PIX *)ERROR_PTR("pixs2 and pixs1 must differ", procName, pixd); - if (pixs2 == pixd) - return (PIX *)ERROR_PTR("pixs2 and pixd must differ", procName, pixd); - d = pixGetDepth(pixs1); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pix are not 8, 16 or 32 bpp", procName, pixd); - if (pixGetDepth(pixs2) != d) - return (PIX *)ERROR_PTR("depths differ (pixs1, pixs2)", procName, pixd); - if (pixd && (pixGetDepth(pixd) != d)) - return (PIX *)ERROR_PTR("depths differ (pixs1, pixd)", procName, pixd); - - if (!pixSizesEqual(pixs1, pixs2)) - L_WARNING("pixs1 and pixs2 not equal in size\n", procName); - if (pixd && !pixSizesEqual(pixs1, pixd)) - L_WARNING("pixs1 and pixd not equal in size\n", procName); - - if (pixs1 != pixd) - pixd = pixCopy(pixd, pixs1); - - /* pixd - pixs2 ==> pixd */ - datas = pixGetData(pixs2); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs2); - wpld = pixGetWpl(pixd); - pixGetDimensions(pixs2, &ws, &hs, NULL); - pixGetDimensions(pixd, &w, &h, NULL); - w = L_MIN(ws, w); - h = L_MIN(hs, h); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - lines = datas + i * wpls; - if (d == 8) { - for (j = 0; j < w; j++) { - diff = GET_DATA_BYTE(lined, j) - GET_DATA_BYTE(lines, j); - val = L_MAX(diff, 0); - SET_DATA_BYTE(lined, j, val); - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - diff = GET_DATA_TWO_BYTES(lined, j) - - GET_DATA_TWO_BYTES(lines, j); - val = L_MAX(diff, 0); - SET_DATA_TWO_BYTES(lined, j, val); - } - } else { /* d == 32; no clipping */ - for (j = 0; j < w; j++) - *(lined + j) -= *(lines + j); - } - } - - return pixd; -} - - -/*! 
- * \brief pixMultiplyGray() - * - * \param[in] pixs 32 bpp rgb or 8 bpp gray - * \param[in] pixg 8 bpp gray - * \param[in] norm multiplicative factor to avoid overflow; 0 for default - * \return pixd, or null on error - * - *
- * Notes:
- *      (1) This function can be used for correcting a scanned image
- *          under non-uniform illumination.  For that application,
- *          %pixs is the scanned image, %pixg is an image whose values
- *          are inversely related to light from a uniform (say, white)
- *          target, and %norm is typically the inverse of the maximum
- *          pixel value in %pixg.
- *      (2) Set norm = 0 to get the default value, which is the inverse
- *          of the max value in %pixg.  This avoids overflow in the product.
- *      (3) For 32 bpp %pixs, all 3 components are multiplied by the
- *          same number.
- *      (4) Alignment is to UL corner.
- * 
- */ -PIX * -pixMultiplyGray(PIX *pixs, - PIX *pixg, - l_float32 norm) -{ -l_int32 i, j, w, h, d, ws, hs, ds, wpls, wplg, wpld; -l_int32 rval, gval, bval, rval2, gval2, bval2, vals, valg, val, maxgray; -l_uint32 val32; -l_uint32 *datas, *datag, *datad, *lines, *lineg, *lined; -PIX *pixd; - - PROCNAME("pixMultiplyGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &ws, &hs, &ds); - if (ds != 8 && ds != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (!pixg) - return (PIX *)ERROR_PTR("pixg not defined", procName, NULL); - pixGetDimensions(pixg, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixg not 8 bpp", procName, NULL); - - if (norm <= 0.0) { - pixGetExtremeValue(pixg, 1, L_SELECT_MAX, NULL, NULL, NULL, &maxgray); - norm = (maxgray > 0) ? 1.0 / (l_float32)maxgray : 1.0; - } - - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datas = pixGetData(pixs); - datag = pixGetData(pixg); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wplg = pixGetWpl(pixg); - wpld = pixGetWpl(pixd); - w = L_MIN(ws, w); - h = L_MIN(hs, h); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lineg = datag + i * wplg; - lined = datad + i * wpld; - if (ds == 8) { - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - valg = GET_DATA_BYTE(lineg, j); - val = (l_int32)(vals * valg * norm + 0.5); - val = L_MIN(255, val); - SET_DATA_BYTE(lined, j, val); - } - } else { /* ds == 32 */ - for (j = 0; j < w; j++) { - val32 = *(lines + j); - extractRGBValues(val32, &rval, &gval, &bval); - valg = GET_DATA_BYTE(lineg, j); - rval2 = (l_int32)(rval * valg * norm + 0.5); - rval2 = L_MIN(255, rval2); - gval2 = (l_int32)(gval * valg * norm + 0.5); - gval2 = L_MIN(255, gval2); - bval2 = (l_int32)(bval * valg * norm + 0.5); - bval2 = L_MIN(255, bval2); - composeRGBPixel(rval2, gval2, bval2, lined + j); - } - } - } - - return pixd; -} - - 
-/*-------------------------------------------------------------* - * Grayscale threshold operation * - *-------------------------------------------------------------*/ -/*! - * \brief pixThresholdToValue() - * - * \param[in] pixd [optional]; if not null, must be equal to pixs - * \param[in] pixs 8, 16, 32 bpp - * \param[in] threshval - * \param[in] setval - * \return pixd always - * - *
- * Notes:
- *    ~ operation can be in-place (pixs == pixd) or to a new pixd
- *    ~ if %setval > %threshval, sets pixels with a value >= threshval to setval
- *    ~ if %setval < %threshval, sets pixels with a value <= threshval to setval
- *    ~ if %setval == %threshval, no-op
- * 
- */ -PIX * -pixThresholdToValue(PIX *pixd, - PIX *pixs, - l_int32 threshval, - l_int32 setval) -{ -l_int32 i, j, w, h, d, wpld, setabove; -l_uint32 *datad, *lined; - - PROCNAME("pixThresholdToValue"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - d = pixGetDepth(pixs); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8, 16 or 32 bpp", procName, pixd); - if (pixd && (pixs != pixd)) - return (PIX *)ERROR_PTR("pixd exists and is not pixs", procName, pixd); - if (threshval < 0 || setval < 0) - return (PIX *)ERROR_PTR("threshval & setval not < 0", procName, pixd); - if (d == 8 && setval > 255) - return (PIX *)ERROR_PTR("setval > 255 for 8 bpp", procName, pixd); - if (d == 16 && setval > 0xffff) - return (PIX *)ERROR_PTR("setval > 0xffff for 16 bpp", procName, pixd); - - if (!pixd) - pixd = pixCopy(NULL, pixs); - if (setval == threshval) { - L_WARNING("setval == threshval; no operation\n", procName); - return pixd; - } - - datad = pixGetData(pixd); - pixGetDimensions(pixd, &w, &h, NULL); - wpld = pixGetWpl(pixd); - if (setval > threshval) - setabove = TRUE; - else - setabove = FALSE; - - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - if (setabove == TRUE) { - if (d == 8) { - for (j = 0; j < w; j++) { - if (GET_DATA_BYTE(lined, j) - threshval >= 0) - SET_DATA_BYTE(lined, j, setval); - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - if (GET_DATA_TWO_BYTES(lined, j) - threshval >= 0) - SET_DATA_TWO_BYTES(lined, j, setval); - } - } else { /* d == 32 */ - for (j = 0; j < w; j++) { - if (*(lined + j) >= threshval) - *(lined + j) = setval; - } - } - } else { /* set if below or at threshold */ - if (d == 8) { - for (j = 0; j < w; j++) { - if (GET_DATA_BYTE(lined, j) - threshval <= 0) - SET_DATA_BYTE(lined, j, setval); - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - if (GET_DATA_TWO_BYTES(lined, j) - threshval <= 0) - SET_DATA_TWO_BYTES(lined, j, setval); - } - } else { /* d == 32 */ - for (j = 
0; j < w; j++) { - if (*(lined + j) <= threshval) - *(lined + j) = setval; - } - } - } - } - - return pixd; -} - - -/*-------------------------------------------------------------* - * Image accumulator arithmetic operations * - *-------------------------------------------------------------*/ -/*! - * \brief pixInitAccumulate() - * - * \param[in] w, h of accumulate array - * \param[in] offset initialize the 32 bpp to have this - * value; not more than 0x40000000 - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) %offset must be >= 0.
- *      (2) %offset is used so that we can do arithmetic
- *          with negative number results on l_uint32 data; it
- *          prevents the l_uint32 data from going negative.
- *      (3) Because we use l_int32 intermediate data results,
- *          these should never exceed the max of l_int32 (0x7fffffff).
- *          We do not permit the offset to be above 0x40000000,
- *          which is half way between 0 and the max of l_int32.
- *      (4) The same offset should be used for initialization,
- *          multiplication by a constant, and final extraction!
- *      (5) If you're only adding positive values, %offset can be 0.
- * 
- */ -PIX * -pixInitAccumulate(l_int32 w, - l_int32 h, - l_uint32 offset) -{ -PIX *pixd; - - PROCNAME("pixInitAccumulate"); - - if ((pixd = pixCreate(w, h, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - if (offset > 0x40000000) - offset = 0x40000000; - pixSetAllArbitrary(pixd, offset); - return pixd; -} - - -/*! - * \brief pixFinalAccumulate() - * - * \param[in] pixs 32 bpp - * \param[in] offset same as used for initialization - * \param[in] depth 8, 16 or 32 bpp, of destination - * \return pixd 8, 16 or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) %offset must be >= 0 and should not exceed 0x40000000.
- *      (2) %offset is subtracted from the src 32 bpp image
- *      (3) For 8 bpp dest, the result is clipped to [0, 0xff]
- *      (4) For 16 bpp dest, the result is clipped to [0, 0xffff]
- * 
- */ -PIX * -pixFinalAccumulate(PIX *pixs, - l_uint32 offset, - l_int32 depth) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixFinalAccumulate"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (depth != 8 && depth != 16 && depth != 32) - return (PIX *)ERROR_PTR("dest depth not 8, 16, 32 bpp", procName, NULL); - if (offset > 0x40000000) - offset = 0x40000000; - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, depth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); /* but how did pixs get it initially? */ - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - if (depth == 8) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j] - offset; - val = L_MAX(0, val); - val = L_MIN(255, val); - SET_DATA_BYTE(lined, j, (l_uint8)val); - } - } - } else if (depth == 16) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j] - offset; - val = L_MAX(0, val); - val = L_MIN(0xffff, val); - SET_DATA_TWO_BYTES(lined, j, (l_uint16)val); - } - } - } else { /* depth == 32 */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) - lined[j] = lines[j] - offset; - } - } - - return pixd; -} - - -/*! - * \brief pixFinalAccumulateThreshold() - * - * \param[in] pixs 32 bpp - * \param[in] offset same as used for initialization - * \param[in] threshold values less than this are set in the destination - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) %offset must be >= 0 and should not exceed 0x40000000.
- *      (2) %offset is subtracted from the src 32 bpp image
- * 
- */ -PIX * -pixFinalAccumulateThreshold(PIX *pixs, - l_uint32 offset, - l_uint32 threshold) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixFinalAccumulateThreshold"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (offset > 0x40000000) - offset = 0x40000000; - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); /* but how did pixs get it initially? */ - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = lines[j] - offset; - if (val >= threshold) { - SET_DATA_BIT(lined, j); - } - } - } - - return pixd; -} - - -/*! - * \brief pixAccumulate() - * - * \param[in] pixd 32 bpp - * \param[in] pixs 1, 8, 16 or 32 bpp - * \param[in] op L_ARITH_ADD or L_ARITH_SUBTRACT - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This adds or subtracts each pixs value from pixd.
- *      (2) This clips to the minimum of pixs and pixd, so they
- *          do not need to be the same size.
- *      (3) The alignment is to the origin [UL corner] of pixs & pixd.
- * 
- */ -l_ok -pixAccumulate(PIX *pixd, - PIX *pixs, - l_int32 op) -{ -l_int32 i, j, w, h, d, wd, hd, wpls, wpld; -l_uint32 *datas, *datad, *lines, *lined; - - - PROCNAME("pixAccumulate"); - - if (!pixd || (pixGetDepth(pixd) != 32)) - return ERROR_INT("pixd not defined or not 32 bpp", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - d = pixGetDepth(pixs); - if (d != 1 && d != 8 && d != 16 && d != 32) - return ERROR_INT("pixs not 1, 8, 16 or 32 bpp", procName, 1); - if (op != L_ARITH_ADD && op != L_ARITH_SUBTRACT) - return ERROR_INT("op must be in {L_ARITH_ADD, L_ARITH_SUBTRACT}", - procName, 1); - - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - pixGetDimensions(pixs, &w, &h, NULL); - pixGetDimensions(pixd, &wd, &hd, NULL); - w = L_MIN(w, wd); - h = L_MIN(h, hd); - if (d == 1) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (op == L_ARITH_ADD) { - for (j = 0; j < w; j++) - lined[j] += GET_DATA_BIT(lines, j); - } else { /* op == L_ARITH_SUBTRACT */ - for (j = 0; j < w; j++) - lined[j] -= GET_DATA_BIT(lines, j); - } - } - } else if (d == 8) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (op == L_ARITH_ADD) { - for (j = 0; j < w; j++) - lined[j] += GET_DATA_BYTE(lines, j); - } else { /* op == L_ARITH_SUBTRACT */ - for (j = 0; j < w; j++) - lined[j] -= GET_DATA_BYTE(lines, j); - } - } - } else if (d == 16) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (op == L_ARITH_ADD) { - for (j = 0; j < w; j++) - lined[j] += GET_DATA_TWO_BYTES(lines, j); - } else { /* op == L_ARITH_SUBTRACT */ - for (j = 0; j < w; j++) - lined[j] -= GET_DATA_TWO_BYTES(lines, j); - } - } - } else { /* d == 32 */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (op == L_ARITH_ADD) { - for (j = 0; j < w; j++) - lined[j] += lines[j]; - } else { 
/* op == L_ARITH_SUBTRACT */ - for (j = 0; j < w; j++) - lined[j] -= lines[j]; - } - } - } - - return 0; -} - - -/*! - * \brief pixMultConstAccumulate() - * - * \param[in] pixs 32 bpp - * \param[in] factor - * \param[in] offset same as used for initialization - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) %offset must be >= 0 and should not exceed 0x40000000.
- *      (2) This multiplies each pixel, relative to offset, by %factor.
- *      (3) The result is returned with %offset back in place.
- * 
- */ -l_ok -pixMultConstAccumulate(PIX *pixs, - l_float32 factor, - l_uint32 offset) -{ -l_int32 i, j, w, h, wpl, val; -l_uint32 *data, *line; - - PROCNAME("pixMultConstAccumulate"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not 32 bpp", procName, 1); - if (offset > 0x40000000) - offset = 0x40000000; - - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - val = line[j] - offset; - val = (l_int32)(val * factor); - val += offset; - line[j] = (l_uint32)val; - } - } - - return 0; -} - - -/*-----------------------------------------------------------------------* - * Absolute value of difference * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixAbsDifference() - * - * \param[in] pixs1, pixs2 both either 8 or 16 bpp gray, or 32 bpp RGB - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The depth of pixs1 and pixs2 must be equal.
- *      (2) Clips computation to the min size, aligning the UL corners
- *      (3) For 8 and 16 bpp, assumes one gray component.
- *      (4) For 32 bpp, assumes 3 color components, and ignores the
- *          LSB of each word (the alpha channel)
- *      (5) Computes the absolute value of the difference between
- *          each component value.
- * 
- */ -PIX * -pixAbsDifference(PIX *pixs1, - PIX *pixs2) -{ -l_int32 i, j, w, h, w2, h2, d, wpls1, wpls2, wpld, val1, val2, diff; -l_int32 rval1, gval1, bval1, rval2, gval2, bval2, rdiff, gdiff, bdiff; -l_uint32 *datas1, *datas2, *datad, *lines1, *lines2, *lined; -PIX *pixd; - - PROCNAME("pixAbsDifference"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, NULL); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, NULL); - d = pixGetDepth(pixs1); - if (d != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("src1 and src2 depths unequal", procName, NULL); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depths not in {8, 16, 32}", procName, NULL); - - pixGetDimensions(pixs1, &w, &h, NULL); - pixGetDimensions(pixs2, &w2, &h2, NULL); - w = L_MIN(w, w2); - h = L_MIN(h, h2); - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs1); - datas1 = pixGetData(pixs1); - datas2 = pixGetData(pixs2); - datad = pixGetData(pixd); - wpls1 = pixGetWpl(pixs1); - wpls2 = pixGetWpl(pixs2); - wpld = pixGetWpl(pixd); - if (d == 8) { - for (i = 0; i < h; i++) { - lines1 = datas1 + i * wpls1; - lines2 = datas2 + i * wpls2; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val1 = GET_DATA_BYTE(lines1, j); - val2 = GET_DATA_BYTE(lines2, j); - diff = L_ABS(val1 - val2); - SET_DATA_BYTE(lined, j, diff); - } - } - } else if (d == 16) { - for (i = 0; i < h; i++) { - lines1 = datas1 + i * wpls1; - lines2 = datas2 + i * wpls2; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val1 = GET_DATA_TWO_BYTES(lines1, j); - val2 = GET_DATA_TWO_BYTES(lines2, j); - diff = L_ABS(val1 - val2); - SET_DATA_TWO_BYTES(lined, j, diff); - } - } - } else { /* d == 32 */ - for (i = 0; i < h; i++) { - lines1 = datas1 + i * wpls1; - lines2 = datas2 + i * wpls2; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines1[j], &rval1, &gval1, &bval1); - 
extractRGBValues(lines2[j], &rval2, &gval2, &bval2); - rdiff = L_ABS(rval1 - rval2); - gdiff = L_ABS(gval1 - gval2); - bdiff = L_ABS(bval1 - bval2); - composeRGBPixel(rdiff, gdiff, bdiff, lined + j); - } - } - } - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Sum of color images * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixAddRGB() - * - * \param[in] pixs1, pixs2 32 bpp RGB, or colormapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Clips computation to the minimum size, aligning the UL corners.
- *      (2) Removes any colormap to RGB, and ignores the LSB of each
- *          pixel word (the alpha channel).
- *      (3) Adds each component value, pixelwise, clipping to 255.
- *      (4) This is useful to combine two images where most of the
- *          pixels are essentially black, such as in pixPerceptualDiff().
- * 
- */ -PIX * -pixAddRGB(PIX *pixs1, - PIX *pixs2) -{ -l_int32 i, j, w, h, d, w2, h2, d2, wplc1, wplc2, wpld; -l_int32 rval1, gval1, bval1, rval2, gval2, bval2, rval, gval, bval; -l_uint32 *datac1, *datac2, *datad, *linec1, *linec2, *lined; -PIX *pixc1, *pixc2, *pixd; - - PROCNAME("pixAddRGB"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, NULL); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, NULL); - pixGetDimensions(pixs1, &w, &h, &d); - pixGetDimensions(pixs2, &w2, &h2, &d2); - if (!pixGetColormap(pixs1) && d != 32) - return (PIX *)ERROR_PTR("pixs1 not cmapped or rgb", procName, NULL); - if (!pixGetColormap(pixs2) && d2 != 32) - return (PIX *)ERROR_PTR("pixs2 not cmapped or rgb", procName, NULL); - if (pixGetColormap(pixs1)) - pixc1 = pixRemoveColormap(pixs1, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc1 = pixClone(pixs1); - if (pixGetColormap(pixs2)) - pixc2 = pixRemoveColormap(pixs2, REMOVE_CMAP_TO_FULL_COLOR); - else - pixc2 = pixClone(pixs2); - - w = L_MIN(w, w2); - h = L_MIN(h, h2); - pixd = pixCreate(w, h, 32); - pixCopyResolution(pixd, pixs1); - datac1 = pixGetData(pixc1); - datac2 = pixGetData(pixc2); - datad = pixGetData(pixd); - wplc1 = pixGetWpl(pixc1); - wplc2 = pixGetWpl(pixc2); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linec1 = datac1 + i * wplc1; - linec2 = datac2 + i * wplc2; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(linec1[j], &rval1, &gval1, &bval1); - extractRGBValues(linec2[j], &rval2, &gval2, &bval2); - rval = L_MIN(255, rval1 + rval2); - gval = L_MIN(255, gval1 + gval2); - bval = L_MIN(255, bval1 + bval2); - composeRGBPixel(rval, gval, bval, lined + j); - } - } - - pixDestroy(&pixc1); - pixDestroy(&pixc2); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Two-image min and max operations (8 and 16 bpp) * - *-----------------------------------------------------------------------*/ -/*! 
- * \brief pixMinOrMax() - * - * \param[in] pixd [optional] destination: this can be null, - * equal to pixs1, or different from pixs1 - * \param[in] pixs1 can be equal to pixd - * \param[in] pixs2 - * \param[in] type L_CHOOSE_MIN, L_CHOOSE_MAX - * \return pixd always - * - *
- * Notes:
- *      (1) This gives the min or max of two images, component-wise.
- *      (2) The depth can be 8 or 16 bpp for 1 component, and 32 bpp
- *          for a 3 component image.  For 32 bpp, ignore the LSB
- *          of each word (the alpha channel)
- *      (3) There are 3 cases:
- *          ~  if pixd == null,   Min(src1, src2) --> new pixd
- *          ~  if pixd == pixs1,  Min(src1, src2) --> src1  (in-place)
- *          ~  if pixd != pixs1,  Min(src1, src2) --> input pixd
- * 
- */ -PIX * -pixMinOrMax(PIX *pixd, - PIX *pixs1, - PIX *pixs2, - l_int32 type) -{ -l_int32 d, ws, hs, w, h, wpls, wpld, i, j, vals, vald, val; -l_int32 rval1, gval1, bval1, rval2, gval2, bval2, rval, gval, bval; -l_uint32 *datas, *datad, *lines, *lined; - - PROCNAME("pixMinOrMax"); - - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixs1 == pixs2) - return (PIX *)ERROR_PTR("pixs1 and pixs2 must differ", procName, pixd); - if (type != L_CHOOSE_MIN && type != L_CHOOSE_MAX) - return (PIX *)ERROR_PTR("invalid type", procName, pixd); - d = pixGetDepth(pixs1); - if (pixGetDepth(pixs2) != d) - return (PIX *)ERROR_PTR("depths unequal", procName, pixd); - if (d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depth not 8, 16 or 32 bpp", procName, pixd); - - if (pixs1 != pixd) - pixd = pixCopy(pixd, pixs1); - - pixGetDimensions(pixs2, &ws, &hs, NULL); - pixGetDimensions(pixd, &w, &h, NULL); - w = L_MIN(w, ws); - h = L_MIN(h, hs); - datas = pixGetData(pixs2); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs2); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (d == 8) { - for (j = 0; j < w; j++) { - vals = GET_DATA_BYTE(lines, j); - vald = GET_DATA_BYTE(lined, j); - if (type == L_CHOOSE_MIN) - val = L_MIN(vals, vald); - else /* type == L_CHOOSE_MAX */ - val = L_MAX(vals, vald); - SET_DATA_BYTE(lined, j, val); - } - } else if (d == 16) { - for (j = 0; j < w; j++) { - vals = GET_DATA_TWO_BYTES(lines, j); - vald = GET_DATA_TWO_BYTES(lined, j); - if (type == L_CHOOSE_MIN) - val = L_MIN(vals, vald); - else /* type == L_CHOOSE_MAX */ - val = L_MAX(vals, vald); - SET_DATA_TWO_BYTES(lined, j, val); - } - } else { /* d == 32 */ - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval1, &gval1, &bval1); - extractRGBValues(lined[j], &rval2, &gval2, &bval2); - if (type == L_CHOOSE_MIN) { - rval = 
L_MIN(rval1, rval2); - gval = L_MIN(gval1, gval2); - bval = L_MIN(bval1, bval2); - } else { /* type == L_CHOOSE_MAX */ - rval = L_MAX(rval1, rval2); - gval = L_MAX(gval1, gval2); - bval = L_MAX(bval1, bval2); - } - composeRGBPixel(rval, gval, bval, lined + j); - } - } - } - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Scale for maximum dynamic range * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixMaxDynamicRange() - * - * \param[in] pixs 4, 8, 16 or 32 bpp source - * \param[in] type L_LINEAR_SCALE or L_LOG_SCALE - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Scales pixel values to fit maximally within the dest 8 bpp pixd
- *      (2) Assumes the source 'pixels' are a 1-component scalar.  For
- *          a 32 bpp source, each pixel is treated as a single number --
- *          not as a 3-component rgb pixel value.
- *      (3) Uses a LUT for log scaling.
- * 
- */ -PIX * -pixMaxDynamicRange(PIX *pixs, - l_int32 type) -{ -l_uint8 dval; -l_int32 i, j, w, h, d, wpls, wpld, max; -l_uint32 *datas, *datad; -l_uint32 word, sval; -l_uint32 *lines, *lined; -l_float32 factor; -l_float32 *tab; -PIX *pixd; - - PROCNAME("pixMaxDynamicRange"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not in {4,8,16,32} bpp", procName, NULL); - if (type != L_LINEAR_SCALE && type != L_LOG_SCALE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - /* Get max */ - max = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < wpls; j++) { - word = *(lines + j); - if (d == 4) { - max = L_MAX(max, word >> 28); - max = L_MAX(max, (word >> 24) & 0xf); - max = L_MAX(max, (word >> 20) & 0xf); - max = L_MAX(max, (word >> 16) & 0xf); - max = L_MAX(max, (word >> 12) & 0xf); - max = L_MAX(max, (word >> 8) & 0xf); - max = L_MAX(max, (word >> 4) & 0xf); - max = L_MAX(max, word & 0xf); - } else if (d == 8) { - max = L_MAX(max, word >> 24); - max = L_MAX(max, (word >> 16) & 0xff); - max = L_MAX(max, (word >> 8) & 0xff); - max = L_MAX(max, word & 0xff); - } else if (d == 16) { - max = L_MAX(max, word >> 16); - max = L_MAX(max, word & 0xffff); - } else { /* d == 32 (rgb) */ - max = L_MAX(max, word); - } - } - } - - /* Map to the full dynamic range */ - if (d == 4) { - if (type == L_LINEAR_SCALE) { - factor = 255. 
/ (l_float32)max; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_QBIT(lines, j); - dval = (l_uint8)(factor * (l_float32)sval + 0.5); - SET_DATA_QBIT(lined, j, dval); - } - } - } else { /* type == L_LOG_SCALE) */ - tab = makeLogBase2Tab(); - factor = 255. / getLogBase2(max, tab); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_QBIT(lines, j); - dval = (l_uint8)(factor * getLogBase2(sval, tab) + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - LEPT_FREE(tab); - } - } else if (d == 8) { - if (type == L_LINEAR_SCALE) { - factor = 255. / (l_float32)max; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_BYTE(lines, j); - dval = (l_uint8)(factor * (l_float32)sval + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - } else { /* type == L_LOG_SCALE) */ - tab = makeLogBase2Tab(); - factor = 255. / getLogBase2(max, tab); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_BYTE(lines, j); - dval = (l_uint8)(factor * getLogBase2(sval, tab) + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - LEPT_FREE(tab); - } - } else if (d == 16) { - if (type == L_LINEAR_SCALE) { - factor = 255. / (l_float32)max; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_TWO_BYTES(lines, j); - dval = (l_uint8)(factor * (l_float32)sval + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - } else { /* type == L_LOG_SCALE) */ - tab = makeLogBase2Tab(); - factor = 255. 
/ getLogBase2(max, tab); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_TWO_BYTES(lines, j); - dval = (l_uint8)(factor * getLogBase2(sval, tab) + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - LEPT_FREE(tab); - } - } else { /* d == 32 */ - if (type == L_LINEAR_SCALE) { - factor = 255. / (l_float32)max; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = lines[j]; - dval = (l_uint8)(factor * (l_float32)sval + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - } else { /* type == L_LOG_SCALE) */ - tab = makeLogBase2Tab(); - factor = 255. / getLogBase2(max, tab); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = lines[j]; - dval = (l_uint8)(factor * getLogBase2(sval, tab) + 0.5); - SET_DATA_BYTE(lined, j, dval); - } - } - LEPT_FREE(tab); - } - } - - return pixd; -} - - -/*! - * \brief pixMaxDynamicRangeRGB() - * - * \param[in] pixs 32 bpp rgb source - * \param[in] type L_LINEAR_SCALE or L_LOG_SCALE - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Scales pixel values to fit maximally within a 32 bpp dest pixd
- *      (2) All color components are scaled with the same factor, based
- *          on the maximum r,g or b component in the image.  This should
- *          not be used if the 32-bit value is a single number (e.g., a
- *          count in a histogram generated by pixMakeHistoHS()).
- *      (3) Uses a LUT for log scaling.
- * 
- */ -PIX * -pixMaxDynamicRangeRGB(PIX *pixs, - l_int32 type) -{ -l_int32 i, j, w, h, wpls, wpld, max; -l_uint32 sval, dval, word; -l_uint32 *datas, *datad; -l_uint32 *lines, *lined; -l_float32 factor; -l_float32 *tab; -PIX *pixd; - - PROCNAME("pixMaxDynamicRangeRGB"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (type != L_LINEAR_SCALE && type != L_LOG_SCALE) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - /* Get max */ - pixd = pixCreateTemplate(pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - pixGetDimensions(pixs, &w, &h, NULL); - max = 0; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < wpls; j++) { - word = lines[j]; - max = L_MAX(max, word >> 24); - max = L_MAX(max, (word >> 16) & 0xff); - max = L_MAX(max, (word >> 8) & 0xff); - } - } - - /* Map to the full dynamic range */ - if (type == L_LINEAR_SCALE) { - factor = 255. / (l_float32)max; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = lines[j]; - dval = linearScaleRGBVal(sval, factor); - lined[j] = dval; - } - } - } else { /* type == L_LOG_SCALE) */ - tab = makeLogBase2Tab(); - factor = 255. / getLogBase2(max, tab); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - sval = lines[j]; - dval = logScaleRGBVal(sval, tab, factor); - lined[j] = dval; - } - } - LEPT_FREE(tab); - } - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * RGB pixel value scaling * - *-----------------------------------------------------------------------*/ -/*! - * \brief linearScaleRGBVal() - * - * \param[in] sval 32-bit rgb pixel value - * \param[in] factor multiplication factor on each component - * \return dval linearly scaled version of %sval - * - *
- * Notes:
- *      (1) %factor must be chosen to be not greater than (255 / maxcomp),
- *          where maxcomp is the maximum value of the pixel components.
- *          Otherwise, the product will overflow a uint8.  In use, factor
- *          is the same for all pixels in a pix.
- *      (2) No scaling is performed on the transparency ("A") component.
- * 
- */ -l_uint32 -linearScaleRGBVal(l_uint32 sval, - l_float32 factor) -{ -l_uint32 dval; - - dval = ((l_uint8)(factor * (sval >> 24) + 0.5) << 24) | - ((l_uint8)(factor * ((sval >> 16) & 0xff) + 0.5) << 16) | - ((l_uint8)(factor * ((sval >> 8) & 0xff) + 0.5) << 8) | - (sval & 0xff); - return dval; -} - - -/*! - * \brief logScaleRGBVal() - * - * \param[in] sval 32-bit rgb pixel value - * \param[in] tab 256 entry log-base-2 table - * \param[in] factor multiplication factor on each component - * \return dval log scaled version of %sval - * - *
- * Notes:
- *      (1) %tab is made with makeLogBase2Tab().
- *      (2) %factor must be chosen to be not greater than
- *          255.0 / log[base2](maxcomp), where maxcomp is the maximum
- *          value of the pixel components.  Otherwise, the product
- *          will overflow a uint8.  In use, factor is the same for
- *          all pixels in a pix.
- *      (3) No scaling is performed on the transparency ("A") component.
- * 
- */ -l_uint32 -logScaleRGBVal(l_uint32 sval, - l_float32 *tab, - l_float32 factor) -{ -l_uint32 dval; - - dval = ((l_uint8)(factor * getLogBase2(sval >> 24, tab) + 0.5) << 24) | - ((l_uint8)(factor * getLogBase2(((sval >> 16) & 0xff), tab) + 0.5) - << 16) | - ((l_uint8)(factor * getLogBase2(((sval >> 8) & 0xff), tab) + 0.5) - << 8) | - (sval & 0xff); - return dval; -} - - -/*-----------------------------------------------------------------------* - * Log base2 lookup * - *-----------------------------------------------------------------------*/ -/* - * \brief makeLogBase2Tab() - * - * \return tab table giving the log[base2] of values from 1 to 255 - */ -l_float32 * -makeLogBase2Tab(void) -{ -l_int32 i; -l_float32 log2; -l_float32 *tab; - - PROCNAME("makeLogBase2Tab"); - - if ((tab = (l_float32 *)LEPT_CALLOC(256, sizeof(l_float32))) == NULL) - return (l_float32 *)ERROR_PTR("tab not made", procName, NULL); - - log2 = (l_float32)log((l_float32)2); - for (i = 0; i < 256; i++) - tab[i] = (l_float32)log((l_float32)i) / log2; - - return tab; -} - - -/* - * \brief getLogBase2() - * - * \param[in] val in range [0 ... 255] - * \param[in] logtab 256-entry table of logs - * \return logval log[base2] of %val, or 0 on error - */ -l_float32 -getLogBase2(l_int32 val, - l_float32 *logtab) -{ - PROCNAME("getLogBase2"); - - if (!logtab) - return ERROR_INT("logtab not defined", procName, 0); - - if (val < 0x100) - return logtab[val]; - else if (val < 0x10000) - return 8.0 + logtab[val >> 8]; - else if (val < 0x1000000) - return 16.0 + logtab[val >> 16]; - else - return 24.0 + logtab[val >> 24]; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixcomp.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixcomp.c deleted file mode 100644 index c1f3384e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixcomp.c +++ /dev/null @@ -1,2456 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. 
All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixcomp.c - *
- *
- *      Pixcomp creation and destruction
- *           PIXC     *pixcompCreateFromPix()
- *           PIXC     *pixcompCreateFromString()
- *           PIXC     *pixcompCreateFromFile()
- *           void      pixcompDestroy()
- *           PIXC     *pixcompCopy()
-
- *      Pixcomp accessors
- *           l_int32   pixcompGetDimensions()
- *           l_int32   pixcompGetParameters()
- *
- *      Pixcomp compression selection
- *           l_int32   pixcompDetermineFormat()
- *
- *      Pixcomp conversion to Pix
- *           PIX      *pixCreateFromPixcomp()
- *
- *      Pixacomp creation and destruction
- *           PIXAC    *pixacompCreate()
- *           PIXAC    *pixacompCreateWithInit()
- *           PIXAC    *pixacompCreateFromPixa()
- *           PIXAC    *pixacompCreateFromFiles()
- *           PIXAC    *pixacompCreateFromSA()
- *           void      pixacompDestroy()
- *
- *      Pixacomp addition/replacement
- *           l_int32   pixacompAddPix()
- *           l_int32   pixacompAddPixcomp()
- *           static l_int32  pixacompExtendArray()
- *           l_int32   pixacompReplacePix()
- *           l_int32   pixacompReplacePixcomp()
- *           l_int32   pixacompAddBox()
- *
- *      Pixacomp accessors
- *           l_int32   pixacompGetCount()
- *           PIXC     *pixacompGetPixcomp()
- *           PIX      *pixacompGetPix()
- *           l_int32   pixacompGetPixDimensions()
- *           BOXA     *pixacompGetBoxa()
- *           l_int32   pixacompGetBoxaCount()
- *           BOX      *pixacompGetBox()
- *           l_int32   pixacompGetBoxGeometry()
- *           l_int32   pixacompGetOffset()
- *           l_int32   pixacompSetOffset()
- *
- *      Pixacomp conversion to Pixa
- *           PIXA     *pixaCreateFromPixacomp()
- *
- *      Combining pixacomp
- *           l_int32   pixacompJoin()
- *           PIXAC    *pixacompInterleave()
- *
- *      Pixacomp serialized I/O
- *           PIXAC    *pixacompRead()
- *           PIXAC    *pixacompReadStream()
- *           PIXAC    *pixacompReadMem()
- *           l_int32   pixacompWrite()
- *           l_int32   pixacompWriteStream()
- *           l_int32   pixacompWriteMem()
- *
- *      Conversion to pdf
- *           l_int32   pixacompConvertToPdf()
- *           l_int32   pixacompConvertToPdfData()
- *           l_int32   pixacompFastConvertToPdfData()
- *
- *      Output for debugging
- *           l_int32   pixacompWriteStreamInfo()
- *           l_int32   pixcompWriteStreamInfo()
- *           PIX      *pixacompDisplayTiledAndScaled()
- *           l_int32   pixacompWriteFiles()
- *           l_int32   pixcompWriteFile()
- *
- *   The Pixacomp is an array of Pixcomp, where each Pixcomp is a compressed
- *   string of the image.  We don't use reference counting here.
- *   The basic application is to allow a large array of highly
- *   compressible images to reside in memory.  We purposely don't
- *   reuse the Pixa for this, to avoid confusion and programming errors.
- *
- *   Three compression formats are used: g4, png and jpeg.
- *   The compression type can be either specified or defaulted.
- *   If specified and it is not possible to compress (for example,
- *   you specify a jpeg on a 1 bpp image or one with a colormap),
- *   the compression type defaults to png.  The jpeg compression quality
- *   can be specified using l_setJpegQuality(); otherwise the default is 75.
- *
- *   The serialized version of the Pixacomp is similar to that for
- *   a Pixa, except that each Pixcomp can be compressed by one of
- *   tiffg4, png, or jpeg.  Unlike serialization of the Pixa,
- *   serialization of the Pixacomp does not require any imaging
- *   libraries because it simply reads and writes the compressed data.
- *
- *   There are two modes of use in accumulating images:
- *     (1) addition to the end of the array
- *     (2) random insertion (replacement) into the array
- *
- *   In use, we assume that the array is fully populated up to the
- *   index value (n - 1), where n is the value of the pixcomp field n.
- *   Addition can only be made to the end of the fully populated array,
- *   at the index value n.  Insertion can be made randomly, but again
- *   only within the array of pixcomps; i.e., within the set of
- *   indices {0 .... n-1}.  The functions are pixacompReplacePix()
- *   and pixacompReplacePixcomp(), and they destroy the existing pixcomp.
- *
- *   For addition to the end of the array, initialize the pixacomp with
- *   pixacompCreate(), which generates an empty array of pixcomps ptrs.
- *   For random insertion and replacement of pixcomp into a pixacomp,
- *   initialize a fully populated array using pixacompCreateWithInit().
- *
- *   The offset field allows you to use an offset-based index to
- *   access the 0-based ptr array in the pixacomp.  This would typically
- *   be used to map the pixacomp array index to a page number, or v.v.
- *   By default, the offset is 0.  For example, suppose you have 50 images,
- *   corresponding to page numbers 10 - 59.  Then you could use
- *      pixac = pixacompCreateWithInit(50, 10, ...);
- *   This would allocate an array of 50 pixcomps, but if you asked for
- *   the pix at index 10, using pixacompGetPix(pixac, 10), it would
- *   apply the offset internally, returning the pix at index 0 in the array.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 1000000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - - /* Bound on data size */ -static const size_t MaxDataSize = 1000000000; - - /* These two globals are defined in writefile.c */ -extern l_int32 NumImageFileFormatExtensions; -extern const char *ImageFileFormatExtensions[]; - - /* Static functions */ -static l_int32 pixacompExtendArray(PIXAC *pixac); -static l_int32 pixcompFastConvertToPdfData(PIXC *pixc, const char *title, - l_uint8 **pdata, size_t *pnbytes); - - -/*---------------------------------------------------------------------* - * Pixcomp creation and destruction * - *---------------------------------------------------------------------*/ -/*! - * \brief pixcompCreateFromPix() - * - * \param[in] pix - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return pixc, or NULL on error - * - *
- * Notes:
- *      (1) Use %comptype == IFF_DEFAULT to have the compression
- *          type automatically determined.
- *      (2) To compress jpeg with a quality other than the default (75), use
- *             l_jpegSetQuality()
- * 
- */ -PIXC * -pixcompCreateFromPix(PIX *pix, - l_int32 comptype) -{ -size_t size; -char *text; -l_int32 ret, format; -l_uint8 *data; -PIXC *pixc; - - PROCNAME("pixcompCreateFromPix"); - - if (!pix) - return (PIXC *)ERROR_PTR("pix not defined", procName, NULL); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return (PIXC *)ERROR_PTR("invalid comptype", procName, NULL); - - pixc = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC)); - pixGetDimensions(pix, &pixc->w, &pixc->h, &pixc->d); - pixGetResolution(pix, &pixc->xres, &pixc->yres); - if (pixGetColormap(pix)) - pixc->cmapflag = 1; - if ((text = pixGetText(pix)) != NULL) - pixc->text = stringNew(text); - - pixcompDetermineFormat(comptype, pixc->d, pixc->cmapflag, &format); - pixc->comptype = format; - ret = pixWriteMem(&data, &size, pix, format); - if (ret) { - L_ERROR("write to memory failed\n", procName); - pixcompDestroy(&pixc); - return NULL; - } - pixc->data = data; - pixc->size = size; - - return pixc; -} - - -/*! - * \brief pixcompCreateFromString() - * - * \param[in] data compressed string - * \param[in] size number of bytes - * \param[in] copyflag L_INSERT or L_COPY - * \return pixc, or NULL on error - * - *
- * Notes:
- *      (1) This works when the compressed string is png, jpeg or tiffg4.
- *      (2) The copyflag determines if the data in the new Pixcomp is
- *          a copy of the input data.
- * 
- */ -PIXC * -pixcompCreateFromString(l_uint8 *data, - size_t size, - l_int32 copyflag) -{ -l_int32 format, w, h, d, bps, spp, iscmap; -PIXC *pixc; - - PROCNAME("pixcompCreateFromString"); - - if (!data) - return (PIXC *)ERROR_PTR("data not defined", procName, NULL); - if (copyflag != L_INSERT && copyflag != L_COPY) - return (PIXC *)ERROR_PTR("invalid copyflag", procName, NULL); - - if (pixReadHeaderMem(data, size, &format, &w, &h, &bps, &spp, &iscmap) == 1) - return (PIXC *)ERROR_PTR("header data not read", procName, NULL); - pixc = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC)); - d = (spp == 3) ? 32 : bps * spp; - pixc->w = w; - pixc->h = h; - pixc->d = d; - pixc->comptype = format; - pixc->cmapflag = iscmap; - if (copyflag == L_INSERT) - pixc->data = data; - else - pixc->data = l_binaryCopy(data, size); - pixc->size = size; - return pixc; -} - - -/*! - * \brief pixcompCreateFromFile() - * - * \param[in] filename - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return pixc, or NULL on error - * - *
- * Notes:
- *      (1) Use %comptype == IFF_DEFAULT to have the compression
- *          type automatically determined.
- *      (2) If the comptype is invalid for this file, the default will
- *          be substituted.
- * 
- */ -PIXC * -pixcompCreateFromFile(const char *filename, - l_int32 comptype) -{ -l_int32 format; -size_t nbytes; -l_uint8 *data; -PIX *pix; -PIXC *pixc; - - PROCNAME("pixcompCreateFromFile"); - - if (!filename) - return (PIXC *)ERROR_PTR("filename not defined", procName, NULL); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return (PIXC *)ERROR_PTR("invalid comptype", procName, NULL); - - findFileFormat(filename, &format); - if (format == IFF_UNKNOWN) { - L_ERROR("unreadable file: %s\n", procName, filename); - return NULL; - } - - /* Can we accept the encoded file directly? Remember that - * png is the "universal" compression type, so if requested - * it takes precedence. Otherwise, if the file is already - * compressed in g4 or jpeg, just accept the string. */ - if ((format == IFF_TIFF_G4 && comptype != IFF_PNG) || - (format == IFF_JFIF_JPEG && comptype != IFF_PNG)) - comptype = format; - if (comptype != IFF_DEFAULT && comptype == format) { - data = l_binaryRead(filename, &nbytes); - if ((pixc = pixcompCreateFromString(data, nbytes, L_INSERT)) == NULL) { - LEPT_FREE(data); - return (PIXC *)ERROR_PTR("pixc not made (string)", procName, NULL); - } - return pixc; - } - - /* Need to recompress in the default format */ - if ((pix = pixRead(filename)) == NULL) - return (PIXC *)ERROR_PTR("pix not read", procName, NULL); - if ((pixc = pixcompCreateFromPix(pix, comptype)) == NULL) { - pixDestroy(&pix); - return (PIXC *)ERROR_PTR("pixc not made", procName, NULL); - } - pixDestroy(&pix); - return pixc; -} - - -/*! - * \brief pixcompDestroy() - * - * \param[in,out] ppixc use ptr address so it will be nulled - * \return void - * - *
- * Notes:
- *      (1) Always nulls the input ptr.
- * 
- */ -void -pixcompDestroy(PIXC **ppixc) -{ -PIXC *pixc; - - PROCNAME("pixcompDestroy"); - - if (!ppixc) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((pixc = *ppixc) == NULL) - return; - - LEPT_FREE(pixc->data); - if (pixc->text) - LEPT_FREE(pixc->text); - LEPT_FREE(pixc); - *ppixc = NULL; - return; -} - - -/*! - * \brief pixcompCopy() - * - * \param[in] pixcs - * \return pixcd, or NULL on error - */ -PIXC * -pixcompCopy(PIXC *pixcs) -{ -size_t size; -l_uint8 *datas, *datad; -PIXC *pixcd; - - PROCNAME("pixcompCopy"); - - if (!pixcs) - return (PIXC *)ERROR_PTR("pixcs not defined", procName, NULL); - - pixcd = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC)); - pixcd->w = pixcs->w; - pixcd->h = pixcs->h; - pixcd->d = pixcs->d; - pixcd->xres = pixcs->xres; - pixcd->yres = pixcs->yres; - pixcd->comptype = pixcs->comptype; - if (pixcs->text != NULL) - pixcd->text = stringNew(pixcs->text); - pixcd->cmapflag = pixcs->cmapflag; - - /* Copy image data */ - size = pixcs->size; - datas = pixcs->data; - if ((datad = (l_uint8 *)LEPT_CALLOC(size, sizeof(l_int8))) == NULL) { - pixcompDestroy(&pixcd); - return (PIXC *)ERROR_PTR("pixcd not made", procName, NULL); - } - memcpy(datad, datas, size); - pixcd->data = datad; - pixcd->size = size; - return pixcd; -} - - -/*---------------------------------------------------------------------* - * Pixcomp accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief pixcompGetDimensions() - * - * \param[in] pixc - * \param[out] pw, ph, pd [optional] - * \return 0 if OK, 1 on error - */ -l_ok -pixcompGetDimensions(PIXC *pixc, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd) -{ - PROCNAME("pixcompGetDimensions"); - - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - if (pw) *pw = pixc->w; - if (ph) *ph = pixc->h; - if (pd) *pd = pixc->d; - return 0; -} - - -/*! 
- * \brief pixcompGetParameters() - * - * \param[in] pixc - * \param[out] pxres, pyres, pcomptype, pcmapflag [optional] - * \return 0 if OK, 1 on error - */ -l_ok -pixcompGetParameters(PIXC *pixc, - l_int32 *pxres, - l_int32 *pyres, - l_int32 *pcomptype, - l_int32 *pcmapflag) -{ - PROCNAME("pixcompGetParameters"); - - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - if (pxres) *pxres = pixc->xres; - if (pyres) *pyres = pixc->yres; - if (pcomptype) *pcomptype = pixc->comptype; - if (pcmapflag) *pcmapflag = pixc->cmapflag; - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixcomp compression selection * - *---------------------------------------------------------------------*/ -/*! - * \brief pixcompDetermineFormat() - * - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \param[in] d pix depth - * \param[in] cmapflag 1 if pix to be compressed as a colormap; 0 otherwise - * \param[out] pformat IFF_TIFF, IFF_PNG or IFF_JFIF_JPEG - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This determines the best format for a pix, given both
- *          the request (%comptype) and the image characteristics.
- *      (2) If %comptype == IFF_DEFAULT, this does not necessarily result
- *          in png encoding.  Instead, it returns one of the three formats
- *          that is both valid and most likely to give best compression.
- *      (3) If %d == 8 with no colormap and:
- *          * you wish to compress with png, use %comptype == IFF_PNG
- *          * you wish to compress with jpeg, use either
- *            %comptype == IFF_JFIF_JPEG or %comptype == IFF_DEFAULT.
- *      (4) If the pix cannot be compressed by the input value of
- *          %comptype, this selects IFF_PNG, which can compress all pix.
- * 
- */ -l_ok -pixcompDetermineFormat(l_int32 comptype, - l_int32 d, - l_int32 cmapflag, - l_int32 *pformat) -{ - - PROCNAME("pixcompDetermineFormat"); - - if (!pformat) - return ERROR_INT("&format not defined", procName, 1); - *pformat = IFF_PNG; /* init value and default */ - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return ERROR_INT("invalid comptype", procName, 1); - - if (comptype == IFF_DEFAULT) { - if (d == 1) - *pformat = IFF_TIFF_G4; - else if (d == 16) - *pformat = IFF_PNG; - else if (d >= 8 && !cmapflag) - *pformat = IFF_JFIF_JPEG; - } else if (comptype == IFF_TIFF_G4 && d == 1) { - *pformat = IFF_TIFF_G4; - } else if (comptype == IFF_JFIF_JPEG && d >= 8 && !cmapflag) { - *pformat = IFF_JFIF_JPEG; - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixcomp conversion to Pix * - *---------------------------------------------------------------------*/ -/*! - * \brief pixCreateFromPixcomp() - * - * \param[in] pixc - * \return pix, or NULL on error - */ -PIX * -pixCreateFromPixcomp(PIXC *pixc) -{ -l_int32 w, h, d, cmapinpix, format; -PIX *pix; - - PROCNAME("pixCreateFromPixcomp"); - - if (!pixc) - return (PIX *)ERROR_PTR("pixc not defined", procName, NULL); - - if ((pix = pixReadMem(pixc->data, pixc->size)) == NULL) - return (PIX *)ERROR_PTR("pix not read", procName, NULL); - pixSetResolution(pix, pixc->xres, pixc->yres); - if (pixc->text) - pixSetText(pix, pixc->text); - - /* Check fields for consistency */ - pixGetDimensions(pix, &w, &h, &d); - if (pixc->w != w) { - L_INFO("pix width %d != pixc width %d\n", procName, w, pixc->w); - L_ERROR("pix width %d != pixc width\n", procName, w); - } - if (pixc->h != h) - L_ERROR("pix height %d != pixc height\n", procName, h); - if (pixc->d != d) { - if (pixc->d == 16) /* we strip 16 --> 8 bpp by default */ - L_WARNING("pix depth %d != pixc depth 16\n", procName, d); - else - L_ERROR("pix depth %d != 
pixc depth\n", procName, d); - } - cmapinpix = (pixGetColormap(pix) != NULL); - if ((cmapinpix && !pixc->cmapflag) || (!cmapinpix && pixc->cmapflag)) - L_ERROR("pix cmap flag inconsistent\n", procName); - format = pixGetInputFormat(pix); - if (format != pixc->comptype) { - L_ERROR("pix comptype %d not equal to pixc comptype\n", - procName, format); - } - - return pix; -} - - -/*---------------------------------------------------------------------* - * Pixacomp creation and destruction * - *---------------------------------------------------------------------*/ -/*! - * \brief pixacompCreate() - * - * \param[in] n initial number of ptrs - * \return pixac, or NULL on error - */ -PIXAC * -pixacompCreate(l_int32 n) -{ -PIXAC *pixac; - - PROCNAME("pixacompCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - pixac = (PIXAC *)LEPT_CALLOC(1, sizeof(PIXAC)); - pixac->n = 0; - pixac->nalloc = n; - pixac->offset = 0; - if ((pixac->pixc = (PIXC **)LEPT_CALLOC(n, sizeof(PIXC *))) == NULL) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("pixc ptrs not made", procName, NULL); - } - if ((pixac->boxa = boxaCreate(n)) == NULL) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("boxa not made", procName, NULL); - } - - return pixac; -} - - -/*! - * \brief pixacompCreateWithInit() - * - * \param[in] n initial number of ptrs - * \param[in] offset difference: accessor index - pixacomp array index - * \param[in] pix [optional] initialize each ptr in pixacomp - * to this pix; can be NULL - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return pixac, or NULL on error - * - *
- * Notes:
- *      (1) Initializes a pixacomp to be fully populated with %pix,
- *          compressed using %comptype.  If %pix == NULL, %comptype
- *          is ignored.
- *      (2) Typically, the array is initialized with a tiny pix.
- *          This is most easily done by setting %pix == NULL, causing
- *          initialization of each array element with a tiny placeholder
- *          pix (w = h = d = 1), using comptype = IFF_TIFF_G4 .
- *      (3) Example usage:
- *            // Generate pixacomp for pages 30 - 49.  This has an array
- *            // size of 20 and the page number offset is 30.
- *            PixaComp *pixac = pixacompCreateWithInit(20, 30, NULL,
- *                                                     IFF_TIFF_G4);
- *            // Now insert png-compressed images into the initialized array
- *            for (pageno = 30; pageno < 50; pageno++) {
- *                Pix *pixt = ...   // derived from image[pageno]
- *                if (pixt)
- *                    pixacompReplacePix(pixac, pageno, pixt, IFF_PNG);
- *                pixDestroy(&pixt);
- *            }
- *          The result is a pixac with 20 compressed strings, and with
- *          selected pixt replacing the placeholders.
- *          To extract the image for page 38, which is decompressed
- *          from element 8 in the array, use:
- *            pixt = pixacompGetPix(pixac, 38);
- * 
- */ -PIXAC * -pixacompCreateWithInit(l_int32 n, - l_int32 offset, - PIX *pix, - l_int32 comptype) -{ -l_int32 i; -PIX *pixt; -PIXC *pixc; -PIXAC *pixac; - - PROCNAME("pixacompCreateWithInit"); - - if (n <= 0 || n > MaxPtrArraySize) - return (PIXAC *)ERROR_PTR("n out of valid bounds", procName, NULL); - if (pix) { - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return (PIXAC *)ERROR_PTR("invalid comptype", procName, NULL); - } else { - comptype = IFF_TIFF_G4; - } - if (offset < 0) { - L_WARNING("offset < 0; setting to 0\n", procName); - offset = 0; - } - - if ((pixac = pixacompCreate(n)) == NULL) - return (PIXAC *)ERROR_PTR("pixac not made", procName, NULL); - pixacompSetOffset(pixac, offset); - if (pix) - pixt = pixClone(pix); - else - pixt = pixCreate(1, 1, 1); - for (i = 0; i < n; i++) { - pixc = pixcompCreateFromPix(pixt, comptype); - pixacompAddPixcomp(pixac, pixc, L_INSERT); - } - pixDestroy(&pixt); - - return pixac; -} - - -/*! - * \brief pixacompCreateFromPixa() - * - * \param[in] pixa - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If %format == IFF_DEFAULT, the conversion format for each
- *          image is chosen automatically.  Otherwise, we use the
- *          specified format unless it can't be done (e.g., jpeg
- *          for a 1, 2 or 4 bpp pix, or a pix with a colormap),
- *          in which case we use the default (assumed best) compression.
- *      (2) %accesstype is used to extract a boxa from %pixa.
- *      (3) To compress jpeg with a quality other than the default (75), use
- *             l_jpegSetQuality()
- * 
- */ -PIXAC * -pixacompCreateFromPixa(PIXA *pixa, - l_int32 comptype, - l_int32 accesstype) -{ -l_int32 i, n; -BOXA *boxa; -PIX *pix; -PIXAC *pixac; - - PROCNAME("pixacompCreateFromPixa"); - - if (!pixa) - return (PIXAC *)ERROR_PTR("pixa not defined", procName, NULL); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return (PIXAC *)ERROR_PTR("invalid comptype", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE && - accesstype != L_COPY_CLONE) - return (PIXAC *)ERROR_PTR("invalid accesstype", procName, NULL); - - n = pixaGetCount(pixa); - if ((pixac = pixacompCreate(n)) == NULL) - return (PIXAC *)ERROR_PTR("pixac not made", procName, NULL); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixacompAddPix(pixac, pix, comptype); - pixDestroy(&pix); - } - if ((boxa = pixaGetBoxa(pixa, accesstype)) != NULL) { - boxaDestroy(&pixac->boxa); - pixac->boxa = boxa; - } - - return pixac; -} - - -/*! - * \brief pixacompCreateFromFiles() - * - * \param[in] dirname - * \param[in] substr [optional] substring filter on filenames; can be null - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return pixac, or NULL on error - * - *
- * Notes:
- *      (1) %dirname is the full path for the directory.
- *      (2) %substr is the part of the file name (excluding
- *          the directory) that is to be matched.  All matching
- *          filenames are read into the Pixa.  If substr is NULL,
- *          all filenames are read into the Pixa.
- *      (3) Use %comptype == IFF_DEFAULT to have the compression
- *          type automatically determined for each file.
- *      (4) If the comptype is invalid for a file, the default will
- *          be substituted.
- * 
- */ -PIXAC * -pixacompCreateFromFiles(const char *dirname, - const char *substr, - l_int32 comptype) -{ -PIXAC *pixac; -SARRAY *sa; - - PROCNAME("pixacompCreateFromFiles"); - - if (!dirname) - return (PIXAC *)ERROR_PTR("dirname not defined", procName, NULL); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return (PIXAC *)ERROR_PTR("invalid comptype", procName, NULL); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return (PIXAC *)ERROR_PTR("sa not made", procName, NULL); - pixac = pixacompCreateFromSA(sa, comptype); - sarrayDestroy(&sa); - return pixac; -} - - -/*! - * \brief pixacompCreateFromSA() - * - * \param[in] sa full pathnames for all files - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return pixac, or NULL on error - * - *
- * Notes:
- *      (1) Use %comptype == IFF_DEFAULT to have the compression
- *          type automatically determined for each file.
- *      (2) If the comptype is invalid for a file, the default will
- *          be substituted.
- * 
- */ -PIXAC * -pixacompCreateFromSA(SARRAY *sa, - l_int32 comptype) -{ -char *str; -l_int32 i, n; -PIXC *pixc; -PIXAC *pixac; - - PROCNAME("pixacompCreateFromSA"); - - if (!sa) - return (PIXAC *)ERROR_PTR("sarray not defined", procName, NULL); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return (PIXAC *)ERROR_PTR("invalid comptype", procName, NULL); - - n = sarrayGetCount(sa); - pixac = pixacompCreate(n); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - if ((pixc = pixcompCreateFromFile(str, comptype)) == NULL) { - L_ERROR("pixc not read from file: %s\n", procName, str); - continue; - } - pixacompAddPixcomp(pixac, pixc, L_INSERT); - } - return pixac; -} - - -/*! - * \brief pixacompDestroy() - * - * \param[in,out] ppixac use ptr address so it will be nulled - * \return void - * - *
- * Notes:
- *      (1) Always nulls the input ptr.
- * 
- */ -void -pixacompDestroy(PIXAC **ppixac) -{ -l_int32 i; -PIXAC *pixac; - - PROCNAME("pixacompDestroy"); - - if (ppixac == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((pixac = *ppixac) == NULL) - return; - - for (i = 0; i < pixac->n; i++) - pixcompDestroy(&pixac->pixc[i]); - LEPT_FREE(pixac->pixc); - boxaDestroy(&pixac->boxa); - LEPT_FREE(pixac); - - *ppixac = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * Pixacomp addition * - *---------------------------------------------------------------------*/ -/*! - * \brief pixacompAddPix() - * - * \param[in] pixac - * \param[in] pix to be added - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The array is filled up to the (n-1)-th element, and this
- *          converts the input pix to a pixc and adds it at
- *          the n-th position.
- *      (2) The pixc produced from the pix is owned by the pixac.
- *          The input pix is not affected.
- * 
- */ -l_ok -pixacompAddPix(PIXAC *pixac, - PIX *pix, - l_int32 comptype) -{ -l_int32 cmapflag, format; -PIXC *pixc; - - PROCNAME("pixacompAddPix"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return ERROR_INT("invalid format", procName, 1); - - cmapflag = pixGetColormap(pix) ? 1 : 0; - pixcompDetermineFormat(comptype, pixGetDepth(pix), cmapflag, &format); - if ((pixc = pixcompCreateFromPix(pix, format)) == NULL) - return ERROR_INT("pixc not made", procName, 1); - pixacompAddPixcomp(pixac, pixc, L_INSERT); - return 0; -} - - -/*! - * \brief pixacompAddPixcomp() - * - * \param[in] pixac - * \param[in] pixc to be added by insertion - * \param[in] copyflag L_INSERT, L_COPY - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Anything added to a pixac is owned by the pixac.
- *          So do not L_INSERT a pixc that is owned by another pixac,
- *          or destroy a pixc that has been L_INSERTed.
- * 
- */ -l_ok -pixacompAddPixcomp(PIXAC *pixac, - PIXC *pixc, - l_int32 copyflag) -{ -l_int32 n; - - PROCNAME("pixacompAddPixcomp"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY) - return ERROR_INT("invalid copyflag", procName, 1); - - n = pixac->n; - if (n >= pixac->nalloc) - pixacompExtendArray(pixac); - if (copyflag == L_INSERT) - pixac->pixc[n] = pixc; - else /* L_COPY */ - pixac->pixc[n] = pixcompCopy(pixc); - pixac->n++; - - return 0; -} - - -/*! - * \brief pixacompExtendArray() - * - * \param[in] pixac - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) We extend the boxa array simultaneously.  This is
- *          necessary in case we are NOT adding boxes simultaneously
- *          with adding pixc.  We always want the sizes of the
- *          pixac and boxa ptr arrays to be equal.
- * 
- */ -static l_int32 -pixacompExtendArray(PIXAC *pixac) -{ - PROCNAME("pixacompExtendArray"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - - if ((pixac->pixc = (PIXC **)reallocNew((void **)&pixac->pixc, - sizeof(PIXC *) * pixac->nalloc, - 2 * sizeof(PIXC *) * pixac->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - pixac->nalloc = 2 * pixac->nalloc; - boxaExtendArray(pixac->boxa); - return 0; -} - - -/*! - * \brief pixacompReplacePix() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; includes offset - * \param[in] pix owned by the caller - * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- *      (2) The input %pix is converted to a pixc, which is then inserted
- *          into the pixac.
- * 
- */ -l_ok -pixacompReplacePix(PIXAC *pixac, - l_int32 index, - PIX *pix, - l_int32 comptype) -{ -l_int32 n, aindex; -PIXC *pixc; - - PROCNAME("pixacompReplacePix"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - n = pixacompGetCount(pixac); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= n) - return ERROR_INT("array index out of bounds", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 && - comptype != IFF_PNG && comptype != IFF_JFIF_JPEG) - return ERROR_INT("invalid format", procName, 1); - - pixc = pixcompCreateFromPix(pix, comptype); - pixacompReplacePixcomp(pixac, index, pixc); - return 0; -} - - -/*! - * \brief pixacompReplacePixcomp() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; includes offset - * \param[in] pixc to replace existing one, which is destroyed - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- *      (2) The inserted %pixc is now owned by the pixac.  The caller
- *          must not destroy it.
- * 
- */ -l_ok -pixacompReplacePixcomp(PIXAC *pixac, - l_int32 index, - PIXC *pixc) -{ -l_int32 n, aindex; -PIXC *pixct; - - PROCNAME("pixacompReplacePixcomp"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - n = pixacompGetCount(pixac); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= n) - return ERROR_INT("array index out of bounds", procName, 1); - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - - pixct = pixacompGetPixcomp(pixac, index, L_NOCOPY); /* use %index */ - pixcompDestroy(&pixct); - pixac->pixc[aindex] = pixc; /* replace; use array index */ - - return 0; -} - - -/*! - * \brief pixacompAddBox() - * - * \param[in] pixac - * \param[in] box - * \param[in] copyflag L_INSERT, L_COPY - * \return 0 if OK, 1 on error - */ -l_ok -pixacompAddBox(PIXAC *pixac, - BOX *box, - l_int32 copyflag) -{ - PROCNAME("pixacompAddBox"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY) - return ERROR_INT("invalid copyflag", procName, 1); - - boxaAddBox(pixac->boxa, box, copyflag); - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixacomp accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief pixacompGetCount() - * - * \param[in] pixac - * \return count, or 0 if no pixa - */ -l_int32 -pixacompGetCount(PIXAC *pixac) -{ - PROCNAME("pixacompGetCount"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 0); - - return pixac->n; -} - - -/*! - * \brief pixacompGetPixcomp() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; includes offset - * \param[in] copyflag L_NOCOPY, L_COPY - * \return pixc, or NULL on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- *      (2) If copyflag == L_NOCOPY, the pixc is owned by %pixac; do
- *          not destroy.
- * 
- */ -PIXC * -pixacompGetPixcomp(PIXAC *pixac, - l_int32 index, - l_int32 copyflag) -{ -l_int32 aindex; - - PROCNAME("pixacompGetPixcomp"); - - if (!pixac) - return (PIXC *)ERROR_PTR("pixac not defined", procName, NULL); - if (copyflag != L_NOCOPY && copyflag != L_COPY) - return (PIXC *)ERROR_PTR("invalid copyflag", procName, NULL); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= pixac->n) - return (PIXC *)ERROR_PTR("array index not valid", procName, NULL); - - if (copyflag == L_NOCOPY) - return pixac->pixc[aindex]; - else /* L_COPY */ - return pixcompCopy(pixac->pixc[aindex]); -} - - -/*! - * \brief pixacompGetPix() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; includes offset - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- * 
- */ -PIX * -pixacompGetPix(PIXAC *pixac, - l_int32 index) -{ -l_int32 aindex; -PIXC *pixc; - - PROCNAME("pixacompGetPix"); - - if (!pixac) - return (PIX *)ERROR_PTR("pixac not defined", procName, NULL); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= pixac->n) - return (PIX *)ERROR_PTR("array index not valid", procName, NULL); - - pixc = pixacompGetPixcomp(pixac, index, L_NOCOPY); - return pixCreateFromPixcomp(pixc); -} - - -/*! - * \brief pixacompGetPixDimensions() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; - * includes offset - * \param[out] pw, ph, pd [optional] each can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- * 
- */ -l_ok -pixacompGetPixDimensions(PIXAC *pixac, - l_int32 index, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd) -{ -l_int32 aindex; -PIXC *pixc; - - PROCNAME("pixacompGetPixDimensions"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= pixac->n) - return ERROR_INT("array index not valid", procName, 1); - - if ((pixc = pixac->pixc[aindex]) == NULL) - return ERROR_INT("pixc not found!", procName, 1); - pixcompGetDimensions(pixc, pw, ph, pd); - return 0; -} - - -/*! - * \brief pixacompGetBoxa() - * - * \param[in] pixac - * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE - * \return boxa, or NULL on error - */ -BOXA * -pixacompGetBoxa(PIXAC *pixac, - l_int32 accesstype) -{ - PROCNAME("pixacompGetBoxa"); - - if (!pixac) - return (BOXA *)ERROR_PTR("pixac not defined", procName, NULL); - if (!pixac->boxa) - return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE && - accesstype != L_COPY_CLONE) - return (BOXA *)ERROR_PTR("invalid accesstype", procName, NULL); - - return boxaCopy(pixac->boxa, accesstype); -} - - -/*! - * \brief pixacompGetBoxaCount() - * - * \param[in] pixac - * \return count, or 0 on error - */ -l_int32 -pixacompGetBoxaCount(PIXAC *pixac) -{ - PROCNAME("pixacompGetBoxaCount"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 0); - - return boxaGetCount(pixac->boxa); -} - - -/*! - * \brief pixacompGetBox() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; - * includes offset - * \param[in] accesstype L_COPY or L_CLONE - * \return box if null, not automatically an error, or NULL on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- *      (2) There is always a boxa with a pixac, and it is initialized so
- *          that each box ptr is NULL.
- *      (3) In general, we expect that there is either a box associated
- *          with each pixc, or no boxes at all in the boxa.
- *      (4) Having no boxes is thus not an automatic error.  Whether it
- *          is an actual error is determined by the calling program.
- *          If the caller expects to get a box, it is an error; see, e.g.,
- *          pixacGetBoxGeometry().
- * 
- */ -BOX * -pixacompGetBox(PIXAC *pixac, - l_int32 index, - l_int32 accesstype) -{ -l_int32 aindex; -BOX *box; - - PROCNAME("pixacompGetBox"); - - if (!pixac) - return (BOX *)ERROR_PTR("pixac not defined", procName, NULL); - if (!pixac->boxa) - return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= pixac->boxa->n) - return (BOX *)ERROR_PTR("array index not valid", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE) - return (BOX *)ERROR_PTR("invalid accesstype", procName, NULL); - - box = pixac->boxa->box[aindex]; - if (box) { - if (accesstype == L_COPY) - return boxCopy(box); - else /* accesstype == L_CLONE */ - return boxClone(box); - } else { - return NULL; - } -} - - -/*! - * \brief pixacompGetBoxGeometry() - * - * \param[in] pixac - * \param[in] index caller's view of index within pixac; - * includes offset - * \param[out] px, py, pw, ph [optional] each can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The %index includes the offset, which must be subtracted
- *          to get the actual index into the ptr array.
- * 
- */ -l_ok -pixacompGetBoxGeometry(PIXAC *pixac, - l_int32 index, - l_int32 *px, - l_int32 *py, - l_int32 *pw, - l_int32 *ph) -{ -l_int32 aindex; -BOX *box; - - PROCNAME("pixacompGetBoxGeometry"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - aindex = index - pixac->offset; - if (aindex < 0 || aindex >= pixac->n) - return ERROR_INT("array index not valid", procName, 1); - - if ((box = pixacompGetBox(pixac, aindex, L_CLONE)) == NULL) - return ERROR_INT("box not found!", procName, 1); - boxGetGeometry(box, px, py, pw, ph); - boxDestroy(&box); - return 0; -} - - -/*! - * \brief pixacompGetOffset() - * - * \param[in] pixac - * \return offset, or 0 on error - * - *
- * Notes:
- *      (1) The offset is the difference between the caller's view of
- *          the index into the array and the actual array index.
- *          By default it is 0.
- * 
- */ -l_int32 -pixacompGetOffset(PIXAC *pixac) -{ - PROCNAME("pixacompGetOffset"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 0); - return pixac->offset; -} - - -/*! - * \brief pixacompSetOffset() - * - * \param[in] pixac - * \param[in] offset non-negative - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The offset is the difference between the caller's view of
- *          the index into the array and the actual array index.
- *          By default it is 0.
- * 
- */ -l_ok -pixacompSetOffset(PIXAC *pixac, - l_int32 offset) -{ - PROCNAME("pixacompSetOffset"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - pixac->offset = L_MAX(0, offset); - return 0; -} - - -/*---------------------------------------------------------------------* - * Pixacomp conversion to Pixa * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaCreateFromPixacomp() - * - * \param[in] pixac - * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE; for boxa - * \return pixa if OK, or NULL on error - * - *
- * Notes:
- *      (1) Because the pixa has no notion of offset, the offset must
- *          be set to 0 before the conversion, so that pixacompGetPix()
- *          fetches all the pixcomps.  It is reset at the end.
- * 
- */ -PIXA * -pixaCreateFromPixacomp(PIXAC *pixac, - l_int32 accesstype) -{ -l_int32 i, n, offset; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaCreateFromPixacomp"); - - if (!pixac) - return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL); - if (accesstype != L_COPY && accesstype != L_CLONE && - accesstype != L_COPY_CLONE) - return (PIXA *)ERROR_PTR("invalid accesstype", procName, NULL); - - n = pixacompGetCount(pixac); - offset = pixacompGetOffset(pixac); - pixacompSetOffset(pixac, 0); - if ((pixa = pixaCreate(n)) == NULL) - return (PIXA *)ERROR_PTR("pixa not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((pix = pixacompGetPix(pixac, i)) == NULL) { - L_WARNING("pix %d not made\n", procName, i); - continue; - } - pixaAddPix(pixa, pix, L_INSERT); - } - if (pixa->boxa) { - boxaDestroy(&pixa->boxa); - pixa->boxa = pixacompGetBoxa(pixac, accesstype); - } - pixacompSetOffset(pixac, offset); - - return pixa; -} - - -/*---------------------------------------------------------------------* - * Combining pixacomp - *---------------------------------------------------------------------*/ -/*! - * \brief pixacompJoin() - * - * \param[in] pixacd dest pixac; add to this one - * \param[in] pixacs [optional] source pixac; add from this one - * \param[in] istart starting index in pixacs - * \param[in] iend ending index in pixacs; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This appends a clone of each indicated pixc in pixcas to pixcad
- *      (2) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (3) iend < 0 means 'read to the end'
- *      (4) If pixacs is NULL or contains no pixc, this is a no-op.
- * 
- */ -l_ok -pixacompJoin(PIXAC *pixacd, - PIXAC *pixacs, - l_int32 istart, - l_int32 iend) -{ -l_int32 i, n, nb; -BOXA *boxas, *boxad; -PIXC *pixc; - - PROCNAME("pixacompJoin"); - - if (!pixacd) - return ERROR_INT("pixacd not defined", procName, 1); - if (!pixacs || ((n = pixacompGetCount(pixacs)) == 0)) - return 0; - - if (istart < 0) - istart = 0; - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; nothing to add", procName, 1); - - for (i = istart; i <= iend; i++) { - pixc = pixacompGetPixcomp(pixacs, i, L_NOCOPY); - pixacompAddPixcomp(pixacd, pixc, L_COPY); - } - - boxas = pixacompGetBoxa(pixacs, L_CLONE); - boxad = pixacompGetBoxa(pixacd, L_CLONE); - nb = pixacompGetBoxaCount(pixacs); - iend = L_MIN(iend, nb - 1); - boxaJoin(boxad, boxas, istart, iend); - boxaDestroy(&boxas); /* just the clones */ - boxaDestroy(&boxad); /* ditto */ - return 0; -} - - -/*! - * \brief pixacompInterleave() - * - * \param[in] pixac1 first src pixac - * \param[in] pixac2 second src pixac - * \return pixacd interleaved from sources, or NULL on error. - * - *
- * Notes:
- *      (1) If the two pixac have different sizes, a warning is issued,
- *          and the number of pairs returned is the minimum size.
- * 
- */ -PIXAC * -pixacompInterleave(PIXAC *pixac1, - PIXAC *pixac2) -{ -l_int32 i, n1, n2, n, nb1, nb2; -BOX *box; -PIXC *pixc1, *pixc2; -PIXAC *pixacd; - - PROCNAME("pixacompInterleave"); - - if (!pixac1) - return (PIXAC *)ERROR_PTR("pixac1 not defined", procName, NULL); - if (!pixac2) - return (PIXAC *)ERROR_PTR("pixac2 not defined", procName, NULL); - n1 = pixacompGetCount(pixac1); - n2 = pixacompGetCount(pixac2); - n = L_MIN(n1, n2); - if (n == 0) - return (PIXAC *)ERROR_PTR("at least one input pixac is empty", - procName, NULL); - if (n1 != n2) - L_WARNING("counts differ: %d != %d\n", procName, n1, n2); - - pixacd = pixacompCreate(2 * n); - nb1 = pixacompGetBoxaCount(pixac1); - nb2 = pixacompGetBoxaCount(pixac2); - for (i = 0; i < n; i++) { - pixc1 = pixacompGetPixcomp(pixac1, i, L_COPY); - pixacompAddPixcomp(pixacd, pixc1, L_INSERT); - if (i < nb1) { - box = pixacompGetBox(pixac1, i, L_COPY); - pixacompAddBox(pixacd, box, L_INSERT); - } - pixc2 = pixacompGetPixcomp(pixac2, i, L_COPY); - pixacompAddPixcomp(pixacd, pixc2, L_INSERT); - if (i < nb2) { - box = pixacompGetBox(pixac2, i, L_COPY); - pixacompAddBox(pixacd, box, L_INSERT); - } - } - - return pixacd; -} - - -/*---------------------------------------------------------------------* - * Pixacomp serialized I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief pixacompRead() - * - * \param[in] filename - * \return pixac, or NULL on error - * - *
- * Notes:
- *      (1) Unlike the situation with serialized Pixa, where the image
- *          data is stored in png format, the Pixacomp image data
- *          can be stored in tiffg4, png and jpg formats.
- * 
- */ -PIXAC * -pixacompRead(const char *filename) -{ -FILE *fp; -PIXAC *pixac; - - PROCNAME("pixacompRead"); - - if (!filename) - return (PIXAC *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIXAC *)ERROR_PTR("stream not opened", procName, NULL); - pixac = pixacompReadStream(fp); - fclose(fp); - if (!pixac) - return (PIXAC *)ERROR_PTR("pixac not read", procName, NULL); - return pixac; -} - - -/*! - * \brief pixacompReadStream() - * - * \param[in] fp file stream - * \return pixac, or NULL on error - */ -PIXAC * -pixacompReadStream(FILE *fp) -{ -char buf[256]; -l_uint8 *data; -l_int32 n, offset, i, w, h, d, ignore; -l_int32 comptype, cmapflag, version, xres, yres; -size_t size; -BOXA *boxa; -PIXC *pixc; -PIXAC *pixac; - - PROCNAME("pixacompReadStream"); - - if (!fp) - return (PIXAC *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nPixacomp Version %d\n", &version) != 1) - return (PIXAC *)ERROR_PTR("not a pixacomp file", procName, NULL); - if (version != PIXACOMP_VERSION_NUMBER) - return (PIXAC *)ERROR_PTR("invalid pixacomp version", procName, NULL); - if (fscanf(fp, "Number of pixcomp = %d\n", &n) != 1) - return (PIXAC *)ERROR_PTR("not a pixacomp file", procName, NULL); - if (fscanf(fp, "Offset of index into array = %d", &offset) != 1) - return (PIXAC *)ERROR_PTR("offset not read", procName, NULL); - - if ((pixac = pixacompCreate(n)) == NULL) - return (PIXAC *)ERROR_PTR("pixac not made", procName, NULL); - if ((boxa = boxaReadStream(fp)) == NULL) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("boxa not made", procName, NULL); - } - boxaDestroy(&pixac->boxa); /* empty */ - pixac->boxa = boxa; - pixacompSetOffset(pixac, offset); - - for (i = 0; i < n; i++) { - if (fscanf(fp, "\nPixcomp[%d]: w = %d, h = %d, d = %d\n", - &ignore, &w, &h, &d) != 4) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("dimension reading", procName, NULL); - } - if (fscanf(fp, " comptype = 
%d, size = %zu, cmapflag = %d\n", - &comptype, &size, &cmapflag) != 3) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("comptype/size reading", procName, NULL); - } - if (size > MaxDataSize) { - pixacompDestroy(&pixac); - L_ERROR("data size = %zu is too big", procName, size); - return NULL; - } - - /* Use fgets() and sscanf(); not fscanf(), for the last - * bit of header data before the binary data. The reason is - * that fscanf throws away white space, and if the binary data - * happens to begin with ascii character(s) that are white - * space, it will swallow them and all will be lost! */ - if (fgets(buf, sizeof(buf), fp) == NULL) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("fgets read fail", procName, NULL); - } - if (sscanf(buf, " xres = %d, yres = %d\n", &xres, &yres) != 2) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("read fail for res", procName, NULL); - } - if ((data = (l_uint8 *)LEPT_CALLOC(1, size)) == NULL) { - pixacompDestroy(&pixac); - return (PIXAC *)ERROR_PTR("calloc fail for data", procName, NULL); - } - if (fread(data, 1, size, fp) != size) { - pixacompDestroy(&pixac); - LEPT_FREE(data); - return (PIXAC *)ERROR_PTR("error reading data", procName, NULL); - } - fgetc(fp); /* swallow the ending nl */ - pixc = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC)); - pixc->w = w; - pixc->h = h; - pixc->d = d; - pixc->xres = xres; - pixc->yres = yres; - pixc->comptype = comptype; - pixc->cmapflag = cmapflag; - pixc->data = data; - pixc->size = size; - pixacompAddPixcomp(pixac, pixc, L_INSERT); - } - return pixac; -} - - -/*! - * \brief pixacompReadMem() - * - * \param[in] data in pixacomp format - * \param[in] size of data - * \return pixac, or NULL on error - * - *
- * Notes:
- *      (1) Deseralizes a buffer of pixacomp data into a pixac in memory.
- * 
- */ -PIXAC * -pixacompReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PIXAC *pixac; - - PROCNAME("pixacompReadMem"); - - if (!data) - return (PIXAC *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIXAC *)ERROR_PTR("stream not opened", procName, NULL); - - pixac = pixacompReadStream(fp); - fclose(fp); - if (!pixac) L_ERROR("pixac not read\n", procName); - return pixac; -} - - -/*! - * \brief pixacompWrite() - * - * \param[in] filename - * \param[in] pixac - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Unlike the situation with serialized Pixa, where the image
- *          data is stored in png format, the Pixacomp image data
- *          can be stored in tiffg4, png and jpg formats.
- * 
- */ -l_ok -pixacompWrite(const char *filename, - PIXAC *pixac) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixacompWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pixac) - return ERROR_INT("pixacomp not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixacompWriteStream(fp, pixac); - fclose(fp); - if (ret) - return ERROR_INT("pixacomp not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixacompWriteStream() - * - * \param[in] fp file stream - * \param[in] pixac - * \return 0 if OK, 1 on error - */ -l_ok -pixacompWriteStream(FILE *fp, - PIXAC *pixac) -{ -l_int32 n, i; -PIXC *pixc; - - PROCNAME("pixacompWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - - n = pixacompGetCount(pixac); - fprintf(fp, "\nPixacomp Version %d\n", PIXACOMP_VERSION_NUMBER); - fprintf(fp, "Number of pixcomp = %d\n", n); - fprintf(fp, "Offset of index into array = %d", pixac->offset); - boxaWriteStream(fp, pixac->boxa); - for (i = 0; i < n; i++) { - if ((pixc = pixacompGetPixcomp(pixac, pixac->offset + i, L_NOCOPY)) - == NULL) - return ERROR_INT("pixc not found", procName, 1); - fprintf(fp, "\nPixcomp[%d]: w = %d, h = %d, d = %d\n", - i, pixc->w, pixc->h, pixc->d); - fprintf(fp, " comptype = %d, size = %zu, cmapflag = %d\n", - pixc->comptype, pixc->size, pixc->cmapflag); - fprintf(fp, " xres = %d, yres = %d\n", pixc->xres, pixc->yres); - fwrite(pixc->data, 1, pixc->size, fp); - fprintf(fp, "\n"); - } - return 0; -} - - -/*! - * \brief pixacompWriteMem() - * - * \param[out] pdata serialized data of pixac - * \param[out] psize size of serialized data - * \param[in] pixac - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a pixac in memory and puts the result in a buffer.
- * 
- */ -l_ok -pixacompWriteMem(l_uint8 **pdata, - size_t *psize, - PIXAC *pixac) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixacompWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!pixac) - return ERROR_INT("&pixac not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixacompWriteStream(fp, pixac); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixacompWriteStream(fp, pixac); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*--------------------------------------------------------------------* - * Conversion to pdf * - *--------------------------------------------------------------------*/ -/*! - * \brief pixacompConvertToPdf() - * - * \param[in] pixac containing images all at the same resolution - * \param[in] res override the resolution of each input image, - * in ppi; 0 to respect the resolution embedded - * in the input - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE, or - * L_DEFAULT_ENCODE for default) - * \param[in] quality used for JPEG only; 0 for default (75) - * \param[in] title [optional] pdf title - * \param[in] fileout pdf file of all images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This follows closely the function pixaConvertToPdf() in pdfio.c.
- *      (2) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
- *          colormap and many colors, or 32 bpp; FLATE for anything else.
- *      (3) The scalefactor must be > 0.0; otherwise it is set to 1.0.
- *      (4) Specifying one of the three encoding types for %type forces
- *          all images to be compressed with that type.  Use 0 to have
- *          the type determined for each image based on depth and whether
- *          or not it has a colormap.
- *      (5) If all images are jpeg compressed, don't require scaling
- *          and have the same resolution, it is much faster to skip
- *          transcoding with pixacompFastConvertToPdfData(), and then
- *          write the data out to file.
- * 
- */ -l_ok -pixacompConvertToPdf(PIXAC *pixac, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - const char *fileout) -{ -l_uint8 *data; -l_int32 ret; -size_t nbytes; - - PROCNAME("pixacompConvertToPdf"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - - ret = pixacompConvertToPdfData(pixac, res, scalefactor, type, quality, - title, &data, &nbytes); - if (ret) { - LEPT_FREE(data); - return ERROR_INT("conversion to pdf failed", procName, 1); - } - - ret = l_binaryWrite(fileout, "w", data, nbytes); - LEPT_FREE(data); - if (ret) - L_ERROR("pdf data not written to file\n", procName); - return ret; -} - - -/*! - * \brief pixacompConvertToPdfData() - * - * \param[in] pixac containing images all at the same resolution - * \param[in] res input resolution of all images - * \param[in] scalefactor scaling factor applied to each image; > 0.0 - * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, - * L_FLATE_ENCODE, L_JP2K_ENCODE, or - * L_DEFAULT_ENCODE for default) - * \param[in] quality used for JPEG only; 0 for default (75) - * \param[in] title [optional] pdf title - * \param[out] pdata output pdf data (of all images - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixacompConvertToPdf().
- * 
- */ -l_ok -pixacompConvertToPdfData(PIXAC *pixac, - l_int32 res, - l_float32 scalefactor, - l_int32 type, - l_int32 quality, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_uint8 *imdata; -l_int32 i, n, ret, scaledres, pagetype; -size_t imbytes; -L_BYTEA *ba; -PIX *pixs, *pix; -L_PTRA *pa_data; - - PROCNAME("pixacompConvertToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - if (scalefactor <= 0.0) scalefactor = 1.0; - if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE && - type != L_G4_ENCODE && type != L_FLATE_ENCODE && - type != L_JP2K_ENCODE) { - L_WARNING("invalid compression type; using per-page default\n", - procName); - type = L_DEFAULT_ENCODE; - } - - /* Generate all the encoded pdf strings */ - n = pixacompGetCount(pixac); - pa_data = ptraCreate(n); - for (i = 0; i < n; i++) { - if ((pixs = - pixacompGetPix(pixac, pixacompGetOffset(pixac) + i)) == NULL) { - L_ERROR("pix[%d] not retrieved\n", procName, i); - continue; - } - if (pixGetWidth(pixs) == 1) { /* used sometimes as placeholders */ - L_INFO("placeholder image[%d] has w = 1\n", procName, i); - pixDestroy(&pixs); - continue; - } - if (scalefactor != 1.0) - pix = pixScale(pixs, scalefactor, scalefactor); - else - pix = pixClone(pixs); - pixDestroy(&pixs); - scaledres = (l_int32)(res * scalefactor); - - /* Select the encoding type */ - if (type != L_DEFAULT_ENCODE) { - pagetype = type; - } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { - L_ERROR("encoding type selection failed for pix[%d]\n", - procName, i); - pixDestroy(&pix); - continue; - } - - ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, - 0, 0, scaledres, title, NULL, 0); - pixDestroy(&pix); - if (ret) { - L_ERROR("pdf encoding failed for pix[%d]\n", procName, i); - continue; - } - ba = 
l_byteaInitFromMem(imdata, imbytes); - LEPT_FREE(imdata); - ptraAdd(pa_data, ba); - } - ptraGetActualCount(pa_data, &n); - if (n == 0) { - L_ERROR("no pdf files made\n", procName); - ptraDestroy(&pa_data, FALSE, FALSE); - return 1; - } - - /* Concatenate them */ - ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); - - ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */ - for (i = 0; i < n; i++) { - ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&ba); - } - ptraDestroy(&pa_data, FALSE, FALSE); - return ret; -} - - -/*! - * \brief pixacompFastConvertToPdfData() - * - * \param[in] pixac containing images all at the same resolution - * \param[in] title [optional] pdf title - * \param[out] pdata output pdf data (of all images - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates the pdf without transcoding if all the
- *          images in %pixac are compressed with jpeg.
- *          Images not jpeg compressed are skipped.
- *      (2) It assumes all images have the same resolution, and that
- *          the resolution embedded in each jpeg file is correct.
- * 
- */ -l_ok -pixacompFastConvertToPdfData(PIXAC *pixac, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_uint8 *imdata; -l_int32 i, n, ret, comptype; -size_t imbytes; -L_BYTEA *ba; -PIXC *pixc; -L_PTRA *pa_data; - - PROCNAME("pixacompFastConvertToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - - /* Generate all the encoded pdf strings */ - n = pixacompGetCount(pixac); - pa_data = ptraCreate(n); - for (i = 0; i < n; i++) { - if ((pixc = pixacompGetPixcomp(pixac, i, L_NOCOPY)) == NULL) { - L_ERROR("pixc[%d] not retrieved\n", procName, i); - continue; - } - pixcompGetParameters(pixc, NULL, NULL, &comptype, NULL); - if (comptype != IFF_JFIF_JPEG) { - L_ERROR("pixc[%d] not jpeg compressed\n", procName, i); - continue; - } - ret = pixcompFastConvertToPdfData(pixc, title, &imdata, &imbytes); - if (ret) { - L_ERROR("pdf encoding failed for pixc[%d]\n", procName, i); - continue; - } - ba = l_byteaInitFromMem(imdata, imbytes); - LEPT_FREE(imdata); - ptraAdd(pa_data, ba); - } - ptraGetActualCount(pa_data, &n); - if (n == 0) { - L_ERROR("no pdf files made\n", procName); - ptraDestroy(&pa_data, FALSE, FALSE); - return 1; - } - - /* Concatenate them */ - ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); - - /* Clean up */ - ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */ - for (i = 0; i < n; i++) { - ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); - l_byteaDestroy(&ba); - } - ptraDestroy(&pa_data, FALSE, FALSE); - return ret; -} - - -/*! 
- * \brief pixcompFastConvertToPdfData() - * - * \param[in] pixc containing images all at the same resolution - * \param[in] title [optional] pdf title - * \param[out] pdata output pdf data (of all images - * \param[out] pnbytes size of output pdf data - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates the pdf without transcoding.
- *      (2) It assumes all images are jpeg encoded, have the same
- *          resolution, and that the resolution embedded in each
- *          jpeg file is correct.  (It is transferred to the pdf
- *          via the cid.)
- * 
- */ -static l_int32 -pixcompFastConvertToPdfData(PIXC *pixc, - const char *title, - l_uint8 **pdata, - size_t *pnbytes) -{ -l_uint8 *data; -L_COMP_DATA *cid; - - PROCNAME("pixacompFastConvertToPdfData"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - *pdata = NULL; - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - - /* Make a copy of the data */ - data = l_binaryCopy(pixc->data, pixc->size); - cid = l_generateJpegDataMem(data, pixc->size, 0); - - /* Note: cid is destroyed, along with data, by this function */ - return cidConvertToPdfData(cid, title, pdata, pnbytes); -} - - -/*--------------------------------------------------------------------* - * Output for debugging * - *--------------------------------------------------------------------*/ -/*! - * \brief pixacompWriteStreamInfo() - * - * \param[in] fp file stream - * \param[in] pixac - * \param[in] text [optional] identifying string; can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixacompWriteStreamInfo(FILE *fp, - PIXAC *pixac, - const char *text) -{ -l_int32 i, n, nboxes; -PIXC *pixc; - - PROCNAME("pixacompWriteStreamInfo"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - - if (text) - fprintf(fp, "Pixacomp Info for %s:\n", text); - else - fprintf(fp, "Pixacomp Info:\n"); - n = pixacompGetCount(pixac); - nboxes = pixacompGetBoxaCount(pixac); - fprintf(fp, "Number of pixcomp: %d\n", n); - fprintf(fp, "Size of pixcomp array alloc: %d\n", pixac->nalloc); - fprintf(fp, "Offset of index into array: %d\n", pixac->offset); - if (nboxes > 0) - fprintf(fp, "Boxa has %d boxes\n", nboxes); - else - fprintf(fp, "Boxa is empty\n"); - for (i = 0; i < n; i++) { - pixc = pixacompGetPixcomp(pixac, pixac->offset + i, L_NOCOPY); - pixcompWriteStreamInfo(fp, pixc, NULL); - } - return 0; -} - - -/*! 
- * \brief pixcompWriteStreamInfo() - * - * \param[in] fp file stream - * \param[in] pixc - * \param[in] text [optional] identifying string; can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixcompWriteStreamInfo(FILE *fp, - PIXC *pixc, - const char *text) -{ - PROCNAME("pixcompWriteStreamInfo"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - - if (text) - fprintf(fp, " Pixcomp Info for %s:", text); - else - fprintf(fp, " Pixcomp Info:"); - fprintf(fp, " width = %d, height = %d, depth = %d\n", - pixc->w, pixc->h, pixc->d); - fprintf(fp, " xres = %d, yres = %d, size in bytes = %zu\n", - pixc->xres, pixc->yres, pixc->size); - if (pixc->cmapflag) - fprintf(fp, " has colormap\n"); - else - fprintf(fp, " no colormap\n"); - if (pixc->comptype < NumImageFileFormatExtensions) { - fprintf(fp, " comptype = %s (%d)\n", - ImageFileFormatExtensions[pixc->comptype], pixc->comptype); - } else { - fprintf(fp, " Error!! Invalid comptype index: %d\n", pixc->comptype); - } - return 0; -} - - -/*! - * \brief pixacompDisplayTiledAndScaled() - * - * \param[in] pixac - * \param[in] outdepth output depth: 1, 8 or 32 bpp - * \param[in] tilewidth each pix is scaled to this width - * \param[in] ncols number of tiles in each row - * \param[in] background 0 for white, 1 for black; this is the color - * of the spacing between the images - * \param[in] spacing between images, and on outside - * \param[in] border width of additional black border on each image; - * use 0 for no border - * \return pix of tiled images, or NULL on error - * - *
- * Notes:
- *      (1) This is the same function as pixaDisplayTiledAndScaled(),
- *          except it works on a Pixacomp instead of a Pix.  It is particularly
- *          useful for showing the images in a Pixacomp at reduced resolution.
- *      (2) See pixaDisplayTiledAndScaled() for details.
- * 
- */ -PIX * -pixacompDisplayTiledAndScaled(PIXAC *pixac, - l_int32 outdepth, - l_int32 tilewidth, - l_int32 ncols, - l_int32 background, - l_int32 spacing, - l_int32 border) -{ -PIX *pixd; -PIXA *pixa; - - PROCNAME("pixacompDisplayTiledAndScaled"); - - if (!pixac) - return (PIX *)ERROR_PTR("pixac not defined", procName, NULL); - - if ((pixa = pixaCreateFromPixacomp(pixac, L_COPY)) == NULL) - return (PIX *)ERROR_PTR("pixa not made", procName, NULL); - - pixd = pixaDisplayTiledAndScaled(pixa, outdepth, tilewidth, ncols, - background, spacing, border); - pixaDestroy(&pixa); - return pixd; -} - - -/*! - * \brief pixacompWriteFiles() - * - * \param[in] pixac - * \param[in] subdir subdirectory of /tmp - * \return 0 if OK, 1 on error - */ -l_ok -pixacompWriteFiles(PIXAC *pixac, - const char *subdir) -{ -char buf[128]; -l_int32 i, n; -PIXC *pixc; - - PROCNAME("pixacompWriteFiles"); - - if (!pixac) - return ERROR_INT("pixac not defined", procName, 1); - - if (lept_mkdir(subdir) > 0) - return ERROR_INT("invalid subdir", procName, 1); - - n = pixacompGetCount(pixac); - for (i = 0; i < n; i++) { - pixc = pixacompGetPixcomp(pixac, i, L_NOCOPY); - snprintf(buf, sizeof(buf), "/tmp/%s/%03d", subdir, i); - pixcompWriteFile(buf, pixc); - } - return 0; -} - -extern const char *ImageFileFormatExtensions[]; - -/*! - * \brief pixcompWriteFile() - * - * \param[in] rootname - * \param[in] pixc - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The compressed data is written to file, and the filename is
- *          generated by appending the format extension to %rootname.
- * 
- */ -l_ok -pixcompWriteFile(const char *rootname, - PIXC *pixc) -{ -char buf[128]; - - PROCNAME("pixcompWriteFile"); - - if (!pixc) - return ERROR_INT("pixc not defined", procName, 1); - - snprintf(buf, sizeof(buf), "%s.%s", rootname, - ImageFileFormatExtensions[pixc->comptype]); - l_binaryWrite(buf, "w", pixc->data, pixc->size); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixconv.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixconv.c deleted file mode 100644 index c827aaa6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixconv.c +++ /dev/null @@ -1,4266 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixconv.c - *
- *
- *      These functions convert between images of different types
- *      without scaling.
- *
- *      Conversion from 8 bpp grayscale to 1, 2, 4 and 8 bpp
- *           PIX        *pixThreshold8()
- *
- *      Conversion from colormap to full color or grayscale
- *           PIX        *pixRemoveColormapGeneral()
- *           PIX        *pixRemoveColormap()
- *
- *      Add colormap losslessly (8 to 8)
- *           l_int32     pixAddGrayColormap8()
- *           PIX        *pixAddMinimalGrayColormap8()
- *
- *      Conversion from RGB color to grayscale
- *           PIX        *pixConvertRGBToLuminance()
- *           PIX        *pixConvertRGBToGray()
- *           PIX        *pixConvertRGBToGrayFast()
- *           PIX        *pixConvertRGBToGrayMinMax()
- *           PIX        *pixConvertRGBToGraySatBoost()
- *           PIX        *pixConvertRGBToGrayArb()
- *           PIX        *pixConvertRGBToBinaryArb()
- *
- *      Conversion from grayscale to colormap
- *           PIX        *pixConvertGrayToColormap()  -- 2, 4, 8 bpp
- *           PIX        *pixConvertGrayToColormap8()  -- 8 bpp only
- *
- *      Colorizing conversion from grayscale to color
- *           PIX        *pixColorizeGray()  -- 8 bpp or cmapped
- *
- *      Conversion from RGB color to colormap
- *           PIX        *pixConvertRGBToColormap()
- *
- *      Conversion from colormap to 1 bpp
- *           PIX        *pixConvertCmapTo1()
- *
- *      Quantization for relatively small number of colors in source
- *           l_int32     pixQuantizeIfFewColors()
- *
- *      Conversion from 16 bpp to 8 bpp
- *           PIX        *pixConvert16To8()
- *
- *      Conversion from grayscale to false color
- *           PIX        *pixConvertGrayToFalseColor()
- *
- *      Unpacking conversion from 1 bpp to 2, 4, 8, 16 and 32 bpp
- *           PIX        *pixUnpackBinary()
- *           PIX        *pixConvert1To16()
- *           PIX        *pixConvert1To32()
- *
- *      Unpacking conversion from 1 bpp to 2 bpp
- *           PIX        *pixConvert1To2Cmap()
- *           PIX        *pixConvert1To2()
- *
- *      Unpacking conversion from 1 bpp to 4 bpp
- *           PIX        *pixConvert1To4Cmap()
- *           PIX        *pixConvert1To4()
- *
- *      Unpacking conversion from 1, 2 and 4 bpp to 8 bpp
- *           PIX        *pixConvert1To8()
- *           PIX        *pixConvert2To8()
- *           PIX        *pixConvert4To8()
- *
- *      Unpacking conversion from 8 bpp to 16 bpp
- *           PIX        *pixConvert8To16()
- *
- *      Top-level conversion to 1 bpp
- *           PIX        *pixConvertTo1Adaptive()
- *           PIX        *pixConvertTo1()
- *           PIX        *pixConvertTo1BySampling()
- *
- *      Top-level conversion to 2 bpp
- *           PIX        *pixConvertTo2()
- *           PIX        *pixConvert8To2()
- *
- *      Top-level conversion to 4 bpp
- *           PIX        *pixConvertTo4()
- *           PIX        *pixConvert8To4()
- *
- *      Top-level conversion to 8 bpp
- *           PIX        *pixConvertTo8()
- *           PIX        *pixConvertTo8BySampling()
- *           PIX        *pixConvertTo8Colormap()
- *
- *      Top-level conversion to 16 bpp
- *           PIX        *pixConvertTo16()
- *
- *      Top-level conversion to 32 bpp (RGB)
- *           PIX        *pixConvertTo32()   ***
- *           PIX        *pixConvertTo32BySampling()   ***
- *           PIX        *pixConvert8To32()  ***
- *
- *      Top-level conversion to 8 or 32 bpp, without colormap
- *           PIX        *pixConvertTo8Or32
- *
- *      Conversion between 24 bpp and 32 bpp rgb
- *           PIX        *pixConvert24To32()
- *           PIX        *pixConvert32To24()
- *
- *      Conversion between 32 bpp (1 spp) and 16 or 8 bpp
- *           PIX        *pixConvert32To16()
- *           PIX        *pixConvert32To8()
- *
- *      Removal of alpha component by blending with white background
- *           PIX        *pixRemoveAlpha()
- *
- *      Addition of alpha component to 1 bpp
- *           PIX        *pixAddAlphaTo1bpp()
- *
- *      Lossless depth conversion (unpacking)
- *           PIX        *pixConvertLossless()
- *
- *      Conversion for printing in PostScript
- *           PIX        *pixConvertForPSWrap()
- *
- *      Scaling conversion to subpixel RGB
- *           PIX        *pixConvertToSubpixelRGB()
- *           PIX        *pixConvertGrayToSubpixelRGB()
- *           PIX        *pixConvertColorToSubpixelRGB()
- *
- *      Setting neutral point for min/max boost conversion to gray
- *          void         l_setNeutralBoostVal()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/* ------- Set neutral point for min/max boost conversion to gray ------ */ - /* Call l_setNeutralBoostVal() to change this */ -static l_int32 var_NEUTRAL_BOOST_VAL = 180; - - -#ifndef NO_CONSOLE_IO -#define DEBUG_CONVERT_TO_COLORMAP 0 -#define DEBUG_UNROLLING 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-------------------------------------------------------------* - * Conversion from 8 bpp grayscale to 1, 2 4 and 8 bpp * - *-------------------------------------------------------------*/ -/*! - * \brief pixThreshold8() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] d destination depth: 1, 2, 4 or 8 - * \param[in] nlevels number of levels to be used for colormap - * \param[in] cmapflag 1 if makes colormap; 0 otherwise - * \return pixd thresholded with standard dest thresholds, - * or NULL on error - * - *
- * Notes:
- *      (1) This uses, by default, equally spaced "target" values
- *          that depend on the number of levels, with thresholds
- *          halfway between.  For N levels, with separation (N-1)/255,
- *          there are N-1 fixed thresholds.
- *      (2) For 1 bpp destination, the number of levels can only be 2
- *          and if a cmap is made, black is (0,0,0) and white
- *          is (255,255,255), which is opposite to the convention
- *          without a colormap.
- *      (3) For 1, 2 and 4 bpp, the nlevels arg is used if a colormap
- *          is made; otherwise, we take the most significant bits
- *          from the src that will fit in the dest.
- *      (4) For 8 bpp, the input pixs is quantized to nlevels.  The
- *          dest quantized with that mapping, either through a colormap
- *          table or directly with 8 bit values.
- *      (5) Typically you should not use make a colormap for 1 bpp dest.
- *      (6) This is not dithering.  Each pixel is treated independently.
- * 
- */ -PIX * -pixThreshold8(PIX *pixs, - l_int32 d, - l_int32 nlevels, - l_int32 cmapflag) -{ -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixThreshold8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (cmapflag && nlevels < 2) - return (PIX *)ERROR_PTR("nlevels must be at least 2", procName, NULL); - - switch (d) { - case 1: - pixd = pixThresholdToBinary(pixs, 128); - if (cmapflag) { - cmap = pixcmapCreateLinear(1, 2); - pixSetColormap(pixd, cmap); - } - break; - case 2: - pixd = pixThresholdTo2bpp(pixs, nlevels, cmapflag); - break; - case 4: - pixd = pixThresholdTo4bpp(pixs, nlevels, cmapflag); - break; - case 8: - pixd = pixThresholdOn8bpp(pixs, nlevels, cmapflag); - break; - default: - return (PIX *)ERROR_PTR("d must be in {1,2,4,8}", procName, NULL); - } - - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*-------------------------------------------------------------* - * Conversion from colormapped pix * - *-------------------------------------------------------------*/ -/*! - * \brief pixRemoveColormapGeneral() - * - * \param[in] pixs any depth, with or without colormap - * \param[in] type REMOVE_CMAP_TO_BINARY, - * REMOVE_CMAP_TO_GRAYSCALE, - * REMOVE_CMAP_TO_FULL_COLOR, - * REMOVE_CMAP_WITH_ALPHA, - * REMOVE_CMAP_BASED_ON_SRC - * \param[in] ifnocmap L_CLONE, L_COPY - * \return pixd always a new pix; without colormap, or NULL on error - * - *
- * Notes:
- *      (1) Convenience function that allows choice between returning
- *          a clone or a copy if pixs does not have a colormap.
- *      (2) See pixRemoveColormap().
- * 
- */ -PIX * -pixRemoveColormapGeneral(PIX *pixs, - l_int32 type, - l_int32 ifnocmap) -{ - PROCNAME("pixRemoveColormapGeneral"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (ifnocmap != L_CLONE && ifnocmap != L_COPY) - return (PIX *)ERROR_PTR("invalid value for ifnocmap", procName, NULL); - - if (pixGetColormap(pixs)) - return pixRemoveColormap(pixs, type); - - if (ifnocmap == L_CLONE) - return pixClone(pixs); - else - return pixCopy(NULL, pixs); -} - - -/*! - * \brief pixRemoveColormap() - * - * \param[in] pixs see restrictions below - * \param[in] type REMOVE_CMAP_TO_BINARY, - * REMOVE_CMAP_TO_GRAYSCALE, - * REMOVE_CMAP_TO_FULL_COLOR, - * REMOVE_CMAP_WITH_ALPHA, - * REMOVE_CMAP_BASED_ON_SRC - * \return pixd without colormap, or NULL on error - * - *
- * Notes:
- *      (1) If pixs does not have a colormap, a clone is returned.
- *      (2) Otherwise, the input pixs is restricted to 1, 2, 4 or 8 bpp.
- *      (3) Use REMOVE_CMAP_TO_BINARY only on 1 bpp pix.
- *      (4) For grayscale conversion from RGB, use a weighted average
- *          of RGB values, and always return an 8 bpp pix, regardless
- *          of whether the input pixs depth is 2, 4 or 8 bpp.
- *      (5) REMOVE_CMAP_TO_FULL_COLOR ignores the alpha component and
- *          returns a 32 bpp pix with spp == 3 and the alpha bytes are 0.
- *      (6) For REMOVE_CMAP_BASED_ON_SRC, if there is no color, this
- *          returns either a 1 bpp or 8 bpp grayscale pix.
- *          If there is color, this returns a 32 bpp pix, with either:
- *           * 3 spp, if the alpha values are all 255 (opaque), or
- *           * 4 spp (preserving the alpha), if any alpha values are not 255.
- * 
- */ -PIX * -pixRemoveColormap(PIX *pixs, - l_int32 type) -{ -l_int32 sval, rval, gval, bval, val0, val1; -l_int32 i, j, k, w, h, d, wpls, wpld, ncolors, nalloc, count; -l_int32 opaque, colorfound, blackwhite; -l_int32 *rmap, *gmap, *bmap, *amap; -l_uint32 *datas, *lines, *datad, *lined, *lut, *graymap; -l_uint32 sword, dword; -PIXCMAP *cmap; -PIX *pixd; - - PROCNAME("pixRemoveColormap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if ((cmap = pixGetColormap(pixs)) == NULL) - return pixClone(pixs); - if (type != REMOVE_CMAP_TO_BINARY && - type != REMOVE_CMAP_TO_GRAYSCALE && - type != REMOVE_CMAP_TO_FULL_COLOR && - type != REMOVE_CMAP_WITH_ALPHA && - type != REMOVE_CMAP_BASED_ON_SRC) { - L_WARNING("Invalid type; converting based on src\n", procName); - type = REMOVE_CMAP_BASED_ON_SRC; - } - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("pixs must be {1,2,4,8} bpp", procName, NULL); - - ncolors = pixcmapGetCount(cmap); - nalloc = 1 << d; /* allocate for max size in case of pixel corruption */ - if (ncolors > nalloc) - return (PIX *)ERROR_PTR("too many colors for pixel depth", - procName, NULL); - - if (pixcmapToArrays(cmap, &rmap, &gmap, &bmap, &amap)) - return (PIX *)ERROR_PTR("colormap arrays not made", procName, NULL); - - if (d != 1 && type == REMOVE_CMAP_TO_BINARY) { - L_WARNING("not 1 bpp; can't remove cmap to binary\n", procName); - type = REMOVE_CMAP_BASED_ON_SRC; - } - - /* Select output type depending on colormap content */ - if (type == REMOVE_CMAP_BASED_ON_SRC) { - pixcmapIsOpaque(cmap, &opaque); - pixcmapHasColor(cmap, &colorfound); - pixcmapIsBlackAndWhite(cmap, &blackwhite); - if (!opaque) { /* save the alpha */ - type = REMOVE_CMAP_WITH_ALPHA; - } else if (colorfound) { - type = REMOVE_CMAP_TO_FULL_COLOR; - } else { /* opaque and no color */ - if (d == 1 && blackwhite) /* can binarize without loss */ - type = REMOVE_CMAP_TO_BINARY; - else - type = 
REMOVE_CMAP_TO_GRAYSCALE; - } - } - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if (type == REMOVE_CMAP_TO_BINARY) { - if ((pixd = pixCopy(NULL, pixs)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup_arrays; - } - pixcmapGetColor(cmap, 0, &rval, &gval, &bval); - val0 = rval + gval + bval; - pixcmapGetColor(cmap, 1, &rval, &gval, &bval); - val1 = rval + gval + bval; - if (val0 < val1) /* photometrically inverted from standard */ - pixInvert(pixd, pixd); - pixDestroyColormap(pixd); - } else if (type == REMOVE_CMAP_TO_GRAYSCALE) { - if ((pixd = pixCreate(w, h, 8)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup_arrays; - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - graymap = (l_uint32 *)LEPT_CALLOC(nalloc, sizeof(l_uint32)); - for (i = 0; i < ncolors; i++) { - graymap[i] = (l_uint32)(L_RED_WEIGHT * rmap[i] + - L_GREEN_WEIGHT * gmap[i] + - L_BLUE_WEIGHT * bmap[i] + 0.5); - } - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - switch (d) /* depth test above; no default permitted */ - { - case 8: - /* Unrolled 4x */ - for (j = 0, count = 0; j + 3 < w; j += 4, count++) { - sword = lines[count]; - dword = (graymap[(sword >> 24) & 0xff] << 24) | - (graymap[(sword >> 16) & 0xff] << 16) | - (graymap[(sword >> 8) & 0xff] << 8) | - graymap[sword & 0xff]; - lined[count] = dword; - } - /* Cleanup partial word */ - for (; j < w; j++) { - sval = GET_DATA_BYTE(lines, j); - gval = graymap[sval]; - SET_DATA_BYTE(lined, j, gval); - } -#if DEBUG_UNROLLING -#define CHECK_VALUE(a, b, c) if (GET_DATA_BYTE(a, b) != c) { \ - lept_stderr("Error: mismatch at %d, %d vs %d\n", \ - j, GET_DATA_BYTE(a, b), c); } - for (j = 0; j < w; j++) { - sval = GET_DATA_BYTE(lines, j); - gval = graymap[sval]; - CHECK_VALUE(lined, j, gval); - } -#endif - break; - case 4: - /* Unrolled 8x */ - for (j = 0, count = 0; j + 7 < w; j += 8, count++) { - sword 
= lines[count]; - dword = (graymap[(sword >> 28) & 0xf] << 24) | - (graymap[(sword >> 24) & 0xf] << 16) | - (graymap[(sword >> 20) & 0xf] << 8) | - graymap[(sword >> 16) & 0xf]; - lined[2 * count] = dword; - dword = (graymap[(sword >> 12) & 0xf] << 24) | - (graymap[(sword >> 8) & 0xf] << 16) | - (graymap[(sword >> 4) & 0xf] << 8) | - graymap[sword & 0xf]; - lined[2 * count + 1] = dword; - } - /* Cleanup partial word */ - for (; j < w; j++) { - sval = GET_DATA_QBIT(lines, j); - gval = graymap[sval]; - SET_DATA_BYTE(lined, j, gval); - } -#if DEBUG_UNROLLING - for (j = 0; j < w; j++) { - sval = GET_DATA_QBIT(lines, j); - gval = graymap[sval]; - CHECK_VALUE(lined, j, gval); - } -#endif - break; - case 2: - /* Unrolled 16x */ - for (j = 0, count = 0; j + 15 < w; j += 16, count++) { - sword = lines[count]; - dword = (graymap[(sword >> 30) & 0x3] << 24) | - (graymap[(sword >> 28) & 0x3] << 16) | - (graymap[(sword >> 26) & 0x3] << 8) | - graymap[(sword >> 24) & 0x3]; - lined[4 * count] = dword; - dword = (graymap[(sword >> 22) & 0x3] << 24) | - (graymap[(sword >> 20) & 0x3] << 16) | - (graymap[(sword >> 18) & 0x3] << 8) | - graymap[(sword >> 16) & 0x3]; - lined[4 * count + 1] = dword; - dword = (graymap[(sword >> 14) & 0x3] << 24) | - (graymap[(sword >> 12) & 0x3] << 16) | - (graymap[(sword >> 10) & 0x3] << 8) | - graymap[(sword >> 8) & 0x3]; - lined[4 * count + 2] = dword; - dword = (graymap[(sword >> 6) & 0x3] << 24) | - (graymap[(sword >> 4) & 0x3] << 16) | - (graymap[(sword >> 2) & 0x3] << 8) | - graymap[sword & 0x3]; - lined[4 * count + 3] = dword; - } - /* Cleanup partial word */ - for (; j < w; j++) { - sval = GET_DATA_DIBIT(lines, j); - gval = graymap[sval]; - SET_DATA_BYTE(lined, j, gval); - } -#if DEBUG_UNROLLING - for (j = 0; j < w; j++) { - sval = GET_DATA_DIBIT(lines, j); - gval = graymap[sval]; - CHECK_VALUE(lined, j, gval); - } -#endif - break; - case 1: - /* Unrolled 8x */ - for (j = 0, count = 0; j + 31 < w; j += 32, count++) { - sword = lines[count]; - 
for (k = 0; k < 4; k++) { - /* The top byte is always the relevant one */ - dword = (graymap[(sword >> 31) & 0x1] << 24) | - (graymap[(sword >> 30) & 0x1] << 16) | - (graymap[(sword >> 29) & 0x1] << 8) | - graymap[(sword >> 28) & 0x1]; - lined[8 * count + 2 * k] = dword; - dword = (graymap[(sword >> 27) & 0x1] << 24) | - (graymap[(sword >> 26) & 0x1] << 16) | - (graymap[(sword >> 25) & 0x1] << 8) | - graymap[(sword >> 24) & 0x1]; - lined[8 * count + 2 * k + 1] = dword; - sword <<= 8; /* Move up the next byte */ - } - } - /* Cleanup partial word */ - for (; j < w; j++) { - sval = GET_DATA_BIT(lines, j); - gval = graymap[sval]; - SET_DATA_BYTE(lined, j, gval); - } -#if DEBUG_UNROLLING - for (j = 0; j < w; j++) { - sval = GET_DATA_BIT(lines, j); - gval = graymap[sval]; - CHECK_VALUE(lined, j, gval); - } -#undef CHECK_VALUE -#endif - break; - default: - return NULL; - } - } - if (graymap) - LEPT_FREE(graymap); - } else { /* type == REMOVE_CMAP_TO_FULL_COLOR or REMOVE_CMAP_WITH_ALPHA */ - if ((pixd = pixCreate(w, h, 32)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup_arrays; - } - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - if (type == REMOVE_CMAP_WITH_ALPHA) - pixSetSpp(pixd, 4); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - lut = (l_uint32 *)LEPT_CALLOC(nalloc, sizeof(l_uint32)); - for (i = 0; i < ncolors; i++) { - if (type == REMOVE_CMAP_TO_FULL_COLOR) - composeRGBPixel(rmap[i], gmap[i], bmap[i], lut + i); - else /* full color plus alpha */ - composeRGBAPixel(rmap[i], gmap[i], bmap[i], amap[i], lut + i); - } - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - if (d == 8) - sval = GET_DATA_BYTE(lines, j); - else if (d == 4) - sval = GET_DATA_QBIT(lines, j); - else if (d == 2) - sval = GET_DATA_DIBIT(lines, j); - else /* (d == 1) */ - sval = GET_DATA_BIT(lines, j); - if (sval >= ncolors) - L_WARNING("pixel value out of bounds\n", procName); - else - 
lined[j] = lut[sval]; - } - } - LEPT_FREE(lut); - } - -cleanup_arrays: - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - LEPT_FREE(amap); - return pixd; -} - - -/*-------------------------------------------------------------* - * Add colormap losslessly (8 to 8) * - *-------------------------------------------------------------*/ -/*! - * \brief pixAddGrayColormap8() - * - * \param[in] pixs 8 bpp - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If pixs has a colormap, this is a no-op.
- * 
- */ -l_ok -pixAddGrayColormap8(PIX *pixs) -{ -PIXCMAP *cmap; - - PROCNAME("pixAddGrayColormap8"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (pixGetColormap(pixs)) - return 0; - - cmap = pixcmapCreateLinear(8, 256); - pixSetColormap(pixs, cmap); - return 0; -} - - -/*! - * \brief pixAddMinimalGrayColormap8() - * - * \param[in] pixs 8 bpp - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a colormapped version of the input image
- *          that has the same number of colormap entries as the
- *          input image has unique gray levels.
- * 
- */ -PIX * -pixAddMinimalGrayColormap8(PIX *pixs) -{ -l_int32 ncolors, w, h, i, j, wpl1, wpld, index, val; -l_int32 *inta, *revmap; -l_uint32 *data1, *datad, *line1, *lined; -PIX *pix1, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixAddMinimalGrayColormap8"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - /* Eliminate the easy cases */ - pixNumColors(pixs, 1, &ncolors); - cmap = pixGetColormap(pixs); - if (cmap) { - if (pixcmapGetCount(cmap) == ncolors) /* irreducible */ - return pixCopy(NULL, pixs); - else - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - } else { - if (ncolors == 256) { - pix1 = pixCopy(NULL, pixs); - pixAddGrayColormap8(pix1); - return pix1; - } - pix1 = pixClone(pixs); - } - - /* Find the gray levels and make a reverse map */ - pixGetDimensions(pix1, &w, &h, NULL); - data1 = pixGetData(pix1); - wpl1 = pixGetWpl(pix1); - inta = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < h; i++) { - line1 = data1 + i * wpl1; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(line1, j); - inta[val] = 1; - } - } - cmap = pixcmapCreate(8); - revmap = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0, index = 0; i < 256; i++) { - if (inta[i]) { - pixcmapAddColor(cmap, i, i, i); - revmap[i] = index++; - } - } - - /* Set all pixels in pixd to the colormap index */ - pixd = pixCreateTemplate(pix1); - pixSetColormap(pixd, cmap); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line1 = data1 + i * wpl1; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(line1, j); - SET_DATA_BYTE(lined, j, revmap[val]); - } - } - - pixDestroy(&pix1); - LEPT_FREE(inta); - LEPT_FREE(revmap); - return pixd; -} - - -/*-------------------------------------------------------------* - * Conversion from RGB color to grayscale * - 
*-------------------------------------------------------------*/ -/*! - * \brief pixConvertRGBToLuminance() - * - * \param[in] pixs 32 bpp RGB - * \return 8 bpp pix, or NULL on error - * - *
- * Notes:
- *      (1) Use a standard luminance conversion.
- * 
- */ -PIX * -pixConvertRGBToLuminance(PIX *pixs) -{ - return pixConvertRGBToGray(pixs, 0.0, 0.0, 0.0); -} - - -/*! - * \brief pixConvertRGBToGray() - * - * \param[in] pixs 32 bpp RGB - * \param[in] rwt, gwt, bwt non-negative; these should add to 1.0, - * or use 0.0 for default - * \return 8 bpp pix, or NULL on error - * - *
- * Notes:
- *      (1) Use a weighted average of the RGB values.
- * 
- */ -PIX * -pixConvertRGBToGray(PIX *pixs, - l_float32 rwt, - l_float32 gwt, - l_float32 bwt) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 word; -l_uint32 *datas, *lines, *datad, *lined; -l_float32 sum; -PIX *pixd; - - PROCNAME("pixConvertRGBToGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) - return (PIX *)ERROR_PTR("weights not all >= 0.0", procName, NULL); - - /* Make sure the sum of weights is 1.0; otherwise, you can get - * overflow in the gray value. */ - if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) { - rwt = L_RED_WEIGHT; - gwt = L_GREEN_WEIGHT; - bwt = L_BLUE_WEIGHT; - } - sum = rwt + gwt + bwt; - if (L_ABS(sum - 1.0) > 0.0001) { /* maintain ratios with sum == 1.0 */ - L_WARNING("weights don't sum to 1; maintaining ratios\n", procName); - rwt = rwt / sum; - gwt = gwt / sum; - bwt = bwt / sum; - } - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - word = *(lines + j); - val = (l_int32)(rwt * ((word >> L_RED_SHIFT) & 0xff) + - gwt * ((word >> L_GREEN_SHIFT) & 0xff) + - bwt * ((word >> L_BLUE_SHIFT) & 0xff) + 0.5); - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*! - * \brief pixConvertRGBToGrayFast() - * - * \param[in] pixs 32 bpp RGB - * \return 8 bpp pix, or NULL on error - * - *
- * Notes:
- *      (1) This function should be used if speed of conversion
- *          is paramount, and the green channel can be used as
- *          a fair representative of the RGB intensity.  It is
- *          several times faster than pixConvertRGBToGray().
- *      (2) To combine RGB to gray conversion with subsampling,
- *          use pixScaleRGBToGrayFast() instead.
- * 
- */ -PIX * -pixConvertRGBToGrayFast(PIX *pixs) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixd; - - PROCNAME("pixConvertRGBToGrayFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++, lines++) { - val = ((*lines) >> L_GREEN_SHIFT) & 0xff; - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*! - * \brief pixConvertRGBToGrayMinMax() - * - * \param[in] pixs 32 bpp RGB - * \param[in] type L_CHOOSE_MIN, L_CHOOSE_MAX, L_CHOOSE_MAXDIFF, - * L_CHOOSE_MIN_BOOST, L_CHOOSE_MAX_BOOST - * \return 8 bpp pix, or NULL on error - * - *
- * Notes:
- *      (1) This chooses various components or combinations of them,
- *          from the three RGB sample values.  In addition to choosing
- *          the min, max, and maxdiff (difference between max and min),
- *          this also allows boosting the min and max about a reference
- *          value.
- *      (2) The default reference value for boosting the min and max
- *          is 200.  This can be changed with l_setNeutralBoostVal()
- *      (3) The result with L_CHOOSE_MAXDIFF is surprisingly sensitive
- *          to a jpeg compression/decompression cycle with quality = 75.
- * 
- */ -PIX * -pixConvertRGBToGrayMinMax(PIX *pixs, - l_int32 type) -{ -l_int32 i, j, w, h, wpls, wpld, rval, gval, bval, val, minval, maxval; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixd; - - PROCNAME("pixConvertRGBToGrayMinMax"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (type != L_CHOOSE_MIN && type != L_CHOOSE_MAX && - type != L_CHOOSE_MAXDIFF && type != L_CHOOSE_MIN_BOOST && - type != L_CHOOSE_MAX_BOOST) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - if (type == L_CHOOSE_MIN || type == L_CHOOSE_MIN_BOOST) { - val = L_MIN(rval, gval); - val = L_MIN(val, bval); - if (type == L_CHOOSE_MIN_BOOST) - val = L_MIN(255, (val * val) / var_NEUTRAL_BOOST_VAL); - } else if (type == L_CHOOSE_MAX || type == L_CHOOSE_MAX_BOOST) { - val = L_MAX(rval, gval); - val = L_MAX(val, bval); - if (type == L_CHOOSE_MAX_BOOST) - val = L_MIN(255, (val * val) / var_NEUTRAL_BOOST_VAL); - } else { /* L_CHOOSE_MAXDIFF */ - minval = L_MIN(rval, gval); - minval = L_MIN(minval, bval); - maxval = L_MAX(rval, gval); - maxval = L_MAX(maxval, bval); - val = maxval - minval; - } - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*! - * \brief pixConvertRGBToGraySatBoost() - * - * \param[in] pixs 32 bpp rgb - * \param[in] refval between 1 and 255; typ. less than 128 - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This returns the max component value, boosted by
- *          the saturation. The maximum boost occurs where
- *          the maximum component value is equal to some reference value.
- *          This particular weighting is due to Dany Qumsiyeh.
- *      (2) For gray pixels (zero saturation), this returns
- *          the intensity of any component.
- *      (3) For fully saturated pixels ('fullsat'), this rises linearly
- *          with the max value and has a slope equal to 255 divided
- *          by the reference value; for a max value greater than
- *          the reference value, it is clipped to 255.
- *      (4) For saturation values in between, the output is a linear
- *          combination of (2) and (3), weighted by saturation.
- *          It falls between these two curves, and does not exceed 255.
- *      (5) This can be useful for distinguishing an object that has nonzero
- *          saturation from a gray background.  For this, the refval
- *          should be chosen near the expected value of the background,
- *          to achieve maximum saturation boost there.
- * 
- */ -PIX * -pixConvertRGBToGraySatBoost(PIX *pixs, - l_int32 refval) -{ -l_int32 w, h, d, i, j, wplt, wpld; -l_int32 rval, gval, bval, sval, minrg, maxrg, min, max, delta; -l_int32 fullsat, newval; -l_float32 *invmax, *ratio; -l_uint32 *linet, *lined, *datat, *datad; -PIX *pixt, *pixd; - - PROCNAME("pixConvertRGBToGraySatBoost"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not cmapped or rgb", procName, NULL); - if (refval < 1 || refval > 255) - return (PIX *)ERROR_PTR("refval not in [1 ... 255]", procName, NULL); - - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - pixd = pixCreate(w, h, 8); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - wplt = pixGetWpl(pixt); - datat = pixGetData(pixt); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - invmax = (l_float32 *)LEPT_CALLOC(256, sizeof(l_float32)); - ratio = (l_float32 *)LEPT_CALLOC(256, sizeof(l_float32)); - for (i = 1; i < 256; i++) { /* i == 0 --> delta = sval = newval = 0 */ - invmax[i] = 1.0 / (l_float32)i; - ratio[i] = (l_float32)i / (l_float32)refval; - } - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(linet[j], &rval, &gval, &bval); - minrg = L_MIN(rval, gval); - min = L_MIN(minrg, bval); - maxrg = L_MAX(rval, gval); - max = L_MAX(maxrg, bval); - delta = max - min; - if (delta == 0) /* gray; no chroma */ - sval = 0; - else - sval = (l_int32)(255. * (l_float32)delta * invmax[max] + 0.5); - - fullsat = L_MIN(255, 255 * ratio[max]); - newval = (sval * fullsat + (255 - sval) * max) / 255; - SET_DATA_BYTE(lined, j, newval); - } - } - - pixDestroy(&pixt); - LEPT_FREE(invmax); - LEPT_FREE(ratio); - return pixd; -} - - -/*! 
- * \brief pixConvertRGBToGrayArb() - * - * \param[in] pixs 32 bpp RGB - * \param[in] rc, gc, bc arithmetic factors; can be negative - * \return 8 bpp pix, or NULL on error - * - *
- * Notes:
- *      (1) This converts to gray using an arbitrary linear combination
- *          of the rgb color components.  It differs from pixConvertToGray(),
- *          which uses only positive coefficients that sum to 1.
- *      (2) The gray output values are clipped to 0 and 255.
- * 
- */ -PIX * -pixConvertRGBToGrayArb(PIX *pixs, - l_float32 rc, - l_float32 gc, - l_float32 bc) -{ -l_int32 i, j, w, h, wpls, wpld, rval, gval, bval, val; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixd; - - PROCNAME("pixConvertRGBToGrayArb"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (rc <= 0 && gc <= 0 && bc <= 0) - return (PIX *)ERROR_PTR("all coefficients <= 0", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - val = (l_int32)(rc * rval + gc * gval + bc * bval); - val = L_MIN(255, L_MAX(0, val)); - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*! - * \brief pixConvertRGBToBinaryArb() - * - * \param[in] pixs 32 bpp RGB - * \param[in] rc, gc, bc arithmetic factors; can be negative - * \param[in] thresh binarization threshold - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return 1 bpp pix, or NULL on error - * - *
- * Notes:
- *      (1) This makes a 1 bpp mask from an RGB image, using an arbitrary
- *          linear combination of the rgb color components, along with
- *          a threshold and a selection choice of the gray value relative
- *          to %thresh.
- * 
- */ -PIX * -pixConvertRGBToBinaryArb(PIX *pixs, - l_float32 rc, - l_float32 gc, - l_float32 bc, - l_int32 thresh, - l_int32 relation) -{ -l_int32 threshold; -PIX *pix1, *pix2; - - PROCNAME("pixConvertRGBToBinaryArb"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (rc <= 0 && gc <= 0 && bc <= 0) - return (PIX *)ERROR_PTR("all coefficients <= 0", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (PIX *)ERROR_PTR("invalid relation", procName, NULL); - - pix1 = pixConvertRGBToGrayArb(pixs, rc, gc, bc); - threshold = (relation == L_SELECT_IF_LTE || relation == L_SELECT_IF_GT) ? - thresh : thresh + 1; - pix2 = pixThresholdToBinary(pix1, threshold); - if (relation == L_SELECT_IF_GT || relation == L_SELECT_IF_GTE) - pixInvert(pix2, pix2); - pixDestroy(&pix1); - return pix2; -} - - -/*---------------------------------------------------------------------------* - * Conversion from grayscale to colormap * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertGrayToColormap() - * - * \param[in] pixs 2, 4 or 8 bpp grayscale - * \return pixd 2, 4 or 8 bpp with colormap, or NULL on error - * - *
- * Notes:
- *      (1) This is a simple interface for adding a colormap to a
- *          2, 4 or 8 bpp grayscale image without causing any
- *          quantization.  There is some similarity to operations
- *          in grayquant.c, such as pixThresholdOn8bpp(), where
- *          the emphasis is on quantization with an arbitrary number
- *          of levels, and a colormap is an option.
- *      (2) Returns a copy if pixs already has a colormap.
- *      (3) For 8 bpp src, this is a lossless transformation.
- *      (4) For 2 and 4 bpp src, this generates a colormap that
- *          assumes full coverage of the gray space, with equally spaced
- *          levels: 4 levels for d = 2 and 16 levels for d = 4.
- *      (5) In all cases, the depth of the dest is the same as the src.
- * 
- */ -PIX * -pixConvertGrayToColormap(PIX *pixs) -{ -l_int32 d; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertGrayToColormap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("pixs not 2, 4 or 8 bpp", procName, NULL); - - if (pixGetColormap(pixs)) { - L_INFO("pixs already has a colormap\n", procName); - return pixCopy(NULL, pixs); - } - - if (d == 8) /* lossless conversion */ - return pixConvertGrayToColormap8(pixs, 2); - - /* Build a cmap with equally spaced target values over the - * full 8 bpp range. */ - pixd = pixCopy(NULL, pixs); - cmap = pixcmapCreateLinear(d, 1 << d); - pixSetColormap(pixd, cmap); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixConvertGrayToColormap8() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] mindepth of pixd; valid values are 2, 4 and 8 - * \return pixd 2, 4 or 8 bpp with colormap, or NULL on error - * - *
- * Notes:
- *      (1) Returns a copy if pixs already has a colormap.
- *      (2) This is a lossless transformation; there is no quantization.
- *          We compute the number of different gray values in pixs,
- *          and construct a colormap that has exactly these values.
- *      (3) 'mindepth' is the minimum depth of pixd.  If mindepth == 8,
- *          pixd will always be 8 bpp.  Let the number of different
- *          gray values in pixs be ngray.  If mindepth == 4, we attempt
- *          to save pixd as a 4 bpp image, but if ngray > 16,
- *          pixd must be 8 bpp.  Likewise, if mindepth == 2,
- *          the depth of pixd will be 2 if ngray <= 4 and 4 if ngray > 4
- *          but <= 16.
- * 
- */ -PIX * -pixConvertGrayToColormap8(PIX *pixs, - l_int32 mindepth) -{ -l_int32 ncolors, w, h, depth, i, j, wpls, wpld; -l_int32 index, num, val, newval; -l_int32 array[256]; -l_uint32 *lines, *lined, *datas, *datad; -NUMA *na; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertGrayToColormap8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (mindepth != 2 && mindepth != 4 && mindepth != 8) { - L_WARNING("invalid value of mindepth; setting to 8\n", procName); - mindepth = 8; - } - - if (pixGetColormap(pixs)) { - L_INFO("pixs already has a colormap\n", procName); - return pixCopy(NULL, pixs); - } - - na = pixGetGrayHistogram(pixs, 1); - numaGetCountRelativeToZero(na, L_GREATER_THAN_ZERO, &ncolors); - if (mindepth == 8 || ncolors > 16) - depth = 8; - else if (mindepth == 4 || ncolors > 4) - depth = 4; - else - depth = 2; - - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, depth); - cmap = pixcmapCreate(depth); - pixSetColormap(pixd, cmap); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - - index = 0; - for (i = 0; i < 256; i++) { - array[i] = 0; /* only to quiet the static checker */ - numaGetIValue(na, i, &num); - if (num > 0) { - pixcmapAddColor(cmap, i, i, i); - array[i] = index; - index++; - } - } - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - newval = array[val]; - if (depth == 2) - SET_DATA_DIBIT(lined, j, newval); - else if (depth == 4) - SET_DATA_QBIT(lined, j, newval); - else /* depth == 8 */ - SET_DATA_BYTE(lined, j, newval); - } - } - - numaDestroy(&na); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Colorizing conversion from 
grayscale to color * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixColorizeGray() - * - * \param[in] pixs 8 bpp gray; 2, 4 or 8 bpp colormapped - * \param[in] color 32 bit rgba pixel - * \param[in] cmapflag 1 for result to have colormap; 0 for RGB - * \return pixd 8 bpp colormapped or 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) This applies the specific color to the grayscale image.
- *      (2) If pixs already has a colormap, it is removed to gray
- *          before colorizing.
- * 
- */ -PIX * -pixColorizeGray(PIX *pixs, - l_uint32 color, - l_int32 cmapflag) -{ -l_int32 i, j, w, h, wplt, wpld, val8; -l_uint32 *datad, *datat, *lined, *linet, *tab; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixColorizeGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not 8 bpp or cmapped", procName, NULL); - - if (pixGetColormap(pixs)) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixt = pixClone(pixs); - - cmap = pixcmapGrayToColor(color); - if (cmapflag) { - pixd = pixCopy(NULL, pixt); - pixSetColormap(pixd, cmap); - pixDestroy(&pixt); - return pixd; - } - - /* Make an RGB pix */ - pixcmapToRGBTable(cmap, &tab, NULL); - pixGetDimensions(pixt, &w, &h, NULL); - pixd = pixCreate(w, h, 32); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - linet = datat + i * wplt; - for (j = 0; j < w; j++) { - val8 = GET_DATA_BYTE(linet, j); - lined[j] = tab[val8]; - } - } - - pixDestroy(&pixt); - pixcmapDestroy(&cmap); - LEPT_FREE(tab); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion from RGB color to colormap * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertRGBToColormap() - * - * \param[in] pixs 32 bpp rgb - * \param[in] ditherflag 1 to dither, 0 otherwise - * \return pixd 2, 4 or 8 bpp with colormap, or NULL on error - * - *
- * Notes:
- *      (1) This function has two relatively simple modes of color
- *          quantization:
- *            (a) If the image is made orthographically and has not more
- *                than 256 'colors' at the level 4 octcube leaves,
- *                it is quantized nearly exactly.  The ditherflag
- *                is ignored.
- *            (b) Most natural images have more than 256 different colors;
- *                in that case we use adaptive octree quantization,
- *                with dithering if requested.
- *      (2) If there are not more than 256 occupied level 4 octcubes,
- *          the color in the colormap that represents all pixels in
- *          one of those octcubes is given by the first pixel that
- *          falls into that octcube.
- *      (3) If there are more than 256 colors, we use adaptive octree
- *          color quantization.
- *      (4) Dithering gives better visual results on images where
- *          there is a color wash (a slow variation of color), but it
- *          is about twice as slow and results in significantly larger
- *          files when losslessly compressed (e.g., into png).
- * 
- */ -PIX * -pixConvertRGBToColormap(PIX *pixs, - l_int32 ditherflag) -{ -l_int32 ncolors; -NUMA *na; -PIX *pixd; - - PROCNAME("pixConvertRGBToColormap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (pixGetSpp(pixs) == 4) - L_WARNING("pixs has alpha; removing\n", procName); - - /* Get the histogram and count the number of occupied level 4 - * leaf octcubes. We don't yet know if this is the number of - * actual colors, but if it's not, all pixels falling into - * the same leaf octcube will be assigned to the color of the - * first pixel that lands there. */ - na = pixOctcubeHistogram(pixs, 4, &ncolors); - - /* If there are too many occupied leaf octcubes to be - * represented directly in a colormap, fall back to octree - * quantization, optionally with dithering. */ - if (ncolors > 256) { - numaDestroy(&na); - if (ditherflag) - L_INFO("More than 256 colors; using octree quant with dithering\n", - procName); - else - L_INFO("More than 256 colors; using octree quant; no dithering\n", - procName); - return pixOctreeColorQuant(pixs, 240, ditherflag); - } - - /* There are not more than 256 occupied leaf octcubes. - * Quantize to those octcubes. */ - pixd = pixFewColorsOctcubeQuant2(pixs, 4, na, ncolors, NULL); - pixCopyInputFormat(pixd, pixs); - numaDestroy(&na); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion from colormap to 1 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertCmapTo1() - * - * \param[in] pixs cmapped - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is an extreme color quantizer.  It decides which
- *          colors map to FG (black) and which to BG (white).
- *      (2) This uses two heuristics to make the decision:
- *          (a) colors similar to each other are likely to be in the same class
- *          (b) there is usually much less FG than BG.
- * 
- */ -PIX * -pixConvertCmapTo1(PIX *pixs) -{ -l_int32 i, j, nc, w, h, imin, imax, factor, wpl1, wpld; -l_int32 index, rmin, gmin, bmin, rmax, gmax, bmax, dmin, dmax; -l_float32 minfract, ifract; -l_int32 *lut; -l_uint32 *line1, *lined, *data1, *datad; -NUMA *na1, *na2; /* histograms */ -PIX *pix1, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertCmapTo1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if ((cmap = pixGetColormap(pixs)) == NULL) - return (PIX *)ERROR_PTR("no colormap", procName, NULL); - - /* Select target colors for the two classes. Find the - * colors with smallest and largest average component values. - * The smallest is class 0 and the largest is class 1. */ - pixcmapGetRangeValues(cmap, L_SELECT_AVERAGE, NULL, NULL, &imin, &imax); - pixcmapGetColor(cmap, imin, &rmin, &gmin, &bmin); - pixcmapGetColor(cmap, imax, &rmax, &gmax, &bmax); - nc = pixcmapGetCount(cmap); - - /* Assign colors to the two classes. The histogram is - * initialized to 0, so any colors not found when computing - * the sampled histogram will get zero weight in minfract. */ - if ((lut = (l_int32 *)LEPT_CALLOC(nc, sizeof(l_int32))) == NULL) - return (PIX *)ERROR_PTR("calloc fail for lut", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. 
+ 0.5)); - na1 = pixGetCmapHistogram(pixs, factor); - na2 = numaNormalizeHistogram(na1, 1.0); - minfract = 0.0; - for (i = 0; i < nc; i++) { - numaGetFValue(na2, i, &ifract); - pixcmapGetDistanceToColor(cmap, i, rmin, gmin, bmin, &dmin); - pixcmapGetDistanceToColor(cmap, i, rmax, gmax, bmax, &dmax); - if (dmin < dmax) { /* closer to dark extreme value */ - lut[i] = 1; /* black pixel in 1 bpp image */ - minfract += ifract; - } - } - numaDestroy(&na1); - numaDestroy(&na2); - - /* Generate the output binarized image */ - pix1 = pixConvertTo8(pixs, 1); - pixd = pixCreate(w, h, 1); - data1 = pixGetData(pix1); - datad = pixGetData(pixd); - wpl1 = pixGetWpl(pix1); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - line1 = data1 + i * wpl1; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - index = GET_DATA_BYTE(line1, j); - if (lut[index] == 1) SET_DATA_BIT(lined, j); - } - } - pixDestroy(&pix1); - LEPT_FREE(lut); - - /* We expect minfract (the dark colors) to be less than 0.5. - * If that is not the case, invert pixd. */ - if (minfract > 0.5) { - L_INFO("minfract = %5.3f; inverting\n", procName, minfract); - pixInvert(pixd, pixd); - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Quantization for relatively small number of colors in source * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixQuantizeIfFewColors() - * - * \param[in] pixs 8 bpp gray or 32 bpp rgb - * \param[in] maxcolors max number of colors allowed to be returned - * from pixColorsForQuantization(); - * use 0 for default - * \param[in] mingraycolors min number of gray levels that a grayscale - * image is quantized to; use 0 for default - * \param[in] octlevel for octcube quantization: 3 or 4 - * \param[out] ppixd 2,4 or 8 bpp quantized; null if too many colors - * \return 0 if OK, 1 on error or if pixs can't be quantized into - * a small number of colors. - * - *
- * Notes:
- *      (1) This is a wrapper that tests if the pix can be quantized
- *          with good quality using a small number of colors.  If so,
- *          it does the quantization, defining a colormap and using
- *          pixels whose value is an index into the colormap.
- *      (2) If the image has color, it is quantized with 8 bpp pixels.
- *          If the image is essentially grayscale, the pixels are
- *          either 4 or 8 bpp, depending on the size of the required
- *          colormap.
- *      (3) %octlevel = 4 generates a larger colormap and larger
- *          compressed image than %octlevel = 3.  If image quality is
- *          important, you should use %octlevel = 4.
- *      (4) If the image already has a colormap, it returns a clone.
- * 
- */ -l_ok -pixQuantizeIfFewColors(PIX *pixs, - l_int32 maxcolors, - l_int32 mingraycolors, - l_int32 octlevel, - PIX **ppixd) -{ -l_int32 d, ncolors, iscolor, graycolors; -PIX *pixg, *pixd; - - PROCNAME("pixQuantizeIfFewColors"); - - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetColormap(pixs) != NULL) { - *ppixd = pixClone(pixs); - return 0; - } - if (maxcolors <= 0) - maxcolors = 15; /* default */ - if (maxcolors > 50) - L_WARNING("maxcolors > 50; very large!\n", procName); - if (mingraycolors <= 0) - mingraycolors = 10; /* default */ - if (mingraycolors > 30) - L_WARNING("mingraycolors > 30; very large!\n", procName); - if (octlevel != 3 && octlevel != 4) { - L_WARNING("invalid octlevel; setting to 3\n", procName); - octlevel = 3; - } - - /* Test the number of colors. For color, the octcube leaves - * are at level 4. */ - pixColorsForQuantization(pixs, 0, &ncolors, &iscolor, 0); - if (ncolors > maxcolors) - return ERROR_INT("too many colors", procName, 1); - - /* Quantize! - * (1) For color: - * If octlevel == 4, try to quantize to an octree where - * the octcube leaves are at level 4. If that fails, - * back off to level 3. - * If octlevel == 3, quantize to level 3 directly. - * For level 3, the quality is usually good enough and there - * is negligible chance of getting more than 256 colors. - * (2) For grayscale, multiply ncolors by 1.5 for extra quality, - * but use at least mingraycolors and not more than 256. 
*/ - if (iscolor) { - pixd = pixFewColorsOctcubeQuant1(pixs, octlevel); - if (!pixd) { /* backoff */ - pixd = pixFewColorsOctcubeQuant1(pixs, octlevel - 1); - if (octlevel == 3) /* shouldn't happen */ - L_WARNING("quantized at level 2; low quality\n", procName); - } - } else { /* image is really grayscale */ - if (d == 32) - pixg = pixConvertRGBToLuminance(pixs); - else - pixg = pixClone(pixs); - graycolors = L_MAX(mingraycolors, (l_int32)(1.5 * ncolors)); - graycolors = L_MIN(graycolors, 256); - if (graycolors < 16) - pixd = pixThresholdTo4bpp(pixg, graycolors, 1); - else - pixd = pixThresholdOn8bpp(pixg, graycolors, 1); - pixDestroy(&pixg); - } - *ppixd = pixd; - - if (!pixd) - return ERROR_INT("pixd not made", procName, 1); - pixCopyInputFormat(pixd, pixs); - return 0; -} - - - -/*---------------------------------------------------------------------------* - * Conversion from 16 bpp to 8 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvert16To8() - * - * \param[in] pixs 16 bpp - * \param[in] type L_LS_BYTE, L_MS_BYTE, L_AUTO_BYTE, L_CLIP_TO_FF - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) With L_AUTO_BYTE, if the max pixel value is greater than 255,
- *          use the MSB; otherwise, use the LSB.
- *      (2) With L_CLIP_TO_FF, use min(pixel-value, 0xff) for each
- *          16-bit src pixel.
- * 
- */ -PIX * -pixConvert16To8(PIX *pixs, - l_int32 type) -{ -l_uint16 dword; -l_int32 w, h, wpls, wpld, i, j, val, use_lsb; -l_uint32 sword, first, second; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixConvert16To8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 16) - return (PIX *)ERROR_PTR("pixs not 16 bpp", procName, NULL); - if (type != L_LS_BYTE && type != L_MS_BYTE && - type != L_AUTO_BYTE && type != L_CLIP_TO_FF) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - if (type == L_AUTO_BYTE) { - use_lsb = TRUE; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < wpls; j++) { - val = GET_DATA_TWO_BYTES(lines, j); - if (val > 255) { - use_lsb = FALSE; - break; - } - } - if (!use_lsb) break; - } - type = (use_lsb) ? L_LS_BYTE : L_MS_BYTE; - } - - /* Convert 2 pixels at a time */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (type == L_LS_BYTE) { - for (j = 0; j < wpls; j++) { - sword = *(lines + j); - dword = ((sword >> 8) & 0xff00) | (sword & 0xff); - SET_DATA_TWO_BYTES(lined, j, dword); - } - } else if (type == L_MS_BYTE) { - for (j = 0; j < wpls; j++) { - sword = *(lines + j); - dword = ((sword >> 16) & 0xff00) | ((sword >> 8) & 0xff); - SET_DATA_TWO_BYTES(lined, j, dword); - } - } else { /* type == L_CLIP_TO_FF */ - for (j = 0; j < wpls; j++) { - sword = *(lines + j); - first = (sword >> 24) ? 255 : ((sword >> 16) & 0xff); - second = ((sword >> 8) & 0xff) ? 
255 : (sword & 0xff); - dword = (first << 8) | second; - SET_DATA_TWO_BYTES(lined, j, dword); - } - } - } - - return pixd; -} - - - -/*---------------------------------------------------------------------------* - * Conversion from grayscale to false color - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertGrayToFalseColor() - * - * \param[in] pixs 8 or 16 bpp grayscale - * \param[in] gamma (factor) 0.0 or 1.0 for default; > 1.0 for brighter; - * 2.0 is quite nice - * \return pixd 8 bpp with colormap, or NULL on error - * - *
- * Notes:
- *      (1) For 8 bpp input, this simply adds a colormap to the input image.
- *      (2) For 16 bpp input, it first converts to 8 bpp, using the MSB,
- *          and then adds the colormap.
- *      (3) The colormap is modeled after the Matlab "jet" configuration.
- * 
- */ -PIX * -pixConvertGrayToFalseColor(PIX *pixs, - l_float32 gamma) -{ -l_int32 d, i, rval, bval, gval; -l_int32 *curve; -l_float32 invgamma, x; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertGrayToFalseColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 16) - return (PIX *)ERROR_PTR("pixs not 8 or 16 bpp", procName, NULL); - - if (d == 16) { - pixd = pixConvert16To8(pixs, L_MS_BYTE); - } else { /* d == 8 */ - if (pixGetColormap(pixs)) - pixd = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixd = pixCopy(NULL, pixs); - } - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(8); - pixSetColormap(pixd, cmap); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Generate curve for transition part of color map */ - curve = (l_int32 *)LEPT_CALLOC(64, sizeof(l_int32)); - if (gamma == 0.0) gamma = 1.0; - invgamma = 1. / gamma; - for (i = 0; i < 64; i++) { - x = (l_float32)i / 64.; - curve[i] = (l_int32)(255. * powf(x, invgamma) + 0.5); - } - - for (i = 0; i < 256; i++) { - if (i < 32) { - rval = 0; - gval = 0; - bval = curve[i + 32]; - } else if (i < 96) { /* 32 - 95 */ - rval = 0; - gval = curve[i - 32]; - bval = 255; - } else if (i < 160) { /* 96 - 159 */ - rval = curve[i - 96]; - gval = 255; - bval = curve[159 - i]; - } else if (i < 224) { /* 160 - 223 */ - rval = 255; - gval = curve[223 - i]; - bval = 0; - } else { /* 224 - 255 */ - rval = curve[287 - i]; - gval = 0; - bval = 0; - } - pixcmapAddColor(cmap, rval, gval, bval); - } - - LEPT_FREE(curve); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Unpacking conversion from 1 bpp to 2, 4, 8, 16 and 32 bpp * - *---------------------------------------------------------------------------*/ -/*! 
- * \brief pixUnpackBinary() - * - * \param[in] pixs 1 bpp - * \param[in] depth of destination: 2, 4, 8, 16 or 32 bpp - * \param[in] invert 0: binary 0 --> grayscale 0 - * binary 1 --> grayscale 0xff... - * 1: binary 0 --> grayscale 0xff... - * binary 1 --> grayscale 0 - * \return pixd 2, 4, 8, 16 or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This function calls special cases of pixConvert1To*(),
- *          for 2, 4, 8, 16 and 32 bpp destinations.
- * 
- */ -PIX * -pixUnpackBinary(PIX *pixs, - l_int32 depth, - l_int32 invert) -{ -PIX *pixd; - - PROCNAME("pixUnpackBinary"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (depth != 2 && depth != 4 && depth != 8 && depth != 16 && depth != 32) - return (PIX *)ERROR_PTR("depth not 2, 4, 8, 16 or 32 bpp", - procName, NULL); - - if (depth == 2) { - if (invert == 0) - pixd = pixConvert1To2(NULL, pixs, 0, 3); - else /* invert bits */ - pixd = pixConvert1To2(NULL, pixs, 3, 0); - } else if (depth == 4) { - if (invert == 0) - pixd = pixConvert1To4(NULL, pixs, 0, 15); - else /* invert bits */ - pixd = pixConvert1To4(NULL, pixs, 15, 0); - } else if (depth == 8) { - if (invert == 0) - pixd = pixConvert1To8(NULL, pixs, 0, 255); - else /* invert bits */ - pixd = pixConvert1To8(NULL, pixs, 255, 0); - } else if (depth == 16) { - if (invert == 0) - pixd = pixConvert1To16(NULL, pixs, 0, 0xffff); - else /* invert bits */ - pixd = pixConvert1To16(NULL, pixs, 0xffff, 0); - } else { - if (invert == 0) - pixd = pixConvert1To32(NULL, pixs, 0, 0xffffffff); - else /* invert bits */ - pixd = pixConvert1To32(NULL, pixs, 0xffffffff, 0); - } - - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixConvert1To16() - * - * \param[in] pixd [optional] 16 bpp, can be null - * \param[in] pixs 1 bpp - * \param[in] val0 16 bit value to be used for 0s in pixs - * \param[in] val1 16 bit value to be used for 1s in pixs - * \return pixd 16 bpp - * - *
- * Notes:
- *      (1) If pixd is null, a new pix is made.
- *      (2) If pixd is not null, it must be of equal width and height
- *          as pixs.  It is always returned.
- * 
- */ -PIX * -pixConvert1To16(PIX *pixd, - PIX *pixs, - l_uint16 val0, - l_uint16 val1) -{ -l_int32 w, h, i, j, dibit, ndibits, wpls, wpld; -l_uint16 val[2]; -l_uint32 index; -l_uint32 *tab, *datas, *datad, *lines, *lined; - - PROCNAME("pixConvert1To16"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (pixd) { - if (w != pixGetWidth(pixd) || h != pixGetHeight(pixd)) - return (PIX *)ERROR_PTR("pix sizes unequal", procName, pixd); - if (pixGetDepth(pixd) != 16) - return (PIX *)ERROR_PTR("pixd not 16 bpp", procName, pixd); - } else { - if ((pixd = pixCreate(w, h, 16)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Use a table to convert 2 src bits at a time */ - tab = (l_uint32 *)LEPT_CALLOC(4, sizeof(l_uint32)); - val[0] = val0; - val[1] = val1; - for (index = 0; index < 4; index++) { - tab[index] = (val[(index >> 1) & 1] << 16) | val[index & 1]; - } - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - ndibits = (w + 1) / 2; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < ndibits; j++) { - dibit = GET_DATA_DIBIT(lines, j); - lined[j] = tab[dibit]; - } - } - - LEPT_FREE(tab); - return pixd; -} - - -/*! - * \brief pixConvert1To32() - * - * \param[in] pixd [optional] 32 bpp, can be null - * \param[in] pixs 1 bpp - * \param[in] val0 32 bit value to be used for 0s in pixs - * \param[in] val1 32 bit value to be used for 1s in pixs - * \return pixd 32 bpp - * - *
- * Notes:
- *      (1) If pixd is null, a new pix is made.
- *      (2) If pixd is not null, it must be of equal width and height
- *          as pixs.  It is always returned.
- * 
- */ -PIX * -pixConvert1To32(PIX *pixd, - PIX *pixs, - l_uint32 val0, - l_uint32 val1) -{ -l_int32 w, h, i, j, wpls, wpld, bit; -l_uint32 val[2]; -l_uint32 *datas, *datad, *lines, *lined; - - PROCNAME("pixConvert1To32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (pixd) { - if (w != pixGetWidth(pixd) || h != pixGetHeight(pixd)) - return (PIX *)ERROR_PTR("pix sizes unequal", procName, pixd); - if (pixGetDepth(pixd) != 32) - return (PIX *)ERROR_PTR("pixd not 32 bpp", procName, pixd); - } else { - if ((pixd = pixCreate(w, h, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - val[0] = val0; - val[1] = val1; - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j - * Notes: - * (1) Input 0 is mapped to (255, 255, 255); 1 is mapped to (0, 0, 0) - *
- */ -PIX * -pixConvert1To2Cmap(PIX *pixs) -{ -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvert1To2Cmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - if ((pixd = pixConvert1To2(NULL, pixs, 0, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(2); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixSetColormap(pixd, cmap); - pixCopyInputFormat(pixd, pixs); - - return pixd; -} - - -/*! - * \brief pixConvert1To2() - * - * \param[in] pixd [optional] 2 bpp, can be null - * \param[in] pixs 1 bpp - * \param[in] val0 2 bit value to be used for 0s in pixs - * \param[in] val1 2 bit value to be used for 1s in pixs - * \return pixd 2 bpp - * - *
- * Notes:
- *      (1) If pixd is null, a new pix is made.
- *      (2) If pixd is not null, it must be of equal width and height
- *          as pixs.  It is always returned.
- *      (3) A simple unpacking might use val0 = 0 and val1 = 3.
- *      (4) If you want a colormapped pixd, use pixConvert1To2Cmap().
- * 
- */ -PIX * -pixConvert1To2(PIX *pixd, - PIX *pixs, - l_int32 val0, - l_int32 val1) -{ -l_int32 w, h, i, j, byteval, nbytes, wpls, wpld; -l_uint8 val[2]; -l_uint32 index; -l_uint16 *tab; -l_uint32 *datas, *datad, *lines, *lined; - - PROCNAME("pixConvert1To2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - - pixGetDimensions(pixs, &w, &h, NULL); - if (pixd) { - if (w != pixGetWidth(pixd) || h != pixGetHeight(pixd)) - return (PIX *)ERROR_PTR("pix sizes unequal", procName, pixd); - if (pixGetDepth(pixd) != 2) - return (PIX *)ERROR_PTR("pixd not 2 bpp", procName, pixd); - } else { - if ((pixd = pixCreate(w, h, 2)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Use a table to convert 8 src bits to 16 dest bits */ - tab = (l_uint16 *)LEPT_CALLOC(256, sizeof(l_uint16)); - val[0] = val0; - val[1] = val1; - for (index = 0; index < 256; index++) { - tab[index] = (val[(index >> 7) & 1] << 14) | - (val[(index >> 6) & 1] << 12) | - (val[(index >> 5) & 1] << 10) | - (val[(index >> 4) & 1] << 8) | - (val[(index >> 3) & 1] << 6) | - (val[(index >> 2) & 1] << 4) | - (val[(index >> 1) & 1] << 2) | val[index & 1]; - } - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - nbytes = (w + 7) / 8; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < nbytes; j++) { - byteval = GET_DATA_BYTE(lines, j); - SET_DATA_TWO_BYTES(lined, j, tab[byteval]); - } - } - - LEPT_FREE(tab); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion from 1 bpp to 4 bpp * - *---------------------------------------------------------------------------*/ -/*! 
- * \brief pixConvert1To4Cmap() - * - * \param[in] pixs 1 bpp - * \return pixd 4 bpp, cmapped - * - *
- * Notes:
- *      (1) Input 0 is mapped to (255, 255, 255); 1 is mapped to (0, 0, 0)
- * 
- */ -PIX * -pixConvert1To4Cmap(PIX *pixs) -{ -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvert1To4Cmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - if ((pixd = pixConvert1To4(NULL, pixs, 0, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(4); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixSetColormap(pixd, cmap); - pixCopyInputFormat(pixd, pixs); - - return pixd; -} - - -/*! - * \brief pixConvert1To4() - * - * \param[in] pixd [optional] 4 bpp, can be null - * \param[in] pixs 1 bpp - * \param[in] val0 4 bit value to be used for 0s in pixs - * \param[in] val1 4 bit value to be used for 1s in pixs - * \return pixd 4 bpp - * - *
- * Notes:
- *      (1) If pixd is null, a new pix is made.
- *      (2) If pixd is not null, it must be of equal width and height
- *          as pixs.  It is always returned.
- *      (3) A simple unpacking might use val0 = 0 and val1 = 15, or v.v.
- *      (4) If you want a colormapped pixd, use pixConvert1To4Cmap().
- * 
- */ -PIX * -pixConvert1To4(PIX *pixd, - PIX *pixs, - l_int32 val0, - l_int32 val1) -{ -l_int32 w, h, i, j, byteval, nbytes, wpls, wpld; -l_uint8 val[2]; -l_uint32 index; -l_uint32 *tab, *datas, *datad, *lines, *lined; - - PROCNAME("pixConvert1To4"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - - pixGetDimensions(pixs, &w, &h, NULL); - if (pixd) { - if (w != pixGetWidth(pixd) || h != pixGetHeight(pixd)) - return (PIX *)ERROR_PTR("pix sizes unequal", procName, pixd); - if (pixGetDepth(pixd) != 4) - return (PIX *)ERROR_PTR("pixd not 4 bpp", procName, pixd); - } else { - if ((pixd = pixCreate(w, h, 4)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Use a table to convert 8 src bits to 32 bit dest word */ - tab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - val[0] = val0; - val[1] = val1; - for (index = 0; index < 256; index++) { - tab[index] = (val[(index >> 7) & 1] << 28) | - (val[(index >> 6) & 1] << 24) | - (val[(index >> 5) & 1] << 20) | - (val[(index >> 4) & 1] << 16) | - (val[(index >> 3) & 1] << 12) | - (val[(index >> 2) & 1] << 8) | - (val[(index >> 1) & 1] << 4) | val[index & 1]; - } - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - nbytes = (w + 7) / 8; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < nbytes; j++) { - byteval = GET_DATA_BYTE(lines, j); - lined[j] = tab[byteval]; - } - } - - LEPT_FREE(tab); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion from 1, 2 and 4 bpp to 8 bpp * - *---------------------------------------------------------------------------*/ -/*! 
- * \brief pixConvert1To8Cmap() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, cmapped - * - *
- * Notes:
- *      (1) Input 0 is mapped to (255, 255, 255); 1 is mapped to (0, 0, 0)
- * 
- */ -PIX * -pixConvert1To8Cmap(PIX *pixs) -{ -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvert1To8Cmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - if ((pixd = pixConvert1To8(NULL, pixs, 0, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - cmap = pixcmapCreate(8); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixSetColormap(pixd, cmap); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixConvert1To8() - * - * \param[in] pixd [optional] 8 bpp, can be null - * \param[in] pixs 1 bpp - * \param[in] val0 8 bit value to be used for 0s in pixs - * \param[in] val1 8 bit value to be used for 1s in pixs - * \return pixd 8 bpp - * - *
- * Notes:
- *      (1) If pixd is null, a new pix is made.
- *      (2) If pixd is not null, it must be of equal width and height
- *          as pixs.  It is always returned.
- *      (3) A simple unpacking might use val0 = 0 and val1 = 255, or v.v.
- *      (4) To have a colormap associated with the 8 bpp pixd,
- *          use pixConvert1To8Cmap().
- * 
- */ -PIX * -pixConvert1To8(PIX *pixd, - PIX *pixs, - l_uint8 val0, - l_uint8 val1) -{ -l_int32 w, h, i, j, qbit, nqbits, wpls, wpld; -l_uint8 val[2]; -l_uint32 index; -l_uint32 *tab, *datas, *datad, *lines, *lined; - - PROCNAME("pixConvert1To8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, pixd); - - pixGetDimensions(pixs, &w, &h, NULL); - if (pixd) { - if (w != pixGetWidth(pixd) || h != pixGetHeight(pixd)) - return (PIX *)ERROR_PTR("pix sizes unequal", procName, pixd); - if (pixGetDepth(pixd) != 8) - return (PIX *)ERROR_PTR("pixd not 8 bpp", procName, pixd); - } else { - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixSetPadBits(pixs, 0); - - /* Use a table to convert 4 src bits at a time */ - tab = (l_uint32 *)LEPT_CALLOC(16, sizeof(l_uint32)); - val[0] = val0; - val[1] = val1; - for (index = 0; index < 16; index++) { - tab[index] = ((l_uint32)val[(index >> 3) & 1] << 24) | - (val[(index >> 2) & 1] << 16) | - (val[(index >> 1) & 1] << 8) | val[index & 1]; - } - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - nqbits = (w + 3) / 4; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < nqbits; j++) { - qbit = GET_DATA_QBIT(lines, j); - lined[j] = tab[qbit]; - } - } - - LEPT_FREE(tab); - return pixd; -} - - -/*! 
- * \brief pixConvert2To8() - * - * \param[in] pixs 2 bpp - * \param[in] val0 8 bit value to be used for 00 in pixs - * \param[in] val1 8 bit value to be used for 01 in pixs - * \param[in] val2 8 bit value to be used for 10 in pixs - * \param[in] val3 8 bit value to be used for 11 in pixs - * \param[in] cmapflag TRUE if pixd is to have a colormap; FALSE otherwise - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      ~ A simple unpacking might use val0 = 0,
- *        val1 = 85 (0x55), val2 = 170 (0xaa), val3 = 255.
- *      ~ If cmapflag is TRUE:
- *          ~ The 8 bpp image is made with a colormap.
- *          ~ If pixs has a colormap, the input values are ignored and
- *            the 8 bpp image is made using the colormap
- *          ~ If pixs does not have a colormap, the input values are
- *            used to build the colormap.
- *      ~ If cmapflag is FALSE:
- *          ~ The 8 bpp image is made without a colormap.
- *          ~ If pixs has a colormap, the input values are ignored,
- *            the colormap is removed, and the values stored in the 8 bpp
- *            image are from the colormap.
- *          ~ If pixs does not have a colormap, the input values are
- *            used to populate the 8 bpp image.
- * 
- */ -PIX * -pixConvert2To8(PIX *pixs, - l_uint8 val0, - l_uint8 val1, - l_uint8 val2, - l_uint8 val3, - l_int32 cmapflag) -{ -l_int32 w, h, i, j, nbytes, wpls, wpld, dibit, byte; -l_uint8 val[4]; -l_uint32 index; -l_uint32 *tab, *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmaps, *cmapd; - - PROCNAME("pixConvert2To8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 2) - return (PIX *)ERROR_PTR("pixs not 2 bpp", procName, NULL); - - cmaps = pixGetColormap(pixs); - if (cmaps && cmapflag == FALSE) - return pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSetPadBits(pixs, 0); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - if (cmapflag == TRUE) { /* pixd will have a colormap */ - if (cmaps) { /* use the existing colormap from pixs */ - cmapd = pixcmapConvertTo8(cmaps); - } else { /* make a colormap from the input values */ - cmapd = pixcmapCreate(8); - pixcmapAddColor(cmapd, val0, val0, val0); - pixcmapAddColor(cmapd, val1, val1, val1); - pixcmapAddColor(cmapd, val2, val2, val2); - pixcmapAddColor(cmapd, val3, val3, val3); - } - pixSetColormap(pixd, cmapd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - dibit = GET_DATA_DIBIT(lines, j); - SET_DATA_BYTE(lined, j, dibit); - } - } - return pixd; - } - - /* Last case: no colormap in either pixs or pixd. 
- * Use input values and build a table to convert 1 src byte - * (4 src pixels) at a time */ - tab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - val[0] = val0; - val[1] = val1; - val[2] = val2; - val[3] = val3; - for (index = 0; index < 256; index++) { - tab[index] = (val[(index >> 6) & 3] << 24) | - (val[(index >> 4) & 3] << 16) | - (val[(index >> 2) & 3] << 8) | val[index & 3]; - } - - nbytes = (w + 3) / 4; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < nbytes; j++) { - byte = GET_DATA_BYTE(lines, j); - lined[j] = tab[byte]; - } - } - - LEPT_FREE(tab); - return pixd; -} - - -/*! - * \brief pixConvert4To8() - * - * \param[in] pixs 4 bpp - * \param[in] cmapflag TRUE if pixd is to have a colormap; FALSE otherwise - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      ~ If cmapflag is TRUE:
- *          ~ pixd is made with a colormap.
- *          ~ If pixs has a colormap, it is copied and the colormap
- *            index values are placed in pixd.
- *          ~ If pixs does not have a colormap, a colormap with linear
- *            trc is built and the pixel values in pixs are placed in
- *            pixd as colormap index values.
- *      ~ If cmapflag is FALSE:
- *          ~ pixd is made without a colormap.
- *          ~ If pixs has a colormap, it is removed and the values stored
- *            in pixd are from the colormap (converted to gray).
- *          ~ If pixs does not have a colormap, the pixel values in pixs
- *            are used, with shift replication, to populate pixd.
- * 
- */ -PIX * -pixConvert4To8(PIX *pixs, - l_int32 cmapflag) -{ -l_int32 w, h, i, j, wpls, wpld, byte, qbit; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmaps, *cmapd; - - PROCNAME("pixConvert4To8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 4) - return (PIX *)ERROR_PTR("pixs not 4 bpp", procName, NULL); - - cmaps = pixGetColormap(pixs); - if (cmaps && cmapflag == FALSE) - return pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - if (cmapflag == TRUE) { /* pixd will have a colormap */ - if (cmaps) { /* use the existing colormap from pixs */ - cmapd = pixcmapConvertTo8(cmaps); - } else { /* make a colormap with a linear trc */ - cmapd = pixcmapCreate(8); - for (i = 0; i < 16; i++) - pixcmapAddColor(cmapd, 17 * i, 17 * i, 17 * i); - } - pixSetColormap(pixd, cmapd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - qbit = GET_DATA_QBIT(lines, j); - SET_DATA_BYTE(lined, j, qbit); - } - } - return pixd; - } - - /* Last case: no colormap in either pixs or pixd. - * Replicate the qbit value into 8 bits. */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - qbit = GET_DATA_QBIT(lines, j); - byte = (qbit << 4) | qbit; - SET_DATA_BYTE(lined, j, byte); - } - } - return pixd; -} - - - -/*---------------------------------------------------------------------------* - * Unpacking conversion from 8 bpp to 16 bpp * - *---------------------------------------------------------------------------*/ -/*! 
- * \brief pixConvert8To16() - * - * \param[in] pixs 8 bpp; colormap removed to gray - * \param[in] leftshift number of bits: 0 is no shift; - * 8 replicates in MSB and LSB of dest - * \return pixd 16 bpp, or NULL on error - * - *
- * Notes:
- *      (1) For left shift of 8, the 8 bit value is replicated in both
- *          the MSB and the LSB of the pixels in pixd.  That way, we get
- *          proportional mapping, with a correct map from 8 bpp white
- *          (0xff) to 16 bpp white (0xffff).
- * 
- */ -PIX * -pixConvert8To16(PIX *pixs, - l_int32 leftshift) -{ -l_int32 i, j, w, h, d, wplt, wpld, val; -l_uint32 *datat, *datad, *linet, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixConvert8To16"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (leftshift < 0 || leftshift > 8) - return (PIX *)ERROR_PTR("leftshift not in [0 ... 8]", procName, NULL); - - if (pixGetColormap(pixs) != NULL) - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - else - pixt = pixClone(pixs); - - pixd = pixCreate(w, h, 16); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datat = pixGetData(pixt); - datad = pixGetData(pixd); - wplt = pixGetWpl(pixt); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(linet, j); - if (leftshift == 8) - val = val | (val << leftshift); - else - val <<= leftshift; - SET_DATA_TWO_BYTES(lined, j, val); - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 2 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo2() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp; colormap OK but will be removed - * \return pixd 2 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level function, with simple default values
- *          used in pixConvertTo8() if unpacking is necessary.
- *      (2) Any existing colormap is removed; the result is always gray.
- *      (3) If the input image has 2 bpp and no colormap, the operation is
- *          lossless and a copy is returned.
- * 
- */ -PIX * -pixConvertTo2(PIX *pixs) -{ -l_int32 d; -PIX *pix1, *pix2, *pix3, *pixd; - - PROCNAME("pixConvertTo2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not {1,2,4,8,32}", procName, NULL); - - if (pixGetColormap(pixs) != NULL) { - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - d = pixGetDepth(pix1); - } else { - pix1 = pixCopy(NULL, pixs); - } - if (d == 32) - pix2 = pixConvertTo8(pix1, FALSE); - else - pix2 = pixClone(pix1); - pixDestroy(&pix1); - if (d == 1) { - pixd = pixConvert1To2(NULL, pix2, 3, 0); - } else if (d == 2) { - pixd = pixClone(pix2); - } else if (d == 4) { - pix3 = pixConvert4To8(pix2, FALSE); /* unpack to 8 */ - pixd = pixConvert8To2(pix3); - pixDestroy(&pix3); - } else { /* d == 8 */ - pixd = pixConvert8To2(pix2); - } - pixDestroy(&pix2); - return pixd; -} - - -/*! - * \brief pixConvert8To2() - * - * \param[in] pix 8 bpp; colormap OK - * \return pixd 2 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Any existing colormap is removed to gray.
- * 
- */ -PIX * -pixConvert8To2(PIX *pix) -{ -l_int32 i, j, w, h, wpls, wpld; -l_uint32 word; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixs, *pixd; - - PROCNAME("pixConvert8To2"); - - if (!pix || pixGetDepth(pix) != 8) - return (PIX *)ERROR_PTR("pix undefined or not 8 bpp", procName, NULL); - - if (pixGetColormap(pix) != NULL) - pixs = pixRemoveColormap(pix, REMOVE_CMAP_TO_GRAYSCALE); - else - pixs = pixClone(pix); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreate(w, h, 2); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wpls; j++) { /* march through 4 pixels at a time */ - word = lines[j] & 0xc0c0c0c0; /* top 2 bits of each byte */ - word = (word >> 24) | ((word & 0xff0000) >> 18) | - ((word & 0xff00) >> 12) | ((word & 0xff) >> 6); - SET_DATA_BYTE(lined, j, word); /* only LS byte is filled */ - } - } - pixDestroy(&pixs); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 4 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo4() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp; colormap OK but will be removed - * \return pixd 4 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level function, with simple default values
- *          used in pixConvertTo8() if unpacking is necessary.
- *      (2) Any existing colormap is removed; the result is always gray.
- *      (3) If the input image has 4 bpp and no colormap, the operation is
- *          lossless and a copy is returned.
- * 
- */ -PIX * -pixConvertTo4(PIX *pixs) -{ -l_int32 d; -PIX *pix1, *pix2, *pix3, *pixd; - - PROCNAME("pixConvertTo4"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not {1,2,4,8,32}", procName, NULL); - - if (pixGetColormap(pixs) != NULL) { - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - d = pixGetDepth(pix1); - } else { - pix1 = pixCopy(NULL, pixs); - } - if (d == 32) - pix2 = pixConvertTo8(pix1, FALSE); - else - pix2 = pixClone(pix1); - pixDestroy(&pix1); - if (d == 1) { - pixd = pixConvert1To4(NULL, pix2, 15, 0); - } else if (d == 2) { - pix3 = pixConvert2To8(pix2, 0, 0x55, 0xaa, 0xff, FALSE); - pixd = pixConvert8To4(pix3); - pixDestroy(&pix3); - } else if (d == 4) { - pixd = pixClone(pix2); - } else { /* d == 8 */ - pixd = pixConvert8To4(pix2); - } - pixDestroy(&pix2); - return pixd; -} - - -/*! - * \brief pixConvert8To4() - * - * \param[in] pix 8 bpp; colormap OK - * \return pixd 4 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Any existing colormap is removed to gray.
- * 
- */ -PIX * -pixConvert8To4(PIX *pix) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixs, *pixd; - - PROCNAME("pixConvert8To4"); - - if (!pix || pixGetDepth(pix) != 8) - return (PIX *)ERROR_PTR("pix undefined or not 8 bpp", procName, NULL); - - if (pixGetColormap(pix) != NULL) - pixs = pixRemoveColormap(pix, REMOVE_CMAP_TO_GRAYSCALE); - else - pixs = pixClone(pix); - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreate(w, h, 4); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - val = val >> 4; /* take top 4 bits */ - SET_DATA_QBIT(lined, j, val); - } - } - pixDestroy(&pixs); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 1 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo1Adaptive() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level function, that uses default values for
- *          adaptive thresholding, if necessary.  Otherwise, it is the same as
- *          pixConvertTo1(), which uses a global threshold for binarization.
- * 
- */ -PIX * -pixConvertTo1Adaptive(PIX *pixs) -{ -l_int32 d, color0, color1, rval, gval, bval; -PIX *pix1, *pix2, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertTo1Adaptive"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depth not {1,2,4,8,16,32}", procName, NULL); - - cmap = pixGetColormap(pixs); - if (d == 1) { - if (!cmap) { - return pixCopy(NULL, pixs); - } else { /* strip the colormap off, and invert if reasonable - for standard binary photometry. */ - pixcmapGetColor(cmap, 0, &rval, &gval, &bval); - color0 = rval + gval + bval; - pixcmapGetColor(cmap, 1, &rval, &gval, &bval); - color1 = rval + gval + bval; - pixd = pixCopy(NULL, pixs); - pixDestroyColormap(pixd); - if (color1 > color0) - pixInvert(pixd, pixd); - return pixd; - } - } - - /* For all other depths, use 8 bpp as an intermediary */ - pix1 = pixConvertTo8(pixs, FALSE); - pix2 = pixBackgroundNormSimple(pix1, NULL, NULL); - pixd = pixThresholdToBinary(pix2, 180); - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} - - -/*! - * \brief pixConvertTo1() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] threshold for final binarization, relative to 8 bpp - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level function, with simple default values
- *          used in pixConvertTo8() if unpacking is necessary.
- *      (2) Any existing colormap is removed.
- *      (3) If the input image has 1 bpp and no colormap, the operation is
- *          lossless and a copy is returned.
- * 
- */ -PIX * -pixConvertTo1(PIX *pixs, - l_int32 threshold) -{ -l_int32 d, color0, color1, rval, gval, bval; -PIX *pixg, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertTo1"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depth not {1,2,4,8,16,32}", procName, NULL); - - cmap = pixGetColormap(pixs); - if (d == 1) { - if (!cmap) { - return pixCopy(NULL, pixs); - } else { /* strip the colormap off, and invert if reasonable - for standard binary photometry. */ - pixcmapGetColor(cmap, 0, &rval, &gval, &bval); - color0 = rval + gval + bval; - pixcmapGetColor(cmap, 1, &rval, &gval, &bval); - color1 = rval + gval + bval; - pixd = pixCopy(NULL, pixs); - pixDestroyColormap(pixd); - if (color1 > color0) - pixInvert(pixd, pixd); - return pixd; - } - } - - /* For all other depths, use 8 bpp as an intermediary */ - pixg = pixConvertTo8(pixs, FALSE); - pixd = pixThresholdToBinary(pixg, threshold); - pixDestroy(&pixg); - return pixd; -} - - -/*! - * \brief pixConvertTo1BySampling() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] factor submsampling factor; integer >= 1 - * \param[in] threshold for final binarization, relative to 8 bpp - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a quick and dirty, top-level converter.
- *      (2) See pixConvertTo1() for default values.
- * 
- */ -PIX * -pixConvertTo1BySampling(PIX *pixs, - l_int32 factor, - l_int32 threshold) -{ -l_float32 scalefactor; -PIX *pixt, *pixd; - - PROCNAME("pixConvertTo1BySampling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("factor must be >= 1", procName, NULL); - - scalefactor = 1. / (l_float32)factor; - pixt = pixScaleBySampling(pixs, scalefactor, scalefactor); - pixd = pixConvertTo1(pixt, threshold); - - pixDestroy(&pixt); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 8 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo8() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] cmapflag TRUE if pixd is to have a colormap; FALSE otherwise - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level function, with simple default values
- *          for unpacking.
- *      (2) The result, pixd, is made with a colormap if specified.
- *          It is always a new image -- never a clone.  For example,
- *          if d == 8, and cmapflag matches the existence of a cmap
- *          in pixs, the operation is lossless and it returns a copy.
- *      (3) The default values used are:
- *          ~ 1 bpp: val0 = 255, val1 = 0
- *          ~ 2 bpp: 4 bpp:  even increments over dynamic range
- *          ~ 8 bpp: lossless if cmap matches cmapflag
- *          ~ 16 bpp: use most significant byte
- *      (4) If 32 bpp RGB, this is converted to gray.  If you want
- *          to do color quantization, you must specify the type
- *          explicitly, using the color quantization code.
- * 
- */ -PIX * -pixConvertTo8(PIX *pixs, - l_int32 cmapflag) -{ -l_int32 d; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertTo8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depth not {1,2,4,8,16,32}", procName, NULL); - - if (d == 1) { - if (cmapflag) - return pixConvert1To8Cmap(pixs); - else - return pixConvert1To8(NULL, pixs, 255, 0); - } else if (d == 2) { - return pixConvert2To8(pixs, 0, 85, 170, 255, cmapflag); - } else if (d == 4) { - return pixConvert4To8(pixs, cmapflag); - } else if (d == 8) { - cmap = pixGetColormap(pixs); - if ((cmap && cmapflag) || (!cmap && !cmapflag)) { - return pixCopy(NULL, pixs); - } else if (cmap) { /* !cmapflag */ - return pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - } else { /* !cmap && cmapflag; add colormap to pixd */ - pixd = pixCopy(NULL, pixs); - pixAddGrayColormap8(pixd); - return pixd; - } - } else if (d == 16) { - pixd = pixConvert16To8(pixs, L_MS_BYTE); - if (cmapflag) - pixAddGrayColormap8(pixd); - return pixd; - } else { /* d == 32 */ - pixd = pixConvertRGBToLuminance(pixs); - if (cmapflag) - pixAddGrayColormap8(pixd); - return pixd; - } -} - - -/*! - * \brief pixConvertTo8BySampling() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] factor submsampling factor; integer >= 1 - * \param[in] cmapflag TRUE if pixd is to have a colormap; FALSE otherwise - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a fast, quick/dirty, top-level converter.
- *      (2) See pixConvertTo8() for default values.
- * 
- */ -PIX * -pixConvertTo8BySampling(PIX *pixs, - l_int32 factor, - l_int32 cmapflag) -{ -l_float32 scalefactor; -PIX *pixt, *pixd; - - PROCNAME("pixConvertTo8BySampling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("factor must be >= 1", procName, NULL); - - scalefactor = 1. / (l_float32)factor; - pixt = pixScaleBySampling(pixs, scalefactor, scalefactor); - pixd = pixConvertTo8(pixt, cmapflag); - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixConvertTo8Colormap() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] dither 1 to dither if necessary; 0 otherwise - * \return pixd 8 bpp, cmapped, or NULL on error - * - *
- * Notes:
- *      (1) This is a top-level function, with simple default values
- *          for unpacking.
- *      (2) The result, pixd, is always made with a colormap.
- *      (3) If d == 8, the operation is lossless and it returns a copy.
- *      (4) The default values used for increasing depth are:
- *          ~ 1 bpp: val0 = 255, val1 = 0
- *          ~ 2 bpp: 4 bpp:  even increments over dynamic range
- *      (5) For 16 bpp, use the most significant byte.
- *      (6) For 32 bpp RGB, use octcube quantization with optional dithering.
- * 
- */ -PIX * -pixConvertTo8Colormap(PIX *pixs, - l_int32 dither) -{ -l_int32 d; - - PROCNAME("pixConvertTo8Colormap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depth not {1,2,4,8,16,32}", procName, NULL); - - if (d != 32) - return pixConvertTo8(pixs, 1); - - return pixConvertRGBToColormap(pixs, dither); -} - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 16 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo16() - * - * \param[in] pixs 1, 8 bpp - * \return pixd 16 bpp, or NULL on error - * - * Usage: Top-level function, with simple default values for unpacking. - * 1 bpp: val0 = 0xffff, val1 = 0 - * 8 bpp: replicates the 8 bit value in both the MSB and LSB - * of the 16 bit pixel. - */ -PIX * -pixConvertTo16(PIX *pixs) -{ -l_int32 d; - - PROCNAME("pixConvertTo16"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - d = pixGetDepth(pixs); - if (d == 1) - return pixConvert1To16(NULL, pixs, 0xffff, 0); - else if (d == 8) - return pixConvert8To16(pixs, 8); - else - return (PIX *)ERROR_PTR("src depth not 1 or 8 bpp", procName, NULL); -} - - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 32 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo32() - * - * \param[in] pixs 1, 2, 4, 8, 16, 24 or 32 bpp - * \return pixd 32 bpp, or NULL on error - * - * Usage: Top-level function, with simple default values for unpacking. 
- * 1 bpp: val0 = 255, val1 = 0 - * and then replication into R, G and B components - * 2 bpp: if colormapped, use the colormap values; otherwise, - * use val0 = 0, val1 = 0x55, val2 = 0xaa, val3 = 255 - * and replicate gray into R, G and B components - * 4 bpp: if colormapped, use the colormap values; otherwise, - * replicate 2 nybs into a byte, and then into R,G,B components - * 8 bpp: if colormapped, use the colormap values; otherwise, - * replicate gray values into R, G and B components - * 16 bpp: replicate MSB into R, G and B components - * 24 bpp: unpack the pixels, maintaining word alignment on each scanline - * 32 bpp: makes a copy - * - *
- * Notes:
- *      (1) Never returns a clone of pixs.
- * 
- */ -PIX * -pixConvertTo32(PIX *pixs) -{ -l_int32 d; -PIX *pix1, *pixd; - - PROCNAME("pixConvertTo32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - d = pixGetDepth(pixs); - if (d == 1) { - return pixConvert1To32(NULL, pixs, 0xffffffff, 0); - } else if (d == 2) { - pix1 = pixConvert2To8(pixs, 0, 85, 170, 255, TRUE); - pixd = pixConvert8To32(pix1); - pixDestroy(&pix1); - return pixd; - } else if (d == 4) { - pix1 = pixConvert4To8(pixs, TRUE); - pixd = pixConvert8To32(pix1); - pixDestroy(&pix1); - return pixd; - } else if (d == 8) { - return pixConvert8To32(pixs); - } else if (d == 16) { - pix1 = pixConvert16To8(pixs, L_MS_BYTE); - pixd = pixConvert8To32(pix1); - pixDestroy(&pix1); - return pixd; - } else if (d == 24) { - return pixConvert24To32(pixs); - } else if (d == 32) { - return pixCopy(NULL, pixs); - } else { - return (PIX *)ERROR_PTR("depth not 1, 2, 4, 8, 16, 32 bpp", - procName, NULL); - } -} - - -/*! - * \brief pixConvertTo32BySampling() - * - * \param[in] pixs 1, 2, 4, 8, 16, 24 or 32 bpp - * \param[in] factor submsampling factor; integer >= 1 - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a fast, quick/dirty, top-level converter.
- *      (2) See pixConvertTo32() for default values.
- * 
- */ -PIX * -pixConvertTo32BySampling(PIX *pixs, - l_int32 factor) -{ -l_float32 scalefactor; -PIX *pix1, *pixd; - - PROCNAME("pixConvertTo32BySampling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("factor must be >= 1", procName, NULL); - - scalefactor = 1. / (l_float32)factor; - pix1 = pixScaleBySampling(pixs, scalefactor, scalefactor); - pixd = pixConvertTo32(pix1); - - pixDestroy(&pix1); - return pixd; -} - - -/*! - * \brief pixConvert8To32() - * - * \param[in] pixs 8 bpp - * \return 32 bpp rgb pix, or NULL on error - * - *
- * Notes:
- *      (1) If there is no colormap, replicates the gray value
- *          into the 3 MSB of the dest pixel.
- * 
- */ -PIX * -pixConvert8To32(PIX *pixs) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *datad, *lines, *lined; -l_uint32 *tab; -PIX *pixd; - - PROCNAME("pixConvert8To32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - if (pixGetColormap(pixs)) - return pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(w, h, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Replication table gray --> rgb */ - tab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32)); - for (i = 0; i < 256; i++) - tab[i] = (i << 24) | (i << 16) | (i << 8); - - /* Replicate 1 --> 4 bytes (alpha byte not set) */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(lines, j); - lined[j] = tab[val]; - } - } - - LEPT_FREE(tab); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Top-level conversion to 8 or 32 bpp, without colormap * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertTo8Or32() - * - * \param[in] pixs 1, 2, 4, 8, 16, with or without colormap; - * or 32 bpp rgb - * \param[in] copyflag L_CLONE or L_COPY - * \param[in] warnflag 1 to issue warning if colormap is removed; else 0 - * \return pixd 8 bpp grayscale or 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) If there is a colormap, the colormap is removed to 8 or 32 bpp,
- *          depending on whether the colors in the colormap are all gray.
- *      (2) If the input is either rgb or 8 bpp without a colormap,
- *          this returns either a clone or a copy, depending on %copyflag.
- *      (3) Otherwise, the pix is converted to 8 bpp grayscale.
- *          In all cases, pixd does not have a colormap.
- * 
- */ -PIX * -pixConvertTo8Or32(PIX *pixs, - l_int32 copyflag, - l_int32 warnflag) -{ -l_int32 d; -PIX *pixd; - - PROCNAME("pixConvertTo8Or32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (copyflag != L_CLONE && copyflag != L_COPY) - return (PIX *)ERROR_PTR("invalid copyflag", procName, NULL); - - d = pixGetDepth(pixs); - if (pixGetColormap(pixs)) { - if (warnflag) L_WARNING("pix has colormap; removing\n", procName); - pixd = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - } else if (d == 8 || d == 32) { - if (copyflag == L_CLONE) - pixd = pixClone(pixs); - else /* copyflag == L_COPY */ - pixd = pixCopy(NULL, pixs); - } else { - pixd = pixConvertTo8(pixs, 0); - } - - /* Sanity check on result */ - d = pixGetDepth(pixd); - if (d != 8 && d != 32) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("depth not 8 or 32 bpp", procName, NULL); - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion between 24 bpp and 32 bpp rgb * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvert24To32() - * - * \param[in] pixs 24 bpp rgb - * \return pixd 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) 24 bpp rgb pix are not supported in leptonica, except for a small
- *          number of formatted write operations.  The data is a byte array,
- *          with pixels in order r,g,b, and padded to 32 bit boundaries
- *          in each line.
- *      (2) Because 24 bpp rgb pix are conveniently generated by programs
- *          such as xpdf (which has SplashBitmaps that store the raster
- *          data in consecutive 24-bit rgb pixels), it is useful to provide
- *          24 bpp pix that simply incorporate that data.  The only things
- *          we can do with these are:
- *            (a) write them to file in png, jpeg, tiff and pnm
- *            (b) interconvert between 24 and 32 bpp in memory (for testing).
- * 
- */ -PIX * -pixConvert24To32(PIX *pixs) -{ -l_uint8 *lines; -l_int32 w, h, d, i, j, wpls, wpld, rval, gval, bval; -l_uint32 pixel; -l_uint32 *datas, *datad, *lined; -PIX *pixd; - - PROCNAME("pixConvert24to32"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 24) - return (PIX *)ERROR_PTR("pixs not 24 bpp", procName, NULL); - - pixd = pixCreateNoInit(w, h, 32); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = (l_uint8 *)(datas + i * wpls); - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - rval = *lines++; - gval = *lines++; - bval = *lines++; - composeRGBPixel(rval, gval, bval, &pixel); - lined[j] = pixel; - } - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixConvert32To24() - * - * \param[in] pixs 32 bpp rgb - * \return pixd 24 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) See pixconvert24To32().
- * 
- */ -PIX * -pixConvert32To24(PIX *pixs) -{ -l_uint8 *rgbdata8; -l_int32 w, h, d, i, j, wpls, wpld, rval, gval, bval; -l_uint32 *datas, *lines, *rgbdata; -PIX *pixd; - - PROCNAME("pixConvert32to24"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateNoInit(w, h, 24); - rgbdata = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - rgbdata8 = (l_uint8 *)(rgbdata + i * wpld); - for (j = 0; j < w; j++) { - extractRGBValues(lines[j], &rval, &gval, &bval); - *rgbdata8++ = rval; - *rgbdata8++ = gval; - *rgbdata8++ = bval; - } - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion between 32 bpp (1 spp) and 16 or 8 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvert32To16() - * - * \param[in] pixs 32 bpp, single component - * \param[in] type L_LS_TWO_BYTES, L_MS_TWO_BYTES, L_CLIP_TO_FFFF - * \return pixd 16 bpp , or NULL on error - * - *
- * Notes:
- *      (1) The data in pixs is typically used for labelling.
- *          It is an array of l_uint32 values, not rgb or rgba.
- * 
- */ -PIX * -pixConvert32To16(PIX *pixs, - l_int32 type) -{ -l_uint16 dword; -l_int32 w, h, i, j, wpls, wpld; -l_uint32 sword; -l_uint32 *datas, *lines, *datad, *lined; -PIX *pixd; - - PROCNAME("pixConvert32to16"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (type != L_LS_TWO_BYTES && type != L_MS_TWO_BYTES && - type != L_CLIP_TO_FFFF) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, 16)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - if (type == L_LS_TWO_BYTES) { - for (j = 0; j < wpls; j++) { - sword = *(lines + j); - dword = sword & 0xffff; - SET_DATA_TWO_BYTES(lined, j, dword); - } - } else if (type == L_MS_TWO_BYTES) { - for (j = 0; j < wpls; j++) { - sword = *(lines + j); - dword = sword >> 16; - SET_DATA_TWO_BYTES(lined, j, dword); - } - } else { /* type == L_CLIP_TO_FFFF */ - for (j = 0; j < wpls; j++) { - sword = *(lines + j); - dword = (sword >> 16) ? 0xffff : (sword & 0xffff); - SET_DATA_TWO_BYTES(lined, j, dword); - } - } - } - - return pixd; -} - - -/*! 
- * \brief pixConvert32To8() - * - * \param[in] pixs 32 bpp, single component - * \param[in] type16 L_LS_TWO_BYTES, L_MS_TWO_BYTES, L_CLIP_TO_FFFF - * \param[in] type8 L_LS_BYTE, L_MS_BYTE, L_CLIP_TO_FF - * \return pixd 8 bpp, or NULL on error - */ -PIX * -pixConvert32To8(PIX *pixs, - l_int32 type16, - l_int32 type8) -{ -PIX *pix1, *pixd; - - PROCNAME("pixConvert32to8"); - - if (!pixs || pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - if (type16 != L_LS_TWO_BYTES && type16 != L_MS_TWO_BYTES && - type16 != L_CLIP_TO_FFFF) - return (PIX *)ERROR_PTR("invalid type16", procName, NULL); - if (type8 != L_LS_BYTE && type8 != L_MS_BYTE && type8 != L_CLIP_TO_FF) - return (PIX *)ERROR_PTR("invalid type8", procName, NULL); - - pix1 = pixConvert32To16(pixs, type16); - pixd = pixConvert16To8(pix1, type8); - pixDestroy(&pix1); - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Removal of alpha component by blending with white background * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixRemoveAlpha() - * - * \param[in] pixs any depth - * \return pixd if 32 bpp rgba, pixs blended over a white background; - * a clone of pixs otherwise, and NULL on error - * - *
- * Notes:
- *      (1) This is a wrapper on pixAlphaBlendUniform()
- * 
- */ -PIX * -pixRemoveAlpha(PIX *pixs) -{ - PROCNAME("pixRemoveAlpha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if (pixGetDepth(pixs) == 32 && pixGetSpp(pixs) == 4) - return pixAlphaBlendUniform(pixs, 0xffffff00); - else - return pixClone(pixs); -} - - -/*---------------------------------------------------------------------------* - * Addition of alpha component to 1 bpp * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixAddAlphaTo1bpp() - * - * \param[in] pixd [optional] 1 bpp, can be null or equal to pixs - * \param[in] pixs 1 bpp - * \return pixd 1 bpp with colormap and non-opaque alpha, - * or NULL on error - * - *
- * Notes:
- *      (1) We don't use 1 bpp colormapped images with alpha in leptonica,
- *          but we support generating them (here), writing to png, and reading
- *          the png.  On reading, they are converted to 32 bpp RGBA.
- *      (2) The background (0) pixels in pixs become fully transparent, and the
- *          foreground (1) pixels are fully opaque.  Thus, pixd is a 1 bpp
- *          representation of a stencil, that can be used to paint over pixels
- *          of a backing image that are masked by the foreground in pixs.
- * 
- */ -PIX * -pixAddAlphaTo1bpp(PIX *pixd, - PIX *pixs) -{ -PIXCMAP *cmap; - - PROCNAME("pixAddAlphaTo1bpp"); - - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (pixd && (pixd != pixs)) - return (PIX *)ERROR_PTR("pixd defined but != pixs", procName, NULL); - - pixd = pixCopy(pixd, pixs); - cmap = pixcmapCreate(1); - pixSetColormap(pixd, cmap); - pixcmapAddRGBA(cmap, 255, 255, 255, 0); /* 0 ==> white + transparent */ - pixcmapAddRGBA(cmap, 0, 0, 0, 255); /* 1 ==> black + opaque */ - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Lossless depth conversion (unpacking) * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertLossless() - * - * \param[in] pixs 1, 2, 4, 8 bpp, not cmapped - * \param[in] d destination depth: 2, 4 or 8 - * \return pixd 2, 4 or 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This is a lossless unpacking (depth-increasing)
- *          conversion.  If ds is the depth of pixs, then
- *           ~ if d < ds, returns NULL
- *           ~ if d == ds, returns a copy
- *           ~ if d > ds, does the unpacking conversion
- *      (2) If pixs has a colormap, this is an error.
- * 
- */ -PIX * -pixConvertLossless(PIX *pixs, - l_int32 d) -{ -l_int32 w, h, ds, wpls, wpld, i, j, val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixConvertLossless"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - if (d != 2 && d != 4 && d != 8) - return (PIX *)ERROR_PTR("invalid dest depth", procName, NULL); - - pixGetDimensions(pixs, &w, &h, &ds); - if (d < ds) - return (PIX *)ERROR_PTR("depth > d", procName, NULL); - else if (d == ds) - return pixCopy(NULL, pixs); - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - - /* Unpack the bits */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - switch (ds) - { - case 1: - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(lines, j); - if (d == 8) - SET_DATA_BYTE(lined, j, val); - else if (d == 4) - SET_DATA_QBIT(lined, j, val); - else /* d == 2 */ - SET_DATA_DIBIT(lined, j, val); - } - break; - case 2: - for (j = 0; j < w; j++) { - val = GET_DATA_DIBIT(lines, j); - if (d == 8) - SET_DATA_BYTE(lined, j, val); - else /* d == 4 */ - SET_DATA_QBIT(lined, j, val); - } - break; - case 4: - for (j = 0; j < w; j++) { - val = GET_DATA_DIBIT(lines, j); - SET_DATA_BYTE(lined, j, val); - } - break; - } - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Conversion for printing in PostScript * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertForPSWrap() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp - * \return pixd 1, 8, or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) For wrapping in PostScript, we convert pixs to
- *          1 bpp, 8 bpp (gray) and 32 bpp (RGB color).
- *      (2) Colormaps are removed.  For pixs with colormaps, the
- *          images are converted to either 8 bpp gray or 32 bpp
- *          RGB, depending on whether the colormap has color content.
- *      (3) Images without colormaps, that are not 1 bpp or 32 bpp,
- *          are converted to 8 bpp gray.
- * 
- */ -PIX * -pixConvertForPSWrap(PIX *pixs) -{ -l_int32 d; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertForPSWrap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - cmap = pixGetColormap(pixs); - d = pixGetDepth(pixs); - switch (d) - { - case 1: - case 32: - pixd = pixClone(pixs); - break; - case 2: - if (cmap) - pixd = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pixd = pixConvert2To8(pixs, 0, 0x55, 0xaa, 0xff, FALSE); - break; - case 4: - if (cmap) - pixd = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pixd = pixConvert4To8(pixs, FALSE); - break; - case 8: - pixd = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - break; - case 16: - pixd = pixConvert16To8(pixs, L_MS_BYTE); - break; - default: - lept_stderr("depth not in {1, 2, 4, 8, 16, 32}"); - return NULL; - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Scaling conversion to subpixel RGB * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixConvertToSubpixelRGB() - * - * \param[in] pixs 8 bpp grayscale, 32 bpp rgb, or colormapped - * \param[in] scalex, scaley anisotropic scaling permitted between - * source and destination - * \param[in] order of subpixel rgb color components in - * composition of pixd: - * L_SUBPIXEL_ORDER_RGB, L_SUBPIXEL_ORDER_BGR, - * L_SUBPIXEL_ORDER_VRGB, L_SUBPIXEL_ORDER_VBGR - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) If pixs has a colormap, it is removed based on its contents
- *          to either 8 bpp gray or rgb.
- *      (2) For horizontal subpixel splitting, the input image
- *          is rescaled by %scaley vertically and by 3.0 times
- *          %scalex horizontally.  Then each horizontal triplet
- *          of pixels is mapped back to a single rgb pixel, with the
- *          r, g and b values being assigned based on the pixel triplet.
- *          For gray triplets, the r, g, and b values are set equal to
- *          the three gray values.  For color triplets, the r, g and b
- *          values are set equal to the components from the appropriate
- *          subpixel.  Vertical subpixel splitting is handled similarly.
- *      (3) See pixConvertGrayToSubpixelRGB() and
- *          pixConvertColorToSubpixelRGB() for further details.
- * 
- */ -PIX * -pixConvertToSubpixelRGB(PIX *pixs, - l_float32 scalex, - l_float32 scaley, - l_int32 order) -{ -l_int32 d; -PIX *pix1, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertToSubpixelRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (d != 8 && d != 32 && !cmap) - return (PIX *)ERROR_PTR("pix not 8 or 32 bpp and not cmapped", - procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factors must be > 0", procName, NULL); - if (order != L_SUBPIXEL_ORDER_RGB && order != L_SUBPIXEL_ORDER_BGR && - order != L_SUBPIXEL_ORDER_VRGB && order != L_SUBPIXEL_ORDER_VBGR) - return (PIX *)ERROR_PTR("invalid subpixel order", procName, NULL); - if ((pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - - d = pixGetDepth(pix1); - pixd = NULL; - if (d == 8) - pixd = pixConvertGrayToSubpixelRGB(pix1, scalex, scaley, order); - else if (d == 32) - pixd = pixConvertColorToSubpixelRGB(pix1, scalex, scaley, order); - else - L_ERROR("invalid depth %d\n", procName, d); - - pixDestroy(&pix1); - return pixd; -} - - -/*! - * \brief pixConvertGrayToSubpixelRGB() - * - * \param[in] pixs 8 bpp or colormapped - * \param[in] scalex, scaley - * \param[in] order of subpixel rgb color components in - * composition of pixd: - * L_SUBPIXEL_ORDER_RGB, L_SUBPIXEL_ORDER_BGR, - * L_SUBPIXEL_ORDER_VRGB, L_SUBPIXEL_ORDER_VBGR - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) If pixs has a colormap, it is removed to 8 bpp.
- *      (2) For horizontal subpixel splitting, the input gray image
- *          is rescaled by %scaley vertically and by 3.0 times
- *          %scalex horizontally.  Then each horizontal triplet
- *          of pixels is mapped back to a single rgb pixel, with the
- *          r, g and b values being assigned from the triplet of gray values.
- *          Similar operations are used for vertical subpixel splitting.
- *      (3) This is a form of subpixel rendering that tends to give the
- *          resulting text a sharper and somewhat chromatic display.
- *          For horizontal subpixel splitting, the observable difference
- *          between %order=L_SUBPIXEL_ORDER_RGB and
- *          %order=L_SUBPIXEL_ORDER_BGR is reduced by optical diffusers
- *          in the display that make the pixel color appear to emerge
- *          from the entire pixel.
- * 
- */ -PIX * -pixConvertGrayToSubpixelRGB(PIX *pixs, - l_float32 scalex, - l_float32 scaley, - l_int32 order) -{ -l_int32 w, h, d, wd, hd, wplt, wpld, i, j, rval, gval, bval, direction; -l_uint32 *datat, *datad, *linet, *lined; -PIX *pix1, *pix2, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertGrayToSubpixelRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (d != 8 && !cmap) - return (PIX *)ERROR_PTR("pix not 8 bpp & not cmapped", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factors must be > 0", procName, NULL); - if (order != L_SUBPIXEL_ORDER_RGB && order != L_SUBPIXEL_ORDER_BGR && - order != L_SUBPIXEL_ORDER_VRGB && order != L_SUBPIXEL_ORDER_VBGR) - return (PIX *)ERROR_PTR("invalid subpixel order", procName, NULL); - - direction = - (order == L_SUBPIXEL_ORDER_RGB || order == L_SUBPIXEL_ORDER_BGR) - ? L_HORIZ : L_VERT; - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); - if (direction == L_HORIZ) - pix2 = pixScale(pix1, 3.0 * scalex, scaley); - else /* L_VERT */ - pix2 = pixScale(pix1, scalex, 3.0 * scaley); - - pixGetDimensions(pix2, &w, &h, NULL); - wd = (direction == L_HORIZ) ? w / 3 : w; - hd = (direction == L_VERT) ? 
h / 3 : h; - pixd = pixCreate(wd, hd, 32); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datat = pixGetData(pix2); - wplt = pixGetWpl(pix2); - if (direction == L_HORIZ) { - for (i = 0; i < hd; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - rval = GET_DATA_BYTE(linet, 3 * j); - gval = GET_DATA_BYTE(linet, 3 * j + 1); - bval = GET_DATA_BYTE(linet, 3 * j + 2); - if (order == L_SUBPIXEL_ORDER_RGB) - composeRGBPixel(rval, gval, bval, &lined[j]); - else /* order BGR */ - composeRGBPixel(bval, gval, rval, &lined[j]); - } - } - } else { /* L_VERT */ - for (i = 0; i < hd; i++) { - linet = datat + 3 * i * wplt; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - rval = GET_DATA_BYTE(linet, j); - gval = GET_DATA_BYTE(linet + wplt, j); - bval = GET_DATA_BYTE(linet + 2 * wplt, j); - if (order == L_SUBPIXEL_ORDER_VRGB) - composeRGBPixel(rval, gval, bval, &lined[j]); - else /* order VBGR */ - composeRGBPixel(bval, gval, rval, &lined[j]); - } - } - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} - - -/*! - * \brief pixConvertColorToSubpixelRGB() - * - * \param[in] pixs 32 bpp or colormapped - * \param[in] scalex, scaley - * \param[in] order of subpixel rgb color components in - * composition of pixd: - * L_SUBPIXEL_ORDER_RGB, L_SUBPIXEL_ORDER_BGR, - * L_SUBPIXEL_ORDER_VRGB, L_SUBPIXEL_ORDER_VBGR - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) If pixs has a colormap, it is removed to 32 bpp rgb.
- *          If the colormap has no color, pixConvertGrayToSubpixelRGB()
- *          should be called instead, because it will give the same result
- *          more efficiently.  The function pixConvertToSubpixelRGB()
- *          will do the best thing for all cases.
- *      (2) For horizontal subpixel splitting, the input rgb image
- *          is rescaled by %scaley vertically and by 3.0 times
- *          %scalex horizontally.  Then for each horizontal triplet
- *          of pixels, the r component of the final pixel is selected
- *          from the r component of the appropriate pixel in the triplet,
- *          and likewise for g and b.  Vertical subpixel splitting is
- *          handled similarly.
- * 
- */ -PIX * -pixConvertColorToSubpixelRGB(PIX *pixs, - l_float32 scalex, - l_float32 scaley, - l_int32 order) -{ -l_int32 w, h, d, wd, hd, wplt, wpld, i, j, rval, gval, bval, direction; -l_uint32 *datat, *datad, *linet, *lined; -PIX *pix1, *pix2, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixConvertColorToSubpixelRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (d != 32 && !cmap) - return (PIX *)ERROR_PTR("pix not 32 bpp & not cmapped", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factors must be > 0", procName, NULL); - if (order != L_SUBPIXEL_ORDER_RGB && order != L_SUBPIXEL_ORDER_BGR && - order != L_SUBPIXEL_ORDER_VRGB && order != L_SUBPIXEL_ORDER_VBGR) - return (PIX *)ERROR_PTR("invalid subpixel order", procName, NULL); - - direction = - (order == L_SUBPIXEL_ORDER_RGB || order == L_SUBPIXEL_ORDER_BGR) - ? L_HORIZ : L_VERT; - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR); - if (direction == L_HORIZ) - pix2 = pixScale(pix1, 3.0 * scalex, scaley); - else /* L_VERT */ - pix2 = pixScale(pix1, scalex, 3.0 * scaley); - - pixGetDimensions(pix2, &w, &h, NULL); - wd = (direction == L_HORIZ) ? w / 3 : w; - hd = (direction == L_VERT) ? 
h / 3 : h; - pixd = pixCreate(wd, hd, 32); - pixCopyInputFormat(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - datat = pixGetData(pix2); - wplt = pixGetWpl(pix2); - if (direction == L_HORIZ) { - for (i = 0; i < hd; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - if (order == L_SUBPIXEL_ORDER_RGB) { - extractRGBValues(linet[3 * j], &rval, NULL, NULL); - extractRGBValues(linet[3 * j + 1], NULL, &gval, NULL); - extractRGBValues(linet[3 * j + 2], NULL, NULL, &bval); - } else { /* order BGR */ - extractRGBValues(linet[3 * j], NULL, NULL, &bval); - extractRGBValues(linet[3 * j + 1], NULL, &gval, NULL); - extractRGBValues(linet[3 * j + 2], &rval, NULL, NULL); - } - composeRGBPixel(rval, gval, bval, &lined[j]); - } - } - } else { /* L_VERT */ - for (i = 0; i < hd; i++) { - linet = datat + 3 * i * wplt; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - if (order == L_SUBPIXEL_ORDER_VRGB) { - extractRGBValues(linet[j], &rval, NULL, NULL); - extractRGBValues((linet + wplt)[j], NULL, &gval, NULL); - extractRGBValues((linet + 2 * wplt)[j], NULL, NULL, &bval); - } else { /* order VBGR */ - extractRGBValues(linet[j], NULL, NULL, &bval); - extractRGBValues((linet + wplt)[j], NULL, &gval, NULL); - extractRGBValues((linet + 2 * wplt)[j], &rval, NULL, NULL); - } - composeRGBPixel(rval, gval, bval, &lined[j]); - } - } - } - - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, scalex, scaley); - - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} - - -/*---------------------------------------------------------------------* - * Setting neutral point for min/max boost conversion to gray * - *---------------------------------------------------------------------*/ -/*! - * \brief l_setNeutralBoostVal() - * - * \param[in] val between 1 and 255; typical value is 180 - * \return void - * - *
- * Notes:
- *      (1) This raises or lowers the selected min or max RGB component value,
- *          depending on if that component is above or below this value.
- * 
- */ -void -l_setNeutralBoostVal(l_int32 val) -{ - PROCNAME("l_setNeutralBoostVal"); - - if (val <= 0) { - L_ERROR("invalid reference value for neutral boost\n", procName); - return; - } - var_NEUTRAL_BOOST_VAL = val; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixlabel.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixlabel.c deleted file mode 100644 index 576ea55a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixlabel.c +++ /dev/null @@ -1,637 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/*! - * \file pixlabel.c - *
- *
- *     Label pixels by an index for connected component membership
- *           PIX         *pixConnCompTransform()
- *
- *     Label pixels by the area of their connected component
- *           PIX         *pixConnCompAreaTransform()
- *
- *     Label pixels to allow incremental computation of connected components
- *           l_int32      pixConnCompIncrInit()
- *           l_int32      pixConnCompIncrAdd()
- *           l_int32      pixGetSortedNeighborValues()
- *
- *     Label pixels with spatially-dependent color coding
- *           PIX         *pixLocToColorTransform()
- *
- *  Pixels get labelled in various ways throughout the leptonica library,
- *  but most of the labelling is implicit, where the new value isn't
- *  even considered to be a label -- it is just a transformed pixel value
- *  that may be transformed again by another operation.  Quantization
- *  by thresholding, and dilation by a structuring element, are examples
- *  of these typical image processing operations.
- *
- *  However, there are some explicit labelling procedures that are useful
- *  as end-points of analysis, where it typically would not make sense
- *  to do further image processing on the result.  Assigning false color
- *  based on pixel properties is an example of such labelling operations.
- *  Such operations typically have 1 bpp input images, and result
- *  in grayscale or color images.
- *
- *  The procedures in this file are concerned with such explicit labelling.
- *  Some of these labelling procedures are also in other places in leptonica:
- *
- *    runlength.c:
- *       This file has two labelling transforms based on runlengths:
- *       pixStrokeWidthTransform() and pixvRunlengthTransform().
- *       The pixels are labelled based on the width of the "stroke" to
- *       which they belong, or on the length of the horizontal or
- *       vertical run in which they are a member.  Runlengths can easily
- *       be filtered using a threshold.
- *
- *    pixafunc2.c:
- *       This file has an operation, pixaDisplayRandomCmap(), that
- *       randomly labels pix in a pixa (that are typically found using
- *       pixConnComp) with up to 256 values, and assigns each value to
- *       a random colormap color.
- *
- *    seedfill.c:
- *       This file has pixDistanceFunction(), that labels each pixel with
- *       its distance from either the foreground or the background.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/*-----------------------------------------------------------------------* - * Label pixels by an index for connected component membership * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixConnCompTransform() - * - * \param[in] pixs 1 bpp - * \param[in] connect connectivity: 4 or 8 - * \param[in] depth of pixd: 8 or 16 bpp; use 0 for auto determination - * \return pixd 8, 16 or 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) pixd is 8, 16 or 32 bpp, and the pixel values label the
- *          fg component, starting with 1.  Pixels in the bg are labelled 0.
- *      (2) If %depth = 0, the depth of pixd is 8 if the number of c.c.
- *          is less than 254, 16 if the number of c.c is less than 0xfffe,
- *          and 32 otherwise.
- *      (3) If %depth = 8, the assigned label for the n-th component is
- *          1 + n % 254.  We use mod 254 because 0 is uniquely assigned
- *          to black: e.g., see pixcmapCreateRandom().  Likewise,
- *          if %depth = 16, the assigned label uses mod(2^16 - 2), and
- *          if %depth = 32, no mod is taken.
- * 
- */ -PIX * -pixConnCompTransform(PIX *pixs, - l_int32 connect, - l_int32 depth) -{ -l_int32 i, n, index, w, h, xb, yb, wb, hb; -BOXA *boxa; -PIX *pix1, *pix2, *pixd; -PIXA *pixa; - - PROCNAME("pixConnCompTransform"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connect != 4 && connect != 8) - return (PIX *)ERROR_PTR("connectivity must be 4 or 8", procName, NULL); - if (depth != 0 && depth != 8 && depth != 16 && depth != 32) - return (PIX *)ERROR_PTR("depth must be 0, 8, 16 or 32", procName, NULL); - - boxa = pixConnComp(pixs, &pixa, connect); - n = pixaGetCount(pixa); - boxaDestroy(&boxa); - pixGetDimensions(pixs, &w, &h, NULL); - if (depth == 0) { - if (n < 254) - depth = 8; - else if (n < 0xfffe) - depth = 16; - else - depth = 32; - } - pixd = pixCreate(w, h, depth); - pixSetSpp(pixd, 1); - if (n == 0) { /* no fg */ - pixaDestroy(&pixa); - return pixd; - } - - /* Label each component and blit it in */ - for (i = 0; i < n; i++) { - pixaGetBoxGeometry(pixa, i, &xb, &yb, &wb, &hb); - pix1 = pixaGetPix(pixa, i, L_CLONE); - if (depth == 8) { - index = 1 + (i % 254); - pix2 = pixConvert1To8(NULL, pix1, 0, index); - } else if (depth == 16) { - index = 1 + (i % 0xfffe); - pix2 = pixConvert1To16(NULL, pix1, 0, index); - } else { /* depth == 32 */ - index = 1 + i; - pix2 = pixConvert1To32(NULL, pix1, 0, index); - } - pixRasterop(pixd, xb, yb, wb, hb, PIX_PAINT, pix2, 0, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixaDestroy(&pixa); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Label pixels by the area of their connected component * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixConnCompAreaTransform() - * - * \param[in] pixs 1 bpp - * \param[in] connect connectivity: 4 or 8 - * \return pixd 32 bpp, 1 spp, or NULL on error - * - *
- * Notes:
- *      (1) The pixel values in pixd label the area of the fg component
- *          to which the pixel belongs.  Pixels in the bg are labelled 0.
- *      (2) For purposes of visualization, the output can be converted
- *          to 8 bpp, using pixConvert32To8() or pixMaxDynamicRange().
- * 
- */ -PIX * -pixConnCompAreaTransform(PIX *pixs, - l_int32 connect) -{ -l_int32 i, n, npix, w, h, xb, yb, wb, hb; -l_int32 *tab8; -BOXA *boxa; -PIX *pix1, *pix2, *pixd; -PIXA *pixa; - - PROCNAME("pixConnCompAreaTransform"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connect != 4 && connect != 8) - return (PIX *)ERROR_PTR("connectivity must be 4 or 8", procName, NULL); - - boxa = pixConnComp(pixs, &pixa, connect); - n = pixaGetCount(pixa); - boxaDestroy(&boxa); - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixCreate(w, h, 32); - pixSetSpp(pixd, 1); - if (n == 0) { /* no fg */ - pixaDestroy(&pixa); - return pixd; - } - - /* Label each component and blit it in */ - tab8 = makePixelSumTab8(); - for (i = 0; i < n; i++) { - pixaGetBoxGeometry(pixa, i, &xb, &yb, &wb, &hb); - pix1 = pixaGetPix(pixa, i, L_CLONE); - pixCountPixels(pix1, &npix, tab8); - pix2 = pixConvert1To32(NULL, pix1, 0, npix); - pixRasterop(pixd, xb, yb, wb, hb, PIX_PAINT, pix2, 0, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - pixaDestroy(&pixa); - LEPT_FREE(tab8); - return pixd; -} - - -/*-------------------------------------------------------------------------* - * Label pixels to allow incremental computation of connected components * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixConnCompIncrInit() - * - * \param[in] pixs 1 bpp - * \param[in] conn connectivity: 4 or 8 - * \param[out] ppixd 32 bpp, with c.c. labelled - * \param[out] pptaa with pixel locations indexed by c.c. - * \param[out] pncc initial number of c.c. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This labels the connected components in a 1 bpp pix, and
- *          additionally sets up a ptaa that lists the locations of pixels
- *          in each of the components.
- *      (2) It can be used to initialize the output image and arrays for
- *          an application that maintains information about connected
- *          components incrementally as pixels are added.
- *      (3) pixs can be empty or have some foreground pixels.
- *      (4) The connectivity is stored in pixd->special.
- *      (5) Always initialize with the first pta in ptaa being empty
- *          and representing the background value (index 0) in the pix.
- * 
- */ -l_ok -pixConnCompIncrInit(PIX *pixs, - l_int32 conn, - PIX **ppixd, - PTAA **pptaa, - l_int32 *pncc) -{ -l_int32 empty, w, h, ncc; -PIX *pixd; -PTA *pta; -PTAA *ptaa; - - PROCNAME("pixConnCompIncrInit"); - - if (ppixd) *ppixd = NULL; - if (pptaa) *pptaa = NULL; - if (pncc) *pncc = 0; - if (!ppixd || !pptaa || !pncc) - return ERROR_INT("&pixd, &ptaa, &ncc not all defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); - if (conn != 4 && conn != 8) - return ERROR_INT("connectivity must be 4 or 8", procName, 1); - - pixGetDimensions(pixs, &w, &h, NULL); - pixZero(pixs, &empty); - if (empty) { - *ppixd = pixCreate(w, h, 32); - pixSetSpp(*ppixd, 1); - pixSetSpecial(*ppixd, conn); - *pptaa = ptaaCreate(0); - pta = ptaCreate(1); - ptaaAddPta(*pptaa, pta, L_INSERT); /* reserve index 0 for background */ - return 0; - } - - /* Set up the initial labeled image and indexed pixel arrays */ - if ((pixd = pixConnCompTransform(pixs, conn, 32)) == NULL) - return ERROR_INT("pixd not made", procName, 1); - pixSetSpecial(pixd, conn); - *ppixd = pixd; - if ((ptaa = ptaaIndexLabeledPixels(pixd, &ncc)) == NULL) - return ERROR_INT("ptaa not made", procName, 1); - *pptaa = ptaa; - *pncc = ncc; - return 0; -} - - -/*! - * \brief pixConnCompIncrAdd() - * - * \param[in] pixs 32 bpp, with pixels labeled by c.c. - * \param[in] ptaa with each pta of pixel locations indexed by c.c. - * \param[out] pncc number of c.c - * \param[in] x,y location of added pixel - * \param[in] debug 0 for no output; otherwise output whenever - * debug <= nvals, up to debug == 3 - * \return -1 if nothing happens; 0 if a pixel is added; 1 on error - * - *
- * Notes:
- *      (1) This adds a pixel and updates the labeled connected components.
- *          Before calling this function, initialize the process using
- *          pixConnCompIncrInit().
- *      (2) As a result of adding a pixel, one of the following can happen,
- *          depending on the number of neighbors with non-zero value:
- *          (a) nothing: the pixel is already a member of a c.c.
- *          (b) no neighbors: a new component is added, increasing the
- *              number of c.c.
- *          (c) one neighbor: the pixel is added to an existing c.c.
- *          (d) more than one neighbor: the added pixel causes joining of
- *              two or more c.c., reducing the number of c.c.  A maximum
- *              of 4 c.c. can be joined.
- *      (3) When two c.c. are joined, the pixels in the larger index are
- *          relabeled to those of the smaller in pixs, and their locations
- *          are transferred to the pta with the smaller index in the ptaa.
- *          The pta corresponding to the larger index is then deleted.
- *      (4) This is an efficient implementation of a "union-find" operation,
- *          which supports the generation and merging of disjoint sets
- *          of pixels.  This function can be called about 1.3 million times
- *          per second.
- * 
- */ -l_int32 -pixConnCompIncrAdd(PIX *pixs, - PTAA *ptaa, - l_int32 *pncc, - l_float32 x, - l_float32 y, - l_int32 debug) -{ -l_int32 conn, i, j, w, h, count, nvals, ns, firstindex; -l_uint32 val; -l_int32 *neigh; -PTA *ptas, *ptad; - - PROCNAME("pixConnCompIncrAdd"); - - if (!pixs || pixGetDepth(pixs) != 32) - return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - if (!pncc) - return ERROR_INT("&ncc not defined", procName, 1); - conn = pixs->special; - if (conn != 4 && conn != 8) - return ERROR_INT("connectivity must be 4 or 8", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (x < 0 || x >= w) - return ERROR_INT("invalid x pixel location", procName, 1); - if (y < 0 || y >= h) - return ERROR_INT("invalid y pixel location", procName, 1); - - pixGetPixel(pixs, x, y, &val); - if (val > 0) /* already belongs to a set */ - return -1; - - /* Find unique neighbor pixel values in increasing order of value. - * If %nvals > 0, these are returned in the %neigh array, which - * is of size %nvals. Note that the pixel values in each - * connected component are used as the index into the pta - * array of the ptaa, giving the pixel locations. */ - pixGetSortedNeighborValues(pixs, x, y, conn, &neigh, &nvals); - - /* If there are no neighbors, just add a new component */ - if (nvals == 0) { - count = ptaaGetCount(ptaa); - pixSetPixel(pixs, x, y, count); - ptas = ptaCreate(1); - ptaAddPt(ptas, x, y); - ptaaAddPta(ptaa, ptas, L_INSERT); - *pncc += 1; - LEPT_FREE(neigh); - return 0; - } - - /* Otherwise, there is at least one neighbor. Add the pixel - * to the first neighbor c.c. 
*/ - firstindex = neigh[0]; - pixSetPixel(pixs, x, y, firstindex); - ptaaAddPt(ptaa, neigh[0], x, y); - if (nvals == 1) { - if (debug == 1) - lept_stderr("nvals = %d: neigh = (%d)\n", nvals, neigh[0]); - LEPT_FREE(neigh); - return 0; - } - - /* If nvals > 1, there are at least 2 neighbors, so this pixel - * joins at least one pair of existing c.c. Join each component - * to the first component in the list, which is the one with - * the smallest integer label. This is done in two steps: - * (a) re-label the pixels in the component to the label of the - * first component, and - * (b) save the pixel locations in the pta for the first component. */ - if (nvals == 2) { - if (debug >= 1 && debug <= 2) { - lept_stderr("nvals = %d: neigh = (%d,%d)\n", nvals, - neigh[0], neigh[1]); - } - } else if (nvals == 3) { - if (debug >= 1 && debug <= 3) { - lept_stderr("nvals = %d: neigh = (%d,%d,%d)\n", nvals, - neigh[0], neigh[1], neigh[2]); - } - } else { /* nvals == 4 */ - if (debug >= 1 && debug <= 4) { - lept_stderr("nvals = %d: neigh = (%d,%d,%d,%d)\n", nvals, - neigh[0], neigh[1], neigh[2], neigh[3]); - } - } - ptad = ptaaGetPta(ptaa, firstindex, L_CLONE); - for (i = 1; i < nvals; i++) { - ptas = ptaaGetPta(ptaa, neigh[i], L_CLONE); - ns = ptaGetCount(ptas); - for (j = 0; j < ns; j++) { /* relabel pixels */ - ptaGetPt(ptas, j, &x, &y); - pixSetPixel(pixs, x, y, firstindex); - } - ptaJoin(ptad, ptas, 0, -1); /* add relabeled pixel locations */ - *pncc -= 1; - ptaDestroy(&ptaa->pta[neigh[i]]); - ptaDestroy(&ptas); /* the clone */ - } - ptaDestroy(&ptad); /* the clone */ - LEPT_FREE(neigh); - return 0; -} - - -/*! - * \brief pixGetSortedNeighborValues() - * - * \param[in] pixs 8, 16 or 32 bpp, with pixels labeled by c.c. 
- * \param[in] x, y location of pixel - * \param[in] conn 4 or 8 connected neighbors - * \param[out] pneigh array of integers, to be filled with - * the values of the neighbors, if any - * \param[out] pnvals the number of unique neighbor values found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The returned %neigh array is the unique set of neighboring
- *          pixel values, of size nvals, sorted from smallest to largest.
- *          The value 0, which represents background pixels that do
- *          not belong to any set of connected components, is discarded.
- *      (2) If there are no neighbors, this returns %neigh = NULL; otherwise,
- *          the caller must free the array.
- *      (3) For either 4 or 8 connectivity, the maximum number of unique
- *          neighbor values is 4.
- * 
- */ -l_ok -pixGetSortedNeighborValues(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 conn, - l_int32 **pneigh, - l_int32 *pnvals) -{ -l_int32 i, npt, index; -l_int32 neigh[4]; -l_uint32 val; -l_float32 fx, fy; -L_ASET *aset; -L_ASET_NODE *node; -PTA *pta; -RB_TYPE key; - - PROCNAME("pixGetSortedNeighborValues"); - - if (pneigh) *pneigh = NULL; - if (pnvals) *pnvals = 0; - if (!pneigh || !pnvals) - return ERROR_INT("&neigh and &nvals not both defined", procName, 1); - if (!pixs || pixGetDepth(pixs) < 8) - return ERROR_INT("pixs not defined or depth < 8", procName, 1); - - /* Identify the locations of nearest neighbor pixels */ - if ((pta = ptaGetNeighborPixLocs(pixs, x, y, conn)) == NULL) - return ERROR_INT("pta of neighbors not made", procName, 1); - - /* Find the pixel values and insert into a set as keys */ - aset = l_asetCreate(L_UINT_TYPE); - npt = ptaGetCount(pta); - for (i = 0; i < npt; i++) { - ptaGetPt(pta, i, &fx, &fy); - pixGetPixel(pixs, (l_int32)fx, (l_int32)fy, &val); - key.utype = val; - l_asetInsert(aset, key); - } - - /* Extract the set keys and put them into the %neigh array. - * Omit the value 0, which indicates the pixel doesn't - * belong to one of the sets of connected components. */ - node = l_asetGetFirst(aset); - index = 0; - while (node) { - val = node->key.utype; - if (val > 0) - neigh[index++] = (l_int32)val; - node = l_asetGetNext(node); - } - *pnvals = index; - if (index > 0) { - *pneigh = (l_int32 *)LEPT_CALLOC(index, sizeof(l_int32)); - for (i = 0; i < index; i++) - (*pneigh)[i] = neigh[i]; - } - - ptaDestroy(&pta); - l_asetDestroy(&aset); - return 0; -} - - -/*-----------------------------------------------------------------------* - * Label pixels with spatially-dependent color coding * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixLocToColorTransform() - * - * \param[in] pixs 1 bpp - * \return pixd 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) This generates an RGB image where each component value
- *          is coded depending on the (x.y) location and the size
- *          of the fg connected component that the pixel in pixs belongs to.
- *          It is independent of the 4-fold orthogonal orientation, and
- *          only weakly depends on translations and small angle rotations.
- *          Background pixels are black.
- *      (2) Such encodings can be compared between two 1 bpp images
- *          by performing this transform and calculating the
- *          "earth-mover" distance on the resulting R,G,B histograms.
- * 
- */ -PIX * -pixLocToColorTransform(PIX *pixs) -{ -l_int32 w, h, w2, h2, wpls, wplr, wplg, wplb, wplcc, i, j, rval, gval, bval; -l_float32 invw2, invh2; -l_uint32 *datas, *datar, *datag, *datab, *datacc; -l_uint32 *lines, *liner, *lineg, *lineb, *linecc; -PIX *pix1, *pixcc, *pixr, *pixg, *pixb, *pixd; - - PROCNAME("pixLocToColorTransform"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - /* Label each pixel with the area of the c.c. to which it belongs. - * Clip the result to 255 in an 8 bpp pix. This is used for - * the blue component of pixd. */ - pixGetDimensions(pixs, &w, &h, NULL); - w2 = w / 2; - h2 = h / 2; - invw2 = 255.0 / (l_float32)w2; - invh2 = 255.0 / (l_float32)h2; - pix1 = pixConnCompAreaTransform(pixs, 8); - pixcc = pixConvert32To8(pix1, L_LS_TWO_BYTES, L_CLIP_TO_FF); - pixDestroy(&pix1); - - /* Label the red and green components depending on the location - * of the fg pixels, in a way that is 4-fold rotationally invariant. 
*/ - pixr = pixCreate(w, h, 8); - pixg = pixCreate(w, h, 8); - pixb = pixCreate(w, h, 8); - wpls = pixGetWpl(pixs); - wplr = pixGetWpl(pixr); - wplg = pixGetWpl(pixg); - wplb = pixGetWpl(pixb); - wplcc = pixGetWpl(pixcc); - datas = pixGetData(pixs); - datar = pixGetData(pixr); - datag = pixGetData(pixg); - datab = pixGetData(pixb); - datacc = pixGetData(pixcc); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - liner = datar + i * wplr; - lineg = datag + i * wplg; - lineb = datab + i * wplb; - linecc = datacc+ i * wplcc; - for (j = 0; j < w; j++) { - if (GET_DATA_BIT(lines, j) == 0) continue; - if (w < h) { - rval = invh2 * L_ABS((l_float32)(i - h2)); - gval = invw2 * L_ABS((l_float32)(j - w2)); - } else { - rval = invw2 * L_ABS((l_float32)(j - w2)); - gval = invh2 * L_ABS((l_float32)(i - h2)); - } - bval = GET_DATA_BYTE(linecc, j); - SET_DATA_BYTE(liner, j, rval); - SET_DATA_BYTE(lineg, j, gval); - SET_DATA_BYTE(lineb, j, bval); - } - } - pixd = pixCreateRGBImage(pixr, pixg, pixb); - - pixDestroy(&pixcc); - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixtiling.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixtiling.c deleted file mode 100644 index 480a3d1d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pixtiling.c +++ /dev/null @@ -1,423 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pixtiling.c - *
- *
- *        PIXTILING       *pixTilingCreate()
- *        void            *pixTilingDestroy()
- *        l_int32          pixTilingGetCount()
- *        l_int32          pixTilingGetSize()
- *        PIX             *pixTilingGetTile()
- *        l_int32          pixTilingNoStripOnPaint()
- *        l_int32          pixTilingPaintTile()
- *
- *   This provides a simple way to split an image into tiles
- *   and to perform operations independently on each tile.
- *
- *   The tile created with pixTilingGetTile() can have pixels in
- *   adjacent tiles for computation.  The number of extra pixels
- *   on each side of the tile is given by an 'overlap' parameter
- *   to pixTilingCreate().  For tiles at the boundary of
- *   the input image, quasi-overlap pixels are created by reflection
- *   symmetry into the tile.
- *
- *   Here's a typical intended usage.  Suppose you want to parallelize
- *   the operation on an image, by operating on tiles.  For each
- *   tile, you want to generate an in-place image result at the same
- *   resolution.  Suppose you choose a one-dimensional vertical tiling,
- *   where the desired tile width is 256 pixels and the overlap is
- *   30 pixels on left and right sides:
- *
- *     PIX *pixd = pixCreateTemplate(pixs);  // output
- *     PIXTILING  *pt = pixTilingCreate(pixs, 0, 1, 256, 30, 0);
- *     pixTilingGetCount(pt, &nx, NULL);
- *     for (j = 0; j < nx; j++) {
- *         PIX *pixt = pixTilingGetTile(pt, 0, j);
- *         SomeInPlaceOperation(pixt, 30, 0, ...);
- *         pixTilingPaintTile(pixd, 0, j, pixt, pt);
- *         pixDestroy(&pixt);
- *     }
- *
- *   In this example, note the following:
- *    ~ The unspecfified in-place operation could instead generate
- *      a new pix.  If this is done, the resulting pix must be the
- *      same size as pixt, because pixTilingPaintTile() makes that
- *      assumption, removing the overlap pixels before painting
- *      into the destination.
- *    ~ The 'overlap' parameters have been included in your function,
- *      to indicate which pixels are not in the exterior overlap region.
- *      You will need to change only pixels that are not in the overlap
- *      region, because those are the pixels that will be painted
- *      into the destination.
- *    ~ For tiles on the outside of the image, mirrored pixels are
- *      added to substitute for the overlap that is added to interior
- *      tiles.  This allows you to implement your function without
- *      reference to which tile it is; no special coding is necessary
- *      for pixels that are near the image boundary.
- *    ~ The tiles are labeled by (i, j) = (row, column),
- *      and in this example there is one row and nx columns.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*! - * \brief pixTilingCreate() - * - * \param[in] pixs pix to be tiled; any depth; colormap OK - * \param[in] nx number of tiles across image - * \param[in] ny number of tiles down image - * \param[in] w desired width of each tile - * \param[in] h desired height of each tile - * \param[in] xoverlap overlap into neighboring tiles on each side - * \param[in] yoverlap overlap into neighboring tiles above and below - * \return pixtiling, or NULL on error - * - *
- * Notes:
- *      (1) We put a clone of pixs in the PixTiling.
- *      (2) The input to pixTilingCreate() for horizontal tiling can be
- *          either the number of tiles across the image or the approximate
- *          width of the tiles.  If the latter, the actual width will be
- *          determined by making all tiles but the last of equal width, and
- *          making the last as close to the others as possible.  The same
- *          consideration is applied independently to the vertical tiling.
- *          To specify tile width, set nx = 0; to specify the number of
- *          tiles horizontally across the image, set w = 0.
- *      (3) If pixs is to be tiled in one-dimensional strips, use ny = 1 for
- *          vertical strips and nx = 1 for horizontal strips.
- *      (4) The overlap must not be larger than the width or height of
- *          the leftmost or topmost tile(s).
- * 
- */ -PIXTILING * -pixTilingCreate(PIX *pixs, - l_int32 nx, - l_int32 ny, - l_int32 w, - l_int32 h, - l_int32 xoverlap, - l_int32 yoverlap) -{ -l_int32 width, height; -PIXTILING *pt; - - PROCNAME("pixTilingCreate"); - - if (!pixs) - return (PIXTILING *)ERROR_PTR("pixs not defined", procName, NULL); - if (nx < 1 && w < 1) - return (PIXTILING *)ERROR_PTR("invalid width spec", procName, NULL); - if (ny < 1 && h < 1) - return (PIXTILING *)ERROR_PTR("invalid height spec", procName, NULL); - - /* Find the tile width and number of tiles. All tiles except the - * rightmost ones have the same width. The width of the - * rightmost ones are at least the width of the others and - * less than twice that width. Ditto for tile height. */ - pixGetDimensions(pixs, &width, &height, NULL); - if (nx == 0) - nx = L_MAX(1, width / w); - w = width / nx; /* possibly reset */ - if (ny == 0) - ny = L_MAX(1, height / h); - h = height / ny; /* possibly reset */ - if (xoverlap > w || yoverlap > h) { - L_INFO("tile width = %d, tile height = %d\n", procName, w, h); - return (PIXTILING *)ERROR_PTR("overlap too large", procName, NULL); - } - - pt = (PIXTILING *)LEPT_CALLOC(1, sizeof(PIXTILING)); - pt->pix = pixClone(pixs); - pt->xoverlap = xoverlap; - pt->yoverlap = yoverlap; - pt->nx = nx; - pt->ny = ny; - pt->w = w; - pt->h = h; - pt->strip = TRUE; - return pt; -} - - -/*! - * \brief pixTilingDestroy() - * - * \param[in,out] ppt will be set to null before returning - * \return void - */ -void -pixTilingDestroy(PIXTILING **ppt) -{ -PIXTILING *pt; - - PROCNAME("pixTilingDestroy"); - - if (ppt == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((pt = *ppt) == NULL) - return; - - pixDestroy(&pt->pix); - LEPT_FREE(pt); - *ppt = NULL; - return; -} - - -/*! 
- * \brief pixTilingGetCount() - * - * \param[in] pt pixtiling - * \param[out] pnx [optional] nx; can be null - * \param[out] pny [optional] ny; can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixTilingGetCount(PIXTILING *pt, - l_int32 *pnx, - l_int32 *pny) -{ - PROCNAME("pixTilingGetCount"); - - if (!pt) - return ERROR_INT("pt not defined", procName, 1); - if (pnx) *pnx = pt->nx; - if (pny) *pny = pt->ny; - return 0; -} - - -/*! - * \brief pixTilingGetSize() - * - * \param[in] pt pixtiling - * \param[out] pw [optional] tile width; can be null - * \param[out] ph [optional] tile height; can be null - * \return 0 if OK, 1 on error - */ -l_ok -pixTilingGetSize(PIXTILING *pt, - l_int32 *pw, - l_int32 *ph) -{ - PROCNAME("pixTilingGetSize"); - - if (!pt) - return ERROR_INT("pt not defined", procName, 1); - if (pw) *pw = pt->w; - if (ph) *ph = pt->h; - return 0; -} - - -/*! - * \brief pixTilingGetTile() - * - * \param[in] pt pixtiling - * \param[in] i tile row index - * \param[in] j tile column index - * \return pixd tile with appropriate boundary (overlap) pixels added, - * or NULL on error - */ -PIX * -pixTilingGetTile(PIXTILING *pt, - l_int32 i, - l_int32 j) -{ -l_int32 wpix, hpix, wt, ht, nx, ny; -l_int32 xoverlap, yoverlap, wtlast, htlast; -l_int32 left, top, xtraleft, xtraright, xtratop, xtrabot, width, height; -BOX *box; -PIX *pixs, *pixt, *pixd; - - PROCNAME("pixTilingGetTile"); - - if (!pt) - return (PIX *)ERROR_PTR("pt not defined", procName, NULL); - if ((pixs = pt->pix) == NULL) - return (PIX *)ERROR_PTR("pix not found", procName, NULL); - pixTilingGetCount(pt, &nx, &ny); - if (i < 0 || i >= ny) - return (PIX *)ERROR_PTR("invalid row index i", procName, NULL); - if (j < 0 || j >= nx) - return (PIX *)ERROR_PTR("invalid column index j", procName, NULL); - - /* Grab the tile with as much overlap as exists within the - * input pix. First, compute the (left, top) coordinates. 
*/ - pixGetDimensions(pixs, &wpix, &hpix, NULL); - pixTilingGetSize(pt, &wt, &ht); - xoverlap = pt->xoverlap; - yoverlap = pt->yoverlap; - wtlast = wpix - wt * (nx - 1); - htlast = hpix - ht * (ny - 1); - left = L_MAX(0, j * wt - xoverlap); - top = L_MAX(0, i * ht - yoverlap); - - /* Get the width and height of the tile, including whatever - * overlap is available. */ - if (nx == 1) - width = wpix; - else if (j == 0) - width = wt + xoverlap; - else if (j == nx - 1) - width = wtlast + xoverlap; - else - width = wt + 2 * xoverlap; - - if (ny == 1) - height = hpix; - else if (i == 0) - height = ht + yoverlap; - else if (i == ny - 1) - height = htlast + yoverlap; - else - height = ht + 2 * yoverlap; - box = boxCreate(left, top, width, height); - pixt = pixClipRectangle(pixs, box, NULL); - boxDestroy(&box); - - /* If no overlap, do not add any special case borders */ - if (xoverlap == 0 && yoverlap == 0) - return pixt; - - /* Add overlap as a mirrored border, in the 8 special cases where - * the tile touches the border of the input pix. The xtratop (etc) - * parameters are required where the tile is either full width - * or full height. 
*/ - xtratop = xtrabot = xtraleft = xtraright = 0; - if (nx == 1) - xtraleft = xtraright = xoverlap; - if (ny == 1) - xtratop = xtrabot = yoverlap; - if (i == 0 && j == 0) - pixd = pixAddMirroredBorder(pixt, xoverlap, xtraright, - yoverlap, xtrabot); - else if (i == 0 && j == nx - 1) - pixd = pixAddMirroredBorder(pixt, xtraleft, xoverlap, - yoverlap, xtrabot); - else if (i == ny - 1 && j == 0) - pixd = pixAddMirroredBorder(pixt, xoverlap, xtraright, - xtratop, yoverlap); - else if (i == ny - 1 && j == nx - 1) - pixd = pixAddMirroredBorder(pixt, xtraleft, xoverlap, - xtratop, yoverlap); - else if (i == 0) - pixd = pixAddMirroredBorder(pixt, 0, 0, yoverlap, xtrabot); - else if (i == ny - 1) - pixd = pixAddMirroredBorder(pixt, 0, 0, xtratop, yoverlap); - else if (j == 0) - pixd = pixAddMirroredBorder(pixt, xoverlap, xtraright, 0, 0); - else if (j == nx - 1) - pixd = pixAddMirroredBorder(pixt, xtraleft, xoverlap, 0, 0); - else - pixd = pixClone(pixt); - pixDestroy(&pixt); - - return pixd; -} - - -/*! - * \brief pixTilingNoStripOnPaint() - * - * \param[in] pt pixtiling - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The default for paint is to strip out the overlap pixels
- *          that are added by pixTilingGetTile().  However, some
- *          operations will generate an image with these pixels
- *          stripped off.  This tells the paint operation not
- *          to strip the added boundary pixels when painting.
- * 
- */ -l_ok -pixTilingNoStripOnPaint(PIXTILING *pt) -{ - PROCNAME("pixTilingNoStripOnPaint"); - - if (!pt) - return ERROR_INT("pt not defined", procName, 1); - pt->strip = FALSE; - return 0; -} - - -/*! - * \brief pixTilingPaintTile() - * - * \param[in] pixd dest: paint tile onto this, without overlap - * \param[in] i tile row index - * \param[in] j tile column index - * \param[in] pixs source: tile to be painted from - * \param[in] pt pixtiling struct - * \return 0 if OK, 1 on error - */ -l_ok -pixTilingPaintTile(PIX *pixd, - l_int32 i, - l_int32 j, - PIX *pixs, - PIXTILING *pt) -{ -l_int32 w, h; - - PROCNAME("pixTilingPaintTile"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pt) - return ERROR_INT("pt not defined", procName, 1); - if (i < 0 || i >= pt->ny) - return ERROR_INT("invalid row index i", procName, 1); - if (j < 0 || j >= pt->nx) - return ERROR_INT("invalid column index j", procName, 1); - - /* Strip added border pixels off if requested */ - pixGetDimensions(pixs, &w, &h, NULL); - if (pt->strip == TRUE) { - pixRasterop(pixd, j * pt->w, i * pt->h, - w - 2 * pt->xoverlap, h - 2 * pt->yoverlap, PIX_SRC, - pixs, pt->xoverlap, pt->yoverlap); - } else { - pixRasterop(pixd, j * pt->w, i * pt->h, w, h, PIX_SRC, pixs, 0, 0); - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pngio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pngio.c deleted file mode 100644 index ff602a29..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pngio.c +++ /dev/null @@ -1,2117 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - Copyright (C) 2017 Milner Technologies, Inc. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pngio.c - *
- *
- *    Reading png through stream
- *          PIX        *pixReadStreamPng()
- *
- *    Reading png header
- *          l_int32     readHeaderPng()
- *          l_int32     freadHeaderPng()
- *          l_int32     readHeaderMemPng()
- *
- *    Reading png metadata
- *          l_int32     fgetPngResolution()
- *          l_int32     isPngInterlaced()
- *          l_int32     fgetPngColormapInfo()
- *
- *    Writing png through stream
- *          l_int32     pixWritePng()  [ special top level ]
- *          l_int32     pixWriteStreamPng()
- *          l_int32     pixSetZlibCompression()
- *
- *    Set flag for special read mode
- *          void        l_pngSetReadStrip16To8()
- *
- *    Low-level memio utility (thanks to T. D. Hintz)
- *          static void memio_png_write_data()
- *          static void memio_png_flush()
- *          static void memio_png_read_data()
- *          static void memio_free()
- *
- *    Reading png from memory
- *          PIX        *pixReadMemPng()
- *
- *    Writing png to memory
- *          l_int32     pixWriteMemPng()
- *
- *    Documentation: libpng.txt and example.c
- *
- *    On input (decompression from file), palette color images
- *    are read into an 8 bpp Pix with a colormap, and 24 bpp
- *    3 component color images are read into a 32 bpp Pix with
- *    rgb samples.  On output (compression to file), palette color
- *    images are written as 8 bpp with the colormap, and 32 bpp
- *    full color images are written compressed as a 24 bpp,
- *    3 component color image.
- *
- *    In the following, we use these abbreviations:
- *       bps == bit/sample
- *       spp == samples/pixel
- *       bpp == bits/pixel of image in Pix (memory)
- *    where each component is referred to as a "sample".
- *
- *    For reading and writing rgb and rgba images, we read and write
- *    alpha if it exists (spp == 4) and do not read or write if
- *    it doesn't (spp == 3).  The alpha component can be 'removed'
- *    simply by setting spp to 3.  In leptonica, we make relatively
- *    little explicit use of the alpha sample.  Note that the alpha
- *    sample in the image is also called "alpha transparency",
- *    "alpha component" and "alpha layer."
- *
- *    To change the zlib compression level, use pixSetZlibCompression()
- *    before writing the file.  The default is for standard png compression.
- *    The zlib compression value can be set [0 ... 9], with
- *         0     no compression (huge files)
- *         1     fastest compression
- *         -1    default compression  (equivalent to 6 in latest version)
- *         9     best compression
- *    Note that if you are using the defined constants in zlib instead
- *    of the compression integers given above, you must include zlib.h.
- *
- *    There is global for determining the size of retained samples:
- *             var_PNG_STRIP_16_to_8
- *    and a function l_pngSetReadStrip16To8() for setting it.
- *    The default is TRUE, which causes pixRead() to strip each 16 bit
- *    sample down to 8 bps:
- *     ~ For 16 bps rgb (16 bps, 3 spp) --> 32 bpp rgb Pix
- *     ~ For 16 bps gray (16 bps, 1 spp) --> 8 bpp grayscale Pix
- *    If the variable is set to FALSE, the 16 bit gray samples
- *    are saved when read; the 16 bit rgb samples return an error.
- *    Note: results can be non-deterministic if used with
- *    multi-threaded applications.
- *
- *    Thanks to a memory buffering utility contributed by T. D. Hintz,
- *    encoding png directly into memory (and decoding from memory)
- *    is now enabled without the use of any temp files.  Unlike with webp,
- *    it is necessary to preserve the stream interface to enable writing
- *    pixa to memory.  So there are two independent but very similar
- *    implementations of png reading and writing.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if HAVE_LIBPNG /* defined in environ.h */ -/* --------------------------------------------*/ - -#include "png.h" - -#if HAVE_LIBZ -#include "zlib.h" -#else -#define Z_DEFAULT_COMPRESSION (-1) -#endif /* HAVE_LIBZ */ - -/* ------------------ Set default for read option -------------------- */ - /* Strip 16 bpp --> 8 bpp on reading png; default is for stripping. - * If you don't strip, you can't read the gray-alpha spp = 2 images. */ -static l_int32 var_PNG_STRIP_16_TO_8 = 1; - -#ifndef NO_CONSOLE_IO -#define DEBUG_READ 0 -#define DEBUG_WRITE 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*---------------------------------------------------------------------* - * Reading png through stream * - *---------------------------------------------------------------------*/ -/*! - * \brief pixReadStreamPng() - * - * \param[in] fp file stream - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) If called from pixReadStream(), the stream is positioned
- *          at the beginning of the file.
- *      (2) To do sequential reads of png format images from a stream,
- *          use pixReadStreamPng()
- *      (3) Any image with alpha is converted to RGBA (spp = 4, with
- *          equal red, green and blue channels) on reading.
- *          There are three important cases with alpha:
- *          (a) grayscale-with-alpha (spp = 2), where bpp = 8, and each
- *              pixel has an associated alpha (transparency) value
- *              in the second component of the image data.
- *          (b) spp = 1, d = 1 with colormap and alpha in the trans array.
- *              Transparency is usually associated with the white background.
- *          (c) spp = 1, d = 8 with colormap and alpha in the trans array.
- *              Each color in the colormap has a separate transparency value.
- *      (4) We use the high level png interface, where the transforms are set
- *          up in advance and the header and image are read with a single
- *          call.  The more complicated interface, where the header is
- *          read first and the buffers for the raster image are user-
- *          allocated before reading the image, works for single images,
- *          but I could not get it to work properly for the successive
- *          png reads that are required by pixaReadStream().
- * 
- */ -PIX * -pixReadStreamPng(FILE *fp) -{ -l_uint8 byte; -l_int32 rval, gval, bval; -l_int32 i, j, k, index, ncolors, bitval; -l_int32 wpl, d, spp, cindex, tRNS; -l_uint32 png_transforms; -l_uint32 *data, *line, *ppixel; -int num_palette, num_text, num_trans; -png_byte bit_depth, color_type, channels; -png_uint_32 w, h, rowbytes; -png_uint_32 xres, yres; -png_bytep rowptr, trans; -png_bytep *row_pointers; -png_structp png_ptr; -png_infop info_ptr, end_info; -png_colorp palette; -png_textp text_ptr; /* ptr to text_chunk */ -PIX *pix, *pix1; -PIXCMAP *cmap; - - PROCNAME("pixReadStreamPng"); - - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - pix = NULL; - - /* Allocate the 3 data structures */ - if ((png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, - (png_voidp)NULL, NULL, NULL)) == NULL) - return (PIX *)ERROR_PTR("png_ptr not made", procName, NULL); - - if ((info_ptr = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_read_struct(&png_ptr, (png_infopp)NULL, (png_infopp)NULL); - return (PIX *)ERROR_PTR("info_ptr not made", procName, NULL); - } - - if ((end_info = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_read_struct(&png_ptr, &info_ptr, (png_infopp)NULL); - return (PIX *)ERROR_PTR("end_info not made", procName, NULL); - } - - /* Set up png setjmp error handling */ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("internal png error", procName, NULL); - } - - png_init_io(png_ptr, fp); - - /* ---------------------------------------------------------- * - * - Set the transforms flags. Whatever happens here, - * NEVER invert 1 bpp using PNG_TRANSFORM_INVERT_MONO. - * - Do not use PNG_TRANSFORM_EXPAND, which would - * expand all images with bpp < 8 to 8 bpp. 
- * - Strip 16 --> 8 if reading 16-bit gray+alpha - * ---------------------------------------------------------- */ - /* To strip 16 --> 8 bit depth, use PNG_TRANSFORM_STRIP_16 */ - if (var_PNG_STRIP_16_TO_8 == 1) { /* our default */ - png_transforms = PNG_TRANSFORM_STRIP_16; - } else { - png_transforms = PNG_TRANSFORM_IDENTITY; - L_INFO("not stripping 16 --> 8 in png reading\n", procName); - } - - /* Read it */ - png_read_png(png_ptr, info_ptr, png_transforms, NULL); - - row_pointers = png_get_rows(png_ptr, info_ptr); - w = png_get_image_width(png_ptr, info_ptr); - h = png_get_image_height(png_ptr, info_ptr); - bit_depth = png_get_bit_depth(png_ptr, info_ptr); - rowbytes = png_get_rowbytes(png_ptr, info_ptr); - color_type = png_get_color_type(png_ptr, info_ptr); - channels = png_get_channels(png_ptr, info_ptr); - spp = channels; - tRNS = png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS) ? 1 : 0; - - if (spp == 1) { - d = bit_depth; - } else { /* spp == 2 (gray + alpha), spp == 3 (rgb), spp == 4 (rgba) */ - d = 4 * bit_depth; - } - - /* Remove if/when this is implemented for all bit_depths */ - if (spp != 1 && bit_depth != 8) { - L_ERROR("spp = %d and bps = %d != 8\n" - "turn on 16 --> 8 stripping\n", procName, spp, bit_depth); - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("not implemented for this image", - procName, NULL); - } - - cmap = NULL; - if (color_type == PNG_COLOR_TYPE_PALETTE || - color_type == PNG_COLOR_MASK_PALETTE) { /* generate a colormap */ - png_get_PLTE(png_ptr, info_ptr, &palette, &num_palette); - cmap = pixcmapCreate(d); /* spp == 1 */ - for (cindex = 0; cindex < num_palette; cindex++) { - rval = palette[cindex].red; - gval = palette[cindex].green; - bval = palette[cindex].blue; - pixcmapAddColor(cmap, rval, gval, bval); - } - } - - if ((pix = pixCreate(w, h, d)) == NULL) { - pixcmapDestroy(&cmap); - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("pix not made", procName, 
NULL); - } - pixSetInputFormat(pix, IFF_PNG); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - pixSetColormap(pix, cmap); - pixSetSpp(pix, spp); - - if (spp == 1 && !tRNS) { /* copy straight from buffer to pix */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - rowptr = row_pointers[i]; - for (j = 0; j < rowbytes; j++) { - SET_DATA_BYTE(line, j, rowptr[j]); - } - } - } else if (spp == 2) { /* grayscale + alpha; convert to RGBA */ - L_INFO("converting (gray + alpha) ==> RGBA\n", procName); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = k = 0; j < w; j++) { - /* Copy gray value into r, g and b */ - SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k]); - SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k]); - SET_DATA_BYTE(ppixel, COLOR_BLUE, rowptr[k++]); - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]); - ppixel++; - } - } - pixSetSpp(pix, 4); /* we do not support 2 spp pix */ - } else if (spp == 3 || spp == 4) { - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = k = 0; j < w; j++) { - SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k++]); - SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k++]); - SET_DATA_BYTE(ppixel, COLOR_BLUE, rowptr[k++]); - if (spp == 3) /* set to opaque; some readers are buggy */ - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, 255); - else /* spp == 4 */ - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]); - ppixel++; - } - } - } - - /* Special spp == 1 cases with transparency: - * (1) 8 bpp without colormap; assume full transparency - * (2) 1 bpp with colormap + trans array (for alpha) - * (3) 8 bpp with colormap + trans array (for alpha) - * These all require converting to RGBA */ - if (spp == 1 && tRNS) { - if (!cmap) { - /* Case 1: make fully transparent RGBA image */ - L_INFO("transparency, 1 spp, no colormap, no transparency array: " - "convention is fully transparent image\n", procName); - L_INFO("converting (fully transparent 1 spp) ==> RGBA\n", procName); - 
pixDestroy(&pix); - pix = pixCreate(w, h, 32); /* init to alpha = 0 (transparent) */ - pixSetSpp(pix, 4); - } else { - L_INFO("converting (cmap + alpha) ==> RGBA\n", procName); - - /* Grab the transparency array */ - png_get_tRNS(png_ptr, info_ptr, &trans, &num_trans, NULL); - if (!trans) { /* invalid png file */ - pixDestroy(&pix); - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("cmap, tRNS, but no transparency array", - procName, NULL); - } - - /* Save the cmap and destroy the pix */ - cmap = pixcmapCopy(pixGetColormap(pix)); - ncolors = pixcmapGetCount(cmap); - pixDestroy(&pix); - - /* Start over with 32 bit RGBA */ - pix = pixCreate(w, h, 32); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - pixSetSpp(pix, 4); - -#if DEBUG_READ - lept_stderr("ncolors = %d, num_trans = %d\n", - ncolors, num_trans); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - if (i < num_trans) { - lept_stderr("(r,g,b,a) = (%d,%d,%d,%d)\n", - rval, gval, bval, trans[i]); - } else { - lept_stderr("(r,g,b,a) = (%d,%d,%d,<<255>>)\n", - rval, gval, bval); - } - } -#endif /* DEBUG_READ */ - - /* Extract the data and convert to RGBA */ - if (d == 1) { - /* Case 2: 1 bpp with transparency (usually) behind white */ - L_INFO("converting 1 bpp cmap with alpha ==> RGBA\n", procName); - if (num_trans == 1) - L_INFO("num_trans = 1; second color opaque by default\n", - procName); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = 0, index = 0; j < rowbytes; j++) { - byte = rowptr[j]; - for (k = 0; k < 8 && index < w; k++, index++) { - bitval = (byte >> (7 - k)) & 1; - pixcmapGetColor(cmap, bitval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, ppixel); - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, - bitval < num_trans ? 
trans[bitval] : 255); - ppixel++; - } - } - } - } else if (d == 8) { - /* Case 3: 8 bpp with cmap and associated transparency */ - L_INFO("converting 8 bpp cmap with alpha ==> RGBA\n", procName); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = 0; j < w; j++) { - index = rowptr[j]; - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, ppixel); - /* Assume missing entries to be 255 (opaque) - * according to the spec: - * http://www.w3.org/TR/PNG/#11tRNS */ - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, - index < num_trans ? trans[index] : 255); - ppixel++; - } - } - } else { - L_ERROR("spp == 1, cmap, trans array, invalid depth: %d\n", - procName, d); - } - pixcmapDestroy(&cmap); - } - } - -#if DEBUG_READ - if (cmap) { - for (i = 0; i < 16; i++) { - lept_stderr("[%d] = %d\n", i, ((l_uint8 *)(cmap->array))[i]); - } - } -#endif /* DEBUG_READ */ - - /* Final adjustments for bpp = 1. - * + If there is no colormap, the image must be inverted because - * png stores black pixels as 0. - * + We have already handled the case of cmapped, 1 bpp pix - * with transparency, where the output pix is 32 bpp RGBA. - * If there is no transparency but the pix has a colormap, - * we remove the colormap, because functions operating on - * 1 bpp images in leptonica assume no colormap. - * + The colormap must be removed in such a way that the pixel - * values are not changed. If the values are only black and - * white, we return a 1 bpp image; if gray, return an 8 bpp pix; - * otherwise, return a 32 bpp rgb pix. - * - * Note that we cannot use the PNG_TRANSFORM_INVERT_MONO flag - * to do the inversion, because that flag (since version 1.0.9) - * inverts 8 bpp grayscale as well, which we don't want to do. - * (It also doesn't work if there is a colormap.) - * - * Note that if the input png is a 1-bit with colormap and - * transparency, it has already been rendered as a 32 bpp, - * spp = 4 rgba pix. 
- */ - if (pixGetDepth(pix) == 1) { - if (!cmap) { - pixInvert(pix, pix); - } else { - L_INFO("removing opaque cmap from 1 bpp\n", procName); - pix1 = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - pixDestroy(&pix); - pix = pix1; - } - } - - xres = png_get_x_pixels_per_meter(png_ptr, info_ptr); - yres = png_get_y_pixels_per_meter(png_ptr, info_ptr); - pixSetXRes(pix, (l_int32)((l_float32)xres / 39.37 + 0.5)); /* to ppi */ - pixSetYRes(pix, (l_int32)((l_float32)yres / 39.37 + 0.5)); /* to ppi */ - - /* Get the text if there is any */ - png_get_text(png_ptr, info_ptr, &text_ptr, &num_text); - if (num_text && text_ptr) - pixSetText(pix, text_ptr->text); - - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return pix; -} - - -/*---------------------------------------------------------------------* - * Reading png header * - *---------------------------------------------------------------------*/ -/*! - * \brief readHeaderPng() - * - * \param[in] filename - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \param[out] piscmap [optional] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there is a colormap, iscmap is returned as 1; else 0.
- *      (2) For gray+alpha, although the png records bps = 16, we
- *          consider this as two 8 bpp samples (gray and alpha).
- *          When a gray+alpha is read, it is converted to 32 bpp RGBA.
- * 
- */ -l_ok -readHeaderPng(const char *filename, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderPng"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (piscmap) *piscmap = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = freadHeaderPng(fp, pw, ph, pbps, pspp, piscmap); - fclose(fp); - return ret; -} - - -/*! - * \brief freadHeaderPng() - * - * \param[in] fp file stream - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \param[out] piscmap [optional] - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See readHeaderPng().  We only need the first 40 bytes in the file.
- * 
- */ -l_ok -freadHeaderPng(FILE *fp, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_int32 nbytes, ret; -l_uint8 data[40]; - - PROCNAME("freadHeaderPng"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (piscmap) *piscmap = 0; - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - - nbytes = fnbytesInFile(fp); - if (nbytes < 40) - return ERROR_INT("file too small to be png", procName, 1); - if (fread(data, 1, 40, fp) != 40) - return ERROR_INT("error reading data", procName, 1); - ret = readHeaderMemPng(data, 40, pw, ph, pbps, pspp, piscmap); - return ret; -} - - -/*! - * \brief readHeaderMemPng() - * - * \param[in] data - * \param[in] size 40 bytes is sufficient - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \param[out] piscmap [optional] input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See readHeaderPng().
- *      (2) png colortypes (see png.h: PNG_COLOR_TYPE_*):
- *          0:  gray; fully transparent (with tRNS) (1 spp)
- *          2:  RGB (3 spp)
- *          3:  colormap; colormap+alpha (with tRNS) (1 spp)
- *          4:  gray + alpha (2 spp)
- *          6:  RGBA (4 spp)
- *          Note:
- *            0 and 3 have the alpha information in a tRNS chunk
- *            4 and 6 have separate alpha samples with each pixel.
- * 
- */ -l_ok -readHeaderMemPng(const l_uint8 *data, - size_t size, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_uint16 twobytes; -l_uint16 *pshort; -l_int32 colortype, w, h, bps, spp; -l_uint32 *pword; - - PROCNAME("readHeaderMemPng"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (piscmap) *piscmap = 0; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (size < 40) - return ERROR_INT("size < 40", procName, 1); - - /* Check password */ - if (data[0] != 137 || data[1] != 80 || data[2] != 78 || - data[3] != 71 || data[4] != 13 || data[5] != 10 || - data[6] != 26 || data[7] != 10) - return ERROR_INT("not a valid png file", procName, 1); - - pword = (l_uint32 *)data; - pshort = (l_uint16 *)data; - w = convertOnLittleEnd32(pword[4]); - h = convertOnLittleEnd32(pword[5]); - if (w < 1 || h < 1) - return ERROR_INT("invalid w or h", procName, 1); - twobytes = convertOnLittleEnd16(pshort[12]); /* contains depth/sample */ - /* and the color type */ - colortype = twobytes & 0xff; /* color type */ - bps = twobytes >> 8; /* bits/sample */ - - /* Special case with alpha that is extracted as RGBA. 
- * Note that the cmap+alpha is also extracted as RGBA, - * but only if the tRNS chunk exists, which we can't tell - * by this simple parser.*/ - if (colortype == 4) - L_INFO("gray + alpha: will extract as RGBA (spp = 4)\n", procName); - - if (colortype == 2) { /* RGB */ - spp = 3; - } else if (colortype == 6) { /* RGBA */ - spp = 4; - } else if (colortype == 4) { /* gray + alpha */ - spp = 2; - bps = 8; /* both the gray and alpha are 8-bit samples */ - } else { /* gray (0) or cmap (3) or cmap+alpha (3) */ - spp = 1; - } - if (bps < 1 || bps > 16) { - L_ERROR("invalid bps = %d\n", procName, bps); - return 1; - } - if (pw) *pw = w; - if (ph) *ph = h; - if (pbps) *pbps = bps; - if (pspp) *pspp = spp; - if (piscmap) { - if (colortype & 1) /* palette */ - *piscmap = 1; - else - *piscmap = 0; - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Reading png metadata * - *---------------------------------------------------------------------*/ -/* - * fgetPngResolution() - * - * Input: fp (file stream opened for read) - * &xres, &yres ( resolution in ppi) - * Return: 0 if OK; 1 on error - * - * Notes: - * (1) If neither resolution field is set, this is not an error; - * the returned resolution values are 0 (designating 'unknown'). - * (2) Side-effect: this rewinds the stream. 
- */ -l_int32 -fgetPngResolution(FILE *fp, - l_int32 *pxres, - l_int32 *pyres) -{ -png_uint_32 xres, yres; -png_structp png_ptr; -png_infop info_ptr; - - PROCNAME("fgetPngResolution"); - - if (pxres) *pxres = 0; - if (pyres) *pyres = 0; - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - if (!pxres || !pyres) - return ERROR_INT("&xres and &yres not both defined", procName, 1); - - /* Make the two required structs */ - if ((png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, - (png_voidp)NULL, NULL, NULL)) == NULL) - return ERROR_INT("png_ptr not made", procName, 1); - if ((info_ptr = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_read_struct(&png_ptr, (png_infopp)NULL, (png_infopp)NULL); - return ERROR_INT("info_ptr not made", procName, 1); - } - - /* Set up png setjmp error handling. - * Without this, an error calls exit. */ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, (png_infopp)NULL); - return ERROR_INT("internal png error", procName, 1); - } - - /* Read the metadata */ - rewind(fp); - png_init_io(png_ptr, fp); - png_read_info(png_ptr, info_ptr); - - xres = png_get_x_pixels_per_meter(png_ptr, info_ptr); - yres = png_get_y_pixels_per_meter(png_ptr, info_ptr); - *pxres = (l_int32)((l_float32)xres / 39.37 + 0.5); /* to ppi */ - *pyres = (l_int32)((l_float32)yres / 39.37 + 0.5); - - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - rewind(fp); - return 0; -} - - -/*! 
- * \brief isPngInterlaced() - * - * \param[in] filename - * \param[out] pinterlaced 1 if interlaced png; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -isPngInterlaced(const char *filename, - l_int32 *pinterlaced) -{ -l_uint8 buf[32]; -FILE *fp; - - PROCNAME("isPngInterlaced"); - - if (!pinterlaced) - return ERROR_INT("&interlaced not defined", procName, 1); - *pinterlaced = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - if (fread(buf, 1, 32, fp) != 32) { - fclose(fp); - return ERROR_INT("data not read", procName, 1); - } - fclose(fp); - - *pinterlaced = (buf[28] == 0) ? 0 : 1; - return 0; -} - - -/* - * \brief fgetPngColormapInfo() - * - * \param[in] fp file stream opened for read - * \param[out] pcmap optional; use NULL to skip - * \param[out] ptransparency optional; 1 if colormapped with - * transparency, 0 otherwise; use NULL to skip - * \return 0 if OK, 1 on error - * - * Notes: - * (1) The transparency information in a png is in the tRNA array, - * which is separate from the colormap. If this array exists - * and if any element is less than 255, there exists some - * transparency. - * (2) Side-effect: this rewinds the stream. 
- */ -l_ok -fgetPngColormapInfo(FILE *fp, - PIXCMAP **pcmap, - l_int32 *ptransparency) -{ -l_int32 i, cindex, rval, gval, bval, num_palette, num_trans; -png_byte bit_depth, color_type; -png_bytep trans; -png_colorp palette; -png_structp png_ptr; -png_infop info_ptr; - - PROCNAME("fgetPngColormapInfo"); - - if (pcmap) *pcmap = NULL; - if (ptransparency) *ptransparency = 0; - if (!pcmap && !ptransparency) - return ERROR_INT("no output defined", procName, 1); - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - - /* Make the two required structs */ - if ((png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, - (png_voidp)NULL, NULL, NULL)) == NULL) - return ERROR_INT("png_ptr not made", procName, 1); - if ((info_ptr = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_read_struct(&png_ptr, (png_infopp)NULL, (png_infopp)NULL); - return ERROR_INT("info_ptr not made", procName, 1); - } - - /* Set up png setjmp error handling. - * Without this, an error calls exit. */ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - if (pcmap && *pcmap) pixcmapDestroy(pcmap); - return ERROR_INT("internal png error", procName, 1); - } - - /* Read the metadata and check if there is a colormap */ - rewind(fp); - png_init_io(png_ptr, fp); - png_read_info(png_ptr, info_ptr); - color_type = png_get_color_type(png_ptr, info_ptr); - if (color_type != PNG_COLOR_TYPE_PALETTE && - color_type != PNG_COLOR_MASK_PALETTE) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - return 0; - } - - /* Optionally, read the colormap */ - if (pcmap) { - bit_depth = png_get_bit_depth(png_ptr, info_ptr); - png_get_PLTE(png_ptr, info_ptr, &palette, &num_palette); - *pcmap = pixcmapCreate(bit_depth); /* spp == 1 */ - for (cindex = 0; cindex < num_palette; cindex++) { - rval = palette[cindex].red; - gval = palette[cindex].green; - bval = palette[cindex].blue; - pixcmapAddColor(*pcmap, rval, gval, bval); - } - } - - /* Optionally, look for 
transparency. Note that the colormap - * has been initialized to fully opaque. */ - if (ptransparency && png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { - png_get_tRNS(png_ptr, info_ptr, &trans, &num_trans, NULL); - if (trans) { - for (i = 0; i < num_trans; i++) { - if (trans[i] < 255) { /* not fully opaque */ - *ptransparency = 1; - if (pcmap) pixcmapSetAlpha(*pcmap, i, trans[i]); - } - } - } else { - L_ERROR("transparency array not returned\n", procName); - } - } - - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - rewind(fp); - return 0; -} - - -/*---------------------------------------------------------------------* - * Writing png through stream * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWritePng() - * - * \param[in] filename - * \param[in] pix - * \param[in] gamma - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Special version for writing png with a specified gamma.
- *          When using pixWrite(), no field is given for gamma.
- * 
- */ -l_ok -pixWritePng(const char *filename, - PIX *pix, - l_float32 gamma) -{ -FILE *fp; - - PROCNAME("pixWritePng"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - - if (pixWriteStreamPng(fp, pix, gamma)) { - fclose(fp); - return ERROR_INT("pix not written to stream", procName, 1); - } - - fclose(fp); - return 0; -} - - -/*! - * \brief pixWriteStreamPng() - * - * \param[in] fp file stream - * \param[in] pix - * \param[in] gamma use 0.0 if gamma is not defined - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If called from pixWriteStream(), the stream is positioned
- *          at the beginning of the file.
- *      (2) To do sequential writes of png format images to a stream,
- *          use pixWriteStreamPng() directly.
- *      (3) gamma is an optional png chunk.  If no gamma value is to be
- *          placed into the file, use gamma = 0.0.  Otherwise, if
- *          gamma > 0.0, its value is written into the header.
- *      (4) The use of gamma in png is highly problematic.  For an illuminating
- *          discussion, see:  http://hsivonen.iki.fi/png-gamma/
- *      (5) What is the effect/meaning of gamma in the png file?  This
- *          gamma, which we can call the 'source' gamma, is the
- *          inverse of the gamma that was used in enhance.c to brighten
- *          or darken images.  The 'source' gamma is supposed to indicate
- *          the intensity mapping that was done at the time the
- *          image was captured.  Display programs typically apply a
- *          'display' gamma of 2.2 to the output, which is intended
- *          to linearize the intensity based on the response of
- *          thermionic tubes (CRTs).  Flat panel LCDs have typically
- *          been designed to give a similar response as CRTs (call it
- *          "backward compatibility").  The 'display' gamma is
- *          in some sense the inverse of the 'source' gamma.
- *          jpeg encoders attached to scanners and cameras will lighten
- *          the pixels, applying a gamma corresponding to approximately
- *          a square-root relation of output vs input:
- *                output = input^(gamma)
- *          where gamma is often set near 0.4545  (1/gamma is 2.2).
- *          This is stored in the image file.  Then if the display
- *          program reads the gamma, it will apply a display gamma,
- *          typically about 2.2; the product is 1.0, and the
- *          display program produces a linear output.  This works because
- *          the dark colors were appropriately boosted by the scanner,
- *          as described by the 'source' gamma, so they should not
- *          be further boosted by the display program.
- *      (6) As an example, with xv and display, if no gamma is stored,
- *          the program acts as if gamma were 0.4545, multiplies this by 2.2,
- *          and does a linear rendering.  Taking this as a baseline
- *          brightness, if the stored gamma is:
- *              > 0.4545, the image is rendered lighter than baseline
- *              < 0.4545, the image is rendered darker than baseline
- *          In contrast, gqview seems to ignore the gamma chunk in png.
- *      (7) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16
- *          and 32.  However, it is possible, and in some cases desirable,
- *          to write out a png file using an rgb pix that has 24 bpp.
- *          For example, the open source xpdf SplashBitmap class generates
- *          24 bpp rgb images.  Consequently, we enable writing 24 bpp pix.
- *          To generate such a pix, you can make a 24 bpp pix without data
- *          and assign the data array to the pix; e.g.,
- *              pix = pixCreateHeader(w, h, 24);
- *              pixSetData(pix, rgbdata);
- *          See pixConvert32To24() for an example, where we get rgbdata
- *          from the 32 bpp pix.  Caution: do not call pixSetPadBits(),
- *          because the alignment is wrong and you may erase part of the
- *          last pixel on each line.
- *      (8) If the pix has a colormap, it is written to file.  In most
- *          situations, the alpha component is 255 for each colormap entry,
- *          which is opaque and indicates that it should be ignored.
- *          However, if any alpha component is not 255, it is assumed that
- *          the alpha values are valid, and they are written to the png
- *          file in a tRNS segment.  On readback, the tRNS segment is
- *          identified, and the colormapped image with alpha is converted
- *          to a 4 spp rgba image.
- * 
- */ -l_ok -pixWriteStreamPng(FILE *fp, - PIX *pix, - l_float32 gamma) -{ -char commentstring[] = "Comment"; -l_int32 i, j, k; -l_int32 wpl, d, spp, cmflag, opaque; -l_int32 ncolors, compval; -l_int32 *rmap, *gmap, *bmap, *amap; -l_uint32 *data, *ppixel; -png_byte bit_depth, color_type; -png_byte alpha[256]; -png_uint_32 w, h; -png_uint_32 xres, yres; -png_bytep *row_pointers; -png_bytep rowbuffer; -png_structp png_ptr; -png_infop info_ptr; -png_colorp palette; -PIX *pix1; -PIXCMAP *cmap; -char *text; - - PROCNAME("pixWriteStreamPng"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - /* Allocate the 2 data structures */ - if ((png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, - (png_voidp)NULL, NULL, NULL)) == NULL) - return ERROR_INT("png_ptr not made", procName, 1); - - if ((info_ptr = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_write_struct(&png_ptr, (png_infopp)NULL); - return ERROR_INT("info_ptr not made", procName, 1); - } - - /* Set up png setjmp error handling */ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_write_struct(&png_ptr, &info_ptr); - return ERROR_INT("internal png error", procName, 1); - } - - png_init_io(png_ptr, fp); - - /* With best zlib compression (9), get between 1 and 10% improvement - * over default (6), but the compression is 3 to 10 times slower. - * Use the zlib default (6) as our default compression unless - * pix->special falls in the range [10 ... 19]; then subtract 10 - * to get the compression value. */ - compval = Z_DEFAULT_COMPRESSION; - if (pix->special >= 10 && pix->special < 20) - compval = pix->special - 10; - png_set_compression_level(png_ptr, compval); - - w = pixGetWidth(pix); - h = pixGetHeight(pix); - d = pixGetDepth(pix); - spp = pixGetSpp(pix); - if ((cmap = pixGetColormap(pix))) - cmflag = 1; - else - cmflag = 0; - pixSetPadBits(pix, 0); - - /* Set the color type and bit depth. 
*/ - if (d == 32 && spp == 4) { - bit_depth = 8; - color_type = PNG_COLOR_TYPE_RGBA; /* 6 */ - cmflag = 0; /* ignore if it exists */ - } else if (d == 24 || d == 32) { - bit_depth = 8; - color_type = PNG_COLOR_TYPE_RGB; /* 2 */ - cmflag = 0; /* ignore if it exists */ - } else { - bit_depth = d; - color_type = PNG_COLOR_TYPE_GRAY; /* 0 */ - } - if (cmflag) - color_type = PNG_COLOR_TYPE_PALETTE; /* 3 */ - -#if DEBUG_WRITE - lept_stderr("cmflag = %d, bit_depth = %d, color_type = %d\n", - cmflag, bit_depth, color_type); -#endif /* DEBUG_WRITE */ - - png_set_IHDR(png_ptr, info_ptr, w, h, bit_depth, color_type, - PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, - PNG_FILTER_TYPE_BASE); - - /* Store resolution in ppm, if known */ - xres = (png_uint_32)(39.37 * (l_float32)pixGetXRes(pix) + 0.5); - yres = (png_uint_32)(39.37 * (l_float32)pixGetYRes(pix) + 0.5); - if ((xres == 0) || (yres == 0)) - png_set_pHYs(png_ptr, info_ptr, 0, 0, PNG_RESOLUTION_UNKNOWN); - else - png_set_pHYs(png_ptr, info_ptr, xres, yres, PNG_RESOLUTION_METER); - - if (cmflag) { - pixcmapToArrays(cmap, &rmap, &gmap, &bmap, &amap); - ncolors = pixcmapGetCount(cmap); - pixcmapIsOpaque(cmap, &opaque); - - /* Make and save the palette */ - palette = (png_colorp)LEPT_CALLOC(ncolors, sizeof(png_color)); - for (i = 0; i < ncolors; i++) { - palette[i].red = (png_byte)rmap[i]; - palette[i].green = (png_byte)gmap[i]; - palette[i].blue = (png_byte)bmap[i]; - alpha[i] = (png_byte)amap[i]; - } - - png_set_PLTE(png_ptr, info_ptr, palette, (int)ncolors); - if (!opaque) /* alpha channel has some transparency; assume valid */ - png_set_tRNS(png_ptr, info_ptr, (png_bytep)alpha, - (int)ncolors, NULL); - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - LEPT_FREE(amap); - } - - /* 0.4545 is treated as the default by some image - * display programs (not gqview). A value > 0.4545 will - * lighten an image as displayed by xv, display, etc. 
*/ - if (gamma > 0.0) - png_set_gAMA(png_ptr, info_ptr, (l_float64)gamma); - - if ((text = pixGetText(pix))) { - png_text text_chunk; - text_chunk.compression = PNG_TEXT_COMPRESSION_NONE; - text_chunk.key = commentstring; - text_chunk.text = text; - text_chunk.text_length = strlen(text); -#ifdef PNG_ITXT_SUPPORTED - text_chunk.itxt_length = 0; - text_chunk.lang = NULL; - text_chunk.lang_key = NULL; -#endif - png_set_text(png_ptr, info_ptr, &text_chunk, 1); - } - - /* Write header and palette info */ - png_write_info(png_ptr, info_ptr); - - if ((d != 32) && (d != 24)) { /* not rgb color */ - /* Generate a temporary pix with bytes swapped. - * For writing a 1 bpp image as png: - * ~ if no colormap, invert the data, because png writes - * black as 0 - * ~ if colormapped, do not invert the data; the two RGBA - * colors can have any value. */ - if (d == 1 && !cmap) { - pix1 = pixInvert(NULL, pix); - pixEndianByteSwap(pix1); - } else { - pix1 = pixEndianByteSwapNew(pix); - } - if (!pix1) { - png_destroy_write_struct(&png_ptr, &info_ptr); - if (cmflag) LEPT_FREE(palette); - return ERROR_INT("pix1 not made", procName, 1); - } - - /* Make and assign array of image row pointers */ - row_pointers = (png_bytep *)LEPT_CALLOC(h, sizeof(png_bytep)); - wpl = pixGetWpl(pix1); - data = pixGetData(pix1); - for (i = 0; i < h; i++) - row_pointers[i] = (png_bytep)(data + i * wpl); - png_set_rows(png_ptr, info_ptr, row_pointers); - - /* Transfer the data */ - png_write_image(png_ptr, row_pointers); - png_write_end(png_ptr, info_ptr); - - if (cmflag) LEPT_FREE(palette); - LEPT_FREE(row_pointers); - pixDestroy(&pix1); - png_destroy_write_struct(&png_ptr, &info_ptr); - return 0; - } - - /* For rgb, compose and write a row at a time */ - data = pixGetData(pix); - wpl = pixGetWpl(pix); - if (d == 24) { /* See note 7 above: special case of 24 bpp rgb */ - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - png_write_rows(png_ptr, (png_bytepp)&ppixel, 1); - } - } else { /* 32 bpp rgb and 
rgba. Write out the alpha channel if either - * the pix has 4 spp or writing it is requested anyway */ - rowbuffer = (png_bytep)LEPT_CALLOC(w, 4); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - for (j = k = 0; j < w; j++) { - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_RED); - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_GREEN); - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_BLUE); - if (spp == 4) - rowbuffer[k++] = GET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL); - ppixel++; - } - - png_write_rows(png_ptr, &rowbuffer, 1); - } - LEPT_FREE(rowbuffer); - } - - png_write_end(png_ptr, info_ptr); - - if (cmflag) - LEPT_FREE(palette); - png_destroy_write_struct(&png_ptr, &info_ptr); - return 0; -} - - -/*! - * \brief pixSetZlibCompression() - * - * \param[in] pix - * \param[in] compval zlib compression value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Valid zlib compression values are in the interval [0 ... 9],
- *          where, as defined in zlib.h:
- *            0         Z_NO_COMPRESSION
- *            1         Z_BEST_SPEED    (poorest compression)
- *            9         Z_BEST_COMPRESSION
- *          For the default value, use either of these:
- *            6         Z_DEFAULT_COMPRESSION
- *           -1         (resolves to Z_DEFAULT_COMPRESSION)
- *      (2) If you use the defined constants in zlib.h instead of the
- *          compression integers given above, you must include zlib.h.
- * 
- */ -l_ok -pixSetZlibCompression(PIX *pix, - l_int32 compval) -{ - PROCNAME("pixSetZlibCompression"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (compval < 0 || compval > 9) { - L_ERROR("Invalid zlib comp val; using default\n", procName); - compval = Z_DEFAULT_COMPRESSION; - } - pixSetSpecial(pix, 10 + compval); /* valid range [10 ... 19] */ - return 0; -} - - -/*---------------------------------------------------------------------* - * Set flag for stripping 16 bits on reading * - *---------------------------------------------------------------------*/ -/*! - * \brief l_pngSetReadStrip16To8() - * - * \param[in] flag 1 for stripping 16 bpp to 8 bpp on reading; - * 0 for leaving 16 bpp - * \return void - */ -void -l_pngSetReadStrip16To8(l_int32 flag) -{ - var_PNG_STRIP_16_TO_8 = flag; -} - - -/*-------------------------------------------------------------------------* - * Memio utility * - * libpng read/write callback replacements for performing memory I/O * - * * - * Copyright (C) 2017 Milner Technologies, Inc. This content is a * - * component of leptonica and is provided under the terms of the * - * Leptonica license. * - *-------------------------------------------------------------------------*/ - - /*! A node in a linked list of memory buffers that hold I/O content */ -struct MemIOData -{ - char* m_Buffer; /*!< pointer to this node's I/O content */ - l_int32 m_Count; /*!< number of I/O content bytes read or written */ - l_int32 m_Size; /*!< allocated size of m_buffer */ - struct MemIOData *m_Next; /*!< pointer to the next node in the list; */ - /*!< zero if this is the last node */ - struct MemIOData *m_Last; /*!< pointer to the last node in the linked */ - /*!< list. The last node is where new */ - /*!< content is written. 
*/ -}; -typedef struct MemIOData MEMIODATA; - -static void memio_png_write_data(png_structp png_ptr, png_bytep data, - png_size_t length); -static void memio_png_flush(MEMIODATA* pthing); -static void memio_png_read_data(png_structp png_ptr, png_bytep outBytes, - png_size_t byteCountToRead); -static void memio_free(MEMIODATA* pthing); - -static const l_int32 MEMIO_BUFFER_SIZE = 8192; /*! buffer alloc size */ - -/* - * \brief memio_png_write_data() - * - * \param[in] png_ptr - * \param[in] data - * \param[in] len size of array data in bytes - * - *
- * Notes:
- *      (1) This is a libpng callback for writing an image into a
- *          linked list of memory buffers.
- * 
- */ -static void -memio_png_write_data(png_structp png_ptr, - png_bytep data, - png_size_t len) -{ -MEMIODATA *thing, *last; -l_int32 written = 0; -l_int32 remainingSpace, remainingToWrite; - - thing = (struct MemIOData*)png_get_io_ptr(png_ptr); - last = (struct MemIOData*)thing->m_Last; - if (last->m_Buffer == NULL) { - if (len > MEMIO_BUFFER_SIZE) { - last->m_Buffer = (char *)LEPT_MALLOC(len); - memcpy(last->m_Buffer, data, len); - last->m_Size = last->m_Count = len; - return; - } - - last->m_Buffer = (char *)LEPT_MALLOC(MEMIO_BUFFER_SIZE); - last->m_Size = MEMIO_BUFFER_SIZE; - } - - while (written < len) { - if (last->m_Count == last->m_Size) { - MEMIODATA* next = (MEMIODATA *)LEPT_MALLOC(sizeof(MEMIODATA)); - next->m_Next = NULL; - next->m_Count = 0; - next->m_Last = next; - - last->m_Next = next; - last = thing->m_Last = next; - - last->m_Buffer = (char *)LEPT_MALLOC(MEMIO_BUFFER_SIZE); - last->m_Size = MEMIO_BUFFER_SIZE; - } - - remainingSpace = last->m_Size - last->m_Count; - remainingToWrite = len - written; - if (remainingSpace < remainingToWrite) { - memcpy(last->m_Buffer + last->m_Count, data + written, - remainingSpace); - written += remainingSpace; - last->m_Count += remainingSpace; - } else { - memcpy(last->m_Buffer + last->m_Count, data + written, - remainingToWrite); - written += remainingToWrite; - last->m_Count += remainingToWrite; - } - } -} - - -/* - * \brief memio_png_flush() - * - * \param[in] pthing - * - *
- * Notes:
- *      (1) This consolidates write buffers into a single buffer at the
- *          haed of the link list of buffers.
- * 
- */ -static void -memio_png_flush(MEMIODATA *pthing) -{ -l_int32 amount = 0; -l_int32 copied = 0; -MEMIODATA *buffer = 0; -char *data = 0; - - /* If the data is in one buffer, give the buffer to the user. */ - if (pthing->m_Next == NULL) return; - - /* Consolidate multiple buffers into one new one; add the buffer - * sizes together. */ - amount = pthing->m_Count; - buffer = pthing->m_Next; - while (buffer != NULL) { - amount += buffer->m_Count; - buffer = buffer->m_Next; - } - - /* Copy data to a new buffer. */ - data = (char *)LEPT_MALLOC(amount); - memcpy(data, pthing->m_Buffer, pthing->m_Count); - copied = pthing->m_Count; - - LEPT_FREE(pthing->m_Buffer); - pthing->m_Buffer = NULL; - - /* Don't delete original "thing" because we don't control it. */ - buffer = pthing->m_Next; - pthing->m_Next = NULL; - while (buffer != NULL && copied < amount) { - MEMIODATA* old; - memcpy(data + copied, buffer->m_Buffer, buffer->m_Count); - copied += buffer->m_Count; - - old = buffer; - buffer = buffer->m_Next; - - LEPT_FREE(old->m_Buffer); - LEPT_FREE(old); - } - - pthing->m_Buffer = data; - pthing->m_Count = copied; - pthing->m_Size = amount; - return; -} - - -/* - * \brief memio_png_read_data() - * - * \param[in] png_ptr - * \param[in] outBytes - * \param[in] byteCountToRead - * - *
- * Notes:
- *      (1) This is a libpng callback that reads an image from a single
- *          memory buffer.
- * 
- */ -static void -memio_png_read_data(png_structp png_ptr, - png_bytep outBytes, - png_size_t byteCountToRead) -{ -MEMIODATA *thing; - - thing = (MEMIODATA *)png_get_io_ptr(png_ptr); - if (byteCountToRead > (thing->m_Size - thing->m_Count)) { - png_error(png_ptr, "read error in memio_png_read_data"); - } - memcpy(outBytes, thing->m_Buffer + thing->m_Count, byteCountToRead); - thing->m_Count += byteCountToRead; -} - - -/* - * \brief memio_free() - * - * \param[in] pthing - * - *
- * Notes:
- *      (1) This frees all the write buffers in the linked list.  It must
- *          be done before exiting the pixWriteMemPng().
- * 
- */ -static void -memio_free(MEMIODATA* pthing) -{ -MEMIODATA *buffer, *old; - - if (pthing->m_Buffer != NULL) - LEPT_FREE(pthing->m_Buffer); - - pthing->m_Buffer = NULL; - buffer = pthing->m_Next; - while (buffer != NULL) { - old = buffer; - buffer = buffer->m_Next; - - if (old->m_Buffer != NULL) - LEPT_FREE(old->m_Buffer); - LEPT_FREE(old); - } -} - - -/*---------------------------------------------------------------------* - * Reading png from memory * - *---------------------------------------------------------------------*/ -/*! - * \brief pixReadMemPng() - * - * \param[in] filedata png compressed data in memory - * \param[in] filesize number of bytes in data - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) See pixReastreamPng().
- * 
- */ -PIX * -pixReadMemPng(const l_uint8 *filedata, - size_t filesize) -{ -l_uint8 byte; -l_int32 rval, gval, bval; -l_int32 i, j, k, index, ncolors, bitval; -l_int32 wpl, d, spp, cindex, tRNS; -l_uint32 png_transforms; -l_uint32 *data, *line, *ppixel; -int num_palette, num_text, num_trans; -png_byte bit_depth, color_type, channels; -png_uint_32 w, h, rowbytes; -png_uint_32 xres, yres; -png_bytep rowptr, trans; -png_bytep *row_pointers; -png_structp png_ptr; -png_infop info_ptr, end_info; -png_colorp palette; -png_textp text_ptr; /* ptr to text_chunk */ -PIX *pix, *pix1; -PIXCMAP *cmap; -MEMIODATA state; - - PROCNAME("pixReadMemPng"); - - if (!filedata) - return (PIX *)ERROR_PTR("filedata not defined", procName, NULL); - if (filesize < 1) - return (PIX *)ERROR_PTR("invalid filesize", procName, NULL); - - state.m_Next = 0; - state.m_Count = 0; - state.m_Last = &state; - state.m_Buffer = (char*)filedata; - state.m_Size = filesize; - pix = NULL; - - /* Allocate the 3 data structures */ - if ((png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, - (png_voidp)NULL, NULL, NULL)) == NULL) - return (PIX *)ERROR_PTR("png_ptr not made", procName, NULL); - - if ((info_ptr = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_read_struct(&png_ptr, (png_infopp)NULL, (png_infopp)NULL); - return (PIX *)ERROR_PTR("info_ptr not made", procName, NULL); - } - - if ((end_info = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_read_struct(&png_ptr, &info_ptr, (png_infopp)NULL); - return (PIX *)ERROR_PTR("end_info not made", procName, NULL); - } - - /* Set up png setjmp error handling */ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("internal png error", procName, NULL); - } - - png_set_read_fn(png_ptr, &state, memio_png_read_data); - - /* ---------------------------------------------------------- * - * Set the transforms flags. 
Whatever happens here, - * NEVER invert 1 bpp using PNG_TRANSFORM_INVERT_MONO. - * Also, do not use PNG_TRANSFORM_EXPAND, which would - * expand all images with bpp < 8 to 8 bpp. - * ---------------------------------------------------------- */ - /* To strip 16 --> 8 bit depth, use PNG_TRANSFORM_STRIP_16 */ - if (var_PNG_STRIP_16_TO_8 == 1) { /* our default */ - png_transforms = PNG_TRANSFORM_STRIP_16; - } else { - png_transforms = PNG_TRANSFORM_IDENTITY; - L_INFO("not stripping 16 --> 8 in png reading\n", procName); - } - - /* Read it */ - png_read_png(png_ptr, info_ptr, png_transforms, NULL); - - row_pointers = png_get_rows(png_ptr, info_ptr); - w = png_get_image_width(png_ptr, info_ptr); - h = png_get_image_height(png_ptr, info_ptr); - bit_depth = png_get_bit_depth(png_ptr, info_ptr); - rowbytes = png_get_rowbytes(png_ptr, info_ptr); - color_type = png_get_color_type(png_ptr, info_ptr); - channels = png_get_channels(png_ptr, info_ptr); - spp = channels; - tRNS = png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS) ? 
1 : 0; - - if (spp == 1) { - d = bit_depth; - } else { /* spp == 2 (gray + alpha), spp == 3 (rgb), spp == 4 (rgba) */ - d = 4 * bit_depth; - } - - /* Remove if/when this is implemented for all bit_depths */ - if (spp == 3 && bit_depth != 8) { - lept_stderr("Help: spp = 3 and depth = %d != 8\n!!", bit_depth); - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("not implemented for this depth", - procName, NULL); - } - - cmap = NULL; - if (color_type == PNG_COLOR_TYPE_PALETTE || - color_type == PNG_COLOR_MASK_PALETTE) { /* generate a colormap */ - png_get_PLTE(png_ptr, info_ptr, &palette, &num_palette); - cmap = pixcmapCreate(d); /* spp == 1 */ - for (cindex = 0; cindex < num_palette; cindex++) { - rval = palette[cindex].red; - gval = palette[cindex].green; - bval = palette[cindex].blue; - pixcmapAddColor(cmap, rval, gval, bval); - } - } - - if ((pix = pixCreate(w, h, d)) == NULL) { - pixcmapDestroy(&cmap); - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - pixcmapDestroy(&cmap); - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - } - pixSetInputFormat(pix, IFF_PNG); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - pixSetColormap(pix, cmap); - pixSetSpp(pix, spp); - - if (spp == 1 && !tRNS) { /* copy straight from buffer to pix */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - rowptr = row_pointers[i]; - for (j = 0; j < rowbytes; j++) { - SET_DATA_BYTE(line, j, rowptr[j]); - } - } - } else if (spp == 2) { /* grayscale + alpha; convert to RGBA */ - L_INFO("converting (gray + alpha) ==> RGBA\n", procName); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = k = 0; j < w; j++) { - /* Copy gray value into r, g and b */ - SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k]); - SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k]); - SET_DATA_BYTE(ppixel, COLOR_BLUE, rowptr[k++]); - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]); - ppixel++; - } - } - pixSetSpp(pix, 4); /* we do 
not support 2 spp pix */ - } else if (spp == 3 || spp == 4) { - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = k = 0; j < w; j++) { - SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k++]); - SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k++]); - SET_DATA_BYTE(ppixel, COLOR_BLUE, rowptr[k++]); - if (spp == 4) - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]); - ppixel++; - } - } - } - - /* Special spp == 1 cases with transparency: - * (1) 8 bpp without colormap; assume full transparency - * (2) 1 bpp with colormap + trans array (for alpha) - * (3) 8 bpp with colormap + trans array (for alpha) - * These all require converting to RGBA */ - if (spp == 1 && tRNS) { - if (!cmap) { - /* Case 1: make fully transparent RGBA image */ - L_INFO("transparency, 1 spp, no colormap, no transparency array: " - "convention is fully transparent image\n", procName); - L_INFO("converting (fully transparent 1 spp) ==> RGBA\n", procName); - pixDestroy(&pix); - pix = pixCreate(w, h, 32); /* init to alpha = 0 (transparent) */ - pixSetSpp(pix, 4); - } else { - L_INFO("converting (cmap + alpha) ==> RGBA\n", procName); - - /* Grab the transparency array */ - png_get_tRNS(png_ptr, info_ptr, &trans, &num_trans, NULL); - if (!trans) { /* invalid png file */ - pixDestroy(&pix); - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return (PIX *)ERROR_PTR("cmap, tRNS, but no transparency array", - procName, NULL); - } - - /* Save the cmap and destroy the pix */ - cmap = pixcmapCopy(pixGetColormap(pix)); - ncolors = pixcmapGetCount(cmap); - pixDestroy(&pix); - - /* Start over with 32 bit RGBA */ - pix = pixCreate(w, h, 32); - wpl = pixGetWpl(pix); - data = pixGetData(pix); - pixSetSpp(pix, 4); - -#if DEBUG_READ - lept_stderr("ncolors = %d, num_trans = %d\n", - ncolors, num_trans); - for (i = 0; i < ncolors; i++) { - pixcmapGetColor(cmap, i, &rval, &gval, &bval); - if (i < num_trans) { - lept_stderr("(r,g,b,a) = (%d,%d,%d,%d)\n", - rval, gval, bval, 
trans[i]); - } else { - lept_stderr("(r,g,b,a) = (%d,%d,%d,<<255>>)\n", - rval, gval, bval); - } - } -#endif /* DEBUG_READ */ - - /* Extract the data and convert to RGBA */ - if (d == 1) { - /* Case 2: 1 bpp with transparency (usually) behind white */ - L_INFO("converting 1 bpp cmap with alpha ==> RGBA\n", procName); - if (num_trans == 1) - L_INFO("num_trans = 1; second color opaque by default\n", - procName); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = 0, index = 0; j < rowbytes; j++) { - byte = rowptr[j]; - for (k = 0; k < 8 && index < w; k++, index++) { - bitval = (byte >> (7 - k)) & 1; - pixcmapGetColor(cmap, bitval, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, ppixel); - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, - bitval < num_trans ? trans[bitval] : 255); - ppixel++; - } - } - } - } else if (d == 8) { - /* Case 3: 8 bpp with cmap and associated transparency */ - L_INFO("converting 8 bpp cmap with alpha ==> RGBA\n", procName); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - rowptr = row_pointers[i]; - for (j = 0; j < w; j++) { - index = rowptr[j]; - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, ppixel); - /* Assume missing entries to be 255 (opaque) - * according to the spec: - * http://www.w3.org/TR/PNG/#11tRNS */ - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, - index < num_trans ? trans[index] : 255); - ppixel++; - } - } - } else { - L_ERROR("spp == 1, cmap, trans array, invalid depth: %d\n", - procName, d); - } - pixcmapDestroy(&cmap); - } - } - -#if DEBUG_READ - if (cmap) { - for (i = 0; i < 16; i++) { - lept_stderr("[%d] = %d\n", i, ((l_uint8 *)(cmap->array))[i]); - } - } -#endif /* DEBUG_READ */ - - /* Final adjustments for bpp = 1. - * + If there is no colormap, the image must be inverted because - * png stores black pixels as 0. - * + We have already handled the case of cmapped, 1 bpp pix - * with transparency, where the output pix is 32 bpp RGBA. 
- * If there is no transparency but the pix has a colormap, - * we remove the colormap, because functions operating on - * 1 bpp images in leptonica assume no colormap. - * + The colormap must be removed in such a way that the pixel - * values are not changed. If the values are only black and - * white, we return a 1 bpp image; if gray, return an 8 bpp pix; - * otherwise, return a 32 bpp rgb pix. - * - * Note that we cannot use the PNG_TRANSFORM_INVERT_MONO flag - * to do the inversion, because that flag (since version 1.0.9) - * inverts 8 bpp grayscale as well, which we don't want to do. - * (It also doesn't work if there is a colormap.) - * - * Note that if the input png is a 1-bit with colormap and - * transparency, it has already been rendered as a 32 bpp, - * spp = 4 rgba pix. - */ - if (pixGetDepth(pix) == 1) { - if (!cmap) { - pixInvert(pix, pix); - } else { - pix1 = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - pixDestroy(&pix); - pix = pix1; - } - } - - xres = png_get_x_pixels_per_meter(png_ptr, info_ptr); - yres = png_get_y_pixels_per_meter(png_ptr, info_ptr); - pixSetXRes(pix, (l_int32)((l_float32)xres / 39.37 + 0.5)); /* to ppi */ - pixSetYRes(pix, (l_int32)((l_float32)yres / 39.37 + 0.5)); /* to ppi */ - - /* Get the text if there is any */ - png_get_text(png_ptr, info_ptr, &text_ptr, &num_text); - if (num_text && text_ptr) - pixSetText(pix, text_ptr->text); - - png_destroy_read_struct(&png_ptr, &info_ptr, &end_info); - return pix; -} - - -/*---------------------------------------------------------------------* - * Writing png to memory * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWriteMemPng() - * - * \param[out] pfiledata png encoded data of pix - * \param[out] pfilesize size of png encoded data - * \param[in] pix - * \param[in] gamma use 0.0 if gamma is not defined - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See pixWriteStreamPng()
- * 
- */ -l_ok -pixWriteMemPng(l_uint8 **pfiledata, - size_t *pfilesize, - PIX *pix, - l_float32 gamma) -{ -char commentstring[] = "Comment"; -l_int32 i, j, k; -l_int32 wpl, d, spp, cmflag, opaque; -l_int32 ncolors, compval; -l_int32 *rmap, *gmap, *bmap, *amap; -l_uint32 *data, *ppixel; -png_byte bit_depth, color_type; -png_byte alpha[256]; -png_uint_32 w, h; -png_uint_32 xres, yres; -png_bytep *row_pointers; -png_bytep rowbuffer; -png_structp png_ptr; -png_infop info_ptr; -png_colorp palette; -PIX *pix1; -PIXCMAP *cmap; -char *text; -MEMIODATA state; - - PROCNAME("pixWriteMemPng"); - - if (pfiledata) *pfiledata = NULL; - if (pfilesize) *pfilesize = 0; - if (!pfiledata) - return ERROR_INT("&filedata not defined", procName, 1); - if (!pfilesize) - return ERROR_INT("&filesize not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - state.m_Buffer = 0; - state.m_Size = 0; - state.m_Next = 0; - state.m_Count = 0; - state.m_Last = &state; - - /* Allocate the 2 data structures */ - if ((png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, - (png_voidp)NULL, NULL, NULL)) == NULL) - return ERROR_INT("png_ptr not made", procName, 1); - - if ((info_ptr = png_create_info_struct(png_ptr)) == NULL) { - png_destroy_write_struct(&png_ptr, (png_infopp)NULL); - return ERROR_INT("info_ptr not made", procName, 1); - } - - /* Set up png setjmp error handling */ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_write_struct(&png_ptr, &info_ptr); - return ERROR_INT("internal png error", procName, 1); - } - - png_set_write_fn(png_ptr, &state, memio_png_write_data, - (png_flush_ptr)NULL); - - /* With best zlib compression (9), get between 1 and 10% improvement - * over default (6), but the compression is 3 to 10 times slower. - * Use the zlib default (6) as our default compression unless - * pix->special falls in the range [10 ... 19]; then subtract 10 - * to get the compression value. 
*/ - compval = Z_DEFAULT_COMPRESSION; - if (pix->special >= 10 && pix->special < 20) - compval = pix->special - 10; - png_set_compression_level(png_ptr, compval); - - w = pixGetWidth(pix); - h = pixGetHeight(pix); - d = pixGetDepth(pix); - spp = pixGetSpp(pix); - if ((cmap = pixGetColormap(pix))) - cmflag = 1; - else - cmflag = 0; - - /* Set the color type and bit depth. */ - if (d == 32 && spp == 4) { - bit_depth = 8; - color_type = PNG_COLOR_TYPE_RGBA; /* 6 */ - cmflag = 0; /* ignore if it exists */ - } else if (d == 24 || d == 32) { - bit_depth = 8; - color_type = PNG_COLOR_TYPE_RGB; /* 2 */ - cmflag = 0; /* ignore if it exists */ - } else { - bit_depth = d; - color_type = PNG_COLOR_TYPE_GRAY; /* 0 */ - } - if (cmflag) - color_type = PNG_COLOR_TYPE_PALETTE; /* 3 */ - -#if DEBUG_WRITE - lept_stderr("cmflag = %d, bit_depth = %d, color_type = %d\n", - cmflag, bit_depth, color_type); -#endif /* DEBUG_WRITE */ - - png_set_IHDR(png_ptr, info_ptr, w, h, bit_depth, color_type, - PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, - PNG_FILTER_TYPE_BASE); - - /* Store resolution in ppm, if known */ - xres = (png_uint_32)(39.37 * (l_float32)pixGetXRes(pix) + 0.5); - yres = (png_uint_32)(39.37 * (l_float32)pixGetYRes(pix) + 0.5); - if ((xres == 0) || (yres == 0)) - png_set_pHYs(png_ptr, info_ptr, 0, 0, PNG_RESOLUTION_UNKNOWN); - else - png_set_pHYs(png_ptr, info_ptr, xres, yres, PNG_RESOLUTION_METER); - - if (cmflag) { - pixcmapToArrays(cmap, &rmap, &gmap, &bmap, &amap); - ncolors = pixcmapGetCount(cmap); - pixcmapIsOpaque(cmap, &opaque); - - /* Make and save the palette */ - palette = (png_colorp)LEPT_CALLOC(ncolors, sizeof(png_color)); - for (i = 0; i < ncolors; i++) { - palette[i].red = (png_byte)rmap[i]; - palette[i].green = (png_byte)gmap[i]; - palette[i].blue = (png_byte)bmap[i]; - alpha[i] = (png_byte)amap[i]; - } - - png_set_PLTE(png_ptr, info_ptr, palette, (int)ncolors); - if (!opaque) /* alpha channel has some transparency; assume valid */ - png_set_tRNS(png_ptr, 
info_ptr, (png_bytep)alpha, - (int)ncolors, NULL); - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - LEPT_FREE(amap); - } - - /* 0.4545 is treated as the default by some image - * display programs (not gqview). A value > 0.4545 will - * lighten an image as displayed by xv, display, etc. */ - if (gamma > 0.0) - png_set_gAMA(png_ptr, info_ptr, (l_float64)gamma); - - if ((text = pixGetText(pix))) { - png_text text_chunk; - text_chunk.compression = PNG_TEXT_COMPRESSION_NONE; - text_chunk.key = commentstring; - text_chunk.text = text; - text_chunk.text_length = strlen(text); -#ifdef PNG_ITXT_SUPPORTED - text_chunk.itxt_length = 0; - text_chunk.lang = NULL; - text_chunk.lang_key = NULL; -#endif - png_set_text(png_ptr, info_ptr, &text_chunk, 1); - } - - /* Write header and palette info */ - png_write_info(png_ptr, info_ptr); - - if ((d != 32) && (d != 24)) { /* not rgb color */ - /* Generate a temporary pix with bytes swapped. - * For writing a 1 bpp image as png: - * ~ if no colormap, invert the data, because png writes - * black as 0 - * ~ if colormapped, do not invert the data; the two RGBA - * colors can have any value. 
*/ - if (d == 1 && !cmap) { - pix1 = pixInvert(NULL, pix); - pixEndianByteSwap(pix1); - } else { - pix1 = pixEndianByteSwapNew(pix); - } - if (!pix1) { - png_destroy_write_struct(&png_ptr, &info_ptr); - if (cmflag) LEPT_FREE(palette); - memio_free(&state); - return ERROR_INT("pix1 not made", procName, 1); - } - - /* Make and assign array of image row pointers */ - row_pointers = (png_bytep *)LEPT_CALLOC(h, sizeof(png_bytep)); - wpl = pixGetWpl(pix1); - data = pixGetData(pix1); - for (i = 0; i < h; i++) - row_pointers[i] = (png_bytep)(data + i * wpl); - png_set_rows(png_ptr, info_ptr, row_pointers); - - /* Transfer the data */ - png_write_image(png_ptr, row_pointers); - png_write_end(png_ptr, info_ptr); - - if (cmflag) LEPT_FREE(palette); - LEPT_FREE(row_pointers); - pixDestroy(&pix1); - png_destroy_write_struct(&png_ptr, &info_ptr); - - memio_png_flush(&state); - *pfiledata = (l_uint8 *)state.m_Buffer; - state.m_Buffer = 0; - *pfilesize = state.m_Count; - memio_free(&state); - return 0; - } - - /* For rgb, compose and write a row at a time */ - data = pixGetData(pix); - wpl = pixGetWpl(pix); - if (d == 24) { /* See note 7 above: special case of 24 bpp rgb */ - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - png_write_rows(png_ptr, (png_bytepp)&ppixel, 1); - } - } else { /* 32 bpp rgb and rgba. 
Write out the alpha channel if either - * the pix has 4 spp or writing it is requested anyway */ - rowbuffer = (png_bytep)LEPT_CALLOC(w, 4); - for (i = 0; i < h; i++) { - ppixel = data + i * wpl; - for (j = k = 0; j < w; j++) { - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_RED); - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_GREEN); - rowbuffer[k++] = GET_DATA_BYTE(ppixel, COLOR_BLUE); - if (spp == 4) - rowbuffer[k++] = GET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL); - ppixel++; - } - - png_write_rows(png_ptr, &rowbuffer, 1); - } - LEPT_FREE(rowbuffer); - } - - png_write_end(png_ptr, info_ptr); - - if (cmflag) - LEPT_FREE(palette); - png_destroy_write_struct(&png_ptr, &info_ptr); - - memio_png_flush(&state); - *pfiledata = (l_uint8 *)state.m_Buffer; - state.m_Buffer = 0; - *pfilesize = state.m_Count; - memio_free(&state); - return 0; -} - -/* --------------------------------------------*/ -#endif /* HAVE_LIBPNG */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pngiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pngiostub.c deleted file mode 100644 index f3c8bed9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pngiostub.c +++ /dev/null @@ -1,143 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pngiostub.c - *
- *
- *     Stubs for pngio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !HAVE_LIBPNG /* defined in environ.h */ -/* --------------------------------------------*/ - -PIX * pixReadStreamPng(FILE *fp) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamPng", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderPng(const char *filename, l_int32 *pwidth, l_int32 *pheight, - l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap) -{ - return ERROR_INT("function not present", "readHeaderPng", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok freadHeaderPng(FILE *fp, l_int32 *pwidth, l_int32 *pheight, - l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap) -{ - return ERROR_INT("function not present", "freadHeaderPng", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderMemPng(const l_uint8 *data, size_t size, l_int32 *pwidth, - l_int32 *pheight, l_int32 *pbps, l_int32 *pspp, - l_int32 *piscmap) -{ - return ERROR_INT("function not present", "readHeaderMemPng", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_int32 fgetPngResolution(FILE *fp, l_int32 *pxres, l_int32 *pyres) -{ - return ERROR_INT("function not present", "fgetPngResolution", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok isPngInterlaced(const char *filename, l_int32 *pinterlaced) -{ - return ERROR_INT("function not present", "isPngInterlaced", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok fgetPngColormapInfo(FILE *fp, PIXCMAP **pcmap, l_int32 *ptransparency) -{ - return ERROR_INT("function not present", "fgetPngColormapInfo", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWritePng(const char 
*filename, PIX *pix, l_float32 gamma) -{ - return ERROR_INT("function not present", "pixWritePng", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamPng(FILE *fp, PIX *pix, l_float32 gamma) -{ - return ERROR_INT("function not present", "pixWriteStreamPng", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixSetZlibCompression(PIX *pix, l_int32 compval) - -{ - return ERROR_INT("function not present", "pixSetZlibCompression", 1); -} - -/* ----------------------------------------------------------------------*/ - -void l_pngSetReadStrip16To8(l_int32 flag) -{ - L_ERROR("function not present\n", "l_pngSetReadStrip16To8"); - return; -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemPng(const l_uint8 *filedata, size_t filesize) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadMemPng", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemPng(l_uint8 **pfiledata, size_t *pfilesize, PIX *pix, - l_float32 gamma) -{ - return ERROR_INT("function not present", "pixWriteMemPng", 1); -} - -/* --------------------------------------------*/ -#endif /* !HAVE_LIBPNG */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pnmio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pnmio.c deleted file mode 100644 index e9cfad57..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pnmio.c +++ /dev/null @@ -1,1534 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pnmio.c - *
- *
- *      Stream interface
- *          PIX             *pixReadStreamPnm()
- *          l_int32          readHeaderPnm()
- *          l_int32          freadHeaderPnm()
- *          l_int32          pixWriteStreamPnm()
- *          l_int32          pixWriteStreamAsciiPnm()
- *          l_int32          pixWriteStreamPam()
- *
- *      Read/write to memory
- *          PIX             *pixReadMemPnm()
- *          l_int32          readHeaderMemPnm()
- *          l_int32          pixWriteMemPnm()
- *          l_int32          pixWriteMemPam()
- *
- *      Local helpers
- *          static l_int32   pnmReadNextAsciiValue();
- *          static l_int32   pnmReadNextNumber();
- *          static l_int32   pnmReadNextString();
- *          static l_int32   pnmSkipCommentLines();
- *
- *      These are here by popular demand, with the help of Mattias
- *      Kregert (mattias@kregert.se), who provided the first implementation.
- *
- *      The pnm formats are exceedingly simple, because they have
- *      no compression and no colormaps.  They support images that
- *      are 1 bpp; 2, 4, 8 and 16 bpp grayscale; and rgb.
- *
- *      The original pnm formats ("ASCII") are included for completeness,
- *      but their use is deprecated for all but tiny iconic images.
- *      They are extremely wasteful of memory; for example, the P1 binary
- *      ASCII format is 16 times as big as the packed uncompressed
- *      format, because 2 characters are used to represent every bit
- *      (pixel) in the image.  Reading is slow because we check for extra
- *      white space and EOL at every sample value.
- *
- *      The packed pnm formats ("raw") give file sizes similar to
- *      bmp files, which are uncompressed packed.  However, bmp
- *      are more flexible, because they can support colormaps.
- *
- *      We don't differentiate between the different types ("pbm",
- *      "pgm", "ppm") at the interface level, because this is really a
- *      "distinction without a difference."  You read a file, you get
- *      the appropriate Pix.  You write a file from a Pix, you get the
- *      appropriate type of file.  If there is a colormap on the Pix,
- *      and the Pix is more than 1 bpp, you get either an 8 bpp pgm
- *      or a 24 bpp RGB pnm, depending on whether the colormap colors
- *      are gray or rgb, respectively.
- *
- *      This follows the general policy that the I/O routines don't
- *      make decisions about the content of the image -- you do that
- *      with image processing before you write it out to file.
- *      The I/O routines just try to make the closest connection
- *      possible between the file and the Pix in memory.
- *
- *      On systems like windows without fmemopen() and open_memstream(),
- *      we write data to a temp file and read it back for operations
- *      between pix and compressed-data, such as pixReadMemPnm() and
- *      pixWriteMemPnm().
- *
- *      The P7 format is new. It introduced a header with multiple
- *      lines containing distinct tags for the various fields.
- *      See: http://netpbm.sourceforge.net/doc/pam.html
- *
- *        WIDTH          ; mandatory, exactly once
- *        HEIGHT         ; mandatory, exactly once
- *        DEPTH          ; mandatory, exactly once,
- *                            ; its meaning is equivalent to spp
- *        MAXVAL         ; mandatory, one of 1, 3, 15, 255 or 65535
- *        TUPLTYPE    ; optional; BLACKANDWHITE, GRAYSCALE, RGB
- *                            ; and optional suffix _ALPHA, e.g. RGB_ALPHA
- *        ENDHDR              ; mandatory, last header line
- *
- *      Reading BLACKANDWHITE_ALPHA and GRAYSCALE_ALPHA, which have a DEPTH
- *      value of 2, is supported. The original image is converted to a Pix
- *      with 32-bpp and alpha channel (spp == 4).
- *
- *      Writing P7 format is currently selected for 32-bpp with alpha
- *      channel, i.e. for Pix which have spp == 4, using pixWriteStreamPam().
- *      Jürgen Buchmüller provided the implementation for the P7 (pam) format.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if USE_PNMIO /* defined in environ.h */ -/* --------------------------------------------*/ - -static l_int32 pnmReadNextAsciiValue(FILE *fp, l_int32 *pval); -static l_int32 pnmReadNextNumber(FILE *fp, l_int32 *pval); -static l_int32 pnmReadNextString(FILE *fp, char *buff, l_int32 size); -static l_int32 pnmSkipCommentLines(FILE *fp); - - /* a sanity check on the size read from file */ -static const l_int32 MAX_PNM_WIDTH = 100000; -static const l_int32 MAX_PNM_HEIGHT = 100000; - - -/*--------------------------------------------------------------------* - * Stream interface * - *--------------------------------------------------------------------*/ -/*! - * \brief pixReadStreamPnm() - * - * \param[in] fp file stream opened for read - * \return pix, or NULL on error - */ -PIX * -pixReadStreamPnm(FILE *fp) -{ -l_uint8 val8, rval8, gval8, bval8, aval8, mask8; -l_uint16 val16, rval16, gval16, bval16, aval16; -l_int32 w, h, d, bps, spp, bpl, wpl, i, j, type; -l_int32 val, rval, gval, bval; -l_uint32 rgbval; -l_uint32 *line, *data; -PIX *pix; - - PROCNAME("pixReadStreamPnm"); - - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - - if (freadHeaderPnm(fp, &w, &h, &d, &type, &bps, &spp)) - return (PIX *)ERROR_PTR("header read failed", procName, NULL); - if (bps < 1 || bps > 16) - return (PIX *)ERROR_PTR("invalid bps", procName, NULL); - if (spp < 1 || spp > 4) - return (PIX *)ERROR_PTR("invalid spp", procName, NULL); - if ((pix = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - pixSetInputFormat(pix, IFF_PNM); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - - /* If type == 6 and bps == 16, we use the code in type 7 - * to read 6 bytes/pixel from the input file. 
*/ - if (type == 6 && bps == 16) - type = 7; - - switch (type) { - case 1: - case 2: - /* Old "ASCII" binary or gray format */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (pnmReadNextAsciiValue(fp, &val)) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read abend", procName, NULL); - } - pixSetPixel(pix, j, i, val); - } - } - break; - - case 3: - /* Old "ASCII" rgb format */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (pnmReadNextAsciiValue(fp, &rval)) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read abend", procName, NULL); - } - if (pnmReadNextAsciiValue(fp, &gval)) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read abend", procName, NULL); - } - if (pnmReadNextAsciiValue(fp, &bval)) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read abend", procName, NULL); - } - composeRGBPixel(rval, gval, bval, &rgbval); - pixSetPixel(pix, j, i, rgbval); - } - } - break; - - case 4: - /* "raw" format for 1 bpp */ - bpl = (d * w + 7) / 8; - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < bpl; j++) { - if (fread(&val8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error in 4", procName, NULL); - } - SET_DATA_BYTE(line, j, val8); - } - } - break; - - case 5: - /* "raw" format for grayscale */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - if (d != 16) { - for (j = 0; j < w; j++) { - if (fread(&val8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("error in 5", procName, NULL); - } - if (d == 2) - SET_DATA_DIBIT(line, j, val8); - else if (d == 4) - SET_DATA_QBIT(line, j, val8); - else /* d == 8 */ - SET_DATA_BYTE(line, j, val8); - } - } else { /* d == 16 */ - for (j = 0; j < w; j++) { - if (fread(&val16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("16 bpp error", procName, NULL); - } - SET_DATA_TWO_BYTES(line, j, val16); - } - } - } - break; - - case 6: - /* "raw" format, type == 6; 8 bps, rgb */ - for (i = 0; i < h; i++) { - line = data + i * wpl; 
- for (j = 0; j < wpl; j++) { - if (fread(&rval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 6", - procName, NULL); - } - if (fread(&gval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 6", - procName, NULL); - } - if (fread(&bval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 6", - procName, NULL); - } - composeRGBPixel(rval8, gval8, bval8, &rgbval); - line[j] = rgbval; - } - } - break; - - case 7: - /* "arbitrary" format; type == 7; */ - if (bps != 16) { - mask8 = (1 << bps) - 1; - switch (spp) { - case 1: /* 1, 2, 4, 8 bpp grayscale */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (fread(&val8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - val8 = val8 & mask8; - if (bps == 1) val8 ^= 1; /* white-is-1 photometry */ - pixSetPixel(pix, j, i, val8); - } - } - break; - - case 2: /* 1, 2, 4, 8 bpp grayscale + alpha */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (fread(&val8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&aval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - val8 = val8 & mask8; - aval8 = aval8 & mask8; - composeRGBAPixel(val8, val8, val8, aval8, &rgbval); - pixSetPixel(pix, j, i, rgbval); - } - } - pixSetSpp(pix, 4); - break; - - case 3: /* rgb */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < wpl; j++) { - if (fread(&rval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&gval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&bval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - rval8 = 
rval8 & mask8; - gval8 = gval8 & mask8; - bval8 = bval8 & mask8; - composeRGBPixel(rval8, gval8, bval8, &rgbval); - line[j] = rgbval; - } - } - break; - - case 4: /* rgba */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < wpl; j++) { - if (fread(&rval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&gval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&bval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&aval8, 1, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - rval8 = rval8 & mask8; - gval8 = gval8 & mask8; - bval8 = bval8 & mask8; - aval8 = aval8 & mask8; - composeRGBAPixel(rval8, gval8, bval8, aval8, &rgbval); - line[j] = rgbval; - } - } - pixSetSpp(pix, 4); - break; - } - } else { /* bps == 16 */ - /* I have only seen one example that is type 6, 16 bps. - * It was 3 spp (rgb), and the 8 bps of real data was stored - * in the second byte. In the following, I make the wild - * assumption that for all 16 bpp pnm/pam files, we can - * take the second byte. 
*/ - switch (spp) { - case 1: /* 16 bps grayscale */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (fread(&val16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - val8 = val16 & 0xff; - pixSetPixel(pix, j, i, val8); - } - } - break; - - case 2: /* 16 bps grayscale + alpha */ - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - if (fread(&val16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&aval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - val8 = val16 & 0xff; - aval8 = aval16 & 0xff; - composeRGBAPixel(val8, val8, val8, aval8, &rgbval); - pixSetPixel(pix, j, i, rgbval); - } - } - pixSetSpp(pix, 4); - break; - - case 3: /* 16bps rgb */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < wpl; j++) { - if (fread(&rval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&gval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&bval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - rval8 = rval16 & 0xff; - gval8 = gval16 & 0xff; - bval8 = bval16 & 0xff; - composeRGBPixel(rval8, gval8, bval8, &rgbval); - line[j] = rgbval; - } - } - break; - - case 4: /* 16bps rgba */ - for (i = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < wpl; j++) { - if (fread(&rval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&gval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - if (fread(&bval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - 
if (fread(&aval16, 2, 1, fp) != 1) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("read error type 7", - procName, NULL); - } - rval8 = rval16 & 0xff; - gval8 = gval16 & 0xff; - bval8 = bval16 & 0xff; - aval8 = aval16 & 0xff; - composeRGBAPixel(rval8, gval8, bval8, aval8, &rgbval); - line[j] = rgbval; - } - } - pixSetSpp(pix, 4); - break; - } - } - break; - } - return pix; -} - - -/*! - * \brief readHeaderPnm() - * - * \param[in] filename - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pd [optional] - * \param[out] ptype [optional] pnm type - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderPnm(const char *filename, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd, - l_int32 *ptype, - l_int32 *pbps, - l_int32 *pspp) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderPnm"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pd) *pd = 0; - if (ptype) *ptype = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = freadHeaderPnm(fp, pw, ph, pd, ptype, pbps, pspp); - fclose(fp); - return ret; -} - - -/*! 
- * \brief freadHeaderPnm() - * - * \param[in] fp file stream opened for read - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pd [optional] - * \param[out] ptype [optional] pnm type - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \return 0 if OK, 1 on error - */ -l_ok -freadHeaderPnm(FILE *fp, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd, - l_int32 *ptype, - l_int32 *pbps, - l_int32 *pspp) -{ -char tag[16], tupltype[32]; -l_int32 i, w, h, d, bps, spp, type; -l_int32 maxval; -l_int32 ch; - - PROCNAME("freadHeaderPnm"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pd) *pd = 0; - if (ptype) *ptype = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - - if (fscanf(fp, "P%d\n", &type) != 1) - return ERROR_INT("invalid read for type", procName, 1); - if (type < 1 || type > 7) - return ERROR_INT("invalid pnm file", procName, 1); - - if (pnmSkipCommentLines(fp)) - return ERROR_INT("no data in file", procName, 1); - - if (type == 7) { - w = h = d = bps = spp = maxval = 0; - for (i = 0; i < 10; i++) { /* limit to 10 lines of this header */ - if (pnmReadNextString(fp, tag, sizeof(tag))) - return ERROR_INT("found no next tag", procName, 1); - if (!strcmp(tag, "WIDTH")) { - if (pnmReadNextNumber(fp, &w)) - return ERROR_INT("failed reading width", procName, 1); - continue; - } - if (!strcmp(tag, "HEIGHT")) { - if (pnmReadNextNumber(fp, &h)) - return ERROR_INT("failed reading height", procName, 1); - continue; - } - if (!strcmp(tag, "DEPTH")) { - if (pnmReadNextNumber(fp, &spp)) - return ERROR_INT("failed reading depth", procName, 1); - continue; - } - if (!strcmp(tag, "MAXVAL")) { - if (pnmReadNextNumber(fp, &maxval)) - return ERROR_INT("failed reading maxval", procName, 1); - continue; - } - if (!strcmp(tag, "TUPLTYPE")) { - if (pnmReadNextString(fp, tupltype, sizeof(tupltype))) - return ERROR_INT("failed reading tuple type", procName, 1); - 
continue; - } - if (!strcmp(tag, "ENDHDR")) { - if ('\n' != (ch = fgetc(fp))) - return ERROR_INT("missing LF after ENDHDR", procName, 1); - break; - } - } - if (w <= 0 || h <= 0 || w > MAX_PNM_WIDTH || h > MAX_PNM_HEIGHT) { - L_INFO("invalid size: w = %d, h = %d\n", procName, w, h); - return 1; - } - if (maxval == 1) { - d = bps = 1; - } else if (maxval == 3) { - d = bps = 2; - } else if (maxval == 15) { - d = bps = 4; - } else if (maxval == 255) { - d = bps = 8; - } else if (maxval == 0xffff) { - d = bps = 16; - } else { - L_INFO("invalid maxval = %d\n", procName, maxval); - return 1; - } - switch (spp) { - case 1: - /* d and bps are already set */ - break; - case 2: - case 3: - case 4: - /* create a 32 bpp Pix */ - d = 32; - break; - default: - L_INFO("invalid depth = %d\n", procName, spp); - return 1; - } - } else { - - if (fscanf(fp, "%d %d\n", &w, &h) != 2) - return ERROR_INT("invalid read for w,h", procName, 1); - if (w <= 0 || h <= 0 || w > MAX_PNM_WIDTH || h > MAX_PNM_HEIGHT) { - L_INFO("invalid size: w = %d, h = %d\n", procName, w, h); - return 1; - } - - /* Get depth of pix. For types 2 and 5, we use the maxval. - * Important implementation note: - * - You can't use fscanf(), which throws away whitespace, - * and will discard binary data if it starts with whitespace(s). - * - You can't use fgets(), which stops at newlines, but this - * dumb format doesn't require a newline after the maxval - * number -- it just requires one whitespace character. - * - Which leaves repeated calls to fgetc, including swallowing - * the single whitespace character. 
*/ - if (type == 1 || type == 4) { - d = 1; - spp = 1; - bps = 1; - } else if (type == 2 || type == 5) { - if (pnmReadNextNumber(fp, &maxval)) - return ERROR_INT("invalid read for maxval (2,5)", procName, 1); - if (maxval == 3) { - d = 2; - } else if (maxval == 15) { - d = 4; - } else if (maxval == 255) { - d = 8; - } else if (maxval == 0xffff) { - d = 16; - } else { - lept_stderr("maxval = %d\n", maxval); - return ERROR_INT("invalid maxval", procName, 1); - } - bps = d; - spp = 1; - } else { /* type == 3 || type == 6; this is rgb */ - if (pnmReadNextNumber(fp, &maxval)) - return ERROR_INT("invalid read for maxval (3,6)", procName, 1); - if (maxval != 255 && maxval != 0xffff) { - L_ERROR("unexpected maxval = %d\n", procName, maxval); - return 1; - } - bps = (maxval == 255) ? 8 : 16; - d = 32; - spp = 3; - } - } - if (pw) *pw = w; - if (ph) *ph = h; - if (pd) *pd = d; - if (ptype) *ptype = type; - if (pbps) *pbps = bps; - if (pspp) *pspp = spp; - return 0; -} - - -/*! - * \brief pixWriteStreamPnm() - * - * \param[in] fp file stream opened for write - * \param[in] pix - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This writes "raw" packed format only:
- *          1 bpp --> pbm (P4)
- *          2, 4, 8, 16 bpp, no colormap or grayscale colormap --> pgm (P5)
- *          2, 4, 8 bpp with color-valued colormap, or rgb --> rgb ppm (P6)
- *      (2) 24 bpp rgb are not supported in leptonica, but this will
- *          write them out as a packed array of bytes (3 to a pixel).
- * 
- */ -l_ok -pixWriteStreamPnm(FILE *fp, - PIX *pix) -{ -l_uint8 val8; -l_uint8 pel[4]; -l_uint16 val16; -l_int32 h, w, d, ds, i, j, wpls, bpl, filebpl, writeerror, maxval; -l_uint32 *pword, *datas, *lines; -PIX *pixs; - - PROCNAME("pixWriteStreamPnm"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 24 && d != 32) - return ERROR_INT("d not in {1,2,4,8,16,24,32}", procName, 1); - if (d == 32 && pixGetSpp(pix) == 4) - return pixWriteStreamPam(fp, pix); - - /* If a colormap exists, remove and convert to grayscale or rgb */ - if (pixGetColormap(pix) != NULL) - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - else - pixs = pixClone(pix); - ds = pixGetDepth(pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - writeerror = 0; - - if (ds == 1) { /* binary */ - fprintf(fp, "P4\n# Raw PBM file written by leptonica " - "(www.leptonica.com)\n%d %d\n", w, h); - - bpl = (w + 7) / 8; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < bpl; j++) { - val8 = GET_DATA_BYTE(lines, j); - fwrite(&val8, 1, 1, fp); - } - } - } else if (ds == 2 || ds == 4 || ds == 8 || ds == 16) { /* grayscale */ - maxval = (1 << ds) - 1; - fprintf(fp, "P5\n# Raw PGM file written by leptonica " - "(www.leptonica.com)\n%d %d\n%d\n", w, h, maxval); - - if (ds != 16) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - if (ds == 2) - val8 = GET_DATA_DIBIT(lines, j); - else if (ds == 4) - val8 = GET_DATA_QBIT(lines, j); - else /* ds == 8 */ - val8 = GET_DATA_BYTE(lines, j); - fwrite(&val8, 1, 1, fp); - } - } - } else { /* ds == 16 */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val16 = GET_DATA_TWO_BYTES(lines, j); - fwrite(&val16, 2, 1, fp); - } - } - } - } else { /* rgb color */ - fprintf(fp, "P6\n# Raw PPM file 
written by leptonica " - "(www.leptonica.com)\n%d %d\n255\n", w, h); - - if (d == 24) { /* packed, 3 bytes to a pixel */ - filebpl = 3 * w; - for (i = 0; i < h; i++) { /* write out each raster line */ - lines = datas + i * wpls; - if (fwrite(lines, 1, filebpl, fp) != filebpl) - writeerror = 1; - } - } else { /* 32 bpp rgb */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < wpls; j++) { - pword = lines + j; - pel[0] = GET_DATA_BYTE(pword, COLOR_RED); - pel[1] = GET_DATA_BYTE(pword, COLOR_GREEN); - pel[2] = GET_DATA_BYTE(pword, COLOR_BLUE); - if (fwrite(pel, 1, 3, fp) != 3) - writeerror = 1; - } - } - } - } - - pixDestroy(&pixs); - if (writeerror) - return ERROR_INT("image write fail", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteStreamAsciiPnm() - * - * \param[in] fp file stream opened for write - * \param[in] pix - * \return 0 if OK; 1 on error - * - * Writes "ASCII" format only: - * 1 bpp --> pbm P1 - * 2, 4, 8, 16 bpp, no colormap or grayscale colormap --> pgm P2 - * 2, 4, 8 bpp with color-valued colormap, or rgb --> rgb ppm P3 - */ -l_ok -pixWriteStreamAsciiPnm(FILE *fp, - PIX *pix) -{ -char buffer[256]; -l_uint8 cval[3]; -l_int32 h, w, d, ds, i, j, k, maxval, count; -l_uint32 val; -PIX *pixs; - - PROCNAME("pixWriteStreamAsciiPnm"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return ERROR_INT("d not in {1,2,4,8,16,32}", procName, 1); - - /* If a colormap exists, remove and convert to grayscale or rgb */ - if (pixGetColormap(pix) != NULL) - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - else - pixs = pixClone(pix); - ds = pixGetDepth(pixs); - - if (ds == 1) { /* binary */ - fprintf(fp, "P1\n# Ascii PBM file written by leptonica " - "(www.leptonica.com)\n%d %d\n", w, h); - - count = 0; - for (i = 0; i < h; i++) { - for (j = 0; j 
< w; j++) { - pixGetPixel(pixs, j, i, &val); - if (val == 0) - fputc('0', fp); - else /* val == 1 */ - fputc('1', fp); - fputc(' ', fp); - count += 2; - if (count >= 70) { - fputc('\n', fp); - count = 0; - } - } - } - } else if (ds == 2 || ds == 4 || ds == 8 || ds == 16) { /* grayscale */ - maxval = (1 << ds) - 1; - fprintf(fp, "P2\n# Ascii PGM file written by leptonica " - "(www.leptonica.com)\n%d %d\n%d\n", w, h, maxval); - - count = 0; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pixs, j, i, &val); - if (ds == 2) { - snprintf(buffer, sizeof(buffer), "%1d ", val); - fwrite(buffer, 1, 2, fp); - count += 2; - } else if (ds == 4) { - snprintf(buffer, sizeof(buffer), "%2d ", val); - fwrite(buffer, 1, 3, fp); - count += 3; - } else if (ds == 8) { - snprintf(buffer, sizeof(buffer), "%3d ", val); - fwrite(buffer, 1, 4, fp); - count += 4; - } else { /* ds == 16 */ - snprintf(buffer, sizeof(buffer), "%5d ", val); - fwrite(buffer, 1, 6, fp); - count += 6; - } - if (count >= 60) { - fputc('\n', fp); - count = 0; - } - } - } - } else { /* rgb color */ - fprintf(fp, "P3\n# Ascii PPM file written by leptonica " - "(www.leptonica.com)\n%d %d\n255\n", w, h); - count = 0; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pixs, j, i, &val); - cval[0] = GET_DATA_BYTE(&val, COLOR_RED); - cval[1] = GET_DATA_BYTE(&val, COLOR_GREEN); - cval[2] = GET_DATA_BYTE(&val, COLOR_BLUE); - for (k = 0; k < 3; k++) { - snprintf(buffer, sizeof(buffer), "%3d ", cval[k]); - fwrite(buffer, 1, 4, fp); - count += 4; - if (count >= 60) { - fputc('\n', fp); - count = 0; - } - } - } - } - } - - pixDestroy(&pixs); - return 0; -} - - -/*! - * \brief pixWriteStreamPam() - * - * \param[in] fp file stream opened for write - * \param[in] pix - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This writes arbitrary PAM (P7) packed format.
- *      (2) 24 bpp rgb are not supported in leptonica, but this will
- *          write them out as a packed array of bytes (3 to a pixel).
- * 
- */ -l_ok -pixWriteStreamPam(FILE *fp, - PIX *pix) -{ -l_uint8 val8; -l_uint8 pel[8]; -l_uint16 val16; -l_int32 h, w, d, ds, i, j; -l_int32 wpls, spps, filebpl, writeerror, maxval; -l_uint32 *pword, *datas, *lines; -PIX *pixs; - - PROCNAME("pixWriteStreamPam"); - - if (!fp) - return ERROR_INT("fp not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - pixGetDimensions(pix, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 24 && d != 32) - return ERROR_INT("d not in {1,2,4,8,16,24,32}", procName, 1); - - /* If a colormap exists, remove and convert to grayscale or rgb */ - if (pixGetColormap(pix) != NULL) - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - else - pixs = pixClone(pix); - ds = pixGetDepth(pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - spps = pixGetSpp(pixs); - if (ds < 24) - maxval = (1 << ds) - 1; - else - maxval = 255; - - writeerror = 0; - fprintf(fp, "P7\n# Arbitrary PAM file written by leptonica " - "(www.leptonica.com)\n"); - fprintf(fp, "WIDTH %d\n", w); - fprintf(fp, "HEIGHT %d\n", h); - fprintf(fp, "DEPTH %d\n", spps); - fprintf(fp, "MAXVAL %d\n", maxval); - if (spps == 1 && ds == 1) - fprintf(fp, "TUPLTYPE BLACKANDWHITE\n"); - else if (spps == 1) - fprintf(fp, "TUPLTYPE GRAYSCALE\n"); - else if (spps == 3) - fprintf(fp, "TUPLTYPE RGB\n"); - else if (spps == 4) - fprintf(fp, "TUPLTYPE RGB_ALPHA\n"); - fprintf(fp, "ENDHDR\n"); - - switch (d) { - case 1: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val8 = GET_DATA_BIT(lines, j); - val8 ^= 1; /* pam apparently uses white-is-1 photometry */ - if (fwrite(&val8, 1, 1, fp) != 1) - writeerror = 1; - } - } - break; - - case 2: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val8 = GET_DATA_DIBIT(lines, j); - if (fwrite(&val8, 1, 1, fp) != 1) - writeerror = 1; - } - } - break; - - case 4: - for (i = 0; i < h; i++) { - lines = datas + i 
* wpls; - for (j = 0; j < w; j++) { - val8 = GET_DATA_QBIT(lines, j); - if (fwrite(&val8, 1, 1, fp) != 1) - writeerror = 1; - } - } - break; - - case 8: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val8 = GET_DATA_BYTE(lines, j); - if (fwrite(&val8, 1, 1, fp) != 1) - writeerror = 1; - } - } - break; - - case 16: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < w; j++) { - val16 = GET_DATA_TWO_BYTES(lines, j); - if (fwrite(&val16, 2, 1, fp) != 1) - writeerror = 1; - } - } - break; - - case 24: - filebpl = 3 * w; - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - if (fwrite(lines, 1, filebpl, fp) != filebpl) - writeerror = 1; - } - break; - - case 32: - switch (spps) { - case 3: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < wpls; j++) { - pword = lines + j; - pel[0] = GET_DATA_BYTE(pword, COLOR_RED); - pel[1] = GET_DATA_BYTE(pword, COLOR_GREEN); - pel[2] = GET_DATA_BYTE(pword, COLOR_BLUE); - if (fwrite(pel, 1, 3, fp) != 3) - writeerror = 1; - } - } - break; - case 4: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - for (j = 0; j < wpls; j++) { - pword = lines + j; - pel[0] = GET_DATA_BYTE(pword, COLOR_RED); - pel[1] = GET_DATA_BYTE(pword, COLOR_GREEN); - pel[2] = GET_DATA_BYTE(pword, COLOR_BLUE); - pel[3] = GET_DATA_BYTE(pword, L_ALPHA_CHANNEL); - if (fwrite(pel, 1, 4, fp) != 4) - writeerror = 1; - } - } - break; - } - break; - } - - pixDestroy(&pixs); - if (writeerror) - return ERROR_INT("image write fail", procName, 1); - return 0; -} - - -/*---------------------------------------------------------------------* - * Read/write to memory * - *---------------------------------------------------------------------*/ - -/*! - * \brief pixReadMemPnm() - * - * \param[in] data const; pnm-encoded - * \param[in] size of data - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) The %size byte of %data must be a null character.
- * 
- */ -PIX * -pixReadMemPnm(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PIX *pix; - - PROCNAME("pixReadMemPnm"); - - if (!data) - return (PIX *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PIX *)ERROR_PTR("stream not opened", procName, NULL); - pix = pixReadStreamPnm(fp); - fclose(fp); - if (!pix) L_ERROR("pix not read\n", procName); - return pix; -} - - -/*! - * \brief readHeaderMemPnm() - * - * \param[in] data const; pnm-encoded - * \param[in] size of data - * \param[out] pw [optional] - * \param[out] ph [optional] - * \param[out] pd [optional] - * \param[out] ptype [optional] pnm type - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderMemPnm(const l_uint8 *data, - size_t size, - l_int32 *pw, - l_int32 *ph, - l_int32 *pd, - l_int32 *ptype, - l_int32 *pbps, - l_int32 *pspp) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderMemPnm"); - - if (!data) - return ERROR_INT("data not defined", procName, 1); - - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = freadHeaderPnm(fp, pw, ph, pd, ptype, pbps, pspp); - fclose(fp); - if (ret) - return ERROR_INT("header data read failed", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteMemPnm() - * - * \param[out] pdata data of PNM image - * \param[out] psize size of returned data - * \param[in] pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteStreamPnm() for usage.  This version writes to
- *          memory instead of to a file stream.
- * 
- */ -l_ok -pixWriteMemPnm(l_uint8 **pdata, - size_t *psize, - PIX *pix) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixWriteMemPnm"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixWriteStreamPnm(fp, pix); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixWriteStreamPnm(fp, pix); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*! - * \brief pixWriteMemPam() - * - * \param[out] pdata data of PAM image - * \param[out] psize size of returned data - * \param[in] pix - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteStreamPnm() for usage.  This version writes to
- *          memory instead of to a file stream.
- * 
- */ -l_ok -pixWriteMemPam(l_uint8 **pdata, - size_t *psize, - PIX *pix) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixWriteMemPam"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixWriteStreamPam(fp, pix); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = pixWriteStreamPam(fp, pix); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - - -/*--------------------------------------------------------------------* - * Static helpers * - *--------------------------------------------------------------------*/ -/*! - * \brief pnmReadNextAsciiValue() - * - * Return: 0 if OK, 1 on error or EOF. - * - * Notes: - * (1) This reads the next sample value in ASCII from the file. - */ -static l_int32 -pnmReadNextAsciiValue(FILE *fp, - l_int32 *pval) -{ -l_int32 c, ignore; - - PROCNAME("pnmReadNextAsciiValue"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!fp) - return ERROR_INT("stream not open", procName, 1); - do { /* skip whitespace */ - if ((c = fgetc(fp)) == EOF) - return 1; - } while (c == ' ' || c == '\t' || c == '\n' || c == '\r'); - - fseek(fp, -1L, SEEK_CUR); /* back up one byte */ - ignore = fscanf(fp, "%d", pval); - return 0; -} - - -/*! 
- * \brief pnmReadNextNumber() - * - * \param[in] fp file stream - * \param[out] pval value as an integer - * \return 0 if OK, 1 on error or EOF. - * - *
- * Notes:
- *      (1) This reads the next set of numeric chars, returning
- *          the value and swallowing the trailing whitespace character.
- *          This is needed to read the maxval in the header, which
- *          precedes the binary data.
- * 
- */ -static l_int32 -pnmReadNextNumber(FILE *fp, - l_int32 *pval) -{ -char buf[8]; -l_int32 i, c, foundws; - - PROCNAME("pnmReadNextNumber"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!fp) - return ERROR_INT("stream not open", procName, 1); - - /* The ASCII characters for the number are followed by exactly - * one whitespace character. */ - foundws = FALSE; - for (i = 0; i < 8; i++) - buf[i] = '\0'; - for (i = 0; i < 8; i++) { - if ((c = fgetc(fp)) == EOF) - return ERROR_INT("end of file reached", procName, 1); - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { - foundws = TRUE; - buf[i] = '\n'; - break; - } - if (!isdigit(c)) - return ERROR_INT("char read is not a digit", procName, 1); - buf[i] = c; - } - if (!foundws) - return ERROR_INT("no whitespace found", procName, 1); - if (sscanf(buf, "%d", pval) != 1) - return ERROR_INT("invalid read", procName, 1); - return 0; -} - -/*! - * \brief pnmReadNextString() - * - * \param[in] fp file stream - * \param[out] buff pointer to the string buffer - * \param[in] size max. number of charactes in buffer - * \return 0 if OK, 1 on error or EOF. - * - *
- * Notes:
- *      (1) This reads the next set of alphanumeric chars,
- *          returning the string and swallowing the trailing
- *          whitespace characters.
- *          This is needed to read header lines, which precede
- *          the P7 format binary data.
- * 
- */ -static l_int32 -pnmReadNextString(FILE *fp, - char *buff, - l_int32 size) -{ -l_int32 i, c; - - PROCNAME("pnmReadNextString"); - - if (!buff) - return ERROR_INT("buff not defined", procName, 1); - *buff = '\0'; - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (size <= 0) - return ERROR_INT("size is too small", procName, 1); - - do { /* skip whitespace */ - if ((c = fgetc(fp)) == EOF) - return ERROR_INT("end of file reached", procName, 1); - } while (c == ' ' || c == '\t' || c == '\n' || c == '\r'); - - /* Comment lines are allowed to appear - * anywhere in the header lines */ - if (c == '#') { - do { /* each line starting with '#' */ - do { /* this entire line */ - if ((c = fgetc(fp)) == EOF) - return ERROR_INT("end of file reached", procName, 1); - } while (c != '\n'); - if ((c = fgetc(fp)) == EOF) - return ERROR_INT("end of file reached", procName, 1); - } while (c == '#'); - } - - /* The next string ends when there is - * a whitespace character following. */ - for (i = 0; i < size - 1; i++) { - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') - break; - buff[i] = c; - if ((c = fgetc(fp)) == EOF) - return ERROR_INT("end of file reached", procName, 1); - } - buff[i] = '\0'; - - /* Back up one byte */ - fseek(fp, -1L, SEEK_CUR); - if (i >= size - 1) - return ERROR_INT("buff size too small", procName, 1); - - /* Skip over trailing spaces and tabs */ - for (;;) { - if ((c = fgetc(fp)) == EOF) - return ERROR_INT("end of file reached", procName, 1); - if (c != ' ' && c != '\t') - break; - } - - /* Back up one byte */ - fseek(fp, -1L, SEEK_CUR); - return 0; -} - - -/*! 
- * \brief pnmSkipCommentLines() - * - * Return: 0 if OK, 1 on error or EOF - * - * Notes: - * (1) Comment lines begin with '#' - * (2) Usage: caller should check return value for EOF - */ -static l_int32 -pnmSkipCommentLines(FILE *fp) -{ -l_int32 c; - - PROCNAME("pnmSkipCommentLines"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if ((c = fgetc(fp)) == EOF) - return 1; - if (c == '#') { - do { /* each line starting with '#' */ - do { /* this entire line */ - if ((c = fgetc(fp)) == EOF) - return 1; - } while (c != '\n'); - if ((c = fgetc(fp)) == EOF) - return 1; - } while (c == '#'); - } - - /* Back up one byte */ - fseek(fp, -1L, SEEK_CUR); - return 0; -} - -/* --------------------------------------------*/ -#endif /* USE_PNMIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pnmiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pnmiostub.c deleted file mode 100644 index 2238755c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/pnmiostub.c +++ /dev/null @@ -1,120 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file pnmiostub.c - *
- *
- *     Stubs for pnmio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_PNMIO /* defined in environ.h */ -/* --------------------------------------------*/ - -PIX * pixReadStreamPnm(FILE *fp) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamPnm", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderPnm(const char *filename, l_int32 *pw, l_int32 *ph, - l_int32 *pd, l_int32 *ptype, l_int32 *pbps, - l_int32 *pspp) -{ - return ERROR_INT("function not present", "readHeaderPnm", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok freadHeaderPnm(FILE *fp, l_int32 *pw, l_int32 *ph, l_int32 *pd, - l_int32 *ptype, l_int32 *pbps, l_int32 *pspp) -{ - return ERROR_INT("function not present", "freadHeaderPnm", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamPnm(FILE *fp, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteStreamPnm", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamAsciiPnm(FILE *fp, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteStreamAsciiPnm", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamPam(FILE *fp, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteStreamPam", 1); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemPnm(const l_uint8 *cdata, size_t size) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadMemPnm", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderMemPnm(const l_uint8 *cdata, size_t size, l_int32 *pw, - l_int32 *ph, l_int32 *pd, l_int32 *ptype, - l_int32 *pbps, l_int32 *pspp) -{ - return ERROR_INT("function 
not present", "readHeaderMemPnm", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemPnm(l_uint8 **pdata, size_t *psize, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteMemPnm", 1); -} -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemPam(l_uint8 **pdata, size_t *psize, PIX *pix) -{ - return ERROR_INT("function not present", "pixWriteMemPam", 1); -} - - -/* --------------------------------------------*/ -#endif /* !USE_PNMIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/projective.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/projective.c deleted file mode 100644 index 527b80c0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/projective.c +++ /dev/null @@ -1,926 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file projective.c - *
- *
- *      Projective (4 pt) image transformation using a sampled
- *      (to nearest integer) transform on each dest point
- *           PIX      *pixProjectiveSampledPta()
- *           PIX      *pixProjectiveSampled()
- *
- *      Projective (4 pt) image transformation using interpolation
- *      (or area mapping) for anti-aliasing images that are
- *      2, 4, or 8 bpp gray, or colormapped, or 32 bpp RGB
- *           PIX      *pixProjectivePta()
- *           PIX      *pixProjective()
- *           PIX      *pixProjectivePtaColor()
- *           PIX      *pixProjectiveColor()
- *           PIX      *pixProjectivePtaGray()
- *           PIX      *pixProjectiveGray()
- *
- *      Projective transform including alpha (blend) component
- *           PIX      *pixProjectivePtaWithAlpha()
- *
- *      Projective coordinate transformation
- *           l_int32   getProjectiveXformCoeffs()
- *           l_int32   projectiveXformSampledPt()
- *           l_int32   projectiveXformPt()
- *
- *      A projective transform can be specified as a specific functional
- *      mapping between 4 points in the source and 4 points in the dest.
- *      It preserves straight lines, but is less stable than a bilinear
- *      transform, because it contains a division by a quantity that
- *      can get arbitrarily small.)
- *
- *      We give both a projective coordinate transformation and
- *      two projective image transformations.
- *
- *      For the former, we ask for the coordinate value (x',y')
- *      in the transformed space for any point (x,y) in the original
- *      space.  The coefficients of the transformation are found by
- *      solving 8 simultaneous equations for the 8 coordinates of
- *      the 4 points in src and dest.  The transformation can then
- *      be used to compute the associated image transform, by
- *      computing, for each dest pixel, the relevant pixel(s) in
- *      the source.  This can be done either by taking the closest
- *      src pixel to each transformed dest pixel ("sampling") or
- *      by doing an interpolation and averaging over 4 source
- *      pixels with appropriate weightings ("interpolated").
- *
- *      A typical application would be to remove keystoning
- *      due to a projective transform in the imaging system.
- *
- *      The projective transform is given by specifying two equations:
- *
- *          x' = (ax + by + c) / (gx + hy + 1)
- *          y' = (dx + ey + f) / (gx + hy + 1)
- *
- *      where the eight coefficients have been computed from four
- *      sets of these equations, each for two corresponding data pts.
- *      In practice, once the coefficients are known, we use the
- *      equations "backwards": for each point (x,y) in the dest image,
- *      these two equations are used to compute the corresponding point
- *      (x',y') in the src.  That computed point in the src is then used
- *      to determine the corresponding dest pixel value in one of two ways:
- *
- *       ~ sampling: simply take the value of the src pixel in which this
- *                   point falls
- *       ~ interpolation: take appropriate linear combinations of the
- *                        four src pixels that this dest pixel would
- *                        overlap, with the coefficients proportional
- *                        to the amount of overlap
- *
- *      For small warp where there is little scale change, (e.g.,
- *      for rotation) area mapping is nearly equivalent to interpolation.
- *
- *      Typical relative timing of pointwise transforms (sampled = 1.0):
- *      8 bpp:   sampled        1.0
- *               interpolated   1.5
- *      32 bpp:  sampled        1.0
- *               interpolated   1.6
- *      Additionally, the computation time/pixel is nearly the same
- *      for 8 bpp and 32 bpp, for both sampled and interpolated.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -extern l_float32 AlphaMaskBorderVals[2]; - -/*------------------------------------------------------------n - * Sampled projective image transformation * - *-------------------------------------------------------------*/ -/*! - * \brief pixProjectiveSampledPta() - * - * \param[in] pixs all depths - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary.
- *      (2) Retains colormap, which you can do for a sampled transform..
- *      (3) No 3 of the 4 points may be collinear.
- *      (4) For 8 and 32 bpp pix, better quality is obtained by the
- *          somewhat slower pixProjectivePta().  See that
- *          function for relative timings between sampled and interpolated.
- * 
- */ -PIX * -pixProjectiveSampledPta(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 incolor) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixProjectiveSampledPta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getProjectiveXformCoeffs(ptad, ptas, &vc); - pixd = pixProjectiveSampled(pixs, vc, incolor); - LEPT_FREE(vc); - - return pixd; -} - - -/*! - * \brief pixProjectiveSampled() - * - * \param[in] pixs all depths - * \param[in] vc vector of 8 coefficients for projective transform - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary.
- *      (2) Retains colormap, which you can do for a sampled transform..
- *      (3) For 8 or 32 bpp, much better quality is obtained by the
- *          somewhat slower pixProjective().  See that function
- *          for relative timings between sampled and interpolated.
- * 
- */ -PIX * -pixProjectiveSampled(PIX *pixs, - l_float32 *vc, - l_int32 incolor) -{ -l_int32 i, j, w, h, d, x, y, wpls, wpld, color, cmapindex; -l_uint32 val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; -PIXCMAP *cmap; - - PROCNAME("pixProjectiveSampled"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("depth not 1, 2, 4, 8 or 16", procName, NULL); - - /* Init all dest pixels to color to be brought in from outside */ - pixd = pixCreateTemplate(pixs); - if ((cmap = pixGetColormap(pixs)) != NULL) { - if (incolor == L_BRING_IN_WHITE) - color = 1; - else - color = 0; - pixcmapAddBlackOrWhite(cmap, color, &cmapindex); - pixSetAllArbitrary(pixd, cmapindex); - } else { - if ((d == 1 && incolor == L_BRING_IN_WHITE) || - (d > 1 && incolor == L_BRING_IN_BLACK)) { - pixClearAll(pixd); - } else { - pixSetAll(pixd); - } - } - - /* Scan over the dest pixels */ - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - projectiveXformSampledPt(vc, j, i, &x, &y); - if (x < 0 || y < 0 || x >=w || y >= h) - continue; - lines = datas + y * wpls; - if (d == 1) { - val = GET_DATA_BIT(lines, x); - SET_DATA_BIT_VAL(lined, j, val); - } else if (d == 8) { - val = GET_DATA_BYTE(lines, x); - SET_DATA_BYTE(lined, j, val); - } else if (d == 32) { - lined[j] = lines[x]; - } else if (d == 2) { - val = GET_DATA_DIBIT(lines, x); - SET_DATA_DIBIT(lined, j, val); - } else if (d == 4) { - val = GET_DATA_QBIT(lines, x); - SET_DATA_QBIT(lined, j, val); - } - } - } - - return pixd; -} - - 
-/*---------------------------------------------------------------------* - * Interpolated projective image transformation * - *---------------------------------------------------------------------*/ -/*! - * \brief pixProjectivePta() - * - * \param[in] pixs all depths; colormap ok - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary
- *      (2) Removes any existing colormap, if necessary, before transforming
- * 
- */ -PIX * -pixProjectivePta(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_int32 incolor) -{ -l_int32 d; -l_uint32 colorval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixProjectivePta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - if (pixGetDepth(pixs) == 1) - return pixProjectiveSampledPta(pixs, ptad, ptas, incolor); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual color to bring in from edges */ - colorval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - colorval = 255; - else /* d == 32 */ - colorval = 0xffffff00; - } - - if (d == 8) - pixd = pixProjectivePtaGray(pixt2, ptad, ptas, colorval); - else /* d == 32 */ - pixd = pixProjectivePtaColor(pixt2, ptad, ptas, colorval); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! - * \brief pixProjective() - * - * \param[in] pixs all depths; colormap ok - * \param[in] vc vector of 8 coefficients for projective transform - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Brings in either black or white pixels from the boundary
- *      (2) Removes any existing colormap, if necessary, before transforming
- * 
- */ -PIX * -pixProjective(PIX *pixs, - l_float32 *vc, - l_int32 incolor) -{ -l_int32 d; -l_uint32 colorval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixProjective"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - if (pixGetDepth(pixs) == 1) - return pixProjectiveSampled(pixs, vc, incolor); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual color to bring in from edges */ - colorval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - colorval = 255; - else /* d == 32 */ - colorval = 0xffffff00; - } - - if (d == 8) - pixd = pixProjectiveGray(pixt2, vc, colorval); - else /* d == 32 */ - pixd = pixProjectiveColor(pixt2, vc, colorval); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! 
- * \brief pixProjectivePtaColor() - * - * \param[in] pixs 32 bpp - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] colorval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixProjectivePtaColor(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_uint32 colorval) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixProjectivePtaColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getProjectiveXformCoeffs(ptad, ptas, &vc); - pixd = pixProjectiveColor(pixs, vc, colorval); - LEPT_FREE(vc); - - return pixd; -} - - -/*! 
- * \brief pixProjectiveColor() - * - * \param[in] pixs 32 bpp - * \param[in] vc vector of 8 coefficients for projective transform - * \param[in] colorval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixProjectiveColor(PIX *pixs, - l_float32 *vc, - l_uint32 colorval) -{ -l_int32 i, j, w, h, d, wpls, wpld; -l_uint32 val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixProjectiveColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixd, colorval); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - projectiveXformPt(vc, j, i, &x, &y); - linearInterpolatePixelColor(datas, wpls, w, h, x, y, colorval, - &val); - *(lined + j) = val; - } - } - - /* If rgba, transform the pixs alpha channel and insert in pixd */ - if (pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixProjectiveGray(pix1, vc, 255); /* bring in opaque */ - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - return pixd; -} - - -/*! 
- * \brief pixProjectivePtaGray() - * - * \param[in] pixs 8 bpp - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] grayval 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixProjectivePtaGray(PIX *pixs, - PTA *ptad, - PTA *ptas, - l_uint8 grayval) -{ -l_float32 *vc; -PIX *pixd; - - PROCNAME("pixProjectivePtaGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - if (ptaGetCount(ptas) != 4) - return (PIX *)ERROR_PTR("ptas count not 4", procName, NULL); - if (ptaGetCount(ptad) != 4) - return (PIX *)ERROR_PTR("ptad count not 4", procName, NULL); - - /* Get backwards transform from dest to src, and apply it */ - getProjectiveXformCoeffs(ptad, ptas, &vc); - pixd = pixProjectiveGray(pixs, vc, grayval); - LEPT_FREE(vc); - - return pixd; -} - - - -/*! 
- * \brief pixProjectiveGray() - * - * \param[in] pixs 8 bpp - * \param[in] vc vector of 8 coefficients for projective transform - * \param[in] grayval 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - */ -PIX * -pixProjectiveGray(PIX *pixs, - l_float32 *vc, - l_uint8 grayval) -{ -l_int32 i, j, w, h, wpls, wpld, val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -PIX *pixd; - - PROCNAME("pixProjectiveGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - if (!vc) - return (PIX *)ERROR_PTR("vc not defined", procName, NULL); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - pixSetAllArbitrary(pixd, grayval); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Iterate over destination pixels */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - /* Compute float src pixel location corresponding to (i,j) */ - projectiveXformPt(vc, j, i, &x, &y); - linearInterpolatePixelGray(datas, wpls, w, h, x, y, grayval, &val); - SET_DATA_BYTE(lined, j, val); - } - } - - return pixd; -} - - -/*---------------------------------------------------------------------------* - * Projective transform including alpha (blend) component * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixProjectivePtaWithAlpha() - * - * \param[in] pixs 32 bpp rgb - * \param[in] ptad 4 pts of final coordinate space - * \param[in] ptas 4 pts of initial coordinate space - * \param[in] pixg [optional] 8 bpp, for alpha channel, can be null - * \param[in] fract between 0.0 and 1.0, with 0.0 fully transparent - * and 1.0 fully opaque - * \param[in] border of pixels added to capture transformed source pixels - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The alpha channel is transformed separately from pixs,
- *          and aligns with it, being fully transparent outside the
- *          boundary of the transformed pixs.  For pixels that are fully
- *          transparent, a blending function like pixBlendWithGrayMask()
- *          will give zero weight to corresponding pixels in pixs.
- *      (2) If pixg is NULL, it is generated as an alpha layer that is
- *          partially opaque, using %fract.  Otherwise, it is cropped
- *          to pixs if required and %fract is ignored.  The alpha channel
- *          in pixs is never used.
- *      (3) Colormaps are removed.
- *      (4) When pixs is transformed, it doesn't matter what color is brought
- *          in because the alpha channel will be transparent (0) there.
- *      (5) To avoid losing source pixels in the destination, it may be
- *          necessary to add a border to the source pix before doing
- *          the projective transformation.  This can be any non-negative
- *          number.
- *      (6) The input %ptad and %ptas are in a coordinate space before
- *          the border is added.  Internally, we compensate for this
- *          before doing the projective transform on the image after
- *          the border is added.
- *      (7) The default setting for the border values in the alpha channel
- *          is 0 (transparent) for the outermost ring of pixels and
- *          (0.5 * fract * 255) for the second ring.  When blended over
- *          a second image, this
- *          (a) shrinks the visible image to make a clean overlap edge
- *              with an image below, and
- *          (b) softens the edges by weakening the aliasing there.
- *          Use l_setAlphaMaskBorder() to change these values.
- * 
- */ -PIX * -pixProjectivePtaWithAlpha(PIX *pixs, - PTA *ptad, - PTA *ptas, - PIX *pixg, - l_float32 fract, - l_int32 border) -{ -l_int32 ws, hs, d; -PIX *pixd, *pixb1, *pixb2, *pixg2, *pixga; -PTA *ptad2, *ptas2; - - PROCNAME("pixProjectivePtaWithAlpha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &ws, &hs, &d); - if (d != 32 && pixGetColormap(pixs) == NULL) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (pixg && pixGetDepth(pixg) != 8) { - L_WARNING("pixg not 8 bpp; using 'fract' transparent alpha\n", - procName); - pixg = NULL; - } - if (!pixg && (fract < 0.0 || fract > 1.0)) { - L_WARNING("invalid fract; using 1.0 (fully transparent)\n", procName); - fract = 1.0; - } - if (!pixg && fract == 0.0) - L_WARNING("fully opaque alpha; image will not be blended\n", procName); - if (!ptad) - return (PIX *)ERROR_PTR("ptad not defined", procName, NULL); - if (!ptas) - return (PIX *)ERROR_PTR("ptas not defined", procName, NULL); - - /* Add border; the color doesn't matter */ - pixb1 = pixAddBorder(pixs, border, 0); - - /* Transform the ptr arrays to work on the bordered image */ - ptad2 = ptaTransform(ptad, border, border, 1.0, 1.0); - ptas2 = ptaTransform(ptas, border, border, 1.0, 1.0); - - /* Do separate projective transform of rgb channels of pixs - * and of pixg */ - pixd = pixProjectivePtaColor(pixb1, ptad2, ptas2, 0); - if (!pixg) { - pixg2 = pixCreate(ws, hs, 8); - if (fract == 1.0) - pixSetAll(pixg2); - else - pixSetAllArbitrary(pixg2, (l_int32)(255.0 * fract)); - } else { - pixg2 = pixResizeToMatch(pixg, NULL, ws, hs); - } - if (ws > 10 && hs > 10) { /* see note 7 */ - pixSetBorderRingVal(pixg2, 1, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[0])); - pixSetBorderRingVal(pixg2, 2, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[1])); - - } - pixb2 = pixAddBorder(pixg2, border, 0); /* must be black border */ - pixga = pixProjectivePtaGray(pixb2, ptad2, ptas2, 0); - 
pixSetRGBComponent(pixd, pixga, L_ALPHA_CHANNEL); - pixSetSpp(pixd, 4); - - pixDestroy(&pixg2); - pixDestroy(&pixb1); - pixDestroy(&pixb2); - pixDestroy(&pixga); - ptaDestroy(&ptad2); - ptaDestroy(&ptas2); - return pixd; -} - - -/*-------------------------------------------------------------* - * Projective coordinate transformation * - *-------------------------------------------------------------*/ -/*! - * \brief getProjectiveXformCoeffs() - * - * \param[in] ptas source 4 points; unprimed - * \param[in] ptad transformed 4 points; primed - * \param[out] pvc vector of coefficients of transform - * \return 0 if OK; 1 on error - * - * We have a set of 8 equations, describing the projective - * transformation that takes 4 points ptas into 4 other - * points ptad. These equations are: - * - * x1' = c[0]*x1 + c[1]*y1 + c[2]) / (c[6]*x1 + c[7]*y1 + 1 - * y1' = c[3]*x1 + c[4]*y1 + c[5]) / (c[6]*x1 + c[7]*y1 + 1 - * x2' = c[0]*x2 + c[1]*y2 + c[2]) / (c[6]*x2 + c[7]*y2 + 1 - * y2' = c[3]*x2 + c[4]*y2 + c[5]) / (c[6]*x2 + c[7]*y2 + 1 - * x3' = c[0]*x3 + c[1]*y3 + c[2]) / (c[6]*x3 + c[7]*y3 + 1 - * y3' = c[3]*x3 + c[4]*y3 + c[5]) / (c[6]*x3 + c[7]*y3 + 1 - * x4' = c[0]*x4 + c[1]*y4 + c[2]) / (c[6]*x4 + c[7]*y4 + 1 - * y4' = c[3]*x4 + c[4]*y4 + c[5]) / (c[6]*x4 + c[7]*y4 + 1 - * - * Multiplying both sides of each eqn by the denominator, we get - * - * AC = B - * - * where B and C are column vectors - * - * B = [ x1' y1' x2' y2' x3' y3' x4' y4' ] - * C = [ c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] ] - * - * and A is the 8x8 matrix - * - * x1 y1 1 0 0 0 -x1*x1' -y1*x1' - * 0 0 0 x1 y1 1 -x1*y1' -y1*y1' - * x2 y2 1 0 0 0 -x2*x2' -y2*x2' - * 0 0 0 x2 y2 1 -x2*y2' -y2*y2' - * x3 y3 1 0 0 0 -x3*x3' -y3*x3' - * 0 0 0 x3 y3 1 -x3*y3' -y3*y3' - * x4 y4 1 0 0 0 -x4*x4' -y4*x4' - * 0 0 0 x4 y4 1 -x4*y4' -y4*y4' - * - * These eight equations are solved here for the coefficients C. 
- * - * These eight coefficients can then be used to find the mapping - * x,y) --> (x',y': - * - * x' = c[0]x + c[1]y + c[2]) / (c[6]x + c[7]y + 1 - * y' = c[3]x + c[4]y + c[5]) / (c[6]x + c[7]y + 1 - * - * that is implemented in projectiveXformSampled and - * projectiveXFormInterpolated. - */ -l_ok -getProjectiveXformCoeffs(PTA *ptas, - PTA *ptad, - l_float32 **pvc) -{ -l_int32 i; -l_float32 x1, y1, x2, y2, x3, y3, x4, y4; -l_float32 *b; /* rhs vector of primed coords X'; coeffs returned in *pvc */ -l_float32 *a[8]; /* 8x8 matrix A */ - - PROCNAME("getProjectiveXformCoeffs"); - - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (!ptad) - return ERROR_INT("ptad not defined", procName, 1); - if (!pvc) - return ERROR_INT("&vc not defined", procName, 1); - - b = (l_float32 *)LEPT_CALLOC(8, sizeof(l_float32)); - *pvc = b; - ptaGetPt(ptas, 0, &x1, &y1); - ptaGetPt(ptas, 1, &x2, &y2); - ptaGetPt(ptas, 2, &x3, &y3); - ptaGetPt(ptas, 3, &x4, &y4); - ptaGetPt(ptad, 0, &b[0], &b[1]); - ptaGetPt(ptad, 1, &b[2], &b[3]); - ptaGetPt(ptad, 2, &b[4], &b[5]); - ptaGetPt(ptad, 3, &b[6], &b[7]); - - for (i = 0; i < 8; i++) - a[i] = (l_float32 *)LEPT_CALLOC(8, sizeof(l_float32)); - a[0][0] = x1; - a[0][1] = y1; - a[0][2] = 1.; - a[0][6] = -x1 * b[0]; - a[0][7] = -y1 * b[0]; - a[1][3] = x1; - a[1][4] = y1; - a[1][5] = 1; - a[1][6] = -x1 * b[1]; - a[1][7] = -y1 * b[1]; - a[2][0] = x2; - a[2][1] = y2; - a[2][2] = 1.; - a[2][6] = -x2 * b[2]; - a[2][7] = -y2 * b[2]; - a[3][3] = x2; - a[3][4] = y2; - a[3][5] = 1; - a[3][6] = -x2 * b[3]; - a[3][7] = -y2 * b[3]; - a[4][0] = x3; - a[4][1] = y3; - a[4][2] = 1.; - a[4][6] = -x3 * b[4]; - a[4][7] = -y3 * b[4]; - a[5][3] = x3; - a[5][4] = y3; - a[5][5] = 1; - a[5][6] = -x3 * b[5]; - a[5][7] = -y3 * b[5]; - a[6][0] = x4; - a[6][1] = y4; - a[6][2] = 1.; - a[6][6] = -x4 * b[6]; - a[6][7] = -y4 * b[6]; - a[7][3] = x4; - a[7][4] = y4; - a[7][5] = 1; - a[7][6] = -x4 * b[7]; - a[7][7] = -y4 * b[7]; - - gaussjordan(a, b, 8); - - for (i 
= 0; i < 8; i++) - LEPT_FREE(a[i]); - - return 0; -} - - -/*! - * \brief projectiveXformSampledPt() - * - * \param[in] vc vector of 8 coefficients - * \param[in] x, y initial point - * \param[out] pxp, pyp transformed point - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds the nearest pixel coordinates of the transformed point.
- *      (2) It does not check ptrs for returned data!
- * 
- */ -l_ok -projectiveXformSampledPt(l_float32 *vc, - l_int32 x, - l_int32 y, - l_int32 *pxp, - l_int32 *pyp) -{ -l_float32 factor; - - PROCNAME("projectiveXformSampledPt"); - - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - - factor = 1. / (vc[6] * x + vc[7] * y + 1.); - *pxp = (l_int32)(factor * (vc[0] * x + vc[1] * y + vc[2]) + 0.5); - *pyp = (l_int32)(factor * (vc[3] * x + vc[4] * y + vc[5]) + 0.5); - return 0; -} - - -/*! - * \brief projectiveXformPt() - * - * \param[in] vc vector of 8 coefficients - * \param[in] x, y initial point - * \param[out] pxp, pyp transformed point - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This computes the floating point location of the transformed point.
- *      (2) It does not check ptrs for returned data!
- * 
- */ -l_ok -projectiveXformPt(l_float32 *vc, - l_int32 x, - l_int32 y, - l_float32 *pxp, - l_float32 *pyp) -{ -l_float32 factor; - - PROCNAME("projectiveXformPt"); - - if (!vc) - return ERROR_INT("vc not defined", procName, 1); - - factor = 1. / (vc[6] * x + vc[7] * y + 1.); - *pxp = factor * (vc[0] * x + vc[1] * y + vc[2]); - *pyp = factor * (vc[3] * x + vc[4] * y + vc[5]); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio1.c deleted file mode 100644 index bf825a97..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio1.c +++ /dev/null @@ -1,1077 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file psio1.c - *
- *
- *    |=============================================================|
- *    |                         Important note                      |
- *    |=============================================================|
- *    | Some of these functions require I/O libraries such as       |
- *    | libtiff, libjpeg, and libz.  If you do not have these       |
- *    | libraries, some calls will fail.                            |
- *    |                                                             |
- *    | You can manually deactivate all PostScript writing by       |
- *    | setting this in environ.h:                                  |
- *    | \code                                                       |
- *    |     #define  USE_PSIO     0                                 |
- *    | \endcode                                                    |
- *    | in environ.h.  This will link psio1stub.c                   |
- *    |=============================================================|
- *
- *     This is a PostScript "device driver" for wrapping images
- *     in PostScript.  The images can be rendered by a PostScript
- *     interpreter for viewing, using evince or gv.  They can also be
- *     rasterized for printing, using gs or an embedded interpreter
- *     in a PostScript printer.  And they can be converted to a pdf
- *     using gs (ps2pdf).
- *
- *     Convert specified files to PS
- *          l_int32          convertFilesToPS()
- *          l_int32          sarrayConvertFilesToPS()
- *          l_int32          convertFilesFittedToPS()
- *          l_int32          sarrayConvertFilesFittedToPS()
- *          l_int32          writeImageCompressedToPSFile()
- *
- *     Convert mixed text/image files to PS
- *          l_int32          convertSegmentedPagesToPS()
- *          l_int32          pixWriteSegmentedPageToPS()
- *          l_int32          pixWriteMixedToPS()
- *
- *     Convert any image file to PS for embedding
- *          l_int32          convertToPSEmbed()
- *
- *     Write all images in a pixa out to PS
- *          l_int32          pixaWriteCompressedToPS()
- *          l_int32          pixWriteCompressedToPS()
- *
- *  These PostScript converters are used in three different ways.
- *
- *  (1) For embedding a PS file in a program like TeX.
- *      convertToPSEmbed() handles this for levels 1, 2 and 3 output,
- *      and prog/converttops wraps this in an executable.
- *      converttops is a generalization of Thomas Merz's jpeg2ps wrapper,
- *      in that it works for all types (formats, depth, colormap)
- *      of input images and gives PS output in one of these formats
- *        * level 1 (uncompressed)
- *        * level 2 (compressed ccittg4 or dct)
- *        * level 3 (compressed flate)
- *
- *  (2) For composing a set of pages with any number of images
- *      painted on them, in either level 2 or level 3 formats.
- *
- *  (3) For printing a page image or a set of page images, at a
- *      resolution that optimally fills the page, using
- *      convertFilesFittedToPS().
- *
- *  The top-level calls of utilities in category 2, which can compose
- *  multiple images on a page, and which generate a PostScript file for
- *  printing or display (e.g., conversion to pdf), are:
- *      convertFilesToPS()
- *      convertFilesFittedToPS()
- *      convertSegmentedPagesToPS()
- *
- *  All images are output with page numbers.  Bounding box hints are
- *  more subtle.  They must be included for embeding images in
- *  TeX, for example, and the low-level writers include bounding
- *  box hints by default.  However, these hints should not be included for
- *  multi-page PostScript that is composed of a sequence of images;
- *  consequently, they are not written when calling higher level
- *  functions such as convertFilesToPS(), convertFilesFittedToPS()
- *  and convertSegmentedPagesToPS().  The function l_psWriteBoundingBox()
- *  sets a flag to give low-level control over this.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if USE_PSIO /* defined in environ.h */ - /* --------------------------------------------*/ - -/*-------------------------------------------------------------* - * Convert files in a directory to PS * - *-------------------------------------------------------------*/ -/* - * \brief convertFilesToPS() - * - * \param[in] dirin input directory - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] res typ. 300 or 600 ppi - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a PS file for all image files in a specified
- *          directory that contain the substr pattern to be matched.
- *      (2) Each image is written to a separate page in the output PS file.
- *      (3) All images are written compressed:
- *              * if tiffg4  -->  use ccittg4
- *              * if jpeg    -->  use dct
- *              * all others -->  use flate
- *          If the image is jpeg or tiffg4, we use the existing compressed
- *          strings for the encoding; otherwise, we read the image into
- *          a pix and flate-encode the pieces.
- *      (4) The resolution is often confusing.  It is interpreted
- *          as the resolution of the output display device:  "If the
- *          input image were digitized at 300 ppi, what would it
- *          look like when displayed at res ppi."  So, for example,
- *          if res = 100 ppi, then the display pixels are 3x larger
- *          than the 300 ppi pixels, and the image will be rendered
- *          3x larger.
- *      (5) The size of the PostScript file is independent of the resolution,
- *          because the entire file is encoded.  The res parameter just
- *          tells the PS decomposer how to render the page.  Therefore,
- *          for minimum file size without loss of visual information,
- *          if the output res is less than 300, you should downscale
- *          the image to the output resolution before wrapping in PS.
- *      (6) The "canvas" on which the image is rendered, at the given
- *          output resolution, is a standard page size (8.5 x 11 in).
- * 
- */ -l_ok -convertFilesToPS(const char *dirin, - const char *substr, - l_int32 res, - const char *fileout) -{ -SARRAY *sa; - - PROCNAME("convertFilesToPS"); - - if (!dirin) - return ERROR_INT("dirin not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (res <= 0) { - L_INFO("setting res to 300 ppi\n", procName); - res = 300; - } - if (res < 10 || res > 4000) - L_WARNING("res is typically in the range 300-600 ppi\n", procName); - - /* Get all filtered and sorted full pathnames. */ - sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); - - /* Generate the PS file. Don't use bounding boxes. */ - l_psWriteBoundingBox(FALSE); - sarrayConvertFilesToPS(sa, res, fileout); - l_psWriteBoundingBox(TRUE); - sarrayDestroy(&sa); - return 0; -} - - -/* - - * \brief sarrayConvertFilesToPS() - * - * \param[in] sarray of full path names - * \param[in] res typ. 300 or 600 ppi - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) See convertFilesToPS()
- * 
- */ -l_ok -sarrayConvertFilesToPS(SARRAY *sa, - l_int32 res, - const char *fileout) -{ -char *fname; -l_int32 i, nfiles, index, ret, format; - - PROCNAME("sarrayConvertFilesToPS"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (res <= 0) { - L_INFO("setting res to 300 ppi\n", procName); - res = 300; - } - if (res < 10 || res > 4000) - L_WARNING("res is typically in the range 300-600 ppi\n", procName); - - nfiles = sarrayGetCount(sa); - for (i = 0, index = 0; i < nfiles; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL); - if (ret) continue; - if (format == IFF_UNKNOWN) - continue; - - writeImageCompressedToPSFile(fname, fileout, res, &index); - } - - return 0; -} - - -/* - * \brief convertFilesFittedToPS() - * - * \param[in] dirin input directory - * \param[in] substr [optional] substring filter on filenames; can be NULL) - * \param[in] xpts desired size in printer points; use 0 for default - * \param[in] ypts desired size in printer points; use 0 for default - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a PS file for all files in a specified directory
- *          that contain the substr pattern to be matched.
- *      (2) Each image is written to a separate page in the output PS file.
- *      (3) All images are written compressed:
- *              * if tiffg4  -->  use ccittg4
- *              * if jpeg    -->  use dct
- *              * all others -->  use flate
- *          If the image is jpeg or tiffg4, we use the existing compressed
- *          strings for the encoding; otherwise, we read the image into
- *          a pix and flate-encode the pieces.
- *      (4) The resolution is internally determined such that the images
- *          are rendered, in at least one direction, at 100% of the given
- *          size in printer points.  Use 0.0 for xpts or ypts to get
- *          the default value, which is 612.0 or 792.0, rsp.
- *      (5) The size of the PostScript file is independent of the resolution,
- *          because the entire file is encoded.  The %xpts and %ypts
- *          parameter tells the PS decomposer how to render the page.
- * 
- */ -l_ok -convertFilesFittedToPS(const char *dirin, - const char *substr, - l_float32 xpts, - l_float32 ypts, - const char *fileout) -{ -SARRAY *sa; - - PROCNAME("convertFilesFittedToPS"); - - if (!dirin) - return ERROR_INT("dirin not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (xpts <= 0.0) { - L_INFO("setting xpts to 612.0 ppi\n", procName); - xpts = 612.0; - } - if (ypts <= 0.0) { - L_INFO("setting ypts to 792.0 ppi\n", procName); - ypts = 792.0; - } - if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) - L_WARNING("xpts,ypts are typically in the range 500-800\n", procName); - - /* Get all filtered and sorted full pathnames. */ - sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); - - /* Generate the PS file. Don't use bounding boxes. */ - l_psWriteBoundingBox(FALSE); - sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout); - l_psWriteBoundingBox(TRUE); - sarrayDestroy(&sa); - return 0; -} - - -/* - * \brief sarrayConvertFilesFittedToPS() - * - * \param[in] sarray of full path names - * \param[in] xpts desired size in printer points; use 0 for default - * \param[in] ypts desired size in printer points; use 0 for default - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *     (1) See convertFilesFittedToPS()
- * 
- */ -l_ok -sarrayConvertFilesFittedToPS(SARRAY *sa, - l_float32 xpts, - l_float32 ypts, - const char *fileout) -{ -char *fname; -l_int32 ret, i, w, h, nfiles, index, format, res; - - PROCNAME("sarrayConvertFilesFittedToPS"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (xpts <= 0.0) { - L_INFO("setting xpts to 612.0\n", procName); - xpts = 612.0; - } - if (ypts <= 0.0) { - L_INFO("setting ypts to 792.0\n", procName); - ypts = 792.0; - } - if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) - L_WARNING("xpts,ypts are typically in the range 500-800\n", procName); - - nfiles = sarrayGetCount(sa); - for (i = 0, index = 0; i < nfiles; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL); - if (ret) continue; - if (format == IFF_UNKNOWN) - continue; - - /* Be sure the entire image is wrapped */ - if (xpts * h < ypts * w) - res = (l_int32)((l_float32)w * 72.0 / xpts); - else - res = (l_int32)((l_float32)h * 72.0 / ypts); - - writeImageCompressedToPSFile(fname, fileout, res, &index); - } - - return 0; -} - - -/* - * \brief writeImageCompressedToPSFile() - * - * \param[in] filein input image file - * \param[in] fileout output ps file - * \param[in] res output printer resolution - * \param[in,out] pindex index of image in output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This wraps a single page image in PS.
- *      (2) The input file can be in any format.  It is compressed as follows:
- *             * if in tiffg4  -->  use ccittg4
- *             * if in jpeg    -->  use dct
- *             * all others    -->  use flate
- *      (3) Before the first call, set %index = 0.  %index is incremented
- *          if the page is successfully written.  It is used to decide
- *          whether to write (index == 0) or append (index > 0) to the file.
- * 
- */ -l_ok -writeImageCompressedToPSFile(const char *filein, - const char *fileout, - l_int32 res, - l_int32 *pindex) -{ -const char *op; -l_int32 format, retval; - - PROCNAME("writeImageCompressedToPSFile"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - - findFileFormat(filein, &format); - if (format == IFF_UNKNOWN) { - L_ERROR("format of %s not known\n", procName, filein); - return 1; - } - - op = (*pindex == 0) ? "w" : "a"; - if (format == IFF_JFIF_JPEG) { - retval = convertJpegToPS(filein, fileout, op, 0, 0, - res, 1.0, *pindex + 1, TRUE); - } else if (format == IFF_TIFF_G4) { - retval = convertG4ToPS(filein, fileout, op, 0, 0, - res, 1.0, *pindex + 1, FALSE, TRUE); - } else { /* all other image formats */ - retval = convertFlateToPS(filein, fileout, op, 0, 0, - res, 1.0, *pindex + 1, TRUE); - } - if (retval == 0) (*pindex)++; - - return retval; -} - - -/*-------------------------------------------------------------* - * Convert mixed text/image files to PS * - *-------------------------------------------------------------*/ -/* - * \brief convertSegmentedPagesToPS() - * - * \param[in] pagedir input page image directory - * \param[in] pagestr [optional] substring filter on page filenames; - * can be NULL - * \param[in] page_numpre number of characters in page name before number - * \param[in] maskdir input mask image directory - * \param[in] maskstr [optional] substring filter on mask filenames; - * can be NULL - * \param[in] mask_numpre number of characters in mask name before number - * \param[in] numpost number of characters in names after number - * \param[in] maxnum only consider page numbers up to this value - * \param[in] textscale scale of text output relative to pixs - * \param[in] imagescale scale of image output relative to pixs - * \param[in] threshold for binarization; typ. about 190; 0 for default - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a PS file for all page image and mask files in two
- *          specified directories and that contain the page numbers as
- *          specified below.  The two directories can be the same, in which
- *          case the page and mask files are differentiated by the two
- *          substrings for string matches.
- *      (2) The page images are taken in lexicographic order.
- *          Mask images whose numbers match the page images are used to
- *          segment the page images.  Page images without a matching
- *          mask image are scaled, thresholded and rendered entirely as text.
- *      (3) Each PS page is generated as a compressed representation of
- *          the page image, where the part of the image under the mask
- *          is suitably scaled and compressed as DCT (i.e., jpeg), and
- *          the remaining part of the page is suitably scaled, thresholded,
- *          compressed as G4 (i.e., tiff g4), and rendered by painting
- *          black through the resulting text mask.
- *      (4) The scaling is typically 2x down for the DCT component
- *          (%imagescale = 0.5) and 2x up for the G4 component
- *          (%textscale = 2.0).
- *      (5) The resolution is automatically set to fit to a
- *          letter-size (8.5 x 11 inch) page.
- *      (6) Both the DCT and the G4 encoding are PostScript level 2.
- *      (7) It is assumed that the page number is contained within
- *          the basename (the filename without directory or extension).
- *          %page_numpre is the number of characters in the page basename
- *          preceding the actual page number; %mask_numpre is likewise for
- *          the mask basename; %numpost is the number of characters
- *          following the page number.  For example, for mask name
- *          mask_006.tif, mask_numpre = 5 ("mask_).
- *      (8) To render a page as is -- that is, with no thresholding
- *          of any pixels -- use a mask in the mask directory that is
- *          full size with all pixels set to 1.  If the page is 1 bpp,
- *          it is not necessary to have a mask.
- * 
- */ -l_ok -convertSegmentedPagesToPS(const char *pagedir, - const char *pagestr, - l_int32 page_numpre, - const char *maskdir, - const char *maskstr, - l_int32 mask_numpre, - l_int32 numpost, - l_int32 maxnum, - l_float32 textscale, - l_float32 imagescale, - l_int32 threshold, - const char *fileout) -{ -l_int32 pageno, i, npages; -PIX *pixs, *pixm; -SARRAY *sapage, *samask; - - PROCNAME("convertSegmentedPagesToPS"); - - if (!pagedir) - return ERROR_INT("pagedir not defined", procName, 1); - if (!maskdir) - return ERROR_INT("maskdir not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (threshold <= 0) { - L_INFO("setting threshold to 190\n", procName); - threshold = 190; - } - - /* Get numbered full pathnames; max size of sarray is maxnum */ - sapage = getNumberedPathnamesInDirectory(pagedir, pagestr, - page_numpre, numpost, maxnum); - samask = getNumberedPathnamesInDirectory(maskdir, maskstr, - mask_numpre, numpost, maxnum); - sarrayPadToSameSize(sapage, samask, ""); - if ((npages = sarrayGetCount(sapage)) == 0) { - sarrayDestroy(&sapage); - sarrayDestroy(&samask); - return ERROR_INT("no matching pages found", procName, 1); - } - - /* Generate the PS file */ - pageno = 1; - for (i = 0; i < npages; i++) { - if ((pixs = pixReadIndexed(sapage, i)) == NULL) - continue; - pixm = pixReadIndexed(samask, i); - pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale, - threshold, pageno, fileout); - pixDestroy(&pixs); - pixDestroy(&pixm); - pageno++; - } - - sarrayDestroy(&sapage); - sarrayDestroy(&samask); - return 0; -} - - -/* - * \brief pixWriteSegmentedPageToPS() - * - * \param[in] pixs all depths; colormap ok - * \param[in] pixm [optional] 1 bpp segmentation mask over image region - * \param[in] textscale scale of text output relative to pixs - * \param[in] imagescale scale of image output relative to pixs - * \param[in] threshold for binarization; typ. 
about 190; 0 for default - * \param[in] pageno page number in set; use 1 for new output file - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates the PS string for a mixed text/image page,
- *          and adds it to an existing file if %pageno > 1.
- *          The PS output is determined by fitting the result to
- *          a letter-size (8.5 x 11 inch) page.
- *      (2) The two images (pixs and pixm) are at the same resolution
- *          (typically 300 ppi).  They are used to generate two compressed
- *          images, pixb and pixc, that are put directly into the output
- *          PS file.
- *      (3) pixb is the text component.  In the PostScript world, we think of
- *          it as a mask through which we paint black.  It is produced by
- *          scaling pixs by %textscale, and thresholding to 1 bpp.
- *      (4) pixc is the image component, which is that part of pixs under
- *          the mask pixm.  It is scaled from pixs by %imagescale.
- *      (5) Typical values are textscale = 2.0 and imagescale = 0.5.
- *      (6) If pixm == NULL, the page has only text.  If it is all black,
- *          the page is all image and has no text.
- *      (7) This can be used to write a multi-page PS file, by using
- *          sequential page numbers with the same output file.  It can
- *          also be used to write separate PS files for each page,
- *          by using different output files with %pageno = 0 or 1.
- * 
- */ -l_ok -pixWriteSegmentedPageToPS(PIX *pixs, - PIX *pixm, - l_float32 textscale, - l_float32 imagescale, - l_int32 threshold, - l_int32 pageno, - const char *fileout) -{ -l_int32 alltext, notext, d, ret; -l_uint32 val; -l_float32 scaleratio; -PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc; - - PROCNAME("pixWriteSegmentedPageToPS"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (imagescale <= 0.0 || textscale <= 0.0) - return ERROR_INT("relative scales must be > 0.0", procName, 1); - - /* Analyze the page. Determine the ratio by which the - * binary text mask is scaled relative to the image part. - * If there is no image region (alltext == TRUE), the - * text mask will be rendered directly to fit the page, - * and scaleratio = 1.0. */ - alltext = TRUE; - notext = FALSE; - scaleratio = 1.0; - if (pixm) { - pixZero(pixm, &alltext); /* pixm empty: all text */ - if (alltext) { - pixm = NULL; /* treat it as not existing here */ - } else { - pixmi = pixInvert(NULL, pixm); - pixZero(pixmi, ¬ext); /* pixm full; no text */ - pixDestroy(&pixmi); - scaleratio = textscale / imagescale; - } - } - - if (pixGetDepth(pixs) == 1) { /* render tiff g4 */ - pixb = pixClone(pixs); - pixc = NULL; - } else { - pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */ - - /* Get the binary text mask. Note that pixg cannot be a - * clone of pixs, because it may be altered by pixSetMasked(). 
*/ - pixb = NULL; - if (notext == FALSE) { - d = pixGetDepth(pixt); - if (d == 8) - pixg = pixCopy(NULL, pixt); - else /* d == 32 */ - pixg = pixConvertRGBToLuminance(pixt); - if (pixm) /* clear out the image parts */ - pixSetMasked(pixg, pixm, 255); - if (textscale == 1.0) - pixsc = pixClone(pixg); - else if (textscale >= 0.7) - pixsc = pixScaleGrayLI(pixg, textscale, textscale); - else - pixsc = pixScaleAreaMap(pixg, textscale, textscale); - pixb = pixThresholdToBinary(pixsc, threshold); - pixDestroy(&pixg); - pixDestroy(&pixsc); - } - - /* Get the scaled image region */ - pixc = NULL; - if (pixm) { - if (imagescale == 1.0) - pixsc = pixClone(pixt); /* can possibly be a clone of pixs */ - else - pixsc = pixScale(pixt, imagescale, imagescale); - - /* If pixm is not full, clear the pixels in pixsc - * corresponding to bg in pixm, where there can be text - * that is written through the mask pixb. Note that - * we could skip this and use pixsc directly in - * pixWriteMixedToPS(); however, clearing these - * non-image regions to a white background will reduce - * the size of pixc (relative to pixsc), and hence - * reduce the size of the PS file that is generated. - * Use a copy so that we don't accidentally alter pixs. */ - if (notext == FALSE) { - pixmis = pixScale(pixm, imagescale, imagescale); - pixmi = pixInvert(NULL, pixmis); - val = (d == 8) ? 0xff : 0xffffff00; - pixc = pixCopy(NULL, pixsc); - pixSetMasked(pixc, pixmi, val); /* clear non-image part */ - pixDestroy(&pixmis); - pixDestroy(&pixmi); - } else { - pixc = pixClone(pixsc); - } - pixDestroy(&pixsc); - } - pixDestroy(&pixt); - } - - /* Generate the PS file. Don't use bounding boxes. 
*/ - l_psWriteBoundingBox(FALSE); - ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout); - l_psWriteBoundingBox(TRUE); - pixDestroy(&pixb); - pixDestroy(&pixc); - return ret; -} - - -/* - * \brief pixWriteMixedToPS() - * - * \param[in] pixb [optional] 1 bpp mask; typically for text - * \param[in] pixc [optional] 8 or 32 bpp image regions - * \param[in] scale scale factor for rendering pixb, relative to pixc; - * typ. 4.0 - * \param[in] pageno page number in set; use 1 for new output file - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This low level function generates the PS string for a mixed
- *          text/image page, and adds it to an existing file if
- *          %pageno > 1.
- *      (2) The two images (pixb and pixc) are typically generated at the
- *          resolution that they will be rendered in the PS file.
- *      (3) pixb is the text component.  In the PostScript world, we think of
- *          it as a mask through which we paint black.
- *      (4) pixc is the (typically halftone) image component.  It is
- *          white in the rest of the page.  To minimize the size of the
- *          PS file, it should be rendered at a resolution that is at
- *          least equal to its actual resolution.
- *      (5) %scale gives the ratio of resolution of pixb to pixc.
- *          Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
- *          so %scale = 4.0.  If one of the images is not defined,
- *          the value of %scale is ignored.
- *      (6) We write pixc with DCT compression (jpeg).  This is followed
- *          by painting the text as black through the mask pixb.  If
- *          pixc doesn't exist (alltext), we write the text with the
- *          PS "image" operator instead of the "imagemask" operator,
- *          because ghostscript's ps2pdf is flaky when the latter is used.
- *      (7) The actual output resolution is determined by fitting the
- *          result to a letter-size (8.5 x 11 inch) page.
- * 
- */
-l_ok
-pixWriteMixedToPS(PIX         *pixb,
-                  PIX         *pixc,
-                  l_float32    scale,
-                  l_int32      pageno,
-                  const char  *fileout)
-{
-char        *tname;
-const char  *op;
-l_int32      resb, resc, endpage, maskop, ret;
-
-    PROCNAME("pixWriteMixedToPS");
-
-    if (!pixb && !pixc)
-        return ERROR_INT("pixb and pixc both undefined", procName, 1);
-    if (!fileout)
-        return ERROR_INT("fileout not defined", procName, 1);
-
-        /* Compute the resolution that fills a letter-size page. */
-    if (!pixc) {
-       resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
-    } else {
-       resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
-       if (pixb)
-           resb = (l_int32)(scale * resc);
-    }
-
-        /* Write the jpeg image first */
-    if (pixc) {
-        tname = l_makeTempFilename();
-        pixWrite(tname, pixc, IFF_JFIF_JPEG);
-        endpage = (pixb) ? FALSE : TRUE;
-        op = (pageno <= 1) ? "w" : "a";
-        ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
-                              pageno, endpage);
-        lept_rmfile(tname);
-        LEPT_FREE(tname);
-        if (ret)
-            return ERROR_INT("jpeg data not written", procName, 1);
-    }
-
-        /* Write the binary data, either directly or, if there is
-         * a jpeg image on the page, through the mask. */
-    if (pixb) {
-        tname = l_makeTempFilename();
-        pixWrite(tname, pixb, IFF_TIFF_G4);
-        op = (pageno <= 1 && !pixc) ? "w" : "a";
-        maskop = (pixc) ? 1 : 0;
-        ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
-                            pageno, maskop, 1);
-        lept_rmfile(tname);
-        LEPT_FREE(tname);
-        if (ret)
-            return ERROR_INT("tiff data not written", procName, 1);
-    }
-
-    return 0;
-}
-
-
-/*-------------------------------------------------------------*
- *            Convert any image file to PS for embedding       *
- *-------------------------------------------------------------*/
-/*
- * \brief  convertToPSEmbed()
- *
- * \param[in]     filein    input image file, any format
- * \param[in]     fileout   output ps file
- * \param[in]     level     PostScript compression: 1 (uncompressed), 2 or 3
- * \return  0 if OK, 1 on error
- *
- * 
- * Notes:
- *      (1) This is a wrapper function that generates a PS file with
- *          a bounding box, from any input image file.
- *      (2) Do the best job of compression given the specified level.
- *          %level=3 does flate compression on anything that is not
- *          tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
- *      (3) If %level=2 and the file is not tiffg4 or jpeg, it will
- *          first be written to file as jpeg with quality = 75.
- *          This will remove the colormap and cause some degradation
- *          in the image.
- *      (4) The bounding box is required when a program such as TeX
- *          (through epsf) places and rescales the image.  It is
- *          sized for fitting the image to an 8.5 x 11.0 inch page.
- * 
- */ -l_ok -convertToPSEmbed(const char *filein, - const char *fileout, - l_int32 level) -{ -char *tname; -l_int32 d, format; -PIX *pix, *pixs; - - PROCNAME("convertToPSEmbed"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (level != 1 && level != 2 && level != 3) { - L_ERROR("invalid level specified; using level 2\n", procName); - level = 2; - } - - if (level == 1) { /* no compression */ - pixWritePSEmbed(filein, fileout); - return 0; - } - - /* Find the format and write out directly if in jpeg or tiff g4 */ - findFileFormat(filein, &format); - if (format == IFF_JFIF_JPEG) { - convertJpegToPSEmbed(filein, fileout); - return 0; - } else if (format == IFF_TIFF_G4) { - convertG4ToPSEmbed(filein, fileout); - return 0; - } else if (format == IFF_UNKNOWN) { - L_ERROR("format of %s not known\n", procName, filein); - return 1; - } - - /* If level 3, flate encode. */ - if (level == 3) { - convertFlateToPSEmbed(filein, fileout); - return 0; - } - - /* OK, it's level 2, so we must convert to jpeg or tiff g4 */ - if ((pixs = pixRead(filein)) == NULL) - return ERROR_INT("image not read from file", procName, 1); - d = pixGetDepth(pixs); - if ((d == 2 || d == 4) && !pixGetColormap(pixs)) - pix = pixConvertTo8(pixs, 0); - else if (d == 16) - pix = pixConvert16To8(pixs, L_MS_BYTE); - else - pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - pixDestroy(&pixs); - if (!pix) - return ERROR_INT("converted pix not made", procName, 1); - - d = pixGetDepth(pix); - tname = l_makeTempFilename(); - if (d == 1) { - if (pixWrite(tname, pix, IFF_TIFF_G4)) { - LEPT_FREE(tname); - pixDestroy(&pix); - return ERROR_INT("g4 tiff not written", procName, 1); - } - convertG4ToPSEmbed(tname, fileout); - } else { - if (pixWrite(tname, pix, IFF_JFIF_JPEG)) { - LEPT_FREE(tname); - pixDestroy(&pix); - return ERROR_INT("jpeg not written", procName, 1); - } - convertJpegToPSEmbed(tname, fileout); - } 
- - lept_rmfile(tname); - LEPT_FREE(tname); - pixDestroy(&pix); - return 0; -} - - -/*-------------------------------------------------------------* - * Write all images in a pixa out to PS * - *-------------------------------------------------------------*/ -/* - * \brief pixaWriteCompressedToPS() - * - * \param[in] pixa any set of images - * \param[in] fileout output ps file - * \param[in] res resolution for the set of input images - * \param[in] level PostScript compression capability: 2 or 3 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a PostScript file of multiple page images,
- *          all with bounding boxes.
- *      (2) See pixWriteCompressedToPS() for details.
- *      (3) To generate a pdf from %fileout, use:
- *             ps2pdf  
- * 
- */ -l_ok -pixaWriteCompressedToPS(PIXA *pixa, - const char *fileout, - l_int32 res, - l_int32 level) -{ -l_int32 i, n, index, ret; -PIX *pix; - - PROCNAME("pixaWriteCompressedToPS"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (level != 2 && level != 3) { - L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName); - level = 2; - } - - index = 0; - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - ret = pixWriteCompressedToPS(pix, fileout, res, level, &index); - if (ret) L_ERROR("PS string not written for image %d\n", procName, i); - pixDestroy(&pix); - } - return 0; -} - - -/* - * \brief pixWriteCompressedToPS() - * - * \param[in] pix any depth; colormap OK - * \param[in] fileout output ps file - * \param[in] res of input image - * \param[in] level PostScript compression capability: 2 or 3 - * \param[in,out] pindex index of image in output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates a PostScript string for %pix, and writes it
- *          to a file, with a bounding box.
- *      (2) *pindex keeps track of the number of images that have been
- *          written to %fileout.  If this is the first image to be
- *          converted, set *pindex == 0 before passing it in.  If the
- *          PostScript string is successfully generated, this will increment
- *          *pindex.  If *pindex > 0, the PostScript string will be
- *          appended to %fileout.
- *      (3) PostScript level 2 enables lossless tiffg4 and lossy jpeg
- *          compression.  Level 3 adds lossless flate (essentially gzip)
- *          compression.
- *          * For images with a colormap, lossless flate is often better in
- *            both quality and size than jpeg.
- *          * The decision for images without a colormap affects compression
- *            efficiency: %level2 (jpeg) is usually better than %level3 (flate)
- *          * Because jpeg does not handle 16 bpp, if %level == 2, the image
- *            is converted to 8 bpp (using MSB) and compressed with jpeg,
- *              cmap + level2:        jpeg
- *              cmap + level3:        flate
- *              1 bpp:                tiffg4
- *              2 or 4 bpp + level2:  jpeg
- *              2 or 4 bpp + level3:  flate
- *              8 bpp + level2:       jpeg
- *              8 bpp + level3:       flate
- *              16 bpp + level2:      jpeg   [converted to 8 bpp, with warning]
- *              16 bpp + level3:      flate
- *              32 bpp + level2:      jpeg
- *              32 bpp + level3:      flate
- * 
- */ -l_ok -pixWriteCompressedToPS(PIX *pix, - const char *fileout, - l_int32 res, - l_int32 level, - l_int32 *pindex) -{ -char *tname; -l_int32 writeout, d; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixWriteCompressedToPS"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (level != 2 && level != 3) { - L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName); - level = 2; - } - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - - tname = l_makeTempFilename(); - writeout = TRUE; - d = pixGetDepth(pix); - cmap = pixGetColormap(pix); - if (d == 1) { - if (pixWrite(tname, pix, IFF_TIFF_G4)) - writeout = FALSE; - } else if (level == 3) { - if (pixWrite(tname, pix, IFF_PNG)) - writeout = FALSE; - } else { /* level == 2 */ - if (cmap) { - pixt = pixConvertForPSWrap(pix); - if (pixWrite(tname, pixt, IFF_JFIF_JPEG)) - writeout = FALSE; - pixDestroy(&pixt); - } else if (d == 16) { - L_WARNING("d = 16; converting to 8 bpp for jpeg\n", procName); - pixt = pixConvert16To8(pix, L_MS_BYTE); - if (pixWrite(tname, pixt, IFF_JFIF_JPEG)) - writeout = FALSE; - pixDestroy(&pixt); - } else if (d == 2 || d == 4) { - pixt = pixConvertTo8(pix, 0); - if (pixWrite(tname, pixt, IFF_JFIF_JPEG)) - writeout = FALSE; - pixDestroy(&pixt); - } else if (d == 8 || d == 32) { - if (pixWrite(tname, pix, IFF_JFIF_JPEG)) - writeout = FALSE; - } else { /* shouldn't happen */ - L_ERROR("invalid depth with level 2: %d\n", procName, d); - writeout = FALSE; - } - } - - if (writeout) - writeImageCompressedToPSFile(tname, fileout, res, pindex); - - if (lept_rmfile(tname) != 0) - L_ERROR("temp file %s was not deleted\n", procName, tname); - LEPT_FREE(tname); - return (writeout) ? 
0 : 1; -} - -/* --------------------------------------------*/ -#endif /* USE_PSIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio1stub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio1stub.c deleted file mode 100644 index f36b5e0d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio1stub.c +++ /dev/null @@ -1,137 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file psio1stub.c - *
- *
- *     Stubs for psio1.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_PSIO /* defined in environ.h */ -/* --------------------------------------------*/ - -l_ok convertFilesToPS(const char *dirin, const char *substr, - l_int32 res, const char *fileout) -{ - return ERROR_INT("function not present", "convertFilesToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok sarrayConvertFilesToPS(SARRAY *sa, l_int32 res, const char *fileout) -{ - return ERROR_INT("function not present", "sarrayConvertFilesToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertFilesFittedToPS(const char *dirin, const char *substr, - l_float32 xpts, l_float32 ypts, - const char *fileout) -{ - return ERROR_INT("function not present", "convertFilesFittedToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok sarrayConvertFilesFittedToPS(SARRAY *sa, l_float32 xpts, - l_float32 ypts, const char *fileout) -{ - return ERROR_INT("function not present", "sarrayConvertFilesFittedToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok writeImageCompressedToPSFile(const char *filein, const char *fileout, - l_int32 res, l_int32 *pindex) -{ - return ERROR_INT("function not present", "writeImageCompressedToPSFile", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertSegmentedPagesToPS(const char *pagedir, const char *pagestr, - l_int32 page_numpre, const char *maskdir, - const char *maskstr, l_int32 mask_numpre, - l_int32 numpost, l_int32 maxnum, - l_float32 textscale, l_float32 imagescale, - l_int32 threshold, const char *fileout) -{ - return ERROR_INT("function not present", "convertSegmentedPagesToPS", 1); -} - -/* 
----------------------------------------------------------------------*/ - -l_ok pixWriteSegmentedPageToPS(PIX *pixs, PIX *pixm, l_float32 textscale, - l_float32 imagescale, l_int32 threshold, - l_int32 pageno, const char *fileout) -{ - return ERROR_INT("function not present", "pixWriteSegmentedPagesToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMixedToPS(PIX *pixb, PIX *pixc, l_float32 scale, - l_int32 pageno, const char *fileout) -{ - return ERROR_INT("function not present", "pixWriteMixedToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertToPSEmbed(const char *filein, const char *fileout, l_int32 level) -{ - return ERROR_INT("function not present", "convertToPSEmbed", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaWriteCompressedToPS(PIXA *pixa, const char *fileout, - l_int32 res, l_int32 level) -{ - return ERROR_INT("function not present", "pixaWriteCompressedtoPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteCompressedToPS(PIX *pix, const char *fileout, l_int32 res, - l_int32 level, l_int32 *pindex) -{ - return ERROR_INT("function not present", "pixWriteCompressedtoPS", 1); -} - -/* --------------------------------------------*/ -#endif /* !USE_PSIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio2.c deleted file mode 100644 index ea7cede1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio2.c +++ /dev/null @@ -1,2044 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file psio2.c - *
- *
- *    |=============================================================|
- *    |                         Important note                      |
- *    |=============================================================|
- *    | Some of these functions require I/O libraries such as       |
- *    | libtiff, libjpeg, and libz.  If you do not have these       |
- *    | libraries, some calls will fail.                            |
- *    |                                                             |
- *    | You can manually deactivate all PostScript writing by       |
- *    | setting this in environ.h:                                  |
- *    | \code                                                       |
- *    |     #define  USE_PSIO     0                                 |
- *    | \endcode                                                    |
- *    | in environ.h.  This will link psio2stub.c                   |
- *    |=============================================================|
- *
- *     These are lower-level functions that implement a PostScript
- *     "device driver" for wrapping images in PostScript.  The images
- *     can be rendered by a PostScript interpreter for viewing,
- *     using evince or gv.  They can also be rasterized for printing,
- *     using gs or an embedded interpreter in a PostScript printer.
- *     And they can be converted to a pdf using gs (ps2pdf).
- *
- *     For uncompressed images
- *          l_int32              pixWritePSEmbed()
- *          l_int32              pixWriteStreamPS()
- *          char                *pixWriteStringPS()
- *          char                *generateUncompressedPS()
- *          static void          getScaledParametersPS()
- *          static l_int32       convertByteToHexAscii()
- *
- *     For jpeg compressed images (use dct compression)
- *          l_int32              convertJpegToPSEmbed()
- *          l_int32              convertJpegToPS()
- *          static l_int32       convertJpegToPSString()
- *          static char         *generateJpegPS()
- *
- *     For g4 fax compressed images (use ccitt g4 compression)
- *          l_int32              convertG4ToPSEmbed()
- *          l_int32              convertG4ToPS()
- *          static l_int32       convertG4ToPSString()
- *          static char         *generateG4PS()
- *
- *     For multipage tiff images
- *          l_int32              convertTiffMultipageToPS()
- *
- *     For flate (gzip) compressed images (e.g., png)
- *          l_int32              convertFlateToPSEmbed()
- *          l_int32              convertFlateToPS()
- *          static l_int32       convertFlateToPSString()
- *          static char         *generateFlatePS()
- *
- *     Write to memory
- *          l_int32              pixWriteMemPS()
- *
- *     Converting resolution
- *          l_int32              getResLetterPage()
- *          static l_int32       getResA4Page()
- *
- *     Setting flag for writing bounding box hint
- *          void                 l_psWriteBoundingBox()
- *
- *  See psio1.c for higher-level functions and their usage.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if USE_PSIO /* defined in environ.h */ - /* --------------------------------------------*/ - - /* Set default for writing bounding box hint */ -static l_int32 var_PS_WRITE_BOUNDING_BOX = 1; - -static const l_int32 Bufsize = 512; -static const l_int32 DefaultInputRes = 300; /* typical scan res, ppi */ -static const l_int32 MinRes = 5; -static const l_int32 MaxRes = 3000; - - /* For computing resolution that fills page to desired amount */ -static const l_int32 LetterWidth = 612; /* points */ -static const l_int32 LetterHeight = 792; /* points */ -static const l_int32 A4Width = 595; /* points */ -static const l_int32 A4Height = 842; /* points */ -static const l_float32 DefaultFillFraction = 0.95; - -#ifndef NO_CONSOLE_IO -#define DEBUG_JPEG 0 -#define DEBUG_G4 0 -#define DEBUG_FLATE 0 -#endif /* ~NO_CONSOLE_IO */ - -/* Note that the bounding box hint at the top of the generated PostScript - * file is required for the "*Embed" functions. These generate a - * PostScript file for an individual image that can be translated and - * scaled by an application that embeds the image in its output - * (e.g., in the PS output from a TeX file). - * However, bounding box hints should not be embedded in any - * PostScript image that will be composited with other images, - * where more than one image may be placed in an arbitrary location - * on a page. 
*/ - - /* Static helper functions */ -static void getScaledParametersPS(BOX *box, l_int32 wpix, l_int32 hpix, - l_int32 res, l_float32 scale, - l_float32 *pxpt, l_float32 *pypt, - l_float32 *pwpt, l_float32 *phpt); -static void convertByteToHexAscii(l_uint8 byteval, char *pnib1, char *pnib2); -static l_ok convertJpegToPSString(const char *filein, char **poutstr, - l_int32 *pnbytes, l_int32 x, l_int32 y, - l_int32 res, l_float32 scale, - l_int32 pageno, l_int32 endpage); -static char *generateJpegPS(const char *filein, L_COMP_DATA *cid, - l_float32 xpt, l_float32 ypt, l_float32 wpt, - l_float32 hpt, l_int32 pageno, l_int32 endpage); -static l_ok convertG4ToPSString(const char *filein, char **poutstr, - l_int32 *pnbytes, l_int32 x, l_int32 y, - l_int32 res, l_float32 scale, l_int32 pageno, - l_int32 maskflag, l_int32 endpage); -static char *generateG4PS(const char *filein, L_COMP_DATA *cid, l_float32 xpt, - l_float32 ypt, l_float32 wpt, l_float32 hpt, - l_int32 maskflag, l_int32 pageno, l_int32 endpage); -static l_ok convertFlateToPSString(const char *filein, char **poutstr, - l_int32 *pnbytes, l_int32 x, l_int32 y, - l_int32 res, l_float32 scale, - l_int32 pageno, l_int32 endpage); -static char *generateFlatePS(const char *filein, L_COMP_DATA *cid, - l_float32 xpt, l_float32 ypt, l_float32 wpt, - l_float32 hpt, l_int32 pageno, l_int32 endpage); - - -/*-------------------------------------------------------------* - * For uncompressed images * - *-------------------------------------------------------------*/ -/*! - * \brief pixWritePSEmbed() - * - * \param[in] filein input file, all depths, colormap OK - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a simple wrapper function that generates an
- *          uncompressed PS file, with a bounding box.
- *      (2) The bounding box is required when a program such as TeX
- *          (through epsf) places and rescales the image.
- *      (3) The bounding box is sized for fitting the image to an
- *          8.5 x 11.0 inch page.
- * 
- */ -l_ok -pixWritePSEmbed(const char *filein, - const char *fileout) -{ -l_int32 w, h, ret; -l_float32 scale; -FILE *fp; -PIX *pix; - - PROCNAME("pixWritePSEmbed"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((pix = pixRead(filein)) == NULL) - return ERROR_INT("image not read from file", procName, 1); - w = pixGetWidth(pix); - h = pixGetHeight(pix); - if (w * 11.0 > h * 8.5) - scale = 8.5 * 300. / (l_float32)w; - else - scale = 11.0 * 300. / (l_float32)h; - - if ((fp = fopenWriteStream(fileout, "wb")) == NULL) - return ERROR_INT("file not opened for write", procName, 1); - ret = pixWriteStreamPS(fp, pix, NULL, 0, scale); - fclose(fp); - - pixDestroy(&pix); - return ret; -} - - -/*! - * \brief pixWriteStreamPS() - * - * \param[in] fp file stream - * \param[in] pix - * \param[in] box [optional] - * \param[in] res can use 0 for default of 300 ppi - * \param[in] scale to prevent scaling, use either 1.0 or 0.0 - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This writes image in PS format, optionally scaled,
- *          adjusted for the printer resolution, and with
- *          a bounding box.
- *      (2) For details on use of parameters, see pixWriteStringPS().
- * 
- */ -l_ok -pixWriteStreamPS(FILE *fp, - PIX *pix, - BOX *box, - l_int32 res, - l_float32 scale) -{ -char *outstr; -l_int32 length; -PIX *pixc; - - PROCNAME("pixWriteStreamPS"); - - if (!fp) - return (l_int32)ERROR_INT("stream not open", procName, 1); - if (!pix) - return (l_int32)ERROR_INT("pix not defined", procName, 1); - - if ((pixc = pixConvertForPSWrap(pix)) == NULL) - return (l_int32)ERROR_INT("pixc not made", procName, 1); - - if ((outstr = pixWriteStringPS(pixc, box, res, scale)) == NULL) { - pixDestroy(&pixc); - return (l_int32)ERROR_INT("outstr not made", procName, 1); - } - length = strlen(outstr); - fwrite(outstr, 1, length, fp); - LEPT_FREE(outstr); - pixDestroy(&pixc); - return 0; -} - - -/*! - * \brief pixWriteStringPS() - * - * \param[in] pixs all depths, colormap OK - * \param[in] box bounding box; can be NULL - * \param[in] res resolution, in printer ppi. Use 0 for default 300 ppi. - * \param[in] scale scale factor. If no scaling is desired, use - * either 1.0 or 0.0. Scaling just resets the resolution - * parameter; the actual scaling is done in the - * interpreter at rendering time. This is important: - * it allows you to scale the image up without - * increasing the file size. - * \return ps string if OK, or NULL on error - * - *
- * a) If %box == NULL, image is placed, optionally scaled,
- *      in a standard b.b. at the center of the page.
- *      This is to be used when another program like
- *      TeX through epsf places the image.
- * b) If %box != NULL, image is placed without a
- *      b.b. at the specified page location and with
- *      optional scaling.  This is to be used when
- *      you want to specify exactly where and optionally
- *      how big you want the image to be.
- *      Note that all coordinates are in PS convention,
- *      with 0,0 at LL corner of the page:
- *          x,y    location of LL corner of image, in mils.
- *          w,h    scaled size, in mils.  Use 0 to
- *                 scale with "scale" and "res" input.
- *
- * %scale: If no scaling is desired, use either 1.0 or 0.0.
- * Scaling just resets the resolution parameter; the actual
- * scaling is done in the interpreter at rendering time.
- * This is important: * it allows you to scale the image up
- * without increasing the file size.
- *
- * Notes:
- *      (1) OK, this seems a bit complicated, because there are various
- *          ways to scale and not to scale.  Here's a summary:
- *      (2) If you don't want any scaling at all:
- *           * if you are using a box:
- *               set w = 0, h = 0, and use scale = 1.0; it will print
- *               each pixel unscaled at printer resolution
- *           * if you are not using a box:
- *               set scale = 1.0; it will print at printer resolution
- *      (3) If you want the image to be a certain size in inches:
- *           * you must use a box and set the box (w,h) in mils
- *      (4) If you want the image to be scaled by a scale factor != 1.0:
- *           * if you are using a box:
- *               set w = 0, h = 0, and use the desired scale factor;
- *               the higher the printer resolution, the smaller the
- *               image will actually appear.
- *           * if you are not using a box:
- *               set the desired scale factor; the higher the printer
- *               resolution, the smaller the image will actually appear.
- *      (5) Another complication is the proliferation of distance units:
- *           * The interface distances are in milli-inches.
- *           * Three different units are used internally:
- *              ~ pixels  (units of 1/res inch)
- *              ~ printer pts (units of 1/72 inch)
- *              ~ inches
- *           * Here is a quiz on volume units from a reviewer:
- *             How many UK milli-cups in a US kilo-teaspoon?
- *               (Hint: 1.0 US cup = 0.75 UK cup + 0.2 US gill;
- *                      1.0 US gill = 24.0 US teaspoons)
- * 
- */ -char * -pixWriteStringPS(PIX *pixs, - BOX *box, - l_int32 res, - l_float32 scale) -{ -char nib1, nib2; -char *hexdata, *outstr; -l_uint8 byteval; -l_int32 i, j, k, w, h, d; -l_float32 wpt, hpt, xpt, ypt; -l_int32 wpl, psbpl, hexbytes, boxflag, bps; -l_uint32 *line, *data; -PIX *pix; - - PROCNAME("pixWriteStringPS"); - - if (!pixs) - return (char *)ERROR_PTR("pixs not defined", procName, NULL); - - if ((pix = pixConvertForPSWrap(pixs)) == NULL) - return (char *)ERROR_PTR("pix not made", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - - /* Get the factors by which PS scales and translates, in pts */ - if (!box) - boxflag = 0; /* no scaling; b.b. at center */ - else - boxflag = 1; /* no b.b., specify placement and optional scaling */ - getScaledParametersPS(box, w, h, res, scale, &xpt, &ypt, &wpt, &hpt); - - if (d == 1) - bps = 1; /* bits/sample */ - else /* d == 8 || d == 32 */ - bps = 8; - - /* Convert image data to hex string. psbpl is the number of - * bytes in each raster line when it is packed to the byte - * boundary (not the 32 bit word boundary, as with the pix). - * When converted to hex, the hex string has 2 bytes for - * every byte of raster data. 
*/ - wpl = pixGetWpl(pix); - if (d == 1 || d == 8) - psbpl = (w * d + 7) / 8; - else /* d == 32 */ - psbpl = 3 * w; - data = pixGetData(pix); - hexbytes = 2 * psbpl * h; /* size of ps hex array */ - if ((hexdata = (char *)LEPT_CALLOC(hexbytes + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("hexdata not made", procName, NULL); - if (d == 1 || d == 8) { - for (i = 0, k = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < psbpl; j++) { - byteval = GET_DATA_BYTE(line, j); - convertByteToHexAscii(byteval, &nib1, &nib2); - hexdata[k++] = nib1; - hexdata[k++] = nib2; - } - } - } else { /* d == 32; hexdata bytes packed RGBRGB..., 2 per sample */ - for (i = 0, k = 0; i < h; i++) { - line = data + i * wpl; - for (j = 0; j < w; j++) { - byteval = GET_DATA_BYTE(line + j, 0); /* red */ - convertByteToHexAscii(byteval, &nib1, &nib2); - hexdata[k++] = nib1; - hexdata[k++] = nib2; - byteval = GET_DATA_BYTE(line + j, 1); /* green */ - convertByteToHexAscii(byteval, &nib1, &nib2); - hexdata[k++] = nib1; - hexdata[k++] = nib2; - byteval = GET_DATA_BYTE(line + j, 2); /* blue */ - convertByteToHexAscii(byteval, &nib1, &nib2); - hexdata[k++] = nib1; - hexdata[k++] = nib2; - } - } - } - hexdata[k] = '\0'; - - outstr = generateUncompressedPS(hexdata, w, h, d, psbpl, bps, - xpt, ypt, wpt, hpt, boxflag); - pixDestroy(&pix); - if (!outstr) - return (char *)ERROR_PTR("outstr not made", procName, NULL); - return outstr; -} - - -/*! 
- * \brief generateUncompressedPS() - * - * \param[in] hexdata - * \param[in] w, h raster image size in pixels - * \param[in] d image depth in bpp; rgb is 32 - * \param[in] psbpl raster bytes/line, when packed to the byte boundary - * \param[in] bps bits/sample: either 1 or 8 - * \param[in] xpt, ypt location of LL corner of image, in pts, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] wpt, hpt rendered image size in pts - * \param[in] boxflag 1 to print out bounding box hint; 0 to skip - * \return PS string, or NULL on error - * - *
- * Notes:
- *      (1) Low-level function.
- * 
- */ -char * -generateUncompressedPS(char *hexdata, - l_int32 w, - l_int32 h, - l_int32 d, - l_int32 psbpl, - l_int32 bps, - l_float32 xpt, - l_float32 ypt, - l_float32 wpt, - l_float32 hpt, - l_int32 boxflag) -{ -char *outstr; -char bigbuf[Bufsize]; -SARRAY *sa; - - PROCNAME("generateUncompressedPS"); - - if (!hexdata) - return (char *)ERROR_PTR("hexdata not defined", procName, NULL); - - sa = sarrayCreate(0); - sarrayAddString(sa, "%!Adobe-PS", L_COPY); - if (boxflag == 0) { - snprintf(bigbuf, sizeof(bigbuf), - "%%%%BoundingBox: %7.2f %7.2f %7.2f %7.2f", - xpt, ypt, xpt + wpt, ypt + hpt); - sarrayAddString(sa, bigbuf, L_COPY); - } else { /* boxflag == 1 */ - sarrayAddString(sa, "gsave", L_COPY); - } - - if (d == 1) - sarrayAddString(sa, - "{1 exch sub} settransfer %invert binary", L_COPY); - - snprintf(bigbuf, sizeof(bigbuf), - "/bpl %d string def %%bpl as a string", psbpl); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f translate %%set image origin in pts", xpt, ypt); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f scale %%set image size in pts", wpt, hpt); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - "%d %d %d %%image dimensions in pixels", w, h, bps); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - "[%d %d %d %d %d %d] %%mapping matrix: [w 0 0 -h 0 h]", - w, 0, 0, -h, 0, h); - sarrayAddString(sa, bigbuf, L_COPY); - - if (boxflag == 0) { - if (d == 1 || d == 8) - sarrayAddString(sa, - "{currentfile bpl readhexstring pop} image", L_COPY); - else /* d == 32 */ - sarrayAddString(sa, - "{currentfile bpl readhexstring pop} false 3 colorimage", - L_COPY); - } else { /* boxflag == 1 */ - if (d == 1 || d == 8) - sarrayAddString(sa, - "{currentfile bpl readhexstring pop} bind image", L_COPY); - else /* d == 32 */ - sarrayAddString(sa, - "{currentfile bpl readhexstring pop} bind false 3 colorimage", - L_COPY); - } - - 
sarrayAddString(sa, hexdata, L_INSERT); - - if (boxflag == 0) - sarrayAddString(sa, "\nshowpage", L_COPY); - else /* boxflag == 1 */ - sarrayAddString(sa, "\ngrestore", L_COPY); - - outstr = sarrayToString(sa, 1); - sarrayDestroy(&sa); - if (!outstr) L_ERROR("outstr not made\n", procName); - return outstr; -} - - -/*! - * \brief getScaledParametersPS() - * - * \param[in] box [optional] location of image in mils; x,y is LL corner - * \param[in] wpix pix width in pixels - * \param[in] hpix pix height in pixels - * \param[in] res of printer; use 0 for default - * \param[in] scale use 1.0 or 0.0 for no scaling - * \param[out] pxpt location of llx in pts - * \param[out] pypt location of lly in pts - * \param[out] pwpt image width in pts - * \param[out] phpt image height in pts - * \return void no arg checking - * - *
- * Notes:
- *      (1) The image is always scaled, depending on res and scale.
- *      (2) If no box, the image is centered on the page.
- *      (3) If there is a box, the image is placed within it.
- * 
- */ -static void -getScaledParametersPS(BOX *box, - l_int32 wpix, - l_int32 hpix, - l_int32 res, - l_float32 scale, - l_float32 *pxpt, - l_float32 *pypt, - l_float32 *pwpt, - l_float32 *phpt) -{ -l_int32 bx, by, bw, bh; -l_float32 winch, hinch, xinch, yinch, fres; - - PROCNAME("getScaledParametersPS"); - - if (res == 0) - res = DefaultInputRes; - fres = (l_float32)res; - - /* Allow the PS interpreter to scale the resolution */ - if (scale == 0.0) - scale = 1.0; - if (scale != 1.0) { - fres = (l_float32)res / scale; - res = (l_int32)fres; - } - - /* Limit valid resolution interval */ - if (res < MinRes || res > MaxRes) { - L_WARNING("res %d out of bounds; using default res; no scaling\n", - procName, res); - res = DefaultInputRes; - fres = (l_float32)res; - } - - if (!box) { /* center on page */ - winch = (l_float32)wpix / fres; - hinch = (l_float32)hpix / fres; - xinch = (8.5 - winch) / 2.; - yinch = (11.0 - hinch) / 2.; - } else { - boxGetGeometry(box, &bx, &by, &bw, &bh); - if (bw == 0) - winch = (l_float32)wpix / fres; - else - winch = (l_float32)bw / 1000.; - if (bh == 0) - hinch = (l_float32)hpix / fres; - else - hinch = (l_float32)bh / 1000.; - xinch = (l_float32)bx / 1000.; - yinch = (l_float32)by / 1000.; - } - - if (xinch < 0) - L_WARNING("left edge < 0.0 inch\n", procName); - if (xinch + winch > 8.5) - L_WARNING("right edge > 8.5 inch\n", procName); - if (yinch < 0.0) - L_WARNING("bottom edge < 0.0 inch\n", procName); - if (yinch + hinch > 11.0) - L_WARNING("top edge > 11.0 inch\n", procName); - - *pwpt = 72. * winch; - *phpt = 72. * hinch; - *pxpt = 72. * xinch; - *pypt = 72. * yinch; - return; -} - - -/*! 
- * \brief convertByteToHexAscii() - * - * \param[in] byteval input byte - * \param[out] pnib1, pnib2 two hex ascii characters - * \return void - */ -static void -convertByteToHexAscii(l_uint8 byteval, - char *pnib1, - char *pnib2) -{ -l_uint8 nib; - - nib = byteval >> 4; - if (nib < 10) - *pnib1 = '0' + nib; - else - *pnib1 = 'a' + (nib - 10); - nib = byteval & 0xf; - if (nib < 10) - *pnib2 = '0' + nib; - else - *pnib2 = 'a' + (nib - 10); - return; -} - - -/*-------------------------------------------------------------* - * For jpeg compressed images * - *-------------------------------------------------------------*/ -/*! - * \brief convertJpegToPSEmbed() - * - * \param[in] filein input jpeg file - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function takes a jpeg file as input and generates a DCT
- *          compressed, ascii85 encoded PS file, with a bounding box.
- *      (2) The bounding box is required when a program such as TeX
- *          (through epsf) places and rescales the image.
- *      (3) The bounding box is sized for fitting the image to an
- *          8.5 x 11.0 inch page.
- * 
- */ -l_ok -convertJpegToPSEmbed(const char *filein, - const char *fileout) -{ -char *outstr; -l_int32 w, h, nbytes, ret; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid; - - PROCNAME("convertJpegToPSEmbed"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - /* Generate the ascii encoded jpeg data */ - if ((cid = l_generateJpegData(filein, 1)) == NULL) - return ERROR_INT("jpeg data not made", procName, 1); - w = cid->w; - h = cid->h; - - /* Scale for 20 pt boundary and otherwise full filling - * in one direction on 8.5 x 11 inch device */ - xpt = 20.0; - ypt = 20.0; - if (w * 11.0 > h * 8.5) { - wpt = 572.0; /* 612 - 2 * 20 */ - hpt = wpt * (l_float32)h / (l_float32)w; - } else { - hpt = 752.0; /* 792 - 2 * 20 */ - wpt = hpt * (l_float32)w / (l_float32)h; - } - - /* Generate the PS. - * The bounding box information should be inserted (default). */ - outstr = generateJpegPS(NULL, cid, xpt, ypt, wpt, hpt, 1, 1); - l_CIDataDestroy(&cid); - if (!outstr) - return ERROR_INT("outstr not made", procName, 1); - nbytes = strlen(outstr); - - ret = l_binaryWrite(fileout, "w", outstr, nbytes); - LEPT_FREE(outstr); - if (ret) L_ERROR("ps string not written to file\n", procName); - return ret; -} - - -/*! 
- * \brief convertJpegToPS() - * - * \param[in] filein input jpeg file - * \param[in] fileout output ps file - * \param[in] operation "w" for write; "a" for append - * \param[in] x, y location of LL corner of image, in pixels, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] res resolution of the input image, in ppi; - * use 0 for default - * \param[in] scale scaling by printer; use 0.0 or 1.0 for no scaling - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is simpler to use than pixWriteStringPS(), and
- *          it outputs in level 2 PS as compressed DCT (overlaid
- *          with ascii85 encoding).
- *      (2) An output file can contain multiple pages, each with
- *          multiple images.  The arguments to convertJpegToPS()
- *          allow you to control placement of jpeg images on multiple
- *          pages within a PostScript file.
- *      (3) For the first image written to a file, use "w", which
- *          opens for write and clears the file.  For all subsequent
- *          images written to that file, use "a".
- *      (4) The (x, y) parameters give the LL corner of the image
- *          relative to the LL corner of the page.  They are in
- *          units of pixels if scale = 1.0.  If you use (e.g.)
- *          scale = 2.0, the image is placed at (2x, 2y) on the page,
- *          and the image dimensions are also doubled.
- *      (5) Display vs printed resolution:
- *           * If your display is 75 ppi and your image was created
- *             at a resolution of 300 ppi, you can get the image
- *             to print at the same size as it appears on your display
- *             by either setting scale = 4.0 or by setting  res = 75.
- *             Both tell the printer to make a 4x enlarged image.
- *           * If your image is generated at 150 ppi and you use scale = 1,
- *             it will be rendered such that 150 pixels correspond
- *             to 72 pts (1 inch on the printer).  This function does
- *             the conversion from pixels (with or without scaling) to
- *             pts, which are the units that the printer uses.
- *           * The printer will choose its own resolution to use
- *             in rendering the image, which will not affect the size
- *             of the rendered image.  That is because the output
- *             PostScript file describes the geometry in terms of pts,
- *             which are defined to be 1/72 inch.  The printer will
- *             only see the size of the image in pts, through the
- *             scale and translate parameters and the affine
- *             transform (the ImageMatrix) of the image.
- *      (6) To render multiple images on the same page, set
- *          endpage = FALSE for each image until you get to the
- *          last, for which you set endpage = TRUE.  This causes the
- *          "showpage" command to be invoked.  Showpage outputs
- *          the entire page and clears the raster buffer for the
- *          next page to be added.  Without a "showpage",
- *          subsequent images from the next page will overlay those
- *          previously put down.
- *      (7) For multiple pages, increment the page number, starting
- *          with page 1.  This allows PostScript (and PDF) to build
- *          a page directory, which viewers use for navigation.
- * 
- */ -l_ok -convertJpegToPS(const char *filein, - const char *fileout, - const char *operation, - l_int32 x, - l_int32 y, - l_int32 res, - l_float32 scale, - l_int32 pageno, - l_int32 endpage) -{ -char *outstr; -l_int32 nbytes; - - PROCNAME("convertJpegToPS"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (strcmp(operation, "w") && strcmp(operation, "a")) - return ERROR_INT("operation must be \"w\" or \"a\"", procName, 1); - - if (convertJpegToPSString(filein, &outstr, &nbytes, x, y, res, scale, - pageno, endpage)) - return ERROR_INT("ps string not made", procName, 1); - - if (l_binaryWrite(fileout, operation, outstr, nbytes)) { - LEPT_FREE(outstr); - return ERROR_INT("ps string not written to file", procName, 1); - } - - LEPT_FREE(outstr); - return 0; -} - - -/*! - * \brief convertJpegToPSString() - * - * Generates PS string in jpeg format from jpeg file - * - * \param[in] filein input jpeg file - * \param[out] poutstr PS string - * \param[out] pnbytes number of bytes in PS string - * \param[in] x, y location of LL corner of image, in pixels, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] res resolution of the input image, in ppi; - * use 0 for default - * \param[in] scale scaling by printer; use 0.0 or 1.0 for no scaling - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For usage, see convertJpegToPS()
- * 
- */ -static l_ok -convertJpegToPSString(const char *filein, - char **poutstr, - l_int32 *pnbytes, - l_int32 x, - l_int32 y, - l_int32 res, - l_float32 scale, - l_int32 pageno, - l_int32 endpage) -{ -char *outstr; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid; - - PROCNAME("convertJpegToPSString"); - - if (!poutstr) - return ERROR_INT("&outstr not defined", procName, 1); - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *poutstr = NULL; - *pnbytes = 0; - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - - /* Generate the ascii encoded jpeg data */ - if ((cid = l_generateJpegData(filein, 1)) == NULL) - return ERROR_INT("jpeg data not made", procName, 1); - - /* Get scaled location in pts. Guess the input scan resolution - * based on the input parameter %res, the resolution data in - * the pix, and the size of the image. */ - if (scale == 0.0) - scale = 1.0; - if (res <= 0) { - if (cid->res > 0) - res = cid->res; - else - res = DefaultInputRes; - } - - /* Get scaled location in pts */ - if (scale == 0.0) - scale = 1.0; - xpt = scale * x * 72. / res; - ypt = scale * y * 72. / res; - wpt = scale * cid->w * 72. / res; - hpt = scale * cid->h * 72. / res; - - if (pageno == 0) - pageno = 1; - -#if DEBUG_JPEG - lept_stderr("w = %d, h = %d, bps = %d, spp = %d\n", - cid->w, cid->h, cid->bps, cid->spp); - lept_stderr("comp bytes = %ld, nbytes85 = %ld, ratio = %5.3f\n", - (unsigned long)cid->nbytescomp, (unsigned long)cid->nbytes85, - (l_float32)cid->nbytes85 / (l_float32)cid->nbytescomp); - lept_stderr("xpt = %7.2f, ypt = %7.2f, wpt = %7.2f, hpt = %7.2f\n", - xpt, ypt, wpt, hpt); -#endif /* DEBUG_JPEG */ - - /* Generate the PS */ - outstr = generateJpegPS(NULL, cid, xpt, ypt, wpt, hpt, pageno, endpage); - l_CIDataDestroy(&cid); - if (!outstr) - return ERROR_INT("outstr not made", procName, 1); - *poutstr = outstr; - *pnbytes = strlen(outstr); - return 0; -} - - -/*! 
- * \brief generateJpegPS() - * - * \param[in] filein [optional] input jpeg filename; can be null - * \param[in] cid jpeg compressed image data - * \param[in] xpt, ypt location of LL corner of image, in pts, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] wpt, hpt rendered image size in pts - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page. - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return PS string, or NULL on error - * - *
- * Notes:
- *      (1) Low-level function.
- * 
- */ -static char * -generateJpegPS(const char *filein, - L_COMP_DATA *cid, - l_float32 xpt, - l_float32 ypt, - l_float32 wpt, - l_float32 hpt, - l_int32 pageno, - l_int32 endpage) -{ -l_int32 w, h, bps, spp; -char *outstr; -char bigbuf[Bufsize]; -SARRAY *sa; - - PROCNAME("generateJpegPS"); - - if (!cid) - return (char *)ERROR_PTR("jpeg data not defined", procName, NULL); - w = cid->w; - h = cid->h; - bps = cid->bps; - spp = cid->spp; - - sa = sarrayCreate(50); - sarrayAddString(sa, "%!PS-Adobe-3.0", L_COPY); - sarrayAddString(sa, "%%Creator: leptonica", L_COPY); - if (filein) - snprintf(bigbuf, sizeof(bigbuf), "%%%%Title: %s", filein); - else - snprintf(bigbuf, sizeof(bigbuf), "%%%%Title: Jpeg compressed PS"); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, "%%DocumentData: Clean7Bit", L_COPY); - - if (var_PS_WRITE_BOUNDING_BOX == 1) { - snprintf(bigbuf, sizeof(bigbuf), - "%%%%BoundingBox: %7.2f %7.2f %7.2f %7.2f", - xpt, ypt, xpt + wpt, ypt + hpt); - sarrayAddString(sa, bigbuf, L_COPY); - } - - sarrayAddString(sa, "%%LanguageLevel: 2", L_COPY); - sarrayAddString(sa, "%%EndComments", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), "%%%%Page: %d %d", pageno, pageno); - sarrayAddString(sa, bigbuf, L_COPY); - - sarrayAddString(sa, "save", L_COPY); - sarrayAddString(sa, - "/RawData currentfile /ASCII85Decode filter def", L_COPY); - sarrayAddString(sa, "/Data RawData << >> /DCTDecode filter def", L_COPY); - - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f translate %%set image origin in pts", xpt, ypt); - sarrayAddString(sa, bigbuf, L_COPY); - - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f scale %%set image size in pts", wpt, hpt); - sarrayAddString(sa, bigbuf, L_COPY); - - if (spp == 1) - sarrayAddString(sa, "/DeviceGray setcolorspace", L_COPY); - else if (spp == 3) - sarrayAddString(sa, "/DeviceRGB setcolorspace", L_COPY); - else /*spp == 4 */ - sarrayAddString(sa, "/DeviceCMYK setcolorspace", L_COPY); - - sarrayAddString(sa, "{ << /ImageType 1", 
L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Width %d", w); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Height %d", h); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - " /ImageMatrix [ %d 0 0 %d 0 %d ]", w, -h, h); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, " /DataSource Data", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /BitsPerComponent %d", bps); - sarrayAddString(sa, bigbuf, L_COPY); - - if (spp == 1) - sarrayAddString(sa, " /Decode [0 1]", L_COPY); - else if (spp == 3) - sarrayAddString(sa, " /Decode [0 1 0 1 0 1]", L_COPY); - else /* spp == 4 */ - sarrayAddString(sa, " /Decode [0 1 0 1 0 1 0 1]", L_COPY); - - sarrayAddString(sa, " >> image", L_COPY); - sarrayAddString(sa, " Data closefile", L_COPY); - sarrayAddString(sa, " RawData flushfile", L_COPY); - if (endpage == TRUE) - sarrayAddString(sa, " showpage", L_COPY); - sarrayAddString(sa, " restore", L_COPY); - sarrayAddString(sa, "} exec", L_COPY); - - /* Insert the ascii85 jpeg data; this is now owned by sa */ - sarrayAddString(sa, cid->data85, L_INSERT); - cid->data85 = NULL; /* it has been transferred and destroyed */ - - /* Generate and return the output string */ - outstr = sarrayToString(sa, 1); - sarrayDestroy(&sa); - return outstr; -} - - -/*-------------------------------------------------------------* - * For ccitt g4 compressed images * - *-------------------------------------------------------------*/ -/*! - * \brief convertG4ToPSEmbed() - * - * \param[in] filein input tiff file - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function takes a g4 compressed tif file as input and
- *          generates a g4 compressed, ascii85 encoded PS file, with
- *          a bounding box.
- *      (2) The bounding box is required when a program such as TeX
- *          (through epsf) places and rescales the image.
- *      (3) The bounding box is sized for fitting the image to an
- *          8.5 x 11.0 inch page.
- *      (4) We paint this through a mask, over whatever is below.
- * 
- */ -l_ok -convertG4ToPSEmbed(const char *filein, - const char *fileout) -{ -char *outstr; -l_int32 w, h, nbytes, ret; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid; - - PROCNAME("convertG4ToPSEmbed"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((cid = l_generateG4Data(filein, 1)) == NULL) - return ERROR_INT("g4 data not made", procName, 1); - w = cid->w; - h = cid->h; - - /* Scale for 20 pt boundary and otherwise full filling - * in one direction on 8.5 x 11 inch device */ - xpt = 20.0; - ypt = 20.0; - if (w * 11.0 > h * 8.5) { - wpt = 572.0; /* 612 - 2 * 20 */ - hpt = wpt * (l_float32)h / (l_float32)w; - } else { - hpt = 752.0; /* 792 - 2 * 20 */ - wpt = hpt * (l_float32)w / (l_float32)h; - } - - /* Generate the PS, painting through the image mask. - * The bounding box information should be inserted (default). */ - outstr = generateG4PS(NULL, cid, xpt, ypt, wpt, hpt, 1, 1, 1); - l_CIDataDestroy(&cid); - if (!outstr) - return ERROR_INT("outstr not made", procName, 1); - nbytes = strlen(outstr); - - ret = l_binaryWrite(fileout, "w", outstr, nbytes); - LEPT_FREE(outstr); - if (ret) L_ERROR("ps string not written to file\n", procName); - return ret; -} - - -/*! - * \brief convertG4ToPS() - * - * \param[in] filein input tiff g4 file - * \param[in] fileout output ps file - * \param[in] operation "w" for write; "a" for append - * \param[in] x, y location of LL corner of image, in pixels, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] res resolution of the input image, in ppi; typ. values - * are 300 and 600; use 0 for automatic determination - * based on image size - * \param[in] scale scaling by printer; use 0.0 or 1.0 for no scaling - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page. 
- * \param[in] maskflag boolean: use TRUE if just painting through fg; - * FALSE if painting both fg and bg. - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See the usage comments in convertJpegToPS(), some of
- *          which are repeated here.
- *      (2) This is a wrapper for tiff g4.  The PostScript that
- *          is generated is expanded by about 5/4 (due to the
- *          ascii85 encoding.  If you convert to pdf (ps2pdf), the
- *          ascii85 decoder is automatically invoked, so that the
- *          pdf wrapped g4 file is essentially the same size as
- *          the original g4 file.  It's useful to have the PS
- *          file ascii85 encoded, because many printers will not
- *          print binary PS files.
- *      (3) For the first image written to a file, use "w", which
- *          opens for write and clears the file.  For all subsequent
- *          images written to that file, use "a".
- *      (4) To render multiple images on the same page, set
- *          endpage = FALSE for each image until you get to the
- *          last, for which you set endpage = TRUE.  This causes the
- *          "showpage" command to be invoked.  Showpage outputs
- *          the entire page and clears the raster buffer for the
- *          next page to be added.  Without a "showpage",
- *          subsequent images from the next page will overlay those
- *          previously put down.
- *      (5) For multiple images to the same page, where you are writing
- *          both jpeg and tiff-g4, you have two options:
- *           (a) write the g4 first, as either image (maskflag == FALSE)
- *               or imagemask (maskflag == TRUE), and then write the
- *               jpeg over it.
- *           (b) write the jpeg first and as the last item, write
- *               the g4 as an imagemask (maskflag == TRUE), to paint
- *               through the foreground only.
- *          We have this flexibility with the tiff-g4 because it is 1 bpp.
- *      (6) For multiple pages, increment the page number, starting
- *          with page 1.  This allows PostScript (and PDF) to build
- *          a page directory, which viewers use for navigation.
- * 
- */ -l_ok -convertG4ToPS(const char *filein, - const char *fileout, - const char *operation, - l_int32 x, - l_int32 y, - l_int32 res, - l_float32 scale, - l_int32 pageno, - l_int32 maskflag, - l_int32 endpage) -{ -char *outstr; -l_int32 nbytes, ret; - - PROCNAME("convertG4ToPS"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (strcmp(operation, "w") && strcmp(operation, "a")) - return ERROR_INT("operation must be \"w\" or \"a\"", procName, 1); - - if (convertG4ToPSString(filein, &outstr, &nbytes, x, y, res, scale, - pageno, maskflag, endpage)) - return ERROR_INT("ps string not made", procName, 1); - - ret = l_binaryWrite(fileout, operation, outstr, nbytes); - LEPT_FREE(outstr); - if (ret) - return ERROR_INT("ps string not written to file", procName, 1); - return 0; -} - - -/*! - * \brief convertG4ToPSString() - * - * \param[in] filein input tiff g4 file - * \param[out] poutstr PS string - * \param[out] pnbytes number of bytes in PS string - * \param[in] x, y location of LL corner of image, in pixels, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] res resolution of the input image, in ppi; typ. values - * are 300 and 600; use 0 for automatic determination - * based on image size - * \param[in] scale scaling by printer; use 0.0 or 1.0 for no scaling - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page. - * \param[in] maskflag boolean: use TRUE if just painting through fg; - * FALSE if painting both fg and bg. - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates PS string in G4 compressed tiff format from G4 tiff file.
- *      (2) For usage, see convertG4ToPS().
- * 
- */ -static l_ok -convertG4ToPSString(const char *filein, - char **poutstr, - l_int32 *pnbytes, - l_int32 x, - l_int32 y, - l_int32 res, - l_float32 scale, - l_int32 pageno, - l_int32 maskflag, - l_int32 endpage) -{ -char *outstr; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid; - - PROCNAME("convertG4ToPSString"); - - if (!poutstr) - return ERROR_INT("&outstr not defined", procName, 1); - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *poutstr = NULL; - *pnbytes = 0; - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - - if ((cid = l_generateG4Data(filein, 1)) == NULL) - return ERROR_INT("g4 data not made", procName, 1); - - /* Get scaled location in pts. Guess the input scan resolution - * based on the input parameter %res, the resolution data in - * the pix, and the size of the image. */ - if (scale == 0.0) - scale = 1.0; - if (res <= 0) { - if (cid->res > 0) { - res = cid->res; - } else { - if (cid->h <= 3509) /* A4 height at 300 ppi */ - res = 300; - else - res = 600; - } - } - xpt = scale * x * 72. / res; - ypt = scale * y * 72. / res; - wpt = scale * cid->w * 72. / res; - hpt = scale * cid->h * 72. / res; - - if (pageno == 0) - pageno = 1; - -#if DEBUG_G4 - lept_stderr("w = %d, h = %d, minisblack = %d\n", - cid->w, cid->h, cid->minisblack); - lept_stderr("comp bytes = %ld, nbytes85 = %ld\n", - (unsigned long)cid->nbytescomp, (unsigned long)cid->nbytes85); - lept_stderr("xpt = %7.2f, ypt = %7.2f, wpt = %7.2f, hpt = %7.2f\n", - xpt, ypt, wpt, hpt); -#endif /* DEBUG_G4 */ - - /* Generate the PS */ - outstr = generateG4PS(NULL, cid, xpt, ypt, wpt, hpt, - maskflag, pageno, endpage); - l_CIDataDestroy(&cid); - if (!outstr) - return ERROR_INT("outstr not made", procName, 1); - *poutstr = outstr; - *pnbytes = strlen(outstr); - return 0; -} - - -/*! 
- * \brief generateG4PS() - * - * \param[in] filein [optional] input tiff g4 file; can be null - * \param[in] cid g4 compressed image data - * \param[in] xpt, ypt location of LL corner of image, in pts, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] wpt, hpt rendered image size in pts - * \param[in] maskflag boolean: use TRUE if just painting through fg; - * FALSE if painting both fg and bg. - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page. - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return PS string, or NULL on error - * - *
- * Notes:
- *      (1) Low-level function.
- * 
- */ -static char * -generateG4PS(const char *filein, - L_COMP_DATA *cid, - l_float32 xpt, - l_float32 ypt, - l_float32 wpt, - l_float32 hpt, - l_int32 maskflag, - l_int32 pageno, - l_int32 endpage) -{ -l_int32 w, h; -char *outstr; -char bigbuf[Bufsize]; -SARRAY *sa; - - PROCNAME("generateG4PS"); - - if (!cid) - return (char *)ERROR_PTR("g4 data not defined", procName, NULL); - w = cid->w; - h = cid->h; - - sa = sarrayCreate(50); - sarrayAddString(sa, "%!PS-Adobe-3.0", L_COPY); - sarrayAddString(sa, "%%Creator: leptonica", L_COPY); - if (filein) - snprintf(bigbuf, sizeof(bigbuf), "%%%%Title: %s", filein); - else - snprintf(bigbuf, sizeof(bigbuf), "%%%%Title: G4 compressed PS"); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, "%%DocumentData: Clean7Bit", L_COPY); - - if (var_PS_WRITE_BOUNDING_BOX == 1) { - snprintf(bigbuf, sizeof(bigbuf), - "%%%%BoundingBox: %7.2f %7.2f %7.2f %7.2f", - xpt, ypt, xpt + wpt, ypt + hpt); - sarrayAddString(sa, bigbuf, L_COPY); - } - - sarrayAddString(sa, "%%LanguageLevel: 2", L_COPY); - sarrayAddString(sa, "%%EndComments", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), "%%%%Page: %d %d", pageno, pageno); - sarrayAddString(sa, bigbuf, L_COPY); - - sarrayAddString(sa, "save", L_COPY); - sarrayAddString(sa, "100 dict begin", L_COPY); - - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f translate %%set image origin in pts", xpt, ypt); - sarrayAddString(sa, bigbuf, L_COPY); - - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f scale %%set image size in pts", wpt, hpt); - sarrayAddString(sa, bigbuf, L_COPY); - - sarrayAddString(sa, "/DeviceGray setcolorspace", L_COPY); - - sarrayAddString(sa, "{", L_COPY); - sarrayAddString(sa, - " /RawData currentfile /ASCII85Decode filter def", L_COPY); - sarrayAddString(sa, " << ", L_COPY); - sarrayAddString(sa, " /ImageType 1", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Width %d", w); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Height %d", h); - 
sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - " /ImageMatrix [ %d 0 0 %d 0 %d ]", w, -h, h); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, " /BitsPerComponent 1", L_COPY); - sarrayAddString(sa, " /Interpolate true", L_COPY); - if (cid->minisblack) - sarrayAddString(sa, " /Decode [1 0]", L_COPY); - else /* miniswhite; typical for 1 bpp */ - sarrayAddString(sa, " /Decode [0 1]", L_COPY); - sarrayAddString(sa, " /DataSource RawData", L_COPY); - sarrayAddString(sa, " <<", L_COPY); - sarrayAddString(sa, " /K -1", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Columns %d", w); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Rows %d", h); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, " >> /CCITTFaxDecode filter", L_COPY); - if (maskflag == TRUE) /* just paint through the fg */ - sarrayAddString(sa, " >> imagemask", L_COPY); - else /* Paint full image */ - sarrayAddString(sa, " >> image", L_COPY); - sarrayAddString(sa, " RawData flushfile", L_COPY); - if (endpage == TRUE) - sarrayAddString(sa, " showpage", L_COPY); - sarrayAddString(sa, "}", L_COPY); - - sarrayAddString(sa, "%%BeginData:", L_COPY); - sarrayAddString(sa, "exec", L_COPY); - - /* Insert the ascii85 ccittg4 data; this is now owned by sa */ - sarrayAddString(sa, cid->data85, L_INSERT); - - /* Concat the trailing data */ - sarrayAddString(sa, "%%EndData", L_COPY); - sarrayAddString(sa, "end", L_COPY); - sarrayAddString(sa, "restore", L_COPY); - - outstr = sarrayToString(sa, 1); - sarrayDestroy(&sa); - cid->data85 = NULL; /* it has been transferred and destroyed */ - return outstr; -} - - -/*-------------------------------------------------------------* - * For tiff multipage files * - *-------------------------------------------------------------*/ -/*! 
- * \brief convertTiffMultipageToPS() - * - * \param[in] filein input tiff multipage file - * \param[in] fileout output ps file - * \param[in] fillfract factor for filling 8.5 x 11 inch page; - * use 0.0 for DefaultFillFraction - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This converts a multipage tiff file of binary page images
- *          into a ccitt g4 compressed PS file.
- *      (2) If the images are generated from a standard resolution fax,
- *          the vertical resolution is doubled to give a normal-looking
- *          aspect ratio.
- * 
- */ -l_ok -convertTiffMultipageToPS(const char *filein, - const char *fileout, - l_float32 fillfract) -{ -char *tempfile; -l_int32 i, npages, w, h, istiff; -l_float32 scale; -PIX *pix, *pixs; -FILE *fp; - - PROCNAME("convertTiffMultipageToPS"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((fp = fopenReadStream(filein)) == NULL) - return ERROR_INT("file not found", procName, 1); - istiff = fileFormatIsTiff(fp); - if (!istiff) { - fclose(fp); - return ERROR_INT("file not tiff format", procName, 1); - } - tiffGetCount(fp, &npages); - fclose(fp); - - if (fillfract == 0.0) - fillfract = DefaultFillFraction; - - for (i = 0; i < npages; i++) { - if ((pix = pixReadTiff(filein, i)) == NULL) - return ERROR_INT("pix not made", procName, 1); - - pixGetDimensions(pix, &w, &h, NULL); - if (w == 1728 && h < w) /* it's a std res fax */ - pixs = pixScale(pix, 1.0, 2.0); - else - pixs = pixClone(pix); - - tempfile = l_makeTempFilename(); - pixWrite(tempfile, pixs, IFF_TIFF_G4); - scale = L_MIN(fillfract * 2550 / w, fillfract * 3300 / h); - if (i == 0) - convertG4ToPS(tempfile, fileout, "w", 0, 0, 300, scale, - i + 1, FALSE, TRUE); - else - convertG4ToPS(tempfile, fileout, "a", 0, 0, 300, scale, - i + 1, FALSE, TRUE); - lept_rmfile(tempfile); - LEPT_FREE(tempfile); - pixDestroy(&pix); - pixDestroy(&pixs); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * For flate (gzip) compressed images (e.g., png) * - *---------------------------------------------------------------------*/ -/*! - * \brief convertFlateToPSEmbed() - * - * \param[in] filein input file -- any format - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function takes any image file as input and generates a
- *          flate-compressed, ascii85 encoded PS file, with a bounding box.
- *      (2) The bounding box is required when a program such as TeX
- *          (through epsf) places and rescales the image.
- *      (3) The bounding box is sized for fitting the image to an
- *          8.5 x 11.0 inch page.
- * 
- */ -l_ok -convertFlateToPSEmbed(const char *filein, - const char *fileout) -{ -char *outstr; -l_int32 w, h, nbytes, ret; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid; - - PROCNAME("convertFlateToPSEmbed"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - if ((cid = l_generateFlateData(filein, 1)) == NULL) - return ERROR_INT("flate data not made", procName, 1); - w = cid->w; - h = cid->h; - - /* Scale for 20 pt boundary and otherwise full filling - * in one direction on 8.5 x 11 inch device */ - xpt = 20.0; - ypt = 20.0; - if (w * 11.0 > h * 8.5) { - wpt = 572.0; /* 612 - 2 * 20 */ - hpt = wpt * (l_float32)h / (l_float32)w; - } else { - hpt = 752.0; /* 792 - 2 * 20 */ - wpt = hpt * (l_float32)w / (l_float32)h; - } - - /* Generate the PS. - * The bounding box information should be inserted (default). */ - outstr = generateFlatePS(NULL, cid, xpt, ypt, wpt, hpt, 1, 1); - l_CIDataDestroy(&cid); - if (!outstr) - return ERROR_INT("outstr not made", procName, 1); - nbytes = strlen(outstr); - - ret = l_binaryWrite(fileout, "w", outstr, nbytes); - LEPT_FREE(outstr); - if (ret) L_ERROR("ps string not written to file\n", procName); - return ret; -} - - -/*! - * \brief convertFlateToPS() - * - * \param[in] filein input file -- any format - * \param[in] fileout output ps file - * \param[in] operation "w" for write; "a" for append - * \param[in] x, y location of LL corner of image, in pixels, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] res resolution of the input image, in ppi; - * use 0 for default - * \param[in] scale scaling by printer; use 0.0 or 1.0 for no scaling - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page. - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This outputs level 3 PS as flate compressed (overlaid
- *          with ascii85 encoding).
- *      (2) An output file can contain multiple pages, each with
- *          multiple images.  The arguments to convertFlateToPS()
- *          allow you to control placement of png images on multiple
- *          pages within a PostScript file.
- *      (3) For the first image written to a file, use "w", which
- *          opens for write and clears the file.  For all subsequent
- *          images written to that file, use "a".
- *      (4) The (x, y) parameters give the LL corner of the image
- *          relative to the LL corner of the page.  They are in
- *          units of pixels if scale = 1.0.  If you use (e.g.)
- *          scale = 2.0, the image is placed at (2x, 2y) on the page,
- *          and the image dimensions are also doubled.
- *      (5) Display vs printed resolution:
- *           * If your display is 75 ppi and your image was created
- *             at a resolution of 300 ppi, you can get the image
- *             to print at the same size as it appears on your display
- *             by either setting scale = 4.0 or by setting  res = 75.
- *             Both tell the printer to make a 4x enlarged image.
- *           * If your image is generated at 150 ppi and you use scale = 1,
- *             it will be rendered such that 150 pixels correspond
- *             to 72 pts (1 inch on the printer).  This function does
- *             the conversion from pixels (with or without scaling) to
- *             pts, which are the units that the printer uses.
- *           * The printer will choose its own resolution to use
- *             in rendering the image, which will not affect the size
- *             of the rendered image.  That is because the output
- *             PostScript file describes the geometry in terms of pts,
- *             which are defined to be 1/72 inch.  The printer will
- *             only see the size of the image in pts, through the
- *             scale and translate parameters and the affine
- *             transform (the ImageMatrix) of the image.
- *      (6) To render multiple images on the same page, set
- *          endpage = FALSE for each image until you get to the
- *          last, for which you set endpage = TRUE.  This causes the
- *          "showpage" command to be invoked.  Showpage outputs
- *          the entire page and clears the raster buffer for the
- *          next page to be added.  Without a "showpage",
- *          subsequent images from the next page will overlay those
- *          previously put down.
- *      (7) For multiple pages, increment the page number, starting
- *          with page 1.  This allows PostScript (and PDF) to build
- *          a page directory, which viewers use for navigation.
- * 
- */ -l_ok -convertFlateToPS(const char *filein, - const char *fileout, - const char *operation, - l_int32 x, - l_int32 y, - l_int32 res, - l_float32 scale, - l_int32 pageno, - l_int32 endpage) -{ -char *outstr; -l_int32 nbytes, ret; - - PROCNAME("convertFlateToPS"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - if (strcmp(operation, "w") && strcmp(operation, "a")) - return ERROR_INT("operation must be \"w\" or \"a\"", procName, 1); - - if (convertFlateToPSString(filein, &outstr, &nbytes, x, y, res, scale, - pageno, endpage)) - return ERROR_INT("ps string not made", procName, 1); - - ret = l_binaryWrite(fileout, operation, outstr, nbytes); - LEPT_FREE(outstr); - if (ret) L_ERROR("ps string not written to file\n", procName); - return ret; -} - - -/*! - * \brief convertFlateToPSString() - * - * Generates level 3 PS string in flate compressed format. - * - * \param[in] filein input image file - * \param[out] poutstr PS string - * \param[out] pnbytes number of bytes in PS string - * \param[in] x, y location of LL corner of image, in pixels, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] res resolution of the input image, in ppi; - * use 0 for default - * \param[in] scale scaling by printer; use 0.0 or 1.0 for no scaling - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page. - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The returned PS character array is a null-terminated
- *          ascii string.  All the raster data is ascii85 encoded, so
- *          there are no null bytes embedded in it.
- *      (2) The raster encoding is made with gzip, the same as that
- *          in a png file that is compressed without prediction.
- *          The raster data itself is 25% larger than that in the
- *          binary form, due to the ascii85 encoding.
- *
- *  Usage:  See convertFlateToPS()
- * 
- */ -static l_ok -convertFlateToPSString(const char *filein, - char **poutstr, - l_int32 *pnbytes, - l_int32 x, - l_int32 y, - l_int32 res, - l_float32 scale, - l_int32 pageno, - l_int32 endpage) -{ -char *outstr; -l_float32 xpt, ypt, wpt, hpt; -L_COMP_DATA *cid; - - PROCNAME("convertFlateToPSString"); - - if (!poutstr) - return ERROR_INT("&outstr not defined", procName, 1); - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - *pnbytes = 0; - *poutstr = NULL; - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - - if ((cid = l_generateFlateData(filein, 1)) == NULL) - return ERROR_INT("flate data not made", procName, 1); - - /* Get scaled location in pts. Guess the input scan resolution - * based on the input parameter %res, the resolution data in - * the pix, and the size of the image. */ - if (scale == 0.0) - scale = 1.0; - if (res <= 0) { - if (cid->res > 0) - res = cid->res; - else - res = DefaultInputRes; - } - xpt = scale * x * 72. / res; - ypt = scale * y * 72. / res; - wpt = scale * cid->w * 72. / res; - hpt = scale * cid->h * 72. / res; - - if (pageno == 0) - pageno = 1; - -#if DEBUG_FLATE - lept_stderr("w = %d, h = %d, bps = %d, spp = %d\n", - cid->w, cid->h, cid->bps, cid->spp); - lept_stderr("uncomp bytes = %ld, comp bytes = %ld, nbytes85 = %ld\n", - (unsigned long)cid->nbytes, (unsigned long)cid->nbytescomp, - (unsigned long)cid->nbytes85); - lept_stderr("xpt = %7.2f, ypt = %7.2f, wpt = %7.2f, hpt = %7.2f\n", - xpt, ypt, wpt, hpt); -#endif /* DEBUG_FLATE */ - - /* Generate the PS */ - outstr = generateFlatePS(NULL, cid, xpt, ypt, wpt, hpt, pageno, endpage); - l_CIDataDestroy(&cid); - if (!outstr) - return ERROR_INT("outstr not made", procName, 1); - *poutstr = outstr; - *pnbytes = strlen(outstr); - return 0; -} - - -/*! 
- * \brief generateFlatePS() - * - * \param[in] filein [optional] input filename; can be null - * \param[in] cid flate compressed image data - * \param[in] xpt, ypt location of LL corner of image, in pts, relative - * to the PostScript origin (0,0) at the LL corner - * of the page - * \param[in] wpt, hpt rendered image size in pts - * \param[in] pageno page number; must start with 1; you can use 0 - * if there is only one page - * \param[in] endpage boolean: use TRUE if this is the last image to be - * added to the page; FALSE otherwise - * \return PS string, or NULL on error - */ -static char * -generateFlatePS(const char *filein, - L_COMP_DATA *cid, - l_float32 xpt, - l_float32 ypt, - l_float32 wpt, - l_float32 hpt, - l_int32 pageno, - l_int32 endpage) -{ -l_int32 w, h, bps, spp; -char *outstr; -char bigbuf[Bufsize]; -SARRAY *sa; - - PROCNAME("generateFlatePS"); - - if (!cid) - return (char *)ERROR_PTR("flate data not defined", procName, NULL); - w = cid->w; - h = cid->h; - bps = cid->bps; - spp = cid->spp; - - sa = sarrayCreate(50); - sarrayAddString(sa, "%!PS-Adobe-3.0 EPSF-3.0", L_COPY); - sarrayAddString(sa, "%%Creator: leptonica", L_COPY); - if (filein) - snprintf(bigbuf, sizeof(bigbuf), "%%%%Title: %s", filein); - else - snprintf(bigbuf, sizeof(bigbuf), "%%%%Title: Flate compressed PS"); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, "%%DocumentData: Clean7Bit", L_COPY); - - if (var_PS_WRITE_BOUNDING_BOX == 1) { - snprintf(bigbuf, sizeof(bigbuf), - "%%%%BoundingBox: %7.2f %7.2f %7.2f %7.2f", - xpt, ypt, xpt + wpt, ypt + hpt); - sarrayAddString(sa, bigbuf, L_COPY); - } - - sarrayAddString(sa, "%%LanguageLevel: 3", L_COPY); - sarrayAddString(sa, "%%EndComments", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), "%%%%Page: %d %d", pageno, pageno); - sarrayAddString(sa, bigbuf, L_COPY); - - sarrayAddString(sa, "save", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f translate %%set image origin in pts", xpt, ypt); - sarrayAddString(sa, bigbuf, 
L_COPY); - - snprintf(bigbuf, sizeof(bigbuf), - "%7.2f %7.2f scale %%set image size in pts", wpt, hpt); - sarrayAddString(sa, bigbuf, L_COPY); - - /* If there is a colormap, add the data; it is now owned by sa */ - if (cid->cmapdata85) { - snprintf(bigbuf, sizeof(bigbuf), - "[ /Indexed /DeviceRGB %d %%set colormap type/size", - cid->ncolors - 1); - sarrayAddString(sa, bigbuf, L_COPY); - sarrayAddString(sa, " <~", L_COPY); - sarrayAddString(sa, cid->cmapdata85, L_INSERT); - sarrayAddString(sa, " ] setcolorspace", L_COPY); - } else if (spp == 1) { - sarrayAddString(sa, "/DeviceGray setcolorspace", L_COPY); - } else { /* spp == 3 */ - sarrayAddString(sa, "/DeviceRGB setcolorspace", L_COPY); - } - - sarrayAddString(sa, - "/RawData currentfile /ASCII85Decode filter def", L_COPY); - sarrayAddString(sa, - "/Data RawData << >> /FlateDecode filter def", L_COPY); - - sarrayAddString(sa, "{ << /ImageType 1", L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Width %d", w); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /Height %d", h); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), " /BitsPerComponent %d", bps); - sarrayAddString(sa, bigbuf, L_COPY); - snprintf(bigbuf, sizeof(bigbuf), - " /ImageMatrix [ %d 0 0 %d 0 %d ]", w, -h, h); - sarrayAddString(sa, bigbuf, L_COPY); - - if (cid->cmapdata85) { - sarrayAddString(sa, " /Decode [0 255]", L_COPY); - } else if (spp == 1) { - if (bps == 1) /* miniswhite photometry */ - sarrayAddString(sa, " /Decode [1 0]", L_COPY); - else /* bps > 1 */ - sarrayAddString(sa, " /Decode [0 1]", L_COPY); - } else { /* spp == 3 */ - sarrayAddString(sa, " /Decode [0 1 0 1 0 1]", L_COPY); - } - - sarrayAddString(sa, " /DataSource Data", L_COPY); - sarrayAddString(sa, " >> image", L_COPY); - sarrayAddString(sa, " Data closefile", L_COPY); - sarrayAddString(sa, " RawData flushfile", L_COPY); - if (endpage == TRUE) - sarrayAddString(sa, " showpage", L_COPY); - sarrayAddString(sa, " restore", L_COPY); - 
sarrayAddString(sa, "} exec", L_COPY); - - /* Insert the ascii85 gzipped data; this is now owned by sa */ - sarrayAddString(sa, cid->data85, L_INSERT); - - /* Generate and return the output string */ - outstr = sarrayToString(sa, 1); - sarrayDestroy(&sa); - cid->cmapdata85 = NULL; /* it has been transferred to sa and destroyed */ - cid->data85 = NULL; /* it has been transferred to sa and destroyed */ - return outstr; -} - - -/*---------------------------------------------------------------------* - * Write to memory * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWriteMemPS() - * - * \param[out] pdata data of tiff compressed image - * \param[out] psize size of returned data - * \param[in] pix - * \param[in] box [optional] - * \param[in] res can use 0 for default of 300 ppi - * \param[in] scale to prevent scaling, use either 1.0 or 0.0 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteStringPS() for usage.
- *      (2) This is just a wrapper for pixWriteStringPS(), which
- *          writes uncompressed image data to memory.
- * 
- */ -l_ok -pixWriteMemPS(l_uint8 **pdata, - size_t *psize, - PIX *pix, - BOX *box, - l_int32 res, - l_float32 scale) -{ - PROCNAME("pixWriteMemPS"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - - *pdata = (l_uint8 *)pixWriteStringPS(pix, box, res, scale); - *psize = strlen((char *)(*pdata)); - return 0; -} - - -/*-------------------------------------------------------------* - * Converting resolution * - *-------------------------------------------------------------*/ -/*! - * \brief getResLetterPage() - * - * \param[in] w image width, pixels - * \param[in] h image height, pixels - * \param[in] fillfract fraction in linear dimension of full page, - * not to be exceeded; use 0 for default - * \return resolution - */ -l_int32 -getResLetterPage(l_int32 w, - l_int32 h, - l_float32 fillfract) -{ -l_int32 resw, resh, res; - - if (fillfract == 0.0) - fillfract = DefaultFillFraction; - resw = (l_int32)((w * 72.) / (LetterWidth * fillfract)); - resh = (l_int32)((h * 72.) / (LetterHeight * fillfract)); - res = L_MAX(resw, resh); - return res; -} - - -/*! - * \brief getResA4Page() - * - * \param[in] w image width, pixels - * \param[in] h image height, pixels - * \param[in] fillfract fraction in linear dimension of full page, - * not to be exceeded; use 0 for default - * \return resolution - */ -l_int32 -getResA4Page(l_int32 w, - l_int32 h, - l_float32 fillfract) -{ -l_int32 resw, resh, res; - - if (fillfract == 0.0) - fillfract = DefaultFillFraction; - resw = (l_int32)((w * 72.) / (A4Width * fillfract)); - resh = (l_int32)((h * 72.) 
/ (A4Height * fillfract)); - res = L_MAX(resw, resh); - return res; -} - - -/*-------------------------------------------------------------* - * Setting flag for writing bounding box hint * - *-------------------------------------------------------------*/ -void -l_psWriteBoundingBox(l_int32 flag) -{ - var_PS_WRITE_BOUNDING_BOX = flag; -} - - -/* --------------------------------------------*/ -#endif /* USE_PSIO */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio2stub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio2stub.c deleted file mode 100644 index 8c96777e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/psio2stub.c +++ /dev/null @@ -1,160 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file psio2stub.c - *
- *
- *     Stubs for psio2.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !USE_PSIO /* defined in environ.h */ -/* --------------------------------------------*/ - -l_ok pixWritePSEmbed(const char *filein, const char *fileout) -{ - return ERROR_INT("function not present", "pixWritePSEmbed", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamPS(FILE *fp, PIX *pix, BOX *box, l_int32 res, - l_float32 scale) -{ - return ERROR_INT("function not present", "pixWriteStreamPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -char * pixWriteStringPS(PIX *pixs, BOX *box, l_int32 res, l_float32 scale) -{ - return (char *)ERROR_PTR("function not present", "pixWriteStringPS", NULL); -} - -/* ----------------------------------------------------------------------*/ - -char * generateUncompressedPS(char *hexdata, l_int32 w, l_int32 h, l_int32 d, - l_int32 psbpl, l_int32 bps, l_float32 xpt, - l_float32 ypt, l_float32 wpt, l_float32 hpt, - l_int32 boxflag) -{ - return (char *)ERROR_PTR("function not present", - "generateUncompressedPS", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertJpegToPSEmbed(const char *filein, const char *fileout) -{ - return ERROR_INT("function not present", "convertJpegToPSEmbed", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertJpegToPS(const char *filein, const char *fileout, - const char *operation, l_int32 x, l_int32 y, - l_int32 res, l_float32 scale, l_int32 pageno, - l_int32 endpage) -{ - return ERROR_INT("function not present", "convertJpegToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertG4ToPSEmbed(const char *filein, const char *fileout) -{ - return ERROR_INT("function not present", "convertG4ToPSEmbed", 1); -} - 
-/* ----------------------------------------------------------------------*/ - -l_ok convertG4ToPS(const char *filein, const char *fileout, - const char *operation, l_int32 x, l_int32 y, - l_int32 res, l_float32 scale, l_int32 pageno, - l_int32 maskflag, l_int32 endpage) -{ - return ERROR_INT("function not present", "convertG4ToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertTiffMultipageToPS(const char *filein, const char *fileout, - l_float32 fillfract) -{ - return ERROR_INT("function not present", "convertTiffMultipageToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertFlateToPSEmbed(const char *filein, const char *fileout) -{ - return ERROR_INT("function not present", "convertFlateToPSEmbed", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok convertFlateToPS(const char *filein, const char *fileout, - const char *operation, l_int32 x, l_int32 y, - l_int32 res, l_float32 scale, l_int32 pageno, - l_int32 endpage) -{ - return ERROR_INT("function not present", "convertFlateToPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemPS(l_uint8 **pdata, size_t *psize, PIX *pix, BOX *box, - l_int32 res, l_float32 scale) -{ - return ERROR_INT("function not present", "pixWriteMemPS", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_int32 getResLetterPage(l_int32 w, l_int32 h, l_float32 fillfract) -{ - return ERROR_INT("function not present", "getResLetterPage", 1); -} - -/* ----------------------------------------------------------------------*/ - -void l_psWriteBoundingBox(l_int32 flag) -{ - L_ERROR("function not present\n", "l_psWriteBoundingBox"); - return; -} - -/* --------------------------------------------*/ -#endif /* !USE_PSIO */ -/* --------------------------------------------*/ diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptabasic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptabasic.c deleted file mode 100644 index 62bad820..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptabasic.c +++ /dev/null @@ -1,1553 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file ptabasic.c - *
- *
- *      Pta creation, destruction, copy, clone, empty
- *           PTA            *ptaCreate()
- *           PTA            *ptaCreateFromNuma()
- *           void            ptaDestroy()
- *           PTA            *ptaCopy()
- *           PTA            *ptaCopyRange()
- *           PTA            *ptaClone()
- *           l_int32         ptaEmpty()
- *
- *      Pta array extension
- *           l_int32         ptaAddPt()
- *           static l_int32  ptaExtendArrays()
- *
- *      Pta insertion and removal
- *           l_int32         ptaInsertPt()
- *           l_int32         ptaRemovePt()
- *
- *      Pta accessors
- *           l_int32         ptaGetRefcount()
- *           l_int32         ptaChangeRefcount()
- *           l_int32         ptaGetCount()
- *           l_int32         ptaGetPt()
- *           l_int32         ptaGetIPt()
- *           l_int32         ptaSetPt()
- *           l_int32         ptaGetArrays()
- *
- *      Pta serialized for I/O
- *           PTA            *ptaRead()
- *           PTA            *ptaReadStream()
- *           PTA            *ptaReadMem()
- *           l_int32         ptaWriteDebug()
- *           l_int32         ptaWrite()
- *           l_int32         ptaWriteStream()
- *           l_int32         ptaWriteMem()
- *
- *      Ptaa creation, destruction
- *           PTAA           *ptaaCreate()
- *           void            ptaaDestroy()
- *
- *      Ptaa array extension
- *           l_int32         ptaaAddPta()
- *           static l_int32  ptaaExtendArray()
- *
- *      Ptaa accessors
- *           l_int32         ptaaGetCount()
- *           l_int32         ptaaGetPta()
- *           l_int32         ptaaGetPt()
- *
- *      Ptaa array modifiers
- *           l_int32         ptaaInitFull()
- *           l_int32         ptaaReplacePta()
- *           l_int32         ptaaAddPt()
- *           l_int32         ptaaTruncate()
- *
- *      Ptaa serialized for I/O
- *           PTAA           *ptaaRead()
- *           PTAA           *ptaaReadStream()
- *           PTAA           *ptaaReadMem()
- *           l_int32         ptaaWrite()
- *           l_int32         ptaaWriteStream()
- *           l_int32         ptaaWriteMem()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_uint32 MaxPtrArraySize = 10000000; -static const l_int32 InitialPtrArraySize = 50; /*!< n'importe quoi */ - - /* Static functions */ -static l_int32 ptaExtendArrays(PTA *pta); -static l_int32 ptaaExtendArray(PTAA *ptaa); - -/*---------------------------------------------------------------------* - * Pta creation, destruction, copy, clone * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaCreate() - * - * \param[in] n initial array sizes - * \return pta, or NULL on error. - */ -PTA * -ptaCreate(l_int32 n) -{ -PTA *pta; - - PROCNAME("ptaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - pta = (PTA *)LEPT_CALLOC(1, sizeof(PTA)); - pta->n = 0; - pta->nalloc = n; - ptaChangeRefcount(pta, 1); /* sets to 1 */ - pta->x = (l_float32 *)LEPT_CALLOC(n, sizeof(l_float32)); - pta->y = (l_float32 *)LEPT_CALLOC(n, sizeof(l_float32)); - if (!pta->x || !pta->y) { - ptaDestroy(&pta); - return (PTA *)ERROR_PTR("x and y arrays not both made", procName, NULL); - } - - return pta; -} - - -/*! - * \brief ptaCreateFromNuma() - * - * \param[in] nax [optional] can be null - * \param[in] nay - * \return pta, or NULL on error. - */ -PTA * -ptaCreateFromNuma(NUMA *nax, - NUMA *nay) -{ -l_int32 i, n; -l_float32 startx, delx, xval, yval; -PTA *pta; - - PROCNAME("ptaCreateFromNuma"); - - if (!nay) - return (PTA *)ERROR_PTR("nay not defined", procName, NULL); - n = numaGetCount(nay); - if (nax && numaGetCount(nax) != n) - return (PTA *)ERROR_PTR("nax and nay sizes differ", procName, NULL); - - pta = ptaCreate(n); - numaGetParameters(nay, &startx, &delx); - for (i = 0; i < n; i++) { - if (nax) - numaGetFValue(nax, i, &xval); - else /* use implicit x values from nay */ - xval = startx + i * delx; - numaGetFValue(nay, i, &yval); - ptaAddPt(pta, xval, yval); - } - - return pta; -} - - -/*! 
- * \brief ptaDestroy() - * - * \param[in,out] ppta will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the pta.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -ptaDestroy(PTA **ppta) -{ -PTA *pta; - - PROCNAME("ptaDestroy"); - - if (ppta == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((pta = *ppta) == NULL) - return; - - ptaChangeRefcount(pta, -1); - if (ptaGetRefcount(pta) <= 0) { - LEPT_FREE(pta->x); - LEPT_FREE(pta->y); - LEPT_FREE(pta); - } - - *ppta = NULL; - return; -} - - -/*! - * \brief ptaCopy() - * - * \param[in] pta - * \return copy of pta, or NULL on error - */ -PTA * -ptaCopy(PTA *pta) -{ -l_int32 i; -l_float32 x, y; -PTA *npta; - - PROCNAME("ptaCopy"); - - if (!pta) - return (PTA *)ERROR_PTR("pta not defined", procName, NULL); - - if ((npta = ptaCreate(pta->nalloc)) == NULL) - return (PTA *)ERROR_PTR("npta not made", procName, NULL); - - for (i = 0; i < pta->n; i++) { - ptaGetPt(pta, i, &x, &y); - ptaAddPt(npta, x, y); - } - - return npta; -} - - -/*! - * \brief ptaCopyRange() - * - * \param[in] ptas - * \param[in] istart starting index in ptas - * \param[in] iend ending index in ptas; use 0 to copy to end - * \return 0 if OK, 1 on error - */ -PTA * -ptaCopyRange(PTA *ptas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i, x, y; -PTA *ptad; - - PROCNAME("ptaCopyRange"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - n = ptaGetCount(ptas); - if (istart < 0) - istart = 0; - if (istart >= n) - return (PTA *)ERROR_PTR("istart out of bounds", procName, NULL); - if (iend <= 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return (PTA *)ERROR_PTR("istart > iend; no pts", procName, NULL); - - if ((ptad = ptaCreate(iend - istart + 1)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = istart; i <= iend; i++) { - ptaGetIPt(ptas, i, &x, &y); - ptaAddPt(ptad, x, y); - } - - return ptad; -} - - -/*! 
- * \brief ptaClone() - * - * \param[in] pta - * \return ptr to same pta, or NULL on error - */ -PTA * -ptaClone(PTA *pta) -{ - PROCNAME("ptaClone"); - - if (!pta) - return (PTA *)ERROR_PTR("pta not defined", procName, NULL); - - ptaChangeRefcount(pta, 1); - return pta; -} - - -/*! - * \brief ptaEmpty() - * - * \param[in] pta - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      This only resets the Pta::n field, for reuse
- * 
- */ -l_ok -ptaEmpty(PTA *pta) -{ - PROCNAME("ptaEmpty"); - - if (!pta) - return ERROR_INT("ptad not defined", procName, 1); - pta->n = 0; - return 0; -} - - -/*---------------------------------------------------------------------* - * Pta array extension * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaAddPt() - * - * \param[in] pta - * \param[in] x, y - * \return 0 if OK, 1 on error - */ -l_ok -ptaAddPt(PTA *pta, - l_float32 x, - l_float32 y) -{ -l_int32 n; - - PROCNAME("ptaAddPt"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - n = pta->n; - if (n >= pta->nalloc) - ptaExtendArrays(pta); - pta->x[n] = x; - pta->y[n] = y; - pta->n++; - - return 0; -} - - -/*! - * \brief ptaExtendArrays() - * - * \param[in] pta - * \return 0 if OK; 1 on error - */ -static l_int32 -ptaExtendArrays(PTA *pta) -{ - PROCNAME("ptaExtendArrays"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - if ((pta->x = (l_float32 *)reallocNew((void **)&pta->x, - sizeof(l_float32) * pta->nalloc, - 2 * sizeof(l_float32) * pta->nalloc)) == NULL) - return ERROR_INT("new x array not returned", procName, 1); - if ((pta->y = (l_float32 *)reallocNew((void **)&pta->y, - sizeof(l_float32) * pta->nalloc, - 2 * sizeof(l_float32) * pta->nalloc)) == NULL) - return ERROR_INT("new y array not returned", procName, 1); - - pta->nalloc = 2 * pta->nalloc; - return 0; -} - - -/*---------------------------------------------------------------------* - * Pta insertion and removal * - *---------------------------------------------------------------------*/ -/*! 
- * \brief ptaInsertPt() - * - * \param[in] pta - * \param[in] index at which pt is to be inserted - * \param[in] x, y point values - * \return 0 if OK; 1 on error - */ -l_ok -ptaInsertPt(PTA *pta, - l_int32 index, - l_int32 x, - l_int32 y) -{ -l_int32 i, n; - - PROCNAME("ptaInsertPt"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - n = ptaGetCount(pta); - if (index < 0 || index > n) - return ERROR_INT("index not in {0...n}", procName, 1); - - if (n > pta->nalloc) - ptaExtendArrays(pta); - pta->n++; - for (i = n; i > index; i--) { - pta->x[i] = pta->x[i - 1]; - pta->y[i] = pta->y[i - 1]; - } - pta->x[index] = x; - pta->y[index] = y; - return 0; -} - - -/*! - * \brief ptaRemovePt() - * - * \param[in] pta - * \param[in] index of point to be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This shifts pta[i] --> pta[i - 1] for all i > index.
- *      (2) It should not be used repeatedly on large arrays,
- *          because the function is O(n).
- * 
- */ -l_ok -ptaRemovePt(PTA *pta, - l_int32 index) -{ -l_int32 i, n; - - PROCNAME("ptaRemovePt"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - n = ptaGetCount(pta); - if (index < 0 || index >= n) - return ERROR_INT("index not in {0...n - 1}", procName, 1); - - /* Remove the point */ - for (i = index + 1; i < n; i++) { - pta->x[i - 1] = pta->x[i]; - pta->y[i - 1] = pta->y[i]; - } - pta->n--; - return 0; -} - - -/*---------------------------------------------------------------------* - * Pta accessors * - *---------------------------------------------------------------------*/ -l_int32 -ptaGetRefcount(PTA *pta) -{ - PROCNAME("ptaGetRefcount"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - return pta->refcount; -} - - -l_int32 -ptaChangeRefcount(PTA *pta, - l_int32 delta) -{ - PROCNAME("ptaChangeRefcount"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - pta->refcount += delta; - return 0; -} - - -/*! - * \brief ptaGetCount() - * - * \param[in] pta - * \return count, or 0 if no pta - */ -l_int32 -ptaGetCount(PTA *pta) -{ - PROCNAME("ptaGetCount"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 0); - - return pta->n; -} - - -/*! - * \brief ptaGetPt() - * - * \param[in] pta - * \param[in] index into arrays - * \param[out] px [optional] float x value - * \param[out] py [optional] float y value - * \return 0 if OK; 1 on error - */ -l_ok -ptaGetPt(PTA *pta, - l_int32 index, - l_float32 *px, - l_float32 *py) -{ - PROCNAME("ptaGetPt"); - - if (px) *px = 0; - if (py) *py = 0; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (index < 0 || index >= pta->n) - return ERROR_INT("invalid index", procName, 1); - - if (px) *px = pta->x[index]; - if (py) *py = pta->y[index]; - return 0; -} - - -/*! 
- * \brief ptaGetIPt() - * - * \param[in] pta - * \param[in] index into arrays - * \param[out] px [optional] integer x value - * \param[out] py [optional] integer y value - * \return 0 if OK; 1 on error - */ -l_ok -ptaGetIPt(PTA *pta, - l_int32 index, - l_int32 *px, - l_int32 *py) -{ - PROCNAME("ptaGetIPt"); - - if (px) *px = 0; - if (py) *py = 0; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (index < 0 || index >= pta->n) - return ERROR_INT("invalid index", procName, 1); - - if (px) *px = (l_int32)(pta->x[index] + 0.5); - if (py) *py = (l_int32)(pta->y[index] + 0.5); - return 0; -} - - -/*! - * \brief ptaSetPt() - * - * \param[in] pta - * \param[in] index into arrays - * \param[in] x, y - * \return 0 if OK; 1 on error - */ -l_ok -ptaSetPt(PTA *pta, - l_int32 index, - l_float32 x, - l_float32 y) -{ - PROCNAME("ptaSetPt"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (index < 0 || index >= pta->n) - return ERROR_INT("invalid index", procName, 1); - - pta->x[index] = x; - pta->y[index] = y; - return 0; -} - - -/*! - * \brief ptaGetArrays() - * - * \param[in] pta - * \param[out] pnax [optional] numa of x array - * \param[out] pnay [optional] numa of y array - * \return 0 if OK; 1 on error or if pta is empty - * - *
- * Notes:
- *      (1) This copies the internal arrays into new Numas.
- * 
- */ -l_ok -ptaGetArrays(PTA *pta, - NUMA **pnax, - NUMA **pnay) -{ -l_int32 i, n; -NUMA *nax, *nay; - - PROCNAME("ptaGetArrays"); - - if (!pnax && !pnay) - return ERROR_INT("no output requested", procName, 1); - if (pnax) *pnax = NULL; - if (pnay) *pnay = NULL; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) == 0) - return ERROR_INT("pta is empty", procName, 1); - - if (pnax) { - if ((nax = numaCreate(n)) == NULL) - return ERROR_INT("nax not made", procName, 1); - *pnax = nax; - for (i = 0; i < n; i++) - nax->array[i] = pta->x[i]; - nax->n = n; - } - if (pnay) { - if ((nay = numaCreate(n)) == NULL) - return ERROR_INT("nay not made", procName, 1); - *pnay = nay; - for (i = 0; i < n; i++) - nay->array[i] = pta->y[i]; - nay->n = n; - } - return 0; -} - - -/*---------------------------------------------------------------------* - * Pta serialized for I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaRead() - * - * \param[in] filename - * \return pta, or NULL on error - */ -PTA * -ptaRead(const char *filename) -{ -FILE *fp; -PTA *pta; - - PROCNAME("ptaRead"); - - if (!filename) - return (PTA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PTA *)ERROR_PTR("stream not opened", procName, NULL); - pta = ptaReadStream(fp); - fclose(fp); - if (!pta) - return (PTA *)ERROR_PTR("pta not read", procName, NULL); - return pta; -} - - -/*! 
- * \brief ptaReadStream() - * - * \param[in] fp file stream - * \return pta, or NULL on error - */ -PTA * -ptaReadStream(FILE *fp) -{ -char typestr[128]; /* hardcoded below in fscanf */ -l_int32 i, n, ix, iy, type, version; -l_float32 x, y; -PTA *pta; - - PROCNAME("ptaReadStream"); - - if (!fp) - return (PTA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\n Pta Version %d\n", &version) != 1) - return (PTA *)ERROR_PTR("not a pta file", procName, NULL); - if (version != PTA_VERSION_NUMBER) - return (PTA *)ERROR_PTR("invalid pta version", procName, NULL); - if (fscanf(fp, " Number of pts = %d; format = %127s\n", &n, typestr) != 2) - return (PTA *)ERROR_PTR("not a pta file", procName, NULL); - if (!strcmp(typestr, "float")) - type = 0; - else /* typestr is "integer" */ - type = 1; - - if ((pta = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - for (i = 0; i < n; i++) { - if (type == 0) { /* data is float */ - if (fscanf(fp, " (%f, %f)\n", &x, &y) != 2) { - ptaDestroy(&pta); - return (PTA *)ERROR_PTR("error reading floats", procName, NULL); - } - ptaAddPt(pta, x, y); - } else { /* data is integer */ - if (fscanf(fp, " (%d, %d)\n", &ix, &iy) != 2) { - ptaDestroy(&pta); - return (PTA *)ERROR_PTR("error reading ints", procName, NULL); - } - ptaAddPt(pta, ix, iy); - } - } - - return pta; -} - - -/*! - * \brief ptaReadMem() - * - * \param[in] data serialization in ascii - * \param[in] size of data in bytes; can use strlen to get it - * \return pta, or NULL on error - */ -PTA * -ptaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PTA *pta; - - PROCNAME("ptaReadMem"); - - if (!data) - return (PTA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PTA *)ERROR_PTR("stream not opened", procName, NULL); - - pta = ptaReadStream(fp); - fclose(fp); - if (!pta) L_ERROR("pta not read\n", procName); - return pta; -} - - -/*! 
- * \brief ptaWriteDebug() - * - * \param[in] filename - * \param[in] pta - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Debug version, intended for use in the library when writing
- *          to files in a temp directory with names that are compiled in.
- *          This is used instead of ptaWrite() for all such library calls.
- *      (2) The global variable LeptDebugOK defaults to 0, and can be set
- *          or cleared by the function setLeptDebugOK().
- * 
- */ -l_ok -ptaWriteDebug(const char *filename, - PTA *pta, - l_int32 type) -{ - PROCNAME("ptaWriteDebug"); - - if (LeptDebugOK) { - return ptaWrite(filename, pta, type); - } else { - L_INFO("write to named temp file %s is disabled\n", procName, filename); - return 0; - } -} - - -/*! - * \brief ptaWrite() - * - * \param[in] filename - * \param[in] pta - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK, 1 on error - */ -l_ok -ptaWrite(const char *filename, - PTA *pta, - l_int32 type) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("ptaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = ptaWriteStream(fp, pta, type); - fclose(fp); - if (ret) - return ERROR_INT("pta not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief ptaWriteStream() - * - * \param[in] fp file stream - * \param[in] pta - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK; 1 on error - */ -l_ok -ptaWriteStream(FILE *fp, - PTA *pta, - l_int32 type) -{ -l_int32 i, n, ix, iy; -l_float32 x, y; - - PROCNAME("ptaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - n = ptaGetCount(pta); - fprintf(fp, "\n Pta Version %d\n", PTA_VERSION_NUMBER); - if (type == 0) - fprintf(fp, " Number of pts = %d; format = float\n", n); - else /* type == 1 */ - fprintf(fp, " Number of pts = %d; format = integer\n", n); - for (i = 0; i < n; i++) { - if (type == 0) { /* data is float */ - ptaGetPt(pta, i, &x, &y); - fprintf(fp, " (%f, %f)\n", x, y); - } else { /* data is integer */ - ptaGetIPt(pta, i, &ix, &iy); - fprintf(fp, " (%d, %d)\n", ix, iy); - } - } - - return 0; -} - - -/*! 
- * \brief ptaWriteMem() - * - * \param[out] pdata data of serialized pta; ascii - * \param[out] psize size of returned data - * \param[in] pta - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a pta in memory and puts the result in a buffer.
- * 
- */ -l_ok -ptaWriteMem(l_uint8 **pdata, - size_t *psize, - PTA *pta, - l_int32 type) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("ptaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = ptaWriteStream(fp, pta, type); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = ptaWriteStream(fp, pta, type); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*---------------------------------------------------------------------* - * PTAA creation, destruction * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaaCreate() - * - * \param[in] n initial number of ptrs - * \return ptaa, or NULL on error - */ -PTAA * -ptaaCreate(l_int32 n) -{ -PTAA *ptaa; - - PROCNAME("ptaaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - ptaa = (PTAA *)LEPT_CALLOC(1, sizeof(PTAA)); - ptaa->n = 0; - ptaa->nalloc = n; - if ((ptaa->pta = (PTA **)LEPT_CALLOC(n, sizeof(PTA *))) == NULL) { - ptaaDestroy(&ptaa); - return (PTAA *)ERROR_PTR("pta ptrs not made", procName, NULL); - } - return ptaa; -} - - -/*! 
- * \brief ptaaDestroy() - * - * \param[in,out] pptaa will be set to null before returning - * \return void - */ -void -ptaaDestroy(PTAA **pptaa) -{ -l_int32 i; -PTAA *ptaa; - - PROCNAME("ptaaDestroy"); - - if (pptaa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - - if ((ptaa = *pptaa) == NULL) - return; - - for (i = 0; i < ptaa->n; i++) - ptaDestroy(&ptaa->pta[i]); - LEPT_FREE(ptaa->pta); - - LEPT_FREE(ptaa); - *pptaa = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * PTAA array extension * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaaAddPta() - * - * \param[in] ptaa - * \param[in] pta to be added - * \param[in] copyflag L_INSERT, L_COPY, L_CLONE - * \return 0 if OK, 1 on error - */ -l_ok -ptaaAddPta(PTAA *ptaa, - PTA *pta, - l_int32 copyflag) -{ -l_int32 n; -PTA *ptac; - - PROCNAME("ptaaAddPta"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - if (copyflag == L_INSERT) { - ptac = pta; - } else if (copyflag == L_COPY) { - if ((ptac = ptaCopy(pta)) == NULL) - return ERROR_INT("ptac not made", procName, 1); - } else if (copyflag == L_CLONE) { - if ((ptac = ptaClone(pta)) == NULL) - return ERROR_INT("pta clone not made", procName, 1); - } else { - return ERROR_INT("invalid copyflag", procName, 1); - } - - n = ptaaGetCount(ptaa); - if (n >= ptaa->nalloc) - ptaaExtendArray(ptaa); - ptaa->pta[n] = ptac; - ptaa->n++; - - return 0; -} - - -/*! 
- * \brief ptaaExtendArray() - * - * \param[in] ptaa - * \return 0 if OK, 1 on error - */ -static l_int32 -ptaaExtendArray(PTAA *ptaa) -{ - PROCNAME("ptaaExtendArray"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - - if ((ptaa->pta = (PTA **)reallocNew((void **)&ptaa->pta, - sizeof(PTA *) * ptaa->nalloc, - 2 * sizeof(PTA *) * ptaa->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - ptaa->nalloc = 2 * ptaa->nalloc; - return 0; -} - - -/*---------------------------------------------------------------------* - * Ptaa accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaaGetCount() - * - * \param[in] ptaa - * \return count, or 0 if no ptaa - */ -l_int32 -ptaaGetCount(PTAA *ptaa) -{ - PROCNAME("ptaaGetCount"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 0); - - return ptaa->n; -} - - -/*! - * \brief ptaaGetPta() - * - * \param[in] ptaa - * \param[in] index to the i-th pta - * \param[in] accessflag L_COPY or L_CLONE - * \return pta, or NULL on error - */ -PTA * -ptaaGetPta(PTAA *ptaa, - l_int32 index, - l_int32 accessflag) -{ - PROCNAME("ptaaGetPta"); - - if (!ptaa) - return (PTA *)ERROR_PTR("ptaa not defined", procName, NULL); - if (index < 0 || index >= ptaa->n) - return (PTA *)ERROR_PTR("index not valid", procName, NULL); - - if (accessflag == L_COPY) - return ptaCopy(ptaa->pta[index]); - else if (accessflag == L_CLONE) - return ptaClone(ptaa->pta[index]); - else - return (PTA *)ERROR_PTR("invalid accessflag", procName, NULL); -} - - -/*! 
- * \brief ptaaGetPt() - * - * \param[in] ptaa - * \param[in] ipta to the i-th pta - * \param[in] jpt index to the j-th pt in the pta - * \param[out] px [optional] float x value - * \param[out] py [optional] float y value - * \return 0 if OK; 1 on error - */ -l_ok -ptaaGetPt(PTAA *ptaa, - l_int32 ipta, - l_int32 jpt, - l_float32 *px, - l_float32 *py) -{ -PTA *pta; - - PROCNAME("ptaaGetPt"); - - if (px) *px = 0; - if (py) *py = 0; - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - if (ipta < 0 || ipta >= ptaa->n) - return ERROR_INT("index ipta not valid", procName, 1); - - pta = ptaaGetPta(ptaa, ipta, L_CLONE); - if (jpt < 0 || jpt >= pta->n) { - ptaDestroy(&pta); - return ERROR_INT("index jpt not valid", procName, 1); - } - - ptaGetPt(pta, jpt, px, py); - ptaDestroy(&pta); - return 0; -} - - -/*---------------------------------------------------------------------* - * Ptaa array modifiers * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaaInitFull() - * - * \param[in] ptaa can have non-null ptrs in the ptr array - * \param[in] pta to be replicated into the entire ptr array - * \return 0 if OK; 1 on error - */ -l_ok -ptaaInitFull(PTAA *ptaa, - PTA *pta) -{ -l_int32 n, i; -PTA *ptat; - - PROCNAME("ptaaInitFull"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - n = ptaa->nalloc; - ptaa->n = n; - for (i = 0; i < n; i++) { - ptat = ptaCopy(pta); - ptaaReplacePta(ptaa, i, ptat); - } - return 0; -} - - -/*! - * \brief ptaaReplacePta() - * - * \param[in] ptaa - * \param[in] index to the index-th pta - * \param[in] pta insert and replace any existing one - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Any existing pta is destroyed, and the input one
- *          is inserted in its place.
- *      (2) If %index is invalid, return 1 (error)
- * 
- */ -l_ok -ptaaReplacePta(PTAA *ptaa, - l_int32 index, - PTA *pta) -{ -l_int32 n; - - PROCNAME("ptaaReplacePta"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - n = ptaaGetCount(ptaa); - if (index < 0 || index >= n) - return ERROR_INT("index not valid", procName, 1); - - ptaDestroy(&ptaa->pta[index]); - ptaa->pta[index] = pta; - return 0; -} - - -/*! - * \brief ptaaAddPt() - * - * \param[in] ptaa - * \param[in] ipta to the i-th pta - * \param[in] x,y point coordinates - * \return 0 if OK; 1 on error - */ -l_ok -ptaaAddPt(PTAA *ptaa, - l_int32 ipta, - l_float32 x, - l_float32 y) -{ -PTA *pta; - - PROCNAME("ptaaAddPt"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - if (ipta < 0 || ipta >= ptaa->n) - return ERROR_INT("index ipta not valid", procName, 1); - - pta = ptaaGetPta(ptaa, ipta, L_CLONE); - ptaAddPt(pta, x, y); - ptaDestroy(&pta); - return 0; -} - - -/*! - * \brief ptaaTruncate() - * - * \param[in] ptaa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This identifies the largest index containing a pta that
- *          has any points within it, destroys all pta above that index,
- *          and resets the count.
- * 
- */ -l_ok -ptaaTruncate(PTAA *ptaa) -{ -l_int32 i, n, np; -PTA *pta; - - PROCNAME("ptaaTruncate"); - - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - - n = ptaaGetCount(ptaa); - for (i = n - 1; i >= 0; i--) { - pta = ptaaGetPta(ptaa, i, L_CLONE); - if (!pta) { - ptaa->n--; - continue; - } - np = ptaGetCount(pta); - ptaDestroy(&pta); - if (np == 0) { - ptaDestroy(&ptaa->pta[i]); - ptaa->n--; - } else { - break; - } - } - return 0; -} - - -/*---------------------------------------------------------------------* - * Ptaa serialized for I/O * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaaRead() - * - * \param[in] filename - * \return ptaa, or NULL on error - */ -PTAA * -ptaaRead(const char *filename) -{ -FILE *fp; -PTAA *ptaa; - - PROCNAME("ptaaRead"); - - if (!filename) - return (PTAA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PTAA *)ERROR_PTR("stream not opened", procName, NULL); - ptaa = ptaaReadStream(fp); - fclose(fp); - if (!ptaa) - return (PTAA *)ERROR_PTR("ptaa not read", procName, NULL); - return ptaa; -} - - -/*! 
- * \brief ptaaReadStream() - * - * \param[in] fp file stream - * \return ptaa, or NULL on error - */ -PTAA * -ptaaReadStream(FILE *fp) -{ -l_int32 i, n, version; -PTA *pta; -PTAA *ptaa; - - PROCNAME("ptaaReadStream"); - - if (!fp) - return (PTAA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nPtaa Version %d\n", &version) != 1) - return (PTAA *)ERROR_PTR("not a ptaa file", procName, NULL); - if (version != PTA_VERSION_NUMBER) - return (PTAA *)ERROR_PTR("invalid ptaa version", procName, NULL); - if (fscanf(fp, "Number of Pta = %d\n", &n) != 1) - return (PTAA *)ERROR_PTR("not a ptaa file", procName, NULL); - - if ((ptaa = ptaaCreate(n)) == NULL) - return (PTAA *)ERROR_PTR("ptaa not made", procName, NULL); - for (i = 0; i < n; i++) { - if ((pta = ptaReadStream(fp)) == NULL) { - ptaaDestroy(&ptaa); - return (PTAA *)ERROR_PTR("error reading pta", procName, NULL); - } - ptaaAddPta(ptaa, pta, L_INSERT); - } - - return ptaa; -} - - -/*! - * \brief ptaaReadMem() - * - * \param[in] data serialization in ascii - * \param[in] size of data in bytes; can use strlen to get it - * \return ptaa, or NULL on error - */ -PTAA * -ptaaReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -PTAA *ptaa; - - PROCNAME("ptaaReadMem"); - - if (!data) - return (PTAA *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (PTAA *)ERROR_PTR("stream not opened", procName, NULL); - - ptaa = ptaaReadStream(fp); - fclose(fp); - if (!ptaa) L_ERROR("ptaa not read\n", procName); - return ptaa; -} - - -/*! - * \brief ptaaWriteDebug() - * - * \param[in] filename - * \param[in] ptaa - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Debug version, intended for use in the library when writing
- *          to files in a temp directory with names that are compiled in.
- *          This is used instead of ptaaWrite() for all such library calls.
- *      (2) The global variable LeptDebugOK defaults to 0, and can be set
- *          or cleared by the function setLeptDebugOK().
- * 
- */ -l_ok -ptaaWriteDebug(const char *filename, - PTAA *ptaa, - l_int32 type) -{ - PROCNAME("ptaaWriteDebug"); - - if (LeptDebugOK) { - return ptaaWrite(filename, ptaa, type); - } else { - L_INFO("write to named temp file %s is disabled\n", procName, filename); - return 0; - } -} - - -/*! - * \brief ptaaWrite() - * - * \param[in] filename - * \param[in] ptaa - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK, 1 on error - */ -l_ok -ptaaWrite(const char *filename, - PTAA *ptaa, - l_int32 type) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("ptaaWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = ptaaWriteStream(fp, ptaa, type); - fclose(fp); - if (ret) - return ERROR_INT("ptaa not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief ptaaWriteStream() - * - * \param[in] fp file stream - * \param[in] ptaa - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK; 1 on error - */ -l_ok -ptaaWriteStream(FILE *fp, - PTAA *ptaa, - l_int32 type) -{ -l_int32 i, n; -PTA *pta; - - PROCNAME("ptaaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - - n = ptaaGetCount(ptaa); - fprintf(fp, "\nPtaa Version %d\n", PTA_VERSION_NUMBER); - fprintf(fp, "Number of Pta = %d\n", n); - for (i = 0; i < n; i++) { - pta = ptaaGetPta(ptaa, i, L_CLONE); - ptaWriteStream(fp, pta, type); - ptaDestroy(&pta); - } - - return 0; -} - - -/*! - * \brief ptaaWriteMem() - * - * \param[out] pdata data of serialized ptaa; ascii - * \param[out] psize size of returned data - * \param[in] ptaa - * \param[in] type 0 for float values; 1 for integer values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes %ptaa in memory and puts the result in a buffer.
- * 
- */ -l_ok -ptaaWriteMem(l_uint8 **pdata, - size_t *psize, - PTAA *ptaa, - l_int32 type) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("ptaaWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = ptaaWriteStream(fp, ptaa, type); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = ptaaWriteStream(fp, ptaa, type); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptafunc1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptafunc1.c deleted file mode 100644 index 8dfcd8a1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptafunc1.c +++ /dev/null @@ -1,2667 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file ptafunc1.c - *
- *
- *      --------------------------------------
- *      This file has these Pta utilities:
- *         - simple rearrangements
- *         - geometric analysis
- *         - min/max and filtering
- *         - least squares fitting
- *         - interconversions with Pix and Numa
- *         - display into a pix
- *      --------------------------------------
- *
- *      Simple rearrangements
- *           PTA      *ptaSubsample()
- *           l_int32   ptaJoin()
- *           l_int32   ptaaJoin()
- *           PTA      *ptaReverse()
- *           PTA      *ptaTranspose()
- *           PTA      *ptaCyclicPerm()
- *           PTA      *ptaSelectRange()
- *
- *      Geometric
- *           BOX      *ptaGetBoundingRegion()
- *           l_int32  *ptaGetRange()
- *           PTA      *ptaGetInsideBox()
- *           PTA      *pixFindCornerPixels()
- *           l_int32   ptaContainsPt()
- *           l_int32   ptaTestIntersection()
- *           PTA      *ptaTransform()
- *           l_int32   ptaPtInsidePolygon()
- *           l_float32 l_angleBetweenVectors()
- *
- *      Min/max and filtering
- *           l_int32   ptaGetMinMax()
- *           PTA      *ptaSelectByValue()
- *           PTA      *ptaCropToMask()
- *
- *      Least Squares Fit
- *           l_int32   ptaGetLinearLSF()
- *           l_int32   ptaGetQuadraticLSF()
- *           l_int32   ptaGetCubicLSF()
- *           l_int32   ptaGetQuarticLSF()
- *           l_int32   ptaNoisyLinearLSF()
- *           l_int32   ptaNoisyQuadraticLSF()
- *           l_int32   applyLinearFit()
- *           l_int32   applyQuadraticFit()
- *           l_int32   applyCubicFit()
- *           l_int32   applyQuarticFit()
- *
- *      Interconversions with Pix
- *           l_int32   pixPlotAlongPta()
- *           PTA      *ptaGetPixelsFromPix()
- *           PIX      *pixGenerateFromPta()
- *           PTA      *ptaGetBoundaryPixels()
- *           PTAA     *ptaaGetBoundaryPixels()
- *           PTAA     *ptaaIndexLabeledPixels()
- *           PTA      *ptaGetNeighborPixLocs()
- *
- *      Interconversion with Numa
- *           PTA      *numaConvertToPta1()
- *           PTA      *numaConvertToPta2()
- *           l_int32   ptaConvertToNuma()
- *
- *      Display Pta and Ptaa
- *           PIX      *pixDisplayPta()
- *           PIX      *pixDisplayPtaaPattern()
- *           PIX      *pixDisplayPtaPattern()
- *           PTA      *ptaReplicatePattern()
- *           PIX      *pixDisplayPtaa()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif /* M_PI */ - -/*---------------------------------------------------------------------* - * Simple rearrangements * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaSubsample() - * - * \param[in] ptas - * \param[in] subfactor subsample factor, >= 1 - * \return ptad evenly sampled pt values from ptas, or NULL on error - */ -PTA * -ptaSubsample(PTA *ptas, - l_int32 subfactor) -{ -l_int32 n, i; -l_float32 x, y; -PTA *ptad; - - PROCNAME("pixSubsample"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (subfactor < 1) - return (PTA *)ERROR_PTR("subfactor < 1", procName, NULL); - - ptad = ptaCreate(0); - n = ptaGetCount(ptas); - for (i = 0; i < n; i++) { - if (i % subfactor != 0) continue; - ptaGetPt(ptas, i, &x, &y); - ptaAddPt(ptad, x, y); - } - - return ptad; -} - - -/*! - * \brief ptaJoin() - * - * \param[in] ptad dest pta; add to this one - * \param[in] ptas source pta; add from this one - * \param[in] istart starting index in ptas - * \param[in] iend ending index in ptas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (2) iend < 0 means 'read to the end'
- *      (3) if ptas == NULL, this is a no-op
- * 
- */ -l_ok -ptaJoin(PTA *ptad, - PTA *ptas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i, x, y; - - PROCNAME("ptaJoin"); - - if (!ptad) - return ERROR_INT("ptad not defined", procName, 1); - if (!ptas) - return 0; - - if (istart < 0) - istart = 0; - n = ptaGetCount(ptas); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; no pts", procName, 1); - - for (i = istart; i <= iend; i++) { - ptaGetIPt(ptas, i, &x, &y); - ptaAddPt(ptad, x, y); - } - - return 0; -} - - -/*! - * \brief ptaaJoin() - * - * \param[in] ptaad dest ptaa; add to this one - * \param[in] ptaas source ptaa; add from this one - * \param[in] istart starting index in ptaas - * \param[in] iend ending index in ptaas; use -1 to cat all - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
- *      (2) iend < 0 means 'read to the end'
- *      (3) if ptas == NULL, this is a no-op
- * 
- */ -l_ok -ptaaJoin(PTAA *ptaad, - PTAA *ptaas, - l_int32 istart, - l_int32 iend) -{ -l_int32 n, i; -PTA *pta; - - PROCNAME("ptaaJoin"); - - if (!ptaad) - return ERROR_INT("ptaad not defined", procName, 1); - if (!ptaas) - return 0; - - if (istart < 0) - istart = 0; - n = ptaaGetCount(ptaas); - if (iend < 0 || iend >= n) - iend = n - 1; - if (istart > iend) - return ERROR_INT("istart > iend; no pts", procName, 1); - - for (i = istart; i <= iend; i++) { - pta = ptaaGetPta(ptaas, i, L_CLONE); - ptaaAddPta(ptaad, pta, L_INSERT); - } - - return 0; -} - - -/*! - * \brief ptaReverse() - * - * \param[in] ptas - * \param[in] type 0 for float values; 1 for integer values - * \return ptad reversed pta, or NULL on error - */ -PTA * -ptaReverse(PTA *ptas, - l_int32 type) -{ -l_int32 n, i, ix, iy; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaReverse"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - n = ptaGetCount(ptas); - if ((ptad = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = n - 1; i >= 0; i--) { - if (type == 0) { - ptaGetPt(ptas, i, &x, &y); - ptaAddPt(ptad, x, y); - } else { /* type == 1 */ - ptaGetIPt(ptas, i, &ix, &iy); - ptaAddPt(ptad, ix, iy); - } - } - - return ptad; -} - - -/*! - * \brief ptaTranspose() - * - * \param[in] ptas - * \return ptad with x and y values swapped, or NULL on error - */ -PTA * -ptaTranspose(PTA *ptas) -{ -l_int32 n, i; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaTranspose"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - n = ptaGetCount(ptas); - if ((ptad = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = 0; i < n; i++) { - ptaGetPt(ptas, i, &x, &y); - ptaAddPt(ptad, y, x); - } - - return ptad; -} - - -/*! 
- * \brief ptaCyclicPerm() - * - * \param[in] ptas - * \param[in] xs, ys start point; must be in ptas - * \return ptad cyclic permutation, starting and ending at (xs, ys, - * or NULL on error - * - *
- * Notes:
- *      (1) Check to insure that (a) ptas is a closed path where
- *          the first and last points are identical, and (b) the
- *          resulting pta also starts and ends on the same point
- *          (which in this case is (xs, ys).
- * 
- */ -PTA * -ptaCyclicPerm(PTA *ptas, - l_int32 xs, - l_int32 ys) -{ -l_int32 n, i, x, y, j, index, state; -l_int32 x1, y1, x2, y2; -PTA *ptad; - - PROCNAME("ptaCyclicPerm"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - n = ptaGetCount(ptas); - - /* Verify input data */ - ptaGetIPt(ptas, 0, &x1, &y1); - ptaGetIPt(ptas, n - 1, &x2, &y2); - if (x1 != x2 || y1 != y2) - return (PTA *)ERROR_PTR("start and end pts not same", procName, NULL); - state = L_NOT_FOUND; - for (i = 0; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - if (x == xs && y == ys) { - state = L_FOUND; - break; - } - } - if (state == L_NOT_FOUND) - return (PTA *)ERROR_PTR("start pt not in ptas", procName, NULL); - - if ((ptad = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (j = 0; j < n - 1; j++) { - if (i + j < n - 1) - index = i + j; - else - index = (i + j + 1) % n; - ptaGetIPt(ptas, index, &x, &y); - ptaAddPt(ptad, x, y); - } - ptaAddPt(ptad, xs, ys); - - return ptad; -} - - -/*! 
- * \brief ptaSelectRange() - * - * \param[in] ptas - * \param[in] first use 0 to select from the beginning - * \param[in] last use -1 to select to the end - * \return ptad, or NULL on error - */ -PTA * -ptaSelectRange(PTA *ptas, - l_int32 first, - l_int32 last) -{ -l_int32 n, npt, i; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaSelectRange"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if ((n = ptaGetCount(ptas)) == 0) { - L_WARNING("ptas is empty\n", procName); - return ptaCopy(ptas); - } - first = L_MAX(0, first); - if (last < 0) last = n - 1; - if (first >= n) - return (PTA *)ERROR_PTR("invalid first", procName, NULL); - if (last >= n) { - L_WARNING("last = %d is beyond max index = %d; adjusting\n", - procName, last, n - 1); - last = n - 1; - } - if (first > last) - return (PTA *)ERROR_PTR("first > last", procName, NULL); - - npt = last - first + 1; - ptad = ptaCreate(npt); - for (i = first; i <= last; i++) { - ptaGetPt(ptas, i, &x, &y); - ptaAddPt(ptad, x, y); - } - return ptad; -} - - -/*---------------------------------------------------------------------* - * Geometric * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaGetBoundingRegion() - * - * \param[in] pta - * \return box, or NULL on error - * - *
- * Notes:
- *      (1) This is used when the pta represents a set of points in
- *          a two-dimensional image.  It returns the box of minimum
- *          size containing the pts in the pta.
- * 
- */ -BOX * -ptaGetBoundingRegion(PTA *pta) -{ -l_int32 n, i, x, y, minx, maxx, miny, maxy; - - PROCNAME("ptaGetBoundingRegion"); - - if (!pta) - return (BOX *)ERROR_PTR("pta not defined", procName, NULL); - - minx = 10000000; - miny = 10000000; - maxx = -10000000; - maxy = -10000000; - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < minx) minx = x; - if (x > maxx) maxx = x; - if (y < miny) miny = y; - if (y > maxy) maxy = y; - } - - return boxCreate(minx, miny, maxx - minx + 1, maxy - miny + 1); -} - - -/*! - * \brief ptaGetRange() - * - * \param[in] pta - * \param[out] pminx [optional] min value of x - * \param[out] pmaxx [optional] max value of x - * \param[out] pminy [optional] min value of y - * \param[out] pmaxy [optional] max value of y - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) We can use pts to represent pairs of floating values, that
- *          are not necessarily tied to a two-dimension region.  For
- *          example, the pts can represent a general function y(x).
- * 
- */ -l_ok -ptaGetRange(PTA *pta, - l_float32 *pminx, - l_float32 *pmaxx, - l_float32 *pminy, - l_float32 *pmaxy) -{ -l_int32 n, i; -l_float32 x, y, minx, maxx, miny, maxy; - - PROCNAME("ptaGetRange"); - - if (!pminx && !pmaxx && !pminy && !pmaxy) - return ERROR_INT("no output requested", procName, 1); - if (pminx) *pminx = 0; - if (pmaxx) *pmaxx = 0; - if (pminy) *pminy = 0; - if (pmaxy) *pmaxy = 0; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) == 0) - return ERROR_INT("no points in pta", procName, 1); - - ptaGetPt(pta, 0, &x, &y); - minx = x; - maxx = x; - miny = y; - maxy = y; - for (i = 1; i < n; i++) { - ptaGetPt(pta, i, &x, &y); - if (x < minx) minx = x; - if (x > maxx) maxx = x; - if (y < miny) miny = y; - if (y > maxy) maxy = y; - } - if (pminx) *pminx = minx; - if (pmaxx) *pmaxx = maxx; - if (pminy) *pminy = miny; - if (pmaxy) *pmaxy = maxy; - return 0; -} - - -/*! - * \brief ptaGetInsideBox() - * - * \param[in] ptas input pts - * \param[in] box - * \return ptad of pts in ptas that are inside the box, or NULL on error - */ -PTA * -ptaGetInsideBox(PTA *ptas, - BOX *box) -{ -PTA *ptad; -l_int32 n, i, contains; -l_float32 x, y; - - PROCNAME("ptaGetInsideBox"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (!box) - return (PTA *)ERROR_PTR("box not defined", procName, NULL); - - n = ptaGetCount(ptas); - ptad = ptaCreate(0); - for (i = 0; i < n; i++) { - ptaGetPt(ptas, i, &x, &y); - boxContainsPt(box, x, y, &contains); - if (contains) - ptaAddPt(ptad, x, y); - } - - return ptad; -} - - -/*! - * \brief pixFindCornerPixels() - * - * \param[in] pixs 1 bpp - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) Finds the 4 corner-most pixels, as defined by a search
- *          inward from each corner, using a 45 degree line.
- * 
- */ -PTA * -pixFindCornerPixels(PIX *pixs) -{ -l_int32 i, j, x, y, w, h, wpl, mindim, found; -l_uint32 *data, *line; -PTA *pta; - - PROCNAME("pixFindCornerPixels"); - - if (!pixs) - return (PTA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PTA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - mindim = L_MIN(w, h); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - - if ((pta = ptaCreate(4)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - - for (found = FALSE, i = 0; i < mindim; i++) { - for (j = 0; j <= i; j++) { - y = i - j; - line = data + y * wpl; - if (GET_DATA_BIT(line, j)) { - ptaAddPt(pta, j, y); - found = TRUE; - break; - } - } - if (found == TRUE) - break; - } - - for (found = FALSE, i = 0; i < mindim; i++) { - for (j = 0; j <= i; j++) { - y = i - j; - line = data + y * wpl; - x = w - 1 - j; - if (GET_DATA_BIT(line, x)) { - ptaAddPt(pta, x, y); - found = TRUE; - break; - } - } - if (found == TRUE) - break; - } - - for (found = FALSE, i = 0; i < mindim; i++) { - for (j = 0; j <= i; j++) { - y = h - 1 - i + j; - line = data + y * wpl; - if (GET_DATA_BIT(line, j)) { - ptaAddPt(pta, j, y); - found = TRUE; - break; - } - } - if (found == TRUE) - break; - } - - for (found = FALSE, i = 0; i < mindim; i++) { - for (j = 0; j <= i; j++) { - y = h - 1 - i + j; - line = data + y * wpl; - x = w - 1 - j; - if (GET_DATA_BIT(line, x)) { - ptaAddPt(pta, x, y); - found = TRUE; - break; - } - } - if (found == TRUE) - break; - } - - return pta; -} - - -/*! 
- * \brief ptaContainsPt() - * - * \param[in] pta - * \param[in] x, y point - * \return 1 if contained, 0 otherwise or on error - */ -l_int32 -ptaContainsPt(PTA *pta, - l_int32 x, - l_int32 y) -{ -l_int32 i, n, ix, iy; - - PROCNAME("ptaContainsPt"); - - if (!pta) - return ERROR_INT("pta not defined", procName, 0); - - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &ix, &iy); - if (x == ix && y == iy) - return 1; - } - return 0; -} - - -/*! - * \brief ptaTestIntersection() - * - * \param[in] pta1, pta2 - * \return bval which is 1 if they have any elements in common; - * 0 otherwise or on error. - */ -l_int32 -ptaTestIntersection(PTA *pta1, - PTA *pta2) -{ -l_int32 i, j, n1, n2, x1, y1, x2, y2; - - PROCNAME("ptaTestIntersection"); - - if (!pta1) - return ERROR_INT("pta1 not defined", procName, 0); - if (!pta2) - return ERROR_INT("pta2 not defined", procName, 0); - - n1 = ptaGetCount(pta1); - n2 = ptaGetCount(pta2); - for (i = 0; i < n1; i++) { - ptaGetIPt(pta1, i, &x1, &y1); - for (j = 0; j < n2; j++) { - ptaGetIPt(pta2, i, &x2, &y2); - if (x1 == x2 && y1 == y2) - return 1; - } - } - - return 0; -} - - -/*! - * \brief ptaTransform() - * - * \param[in] ptas - * \param[in] shiftx, shifty - * \param[in] scalex, scaley - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) Shift first, then scale.
- * 
- */ -PTA * -ptaTransform(PTA *ptas, - l_int32 shiftx, - l_int32 shifty, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 n, i, x, y; -PTA *ptad; - - PROCNAME("ptaTransform"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - n = ptaGetCount(ptas); - ptad = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - x = (l_int32)(scalex * (x + shiftx) + 0.5); - y = (l_int32)(scaley * (y + shifty) + 0.5); - ptaAddPt(ptad, x, y); - } - - return ptad; -} - - -/*! - * \brief ptaPtInsidePolygon() - * - * \param[in] pta vertices of a polygon - * \param[in] x, y point to be tested - * \param[out] pinside 1 if inside; 0 if outside or on boundary - * \return 1 if OK, 0 on error - * - * The abs value of the sum of the angles subtended from a point by - * the sides of a polygon, when taken in order traversing the polygon, - * is 0 if the point is outside the polygon and 2*pi if inside. - * The sign will be positive if traversed cw and negative if ccw. - */ -l_int32 -ptaPtInsidePolygon(PTA *pta, - l_float32 x, - l_float32 y, - l_int32 *pinside) -{ -l_int32 i, n; -l_float32 sum, x1, y1, x2, y2, xp1, yp1, xp2, yp2; - - PROCNAME("ptaPtInsidePolygon"); - - if (!pinside) - return ERROR_INT("&inside not defined", procName, 1); - *pinside = 0; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - /* Think of (x1,y1) as the end point of a vector that starts - * from the origin (0,0), and ditto for (x2,y2). */ - n = ptaGetCount(pta); - sum = 0.0; - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &xp1, &yp1); - ptaGetPt(pta, (i + 1) % n, &xp2, &yp2); - x1 = xp1 - x; - y1 = yp1 - y; - x2 = xp2 - x; - y2 = yp2 - y; - sum += l_angleBetweenVectors(x1, y1, x2, y2); - } - - if (L_ABS(sum) > M_PI) - *pinside = 1; - return 0; -} - - -/*! - * \brief l_angleBetweenVectors() - * - * \param[in] x1, y1 end point of first vector - * \param[in] x2, y2 end point of second vector - * \return angle radians, or 0.0 on error - * - *
- * Notes:
- *      (1) This gives the angle between two vectors, going between
- *          vector1 (x1,y1) and vector2 (x2,y2).  The angle is swept
- *          out from 1 --> 2.  If this is clockwise, the angle is
- *          positive, but the result is folded into the interval [-pi, pi].
- * 
- */ -l_float32 -l_angleBetweenVectors(l_float32 x1, - l_float32 y1, - l_float32 x2, - l_float32 y2) -{ -l_float64 ang; - - ang = atan2(y2, x2) - atan2(y1, x1); - if (ang > M_PI) ang -= 2.0 * M_PI; - if (ang < -M_PI) ang += 2.0 * M_PI; - return ang; -} - - -/*---------------------------------------------------------------------* - * Min/max and filtering * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaGetMinMax() - * - * \param[in] pta - * \param[out] pxmin [optional] min of x - * \param[out] pymin [optional] min of y - * \param[out] pxmax [optional] max of x - * \param[out] pymax [optional] max of y - * \return 0 if OK, 1 on error. If pta is empty, requested - * values are returned as -1.0. - */ -l_ok -ptaGetMinMax(PTA *pta, - l_float32 *pxmin, - l_float32 *pymin, - l_float32 *pxmax, - l_float32 *pymax) -{ -l_int32 i, n; -l_float32 x, y, xmin, ymin, xmax, ymax; - - PROCNAME("ptaGetMinMax"); - - if (pxmin) *pxmin = -1.0; - if (pymin) *pymin = -1.0; - if (pxmax) *pxmax = -1.0; - if (pymax) *pymax = -1.0; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (!pxmin && !pxmax && !pymin && !pymax) - return ERROR_INT("no output requested", procName, 1); - if ((n = ptaGetCount(pta)) == 0) { - L_WARNING("pta is empty\n", procName); - return 0; - } - - xmin = ymin = 1.0e20; - xmax = ymax = -1.0e20; - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &x, &y); - if (x < xmin) xmin = x; - if (y < ymin) ymin = y; - if (x > xmax) xmax = x; - if (y > ymax) ymax = y; - } - if (pxmin) *pxmin = xmin; - if (pymin) *pymin = ymin; - if (pxmax) *pxmax = xmax; - if (pymax) *pymax = ymax; - return 0; -} - - -/*! 
- * \brief ptaSelectByValue() - * - * \param[in] ptas - * \param[in] xth, yth threshold values - * \param[in] type L_SELECT_XVAL, L_SELECT_YVAL, - * L_SELECT_IF_EITHER, L_SELECT_IF_BOTH - * \param[in] relation L_SELECT_IF_LT, L_SELECT_IF_GT, - * L_SELECT_IF_LTE, L_SELECT_IF_GTE - * \return ptad filtered set, or NULL on error - */ -PTA * -ptaSelectByValue(PTA *ptas, - l_float32 xth, - l_float32 yth, - l_int32 type, - l_int32 relation) -{ -l_int32 i, n; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaSelectByValue"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (ptaGetCount(ptas) == 0) { - L_WARNING("ptas is empty\n", procName); - return ptaCopy(ptas); - } - if (type != L_SELECT_XVAL && type != L_SELECT_YVAL && - type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH) - return (PTA *)ERROR_PTR("invalid type", procName, NULL); - if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT && - relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE) - return (PTA *)ERROR_PTR("invalid relation", procName, NULL); - - n = ptaGetCount(ptas); - ptad = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(ptas, i, &x, &y); - if (type == L_SELECT_XVAL) { - if ((relation == L_SELECT_IF_LT && x < xth) || - (relation == L_SELECT_IF_GT && x > xth) || - (relation == L_SELECT_IF_LTE && x <= xth) || - (relation == L_SELECT_IF_GTE && x >= xth)) - ptaAddPt(ptad, x, y); - } else if (type == L_SELECT_YVAL) { - if ((relation == L_SELECT_IF_LT && y < yth) || - (relation == L_SELECT_IF_GT && y > yth) || - (relation == L_SELECT_IF_LTE && y <= yth) || - (relation == L_SELECT_IF_GTE && y >= yth)) - ptaAddPt(ptad, x, y); - } else if (type == L_SELECT_IF_EITHER) { - if (((relation == L_SELECT_IF_LT) && (x < xth || y < yth)) || - ((relation == L_SELECT_IF_GT) && (x > xth || y > yth)) || - ((relation == L_SELECT_IF_LTE) && (x <= xth || y <= yth)) || - ((relation == L_SELECT_IF_GTE) && (x >= xth || y >= yth))) - ptaAddPt(ptad, x, y); - } else { /* 
L_SELECT_IF_BOTH */ - if (((relation == L_SELECT_IF_LT) && (x < xth && y < yth)) || - ((relation == L_SELECT_IF_GT) && (x > xth && y > yth)) || - ((relation == L_SELECT_IF_LTE) && (x <= xth && y <= yth)) || - ((relation == L_SELECT_IF_GTE) && (x >= xth && y >= yth))) - ptaAddPt(ptad, x, y); - } - } - - return ptad; -} - - -/*! - * \brief ptaCropToMask() - * - * \param[in] ptas input pta - * \param[in] pixm 1 bpp mask - * \return ptad with only pts under the mask fg, or NULL on error - */ -PTA * -ptaCropToMask(PTA *ptas, - PIX *pixm) -{ -l_int32 i, n, x, y; -l_uint32 val; -PTA *ptad; - - PROCNAME("ptaCropToMask"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (!pixm || pixGetDepth(pixm) != 1) - return (PTA *)ERROR_PTR("pixm undefined or not 1 bpp", procName, NULL); - if (ptaGetCount(ptas) == 0) { - L_INFO("ptas is empty\n", procName); - return ptaCopy(ptas); - } - - n = ptaGetCount(ptas); - ptad = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - pixGetPixel(pixm, x, y, &val); - if (val == 1) - ptaAddPt(ptad, x, y); - } - return ptad; -} - - -/*---------------------------------------------------------------------* - * Least Squares Fit * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaGetLinearLSF() - * - * \param[in] pta - * \param[out] pa [optional] slope a of least square fit: y = ax + b - * \param[out] pb [optional] intercept b of least square fit - * \param[out] pnafit [optional] numa of least square fit - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Either or both &a and &b must be input.  They determine the
- *          type of line that is fit.
- *      (2) If both &a and &b are defined, this returns a and b that minimize:
- *
- *              sum (yi - axi -b)^2
- *               i
- *
- *          The method is simple: differentiate this expression w/rt a and b,
- *          and solve the resulting two equations for a and b in terms of
- *          various sums over the input data (xi, yi).
- *      (3) We also allow two special cases, where either a = 0 or b = 0:
- *           (a) If &a is given and &b = null, find the linear LSF that
- *               goes through the origin (b = 0).
- *           (b) If &b is given and &a = null, find the linear LSF with
- *               zero slope (a = 0).
- *      (4) If &nafit is defined, this returns an array of fitted values,
- *          corresponding to the two implicit Numa arrays (nax and nay) in pta.
- *          Thus, just as you can plot the data in pta as nay vs. nax,
- *          you can plot the linear least square fit as nafit vs. nax.
- *          Get the nax array using ptaGetArrays(pta, &nax, NULL);
- * 
- */ -l_ok -ptaGetLinearLSF(PTA *pta, - l_float32 *pa, - l_float32 *pb, - NUMA **pnafit) -{ -l_int32 n, i; -l_float32 a, b, factor, sx, sy, sxx, sxy, val; -l_float32 *xa, *ya; - - PROCNAME("ptaGetLinearLSF"); - - if (pa) *pa = 0.0; - if (pb) *pb = 0.0; - if (pnafit) *pnafit = NULL; - if (!pa && !pb && !pnafit) - return ERROR_INT("no output requested", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) < 2) - return ERROR_INT("less than 2 pts found", procName, 1); - - xa = pta->x; /* not a copy */ - ya = pta->y; /* not a copy */ - sx = sy = sxx = sxy = 0.; - if (pa && pb) { /* general line */ - for (i = 0; i < n; i++) { - sx += xa[i]; - sy += ya[i]; - sxx += xa[i] * xa[i]; - sxy += xa[i] * ya[i]; - } - factor = n * sxx - sx * sx; - if (factor == 0.0) - return ERROR_INT("no solution found", procName, 1); - factor = 1. / factor; - - a = factor * ((l_float32)n * sxy - sx * sy); - b = factor * (sxx * sy - sx * sxy); - } else if (pa) { /* b = 0; line through origin */ - for (i = 0; i < n; i++) { - sxx += xa[i] * xa[i]; - sxy += xa[i] * ya[i]; - } - if (sxx == 0.0) - return ERROR_INT("no solution found", procName, 1); - a = sxy / sxx; - b = 0.0; - } else { /* a = 0; horizontal line */ - for (i = 0; i < n; i++) - sy += ya[i]; - a = 0.0; - b = sy / (l_float32)n; - } - - if (pnafit) { - *pnafit = numaCreate(n); - for (i = 0; i < n; i++) { - val = a * xa[i] + b; - numaAddNumber(*pnafit, val); - } - } - - if (pa) *pa = a; - if (pb) *pb = b; - return 0; -} - - -/*! - * \brief ptaGetQuadraticLSF() - * - * \param[in] pta - * \param[out] pa [optional] coeff a of LSF: y = ax^2 + bx + c - * \param[out] pb [optional] coeff b of LSF: y = ax^2 + bx + c - * \param[out] pc [optional] coeff c of LSF: y = ax^2 + bx + c - * \param[out] pnafit [optional] numa of least square fit - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a quadratic least square fit to the set of points
- *          in %pta.  That is, it finds coefficients a, b and c that minimize:
- *
- *              sum (yi - a*xi*xi -b*xi -c)^2
- *               i
- *
- *          The method is simple: differentiate this expression w/rt
- *          a, b and c, and solve the resulting three equations for these
- *          coefficients in terms of various sums over the input data (xi, yi).
- *          The three equations are in the form:
- *             f[0][0]a + f[0][1]b + f[0][2]c = g[0]
- *             f[1][0]a + f[1][1]b + f[1][2]c = g[1]
- *             f[2][0]a + f[2][1]b + f[2][2]c = g[2]
- *      (2) If &nafit is defined, this returns an array of fitted values,
- *          corresponding to the two implicit Numa arrays (nax and nay) in pta.
- *          Thus, just as you can plot the data in pta as nay vs. nax,
- *          you can plot the linear least square fit as nafit vs. nax.
- *          Get the nax array using ptaGetArrays(pta, &nax, NULL);
- * 
- */ -l_ok -ptaGetQuadraticLSF(PTA *pta, - l_float32 *pa, - l_float32 *pb, - l_float32 *pc, - NUMA **pnafit) -{ -l_int32 n, i, ret; -l_float32 x, y, sx, sy, sx2, sx3, sx4, sxy, sx2y; -l_float32 *xa, *ya; -l_float32 *f[3]; -l_float32 g[3]; - - PROCNAME("ptaGetQuadraticLSF"); - - if (pa) *pa = 0.0; - if (pb) *pb = 0.0; - if (pc) *pc = 0.0; - if (pnafit) *pnafit = NULL; - if (!pa && !pb && !pc && !pnafit) - return ERROR_INT("no output requested", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) < 3) - return ERROR_INT("less than 3 pts found", procName, 1); - - xa = pta->x; /* not a copy */ - ya = pta->y; /* not a copy */ - sx = sy = sx2 = sx3 = sx4 = sxy = sx2y = 0.; - for (i = 0; i < n; i++) { - x = xa[i]; - y = ya[i]; - sx += x; - sy += y; - sx2 += x * x; - sx3 += x * x * x; - sx4 += x * x * x * x; - sxy += x * y; - sx2y += x * x * y; - } - - for (i = 0; i < 3; i++) - f[i] = (l_float32 *)LEPT_CALLOC(3, sizeof(l_float32)); - f[0][0] = sx4; - f[0][1] = sx3; - f[0][2] = sx2; - f[1][0] = sx3; - f[1][1] = sx2; - f[1][2] = sx; - f[2][0] = sx2; - f[2][1] = sx; - f[2][2] = n; - g[0] = sx2y; - g[1] = sxy; - g[2] = sy; - - /* Solve for the unknowns, also putting f-inverse into f */ - ret = gaussjordan(f, g, 3); - for (i = 0; i < 3; i++) - LEPT_FREE(f[i]); - if (ret) - return ERROR_INT("quadratic solution failed", procName, 1); - - if (pa) *pa = g[0]; - if (pb) *pb = g[1]; - if (pc) *pc = g[2]; - if (pnafit) { - *pnafit = numaCreate(n); - for (i = 0; i < n; i++) { - x = xa[i]; - y = g[0] * x * x + g[1] * x + g[2]; - numaAddNumber(*pnafit, y); - } - } - return 0; -} - - -/*! 
- * \brief ptaGetCubicLSF() - * - * \param[in] pta - * \param[out] pa [optional] coeff a of LSF: y = ax^3 + bx^2 + cx + d - * \param[out] pb [optional] coeff b of LSF - * \param[out] pc [optional] coeff c of LSF - * \param[out] pd [optional] coeff d of LSF - * \param[out] pnafit [optional] numa of least square fit - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a cubic least square fit to the set of points
- *          in %pta.  That is, it finds coefficients a, b, c and d
- *          that minimize:
- *
- *              sum (yi - a*xi*xi*xi -b*xi*xi -c*xi - d)^2
- *               i
- *
- *          Differentiate this expression w/rt a, b, c and d, and solve
- *          the resulting four equations for these coefficients in
- *          terms of various sums over the input data (xi, yi).
- *          The four equations are in the form:
- *             f[0][0]a + f[0][1]b + f[0][2]c + f[0][3] = g[0]
- *             f[1][0]a + f[1][1]b + f[1][2]c + f[1][3] = g[1]
- *             f[2][0]a + f[2][1]b + f[2][2]c + f[2][3] = g[2]
- *             f[3][0]a + f[3][1]b + f[3][2]c + f[3][3] = g[3]
- *      (2) If &nafit is defined, this returns an array of fitted values,
- *          corresponding to the two implicit Numa arrays (nax and nay) in pta.
- *          Thus, just as you can plot the data in pta as nay vs. nax,
- *          you can plot the linear least square fit as nafit vs. nax.
- *          Get the nax array using ptaGetArrays(pta, &nax, NULL);
- * 
- */ -l_ok -ptaGetCubicLSF(PTA *pta, - l_float32 *pa, - l_float32 *pb, - l_float32 *pc, - l_float32 *pd, - NUMA **pnafit) -{ -l_int32 n, i, ret; -l_float32 x, y, sx, sy, sx2, sx3, sx4, sx5, sx6, sxy, sx2y, sx3y; -l_float32 *xa, *ya; -l_float32 *f[4]; -l_float32 g[4]; - - PROCNAME("ptaGetCubicLSF"); - - if (pa) *pa = 0.0; - if (pb) *pb = 0.0; - if (pc) *pc = 0.0; - if (pd) *pd = 0.0; - if (pnafit) *pnafit = NULL; - if (!pa && !pb && !pc && !pd && !pnafit) - return ERROR_INT("no output requested", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) < 4) - return ERROR_INT("less than 4 pts found", procName, 1); - - xa = pta->x; /* not a copy */ - ya = pta->y; /* not a copy */ - sx = sy = sx2 = sx3 = sx4 = sx5 = sx6 = sxy = sx2y = sx3y = 0.; - for (i = 0; i < n; i++) { - x = xa[i]; - y = ya[i]; - sx += x; - sy += y; - sx2 += x * x; - sx3 += x * x * x; - sx4 += x * x * x * x; - sx5 += x * x * x * x * x; - sx6 += x * x * x * x * x * x; - sxy += x * y; - sx2y += x * x * y; - sx3y += x * x * x * y; - } - - for (i = 0; i < 4; i++) - f[i] = (l_float32 *)LEPT_CALLOC(4, sizeof(l_float32)); - f[0][0] = sx6; - f[0][1] = sx5; - f[0][2] = sx4; - f[0][3] = sx3; - f[1][0] = sx5; - f[1][1] = sx4; - f[1][2] = sx3; - f[1][3] = sx2; - f[2][0] = sx4; - f[2][1] = sx3; - f[2][2] = sx2; - f[2][3] = sx; - f[3][0] = sx3; - f[3][1] = sx2; - f[3][2] = sx; - f[3][3] = n; - g[0] = sx3y; - g[1] = sx2y; - g[2] = sxy; - g[3] = sy; - - /* Solve for the unknowns, also putting f-inverse into f */ - ret = gaussjordan(f, g, 4); - for (i = 0; i < 4; i++) - LEPT_FREE(f[i]); - if (ret) - return ERROR_INT("cubic solution failed", procName, 1); - - if (pa) *pa = g[0]; - if (pb) *pb = g[1]; - if (pc) *pc = g[2]; - if (pd) *pd = g[3]; - if (pnafit) { - *pnafit = numaCreate(n); - for (i = 0; i < n; i++) { - x = xa[i]; - y = g[0] * x * x * x + g[1] * x * x + g[2] * x + g[3]; - numaAddNumber(*pnafit, y); - } - } - return 0; -} - - -/*! 
- * \brief ptaGetQuarticLSF() - * - * \param[in] pta - * \param[out] pa [optional] coeff a of LSF: - * y = ax^4 + bx^3 + cx^2 + dx + e - * \param[out] pb [optional] coeff b of LSF - * \param[out] pc [optional] coeff c of LSF - * \param[out] pd [optional] coeff d of LSF - * \param[out] pe [optional] coeff e of LSF - * \param[out] pnafit [optional] numa of least square fit - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a quartic least square fit to the set of points
- *          in %pta.  That is, it finds coefficients a, b, c, d and 3
- *          that minimize:
- *
- *              sum (yi - a*xi*xi*xi*xi -b*xi*xi*xi -c*xi*xi - d*xi - e)^2
- *               i
- *
- *          Differentiate this expression w/rt a, b, c, d and e, and solve
- *          the resulting five equations for these coefficients in
- *          terms of various sums over the input data (xi, yi).
- *          The five equations are in the form:
- *             f[0][0]a + f[0][1]b + f[0][2]c + f[0][3] + f[0][4] = g[0]
- *             f[1][0]a + f[1][1]b + f[1][2]c + f[1][3] + f[1][4] = g[1]
- *             f[2][0]a + f[2][1]b + f[2][2]c + f[2][3] + f[2][4] = g[2]
- *             f[3][0]a + f[3][1]b + f[3][2]c + f[3][3] + f[3][4] = g[3]
- *             f[4][0]a + f[4][1]b + f[4][2]c + f[4][3] + f[4][4] = g[4]
- *      (2) If &nafit is defined, this returns an array of fitted values,
- *          corresponding to the two implicit Numa arrays (nax and nay) in pta.
- *          Thus, just as you can plot the data in pta as nay vs. nax,
- *          you can plot the linear least square fit as nafit vs. nax.
- *          Get the nax array using ptaGetArrays(pta, &nax, NULL);
- * 
- */ -l_ok -ptaGetQuarticLSF(PTA *pta, - l_float32 *pa, - l_float32 *pb, - l_float32 *pc, - l_float32 *pd, - l_float32 *pe, - NUMA **pnafit) -{ -l_int32 n, i, ret; -l_float32 x, y, sx, sy, sx2, sx3, sx4, sx5, sx6, sx7, sx8; -l_float32 sxy, sx2y, sx3y, sx4y; -l_float32 *xa, *ya; -l_float32 *f[5]; -l_float32 g[5]; - - PROCNAME("ptaGetQuarticLSF"); - - if (pa) *pa = 0.0; - if (pb) *pb = 0.0; - if (pc) *pc = 0.0; - if (pd) *pd = 0.0; - if (pe) *pe = 0.0; - if (pnafit) *pnafit = NULL; - if (!pa && !pb && !pc && !pd && !pe && !pnafit) - return ERROR_INT("no output requested", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) < 5) - return ERROR_INT("less than 5 pts found", procName, 1); - - xa = pta->x; /* not a copy */ - ya = pta->y; /* not a copy */ - sx = sy = sx2 = sx3 = sx4 = sx5 = sx6 = sx7 = sx8 = 0; - sxy = sx2y = sx3y = sx4y = 0.; - for (i = 0; i < n; i++) { - x = xa[i]; - y = ya[i]; - sx += x; - sy += y; - sx2 += x * x; - sx3 += x * x * x; - sx4 += x * x * x * x; - sx5 += x * x * x * x * x; - sx6 += x * x * x * x * x * x; - sx7 += x * x * x * x * x * x * x; - sx8 += x * x * x * x * x * x * x * x; - sxy += x * y; - sx2y += x * x * y; - sx3y += x * x * x * y; - sx4y += x * x * x * x * y; - } - - for (i = 0; i < 5; i++) - f[i] = (l_float32 *)LEPT_CALLOC(5, sizeof(l_float32)); - f[0][0] = sx8; - f[0][1] = sx7; - f[0][2] = sx6; - f[0][3] = sx5; - f[0][4] = sx4; - f[1][0] = sx7; - f[1][1] = sx6; - f[1][2] = sx5; - f[1][3] = sx4; - f[1][4] = sx3; - f[2][0] = sx6; - f[2][1] = sx5; - f[2][2] = sx4; - f[2][3] = sx3; - f[2][4] = sx2; - f[3][0] = sx5; - f[3][1] = sx4; - f[3][2] = sx3; - f[3][3] = sx2; - f[3][4] = sx; - f[4][0] = sx4; - f[4][1] = sx3; - f[4][2] = sx2; - f[4][3] = sx; - f[4][4] = n; - g[0] = sx4y; - g[1] = sx3y; - g[2] = sx2y; - g[3] = sxy; - g[4] = sy; - - /* Solve for the unknowns, also putting f-inverse into f */ - ret = gaussjordan(f, g, 5); - for (i = 0; i < 5; i++) - LEPT_FREE(f[i]); - if (ret) - 
return ERROR_INT("quartic solution failed", procName, 1); - - if (pa) *pa = g[0]; - if (pb) *pb = g[1]; - if (pc) *pc = g[2]; - if (pd) *pd = g[3]; - if (pe) *pe = g[4]; - if (pnafit) { - *pnafit = numaCreate(n); - for (i = 0; i < n; i++) { - x = xa[i]; - y = g[0] * x * x * x * x + g[1] * x * x * x + g[2] * x * x - + g[3] * x + g[4]; - numaAddNumber(*pnafit, y); - } - } - return 0; -} - - -/*! - * \brief ptaNoisyLinearLSF() - * - * \param[in] pta - * \param[in] factor reject outliers with error greater than this - * number of medians; typically ~ 3 - * \param[out] pptad [optional] with outliers removed - * \param[out] pa [optional] slope a of least square fit: y = ax + b - * \param[out] pb [optional] intercept b of least square fit - * \param[out] pmederr [optional] median error - * \param[out] pnafit [optional] numa of least square fit to ptad - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a linear least square fit to the set of points
- *          in %pta.  It then evaluates the errors and removes points
- *          whose error is >= factor * median_error.  It then re-runs
- *          the linear LSF on the resulting points.
- *      (2) Either or both &a and &b must be input.  They determine the
- *          type of line that is fit.
- *      (3) The median error can give an indication of how good the fit
- *          is likely to be.
- * 
- */ -l_ok -ptaNoisyLinearLSF(PTA *pta, - l_float32 factor, - PTA **pptad, - l_float32 *pa, - l_float32 *pb, - l_float32 *pmederr, - NUMA **pnafit) -{ -l_int32 n, i, ret; -l_float32 x, y, yf, val, mederr; -NUMA *nafit, *naerror; -PTA *ptad; - - PROCNAME("ptaNoisyLinearLSF"); - - if (pptad) *pptad = NULL; - if (pa) *pa = 0.0; - if (pb) *pb = 0.0; - if (pmederr) *pmederr = 0.0; - if (pnafit) *pnafit = NULL; - if (!pptad && !pa && !pb && !pnafit) - return ERROR_INT("no output requested", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (factor <= 0.0) - return ERROR_INT("factor must be > 0.0", procName, 1); - if ((n = ptaGetCount(pta)) < 3) - return ERROR_INT("less than 2 pts found", procName, 1); - - if (ptaGetLinearLSF(pta, pa, pb, &nafit) != 0) - return ERROR_INT("error in linear LSF", procName, 1); - - /* Get the median error */ - naerror = numaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &x, &y); - numaGetFValue(nafit, i, &yf); - numaAddNumber(naerror, L_ABS(y - yf)); - } - numaGetMedian(naerror, &mederr); - if (pmederr) *pmederr = mederr; - numaDestroy(&nafit); - - /* Remove outliers */ - ptad = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &x, &y); - numaGetFValue(naerror, i, &val); - if (val <= factor * mederr) /* <= in case mederr = 0 */ - ptaAddPt(ptad, x, y); - } - numaDestroy(&naerror); - - /* Do LSF again */ - ret = ptaGetLinearLSF(ptad, pa, pb, pnafit); - if (pptad) - *pptad = ptad; - else - ptaDestroy(&ptad); - - return ret; -} - - -/*! 
- * \brief ptaNoisyQuadraticLSF() - * - * \param[in] pta - * \param[in] factor reject outliers with error greater than this - * number of medians; typically ~ 3 - * \param[out] pptad [optional] with outliers removed - * \param[out] pa [optional] coeff a of LSF: y = ax^2 + bx + c - * \param[out] pb [optional] coeff b of LSF: y = ax^2 + bx + c - * \param[out] pc [optional] coeff c of LSF: y = ax^2 + bx + c - * \param[out] pmederr [optional] median error - * \param[out] pnafit [optional] numa of least square fit to ptad - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a quadratic least square fit to the set of points
- *          in %pta.  It then evaluates the errors and removes points
- *          whose error is >= factor * median_error.  It then re-runs
- *          a quadratic LSF on the resulting points.
- * 
- */ -l_ok -ptaNoisyQuadraticLSF(PTA *pta, - l_float32 factor, - PTA **pptad, - l_float32 *pa, - l_float32 *pb, - l_float32 *pc, - l_float32 *pmederr, - NUMA **pnafit) -{ -l_int32 n, i, ret; -l_float32 x, y, yf, val, mederr; -NUMA *nafit, *naerror; -PTA *ptad; - - PROCNAME("ptaNoisyQuadraticLSF"); - - if (pptad) *pptad = NULL; - if (pa) *pa = 0.0; - if (pb) *pb = 0.0; - if (pc) *pc = 0.0; - if (pmederr) *pmederr = 0.0; - if (pnafit) *pnafit = NULL; - if (!pptad && !pa && !pb && !pc && !pnafit) - return ERROR_INT("no output requested", procName, 1); - if (factor <= 0.0) - return ERROR_INT("factor must be > 0.0", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if ((n = ptaGetCount(pta)) < 3) - return ERROR_INT("less than 3 pts found", procName, 1); - - if (ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit) != 0) - return ERROR_INT("error in quadratic LSF", procName, 1); - - /* Get the median error */ - naerror = numaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &x, &y); - numaGetFValue(nafit, i, &yf); - numaAddNumber(naerror, L_ABS(y - yf)); - } - numaGetMedian(naerror, &mederr); - if (pmederr) *pmederr = mederr; - numaDestroy(&nafit); - - /* Remove outliers */ - ptad = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &x, &y); - numaGetFValue(naerror, i, &val); - if (val <= factor * mederr) /* <= in case mederr = 0 */ - ptaAddPt(ptad, x, y); - } - numaDestroy(&naerror); - n = ptaGetCount(ptad); - if ((n = ptaGetCount(ptad)) < 3) { - ptaDestroy(&ptad); - return ERROR_INT("less than 3 pts found", procName, 1); - } - - /* Do LSF again */ - ret = ptaGetQuadraticLSF(ptad, pa, pb, pc, pnafit); - if (pptad) - *pptad = ptad; - else - ptaDestroy(&ptad); - - return ret; -} - - -/*! 
- * \brief applyLinearFit() - * - * \param[in] a, b linear fit coefficients - * \param[in] x - * \param[out] py y = a * x + b - * \return 0 if OK, 1 on error - */ -l_ok -applyLinearFit(l_float32 a, - l_float32 b, - l_float32 x, - l_float32 *py) -{ - PROCNAME("applyLinearFit"); - - if (!py) - return ERROR_INT("&y not defined", procName, 1); - - *py = a * x + b; - return 0; -} - - -/*! - * \brief applyQuadraticFit() - * - * \param[in] a, b, c quadratic fit coefficients - * \param[in] x - * \param[out] py y = a * x^2 + b * x + c - * \return 0 if OK, 1 on error - */ -l_ok -applyQuadraticFit(l_float32 a, - l_float32 b, - l_float32 c, - l_float32 x, - l_float32 *py) -{ - PROCNAME("applyQuadraticFit"); - - if (!py) - return ERROR_INT("&y not defined", procName, 1); - - *py = a * x * x + b * x + c; - return 0; -} - - -/*! - * \brief applyCubicFit() - * - * \param[in] a, b, c, d cubic fit coefficients - * \param[in] x - * \param[out] py y = a * x^3 + b * x^2 + c * x + d - * \return 0 if OK, 1 on error - */ -l_ok -applyCubicFit(l_float32 a, - l_float32 b, - l_float32 c, - l_float32 d, - l_float32 x, - l_float32 *py) -{ - PROCNAME("applyCubicFit"); - - if (!py) - return ERROR_INT("&y not defined", procName, 1); - - *py = a * x * x * x + b * x * x + c * x + d; - return 0; -} - - -/*! 
- * \brief applyQuarticFit() - * - * \param[in] a, b, c, d, e quartic fit coefficients - * \param[in] x - * \param[out] py y = a * x^4 + b * x^3 + c * x^2 + d * x + e - * \return 0 if OK, 1 on error - */ -l_ok -applyQuarticFit(l_float32 a, - l_float32 b, - l_float32 c, - l_float32 d, - l_float32 e, - l_float32 x, - l_float32 *py) -{ -l_float32 x2; - - PROCNAME("applyQuarticFit"); - - if (!py) - return ERROR_INT("&y not defined", procName, 1); - - x2 = x * x; - *py = a * x2 * x2 + b * x2 * x + c * x2 + d * x + e; - return 0; -} - - -/*---------------------------------------------------------------------* - * Interconversions with Pix * - *---------------------------------------------------------------------*/ -/*! - * \brief pixPlotAlongPta() - * - * \param[in] pixs any depth - * \param[in] pta set of points on which to plot - * \param[in] outformat GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_LATEX - * \param[in] title [optional] for plot; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a debugging function.
- *      (2) Removes existing colormaps and clips the pta to the input %pixs.
- *      (3) If the image is RGB, three separate plots are generated.
- * 
- */ -l_ok -pixPlotAlongPta(PIX *pixs, - PTA *pta, - l_int32 outformat, - const char *title) -{ -char buffer[128]; -char *rtitle, *gtitle, *btitle; -static l_int32 count = 0; /* require separate temp files for each call */ -l_int32 i, x, y, d, w, h, npts, rval, gval, bval; -l_uint32 val; -NUMA *na, *nar, *nag, *nab; -PIX *pixt; - - PROCNAME("pixPlotAlongPta"); - - lept_mkdir("lept/plot"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (outformat != GPLOT_PNG && outformat != GPLOT_PS && - outformat != GPLOT_EPS && outformat != GPLOT_LATEX) { - L_WARNING("outformat invalid; using GPLOT_PNG\n", procName); - outformat = GPLOT_PNG; - } - - pixt = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt); - w = pixGetWidth(pixt); - h = pixGetHeight(pixt); - npts = ptaGetCount(pta); - if (d == 32) { - nar = numaCreate(npts); - nag = numaCreate(npts); - nab = numaCreate(npts); - for (i = 0; i < npts; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w) - continue; - if (y < 0 || y >= h) - continue; - pixGetPixel(pixt, x, y, &val); - rval = GET_DATA_BYTE(&val, COLOR_RED); - gval = GET_DATA_BYTE(&val, COLOR_GREEN); - bval = GET_DATA_BYTE(&val, COLOR_BLUE); - numaAddNumber(nar, rval); - numaAddNumber(nag, gval); - numaAddNumber(nab, bval); - } - - snprintf(buffer, sizeof(buffer), "/tmp/lept/plot/%03d", count++); - rtitle = stringJoin("Red: ", title); - gplotSimple1(nar, outformat, buffer, rtitle); - snprintf(buffer, sizeof(buffer), "/tmp/lept/plot/%03d", count++); - gtitle = stringJoin("Green: ", title); - gplotSimple1(nag, outformat, buffer, gtitle); - snprintf(buffer, sizeof(buffer), "/tmp/lept/plot/%03d", count++); - btitle = stringJoin("Blue: ", title); - gplotSimple1(nab, outformat, buffer, btitle); - numaDestroy(&nar); - numaDestroy(&nag); - numaDestroy(&nab); - LEPT_FREE(rtitle); - LEPT_FREE(gtitle); - LEPT_FREE(btitle); - } else { - na = numaCreate(npts); - 
for (i = 0; i < npts; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w) - continue; - if (y < 0 || y >= h) - continue; - pixGetPixel(pixt, x, y, &val); - numaAddNumber(na, (l_float32)val); - } - - snprintf(buffer, sizeof(buffer), "/tmp/lept/plot/%03d", count++); - gplotSimple1(na, outformat, buffer, title); - numaDestroy(&na); - } - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief ptaGetPixelsFromPix() - * - * \param[in] pixs 1 bpp - * \param[in] box [optional] can be null - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) Generates a pta of fg pixels in the pix, within the box.
- *          If box == NULL, it uses the entire pix.
- * 
- */ -PTA * -ptaGetPixelsFromPix(PIX *pixs, - BOX *box) -{ -l_int32 i, j, w, h, wpl, xstart, xend, ystart, yend, bw, bh; -l_uint32 *data, *line; -PTA *pta; - - PROCNAME("ptaGetPixelsFromPix"); - - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PTA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - xstart = ystart = 0; - xend = w - 1; - yend = h - 1; - if (box) { - boxGetGeometry(box, &xstart, &ystart, &bw, &bh); - xend = xstart + bw - 1; - yend = ystart + bh - 1; - } - - if ((pta = ptaCreate(0)) == NULL) - return (PTA *)ERROR_PTR("pta not made", procName, NULL); - for (i = ystart; i <= yend; i++) { - line = data + i * wpl; - for (j = xstart; j <= xend; j++) { - if (GET_DATA_BIT(line, j)) - ptaAddPt(pta, j, i); - } - } - - return pta; -} - - -/*! - * \brief pixGenerateFromPta() - * - * \param[in] pta - * \param[in] w, h of pix - * \return pix 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) Points are rounded to nearest ints.
- *      (2) Any points outside (w,h) are silently discarded.
- *      (3) Output 1 bpp pix has values 1 for each point in the pta.
- * 
- */ -PIX * -pixGenerateFromPta(PTA *pta, - l_int32 w, - l_int32 h) -{ -l_int32 n, i, x, y; -PIX *pix; - - PROCNAME("pixGenerateFromPta"); - - if (!pta) - return (PIX *)ERROR_PTR("pta not defined", procName, NULL); - - if ((pix = pixCreate(w, h, 1)) == NULL) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w || y < 0 || y >= h) - continue; - pixSetPixel(pix, x, y, 1); - } - - return pix; -} - - -/*! - * \brief ptaGetBoundaryPixels() - * - * \param[in] pixs 1 bpp - * \param[in] type L_BOUNDARY_FG, L_BOUNDARY_BG - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) This generates a pta of either fg or bg boundary pixels.
- *      (2) See also pixGeneratePtaBoundary() for rendering of
- *          fg boundary pixels.
- * 
- */ -PTA * -ptaGetBoundaryPixels(PIX *pixs, - l_int32 type) -{ -PIX *pixt; -PTA *pta; - - PROCNAME("ptaGetBoundaryPixels"); - - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PTA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (type != L_BOUNDARY_FG && type != L_BOUNDARY_BG) - return (PTA *)ERROR_PTR("invalid type", procName, NULL); - - if (type == L_BOUNDARY_FG) - pixt = pixMorphSequence(pixs, "e3.3", 0); - else - pixt = pixMorphSequence(pixs, "d3.3", 0); - pixXor(pixt, pixt, pixs); - pta = ptaGetPixelsFromPix(pixt, NULL); - - pixDestroy(&pixt); - return pta; -} - - -/*! - * \brief ptaaGetBoundaryPixels() - * - * \param[in] pixs 1 bpp - * \param[in] type L_BOUNDARY_FG, L_BOUNDARY_BG - * \param[in] connectivity 4 or 8 - * \param[out] pboxa [optional] bounding boxes of the c.c. - * \param[out] ppixa [optional] pixa of the c.c. - * \return ptaa, or NULL on error - * - *
- * Notes:
- *      (1) This generates a ptaa of either fg or bg boundary pixels,
- *          where each pta has the boundary pixels for a connected
- *          component.
- *      (2) We can't simply find all the boundary pixels and then select
- *          those within the bounding box of each component, because
- *          bounding boxes can overlap.  It is necessary to extract and
- *          dilate or erode each component separately.  Note also that
- *          special handling is required for bg pixels when the
- *          component touches the pix boundary.
- * 
- */ -PTAA * -ptaaGetBoundaryPixels(PIX *pixs, - l_int32 type, - l_int32 connectivity, - BOXA **pboxa, - PIXA **ppixa) -{ -l_int32 i, n, w, h, x, y, bw, bh, left, right, top, bot; -BOXA *boxa; -PIX *pixt1, *pixt2; -PIXA *pixa; -PTA *pta1, *pta2; -PTAA *ptaa; - - PROCNAME("ptaaGetBoundaryPixels"); - - if (pboxa) *pboxa = NULL; - if (ppixa) *ppixa = NULL; - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PTAA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (type != L_BOUNDARY_FG && type != L_BOUNDARY_BG) - return (PTAA *)ERROR_PTR("invalid type", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PTAA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - boxa = pixConnComp(pixs, &pixa, connectivity); - n = boxaGetCount(boxa); - ptaa = ptaaCreate(0); - for (i = 0; i < n; i++) { - pixt1 = pixaGetPix(pixa, i, L_CLONE); - boxaGetBoxGeometry(boxa, i, &x, &y, &bw, &bh); - left = right = top = bot = 0; - if (type == L_BOUNDARY_BG) { - if (x > 0) left = 1; - if (y > 0) top = 1; - if (x + bw < w) right = 1; - if (y + bh < h) bot = 1; - pixt2 = pixAddBorderGeneral(pixt1, left, right, top, bot, 0); - } else { - pixt2 = pixClone(pixt1); - } - pta1 = ptaGetBoundaryPixels(pixt2, type); - pta2 = ptaTransform(pta1, x - left, y - top, 1.0, 1.0); - ptaaAddPta(ptaa, pta2, L_INSERT); - ptaDestroy(&pta1); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - } - - if (pboxa) - *pboxa = boxa; - else - boxaDestroy(&boxa); - if (ppixa) - *ppixa = pixa; - else - pixaDestroy(&pixa); - return ptaa; -} - - -/*! - * \brief ptaaIndexLabeledPixels() - * - * \param[in] pixs 32 bpp, of indices of c.c. - * \param[out] pncc [optional] number of connected components - * \return ptaa, or NULL on error - * - *
- * Notes:
- *      (1) The pixel values in %pixs are the index of the connected component
- *          to which the pixel belongs; %pixs is typically generated from
- *          a 1 bpp pix by pixConnCompTransform().  Background pixels in
- *          the generating 1 bpp pix are represented in %pixs by 0.
- *          We do not check that the pixel values are correctly labelled.
- *      (2) Each pta in the returned ptaa gives the pixel locations
- *          correspnding to a connected component, with the label of each
- *          given by the index of the pta into the ptaa.
- *      (3) Initialize with the first pta in ptaa being empty and
- *          representing the background value (index 0) in the pix.
- * 
- */ -PTAA * -ptaaIndexLabeledPixels(PIX *pixs, - l_int32 *pncc) -{ -l_int32 wpl, index, i, j, w, h; -l_uint32 maxval; -l_uint32 *data, *line; -PTA *pta; -PTAA *ptaa; - - PROCNAME("ptaaIndexLabeledPixels"); - - if (pncc) *pncc = 0; - if (!pixs || (pixGetDepth(pixs) != 32)) - return (PTAA *)ERROR_PTR("pixs undef or not 32 bpp", procName, NULL); - - /* The number of c.c. is the maximum pixel value. Use this to - * initialize ptaa with sufficient pta arrays */ - pixGetMaxValueInRect(pixs, NULL, &maxval, NULL, NULL); - if (pncc) *pncc = maxval; - pta = ptaCreate(1); - ptaa = ptaaCreate(maxval + 1); - ptaaInitFull(ptaa, pta); - ptaDestroy(&pta); - - /* Sweep over %pixs, saving the pixel coordinates of each pixel - * with nonzero value in the appropriate pta, indexed by that value. */ - pixGetDimensions(pixs, &w, &h, NULL); - data = pixGetData(pixs); - wpl = pixGetWpl(pixs); - for (i = 0; i < h; i++) { - line = data + wpl * i; - for (j = 0; j < w; j++) { - index = line[j]; - if (index > 0) - ptaaAddPt(ptaa, index, j, i); - } - } - - return ptaa; -} - - -/*! - * \brief ptaGetNeighborPixLocs() - * - * \param[in] pixs any depth - * \param[in] x, y pixel from which we search for nearest neighbors - * \param[in] conn 4 or 8 connectivity - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) Generates a pta of all valid neighbor pixel locations,
- *          or NULL on error.
- * 
- */ -PTA * -ptaGetNeighborPixLocs(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 conn) -{ -l_int32 w, h; -PTA *pta; - - PROCNAME("ptaGetNeighborPixLocs"); - - if (!pixs) - return (PTA *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (x < 0 || x >= w || y < 0 || y >= h) - return (PTA *)ERROR_PTR("(x,y) not in pixs", procName, NULL); - if (conn != 4 && conn != 8) - return (PTA *)ERROR_PTR("conn not 4 or 8", procName, NULL); - - pta = ptaCreate(conn); - if (x > 0) - ptaAddPt(pta, x - 1, y); - if (x < w - 1) - ptaAddPt(pta, x + 1, y); - if (y > 0) - ptaAddPt(pta, x, y - 1); - if (y < h - 1) - ptaAddPt(pta, x, y + 1); - if (conn == 8) { - if (x > 0) { - if (y > 0) - ptaAddPt(pta, x - 1, y - 1); - if (y < h - 1) - ptaAddPt(pta, x - 1, y + 1); - } - if (x < w - 1) { - if (y > 0) - ptaAddPt(pta, x + 1, y - 1); - if (y < h - 1) - ptaAddPt(pta, x + 1, y + 1); - } - } - - return pta; -} - - -/*---------------------------------------------------------------------* - * Interconversion with Numa * - *---------------------------------------------------------------------*/ -/*! - * \brief numaConvertToPta1() - * - * \param[in] na numa with implicit y(x) - * \return pta if OK; null on error - */ -PTA * -numaConvertToPta1(NUMA *na) -{ -l_int32 i, n; -l_float32 startx, delx, val; -PTA *pta; - - PROCNAME("numaConvertToPta1"); - - if (!na) - return (PTA *)ERROR_PTR("na not defined", procName, NULL); - - n = numaGetCount(na); - pta = ptaCreate(n); - numaGetParameters(na, &startx, &delx); - for (i = 0; i < n; i++) { - numaGetFValue(na, i, &val); - ptaAddPt(pta, startx + i * delx, val); - } - return pta; -} - - -/*! 
- * \brief numaConvertToPta2() - * - * \param[in] nax - * \param[in] nay - * \return pta if OK; null on error - */ -PTA * -numaConvertToPta2(NUMA *nax, - NUMA *nay) -{ -l_int32 i, n, nx, ny; -l_float32 valx, valy; -PTA *pta; - - PROCNAME("numaConvertToPta2"); - - if (!nax || !nay) - return (PTA *)ERROR_PTR("nax and nay not both defined", procName, NULL); - - nx = numaGetCount(nax); - ny = numaGetCount(nay); - n = L_MIN(nx, ny); - if (nx != ny) - L_WARNING("nx = %d does not equal ny = %d\n", procName, nx, ny); - pta = ptaCreate(n); - for (i = 0; i < n; i++) { - numaGetFValue(nax, i, &valx); - numaGetFValue(nay, i, &valy); - ptaAddPt(pta, valx, valy); - } - return pta; -} - - -/*! - * \brief ptaConvertToNuma() - * - * \param[in] pta - * \param[out] pnax addr of nax - * \param[out] pnay addr of nay - * \return 0 if OK, 1 on error - */ -l_ok -ptaConvertToNuma(PTA *pta, - NUMA **pnax, - NUMA **pnay) -{ -l_int32 i, n; -l_float32 valx, valy; - - PROCNAME("ptaConvertToNuma"); - - if (pnax) *pnax = NULL; - if (pnay) *pnay = NULL; - if (!pnax || !pnay) - return ERROR_INT("&nax and &nay not both defined", procName, 1); - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - - n = ptaGetCount(pta); - *pnax = numaCreate(n); - *pnay = numaCreate(n); - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &valx, &valy); - numaAddNumber(*pnax, valx); - numaAddNumber(*pnay, valy); - } - return 0; -} - - -/*---------------------------------------------------------------------* - * Display Pta and Ptaa * - *---------------------------------------------------------------------*/ -/*! - * \brief pixDisplayPta() - * - * \param[in] pixd can be same as pixs or NULL; 32 bpp if in-place - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] pta of path to be plotted - * \return pixd 32 bpp RGB version of pixs, with path in green. - * - *
- * Notes:
- *      (1) To write on an existing pixs, pixs must be 32 bpp and
- *          call with pixd == pixs:
- *             pixDisplayPta(pixs, pixs, pta);
- *          To write to a new pix, use pixd == NULL and call:
- *             pixd = pixDisplayPta(NULL, pixs, pta);
- *      (2) On error, returns pixd to avoid losing pixs if called as
- *             pixs = pixDisplayPta(pixs, pixs, pta);
- * 
- */ -PIX * -pixDisplayPta(PIX *pixd, - PIX *pixs, - PTA *pta) -{ -l_int32 i, n, w, h, x, y; -l_uint32 rpixel, gpixel, bpixel; - - PROCNAME("pixDisplayPta"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!pta) - return (PIX *)ERROR_PTR("pta not defined", procName, pixd); - if (pixd && (pixd != pixs || pixGetDepth(pixd) != 32)) - return (PIX *)ERROR_PTR("invalid pixd", procName, pixd); - - if (!pixd) - pixd = pixConvertTo32(pixs); - pixGetDimensions(pixd, &w, &h, NULL); - composeRGBPixel(255, 0, 0, &rpixel); /* start point */ - composeRGBPixel(0, 255, 0, &gpixel); - composeRGBPixel(0, 0, 255, &bpixel); /* end point */ - - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - if (x < 0 || x >= w || y < 0 || y >= h) - continue; - if (i == 0) - pixSetPixel(pixd, x, y, rpixel); - else if (i < n - 1) - pixSetPixel(pixd, x, y, gpixel); - else - pixSetPixel(pixd, x, y, bpixel); - } - - return pixd; -} - - -/*! - * \brief pixDisplayPtaaPattern() - * - * \param[in] pixd 32 bpp - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp; 32 bpp if in place - * \param[in] ptaa giving locations at which the pattern is displayed - * \param[in] pixp 1 bpp pattern to be placed such that its reference - * point co-locates with each point in pta - * \param[in] cx, cy reference point in pattern - * \return pixd 32 bpp RGB version of pixs. - * - *
- * Notes:
- *      (1) To write on an existing pixs, pixs must be 32 bpp and
- *          call with pixd == pixs:
- *             pixDisplayPtaPattern(pixs, pixs, pta, ...);
- *          To write to a new pix, use pixd == NULL and call:
- *             pixd = pixDisplayPtaPattern(NULL, pixs, pta, ...);
- *      (2) Puts a random color on each pattern associated with a pta.
- *      (3) On error, returns pixd to avoid losing pixs if called as
- *             pixs = pixDisplayPtaPattern(pixs, pixs, pta, ...);
- *      (4) A typical pattern to be used is a circle, generated with
- *             generatePtaFilledCircle()
- * 
- */ -PIX * -pixDisplayPtaaPattern(PIX *pixd, - PIX *pixs, - PTAA *ptaa, - PIX *pixp, - l_int32 cx, - l_int32 cy) -{ -l_int32 i, n; -l_uint32 color; -PIXCMAP *cmap; -PTA *pta; - - PROCNAME("pixDisplayPtaaPattern"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!ptaa) - return (PIX *)ERROR_PTR("ptaa not defined", procName, pixd); - if (pixd && (pixd != pixs || pixGetDepth(pixd) != 32)) - return (PIX *)ERROR_PTR("invalid pixd", procName, pixd); - if (!pixp) - return (PIX *)ERROR_PTR("pixp not defined", procName, pixd); - - if (!pixd) - pixd = pixConvertTo32(pixs); - - /* Use 256 random colors */ - cmap = pixcmapCreateRandom(8, 0, 0); - n = ptaaGetCount(ptaa); - for (i = 0; i < n; i++) { - pixcmapGetColor32(cmap, i % 256, &color); - pta = ptaaGetPta(ptaa, i, L_CLONE); - pixDisplayPtaPattern(pixd, pixd, pta, pixp, cx, cy, color); - ptaDestroy(&pta); - } - - pixcmapDestroy(&cmap); - return pixd; -} - - -/*! - * \brief pixDisplayPtaPattern() - * - * \param[in] pixd can be same as pixs or NULL; 32 bpp if in-place - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] pta giving locations at which the pattern is displayed - * \param[in] pixp 1 bpp pattern to be placed such that its reference - * point co-locates with each point in pta - * \param[in] cx, cy reference point in pattern - * \param[in] color in 0xrrggbb00 format - * \return pixd 32 bpp RGB version of pixs. - * - *
- * Notes:
- *      (1) To write on an existing pixs, pixs must be 32 bpp and
- *          call with pixd == pixs:
- *             pixDisplayPtaPattern(pixs, pixs, pta, ...);
- *          To write to a new pix, use pixd == NULL and call:
- *             pixd = pixDisplayPtaPattern(NULL, pixs, pta, ...);
- *      (2) On error, returns pixd to avoid losing pixs if called as
- *             pixs = pixDisplayPtaPattern(pixs, pixs, pta, ...);
- *      (3) A typical pattern to be used is a circle, generated with
- *             generatePtaFilledCircle()
- * 
- */ -PIX * -pixDisplayPtaPattern(PIX *pixd, - PIX *pixs, - PTA *pta, - PIX *pixp, - l_int32 cx, - l_int32 cy, - l_uint32 color) -{ -l_int32 i, n, w, h, x, y; -PTA *ptat; - - PROCNAME("pixDisplayPtaPattern"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (!pta) - return (PIX *)ERROR_PTR("pta not defined", procName, pixd); - if (pixd && (pixd != pixs || pixGetDepth(pixd) != 32)) - return (PIX *)ERROR_PTR("invalid pixd", procName, pixd); - if (!pixp) - return (PIX *)ERROR_PTR("pixp not defined", procName, pixd); - - if (!pixd) - pixd = pixConvertTo32(pixs); - pixGetDimensions(pixs, &w, &h, NULL); - ptat = ptaReplicatePattern(pta, pixp, NULL, cx, cy, w, h); - - n = ptaGetCount(ptat); - for (i = 0; i < n; i++) { - ptaGetIPt(ptat, i, &x, &y); - if (x < 0 || x >= w || y < 0 || y >= h) - continue; - pixSetPixel(pixd, x, y, color); - } - - ptaDestroy(&ptat); - return pixd; -} - - -/*! - * \brief ptaReplicatePattern() - * - * \param[in] ptas "sparse" input pta - * \param[in] pixp [optional] 1 bpp pattern, to be replicated - * in output pta - * \param[in] ptap [optional] set of pts, to be replicated in output pta - * \param[in] cx, cy reference point in pattern - * \param[in] w, h clipping sizes for output pta - * \return ptad with all points of replicated pattern, or NULL on error - * - *
- * Notes:
- *      (1) You can use either the image %pixp or the set of pts %ptap.
- *      (2) The pattern is placed with its reference point at each point
- *          in ptas, and all the fg pixels are colleced into ptad.
- *          For %pixp, this is equivalent to blitting pixp at each point
- *          in ptas, and then converting the resulting pix to a pta.
- * 
- */ -PTA * -ptaReplicatePattern(PTA *ptas, - PIX *pixp, - PTA *ptap, - l_int32 cx, - l_int32 cy, - l_int32 w, - l_int32 h) -{ -l_int32 i, j, n, np, x, y, xp, yp, xf, yf; -PTA *ptat, *ptad; - - PROCNAME("ptaReplicatePattern"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (!pixp && !ptap) - return (PTA *)ERROR_PTR("no pattern is defined", procName, NULL); - if (pixp && ptap) - L_WARNING("pixp and ptap defined; using ptap\n", procName); - - n = ptaGetCount(ptas); - ptad = ptaCreate(n); - if (ptap) - ptat = ptaClone(ptap); - else - ptat = ptaGetPixelsFromPix(pixp, NULL); - np = ptaGetCount(ptat); - for (i = 0; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - for (j = 0; j < np; j++) { - ptaGetIPt(ptat, j, &xp, &yp); - xf = x - cx + xp; - yf = y - cy + yp; - if (xf >= 0 && xf < w && yf >= 0 && yf < h) - ptaAddPt(ptad, xf, yf); - } - } - - ptaDestroy(&ptat); - return ptad; -} - - -/*! - * \brief pixDisplayPtaa() - * - * \param[in] pixs 1, 2, 4, 8, 16 or 32 bpp - * \param[in] ptaa array of paths to be plotted - * \return pixd 32 bpp RGB version of pixs, with paths plotted - * in different colors, or NULL on error - */ -PIX * -pixDisplayPtaa(PIX *pixs, - PTAA *ptaa) -{ -l_int32 i, j, w, h, npta, npt, x, y, rv, gv, bv; -l_uint32 *pixela; -NUMA *na1, *na2, *na3; -PIX *pixd; -PTA *pta; - - PROCNAME("pixDisplayPtaa"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (!ptaa) - return (PIX *)ERROR_PTR("ptaa not defined", procName, NULL); - npta = ptaaGetCount(ptaa); - if (npta == 0) - return (PIX *)ERROR_PTR("no pta", procName, NULL); - - if ((pixd = pixConvertTo32(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixGetDimensions(pixd, &w, &h, NULL); - - /* Make a colormap for the paths */ - if ((pixela = (l_uint32 *)LEPT_CALLOC(npta, sizeof(l_uint32))) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("calloc fail for pixela", procName, NULL); - } - na1 = 
numaPseudorandomSequence(256, 14657); - na2 = numaPseudorandomSequence(256, 34631); - na3 = numaPseudorandomSequence(256, 54617); - for (i = 0; i < npta; i++) { - numaGetIValue(na1, i % 256, &rv); - numaGetIValue(na2, i % 256, &gv); - numaGetIValue(na3, i % 256, &bv); - composeRGBPixel(rv, gv, bv, &pixela[i]); - } - numaDestroy(&na1); - numaDestroy(&na2); - numaDestroy(&na3); - - for (i = 0; i < npta; i++) { - pta = ptaaGetPta(ptaa, i, L_CLONE); - npt = ptaGetCount(pta); - for (j = 0; j < npt; j++) { - ptaGetIPt(pta, j, &x, &y); - if (x < 0 || x >= w || y < 0 || y >= h) - continue; - pixSetPixel(pixd, x, y, pixela[i]); - } - ptaDestroy(&pta); - } - - LEPT_FREE(pixela); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptafunc2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptafunc2.c deleted file mode 100644 index 1949a6cf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptafunc2.c +++ /dev/null @@ -1,899 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file ptafunc2.c - *
- *
- *      --------------------------------------
- *      This file has these Pta utilities:
- *         - sorting
- *         - ordered set operations
- *         - hash map operations
- *      --------------------------------------
- *
- *      Sorting
- *           PTA        *ptaSort()
- *           l_int32     ptaGetSortIndex()
- *           PTA        *ptaSortByIndex()
- *           PTAA       *ptaaSortByIndex()
- *           l_int32     ptaGetRankValue()
- *           PTA        *ptaSort2d()
- *           l_int32     ptaEqual()
- *
- *      Set operations using aset (rbtree)
- *           PTA        *ptaUnionByAset()
- *           PTA        *ptaRemoveDupsByAset()
- *           PTA        *ptaIntersectionByAset()
- *           L_ASET     *l_asetCreateFromPta()
- *
- *      Set operations using hashing (dnahash)
- *           PTA        *ptaUnionByHash()
- *           l_int32     ptaRemoveDupsByHash()
- *           PTA        *ptaIntersectionByHash();
- *           l_int32     ptaFindPtByHash()
- *           L_DNAHASH  *l_dnaHashCreateFromPta()
- *
- *
- * We have two implementations of set operations on an array of points:
- *
- *   (1) Using an underlying tree (rbtree)
- *       This uses a good 64 bit hashing function for the key,
- *       that is not expected to have hash collisions (and we do
- *       not test for them).  The tree is built up of the hash
- *       values, and if the hash is found in the tree, it is
- *       assumed that the point has already been found.
- *
- *   (2) Using an underlying hashing of the keys (dnahash)
- *       This uses a fast 64 bit hashing function for the key,
- *       which is then hashed into a bucket (a dna in a dnaHash).
- *       Because hash collisions can occur, the index into the
- *       pta for the point that gave rise to that key is stored,
- *       and the dna (bucket) is traversed, using the stored indices
- *       to determine if that point had already been seen.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*---------------------------------------------------------------------* - * Sorting * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaSort() - * - * \param[in] ptas - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y - * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING - * \param[out] pnaindex [optional] index of sorted order into - * original array - * \return ptad sorted version of ptas, or NULL on error - */ -PTA * -ptaSort(PTA *ptas, - l_int32 sorttype, - l_int32 sortorder, - NUMA **pnaindex) -{ -PTA *ptad; -NUMA *naindex; - - PROCNAME("ptaSort"); - - if (pnaindex) *pnaindex = NULL; - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) - return (PTA *)ERROR_PTR("invalid sort type", procName, NULL); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return (PTA *)ERROR_PTR("invalid sort order", procName, NULL); - - if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0) - return (PTA *)ERROR_PTR("naindex not made", procName, NULL); - - ptad = ptaSortByIndex(ptas, naindex); - if (pnaindex) - *pnaindex = naindex; - else - numaDestroy(&naindex); - if (!ptad) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - return ptad; -} - - -/*! 
- * \brief ptaGetSortIndex() - * - * \param[in] ptas - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y - * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING - * \param[out] pnaindex index of sorted order into original array - * \return 0 if OK, 1 on error - */ -l_ok -ptaGetSortIndex(PTA *ptas, - l_int32 sorttype, - l_int32 sortorder, - NUMA **pnaindex) -{ -l_int32 i, n; -l_float32 x, y; -NUMA *na, *nai; - - PROCNAME("ptaGetSortIndex"); - - if (!pnaindex) - return ERROR_INT("&naindex not defined", procName, 1); - *pnaindex = NULL; - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) - return ERROR_INT("invalid sort type", procName, 1); - if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) - return ERROR_INT("invalid sort order", procName, 1); - - /* Build up numa of specific data */ - n = ptaGetCount(ptas); - if ((na = numaCreate(n)) == NULL) - return ERROR_INT("na not made", procName, 1); - for (i = 0; i < n; i++) { - ptaGetPt(ptas, i, &x, &y); - if (sorttype == L_SORT_BY_X) - numaAddNumber(na, x); - else - numaAddNumber(na, y); - } - - /* Get the sort index for data array */ - nai = numaGetSortIndex(na, sortorder); - numaDestroy(&na); - if (!nai) - return ERROR_INT("naindex not made", procName, 1); - *pnaindex = nai; - return 0; -} - - -/*! 
- * \brief ptaSortByIndex() - * - * \param[in] ptas - * \param[in] naindex na that maps from the new pta to the input pta - * \return ptad sorted, or NULL on error - */ -PTA * -ptaSortByIndex(PTA *ptas, - NUMA *naindex) -{ -l_int32 i, index, n; -l_float32 x, y; -PTA *ptad; - - PROCNAME("ptaSortByIndex"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - if (!naindex) - return (PTA *)ERROR_PTR("naindex not defined", procName, NULL); - - /* Build up sorted pta using sort index */ - n = numaGetCount(naindex); - if ((ptad = ptaCreate(n)) == NULL) - return (PTA *)ERROR_PTR("ptad not made", procName, NULL); - for (i = 0; i < n; i++) { - numaGetIValue(naindex, i, &index); - ptaGetPt(ptas, index, &x, &y); - ptaAddPt(ptad, x, y); - } - - return ptad; -} - - -/*! - * \brief ptaaSortByIndex() - * - * \param[in] ptaas - * \param[in] naindex na that maps from the new ptaa to the input ptaa - * \return ptaad sorted, or NULL on error - */ -PTAA * -ptaaSortByIndex(PTAA *ptaas, - NUMA *naindex) -{ -l_int32 i, n, index; -PTA *pta; -PTAA *ptaad; - - PROCNAME("ptaaSortByIndex"); - - if (!ptaas) - return (PTAA *)ERROR_PTR("ptaas not defined", procName, NULL); - if (!naindex) - return (PTAA *)ERROR_PTR("naindex not defined", procName, NULL); - - n = ptaaGetCount(ptaas); - if (numaGetCount(naindex) != n) - return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", procName, NULL); - ptaad = ptaaCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(naindex, i, &index); - pta = ptaaGetPta(ptaas, index, L_COPY); - ptaaAddPta(ptaad, pta, L_INSERT); - } - - return ptaad; -} - - -/*! 
- * \brief ptaGetRankValue() - * - * \param[in] pta - * \param[in] fract use 0.0 for smallest, 1.0 for largest - * \param[in] ptasort [optional] version of %pta sorted by %sorttype - * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y - * \param[out] pval rankval: the x or y value at %fract - * \return 0 if OK, 1 on error - */ -l_ok -ptaGetRankValue(PTA *pta, - l_float32 fract, - PTA *ptasort, - l_int32 sorttype, - l_float32 *pval) -{ -l_int32 index, n; -PTA *ptas; - - PROCNAME("ptaGetRankValue"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) - return ERROR_INT("invalid sort type", procName, 1); - if (fract < 0.0 || fract > 1.0) - return ERROR_INT("fract not in [0.0 ... 1.0]", procName, 1); - if ((n = ptaGetCount(pta)) == 0) - return ERROR_INT("pta empty", procName, 1); - - if (ptasort) - ptas = ptasort; - else - ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL); - - index = (l_int32)(fract * (l_float32)(n - 1) + 0.5); - if (sorttype == L_SORT_BY_X) - ptaGetPt(ptas, index, pval, NULL); - else /* sort by y */ - ptaGetPt(ptas, index, NULL, pval); - - if (!ptasort) ptaDestroy(&ptas); - return 0; -} - - -/*! - * \brief ptaSort2d() - * - * \param[in] ptas - * \return ptad, or NULL on error - * - *
- * Notes:
- *      (1) Sort increasing by row-major, scanning down from the UL corner,
- *          where for each value of y, order the pts from left to right.
- * 
- */ -PTA * -ptaSort2d(PTA *pta) -{ -l_int32 index, i, j, n, nx, ny, start, end; -l_float32 x, y, yp, val; -NUMA *na1, *na2, *nas, *nax; -PTA *pta1, *ptad; - - PROCNAME("ptaSort2d"); - - if (!pta) - return (PTA *)ERROR_PTR("pta not defined", procName, NULL); - - /* Sort by row-major (y first, then x). After sort by y, - * the x values at the same y are not sorted. */ - pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL); - - /* Find start and ending indices with the same y value */ - n = ptaGetCount(pta1); - na1 = numaCreate(0); /* holds start index of sequence with same y */ - na2 = numaCreate(0); /* holds end index of sequence with same y */ - numaAddNumber(na1, 0); - ptaGetPt(pta1, 0, &x, &yp); - for (i = 1; i < n; i++) { - ptaGetPt(pta1, i, &x, &y); - if (y != yp) { - numaAddNumber(na1, i); - numaAddNumber(na2, i - 1); - } - yp = y; - } - numaAddNumber(na2, n - 1); - - /* Sort by increasing x each set with the same y value */ - ptad = ptaCreate(n); - ny = numaGetCount(na1); /* number of distinct y values */ - for (i = 0, index = 0; i < ny; i++) { - numaGetIValue(na1, i, &start); - numaGetIValue(na2, i, &end); - nx = end - start + 1; /* number of points with current y value */ - if (nx == 1) { - ptaGetPt(pta1, index++, &x, &y); - ptaAddPt(ptad, x, y); - } else { - /* More than 1 point; extract and sort the x values */ - nax = numaCreate(nx); - for (j = 0; j < nx; j++) { - ptaGetPt(pta1, index + j, &x, &y); - numaAddNumber(nax, x); - } - nas = numaSort(NULL, nax, L_SORT_INCREASING); - /* Add the points with x sorted */ - for (j = 0; j < nx; j++) { - numaGetFValue(nas, j, &val); - ptaAddPt(ptad, val, y); - } - index += nx; - numaDestroy(&nax); - numaDestroy(&nas); - } - } - numaDestroy(&na1); - numaDestroy(&na2); - ptaDestroy(&pta1); - return ptad; -} - - -/*! - * \brief ptaEqual() - * - * \param[in] pta1 - * \param[in] pta2 - * \param[out] psame 1 if same; 0 if different - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Equality is defined as having the same set of points,
- *          independent of the order in which they are presented.
- * 
- */ -l_ok -ptaEqual(PTA *pta1, - PTA *pta2, - l_int32 *psame) -{ -l_int32 i, n1, n2; -l_float32 x1, y1, x2, y2; -PTA *ptas1, *ptas2; - - PROCNAME("ptaEqual"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0.0; - if (!pta1 || !pta2) - return ERROR_INT("pta1 and pta2 not both defined", procName, 1); - - n1 = ptaGetCount(pta1); - n2 = ptaGetCount(pta2); - if (n1 != n2) return 0; - - /* 2d sort each and compare */ - ptas1 = ptaSort2d(pta1); - ptas2 = ptaSort2d(pta2); - for (i = 0; i < n1; i++) { - ptaGetPt(ptas1, i, &x1, &y1); - ptaGetPt(ptas2, i, &x2, &y2); - if (x1 != x2 || y1 != y2) { - ptaDestroy(&ptas1); - ptaDestroy(&ptas2); - return 0; - } - } - - *psame = 1; - ptaDestroy(&ptas1); - ptaDestroy(&ptas2); - return 0; -} - - - - -/*---------------------------------------------------------------------* - * Set operations using aset (rbtree) * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaUnionByAset() - * - * \param[in] pta1, pta2 - * \return ptad with the union of the set of points, or NULL on error - * - *
- * Notes:
- *      (1) See sarrayRemoveDupsByAset() for the approach.
- *      (2) The key is a 64-bit hash from the (x,y) pair.
- *      (3) This is slower than ptaUnionByHash(), mostly because of the
- *          nlogn sort to build up the rbtree.  Do not use for large
- *          numbers of points (say, > 1M).
- *      (4) The *Aset() functions use the sorted l_Aset, which is just
- *          an rbtree in disguise.
- * 
- */ -PTA * -ptaUnionByAset(PTA *pta1, - PTA *pta2) -{ -PTA *pta3, *ptad; - - PROCNAME("ptaUnionByAset"); - - if (!pta1) - return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); - if (!pta2) - return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); - - /* Join */ - pta3 = ptaCopy(pta1); - ptaJoin(pta3, pta2, 0, -1); - - /* Eliminate duplicates */ - ptad = ptaRemoveDupsByAset(pta3); - ptaDestroy(&pta3); - return ptad; -} - - -/*! - * \brief ptaRemoveDupsByAset() - * - * \param[in] ptas assumed to be integer values - * \return ptad with duplicates removed, or NULL on error - * - *
- * Notes:
- *      (1) This is slower than ptaRemoveDupsByHash(), mostly because
- *          of the nlogn sort to build up the rbtree.  Do not use for
- *          large numbers of points (say, > 1M).
- * 
- */ -PTA * -ptaRemoveDupsByAset(PTA *ptas) -{ -l_int32 i, n, x, y; -PTA *ptad; -l_uint64 hash; -L_ASET *set; -RB_TYPE key; - - PROCNAME("ptaRemoveDupsByAset"); - - if (!ptas) - return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); - - set = l_asetCreate(L_UINT_TYPE); - n = ptaGetCount(ptas); - ptad = ptaCreate(n); - for (i = 0; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - l_hashPtToUint64(x, y, &hash); - key.utype = hash; - if (!l_asetFind(set, key)) { - ptaAddPt(ptad, x, y); - l_asetInsert(set, key); - } - } - - l_asetDestroy(&set); - return ptad; -} - - -/*! - * \brief ptaIntersectionByAset() - * - * \param[in] pta1, pta2 - * \return ptad intersection of the point sets, or NULL on error - * - *
- * Notes:
- *      (1) See sarrayIntersectionByAset() for the approach.
- *      (2) The key is a 64-bit hash from the (x,y) pair.
- *      (3) This is slower than ptaIntersectionByHash(), mostly because
- *          of the nlogn sort to build up the rbtree.  Do not use for
- *          large numbers of points (say, > 1M).
- * 
- */ -PTA * -ptaIntersectionByAset(PTA *pta1, - PTA *pta2) -{ -l_int32 n1, n2, i, n, x, y; -l_uint64 hash; -L_ASET *set1, *set2; -RB_TYPE key; -PTA *pta_small, *pta_big, *ptad; - - PROCNAME("ptaIntersectionByAset"); - - if (!pta1) - return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); - if (!pta2) - return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); - - /* Put the elements of the biggest array into a set */ - n1 = ptaGetCount(pta1); - n2 = ptaGetCount(pta2); - pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ - pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ - set1 = l_asetCreateFromPta(pta_big); - - /* Build up the intersection of points */ - ptad = ptaCreate(0); - n = ptaGetCount(pta_small); - set2 = l_asetCreate(L_UINT_TYPE); - for (i = 0; i < n; i++) { - ptaGetIPt(pta_small, i, &x, &y); - l_hashPtToUint64(x, y, &hash); - key.utype = hash; - if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { - ptaAddPt(ptad, x, y); - l_asetInsert(set2, key); - } - } - - l_asetDestroy(&set1); - l_asetDestroy(&set2); - return ptad; -} - - -/*! - * \brief l_asetCreateFromPta() - * - * \param[in] pta - * \return set using a 64-bit hash of (x,y) as the key - */ -L_ASET * -l_asetCreateFromPta(PTA *pta) -{ -l_int32 i, n, x, y; -l_uint64 hash; -L_ASET *set; -RB_TYPE key; - - PROCNAME("l_asetCreateFromPta"); - - if (!pta) - return (L_ASET *)ERROR_PTR("pta not defined", procName, NULL); - - set = l_asetCreate(L_UINT_TYPE); - n = ptaGetCount(pta); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - l_hashPtToUint64(x, y, &hash); - key.utype = hash; - l_asetInsert(set, key); - } - - return set; -} - - -/*---------------------------------------------------------------------* - * Set operations using hashing (rbtree) * - *---------------------------------------------------------------------*/ -/*! - * \brief ptaUnionByHash() - * - * \param[in] pta1, pta2 - * \return ptad with the union of the set of points, or NULL on error - * - *
- * Notes:
- *      (1) This is faster than ptaUnionByAset(), because the
- *          bucket lookup is O(n).  It should be used if the pts are
- *          integers (e.g., representing pixel positions).
- * 
- */ -PTA * -ptaUnionByHash(PTA *pta1, - PTA *pta2) -{ -PTA *pta3, *ptad; - - PROCNAME("ptaUnionByHash"); - - if (!pta1) - return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); - if (!pta2) - return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); - - /* Join */ - pta3 = ptaCopy(pta1); - ptaJoin(pta3, pta2, 0, -1); - - /* Eliminate duplicates */ - ptaRemoveDupsByHash(pta3, &ptad, NULL); - ptaDestroy(&pta3); - return ptad; -} - - -/*! - * \brief ptaRemoveDupsByHash() - * - * \param[in] ptas assumed to be integer values - * \param[out] pptad unique set of pts; duplicates removed - * \param[out] pdahash [optional] dnahash used for lookup - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates a pta with unique values.
- *      (2) The dnahash is built up with ptad to assure uniqueness.
- *          It can be used to find if a point is in the set:
- *              ptaFindPtByHash(ptad, dahash, x, y, &index)
- *      (3) The hash of the (x,y) location is simple and fast.  It scales
- *          up with the number of buckets to insure a fairly random
- *          bucket selection for adjacent points.
- *      (4) A Dna is used rather than a Numa because we need accurate
- *          representation of 32-bit integers that are indices into ptas.
- *          Integer --> float --> integer conversion makes errors for
- *          integers larger than 10M.
- *      (5) This is faster than ptaRemoveDupsByAset(), because the
- *          bucket lookup is O(n), although there is a double-loop
- *          lookup within the dna in each bucket.
- * 
- */ -l_ok -ptaRemoveDupsByHash(PTA *ptas, - PTA **pptad, - L_DNAHASH **pdahash) -{ -l_int32 i, n, index, items, x, y; -l_uint32 nsize; -l_uint64 key; -PTA *ptad; -L_DNAHASH *dahash; - - PROCNAME("ptaRemoveDupsByHash"); - - if (pdahash) *pdahash = NULL; - if (!pptad) - return ERROR_INT("&ptad not defined", procName, 1); - *pptad = NULL; - if (!ptas) - return ERROR_INT("ptas not defined", procName, 1); - - n = ptaGetCount(ptas); - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ - dahash = l_dnaHashCreate(nsize, 8); - ptad = ptaCreate(n); - *pptad = ptad; - for (i = 0, items = 0; i < n; i++) { - ptaGetIPt(ptas, i, &x, &y); - ptaFindPtByHash(ptad, dahash, x, y, &index); - if (index < 0) { /* not found */ - l_hashPtToUint64(x, y, &key); - l_dnaHashAdd(dahash, key, (l_float64)items); - ptaAddPt(ptad, x, y); - items++; - } - } - - if (pdahash) - *pdahash = dahash; - else - l_dnaHashDestroy(&dahash); - return 0; -} - - -/*! - * \brief ptaIntersectionByHash() - * - * \param[in] pta1, pta2 - * \return ptad intersection of the point sets, or NULL on error - * - *
- * Notes:
- *      (1) This is faster than ptaIntersectionByAset(), because the
- *          bucket lookup is O(n).  It should be used if the pts are
- *          integers (e.g., representing pixel positions).
- * 
- */ -PTA * -ptaIntersectionByHash(PTA *pta1, - PTA *pta2) -{ -l_int32 n1, n2, nsmall, i, x, y, index1, index2; -l_uint32 nsize2; -l_uint64 key; -L_DNAHASH *dahash1, *dahash2; -PTA *pta_small, *pta_big, *ptad; - - PROCNAME("ptaIntersectionByHash"); - - if (!pta1) - return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); - if (!pta2) - return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); - - /* Put the elements of the biggest pta into a dnahash */ - n1 = ptaGetCount(pta1); - n2 = ptaGetCount(pta2); - pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ - pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ - dahash1 = l_dnaHashCreateFromPta(pta_big); - - /* Build up the intersection of points. Add to ptad - * if the point is in pta_big (using dahash1) but hasn't - * yet been seen in the traversal of pta_small (using dahash2). */ - ptad = ptaCreate(0); - nsmall = ptaGetCount(pta_small); - findNextLargerPrime(nsmall / 20, &nsize2); /* buckets in hash table */ - dahash2 = l_dnaHashCreate(nsize2, 0); - for (i = 0; i < nsmall; i++) { - ptaGetIPt(pta_small, i, &x, &y); - ptaFindPtByHash(pta_big, dahash1, x, y, &index1); - if (index1 >= 0) { /* found */ - ptaFindPtByHash(pta_small, dahash2, x, y, &index2); - if (index2 == -1) { /* not found */ - ptaAddPt(ptad, x, y); - l_hashPtToUint64(x, y, &key); - l_dnaHashAdd(dahash2, key, (l_float64)i); - } - } - } - - l_dnaHashDestroy(&dahash1); - l_dnaHashDestroy(&dahash2); - return ptad; -} - - -/*! - * \brief ptaFindPtByHash() - * - * \param[in] pta - * \param[in] dahash built from pta - * \param[in] x, y arbitrary points - * \param[out] pindex index into pta if (x,y) is in pta; -1 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Fast lookup in dnaHash associated with a pta, to see if a
- *          random point (x,y) is already stored in the hash table.
- *      (2) We use a strong hash function to minimize the chance that
- *          two different points hash to the same key value.
- *      (3) We select the number of buckets to be about 5% of the size
- *          of the input %pta, so that when fully populated, each
- *          bucket (dna) will have about 20 entries, each being an index
- *          into %pta.  In lookup, after hashing to the key, and then
- *          again to the bucket, we traverse the bucket (dna), using the
- *          index into %pta to check if the point (x,y) has been found before.
- * 
- */ -l_ok -ptaFindPtByHash(PTA *pta, - L_DNAHASH *dahash, - l_int32 x, - l_int32 y, - l_int32 *pindex) -{ -l_int32 i, nvals, index, xi, yi; -l_uint64 key; -L_DNA *da; - - PROCNAME("ptaFindPtByHash"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = -1; - if (!pta) - return ERROR_INT("pta not defined", procName, 1); - if (!dahash) - return ERROR_INT("dahash not defined", procName, 1); - - l_hashPtToUint64(x, y, &key); - da = l_dnaHashGetDna(dahash, key, L_NOCOPY); - if (!da) return 0; - - /* Run through the da, looking for this point */ - nvals = l_dnaGetCount(da); - for (i = 0; i < nvals; i++) { - l_dnaGetIValue(da, i, &index); - ptaGetIPt(pta, index, &xi, &yi); - if (x == xi && y == yi) { - *pindex = index; - return 0; - } - } - - return 0; -} - - -/*! - * \brief l_dnaHashCreateFromPta() - * - * \param[in] pta - * \return dahash, or NULL on error - */ -L_DNAHASH * -l_dnaHashCreateFromPta(PTA *pta) -{ -l_int32 i, n, x, y; -l_uint32 nsize; -l_uint64 key; -L_DNAHASH *dahash; - - PROCNAME("l_dnaHashCreateFromPta"); - - if (!pta) - return (L_DNAHASH *)ERROR_PTR("pta not defined", procName, NULL); - - /* Build up dnaHash of indices, hashed by a key that is - * a large linear combination of x and y values designed to - * randomize the key. Having about 20 pts in each bucket is - * roughly optimal for speed for large sets. */ - n = ptaGetCount(pta); - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ -/* lept_stderr("Prime used: %d\n", nsize); */ - - /* Add each point, using the hash as key and the index into - * %ptas as the value. Storing the index enables operations - * that check for duplicates. 
*/ - dahash = l_dnaHashCreate(nsize, 8); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - l_hashPtToUint64(x, y, &key); - l_dnaHashAdd(dahash, key, (l_float64)i); - } - - return dahash; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptra.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptra.c deleted file mode 100644 index 5a04631e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptra.c +++ /dev/null @@ -1,1009 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! 
- * \file ptra.c - *
- *
- *      Ptra creation and destruction
- *          L_PTRA      *ptraCreate()
- *          void        *ptraDestroy()
- *
- *      Add/insert/remove/replace generic ptr object
- *          l_int32      ptraAdd()
- *          static l_int32  ptraExtendArray()
- *          l_int32      ptraInsert()
- *          void        *ptraRemove()
- *          void        *ptraRemoveLast()
- *          void        *ptraReplace()
- *          l_int32      ptraSwap()
- *          l_int32      ptraCompactArray()
- *
- *      Other array operations
- *          l_int32      ptraReverse()
- *          l_int32      ptraJoin()
- *
- *      Simple Ptra accessors
- *          l_int32      ptraGetMaxIndex()
- *          l_int32      ptraGetActualCount()
- *          void        *ptraGetPtrToItem()
- *
- *      Ptraa creation and destruction
- *          L_PTRAA     *ptraaCreate()
- *          void        *ptraaDestroy()
- *
- *      Ptraa accessors
- *          l_int32      ptraaGetSize()
- *          l_int32      ptraaInsertPtra()
- *          L_PTRA      *ptraaGetPtra()
- *
- *      Ptraa conversion
- *          L_PTRA      *ptraaFlattenToPtra()
- *
- *    Notes on the Ptra:
- *
- *    (1) The Ptra is a struct, not an array.  Always use the accessors
- *        in this file, never the fields directly.
- *    (2) Items can be placed anywhere in the allocated ptr array,
- *        including one index beyond the last ptr (in which case the
- *        ptr array is realloc'd).
- *    (3) Thus, the items on the ptr array need not be compacted.  In
- *        general there will be null pointers in the ptr array.
- *    (4) A compacted array will remain compacted on removal if
- *        arbitrary items are removed with compaction, or if items
- *        are removed from the end of the array.
- *    (5) For addition to and removal from the end of the array, this
- *        functions exactly like a stack, and with the same O(1) cost.
- *    (6) This differs from the generic stack in that we allow
- *        random access for insertion, removal and replacement.
- *        Removal can be done without compacting the array.
- *        Insertion into a null ptr in the array has no effect on
- *        the other pointers, but insertion into a location already
- *        occupied by an item has a cost proportional to the
- *        distance to the next null ptr in the array.
- *    (7) Null ptrs are valid input args for both insertion and
- *        replacement; this allows arbitrary swapping.
- *    (8) The item in the array with the largest index is at pa->imax.
- *        This can be any value from -1 (initialized; all array ptrs
- *        are null) up to pa->nalloc - 1 (the last ptr in the array).
- *    (9) In referring to the array: the first ptr is the "top" or
- *        "beginning"; the last pointer is the "bottom" or "end";
- *        items are shifted "up" towards the top when compaction occurs;
- *        and items are shifted "down" towards the bottom when forced to
- *        move due to an insertion.
- *   (10) It should be emphasized that insertion, removal and replacement
- *        are general:
- *         * You can insert an item into any ptr location in the
- *           allocated ptr array, as well as into the next ptr address
- *           beyond the allocated array (in which case a realloc will occur).
- *         * You can remove or replace an item from any ptr location
- *           in the allocated ptr array.
- *         * When inserting into an occupied location, you have
- *           three options for downshifting.
- *         * When removing, you can either leave the ptr null or
- *           compact the array.
- *
- *    Notes on the Ptraa:
- *
- *    (1) The Ptraa is a fixed size ptr array for holding Ptra.
- *        In that respect, it is different from other pointer arrays, which
- *        are extensible and grow using the *Add*() functions.
- *    (2) In general, the Ptra ptrs in the Ptraa can be randomly occupied.
- *        A typical usage is to allow an O(n) horizontal sort of Pix,
- *        where the size of the Ptra array is the width of the image,
- *        and each Ptra is an array of all the Pix at a specific x location.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 100000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - - /* Static function */ -static l_int32 ptraExtendArray(L_PTRA *pa); - -/*--------------------------------------------------------------------------* - * Ptra creation and destruction * - *--------------------------------------------------------------------------*/ -/*! - * \brief ptraCreate() - * - * \param[in] n size of ptr array to be alloc'd; use 0 for default - * \return pa, or NULL on error - */ -L_PTRA * -ptraCreate(l_int32 n) -{ -L_PTRA *pa; - - PROCNAME("ptraCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - pa = (L_PTRA *)LEPT_CALLOC(1, sizeof(L_PTRA)); - if ((pa->array = (void **)LEPT_CALLOC(n, sizeof(void *))) == NULL) { - ptraDestroy(&pa, 0, 0); - return (L_PTRA *)ERROR_PTR("ptr array not made", procName, NULL); - } - pa->nalloc = n; - pa->imax = -1; - pa->nactual = 0; - return pa; -} - - -/*! - * \brief ptraDestroy() - * - * \param[in,out] ppa will be set to null before returning - * \param[in] freeflag TRUE to free each remaining item in the array - * \param[in] warnflag TRUE to warn if any remaining items - * are not destroyed - * \return void - * - *
- * Notes:
- *      (1) If %freeflag == TRUE, frees each item in the array.
- *      (2) If %freeflag == FALSE and %warnflag == TRUE, and there are
- *          items on the array, this gives a warning and destroys the array.
- *          If these items are not owned elsewhere, this will cause
- *          a memory leak of all the items that were on the array.
- *          So if the items are not owned elsewhere and require their
- *          own destroy function, they must be destroyed before the ptra.
- *      (3) If %warnflag == FALSE, no warnings will be issued.  This is
- *          useful if the items are owned elsewhere, such as a
- *          PixMemoryStore().
- *      (4) To destroy the ptra, we destroy the ptr array, then
- *          the ptra, and then null the contents of the input ptr.
- * 
- */
-void
-ptraDestroy(L_PTRA  **ppa,
-            l_int32   freeflag,
-            l_int32   warnflag)
-{
-l_int32  idx, count;
-void    *entry;
-L_PTRA  *ptra;
-
-    PROCNAME("ptraDestroy");
-
-    if (!ppa) {
-        L_WARNING("ptr address is NULL\n", procName);
-        return;
-    }
-    ptra = *ppa;
-    if (!ptra)
-        return;
-
-        /* Remaining items are either freed or, if requested, reported */
-    ptraGetActualCount(ptra, &count);
-    if (count > 0 && freeflag) {
-            /* The bound is re-read on every pass because ptraRemove()
-             * lowers imax when it takes the last occupied slot. */
-        for (idx = 0; idx <= ptra->imax; idx++) {
-            entry = ptraRemove(ptra, idx, L_NO_COMPACTION);
-            if (entry != NULL)
-                LEPT_FREE(entry);
-        }
-    } else if (count > 0 && warnflag) {
-        L_WARNING("potential memory leak of %d items in ptra\n",
-                  procName, count);
-    }
-
-        /* Free the ptr array, then the struct, then null the handle */
-    LEPT_FREE(ptra->array);
-    LEPT_FREE(ptra);
-    *ppa = NULL;
-}
-
-
-/*--------------------------------------------------------------------------*
- *               Add/insert/remove/replace generic ptr object               *
- *--------------------------------------------------------------------------*/
-/*!
- * \brief   ptraAdd()
- *
- * \param[in]    pa      ptra
- * \param[in]    item    generic ptr to a struct
- * \return  0 if OK, 1 on error
- *
- *
- * Notes:
- *      (1) This adds the element to the next location beyond imax,
- *          which is the largest occupied ptr in the array.  This is
- *          what you expect from a stack, where all ptrs up to and
- *          including imax are occupied, but here the occupation of
- *          items in the array is entirely arbitrary.
- * 
- */
-l_ok
-ptraAdd(L_PTRA  *pa,
-        void    *item)
-{
-l_int32  imax;
-
-    PROCNAME("ptraAdd");
-
-    if (!pa)
-        return ERROR_INT("pa not defined", procName, 1);
-    if (!item)
-        return ERROR_INT("item not defined", procName, 1);
-
-        /* Grow the ptr array if the slot after imax is not allocated */
-    ptraGetMaxIndex(pa, &imax);
-    if (imax >= pa->nalloc - 1 && ptraExtendArray(pa))
-        return ERROR_INT("extension failure", procName, 1);
-    pa->array[imax + 1] = (void *)item;
-    pa->imax++;
-    pa->nactual++;
-    return 0;
-}
-
-
-/*!
- * \brief   ptraExtendArray()
- *
- * \param[in]    pa
- * \return  0 if OK, 1 on error
- *
- * Notes:
- *      (1) This doubles the size of the ptr array.
- *      (2) On failure, pa->array is not overwritten with the null
- *          return value, and pa->nalloc is not changed.
- */
-static l_int32
-ptraExtendArray(L_PTRA  *pa)
-{
-void  **newarray;
-
-    PROCNAME("ptraExtendArray");
-
-    if (!pa)
-        return ERROR_INT("pa not defined", procName, 1);
-
-        /* Take the result in a temporary.  Assigning it directly to
-         * pa->array would replace the only reference to the existing
-         * array with NULL if the reallocation fails. */
-    newarray = (void **)reallocNew((void **)&pa->array,
-                                   sizeof(void *) * pa->nalloc,
-                                   2 * sizeof(void *) * pa->nalloc);
-    if (newarray == NULL)
-        return ERROR_INT("new ptr array not returned", procName, 1);
-
-    pa->array = newarray;
-    pa->nalloc *= 2;
-    return 0;
-}
-
-
-/*!
- * \brief   ptraInsert()
- *
- * \param[in]    pa          ptra
- * \param[in]    index       location in ptra to insert new value
- * \param[in]    item        generic ptr to a struct; can be null
- * \param[in]    shiftflag   L_AUTO_DOWNSHIFT, L_MIN_DOWNSHIFT, L_FULL_DOWNSHIFT
- * \return  0 if OK, 1 on error
- *
- *
- * Notes:
- *      (1) This checks first to see if the location is valid, and
- *          then if there is presently an item there.  If there is not,
- *          it is simply inserted into that location.
- *      (2) If there is an item at the insert location, items must be
- *          moved down to make room for the insert.  In the downward
- *          shift there are three options, given by %shiftflag.
- *            ~ If %shiftflag == L_AUTO_DOWNSHIFT, a decision is made
- *              whether, in a cascade of items, to downshift a minimum
- *              amount or for all items above %index.  The decision is
- *              based on the expectation of finding holes (null ptrs)
- *              between %index and the bottom of the array.
- *              Assuming the holes are distributed uniformly, if 2 or more
- *              holes are expected, we do a minimum shift.
- *            ~ If %shiftflag == L_MIN_DOWNSHIFT, the downward shifting
- *              cascade of items progresses a minimum amount, until
- *              the first empty slot is reached.  This mode requires
- *              some computation before the actual shifting is done.
- *            ~ If %shiftflag == L_FULL_DOWNSHIFT, a shifting cascade is
- *              performed where pa[i] --> pa[i + 1] for all i >= index.
- *              Then, the item is inserted at pa[index].
- *      (3) If you are not using L_AUTO_DOWNSHIFT, the rule of thumb is
- *          to use L_FULL_DOWNSHIFT if the array is compacted (each
- *          element points to an item), and to use L_MIN_DOWNSHIFT
- *          if there are a significant number of null pointers.
- *          There is no penalty to using L_MIN_DOWNSHIFT for a
- *          compacted array, however, because the full shift is required
- *          and we don't do the O(n) computation to look for holes.
- *      (4) This should not be used repeatedly on large arrays,
- *          because the function is generally O(n).
- *      (5) However, it can be used repeatedly if we start with an empty
- *          ptr array and insert only once at each location.  For example,
- *          you can support an array of Numa, where at each ptr location
- *          you store either 0 or 1 Numa, and the Numa can be added
- *          randomly to the ptr array.
- * 
- */
-l_ok
-ptraInsert(L_PTRA  *pa,
-           l_int32  index,
-           void    *item,
-           l_int32  shiftflag)
-{
-l_int32    i, ihole, imax, nholes;
-l_float32  nexpected;
-
-    PROCNAME("ptraInsert");
-
-    if (!pa)
-        return ERROR_INT("pa not defined", procName, 1);
-    if (index < 0 || index > pa->nalloc)
-        return ERROR_INT("index not in [0 ... nalloc]", procName, 1);
-    if (shiftflag != L_AUTO_DOWNSHIFT && shiftflag != L_MIN_DOWNSHIFT &&
-        shiftflag != L_FULL_DOWNSHIFT)
-        return ERROR_INT("invalid shiftflag", procName, 1);
-
-        /* The item count is updated only after every step that can
-         * fail, so that an error return leaves nactual unchanged. */
-    if (index == pa->nalloc) {  /* can happen when index == n */
-        if (ptraExtendArray(pa))
-            return ERROR_INT("extension failure", procName, 1);
-    }
-
-        /* Number of null ptrs in [0 ... imax] before this insertion */
-    ptraGetMaxIndex(pa, &imax);
-    nholes = imax + 1 - pa->nactual;
-
-        /* We are inserting into a hole or adding to the end of the array.
-         * No existing items are moved. */
-    if (pa->array[index] == NULL) {
-        pa->array[index] = item;
-        if (item) {
-            pa->nactual++;
-            if (index > imax)  /* new item put beyond max so far */
-                pa->imax = index;
-        }
-        return 0;
-    }
-
-        /* We are inserting at the location of an existing item,
-         * forcing the existing item and those below to shift down.
-         * First, extend the array automatically if the last element
-         * (nalloc - 1) is occupied (imax).  This may not be necessary
-         * in every situation, but only an anomalous sequence of insertions
-         * into the array would cause extra ptr allocation.  */
-    if (imax >= pa->nalloc - 1 && ptraExtendArray(pa))
-        return ERROR_INT("extension failure", procName, 1);
-
-        /* If there are no holes, do a full downshift.
-         * Otherwise, if L_AUTO_DOWNSHIFT, use the expected number
-         * of holes between index and n to determine the shift mode.
-         * The fraction of the array below index is computed in
-         * floating point; with integer division it would be 0 for
-         * every index > 0 and the minimum shift would never be chosen. */
-    if (nholes == 0) {
-        shiftflag = L_FULL_DOWNSHIFT;
-    } else if (shiftflag == L_AUTO_DOWNSHIFT) {
-        if (imax < 10) {
-            shiftflag = L_FULL_DOWNSHIFT;  /* no big deal */
-        } else {
-            nexpected = (l_float32)nholes *
-                        ((l_float32)(imax - index) / (l_float32)imax);
-            shiftflag = (nexpected > 2.0) ? L_MIN_DOWNSHIFT : L_FULL_DOWNSHIFT;
-        }
-    }
-
-    if (shiftflag == L_MIN_DOWNSHIFT) {  /* run down looking for a hole */
-        for (ihole = index + 1; ihole <= imax; ihole++) {
-            if (pa->array[ihole] == NULL)
-                break;
-        }
-    } else {  /* L_FULL_DOWNSHIFT */
-        ihole = imax + 1;
-    }
-
-        /* Shift [index ... ihole - 1] down by one, then store the item */
-    for (i = ihole; i > index; i--)
-        pa->array[i] = pa->array[i - 1];
-    pa->array[index] = (void *)item;
-    if (item)
-        pa->nactual++;
-    if (ihole == imax + 1)  /* the last item was shifted down */
-        pa->imax++;
-
-    return 0;
-}
-
-
-/*!
- * \brief   ptraRemove()
- *
- * \param[in]    pa       ptra
- * \param[in]    index    element to be removed
- * \param[in]    flag     L_NO_COMPACTION, L_COMPACTION
- * \return  item, or NULL on error
- *
- *
- * Notes:
- *      (1) If flag == L_NO_COMPACTION, this removes the item and
- *          nulls the ptr on the array.  If it takes the last item
- *          in the array, pa->imax is reduced to the index of the next item.
- *      (2) If flag == L_COMPACTION, this compacts the array
- *          for all i >= index.  It should not be used repeatedly on
- *          large arrays, because compaction is O(n).
- *      (3) The ability to remove without automatic compaction allows
- *          removal with cost O(1).
- * 
- */
-void *
-ptraRemove(L_PTRA  *pa,
-           l_int32  index,
-           l_int32  flag)
-{
-l_int32  i, imax, fromend, icurrent;
-void    *item;
-
-    PROCNAME("ptraRemove");
-
-    if (!pa)
-        return (void *)ERROR_PTR("pa not defined", procName, NULL);
-    ptraGetMaxIndex(pa, &imax);
-    if (index < 0 || index > imax)
-        return (void *)ERROR_PTR("index not in [0 ... imax]", procName, NULL);
-
-        /* The slot may already be a hole; the count changes only
-         * if an item was actually taken. */
-    item = pa->array[index];
-    if (item)
-        pa->nactual--;
-    pa->array[index] = NULL;
-
-        /* If we took the last item, need to reduce pa->imax to the
-         * next occupied slot (-1 if the array is now empty) */
-    fromend = (index == imax);
-    if (fromend) {
-        for (i = index - 1; i >= 0; i--) {
-            if (pa->array[i])
-                break;
-        }
-        pa->imax = i;
-    }
-
-        /* Compact from index to the end of the array */
-    if (!fromend && flag == L_COMPACTION) {
-        for (icurrent = index, i = index + 1; i <= imax; i++) {
-            if (pa->array[i])
-                pa->array[icurrent++] = pa->array[i];
-        }
-        pa->imax = icurrent - 1;
-
-            /* Null the vacated slots.  They still hold copies of ptrs
-             * that were moved up, and would otherwise be seen as
-             * occupied by ptraInsert() and ptraGetPtrToItem(). */
-        for (i = icurrent; i <= imax; i++)
-            pa->array[i] = NULL;
-    }
-    return item;
-}
-
-
-/*!
- * \brief   ptraRemoveLast()
- *
- * \param[in]    pa    ptra
- * \return  item, or NULL on error or if the array is empty
- */
-void *
-ptraRemoveLast(L_PTRA  *pa)
-{
-l_int32  imax;
-
-    PROCNAME("ptraRemoveLast");
-
-    if (!pa)
-        return (void *)ERROR_PTR("pa not defined", procName, NULL);
-
-        /* Remove the last item in the array.  No compaction is required. */
-    ptraGetMaxIndex(pa, &imax);
-    if (imax >= 0)
-        return ptraRemove(pa, imax, L_NO_COMPACTION);
-    else  /* empty */
-        return NULL;
-}
-
-
-/*!
- * \brief ptraReplace() - * - * \param[in] pa ptra - * \param[in] index element to be replaced - * \param[in] item new generic ptr to a struct; can be null - * \param[in] freeflag TRUE to free old item; FALSE to return it - * \return item old item, if it exists and is not freed, - * or NULL on error - */ -void * -ptraReplace(L_PTRA *pa, - l_int32 index, - void *item, - l_int32 freeflag) -{ -l_int32 imax; -void *olditem; - - PROCNAME("ptraReplace"); - - if (!pa) - return (void *)ERROR_PTR("pa not defined", procName, NULL); - ptraGetMaxIndex(pa, &imax); - if (index < 0 || index > imax) - return (void *)ERROR_PTR("index not in [0 ... imax]", procName, NULL); - - olditem = pa->array[index]; - pa->array[index] = item; - if (!item && olditem) - pa->nactual--; - else if (item && !olditem) - pa->nactual++; - - if (freeflag == FALSE) - return olditem; - - if (olditem) - LEPT_FREE(olditem); - return NULL; -} - - -/*! - * \brief ptraSwap() - * - * \param[in] pa ptra - * \param[in] index1 - * \param[in] index2 - * \return 0 if OK, 1 on error - */ -l_ok -ptraSwap(L_PTRA *pa, - l_int32 index1, - l_int32 index2) -{ -l_int32 imax; -void *item; - - PROCNAME("ptraSwap"); - - if (!pa) - return ERROR_INT("pa not defined", procName, 1); - if (index1 == index2) - return 0; - ptraGetMaxIndex(pa, &imax); - if (index1 < 0 || index1 > imax || index2 < 0 || index2 > imax) - return ERROR_INT("invalid index: not in [0 ... imax]", procName, 1); - - item = ptraRemove(pa, index1, L_NO_COMPACTION); - item = ptraReplace(pa, index2, item, FALSE); - ptraInsert(pa, index1, item, L_MIN_DOWNSHIFT); - return 0; -} - - -/*! - * \brief ptraCompactArray() - * - * \param[in] pa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This compacts the items on the array, filling any empty ptrs.
- *      (2) This does not change the size of the array of ptrs.
- * 
- */ -l_ok -ptraCompactArray(L_PTRA *pa) -{ -l_int32 i, imax, nactual, index; - - PROCNAME("ptraCompactArray"); - - if (!pa) - return ERROR_INT("pa not defined", procName, 1); - ptraGetMaxIndex(pa, &imax); - ptraGetActualCount(pa, &nactual); - if (imax + 1 == nactual) return 0; - - /* Compact the array */ - for (i = 0, index = 0; i <= imax; i++) { - if (pa->array[i]) - pa->array[index++] = pa->array[i]; - } - pa->imax = index - 1; - if (nactual != index) - L_ERROR("index = %d; != nactual\n", procName, index); - - return 0; -} - - -/*----------------------------------------------------------------------* - * Other array operations * - *----------------------------------------------------------------------*/ -/*! - * \brief ptraReverse() - * - * \param[in] pa ptra - * \return 0 if OK, 1 on error - */ -l_ok -ptraReverse(L_PTRA *pa) -{ -l_int32 i, imax; - - PROCNAME("ptraReverse"); - - if (!pa) - return ERROR_INT("pa not defined", procName, 1); - ptraGetMaxIndex(pa, &imax); - - for (i = 0; i < (imax + 1) / 2; i++) - ptraSwap(pa, i, imax - i); - return 0; -} - - -/*! - * \brief ptraJoin() - * - * \param[in] pa1 add to this one - * \param[in] pa2 appended to pa1, and emptied of items; can be null - * \return 0 if OK, 1 on error - */ -l_ok -ptraJoin(L_PTRA *pa1, - L_PTRA *pa2) -{ -l_int32 i, imax; -void *item; - - PROCNAME("ptraJoin"); - - if (!pa1) - return ERROR_INT("pa1 not defined", procName, 1); - if (!pa2) - return 0; - - ptraGetMaxIndex(pa2, &imax); - for (i = 0; i <= imax; i++) { - item = ptraRemove(pa2, i, L_NO_COMPACTION); - ptraAdd(pa1, item); - } - - return 0; -} - - - -/*----------------------------------------------------------------------* - * Simple ptra accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief ptraGetMaxIndex() - * - * \param[in] pa ptra - * \param[out] pmaxindex index of last item in the array; - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) The largest index to an item in the array is %maxindex.
- *          %maxindex is one less than the number of items that would be
- *          in the array if there were no null pointers between 0
- *          and %maxindex - 1.  However, because the internal ptr array
- *          need not be compacted, there may be NULL pointers at
- *          indices below %maxindex; for example, if items have
- *          been removed.
- *      (2) When an item is added to the end of the array, it goes
- *          into pa->array[maxindex + 1], and maxindex is then
- *          incremented by 1.
- *      (3) If there are no items in the array, this returns %maxindex = -1.
- * 
- */
-l_ok
-ptraGetMaxIndex(L_PTRA   *pa,
-                l_int32  *pmaxindex)
-{
-    PROCNAME("ptraGetMaxIndex");
-
-    if (pa == NULL)
-        return ERROR_INT("pa not defined", procName, 1);
-    if (pmaxindex == NULL)
-        return ERROR_INT("&maxindex not defined", procName, 1);
-
-        /* Report the stored index of the last item */
-    *pmaxindex = pa->imax;
-    return 0;
-}
-
-
-/*!
- * \brief   ptraGetActualCount()
- *
- * \param[in]    pa        ptra
- * \param[out]   pcount    actual number of items on the ptr array
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) The actual number of items on the ptr array, pa->nactual,
- *          will be smaller than pa->imax + 1 if the array is not compacted.
- * 
- */
-l_ok
-ptraGetActualCount(L_PTRA   *pa,
-                   l_int32  *pcount)
-{
-    PROCNAME("ptraGetActualCount");
-
-    if (pa == NULL)
-        return ERROR_INT("pa not defined", procName, 1);
-    if (pcount == NULL)
-        return ERROR_INT("&count not defined", procName, 1);
-
-        /* Report the stored count of non-null items */
-    *pcount = pa->nactual;
-    return 0;
-}
-
-
-/*!
- * \brief   ptraGetPtrToItem()
- *
- * \param[in]    pa       ptra
- * \param[in]    index    of element to be retrieved
- * \return  a ptr to the element, or NULL on error
- *
- *
- * Notes:
- *      (1) This returns a ptr to the item.  You must cast it to
- *          the type of item.  Do not destroy it; the item belongs
- *          to the Ptra.
- *      (2) This can access all possible items on the ptr array.
- *          If an item doesn't exist, it returns null.
- * 
- */
-void *
-ptraGetPtrToItem(L_PTRA  *pa,
-                 l_int32  index)
-{
-l_int32  inrange;
-
-    PROCNAME("ptraGetPtrToItem");
-
-    if (pa == NULL)
-        return (void *)ERROR_PTR("pa not defined", procName, NULL);
-
-        /* Any allocated slot can be read, including those above imax */
-    inrange = (index >= 0 && index < pa->nalloc);
-    if (!inrange)
-        return (void *)ERROR_PTR("index not in [0 ... nalloc-1]",
-                                 procName, NULL);
-
-    return pa->array[index];
-}
-
-
-/*--------------------------------------------------------------------------*
- *                      Ptraa creation and destruction                      *
- *--------------------------------------------------------------------------*/
-/*!
- * \brief   ptraaCreate()
- *
- * \param[in]    n    size of ptr array to be alloc'd
- * \return  paa, or NULL on error
- *
- *
- * Notes:
- *      (1) The ptraa is generated with a fixed size, that can not change.
- *          The ptra can be generated and inserted randomly into this array.
- * 
- */
-L_PTRAA *
-ptraaCreate(l_int32  n)
-{
-L_PTRAA  *paa;
-
-    PROCNAME("ptraaCreate");
-
-    if (n <= 0)
-        return (L_PTRAA *)ERROR_PTR("n must be > 0", procName, NULL);
-
-        /* The struct must exist before its ptra field is written */
-    if ((paa = (L_PTRAA *)LEPT_CALLOC(1, sizeof(L_PTRAA))) == NULL)
-        return (L_PTRAA *)ERROR_PTR("paa not made", procName, NULL);
-    if ((paa->ptra = (L_PTRA **)LEPT_CALLOC(n, sizeof(L_PTRA *))) == NULL) {
-        ptraaDestroy(&paa, 0, 0);
-        return (L_PTRAA *)ERROR_PTR("ptr array not made", procName, NULL);
-    }
-    paa->nalloc = n;
-    return paa;
-}
-
-
-/*!
- * \brief   ptraaDestroy()
- *
- * \param[in,out]   ppaa       will be set to null before returning
- * \param[in]       freeflag   TRUE to free each remaining item in each ptra
- * \param[in]       warnflag   TRUE to warn if any remaining items
- *                             are not destroyed
- * \return  void
- *
- *
- * Notes:
- *      (1) See ptraDestroy() for use of %freeflag and %warnflag.
- *      (2) To destroy the ptraa, we destroy each ptra, then the ptr array,
- *          then the ptraa, and then null the contents of the input ptr.
- * 
- */
-void
-ptraaDestroy(L_PTRAA  **ppaa,
-             l_int32    freeflag,
-             l_int32    warnflag)
-{
-l_int32   slot, nslots;
-L_PTRA   *ptra;
-L_PTRAA  *ptraa;
-
-    PROCNAME("ptraaDestroy");
-
-    if (!ppaa) {
-        L_WARNING("ptr address is NULL\n", procName);
-        return;
-    }
-    ptraa = *ppaa;
-    if (!ptraa)
-        return;
-
-        /* Detach each ptra from the array before destroying it.
-         * Empty slots yield a null ptra, which ptraDestroy() ignores. */
-    ptraaGetSize(ptraa, &nslots);
-    slot = 0;
-    while (slot < nslots) {
-        ptra = ptraaGetPtra(ptraa, slot, L_REMOVE);
-        ptraDestroy(&ptra, freeflag, warnflag);
-        slot++;
-    }
-
-        /* Free the ptr array, then the struct, then null the handle */
-    LEPT_FREE(ptraa->ptra);
-    LEPT_FREE(ptraa);
-    *ppaa = NULL;
-}
-
-
-/*--------------------------------------------------------------------------*
- *                              Ptraa accessors                             *
- *--------------------------------------------------------------------------*/
-/*!
- * \brief   ptraaGetSize()
- *
- * \param[in]    paa
- * \param[out]   psize    size of ptr array
- * \return  0 if OK; 1 on error
- */
-l_ok
-ptraaGetSize(L_PTRAA  *paa,
-             l_int32  *psize)
-{
-    PROCNAME("ptraaGetSize");
-
-    if (paa == NULL)
-        return ERROR_INT("paa not defined", procName, 1);
-    if (psize == NULL)
-        return ERROR_INT("&size not defined", procName, 1);
-
-        /* The size is the allocated number of ptra slots */
-    *psize = paa->nalloc;
-    return 0;
-}
-
-
-/*!
- * \brief   ptraaInsertPtra()
- *
- * \param[in]    paa      ptraa
- * \param[in]    index    location in array for insertion
- * \param[in]    pa       to be inserted
- * \return  0 if OK; 1 on error
- *
- *
- * Notes:
- *      (1) Caller should check return value.  On success, the Ptra
- *          is inserted in the Ptraa and is owned by it.  However,
- *          on error, the Ptra remains owned by the caller.
- * 
- */
-l_ok
-ptraaInsertPtra(L_PTRAA  *paa,
-                l_int32   index,
-                L_PTRA   *pa)
-{
-l_int32  nslots;
-
-    PROCNAME("ptraaInsertPtra");
-
-    if (paa == NULL)
-        return ERROR_INT("paa not defined", procName, 1);
-    if (pa == NULL)
-        return ERROR_INT("pa not defined", procName, 1);
-
-    ptraaGetSize(paa, &nslots);
-    if (!(index >= 0 && index < nslots))
-        return ERROR_INT("invalid index", procName, 1);
-
-        /* An occupied slot is never overwritten; the caller
-         * must remove the existing ptra first. */
-    if (paa->ptra[index])
-        return ERROR_INT("ptra already stored at index", procName, 1);
-
-    paa->ptra[index] = pa;
-    return 0;
-}
-
-
-/*!
- * \brief   ptraaGetPtra()
- *
- * \param[in]    paa          ptraa
- * \param[in]    index        location in array
- * \param[in]    accessflag   L_HANDLE_ONLY, L_REMOVE
- * \return  ptra at index location, or NULL on error or if there
- *              is no ptra there.
- *
- *
- * Notes:
- *      (1) This returns the ptra ptr.  If %accessflag == L_HANDLE_ONLY,
- *          the ptra is left on the ptraa.  If %accessflag == L_REMOVE,
- *          the ptr in the ptraa is set to NULL, and the caller
- *          is responsible for disposing of the ptra (either putting it
- *          back on the ptraa, or destroying it).
- *      (2) This returns NULL if there is no Ptra at the index location.
- * 
- */
-L_PTRA *
-ptraaGetPtra(L_PTRAA  *paa,
-             l_int32   index,
-             l_int32   accessflag)
-{
-l_int32  nslots;
-L_PTRA  *found;
-
-    PROCNAME("ptraaGetPtra");
-
-    if (paa == NULL)
-        return (L_PTRA *)ERROR_PTR("paa not defined", procName, NULL);
-
-    ptraaGetSize(paa, &nslots);
-    if (!(index >= 0 && index < nslots))
-        return (L_PTRA *)ERROR_PTR("invalid index", procName, NULL);
-    if (!(accessflag == L_HANDLE_ONLY || accessflag == L_REMOVE))
-        return (L_PTRA *)ERROR_PTR("invalid accessflag", procName, NULL);
-
-        /* With L_REMOVE the slot is cleared and the caller owns
-         * the ptra; otherwise the ptra stays on the ptraa. */
-    found = paa->ptra[index];
-    if (accessflag == L_REMOVE)
-        paa->ptra[index] = NULL;
-    return found;
-}
-
-
-/*--------------------------------------------------------------------------*
- *                             Ptraa conversion                             *
- *--------------------------------------------------------------------------*/
-/*!
- * \brief   ptraaFlattenToPtra()
- *
- * \param[in]    paa    ptraa
- * \return  ptra, or NULL on error
- *
- *
- * Notes:
- *      (1) This 'flattens' the ptraa to a ptra, taking the items in
- *          each ptra, in order, starting with the first ptra, etc.
- *      (2) As a side-effect, the ptra are all removed from the ptraa
- *          and destroyed, leaving an empty ptraa.
- * 
- */ -L_PTRA * -ptraaFlattenToPtra(L_PTRAA *paa) -{ -l_int32 i, n; -L_PTRA *pat, *pad; - - PROCNAME("ptraaFlattenToPtra"); - - if (!paa) - return (L_PTRA *)ERROR_PTR("paa not defined", procName, NULL); - - pad = ptraCreate(0); - ptraaGetSize(paa, &n); - for (i = 0; i < n; i++) { - pat = ptraaGetPtra(paa, i, L_REMOVE); - if (!pat) continue; - ptraJoin(pad, pat); - ptraDestroy(&pat, FALSE, FALSE); /* they're all empty */ - } - - return pad; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptra.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptra.h deleted file mode 100644 index dc5216cd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/ptra.h +++ /dev/null @@ -1,95 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_PTRA_H -#define LEPTONICA_PTRA_H - -/*! - * \file ptra.h - * - *
- *  Contains the following structs:
- *      struct L_Ptra
- *      struct L_Ptraa
- *
- *  Contains definitions for:
- *      L_Ptra compaction flags for removal
- *      L_Ptra shifting flags for insert
- *      L_Ptraa accessor flags
- * 
- */ - - -/*------------------------------------------------------------------------* - * Generic Ptr Array Structs * - *------------------------------------------------------------------------*/ - - /*! Generic pointer array */ -struct L_Ptra -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - l_int32 imax; /*!< greatest valid index */ - l_int32 nactual; /*!< actual number of stored elements */ - void **array; /*!< ptr array */ -}; -typedef struct L_Ptra L_PTRA; - - - /*! Array of generic pointer arrays */ -struct L_Ptraa -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - struct L_Ptra **ptra; /*!< array of ptra */ -}; -typedef struct L_Ptraa L_PTRAA; - - - -/*------------------------------------------------------------------------* - * Accessor and modifier flags for L_Ptra and L_Ptraa * - *------------------------------------------------------------------------*/ - -/*! Ptra Removal */ -enum { - L_NO_COMPACTION = 1, /*!< null the pointer only */ - L_COMPACTION = 2 /*!< compact the array */ -}; - -/*! Ptra Insertion */ -enum { - L_AUTO_DOWNSHIFT = 0, /*!< choose based on number of holes */ - L_MIN_DOWNSHIFT = 1, /*!< downshifts min # of ptrs below insert */ - L_FULL_DOWNSHIFT = 2 /*!< downshifts all ptrs below insert */ -}; - -/*! Ptraa Accessor */ -enum { - L_HANDLE_ONLY = 0, /*!< ptr to L_Ptra; caller can inspect only */ - L_REMOVE = 1 /*!< caller owns; destroy or save in L_Ptraa */ -}; - - -#endif /* LEPTONICA_PTRA_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/quadtree.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/quadtree.c deleted file mode 100644 index 6c10232a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/quadtree.c +++ /dev/null @@ -1,701 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file quadtree.c - *
- *
- *      Top level quadtree linear statistics
- *          l_int32   pixQuadtreeMean()
- *          l_int32   pixQuadtreeVariance()
- *
- *      Statistics in an arbitrary rectangle
- *          l_int32   pixMeanInRectangle()
- *          l_int32   pixVarianceInRectangle()
- *
- *      Quadtree regions
- *          BOXAA    *boxaaQuadtreeRegions()
- *
- *      Quadtree access
- *          l_int32   quadtreeGetParent()
- *          l_int32   quadtreeGetChildren()
- *          l_int32   quadtreeMaxLevels()
- *
- *      Display quadtree
- *          PIX      *fpixaDisplayQuadtree()
- *
- *
- *  There are many other statistical quantities that can be computed
- *  in a quadtree, such as rank values, and these can be added as
- *  the need arises.
- *
- *  Similar results that can approximate a single level of the quadtree
- *  can be generated by pixGetAverageTiled().  There we specify the
- *  tile size over which the mean, mean square, and root variance
- *  are generated; the results are saved in a (reduced size) pix.
- *  Because the tile dimensions are integers, it is usually not possible
- *  to obtain tilings that are a power of 2, as required for quadtrees.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -#ifndef NO_CONSOLE_IO -#define DEBUG_BOXES 0 -#endif /* !NO_CONSOLE_IO */ - - -/*----------------------------------------------------------------------* - * Top-level quadtree linear statistics * - *----------------------------------------------------------------------*/ -/*! - * \brief pixQuadtreeMean() - * - * \param[in] pixs 8 bpp, no colormap - * \param[in] nlevels in quadtree; max allowed depends on image size - * \param[in] pix_ma input mean accumulator; can be null - * \param[out] pfpixa mean values in quadtree - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The returned fpixa has %nlevels of fpix, each containing
- *          the mean values at its level.  Level 0 has a
- *          single value; level 1 has 4 values; level 2 has 16; etc.
- * 
- */ -l_ok -pixQuadtreeMean(PIX *pixs, - l_int32 nlevels, - PIX *pix_ma, - FPIXA **pfpixa) -{ -l_int32 i, j, w, h, size, n; -l_float32 val; -BOX *box; -BOXA *boxa; -BOXAA *baa; -FPIX *fpix; -PIX *pix_mac; - - PROCNAME("pixQuadtreeMean"); - - if (!pfpixa) - return ERROR_INT("&fpixa not defined", procName, 1); - *pfpixa = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (nlevels > quadtreeMaxLevels(w, h)) - return ERROR_INT("nlevels too large for image", procName, 1); - - if (!pix_ma) - pix_mac = pixBlockconvAccum(pixs); - else - pix_mac = pixClone(pix_ma); - if (!pix_mac) - return ERROR_INT("pix_mac not made", procName, 1); - - if ((baa = boxaaQuadtreeRegions(w, h, nlevels)) == NULL) { - pixDestroy(&pix_mac); - return ERROR_INT("baa not made", procName, 1); - } - - *pfpixa = fpixaCreate(nlevels); - for (i = 0; i < nlevels; i++) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - size = 1 << i; - n = boxaGetCount(boxa); /* n == size * size */ - fpix = fpixCreate(size, size); - for (j = 0; j < n; j++) { - box = boxaGetBox(boxa, j, L_CLONE); - pixMeanInRectangle(pixs, box, pix_mac, &val); - fpixSetPixel(fpix, j % size, j / size, val); - boxDestroy(&box); - } - fpixaAddFPix(*pfpixa, fpix, L_INSERT); - boxaDestroy(&boxa); - } - - pixDestroy(&pix_mac); - boxaaDestroy(&baa); - return 0; -} - - -/*! - * \brief pixQuadtreeVariance() - * - * \param[in] pixs 8 bpp, no colormap - * \param[in] nlevels in quadtree - * \param[in] pix_ma input mean accumulator; can be null - * \param[in] dpix_msa input mean square accumulator; can be null - * \param[out] pfpixa_v [optional] variance values in quadtree - * \param[out] pfpixa_rv [optional] root variance values in quadtree - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The returned fpixav and fpixarv have %nlevels of fpix,
- *          each containing at the respective levels the variance
- *          and root variance values.
- * 
- */ -l_ok -pixQuadtreeVariance(PIX *pixs, - l_int32 nlevels, - PIX *pix_ma, - DPIX *dpix_msa, - FPIXA **pfpixa_v, - FPIXA **pfpixa_rv) -{ -l_int32 i, j, w, h, size, n; -l_float32 var, rvar; -BOX *box; -BOXA *boxa; -BOXAA *baa; -FPIX *fpixv, *fpixrv; -PIX *pix_mac; /* copy of mean accumulator */ -DPIX *dpix_msac; /* msa clone */ - - PROCNAME("pixQuadtreeVariance"); - - if (!pfpixa_v && !pfpixa_rv) - return ERROR_INT("neither &fpixav nor &fpixarv defined", procName, 1); - if (pfpixa_v) *pfpixa_v = NULL; - if (pfpixa_rv) *pfpixa_rv = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - if (nlevels > quadtreeMaxLevels(w, h)) - return ERROR_INT("nlevels too large for image", procName, 1); - - if (!pix_ma) - pix_mac = pixBlockconvAccum(pixs); - else - pix_mac = pixClone(pix_ma); - if (!pix_mac) - return ERROR_INT("pix_mac not made", procName, 1); - if (!dpix_msa) - dpix_msac = pixMeanSquareAccum(pixs); - else - dpix_msac = dpixClone(dpix_msa); - if (!dpix_msac) { - pixDestroy(&pix_mac); - return ERROR_INT("dpix_msac not made", procName, 1); - } - - if ((baa = boxaaQuadtreeRegions(w, h, nlevels)) == NULL) { - pixDestroy(&pix_mac); - dpixDestroy(&dpix_msac); - return ERROR_INT("baa not made", procName, 1); - } - - if (pfpixa_v) *pfpixa_v = fpixaCreate(nlevels); - if (pfpixa_rv) *pfpixa_rv = fpixaCreate(nlevels); - for (i = 0; i < nlevels; i++) { - boxa = boxaaGetBoxa(baa, i, L_CLONE); - size = 1 << i; - n = boxaGetCount(boxa); /* n == size * size */ - if (pfpixa_v) fpixv = fpixCreate(size, size); - if (pfpixa_rv) fpixrv = fpixCreate(size, size); - for (j = 0; j < n; j++) { - box = boxaGetBox(boxa, j, L_CLONE); - pixVarianceInRectangle(pixs, box, pix_mac, dpix_msac, &var, &rvar); - if (pfpixa_v) fpixSetPixel(fpixv, j % size, j / size, var); - if (pfpixa_rv) fpixSetPixel(fpixrv, j % size, j / size, rvar); - boxDestroy(&box); - } - if (pfpixa_v) fpixaAddFPix(*pfpixa_v, fpixv, 
L_INSERT); - if (pfpixa_rv) fpixaAddFPix(*pfpixa_rv, fpixrv, L_INSERT); - boxaDestroy(&boxa); - } - - pixDestroy(&pix_mac); - dpixDestroy(&dpix_msac); - boxaaDestroy(&baa); - return 0; -} - - -/*----------------------------------------------------------------------* - * Statistics in an arbitrary rectangle * - *----------------------------------------------------------------------*/ -/*! - * \brief pixMeanInRectangle() - * - * \param[in] pixs 8 bpp - * \param[in] box region to compute mean value - * \param[in] pixma mean accumulator - * \param[out] pval mean value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function is intended to be used for many rectangles
- *          on the same image.  It can find the mean within a
- *          rectangle in O(1), independent of the size of the rectangle.
- * 
- */ -l_ok -pixMeanInRectangle(PIX *pixs, - BOX *box, - PIX *pixma, - l_float32 *pval) -{ -l_int32 w, h, bx, by, bw, bh; -l_uint32 val00, val01, val10, val11; -l_float32 norm; -BOX *boxc; - - PROCNAME("pixMeanInRectangle"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (!pixma) - return ERROR_INT("pixma not defined", procName, 1); - - /* Clip rectangle to image */ - pixGetDimensions(pixs, &w, &h, NULL); - boxc = boxClipToRectangle(box, w, h); - boxGetGeometry(boxc, &bx, &by, &bw, &bh); - boxDestroy(&boxc); - - if (bw == 0 || bh == 0) - return ERROR_INT("no pixels in box", procName, 1); - - /* Use up to 4 points in the accumulator */ - norm = 1.0 / ((l_float32)(bw) * bh); - if (bx > 0 && by > 0) { - pixGetPixel(pixma, bx + bw - 1, by + bh - 1, &val11); - pixGetPixel(pixma, bx + bw - 1, by - 1, &val10); - pixGetPixel(pixma, bx - 1, by + bh - 1, &val01); - pixGetPixel(pixma, bx - 1, by - 1, &val00); - *pval = norm * (val11 - val01 + val00 - val10); - } else if (by > 0) { /* bx == 0 */ - pixGetPixel(pixma, bw - 1, by + bh - 1, &val11); - pixGetPixel(pixma, bw - 1, by - 1, &val10); - *pval = norm * (val11 - val10); - } else if (bx > 0) { /* by == 0 */ - pixGetPixel(pixma, bx + bw - 1, bh - 1, &val11); - pixGetPixel(pixma, bx - 1, bh - 1, &val01); - *pval = norm * (val11 - val01); - } else { /* bx == 0 && by == 0 */ - pixGetPixel(pixma, bw - 1, bh - 1, &val11); - *pval = norm * val11; - } - - return 0; -} - - -/*! - * \brief pixVarianceInRectangle() - * - * \param[in] pixs 8 bpp - * \param[in] box region to compute variance and/or root variance - * \param[in] pix_ma mean accumulator - * \param[in] dpix_msa mean square accumulator - * \param[out] pvar [optional] variance - * \param[out] prvar [optional] root variance - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function is intended to be used for many rectangles
- *          on the same image.  It can find the variance and/or the
- *          square root of the variance within a rectangle in O(1),
- *          independent of the size of the rectangle.
- * 
- */ -l_ok -pixVarianceInRectangle(PIX *pixs, - BOX *box, - PIX *pix_ma, - DPIX *dpix_msa, - l_float32 *pvar, - l_float32 *prvar) -{ -l_int32 w, h, bx, by, bw, bh; -l_uint32 val00, val01, val10, val11; -l_float64 dval00, dval01, dval10, dval11, mval, msval, var, norm; -BOX *boxc; - - PROCNAME("pixVarianceInRectangle"); - - if (!pvar && !prvar) - return ERROR_INT("neither &var nor &rvar defined", procName, 1); - if (pvar) *pvar = 0.0; - if (prvar) *prvar = 0.0; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined", procName, 1); - if (!box) - return ERROR_INT("box not defined", procName, 1); - if (!pix_ma) - return ERROR_INT("pix_ma not defined", procName, 1); - if (!dpix_msa) - return ERROR_INT("dpix_msa not defined", procName, 1); - - /* Clip rectangle to image */ - pixGetDimensions(pixs, &w, &h, NULL); - boxc = boxClipToRectangle(box, w, h); - boxGetGeometry(boxc, &bx, &by, &bw, &bh); - boxDestroy(&boxc); - - if (bw == 0 || bh == 0) - return ERROR_INT("no pixels in box", procName, 1); - - /* Use up to 4 points in the accumulators */ - norm = 1.0 / ((l_float32)(bw) * bh); - if (bx > 0 && by > 0) { - pixGetPixel(pix_ma, bx + bw - 1, by + bh - 1, &val11); - pixGetPixel(pix_ma, bx + bw - 1, by - 1, &val10); - pixGetPixel(pix_ma, bx - 1, by + bh - 1, &val01); - pixGetPixel(pix_ma, bx - 1, by - 1, &val00); - dpixGetPixel(dpix_msa, bx + bw - 1, by + bh - 1, &dval11); - dpixGetPixel(dpix_msa, bx + bw - 1, by - 1, &dval10); - dpixGetPixel(dpix_msa, bx - 1, by + bh - 1, &dval01); - dpixGetPixel(dpix_msa, bx - 1, by - 1, &dval00); - mval = norm * (val11 - val01 + val00 - val10); - msval = norm * (dval11 - dval01 + dval00 - dval10); - var = (msval - mval * mval); - if (pvar) *pvar = (l_float32)var; - if (prvar) *prvar = (l_float32)(sqrt(var)); - } else if (by > 0) { /* bx == 0 */ - pixGetPixel(pix_ma, bw - 1, by + bh - 1, &val11); - pixGetPixel(pix_ma, bw - 1, by - 1, &val10); - dpixGetPixel(dpix_msa, bw - 1, by + bh - 1, &dval11); - 
dpixGetPixel(dpix_msa, bw - 1, by - 1, &dval10); - mval = norm * (val11 - val10); - msval = norm * (dval11 - dval10); - var = (msval - mval * mval); - if (pvar) *pvar = (l_float32)var; - if (prvar) *prvar = (l_float32)(sqrt(var)); - } else if (bx > 0) { /* by == 0 */ - pixGetPixel(pix_ma, bx + bw - 1, bh - 1, &val11); - pixGetPixel(pix_ma, bx - 1, bh - 1, &val01); - dpixGetPixel(dpix_msa, bx + bw - 1, bh - 1, &dval11); - dpixGetPixel(dpix_msa, bx - 1, bh - 1, &dval01); - mval = norm * (val11 - val01); - msval = norm * (dval11 - dval01); - var = (msval - mval * mval); - if (pvar) *pvar = (l_float32)var; - if (prvar) *prvar = (l_float32)(sqrt(var)); - } else { /* bx == 0 && by == 0 */ - pixGetPixel(pix_ma, bw - 1, bh - 1, &val11); - dpixGetPixel(dpix_msa, bw - 1, bh - 1, &dval11); - mval = norm * val11; - msval = norm * dval11; - var = (msval - mval * mval); - if (pvar) *pvar = (l_float32)var; - if (prvar) *prvar = (l_float32)(sqrt(var)); - } - - return 0; -} - - -/*----------------------------------------------------------------------* - * Quadtree regions * - *----------------------------------------------------------------------*/ -/*! - * \brief boxaaQuadtreeRegions() - * - * \param[in] w, h size of pix that is being quadtree-ized - * \param[in] nlevels number of levels in quadtree - * \return baa for quadtree regions at each level, or NULL on error - * - *
- * Notes:
- *      (1) The returned boxaa has %nlevels of boxa, each containing
- *          the set of rectangles at that level.  The rectangle at
- *          level 0 is the entire region; at level 1 the region is
- *          divided into 4 rectangles, and at level n there are n^4
- *          rectangles.
- *      (2) At each level, the rectangles in the boxa are in "raster"
- *          order, with LR (fast scan) and TB (slow scan).
- * 
- */ -BOXAA * -boxaaQuadtreeRegions(l_int32 w, - l_int32 h, - l_int32 nlevels) -{ -l_int32 i, j, k, maxpts, nside, nbox, bw, bh; -l_int32 *xstart, *xend, *ystart, *yend; -BOX *box; -BOXA *boxa; -BOXAA *baa; - - PROCNAME("boxaaQuadtreeRegions"); - - if (nlevels < 1) - return (BOXAA *)ERROR_PTR("nlevels must be >= 1", procName, NULL); - if (w < (1 << (nlevels - 1))) - return (BOXAA *)ERROR_PTR("w doesn't support nlevels", procName, NULL); - if (h < (1 << (nlevels - 1))) - return (BOXAA *)ERROR_PTR("h doesn't support nlevels", procName, NULL); - - baa = boxaaCreate(nlevels); - maxpts = 1 << (nlevels - 1); - xstart = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32)); - xend = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32)); - ystart = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32)); - yend = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32)); - for (k = 0; k < nlevels; k++) { - nside = 1 << k; /* number of boxes in each direction */ - for (i = 0; i < nside; i++) { - xstart[i] = (w - 1) * i / nside; - if (i > 0) xstart[i]++; - xend[i] = (w - 1) * (i + 1) / nside; - ystart[i] = (h - 1) * i / nside; - if (i > 0) ystart[i]++; - yend[i] = (h - 1) * (i + 1) / nside; -#if DEBUG_BOXES - lept_stderr( - "k = %d, xs[%d] = %d, xe[%d] = %d, ys[%d] = %d, ye[%d] = %d\n", - k, i, xstart[i], i, xend[i], i, ystart[i], i, yend[i]); -#endif /* DEBUG_BOXES */ - } - nbox = 1 << (2 * k); - boxa = boxaCreate(nbox); - for (i = 0; i < nside; i++) { - bh = yend[i] - ystart[i] + 1; - for (j = 0; j < nside; j++) { - bw = xend[j] - xstart[j] + 1; - box = boxCreate(xstart[j], ystart[i], bw, bh); - boxaAddBox(boxa, box, L_INSERT); - } - } - boxaaAddBoxa(baa, boxa, L_INSERT); - } - - LEPT_FREE(xstart); - LEPT_FREE(xend); - LEPT_FREE(ystart); - LEPT_FREE(yend); - return baa; -} - - -/*----------------------------------------------------------------------* - * Quadtree access * - *----------------------------------------------------------------------*/ -/*! 
- * \brief quadtreeGetParent() - * - * \param[in] fpixa mean, variance or root variance - * \param[in] level, x, y of current pixel - * \param[out] pval parent pixel value, or 0.0 on error - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Check return value for error.  On error, val is returned as 0.0.
- *      (2) The parent is located at:
- *             level - 1
- *             (x/2, y/2)
- * 
- */ -l_ok -quadtreeGetParent(FPIXA *fpixa, - l_int32 level, - l_int32 x, - l_int32 y, - l_float32 *pval) -{ -l_int32 n; - - PROCNAME("quadtreeGetParent"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0.0; - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - n = fpixaGetCount(fpixa); - if (level < 1 || level >= n) - return ERROR_INT("invalid level", procName, 1); - - if (fpixaGetPixel(fpixa, level - 1, x / 2, y / 2, pval) != 0) - return ERROR_INT("invalid coordinates", procName, 1); - return 0; -} - - -/*! - * \brief quadtreeGetChildren() - * - * \param[in] fpixa mean, variance or root variance - * \param[in] level, x, y of current pixel - * \param[out] pval00, pval01, - * pval10, pval11 four child pixel values - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Check return value for error.  On error, all return vals are 0.0.
- *      (2) The returned child pixels are located at:
- *             level + 1
- *             (2x, 2y), (2x+1, 2y), (2x, 2y+1), (2x+1, 2y+1)
- * 
- */ -l_ok -quadtreeGetChildren(FPIXA *fpixa, - l_int32 level, - l_int32 x, - l_int32 y, - l_float32 *pval00, - l_float32 *pval10, - l_float32 *pval01, - l_float32 *pval11) -{ -l_int32 n; - - PROCNAME("quadtreeGetChildren"); - - if (!pval00 || !pval01 || !pval10 || !pval11) - return ERROR_INT("&val* not all defined", procName, 1); - *pval00 = *pval10 = *pval01 = *pval11 = 0.0; - if (!fpixa) - return ERROR_INT("fpixa not defined", procName, 1); - n = fpixaGetCount(fpixa); - if (level < 0 || level >= n - 1) - return ERROR_INT("invalid level", procName, 1); - - if (fpixaGetPixel(fpixa, level + 1, 2 * x, 2 * y, pval00) != 0) - return ERROR_INT("invalid coordinates", procName, 1); - fpixaGetPixel(fpixa, level + 1, 2 * x + 1, 2 * y, pval10); - fpixaGetPixel(fpixa, level + 1, 2 * x, 2 * y + 1, pval01); - fpixaGetPixel(fpixa, level + 1, 2 * x + 1, 2 * y + 1, pval11); - return 0; -} - - -/*! - * \brief quadtreeMaxLevels() - * - * \param[in] w, h dimensions of image - * \return maxlevels maximum number of levels allowed, or -1 on error - * - *
- * Notes:
- *      (1) The criterion for maxlevels is that the subdivision not
- *          go down below the single pixel level.  The 1.5 factor
- *          is intended to keep any rectangle from accidentally
- *          having zero dimension due to integer truncation.
- * 
- */ -l_int32 -quadtreeMaxLevels(l_int32 w, - l_int32 h) -{ -l_int32 i, minside; - - minside = L_MIN(w, h); - for (i = 0; i < 20; i++) { /* 2^10 = one million */ - if (minside < (1.5 * (1 << i))) - return i - 1; - } - - return -1; /* fail if the image has over a trillion pixels! */ -} - - -/*----------------------------------------------------------------------* - * Display quadtree * - *----------------------------------------------------------------------*/ -/*! - * \brief fpixaDisplayQuadtree() - * - * \param[in] fpixa mean, variance or root variance - * \param[in] factor replication factor at lowest level - * \param[in] fontsize 4, ... 20 - * \return pixd 8 bpp, mosaic of quadtree images, or NULL on error - * - *
- * Notes:
- *      (1) The mean and root variance fall naturally in the 8 bpp range,
- *          but the variance is typically outside the range.  This
- *          function displays 8 bpp pix clipped to 255, so the image
- *          pixels will mostly be 255 (white).
- * 
- */ -PIX * -fpixaDisplayQuadtree(FPIXA *fpixa, - l_int32 factor, - l_int32 fontsize) -{ -char buf[256]; -l_int32 nlevels, i, mag, w; -L_BMF *bmf; -FPIX *fpix; -PIX *pixt1, *pixt2, *pixt3, *pixt4, *pixd; -PIXA *pixat; - - PROCNAME("fpixaDisplayQuadtree"); - - if (!fpixa) - return (PIX *)ERROR_PTR("fpixa not defined", procName, NULL); - - if ((nlevels = fpixaGetCount(fpixa)) == 0) - return (PIX *)ERROR_PTR("pixas empty", procName, NULL); - - if ((bmf = bmfCreate(NULL, fontsize)) == NULL) - L_ERROR("bmf not made; text will not be added", procName); - pixat = pixaCreate(nlevels); - for (i = 0; i < nlevels; i++) { - fpix = fpixaGetFPix(fpixa, i, L_CLONE); - pixt1 = fpixConvertToPix(fpix, 8, L_CLIP_TO_ZERO, 0); - mag = factor * (1 << (nlevels - i - 1)); - pixt2 = pixExpandReplicate(pixt1, mag); - pixt3 = pixConvertTo32(pixt2); - snprintf(buf, sizeof(buf), "Level %d\n", i); - pixt4 = pixAddSingleTextblock(pixt3, bmf, buf, 0xff000000, - L_ADD_BELOW, NULL); - pixaAddPix(pixat, pixt4, L_INSERT); - fpixDestroy(&fpix); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - pixDestroy(&pixt3); - } - w = pixGetWidth(pixt4); - pixd = pixaDisplayTiledInRows(pixat, 32, nlevels * (w + 80), 1.0, 0, 30, 2); - - pixaDestroy(&pixat); - bmfDestroy(&bmf); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/queue.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/queue.c deleted file mode 100644 index e0397bcc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/queue.c +++ /dev/null @@ -1,326 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file queue.c - *
- *
- *      Create/Destroy L_Queue
- *          L_QUEUE        *lqueueCreate()
- *          void           *lqueueDestroy()
- *
- *      Operations to add/remove to/from a L_Queue
- *          l_int32         lqueueAdd()
- *          static l_int32  lqueueExtendArray()
- *          void           *lqueueRemove()
- *
- *      Accessors
- *          l_int32         lqueueGetCount()
- *
- *      Debug output
- *          l_int32         lqueuePrint()
- *
- *    The lqueue is a fifo that implements a queue of void* pointers.
- *    It can be used to hold a queue of any type of struct.
- *    Internally, it maintains two counters:
- *        nhead:  location of head (in ptrs) from the beginning
- *                of the buffer
- *        nelem:  number of ptr elements stored in the queue
- *    As items are added to the queue, nelem increases.
- *    As items are removed, nhead increases and nelem decreases.
- *    Any time the tail reaches the end of the allocated buffer,
- *      all the pointers are shifted to the left, so that the head
- *      is at the beginning of the array.
- *    If the buffer becomes more than 3/4 full, it doubles in size.
- *
- *    [A circular queue would allow us to skip the shifting and
- *    to resize only when the buffer is full.  For most applications,
- *    the extra work we do for a linear queue is not significant.]
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_int32 MIN_BUFFER_SIZE = 20; /* n'importe quoi */ -static const l_int32 INITIAL_BUFFER_ARRAYSIZE = 1024; /* n'importe quoi */ - - /* Static function */ -static l_int32 lqueueExtendArray(L_QUEUE *lq); - -/*--------------------------------------------------------------------------* - * L_Queue create/destroy * - *--------------------------------------------------------------------------*/ -/*! - * \brief lqueueCreate() - * - * \param[in] nalloc size of ptr array to be alloc'd; 0 for default - * \return lqueue, or NULL on error - * - *
- * Notes:
- *      (1) Allocates a ptr array of given size, and initializes counters.
- * 
- */ -L_QUEUE * -lqueueCreate(l_int32 nalloc) -{ -L_QUEUE *lq; - - PROCNAME("lqueueCreate"); - - if (nalloc < MIN_BUFFER_SIZE) - nalloc = INITIAL_BUFFER_ARRAYSIZE; - - lq = (L_QUEUE *)LEPT_CALLOC(1, sizeof(L_QUEUE)); - if ((lq->array = (void **)LEPT_CALLOC(nalloc, sizeof(void *))) == NULL) { - lqueueDestroy(&lq, 0); - return (L_QUEUE *)ERROR_PTR("ptr array not made", procName, NULL); - } - lq->nalloc = nalloc; - lq->nhead = lq->nelem = 0; - return lq; -} - - -/*! - * \brief lqueueDestroy() - * - * \param[in,out] plq will be set to null before returning - * \param[in] freeflag TRUE to free each remaining struct in the array - * \return void - * - *
- * Notes:
- *      (1) If freeflag is TRUE, frees each struct in the array.
- *      (2) If freeflag is FALSE but there are elements on the array,
- *          gives a warning and destroys the array.  This will
- *          cause a memory leak of all the items that were on the queue.
- *          So if the items require their own destroy function, they
- *          must be destroyed before the queue.  The same applies to the
- *          auxiliary stack, if it is used.
- *      (3) To destroy the L_Queue, we destroy the ptr array, then
- *          the lqueue, and then null the contents of the input ptr.
- * 
- */ -void -lqueueDestroy(L_QUEUE **plq, - l_int32 freeflag) -{ -void *item; -L_QUEUE *lq; - - PROCNAME("lqueueDestroy"); - - if (plq == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - if ((lq = *plq) == NULL) - return; - - if (freeflag) { - while(lq->nelem > 0) { - item = lqueueRemove(lq); - LEPT_FREE(item); - } - } else if (lq->nelem > 0) { - L_WARNING("memory leak of %d items in lqueue!\n", procName, lq->nelem); - } - - if (lq->array) - LEPT_FREE(lq->array); - if (lq->stack) - lstackDestroy(&lq->stack, freeflag); - LEPT_FREE(lq); - *plq = NULL; - - return; -} - - -/*--------------------------------------------------------------------------* - * Accessors * - *--------------------------------------------------------------------------*/ -/*! - * \brief lqueueAdd() - * - * \param[in] lq lqueue - * \param[in] item to be added to the tail of the queue - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The algorithm is as follows.  If the queue is populated
- *          to the end of the allocated array, shift all ptrs toward
- *          the beginning of the array, so that the head of the queue
- *          is at the beginning of the array.  Then, if the array is
- *          more than 0.75 full, realloc with double the array size.
- *          Finally, add the item to the tail of the queue.
- * 
- */ -l_ok -lqueueAdd(L_QUEUE *lq, - void *item) -{ - PROCNAME("lqueueAdd"); - - if (!lq) - return ERROR_INT("lq not defined", procName, 1); - if (!item) - return ERROR_INT("item not defined", procName, 1); - - /* If filled to the end and the ptrs can be shifted to the left, - * shift them. */ - if ((lq->nhead + lq->nelem >= lq->nalloc) && (lq->nhead != 0)) { - memmove(lq->array, lq->array + lq->nhead, sizeof(void *) * lq->nelem); - lq->nhead = 0; - } - - /* If necessary, expand the allocated array by a factor of 2 */ - if (lq->nelem > 0.75 * lq->nalloc) - lqueueExtendArray(lq); - - /* Now add the item */ - lq->array[lq->nhead + lq->nelem] = (void *)item; - lq->nelem++; - - return 0; -} - - -/*! - * \brief lqueueExtendArray() - * - * \param[in] lq lqueue - * \return 0 if OK, 1 on error - */ -static l_int32 -lqueueExtendArray(L_QUEUE *lq) -{ - PROCNAME("lqueueExtendArray"); - - if (!lq) - return ERROR_INT("lq not defined", procName, 1); - - if ((lq->array = (void **)reallocNew((void **)&lq->array, - sizeof(void *) * lq->nalloc, - 2 * sizeof(void *) * lq->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - lq->nalloc = 2 * lq->nalloc; - return 0; -} - - -/*! - * \brief lqueueRemove() - * - * \param[in] lq lqueue - * \return ptr to item popped from the head of the queue, - * or NULL if the queue is empty or on error - * - *
- * Notes:
- *      (1) If this is the last item on the queue, so that the queue
- *          becomes empty, nhead is reset to the beginning of the array.
- * 
- */ -void * -lqueueRemove(L_QUEUE *lq) -{ -void *item; - - PROCNAME("lqueueRemove"); - - if (!lq) - return (void *)ERROR_PTR("lq not defined", procName, NULL); - - if (lq->nelem == 0) - return NULL; - item = lq->array[lq->nhead]; - lq->array[lq->nhead] = NULL; - if (lq->nelem == 1) - lq->nhead = 0; /* reset head ptr */ - else - (lq->nhead)++; /* can't go off end of array because nelem > 1 */ - lq->nelem--; - return item; -} - - -/*! - * \brief lqueueGetCount() - * - * \param[in] lq lqueue - * \return count, or 0 on error - */ -l_int32 -lqueueGetCount(L_QUEUE *lq) -{ - PROCNAME("lqueueGetCount"); - - if (!lq) - return ERROR_INT("lq not defined", procName, 0); - - return lq->nelem; -} - - -/*---------------------------------------------------------------------* - * Debug output * - *---------------------------------------------------------------------*/ -/*! - * \brief lqueuePrint() - * - * \param[in] fp file stream - * \param[in] lq lqueue - * \return 0 if OK; 1 on error - */ -l_ok -lqueuePrint(FILE *fp, - L_QUEUE *lq) -{ -l_int32 i; - - PROCNAME("lqueuePrint"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!lq) - return ERROR_INT("lq not defined", procName, 1); - - fprintf(fp, "\n L_Queue: nalloc = %d, nhead = %d, nelem = %d, array = %p\n", - lq->nalloc, lq->nhead, lq->nelem, lq->array); - for (i = lq->nhead; i < lq->nhead + lq->nelem; i++) - fprintf(fp, "array[%d] = %p\n", i, lq->array[i]); - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/queue.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/queue.h deleted file mode 100644 index fd380e83..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/queue.h +++ /dev/null @@ -1,77 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_QUEUE_H -#define LEPTONICA_QUEUE_H - -/*! - * \file queue.h - * - *
- *      Expandable pointer queue for arbitrary void* data.
- *
- *      The L_Queue is a fifo that implements a queue of void* pointers.
- *      It can be used to hold a queue of any type of struct.
- *
- *      Internally, it maintains two counters:
- *          nhead:  location of head (in ptrs) from the beginning
- *                  of the array.
- *          nelem:  number of ptr elements stored in the queue.
- *
- *      The element at the head of the queue, which is the next to
- *      be removed, is array[nhead].  The location at the tail of the
- *      queue to which the next element will be added is
- *      array[nhead + nelem].
- *
- *      As items are added to the queue, nelem increases.
- *      As items are removed, nhead increases and nelem decreases.
- *      Any time the tail reaches the end of the allocated array,
- *      all the pointers are shifted to the left, so that the head
- *      is at the beginning of the array.
- *      If the array becomes more than 3/4 full, it doubles in size.
- *
- *      The auxiliary stack can be used in a wrapper for re-using
- *      items popped from the queue.  It is not made by default.
- *
- *      For further implementation details, see queue.c.
- * 
- */ - -/*! Expandable pointer queue for arbitrary void* data */ -struct L_Queue -{ - l_int32 nalloc; /*!< size of allocated ptr array */ - l_int32 nhead; /*!< location of head (in ptrs) from the */ - /*!< beginning of the array */ - l_int32 nelem; /*!< number of elements stored in the queue */ - void **array; /*!< ptr array */ - struct L_Stack *stack; /*!< auxiliary stack */ - -}; -typedef struct L_Queue L_QUEUE; - - -#endif /* LEPTONICA_QUEUE_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rank.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rank.c deleted file mode 100644 index ece9b717..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rank.c +++ /dev/null @@ -1,544 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file rank.c - *
- *
- *      Rank filter (gray and rgb)
- *          PIX      *pixRankFilter()
- *          PIX      *pixRankFilterRGB()
- *          PIX      *pixRankFilterGray()
- *
- *      Median filter
- *          PIX      *pixMedianFilter()
- *
- *      Rank filter (accelerated with downscaling)
- *          PIX      *pixRankFilterWithScaling()
- *
- *  What is a brick rank filter?
- *
- *    A brick rank order filter evaluates, for every pixel in the image,
- *    a rectangular set of n = wf x hf pixels in its neighborhood (where the
- *    pixel in question is at the "center" of the rectangle and is
- *    included in the evaluation).  It determines the value of the
- *    neighboring pixel that is the r-th smallest in the set,
- *    where r is some integer between 1 and n.  The input rank parameter
- *    is a fraction between 0.0 and 1.0, where 0.0 represents the
- *    smallest value (r = 1) and 1.0 represents the largest value (r = n).
- *    A median filter is a rank filter where rank = 0.5.
- *
- *    It is important to note that grayscale erosion is equivalent
- *    to rank = 0.0, and grayscale dilation is equivalent to rank = 1.0.
- *    These are much easier to calculate than the general rank value,
- *    thanks to the van Herk/Gil-Werman algorithm:
- *       http://www.leptonica.com/grayscale-morphology.html
- *    so you should use pixErodeGray() and pixDilateGray() for
- *    rank 0.0 and 1.0, rsp.  See notes below in the function header.
- *
- *  How is a rank filter implemented efficiently on an image?
- *
- *    Sorting will not work.
- *
- *      * The best sort algorithms are O(n*logn), where n is the number
- *        of values to be sorted (the area of the filter).  For large
- *        filters this is an impractically large number.
- *
- *      * Selection of the rank value is O(n).  (To understand why it's not
- *        O(n*logn), see Numerical Recipes in C, 2nd edition, 1992,  p. 355ff).
- *        This also still far too much computation for large filters.
- *
- *      * Suppose we get clever.  We really only need to do an incremental
- *        selection or sorting, because, for example, moving the filter
- *        down by one pixel causes one filter width of pixels to be added
- *        and another to be removed.  Can we do this incrementally in
- *        an efficient way?  Unfortunately, no.  The sorted values will be
- *        in an array.  Even if the filter width is 1, we can expect to
- *        have to move O(n) pixels, because insertion and deletion can happen
- *        anywhere in the array.  By comparison, heapsort is excellent for
- *        incremental sorting, where the cost for insertion or deletion
- *        is O(logn), because the array itself doesn't need to
- *        be sorted into strictly increasing order.  However, heapsort
- *        only gives the max (or min) value, not the general rank value.
- *
- *    This leaves histograms.
- *
- *      * Represented as an array.  The problem with an array of 256
- *        bins is that, in general, a significant fraction of the
- *        entire histogram must be summed to find the rank value bin.
- *        Suppose the filter size is 5x5.  You spend most of your time
- *        adding zeroes.  Ouch!
- *
- *      * Represented as a linked list.  This would overcome the
- *        summing-over-empty-bin problem, but you lose random access
- *        for insertions and deletions.  No way.
- *
- *      * Two histogram solution.  Maintain two histograms with
- *        bin sizes of 1 and 16.  Proceed from coarse to fine.
- *        First locate the coarse bin for the given rank, of which
- *        there are only 16.  Then, in the 256 entry (fine) histogram,
- *        you need look at a maximum of 16 bins.  For each output
- *        pixel, the average number of bins summed over, both in the
- *        coarse and fine histograms, is thus 16.
- *
- *  If someone has a better method, please let me know!
- *
- *  The rank filtering operation is relatively expensive, compared to most
- *  of the other imaging operations.  The speed is only weakly dependent
- *  on the size of the rank filter.  On standard hardware, it runs at
- *  about 10 Mpix/sec for a 50 x 50 filter, and 25 Mpix/sec for
- *  a 5 x 5 filter.   For applications where the rank filter can be
- *  performed on a downscaled image, significant speedup can be
- *  achieved because the time goes as the square of the scaling factor.
- *  We provide an interface that handles the details, and only
- *  requires the amount of downscaling to be input.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*----------------------------------------------------------------------* - * Rank order filter * - *----------------------------------------------------------------------*/ -/*! - * \brief pixRankFilter() - * - * \param[in] pixs 8 or 32 bpp; no colormap - * \param[in] wf, hf width and height of filter; each is >= 1 - * \param[in] rank in [0.0 ... 1.0] - * \return pixd of rank values, or NULL on error - * - *
- * Notes:
- *      (1) This defines, for each pixel in pixs, a neighborhood of
- *          pixels given by a rectangle "centered" on the pixel.
- *          This set of wf*hf pixels has a distribution of values.
- *          For each component, if the values are sorted in increasing
- *          order, we choose the component such that rank*(wf*hf-1)
- *          pixels have a lower or equal value and
- *          (1-rank)*(wf*hf-1) pixels have an equal or greater value.
- *      (2) See notes in pixRankFilterGray() for further details.
- * 
- */ -PIX * -pixRankFilter(PIX *pixs, - l_int32 wf, - l_int32 hf, - l_float32 rank) -{ -l_int32 d; - - PROCNAME("pixRankFilter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (wf < 1 || hf < 1) - return (PIX *)ERROR_PTR("wf < 1 || hf < 1", procName, NULL); - if (rank < 0.0 || rank > 1.0) - return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", procName, NULL); - if (wf == 1 && hf == 1) /* no-op */ - return pixCopy(NULL, pixs); - - if (d == 8) - return pixRankFilterGray(pixs, wf, hf, rank); - else /* d == 32 */ - return pixRankFilterRGB(pixs, wf, hf, rank); -} - - -/*! - * \brief pixRankFilterRGB() - * - * \param[in] pixs 32 bpp - * \param[in] wf, hf width and height of filter; each is >= 1 - * \param[in] rank in [0.0 ... 1.0] - * \return pixd of rank values, or NULL on error - * - *
- * Notes:
- *      (1) This defines, for each pixel in pixs, a neighborhood of
- *          pixels given by a rectangle "centered" on the pixel.
- *          This set of wf*hf pixels has a distribution of values.
- *          For each component, if the values are sorted in increasing
- *          order, we choose the component such that rank*(wf*hf-1)
- *          pixels have a lower or equal value and
- *          (1-rank)*(wf*hf-1) pixels have an equal or greater value.
- *      (2) Apply gray rank filtering to each component independently.
- *      (3) See notes in pixRankFilterGray() for further details.
- * 
- */ -PIX * -pixRankFilterRGB(PIX *pixs, - l_int32 wf, - l_int32 hf, - l_float32 rank) -{ -PIX *pixr, *pixg, *pixb, *pixrf, *pixgf, *pixbf, *pixd; - - PROCNAME("pixRankFilterRGB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (wf < 1 || hf < 1) - return (PIX *)ERROR_PTR("wf < 1 || hf < 1", procName, NULL); - if (rank < 0.0 || rank > 1.0) - return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", procName, NULL); - if (wf == 1 && hf == 1) /* no-op */ - return pixCopy(NULL, pixs); - - pixr = pixGetRGBComponent(pixs, COLOR_RED); - pixg = pixGetRGBComponent(pixs, COLOR_GREEN); - pixb = pixGetRGBComponent(pixs, COLOR_BLUE); - - pixrf = pixRankFilterGray(pixr, wf, hf, rank); - pixgf = pixRankFilterGray(pixg, wf, hf, rank); - pixbf = pixRankFilterGray(pixb, wf, hf, rank); - - pixd = pixCreateRGBImage(pixrf, pixgf, pixbf); - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - pixDestroy(&pixrf); - pixDestroy(&pixgf); - pixDestroy(&pixbf); - return pixd; -} - - -/*! - * \brief pixRankFilterGray() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] wf, hf width and height of filter; each is >= 1 - * \param[in] rank in [0.0 ... 1.0] - * \return pixd of rank values, or NULL on error - * - *
- * Notes:
- *      (1) This defines, for each pixel in pixs, a neighborhood of
- *          pixels given by a rectangle "centered" on the pixel.
- *          This set of wf*hf pixels has a distribution of values,
- *          and if they are sorted in increasing order, we choose
- *          the pixel such that rank*(wf*hf-1) pixels have a lower
- *          or equal value and (1-rank)*(wf*hf-1) pixels have an equal
- *          or greater value.
- *      (2) By this definition, the rank = 0.0 pixel has the lowest
- *          value, and the rank = 1.0 pixel has the highest value.
- *      (3) We add mirrored boundary pixels to avoid boundary effects,
- *          and put the filter center at (0, 0).
- *      (4) This dispatches to grayscale erosion or dilation if the
- *          filter dimensions are odd and the rank is 0.0 or 1.0, rsp.
- *      (5) Returns a copy if both wf and hf are 1.
- *      (6) Uses row-major or column-major incremental updates to the
- *          histograms depending on whether hf > wf or hv <= wf, rsp.
- * 
- */ -PIX * -pixRankFilterGray(PIX *pixs, - l_int32 wf, - l_int32 hf, - l_float32 rank) -{ -l_int32 w, h, d, i, j, k, m, n, rankloc, wplt, wpld, val, sum; -l_int32 *histo, *histo16; -l_uint32 *datat, *linet, *datad, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixRankFilterGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (wf < 1 || hf < 1) - return (PIX *)ERROR_PTR("wf < 1 || hf < 1", procName, NULL); - if (rank < 0.0 || rank > 1.0) - return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", procName, NULL); - if (wf == 1 && hf == 1) /* no-op */ - return pixCopy(NULL, pixs); - - /* For rank = 0.0, this is a grayscale erosion, and for rank = 1.0, - * a dilation. Grayscale morphology operations are implemented - * for filters of odd dimension, so we dispatch to grayscale - * morphology if both wf and hf are odd. Otherwise, we - * slightly adjust the rank (to get the correct behavior) and - * use the slower rank filter here. */ - if (wf % 2 && hf % 2) { - if (rank == 0.0) - return pixErodeGray(pixs, wf, hf); - else if (rank == 1.0) - return pixDilateGray(pixs, wf, hf); - } - if (rank == 0.0) rank = 0.0001; - if (rank == 1.0) rank = 0.9999; - - /* Add wf/2 to each side, and hf/2 to top and bottom of the - * image, mirroring for accuracy and to avoid special-casing - * the boundary. */ - if ((pixt = pixAddMirroredBorder(pixs, wf / 2, wf / 2, hf / 2, hf / 2)) - == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - - /* Set up the two histogram arrays. */ - histo = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - histo16 = (l_int32 *)LEPT_CALLOC(16, sizeof(l_int32)); - rankloc = (l_int32)(rank * wf * hf); - - /* Place the filter center at (0, 0). 
This is just a - * convenient location, because it allows us to perform - * the rank filter over x:(0 ... w - 1) and y:(0 ... h - 1). */ - pixd = pixCreateTemplate(pixs); - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* If hf > wf, it's more efficient to use row-major scanning. - * Otherwise, traverse the image in use column-major order. */ - if (hf > wf) { - for (j = 0; j < w; j++) { /* row-major */ - /* Start each column with clean histogram arrays. */ - for (n = 0; n < 256; n++) - histo[n] = 0; - for (n = 0; n < 16; n++) - histo16[n] = 0; - - for (i = 0; i < h; i++) { /* fast scan on columns */ - /* Update the histos for the new location */ - lined = datad + i * wpld; - if (i == 0) { /* do full histo */ - for (k = 0; k < hf; k++) { - linet = datat + (i + k) * wplt; - for (m = 0; m < wf; m++) { - val = GET_DATA_BYTE(linet, j + m); - histo[val]++; - histo16[val >> 4]++; - } - } - } else { /* incremental update */ - linet = datat + (i - 1) * wplt; - for (m = 0; m < wf; m++) { /* remove top line */ - val = GET_DATA_BYTE(linet, j + m); - histo[val]--; - histo16[val >> 4]--; - } - linet = datat + (i + hf - 1) * wplt; - for (m = 0; m < wf; m++) { /* add bottom line */ - val = GET_DATA_BYTE(linet, j + m); - histo[val]++; - histo16[val >> 4]++; - } - } - - /* Find the rank value */ - sum = 0; - for (n = 0; n < 16; n++) { /* search over coarse histo */ - sum += histo16[n]; - if (sum > rankloc) { - sum -= histo16[n]; - break; - } - } - if (n == 16) { /* avoid accessing out of bounds */ - L_WARNING("n = 16; reducing\n", procName); - n = 15; - sum -= histo16[n]; - } - k = 16 * n; /* starting value in fine histo */ - for (m = 0; m < 16; m++) { - sum += histo[k]; - if (sum > rankloc) { - SET_DATA_BYTE(lined, j, k); - break; - } - k++; - } - } - } - } else { /* wf >= hf */ - for (i = 0; i < h; i++) { /* column-major */ - /* Start each row with clean histogram arrays. 
*/ - for (n = 0; n < 256; n++) - histo[n] = 0; - for (n = 0; n < 16; n++) - histo16[n] = 0; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { /* fast scan on rows */ - /* Update the histos for the new location */ - if (j == 0) { /* do full histo */ - for (k = 0; k < hf; k++) { - linet = datat + (i + k) * wplt; - for (m = 0; m < wf; m++) { - val = GET_DATA_BYTE(linet, j + m); - histo[val]++; - histo16[val >> 4]++; - } - } - } else { /* incremental update at left and right sides */ - for (k = 0; k < hf; k++) { - linet = datat + (i + k) * wplt; - val = GET_DATA_BYTE(linet, j - 1); - histo[val]--; - histo16[val >> 4]--; - val = GET_DATA_BYTE(linet, j + wf - 1); - histo[val]++; - histo16[val >> 4]++; - } - } - - /* Find the rank value */ - sum = 0; - for (n = 0; n < 16; n++) { /* search over coarse histo */ - sum += histo16[n]; - if (sum > rankloc) { - sum -= histo16[n]; - break; - } - } - if (n == 16) { /* avoid accessing out of bounds */ - L_WARNING("n = 16; reducing\n", procName); - n = 15; - sum -= histo16[n]; - } - k = 16 * n; /* starting value in fine histo */ - for (m = 0; m < 16; m++) { - sum += histo[k]; - if (sum > rankloc) { - SET_DATA_BYTE(lined, j, k); - break; - } - k++; - } - } - } - } - - pixDestroy(&pixt); - LEPT_FREE(histo); - LEPT_FREE(histo16); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Median filter * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixMedianFilter() - * - * \param[in] pixs 8 or 32 bpp; no colormap - * \param[in] wf, hf width and height of filter; each is >= 1 - * \return pixd of median values, or NULL on error - */ -PIX * -pixMedianFilter(PIX *pixs, - l_int32 wf, - l_int32 hf) -{ - PROCNAME("pixMedianFilter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - return pixRankFilter(pixs, wf, hf, 0.5); -} - - -/*----------------------------------------------------------------------* - * Rank filter (accelerated with downscaling) * - *----------------------------------------------------------------------*/ -/*! - * \brief pixRankFilterWithScaling() - * - * \param[in] pixs 8 or 32 bpp; no colormap - * \param[in] wf, hf width and height of filter; each is >= 1 - * \param[in] rank in [0.0 ... 1.0] - * \param[in] scalefactor scale factor; must be >= 0.2 and <= 0.7 - * \return pixd of rank values, or NULL on error - * - *
- * Notes:
- *      (1) This is a convenience function that downscales, does
- *          the rank filtering, and upscales.  Because the down-
- *          and up-scaling functions are very fast compared to
- *          rank filtering, the time it takes is reduced from that
- *          for the simple rank filtering operation by approximately
- *          the square of the scaling factor.
- * 
- */ -PIX * -pixRankFilterWithScaling(PIX *pixs, - l_int32 wf, - l_int32 hf, - l_float32 rank, - l_float32 scalefactor) -{ -l_int32 w, h, d, wfs, hfs; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixRankFilterWithScaling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetColormap(pixs) != NULL) - return (PIX *)ERROR_PTR("pixs has colormap", procName, NULL); - d = pixGetDepth(pixs); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (wf < 1 || hf < 1) - return (PIX *)ERROR_PTR("wf < 1 || hf < 1", procName, NULL); - if (rank < 0.0 || rank > 1.0) - return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", procName, NULL); - if (wf == 1 && hf == 1) /* no-op */ - return pixCopy(NULL, pixs); - if (scalefactor < 0.2 || scalefactor > 0.7) { - L_ERROR("invalid scale factor; no scaling used\n", procName); - return pixRankFilter(pixs, wf, hf, rank); - } - - pix1 = pixScaleAreaMap(pixs, scalefactor, scalefactor); - wfs = L_MAX(1, (l_int32)(scalefactor * wf + 0.5)); - hfs = L_MAX(1, (l_int32)(scalefactor * hf + 0.5)); - pix2 = pixRankFilter(pix1, wfs, hfs, rank); - pixGetDimensions(pixs, &w, &h, NULL); - pixd = pixScaleToSize(pix2, w, h); - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rbtree.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rbtree.c deleted file mode 100644 index 922033b2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rbtree.c +++ /dev/null @@ -1,902 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * Modified from the excellent code here: - * http://en.literateprograms.org/Red-black_tree_(C)?oldid=19567 - * which has been placed in the public domain under the Creative Commons - * CC0 1.0 waiver (http://creativecommons.org/publicdomain/zero/1.0/). - */ - -/*! - * \file rbtree.c - *
- *
- *  Basic functions for using red-black trees.  These are "nearly" balanced
- *  sorted trees with ordering by key that allows insertion, lookup and
- *  deletion of key/value pairs in log(n) time.
- *
- *  We use red-black trees to implement our version of:
- *    * a map: a function that maps keys to values (e.g., int64 --> int64).
- *    * a set: a collection that is sorted by unique keys (without
- *      associated values)
- *
- *  There are 5 invariant properties of RB trees:
- *  (1) Each node is either red or black.
- *  (2) The root node is black.
- *  (3) All leaves are black and contain no data (null).
- *  (4) Every red node has two children and both are black.  This is
- *      equivalent to requiring the parent of every red node to be black.
- *  (5) All paths from any given node to its leaf nodes contain the
- *      same number of black nodes.
- *
- *  Interface to red-black tree
- *           L_RBTREE       *l_rbtreeCreate()
- *           RB_TYPE        *l_rbtreeLookup()
- *           void            l_rbtreeInsert()
- *           void            l_rbtreeDelete()
- *           void            l_rbtreeDestroy()
- *           L_RBTREE_NODE  *l_rbtreeGetFirst()
- *           L_RBTREE_NODE  *l_rbtreeGetNext()
- *           L_RBTREE_NODE  *l_rbtreeGetLast()
- *           L_RBTREE_NODE  *l_rbtreeGetPrev()
- *           l_int32         l_rbtreeGetCount()
- *           void            l_rbtreePrint()
- *
- *  General comparison function
- *           static l_int32  compareKeys()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* The node color enum is only needed in the rbtree implementation */ -enum { - L_RED_NODE = 1, - L_BLACK_NODE = 2 -}; - - /* This makes it simpler to read the code */ -typedef L_RBTREE_NODE node; - - /* Lots of static helper functions */ -static void destroy_helper(node *n); -static void count_helper(node *n, l_int32 *pcount); -static void print_tree_helper(FILE *fp, node *n, l_int32 keytype, - l_int32 indent); - -static l_int32 compareKeys(l_int32 keytype, RB_TYPE left, RB_TYPE right); - -static node *grandparent(node *n); -static node *sibling(node *n); -static node *uncle(node *n); -static l_int32 node_color(node *n); -static node *new_node(RB_TYPE key, RB_TYPE value, l_int32 node_color, - node *left, node *right); -static node *lookup_node(L_RBTREE *t, RB_TYPE key); -static void rotate_left(L_RBTREE *t, node *n); -static void rotate_right(L_RBTREE *t, node *n); -static void replace_node(L_RBTREE *t, node *oldn, node *newn); -static void insert_case1(L_RBTREE *t, node *n); -static void insert_case2(L_RBTREE *t, node *n); -static void insert_case3(L_RBTREE *t, node *n); -static void insert_case4(L_RBTREE *t, node *n); -static void insert_case5(L_RBTREE *t, node *n); -static node *maximum_node(node *root); -static void delete_case1(L_RBTREE *t, node *n); -static void delete_case2(L_RBTREE *t, node *n); -static void delete_case3(L_RBTREE *t, node *n); -static void delete_case4(L_RBTREE *t, node *n); -static void delete_case5(L_RBTREE *t, node *n); -static void delete_case6(L_RBTREE *t, node *n); -static void verify_properties(L_RBTREE *t); - -#ifndef NO_CONSOLE_IO -#define VERIFY_RBTREE 0 /* only for debugging */ -#endif /* ~NO_CONSOLE_IO */ - -/* ------------------------------------------------------------- * - * Interface to Red-black Tree * - * ------------------------------------------------------------- */ -/*! 
- * \brief l_rbtreeCreate() - * - * \param[in] keytype defined by an enum for an RB_TYPE union - * \return rbtree container with empty ptr to the root - */ -L_RBTREE * -l_rbtreeCreate(l_int32 keytype) -{ - PROCNAME("l_rbtreeCreate"); - - if (keytype != L_INT_TYPE && keytype != L_UINT_TYPE && - keytype != L_FLOAT_TYPE && keytype) - return (L_RBTREE *)ERROR_PTR("invalid keytype", procName, NULL); - - L_RBTREE *t = (L_RBTREE *)LEPT_CALLOC(1, sizeof(L_RBTREE)); - t->keytype = keytype; - verify_properties(t); - return t; -} - -/*! - * \brief l_rbtreeLookup() - * - * \param[in] t rbtree, including root node - * \param[in] key find a node with this key - * \return &value a pointer to a union, if the node exists; else NULL - */ -RB_TYPE * -l_rbtreeLookup(L_RBTREE *t, - RB_TYPE key) -{ - PROCNAME("l_rbtreeLookup"); - - if (!t) - return (RB_TYPE *)ERROR_PTR("tree is null\n", procName, NULL); - - node *n = lookup_node(t, key); - return n == NULL ? NULL : &n->value; -} - -/*! - * \brief l_rbtreeInsert() - * - * \param[in] t rbtree, including root node - * \param[in] key insert a node with this key, if the key does not - * already exist in the tree - * \param[in] value typically an int, used for an index - * \return void - * - *
- * Notes:
- *      (1) If a node with the key already exists, this just updates the value.
- * 
- */ -void -l_rbtreeInsert(L_RBTREE *t, - RB_TYPE key, - RB_TYPE value) -{ -node *n, *inserted_node; - - PROCNAME("l_rbtreeInsert"); - - if (!t) { - L_ERROR("tree is null\n", procName); - return; - } - - inserted_node = new_node(key, value, L_RED_NODE, NULL, NULL); - if (t->root == NULL) { - t->root = inserted_node; - } else { - n = t->root; - while (1) { - int comp_result = compareKeys(t->keytype, key, n->key); - if (comp_result == 0) { - n->value = value; - LEPT_FREE(inserted_node); - return; - } else if (comp_result < 0) { - if (n->left == NULL) { - n->left = inserted_node; - break; - } else { - n = n->left; - } - } else { /* comp_result > 0 */ - if (n->right == NULL) { - n->right = inserted_node; - break; - } else { - n = n->right; - } - } - } - inserted_node->parent = n; - } - insert_case1(t, inserted_node); - verify_properties(t); -} - -/*! - * \brief l_rbtreeDelete() - * - * \param[in] t rbtree, including root node - * \param[in] key delete the node with this key - * \return void - */ -void -l_rbtreeDelete(L_RBTREE *t, - RB_TYPE key) -{ -node *n, *child; - - PROCNAME("l_rbtreeDelete"); - - if (!t) { - L_ERROR("tree is null\n", procName); - return; - } - - n = lookup_node(t, key); - if (n == NULL) return; /* Key not found, do nothing */ - if (n->left != NULL && n->right != NULL) { - /* Copy key/value from predecessor and then delete it instead */ - node *pred = maximum_node(n->left); - n->key = pred->key; - n->value = pred->value; - n = pred; - } - - /* n->left == NULL || n->right == NULL */ - child = n->right == NULL ? n->left : n->right; - if (node_color(n) == L_BLACK_NODE) { - n->color = node_color(child); - delete_case1(t, n); - } - replace_node(t, n, child); - if (n->parent == NULL && child != NULL) /* root should be black */ - child->color = L_BLACK_NODE; - LEPT_FREE(n); - - verify_properties(t); -} - -/*! - * \brief l_rbtreeDestroy() - * - * \param[in] pt pointer to tree; will be wet to null before returning - * \return void - * - *
- * Notes:
- *      (1) Destroys the tree and nulls the input tree ptr.
- * 
- */ -void -l_rbtreeDestroy(L_RBTREE **pt) -{ -node *n; - - if (!pt) return; - if (*pt == NULL) return; - n = (*pt)->root; - destroy_helper(n); - LEPT_FREE(*pt); - *pt = NULL; - return; -} - - /* postorder DFS */ -static void -destroy_helper(node *n) -{ - if (!n) return; - destroy_helper(n->left); - destroy_helper(n->right); - LEPT_FREE(n); -} - -/*! - * \brief l_rbtreeGetFirst() - * - * \param[in] t rbtree, including root node - * \return void - * - *
- * Notes:
- *      (1) This is the first node in an in-order traversal.
- * 
- */ -L_RBTREE_NODE * -l_rbtreeGetFirst(L_RBTREE *t) -{ -node *n; - - PROCNAME("l_rbtreeGetFirst"); - - if (!t) - return (L_RBTREE_NODE *)ERROR_PTR("tree is null", procName, NULL); - if (t->root == NULL) { - L_INFO("tree is empty\n", procName); - return NULL; - } - - /* Just go down the left side as far as possible */ - n = t->root; - while (n && n->left) - n = n->left; - return n; -} - -/*! - * \brief l_rbtreeGetNext() - * - * \param[in] n current node - * \return next node, or NULL if it's the last node - * - *
- * Notes:
- *      (1) This finds the next node, in an in-order traversal, from
- *          the current node.
- *      (2) It is useful as an iterator for a map.
- *      (3) Call l_rbtreeGetFirst() to get the first node.
- * 
- */ -L_RBTREE_NODE * -l_rbtreeGetNext(L_RBTREE_NODE *n) -{ - PROCNAME("l_rbtreeGetNext"); - - if (!n) - return (L_RBTREE_NODE *)ERROR_PTR("n not defined", procName, NULL); - - /* If there is a right child, go to it, and then go left all the - * way to the end. Otherwise go up to the parent; continue upward - * as long as you're on the right branch, but stop at the parent - * when you hit it from the left branch. */ - if (n->right) { - n = n->right; - while (n->left) - n = n->left; - return n; - } else { - while (n->parent && n->parent->right == n) - n = n->parent; - return n->parent; - } -} - -/*! - * \brief l_rbtreeGetLast() - * - * \param[in] t rbtree, including root node - * \return void - * - *
- * Notes:
- *      (1) This is the last node in an in-order traversal.
- * 
- */ -L_RBTREE_NODE * -l_rbtreeGetLast(L_RBTREE *t) -{ -node *n; - - PROCNAME("l_rbtreeGetLast"); - - if (!t) - return (L_RBTREE_NODE *)ERROR_PTR("tree is null", procName, NULL); - if (t->root == NULL) { - L_INFO("tree is empty\n", procName); - return NULL; - } - - /* Just go down the right side as far as possible */ - n = t->root; - while (n && n->right) - n = n->right; - return n; -} - -/*! - * \brief l_rbtreeGetPrev() - * - * \param[in] n current node - * \return next node, or NULL if it's the first node - * - *
- * Notes:
- *      (1) This finds the previous node, in an in-order traversal, from
- *          the current node.
- *      (2) It is useful as an iterator for a map.
- *      (3) Call l_rbtreeGetLast() to get the last node.
- * 
- */ -L_RBTREE_NODE * -l_rbtreeGetPrev(L_RBTREE_NODE *n) -{ - PROCNAME("l_rbtreeGetPrev"); - - if (!n) - return (L_RBTREE_NODE *)ERROR_PTR("n not defined", procName, NULL); - - /* If there is a left child, go to it, and then go right all the - * way to the end. Otherwise go up to the parent; continue upward - * as long as you're on the left branch, but stop at the parent - * when you hit it from the right branch. */ - if (n->left) { - n = n->left; - while (n->right) - n = n->right; - return n; - } else { - while (n->parent && n->parent->left == n) - n = n->parent; - return n->parent; - } -} - -/*! - * \brief l_rbtreeGetCount() - * - * \param[in] t rbtree - * \return count the number of nodes in the tree, or 0 on error - */ -l_int32 -l_rbtreeGetCount(L_RBTREE *t) -{ -l_int32 count = 0; -node *n; - - if (!t) return 0; - n = t->root; - count_helper(n, &count); - return count; -} - - /* preorder DFS */ -static void -count_helper(node *n, l_int32 *pcount) -{ - if (n) - (*pcount)++; - else - return; - - count_helper(n->left, pcount); - count_helper(n->right, pcount); -} - - -/*! 
- * \brief l_rbtreePrint() - * - * \param[in] fp file stream - * \param[in] t rbtree - * \return void - */ -void -l_rbtreePrint(FILE *fp, - L_RBTREE *t) -{ - PROCNAME("l_rbtreePrint"); - if (!fp) { - L_ERROR("stream not defined\n", procName); - return; - } - if (!t) { - L_ERROR("tree not defined\n", procName); - return; - } - - print_tree_helper(fp, t->root, t->keytype, 0); - fprintf(fp, "\n"); -} - -#define INDENT_STEP 4 - -static void -print_tree_helper(FILE *fp, - node *n, - l_int32 keytype, - l_int32 indent) -{ -l_int32 i; - - if (n == NULL) { - fprintf(fp, ""); - return; - } - if (n->right != NULL) { - print_tree_helper(fp, n->right, keytype, indent + INDENT_STEP); - } - for (i = 0; i < indent; i++) - fprintf(fp, " "); - if (n->color == L_BLACK_NODE) { - if (keytype == L_INT_TYPE) - fprintf(fp, "%lld\n", n->key.itype); - else if (keytype == L_UINT_TYPE) - fprintf(fp, "%llx\n", n->key.utype); - else if (keytype == L_FLOAT_TYPE) - fprintf(fp, "%f\n", n->key.ftype); - } else { - if (keytype == L_INT_TYPE) - fprintf(fp, "<%lld>\n", n->key.itype); - else if (keytype == L_UINT_TYPE) - fprintf(fp, "<%llx>\n", n->key.utype); - else if (keytype == L_FLOAT_TYPE) - fprintf(fp, "<%f>\n", n->key.ftype); - } - if (n->left != NULL) { - print_tree_helper(fp, n->left, keytype, indent + INDENT_STEP); - } -} - - -/* ------------------------------------------------------------- * - * Static key comparison function * - * ------------------------------------------------------------- */ -static l_int32 -compareKeys(l_int32 keytype, - RB_TYPE left, - RB_TYPE right) -{ -static char procName[] = "compareKeys"; - - if (keytype == L_INT_TYPE) { - if (left.itype < right.itype) - return -1; - else if (left.itype > right.itype) - return 1; - else { /* equality */ - return 0; - } - } else if (keytype == L_UINT_TYPE) { - if (left.utype < right.utype) - return -1; - else if (left.utype > right.utype) - return 1; - else { /* equality */ - return 0; - } - } else if (keytype == L_FLOAT_TYPE) { - 
if (left.ftype < right.ftype) - return -1; - else if (left.ftype > right.ftype) - return 1; - else { /* equality */ - return 0; - } - } else { - L_ERROR("unknown keytype %d\n", procName, keytype); - return 0; - } -} - - -/* ------------------------------------------------------------- * - * Static red-black tree helpers * - * ------------------------------------------------------------- */ -static node *grandparent(node *n) { - if (!n || !n->parent || !n->parent->parent) { - L_ERROR("root and child of root have no grandparent\n", "grandparent"); - return NULL; - } - return n->parent->parent; -} - -static node *sibling(node *n) { - if (!n || !n->parent) { - L_ERROR("root has no sibling\n", "sibling"); - return NULL; - } - if (n == n->parent->left) - return n->parent->right; - else - return n->parent->left; -} - -static node *uncle(node *n) { - if (!n || !n->parent || !n->parent->parent) { - L_ERROR("root and child of root have no uncle\n", "uncle"); - return NULL; - } - return sibling(n->parent); -} - -static l_int32 node_color(node *n) { - return n == NULL ? 
L_BLACK_NODE : n->color; -} - - -static node *new_node(RB_TYPE key, RB_TYPE value, l_int32 node_color, - node *left, node *right) { - node *result = (node *)LEPT_CALLOC(1, sizeof(node)); - result->key = key; - result->value = value; - result->color = node_color; - result->left = left; - result->right = right; - if (left != NULL) left->parent = result; - if (right != NULL) right->parent = result; - result->parent = NULL; - return result; -} - -static node *lookup_node(L_RBTREE *t, RB_TYPE key) { - node *n = t->root; - while (n != NULL) { - int comp_result = compareKeys(t->keytype, key, n->key); - if (comp_result == 0) { - return n; - } else if (comp_result < 0) { - n = n->left; - } else { /* comp_result > 0 */ - n = n->right; - } - } - return n; -} - -static void rotate_left(L_RBTREE *t, node *n) { - node *r = n->right; - replace_node(t, n, r); - n->right = r->left; - if (r->left != NULL) { - r->left->parent = n; - } - r->left = n; - n->parent = r; -} - -static void rotate_right(L_RBTREE *t, node *n) { - node *L = n->left; - replace_node(t, n, L); - n->left = L->right; - if (L->right != NULL) { - L->right->parent = n; - } - L->right = n; - n->parent = L; -} - -static void replace_node(L_RBTREE *t, node *oldn, node *newn) { - if (oldn->parent == NULL) { - t->root = newn; - } else { - if (oldn == oldn->parent->left) - oldn->parent->left = newn; - else - oldn->parent->right = newn; - } - if (newn != NULL) { - newn->parent = oldn->parent; - } -} - -static void insert_case1(L_RBTREE *t, node *n) { - if (n->parent == NULL) - n->color = L_BLACK_NODE; - else - insert_case2(t, n); -} - -static void insert_case2(L_RBTREE *t, node *n) { - if (node_color(n->parent) == L_BLACK_NODE) - return; /* Tree is still valid */ - else - insert_case3(t, n); -} - -static void insert_case3(L_RBTREE *t, node *n) { - if (node_color(uncle(n)) == L_RED_NODE) { - n->parent->color = L_BLACK_NODE; - uncle(n)->color = L_BLACK_NODE; - grandparent(n)->color = L_RED_NODE; - insert_case1(t, 
grandparent(n)); - } else { - insert_case4(t, n); - } -} - -static void insert_case4(L_RBTREE *t, node *n) { - if (n == n->parent->right && n->parent == grandparent(n)->left) { - rotate_left(t, n->parent); - n = n->left; - } else if (n == n->parent->left && n->parent == grandparent(n)->right) { - rotate_right(t, n->parent); - n = n->right; - } - insert_case5(t, n); -} - -static void insert_case5(L_RBTREE *t, node *n) { - n->parent->color = L_BLACK_NODE; - grandparent(n)->color = L_RED_NODE; - if (n == n->parent->left && n->parent == grandparent(n)->left) { - rotate_right(t, grandparent(n)); - } else if (n == n->parent->right && n->parent == grandparent(n)->right) { - rotate_left(t, grandparent(n)); - } else { - L_ERROR("identity confusion\n", "insert_case5"); - } -} - -static node *maximum_node(node *n) { - if (!n) { - L_ERROR("n not defined\n", "maximum_node"); - return NULL; - } - while (n->right != NULL) { - n = n->right; - } - return n; -} - -static void delete_case1(L_RBTREE *t, node *n) { - if (n->parent == NULL) - return; - else - delete_case2(t, n); -} - -static void delete_case2(L_RBTREE *t, node *n) { - if (node_color(sibling(n)) == L_RED_NODE) { - n->parent->color = L_RED_NODE; - sibling(n)->color = L_BLACK_NODE; - if (n == n->parent->left) - rotate_left(t, n->parent); - else - rotate_right(t, n->parent); - } - delete_case3(t, n); -} - -static void delete_case3(L_RBTREE *t, node *n) { - if (node_color(n->parent) == L_BLACK_NODE && - node_color(sibling(n)) == L_BLACK_NODE && - node_color(sibling(n)->left) == L_BLACK_NODE && - node_color(sibling(n)->right) == L_BLACK_NODE) { - sibling(n)->color = L_RED_NODE; - delete_case1(t, n->parent); - } else { - delete_case4(t, n); - } -} - -static void delete_case4(L_RBTREE *t, node *n) { - if (node_color(n->parent) == L_RED_NODE && - node_color(sibling(n)) == L_BLACK_NODE && - node_color(sibling(n)->left) == L_BLACK_NODE && - node_color(sibling(n)->right) == L_BLACK_NODE) { - sibling(n)->color = L_RED_NODE; - 
n->parent->color = L_BLACK_NODE; - } else { - delete_case5(t, n); - } -} - -static void delete_case5(L_RBTREE *t, node *n) { - if (n == n->parent->left && - node_color(sibling(n)) == L_BLACK_NODE && - node_color(sibling(n)->left) == L_RED_NODE && - node_color(sibling(n)->right) == L_BLACK_NODE) { - sibling(n)->color = L_RED_NODE; - sibling(n)->left->color = L_BLACK_NODE; - rotate_right(t, sibling(n)); - } else if (n == n->parent->right && - node_color(sibling(n)) == L_BLACK_NODE && - node_color(sibling(n)->right) == L_RED_NODE && - node_color(sibling(n)->left) == L_BLACK_NODE) { - sibling(n)->color = L_RED_NODE; - sibling(n)->right->color = L_BLACK_NODE; - rotate_left(t, sibling(n)); - } - delete_case6(t, n); -} - -static void delete_case6(L_RBTREE *t, node *n) { - sibling(n)->color = node_color(n->parent); - n->parent->color = L_BLACK_NODE; - if (n == n->parent->left) { - if (node_color(sibling(n)->right) != L_RED_NODE) { - L_ERROR("right sibling is not RED", "delete_case6"); - return; - } - sibling(n)->right->color = L_BLACK_NODE; - rotate_left(t, n->parent); - } else { - if (node_color(sibling(n)->left) != L_RED_NODE) { - L_ERROR("left sibling is not RED", "delete_case6"); - return; - } - sibling(n)->left->color = L_BLACK_NODE; - rotate_right(t, n->parent); - } -} - - -/* ------------------------------------------------------------- * - * Debugging: verify if tree is valid * - * ------------------------------------------------------------- */ -#if VERIFY_RBTREE -static void verify_property_1(node *root); -static void verify_property_2(node *root); -static void verify_property_4(node *root); -static void verify_property_5(node *root); -static void verify_property_5_helper(node *n, int black_count, - int* black_count_path); -#endif - -static void verify_properties(L_RBTREE *t) { -#if VERIFY_RBTREE - verify_property_1(t->root); - verify_property_2(t->root); - /* Property 3 is implicit */ - verify_property_4(t->root); - verify_property_5(t->root); -#endif -} - -#if 
VERIFY_RBTREE -static void verify_property_1(node *n) { - if (node_color(n) != L_RED_NODE && node_color(n) != L_BLACK_NODE) { - L_ERROR("color neither RED nor BLACK\n", "verify_property_1"); - return; - } - if (n == NULL) return; - verify_property_1(n->left); - verify_property_1(n->right); -} - -static void verify_property_2(node *root) { - if (node_color(root) != L_BLACK_NODE) - L_ERROR("root is not black!\n", "verify_property_2"); -} - -static void verify_property_4(node *n) { - if (node_color(n) == L_RED_NODE) { - if (node_color(n->left) != L_BLACK_NODE || - node_color(n->right) != L_BLACK_NODE || - node_color(n->parent) != L_BLACK_NODE) { - L_ERROR("children & parent not all BLACK", "verify_property_4"); - return; - } - } - if (n == NULL) return; - verify_property_4(n->left); - verify_property_4(n->right); -} - -static void verify_property_5(node *root) { - int black_count_path = -1; - verify_property_5_helper(root, 0, &black_count_path); -} - -static void verify_property_5_helper(node *n, int black_count, - int* path_black_count) { - if (node_color(n) == L_BLACK_NODE) { - black_count++; - } - if (n == NULL) { - if (*path_black_count == -1) { - *path_black_count = black_count; - } else if (*path_black_count != black_count) { - L_ERROR("incorrect black count", "verify_property_5_helper"); - } - return; - } - verify_property_5_helper(n->left, black_count, path_black_count); - verify_property_5_helper(n->right, black_count, path_black_count); -} -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rbtree.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rbtree.h deleted file mode 100644 index 6977d336..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rbtree.h +++ /dev/null @@ -1,91 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/* - * Modified from the excellent code here: - * http://en.literateprograms.org/Red-black_tree_(C)?oldid=19567 - * which has been placed in the public domain under the Creative Commons - * CC0 1.0 waiver (http://creativecommons.org/publicdomain/zero/1.0/). - * - * When the key is generated from a hash (e.g., string --> uint64), - * there is always the possibility of having collisions, but to make - * the collision probability very low requires using a large hash. 
- * For that reason, the key types are 64 bit quantities, which will result - * in a negligible probabililty of collisions for millions of hashed values. - * Using 8 byte keys instead of 4 byte keys requires a little more - * storage, but the simplification in being able to ignore collisions - * with the red-black trees for most applications is worth it. - */ - -#ifndef LEPTONICA_RBTREE_H -#define LEPTONICA_RBTREE_H - - /*! The three valid key types for red-black trees, maps and sets. */ -/*! RBTree Key Type */ -enum { - L_INT_TYPE = 1, - L_UINT_TYPE = 2, - L_FLOAT_TYPE = 3 -}; - - /*! - * Storage for keys and values for red-black trees, maps and sets. - *
-     * Note:
-     *   (1) Keys and values of the valid key types are all 64-bit
-     *   (2) (void *) can be used for values but not for keys.
-     * 
- */ -union Rb_Type { - l_int64 itype; - l_uint64 utype; - l_float64 ftype; - void *ptype; -}; -typedef union Rb_Type RB_TYPE; - -struct L_Rbtree { - struct L_Rbtree_Node *root; - l_int32 keytype; -}; -typedef struct L_Rbtree L_RBTREE; -typedef struct L_Rbtree L_AMAP; /* hide underlying implementation for map */ -typedef struct L_Rbtree L_ASET; /* hide underlying implementation for set */ - -struct L_Rbtree_Node { - union Rb_Type key; - union Rb_Type value; - struct L_Rbtree_Node *left; - struct L_Rbtree_Node *right; - struct L_Rbtree_Node *parent; - l_int32 color; -}; -typedef struct L_Rbtree_Node L_RBTREE_NODE; -typedef struct L_Rbtree_Node L_AMAP_NODE; /* hide tree implementation */ -typedef struct L_Rbtree_Node L_ASET_NODE; /* hide tree implementation */ - - -#endif /* LEPTONICA_RBTREE_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readbarcode.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readbarcode.c deleted file mode 100644 index 83b0cdef..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readbarcode.c +++ /dev/null @@ -1,1498 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file readbarcode.c - *
- *
- *      Basic operations to locate and identify the line widths
- *      in 1D barcodes.
- *
- *      Top level
- *          SARRAY          *pixProcessBarcodes()
- *
- *      Next levels
- *          PIXA            *pixExtractBarcodes()
- *          SARRAY          *pixReadBarcodes()
- *          l_int32          pixReadBarcodeWidths()
- *
- *      Location
- *          BOXA            *pixLocateBarcodes()
- *          static PIX      *pixGenerateBarcodeMask()
- *
- *      Extraction and deskew
- *          PIXA            *pixDeskewBarcodes()
- *
- *      Process to get line widths
- *          NUMA            *pixExtractBarcodeWidths1()
- *          NUMA            *pixExtractBarcodeWidths2()
- *          NUMA            *pixExtractBarcodeCrossings()
- *
- *      Average adjacent rasters
- *          static NUMA     *pixAverageRasterScans()
- *
- *      Signal processing for barcode widths
- *          NUMA            *numaQuantizeCrossingsByWidth()
- *          static l_int32   numaGetCrossingDistances()
- *          static NUMA     *numaLocatePeakRanges()
- *          static NUMA     *numaGetPeakCentroids()
- *          static NUMA     *numaGetPeakWidthLUT()
- *          NUMA            *numaQuantizeCrossingsByWindow()
- *          static l_int32   numaEvalBestWidthAndShift()
- *          static l_int32   numaEvalSyncError()
- *
- *
- *  NOTE CAREFULLY: This is "early beta" code.  It has not been tuned
- *  to work robustly on a large database of barcode images.  I'm putting
- *  it out so that people can play with it, find out how it breaks, and
- *  contribute decoders for other barcode formats.  Both the functional
- *  interfaces and ABI will almost certainly change in the coming
- *  few months.  The actual decoder, in bardecode.c, at present only
- *  works on the following codes: Code I2of5, Code 2of5, Code 39, Code 93
- *  Codabar and UPCA.  To add another barcode format, it is necessary
- *  to make changes in readbarcode.h and bardecode.c.
- *  The program prog/barcodetest shows how to run from the top level
- *  (image --> decoded data).
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#include "readbarcode.h" - - /* Parameters for pixGenerateBarcodeMask() */ -static const l_int32 MAX_SPACE_WIDTH = 19; /* was 15 */ -static const l_int32 MAX_NOISE_WIDTH = 50; /* smaller than barcode width */ -static const l_int32 MAX_NOISE_HEIGHT = 30; /* smaller than barcode height */ - - /* Static functions */ -static PIX *pixGenerateBarcodeMask(PIX *pixs, l_int32 maxspace, - l_int32 nwidth, l_int32 nheight); -static NUMA *pixAverageRasterScans(PIX *pixs, l_int32 nscans); -static l_int32 numaGetCrossingDistances(NUMA *nas, NUMA **pnaedist, - NUMA **pnaodist, l_float32 *pmindist, - l_float32 *pmaxdist); -static NUMA *numaLocatePeakRanges(NUMA *nas, l_float32 minfirst, - l_float32 minsep, l_float32 maxmin); -static NUMA *numaGetPeakCentroids(NUMA *nahist, NUMA *narange); -static NUMA *numaGetPeakWidthLUT(NUMA *narange, NUMA *nacent); -static l_int32 numaEvalBestWidthAndShift(NUMA *nas, l_int32 nwidth, - l_int32 nshift, l_float32 minwidth, - l_float32 maxwidth, - l_float32 *pbestwidth, - l_float32 *pbestshift, - l_float32 *pbestscore); -static l_int32 numaEvalSyncError(NUMA *nas, l_int32 ifirst, l_int32 ilast, - l_float32 width, l_float32 shift, - l_float32 *pscore, NUMA **pnad); - - -#ifndef NO_CONSOLE_IO -#define DEBUG_DESKEW 1 -#define DEBUG_WIDTHS 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*------------------------------------------------------------------------* - * Top level * - *------------------------------------------------------------------------*/ -/*! - * \brief pixProcessBarcodes() - * - * \param[in] pixs any depth - * \param[in] format L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ... 
- * \param[in] method L_USE_WIDTHS, L_USE_WINDOWS - * \param[out] psaw [optional] sarray of bar widths - * \param[in] debugflag use 1 to generate debug output - * \return sarray text of barcodes, or NULL if none found or on error - */ -SARRAY * -pixProcessBarcodes(PIX *pixs, - l_int32 format, - l_int32 method, - SARRAY **psaw, - l_int32 debugflag) -{ -PIX *pixg; -PIXA *pixa; -SARRAY *sad; - - PROCNAME("pixProcessBarcodes"); - - if (psaw) *psaw = NULL; - if (!pixs) - return (SARRAY *)ERROR_PTR("pixs not defined", procName, NULL); - if (format != L_BF_ANY && !barcodeFormatIsSupported(format)) - return (SARRAY *)ERROR_PTR("unsupported format", procName, NULL); - if (method != L_USE_WIDTHS && method != L_USE_WINDOWS) - return (SARRAY *)ERROR_PTR("invalid method", procName, NULL); - - /* Get an 8 bpp image, no cmap */ - if (pixGetDepth(pixs) == 8 && !pixGetColormap(pixs)) - pixg = pixClone(pixs); - else - pixg = pixConvertTo8(pixs, 0); - - if ((pixa = pixExtractBarcodes(pixg, debugflag)) == NULL) { - pixDestroy(&pixg); - return (SARRAY *)ERROR_PTR("no barcode(s) found", procName, NULL); - } - - sad = pixReadBarcodes(pixa, format, method, psaw, debugflag); - - pixDestroy(&pixg); - pixaDestroy(&pixa); - return sad; -} - - -/*! - * \brief pixExtractBarcodes() - * - * \param[in] pixs 8 bpp, no colormap - * \param[in] debugflag use 1 to generate debug output - * \return pixa deskewed and cropped barcodes, or NULL if none found - * or on error - */ -PIXA * -pixExtractBarcodes(PIX *pixs, - l_int32 debugflag) -{ -l_int32 i, n; -l_float32 angle, conf; -BOX *box; -BOXA *boxa; -PIX *pixb, *pixm, *pixt; -PIXA *pixa; - - PROCNAME("pixExtractBarcodes"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIXA *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - /* Locate them; use small threshold for edges. 
*/ - boxa = pixLocateBarcodes(pixs, 20, &pixb, &pixm); - n = boxaGetCount(boxa); - L_INFO("%d possible barcode(s) found\n", procName, n); - if (n == 0) { - boxaDestroy(&boxa); - pixDestroy(&pixb); - pixDestroy(&pixm); - return NULL; - } - - if (debugflag) { - boxaWriteStderr(boxa); - pixDisplay(pixb, 100, 100); - pixDisplay(pixm, 800, 100); - } - - /* Deskew each barcode individually */ - pixa = pixaCreate(n); - for (i = 0; i < n; i++) { - box = boxaGetBox(boxa, i, L_CLONE); - pixt = pixDeskewBarcode(pixs, pixb, box, 15, 20, &angle, &conf); - L_INFO("angle = %6.2f, conf = %6.2f\n", procName, angle, conf); - if (conf > 5.0) { - pixaAddPix(pixa, pixt, L_INSERT); - pixaAddBox(pixa, box, L_INSERT); - } else { - pixDestroy(&pixt); - boxDestroy(&box); - } - } - -#if DEBUG_DESKEW - pixt = pixaDisplayTiledInRows(pixa, 8, 1000, 1.0, 0, 30, 2); - pixWrite("junkpixt", pixt, IFF_PNG); - pixDestroy(&pixt); -#endif /* DEBUG_DESKEW */ - - pixDestroy(&pixb); - pixDestroy(&pixm); - boxaDestroy(&boxa); - return pixa; -} - - -/*! - * \brief pixReadBarcodes() - * - * \param[in] pixa of 8 bpp deskewed and cropped barcodes - * \param[in] format L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ... 
- * \param[in] method L_USE_WIDTHS, L_USE_WINDOWS; - * \param[out] psaw [optional] sarray of bar widths - * \param[in] debugflag use 1 to generate debug output - * \return sa sarray of widths, one string for each barcode found, - * or NULL on error - */ -SARRAY * -pixReadBarcodes(PIXA *pixa, - l_int32 format, - l_int32 method, - SARRAY **psaw, - l_int32 debugflag) -{ -char *barstr, *data; -char emptystring[] = ""; -l_int32 i, j, n, nbars, ival; -NUMA *na; -PIX *pixt; -SARRAY *saw, *sad; - - PROCNAME("pixReadBarcodes"); - - if (psaw) *psaw = NULL; - if (!pixa) - return (SARRAY *)ERROR_PTR("pixa not defined", procName, NULL); - if (format != L_BF_ANY && !barcodeFormatIsSupported(format)) - return (SARRAY *)ERROR_PTR("unsupported format", procName, NULL); - if (method != L_USE_WIDTHS && method != L_USE_WINDOWS) - return (SARRAY *)ERROR_PTR("invalid method", procName, NULL); - - n = pixaGetCount(pixa); - saw = sarrayCreate(n); - sad = sarrayCreate(n); - for (i = 0; i < n; i++) { - /* Extract the widths of the lines in each barcode */ - pixt = pixaGetPix(pixa, i, L_CLONE); - na = pixReadBarcodeWidths(pixt, method, debugflag); - pixDestroy(&pixt); - if (!na) { - ERROR_INT("valid barcode widths not returned", procName, 1); - continue; - } - - /* Save the widths as a string */ - nbars = numaGetCount(na); - barstr = (char *)LEPT_CALLOC(nbars + 1, sizeof(char)); - for (j = 0; j < nbars; j++) { - numaGetIValue(na, j, &ival); - barstr[j] = 0x30 + ival; - } - sarrayAddString(saw, barstr, L_INSERT); - numaDestroy(&na); - - /* Decode the width strings */ - data = barcodeDispatchDecoder(barstr, format, debugflag); - if (!data) { - ERROR_INT("barcode not decoded", procName, 1); - sarrayAddString(sad, emptystring, L_COPY); - continue; - } - sarrayAddString(sad, data, L_INSERT); - } - - /* If nothing found, clean up */ - if (sarrayGetCount(saw) == 0) { - sarrayDestroy(&saw); - sarrayDestroy(&sad); - return (SARRAY *)ERROR_PTR("no valid barcode data", procName, NULL); - } - - if 
(psaw) - *psaw = saw; - else - sarrayDestroy(&saw); - - return sad; -} - - -/*! - * \brief pixReadBarcodeWidths() - * - * \param[in] pixs of 8 bpp deskewed and cropped barcode - * \param[in] method L_USE_WIDTHS, L_USE_WINDOWS; - * \param[in] debugflag use 1 to generate debug output - * \return na numa of widths (each in set {1,2,3,4}, or NULL on error - */ -NUMA * -pixReadBarcodeWidths(PIX *pixs, - l_int32 method, - l_int32 debugflag) -{ -l_float32 winwidth; -NUMA *na; - - PROCNAME("pixReadBarcodeWidths"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (NUMA *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - if (method != L_USE_WIDTHS && method != L_USE_WINDOWS) - return (NUMA *)ERROR_PTR("invalid method", procName, NULL); - - /* Extract the widths of the lines in each barcode */ - if (method == L_USE_WIDTHS) - na = pixExtractBarcodeWidths1(pixs, 120, 0.25, NULL, NULL, - debugflag); - else /* method == L_USE_WINDOWS */ - na = pixExtractBarcodeWidths2(pixs, 120, &winwidth, - NULL, debugflag); -#if DEBUG_WIDTHS - if (method == L_USE_WINDOWS) - lept_stderr("Window width for barcode: %7.3f\n", winwidth); - numaWriteStderr(na); -#endif /* DEBUG_WIDTHS */ - - if (!na) - return (NUMA *)ERROR_PTR("barcode widths invalid", procName, NULL); - - return na; -} - - -/*------------------------------------------------------------------------* - * Locate barcode in image * - *------------------------------------------------------------------------*/ -/*! - * \brief pixLocateBarcodes() - * - * \param[in] pixs any depth - * \param[in] thresh for binarization of edge filter output; typ. 
20 - * \param[out] ppixb [optional] binarized edge filtered input image - * \param[out] ppixm [optional] mask over barcodes - * \return boxa location of barcodes, or NULL if none found or on error - */ -BOXA * -pixLocateBarcodes(PIX *pixs, - l_int32 thresh, - PIX **ppixb, - PIX **ppixm) -{ -BOXA *boxa; -PIX *pix8, *pixe, *pixb, *pixm; - - PROCNAME("pixLocateBarcodes"); - - if (!pixs) - return (BOXA *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Get an 8 bpp image, no cmap */ - if (pixGetDepth(pixs) == 8 && !pixGetColormap(pixs)) - pix8 = pixClone(pixs); - else - pix8 = pixConvertTo8(pixs, 0); - - /* Get a 1 bpp image of the edges */ - pixe = pixSobelEdgeFilter(pix8, L_ALL_EDGES); - pixb = pixThresholdToBinary(pixe, thresh); - pixInvert(pixb, pixb); - pixDestroy(&pix8); - pixDestroy(&pixe); - - pixm = pixGenerateBarcodeMask(pixb, MAX_SPACE_WIDTH, MAX_NOISE_WIDTH, - MAX_NOISE_HEIGHT); - boxa = pixConnComp(pixm, NULL, 8); - - if (ppixb) - *ppixb = pixb; - else - pixDestroy(&pixb); - if (ppixm) - *ppixm = pixm; - else - pixDestroy(&pixm); - - return boxa; -} - - -/*! - * \brief pixGenerateBarcodeMask() - * - * \param[in] pixs 1 bpp - * \param[in] maxspace largest space in the barcode, in pixels - * \param[in] nwidth opening 'width' to remove noise - * \param[in] nheight opening 'height' to remove noise - * \return pixm mask over barcodes, or NULL if none found or on error - * - *
- * Notes:
- *      (1) For noise removal, 'width' and 'height' are referred to the
- *          barcode orientation.
- *      (2) If there is skew, the mask will not cover the barcode corners.
- * 
- */ -static PIX * -pixGenerateBarcodeMask(PIX *pixs, - l_int32 maxspace, - l_int32 nwidth, - l_int32 nheight) -{ -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixGenerateBarcodeMask"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Identify horizontal barcodes */ - pixt1 = pixCloseBrick(NULL, pixs, maxspace + 1, 1); - pixt2 = pixOpenBrick(NULL, pixs, maxspace + 1, 1); - pixXor(pixt2, pixt2, pixt1); - pixOpenBrick(pixt2, pixt2, nwidth, nheight); - pixDestroy(&pixt1); - - /* Identify vertical barcodes */ - pixt1 = pixCloseBrick(NULL, pixs, 1, maxspace + 1); - pixd = pixOpenBrick(NULL, pixs, 1, maxspace + 1); - pixXor(pixd, pixd, pixt1); - pixOpenBrick(pixd, pixd, nheight, nwidth); - pixDestroy(&pixt1); - - /* Combine to get all barcodes */ - pixOr(pixd, pixd, pixt2); - pixDestroy(&pixt2); - - return pixd; -} - - -/*------------------------------------------------------------------------* - * Extract and deskew barcode * - *------------------------------------------------------------------------*/ -/*! - * \brief pixDeskewBarcode() - * - * \param[in] pixs input image; 8 bpp - * \param[in] pixb binarized edge-filtered input image - * \param[in] box identified region containing barcode - * \param[in] margin of extra pixels around box to extract - * \param[in] threshold for binarization; ~20 - * \param[out] pangle [optional] in degrees, clockwise is positive - * \param[out] pconf [optional] confidence - * \return pixd deskewed barcode, or NULL on error - * - *
- * Notes:
- *     (1) The (optional) angle returned is the angle in degrees (cw positive)
- *         necessary to rotate the image so that it is deskewed.
- * 
- */ -PIX * -pixDeskewBarcode(PIX *pixs, - PIX *pixb, - BOX *box, - l_int32 margin, - l_int32 threshold, - l_float32 *pangle, - l_float32 *pconf) -{ -l_int32 x, y, w, h, n; -l_float32 angle, angle1, angle2, conf, conf1, conf2, score1, score2, deg2rad; -BOX *boxe, *boxt; -BOXA *boxa, *boxat; -PIX *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6, *pixd; - - PROCNAME("pixDeskewBarcode"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - if (!pixb || pixGetDepth(pixb) != 1) - return (PIX *)ERROR_PTR("pixb undefined or not 1 bpp", procName, NULL); - if (!box) - return (PIX *)ERROR_PTR("box not defined or 1 bpp", procName, NULL); - - /* Clip out */ - deg2rad = 3.1415926535 / 180.; - boxGetGeometry(box, &x, &y, &w, &h); - boxe = boxCreate(x - 25, y - 25, w + 51, h + 51); - pixt1 = pixClipRectangle(pixb, boxe, NULL); - pixt2 = pixClipRectangle(pixs, boxe, NULL); - boxDestroy(&boxe); - - /* Deskew, looking at all possible orientations over 180 degrees */ - pixt3 = pixRotateOrth(pixt1, 1); /* look for vertical bar lines */ - pixt4 = pixClone(pixt1); /* look for horizontal bar lines */ - pixFindSkewSweepAndSearchScore(pixt3, &angle1, &conf1, &score1, - 1, 1, 0.0, 45.0, 2.5, 0.01); - pixFindSkewSweepAndSearchScore(pixt4, &angle2, &conf2, &score2, - 1, 1, 0.0, 45.0, 2.5, 0.01); - - /* Because we're using the boundary pixels of the barcodes, - * the peak can be sharper (and the confidence ratio higher) - * from the signal across the top and bottom of the barcode. - * However, the max score, which is the magnitude of the signal - * at the optimum skew angle, will be smaller, so we use the - * max score as the primary indicator of orientation. 
*/ - if (score1 >= score2) { - conf = conf1; - if (conf1 > 6.0 && L_ABS(angle1) > 0.1) { - angle = angle1; - pixt5 = pixRotate(pixt2, deg2rad * angle1, L_ROTATE_AREA_MAP, - L_BRING_IN_WHITE, 0, 0); - } else { - angle = 0.0; - pixt5 = pixClone(pixt2); - } - } else { /* score2 > score1 */ - conf = conf2; - pixt6 = pixRotateOrth(pixt2, 1); - if (conf2 > 6.0 && L_ABS(angle2) > 0.1) { - angle = 90.0 + angle2; - pixt5 = pixRotate(pixt6, deg2rad * angle2, L_ROTATE_AREA_MAP, - L_BRING_IN_WHITE, 0, 0); - } else { - angle = 90.0; - pixt5 = pixClone(pixt6); - } - pixDestroy(&pixt6); - } - pixDestroy(&pixt3); - pixDestroy(&pixt4); - - /* Extract barcode plus a margin around it */ - boxa = pixLocateBarcodes(pixt5, threshold, 0, 0); - if ((n = boxaGetCount(boxa)) != 1) { - L_WARNING("barcode mask in %d components\n", procName, n); - boxat = boxaSort(boxa, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); - } else { - boxat = boxaCopy(boxa, L_CLONE); - } - boxt = boxaGetBox(boxat, 0, L_CLONE); - boxGetGeometry(boxt, &x, &y, &w, &h); - boxe = boxCreate(x - margin, y - margin, w + 2 * margin, - h + 2 * margin); - pixd = pixClipRectangle(pixt5, boxe, NULL); - boxDestroy(&boxt); - boxDestroy(&boxe); - boxaDestroy(&boxa); - boxaDestroy(&boxat); - - if (pangle) *pangle = angle; - if (pconf) *pconf = conf; - - pixDestroy(&pixt1); - pixDestroy(&pixt2); - pixDestroy(&pixt5); - return pixd; -} - - -/*------------------------------------------------------------------------* - * Process to get line widths * - *------------------------------------------------------------------------*/ -/*! - * \brief pixExtractBarcodeWidths1() - * - * \param[in] pixs input image; 8 bpp - * \param[in] thresh estimated pixel threshold for crossing - * white <--> black; typ. 
~120 - * \param[in] binfract histo binsize as a fraction of minsize; e.g., 0.25 - * \param[out] pnaehist [optional] histogram of black widths; NULL ok - * \param[out] pnaohist [optional] histogram of white widths; NULL ok - * \param[in] debugflag use 1 to generate debug output - * \return nad numa of barcode widths in encoded integer units, - * or NULL on error - * - *
- * Notes:
- *     (1) The widths are alternating black/white, starting with black
- *         and ending with black.
- *     (2) This method uses the widths of the bars directly, in terms
- *         of the (float) number of pixels between transitions.
- *         The histograms of these widths for black and white bars is
- *         generated and interpreted.
- * 
- */ -NUMA * -pixExtractBarcodeWidths1(PIX *pixs, - l_float32 thresh, - l_float32 binfract, - NUMA **pnaehist, - NUMA **pnaohist, - l_int32 debugflag) -{ -NUMA *nac, *nad; - - PROCNAME("pixExtractBarcodeWidths1"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (NUMA *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - /* Get the best estimate of the crossings, in pixel units */ - nac = pixExtractBarcodeCrossings(pixs, thresh, debugflag); - - /* Get the array of bar widths, starting with a black bar */ - nad = numaQuantizeCrossingsByWidth(nac, binfract, pnaehist, - pnaohist, debugflag); - - numaDestroy(&nac); - return nad; -} - - -/*! - * \brief pixExtractBarcodeWidths2() - * - * \param[in] pixs input image; 8 bpp - * \param[in] thresh estimated pixel threshold for crossing - * white <--> black; typ. ~120 - * \param[out] pwidth [optional] best decoding window width, in pixels - * \param[out] pnac [optional] number of transitions in each window - * \param[in] debugflag use 1 to generate debug output - * \return nad numa of barcode widths in encoded integer units, - * or NULL on error - * - *
- * Notes:
- *      (1) The widths are alternating black/white, starting with black
- *          and ending with black.
- *      (2) The optional best decoding window width is the width of the window
- *          that is used to make a decision about whether a transition occurs.
- *          It is approximately the average width in pixels of the narrowest
- *          white and black bars (i.e., those corresponding to unit width).
- *      (3) The optional return signal %nac is a sequence of 0s, 1s,
- *          and perhaps a few 2s, giving the number of crossings in each window.
- *          On the occasion where there is a '2', it is interpreted as
- *          as ending two runs: the previous one and another one that has length 1.
- * 
- */ -NUMA * -pixExtractBarcodeWidths2(PIX *pixs, - l_float32 thresh, - l_float32 *pwidth, - NUMA **pnac, - l_int32 debugflag) -{ -NUMA *nacp, *nad; - - PROCNAME("pixExtractBarcodeWidths2"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (NUMA *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - /* Get the best estimate of the crossings, in pixel units */ - nacp = pixExtractBarcodeCrossings(pixs, thresh, debugflag); - - /* Quantize the crossings to get actual windowed data */ - nad = numaQuantizeCrossingsByWindow(nacp, 2.0, pwidth, NULL, pnac, debugflag); - - numaDestroy(&nacp); - return nad; -} - - -/*! - * \brief pixExtractBarcodeCrossings() - * - * \param[in] pixs input image; 8 bpp - * \param[in] thresh estimated pixel threshold for crossing - * white <--> black; typ. ~120 - * \param[in] debugflag use 1 to generate debug output - * \return numa of crossings, in pixel units, or NULL on error - */ -NUMA * -pixExtractBarcodeCrossings(PIX *pixs, - l_float32 thresh, - l_int32 debugflag) -{ -l_int32 w; -l_float32 bestthresh; -NUMA *nas, *nax, *nay, *nad; - - PROCNAME("pixExtractBarcodeCrossings"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (NUMA *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - /* Scan pixels horizontally and average results */ - nas = pixAverageRasterScans(pixs, 51); - - /* Interpolate to get 4x the number of values */ - w = pixGetWidth(pixs); - numaInterpolateEqxInterval(0.0, 1.0, nas, L_QUADRATIC_INTERP, 0.0, - (l_float32)(w - 1), 4 * w + 1, &nax, &nay); - - if (debugflag) { - lept_mkdir("lept/barcode"); - GPLOT *gplot = gplotCreate("/tmp/lept/barcode/signal", GPLOT_PNG, - "Pixel values", "dist in pixels", "value"); - gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "plot 1"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - } - - /* Locate the crossings. Run multiple times with different - * thresholds, and choose a threshold in the center of the - * run of thresholds that all give the maximum number of crossings. 
*/ - numaSelectCrossingThreshold(nax, nay, thresh, &bestthresh); - - /* Get the crossings with the best threshold. */ - nad = numaCrossingsByThreshold(nax, nay, bestthresh); - - numaDestroy(&nas); - numaDestroy(&nax); - numaDestroy(&nay); - return nad; -} - - -/*------------------------------------------------------------------------* - * Average adjacent rasters * - *------------------------------------------------------------------------*/ -/*! - * \brief pixAverageRasterScans() - * - * \param[in] pixs input image; 8 bpp - * \param[in] nscans number of adjacent scans, about the center vertically - * \return numa of average pixel values across image, or NULL on error - */ -static NUMA * -pixAverageRasterScans(PIX *pixs, - l_int32 nscans) -{ -l_int32 w, h, first, last, i, j, wpl, val; -l_uint32 *line, *data; -l_float32 *array; -NUMA *nad; - - PROCNAME("pixAverageRasterScans"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (NUMA *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (nscans <= h) { - first = 0; - last = h - 1; - nscans = h; - } else { - first = (h - nscans) / 2; - last = first + nscans - 1; - } - - nad = numaCreate(w); - numaSetCount(nad, w); - array = numaGetFArray(nad, L_NOCOPY); - wpl = pixGetWpl(pixs); - data = pixGetData(pixs); - for (j = 0; j < w; j++) { - for (i = first; i <= last; i++) { - line = data + i * wpl; - val = GET_DATA_BYTE(line, j); - array[j] += val; - } - array[j] = array[j] / (l_float32)nscans; - } - - return nad; -} - - -/*------------------------------------------------------------------------* - * Signal processing for barcode widths * - *------------------------------------------------------------------------*/ -/*! 
- * \brief numaQuantizeCrossingsByWidth() - * - * \param[in] nas numa of crossing locations, in pixel units - * \param[in] binfract histo binsize as a fraction of minsize; e.g., 0.25 - * \param[out] pnaehist [optional] histo of even (black) bar widths - * \param[out] pnaohist [optional] histo of odd (white) bar widths - * \param[in] debugflag 1 to generate plots of histograms of bar widths - * \return nad sequence of widths, in unit sizes, or NULL on error - * - *
- * Notes:
- *      (1) This first computes the histogram of black and white bar widths,
- *          binned in appropriate units.  There should be well-defined
- *          peaks, each corresponding to a specific width.  The sequence
- *          of barcode widths (namely, the integers from the set {1,2,3,4})
- *          is returned.
- *      (2) The optional returned histograms are binned in width units
- *          that are inversely proportional to %binfract.  For example,
- *          if %binfract = 0.25, there are 4.0 bins in the distance of
- *          the width of the narrowest bar.
- * 
- */ -NUMA * -numaQuantizeCrossingsByWidth(NUMA *nas, - l_float32 binfract, - NUMA **pnaehist, - NUMA **pnaohist, - l_int32 debugflag) -{ -l_int32 i, n, ned, nod, iw, width; -l_float32 val, minsize, maxsize, factor; -GPLOT *gplot; -NUMA *naedist, *naodist, *naehist, *naohist, *naecent, *naocent; -NUMA *naerange, *naorange, *naelut, *naolut, *nad; - - PROCNAME("numaQuantizeCrossingsByWidth"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - if (n < 2) - return (NUMA *)ERROR_PTR("n < 2", procName, NULL); - if (binfract <= 0.0) - return (NUMA *)ERROR_PTR("binfract <= 0.0", procName, NULL); - - /* Get even and odd crossing distances */ - numaGetCrossingDistances(nas, &naedist, &naodist, &minsize, &maxsize); - - /* Bin the spans in units of binfract * minsize. These - * units are convenient because they scale to make at least - * 1/binfract bins in the smallest span (width). We want this - * number to be large enough to clearly separate the - * widths, but small enough so that the histogram peaks - * have very few if any holes (zeroes) within them. */ - naehist = numaMakeHistogramClipped(naedist, binfract * minsize, - (1.25 / binfract) * maxsize); - naohist = numaMakeHistogramClipped(naodist, binfract * minsize, - (1.25 / binfract) * maxsize); - - if (debugflag) { - lept_mkdir("lept/barcode"); - gplot = gplotCreate("/tmp/lept/barcode/histw", GPLOT_PNG, - "Raw width histogram", "Width", "Number"); - gplotAddPlot(gplot, NULL, naehist, GPLOT_LINES, "plot black"); - gplotAddPlot(gplot, NULL, naohist, GPLOT_LINES, "plot white"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - } - - /* Compute the peak ranges, still in units of binfract * minsize. 
*/ - naerange = numaLocatePeakRanges(naehist, 1.0 / binfract, - 1.0 / binfract, 0.0); - naorange = numaLocatePeakRanges(naohist, 1.0 / binfract, - 1.0 / binfract, 0.0); - - /* Find the centroid values of each peak */ - naecent = numaGetPeakCentroids(naehist, naerange); - naocent = numaGetPeakCentroids(naohist, naorange); - - /* Generate the lookup tables that map from the bar width, in - * units of (binfract * minsize), to the integerized barcode - * units (1, 2, 3, 4), which are the output integer widths - * between transitions. */ - naelut = numaGetPeakWidthLUT(naerange, naecent); - naolut = numaGetPeakWidthLUT(naorange, naocent); - - /* Get the widths. Because the LUT accepts our funny units, - * we first must convert the pixel widths to these units, - * which is what 'factor' does. */ - nad = numaCreate(0); - ned = numaGetCount(naedist); - nod = numaGetCount(naodist); - if (nod != ned - 1) - L_WARNING("ned != nod + 1\n", procName); - factor = 1.0 / (binfract * minsize); /* for converting units */ - for (i = 0; i < ned - 1; i++) { - numaGetFValue(naedist, i, &val); - width = (l_int32)(factor * val); - numaGetIValue(naelut, width, &iw); - numaAddNumber(nad, iw); -/* lept_stderr("even: val = %7.3f, width = %d, iw = %d\n", - val, width, iw); */ - numaGetFValue(naodist, i, &val); - width = (l_int32)(factor * val); - numaGetIValue(naolut, width, &iw); - numaAddNumber(nad, iw); -/* lept_stderr("odd: val = %7.3f, width = %d, iw = %d\n", - val, width, iw); */ - } - numaGetFValue(naedist, ned - 1, &val); - width = (l_int32)(factor * val); - numaGetIValue(naelut, width, &iw); - numaAddNumber(nad, iw); - - if (debugflag) { - lept_stderr(" ---- Black bar widths (pixels) ------ \n"); - numaWriteStderr(naedist); - lept_stderr(" ---- Histogram of black bar widths ------ \n"); - numaWriteStderr(naehist); - lept_stderr(" ---- Peak ranges in black bar histogram bins --- \n"); - numaWriteStderr(naerange); - lept_stderr(" ---- Peak black bar centroid width values ------ \n"); - 
numaWriteStderr(naecent); - lept_stderr(" ---- Black bar lookup table ------ \n"); - numaWriteStderr(naelut); - lept_stderr(" ---- White bar widths (pixels) ------ \n"); - numaWriteStderr(naodist); - lept_stderr(" ---- Histogram of white bar widths ------ \n"); - numaWriteStderr(naohist); - lept_stderr(" ---- Peak ranges in white bar histogram bins --- \n"); - numaWriteStderr(naorange); - lept_stderr(" ---- Peak white bar centroid width values ------ \n"); - numaWriteStderr(naocent); - lept_stderr(" ---- White bar lookup table ------ \n"); - numaWriteStderr(naolut); - } - - numaDestroy(&naedist); - numaDestroy(&naodist); - numaDestroy(&naerange); - numaDestroy(&naorange); - numaDestroy(&naecent); - numaDestroy(&naocent); - numaDestroy(&naelut); - numaDestroy(&naolut); - if (pnaehist) - *pnaehist = naehist; - else - numaDestroy(&naehist); - if (pnaohist) - *pnaohist = naohist; - else - numaDestroy(&naohist); - return nad; -} - - -/*! - * \brief numaGetCrossingDistances() - * - * \param[in] nas numa of crossing locations - * \param[out] pnaedist [optional] even distances between crossings - * \param[out] pnaodist [optional] odd distances between crossings - * \param[out] pmindist [optional] min distance between crossings - * \param[out] pmaxdist [optional] max distance between crossings - * \return 0 if OK, 1 on error - */ -static l_int32 -numaGetCrossingDistances(NUMA *nas, - NUMA **pnaedist, - NUMA **pnaodist, - l_float32 *pmindist, - l_float32 *pmaxdist) -{ -l_int32 i, n; -l_float32 val, newval, mindist, maxdist, dist; -NUMA *naedist, *naodist; - - PROCNAME("numaGetCrossingDistances"); - - if (pnaedist) *pnaedist = NULL; - if (pnaodist) *pnaodist = NULL; - if (pmindist) *pmindist = 0.0; - if (pmaxdist) *pmaxdist = 0.0; - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if ((n = numaGetCount(nas)) < 2) - return ERROR_INT("n < 2", procName, 1); - - /* Get numas of distances between crossings. 
Separate these - * into even (e.g., black) and odd (e.g., white) spans. - * For barcodes, the black spans are 0, 2, etc. These - * distances are in pixel units. */ - naedist = numaCreate(n / 2 + 1); - naodist = numaCreate(n / 2); - numaGetFValue(nas, 0, &val); - for (i = 1; i < n; i++) { - numaGetFValue(nas, i, &newval); - if (i % 2) - numaAddNumber(naedist, newval - val); - else - numaAddNumber(naodist, newval - val); - val = newval; - } - - /* The mindist and maxdist of the spans are in pixel units. */ - numaGetMin(naedist, &mindist, NULL); - numaGetMin(naodist, &dist, NULL); - mindist = L_MIN(dist, mindist); - numaGetMax(naedist, &maxdist, NULL); - numaGetMax(naodist, &dist, NULL); - maxdist = L_MAX(dist, maxdist); - L_INFO("mindist = %7.3f, maxdist = %7.3f\n", procName, mindist, maxdist); - - if (pnaedist) - *pnaedist = naedist; - else - numaDestroy(&naedist); - if (pnaodist) - *pnaodist = naodist; - else - numaDestroy(&naodist); - if (pmindist) *pmindist = mindist; - if (pmaxdist) *pmaxdist = maxdist; - return 0; -} - - -/*! - * \brief numaLocatePeakRanges() - * - * \param[in] nas numa of histogram of crossing widths - * \param[in] minfirst min location of center of first peak - * \param[in] minsep min separation between peak range centers - * \param[in] maxmin max allowed value for min histo value between peaks - * \return nad ranges for each peak found, in pairs, or NULL on error - * - *
- * Notes:
- *      (1) Units of %minsep are the index into nas.
- *          This puts useful constraints on peak-finding.
- *      (2) If maxmin == 0.0, the value of nas[i] must go to 0.0 (or less)
- *          between peaks.
- *      (3) All calculations are done in units of the index into nas.
- *          The resulting ranges are therefore integers.
- *      (4) The output nad gives pairs of range values for successive peaks.
- *          Any location [i] for which maxmin = nas[i] = 0.0 will NOT be
- *          included in a peak range.  This works fine for histograms where
- *          if nas[i] == 0.0, it means that there are no samples at [i].
- *      (5) For barcodes, when this is used on a histogram of barcode
- *          widths, use maxmin = 0.0.  This requires that there is at
- *          least one histogram bin corresponding to a width value between
- *          adjacent peak ranges that is unpopulated, making the separation
- *          of the histogram peaks unambiguous.
- * 
- */ -static NUMA * -numaLocatePeakRanges(NUMA *nas, - l_float32 minfirst, - l_float32 minsep, - l_float32 maxmin) -{ -l_int32 i, n, inpeak, left; -l_float32 center, prevcenter, val; -NUMA *nad; - - PROCNAME("numaLocatePeakRanges"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - n = numaGetCount(nas); - nad = numaCreate(0); - - inpeak = FALSE; - prevcenter = minfirst - minsep - 1.0; - for (i = 0; i < n; i++) { - numaGetFValue(nas, i, &val); - if (inpeak == FALSE && val > maxmin) { - inpeak = TRUE; - left = i; - } else if (inpeak == TRUE && val <= maxmin) { /* end peak */ - center = (left + i - 1.0) / 2.0; - if (center - prevcenter >= minsep) { /* save new peak */ - inpeak = FALSE; - numaAddNumber(nad, left); - numaAddNumber(nad, i - 1); - prevcenter = center; - } else { /* attach to previous peak; revise the right edge */ - numaSetValue(nad, numaGetCount(nad) - 1, i - 1); - } - } - } - if (inpeak == TRUE) { /* save the last peak */ - numaAddNumber(nad, left); - numaAddNumber(nad, n - 1); - } - - return nad; -} - - -/*! 
- * \brief numaGetPeakCentroids() - * - * \param[in] nahist numa of histogram of crossing widths - * \param[in] narange numa of ranges of x-values for the peaks in %nahist - * \return nad centroids for each peak found; max of 4, corresponding - * to 4 different barcode line widths, or NULL on error - */ -static NUMA * -numaGetPeakCentroids(NUMA *nahist, - NUMA *narange) -{ -l_int32 i, j, nr, low, high; -l_float32 cent, sum, val; -NUMA *nad; - - PROCNAME("numaGetPeakCentroids"); - - if (!nahist) - return (NUMA *)ERROR_PTR("nahist not defined", procName, NULL); - if (!narange) - return (NUMA *)ERROR_PTR("narange not defined", procName, NULL); - nr = numaGetCount(narange) / 2; - - nad = numaCreate(4); - for (i = 0; i < nr; i++) { - numaGetIValue(narange, 2 * i, &low); - numaGetIValue(narange, 2 * i + 1, &high); - cent = 0.0; - sum = 0.0; - for (j = low; j <= high; j++) { - numaGetFValue(nahist, j, &val); - cent += j * val; - sum += val; - } - numaAddNumber(nad, cent / sum); - } - - return nad; -} - - -/*! - * \brief numaGetPeakWidthLUT() - * - * \param[in] narange numa of x-val ranges for the histogram width peaks - * \param[in] nacent numa of centroids of each peak -- up to 4 - * \return nalut lookup table from the width of a bar to one of the four - * integerized barcode units, or NULL on error - * - *
- * Notes:
- *      (1) This generates the lookup table that maps from a sequence of widths
- *          (in some units) to the integerized barcode units (1, 2, 3, 4),
- *          which are the output integer widths between transitions.
- *      (2) The smallest width can be lost in float roundoff.  To avoid
- *          losing it, we expand the peak range of the smallest width.
- * 
- */ -static NUMA * -numaGetPeakWidthLUT(NUMA *narange, - NUMA *nacent) -{ -l_int32 i, j, nc, low, high, imax; -l_int32 assign[4]; -l_float32 *warray; -l_float32 max, rat21, rat32, rat42; -NUMA *nalut; - - PROCNAME("numaGetPeakWidthLUT"); - - if (!narange) - return (NUMA *)ERROR_PTR("narange not defined", procName, NULL); - if (!nacent) - return (NUMA *)ERROR_PTR("nacent not defined", procName, NULL); - nc = numaGetCount(nacent); /* half the size of narange */ - if (nc < 1 || nc > 4) - return (NUMA *)ERROR_PTR("nc must be 1, 2, 3, or 4", procName, NULL); - - /* Check the peak centroids for consistency with bar widths. - * The third peak can correspond to a width of either 3 or 4. - * Use ratios 3/2 and 4/2 instead of 3/1 and 4/1 because the - * former are more stable and closer to the expected ratio. */ - if (nc > 1) { - warray = numaGetFArray(nacent, L_NOCOPY); - if (warray[0] == 0) - return (NUMA *)ERROR_PTR("first peak has width 0.0", - procName, NULL); - rat21 = warray[1] / warray[0]; - if (rat21 < 1.5 || rat21 > 2.6) - L_WARNING("width ratio 2/1 = %f\n", procName, rat21); - if (nc > 2) { - rat32 = warray[2] / warray[1]; - if (rat32 < 1.3 || rat32 > 2.25) - L_WARNING("width ratio 3/2 = %f\n", procName, rat32); - } - if (nc == 4) { - rat42 = warray[3] / warray[1]; - if (rat42 < 1.7 || rat42 > 2.3) - L_WARNING("width ratio 4/2 = %f\n", procName, rat42); - } - } - - /* Set width assignments. 
- * The only possible ambiguity is with nc = 3 */ - for (i = 0; i < 4; i++) - assign[i] = i + 1; - if (nc == 3) { - if (rat32 > 1.75) - assign[2] = 4; - } - - /* Put widths into the LUT */ - numaGetMax(narange, &max, NULL); - imax = (l_int32)max; - nalut = numaCreate(imax + 1); - numaSetCount(nalut, imax + 1); /* fill the array with zeroes */ - for (i = 0; i < nc; i++) { - numaGetIValue(narange, 2 * i, &low); - if (i == 0) low--; /* catch smallest width */ - numaGetIValue(narange, 2 * i + 1, &high); - for (j = low; j <= high; j++) - numaSetValue(nalut, j, assign[i]); - } - - return nalut; -} - - -/*! - * \brief numaQuantizeCrossingsByWindow() - * - * \param[in] nas numa of crossing locations - * \param[in] ratio of max window size over min window size in search; - * typ. 2.0 - * \param[out] pwidth [optional] best window width - * \param[out] pfirstloc [optional] center of window for first xing - * \param[out] pnac [optional] array of window crossings (0, 1, 2) - * \param[in] debugflag 1 to generate various plots of intermediate results - * \return nad sequence of widths, in unit sizes, or NULL on error - * - *
- * Notes:
- *      (1) The minimum size of the window is set by the minimum
- *          distance between zero crossings.
- *      (2) The optional return signal %nac is a sequence of 0s, 1s,
- *          and perhaps a few 2s, giving the number of crossings in each window.
- *          On the occasion where there is a '2', it is interpreted as
- *          ending two runs: the previous one and another one that has length 1.
- * 
- */ -NUMA * -numaQuantizeCrossingsByWindow(NUMA *nas, - l_float32 ratio, - l_float32 *pwidth, - l_float32 *pfirstloc, - NUMA **pnac, - l_int32 debugflag) -{ -l_int32 i, nw, started, count, trans; -l_float32 minsize, minwidth, minshift, xfirst; -NUMA *nac, *nad; - - PROCNAME("numaQuantizeCrossingsByWindow"); - - if (!nas) - return (NUMA *)ERROR_PTR("nas not defined", procName, NULL); - if (numaGetCount(nas) < 2) - return (NUMA *)ERROR_PTR("nas size < 2", procName, NULL); - - /* Get the minsize, which is needed for the search for - * the window width (ultimately found as 'minwidth') */ - numaGetCrossingDistances(nas, NULL, NULL, &minsize, NULL); - - /* Compute the width and shift increments; start at minsize - * and go up to ratio * minsize */ - numaEvalBestWidthAndShift(nas, 100, 10, minsize, ratio * minsize, - &minwidth, &minshift, NULL); - - /* Refine width and shift calculation */ - numaEvalBestWidthAndShift(nas, 100, 10, 0.98 * minwidth, 1.02 * minwidth, - &minwidth, &minshift, NULL); - - L_INFO("best width = %7.3f, best shift = %7.3f\n", - procName, minwidth, minshift); - - /* Get the crossing array (0,1,2) for the best window width and shift */ - numaEvalSyncError(nas, 0, 0, minwidth, minshift, NULL, &nac); - if (pwidth) *pwidth = minwidth; - if (pfirstloc) { - numaGetFValue(nas, 0, &xfirst); - *pfirstloc = xfirst + minshift; - } - - /* Get the array of bar widths, starting with a black bar */ - nad = numaCreate(0); - nw = numaGetCount(nac); /* number of window measurements */ - started = FALSE; - count = 0; /* unnecessary init */ - for (i = 0; i < nw; i++) { - numaGetIValue(nac, i, &trans); - if (trans > 2) - L_WARNING("trans = %d > 2 !!!\n", procName, trans); - if (started) { - if (trans > 1) { /* i.e., when trans == 2 */ - numaAddNumber(nad, count); - trans--; - count = 1; - } - if (trans == 1) { - numaAddNumber(nad, count); - count = 1; - } else { - count++; - } - } - if (!started && trans) { - started = TRUE; - if (trans == 2) /* a whole bar in this 
window */ - numaAddNumber(nad, 1); - count = 1; - } - } - - if (pnac) - *pnac = nac; - else - numaDestroy(&nac); - return nad; -} - - -/*! - * \brief numaEvalBestWidthAndShift() - * - * \param[in] nas numa of crossing locations - * \param[in] nwidth number of widths to consider - * \param[in] nshift number of shifts to consider for each width - * \param[in] minwidth smallest width to consider - * \param[in] maxwidth largest width to consider - * \param[out] pbestwidth best size of window - * \param[out] pbestshift best shift for the window - * \param[out] pbestscore [optional] average squared error of dist - * of crossing signal from the center of the window - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a linear sweep of widths, evaluating at %nshift
- *          shifts for each width, finding the (width, shift) pair that
- *          gives the minimum score.
- * 
- */ -static l_int32 -numaEvalBestWidthAndShift(NUMA *nas, - l_int32 nwidth, - l_int32 nshift, - l_float32 minwidth, - l_float32 maxwidth, - l_float32 *pbestwidth, - l_float32 *pbestshift, - l_float32 *pbestscore) -{ -l_int32 i, j; -l_float32 delwidth, delshift, width, shift, score; -l_float32 bestwidth, bestshift, bestscore; - - PROCNAME("numaEvalBestWidthAndShift"); - - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if (!pbestwidth || !pbestshift) - return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1); - - bestwidth = 0.0f; - bestshift = 0.0f; - bestscore = 1.0; - delwidth = (maxwidth - minwidth) / (nwidth - 1.0); - for (i = 0; i < nwidth; i++) { - width = minwidth + delwidth * i; - delshift = width / (l_float32)(nshift); - for (j = 0; j < nshift; j++) { - shift = -0.5 * (width - delshift) + j * delshift; - numaEvalSyncError(nas, 0, 0, width, shift, &score, NULL); - if (score < bestscore) { - bestscore = score; - bestwidth = width; - bestshift = shift; -#if DEBUG_FREQUENCY - lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n", - width, shift, score); -#endif /* DEBUG_FREQUENCY */ - } - } - } - - *pbestwidth = bestwidth; - *pbestshift = bestshift; - if (pbestscore) - *pbestscore = bestscore; - return 0; -} - - -/*! - * \brief numaEvalSyncError() - * - * \param[in] nas numa of crossing locations - * \param[in] ifirst first crossing to use - * \param[in] ilast last crossing to use; use 0 for all crossings - * \param[in] width size of window - * \param[in] shift of center of window w/rt first crossing - * \param[out] pscore [optional] average squared error of dist - * of crossing signal from the center of the window - * \param[out] pnad [optional] numa of 1s and 0s for crossings - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The score is computed only on the part of the signal from the
- *          %ifirst to %ilast crossings.  Use 0 for both of these to
- *          use all the crossings.  The score is normalized for
- *          the number of crossings and with half-width of the window.
- *      (2) The optional return %nad is a sequence of 0s and 1s, where a '1'
- *          indicates a crossing in the window.
- * 
- */ -static l_int32 -numaEvalSyncError(NUMA *nas, - l_int32 ifirst, - l_int32 ilast, - l_float32 width, - l_float32 shift, - l_float32 *pscore, - NUMA **pnad) -{ -l_int32 i, n, nc, nw, ival; -l_int32 iw; /* cell in which transition occurs */ -l_float32 score, xfirst, xlast, xleft, xc, xwc; -NUMA *nad; - - PROCNAME("numaEvalSyncError"); - - if (!nas) - return ERROR_INT("nas not defined", procName, 1); - if ((n = numaGetCount(nas)) < 2) - return ERROR_INT("nas size < 2", procName, 1); - if (ifirst < 0) ifirst = 0; - if (ilast <= 0) ilast = n - 1; - if (ifirst >= ilast) - return ERROR_INT("ifirst not < ilast", procName, 1); - nc = ilast - ifirst + 1; - - /* Set up an array corresponding to the (shifted) windows, - * and fill in the crossings. */ - score = 0.0; - numaGetFValue(nas, ifirst, &xfirst); - numaGetFValue(nas, ilast, &xlast); - nw = (l_int32) ((xlast - xfirst + 2.0 * width) / width); - nad = numaCreate(nw); - numaSetCount(nad, nw); /* init to all 0.0 */ - xleft = xfirst - width / 2.0 + shift; /* left edge of first window */ - for (i = ifirst; i <= ilast; i++) { - numaGetFValue(nas, i, &xc); - iw = (l_int32)((xc - xleft) / width); - xwc = xleft + (iw + 0.5) * width; /* center of cell iw */ - score += (xwc - xc) * (xwc - xc); - numaGetIValue(nad, iw, &ival); - numaSetValue(nad, iw, ival + 1); - } - - if (pscore) - *pscore = 4.0 * score / (width * width * (l_float32)nc); - if (pnad) - *pnad = nad; - else - numaDestroy(&nad); - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readbarcode.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readbarcode.h deleted file mode 100644 index 358ff4e5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readbarcode.h +++ /dev/null @@ -1,239 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_READBARCODE_H -#define LEPTONICA_READBARCODE_H - - /* ----------------------------------------------------------------- * - * Flags for method of extracting barcode widths * - * ----------------------------------------------------------------- */ - -/*! 
Barcode Method */ -enum { - L_USE_WIDTHS = 1, /*!< use histogram of barcode widths */ - L_USE_WINDOWS = 2 /*!< find best window for decoding transitions */ -}; - - /* ----------------------------------------------------------------- * - * Flags for barcode formats * - * These are used both to identify a barcode format and to identify * - * the decoding method to use on a barcode. * - * ----------------------------------------------------------------- */ - -/*! Barcode Format */ -enum { - L_BF_UNKNOWN = 0, /*!< unknown format */ - L_BF_ANY = 1, /*!< try decoding with all known formats */ - L_BF_CODE128 = 2, /*!< decode with Code128 format */ - L_BF_EAN8 = 3, /*!< decode with EAN8 format */ - L_BF_EAN13 = 4, /*!< decode with EAN13 format */ - L_BF_CODE2OF5 = 5, /*!< decode with Code 2 of 5 format */ - L_BF_CODEI2OF5 = 6, /*!< decode with Interleaved 2 of 5 format */ - L_BF_CODE39 = 7, /*!< decode with Code39 format */ - L_BF_CODE93 = 8, /*!< decode with Code93 format */ - L_BF_CODABAR = 9, /*!< decode with Code93 format */ - L_BF_UPCA = 10 /*!< decode with UPC A format */ -}; - - /* ----------------------------------------------------------------- * - * Currently supported formats * - * Update these arrays as new formats are added. * - * ----------------------------------------------------------------- */ - -/*! Currently supported formats */ -static const l_int32 SupportedBarcodeFormat[] = { - L_BF_CODE2OF5, - L_BF_CODEI2OF5, - L_BF_CODE93, - L_BF_CODE39, - L_BF_CODABAR, - L_BF_UPCA, - L_BF_EAN13 -}; - -/*! 
Currently supported format names */ -static const char *SupportedBarcodeFormatName[] = { - "Code2of5", - "CodeI2of5", - "Code93", - "Code39", - "Codabar", - "Upca", - "Ean13" -}; -static const l_int32 NumSupportedBarcodeFormats = 7; /*!< Number of formats */ - - - /* ----------------------------------------------------------------- * - * Code 2 of 5 symbology * - * ----------------------------------------------------------------- */ -static const char *Code2of5[] = { - "111121211", "211111112", "112111112", "212111111", /* 0 - 3 */ - "111121112", "211121111", "112121111", "111111212", /* 4 - 7 */ - "211111211", "112111211", /* 8 - 9 */ - "21211", "21112" /* Start, Stop */ -}; - -static const l_int32 C25_START = 10; -static const l_int32 C25_STOP = 11; - - - /* ----------------------------------------------------------------- * - * Code Interleaved 2 of 5 symbology * - * ----------------------------------------------------------------- */ -static const char *CodeI2of5[] = { - "11221", "21112", "12112", "22111", "11212", /* 0 - 4 */ - "21211", "12211", "11122", "21121", "12121", /* 5 - 9 */ - "1111", "211" /* start, stop */ -}; - -static const l_int32 CI25_START = 10; -static const l_int32 CI25_STOP = 11; - - - /* ----------------------------------------------------------------- * - * Code 93 symbology * - * ----------------------------------------------------------------- */ -static const char *Code93[] = { - "131112", "111213", "111312", "111411", "121113", /* 0: 0 - 4 */ - "121212", "121311", "111114", "131211", "141111", /* 5: 5 - 9 */ - "211113", "211212", "211311", "221112", "221211", /* 10: A - E */ - "231111", "112113", "112212", "112311", "122112", /* 15: F - J */ - "132111", "111123", "111222", "111321", "121122", /* 20: K - O */ - "131121", "212112", "212211", "211122", "211221", /* 25: P - T */ - "221121", "222111", "112122", "112221", "122121", /* 30: U - Y */ - "123111", "121131", "311112", "311211", "321111", /* 35: Z,-,.,SP,$ */ - "112131", "113121", 
"211131", "131221", "312111", /* 40: /,+,%,($),(%) */ - "311121", "122211", "111141" /* 45: (/),(+), Start */ -}; - - /* Use "[]{}#" to represent special codes 43-47 */ -static const char Code93Val[] = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ-. $/+%[]{}#"; - -static const l_int32 C93_START = 47; -static const l_int32 C93_STOP = 47; - - - /* ----------------------------------------------------------------- * - * Code 39 symbology * - * ----------------------------------------------------------------- */ -static const char *Code39[] = { - "111221211", "211211112", "112211112", "212211111", /* 0: 0 - 3 */ - "111221112", "211221111", "112221111", "111211212", /* 4: 4 - 7 */ - "211211211", "112211211", "211112112", "112112112", /* 8: 8 - B */ - "212112111", "111122112", "211122111", "112122111", /* 12: C - F */ - "111112212", "211112211", "112112211", "111122211", /* 16: G - J */ - "211111122", "112111122", "212111121", "111121122", /* 20: K - N */ - "211121121", "112121121", "111111222", "211111221", /* 24: O - R */ - "112111221", "111121221", "221111112", "122111112", /* 28: S - V */ - "222111111", "121121112", "221121111", "122121111", /* 32: W - Z */ - "121111212", "221111211", "122111211", "121212111", /* 36: -,.,SP,$ */ - "121211121", "121112121", "111212121", "121121211" /* 40: /,+,%,* */ -}; - - /* Use "*" to represent the Start and Stop codes (43) */ -static const char Code39Val[] = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ-. $/+%*"; - -static const l_int32 C39_START = 43; -static const l_int32 C39_STOP = 43; - - - /* ----------------------------------------------------------------- * - * Codabar symbology * - * ----------------------------------------------------------------- */ -static const char *Codabar[] = { - "1111122", "1111221", "1112112", "2211111", "1121121", /* 0: 0 - 4 */ - "2111121", "1211112", "1211211", "1221111", "2112111", /* 5: 5 - 9 */ - "1112211", "1122111", "2111212", "2121112", "2121211", /* 10: -,$,:,/,. 
*/ - "1121212", "1122121", "1212112", "1112122", "1112221" /* 15: +,A,B,C,D */ -}; - - /* Ascii representations for codes 16-19: (A or T), (B or N), (C or *), - * (D or E). These are used in pairs for the Start and Stop codes. */ -static const char CodabarVal[] = "0123456789-$:/.+ABCD"; - - - /* ----------------------------------------------------------------- * - * UPC-A symbology * - * ----------------------------------------------------------------- */ -static const char *Upca[] = { - "3211", "2221", "2122", "1411", "1132", /* 0: 0 - 4 */ - "1231", "1114", "1312", "1213", "3112", /* 5: 5 - 9 */ - "111", "111", "11111" /* 10: Start, Stop, Mid */ -}; - -static const l_int32 UPCA_START = 10; -static const l_int32 UPCA_STOP = 11; -static const l_int32 UPCA_MID = 12; - - - /* ----------------------------------------------------------------- * - * Code128 symbology * - * ----------------------------------------------------------------- */ -static const char *Code128[] = { - "212222", "222122", "222221", "121223", "121322", /* 0 - 4 */ - "131222", "122213", "122312", "132212", "221213", /* 5 - 9 */ - "221312", "231212", "112232", "122132", "122231", /* 10 - 14 */ - "113222", "123122", "123221", "223211", "221132", /* 15 - 19 */ - "221231", "213212", "223112", "312131", "311222", /* 20 - 24 */ - "321122", "321221", "312212", "322112", "322211", /* 25 - 29 */ - "212123", "212321", "232121", "111323", "131123", /* 30 - 34 */ - "131321", "112313", "132113", "132311", "211313", /* 35 - 39 */ - "231113", "231311", "112133", "112331", "132131", /* 40 - 44 */ - "113123", "113321", "133121", "313121", "211331", /* 45 - 49 */ - "231131", "213113", "213311", "213131", "311123", /* 50 - 54 */ - "311321", "331121", "312113", "312311", "332111", /* 55 - 59 */ - "314111", "221411", "431111", "111224", "111422", /* 60 - 64 */ - "121124", "121421", "141122", "141221", "112214", /* 65 - 69 */ - "112412", "122114", "122411", "142112", "142211", /* 70 - 74 */ - "241211", "221114", 
"413111", "241112", "134111", /* 75 - 79 */ - "111242", "121142", "121241", "114212", "124112", /* 80 - 84 */ - "124211", "411212", "421112", "421211", "212141", /* 85 - 89 */ - "214121", "412121", "111143", "111341", "131141", /* 90 - 94 */ - "114113", "114311", "411113", "411311", "113141", /* 95 - 99 */ - "114131", "311141", "411131", "211412", "211214", /* 100 - 104 */ - "211232", "2331112" /* 105 - 106 */ -}; - -static const l_int32 C128_FUN_3 = 96; /* in A or B only; in C it is 96 */ -static const l_int32 C128_FUNC_2 = 97; /* in A or B only; in C it is 97 */ -static const l_int32 C128_SHIFT = 98; /* in A or B only; in C it is 98 */ -static const l_int32 C128_GOTO_C = 99; /* in A or B only; in C it is 99 */ -static const l_int32 C128_GOTO_B = 100; -static const l_int32 C128_GOTO_A = 101; -static const l_int32 C128_FUNC_1 = 102; -static const l_int32 C128_START_A = 103; -static const l_int32 C128_START_B = 104; -static const l_int32 C128_START_C = 105; -static const l_int32 C128_STOP = 106; - /* code 128 symbols are 11 units */ -static const l_int32 C128_SYMBOL_WIDTH = 11; - - - -#endif /* LEPTONICA_READBARCODE_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readfile.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readfile.c deleted file mode 100644 index 07ba5cde..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/readfile.c +++ /dev/null @@ -1,1627 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file readfile.c: reads image on file into memory - *
- *
- *      Top-level functions for reading images from file
- *           PIXA      *pixaReadFiles()
- *           PIXA      *pixaReadFilesSA()
- *           PIX       *pixRead()
- *           PIX       *pixReadWithHint()
- *           PIX       *pixReadIndexed()
- *           PIX       *pixReadStream()
- *
- *      Read header information from file
- *           l_int32    pixReadHeader()
- *
- *      Format finders
- *           l_int32    findFileFormat()
- *           l_int32    findFileFormatStream()
- *           l_int32    findFileFormatBuffer()
- *           l_int32    fileFormatIsTiff()
- *
- *      Read from memory
- *           PIX       *pixReadMem()
- *           l_int32    pixReadHeaderMem()
- *
- *      Output image file information
- *           void       writeImageFileInfo()
- *
- *      Test function for I/O with different formats
- *           l_int32    ioFormatTest()
- *
- *  Supported file formats:
- *  (1) Reading is supported without any external libraries:
- *          bmp
- *          pnm   (including pbm, pgm, etc)
- *          spix  (raw serialized)
- *  (2) Reading is supported with installation of external libraries:
- *          png
- *          jpg   (standard jfif version)
- *          tiff  (including most varieties of compression)
- *          gif
- *          webp
- *          jp2 (jpeg 2000)
- *  (3) Other file types will get an "unknown format" error.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Output files for ioFormatTest(). */ -static const char *FILE_BMP = "/tmp/lept/format/file.bmp"; -static const char *FILE_PNG = "/tmp/lept/format/file.png"; -static const char *FILE_PNM = "/tmp/lept/format/file.pnm"; -static const char *FILE_G3 = "/tmp/lept/format/file_g3.tif"; -static const char *FILE_G4 = "/tmp/lept/format/file_g4.tif"; -static const char *FILE_RLE = "/tmp/lept/format/file_rle.tif"; -static const char *FILE_PB = "/tmp/lept/format/file_packbits.tif"; -static const char *FILE_LZW = "/tmp/lept/format/file_lzw.tif"; -static const char *FILE_ZIP = "/tmp/lept/format/file_zip.tif"; -static const char *FILE_TIFF_JPEG = "/tmp/lept/format/file_jpeg.tif"; -static const char *FILE_TIFF = "/tmp/lept/format/file.tif"; -static const char *FILE_JPG = "/tmp/lept/format/file.jpg"; -static const char *FILE_GIF = "/tmp/lept/format/file.gif"; -static const char *FILE_WEBP = "/tmp/lept/format/file.webp"; -static const char *FILE_JP2K = "/tmp/lept/format/file.jp2"; - -static const unsigned char JP2K_CODESTREAM[4] = { 0xff, 0x4f, 0xff, 0x51 }; -static const unsigned char JP2K_IMAGE_DATA[12] = { 0x00, 0x00, 0x00, 0x0C, - 0x6A, 0x50, 0x20, 0x20, - 0x0D, 0x0A, 0x87, 0x0A }; - - -/*---------------------------------------------------------------------* - * Top-level functions for reading images from file * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaReadFiles() - * - * \param[in] dirname - * \param[in] substr [optional] substring filter on filenames; can be null - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) %dirname is the full path for the directory.
- *      (2) %substr is the part of the file name (excluding
- *          the directory) that is to be matched.  All matching
- *          filenames are read into the Pixa.  If substr is NULL,
- *          all filenames are read into the Pixa.
- * 
- */ -PIXA * -pixaReadFiles(const char *dirname, - const char *substr) -{ -PIXA *pixa; -SARRAY *sa; - - PROCNAME("pixaReadFiles"); - - if (!dirname) - return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return (PIXA *)ERROR_PTR("sa not made", procName, NULL); - - pixa = pixaReadFilesSA(sa); - sarrayDestroy(&sa); - return pixa; -} - - -/*! - * \brief pixaReadFilesSA() - * - * \param[in] sa full pathnames for all files - * \return pixa, or NULL on error - */ -PIXA * -pixaReadFilesSA(SARRAY *sa) -{ -char *str; -l_int32 i, n; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaReadFilesSA"); - - if (!sa) - return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); - - n = sarrayGetCount(sa); - pixa = pixaCreate(n); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - if ((pix = pixRead(str)) == NULL) { - L_WARNING("pix not read from file %s\n", procName, str); - continue; - } - pixaAddPix(pixa, pix, L_INSERT); - } - - return pixa; -} - - -/*! - * \brief pixRead() - * - * \param[in] filename with full pathname or in local directory - * \return pix if OK; NULL on error - * - *
- * Notes:
- *      (1) See at top of file for supported formats.
- * 
- */ -PIX * -pixRead(const char *filename) -{ -FILE *fp; -PIX *pix; - - PROCNAME("pixRead"); - - if (!filename) - return (PIX *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) { - L_ERROR("image file not found: %s\n", procName, filename); - return NULL; - } - pix = pixReadStream(fp, 0); - fclose(fp); - if (!pix) - return (PIX *)ERROR_PTR("pix not read", procName, NULL); - return pix; -} - - -/*! - * \brief pixReadWithHint() - * - * \param[in] filename with full pathname or in local directory - * \param[in] hint bitwise OR of L_HINT_* values for jpeg; - * use 0 for no hint - * \return pix if OK; NULL on error - * - *
- * Notes:
- *      (1) The hint is not binding, but may be used to optimize jpeg decoding.
- *          Use 0 for no hinting.
- * 
- */ -PIX * -pixReadWithHint(const char *filename, - l_int32 hint) -{ -FILE *fp; -PIX *pix; - - PROCNAME("pixReadWithHint"); - - if (!filename) - return (PIX *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIX *)ERROR_PTR("image file not found", procName, NULL); - pix = pixReadStream(fp, hint); - fclose(fp); - - if (!pix) - return (PIX *)ERROR_PTR("image not returned", procName, NULL); - return pix; -} - - -/*! - * \brief pixReadIndexed() - * - * \param[in] sa string array of full pathnames - * \param[in] index into pathname array - * \return pix if OK; null if not found - * - *
- * Notes:
- *      (1) This function is useful for selecting image files from a
- *          directory, where the integer %index is embedded into
- *          the file name.
- *      (2) This is typically done by generating the sarray using
- *          getNumberedPathnamesInDirectory(), so that the %index
- *          pathname would have the number %index in it.  The size
- *          of the sarray should be the largest number (plus 1) appearing
- *          in the file names, respecting the constraints in the
- *          call to getNumberedPathnamesInDirectory().
- *      (3) Consequently, for some indices into the sarray, there may
- *          be no pathnames in the directory containing that number.
- *          By convention, we place empty C strings ("") in those
- *          locations in the sarray, and it is not an error if such
- *          a string is encountered and no pix is returned.
- *          Therefore, the caller must verify that a pix is returned.
- *      (4) See convertSegmentedPagesToPS() in src/psio1.c for an
- *          example of usage.
- * 
- */ -PIX * -pixReadIndexed(SARRAY *sa, - l_int32 index) -{ -char *fname; -l_int32 n; -PIX *pix; - - PROCNAME("pixReadIndexed"); - - if (!sa) - return (PIX *)ERROR_PTR("sa not defined", procName, NULL); - n = sarrayGetCount(sa); - if (index < 0 || index >= n) - return (PIX *)ERROR_PTR("index out of bounds", procName, NULL); - - fname = sarrayGetString(sa, index, L_NOCOPY); - if (fname[0] == '\0') - return NULL; - - if ((pix = pixRead(fname)) == NULL) { - L_ERROR("pix not read from file %s\n", procName, fname); - return NULL; - } - - return pix; -} - - -/*! - * \brief pixReadStream() - * - * \param[in] fp file stream - * \param[in] hint bitwise OR of L_HINT_* values for jpeg; 0 for no hint - * \return pix if OK; NULL on error - * - *
- * Notes:
- *      (1) The hint only applies to jpeg.
- * 
- */ -PIX * -pixReadStream(FILE *fp, - l_int32 hint) -{ -l_int32 format, ret, valid; -l_uint8 *comment; -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixReadStream"); - - if (!fp) - return (PIX *)ERROR_PTR("stream not defined", procName, NULL); - pix = NULL; - - findFileFormatStream(fp, &format); - switch (format) - { - case IFF_BMP: - if ((pix = pixReadStreamBmp(fp)) == NULL ) - return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL); - break; - - case IFF_JFIF_JPEG: - if ((pix = pixReadStreamJpeg(fp, 0, 1, NULL, hint)) == NULL) - return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL); - ret = fgetJpegComment(fp, &comment); - if (!ret && comment) - pixSetText(pix, (char *)comment); - LEPT_FREE(comment); - break; - - case IFF_PNG: - if ((pix = pixReadStreamPng(fp)) == NULL) - return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL); - break; - - case IFF_TIFF: - case IFF_TIFF_PACKBITS: - case IFF_TIFF_RLE: - case IFF_TIFF_G3: - case IFF_TIFF_G4: - case IFF_TIFF_LZW: - case IFF_TIFF_ZIP: - case IFF_TIFF_JPEG: - if ((pix = pixReadStreamTiff(fp, 0)) == NULL) /* page 0 by default */ - return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL); - break; - - case IFF_PNM: - if ((pix = pixReadStreamPnm(fp)) == NULL) - return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL); - break; - - case IFF_GIF: - if ((pix = pixReadStreamGif(fp)) == NULL) - return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL); - break; - - case IFF_JP2: - if ((pix = pixReadStreamJp2k(fp, 1, NULL, 0, 0)) == NULL) - return (PIX *)ERROR_PTR("jp2: no pix returned", procName, NULL); - break; - - case IFF_WEBP: - if ((pix = pixReadStreamWebP(fp)) == NULL) - return (PIX *)ERROR_PTR("webp: no pix returned", procName, NULL); - break; - - case IFF_PS: - L_ERROR("PostScript reading is not supported\n", procName); - return NULL; - - case IFF_LPDF: - L_ERROR("Pdf reading is not supported\n", procName); - return NULL; - - case IFF_SPIX: - if ((pix = 
pixReadStreamSpix(fp)) == NULL) - return (PIX *)ERROR_PTR("spix: no pix returned", procName, NULL); - break; - - case IFF_UNKNOWN: - return (PIX *)ERROR_PTR( "Unknown format: no pix returned", - procName, NULL); - break; - } - - if (pix) { - pixSetInputFormat(pix, format); - if ((cmap = pixGetColormap(pix))) { - pixcmapIsValid(cmap, &valid); - if (!valid) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("invalid colormap", procName, NULL); - } - } - } - return pix; -} - - - -/*---------------------------------------------------------------------* - * Read header information from file * - *---------------------------------------------------------------------*/ -/*! - * \brief pixReadHeader() - * - * \param[in] filename with full pathname or in local directory - * \param[out] pformat [optional] file format - * \param[out] pw, ph [optional] width and height - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel 1, 3 or 4 - * \param[out] piscmap [optional] 1 if cmap exists; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
- *          For bmp and gif, we cheat and read the entire file into a pix,
- *          from which we extract the "header" information.
- * 
- */ -l_ok -pixReadHeader(const char *filename, - l_int32 *pformat, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_int32 format, ret, w, h, d, bps, spp, iscmap; -l_int32 type; /* ignored */ -FILE *fp; -PIX *pix; - - PROCNAME("pixReadHeader"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (piscmap) *piscmap = 0; - if (pformat) *pformat = 0; - iscmap = 0; /* init to false */ - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - findFileFormatStream(fp, &format); - fclose(fp); - - switch (format) - { - case IFF_BMP: /* cheating: reading the entire file */ - if ((pix = pixRead(filename)) == NULL) - return ERROR_INT( "bmp: pix not read", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (pixGetColormap(pix)) - iscmap = 1; - pixDestroy(&pix); - bps = (d == 32) ? 8 : d; - spp = (d == 32) ? 
3 : 1; - break; - - case IFF_JFIF_JPEG: - ret = readHeaderJpeg(filename, &w, &h, &spp, NULL, NULL); - bps = 8; - if (ret) - return ERROR_INT( "jpeg: no header info returned", procName, 1); - break; - - case IFF_PNG: - ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap); - if (ret) - return ERROR_INT( "png: no header info returned", procName, 1); - break; - - case IFF_TIFF: - case IFF_TIFF_PACKBITS: - case IFF_TIFF_RLE: - case IFF_TIFF_G3: - case IFF_TIFF_G4: - case IFF_TIFF_LZW: - case IFF_TIFF_ZIP: - case IFF_TIFF_JPEG: - /* Reading page 0 by default; possibly redefine format */ - ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap, - &format); - if (ret) - return ERROR_INT( "tiff: no header info returned", procName, 1); - break; - - case IFF_PNM: - ret = readHeaderPnm(filename, &w, &h, &d, &type, &bps, &spp); - if (ret) - return ERROR_INT( "pnm: no header info returned", procName, 1); - break; - - case IFF_GIF: /* cheating: reading the entire file */ - if ((pix = pixRead(filename)) == NULL) - return ERROR_INT( "gif: pix not read", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - pixDestroy(&pix); - iscmap = 1; /* always colormapped; max 256 colors */ - spp = 1; - bps = d; - break; - - case IFF_JP2: - ret = readHeaderJp2k(filename, &w, &h, &bps, &spp); - break; - - case IFF_WEBP: - if (readHeaderWebP(filename, &w, &h, &spp)) - return ERROR_INT( "webp: no header info returned", procName, 1); - bps = 8; - break; - - case IFF_PS: - if (pformat) *pformat = format; - return ERROR_INT("PostScript reading is not supported\n", procName, 1); - - case IFF_LPDF: - if (pformat) *pformat = format; - return ERROR_INT("Pdf reading is not supported\n", procName, 1); - - case IFF_SPIX: - ret = readHeaderSpix(filename, &w, &h, &bps, &spp, &iscmap); - if (ret) - return ERROR_INT( "spix: no header info returned", procName, 1); - break; - - case IFF_UNKNOWN: - L_ERROR("unknown format in file %s\n", procName, filename); - return 1; - break; - } - - if (pw) 
*pw = w; - if (ph) *ph = h; - if (pbps) *pbps = bps; - if (pspp) *pspp = spp; - if (piscmap) *piscmap = iscmap; - if (pformat) *pformat = format; - return 0; -} - - -/*---------------------------------------------------------------------* - * Format finders * - *---------------------------------------------------------------------*/ -/*! - * \brief findFileFormat() - * - * \param[in] filename - * \param[out] pformat found format - * \return 0 if OK, 1 on error or if format is not recognized - */ -l_ok -findFileFormat(const char *filename, - l_int32 *pformat) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("findFileFormat"); - - if (!pformat) - return ERROR_INT("&format not defined", procName, 1); - *pformat = IFF_UNKNOWN; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = findFileFormatStream(fp, pformat); - fclose(fp); - return ret; -} - - -/*! - * \brief findFileFormatStream() - * - * \param[in] fp file stream - * \param[out] pformat found format - * \return 0 if OK, 1 on error or if format is not recognized - * - *
- * Notes:
- *      (1) Important: Side effect -- this resets fp to BOF.
- * 
- */ -l_ok -findFileFormatStream(FILE *fp, - l_int32 *pformat) -{ -l_uint8 firstbytes[12]; -l_int32 format; - - PROCNAME("findFileFormatStream"); - - if (!pformat) - return ERROR_INT("&format not defined", procName, 1); - *pformat = IFF_UNKNOWN; - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - - rewind(fp); - if (fnbytesInFile(fp) < 12) - return ERROR_INT("truncated file", procName, 1); - - if (fread(&firstbytes, 1, 12, fp) != 12) - return ERROR_INT("failed to read first 12 bytes of file", procName, 1); - rewind(fp); - - findFileFormatBuffer(firstbytes, &format); - if (format == IFF_TIFF) { - findTiffCompression(fp, &format); - rewind(fp); - } - *pformat = format; - if (format == IFF_UNKNOWN) - return 1; - else - return 0; -} - - -/*! - * \brief findFileFormatBuffer() - * - * \param[in] buf byte buffer at least 12 bytes in size; we can't check - * \param[out] pformat found format - * \return 0 if OK, 1 on error or if format is not recognized - * - *
- * Notes:
- *      (1) This determines the file format from the first 12 bytes in
- *          the compressed data stream, which are stored in memory.
- *      (2) For tiff files, this returns IFF_TIFF.  The specific tiff
- *          compression is then determined using findTiffCompression().
- * 
- */ -l_ok -findFileFormatBuffer(const l_uint8 *buf, - l_int32 *pformat) -{ -l_uint16 twobytepw; - - PROCNAME("findFileFormatBuffer"); - - if (!pformat) - return ERROR_INT("&format not defined", procName, 1); - *pformat = IFF_UNKNOWN; - if (!buf) - return ERROR_INT("byte buffer not defined", procName, 0); - - /* Check the bmp and tiff 2-byte header ids */ - ((char *)(&twobytepw))[0] = buf[0]; - ((char *)(&twobytepw))[1] = buf[1]; - - if (convertOnBigEnd16(twobytepw) == BMP_ID) { - *pformat = IFF_BMP; - return 0; - } - - if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) { - *pformat = IFF_TIFF; - return 0; - } - - /* Check for the p*m 2-byte header ids */ - if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */ - (buf[0] == 'P' && buf[1] == '1')) { /* old ASCII format */ - *pformat = IFF_PNM; - return 0; - } - - if ((buf[0] == 'P' && buf[1] == '5') || /* newer */ - (buf[0] == 'P' && buf[1] == '2')) { /* old */ - *pformat = IFF_PNM; - return 0; - } - - if ((buf[0] == 'P' && buf[1] == '6') || /* newer */ - (buf[0] == 'P' && buf[1] == '3')) { /* old */ - *pformat = IFF_PNM; - return 0; - } - - if (buf[0] == 'P' && buf[1] == '7') { /* new arbitrary (PAM) */ - *pformat = IFF_PNM; - return 0; - } - - /* Consider the first 11 bytes of the standard JFIF JPEG header: - * - The first two bytes are the most important: 0xffd8. - * - The next two bytes are the jfif marker: 0xffe0. - * Not all jpeg files have this marker. - * - The next two bytes are the header length. - * - The next 5 bytes are a null-terminated string. - * For JFIF, the string is "JFIF", naturally. For others it - * can be "Exif" or just about anything else. - * - Because of all this variability, we only check the first - * two byte marker. All jpeg files are identified as - * IFF_JFIF_JPEG. 
*/ - if (buf[0] == 0xff && buf[1] == 0xd8) { - *pformat = IFF_JFIF_JPEG; - return 0; - } - - /* Check for the 8 byte PNG signature (png_signature in png.c): - * {137, 80, 78, 71, 13, 10, 26, 10} */ - if (buf[0] == 137 && buf[1] == 80 && buf[2] == 78 && buf[3] == 71 && - buf[4] == 13 && buf[5] == 10 && buf[6] == 26 && buf[7] == 10) { - *pformat = IFF_PNG; - return 0; - } - - /* Look for "GIF87a" or "GIF89a" */ - if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' && - (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') { - *pformat = IFF_GIF; - return 0; - } - - /* Check for both types of jp2k file */ - if (memcmp(buf, JP2K_CODESTREAM, 4) == 0 || - memcmp(buf, JP2K_IMAGE_DATA, 12) == 0) { - *pformat = IFF_JP2; - return 0; - } - - /* Check for webp */ - if (buf[0] == 'R' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == 'F' && - buf[8] == 'W' && buf[9] == 'E' && buf[10] == 'B' && buf[11] == 'P') { - *pformat = IFF_WEBP; - return 0; - } - - /* Check for ps */ - if (buf[0] == '%' && buf[1] == '!' && buf[2] == 'P' && buf[3] == 'S' && - buf[4] == '-' && buf[5] == 'A' && buf[6] == 'd' && buf[7] == 'o' && - buf[8] == 'b' && buf[9] == 'e') { - *pformat = IFF_PS; - return 0; - } - - /* Check for pdf */ - if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F' && - buf[4] == '-' && buf[5] == '1') { - *pformat = IFF_LPDF; - return 0; - } - - /* Check for "spix" serialized pix */ - if (buf[0] == 's' && buf[1] == 'p' && buf[2] == 'i' && buf[3] == 'x') { - *pformat = IFF_SPIX; - return 0; - } - - /* File format identifier not found; unknown */ - return 1; -} - - -/*! 
- * \brief fileFormatIsTiff() - * - * \param[in] fp file stream - * \return 1 if file is tiff; 0 otherwise or on error - */ -l_int32 -fileFormatIsTiff(FILE *fp) -{ -l_int32 format; - - PROCNAME("fileFormatIsTiff"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 0); - - findFileFormatStream(fp, &format); - if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || - format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || - format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || - format == IFF_TIFF_ZIP || format == IFF_TIFF_JPEG) - return 1; - else - return 0; -} - - -/*---------------------------------------------------------------------* - * Read from memory * - *---------------------------------------------------------------------*/ -/*! - * \brief pixReadMem() - * - * \param[in] data const; encoded - * \param[in] size size of data - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This is a variation of pixReadStream(), where the data is read
- *          from a memory buffer rather than a file.
- *      (2) On windows, this only reads tiff formatted files directly from
- *          memory.  For other formats, it writes to a temp file and
- *          decompresses from file.
- *      (3) findFileFormatBuffer() requires up to 12 bytes to decide on
- *          the format.  That determines the constraint here.  But in
- *          fact the data must contain the entire compressed string for
- *          the image.
- * 
- */ -PIX * -pixReadMem(const l_uint8 *data, - size_t size) -{ -l_int32 format, valid; -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixReadMem"); - - if (!data) - return (PIX *)ERROR_PTR("data not defined", procName, NULL); - if (size < 12) - return (PIX *)ERROR_PTR("size < 12", procName, NULL); - pix = NULL; - - findFileFormatBuffer(data, &format); - switch (format) - { - case IFF_BMP: - if ((pix = pixReadMemBmp(data, size)) == NULL ) - return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL); - break; - - case IFF_JFIF_JPEG: - if ((pix = pixReadMemJpeg(data, size, 0, 1, NULL, 0)) == NULL) - return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL); - break; - - case IFF_PNG: - if ((pix = pixReadMemPng(data, size)) == NULL) - return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL); - break; - - case IFF_TIFF: - case IFF_TIFF_PACKBITS: - case IFF_TIFF_RLE: - case IFF_TIFF_G3: - case IFF_TIFF_G4: - case IFF_TIFF_LZW: - case IFF_TIFF_ZIP: - /* Reading page 0 by default */ - if ((pix = pixReadMemTiff(data, size, 0)) == NULL) - return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL); - break; - - case IFF_PNM: - if ((pix = pixReadMemPnm(data, size)) == NULL) - return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL); - break; - - case IFF_GIF: - if ((pix = pixReadMemGif(data, size)) == NULL) - return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL); - break; - - case IFF_JP2: - if ((pix = pixReadMemJp2k(data, size, 1, NULL, 0, 0)) == NULL) - return (PIX *)ERROR_PTR("jp2k: no pix returned", procName, NULL); - break; - - case IFF_WEBP: - if ((pix = pixReadMemWebP(data, size)) == NULL) - return (PIX *)ERROR_PTR("webp: no pix returned", procName, NULL); - break; - - case IFF_PS: - L_ERROR("PostScript reading is not supported\n", procName); - return NULL; - - case IFF_LPDF: - L_ERROR("Pdf reading is not supported\n", procName); - return NULL; - - case IFF_SPIX: - if ((pix = pixReadMemSpix(data, size)) == NULL) - return (PIX 
*)ERROR_PTR("spix: no pix returned", procName, NULL); - break; - - case IFF_UNKNOWN: - return (PIX *)ERROR_PTR("Unknown format: no pix returned", - procName, NULL); - break; - } - - /* Set the input format. For tiff reading from memory we lose - * the actual input format; for 1 bpp, default to G4. Also - * verify that the colormap is valid. */ - if (pix) { - if (format == IFF_TIFF && pixGetDepth(pix) == 1) - format = IFF_TIFF_G4; - pixSetInputFormat(pix, format); - if ((cmap = pixGetColormap(pix))) { - pixcmapIsValid(cmap, &valid); - if (!valid) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("invalid colormap", procName, NULL); - } - } - pixSetPadBits(pix, 0); - } - return pix; -} - - -/*! - * \brief pixReadHeaderMem() - * - * \param[in] data const; encoded - * \param[in] size size of data - * \param[out] pformat [optional] image format - * \param[out] pw, ph [optional] width and height - * \param[out] pbps [optional] bits/sample - * \param[out] pspp [optional] samples/pixel 1, 3 or 4 - * \param[out] piscmap [optional] 1 if cmap exists; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This reads the actual headers for jpeg, png, tiff, jp2k and pnm.
- *          For bmp and gif, we cheat and read all the data into a pix,
- *          from which we extract the "header" information.
- *      (2) The amount of data required depends on the format.  For
- *          png, it requires less than 30 bytes, but for jpeg it can
- *          require most of the compressed file.  In practice, the data
- *          is typically the entire compressed file in memory.
- *      (3) findFileFormatBuffer() requires up to 12 bytes to decide on
- *          the format, which we require.
- * 
- */ -l_ok -pixReadHeaderMem(const l_uint8 *data, - size_t size, - l_int32 *pformat, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_int32 format, ret, w, h, d, bps, spp, iscmap; -l_int32 type; /* not used */ -PIX *pix; - - PROCNAME("pixReadHeaderMem"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (piscmap) *piscmap = 0; - if (pformat) *pformat = 0; - iscmap = 0; /* init to false */ - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (size < 12) - return ERROR_INT("size < 12", procName, 1); - - findFileFormatBuffer(data, &format); - - switch (format) - { - case IFF_BMP: /* cheating: read the pix */ - if ((pix = pixReadMemBmp(data, size)) == NULL) - return ERROR_INT( "bmp: pix not read", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - pixDestroy(&pix); - bps = (d == 32) ? 8 : d; - spp = (d == 32) ? 3 : 1; - break; - - case IFF_JFIF_JPEG: - ret = readHeaderMemJpeg(data, size, &w, &h, &spp, NULL, NULL); - bps = 8; - if (ret) - return ERROR_INT( "jpeg: no header info returned", procName, 1); - break; - - case IFF_PNG: - ret = readHeaderMemPng(data, size, &w, &h, &bps, &spp, &iscmap); - if (ret) - return ERROR_INT( "png: no header info returned", procName, 1); - break; - - case IFF_TIFF: - case IFF_TIFF_PACKBITS: - case IFF_TIFF_RLE: - case IFF_TIFF_G3: - case IFF_TIFF_G4: - case IFF_TIFF_LZW: - case IFF_TIFF_ZIP: - case IFF_TIFF_JPEG: - /* Reading page 0 by default; possibly redefine format */ - ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp, - NULL, &iscmap, &format); - if (ret) - return ERROR_INT( "tiff: no header info returned", procName, 1); - break; - - case IFF_PNM: - ret = readHeaderMemPnm(data, size, &w, &h, &d, &type, &bps, &spp); - if (ret) - return ERROR_INT( "pnm: no header info returned", procName, 1); - break; - - case IFF_GIF: /* cheating: read the pix */ - if ((pix = pixReadMemGif(data, size)) == NULL) - return ERROR_INT( "gif: pix 
not read", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - pixDestroy(&pix); - iscmap = 1; /* always colormapped; max 256 colors */ - spp = 1; - bps = d; - break; - - case IFF_JP2: - ret = readHeaderMemJp2k(data, size, &w, &h, &bps, &spp); - break; - - case IFF_WEBP: - bps = 8; - ret = readHeaderMemWebP(data, size, &w, &h, &spp); - break; - - case IFF_PS: - if (pformat) *pformat = format; - return ERROR_INT("PostScript reading is not supported\n", procName, 1); - - case IFF_LPDF: - if (pformat) *pformat = format; - return ERROR_INT("Pdf reading is not supported\n", procName, 1); - - case IFF_SPIX: - ret = sreadHeaderSpix((l_uint32 *)data, &w, &h, &bps, - &spp, &iscmap); - if (ret) - return ERROR_INT( "pnm: no header info returned", procName, 1); - break; - - case IFF_UNKNOWN: - return ERROR_INT("unknown format; no data returned", procName, 1); - break; - } - - if (pw) *pw = w; - if (ph) *ph = h; - if (pbps) *pbps = bps; - if (pspp) *pspp = spp; - if (piscmap) *piscmap = iscmap; - if (pformat) *pformat = format; - return 0; -} - - -/*---------------------------------------------------------------------* - * Output image file information * - *---------------------------------------------------------------------*/ -extern const char *ImageFileFormatExtensions[]; - -/*! - * \brief writeImageFileInfo() - * - * \param[in] filename input file - * \param[in] fpout output file stream - * \param[in] headeronly 1 to read only the header; 0 to read both - * the header and the input file - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If headeronly == 0 and the image has spp == 4,this will
- *          also call pixDisplayLayersRGBA() to display the image
- *          in three views.
- *      (2) This is a debug function that changes the value of
- *          var_PNG_STRIP_16_TO_8 to 1 (the default).
- * 
- */ -l_ok -writeImageFileInfo(const char *filename, - FILE *fpout, - l_int32 headeronly) -{ -char *text; -l_int32 w, h, d, wpl, count, npages, color; -l_int32 format, bps, spp, iscmap, xres, yres, transparency; -FILE *fpin; -PIX *pix, *pixt; -PIXCMAP *cmap; - - PROCNAME("writeImageFileInfo"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!fpout) - return ERROR_INT("stream not defined", procName, 1); - - /* Read the header */ - if (pixReadHeader(filename, &format, &w, &h, &bps, &spp, &iscmap)) { - L_ERROR("failure to read header of %s\n", procName, filename); - return 1; - } - fprintf(fpout, "===============================================\n" - "Reading the header:\n"); - fprintf(fpout, " input image format type: %s\n", - ImageFileFormatExtensions[format]); - fprintf(fpout, " w = %d, h = %d, bps = %d, spp = %d, iscmap = %d\n", - w, h, bps, spp, iscmap); - - findFileFormat(filename, &format); - if (format == IFF_JP2) { - fpin = lept_fopen(filename, "rb"); - fgetJp2kResolution(fpin, &xres, &yres); - fclose(fpin); - fprintf(fpout, " xres = %d, yres = %d\n", xres, yres); - } else if (format == IFF_PNG) { - fpin = lept_fopen(filename, "rb"); - fgetPngResolution(fpin, &xres, &yres); - fclose(fpin); - fprintf(fpout, " xres = %d, yres = %d\n", xres, yres); - if (iscmap) { - fpin = lept_fopen(filename, "rb"); - fgetPngColormapInfo(fpin, &cmap, &transparency); - fclose(fpin); - if (transparency) - fprintf(fpout, " colormap has transparency\n"); - else - fprintf(fpout, " colormap does not have transparency\n"); - pixcmapWriteStream(fpout, cmap); - pixcmapDestroy(&cmap); - } - } else if (format == IFF_JFIF_JPEG) { - fpin = lept_fopen(filename, "rb"); - fgetJpegResolution(fpin, &xres, &yres); - fclose(fpin); - fprintf(fpout, " xres = %d, yres = %d\n", xres, yres); - } - - if (headeronly) - return 0; - - /* Read the full image. Note that when we read an image that - * has transparency in a colormap, we convert it to RGBA. 
*/ - fprintf(fpout, "===============================================\n" - "Reading the full image:\n"); - - /* Preserve 16 bpp if the format is png */ - if (format == IFF_PNG && bps == 16) - l_pngSetReadStrip16To8(0); - - if ((pix = pixRead(filename)) == NULL) { - L_ERROR("failure to read full image of %s\n", procName, filename); - return 1; - } - - format = pixGetInputFormat(pix); - pixGetDimensions(pix, &w, &h, &d); - wpl = pixGetWpl(pix); - spp = pixGetSpp(pix); - fprintf(fpout, " input image format type: %s\n", - ImageFileFormatExtensions[format]); - fprintf(fpout, " w = %d, h = %d, d = %d, spp = %d, wpl = %d\n", - w, h, d, spp, wpl); - fprintf(fpout, " xres = %d, yres = %d\n", - pixGetXRes(pix), pixGetYRes(pix)); - - text = pixGetText(pix); - if (text) /* not null */ - fprintf(fpout, " text: %s\n", text); - - cmap = pixGetColormap(pix); - if (cmap) { - pixcmapHasColor(cmap, &color); - if (color) - fprintf(fpout, " colormap exists and has color values:"); - else - fprintf(fpout, " colormap exists and has only gray values:"); - pixcmapWriteStream(fpout, pixGetColormap(pix)); - } - else - fprintf(fpout, " colormap does not exist\n"); - - if (format == IFF_TIFF || format == IFF_TIFF_G4 || - format == IFF_TIFF_G3 || format == IFF_TIFF_PACKBITS) { - fprintf(fpout, " Tiff header information:\n"); - fpin = lept_fopen(filename, "rb"); - tiffGetCount(fpin, &npages); - lept_fclose(fpin); - if (npages == 1) - fprintf(fpout, " One page in file\n"); - else - fprintf(fpout, " %d pages in file\n", npages); - fprintTiffInfo(fpout, filename); - } - - if (d == 1) { - pixCountPixels(pix, &count, NULL); - pixGetDimensions(pix, &w, &h, NULL); - fprintf(fpout, " 1 bpp: foreground pixel fraction ON/Total = %g\n", - (l_float32)count / (l_float32)(w * h)); - } - fprintf(fpout, "===============================================\n"); - - /* If there is an alpha component, visualize it. Note that when - * alpha == 0, the rgb layer is transparent. 
We visualize the - * result when a white background is visible through the - * transparency layer. */ - if (pixGetSpp(pix) == 4) { - pixt = pixDisplayLayersRGBA(pix, 0xffffff00, 600.0); - pixDisplay(pixt, 100, 100); - pixDestroy(&pixt); - } - - if (format == IFF_PNG && bps == 16) - l_pngSetReadStrip16To8(1); /* return to default if format is png */ - - pixDestroy(&pix); - return 0; -} - - -/*---------------------------------------------------------------------* - * Test function for I/O with different formats * - *---------------------------------------------------------------------*/ -/*! - * \brief ioFormatTest() - * - * \param[in] filename input image file - * \return 0 if OK; 1 on error or if the test fails - * - *
- * Notes:
- *      (1) This writes and reads a set of output files losslessly
- *          in different formats to /tmp/format/, and tests that the
- *          result before and after is unchanged.
- *      (2) This should work properly on input images of any depth,
- *          with and without colormaps.
- *      (3) All supported formats are tested for bmp, png, tiff and
- *          non-ascii pnm.  Ascii pnm also works (but who'd ever want
- *          to use it?)   We allow 2 bpp bmp, although it's not
- *          supported elsewhere.  And we don't support reading
- *          16 bpp png, although this can be turned on in pngio.c.
- *      (4) This silently skips png or tiff testing if HAVE_LIBPNG
- *          or HAVE_LIBTIFF are 0, respectively.
- * 
- */ -l_ok -ioFormatTest(const char *filename) -{ -l_int32 w, h, d, depth, equal, problems; -l_float32 diff; -BOX *box; -PIX *pixs, *pixc, *pix1, *pix2; -PIXCMAP *cmap; - - PROCNAME("ioFormatTest"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - /* Read the input file and limit the size */ - if ((pix1 = pixRead(filename)) == NULL) - return ERROR_INT("pix1 not made", procName, 1); - pixGetDimensions(pix1, &w, &h, NULL); - if (w > 250 && h > 250) { /* take the central 250 x 250 region */ - box = boxCreate(w / 2 - 125, h / 2 - 125, 250, 250); - pixs = pixClipRectangle(pix1, box, NULL); - boxDestroy(&box); - } else { - pixs = pixClone(pix1); - } - pixDestroy(&pix1); - - lept_mkdir("lept/format"); - - /* Note that the reader automatically removes colormaps - * from 1 bpp BMP images, but not from 8 bpp BMP images. - * Therefore, if our 8 bpp image initially doesn't have a - * colormap, we are going to need to remove it from any - * pix read from a BMP file. */ - pixc = pixClone(pixs); /* laziness */ - - /* This does not test the alpha layer pixels, because most - * formats don't support it. Remove any alpha. */ - if (pixGetSpp(pixc) == 4) - pixSetSpp(pixc, 3); - cmap = pixGetColormap(pixc); /* colormap; can be NULL */ - d = pixGetDepth(pixc); - - problems = FALSE; - - /* ----------------------- BMP -------------------------- */ - - /* BMP works for 1, 2, 4, 8 and 32 bpp images. - * It always writes colormaps for 1 and 8 bpp, so we must - * remove it after readback if the input image doesn't have - * a colormap. Although we can write/read 2 bpp BMP, nobody - * else can read them! 
*/ - if (d == 1 || d == 8) { - L_INFO("write/read bmp\n", procName); - pixWrite(FILE_BMP, pixc, IFF_BMP); - pix1 = pixRead(FILE_BMP); - if (!cmap) - pix2 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); - else - pix2 = pixClone(pix1); - pixEqual(pixc, pix2, &equal); - if (!equal) { - L_INFO(" **** bad bmp image: d = %d ****\n", procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - if (d == 2 || d == 4 || d == 32) { - L_INFO("write/read bmp\n", procName); - pixWrite(FILE_BMP, pixc, IFF_BMP); - pix1 = pixRead(FILE_BMP); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad bmp image: d = %d ****\n", procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - } - - /* ----------------------- PNG -------------------------- */ -#if HAVE_LIBPNG - /* PNG works for all depths, but here, because we strip - * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */ - if (d != 16) { - L_INFO("write/read png\n", procName); - pixWrite(FILE_PNG, pixc, IFF_PNG); - pix1 = pixRead(FILE_PNG); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad png image: d = %d ****\n", procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - } -#endif /* HAVE_LIBPNG */ - - /* ----------------------- TIFF -------------------------- */ -#if HAVE_LIBTIFF - /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images. - * Because 8 bpp tiff always writes 256 entry colormaps, the - * colormap sizes may be different for 8 bpp images with - * colormap; we are testing if the image content is the same. - * Likewise, the 2 and 4 bpp tiff images with colormaps - * have colormap sizes 4 and 16, rsp. This test should - * work properly on the content, regardless of the number - * of color entries in pixc. 
*/ - - /* tiff uncompressed works for all pixel depths */ - L_INFO("write/read uncompressed tiff\n", procName); - pixWrite(FILE_TIFF, pixc, IFF_TIFF); - pix1 = pixRead(FILE_TIFF); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff uncompressed image: d = %d ****\n", - procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - - /* tiff lzw works for all pixel depths */ - L_INFO("write/read lzw compressed tiff\n", procName); - pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW); - pix1 = pixRead(FILE_LZW); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff lzw compressed image: d = %d ****\n", - procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - - /* tiff adobe deflate (zip) works for all pixel depths */ - L_INFO("write/read zip compressed tiff\n", procName); - pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP); - pix1 = pixRead(FILE_ZIP); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff zip compressed image: d = %d ****\n", - procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - - /* tiff jpeg encoding works for grayscale and rgb */ - if (d == 8 || d == 32) { - PIX *pixc1; - L_INFO("write/read jpeg compressed tiff\n", procName); - if (d == 8 && pixGetColormap(pixc)) { - pixc1 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC); - pixWrite(FILE_TIFF_JPEG, pixc1, IFF_TIFF_JPEG); - if ((pix1 = pixRead(FILE_TIFF_JPEG)) == NULL) { - L_INFO(" did not read FILE_TIFF_JPEG\n", procName); - problems = TRUE; - } - pixDestroy(&pixc1); - } else { - pixWrite(FILE_TIFF_JPEG, pixc, IFF_TIFF_JPEG); - pix1 = pixRead(FILE_TIFF_JPEG); - if (d == 8) { - pixCompareGray(pix1, pixc, L_COMPARE_ABS_DIFF, 0, NULL, &diff, - NULL, NULL); - } else { - pixCompareRGB(pix1, pixc, L_COMPARE_ABS_DIFF, 0, NULL, &diff, - NULL, NULL); - } - if (diff > 8.0) { - L_INFO(" **** bad tiff jpeg compressed image: " - "d = %d, diff = %5.2f ****\n", procName, d, diff); - problems = TRUE; - } - } - pixDestroy(&pix1); - } - - /* tiff g4, g3, rle and 
packbits work for 1 bpp */ - if (d == 1) { - L_INFO("write/read g4 compressed tiff\n", procName); - pixWrite(FILE_G4, pixc, IFF_TIFF_G4); - pix1 = pixRead(FILE_G4); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff g4 image ****\n", procName); - problems = TRUE; - } - pixDestroy(&pix1); - - L_INFO("write/read g3 compressed tiff\n", procName); - pixWrite(FILE_G3, pixc, IFF_TIFF_G3); - pix1 = pixRead(FILE_G3); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff g3 image ****\n", procName); - problems = TRUE; - } - pixDestroy(&pix1); - - L_INFO("write/read rle compressed tiff\n", procName); - pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE); - pix1 = pixRead(FILE_RLE); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff rle image: d = %d ****\n", procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - - L_INFO("write/read packbits compressed tiff\n", procName); - pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS); - pix1 = pixRead(FILE_PB); - pixEqual(pixc, pix1, &equal); - if (!equal) { - L_INFO(" **** bad tiff packbits image: d = %d ****\n", - procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - } -#endif /* HAVE_LIBTIFF */ - - /* ----------------------- PNM -------------------------- */ - - /* pnm works for 1, 2, 4, 8, 16 and 32 bpp. - * pnm doesn't have colormaps, so when we write colormapped - * pix out as pnm, the colormap is removed. Thus for the test, - * we must remove the colormap from pixc before testing. 
*/ - L_INFO("write/read pnm\n", procName); - pixWrite(FILE_PNM, pixc, IFF_PNM); - pix1 = pixRead(FILE_PNM); - if (cmap) - pix2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC); - else - pix2 = pixClone(pixc); - pixEqual(pix1, pix2, &equal); - if (!equal) { - L_INFO(" **** bad pnm image: d = %d ****\n", procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - pixDestroy(&pix2); - - /* ----------------------- GIF -------------------------- */ -#if HAVE_LIBGIF - /* GIF works for only 1 and 8 bpp, colormapped */ - if (d != 8 || !cmap) - pix1 = pixConvertTo8(pixc, 1); - else - pix1 = pixClone(pixc); - L_INFO("write/read gif\n", procName); - pixWrite(FILE_GIF, pix1, IFF_GIF); - pix2 = pixRead(FILE_GIF); - pixEqual(pix1, pix2, &equal); - if (!equal) { - L_INFO(" **** bad gif image: d = %d ****\n", procName, d); - problems = TRUE; - } - pixDestroy(&pix1); - pixDestroy(&pix2); -#endif /* HAVE_LIBGIF */ - - /* ----------------------- JPEG ------------------------- */ -#if HAVE_LIBJPEG - /* JPEG works for only 8 bpp gray and rgb */ - if (cmap || d > 8) - pix1 = pixConvertTo32(pixc); - else - pix1 = pixConvertTo8(pixc, 0); - depth = pixGetDepth(pix1); - L_INFO("write/read jpeg\n", procName); - pixWrite(FILE_JPG, pix1, IFF_JFIF_JPEG); - pix2 = pixRead(FILE_JPG); - if (depth == 8) { - pixCompareGray(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, - NULL, NULL); - } else { - pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, - NULL, NULL); - } - if (diff > 8.0) { - L_INFO(" **** bad jpeg image: d = %d, diff = %5.2f ****\n", - procName, depth, diff); - problems = TRUE; - } - pixDestroy(&pix1); - pixDestroy(&pix2); -#endif /* HAVE_LIBJPEG */ - - /* ----------------------- WEBP ------------------------- */ -#if HAVE_LIBWEBP - /* WEBP works for rgb and rgba */ - if (cmap || d <= 16) - pix1 = pixConvertTo32(pixc); - else - pix1 = pixClone(pixc); - depth = pixGetDepth(pix1); - L_INFO("write/read webp\n", procName); - pixWrite(FILE_WEBP, pix1, IFF_WEBP); - pix2 = 
pixRead(FILE_WEBP); - pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, NULL, NULL); - if (diff > 5.0) { - L_INFO(" **** bad webp image: d = %d, diff = %5.2f ****\n", - procName, depth, diff); - problems = TRUE; - } - pixDestroy(&pix1); - pixDestroy(&pix2); -#endif /* HAVE_LIBWEBP */ - - /* ----------------------- JP2K ------------------------- */ -#if HAVE_LIBJP2K - /* JP2K works for only 8 bpp gray, rgb and rgba */ - if (cmap || d > 8) - pix1 = pixConvertTo32(pixc); - else - pix1 = pixConvertTo8(pixc, 0); - depth = pixGetDepth(pix1); - L_INFO("write/read jp2k\n", procName); - pixWrite(FILE_JP2K, pix1, IFF_JP2); - pix2 = pixRead(FILE_JP2K); - if (depth == 8) { - pixCompareGray(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, - NULL, NULL); - } else { - pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, - NULL, NULL); - } - lept_stderr("diff = %7.3f\n", diff); - if (diff > 7.0) { - L_INFO(" **** bad jp2k image: d = %d, diff = %5.2f ****\n", - procName, depth, diff); - problems = TRUE; - } - pixDestroy(&pix1); - pixDestroy(&pix2); -#endif /* HAVE_LIBJP2K */ - - if (problems == FALSE) - L_INFO("All formats read and written OK!\n", procName); - - pixDestroy(&pixc); - pixDestroy(&pixs); - return problems; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recog.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recog.h deleted file mode 100644 index 44e6aa18..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recog.h +++ /dev/null @@ -1,264 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_RECOG_H -#define LEPTONICA_RECOG_H - -/*! - * \file recog.h - * - *
- *     This is a simple utility for training and recognizing individual
- *     machine-printed text characters.  It is designed to be adapted
- *     to a particular set of character images; e.g., from a book.
- *
- *     There are two methods of training the recognizer.  In the most
- *     simple, a set of bitmaps has been labeled by some means, such
- *     a generic OCR program.  This is input either one template at a time
- *     or as a pixa of templates, to a function that creates a recog.
- *     If in a pixa, the text string label must be embedded in the
- *     text field of each pix.
- *
- *     If labeled data is not available, we start with a bootstrap
- *     recognizer (BSR) that has labeled data from a variety of sources.
- *     These images are scaled, typically to a fixed height, and then
- *     fed similarly scaled unlabeled images from the source (e.g., book),
- *     and the BSR attempts to identify them.  All images that have
- *     a high enough correlation score with one of the templates in the
- *     BSR are emitted in a pixa, which now holds unscaled and labeled
- *     templates from the source.  This is the generator for a book adapted
- *     recognizer (BAR).
- *
- *     The pixa should always be thought of as the primary structure.
- *     It is the generator for the recog, because a recog is built
- *     from a pixa of unscaled images.
- *
- *     New image templates can be added to a recog as long as it is
- *     in training mode.  Once training is finished, to add templates
- *     it is necessary to extract the generating pixa, add templates
- *     to that pixa, and make a new recog.  Similarly, we do not
- *     join two recog; instead, we simply join their generating pixa,
- *     and make a recog from that.
- *
- *     To remove outliers from a pixa of labeled pix, make a recog,
- *     determine the outliers, and generate a new pixa with the
- *     outliers removed.  The outliers are determined by building
- *     special templates for each character set that are scaled averages
- *     of the individual templates.  Then a correlation score is found
- *     between each template and the averaged templates.  There are
- *     two implementations; outliers are determined as either:
- *      (1) a template having a correlation score with its class average
- *          that is below a threshold, or
- *      (2) a template having a correlation score with its class average
- *          that is smaller than the correlation score with the average
- *          of another class.
- *     Outliers are removed from the generating pixa.  Scaled averaging
- *     is only performed for determining outliers and for splitting
- *     characters; it is never used in a trained recognizer for identifying
- *     unlabeled samples.
- *
- *     Two methods using averaged templates are provided for splitting
- *     touching characters:
- *      (1) greedy matching
- *      (2) document image decoding (DID)
- *     The DID method is the default.  It is about 5x faster and
- *     possibly more accurate.
- *
- *     Once a BAR has been made, unlabeled sample images are identified
- *     by finding the individual template in the BAR with highest
- *     correlation.  The input images and images in the BAR can be
- *     represented in two ways:
- *      (1) as scanned, binarized to 1 bpp
- *      (2) as a width-normalized outline formed by thinning to a
- *          skeleton and then dilating by a fixed amount.
- *
- *     The recog can be serialized to file and read back.  The serialized
- *     version holds the templates used for correlation (which may have
- *     been modified by scaling and turning into lines from the unscaled
- *     templates), plus, for arbitrary character sets, the UTF8
- *     representation and the lookup table mapping from the character
- *     representation to index.
- *
- *     Why do we not use averaged templates for recognition?
- *     Letterforms can take on significantly different shapes (eg.,
- *     the letters 'a' and 'g'), and it makes no sense to average these.
- *     The previous version of this utility allowed multiple recognizers
- *     to exist, but this is an unnecessary complication if recognition
- *     is done on all samples instead of on averages.
- * 
- */ - -#define RECOG_VERSION_NUMBER 2 - -struct L_Recog { - l_int32 scalew; /*!< scale all examples to this width; */ - /*!< use 0 prevent horizontal scaling */ - l_int32 scaleh; /*!< scale all examples to this height; */ - /*!< use 0 prevent vertical scaling */ - l_int32 linew; /*!< use a value > 0 to convert the bitmap */ - /*!< to lines of fixed width; 0 to skip */ - l_int32 templ_use; /*!< template use: use either the average */ - /*!< or all temmplates (L_USE_AVERAGE or */ - /*!< L_USE_ALL) */ - l_int32 maxarraysize; /*!< initialize container arrays to this */ - l_int32 setsize; /*!< size of character set */ - l_int32 threshold; /*!< for binarizing if depth > 1 */ - l_int32 maxyshift; /*!< vertical jiggle on nominal centroid */ - /*!< alignment; typically 0 or 1 */ - l_int32 charset_type; /*!< one of L_ARABIC_NUMERALS, etc. */ - l_int32 charset_size; /*!< expected number of classes in charset */ - l_int32 min_nopad; /*!< min number of samples without padding */ - l_int32 num_samples; /*!< number of training samples */ - l_int32 minwidth_u; /*!< min width averaged unscaled templates */ - l_int32 maxwidth_u; /*!< max width averaged unscaled templates */ - l_int32 minheight_u; /*!< min height averaged unscaled templates */ - l_int32 maxheight_u; /*!< max height averaged unscaled templates */ - l_int32 minwidth; /*!< min width averaged scaled templates */ - l_int32 maxwidth; /*!< max width averaged scaled templates */ - l_int32 ave_done; /*!< set to 1 when averaged bitmaps are made */ - l_int32 train_done; /*!< set to 1 when training is complete or */ - /*!< identification has started */ - l_float32 max_wh_ratio; /*!< max width/height ratio to split */ - l_float32 max_ht_ratio; /*!< max of max/min template height ratio */ - l_int32 min_splitw; /*!< min component width kept in splitting */ - l_int32 max_splith; /*!< max component height kept in splitting */ - struct Sarray *sa_text; /*!< text array for arbitrary char set */ - struct L_Dna *dna_tochar; /*!< 
index-to-char lut for arbitrary charset */ - l_int32 *centtab; /*!< table for finding centroids */ - l_int32 *sumtab; /*!< table for finding pixel sums */ - struct Pixaa *pixaa_u; /*!< all unscaled templates for each class */ - struct Ptaa *ptaa_u; /*!< centroids of all unscaled templates */ - struct Numaa *naasum_u; /*!< area of all unscaled templates */ - struct Pixaa *pixaa; /*!< all (scaled) templates for each class */ - struct Ptaa *ptaa; /*!< centroids of all (scaledl) templates */ - struct Numaa *naasum; /*!< area of all (scaled) templates */ - struct Pixa *pixa_u; /*!< averaged unscaled templates per class */ - struct Pta *pta_u; /*!< centroids of unscaled ave. templates */ - struct Numa *nasum_u; /*!< area of unscaled averaged templates */ - struct Pixa *pixa; /*!< averaged (scaled) templates per class */ - struct Pta *pta; /*!< centroids of (scaled) ave. templates */ - struct Numa *nasum; /*!< area of (scaled) averaged templates */ - struct Pixa *pixa_tr; /*!< all input training images */ - struct Pixa *pixadb_ave; /*!< unscaled and scaled averaged bitmaps */ - struct Pixa *pixa_id; /*!< input images for identifying */ - struct Pix *pixdb_ave; /*!< debug: best match of input against ave. */ - struct Pix *pixdb_range; /*!< debug: best matches within range */ - struct Pixa *pixadb_boot; /*!< debug: bootstrap training results */ - struct Pixa *pixadb_split; /*!< debug: splitting results */ - struct L_Bmf *bmf; /*!< bmf fonts */ - l_int32 bmf_size; /*!< font size of bmf; default is 6 pt */ - struct L_Rdid *did; /*!< temp data used for image decoding */ - struct L_Rch *rch; /*!< temp data used for holding best char */ - struct L_Rcha *rcha; /*!< temp data used for array of best chars */ -}; -typedef struct L_Recog L_RECOG; - -/*! 
- * Data returned from correlation matching on a single character - */ -struct L_Rch { - l_int32 index; /*!< index of best template */ - l_float32 score; /*!< correlation score of best template */ - char *text; /*!< character string of best template */ - l_int32 sample; /*!< index of best sample (within the best */ - /*!< template class, if all samples are used) */ - l_int32 xloc; /*!< x-location of template (delx + shiftx) */ - l_int32 yloc; /*!< y-location of template (dely + shifty) */ - l_int32 width; /*!< width of best template */ -}; -typedef struct L_Rch L_RCH; - -/*! - * Data returned from correlation matching on an array of characters - */ -struct L_Rcha { - struct Numa *naindex; /*!< indices of best templates */ - struct Numa *nascore; /*!< correlation scores of best templates */ - struct Sarray *satext; /*!< character strings of best templates */ - struct Numa *nasample; /*!< indices of best samples */ - struct Numa *naxloc; /*!< x-locations of templates (delx + shiftx) */ - struct Numa *nayloc; /*!< y-locations of templates (dely + shifty) */ - struct Numa *nawidth; /*!< widths of best templates */ -}; -typedef struct L_Rcha L_RCHA; - -/*! - * Data used for decoding a line of characters. 
- */ -struct L_Rdid { - struct Pix *pixs; /*!< clone of pix to be decoded */ - l_int32 **counta; /*!< count array for each averaged template */ - l_int32 **delya; /*!< best y-shift array per average template */ - l_int32 narray; /*!< number of averaged templates */ - l_int32 size; /*!< size of count array (width of pixs) */ - l_int32 *setwidth; /*!< setwidths for each template */ - struct Numa *nasum; /*!< pixel count in pixs by column */ - struct Numa *namoment; /*!< first moment of pixels in pixs by cols */ - l_int32 fullarrays; /*!< 1 if full arrays are made; 0 otherwise */ - l_float32 *beta; /*!< channel coeffs for template fg term */ - l_float32 *gamma; /*!< channel coeffs for bit-and term */ - l_float32 *trellisscore; /*!< score on trellis */ - l_int32 *trellistempl; /*!< template on trellis (for backtrack) */ - struct Numa *natempl; /*!< indices of best path templates */ - struct Numa *naxloc; /*!< x locations of best path templates */ - struct Numa *nadely; /*!< y locations of best path templates */ - struct Numa *nawidth; /*!< widths of best path templates */ - struct Boxa *boxa; /*!< Viterbi result for splitting input pixs */ - struct Numa *nascore; /*!< correlation scores: best path templates */ - struct Numa *natempl_r; /*!< indices of best rescored templates */ - struct Numa *nasample_r; /*!< samples of best scored templates */ - struct Numa *naxloc_r; /*!< x locations of best rescoredtemplates */ - struct Numa *nadely_r; /*!< y locations of best rescoredtemplates */ - struct Numa *nawidth_r; /*!< widths of best rescoredtemplates */ - struct Numa *nascore_r; /*!< correlation scores: rescored templates */ -}; -typedef struct L_Rdid L_RDID; - - -/*-------------------------------------------------------------------------* - * Flags for describing limited character sets * - *-------------------------------------------------------------------------*/ -/*! 
Character Set */ -enum { - L_UNKNOWN = 0, /*!< character set type is not specified */ - L_ARABIC_NUMERALS = 1, /*!< 10 digits */ - L_LC_ROMAN_NUMERALS = 2, /*!< 7 lower-case letters (i,v,x,l,c,d,m) */ - L_UC_ROMAN_NUMERALS = 3, /*!< 7 upper-case letters (I,V,X,L,C,D,M) */ - L_LC_ALPHA = 4, /*!< 26 lower-case letters */ - L_UC_ALPHA = 5 /*!< 26 upper-case letters */ -}; - -/*-------------------------------------------------------------------------* - * Flags for selecting between using average and all templates: * - * recog->templ_use * - *-------------------------------------------------------------------------*/ -/*! Template Select */ -enum { - L_USE_ALL_TEMPLATES = 0, /*!< use all templates; default */ - L_USE_AVERAGE_TEMPLATES = 1 /*!< use average templates; special cases */ -}; - -#endif /* LEPTONICA_RECOG_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogbasic.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogbasic.c deleted file mode 100644 index f3b6aad9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogbasic.c +++ /dev/null @@ -1,1232 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file recogbasic.c - *
- *
- *      Recog creation, destruction and access
- *         L_RECOG            *recogCreateFromRecog()
- *         L_RECOG            *recogCreateFromPixa()
- *         L_RECOG            *recogCreateFromPixaNoFinish()
- *         L_RECOG            *recogCreate()
- *         void                recogDestroy()
- *
- *      Recog accessors
- *         l_int32             recogGetCount()
- *         l_int32             recogSetParams()
- *         static l_int32      recogGetCharsetSize()
- *
- *      Character/index lookup
- *         l_int32             recogGetClassIndex()
- *         l_int32             recogStringToIndex()
- *         l_int32             recogGetClassString()
- *         l_int32             l_convertCharstrToInt()
- *
- *      Serialization
- *         L_RECOG            *recogRead()
- *         L_RECOG            *recogReadStream()
- *         L_RECOG            *recogReadMem()
- *         l_int32             recogWrite()
- *         l_int32             recogWriteStream()
- *         l_int32             recogWriteMem()
- *         PIXA               *recogExtractPixa()
- *         static l_int32      recogAddCharstrLabels()
- *         static l_int32      recogAddAllSamples()
- *
- *  The recognizer functionality is split into four files:
- *    recogbasic.c: create, destroy, access, serialize
- *    recogtrain.c: training on labeled and unlabeled data
- *    recogident.c: running the recognizer(s) on input
- *    recogdid.c:   running the recognizer(s) on input using a
- *                  document image decoding (DID) hidden markov model
- *
- *  This is a content-adapted (or book-adapted) recognizer (BAR) application.
- *  The recognizers here are typically assembled from data that has
- *  been labeled by a generic recognition system, such as Tesseract.
- *  The general procedure to create a recognizer (recog) from labeled data is
- *  to add the labeled character bitmaps, either one at a time or
- *  all together from a pixa with labeled pix.
- *
- *  The suggested use for a BAR that consists of labeled templates drawn
- *  from a single source (e.g., a book) is to identify unlabeled samples
- *  by using unscaled character templates in the BAR, picking the
- *  template closest to the unlabeled sample.
- *
- *  Outliers can be removed from a pixa of labeled pix.  This is one of
- *  two methods that use averaged templates (the other is greedy splitting
- *  of characters).  See recogtrain.c for a discussion and the implementation.
- *
- *  A special bootstrap recognizer (BSR) can be used to make a BAR from
- *  unlabeled book data.  This is done by comparing character images
- *  from the book with labeled templates in the BSR, where all images
- *  are scaled to h = 40.  The templates can be either the scanned images
- *  or images consisting of width-normalized strokes derived from
- *  the skeleton of the character bitmaps.
- *
- *  Two BARs of labeled character data, that have been made by
- *  different recognizers, can be joined by extracting a pixa of the
- *  labeled templates from each, joining the two pixa, and then
- *  and regenerating a BAR from the joined set of templates.
- *  If all the labeled character data is from a single source (e.g, a book),
- *  identification can proceed using unscaled templates (either the input
- *  image or width-normalized lines).  But if the labeled data comes from
- *  more than one source, (a "hybrid" recognizer), the templates should
- *  be scaled, and we recommend scaling to a fixed height.
- *
- *  Suppose it is not possible to generate a BAR with a sufficient number
- *  of templates of each class taken from a single source.  In that case,
- *  templates from the BSR itself can be added.  This is the condition
- *  described above, where the labeled templates come from multiple
- *  sources, and it is necessary to do all character matches using
- *  templates that have been scaled to a fixed height (e.g., 40).
- *  Likewise, the samples to be identified using this hybrid recognizer
- *  must be modified in the same way.  See prog/recogtest3.c for an
- *  example of the steps that can be taken in the construction of a BAR
- *  using a BSR.
- *
- *  For training numeric input, an example set of calls that scales
- *  each training input to fixed h and will use the line templates of
- *  width linew for identifying unknown characters is:
- *         L_Recog  *rec = recogCreate(0, h, linew, 128, 1);
- *         for (i = 0; i < n; i++) {  // read in n training digits
- *             Pix *pix = ...
- *             recogTrainLabeled(rec, pix, NULL, text[i], 0);
- *         }
- *         recogTrainingFinished(&rec, 1, -1, -1.0);  // required
- *
- *  It is an error if any function that computes averages, removes
- *  outliers or requests identification of an unlabeled character,
- *  such as:
- *     (1) computing the sample averages: recogAverageSamples()
- *     (2) removing outliers: recogRemoveOutliers1() or recogRemoveOutliers2()
- *     (3) requesting identification of an unlabeled character:
- *         recogIdentifyPix()
- *  is called before an explicit call to finish training.  Note that
- *  to do further training on a "finished" recognizer, you can set
- *         recog->train_done = FALSE;
- *  add the new training samples, and again call
- *         recogTrainingFinished(&rec, 1, -1, -1.0);  // required
- *
- *  If not scaling, using the images directly for identification, and
- *  removing outliers, do something like this:
- *      L_Recog  *rec = recogCreate(0, 0, 0, 128, 1);
- *      for (i = 0; i < n; i++) {  // read in n training characters
- *          Pix *pix = ...
- *          recogTrainLabeled(rec, pix, NULL, text[i], 0);
- *      }
- *      recogTrainingFinished(&rec, 1, -1, -1.0);
- *      if (!rec) ... [return]
- *      // remove outliers
- *      recogRemoveOutliers1(&rec, 0.7, 2, NULL, NULL);
- *
- *  You can generate a recognizer from a pixa where the text field in
- *  each pix is the character string label for the pix.  For example,
- *  the following recognizer will store unscaled line images:
- *      L_Recog  *rec = recogCreateFromPixa(pixa, 0, 0, linew, 128, 1);
- *  and in use, it is fed unscaled line images to identify.
- *
- *  For the following, assume that you have a pixa of labeled templates.
- *  If it is likely that some of the input templates are mislabeled,
- *  there are several things that can be done to remove them.
- *  The first is to put a size and quantity filter on them; e.g.
- *       Pixa *pixa2 = recogFilterPixaBySize(pixa1, 10, 15, 2.6);
- *  Then you can remove outliers; e.g.,
- *       Pixa *pixa3 = pixaRemoveOutliers2(pixa2, -1.0, -1, NULL, NULL);
- *
- *  To this point, all templates are from a single source, so you
- *  can make a recognizer that uses the unscaled templates and optionally
- *  attempts to split touching characters:
- *       L_Recog *recog1 = recogCreateFromPixa(pixa3, ...);
- *  Alternatively, if you need more templates for some of the classes,
- *  you can pad with templates from a "bootstrap" recognizer (BSR).
- *  If you pad, it is necessary to scale the templates and input
- *  samples to a fixed height, and no attempt will be made to split
- *  the input sample connected components:
- *       L_Recog *recog1 = recogCreateFromPixa(pixa3, 0, 40, 0, 128, 0);
- *       recogPadDigitTrainingSet(&recog1, 40, 0);
- *
- *  A special case is a pure BSR, that contains images scaled to a fixed
- *  height (we use 40 in these examples).
- *  For this,use either the scanned bitmap:
- *      L_Recog  *recboot = recogCreateFromPixa(pixa, 0, 40, 0, 128, 1);
- *  or width-normalized lines (use width of 5 here):
- *      L_Recog  *recboot = recogCreateFromPixa(pixa, 0, 40, 5, 128, 1);
- *
- *  This can be used to train a new book adapted recognizer (BAC), on
- *  unlabeled data from, e.g., a book.  To do this, the following is required:
- *   (1) the input images from the book must be scaled in the same
- *       way as those in the BSR, and
- *   (2) both the BSR and the input images must be set up to be either
- *       input scanned images or width-normalized lines.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_int32 MaxExamplesInClass = 256; - - /* Default recog parameters that can be changed */ -static const l_int32 DefaultCharsetType = L_ARABIC_NUMERALS; -static const l_int32 DefaultMinNopad = 1; -static const l_float32 DefaultMaxWHRatio = 3.0; /* max allowed w/h - ratio for a component to be split */ -static const l_float32 DefaultMaxHTRatio = 2.6; /* max allowed ratio of - max/min unscaled averaged template heights */ -static const l_int32 DefaultThreshold = 150; /* for binarization */ -static const l_int32 DefaultMaxYShift = 1; /* for identification */ - - /* Static functions */ -static l_int32 recogGetCharsetSize(l_int32 type); -static l_int32 recogAddCharstrLabels(L_RECOG *recog); -static l_int32 recogAddAllSamples(L_RECOG **precog, PIXAA *paa, l_int32 debug); - - -/*------------------------------------------------------------------------* - * Recog: initialization and destruction * - *------------------------------------------------------------------------*/ -/*! - * \brief recogCreateFromRecog() - * - * \param[in] recs source recog with arbitrary input parameters - * \param[in] scalew scale all widths to this; use 0 otherwise - * \param[in] scaleh scale all heights to this; use 0 otherwise - * \param[in] linew width of normalized strokes; use 0 to skip - * \param[in] threshold for binarization; typically ~128 - * \param[in] maxyshift from nominal centroid alignment; default is 1 - * \return recd, or NULL on error - * - *
- * Notes:
- *      (1) This is a convenience function that generates a recog using
- *          the unscaled training data in an existing recog.
- *      (2) It is recommended to use %maxyshift = 1 (the default value)
- *      (3) See recogCreate() for use of %scalew, %scaleh and %linew.
- * 
- */ -L_RECOG * -recogCreateFromRecog(L_RECOG *recs, - l_int32 scalew, - l_int32 scaleh, - l_int32 linew, - l_int32 threshold, - l_int32 maxyshift) -{ -L_RECOG *recd; -PIXA *pixa; - - PROCNAME("recogCreateFromRecog"); - - if (!recs) - return (L_RECOG *)ERROR_PTR("recs not defined", procName, NULL); - - pixa = recogExtractPixa(recs); - recd = recogCreateFromPixa(pixa, scalew, scaleh, linew, threshold, - maxyshift); - pixaDestroy(&pixa); - return recd; -} - - -/*! - * \brief recogCreateFromPixa() - * - * \param[in] pixa of labeled, 1 bpp images - * \param[in] scalew scale all widths to this; use 0 otherwise - * \param[in] scaleh scale all heights to this; use 0 otherwise - * \param[in] linew width of normalized strokes; use 0 to skip - * \param[in] threshold for binarization; typically ~150 - * \param[in] maxyshift from nominal centroid alignment; default is 1 - * \return recog, or NULL on error - * - *
- * Notes:
- *      (1) This is a convenience function for training from labeled data.
- *          The pixa can be read from file.
- *      (2) The pixa should contain the unscaled bitmaps used for training.
- *      (3) See recogCreate() for use of %scalew, %scaleh and %linew.
- *      (4) It is recommended to use %maxyshift = 1 (the default value)
- *      (5) All examples in the same class (i.e., with the same character
- *          label) should be similar.  They can be made similar by invoking
- *          recogRemoveOutliers[1,2]() on %pixa before calling this function.
- * 
- */ -L_RECOG * -recogCreateFromPixa(PIXA *pixa, - l_int32 scalew, - l_int32 scaleh, - l_int32 linew, - l_int32 threshold, - l_int32 maxyshift) -{ -L_RECOG *recog; - - PROCNAME("recogCreateFromPixa"); - - if (!pixa) - return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL); - - recog = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew, - threshold, maxyshift); - if (!recog) - return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL); - - recogTrainingFinished(&recog, 1, -1, -1.0); - if (!recog) - return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL); - return recog; -} - - -/*! - * \brief recogCreateFromPixaNoFinish() - * - * \param[in] pixa of labeled, 1 bpp images - * \param[in] scalew scale all widths to this; use 0 otherwise - * \param[in] scaleh scale all heights to this; use 0 otherwise - * \param[in] linew width of normalized strokes; use 0 to skip - * \param[in] threshold for binarization; typically ~150 - * \param[in] maxyshift from nominal centroid alignment; default is 1 - * \return recog, or NULL on error - * - *
- * Notes:
- *      (1) See recogCreateFromPixa() for details.
- *      (2) This is also used to generate a pixaa with templates
- *          in each class within a pixa.  For that, all args except for
- *          %pixa are ignored.
- * 
- */ -L_RECOG * -recogCreateFromPixaNoFinish(PIXA *pixa, - l_int32 scalew, - l_int32 scaleh, - l_int32 linew, - l_int32 threshold, - l_int32 maxyshift) -{ -char *text; -l_int32 full, n, i, ntext, same, maxd; -PIX *pix; -L_RECOG *recog; - - PROCNAME("recogCreateFromPixaNoFinish"); - - if (!pixa) - return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL); - pixaVerifyDepth(pixa, &same, &maxd); - if (maxd > 1) - return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", procName, NULL); - - pixaIsFull(pixa, &full, NULL); - if (!full) - return (L_RECOG *)ERROR_PTR("not all pix are present", procName, NULL); - - n = pixaGetCount(pixa); - pixaCountText(pixa, &ntext); - if (ntext == 0) - return (L_RECOG *)ERROR_PTR("no pix have text strings", procName, NULL); - if (ntext < n) - L_ERROR("%d text strings < %d pix\n", procName, ntext, n); - - recog = recogCreate(scalew, scaleh, linew, threshold, maxyshift); - if (!recog) - return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - text = pixGetText(pix); - if (!text || strlen(text) == 0) { - L_ERROR("pix[%d] has no text\n", procName, i); - pixDestroy(&pix); - continue; - } - recogTrainLabeled(recog, pix, NULL, text, 0); - pixDestroy(&pix); - } - - return recog; -} - - -/*! - * \brief recogCreate() - * - * \param[in] scalew scale all widths to this; use 0 otherwise - * \param[in] scaleh scale all heights to this; use 0 otherwise - * \param[in] linew width of normalized strokes; use 0 to skip - * \param[in] threshold for binarization; typically ~128; 0 for default - * \param[in] maxyshift from nominal centroid alignment; default is 1 - * \return recog, or NULL on error - * - *
- * Notes:
- *      (1) If %scalew == 0 and %scaleh == 0, no scaling is done.
- *          If one of these is 0 and the other is > 0, scaling is isotropic
- *          to the requested size.  We typically do not set both > 0.
- *      (2) Use linew > 0 to convert the templates to images with fixed
- *          width strokes.  linew == 0 skips the conversion.
- *      (3) The only valid values for %maxyshift are 0, 1 and 2.
- *          It is recommended to use %maxyshift == 1 (default value).
- *          Using %maxyshift == 0 is much faster than %maxyshift == 1, but
- *          it is much less likely to find the template with the best
- *          correlation.  Use of anything but 1 results in a warning.
- *      (4) Scaling is used for finding outliers and for training a
- *          book-adapted recognizer (BAR) from a bootstrap recognizer (BSR).
- *          Scaling the height to a fixed value and scaling the width
- *          accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended.
- *      (5) The storage for most of the arrays is allocated when training
- *          is finished.
- * 
- */ -L_RECOG * -recogCreate(l_int32 scalew, - l_int32 scaleh, - l_int32 linew, - l_int32 threshold, - l_int32 maxyshift) -{ -L_RECOG *recog; - - PROCNAME("recogCreate"); - - if (scalew < 0 || scaleh < 0) - return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL); - if (linew > 10) - return (L_RECOG *)ERROR_PTR("invalid linew > 10", procName, NULL); - if (threshold == 0) threshold = DefaultThreshold; - if (threshold < 0 || threshold > 255) { - L_WARNING("invalid threshold; using default\n", procName); - threshold = DefaultThreshold; - } - if (maxyshift < 0 || maxyshift > 2) { - L_WARNING("invalid maxyshift; using default value\n", procName); - maxyshift = DefaultMaxYShift; - } else if (maxyshift == 0) { - L_WARNING("Using maxyshift = 0; faster, worse correlation results\n", - procName); - } else if (maxyshift == 2) { - L_WARNING("Using maxyshift = 2; slower\n", procName); - } - - recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG)); - recog->templ_use = L_USE_ALL_TEMPLATES; /* default */ - recog->threshold = threshold; - recog->scalew = scalew; - recog->scaleh = scaleh; - recog->linew = linew; - recog->maxyshift = maxyshift; - recogSetParams(recog, 1, -1, -1.0, -1.0); - recog->bmf = bmfCreate(NULL, 6); - recog->bmf_size = 6; - recog->maxarraysize = MaxExamplesInClass; - - /* Generate the LUTs */ - recog->centtab = makePixelCentroidTab8(); - recog->sumtab = makePixelSumTab8(); - recog->sa_text = sarrayCreate(0); - recog->dna_tochar = l_dnaCreate(0); - - /* Input default values for min component size for splitting. - * These are overwritten when pixTrainingFinished() is called. */ - recog->min_splitw = 6; - recog->max_splith = 60; - - /* Allocate the paa for the unscaled training bitmaps */ - recog->pixaa_u = pixaaCreate(recog->maxarraysize); - - /* Generate the storage for debugging */ - recog->pixadb_boot = pixaCreate(2); - recog->pixadb_split = pixaCreate(2); - return recog; -} - - -/*! 
- * \brief recogDestroy() - * - * \param[in,out] precog will be set to null before returning - * \return void - */ -void -recogDestroy(L_RECOG **precog) -{ -L_RECOG *recog; - - PROCNAME("recogDestroy"); - - if (!precog) { - L_WARNING("ptr address is null\n", procName); - return; - } - - if ((recog = *precog) == NULL) return; - - LEPT_FREE(recog->centtab); - LEPT_FREE(recog->sumtab); - sarrayDestroy(&recog->sa_text); - l_dnaDestroy(&recog->dna_tochar); - pixaaDestroy(&recog->pixaa_u); - pixaDestroy(&recog->pixa_u); - ptaaDestroy(&recog->ptaa_u); - ptaDestroy(&recog->pta_u); - numaDestroy(&recog->nasum_u); - numaaDestroy(&recog->naasum_u); - pixaaDestroy(&recog->pixaa); - pixaDestroy(&recog->pixa); - ptaaDestroy(&recog->ptaa); - ptaDestroy(&recog->pta); - numaDestroy(&recog->nasum); - numaaDestroy(&recog->naasum); - pixaDestroy(&recog->pixa_tr); - pixaDestroy(&recog->pixadb_ave); - pixaDestroy(&recog->pixa_id); - pixDestroy(&recog->pixdb_ave); - pixDestroy(&recog->pixdb_range); - pixaDestroy(&recog->pixadb_boot); - pixaDestroy(&recog->pixadb_split); - bmfDestroy(&recog->bmf); - rchDestroy(&recog->rch); - rchaDestroy(&recog->rcha); - recogDestroyDid(recog); - LEPT_FREE(recog); - *precog = NULL; - return; -} - - -/*------------------------------------------------------------------------* - * Recog accessors * - *------------------------------------------------------------------------*/ -/*! - * \brief recogGetCount() - * - * \param[in] recog - * \return count of classes in recog; 0 if no recog or on error - */ -l_int32 -recogGetCount(L_RECOG *recog) -{ - PROCNAME("recogGetCount"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 0); - return recog->setsize; -} - - -/*! 
- * \brief recogSetParams() - * - * \param[in] recog to be padded, if necessary - * \param[in] type type of char set; -1 for default; - * see enum in recog.h - * \param[in] min_nopad min number in a class without padding; - * use -1 for default - * \param[in] max_wh_ratio max width/height ratio allowed for splitting; - * use -1.0 for default - * \param[in] max_ht_ratio max of max/min averaged template height ratio; - * use -1.0 for default - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is called when a recog is created.
- *      (2) Default %min_nopad value allows for some padding.
- *          To disable padding, set %min_nopad = 0.  To pad only when
- *          no samples are available for the class, set %min_nopad = 1.
- *      (3) The %max_wh_ratio limits the width/height ratio for components
- *          that we attempt to split.  Splitting long components is expensive.
- *      (4) The %max_ht_ratio is a quality requirement on the training data.
- *          The recognizer will not run if the averages are computed and
- *          the templates do not satisfy it.
- * 
- */ -l_ok -recogSetParams(L_RECOG *recog, - l_int32 type, - l_int32 min_nopad, - l_float32 max_wh_ratio, - l_float32 max_ht_ratio) -{ - PROCNAME("recogSetParams"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - recog->charset_type = (type >= 0) ? type : DefaultCharsetType; - recog->charset_size = recogGetCharsetSize(recog->charset_type); - recog->min_nopad = (min_nopad >= 0) ? min_nopad : DefaultMinNopad; - recog->max_wh_ratio = (max_wh_ratio > 0.0) ? max_wh_ratio : - DefaultMaxWHRatio; - recog->max_ht_ratio = (max_ht_ratio > 1.0) ? max_ht_ratio : - DefaultMaxHTRatio; - return 0; -} - - -/*! - * \brief recogGetCharsetSize() - * - * \param[in] type of charset - * \return size of charset, or 0 if unknown or on error - */ -static l_int32 -recogGetCharsetSize(l_int32 type) -{ - PROCNAME("recogGetCharsetSize"); - - switch (type) { - case L_UNKNOWN: - return 0; - case L_ARABIC_NUMERALS: - return 10; - case L_LC_ROMAN_NUMERALS: - return 7; - case L_UC_ROMAN_NUMERALS: - return 7; - case L_LC_ALPHA: - return 26; - case L_UC_ALPHA: - return 26; - default: - L_ERROR("invalid charset_type %d\n", procName, type); - return 0; - } - return 0; /* shouldn't happen */ -} - - -/*------------------------------------------------------------------------* - * Character/index lookup * - *------------------------------------------------------------------------*/ -/*! - * \brief recogGetClassIndex() - * - * \param[in] recog with LUT's pre-computed - * \param[in] val integer value; can be up to 3 bytes for UTF-8 - * \param[in] text text from which %val was derived; used if not found - * \param[out] pindex index into dna_tochar - * \return 0 if found; 1 if not found and added; 2 on error. - * - *
- * Notes:
- *      (1) This is used during training.  There is one entry in
- *          recog->dna_tochar (integer value, e.g., ascii) and
- *          one in recog->sa_text (e.g, ascii letter in a string)
- *          for each character class.
- *      (2) This searches the dna character array for %val.  If it is
- *          not found, the template represents a character class not
- *          already seen: it increments setsize (the number of character
- *          classes) by 1, and augments both the index (dna_tochar)
- *          and text (sa_text) arrays.
- *      (3) Returns the index in &index, except on error.
- *      (4) Caller must check the function return value.
- * 
- */ -l_int32 -recogGetClassIndex(L_RECOG *recog, - l_int32 val, - char *text, - l_int32 *pindex) -{ -l_int32 i, n, ival; - - PROCNAME("recogGetClassIndex"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 2); - *pindex = -1; - if (!recog) - return ERROR_INT("recog not defined", procName, 2); - if (!text) - return ERROR_INT("text not defined", procName, 2); - - /* Search existing characters */ - n = l_dnaGetCount(recog->dna_tochar); - for (i = 0; i < n; i++) { - l_dnaGetIValue(recog->dna_tochar, i, &ival); - if (val == ival) { /* found */ - *pindex = i; - return 0; - } - } - - /* If not found... */ - l_dnaAddNumber(recog->dna_tochar, val); - sarrayAddString(recog->sa_text, text, L_COPY); - recog->setsize++; - *pindex = n; - return 1; -} - - -/*! - * \brief recogStringToIndex() - * - * \param[in] recog - * \param[in] text text string for some class - * \param[out] pindex index for that class; -1 if not found - * \return 0 if OK, 1 on error not finding the string is an error - */ -l_ok -recogStringToIndex(L_RECOG *recog, - char *text, - l_int32 *pindex) -{ -char *charstr; -l_int32 i, n, diff; - - PROCNAME("recogStringtoIndex"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = -1; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!text) - return ERROR_INT("text not defined", procName, 1); - - /* Search existing characters */ - n = recog->setsize; - for (i = 0; i < n; i++) { - recogGetClassString(recog, i, &charstr); - if (!charstr) { - L_ERROR("string not found for index %d\n", procName, i); - continue; - } - diff = strcmp(text, charstr); - LEPT_FREE(charstr); - if (diff) continue; - *pindex = i; - return 0; - } - - return 1; /* not found */ -} - - -/*! - * \brief recogGetClassString() - * - * \param[in] recog - * \param[in] index into array of char types - * \param[out] pcharstr string representation; - * returns an empty string on error - * \return 0 if found, 1 on error - * - *
- * Notes:
- *      (1) Extracts a copy of the string from sa_text, which
- *          the caller must free.
- *      (2) Caller must check the function return value.
- * 
- */ -l_int32 -recogGetClassString(L_RECOG *recog, - l_int32 index, - char **pcharstr) -{ - PROCNAME("recogGetClassString"); - - if (!pcharstr) - return ERROR_INT("&charstr not defined", procName, 1); - *pcharstr = stringNew(""); - if (!recog) - return ERROR_INT("recog not defined", procName, 2); - - if (index < 0 || index >= recog->setsize) - return ERROR_INT("invalid index", procName, 1); - LEPT_FREE(*pcharstr); - *pcharstr = sarrayGetString(recog->sa_text, index, L_COPY); - return 0; -} - - -/*! - * \brief l_convertCharstrToInt() - * - * \param[in] str input string representing one UTF-8 character; - * not more than 4 bytes - * \param[out] pval integer value for the input. Think of it - * as a 1-to-1 hash code. - * \return 0 if OK, 1 on error - */ -l_ok -l_convertCharstrToInt(const char *str, - l_int32 *pval) -{ -l_int32 size, val; - - PROCNAME("l_convertCharstrToInt"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (!str) - return ERROR_INT("str not defined", procName, 1); - size = strlen(str); - if (size == 0) - return ERROR_INT("empty string", procName, 1); - if (size > 4) - return ERROR_INT("invalid string: > 4 bytes", procName, 1); - - val = (l_int32)str[0]; - if (size > 1) - val = (val << 8) + (l_int32)str[1]; - if (size > 2) - val = (val << 8) + (l_int32)str[2]; - if (size > 3) - val = (val << 8) + (l_int32)str[3]; - *pval = val; - return 0; -} - - -/*------------------------------------------------------------------------* - * Serialization * - *------------------------------------------------------------------------*/ -/*! - * \brief recogRead() - * - * \param[in] filename - * \return recog, or NULL on error - * - *
- * Notes:
- *      (1) When a recog is serialized, a pixaa of the templates that are
- *          actually used for correlation is saved in the pixaa_u array
- *          of the recog.  These can be different from the templates that
- *          were used to generate the recog, because those original templates
- *          can be scaled and turned into normalized lines.  When recog1
- *          is deserialized to recog2, these templates are put in both the
- *          unscaled array (pixaa_u) and the modified array (pixaa) in recog2.
- *          Why not put it in only the unscaled array and let
- *          recogTrainingFinalized() regenerate the modified templates?
- *          The reason is that with normalized lines, the operation of
- *          thinning to a skeleton and dilating back to a fixed width
- *          is not idempotent.  Thinning to a skeleton saves pixels at
- *          the end of a line segment, and thickening the skeleton puts
- *          additional pixels at the end of the lines.  This tends to
- *          close gaps.
- * 
- */ -L_RECOG * -recogRead(const char *filename) -{ -FILE *fp; -L_RECOG *recog; - - PROCNAME("recogRead"); - - if (!filename) - return (L_RECOG *)ERROR_PTR("filename not defined", procName, NULL); - if ((fp = fopenReadStream(filename)) == NULL) - return (L_RECOG *)ERROR_PTR("stream not opened", procName, NULL); - - if ((recog = recogReadStream(fp)) == NULL) { - fclose(fp); - return (L_RECOG *)ERROR_PTR("recog not read", procName, NULL); - } - - fclose(fp); - return recog; -} - - -/*! - * \brief recogReadStream() - * - * \param[in] fp file stream - * \return recog, or NULL on error - */ -L_RECOG * -recogReadStream(FILE *fp) -{ -l_int32 version, setsize, threshold, scalew, scaleh, linew; -l_int32 maxyshift, nc; -L_DNA *dna_tochar; -PIXAA *paa; -L_RECOG *recog; -SARRAY *sa_text; - - PROCNAME("recogReadStream"); - - if (!fp) - return (L_RECOG *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nRecog Version %d\n", &version) != 1) - return (L_RECOG *)ERROR_PTR("not a recog file", procName, NULL); - if (version != RECOG_VERSION_NUMBER) - return (L_RECOG *)ERROR_PTR("invalid recog version", procName, NULL); - if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1) - return (L_RECOG *)ERROR_PTR("setsize not read", procName, NULL); - if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1) - return (L_RECOG *)ERROR_PTR("binary thresh not read", procName, NULL); - if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1) - return (L_RECOG *)ERROR_PTR("maxyshift not read", procName, NULL); - if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1) - return (L_RECOG *)ERROR_PTR("width not read", procName, NULL); - if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1) - return (L_RECOG *)ERROR_PTR("height not read", procName, NULL); - if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1) - return (L_RECOG *)ERROR_PTR("line width not read", procName, NULL); - if ((recog = recogCreate(scalew, scaleh, linew, threshold, - maxyshift)) == NULL) 
- return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL); - - if (fscanf(fp, "\nLabels for character set:\n") != 0) { - recogDestroy(&recog); - return (L_RECOG *)ERROR_PTR("label intro not read", procName, NULL); - } - l_dnaDestroy(&recog->dna_tochar); - if ((dna_tochar = l_dnaReadStream(fp)) == NULL) { - recogDestroy(&recog); - return (L_RECOG *)ERROR_PTR("dna_tochar not read", procName, NULL); - } - recog->dna_tochar = dna_tochar; - sarrayDestroy(&recog->sa_text); - if ((sa_text = sarrayReadStream(fp)) == NULL) { - recogDestroy(&recog); - return (L_RECOG *)ERROR_PTR("sa_text not read", procName, NULL); - } - recog->sa_text = sa_text; - - if (fscanf(fp, "\nPixaa of all samples in the training set:\n") != 0) { - recogDestroy(&recog); - return (L_RECOG *)ERROR_PTR("pixaa intro not read", procName, NULL); - } - if ((paa = pixaaReadStream(fp)) == NULL) { - recogDestroy(&recog); - return (L_RECOG *)ERROR_PTR("pixaa not read", procName, NULL); - } - recog->setsize = setsize; - nc = pixaaGetCount(paa, NULL); - if (nc != setsize) { - recogDestroy(&recog); - pixaaDestroy(&paa); - L_ERROR("(setsize = %d) != (paa count = %d)\n", procName, - setsize, nc); - return NULL; - } - - recogAddAllSamples(&recog, paa, 0); /* this finishes */ - pixaaDestroy(&paa); - if (!recog) - return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL); - return recog; -} - - -/*! - * \brief recogReadMem() - * - * \param[in] data serialization of recog (not ascii) - * \param[in] size of data in bytes - * \return recog, or NULL on error - */ -L_RECOG * -recogReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -L_RECOG *recog; - - PROCNAME("recogReadMem"); - - if (!data) - return (L_RECOG *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (L_RECOG *)ERROR_PTR("stream not opened", procName, NULL); - - recog = recogReadStream(fp); - fclose(fp); - if (!recog) L_ERROR("recog not read\n", procName); - return recog; -} - - -/*! 
- * \brief recogWrite() - * - * \param[in] filename - * \param[in] recog - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pixaa of templates that is written is the modified one
- *          in the pixaa field. It is the pixaa that is actually used
- *          for correlation. This is not the unscaled array of labeled
- *          bitmaps, in pixaa_u, that was used to generate the recog in the
- *          first place.  See the notes in recogRead() for the rationale.
- * 
- */ -l_ok -recogWrite(const char *filename, - L_RECOG *recog) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("recogWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = recogWriteStream(fp, recog); - fclose(fp); - if (ret) - return ERROR_INT("recog not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief recogWriteStream() - * - * \param[in] fp file stream opened for "wb" - * \param[in] recog - * \return 0 if OK, 1 on error - */ -l_ok -recogWriteStream(FILE *fp, - L_RECOG *recog) -{ - PROCNAME("recogWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - fprintf(fp, "\nRecog Version %d\n", RECOG_VERSION_NUMBER); - fprintf(fp, "Size of character set = %d\n", recog->setsize); - fprintf(fp, "Binarization threshold = %d\n", recog->threshold); - fprintf(fp, "Maxyshift = %d\n", recog->maxyshift); - fprintf(fp, "Scale to width = %d\n", recog->scalew); - fprintf(fp, "Scale to height = %d\n", recog->scaleh); - fprintf(fp, "Normalized line width = %d\n", recog->linew); - fprintf(fp, "\nLabels for character set:\n"); - l_dnaWriteStream(fp, recog->dna_tochar); - sarrayWriteStream(fp, recog->sa_text); - fprintf(fp, "\nPixaa of all samples in the training set:\n"); - pixaaWriteStream(fp, recog->pixaa); - - return 0; -} - - -/*! - * \brief recogWriteMem() - * - * \param[out] pdata data of serialized recog (not ascii) - * \param[out] psize size of returned data - * \param[in] recog - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a recog in memory and puts the result in a buffer.
- * 
- */ -l_ok -recogWriteMem(l_uint8 **pdata, - size_t *psize, - L_RECOG *recog) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("recogWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = recogWriteStream(fp, recog); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = recogWriteStream(fp, recog); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*! - * \brief recogExtractPixa() - * - * \param[in] recog - * \return pixa if OK, NULL on error - * - *
- * Notes:
- *      (1) This generates a pixa of all the unscaled images in the
- *          recognizer, where each one has its character class label in
- *          the pix text field, by flattening pixaa_u to a pixa.
- * 
- */ -PIXA * -recogExtractPixa(L_RECOG *recog) -{ - PROCNAME("recogExtractPixa"); - - if (!recog) - return (PIXA *)ERROR_PTR("recog not defined", procName, NULL); - - recogAddCharstrLabels(recog); - return pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE); -} - - -/*! - * \brief recogAddCharstrLabels() - * - * \param[in] recog - * \return 0 if OK, 1 on error - */ -static l_int32 -recogAddCharstrLabels(L_RECOG *recog) -{ -char *text; -l_int32 i, j, n1, n2; -PIX *pix; -PIXA *pixa; -PIXAA *paa; - - PROCNAME("recogAddCharstrLabels"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - /* Add the labels to each unscaled pix */ - paa = recog->pixaa_u; - n1 = pixaaGetCount(paa, NULL); - for (i = 0; i < n1; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - text = sarrayGetString(recog->sa_text, i, L_NOCOPY); - n2 = pixaGetCount(pixa); - for (j = 0; j < n2; j++) { - pix = pixaGetPix(pixa, j, L_CLONE); - pixSetText(pix, text); - pixDestroy(&pix); - } - pixaDestroy(&pixa); - } - - return 0; -} - - -/*! - * \brief recogAddAllSamples() - * - * \param[in] precog addr of recog - * \param[in] paa pixaa from previously trained recog - * \param[in] debug - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) On error, the input recog is destroyed.
- *      (2) This is used with the serialization routine recogRead(),
- *          where each pixa in the pixaa represents a set of characters
- *          in a different class.  Before calling this function, we have
- *          verified that the number of character classes, given by the
- *          setsize field in %recog, equals the number of pixa in the paa.
- *          The character labels for each set are in the sa_text field.
- * 
- */ -static l_int32 -recogAddAllSamples(L_RECOG **precog, - PIXAA *paa, - l_int32 debug) -{ -char *text; -l_int32 i, j, nc, ns; -PIX *pix; -PIXA *pixa, *pixa1; -L_RECOG *recog; - - PROCNAME("recogAddAllSamples"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - if ((recog = *precog) == NULL) - return ERROR_INT("recog not defined", procName, 1); - if (!paa) { - recogDestroy(&recog); - return ERROR_INT("paa not defined", procName, 1); - } - - nc = pixaaGetCount(paa, NULL); - for (i = 0; i < nc; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - ns = pixaGetCount(pixa); - text = sarrayGetString(recog->sa_text, i, L_NOCOPY); - pixa1 = pixaCreate(ns); - pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT); - for (j = 0; j < ns; j++) { - pix = pixaGetPix(pixa, j, L_CLONE); - if (debug) lept_stderr("pix[%d,%d]: text = %s\n", i, j, text); - pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT); - } - pixaDestroy(&pixa); - } - - recogTrainingFinished(&recog, 0, -1, -1.0); /* For second parameter, - see comment in recogRead() */ - if (!recog) - return ERROR_INT("bad templates; recog destroyed", procName, 1); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogdid.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogdid.c deleted file mode 100644 index 5652c4ac..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogdid.c +++ /dev/null @@ -1,1078 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file recogdid.c - *
- *
- *      Top-level identification
- *         BOXA             *recogDecode()
- *
- *      Generate decoding arrays
- *         static l_int32    recogPrepareForDecoding()
- *         static l_int32    recogMakeDecodingArray()
- *
- *      Dynamic programming for best path
- *         static l_int32    recogRunViterbi()
- *         static l_int32    recogRescoreDidResult()
- *         static PIX       *recogShowPath()
- *
- *      Create/destroy temporary DID data
- *         l_int32           recogCreateDid()
- *         l_int32           recogDestroyDid()
- *
- *      Various helpers
- *         l_int32           recogDidExists()
- *         L_RDID           *recogGetDid()
- *         static l_int32    recogGetWindowedArea()
- *         l_int32           recogSetChannelParams()
- *         static l_int32    recogTransferRchToDid()
- *
- *  See recogbasic.c for examples of training a recognizer, which is
- *  required before it can be used for document image decoding.
- *
- *  Gary Kopec pioneered this hidden markov approach to "Document Image
- *  Decoding" (DID) in the early 1990s.  It is based on estimation
- *  using a generative model of the image generation process, and
- *  provides the most likely decoding of an image if the model is correct.
- *  Given the model, it finds the maximum a posteriori (MAP) "message"
- *  given the observed image.  The model describes how to generate
- *  an image from a message, and the MAP message is derived from the
- *  observed image using Bayes' theorem.  This approach can also be used
- *  to build the model, using the iterative expectation/maximization
- *  method from labeled but errorful data.
- *
- *  In a little more detail: The model comprises three things: the ideal
- *  printed character templates, the independent bit-flip noise model, and
- *  the character setwidths.  When a character is printed, the setwidth
- *  is the distance in pixels that you move forward before being able
- *  to print the next character.  It is typically slightly less than the
- *  width of the character template: if too small, an extra character can be
- *  hallucinated; if too large, it will not be able to match the next
- *  character template on the line.  The model assumes that the probabilities
- *  of bit flip depend only on the assignment of the pixel to background
- *  or template foreground.  The multilevel templates have different
- *  bit flip probabilities for each level.  Because a character image
- *  is composed of many pixels, each of which can be independently flipped,
- *  the actual probability of seeing any rendering is exceedingly small,
- *  being composed of the product of the probabilities for each pixel.
- *  The log likelihood is used both to avoid numeric underflow and,
- *  more importantly, because it results in a summation of independent
- *  pixel probabilities.  That summation can be shown, in Kopec's
- *  original paper, to consist of a sum of two terms: (a) the number of
- *  fg pixels in the bit-and of the observed image with the ideal
- *  template and (b) the number of fg pixels in the template.  Each
- *  has a coefficient that depends only on the bit-flip probabilities
- *  for the fg and bg.  A beautiful result, and computationally simple!
- *  One nice feature of this approach is that the result of the decoding
- *  is not very sensitive to the values  used for the bit flip probabilities.
- *
- *  The procedure for finding the best decoding (MAP) for a given image goes
- *  under several names: Viterbi, dynamic programming, hidden markov model.
- *  It is called a "hidden markov model" because the templates are assumed
- *  to be printed serially and we don't know what they are -- the identity
- *  of the templates must be inferred from the observed image.
- *  The possible decodings form a dense trellis over the pixel positions,
- *  where at each pixel position you have the possibility of having any
- *  of the characters printed there (with some reference point) or having
- *  a single pixel wide space inserted there.  Thus, before the trellis
- *  can be traversed, we must do the work of finding the log probability,
- *  at each pixel location, that each of the templates was printed there.
- *  Armed with those arrays of data, the dynamic programming procedure
- *  moves from left to right, one pixel at a time, recursively finding
- *  the path with the highest log probability that gets to that pixel
- *  position (and noting which template was printed to arrive there).
- *  After reaching the right side of the image, we can simply backtrack
- *  along the path, jumping over each template that lies on the highest
- *  scoring path.  This best path thus only goes through a few of the
- *  pixel positions.
- *
- *  There are two refinements to the original Kopec paper.  In the first,
- *  one uses multiple, non-overlapping fg templates, each with its own
- *  bit flip probability.  This makes sense, because the probability
- *  that a fg boundary pixel flips to bg is greater than that of a fg
- *  pixel not on the boundary.  And the flip probability of a fg boundary
- *  pixel is smaller than that of a bg boundary pixel, which in turn
- *  is greater than that of a bg pixel not on a boundary (the latter
- *  is taken to be the true background).  Then the simplest realistic
- *  multiple template model has three templates that are not background.
- *
- *  In the second refinement, a heuristic (strict upper bound) is used
- *  iteratively in the Viterbi process to compute the log probabilities.
- *  Using the heuristic, you find the best path, and then score all nodes
- *  on that path with the actual probability, which is guaranteed to
- *  be a smaller number.  You run this iteratively, rescoring just the best
- *  found path each time.  After each rescoring, the path may change because
- *  the local scores have been reduced.  However, the process converges
- *  rapidly, and when it doesn't change, it must be the best path because
- *  it is properly scored (even if neighboring paths are heuristically
- *  scored).  The heuristic score is found column-wise by assuming
- *  that all the fg pixels in the template are on fg pixels in the image --
- *  we just take the minimum of the number of pixels in the template
- *  and image column.  This can easily give a 10-fold reduction in
- *  computation because the heuristic score can be computed much faster
- *  than the exact score.
- *
- *  For reference, the classic paper on the approach by Kopec is:
- *  * "Document Image Decoding Using Markov Source Models", IEEE Trans.
- *    PAMI, Vol 16, No. 6, June 1994, pp 602-617.
- *  A refinement of the method for multilevel templates by Kopec is:
- *  * "Multilevel Character Templates for Document Image Decoding",
- *    Proc. SPIE 3027, Document Recognition IV, p. 168ff, 1997.
- *  Further refinements for more efficient decoding are given in these
- *  two papers, which are both stored on leptonica.org:
- *  * "Document Image Decoding using Iterated Complete Path Search", Minka,
- *    Bloomberg and Popat, Proc. SPIE Vol 4307, p. 250-258, Document
- *    Recognition and Retrieval VIII, San Jose, CA 2001.
- *  * "Document Image Decoding using Iterated Complete Path Search with
- *    Subsampled Heuristic Scoring", Bloomberg, Minka and Popat, ICDAR 2001,
- *    p. 344-349, Sept. 2001, Seattle.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -static l_int32 recogPrepareForDecoding(L_RECOG *recog, PIX *pixs, - l_int32 debug); -static l_int32 recogMakeDecodingArray(L_RECOG *recog, l_int32 index, - l_int32 debug); -static l_int32 recogRunViterbi(L_RECOG *recog, PIX **ppixdb); -static l_int32 recogRescoreDidResult(L_RECOG *recog, PIX **ppixdb); -static PIX *recogShowPath(L_RECOG *recog, l_int32 select); -static l_int32 recogGetWindowedArea(L_RECOG *recog, l_int32 index, - l_int32 x, l_int32 *pdely, l_int32 *pwsum); -static l_int32 recogTransferRchToDid(L_RECOG *recog, l_int32 x, l_int32 y); - - /* Parameters for modeling the decoding */ -static const l_float32 SetwidthFraction = 0.95; -static const l_int32 MaxYShift = 1; - - /* Channel parameters. alpha[0] is the probability that a bg pixel - * is OFF. alpha[1] is the probability that level 1 fg is ON. - * The actual values are not too critical, but they must be larger - * than 0.5 and smaller than 1.0. For more accuracy in template - * matching, use a 4-level template, where levels 2 and 3 are - * boundary pixels in the fg and bg, respectively. */ -static const l_float32 DefaultAlpha2[] = {0.95f, 0.9f}; -static const l_float32 DefaultAlpha4[] = {0.95f, 0.9f, 0.75f, 0.25f}; - - -/*------------------------------------------------------------------------* - * Top-level identification * - *------------------------------------------------------------------------*/ -/*! - * \brief recogDecode() - * - * \param[in] recog with LUT's pre-computed - * \param[in] pixs typically of multiple touching characters, 1 bpp - * \param[in] nlevels of templates; 2 for now - * \param[out] ppixdb [optional] debug result; can be null - * \return boxa segmentation of pixs into characters, or NULL on error - * - *
- * Notes:
- *      (1) The input pixs has been filtered so that it is likely to be
- *          composed of more than one touching character.  Specifically,
- *          its height can only slightly exceed that of the tallest
- *          unscaled template, the width is somewhat larger than the
- *          width of the widest unscaled template, and the w/h aspect ratio
- *          is bounded by max_wh_ratio.
- *      (2) This uses the DID mechanism with labeled templates to
- *          segment the input %pixs.  The resulting segmentation is
- *          returned.  (It is given by did->boxa).
- *      (3) In debug mode, the Viterbi path is rescored based on all
- *          the templates.  In non-debug mode, the same procedure is
- *          carried out by recogIdentifyPix() on the result of the
- *          segmentation.
- * 
- */ -BOXA * -recogDecode(L_RECOG *recog, - PIX *pixs, - l_int32 nlevels, - PIX **ppixdb) -{ -l_int32 debug; -PIX *pix1; -PIXA *pixa; - - PROCNAME("recogDecode"); - - if (ppixdb) *ppixdb = NULL; - if (!recog) - return (BOXA *)ERROR_PTR("recog not defined", procName, NULL); - if (!pixs || pixGetDepth(pixs) != 1) - return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (!recog->train_done) - return (BOXA *)ERROR_PTR("training not finished", procName, NULL); - if (nlevels != 2) - return (BOXA *)ERROR_PTR("nlevels != 2 (for now)", procName, NULL); - - debug = (ppixdb) ? 1 : 0; - if (recogPrepareForDecoding(recog, pixs, debug)) - return (BOXA *)ERROR_PTR("error making arrays", procName, NULL); - recogSetChannelParams(recog, nlevels); - - /* Normal path; just run Viterbi */ - if (!debug) { - if (recogRunViterbi(recog, NULL) == 0) - return boxaCopy(recog->did->boxa, L_COPY); - else - return (BOXA *)ERROR_PTR("error in Viterbi", procName, NULL); - } - - /* Debug path */ - if (recogRunViterbi(recog, &pix1)) - return (BOXA *)ERROR_PTR("error in viterbi", procName, NULL); - pixa = pixaCreate(2); - pixaAddPix(pixa, pix1, L_INSERT); - if (recogRescoreDidResult(recog, &pix1)) { - pixaDestroy(&pixa); - return (BOXA *)ERROR_PTR("error in rescoring", procName, NULL); - } - pixaAddPix(pixa, pix1, L_INSERT); - *ppixdb = pixaDisplayTiledInRows(pixa, 32, 2 * pixGetWidth(pix1) + 100, - 1.0, 0, 30, 2); - pixaDestroy(&pixa); - return boxaCopy(recog->did->boxa, L_COPY); -} - - -/*------------------------------------------------------------------------* - * Generate decoding arrays * - *------------------------------------------------------------------------*/ -/*! - * \brief recogPrepareForDecoding() - * - * \param[in] recog with LUT's pre-computed - * \param[in] pixs typically of multiple touching characters, 1 bpp - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Binarizes and crops input %pixs.
- *      (2) Removes previous L_RDID struct and makes a new one.
- *      (3) Generates the bit-and sum arrays for each character template
- *          at each pixel position in %pixs.  These are used in the
- *          Viterbi dynamic programming step.
- *      (4) The values are saved in the scoring arrays at the left edge
- *          of the template.  They are used in the Viterbi process
- *          at the setwidth position (which is near the RHS of the template
- *          as it is positioned on pixs) in the generated trellis.
- * 
- */ -static l_int32 -recogPrepareForDecoding(L_RECOG *recog, - PIX *pixs, - l_int32 debug) -{ -l_int32 i; -PIX *pix1; -L_RDID *did; - - PROCNAME("recogPrepareForDecoding"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!recog->train_done) - return ERROR_INT("training not finished", procName, 1); - - if (!recog->ave_done) - recogAverageSamples(&recog, 0); - - /* Binarize and crop to foreground if necessary */ - if ((pix1 = recogProcessToIdentify(recog, pixs, 0)) == NULL) - return ERROR_INT("pix1 not made", procName, 1); - - /* Remove any existing RecogDID and set up a new one */ - recogDestroyDid(recog); - if (recogCreateDid(recog, pix1)) { - pixDestroy(&pix1); - return ERROR_INT("decoder not made", procName, 1); - } - - /* Compute vertical sum and first moment arrays */ - did = recogGetDid(recog); /* owned by recog */ - did->nasum = pixCountPixelsByColumn(pix1); - did->namoment = pixGetMomentByColumn(pix1, 1); - - /* Generate the arrays */ - for (i = 0; i < recog->did->narray; i++) - recogMakeDecodingArray(recog, i, debug); - - pixDestroy(&pix1); - return 0; -} - - -/*! - * \brief recogMakeDecodingArray() - * - * \param[in] recog - * \param[in] index of averaged template - * \param[in] debug 1 for debug output; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates the bit-and sum array for a character template along pixs.
- *      (2) The values are saved in the scoring arrays at the left edge
- *          of the template as it is positioned on pixs.
- * 
- */ -static l_int32 -recogMakeDecodingArray(L_RECOG *recog, - l_int32 index, - l_int32 debug) -{ -l_int32 i, j, w1, h1, w2, h2, nx, ycent2, count, maxcount, maxdely; -l_int32 sum, moment, dely, shifty; -l_int32 *counta, *delya, *ycent1, *arraysum, *arraymoment, *sumtab; -NUMA *nasum, *namoment; -PIX *pix1, *pix2, *pix3; -L_RDID *did; - - PROCNAME("recogMakeDecodingArray"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if ((did = recogGetDid(recog)) == NULL) - return ERROR_INT("did not defined", procName, 1); - if (index < 0 || index >= did->narray) - return ERROR_INT("invalid index", procName, 1); - - /* Check that pix1 is large enough for this template. */ - pix1 = did->pixs; /* owned by did; do not destroy */ - pixGetDimensions(pix1, &w1, &h1, NULL); - pix2 = pixaGetPix(recog->pixa_u, index, L_CLONE); - pixGetDimensions(pix2, &w2, &h2, NULL); - if (w1 < w2) { - L_INFO("w1 = %d < w2 = %d for index %d\n", procName, w1, w2, index); - pixDestroy(&pix2); - return 0; - } - - nasum = did->nasum; - namoment = did->namoment; - ptaGetIPt(recog->pta_u, index, NULL, &ycent2); - sumtab = recog->sumtab; - counta = did->counta[index]; - delya = did->delya[index]; - - /* Set up the array for ycent1. This gives the y-centroid location - * for a window of width w2, starting at location i. */ - nx = w1 - w2 + 1; /* number of positions w2 can be placed in w1 */ - ycent1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32)); - arraysum = numaGetIArray(nasum); - arraymoment = numaGetIArray(namoment); - for (i = 0, sum = 0, moment = 0; i < w2; i++) { - sum += arraysum[i]; - moment += arraymoment[i]; - } - for (i = 0; i < nx - 1; i++) { - ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; - sum += arraysum[w2 + i] - arraysum[i]; - moment += arraymoment[w2 + i] - arraymoment[i]; - } - ycent1[nx - 1] = (sum == 0) ? 
ycent2 : (l_float32)moment / (l_float32)sum; - - /* Compute the bit-and sum between the template pix2 and pix1, at - * locations where the left side of pix2 goes from 0 to nx - 1 - * in pix1. Do this around the vertical alignment of the pix2 - * centroid and the windowed pix1 centroid. - * (1) Start with pix3 cleared and approximately equal in size to pix1. - * (2) Blit the y-shifted pix2 onto pix3. Then all ON pixels - * are within the intersection of pix1 and the shifted pix2. - * (3) AND pix1 with pix3. */ - pix3 = pixCreate(w2, h1, 1); - for (i = 0; i < nx; i++) { - shifty = (l_int32)(ycent1[i] - ycent2 + 0.5); - maxcount = 0; - maxdely = 0; - for (j = -MaxYShift; j <= MaxYShift; j++) { - pixClearAll(pix3); - dely = shifty + j; /* amount pix2 is shifted relative to pix1 */ - pixRasterop(pix3, 0, dely, w2, h2, PIX_SRC, pix2, 0, 0); - pixRasterop(pix3, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0); - pixCountPixels(pix3, &count, sumtab); - if (count > maxcount) { - maxcount = count; - maxdely = dely; - } - } - counta[i] = maxcount; - delya[i] = maxdely; - } - did->fullarrays = TRUE; - - pixDestroy(&pix2); - pixDestroy(&pix3); - LEPT_FREE(ycent1); - LEPT_FREE(arraysum); - LEPT_FREE(arraymoment); - return 0; -} - - -/*------------------------------------------------------------------------* - * Dynamic programming for best path - *------------------------------------------------------------------------*/ -/*! - * \brief recogRunViterbi() - * - * \param[in] recog with LUT's pre-computed - * \param[out] ppixdb [optional] debug result; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This can be used when the templates are unscaled.  It works by
- *          matching the average, unscaled templates of each class to
- *          all positions.
- *      (2) It is recursive, in that
- *          (a) we compute the score successively at all pixel positions x,
- *          (b) to compute the score at x in the trellis, for each
- *              template we look backwards to (x - setwidth) to get the
- *              score if that template were to be printed with its
- *              setwidth location at x.  We save at x the template and
- *              score that maximizes the sum of the score at (x - setwidth)
- *              and the log-likelihood for the template to be printed with
- *              its LHS there.
- *      (3) The primary output is a boxa of the locations for splitting
- *          the input image.  These locations are used later to split the
- *          image and send the pieces individually for recognition.
- *          This can be done in either recogIdentifyMultiple(), or
- *          for debugging in recogRescoreDidResult().
- * 
- */ -static l_int32 -recogRunViterbi(L_RECOG *recog, - PIX **ppixdb) -{ -l_int32 i, w1, w2, h1, xnz, x, narray, minsetw; -l_int32 first, templ, xloc, dely, counts, area1; -l_int32 besttempl, spacetempl; -l_int32 *setw, *didtempl; -l_int32 *area2; /* must be freed */ -l_float32 prevscore, matchscore, maxscore, correl; -l_float32 *didscore; -BOX *box; -PIX *pix1; -L_RDID *did; - - PROCNAME("recogRunViterbi"); - - if (ppixdb) *ppixdb = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if ((did = recogGetDid(recog)) == NULL) - return ERROR_INT("did not defined", procName, 1); - if (did->fullarrays == 0) - return ERROR_INT("did full arrays not made", procName, 1); - - /* Compute the minimum setwidth. Bad templates with very small - * width can cause havoc because the setwidth is too small. */ - w1 = did->size; - narray = did->narray; - spacetempl = narray; - setw = did->setwidth; - minsetw = 100000; - for (i = 0; i < narray; i++) { - if (setw[i] < minsetw) - minsetw = setw[i]; - } - if (minsetw <= 2) - return ERROR_INT("minsetw <= 2; bad templates", procName, 1); - - /* The score array is initialized to 0.0. As we proceed to - * the left, the log likelihood for the partial paths goes - * negative, and we prune for the max (least negative) path. - * No matches will be computed until we reach x = min(setwidth); - * until then first == TRUE after looping over templates. 
*/ - didscore = did->trellisscore; - didtempl = did->trellistempl; - area2 = numaGetIArray(recog->nasum_u); - besttempl = 0; /* just tells compiler it is initialized */ - maxscore = 0.0; /* ditto */ - for (x = minsetw; x < w1; x++) { /* will always get a score */ - first = TRUE; - for (i = 0; i < narray; i++) { - if (x - setw[i] < 0) continue; - matchscore = didscore[x - setw[i]] + - did->gamma[1] * did->counta[i][x - setw[i]] + - did->beta[1] * area2[i]; - if (first) { - maxscore = matchscore; - besttempl = i; - first = FALSE; - } else { - if (matchscore > maxscore) { - maxscore = matchscore; - besttempl = i; - } - } - } - - /* We can also put down a single pixel space, with no cost - * because all pixels are bg. */ - prevscore = didscore[x - 1]; - if (prevscore > maxscore) { /* 1 pixel space is best */ - maxscore = prevscore; - besttempl = spacetempl; - } - didscore[x] = maxscore; - didtempl[x] = besttempl; - } - - /* Backtrack to get the best path. - * Skip over (i.e., ignore) all single pixel spaces. 
*/ - for (x = w1 - 1; x >= 0; x--) { - if (didtempl[x] != spacetempl) break; - } - h1 = pixGetHeight(did->pixs); - while (x > 0) { - if (didtempl[x] == spacetempl) { /* skip over spaces */ - x--; - continue; - } - templ = didtempl[x]; - xloc = x - setw[templ]; - if (xloc < 0) break; - counts = did->counta[templ][xloc]; /* bit-and counts */ - recogGetWindowedArea(recog, templ, xloc, &dely, &area1); - correl = ((l_float32)(counts) * counts) / - (l_float32)(area2[templ] * area1); - pix1 = pixaGetPix(recog->pixa_u, templ, L_CLONE); - w2 = pixGetWidth(pix1); - numaAddNumber(did->natempl, templ); - numaAddNumber(did->naxloc, xloc); - numaAddNumber(did->nadely, dely); - numaAddNumber(did->nawidth, pixGetWidth(pix1)); - numaAddNumber(did->nascore, correl); - xnz = L_MAX(xloc, 0); - box = boxCreate(xnz, dely, w2, h1); - boxaAddBox(did->boxa, box, L_INSERT); - pixDestroy(&pix1); - x = xloc; - } - - if (ppixdb) { - numaWriteStderr(did->natempl); - numaWriteStderr(did->naxloc); - numaWriteStderr(did->nadely); - numaWriteStderr(did->nawidth); - numaWriteStderr(did->nascore); - boxaWriteStderr(did->boxa); - *ppixdb = recogShowPath(recog, 0); - } - - LEPT_FREE(area2); - return 0; -} - - -/*! - * \brief recogRescoreDidResult() - * - * \param[in] recog with LUT's pre-computed - * \param[out] ppixdb [optional] debug result; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does correlation matching with all unscaled templates,
- *          using the character segmentation determined by the Viterbi path.
- * 
- */ -static l_int32 -recogRescoreDidResult(L_RECOG *recog, - PIX **ppixdb) -{ -l_int32 i, n, sample, x, dely, index; -char *text; -l_float32 score; -BOX *box1; -PIX *pixs, *pix1; -L_RDID *did; - - PROCNAME("recogRescoreDidResult"); - - if (ppixdb) *ppixdb = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if ((did = recogGetDid(recog)) == NULL) - return ERROR_INT("did not defined", procName, 1); - if (did->fullarrays == 0) - return ERROR_INT("did full arrays not made", procName, 1); - if ((n = numaGetCount(did->naxloc)) == 0) - return ERROR_INT("no elements in path", procName, 1); - - pixs = did->pixs; - for (i = 0; i < n; i++) { - box1 = boxaGetBox(did->boxa, i, L_COPY); - boxGetGeometry(box1, &x, &dely, NULL, NULL); - pix1 = pixClipRectangle(pixs, box1, NULL); - recogIdentifyPix(recog, pix1, NULL); - recogTransferRchToDid(recog, x, dely); - if (ppixdb) { - rchExtract(recog->rch, &index, &score, &text, - &sample, NULL, NULL, NULL); - lept_stderr("text = %s, index = %d, sample = %d," - " score = %5.3f\n", text, index, sample, score); - } - pixDestroy(&pix1); - boxDestroy(&box1); - LEPT_FREE(text); - } - - if (ppixdb) - *ppixdb = recogShowPath(recog, 1); - - return 0; -} - - -/*! 
- * \brief recogShowPath() - * - * \param[in] recog with LUT's pre-computed - * \param[in] select 0 for Viterbi; 1 for rescored - * \return pix debug output), or NULL on error - */ -static PIX * -recogShowPath(L_RECOG *recog, - l_int32 select) -{ -char textstr[16]; -l_int32 i, j, n, index, xloc, dely; -l_float32 score; -L_BMF *bmf; -NUMA *natempl_s, *nasample_s, *nascore_s, *naxloc_s, *nadely_s; -PIX *pixs, *pix0, *pix1, *pix2, *pix3, *pix4, *pix5; -L_RDID *did; - - PROCNAME("recogShowPath"); - - if (!recog) - return (PIX *)ERROR_PTR("recog not defined", procName, NULL); - if ((did = recogGetDid(recog)) == NULL) - return (PIX *)ERROR_PTR("did not defined", procName, NULL); - - bmf = bmfCreate(NULL, 8); - pixs = pixScale(did->pixs, 4.0, 4.0); - pix0 = pixAddBorderGeneral(pixs, 0, 0, 0, 40, 0); - pix1 = pixConvertTo32(pix0); - if (select == 0) { /* Viterbi */ - natempl_s = did->natempl; - nascore_s = did->nascore; - naxloc_s = did->naxloc; - nadely_s = did->nadely; - } else { /* rescored */ - natempl_s = did->natempl_r; - nasample_s = did->nasample_r; - nascore_s = did->nascore_r; - naxloc_s = did->naxloc_r; - nadely_s = did->nadely_r; - } - - n = numaGetCount(natempl_s); - for (i = 0; i < n; i++) { - numaGetIValue(natempl_s, i, &index); - if (select == 0) { - pix2 = pixaGetPix(recog->pixa_u, index, L_CLONE); - } else { - numaGetIValue(nasample_s, i, &j); - pix2 = pixaaGetPix(recog->pixaa_u, index, j, L_CLONE); - } - pix3 = pixScale(pix2, 4.0, 4.0); - pix4 = pixErodeBrick(NULL, pix3, 5, 5); - pixXor(pix4, pix4, pix3); - numaGetFValue(nascore_s, i, &score); - snprintf(textstr, sizeof(textstr), "%5.3f", score); - pix5 = pixAddTextlines(pix4, bmf, textstr, 1, L_ADD_BELOW); - numaGetIValue(naxloc_s, i, &xloc); - numaGetIValue(nadely_s, i, &dely); - pixPaintThroughMask(pix1, pix5, 4 * xloc, 4 * dely, 0xff000000); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - pixDestroy(&pix5); - } - pixDestroy(&pixs); - pixDestroy(&pix0); - bmfDestroy(&bmf); - return 
pix1; -} - - -/*------------------------------------------------------------------------* - * Create/destroy temporary DID data * - *------------------------------------------------------------------------*/ -/*! - * \brief recogCreateDid() - * - * \param[in] recog - * \param[in] pixs of 1 bpp image to match - * \return 0 if OK, 1 on error - */ -l_ok -recogCreateDid(L_RECOG *recog, - PIX *pixs) -{ -l_int32 i; -PIX *pix1; -L_RDID *did; - - PROCNAME("recogCreateDid"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - recogDestroyDid(recog); - - did = (L_RDID *)LEPT_CALLOC(1, sizeof(L_RDID)); - recog->did = did; - did->pixs = pixClone(pixs); - did->narray = recog->setsize; - did->size = pixGetWidth(pixs); - did->natempl = numaCreate(5); - did->naxloc = numaCreate(5); - did->nadely = numaCreate(5); - did->nawidth = numaCreate(5); - did->boxa = boxaCreate(5); - did->nascore = numaCreate(5); - did->natempl_r = numaCreate(5); - did->nasample_r = numaCreate(5); - did->naxloc_r = numaCreate(5); - did->nadely_r = numaCreate(5); - did->nawidth_r = numaCreate(5); - did->nascore_r = numaCreate(5); - - /* Make the arrays */ - did->setwidth = (l_int32 *)LEPT_CALLOC(did->narray, sizeof(l_int32)); - did->counta = (l_int32 **)LEPT_CALLOC(did->narray, sizeof(l_int32 *)); - did->delya = (l_int32 **)LEPT_CALLOC(did->narray, sizeof(l_int32 *)); - did->beta = (l_float32 *)LEPT_CALLOC(5, sizeof(l_float32)); - did->gamma = (l_float32 *)LEPT_CALLOC(5, sizeof(l_float32)); - did->trellisscore = (l_float32 *)LEPT_CALLOC(did->size, sizeof(l_float32)); - did->trellistempl = (l_int32 *)LEPT_CALLOC(did->size, sizeof(l_int32)); - for (i = 0; i < did->narray; i++) { - did->counta[i] = (l_int32 *)LEPT_CALLOC(did->size, sizeof(l_int32)); - did->delya[i] = (l_int32 *)LEPT_CALLOC(did->size, sizeof(l_int32)); - } - - /* Populate the setwidth array */ - for (i = 0; i < did->narray; i++) { - pix1 = 
pixaGetPix(recog->pixa_u, i, L_CLONE); - did->setwidth[i] = (l_int32)(SetwidthFraction * pixGetWidth(pix1)); - pixDestroy(&pix1); - } - - return 0; -} - - -/*! - * \brief recogDestroyDid() - * - * \param[in] recog - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) As the signature indicates, this is owned by the recog, and can
- *          only be destroyed using this function.
- * 
- */ -l_ok -recogDestroyDid(L_RECOG *recog) -{ -l_int32 i; -L_RDID *did; - - PROCNAME("recogDestroyDid"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - if ((did = recog->did) == NULL) return 0; - if (!did->counta || !did->delya) - return ERROR_INT("ptr array is null; shouldn't happen!", procName, 1); - - for (i = 0; i < did->narray; i++) { - LEPT_FREE(did->counta[i]); - LEPT_FREE(did->delya[i]); - } - LEPT_FREE(did->setwidth); - LEPT_FREE(did->counta); - LEPT_FREE(did->delya); - LEPT_FREE(did->beta); - LEPT_FREE(did->gamma); - LEPT_FREE(did->trellisscore); - LEPT_FREE(did->trellistempl); - pixDestroy(&did->pixs); - numaDestroy(&did->nasum); - numaDestroy(&did->namoment); - numaDestroy(&did->natempl); - numaDestroy(&did->naxloc); - numaDestroy(&did->nadely); - numaDestroy(&did->nawidth); - boxaDestroy(&did->boxa); - numaDestroy(&did->nascore); - numaDestroy(&did->natempl_r); - numaDestroy(&did->nasample_r); - numaDestroy(&did->naxloc_r); - numaDestroy(&did->nadely_r); - numaDestroy(&did->nawidth_r); - numaDestroy(&did->nascore_r); - LEPT_FREE(did); - recog->did = NULL; - return 0; -} - - -/*------------------------------------------------------------------------* - * Various helpers * - *------------------------------------------------------------------------*/ -/*! - * \brief recogDidExists() - * - * \param[in] recog - * \return 1 if recog->did exists; 0 if not or on error. - */ -l_int32 -recogDidExists(L_RECOG *recog) -{ - PROCNAME("recogDidExists"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 0); - return (recog->did) ? 1 : 0; -} - - -/*! - * \brief recogGetDid() - * - * \param[in] recog - * \return did still owned by the recog, or NULL on error - * - *
- * Notes:
- *      (1) This also makes sure the arrays are defined.
- * 
- */ -L_RDID * -recogGetDid(L_RECOG *recog) -{ -l_int32 i; -L_RDID *did; - - PROCNAME("recogGetDid"); - - if (!recog) - return (L_RDID *)ERROR_PTR("recog not defined", procName, NULL); - if ((did = recog->did) == NULL) - return (L_RDID *)ERROR_PTR("did not defined", procName, NULL); - if (!did->counta || !did->delya) - return (L_RDID *)ERROR_PTR("did array ptrs not defined", - procName, NULL); - for (i = 0; i < did->narray; i++) { - if (!did->counta[i] || !did->delya[i]) - return (L_RDID *)ERROR_PTR("did arrays not defined", - procName, NULL); - } - - return did; -} - - -/*! - * \brief recogGetWindowedArea() - * - * \param[in] recog - * \param[in] index of template - * \param[in] x pixel position of left hand edge of template - * \param[out] pdely y shift of template relative to pix1 - * \param[out] pwsum number of fg pixels in window of pixs - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is called after the best path has been found through
- *          the trellis, in order to produce a correlation that can be used
- *          to evaluate the confidence we have in the identification.
- *          The correlation is |1 & 2|^2 / (|1| * |2|).
- *          |1 & 2| is given by the count array, |2| is found from
- *          nasum_u[], and |1| is wsum returned from this function.
- * 
- */ -static l_int32 -recogGetWindowedArea(L_RECOG *recog, - l_int32 index, - l_int32 x, - l_int32 *pdely, - l_int32 *pwsum) -{ -l_int32 w1, h1, w2, h2; -PIX *pix1, *pix2, *pixt; -L_RDID *did; - - PROCNAME("recogGetWindowedArea"); - - if (pdely) *pdely = 0; - if (pwsum) *pwsum = 0; - if (!pdely || !pwsum) - return ERROR_INT("&dely and &wsum not both defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if ((did = recogGetDid(recog)) == NULL) - return ERROR_INT("did not defined", procName, 1); - if (index < 0 || index >= did->narray) - return ERROR_INT("invalid index", procName, 1); - pix1 = did->pixs; - pixGetDimensions(pix1, &w1, &h1, NULL); - if (x >= w1) - return ERROR_INT("invalid x position", procName, 1); - - pix2 = pixaGetPix(recog->pixa_u, index, L_CLONE); - pixGetDimensions(pix2, &w2, &h2, NULL); - if (w1 < w2) { - L_INFO("template %d too small\n", procName, index); - pixDestroy(&pix2); - return 0; - } - - *pdely = did->delya[index][x]; - pixt = pixCreate(w2, h1, 1); - pixRasterop(pixt, 0, *pdely, w2, h2, PIX_SRC, pix2, 0, 0); - pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, x, 0); - pixCountPixels(pixt, pwsum, recog->sumtab); - pixDestroy(&pix2); - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief recogSetChannelParams() - * - * \param[in] recog - * \param[in] nlevels - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This converts the independent bit-flip probabilities in the
- *          "channel" into log-likelihood coefficients on image sums.
- *          These coefficients are only defined for the non-background
- *          template levels.  Thus for nlevels = 2 (one fg, one bg),
- *          only beta[1] and gamma[1] are used.  For nlevels = 4 (three
- *          fg templates), we use beta[1-3] and gamma[1-3].
- * 
- */ -l_ok -recogSetChannelParams(L_RECOG *recog, - l_int32 nlevels) -{ -l_int32 i; -const l_float32 *da; -L_RDID *did; - - PROCNAME("recogSetChannelParams"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if ((did = recogGetDid(recog)) == NULL) - return ERROR_INT("did not defined", procName, 1); - if (nlevels == 2) - da = DefaultAlpha2; - else if (nlevels == 4) - da = DefaultAlpha4; - else - return ERROR_INT("nlevels not 2 or 4", procName, 1); - - for (i = 1; i < nlevels; i++) { - did->beta[i] = log((1.0 - da[i]) / da[0]); - did->gamma[i] = log(da[0] * da[i] / ((1.0 - da[0]) * (1.0 - da[i]))); -/* lept_stderr("beta[%d] = %7.3f, gamma[%d] = %7.3f\n", - i, did->beta[i], i, did->gamma[i]); */ - } - - return 0; -} - - -/*! - * \brief recogTransferRchToDid() - * - * \param[in] recog with rch and did defined - * \param[in] x left edge of extracted region, relative to decoded line - * \param[in] y top edge of extracted region, relative to input image - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used to transfer the results for a single character match
- *          to the rescored did arrays.
- * 
- */ -static l_int32 -recogTransferRchToDid(L_RECOG *recog, - l_int32 x, - l_int32 y) -{ -L_RDID *did; -L_RCH *rch; - - PROCNAME("recogTransferRchToDid"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if ((did = recogGetDid(recog)) == NULL) - return ERROR_INT("did not defined", procName, 1); - if ((rch = recog->rch) == NULL) - return ERROR_INT("rch not defined", procName, 1); - - numaAddNumber(did->natempl_r, rch->index); - numaAddNumber(did->nasample_r, rch->sample); - numaAddNumber(did->naxloc_r, rch->xloc + x); - numaAddNumber(did->nadely_r, rch->yloc + y); - numaAddNumber(did->nawidth_r, rch->width); - numaAddNumber(did->nascore_r, rch->score); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogident.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogident.c deleted file mode 100644 index cdbcbda4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogident.c +++ /dev/null @@ -1,1885 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file recogident.c - *
- *
- *      Top-level identification
- *         l_int32             recogIdentifyMultiple()
- *
- *      Segmentation and noise removal
- *         l_int32             recogSplitIntoCharacters()
- *
- *      Greedy character splitting
- *         l_int32             recogCorrelationBestRow()
- *         l_int32             recogCorrelationBestChar()
- *         static l_int32      pixCorrelationBestShift()
- *
- *      Low-level identification of single characters
- *         l_int32             recogIdentifyPixa()
- *         l_int32             recogIdentifyPix()
- *         l_int32             recogSkipIdentify()
- *
- *      Operations for handling identification results
- *         static L_RCHA      *rchaCreate()
- *         l_int32            *rchaDestroy()
- *         static L_RCH       *rchCreate()
- *         l_int32            *rchDestroy()
- *         l_int32             rchaExtract()
- *         l_int32             rchExtract()
- *         static l_int32      transferRchToRcha()
- *
- *      Preprocessing and filtering
- *         l_int32             recogProcessToIdentify()
- *         static PIX         *recogPreSplittingFilter()
- *         static PIX         *recogSplittingFilter()
- *
- *      Postprocessing
- *         SARRAY             *recogExtractNumbers()
- *         PIX                *showExtractNumbers()
- *
- *      Static debug helper
- *         static void         l_showIndicatorSplitValues()
- *
- *  See recogbasic.c for examples of training a recognizer, which is
- *  required before it can be used for identification.
- *
- *  The character splitter repeatedly does a greedy correlation with each
- *  averaged unscaled template, at all pixel locations along the text to
- *  be identified.  The vertical alignment is between the template
- *  centroid and the (moving) windowed centroid, including a delta of
- *  1 pixel above and below.  The best match then removes part of the
- *  input image, leaving 1 or 2 pieces, which, after filtering,
- *  are put in a queue.  The process ends when the queue is empty.
- *  The filtering is based on the size and aspect ratio of the
- *  remaining pieces; the intent is to remove anything that is
- *  unlikely to be text, such as small pieces and line graphics.
- *
- *  After splitting, the selected segments are identified using
- *  the input parameters that were initially specified for the
- *  recognizer.  Unlike the splitter, which uses the averaged
- *  templates from the unscaled input, the recognizer can use
- *  either all training examples or averaged templates, and these
- *  can be either scaled or unscaled.  These choices are specified
- *  when the recognizer is constructed.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* There are two methods for splitting characters: DID and greedy. - * The default method is DID. */ -#define SPLIT_WITH_DID 1 - - /* Padding on pix1: added before correlations and removed from result */ -static const l_int32 LeftRightPadding = 32; - - /* Parameters for filtering and sorting connected components in splitter */ -static const l_float32 MinFillFactor = 0.10; -static const l_int32 DefaultMinHeight = 15; /* min unscaled height */ -static const l_int32 MinOverlap1 = 6; /* in pass 1 of boxaSort2d() */ -static const l_int32 MinOverlap2 = 6; /* in pass 2 of boxaSort2d() */ -static const l_int32 MinHeightPass1 = 5; /* min height to start pass 1 */ - - -static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1, - NUMA *namoment1, l_int32 area2, - l_int32 ycent2, l_int32 maxyshift, - l_int32 *tab8, l_int32 *pdelx, - l_int32 *pdely, l_float32 *pscore, - l_int32 debugflag ); -static L_RCH *rchCreate(l_int32 index, l_float32 score, char *text, - l_int32 sample, l_int32 xloc, l_int32 yloc, - l_int32 width); -static L_RCHA *rchaCreate(); -static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha); -static PIX *recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh, - l_float32 minaf, l_int32 debug); -static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 min, - l_float32 minaf, l_int32 *premove, - l_int32 debug); -static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3, - NUMA *na4, NUMA *na5, NUMA *na6); - -/*------------------------------------------------------------------------* - * Identification - *------------------------------------------------------------------------*/ -/*! 
- * \brief recogIdentifyMultiple() - * - * \param[in] recog with training finished - * \param[in] pixs containing typically a small number of characters - * \param[in] minh remove shorter components; use 0 for default - * \param[in] skipsplit 1 to skip the splitting step - * \param[out] pboxa [optional] locations of identified components - * \param[out] ppixa [optional] images of identified components - * \param[out] ppixdb [optional] debug pix: inputs and best fits - * \param[in] debugsplit 1 returns pix split debugging images - * \return 0 if OK; 1 if nothing is found; 2 for other errors. - * - *
- * Notes:
- *      (1) This filters the input pixa and calls recogIdentifyPixa()
- *      (2) Splitting is relatively slow, because it tries to match all
- *          character templates to all locations.  This step can be skipped.
- *      (3) An attempt is made to order the (optionally) returned images
- *          and boxes in 2-dimensional sorted order.  These can then
- *          be used to aggregate identified characters into numbers or words.
- *          One typically wants the pixa, which contains a boxa of the
- *          extracted subimages.
- * 
- */ -l_ok -recogIdentifyMultiple(L_RECOG *recog, - PIX *pixs, - l_int32 minh, - l_int32 skipsplit, - BOXA **pboxa, - PIXA **ppixa, - PIX **ppixdb, - l_int32 debugsplit) -{ -l_int32 n; -BOXA *boxa; -PIX *pixb; -PIXA *pixa; - - PROCNAME("recogIdentifyMultiple"); - - if (pboxa) *pboxa = NULL; - if (ppixa) *ppixa = NULL; - if (ppixdb) *ppixdb = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 2); - if (!recog->train_done) - return ERROR_INT("training not finished", procName, 2); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 2); - - /* Binarize if necessary */ - if (pixGetDepth(pixs) > 1) - pixb = pixConvertTo1(pixs, recog->threshold); - else - pixb = pixClone(pixs); - - /* Noise removal and splitting of touching characters */ - recogSplitIntoCharacters(recog, pixb, minh, skipsplit, &boxa, &pixa, - debugsplit); - pixDestroy(&pixb); - if (!pixa || (n = pixaGetCount(pixa)) == 0) { - pixaDestroy(&pixa); - boxaDestroy(&boxa); - L_WARNING("nothing found\n", procName); - return 1; - } - - recogIdentifyPixa(recog, pixa, ppixdb); - if (pboxa) - *pboxa = boxa; - else - boxaDestroy(&boxa); - if (ppixa) - *ppixa = pixa; - else - pixaDestroy(&pixa); - return 0; -} - - -/*------------------------------------------------------------------------* - * Segmentation and noise removal * - *------------------------------------------------------------------------*/ -/*! - * \brief recogSplitIntoCharacters() - * - * \param[in] recog - * \param[in] pixs 1 bpp, contains only mostly deskewed text - * \param[in] minh remove shorter components; use 0 for default - * \param[in] skipsplit 1 to skip the splitting step - * \param[out] pboxa character bounding boxes - * \param[out] ppixa character images - * \param[in] debug 1 for results written to pixadb_split - * \return 0 if OK, 1 on error or if no components are returned - * - *
- * Notes:
- *      (1) This can be given an image that has an arbitrary number
- *          of text characters.  It optionally splits connected
- *          components based on document image decoding in recogDecode().
- *          The returned pixa includes the boxes from which the
- *          (possibly split) components are extracted.
- *      (2) After noise filtering, the resulting components are put in
- *          row-major (2D) order, and the smaller of overlapping
- *          components are removed if they satisfy conditions of
- *          relative size and fractional overlap.
- *      (3) Note that the splitting function uses unscaled templates
- *          and does not bother returning the class results and scores.
- *          These are more accurately found later using the scaled templates.
- * 
- */ -l_ok -recogSplitIntoCharacters(L_RECOG *recog, - PIX *pixs, - l_int32 minh, - l_int32 skipsplit, - BOXA **pboxa, - PIXA **ppixa, - l_int32 debug) -{ -static l_int32 ind = 0; -char buf[32]; -l_int32 i, xoff, yoff, empty, maxw, bw, ncomp, scaling; -BOX *box; -BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxad; -BOXAA *baa; -PIX *pix, *pix1, *pix2, *pix3; -PIXA *pixa; - - PROCNAME("recogSplitIntoCharacters"); - - lept_mkdir("lept/recog"); - - if (pboxa) *pboxa = NULL; - if (ppixa) *ppixa = NULL; - if (!pboxa || !ppixa) - return ERROR_INT("&boxa and &pixa not defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!recog->train_done) - return ERROR_INT("training not finished", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (minh <= 0) minh = DefaultMinHeight; - pixZero(pixs, &empty); - if (empty) return 1; - - /* Small vertical close for consolidation. Don't do a horizontal - * closing, because it might join separate characters. */ - pix1 = pixMorphSequence(pixs, "c1.3", 0); - - /* Carefully filter out noise */ - pix2 = recogPreSplittingFilter(recog, pix1, minh, MinFillFactor, debug); - pixDestroy(&pix1); - - /* Get the 8-connected components to be split/identified */ - boxa1 = pixConnComp(pix2, NULL, 8); - pixDestroy(&pix2); - ncomp = boxaGetCount(boxa1); - if (ncomp == 0) { - boxaDestroy(&boxa1); - L_WARNING("all components removed\n", procName); - return 1; - } - - /* Save everything and split the large components */ - boxa2 = boxaCreate(ncomp); - maxw = recog->maxwidth_u + 5; - scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; - pixa = (debug) ? pixaCreate(ncomp) : NULL; - for (i = 0; i < ncomp; i++) { - box = boxaGetBox(boxa1, i, L_CLONE); - boxGetGeometry(box, &xoff, &yoff, &bw, NULL); - /* Treat as one character if it is small, if the images - * have been scaled, or if splitting is not to be run. 
*/ - if (bw <= maxw || scaling || skipsplit) { - boxaAddBox(boxa2, box, L_INSERT); - } else { - pix = pixClipRectangle(pixs, box, NULL); -#if SPLIT_WITH_DID - if (!debug) { - boxa3 = recogDecode(recog, pix, 2, NULL); - } else { - boxa3 = recogDecode(recog, pix, 2, &pix2); - pixaAddPix(pixa, pix2, L_INSERT); - } -#else /* use greedy splitting */ - recogCorrelationBestRow(recog, pix, &boxa3, NULL, NULL, - NULL, debug); - if (debug) { - pix2 = pixConvertTo32(pix); - pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); - pixaAddPix(pixa, pix2, L_INSERT); - } -#endif /* SPLIT_WITH_DID */ - pixDestroy(&pix); - boxDestroy(&box); - if (!boxa3) { - L_ERROR("boxa3 not found for component %d\n", procName, i); - } else { - boxa4 = boxaTransform(boxa3, xoff, yoff, 1.0, 1.0); - boxaJoin(boxa2, boxa4, 0, -1); - boxaDestroy(&boxa3); - boxaDestroy(&boxa4); - } - } - } - boxaDestroy(&boxa1); - if (pixa) { /* debug */ - pix3 = pixaDisplayTiledInColumns(pixa, 1, 1.0, 20, 2); - snprintf(buf, sizeof(buf), "/tmp/lept/recog/decode-%d.png", ind++); - pixWrite(buf, pix3, IFF_PNG); - pixaDestroy(&pixa); - pixDestroy(&pix3); - } - - /* Do a 2D sort on the bounding boxes, and flatten the result to 1D. - * For the 2D sort, to add a box to an existing boxa, we require - * specified minimum vertical overlaps for the first two passes - * of the 2D sort. In pass 1, only components with sufficient - * height can start a new boxa. */ - baa = boxaSort2d(boxa2, NULL, MinOverlap1, MinOverlap2, MinHeightPass1); - boxa3 = boxaaFlattenToBoxa(baa, NULL, L_CLONE); - boxaaDestroy(&baa); - boxaDestroy(&boxa2); - - /* Remove smaller components of overlapping pairs. - * We only remove the small component if the overlap is - * at least half its area and if its area is no more - * than 30% of the area of the large component. Because the - * components are in a flattened 2D sort, we don't need to - * look far ahead in the array to find all overlapping boxes; - * 10 boxes is plenty. 
*/ - boxad = boxaHandleOverlaps(boxa3, L_COMBINE, 10, 0.5, 0.3, NULL); - boxaDestroy(&boxa3); - - /* Extract and save the image pieces from the input image. */ - *ppixa = pixClipRectangles(pixs, boxad); - *pboxa = boxad; - return 0; -} - - -/*------------------------------------------------------------------------* - * Greedy character splitting * - *------------------------------------------------------------------------*/ -/*! - * \brief recogCorrelationBestRow() - * - * \param[in] recog with LUT's pre-computed - * \param[in] pixs typically of multiple touching characters, 1 bpp - * \param[out] pboxa bounding boxs of best fit character - * \param[out] pnascore [optional] correlation scores - * \param[out] pnaindex [optional] indices of classes - * \param[out] psachar [optional] array of character strings - * \param[in] debug 1 for results written to pixadb_split - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Supervises character matching for (in general) a c.c with
- *          multiple touching characters.  Finds the best match greedily.
- *          Rejects small parts that are left over after splitting.
- *      (2) Matching is to the average, and without character scaling.
- * 
- */ -l_ok -recogCorrelationBestRow(L_RECOG *recog, - PIX *pixs, - BOXA **pboxa, - NUMA **pnascore, - NUMA **pnaindex, - SARRAY **psachar, - l_int32 debug) -{ -char *charstr; -l_int32 index, remove, w, h, bx, bw, bxc, bwc, w1, w2, w3; -l_float32 score; -BOX *box, *boxc, *boxtrans, *boxl, *boxr, *boxlt, *boxrt; -BOXA *boxat; -NUMA *nascoret, *naindext, *nasort; -PIX *pixb, *pixc, *pixl, *pixr, *pixdb, *pixd; -PIXA *pixar, *pixadb; -SARRAY *sachart; - -l_int32 iter; - - PROCNAME("recogCorrelationBestRow"); - - if (pnascore) *pnascore = NULL; - if (pnaindex) *pnaindex = NULL; - if (psachar) *psachar = NULL; - if (!pboxa) - return ERROR_INT("&boxa not defined", procName, 1); - *pboxa = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (pixGetWidth(pixs) < recog->minwidth_u - 4) - return ERROR_INT("pixs too narrow", procName, 1); - if (!recog->train_done) - return ERROR_INT("training not finished", procName, 1); - - /* Binarize and crop to foreground if necessary */ - pixb = recogProcessToIdentify(recog, pixs, 0); - - /* Initialize the arrays */ - boxat = boxaCreate(4); - nascoret = numaCreate(4); - naindext = numaCreate(4); - sachart = sarrayCreate(4); - pixadb = (debug) ? pixaCreate(4) : NULL; - - /* Initialize the images remaining to be processed with the input. - * These are stored in pixar, which is used here as a queue, - * on which we only put image fragments that are large enough to - * contain at least one character. */ - pixar = pixaCreate(1); - pixGetDimensions(pixb, &w, &h, NULL); - box = boxCreate(0, 0, w, h); - pixaAddPix(pixar, pixb, L_INSERT); - pixaAddBox(pixar, box, L_INSERT); - - /* Successively split on the best match until nothing is left. - * To be safe, we limit the search to 10 characters. 
*/ - for (iter = 0; iter < 11; iter++) { - if (pixaGetCount(pixar) == 0) - break; - if (iter == 10) { - L_WARNING("more than 10 chars; ending search\n", procName); - break; - } - - /* Pop one from the queue */ - pixaRemovePixAndSave(pixar, 0, &pixc, &boxc); - boxGetGeometry(boxc, &bxc, NULL, &bwc, NULL); - - /* This is a single component; if noise, remove it */ - recogSplittingFilter(recog, pixc, 0, MinFillFactor, &remove, debug); - if (debug) - lept_stderr("iter = %d, removed = %d\n", iter, remove); - if (remove) { - pixDestroy(&pixc); - boxDestroy(&boxc); - continue; - } - - /* Find the best character match */ - if (debug) { - recogCorrelationBestChar(recog, pixc, &box, &score, - &index, &charstr, &pixdb); - pixaAddPix(pixadb, pixdb, L_INSERT); - } else { - recogCorrelationBestChar(recog, pixc, &box, &score, - &index, &charstr, NULL); - } - - /* Find the box in original coordinates, and append - * the results to the arrays. */ - boxtrans = boxTransform(box, bxc, 0, 1.0, 1.0); - boxaAddBox(boxat, boxtrans, L_INSERT); - numaAddNumber(nascoret, score); - numaAddNumber(naindext, index); - sarrayAddString(sachart, charstr, L_INSERT); - - /* Split the current pixc into three regions and save - * each region if it is large enough. 
*/ - boxGetGeometry(box, &bx, NULL, &bw, NULL); - w1 = bx; - w2 = bw; - w3 = bwc - bx - bw; - if (debug) - lept_stderr(" w1 = %d, w2 = %d, w3 = %d\n", w1, w2, w3); - if (w1 < recog->minwidth_u - 4) { - if (debug) L_INFO("discarding width %d on left\n", procName, w1); - } else { /* extract and save left region */ - boxl = boxCreate(0, 0, bx + 1, h); - pixl = pixClipRectangle(pixc, boxl, NULL); - boxlt = boxTransform(boxl, bxc, 0, 1.0, 1.0); - pixaAddPix(pixar, pixl, L_INSERT); - pixaAddBox(pixar, boxlt, L_INSERT); - boxDestroy(&boxl); - } - if (w3 < recog->minwidth_u - 4) { - if (debug) L_INFO("discarding width %d on right\n", procName, w3); - } else { /* extract and save left region */ - boxr = boxCreate(bx + bw - 1, 0, w3 + 1, h); - pixr = pixClipRectangle(pixc, boxr, NULL); - boxrt = boxTransform(boxr, bxc, 0, 1.0, 1.0); - pixaAddPix(pixar, pixr, L_INSERT); - pixaAddBox(pixar, boxrt, L_INSERT); - boxDestroy(&boxr); - } - pixDestroy(&pixc); - boxDestroy(&box); - boxDestroy(&boxc); - } - pixaDestroy(&pixar); - - - /* Sort the output results by left-to-right in the boxa */ - *pboxa = boxaSort(boxat, L_SORT_BY_X, L_SORT_INCREASING, &nasort); - if (pnascore) - *pnascore = numaSortByIndex(nascoret, nasort); - if (pnaindex) - *pnaindex = numaSortByIndex(naindext, nasort); - if (psachar) - *psachar = sarraySortByIndex(sachart, nasort); - numaDestroy(&nasort); - boxaDestroy(&boxat); - numaDestroy(&nascoret); - numaDestroy(&naindext); - sarrayDestroy(&sachart); - - /* Final debug output */ - if (debug) { - pixd = pixaDisplayTiledInRows(pixadb, 32, 2000, 1.0, 0, 15, 2); - pixDisplay(pixd, 400, 400); - pixaAddPix(recog->pixadb_split, pixd, L_INSERT); - pixaDestroy(&pixadb); - } - return 0; -} - - -/*! 
- * \brief recogCorrelationBestChar() - * - * \param[in] recog with LUT's pre-computed - * \param[in] pixs can be of multiple touching characters, 1 bpp - * \param[out] pbox bounding box of best fit character - * \param[out] pscore correlation score - * \param[out] pindex [optional] index of class - * \param[out] pcharstr [optional] character string of class - * \param[out] ppixdb [optional] debug pix showing input and best fit - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Basic matching character splitter.  Finds the best match among
- *          all templates to some region of the image.  This can result
- *          in splitting the image into two parts.  This is "image decoding"
- *          without dynamic programming, because we don't use a setwidth
- *          and compute the best matching score for the entire image.
- *      (2) Matching is to the average templates, without character scaling.
- * 
- */ -l_ok -recogCorrelationBestChar(L_RECOG *recog, - PIX *pixs, - BOX **pbox, - l_float32 *pscore, - l_int32 *pindex, - char **pcharstr, - PIX **ppixdb) -{ -l_int32 i, n, w1, h1, w2, area2, ycent2, delx, dely; -l_int32 bestdelx, bestdely, bestindex; -l_float32 score, bestscore; -BOX *box; -BOXA *boxa; -NUMA *nasum, *namoment; -PIX *pix1, *pix2; - - PROCNAME("recogCorrelationBestChar"); - - if (pindex) *pindex = 0; - if (pcharstr) *pcharstr = NULL; - if (ppixdb) *ppixdb = NULL; - if (pbox) *pbox = NULL; - if (pscore) *pscore = 0.0; - if (!pbox || !pscore) - return ERROR_INT("&box and &score not both defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (!recog->train_done) - return ERROR_INT("training not finished", procName, 1); - - /* Binarize and crop to foreground if necessary. Add padding - * to both the left and right side; this is compensated for - * when reporting the bounding box of the best matched character. */ - pix1 = recogProcessToIdentify(recog, pixs, LeftRightPadding); - pixGetDimensions(pix1, &w1, &h1, NULL); - - /* Compute vertical sum and moment arrays */ - nasum = pixCountPixelsByColumn(pix1); - namoment = pixGetMomentByColumn(pix1, 1); - - /* Do shifted correlation against all averaged templates. */ - n = recog->setsize; - boxa = boxaCreate(n); /* location of best fits for each character */ - bestscore = 0.0; - bestindex = bestdelx = bestdely = 0; - for (i = 0; i < n; i++) { - pix2 = pixaGetPix(recog->pixa_u, i, L_CLONE); - w2 = pixGetWidth(pix2); - /* Note that the slightly expended w1 is typically larger - * than w2 (the template). 
*/ - if (w1 >= w2) { - numaGetIValue(recog->nasum_u, i, &area2); - ptaGetIPt(recog->pta_u, i, NULL, &ycent2); - pixCorrelationBestShift(pix1, pix2, nasum, namoment, area2, ycent2, - recog->maxyshift, recog->sumtab, &delx, - &dely, &score, 1); - if (ppixdb) { - lept_stderr( - "Best match template %d: (x,y) = (%d,%d), score = %5.3f\n", - i, delx, dely, score); - } - /* Compensate for padding */ - box = boxCreate(delx - LeftRightPadding, 0, w2, h1); - if (score > bestscore) { - bestscore = score; - bestdelx = delx - LeftRightPadding; - bestdely = dely; - bestindex = i; - } - } else { - box = boxCreate(0, 0, 1, 1); /* placeholder */ - if (ppixdb) - lept_stderr("Component too thin: w1 = %d, w2 = %d\n", w1, w2); - } - boxaAddBox(boxa, box, L_INSERT); - pixDestroy(&pix2); - } - - *pscore = bestscore; - *pbox = boxaGetBox(boxa, bestindex, L_COPY); - if (pindex) *pindex = bestindex; - if (pcharstr) - recogGetClassString(recog, bestindex, pcharstr); - - if (ppixdb) { - L_INFO("Best match: class %d; shifts (%d, %d)\n", - procName, bestindex, bestdelx, bestdely); - pix2 = pixaGetPix(recog->pixa_u, bestindex, L_CLONE); - *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); - pixDestroy(&pix2); - } - - pixDestroy(&pix1); - boxaDestroy(&boxa); - numaDestroy(&nasum); - numaDestroy(&namoment); - return 0; -} - - -/*! 
- * \brief pixCorrelationBestShift() - * - * \param[in] pix1 1 bpp, the unknown image; typically larger - * \param[in] pix2 1 bpp, the matching template image) - * \param[in] nasum1 vertical column pixel sums for pix1 - * \param[in] namoment1 vertical column first moment of pixels for pix1 - * \param[in] area2 number of on pixels in pix2 - * \param[in] ycent2 y component of centroid of pix2 - * \param[in] maxyshift max y shift of pix2 around the location where - * the centroids of pix2 and a windowed part of pix1 - * are vertically aligned - * \param[in] tab8 [optional] sum tab for ON pixels in byte; - * can be NULL - * \param[out] pdelx [optional] best x shift of pix2 relative to pix1 - * \param[out] pdely [optional] best y shift of pix2 relative to pix1 - * \param[out] pscore [optional] maximum score found; can be NULL - * \param[in] debugflag <= 0 to skip; positive to generate output; - * the integer is used to label the debug image. - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This maximizes the correlation score between two 1 bpp images,
- *          one of which is typically wider.  In a typical example,
- *          pix1 is a bitmap of 2 or more touching characters and pix2 is
- *          a single character template.  This finds the location of pix2
- *          that gives the largest correlation.
- *      (2) The windowed area of fg pixels and windowed first moment
- *          in the y direction are computed from the input sum and moment
- *          column arrays, %nasum1 and %namoment1
- *      (3) This is a brute force operation.  We compute the correlation
- *          at every x shift for which pix2 fits entirely within pix1,
- *          and where the centroid of pix2 is aligned, within +-maxyshift,
- *          with the centroid of a window of pix1 of the same width.
- *          The correlation is taken over the full height of pix1.
- *          This can be made more efficient.
- * 
- */ -static l_int32 -pixCorrelationBestShift(PIX *pix1, - PIX *pix2, - NUMA *nasum1, - NUMA *namoment1, - l_int32 area2, - l_int32 ycent2, - l_int32 maxyshift, - l_int32 *tab8, - l_int32 *pdelx, - l_int32 *pdely, - l_float32 *pscore, - l_int32 debugflag) -{ -l_int32 w1, w2, h1, h2, i, j, nx, shifty, delx, dely; -l_int32 sum, moment, count; -l_int32 *tab, *area1, *arraysum, *arraymoment; -l_float32 maxscore, score; -l_float32 *ycent1; -FPIX *fpix; -PIX *pixt, *pixt1, *pixt2; - - PROCNAME("pixCorrelationBestShift"); - - if (pdelx) *pdelx = 0; - if (pdely) *pdely = 0; - if (pscore) *pscore = 0.0; - if (!pix1 || pixGetDepth(pix1) != 1) - return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1); - if (!pix2 || pixGetDepth(pix2) != 1) - return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1); - if (!nasum1 || !namoment1) - return ERROR_INT("nasum1 and namoment1 not both defined", procName, 1); - if (area2 <= 0 || ycent2 <= 0) - return ERROR_INT("area2 and ycent2 must be > 0", procName, 1); - - /* If pix1 (the unknown image) is narrower than pix2, - * don't bother to try the match. pix1 is already padded with - * 2 pixels on each side. */ - pixGetDimensions(pix1, &w1, &h1, NULL); - pixGetDimensions(pix2, &w2, &h2, NULL); - if (w1 < w2) { - if (debugflag > 0) { - L_INFO("skipping match with w1 = %d and w2 = %d\n", - procName, w1, w2); - } - return 0; - } - nx = w1 - w2 + 1; - - if (debugflag > 0) - fpix = fpixCreate(nx, 2 * maxyshift + 1); - if (!tab8) - tab = makePixelSumTab8(); - else - tab = tab8; - - /* Set up the arrays for area1 and ycent1. We have to do this - * for each template (pix2) because the window width is w2. 
*/ - area1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32)); - ycent1 = (l_float32 *)LEPT_CALLOC(nx, sizeof(l_int32)); - arraysum = numaGetIArray(nasum1); - arraymoment = numaGetIArray(namoment1); - for (i = 0, sum = 0, moment = 0; i < w2; i++) { - sum += arraysum[i]; - moment += arraymoment[i]; - } - for (i = 0; i < nx - 1; i++) { - area1[i] = sum; - ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; - sum += arraysum[w2 + i] - arraysum[i]; - moment += arraymoment[w2 + i] - arraymoment[i]; - } - area1[nx - 1] = sum; - ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; - - /* Find the best match location for pix2. At each location, - * to insure that pixels are ON only within the intersection of - * pix and the shifted pix2: - * (1) Start with pixt cleared and equal in size to pix1. - * (2) Blit the shifted pix2 onto pixt. Then all ON pixels - * are within the intersection of pix1 and the shifted pix2. - * (3) AND pix1 with pixt. */ - pixt = pixCreate(w2, h1, 1); - maxscore = 0; - delx = 0; - dely = 0; /* amount to shift pix2 relative to pix1 to get alignment */ - for (i = 0; i < nx; i++) { - shifty = (l_int32)(ycent1[i] - ycent2 + 0.5); - for (j = -maxyshift; j <= maxyshift; j++) { - pixClearAll(pixt); - pixRasterop(pixt, 0, shifty + j, w2, h2, PIX_SRC, pix2, 0, 0); - pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0); - pixCountPixels(pixt, &count, tab); - score = (l_float32)count * (l_float32)count / - ((l_float32)area1[i] * (l_float32)area2); - if (score > maxscore) { - maxscore = score; - delx = i; - dely = shifty + j; - } - - if (debugflag > 0) - fpixSetPixel(fpix, i, maxyshift + j, 1000.0 * score); - } - } - - if (debugflag > 0) { - lept_mkdir("lept/recog"); - char buf[128]; - pixt1 = fpixDisplayMaxDynamicRange(fpix); - pixt2 = pixExpandReplicate(pixt1, 5); - snprintf(buf, sizeof(buf), "/tmp/lept/recog/junkbs_%d.png", debugflag); - pixWrite(buf, pixt2, IFF_PNG); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - 
fpixDestroy(&fpix); - } - - if (pdelx) *pdelx = delx; - if (pdely) *pdely = dely; - if (pscore) *pscore = maxscore; - if (!tab8) LEPT_FREE(tab); - LEPT_FREE(area1); - LEPT_FREE(ycent1); - LEPT_FREE(arraysum); - LEPT_FREE(arraymoment); - pixDestroy(&pixt); - return 0; -} - - -/*------------------------------------------------------------------------* - * Low-level identification * - *------------------------------------------------------------------------*/ -/*! - * \brief recogIdentifyPixa() - * - * \param[in] recog - * \param[in] pixa of 1 bpp images to match - * \param[out] ppixdb [optional] pix showing inputs and best fits - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This should be called by recogIdentifyMuliple(), which
- *          binarizes and splits characters before sending %pixa here.
- *      (2) This calls recogIdentifyPix(), which does the same operation
- *          on each pix in %pixa, and optionally returns the arrays
- *          of results (scores, class index and character string)
- *          for the best correlation match.
- * 
- */ -l_ok -recogIdentifyPixa(L_RECOG *recog, - PIXA *pixa, - PIX **ppixdb) -{ -char *text; -l_int32 i, n, fail, index, depth; -l_float32 score; -PIX *pix1, *pix2, *pix3; -PIXA *pixa1; -L_RCH *rch; - - PROCNAME("recogIdentifyPixa"); - - if (ppixdb) *ppixdb = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - /* Run the recognizer on the set of images. This writes - * the text string into each pix in pixa. */ - n = pixaGetCount(pixa); - rchaDestroy(&recog->rcha); - recog->rcha = rchaCreate(); - pixa1 = (ppixdb) ? pixaCreate(n) : NULL; - depth = 1; - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - pix2 = NULL; - fail = FALSE; - if (!ppixdb) - fail = recogIdentifyPix(recog, pix1, NULL); - else - fail = recogIdentifyPix(recog, pix1, &pix2); - if (fail) - recogSkipIdentify(recog); - if ((rch = recog->rch) == NULL) { - L_ERROR("rch not found for char %d\n", procName, i); - pixDestroy(&pix1); - pixDestroy(&pix2); - continue; - } - rchExtract(rch, NULL, NULL, &text, NULL, NULL, NULL, NULL); - pixSetText(pix1, text); - LEPT_FREE(text); - if (ppixdb) { - rchExtract(rch, &index, &score, NULL, NULL, NULL, NULL, NULL); - pix3 = recogShowMatch(recog, pix2, NULL, NULL, index, score); - if (i == 0) depth = pixGetDepth(pix3); - pixaAddPix(pixa1, pix3, L_INSERT); - pixDestroy(&pix2); - } - transferRchToRcha(rch, recog->rcha); - pixDestroy(&pix1); - } - - /* Package the images for debug */ - if (ppixdb) { - *ppixdb = pixaDisplayTiledInRows(pixa1, depth, 2500, 1.0, 0, 20, 1); - pixaDestroy(&pixa1); - } - - return 0; -} - - -/*! - * \brief recogIdentifyPix() - * - * \param[in] recog with LUT's pre-computed - * \param[in] pixs of a single character, 1 bpp - * \param[out] ppixdb [optional] debug pix showing input and best fit - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Basic recognition function for a single character.
- *      (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default
- *          situation, matching is attempted to every bitmap in the recog,
- *          and the identify of the best match is returned.
- *      (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and
- *          matching is only attemplted to the averaged bitmaps.  For this
- *          case, the index of the bestsample is meaningless (0 is returned
- *          if requested).
- *      (4) The score is related to the confidence (probability of correct
- *          identification), in that a higher score is correlated with
- *          a higher probability.  However, the actual relation between
- *          the correlation (score) and the probability is not known;
- *          we call this a "score" because "confidence" can be misinterpreted
- *          as an actual probability.
- * 
- */ -l_ok -recogIdentifyPix(L_RECOG *recog, - PIX *pixs, - PIX **ppixdb) -{ -char *text; -l_int32 i, j, n, bestindex, bestsample, area1, area2; -l_int32 shiftx, shifty, bestdelx, bestdely, bestwidth, maxyshift; -l_float32 x1, y1, x2, y2, delx, dely, score, maxscore; -NUMA *numa; -PIX *pix0, *pix1, *pix2; -PIXA *pixa; -PTA *pta; - - PROCNAME("recogIdentifyPix"); - - if (ppixdb) *ppixdb = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - /* Do the averaging if required and not yet done. */ - if (recog->templ_use == L_USE_AVERAGE_TEMPLATES && !recog->ave_done) { - recogAverageSamples(&recog, 0); - if (!recog) - return ERROR_INT("averaging failed", procName, 1); - } - - /* Binarize and crop to foreground if necessary */ - if ((pix0 = recogProcessToIdentify(recog, pixs, 0)) == NULL) - return ERROR_INT("no fg pixels in pix0", procName, 1); - - /* Optionally scale and/or convert to fixed stroke width */ - pix1 = recogModifyTemplate(recog, pix0); - pixDestroy(&pix0); - if (!pix1) - return ERROR_INT("no fg pixels in pix1", procName, 1); - - /* Do correlation at all positions within +-maxyshift of - * the nominal centroid alignment. 
*/ - pixCountPixels(pix1, &area1, recog->sumtab); - pixCentroid(pix1, recog->centtab, recog->sumtab, &x1, &y1); - bestindex = bestsample = bestdelx = bestdely = bestwidth = 0; - maxscore = 0.0; - maxyshift = recog->maxyshift; - if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { - for (i = 0; i < recog->setsize; i++) { - numaGetIValue(recog->nasum, i, &area2); - if (area2 == 0) continue; /* no template available */ - pix2 = pixaGetPix(recog->pixa, i, L_CLONE); - ptaGetPt(recog->pta, i, &x2, &y2); - delx = x1 - x2; - dely = y1 - y2; - for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { - for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { - pixCorrelationScoreSimple(pix1, pix2, area1, area2, - delx + shiftx, dely + shifty, - 5, 5, recog->sumtab, &score); - if (score > maxscore) { - bestindex = i; - bestdelx = delx + shiftx; - bestdely = dely + shifty; - maxscore = score; - } - } - } - pixDestroy(&pix2); - } - } else { /* use all the samples */ - for (i = 0; i < recog->setsize; i++) { - pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); - n = pixaGetCount(pixa); - if (n == 0) { - pixaDestroy(&pixa); - continue; - } - numa = numaaGetNuma(recog->naasum, i, L_CLONE); - pta = ptaaGetPta(recog->ptaa, i, L_CLONE); - for (j = 0; j < n; j++) { - pix2 = pixaGetPix(pixa, j, L_CLONE); - numaGetIValue(numa, j, &area2); - ptaGetPt(pta, j, &x2, &y2); - delx = x1 - x2; - dely = y1 - y2; - for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { - for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { - pixCorrelationScoreSimple(pix1, pix2, area1, area2, - delx + shiftx, dely + shifty, - 5, 5, recog->sumtab, &score); - if (score > maxscore) { - bestindex = i; - bestsample = j; - bestdelx = delx + shiftx; - bestdely = dely + shifty; - maxscore = score; - bestwidth = pixGetWidth(pix2); - } - } - } - pixDestroy(&pix2); - } - pixaDestroy(&pixa); - numaDestroy(&numa); - ptaDestroy(&pta); - } - } - - /* Package up the results */ - recogGetClassString(recog, bestindex, 
&text); - rchDestroy(&recog->rch); - recog->rch = rchCreate(bestindex, maxscore, text, bestsample, - bestdelx, bestdely, bestwidth); - - if (ppixdb) { - if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { - L_INFO("Best match: str %s; class %d; sh (%d, %d); score %5.3f\n", - procName, text, bestindex, bestdelx, bestdely, maxscore); - pix2 = pixaGetPix(recog->pixa, bestindex, L_CLONE); - } else { /* L_USE_ALL_TEMPLATES */ - L_INFO("Best match: str %s; sample %d in class %d; score %5.3f\n", - procName, text, bestsample, bestindex, maxscore); - if (maxyshift > 0 && (L_ABS(bestdelx) > 0 || L_ABS(bestdely) > 0)) { - L_INFO(" Best shift: (%d, %d)\n", - procName, bestdelx, bestdely); - } - pix2 = pixaaGetPix(recog->pixaa, bestindex, bestsample, L_CLONE); - } - *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); - pixDestroy(&pix2); - } - - pixDestroy(&pix1); - return 0; -} - - -/*! - * \brief recogSkipIdentify() - * - * \param[in] recog - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This just writes a "dummy" result with 0 score and empty
- *          string id into the rch.
- * 
- */ -l_ok -recogSkipIdentify(L_RECOG *recog) -{ - PROCNAME("recogSkipIdentify"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - /* Package up placeholder results */ - rchDestroy(&recog->rch); - recog->rch = rchCreate(0, 0.0, stringNew(""), 0, 0, 0, 0); - return 0; -} - - -/*------------------------------------------------------------------------* - * Operations for handling identification results * - *------------------------------------------------------------------------*/ -/*! - * \brief rchaCreate() - * - * Return: 0 if OK, 1 on error - * - * Notes: - * (1) Be sure to destroy any existing rcha before assigning this. - */ -static L_RCHA * -rchaCreate() -{ -L_RCHA *rcha; - - rcha = (L_RCHA *)LEPT_CALLOC(1, sizeof(L_RCHA)); - rcha->naindex = numaCreate(0); - rcha->nascore = numaCreate(0); - rcha->satext = sarrayCreate(0); - rcha->nasample = numaCreate(0); - rcha->naxloc = numaCreate(0); - rcha->nayloc = numaCreate(0); - rcha->nawidth = numaCreate(0); - return rcha; -} - - -/*! - * \brief rchaDestroy() - * - * \param[in,out] prcha to be nulled - */ -void -rchaDestroy(L_RCHA **prcha) -{ -L_RCHA *rcha; - - PROCNAME("rchaDestroy"); - - if (prcha == NULL) { - L_WARNING("&rcha is null!\n", procName); - return; - } - if ((rcha = *prcha) == NULL) - return; - - numaDestroy(&rcha->naindex); - numaDestroy(&rcha->nascore); - sarrayDestroy(&rcha->satext); - numaDestroy(&rcha->nasample); - numaDestroy(&rcha->naxloc); - numaDestroy(&rcha->nayloc); - numaDestroy(&rcha->nawidth); - LEPT_FREE(rcha); - *prcha = NULL; - return; -} - - -/*! 
- * \brief rchCreate() - * - * \param[in] index index of best template - * \param[in] score correlation score of best template - * \param[in] text character string of best template - * \param[in] sample index of best sample; -1 if averages are used - * \param[in] xloc x-location of template: delx + shiftx - * \param[in] yloc y-location of template: dely + shifty - * \param[in] width width of best template - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Be sure to destroy any existing rch before assigning this.
- *      (2) This stores the text string, not a copy of it, so the
- *          caller must not destroy the string.
- * 
- */ -static L_RCH * -rchCreate(l_int32 index, - l_float32 score, - char *text, - l_int32 sample, - l_int32 xloc, - l_int32 yloc, - l_int32 width) -{ -L_RCH *rch; - - rch = (L_RCH *)LEPT_CALLOC(1, sizeof(L_RCH)); - rch->index = index; - rch->score = score; - rch->text = text; - rch->sample = sample; - rch->xloc = xloc; - rch->yloc = yloc; - rch->width = width; - return rch; -} - - -/*! - * \brief rchDestroy() - * - * \param[in,out] prch to be nulled - */ -void -rchDestroy(L_RCH **prch) -{ -L_RCH *rch; - - PROCNAME("rchDestroy"); - - if (prch == NULL) { - L_WARNING("&rch is null!\n", procName); - return; - } - if ((rch = *prch) == NULL) - return; - LEPT_FREE(rch->text); - LEPT_FREE(rch); - *prch = NULL; - return; -} - - -/*! - * \brief rchaExtract() - * - * \param[in] rcha - * \param[out] pnaindex [optional] indices of best templates - * \param[out] pnascore [optional] correl scores of best templates - * \param[out] psatext [optional] character strings of best templates - * \param[out] pnasample [optional] indices of best samples - * \param[out] pnaxloc [optional] x-locations of templates - * \param[out] pnayloc [optional] y-locations of templates - * \param[out] pnawidth [optional] widths of best templates - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This returns clones of the number and string arrays.  They must
- *          be destroyed by the caller.
- * 
- */ -l_ok -rchaExtract(L_RCHA *rcha, - NUMA **pnaindex, - NUMA **pnascore, - SARRAY **psatext, - NUMA **pnasample, - NUMA **pnaxloc, - NUMA **pnayloc, - NUMA **pnawidth) -{ - PROCNAME("rchaExtract"); - - if (pnaindex) *pnaindex = NULL; - if (pnascore) *pnascore = NULL; - if (psatext) *psatext = NULL; - if (pnasample) *pnasample = NULL; - if (pnaxloc) *pnaxloc = NULL; - if (pnayloc) *pnayloc = NULL; - if (pnawidth) *pnawidth = NULL; - if (!rcha) - return ERROR_INT("rcha not defined", procName, 1); - - if (pnaindex) *pnaindex = numaClone(rcha->naindex); - if (pnascore) *pnascore = numaClone(rcha->nascore); - if (psatext) *psatext = sarrayClone(rcha->satext); - if (pnasample) *pnasample = numaClone(rcha->nasample); - if (pnaxloc) *pnaxloc = numaClone(rcha->naxloc); - if (pnayloc) *pnayloc = numaClone(rcha->nayloc); - if (pnawidth) *pnawidth = numaClone(rcha->nawidth); - return 0; -} - - -/*! - * \brief rchExtract() - * - * \param[in] rch - * \param[out] pindex [optional] index of best template - * \param[out] pscore [optional] correlation score of best template - * \param[out] ptext [optional] character string of best template - * \param[out] psample [optional] index of best sample - * \param[out] pxloc [optional] x-location of template - * \param[out] pyloc [optional] y-location of template - * \param[out] pwidth [optional] width of best template - * \return 0 if OK, 1 on error - */ -l_ok -rchExtract(L_RCH *rch, - l_int32 *pindex, - l_float32 *pscore, - char **ptext, - l_int32 *psample, - l_int32 *pxloc, - l_int32 *pyloc, - l_int32 *pwidth) -{ - PROCNAME("rchExtract"); - - if (pindex) *pindex = 0; - if (pscore) *pscore = 0.0; - if (ptext) *ptext = NULL; - if (psample) *psample = 0; - if (pxloc) *pxloc = 0; - if (pyloc) *pyloc = 0; - if (pwidth) *pwidth = 0; - if (!rch) - return ERROR_INT("rch not defined", procName, 1); - - if (pindex) *pindex = rch->index; - if (pscore) *pscore = rch->score; - if (ptext) *ptext = stringNew(rch->text); /* new string: owned by caller 
*/ - if (psample) *psample = rch->sample; - if (pxloc) *pxloc = rch->xloc; - if (pyloc) *pyloc = rch->yloc; - if (pwidth) *pwidth = rch->width; - return 0; -} - - -/*! - * \brief transferRchToRcha() - * - * \param[in] rch source of data - * \param[in] rcha append to arrays in this destination - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is used to transfer the results of a single character
- *          identification to an rcha array for the array of characters.
- * 
- */ -static l_int32 -transferRchToRcha(L_RCH *rch, - L_RCHA *rcha) -{ - - PROCNAME("transferRchToRcha"); - - if (!rch) - return ERROR_INT("rch not defined", procName, 1); - if (!rcha) - return ERROR_INT("rcha not defined", procName, 1); - - numaAddNumber(rcha->naindex, rch->index); - numaAddNumber(rcha->nascore, rch->score); - sarrayAddString(rcha->satext, rch->text, L_COPY); - numaAddNumber(rcha->nasample, rch->sample); - numaAddNumber(rcha->naxloc, rch->xloc); - numaAddNumber(rcha->nayloc, rch->yloc); - numaAddNumber(rcha->nawidth, rch->width); - return 0; -} - - -/*------------------------------------------------------------------------* - * Preprocessing and filtering * - *------------------------------------------------------------------------*/ -/*! - * \brief recogProcessToIdentify() - * - * \param[in] recog with LUT's pre-computed - * \param[in] pixs typ. single character, possibly d > 1 and uncropped - * \param[in] pad extra pixels added to left and right sides - * \return pixd 1 bpp, clipped to foreground, or NULL if there - * are no fg pixels or on error. - * - *
- * Notes:
- *      (1) This is a lightweight operation to insure that the input
- *          image is 1 bpp, properly cropped, and padded on each side.
- *          If bpp > 1, the image is thresholded.
- * 
- */ -PIX * -recogProcessToIdentify(L_RECOG *recog, - PIX *pixs, - l_int32 pad) -{ -l_int32 canclip; -PIX *pix1, *pix2, *pixd; - - PROCNAME("recogProcessToIdentify"); - - if (!recog) - return (PIX *)ERROR_PTR("recog not defined", procName, NULL); - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if (pixGetDepth(pixs) != 1) - pix1 = pixThresholdToBinary(pixs, recog->threshold); - else - pix1 = pixClone(pixs); - pixTestClipToForeground(pix1, &canclip); - if (canclip) - pixClipToForeground(pix1, &pix2, NULL); - else - pix2 = pixClone(pix1); - pixDestroy(&pix1); - if (!pix2) - return (PIX *)ERROR_PTR("no foreground pixels", procName, NULL); - - pixd = pixAddBorderGeneral(pix2, pad, pad, 0, 0, 0); - pixDestroy(&pix2); - return pixd; -} - - -/*! - * \brief recogPreSplittingFilter() - * - * \param[in] recog - * \param[in] pixs 1 bpp, many connected components - * \param[in] minh minimum height of components to be retained - * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained - * \param[in] debug 1 to output indicator arrays - * \return pixd with filtered components removed or NULL on error - */ -static PIX * -recogPreSplittingFilter(L_RECOG *recog, - PIX *pixs, - l_int32 minh, - l_float32 minaf, - l_int32 debug) -{ -l_int32 scaling, minsplitw, maxsplith, maxasp; -BOXA *boxas; -NUMA *naw, *nah, *na1, *na1c, *na2, *na3, *na4, *na5, *na6, *na7; -PIX *pixd; -PIXA *pixas; - - PROCNAME("recogPreSplittingFilter"); - - if (!recog) - return (PIX *)ERROR_PTR("recog not defined", procName, NULL); - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* If there is scaling, do not remove components based on the - * values of min_splitw and max_splith. */ - scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; - minsplitw = (scaling) ? 1 : recog->min_splitw - 3; - maxsplith = (scaling) ? 
150 : recog->max_splith; - maxasp = recog->max_wh_ratio; - - /* Generate an indicator array of connected components to remove: - * short stuff - * tall stuff - * components with large width/height ratio - * components with small area fill fraction */ - boxas = pixConnComp(pixs, &pixas, 8); - pixaFindDimensions(pixas, &naw, &nah); - na1 = numaMakeThresholdIndicator(naw, minsplitw, L_SELECT_IF_LT); - na1c = numaCopy(na1); - na2 = numaMakeThresholdIndicator(nah, minh, L_SELECT_IF_LT); - na3 = numaMakeThresholdIndicator(nah, maxsplith, L_SELECT_IF_GT); - na4 = pixaFindWidthHeightRatio(pixas); - na5 = numaMakeThresholdIndicator(na4, maxasp, L_SELECT_IF_GT); - na6 = pixaFindAreaFraction(pixas); - na7 = numaMakeThresholdIndicator(na6, minaf, L_SELECT_IF_LT); - numaLogicalOp(na1, na1, na2, L_UNION); - numaLogicalOp(na1, na1, na3, L_UNION); - numaLogicalOp(na1, na1, na5, L_UNION); - numaLogicalOp(na1, na1, na7, L_UNION); - pixd = pixCopy(NULL, pixs); - pixRemoveWithIndicator(pixd, pixas, na1); - if (debug) - l_showIndicatorSplitValues(na1c, na2, na3, na5, na7, na1); - numaDestroy(&naw); - numaDestroy(&nah); - numaDestroy(&na1); - numaDestroy(&na1c); - numaDestroy(&na2); - numaDestroy(&na3); - numaDestroy(&na4); - numaDestroy(&na5); - numaDestroy(&na6); - numaDestroy(&na7); - boxaDestroy(&boxas); - pixaDestroy(&pixas); - return pixd; -} - - -/*! 
- * \brief recogSplittingFilter() - * - * \param[in] recog - * \param[in] pixs 1 bpp, single connected component - * \param[in] minh minimum height of component; 0 for default - * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained - * \param[out] premove 0 to save, 1 to remove - * \param[in] debug 1 to output indicator arrays - * \return 0 if OK, 1 on error - */ -static l_int32 -recogSplittingFilter(L_RECOG *recog, - PIX *pixs, - l_int32 minh, - l_float32 minaf, - l_int32 *premove, - l_int32 debug) -{ -l_int32 w, h; -l_float32 aspratio, fract; - - PROCNAME("recogSplittingFilter"); - - if (!premove) - return ERROR_INT("&remove not defined", procName, 1); - *premove = 0; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (minh <= 0) minh = DefaultMinHeight; - - /* Remove from further consideration: - * small stuff - * components with large width/height ratio - * components with small area fill fraction */ - pixGetDimensions(pixs, &w, &h, NULL); - if (w < recog->min_splitw) { - if (debug) L_INFO("w = %d < %d\n", procName, w, recog->min_splitw); - *premove = 1; - return 0; - } - if (h < minh) { - if (debug) L_INFO("h = %d < %d\n", procName, h, minh); - *premove = 1; - return 0; - } - aspratio = (l_float32)w / (l_float32)h; - if (aspratio > recog->max_wh_ratio) { - if (debug) L_INFO("w/h = %5.3f too large\n", procName, aspratio); - *premove = 1; - return 0; - } - pixFindAreaFraction(pixs, recog->sumtab, &fract); - if (fract < minaf) { - if (debug) L_INFO("area fill fract %5.3f < %5.3f\n", - procName, fract, minaf); - *premove = 1; - return 0; - } - - return 0; -} - - -/*------------------------------------------------------------------------* - * Postprocessing * - *------------------------------------------------------------------------*/ -/*! 
- * \brief recogExtractNumbers() - * - * \param[in] recog - * \param[in] boxas location of components - * \param[in] scorethresh min score for which we accept a component - * \param[in] spacethresh max horizontal distance allowed between digits; - * use -1 for default - * \param[out] pbaa [optional] bounding boxes of identified numbers - * \param[out] pnaa [optional] scores of identified digits - * \return sa of identified numbers, or NULL on error - * - *
- * Notes:
- *      (1) This extracts digit data after recogaIdentifyMultiple() or
- *          lower-level identification has taken place.
- *      (2) Each string in the returned sa contains a sequence of ascii
- *          digits in a number.
- *      (3) The horizontal distance between boxes (limited by %spacethresh)
- *          is the negative of the horizontal overlap.
- *      (4) Components with a score less than %scorethresh, which may
- *          be hyphens or other small characters, will signal the
- *          end of the current sequence of digits in the number.  A typical
- *          value for %scorethresh is 0.60.
- *      (5) We allow two digits to be combined if these conditions apply:
- *            (a) the first is to the left of the second
- *            (b) the second has a horizontal separation less than %spacethresh
- *            (c) the vertical overlap >= 0 (vertical separation < 0)
- *            (d) both have a score that exceeds %scorethresh
- *      (6) Each numa in the optionally returned naa contains the digit
- *          scores of a number.  Each boxa in the optionally returned baa
- *          contains the bounding boxes of the digits in the number.
- * 
- */ -SARRAY * -recogExtractNumbers(L_RECOG *recog, - BOXA *boxas, - l_float32 scorethresh, - l_int32 spacethresh, - BOXAA **pbaa, - NUMAA **pnaa) -{ -char *str, *text; -l_int32 i, n, x1, x2, h_ovl, v_ovl, h_sep, v_sep; -l_float32 score; -BOX *box, *prebox; -BOXA *ba; -BOXAA *baa; -NUMA *nascore, *na; -NUMAA *naa; -SARRAY *satext, *sa, *saout; - - PROCNAME("recogExtractNumbers"); - - if (pbaa) *pbaa = NULL; - if (pnaa) *pnaa = NULL; - if (!recog || !recog->rcha) - return (SARRAY *)ERROR_PTR("recog and rcha not both defined", - procName, NULL); - if (!boxas) - return (SARRAY *)ERROR_PTR("boxas not defined", procName, NULL); - - if (spacethresh < 0) - spacethresh = L_MAX(recog->maxheight_u, 20); - rchaExtract(recog->rcha, NULL, &nascore, &satext, NULL, NULL, NULL, NULL); - if (!nascore || !satext) { - numaDestroy(&nascore); - sarrayDestroy(&satext); - return (SARRAY *)ERROR_PTR("nascore and satext not both returned", - procName, NULL); - } - - saout = sarrayCreate(0); - naa = numaaCreate(0); - baa = boxaaCreate(0); - prebox = NULL; - n = numaGetCount(nascore); - for (i = 0; i < n; i++) { - numaGetFValue(nascore, i, &score); - text = sarrayGetString(satext, i, L_NOCOPY); - if (prebox == NULL) { /* no current run */ - if (score < scorethresh) { - continue; - } else { /* start a number run */ - sa = sarrayCreate(0); - ba = boxaCreate(0); - na = numaCreate(0); - sarrayAddString(sa, text, L_COPY); - prebox = boxaGetBox(boxas, i, L_CLONE); - boxaAddBox(ba, prebox, L_COPY); - numaAddNumber(na, score); - } - } else { /* in a current number run */ - box = boxaGetBox(boxas, i, L_CLONE); - boxGetGeometry(prebox, &x1, NULL, NULL, NULL); - boxGetGeometry(box, &x2, NULL, NULL, NULL); - boxOverlapDistance(box, prebox, &h_ovl, &v_ovl); - h_sep = -h_ovl; - v_sep = -v_ovl; - boxDestroy(&prebox); - if (x1 < x2 && h_sep <= spacethresh && - v_sep < 0 && score >= scorethresh) { /* add to number */ - sarrayAddString(sa, text, L_COPY); - boxaAddBox(ba, box, L_COPY); - numaAddNumber(na, 
score); - prebox = box; - } else { /* save the completed number */ - str = sarrayToString(sa, 0); - sarrayAddString(saout, str, L_INSERT); - sarrayDestroy(&sa); - boxaaAddBoxa(baa, ba, L_INSERT); - numaaAddNuma(naa, na, L_INSERT); - boxDestroy(&box); - if (score >= scorethresh) { /* start a new number */ - i--; - continue; - } - } - } - } - - if (prebox) { /* save the last number */ - str = sarrayToString(sa, 0); - sarrayAddString(saout, str, L_INSERT); - boxaaAddBoxa(baa, ba, L_INSERT); - numaaAddNuma(naa, na, L_INSERT); - sarrayDestroy(&sa); - boxDestroy(&prebox); - } - - numaDestroy(&nascore); - sarrayDestroy(&satext); - if (sarrayGetCount(saout) == 0) { - sarrayDestroy(&saout); - boxaaDestroy(&baa); - numaaDestroy(&naa); - L_INFO("saout has no identified text\n", procName); - return NULL; - } - - if (pbaa) - *pbaa = baa; - else - boxaaDestroy(&baa); - if (pnaa) - *pnaa = naa; - else - numaaDestroy(&naa); - return saout; -} - -/*! - * \brief showExtractNumbers() - * - * \param[in] pixs input 1 bpp image - * \param[in] sa recognized text strings - * \param[in] baa boxa array for location of characters in each string - * \param[in] naa numa array for scores of characters in each string - * \param[out] ppixdb [optional] input pixs with identified chars outlined - * \return pixa of identified strings with text and scores, or NULL on error - * - *
- * Notes:
- *      (1) This is a debugging routine on digit identification; e.g.:
- *            recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0);
- *            sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa);
- *            pixa = showExtractNumbers(pixs, sa, baa, naa, NULL);
- * 
- */ -PIXA * -showExtractNumbers(PIX *pixs, - SARRAY *sa, - BOXAA *baa, - NUMAA *naa, - PIX **ppixdb) -{ -char buf[128]; -char *textstr, *scorestr; -l_int32 i, j, n, nchar, len; -l_float32 score; -L_BMF *bmf; -BOX *box1, *box2; -BOXA *ba; -NUMA *na; -PIX *pix1, *pix2, *pix3, *pix4; -PIXA *pixa; - - PROCNAME("showExtractNumbers"); - - if (ppixdb) *ppixdb = NULL; - if (!pixs) - return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); - if (!sa) - return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); - if (!baa) - return (PIXA *)ERROR_PTR("baa not defined", procName, NULL); - if (!naa) - return (PIXA *)ERROR_PTR("naa not defined", procName, NULL); - - n = sarrayGetCount(sa); - pixa = pixaCreate(n); - bmf = bmfCreate(NULL, 6); - if (ppixdb) *ppixdb = pixConvertTo8(pixs, 1); - for (i = 0; i < n; i++) { - textstr = sarrayGetString(sa, i, L_NOCOPY); - ba = boxaaGetBoxa(baa, i, L_CLONE); - na = numaaGetNuma(naa, i, L_CLONE); - boxaGetExtent(ba, NULL, NULL, &box1); - box2 = boxAdjustSides(NULL, box1, -5, 5, -5, 5); - if (ppixdb) pixRenderBoxArb(*ppixdb, box2, 3, 255, 0, 0); - pix1 = pixClipRectangle(pixs, box1, NULL); - len = strlen(textstr) + 1; - pix2 = pixAddBlackOrWhiteBorder(pix1, 14 * len, 14 * len, - 5, 3, L_SET_WHITE); - pix3 = pixConvertTo8(pix2, 1); - nchar = numaGetCount(na); - scorestr = NULL; - for (j = 0; j < nchar; j++) { - numaGetFValue(na, j, &score); - snprintf(buf, sizeof(buf), "%d", (l_int32)(100 * score)); - stringJoinIP(&scorestr, buf); - if (j < nchar - 1) stringJoinIP(&scorestr, ","); - } - snprintf(buf, sizeof(buf), "%s: %s\n", textstr, scorestr); - pix4 = pixAddTextlines(pix3, bmf, buf, 0xff000000, L_ADD_BELOW); - pixaAddPix(pixa, pix4, L_INSERT); - boxDestroy(&box1); - boxDestroy(&box2); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - boxaDestroy(&ba); - numaDestroy(&na); - LEPT_FREE(scorestr); - } - - bmfDestroy(&bmf); - return pixa; -} - - -/*------------------------------------------------------------------------* - 
* Static debug helper * - *------------------------------------------------------------------------*/ -/*! - * \brief l_showIndicatorSplitValues() - * - * \param[in] na1, na2, na3, na4, na5, na6 6 indicator array - * - *
- * Notes:
- *      (1) The values indicate that specific criteria has been met
- *          for component removal by pre-splitting filter..
- *          The 'result' line shows which components have been removed.
- * 
- */ -static void -l_showIndicatorSplitValues(NUMA *na1, - NUMA *na2, - NUMA *na3, - NUMA *na4, - NUMA *na5, - NUMA *na6) -{ -l_int32 i, n; - - n = numaGetCount(na1); - lept_stderr("================================================\n"); - lept_stderr("lt minw: "); - for (i = 0; i < n; i++) - lept_stderr("%4d ", (l_int32)na1->array[i]); - lept_stderr("\nlt minh: "); - for (i = 0; i < n; i++) - lept_stderr("%4d ", (l_int32)na2->array[i]); - lept_stderr("\ngt maxh: "); - for (i = 0; i < n; i++) - lept_stderr("%4d ", (l_int32)na3->array[i]); - lept_stderr("\ngt maxasp: "); - for (i = 0; i < n; i++) - lept_stderr("%4d ", (l_int32)na4->array[i]); - lept_stderr("\nlt minaf: "); - for (i = 0; i < n; i++) - lept_stderr("%4d ", (l_int32)na5->array[i]); - lept_stderr("\n------------------------------------------------"); - lept_stderr("\nresult: "); - for (i = 0; i < n; i++) - lept_stderr("%4d ", (l_int32)na6->array[i]); - lept_stderr("\n================================================\n"); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogtrain.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogtrain.c deleted file mode 100644 index 39f7a76c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/recogtrain.c +++ /dev/null @@ -1,2482 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file recogtrain.c - *
- *
- *      Training on labeled data
- *         l_int32             recogTrainLabeled()
- *         PIX                *recogProcessLabeled()
- *         l_int32             recogAddSample()
- *         PIX                *recogModifyTemplate()
- *         l_int32             recogAverageSamples()
- *         l_int32             pixaAccumulateSamples()
- *         l_int32             recogTrainingFinished()
- *         static l_int32      recogTemplatesAreOK()
- *         PIXA               *recogFilterPixaBySize()
- *         PIXAA              *recogSortPixaByClass()
- *         l_int32             recogRemoveOutliers1()
- *         PIXA               *pixaRemoveOutliers1()
- *         l_int32             recogRemoveOutliers2()
- *         PIXA               *pixaRemoveOutliers2()
- *
- *      Training on unlabeled data
- *         L_RECOG             recogTrainFromBoot()
- *
- *      Padding the digit training set
- *         l_int32             recogPadDigitTrainingSet()
- *         l_int32             recogIsPaddingNeeded()
- *         static SARRAY      *recogAddMissingClassStrings()
- *         PIXA               *recogAddDigitPadTemplates()
- *         static l_int32      recogCharsetAvailable()
- *
- *      Making a boot digit recognizer
- *         L_RECOG            *recogMakeBootDigitRecog()
- *         PIXA               *recogMakeBootDigitTemplates()
- *
- *      Debugging
- *         l_int32             recogShowContent()
- *         l_int32             recogDebugAverages()
- *         l_int32             recogShowAverageTemplates()
- *         static PIX         *pixDisplayOutliers()
- *         PIX                *recogDisplayOutlier()
- *         PIX                *recogShowMatchesInRange()
- *         PIX                *recogShowMatch()
- *
- *  These abbreviations are for the type of template to be used:
- *    * SI (for the scanned images)
- *    * WNL (for width-normalized lines, formed by first skeletonizing
- *           the scanned images, and then dilating to a fixed width)
- *  These abbreviations are for the type of recognizer:
- *    * BAR (book-adapted recognizer; the best type; can do identification
- *           with unscaled images and separation of touching characters.
- *    * BSR (bootstrap recognizer; used if more labeled templates are
- *           required for a BAR, either for finding more templates from
- *           the book, or making a hybrid BAR/BSR.
- *
- *  The recog struct typically holds two versions of the input templates
- *  (e.g. from a pixa) that were used to generate it.  One version is
- *  the unscaled input templates.  The other version is the one that
- *  will be used by the recog to identify unlabeled data.  That version
- *  depends on the input parameters when the recog is created.  The choices
- *  for the latter version, and their suggested use, are:
- *  (1) unscaled SI -- typical for BAR, generated from book images
- *  (2) unscaled WNL -- ditto
- *  (3) scaled SI -- typical for recognizers containing template
- *      images from sources other than the book to be recognized
- *  (4) scaled WNL -- ditto
- *  For cases (3) and (4), we recommend scaling to fixed height; e.g.,
- *  scalew = 0, scaleh = 40.
- *  When using WNL, we recommend using a width of 5 in the template
- *  and 4 in the unlabeled data.
- *  It appears that better results for a BAR are usually obtained using
- *  SI than WNL, but more experimentation is needed.
- *
- *  This utility is designed to build recognizers that are specifically
- *  adapted from a large amount of material, such as a book.  These
- *  use labeled templates taken from the material, and not scaled.
- *  In addition, two special recognizers are useful:
- *  (1) Bootstrap recognizer (BSR).  This uses height-scaled templates,
- *      that have been extended with several repetitions in one of two ways:
- *      (a) aniotropic width scaling (for either SI or WNL)
- *      (b) iterative erosions/dilations (for SI).
- *  (2) Outlier removal.  This uses height scaled templates.  It can be
- *      implemented without using templates that are aligned averages of all
- *      templates in a class.
- *
- *  Recognizers are inexpensive to generate, for example, from a pixa
- *  of labeled templates.  The general process of building a BAR is
- *  to start with labeled templates, e.g., in a pixa, make a BAR, and
- *  analyze new samples from the book to augment the BAR until it has
- *  enough samples for each character class.  Along the way, samples
- *  from a BSR may be added for help in training.  If not enough samples
- *  are available for the BAR, it can finally be augmented with BSR
- *  samples, in which case the resulting hybrid BAR/BSR recognizer
- *  must work on scaled images.
- *
- *  Here are the steps in doing recog training:
- *  A. Generate a BAR from any existing labeled templates
- *    (1) Create a recog and add the templates, using recogAddSample().
- *        This stores the unscaled templates.
- *        [Note: this can be done in one step if the labeled templates are put
- *         into a pixa:
- *           L_Recog *rec = recogCreateFromPixa(pixa, ...);  ]
- *    (2) Call recogTrainingFinished() to generate the (sometimes modified)
- *        templates to be used for correlation.
- *    (3) Optionally, remove outliers.
- *    If there are sufficient samples in the classes, we're done. Otherwise,
- *  B. Try to get more samples from the book to pad the BAR.
- *     (1) Save the unscaled, labeled templates from the BAR.
- *     (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
- *     (3) Do recognition on more unlabeled images, scaled to a fixed height
- *     (4) Add the unscaled, labeled images to the saved set.
- *     (5) Optionally, remove outliers.
- *     If there are sufficient samples in the classes, we're done. Otherwise,
- *  C. For classes without a sufficient number of templates, we can
- *     supplement the BAR with templates from a BSR (a hybrid RAR/BSR),
- *     and do recognition scaled to a fixed height.
- *
- *  Here are several methods that can be used for identifying outliers:
- *  (1) Compute average templates for each class and remove a candidate
- *      that is poorly correlated with the average.  This is the most
- *      simple method.  recogRemoveOutliers1() uses this, supplemented with
- *      a second threshold and a target number of templates to be saved.
- *  (2) Compute average templates for each class and remove a candidate
- *      that is more highly correlated with the average of some other class.
- *      This does not require setting a threshold for the correlation.
- *      recogRemoveOutliers2() uses this method, supplemented with a minimum
- *      correlation score.
- *  (3) For each candidate, find the average correlation with other
- *      members of its class, and remove those that have a relatively
- *      low average correlation.  This is similar to (1), gives comparable
- *      results and because it does not use average templates, it requires
- *      a bit more computation.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Static functions */ -static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize, - l_float32 minfract, l_int32 *pok); -static SARRAY *recogAddMissingClassStrings(L_RECOG *recog); -static l_int32 recogCharsetAvailable(l_int32 type); -static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas); -static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp, - l_int32 maxclass, l_float32 maxscore); - - /* Default parameters that are used in recogTemplatesAreOK() and - * in outlier removal functions, and that use template set size - * to decide if the set of templates (before outliers are removed) - * is valid. Values are set to accept most sets of sample templates. */ -static const l_int32 DefaultMinSetSize = 1; /* minimum number of - samples for a valid class */ -static const l_float32 DefaultMinSetFract = 0.4; /* minimum fraction - of classes required for a valid recog */ - - /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */ -static const l_float32 DefaultMinScore = 0.75; /* keep everything above */ -static const l_int32 DefaultMinTarget = 3; /* to be kept if possible */ -static const l_float32 LowerScoreThreshold = 0.5; /* templates can be - * kept down to this score to if needed to retain the - * desired minimum number of templates */ - - -/*------------------------------------------------------------------------* - * Training * - *------------------------------------------------------------------------*/ -/*! - * \brief recogTrainLabeled() - * - * \param[in] recog in training mode - * \param[in] pixs if depth > 1, will be thresholded to 1 bpp - * \param[in] box [optional] cropping box - * \param[in] text [optional] if null, use text field in pix - * \param[in] debug 1 to display images of samples not captured - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Training is restricted to the addition of a single
- *          character in an arbitrary (e.g., UTF8) charset
- *      (2) If box != null, it should represent the location in %pixs
- *          of the character image.
- * 
- */ -l_ok -recogTrainLabeled(L_RECOG *recog, - PIX *pixs, - BOX *box, - char *text, - l_int32 debug) -{ -l_int32 ret; -PIX *pix; - - PROCNAME("recogTrainLabeled"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Prepare the sample to be added. This step also acts - * as a filter, and can invalidate pixs as a template. */ - ret = recogProcessLabeled(recog, pixs, box, text, &pix); - if (ret) { - pixDestroy(&pix); - L_WARNING("failure to get sample '%s' for training\n", procName, - text); - return 1; - } - - recogAddSample(recog, pix, debug); - pixDestroy(&pix); - return 0; -} - - -/*! - * \brief recogProcessLabeled() - * - * \param[in] recog in training mode - * \param[in] pixs if depth > 1, will be thresholded to 1 bpp - * \param[in] box [optional] cropping box - * \param[in] text [optional] if null, use text field in pix - * \param[out] ppix addr of pix, 1 bpp, labeled - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This crops and binarizes the input image, generating a pix
- *          of one character where the charval is inserted into the pix.
- * 
- */ -l_ok -recogProcessLabeled(L_RECOG *recog, - PIX *pixs, - BOX *box, - char *text, - PIX **ppix) -{ -char *textdata; -l_int32 textinpix, textin, nsets; -NUMA *na; -PIX *pix1, *pix2, *pix3, *pix4; - - PROCNAME("recogProcessLabeled"); - - if (!ppix) - return ERROR_INT("&pix not defined", procName, 1); - *ppix = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Find the text; this will be stored with the output images */ - textin = text && (text[0] != '\0'); - textinpix = (pixs->text && (pixs->text[0] != '\0')); - if (!textin && !textinpix) { - L_ERROR("no text: %d\n", procName, recog->num_samples); - return 1; - } - textdata = (textin) ? text : pixs->text; /* do not free */ - - /* Crop and binarize if necessary */ - if (box) - pix1 = pixClipRectangle(pixs, box, NULL); - else - pix1 = pixClone(pixs); - if (pixGetDepth(pix1) > 1) - pix2 = pixConvertTo1(pix1, recog->threshold); - else - pix2 = pixClone(pix1); - pixDestroy(&pix1); - - /* Remove isolated noise, using as a criterion all components - * that are removed by a vertical opening of size 5. */ - pix3 = pixMorphSequence(pix2, "o1.5", 0); /* seed */ - pixSeedfillBinary(pix3, pix3, pix2, 8); /* fill from seed; clip to pix2 */ - pixDestroy(&pix2); - - /* Clip to foreground */ - pixClipToForeground(pix3, &pix4, NULL); - pixDestroy(&pix3); - if (!pix4) - return ERROR_INT("pix4 is empty", procName, 1); - - /* Verify that if there is more than 1 c.c., they all have - * horizontal overlap */ - na = pixCountByColumn(pix4, NULL); - numaCountNonzeroRuns(na, &nsets); - numaDestroy(&na); - if (nsets > 1) { - L_WARNING("found %d sets of horiz separated c.c.; skipping\n", - procName, nsets); - pixDestroy(&pix4); - return 1; - } - - pixSetText(pix4, textdata); - *ppix = pix4; - return 0; -} - - -/*! 
- * \brief recogAddSample() - * - * \param[in] recog - * \param[in] pix a single character, 1 bpp - * \param[in] debug - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The pix is 1 bpp, with the character string label embedded.
- *      (2) The pixaa_u array of the recog is initialized to accept
- *          up to 256 different classes.  When training is finished,
- *          the arrays are truncated to the actual number of classes.
- *          To pad an existing recog from the boot recognizers, training
- *          is started again; if samples from a new class are added,
- *          the pixaa_u array is extended by adding a pixa to hold them.
- * 
- */ -l_ok -recogAddSample(L_RECOG *recog, - PIX *pix, - l_int32 debug) -{ -char *text; -l_int32 npa, charint, index; -PIXA *pixa1; -PIXAA *paa; - - PROCNAME("recogAddSample"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pix || pixGetDepth(pix) != 1) - return ERROR_INT("pix not defined or not 1 bpp\n", procName, 1); - if (recog->train_done) - return ERROR_INT("not added: training has been completed", procName, 1); - paa = recog->pixaa_u; - - /* Make sure the character is in the set */ - text = pixGetText(pix); - if (l_convertCharstrToInt(text, &charint) == 1) { - L_ERROR("invalid text: %s\n", procName, text); - return 1; - } - - /* Determine the class array index. Check if the class - * alreadly exists, and if not, add it. */ - if (recogGetClassIndex(recog, charint, text, &index) == 1) { - /* New class must be added */ - npa = pixaaGetCount(paa, NULL); - if (index > npa) { - L_ERROR("oops: bad index %d > npa %d!!\n", procName, index, npa); - return 1; - } - if (index == npa) { /* paa needs to be extended */ - L_INFO("Adding new class and pixa: index = %d, text = %s\n", - procName, index, text); - pixa1 = pixaCreate(10); - pixaaAddPixa(paa, pixa1, L_INSERT); - } - } - if (debug) { - L_INFO("Identified text label: %s\n", procName, text); - L_INFO("Identified: charint = %d, index = %d\n", - procName, charint, index); - } - - /* Insert the unscaled character image into the right pixa. - * (Unscaled images are required to split touching characters.) */ - recog->num_samples++; - pixaaAddPix(paa, index, pix, NULL, L_COPY); - return 0; -} - - -/*! 
- * \brief recogModifyTemplate() - * - * \param[in] recog - * \param[in] pixs 1 bpp, to be optionally scaled and turned into - * strokes of fixed width - * \return pixd modified pix if OK, NULL on error - */ -PIX * -recogModifyTemplate(L_RECOG *recog, - PIX *pixs) -{ -l_int32 w, h, empty; -PIX *pix1, *pix2; - - PROCNAME("recogModifyTemplate"); - - if (!recog) - return (PIX *)ERROR_PTR("recog not defined", procName, NULL); - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Scale first */ - pixGetDimensions(pixs, &w, &h, NULL); - if ((recog->scalew == 0 || recog->scalew == w) && - (recog->scaleh == 0 || recog->scaleh == h)) { /* no scaling */ - pix1 = pixCopy(NULL, pixs); - } else { - pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh); - } - if (!pix1) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - - /* Then optionally convert to lines */ - if (recog->linew <= 0) { - pix2 = pixClone(pix1); - } else { - pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8); - } - pixDestroy(&pix1); - if (!pix2) - return (PIX *)ERROR_PTR("pix2 not made", procName, NULL); - - /* Make sure we still have some pixels */ - pixZero(pix2, &empty); - if (empty) { - pixDestroy(&pix2); - return (PIX *)ERROR_PTR("modified template has no pixels", - procName, NULL); - } - return pix2; -} - - -/*! - * \brief recogAverageSamples() - * - * \param[in] precog addr of existing recog; may be destroyed - * \param[in] debug - * \return 0 on success, 1 on failure - * - *
- * Notes:
- *      (1) This is only called in two situations:
- *          (a) When splitting characters using either the DID method
- *              recogDecode() or the the greedy splitter
- *              recogCorrelationBestRow()
- *          (b) By a special recognizer that is used to remove outliers.
- *          Both unscaled and scaled inputs are averaged.
- *      (2) If the data in any class is nonexistent (no samples), or
- *          very bad (no fg pixels in the average), or if the ratio
- *          of max/min average unscaled class template heights is
- *          greater than max_ht_ratio, this destroys the recog.
- *          The caller must check the return value of the recog.
- *      (3) Set debug = 1 to view the resulting templates and their centroids.
- * 
- */ -l_int32 -recogAverageSamples(L_RECOG **precog, - l_int32 debug) -{ -l_int32 i, nsamp, size, area, bx, by, badclass; -l_float32 x, y, hratio; -BOX *box; -PIXA *pixa1; -PIX *pix1, *pix2, *pix3; -PTA *pta1; -L_RECOG *recog; - - PROCNAME("recogAverageSamples"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - if ((recog = *precog) == NULL) - return ERROR_INT("recog not defined", procName, 1); - - if (recog->ave_done) { - if (debug) /* always do this if requested */ - recogShowAverageTemplates(recog); - return 0; - } - - /* Remove any previous averaging data */ - size = recog->setsize; - pixaDestroy(&recog->pixa_u); - ptaDestroy(&recog->pta_u); - numaDestroy(&recog->nasum_u); - recog->pixa_u = pixaCreate(size); - recog->pta_u = ptaCreate(size); - recog->nasum_u = numaCreate(size); - - pixaDestroy(&recog->pixa); - ptaDestroy(&recog->pta); - numaDestroy(&recog->nasum); - recog->pixa = pixaCreate(size); - recog->pta = ptaCreate(size); - recog->nasum = numaCreate(size); - - /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area. - * Note that when we threshold to 1 bpp the 8 bpp averaged template - * that is returned from the accumulator, it will not be cropped - * to the foreground. We must crop it, because the correlator - * makes that assumption and will return a zero value if the - * width or height of the two images differs by several pixels. - * But cropping to fg can cause the value of the centroid to - * change, if bx > 0 or by > 0. 
*/ - badclass = FALSE; - for (i = 0; i < size; i++) { - pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE); - pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE); - nsamp = pixaGetCount(pixa1); - nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */ - if (nsamp == 0) { /* no information for this class */ - L_ERROR("no samples in class %d\n", procName, i); - badclass = TRUE; - pixaDestroy(&pixa1); - ptaDestroy(&pta1); - break; - } else { - pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y); - pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2)); - pixInvert(pix2, pix2); - pixClipToForeground(pix2, &pix3, &box); - if (!box) { - L_ERROR("no fg pixels in average for uclass %d\n", procName, i); - badclass = TRUE; - pixDestroy(&pix1); - pixDestroy(&pix2); - pixaDestroy(&pixa1); - ptaDestroy(&pta1); - break; - } else { - boxGetGeometry(box, &bx, &by, NULL, NULL); - pixaAddPix(recog->pixa_u, pix3, L_INSERT); - ptaAddPt(recog->pta_u, x - bx, y - by); /* correct centroid */ - pixCountPixels(pix3, &area, recog->sumtab); - numaAddNumber(recog->nasum_u, area); /* foreground */ - boxDestroy(&box); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - } - pixaDestroy(&pixa1); - ptaDestroy(&pta1); - } - - /* Are any classes bad? If so, destroy the recog and return an error */ - if (badclass) { - recogDestroy(precog); - return ERROR_INT("at least 1 bad class; destroying recog", procName, 1); - } - - /* Get the range of sizes of the unscaled average templates. - * Reject if the height ratio is too large. 
*/ - pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u, - &recog->maxwidth_u, &recog->maxheight_u); - hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u; - if (hratio > recog->max_ht_ratio) { - L_ERROR("ratio of max/min height of average templates = %4.1f;" - " destroying recog\n", procName, hratio); - recogDestroy(precog); - return 1; - } - - /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area */ - for (i = 0; i < size; i++) { - pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE); - pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE); - nsamp = pixaGetCount(pixa1); - nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */ - pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y); - pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2)); - pixInvert(pix2, pix2); - pixClipToForeground(pix2, &pix3, &box); - if (!box) { - L_ERROR("no fg pixels in average for sclass %d\n", procName, i); - badclass = TRUE; - pixDestroy(&pix1); - pixDestroy(&pix2); - pixaDestroy(&pixa1); - ptaDestroy(&pta1); - break; - } else { - boxGetGeometry(box, &bx, &by, NULL, NULL); - pixaAddPix(recog->pixa, pix3, L_INSERT); - ptaAddPt(recog->pta, x - bx, y - by); /* correct centroid */ - pixCountPixels(pix3, &area, recog->sumtab); - numaAddNumber(recog->nasum, area); /* foreground */ - boxDestroy(&box); - } - pixDestroy(&pix1); - pixDestroy(&pix2); - pixaDestroy(&pixa1); - ptaDestroy(&pta1); - } - - if (badclass) { - recogDestroy(precog); - return ERROR_INT("at least 1 bad class; destroying recog", procName, 1); - } - - /* Get the range of widths of the scaled average templates */ - pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL); - - /* Get dimensions useful for splitting */ - recog->min_splitw = L_MAX(5, recog->minwidth_u - 5); - recog->max_splith = recog->maxheight_u + 12; /* allow for skew */ - - if (debug) - recogShowAverageTemplates(recog); - - recog->ave_done = TRUE; - return 0; -} - - -/*! 
- * \brief pixaAccumulateSamples() - * - * \param[in] pixa of samples from the same class, 1 bpp - * \param[in] pta [optional] of centroids of the samples - * \param[out] ppixd accumulated samples, 8 bpp - * \param[out] px [optional] average x coordinate of centroids - * \param[out] py [optional] average y coordinate of centroids - * \return 0 on success, 1 on failure - * - *
- * Notes:
- *      (1) This generates an aligned (by centroid) sum of the input pix.
- *      (2) We use only the first 256 samples; that's plenty.
- *      (3) If pta is not input, we generate two tables, and discard
- *          after use.  If this is called many times, it is better
- *          to precompute the pta.
- * 
- */ -l_int32 -pixaAccumulateSamples(PIXA *pixa, - PTA *pta, - PIX **ppixd, - l_float32 *px, - l_float32 *py) -{ -l_int32 i, n, maxw, maxh, xdiff, ydiff; -l_int32 *centtab, *sumtab; -l_float32 xc, yc, xave, yave; -PIX *pix1, *pix2, *pixsum; -PTA *ptac; - - PROCNAME("pixaAccumulateSamples"); - - if (px) *px = 0; - if (py) *py = 0; - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - if (pta && ptaGetCount(pta) != n) - return ERROR_INT("pta count differs from pixa count", procName, 1); - n = L_MIN(n, 256); /* take the first 256 only */ - if (n == 0) - return ERROR_INT("pixa array empty", procName, 1); - - /* Find the centroids */ - if (pta) { - ptac = ptaClone(pta); - } else { /* generate them here */ - ptac = ptaCreate(n); - centtab = makePixelCentroidTab8(); - sumtab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - pixCentroid(pix1, centtab, sumtab, &xc, &yc); - ptaAddPt(ptac, xc, yc); - } - LEPT_FREE(centtab); - LEPT_FREE(sumtab); - } - - /* Find the average value of the centroids */ - xave = yave = 0; - for (i = 0; i < n; i++) { - ptaGetPt(pta, i, &xc, &yc); - xave += xc; - yave += yc; - } - xave = xave / (l_float32)n; - yave = yave / (l_float32)n; - if (px) *px = xave; - if (py) *py = yave; - - /* Place all pix with their centroids located at the average - * centroid value, and sum the results. Make the accumulator - * image slightly larger than the largest sample to insure - * that all pixels are represented in the accumulator. 
*/ - pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh); - pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0); - pix1 = pixCreate(maxw, maxh, 1); - for (i = 0; i < n; i++) { - pix2 = pixaGetPix(pixa, i, L_CLONE); - ptaGetPt(ptac, i, &xc, &yc); - xdiff = (l_int32)(xave - xc); - ydiff = (l_int32)(yave - yc); - pixClearAll(pix1); - pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC, - pix2, 0, 0); - pixAccumulate(pixsum, pix1, L_ARITH_ADD); - pixDestroy(&pix2); - } - *ppixd = pixFinalAccumulate(pixsum, 0, 8); - - pixDestroy(&pix1); - pixDestroy(&pixsum); - ptaDestroy(&ptac); - return 0; -} - - -/*! - * \brief recogTrainingFinished() - * - * \param[in] precog addr of recog - * \param[in] modifyflag 1 to use recogModifyTemplate(); 0 otherwise - * \param[in] minsize set to -1 for default - * \param[in] minfract set to -1.0 for default - * \return 0 if OK, 1 on error (input recog will be destroyed) - * - *
- * Notes:
- *      (1) This must be called after all training samples have been added.
- *      (2) If the templates are not good enough, the recog input is destroyed.
- *      (3) Usually, %modifyflag == 1, because we want to apply
- *          recogModifyTemplate() to generate the actual templates
- *          that will be used.  The one exception is when reading a
- *          serialized recog: there we want to put the same set of
- *          templates in both the unscaled and modified pixaa.
- *          See recogReadStream() to see why we do this.
- *      (4) See recogTemplatesAreOK() for %minsize and %minfract usage.
- *      (5) The following things are done here:
- *          (a) Allocate (or reallocate) storage for (possibly) modified
- *              bitmaps, centroids, and fg areas.
- *          (b) Generate the (possibly) modified bitmaps.
- *          (c) Compute centroid and fg area data for both unscaled and
- *              modified bitmaps.
- *          (d) Truncate the pixaa, ptaa and numaa arrays down from
- *              256 to the actual size.
- *      (6) Putting these operations here makes it simple to recompute
- *          the recog with different modifications on the bitmaps.
- *      (7) Call recogShowContent() to display the templates, both
- *          unscaled and modified.
- * 
- */ -l_ok -recogTrainingFinished(L_RECOG **precog, - l_int32 modifyflag, - l_int32 minsize, - l_float32 minfract) -{ -l_int32 ok, i, j, size, nc, ns, area; -l_float32 xave, yave; -PIX *pix, *pixd; -PIXA *pixa; -PIXAA *paa; -PTA *pta; -PTAA *ptaa; -L_RECOG *recog; - - PROCNAME("recogTrainingFinished"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - if ((recog = *precog) == NULL) - return ERROR_INT("recog not defined", procName, 1); - if (recog->train_done) return 0; - - /* Test the input templates */ - recogTemplatesAreOK(recog, minsize, minfract, &ok); - if (!ok) { - recogDestroy(precog); - return ERROR_INT("bad templates", procName, 1); - } - - /* Generate the storage for the possibly-scaled training bitmaps */ - size = recog->maxarraysize; - paa = pixaaCreate(size); - pixa = pixaCreate(1); - pixaaInitFull(paa, pixa); - pixaDestroy(&pixa); - pixaaDestroy(&recog->pixaa); - recog->pixaa = paa; - - /* Generate the storage for the unscaled centroid training data */ - ptaa = ptaaCreate(size); - pta = ptaCreate(0); - ptaaInitFull(ptaa, pta); - ptaaDestroy(&recog->ptaa_u); - recog->ptaa_u = ptaa; - - /* Generate the storage for the possibly-scaled centroid data */ - ptaa = ptaaCreate(size); - ptaaInitFull(ptaa, pta); - ptaDestroy(&pta); - ptaaDestroy(&recog->ptaa); - recog->ptaa = ptaa; - - /* Generate the storage for the fg area data */ - numaaDestroy(&recog->naasum_u); - numaaDestroy(&recog->naasum); - recog->naasum_u = numaaCreateFull(size, 0); - recog->naasum = numaaCreateFull(size, 0); - - paa = recog->pixaa_u; - nc = recog->setsize; - for (i = 0; i < nc; i++) { - pixa = pixaaGetPixa(paa, i, L_CLONE); - ns = pixaGetCount(pixa); - for (j = 0; j < ns; j++) { - /* Save centroid and area data for the unscaled pix */ - pix = pixaGetPix(pixa, j, L_CLONE); - pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave); - ptaaAddPt(recog->ptaa_u, i, xave, yave); - pixCountPixels(pix, &area, recog->sumtab); - numaaAddNumber(recog->naasum_u, i, 
area); /* foreground */ - - /* Insert the (optionally) scaled character image, and - * save centroid and area data for it */ - if (modifyflag == 1) - pixd = recogModifyTemplate(recog, pix); - else - pixd = pixClone(pix); - if (pixd) { - pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT); - pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave); - ptaaAddPt(recog->ptaa, i, xave, yave); - pixCountPixels(pixd, &area, recog->sumtab); - numaaAddNumber(recog->naasum, i, area); - } else { - L_ERROR("failed: modified template for class %d, sample %d\n", - procName, i, j); - } - pixDestroy(&pix); - } - pixaDestroy(&pixa); - } - - /* Truncate the arrays to those with non-empty containers */ - pixaaTruncate(recog->pixaa_u); - pixaaTruncate(recog->pixaa); - ptaaTruncate(recog->ptaa_u); - ptaaTruncate(recog->ptaa); - numaaTruncate(recog->naasum_u); - numaaTruncate(recog->naasum); - - recog->train_done = TRUE; - return 0; -} - - -/*! - * \brief recogTemplatesAreOK() - * - * \param[in] recog - * \param[in] minsize set to -1 for default - * \param[in] minfract set to -1.0 for default - * \param[out] pok set to 1 if template set is valid; 0 otherwise - * \return 1 on error; 0 otherwise. An invalid template set is not an error. - * - *
- * Notes:
- *      (1) This is called by recogTrainingFinished().  A return value of 0
- *          will cause recogTrainingFinished() to destroy the recog.
- *      (2) %minsize is the minimum number of samples required for
- *          the class; -1 uses the default
- *      (3) %minfract is the minimum fraction of classes required for
- *          the recog to be usable; -1.0 uses the default
- * 
- */ -static l_int32 -recogTemplatesAreOK(L_RECOG *recog, - l_int32 minsize, - l_float32 minfract, - l_int32 *pok) -{ -l_int32 i, n, validsets, nt; -l_float32 ratio; -NUMA *na; - - PROCNAME("recogTemplatesAreOK"); - - if (!pok) - return ERROR_INT("&ok not defined", procName, 1); - *pok = 0; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - minsize = (minsize < 0) ? DefaultMinSetSize : minsize; - minfract = (minfract < 0) ? DefaultMinSetFract : minfract; - n = pixaaGetCount(recog->pixaa_u, &na); - validsets = 0; - for (i = 0, validsets = 0; i < n; i++) { - numaGetIValue(na, i, &nt); - if (nt >= minsize) - validsets++; - } - numaDestroy(&na); - ratio = (l_float32)validsets / (l_float32)recog->charset_size; - *pok = (ratio >= minfract) ? 1 : 0; - return 0; -} - - -/*! - * \brief recogFilterPixaBySize() - * - * \param[in] pixas labeled templates - * \param[in] setsize size of character set (number of classes) - * \param[in] maxkeep max number of templates to keep in a class - * \param[in] max_ht_ratio max allowed height ratio (see below) - * \param[out] pna [optional] debug output, giving the number - * in each class after filtering; use NULL to skip - * \return pixa filtered templates, or NULL on error - * - *
- * Notes:
- *      (1) The basic assumption is that the most common and larger
- *          templates in each class are more likely to represent the
- *          characters we are interested in.  For example, larger digits
- *          are more likely to represent page numbers, and smaller digits
- *          could be data in tables.  Therefore, we bias the first
- *          stage of filtering toward the larger characters by removing
- *          very small ones, and select based on proximity of the
- *          remaining characters to median height.
- *      (2) For each of the %setsize classes, order the templates
- *          increasingly by height.  Take the rank 0.9 height.  Eliminate
- *          all templates that are shorter by more than %max_ht_ratio.
- *          Of the remaining ones, select up to %maxkeep that are closest
- *          in rank order height to the median template.
- * 
- */ -PIXA * -recogFilterPixaBySize(PIXA *pixas, - l_int32 setsize, - l_int32 maxkeep, - l_float32 max_ht_ratio, - NUMA **pna) -{ -l_int32 i, j, h90, hj, j1, j2, j90, n, nc; -l_float32 ratio; -NUMA *na; -PIXA *pixa1, *pixa2, *pixa3, *pixa4, *pixa5; -PIXAA *paa; - - PROCNAME("recogFilterPixaBySize"); - - if (pna) *pna = NULL; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL) - return (PIXA *)ERROR_PTR("paa not made", procName, NULL); - nc = pixaaGetCount(paa, NULL); - na = (pna) ? numaCreate(0) : NULL; - if (pna) *pna = na; - pixa5 = pixaCreate(0); - for (i = 0; i < nc; i++) { - pixa1 = pixaaGetPixa(paa, i, L_CLONE); - if ((n = pixaGetCount(pixa1)) == 0) { - pixaDestroy(&pixa1); - continue; - } - pixa2 = pixaSort(pixa1, L_SORT_BY_HEIGHT, L_SORT_INCREASING, NULL, - L_COPY); - j90 = (l_int32)(0.9 * n); - pixaGetPixDimensions(pixa2, j90, NULL, &h90, NULL); - pixa3 = pixaCreate(n); - for (j = 0; j < n; j++) { - pixaGetPixDimensions(pixa2, j, NULL, &hj, NULL); - ratio = (l_float32)h90 / (l_float32)hj; - if (ratio <= max_ht_ratio) - pixaAddPix(pixa3, pixaGetPix(pixa2, j, L_COPY), L_INSERT); - } - n = pixaGetCount(pixa3); - if (n <= maxkeep) { - pixa4 = pixaCopy(pixa3, L_CLONE); - } else { - j1 = (n - maxkeep) / 2; - j2 = j1 + maxkeep - 1; - pixa4 = pixaSelectRange(pixa3, j1, j2, L_CLONE); - } - if (na) numaAddNumber(na, pixaGetCount(pixa4)); - pixaJoin(pixa5, pixa4, 0, -1); - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - pixaDestroy(&pixa3); - pixaDestroy(&pixa4); - } - - pixaaDestroy(&paa); - return pixa5; -} - - -/*! 
- * \brief recogSortPixaByClass() - * - * \param[in] pixa labeled templates - * \param[in] setsize size of character set (number of classes) - * \return paa pixaa where each pixa has templates for one class, - * or null on error - */ -PIXAA * -recogSortPixaByClass(PIXA *pixa, - l_int32 setsize) -{ -PIXAA *paa; -L_RECOG *recog; - - PROCNAME("recogSortPixaByClass"); - - if (!pixa) - return (PIXAA *)ERROR_PTR("pixa not defined", procName, NULL); - - if ((recog = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0)) == NULL) - return (PIXAA *)ERROR_PTR("recog not made", procName, NULL); - paa = recog->pixaa_u; /* grab the paa of unscaled templates */ - recog->pixaa_u = NULL; - recogDestroy(&recog); - return paa; -} - - -/*! - * \brief recogRemoveOutliers1() - * - * \param[in] precog addr of recog with unscaled labeled templates - * \param[in] minscore keep everything with at least this score - * \param[in] mintarget minimum desired number to retain if possible - * \param[in] minsize minimum number of samples required for a class - * \param[out] ppixsave [optional debug] saved templates, with scores - * \param[out] ppixrem [optional debug] removed templates, with scores - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This is a convenience wrapper when using default parameters
- *          for the recog.  See pixaRemoveOutliers1() for details.
- *      (2) If this succeeds, the new recog replaces the input recog;
- *          if it fails, the input recog is destroyed.
- * 
- */ -l_ok -recogRemoveOutliers1(L_RECOG **precog, - l_float32 minscore, - l_int32 mintarget, - l_int32 minsize, - PIX **ppixsave, - PIX **ppixrem) -{ -PIXA *pixa1, *pixa2; -L_RECOG *recog; - - PROCNAME("recogRemoveOutliers1"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - if (*precog == NULL) - return ERROR_INT("recog not defined", procName, 1); - - /* Extract the unscaled templates */ - pixa1 = recogExtractPixa(*precog); - recogDestroy(precog); - - pixa2 = pixaRemoveOutliers1(pixa1, minscore, mintarget, minsize, - ppixsave, ppixrem); - pixaDestroy(&pixa1); - if (!pixa2) - return ERROR_INT("failure to remove outliers", procName, 1); - - recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1); - pixaDestroy(&pixa2); - if (!recog) - return ERROR_INT("failure to make recog from pixa sans outliers", - procName, 1); - - *precog = recog; - return 0; -} - - -/*! - * \brief pixaRemoveOutliers1() - * - * \param[in] pixas unscaled labeled templates - * \param[in] minscore keep everything with at least this score; - * use -1.0 for default. - * \param[in] mintarget minimum desired number to retain if possible; - * use -1 for default. - * \param[in] minsize minimum number of samples required for a class; - * use -1 for default. - * \param[out] ppixsave [optional debug] saved templates, with scores - * \param[out] ppixrem [optional debug] removed templates, with scores - * \return pixa of unscaled templates to be kept, or NULL on error - * - *
- * Notes:
- *      (1) Removing outliers is particularly important when recognition
- *          goes against all the samples in the training set, as opposed
- *          to the averages for each class.  The reason is that we get
- *          an identification error if a mislabeled template is a best
- *          match for an input sample.
- *      (2) Because the score values depend strongly on the quality
- *          of the character images, to avoid losing too many samples
- *          we supplement a minimum score for retention with a score
- *          necessary to acquire the minimum target number of templates.
- *          To do this we are willing to use a lower threshold,
- *          LowerScoreThreshold, on the score.  Consequently, with
- *          poor quality templates, we may keep samples with a score
- *          less than %minscore, but never less than LowerScoreThreshold.
- *          And if the number of samples is less than %minsize, we do
- *          not use any.
- *      (3) This is meant to be used on a BAR, where the templates all
- *          come from the same book; use minscore ~0.75.
- *      (4) Method: make a scaled recog from the input %pixas.  Then,
- *          for each class: generate the averages, match each
- *          scaled template against the average, and save unscaled
- *          templates that had a sufficiently good match.
- * 
- */ -PIXA * -pixaRemoveOutliers1(PIXA *pixas, - l_float32 minscore, - l_int32 mintarget, - l_int32 minsize, - PIX **ppixsave, - PIX **ppixrem) -{ -l_int32 i, j, debug, n, area1, area2; -l_float32 x1, y1, x2, y2, minfract, score, rankscore, threshscore; -NUMA *nasum, *narem, *nasave, *nascore; -PIX *pix1, *pix2; -PIXA *pixa, *pixarem, *pixad; -PTA *pta; -L_RECOG *recog; - - PROCNAME("pixaRemoveOutliers1"); - - if (ppixsave) *ppixsave = NULL; - if (ppixrem) *ppixrem = NULL; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - minscore = L_MIN(minscore, 1.0); - if (minscore <= 0.0) - minscore = DefaultMinScore; - mintarget = L_MIN(mintarget, 3); - if (mintarget <= 0) - mintarget = DefaultMinTarget; - if (minsize < 0) - minsize = DefaultMinSetSize; - - /* Make a special height-scaled recognizer with average templates */ - debug = (ppixsave || ppixrem) ? 1 : 0; - recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1); - if (!recog) - return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL); - recogAverageSamples(&recog, debug); - if (!recog) - return (PIXA *)ERROR_PTR("bad templates", procName, NULL); - - nasave = (ppixsave) ? numaCreate(0) : NULL; - pixarem = (ppixrem) ? pixaCreate(0) : NULL; - narem = (ppixrem) ? 
numaCreate(0) : NULL; - - pixad = pixaCreate(0); - for (i = 0; i < recog->setsize; i++) { - /* Access the average template and values for scaled - * images in this class */ - pix1 = pixaGetPix(recog->pixa, i, L_CLONE); - ptaGetPt(recog->pta, i, &x1, &y1); - numaGetIValue(recog->nasum, i, &area1); - - /* Get the scores for each sample in the class */ - pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); - pta = ptaaGetPta(recog->ptaa, i, L_CLONE); /* centroids */ - nasum = numaaGetNuma(recog->naasum, i, L_CLONE); /* fg areas */ - n = pixaGetCount(pixa); - nascore = numaCreate(n); - for (j = 0; j < n; j++) { - pix2 = pixaGetPix(pixa, j, L_CLONE); - ptaGetPt(pta, j, &x2, &y2); /* centroid average */ - numaGetIValue(nasum, j, &area2); /* fg sum average */ - pixCorrelationScoreSimple(pix1, pix2, area1, area2, - x1 - x2, y1 - y2, 5, 5, - recog->sumtab, &score); - numaAddNumber(nascore, score); - if (debug && score == 0.0) /* typ. large size difference */ - lept_stderr("Got 0 score for i = %d, j = %d\n", i, j); - pixDestroy(&pix2); - } - pixDestroy(&pix1); - - /* Find the rankscore, corresponding to the 1.0 - minfract. - * To attempt to maintain the minfract of templates, use as a - * cutoff the minimum of minscore and the rank score. However, - * no template is saved with an actual score less than - * that at least one template is kept. */ - minfract = (l_float32)mintarget / (l_float32)n; - numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore); - threshscore = L_MAX(LowerScoreThreshold, - L_MIN(minscore, rankscore)); - if (debug) { - L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n", - procName, minscore, rankscore, threshscore); - } - - /* Save templates that are at or above threshold. - * Toss any classes with less than %minsize templates. 
*/ - for (j = 0; j < n; j++) { - numaGetFValue(nascore, j, &score); - pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY); - if (score >= threshscore && n >= minsize) { - pixaAddPix(pixad, pix1, L_INSERT); - if (nasave) numaAddNumber(nasave, score); - } else if (debug) { - pixaAddPix(pixarem, pix1, L_INSERT); - numaAddNumber(narem, score); - } else { - pixDestroy(&pix1); - } - } - - pixaDestroy(&pixa); - ptaDestroy(&pta); - numaDestroy(&nasum); - numaDestroy(&nascore); - } - - if (ppixsave) { - *ppixsave = pixDisplayOutliers(pixad, nasave); - numaDestroy(&nasave); - } - if (ppixrem) { - *ppixrem = pixDisplayOutliers(pixarem, narem); - pixaDestroy(&pixarem); - numaDestroy(&narem); - } - recogDestroy(&recog); - return pixad; -} - - -/*! - * \brief recogRemoveOutliers2() - * - * \param[in] precog addr of recog with unscaled labeled templates - * \param[in] minscore keep everything with at least this score - * \param[in] minsize minimum number of samples required for a class - * \param[out] ppixsave [optional debug] saved templates, with scores - * \param[out] ppixrem [optional debug] removed templates, with scores - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This is a convenience wrapper when using default parameters
- *          for the recog.  See pixaRemoveOutliers2() for details.
- *      (2) If this succeeds, the new recog replaces the input recog;
- *          if it fails, the input recog is destroyed.
- * 
- */ -l_ok -recogRemoveOutliers2(L_RECOG **precog, - l_float32 minscore, - l_int32 minsize, - PIX **ppixsave, - PIX **ppixrem) -{ -PIXA *pixa1, *pixa2; -L_RECOG *recog; - - PROCNAME("recogRemoveOutliers2"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - if (*precog == NULL) - return ERROR_INT("recog not defined", procName, 1); - - /* Extract the unscaled templates */ - pixa1 = recogExtractPixa(*precog); - recogDestroy(precog); - - pixa2 = pixaRemoveOutliers2(pixa1, minscore, minsize, ppixsave, ppixrem); - pixaDestroy(&pixa1); - if (!pixa2) - return ERROR_INT("failure to remove outliers", procName, 1); - - recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1); - pixaDestroy(&pixa2); - if (!recog) - return ERROR_INT("failure to make recog from pixa sans outliers", - procName, 1); - - *precog = recog; - return 0; -} - - -/*! - * \brief pixaRemoveOutliers2() - * - * \param[in] pixas unscaled labeled templates - * \param[in] minscore keep everything with at least this score; - * use -1.0 for default. - * \param[in] minsize minimum number of samples required for a class; - * use -1 for default. - * \param[out] ppixsave [optional debug] saved templates, with scores - * \param[out] ppixrem [optional debug] removed templates, with scores - * \return pixa of unscaled templates to be kept, or NULL on error - * - *
- * Notes:
- *      (1) Removing outliers is particularly important when recognition
- *          goes against all the samples in the training set, as opposed
- *          to the averages for each class.  The reason is that we get
- *          an identification error if a mislabeled template is a best
- *          match for an input sample.
- *      (2) This method compares each template against the average templates
- *          of each class, and discards any template that has a higher
- *          correlation to a class different from its own.  It also
- *          sets a lower bound on correlation scores with its class average.
- *      (3) This is meant to be used on a BAR, where the templates all
- *          come from the same book; use minscore ~0.75.
- * 
- */ -PIXA * -pixaRemoveOutliers2(PIXA *pixas, - l_float32 minscore, - l_int32 minsize, - PIX **ppixsave, - PIX **ppixrem) -{ -l_int32 i, j, k, n, area1, area2, maxk, debug; -l_float32 x1, y1, x2, y2, score, maxscore; -NUMA *nan, *nascore, *nasave; -PIX *pix1, *pix2, *pix3; -PIXA *pixarem, *pixad; -L_RECOG *recog; - - PROCNAME("pixaRemoveOutliers2"); - - if (ppixsave) *ppixsave = NULL; - if (ppixrem) *ppixrem = NULL; - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - minscore = L_MIN(minscore, 1.0); - if (minscore <= 0.0) - minscore = DefaultMinScore; - if (minsize < 0) - minsize = DefaultMinSetSize; - - /* Make a special height-scaled recognizer with average templates */ - debug = (ppixsave || ppixrem) ? 1 : 0; - recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1); - if (!recog) - return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL); - recogAverageSamples(&recog, debug); - if (!recog) - return (PIXA *)ERROR_PTR("bad templates", procName, NULL); - - nasave = (ppixsave) ? numaCreate(0) : NULL; - pixarem = (ppixrem) ? pixaCreate(0) : NULL; - - pixad = pixaCreate(0); - pixaaGetCount(recog->pixaa, &nan); /* number of templates in each class */ - for (i = 0; i < recog->setsize; i++) { - /* Get the scores for each sample in the class, when comparing - * with averages from all the classes. 
*/ - numaGetIValue(nan, i, &n); - for (j = 0; j < n; j++) { - pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE); - ptaaGetPt(recog->ptaa, i, j, &x1, &y1); /* centroid */ - numaaGetValue(recog->naasum, i, j, NULL, &area1); /* fg sum */ - nascore = numaCreate(n); - for (k = 0; k < recog->setsize; k++) { /* average templates */ - pix2 = pixaGetPix(recog->pixa, k, L_CLONE); - ptaGetPt(recog->pta, k, &x2, &y2); /* average centroid */ - numaGetIValue(recog->nasum, k, &area2); /* average fg sum */ - pixCorrelationScoreSimple(pix1, pix2, area1, area2, - x1 - x2, y1 - y2, 5, 5, - recog->sumtab, &score); - numaAddNumber(nascore, score); - pixDestroy(&pix2); - } - - /* Save templates that are in the correct class and - * at or above threshold. Toss any classes with less - * than %minsize templates. */ - numaGetMax(nascore, &maxscore, &maxk); - if (maxk == i && maxscore >= minscore && n >= minsize) { - /* save it */ - pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY); - pixaAddPix(pixad, pix3, L_INSERT); - if (nasave) numaAddNumber(nasave, maxscore); - } else if (ppixrem) { /* outlier */ - pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore); - pixaAddPix(pixarem, pix3, L_INSERT); - } - numaDestroy(&nascore); - pixDestroy(&pix1); - } - } - - if (ppixsave) { - *ppixsave = pixDisplayOutliers(pixad, nasave); - numaDestroy(&nasave); - } - if (ppixrem) { - *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2); - pixaDestroy(&pixarem); - } - - numaDestroy(&nan); - recogDestroy(&recog); - return pixad; -} - - -/*------------------------------------------------------------------------* - * Training on unlabeled data * - *------------------------------------------------------------------------*/ -/*! 
- * \brief recogTrainFromBoot() - * - * \param[in] recogboot labeled boot recognizer - * \param[in] pixas set of unlabeled input characters - * \param[in] minscore min score for accepting the example; e.g., 0.75 - * \param[in] threshold for binarization, if needed - * \param[in] debug 1 for debug output saved to recogboot; 0 otherwise - * \return pixad labeled version of input pixas, trained on a BSR, - * or NULL on error - * - *
- * Notes:
- *      (1) This takes %pixas of unscaled single characters and %recboot,
- *          a bootstrep recognizer (BSR) that has been set up with parameters
- *            * scaleh: scale all templates to this height
- *            * linew: width of normalized strokes, or 0 if using
- *              the input image
- *          It modifies the pix in %pixas accordingly and correlates
- *          with the templates in the BSR.  It returns those input
- *          images in %pixas whose best correlation with the BSR is at
- *          or above %minscore.  The returned pix have added text labels
- *          for the text string of the class to which the best
- *          correlated template belongs.
- *      (2) Identification occurs in scaled mode (typically with h = 40),
- *          optionally using a width-normalized line images derived
- *          from those in %pixas.
- * 
- */ -PIXA * -recogTrainFromBoot(L_RECOG *recogboot, - PIXA *pixas, - l_float32 minscore, - l_int32 threshold, - l_int32 debug) -{ -char *text; -l_int32 i, n, same, maxd, scaleh, linew; -l_float32 score; -PIX *pix1, *pix2, *pixdb; -PIXA *pixa1, *pixa2, *pixa3, *pixad; - - PROCNAME("recogTrainFromBoot"); - - if (!recogboot) - return (PIXA *)ERROR_PTR("recogboot not defined", procName, NULL); - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - - /* Make sure all input pix are 1 bpp */ - if ((n = pixaGetCount(pixas)) == 0) - return (PIXA *)ERROR_PTR("no pix in pixa", procName, NULL); - pixaVerifyDepth(pixas, &same, &maxd); - if (maxd == 1) { - pixa1 = pixaCopy(pixas, L_COPY); - } else { - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixConvertTo1(pix1, threshold); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - } - - /* Scale the input images to match the BSR */ - scaleh = recogboot->scaleh; - linew = recogboot->linew; - pixa2 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa1, i, L_CLONE); - pix2 = pixScaleToSize(pix1, 0, scaleh); - pixaAddPix(pixa2, pix2, L_INSERT); - pixDestroy(&pix1); - } - pixaDestroy(&pixa1); - - /* Optionally convert to width-normalized line */ - if (linew > 0) - pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8); - else - pixa3 = pixaCopy(pixa2, L_CLONE); - pixaDestroy(&pixa2); - - /* Identify using recogboot */ - n = pixaGetCount(pixa3); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa3, i, L_COPY); - pixSetText(pix1, NULL); /* remove any existing text or labelling */ - if (!debug) { - recogIdentifyPix(recogboot, pix1, NULL); - } else { - recogIdentifyPix(recogboot, pix1, &pixdb); - pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT); - } - rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL); - if (score >= minscore) { - pix2 = pixaGetPix(pixas, i, L_COPY); - pixSetText(pix2, 
text); - pixaAddPix(pixad, pix2, L_INSERT); - pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY); - } - LEPT_FREE(text); - pixDestroy(&pix1); - } - pixaDestroy(&pixa3); - - return pixad; -} - - -/*------------------------------------------------------------------------* - * Padding the digit training set * - *------------------------------------------------------------------------*/ -/*! - * \brief recogPadDigitTrainingSet() - * - * \param[in,out] precog trained; if padding is needed, it is replaced - * by a a new padded recog - * \param[in] scaleh must be > 0; suggest ~40. - * \param[in] linew use 0 for original scanned images - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a no-op if padding is not needed.  However,
- *          if it is, this replaces the input recog with a new recog,
- *          padded appropriately with templates from a boot recognizer,
- *          and set up with correlation templates derived from
- *          %scaleh and %linew.
- * 
- */ -l_ok -recogPadDigitTrainingSet(L_RECOG **precog, - l_int32 scaleh, - l_int32 linew) -{ -PIXA *pixa; -L_RECOG *recog1, *recog2; -SARRAY *sa; - - PROCNAME("recogPadDigitTrainingSet"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - recog1 = *precog; - - recogIsPaddingNeeded(recog1, &sa); - if (!sa) return 0; - - /* Get a new pixa with the padding templates added */ - pixa = recogAddDigitPadTemplates(recog1, sa); - sarrayDestroy(&sa); - if (!pixa) - return ERROR_INT("pixa not made", procName, 1); - - /* Need to use templates that are scaled to a fixed height. */ - if (scaleh <= 0) { - L_WARNING("templates must be scaled to fixed height; using %d\n", - procName, 40); - scaleh = 40; - } - - /* Create a hybrid recog, composed of templates from both - * the original and bootstrap sources. */ - recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold, - recog1->maxyshift); - pixaDestroy(&pixa); - recogDestroy(precog); - *precog = recog2; - return 0; -} - - -/*! - * \brief recogIsPaddingNeeded() - * - * \param[in] recog trained - * \param[out] psa addr of returned string containing text value - * \return 1 on error; 0 if OK, whether or not additional padding - * templates are required. - * - *
- * Notes:
- *      (1) This returns a string array in &sa containing character values
- *          for which extra templates are needed; this sarray is
- *          used by recogGetPadTemplates().  It returns NULL
- *          if no padding templates are needed.
- * 
- */ -l_int32 -recogIsPaddingNeeded(L_RECOG *recog, - SARRAY **psa) -{ -char *str; -l_int32 i, nt, min_nopad, nclass, allclasses; -l_float32 minval; -NUMA *naclass; -SARRAY *sa; - - PROCNAME("recogIsPaddingNeeded"); - - if (!psa) - return ERROR_INT("&sa not defined", procName, 1); - *psa = NULL; - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - /* Do we have samples from all classes? */ - nclass = pixaaGetCount(recog->pixaa_u, &naclass); /* unscaled bitmaps */ - allclasses = (nclass == recog->charset_size) ? 1 : 0; - - /* Are there enough samples in each class already? */ - min_nopad = recog->min_nopad; - numaGetMin(naclass, &minval, NULL); - if (allclasses && (minval >= min_nopad)) { - numaDestroy(&naclass); - return 0; - } - - /* Are any classes not represented? */ - sa = recogAddMissingClassStrings(recog); - *psa = sa; - - /* Are any other classes under-represented? */ - for (i = 0; i < nclass; i++) { - numaGetIValue(naclass, i, &nt); - if (nt < min_nopad) { - str = sarrayGetString(recog->sa_text, i, L_COPY); - sarrayAddString(sa, str, L_INSERT); - } - } - numaDestroy(&naclass); - return 0; -} - - -/*! - * \brief recogAddMissingClassStrings() - * - * \param[in] recog trained - * \return sa of class string missing in %recog, or NULL on error - * - *
- * Notes:
- *      (1) This returns an empty %sa if there is at least one template
- *          in each class in %recog.
- * 
- */ -static SARRAY * -recogAddMissingClassStrings(L_RECOG *recog) -{ -char *text; -char str[4]; -l_int32 i, nclass, index, ival; -NUMA *na; -SARRAY *sa; - - PROCNAME("recogAddMissingClassStrings"); - - if (!recog) - return (SARRAY *)ERROR_PTR("recog not defined", procName, NULL); - - /* Only handling digits */ - nclass = pixaaGetCount(recog->pixaa_u, NULL); /* unscaled bitmaps */ - if (recog->charset_type != 1 || nclass == 10) - return sarrayCreate(0); /* empty */ - - /* Make an indicator array for missing classes */ - na = numaCreate(0); - sa = sarrayCreate(0); - for (i = 0; i < recog->charset_size; i++) - numaAddNumber(na, 1); - for (i = 0; i < nclass; i++) { - text = sarrayGetString(recog->sa_text, i, L_NOCOPY); - index = text[0] - '0'; - numaSetValue(na, index, 0); - } - - /* Convert to string and add to output */ - for (i = 0; i < nclass; i++) { - numaGetIValue(na, i, &ival); - if (ival == 1) { - str[0] = '0' + i; - str[1] = '\0'; - sarrayAddString(sa, str, L_COPY); - } - } - numaDestroy(&na); - return sa; -} - - -/*! - * \brief recogAddDigitPadTemplates() - * - * \param[in] recog trained - * \param[in] sa set of text strings that need to be padded - * \return pixa of all templates from %recog and the additional pad - * templates from a boot recognizer; or NULL on error - * - *
- * Notes:
- *      (1) Call recogIsPaddingNeeded() first, which returns %sa of
- *          template text strings for classes where more templates
- *          are needed.
- * 
- */ -PIXA * -recogAddDigitPadTemplates(L_RECOG *recog, - SARRAY *sa) -{ -char *str, *text; -l_int32 i, j, n, nt; -PIX *pix; -PIXA *pixa1, *pixa2; - - PROCNAME("recogAddDigitPadTemplates"); - - if (!recog) - return (PIXA *)ERROR_PTR("recog not defined", procName, NULL); - if (!sa) - return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); - if (recogCharsetAvailable(recog->charset_type) == FALSE) - return (PIXA *)ERROR_PTR("boot charset not available", procName, NULL); - - /* Make boot recog templates */ - pixa1 = recogMakeBootDigitTemplates(0, 0); - n = pixaGetCount(pixa1); - - /* Extract the unscaled templates from %recog */ - pixa2 = recogExtractPixa(recog); - - /* Add selected boot recog templates based on the text strings in sa */ - nt = sarrayGetCount(sa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa1, i, L_CLONE); - text = pixGetText(pix); - for (j = 0; j < nt; j++) { - str = sarrayGetString(sa, j, L_NOCOPY); - if (!strcmp(text, str)) { - pixaAddPix(pixa2, pix, L_COPY); - break; - } - } - pixDestroy(&pix); - } - - pixaDestroy(&pixa1); - return pixa2; -} - - -/*! - * \brief recogCharsetAvailable() - * - * \param[in] type of charset for padding - * \return 1 if available; 0 if not. - */ -static l_int32 -recogCharsetAvailable(l_int32 type) -{ -l_int32 ret; - - PROCNAME("recogCharsetAvailable"); - - switch (type) - { - case L_ARABIC_NUMERALS: - ret = TRUE; - break; - case L_LC_ROMAN_NUMERALS: - case L_UC_ROMAN_NUMERALS: - case L_LC_ALPHA: - case L_UC_ALPHA: - L_INFO("charset type %d not available\n", procName, type); - ret = FALSE; - break; - default: - L_INFO("charset type %d is unknown\n", procName, type); - ret = FALSE; - break; - } - - return ret; -} - - -/*------------------------------------------------------------------------* - * Making a boot digit recognizer * - *------------------------------------------------------------------------*/ -/*! 
- * \brief recogMakeBootDigitRecog() - * - * \param[in] nsamp number of samples of each digit; or 0 - * \param[in] scaleh scale all heights to this; typ. use 40 - * \param[in] linew normalized line width; typ. use 5; 0 to skip - * \param[in] maxyshift from nominal centroid alignment; typically 0 or 1 - * \param[in] debug 1 for showing templates; 0 otherwise - * \return recog, or NULL on error - * - *
- * Notes:
- *     (1) This takes a set of pre-computed, labeled pixa of single
- *         digits, and generates a recognizer from them.
- *         The templates used in the recognizer can be modified by:
- *         - scaling (isotropically to fixed height)
- *         - generating a skeleton and thickening so that all strokes
- *           have the same width.
- *     (2) The resulting templates are scaled versions of either the
- *         input bitmaps or images with fixed line widths.  To use the
- *         input bitmaps, set %linew = 0; otherwise, set %linew to the
- *         desired line width.
- *     (3) If %nsamp == 0, this uses and extends the output from
- *         three boot generators:
- *            l_bootnum_gen1, l_bootnum_gen2, l_bootnum_gen3.
- *         Otherwise, it uses exactly %nsamp templates of each digit,
- *         extracted by l_bootnum_gen4.
- * 
- */ -L_RECOG * -recogMakeBootDigitRecog(l_int32 nsamp, - l_int32 scaleh, - l_int32 linew, - l_int32 maxyshift, - l_int32 debug) - -{ -PIXA *pixa; -L_RECOG *recog; - - /* Get the templates, extended by horizontal scaling */ - pixa = recogMakeBootDigitTemplates(nsamp, debug); - - /* Make the boot recog; recogModifyTemplate() will scale the - * templates and optionally turn them into strokes of fixed width. */ - recog = recogCreateFromPixa(pixa, 0, scaleh, linew, 128, maxyshift); - pixaDestroy(&pixa); - if (debug) - recogShowContent(stderr, recog, 0, 1); - - return recog; -} - - -/*! - * \brief recogMakeBootDigitTemplates() - * - * \param[in] nsamp number of samples of each digit; or 0 - * \param[in] debug 1 for display of templates - * \return pixa of templates; or NULL on error - * - *
- * Notes:
- *     (1) See recogMakeBootDigitRecog().
- * 
- */ -PIXA * -recogMakeBootDigitTemplates(l_int32 nsamp, - l_int32 debug) -{ -NUMA *na1; -PIX *pix1, *pix2, *pix3; -PIXA *pixa1, *pixa2, *pixa3; - - if (nsamp > 0) { - pixa1 = l_bootnum_gen4(nsamp); - if (debug) { - pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, - 2, 6, 0xff000000); - pixDisplay(pix1, 0, 0); - pixDestroy(&pix1); - } - return pixa1; - } - - /* Else, generate from 3 pixa */ - pixa1 = l_bootnum_gen1(); - pixa2 = l_bootnum_gen2(); - pixa3 = l_bootnum_gen3(); - if (debug) { - pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000); - pix2 = pixaDisplayTiledWithText(pixa2, 1500, 1.0, 10, 2, 6, 0xff000000); - pix3 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 10, 2, 6, 0xff000000); - pixDisplay(pix1, 0, 0); - pixDisplay(pix2, 600, 0); - pixDisplay(pix3, 1200, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - } - pixaJoin(pixa1, pixa2, 0, -1); - pixaJoin(pixa1, pixa3, 0, -1); - pixaDestroy(&pixa2); - pixaDestroy(&pixa3); - - /* Extend by horizontal scaling */ - na1 = numaCreate(4); - numaAddNumber(na1, 0.9); - numaAddNumber(na1, 1.1); - numaAddNumber(na1, 1.2); - pixa2 = pixaExtendByScaling(pixa1, na1, L_HORIZ, 1); - - pixaDestroy(&pixa1); - numaDestroy(&na1); - return pixa2; -} - - -/*------------------------------------------------------------------------* - * Debugging * - *------------------------------------------------------------------------*/ -/*! 
- * \brief recogShowContent() - * - * \param[in] fp file stream - * \param[in] recog - * \param[in] index for naming of output files of template images - * \param[in] display 1 for showing template images; 0 otherwise - * \return 0 if OK, 1 on error - */ -l_ok -recogShowContent(FILE *fp, - L_RECOG *recog, - l_int32 index, - l_int32 display) -{ -char buf[128]; -l_int32 i, val, count; -PIX *pix; -NUMA *na; - - PROCNAME("recogShowContent"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - fprintf(fp, "Debug print of recog contents\n"); - fprintf(fp, " Setsize: %d\n", recog->setsize); - fprintf(fp, " Binarization threshold: %d\n", recog->threshold); - fprintf(fp, " Maximum matching y-jiggle: %d\n", recog->maxyshift); - if (recog->linew <= 0) - fprintf(fp, " Using image templates for matching\n"); - else - fprintf(fp, " Using templates with fixed line width for matching\n"); - if (recog->scalew == 0) - fprintf(fp, " No width scaling of templates\n"); - else - fprintf(fp, " Template width scaled to %d\n", recog->scalew); - if (recog->scaleh == 0) - fprintf(fp, " No height scaling of templates\n"); - else - fprintf(fp, " Template height scaled to %d\n", recog->scaleh); - fprintf(fp, " Number of samples in each class:\n"); - pixaaGetCount(recog->pixaa_u, &na); - for (i = 0; i < recog->setsize; i++) { - l_dnaGetIValue(recog->dna_tochar, i, &val); - numaGetIValue(na, i, &count); - if (val < 128) - fprintf(fp, " class %d, char %c: %d\n", i, val, count); - else - fprintf(fp, " class %d, val %d: %d\n", i, val, count); - } - numaDestroy(&na); - - if (display) { - lept_mkdir("lept/recog"); - pix = pixaaDisplayByPixa(recog->pixaa_u, 50, 1.0, 20, 20, 0); - snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index); - pixWriteDebug(buf, pix, IFF_PNG); - pixDisplay(pix, 0, 200 * index); - pixDestroy(&pix); - if (recog->train_done) { - pix = pixaaDisplayByPixa(recog->pixaa, 50, 1.0, 
20, 20, 0); - snprintf(buf, sizeof(buf), - "/tmp/lept/recog/templates.%d.png", index); - pixWriteDebug(buf, pix, IFF_PNG); - pixDisplay(pix, 800, 200 * index); - pixDestroy(&pix); - } - } - return 0; -} - - -/*! - * \brief recogDebugAverages() - * - * \param[in] precog addr of recog - * \param[in] debug 0 no output; 1 for images; 2 for text; 3 for both - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates an image that pairs each of the input images used
- *          in training with the average template that it is best
- *          correlated to.  This is written into the recog.
- *      (2) It also generates pixa_tr of all the input training images,
- *          which can be used, e.g., in recogShowMatchesInRange().
- *      (3) Destroys the recog if the averaging function finds any bad classes.
- * 
- */ -l_ok -recogDebugAverages(L_RECOG **precog, - l_int32 debug) -{ -l_int32 i, j, n, np, index; -l_float32 score; -PIX *pix1, *pix2, *pix3; -PIXA *pixa, *pixat; -PIXAA *paa1, *paa2; -L_RECOG *recog; - - PROCNAME("recogDebugAverages"); - - if (!precog) - return ERROR_INT("&recog not defined", procName, 1); - if ((recog = *precog) == NULL) - return ERROR_INT("recog not defined", procName, 1); - - /* Mark the training as finished if necessary, and make sure - * that the average templates have been built. */ - recogAverageSamples(&recog, 0); - if (!recog) - return ERROR_INT("averaging failed; recog destroyed", procName, 1); - - /* Save a pixa of all the training examples */ - paa1 = recog->pixaa; - if (!recog->pixa_tr) - recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE); - - /* Destroy any existing image and make a new one */ - if (recog->pixdb_ave) - pixDestroy(&recog->pixdb_ave); - n = pixaaGetCount(paa1, NULL); - paa2 = pixaaCreate(n); - for (i = 0; i < n; i++) { - pixa = pixaCreate(0); - pixat = pixaaGetPixa(paa1, i, L_CLONE); - np = pixaGetCount(pixat); - for (j = 0; j < np; j++) { - pix1 = pixaaGetPix(paa1, i, j, L_CLONE); - recogIdentifyPix(recog, pix1, &pix2); - rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, - NULL, NULL); - if (debug >= 2) - lept_stderr("index = %d, score = %7.3f\n", index, score); - pix3 = pixAddBorder(pix2, 2, 1); - pixaAddPix(pixa, pix3, L_INSERT); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - pixaaAddPixa(paa2, pixa, L_INSERT); - pixaDestroy(&pixat); - } - recog->pixdb_ave = pixaaDisplayByPixa(paa2, 50, 1.0, 20, 20, 0); - if (debug % 2) { - lept_mkdir("lept/recog"); - pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave, - IFF_PNG); - pixDisplay(recog->pixdb_ave, 100, 100); - } - - pixaaDestroy(&paa2); - return 0; -} - - -/*! - * \brief recogShowAverageTemplates() - * - * \param[in] recog - * \return 0 on success, 1 on failure - * - *
- * Notes:
- *      (1) This debug routine generates a display of the averaged templates,
- *          both scaled and unscaled, with the centroid visible in red.
- * 
- */ -l_int32 -recogShowAverageTemplates(L_RECOG *recog) -{ -l_int32 i, size; -l_float32 x, y; -PIX *pix1, *pix2, *pixr; -PIXA *pixat, *pixadb; - - PROCNAME("recogShowAverageTemplates"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - - lept_stderr("min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n", - recog->minwidth_u, recog->maxwidth_u, - recog->minheight_u, recog->maxheight_u); - lept_stderr("min splitw = %d, max splith = %d\n", - recog->min_splitw, recog->max_splith); - - pixaDestroy(&recog->pixadb_ave); - - pixr = pixCreate(3, 3, 32); /* 3x3 red square for centroid location */ - pixSetAllArbitrary(pixr, 0xff000000); - pixadb = pixaCreate(2); - - /* Unscaled bitmaps */ - size = recog->setsize; - pixat = pixaCreate(size); - for (i = 0; i < size; i++) { - if ((pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE)) == NULL) - continue; - pix2 = pixConvertTo32(pix1); - ptaGetPt(recog->pta_u, i, &x, &y); - pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3, - PIX_SRC, pixr, 0, 0); - pixaAddPix(pixat, pix2, L_INSERT); - pixDestroy(&pix1); - } - pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - pixDisplay(pix1, 100, 100); - pixaDestroy(&pixat); - - /* Scaled bitmaps */ - pixat = pixaCreate(size); - for (i = 0; i < size; i++) { - if ((pix1 = pixaGetPix(recog->pixa, i, L_CLONE)) == NULL) - continue; - pix2 = pixConvertTo32(pix1); - ptaGetPt(recog->pta, i, &x, &y); - pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3, - PIX_SRC, pixr, 0, 0); - pixaAddPix(pixat, pix2, L_INSERT); - pixDestroy(&pix1); - } - pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0); - pixaAddPix(pixadb, pix1, L_INSERT); - pixDisplay(pix1, 100, 100); - pixaDestroy(&pixat); - pixDestroy(&pixr); - recog->pixadb_ave = pixadb; - return 0; -} - - -/*! 
- * \brief pixDisplayOutliers() - * - * \param[in] pixas unscaled labeled templates - * \param[in] nas scores of templates (against class averages) - * \return pix tiled pixa with text and scores, or NULL on failure - * - *
- * Notes:
- *      (1) This debug routine is called from recogRemoveOutliers2(),
- *          and takes the saved templates and their scores as input.
- * 
- */ -static PIX * -pixDisplayOutliers(PIXA *pixas, - NUMA *nas) -{ -char *text; -char buf[16]; -l_int32 i, n; -l_float32 fval; -PIX *pix1, *pix2; -PIXA *pixa1; - - PROCNAME("pixDisplayOutliers"); - - if (!pixas) - return (PIX *)ERROR_PTR("pixas not defined", procName, NULL); - if (!nas) - return (PIX *)ERROR_PTR("nas not defined", procName, NULL); - n = pixaGetCount(pixas); - if (numaGetCount(nas) != n) - return (PIX *)ERROR_PTR("pixas and nas sizes differ", procName, NULL); - - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL); - text = pixGetText(pix1); - numaGetFValue(nas, i, &fval); - snprintf(buf, sizeof(buf), "'%s': %5.2f", text, fval); - pixSetText(pix2, buf); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000); - pixaDestroy(&pixa1); - return pix1; -} - - -/*! - * \brief recogDisplayOutlier() - * - * \param[in] recog - * \param[in] iclass sample is in this class - * \param[in] jsamp index of sample is class i - * \param[in] maxclass index of class with closest average to sample - * \param[in] maxscore score of sample with average of class %maxclass - * \return pix sample and template images, with score, or NULL on error - * - *
- * Notes:
- *      (1) This shows three templates, side-by-side:
- *          - The outlier sample
- *          - The average template from the same class
- *          - The average class template that best matched the outlier sample
- * 
- */ -static PIX * -recogDisplayOutlier(L_RECOG *recog, - l_int32 iclass, - l_int32 jsamp, - l_int32 maxclass, - l_float32 maxscore) -{ -char buf[64]; -PIX *pix1, *pix2, *pix3, *pix4, *pix5; -PIXA *pixa; - - PROCNAME("recogDisplayOutlier"); - - if (!recog) - return (PIX *)ERROR_PTR("recog not defined", procName, NULL); - - pix1 = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE); - pix2 = pixaGetPix(recog->pixa, iclass, L_CLONE); - pix3 = pixaGetPix(recog->pixa, maxclass, L_CLONE); - pixa = pixaCreate(3); - pixaAddPix(pixa, pix1, L_INSERT); - pixaAddPix(pixa, pix2, L_INSERT); - pixaAddPix(pixa, pix3, L_INSERT); - pix4 = pixaDisplayTiledInRows(pixa, 32, 400, 2.0, 0, 12, 2); - snprintf(buf, sizeof(buf), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass, - maxscore); - pix5 = pixAddSingleTextblock(pix4, recog->bmf, buf, 0xff000000, - L_ADD_BELOW, NULL); - pixDestroy(&pix4); - pixaDestroy(&pixa); - return pix5; -} - - -/*! - * \brief recogShowMatchesInRange() - * - * \param[in] recog - * \param[in] pixa of 1 bpp images to match - * \param[in] minscore min score to include output - * \param[in] maxscore max score to include output - * \param[in] display 1 to display the result - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This gives a visual output of the best matches for a given
- *          range of scores.  Each pair of images can optionally be
- *          labeled with the index of the best match and the correlation.
- *      (2) To use this, save a set of 1 bpp images (labeled or
- *          unlabeled) that can be given to a recognizer in a pixa.
- *          Then call this function with the pixa and parameters
- *          to filter a range of scores.
- * 
- */ -l_ok -recogShowMatchesInRange(L_RECOG *recog, - PIXA *pixa, - l_float32 minscore, - l_float32 maxscore, - l_int32 display) -{ -l_int32 i, n, index, depth; -l_float32 score; -NUMA *nascore, *naindex; -PIX *pix1, *pix2; -PIXA *pixa1, *pixa2; - - PROCNAME("recogShowMatchesInRange"); - - if (!recog) - return ERROR_INT("recog not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - /* Run the recognizer on the set of images */ - n = pixaGetCount(pixa); - nascore = numaCreate(n); - naindex = numaCreate(n); - pixa1 = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixa, i, L_CLONE); - recogIdentifyPix(recog, pix1, &pix2); - rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL); - numaAddNumber(nascore, score); - numaAddNumber(naindex, index); - pixaAddPix(pixa1, pix2, L_INSERT); - pixDestroy(&pix1); - } - - /* Filter the set and optionally add text to each */ - pixa2 = pixaCreate(n); - depth = 1; - for (i = 0; i < n; i++) { - numaGetFValue(nascore, i, &score); - if (score < minscore || score > maxscore) continue; - pix1 = pixaGetPix(pixa1, i, L_CLONE); - numaGetIValue(naindex, i, &index); - pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score); - if (i == 0) depth = pixGetDepth(pix2); - pixaAddPix(pixa2, pix2, L_INSERT); - pixDestroy(&pix1); - } - - /* Package it up */ - pixDestroy(&recog->pixdb_range); - if (pixaGetCount(pixa2) > 0) { - recog->pixdb_range = - pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1); - if (display) - pixDisplay(recog->pixdb_range, 300, 100); - } else { - L_INFO("no character matches in the range of scores\n", procName); - } - - pixaDestroy(&pixa1); - pixaDestroy(&pixa2); - numaDestroy(&nascore); - numaDestroy(&naindex); - return 0; -} - - -/*! 
- * \brief recogShowMatch() - * - * \param[in] recog - * \param[in] pix1 input pix; several possibilities - * \param[in] pix2 [optional] matching template - * \param[in] box [optional] region in pix1 for which pix2 matches - * \param[in] index index of matching template; use -1 to disable printing - * \param[in] score score of match - * \return pixd pair of images, showing input pix and best template, - * optionally with matching information, or NULL on error. - * - *
- * Notes:
- *      (1) pix1 can be one of these:
- *          (a) The input pix alone, which can be either a single character
- *              (box == NULL) or several characters that need to be
- *              segmented.  If more than character is present, the box
- *              region is displayed with an outline.
- *          (b) Both the input pix and the matching template.  In this case,
- *              pix2 and box will both be null.
- *      (2) If the bmf has been made (by a call to recogMakeBmf())
- *          and the index >= 0, the text field, match score and index
- *          will be rendered; otherwise their values will be ignored.
- * 
- */ -PIX * -recogShowMatch(L_RECOG *recog, - PIX *pix1, - PIX *pix2, - BOX *box, - l_int32 index, - l_float32 score) -{ -char buf[32]; -char *text; -L_BMF *bmf; -PIX *pix3, *pix4, *pix5, *pixd; -PIXA *pixa; - - PROCNAME("recogShowMatch"); - - if (!recog) - return (PIX *)ERROR_PTR("recog not defined", procName, NULL); - if (!pix1) - return (PIX *)ERROR_PTR("pix1 not defined", procName, NULL); - - bmf = (recog->bmf && index >= 0) ? recog->bmf : NULL; - if (!pix2 && !box && !bmf) /* nothing to do */ - return pixCopy(NULL, pix1); - - pix3 = pixConvertTo32(pix1); - if (box) - pixRenderBoxArb(pix3, box, 1, 255, 0, 0); - - if (pix2) { - pixa = pixaCreate(2); - pixaAddPix(pixa, pix3, L_CLONE); - pixaAddPix(pixa, pix2, L_CLONE); - pix4 = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0); - pixaDestroy(&pixa); - } else { - pix4 = pixCopy(NULL, pix3); - } - pixDestroy(&pix3); - - if (bmf) { - pix5 = pixAddBorderGeneral(pix4, 55, 55, 0, 0, 0xffffff00); - recogGetClassString(recog, index, &text); - snprintf(buf, sizeof(buf), "C=%s, S=%4.3f, I=%d", text, score, index); - pixd = pixAddSingleTextblock(pix5, bmf, buf, 0xff000000, - L_ADD_BELOW, NULL); - pixDestroy(&pix5); - LEPT_FREE(text); - } else { - pixd = pixClone(pix4); - } - pixDestroy(&pix4); - - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/regutils.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/regutils.c deleted file mode 100644 index a2070aa4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/regutils.c +++ /dev/null @@ -1,887 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file regutils.c - *
- *
- *       Regression test utilities
- *           l_int32    regTestSetup()
- *           l_int32    regTestCleanup()
- *           l_int32    regTestCompareValues()
- *           l_int32    regTestCompareStrings()
- *           l_int32    regTestComparePix()
- *           l_int32    regTestCompareSimilarPix()
- *           l_int32    regTestCheckFile()
- *           l_int32    regTestCompareFiles()
- *           l_int32    regTestWritePixAndCheck()
- *           l_int32    regTestWriteDataAndCheck()
- *           char      *regTestGenLocalFilename()
- *
- *       Static function
- *           char      *getRootNameFromArgv0()
- *
- *  These functions are for testing and development.  They are not intended
- *  for use with programs that run in a production environment, such as a
- *  cloud service with unrestricted access.
- *
- *  See regutils.h for how to use this.  Here is a minimal setup:
- *
- *  main(int argc, char **argv) {
- *  ...
- *  L_REGPARAMS  *rp;
- *
- *      if (regTestSetup(argc, argv, &rp))
- *          return 1;
- *      ...
- *      regTestWritePixAndCheck(rp, pix, IFF_PNG);  // 0
- *      ...
- *      return regTestCleanup(rp);
- *  }
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -extern l_int32 NumImageFileFormatExtensions; -extern const char *ImageFileFormatExtensions[]; - -static char *getRootNameFromArgv0(const char *argv0); - - -/*--------------------------------------------------------------------* - * Regression test utilities * - *--------------------------------------------------------------------*/ -/*! - * \brief regTestSetup() - * - * \param[in] argc from invocation; can be either 1 or 2 - * \param[in] argv to regtest: %argv[1] is one of these: - * "generate", "compare", "display" - * \param[out] prp all regression params - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Call this function with the args to the reg test.  The first arg
- *          is the name of the reg test.  There are three cases:
- *          Case 1:
- *              There is either only one arg, or the second arg is "compare".
- *              This is the mode in which you run a regression test
- *              (or a set of them), looking for failures and logging
- *              the results to a file.  The output, which includes
- *              logging of all reg test failures plus a SUCCESS or
- *              FAILURE summary for each test, is appended to the file
- *              "/tmp/lept/reg_results.txt.  For this case, as in Case 2,
- *              the display field in rp is set to FALSE, preventing
- *              image display.
- *          Case 2:
- *              The second arg is "generate".  This will cause
- *              generation of new golden files for the reg test.
- *              The results of the reg test are not recorded, and
- *              the display field in rp is set to FALSE.
- *          Case 3:
- *              The second arg is "display".  The test will run and
- *              files will be written.  Comparisons with golden files
- *              will not be carried out, so the only notion of success
- *              or failure is with tests that do not involve golden files.
- *              The display field in rp is TRUE, and this is used by
- *              pixDisplayWithTitle().
- *      (2) See regutils.h for examples of usage.
- * 
- */ -l_ok -regTestSetup(l_int32 argc, - char **argv, - L_REGPARAMS **prp) -{ -char *testname, *vers; -char errormsg[64]; -L_REGPARAMS *rp; - - PROCNAME("regTestSetup"); - - if (argc != 1 && argc != 2) { - snprintf(errormsg, sizeof(errormsg), - "Syntax: %s [ [compare] | generate | display ]", argv[0]); - return ERROR_INT(errormsg, procName, 1); - } - - if ((testname = getRootNameFromArgv0(argv[0])) == NULL) - return ERROR_INT("invalid root", procName, 1); - - setLeptDebugOK(1); /* required for testing */ - - rp = (L_REGPARAMS *)LEPT_CALLOC(1, sizeof(L_REGPARAMS)); - *prp = rp; - rp->testname = testname; - rp->index = -1; /* increment before each test */ - - /* Initialize to true. A failure in any test is registered - * as a failure of the regression test. */ - rp->success = TRUE; - - /* Make sure the lept/regout subdirectory exists */ - lept_mkdir("lept/regout"); - - /* Only open a stream to a temp file for the 'compare' case */ - if (argc == 1 || !strcmp(argv[1], "compare")) { - rp->mode = L_REG_COMPARE; - rp->tempfile = stringNew("/tmp/lept/regout/regtest_output.txt"); - rp->fp = fopenWriteStream(rp->tempfile, "wb"); - if (rp->fp == NULL) { - rp->success = FALSE; - return ERROR_INT("stream not opened for tempfile", procName, 1); - } - } else if (!strcmp(argv[1], "generate")) { - rp->mode = L_REG_GENERATE; - lept_mkdir("lept/golden"); - } else if (!strcmp(argv[1], "display")) { - rp->mode = L_REG_DISPLAY; - rp->display = TRUE; - } else { - LEPT_FREE(rp); - snprintf(errormsg, sizeof(errormsg), - "Syntax: %s [ [generate] | compare | display ]", argv[0]); - return ERROR_INT(errormsg, procName, 1); - } - - /* Print out test name and both the leptonica and - * image libarary versions */ - lept_stderr("\n////////////////////////////////////////////////\n" - "//////////////// %s_reg ///////////////\n" - "////////////////////////////////////////////////\n", - rp->testname); - vers = getLeptonicaVersion(); - lept_stderr("%s : ", vers); - LEPT_FREE(vers); - vers = 
getImagelibVersions(); - lept_stderr("%s\n", vers); - LEPT_FREE(vers); - - rp->tstart = startTimerNested(); - return 0; -} - - -/*! - * \brief regTestCleanup() - * - * \param[in] rp regression test parameters - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This copies anything written to the temporary file to the
- *          output file /tmp/lept/reg_results.txt.
- * 
- */ -l_ok -regTestCleanup(L_REGPARAMS *rp) -{ -char result[512]; -char *results_file; /* success/failure output in 'compare' mode */ -char *text, *message; -l_int32 retval; -size_t nbytes; - - PROCNAME("regTestCleanup"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - - lept_stderr("Time: %7.3f sec\n", stopTimerNested(rp->tstart)); - - /* If generating golden files or running in display mode, release rp */ - if (!rp->fp) { - LEPT_FREE(rp->testname); - LEPT_FREE(rp->tempfile); - LEPT_FREE(rp); - return 0; - } - - /* Compare mode: read back data from temp file */ - fclose(rp->fp); - text = (char *)l_binaryRead(rp->tempfile, &nbytes); - LEPT_FREE(rp->tempfile); - if (!text) { - rp->success = FALSE; - LEPT_FREE(rp->testname); - LEPT_FREE(rp); - return ERROR_INT("text not returned", procName, 1); - } - - /* Prepare result message */ - if (rp->success) - snprintf(result, sizeof(result), "SUCCESS: %s_reg\n", rp->testname); - else - snprintf(result, sizeof(result), "FAILURE: %s_reg\n", rp->testname); - message = stringJoin(text, result); - LEPT_FREE(text); - results_file = stringNew("/tmp/lept/reg_results.txt"); - fileAppendString(results_file, message); - retval = (rp->success) ? 0 : 1; - LEPT_FREE(results_file); - LEPT_FREE(message); - - LEPT_FREE(rp->testname); - LEPT_FREE(rp); - return retval; -} - - -/*! - * \brief regTestCompareValues() - * - * \param[in] rp regtest parameters - * \param[in] val1 typ. the golden value - * \param[in] val2 typ. 
the value computed - * \param[in] delta allowed max absolute difference - * \return 0 if OK, 1 on error a failure in comparison is not an error - */ -l_ok -regTestCompareValues(L_REGPARAMS *rp, - l_float32 val1, - l_float32 val2, - l_float32 delta) -{ -l_float32 diff; - - PROCNAME("regTestCompareValues"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - - rp->index++; - diff = L_ABS(val2 - val1); - - /* Record on failure */ - if (diff > delta) { - if (rp->fp) { - fprintf(rp->fp, - "Failure in %s_reg: value comparison for index %d\n" - "difference = %f but allowed delta = %f\n", - rp->testname, rp->index, diff, delta); - } - lept_stderr("Failure in %s_reg: value comparison for index %d\n" - "difference = %f but allowed delta = %f\n", - rp->testname, rp->index, diff, delta); - rp->success = FALSE; - } - return 0; -} - - -/*! - * \brief regTestCompareStrings() - * - * \param[in] rp regtest parameters - * \param[in] string1 typ. the expected string - * \param[in] bytes1 size of string1 - * \param[in] string2 typ. 
the computed string - * \param[in] bytes2 size of string2 - * \return 0 if OK, 1 on error a failure in comparison is not an error - */ -l_ok -regTestCompareStrings(L_REGPARAMS *rp, - l_uint8 *string1, - size_t bytes1, - l_uint8 *string2, - size_t bytes2) -{ -l_int32 same; -char buf[256]; - - PROCNAME("regTestCompareStrings"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - - rp->index++; - l_binaryCompare(string1, bytes1, string2, bytes2, &same); - - /* Output on failure */ - if (!same) { - /* Write the two strings to file */ - snprintf(buf, sizeof(buf), "/tmp/lept/regout/string1_%d_%zu", - rp->index, bytes1); - l_binaryWrite(buf, "w", string1, bytes1); - snprintf(buf, sizeof(buf), "/tmp/lept/regout/string2_%d_%zu", - rp->index, bytes2); - l_binaryWrite(buf, "w", string2, bytes2); - - /* Report comparison failure */ - snprintf(buf, sizeof(buf), "/tmp/lept/regout/string*_%d_*", rp->index); - if (rp->fp) { - fprintf(rp->fp, - "Failure in %s_reg: string comp for index %d; " - "written to %s\n", rp->testname, rp->index, buf); - } - lept_stderr("Failure in %s_reg: string comp for index %d; " - "written to %s\n", rp->testname, rp->index, buf); - rp->success = FALSE; - } - return 0; -} - - -/*! - * \brief regTestComparePix() - * - * \param[in] rp regtest parameters - * \param[in] pix1, pix2 to be tested for equality - * \return 0 if OK, 1 on error a failure in comparison is not an error - * - *
- * Notes:
- *      (1) This function compares two pix for equality.  On failure,
- *          this writes to stderr.
- * 
- */ -l_ok -regTestComparePix(L_REGPARAMS *rp, - PIX *pix1, - PIX *pix2) -{ -l_int32 same; - - PROCNAME("regTestComparePix"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - if (!pix1 || !pix2) { - rp->success = FALSE; - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - } - - rp->index++; - pixEqual(pix1, pix2, &same); - - /* Record on failure */ - if (!same) { - if (rp->fp) { - fprintf(rp->fp, "Failure in %s_reg: pix comparison for index %d\n", - rp->testname, rp->index); - } - lept_stderr("Failure in %s_reg: pix comparison for index %d\n", - rp->testname, rp->index); - rp->success = FALSE; - } - return 0; -} - - -/*! - * \brief regTestCompareSimilarPix() - * - * \param[in] rp regtest parameters - * \param[in] pix1, pix2 to be tested for near equality - * \param[in] mindiff minimum pixel difference to be counted; > 0 - * \param[in] maxfract maximum fraction of pixels allowed to have - * diff greater than or equal to mindiff - * \param[in] printstats use 1 to print normalized histogram to stderr - * \return 0 if OK, 1 on error a failure in similarity comparison - * is not an error - * - *
- * Notes:
- *      (1) This function compares two pix for near equality.  On failure,
- *          this writes to stderr.
- *      (2) The pix are similar if the fraction of non-conforming pixels
- *          does not exceed %maxfract.  Pixels are non-conforming if
- *          the difference in pixel values equals or exceeds %mindiff.
- *          Typical values might be %mindiff = 15 and %maxfract = 0.01.
- *      (3) The input images must have the same size and depth.  The
- *          pixels for comparison are typically subsampled from the images.
- *      (4) Normally, use %printstats = 0.  In debugging mode, to see
- *          the relation between %mindiff and the minimum value of
- *          %maxfract for success, set this to 1.
- * 
- */ -l_ok -regTestCompareSimilarPix(L_REGPARAMS *rp, - PIX *pix1, - PIX *pix2, - l_int32 mindiff, - l_float32 maxfract, - l_int32 printstats) -{ -l_int32 w, h, factor, similar; - - PROCNAME("regTestCompareSimilarPix"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - if (!pix1 || !pix2) { - rp->success = FALSE; - return ERROR_INT("pix1 and pix2 not both defined", procName, 1); - } - - rp->index++; - pixGetDimensions(pix1, &w, &h, NULL); - factor = L_MAX(w, h) / 400; - factor = L_MAX(1, L_MIN(factor, 4)); /* between 1 and 4 */ - pixTestForSimilarity(pix1, pix2, factor, mindiff, maxfract, 0.0, - &similar, printstats); - - /* Record on failure */ - if (!similar) { - if (rp->fp) { - fprintf(rp->fp, - "Failure in %s_reg: pix similarity comp for index %d\n", - rp->testname, rp->index); - } - lept_stderr("Failure in %s_reg: pix similarity comp for index %d\n", - rp->testname, rp->index); - rp->success = FALSE; - } - return 0; -} - - -/*! - * \brief regTestCheckFile() - * - * \param[in] rp regtest parameters - * \param[in] localname name of output file from reg test - * \return 0 if OK, 1 on error a failure in comparison is not an error - * - *
- * Notes:
- *      (1) This function does one of three things, depending on the mode:
- *           * "generate": makes a "golden" file as a copy of %localname.
- *           * "compare": compares %localname contents with the golden file
- *           * "display": this does nothing
- *      (2) The canonical format of the golden filenames is:
- *            /tmp/lept/golden/[root of main name]_golden.[index].
- *                                                       [ext of localname]
- *          e.g.,
- *             /tmp/lept/golden/maze_golden.0.png
- *      (3) The local file can be made in any subdirectory of /tmp/lept,
- *          including /tmp/lept/regout/.
- *      (4) It is important to add an extension to the local name, such as
- *             /tmp/lept/maze/file1.png    (extension ".png")
- *          because the extension is added to the name of the golden file.
- * 
- */ -l_ok -regTestCheckFile(L_REGPARAMS *rp, - const char *localname) -{ -char *ext; -char namebuf[256]; -l_int32 ret, same, format; -PIX *pix1, *pix2; - - PROCNAME("regTestCheckFile"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - if (!localname) { - rp->success = FALSE; - return ERROR_INT("local name not defined", procName, 1); - } - if (rp->mode != L_REG_GENERATE && rp->mode != L_REG_COMPARE && - rp->mode != L_REG_DISPLAY) { - rp->success = FALSE; - return ERROR_INT("invalid mode", procName, 1); - } - rp->index++; - - /* If display mode, no generation and no testing */ - if (rp->mode == L_REG_DISPLAY) return 0; - - /* Generate the golden file name; used in 'generate' and 'compare' */ - splitPathAtExtension(localname, NULL, &ext); - snprintf(namebuf, sizeof(namebuf), "/tmp/lept/golden/%s_golden.%02d%s", - rp->testname, rp->index, ext); - LEPT_FREE(ext); - - /* Generate mode. No testing. */ - if (rp->mode == L_REG_GENERATE) { - /* Save the file as a golden file */ - ret = fileCopy(localname, namebuf); -#if 0 /* Enable for details on writing of golden files */ - if (!ret) { - char *local = genPathname(localname, NULL); - char *golden = genPathname(namebuf, NULL); - L_INFO("Copy: %s to %s\n", procName, local, golden); - LEPT_FREE(local); - LEPT_FREE(golden); - } -#endif - return ret; - } - - /* Compare mode: test and record on failure. This can be used - * for all image formats, as well as for all files of serialized - * data, such as boxa, pta, etc. In all cases except for - * GIF compressed images, we compare the files to see if they - * are identical. GIF doesn't support RGB images; to write - * a 32 bpp RGB image in GIF, we do a lossy quantization to - * 256 colors, so the cycle read-RGB/write-GIF is not idempotent. 
- * And although the read/write cycle for GIF images with bpp <= 8 - * is idempotent in the image pixels, it is not idempotent in the - * actual file bytes; tests comparing file bytes before and after - * a GIF read/write cycle will fail. So for GIF we uncompress - * the two images and compare the actual pixels. PNG is both - * lossless and idempotent in file bytes on read/write, so it is - * not necessary to compare pixels. (Comparing pixels requires - * decompression, and thus would increase the regression test - * time. JPEG is lossy and not idempotent in the image pixels, - * so no tests are constructed that would require it. */ - findFileFormat(localname, &format); - if (format == IFF_GIF) { - same = 0; - pix1 = pixRead(localname); - pix2 = pixRead(namebuf); - pixEqual(pix1, pix2, &same); - pixDestroy(&pix1); - pixDestroy(&pix2); - } else { - filesAreIdentical(localname, namebuf, &same); - } - if (!same) { - fprintf(rp->fp, "Failure in %s_reg, index %d: comparing %s with %s\n", - rp->testname, rp->index, localname, namebuf); - lept_stderr("Failure in %s_reg, index %d: comparing %s with %s\n", - rp->testname, rp->index, localname, namebuf); - rp->success = FALSE; - } - - return 0; -} - - -/*! - * \brief regTestCompareFiles() - * - * \param[in] rp regtest parameters - * \param[in] index1 of one output file from reg test - * \param[in] index2 of another output file from reg test - * \return 0 if OK, 1 on error a failure in comparison is not an error - * - *
- * Notes:
- *      (1) This only does something in "compare" mode.
- *      (2) The canonical format of the golden filenames is:
- *            /tmp/lept/golden/[root of main name]_golden.[index].
- *                                                      [ext of localname]
- *          e.g.,
- *            /tmp/lept/golden/maze_golden.0.png
- * 
- */ -l_ok -regTestCompareFiles(L_REGPARAMS *rp, - l_int32 index1, - l_int32 index2) -{ -char *name1, *name2; -char namebuf[256]; -l_int32 same; -SARRAY *sa; - - PROCNAME("regTestCompareFiles"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - if (index1 < 0 || index2 < 0) { - rp->success = FALSE; - return ERROR_INT("index1 and/or index2 is negative", procName, 1); - } - if (index1 == index2) { - rp->success = FALSE; - return ERROR_INT("index1 must differ from index2", procName, 1); - } - - rp->index++; - if (rp->mode != L_REG_COMPARE) return 0; - - /* Generate the golden file names */ - snprintf(namebuf, sizeof(namebuf), "%s_golden.%02d", rp->testname, index1); - sa = getSortedPathnamesInDirectory("/tmp/lept/golden", namebuf, 0, 0); - if (sarrayGetCount(sa) != 1) { - sarrayDestroy(&sa); - rp->success = FALSE; - L_ERROR("golden file %s not found\n", procName, namebuf); - return 1; - } - name1 = sarrayGetString(sa, 0, L_COPY); - sarrayDestroy(&sa); - - snprintf(namebuf, sizeof(namebuf), "%s_golden.%02d", rp->testname, index2); - sa = getSortedPathnamesInDirectory("/tmp/lept/golden", namebuf, 0, 0); - if (sarrayGetCount(sa) != 1) { - sarrayDestroy(&sa); - rp->success = FALSE; - LEPT_FREE(name1); - L_ERROR("golden file %s not found\n", procName, namebuf); - return 1; - } - name2 = sarrayGetString(sa, 0, L_COPY); - sarrayDestroy(&sa); - - /* Test and record on failure */ - filesAreIdentical(name1, name2, &same); - if (!same) { - fprintf(rp->fp, - "Failure in %s_reg, index %d: comparing %s with %s\n", - rp->testname, rp->index, name1, name2); - lept_stderr("Failure in %s_reg, index %d: comparing %s with %s\n", - rp->testname, rp->index, name1, name2); - rp->success = FALSE; - } - - LEPT_FREE(name1); - LEPT_FREE(name2); - return 0; -} - - -/*! 
- * \brief regTestWritePixAndCheck() - * - * \param[in] rp regtest parameters - * \param[in] pix to be written - * \param[in] format of output pix - * \return 0 if OK, 1 on error a failure in comparison is not an error - * - *
- * Notes:
- *      (1) This function makes it easy to write the pix in a numbered
- *          sequence of files, and either to:
- *             (a) write the golden file ("generate" arg to regression test)
- *             (b) make a local file and "compare" with the golden file
- *             (c) make a local file and "display" the results
- *      (2) The canonical format of the local filename is:
- *            /tmp/lept/regout/[root of main name].[count].[format extension]
- *          e.g., for scale_reg,
- *            /tmp/lept/regout/scale.0.png
- *          The golden file name mirrors this in the usual way.
- *      (3) The check is done between the written files, which requires
- *          the files to be identical. The exception is for GIF, which
- *          only requires that all pixels in the decoded pix are identical.
- * 
- */ -l_ok -regTestWritePixAndCheck(L_REGPARAMS *rp, - PIX *pix, - l_int32 format) -{ -char namebuf[256]; - - PROCNAME("regTestWritePixAndCheck"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - if (!pix) { - rp->success = FALSE; - return ERROR_INT("pix not defined", procName, 1); - } - if (format < 0 || format >= NumImageFileFormatExtensions) { - rp->success = FALSE; - return ERROR_INT("invalid format", procName, 1); - } - - /* Use bmp format for testing if library for requested - * format for jpeg, png or tiff is not available */ - changeFormatForMissingLib(&format); - - /* Generate the local file name */ - snprintf(namebuf, sizeof(namebuf), "/tmp/lept/regout/%s.%02d.%s", - rp->testname, rp->index + 1, ImageFileFormatExtensions[format]); - - /* Write the local file */ - if (pixGetDepth(pix) < 8) - pixSetPadBits(pix, 0); - pixWrite(namebuf, pix, format); - - /* Either write the golden file ("generate") or check the - local file against an existing golden file ("compare") */ - regTestCheckFile(rp, namebuf); - - return 0; -} - - -/*! - * \brief regTestWriteDataAndCheck() - * - * \param[in] rp regtest parameters - * \param[in] data to be written - * \param[in] nbytes of data to be written - * \param[in] ext filename extension (e.g.: "ba", "pta") - * \return 0 if OK, 1 on error a failure in comparison is not an error - * - *
- * Notes:
- *      (1) This function makes it easy to write data in a numbered
- *          sequence of files, and either to:
- *             (a) write the golden file ("generate" arg to regression test)
- *             (b) make a local file and "compare" with the golden file
- *             (c) make a local file and "display" the results
- *      (2) The canonical format of the local filename is:
- *            /tmp/lept/regout/[root of main name].[count].[ext]
- *          e.g., for the first boxaa in quadtree_reg,
- *            /tmp/lept/regout/quadtree.0.baa
- *          The golden file name mirrors this in the usual way.
- *      (3) The data can be anything.  It is most useful for serialized
- *          output of data, such as boxa, pta, etc.
- *      (4) The file extension is arbitrary.  It is included simply
- *          to make the content type obvious when examining written files.
- *      (5) The check is done between the written files, which requires
- *          the files to be identical.
- * 
- */ -l_ok -regTestWriteDataAndCheck(L_REGPARAMS *rp, - void *data, - size_t nbytes, - const char *ext) -{ -char namebuf[256]; - - PROCNAME("regTestWriteDataAndCheck"); - - if (!rp) - return ERROR_INT("rp not defined", procName, 1); - if (!data || nbytes == 0) { - rp->success = FALSE; - return ERROR_INT("data not defined or size == 0", procName, 1); - } - - /* Generate the local file name */ - snprintf(namebuf, sizeof(namebuf), "/tmp/lept/regout/%s.%02d.%s", - rp->testname, rp->index + 1, ext); - - /* Write the local file */ - l_binaryWrite(namebuf, "w", data, nbytes); - - /* Either write the golden file ("generate") or check the - local file against an existing golden file ("compare") */ - regTestCheckFile(rp, namebuf); - return 0; -} - - -/*! - * \brief regTestGenLocalFilename() - * - * \param[in] rp regtest parameters - * \param[in] index use -1 for current index - * \param[in] format of image; e.g., IFF_PNG - * \return filename if OK, or NULL on error - * - *
- * Notes:
- *      (1) This is used to get the name of a file in the regout
- *          subdirectory, that has been made and is used to test against
- *          the golden file.  You can either specify a particular index
- *          value, or with %index == -1, this returns the most recently
- *          written file.  The latter case lets you read a pix from a
- *          file that has just been written with regTestWritePixAndCheck(),
- *          which is useful for testing formatted read/write functions.
- *
- * 
- */ -char * -regTestGenLocalFilename(L_REGPARAMS *rp, - l_int32 index, - l_int32 format) -{ -char buf[64]; -l_int32 ind; - - PROCNAME("regTestGenLocalFilename"); - - if (!rp) - return (char *)ERROR_PTR("rp not defined", procName, NULL); - - ind = (index >= 0) ? index : rp->index; - snprintf(buf, sizeof(buf), "/tmp/lept/regout/%s.%02d.%s", - rp->testname, ind, ImageFileFormatExtensions[format]); - return stringNew(buf); -} - - -/*! - * \brief getRootNameFromArgv0() - * - * \param[in] argv0 - * \return root name without the '_reg', or NULL on error - * - *
- * Notes:
- *      (1) For example, from psioseg_reg, we want to extract
- *          just 'psioseg' as the root.
- *      (2) In unix with autotools, the executable is not X,
- *          but ./.libs/lt-X.   So in addition to stripping out the
- *          last 4 characters of the tail, we have to check for
- *          the '-' and strip out the "lt-" prefix if we find it.
- * 
- */ -static char * -getRootNameFromArgv0(const char *argv0) -{ -l_int32 len; -char *root; - - PROCNAME("getRootNameFromArgv0"); - - splitPathAtDirectory(argv0, NULL, &root); - if ((len = strlen(root)) <= 4) { - LEPT_FREE(root); - return (char *)ERROR_PTR("invalid argv0; too small", procName, NULL); - } - -#ifndef _WIN32 - { - char *newroot; - l_int32 loc; - if (stringFindSubstr(root, "-", &loc)) { - newroot = stringNew(root + loc + 1); /* strip out "lt-" */ - LEPT_FREE(root); - root = newroot; - len = strlen(root); - } - len -= 4; /* remove the "_reg" suffix */ - } -#else - if (strstr(root, ".exe") != NULL) - len -= 4; - if (strstr(root, "_reg") == root + len - 4) - len -= 4; -#endif /* ! _WIN32 */ - - root[len] = '\0'; /* terminate */ - return root; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/regutils.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/regutils.h deleted file mode 100644 index 2f1d5e4a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/regutils.h +++ /dev/null @@ -1,141 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_REGUTILS_H -#define LEPTONICA_REGUTILS_H - -/*! - * \file regutils.h - * - *
- *   Contains this regression test parameter packaging struct
- *       struct L_RegParams
- *
- *   The regression test utility allows you to write regression tests
- *   that compare results with existing "golden files" and with
- *   compiled in data.
- *
- *   Regression tests can be called in three ways.
- *   For example, for distance_reg:
- *
- *       Case 1: distance_reg [compare]
- *           This runs the test against the set of golden files.  It
- *           appends to 'outfile.txt' either "SUCCESS" or "FAILURE",
- *           as well as the details of any parts of the test that failed.
- *           It writes to a temporary file stream (fp).
- *           Using 'compare' on the command line is optional.
- *
- *       Case 2: distance_reg generate
- *           This generates golden files in /tmp for the reg test.
- *
- *       Case 3: distance_reg display
- *           This runs the test but makes no comparison of the output
- *           against the set of golden files.  In addition, this displays
- *           images and plots that are specified in the test under
- *           control of the display variable.  Display is enabled only
- *           for this case.
- *
- *   Regression tests follow the pattern given below.  Tests are
- *   automatically numbered sequentially, and it is convenient to
- *   comment each with a number to keep track (for comparison tests
- *   and for debugging).  In an actual case, comparisons of pix and
- *   of files can occur in any order.  We give a specific order here
- *   for clarity.
- *
- *       L_REGPARAMS  *rp;  // holds data required by the test functions
- *
- *       // Setup variables; optionally open stream
- *       if (regTestSetup(argc, argv, &rp))
- *           return 1;
- *
- *       // Test pairs of generated pix for identity.  This compares
- *       // two pix; no golden file is generated.
- *       regTestComparePix(rp, pix1, pix2);  // 0
- *
- *       // Test pairs of generated pix for similarity.  This compares
- *       // two pix; no golden file is generated.  The last arg determines
- *       // if stats are to be written to stderr.
- *       regTestCompareSimilarPix(rp, pix1, pix2, 15, 0.001, 0);  // 1
- *
- *       // Generation of  outputs and testing for identity
- *       // These files can be anything, of course.
- *       regTestCheckFile(rp, );  // 2
- *       regTestCheckFile(rp, );  // 3
- *
- *       // Test pairs of output golden files for identity.  Here we
- *       // are comparing golden files 2 and 3.
- *       regTestCompareFiles(rp, 2, 3);  // 4
- *
- *       // "Write and check".  This writes a pix using a canonical
- *       // formulation for the local filename and either:
- *       //     case 1: generates a golden file
- *       //     case 2: compares the local file with a golden file
- *       //     case 3: generates local files and displays
- *       // Here we write the pix compressed with png and jpeg, respectively;
- *       // Then check against the golden file.  The internal %index
- *       // is incremented; it is embedded in the local filename and,
- *       // if generating, in the golden file as well.
- *       regTestWritePixAndCheck(rp, pix1, IFF_PNG);  // 5
- *       regTestWritePixAndCheck(rp, pix2, IFF_JFIF_JPEG);  // 6
- *
- *       // Display if reg test was called in 'display' mode
- *       pixDisplayWithTitle(pix1, 100, 100, NULL, rp->display);
- *
- *       // Clean up and output result
- *       regTestCleanup(rp);
- * 
- */ - -/*----------------------------------------------------------------------------* - * Regression test parameter packer * - *----------------------------------------------------------------------------*/ - -/*! Regression test parameter packer */ -struct L_RegParams -{ - FILE *fp; /*!< stream to temporary output file for compare mode */ - char *testname; /*!< name of test, without '_reg' */ - char *tempfile; /*!< name of temp file for compare mode output */ - l_int32 mode; /*!< generate, compare or display */ - l_int32 index; /*!< index into saved files for this test; 0-based */ - l_int32 success; /*!< overall result of the test */ - l_int32 display; /*!< 1 if in display mode; 0 otherwise */ - L_TIMER tstart; /*!< marks beginning of the reg test */ -}; -typedef struct L_RegParams L_REGPARAMS; - - - /*! Running modes for the test */ -/*! Regtest Mode */ -enum { - L_REG_GENERATE = 0, - L_REG_COMPARE = 1, - L_REG_DISPLAY = 2 -}; - - -#endif /* LEPTONICA_REGUTILS_H */ - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rop.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rop.c deleted file mode 100644 index 59a41d8d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rop.c +++ /dev/null @@ -1,520 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file rop.c - *
- *      General rasterop
- *           l_int32    pixRasterop()
- *
- *      In-place full band translation
- *           l_int32    pixRasteropVip()
- *           l_int32    pixRasteropHip()
- *
- *      Full image translation (general and in-place)
- *           l_int32    pixTranslate()
- *           l_int32    pixRasteropIP()
- *
- *      Full image rasterop with no translation
- *           l_int32    pixRasteropFullImage()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*--------------------------------------------------------------------* - * General rasterop (basic pix interface) * - *--------------------------------------------------------------------*/ -/*! - * \brief pixRasterop() - * - * \param[in] pixd dest pix - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \param[in] pixs src pix - * \param[in] sx x val of UL corner of src rectangle - * \param[in] sy y val of UL corner of src rectangle - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) This has the standard set of 9 args for rasterop.
- *          This function is your friend; it is worth memorizing!
- *      (2) If the operation involves only dest, this calls
- *          rasteropUniLow().  Otherwise, checks depth of the
- *          src and dest, and if they match, calls rasteropLow().
- *      (3) For the two-image operation, where both pixs and pixd
- *          are defined, they are typically different images.  However
- *          there are cases, such as pixSetMirroredBorder(), where
- *          in-place operations can be done, blitting pixels from
- *          one part of pixd to another.  Consequently, we permit
- *          such operations.  If you use them, be sure that there
- *          is no overlap between the source and destination rectangles
- *          in pixd (!)
- *
- *  Background:
- *  -----------
- *
- *  There are 18 operations, described by the op codes in pix.h.
- *
- *  One, PIX_DST, is a no-op.
- *
- *  Three, PIX_CLR, PIX_SET, and PIX_NOT(PIX_DST) operate only on the dest.
- *  These are handled by the low-level rasteropUniLow().
- *
- *  The other 14 involve the both the src and the dest, and depend on
- *  the bit values of either just the src or the bit values of both
- *  src and dest.  They are handled by rasteropLow():
- *
- *          PIX_SRC                             s
- *          PIX_NOT(PIX_SRC)                   ~s
- *          PIX_SRC | PIX_DST                   s | d
- *          PIX_SRC & PIX_DST                   s & d
- *          PIX_SRC ^ PIX_DST                   s ^ d
- *          PIX_NOT(PIX_SRC) | PIX_DST         ~s | d
- *          PIX_NOT(PIX_SRC) & PIX_DST         ~s & d
- *          PIX_NOT(PIX_SRC) ^ PIX_DST         ~s ^ d
- *          PIX_SRC | PIX_NOT(PIX_DST)          s | ~d
- *          PIX_SRC & PIX_NOT(PIX_DST)          s & ~d
- *          PIX_SRC ^ PIX_NOT(PIX_DST)          s ^ ~d
- *          PIX_NOT(PIX_SRC | PIX_DST)         ~(s | d)
- *          PIX_NOT(PIX_SRC & PIX_DST)         ~(s & d)
- *          PIX_NOT(PIX_SRC ^ PIX_DST)         ~(s ^ d)
- *
- *  Each of these is implemented with one of three low-level
- *  functions, depending on the alignment of the left edge
- *  of the src and dest rectangles:
- *      * a fastest implementation if both left edges are
- *        (32-bit) word aligned
- *      * a very slightly slower implementation if both left
- *        edges have the same relative (32-bit) word alignment
- *      * the general routine that is invoked when
- *        both left edges have different word alignment
- *
- *  Of the 14 binary rasterops above, only 12 are unique
- *  logical combinations (out of a possible 16) of src
- *  and dst bits:
- *
- *        (sd)         (11)   (10)   (01)   (00)
- *   -----------------------------------------------
- *         s            1      1      0      0
- *        ~s            0      1      0      1
- *       s | d          1      1      1      0
- *       s & d          1      0      0      0
- *       s ^ d          0      1      1      0
- *      ~s | d          1      0      1      1
- *      ~s & d          0      0      1      0
- *      ~s ^ d          1      0      0      1
- *       s | ~d         1      1      0      1
- *       s & ~d         0      1      0      0
- *       s ^ ~d         1      0      0      1
- *      ~(s | d)        0      0      0      1
- *      ~(s & d)        0      1      1      1
- *      ~(s ^ d)        1      0      0      1
- *
- *  Note that the following three operations are equivalent:
- *      ~(s ^ d)
- *      ~s ^ d
- *      s ^ ~d
- *  and in the implementation, we call them out with the first form;
- *  namely, ~(s ^ d).
- *
- *  Of the 16 possible binary combinations of src and dest bits,
- *  the remaining 4 unique ones are independent of the src bit.
- *  They depend on either just the dest bit or on neither
- *  the src nor dest bits:
- *
- *         d            1      0      1      0    (indep. of s)
- *        ~d            0      1      0      1    (indep. of s)
- *        CLR           0      0      0      0    (indep. of both s & d)
- *        SET           1      1      1      1    (indep. of both s & d)
- *
- *  As mentioned above, three of these are implemented by
- *  rasteropUniLow(), and one is a no-op.
- *
- *  How can these operation codes be represented by bits
- *  in such a way that when the basic operations are performed
- *  on the bits the results are unique for unique
- *  operations, and mimic the logic table given above?
- *
- *  The answer is to choose a particular order of the pairings:
- *         (sd)         (11)   (10)   (01)   (00)
- *  (which happens to be the same as in the above table)
- *  and to translate the result into 4-bit representations
- *  of s and d.  For example, the Sun rasterop choice
- *  (omitting the extra bit for clipping) is
- *
- *      PIX_SRC      0xc
- *      PIX_DST      0xa
- *
- *  This corresponds to our pairing order given above:
- *         (sd)         (11)   (10)   (01)   (00)
- *  where for s = 1 we get the bit pattern
- *       PIX_SRC:        1      1      0      0     (0xc)
- *  and for d = 1 we get the pattern
- *       PIX_DST:         1      0      1      0    (0xa)
- *
- *  OK, that's the pairing order that Sun chose.  How many different
- *  ways can we assign bit patterns to PIX_SRC and PIX_DST to get
- *  the boolean ops to work out?  Any of the 4 pairs can be put
- *  in the first position, any of the remaining 3 pairs can go
- *  in the second; and one of the remaining 2 pairs can go the the third.
- *  There is a total of 4*3*2 = 24 ways these pairs can be permuted.
- * 
- */ -l_ok -pixRasterop(PIX *pixd, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op, - PIX *pixs, - l_int32 sx, - l_int32 sy) -{ -l_int32 dd; - - PROCNAME("pixRasterop"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - - if (op == PIX_DST) /* no-op */ - return 0; - - /* Check if operation is only on dest */ - dd = pixGetDepth(pixd); - if (op == PIX_CLR || op == PIX_SET || op == PIX_NOT(PIX_DST)) { - rasteropUniLow(pixGetData(pixd), - pixGetWidth(pixd), pixGetHeight(pixd), dd, - pixGetWpl(pixd), - dx, dy, dw, dh, - op); - return 0; - } - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Check depth of src and dest; these must agree */ - if (dd != pixGetDepth(pixs)) - return ERROR_INT("depths of pixs and pixd differ", procName, 1); - - rasteropLow(pixGetData(pixd), - pixGetWidth(pixd), pixGetHeight(pixd), dd, - pixGetWpl(pixd), - dx, dy, dw, dh, - op, - pixGetData(pixs), - pixGetWidth(pixs), pixGetHeight(pixs), - pixGetWpl(pixs), - sx, sy); - - return 0; -} - - -/*--------------------------------------------------------------------* - * In-place full band translation * - *--------------------------------------------------------------------*/ -/*! - * \brief pixRasteropVip() - * - * \param[in] pixd in-place - * \param[in] bx left edge of vertical band - * \param[in] bw width of vertical band - * \param[in] vshift vertical shift of band; vshift > 0 is down - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This rasterop translates a vertical band of the
- *          image either up or down, bringing in either white
- *          or black pixels from outside the image.
- *      (2) The vertical band extends the full height of pixd.
- *      (3) If a colormap exists, the nearest color to white or black
- *          is brought in.
- * 
- */ -l_ok -pixRasteropVip(PIX *pixd, - l_int32 bx, - l_int32 bw, - l_int32 vshift, - l_int32 incolor) -{ -l_int32 w, h, d, index, op; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixRasteropVip"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return ERROR_INT("invalid value for incolor", procName, 1); - if (bw <= 0) - return ERROR_INT("bw must be > 0", procName, 1); - - if (vshift == 0) - return 0; - - pixGetDimensions(pixd, &w, &h, &d); - rasteropVipLow(pixGetData(pixd), w, h, d, pixGetWpl(pixd), bx, bw, vshift); - - cmap = pixGetColormap(pixd); - if (!cmap) { - if ((d == 1 && incolor == L_BRING_IN_BLACK) || - (d > 1 && incolor == L_BRING_IN_WHITE)) - op = PIX_SET; - else - op = PIX_CLR; - - /* Set the pixels brought in at top or bottom */ - if (vshift > 0) - pixRasterop(pixd, bx, 0, bw, vshift, op, NULL, 0, 0); - else /* vshift < 0 */ - pixRasterop(pixd, bx, h + vshift, bw, -vshift, op, NULL, 0, 0); - return 0; - } - - /* Get the nearest index and fill with that */ - if (incolor == L_BRING_IN_BLACK) - pixcmapGetRankIntensity(cmap, 0.0, &index); - else /* white */ - pixcmapGetRankIntensity(cmap, 1.0, &index); - pixt = pixCreate(bw, L_ABS(vshift), d); - pixSetAllArbitrary(pixt, index); - if (vshift > 0) - pixRasterop(pixd, bx, 0, bw, vshift, PIX_SRC, pixt, 0, 0); - else /* vshift < 0 */ - pixRasterop(pixd, bx, h + vshift, bw, -vshift, PIX_SRC, pixt, 0, 0); - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief pixRasteropHip() - * - * \param[in] pixd in-place operation - * \param[in] by top of horizontal band - * \param[in] bh height of horizontal band - * \param[in] hshift horizontal shift of band; hshift > 0 is to right - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This rasterop translates a horizontal band of the
- *          image either left or right, bringing in either white
- *          or black pixels from outside the image.
- *      (2) The horizontal band extends the full width of pixd.
- *      (3) If a colormap exists, the nearest color to white or black
- *          is brought in.
- * 
- */ -l_ok -pixRasteropHip(PIX *pixd, - l_int32 by, - l_int32 bh, - l_int32 hshift, - l_int32 incolor) -{ -l_int32 w, h, d, index, op; -PIX *pixt; -PIXCMAP *cmap; - - PROCNAME("pixRasteropHip"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return ERROR_INT("invalid value for incolor", procName, 1); - if (bh <= 0) - return ERROR_INT("bh must be > 0", procName, 1); - - if (hshift == 0) - return 0; - - pixGetDimensions(pixd, &w, &h, &d); - rasteropHipLow(pixGetData(pixd), h, d, pixGetWpl(pixd), by, bh, hshift); - - cmap = pixGetColormap(pixd); - if (!cmap) { - if ((d == 1 && incolor == L_BRING_IN_BLACK) || - (d > 1 && incolor == L_BRING_IN_WHITE)) - op = PIX_SET; - else - op = PIX_CLR; - - /* Set the pixels brought in at left or right */ - if (hshift > 0) - pixRasterop(pixd, 0, by, hshift, bh, op, NULL, 0, 0); - else /* hshift < 0 */ - pixRasterop(pixd, w + hshift, by, -hshift, bh, op, NULL, 0, 0); - return 0; - } - - /* Get the nearest index and fill with that */ - if (incolor == L_BRING_IN_BLACK) - pixcmapGetRankIntensity(cmap, 0.0, &index); - else /* white */ - pixcmapGetRankIntensity(cmap, 1.0, &index); - pixt = pixCreate(L_ABS(hshift), bh, d); - pixSetAllArbitrary(pixt, index); - if (hshift > 0) - pixRasterop(pixd, 0, by, hshift, bh, PIX_SRC, pixt, 0, 0); - else /* hshift < 0 */ - pixRasterop(pixd, w + hshift, by, -hshift, bh, PIX_SRC, pixt, 0, 0); - pixDestroy(&pixt); - return 0; -} - - -/*--------------------------------------------------------------------* - * Full image translation (general and in-place) * - *--------------------------------------------------------------------*/ -/*! 
- * \brief pixTranslate() - * - * \param[in] pixd [optional] destination: this can be null, - * equal to pixs, or different from pixs - * \param[in] pixs - * \param[in] hshift horizontal shift; hshift > 0 is to right - * \param[in] vshift vertical shift; vshift > 0 is down - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) The general pattern is:
- *            pixd = pixTranslate(pixd, pixs, ...);
- *          For clarity, when you know the case, use one of these:
- *            pixd = pixTranslate(NULL, pixs, ...);  // new
- *            pixTranslate(pixs, pixs, ...);         // in-place
- *            pixTranslate(pixd, pixs, ...);         // to existing pixd
- *      (2) If an existing pixd is not the same size as pixs, the
- *          image data will be reallocated.
- * 
- */ -PIX * -pixTranslate(PIX *pixd, - PIX *pixs, - l_int32 hshift, - l_int32 vshift, - l_int32 incolor) -{ - PROCNAME("pixTranslate"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Prepare pixd for in-place operation */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - pixRasteropIP(pixd, hshift, vshift, incolor); - return pixd; -} - - -/*! - * \brief pixRasteropIP() - * - * \param[in] pixd in-place translation - * \param[in] hshift horizontal shift; hshift > 0 is to right - * \param[in] vshift vertical shift; vshift > 0 is down - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return 0 if OK; 1 on error - */ -l_ok -pixRasteropIP(PIX *pixd, - l_int32 hshift, - l_int32 vshift, - l_int32 incolor) -{ -l_int32 w, h; - - PROCNAME("pixRasteropIP"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - - pixGetDimensions(pixd, &w, &h, NULL); - pixRasteropHip(pixd, 0, h, hshift, incolor); - pixRasteropVip(pixd, 0, w, vshift, incolor); - - return 0; -} - - -/*--------------------------------------------------------------------* - * Full image rasterop with no shifts * - *--------------------------------------------------------------------*/ -/*! - * \brief pixRasteropFullImage() - * - * \param[in] pixd - * \param[in] pixs - * \param[in] op any of the op-codes - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      ~ this is a wrapper for a common 2-image raster operation
- *      ~ both pixs and pixd must be defined
- *      ~ the operation is performed with aligned UL corners of pixs and pixd
- *      ~ the operation clips to the smallest pix; if the width or height
- *        of pixd is larger than pixs, some pixels in pixd will be unchanged
- * 
- */ -l_ok -pixRasteropFullImage(PIX *pixd, - PIX *pixs, - l_int32 op) -{ - PROCNAME("pixRasteropFullImage"); - - if (!pixd) - return ERROR_INT("pixd not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - pixRasterop(pixd, 0, 0, pixGetWidth(pixd), pixGetHeight(pixd), op, - pixs, 0, 0); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/roplow.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/roplow.c deleted file mode 100644 index f13e29ba..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/roplow.c +++ /dev/null @@ -1,2483 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file roplow.c - *
- *      Low level dest-only
- *           void            rasteropUniLow()
- *           static void     rasteropUniWordAlignedlLow()
- *           static void     rasteropUniGeneralLow()
- *
- *      Low level src and dest
- *           void            rasteropLow()
- *           static void     rasteropWordAlignedLow()
- *           static void     rasteropVAlignedLow()
- *           static void     rasteropGeneralLow()
- *
- *      Low level in-place full height vertical block transfer
- *           void            rasteropVipLow()
- *
- *      Low level in-place full width horizontal block transfer
- *           void            rasteropHipLow()
- *           static void     shiftDataHorizontalLow()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Static helpers */ -static void rasteropUniWordAlignedLow(l_uint32 *datad, l_int32 dwpl, l_int32 dx, - l_int32 dy, l_int32 dw, l_int32 dh, - l_int32 op); -static void rasteropUniGeneralLow(l_uint32 *datad, l_int32 dwpl, l_int32 dx, - l_int32 dy, l_int32 dw, l_int32 dh, - l_int32 op); -static void rasteropWordAlignedLow(l_uint32 *datad, l_int32 dwpl, l_int32 dx, - l_int32 dy, l_int32 dw, l_int32 dh, - l_int32 op, l_uint32 *datas, l_int32 swpl, - l_int32 sx, l_int32 sy); -static void rasteropVAlignedLow(l_uint32 *datad, l_int32 dwpl, l_int32 dx, - l_int32 dy, l_int32 dw, l_int32 dh, - l_int32 op, l_uint32 *datas, l_int32 swpl, - l_int32 sx, l_int32 sy); -static void rasteropGeneralLow(l_uint32 *datad, l_int32 dwpl, l_int32 dx, - l_int32 dy, l_int32 dw, l_int32 dh, - l_int32 op, l_uint32 *datas, l_int32 swpl, - l_int32 sx, l_int32 sy); -static void shiftDataHorizontalLow(l_uint32 *datad, l_int32 wpld, - l_uint32 *datas, l_int32 wpls, - l_int32 shift); - -#define COMBINE_PARTIAL(d, s, m) ( ((d) & ~(m)) | ((s) & (m)) ) - -static const l_int32 SHIFT_LEFT = 0; -static const l_int32 SHIFT_RIGHT = 1; - -static const l_uint32 lmask32[] = {0x0, - 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, - 0xf8000000, 0xfc000000, 0xfe000000, 0xff000000, - 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, - 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, - 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000, - 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, - 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, - 0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff}; - -static const l_uint32 rmask32[] = {0x0, - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, - 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, - 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, - 0x001fffff, 0x003fffff, 0x007fffff, 
0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, - 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff}; - - -/*--------------------------------------------------------------------* - * Low-level dest-only rasterops * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropUniLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dpixw width of dest - * \param[in] dpixh height of dest - * \param[in] depth depth of src and dest - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \return void - * - * Action: scales width, performs clipping, checks alignment, and - * dispatches for the rasterop. - */ -void -rasteropUniLow(l_uint32 *datad, - l_int32 dpixw, - l_int32 dpixh, - l_int32 depth, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op) -{ -l_int32 dhangw, dhangh; - - /* -------------------------------------------------------* - * scale horizontal dimensions by depth - * -------------------------------------------------------*/ - if (depth != 1) { - dpixw *= depth; - dx *= depth; - dw *= depth; - } - - /* -------------------------------------------------------* - * clip rectangle to dest image - * -------------------------------------------------------*/ - /* first, clip horizontally (dx, dw) */ - if (dx < 0) { - dw += dx; /* reduce dw */ - dx = 0; - } - dhangw = dx + dw - dpixw; /* rect ovhang dest to right */ - if (dhangw > 0) - dw -= dhangw; /* reduce dw */ - - /* then, clip vertically (dy, dh) */ - if (dy < 0) { - dh += dy; /* reduce dh */ - dy = 0; - } - dhangh = dy + dh - dpixh; /* rect ovhang dest below */ - if (dhangh > 0) - dh -= dhangh; /* reduce dh */ - - /* if clipped entirely, quit */ - if ((dw <= 0) || (dh <= 0)) - return; - - /* 
-------------------------------------------------------* - * dispatch to aligned or non-aligned blitters - * -------------------------------------------------------*/ - if ((dx & 31) == 0) - rasteropUniWordAlignedLow(datad, dwpl, dx, dy, dw, dh, op); - else - rasteropUniGeneralLow(datad, dwpl, dx, dy, dw, dh, op); -} - - - -/*--------------------------------------------------------------------* - * Static low-level uni rasterop with word alignment * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropUniWordAlignedLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \return void - * - * This is called when the dest rect is left aligned - * on 32-bit word boundaries. That is: dx & 31 == 0. - * - * We make an optimized implementation of this because - * it is a common case: e.g., operating on a full dest image. 
- */ -static void -rasteropUniWordAlignedLow(l_uint32 *datad, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op) -{ -l_int32 nfullw; /* number of full words */ -l_uint32 *pfword; /* ptr to first word */ -l_int32 lwbits; /* number of ovrhang bits in last partial word */ -l_uint32 lwmask; /* mask for last partial word */ -l_uint32 *lined; -l_int32 i, j; - - /*--------------------------------------------------------* - * Preliminary calculations * - *--------------------------------------------------------*/ - nfullw = dw >> 5; - lwbits = dw & 31; - if (lwbits) - lwmask = lmask32[lwbits]; - pfword = datad + dwpl * dy + (dx >> 5); - - - /*--------------------------------------------------------* - * Now we're ready to do the ops * - *--------------------------------------------------------*/ - switch (op) - { - case PIX_CLR: - for (i = 0; i < dh; i++) { - lined = pfword + i * dwpl; - for (j = 0; j < nfullw; j++) - *lined++ = 0x0; - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, 0x0, lwmask); - } - break; - case PIX_SET: - for (i = 0; i < dh; i++) { - lined = pfword + i * dwpl; - for (j = 0; j < nfullw; j++) - *lined++ = 0xffffffff; - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, 0xffffffff, lwmask); - } - break; - case PIX_NOT(PIX_DST): - for (i = 0; i < dh; i++) { - lined = pfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = ~(*lined); - lined++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, ~(*lined), lwmask); - } - break; - default: - lept_stderr("Operation %d not permitted here!\n", op); - } -} - - -/*--------------------------------------------------------------------* - * Static low-level uni rasterop without word alignment * - *--------------------------------------------------------------------*/ -/*! 
- * \brief rasteropUniGeneralLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \return void - */ -static void -rasteropUniGeneralLow(l_uint32 *datad, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op) -{ -l_int32 dfwpartb; /* boolean (1, 0) if first dest word is partial */ -l_int32 dfwpart2b; /* boolean (1, 0) if first dest word is doubly partial */ -l_uint32 dfwmask; /* mask for first partial dest word */ -l_int32 dfwbits; /* first word dest bits in ovrhang */ -l_uint32 *pdfwpart; /* ptr to first partial dest word */ -l_int32 dfwfullb; /* boolean (1, 0) if there exists a full dest word */ -l_int32 dnfullw; /* number of full words in dest */ -l_uint32 *pdfwfull; /* ptr to first full dest word */ -l_int32 dlwpartb; /* boolean (1, 0) if last dest word is partial */ -l_uint32 dlwmask; /* mask for last partial dest word */ -l_int32 dlwbits; /* last word dest bits in ovrhang */ -l_uint32 *pdlwpart; /* ptr to last partial dest word */ -l_int32 i, j; - - - /*--------------------------------------------------------* - * Preliminary calculations * - *--------------------------------------------------------*/ - /* is the first word partial? */ - dfwmask = 0; - if ((dx & 31) == 0) { /* if not */ - dfwpartb = 0; - dfwbits = 0; - } else { /* if so */ - dfwpartb = 1; - dfwbits = 32 - (dx & 31); - dfwmask = rmask32[dfwbits]; - pdfwpart = datad + dwpl * dy + (dx >> 5); - } - - /* is the first word doubly partial? */ - if (dw >= dfwbits) { /* if not */ - dfwpart2b = 0; - } else { /* if so */ - dfwpart2b = 1; - dfwmask &= lmask32[32 - dfwbits + dw]; - } - - /* is there a full dest word? 
*/ - if (dfwpart2b == 1) { /* not */ - dfwfullb = 0; - dnfullw = 0; - } else { - dnfullw = (dw - dfwbits) >> 5; - if (dnfullw == 0) { /* if not */ - dfwfullb = 0; - } else { /* if so */ - dfwfullb = 1; - if (dfwpartb) - pdfwfull = pdfwpart + 1; - else - pdfwfull = datad + dwpl * dy + (dx >> 5); - } - } - - /* is the last word partial? */ - dlwbits = (dx + dw) & 31; - if (dfwpart2b == 1 || dlwbits == 0) { /* if not */ - dlwpartb = 0; - } else { - dlwpartb = 1; - dlwmask = lmask32[dlwbits]; - if (dfwpartb) - pdlwpart = pdfwpart + 1 + dnfullw; - else - pdlwpart = datad + dwpl * dy + (dx >> 5) + dnfullw; - } - - - /*--------------------------------------------------------* - * Now we're ready to do the ops * - *--------------------------------------------------------*/ - switch (op) - { - case PIX_CLR: - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, 0x0, dfwmask); - pdfwpart += dwpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = 0x0; - pdfwfull += dwpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, 0x0, dlwmask); - pdlwpart += dwpl; - } - } - break; - case PIX_SET: - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, 0xffffffff, dfwmask); - pdfwpart += dwpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = 0xffffffff; - pdfwfull += dwpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, 0xffffffff, dlwmask); - pdlwpart += dwpl; - } - } - break; - case PIX_NOT(PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, 
~(*pdfwpart), dfwmask); - pdfwpart += dwpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = ~(*(pdfwfull + j)); - pdfwfull += dwpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, ~(*pdlwpart), dlwmask); - pdlwpart += dwpl; - } - } - break; - default: - lept_stderr("Operation %d not permitted here!\n", op); - } -} - - -/*--------------------------------------------------------------------* - * Low-level src and dest rasterops * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dpixw width of dest - * \param[in] dpixh height of dest - * \param[in] depth depth of src and dest - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \param[in] datas ptr to src image data - * \param[in] spixw width of src - * \param[in] spixh height of src - * \param[in] swpl wpl of src - * \param[in] sx x val of UL corner of src rectangle - * \param[in] sy y val of UL corner of src rectangle - * \return void - * - * Action: Scales width, performs clipping, checks alignment, and - * dispatches for the rasterop. - * - * Warning: the two images must have equal depth. This is not checked. 
- */ -void -rasteropLow(l_uint32 *datad, - l_int32 dpixw, - l_int32 dpixh, - l_int32 depth, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op, - l_uint32 *datas, - l_int32 spixw, - l_int32 spixh, - l_int32 swpl, - l_int32 sx, - l_int32 sy) -{ -l_int32 dhangw, shangw, dhangh, shangh; - - /* -------------------------------------------------------* - * scale horizontal dimensions by depth - * -------------------------------------------------------*/ - if (depth != 1) { - dpixw *= depth; - dx *= depth; - dw *= depth; - spixw *= depth; - sx *= depth; - } - - - /* -------------------------------------------------------* - * clip to max rectangle within both src and dest - * -------------------------------------------------------*/ - /* first, clip horizontally (sx, dx, dw) */ - if (dx < 0) { - sx -= dx; /* increase sx */ - dw += dx; /* reduce dw */ - dx = 0; - } - if (sx < 0) { - dx -= sx; /* increase dx */ - dw += sx; /* reduce dw */ - sx = 0; - } - dhangw = dx + dw - dpixw; /* rect ovhang dest to right */ - if (dhangw > 0) - dw -= dhangw; /* reduce dw */ - shangw = sx + dw - spixw; /* rect ovhang src to right */ - if (shangw > 0) - dw -= shangw; /* reduce dw */ - - /* then, clip vertically (sy, dy, dh) */ - if (dy < 0) { - sy -= dy; /* increase sy */ - dh += dy; /* reduce dh */ - dy = 0; - } - if (sy < 0) { - dy -= sy; /* increase dy */ - dh += sy; /* reduce dh */ - sy = 0; - } - dhangh = dy + dh - dpixh; /* rect ovhang dest below */ - if (dhangh > 0) - dh -= dhangh; /* reduce dh */ - shangh = sy + dh - spixh; /* rect ovhang src below */ - if (shangh > 0) - dh -= shangh; /* reduce dh */ - - /* if clipped entirely, quit */ - if ((dw <= 0) || (dh <= 0)) - return; - - /* -------------------------------------------------------* - * dispatch to aligned or non-aligned blitters - * -------------------------------------------------------*/ - if (((dx & 31) == 0) && ((sx & 31) == 0)) - rasteropWordAlignedLow(datad, dwpl, dx, dy, dw, dh, op, - 
datas, swpl, sx, sy); - else if ((dx & 31) == (sx & 31)) - rasteropVAlignedLow(datad, dwpl, dx, dy, dw, dh, op, - datas, swpl, sx, sy); - else - rasteropGeneralLow(datad, dwpl, dx, dy, dw, dh, op, - datas, swpl, sx, sy); -} - - -/*--------------------------------------------------------------------* - * Static low-level rasterop with vertical word alignment * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropWordAlignedLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \param[in] datas ptr to src image data - * \param[in] swpl wpl of src - * \param[in] sx x val of UL corner of src rectangle - * \param[in] sy y val of UL corner of src rectangle - * \return void - * - * This is called when both the src and dest rects - * are left aligned on 32-bit word boundaries. - * That is: dx & 31 == 0 and sx & 31 == 0 - * - * We make an optimized implementation of this because - * it is a common case: e.g., two images are rasterop'd - * starting from their UL corners 0,0. 
- */ -static void -rasteropWordAlignedLow(l_uint32 *datad, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op, - l_uint32 *datas, - l_int32 swpl, - l_int32 sx, - l_int32 sy) -{ -l_int32 nfullw; /* number of full words */ -l_uint32 *psfword; /* ptr to first src word */ -l_uint32 *pdfword; /* ptr to first dest word */ -l_int32 lwbits; /* number of ovrhang bits in last partial word */ -l_uint32 lwmask; /* mask for last partial word */ -l_uint32 *lines, *lined; -l_int32 i, j; - - - /*--------------------------------------------------------* - * Preliminary calculations * - *--------------------------------------------------------*/ - nfullw = dw >> 5; - lwbits = dw & 31; - if (lwbits) - lwmask = lmask32[lwbits]; - psfword = datas + swpl * sy + (sx >> 5); - pdfword = datad + dwpl * dy + (dx >> 5); - - /*--------------------------------------------------------* - * Now we're ready to do the ops * - *--------------------------------------------------------*/ - switch (op) - { - case PIX_SRC: - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = *lines; - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, *lines, lwmask); - } - break; - case PIX_NOT(PIX_SRC): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = ~(*lines); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, ~(*lines), lwmask); - } - break; - case (PIX_SRC | PIX_DST): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = (*lines | *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (*lines | *lined), lwmask); - } - break; - case (PIX_SRC & PIX_DST): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; 
j++) { - *lined = (*lines & *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (*lines & *lined), lwmask); - } - break; - case (PIX_SRC ^ PIX_DST): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = (*lines ^ *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (*lines ^ *lined), lwmask); - } - break; - case (PIX_NOT(PIX_SRC) | PIX_DST): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = (~(*lines) | *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (~(*lines) | *lined), lwmask); - } - break; - case (PIX_NOT(PIX_SRC) & PIX_DST): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = (~(*lines) & *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (~(*lines) & *lined), lwmask); - } - break; - case (PIX_SRC | PIX_NOT(PIX_DST)): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = (*lines | ~(*lined)); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (*lines | ~(*lined)), lwmask); - } - break; - case (PIX_SRC & PIX_NOT(PIX_DST)): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = (*lines & ~(*lined)); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, (*lines & ~(*lined)), lwmask); - } - break; - case (PIX_NOT(PIX_SRC | PIX_DST)): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = ~(*lines | *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, ~(*lines | *lined), lwmask); - } - break; 
- case (PIX_NOT(PIX_SRC & PIX_DST)): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = ~(*lines & *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, ~(*lines & *lined), lwmask); - } - break; - /* this is three cases: ~(s ^ d), ~s ^ d, s ^ ~d */ - case (PIX_NOT(PIX_SRC ^ PIX_DST)): - for (i = 0; i < dh; i++) { - lines = psfword + i * swpl; - lined = pdfword + i * dwpl; - for (j = 0; j < nfullw; j++) { - *lined = ~(*lines ^ *lined); - lined++; - lines++; - } - if (lwbits) - *lined = COMBINE_PARTIAL(*lined, ~(*lines ^ *lined), lwmask); - } - break; - default: - lept_stderr("Operation %d invalid\n", op); - } -} - - - -/*--------------------------------------------------------------------* - * Static low-level rasterop with vertical word alignment * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropVAlignedLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \param[in] datas ptr to src image data - * \param[in] swpl wpl of src - * \param[in] sx x val of UL corner of src rectangle - * \param[in] sy y val of UL corner of src rectangle - * \return void - * - * This is called when the left side of the src and dest - * rects have the same alignment relative to 32-bit word - * boundaries; i.e., dx & 31) == (sx & 31 - */ -static void -rasteropVAlignedLow(l_uint32 *datad, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op, - l_uint32 *datas, - l_int32 swpl, - l_int32 sx, - l_int32 sy) -{ -l_int32 dfwpartb; /* boolean (1, 0) if first dest word is partial */ -l_int32 dfwpart2b; /* boolean (1, 0) if first dest word is doubly partial 
*/ -l_uint32 dfwmask; /* mask for first partial dest word */ -l_int32 dfwbits; /* first word dest bits in ovrhang */ -l_uint32 *pdfwpart; /* ptr to first partial dest word */ -l_uint32 *psfwpart; /* ptr to first partial src word */ -l_int32 dfwfullb; /* boolean (1, 0) if there exists a full dest word */ -l_int32 dnfullw; /* number of full words in dest */ -l_uint32 *pdfwfull; /* ptr to first full dest word */ -l_uint32 *psfwfull; /* ptr to first full src word */ -l_int32 dlwpartb; /* boolean (1, 0) if last dest word is partial */ -l_uint32 dlwmask; /* mask for last partial dest word */ -l_int32 dlwbits; /* last word dest bits in ovrhang */ -l_uint32 *pdlwpart; /* ptr to last partial dest word */ -l_uint32 *pslwpart; /* ptr to last partial src word */ -l_int32 i, j; - - - /*--------------------------------------------------------* - * Preliminary calculations * - *--------------------------------------------------------*/ - /* is the first word partial? */ - dfwmask = 0; - if ((dx & 31) == 0) { /* if not */ - dfwpartb = 0; - dfwbits = 0; - } else { /* if so */ - dfwpartb = 1; - dfwbits = 32 - (dx & 31); - dfwmask = rmask32[dfwbits]; - pdfwpart = datad + dwpl * dy + (dx >> 5); - psfwpart = datas + swpl * sy + (sx >> 5); - } - - /* is the first word doubly partial? */ - if (dw >= dfwbits) { /* if not */ - dfwpart2b = 0; - } else { /* if so */ - dfwpart2b = 1; - dfwmask &= lmask32[32 - dfwbits + dw]; - } - - /* is there a full dest word? */ - if (dfwpart2b == 1) { /* not */ - dfwfullb = 0; - dnfullw = 0; - } else { - dnfullw = (dw - dfwbits) >> 5; - if (dnfullw == 0) { /* if not */ - dfwfullb = 0; - } else { /* if so */ - dfwfullb = 1; - if (dfwpartb) { - pdfwfull = pdfwpart + 1; - psfwfull = psfwpart + 1; - } else { - pdfwfull = datad + dwpl * dy + (dx >> 5); - psfwfull = datas + swpl * sy + (sx >> 5); - } - } - } - - /* is the last word partial? 
*/ - dlwbits = (dx + dw) & 31; - if (dfwpart2b == 1 || dlwbits == 0) { /* if not */ - dlwpartb = 0; - } else { - dlwpartb = 1; - dlwmask = lmask32[dlwbits]; - if (dfwpartb) { - pdlwpart = pdfwpart + 1 + dnfullw; - pslwpart = psfwpart + 1 + dnfullw; - } else { - pdlwpart = datad + dwpl * dy + (dx >> 5) + dnfullw; - pslwpart = datas + swpl * sy + (sx >> 5) + dnfullw; - } - } - - - /*--------------------------------------------------------* - * Now we're ready to do the ops * - *--------------------------------------------------------*/ - switch (op) - { - case PIX_SRC: - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, *psfwpart, dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = *(psfwfull + j); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, *pslwpart, dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case PIX_NOT(PIX_SRC): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, ~(*psfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = ~(*(psfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, ~(*pslwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC | PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (*psfwpart | *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += 
swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) |= *(psfwfull + j); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (*pslwpart | *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC & PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (*psfwpart & *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) &= *(psfwfull + j); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (*pslwpart & *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC ^ PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (*psfwpart ^ *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) ^= *(psfwfull + j); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (*pslwpart ^ *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC) | PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (~(*psfwpart) | *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if 
(dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) |= ~(*(psfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (~(*pslwpart) | *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC) & PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (~(*psfwpart) & *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) &= ~(*(psfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (~(*pslwpart) & *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC | PIX_NOT(PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (*psfwpart | ~(*pdfwpart)), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = *(psfwfull + j) | ~(*(pdfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (*pslwpart | ~(*pdlwpart)), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC & PIX_NOT(PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (*psfwpart & ~(*pdfwpart)), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words 
*/ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = *(psfwfull + j) & ~(*(pdfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (*pslwpart & ~(*pdlwpart)), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC | PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - ~(*psfwpart | *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = ~(*(psfwfull + j) | *(pdfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - ~(*pslwpart | *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC & PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - ~(*psfwpart & *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = ~(*(psfwfull + j) & *(pdfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - ~(*pslwpart & *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - /* this is three cases: ~(s ^ d), ~s ^ d, s ^ ~d */ - case (PIX_NOT(PIX_SRC ^ PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - ~(*psfwpart ^ 
*pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) - *(pdfwfull + j) = ~(*(psfwfull + j) ^ *(pdfwfull + j)); - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - ~(*pslwpart ^ *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - default: - lept_stderr("Operation %x invalid\n", op); - } -} - - -/*--------------------------------------------------------------------* - * Static low-level rasterop without vertical word alignment * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropGeneralLow() - * - * \param[in] datad ptr to dest image data - * \param[in] dwpl wpl of dest - * \param[in] dx x val of UL corner of dest rectangle - * \param[in] dy y val of UL corner of dest rectangle - * \param[in] dw width of dest rectangle - * \param[in] dh height of dest rectangle - * \param[in] op op code - * \param[in] datas ptr to src image data - * \param[in] swpl wpl of src - * \param[in] sx x val of UL corner of src rectangle - * \param[in] sy y val of UL corner of src rectangle - * \return void - * - * This is called when the src and dest rects are - * do not have the same 32-bit word alignment. - * - * The method is a generalization of rasteropVAlignLow. - * There, the src image pieces were directly merged - * with the dest. Here, we shift the source bits - * to fill words that are aligned with the dest, and - * then use those "source words" exactly in place - * of the source words that were used in rasteropVAlignLow. - * - * The critical parameter is thus the shift required - * for the src. Consider the left edge of the rectangle. - * The overhang into the src and dest words are found, - * and the difference is exactly this shift. 
There are - * two separate cases, depending on whether the src pixels - * are shifted left or right. If the src overhang is - * larger than the dest overhang, the src is shifted to - * the right, a number of pixels equal to the shift are - * left over for filling the next dest word, if necessary. - * But if the dest overhang is larger than the src, - * the src is shifted to the left, and it may also be - * necessary to shift an equal number of pixels in from - * the next src word. However, in both cases, after - * the first partial or complete dest word has been - * filled, the next src pixels will come from a left - * shift that exhausts the pixels in the src word. - */ -static void -rasteropGeneralLow(l_uint32 *datad, - l_int32 dwpl, - l_int32 dx, - l_int32 dy, - l_int32 dw, - l_int32 dh, - l_int32 op, - l_uint32 *datas, - l_int32 swpl, - l_int32 sx, - l_int32 sy) -{ -l_int32 dfwpartb; /* boolean (1, 0) if first dest word is partial */ -l_int32 dfwpart2b; /* boolean (1, 0) if 1st dest word is doubly partial */ -l_uint32 dfwmask; /* mask for first partial dest word */ -l_int32 dfwbits; /* first word dest bits in overhang; 0-31 */ -l_int32 dhang; /* dest overhang in first partial word, */ - /* or 0 if dest is word aligned (same as dfwbits) */ -l_uint32 *pdfwpart; /* ptr to first partial dest word */ -l_uint32 *psfwpart; /* ptr to first partial src word */ -l_int32 dfwfullb; /* boolean (1, 0) if there exists a full dest word */ -l_int32 dnfullw; /* number of full words in dest */ -l_uint32 *pdfwfull; /* ptr to first full dest word */ -l_uint32 *psfwfull; /* ptr to first full src word */ -l_int32 dlwpartb; /* boolean (1, 0) if last dest word is partial */ -l_uint32 dlwmask; /* mask for last partial dest word */ -l_int32 dlwbits; /* last word dest bits in ovrhang */ -l_uint32 *pdlwpart; /* ptr to last partial dest word */ -l_uint32 *pslwpart; /* ptr to last partial src word */ -l_uint32 sword; /* compose src word aligned with the dest words */ -l_int32 sfwbits; /* 
first word src bits in overhang (1-32), */ - /* or 32 if src is word aligned */ -l_int32 shang; /* source overhang in the first partial word, */ - /* or 0 if src is word aligned (not same as sfwbits) */ -l_int32 sleftshift; /* bits to shift left for source word to align */ - /* with the dest. Also the number of bits that */ - /* get shifted to the right to align with the dest. */ -l_int32 srightshift; /* bits to shift right for source word to align */ - /* with dest. Also, the number of bits that get */ - /* shifted left to align with the dest. */ -l_int32 srightmask; /* mask for selecting sleftshift bits that have */ - /* been shifted right by srightshift bits */ -l_int32 sfwshiftdir; /* either SHIFT_LEFT or SHIFT_RIGHT */ -l_int32 sfwaddb; /* boolean: do we need an additional sfw right shift? */ -l_int32 slwaddb; /* boolean: do we need an additional slw right shift? */ -l_int32 i, j; - - - /*--------------------------------------------------------* - * Preliminary calculations * - *--------------------------------------------------------*/ - /* To get alignment of src with dst (e.g., in the - * full words) the src must do a left shift of its - * relative overhang in the current src word, - * and OR that with a right shift of - * (31 - relative overhang) from the next src word. - * We find the absolute overhangs, the relative overhangs, - * the required shifts and the src mask */ - if ((sx & 31) == 0) - shang = 0; - else - shang = 32 - (sx & 31); - if ((dx & 31) == 0) - dhang = 0; - else - dhang = 32 - (dx & 31); - - if (shang == 0 && dhang == 0) { /* this should be treated by an - aligned operation, not by - this general rasterop! */ - sleftshift = 0; - srightshift = 0; - srightmask = rmask32[0]; - } else { - if (dhang > shang) - sleftshift = dhang - shang; - else - sleftshift = 32 - (shang - dhang); - srightshift = 32 - sleftshift; - srightmask = rmask32[sleftshift]; - } - - /* is the first dest word partial? 
*/ - dfwmask = 0; - if ((dx & 31) == 0) { /* if not */ - dfwpartb = 0; - dfwbits = 0; - } else { /* if so */ - dfwpartb = 1; - dfwbits = 32 - (dx & 31); - dfwmask = rmask32[dfwbits]; - pdfwpart = datad + dwpl * dy + (dx >> 5); - psfwpart = datas + swpl * sy + (sx >> 5); - sfwbits = 32 - (sx & 31); - if (dfwbits > sfwbits) { - sfwshiftdir = SHIFT_LEFT; /* and shift by sleftshift */ - if (dw < shang) - sfwaddb = 0; - else - sfwaddb = 1; /* and rshift in next src word by srightshift */ - } else { - sfwshiftdir = SHIFT_RIGHT; /* and shift by srightshift */ - } - } - - /* is the first dest word doubly partial? */ - if (dw >= dfwbits) { /* if not */ - dfwpart2b = 0; - } else { /* if so */ - dfwpart2b = 1; - dfwmask &= lmask32[32 - dfwbits + dw]; - } - - /* is there a full dest word? */ - if (dfwpart2b == 1) { /* not */ - dfwfullb = 0; - dnfullw = 0; - } else { - dnfullw = (dw - dfwbits) >> 5; - if (dnfullw == 0) { /* if not */ - dfwfullb = 0; - } else { /* if so */ - dfwfullb = 1; - pdfwfull = datad + dwpl * dy + ((dx + dhang) >> 5); - psfwfull = datas + swpl * sy + ((sx + dhang) >> 5); /* yes, dhang */ - } - } - - /* is the last dest word partial? */ - dlwbits = (dx + dw) & 31; - if (dfwpart2b == 1 || dlwbits == 0) { /* if not */ - dlwpartb = 0; - } else { - dlwpartb = 1; - dlwmask = lmask32[dlwbits]; - pdlwpart = datad + dwpl * dy + ((dx + dhang) >> 5) + dnfullw; - pslwpart = datas + swpl * sy + ((sx + dhang) >> 5) + dnfullw; - if (dlwbits <= srightshift) /* must be <= here !!! 
*/ - slwaddb = 0; /* we got enough bits from current src word */ - else - slwaddb = 1; /* must rshift in next src word by srightshift */ - } - - - /*--------------------------------------------------------* - * Now we're ready to do the ops * - *--------------------------------------------------------*/ - switch (op) - { - case PIX_SRC: - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, sword, dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, sword, dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case PIX_NOT(PIX_SRC): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, ~sword, dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) 
<< sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = ~sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, ~sword, dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC | PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (sword | *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) |= sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (sword | *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC & PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (sword & 
*pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) &= sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (sword & *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC ^ PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (sword ^ *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) ^= sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (sword ^ *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC) | PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = 
*psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (~sword | *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) |= ~sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (~sword | *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC) & PIX_DST): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (~sword & *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) &= ~sword; - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = 
COMBINE_PARTIAL(*pdlwpart, - (~sword & *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC | PIX_NOT(PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (sword | ~(*pdfwpart)), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = sword | ~(*(pdfwfull + j)); - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (sword | ~(*pdlwpart)), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_SRC & PIX_NOT(PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - (sword & ~(*pdfwpart)), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = sword & 
~(*(pdfwfull + j)); - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - (sword & ~(*pdlwpart)), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC | PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - ~(sword | *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = ~(sword | *(pdfwfull + j)); - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - ~(sword | *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - case (PIX_NOT(PIX_SRC & PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - ~(sword & *pdfwpart), 
dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = ~(sword & *(pdfwfull + j)); - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - ~(sword & *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - /* this is three cases: ~(s ^ d), ~s ^ d, s ^ ~d */ - case (PIX_NOT(PIX_SRC ^ PIX_DST)): - /* do the first partial word */ - if (dfwpartb) { - for (i = 0; i < dh; i++) - { - if (sfwshiftdir == SHIFT_LEFT) { - sword = *psfwpart << sleftshift; - if (sfwaddb) - sword = COMBINE_PARTIAL(sword, - *(psfwpart + 1) >> srightshift, - srightmask); - } else { /* shift right */ - sword = *psfwpart >> srightshift; - } - - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, - ~(sword ^ *pdfwpart), dfwmask); - pdfwpart += dwpl; - psfwpart += swpl; - } - } - - /* do the full words */ - if (dfwfullb) { - for (i = 0; i < dh; i++) { - for (j = 0; j < dnfullw; j++) { - sword = COMBINE_PARTIAL(*(psfwfull + j) << sleftshift, - *(psfwfull + j + 1) >> srightshift, - srightmask); - *(pdfwfull + j) = ~(sword ^ *(pdfwfull + j)); - } - pdfwfull += dwpl; - psfwfull += swpl; - } - } - - /* do the last partial word */ - if (dlwpartb) { - for (i = 0; i < dh; i++) { - sword = *pslwpart << sleftshift; - if (slwaddb) - sword = COMBINE_PARTIAL(sword, - *(pslwpart + 1) >> srightshift, - srightmask); - - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, - ~(sword ^ *pdlwpart), dlwmask); - pdlwpart += dwpl; - pslwpart += swpl; - } - } - break; - default: - lept_stderr("Operation %x invalid\n", op); - } -} - - 
-/*--------------------------------------------------------------------* - * Low level in-place full height vertical block transfer * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropVipLow() - * - * \param[in] data ptr to image data - * \param[in] pixw width - * \param[in] pixh height - * \param[in] depth depth - * \param[in] wpl wpl - * \param[in] x x val of UL corner of rectangle - * \param[in] w width of rectangle - * \param[in] shift + shifts data downward in vertical column - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) This clears the pixels that are left exposed after the
- *          translation.  You can consider them as pixels that are
- *          shifted in from outside the image.  This can be later
- *          overridden by the incolor parameter in higher-level functions
- *          that call this.  For example, for images with depth > 1,
- *          these pixels are cleared to black; to be white they
- *          must later be SET to white.  See, e.g., pixRasteropVip().
- *      (2) This function scales the width to accommodate any depth,
- *          performs clipping, and then does the in-place rasterop.
- * 
- */ -void -rasteropVipLow(l_uint32 *data, - l_int32 pixw, - l_int32 pixh, - l_int32 depth, - l_int32 wpl, - l_int32 x, - l_int32 w, - l_int32 shift) -{ -l_int32 fwpartb; /* boolean (1, 0) if first word is partial */ -l_int32 fwpart2b; /* boolean (1, 0) if first word is doubly partial */ -l_uint32 fwmask; /* mask for first partial word */ -l_int32 fwbits; /* first word bits in ovrhang */ -l_uint32 *pdfwpart; /* ptr to first partial dest word */ -l_uint32 *psfwpart; /* ptr to first partial src word */ -l_int32 fwfullb; /* boolean (1, 0) if there exists a full word */ -l_int32 nfullw; /* number of full words */ -l_uint32 *pdfwfull; /* ptr to first full dest word */ -l_uint32 *psfwfull; /* ptr to first full src word */ -l_int32 lwpartb; /* boolean (1, 0) if last word is partial */ -l_uint32 lwmask; /* mask for last partial word */ -l_int32 lwbits; /* last word bits in ovrhang */ -l_uint32 *pdlwpart; /* ptr to last partial dest word */ -l_uint32 *pslwpart; /* ptr to last partial src word */ -l_int32 dirwpl; /* directed wpl (-wpl * sign(shift)) */ -l_int32 absshift; /* absolute value of shift; for use in iterator */ -l_int32 vlimit; /* vertical limit value for iterations */ -l_int32 i, j; - - - /*--------------------------------------------------------* - * Scale horizontal dimensions by depth * - *--------------------------------------------------------*/ - if (depth != 1) { - pixw *= depth; - x *= depth; - w *= depth; - } - - - /*--------------------------------------------------------* - * Clip horizontally * - *--------------------------------------------------------*/ - if (x < 0) { - w += x; /* reduce w */ - x = 0; /* clip to x = 0 */ - } - if (x >= pixw || w <= 0) /* no part of vertical slice is in the image */ - return; - - if (x + w > pixw) - w = pixw - x; /* clip to x + w = pixw */ - - /*--------------------------------------------------------* - * Preliminary calculations * - *--------------------------------------------------------*/ - /* is the first word 
partial? */ - if ((x & 31) == 0) { /* if not */ - fwpartb = 0; - fwbits = 0; - } else { /* if so */ - fwpartb = 1; - fwbits = 32 - (x & 31); - fwmask = rmask32[fwbits]; - if (shift >= 0) { /* go up from bottom */ - pdfwpart = data + wpl * (pixh - 1) + (x >> 5); - psfwpart = data + wpl * (pixh - 1 - shift) + (x >> 5); - } else { /* go down from top */ - pdfwpart = data + (x >> 5); - psfwpart = data - wpl * shift + (x >> 5); - } - } - - /* is the first word doubly partial? */ - if (w >= fwbits) { /* if not */ - fwpart2b = 0; - } else { /* if so */ - fwpart2b = 1; - fwmask &= lmask32[32 - fwbits + w]; - } - - /* is there a full dest word? */ - if (fwpart2b == 1) { /* not */ - fwfullb = 0; - nfullw = 0; - } else { - nfullw = (w - fwbits) >> 5; - if (nfullw == 0) { /* if not */ - fwfullb = 0; - } else { /* if so */ - fwfullb = 1; - if (fwpartb) { - pdfwfull = pdfwpart + 1; - psfwfull = psfwpart + 1; - } else if (shift >= 0) { /* go up from bottom */ - pdfwfull = data + wpl * (pixh - 1) + (x >> 5); - psfwfull = data + wpl * (pixh - 1 - shift) + (x >> 5); - } else { /* go down from top */ - pdfwfull = data + (x >> 5); - psfwfull = data - wpl * shift + (x >> 5); - } - } - } - - /* is the last word partial? */ - lwbits = (x + w) & 31; - if (fwpart2b == 1 || lwbits == 0) { /* if not */ - lwpartb = 0; - } else { - lwpartb = 1; - lwmask = lmask32[lwbits]; - if (fwpartb) { - pdlwpart = pdfwpart + 1 + nfullw; - pslwpart = psfwpart + 1 + nfullw; - } else if (shift >= 0) { /* go up from bottom */ - pdlwpart = data + wpl * (pixh - 1) + (x >> 5) + nfullw; - pslwpart = data + wpl * (pixh - 1 - shift) + (x >> 5) + nfullw; - } else { /* go down from top */ - pdlwpart = data + (x >> 5) + nfullw; - pslwpart = data - wpl * shift + (x >> 5) + nfullw; - } - } - - /* determine the direction of flow from the shift - * If the shift >= 0, data flows downard from src - * to dest, starting at the bottom and working up. 
- * If shift < 0, data flows upward from src to - * dest, starting at the top and working down. */ - dirwpl = (shift >= 0) ? -wpl : wpl; - absshift = L_ABS(shift); - vlimit = L_MAX(0, pixh - absshift); - - - /*--------------------------------------------------------* - * Now we're ready to do the ops * - *--------------------------------------------------------*/ - - /* Do the first partial word */ - if (fwpartb) { - for (i = 0; i < vlimit; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, *psfwpart, fwmask); - pdfwpart += dirwpl; - psfwpart += dirwpl; - } - - /* Clear the incoming pixels */ - for (i = vlimit; i < pixh; i++) { - *pdfwpart = COMBINE_PARTIAL(*pdfwpart, 0x0, fwmask); - pdfwpart += dirwpl; - } - } - - /* Do the full words */ - if (fwfullb) { - for (i = 0; i < vlimit; i++) { - for (j = 0; j < nfullw; j++) - *(pdfwfull + j) = *(psfwfull + j); - pdfwfull += dirwpl; - psfwfull += dirwpl; - } - - /* Clear the incoming pixels */ - for (i = vlimit; i < pixh; i++) { - for (j = 0; j < nfullw; j++) - *(pdfwfull + j) = 0x0; - pdfwfull += dirwpl; - } - } - - /* Do the last partial word */ - if (lwpartb) { - for (i = 0; i < vlimit; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, *pslwpart, lwmask); - pdlwpart += dirwpl; - pslwpart += dirwpl; - } - - /* Clear the incoming pixels */ - for (i = vlimit; i < pixh; i++) { - *pdlwpart = COMBINE_PARTIAL(*pdlwpart, 0x0, lwmask); - pdlwpart += dirwpl; - } - } -} - - - -/*--------------------------------------------------------------------* - * Low level in-place full width horizontal block transfer * - *--------------------------------------------------------------------*/ -/*! - * \brief rasteropHipLow() - * - * \param[in] data ptr to image data - * \param[in] pixh height - * \param[in] depth depth - * \param[in] wpl wpl - * \param[in] y y val of UL corner of rectangle - * \param[in] h height of rectangle - * \param[in] shift + shifts data to the left in a horizontal column - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) This clears the pixels that are left exposed after the rasterop.
- *          Therefore, for Pix with depth > 1, these pixels become black,
- *          and must be subsequently SET if they are to be white.
- *          For example, see pixRasteropHip().
- *      (2) This function performs clipping and calls shiftDataHorizontalLow()
- *          to do the in-place rasterop on each line.
- * 
- */ -void -rasteropHipLow(l_uint32 *data, - l_int32 pixh, - l_int32 depth, - l_int32 wpl, - l_int32 y, - l_int32 h, - l_int32 shift) -{ -l_int32 i; -l_uint32 *line; - - /* clip band if necessary */ - if (y < 0) { - h += y; /* reduce h */ - y = 0; /* clip to y = 0 */ - } - if (h <= 0 || y > pixh) /* no part of horizontal slice is in the image */ - return; - - if (y + h > pixh) - h = pixh - y; /* clip to y + h = pixh */ - - for (i = y; i < y + h; i++) { - line = data + i * wpl; - shiftDataHorizontalLow(line, wpl, line, wpl, shift * depth); - } -} - - -/*! - * \brief shiftDataHorizontalLow() - * - * \param[in] datad ptr to beginning of dest line - * \param[in] wpld wpl of dest - * \param[in] datas ptr to beginning of src line - * \param[in] wpls wpl of src - * \param[in] shift horizontal shift of block; >0 is to right - * \return void - * - *
- * Notes:
- *      (1) This can also be used for in-place operation; see, e.g.,
- *          rasteropHipLow().
- *      (2) We are clearing the pixels that are shifted in from
- *          outside the image.  This can be overridden by the
- *          incolor parameter in higher-level functions that call this.
- * 
- */ -static void -shiftDataHorizontalLow(l_uint32 *datad, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 shift) -{ -l_int32 j, firstdw, wpl, rshift, lshift; -l_uint32 *lined, *lines; - - lined = datad; - lines = datas; - - if (shift >= 0) { /* src shift to right; data flows to - * right, starting at right edge and - * progressing leftward. */ - firstdw = shift / 32; - wpl = L_MIN(wpls, wpld - firstdw); - lined += firstdw + wpl - 1; - lines += wpl - 1; - rshift = shift & 31; - if (rshift == 0) { - for (j = 0; j < wpl; j++) - *lined-- = *lines--; - - /* clear out the rest to the left edge */ - for (j = 0; j < firstdw; j++) - *lined-- = 0; - } else { - lshift = 32 - rshift; - for (j = 1; j < wpl; j++) { - *lined-- = *(lines - 1) << lshift | *lines >> rshift; - lines--; - } - *lined = *lines >> rshift; /* partial first */ - - /* clear out the rest to the left edge */ - *lined &= ~lmask32[rshift]; - lined--; - for (j = 0; j < firstdw; j++) - *lined-- = 0; - } - } else { /* src shift to left; data flows to left, starting - * at left edge and progressing rightward. 
*/ - firstdw = (-shift) / 32; - wpl = L_MIN(wpls - firstdw, wpld); - lines += firstdw; - lshift = (-shift) & 31; - if (lshift == 0) { - for (j = 0; j < wpl; j++) - *lined++ = *lines++; - - /* clear out the rest to the right edge */ - for (j = 0; j < firstdw; j++) - *lined++ = 0; - } else { - rshift = 32 - lshift; - for (j = 1; j < wpl; j++) { - *lined++ = *lines << lshift | *(lines + 1) >> rshift; - lines++; - } - *lined = *lines << lshift; /* partial last */ - - /* clear out the rest to the right edge */ - /* first clear the lshift pixels of this partial word */ - *lined &= ~rmask32[lshift]; - lined++; - /* then the remaining words to the right edge */ - for (j = 0; j < firstdw; j++) - *lined++ = 0; - } - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotate.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotate.c deleted file mode 100644 index 6fcdb1cc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotate.c +++ /dev/null @@ -1,598 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file rotate.c - *
- *
- *     General rotation about image center
- *              PIX     *pixRotate()
- *              PIX     *pixEmbedForRotation()
- *
- *     General rotation by sampling
- *              PIX     *pixRotateBySampling()
- *
- *     Nice (slow) rotation of 1 bpp image
- *              PIX     *pixRotateBinaryNice()
- *
- *     Rotation including alpha (blend) component
- *              PIX     *pixRotateWithAlpha()
- *
- *     Rotations are measured in radians; clockwise is positive.
- *
- *     The general rotation pixRotate() does the best job for
- *     rotating about the image center.  For 1 bpp, it uses shear;
- *     for others, it uses either shear or area mapping.
- *     If requested, it expands the output image so that no pixels are lost
- *     in the rotation, and this can be done on multiple successive shears
- *     without expanding beyond the maximum necessary size.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -extern l_float32 AlphaMaskBorderVals[2]; -static const l_float32 MinAngleToRotate = 0.001; /* radians; ~0.06 deg */ -static const l_float32 Max1BppShearAngle = 0.06; /* radians; ~3 deg */ -static const l_float32 LimitShearAngle = 0.35; /* radians; ~20 deg */ - -/*------------------------------------------------------------------* - * General rotation about the center * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotate() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp rgb - * \param[in] angle radians; clockwise is positive - * \param[in] type L_ROTATE_AREA_MAP, L_ROTATE_SHEAR, L_ROTATE_SAMPLING - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \param[in] width original width; use 0 to avoid embedding - * \param[in] height original height; use 0 to avoid embedding - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is a high-level, simple interface for rotating images
- *          about their center.
- *      (2) For very small rotations, just return a clone.
- *      (3) Rotation brings either white or black pixels in
- *          from outside the image.
- *      (4) The rotation type is adjusted if necessary for the image
- *          depth and size of rotation angle.  For 1 bpp images, we
- *          rotate either by shear or sampling.
- *      (5) Colormaps are removed for rotation by area mapping.
- *      (6) The dest can be expanded so that no image pixels
- *          are lost.  To invoke expansion, input the original
- *          width and height.  For repeated rotation, use of the
- *          original width and height allows the expansion to
- *          stop at the maximum required size, which is a square
- *          with side = sqrt(w*w + h*h).
- * 
- */ -PIX * -pixRotate(PIX *pixs, - l_float32 angle, - l_int32 type, - l_int32 incolor, - l_int32 width, - l_int32 height) -{ -l_int32 w, h, d; -l_uint32 fillval; -PIX *pix1, *pix2, *pix3, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixRotate"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (type != L_ROTATE_SHEAR && type != L_ROTATE_AREA_MAP && - type != L_ROTATE_SAMPLING) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - /* Adjust rotation type if necessary: - * - If d == 1 bpp and the angle is more than about 6 degrees, - * rotate by sampling; otherwise rotate by shear. - * - If d > 1, only allow shear rotation up to about 20 degrees; - * beyond that, default a shear request to sampling. */ - if (pixGetDepth(pixs) == 1) { - if (L_ABS(angle) > Max1BppShearAngle) { - if (type != L_ROTATE_SAMPLING) - L_INFO("1 bpp, large angle; rotate by sampling\n", procName); - type = L_ROTATE_SAMPLING; - } else if (type != L_ROTATE_SHEAR) { - L_INFO("1 bpp; rotate by shear\n", procName); - type = L_ROTATE_SHEAR; - } - } else if (L_ABS(angle) > LimitShearAngle && type == L_ROTATE_SHEAR) { - L_INFO("large angle; rotate by sampling\n", procName); - type = L_ROTATE_SAMPLING; - } - - /* Remove colormap if we rotate by area mapping. */ - cmap = pixGetColormap(pixs); - if (cmap && type == L_ROTATE_AREA_MAP) - pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pix1 = pixClone(pixs); - cmap = pixGetColormap(pix1); - - /* Otherwise, if there is a colormap and we're not embedding, - * add white color if it doesn't exist. 
*/ - if (cmap && width == 0) { /* no embedding; generate %incolor */ - if (incolor == L_BRING_IN_BLACK) - pixcmapAddBlackOrWhite(cmap, 0, NULL); - else /* L_BRING_IN_WHITE */ - pixcmapAddBlackOrWhite(cmap, 1, NULL); - } - - /* Request to embed in a larger image; do if necessary */ - pix2 = pixEmbedForRotation(pix1, angle, incolor, width, height); - - /* Area mapping requires 8 or 32 bpp. If less than 8 bpp and - * area map rotation is requested, convert to 8 bpp. */ - d = pixGetDepth(pix2); - if (type == L_ROTATE_AREA_MAP && d < 8) - pix3 = pixConvertTo8(pix2, FALSE); - else - pix3 = pixClone(pix2); - - /* Do the rotation: shear, sampling or area mapping */ - pixGetDimensions(pix3, &w, &h, &d); - if (type == L_ROTATE_SHEAR) { - pixd = pixRotateShearCenter(pix3, angle, incolor); - } else if (type == L_ROTATE_SAMPLING) { - pixd = pixRotateBySampling(pix3, w / 2, h / 2, angle, incolor); - } else { /* rotate by area mapping */ - fillval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - fillval = 255; - else /* d == 32 */ - fillval = 0xffffff00; - } - if (d == 8) - pixd = pixRotateAMGray(pix3, angle, fillval); - else /* d == 32 */ - pixd = pixRotateAMColor(pix3, angle, fillval); - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - return pixd; -} - - -/*! - * \brief pixEmbedForRotation() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp rgb - * \param[in] angle radians; clockwise is positive - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \param[in] width original width; use 0 to avoid embedding - * \param[in] height original height; use 0 to avoid embedding - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) For very small rotations, just return a clone.
- *      (2) Generate larger image to embed pixs if necessary, and
- *          place the center of the input image in the center.
- *      (3) Rotation brings either white or black pixels in
- *          from outside the image.  For colormapped images where
- *          there is no white or black, a new color is added if
- *          possible for these pixels; otherwise, either the
- *          lightest or darkest color is used.  In most cases,
- *          the colormap will be removed prior to rotation.
- *      (4) The dest is to be expanded so that no image pixels
- *          are lost after rotation.  Input of the original width
- *          and height allows the expansion to stop at the maximum
- *          required size, which is a square with side equal to
- *          sqrt(w*w + h*h).
- *      (5) For an arbitrary angle, the expansion can be found by
- *          considering the UL and UR corners.  As the image is
- *          rotated, these move in an arc centered at the center of
- *          the image.  Normalize to a unit circle by dividing by half
- *          the image diagonal.  After a rotation of T radians, the UL
- *          and UR corners are at points T radians along the unit
- *          circle.  Compute the x and y coordinates of both these
- *          points and take the max of absolute values; these represent
- *          the half width and half height of the containing rectangle.
- *          The arithmetic is done using formulas for sin(a+b) and cos(a+b),
- *          where b = T.  For the UR corner, sin(a) = h/d and cos(a) = w/d.
- *          For the UL corner, replace a by (pi - a), and you have
- *          sin(pi - a) = h/d, cos(pi - a) = -w/d.  The equations
- *          given below follow directly.
- * 
- */ -PIX * -pixEmbedForRotation(PIX *pixs, - l_float32 angle, - l_int32 incolor, - l_int32 width, - l_int32 height) -{ -l_int32 w, h, d, w1, h1, w2, h2, maxside, wnew, hnew, xoff, yoff, setcolor; -l_float64 sina, cosa, fw, fh; -PIX *pixd; - - PROCNAME("pixEmbedForRotation"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - /* Test if big enough to hold any rotation of the original image */ - pixGetDimensions(pixs, &w, &h, &d); - maxside = (l_int32)(sqrt((l_float64)(width * width) + - (l_float64)(height * height)) + 0.5); - if (w >= maxside && h >= maxside) /* big enough */ - return pixClone(pixs); - - /* Find the new sizes required to hold the image after rotation. - * Note that the new dimensions must be at least as large as those - * of pixs, because we're rasterop-ing into it before rotation. */ - cosa = cos(angle); - sina = sin(angle); - fw = (l_float64)w; - fh = (l_float64)h; - w1 = (l_int32)(L_ABS(fw * cosa - fh * sina) + 0.5); - w2 = (l_int32)(L_ABS(-fw * cosa - fh * sina) + 0.5); - h1 = (l_int32)(L_ABS(fw * sina + fh * cosa) + 0.5); - h2 = (l_int32)(L_ABS(-fw * sina + fh * cosa) + 0.5); - wnew = L_MAX(w, L_MAX(w1, w2)); - hnew = L_MAX(h, L_MAX(h1, h2)); - - if ((pixd = pixCreate(wnew, hnew, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyColormap(pixd, pixs); - pixCopySpp(pixd, pixs); - pixCopyText(pixd, pixs); - xoff = (wnew - w) / 2; - yoff = (hnew - h) / 2; - - /* Set background to color to be rotated in */ - setcolor = (incolor == L_BRING_IN_BLACK) ? 
L_SET_BLACK : L_SET_WHITE; - pixSetBlackOrWhite(pixd, setcolor); - - /* Rasterop automatically handles all 4 channels for rgba */ - pixRasterop(pixd, xoff, yoff, w, h, PIX_SRC, pixs, 0, 0); - return pixd; -} - - -/*------------------------------------------------------------------* - * General rotation by sampling * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateBySampling() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp rgb; can be cmapped - * \param[in] xcen x value of center of rotation - * \param[in] ycen y value of center of rotation - * \param[in] angle radians; clockwise is positive - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) For very small rotations, just return a clone.
- *      (2) Rotation brings either white or black pixels in
- *          from outside the image.
- *      (3) Colormaps are retained.
- * 
- */ -PIX * -pixRotateBySampling(PIX *pixs, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 incolor) -{ -l_int32 w, h, d, i, j, x, y, xdif, ydif, wm1, hm1, wpld; -l_uint32 val; -l_float32 sina, cosa; -l_uint32 *datad, *lined; -void **lines; -PIX *pixd; - - PROCNAME("pixRotateBySampling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("invalid depth", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - if ((pixd = pixCreateTemplateNoInit(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSetBlackOrWhite(pixd, incolor); - - sina = sin(angle); - cosa = cos(angle); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - wm1 = w - 1; - hm1 = h - 1; - lines = pixGetLinePtrs(pixs, NULL); - - /* Treat 1 bpp case specially */ - if (d == 1) { - for (i = 0; i < h; i++) { /* scan over pixd */ - lined = datad + i * wpld; - ydif = ycen - i; - for (j = 0; j < w; j++) { - xdif = xcen - j; - x = xcen + (l_int32)(-xdif * cosa - ydif * sina); - if (x < 0 || x > wm1) continue; - y = ycen + (l_int32)(-ydif * cosa + xdif * sina); - if (y < 0 || y > hm1) continue; - if (incolor == L_BRING_IN_WHITE) { - if (GET_DATA_BIT(lines[y], x)) - SET_DATA_BIT(lined, j); - } else { - if (!GET_DATA_BIT(lines[y], x)) - CLEAR_DATA_BIT(lined, j); - } - } - } - LEPT_FREE(lines); - return pixd; - } - - for (i = 0; i < h; i++) { /* scan over pixd */ - lined = datad + i * wpld; - ydif = ycen - i; - for (j = 0; j < w; j++) { - xdif = xcen - j; - x = xcen + (l_int32)(-xdif * cosa - ydif * sina); - if (x < 0 || x > wm1) continue; - y = ycen + (l_int32)(-ydif * cosa + xdif * sina); - if (y < 0 || y > hm1) continue; - switch (d) - { - case 8: - val = 
GET_DATA_BYTE(lines[y], x); - SET_DATA_BYTE(lined, j, val); - break; - case 32: - val = GET_DATA_FOUR_BYTES(lines[y], x); - SET_DATA_FOUR_BYTES(lined, j, val); - break; - case 2: - val = GET_DATA_DIBIT(lines[y], x); - SET_DATA_DIBIT(lined, j, val); - break; - case 4: - val = GET_DATA_QBIT(lines[y], x); - SET_DATA_QBIT(lined, j, val); - break; - case 16: - val = GET_DATA_TWO_BYTES(lines[y], x); - SET_DATA_TWO_BYTES(lined, j, val); - break; - default: - return (PIX *)ERROR_PTR("invalid depth", procName, NULL); - } - } - } - - LEPT_FREE(lines); - return pixd; -} - - -/*------------------------------------------------------------------* - * Nice (slow) rotation of 1 bpp image * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateBinaryNice() - * - * \param[in] pixs 1 bpp - * \param[in] angle radians; clockwise is positive; about the center - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) For very small rotations, just return a clone.
- *      (2) This does a computationally expensive rotation of 1 bpp images.
- *          The fastest rotators (using shears or subsampling) leave
- *          visible horizontal and vertical shear lines across which
- *          the image shear changes by one pixel.  To ameliorate the
- *          visual effect one can introduce random dithering.  One
- *          way to do this in a not-too-random fashion is given here.
- *          We convert to 8 bpp, do a very small blur, rotate using
- *          linear interpolation (same as area mapping), do a
- *          small amount of sharpening to compensate for the initial
- *          blur, and threshold back to binary.  The shear lines
- *          are magically removed.
- *      (3) This operation is about 5x slower than rotation by sampling.
- * 
- */ -PIX * -pixRotateBinaryNice(PIX *pixs, - l_float32 angle, - l_int32 incolor) -{ -PIX *pix1, *pix2, *pix3, *pix4, *pixd; - - PROCNAME("pixRotateBinaryNice"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - pix1 = pixConvertTo8(pixs, 0); - pix2 = pixBlockconv(pix1, 1, 1); /* smallest blur allowed */ - pix3 = pixRotateAM(pix2, angle, incolor); - pix4 = pixUnsharpMasking(pix3, 1, 1.0); /* sharpen a bit */ - pixd = pixThresholdToBinary(pix4, 128); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - return pixd; -} - - -/*------------------------------------------------------------------* - * Rotation including alpha (blend) component * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateWithAlpha() - * - * \param[in] pixs 32 bpp rgb or cmapped - * \param[in] angle radians; clockwise is positive - * \param[in] pixg [optional] 8 bpp, can be null - * \param[in] fract between 0.0 and 1.0, with 0.0 fully transparent - * and 1.0 fully opaque - * \return pixd 32 bpp rgba, or NULL on error - * - *
- * Notes:
- *      (1) The alpha channel is transformed separately from pixs,
- *          and aligns with it, being fully transparent outside the
- *          boundary of the transformed pixs.  For pixels that are fully
- *          transparent, a blending function like pixBlendWithGrayMask()
- *          will give zero weight to corresponding pixels in pixs.
- *      (2) Rotation is about the center of the image; for very small
- *          rotations, just return a clone.  The dest is automatically
- *          expanded so that no image pixels are lost.
- *      (3) Rotation is by area mapping.  It doesn't matter what
- *          color is brought in because the alpha channel will
- *          be transparent (black) there.
- *      (4) If pixg is NULL, it is generated as an alpha layer that is
- *          partially opaque, using %fract.  Otherwise, it is cropped
- *          to pixs if required and %fract is ignored.  The alpha
- *          channel in pixs is never used.
- *      (4) Colormaps are removed to 32 bpp.
- *      (5) The default setting for the border values in the alpha channel
- *          is 0 (transparent) for the outermost ring of pixels and
- *          (0.5 * fract * 255) for the second ring.  When blended over
- *          a second image, this
- *          (a) shrinks the visible image to make a clean overlap edge
- *              with an image below, and
- *          (b) softens the edges by weakening the aliasing there.
- *          Use l_setAlphaMaskBorder() to change these values.
- *      (6) A subtle use of gamma correction is to remove gamma correction
- *          before rotation and restore it afterwards.  This is done
- *          by sandwiching this function between a gamma/inverse-gamma
- *          photometric transform:
- *              pixt = pixGammaTRCWithAlpha(NULL, pixs, 1.0 / gamma, 0, 255);
- *              pixd = pixRotateWithAlpha(pixt, angle, NULL, fract);
- *              pixGammaTRCWithAlpha(pixd, pixd, gamma, 0, 255);
- *              pixDestroy(&pixt);
- *          This has the side-effect of producing artifacts in the very
- *          dark regions.
- * 
- */ -PIX * -pixRotateWithAlpha(PIX *pixs, - l_float32 angle, - PIX *pixg, - l_float32 fract) -{ -l_int32 ws, hs, d, spp; -PIX *pixd, *pix32, *pixg2, *pixgr; - - PROCNAME("pixRotateWithAlpha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &ws, &hs, &d); - if (d != 32 && pixGetColormap(pixs) == NULL) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (pixg && pixGetDepth(pixg) != 8) { - L_WARNING("pixg not 8 bpp; using 'fract' transparent alpha\n", - procName); - pixg = NULL; - } - if (!pixg && (fract < 0.0 || fract > 1.0)) { - L_WARNING("invalid fract; using fully opaque\n", procName); - fract = 1.0; - } - if (!pixg && fract == 0.0) - L_WARNING("transparent alpha; image will not be blended\n", procName); - - /* Make sure input to rotation is 32 bpp rgb, and rotate it */ - if (d != 32) - pix32 = pixConvertTo32(pixs); - else - pix32 = pixClone(pixs); - spp = pixGetSpp(pix32); - pixSetSpp(pix32, 3); /* ignore the alpha channel for the rotation */ - pixd = pixRotate(pix32, angle, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, ws, hs); - pixSetSpp(pix32, spp); /* restore initial value in case it's a clone */ - pixDestroy(&pix32); - - /* Set up alpha layer with a fading border and rotate it */ - if (!pixg) { - pixg2 = pixCreate(ws, hs, 8); - if (fract == 1.0) - pixSetAll(pixg2); - else if (fract > 0.0) - pixSetAllArbitrary(pixg2, (l_int32)(255.0 * fract)); - } else { - pixg2 = pixResizeToMatch(pixg, NULL, ws, hs); - } - if (ws > 10 && hs > 10) { /* see note 8 */ - pixSetBorderRingVal(pixg2, 1, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[0])); - pixSetBorderRingVal(pixg2, 2, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[1])); - } - pixgr = pixRotate(pixg2, angle, L_ROTATE_AREA_MAP, - L_BRING_IN_BLACK, ws, hs); - - /* Combine into a 4 spp result */ - pixSetRGBComponent(pixd, pixgr, L_ALPHA_CHANNEL); - - pixDestroy(&pixg2); - pixDestroy(&pixgr); - return pixd; -} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateam.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateam.c deleted file mode 100644 index a8ef5d73..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateam.c +++ /dev/null @@ -1,1132 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file rotateam.c - *
- *
- *     Grayscale and color rotation for area mapping (== interpolation)
- *
- *         Rotation about the image center
- *                PIX         *pixRotateAM()
- *                PIX         *pixRotateAMColor()
- *                PIX         *pixRotateAMGray()
- *                static void  rotateAMColorLow()
- *                static void  rotateAMGrayLow()
- *
- *         Rotation about the UL corner of the image
- *                PIX         *pixRotateAMCorner()
- *                PIX         *pixRotateAMColorCorner()
- *                PIX         *pixRotateAMGrayCorner()
- *                static void  rotateAMColorCornerLow()
- *                static void  rotateAMGrayCornerLow()
- *
- *         Faster color rotation about the image center
- *                PIX         *pixRotateAMColorFast()
- *                static void  rotateAMColorFastLow()
- *
- *     Rotations are measured in radians; clockwise is positive.
- *
- *     The basic area mapping grayscale rotation works on 8 bpp images.
- *     For color, the same method is applied to each color separately.
- *     This can be done in two ways: (1) as here, computing each dest
- *     rgb pixel from the appropriate four src rgb pixels, or (2) separating
- *     the color image into three 8 bpp images, rotate each of these,
- *     and then combine the result.  Method (1) is about 2.5x faster.
- *     We have also implemented a fast approximation for color area-mapping
- *     rotation (pixRotateAMColorFast()), which is about 25% faster
- *     than the standard color rotator.  If you need the extra speed,
- *     use it.
- *
- *     Area mapping works as follows.  For each dest
- *     pixel you find the 4 source pixels that it partially
- *     covers.  You then compute the dest pixel value as
- *     the area-weighted average of those 4 source pixels.
- *     We make two simplifying approximations:
- *
- *       ~  For simplicity, compute the areas as if the dest
- *          pixel were translated but not rotated.
- *
- *       ~  Compute area overlaps on a discrete sub-pixel grid.
- *          Because we are using 8 bpp images with 256 levels,
- *          it is convenient to break each pixel into a
- *          16x16 sub-pixel grid, and count the number of
- *          overlapped sub-pixels.
- *
- *     It is interesting to note that the digital filter that
- *     implements the area mapping algorithm for rotation
- *     is identical to the digital filter used for linear
- *     interpolation when arbitrarily scaling grayscale images.
- *
- *     The advantage of area mapping over pixel sampling
- *     in grayscale rotation is that the former naturally
- *     blurs sharp edges ("anti-aliasing"), so that stair-step
- *     artifacts are not introduced.  The disadvantage is that
- *     it is significantly slower.
- *
- *     But it is still pretty fast.  With standard 3 GHz hardware,
- *     the anti-aliased (area-mapped) color rotation speed is
- *     about 15 million pixels/sec.
- *
- *     The function pixRotateAMColorFast() is about 10-20% faster
- *     than pixRotateAMColor().  The quality is slightly worse,
- *     and if you make many successive small rotations, with a
- *     total angle of 360 degrees, it has been noted that the
- *     center wanders -- it seems to be doing a 1 pixel translation
- *     in addition to the rotation.
- *
- *     Consider again the comparison of image quality between sampling
- *     and area mapping.  With sampling, sharp edges such as found in
- *     text images remain sharp.  However, sampling artifacts such as
- *     characters randomly bouncing up and down by one pixel, or
- *     one pixel horizontal shear lines going through a line of text
- *     (causing the characters to look like badly rendered italic),
- *     are highly visible.  It does not help to sample the source pixel
- *     with the largest area covering each dest pixel; the result has
- *     the same ugly sampling artifacts.
- *
- *     With area mapping, these annoying artifacts are avoided, but the
- *     blurring of edges makes small text a bit more difficult to read.
- *     However, if you are willing to do more computation, you can have
- *     the best of both worlds: no sampling artifacts and sharp edges.
- *     Use area mapping to avoid sampling issues, and follow it with
- *     unsharp masking.  Experiment with the sharpening parameters.
- *     I have found that a small amount of sharpening is sufficient to
- *     restore the sharp edges in text; e.g.,
- *         pix2 = pixUnsharpMasking(pix1, 1, 0.3);
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include /* required for sin and tan */ -#include "allheaders.h" - -static void rotateAMColorLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_float32 angle, l_uint32 colorval); -static void rotateAMGrayLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_float32 angle, l_uint8 grayval); -static void rotateAMColorCornerLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, - l_int32 wpls, l_float32 angle, - l_uint32 colorval); -static void rotateAMGrayCornerLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_float32 angle, l_uint8 grayval); - -static void rotateAMColorFastLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_float32 angle, l_uint32 colorval); - -static const l_float32 MinAngleToRotate = 0.001; /* radians; ~0.06 deg */ - - -/*------------------------------------------------------------------* - * Rotation about the center * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateAM() - * - * \param[in] pixs 2, 4, 8 bpp gray or colormapped, or 32 bpp RGB - * \param[in] angle radians; clockwise is positive - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Rotates about image center.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) Brings in either black or white pixels from the boundary.
- * 
- */ -PIX * -pixRotateAM(PIX *pixs, - l_float32 angle, - l_int32 incolor) -{ -l_int32 d; -l_uint32 fillval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixRotateAM"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) == 1) - return (PIX *)ERROR_PTR("pixs is 1 bpp", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual incoming color */ - fillval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - fillval = 255; - else /* d == 32 */ - fillval = 0xffffff00; - } - - if (d == 8) - pixd = pixRotateAMGray(pixt2, angle, fillval); - else /* d == 32 */ - pixd = pixRotateAMColor(pixt2, angle, fillval); - - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! - * \brief pixRotateAMColor() - * - * \param[in] pixs 32 bpp - * \param[in] angle radians; clockwise is positive - * \param[in] colorval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Rotates about image center.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) Specify the color to be brought in from outside the image.
- * 
- */ -PIX * -pixRotateAMColor(PIX *pixs, - l_float32 angle, - l_uint32 colorval) -{ -l_int32 w, h, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixRotateAMColor"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - rotateAMColorLow(datad, w, h, wpld, datas, wpls, angle, colorval); - if (pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixRotateAMGray(pix1, angle, 255); /* bring in opaque */ - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - return pixd; -} - - -/*! - * \brief pixRotateAMGray() - * - * \param[in] pixs 8 bpp - * \param[in] angle radians; clockwise is positive - * \param[in] grayval 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Rotates about image center.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) Specify the grayvalue to be brought in from outside the image.
- * 
- */ -PIX * -pixRotateAMGray(PIX *pixs, - l_float32 angle, - l_uint8 grayval) -{ -l_int32 w, h, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixRotateAMGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - rotateAMGrayLow(datad, w, h, wpld, datas, wpls, angle, grayval); - - return pixd; -} - - -static void -rotateAMColorLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_float32 angle, - l_uint32 colorval) -{ -l_int32 i, j, xcen, ycen, wm2, hm2; -l_int32 xdif, ydif, xpm, ypm, xp, yp, xf, yf; -l_int32 rval, gval, bval; -l_uint32 word00, word01, word10, word11; -l_uint32 *lines, *lined; -l_float32 sina, cosa; - - xcen = w / 2; - wm2 = w - 2; - ycen = h / 2; - hm2 = h - 2; - sina = 16. * sin(angle); - cosa = 16. * cos(angle); - - for (i = 0; i < h; i++) { - ydif = ycen - i; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - xdif = xcen - j; - xpm = (l_int32)(-xdif * cosa - ydif * sina); - ypm = (l_int32)(-ydif * cosa + xdif * sina); - xp = xcen + (xpm >> 4); - yp = ycen + (ypm >> 4); - xf = xpm & 0x0f; - yf = ypm & 0x0f; - - /* if off the edge, write input colorval */ - if (xp < 0 || yp < 0 || xp > wm2 || yp > hm2) { - *(lined + j) = colorval; - continue; - } - - lines = datas + yp * wpls; - - /* do area weighting. Without this, we would - * simply do: - * *(lined + j) = *(lines + xp); - * which is faster but gives lousy results! 
- */ - word00 = *(lines + xp); - word10 = *(lines + xp + 1); - word01 = *(lines + wpls + xp); - word11 = *(lines + wpls + xp + 1); - rval = ((16 - xf) * (16 - yf) * ((word00 >> L_RED_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_RED_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_RED_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_RED_SHIFT) & 0xff) + 128) / 256; - gval = ((16 - xf) * (16 - yf) * ((word00 >> L_GREEN_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_GREEN_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_GREEN_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_GREEN_SHIFT) & 0xff) + 128) / 256; - bval = ((16 - xf) * (16 - yf) * ((word00 >> L_BLUE_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_BLUE_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_BLUE_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_BLUE_SHIFT) & 0xff) + 128) / 256; - composeRGBPixel(rval, gval, bval, lined + j); - } - } -} - - -static void -rotateAMGrayLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_float32 angle, - l_uint8 grayval) -{ -l_int32 i, j, xcen, ycen, wm2, hm2; -l_int32 xdif, ydif, xpm, ypm, xp, yp, xf, yf; -l_int32 v00, v01, v10, v11; -l_uint8 val; -l_uint32 *lines, *lined; -l_float32 sina, cosa; - - xcen = w / 2; - wm2 = w - 2; - ycen = h / 2; - hm2 = h - 2; - sina = 16. * sin(angle); - cosa = 16. * cos(angle); - - for (i = 0; i < h; i++) { - ydif = ycen - i; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - xdif = xcen - j; - xpm = (l_int32)(-xdif * cosa - ydif * sina); - ypm = (l_int32)(-ydif * cosa + xdif * sina); - xp = xcen + (xpm >> 4); - yp = ycen + (ypm >> 4); - xf = xpm & 0x0f; - yf = ypm & 0x0f; - - /* if off the edge, write input grayval */ - if (xp < 0 || yp < 0 || xp > wm2 || yp > hm2) { - SET_DATA_BYTE(lined, j, grayval); - continue; - } - - lines = datas + yp * wpls; - - /* do area weighting. 
Without this, we would - * simply do: - * SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp)); - * which is faster but gives lousy results! - */ - v00 = (16 - xf) * (16 - yf) * GET_DATA_BYTE(lines, xp); - v10 = xf * (16 - yf) * GET_DATA_BYTE(lines, xp + 1); - v01 = (16 - xf) * yf * GET_DATA_BYTE(lines + wpls, xp); - v11 = xf * yf * GET_DATA_BYTE(lines + wpls, xp + 1); - val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256); - SET_DATA_BYTE(lined, j, val); - } - } -} - - -/*------------------------------------------------------------------* - * Rotation about the UL corner * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateAMCorner() - * - * \param[in] pixs 1, 2, 4, 8 bpp gray or colormapped, or 32 bpp RGB - * \param[in] angle radians; clockwise is positive - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Rotates about the UL corner of the image.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) Brings in either black or white pixels from the boundary.
- * 
- */ -PIX * -pixRotateAMCorner(PIX *pixs, - l_float32 angle, - l_int32 incolor) -{ -l_int32 d; -l_uint32 fillval; -PIX *pixt1, *pixt2, *pixd; - - PROCNAME("pixRotateAMCorner"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - /* Remove cmap if it exists, and unpack to 8 bpp if necessary */ - pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixt1); - if (d < 8) - pixt2 = pixConvertTo8(pixt1, FALSE); - else - pixt2 = pixClone(pixt1); - d = pixGetDepth(pixt2); - - /* Compute actual incoming color */ - fillval = 0; - if (incolor == L_BRING_IN_WHITE) { - if (d == 8) - fillval = 255; - else /* d == 32 */ - fillval = 0xffffff00; - } - - if (d == 8) - pixd = pixRotateAMGrayCorner(pixt2, angle, fillval); - else /* d == 32 */ - pixd = pixRotateAMColorCorner(pixt2, angle, fillval); - - pixDestroy(&pixt1); - pixDestroy(&pixt2); - return pixd; -} - - -/*! - * \brief pixRotateAMColorCorner() - * - * \param[in] pixs - * \param[in] angle radians; clockwise is positive - * \param[in] fillval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Rotates the image about the UL corner.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) Specify the color to be brought in from outside the image.
- * 
- */ -PIX * -pixRotateAMColorCorner(PIX *pixs, - l_float32 angle, - l_uint32 fillval) -{ -l_int32 w, h, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixRotateAMColorCorner"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - rotateAMColorCornerLow(datad, w, h, wpld, datas, wpls, angle, fillval); - if (pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixRotateAMGrayCorner(pix1, angle, 255); /* bring in opaque */ - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - - return pixd; -} - - -/*! - * \brief pixRotateAMGrayCorner() - * - * \param[in] pixs - * \param[in] angle radians; clockwise is positive - * \param[in] grayval 0 to bring in BLACK, 255 for WHITE - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Rotates the image about the UL corner.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) Specify the grayvalue to be brought in from outside the image.
- * 
- */ -PIX * -pixRotateAMGrayCorner(PIX *pixs, - l_float32 angle, - l_uint8 grayval) -{ -l_int32 w, h, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixRotateAMGrayCorner"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("pixs must be 8 bpp", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - rotateAMGrayCornerLow(datad, w, h, wpld, datas, wpls, angle, grayval); - - return pixd; -} - - -static void -rotateAMColorCornerLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_float32 angle, - l_uint32 colorval) -{ -l_int32 i, j, wm2, hm2; -l_int32 xpm, ypm, xp, yp, xf, yf; -l_int32 rval, gval, bval; -l_uint32 word00, word01, word10, word11; -l_uint32 *lines, *lined; -l_float32 sina, cosa; - - wm2 = w - 2; - hm2 = h - 2; - sina = 16. * sin(angle); - cosa = 16. * cos(angle); - - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - xpm = (l_int32)(j * cosa + i * sina); - ypm = (l_int32)(i * cosa - j * sina); - xp = xpm >> 4; - yp = ypm >> 4; - xf = xpm & 0x0f; - yf = ypm & 0x0f; - - /* if off the edge, write input colorval */ - if (xp < 0 || yp < 0 || xp > wm2 || yp > hm2) { - *(lined + j) = colorval; - continue; - } - - lines = datas + yp * wpls; - - /* do area weighting. Without this, we would - * simply do: - * *(lined + j) = *(lines + xp); - * which is faster but gives lousy results! 
- */ - word00 = *(lines + xp); - word10 = *(lines + xp + 1); - word01 = *(lines + wpls + xp); - word11 = *(lines + wpls + xp + 1); - rval = ((16 - xf) * (16 - yf) * ((word00 >> L_RED_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_RED_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_RED_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_RED_SHIFT) & 0xff) + 128) / 256; - gval = ((16 - xf) * (16 - yf) * ((word00 >> L_GREEN_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_GREEN_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_GREEN_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_GREEN_SHIFT) & 0xff) + 128) / 256; - bval = ((16 - xf) * (16 - yf) * ((word00 >> L_BLUE_SHIFT) & 0xff) + - xf * (16 - yf) * ((word10 >> L_BLUE_SHIFT) & 0xff) + - (16 - xf) * yf * ((word01 >> L_BLUE_SHIFT) & 0xff) + - xf * yf * ((word11 >> L_BLUE_SHIFT) & 0xff) + 128) / 256; - composeRGBPixel(rval, gval, bval, lined + j); - } - } -} - - -static void -rotateAMGrayCornerLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_float32 angle, - l_uint8 grayval) -{ -l_int32 i, j, wm2, hm2; -l_int32 xpm, ypm, xp, yp, xf, yf; -l_int32 v00, v01, v10, v11; -l_uint8 val; -l_uint32 *lines, *lined; -l_float32 sina, cosa; - - wm2 = w - 2; - hm2 = h - 2; - sina = 16. * sin(angle); - cosa = 16. * cos(angle); - - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - xpm = (l_int32)(j * cosa + i * sina); - ypm = (l_int32)(i * cosa - j * sina); - xp = xpm >> 4; - yp = ypm >> 4; - xf = xpm & 0x0f; - yf = ypm & 0x0f; - - /* if off the edge, write input grayval */ - if (xp < 0 || yp < 0 || xp > wm2 || yp > hm2) { - SET_DATA_BYTE(lined, j, grayval); - continue; - } - - lines = datas + yp * wpls; - - /* do area weighting. Without this, we would - * simply do: - * SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp)); - * which is faster but gives lousy results! 
- */ - v00 = (16 - xf) * (16 - yf) * GET_DATA_BYTE(lines, xp); - v10 = xf * (16 - yf) * GET_DATA_BYTE(lines, xp + 1); - v01 = (16 - xf) * yf * GET_DATA_BYTE(lines + wpls, xp); - v11 = xf * yf * GET_DATA_BYTE(lines + wpls, xp + 1); - val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256); - SET_DATA_BYTE(lined, j, val); - } - } -} - - -/*------------------------------------------------------------------* - * Fast RGB color rotation about center * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateAMColorFast() - * - * \param[in] pixs - * \param[in] angle radians; clockwise is positive - * \param[in] colorval e.g., 0 to bring in BLACK, 0xffffff00 for WHITE - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This rotates a color image about the image center.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) It uses area mapping, dividing each pixel into
- *          16 subpixels.
- *      (4) It is about 10% to 20% faster than the more accurate linear
- *          interpolation function pixRotateAMColor(),
- *          which uses 256 subpixels.
- *      (5) For some reason it shifts the image center.
- *          No attempt is made to rotate the alpha component.
- * 
- */ -PIX * -pixRotateAMColorFast(PIX *pixs, - l_float32 angle, - l_uint32 colorval) -{ -l_int32 w, h, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixRotateAMColorFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs must be 32 bpp", procName, NULL); - - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreateTemplate(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - rotateAMColorFastLow(datad, w, h, wpld, datas, wpls, angle, colorval); - return pixd; -} - - -/*! - * \brief rotateAMColorFastLow() - * - * This is a special simplification of area mapping with division - * of each pixel into 16 sub-pixels. The exact coefficients that - * should be used are the same as for the 4x linear interpolation - * scaling case, and are given there. I tried to approximate these - * as weighted coefficients with a maximum sum of 4, which - * allows us to do the arithmetic in parallel for the R, G and B - * components in a 32 bit pixel. 
However, there are three reasons - * for not doing that: - * (1) the loss of accuracy in the parallel implementation - * is visually significant - * (2) the parallel implementation (described below) is slower - * (3) the parallel implementation requires allocation of - * a temporary color image - * - * There are 16 cases for the choice of the subpixel, and - * for each, the mapping to the relevant source - * pixels is as follows: - * - * subpixel src pixel weights - * -------- ----------------- - * 0 sp1 - * 1 (3 * sp1 + sp2) / 4 - * 2 (sp1 + sp2) / 2 - * 3 (sp1 + 3 * sp2) / 4 - * 4 (3 * sp1 + sp3) / 4 - * 5 (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16 - * 6 (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8 - * 7 (3 * sp1 + 9 * sp2 + sp3 + 3 * sp4) / 16 - * 8 (sp1 + sp3) / 2 - * 9 (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8 - * 10 (sp1 + sp2 + sp3 + sp4) / 4 - * 11 (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8 - * 12 (sp1 + 3 * sp3) / 4 - * 13 (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16 - * 14 (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8 - * 15 (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16 - * - * Another way to visualize this is to consider the area mapping - * (or linear interpolation) coefficients for the pixel sp1. - * Expressed in fourths, they can be written as asymmetric matrix: - * - * 4 3 2 1 - * 3 2.25 1.5 0.75 - * 2 1.5 1 0.5 - * 1 0.75 0.5 0.25 - * - * The coefficients for the three neighboring pixels can be - * similarly written. - * - * This is implemented here, where, for each color component, - * we inline its extraction from each participating word, - * construct the linear combination, and combine the results - * into the destination 32 bit RGB pixel, using the appropriate shifts. - * - * It is interesting to note that an alternative method, where - * we do the arithmetic on the 32 bit pixels directly (after - * shifting the components so they won't overflow into each other) - * is significantly inferior. 
Because we have only 8 bits for - * internal overflows, which can be distributed as 2, 3, 3, it - * is impossible to add these with the correct linear - * interpolation coefficients, which require a sum of up to 16. - * Rounding off to a sum of 4 causes appreciable visual artifacts - * in the rotated image. The code for the inferior method - * can be found in prog/rotatefastalt.c, for reference. - */ -static void -rotateAMColorFastLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_float32 angle, - l_uint32 colorval) -{ -l_int32 i, j, xcen, ycen, wm2, hm2; -l_int32 xdif, ydif, xpm, ypm, xp, yp, xf, yf; -l_uint32 word1, word2, word3, word4, red, blue, green; -l_uint32 *pword, *lines, *lined; -l_float32 sina, cosa; - - xcen = w / 2; - wm2 = w - 2; - ycen = h / 2; - hm2 = h - 2; - sina = 4. * sin(angle); - cosa = 4. * cos(angle); - - for (i = 0; i < h; i++) { - ydif = ycen - i; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - xdif = xcen - j; - xpm = (l_int32)(-xdif * cosa - ydif * sina); - ypm = (l_int32)(-ydif * cosa + xdif * sina); - xp = xcen + (xpm >> 2); - yp = ycen + (ypm >> 2); - xf = xpm & 0x03; - yf = ypm & 0x03; - - /* if off the edge, write input grayval */ - if (xp < 0 || yp < 0 || xp > wm2 || yp > hm2) { - *(lined + j) = colorval; - continue; - } - - lines = datas + yp * wpls; - pword = lines + xp; - - switch (xf + 4 * yf) - { - case 0: - *(lined + j) = *pword; - break; - case 1: - word1 = *pword; - word2 = *(pword + 1); - red = 3 * (word1 >> 24) + (word2 >> 24); - green = 3 * ((word1 >> 16) & 0xff) + - ((word2 >> 16) & 0xff); - blue = 3 * ((word1 >> 8) & 0xff) + - ((word2 >> 8) & 0xff); - *(lined + j) = ((red << 22) & 0xff000000) | - ((green << 14) & 0x00ff0000) | - ((blue << 6) & 0x0000ff00); - break; - case 2: - word1 = *pword; - word2 = *(pword + 1); - red = (word1 >> 24) + (word2 >> 24); - green = ((word1 >> 16) & 0xff) + ((word2 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + ((word2 >> 8) 
& 0xff); - *(lined + j) = ((red << 23) & 0xff000000) | - ((green << 15) & 0x00ff0000) | - ((blue << 7) & 0x0000ff00); - break; - case 3: - word1 = *pword; - word2 = *(pword + 1); - red = (word1 >> 24) + 3 * (word2 >> 24); - green = ((word1 >> 16) & 0xff) + - 3 * ((word2 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + - 3 * ((word2 >> 8) & 0xff); - *(lined + j) = ((red << 22) & 0xff000000) | - ((green << 14) & 0x00ff0000) | - ((blue << 6) & 0x0000ff00); - break; - case 4: - word1 = *pword; - word3 = *(pword + wpls); - red = 3 * (word1 >> 24) + (word3 >> 24); - green = 3 * ((word1 >> 16) & 0xff) + - ((word3 >> 16) & 0xff); - blue = 3 * ((word1 >> 8) & 0xff) + - ((word3 >> 8) & 0xff); - *(lined + j) = ((red << 22) & 0xff000000) | - ((green << 14) & 0x00ff0000) | - ((blue << 6) & 0x0000ff00); - break; - case 5: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = 9 * (word1 >> 24) + 3 * (word2 >> 24) + - 3 * (word3 >> 24) + (word4 >> 24); - green = 9 * ((word1 >> 16) & 0xff) + - 3 * ((word2 >> 16) & 0xff) + - 3 * ((word3 >> 16) & 0xff) + - ((word4 >> 16) & 0xff); - blue = 9 * ((word1 >> 8) & 0xff) + - 3 * ((word2 >> 8) & 0xff) + - 3 * ((word3 >> 8) & 0xff) + - ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 20) & 0xff000000) | - ((green << 12) & 0x00ff0000) | - ((blue << 4) & 0x0000ff00); - break; - case 6: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = 3 * (word1 >> 24) + 3 * (word2 >> 24) + - (word3 >> 24) + (word4 >> 24); - green = 3 * ((word1 >> 16) & 0xff) + - 3 * ((word2 >> 16) & 0xff) + - ((word3 >> 16) & 0xff) + - ((word4 >> 16) & 0xff); - blue = 3 * ((word1 >> 8) & 0xff) + - 3 * ((word2 >> 8) & 0xff) + - ((word3 >> 8) & 0xff) + - ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 21) & 0xff000000) | - ((green << 13) & 0x00ff0000) | - ((blue << 5) & 0x0000ff00); - break; - case 7: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + 
wpls); - word4 = *(pword + wpls + 1); - red = 3 * (word1 >> 24) + 9 * (word2 >> 24) + - (word3 >> 24) + 3 * (word4 >> 24); - green = 3 * ((word1 >> 16) & 0xff) + - 9 * ((word2 >> 16) & 0xff) + - ((word3 >> 16) & 0xff) + - 3 * ((word4 >> 16) & 0xff); - blue = 3 * ((word1 >> 8) & 0xff) + - 9 * ((word2 >> 8) & 0xff) + - ((word3 >> 8) & 0xff) + - 3 * ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 20) & 0xff000000) | - ((green << 12) & 0x00ff0000) | - ((blue << 4) & 0x0000ff00); - break; - case 8: - word1 = *pword; - word3 = *(pword + wpls); - red = (word1 >> 24) + (word3 >> 24); - green = ((word1 >> 16) & 0xff) + ((word3 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + ((word3 >> 8) & 0xff); - *(lined + j) = ((red << 23) & 0xff000000) | - ((green << 15) & 0x00ff0000) | - ((blue << 7) & 0x0000ff00); - break; - case 9: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = 3 * (word1 >> 24) + (word2 >> 24) + - 3 * (word3 >> 24) + (word4 >> 24); - green = 3 * ((word1 >> 16) & 0xff) + ((word2 >> 16) & 0xff) + - 3 * ((word3 >> 16) & 0xff) + ((word4 >> 16) & 0xff); - blue = 3 * ((word1 >> 8) & 0xff) + ((word2 >> 8) & 0xff) + - 3 * ((word3 >> 8) & 0xff) + ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 21) & 0xff000000) | - ((green << 13) & 0x00ff0000) | - ((blue << 5) & 0x0000ff00); - break; - case 10: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = (word1 >> 24) + (word2 >> 24) + - (word3 >> 24) + (word4 >> 24); - green = ((word1 >> 16) & 0xff) + ((word2 >> 16) & 0xff) + - ((word3 >> 16) & 0xff) + ((word4 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + ((word2 >> 8) & 0xff) + - ((word3 >> 8) & 0xff) + ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 22) & 0xff000000) | - ((green << 14) & 0x00ff0000) | - ((blue << 6) & 0x0000ff00); - break; - case 11: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = 
(word1 >> 24) + 3 * (word2 >> 24) + - (word3 >> 24) + 3 * (word4 >> 24); - green = ((word1 >> 16) & 0xff) + 3 * ((word2 >> 16) & 0xff) + - ((word3 >> 16) & 0xff) + 3 * ((word4 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + 3 * ((word2 >> 8) & 0xff) + - ((word3 >> 8) & 0xff) + 3 * ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 21) & 0xff000000) | - ((green << 13) & 0x00ff0000) | - ((blue << 5) & 0x0000ff00); - break; - case 12: - word1 = *pword; - word3 = *(pword + wpls); - red = (word1 >> 24) + 3 * (word3 >> 24); - green = ((word1 >> 16) & 0xff) + - 3 * ((word3 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + - 3 * ((word3 >> 8) & 0xff); - *(lined + j) = ((red << 22) & 0xff000000) | - ((green << 14) & 0x00ff0000) | - ((blue << 6) & 0x0000ff00); - break; - case 13: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = 3 * (word1 >> 24) + (word2 >> 24) + - 9 * (word3 >> 24) + 3 * (word4 >> 24); - green = 3 * ((word1 >> 16) & 0xff) + ((word2 >> 16) & 0xff) + - 9 * ((word3 >> 16) & 0xff) + 3 * ((word4 >> 16) & 0xff); - blue = 3 *((word1 >> 8) & 0xff) + ((word2 >> 8) & 0xff) + - 9 * ((word3 >> 8) & 0xff) + 3 * ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 20) & 0xff000000) | - ((green << 12) & 0x00ff0000) | - ((blue << 4) & 0x0000ff00); - break; - case 14: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = (word1 >> 24) + (word2 >> 24) + - 3 * (word3 >> 24) + 3 * (word4 >> 24); - green = ((word1 >> 16) & 0xff) +((word2 >> 16) & 0xff) + - 3 * ((word3 >> 16) & 0xff) + 3 * ((word4 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + ((word2 >> 8) & 0xff) + - 3 * ((word3 >> 8) & 0xff) + 3 * ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 21) & 0xff000000) | - ((green << 13) & 0x00ff0000) | - ((blue << 5) & 0x0000ff00); - break; - case 15: - word1 = *pword; - word2 = *(pword + 1); - word3 = *(pword + wpls); - word4 = *(pword + wpls + 1); - red = (word1 >> 24) + 
3 * (word2 >> 24) + - 3 * (word3 >> 24) + 9 * (word4 >> 24); - green = ((word1 >> 16) & 0xff) + 3 * ((word2 >> 16) & 0xff) + - 3 * ((word3 >> 16) & 0xff) + 9 * ((word4 >> 16) & 0xff); - blue = ((word1 >> 8) & 0xff) + 3 * ((word2 >> 8) & 0xff) + - 3 * ((word3 >> 8) & 0xff) + 9 * ((word4 >> 8) & 0xff); - *(lined + j) = ((red << 20) & 0xff000000) | - ((green << 12) & 0x00ff0000) | - ((blue << 4) & 0x0000ff00); - break; - default: - lept_stderr("shouldn't get here\n"); - break; - } - } - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateorth.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateorth.c deleted file mode 100644 index 2c83d1e0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateorth.c +++ /dev/null @@ -1,715 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file rotateorth.c - *
- *
- *      Top-level rotation by multiples of 90 degrees
- *            PIX             *pixRotateOrth()
- *
- *      180-degree rotation
- *            PIX             *pixRotate180()
- *
- *      90-degree rotation (both directions)
- *            PIX             *pixRotate90()
- *
- *      Left-right flip
- *            PIX             *pixFlipLR()
- *
- *      Top-bottom flip
- *            PIX             *pixFlipTB()
- *
- *      Byte reverse tables
- *            static l_uint8  *makeReverseByteTab1()
- *            static l_uint8  *makeReverseByteTab2()
- *            static l_uint8  *makeReverseByteTab4()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_uint8 *makeReverseByteTab1(void); -static l_uint8 *makeReverseByteTab2(void); -static l_uint8 *makeReverseByteTab4(void); - -/*------------------------------------------------------------------* - * Top-level rotation by multiples of 90 degrees * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateOrth() - * - * \param[in] pixs all depths - * \param[in] quads 0-3; number of 90 degree cw rotations - * \return pixd, or NULL on error - */ -PIX * -pixRotateOrth(PIX *pixs, - l_int32 quads) -{ - PROCNAME("pixRotateOrth"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (quads < 0 || quads > 3) - return (PIX *)ERROR_PTR("quads not in {0,1,2,3}", procName, NULL); - - if (quads == 0) - return pixCopy(NULL, pixs); - else if (quads == 1) - return pixRotate90(pixs, 1); - else if (quads == 2) - return pixRotate180(NULL, pixs); - else /* quads == 3 */ - return pixRotate90(pixs, -1); -} - - -/*------------------------------------------------------------------* - * 180 degree rotation * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotate180() - * - * \param[in] pixd [optional]; can be null, equal to pixs, - * or different from pixs - * \param[in] pixs all depths - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a 180 rotation of the image about the center,
- *          which is equivalent to a left-right flip about a vertical
- *          line through the image center, followed by a top-bottom
- *          flip about a horizontal line through the image center.
- *      (2) There are 3 cases for input:
- *          (a) pixd == null (creates a new pixd)
- *          (b) pixd == pixs (in-place operation)
- *          (c) pixd != pixs (existing pixd)
- *      (3) For clarity, use these three patterns, respectively:
- *          (a) pixd = pixRotate180(NULL, pixs);
- *          (b) pixRotate180(pixs, pixs);
- *          (c) pixRotate180(pixd, pixs);
- * 
- */ -PIX * -pixRotate180(PIX *pixd, - PIX *pixs) -{ -l_int32 d; - - PROCNAME("pixRotate180"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not in {1,2,4,8,16,32} bpp", - procName, NULL); - - /* Prepare pixd for in-place operation */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - pixFlipLR(pixd, pixd); - pixFlipTB(pixd, pixd); - return pixd; -} - - -/*------------------------------------------------------------------* - * 90 degree rotation * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotate90() - * - * \param[in] pixs all depths - * \param[in] direction clockwise = 1, counterclockwise = -1 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a 90 degree rotation of the image about the center,
- *          either cw or ccw, returning a new pix.
- *      (2) The direction must be either 1 (cw) or -1 (ccw).
- * 
- */ -PIX * -pixRotate90(PIX *pixs, - l_int32 direction) -{ -l_int32 wd, hd, d, wpls, wpld; -l_int32 i, j, k, m, iend, nswords; -l_uint32 val, word; -l_uint32 *lines, *datas, *lined, *datad; -PIX *pixd; - - PROCNAME("pixRotate90"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &hd, &wd, &d); /* note: reversed */ - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not in {1,2,4,8,16,32} bpp", - procName, NULL); - if (direction != 1 && direction != -1) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - - if ((pixd = pixCreate(wd, hd, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyColormap(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixCopySpp(pixd, pixs); - - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - if (direction == 1) { /* clockwise */ - switch (d) - { - case 32: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas + (wd - 1) * wpls; - for (j = 0; j < wd; j++) { - lined[j] = lines[i]; - lines -= wpls; - } - } - break; - case 16: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas + (wd - 1) * wpls; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_TWO_BYTES(lines, i))) - SET_DATA_TWO_BYTES(lined, j, val); - lines -= wpls; - } - } - break; - case 8: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas + (wd - 1) * wpls; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_BYTE(lines, i))) - SET_DATA_BYTE(lined, j, val); - lines -= wpls; - } - } - break; - case 4: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas + (wd - 1) * wpls; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_QBIT(lines, i))) - SET_DATA_QBIT(lined, j, val); - lines -= wpls; - } - } - break; - case 2: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - 
lines = datas + (wd - 1) * wpls; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_DIBIT(lines, i))) - SET_DATA_DIBIT(lined, j, val); - lines -= wpls; - } - } - break; - case 1: - nswords = hd / 32; - for (j = 0; j < wd; j++) { - lined = datad; - lines = datas + (wd - 1 - j) * wpls; - for (k = 0; k < nswords; k++) { - word = lines[k]; - if (!word) { - lined += 32 * wpld; - continue; - } else { - iend = 32 * (k + 1); - for (m = 0, i = 32 * k; i < iend; i++, m++) { - if ((word << m) & 0x80000000) - SET_DATA_BIT(lined, j); - lined += wpld; - } - } - } - for (i = 32 * nswords; i < hd; i++) { - if (GET_DATA_BIT(lines, i)) - SET_DATA_BIT(lined, j); - lined += wpld; - } - } - break; - default: - pixDestroy(&pixd); - L_ERROR("illegal depth: %d\n", procName, d); - break; - } - } else { /* direction counter-clockwise */ - switch (d) - { - case 32: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas; - for (j = 0; j < wd; j++) { - lined[j] = lines[hd - 1 - i]; - lines += wpls; - } - } - break; - case 16: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_TWO_BYTES(lines, hd - 1 - i))) - SET_DATA_TWO_BYTES(lined, j, val); - lines += wpls; - } - } - break; - case 8: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_BYTE(lines, hd - 1 - i))) - SET_DATA_BYTE(lined, j, val); - lines += wpls; - } - } - break; - case 4: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_QBIT(lines, hd - 1 - i))) - SET_DATA_QBIT(lined, j, val); - lines += wpls; - } - } - break; - case 2: - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - lines = datas; - for (j = 0; j < wd; j++) { - if ((val = GET_DATA_DIBIT(lines, hd - 1 - i))) - SET_DATA_DIBIT(lined, j, val); - lines += wpls; - } - } - break; - case 1: - nswords = hd / 32; - for (j = 0; j < wd; j++) 
{ - lined = datad + (hd - 1) * wpld; - lines = datas + (wd - 1 - j) * wpls; - for (k = 0; k < nswords; k++) { - word = lines[k]; - if (!word) { - lined -= 32 * wpld; - continue; - } else { - iend = 32 * (k + 1); - for (m = 0, i = 32 * k; i < iend; i++, m++) { - if ((word << m) & 0x80000000) - SET_DATA_BIT(lined, wd - 1 - j); - lined -= wpld; - } - } - } - for (i = 32 * nswords; i < hd; i++) { - if (GET_DATA_BIT(lines, i)) - SET_DATA_BIT(lined, wd - 1 - j); - lined -= wpld; - } - } - break; - default: - pixDestroy(&pixd); - L_ERROR("illegal depth: %d\n", procName, d); - break; - } - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Left-right flip * - *------------------------------------------------------------------*/ -/*! - * \brief pixFlipLR() - * - * \param[in] pixd [optional]; can be null, equal to pixs, - * or different from pixs - * \param[in] pixs all depths - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a left-right flip of the image, which is
- *          equivalent to a rotation out of the plane about a
- *          vertical line through the image center.
- *      (2) There are 3 cases for input:
- *          (a) pixd == null (creates a new pixd)
- *          (b) pixd == pixs (in-place operation)
- *          (c) pixd != pixs (existing pixd)
- *      (3) For clarity, use these three patterns, respectively:
- *          (a) pixd = pixFlipLR(NULL, pixs);
- *          (b) pixFlipLR(pixs, pixs);
- *          (c) pixFlipLR(pixd, pixs);
- *      (4) If an existing pixd is not the same size as pixs, the
- *          image data will be reallocated.
- *      (5) The pixel access routines allow a trivial implementation.
- *          However, for d < 8, it is more efficient to right-justify
- *          each line to a 32-bit boundary and then extract bytes and
- *          do pixel reversing.   In those cases, as in the 180 degree
- *          rotation, we right-shift the data (if necessary) to
- *          right-justify on the 32 bit boundary, and then read the
- *          bytes off each raster line in reverse order, reversing
- *          the pixels in each byte using a table.  These functions
- *          for 1, 2 and 4 bpp were tested against the "trivial"
- *          version (shown here for 4 bpp):
- *              for (i = 0; i < h; i++) {
- *                  line = data + i * wpl;
- *                  memcpy(buffer, line, bpl);
- *                    for (j = 0; j < w; j++) {
- *                      val = GET_DATA_QBIT(buffer, w - 1 - j);
- *                        SET_DATA_QBIT(line, j, val);
- *                  }
- *              }
- * 
- */ -PIX * -pixFlipLR(PIX *pixd, - PIX *pixs) -{ -l_uint8 *tab; -l_int32 w, h, d, wpl; -l_int32 extra, shift, databpl, bpl, i, j; -l_uint32 val; -l_uint32 *line, *data, *buffer; - - PROCNAME("pixFlipLR"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not in {1,2,4,8,16,32} bpp", - procName, NULL); - - /* Prepare pixd for in-place operation */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - switch (d) - { - case 1: - tab = makeReverseByteTab1(); - break; - case 2: - tab = makeReverseByteTab2(); - break; - case 4: - tab = makeReverseByteTab4(); - break; - default: - tab = NULL; - break; - } - - /* Possibly inplace assigning return val, so on failure return pixd */ - if ((buffer = (l_uint32 *)LEPT_CALLOC(wpl, sizeof(l_uint32))) == NULL) { - if (tab) LEPT_FREE(tab); - return (PIX *)ERROR_PTR("buffer not made", procName, pixd); - } - - bpl = 4 * wpl; - switch (d) - { - case 32: - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, line, bpl); - for (j = 0; j < w; j++) - line[j] = buffer[w - 1 - j]; - } - break; - case 16: - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, line, bpl); - for (j = 0; j < w; j++) { - val = GET_DATA_TWO_BYTES(buffer, w - 1 - j); - SET_DATA_TWO_BYTES(line, j, val); - } - } - break; - case 8: - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, line, bpl); - for (j = 0; j < w; j++) { - val = GET_DATA_BYTE(buffer, w - 1 - j); - SET_DATA_BYTE(line, j, val); - } - } - break; - case 4: - extra = (w * d) & 31; - if (extra) - shift = 8 - extra / 4; - else - shift = 0; - if (shift) - rasteropHipLow(data, h, d, wpl, 0, h, shift); - - databpl = (w + 1) / 2; - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, 
line, bpl); - for (j = 0; j < databpl; j++) { - val = GET_DATA_BYTE(buffer, bpl - 1 - j); - SET_DATA_BYTE(line, j, tab[val]); - } - } - break; - case 2: - extra = (w * d) & 31; - if (extra) - shift = 16 - extra / 2; - else - shift = 0; - if (shift) - rasteropHipLow(data, h, d, wpl, 0, h, shift); - - databpl = (w + 3) / 4; - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, line, bpl); - for (j = 0; j < databpl; j++) { - val = GET_DATA_BYTE(buffer, bpl - 1 - j); - SET_DATA_BYTE(line, j, tab[val]); - } - } - break; - case 1: - extra = (w * d) & 31; - if (extra) - shift = 32 - extra; - else - shift = 0; - if (shift) - rasteropHipLow(data, h, d, wpl, 0, h, shift); - - databpl = (w + 7) / 8; - for (i = 0; i < h; i++) { - line = data + i * wpl; - memcpy(buffer, line, bpl); - for (j = 0; j < databpl; j++) { - val = GET_DATA_BYTE(buffer, bpl - 1 - j); - SET_DATA_BYTE(line, j, tab[val]); - } - } - break; - default: - pixDestroy(&pixd); - L_ERROR("illegal depth: %d\n", procName, d); - break; - } - - LEPT_FREE(buffer); - if (tab) LEPT_FREE(tab); - return pixd; -} - - -/*------------------------------------------------------------------* - * Top-bottom flip * - *------------------------------------------------------------------*/ -/*! - * \brief pixFlipTB() - * - * \param[in] pixd [optional]; can be null, equal to pixs, - * or different from pixs - * \param[in] pixs all depths - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This does a top-bottom flip of the image, which is
- *          equivalent to a rotation out of the plane about a
- *          horizontal line through the image center.
- *      (2) There are 3 cases for input:
- *          (a) pixd == null (creates a new pixd)
- *          (b) pixd == pixs (in-place operation)
- *          (c) pixd != pixs (existing pixd)
- *      (3) For clarity, use these three patterns, respectively:
- *          (a) pixd = pixFlipTB(NULL, pixs);
- *          (b) pixFlipTB(pixs, pixs);
- *          (c) pixFlipTB(pixd, pixs);
- *      (4) If an existing pixd is not the same size as pixs, the
- *          image data will be reallocated.
- *      (5) This is simple and fast.  We use the memcpy function
- *          to do all the work on aligned data, regardless of pixel
- *          depth.
- * 
- */ -PIX * -pixFlipTB(PIX *pixd, - PIX *pixs) -{ -l_int32 h, d, wpl, i, k, h2, bpl; -l_uint32 *linet, *lineb; -l_uint32 *data, *buffer; - - PROCNAME("pixFlipTB"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, NULL, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not in {1,2,4,8,16,32} bpp", - procName, NULL); - - /* Prepare pixd for in-place operation */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - data = pixGetData(pixd); - wpl = pixGetWpl(pixd); - if ((buffer = (l_uint32 *)LEPT_CALLOC(wpl, sizeof(l_uint32))) == NULL) - return (PIX *)ERROR_PTR("buffer not made", procName, pixd); - - h2 = h / 2; - bpl = 4 * wpl; - for (i = 0, k = h - 1; i < h2; i++, k--) { - linet = data + i * wpl; - lineb = data + k * wpl; - memcpy(buffer, linet, bpl); - memcpy(linet, lineb, bpl); - memcpy(lineb, buffer, bpl); - } - - LEPT_FREE(buffer); - return pixd; -} - - -/*------------------------------------------------------------------* - * Static byte reverse tables * - *------------------------------------------------------------------*/ -/*! - * \brief makeReverseByteTab1() - * - * Notes: - * (1) This generates an 8 bit lookup table for reversing - * the order of eight 1-bit pixels. - */ -static l_uint8 * -makeReverseByteTab1(void) -{ -l_int32 i; -l_uint8 *tab; - - tab = (l_uint8 *)LEPT_CALLOC(256, sizeof(l_uint8)); - for (i = 0; i < 256; i++) - tab[i] = ((0x80 & i) >> 7) | - ((0x40 & i) >> 5) | - ((0x20 & i) >> 3) | - ((0x10 & i) >> 1) | - ((0x08 & i) << 1) | - ((0x04 & i) << 3) | - ((0x02 & i) << 5) | - ((0x01 & i) << 7); - return tab; -} - - -/*! - * \brief makeReverseByteTab2() - * - * Notes: - * (1) This generates an 8 bit lookup table for reversing - * the order of four 2-bit pixels. 
- */ -static l_uint8 * -makeReverseByteTab2(void) -{ -l_int32 i; -l_uint8 *tab; - - tab = (l_uint8 *)LEPT_CALLOC(256, sizeof(l_uint8)); - for (i = 0; i < 256; i++) - tab[i] = ((0xc0 & i) >> 6) | - ((0x30 & i) >> 2) | - ((0x0c & i) << 2) | - ((0x03 & i) << 6); - return tab; -} - - -/*! - * \brief makeReverseByteTab4() - * - * Notes: - * (1) This generates an 8 bit lookup table for reversing - * the order of two 4-bit pixels. - */ -static l_uint8 * -makeReverseByteTab4(void) -{ -l_int32 i; -l_uint8 *tab; - - tab = (l_uint8 *)LEPT_CALLOC(256, sizeof(l_uint8)); - for (i = 0; i < 256; i++) - tab[i] = ((0xf0 & i) >> 4) | ((0x0f & i) << 4); - return tab; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateshear.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateshear.c deleted file mode 100644 index 7af69659..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/rotateshear.c +++ /dev/null @@ -1,498 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file rotateshear.c - *
- *
- *      Shear rotation about arbitrary point using 2 and 3 shears
- *
- *              PIX      *pixRotateShear()
- *              PIX      *pixRotate2Shear()
- *              PIX      *pixRotate3Shear()
- *
- *      Shear rotation in-place about arbitrary point using 3 shears
- *              l_int32   pixRotateShearIP()
- *
- *      Shear rotation around the image center
- *              PIX      *pixRotateShearCenter()    (2 or 3 shears)
- *              l_int32   pixRotateShearCenterIP()  (3 shears)
- *
- *  Rotation is measured in radians; clockwise rotations are positive.
- *
- *  Rotation by shear works on images of any depth,
- *  including 8 bpp color paletted images and 32 bpp
- *  rgb images.  It works by translating each src pixel
- *  value to the appropriate pixel in the rotated dest.
- *  For 8 bpp grayscale images, it is about 10-15x faster
- *  than rotation by area-mapping.
- *
- *  This speed and flexibility comes at the following cost,
- *  relative to area-mapped rotation:
- *
- *    ~  Jaggies are created on edges of straight lines
- *
- *    ~  For large angles, where you must use 3 shears,
- *       there is some extra clipping from the shears.
- *
- *  For small angles, typically less than 0.05 radians,
- *  rotation can be done with 2 orthogonal shears.
- *  Two such continuous shears (as opposed to the discrete
- *  shears on a pixel lattice that we have here) give
- *  a rotated image that has a distortion in the lengths
- *  of the two rotated and still-perpendicular axes.  The
- *  length/width ratio changes by a fraction
- *
- *       0.5 * (angle)**2
- *
- *  For an angle of 0.05 radians, this is about 1 part in
- *  a thousand.  This distortion is absent when you use
- *  3 continuous shears with the correct angles (see below).
- *
- *  Of course, the image is on a discrete pixel lattice.
- *  Rotation by shear gives an approximation to a continuous
- *  rotation, leaving pixel jaggies at sharp boundaries.
- *  For very small rotations, rotating from a corner gives
- *  better sensitivity than rotating from the image center.
- *  Here's why.  Define the shear "center" to be the line such
- *  that the image is sheared in opposite directions on
- *  each side of and parallel to the line.  For small
- *  rotations there is a "dead space" on each side of the
- *  shear center of width equal to half the shear angle,
- *  in radians.  Thus, when the image is sheared about the center,
- *  the dead space width equals the shear angle, but when
- *  the image is sheared from a corner, the dead space
- *  width is only half the shear angle.
- *
- *  All horizontal and vertical shears are implemented by
- *  rasterop.  The in-place rotation uses special in-place
- *  shears that copy rows sideways or columns vertically
- *  without buffering, and then rewrite old pixels that are
- *  no longer covered by sheared pixels.  For that rewriting,
- *  you have the choice of using white or black pixels.
- *  When not in-place, the new pix is initialized with white or black
- *  pixels by pixSetBlackOrWhite(), which also works for cmapped pix.
- *  But for in-place, this initialization is not possible, so
- *  in-place shear operations on cmapped pix are not allowed.
- *
- *  Rotation by shear is fast and depth-independent.  However, it
- *  does not work well for large rotation angles.  In fact, for
- *  rotation angles greater than about 7 degrees, more pixels are
- *  lost at the edges than when using pixRotationBySampling(), which
- *  only loses pixels because they are rotated out of the image.
- *  For larger rotations, use pixRotationBySampling() or, for
- *  more accuracy when d > 1 bpp, pixRotateAM().
- *
- *  For small angles, when comparing the quality of rotation by
- *  sampling and by shear, you can see that rotation by sampling
- *  is slightly more accurate.  However, the difference in
- *  accuracy of rotation by sampling when compared to 3-shear and
- *  (for angles less than 2 degrees, when compared to 2-shear) is
- *  less than 1 pixel at any point.  For very small angles, rotation by
- *  sampling is much slower than rotation by shear.  The speed difference
- *  depends on the pixel depth and the rotation angle.  Rotation
- *  by shear is very fast for small angles and for small depth (esp. 1 bpp).
- *  Rotation by sampling speed is independent of angle and relatively
- *  more efficient for 8 and 32 bpp images.  Here are some timings
- *  for the ratio of rotation times: (time for sampling)/ (time for shear)
-  *
- *       depth (bpp)       ratio (2 deg)       ratio (10 deg)
- *       -----------------------------------------------------
- *          1                  25                  6
- *          8                   5                  2.6
- *          32                  1.6                1.0
- *
- *  In summary:
- *    * For d == 1 and small angles, use rotation by shear.  By default
- *      this will use 2-shear rotations, because 3-shears cause more
- *      visible artifacts in straight lines and, for small angles, the
- *      distortion in asperity ratio is small.
- *    * For d > 1, shear is faster than sampling, which is faster than
- *      area mapping.  However, area mapping gives the best results.
- *  These results are used in selecting the rotation methods in
- *  pixRotateShear().
- *
- *  There has been some work on what is called a "quasishear
- *  rotation" ("The Quasi-Shear Rotation, Eric Andres,
- *  DGCI 1996, pp. 307-314).  I believe they use a 3-shear
- *  approximation to the continuous rotation, exactly as
- *  we do here.  The approximation is due to being on
- *  a square pixel lattice.  They also use integers to specify
- *  the rotation angle and center offset, but that makes
- *  little sense on a machine where you have a few GFLOPS
- *  and only a few hundred floating point operations to do (!)
- *  They also allow subpixel specification of the center of
- *  rotation, which I haven't bothered with, and claim that
- *  better results are possible if each of the 4 quadrants is
- *  handled separately.
- *
- *  But the bottom line is that you are going to see shear lines when
- *  you rotate 1 bpp images.  Although the 3-shear rotation is
- *  mathematically exact in the limit of infinitesimal pixels, artifacts
- *  will be evident in real images.  One might imagine using dithering
- *  to break up the horizontal and vertical shear lines, but this
- *  is hard with block shears, where you need to dither on the block
- *  boundaries.  Dithering (by accumulation of 'error') with sampling
- *  makes more sense, but I haven't tried to do this.  There is only
- *  so much you can do with 1 bpp images!
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Angle limits: - * angle < MinAngleToRotate ==> clone - * angle > MaxTwoShearAngle ==> warning for 2-angle shears - * angle > MaxThreeShearAngle ==> warning for 3-angle shears - * angle > MaxShearAngle ==> error - */ -static const l_float32 MinAngleToRotate = 0.001; /* radians; ~0.06 deg */ -static const l_float32 MaxTwoShearAngle = 0.06; /* radians; ~3 deg */ -static const l_float32 MaxThreeShearAngle = 0.35; /* radians; ~20 deg */ -static const l_float32 MaxShearAngle = 0.50; /* radians; ~29 deg */ - -/*------------------------------------------------------------------* - * Rotations about an arbitrary point * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateShear() - * - * \param[in] pixs any depth; cmap ok - * \param[in] xcen x value for which there is no horizontal shear - * \param[in] ycen y value for which there is no vertical shear - * \param[in] angle radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) This rotates an image about the given point, using
- *          either 2 or 3 shears.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) This brings in 'incolor' pixels from outside the image.
- *      (4) For rotation angles larger than about 0.35 radians, we issue
- *          a warning because you should probably be using another method
- *          (either sampling or area mapping)
- * 
- */ -PIX * -pixRotateShear(PIX *pixs, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 incolor) -{ - PROCNAME("pixRotateShear"); - - if (!pixs) - return (PIX *)(PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)(PIX *)ERROR_PTR("invalid incolor value", procName, NULL); - - if (L_ABS(angle) > MaxShearAngle) { - L_ERROR("%6.2f radians; too large for shear rotation\n", procName, - L_ABS(angle)); - return NULL; - } - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - - if (L_ABS(angle) <= MaxTwoShearAngle) - return pixRotate2Shear(pixs, xcen, ycen, angle, incolor); - else - return pixRotate3Shear(pixs, xcen, ycen, angle, incolor); -} - - -/*! - * \brief pixRotate2Shear() - * - * \param[in] pixs any depth; cmap ok - * \param[in] xcen, ycen center of rotation - * \param[in] angle radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) This rotates the image about the given point, using the 2-shear
- *          method.  It should only be used for angles no larger than
- *          MaxTwoShearAngle.  For larger angles, a warning is issued.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) 2-shear rotation by a specified angle is equivalent
- *          to the sequential transformations
- *             x' = x + tan(angle) * (y - ycen)     for x-shear
- *             y' = y + tan(angle) * (x - xcen)     for y-shear
- *      (4) Computation of tan(angle) is performed within the shear operation.
- *      (5) This brings in 'incolor' pixels from outside the image.
- *      (6) If the image has an alpha layer, it is rotated separately by
- *          two shears.
- * 
- */ -PIX * -pixRotate2Shear(PIX *pixs, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 incolor) -{ -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixRotate2Shear"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)(PIX *)ERROR_PTR("invalid incolor value", procName, NULL); - - if (L_ABS(angle) > MaxShearAngle) { - L_ERROR("%6.2f radians; too large for shear rotation\n", procName, - L_ABS(angle)); - return NULL; - } - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - if (L_ABS(angle) > MaxTwoShearAngle) - L_WARNING("%6.2f radians; large angle for 2-shear rotation\n", - procName, L_ABS(angle)); - - if ((pix1 = pixHShear(NULL, pixs, ycen, angle, incolor)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - pixd = pixVShear(NULL, pix1, xcen, angle, incolor); - pixDestroy(&pix1); - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - if (pixGetDepth(pixs) == 32 && pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - /* L_BRING_IN_WHITE brings in opaque for the alpha component */ - pix2 = pixRotate2Shear(pix1, xcen, ycen, angle, L_BRING_IN_WHITE); - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - return pixd; -} - - -/*! - * \brief pixRotate3Shear() - * - * \param[in] pixs any depth; cmap ok - * \param[in] xcen, ycen center of rotation - * \param[in] angle radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) This rotates the image about the given point, using the 3-shear
- *          method.  It should only be used for angles smaller than
- *          MaxThreeShearAngle.  For larger angles, a warning is issued.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) 3-shear rotation by a specified angle is equivalent
- *          to the sequential transformations
- *            y' = y + tan(angle/2) * (x - xcen)     for first y-shear
- *            x' = x + sin(angle) * (y - ycen)       for x-shear
- *            y' = y + tan(angle/2) * (x - xcen)     for second y-shear
- *      (4) Computation of tan(angle) is performed in the shear operations.
- *      (5) This brings in 'incolor' pixels from outside the image.
- *      (6) If the image has an alpha layer, it is rotated separately by
- *          two shears.
- *      (7) The algorithm was published by Alan Paeth: "A Fast Algorithm
- *          for General Raster Rotation," Graphics Interface '86,
- *          pp. 77-81, May 1986.  A description of the method, along with
- *          an implementation, can be found in Graphics Gems, p. 179,
- *          edited by Andrew Glassner, published by Academic Press, 1990.
- * 
- */ -PIX * -pixRotate3Shear(PIX *pixs, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 incolor) -{ -l_float32 hangle; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixRotate3Shear"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)(PIX *)ERROR_PTR("invalid incolor value", procName, NULL); - - if (L_ABS(angle) > MaxShearAngle) { - L_ERROR("%6.2f radians; too large for shear rotation\n", procName, - L_ABS(angle)); - return NULL; - } - if (L_ABS(angle) < MinAngleToRotate) - return pixClone(pixs); - if (L_ABS(angle) > MaxThreeShearAngle) { - L_WARNING("%6.2f radians; large angle for 3-shear rotation\n", - procName, L_ABS(angle)); - } - - hangle = atan(sin(angle)); - if ((pixd = pixVShear(NULL, pixs, xcen, angle / 2., incolor)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - if ((pix1 = pixHShear(NULL, pixd, ycen, hangle, incolor)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - } - pixVShear(pixd, pix1, xcen, angle / 2., incolor); - pixDestroy(&pix1); - - if (pixGetDepth(pixs) == 32 && pixGetSpp(pixs) == 4) { - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - /* L_BRING_IN_WHITE brings in opaque for the alpha component */ - pix2 = pixRotate3Shear(pix1, xcen, ycen, angle, L_BRING_IN_WHITE); - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - } - return pixd; -} - - -/*------------------------------------------------------------------* - * Rotations in-place about an arbitrary point * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateShearIP() - * - * \param[in] pixs any depth; no cmap - * \param[in] xcen, ycen center of rotation - * \param[in] angle radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This does an in-place rotation of the image about the
- *          specified point, using the 3-shear method.  It should only
- *          be used for angles smaller than MaxThreeShearAngle.
- *          For larger angles, a warning is issued.
- *      (2) A positive angle gives a clockwise rotation.
- *      (3) 3-shear rotation by a specified angle is equivalent
- *          to the sequential transformations
- *            y' = y + tan(angle/2) * (x - xcen)      for first y-shear
- *            x' = x + sin(angle) * (y - ycen)        for x-shear
- *            y' = y + tan(angle/2) * (x - xcen)      for second y-shear
- *      (4) Computation of tan(angle) is performed in the shear operations.
- *      (5) This brings in 'incolor' pixels from outside the image.
- *      (6) The pix cannot be colormapped, because the in-place operation
- *          only blits in 0 or 1 bits, not an arbitrary colormap index.
- * 
- */ -l_ok -pixRotateShearIP(PIX *pixs, - l_int32 xcen, - l_int32 ycen, - l_float32 angle, - l_int32 incolor) -{ -l_float32 hangle; - - PROCNAME("pixRotateShearIP"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return ERROR_INT("invalid value for incolor", procName, 1); - if (pixGetColormap(pixs) != NULL) - return ERROR_INT("pixs is colormapped", procName, 1); - - if (angle == 0.0) - return 0; - if (L_ABS(angle) > MaxThreeShearAngle) { - L_WARNING("%6.2f radians; large angle for in-place 3-shear rotation\n", - procName, L_ABS(angle)); - } - - hangle = atan(sin(angle)); - pixHShearIP(pixs, ycen, angle / 2., incolor); - pixVShearIP(pixs, xcen, hangle, incolor); - pixHShearIP(pixs, ycen, angle / 2., incolor); - return 0; -} - - -/*------------------------------------------------------------------* - * Rotations about the image center * - *------------------------------------------------------------------*/ -/*! - * \brief pixRotateShearCenter() - * - * \param[in] pixs any depth; cmap ok - * \param[in] angle radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return pixd, or NULL on error - */ -PIX * -pixRotateShearCenter(PIX *pixs, - l_float32 angle, - l_int32 incolor) -{ - PROCNAME("pixRotateShearCenter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - return pixRotateShear(pixs, pixGetWidth(pixs) / 2, - pixGetHeight(pixs) / 2, angle, incolor); -} - - -/*! 
- * \brief pixRotateShearCenterIP() - * - * \param[in] pixs any depth; no cmap - * \param[in] angle radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK - * \return 0 if OK, 1 on error - */ -l_ok -pixRotateShearCenterIP(PIX *pixs, - l_float32 angle, - l_int32 incolor) -{ - PROCNAME("pixRotateShearCenterIP"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - return pixRotateShearIP(pixs, pixGetWidth(pixs) / 2, - pixGetHeight(pixs) / 2, angle, incolor); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/runlength.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/runlength.c deleted file mode 100644 index 0ad1a68e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/runlength.c +++ /dev/null @@ -1,814 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file runlength.c - *
- *
- *     Label pixels by membership in runs
- *           PIX         *pixStrokeWidthTransform()
- *           static PIX  *pixFindMinRunsOrthogonal()
- *           PIX         *pixRunlengthTransform()
- *
- *     Find runs along horizontal and vertical lines
- *           l_int32      pixFindHorizontalRuns()
- *           l_int32      pixFindVerticalRuns()
- *
- *     Find max runs along horizontal and vertical lines
- *           l_int32      pixFindMaxRuns()
- *           l_int32      pixFindMaxHorizontalRunOnLine()
- *           l_int32      pixFindMaxVerticalRunOnLine()
- *
- *     Compute runlength-to-membership transform on a line
- *           l_int32      runlengthMembershipOnLine()
- *
- *     Make byte position LUT
- *           l_int32      makeMSBitLocTab()
- *
- *  Here we're handling runs of either black or white pixels on 1 bpp
- *  images.  The directions of the runs in the stroke width transform
- *  are selectable from given sets of angles.  Most of the other runs
- *  are oriented either horizontally along the raster lines or
- *  vertically along pixel columns.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - -static PIX *pixFindMinRunsOrthogonal(PIX *pixs, l_float32 angle, l_int32 depth); - -/*-----------------------------------------------------------------------* - * Label pixels by membership in runs * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixStrokeWidthTransform() - * - * \param[in] pixs 1 bpp - * \param[in] color 0 for white runs, 1 for black runs - * \param[in] depth of pixd: 8 or 16 bpp - * \param[in] nangles 2, 4, 6 or 8 - * \return pixd 8 or 16 bpp, or NULL on error - * - *
- * Notes:
- *      (1) The dest Pix is 8 or 16 bpp, with the pixel values
- *          equal to the stroke width in which it is a member.
- *          The values are clipped to the max pixel value if necessary.
- *      (2) %color determines if we're labelling white or black strokes.
- *      (3) A pixel that is not a member of the chosen color gets
- *          value 0; it belongs to a width of length 0 of the
- *          chosen color.
- *      (4) This chooses, for each dest pixel, the minimum of sets
- *          of runlengths through each pixel.  Here are the sets:
- *            nangles    increment          set
- *            -------    ---------    --------------------------------
- *               2          90       {0, 90}
- *               4          45       {0, 45, 90, 135}
- *               6          30       {0, 30, 60, 90, 120, 150}
- *               8          22.5     {0, 22.5, 45, 67.5, 90, 112.5, 135, 157.5}
- *      (5) Runtime scales linearly with (%nangles - 2).
- * 
- */ -PIX * -pixStrokeWidthTransform(PIX *pixs, - l_int32 color, - l_int32 depth, - l_int32 nangles) -{ -l_float32 angle, pi; -PIX *pixh, *pixv, *pixt, *pixg1, *pixg2, *pixg3, *pixg4; - - PROCNAME("pixStrokeWidthTransform"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (depth != 8 && depth != 16) - return (PIX *)ERROR_PTR("depth must be 8 or 16 bpp", procName, NULL); - if (nangles != 2 && nangles != 4 && nangles != 6 && nangles != 8) - return (PIX *)ERROR_PTR("nangles not in {2,4,6,8}", procName, NULL); - - /* Use fg runs for evaluation */ - if (color == 0) - pixt = pixInvert(NULL, pixs); - else - pixt = pixClone(pixs); - - /* Find min length at 0 and 90 degrees */ - pixh = pixRunlengthTransform(pixt, 1, L_HORIZONTAL_RUNS, depth); - pixv = pixRunlengthTransform(pixt, 1, L_VERTICAL_RUNS, depth); - pixg1 = pixMinOrMax(NULL, pixh, pixv, L_CHOOSE_MIN); - pixDestroy(&pixh); - pixDestroy(&pixv); - - pixg2 = pixg3 = pixg4 = NULL; - pi = 3.1415926535; - if (nangles == 4 || nangles == 8) { - /* Find min length at +45 and -45 degrees */ - angle = pi / 4.0; - pixg2 = pixFindMinRunsOrthogonal(pixt, angle, depth); - } - - if (nangles == 6) { - /* Find min length at +30 and -60 degrees */ - angle = pi / 6.0; - pixg2 = pixFindMinRunsOrthogonal(pixt, angle, depth); - - /* Find min length at +60 and -30 degrees */ - angle = pi / 3.0; - pixg3 = pixFindMinRunsOrthogonal(pixt, angle, depth); - } - - if (nangles == 8) { - /* Find min length at +22.5 and -67.5 degrees */ - angle = pi / 8.0; - pixg3 = pixFindMinRunsOrthogonal(pixt, angle, depth); - - /* Find min length at +67.5 and -22.5 degrees */ - angle = 3.0 * pi / 8.0; - pixg4 = pixFindMinRunsOrthogonal(pixt, angle, depth); - } - pixDestroy(&pixt); - - if (nangles > 2) - pixMinOrMax(pixg1, pixg1, pixg2, L_CHOOSE_MIN); - if (nangles > 4) - pixMinOrMax(pixg1, pixg1, pixg3, L_CHOOSE_MIN); - if (nangles > 6) - pixMinOrMax(pixg1, pixg1, pixg4, L_CHOOSE_MIN); - 
pixDestroy(&pixg2); - pixDestroy(&pixg3); - pixDestroy(&pixg4); - return pixg1; -} - - -/*! - * \brief pixFindMinRunsOrthogonal() - * - * \param[in] pixs 1 bpp - * \param[in] angle in radians - * \param[in] depth of pixd: 8 or 16 bpp - * \return pixd 8 or 16 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This computes, for each fg pixel in pixs, the minimum of
- *          the runlengths going through that pixel in two orthogonal
- *          directions: at %angle and at (90 + %angle).
- *      (2) We use rotation by shear because the forward and backward
- *          rotations by the same angle are exact inverse operations.
- *          As a result, the nonzero pixels in pixd correspond exactly
- *          to the fg pixels in pixs.  This is not the case with
- *          sampled rotation, due to spatial quantization.  Nevertheless,
- *          the result suffers from lack of exact correspondence
- *          between original and rotated pixels, also due to spatial
- *          quantization, causing some boundary pixels to be
- *          shifted from bg to fg or v.v.
- * 
- */ -static PIX * -pixFindMinRunsOrthogonal(PIX *pixs, - l_float32 angle, - l_int32 depth) -{ -l_int32 w, h, diag, xoff, yoff; -PIX *pixb, *pixr, *pixh, *pixv, *pixg1, *pixg2, *pixd; -BOX *box; - - PROCNAME("pixFindMinRunsOrthogonal"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - /* Rasterop into the center of a sufficiently large image - * so we don't lose pixels for any rotation angle. */ - pixGetDimensions(pixs, &w, &h, NULL); - diag = (l_int32)(sqrt((l_float64)(w * w + h * h)) + 2.5); - xoff = (diag - w) / 2; - yoff = (diag - h) / 2; - pixb = pixCreate(diag, diag, 1); - pixRasterop(pixb, xoff, yoff, w, h, PIX_SRC, pixs, 0, 0); - - /* Rotate about the 'center', get the min of orthogonal transforms, - * rotate back, and crop the part corresponding to pixs. */ - pixr = pixRotateShear(pixb, diag / 2, diag / 2, angle, L_BRING_IN_WHITE); - pixh = pixRunlengthTransform(pixr, 1, L_HORIZONTAL_RUNS, depth); - pixv = pixRunlengthTransform(pixr, 1, L_VERTICAL_RUNS, depth); - pixg1 = pixMinOrMax(NULL, pixh, pixv, L_CHOOSE_MIN); - pixg2 = pixRotateShear(pixg1, diag / 2, diag / 2, -angle, L_BRING_IN_WHITE); - box = boxCreate(xoff, yoff, w, h); - pixd = pixClipRectangle(pixg2, box, NULL); - - pixDestroy(&pixb); - pixDestroy(&pixr); - pixDestroy(&pixh); - pixDestroy(&pixv); - pixDestroy(&pixg1); - pixDestroy(&pixg2); - boxDestroy(&box); - return pixd; -} - - -/*! - * \brief pixRunlengthTransform() - * - * \param[in] pixs 1 bpp - * \param[in] color 0 for white runs, 1 for black runs - * \param[in] direction L_HORIZONTAL_RUNS, L_VERTICAL_RUNS - * \param[in] depth 8 or 16 bpp - * \return pixd 8 or 16 bpp, or NULL on error - * - *
- * Notes:
- *      (1) The dest Pix is 8 or 16 bpp, with the pixel values
- *          equal to the runlength in which it is a member.
- *          The length is clipped to the max pixel value if necessary.
- *      (2) %color determines if we're labelling white or black runs.
- *      (3) A pixel that is not a member of the chosen color gets
- *          value 0; it belongs to a run of length 0 of the
- *          chosen color.
- *      (4) To convert for maximum dynamic range, either linear or
- *          log, use pixMaxDynamicRange().
- * 
- */ -PIX * -pixRunlengthTransform(PIX *pixs, - l_int32 color, - l_int32 direction, - l_int32 depth) -{ -l_int32 i, j, w, h, wpld, bufsize, maxsize, n; -l_int32 *start, *end, *buffer; -l_uint32 *datad, *lined; -PIX *pixt, *pixd; - - PROCNAME("pixRunlengthTransform"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (depth != 8 && depth != 16) - return (PIX *)ERROR_PTR("depth must be 8 or 16 bpp", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (direction == L_HORIZONTAL_RUNS) - maxsize = 1 + w / 2; - else if (direction == L_VERTICAL_RUNS) - maxsize = 1 + h / 2; - else - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - bufsize = L_MAX(w, h); - if (bufsize > 1000000) { - L_ERROR("largest image dimension = %d; too big\n", procName, bufsize); - return NULL; - } - - if ((pixd = pixCreate(w, h, depth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - start = (l_int32 *)LEPT_CALLOC(maxsize, sizeof(l_int32)); - end = (l_int32 *)LEPT_CALLOC(maxsize, sizeof(l_int32)); - buffer = (l_int32 *)LEPT_CALLOC(bufsize, sizeof(l_int32)); - - /* Use fg runs for evaluation */ - if (color == 0) - pixt = pixInvert(NULL, pixs); - else - pixt = pixClone(pixs); - - if (direction == L_HORIZONTAL_RUNS) { - for (i = 0; i < h; i++) { - pixFindHorizontalRuns(pixt, i, start, end, &n); - runlengthMembershipOnLine(buffer, w, depth, start, end, n); - lined = datad + i * wpld; - if (depth == 8) { - for (j = 0; j < w; j++) - SET_DATA_BYTE(lined, j, buffer[j]); - } else { /* depth == 16 */ - for (j = 0; j < w; j++) - SET_DATA_TWO_BYTES(lined, j, buffer[j]); - } - } - } else { /* L_VERTICAL_RUNS */ - for (j = 0; j < w; j++) { - pixFindVerticalRuns(pixt, j, start, end, &n); - runlengthMembershipOnLine(buffer, h, depth, start, end, n); - if (depth == 8) { - for (i = 0; i < h; 
i++) { - lined = datad + i * wpld; - SET_DATA_BYTE(lined, j, buffer[i]); - } - } else { /* depth == 16 */ - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - SET_DATA_TWO_BYTES(lined, j, buffer[i]); - } - } - } - } - - pixDestroy(&pixt); - LEPT_FREE(start); - LEPT_FREE(end); - LEPT_FREE(buffer); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Find runs along horizontal and vertical lines * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixFindHorizontalRuns() - * - * \param[in] pix 1 bpp - * \param[in] y line to traverse - * \param[in] xstart returns array of start positions for fg runs - * \param[in] xend returns array of end positions for fg runs - * \param[out] pn the number of runs found - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds foreground horizontal runs on a single scanline.
- *      (2) To find background runs, use pixInvert() before applying
- *          this function.
- *      (3) %xstart and %xend arrays are input.  They should be
- *          of size w/2 + 1 to insure that they can hold
- *          the maximum number of runs in the raster line.
- * 
- */ -l_ok -pixFindHorizontalRuns(PIX *pix, - l_int32 y, - l_int32 *xstart, - l_int32 *xend, - l_int32 *pn) -{ -l_int32 inrun; /* boolean */ -l_int32 index, w, h, d, j, wpl, val; -l_uint32 *line; - - PROCNAME("pixFindHorizontalRuns"); - - if (!pn) - return ERROR_INT("&n not defined", procName, 1); - *pn = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 1) - return ERROR_INT("pix not 1 bpp", procName, 1); - if (y < 0 || y >= h) - return ERROR_INT("y not in [0 ... h - 1]", procName, 1); - if (!xstart) - return ERROR_INT("xstart not defined", procName, 1); - if (!xend) - return ERROR_INT("xend not defined", procName, 1); - - wpl = pixGetWpl(pix); - line = pixGetData(pix) + y * wpl; - - inrun = FALSE; - index = 0; - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(line, j); - if (!inrun) { - if (val) { - xstart[index] = j; - inrun = TRUE; - } - } else { - if (!val) { - xend[index++] = j - 1; - inrun = FALSE; - } - } - } - - /* Finish last run if necessary */ - if (inrun) - xend[index++] = w - 1; - - *pn = index; - return 0; -} - - -/*! - * \brief pixFindVerticalRuns() - * - * \param[in] pix 1 bpp - * \param[in] x line to traverse - * \param[in] ystart returns array of start positions for fg runs - * \param[in] yend returns array of end positions for fg runs - * \param[out] pn the number of runs found - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds foreground vertical runs on a single scanline.
- *      (2) To find background runs, use pixInvert() before applying
- *          this function.
- *      (3) %ystart and %yend arrays are input.  They should be
- *          of size h/2 + 1 to insure that they can hold
- *          the maximum number of runs in the raster line.
- * 
- */ -l_ok -pixFindVerticalRuns(PIX *pix, - l_int32 x, - l_int32 *ystart, - l_int32 *yend, - l_int32 *pn) -{ -l_int32 inrun; /* boolean */ -l_int32 index, w, h, d, i, wpl, val; -l_uint32 *data, *line; - - PROCNAME("pixFindVerticalRuns"); - - if (!pn) - return ERROR_INT("&n not defined", procName, 1); - *pn = 0; - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - pixGetDimensions(pix, &w, &h, &d); - if (d != 1) - return ERROR_INT("pix not 1 bpp", procName, 1); - if (x < 0 || x >= w) - return ERROR_INT("x not in [0 ... w - 1]", procName, 1); - if (!ystart) - return ERROR_INT("ystart not defined", procName, 1); - if (!yend) - return ERROR_INT("yend not defined", procName, 1); - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - - inrun = FALSE; - index = 0; - for (i = 0; i < h; i++) { - line = data + i * wpl; - val = GET_DATA_BIT(line, x); - if (!inrun) { - if (val) { - ystart[index] = i; - inrun = TRUE; - } - } else { - if (!val) { - yend[index++] = i - 1; - inrun = FALSE; - } - } - } - - /* Finish last run if necessary */ - if (inrun) - yend[index++] = h - 1; - - *pn = index; - return 0; -} - - -/*-----------------------------------------------------------------------* - * Find max runs along horizontal and vertical lines * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixFindMaxRuns() - * - * \param[in] pix 1 bpp - * \param[in] direction L_HORIZONTAL_RUNS or L_VERTICAL_RUNS - * \param[out] pnastart [optional] start locations of longest runs - * \return na of lengths of runs, or NULL on error - * - *
- * Notes:
- *      (1) This finds the longest foreground runs by row or column
- *      (2) To find background runs, use pixInvert() before applying
- *          this function.
- * 
- */ -NUMA * -pixFindMaxRuns(PIX *pix, - l_int32 direction, - NUMA **pnastart) -{ -l_int32 w, h, i, start, size; -NUMA *nasize; - - PROCNAME("pixFindMaxRuns"); - - if (pnastart) *pnastart = NULL; - if (direction != L_HORIZONTAL_RUNS && direction != L_VERTICAL_RUNS) - return (NUMA *)ERROR_PTR("direction invalid", procName, NULL); - if (!pix || pixGetDepth(pix) != 1) - return (NUMA *)ERROR_PTR("pix undefined or not 1 bpp", procName, NULL); - - pixGetDimensions(pix, &w, &h, NULL); - nasize = numaCreate(w); - if (pnastart) *pnastart = numaCreate(w); - if (direction == L_HORIZONTAL_RUNS) { - for (i = 0; i < h; i++) { - pixFindMaxHorizontalRunOnLine(pix, i, &start, &size); - numaAddNumber(nasize, size); - if (pnastart) numaAddNumber(*pnastart, start); - } - } else { /* vertical scans */ - for (i = 0; i < w; i++) { - pixFindMaxVerticalRunOnLine(pix, i, &start, &size); - numaAddNumber(nasize, size); - if (pnastart) numaAddNumber(*pnastart, start); - } - } - - return nasize; -} - - -/*! - * \brief pixFindMaxHorizontalRunOnLine() - * - * \param[in] pix 1 bpp - * \param[in] y line to traverse - * \param[out] pxstart [optional] start position - * \param[out] psize the size of the run - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds the longest foreground horizontal run on a scanline.
- *      (2) To find background runs, use pixInvert() before applying
- *          this function.
- * 
- */ -l_ok -pixFindMaxHorizontalRunOnLine(PIX *pix, - l_int32 y, - l_int32 *pxstart, - l_int32 *psize) -{ -l_int32 inrun; /* boolean */ -l_int32 w, h, j, wpl, val, maxstart, maxsize, length, start; -l_uint32 *line; - - PROCNAME("pixFindMaxHorizontalRunOnLine"); - - if (pxstart) *pxstart = 0; - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - *psize = 0; - if (!pix || pixGetDepth(pix) != 1) - return ERROR_INT("pix not defined or not 1 bpp", procName, 1); - pixGetDimensions(pix, &w, &h, NULL); - if (y < 0 || y >= h) - return ERROR_INT("y not in [0 ... h - 1]", procName, 1); - - wpl = pixGetWpl(pix); - line = pixGetData(pix) + y * wpl; - inrun = FALSE; - start = 0; - maxstart = 0; - maxsize = 0; - for (j = 0; j < w; j++) { - val = GET_DATA_BIT(line, j); - if (!inrun) { - if (val) { - start = j; - inrun = TRUE; - } - } else if (!val) { /* run just ended */ - length = j - start; - if (length > maxsize) { - maxsize = length; - maxstart = start; - } - inrun = FALSE; - } - } - - if (inrun) { /* a run has continued to the end of the row */ - length = j - start; - if (length > maxsize) { - maxsize = length; - maxstart = start; - } - } - if (pxstart) *pxstart = maxstart; - *psize = maxsize; - return 0; -} - - -/*! - * \brief pixFindMaxVerticalRunOnLine() - * - * \param[in] pix 1 bpp - * \param[in] x column to traverse - * \param[out] pystart [optional] start position - * \param[out] psize the size of the run - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This finds the longest foreground vertical run on a scanline.
- *      (2) To find background runs, use pixInvert() before applying
- *          this function.
- * 
- */ -l_ok -pixFindMaxVerticalRunOnLine(PIX *pix, - l_int32 x, - l_int32 *pystart, - l_int32 *psize) -{ -l_int32 inrun; /* boolean */ -l_int32 w, h, i, wpl, val, maxstart, maxsize, length, start; -l_uint32 *data, *line; - - PROCNAME("pixFindMaxVerticalRunOnLine"); - - if (pystart) *pystart = 0; - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - *psize = 0; - if (!pix || pixGetDepth(pix) != 1) - return ERROR_INT("pix not defined or not 1 bpp", procName, 1); - pixGetDimensions(pix, &w, &h, NULL); - if (x < 0 || x >= w) - return ERROR_INT("x not in [0 ... w - 1]", procName, 1); - - wpl = pixGetWpl(pix); - data = pixGetData(pix); - inrun = FALSE; - start = 0; - maxstart = 0; - maxsize = 0; - for (i = 0; i < h; i++) { - line = data + i * wpl; - val = GET_DATA_BIT(line, x); - if (!inrun) { - if (val) { - start = i; - inrun = TRUE; - } - } else if (!val) { /* run just ended */ - length = i - start; - if (length > maxsize) { - maxsize = length; - maxstart = start; - } - inrun = FALSE; - } - } - - if (inrun) { /* a run has continued to the end of the column */ - length = i - start; - if (length > maxsize) { - maxsize = length; - maxstart = start; - } - } - if (pystart) *pystart = maxstart; - *psize = maxsize; - return 0; -} - - -/*-----------------------------------------------------------------------* - * Compute runlength-to-membership transform on a line * - *-----------------------------------------------------------------------*/ -/*! - * \brief runlengthMembershipOnLine() - * - * \param[in] buffer into which full line of data is placed - * \param[in] size full size of line; w or h - * \param[in] depth 8 or 16 bpp - * \param[in] start array of start positions for fg runs - * \param[in] end array of end positions for fg runs - * \param[in] n the number of runs - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Converts a set of runlengths into a buffer of
- *          runlength membership values.
- *      (2) Initialization of the array gives pixels that are
- *          not within a run the value 0.
- * 
- */ -l_ok -runlengthMembershipOnLine(l_int32 *buffer, - l_int32 size, - l_int32 depth, - l_int32 *start, - l_int32 *end, - l_int32 n) -{ -l_int32 i, j, first, last, diff, max; - - PROCNAME("runlengthMembershipOnLine"); - - if (!buffer) - return ERROR_INT("buffer not defined", procName, 1); - if (!start) - return ERROR_INT("start not defined", procName, 1); - if (!end) - return ERROR_INT("end not defined", procName, 1); - - if (depth == 8) - max = 0xff; - else /* depth == 16 */ - max = 0xffff; - - memset(buffer, 0, 4 * size); - for (i = 0; i < n; i++) { - first = start[i]; - last = end[i]; - diff = last - first + 1; - diff = L_MIN(diff, max); - for (j = first; j <= last; j++) - buffer[j] = diff; - } - - return 0; -} - - -/*-----------------------------------------------------------------------* - * Make byte position LUT * - *-----------------------------------------------------------------------*/ -/*! - * \brief makeMSBitLocTab() - * - * \param[in] bitval either 0 or 1 - * \return table: for an input byte, the MS bit location, starting at 0 - * with the MSBit in the byte, or NULL on error. - * - *
- * Notes:
- *      (1) If %bitval == 1, it finds the leftmost ON pixel in a byte;
- *          otherwise if %bitval == 0, it finds the leftmost OFF pixel.
- *      (2) If there are no pixels of the indicated color in the byte,
- *          this returns 8.
- * 
- */ -l_int32 * -makeMSBitLocTab(l_int32 bitval) -{ -l_int32 i, j; -l_int32 *tab; -l_uint8 byte, mask; - - tab = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32)); - for (i = 0; i < 256; i++) { - byte = (l_uint8)i; - if (bitval == 0) - byte = ~byte; - tab[i] = 8; - mask = 0x80; - for (j = 0; j < 8; j++) { - if (byte & mask) { - tab[i] = j; - break; - } - mask >>= 1; - } - } - return tab; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sarray1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sarray1.c deleted file mode 100644 index 360ea2ba..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sarray1.c +++ /dev/null @@ -1,1969 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file sarray1.c - *
- *
- *      Create/Destroy/Copy
- *          SARRAY    *sarrayCreate()
- *          SARRAY    *sarrayCreateInitialized()
- *          SARRAY    *sarrayCreateWordsFromString()
- *          SARRAY    *sarrayCreateLinesFromString()
- *          void      *sarrayDestroy()
- *          SARRAY    *sarrayCopy()
- *          SARRAY    *sarrayClone()
- *
- *      Add/Remove string
- *          l_int32    sarrayAddString()
- *          static l_int32  sarrayExtendArray()
- *          char      *sarrayRemoveString()
- *          l_int32    sarrayReplaceString()
- *          l_int32    sarrayClear()
- *
- *      Accessors
- *          l_int32    sarrayGetCount()
- *          char     **sarrayGetArray()
- *          char      *sarrayGetString()
- *          l_int32    sarrayGetRefcount()
- *          l_int32    sarrayChangeRefcount()
- *
- *      Conversion back to string
- *          char      *sarrayToString()
- *          char      *sarrayToStringRange()
- *
- *      Join 2 sarrays
- *          l_int32    sarrayJoin()
- *          l_int32    sarrayAppendRange()
- *
- *      Pad an sarray to be the same size as another sarray
- *          l_int32    sarrayPadToSameSize()
- *
- *      Convert word sarray to (formatted) line sarray
- *          SARRAY    *sarrayConvertWordsToLines()
- *
- *      Split string on separator list
- *          SARRAY    *sarraySplitString()
- *
- *      Filter sarray
- *          SARRAY    *sarraySelectBySubstring()
- *          SARRAY    *sarraySelectByRange()
- *          l_int32    sarrayParseRange()
- *
- *      Serialize for I/O
- *          SARRAY    *sarrayRead()
- *          SARRAY    *sarrayReadStream()
- *          SARRAY    *sarrayReadMem()
- *          l_int32    sarrayWrite()
- *          l_int32    sarrayWriteStream()
- *          l_int32    sarrayWriteMem()
- *          l_int32    sarrayAppend()
- *
- *      Directory filenames
- *          SARRAY    *getNumberedPathnamesInDirectory()
- *          SARRAY    *getSortedPathnamesInDirectory()
- *          SARRAY    *convertSortedToNumberedPathnames()
- *          SARRAY    *getFilenamesInDirectory()
- *
- *      These functions are important for efficient manipulation
- *      of string data, and they have found widespread use in
- *      leptonica.  For example:
- *         (1) to generate text files: e.g., PostScript and PDF
- *             wrappers around sets of images
- *         (2) to parse text files: e.g., extracting prototypes
- *             from the source to generate allheaders.h
- *         (3) to generate code for compilation: e.g., the fast
- *             dwa code for arbitrary structuring elements.
- *
- *      Comments on usage:
- *
- *          The user is responsible for correctly disposing of strings
- *          that have been extracted from sarrays.  In the following,
- *          "str_not_owned" means the returned handle does not own the string,
- *          and "str_owned" means the returned handle owns the string.
- *            - To extract a string from an Sarray in order to inspect it
- *              or to make a copy of it later, get a handle to it:
- *                  copyflag = L_NOCOPY.
- *              In this case, you must neither free the string nor put it
- *              directly in another array:
- *                 str-not-owned = sarrayGetString(sa, index, L_NOCOPY);
- *            - To extract a copy of a string from an Sarray, use:
- *                 str-owned = sarrayGetString(sa, index, L_COPY);
- *            ~ To insert a string that is in one array into another
- *              array (always leaving the first array intact), there are
- *              two options:
- *                 (1) use copyflag = L_COPY to make an immediate copy,
- *                     which you then add to the second array by insertion:
- *                       str-owned = sarrayGetString(sa, index, L_COPY);
- *                       sarrayAddString(sa, str-owned, L_INSERT);
- *                 (2) use copyflag = L_NOCOPY to get another handle to
- *                     the string; you then add a copy of it to the
- *                     second string array:
- *                       str-not-owned = sarrayGetString(sa, index, L_NOCOPY);
- *                       sarrayAddString(sa, str-not-owned, L_COPY).
- *              sarrayAddString() transfers ownership to the Sarray, so never
- *              use L_INSERT if the string is owned by another array.
- *
- *              In all cases, when you use copyflag = L_COPY to extract
- *              a string from an array, you must either free it
- *              or insert it in an array that will be freed later.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#ifndef _WIN32 -#include /* unix only */ -#include -#include /* needed for realpath() */ -#include /* needed for realpath() */ -#endif /* ! _WIN32 */ -#include "allheaders.h" - -static const l_uint32 MaxPtrArraySize = 100000; -static const l_int32 InitialPtrArraySize = 50; /*!< n'importe quoi */ - - /* Static functions */ -static l_int32 sarrayExtendArray(SARRAY *sa); - - -/*--------------------------------------------------------------------------* - * String array create/destroy/copy/extend * - *--------------------------------------------------------------------------*/ -/*! - * \brief sarrayCreate() - * - * \param[in] n size of string ptr array to be alloc'd; use 0 for default - * \return sarray, or NULL on error - */ -SARRAY * -sarrayCreate(l_int32 n) -{ -SARRAY *sa; - - PROCNAME("sarrayCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - sa = (SARRAY *)LEPT_CALLOC(1, sizeof(SARRAY)); - if ((sa->array = (char **)LEPT_CALLOC(n, sizeof(char *))) == NULL) { - sarrayDestroy(&sa); - return (SARRAY *)ERROR_PTR("ptr array not made", procName, NULL); - } - - sa->nalloc = n; - sa->n = 0; - sa->refcount = 1; - return sa; -} - - -/*! - * \brief sarrayCreateInitialized() - * - * \param[in] n size of string ptr array to be alloc'd - * \param[in] initstr string to be initialized on the full array - * \return sarray, or NULL on error - */ -SARRAY * -sarrayCreateInitialized(l_int32 n, - const char *initstr) -{ -l_int32 i; -SARRAY *sa; - - PROCNAME("sarrayCreateInitialized"); - - if (n <= 0) - return (SARRAY *)ERROR_PTR("n must be > 0", procName, NULL); - if (!initstr) - return (SARRAY *)ERROR_PTR("initstr not defined", procName, NULL); - - sa = sarrayCreate(n); - for (i = 0; i < n; i++) - sarrayAddString(sa, initstr, L_COPY); - return sa; -} - - -/*! - * \brief sarrayCreateWordsFromString() - * - * \param[in] string - * \return sarray, or NULL on error - * - *
- * Notes:
- *      (1) This finds the number of word substrings, creates an sarray
- *          of this size, and puts copies of each substring into the sarray.
- * 
- */ -SARRAY * -sarrayCreateWordsFromString(const char *string) -{ -char separators[] = " \n\t"; -l_int32 i, nsub, size, inword; -SARRAY *sa; - - PROCNAME("sarrayCreateWordsFromString"); - - if (!string) - return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL); - - /* Find the number of words */ - size = strlen(string); - nsub = 0; - inword = FALSE; - for (i = 0; i < size; i++) { - if (inword == FALSE && - (string[i] != ' ' && string[i] != '\t' && string[i] != '\n')) { - inword = TRUE; - nsub++; - } else if (inword == TRUE && - (string[i] == ' ' || string[i] == '\t' || string[i] == '\n')) { - inword = FALSE; - } - } - - if ((sa = sarrayCreate(nsub)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - sarraySplitString(sa, string, separators); - - return sa; -} - - -/*! - * \brief sarrayCreateLinesFromString() - * - * \param[in] string - * \param[in] blankflag 0 to exclude blank lines; 1 to include - * \return sarray, or NULL on error - * - *
- * Notes:
- *      (1) This finds the number of line substrings, each of which
- *          ends with a newline, and puts a copy of each substring
- *          in a new sarray.
- *      (2) The newline characters are removed from each substring.
- * 
- */ -SARRAY * -sarrayCreateLinesFromString(const char *string, - l_int32 blankflag) -{ -l_int32 i, nsub, size, startptr; -char *cstring, *substring; -SARRAY *sa; - - PROCNAME("sarrayCreateLinesFromString"); - - if (!string) - return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL); - - /* Find the number of lines */ - size = strlen(string); - nsub = 0; - for (i = 0; i < size; i++) { - if (string[i] == '\n') - nsub++; - } - - if ((sa = sarrayCreate(nsub)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - - if (blankflag) { /* keep blank lines as null strings */ - /* Make a copy for munging */ - if ((cstring = stringNew(string)) == NULL) { - sarrayDestroy(&sa); - return (SARRAY *)ERROR_PTR("cstring not made", procName, NULL); - } - /* We'll insert nulls like strtok */ - startptr = 0; - for (i = 0; i < size; i++) { - if (cstring[i] == '\n') { - cstring[i] = '\0'; - if (i > 0 && cstring[i - 1] == '\r') - cstring[i - 1] = '\0'; /* also remove Windows CR */ - if ((substring = stringNew(cstring + startptr)) == NULL) { - sarrayDestroy(&sa); - LEPT_FREE(cstring); - return (SARRAY *)ERROR_PTR("substring not made", - procName, NULL); - } - sarrayAddString(sa, substring, L_INSERT); -/* lept_stderr("substring = %s\n", substring); */ - startptr = i + 1; - } - } - if (startptr < size) { /* no newline at end of last line */ - if ((substring = stringNew(cstring + startptr)) == NULL) { - sarrayDestroy(&sa); - LEPT_FREE(cstring); - return (SARRAY *)ERROR_PTR("substring not made", - procName, NULL); - } - sarrayAddString(sa, substring, L_INSERT); -/* lept_stderr("substring = %s\n", substring); */ - } - LEPT_FREE(cstring); - } else { /* remove blank lines; use strtok */ - sarraySplitString(sa, string, "\r\n"); - } - - return sa; -} - - -/*! - * \brief sarrayDestroy() - * - * \param[in,out] psa will be set to null before returning - * \return void - * - *
- * Notes:
- *      (1) Decrements the ref count and, if 0, destroys the sarray.
- *      (2) Always nulls the input ptr.
- * 
- */ -void -sarrayDestroy(SARRAY **psa) -{ -l_int32 i; -SARRAY *sa; - - PROCNAME("sarrayDestroy"); - - if (psa == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - if ((sa = *psa) == NULL) - return; - - sarrayChangeRefcount(sa, -1); - if (sarrayGetRefcount(sa) <= 0) { - if (sa->array) { - for (i = 0; i < sa->n; i++) { - if (sa->array[i]) - LEPT_FREE(sa->array[i]); - } - LEPT_FREE(sa->array); - } - LEPT_FREE(sa); - } - - *psa = NULL; - return; -} - - -/*! - * \brief sarrayCopy() - * - * \param[in] sa string array - * \return copy of sarray, or NULL on error - */ -SARRAY * -sarrayCopy(SARRAY *sa) -{ -l_int32 i; -SARRAY *csa; - - PROCNAME("sarrayCopy"); - - if (!sa) - return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); - - if ((csa = sarrayCreate(sa->nalloc)) == NULL) - return (SARRAY *)ERROR_PTR("csa not made", procName, NULL); - - for (i = 0; i < sa->n; i++) - sarrayAddString(csa, sa->array[i], L_COPY); - - return csa; -} - - -/*! - * \brief sarrayClone() - * - * \param[in] sa string array - * \return ptr to same sarray, or NULL on error - */ -SARRAY * -sarrayClone(SARRAY *sa) -{ - PROCNAME("sarrayClone"); - - if (!sa) - return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); - sarrayChangeRefcount(sa, 1); - return sa; -} - - -/*! - * \brief sarrayAddString() - * - * \param[in] sa string array - * \param[in] string string to be added - * \param[in] copyflag L_INSERT, L_NOCOPY or L_COPY - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See usage comments at the top of this file.  L_INSERT is
- *          equivalent to L_NOCOPY.
- * 
- */ -l_ok -sarrayAddString(SARRAY *sa, - const char *string, - l_int32 copyflag) -{ -l_int32 n; - - PROCNAME("sarrayAddString"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!string) - return ERROR_INT("string not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_NOCOPY && copyflag != L_COPY) - return ERROR_INT("invalid copyflag", procName, 1); - - n = sarrayGetCount(sa); - if (n >= sa->nalloc) - sarrayExtendArray(sa); - - if (copyflag == L_COPY) - sa->array[n] = stringNew(string); - else /* L_INSERT or L_NOCOPY */ - sa->array[n] = (char *)string; - sa->n++; - return 0; -} - - -/*! - * \brief sarrayExtendArray() - * - * \param[in] sa string array - * \return 0 if OK, 1 on error - */ -static l_int32 -sarrayExtendArray(SARRAY *sa) -{ - PROCNAME("sarrayExtendArray"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - if ((sa->array = (char **)reallocNew((void **)&sa->array, - sizeof(char *) * sa->nalloc, - 2 * sizeof(char *) * sa->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - sa->nalloc *= 2; - return 0; -} - - -/*! - * \brief sarrayRemoveString() - * - * \param[in] sa string array - * \param[in] index of string within sarray - * \return removed string, or NULL on error - */ -char * -sarrayRemoveString(SARRAY *sa, - l_int32 index) -{ -char *string; -char **array; -l_int32 i, n, nalloc; - - PROCNAME("sarrayRemoveString"); - - if (!sa) - return (char *)ERROR_PTR("sa not defined", procName, NULL); - - if ((array = sarrayGetArray(sa, &nalloc, &n)) == NULL) - return (char *)ERROR_PTR("array not returned", procName, NULL); - - if (index < 0 || index >= n) - return (char *)ERROR_PTR("array index out of bounds", procName, NULL); - - string = array[index]; - - /* If removed string is not at end of array, shift - * to fill in, maintaining original ordering. - * Note: if we didn't care about the order, we could - * put the last string array[n - 1] directly into the hole. 
*/ - for (i = index; i < n - 1; i++) - array[i] = array[i + 1]; - - sa->n--; - return string; -} - - -/*! - * \brief sarrayReplaceString() - * - * \param[in] sa string array - * \param[in] index of string within sarray to be replaced - * \param[in] newstr string to replace existing one - * \param[in] copyflag L_INSERT, L_COPY - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This destroys an existing string and replaces it with
- *          the new string or a copy of it.
- *      (2) By design, an sarray is always compacted, so there are
- *          never any holes (null ptrs) in the ptr array up to the
- *          current count.
- * 
- */ -l_ok -sarrayReplaceString(SARRAY *sa, - l_int32 index, - char *newstr, - l_int32 copyflag) -{ -char *str; -l_int32 n; - - PROCNAME("sarrayReplaceString"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - n = sarrayGetCount(sa); - if (index < 0 || index >= n) - return ERROR_INT("array index out of bounds", procName, 1); - if (!newstr) - return ERROR_INT("newstr not defined", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY) - return ERROR_INT("invalid copyflag", procName, 1); - - LEPT_FREE(sa->array[index]); - if (copyflag == L_INSERT) - str = newstr; - else /* L_COPY */ - str = stringNew(newstr); - sa->array[index] = str; - return 0; -} - - -/*! - * \brief sarrayClear() - * - * \param[in] sa string array - * \return 0 if OK; 1 on error - */ -l_ok -sarrayClear(SARRAY *sa) -{ -l_int32 i; - - PROCNAME("sarrayClear"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - for (i = 0; i < sa->n; i++) { /* free strings and null ptrs */ - LEPT_FREE(sa->array[i]); - sa->array[i] = NULL; - } - sa->n = 0; - return 0; -} - - -/*----------------------------------------------------------------------* - * Accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayGetCount() - * - * \param[in] sa string array - * \return count, or 0 if no strings or on error - */ -l_int32 -sarrayGetCount(SARRAY *sa) -{ - PROCNAME("sarrayGetCount"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 0); - return sa->n; -} - - -/*! - * \brief sarrayGetArray() - * - * \param[in] sa string array - * \param[out] pnalloc [optional] number allocated string ptrs - * \param[out] pn [optional] number allocated strings - * \return ptr to string array, or NULL on error - * - *
- * Notes:
- *      (1) Caution: the returned array is not a copy, so caller
- *          must not destroy it!
- * 
- */ -char ** -sarrayGetArray(SARRAY *sa, - l_int32 *pnalloc, - l_int32 *pn) -{ -char **array; - - PROCNAME("sarrayGetArray"); - - if (!sa) - return (char **)ERROR_PTR("sa not defined", procName, NULL); - - array = sa->array; - if (pnalloc) *pnalloc = sa->nalloc; - if (pn) *pn = sa->n; - - return array; -} - - -/*! - * \brief sarrayGetString() - * - * \param[in] sa string array - * \param[in] index to the index-th string - * \param[in] copyflag L_NOCOPY or L_COPY - * \return string, or NULL on error - * - *
- * Notes:
- *      (1) See usage comments at the top of this file.
- *      (2) To get a pointer to the string itself, use L_NOCOPY.
- *          To get a copy of the string, use L_COPY.
- * 
- */ -char * -sarrayGetString(SARRAY *sa, - l_int32 index, - l_int32 copyflag) -{ - PROCNAME("sarrayGetString"); - - if (!sa) - return (char *)ERROR_PTR("sa not defined", procName, NULL); - if (index < 0 || index >= sa->n) - return (char *)ERROR_PTR("index not valid", procName, NULL); - if (copyflag != L_NOCOPY && copyflag != L_COPY) - return (char *)ERROR_PTR("invalid copyflag", procName, NULL); - - if (copyflag == L_NOCOPY) - return sa->array[index]; - else /* L_COPY */ - return stringNew(sa->array[index]); -} - - -/*! - * \brief sarrayGetRefCount() - * - * \param[in] sa string array - * \return refcount, or UNDEF on error - */ -l_int32 -sarrayGetRefcount(SARRAY *sa) -{ - PROCNAME("sarrayGetRefcount"); - - if (!sa) - return ERROR_INT("sa not defined", procName, UNDEF); - return sa->refcount; -} - - -/*! - * \brief sarrayChangeRefCount() - * - * \param[in] sa string array - * \param[in] delta change to be applied - * \return 0 if OK, 1 on error - */ -l_ok -sarrayChangeRefcount(SARRAY *sa, - l_int32 delta) -{ - PROCNAME("sarrayChangeRefcount"); - - if (!sa) - return ERROR_INT("sa not defined", procName, UNDEF); - sa->refcount += delta; - return 0; -} - - -/*----------------------------------------------------------------------* - * Conversion to string * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayToString() - * - * \param[in] sa string array - * \param[in] addnlflag flag: 0 adds nothing to each substring - * 1 adds '\n' to each substring - * 2 adds ' ' to each substring - * \return dest string, or NULL on error - * - *
- * Notes:
- *      (1) Concatenates all the strings in the sarray, preserving
- *          all white space.
- *      (2) If addnlflag != 0, adds either a '\n' or a ' ' after
- *          each substring.
- *      (3) This function was NOT implemented as:
- *            for (i = 0; i < n; i++)
- *                     strcat(dest, sarrayGetString(sa, i, L_NOCOPY));
- *          Do you see why?
- * 
- */ -char * -sarrayToString(SARRAY *sa, - l_int32 addnlflag) -{ - PROCNAME("sarrayToString"); - - if (!sa) - return (char *)ERROR_PTR("sa not defined", procName, NULL); - - return sarrayToStringRange(sa, 0, 0, addnlflag); -} - - -/*! - * \brief sarrayToStringRange() - * - * \param[in] sa string array - * \param[in] first index of first string to use; starts with 0 - * \param[in] nstrings number of strings to append into the result; use - * 0 to append to the end of the sarray - * \param[in] addnlflag flag: 0 adds nothing to each substring - * 1 adds '\n' to each substring - * 2 adds ' ' to each substring - * \return dest string, or NULL on error - * - *
- * Notes:
- *      (1) Concatenates the specified strings inthe sarray, preserving
- *          all white space.
- *      (2) If addnlflag != 0, adds either a '\n' or a ' ' after
- *          each substring.
- *      (3) If the sarray is empty, this returns a string with just
- *          the character corresponding to %addnlflag.
- * 
- */ -char * -sarrayToStringRange(SARRAY *sa, - l_int32 first, - l_int32 nstrings, - l_int32 addnlflag) -{ -char *dest, *src, *str; -l_int32 n, i, last, size, index, len; - - PROCNAME("sarrayToStringRange"); - - if (!sa) - return (char *)ERROR_PTR("sa not defined", procName, NULL); - if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2) - return (char *)ERROR_PTR("invalid addnlflag", procName, NULL); - - n = sarrayGetCount(sa); - - /* Empty sa; return char corresponding to addnlflag only */ - if (n == 0) { - if (first == 0) { - if (addnlflag == 0) - return stringNew(""); - if (addnlflag == 1) - return stringNew("\n"); - else /* addnlflag == 2) */ - return stringNew(" "); - } else { - return (char *)ERROR_PTR("first not valid", procName, NULL); - } - } - - if (first < 0 || first >= n) - return (char *)ERROR_PTR("first not valid", procName, NULL); - if (nstrings == 0 || (nstrings > n - first)) - nstrings = n - first; /* no overflow */ - last = first + nstrings - 1; - - size = 0; - for (i = first; i <= last; i++) { - if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) - return (char *)ERROR_PTR("str not found", procName, NULL); - size += strlen(str) + 2; - } - - if ((dest = (char *)LEPT_CALLOC(size + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("dest not made", procName, NULL); - - index = 0; - for (i = first; i <= last; i++) { - src = sarrayGetString(sa, i, L_NOCOPY); - len = strlen(src); - memcpy(dest + index, src, len); - index += len; - if (addnlflag == 1) { - dest[index] = '\n'; - index++; - } else if (addnlflag == 2) { - dest[index] = ' '; - index++; - } - } - - return dest; -} - - -/*----------------------------------------------------------------------* - * Join 2 sarrays * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayJoin() - * - * \param[in] sa1 to be added to - * \param[in] sa2 append to sa1 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Copies of the strings in sarray2 are added to sarray1.
- * 
- */ -l_ok -sarrayJoin(SARRAY *sa1, - SARRAY *sa2) -{ -char *str; -l_int32 n, i; - - PROCNAME("sarrayJoin"); - - if (!sa1) - return ERROR_INT("sa1 not defined", procName, 1); - if (!sa2) - return ERROR_INT("sa2 not defined", procName, 1); - - n = sarrayGetCount(sa2); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa2, i, L_NOCOPY); - sarrayAddString(sa1, str, L_COPY); - } - - return 0; -} - - -/*! - * \brief sarrayAppendRange() - * - * \param[in] sa1 to be added to - * \param[in] sa2 append specified range of strings in sa2 to sa1 - * \param[in] start index of first string of sa2 to append - * \param[in] end index of last string of sa2 to append; - * -1 to append to end of array - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Copies of the strings in sarray2 are added to sarray1.
- *      (2) The [start ... end] range is truncated if necessary.
- *      (3) Use end == -1 to append to the end of sa2.
- * 
- */ -l_ok -sarrayAppendRange(SARRAY *sa1, - SARRAY *sa2, - l_int32 start, - l_int32 end) -{ -char *str; -l_int32 n, i; - - PROCNAME("sarrayAppendRange"); - - if (!sa1) - return ERROR_INT("sa1 not defined", procName, 1); - if (!sa2) - return ERROR_INT("sa2 not defined", procName, 1); - - if (start < 0) - start = 0; - n = sarrayGetCount(sa2); - if (end < 0 || end >= n) - end = n - 1; - if (start > end) - return ERROR_INT("start > end", procName, 1); - - for (i = start; i <= end; i++) { - str = sarrayGetString(sa2, i, L_NOCOPY); - sarrayAddString(sa1, str, L_COPY); - } - - return 0; -} - - -/*----------------------------------------------------------------------* - * Pad an sarray to be the same size as another sarray * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayPadToSameSize() - * - * \param[in] sa1, sa2 - * \param[in] padstring - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If two sarrays have different size, this adds enough
- *          instances of %padstring to the smaller so that they are
- *          the same size.  It is useful when two or more sarrays
- *          are being sequenced in parallel, and it is necessary to
- *          find a valid string at each index.
- * 
- */ -l_ok -sarrayPadToSameSize(SARRAY *sa1, - SARRAY *sa2, - const char *padstring) -{ -l_int32 i, n1, n2; - - PROCNAME("sarrayPadToSameSize"); - - if (!sa1 || !sa2) - return ERROR_INT("both sa1 and sa2 not defined", procName, 1); - - n1 = sarrayGetCount(sa1); - n2 = sarrayGetCount(sa2); - if (n1 < n2) { - for (i = n1; i < n2; i++) - sarrayAddString(sa1, padstring, L_COPY); - } else if (n1 > n2) { - for (i = n2; i < n1; i++) - sarrayAddString(sa2, padstring, L_COPY); - } - - return 0; -} - - -/*----------------------------------------------------------------------* - * Convert word sarray to line sarray * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayConvertWordsToLines() - * - * \param[in] sa sa of individual words - * \param[in] linesize max num of chars in each line - * \return saout sa of formatted lines, or NULL on error - * - *
- * Notes:
- *      (1) This is useful for re-typesetting text to a specific maximum
- *          line length.  The individual words in the input sarray
- *          are concatenated into textlines.  An input word string of zero
- *          length is taken to be a paragraph separator.  Each time
- *          such a string is found, the current line is ended and
- *          a new line is also produced that contains just the
- *          string of zero length "".  When the output sarray
- *          of lines is eventually converted to a string with newlines
- *          typically appended to each line string, the empty
- *          strings are just converted to newlines, producing the visible
- *          paragraph separation.
- *      (2) What happens when a word is larger than linesize?
- *          We write it out as a single line anyway!  Words preceding
- *          or following this long word are placed on lines preceding
- *          or following the line with the long word.  Why this choice?
- *          Long "words" found in text documents are typically URLs, and
- *          it's often desirable not to put newlines in the middle of a URL.
- *          The text display program e.g., text editor will typically
- *          wrap the long "word" to fit in the window.
- * 
- */ -SARRAY * -sarrayConvertWordsToLines(SARRAY *sa, - l_int32 linesize) -{ -char *wd, *strl; -char emptystring[] = ""; -l_int32 n, i, len, totlen; -SARRAY *sal, *saout; - - PROCNAME("sarrayConvertWordsToLines"); - - if (!sa) - return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); - - saout = sarrayCreate(0); - n = sarrayGetCount(sa); - totlen = 0; - sal = NULL; - for (i = 0; i < n; i++) { - if (!sal) - sal = sarrayCreate(0); - wd = sarrayGetString(sa, i, L_NOCOPY); - len = strlen(wd); - if (len == 0) { /* end of paragraph: end line & insert blank line */ - if (totlen > 0) { - strl = sarrayToString(sal, 2); - sarrayAddString(saout, strl, L_INSERT); - } - sarrayAddString(saout, emptystring, L_COPY); - sarrayDestroy(&sal); - totlen = 0; - } else if (totlen == 0 && len + 1 > linesize) { /* long word! */ - sarrayAddString(saout, wd, L_COPY); /* copy to one line */ - } else if (totlen + len + 1 > linesize) { /* end line & start new */ - strl = sarrayToString(sal, 2); - sarrayAddString(saout, strl, L_INSERT); - sarrayDestroy(&sal); - sal = sarrayCreate(0); - sarrayAddString(sal, wd, L_COPY); - totlen = len + 1; - } else { /* add to current line */ - sarrayAddString(sal, wd, L_COPY); - totlen += len + 1; - } - } - if (totlen > 0) { /* didn't end with blank line; output last line */ - strl = sarrayToString(sal, 2); - sarrayAddString(saout, strl, L_INSERT); - sarrayDestroy(&sal); - } - - return saout; -} - - -/*----------------------------------------------------------------------* - * Split string on separator list * - *----------------------------------------------------------------------*/ -/* - * \brief sarraySplitString() - * - * \param[in] sa to append to; typically empty initially - * \param[in] str string to split; not changed - * \param[in] separators characters that split input string - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This uses strtokSafe().  See the notes there in utils.c.
- * 
- */ -l_int32 -sarraySplitString(SARRAY *sa, - const char *str, - const char *separators) -{ -char *cstr, *substr, *saveptr; - - PROCNAME("sarraySplitString"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!str) - return ERROR_INT("str not defined", procName, 1); - if (!separators) - return ERROR_INT("separators not defined", procName, 1); - - cstr = stringNew(str); /* preserves const-ness of input str */ - saveptr = NULL; - substr = strtokSafe(cstr, separators, &saveptr); - if (substr) - sarrayAddString(sa, substr, L_INSERT); - while ((substr = strtokSafe(NULL, separators, &saveptr))) - sarrayAddString(sa, substr, L_INSERT); - LEPT_FREE(cstr); - - return 0; -} - - -/*----------------------------------------------------------------------* - * Filter sarray * - *----------------------------------------------------------------------*/ -/*! - * \brief sarraySelectBySubstring() - * - * \param[in] sain input sarray - * \param[in] substr [optional] substring for matching; can be NULL - * \return saout output sarray, filtered with substring or NULL on error - * - *
- * Notes:
- *      (1) This selects all strings in sain that have substr as a substring.
- *          Note that we can't use strncmp() because we're looking for
- *          a match to the substring anywhere within each filename.
- *      (2) If substr == NULL, returns a copy of the sarray.
- * 
- */ -SARRAY * -sarraySelectBySubstring(SARRAY *sain, - const char *substr) -{ -char *str; -l_int32 n, i, offset, found; -SARRAY *saout; - - PROCNAME("sarraySelectBySubstring"); - - if (!sain) - return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL); - - n = sarrayGetCount(sain); - if (!substr || n == 0) - return sarrayCopy(sain); - - saout = sarrayCreate(n); - for (i = 0; i < n; i++) { - str = sarrayGetString(sain, i, L_NOCOPY); - arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, - strlen(substr), &offset, &found); - if (found) - sarrayAddString(saout, str, L_COPY); - } - - return saout; -} - - -/*! - * \brief sarraySelectByRange() - * - * \param[in] sain input sarray - * \param[in] first index of first string to be selected - * \param[in] last index of last string to be selected; - * use 0 to go to the end of the sarray - * \return saout output sarray, or NULL on error - * - *
- * Notes:
- *      (1) This makes %saout consisting of copies of all strings in %sain
- *          in the index set [first ... last].  Use %last == 0 to get all
- *          strings from %first to the last string in the sarray.
- * 
- */ -SARRAY * -sarraySelectByRange(SARRAY *sain, - l_int32 first, - l_int32 last) -{ -char *str; -l_int32 n, i; -SARRAY *saout; - - PROCNAME("sarraySelectByRange"); - - if (!sain) - return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL); - if (first < 0) first = 0; - n = sarrayGetCount(sain); - if (last <= 0) last = n - 1; - if (last >= n) { - L_WARNING("last > n - 1; setting to n - 1\n", procName); - last = n - 1; - } - if (first > last) - return (SARRAY *)ERROR_PTR("first must be >= last", procName, NULL); - - saout = sarrayCreate(0); - for (i = first; i <= last; i++) { - str = sarrayGetString(sain, i, L_COPY); - sarrayAddString(saout, str, L_INSERT); - } - - return saout; -} - - -/*! - * \brief sarrayParseRange() - * - * \param[in] sa input sarray - * \param[in] start index to start range search - * \param[out] pactualstart index of actual start; may be > 'start' - * \param[out] pend index of end - * \param[out] pnewstart index of start of next range - * \param[in] substr substring for matching at beginning of string - * \param[in] loc byte offset within the string for the pattern; - * use -1 if the location does not matter. - * \return 0 if valid range found; 1 otherwise - * - *
- * Notes:
- *      (1) This finds the range of the next set of strings in SA,
- *          beginning the search at 'start', that does NOT have
- *          the substring 'substr' either at the indicated location
- *          in the string or anywhere in the string.  The input
- *          variable 'loc' is the specified offset within the string;
- *          use -1 to indicate 'anywhere in the string'.
- *      (2) Always check the return value to verify that a valid range
- *          was found.
- *      (3) If a valid range is not found, the values of actstart,
- *          end and newstart are all set to the size of sa.
- *      (4) If this is the last valid range, newstart returns the value n.
- *          In use, this should be tested before calling the function.
- *      (5) Usage example.  To find all the valid ranges in a file
- *          where the invalid lines begin with two dashes, copy each
- *          line in the file to a string in an sarray, and do:
- *             start = 0;
- *             while (!sarrayParseRange(sa, start, &actstart, &end, &start,
- *                    "--", 0))
- *                 lept_stderr("start = %d, end = %d\n", actstart, end);
- * 
- */ -l_int32 -sarrayParseRange(SARRAY *sa, - l_int32 start, - l_int32 *pactualstart, - l_int32 *pend, - l_int32 *pnewstart, - const char *substr, - l_int32 loc) -{ -char *str; -l_int32 n, i, offset, found; - - PROCNAME("sarrayParseRange"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!pactualstart || !pend || !pnewstart) - return ERROR_INT("not all range addresses defined", procName, 1); - n = sarrayGetCount(sa); - *pactualstart = *pend = *pnewstart = n; - if (!substr) - return ERROR_INT("substr not defined", procName, 1); - - /* Look for the first string without the marker */ - if (start < 0 || start >= n) - return 1; - for (i = start; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, - strlen(substr), &offset, &found); - if (loc < 0) { - if (!found) break; - } else { - if (!found || offset != loc) break; - } - } - start = i; - if (i == n) /* couldn't get started */ - return 1; - - /* Look for the last string without the marker */ - *pactualstart = start; - for (i = start + 1; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, - strlen(substr), &offset, &found); - if (loc < 0) { - if (found) break; - } else { - if (found && offset == loc) break; - } - } - *pend = i - 1; - start = i; - if (i == n) /* no further range */ - return 0; - - /* Look for the first string after *pend without the marker. - * This will start the next run of strings, if it exists. 
*/ - for (i = start; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, - strlen(substr), &offset, &found); - if (loc < 0) { - if (!found) break; - } else { - if (!found || offset != loc) break; - } - } - if (i < n) - *pnewstart = i; - - return 0; -} - - -/*----------------------------------------------------------------------* - * Serialize for I/O * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayRead() - * - * \param[in] filename - * \return sarray, or NULL on error - */ -SARRAY * -sarrayRead(const char *filename) -{ -FILE *fp; -SARRAY *sa; - - PROCNAME("sarrayRead"); - - if (!filename) - return (SARRAY *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL); - sa = sarrayReadStream(fp); - fclose(fp); - if (!sa) - return (SARRAY *)ERROR_PTR("sa not read", procName, NULL); - return sa; -} - - -/*! - * \brief sarrayReadStream() - * - * \param[in] fp file stream - * \return sarray, or NULL on error - * - *
- * Notes:
- *      (1) We store the size of each string along with the string.
- *          The limit on the number of strings is 2^24.
- *          The limit on the size of any string is 2^30 bytes.
- *      (2) This allows a string to have embedded newlines.  By reading
- *          the entire string, as determined by its size, we are
- *          not affected by any number of embedded newlines.
- * 
- */ -SARRAY * -sarrayReadStream(FILE *fp) -{ -char *stringbuf; -l_int32 i, n, size, index, bufsize, version, ignore, success; -SARRAY *sa; - - PROCNAME("sarrayReadStream"); - - if (!fp) - return (SARRAY *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nSarray Version %d\n", &version) != 1) - return (SARRAY *)ERROR_PTR("not an sarray file", procName, NULL); - if (version != SARRAY_VERSION_NUMBER) - return (SARRAY *)ERROR_PTR("invalid sarray version", procName, NULL); - if (fscanf(fp, "Number of strings = %d\n", &n) != 1) - return (SARRAY *)ERROR_PTR("error on # strings", procName, NULL); - if (n > (1 << 24)) - return (SARRAY *)ERROR_PTR("more than 2^24 strings!", procName, NULL); - - success = TRUE; - if ((sa = sarrayCreate(n)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - bufsize = 512 + 1; - stringbuf = (char *)LEPT_CALLOC(bufsize, sizeof(char)); - - for (i = 0; i < n; i++) { - /* Get the size of the stored string */ - if ((fscanf(fp, "%d[%d]:", &index, &size) != 2) || (size > (1 << 30))) { - success = FALSE; - L_ERROR("error on string size\n", procName); - goto cleanup; - } - /* Expand the string buffer if necessary */ - if (size > bufsize - 5) { - LEPT_FREE(stringbuf); - bufsize = (l_int32)(1.5 * size); - stringbuf = (char *)LEPT_CALLOC(bufsize, sizeof(char)); - } - /* Read the stored string, plus leading spaces and trailing \n */ - if (fread(stringbuf, 1, size + 3, fp) != size + 3) { - success = FALSE; - L_ERROR("error reading string\n", procName); - goto cleanup; - } - /* Remove the \n that was added by sarrayWriteStream() */ - stringbuf[size + 2] = '\0'; - /* Copy it in, skipping the 2 leading spaces */ - sarrayAddString(sa, stringbuf + 2, L_COPY); - } - ignore = fscanf(fp, "\n"); - -cleanup: - LEPT_FREE(stringbuf); - if (!success) sarrayDestroy(&sa); - return sa; -} - - -/*! 
- * \brief sarrayReadMem() - * - * \param[in] data serialization in ascii - * \param[in] size of data; can use strlen to get it - * \return sarray, or NULL on error - */ -SARRAY * -sarrayReadMem(const l_uint8 *data, - size_t size) -{ -FILE *fp; -SARRAY *sa; - - PROCNAME("sarrayReadMem"); - - if (!data) - return (SARRAY *)ERROR_PTR("data not defined", procName, NULL); - if ((fp = fopenReadFromMemory(data, size)) == NULL) - return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL); - - sa = sarrayReadStream(fp); - fclose(fp); - if (!sa) L_ERROR("sarray not read\n", procName); - return sa; -} - - -/*! - * \brief sarrayWrite() - * - * \param[in] filename - * \param[in] sa string array - * \return 0 if OK; 1 on error - */ -l_ok -sarrayWrite(const char *filename, - SARRAY *sa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("sarrayWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "w")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = sarrayWriteStream(fp, sa); - fclose(fp); - if (ret) - return ERROR_INT("sa not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief sarrayWriteStream() - * - * \param[in] fp file stream - * \param[in] sa string array - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This appends a '\n' to each string, which is stripped
- *          off by sarrayReadStream().
- * 
- */ -l_ok -sarrayWriteStream(FILE *fp, - SARRAY *sa) -{ -l_int32 i, n, len; - - PROCNAME("sarrayWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - n = sarrayGetCount(sa); - fprintf(fp, "\nSarray Version %d\n", SARRAY_VERSION_NUMBER); - fprintf(fp, "Number of strings = %d\n", n); - for (i = 0; i < n; i++) { - len = strlen(sa->array[i]); - fprintf(fp, " %d[%d]: %s\n", i, len, sa->array[i]); - } - fprintf(fp, "\n"); - - return 0; -} - - -/*! - * \brief sarrayWriteMem() - * - * \param[out] pdata data of serialized sarray; ascii - * \param[out] psize size of returned data - * \param[in] sa - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Serializes a sarray in memory and puts the result in a buffer.
- * 
- */ -l_ok -sarrayWriteMem(l_uint8 **pdata, - size_t *psize, - SARRAY *sa) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("sarrayWriteMem"); - - if (pdata) *pdata = NULL; - if (psize) *psize = 0; - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - -#if HAVE_FMEMOPEN - if ((fp = open_memstream((char **)pdata, psize)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = sarrayWriteStream(fp, sa); -#else - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); - #endif /* _WIN32 */ - ret = sarrayWriteStream(fp, sa); - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); -#endif /* HAVE_FMEMOPEN */ - fclose(fp); - return ret; -} - - -/*! - * \brief sarrayAppend() - * - * \param[in] filename - * \param[in] sa - * \return 0 if OK; 1 on error - */ -l_ok -sarrayAppend(const char *filename, - SARRAY *sa) -{ -FILE *fp; - - PROCNAME("sarrayAppend"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "a")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - if (sarrayWriteStream(fp, sa)) { - fclose(fp); - return ERROR_INT("sa not appended to stream", procName, 1); - } - - fclose(fp); - return 0; -} - - -/*---------------------------------------------------------------------* - * Directory filenames * - *---------------------------------------------------------------------*/ -/*! 
- * \brief getNumberedPathnamesInDirectory() - * - * \param[in] dirname directory name - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] numpre number of characters in name before number - * \param[in] numpost number of characters in name after the number, - * up to a dot before an extension - * \param[in] maxnum only consider page numbers up to this value - * \return sarray of numbered pathnames, or NULL on error - * - *
- * Notes:
- *      (1) Returns the full pathnames of the numbered filenames in
- *          the directory.  The number in the filename is the index
- *          into the sarray.  For indices for which there are no filenames,
- *          an empty string ("") is placed into the sarray.
- *          This makes reading numbered files very simple.  For example,
- *          the image whose filename includes number N can be retrieved using
- *               pixReadIndexed(sa, N);
- *      (2) If %substr is not NULL, only filenames that contain
- *          the substring can be included.  If %substr is NULL,
- *          all matching filenames are used.
- *      (3) If no numbered files are found, it returns an empty sarray,
- *          with no initialized strings.
- *      (4) It is assumed that the page number is contained within
- *          the basename (the filename without directory or extension).
- *          %numpre is the number of characters in the basename
- *          preceding the actual page number; %numpost is the number
- *          following the page number, up to either the end of the
- *          basename or a ".", whichever comes first.
- *      (5) This is useful when all filenames contain numbers that are
- *          not necessarily consecutive.  0-padding is not required.
- *      (6) To use a O(n) matching algorithm, the largest page number
- *          is found and two internal arrays of this size are created.
- *          This maximum is constrained not to exceed %maxsum,
- *          to make sure that an unrealistically large number is not
- *          accidentally used to determine the array sizes.
- * 
- */ -SARRAY * -getNumberedPathnamesInDirectory(const char *dirname, - const char *substr, - l_int32 numpre, - l_int32 numpost, - l_int32 maxnum) -{ -l_int32 nfiles; -SARRAY *sa, *saout; - - PROCNAME("getNumberedPathnamesInDirectory"); - - if (!dirname) - return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); - - if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - if ((nfiles = sarrayGetCount(sa)) == 0) { - sarrayDestroy(&sa); - return sarrayCreate(1); - } - - saout = convertSortedToNumberedPathnames(sa, numpre, numpost, maxnum); - sarrayDestroy(&sa); - return saout; -} - - -/*! - * \brief getSortedPathnamesInDirectory() - * - * \param[in] dirname directory name - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] first 0-based - * \param[in] nfiles use 0 for all to the end - * \return sarray of sorted pathnames, or NULL on error - * - *
- * Notes:
- *      (1) Use %substr to filter filenames in the directory.  If
- *          %substr == NULL, this takes all files.
- *      (2) The files in the directory, after optional filtering by
- *          the substring, are lexically sorted in increasing order.
- *          Use %first and %nfiles to select a contiguous set of files.
- *      (3) The full pathnames are returned for the requested sequence.
- *          If no files are found after filtering, returns an empty sarray.
- * 
- */ -SARRAY * -getSortedPathnamesInDirectory(const char *dirname, - const char *substr, - l_int32 first, - l_int32 nfiles) -{ -char *fname, *fullname; -l_int32 i, n, last; -SARRAY *sa, *safiles, *saout; - - PROCNAME("getSortedPathnamesInDirectory"); - - if (!dirname) - return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); - - if ((sa = getFilenamesInDirectory(dirname)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - safiles = sarraySelectBySubstring(sa, substr); - sarrayDestroy(&sa); - n = sarrayGetCount(safiles); - if (n == 0) { - L_WARNING("no files found\n", procName); - return safiles; - } - - sarraySort(safiles, safiles, L_SORT_INCREASING); - - first = L_MIN(L_MAX(first, 0), n - 1); - if (nfiles == 0) - nfiles = n - first; - last = L_MIN(first + nfiles - 1, n - 1); - - saout = sarrayCreate(last - first + 1); - for (i = first; i <= last; i++) { - fname = sarrayGetString(safiles, i, L_NOCOPY); - fullname = pathJoin(dirname, fname); - sarrayAddString(saout, fullname, L_INSERT); - } - - sarrayDestroy(&safiles); - return saout; -} - - -/*! - * \brief convertSortedToNumberedPathnames() - * - * \param[in] sa sorted pathnames including zero-padded integers - * \param[in] numpre number of characters in name before number - * \param[in] numpost number of characters in name after the number, - * up to a dot before an extension - * \param[in] maxnum only consider page numbers up to this value - * \return sarray of numbered pathnames, or NULL on error - * - *
- * Notes:
- *      (1) Typically, numpre = numpost = 0; e.g., when the filename
- *          just has a number followed by an optional extension.
- * 
- */ -SARRAY * -convertSortedToNumberedPathnames(SARRAY *sa, - l_int32 numpre, - l_int32 numpost, - l_int32 maxnum) -{ -char *fname, *str; -l_int32 i, nfiles, num, index; -SARRAY *saout; - - PROCNAME("convertSortedToNumberedPathnames"); - - if (!sa) - return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); - if ((nfiles = sarrayGetCount(sa)) == 0) - return sarrayCreate(1); - - /* Find the last file in the sorted array that has a number - * that (a) matches the count pattern and (b) does not - * exceed %maxnum. %maxnum sets an upper limit on the size - * of the sarray. */ - num = 0; - for (i = nfiles - 1; i >= 0; i--) { - fname = sarrayGetString(sa, i, L_NOCOPY); - num = extractNumberFromFilename(fname, numpre, numpost); - if (num < 0) continue; - num = L_MIN(num + 1, maxnum); - break; - } - - if (num <= 0) /* none found */ - return sarrayCreate(1); - - /* Insert pathnames into the output sarray. - * Ignore numbers that are out of the range of sarray. */ - saout = sarrayCreateInitialized(num, ""); - for (i = 0; i < nfiles; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - index = extractNumberFromFilename(fname, numpre, numpost); - if (index < 0 || index >= num) continue; - str = sarrayGetString(saout, index, L_NOCOPY); - if (str[0] != '\0') { - L_WARNING("\n Multiple files with same number: %d\n", - procName, index); - } - sarrayReplaceString(saout, index, fname, L_COPY); - } - - return saout; -} - - -/*! - * \brief getFilenamesInDirectory() - * - * \param[in] dirname directory name - * \return sarray of file names, or NULL on error - * - *
- * Notes:
- *      (1) The versions compiled under unix and cygwin use the POSIX C
- *          library commands for handling directories.  For windows,
- *          there is a separate implementation.
- *      (2) It returns an array of filename tails; i.e., only the part of
- *          the path after the last slash.
- *      (3) Use of the d_type field of dirent is not portable:
- *          "According to POSIX, the dirent structure contains a field
- *          char d_name[] of unspecified size, with at most NAME_MAX
- *          characters preceding the terminating null character.  Use
- *          of other fields will harm the portability of your programs."
- *      (4) As a consequence of (3), we note several things:
- *           ~ MINGW doesn't have a d_type member.
- *           ~ Older versions of gcc (e.g., 2.95.3) return DT_UNKNOWN
- *             for d_type from all files.
- *          On these systems, this function will return directories
- *          (except for '.' and '..', which are eliminated using
- *          the d_name field).
- * 
- */ - -#ifndef _WIN32 - -SARRAY * -getFilenamesInDirectory(const char *dirname) -{ -char dir[PATH_MAX + 1]; -char *realdir, *stat_path, *ignore; -size_t size; -SARRAY *safiles; -DIR *pdir; -struct dirent *pdirentry; -int dfd, stat_ret; -struct stat st; - - PROCNAME("getFilenamesInDirectory"); - - if (!dirname) - return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); - - /* It's nice to ignore directories. fstatat() works with relative - directory paths, but stat() requires using the absolute path. - Also, do not pass NULL as the second parameter to realpath(); - use a buffer of sufficient size. */ - ignore = realpath(dirname, dir); /* see note above */ - realdir = genPathname(dir, NULL); - if ((pdir = opendir(realdir)) == NULL) { - LEPT_FREE(realdir); - return (SARRAY *)ERROR_PTR("pdir not opened", procName, NULL); - } - safiles = sarrayCreate(0); - dfd = dirfd(pdir); - while ((pdirentry = readdir(pdir))) { -#if HAVE_FSTATAT - stat_ret = fstatat(dfd, pdirentry->d_name, &st, 0); -#else - size = strlen(realdir) + strlen(pdirentry->d_name) + 2; - if (size > PATH_MAX) { - L_ERROR("size = %zu too large; skipping\n", procName, size); - continue; - } - stat_path = (char *)LEPT_CALLOC(size, 1); - snprintf(stat_path, size, "%s/%s", realdir, pdirentry->d_name); - stat_ret = stat(stat_path, &st); - LEPT_FREE(stat_path); -#endif - if (stat_ret == 0 && S_ISDIR(st.st_mode)) - continue; - sarrayAddString(safiles, pdirentry->d_name, L_COPY); - } - closedir(pdir); - LEPT_FREE(realdir); - return safiles; -} - -#else /* _WIN32 */ - - /* http://msdn2.microsoft.com/en-us/library/aa365200(VS.85).aspx */ -#include - -SARRAY * -getFilenamesInDirectory(const char *dirname) -{ -char *pszDir; -char *realdir; -HANDLE hFind = INVALID_HANDLE_VALUE; -SARRAY *safiles; -WIN32_FIND_DATAA ffd; - - PROCNAME("getFilenamesInDirectory"); - - if (!dirname) - return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); - - realdir = genPathname(dirname, NULL); - pszDir = 
stringJoin(realdir, "\\*"); - LEPT_FREE(realdir); - - if (strlen(pszDir) + 1 > MAX_PATH) { - LEPT_FREE(pszDir); - return (SARRAY *)ERROR_PTR("dirname is too long", procName, NULL); - } - - if ((safiles = sarrayCreate(0)) == NULL) { - LEPT_FREE(pszDir); - return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL); - } - - hFind = FindFirstFileA(pszDir, &ffd); - if (INVALID_HANDLE_VALUE == hFind) { - sarrayDestroy(&safiles); - LEPT_FREE(pszDir); - return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL); - } - - while (FindNextFileA(hFind, &ffd) != 0) { - if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) /* skip dirs */ - continue; - convertSepCharsInPath(ffd.cFileName, UNIX_PATH_SEPCHAR); - sarrayAddString(safiles, ffd.cFileName, L_COPY); - } - - FindClose(hFind); - LEPT_FREE(pszDir); - return safiles; -} -#endif /* _WIN32 */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sarray2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sarray2.c deleted file mode 100644 index ec8a683f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sarray2.c +++ /dev/null @@ -1,730 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file sarray2.c - *
- *
- *      Sort
- *          SARRAY     *sarraySort()
- *          SARRAY     *sarraySortByIndex()
- *          l_int32     stringCompareLexical()
- *
- *      Set operations using aset (rbtree)
- *          SARRAY     *sarrayUnionByAset()
- *          SARRAY     *sarrayRemoveDupsByAset()
- *          SARRAY     *sarrayIntersectionByAset()
- *          L_ASET     *l_asetCreateFromSarray()
- *
- *      Set operations using hashing (dnahash)
- *          l_int32     sarrayRemoveDupsByHash()
- *          SARRAY     *sarrayIntersectionByHash()
- *          l_int32     sarrayFindStringByHash()
- *          L_DNAHASH  *l_dnaHashCreateFromSarray()
- *
- *      Miscellaneous operations
- *          SARRAY     *sarrayGenerateIntegers()
- *          l_int32     sarrayLookupCSKV()
- *
- *
- * We have two implementations of set operations on an array of strings:
- *
- *   (1) Using an underlying tree (rbtree)
- *       This uses a good 64 bit hashing function for the key,
- *       that is not expected to have hash collisions (and we do
- *       not test for them).  The tree is built up of the hash
- *       values, and if the hash is found in the tree, it is
- *       assumed that the string has already been found.
- *
- *   (2) Using an underlying hashing of the keys (dnahash)
- *       This uses a fast 64 bit hashing function for the key,
- *       which is then hashed into a bucket (a dna in a dnaHash).
- *       Because hash collisions can occur, the index into the
- *       sarray for the string that gave rise to that key is stored,
- *       and the dna (bucket) is traversed, using the stored indices
- *       to determine if that string had already been seen.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -/*----------------------------------------------------------------------* - * Sort * - *----------------------------------------------------------------------*/ -/*! - * \brief sarraySort() - * - * \param[in] saout output sarray; can be NULL or equal to sain - * \param[in] sain input sarray - * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING - * \return saout output sarray, sorted by ascii value, or NULL on error - * - *
- * Notes:
- *      (1) Set saout = sain for in-place; otherwise, set naout = NULL.
- *      (2) Shell sort, modified from K&R, 2nd edition, p.62.
- *          Slow but simple O(n logn) sort.
- * 
- */ -SARRAY * -sarraySort(SARRAY *saout, - SARRAY *sain, - l_int32 sortorder) -{ -char **array; -char *tmp; -l_int32 n, i, j, gap; - - PROCNAME("sarraySort"); - - if (!sain) - return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL); - - /* Make saout if necessary; otherwise do in-place */ - if (!saout) - saout = sarrayCopy(sain); - else if (sain != saout) - return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL); - array = saout->array; /* operate directly on the array */ - n = sarrayGetCount(saout); - - /* Shell sort */ - for (gap = n/2; gap > 0; gap = gap / 2) { - for (i = gap; i < n; i++) { - for (j = i - gap; j >= 0; j -= gap) { - if ((sortorder == L_SORT_INCREASING && - stringCompareLexical(array[j], array[j + gap])) || - (sortorder == L_SORT_DECREASING && - stringCompareLexical(array[j + gap], array[j]))) - { - tmp = array[j]; - array[j] = array[j + gap]; - array[j + gap] = tmp; - } - } - } - } - - return saout; -} - - -/*! - * \brief sarraySortByIndex() - * - * \param[in] sain - * \param[in] naindex na that maps from the new sarray to the input sarray - * \return saout sorted, or NULL on error - */ -SARRAY * -sarraySortByIndex(SARRAY *sain, - NUMA *naindex) -{ -char *str; -l_int32 i, n, index; -SARRAY *saout; - - PROCNAME("sarraySortByIndex"); - - if (!sain) - return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL); - if (!naindex) - return (SARRAY *)ERROR_PTR("naindex not defined", procName, NULL); - - n = sarrayGetCount(sain); - saout = sarrayCreate(n); - for (i = 0; i < n; i++) { - numaGetIValue(naindex, i, &index); - str = sarrayGetString(sain, index, L_COPY); - sarrayAddString(saout, str, L_INSERT); - } - - return saout; -} - - -/*! - * \brief stringCompareLexical() - * - * \param[in] str1 - * \param[in] str2 - * \return 1 if str1 > str2 lexically; 0 otherwise - * - *
- * Notes:
- *      (1) If the lexical values are identical, return a 0, to
- *          indicate that no swapping is required to sort the strings.
- * 
- */ -l_int32 -stringCompareLexical(const char *str1, - const char *str2) -{ -l_int32 i, len1, len2, len; - - PROCNAME("sarrayCompareLexical"); - - if (!str1) - return ERROR_INT("str1 not defined", procName, 1); - if (!str2) - return ERROR_INT("str2 not defined", procName, 1); - - len1 = strlen(str1); - len2 = strlen(str2); - len = L_MIN(len1, len2); - - for (i = 0; i < len; i++) { - if (str1[i] == str2[i]) - continue; - if (str1[i] > str2[i]) - return 1; - else - return 0; - } - - if (len1 > len2) - return 1; - else - return 0; -} - - -/*----------------------------------------------------------------------* - * Set operations using aset (rbtree) * - *----------------------------------------------------------------------*/ -/*! - * \brief sarrayUnionByAset() - * - * \param[in] sa1, sa2 - * \return sad with the union of the string set, or NULL on error - * - *
- * Notes:
- *      (1) Duplicates are removed from the concatenation of the two arrays.
- *      (2) The key for each string is a 64-bit hash.
- *      (2) Algorithm: Concatenate the two sarrays.  Then build a set,
- *          using hashed strings as keys.  As the set is built, first do
- *          a find; if not found, add the key to the set and add the string
- *          to the output sarray.  This is O(nlogn).
- * 
- */ -SARRAY * -sarrayUnionByAset(SARRAY *sa1, - SARRAY *sa2) -{ -SARRAY *sa3, *sad; - - PROCNAME("sarrayUnionByAset"); - - if (!sa1) - return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL); - if (!sa2) - return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL); - - /* Join */ - sa3 = sarrayCopy(sa1); - sarrayJoin(sa3, sa2); - - /* Eliminate duplicates */ - sad = sarrayRemoveDupsByAset(sa3); - sarrayDestroy(&sa3); - return sad; -} - - -/*! - * \brief sarrayRemoveDupsByAset() - * - * \param[in] sas - * \return sad with duplicates removed, or NULL on error - * - *
- * Notes:
- *      (1) This is O(nlogn), considerably slower than
- *          sarrayRemoveDupsByHash() for large string arrays.
- *      (2) The key for each string is a 64-bit hash.
- *      (3) Build a set, using hashed strings as keys.  As the set is
- *          built, first do a find; if not found, add the key to the
- *          set and add the string to the output sarray.
- * 
- */ -SARRAY * -sarrayRemoveDupsByAset(SARRAY *sas) -{ -char *str; -l_int32 i, n; -l_uint64 hash; -L_ASET *set; -RB_TYPE key; -SARRAY *sad; - - PROCNAME("sarrayRemoveDupsByAset"); - - if (!sas) - return (SARRAY *)ERROR_PTR("sas not defined", procName, NULL); - - set = l_asetCreate(L_UINT_TYPE); - sad = sarrayCreate(0); - n = sarrayGetCount(sas); - for (i = 0; i < n; i++) { - str = sarrayGetString(sas, i, L_NOCOPY); - l_hashStringToUint64(str, &hash); - key.utype = hash; - if (!l_asetFind(set, key)) { - sarrayAddString(sad, str, L_COPY); - l_asetInsert(set, key); - } - } - - l_asetDestroy(&set); - return sad; -} - - -/*! - * \brief sarrayIntersectionByAset() - * - * \param[in] sa1, sa2 - * \return sad with the intersection of the string set, or NULL on error - * - *
- * Notes:
- *      (1) Algorithm: put the larger sarray into a set, using the string
- *          hashes as the key values.  Then run through the smaller sarray,
- *          building an output sarray and a second set from the strings
- *          in the larger array: if a string is in the first set but
- *          not in the second, add the string to the output sarray and hash
- *          it into the second set.  The second set is required to make
- *          sure only one instance of each string is put into the output sarray.
- *          This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
- * 
- */ -SARRAY * -sarrayIntersectionByAset(SARRAY *sa1, - SARRAY *sa2) -{ -char *str; -l_int32 n1, n2, i, n; -l_uint64 hash; -L_ASET *set1, *set2; -RB_TYPE key; -SARRAY *sa_small, *sa_big, *sad; - - PROCNAME("sarrayIntersectionByAset"); - - if (!sa1) - return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL); - if (!sa2) - return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL); - - /* Put the elements of the biggest array into a set */ - n1 = sarrayGetCount(sa1); - n2 = sarrayGetCount(sa2); - sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */ - sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */ - set1 = l_asetCreateFromSarray(sa_big); - - /* Build up the intersection of strings */ - sad = sarrayCreate(0); - n = sarrayGetCount(sa_small); - set2 = l_asetCreate(L_UINT_TYPE); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa_small, i, L_NOCOPY); - l_hashStringToUint64(str, &hash); - key.utype = hash; - if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { - sarrayAddString(sad, str, L_COPY); - l_asetInsert(set2, key); - } - } - - l_asetDestroy(&set1); - l_asetDestroy(&set2); - return sad; -} - - -/*! - * \brief l_asetCreateFromSarray() - * - * \param[in] sa - * \return set using a string hash into a uint64 as the key - */ -L_ASET * -l_asetCreateFromSarray(SARRAY *sa) -{ -char *str; -l_int32 i, n; -l_uint64 hash; -L_ASET *set; -RB_TYPE key; - - PROCNAME("l_asetCreateFromSarray"); - - if (!sa) - return (L_ASET *)ERROR_PTR("sa not defined", procName, NULL); - - set = l_asetCreate(L_UINT_TYPE); - n = sarrayGetCount(sa); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - l_hashStringToUint64(str, &hash); - key.utype = hash; - l_asetInsert(set, key); - } - - return set; -} - - -/*----------------------------------------------------------------------* - * Set operations using hashing (dnahash) * - *----------------------------------------------------------------------*/ -/*! 
- * \brief sarrayRemoveDupsByHash() - * - * \param[in] sas - * \param[out] psad unique set of strings; duplicates removed - * \param[out] pdahash [optional] dnahash used for lookup - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Generates a sarray with unique values.
- *      (2) The dnahash is built up with sad to assure uniqueness.
- *          It can be used to find if a string is in the set:
- *              sarrayFindValByHash(sad, dahash, str, &index)
- *      (3) The hash of the string location is simple and fast.  It scales
- *          up with the number of buckets to insure a fairly random
- *          bucket selection input strings.
- *      (4) This is faster than sarrayRemoveDupsByAset(), because the
- *          bucket lookup is O(n), although there is a double-loop
- *          lookup within the dna in each bucket.
- * 
- */ -l_ok -sarrayRemoveDupsByHash(SARRAY *sas, - SARRAY **psad, - L_DNAHASH **pdahash) -{ -char *str; -l_int32 i, n, index, items; -l_uint32 nsize; -l_uint64 key; -SARRAY *sad; -L_DNAHASH *dahash; - - PROCNAME("sarrayRemoveDupsByHash"); - - if (pdahash) *pdahash = NULL; - if (!psad) - return ERROR_INT("&sad not defined", procName, 1); - *psad = NULL; - if (!sas) - return ERROR_INT("sas not defined", procName, 1); - - n = sarrayGetCount(sas); - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ - dahash = l_dnaHashCreate(nsize, 8); - sad = sarrayCreate(n); - *psad = sad; - for (i = 0, items = 0; i < n; i++) { - str = sarrayGetString(sas, i, L_NOCOPY); - sarrayFindStringByHash(sad, dahash, str, &index); - if (index < 0) { /* not found */ - l_hashStringToUint64(str, &key); - l_dnaHashAdd(dahash, key, (l_float64)items); - sarrayAddString(sad, str, L_COPY); - items++; - } - } - - if (pdahash) - *pdahash = dahash; - else - l_dnaHashDestroy(&dahash); - return 0; -} - - -/*! - * \brief sarrayIntersectionByHash() - * - * \param[in] sa1, sa2 - * \return sad intersection of the strings, or NULL on error - * - *
- * Notes:
- *      (1) This is faster than sarrayIntersectionByAset(), because the
- *          bucket lookup is O(n).
- * 
- */ -SARRAY * -sarrayIntersectionByHash(SARRAY *sa1, - SARRAY *sa2) -{ -char *str; -l_int32 n1, n2, nsmall, i, index1, index2; -l_uint32 nsize2; -l_uint64 key; -L_DNAHASH *dahash1, *dahash2; -SARRAY *sa_small, *sa_big, *sad; - - PROCNAME("sarrayIntersectionByHash"); - - if (!sa1) - return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL); - if (!sa2) - return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL); - - /* Put the elements of the biggest sarray into a dnahash */ - n1 = sarrayGetCount(sa1); - n2 = sarrayGetCount(sa2); - sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */ - sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */ - dahash1 = l_dnaHashCreateFromSarray(sa_big); - - /* Build up the intersection of strings. Add to %sad - * if the string is in sa_big (using dahash1) but hasn't - * yet been seen in the traversal of sa_small (using dahash2). */ - sad = sarrayCreate(0); - nsmall = sarrayGetCount(sa_small); - findNextLargerPrime(nsmall / 20, &nsize2); /* buckets in hash table */ - dahash2 = l_dnaHashCreate(nsize2, 0); - for (i = 0; i < nsmall; i++) { - str = sarrayGetString(sa_small, i, L_NOCOPY); - sarrayFindStringByHash(sa_big, dahash1, str, &index1); - if (index1 >= 0) { - sarrayFindStringByHash(sa_small, dahash2, str, &index2); - if (index2 == -1) { - sarrayAddString(sad, str, L_COPY); - l_hashStringToUint64(str, &key); - l_dnaHashAdd(dahash2, key, (l_float64)i); - } - } - } - - l_dnaHashDestroy(&dahash1); - l_dnaHashDestroy(&dahash2); - return sad; -} - - -/*! - * \brief sarrayFindStringByHash() - * - * \param[in] sa - * \param[in] dahash built from sa - * \param[in] str arbitrary string - * \param[out] pindex index into %sa if %str is in %sa; -1 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Fast lookup in dnaHash associated with a sarray, to see if a
- *          random string %str is already stored in the hash table.
- *      (2) We use a strong hash function to minimize the chance that
- *          two different strings hash to the same key value.
- *      (3) We select the number of buckets to be about 5% of the size
- *          of the input sarray, so that when fully populated, each
- *          bucket (dna) will have about 20 entries, each being an index
- *          into sa.  In lookup, after hashing to the key, and then
- *          again to the bucket, we traverse the bucket (dna), using the
- *          index into sa to check if %str has been found before.
- * 
- */ -l_ok -sarrayFindStringByHash(SARRAY *sa, - L_DNAHASH *dahash, - const char *str, - l_int32 *pindex) -{ -char *stri; -l_int32 i, nvals, index; -l_uint64 key; -L_DNA *da; - - PROCNAME("sarrayFindStringByHash"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = -1; - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!dahash) - return ERROR_INT("dahash not defined", procName, 1); - - l_hashStringToUint64(str, &key); - da = l_dnaHashGetDna(dahash, key, L_NOCOPY); - if (!da) return 0; - - /* Run through the da, looking for this string */ - nvals = l_dnaGetCount(da); - for (i = 0; i < nvals; i++) { - l_dnaGetIValue(da, i, &index); - stri = sarrayGetString(sa, index, L_NOCOPY); - if (!strcmp(str, stri)) { /* duplicate */ - *pindex = index; - return 0; - } - } - - return 0; -} - - -/*! - * \brief l_dnaHashCreateFromSarray() - * - * \param[in] sa - * \return dahash, or NULL on error - */ -L_DNAHASH * -l_dnaHashCreateFromSarray(SARRAY *sa) -{ -char *str; -l_int32 i, n; -l_uint32 nsize; -l_uint64 key; -L_DNAHASH *dahash; - - /* Build up dnaHash of indices, hashed by a 64-bit key that - * should randomize the lower bits used in bucket selection. - * Having about 20 pts in each bucket is roughly optimal. */ - n = sarrayGetCount(sa); - findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ -/* lept_stderr("Prime used: %d\n", nsize); */ - - /* Add each string, using the hash as key and the index into %sa - * as the value. Storing the index enables operations that check - * for duplicates. */ - dahash = l_dnaHashCreate(nsize, 8); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - l_hashStringToUint64(str, &key); - l_dnaHashAdd(dahash, key, (l_float64)i); - } - - return dahash; -} - - -/*----------------------------------------------------------------------* - * Miscellaneous operations * - *----------------------------------------------------------------------*/ -/*! 
- * \brief sarrayGenerateIntegers() - * - * \param[in] n - * \return sa of printed numbers, 1 - n, or NULL on error - */ -SARRAY * -sarrayGenerateIntegers(l_int32 n) -{ -char buf[32]; -l_int32 i; -SARRAY *sa; - - PROCNAME("sarrayGenerateIntegers"); - - if ((sa = sarrayCreate(n)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - for (i = 0; i < n; i++) { - snprintf(buf, sizeof(buf), "%d", i); - sarrayAddString(sa, buf, L_COPY); - } - return sa; -} - - -/*! - * \brief sarrayLookupCSKV() - * - * \param[in] sa of strings, each being a comma-separated pair - * of strings, the first being a key and the - * second a value - * \param[in] keystring an input string to match with each key in %sa - * \param[out] pvalstring the returned value string corresponding to the - * input key string, if found; otherwise NULL - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input %sa can have other strings that are not in
- *          comma-separated key-value format.  These will be ignored.
- *      (2) This returns a copy of the first value string in %sa whose
- *          key string matches the input %keystring.
- *      (3) White space is not ignored; all white space before the ','
- *          is used for the keystring in matching.  This allows the
- *          key and val strings to have white space (e.g., multiple words).
- * 
- */ -l_ok -sarrayLookupCSKV(SARRAY *sa, - const char *keystring, - char **pvalstring) -{ -char *key, *val, *str; -l_int32 i, n; -SARRAY *sa1; - - PROCNAME("sarrayLookupCSKV"); - - if (!pvalstring) - return ERROR_INT("&valstring not defined", procName, 1); - *pvalstring = NULL; - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!keystring) - return ERROR_INT("keystring not defined", procName, 1); - - n = sarrayGetCount(sa); - for (i = 0; i < n; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - sa1 = sarrayCreate(2); - sarraySplitString(sa1, str, ","); - if (sarrayGetCount(sa1) != 2) { - sarrayDestroy(&sa1); - continue; - } - key = sarrayGetString(sa1, 0, L_NOCOPY); - val = sarrayGetString(sa1, 1, L_NOCOPY); - if (!strcmp(key, keystring)) { - *pvalstring = stringNew(val); - sarrayDestroy(&sa1); - return 0; - } - sarrayDestroy(&sa1); - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/scale1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/scale1.c deleted file mode 100644 index d4de265f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/scale1.c +++ /dev/null @@ -1,3755 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file scale1.c - *
- *         Top-level scaling
- *               PIX      *pixScale()
- *               PIX      *pixScaleToSizeRel()
- *               PIX      *pixScaleToSize()
- *               PIX      *pixScaleToResolution()
- *               PIX      *pixScaleGeneral()
- *
- *         Linearly interpreted (usually up-) scaling
- *               PIX      *pixScaleLI()
- *               PIX      *pixScaleColorLI()
- *               PIX      *pixScaleColor2xLI()
- *               PIX      *pixScaleColor4xLI()
- *               PIX      *pixScaleGrayLI()
- *               PIX      *pixScaleGray2xLI()
- *               PIX      *pixScaleGray4xLI()
- *
- *         Upscale 2x followed by binarization
- *               PIX      *pixScaleGray2xLIThresh()
- *               PIX      *pixScaleGray2xLIDither()
- *
- *         Upscale 4x followed by binarization
- *               PIX      *pixScaleGray4xLIThresh()
- *               PIX      *pixScaleGray4xLIDither()
- *
- *         Scaling by closest pixel sampling
- *               PIX      *pixScaleBySampling()
- *               PIX      *pixScaleBySamplingToSize()
- *               PIX      *pixScaleByIntSampling()
- *
- *         Fast integer factor subsampling RGB to gray and to binary
- *               PIX      *pixScaleRGBToGrayFast()
- *               PIX      *pixScaleRGBToBinaryFast()
- *               PIX      *pixScaleGrayToBinaryFast()
- *
- *         Downscaling with (antialias) smoothing
- *               PIX      *pixScaleSmooth()
- *               PIX      *pixScaleSmoothToSize()
- *               PIX      *pixScaleRGBToGray2()   [special 2x reduction to gray]
- *
- *         Downscaling with (antialias) area mapping
- *               PIX      *pixScaleAreaMap()
- *               PIX      *pixScaleAreaMap2()
- *               PIX      *pixScaleAreaMapToSize()
- *
- *         Binary scaling by closest pixel sampling
- *               PIX      *pixScaleBinary()
- *
- *     Low-level static functions:
- *
- *         Color (interpolated) scaling: general case
- *               static void       scaleColorLILow()
- *
- *         Grayscale (interpolated) scaling: general case
- *               static void       scaleGrayLILow()
- *
- *         Color (interpolated) scaling: 2x upscaling
- *               static void       scaleColor2xLILow()
- *               static void       scaleColor2xLILineLow()
- *
- *         Grayscale (interpolated) scaling: 2x upscaling
- *               static void       scaleGray2xLILow()
- *               static void       scaleGray2xLILineLow()
- *
- *         Grayscale (interpolated) scaling: 4x upscaling
- *               static void       scaleGray4xLILow()
- *               static void       scaleGray4xLILineLow()
- *
- *         Grayscale and color scaling by closest pixel sampling
- *               static l_int32    scaleBySamplingLow()
- *
- *         Color and grayscale downsampling with (antialias) lowpass filter
- *               static l_int32    scaleSmoothLow()
- *               static void       scaleRGBToGray2Low()
- *
- *         Color and grayscale downsampling with (antialias) area mapping
- *               static l_int32    scaleColorAreaMapLow()
- *               static l_int32    scaleGrayAreaMapLow()
- *               static l_int32    scaleAreaMapLow2()
- *
- *         Binary scaling by closest pixel sampling
- *               static l_int32    scaleBinaryLow()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static void scaleColorLILow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 wpls); -static void scaleGrayLILow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 wpls); -static void scaleColor2xLILow(l_uint32 *datad, l_int32 wpld, l_uint32 *datas, - l_int32 ws, l_int32 hs, l_int32 wpls); -static void scaleColor2xLILineLow(l_uint32 *lined, l_int32 wpld, - l_uint32 *lines, l_int32 ws, l_int32 wpls, - l_int32 lastlineflag); -static void scaleGray2xLILow(l_uint32 *datad, l_int32 wpld, l_uint32 *datas, - l_int32 ws, l_int32 hs, l_int32 wpls); -static void scaleGray2xLILineLow(l_uint32 *lined, l_int32 wpld, - l_uint32 *lines, l_int32 ws, l_int32 wpls, - l_int32 lastlineflag); -static void scaleGray4xLILow(l_uint32 *datad, l_int32 wpld, l_uint32 *datas, - l_int32 ws, l_int32 hs, l_int32 wpls); -static void scaleGray4xLILineLow(l_uint32 *lined, l_int32 wpld, - l_uint32 *lines, l_int32 ws, l_int32 wpls, - l_int32 lastlineflag); -static l_int32 scaleBySamplingLow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 d, l_int32 wpls); -static l_int32 scaleSmoothLow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 d, l_int32 wpls, - l_int32 size); -static void scaleRGBToGray2Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_float32 rwt, l_float32 gwt, l_float32 bwt); -static void scaleColorAreaMapLow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 wpls); -static void scaleGrayAreaMapLow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 wpls); -static void scaleAreaMapLow2(l_uint32 
*datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 d, - l_int32 wpls); -static l_int32 scaleBinaryLow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 ws, - l_int32 hs, l_int32 wpls); - -#ifndef NO_CONSOLE_IO -#define DEBUG_OVERFLOW 0 -#define DEBUG_UNROLLING 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*------------------------------------------------------------------* - * Top level scaling dispatcher * - *------------------------------------------------------------------*/ -/*! - * \brief pixScale() - * - * \param[in] pixs 1, 2, 4, 8, 16 and 32 bpp - * \param[in] scalex, scaley - * \return pixd, or NULL on error - * - * This function scales 32 bpp RGB; 2, 4 or 8 bpp palette color; - * 2, 4, 8 or 16 bpp gray; and binary images. - * - * When the input has palette color, the colormap is removed and - * the result is either 8 bpp gray or 32 bpp RGB, depending on whether - * the colormap has color entries. Images with 2, 4 or 16 bpp are - * converted to 8 bpp. - * - * Because pixScale is meant to be a very simple interface to a - * number of scaling functions, including the use of unsharp masking, - * the type of scaling and the sharpening parameters are chosen - * by default. Grayscale and color images are scaled using one - * of four methods, depending on the scale factors: - * 1 antialiased subsampling (lowpass filtering followed by - * subsampling, implemented here by area mapping), for scale factors - * less than 0.2 - * 2 antialiased subsampling with sharpening, for scale factors - * between 0.2 and 0.7 - * 3 linear interpolation with sharpening, for scale factors between - * 0.7 and 1.4 - * 4 linear interpolation without sharpening, for scale factors >= 1.4. - * - * One could use subsampling for scale factors very close to 1.0, - * because it preserves sharp edges. Linear interpolation blurs - * edges because the dest pixels will typically straddle two src edge - * pixels. 
Subsmpling removes entire columns and rows, so the edge is - * not blurred. However, there are two reasons for not doing this. - * First, it moves edges, so that a straight line at a large angle to - * both horizontal and vertical will have noticeable kinks where - * horizontal and vertical rasters are removed. Second, although it - * is very fast, you get good results on sharp edges by applying - * a sharpening filter. - * - * For images with sharp edges, sharpening substantially improves the - * image quality for scale factors between about 0.2 and about 2.0. - * pixScale uses a small amount of sharpening by default because - * it strengthens edge pixels that are weak due to anti-aliasing. - * The default sharpening factors are: - * * for scaling factors < 0.7: sharpfract = 0.2 sharpwidth = 1 - * * for scaling factors >= 0.7: sharpfract = 0.4 sharpwidth = 2 - * The cases where the sharpening halfwidth is 1 or 2 have special - * implementations and are about twice as fast as the general case. - * - * However, sharpening is computationally expensive, and one needs - * to consider the speed-quality tradeoff: - * * For upscaling of RGB images, linear interpolation plus default - * sharpening is about 5 times slower than upscaling alone. - * * For downscaling, area mapping plus default sharpening is - * about 10 times slower than downscaling alone. - * When the scale factor is larger than 1.4, the cost of sharpening, - * which is proportional to image area, is very large compared to the - * incremental quality improvement, so we cut off the default use of - * sharpening at 1.4. Thus, for scale factors greater than 1.4, - * pixScale only does linear interpolation. - * - * In many situations you will get a satisfactory result by scaling - * without sharpening: call pixScaleGeneral with %sharpfract = 0.0. 
- * Alternatively, if you wish to sharpen but not use the default - * value, first call pixScaleGeneral with %sharpfract = 0.0, and - * then sharpen explicitly using pixUnsharpMasking. - * - * Binary images are scaled to binary by sampling the closest pixel, - * without any low-pass filtering averaging of neighboring pixels. - * This will introduce aliasing for reductions. Aliasing can be - * prevented by using pixScaleToGray instead. - */ -PIX * -pixScale(PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 sharpwidth; -l_float32 maxscale, sharpfract; - - PROCNAME("pixScale"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Reduce the default sharpening factors by 2 if maxscale < 0.7 */ - maxscale = L_MAX(scalex, scaley); - sharpfract = (maxscale < 0.7) ? 0.2 : 0.4; - sharpwidth = (maxscale < 0.7) ? 1 : 2; - - return pixScaleGeneral(pixs, scalex, scaley, sharpfract, sharpwidth); -} - - -/*! - * \brief pixScaleToSizeRel() - * - * \param[in] pixs - * \param[in] delw change in width, in pixels; 0 means no change - * \param[in] delh change in height, in pixels; 0 means no change - * \return pixd, or NULL on error - */ -PIX * -pixScaleToSizeRel(PIX *pixs, - l_int32 delw, - l_int32 delh) -{ -l_int32 w, h, wd, hd; - - PROCNAME("pixScaleToSizeRel"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - if (delw == 0 && delh == 0) - return pixCopy(NULL, pixs); - - pixGetDimensions(pixs, &w, &h, NULL); - wd = w + delw; - hd = h + delh; - if (wd <= 0 || hd <= 0) - return (PIX *)ERROR_PTR("pix dimension reduced to 0", procName, NULL); - - return pixScaleToSize(pixs, wd, hd); -} - - -/*! - * \brief pixScaleToSize() - * - * \param[in] pixs 1, 2, 4, 8, 16 and 32 bpp - * \param[in] wd target width; use 0 if using height as target - * \param[in] hd target height; use 0 if using width as target - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The output scaled image has the dimension(s) you specify:
- *          * To specify the width with isotropic scaling, set %hd = 0.
- *          * To specify the height with isotropic scaling, set %wd = 0.
- *          * If both %wd and %hd are specified, the image is scaled
- *             (in general, anisotropically) to that size.
- *          * It is an error to set both %wd and %hd to 0.
- * 
- */ -PIX * -pixScaleToSize(PIX *pixs, - l_int32 wd, - l_int32 hd) -{ -l_int32 w, h; -l_float32 scalex, scaley; - - PROCNAME("pixScaleToSize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (wd <= 0 && hd <= 0) - return (PIX *)ERROR_PTR("neither wd nor hd > 0", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (wd <= 0) { - scaley = (l_float32)hd / (l_float32)h; - scalex = scaley; - } else if (hd <= 0) { - scalex = (l_float32)wd / (l_float32)w; - scaley = scalex; - } else { - scalex = (l_float32)wd / (l_float32)w; - scaley = (l_float32)hd / (l_float32)h; - } - - return pixScale(pixs, scalex, scaley); -} - - -/*! - * \brief pixScaleToResolution() - * - * \param[in] pixs - * \param[in] target desired resolution - * \param[in] assumed assumed resolution if not defined; typ. 300. - * \param[out] pscalefact [optional] actual scaling factor used - * \return pixd, or NULL on error - */ -PIX * -pixScaleToResolution(PIX *pixs, - l_float32 target, - l_float32 assumed, - l_float32 *pscalefact) -{ -l_int32 xres; -l_float32 factor; - - PROCNAME("pixScaleToResolution"); - - if (pscalefact) *pscalefact = 1.0; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (target <= 0) - return (PIX *)ERROR_PTR("target resolution <= 0", procName, NULL); - - xres = pixGetXRes(pixs); - if (xres <= 0) { - if (assumed == 0) - return pixCopy(NULL, pixs); - xres = assumed; - } - factor = target / (l_float32)xres; - if (pscalefact) *pscalefact = factor; - - return pixScale(pixs, factor, factor); -} - - -/*! - * \brief pixScaleGeneral() - * - * \param[in] pixs 1, 2, 4, 8, 16 and 32 bpp - * \param[in] scalex must be > 0.0 - * \param[in] scaley must be > 0.0 - * \param[in] sharpfract use 0.0 to skip sharpening - * \param[in] sharpwidth halfwidth of low-pass filter; typ. 1 or 2 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See pixScale() for usage.
- *      (2) This interface may change in the future, as other special
- *          cases are added.
- *      (3) The actual sharpening factors used depend on the maximum
- *          of the two scale factors (maxscale):
- *            maxscale <= 0.2:        no sharpening
- *            0.2 < maxscale < 1.4:   uses the input parameters
- *            maxscale >= 1.4:        no sharpening
- *      (4) To avoid sharpening for grayscale and color images with
- *          scaling factors between 0.2 and 1.4, call this function
- *          with %sharpfract == 0.0.
- *      (5) To use arbitrary sharpening in conjunction with scaling,
- *          call this function with %sharpfract = 0.0, and follow this
- *          with a call to pixUnsharpMasking() with your chosen parameters.
- * 
- */ -PIX * -pixScaleGeneral(PIX *pixs, - l_float32 scalex, - l_float32 scaley, - l_float32 sharpfract, - l_int32 sharpwidth) -{ -l_int32 d; -l_float32 maxscale; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixScaleGeneral"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not {1,2,4,8,16,32} bpp", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factor <= 0", procName, NULL); - if (scalex == 1.0 && scaley == 1.0) - return pixCopy(NULL, pixs); - - if (d == 1) - return pixScaleBinary(pixs, scalex, scaley); - - /* Remove colormap; clone if possible; result is either 8 or 32 bpp */ - if ((pix1 = pixConvertTo8Or32(pixs, L_CLONE, 0)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); - - /* Scale (up or down) */ - d = pixGetDepth(pix1); - maxscale = L_MAX(scalex, scaley); - if (maxscale < 0.7) { /* area mapping for anti-aliasing */ - pix2 = pixScaleAreaMap(pix1, scalex, scaley); - if (maxscale > 0.2 && sharpfract > 0.0 && sharpwidth > 0) - pixd = pixUnsharpMasking(pix2, sharpwidth, sharpfract); - else - pixd = pixClone(pix2); - } else { /* use linear interpolation */ - if (d == 8) - pix2 = pixScaleGrayLI(pix1, scalex, scaley); - else /* d == 32 */ - pix2 = pixScaleColorLI(pix1, scalex, scaley); - if (maxscale < 1.4 && sharpfract > 0.0 && sharpwidth > 0) - pixd = pixUnsharpMasking(pix2, sharpwidth, sharpfract); - else - pixd = pixClone(pix2); - } - - pixDestroy(&pix1); - pixDestroy(&pix2); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*------------------------------------------------------------------* - * Scaling by linear interpolation * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixScaleLI() - * - * \param[in] pixs 2, 4, 8 or 32 bpp; with or without colormap - * \param[in] scalex must be >= 0.7 - * \param[in] scaley must be >= 0.7 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function should only be used when the scale factors are
- *          greater than or equal to 0.7, and typically greater than 1.
- *          If both scale factors are smaller than 0.7, we issue a warning
- *          and call pixScaleGeneral(), which will invoke area mapping
- *          without sharpening.
- *      (2) This works on 2, 4, 8, 16 and 32 bpp images, as well as on
- *          2, 4 and 8 bpp images that have a colormap.  If there is a
- *          colormap, it is removed to either gray or RGB, depending
- *          on the colormap.
- *      (3) This does a linear interpolation on the src image.
- *      (4) It dispatches to much faster implementations for
- *          the special cases of 2x and 4x expansion.
- * 
- */ -PIX * -pixScaleLI(PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 d; -l_float32 maxscale; -PIX *pixt, *pixd; - - PROCNAME("pixScaleLI"); - - if (!pixs || (pixGetDepth(pixs) == 1)) - return (PIX *)ERROR_PTR("pixs not defined or 1 bpp", procName, NULL); - maxscale = L_MAX(scalex, scaley); - if (maxscale < 0.7) { - L_WARNING("scaling factors < 0.7; do regular scaling\n", procName); - return pixScaleGeneral(pixs, scalex, scaley, 0.0, 0); - } - d = pixGetDepth(pixs); - if (d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("pixs not {2,4,8,16,32} bpp", procName, NULL); - - /* Remove colormap; clone if possible; result is either 8 or 32 bpp */ - if ((pixt = pixConvertTo8Or32(pixs, L_CLONE, 0)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - - d = pixGetDepth(pixt); - if (d == 8) - pixd = pixScaleGrayLI(pixt, scalex, scaley); - else /* d == 32 */ - pixd = pixScaleColorLI(pixt, scalex, scaley); - - pixDestroy(&pixt); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixScaleColorLI() - * - * \param[in] pixs 32 bpp, representing rgb - * \param[in] scalex must be >= 0.7 - * \param[in] scaley must be >= 0.7 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) If both scale factors are smaller than 0.7, we issue a warning
- *          and call pixScaleGeneral(), which will invoke area mapping
- *          without sharpening.  This is particularly important for
- *          document images with sharp edges.
- *      (2) For the general case, it's about 4x faster to manipulate
- *          the color pixels directly, rather than to make images
- *          out of each of the 3 components, scale each component
- *          using the pixScaleGrayLI(), and combine the results back
- *          into an rgb image.
- * 
- */ -PIX * -pixScaleColorLI(PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 ws, hs, wpls, wd, hd, wpld; -l_uint32 *datas, *datad; -l_float32 maxscale; -PIX *pixd; - - PROCNAME("pixScaleColorLI"); - - if (!pixs || (pixGetDepth(pixs) != 32)) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - maxscale = L_MAX(scalex, scaley); - if (maxscale < 0.7) { - L_WARNING("scaling factors < 0.7; do regular scaling\n", procName); - return pixScaleGeneral(pixs, scalex, scaley, 0.0, 0); - } - - /* Do fast special cases if possible */ - if (scalex == 1.0 && scaley == 1.0) - return pixCopy(NULL, pixs); - if (scalex == 2.0 && scaley == 2.0) - return pixScaleColor2xLI(pixs); - if (scalex == 4.0 && scaley == 4.0) - return pixScaleColor4xLI(pixs); - - /* General case */ - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wd = (l_int32)(scalex * (l_float32)ws + 0.5); - hd = (l_int32)(scaley * (l_float32)hs + 0.5); - if ((pixd = pixCreate(wd, hd, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, scalex, scaley); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleColorLILow(datad, wd, hd, wpld, datas, ws, hs, wpls); - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, scalex, scaley); - - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixScaleColor2xLI() - * - * \param[in] pixs 32 bpp, representing rgb - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is a special case of linear interpolated scaling,
- *          for 2x upscaling.  It is about 8x faster than using
- *          the generic pixScaleColorLI(), and about 4x faster than
- *          using the special 2x scale function pixScaleGray2xLI()
- *          on each of the three components separately.
- * 
- */ -PIX * -pixScaleColor2xLI(PIX *pixs) -{ -l_int32 ws, hs, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleColor2xLI"); - - if (!pixs || (pixGetDepth(pixs) != 32)) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(2 * ws, 2 * hs, 32)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 2.0, 2.0); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleColor2xLILow(datad, wpld, datas, ws, hs, wpls); - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 2.0, 2.0); - - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixScaleColor4xLI() - * - * \param[in] pixs 32 bpp, representing rgb - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is a special case of color linear interpolated scaling,
- *          for 4x upscaling.  It is about 3x faster than using
- *          the generic pixScaleColorLI().
- *      (2) This scales each component separately, using pixScaleGray4xLI().
- *          It would be about 4x faster to inline the color code properly,
- *          in analogy to scaleColor4xLILow(), and I leave this as
- *          an exercise for someone who really needs it.
- * 
- */ -PIX * -pixScaleColor4xLI(PIX *pixs) -{ -PIX *pixr, *pixg, *pixb; -PIX *pixrs, *pixgs, *pixbs; -PIX *pixd; - - PROCNAME("pixScaleColor4xLI"); - - if (!pixs || (pixGetDepth(pixs) != 32)) - return (PIX *)ERROR_PTR("pixs undefined or not 32 bpp", procName, NULL); - - pixr = pixGetRGBComponent(pixs, COLOR_RED); - pixrs = pixScaleGray4xLI(pixr); - pixDestroy(&pixr); - pixg = pixGetRGBComponent(pixs, COLOR_GREEN); - pixgs = pixScaleGray4xLI(pixg); - pixDestroy(&pixg); - pixb = pixGetRGBComponent(pixs, COLOR_BLUE); - pixbs = pixScaleGray4xLI(pixb); - pixDestroy(&pixb); - - if ((pixd = pixCreateRGBImage(pixrs, pixgs, pixbs)) == NULL) { - L_ERROR("pixd not made\n", procName); - } else { - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 4.0, 4.0); - pixCopyInputFormat(pixd, pixs); - } - - pixDestroy(&pixrs); - pixDestroy(&pixgs); - pixDestroy(&pixbs); - return pixd; -} - - -/*! - * \brief pixScaleGrayLI() - * - * \param[in] pixs 8 bpp grayscale, no cmap - * \param[in] scalex must be >= 0.7 - * \param[in] scaley must be >= 0.7 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function is appropriate for upscaling magnification, where the
- *          scale factor is > 1, as well as for a small amount of downscaling
- *          reduction, with scale factor > 0.7.  If the scale factor is < 0.7,
- *          the best result is obtained by area mapping, but this is relatiely
- *          expensive.  A less expensive alternative with scale factor < 0.7
- *          is low-pass filtering followed by subsampling (pixScaleSmooth()),
- *          which is effectively a cheap form of area mapping.
- *      (2) Here are some details:
- *          - For each pixel in the dest, this does a linear
- *            interpolation of 4 neighboring pixels in the src.
- *            Specifically, consider the UL corner of src and
- *            dest pixels.  The UL corner of the dest falls within
- *            a src pixel, whose four corners are the UL corners
- *            of 4 adjacent src pixels.  The value of the dest
- *            is taken by linear interpolation using the values of
- *            the four src pixels and the distance of the UL corner
- *            of the dest from each corner.
- *          - If the image is expanded so that the dest pixel is
- *            smaller than the src pixel, such interpolation
- *            is a reasonable approach.  This interpolation is
- *            also good for a small image reduction factor that
- *            is not more than a 2x reduction.
- *          - The linear interpolation algorithm for scaling is
- *            identical in form to the area-mapping algorithm
- *            for grayscale rotation.  The latter corresponds to a
- *            translation of each pixel without scaling.
- *          - This function is NOT optimal if the scaling involves
- *            a large reduction.  If the image is significantly
- *            reduced, so that the dest pixel is much larger than
- *            the src pixels, this interpolation, which is over src
- *            pixels only near the UL corner of the dest pixel,
- *            is not going to give a good area-mapping average.
- *            Because area mapping for image scaling is considerably
- *            more computationally intensive than linear interpolation,
- *            we choose not to use it.  For large image reduction,
- *            linear interpolation over adjacent src pixels
- *            degenerates asymptotically to subsampling.  But
- *            subsampling without a low-pass pre-filter causes
- *            aliasing by the nyquist theorem.  To avoid aliasing,
- *            a low-pass filter e.g., an averaging filter of
- *            size roughly equal to the dest pixel i.e., the reduction
- *            factor should be applied to the src before subsampling.
- *          - As an alternative to low-pass filtering and subsampling
- *            for large reduction factors, linear interpolation can
- *            also be done between the widely separated src pixels in
- *            which the corners of the dest pixel lie.  This also is
- *            not optimal, as it samples src pixels only near the
- *            corners of the dest pixel, and it is not implemented.
- * 
- */ -PIX * -pixScaleGrayLI(PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 ws, hs, wpls, wd, hd, wpld; -l_uint32 *datas, *datad; -l_float32 maxscale; -PIX *pixd; - - PROCNAME("pixScaleGrayLI"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, cmapped or not 8 bpp", - procName, NULL); - maxscale = L_MAX(scalex, scaley); - if (maxscale < 0.7) { - L_WARNING("scaling factors < 0.7; do regular scaling\n", procName); - return pixScaleGeneral(pixs, scalex, scaley, 0.0, 0); - } - - /* Do fast special cases if possible */ - if (scalex == 1.0 && scaley == 1.0) - return pixCopy(NULL, pixs); - if (scalex == 2.0 && scaley == 2.0) - return pixScaleGray2xLI(pixs); - if (scalex == 4.0 && scaley == 4.0) - return pixScaleGray4xLI(pixs); - - /* General case */ - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wd = (l_int32)(scalex * (l_float32)ws + 0.5); - hd = (l_int32)(scaley * (l_float32)hs + 0.5); - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyText(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixScaleResolution(pixd, scalex, scaley); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleGrayLILow(datad, wd, hd, wpld, datas, ws, hs, wpls); - return pixd; -} - - -/*! - * \brief pixScaleGray2xLI() - * - * \param[in] pixs 8 bpp grayscale, not cmapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is a special case of gray linear interpolated scaling,
- *          for 2x upscaling.  It is about 6x faster than using
- *          the generic pixScaleGrayLI().
- * 
- */ -PIX * -pixScaleGray2xLI(PIX *pixs) -{ -l_int32 ws, hs, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleGray2xLI"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, cmapped or not 8 bpp", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(2 * ws, 2 * hs, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixScaleResolution(pixd, 2.0, 2.0); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleGray2xLILow(datad, wpld, datas, ws, hs, wpls); - return pixd; -} - - -/*! - * \brief pixScaleGray4xLI() - * - * \param[in] pixs 8 bpp grayscale, not cmapped - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This is a special case of gray linear interpolated scaling,
- *          for 4x upscaling.  It is about 12x faster than using
- *          the generic pixScaleGrayLI().
- * 
- */ -PIX * -pixScaleGray4xLI(PIX *pixs) -{ -l_int32 ws, hs, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleGray4xLI"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, cmapped or not 8 bpp", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - if ((pixd = pixCreate(4 * ws, 4 * hs, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixScaleResolution(pixd, 4.0, 4.0); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleGray4xLILow(datad, wpld, datas, ws, hs, wpls); - return pixd; -} - - -/*------------------------------------------------------------------* - * Scale 2x followed by binarization * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleGray2xLIThresh() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] thresh between 0 and 256 - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does 2x upscale on pixs, using linear interpolation,
- *          followed by thresholding to binary.
- *      (2) Buffers are used to avoid making a large grayscale image.
- * 
- */ -PIX * -pixScaleGray2xLIThresh(PIX *pixs, - l_int32 thresh) -{ -l_int32 i, ws, hs, hsm, wd, hd, wpls, wplb, wpld; -l_uint32 *datas, *datad, *lines, *lined, *lineb; -PIX *pixd; - - PROCNAME("pixScaleGray2xLIThresh"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - if (thresh < 0 || thresh > 256) - return (PIX *)ERROR_PTR("thresh must be in [0, ... 256]", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = 2 * ws; - hd = 2 * hs; - hsm = hs - 1; - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - /* Make line buffer for 2 lines of virtual intermediate image */ - wplb = (wd + 3) / 4; - if ((lineb = (l_uint32 *)LEPT_CALLOC(2 * wplb, sizeof(l_uint32))) == NULL) - return (PIX *)ERROR_PTR("lineb not made", procName, NULL); - - /* Make dest binary image */ - if ((pixd = pixCreate(wd, hd, 1)) == NULL) { - LEPT_FREE(lineb); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 2.0, 2.0); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - /* Do all but last src line */ - for (i = 0; i < hsm; i++) { - lines = datas + i * wpls; - lined = datad + 2 * i * wpld; /* do 2 dest lines at a time */ - scaleGray2xLILineLow(lineb, wplb, lines, ws, wpls, 0); - thresholdToBinaryLineLow(lined, wd, lineb, 8, thresh); - thresholdToBinaryLineLow(lined + wpld, wd, lineb + wplb, 8, thresh); - } - - /* Do last src line */ - lines = datas + hsm * wpls; - lined = datad + 2 * hsm * wpld; - scaleGray2xLILineLow(lineb, wplb, lines, ws, wpls, 1); - thresholdToBinaryLineLow(lined, wd, lineb, 8, thresh); - thresholdToBinaryLineLow(lined + wpld, wd, lineb + wplb, 8, thresh); - - LEPT_FREE(lineb); - return pixd; -} - - -/*! - * \brief pixScaleGray2xLIDither() - * - * \param[in] pixs 8 bpp, not cmapped - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does 2x upscale on pixs, using linear interpolation,
- *          followed by Floyd-Steinberg dithering to binary.
- *      (2) Buffers are used to avoid making a large grayscale image.
- *          ~ Two line buffers are used for the src, required for the 2x
- *            LI upscale.
- *          ~ Three line buffers are used for the intermediate image.
- *            Two are filled with each 2xLI row operation; the third is
- *            needed because the upscale and dithering ops are out of sync.
- * 
- */ -PIX * -pixScaleGray2xLIDither(PIX *pixs) -{ -l_int32 i, ws, hs, hsm, wd, hd, wpls, wplb, wpld; -l_uint32 *datas, *datad; -l_uint32 *lined; -l_uint32 *lineb = NULL; /* 2 intermediate buffer lines */ -l_uint32 *linebp = NULL; /* 1 intermediate buffer line */ -l_uint32 *bufs = NULL; /* 2 source buffer lines */ -PIX *pixd = NULL; - - PROCNAME("pixScaleGray2xLIDither"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = 2 * ws; - hd = 2 * hs; - hsm = hs - 1; - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - /* Make line buffers for 2 lines of src image */ - if ((bufs = (l_uint32 *)LEPT_CALLOC(2 * wpls, sizeof(l_uint32))) == NULL) - return (PIX *)ERROR_PTR("bufs not made", procName, NULL); - - /* Make line buffer for 2 lines of virtual intermediate image */ - wplb = (wd + 3) / 4; - if ((lineb = (l_uint32 *)LEPT_CALLOC(2 * wplb, sizeof(l_uint32))) == NULL) { - L_ERROR("lineb not made\n", procName); - goto cleanup; - } - - /* Make line buffer for 1 line of virtual intermediate image */ - if ((linebp = (l_uint32 *)LEPT_CALLOC(wplb, sizeof(l_uint32))) == NULL) { - L_ERROR("linebp not made\n", procName); - goto cleanup; - } - - /* Make dest binary image */ - if ((pixd = pixCreate(wd, hd, 1)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup; - } - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 2.0, 2.0); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - /* Start with the first src and the first dest line */ - memcpy(bufs, datas, 4 * wpls); /* first src line */ - memcpy(bufs + wpls, datas + wpls, 4 * wpls); /* 2nd src line */ - scaleGray2xLILineLow(lineb, wplb, bufs, ws, wpls, 0); /* 2 i lines */ - lined = datad; - ditherToBinaryLineLow(lined, wd, lineb, lineb + wplb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* 1st d line */ - - /* Do 
all but last src line */ - for (i = 1; i < hsm; i++) { - memcpy(bufs, datas + i * wpls, 4 * wpls); /* i-th src line */ - memcpy(bufs + wpls, datas + (i + 1) * wpls, 4 * wpls); - memcpy(linebp, lineb + wplb, 4 * wplb); - scaleGray2xLILineLow(lineb, wplb, bufs, ws, wpls, 0); /* 2 i lines */ - lined = datad + 2 * i * wpld; - ditherToBinaryLineLow(lined - wpld, wd, linebp, lineb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* odd dest line */ - ditherToBinaryLineLow(lined, wd, lineb, lineb + wplb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* even dest line */ - } - - /* Do the last src line and the last 3 dest lines */ - memcpy(bufs, datas + hsm * wpls, 4 * wpls); /* hsm-th src line */ - memcpy(linebp, lineb + wplb, 4 * wplb); /* 1 i line */ - scaleGray2xLILineLow(lineb, wplb, bufs, ws, wpls, 1); /* 2 i lines */ - ditherToBinaryLineLow(lined + wpld, wd, linebp, lineb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* odd dest line */ - ditherToBinaryLineLow(lined + 2 * wpld, wd, lineb, lineb + wplb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* even dest line */ - ditherToBinaryLineLow(lined + 3 * wpld, wd, lineb + wplb, NULL, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 1); - /* last dest line */ - -cleanup: - LEPT_FREE(bufs); - LEPT_FREE(lineb); - LEPT_FREE(linebp); - return pixd; -} - - -/*------------------------------------------------------------------* - * Scale 4x followed by binarization * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleGray4xLIThresh() - * - * \param[in] pixs 8 bpp - * \param[in] thresh between 0 and 256 - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does 4x upscale on pixs, using linear interpolation,
- *          followed by thresholding to binary.
- *      (2) Buffers are used to avoid making a large grayscale image.
- *      (3) If a full 4x expanded grayscale image can be kept in memory,
- *          this function is only about 10% faster than separately doing
- *          a linear interpolation to a large grayscale image, followed
- *          by thresholding to binary.
- * 
- */ -PIX * -pixScaleGray4xLIThresh(PIX *pixs, - l_int32 thresh) -{ -l_int32 i, j, ws, hs, hsm, wd, hd, wpls, wplb, wpld; -l_uint32 *datas, *datad, *lines, *lined, *lineb; -PIX *pixd; - - PROCNAME("pixScaleGray4xLIThresh"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - if (thresh < 0 || thresh > 256) - return (PIX *)ERROR_PTR("thresh must be in [0, ... 256]", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = 4 * ws; - hd = 4 * hs; - hsm = hs - 1; - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - /* Make line buffer for 4 lines of virtual intermediate image */ - wplb = (wd + 3) / 4; - if ((lineb = (l_uint32 *)LEPT_CALLOC(4 * wplb, sizeof(l_uint32))) == NULL) - return (PIX *)ERROR_PTR("lineb not made", procName, NULL); - - /* Make dest binary image */ - if ((pixd = pixCreate(wd, hd, 1)) == NULL) { - LEPT_FREE(lineb); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 4.0, 4.0); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - /* Do all but last src line */ - for (i = 0; i < hsm; i++) { - lines = datas + i * wpls; - lined = datad + 4 * i * wpld; /* do 4 dest lines at a time */ - scaleGray4xLILineLow(lineb, wplb, lines, ws, wpls, 0); - for (j = 0; j < 4; j++) { - thresholdToBinaryLineLow(lined + j * wpld, wd, - lineb + j * wplb, 8, thresh); - } - } - - /* Do last src line */ - lines = datas + hsm * wpls; - lined = datad + 4 * hsm * wpld; - scaleGray4xLILineLow(lineb, wplb, lines, ws, wpls, 1); - for (j = 0; j < 4; j++) { - thresholdToBinaryLineLow(lined + j * wpld, wd, - lineb + j * wplb, 8, thresh); - } - - LEPT_FREE(lineb); - return pixd; -} - - -/*! - * \brief pixScaleGray4xLIDither() - * - * \param[in] pixs 8 bpp, not cmapped - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does 4x upscale on pixs, using linear interpolation,
- *          followed by Floyd-Steinberg dithering to binary.
- *      (2) Buffers are used to avoid making a large grayscale image.
- *          ~ Two line buffers are used for the src, required for the
- *            4xLI upscale.
- *          ~ Five line buffers are used for the intermediate image.
- *            Four are filled with each 4xLI row operation; the fifth
- *            is needed because the upscale and dithering ops are
- *            out of sync.
- *      (3) If a full 4x expanded grayscale image can be kept in memory,
- *          this function is only about 5% faster than separately doing
- *          a linear interpolation to a large grayscale image, followed
- *          by error-diffusion dithering to binary.
- * 
- */ -PIX * -pixScaleGray4xLIDither(PIX *pixs) -{ -l_int32 i, j, ws, hs, hsm, wd, hd, wpls, wplb, wpld; -l_uint32 *datas, *datad; -l_uint32 *lined; -l_uint32 *lineb = NULL; /* 4 intermediate buffer lines */ -l_uint32 *linebp = NULL; /* 1 intermediate buffer line */ -l_uint32 *bufs = NULL; /* 2 source buffer lines */ -PIX *pixd = NULL; - - PROCNAME("pixScaleGray4xLIDither"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = 4 * ws; - hd = 4 * hs; - hsm = hs - 1; - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - /* Make line buffers for 2 lines of src image */ - if ((bufs = (l_uint32 *)LEPT_CALLOC(2 * wpls, sizeof(l_uint32))) == NULL) - return (PIX *)ERROR_PTR("bufs not made", procName, NULL); - - /* Make line buffer for 4 lines of virtual intermediate image */ - wplb = (wd + 3) / 4; - if ((lineb = (l_uint32 *)LEPT_CALLOC(4 * wplb, sizeof(l_uint32))) == NULL) { - L_ERROR("lineb not made\n", procName); - goto cleanup; - } - - /* Make line buffer for 1 line of virtual intermediate image */ - if ((linebp = (l_uint32 *)LEPT_CALLOC(wplb, sizeof(l_uint32))) == NULL) { - L_ERROR("linebp not made\n", procName); - goto cleanup; - } - - /* Make dest binary image */ - if ((pixd = pixCreate(wd, hd, 1)) == NULL) { - L_ERROR("pixd not made\n", procName); - goto cleanup; - } - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 4.0, 4.0); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - - /* Start with the first src and the first 3 dest lines */ - memcpy(bufs, datas, 4 * wpls); /* first src line */ - memcpy(bufs + wpls, datas + wpls, 4 * wpls); /* 2nd src line */ - scaleGray4xLILineLow(lineb, wplb, bufs, ws, wpls, 0); /* 4 b lines */ - lined = datad; - for (j = 0; j < 3; j++) { /* first 3 d lines of Q */ - ditherToBinaryLineLow(lined + j * wpld, wd, lineb + j * wplb, - lineb + 
(j + 1) * wplb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - } - - /* Do all but last src line */ - for (i = 1; i < hsm; i++) { - memcpy(bufs, datas + i * wpls, 4 * wpls); /* i-th src line */ - memcpy(bufs + wpls, datas + (i + 1) * wpls, 4 * wpls); - memcpy(linebp, lineb + 3 * wplb, 4 * wplb); - scaleGray4xLILineLow(lineb, wplb, bufs, ws, wpls, 0); /* 4 b lines */ - lined = datad + 4 * i * wpld; - ditherToBinaryLineLow(lined - wpld, wd, linebp, lineb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* 4th dest line of Q */ - for (j = 0; j < 3; j++) { /* next 3 d lines of Quad */ - ditherToBinaryLineLow(lined + j * wpld, wd, lineb + j * wplb, - lineb + (j + 1) * wplb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - } - } - - /* Do the last src line and the last 5 dest lines */ - memcpy(bufs, datas + hsm * wpls, 4 * wpls); /* hsm-th src line */ - memcpy(linebp, lineb + 3 * wplb, 4 * wplb); /* 1 b line */ - scaleGray4xLILineLow(lineb, wplb, bufs, ws, wpls, 1); /* 4 b lines */ - lined = datad + 4 * hsm * wpld; - ditherToBinaryLineLow(lined - wpld, wd, linebp, lineb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - /* 4th dest line of Q */ - for (j = 0; j < 3; j++) { /* next 3 d lines of Quad */ - ditherToBinaryLineLow(lined + j * wpld, wd, lineb + j * wplb, - lineb + (j + 1) * wplb, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 0); - } - /* And finally, the last dest line */ - ditherToBinaryLineLow(lined + 3 * wpld, wd, lineb + 3 * wplb, NULL, - DEFAULT_CLIP_LOWER_1, DEFAULT_CLIP_UPPER_1, 1); - -cleanup: - LEPT_FREE(bufs); - LEPT_FREE(lineb); - LEPT_FREE(linebp); - return pixd; -} - - -/*------------------------------------------------------------------* - * Scaling by closest pixel sampling * - *------------------------------------------------------------------*/ -/*! 
- * \brief pixScaleBySampling() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp - * \param[in] scalex must be > 0.0 - * \param[in] scaley must be > 0.0 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function samples from the source without
- *          filtering.  As a result, aliasing will occur for
- *          subsampling (%scalex and/or %scaley < 1.0).
- *      (2) If %scalex == 1.0 and %scaley == 1.0, returns a copy.
- * 
- */ -PIX * -pixScaleBySampling(PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 ws, hs, d, wpls, wd, hd, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleBySampling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factor <= 0", procName, NULL); - if (scalex == 1.0 && scaley == 1.0) - return pixCopy(NULL, pixs); - if ((d = pixGetDepth(pixs)) == 1) - return pixScaleBinary(pixs, scalex, scaley); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wd = (l_int32)(scalex * (l_float32)ws + 0.5); - hd = (l_int32)(scaley * (l_float32)hs + 0.5); - if ((pixd = pixCreate(wd, hd, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, scalex, scaley); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixCopySpp(pixd, pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleBySamplingLow(datad, wd, hd, wpld, datas, ws, hs, d, wpls); - if (d == 32 && pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, scalex, scaley); - - return pixd; -} - - -/*! - * \brief pixScaleBySamplingToSize() - * - * \param[in] pixs 1, 2, 4, 8, 16 and 32 bpp - * \param[in] wd target width; use 0 if using height as target - * \param[in] hd target height; use 0 if using width as target - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This guarantees that the output scaled image has the
- *          dimension(s) you specify.
- *          ~ To specify the width with isotropic scaling, set %hd = 0.
- *          ~ To specify the height with isotropic scaling, set %wd = 0.
- *          ~ If both %wd and %hd are specified, the image is scaled
- *            (in general, anisotropically) to that size.
- *          ~ It is an error to set both %wd and %hd to 0.
- * 
- */ -PIX * -pixScaleBySamplingToSize(PIX *pixs, - l_int32 wd, - l_int32 hd) -{ -l_int32 w, h; -l_float32 scalex, scaley; - - PROCNAME("pixScaleBySamplingToSize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (wd <= 0 && hd <= 0) - return (PIX *)ERROR_PTR("neither wd nor hd > 0", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (wd <= 0) { - scaley = (l_float32)hd / (l_float32)h; - scalex = scaley; - } else if (hd <= 0) { - scalex = (l_float32)wd / (l_float32)w; - scaley = scalex; - } else { - scalex = (l_float32)wd / (l_float32)w; - scaley = (l_float32)hd / (l_float32)h; - } - - return pixScaleBySampling(pixs, scalex, scaley); -} - - -/*! - * \brief pixScaleByIntSampling() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp - * \param[in] factor integer subsampling - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) Simple interface to pixScaleBySampling(), for
- *          isotropic integer reduction.
- *      (2) If %factor == 1, returns a copy.
- * 
- */ -PIX * -pixScaleByIntSampling(PIX *pixs, - l_int32 factor) -{ -l_float32 scale; - - PROCNAME("pixScaleByIntSampling"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor <= 1) { - if (factor < 1) - L_ERROR("factor must be >= 1; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - scale = 1. / (l_float32)factor; - return pixScaleBySampling(pixs, scale, scale); -} - - -/*------------------------------------------------------------------* - * Fast integer factor subsampling RGB to gray * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleRGBToGrayFast() - * - * \param[in] pixs 32 bpp rgb - * \param[in] factor integer reduction factor >= 1 - * \param[in] color one of COLOR_RED, COLOR_GREEN, COLOR_BLUE - * \return pixd 8 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does simultaneous subsampling by an integer factor and
- *          extraction of the color from the RGB pix.
- *      (2) It is designed for maximum speed, and is used for quickly
- *          generating a downsized grayscale image from a higher resolution
- *          RGB image.  This would typically be used for image analysis.
- *      (3) The standard color byte order (RGBA) is assumed.
- * 
- */ -PIX * -pixScaleRGBToGrayFast(PIX *pixs, - l_int32 factor, - l_int32 color) -{ -l_int32 byteval, shift; -l_int32 i, j, ws, hs, wd, hd, wpls, wpld; -l_uint32 *datas, *words, *datad, *lined; -l_float32 scale; -PIX *pixd; - - PROCNAME("pixScaleRGBToGrayFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("depth not 32 bpp", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("factor must be >= 1", procName, NULL); - - if (color == COLOR_RED) - shift = L_RED_SHIFT; - else if (color == COLOR_GREEN) - shift = L_GREEN_SHIFT; - else if (color == COLOR_BLUE) - shift = L_BLUE_SHIFT; - else - return (PIX *)ERROR_PTR("invalid color", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - wd = ws / factor; - hd = hs / factor; - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - scale = 1. / (l_float32) factor; - pixScaleResolution(pixd, scale, scale); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < hd; i++) { - words = datas + i * factor * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++, words += factor) { - byteval = ((*words) >> shift) & 0xff; - SET_DATA_BYTE(lined, j, byteval); - } - } - - return pixd; -} - - -/*! - * \brief pixScaleRGBToBinaryFast() - * - * \param[in] pixs 32 bpp RGB - * \param[in] factor integer reduction factor >= 1 - * \param[in] thresh binarization threshold - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does simultaneous subsampling by an integer factor and
- *          conversion from RGB to gray to binary.
- *      (2) It is designed for maximum speed, and is used for quickly
- *          generating a downsized binary image from a higher resolution
- *          RGB image.  This would typically be used for image analysis.
- *      (3) It uses the green channel to represent the RGB pixel intensity.
- * 
- */ -PIX * -pixScaleRGBToBinaryFast(PIX *pixs, - l_int32 factor, - l_int32 thresh) -{ -l_int32 byteval; -l_int32 i, j, ws, hs, wd, hd, wpls, wpld; -l_uint32 *datas, *words, *datad, *lined; -l_float32 scale; -PIX *pixd; - - PROCNAME("pixScaleRGBToBinaryFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("factor must be >= 1", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("depth not 32 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - wd = ws / factor; - hd = hs / factor; - if ((pixd = pixCreate(wd, hd, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - scale = 1. / (l_float32) factor; - pixScaleResolution(pixd, scale, scale); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < hd; i++) { - words = datas + i * factor * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++, words += factor) { - byteval = ((*words) >> L_GREEN_SHIFT) & 0xff; - if (byteval < thresh) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*! - * \brief pixScaleGrayToBinaryFast() - * - * \param[in] pixs 8 bpp grayscale - * \param[in] factor integer reduction factor >= 1 - * \param[in] thresh binarization threshold - * \return pixd 1 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This does simultaneous subsampling by an integer factor and
- *          thresholding from gray to binary.
- *      (2) It is designed for maximum speed, and is used for quickly
- *          generating a downsized binary image from a higher resolution
- *          gray image.  This would typically be used for image analysis.
- * 
- */ -PIX * -pixScaleGrayToBinaryFast(PIX *pixs, - l_int32 factor, - l_int32 thresh) -{ -l_int32 byteval; -l_int32 i, j, ws, hs, wd, hd, wpls, wpld, sj; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 scale; -PIX *pixd; - - PROCNAME("pixScaleGrayToBinaryFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (factor < 1) - return (PIX *)ERROR_PTR("factor must be >= 1", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("depth not 8 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - - wd = ws / factor; - hd = hs / factor; - if ((pixd = pixCreate(wd, hd, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - scale = 1. / (l_float32) factor; - pixScaleResolution(pixd, scale, scale); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < hd; i++) { - lines = datas + i * factor * wpls; - lined = datad + i * wpld; - for (j = 0, sj = 0; j < wd; j++, sj += factor) { - byteval = GET_DATA_BYTE(lines, sj); - if (byteval < thresh) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*------------------------------------------------------------------* - * Downscaling with (antialias) smoothing * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleSmooth() - * - * \param[in] pix 2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap - * \param[in] scalex must be < 0.7 - * \param[in] scaley must be < 0.7 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function should only be used when the scale factors are less
- *          than 0.7 (i.e., more than about 1.42x reduction).  If either
- *          scale factor is greater than or equal to 0.7, we issue a warning
- *          and call pixScaleGeneral(), which will invoke linear
- *          interpolation without sharpening.
- *      (2) This works only on 2, 4, 8 and 32 bpp images, and if there is
- *          a colormap, it is removed by converting to RGB.  In other
- *          cases, we issue a warning and call pixScaleGeneral().
- *      (3) It does simple (flat filter) convolution, with a filter size
- *          commensurate with the amount of reduction, to avoid antialiasing.
- *      (4) It does simple subsampling after smoothing, which is appropriate
- *          for this range of scaling.  Linear interpolation gives essentially
- *          the same result with more computation for these scale factors,
- *          so we don't use it.
- *      (5) The result is the same as doing a full block convolution followed by
- *          subsampling, but this is faster because the results of the block
- *          convolution are only computed at the subsampling locations.
- *          In fact, the computation time is approximately independent of
- *          the scale factor, because the convolution kernel is adjusted
- *          so that each source pixel is summed approximately once.
- * 
- */ -PIX * -pixScaleSmooth(PIX *pix, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 ws, hs, d, wd, hd, wpls, wpld, isize; -l_uint32 *datas, *datad; -l_float32 minscale, size; -PIX *pixs, *pixd; - - PROCNAME("pixScaleSmooth"); - - if (!pix) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - if (scalex >= 0.7 || scaley >= 0.7) { - L_WARNING("scaling factor not < 0.7; do regular scaling\n", procName); - return pixScaleGeneral(pix, scalex, scaley, 0.0, 0); - } - - /* Remove colormap if necessary. - * If 2 bpp or 4 bpp gray, convert to 8 bpp */ - d = pixGetDepth(pix); - if ((d == 2 || d == 4 || d == 8) && pixGetColormap(pix)) { - L_WARNING("pix has colormap; removing\n", procName); - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixs); - } else if (d == 2 || d == 4) { - pixs = pixConvertTo8(pix, FALSE); - d = 8; - } else { - pixs = pixClone(pix); - } - - if (d != 8 && d != 32) { /* d == 1 or d == 16 */ - L_WARNING("depth not 8 or 32 bpp; do regular scaling\n", procName); - pixDestroy(&pixs); - return pixScaleGeneral(pix, scalex, scaley, 0.0, 0); - } - - /* If 1.42 < 1/minscale < 2.5, use isize = 2 - * If 2.5 =< 1/minscale < 3.5, use isize = 3, etc. 
- * Under no conditions use isize < 2 */ - minscale = L_MIN(scalex, scaley); - size = 1.0 / minscale; /* ideal filter full width */ - isize = L_MAX(2, (l_int32)(size + 0.5)); - - pixGetDimensions(pixs, &ws, &hs, NULL); - if ((ws < isize) || (hs < isize)) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - } - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wd = (l_int32)(scalex * (l_float32)ws + 0.5); - hd = (l_int32)(scaley * (l_float32)hs + 0.5); - if (wd < 1 || hd < 1) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("pixd too small", procName, NULL); - } - if ((pixd = pixCreate(wd, hd, d)) == NULL) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixScaleResolution(pixd, scalex, scaley); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleSmoothLow(datad, wd, hd, wpld, datas, ws, hs, d, wpls, isize); - if (d == 32 && pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, scalex, scaley); - - pixDestroy(&pixs); - return pixd; -} - - -/*! - * \brief pixScaleSmoothToSize() - * - * \param[in] pixs 2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap - * \param[in] wd target width; use 0 if using height as target - * \param[in] hd target height; use 0 if using width as target - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See notes in pixScaleSmooth().
- *      (2) The output scaled image has the dimension(s) you specify:
- *          - To specify the width with isotropic scaling, set %hd = 0.
- *          - To specify the height with isotropic scaling, set %wd = 0.
- *          - If both %wd and %hd are specified, the image is scaled
- *             (in general, anisotropically) to that size.
- *          - It is an error to set both %wd and %hd to 0.
- * 
- */ -PIX * -pixScaleSmoothToSize(PIX *pixs, - l_int32 wd, - l_int32 hd) -{ -l_int32 w, h; -l_float32 scalex, scaley; - - PROCNAME("pixScaleSmoothToSize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (wd <= 0 && hd <= 0) - return (PIX *)ERROR_PTR("neither wd nor hd > 0", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (wd <= 0) { - scaley = (l_float32)hd / (l_float32)h; - scalex = scaley; - } else if (hd <= 0) { - scalex = (l_float32)wd / (l_float32)w; - scaley = scalex; - } else { - scalex = (l_float32)wd / (l_float32)w; - scaley = (l_float32)hd / (l_float32)h; - } - - return pixScaleSmooth(pixs, scalex, scaley); -} - - -/*! - * \brief pixScaleRGBToGray2() - * - * \param[in] pixs 32 bpp rgb - * \param[in] rwt, gwt, bwt must sum to 1.0 - * \return pixd, 8 bpp, 2x reduced, or NULL on error - */ -PIX * -pixScaleRGBToGray2(PIX *pixs, - l_float32 rwt, - l_float32 gwt, - l_float32 bwt) -{ -l_int32 wd, hd, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleRGBToGray2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 32) - return (PIX *)ERROR_PTR("pixs not 32 bpp", procName, NULL); - if (rwt + gwt + bwt < 0.98 || rwt + gwt + bwt > 1.02) - return (PIX *)ERROR_PTR("sum of wts should be 1.0", procName, NULL); - - wd = pixGetWidth(pixs) / 2; - hd = pixGetHeight(pixs) / 2; - wpls = pixGetWpl(pixs); - datas = pixGetData(pixs); - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyResolution(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixScaleResolution(pixd, 0.5, 0.5); - wpld = pixGetWpl(pixd); - datad = pixGetData(pixd); - scaleRGBToGray2Low(datad, wd, hd, wpld, datas, wpls, rwt, gwt, bwt); - return pixd; -} - - -/*------------------------------------------------------------------* - * Downscaling with (antialias) area mapping * - 
*------------------------------------------------------------------*/ -/*! - * \brief pixScaleAreaMap() - * - * \param[in] pix 2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap - * \param[in] scalex must be < 0.7 - * \param[in] scaley must be < 0.7 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function should only be used when the scale factors are less
- *          than 0.7 (i.e., more than about 1.42x reduction).  If either
- *          scale factor is greater than or equal to 0.7, we issue a warning
- *          and call pixScaleGeneral(), which will invoke linear
- *          interpolation without sharpening.
- *      (2) This works only on 2, 4, 8 and 32 bpp images.  If there is
- *          a colormap, it is removed by converting to RGB.  In other
- *          cases, we issue a warning and call pixScaleGeneral().
- *      (3) This is faster than pixScale() because it does not do sharpening.
- *      (4) It does a relatively expensive area mapping computation, to
- *          avoid antialiasing.  It is about 2x slower than pixScaleSmooth(),
- *          but the results are much better on fine text.
- *      (5) pixScaleAreaMap2() is typically about 7x faster for the special
- *          case of 2x reduction for color images, and about 9x faster
- *          for grayscale images.  Surprisingly, the improvement in speed
- *          when using a cascade of 2x reductions for small scale factors is
- *          less than one might expect, and in most situations gives
- *          poorer image quality.  But see (6).
- *      (6) For reductions between 0.35 and 0.5, a 2x area map reduction
- *          followed by using pixScaleGeneral() on a 2x larger scalefactor
- *          (which further reduces the image size using bilinear interpolation)
- *          would give a significant speed increase, with little loss of
- *          quality, but this is not enabled as it would break too many tests.
- *          For scaling factors below 0.35, scaling atomically is nearly
- *          as fast as using a cascade of 2x scalings, and gives
- *          better results.
- * 
- */ -PIX * -pixScaleAreaMap(PIX *pix, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 ws, hs, d, wd, hd, wpls, wpld; -l_uint32 *datas, *datad; -l_float32 maxscale; -PIX *pixs, *pixd, *pix1, *pix2, *pix3; - - PROCNAME("pixScaleAreaMap"); - - if (!pix) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - d = pixGetDepth(pix); - if (d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pix not 2, 4, 8 or 32 bpp", procName, NULL); - maxscale = L_MAX(scalex, scaley); - if (maxscale >= 0.7) { - L_WARNING("scaling factors not < 0.7; do regular scaling\n", procName); - return pixScaleGeneral(pix, scalex, scaley, 0.0, 0); - } - - /* Special cases: 2x, 4x, 8x, 16x reduction */ - if (scalex == 0.5 && scaley == 0.5) - return pixScaleAreaMap2(pix); - if (scalex == 0.25 && scaley == 0.25) { - pix1 = pixScaleAreaMap2(pix); - pixd = pixScaleAreaMap2(pix1); - pixDestroy(&pix1); - return pixd; - } - if (scalex == 0.125 && scaley == 0.125) { - pix1 = pixScaleAreaMap2(pix); - pix2 = pixScaleAreaMap2(pix1); - pixd = pixScaleAreaMap2(pix2); - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; - } - if (scalex == 0.0625 && scaley == 0.0625) { - pix1 = pixScaleAreaMap2(pix); - pix2 = pixScaleAreaMap2(pix1); - pix3 = pixScaleAreaMap2(pix2); - pixd = pixScaleAreaMap2(pix3); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - return pixd; - } - -#if 0 /* Not enabled because it breaks too many tests that rely on exact - * pixel matches. */ - /* Special case where it is significantly faster to downscale first - * by 2x, with relatively little degradation in image quality. */ - if (scalex > 0.35 && scalex < 0.5) { - pix1 = pixScaleAreaMap2(pix); - pixd = pixScaleAreaMap(pix1, 2.0 * scalex, 2.0 * scaley); - pixDestroy(&pix1); - return pixd; - } -#endif - - /* Remove colormap if necessary. 
- * If 2 bpp or 4 bpp gray, convert to 8 bpp */ - if ((d == 2 || d == 4 || d == 8) && pixGetColormap(pix)) { - L_WARNING("pix has colormap; removing\n", procName); - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixs); - } else if (d == 2 || d == 4) { - pixs = pixConvertTo8(pix, FALSE); - d = 8; - } else { - pixs = pixClone(pix); - } - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wd = (l_int32)(scalex * (l_float32)ws + 0.5); - hd = (l_int32)(scaley * (l_float32)hs + 0.5); - if (wd < 1 || hd < 1) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("pixd too small", procName, NULL); - } - if ((pixd = pixCreate(wd, hd, d)) == NULL) { - pixDestroy(&pixs); - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, scalex, scaley); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - if (d == 8) { - scaleGrayAreaMapLow(datad, wd, hd, wpld, datas, ws, hs, wpls); - } else { /* RGB, d == 32 */ - scaleColorAreaMapLow(datad, wd, hd, wpld, datas, ws, hs, wpls); - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, scalex, scaley); - } - - pixDestroy(&pixs); - return pixd; -} - - -/*! - * \brief pixScaleAreaMap2() - * - * \param[in] pix 2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function does an area mapping (average) for 2x
- *          reduction.
- *      (2) This works only on 2, 4, 8 and 32 bpp images.  If there is
- *          a colormap, it is removed by converting to RGB.
- *      (3) Compared to the general pixScaleAreaMap(), for this function
- *          gray processing is about 14x faster and color processing
- *          is about 4x faster.  Consequently, pixScaleAreaMap2() is
- *          incorporated into the general area map scaling function,
- *          for the special cases of 2x, 4x, 8x and 16x reduction.
- * 
- */ -PIX * -pixScaleAreaMap2(PIX *pix) -{ -l_int32 wd, hd, d, wpls, wpld; -l_uint32 *datas, *datad; -PIX *pixs, *pixd; - - PROCNAME("pixScaleAreaMap2"); - - if (!pix) - return (PIX *)ERROR_PTR("pix not defined", procName, NULL); - d = pixGetDepth(pix); - if (d != 2 && d != 4 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pix not 2, 4, 8 or 32 bpp", procName, NULL); - - /* Remove colormap if necessary. - * If 2 bpp or 4 bpp gray, convert to 8 bpp */ - if ((d == 2 || d == 4 || d == 8) && pixGetColormap(pix)) { - L_WARNING("pix has colormap; removing\n", procName); - pixs = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - d = pixGetDepth(pixs); - } else if (d == 2 || d == 4) { - pixs = pixConvertTo8(pix, FALSE); - d = 8; - } else { - pixs = pixClone(pix); - } - - wd = pixGetWidth(pixs) / 2; - hd = pixGetHeight(pixs) / 2; - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - pixd = pixCreate(wd, hd, d); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.5, 0.5); - scaleAreaMapLow2(datad, wd, hd, wpld, datas, d, wpls); - if (pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, 0.5, 0.5); - pixDestroy(&pixs); - return pixd; -} - - -/*! - * \brief pixScaleAreaMapToSize() - * - * \param[in] pixs 2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap - * \param[in] wd target width; use 0 if using height as target - * \param[in] hd target height; use 0 if using width as target - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) See notes in pixScaleAreaMap().
- *      (2) The output scaled image has the dimension(s) you specify:
- *          - To specify the width with isotropic scaling, set %hd = 0.
- *          - To specify the height with isotropic scaling, set %wd = 0.
- *          - If both %wd and %hd are specified, the image is scaled
- *             (in general, anisotropically) to that size.
- *          - It is an error to set both %wd and %hd to 0.
- * 
- */ -PIX * -pixScaleAreaMapToSize(PIX *pixs, - l_int32 wd, - l_int32 hd) -{ -l_int32 w, h; -l_float32 scalex, scaley; - - PROCNAME("pixScaleAreaMapToSize"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (wd <= 0 && hd <= 0) - return (PIX *)ERROR_PTR("neither wd nor hd > 0", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if (wd <= 0) { - scaley = (l_float32)hd / (l_float32)h; - scalex = scaley; - } else if (hd <= 0) { - scalex = (l_float32)wd / (l_float32)w; - scaley = scalex; - } else { - scalex = (l_float32)wd / (l_float32)w; - scaley = (l_float32)hd / (l_float32)h; - } - - return pixScaleAreaMap(pixs, scalex, scaley); -} - - -/*------------------------------------------------------------------* - * Binary scaling by closest pixel sampling * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleBinary() - * - * \param[in] pixs 1 bpp - * \param[in] scalex must be > 0.0 - * \param[in] scaley must be > 0.0 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This function samples from the source without
- *          filtering.  As a result, aliasing will occur for
- *          subsampling (scalex and scaley < 1.0).
- * 
- */ -PIX * -pixScaleBinary(PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -l_int32 ws, hs, wpls, wd, hd, wpld; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleBinary"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factor <= 0", procName, NULL); - if (scalex == 1.0 && scaley == 1.0) - return pixCopy(NULL, pixs); - - pixGetDimensions(pixs, &ws, &hs, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - wd = (l_int32)(scalex * (l_float32)ws + 0.5); - hd = (l_int32)(scaley * (l_float32)hs + 0.5); - if ((pixd = pixCreate(wd, hd, 1)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyColormap(pixd, pixs); - pixCopyText(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, scalex, scaley); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - scaleBinaryLow(datad, wd, hd, wpld, datas, ws, hs, wpls); - return pixd; -} - - -/* ================================================================ * - * Low level static functions * - * ================================================================ */ - -/*------------------------------------------------------------------* - * General linear interpolated color scaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleColorLILow() - * - *
- * Notes:
- *      (1) We choose to divide each pixel into 16 x 16 sub-pixels.
- *          Linear interpolation is equivalent to finding the
- *          fractional area (i.e., number of sub-pixels divided
- *          by 256) associated with each of the four nearest src pixels,
- *          and weighting each pixel value by this fractional area.
- * 
- */ -static void -scaleColorLILow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, j, wm2, hm2; -l_int32 xpm, ypm; /* location in src image, to 1/16 of a pixel */ -l_int32 xp, yp, xf, yf; /* src pixel and pixel fraction coordinates */ -l_int32 v00r, v01r, v10r, v11r, v00g, v01g, v10g, v11g; -l_int32 v00b, v01b, v10b, v11b, area00, area01, area10, area11; -l_uint32 pixels1, pixels2, pixels3, pixels4, pixel; -l_uint32 *lines, *lined; -l_float32 scx, scy; - - /* (scx, scy) are scaling factors that are applied to the - * dest coords to get the corresponding src coords. - * We need them because we iterate over dest pixels - * and must find the corresponding set of src pixels. */ - scx = 16. * (l_float32)ws / (l_float32)wd; - scy = 16. * (l_float32)hs / (l_float32)hd; - wm2 = ws - 2; - hm2 = hs - 2; - - /* Iterate over the destination pixels */ - for (i = 0; i < hd; i++) { - ypm = (l_int32)(scy * (l_float32)i); - yp = ypm >> 4; - yf = ypm & 0x0f; - lined = datad + i * wpld; - lines = datas + yp * wpls; - for (j = 0; j < wd; j++) { - xpm = (l_int32)(scx * (l_float32)j); - xp = xpm >> 4; - xf = xpm & 0x0f; - - /* Do bilinear interpolation. This is a simple - * generalization of the calculation in scaleGrayLILow(). - * Without this, we could simply subsample: - * *(lined + j) = *(lines + xp); - * which is faster but gives lousy results! 
*/ - pixels1 = *(lines + xp); - - if (xp > wm2 || yp > hm2) { - if (yp > hm2 && xp <= wm2) { /* pixels near bottom */ - pixels2 = *(lines + xp + 1); - pixels3 = pixels1; - pixels4 = pixels2; - } else if (xp > wm2 && yp <= hm2) { /* pixels near rt side */ - pixels2 = pixels1; - pixels3 = *(lines + wpls + xp); - pixels4 = pixels3; - } else { /* pixels at LR corner */ - pixels4 = pixels3 = pixels2 = pixels1; - } - } else { - pixels2 = *(lines + xp + 1); - pixels3 = *(lines + wpls + xp); - pixels4 = *(lines + wpls + xp + 1); - } - - area00 = (16 - xf) * (16 - yf); - area10 = xf * (16 - yf); - area01 = (16 - xf) * yf; - area11 = xf * yf; - v00r = area00 * ((pixels1 >> L_RED_SHIFT) & 0xff); - v00g = area00 * ((pixels1 >> L_GREEN_SHIFT) & 0xff); - v00b = area00 * ((pixels1 >> L_BLUE_SHIFT) & 0xff); - v10r = area10 * ((pixels2 >> L_RED_SHIFT) & 0xff); - v10g = area10 * ((pixels2 >> L_GREEN_SHIFT) & 0xff); - v10b = area10 * ((pixels2 >> L_BLUE_SHIFT) & 0xff); - v01r = area01 * ((pixels3 >> L_RED_SHIFT) & 0xff); - v01g = area01 * ((pixels3 >> L_GREEN_SHIFT) & 0xff); - v01b = area01 * ((pixels3 >> L_BLUE_SHIFT) & 0xff); - v11r = area11 * ((pixels4 >> L_RED_SHIFT) & 0xff); - v11g = area11 * ((pixels4 >> L_GREEN_SHIFT) & 0xff); - v11b = area11 * ((pixels4 >> L_BLUE_SHIFT) & 0xff); - pixel = (((v00r + v10r + v01r + v11r + 128) << 16) & 0xff000000) | - (((v00g + v10g + v01g + v11g + 128) << 8) & 0x00ff0000) | - ((v00b + v10b + v01b + v11b + 128) & 0x0000ff00); - *(lined + j) = pixel; - } - } -} - - -/*------------------------------------------------------------------* - * General linear interpolated gray scaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleGrayLILow() - * - *
- * Notes:
- *      (1) We choose to divide each pixel into 16 x 16 sub-pixels.
- *          Linear interpolation is equivalent to finding the
- *          fractional area (i.e., number of sub-pixels divided
- *          by 256) associated with each of the four nearest src pixels,
- *          and weighting each pixel value by this fractional area.
- * 
- */ -static void -scaleGrayLILow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, j, wm2, hm2; -l_int32 xpm, ypm; /* location in src image, to 1/16 of a pixel */ -l_int32 xp, yp, xf, yf; /* src pixel and pixel fraction coordinates */ -l_int32 v00, v01, v10, v11, v00_val, v01_val, v10_val, v11_val; -l_uint8 val; -l_uint32 *lines, *lined; -l_float32 scx, scy; - - /* (scx, scy) are scaling factors that are applied to the - * dest coords to get the corresponding src coords. - * We need them because we iterate over dest pixels - * and must find the corresponding set of src pixels. */ - scx = 16. * (l_float32)ws / (l_float32)wd; - scy = 16. * (l_float32)hs / (l_float32)hd; - wm2 = ws - 2; - hm2 = hs - 2; - - /* Iterate over the destination pixels */ - for (i = 0; i < hd; i++) { - ypm = (l_int32)(scy * (l_float32)i); - yp = ypm >> 4; - yf = ypm & 0x0f; - lined = datad + i * wpld; - lines = datas + yp * wpls; - for (j = 0; j < wd; j++) { - xpm = (l_int32)(scx * (l_float32)j); - xp = xpm >> 4; - xf = xpm & 0x0f; - - /* Do bilinear interpolation. Without this, we could - * simply subsample: - * SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp)); - * which is faster but gives lousy results! 
*/ - v00_val = GET_DATA_BYTE(lines, xp); - if (xp > wm2 || yp > hm2) { - if (yp > hm2 && xp <= wm2) { /* pixels near bottom */ - v01_val = v00_val; - v10_val = GET_DATA_BYTE(lines, xp + 1); - v11_val = v10_val; - } else if (xp > wm2 && yp <= hm2) { /* pixels near rt side */ - v01_val = GET_DATA_BYTE(lines + wpls, xp); - v10_val = v00_val; - v11_val = v01_val; - } else { /* pixels at LR corner */ - v10_val = v01_val = v11_val = v00_val; - } - } else { - v10_val = GET_DATA_BYTE(lines, xp + 1); - v01_val = GET_DATA_BYTE(lines + wpls, xp); - v11_val = GET_DATA_BYTE(lines + wpls, xp + 1); - } - - v00 = (16 - xf) * (16 - yf) * v00_val; - v10 = xf * (16 - yf) * v10_val; - v01 = (16 - xf) * yf * v01_val; - v11 = xf * yf * v11_val; - - val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256); - SET_DATA_BYTE(lined, j, val); - } - } -} - - -/*------------------------------------------------------------------* - * 2x linear interpolated color scaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleColor2xLILow() - * - *
- * Notes:
- *      (1) This is a special case of 2x expansion by linear
- *          interpolation.  Each src pixel contains 4 dest pixels.
- *          The 4 dest pixels in src pixel 1 are numbered at
- *          their UL corners.  The 4 dest pixels in src pixel 1
- *          are related to that src pixel and its 3 neighboring
- *          src pixels as follows:
- *
- *             1-----2-----|-----|-----|
- *             |     |     |     |     |
- *             |     |     |     |     |
- *  src 1 -->  3-----4-----|     |     |  <-- src 2
- *             |     |     |     |     |
- *             |     |     |     |     |
- *             |-----|-----|-----|-----|
- *             |     |     |     |     |
- *             |     |     |     |     |
- *  src 3 -->  |     |     |     |     |  <-- src 4
- *             |     |     |     |     |
- *             |     |     |     |     |
- *             |-----|-----|-----|-----|
- *
- *           dest      src
- *           ----      ---
- *           dp1    =  sp1
- *           dp2    =  (sp1 + sp2) / 2
- *           dp3    =  (sp1 + sp3) / 2
- *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
- *
- *      (2) We iterate over the src pixels, and unroll the calculation
- *          for each set of 4 dest pixels corresponding to that src
- *          pixel, caching pixels for the next src pixel whenever possible.
- *          The method is exactly analogous to the one we use for
- *          scaleGray2xLILow() and its line version.
- * 
- */ -static void -scaleColor2xLILow(l_uint32 *datad, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, hsm; -l_uint32 *lines, *lined; - - hsm = hs - 1; - - /* We're taking 2 src and 2 dest lines at a time, - * and for each src line, we're computing 2 dest lines. - * Call these 2 dest lines: destline1 and destline2. - * The first src line is used for destline 1. - * On all but the last src line, both src lines are - * used in the linear interpolation for destline2. - * On the last src line, both destline1 and destline2 - * are computed using only that src line (because there - * isn't a lower src line). */ - - /* iterate over all but the last src line */ - for (i = 0; i < hsm; i++) { - lines = datas + i * wpls; - lined = datad + 2 * i * wpld; - scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 0); - } - - /* last src line */ - lines = datas + hsm * wpls; - lined = datad + 2 * hsm * wpld; - scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 1); -} - - -/*! 
- * \brief scaleColor2xLILineLow() - * - * \param[in] lined ptr to top destline, to be made from current src line - * \param[in] wpld - * \param[in] lines ptr to current src line - * \param[in] ws - * \param[in] wpls - * \param[in] lastlineflag 1 if last src line; 0 otherwise - * \return void - */ -static void -scaleColor2xLILineLow(l_uint32 *lined, - l_int32 wpld, - l_uint32 *lines, - l_int32 ws, - l_int32 wpls, - l_int32 lastlineflag) -{ -l_int32 j, jd, wsm; -l_uint32 rval1, rval2, rval3, rval4, gval1, gval2, gval3, gval4; -l_uint32 bval1, bval2, bval3, bval4; -l_uint32 pixels1, pixels2, pixels3, pixels4, pixel; -l_uint32 *linesp, *linedp; - - wsm = ws - 1; - - if (lastlineflag == 0) { - linesp = lines + wpls; - linedp = lined + wpld; - pixels1 = *lines; - pixels3 = *linesp; - - /* initialize with v(2) and v(4) */ - rval2 = pixels1 >> 24; - gval2 = (pixels1 >> 16) & 0xff; - bval2 = (pixels1 >> 8) & 0xff; - rval4 = pixels3 >> 24; - gval4 = (pixels3 >> 16) & 0xff; - bval4 = (pixels3 >> 8) & 0xff; - - for (j = 0, jd = 0; j < wsm; j++, jd += 2) { - /* shift in previous src values */ - rval1 = rval2; - gval1 = gval2; - bval1 = bval2; - rval3 = rval4; - gval3 = gval4; - bval3 = bval4; - /* get new src values */ - pixels2 = *(lines + j + 1); - pixels4 = *(linesp + j + 1); - rval2 = pixels2 >> 24; - gval2 = (pixels2 >> 16) & 0xff; - bval2 = (pixels2 >> 8) & 0xff; - rval4 = pixels4 >> 24; - gval4 = (pixels4 >> 16) & 0xff; - bval4 = (pixels4 >> 8) & 0xff; - /* save dest values */ - pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8); - *(lined + jd) = pixel; /* pix 1 */ - pixel = ((((rval1 + rval2) << 23) & 0xff000000) | - (((gval1 + gval2) << 15) & 0x00ff0000) | - (((bval1 + bval2) << 7) & 0x0000ff00)); - *(lined + jd + 1) = pixel; /* pix 2 */ - pixel = ((((rval1 + rval3) << 23) & 0xff000000) | - (((gval1 + gval3) << 15) & 0x00ff0000) | - (((bval1 + bval3) << 7) & 0x0000ff00)); - *(linedp + jd) = pixel; /* pix 3 */ - pixel = ((((rval1 + rval2 + rval3 + rval4) << 22) & 
0xff000000) | - (((gval1 + gval2 + gval3 + gval4) << 14) & 0x00ff0000) | - (((bval1 + bval2 + bval3 + bval4) << 6) & 0x0000ff00)); - *(linedp + jd + 1) = pixel; /* pix 4 */ - } - /* last src pixel on line */ - rval1 = rval2; - gval1 = gval2; - bval1 = bval2; - rval3 = rval4; - gval3 = gval4; - bval3 = bval4; - pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8); - *(lined + 2 * wsm) = pixel; /* pix 1 */ - *(lined + 2 * wsm + 1) = pixel; /* pix 2 */ - pixel = ((((rval1 + rval3) << 23) & 0xff000000) | - (((gval1 + gval3) << 15) & 0x00ff0000) | - (((bval1 + bval3) << 7) & 0x0000ff00)); - *(linedp + 2 * wsm) = pixel; /* pix 3 */ - *(linedp + 2 * wsm + 1) = pixel; /* pix 4 */ - } else { /* last row of src pixels: lastlineflag == 1 */ - linedp = lined + wpld; - pixels2 = *lines; - rval2 = pixels2 >> 24; - gval2 = (pixels2 >> 16) & 0xff; - bval2 = (pixels2 >> 8) & 0xff; - for (j = 0, jd = 0; j < wsm; j++, jd += 2) { - rval1 = rval2; - gval1 = gval2; - bval1 = bval2; - pixels2 = *(lines + j + 1); - rval2 = pixels2 >> 24; - gval2 = (pixels2 >> 16) & 0xff; - bval2 = (pixels2 >> 8) & 0xff; - pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8); - *(lined + jd) = pixel; /* pix 1 */ - *(linedp + jd) = pixel; /* pix 2 */ - pixel = ((((rval1 + rval2) << 23) & 0xff000000) | - (((gval1 + gval2) << 15) & 0x00ff0000) | - (((bval1 + bval2) << 7) & 0x0000ff00)); - *(lined + jd + 1) = pixel; /* pix 3 */ - *(linedp + jd + 1) = pixel; /* pix 4 */ - } - rval1 = rval2; - gval1 = gval2; - bval1 = bval2; - pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8); - *(lined + 2 * wsm) = pixel; /* pix 1 */ - *(lined + 2 * wsm + 1) = pixel; /* pix 2 */ - *(linedp + 2 * wsm) = pixel; /* pix 3 */ - *(linedp + 2 * wsm + 1) = pixel; /* pix 4 */ - } -} - - -/*------------------------------------------------------------------* - * 2x linear interpolated gray scaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleGray2xLILow() - * - *
- * Notes:
- *      (1) This is a special case of 2x expansion by linear
- *          interpolation.  Each src pixel contains 4 dest pixels.
- *          The 4 dest pixels in src pixel 1 are numbered at
- *          their UL corners.  The 4 dest pixels in src pixel 1
- *          are related to that src pixel and its 3 neighboring
- *          src pixels as follows:
- *
- *             1-----2-----|-----|-----|
- *             |     |     |     |     |
- *             |     |     |     |     |
- *  src 1 -->  3-----4-----|     |     |  <-- src 2
- *             |     |     |     |     |
- *             |     |     |     |     |
- *             |-----|-----|-----|-----|
- *             |     |     |     |     |
- *             |     |     |     |     |
- *  src 3 -->  |     |     |     |     |  <-- src 4
- *             |     |     |     |     |
- *             |     |     |     |     |
- *             |-----|-----|-----|-----|
- *
- *           dest      src
- *           ----      ---
- *           dp1    =  sp1
- *           dp2    =  (sp1 + sp2) / 2
- *           dp3    =  (sp1 + sp3) / 2
- *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
- *
- *      (2) We iterate over the src pixels, and unroll the calculation
- *          for each set of 4 dest pixels corresponding to that src
- *          pixel, caching pixels for the next src pixel whenever possible.
- * 
- */ -static void -scaleGray2xLILow(l_uint32 *datad, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, hsm; -l_uint32 *lines, *lined; - - hsm = hs - 1; - - /* We're taking 2 src and 2 dest lines at a time, - * and for each src line, we're computing 2 dest lines. - * Call these 2 dest lines: destline1 and destline2. - * The first src line is used for destline 1. - * On all but the last src line, both src lines are - * used in the linear interpolation for destline2. - * On the last src line, both destline1 and destline2 - * are computed using only that src line (because there - * isn't a lower src line). */ - - /* iterate over all but the last src line */ - for (i = 0; i < hsm; i++) { - lines = datas + i * wpls; - lined = datad + 2 * i * wpld; - scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 0); - } - - /* last src line */ - lines = datas + hsm * wpls; - lined = datad + 2 * hsm * wpld; - scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 1); -} - - -/*! 
- * \brief scaleGray2xLILineLow() - * - * \param[in] lined ptr to top destline, to be made from current src line - * \param[in] wpld - * \param[in] lines ptr to current src line - * \param[in] ws - * \param[in] wpls - * \param[in] lastlineflag 1 if last src line; 0 otherwise - * \return void - */ -static void -scaleGray2xLILineLow(l_uint32 *lined, - l_int32 wpld, - l_uint32 *lines, - l_int32 ws, - l_int32 wpls, - l_int32 lastlineflag) -{ -l_int32 j, jd, wsm, w; -l_uint32 sval1, sval2, sval3, sval4; -l_uint32 *linesp, *linedp; -l_uint32 words, wordsp, wordd, worddp; - - wsm = ws - 1; - - if (lastlineflag == 0) { - linesp = lines + wpls; - linedp = lined + wpld; - - /* Unroll the loop 4x and work on full words */ - words = lines[0]; - wordsp = linesp[0]; - sval2 = (words >> 24) & 0xff; - sval4 = (wordsp >> 24) & 0xff; - for (j = 0, jd = 0, w = 0; j + 3 < wsm; j += 4, jd += 8, w++) { - /* At the top of the loop, - * words == lines[w], wordsp == linesp[w] - * and the top bytes of those have been loaded into - * sval2 and sval4. 
*/ - sval1 = sval2; - sval2 = (words >> 16) & 0xff; - sval3 = sval4; - sval4 = (wordsp >> 16) & 0xff; - wordd = (sval1 << 24) | (((sval1 + sval2) >> 1) << 16); - worddp = (((sval1 + sval3) >> 1) << 24) | - (((sval1 + sval2 + sval3 + sval4) >> 2) << 16); - - sval1 = sval2; - sval2 = (words >> 8) & 0xff; - sval3 = sval4; - sval4 = (wordsp >> 8) & 0xff; - wordd |= (sval1 << 8) | ((sval1 + sval2) >> 1); - worddp |= (((sval1 + sval3) >> 1) << 8) | - ((sval1 + sval2 + sval3 + sval4) >> 2); - lined[w * 2] = wordd; - linedp[w * 2] = worddp; - - sval1 = sval2; - sval2 = words & 0xff; - sval3 = sval4; - sval4 = wordsp & 0xff; - wordd = (sval1 << 24) | /* pix 1 */ - (((sval1 + sval2) >> 1) << 16); /* pix 2 */ - worddp = (((sval1 + sval3) >> 1) << 24) | /* pix 3 */ - (((sval1 + sval2 + sval3 + sval4) >> 2) << 16); /* pix 4 */ - - /* Load the next word as we need its first byte */ - words = lines[w + 1]; - wordsp = linesp[w + 1]; - sval1 = sval2; - sval2 = (words >> 24) & 0xff; - sval3 = sval4; - sval4 = (wordsp >> 24) & 0xff; - wordd |= (sval1 << 8) | /* pix 1 */ - ((sval1 + sval2) >> 1); /* pix 2 */ - worddp |= (((sval1 + sval3) >> 1) << 8) | /* pix 3 */ - ((sval1 + sval2 + sval3 + sval4) >> 2); /* pix 4 */ - lined[w * 2 + 1] = wordd; - linedp[w * 2 + 1] = worddp; - } - - /* Finish up the last word */ - for (; j < wsm; j++, jd += 2) { - sval1 = sval2; - sval3 = sval4; - sval2 = GET_DATA_BYTE(lines, j + 1); - sval4 = GET_DATA_BYTE(linesp, j + 1); - SET_DATA_BYTE(lined, jd, sval1); /* pix 1 */ - SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */ - SET_DATA_BYTE(linedp, jd, (sval1 + sval3) / 2); /* pix 3 */ - SET_DATA_BYTE(linedp, jd + 1, - (sval1 + sval2 + sval3 + sval4) / 4); /* pix 4 */ - } - sval1 = sval2; - sval3 = sval4; - SET_DATA_BYTE(lined, 2 * wsm, sval1); /* pix 1 */ - SET_DATA_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */ - SET_DATA_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2); /* pix 3 */ - SET_DATA_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2); /* pix 4 
*/ - -#if DEBUG_UNROLLING -#define CHECK_BYTE(a, b, c) if (GET_DATA_BYTE(a, b) != c) {\ - lept_stderr("Error: mismatch at %d, %d vs %d\n", \ - j, GET_DATA_BYTE(a, b), c); } - - sval2 = GET_DATA_BYTE(lines, 0); - sval4 = GET_DATA_BYTE(linesp, 0); - for (j = 0, jd = 0; j < wsm; j++, jd += 2) { - sval1 = sval2; - sval3 = sval4; - sval2 = GET_DATA_BYTE(lines, j + 1); - sval4 = GET_DATA_BYTE(linesp, j + 1); - CHECK_BYTE(lined, jd, sval1); /* pix 1 */ - CHECK_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */ - CHECK_BYTE(linedp, jd, (sval1 + sval3) / 2); /* pix 3 */ - CHECK_BYTE(linedp, jd + 1, - (sval1 + sval2 + sval3 + sval4) / 4); /* pix 4 */ - } - sval1 = sval2; - sval3 = sval4; - CHECK_BYTE(lined, 2 * wsm, sval1); /* pix 1 */ - CHECK_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */ - CHECK_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2); /* pix 3 */ - CHECK_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2); /* pix 4 */ -#undef CHECK_BYTE -#endif - } else { /* last row of src pixels: lastlineflag == 1 */ - linedp = lined + wpld; - sval2 = GET_DATA_BYTE(lines, 0); - for (j = 0, jd = 0; j < wsm; j++, jd += 2) { - sval1 = sval2; - sval2 = GET_DATA_BYTE(lines, j + 1); - SET_DATA_BYTE(lined, jd, sval1); /* pix 1 */ - SET_DATA_BYTE(linedp, jd, sval1); /* pix 3 */ - SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */ - SET_DATA_BYTE(linedp, jd + 1, (sval1 + sval2) / 2); /* pix 4 */ - } - sval1 = sval2; - SET_DATA_BYTE(lined, 2 * wsm, sval1); /* pix 1 */ - SET_DATA_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */ - SET_DATA_BYTE(linedp, 2 * wsm, sval1); /* pix 3 */ - SET_DATA_BYTE(linedp, 2 * wsm + 1, sval1); /* pix 4 */ - } -} - - -/*------------------------------------------------------------------* - * 4x linear interpolated gray scaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleGray4xLILow() - * - *
- * Notes:
- *      (1) This is a special case of 4x expansion by linear
- *          interpolation.  Each src pixel contains 16 dest pixels.
- *          The 16 dest pixels in src pixel 1 are numbered at
- *          their UL corners.  The 16 dest pixels in src pixel 1
- *          are related to that src pixel and its 3 neighboring
- *          src pixels as follows:
- *
- *             1---2---3---4---|---|---|---|---|
- *             |   |   |   |   |   |   |   |   |
- *             5---6---7---8---|---|---|---|---|
- *             |   |   |   |   |   |   |   |   |
- *  src 1 -->  9---a---b---c---|---|---|---|---|  <-- src 2
- *             |   |   |   |   |   |   |   |   |
- *             d---e---f---g---|---|---|---|---|
- *             |   |   |   |   |   |   |   |   |
- *             |===|===|===|===|===|===|===|===|
- *             |   |   |   |   |   |   |   |   |
- *             |---|---|---|---|---|---|---|---|
- *             |   |   |   |   |   |   |   |   |
- *  src 3 -->  |---|---|---|---|---|---|---|---|  <-- src 4
- *             |   |   |   |   |   |   |   |   |
- *             |---|---|---|---|---|---|---|---|
- *             |   |   |   |   |   |   |   |   |
- *             |---|---|---|---|---|---|---|---|
- *
- *           dest      src
- *           ----      ---
- *           dp1    =  sp1
- *           dp2    =  (3 * sp1 + sp2) / 4
- *           dp3    =  (sp1 + sp2) / 2
- *           dp4    =  (sp1 + 3 * sp2) / 4
- *           dp5    =  (3 * sp1 + sp3) / 4
- *           dp6    =  (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
- *           dp7    =  (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
- *           dp8    =  (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
- *           dp9    =  (sp1 + sp3) / 2
- *           dp10   =  (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
- *           dp11   =  (sp1 + sp2 + sp3 + sp4) / 4
- *           dp12   =  (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
- *           dp13   =  (sp1 + 3 * sp3) / 4
- *           dp14   =  (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
- *           dp15   =  (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
- *           dp16   =  (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
- *
- *      (2) We iterate over the src pixels, and unroll the calculation
- *          for each set of 16 dest pixels corresponding to that src
- *          pixel, caching pixels for the next src pixel whenever possible.
- * 
- */ -static void -scaleGray4xLILow(l_uint32 *datad, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, hsm; -l_uint32 *lines, *lined; - - hsm = hs - 1; - - /* We're taking 2 src and 4 dest lines at a time, - * and for each src line, we're computing 4 dest lines. - * Call these 4 dest lines: destline1 - destline4. - * The first src line is used for destline 1. - * Two src lines are used for all other dest lines, - * except for the last 4 dest lines, which are computed - * using only the last src line. */ - - /* iterate over all but the last src line */ - for (i = 0; i < hsm; i++) { - lines = datas + i * wpls; - lined = datad + 4 * i * wpld; - scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 0); - } - - /* last src line */ - lines = datas + hsm * wpls; - lined = datad + 4 * hsm * wpld; - scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 1); -} - - -/*! - * \brief scaleGray4xLILineLow() - * - * \param[in] lined ptr to top destline, to be made from current src line - * \param[in] wpld - * \param[in] lines ptr to current src line - * \param[in] ws - * \param[in] wpls - * \param[in] lastlineflag 1 if last src line; 0 otherwise - * \return void - */ -static void -scaleGray4xLILineLow(l_uint32 *lined, - l_int32 wpld, - l_uint32 *lines, - l_int32 ws, - l_int32 wpls, - l_int32 lastlineflag) -{ -l_int32 j, jd, wsm, wsm4; -l_int32 s1, s2, s3, s4, s1t, s2t, s3t, s4t; -l_uint32 *linesp, *linedp1, *linedp2, *linedp3; - - wsm = ws - 1; - wsm4 = 4 * wsm; - - if (lastlineflag == 0) { - linesp = lines + wpls; - linedp1 = lined + wpld; - linedp2 = lined + 2 * wpld; - linedp3 = lined + 3 * wpld; - s2 = GET_DATA_BYTE(lines, 0); - s4 = GET_DATA_BYTE(linesp, 0); - for (j = 0, jd = 0; j < wsm; j++, jd += 4) { - s1 = s2; - s3 = s4; - s2 = GET_DATA_BYTE(lines, j + 1); - s4 = GET_DATA_BYTE(linesp, j + 1); - s1t = 3 * s1; - s2t = 3 * s2; - s3t = 3 * s3; - s4t = 3 * s4; - SET_DATA_BYTE(lined, jd, s1); /* d1 */ - SET_DATA_BYTE(lined, jd + 1, (s1t + 
s2) / 4); /* d2 */ - SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2); /* d3 */ - SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4); /* d4 */ - SET_DATA_BYTE(linedp1, jd, (s1t + s3) / 4); /* d5 */ - SET_DATA_BYTE(linedp1, jd + 1, (9*s1 + s2t + s3t + s4) / 16); /*d6*/ - SET_DATA_BYTE(linedp1, jd + 2, (s1t + s2t + s3 + s4) / 8); /* d7 */ - SET_DATA_BYTE(linedp1, jd + 3, (s1t + 9*s2 + s3 + s4t) / 16);/*d8*/ - SET_DATA_BYTE(linedp2, jd, (s1 + s3) / 2); /* d9 */ - SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2 + s3t + s4) / 8);/* d10 */ - SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2 + s3 + s4) / 4); /* d11 */ - SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t + s3 + s4t) / 8);/* d12 */ - SET_DATA_BYTE(linedp3, jd, (s1 + s3t) / 4); /* d13 */ - SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2 + 9*s3 + s4t) / 16);/*d14*/ - SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2 + s3t + s4t) / 8); /* d15 */ - SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t + s3t + 9*s4) / 16);/*d16*/ - } - s1 = s2; - s3 = s4; - s1t = 3 * s1; - s3t = 3 * s3; - SET_DATA_BYTE(lined, wsm4, s1); /* d1 */ - SET_DATA_BYTE(lined, wsm4 + 1, s1); /* d2 */ - SET_DATA_BYTE(lined, wsm4 + 2, s1); /* d3 */ - SET_DATA_BYTE(lined, wsm4 + 3, s1); /* d4 */ - SET_DATA_BYTE(linedp1, wsm4, (s1t + s3) / 4); /* d5 */ - SET_DATA_BYTE(linedp1, wsm4 + 1, (s1t + s3) / 4); /* d6 */ - SET_DATA_BYTE(linedp1, wsm4 + 2, (s1t + s3) / 4); /* d7 */ - SET_DATA_BYTE(linedp1, wsm4 + 3, (s1t + s3) / 4); /* d8 */ - SET_DATA_BYTE(linedp2, wsm4, (s1 + s3) / 2); /* d9 */ - SET_DATA_BYTE(linedp2, wsm4 + 1, (s1 + s3) / 2); /* d10 */ - SET_DATA_BYTE(linedp2, wsm4 + 2, (s1 + s3) / 2); /* d11 */ - SET_DATA_BYTE(linedp2, wsm4 + 3, (s1 + s3) / 2); /* d12 */ - SET_DATA_BYTE(linedp3, wsm4, (s1 + s3t) / 4); /* d13 */ - SET_DATA_BYTE(linedp3, wsm4 + 1, (s1 + s3t) / 4); /* d14 */ - SET_DATA_BYTE(linedp3, wsm4 + 2, (s1 + s3t) / 4); /* d15 */ - SET_DATA_BYTE(linedp3, wsm4 + 3, (s1 + s3t) / 4); /* d16 */ - } else { /* last row of src pixels: lastlineflag == 1 */ - linedp1 = lined + wpld; - linedp2 = lined + 
2 * wpld; - linedp3 = lined + 3 * wpld; - s2 = GET_DATA_BYTE(lines, 0); - for (j = 0, jd = 0; j < wsm; j++, jd += 4) { - s1 = s2; - s2 = GET_DATA_BYTE(lines, j + 1); - s1t = 3 * s1; - s2t = 3 * s2; - SET_DATA_BYTE(lined, jd, s1); /* d1 */ - SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4 ); /* d2 */ - SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2 ); /* d3 */ - SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4 ); /* d4 */ - SET_DATA_BYTE(linedp1, jd, s1); /* d5 */ - SET_DATA_BYTE(linedp1, jd + 1, (s1t + s2) / 4 ); /* d6 */ - SET_DATA_BYTE(linedp1, jd + 2, (s1 + s2) / 2 ); /* d7 */ - SET_DATA_BYTE(linedp1, jd + 3, (s1 + s2t) / 4 ); /* d8 */ - SET_DATA_BYTE(linedp2, jd, s1); /* d9 */ - SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2) / 4 ); /* d10 */ - SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2) / 2 ); /* d11 */ - SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t) / 4 ); /* d12 */ - SET_DATA_BYTE(linedp3, jd, s1); /* d13 */ - SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2) / 4 ); /* d14 */ - SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2) / 2 ); /* d15 */ - SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t) / 4 ); /* d16 */ - } - s1 = s2; - SET_DATA_BYTE(lined, wsm4, s1); /* d1 */ - SET_DATA_BYTE(lined, wsm4 + 1, s1); /* d2 */ - SET_DATA_BYTE(lined, wsm4 + 2, s1); /* d3 */ - SET_DATA_BYTE(lined, wsm4 + 3, s1); /* d4 */ - SET_DATA_BYTE(linedp1, wsm4, s1); /* d5 */ - SET_DATA_BYTE(linedp1, wsm4 + 1, s1); /* d6 */ - SET_DATA_BYTE(linedp1, wsm4 + 2, s1); /* d7 */ - SET_DATA_BYTE(linedp1, wsm4 + 3, s1); /* d8 */ - SET_DATA_BYTE(linedp2, wsm4, s1); /* d9 */ - SET_DATA_BYTE(linedp2, wsm4 + 1, s1); /* d10 */ - SET_DATA_BYTE(linedp2, wsm4 + 2, s1); /* d11 */ - SET_DATA_BYTE(linedp2, wsm4 + 3, s1); /* d12 */ - SET_DATA_BYTE(linedp3, wsm4, s1); /* d13 */ - SET_DATA_BYTE(linedp3, wsm4 + 1, s1); /* d14 */ - SET_DATA_BYTE(linedp3, wsm4 + 2, s1); /* d15 */ - SET_DATA_BYTE(linedp3, wsm4 + 3, s1); /* d16 */ - } -} - - -/*------------------------------------------------------------------* - * Grayscale and color scaling by closest pixel 
sampling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleBySamplingLow() - * - *
- * Notes:
- *      (1) The dest must be cleared prior to this operation,
- *          and we clear it here in the low-level code.
- *      (2) We reuse dest pixels and dest pixel rows whenever
- *          possible.  This speeds the upscaling; downscaling
- *          is done by strict subsampling and is unaffected.
- *      (3) Because we are sampling and not interpolating, this
- *          routine works directly, without conversion to full
- *          RGB color, for 2, 4 or 8 bpp palette color images.
- * 
- */ -static l_int32 -scaleBySamplingLow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 d, - l_int32 wpls) -{ -l_int32 i, j; -l_int32 xs, prevxs, sval; -l_int32 *srow, *scol; -l_uint32 csval; -l_uint32 *lines, *prevlines, *lined, *prevlined; -l_float32 wratio, hratio; - - PROCNAME("scaleBySamplingLow"); - - if (d != 2 && d != 4 && d !=8 && d != 16 && d != 32) - return ERROR_INT("pixel depth not supported", procName, 1); - - /* Clear dest */ - memset(datad, 0, 4LL * hd * wpld); - - /* the source row corresponding to dest row i ==> srow[i] - * the source col corresponding to dest col j ==> scol[j] */ - if ((srow = (l_int32 *)LEPT_CALLOC(hd, sizeof(l_int32))) == NULL) - return ERROR_INT("srow not made", procName, 1); - if ((scol = (l_int32 *)LEPT_CALLOC(wd, sizeof(l_int32))) == NULL) { - LEPT_FREE(srow); - return ERROR_INT("scol not made", procName, 1); - } - - wratio = (l_float32)ws / (l_float32)wd; - hratio = (l_float32)hs / (l_float32)hd; - for (i = 0; i < hd; i++) - srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1); - for (j = 0; j < wd; j++) - scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1); - - prevlines = NULL; - for (i = 0; i < hd; i++) { - lines = datas + srow[i] * wpls; - lined = datad + i * wpld; - if (lines != prevlines) { /* make dest from new source row */ - prevxs = -1; - sval = 0; - csval = 0; - if (d == 2) { - for (j = 0; j < wd; j++) { - xs = scol[j]; - if (xs != prevxs) { /* get dest pix from source col */ - sval = GET_DATA_DIBIT(lines, xs); - SET_DATA_DIBIT(lined, j, sval); - prevxs = xs; - } else { /* copy prev dest pix */ - SET_DATA_DIBIT(lined, j, sval); - } - } - } else if (d == 4) { - for (j = 0; j < wd; j++) { - xs = scol[j]; - if (xs != prevxs) { /* get dest pix from source col */ - sval = GET_DATA_QBIT(lines, xs); - SET_DATA_QBIT(lined, j, sval); - prevxs = xs; - } else { /* copy prev dest pix */ - SET_DATA_QBIT(lined, j, sval); - } - } - } else if (d == 8) { - 
for (j = 0; j < wd; j++) { - xs = scol[j]; - if (xs != prevxs) { /* get dest pix from source col */ - sval = GET_DATA_BYTE(lines, xs); - SET_DATA_BYTE(lined, j, sval); - prevxs = xs; - } else { /* copy prev dest pix */ - SET_DATA_BYTE(lined, j, sval); - } - } - } else if (d == 16) { - for (j = 0; j < wd; j++) { - xs = scol[j]; - if (xs != prevxs) { /* get dest pix from source col */ - sval = GET_DATA_TWO_BYTES(lines, xs); - SET_DATA_TWO_BYTES(lined, j, sval); - prevxs = xs; - } else { /* copy prev dest pix */ - SET_DATA_TWO_BYTES(lined, j, sval); - } - } - } else { /* d == 32 */ - for (j = 0; j < wd; j++) { - xs = scol[j]; - if (xs != prevxs) { /* get dest pix from source col */ - csval = lines[xs]; - lined[j] = csval; - prevxs = xs; - } else { /* copy prev dest pix */ - lined[j] = csval; - } - } - } - } else { /* lines == prevlines; copy prev dest row */ - prevlined = lined - wpld; - memcpy(lined, prevlined, 4 * wpld); - } - prevlines = lines; - } - - LEPT_FREE(srow); - LEPT_FREE(scol); - return 0; -} - - -/*------------------------------------------------------------------* - * Color and grayscale downsampling with (antialias) smoothing * - *------------------------------------------------------------------*/ -/*! - * \brief scaleSmoothLow() - * - *
- * Notes:
- *      (1) This function is called on 8 or 32 bpp src and dest images.
- *      (2) size is the full width of the lowpass smoothing filter.
- *          It is correlated with the reduction ratio, being the
- *          nearest integer such that size is approximately equal to hs / hd.
- * 
- */ -static l_int32 -scaleSmoothLow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 d, - l_int32 wpls, - l_int32 size) -{ -l_int32 i, j, m, n, xstart; -l_int32 val, rval, gval, bval; -l_int32 *srow, *scol; -l_uint32 *lines, *lined, *line, *ppixel; -l_uint32 pixel; -l_float32 wratio, hratio, norm; - - PROCNAME("scaleSmoothLow"); - - /* Clear dest */ - memset(datad, 0, 4LL * wpld * hd); - - /* Each dest pixel at (j,i) is computed as the average - of size^2 corresponding src pixels. - We store the UL corner location of the square of - src pixels that correspond to dest pixel (j,i). - The are labeled by the arrays srow[i] and scol[j]. */ - if ((srow = (l_int32 *)LEPT_CALLOC(hd, sizeof(l_int32))) == NULL) - return ERROR_INT("srow not made", procName, 1); - if ((scol = (l_int32 *)LEPT_CALLOC(wd, sizeof(l_int32))) == NULL) { - LEPT_FREE(srow); - return ERROR_INT("scol not made", procName, 1); - } - - norm = 1. / (l_float32)(size * size); - wratio = (l_float32)ws / (l_float32)wd; - hratio = (l_float32)hs / (l_float32)hd; - for (i = 0; i < hd; i++) - srow[i] = L_MIN((l_int32)(hratio * i), hs - size); - for (j = 0; j < wd; j++) - scol[j] = L_MIN((l_int32)(wratio * j), ws - size); - - /* For each dest pixel, compute average */ - if (d == 8) { - for (i = 0; i < hd; i++) { - lines = datas + srow[i] * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - xstart = scol[j]; - val = 0; - for (m = 0; m < size; m++) { - line = lines + m * wpls; - for (n = 0; n < size; n++) { - val += GET_DATA_BYTE(line, xstart + n); - } - } - val = (l_int32)((l_float32)val * norm); - SET_DATA_BYTE(lined, j, val); - } - } - } else { /* d == 32 */ - for (i = 0; i < hd; i++) { - lines = datas + srow[i] * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - xstart = scol[j]; - rval = gval = bval = 0; - for (m = 0; m < size; m++) { - ppixel = lines + m * wpls + xstart; - for (n = 0; n < size; n++) { - pixel = 
*(ppixel + n); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - } - } - rval = (l_int32)((l_float32)rval * norm); - gval = (l_int32)((l_float32)gval * norm); - bval = (l_int32)((l_float32)bval * norm); - composeRGBPixel(rval, gval, bval, lined + j); - } - } - } - - LEPT_FREE(srow); - LEPT_FREE(scol); - return 0; -} - - -/*! - * \brief scaleRGBToGray2Low() - * - *
- * Notes:
- *      (1) This function is called with 32 bpp RGB src and 8 bpp,
- *          half-resolution dest.  The weights should add to 1.0.
- * 
- */ -static void -scaleRGBToGray2Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_float32 rwt, - l_float32 gwt, - l_float32 bwt) -{ -l_int32 i, j, val, rval, gval, bval; -l_uint32 *lines, *lined; -l_uint32 pixel; - - rwt *= 0.25; - gwt *= 0.25; - bwt *= 0.25; - for (i = 0; i < hd; i++) { - lines = datas + 2 * i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - /* Sum each of the color components from 4 src pixels */ - pixel = *(lines + 2 * j); - rval = (pixel >> L_RED_SHIFT) & 0xff; - gval = (pixel >> L_GREEN_SHIFT) & 0xff; - bval = (pixel >> L_BLUE_SHIFT) & 0xff; - pixel = *(lines + 2 * j + 1); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - pixel = *(lines + wpls + 2 * j); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - pixel = *(lines + wpls + 2 * j + 1); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - /* Generate the dest byte as a weighted sum of the averages */ - val = (l_int32)(rwt * rval + gwt * gval + bwt * bval); - SET_DATA_BYTE(lined, j, val); - } - } -} - - -/*------------------------------------------------------------------* - * General area mapped gray scaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleColorAreaMapLow() - * - *
- * Notes:
- *      (1) This should only be used for downscaling.
- *          We choose to divide each pixel into 16 x 16 sub-pixels.
- *          This is much slower than scaleSmoothLow(), but it gives a
- *          better representation, esp. for downscaling factors between
- *          1.5 and 5.  All src pixels are subdivided into 256 sub-pixels,
- *          and are weighted by the number of sub-pixels covered by
- *          the dest pixel.  This is about 2x slower than scaleSmoothLow(),
- *          but the results are significantly better on small text.
- * 
- */ -static void -scaleColorAreaMapLow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, j, k, m, wm2, hm2; -l_int32 area00, area10, area01, area11, areal, arear, areat, areab; -l_int32 xu, yu; /* UL corner in src image, to 1/16 of a pixel */ -l_int32 xl, yl; /* LR corner in src image, to 1/16 of a pixel */ -l_int32 xup, yup, xuf, yuf; /* UL src pixel: integer and fraction */ -l_int32 xlp, ylp, xlf, ylf; /* LR src pixel: integer and fraction */ -l_int32 delx, dely, area; -l_int32 v00r, v00g, v00b; /* contrib. from UL src pixel */ -l_int32 v01r, v01g, v01b; /* contrib. from LL src pixel */ -l_int32 v10r, v10g, v10b; /* contrib from UR src pixel */ -l_int32 v11r, v11g, v11b; /* contrib from LR src pixel */ -l_int32 vinr, ving, vinb; /* contrib from all full interior src pixels */ -l_int32 vmidr, vmidg, vmidb; /* contrib from side parts */ -l_int32 rval, gval, bval; -l_uint32 pixel00, pixel10, pixel01, pixel11, pixel; -l_uint32 *lines, *lined; -l_float32 scx, scy; - - /* (scx, scy) are scaling factors that are applied to the - * dest coords to get the corresponding src coords. - * We need them because we iterate over dest pixels - * and must find the corresponding set of src pixels. */ - scx = 16. * (l_float32)ws / (l_float32)wd; - scy = 16. 
* (l_float32)hs / (l_float32)hd; - wm2 = ws - 2; - hm2 = hs - 2; - - /* Iterate over the destination pixels */ - for (i = 0; i < hd; i++) { - yu = (l_int32)(scy * i); - yl = (l_int32)(scy * (i + 1.0)); - yup = yu >> 4; - yuf = yu & 0x0f; - ylp = yl >> 4; - ylf = yl & 0x0f; - dely = ylp - yup; - lined = datad + i * wpld; - lines = datas + yup * wpls; - for (j = 0; j < wd; j++) { - xu = (l_int32)(scx * j); - xl = (l_int32)(scx * (j + 1.0)); - xup = xu >> 4; - xuf = xu & 0x0f; - xlp = xl >> 4; - xlf = xl & 0x0f; - delx = xlp - xup; - - /* If near the edge, just use a src pixel value */ - if (xlp > wm2 || ylp > hm2) { - *(lined + j) = *(lines + xup); - continue; - } - - /* Area summed over, in subpixels. This varies - * due to the quantization, so we can't simply take - * the area to be a constant: area = scx * scy. */ - area = ((16 - xuf) + 16 * (delx - 1) + xlf) * - ((16 - yuf) + 16 * (dely - 1) + ylf); - - /* Do area map summation */ - pixel00 = *(lines + xup); - pixel10 = *(lines + xlp); - pixel01 = *(lines + dely * wpls + xup); - pixel11 = *(lines + dely * wpls + xlp); - area00 = (16 - xuf) * (16 - yuf); - area10 = xlf * (16 - yuf); - area01 = (16 - xuf) * ylf; - area11 = xlf * ylf; - v00r = area00 * ((pixel00 >> L_RED_SHIFT) & 0xff); - v00g = area00 * ((pixel00 >> L_GREEN_SHIFT) & 0xff); - v00b = area00 * ((pixel00 >> L_BLUE_SHIFT) & 0xff); - v10r = area10 * ((pixel10 >> L_RED_SHIFT) & 0xff); - v10g = area10 * ((pixel10 >> L_GREEN_SHIFT) & 0xff); - v10b = area10 * ((pixel10 >> L_BLUE_SHIFT) & 0xff); - v01r = area01 * ((pixel01 >> L_RED_SHIFT) & 0xff); - v01g = area01 * ((pixel01 >> L_GREEN_SHIFT) & 0xff); - v01b = area01 * ((pixel01 >> L_BLUE_SHIFT) & 0xff); - v11r = area11 * ((pixel11 >> L_RED_SHIFT) & 0xff); - v11g = area11 * ((pixel11 >> L_GREEN_SHIFT) & 0xff); - v11b = area11 * ((pixel11 >> L_BLUE_SHIFT) & 0xff); - vinr = ving = vinb = 0; - for (k = 1; k < dely; k++) { /* for full src pixels */ - for (m = 1; m < delx; m++) { - pixel = *(lines + k * wpls + xup 
+ m); - vinr += 256 * ((pixel >> L_RED_SHIFT) & 0xff); - ving += 256 * ((pixel >> L_GREEN_SHIFT) & 0xff); - vinb += 256 * ((pixel >> L_BLUE_SHIFT) & 0xff); - } - } - vmidr = vmidg = vmidb = 0; - areal = (16 - xuf) * 16; - arear = xlf * 16; - areat = 16 * (16 - yuf); - areab = 16 * ylf; - for (k = 1; k < dely; k++) { /* for left side */ - pixel = *(lines + k * wpls + xup); - vmidr += areal * ((pixel >> L_RED_SHIFT) & 0xff); - vmidg += areal * ((pixel >> L_GREEN_SHIFT) & 0xff); - vmidb += areal * ((pixel >> L_BLUE_SHIFT) & 0xff); - } - for (k = 1; k < dely; k++) { /* for right side */ - pixel = *(lines + k * wpls + xlp); - vmidr += arear * ((pixel >> L_RED_SHIFT) & 0xff); - vmidg += arear * ((pixel >> L_GREEN_SHIFT) & 0xff); - vmidb += arear * ((pixel >> L_BLUE_SHIFT) & 0xff); - } - for (m = 1; m < delx; m++) { /* for top side */ - pixel = *(lines + xup + m); - vmidr += areat * ((pixel >> L_RED_SHIFT) & 0xff); - vmidg += areat * ((pixel >> L_GREEN_SHIFT) & 0xff); - vmidb += areat * ((pixel >> L_BLUE_SHIFT) & 0xff); - } - for (m = 1; m < delx; m++) { /* for bottom side */ - pixel = *(lines + dely * wpls + xup + m); - vmidr += areab * ((pixel >> L_RED_SHIFT) & 0xff); - vmidg += areab * ((pixel >> L_GREEN_SHIFT) & 0xff); - vmidb += areab * ((pixel >> L_BLUE_SHIFT) & 0xff); - } - - /* Sum all the contributions */ - rval = (v00r + v01r + v10r + v11r + vinr + vmidr + 128) / area; - gval = (v00g + v01g + v10g + v11g + ving + vmidg + 128) / area; - bval = (v00b + v01b + v10b + v11b + vinb + vmidb + 128) / area; -#if DEBUG_OVERFLOW - if (rval > 255) lept_stderr("rval ovfl: %d\n", rval); - if (gval > 255) lept_stderr("gval ovfl: %d\n", gval); - if (bval > 255) lept_stderr("bval ovfl: %d\n", bval); -#endif /* DEBUG_OVERFLOW */ - composeRGBPixel(rval, gval, bval, lined + j); - } - } -} - - -/*! - * \brief scaleGrayAreaMapLow() - * - *
- * Notes:
- *      (1) This should only be used for downscaling.
- *          We choose to divide each pixel into 16 x 16 sub-pixels.
- *          This is about 2x slower than scaleSmoothLow(), but the results
- *          are significantly better on small text, esp. for downscaling
- *          factors between 1.5 and 5.  All src pixels are subdivided
- *          into 256 sub-pixels, and are weighted by the number of
- *          sub-pixels covered by the dest pixel.
- * 
- */ -static void -scaleGrayAreaMapLow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, j, k, m, wm2, hm2; -l_int32 xu, yu; /* UL corner in src image, to 1/16 of a pixel */ -l_int32 xl, yl; /* LR corner in src image, to 1/16 of a pixel */ -l_int32 xup, yup, xuf, yuf; /* UL src pixel: integer and fraction */ -l_int32 xlp, ylp, xlf, ylf; /* LR src pixel: integer and fraction */ -l_int32 delx, dely, area; -l_int32 v00; /* contrib. from UL src pixel */ -l_int32 v01; /* contrib. from LL src pixel */ -l_int32 v10; /* contrib from UR src pixel */ -l_int32 v11; /* contrib from LR src pixel */ -l_int32 vin; /* contrib from all full interior src pixels */ -l_int32 vmid; /* contrib from side parts that are full in 1 direction */ -l_int32 val; -l_uint32 *lines, *lined; -l_float32 scx, scy; - - /* (scx, scy) are scaling factors that are applied to the - * dest coords to get the corresponding src coords. - * We need them because we iterate over dest pixels - * and must find the corresponding set of src pixels. */ - scx = 16. * (l_float32)ws / (l_float32)wd; - scy = 16. * (l_float32)hs / (l_float32)hd; - wm2 = ws - 2; - hm2 = hs - 2; - - /* Iterate over the destination pixels */ - for (i = 0; i < hd; i++) { - yu = (l_int32)(scy * i); - yl = (l_int32)(scy * (i + 1.0)); - yup = yu >> 4; - yuf = yu & 0x0f; - ylp = yl >> 4; - ylf = yl & 0x0f; - dely = ylp - yup; - lined = datad + i * wpld; - lines = datas + yup * wpls; - for (j = 0; j < wd; j++) { - xu = (l_int32)(scx * j); - xl = (l_int32)(scx * (j + 1.0)); - xup = xu >> 4; - xuf = xu & 0x0f; - xlp = xl >> 4; - xlf = xl & 0x0f; - delx = xlp - xup; - - /* If near the edge, just use a src pixel value */ - if (xlp > wm2 || ylp > hm2) { - SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xup)); - continue; - } - - /* Area summed over, in subpixels. 
This varies - * due to the quantization, so we can't simply take - * the area to be a constant: area = scx * scy. */ - area = ((16 - xuf) + 16 * (delx - 1) + xlf) * - ((16 - yuf) + 16 * (dely - 1) + ylf); - - /* Do area map summation */ - v00 = (16 - xuf) * (16 - yuf) * GET_DATA_BYTE(lines, xup); - v10 = xlf * (16 - yuf) * GET_DATA_BYTE(lines, xlp); - v01 = (16 - xuf) * ylf * GET_DATA_BYTE(lines + dely * wpls, xup); - v11 = xlf * ylf * GET_DATA_BYTE(lines + dely * wpls, xlp); - for (vin = 0, k = 1; k < dely; k++) { /* for full src pixels */ - for (m = 1; m < delx; m++) { - vin += 256 * GET_DATA_BYTE(lines + k * wpls, xup + m); - } - } - for (vmid = 0, k = 1; k < dely; k++) /* for left side */ - vmid += (16 - xuf) * 16 * GET_DATA_BYTE(lines + k * wpls, xup); - for (k = 1; k < dely; k++) /* for right side */ - vmid += xlf * 16 * GET_DATA_BYTE(lines + k * wpls, xlp); - for (m = 1; m < delx; m++) /* for top side */ - vmid += 16 * (16 - yuf) * GET_DATA_BYTE(lines, xup + m); - for (m = 1; m < delx; m++) /* for bottom side */ - vmid += 16 * ylf * GET_DATA_BYTE(lines + dely * wpls, xup + m); - val = (v00 + v01 + v10 + v11 + vin + vmid + 128) / area; -#if DEBUG_OVERFLOW - if (val > 255) lept_stderr("val overflow: %d\n", val); -#endif /* DEBUG_OVERFLOW */ - SET_DATA_BYTE(lined, j, val); - } - } -} - - -/*------------------------------------------------------------------* - * 2x area mapped downscaling * - *------------------------------------------------------------------*/ -/*! - * \brief scaleAreaMapLow2() - * - *
- * Notes:
- *      (1) This function is called with either 8 bpp gray or 32 bpp RGB.
- *          The result is a 2x reduced dest.
- * 
- */ -static void -scaleAreaMapLow2(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 d, - l_int32 wpls) -{ -l_int32 i, j, val, rval, gval, bval; -l_uint32 *lines, *lined; -l_uint32 pixel; - - if (d == 8) { - for (i = 0; i < hd; i++) { - lines = datas + 2 * i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - /* Average each dest pixel using 4 src pixels */ - val = GET_DATA_BYTE(lines, 2 * j); - val += GET_DATA_BYTE(lines, 2 * j + 1); - val += GET_DATA_BYTE(lines + wpls, 2 * j); - val += GET_DATA_BYTE(lines + wpls, 2 * j + 1); - val >>= 2; - SET_DATA_BYTE(lined, j, val); - } - } - } else { /* d == 32 */ - for (i = 0; i < hd; i++) { - lines = datas + 2 * i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - /* Average each of the color components from 4 src pixels */ - pixel = *(lines + 2 * j); - rval = (pixel >> L_RED_SHIFT) & 0xff; - gval = (pixel >> L_GREEN_SHIFT) & 0xff; - bval = (pixel >> L_BLUE_SHIFT) & 0xff; - pixel = *(lines + 2 * j + 1); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - pixel = *(lines + wpls + 2 * j); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - pixel = *(lines + wpls + 2 * j + 1); - rval += (pixel >> L_RED_SHIFT) & 0xff; - gval += (pixel >> L_GREEN_SHIFT) & 0xff; - bval += (pixel >> L_BLUE_SHIFT) & 0xff; - composeRGBPixel(rval >> 2, gval >> 2, bval >> 2, &pixel); - *(lined + j) = pixel; - } - } - } -} - - -/*------------------------------------------------------------------* - * Binary scaling by closest pixel sampling * - *------------------------------------------------------------------*/ -/* - * \brief scaleBinaryLow() - * - *
- * Notes:
- *      (1) The dest must be cleared prior to this operation,
- *          and we clear it here in the low-level code.
- *      (2) We reuse dest pixels and dest pixel rows whenever
- *          possible for upscaling; downscaling is done by
- *          strict subsampling.
- * 
- */ -static l_int32 -scaleBinaryLow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 ws, - l_int32 hs, - l_int32 wpls) -{ -l_int32 i, j; -l_int32 xs, prevxs, sval; -l_int32 *srow, *scol; -l_uint32 *lines, *prevlines, *lined, *prevlined; -l_float32 wratio, hratio; - - PROCNAME("scaleBinaryLow"); - - /* Clear dest */ - memset(datad, 0, 4LL * hd * wpld); - - /* The source row corresponding to dest row i ==> srow[i] - * The source col corresponding to dest col j ==> scol[j] */ - if ((srow = (l_int32 *)LEPT_CALLOC(hd, sizeof(l_int32))) == NULL) - return ERROR_INT("srow not made", procName, 1); - if ((scol = (l_int32 *)LEPT_CALLOC(wd, sizeof(l_int32))) == NULL) { - LEPT_FREE(srow); - return ERROR_INT("scol not made", procName, 1); - } - - wratio = (l_float32)ws / (l_float32)wd; - hratio = (l_float32)hs / (l_float32)hd; - for (i = 0; i < hd; i++) - srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1); - for (j = 0; j < wd; j++) - scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1); - - prevlines = NULL; - prevxs = -1; - sval = 0; - for (i = 0; i < hd; i++) { - lines = datas + srow[i] * wpls; - lined = datad + i * wpld; - if (lines != prevlines) { /* make dest from new source row */ - for (j = 0; j < wd; j++) { - xs = scol[j]; - if (xs != prevxs) { /* get dest pix from source col */ - if ((sval = GET_DATA_BIT(lines, xs))) - SET_DATA_BIT(lined, j); - prevxs = xs; - } else { /* copy prev dest pix, if set */ - if (sval) - SET_DATA_BIT(lined, j); - } - } - } else { /* lines == prevlines; copy prev dest row */ - prevlined = lined - wpld; - memcpy(lined, prevlined, 4 * wpld); - } - prevlines = lines; - } - - LEPT_FREE(srow); - LEPT_FREE(scol); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/scale2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/scale2.c deleted file mode 100644 index e0ce0134..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/scale2.c +++ /dev/null @@ -1,2358 +0,0 @@ 
-/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file scale2.c - *
- *         Scale-to-gray (1 bpp --> 8 bpp; arbitrary downscaling)
- *               PIX      *pixScaleToGray()
- *               PIX      *pixScaleToGrayFast()
- *
- *         Scale-to-gray (1 bpp --> 8 bpp; integer downscaling)
- *               PIX      *pixScaleToGray2()
- *               PIX      *pixScaleToGray3()
- *               PIX      *pixScaleToGray4()
- *               PIX      *pixScaleToGray6()
- *               PIX      *pixScaleToGray8()
- *               PIX      *pixScaleToGray16()
- *
- *         Scale-to-gray by mipmap(1 bpp --> 8 bpp, arbitrary reduction)
- *               PIX      *pixScaleToGrayMipmap()
- *
- *         Grayscale scaling using mipmap
- *               PIX      *pixScaleMipmap()
- *
- *         Replicated (integer) expansion (all depths)
- *               PIX      *pixExpandReplicate()
- *
- *         Grayscale downscaling using min and max
- *               PIX      *pixScaleGrayMinMax()
- *               PIX      *pixScaleGrayMinMax2()
- *
- *         Grayscale downscaling using rank value
- *               PIX      *pixScaleGrayRankCascade()
- *               PIX      *pixScaleGrayRank2()
- *
- *         Helper function for transferring alpha with scaling
- *               l_int32   pixScaleAndTransferAlpha()
- *
- *         RGB scaling including alpha (blend) component
- *               PIX      *pixScaleWithAlpha()
- *
- *     Low-level static functions:
- *
- *         Scale-to-gray 2x
- *                  static void       scaleToGray2Low()
- *                  static l_uint32  *makeSumTabSG2()
- *                  static l_uint8   *makeValTabSG2()
- *
- *         Scale-to-gray 3x
- *                  static void       scaleToGray3Low()
- *                  static l_uint32  *makeSumTabSG3()
- *                  static l_uint8   *makeValTabSG3()
- *
- *         Scale-to-gray 4x
- *                  static void       scaleToGray4Low()
- *                  static l_uint32  *makeSumTabSG4()
- *                  static l_uint8   *makeValTabSG4()
- *
- *         Scale-to-gray 6x
- *                  static void       scaleToGray6Low()
- *                  static l_uint8   *makeValTabSG6()
- *
- *         Scale-to-gray 8x
- *                  static void       scaleToGray8Low()
- *                  static l_uint8   *makeValTabSG8()
- *
- *         Scale-to-gray 16x
- *                  static void       scaleToGray16Low()
- *
- *         Grayscale mipmap
- *                  static l_int32    scaleMipmapLow()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static void scaleToGray2Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_uint32 *sumtab, l_uint8 *valtab); -static l_uint32 *makeSumTabSG2(void); -static l_uint8 *makeValTabSG2(void); -static void scaleToGray3Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_uint32 *sumtab, l_uint8 *valtab); -static l_uint32 *makeSumTabSG3(void); -static l_uint8 *makeValTabSG3(void); -static void scaleToGray4Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_uint32 *sumtab, l_uint8 *valtab); -static l_uint32 *makeSumTabSG4(void); -static l_uint8 *makeValTabSG4(void); -static void scaleToGray6Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_int32 *tab8, l_uint8 *valtab); -static l_uint8 *makeValTabSG6(void); -static void scaleToGray8Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_int32 *tab8, l_uint8 *valtab); -static l_uint8 *makeValTabSG8(void); -static void scaleToGray16Low(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas, l_int32 wpls, - l_int32 *tab8); -static l_int32 scaleMipmapLow(l_uint32 *datad, l_int32 wd, l_int32 hd, - l_int32 wpld, l_uint32 *datas1, l_int32 wpls1, - l_uint32 *datas2, l_int32 wpls2, l_float32 red); - -extern l_float32 AlphaMaskBorderVals[2]; - - -/*------------------------------------------------------------------* - * Scale-to-gray (1 bpp --> 8 bpp; arbitrary downscaling) * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleToGray() - * - * \param[in] pixs 1 bpp - * \param[in] scalefactor reduction: must be > 0.0 and < 1.0 - * \return pixd 8 bpp, scaled down by scalefactor in each direction, - * or NULL on error. - * - *
- * Notes:
- *
- *  For faster scaling in the range of scalefactors from 0.0625 to 0.5,
- *  with very little difference in quality, use pixScaleToGrayFast().
- *
- *  Binary images have sharp edges, so they intrinsically have very
- *  high frequency content.  To avoid aliasing, they must be low-pass
- *  filtered, which tends to blur the edges.  How can we keep relatively
- *  crisp edges without aliasing?  The trick is to do binary upscaling
- *  followed by a power-of-2 scaleToGray.  For large reductions, where
- *  you don't end up with much detail, some corners can be cut.
- *
- *  The intent here is to get high quality reduced grayscale
- *  images with relatively little computation.  We do binary
- *  pre-scaling followed by scaleToGrayN() for best results,
- *  esp. to avoid excess blur when the scale factor is near
- *  an inverse power of 2.  Where a low-pass filter is required,
- *  we use simple convolution kernels: either the hat filter for
- *  linear interpolation or a flat filter for larger downscaling.
- *  Other choices, such as a perfect bandpass filter with infinite extent
- *  (the sinc) or various approximations to it (e.g., lanczos), are
- *  unnecessarily expensive.
- *
- *  The choices made are as follows:
- *      (1) Do binary upscaling before scaleToGrayN() for scalefactors > 1/8
- *      (2) Do binary downscaling before scaleToGray8() for scalefactors
- *          between 1/16 and 1/8.
- *      (3) Use scaleToGray16() before grayscale downscaling for
- *          scalefactors less than 1/16
- *  Another reasonable choice would be to start binary downscaling
- *  for scalefactors below 1/4, rather than below 1/8 as we do here.
- *
- *  The general scaling rules, not all of which are used here, go as follows:
- *      (1) For grayscale upscaling, use pixScaleGrayLI().  However,
- *          note that edges will be visibly blurred for scalefactors
- *          near (but above) 1.0.  Replication will avoid edge blur,
- *          and should be considered for factors very near 1.0.
- *      (2) For grayscale downscaling with a scale factor larger than
- *          about 0.7, use pixScaleGrayLI().  For scalefactors near
- *          (but below) 1.0, you tread between Scylla and Charybdis.
- *          pixScaleGrayLI() again gives edge blurring, but
- *          pixScaleBySampling() gives visible aliasing.
- *      (3) For grayscale downscaling with a scale factor smaller than
- *          about 0.7, use pixScaleSmooth()
- *      (4) For binary input images, do as much scale to gray as possible
- *          using the special integer functions (2, 3, 4, 8 and 16).
- *      (5) It is better to upscale in binary, followed by scaleToGrayN()
- *          than to do scaleToGrayN() followed by an upscale using either
- *          LI or oversampling.
- *      (6) It may be better to downscale in binary, followed by
- *          scaleToGrayN() than to first use scaleToGrayN() followed by
- *          downscaling.  For downscaling between 8x and 16x, this is
- *          a reasonable option.
- *      (7) For reductions greater than 16x, it's reasonable to use
- *          scaleToGray16() followed by further grayscale downscaling.
- * 
- */ -PIX * -pixScaleToGray(PIX *pixs, - l_float32 scalefactor) -{ -l_int32 w, h, minsrc, mindest; -l_float32 mag, red; -PIX *pixt, *pixd; - - PROCNAME("pixScaleToGray"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (scalefactor <= 0.0) - return (PIX *)ERROR_PTR("scalefactor <= 0.0", procName, NULL); - if (scalefactor >= 1.0) - return (PIX *)ERROR_PTR("scalefactor >= 1.0", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - minsrc = L_MIN(w, h); - mindest = (l_int32)((l_float32)minsrc * scalefactor); - if (mindest < 2) - return (PIX *)ERROR_PTR("scalefactor too small", procName, NULL); - - if (scalefactor > 0.5) { /* see note (5) */ - mag = 2.0 * scalefactor; /* will be < 2.0 */ -/* lept_stderr("2x with mag %7.3f\n", mag); */ - if ((pixt = pixScaleBinary(pixs, mag, mag)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray2(pixt); - } else if (scalefactor == 0.5) { - return pixd = pixScaleToGray2(pixs); - } else if (scalefactor > 0.33333) { /* see note (5) */ - mag = 3.0 * scalefactor; /* will be < 1.5 */ -/* lept_stderr("3x with mag %7.3f\n", mag); */ - if ((pixt = pixScaleBinary(pixs, mag, mag)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray3(pixt); - } else if (scalefactor > 0.25) { /* see note (5) */ - mag = 4.0 * scalefactor; /* will be < 1.3333 */ -/* lept_stderr("4x with mag %7.3f\n", mag); */ - if ((pixt = pixScaleBinary(pixs, mag, mag)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray4(pixt); - } else if (scalefactor == 0.25) { - return pixd = pixScaleToGray4(pixs); - } else if (scalefactor > 0.16667) { /* see note (5) */ - mag = 6.0 * scalefactor; /* will be < 1.5 */ -/* lept_stderr("6x with mag %7.3f\n", mag); */ - if ((pixt = pixScaleBinary(pixs, mag, mag)) == NULL) - return (PIX 
*)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray6(pixt); - } else if (scalefactor == 0.16667) { - return pixd = pixScaleToGray6(pixs); - } else if (scalefactor > 0.125) { /* see note (5) */ - mag = 8.0 * scalefactor; /* will be < 1.3333 */ -/* lept_stderr("8x with mag %7.3f\n", mag); */ - if ((pixt = pixScaleBinary(pixs, mag, mag)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray8(pixt); - } else if (scalefactor == 0.125) { - return pixd = pixScaleToGray8(pixs); - } else if (scalefactor > 0.0625) { /* see note (6) */ - red = 8.0 * scalefactor; /* will be > 0.5 */ -/* lept_stderr("8x with red %7.3f\n", red); */ - if ((pixt = pixScaleBinary(pixs, red, red)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray8(pixt); - } else if (scalefactor == 0.0625) { - return pixd = pixScaleToGray16(pixs); - } else { /* see note (7) */ - red = 16.0 * scalefactor; /* will be <= 1.0 */ -/* lept_stderr("16x with red %7.3f\n", red); */ - if ((pixt = pixScaleToGray16(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - if (red < 0.7) - pixd = pixScaleSmooth(pixt, red, red); /* see note (3) */ - else - pixd = pixScaleGrayLI(pixt, red, red); /* see note (2) */ - } - - pixDestroy(&pixt); - if (!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*! - * \brief pixScaleToGrayFast() - * - * \param[in] pixs 1 bpp - * \param[in] scalefactor reduction: must be > 0.0 and < 1.0 - * \return pixd 8 bpp, scaled down by scalefactor in each direction, - * or NULL on error. - * - *
- * Notes:
- *      (1) See notes in pixScaleToGray() for the basic approach.
- *      (2) This function is considerably less expensive than pixScaleToGray()
- *          for scalefactor in the range (0.0625 ... 0.5), and the
- *          quality is nearly as good.
- *      (3) Unlike pixScaleToGray(), which does binary upscaling before
- *          downscaling for scale factors >= 0.0625, pixScaleToGrayFast()
- *          first downscales in binary for all scale factors < 0.5, and
- *          then does a 2x scale-to-gray as the final step.  For
- *          scale factors < 0.0625, both do a 16x scale-to-gray, followed
- *          by further grayscale reduction.
- * 
- */ -PIX * -pixScaleToGrayFast(PIX *pixs, - l_float32 scalefactor) -{ -l_int32 w, h, minsrc, mindest; -l_float32 eps, factor; -PIX *pixt, *pixd; - - PROCNAME("pixScaleToGrayFast"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (scalefactor <= 0.0) - return (PIX *)ERROR_PTR("scalefactor <= 0.0", procName, NULL); - if (scalefactor >= 1.0) - return (PIX *)ERROR_PTR("scalefactor >= 1.0", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - minsrc = L_MIN(w, h); - mindest = (l_int32)((l_float32)minsrc * scalefactor); - if (mindest < 2) - return (PIX *)ERROR_PTR("scalefactor too small", procName, NULL); - eps = 0.0001; - - /* Handle the special cases */ - if (scalefactor > 0.5 - eps && scalefactor < 0.5 + eps) - return pixScaleToGray2(pixs); - else if (scalefactor > 0.33333 - eps && scalefactor < 0.33333 + eps) - return pixScaleToGray3(pixs); - else if (scalefactor > 0.25 - eps && scalefactor < 0.25 + eps) - return pixScaleToGray4(pixs); - else if (scalefactor > 0.16666 - eps && scalefactor < 0.16666 + eps) - return pixScaleToGray6(pixs); - else if (scalefactor > 0.125 - eps && scalefactor < 0.125 + eps) - return pixScaleToGray8(pixs); - else if (scalefactor > 0.0625 - eps && scalefactor < 0.0625 + eps) - return pixScaleToGray16(pixs); - - if (scalefactor > 0.0625) { /* scale binary first */ - factor = 2.0 * scalefactor; - if ((pixt = pixScaleBinary(pixs, factor, factor)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - pixd = pixScaleToGray2(pixt); - } else { /* scalefactor < 0.0625; scale-to-gray first */ - factor = 16.0 * scalefactor; /* will be < 1.0 */ - if ((pixt = pixScaleToGray16(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - if (factor < 0.7) - pixd = pixScaleSmooth(pixt, factor, factor); - else - pixd = pixScaleGrayLI(pixt, factor, factor); - } - pixDestroy(&pixt); - if 
(!pixd) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Scale-to-gray (1 bpp --> 8 bpp; integer downscaling) * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixScaleToGray2() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, scaled down by 2x in each direction, - * or NULL on error. - */ -PIX * -pixScaleToGray2(PIX *pixs) -{ -l_uint8 *valtab; -l_int32 ws, hs, wd, hd; -l_int32 wpld, wpls; -l_uint32 *sumtab; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleToGray2"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = ws / 2; - hd = hs / 2; - if (wd == 0 || hd == 0) - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSetPadBits(pixs, 0); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.5, 0.5); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - sumtab = makeSumTabSG2(); - valtab = makeValTabSG2(); - scaleToGray2Low(datad, wd, hd, wpld, datas, wpls, sumtab, valtab); - LEPT_FREE(sumtab); - LEPT_FREE(valtab); - return pixd; -} - - -/*! - * \brief pixScaleToGray3() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, scaled down by 3x in each direction, - * or NULL on error. - * - *
- * Notes:
- *      (1) Speed is about 100 x 10^6 src-pixels/sec/GHz.
- *          Another way to express this is it processes 1 src pixel
- *          in about 10 cycles.
- *      (2) The width of pixd is truncated is truncated to a factor of 8.
- * 
- */ -PIX * -pixScaleToGray3(PIX *pixs) -{ -l_uint8 *valtab; -l_int32 ws, hs, wd, hd; -l_int32 wpld, wpls; -l_uint32 *sumtab; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleToGray3"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = (ws / 3) & 0xfffffff8; /* truncate to factor of 8 */ - hd = hs / 3; - if (wd == 0 || hd == 0) - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.33333, 0.33333); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - sumtab = makeSumTabSG3(); - valtab = makeValTabSG3(); - scaleToGray3Low(datad, wd, hd, wpld, datas, wpls, sumtab, valtab); - LEPT_FREE(sumtab); - LEPT_FREE(valtab); - return pixd; -} - - -/*! - * \brief pixScaleToGray4() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, scaled down by 4x in each direction, - * or NULL on error. - * - *
- * Notes:
- *      (1) The width of pixd is truncated is truncated to a factor of 2.
- * 
- */ -PIX * -pixScaleToGray4(PIX *pixs) -{ -l_uint8 *valtab; -l_int32 ws, hs, wd, hd; -l_int32 wpld, wpls; -l_uint32 *sumtab; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleToGray4"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = (ws / 4) & 0xfffffffe; /* truncate to factor of 2 */ - hd = hs / 4; - if (wd == 0 || hd == 0) - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.25, 0.25); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - sumtab = makeSumTabSG4(); - valtab = makeValTabSG4(); - scaleToGray4Low(datad, wd, hd, wpld, datas, wpls, sumtab, valtab); - LEPT_FREE(sumtab); - LEPT_FREE(valtab); - return pixd; -} - - - -/*! - * \brief pixScaleToGray6() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, scaled down by 6x in each direction, - * or NULL on error. - * - *
- * Notes:
- *      (1) The width of pixd is truncated is truncated to a factor of 8.
- * 
- */ -PIX * -pixScaleToGray6(PIX *pixs) -{ -l_uint8 *valtab; -l_int32 ws, hs, wd, hd, wpld, wpls; -l_int32 *tab8; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleToGray6"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = (ws / 6) & 0xfffffff8; /* truncate to factor of 8 */ - hd = hs / 6; - if (wd == 0 || hd == 0) - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.16667, 0.16667); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - tab8 = makePixelSumTab8(); - valtab = makeValTabSG6(); - scaleToGray6Low(datad, wd, hd, wpld, datas, wpls, tab8, valtab); - LEPT_FREE(tab8); - LEPT_FREE(valtab); - return pixd; -} - - -/*! 
- * \brief pixScaleToGray8() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, scaled down by 8x in each direction, - * or NULL on error - */ -PIX * -pixScaleToGray8(PIX *pixs) -{ -l_uint8 *valtab; -l_int32 ws, hs, wd, hd; -l_int32 wpld, wpls; -l_int32 *tab8; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleToGray8"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = ws / 8; /* truncate to nearest dest byte */ - hd = hs / 8; - if (wd == 0 || hd == 0) - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.125, 0.125); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - tab8 = makePixelSumTab8(); - valtab = makeValTabSG8(); - scaleToGray8Low(datad, wd, hd, wpld, datas, wpls, tab8, valtab); - LEPT_FREE(tab8); - LEPT_FREE(valtab); - return pixd; -} - - -/*! - * \brief pixScaleToGray16() - * - * \param[in] pixs 1 bpp - * \return pixd 8 bpp, scaled down by 16x in each direction, - * or NULL on error. 
- */ -PIX * -pixScaleToGray16(PIX *pixs) -{ -l_int32 ws, hs, wd, hd; -l_int32 wpld, wpls; -l_int32 *tab8; -l_uint32 *datas, *datad; -PIX *pixd; - - PROCNAME("pixScaleToGray16"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs must be 1 bpp", procName, NULL); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = ws / 16; - hd = hs / 16; - if (wd == 0 || hd == 0) - return (PIX *)ERROR_PTR("pixs too small", procName, NULL); - - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, 0.0625, 0.0625); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - - tab8 = makePixelSumTab8(); - scaleToGray16Low(datad, wd, hd, wpld, datas, wpls, tab8); - LEPT_FREE(tab8); - return pixd; -} - - -/*------------------------------------------------------------------* - * Scale-to-gray mipmap(1 bpp --> 8 bpp, arbitrary reduction) * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleToGrayMipmap() - * - * \param[in] pixs 1 bpp - * \param[in] scalefactor reduction: must be > 0.0 and < 1.0 - * \return pixd 8 bpp, scaled down by scalefactor in each direction, - * or NULL on error. - * - *
- * Notes:
- *
- *  This function is here mainly for pedagogical reasons.
- *  Mip-mapping is widely used in graphics for texture mapping, because
- *  the texture changes smoothly with scale.  This is accomplished by
- *  constructing a multiresolution pyramid and, for each pixel,
- *  doing a linear interpolation between corresponding pixels in
- *  the two planes of the pyramid that bracket the desired resolution.
- *  The computation is very efficient, and is implemented in hardware
- *  in high-end graphics cards.
- *
- *  We can use mip-mapping for scale-to-gray by using two scale-to-gray
- *  reduced images (we don't need the entire pyramid) selected from
- *  the set {2x, 4x, ... 16x}, and interpolating.  However, we get
- *  severe aliasing, probably because we are subsampling from the
- *  higher resolution image.  The method is very fast, but the result
- *  is very poor.  In fact, the results don't look any better than
- *  either subsampling off the higher-res grayscale image or oversampling
- *  on the lower-res image.  Consequently, this method should NOT be used
- *  for generating reduced images, scale-to-gray or otherwise.
- * 
- */ -PIX * -pixScaleToGrayMipmap(PIX *pixs, - l_float32 scalefactor) -{ -l_int32 w, h, minsrc, mindest; -l_float32 red; -PIX *pixs1, *pixs2, *pixt, *pixd; - - PROCNAME("pixScaleToGrayMipmap"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (scalefactor <= 0.0) - return (PIX *)ERROR_PTR("scalefactor <= 0.0", procName, NULL); - if (scalefactor >= 1.0) - return (PIX *)ERROR_PTR("scalefactor >= 1.0", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - minsrc = L_MIN(w, h); - mindest = (l_int32)((l_float32)minsrc * scalefactor); - if (mindest < 2) - return (PIX *)ERROR_PTR("scalefactor too small", procName, NULL); - - if (scalefactor > 0.5) { - pixs1 = pixConvert1To8(NULL, pixs, 255, 0); - pixs2 = pixScaleToGray2(pixs); - red = scalefactor; - } else if (scalefactor == 0.5) { - return pixScaleToGray2(pixs); - } else if (scalefactor > 0.25) { - pixs1 = pixScaleToGray2(pixs); - pixs2 = pixScaleToGray4(pixs); - red = 2. * scalefactor; - } else if (scalefactor == 0.25) { - return pixScaleToGray4(pixs); - } else if (scalefactor > 0.125) { - pixs1 = pixScaleToGray4(pixs); - pixs2 = pixScaleToGray8(pixs); - red = 4. * scalefactor; - } else if (scalefactor == 0.125) { - return pixScaleToGray8(pixs); - } else if (scalefactor > 0.0625) { - pixs1 = pixScaleToGray8(pixs); - pixs2 = pixScaleToGray16(pixs); - red = 8. 
* scalefactor; - } else if (scalefactor == 0.0625) { - return pixScaleToGray16(pixs); - } else { /* end of the pyramid; just do it */ - red = 16.0 * scalefactor; /* will be <= 1.0 */ - if ((pixt = pixScaleToGray16(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, NULL); - if (red < 0.7) - pixd = pixScaleSmooth(pixt, red, red); - else - pixd = pixScaleGrayLI(pixt, red, red); - pixDestroy(&pixt); - return pixd; - } - - pixd = pixScaleMipmap(pixs1, pixs2, red); - pixCopyInputFormat(pixd, pixs); - - pixDestroy(&pixs1); - pixDestroy(&pixs2); - return pixd; -} - - -/*------------------------------------------------------------------* - * Grayscale scaling using mipmap * - *------------------------------------------------------------------*/ -/*! - * \brief pixScaleMipmap() - * - * \param[in] pixs1 high res 8 bpp, no cmap - * \param[in] pixs2 low res -- 2x reduced -- 8 bpp, no cmap - * \param[in] scale reduction with respect to high res image, > 0.5 - * \return 8 bpp pix, scaled down by reduction in each direction, - * or NULL on error. - * - *
- * Notes:
- *      (1) See notes in pixScaleToGrayMipmap().
- *      (2) This function suffers from aliasing effects that are
- *          easily seen in document images.
- * 
- */ -PIX * -pixScaleMipmap(PIX *pixs1, - PIX *pixs2, - l_float32 scale) -{ -l_int32 ws1, hs1, ws2, hs2, wd, hd, wpls1, wpls2, wpld; -l_uint32 *datas1, *datas2, *datad; -PIX *pixd; - - PROCNAME("pixScaleMipmap"); - - if (!pixs1 || pixGetDepth(pixs1) != 8 || pixGetColormap(pixs1)) - return (PIX *)ERROR_PTR("pixs1 underdefined, not 8 bpp, or cmapped", - procName, NULL); - if (!pixs2 || pixGetDepth(pixs2) != 8 || pixGetColormap(pixs2)) - return (PIX *)ERROR_PTR("pixs2 underdefined, not 8 bpp, or cmapped", - procName, NULL); - pixGetDimensions(pixs1, &ws1, &hs1, NULL); - pixGetDimensions(pixs2, &ws2, &hs2, NULL); - if (scale > 1.0 || scale < 0.5) - return (PIX *)ERROR_PTR("scale not in [0.5, 1.0]", procName, NULL); - if (ws1 < 2 * ws2) - return (PIX *)ERROR_PTR("invalid width ratio", procName, NULL); - if (hs1 < 2 * hs2) - return (PIX *)ERROR_PTR("invalid height ratio", procName, NULL); - - /* Generate wd and hd from the lower resolution dimensions, - * to guarantee staying within both src images */ - datas1 = pixGetData(pixs1); - wpls1 = pixGetWpl(pixs1); - datas2 = pixGetData(pixs2); - wpls2 = pixGetWpl(pixs2); - wd = (l_int32)(2. * scale * pixGetWidth(pixs2)); - hd = (l_int32)(2. * scale * pixGetHeight(pixs2)); - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs1); - pixCopyResolution(pixd, pixs1); - pixScaleResolution(pixd, scale, scale); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - scaleMipmapLow(datad, wd, hd, wpld, datas1, wpls1, datas2, wpls2, scale); - return pixd; -} - - -/*------------------------------------------------------------------* - * Replicated (integer) expansion * - *------------------------------------------------------------------*/ -/*! - * \brief pixExpandReplicate() - * - * \param[in] pixs 1, 2, 4, 8, 16, 32 bpp - * \param[in] factor integer scale factor for replicative expansion - * \return pixd scaled up, or NULL on error. 
- */ -PIX * -pixExpandReplicate(PIX *pixs, - l_int32 factor) -{ -l_int32 w, h, d, wd, hd, wpls, wpld, start, i, j, k; -l_uint8 sval; -l_uint16 sval16; -l_uint32 sval32; -l_uint32 *lines, *datas, *lined, *datad; -PIX *pixd; - - PROCNAME("pixExpandReplicate"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 2 && d != 4 && d != 8 && d != 16 && d != 32) - return (PIX *)ERROR_PTR("depth not in {1,2,4,8,16,32}", procName, NULL); - if (factor <= 0) - return (PIX *)ERROR_PTR("factor <= 0; invalid", procName, NULL); - if (factor == 1) - return pixCopy(NULL, pixs); - - if (d == 1) - return pixExpandBinaryReplicate(pixs, factor, factor); - - wd = factor * w; - hd = factor * h; - if ((pixd = pixCreate(wd, hd, d)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyColormap(pixd, pixs); - pixCopyInputFormat(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixScaleResolution(pixd, (l_float32)factor, (l_float32)factor); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - switch (d) { - case 2: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + factor * i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_DIBIT(lines, j); - start = factor * j; - for (k = 0; k < factor; k++) - SET_DATA_DIBIT(lined, start + k, sval); - } - for (k = 1; k < factor; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - break; - case 4: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + factor * i * wpld; - for (j = 0; j < w; j++) { - sval = GET_DATA_QBIT(lines, j); - start = factor * j; - for (k = 0; k < factor; k++) - SET_DATA_QBIT(lined, start + k, sval); - } - for (k = 1; k < factor; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - break; - case 8: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + factor * i * wpld; - for (j = 0; j < w; j++) { - 
sval = GET_DATA_BYTE(lines, j); - start = factor * j; - for (k = 0; k < factor; k++) - SET_DATA_BYTE(lined, start + k, sval); - } - for (k = 1; k < factor; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - break; - case 16: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + factor * i * wpld; - for (j = 0; j < w; j++) { - sval16 = GET_DATA_TWO_BYTES(lines, j); - start = factor * j; - for (k = 0; k < factor; k++) - SET_DATA_TWO_BYTES(lined, start + k, sval16); - } - for (k = 1; k < factor; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - break; - case 32: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + factor * i * wpld; - for (j = 0; j < w; j++) { - sval32 = *(lines + j); - start = factor * j; - for (k = 0; k < factor; k++) - *(lined + start + k) = sval32; - } - for (k = 1; k < factor; k++) - memcpy(lined + k * wpld, lined, 4 * wpld); - } - break; - default: - lept_stderr("invalid depth\n"); - } - - if (d == 32 && pixGetSpp(pixs) == 4) - pixScaleAndTransferAlpha(pixd, pixs, (l_float32)factor, - (l_float32)factor); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Downscaling using min or max * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixScaleGrayMinMax() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] xfact x downscaling factor; integer - * \param[in] yfact y downscaling factor; integer - * \param[in] type L_CHOOSE_MIN, L_CHOOSE_MAX, L_CHOOSE_MAXDIFF - * \return pixd 8 bpp - * - *
- * Notes:
- *      (1) The downscaled pixels in pixd are the min, max or (max - min)
- *          of the corresponding set of xfact * yfact pixels in pixs.
- *      (2) Using L_CHOOSE_MIN is equivalent to a grayscale erosion,
- *          using a brick Sel of size (xfact * yfact), followed by
- *          subsampling within each (xfact * yfact) cell.  Using
- *          L_CHOOSE_MAX is equivalent to the corresponding dilation.
- *      (3) Using L_CHOOSE_MAXDIFF finds the difference between max
- *          and min values in each cell.
- *      (4) For the special case of downscaling by 2x in both directions,
- *          pixScaleGrayMinMax2() is about 2x more efficient.
- * 
- */ -PIX * -pixScaleGrayMinMax(PIX *pixs, - l_int32 xfact, - l_int32 yfact, - l_int32 type) -{ -l_int32 ws, hs, wd, hd, wpls, wpld, i, j, k, m; -l_int32 minval, maxval, val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixScaleGrayMinMax"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - pixGetDimensions(pixs, &ws, &hs, NULL); - if (type != L_CHOOSE_MIN && type != L_CHOOSE_MAX && - type != L_CHOOSE_MAXDIFF) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (xfact < 1 || yfact < 1) - return (PIX *)ERROR_PTR("xfact and yfact must be >= 1", procName, NULL); - - if (xfact == 2 && yfact == 2) - return pixScaleGrayMinMax2(pixs, type); - - wd = ws / xfact; - if (wd == 0) { /* single tile */ - wd = 1; - xfact = ws; - } - hd = hs / yfact; - if (hd == 0) { /* single tile */ - hd = 1; - yfact = hs; - } - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < hd; i++) { - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - if (type == L_CHOOSE_MIN || type == L_CHOOSE_MAXDIFF) { - minval = 255; - for (k = 0; k < yfact; k++) { - lines = datas + (yfact * i + k) * wpls; - for (m = 0; m < xfact; m++) { - val = GET_DATA_BYTE(lines, xfact * j + m); - if (val < minval) - minval = val; - } - } - } - if (type == L_CHOOSE_MAX || type == L_CHOOSE_MAXDIFF) { - maxval = 0; - for (k = 0; k < yfact; k++) { - lines = datas + (yfact * i + k) * wpls; - for (m = 0; m < xfact; m++) { - val = GET_DATA_BYTE(lines, xfact * j + m); - if (val > maxval) - maxval = val; - } - } - } - if (type == L_CHOOSE_MIN) - SET_DATA_BYTE(lined, j, minval); - else if (type == L_CHOOSE_MAX) - SET_DATA_BYTE(lined, j, maxval); - else /* type == L_CHOOSE_MAXDIFF */ - 
SET_DATA_BYTE(lined, j, maxval - minval); - } - } - - return pixd; -} - - -/*! - * \brief pixScaleGrayMinMax2() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] type L_CHOOSE_MIN, L_CHOOSE_MAX, L_CHOOSE_MAXDIFF - * \return pixd 8 bpp downscaled by 2x - * - *
- * Notes:
- *      (1) Special version for 2x reduction.  The downscaled pixels
- *          in pixd are the min, max or (max - min) of the corresponding
- *          set of 4 pixels in pixs.
- *      (2) The max and min operations are a special case (for levels 1
- *          and 4) of grayscale analog to the binary rank scaling operation
- *          pixReduceRankBinary2().  Note, however, that because of
- *          the photometric definition that higher gray values are
- *          lighter, the erosion-like L_CHOOSE_MIN will darken
- *          the resulting image, corresponding to a threshold level 1
- *          in the binary case.  Likewise, L_CHOOSE_MAX will lighten
- *          the pixd, corresponding to a threshold level of 4.
- *      (3) To choose any of the four rank levels in a 2x grayscale
- *          reduction, use pixScaleGrayRank2().
- *      (4) This runs at about 70 MPix/sec/GHz of source data for
- *          erosion and dilation.
- * 
- */ -PIX * -pixScaleGrayMinMax2(PIX *pixs, - l_int32 type) -{ -l_int32 ws, hs, wd, hd, wpls, wpld, i, j, k; -l_int32 minval, maxval; -l_int32 val[4]; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixScaleGrayMinMax2"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - pixGetDimensions(pixs, &ws, &hs, NULL); - if (ws < 2 || hs < 2) - return (PIX *)ERROR_PTR("too small: ws < 2 or hs < 2", procName, NULL); - if (type != L_CHOOSE_MIN && type != L_CHOOSE_MAX && - type != L_CHOOSE_MAXDIFF) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - - wd = ws / 2; - hd = hs / 2; - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < hd; i++) { - lines = datas + 2 * i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - val[0] = GET_DATA_BYTE(lines, 2 * j); - val[1] = GET_DATA_BYTE(lines, 2 * j + 1); - val[2] = GET_DATA_BYTE(lines + wpls, 2 * j); - val[3] = GET_DATA_BYTE(lines + wpls, 2 * j + 1); - if (type == L_CHOOSE_MIN || type == L_CHOOSE_MAXDIFF) { - minval = 255; - for (k = 0; k < 4; k++) { - if (val[k] < minval) - minval = val[k]; - } - } - if (type == L_CHOOSE_MAX || type == L_CHOOSE_MAXDIFF) { - maxval = 0; - for (k = 0; k < 4; k++) { - if (val[k] > maxval) - maxval = val[k]; - } - } - if (type == L_CHOOSE_MIN) - SET_DATA_BYTE(lined, j, minval); - else if (type == L_CHOOSE_MAX) - SET_DATA_BYTE(lined, j, maxval); - else /* type == L_CHOOSE_MAXDIFF */ - SET_DATA_BYTE(lined, j, maxval - minval); - } - } - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Grayscale downscaling using rank value * - *-----------------------------------------------------------------------*/ 
-/*! - * \brief pixScaleGrayRankCascade() - * - * \param[in] pixs 8 bpp, not cmapped - * \param[in] level1, level2 ... - * \param[in] level3, level4 rank thresholds, in set {0, 1, 2, 3, 4} - * \return pixd 8 bpp, downscaled by up to 16x - * - *
- * Notes:
- *      (1) This performs up to four cascaded 2x rank reductions.
- *      (2) Use level = 0 to truncate the cascade.
- * 
- */ -PIX * -pixScaleGrayRankCascade(PIX *pixs, - l_int32 level1, - l_int32 level2, - l_int32 level3, - l_int32 level4) -{ -PIX *pixt1, *pixt2, *pixt3, *pixt4; - - PROCNAME("pixScaleGrayRankCascade"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - if (level1 > 4 || level2 > 4 || level3 > 4 || level4 > 4) - return (PIX *)ERROR_PTR("levels must not exceed 4", procName, NULL); - - if (level1 <= 0) { - L_WARNING("no reduction because level1 not > 0\n", procName); - return pixCopy(NULL, pixs); - } - - pixt1 = pixScaleGrayRank2(pixs, level1); - if (level2 <= 0) - return pixt1; - - pixt2 = pixScaleGrayRank2(pixt1, level2); - pixDestroy(&pixt1); - if (level3 <= 0) - return pixt2; - - pixt3 = pixScaleGrayRank2(pixt2, level3); - pixDestroy(&pixt2); - if (level4 <= 0) - return pixt3; - - pixt4 = pixScaleGrayRank2(pixt3, level4); - pixDestroy(&pixt3); - return pixt4; -} - - -/*! - * \brief pixScaleGrayRank2() - * - * \param[in] pixs 8 bpp, no cmap - * \param[in] rank 1 (darkest), 2, 3, 4 (lightest) - * \return pixd 8 bpp, downscaled by 2x - * - *
- * Notes:
- *      (1) Rank 2x reduction.  If rank == 1(4), the downscaled pixels
- *          in pixd are the min(max) of the corresponding set of
- *          4 pixels in pixs.  Values 2 and 3 are intermediate.
- *      (2) This is the grayscale analog to the binary rank scaling operation
- *          pixReduceRankBinary2().  Here, because of the photometric
- *          definition that higher gray values are lighter, rank 1 gives
- *          the darkest pixel, whereas rank 4 gives the lightest pixel.
- *          This is opposite to the binary rank operation.
- *      (3) For rank = 1 and 4, this calls pixScaleGrayMinMax2(),
- *          which runs at about 70 MPix/sec/GHz of source data.
- *          For rank 2 and 3, this runs 3x slower, at about 25 MPix/sec/GHz.
- * 
- */ -PIX * -pixScaleGrayRank2(PIX *pixs, - l_int32 rank) -{ -l_int32 ws, hs, wd, hd, wpls, wpld, i, j, k, m; -l_int32 minval, maxval, rankval, minindex, maxindex; -l_int32 val[4]; -l_int32 midval[4]; /* should only use 2 of these */ -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixScaleGrayRank2"); - - if (!pixs || pixGetDepth(pixs) != 8 || pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs undefined, not 8 bpp, or cmapped", - procName, NULL); - if (rank < 1 || rank > 4) - return (PIX *)ERROR_PTR("invalid rank", procName, NULL); - - if (rank == 1) - return pixScaleGrayMinMax2(pixs, L_CHOOSE_MIN); - if (rank == 4) - return pixScaleGrayMinMax2(pixs, L_CHOOSE_MAX); - - pixGetDimensions(pixs, &ws, &hs, NULL); - wd = ws / 2; - hd = hs / 2; - if ((pixd = pixCreate(wd, hd, 8)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixCopyInputFormat(pixd, pixs); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - for (i = 0; i < hd; i++) { - lines = datas + 2 * i * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - val[0] = GET_DATA_BYTE(lines, 2 * j); - val[1] = GET_DATA_BYTE(lines, 2 * j + 1); - val[2] = GET_DATA_BYTE(lines + wpls, 2 * j); - val[3] = GET_DATA_BYTE(lines + wpls, 2 * j + 1); - minval = maxval = val[0]; - minindex = maxindex = 0; - for (k = 1; k < 4; k++) { - if (val[k] < minval) { - minval = val[k]; - minindex = k; - continue; - } - if (val[k] > maxval) { - maxval = val[k]; - maxindex = k; - } - } - for (k = 0, m = 0; k < 4; k++) { - if (k == minindex || k == maxindex) - continue; - midval[m++] = val[k]; - } - if (m > 2) /* minval == maxval; all val[k] are the same */ - rankval = minval; - else if (rank == 2) - rankval = L_MIN(midval[0], midval[1]); - else /* rank == 3 */ - rankval = L_MAX(midval[0], midval[1]); - SET_DATA_BYTE(lined, j, rankval); - } - } - - return pixd; -} - - 
-/*------------------------------------------------------------------------* - * Helper function for transferring alpha with scaling * - *------------------------------------------------------------------------*/ -/*! - * \brief pixScaleAndTransferAlpha() - * - * \param[in] pixd 32 bpp, scaled image - * \param[in] pixs 32 bpp, original unscaled image - * \param[in] scalex must be > 0.0 - * \param[in] scaley must be > 0.0 - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This scales the alpha component of pixs and inserts into pixd.
- * 
- */ -l_ok -pixScaleAndTransferAlpha(PIX *pixd, - PIX *pixs, - l_float32 scalex, - l_float32 scaley) -{ -PIX *pix1, *pix2; - - PROCNAME("pixScaleAndTransferAlpha"); - - if (!pixs || !pixd) - return ERROR_INT("pixs and pixd not both defined", procName, 1); - if (pixGetDepth(pixs) != 32 || pixGetSpp(pixs) != 4) - return ERROR_INT("pixs not 32 bpp and 4 spp", procName, 1); - if (pixGetDepth(pixd) != 32) - return ERROR_INT("pixd not 32 bpp", procName, 1); - - if (scalex == 1.0 && scaley == 1.0) { - pixCopyRGBComponent(pixd, pixs, L_ALPHA_CHANNEL); - return 0; - } - - pix1 = pixGetRGBComponent(pixs, L_ALPHA_CHANNEL); - pix2 = pixScale(pix1, scalex, scaley); - pixSetRGBComponent(pixd, pix2, L_ALPHA_CHANNEL); - pixDestroy(&pix1); - pixDestroy(&pix2); - return 0; -} - - -/*------------------------------------------------------------------------* - * RGB scaling including alpha (blend) component and gamma transform * - *------------------------------------------------------------------------*/ -/*! - * \brief pixScaleWithAlpha() - * - * \param[in] pixs 32 bpp rgb or cmapped - * \param[in] scalex must be > 0.0 - * \param[in] scaley must be > 0.0 - * \param[in] pixg [optional] 8 bpp, can be null - * \param[in] fract between 0.0 and 1.0, with 0.0 fully transparent - * and 1.0 fully opaque - * \return pixd 32 bpp rgba, or NULL on error - * - *
- * Notes:
- *      (1) The alpha channel is transformed separately from pixs,
- *          and aligns with it, being fully transparent outside the
- *          boundary of the transformed pixs.  For pixels that are fully
- *          transparent, a blending function like pixBlendWithGrayMask()
- *          will give zero weight to corresponding pixels in pixs.
- *      (2) Scaling is done with area mapping or linear interpolation,
- *          depending on the scale factors.  Default sharpening is done.
- *      (3) If pixg is NULL, it is generated as an alpha layer that is
- *          partially opaque, using %fract.  Otherwise, it is cropped
- *          to pixs if required, and %fract is ignored.  The alpha
- *          channel in pixs is never used.
- *      (4) Colormaps are removed to 32 bpp.
- *      (5) The default setting for the border values in the alpha channel
- *          is 0 (transparent) for the outermost ring of pixels and
- *          (0.5 * fract * 255) for the second ring.  When blended over
- *          a second image, this
- *          (a) shrinks the visible image to make a clean overlap edge
- *              with an image below, and
- *          (b) softens the edges by weakening the aliasing there.
- *          Use l_setAlphaMaskBorder() to change these values.
- *      (6) A subtle use of gamma correction is to remove gamma correction
- *          before scaling and restore it afterwards.  This is done
- *          by sandwiching this function between a gamma/inverse-gamma
- *          photometric transform:
- *              pixt = pixGammaTRCWithAlpha(NULL, pixs, 1.0 / gamma, 0, 255);
- *              pixd = pixScaleWithAlpha(pixt, scalex, scaley, NULL, fract);
- *              pixGammaTRCWithAlpha(pixd, pixd, gamma, 0, 255);
- *              pixDestroy(&pixt);
- *          This has the side-effect of producing artifacts in the very
- *          dark regions.
- * 
- */ -PIX * -pixScaleWithAlpha(PIX *pixs, - l_float32 scalex, - l_float32 scaley, - PIX *pixg, - l_float32 fract) -{ -l_int32 ws, hs, d, spp; -PIX *pixd, *pix32, *pixg2, *pixgs; - - PROCNAME("pixScaleWithAlpha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &ws, &hs, &d); - if (d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not cmapped or 32 bpp", procName, NULL); - if (scalex <= 0.0 || scaley <= 0.0) - return (PIX *)ERROR_PTR("scale factor <= 0.0", procName, NULL); - if (pixg && pixGetDepth(pixg) != 8) { - L_WARNING("pixg not 8 bpp; using 'fract' transparent alpha\n", - procName); - pixg = NULL; - } - if (!pixg && (fract < 0.0 || fract > 1.0)) { - L_WARNING("invalid fract; using fully opaque\n", procName); - fract = 1.0; - } - if (!pixg && fract == 0.0) - L_WARNING("transparent alpha; image will not be blended\n", procName); - - /* Make sure input to scaling is 32 bpp rgb, and scale it */ - if (d != 32) - pix32 = pixConvertTo32(pixs); - else - pix32 = pixClone(pixs); - spp = pixGetSpp(pix32); - pixSetSpp(pix32, 3); /* ignore the alpha channel for scaling */ - pixd = pixScale(pix32, scalex, scaley); - pixSetSpp(pix32, spp); /* restore initial value in case it's a clone */ - pixDestroy(&pix32); - - /* Set up alpha layer with a fading border and scale it */ - if (!pixg) { - pixg2 = pixCreate(ws, hs, 8); - if (fract == 1.0) - pixSetAll(pixg2); - else if (fract > 0.0) - pixSetAllArbitrary(pixg2, (l_int32)(255.0 * fract)); - } else { - pixg2 = pixResizeToMatch(pixg, NULL, ws, hs); - } - if (ws > 10 && hs > 10) { /* see note 4 */ - pixSetBorderRingVal(pixg2, 1, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[0])); - pixSetBorderRingVal(pixg2, 2, - (l_int32)(255.0 * fract * AlphaMaskBorderVals[1])); - } - pixgs = pixScaleGeneral(pixg2, scalex, scaley, 0.0, 0); - - /* Combine into a 4 spp result */ - pixSetRGBComponent(pixd, pixgs, L_ALPHA_CHANNEL); - pixCopyInputFormat(pixd, pixs); - - 
pixDestroy(&pixg2); - pixDestroy(&pixgs); - return pixd; -} - - -/* ================================================================ * - * Low level static functions * - * ================================================================ */ - -/*------------------------------------------------------------------* - * Scale-to-gray 2x * - *------------------------------------------------------------------*/ -/*! - * \brief scaleToGray2Low() - * - * \param[in] datad dest data - * \param[in] wd, hd dest width, height - * \param[in] wpld dest words/line - * \param[in] datas src data - * \param[in] wpls src words/line - * \param[in] sumtab made from makeSumTabSG2() - * \param[in] valtab made from makeValTabSG2() - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) The output is processed in sets of 4 output bytes on a row,
- *          corresponding to 4 2x2 bit-blocks in the input image.
- *          Two lookup tables are used.  The first, sumtab, gets the
- *          sum of ON pixels in 4 sets of two adjacent bits,
- *          storing the result in 4 adjacent bytes.  After sums from
- *          two rows have been added, the second table, valtab,
- *          converts from the sum of ON pixels in the 2x2 block to
- *          an 8 bpp grayscale value between 0 for 4 bits ON
- *          and 255 for 0 bits ON.
- * 
- */ -static void -scaleToGray2Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_uint32 *sumtab, - l_uint8 *valtab) -{ -l_int32 i, j, l, k, m, wd4, extra; -l_uint32 sbyte1, sbyte2, sum; -l_uint32 *lines, *lined; - - /* i indexes the dest lines - * l indexes the source lines - * j indexes the dest bytes - * k indexes the source bytes - * We take two bytes from the source (in 2 lines of 8 pixels - * each) and convert them into four 8 bpp bytes of the dest. */ - wd4 = wd & 0xfffffffc; - extra = wd - wd4; - for (i = 0, l = 0; i < hd; i++, l += 2) { - lines = datas + l * wpls; - lined = datad + i * wpld; - for (j = 0, k = 0; j < wd4; j += 4, k++) { - sbyte1 = GET_DATA_BYTE(lines, k); - sbyte2 = GET_DATA_BYTE(lines + wpls, k); - sum = sumtab[sbyte1] + sumtab[sbyte2]; - SET_DATA_BYTE(lined, j, valtab[sum >> 24]); - SET_DATA_BYTE(lined, j + 1, valtab[(sum >> 16) & 0xff]); - SET_DATA_BYTE(lined, j + 2, valtab[(sum >> 8) & 0xff]); - SET_DATA_BYTE(lined, j + 3, valtab[sum & 0xff]); - } - if (extra > 0) { - sbyte1 = GET_DATA_BYTE(lines, k); - sbyte2 = GET_DATA_BYTE(lines + wpls, k); - sum = sumtab[sbyte1] + sumtab[sbyte2]; - for (m = 0; m < extra; m++) { - SET_DATA_BYTE(lined, j + m, - valtab[((sum >> (24 - 8 * m)) & 0xff)]); - } - } - - } - - return; -} - - -/*! - * \brief makeSumTabSG2() - * - *
- * Notes:
- *      (1) Returns a table of 256 l_uint32s, giving the four output
- *          8-bit grayscale sums corresponding to 8 input bits of a binary
- *          image, for a 2x scale-to-gray op.  The sums from two
- *          adjacent scanlines are then added and transformed to
- *          output four 8 bpp pixel values, using makeValTabSG2().
- * 
- */ -static l_uint32 * -makeSumTabSG2(void) -{ -l_int32 i; -l_int32 sum[] = {0, 1, 1, 2}; -l_uint32 *tab; - - PROCNAME("makeSumTabSG2"); - - if ((tab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32))) == NULL) - return (l_uint32 *)ERROR_PTR("tab not made", procName, NULL); - - /* Pack the four sums separately in four bytes */ - for (i = 0; i < 256; i++) { - tab[i] = (sum[i & 0x3] | sum[(i >> 2) & 0x3] << 8 | - sum[(i >> 4) & 0x3] << 16 | sum[(i >> 6) & 0x3] << 24); - } - return tab; -} - - -/*! - * \brief makeValTabSG2() - * - *
- * Notes:
- *      (1) Returns an 8 bit value for the sum of ON pixels
- *          in a 2x2 square, according to
- *               val = 255 - (255 * sum)/4
- *          where sum is in set {0,1,2,3,4}
- * 
- */ -static l_uint8 * -makeValTabSG2(void) -{ -l_int32 i; -l_uint8 *tab; - - PROCNAME("makeValTabSG2"); - - if ((tab = (l_uint8 *)LEPT_CALLOC(5, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("tab not made", procName, NULL); - for (i = 0; i < 5; i++) - tab[i] = 255 - (i * 255) / 4; - return tab; -} - - -/*------------------------------------------------------------------* - * Scale-to-gray 3x * - *------------------------------------------------------------------*/ -/*! - * \brief scaleToGray3Low() - * - * \param[in] datad dest data - * \param[in] wd, hd dest width, height - * \param[in] wpld dest words/line - * \param[in] datas src data - * \param[in] wpls src words/line - * \param[in] sumtab made from makeSumTabSG3() - * \param[in] valtab made from makeValTabSG3() - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Each set of 8 3x3 bit-blocks in the source image, which
- *          consist of 72 pixels arranged 24 pixels wide by 3 scanlines,
- *          is converted to a row of 8 8-bit pixels in the dest image.
- *          These 72 pixels of the input image are runs of 24 pixels
- *          in three adjacent scanlines.  Each run of 24 pixels is
- *          stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
- *          The first, sumtab, takes 6 of these bits and stores
- *          sum, taken 3 bits at a time, in two bytes.  (See
- *          makeSumTabSG3).  This is done for each of the 3 scanlines,
- *          and the results are added.  We now have the sum of ON pixels
- *          in the first two 3x3 blocks in two bytes.  The valtab LUT
- *          then converts these values (which go from 0 to 9) to
- *          grayscale values between between 255 and 0.  (See makeValTabSG3).
- *          This process is repeated for each of the other 3 sets of
- *          6x3 input pixels, giving 8 output pixels in total.
- *      (2) Note: because the input image is processed in groups of
- *           24 x 3 pixels, the process clips the input height to
- *           (h - h % 3) and the input width to (w - w % 24).
- * 
- */ -static void -scaleToGray3Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_uint32 *sumtab, - l_uint8 *valtab) -{ -l_int32 i, j, l, k; -l_uint32 threebytes1, threebytes2, threebytes3, sum; -l_uint32 *lines, *lined; - - /* i indexes the dest lines - * l indexes the source lines - * j indexes the dest bytes - * k indexes the source bytes - * We take 9 bytes from the source (72 binary pixels - * in three lines of 24 pixels each) and convert it - * into 8 bytes of the dest (8 8bpp pixels in one line) */ - for (i = 0, l = 0; i < hd; i++, l += 3) { - lines = datas + l * wpls; - lined = datad + i * wpld; - for (j = 0, k = 0; j < wd; j += 8, k += 3) { - threebytes1 = (GET_DATA_BYTE(lines, k) << 16) | - (GET_DATA_BYTE(lines, k + 1) << 8) | - GET_DATA_BYTE(lines, k + 2); - threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) | - (GET_DATA_BYTE(lines + wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + wpls, k + 2); - threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) | - (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + 2 * wpls, k + 2); - - sum = sumtab[(threebytes1 >> 18)] + - sumtab[(threebytes2 >> 18)] + - sumtab[(threebytes3 >> 18)]; - SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]); - SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]); - - sum = sumtab[((threebytes1 >> 12) & 0x3f)] + - sumtab[((threebytes2 >> 12) & 0x3f)] + - sumtab[((threebytes3 >> 12) & 0x3f)]; - SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 2)]); - SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]); - - sum = sumtab[((threebytes1 >> 6) & 0x3f)] + - sumtab[((threebytes2 >> 6) & 0x3f)] + - sumtab[((threebytes3 >> 6) & 0x3f)]; - SET_DATA_BYTE(lined, j + 4, valtab[GET_DATA_BYTE(&sum, 2)]); - SET_DATA_BYTE(lined, j + 5, valtab[GET_DATA_BYTE(&sum, 3)]); - - sum = sumtab[(threebytes1 & 0x3f)] + - sumtab[(threebytes2 & 0x3f)] + - sumtab[(threebytes3 & 0x3f)]; - SET_DATA_BYTE(lined, j + 6, 
valtab[GET_DATA_BYTE(&sum, 2)]); - SET_DATA_BYTE(lined, j + 7, valtab[GET_DATA_BYTE(&sum, 3)]); - } - } - - return; -} - - - -/*! - * \brief makeSumTabSG3() - * - *
- * Notes:
- *      (1) Returns a table of 64 l_uint32s, giving the two output
- *          8-bit grayscale sums corresponding to 6 input bits of a binary
- *          image, for a 3x scale-to-gray op.  In practice, this would
- *          be used three times (on adjacent scanlines), and the sums would
- *          be added and then transformed to output 8 bpp pixel values,
- *          using makeValTabSG3().
- * 
- */ -static l_uint32 * -makeSumTabSG3(void) -{ -l_int32 i; -l_int32 sum[] = {0, 1, 1, 2, 1, 2, 2, 3}; -l_uint32 *tab; - - PROCNAME("makeSumTabSG3"); - - if ((tab = (l_uint32 *)LEPT_CALLOC(64, sizeof(l_uint32))) == NULL) - return (l_uint32 *)ERROR_PTR("tab not made", procName, NULL); - - /* Pack the two sums separately in two bytes */ - for (i = 0; i < 64; i++) { - tab[i] = (sum[i & 0x07]) | (sum[(i >> 3) & 0x07] << 8); - } - return tab; -} - - -/*! - * \brief makeValTabSG3() - * - *
- * Notes:
- *      (1) Returns an 8 bit value for the sum of ON pixels
- *          in a 3x3 square, according to
- *               val = 255 - (255 * sum)/9
- *          where sum is in set {0, ... ,9}
- * 
- */ -static l_uint8 * -makeValTabSG3(void) -{ -l_int32 i; -l_uint8 *tab; - - PROCNAME("makeValTabSG3"); - - if ((tab = (l_uint8 *)LEPT_CALLOC(10, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("tab not made", procName, NULL); - for (i = 0; i < 10; i++) - tab[i] = 0xff - (i * 255) / 9; - return tab; -} - - -/*------------------------------------------------------------------* - * Scale-to-gray 4x * - *------------------------------------------------------------------*/ -/*! - * \brief scaleToGray4Low() - * - * \param[in] datad dest data - * \param[in] wd, hd dest width, height - * \param[in] wpld dest words/line - * \param[in] datas src data - * \param[in] wpls src words/line - * \param[in] sumtab made from makeSumTabSG4() - * \param[in] valtab made from makeValTabSG4() - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) The output is processed in sets of 2 output bytes on a row,
- *          corresponding to 2 4x4 bit-blocks in the input image.
- *          Two lookup tables are used.  The first, sumtab, gets the
- *          sum of ON pixels in two sets of four adjacent bits,
- *          storing the result in 2 adjacent bytes.  After sums from
- *          four rows have been added, the second table, valtab,
- *          converts from the sum of ON pixels in the 4x4 block to
- *          an 8 bpp grayscale value between 0 for 16 bits ON
- *          and 255 for 0 bits ON.
- * 
- */ -static void -scaleToGray4Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_uint32 *sumtab, - l_uint8 *valtab) -{ -l_int32 i, j, l, k; -l_uint32 sbyte1, sbyte2, sbyte3, sbyte4, sum; -l_uint32 *lines, *lined; - - /* i indexes the dest lines - * l indexes the source lines - * j indexes the dest bytes - * k indexes the source bytes - * We take four bytes from the source (in 4 lines of 8 pixels - * each) and convert it into two 8 bpp bytes of the dest. */ - for (i = 0, l = 0; i < hd; i++, l += 4) { - lines = datas + l * wpls; - lined = datad + i * wpld; - for (j = 0, k = 0; j < wd; j += 2, k++) { - sbyte1 = GET_DATA_BYTE(lines, k); - sbyte2 = GET_DATA_BYTE(lines + wpls, k); - sbyte3 = GET_DATA_BYTE(lines + 2 * wpls, k); - sbyte4 = GET_DATA_BYTE(lines + 3 * wpls, k); - sum = sumtab[sbyte1] + sumtab[sbyte2] + - sumtab[sbyte3] + sumtab[sbyte4]; - SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]); - SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]); - } - } - - return; -} - - -/*! - * \brief makeSumTabSG4() - * - *
- * Notes:
- *      (1) Returns a table of 256 l_uint32s, giving the two output
- *          8-bit grayscale sums corresponding to 8 input bits of a
- *          binary image, for a 4x scale-to-gray op.  The sums from
- *          four adjacent scanlines are then added and transformed to
- *          output 8 bpp pixel values, using makeValTabSG4().
- * 
- */ -static l_uint32 * -makeSumTabSG4(void) -{ -l_int32 i; -l_int32 sum[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; -l_uint32 *tab; - - PROCNAME("makeSumTabSG4"); - - if ((tab = (l_uint32 *)LEPT_CALLOC(256, sizeof(l_uint32))) == NULL) - return (l_uint32 *)ERROR_PTR("tab not made", procName, NULL); - - /* Pack the two sums separately in two bytes */ - for (i = 0; i < 256; i++) { - tab[i] = (sum[i & 0xf]) | (sum[(i >> 4) & 0xf] << 8); - } - return tab; -} - - -/*! - * \brief makeValTabSG4() - * - *
- * Notes:
- *      (1) Returns an 8 bit value for the sum of ON pixels
- *          in a 4x4 square, according to
- *              val = 255 - (255 * sum)/16
- *          where sum is in set {0, ... ,16}
- * 
- */ -static l_uint8 * -makeValTabSG4(void) -{ -l_int32 i; -l_uint8 *tab; - - PROCNAME("makeValTabSG4"); - - if ((tab = (l_uint8 *)LEPT_CALLOC(17, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("tab not made", procName, NULL); - for (i = 0; i < 17; i++) - tab[i] = 0xff - (i * 255) / 16; - return tab; -} - - -/*------------------------------------------------------------------* - * Scale-to-gray 6x * - *------------------------------------------------------------------*/ -/*! - * \brief scaleToGray6Low() - * - * \param[in] datad dest data - * \param[in] wd, hd dest width, height - * \param[in] wpld dest words/line - * \param[in] datas src data - * \param[in] wpls src words/line - * \param[in] tab8 made from makePixelSumTab8() - * \param[in] valtab made from makeValTabSG6() - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Each set of 4 6x6 bit-blocks in the source image, which
- *          consist of 144 pixels arranged 24 pixels wide by 6 scanlines,
- *          is converted to a row of 4 8-bit pixels in the dest image.
- *          These 144 pixels of the input image are runs of 24 pixels
- *          in six adjacent scanlines.  Each run of 24 pixels is
- *          stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
- *          The first, tab8, takes 6 of these bits and stores
- *          sum in one byte.  This is done for each of the 6 scanlines,
- *          and the results are added.
- *          We now have the sum of ON pixels in the first 6x6 block.  The
- *          valtab LUT then converts these values (which go from 0 to 36) to
- *          grayscale values between between 255 and 0.  (See makeValTabSG6).
- *          This process is repeated for each of the other 3 sets of
- *          6x6 input pixels, giving 4 output pixels in total.
- *      (2) Note: because the input image is processed in groups of
- *          24 x 6 pixels, the process clips the input height to
- *          (h - h % 6) and the input width to (w - w % 24).
- * 
- */ -static void -scaleToGray6Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 *tab8, - l_uint8 *valtab) -{ -l_int32 i, j, l, k; -l_uint32 threebytes1, threebytes2, threebytes3; -l_uint32 threebytes4, threebytes5, threebytes6, sum; -l_uint32 *lines, *lined; - - /* i indexes the dest lines - * l indexes the source lines - * j indexes the dest bytes - * k indexes the source bytes - * We take 18 bytes from the source (144 binary pixels - * in six lines of 24 pixels each) and convert it - * into 4 bytes of the dest (four 8 bpp pixels in one line) */ - for (i = 0, l = 0; i < hd; i++, l += 6) { - lines = datas + l * wpls; - lined = datad + i * wpld; - for (j = 0, k = 0; j < wd; j += 4, k += 3) { - /* First grab the 18 bytes, 3 at a time, and put each set - * of 3 bytes into the LS bytes of a 32-bit word. */ - threebytes1 = (GET_DATA_BYTE(lines, k) << 16) | - (GET_DATA_BYTE(lines, k + 1) << 8) | - GET_DATA_BYTE(lines, k + 2); - threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) | - (GET_DATA_BYTE(lines + wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + wpls, k + 2); - threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) | - (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + 2 * wpls, k + 2); - threebytes4 = (GET_DATA_BYTE(lines + 3 * wpls, k) << 16) | - (GET_DATA_BYTE(lines + 3 * wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + 3 * wpls, k + 2); - threebytes5 = (GET_DATA_BYTE(lines + 4 * wpls, k) << 16) | - (GET_DATA_BYTE(lines + 4 * wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + 4 * wpls, k + 2); - threebytes6 = (GET_DATA_BYTE(lines + 5 * wpls, k) << 16) | - (GET_DATA_BYTE(lines + 5 * wpls, k + 1) << 8) | - GET_DATA_BYTE(lines + 5 * wpls, k + 2); - - /* Sum first set of 36 bits and convert to 0-255 */ - sum = tab8[(threebytes1 >> 18)] + - tab8[(threebytes2 >> 18)] + - tab8[(threebytes3 >> 18)] + - tab8[(threebytes4 >> 18)] + - tab8[(threebytes5 >> 18)] + - tab8[(threebytes6 >> 18)]; - 
SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 3)]); - - /* Ditto for second set */ - sum = tab8[((threebytes1 >> 12) & 0x3f)] + - tab8[((threebytes2 >> 12) & 0x3f)] + - tab8[((threebytes3 >> 12) & 0x3f)] + - tab8[((threebytes4 >> 12) & 0x3f)] + - tab8[((threebytes5 >> 12) & 0x3f)] + - tab8[((threebytes6 >> 12) & 0x3f)]; - SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]); - - sum = tab8[((threebytes1 >> 6) & 0x3f)] + - tab8[((threebytes2 >> 6) & 0x3f)] + - tab8[((threebytes3 >> 6) & 0x3f)] + - tab8[((threebytes4 >> 6) & 0x3f)] + - tab8[((threebytes5 >> 6) & 0x3f)] + - tab8[((threebytes6 >> 6) & 0x3f)]; - SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 3)]); - - sum = tab8[(threebytes1 & 0x3f)] + - tab8[(threebytes2 & 0x3f)] + - tab8[(threebytes3 & 0x3f)] + - tab8[(threebytes4 & 0x3f)] + - tab8[(threebytes5 & 0x3f)] + - tab8[(threebytes6 & 0x3f)]; - SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]); - } - } - return; -} - - -/*! - * \brief makeValTabSG6() - * - *
- * Notes:
- *      (1) Returns an 8 bit value for the sum of ON pixels
- *          in a 6x6 square, according to
- *              val = 255 - (255 * sum)/36
- *          where sum is in set {0, ... ,36}
- * 
- */ -static l_uint8 * -makeValTabSG6(void) -{ -l_int32 i; -l_uint8 *tab; - - PROCNAME("makeValTabSG6"); - - if ((tab = (l_uint8 *)LEPT_CALLOC(37, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("tab not made", procName, NULL); - for (i = 0; i < 37; i++) - tab[i] = 0xff - (i * 255) / 36; - return tab; -} - - -/*------------------------------------------------------------------* - * Scale-to-gray 8x * - *------------------------------------------------------------------*/ -/*! - * \brief scaleToGray8Low() - * - * \param[in] datad dest data - * \param[in] wd, hd dest width, height - * \param[in] wpld dest words/line - * \param[in] datas src data - * \param[in] wpls src words/line - * \param[in] tab8 made from makePixelSumTab8() - * \param[in] valtab made from makeValTabSG8() - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) The output is processed one dest byte at a time,
- *          corresponding to 8 rows of src bytes in the input image.
- *          Two lookup tables are used.  The first, %tab8, gets the
- *          sum of ON pixels in a byte.  After sums from 8 rows have
- *          been added, the second table, %valtab, converts from this
- *          value which is between 0 and 64 to an 8 bpp grayscale
- *          value between 0 and 255: 0 for all 64 bits ON and 255
- *          for all 64 bits OFF.
- * 
- */ -static void -scaleToGray8Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 *tab8, - l_uint8 *valtab) -{ -l_int32 i, j, k; -l_int32 sbyte0, sbyte1, sbyte2, sbyte3, sbyte4, sbyte5, sbyte6, sbyte7, sum; -l_uint32 *lines, *lined; - - /* i indexes the dest lines - * k indexes the source lines - * j indexes the src and dest bytes - * We take 8 bytes from the source (in 8 lines of 8 pixels - * each) and convert it into one 8 bpp byte of the dest. */ - for (i = 0, k = 0; i < hd; i++, k += 8) { - lines = datas + k * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - sbyte0 = GET_DATA_BYTE(lines, j); - sbyte1 = GET_DATA_BYTE(lines + wpls, j); - sbyte2 = GET_DATA_BYTE(lines + 2 * wpls, j); - sbyte3 = GET_DATA_BYTE(lines + 3 * wpls, j); - sbyte4 = GET_DATA_BYTE(lines + 4 * wpls, j); - sbyte5 = GET_DATA_BYTE(lines + 5 * wpls, j); - sbyte6 = GET_DATA_BYTE(lines + 6 * wpls, j); - sbyte7 = GET_DATA_BYTE(lines + 7 * wpls, j); - sum = tab8[sbyte0] + tab8[sbyte1] + - tab8[sbyte2] + tab8[sbyte3] + - tab8[sbyte4] + tab8[sbyte5] + - tab8[sbyte6] + tab8[sbyte7]; - SET_DATA_BYTE(lined, j, valtab[sum]); - } - } - - return; -} - - -/*! - * \brief makeValTabSG8() - * - *
- * Notes:
- *      (1) Returns an 8 bit value for the sum of ON pixels
- *          in an 8x8 square, according to
- *              val = 255 - (255 * sum)/64
- *          where sum is in set {0, ... ,64}
- * 
- */ -static l_uint8 * -makeValTabSG8(void) -{ -l_int32 i; -l_uint8 *tab; - - PROCNAME("makeValTabSG8"); - - if ((tab = (l_uint8 *)LEPT_CALLOC(65, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("tab not made", procName, NULL); - for (i = 0; i < 65; i++) - tab[i] = 0xff - (i * 255) / 64; - return tab; -} - - -/*------------------------------------------------------------------* - * Scale-to-gray 16x * - *------------------------------------------------------------------*/ -/*! - * \brief scaleToGray16Low() - * - * \param[in] datad dest data - * \param[in] wd, hd dest width, height - * \param[in] wpld dest words/line - * \param[in] datas src data - * \param[in] wpls src words/line - * \param[in] tab8 made from makePixelSumTab8() - * \return 0 if OK; 1 on error. - * - *
- * Notes:
- *      (1) The output is processed one dest byte at a time, corresponding
- *          to 16 rows consisting each of 2 src bytes in the input image.
- *          This uses one lookup table, tab8, which gives the sum of
- *          ON pixels in a byte.  After summing for all ON pixels in the
- *          32 src bytes, which is between 0 and 256, this is converted
- *          to an 8 bpp grayscale value between 0 for 255 or 256 bits ON
- *          and 255 for 0 bits ON.
- * 
- */ -static void -scaleToGray16Low(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas, - l_int32 wpls, - l_int32 *tab8) -{ -l_int32 i, j, k, m; -l_int32 sum; -l_uint32 *lines, *lined; - - /* i indexes the dest lines - * k indexes the source lines - * j indexes the dest bytes - * m indexes the src bytes - * We take 32 bytes from the source (in 16 lines of 16 pixels - * each) and convert it into one 8 bpp byte of the dest. */ - for (i = 0, k = 0; i < hd; i++, k += 16) { - lines = datas + k * wpls; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - m = 2 * j; - sum = tab8[GET_DATA_BYTE(lines, m)]; - sum += tab8[GET_DATA_BYTE(lines, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, 
m)]; - sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m + 1)]; - sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m)]; - sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m + 1)]; - sum = L_MIN(sum, 255); - SET_DATA_BYTE(lined, j, 255 - sum); - } - } - - return; -} - - - -/*------------------------------------------------------------------* - * Grayscale mipmap * - *------------------------------------------------------------------*/ -/*! - * \brief scaleMipmapLow() - * - *
- * Notes:
- *      (1) See notes in scale.c for pixScaleToGrayMipmap().  This function
- *          is here for pedagogical reasons.  It gives poor results on document
- *          images because of aliasing.
- * 
- */ -static l_int32 -scaleMipmapLow(l_uint32 *datad, - l_int32 wd, - l_int32 hd, - l_int32 wpld, - l_uint32 *datas1, - l_int32 wpls1, - l_uint32 *datas2, - l_int32 wpls2, - l_float32 red) -{ -l_int32 i, j, val1, val2, val, row2, col2; -l_int32 *srow, *scol; -l_uint32 *lines1, *lines2, *lined; -l_float32 ratio, w1, w2; - - PROCNAME("scaleMipmapLow"); - - /* Clear dest */ - memset(datad, 0, 4LL * wpld * hd); - - /* Each dest pixel at (j,i) is computed by interpolating - between the two src images at the corresponding location. - We store the UL corner locations of the square of - src pixels in thelower-resolution image that correspond - to dest pixel (j,i). The are labeled by the arrays - srow[i], scol[j]. The UL corner locations of the higher - resolution src pixels are obtained from these arrays - by multiplying by 2. */ - if ((srow = (l_int32 *)LEPT_CALLOC(hd, sizeof(l_int32))) == NULL) - return ERROR_INT("srow not made", procName, 1); - if ((scol = (l_int32 *)LEPT_CALLOC(wd, sizeof(l_int32))) == NULL) { - LEPT_FREE(srow); - return ERROR_INT("scol not made", procName, 1); - } - ratio = 1. / (2. * red); /* 0.5 for red = 1, 1 for red = 0.5 */ - for (i = 0; i < hd; i++) - srow[i] = (l_int32)(ratio * i); - for (j = 0; j < wd; j++) - scol[j] = (l_int32)(ratio * j); - - /* Get weights for linear interpolation: these are the - * 'distances' of the dest image plane from the two - * src image planes. */ - w1 = 2. * red - 1.; /* w1 --> 1 as red --> 1 */ - w2 = 1. 
- w1; - - /* For each dest pixel, compute linear interpolation */ - for (i = 0; i < hd; i++) { - row2 = srow[i]; - lines1 = datas1 + 2 * row2 * wpls1; - lines2 = datas2 + row2 * wpls2; - lined = datad + i * wpld; - for (j = 0; j < wd; j++) { - col2 = scol[j]; - val1 = GET_DATA_BYTE(lines1, 2 * col2); - val2 = GET_DATA_BYTE(lines2, col2); - val = (l_int32)(w1 * val1 + w2 * val2); - SET_DATA_BYTE(lined, j, val); - } - } - - LEPT_FREE(srow); - LEPT_FREE(scol); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/seedfill.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/seedfill.c deleted file mode 100644 index b8780844..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/seedfill.c +++ /dev/null @@ -1,3458 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file seedfill.c - *
- *
- *      Binary seedfill (source: Luc Vincent)
- *               PIX         *pixSeedfillBinary()
- *               PIX         *pixSeedfillBinaryRestricted()
- *               static void  seedfillBinaryLow()
- *
- *      Applications of binary seedfill to find and fill holes,
- *      remove c.c. touching the border and fill bg from border:
- *               PIX         *pixHolesByFilling()
- *               PIX         *pixFillClosedBorders()
- *               PIX         *pixExtractBorderConnComps()
- *               PIX         *pixRemoveBorderConnComps()
- *               PIX         *pixFillBgFromBorder()
- *
- *      Hole-filling of components to bounding rectangle
- *               PIX         *pixFillHolesToBoundingRect()
- *
- *      Gray seedfill (source: Luc Vincent:fast-hybrid-grayscale-reconstruction)
- *               l_int32      pixSeedfillGray()
- *               l_int32      pixSeedfillGrayInv()
- *               static void  seedfillGrayLow()
- *               static void  seedfillGrayInvLow()
-
- *
- *      Gray seedfill (source: Luc Vincent: sequential-reconstruction algorithm)
- *               l_int32      pixSeedfillGraySimple()
- *               l_int32      pixSeedfillGrayInvSimple()
- *               static void  seedfillGrayLowSimple()
- *               static void  seedfillGrayInvLowSimple()
- *
- *      Gray seedfill variations
- *               PIX         *pixSeedfillGrayBasin()
- *
- *      Distance function (source: Luc Vincent)
- *               PIX         *pixDistanceFunction()
- *               static void  distanceFunctionLow()
- *
- *      Seed spread (based on distance function)
- *               PIX         *pixSeedspread()
- *               static void  seedspreadLow()
- *
- *      Local extrema:
- *               l_int32      pixLocalExtrema()
- *            static l_int32  pixQualifyLocalMinima()
- *               l_int32      pixSelectedLocalExtrema()
- *               PIX         *pixFindEqualValues()
- *
- *      Selection of minima in mask of connected components
- *               PTA         *pixSelectMinInConnComp()
- *
- *      Removal of seeded connected components from a mask
- *               PIX         *pixRemoveSeededComponents()
- *
- *
- *           ITERATIVE RASTER-ORDER SEEDFILL
- *
- *      The basic method in the Vincent seedfill (aka reconstruction)
- *      algorithm is simple.  We describe here the situation for
- *      binary seedfill.  Pixels are sampled in raster order in
- *      the seed image.  If they are 4-connected to ON pixels
- *      either directly above or to the left, and are not masked
- *      out by the mask image, they are turned on (or remain on).
- *      (Ditto for 8-connected, except you need to check 3 pixels
- *      on the previous line as well as the pixel to the left
- *      on the current line.  This is extra computational work
- *      for relatively little gain, so it is preferable
- *      in most situations to use the 4-connected version.)
- *      The algorithm proceeds from UR to LL of the image, and
- *      then reverses and sweeps up from LL to UR.
- *      These double sweeps are iterated until there is no change.
- *      At this point, the seed has entirely filled the region it
- *      is allowed to, as delimited by the mask image.
- *
- *      The grayscale seedfill is a straightforward generalization
- *      of the binary seedfill, and is described in seedfillLowGray().
- *
- *      For some applications, the filled seed will later be OR'd
- *      with the negative of the mask.   This is used, for example,
- *      when you flood fill into a 4-connected region of OFF pixels
- *      and you want the result after those pixels are turned ON.
- *
- *      Note carefully that the mask we use delineates which pixels
- *      are allowed to be ON as the seed is filled.  We will call this
- *      a "filling mask".  As the seed expands, it is repeatedly
- *      ANDed with the filling mask: s & fm.  The process can equivalently
- *      be formulated using the inverse of the filling mask, which
- *      we will call a "blocking mask": bm = ~fm.   As the seed
- *      expands, the blocking mask is repeatedly used to prevent
- *      the seed from expanding into the blocking mask.  This is done
- *      by set subtracting the blocking mask from the expanded seed:
- *      s - bm.  Set subtraction of the blocking mask is equivalent
- *      to ANDing with the inverse of the blocking mask: s & (~bm).
- *      But from the inverse relation between blocking and filling
- *      masks, this is equal to s & fm, which proves the equivalence.
- *
- *      For efficiency, the pixels can be taken in larger units
- *      for processing, but still in raster order.  It is natural
- *      to take them in 32-bit words.  The outline of the work
- *      to be done for 4-cc (not including special cases for boundary
- *      words, such as the first line or the last word in each line)
- *      is as follows.  Let the filling mask be m.  The
- *      seed is to fill "under" the mask; i.e., limited by an AND
- *      with the mask.  Let the current word be w, the word
- *      in the line above be wa, and the previous word in the
- *      current line be wp.   Let t be a temporary word that
- *      is used in computation.  Note that masking is performed by
- *      w & m.  (If we had instead used a "blocking" mask, we
- *      would perform masking by the set subtraction operation,
- *      w - m, which is defined to be w & ~m.)
- *
- *      The entire operation can be implemented with shifts,
- *      logical operations and tests.  For each word in the seed image
- *      there are two steps.  The first step is to OR the word with
- *      the word above and with the rightmost pixel in wp (call it "x").
- *      Because wp is shifted one pixel to its right, "x" is ORed
- *      to the leftmost pixel of w.  We then clip to the ON pixels in
- *      the mask.  The result is
- *               t  <--  (w | wa | x000... ) & m
- *      We've now finished taking data from above and to the left.
- *      The second step is to allow filling to propagate horizontally
- *      in t, always making sure that it is properly masked at each
- *      step.  So if filling can be done (i.e., t is neither all 0s
- *      nor all 1s), iteratively take:
- *           t  <--  (t | (t >> 1) | (t << 1)) & m
- *      until t stops changing.  Then write t back into w.
- *
- *      Finally, the boundary conditions require we note that in doing
- *      the above steps:
- *          (a) The words in the first row have no wa
- *          (b) The first word in each row has no wp in that row
- *          (c) The last word in each row must be masked so that
- *              pixels don't propagate beyond the right edge of the
- *              actual image.  (This is easily accomplished by
- *              setting the out-of-bound pixels in m to OFF.)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -struct L_Pixel -{ - l_int32 x; - l_int32 y; -}; -typedef struct L_Pixel L_PIXEL; - -static void seedfillBinaryLow(l_uint32 *datas, l_int32 hs, l_int32 wpls, - l_uint32 *datam, l_int32 hm, l_int32 wplm, - l_int32 connectivity); -static void seedfillGrayLow(l_uint32 *datas, l_int32 w, l_int32 h, - l_int32 wpls, l_uint32 *datam, l_int32 wplm, - l_int32 connectivity); -static void seedfillGrayInvLow(l_uint32 *datas, l_int32 w, l_int32 h, - l_int32 wpls, l_uint32 *datam, l_int32 wplm, - l_int32 connectivity); -static void seedfillGrayLowSimple(l_uint32 *datas, l_int32 w, l_int32 h, - l_int32 wpls, l_uint32 *datam, l_int32 wplm, - l_int32 connectivity); -static void seedfillGrayInvLowSimple(l_uint32 *datas, l_int32 w, l_int32 h, - l_int32 wpls, l_uint32 *datam, - l_int32 wplm, l_int32 connectivity); -static void distanceFunctionLow(l_uint32 *datad, l_int32 w, l_int32 h, - l_int32 d, l_int32 wpld, l_int32 connectivity); -static void seedspreadLow(l_uint32 *datad, l_int32 w, l_int32 h, l_int32 wpld, - l_uint32 *datat, l_int32 wplt, l_int32 connectivity); - - -static l_int32 pixQualifyLocalMinima(PIX *pixs, PIX *pixm, l_int32 maxval); - -#ifndef NO_CONSOLE_IO -#define DEBUG_PRINT_ITERS 0 -#endif /* ~NO_CONSOLE_IO */ - - /* Two-way (UL --> LR, LR --> UL) sweep iterations; typically need only 4 */ -static const l_int32 MaxIters = 40; - - -/*-----------------------------------------------------------------------* - * Vincent's Iterative Binary Seedfill method * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixSeedfillBinary() - * - * \param[in] pixd [optional]; can be null, equal to pixs, - * or different from pixs; 1 bpp - * \param[in] pixs 1 bpp seed - * \param[in] pixm 1 bpp filling mask - * \param[in] connectivity 4 or 8 - * \return pixd always - * - *
- * Notes:
- *      (1) This is for binary seedfill (aka "binary reconstruction").
- *      (2) There are 3 cases:
- *            (a) pixd == null (make a new pixd)
- *            (b) pixd == pixs (in-place)
- *            (c) pixd != pixs
- *      (3) If you know the case, use these patterns for clarity:
- *            (a) pixd = pixSeedfillBinary(NULL, pixs, ...);
- *            (b) pixSeedfillBinary(pixs, pixs, ...);
- *            (c) pixSeedfillBinary(pixd, pixs, ...);
- *      (4) The resulting pixd contains the filled seed.  For some
- *          applications you want to OR it with the inverse of
- *          the filling mask.
- *      (5) The input seed and mask images can be different sizes, but
- *          in typical use the difference, if any, would be only
- *          a few pixels in each direction.  If the sizes differ,
- *          the clipping is handled by the low-level function
- *          seedfillBinaryLow().
- * 
- */ -PIX * -pixSeedfillBinary(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_int32 connectivity) -{ -l_int32 i, boolval; -l_int32 hd, hm, wpld, wplm; -l_uint32 *datad, *datam; -PIX *pixt; - - PROCNAME("pixSeedfillBinary"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, pixd); - if (!pixm || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm undefined or not 1 bpp", procName, pixd); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not in {4,8}", procName, pixd); - - /* Prepare pixd as a copy of pixs if not identical */ - if ((pixd = pixCopy(pixd, pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - - /* pixt is used to test for completion */ - if ((pixt = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixt not made", procName, pixd); - - hd = pixGetHeight(pixd); - hm = pixGetHeight(pixm); /* included so seedfillBinaryLow() can clip */ - datad = pixGetData(pixd); - datam = pixGetData(pixm); - wpld = pixGetWpl(pixd); - wplm = pixGetWpl(pixm); - - pixSetPadBits(pixm, 0); - - for (i = 0; i < MaxIters; i++) { - pixCopy(pixt, pixd); - seedfillBinaryLow(datad, hd, wpld, datam, hm, wplm, connectivity); - pixEqual(pixd, pixt, &boolval); - if (boolval == 1) { -#if DEBUG_PRINT_ITERS - lept_stderr("Binary seed fill converged: %d iters\n", i + 1); -#endif /* DEBUG_PRINT_ITERS */ - break; - } - } - - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief pixSeedfillBinaryRestricted() - * - * \param[in] pixd [optional]; can be null, equal to pixs, - * or different from pixs; 1 bpp - * \param[in] pixs 1 bpp seed - * \param[in] pixm 1 bpp filling mask - * \param[in] connectivity 4 or 8 - * \param[in] xmax max distance in x direction of fill into mask - * \param[in] ymax max distance in y direction of fill into mask - * \return pixd always - * - *
- * Notes:
- *      (1) See usage for pixSeedfillBinary(), which has unrestricted fill.
- *          In pixSeedfillBinary(), the filling distance is unrestricted
- *          and can be larger than pixs, depending on the topology of
- *          th mask.
- *      (2) There are occasions where it is useful not to permit the
- *          fill to go more than a certain distance into the mask.
- *          %xmax specifies the maximum horizontal distance allowed
- *          in the fill; %ymax does likewise in the vertical direction.
- *      (3) Operationally, the max "distance" allowed for the fill
- *          is a linear distance from the original seed, independent
- *          of the actual mask topology.
- *      (4) Another formulation of this problem, not implemented,
- *          would use the manhattan distance from the seed, as
- *          determined by a breadth-first search starting at the seed
- *          boundaries and working outward where the mask fg allows.
- *          How this might use the constraints of separate xmax and ymax
- *          is not clear.
- * 
- */ -PIX * -pixSeedfillBinaryRestricted(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_int32 connectivity, - l_int32 xmax, - l_int32 ymax) -{ -l_int32 w, h; -PIX *pix1, *pix2; - - PROCNAME("pixSeedfillBinaryRestricted"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, pixd); - if (!pixm || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm undefined or not 1 bpp", procName, pixd); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not in {4,8}", procName, pixd); - if (xmax == 0 && ymax == 0) /* no filling permitted */ - return pixClone(pixs); - if (xmax < 0 || ymax < 0) { - L_ERROR("xmax and ymax must be non-negative", procName); - return pixClone(pixs); - } - - /* Full fill from the seed into the mask. */ - if ((pix1 = pixSeedfillBinary(NULL, pixs, pixm, connectivity)) == NULL) - return (PIX *)ERROR_PTR("pix1 not made", procName, pixd); - - /* Dilate the seed. This gives the maximal region where changes - * are permitted. Invert to get the region where pixs is - * not allowed to change. */ - pix2 = pixDilateCompBrick(NULL, pixs, 2 * xmax + 1, 2 * ymax + 1); - pixInvert(pix2, pix2); - - /* Blank the region of pix1 specified by the fg of pix2. - * This is not yet the final result, because it may have fg pixels - * that are not accessible from the seed in the restricted distance. - * For example, such pixels may be connected to the original seed, - * but through a path that goes outside the permitted region. */ - pixGetDimensions(pixs, &w, &h, NULL); - pixRasterop(pix1, 0, 0, w, h, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); - - /* To get the accessible pixels in the restricted region, do - * a second seedfill from the original seed, using pix1 as - * a mask. The result, in pixd, will not have any bad fg - * pixels that were in pix1. */ - pixd = pixSeedfillBinary(pixd, pixs, pix1, connectivity); - - pixDestroy(&pix1); - pixDestroy(&pix2); - return pixd; -} - - -/*! 
- * \brief seedfillBinaryLow() - * - * Notes: - * (1) This is an in-place fill, where the seed image is - * filled, clipping to the filling mask, in one full - * cycle of UL -> LR and LR -> UL raster scans. - * (2) Assume the mask is a filling mask, not a blocking mask. - * (3) Assume that the RHS pad bits of the mask - * are properly set to 0. - * (4) Clip to the smallest dimensions to avoid invalid reads. - */ -static void -seedfillBinaryLow(l_uint32 *datas, - l_int32 hs, - l_int32 wpls, - l_uint32 *datam, - l_int32 hm, - l_int32 wplm, - l_int32 connectivity) -{ -l_int32 i, j, h, wpl; -l_uint32 word, mask; -l_uint32 wordabove, wordleft, wordbelow, wordright; -l_uint32 wordprev; /* test against this in previous iteration */ -l_uint32 *lines, *linem; - - PROCNAME("seedfillBinaryLow"); - - h = L_MIN(hs, hm); - wpl = L_MIN(wpls, wplm); - - switch (connectivity) - { - case 4: - /* UL --> LR scan */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < wpl; j++) { - word = *(lines + j); - mask = *(linem + j); - - /* OR from word above and from word to left; mask */ - if (i > 0) { - wordabove = *(lines - wpls + j); - word |= wordabove; - } - if (j > 0) { - wordleft = *(lines + j - 1); - word |= wordleft << 31; - } - word &= mask; - - /* No need to fill horizontally? 
*/ - if (!word || !(~word)) { - *(lines + j) = word; - continue; - } - - while (1) { - wordprev = word; - word = (word | (word >> 1) | (word << 1)) & mask; - if ((word ^ wordprev) == 0) { - *(lines + j) = word; - break; - } - } - } - } - - /* LR --> UL scan */ - for (i = h - 1; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = wpl - 1; j >= 0; j--) { - word = *(lines + j); - mask = *(linem + j); - - /* OR from word below and from word to right; mask */ - if (i < h - 1) { - wordbelow = *(lines + wpls + j); - word |= wordbelow; - } - if (j < wpl - 1) { - wordright = *(lines + j + 1); - word |= wordright >> 31; - } - word &= mask; - - /* No need to fill horizontally? */ - if (!word || !(~word)) { - *(lines + j) = word; - continue; - } - - while (1) { - wordprev = word; - word = (word | (word >> 1) | (word << 1)) & mask; - if ((word ^ wordprev) == 0) { - *(lines + j) = word; - break; - } - } - } - } - break; - - case 8: - /* UL --> LR scan */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < wpl; j++) { - word = *(lines + j); - mask = *(linem + j); - - /* OR from words above and from word to left; mask */ - if (i > 0) { - wordabove = *(lines - wpls + j); - word |= (wordabove | (wordabove << 1) | (wordabove >> 1)); - if (j > 0) - word |= (*(lines - wpls + j - 1)) << 31; - if (j < wpl - 1) - word |= (*(lines - wpls + j + 1)) >> 31; - } - if (j > 0) { - wordleft = *(lines + j - 1); - word |= wordleft << 31; - } - word &= mask; - - /* No need to fill horizontally? 
*/ - if (!word || !(~word)) { - *(lines + j) = word; - continue; - } - - while (1) { - wordprev = word; - word = (word | (word >> 1) | (word << 1)) & mask; - if ((word ^ wordprev) == 0) { - *(lines + j) = word; - break; - } - } - } - } - - /* LR --> UL scan */ - for (i = h - 1; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = wpl - 1; j >= 0; j--) { - word = *(lines + j); - mask = *(linem + j); - - /* OR from words below and from word to right; mask */ - if (i < h - 1) { - wordbelow = *(lines + wpls + j); - word |= (wordbelow | (wordbelow << 1) | (wordbelow >> 1)); - if (j > 0) - word |= (*(lines + wpls + j - 1)) << 31; - if (j < wpl - 1) - word |= (*(lines + wpls + j + 1)) >> 31; - } - if (j < wpl - 1) { - wordright = *(lines + j + 1); - word |= wordright >> 31; - } - word &= mask; - - /* No need to fill horizontally? */ - if (!word || !(~word)) { - *(lines + j) = word; - continue; - } - - while (1) { - wordprev = word; - word = (word | (word >> 1) | (word << 1)) & mask; - if ((word ^ wordprev) == 0) { - *(lines + j) = word; - break; - } - } - } - } - break; - - default: - L_ERROR("connectivity must be 4 or 8\n", procName); - return; - } -} - - -/*! - * \brief pixHolesByFilling() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity 4 or 8 - * \return pixd inverted image of all holes, or NULL on error - * - * Action: - * 1 Start with 1-pixel black border on otherwise white pixd - * 2 Use the inverted pixs as the filling mask to fill in - * all the pixels from the border to the pixs foreground - * 3 OR the result with pixs to have an image with all - * ON pixels except for the holes. - * 4 Invert the result to get the holes as foreground - * - *
- * Notes:
- *     (1) To get 4-c.c. holes of the 8-c.c. as foreground, use
- *         4-connected filling; to get 8-c.c. holes of the 4-c.c.
- *         as foreground, use 8-connected filling.
- * 
- */ -PIX * -pixHolesByFilling(PIX *pixs, - l_int32 connectivity) -{ -PIX *pixsi, *pixd; - - PROCNAME("pixHolesByFilling"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - if ((pixsi = pixInvert(NULL, pixs)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("pixsi not made", procName, NULL); - } - - pixSetOrClearBorder(pixd, 1, 1, 1, 1, PIX_SET); - pixSeedfillBinary(pixd, pixd, pixsi, connectivity); - pixOr(pixd, pixd, pixs); - pixInvert(pixd, pixd); - pixDestroy(&pixsi); - return pixd; -} - - -/*! - * \brief pixFillClosedBorders() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity filling connectivity 4 or 8 - * \return pixd all topologically outer closed borders are filled - * as connected comonents, or NULL on error - * - *
- * Notes:
- *      (1) Start with 1-pixel black border on otherwise white pixd
- *      (2) Subtract input pixs to remove border pixels that were
- *          also on the closed border
- *      (3) Use the inverted pixs as the filling mask to fill in
- *          all the pixels from the outer border to the closed border
- *          on pixs
- *      (4) Invert the result to get the filled component, including
- *          the input border
- *      (5) If the borders are 4-c.c., use 8-c.c. filling, and v.v.
- *      (6) Closed borders within c.c. that represent holes, etc., are filled.
- * 
- */ -PIX * -pixFillClosedBorders(PIX *pixs, - l_int32 connectivity) -{ -PIX *pixsi, *pixd; - - PROCNAME("pixFillClosedBorders"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSetOrClearBorder(pixd, 1, 1, 1, 1, PIX_SET); - pixSubtract(pixd, pixd, pixs); - if ((pixsi = pixInvert(NULL, pixs)) == NULL) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("pixsi not made", procName, NULL); - } - - pixSeedfillBinary(pixd, pixd, pixsi, connectivity); - pixInvert(pixd, pixd); - pixDestroy(&pixsi); - - return pixd; -} - - -/*! - * \brief pixExtractBorderConnComps() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity filling connectivity 4 or 8 - * \return pixd all pixels in the src that are in connected - * components touching the border, or NULL on error - */ -PIX * -pixExtractBorderConnComps(PIX *pixs, - l_int32 connectivity) -{ -PIX *pixd; - - PROCNAME("pixExtractBorderConnComps"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - /* Start with 1 pixel wide black border as seed in pixd */ - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - pixSetOrClearBorder(pixd, 1, 1, 1, 1, PIX_SET); - - /* Fill in pixd from the seed, using pixs as the filling mask. - * This fills all components from pixs that are touching the border. */ - pixSeedfillBinary(pixd, pixd, pixs, connectivity); - - return pixd; -} - - -/*! 
- * \brief pixRemoveBorderConnComps() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity filling connectivity 4 or 8 - * \return pixd all pixels in the src that are not touching the - * border or NULL on error - * - *
- * Notes:
- *      (1) This removes all fg components touching the border.
- * 
- */ -PIX * -pixRemoveBorderConnComps(PIX *pixs, - l_int32 connectivity) -{ -PIX *pixd; - - PROCNAME("pixRemoveBorderConnComps"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - /* Fill from a 1 pixel wide seed at the border into all components - * in pixs (the filling mask) that are touching the border */ - pixd = pixExtractBorderConnComps(pixs, connectivity); - - /* Save in pixd only those components in pixs not touching the border */ - pixXor(pixd, pixd, pixs); - return pixd; -} - - -/*! - * \brief pixFillBgFromBorder() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity filling connectivity 4 or 8 - * \return pixd with the background c.c. touching the border - * filled to foreground, or NULL on error - * - *
- * Notes:
- *      (1) This fills all bg components touching the border to fg.
- *          It is the photometric inverse of pixRemoveBorderConnComps().
- *      (2) Invert the result to get the "holes" left after this fill.
- *          This can be done multiple times, extracting holes within
- *          holes after each pair of fillings.  Specifically, this code
- *          peels away n successive embeddings of components:
- * \code
- *              pix1 = 
- *              for (i = 0; i < 2 * n; i++) {
- *                   pix2 = pixFillBgFromBorder(pix1, 8);
- *                   pixInvert(pix2, pix2);
- *                   pixDestroy(&pix1);
- *                   pix1 = pix2;
- *              }
- * \endcode
- * 
- */ -PIX * -pixFillBgFromBorder(PIX *pixs, - l_int32 connectivity) -{ -PIX *pixd; - - PROCNAME("pixFillBgFromBorder"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - /* Invert to turn bg touching the border to a fg component. - * Extract this by filling from a 1 pixel wide seed at the border. */ - pixInvert(pixs, pixs); - pixd = pixExtractBorderConnComps(pixs, connectivity); - pixInvert(pixs, pixs); /* restore pixs */ - - /* Bit-or the filled bg component with pixs */ - pixOr(pixd, pixd, pixs); - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Hole-filling of components to bounding rectangle * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixFillHolesToBoundingRect() - * - * \param[in] pixs 1 bpp - * \param[in] minsize min number of pixels in the hole - * \param[in] maxhfract max hole area as fraction of fg pixels in the cc - * \param[in] minfgfract min fg area as fraction of bounding rectangle - * \return pixd with some holes possibly filled and some c.c. possibly - * expanded to their bounding rects, or NULL on error - * - *
- * Notes:
- *      (1) This does not fill holes that are smaller in area than 'minsize'.
- *      (2) This does not fill holes with an area larger than
- *          'maxhfract' times the fg area of the c.c.
- *      (3) This does not expand the fg of the c.c. to bounding rect if
- *          the fg area is less than 'minfgfract' times the area of the
- *          bounding rect.
- *      (4) The decisions are made as follows:
- *           ~ Decide if we are filling the holes; if so, when using
- *             the fg area, include the filled holes.
- *           ~ Decide based on the fg area if we are filling to a bounding rect.
- *             If so, do it.
- *             If not, fill the holes if the condition is satisfied.
- *      (5) The choice of minsize depends on the resolution.
- *      (6) For solidifying image mask regions on printed materials,
- *          which tend to be rectangular, values for maxhfract
- *          and minfgfract around 0.5 are reasonable.
- * 
- */ -PIX * -pixFillHolesToBoundingRect(PIX *pixs, - l_int32 minsize, - l_float32 maxhfract, - l_float32 minfgfract) -{ -l_int32 i, x, y, w, h, n, nfg, nh, ntot, area; -l_int32 *tab; -l_float32 hfract; /* measured hole fraction */ -l_float32 fgfract; /* measured fg fraction */ -BOXA *boxa; -PIX *pixd, *pixfg, *pixh; -PIXA *pixa; - - PROCNAME("pixFillHolesToBoundingRect"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - - pixd = pixCopy(NULL, pixs); - boxa = pixConnComp(pixd, &pixa, 8); - n = boxaGetCount(boxa); - tab = makePixelSumTab8(); - for (i = 0; i < n; i++) { - boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); - area = w * h; - if (area < minsize) - continue; - pixfg = pixaGetPix(pixa, i, L_COPY); - pixh = pixHolesByFilling(pixfg, 4); /* holes only */ - pixCountPixels(pixfg, &nfg, tab); - pixCountPixels(pixh, &nh, tab); - hfract = (l_float32)nh / (l_float32)nfg; - ntot = nfg; - if (hfract <= maxhfract) /* we will fill the holes (at least) */ - ntot = nfg + nh; - fgfract = (l_float32)ntot / (l_float32)area; - if (fgfract >= minfgfract) { /* fill to bounding rect */ - pixSetAll(pixfg); - pixRasterop(pixd, x, y, w, h, PIX_SRC, pixfg, 0, 0); - } else if (hfract <= maxhfract) { /* fill just the holes */ - pixRasterop(pixd, x, y, w, h, PIX_DST | PIX_SRC , pixh, 0, 0); - } - pixDestroy(&pixfg); - pixDestroy(&pixh); - } - boxaDestroy(&boxa); - pixaDestroy(&pixa); - LEPT_FREE(tab); - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Vincent's hybrid Grayscale Seedfill method * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixSeedfillGray() - * - * \param[in] pixs 8 bpp seed; filled in place - * \param[in] pixm 8 bpp filling mask - * \param[in] connectivity 4 or 8 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place filling operation on the seed, pixs,
- *          where the clipping mask is always above or at the level
- *          of the seed as it is filled.
- *      (2) For details of the operation, see the description in
- *          seedfillGrayLow() and the code there.
- *      (3) As an example of use, see the description in pixHDome().
- *          There, the seed is an image where each pixel is a fixed
- *          amount smaller than the corresponding mask pixel.
- *      (4) Reference paper :
- *            L. Vincent, Morphological grayscale reconstruction in image
- *            analysis: applications and efficient algorithms, IEEE Transactions
- *            on  Image Processing, vol. 2, no. 2, pp. 176-201, 1993.
- * 
- */ -l_ok -pixSeedfillGray(PIX *pixs, - PIX *pixm, - l_int32 connectivity) -{ -l_int32 h, w, wpls, wplm; -l_uint32 *datas, *datam; - - PROCNAME("pixSeedfillGray"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 8) - return ERROR_INT("pixm not defined or not 8 bpp", procName, 1); - if (connectivity != 4 && connectivity != 8) - return ERROR_INT("connectivity not in {4,8}", procName, 1); - - /* Make sure the sizes of seed and mask images are the same */ - if (pixSizesEqual(pixs, pixm) == 0) - return ERROR_INT("pixs and pixm sizes differ", procName, 1); - - datas = pixGetData(pixs); - datam = pixGetData(pixm); - wpls = pixGetWpl(pixs); - wplm = pixGetWpl(pixm); - pixGetDimensions(pixs, &w, &h, NULL); - seedfillGrayLow(datas, w, h, wpls, datam, wplm, connectivity); - - return 0; -} - - -/*! - * \brief pixSeedfillGrayInv() - * - * \param[in] pixs 8 bpp seed; filled in place - * \param[in] pixm 8 bpp filling mask - * \param[in] connectivity 4 or 8 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place filling operation on the seed, pixs,
- *          where the clipping mask is always below or at the level
- *          of the seed as it is filled.  Think of filling up a basin
- *          to a particular level, given by the maximum seed value
- *          in the basin.  Outside the filled region, the mask
- *          is above the filling level.
- *      (2) Contrast this with pixSeedfillGray(), where the clipping mask
- *          is always above or at the level of the fill.  An example
- *          of its use is the hdome fill, where the seed is an image
- *          where each pixel is a fixed amount smaller than the
- *          corresponding mask pixel.
- *      (3) The basin fill, pixSeedfillGrayBasin(), is a special case
- *          where the seed pixel values are generated from the mask,
- *          and where the implementation uses pixSeedfillGray() by
- *          inverting both the seed and mask.
- * 
- */ -l_ok -pixSeedfillGrayInv(PIX *pixs, - PIX *pixm, - l_int32 connectivity) -{ -l_int32 h, w, wpls, wplm; -l_uint32 *datas, *datam; - - PROCNAME("pixSeedfillGrayInv"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 8) - return ERROR_INT("pixm not defined or not 8 bpp", procName, 1); - if (connectivity != 4 && connectivity != 8) - return ERROR_INT("connectivity not in {4,8}", procName, 1); - - /* Make sure the sizes of seed and mask images are the same */ - if (pixSizesEqual(pixs, pixm) == 0) - return ERROR_INT("pixs and pixm sizes differ", procName, 1); - - datas = pixGetData(pixs); - datam = pixGetData(pixm); - wpls = pixGetWpl(pixs); - wplm = pixGetWpl(pixm); - pixGetDimensions(pixs, &w, &h, NULL); - seedfillGrayInvLow(datas, w, h, wpls, datam, wplm, connectivity); - - return 0; -} - - -/*! - * \brief seedfillGrayLow() - * - * Notes: - * (1) The pixels are numbered as follows: - * 1 2 3 - * 4 x 5 - * 6 7 8 - * This low-level filling operation consists of two scans, - * raster and anti-raster, covering the entire seed image. - * This is followed by a breadth-first propagation operation to - * complete the fill. - * During the anti-raster scan, every pixel p whose current value - * could still be propagated after the anti-raster scan is put into - * the FIFO queue. - * The propagation step is a breadth-first fill to completion. - * Unlike the simple grayscale seedfill pixSeedfillGraySimple(), - * where at least two full raster/anti-raster iterations are required - * for completion and verification, the hybrid method uses only a - * single raster/anti-raster set of scans. - * (2) The filling action can be visualized from the following example. - * Suppose the mask, which clips the fill, is a sombrero-shaped - * surface, where the highest point is 200 and the low pixels - * around the rim are 30. Beyond the rim, the mask goes up a bit. 
- * Suppose the seed, which is filled, consists of a single point - * of height 150, located below the max of the mask, with - * the rest 0. Then in the raster scan, nothing happens until - * the high seed point is encountered, and then this value is - * propagated right and down, until it hits the side of the - * sombrero. The seed can never exceed the mask, so it fills - * to the rim, going lower along the mask surface. When it - * passes the rim, the seed continues to fill at the rim - * height to the edge of the seed image. Then on the - * anti-raster scan, the seed fills flat inside the - * sombrero to the upper and left, and then out from the - * rim as before. The final result has a seed that is - * flat outside the rim, and inside it fills the sombrero - * but only up to 150. If the rim height varies, the - * filled seed outside the rim will be at the highest - * point on the rim, which is a saddle point on the rim. - * (3) Reference paper : - * L. Vincent, Morphological grayscale reconstruction in image - * analysis: applications and efficient algorithms, IEEE Transactions - * on Image Processing, vol. 2, no. 2, pp. 176-201, 1993. - */ -static void -seedfillGrayLow(l_uint32 *datas, - l_int32 w, - l_int32 h, - l_int32 wpls, - l_uint32 *datam, - l_int32 wplm, - l_int32 connectivity) -{ -l_uint8 val1, val2, val3, val4, val5, val6, val7, val8; -l_uint8 val, maxval, maskval, boolval; -l_int32 i, j, imax, jmax, queue_size; -l_uint32 *lines, *linem; -L_PIXEL *pixel; -L_QUEUE *lq_pixel; - - PROCNAME("seedfillGrayLow"); - - if (connectivity != 4 && connectivity != 8) { - L_ERROR("connectivity must be 4 or 8\n", procName); - return; - } - - imax = h - 1; - jmax = w - 1; - - /* In the worst case, most of the pixels could be pushed - * onto the FIFO queue during anti-raster scan. However this - * will rarely happen, and we initialize the queue ptr size to - * the image perimeter. 
*/ - lq_pixel = lqueueCreate(2 * (w + h)); - - switch (connectivity) - { - case 4: - /* UL --> LR scan (Raster Order) - * If I : mask image - * J : marker image - * Let p be the currect pixel; - * J(p) <- (max{J(p) union J(p) neighbors in raster order}) - * intersection I(p) */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i > 0) - maxval = GET_DATA_BYTE(lines - wpls, j); - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - } - } - } - - /* LR --> UL scan (anti-raster order) - * Let p be the currect pixel; - * J(p) <- (max{J(p) union J(p) neighbors in anti-raster order}) - * intersection I(p) */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - boolval = FALSE; - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i < imax) - maxval = GET_DATA_BYTE(lines + wpls, j); - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - - /* - * If there exists a point (q) which belongs to J(p) - * neighbors in anti-raster order such that J(q) < J(p) - * and J(q) < I(q) then - * fifo_add(p) */ - if (i < imax) { - val7 = GET_DATA_BYTE(lines + wpls, j); - if ((val7 < val) && - (val7 < GET_DATA_BYTE(linem + wplm, j))) { - boolval = TRUE; - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - if (!boolval && (val5 < val) && - (val5 < GET_DATA_BYTE(linem, j + 1))) { - boolval = TRUE; - } - } - if (boolval) { - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - 
} - } - - /* Propagation step: - * while fifo_empty = false - * p <- fifo_first() - * for every pixel (q) belong to neighbors of (p) - * if J(q) < J(p) and I(q) != J(q) - * J(q) <- min(J(p), I(q)); - * fifo_add(q); - * end - * end - * end */ - queue_size = lqueueGetCount(lq_pixel); - while (queue_size) { - pixel = (L_PIXEL *)lqueueRemove(lq_pixel); - i = pixel->x; - j = pixel->y; - LEPT_FREE(pixel); - lines = datas + i * wpls; - linem = datam + i * wplm; - - if ((val = GET_DATA_BYTE(lines, j)) > 0) { - if (i > 0) { - val2 = GET_DATA_BYTE(lines - wpls, j); - maskval = GET_DATA_BYTE(linem - wplm, j); - if (val > val2 && val2 != maskval) { - SET_DATA_BYTE(lines - wpls, j, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maskval = GET_DATA_BYTE(linem, j - 1); - if (val > val4 && val4 != maskval) { - SET_DATA_BYTE(lines, j - 1, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (i < imax) { - val7 = GET_DATA_BYTE(lines + wpls, j); - maskval = GET_DATA_BYTE(linem + wplm, j); - if (val > val7 && val7 != maskval) { - SET_DATA_BYTE(lines + wpls, j, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maskval = GET_DATA_BYTE(linem, j + 1); - if (val > val5 && val5 != maskval) { - SET_DATA_BYTE(lines, j + 1, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - } - - queue_size = lqueueGetCount(lq_pixel); - } - - break; - - case 8: - /* UL --> LR scan (Raster Order) - * If I : mask image - * J : marker image - * Let p be the currect pixel; - * J(p) <- (max{J(p) union 
J(p) neighbors in raster order}) - * intersection I(p) */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i > 0) { - if (j > 0) - maxval = GET_DATA_BYTE(lines - wpls, j - 1); - if (j < jmax) { - val3 = GET_DATA_BYTE(lines - wpls, j + 1); - maxval = L_MAX(maxval, val3); - } - val2 = GET_DATA_BYTE(lines - wpls, j); - maxval = L_MAX(maxval, val2); - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - } - } - } - - /* LR --> UL scan (anti-raster order) - * Let p be the currect pixel; - * J(p) <- (max{J(p) union J(p) neighbors in anti-raster order}) - * intersection I(p) */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - boolval = FALSE; - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i < imax) { - if (j > 0) { - maxval = GET_DATA_BYTE(lines + wpls, j - 1); - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - maxval = L_MAX(maxval, val8); - } - val7 = GET_DATA_BYTE(lines + wpls, j); - maxval = L_MAX(maxval, val7); - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - - /* If there exists a point (q) which belongs to J(p) - * neighbors in anti-raster order such that J(q) < J(p) - * and J(q) < I(q) then - * fifo_add(p) */ - if (i < imax) { - if (j > 0) { - val6 = GET_DATA_BYTE(lines + wpls, j - 1); - if ((val6 < val) && - (val6 < GET_DATA_BYTE(linem + wplm, j - 1))) { - boolval = TRUE; - } - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - if (!boolval && (val8 < val) && - (val8 < 
GET_DATA_BYTE(linem + wplm, j + 1))) { - boolval = TRUE; - } - } - val7 = GET_DATA_BYTE(lines + wpls, j); - if (!boolval && (val7 < val) && - (val7 < GET_DATA_BYTE(linem + wplm, j))) { - boolval = TRUE; - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - if (!boolval && (val5 < val) && - (val5 < GET_DATA_BYTE(linem, j + 1))) { - boolval = TRUE; - } - } - if (boolval) { - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - } - } - - /* Propagation step: - * while fifo_empty = false - * p <- fifo_first() - * for every pixel (q) belong to neighbors of (p) - * if J(q) < J(p) and I(q) != J(q) - * J(q) <- min(J(p), I(q)); - * fifo_add(q); - * end - * end - * end */ - queue_size = lqueueGetCount(lq_pixel); - while (queue_size) { - pixel = (L_PIXEL *)lqueueRemove(lq_pixel); - i = pixel->x; - j = pixel->y; - LEPT_FREE(pixel); - lines = datas + i * wpls; - linem = datam + i * wplm; - - if ((val = GET_DATA_BYTE(lines, j)) > 0) { - if (i > 0) { - if (j > 0) { - val1 = GET_DATA_BYTE(lines - wpls, j - 1); - maskval = GET_DATA_BYTE(linem - wplm, j - 1); - if (val > val1 && val1 != maskval) { - SET_DATA_BYTE(lines - wpls, j - 1, - L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val3 = GET_DATA_BYTE(lines - wpls, j + 1); - maskval = GET_DATA_BYTE(linem - wplm, j + 1); - if (val > val3 && val3 != maskval) { - SET_DATA_BYTE(lines - wpls, j + 1, - L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - val2 = GET_DATA_BYTE(lines - wpls, j); - maskval = GET_DATA_BYTE(linem - wplm, j); - if (val > val2 && val2 != maskval) { - SET_DATA_BYTE(lines - wpls, j, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j; - 
lqueueAdd(lq_pixel, pixel); - } - - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maskval = GET_DATA_BYTE(linem, j - 1); - if (val > val4 && val4 != maskval) { - SET_DATA_BYTE(lines, j - 1, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (i < imax) { - if (j > 0) { - val6 = GET_DATA_BYTE(lines + wpls, j - 1); - maskval = GET_DATA_BYTE(linem + wplm, j - 1); - if (val > val6 && val6 != maskval) { - SET_DATA_BYTE(lines + wpls, j - 1, - L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - maskval = GET_DATA_BYTE(linem + wplm, j + 1); - if (val > val8 && val8 != maskval) { - SET_DATA_BYTE(lines + wpls, j + 1, - L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - val7 = GET_DATA_BYTE(lines + wpls, j); - maskval = GET_DATA_BYTE(linem + wplm, j); - if (val > val7 && val7 != maskval) { - SET_DATA_BYTE(lines + wpls, j, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maskval = GET_DATA_BYTE(linem, j + 1); - if (val > val5 && val5 != maskval) { - SET_DATA_BYTE(lines, j + 1, L_MIN(val, maskval)); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - } - - queue_size = lqueueGetCount(lq_pixel); - } - break; - - default: - L_ERROR("shouldn't get here!\n", procName); - break; - } - - lqueueDestroy(&lq_pixel, TRUE); -} - - -/*! 
- * \brief seedfillGrayInvLow() - * - * Notes: - * (1) The pixels are numbered as follows: - * 1 2 3 - * 4 x 5 - * 6 7 8 - * This low-level filling operation consists of two scans, - * raster and anti-raster, covering the entire seed image. - * During the anti-raster scan, every pixel p such that its - * current value could still be propagated during the next - * raster scanning is put into the FIFO-queue. - * Next step is the propagation step where where we update - * and propagate the values using FIFO structure created in - * anti-raster scan. - * (2) The "Inv" signifies the fact that in this case, filling - * of the seed only takes place when the seed value is - * greater than the mask value. The mask will act to stop - * the fill when it is higher than the seed level. (This is - * in contrast to conventional grayscale filling where the - * seed always fills below the mask.) - * (3) An example of use is a basin, described by the mask (pixm), - * where within the basin, the seed pix (pixs) gets filled to the - * height of the highest seed pixel that is above its - * corresponding max pixel. Filling occurs while the - * propagating seed pixels in pixs are larger than the - * corresponding mask values in pixm. - * (4) Reference paper : - * L. Vincent, Morphological grayscale reconstruction in image - * analysis: applications and efficient algorithms, IEEE Transactions - * on Image Processing, vol. 2, no. 2, pp. 176-201, 1993. 
- */ -static void -seedfillGrayInvLow(l_uint32 *datas, - l_int32 w, - l_int32 h, - l_int32 wpls, - l_uint32 *datam, - l_int32 wplm, - l_int32 connectivity) -{ -l_uint8 val1, val2, val3, val4, val5, val6, val7, val8; -l_uint8 val, maxval, maskval, boolval; -l_int32 i, j, imax, jmax, queue_size; -l_uint32 *lines, *linem; -L_PIXEL *pixel; -L_QUEUE *lq_pixel; - - PROCNAME("seedfillGrayInvLow"); - - if (connectivity != 4 && connectivity != 8) { - L_ERROR("connectivity must be 4 or 8\n", procName); - return; - } - - imax = h - 1; - jmax = w - 1; - - /* In the worst case, most of the pixels could be pushed - * onto the FIFO queue during anti-raster scan. However this - * will rarely happen, and we initialize the queue ptr size to - * the image perimeter. */ - lq_pixel = lqueueCreate(2 * (w + h)); - - switch (connectivity) - { - case 4: - /* UL --> LR scan (Raster Order) - * If I : mask image - * J : marker image - * Let p be the currect pixel; - * tmp <- max{J(p) union J(p) neighbors in raster order} - * if (tmp > I(p)) - * J(p) <- tmp - * end */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i > 0) { - val2 = GET_DATA_BYTE(lines - wpls, j); - maxval = L_MAX(maxval, val2); - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - } - } - } - - /* LR --> UL scan (anti-raster order) - * If I : mask image - * J : marker image - * Let p be the currect pixel; - * tmp <- max{J(p) union J(p) neighbors in anti-raster order} - * if (tmp > I(p)) - * J(p) <- tmp - * end */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - boolval = FALSE; - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - val = maxval = GET_DATA_BYTE(lines, j); - if (i < imax) { - val7 = 
GET_DATA_BYTE(lines + wpls, j); - maxval = L_MAX(maxval, val7); - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - val = GET_DATA_BYTE(lines, j); - - /* - * If there exists a point (q) which belongs to J(p) - * neighbors in anti-raster order such that J(q) < J(p) - * and J(p) > I(q) then - * fifo_add(p) */ - if (i < imax) { - val7 = GET_DATA_BYTE(lines + wpls, j); - if ((val7 < val) && - (val > GET_DATA_BYTE(linem + wplm, j))) { - boolval = TRUE; - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - if (!boolval && (val5 < val) && - (val > GET_DATA_BYTE(linem, j + 1))) { - boolval = TRUE; - } - } - if (boolval) { - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - } - } - - /* Propagation step: - * while fifo_empty = false - * p <- fifo_first() - * for every pixel (q) belong to neighbors of (p) - * if J(q) < J(p) and J(p) > I(q) - * J(q) <- min(J(p), I(q)); - * fifo_add(q); - * end - * end - * end */ - queue_size = lqueueGetCount(lq_pixel); - while (queue_size) { - pixel = (L_PIXEL *)lqueueRemove(lq_pixel); - i = pixel->x; - j = pixel->y; - LEPT_FREE(pixel); - lines = datas + i * wpls; - linem = datam + i * wplm; - - if ((val = GET_DATA_BYTE(lines, j)) > 0) { - if (i > 0) { - val2 = GET_DATA_BYTE(lines - wpls, j); - maskval = GET_DATA_BYTE(linem - wplm, j); - if (val > val2 && val > maskval) { - SET_DATA_BYTE(lines - wpls, j, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maskval = GET_DATA_BYTE(linem, j - 1); - if (val > val4 && val > maskval) { - SET_DATA_BYTE(lines, j - 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (i < imax) { - val7 = 
GET_DATA_BYTE(lines + wpls, j); - maskval = GET_DATA_BYTE(linem + wplm, j); - if (val > val7 && val > maskval) { - SET_DATA_BYTE(lines + wpls, j, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maskval = GET_DATA_BYTE(linem, j + 1); - if (val > val5 && val > maskval) { - SET_DATA_BYTE(lines, j + 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - } - - queue_size = lqueueGetCount(lq_pixel); - } - - break; - - case 8: - /* UL --> LR scan (Raster Order) - * If I : mask image - * J : marker image - * Let p be the currect pixel; - * tmp <- max{J(p) union J(p) neighbors in raster order} - * if (tmp > I(p)) - * J(p) <- tmp - * end */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i > 0) { - if (j > 0) { - val1 = GET_DATA_BYTE(lines - wpls, j - 1); - maxval = L_MAX(maxval, val1); - } - if (j < jmax) { - val3 = GET_DATA_BYTE(lines - wpls, j + 1); - maxval = L_MAX(maxval, val3); - } - val2 = GET_DATA_BYTE(lines - wpls, j); - maxval = L_MAX(maxval, val2); - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - } - } - } - - /* LR --> UL scan (anti-raster order) - * If I : mask image - * J : marker image - * Let p be the currect pixel; - * tmp <- max{J(p) union J(p) neighbors in anti-raster order} - * if (tmp > I(p)) - * J(p) <- tmp - * end */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - boolval = FALSE; - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i < imax) { - if (j > 0) { 
- val6 = GET_DATA_BYTE(lines + wpls, j - 1); - maxval = L_MAX(maxval, val6); - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - maxval = L_MAX(maxval, val8); - } - val7 = GET_DATA_BYTE(lines + wpls, j); - maxval = L_MAX(maxval, val7); - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - val = GET_DATA_BYTE(lines, j); - - /* - * If there exists a point (q) which belongs to J(p) - * neighbors in anti-raster order such that J(q) < J(p) - * and J(p) > I(q) then - * fifo_add(p) */ - if (i < imax) { - if (j > 0) { - val6 = GET_DATA_BYTE(lines + wpls, j - 1); - if ((val6 < val) && - (val > GET_DATA_BYTE(linem + wplm, j - 1))) { - boolval = TRUE; - } - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - if (!boolval && (val8 < val) && - (val > GET_DATA_BYTE(linem + wplm, j + 1))) { - boolval = TRUE; - } - } - val7 = GET_DATA_BYTE(lines + wpls, j); - if (!boolval && (val7 < val) && - (val > GET_DATA_BYTE(linem + wplm, j))) { - boolval = TRUE; - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - if (!boolval && (val5 < val) && - (val > GET_DATA_BYTE(linem, j + 1))) { - boolval = TRUE; - } - } - if (boolval) { - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - } - } - - /* Propagation step: - * while fifo_empty = false - * p <- fifo_first() - * for every pixel (q) belong to neighbors of (p) - * if J(q) < J(p) and J(p) > I(q) - * J(q) <- min(J(p), I(q)); - * fifo_add(q); - * end - * end - * end */ - queue_size = lqueueGetCount(lq_pixel); - while (queue_size) { - pixel = (L_PIXEL *)lqueueRemove(lq_pixel); - i = pixel->x; - j = pixel->y; - LEPT_FREE(pixel); - lines = datas + i * wpls; - linem = datam + i * wplm; - - if ((val = GET_DATA_BYTE(lines, j)) > 0) { - if (i > 0) { - if (j > 0) { - val1 = GET_DATA_BYTE(lines - wpls, j - 1); - maskval = 
GET_DATA_BYTE(linem - wplm, j - 1); - if (val > val1 && val > maskval) { - SET_DATA_BYTE(lines - wpls, j - 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val3 = GET_DATA_BYTE(lines - wpls, j + 1); - maskval = GET_DATA_BYTE(linem - wplm, j + 1); - if (val > val3 && val > maskval) { - SET_DATA_BYTE(lines - wpls, j + 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - val2 = GET_DATA_BYTE(lines - wpls, j); - maskval = GET_DATA_BYTE(linem - wplm, j); - if (val > val2 && val > maskval) { - SET_DATA_BYTE(lines - wpls, j, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i - 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maskval = GET_DATA_BYTE(linem, j - 1); - if (val > val4 && val > maskval) { - SET_DATA_BYTE(lines, j - 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (i < imax) { - if (j > 0) { - val6 = GET_DATA_BYTE(lines + wpls, j - 1); - maskval = GET_DATA_BYTE(linem + wplm, j - 1); - if (val > val6 && val > maskval) { - SET_DATA_BYTE(lines + wpls, j - 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j - 1; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - maskval = GET_DATA_BYTE(linem + wplm, j + 1); - if (val > val8 && val > maskval) { - SET_DATA_BYTE(lines + wpls, j + 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - val7 = GET_DATA_BYTE(lines + wpls, j); - maskval = GET_DATA_BYTE(linem + wplm, j); - if (val > val7 && val > maskval) { - SET_DATA_BYTE(lines + wpls, j, val); - pixel = (L_PIXEL 
*)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i + 1; - pixel->y = j; - lqueueAdd(lq_pixel, pixel); - } - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maskval = GET_DATA_BYTE(linem, j + 1); - if (val > val5 && val > maskval) { - SET_DATA_BYTE(lines, j + 1, val); - pixel = (L_PIXEL *)LEPT_CALLOC(1, sizeof(L_PIXEL)); - pixel->x = i; - pixel->y = j + 1; - lqueueAdd(lq_pixel, pixel); - } - } - } - - queue_size = lqueueGetCount(lq_pixel); - } - break; - - default: - L_ERROR("shouldn't get here!\n", procName); - break; - } - - lqueueDestroy(&lq_pixel, TRUE); -} - - -/*-----------------------------------------------------------------------* - * Vincent's Iterative Grayscale Seedfill method * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixSeedfillGraySimple() - * - * \param[in] pixs 8 bpp seed; filled in place - * \param[in] pixm 8 bpp filling mask - * \param[in] connectivity 4 or 8 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place filling operation on the seed, pixs,
- *          where the clipping mask is always above or at the level
- *          of the seed as it is filled.
- *      (2) For details of the operation, see the description in
- *          seedfillGrayLowSimple() and the code there.
- *      (3) As an example of use, see the description in pixHDome().
- *          There, the seed is an image where each pixel is a fixed
- *          amount smaller than the corresponding mask pixel.
- *      (4) Reference paper :
- *            L. Vincent, Morphological grayscale reconstruction in image
- *            analysis: applications and efficient algorithms, IEEE Transactions
- *            on  Image Processing, vol. 2, no. 2, pp. 176-201, 1993.
- * 
- */ -l_ok -pixSeedfillGraySimple(PIX *pixs, - PIX *pixm, - l_int32 connectivity) -{ -l_int32 i, h, w, wpls, wplm, boolval; -l_uint32 *datas, *datam; -PIX *pixt; - - PROCNAME("pixSeedfillGraySimple"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 8) - return ERROR_INT("pixm not defined or not 8 bpp", procName, 1); - if (connectivity != 4 && connectivity != 8) - return ERROR_INT("connectivity not in {4,8}", procName, 1); - - /* Make sure the sizes of seed and mask images are the same */ - if (pixSizesEqual(pixs, pixm) == 0) - return ERROR_INT("pixs and pixm sizes differ", procName, 1); - - /* This is used to test for completion */ - if ((pixt = pixCreateTemplate(pixs)) == NULL) - return ERROR_INT("pixt not made", procName, 1); - - datas = pixGetData(pixs); - datam = pixGetData(pixm); - wpls = pixGetWpl(pixs); - wplm = pixGetWpl(pixm); - pixGetDimensions(pixs, &w, &h, NULL); - for (i = 0; i < MaxIters; i++) { - pixCopy(pixt, pixs); - seedfillGrayLowSimple(datas, w, h, wpls, datam, wplm, connectivity); - pixEqual(pixs, pixt, &boolval); - if (boolval == 1) { -#if DEBUG_PRINT_ITERS - L_INFO("Gray seed fill converged: %d iters\n", procName, i + 1); -#endif /* DEBUG_PRINT_ITERS */ - break; - } - } - - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief pixSeedfillGrayInvSimple() - * - * \param[in] pixs 8 bpp seed; filled in place - * \param[in] pixm 8 bpp filling mask - * \param[in] connectivity 4 or 8 - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is an in-place filling operation on the seed, pixs,
- *          where the clipping mask is always below or at the level
- *          of the seed as it is filled.  Think of filling up a basin
- *          to a particular level, given by the maximum seed value
- *          in the basin.  Outside the filled region, the mask
- *          is above the filling level.
- *      (2) Contrast this with pixSeedfillGraySimple(), where the clipping mask
- *          is always above or at the level of the fill.  An example
- *          of its use is the hdome fill, where the seed is an image
- *          where each pixel is a fixed amount smaller than the
- *          corresponding mask pixel.
- * 
- */ -l_ok -pixSeedfillGrayInvSimple(PIX *pixs, - PIX *pixm, - l_int32 connectivity) -{ -l_int32 i, h, w, wpls, wplm, boolval; -l_uint32 *datas, *datam; -PIX *pixt; - - PROCNAME("pixSeedfillGrayInvSimple"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 8) - return ERROR_INT("pixm not defined or not 8 bpp", procName, 1); - if (connectivity != 4 && connectivity != 8) - return ERROR_INT("connectivity not in {4,8}", procName, 1); - - /* Make sure the sizes of seed and mask images are the same */ - if (pixSizesEqual(pixs, pixm) == 0) - return ERROR_INT("pixs and pixm sizes differ", procName, 1); - - /* This is used to test for completion */ - if ((pixt = pixCreateTemplate(pixs)) == NULL) - return ERROR_INT("pixt not made", procName, 1); - - datas = pixGetData(pixs); - datam = pixGetData(pixm); - wpls = pixGetWpl(pixs); - wplm = pixGetWpl(pixm); - pixGetDimensions(pixs, &w, &h, NULL); - for (i = 0; i < MaxIters; i++) { - pixCopy(pixt, pixs); - seedfillGrayInvLowSimple(datas, w, h, wpls, datam, wplm, connectivity); - pixEqual(pixs, pixt, &boolval); - if (boolval == 1) { -#if DEBUG_PRINT_ITERS - L_INFO("Gray seed fill converged: %d iters\n", procName, i + 1); -#endif /* DEBUG_PRINT_ITERS */ - break; - } - } - - pixDestroy(&pixt); - return 0; -} - - -/*! - * \brief seedfillGrayLowSimple() - * - * Notes: - * (1) The pixels are numbered as follows: - * 1 2 3 - * 4 x 5 - * 6 7 8 - * This low-level filling operation consists of two scans, - * raster and anti-raster, covering the entire seed image. - * The caller typically iterates until the filling is - * complete. - * (2) The filling action can be visualized from the following example. - * Suppose the mask, which clips the fill, is a sombrero-shaped - * surface, where the highest point is 200 and the low pixels - * around the rim are 30. Beyond the rim, the mask goes up a bit. 
- * Suppose the seed, which is filled, consists of a single point - * of height 150, located below the max of the mask, with - * the rest 0. Then in the raster scan, nothing happens until - * the high seed point is encountered, and then this value is - * propagated right and down, until it hits the side of the - * sombrero. The seed can never exceed the mask, so it fills - * to the rim, going lower along the mask surface. When it - * passes the rim, the seed continues to fill at the rim - * height to the edge of the seed image. Then on the - * anti-raster scan, the seed fills flat inside the - * sombrero to the upper and left, and then out from the - * rim as before. The final result has a seed that is - * flat outside the rim, and inside it fills the sombrero - * but only up to 150. If the rim height varies, the - * filled seed outside the rim will be at the highest - * point on the rim, which is a saddle point on the rim. - */ -static void -seedfillGrayLowSimple(l_uint32 *datas, - l_int32 w, - l_int32 h, - l_int32 wpls, - l_uint32 *datam, - l_int32 wplm, - l_int32 connectivity) -{ -l_uint8 val2, val3, val4, val5, val7, val8; -l_uint8 val, maxval, maskval; -l_int32 i, j, imax, jmax; -l_uint32 *lines, *linem; - - PROCNAME("seedfillGrayLowSimple"); - - imax = h - 1; - jmax = w - 1; - - switch (connectivity) - { - case 4: - /* UL --> LR scan */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i > 0) - maxval = GET_DATA_BYTE(lines - wpls, j); - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - } - } - } - - /* LR --> UL scan */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - if ((maskval = 
GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i < imax) - maxval = GET_DATA_BYTE(lines + wpls, j); - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - } - } - } - break; - - case 8: - /* UL --> LR scan */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i > 0) { - if (j > 0) - maxval = GET_DATA_BYTE(lines - wpls, j - 1); - if (j < jmax) { - val2 = GET_DATA_BYTE(lines - wpls, j + 1); - maxval = L_MAX(maxval, val2); - } - val3 = GET_DATA_BYTE(lines - wpls, j); - maxval = L_MAX(maxval, val3); - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - } - } - } - - /* LR --> UL scan */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - if ((maskval = GET_DATA_BYTE(linem, j)) > 0) { - maxval = 0; - if (i < imax) { - if (j > 0) - maxval = GET_DATA_BYTE(lines + wpls, j - 1); - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - maxval = L_MAX(maxval, val8); - } - val7 = GET_DATA_BYTE(lines + wpls, j); - maxval = L_MAX(maxval, val7); - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - val = GET_DATA_BYTE(lines, j); - maxval = L_MAX(maxval, val); - val = L_MIN(maxval, maskval); - SET_DATA_BYTE(lines, j, val); - } - } - } - break; - - default: - L_ERROR("connectivity must be 4 or 8\n", procName); - } -} - - -/*! 
- * \brief seedfillGrayInvLowSimple() - * - * Notes: - * (1) The pixels are numbered as follows: - * 1 2 3 - * 4 x 5 - * 6 7 8 - * This low-level filling operation consists of two scans, - * raster and anti-raster, covering the entire seed image. - * The caller typically iterates until the filling is - * complete. - * (2) The "Inv" signifies the fact that in this case, filling - * of the seed only takes place when the seed value is - * greater than the mask value. The mask will act to stop - * the fill when it is higher than the seed level. (This is - * in contrast to conventional grayscale filling where the - * seed always fills below the mask.) - * (3) An example of use is a basin, described by the mask (pixm), - * where within the basin, the seed pix (pixs) gets filled to the - * height of the highest seed pixel that is above its - * corresponding max pixel. Filling occurs while the - * propagating seed pixels in pixs are larger than the - * corresponding mask values in pixm. - */ -static void -seedfillGrayInvLowSimple(l_uint32 *datas, - l_int32 w, - l_int32 h, - l_int32 wpls, - l_uint32 *datam, - l_int32 wplm, - l_int32 connectivity) -{ -l_uint8 val1, val2, val3, val4, val5, val6, val7, val8; -l_uint8 maxval, maskval; -l_int32 i, j, imax, jmax; -l_uint32 *lines, *linem; - - PROCNAME("seedfillGrayInvLowSimple"); - - imax = h - 1; - jmax = w - 1; - - switch (connectivity) - { - case 4: - /* UL --> LR scan */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i > 0) { - val2 = GET_DATA_BYTE(lines - wpls, j); - maxval = L_MAX(maxval, val2); - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - } - } - } - - /* LR --> UL scan */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - 
for (j = jmax; j >= 0; j--) { - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i < imax) { - val7 = GET_DATA_BYTE(lines + wpls, j); - maxval = L_MAX(maxval, val7); - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - } - } - } - break; - - case 8: - /* UL --> LR scan */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = 0; j < w; j++) { - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i > 0) { - if (j > 0) { - val1 = GET_DATA_BYTE(lines - wpls, j - 1); - maxval = L_MAX(maxval, val1); - } - if (j < jmax) { - val2 = GET_DATA_BYTE(lines - wpls, j + 1); - maxval = L_MAX(maxval, val2); - } - val3 = GET_DATA_BYTE(lines - wpls, j); - maxval = L_MAX(maxval, val3); - } - if (j > 0) { - val4 = GET_DATA_BYTE(lines, j - 1); - maxval = L_MAX(maxval, val4); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - } - } - } - - /* LR --> UL scan */ - for (i = imax; i >= 0; i--) { - lines = datas + i * wpls; - linem = datam + i * wplm; - for (j = jmax; j >= 0; j--) { - if ((maskval = GET_DATA_BYTE(linem, j)) < 255) { - maxval = GET_DATA_BYTE(lines, j); - if (i < imax) { - if (j > 0) { - val6 = GET_DATA_BYTE(lines + wpls, j - 1); - maxval = L_MAX(maxval, val6); - } - if (j < jmax) { - val8 = GET_DATA_BYTE(lines + wpls, j + 1); - maxval = L_MAX(maxval, val8); - } - val7 = GET_DATA_BYTE(lines + wpls, j); - maxval = L_MAX(maxval, val7); - } - if (j < jmax) { - val5 = GET_DATA_BYTE(lines, j + 1); - maxval = L_MAX(maxval, val5); - } - if (maxval > maskval) - SET_DATA_BYTE(lines, j, maxval); - } - } - } - break; - - default: - L_ERROR("connectivity must be 4 or 8\n", procName); - } -} - - -/*-----------------------------------------------------------------------* - * Gray seedfill variations * - 
*-----------------------------------------------------------------------*/ -/*! - * \brief pixSeedfillGrayBasin() - * - * \param[in] pixb binary mask giving seed locations - * \param[in] pixm 8 bpp basin-type filling mask - * \param[in] delta amount of seed value above mask - * \param[in] connectivity 4 or 8 - * \return pixd filled seed if OK, NULL on error - * - *
- * Notes:
- *      (1) This fills from a seed within basins defined by a filling mask.
- *          The seed value(s) are greater than the corresponding
- *          filling mask value, and the result has the bottoms of
- *          the basins raised by the initial seed value.
- *      (2) The seed has value 255 except where pixb has fg (1), which
- *          are the seed 'locations'.  At the seed locations, the seed
- *          value is the corresponding value of the mask pixel in pixm
- *          plus %delta.  If %delta == 0, we return a copy of pixm.
- *      (3) The actual filling is done using the standard grayscale filling
- *          operation on the inverse of the mask and using the inverse
- *          of the seed image.  After filling, we return the inverse of
- *          the filled seed.
- *      (4) As an example of use: pixm can describe a grayscale image
- *          of text, where the (dark) text pixels are basins of
- *          low values; pixb can identify the local minima in pixm (say, at
- *          the bottom of the basins); and delta is the amount that we wish
- *          to raise (lighten) the basins.  We construct the seed
- *          (a.k.a marker) image from pixb, pixm and %delta.
- * 
- */ -PIX * -pixSeedfillGrayBasin(PIX *pixb, - PIX *pixm, - l_int32 delta, - l_int32 connectivity) -{ -PIX *pixbi, *pixmi, *pixsd; - - PROCNAME("pixSeedfillGrayBasin"); - - if (!pixb || pixGetDepth(pixb) != 1) - return (PIX *)ERROR_PTR("pixb undefined or not 1 bpp", procName, NULL); - if (!pixm || pixGetDepth(pixm) != 8) - return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not in {4,8}", procName, NULL); - - if (delta <= 0) { - L_WARNING("delta <= 0; returning a copy of pixm\n", procName); - return pixCopy(NULL, pixm); - } - - /* Add delta to every pixel in pixm */ - pixsd = pixCopy(NULL, pixm); - pixAddConstantGray(pixsd, delta); - - /* Prepare the seed. Write 255 in all pixels of - * ([pixm] + delta) where pixb is 0. */ - pixbi = pixInvert(NULL, pixb); - pixSetMasked(pixsd, pixbi, 255); - - /* Fill the inverse seed, using the inverse clipping mask */ - pixmi = pixInvert(NULL, pixm); - pixInvert(pixsd, pixsd); - pixSeedfillGray(pixsd, pixmi, connectivity); - - /* Re-invert the filled seed */ - pixInvert(pixsd, pixsd); - - pixDestroy(&pixbi); - pixDestroy(&pixmi); - return pixsd; -} - - -/*-----------------------------------------------------------------------* - * Vincent's Distance Function method * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixDistanceFunction() - * - * \param[in] pixs 1 bpp - * \param[in] connectivity 4 or 8 - * \param[in] outdepth 8 or 16 bits for pixd - * \param[in] boundcond L_BOUNDARY_BG, L_BOUNDARY_FG - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This computes the distance of each pixel from the nearest
- *          background pixel.  All bg pixels therefore have a distance of 0,
- *          and the fg pixel distances increase linearly from 1 at the
- *          boundary.  It can also be used to compute the distance of
- *          each pixel from the nearest fg pixel, by inverting the input
- *          image before calling this function.  Then all fg pixels have
- *          a distance 0 and the bg pixel distances increase linearly
- *          from 1 at the boundary.
- *      (2) The algorithm, described in Leptonica on the page on seed
- *          filling and connected components, is due to Luc Vincent.
- *          In brief, we generate an 8 or 16 bpp image, initialized
- *          with the fg pixels of the input pix set to 1 and the
- *          1-boundary pixels (i.e., the boundary pixels of width 1 on
- *          the four sides set as either:
- *            * L_BOUNDARY_BG: 0
- *            * L_BOUNDARY_FG:  max
- *          where max = 0xff for 8 bpp and 0xffff for 16 bpp.
- *          Then do raster/anti-raster sweeps over all pixels interior
- *          to the 1-boundary, where the value of each new pixel is
- *          taken to be 1 more than the minimum of the previously-seen
- *          connected pixels (using either 4 or 8 connectivity).
- *          Finally, set the 1-boundary pixels using the mirrored method;
- *          this removes the max values there.
- *      (3) Using L_BOUNDARY_BG clamps the distance to 0 at the
- *          boundary.  Using L_BOUNDARY_FG allows the distance
- *          at the image boundary to "float".
- *      (4) For 4-connected, one could initialize only the left and top
- *          1-boundary pixels, and go all the way to the right
- *          and bottom; then coming back reset left and top.  But we
- *          instead use a method that works for both 4- and 8-connected.
- * 
- */ -PIX * -pixDistanceFunction(PIX *pixs, - l_int32 connectivity, - l_int32 outdepth, - l_int32 boundcond) -{ -l_int32 w, h, wpld; -l_uint32 *datad; -PIX *pixd; - - PROCNAME("pixDistanceFunction"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("!pixs or pixs not 1 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - if (outdepth != 8 && outdepth != 16) - return (PIX *)ERROR_PTR("outdepth not 8 or 16 bpp", procName, NULL); - if (boundcond != L_BOUNDARY_BG && boundcond != L_BOUNDARY_FG) - return (PIX *)ERROR_PTR("invalid boundcond", procName, NULL); - - pixGetDimensions(pixs, &w, &h, NULL); - if ((pixd = pixCreate(w, h, outdepth)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - /* Initialize the fg pixels to 1 and the bg pixels to 0 */ - pixSetMasked(pixd, pixs, 1); - - if (boundcond == L_BOUNDARY_BG) { - distanceFunctionLow(datad, w, h, outdepth, wpld, connectivity); - } else { /* L_BOUNDARY_FG: set boundary pixels to max val */ - pixRasterop(pixd, 0, 0, w, 1, PIX_SET, NULL, 0, 0); /* top */ - pixRasterop(pixd, 0, h - 1, w, 1, PIX_SET, NULL, 0, 0); /* bot */ - pixRasterop(pixd, 0, 0, 1, h, PIX_SET, NULL, 0, 0); /* left */ - pixRasterop(pixd, w - 1, 0, 1, h, PIX_SET, NULL, 0, 0); /* right */ - - distanceFunctionLow(datad, w, h, outdepth, wpld, connectivity); - - /* Set each boundary pixel equal to the pixel next to it */ - pixSetMirroredBorder(pixd, 1, 1, 1, 1); - } - - return pixd; -} - - -/*! - * \brief distanceFunctionLow() - */ -static void -distanceFunctionLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 d, - l_int32 wpld, - l_int32 connectivity) -{ -l_int32 val1, val2, val3, val4, val5, val6, val7, val8, minval, val; -l_int32 i, j, imax, jmax; -l_uint32 *lined; - - PROCNAME("distanceFunctionLow"); - - /* One raster scan followed by one anti-raster scan. 
- * This does not re-set the 1-boundary of pixels that - * were initialized to either 0 or maxval. */ - imax = h - 1; - jmax = w - 1; - switch (connectivity) - { - case 4: - if (d == 8) { - /* UL --> LR scan */ - for (i = 1; i < imax; i++) { - lined = datad + i * wpld; - for (j = 1; j < jmax; j++) { - if ((val = GET_DATA_BYTE(lined, j)) > 0) { - val2 = GET_DATA_BYTE(lined - wpld, j); - val4 = GET_DATA_BYTE(lined, j - 1); - minval = L_MIN(val2, val4); - minval = L_MIN(minval, 254); - SET_DATA_BYTE(lined, j, minval + 1); - } - } - } - - /* LR --> UL scan */ - for (i = imax - 1; i > 0; i--) { - lined = datad + i * wpld; - for (j = jmax - 1; j > 0; j--) { - if ((val = GET_DATA_BYTE(lined, j)) > 0) { - val7 = GET_DATA_BYTE(lined + wpld, j); - val5 = GET_DATA_BYTE(lined, j + 1); - minval = L_MIN(val5, val7); - minval = L_MIN(minval + 1, val); - SET_DATA_BYTE(lined, j, minval); - } - } - } - } else { /* d == 16 */ - /* UL --> LR scan */ - for (i = 1; i < imax; i++) { - lined = datad + i * wpld; - for (j = 1; j < jmax; j++) { - if ((val = GET_DATA_TWO_BYTES(lined, j)) > 0) { - val2 = GET_DATA_TWO_BYTES(lined - wpld, j); - val4 = GET_DATA_TWO_BYTES(lined, j - 1); - minval = L_MIN(val2, val4); - minval = L_MIN(minval, 0xfffe); - SET_DATA_TWO_BYTES(lined, j, minval + 1); - } - } - } - - /* LR --> UL scan */ - for (i = imax - 1; i > 0; i--) { - lined = datad + i * wpld; - for (j = jmax - 1; j > 0; j--) { - if ((val = GET_DATA_TWO_BYTES(lined, j)) > 0) { - val7 = GET_DATA_TWO_BYTES(lined + wpld, j); - val5 = GET_DATA_TWO_BYTES(lined, j + 1); - minval = L_MIN(val5, val7); - minval = L_MIN(minval + 1, val); - SET_DATA_TWO_BYTES(lined, j, minval); - } - } - } - } - break; - - case 8: - if (d == 8) { - /* UL --> LR scan */ - for (i = 1; i < imax; i++) { - lined = datad + i * wpld; - for (j = 1; j < jmax; j++) { - if ((val = GET_DATA_BYTE(lined, j)) > 0) { - val1 = GET_DATA_BYTE(lined - wpld, j - 1); - val2 = GET_DATA_BYTE(lined - wpld, j); - val3 = GET_DATA_BYTE(lined - wpld, j + 
1); - val4 = GET_DATA_BYTE(lined, j - 1); - minval = L_MIN(val1, val2); - minval = L_MIN(minval, val3); - minval = L_MIN(minval, val4); - minval = L_MIN(minval, 254); - SET_DATA_BYTE(lined, j, minval + 1); - } - } - } - - /* LR --> UL scan */ - for (i = imax - 1; i > 0; i--) { - lined = datad + i * wpld; - for (j = jmax - 1; j > 0; j--) { - if ((val = GET_DATA_BYTE(lined, j)) > 0) { - val8 = GET_DATA_BYTE(lined + wpld, j + 1); - val7 = GET_DATA_BYTE(lined + wpld, j); - val6 = GET_DATA_BYTE(lined + wpld, j - 1); - val5 = GET_DATA_BYTE(lined, j + 1); - minval = L_MIN(val8, val7); - minval = L_MIN(minval, val6); - minval = L_MIN(minval, val5); - minval = L_MIN(minval + 1, val); - SET_DATA_BYTE(lined, j, minval); - } - } - } - } else { /* d == 16 */ - /* UL --> LR scan */ - for (i = 1; i < imax; i++) { - lined = datad + i * wpld; - for (j = 1; j < jmax; j++) { - if ((val = GET_DATA_TWO_BYTES(lined, j)) > 0) { - val1 = GET_DATA_TWO_BYTES(lined - wpld, j - 1); - val2 = GET_DATA_TWO_BYTES(lined - wpld, j); - val3 = GET_DATA_TWO_BYTES(lined - wpld, j + 1); - val4 = GET_DATA_TWO_BYTES(lined, j - 1); - minval = L_MIN(val1, val2); - minval = L_MIN(minval, val3); - minval = L_MIN(minval, val4); - minval = L_MIN(minval, 0xfffe); - SET_DATA_TWO_BYTES(lined, j, minval + 1); - } - } - } - - /* LR --> UL scan */ - for (i = imax - 1; i > 0; i--) { - lined = datad + i * wpld; - for (j = jmax - 1; j > 0; j--) { - if ((val = GET_DATA_TWO_BYTES(lined, j)) > 0) { - val8 = GET_DATA_TWO_BYTES(lined + wpld, j + 1); - val7 = GET_DATA_TWO_BYTES(lined + wpld, j); - val6 = GET_DATA_TWO_BYTES(lined + wpld, j - 1); - val5 = GET_DATA_TWO_BYTES(lined, j + 1); - minval = L_MIN(val8, val7); - minval = L_MIN(minval, val6); - minval = L_MIN(minval, val5); - minval = L_MIN(minval + 1, val); - SET_DATA_TWO_BYTES(lined, j, minval); - } - } - } - } - break; - - default: - L_ERROR("connectivity must be 4 or 8\n", procName); - break; - } -} - - 
-/*-----------------------------------------------------------------------* - * Seed spread (based on distance function) * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixSeedspread() - * - * \param[in] pixs 8 bpp - * \param[in] connectivity 4 or 8 - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) The raster/anti-raster method for implementing this filling
- *          operation was suggested by Ray Smith.
- *      (2) This takes an arbitrary set of nonzero pixels in pixs, which
- *          can be sparse, and spreads (extrapolates) the values to
- *          fill all the pixels in pixd with the nonzero value it is
- *          closest to in pixs.  This is similar (though not completely
- *          equivalent) to doing a Voronoi tiling of the image, with a
- *          tile surrounding each pixel that has a nonzero value.
- *          All pixels within a tile are then closer to its "central"
- *          pixel than to any others.  Then assign the value of the
- *          "central" pixel to each pixel in the tile.
- *      (3) This is implemented by computing a distance function in parallel
- *          with the fill.  The distance function uses free boundary
- *          conditions (assumed maxval outside), and it controls the
- *          propagation of the pixels in pixd away from the nonzero
- *          (seed) values.  This is done in 2 traversals (raster/antiraster).
- *          In the raster direction, whenever the distance function
- *          is nonzero, the spread pixel takes on the value of its
- *          predecessor that has the minimum distance value.  In the
- *          antiraster direction, whenever the distance function is nonzero
- *          and its value is replaced by a smaller value, the spread
- *          pixel takes the value of the predecessor with the minimum
- *          distance value.
- *      (4) At boundaries where a pixel is equidistant from two
- *          nearest nonzero (seed) pixels, the decision of which value
- *          to use is arbitrary (greedy in search for minimum distance).
- *          This can give rise to strange-looking results, particularly
- *          for 4-connectivity where the L1 distance is computed from
- *          steps in N,S,E and W directions (no diagonals).
- * 
- */ -PIX * -pixSeedspread(PIX *pixs, - l_int32 connectivity) -{ -l_int32 w, h, wplt, wplg; -l_uint32 *datat, *datag; -PIX *pixm, *pixt, *pixg, *pixd; - - PROCNAME("pixSeedspread"); - - if (!pixs || pixGetDepth(pixs) != 8) - return (PIX *)ERROR_PTR("!pixs or pixs not 8 bpp", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - /* Add a 4 byte border to pixs. This simplifies the computation. */ - pixg = pixAddBorder(pixs, 4, 0); - pixGetDimensions(pixg, &w, &h, NULL); - - /* Initialize distance function pixt. Threshold pixs to get - * a 0 at the seed points where the pixs pixel is nonzero, and - * a 1 at all points that need to be filled. Use this as a - * mask to set a 1 in pixt at all non-seed points. Also, set all - * pixt pixels in an interior boundary of width 1 to the - * maximum value. For debugging, to view the distance function, - * use pixConvert16To8(pixt, L_LS_BYTE) on small images. */ - pixm = pixThresholdToBinary(pixg, 1); - pixt = pixCreate(w, h, 16); - pixSetMasked(pixt, pixm, 1); - pixRasterop(pixt, 0, 0, w, 1, PIX_SET, NULL, 0, 0); /* top */ - pixRasterop(pixt, 0, h - 1, w, 1, PIX_SET, NULL, 0, 0); /* bot */ - pixRasterop(pixt, 0, 0, 1, h, PIX_SET, NULL, 0, 0); /* left */ - pixRasterop(pixt, w - 1, 0, 1, h, PIX_SET, NULL, 0, 0); /* right */ - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - - /* Do the interpolation and remove the border. */ - datag = pixGetData(pixg); - wplg = pixGetWpl(pixg); - seedspreadLow(datag, w, h, wplg, datat, wplt, connectivity); - pixd = pixRemoveBorder(pixg, 4); - - pixDestroy(&pixm); - pixDestroy(&pixg); - pixDestroy(&pixt); - return pixd; -} - - -/*! - * \brief seedspreadLow() - * - * See pixSeedspread() for a brief description of the algorithm here. 
- */ -static void -seedspreadLow(l_uint32 *datad, - l_int32 w, - l_int32 h, - l_int32 wpld, - l_uint32 *datat, - l_int32 wplt, - l_int32 connectivity) -{ -l_int32 val1t, val2t, val3t, val4t, val5t, val6t, val7t, val8t; -l_int32 i, j, imax, jmax, minval, valt, vald; -l_uint32 *linet, *lined; - - PROCNAME("seedspreadLow"); - - /* One raster scan followed by one anti-raster scan. - * pixt is initialized to have 0 on pixels where the - * input is specified in pixd, and to have 1 on all - * other pixels. We only change pixels in pixt and pixd - * that are non-zero in pixt. */ - imax = h - 1; - jmax = w - 1; - switch (connectivity) - { - case 4: - /* UL --> LR scan */ - for (i = 1; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 1; j < jmax; j++) { - if ((valt = GET_DATA_TWO_BYTES(linet, j)) > 0) { - val2t = GET_DATA_TWO_BYTES(linet - wplt, j); - val4t = GET_DATA_TWO_BYTES(linet, j - 1); - minval = L_MIN(val2t, val4t); - minval = L_MIN(minval, 0xfffe); - SET_DATA_TWO_BYTES(linet, j, minval + 1); - if (val2t < val4t) - vald = GET_DATA_BYTE(lined - wpld, j); - else - vald = GET_DATA_BYTE(lined, j - 1); - SET_DATA_BYTE(lined, j, vald); - } - } - } - - /* LR --> UL scan */ - for (i = imax - 1; i > 0; i--) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = jmax - 1; j > 0; j--) { - if ((valt = GET_DATA_TWO_BYTES(linet, j)) > 0) { - val7t = GET_DATA_TWO_BYTES(linet + wplt, j); - val5t = GET_DATA_TWO_BYTES(linet, j + 1); - minval = L_MIN(val5t, val7t); - minval = L_MIN(minval + 1, valt); - if (valt > minval) { /* replace */ - SET_DATA_TWO_BYTES(linet, j, minval); - if (val5t < val7t) - vald = GET_DATA_BYTE(lined, j + 1); - else - vald = GET_DATA_BYTE(lined + wplt, j); - SET_DATA_BYTE(lined, j, vald); - } - } - } - } - break; - case 8: - /* UL --> LR scan */ - for (i = 1; i < h; i++) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = 1; j < jmax; j++) { - if ((valt = GET_DATA_TWO_BYTES(linet, j)) > 0) { - val1t 
= GET_DATA_TWO_BYTES(linet - wplt, j - 1); - val2t = GET_DATA_TWO_BYTES(linet - wplt, j); - val3t = GET_DATA_TWO_BYTES(linet - wplt, j + 1); - val4t = GET_DATA_TWO_BYTES(linet, j - 1); - minval = L_MIN(val1t, val2t); - minval = L_MIN(minval, val3t); - minval = L_MIN(minval, val4t); - minval = L_MIN(minval, 0xfffe); - SET_DATA_TWO_BYTES(linet, j, minval + 1); - if (minval == val1t) - vald = GET_DATA_BYTE(lined - wpld, j - 1); - else if (minval == val2t) - vald = GET_DATA_BYTE(lined - wpld, j); - else if (minval == val3t) - vald = GET_DATA_BYTE(lined - wpld, j + 1); - else /* minval == val4t */ - vald = GET_DATA_BYTE(lined, j - 1); - SET_DATA_BYTE(lined, j, vald); - } - } - } - - /* LR --> UL scan */ - for (i = imax - 1; i > 0; i--) { - linet = datat + i * wplt; - lined = datad + i * wpld; - for (j = jmax - 1; j > 0; j--) { - if ((valt = GET_DATA_TWO_BYTES(linet, j)) > 0) { - val8t = GET_DATA_TWO_BYTES(linet + wplt, j + 1); - val7t = GET_DATA_TWO_BYTES(linet + wplt, j); - val6t = GET_DATA_TWO_BYTES(linet + wplt, j - 1); - val5t = GET_DATA_TWO_BYTES(linet, j + 1); - minval = L_MIN(val8t, val7t); - minval = L_MIN(minval, val6t); - minval = L_MIN(minval, val5t); - minval = L_MIN(minval + 1, valt); - if (valt > minval) { /* replace */ - SET_DATA_TWO_BYTES(linet, j, minval); - if (minval == val5t + 1) - vald = GET_DATA_BYTE(lined, j + 1); - else if (minval == val6t + 1) - vald = GET_DATA_BYTE(lined + wpld, j - 1); - else if (minval == val7t + 1) - vald = GET_DATA_BYTE(lined + wpld, j); - else /* minval == val8t + 1 */ - vald = GET_DATA_BYTE(lined + wpld, j + 1); - SET_DATA_BYTE(lined, j, vald); - } - } - } - } - break; - default: - L_ERROR("connectivity must be 4 or 8\n", procName); - break; - } -} - - -/*-----------------------------------------------------------------------* - * Local extrema * - *-----------------------------------------------------------------------*/ -/*! 
- * \brief pixLocalExtrema() - * - * \param[in] pixs 8 bpp - * \param[in] maxmin max allowed for the min in a 3x3 neighborhood; - * use 0 for default which is to have no upper bound - * \param[in] minmax min allowed for the max in a 3x3 neighborhood; - * use 0 for default which is to have no lower bound - * \param[out] ppixmin [optional] mask of local minima - * \param[out] ppixmax [optional] mask of local maxima - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This gives the actual local minima and maxima.
- *          A local minimum is a pixel whose surrounding pixels all
- *          have values at least as large, and likewise for a local
- *          maximum.  For the local minima, %maxmin is the upper
- *          bound for the value of pixs.  Likewise, for the local maxima,
- *          %minmax is the lower bound for the value of pixs.
- *      (2) The minima are found by starting with the erosion-and-equality
- *          approach of pixSelectedLocalExtrema().  This is followed
- *          by a qualification step, where each c.c. in the resulting
- *          minimum mask is extracted, the pixels bordering it are
- *          located, and they are queried.  If all of those pixels
- *          are larger than the value of that minimum, it is a true
- *          minimum and its c.c. is saved; otherwise the c.c. is
- *          rejected.  Note that if a bordering pixel has the
- *          same value as the minimum, it must then have a
- *          neighbor that is smaller, so the component is not a
- *          true minimum.
- *      (3) The maxima are found by inverting the image and looking
- *          for the minima there.
- *      (4) The generated masks can be used as markers for
- *          further operations.
- * 
- */ -l_ok -pixLocalExtrema(PIX *pixs, - l_int32 maxmin, - l_int32 minmax, - PIX **ppixmin, - PIX **ppixmax) -{ -PIX *pixmin, *pixmax, *pixt1, *pixt2; - - PROCNAME("pixLocalExtrema"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!ppixmin && !ppixmax) - return ERROR_INT("neither &pixmin, &pixmax are defined", procName, 1); - if (maxmin <= 0) maxmin = 254; - if (minmax <= 0) minmax = 1; - - if (ppixmin) { - pixt1 = pixErodeGray(pixs, 3, 3); - pixmin = pixFindEqualValues(pixs, pixt1); - pixDestroy(&pixt1); - pixQualifyLocalMinima(pixs, pixmin, maxmin); - *ppixmin = pixmin; - } - - if (ppixmax) { - pixt1 = pixInvert(NULL, pixs); - pixt2 = pixErodeGray(pixt1, 3, 3); - pixmax = pixFindEqualValues(pixt1, pixt2); - pixDestroy(&pixt2); - pixQualifyLocalMinima(pixt1, pixmax, 255 - minmax); - *ppixmax = pixmax; - pixDestroy(&pixt1); - } - - return 0; -} - - -/*! - * \brief pixQualifyLocalMinima() - * - * \param[in] pixs 8 bpp image from which pixm has been extracted - * \param[in] pixm 1 bpp mask of values equal to min in 3x3 neighborhood - * \param[in] maxval max allowed for the min in a 3x3 neighborhood; - * use 0 for default which is to have no upper bound - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function acts in-place to remove all c.c. in pixm
- *          that are not true local minima in pixs.  As seen in
- *          pixLocalExtrema(), the input pixm are found by selecting those
- *          pixels of pixs whose values do not change with a 3x3
- *          grayscale erosion.  Here, we require that for each c.c.
- *          in pixm, all pixels in pixs that correspond to the exterior
- *          boundary pixels of the c.c. have values that are greater
- *          than the value within the c.c.
- *      (2) The maximum allowed value for each local minimum can be
- *          bounded with %maxval.  Use 0 for default, which is to have
- *          no upper bound (equivalent to maxval == 254).
- * 
- */ -static l_int32 -pixQualifyLocalMinima(PIX *pixs, - PIX *pixm, - l_int32 maxval) -{ -l_int32 n, i, j, k, x, y, w, h, xc, yc, wc, hc, xon, yon; -l_int32 vals, wpls, wplc, ismin; -l_uint32 val; -l_uint32 *datas, *datac, *lines, *linec; -BOXA *boxa; -PIX *pix1, *pix2, *pix3; -PIXA *pixa; - - PROCNAME("pixQualifyLocalMinima"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 1) - return ERROR_INT("pixm not defined or not 1 bpp", procName, 1); - if (maxval <= 0) maxval = 254; - - pixGetDimensions(pixs, &w, &h, NULL); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - boxa = pixConnComp(pixm, &pixa, 8); - n = pixaGetCount(pixa); - for (k = 0; k < n; k++) { - boxaGetBoxGeometry(boxa, k, &xc, &yc, &wc, &hc); - pix1 = pixaGetPix(pixa, k, L_COPY); - pix2 = pixAddBorder(pix1, 1, 0); - pix3 = pixDilateBrick(NULL, pix2, 3, 3); - pixXor(pix3, pix3, pix2); /* exterior boundary pixels */ - datac = pixGetData(pix3); - wplc = pixGetWpl(pix3); - nextOnPixelInRaster(pix1, 0, 0, &xon, &yon); - pixGetPixel(pixs, xc + xon, yc + yon, &val); - if (val > maxval) { /* too large; erase */ - pixRasterop(pixm, xc, yc, wc, hc, PIX_XOR, pix1, 0, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - continue; - } - ismin = TRUE; - - /* Check all values in pixs that correspond to the exterior - * boundary pixels of the c.c. in pixm. Verify that the - * value in the c.c. is always less. */ - for (i = 0, y = yc - 1; i < hc + 2 && y >= 0 && y < h; i++, y++) { - lines = datas + y * wpls; - linec = datac + i * wplc; - for (j = 0, x = xc - 1; j < wc + 2 && x >= 0 && x < w; j++, x++) { - if (GET_DATA_BIT(linec, j)) { - vals = GET_DATA_BYTE(lines, x); - if (vals <= val) { /* not a minimum! 
*/ - ismin = FALSE; - break; - } - } - } - if (!ismin) - break; - } - if (!ismin) /* erase it */ - pixRasterop(pixm, xc, yc, wc, hc, PIX_XOR, pix1, 0, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - } - - boxaDestroy(&boxa); - pixaDestroy(&pixa); - return 0; -} - - -/*! - * \brief pixSelectedLocalExtrema() - * - * \param[in] pixs 8 bpp - * \param[in] mindist -1 for keeping all pixels; >= 0 specifies distance - * \param[out] ppixmin mask of local minima - * \param[out] ppixmax mask of local maxima - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This selects those local 3x3 minima that are at least a
- *          specified distance from the nearest local 3x3 maxima, and v.v.
- *          for the selected set of local 3x3 maxima.
- *          The local 3x3 minima is the set of pixels whose value equals
- *          the value after a 3x3 brick erosion, and the local 3x3 maxima
- *          is the set of pixels whose value equals the value after
- *          a 3x3 brick dilation.
- *      (2) mindist is the minimum distance allowed between
- *          local 3x3 minima and local 3x3 maxima, in an 8-connected sense.
- *          mindist == 1 keeps all pixels found in step 1.
- *          mindist == 0 removes all pixels from each mask that are
- *          both a local 3x3 minimum and a local 3x3 maximum.
- *          mindist == 1 removes any local 3x3 minimum pixel that touches a
- *          local 3x3 maximum pixel, and likewise for the local maxima.
- *          To make the decision, visualize each local 3x3 minimum pixel
- *          as being surrounded by a square of size (2 * mindist + 1)
- *          on each side, such that no local 3x3 maximum pixel is within
- *          that square; and v.v.
- *      (3) The generated masks can be used as markers for further operations.
- * 
- */ -l_ok -pixSelectedLocalExtrema(PIX *pixs, - l_int32 mindist, - PIX **ppixmin, - PIX **ppixmax) -{ -PIX *pixmin, *pixmax, *pixt, *pixtmin, *pixtmax; - - PROCNAME("pixSelectedLocalExtrema"); - - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); - if (!ppixmin || !ppixmax) - return ERROR_INT("&pixmin and &pixmax not both defined", procName, 1); - - pixt = pixErodeGray(pixs, 3, 3); - pixmin = pixFindEqualValues(pixs, pixt); - pixDestroy(&pixt); - pixt = pixDilateGray(pixs, 3, 3); - pixmax = pixFindEqualValues(pixs, pixt); - pixDestroy(&pixt); - - /* Remove all points that are within the prescribed distance - * from each other. */ - if (mindist < 0) { /* remove no points */ - *ppixmin = pixmin; - *ppixmax = pixmax; - } else if (mindist == 0) { /* remove points belonging to both sets */ - pixt = pixAnd(NULL, pixmin, pixmax); - *ppixmin = pixSubtract(pixmin, pixmin, pixt); - *ppixmax = pixSubtract(pixmax, pixmax, pixt); - pixDestroy(&pixt); - } else { - pixtmin = pixDilateBrick(NULL, pixmin, - 2 * mindist + 1, 2 * mindist + 1); - pixtmax = pixDilateBrick(NULL, pixmax, - 2 * mindist + 1, 2 * mindist + 1); - *ppixmin = pixSubtract(pixmin, pixmin, pixtmax); - *ppixmax = pixSubtract(pixmax, pixmax, pixtmin); - pixDestroy(&pixtmin); - pixDestroy(&pixtmax); - } - return 0; -} - - -/*! - * \brief pixFindEqualValues() - * - * \param[in] pixs1 8 bpp - * \param[in] pixs2 8 bpp - * \return pixd 1 bpp mask, or NULL on error - * - *
- * Notes:
- *      (1) The two images are aligned at the UL corner, and the returned
- *          image has ON pixels where the pixels in pixs1 and pixs2
- *          have equal values.
- * 
- */ -PIX * -pixFindEqualValues(PIX *pixs1, - PIX *pixs2) -{ -l_int32 w1, h1, w2, h2, w, h; -l_int32 i, j, val1, val2, wpls1, wpls2, wpld; -l_uint32 *datas1, *datas2, *datad, *lines1, *lines2, *lined; -PIX *pixd; - - PROCNAME("pixFindEqualValues"); - - if (!pixs1 || pixGetDepth(pixs1) != 8) - return (PIX *)ERROR_PTR("pixs1 undefined or not 8 bpp", procName, NULL); - if (!pixs2 || pixGetDepth(pixs2) != 8) - return (PIX *)ERROR_PTR("pixs2 undefined or not 8 bpp", procName, NULL); - pixGetDimensions(pixs1, &w1, &h1, NULL); - pixGetDimensions(pixs2, &w2, &h2, NULL); - w = L_MIN(w1, w2); - h = L_MIN(h1, h2); - pixd = pixCreate(w, h, 1); - datas1 = pixGetData(pixs1); - datas2 = pixGetData(pixs2); - datad = pixGetData(pixd); - wpls1 = pixGetWpl(pixs1); - wpls2 = pixGetWpl(pixs2); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lines1 = datas1 + i * wpls1; - lines2 = datas2 + i * wpls2; - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - val1 = GET_DATA_BYTE(lines1, j); - val2 = GET_DATA_BYTE(lines2, j); - if (val1 == val2) - SET_DATA_BIT(lined, j); - } - } - - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Selection of minima in mask connected components * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixSelectMinInConnComp() - * - * \param[in] pixs 8 bpp - * \param[in] pixm 1 bpp - * \param[out] ppta pta of min pixel locations - * \param[out] pnav [optional] numa of minima values - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) For each 8 connected component in pixm, this finds
- *          a pixel in pixs that has the lowest value, and saves
- *          it in a Pta.  If several pixels in pixs have the same
- *          minimum value, it picks the first one found.
- *      (2) For a mask pixm of true local minima, all pixels in each
- *          connected component have the same value in pixs, so it is
- *          fastest to select one of them using a special seedfill
- *          operation.  Not yet implemented.
- * 
- */ -l_ok -pixSelectMinInConnComp(PIX *pixs, - PIX *pixm, - PTA **ppta, - NUMA **pnav) -{ -l_int32 bx, by, bw, bh, i, j, c, n; -l_int32 xs, ys, minx, miny, wpls, wplt, val, minval; -l_uint32 *datas, *datat, *lines, *linet; -BOXA *boxa; -NUMA *nav; -PIX *pixt, *pixs2, *pixm2; -PIXA *pixa; -PTA *pta; - - PROCNAME("pixSelectMinInConnComp"); - - if (!ppta) - return ERROR_INT("&pta not defined", procName, 1); - *ppta = NULL; - if (pnav) *pnav = NULL; - if (!pixs || pixGetDepth(pixs) != 8) - return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); - if (!pixm || pixGetDepth(pixm) != 1) - return ERROR_INT("pixm undefined or not 1 bpp", procName, 1); - - /* Crop to the min size if necessary */ - if (pixCropToMatch(pixs, pixm, &pixs2, &pixm2)) { - pixDestroy(&pixs2); - pixDestroy(&pixm2); - return ERROR_INT("cropping failure", procName, 1); - } - - /* Find value and location of min value pixel in each component */ - boxa = pixConnComp(pixm2, &pixa, 8); - n = boxaGetCount(boxa); - pta = ptaCreate(n); - *ppta = pta; - nav = numaCreate(n); - datas = pixGetData(pixs2); - wpls = pixGetWpl(pixs2); - for (c = 0; c < n; c++) { - pixt = pixaGetPix(pixa, c, L_CLONE); - boxaGetBoxGeometry(boxa, c, &bx, &by, &bw, &bh); - if (bw == 1 && bh == 1) { - ptaAddPt(pta, bx, by); - numaAddNumber(nav, GET_DATA_BYTE(datas + by * wpls, bx)); - pixDestroy(&pixt); - continue; - } - datat = pixGetData(pixt); - wplt = pixGetWpl(pixt); - minx = miny = 1000000; - minval = 256; - for (i = 0; i < bh; i++) { - ys = by + i; - lines = datas + ys * wpls; - linet = datat + i * wplt; - for (j = 0; j < bw; j++) { - xs = bx + j; - if (GET_DATA_BIT(linet, j)) { - val = GET_DATA_BYTE(lines, xs); - if (val < minval) { - minval = val; - minx = xs; - miny = ys; - } - } - } - } - ptaAddPt(pta, minx, miny); - numaAddNumber(nav, GET_DATA_BYTE(datas + miny * wpls, minx)); - pixDestroy(&pixt); - } - - boxaDestroy(&boxa); - pixaDestroy(&pixa); - if (pnav) - *pnav = nav; - else - numaDestroy(&nav); - 
pixDestroy(&pixs2); - pixDestroy(&pixm2); - return 0; -} - - -/*-----------------------------------------------------------------------* - * Removal of seeded connected components from a mask * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixRemoveSeededComponents() - * - * \param[in] pixd [optional]; can be null or equal to pixm; 1 bpp - * \param[in] pixs 1 bpp seed - * \param[in] pixm 1 bpp filling mask - * \param[in] connectivity 4 or 8 - * \param[in] bordersize amount of border clearing - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) This removes each component in pixm for which there is
- *          at least one seed in pixs.  If pixd == NULL, this returns
- *          the result in a new pixd.  Otherwise, it is an in-place
- *          operation on pixm.  In no situation is pixs altered,
- *          because we do the filling with a copy of pixs.
- *      (2) If bordersize > 0, it also clears all pixels within a
- *          distance %bordersize of the edge of pixd.  This is here
- *          because pixLocalExtrema() typically finds local minima
- *          at the border.  Use %bordersize >= 2 to remove these.
- * 
- */ -PIX * -pixRemoveSeededComponents(PIX *pixd, - PIX *pixs, - PIX *pixm, - l_int32 connectivity, - l_int32 bordersize) -{ -PIX *pixt; - - PROCNAME("pixRemoveSeededComponents"); - - if (!pixs || pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, pixd); - if (!pixm || pixGetDepth(pixm) != 1) - return (PIX *)ERROR_PTR("pixm undefined or not 1 bpp", procName, pixd); - if (pixd && pixd != pixm) - return (PIX *)ERROR_PTR("operation not inplace", procName, pixd); - - pixt = pixCopy(NULL, pixs); - pixSeedfillBinary(pixt, pixt, pixm, connectivity); - pixd = pixXor(pixd, pixm, pixt); - if (bordersize > 0) - pixSetOrClearBorder(pixd, bordersize, bordersize, bordersize, - bordersize, PIX_CLR); - pixDestroy(&pixt); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sel1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sel1.c deleted file mode 100644 index e67d04f8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sel1.c +++ /dev/null @@ -1,2436 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file sel1.c - *
- *
- *      Basic ops on Sels and Selas
- *
- *         Create/destroy/copy:
- *            SELA      *selaCreate()
- *            void       selaDestroy()
- *            SEL       *selCreate()
- *            void       selDestroy()
- *            SEL       *selCopy()
- *            SEL       *selCreateBrick()
- *            SEL       *selCreateComb()
- *
- *         Helper proc:
- *            l_int32  **create2dIntArray()
- *
- *         Extension of sela:
- *            SELA      *selaAddSel()
- *            static l_int32  selaExtendArray()
- *
- *         Accessors:
- *            l_int32    selaGetCount()
- *            SEL       *selaGetSel()
- *            char      *selGetName()
- *            l_int32    selSetName()
- *            l_int32    selaFindSelByName()
- *            l_int32    selGetElement()
- *            l_int32    selSetElement()
- *            l_int32    selGetParameters()
- *            l_int32    selSetOrigin()
- *            l_int32    selGetTypeAtOrigin()
- *            char      *selaGetBrickName()
- *            char      *selaGetCombName()
- *     static char      *selaComputeCompositeParameters()
- *            l_int32    getCompositeParameters()
- *            SARRAY    *selaGetSelnames()
- *
- *         Max translations for erosion and hmt
- *            l_int32    selFindMaxTranslations()
- *
- *         Rotation by multiples of 90 degrees
- *            SEL       *selRotateOrth()
- *
- *         Sela and Sel serialized I/O
- *            SELA      *selaRead()
- *            SELA      *selaReadStream()
- *            SEL       *selRead()
- *            SEL       *selReadStream()
- *            l_int32    selaWrite()
- *            l_int32    selaWriteStream()
- *            l_int32    selWrite()
- *            l_int32    selWriteStream()
- *
- *         Building custom hit-miss sels from compiled strings
- *            SEL       *selCreateFromString()
- *            char      *selPrintToString()     [for debugging]
- *
- *         Building custom hit-miss sels from a simple file format
- *            SELA      *selaCreateFromFile()
- *            static SEL *selCreateFromSArray()
- *
- *         Making hit-only sels from Pta and Pix
- *            SEL       *selCreateFromPta()
- *            SEL       *selCreateFromPix()
- *
- *         Making hit-miss sels from Pix and image files
- *            SEL       *selReadFromColorImage()
- *            SEL       *selCreateFromColorPix()
-              SELA      *selaCreateFromColorPixa()
- *
- *         Printable display of sel
- *            PIX       *selDisplayInPix()
- *            PIX       *selaDisplayInPix()
- *
- *     Usage notes:
- *        In this file we have seven functions that make sels:
- *          (1)  selCreate(), with input (h, w, [name])
- *               The generic function.  Roll your own, using selSetElement().
- *          (2)  selCreateBrick(), with input (h, w, cy, cx, val)
- *               The most popular function.  Makes a rectangular sel of
- *               all hits, misses or don't-cares.  We have many morphology
- *               operations that create a sel of all hits, use it, and
- *               destroy it.
- *          (3)  selCreateFromString() with input (text, h, w, [name])
- *               Adam Langley's clever function, allows you to make a hit-miss
- *               sel from a string in code that is geometrically laid out
- *               just like the actual sel.
- *          (4)  selaCreateFromFile() with input (filename)
- *               This parses a simple file format to create an array of
- *               hit-miss sels.  The sel data uses the same encoding
- *               as in (3), with geometrical layout enforced.
- *          (5)  selCreateFromPta() with input (pta, cy, cx, [name])
- *               Another way to make a sel with only hits.
- *          (6)  selCreateFromPix() with input (pix, cy, cx, [name])
- *               Yet another way to make a sel from hits.
- *          (7)  selCreateFromColorPix() with input (pix, name).
- *               Another way to make a general hit-miss sel, starting with
- *               an image editor.
- *        In addition, there are three functions in selgen.c that
- *        automatically generate a hit-miss sel from a pix and
- *        a number of parameters.  This is useful for problems like
- *        "find all patterns that look like this one."
- *
- *        Consistency, being the hobgoblin of small minds,
- *        is adhered to here in the dimensioning and accessing of sels.
- *        Everything is done in standard matrix (row, column) order.
- *        When we set specific elements in a sel, we likewise use
- *        (row, col) ordering:
- *             selSetElement(), with input (row, col, type)
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Bounds on sel ptr array size */ -static const l_uint32 MaxPtrArraySize = 10000; -static const l_int32 InitialPtrArraySize = 50; /*!< n'importe quoi */ - - /* Bounds on kernel size */ -static const l_uint32 MaxKernelSize = 10000; - - /* Static functions */ -static l_int32 selaExtendArray(SELA *sela); -static SEL *selCreateFromSArray(SARRAY *sa, l_int32 first, l_int32 last); - -struct CompParameterMap -{ - l_int32 size; - l_int32 size1; - l_int32 size2; - char selnameh1[20]; - char selnameh2[20]; - char selnamev1[20]; - char selnamev2[20]; -}; - -static const struct CompParameterMap comp_parameter_map[] = - { { 2, 2, 1, "sel_2h", "", "sel_2v", "" }, - { 3, 3, 1, "sel_3h", "", "sel_3v", "" }, - { 4, 2, 2, "sel_2h", "sel_comb_4h", "sel_2v", "sel_comb_4v" }, - { 5, 5, 1, "sel_5h", "", "sel_5v", "" }, - { 6, 3, 2, "sel_3h", "sel_comb_6h", "sel_3v", "sel_comb_6v" }, - { 7, 7, 1, "sel_7h", "", "sel_7v", "" }, - { 8, 4, 2, "sel_4h", "sel_comb_8h", "sel_4v", "sel_comb_8v" }, - { 9, 3, 3, "sel_3h", "sel_comb_9h", "sel_3v", "sel_comb_9v" }, - { 10, 5, 2, "sel_5h", "sel_comb_10h", "sel_5v", "sel_comb_10v" }, - { 11, 4, 3, "sel_4h", "sel_comb_12h", "sel_4v", "sel_comb_12v" }, - { 12, 4, 3, "sel_4h", "sel_comb_12h", "sel_4v", "sel_comb_12v" }, - { 13, 4, 3, "sel_4h", "sel_comb_12h", "sel_4v", "sel_comb_12v" }, - { 14, 7, 2, "sel_7h", "sel_comb_14h", "sel_7v", "sel_comb_14v" }, - { 15, 5, 3, "sel_5h", "sel_comb_15h", "sel_5v", "sel_comb_15v" }, - { 16, 4, 4, "sel_4h", "sel_comb_16h", "sel_4v", "sel_comb_16v" }, - { 17, 4, 4, "sel_4h", "sel_comb_16h", "sel_4v", "sel_comb_16v" }, - { 18, 6, 3, "sel_6h", "sel_comb_18h", "sel_6v", "sel_comb_18v" }, - { 19, 5, 4, "sel_5h", "sel_comb_20h", "sel_5v", "sel_comb_20v" }, - { 20, 5, 4, "sel_5h", "sel_comb_20h", "sel_5v", "sel_comb_20v" }, - { 21, 7, 3, "sel_7h", "sel_comb_21h", "sel_7v", "sel_comb_21v" }, - { 22, 11, 2, "sel_11h", 
"sel_comb_22h", "sel_11v", "sel_comb_22v" }, - { 23, 6, 4, "sel_6h", "sel_comb_24h", "sel_6v", "sel_comb_24v" }, - { 24, 6, 4, "sel_6h", "sel_comb_24h", "sel_6v", "sel_comb_24v" }, - { 25, 5, 5, "sel_5h", "sel_comb_25h", "sel_5v", "sel_comb_25v" }, - { 26, 5, 5, "sel_5h", "sel_comb_25h", "sel_5v", "sel_comb_25v" }, - { 27, 9, 3, "sel_9h", "sel_comb_27h", "sel_9v", "sel_comb_27v" }, - { 28, 7, 4, "sel_7h", "sel_comb_28h", "sel_7v", "sel_comb_28v" }, - { 29, 6, 5, "sel_6h", "sel_comb_30h", "sel_6v", "sel_comb_30v" }, - { 30, 6, 5, "sel_6h", "sel_comb_30h", "sel_6v", "sel_comb_30v" }, - { 31, 6, 5, "sel_6h", "sel_comb_30h", "sel_6v", "sel_comb_30v" }, - { 32, 8, 4, "sel_8h", "sel_comb_32h", "sel_8v", "sel_comb_32v" }, - { 33, 11, 3, "sel_11h", "sel_comb_33h", "sel_11v", "sel_comb_33v" }, - { 34, 7, 5, "sel_7h", "sel_comb_35h", "sel_7v", "sel_comb_35v" }, - { 35, 7, 5, "sel_7h", "sel_comb_35h", "sel_7v", "sel_comb_35v" }, - { 36, 6, 6, "sel_6h", "sel_comb_36h", "sel_6v", "sel_comb_36v" }, - { 37, 6, 6, "sel_6h", "sel_comb_36h", "sel_6v", "sel_comb_36v" }, - { 38, 6, 6, "sel_6h", "sel_comb_36h", "sel_6v", "sel_comb_36v" }, - { 39, 13, 3, "sel_13h", "sel_comb_39h", "sel_13v", "sel_comb_39v" }, - { 40, 8, 5, "sel_8h", "sel_comb_40h", "sel_8v", "sel_comb_40v" }, - { 41, 7, 6, "sel_7h", "sel_comb_42h", "sel_7v", "sel_comb_42v" }, - { 42, 7, 6, "sel_7h", "sel_comb_42h", "sel_7v", "sel_comb_42v" }, - { 43, 7, 6, "sel_7h", "sel_comb_42h", "sel_7v", "sel_comb_42v" }, - { 44, 11, 4, "sel_11h", "sel_comb_44h", "sel_11v", "sel_comb_44v" }, - { 45, 9, 5, "sel_9h", "sel_comb_45h", "sel_9v", "sel_comb_45v" }, - { 46, 9, 5, "sel_9h", "sel_comb_45h", "sel_9v", "sel_comb_45v" }, - { 47, 8, 6, "sel_8h", "sel_comb_48h", "sel_8v", "sel_comb_48v" }, - { 48, 8, 6, "sel_8h", "sel_comb_48h", "sel_8v", "sel_comb_48v" }, - { 49, 7, 7, "sel_7h", "sel_comb_49h", "sel_7v", "sel_comb_49v" }, - { 50, 10, 5, "sel_10h", "sel_comb_50h", "sel_10v", "sel_comb_50v" }, - { 51, 10, 5, "sel_10h", 
"sel_comb_50h", "sel_10v", "sel_comb_50v" }, - { 52, 13, 4, "sel_13h", "sel_comb_52h", "sel_13v", "sel_comb_52v" }, - { 53, 9, 6, "sel_9h", "sel_comb_54h", "sel_9v", "sel_comb_54v" }, - { 54, 9, 6, "sel_9h", "sel_comb_54h", "sel_9v", "sel_comb_54v" }, - { 55, 11, 5, "sel_11h", "sel_comb_55h", "sel_11v", "sel_comb_55v" }, - { 56, 8, 7, "sel_8h", "sel_comb_56h", "sel_8v", "sel_comb_56v" }, - { 57, 8, 7, "sel_8h", "sel_comb_56h", "sel_8v", "sel_comb_56v" }, - { 58, 8, 7, "sel_8h", "sel_comb_56h", "sel_8v", "sel_comb_56v" }, - { 59, 10, 6, "sel_10h", "sel_comb_60h", "sel_10v", "sel_comb_60v" }, - { 60, 10, 6, "sel_10h", "sel_comb_60h", "sel_10v", "sel_comb_60v" }, - { 61, 10, 6, "sel_10h", "sel_comb_60h", "sel_10v", "sel_comb_60v" }, - { 62, 9, 7, "sel_9h", "sel_comb_63h", "sel_9v", "sel_comb_63v" }, - { 63, 9, 7, "sel_9h", "sel_comb_63h", "sel_9v", "sel_comb_63v" } }; - - - -/*------------------------------------------------------------------------* - * Create / Destroy / Copy * - *------------------------------------------------------------------------*/ -/*! - * \brief selaCreate() - * - * \param[in] n initial number of sel ptrs; use 0 for default - * \return sela, or NULL on error - */ -SELA * -selaCreate(l_int32 n) -{ -SELA *sela; - - PROCNAME("selaCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - /* Make array of sel ptrs */ - sela = (SELA *)LEPT_CALLOC(1, sizeof(SELA)); - sela->nalloc = n; - sela->n = 0; - if ((sela->sel = (SEL **)LEPT_CALLOC(n, sizeof(SEL *))) == NULL) { - LEPT_FREE(sela); - return (SELA *)ERROR_PTR("sel ptrs not made", procName, NULL); - } - return sela; -} - - -/*! 
- * \brief selaDestroy() - * - * \param[in,out] psela will be set to null before returning - * \return void - */ -void -selaDestroy(SELA **psela) -{ -SELA *sela; -l_int32 i; - - if (!psela) return; - if ((sela = *psela) == NULL) - return; - - for (i = 0; i < sela->n; i++) - selDestroy(&sela->sel[i]); - LEPT_FREE(sela->sel); - LEPT_FREE(sela); - *psela = NULL; - return; -} - - -/*! - * \brief selCreate() - * - * \param[in] height - * \param[in] width - * \param[in] name [optional] sel name; can be null - * \return sel, or NULL on error - * - *
- * Notes:
- *      (1) selCreate() initializes all values to 0.
- *      (2) After this call, (cy,cx) and nonzero data values must be
- *          assigned.  If a text name is not assigned here, it will
- *          be needed later when the sel is put into a sela.
- * 
- */ -SEL * -selCreate(l_int32 height, - l_int32 width, - const char *name) -{ -SEL *sel; - - PROCNAME("selCreate"); - - if ((sel = (SEL *)LEPT_CALLOC(1, sizeof(SEL))) == NULL) - return (SEL *)ERROR_PTR("sel not made", procName, NULL); - if (name) - sel->name = stringNew(name); - sel->sy = height; - sel->sx = width; - if ((sel->data = create2dIntArray(height, width)) == NULL) { - LEPT_FREE(sel->name); - LEPT_FREE(sel); - return (SEL *)ERROR_PTR("data not allocated", procName, NULL); - } - - return sel; -} - - -/*! - * \brief selDestroy() - * - * \param[in,out] psel will be set to null before returning - * \return void - */ -void -selDestroy(SEL **psel) -{ -l_int32 i; -SEL *sel; - - PROCNAME("selDestroy"); - - if (psel == NULL) { - L_WARNING("ptr address is NULL!\n", procName); - return; - } - if ((sel = *psel) == NULL) - return; - - for (i = 0; i < sel->sy; i++) - LEPT_FREE(sel->data[i]); - LEPT_FREE(sel->data); - if (sel->name) - LEPT_FREE(sel->name); - LEPT_FREE(sel); - - *psel = NULL; - return; -} - - -/*! - * \brief selCopy() - * - * \param[in] sel - * \return a copy of the sel, or NULL on error - */ -SEL * -selCopy(SEL *sel) -{ -l_int32 sx, sy, cx, cy, i, j; -SEL *csel; - - PROCNAME("selCopy"); - - if (!sel) - return (SEL *)ERROR_PTR("sel not defined", procName, NULL); - - if ((csel = (SEL *)LEPT_CALLOC(1, sizeof(SEL))) == NULL) - return (SEL *)ERROR_PTR("csel not made", procName, NULL); - selGetParameters(sel, &sy, &sx, &cy, &cx); - csel->sy = sy; - csel->sx = sx; - csel->cy = cy; - csel->cx = cx; - - if ((csel->data = create2dIntArray(sy, sx)) == NULL) { - LEPT_FREE(csel); - return (SEL *)ERROR_PTR("sel data not made", procName, NULL); - } - - for (i = 0; i < sy; i++) - for (j = 0; j < sx; j++) - csel->data[i][j] = sel->data[i][j]; - - if (sel->name) - csel->name = stringNew(sel->name); - - return csel; -} - - -/*! 
- * \brief selCreateBrick() - * - * \param[in] h, w height, width - * \param[in] cy, cx origin, relative to UL corner at 0,0 - * \param[in] type SEL_HIT, SEL_MISS, or SEL_DONT_CARE - * \return sel, or NULL on error - * - *
- * Notes:
- *      (1) This is a rectangular sel of all hits, misses or don't cares.
- * 
- */ -SEL * -selCreateBrick(l_int32 h, - l_int32 w, - l_int32 cy, - l_int32 cx, - l_int32 type) -{ -l_int32 i, j; -SEL *sel; - - PROCNAME("selCreateBrick"); - - if (h <= 0 || w <= 0) - return (SEL *)ERROR_PTR("h and w must both be > 0", procName, NULL); - if (type != SEL_HIT && type != SEL_MISS && type != SEL_DONT_CARE) - return (SEL *)ERROR_PTR("invalid sel element type", procName, NULL); - - if ((sel = selCreate(h, w, NULL)) == NULL) - return (SEL *)ERROR_PTR("sel not made", procName, NULL); - selSetOrigin(sel, cy, cx); - for (i = 0; i < h; i++) - for (j = 0; j < w; j++) - sel->data[i][j] = type; - - return sel; -} - - -/*! - * \brief selCreateComb() - * - * \param[in] factor1 contiguous space between comb tines - * \param[in] factor2 number of comb tines - * \param[in] direction L_HORIZ, L_VERT - * \return sel, or NULL on error - * - *
- * Notes:
- *      (1) This generates a comb Sel of hits with the origin as
- *          near the center as possible.
- *      (2) In use, this is complemented by a brick sel of size %factor1,
- *          Both brick and comb sels are made by selectComposableSels().
- * 
- */ -SEL * -selCreateComb(l_int32 factor1, - l_int32 factor2, - l_int32 direction) -{ -l_int32 i, size, z; -SEL *sel; - - PROCNAME("selCreateComb"); - - if (factor1 < 1 || factor2 < 1) - return (SEL *)ERROR_PTR("factors must be >= 1", procName, NULL); - if (direction != L_HORIZ && direction != L_VERT) - return (SEL *)ERROR_PTR("invalid direction", procName, NULL); - - size = factor1 * factor2; - if (direction == L_HORIZ) { - if ((sel = selCreate(1, size, NULL)) == NULL) - return (SEL *)ERROR_PTR("horiz sel not made", procName, NULL); - selSetOrigin(sel, 0, size / 2); - } else { - if ((sel = selCreate(size, 1, NULL)) == NULL) - return (SEL *)ERROR_PTR("vert sel not made", procName, NULL); - selSetOrigin(sel, size / 2, 0); - } - - /* Lay down the elements of the comb */ - for (i = 0; i < factor2; i++) { - z = factor1 / 2 + i * factor1; -/* lept_stderr("i = %d, factor1 = %d, factor2 = %d, z = %d\n", - i, factor1, factor2, z); */ - if (direction == L_HORIZ) - selSetElement(sel, 0, z, SEL_HIT); - else - selSetElement(sel, z, 0, SEL_HIT); - } - - return sel; -} - - -/*! - * \brief create2dIntArray() - * - * \param[in] sy rows == height - * \param[in] sx columns == width - * \return doubly indexed array i.e., an array of sy row pointers, - * each of which points to an array of sx ints - * - *
- * Notes:
- *      (1) The array[sy][sx] is indexed in standard "matrix notation",
- *          with the row index first.
- * 
- */ -l_int32 ** -create2dIntArray(l_int32 sy, - l_int32 sx) -{ -l_int32 i, j, success; -l_int32 **array; - - PROCNAME("create2dIntArray"); - - if (sx <= 0 || sx > MaxKernelSize) - return (l_int32 **)ERROR_PTR("sx out of bounds", procName, NULL); - if (sy <= 0 || sy > MaxKernelSize) - return (l_int32 **)ERROR_PTR("sy out of bounds", procName, NULL); - - if ((array = (l_int32 **)LEPT_CALLOC(sy, sizeof(l_int32 *))) == NULL) - return (l_int32 **)ERROR_PTR("ptr array not made", procName, NULL); - success = TRUE; - for (i = 0; i < sy; i++) { - if ((array[i] = (l_int32 *)LEPT_CALLOC(sx, sizeof(l_int32))) == NULL) { - success = FALSE; - break; - } - } - if (success) return array; - - /* Cleanup after error */ - for (j = 0; j < i; j++) - LEPT_FREE(array[j]); - LEPT_FREE(array); - return (l_int32 **)ERROR_PTR("array not made", procName, NULL); -} - - - -/*------------------------------------------------------------------------* - * Extension of sela * - *------------------------------------------------------------------------*/ -/*! - * \brief selaAddSel() - * - * \param[in] sela - * \param[in] sel to be added - * \param[in] selname ignored if already defined in sel; - * req'd in sel when added to a sela - * \param[in] copyflag L_INSERT or L_COPY - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This adds a sel, either inserting or making a copy.
- *      (2) Because every sel in a sela must have a name, it copies
- *          the input name if necessary.  You can input NULL for
- *          selname if the sel already has a name.
- * 
- */ -l_ok -selaAddSel(SELA *sela, - SEL *sel, - const char *selname, - l_int32 copyflag) -{ -l_int32 n; -SEL *csel; - - PROCNAME("selaAddSel"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - if (!sel->name && !selname) - return ERROR_INT("added sel must have name", procName, 1); - if (copyflag != L_INSERT && copyflag != L_COPY) - return ERROR_INT("invalid copyflag", procName, 1); - - if (copyflag == L_COPY) { - if ((csel = selCopy(sel)) == NULL) - return ERROR_INT("csel not made", procName, 1); - } else { /* copyflag == L_INSERT */ - csel = sel; - } - if (!csel->name) - csel->name = stringNew(selname); - - n = selaGetCount(sela); - if (n >= sela->nalloc) - selaExtendArray(sela); - sela->sel[n] = csel; - sela->n++; - - return 0; -} - - -/*! - * \brief selaExtendArray() - * - * \param[in] sela - * \return 0 if OK; 1 on error - */ -static l_int32 -selaExtendArray(SELA *sela) -{ - PROCNAME("selaExtendArray"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - - if ((sela->sel = (SEL **)reallocNew((void **)&sela->sel, - sizeof(SEL *) * sela->nalloc, - 2 * sizeof(SEL *) * sela->nalloc)) == NULL) - return ERROR_INT("new ptr array not returned", procName, 1); - - sela->nalloc = 2 * sela->nalloc; - return 0; -} - - - -/*----------------------------------------------------------------------* - * Accessors * - *----------------------------------------------------------------------*/ -/*! - * \brief selaGetCount() - * - * \param[in] sela - * \return count, or 0 on error - */ -l_int32 -selaGetCount(SELA *sela) -{ - PROCNAME("selaGetCount"); - - if (!sela) - return ERROR_INT("sela not defined", procName, 0); - - return sela->n; -} - - -/*! - * \brief selaGetSel() - * - * \param[in] sela - * \param[in] i index of sel to be retrieved not copied - * \return sel, or NULL on error - * - *
- * Notes:
- *      (1) This returns a ptr to the sel, not a copy, so the caller
- *          must not destroy it!
- * 
- */ -SEL * -selaGetSel(SELA *sela, - l_int32 i) -{ - PROCNAME("selaGetSel"); - - if (!sela) - return (SEL *)ERROR_PTR("sela not defined", procName, NULL); - - if (i < 0 || i >= sela->n) - return (SEL *)ERROR_PTR("invalid index", procName, NULL); - return sela->sel[i]; -} - - -/*! - * \brief selGetName() - * - * \param[in] sel - * \return sel name not copied, or NULL if no name or on error - */ -char * -selGetName(SEL *sel) -{ - PROCNAME("selGetName"); - - if (!sel) - return (char *)ERROR_PTR("sel not defined", procName, NULL); - - return sel->name; -} - - -/*! - * \brief selSetName() - * - * \param[in] sel - * \param[in] name [optional]; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Always frees the existing sel name, if defined.
- *      (2) If name is not defined, just clears any existing sel name.
- * 
- */ -l_ok -selSetName(SEL *sel, - const char *name) -{ - PROCNAME("selSetName"); - - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - - return stringReplace(&sel->name, name); -} - - -/*! - * \brief selaFindSelByName() - * - * \param[in] sela - * \param[in] name sel name - * \param[out] pindex [optional] - * \param[in] psel [optional] sel (not a copy) - * \return 0 if OK; 1 on error - */ -l_ok -selaFindSelByName(SELA *sela, - const char *name, - l_int32 *pindex, - SEL **psel) -{ -l_int32 i, n; -char *sname; -SEL *sel; - - PROCNAME("selaFindSelByName"); - - if (pindex) *pindex = -1; - if (psel) *psel = NULL; - - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - - n = selaGetCount(sela); - for (i = 0; i < n; i++) - { - if ((sel = selaGetSel(sela, i)) == NULL) { - L_WARNING("missing sel\n", procName); - continue; - } - - sname = selGetName(sel); - if (sname && (!strcmp(name, sname))) { - if (pindex) - *pindex = i; - if (psel) - *psel = sel; - return 0; - } - } - - return 1; -} - - -/*! - * \brief selGetElement() - * - * \param[in] sel - * \param[in] row - * \param[in] col - * \param[out] ptype SEL_HIT, SEL_MISS, SEL_DONT_CARE - * \return 0 if OK; 1 on error - */ -l_ok -selGetElement(SEL *sel, - l_int32 row, - l_int32 col, - l_int32 *ptype) -{ - PROCNAME("selGetElement"); - - if (!ptype) - return ERROR_INT("&type not defined", procName, 1); - *ptype = SEL_DONT_CARE; - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - if (row < 0 || row >= sel->sy) - return ERROR_INT("sel row out of bounds", procName, 1); - if (col < 0 || col >= sel->sx) - return ERROR_INT("sel col out of bounds", procName, 1); - - *ptype = sel->data[row][col]; - return 0; -} - - -/*! - * \brief selSetElement() - * - * \param[in] sel - * \param[in] row - * \param[in] col - * \param[in] type SEL_HIT, SEL_MISS, SEL_DONT_CARE - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Because we use row and column to index into an array,
- *          they are always non-negative.  The location of the origin
- *          (and the type of operation) determine the actual
- *          direction of the rasterop.
- * 
- */ -l_ok -selSetElement(SEL *sel, - l_int32 row, - l_int32 col, - l_int32 type) -{ - PROCNAME("selSetElement"); - - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - if (type != SEL_HIT && type != SEL_MISS && type != SEL_DONT_CARE) - return ERROR_INT("invalid sel element type", procName, 1); - if (row < 0 || row >= sel->sy) - return ERROR_INT("sel row out of bounds", procName, 1); - if (col < 0 || col >= sel->sx) - return ERROR_INT("sel col out of bounds", procName, 1); - - sel->data[row][col] = type; - return 0; -} - - -/*! - * \brief selGetParameters() - * - * \param[in] sel - * \param[out] psy, psx, pcy, pcx [optional] each can be null - * \return 0 if OK, 1 on error - */ -l_ok -selGetParameters(SEL *sel, - l_int32 *psy, - l_int32 *psx, - l_int32 *pcy, - l_int32 *pcx) -{ - PROCNAME("selGetParameters"); - - if (psy) *psy = 0; - if (psx) *psx = 0; - if (pcy) *pcy = 0; - if (pcx) *pcx = 0; - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - if (psy) *psy = sel->sy; - if (psx) *psx = sel->sx; - if (pcy) *pcy = sel->cy; - if (pcx) *pcx = sel->cx; - return 0; -} - - -/*! - * \brief selSetOrigin() - * - * \param[in] sel - * \param[in] cy, cx - * \return 0 if OK; 1 on error - */ -l_ok -selSetOrigin(SEL *sel, - l_int32 cy, - l_int32 cx) -{ - PROCNAME("selSetOrigin"); - - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - sel->cy = cy; - sel->cx = cx; - return 0; -} - - -/*! 
- * \brief selGetTypeAtOrigin() - * - * \param[in] sel - * \param[out] ptype SEL_HIT, SEL_MISS, SEL_DONT_CARE - * \return 0 if OK; 1 on error or if origin is not found - */ -l_ok -selGetTypeAtOrigin(SEL *sel, - l_int32 *ptype) -{ -l_int32 sx, sy, cx, cy, i, j; - - PROCNAME("selGetTypeAtOrigin"); - - if (!ptype) - return ERROR_INT("&type not defined", procName, 1); - *ptype = SEL_DONT_CARE; /* init */ - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - - selGetParameters(sel, &sy, &sx, &cy, &cx); - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - if (i == cy && j == cx) { - selGetElement(sel, i, j, ptype); - return 0; - } - } - } - - return ERROR_INT("sel origin not found", procName, 1); -} - - -/*! - * \brief selaGetBrickName() - * - * \param[in] sela - * \param[in] hsize, vsize of brick sel - * \return sel name new string, or NULL if no name or on error - */ -char * -selaGetBrickName(SELA *sela, - l_int32 hsize, - l_int32 vsize) -{ -l_int32 i, nsels, sx, sy; -SEL *sel; - - PROCNAME("selaGetBrickName"); - - if (!sela) - return (char *)ERROR_PTR("sela not defined", procName, NULL); - - nsels = selaGetCount(sela); - for (i = 0; i < nsels; i++) { - sel = selaGetSel(sela, i); - selGetParameters(sel, &sy, &sx, NULL, NULL); - if (hsize == sx && vsize == sy) - return stringNew(selGetName(sel)); - } - - return (char *)ERROR_PTR("sel not found", procName, NULL); -} - - -/*! - * \brief selaGetCombName() - * - * \param[in] sela - * \param[in] size the product of sizes of the brick and comb parts - * \param[in] direction L_HORIZ, L_VERT - * \return sel name new string, or NULL if name not found or on error - * - *
- * Notes:
- *      (1) Combs are by definition 1-dimensional, either horiz or vert.
- *      (2) Use this with comb Sels; e.g., from selaAddDwaCombs().
- * 
- */ -char * -selaGetCombName(SELA *sela, - l_int32 size, - l_int32 direction) -{ -char *selname; -char combname[256]; -l_int32 i, nsels, sx, sy, found; -SEL *sel; - - PROCNAME("selaGetCombName"); - - if (!sela) - return (char *)ERROR_PTR("sela not defined", procName, NULL); - if (direction != L_HORIZ && direction != L_VERT) - return (char *)ERROR_PTR("invalid direction", procName, NULL); - - /* Derive the comb name we're looking for */ - if (direction == L_HORIZ) - snprintf(combname, sizeof(combname), "sel_comb_%dh", size); - else /* direction == L_VERT */ - snprintf(combname, sizeof(combname), "sel_comb_%dv", size); - - found = FALSE; - nsels = selaGetCount(sela); - for (i = 0; i < nsels; i++) { - sel = selaGetSel(sela, i); - selGetParameters(sel, &sy, &sx, NULL, NULL); - if (sy != 1 && sx != 1) /* 2-D; not a comb */ - continue; - selname = selGetName(sel); - if (!strcmp(selname, combname)) { - found = TRUE; - break; - } - } - - if (found) - return stringNew(selname); - else - return (char *)ERROR_PTR("sel not found", procName, NULL); -} - - -/* --------- Function used to generate code in this file ---------- */ -#if 0 -static void selaComputeCompositeParameters(const char *fileout); - -/*! - * \brief selaComputeCompParameters() - * - * \param[in] fileout - * \return void - * - *
- * Notes:
- *      (1) This static function was used to construct the comp_parameter_map[]
- *          array at the top of this file.  It is static because it does
- *          not need to be called again.  It remains here to show how
- *          the composite parameter map was computed.
- *      (2) The output file was pasted directly into comp_parameter_map[].
- *          The composite parameter map is used to quickly determine
- *          the linear decomposition parameters and sel names.
- * 
- */ -static void -selaComputeCompositeParameters(const char *fileout) -{ -char *str, *nameh1, *nameh2, *namev1, *namev2; -char buf[256]; -l_int32 size, size1, size2, len; -SARRAY *sa; -SELA *selabasic, *selacomb; - - selabasic = selaAddBasic(NULL); - selacomb = selaAddDwaCombs(NULL); - sa = sarrayCreate(64); - for (size = 2; size < 64; size++) { - selectComposableSizes(size, &size1, &size2); - nameh1 = selaGetBrickName(selabasic, size1, 1); - namev1 = selaGetBrickName(selabasic, 1, size1); - if (size2 > 1) { - nameh2 = selaGetCombName(selacomb, size1 * size2, L_HORIZ); - namev2 = selaGetCombName(selacomb, size1 * size2, L_VERT); - } else { - nameh2 = stringNew(""); - namev2 = stringNew(""); - } - snprintf(buf, sizeof(buf), - " { %d, %d, %d, \"%s\", \"%s\", \"%s\", \"%s\" },", - size, size1, size2, nameh1, nameh2, namev1, namev2); - sarrayAddString(sa, buf, L_COPY); - LEPT_FREE(nameh1); - LEPT_FREE(nameh2); - LEPT_FREE(namev1); - LEPT_FREE(namev2); - } - str = sarrayToString(sa, 1); - len = strlen(str); - l_binaryWrite(fileout, "w", str, len + 1); - LEPT_FREE(str); - sarrayDestroy(&sa); - selaDestroy(&selabasic); - selaDestroy(&selacomb); - return; -} -#endif -/* -------------------------------------------------------------------- */ - - -/*! - * \brief getCompositeParameters() - * - * \param[in] size - * \param[out] psize1 [optional] brick factor size - * \param[out] psize2 [optional] comb factor size - * \param[out] pnameh1 [optional] name of horiz brick - * \param[out] pnameh2 [optional] name of horiz comb - * \param[out] pnamev1 [optional] name of vert brick - * \param[out] pnamev2 [optional] name of vert comb - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This uses the big lookup table at the top of this file.
- *      (2) All returned strings are copies that must be freed.
- * 
- */ -l_ok -getCompositeParameters(l_int32 size, - l_int32 *psize1, - l_int32 *psize2, - char **pnameh1, - char **pnameh2, - char **pnamev1, - char **pnamev2) -{ -l_int32 index; - - PROCNAME("selaGetSelnames"); - - if (psize1) *psize1 = 0; - if (psize2) *psize2 = 0; - if (pnameh1) *pnameh1 = NULL; - if (pnameh2) *pnameh2 = NULL; - if (pnamev1) *pnamev1 = NULL; - if (pnamev2) *pnamev2 = NULL; - if (size < 2 || size > 63) - return ERROR_INT("valid size range is {2 ... 63}", procName, 1); - index = size - 2; - if (psize1) - *psize1 = comp_parameter_map[index].size1; - if (psize2) - *psize2 = comp_parameter_map[index].size2; - if (pnameh1) - *pnameh1 = stringNew(comp_parameter_map[index].selnameh1); - if (pnameh2) - *pnameh2 = stringNew(comp_parameter_map[index].selnameh2); - if (pnamev1) - *pnamev1 = stringNew(comp_parameter_map[index].selnamev1); - if (pnamev2) - *pnamev2 = stringNew(comp_parameter_map[index].selnamev2); - return 0; -} - - -/*! - * \brief selaGetSelnames() - * - * \param[in] sela - * \return sa of all sel names, or NULL on error - */ -SARRAY * -selaGetSelnames(SELA *sela) -{ -char *selname; -l_int32 i, n; -SEL *sel; -SARRAY *sa; - - PROCNAME("selaGetSelnames"); - - if (!sela) - return (SARRAY *)ERROR_PTR("sela not defined", procName, NULL); - if ((n = selaGetCount(sela)) == 0) - return (SARRAY *)ERROR_PTR("no sels in sela", procName, NULL); - - if ((sa = sarrayCreate(n)) == NULL) - return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); - for (i = 0; i < n; i++) { - sel = selaGetSel(sela, i); - selname = selGetName(sel); - sarrayAddString(sa, selname, L_COPY); - } - - return sa; -} - - - -/*----------------------------------------------------------------------* - * Max translations for erosion and hmt * - *----------------------------------------------------------------------*/ -/*! - * \brief selFindMaxTranslations() - * - * \param[in] sel - * \param[out] pxp, pyp, pxn, pyn max shifts - * \return 0 if OK; 1 on error - * - *
- * Notes:
-          These are the maximum shifts for the erosion operation.
- *        For example, when j < cx, the shift of the image
- *        is +x to the cx.  This is a positive xp shift.
- * 
- */ -l_ok -selFindMaxTranslations(SEL *sel, - l_int32 *pxp, - l_int32 *pyp, - l_int32 *pxn, - l_int32 *pyn) -{ -l_int32 sx, sy, cx, cy, i, j; -l_int32 maxxp, maxyp, maxxn, maxyn; - - PROCNAME("selaFindMaxTranslations"); - - if (!pxp || !pyp || !pxn || !pyn) - return ERROR_INT("&xp (etc) defined", procName, 1); - *pxp = *pyp = *pxn = *pyn = 0; - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - selGetParameters(sel, &sy, &sx, &cy, &cx); - - maxxp = maxyp = maxxn = maxyn = 0; - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - if (sel->data[i][j] == 1) { - maxxp = L_MAX(maxxp, cx - j); - maxyp = L_MAX(maxyp, cy - i); - maxxn = L_MAX(maxxn, j - cx); - maxyn = L_MAX(maxyn, i - cy); - } - } - } - - *pxp = maxxp; - *pyp = maxyp; - *pxn = maxxn; - *pyn = maxyn; - - return 0; -} - - -/*----------------------------------------------------------------------* - * Rotation by multiples of 90 degrees * - *----------------------------------------------------------------------*/ -/*! 
- * \brief selRotateOrth() - * - * \param[in] sel - * \param[in] quads 0 - 4; number of 90 degree cw rotations - * \return seld, or NULL on error - */ -SEL * -selRotateOrth(SEL *sel, - l_int32 quads) -{ -l_int32 i, j, ni, nj, sx, sy, cx, cy, nsx, nsy, ncx, ncy, type; -SEL *seld; - - PROCNAME("selRotateOrth"); - - if (!sel) - return (SEL *)ERROR_PTR("sel not defined", procName, NULL); - if (quads < 0 || quads > 4) - return (SEL *)ERROR_PTR("quads not in {0,1,2,3,4}", procName, NULL); - if (quads == 0 || quads == 4) - return selCopy(sel); - - selGetParameters(sel, &sy, &sx, &cy, &cx); - if (quads == 1) { /* 90 degrees cw */ - nsx = sy; - nsy = sx; - ncx = sy - cy - 1; - ncy = cx; - } else if (quads == 2) { /* 180 degrees cw */ - nsx = sx; - nsy = sy; - ncx = sx - cx - 1; - ncy = sy - cy - 1; - } else { /* 270 degrees cw */ - nsx = sy; - nsy = sx; - ncx = cy; - ncy = sx - cx - 1; - } - seld = selCreateBrick(nsy, nsx, ncy, ncx, SEL_DONT_CARE); - if (sel->name) - seld->name = stringNew(sel->name); - - for (i = 0; i < sy; i++) { - for (j = 0; j < sx; j++) { - selGetElement(sel, i, j, &type); - if (quads == 1) { - ni = j; - nj = sy - i - 1; - } else if (quads == 2) { - ni = sy - i - 1; - nj = sx - j - 1; - } else { /* quads == 3 */ - ni = sx - j - 1; - nj = i; - } - selSetElement(seld, ni, nj, type); - } - } - - return seld; -} - - -/*----------------------------------------------------------------------* - * Sela and Sel serialized I/O * - *----------------------------------------------------------------------*/ -/*! 
- * \brief selaRead() - * - * \param[in] fname filename - * \return sela, or NULL on error - */ -SELA * -selaRead(const char *fname) -{ -FILE *fp; -SELA *sela; - - PROCNAME("selaRead"); - - if (!fname) - return (SELA *)ERROR_PTR("fname not defined", procName, NULL); - - if ((fp = fopenReadStream(fname)) == NULL) - return (SELA *)ERROR_PTR("stream not opened", procName, NULL); - if ((sela = selaReadStream(fp)) == NULL) { - fclose(fp); - return (SELA *)ERROR_PTR("sela not returned", procName, NULL); - } - fclose(fp); - - return sela; -} - - -/*! - * \brief selaReadStream() - * - * \param[in] fp file stream - * \return sela, or NULL on error - */ -SELA * -selaReadStream(FILE *fp) -{ -l_int32 i, n, version; -SEL *sel; -SELA *sela; - - PROCNAME("selaReadStream"); - - if (!fp) - return (SELA *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, "\nSela Version %d\n", &version) != 1) - return (SELA *)ERROR_PTR("not a sela file", procName, NULL); - if (version != SEL_VERSION_NUMBER) - return (SELA *)ERROR_PTR("invalid sel version", procName, NULL); - if (fscanf(fp, "Number of Sels = %d\n\n", &n) != 1) - return (SELA *)ERROR_PTR("not a sela file", procName, NULL); - - if ((sela = selaCreate(n)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - sela->nalloc = n; - - for (i = 0; i < n; i++) { - if ((sel = selReadStream(fp)) == NULL) { - selaDestroy(&sela); - return (SELA *)ERROR_PTR("sel not read", procName, NULL); - } - selaAddSel(sela, sel, NULL, 0); - } - - return sela; -} - - -/*! 
- * \brief selRead() - * - * \param[in] fname filename - * \return sel, or NULL on error - */ -SEL * -selRead(const char *fname) -{ -FILE *fp; -SEL *sel; - - PROCNAME("selRead"); - - if (!fname) - return (SEL *)ERROR_PTR("fname not defined", procName, NULL); - - if ((fp = fopenReadStream(fname)) == NULL) - return (SEL *)ERROR_PTR("stream not opened", procName, NULL); - if ((sel = selReadStream(fp)) == NULL) { - fclose(fp); - return (SEL *)ERROR_PTR("sela not returned", procName, NULL); - } - fclose(fp); - - return sel; -} - - -/*! - * \brief selReadStream() - * - * \param[in] fp file stream - * \return sel, or NULL on error - */ -SEL * -selReadStream(FILE *fp) -{ -char *selname; -char linebuf[256]; -l_int32 sy, sx, cy, cx, i, j, version, ignore; -SEL *sel; - - PROCNAME("selReadStream"); - - if (!fp) - return (SEL *)ERROR_PTR("stream not defined", procName, NULL); - - if (fscanf(fp, " Sel Version %d\n", &version) != 1) - return (SEL *)ERROR_PTR("not a sel file", procName, NULL); - if (version != SEL_VERSION_NUMBER) - return (SEL *)ERROR_PTR("invalid sel version", procName, NULL); - - if (fgets(linebuf, sizeof(linebuf), fp) == NULL) - return (SEL *)ERROR_PTR("error reading into linebuf", procName, NULL); - selname = stringNew(linebuf); - sscanf(linebuf, " ------ %200s ------", selname); - - if (fscanf(fp, " sy = %d, sx = %d, cy = %d, cx = %d\n", - &sy, &sx, &cy, &cx) != 4) { - LEPT_FREE(selname); - return (SEL *)ERROR_PTR("dimensions not read", procName, NULL); - } - - if ((sel = selCreate(sy, sx, selname)) == NULL) { - LEPT_FREE(selname); - return (SEL *)ERROR_PTR("sel not made", procName, NULL); - } - selSetOrigin(sel, cy, cx); - - for (i = 0; i < sy; i++) { - ignore = fscanf(fp, " "); - for (j = 0; j < sx; j++) - ignore = fscanf(fp, "%1d", &sel->data[i][j]); - ignore = fscanf(fp, "\n"); - } - ignore = fscanf(fp, "\n"); - - LEPT_FREE(selname); - return sel; -} - - -/*! 
- * \brief selaWrite() - * - * \param[in] fname filename - * \param[in] sela - * \return 0 if OK, 1 on error - */ -l_ok -selaWrite(const char *fname, - SELA *sela) -{ -FILE *fp; - - PROCNAME("selaWrite"); - - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - - if ((fp = fopenWriteStream(fname, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - selaWriteStream(fp, sela); - fclose(fp); - - return 0; -} - - -/*! - * \brief selaWriteStream() - * - * \param[in] fp file stream - * \param[in] sela - * \return 0 if OK, 1 on error - */ -l_ok -selaWriteStream(FILE *fp, - SELA *sela) -{ -l_int32 i, n; -SEL *sel; - - PROCNAME("selaWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!sela) - return ERROR_INT("sela not defined", procName, 1); - - n = selaGetCount(sela); - fprintf(fp, "\nSela Version %d\n", SEL_VERSION_NUMBER); - fprintf(fp, "Number of Sels = %d\n\n", n); - for (i = 0; i < n; i++) { - if ((sel = selaGetSel(sela, i)) == NULL) - continue; - selWriteStream(fp, sel); - } - return 0; -} - - -/*! - * \brief selWrite() - * - * \param[in] fname filename - * \param[in] sel - * \return 0 if OK, 1 on error - */ -l_ok -selWrite(const char *fname, - SEL *sel) -{ -FILE *fp; - - PROCNAME("selWrite"); - - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - - if ((fp = fopenWriteStream(fname, "wb")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - selWriteStream(fp, sel); - fclose(fp); - - return 0; -} - - -/*! 
- * \brief selWriteStream() - * - * \param[in] fp file stream - * \param[in] sel - * \return 0 if OK, 1 on error - */ -l_ok -selWriteStream(FILE *fp, - SEL *sel) -{ -l_int32 sx, sy, cx, cy, i, j; - - PROCNAME("selWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!sel) - return ERROR_INT("sel not defined", procName, 1); - selGetParameters(sel, &sy, &sx, &cy, &cx); - - fprintf(fp, " Sel Version %d\n", SEL_VERSION_NUMBER); - fprintf(fp, " ------ %s ------\n", selGetName(sel)); - fprintf(fp, " sy = %d, sx = %d, cy = %d, cx = %d\n", sy, sx, cy, cx); - for (i = 0; i < sy; i++) { - fprintf(fp, " "); - for (j = 0; j < sx; j++) - fprintf(fp, "%d", sel->data[i][j]); - fprintf(fp, "\n"); - } - fprintf(fp, "\n"); - - return 0; -} - - -/*----------------------------------------------------------------------* - * Building custom hit-miss sels from compiled strings * - *----------------------------------------------------------------------*/ -/*! - * \brief selCreateFromString() - * - * \param[in] text - * \param[in] h, w height, width - * \param[in] name [optional] sel name; can be null - * \return sel of the given size, or NULL on error - * - *
- * Notes:
- *      (1) The text is an array of chars (in row-major order) where
- *          each char can be one of the following:
- *             'x': hit
- *             'o': miss
- *             ' ': don't-care
- *      (2) When the origin falls on a hit or miss, use an upper case
- *          char (e.g., 'X' or 'O') to indicate it.  When the origin
- *          falls on a don't-care, indicate this with a 'C'.
- *          The string must have exactly one origin specified.
- *      (3) The advantage of this method is that the text can be input
- *          in a format that shows the 2D layout of the Sel; e.g.,
- * \code
- *              static const char *seltext = "x    "
- *                                           "x Oo "
- *                                           "x    "
- *                                           "xxxxx";
- * \endcode
- * 
- */ -SEL * -selCreateFromString(const char *text, - l_int32 h, - l_int32 w, - const char *name) -{ -SEL *sel; -l_int32 y, x, norig; -char ch; - - PROCNAME("selCreateFromString"); - - if (!text || text[0] == '\0') - return (SEL *)ERROR_PTR("text undefined or empty", procName, NULL); - if (h < 1) - return (SEL *)ERROR_PTR("height must be > 0", procName, NULL); - if (w < 1) - return (SEL *)ERROR_PTR("width must be > 0", procName, NULL); - if (strlen(text) != (size_t)w * h) - return (SEL *)ERROR_PTR("text size != w * h", procName, NULL); - - sel = selCreate(h, w, name); - norig = 0; - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - ch = *(text++); - switch (ch) - { - case 'X': - norig++; - selSetOrigin(sel, y, x); - /* fall through */ - case 'x': - selSetElement(sel, y, x, SEL_HIT); - break; - - case 'O': - norig++; - selSetOrigin(sel, y, x); - /* fall through */ - case 'o': - selSetElement(sel, y, x, SEL_MISS); - break; - - case 'C': - norig++; - selSetOrigin(sel, y, x); - /* fall through */ - case ' ': - selSetElement(sel, y, x, SEL_DONT_CARE); - break; - - case '\n': - /* ignored */ - continue; - - default: - selDestroy(&sel); - return (SEL *)ERROR_PTR("unknown char", procName, NULL); - } - } - } - if (norig != 1) { - L_ERROR("Exactly one origin must be specified; this string has %d\n", - procName, norig); - selDestroy(&sel); - } - - return sel; -} - - -/*! - * \brief selPrintToString() - * - * \param[in] sel - * \return str string; caller must free - * - *
- * Notes:
- *      (1) This is an inverse function of selCreateFromString.
- *          It prints a textual representation of the SEL to a malloc'd
- *          string.  The format is the same as selCreateFromString
- *          except that newlines are inserted into the output
- *          between rows.
- *      (2) This is useful for debugging.  However, if you want to
- *          save some Sels in a file, put them in a Sela and write
- *          them out with selaWrite().  They can then be read in
- *          with selaRead().
- * 
- */ -char * -selPrintToString(SEL *sel) -{ -char is_center; -char *str, *strptr; -l_int32 type; -l_int32 sx, sy, cx, cy, x, y; - - PROCNAME("selPrintToString"); - - if (!sel) - return (char *)ERROR_PTR("sel not defined", procName, NULL); - - selGetParameters(sel, &sy, &sx, &cy, &cx); - if ((str = (char *)LEPT_CALLOC(1, sy * (sx + 1) + 1)) == NULL) - return (char *)ERROR_PTR("calloc fail for str", procName, NULL); - strptr = str; - - for (y = 0; y < sy; ++y) { - for (x = 0; x < sx; ++x) { - selGetElement(sel, y, x, &type); - is_center = (x == cx && y == cy); - switch (type) { - case SEL_HIT: - *(strptr++) = is_center ? 'X' : 'x'; - break; - case SEL_MISS: - *(strptr++) = is_center ? 'O' : 'o'; - break; - case SEL_DONT_CARE: - *(strptr++) = is_center ? 'C' : ' '; - break; - } - } - *(strptr++) = '\n'; - } - - return str; -} - - -/*----------------------------------------------------------------------* - * Building custom hit-miss sels from a simple file format * - *----------------------------------------------------------------------*/ -/*! - * \brief selaCreateFromFile() - * - * \param[in] filename - * \return sela, or NULL on error - * - *
- * Notes:
- *      (1) The file contains a sequence of Sel descriptions.
- *      (2) Each Sel is formatted as follows:
- *           ~ Any number of comment lines starting with '#' are ignored
- *           ~ The next line contains the selname
- *           ~ The next lines contain the Sel data.  They must be
- *             formatted similarly to the string format in
- *             selCreateFromString(), with each line beginning and
- *             ending with a double-quote, and showing the 2D layout.
- *           ~ Each Sel ends when a blank line, a comment line, or
- *             the end of file is reached.
- *      (3) See selCreateFromString() for a description of the string
- *          format for the Sel data.  As an example, here are the lines
- *          of is a valid file for a single Sel.  In the file, all lines
- *          are left-justified:
- *                    # diagonal sel
- *                    sel_5diag
- *                    "x    "
- *                    " x   "
- *                    "  X  "
- *                    "   x "
- *                    "    x"
- * 
- */ -SELA * -selaCreateFromFile(const char *filename) -{ -char *filestr, *line; -l_int32 i, n, first, last, nsel, insel; -size_t nbytes; -NUMA *nafirst, *nalast; -SARRAY *sa; -SEL *sel; -SELA *sela; - - PROCNAME("selaCreateFromFile"); - - if (!filename) - return (SELA *)ERROR_PTR("filename not defined", procName, NULL); - - filestr = (char *)l_binaryRead(filename, &nbytes); - sa = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - n = sarrayGetCount(sa); - sela = selaCreate(0); - - /* Find the start and end lines for each Sel. - * We allow the "blank" lines to be null strings or - * to have standard whitespace (' ','\t',\'n') or be '#'. */ - nafirst = numaCreate(0); - nalast = numaCreate(0); - insel = FALSE; - for (i = 0; i < n; i++) { - line = sarrayGetString(sa, i, L_NOCOPY); - if (!insel && - (line[0] != '\0' && line[0] != ' ' && - line[0] != '\t' && line[0] != '\n' && line[0] != '#')) { - numaAddNumber(nafirst, i); - insel = TRUE; - continue; - } - if (insel && - (line[0] == '\0' || line[0] == ' ' || - line[0] == '\t' || line[0] == '\n' || line[0] == '#')) { - numaAddNumber(nalast, i - 1); - insel = FALSE; - continue; - } - } - if (insel) /* fell off the end of the file */ - numaAddNumber(nalast, n - 1); - - /* Extract sels */ - nsel = numaGetCount(nafirst); - for (i = 0; i < nsel; i++) { - numaGetIValue(nafirst, i, &first); - numaGetIValue(nalast, i, &last); - if ((sel = selCreateFromSArray(sa, first, last)) == NULL) { - lept_stderr("Error reading sel from %d to %d\n", first, last); - selaDestroy(&sela); - sarrayDestroy(&sa); - numaDestroy(&nafirst); - numaDestroy(&nalast); - return (SELA *)ERROR_PTR("bad sela file", procName, NULL); - } - selaAddSel(sela, sel, NULL, 0); - } - - numaDestroy(&nafirst); - numaDestroy(&nalast); - sarrayDestroy(&sa); - return sela; -} - - -/*! 
- * \brief selCreateFromSArray() - * - * \param[in] sa - * \param[in] first line of sarray where Sel begins - * \param[in] last line of sarray where Sel ends - * \return sela, or NULL on error - * - *
- * Notes:
- *      (1) The Sel contains the following lines:
- *          ~ The first line is the selname
- *          ~ The remaining lines contain the Sel data.  They must
- *            be formatted similarly to the string format in
- *            selCreateFromString(), with each line beginning and
- *            ending with a double-quote, and showing the 2D layout.
- *          ~ 'last' gives the last line in the Sel data.
- *      (2) See selCreateFromString() for a description of the string
- *          format for the Sel data.  As an example, here are the lines
- *          of is a valid file for a single Sel.  In the file, all lines
- *          are left-justified:
- *                    # diagonal sel
- *                    sel_5diag
- *                    "x    "
- *                    " x   "
- *                    "  X  "
- *                    "   x "
- *                    "    x"
- * 
- */ -static SEL * -selCreateFromSArray(SARRAY *sa, - l_int32 first, - l_int32 last) -{ -char ch; -char *name, *line; -l_int32 n, len, i, w, h, y, x; -SEL *sel; - - PROCNAME("selCreateFromSArray"); - - if (!sa) - return (SEL *)ERROR_PTR("sa not defined", procName, NULL); - n = sarrayGetCount(sa); - if (first < 0 || first >= n || last <= first || last >= n) - return (SEL *)ERROR_PTR("invalid range", procName, NULL); - - name = sarrayGetString(sa, first, L_NOCOPY); - h = last - first; - line = sarrayGetString(sa, first + 1, L_NOCOPY); - len = strlen(line); - if (line[0] != '"' || line[len - 1] != '"') - return (SEL *)ERROR_PTR("invalid format", procName, NULL); - w = len - 2; - if ((sel = selCreate(h, w, name)) == NULL) - return (SEL *)ERROR_PTR("sel not made", procName, NULL); - for (i = first + 1; i <= last; i++) { - line = sarrayGetString(sa, i, L_NOCOPY); - y = i - first - 1; - for (x = 0; x < w; ++x) { - ch = line[x + 1]; /* skip the leading double-quote */ - switch (ch) - { - case 'X': - selSetOrigin(sel, y, x); /* set origin and hit */ - /* fall through */ - case 'x': - selSetElement(sel, y, x, SEL_HIT); - break; - - case 'O': - selSetOrigin(sel, y, x); /* set origin and miss */ - /* fall through */ - case 'o': - selSetElement(sel, y, x, SEL_MISS); - break; - - case 'C': - selSetOrigin(sel, y, x); /* set origin and don't-care */ - /* fall through */ - case ' ': - selSetElement(sel, y, x, SEL_DONT_CARE); - break; - - default: - selDestroy(&sel); - return (SEL *)ERROR_PTR("unknown char", procName, NULL); - } - } - } - - return sel; -} - - -/*----------------------------------------------------------------------* - * Making hit-only SELs from Pta and Pix * - *----------------------------------------------------------------------*/ -/*! - * \brief selCreateFromPta() - * - * \param[in] pta - * \param[in] cy, cx origin of sel - * \param[in] name [optional] sel name; can be null - * \return sel of minimum required size, or NULL on error - * - *
- * Notes:
- *      (1) The origin and all points in the pta must be positive.
- * 
- */ -SEL * -selCreateFromPta(PTA *pta, - l_int32 cy, - l_int32 cx, - const char *name) -{ -l_int32 i, n, x, y, w, h; -BOX *box; -SEL *sel; - - PROCNAME("selCreateFromPta"); - - if (!pta) - return (SEL *)ERROR_PTR("pta not defined", procName, NULL); - if (cy < 0 || cx < 0) - return (SEL *)ERROR_PTR("(cy, cx) not both >= 0", procName, NULL); - n = ptaGetCount(pta); - if (n == 0) - return (SEL *)ERROR_PTR("no pts in pta", procName, NULL); - - box = ptaGetBoundingRegion(pta); - boxGetGeometry(box, &x, &y, &w, &h); - boxDestroy(&box); - if (x < 0 || y < 0) - return (SEL *)ERROR_PTR("not all x and y >= 0", procName, NULL); - - sel = selCreate(y + h, x + w, name); - selSetOrigin(sel, cy, cx); - for (i = 0; i < n; i++) { - ptaGetIPt(pta, i, &x, &y); - selSetElement(sel, y, x, SEL_HIT); - } - - return sel; -} - - -/*! - * \brief selCreateFromPix() - * - * \param[in] pix - * \param[in] cy, cx origin of sel - * \param[in] name [optional] sel name; can be null - * \return sel, or NULL on error - * - *
- * Notes:
- *      (1) The origin must be positive.
- * 
- */ -SEL * -selCreateFromPix(PIX *pix, - l_int32 cy, - l_int32 cx, - const char *name) -{ -SEL *sel; -l_int32 i, j, w, h, d; -l_uint32 val; - - PROCNAME("selCreateFromPix"); - - if (!pix) - return (SEL *)ERROR_PTR("pix not defined", procName, NULL); - if (cy < 0 || cx < 0) - return (SEL *)ERROR_PTR("(cy, cx) not both >= 0", procName, NULL); - pixGetDimensions(pix, &w, &h, &d); - if (d != 1) - return (SEL *)ERROR_PTR("pix not 1 bpp", procName, NULL); - - sel = selCreate(h, w, name); - selSetOrigin(sel, cy, cx); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pix, j, i, &val); - if (val) - selSetElement(sel, i, j, SEL_HIT); - } - } - - return sel; -} - - -/*----------------------------------------------------------------------* - * Making hit-miss sels from color Pix and image files * - *----------------------------------------------------------------------*/ -/*! - * - * selReadFromColorImage() - * - * \param[in] pathname - * \return sel if OK; NULL on error - * - *
- * Notes:
- *      (1) Loads an image from a file and creates a (hit-miss) sel.
- *      (2) The sel name is taken from the pathname without the directory
- *          and extension.
- * 
- */ -SEL * -selReadFromColorImage(const char *pathname) -{ -PIX *pix; -SEL *sel; -char *basename, *selname; - - PROCNAME("selReadFromColorImage"); - - splitPathAtExtension (pathname, &basename, NULL); - splitPathAtDirectory (basename, NULL, &selname); - LEPT_FREE(basename); - - if ((pix = pixRead(pathname)) == NULL) { - LEPT_FREE(selname); - return (SEL *)ERROR_PTR("pix not returned", procName, NULL); - } - if ((sel = selCreateFromColorPix(pix, selname)) == NULL) - L_ERROR("sel not made\n", procName); - - LEPT_FREE(selname); - pixDestroy(&pix); - return sel; -} - - -/*! - * - * selCreateFromColorPix() - * - * \param[in] pixs cmapped or rgb - * \param[in] selname [optional] sel name; can be null - * \return sel if OK, NULL on error - * - *
- * Notes:
- *      (1) The sel size is given by the size of pixs.
- *      (2) In pixs, hits are represented by green pixels, misses by red
- *          pixels, and don't-cares by white pixels.
- *      (3) In pixs, there may be no misses, but there must be at least 1 hit.
- *      (4) At most there can be only one origin pixel, which is optionally
- *          specified by using a lower-intensity pixel:
- *            if a hit:  dark green
- *            if a miss: dark red
- *            if a don't care: gray
- *          If there is no such pixel, the origin defaults to the approximate
- *          center of the sel.
- * 
- */ -SEL * -selCreateFromColorPix(PIX *pixs, - const char *selname) -{ -PIXCMAP *cmap; -SEL *sel; -l_int32 hascolor, num_origins, nohits; -l_int32 w, h, d, i, j, red, green, blue; -l_uint32 pixval; - - PROCNAME("selCreateFromColorPix"); - - if (!pixs) - return (SEL *)ERROR_PTR("pixs not defined", procName, NULL); - - hascolor = FALSE; - cmap = pixGetColormap(pixs); - if (cmap) - pixcmapHasColor(cmap, &hascolor); - pixGetDimensions(pixs, &w, &h, &d); - if (hascolor == FALSE && d != 32) - return (SEL *)ERROR_PTR("pixs has no color", procName, NULL); - - if ((sel = selCreate (h, w, NULL)) == NULL) - return (SEL *)ERROR_PTR ("sel not made", procName, NULL); - selSetOrigin (sel, h / 2, w / 2); /* default */ - selSetName(sel, selname); - - num_origins = 0; - nohits = TRUE; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel (pixs, j, i, &pixval); - - if (cmap) { - pixcmapGetColor (cmap, pixval, &red, &green, &blue); - } else { - red = GET_DATA_BYTE (&pixval, COLOR_RED); - green = GET_DATA_BYTE (&pixval, COLOR_GREEN); - blue = GET_DATA_BYTE (&pixval, COLOR_BLUE); - } - - if (red < 255 && green < 255 && blue < 255) { - num_origins++; - if (num_origins == 1) /* first one found */ - selSetOrigin (sel, i, j); - if (num_origins == 2) - L_WARNING("multiple origins in sel image\n", procName); - } - if (!red && green && !blue) { - nohits = FALSE; - selSetElement (sel, i, j, SEL_HIT); - } else if (red && !green && !blue) { - selSetElement (sel, i, j, SEL_MISS); - } else if (red && green && blue) { - selSetElement (sel, i, j, SEL_DONT_CARE); - } else { - selDestroy(&sel); - return (SEL *)ERROR_PTR("invalid color", procName, NULL); - } - } - } - - if (nohits) { - selDestroy(&sel); - return (SEL *)ERROR_PTR("no hits in sel", procName, NULL); - } - return sel; -} - - -/*! - * - * selaCreateFromColorPixa() - * - * \param[in] pixa color pixa representing the sels - * \param[in] sa sarray of sel names - * \return sel if OK, NULL on error - * - *
- * Notes:
- *      (1) See notes in selCreateFromColorPix()
- *      (2) sa is required because all sels that are put in a sela
- *          must have a name.
- * 
- */ -SELA * -selaCreateFromColorPixa(PIXA *pixa, - SARRAY *sa) -{ -char *str; -l_int32 i, n; -PIX *pix; -SEL *sel; -SELA *sela; - - PROCNAME("selaCreateFromColorPixa"); - - if (!pixa) - return (SELA *)ERROR_PTR("pixa not defined", procName, NULL); - if (!sa) - return (SELA *)ERROR_PTR("sa of sel names not defined", procName, NULL); - - n = pixaGetCount(pixa); - if ((sela = selaCreate(n)) == NULL) - return (SELA *)ERROR_PTR("sela not allocated", procName, NULL); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - str = sarrayGetString(sa, i, L_NOCOPY); - sel = selCreateFromColorPix(pix, str); - selaAddSel(sela, sel, NULL, L_INSERT); - pixDestroy(&pix); - } - return sela; -} - - -/*----------------------------------------------------------------------* - * Printable display of sel * - *----------------------------------------------------------------------*/ -/*! - * \brief selDisplayInPix() - * - * \param[in] sel - * \param[in] size of grid interiors; odd; minimum size of 13 is enforced - * \param[in] gthick grid thickness; minimum size of 2 is enforced - * \return pix display of sel, or NULL on error - * - *
- * Notes:
- *      (1) This gives a visual representation of a general (hit-miss) sel.
- *      (2) The empty sel is represented by a grid of intersecting lines.
- *      (3) Three different patterns are generated for the sel elements:
- *          ~ hit (solid black circle)
- *          ~ miss (black ring; inner radius is radius2)
- *          ~ origin (cross, XORed with whatever is there)
- * 
- */ -PIX * -selDisplayInPix(SEL *sel, - l_int32 size, - l_int32 gthick) -{ -l_int32 i, j, w, h, sx, sy, cx, cy, type, width; -l_int32 radius1, radius2, shift1, shift2, x0, y0; -PIX *pixd, *pix2, *pixh, *pixm, *pixorig; -PTA *pta1, *pta2, *pta1t, *pta2t; - - PROCNAME("selDisplayInPix"); - - if (!sel) - return (PIX *)ERROR_PTR("sel not defined", procName, NULL); - if (size < 13) { - L_WARNING("size < 13; setting to 13\n", procName); - size = 13; - } - if (size % 2 == 0) - size++; - if (gthick < 2) { - L_WARNING("grid thickness < 2; setting to 2\n", procName); - gthick = 2; - } - selGetParameters(sel, &sy, &sx, &cy, &cx); - w = size * sx + gthick * (sx + 1); - h = size * sy + gthick * (sy + 1); - pixd = pixCreate(w, h, 1); - - /* Generate grid lines */ - for (i = 0; i <= sy; i++) - pixRenderLine(pixd, 0, gthick / 2 + i * (size + gthick), - w - 1, gthick / 2 + i * (size + gthick), - gthick, L_SET_PIXELS); - for (j = 0; j <= sx; j++) - pixRenderLine(pixd, gthick / 2 + j * (size + gthick), 0, - gthick / 2 + j * (size + gthick), h - 1, - gthick, L_SET_PIXELS); - - /* Generate hit and miss patterns */ - radius1 = (l_int32)(0.85 * ((size - 1) / 2.0) + 0.5); /* of hit */ - radius2 = (l_int32)(0.65 * ((size - 1) / 2.0) + 0.5); /* of inner miss */ - pta1 = generatePtaFilledCircle(radius1); - pta2 = generatePtaFilledCircle(radius2); - shift1 = (size - 1) / 2 - radius1; /* center circle in square */ - shift2 = (size - 1) / 2 - radius2; - pta1t = ptaTransform(pta1, shift1, shift1, 1.0, 1.0); - pta2t = ptaTransform(pta2, shift2, shift2, 1.0, 1.0); - pixh = pixGenerateFromPta(pta1t, size, size); /* hits */ - pix2 = pixGenerateFromPta(pta2t, size, size); - pixm = pixSubtract(NULL, pixh, pix2); - - /* Generate crossed lines for origin pattern */ - pixorig = pixCreate(size, size, 1); - width = size / 8; - pixRenderLine(pixorig, size / 2, (l_int32)(0.12 * size), - size / 2, (l_int32)(0.88 * size), - width, L_SET_PIXELS); - pixRenderLine(pixorig, (l_int32)(0.15 * size), size / 2, - 
(l_int32)(0.85 * size), size / 2, - width, L_FLIP_PIXELS); - pixRasterop(pixorig, size / 2 - width, size / 2 - width, - 2 * width, 2 * width, PIX_NOT(PIX_DST), NULL, 0, 0); - - /* Specialize origin pattern for this sel */ - selGetTypeAtOrigin(sel, &type); - if (type == SEL_HIT) - pixXor(pixorig, pixorig, pixh); - else if (type == SEL_MISS) - pixXor(pixorig, pixorig, pixm); - - /* Paste the patterns in */ - y0 = gthick; - for (i = 0; i < sy; i++) { - x0 = gthick; - for (j = 0; j < sx; j++) { - selGetElement(sel, i, j, &type); - if (i == cy && j == cx) /* origin */ - pixRasterop(pixd, x0, y0, size, size, PIX_SRC, pixorig, 0, 0); - else if (type == SEL_HIT) - pixRasterop(pixd, x0, y0, size, size, PIX_SRC, pixh, 0, 0); - else if (type == SEL_MISS) - pixRasterop(pixd, x0, y0, size, size, PIX_SRC, pixm, 0, 0); - x0 += size + gthick; - } - y0 += size + gthick; - } - - pixDestroy(&pix2); - pixDestroy(&pixh); - pixDestroy(&pixm); - pixDestroy(&pixorig); - ptaDestroy(&pta1); - ptaDestroy(&pta1t); - ptaDestroy(&pta2); - ptaDestroy(&pta2t); - return pixd; -} - - -/*! - * \brief selaDisplayInPix() - * - * \param[in] sela - * \param[in] size of grid interiors; odd; minimum size of 13 is enforced - * \param[in] gthick grid thickness; minimum size of 2 is enforced - * \param[in] spacing between sels, both horizontally and vertically - * \param[in] ncols number of sels per "line" - * \return pix display of all sels in sela, or NULL on error - * - *
- * Notes:
- *      (1) This gives a visual representation of all the sels in a sela.
- *      (2) See notes in selDisplayInPix() for display params of each sel.
- *      (3) This gives the nicest results when all sels in the sela
- *          are the same size.
- * 
- */ -PIX * -selaDisplayInPix(SELA *sela, - l_int32 size, - l_int32 gthick, - l_int32 spacing, - l_int32 ncols) -{ -l_int32 nsels, i, w, width; -PIX *pixt, *pixd; -PIXA *pixa; -SEL *sel; - - PROCNAME("selaDisplayInPix"); - - if (!sela) - return (PIX *)ERROR_PTR("sela not defined", procName, NULL); - if (size < 13) { - L_WARNING("size < 13; setting to 13\n", procName); - size = 13; - } - if (size % 2 == 0) - size++; - if (gthick < 2) { - L_WARNING("grid thickness < 2; setting to 2\n", procName); - gthick = 2; - } - if (spacing < 5) { - L_WARNING("spacing < 5; setting to 5\n", procName); - spacing = 5; - } - - /* Accumulate the pix of each sel */ - nsels = selaGetCount(sela); - pixa = pixaCreate(nsels); - for (i = 0; i < nsels; i++) { - sel = selaGetSel(sela, i); - pixt = selDisplayInPix(sel, size, gthick); - pixaAddPix(pixa, pixt, L_INSERT); - } - - /* Find the tiled output width, using just the first - * ncols pix in the pixa. If all pix have the same width, - * they will align properly in columns. */ - width = 0; - ncols = L_MIN(nsels, ncols); - for (i = 0; i < ncols; i++) { - pixt = pixaGetPix(pixa, i, L_CLONE); - pixGetDimensions(pixt, &w, NULL, NULL); - width += w; - pixDestroy(&pixt); - } - width += (ncols + 1) * spacing; /* add spacing all around as well */ - - pixd = pixaDisplayTiledInRows(pixa, 1, width, 1.0, 0, spacing, 0); - pixaDestroy(&pixa); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sel2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sel2.c deleted file mode 100644 index 7e789cd2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sel2.c +++ /dev/null @@ -1,890 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file sel2.c - *
- *
- *      Contains definitions of simple structuring elements
- *
- *      Basic brick structuring elements
- *          SELA    *selaAddBasic()
- *               Linear horizontal and vertical
- *               Square
- *               Diagonals
- *
- *      Simple hit-miss structuring elements
- *          SELA    *selaAddHitMiss()
- *               Isolated foreground pixel
- *               Horizontal and vertical edges
- *               Slanted edge
- *               Corners
- *
- *      Structuring elements for comparing with DWA operations
- *          SELA    *selaAddDwaLinear()
- *          SELA    *selaAddDwaCombs()
- *
- *      Structuring elements for the intersection of lines
- *          SELA    *selaAddCrossJunctions()
- *          SELA    *selaAddTJunctions()
- *
- *      Structuring elements for connectivity-preserving thinning operations
- *          SELA    *sela4ccThin()
- *          SELA    *sela8ccThin()
- *          SELA    *sela4and8ccThin()
- *
- *      Other structuring elements
- *          SEL    *selMakePlusSign()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static const l_int32 L_BUF_SIZE = 512; - - /* Linear brick sel sizes, including all those that are required - * for decomposable sels up to size 63. */ -static const l_int32 num_linear = 25; -static const l_int32 basic_linear[] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 20, 21, 25, 30, 31, 35, 40, 41, 45, 50, 51}; - - -/* ------------------------------------------------------------------- * - * Basic brick structuring elements * - * ------------------------------------------------------------------- */ -/*! - * \brief selaAddBasic() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds the following sels:
- *            ~ all linear (horiz, vert) brick sels that are
- *              necessary for decomposable sels up to size 63
- *            ~ square brick sels up to size 10
- *            ~ 4 diagonal sels
- * 
- */ -SELA * -selaAddBasic(SELA *sela) -{ -char name[L_BUF_SIZE]; -l_int32 i, size; -SEL *sel; - - PROCNAME("selaAddBasic"); - - if (!sela) { - if ((sela = selaCreate(0)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - } - - /*--------------------------------------------------------------* - * Linear horizontal and vertical sels * - *--------------------------------------------------------------*/ - for (i = 0; i < num_linear; i++) { - size = basic_linear[i]; - sel = selCreateBrick(1, size, 0, size / 2, 1); - snprintf(name, L_BUF_SIZE, "sel_%dh", size); - selaAddSel(sela, sel, name, 0); - } - for (i = 0; i < num_linear; i++) { - size = basic_linear[i]; - sel = selCreateBrick(size, 1, size / 2, 0, 1); - snprintf(name, L_BUF_SIZE, "sel_%dv", size); - selaAddSel(sela, sel, name, 0); - } - - /*-----------------------------------------------------------* - * 2-d Bricks * - *-----------------------------------------------------------*/ - for (i = 2; i <= 5; i++) { - sel = selCreateBrick(i, i, i / 2, i / 2, 1); - snprintf(name, L_BUF_SIZE, "sel_%d", i); - selaAddSel(sela, sel, name, 0); - } - - /*-----------------------------------------------------------* - * Diagonals * - *-----------------------------------------------------------*/ - /* 0c 1 - 1 0 */ - sel = selCreateBrick(2, 2, 0, 0, 1); - selSetElement(sel, 0, 0, 0); - selSetElement(sel, 1, 1, 0); - selaAddSel(sela, sel, "sel_2dp", 0); - - /* 1c 0 - 0 1 */ - sel = selCreateBrick(2, 2, 0, 0, 1); - selSetElement(sel, 0, 1, 0); - selSetElement(sel, 1, 0, 0); - selaAddSel(sela, sel, "sel_2dm", 0); - - /* Diagonal, slope +, size 5 */ - sel = selCreate(5, 5, "sel_5dp"); - selSetOrigin(sel, 2, 2); - selSetElement(sel, 0, 4, 1); - selSetElement(sel, 1, 3, 1); - selSetElement(sel, 2, 2, 1); - selSetElement(sel, 3, 1, 1); - selSetElement(sel, 4, 0, 1); - selaAddSel(sela, sel, "sel_5dp", 0); - - /* Diagonal, slope -, size 5 */ - sel = selCreate(5, 5, "sel_5dm"); - selSetOrigin(sel, 2, 2); - 
selSetElement(sel, 0, 0, 1); - selSetElement(sel, 1, 1, 1); - selSetElement(sel, 2, 2, 1); - selSetElement(sel, 3, 3, 1); - selSetElement(sel, 4, 4, 1); - selaAddSel(sela, sel, "sel_5dm", 0); - - return sela; -} - - -/* ------------------------------------------------------------------- * - * Simple hit-miss structuring elements * - * ------------------------------------------------------------------- */ -/*! - * \brief selaAddHitMiss() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - */ -SELA * -selaAddHitMiss(SELA *sela) -{ -SEL *sel; - - PROCNAME("selaAddHitMiss"); - - if (!sela) { - if ((sela = selaCreate(0)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - } - -#if 0 /* use just for testing */ - sel = selCreateBrick(3, 3, 1, 1, 2); - selaAddSel(sela, sel, "sel_bad", 0); -#endif - - - /*--------------------------------------------------------------* - * Isolated foreground pixel * - *--------------------------------------------------------------*/ - sel = selCreateBrick(3, 3, 1, 1, SEL_MISS); - selSetElement(sel, 1, 1, SEL_HIT); - selaAddSel(sela, sel, "sel_3hm", 0); - - /*--------------------------------------------------------------* - * Horizontal and vertical edges * - *--------------------------------------------------------------*/ - sel = selCreateBrick(2, 3, 0, 1, SEL_HIT); - selSetElement(sel, 1, 0, SEL_MISS); - selSetElement(sel, 1, 1, SEL_MISS); - selSetElement(sel, 1, 2, SEL_MISS); - selaAddSel(sela, sel, "sel_3de", 0); - - sel = selCreateBrick(2, 3, 1, 1, SEL_HIT); - selSetElement(sel, 0, 0, SEL_MISS); - selSetElement(sel, 0, 1, SEL_MISS); - selSetElement(sel, 0, 2, SEL_MISS); - selaAddSel(sela, sel, "sel_3ue", 0); - - sel = selCreateBrick(3, 2, 1, 0, SEL_HIT); - selSetElement(sel, 0, 1, SEL_MISS); - selSetElement(sel, 1, 1, SEL_MISS); - selSetElement(sel, 2, 1, SEL_MISS); - selaAddSel(sela, sel, "sel_3re", 0); - - sel = selCreateBrick(3, 2, 1, 1, SEL_HIT); - selSetElement(sel, 0, 
0, SEL_MISS); - selSetElement(sel, 1, 0, SEL_MISS); - selSetElement(sel, 2, 0, SEL_MISS); - selaAddSel(sela, sel, "sel_3le", 0); - - /*--------------------------------------------------------------* - * Slanted edge * - *--------------------------------------------------------------*/ - sel = selCreateBrick(13, 6, 6, 2, SEL_DONT_CARE); - selSetElement(sel, 0, 3, SEL_MISS); - selSetElement(sel, 0, 5, SEL_HIT); - selSetElement(sel, 4, 2, SEL_MISS); - selSetElement(sel, 4, 4, SEL_HIT); - selSetElement(sel, 8, 1, SEL_MISS); - selSetElement(sel, 8, 3, SEL_HIT); - selSetElement(sel, 12, 0, SEL_MISS); - selSetElement(sel, 12, 2, SEL_HIT); - selaAddSel(sela, sel, "sel_sl1", 0); - - /*--------------------------------------------------------------* - * Corners * - * This allows for up to 3 missing edge pixels at the corner * - *--------------------------------------------------------------*/ - sel = selCreateBrick(4, 4, 1, 1, SEL_MISS); - selSetElement(sel, 1, 1, SEL_DONT_CARE); - selSetElement(sel, 1, 2, SEL_DONT_CARE); - selSetElement(sel, 2, 1, SEL_DONT_CARE); - selSetElement(sel, 1, 3, SEL_HIT); - selSetElement(sel, 2, 2, SEL_HIT); - selSetElement(sel, 2, 3, SEL_HIT); - selSetElement(sel, 3, 1, SEL_HIT); - selSetElement(sel, 3, 2, SEL_HIT); - selSetElement(sel, 3, 3, SEL_HIT); - selaAddSel(sela, sel, "sel_ulc", 0); - - sel = selCreateBrick(4, 4, 1, 2, SEL_MISS); - selSetElement(sel, 1, 1, SEL_DONT_CARE); - selSetElement(sel, 1, 2, SEL_DONT_CARE); - selSetElement(sel, 2, 2, SEL_DONT_CARE); - selSetElement(sel, 1, 0, SEL_HIT); - selSetElement(sel, 2, 0, SEL_HIT); - selSetElement(sel, 2, 1, SEL_HIT); - selSetElement(sel, 3, 0, SEL_HIT); - selSetElement(sel, 3, 1, SEL_HIT); - selSetElement(sel, 3, 2, SEL_HIT); - selaAddSel(sela, sel, "sel_urc", 0); - - sel = selCreateBrick(4, 4, 2, 1, SEL_MISS); - selSetElement(sel, 1, 1, SEL_DONT_CARE); - selSetElement(sel, 2, 1, SEL_DONT_CARE); - selSetElement(sel, 2, 2, SEL_DONT_CARE); - selSetElement(sel, 0, 1, SEL_HIT); - 
selSetElement(sel, 0, 2, SEL_HIT); - selSetElement(sel, 0, 3, SEL_HIT); - selSetElement(sel, 1, 2, SEL_HIT); - selSetElement(sel, 1, 3, SEL_HIT); - selSetElement(sel, 2, 3, SEL_HIT); - selaAddSel(sela, sel, "sel_llc", 0); - - sel = selCreateBrick(4, 4, 2, 2, SEL_MISS); - selSetElement(sel, 1, 2, SEL_DONT_CARE); - selSetElement(sel, 2, 1, SEL_DONT_CARE); - selSetElement(sel, 2, 2, SEL_DONT_CARE); - selSetElement(sel, 0, 0, SEL_HIT); - selSetElement(sel, 0, 1, SEL_HIT); - selSetElement(sel, 0, 2, SEL_HIT); - selSetElement(sel, 1, 0, SEL_HIT); - selSetElement(sel, 1, 1, SEL_HIT); - selSetElement(sel, 2, 0, SEL_HIT); - selaAddSel(sela, sel, "sel_lrc", 0); - - return sela; -} - - -/* ------------------------------------------------------------------- * - * Structuring elements for comparing with DWA operations * - * ------------------------------------------------------------------- */ -/*! - * \brief selaAddDwaLinear() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds all linear (horizontal, vertical) sels from
- *          2 to 63 pixels in length, which are the sizes over
- *          which dwa code can be generated.
- * 
- */ -SELA * -selaAddDwaLinear(SELA *sela) -{ -char name[L_BUF_SIZE]; -l_int32 i; -SEL *sel; - - PROCNAME("selaAddDwaLinear"); - - if (!sela) { - if ((sela = selaCreate(0)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - } - - for (i = 2; i < 64; i++) { - sel = selCreateBrick(1, i, 0, i / 2, 1); - snprintf(name, L_BUF_SIZE, "sel_%dh", i); - selaAddSel(sela, sel, name, 0); - } - for (i = 2; i < 64; i++) { - sel = selCreateBrick(i, 1, i / 2, 0, 1); - snprintf(name, L_BUF_SIZE, "sel_%dv", i); - selaAddSel(sela, sel, name, 0); - } - return sela; -} - - -/*! - * \brief selaAddDwaCombs() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds all comb (horizontal, vertical) Sels that are
- *          used in composite linear morphological operations
- *          up to 63 pixels in length, which are the sizes over
- *          which dwa code can be generated.
- * 
- */ -SELA * -selaAddDwaCombs(SELA *sela) -{ -char name[L_BUF_SIZE]; -l_int32 i, f1, f2, prevsize, size; -SEL *selh, *selv; - - PROCNAME("selaAddDwaCombs"); - - if (!sela) { - if ((sela = selaCreate(0)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - } - - prevsize = 0; - for (i = 4; i < 64; i++) { - selectComposableSizes(i, &f1, &f2); - size = f1 * f2; - if (size == prevsize) - continue; - selectComposableSels(i, L_HORIZ, NULL, &selh); - selectComposableSels(i, L_VERT, NULL, &selv); - snprintf(name, L_BUF_SIZE, "sel_comb_%dh", size); - selaAddSel(sela, selh, name, 0); - snprintf(name, L_BUF_SIZE, "sel_comb_%dv", size); - selaAddSel(sela, selv, name, 0); - prevsize = size; - } - - return sela; -} - - -/* ------------------------------------------------------------------- * - * Structuring elements for the intersection of lines * - * ------------------------------------------------------------------- */ -/*! - * \brief selaAddCrossJunctions() - * - * \param[in] sela [optional] - * \param[in] hlsize length of each line of hits from origin - * \param[in] mdist distance of misses from the origin - * \param[in] norient number of orientations; max of 8 - * \param[in] debugflag 1 for debug output - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds hitmiss Sels for the intersection of two lines.
- *          If the lines are very thin, they must be nearly orthogonal
- *          to register.
- *      (2) The number of Sels generated is equal to %norient.
- *      (3) If %norient == 2, this generates 2 Sels of crosses, each with
- *          two perpendicular lines of hits.  One Sel has horizontal and
- *          vertical hits; the other has hits along lines at +-45 degrees.
- *          Likewise, if %norient == 3, this generates 3 Sels of crosses
- *          oriented at 30 degrees with each other.
- *      (4) It is suggested that %hlsize be chosen at least 1 greater
- *          than %mdist.  Try values of (%hlsize, %mdist) such as
- *          (6,5), (7,6), (8,7), (9,7), etc.
- * 
- */ -SELA * -selaAddCrossJunctions(SELA *sela, - l_float32 hlsize, - l_float32 mdist, - l_int32 norient, - l_int32 debugflag) -{ -char name[L_BUF_SIZE]; -l_int32 i, j, w, xc, yc; -l_float64 pi, halfpi, radincr, radang; -l_float64 angle; -PIX *pixc, *pixm, *pixt; -PIXA *pixa; -PTA *pta1, *pta2, *pta3, *pta4; -SEL *sel; - - PROCNAME("selaAddCrossJunctions"); - - if (hlsize <= 0) - return (SELA *)ERROR_PTR("hlsize not > 0", procName, NULL); - if (norient < 1 || norient > 8) - return (SELA *)ERROR_PTR("norient not in [1, ... 8]", procName, NULL); - - if (!sela) { - if ((sela = selaCreate(0)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - } - - pi = 3.1415926535; - halfpi = 3.1415926535 / 2.0; - radincr = halfpi / (l_float64)norient; - w = (l_int32)(2.2 * (L_MAX(hlsize, mdist) + 0.5)); - if (w % 2 == 0) - w++; - xc = w / 2; - yc = w / 2; - - pixa = pixaCreate(norient); - for (i = 0; i < norient; i++) { - - /* Set the don't cares */ - pixc = pixCreate(w, w, 32); - pixSetAll(pixc); - - /* Add the green lines of hits */ - pixm = pixCreate(w, w, 1); - radang = (l_float32)i * radincr; - pta1 = generatePtaLineFromPt(xc, yc, hlsize + 1, radang); - pta2 = generatePtaLineFromPt(xc, yc, hlsize + 1, radang + halfpi); - pta3 = generatePtaLineFromPt(xc, yc, hlsize + 1, radang + pi); - pta4 = generatePtaLineFromPt(xc, yc, hlsize + 1, radang + pi + halfpi); - ptaJoin(pta1, pta2, 0, -1); - ptaJoin(pta1, pta3, 0, -1); - ptaJoin(pta1, pta4, 0, -1); - pixRenderPta(pixm, pta1, L_SET_PIXELS); - pixPaintThroughMask(pixc, pixm, 0, 0, 0x00ff0000); - ptaDestroy(&pta1); - ptaDestroy(&pta2); - ptaDestroy(&pta3); - ptaDestroy(&pta4); - - /* Add red misses between the lines */ - for (j = 0; j < 4; j++) { - angle = radang + (j - 0.5) * halfpi; - pixSetPixel(pixc, xc + (l_int32)(mdist * cos(angle)), - yc + (l_int32)(mdist * sin(angle)), 0xff000000); - } - - /* Add dark green for origin */ - pixSetPixel(pixc, xc, yc, 0x00550000); - - /* Generate the sel */ - sel = 
selCreateFromColorPix(pixc, NULL); - snprintf(name, sizeof(name), "sel_cross_%d", i); - selaAddSel(sela, sel, name, 0); - - if (debugflag) { - pixt = pixScaleBySampling(pixc, 10.0, 10.0); - pixaAddPix(pixa, pixt, L_INSERT); - } - pixDestroy(&pixm); - pixDestroy(&pixc); - } - - if (debugflag) { - l_int32 w; - lept_mkdir("lept/sel"); - pixaGetPixDimensions(pixa, 0, &w, NULL, NULL); - pixt = pixaDisplayTiledAndScaled(pixa, 32, w, 1, 0, 10, 2); - pixWriteDebug("/tmp/lept/sel/xsel1.png", pixt, IFF_PNG); - pixDisplay(pixt, 0, 100); - pixDestroy(&pixt); - pixt = selaDisplayInPix(sela, 15, 2, 20, 1); - pixWriteDebug("/tmp/lept/sel/xsel2.png", pixt, IFF_PNG); - pixDisplay(pixt, 500, 100); - pixDestroy(&pixt); - selaWriteStream(stderr, sela); - } - pixaDestroy(&pixa); - - return sela; -} - - -/*! - * \brief selaAddTJunctions() - * - * \param[in] sela [optional] - * \param[in] hlsize length of each line of hits from origin - * \param[in] mdist distance of misses from the origin - * \param[in] norient number of orientations; max of 8 - * \param[in] debugflag 1 for debug output - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds hitmiss Sels for the T-junction of two lines.
- *          If the lines are very thin, they must be nearly orthogonal
- *          to register.
- *      (2) The number of Sels generated is 4 * %norient.
- *      (3) It is suggested that %hlsize be chosen at least 1 greater
- *          than %mdist.  Try values of (%hlsize, %mdist) such as
- *          (6,5), (7,6), (8,7), (9,7), etc.
- * 
- */ -SELA * -selaAddTJunctions(SELA *sela, - l_float32 hlsize, - l_float32 mdist, - l_int32 norient, - l_int32 debugflag) -{ -char name[L_BUF_SIZE]; -l_int32 i, j, k, w, xc, yc; -l_float64 pi, halfpi, radincr, jang, radang; -l_float64 angle[3], dist[3]; -PIX *pixc, *pixm, *pixt; -PIXA *pixa; -PTA *pta1, *pta2, *pta3; -SEL *sel; - - PROCNAME("selaAddTJunctions"); - - if (hlsize <= 2) - return (SELA *)ERROR_PTR("hlsizel not > 1", procName, NULL); - if (norient < 1 || norient > 8) - return (SELA *)ERROR_PTR("norient not in [1, ... 8]", procName, NULL); - - if (!sela) { - if ((sela = selaCreate(0)) == NULL) - return (SELA *)ERROR_PTR("sela not made", procName, NULL); - } - - pi = 3.1415926535; - halfpi = 3.1415926535 / 2.0; - radincr = halfpi / (l_float32)norient; - w = (l_int32)(2.4 * (L_MAX(hlsize, mdist) + 0.5)); - if (w % 2 == 0) - w++; - xc = w / 2; - yc = w / 2; - - pixa = pixaCreate(4 * norient); - for (i = 0; i < norient; i++) { - for (j = 0; j < 4; j++) { /* 4 orthogonal orientations */ - jang = (l_float32)j * halfpi; - - /* Set the don't cares */ - pixc = pixCreate(w, w, 32); - pixSetAll(pixc); - - /* Add the green lines of hits */ - pixm = pixCreate(w, w, 1); - radang = (l_float32)i * radincr; - pta1 = generatePtaLineFromPt(xc, yc, hlsize + 1, jang + radang); - pta2 = generatePtaLineFromPt(xc, yc, hlsize + 1, - jang + radang + halfpi); - pta3 = generatePtaLineFromPt(xc, yc, hlsize + 1, - jang + radang + pi); - ptaJoin(pta1, pta2, 0, -1); - ptaJoin(pta1, pta3, 0, -1); - pixRenderPta(pixm, pta1, L_SET_PIXELS); - pixPaintThroughMask(pixc, pixm, 0, 0, 0x00ff0000); - ptaDestroy(&pta1); - ptaDestroy(&pta2); - ptaDestroy(&pta3); - - /* Add red misses between the lines */ - angle[0] = radang + jang - halfpi; - angle[1] = radang + jang + 0.5 * halfpi; - angle[2] = radang + jang + 1.5 * halfpi; - dist[0] = 0.8 * mdist; - dist[1] = dist[2] = mdist; - for (k = 0; k < 3; k++) { - pixSetPixel(pixc, xc + (l_int32)(dist[k] * cos(angle[k])), - yc + (l_int32)(dist[k] * 
sin(angle[k])), - 0xff000000); - } - - /* Add dark green for origin */ - pixSetPixel(pixc, xc, yc, 0x00550000); - - /* Generate the sel */ - sel = selCreateFromColorPix(pixc, NULL); - snprintf(name, sizeof(name), "sel_cross_%d", 4 * i + j); - selaAddSel(sela, sel, name, 0); - - if (debugflag) { - pixt = pixScaleBySampling(pixc, 10.0, 10.0); - pixaAddPix(pixa, pixt, L_INSERT); - } - pixDestroy(&pixm); - pixDestroy(&pixc); - } - } - - if (debugflag) { - l_int32 w; - lept_mkdir("lept/sel"); - pixaGetPixDimensions(pixa, 0, &w, NULL, NULL); - pixt = pixaDisplayTiledAndScaled(pixa, 32, w, 4, 0, 10, 2); - pixWriteDebug("/tmp/lept/sel/tsel1.png", pixt, IFF_PNG); - pixDisplay(pixt, 0, 100); - pixDestroy(&pixt); - pixt = selaDisplayInPix(sela, 15, 2, 20, 4); - pixWriteDebug("/tmp/lept/sel/tsel2.png", pixt, IFF_PNG); - pixDisplay(pixt, 500, 100); - pixDestroy(&pixt); - selaWriteStream(stderr, sela); - } - pixaDestroy(&pixa); - - return sela; -} - - -/* -------------------------------------------------------------------------- * - * Structuring elements for connectivity-preserving thinning operations * - * -------------------------------------------------------------------------- */ - - /* ------------------------------------------------------------ - * These sels (and their rotated counterparts) are the useful - * 3x3 Sels for thinning. 
The notation is based on - * "Connectivity-preserving morphological image transformations," - * a version of which can be found at - * http://www.leptonica.com/papers/conn.pdf - * ------------------------------------------------------------ */ - - /* Sels for 4-connected thinning */ -static const char *sel_4_1 = " x" - "oCx" - " x"; -static const char *sel_4_2 = " x" - "oCx" - " o "; -static const char *sel_4_3 = " o " - "oCx" - " x"; -static const char *sel_4_4 = " o " - "oCx" - " o "; -static const char *sel_4_5 = " ox" - "oCx" - " o "; -static const char *sel_4_6 = " o " - "oCx" - " ox"; -static const char *sel_4_7 = " xx" - "oCx" - " o "; -static const char *sel_4_8 = " x" - "oCx" - "o x"; -static const char *sel_4_9 = "o x" - "oCx" - " x"; - - /* Sels for 8-connected thinning */ -static const char *sel_8_1 = " x " - "oCx" - " x "; -static const char *sel_8_2 = " x " - "oCx" - "o "; -static const char *sel_8_3 = "o " - "oCx" - " x "; -static const char *sel_8_4 = "o " - "oCx" - "o "; -static const char *sel_8_5 = "o x" - "oCx" - "o "; -static const char *sel_8_6 = "o " - "oCx" - "o x"; -static const char *sel_8_7 = " x " - "oCx" - "oo "; -static const char *sel_8_8 = " x " - "oCx" - "ox "; -static const char *sel_8_9 = "ox " - "oCx" - " x "; - - /* Sels for both 4 and 8-connected thinning */ -static const char *sel_48_1 = " xx" - "oCx" - "oo "; -static const char *sel_48_2 = "o x" - "oCx" - "o x"; - - -/*! - * \brief sela4ccThin() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds the 9 basic sels for 4-cc thinning.
- * 
- */ -SELA * -sela4ccThin(SELA *sela) -{ -SEL *sel; - - if (!sela) sela = selaCreate(9); - - sel = selCreateFromString(sel_4_1, 3, 3, "sel_4_1"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_2, 3, 3, "sel_4_2"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_3, 3, 3, "sel_4_3"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_4, 3, 3, "sel_4_4"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_5, 3, 3, "sel_4_5"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_6, 3, 3, "sel_4_6"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_7, 3, 3, "sel_4_7"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_8, 3, 3, "sel_4_8"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_4_9, 3, 3, "sel_4_9"); - selaAddSel(sela, sel, NULL, 0); - - return sela; -} - - -/*! - * \brief sela8ccThin() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds the 9 basic sels for 8-cc thinning.
- * 
- */ -SELA * -sela8ccThin(SELA *sela) -{ -SEL *sel; - - if (!sela) sela = selaCreate(9); - - sel = selCreateFromString(sel_8_1, 3, 3, "sel_8_1"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_2, 3, 3, "sel_8_2"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_3, 3, 3, "sel_8_3"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_4, 3, 3, "sel_8_4"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_5, 3, 3, "sel_8_5"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_6, 3, 3, "sel_8_6"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_7, 3, 3, "sel_8_7"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_8, 3, 3, "sel_8_8"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_8_9, 3, 3, "sel_8_9"); - selaAddSel(sela, sel, NULL, 0); - - return sela; -} - - -/*! - * \brief sela4and8ccThin() - * - * \param[in] sela [optional] - * \return sela with additional sels, or NULL on error - * - *
- * Notes:
- *      (1) Adds the 2 basic sels for either 4-cc or 8-cc thinning.
- * 
- */ -SELA * -sela4and8ccThin(SELA *sela) -{ -SEL *sel; - - if (!sela) sela = selaCreate(2); - - sel = selCreateFromString(sel_48_1, 3, 3, "sel_48_1"); - selaAddSel(sela, sel, NULL, 0); - sel = selCreateFromString(sel_48_2, 3, 3, "sel_48_2"); - selaAddSel(sela, sel, NULL, 0); - - return sela; -} - - -/* -------------------------------------------------------------------------- * - * Other structuring elements * - * -------------------------------------------------------------------------- */ -/*! - * \brief selMakePlusSign() - * - * \param[in] size side of containing square - * \param[in] linewidth of lines - * \return sel, or NULL on error - * - *
- * Notes:
- *      (1) Useful for debugging to show location of selected pixels.
- *      (2) See displaySelectedPixels() for an example of use.
- * 
- */ -SEL * -selMakePlusSign(l_int32 size, - l_int32 linewidth) -{ -PIX *pix; -SEL *sel; - - PROCNAME("selMakePlusSign"); - - if (size < 3 || linewidth > size) - return (SEL *)ERROR_PTR("invalid input", procName, NULL); - - pix = pixCreate(size, size, 1); - pixRenderLine(pix, size / 2, 0, size / 2, size - 1, - linewidth, L_SET_PIXELS); - pixRenderLine(pix, 0, size / 2, size, size / 2, - linewidth, L_SET_PIXELS); - sel = selCreateFromPix(pix, size / 2, size / 2, "plus_sign"); - pixDestroy(&pix); - return sel; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/selgen.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/selgen.c deleted file mode 100644 index 13ee214f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/selgen.c +++ /dev/null @@ -1,987 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file selgen.c - *
- *
- *      This file contains functions that generate hit-miss Sels
- *      for doing a loose match to a small bitmap.  The hit-miss
- *      Sel is made from a given bitmap.  Several "knobs"
- *      are available to control the looseness of the match.
- *      In general, a tight match will have fewer false positives
- *      (bad matches) but more false negatives (missed patterns).
- *      The values to be used depend on the quality and variation
- *      of the image in which the pattern is to be searched,
- *      and the relative penalties of false positives and
- *      false negatives.  Default values for the three knobs --
- *      minimum distance to boundary pixels, number of extra pixels
- *      added to selected sides, and minimum acceptable runlength
- *      in eroded version -- are provided.
- *
- *      The generated hit-miss Sels can always be used in the
- *      rasterop implementation of binary morphology (in morph.h).
- *      If they are small enough (not more than 31 pixels extending
- *      in any direction from the Sel origin), they can also be used
- *      to auto-generate dwa code (fmorphauto.c).
- *
- *
- *      Generate a subsampled structuring element
- *            SEL     *pixGenerateSelWithRuns()
- *            SEL     *pixGenerateSelRandom()
- *            SEL     *pixGenerateSelBoundary()
- *
- *      Accumulate data on runs along lines
- *            NUMA    *pixGetRunCentersOnLine()
- *            NUMA    *pixGetRunsOnLine()
- *
- *      Subsample boundary pixels in relatively ordered way
- *            PTA     *pixSubsampleBoundaryPixels()
- *            PTA     *adjacentOnPixelInRaster()
- *
- *      Display generated sel with originating image
- *            PIX     *pixDisplayHitMissSel()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Default minimum distance of a hit-miss pixel element to - * a boundary pixel of its color. */ -static const l_int32 DefaultDistanceToBoundary = 1; -static const l_int32 MaxDistanceToBoundary = 4; - - /* Default min runlength to accept a hit or miss element located - * at its center */ -static const l_int32 DefaultMinRunlength = 3; - - /* Default scalefactor for displaying image and hit-miss sel - * that is derived from it */ -static const l_int32 DefaultSelScalefactor = 7; -static const l_int32 MaxSelScalefactor = 31; /* should be big enough */ - -#ifndef NO_CONSOLE_IO -#define DEBUG_DISPLAY_HM_SEL 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-----------------------------------------------------------------* - * Generate a subsampled structuring element * - *-----------------------------------------------------------------*/ -/*! - * \brief pixGenerateSelWithRuns() - * - * \param[in] pixs 1 bpp, typically small, to be used as a pattern - * \param[in] nhlines number of hor lines along which elements are found - * \param[in] nvlines number of vert lines along which elements are found - * \param[in] distance min distance from boundary pixel; use 0 for default - * \param[in] minlength min runlength to set hit or miss; use 0 for default - * \param[in] toppix number of extra pixels of bg added above - * \param[in] botpix number of extra pixels of bg added below - * \param[in] leftpix number of extra pixels of bg added to left - * \param[in] rightpix number of extra pixels of bg added to right - * \param[out] ppixe [optional] input pix expanded by extra pixels - * \return sel hit-miss for input pattern, or NULL on error - * - *
- * Notes:
- *    (1) The horizontal and vertical lines along which elements are
- *        selected are roughly equally spaced.  The actual locations of
- *        the hits and misses are the centers of respective run-lengths.
- *    (2) No elements are selected that are less than 'distance' pixels away
- *        from a boundary pixel of the same color.  This makes the
- *        match much more robust to edge noise.  Valid inputs of
- *        'distance' are 0, 1, 2, 3 and 4.  If distance is either 0 or
- *        greater than 4, we reset it to the default value.
- *    (3) The 4 numbers for adding rectangles of pixels outside the fg
- *        can be use if the pattern is expected to be surrounded by bg
- *        (white) pixels.  On the other hand, if the pattern may be near
- *        other fg (black) components on some sides, use 0 for those sides.
- *    (4) The pixels added to a side allow you to have miss elements there.
- *        There is a constraint between distance, minlength, and
- *        the added pixels for this to work.  We illustrate using the
- *        default values.  If you add 5 pixels to the top, and use a
- *        distance of 1, then you end up with a vertical run of at least
- *        4 bg pixels along the top edge of the image.  If you use a
- *        minimum runlength of 3, each vertical line will always find
- *        a miss near the center of its run.  However, if you use a
- *        minimum runlength of 5, you will not get a miss on every vertical
- *        line.  As another example, if you have 7 added pixels and a
- *        distance of 2, you can use a runlength up to 5 to guarantee
- *        that the miss element is recorded.  We give a warning if the
- *        constraint does not guarantee a miss element outside the
- *        image proper.
- *    (5) The input pix, as extended by the extra pixels on selected sides,
- *        can optionally be returned.  For debugging, call
- *        pixDisplayHitMissSel() to visualize the hit-miss sel superimposed
- *        on the generating bitmap.
- * 
- */ -SEL * -pixGenerateSelWithRuns(PIX *pixs, - l_int32 nhlines, - l_int32 nvlines, - l_int32 distance, - l_int32 minlength, - l_int32 toppix, - l_int32 botpix, - l_int32 leftpix, - l_int32 rightpix, - PIX **ppixe) -{ -l_int32 ws, hs, w, h, x, y, xval, yval, i, j, nh, nm; -l_float32 delh, delw; -NUMA *nah, *nam; -PIX *pixt1, *pixt2, *pixfg, *pixbg; -PTA *ptah, *ptam; -SEL *seld, *sel; - - PROCNAME("pixGenerateSelWithRuns"); - - if (ppixe) *ppixe = NULL; - if (!pixs) - return (SEL *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (SEL *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (nhlines < 1 && nvlines < 1) - return (SEL *)ERROR_PTR("nvlines and nhlines both < 1", procName, NULL); - - if (distance <= 0) - distance = DefaultDistanceToBoundary; - if (minlength <= 0) - minlength = DefaultMinRunlength; - if (distance > MaxDistanceToBoundary) { - L_WARNING("distance too large; setting to max value\n", procName); - distance = MaxDistanceToBoundary; - } - - /* Locate the foreground */ - pixClipToForeground(pixs, &pixt1, NULL); - if (!pixt1) - return (SEL *)ERROR_PTR("pixt1 not made", procName, NULL); - ws = pixGetWidth(pixt1); - hs = pixGetHeight(pixt1); - w = ws; - h = hs; - - /* Crop out a region including the foreground, and add pixels - * on sides depending on the side flags */ - if (toppix || botpix || leftpix || rightpix) { - x = y = 0; - if (toppix) { - h += toppix; - y = toppix; - if (toppix < distance + minlength) - L_WARNING("no miss elements in added top pixels\n", procName); - } - if (botpix) { - h += botpix; - if (botpix < distance + minlength) - L_WARNING("no miss elements in added bot pixels\n", procName); - } - if (leftpix) { - w += leftpix; - x = leftpix; - if (leftpix < distance + minlength) - L_WARNING("no miss elements in added left pixels\n", procName); - } - if (rightpix) { - w += rightpix; - if (rightpix < distance + minlength) - L_WARNING("no miss elements in added right pixels\n", procName); - } - pixt2 
= pixCreate(w, h, 1); - pixRasterop(pixt2, x, y, ws, hs, PIX_SRC, pixt1, 0, 0); - } else { - pixt2 = pixClone(pixt1); - } - if (ppixe) - *ppixe = pixClone(pixt2); - pixDestroy(&pixt1); - - /* Identify fg and bg pixels that are at least 'distance' pixels - * away from the boundary pixels in their set */ - seld = selCreateBrick(2 * distance + 1, 2 * distance + 1, - distance, distance, SEL_HIT); - pixfg = pixErode(NULL, pixt2, seld); - pixbg = pixDilate(NULL, pixt2, seld); - pixInvert(pixbg, pixbg); - selDestroy(&seld); - pixDestroy(&pixt2); - - /* Accumulate hit and miss points */ - ptah = ptaCreate(0); - ptam = ptaCreate(0); - if (nhlines >= 1) { - delh = (l_float32)h / (l_float32)(nhlines + 1); - for (i = 0, y = 0; i < nhlines; i++) { - y += (l_int32)(delh + 0.5); - nah = pixGetRunCentersOnLine(pixfg, -1, y, minlength); - nam = pixGetRunCentersOnLine(pixbg, -1, y, minlength); - nh = numaGetCount(nah); - nm = numaGetCount(nam); - for (j = 0; j < nh; j++) { - numaGetIValue(nah, j, &xval); - ptaAddPt(ptah, xval, y); - } - for (j = 0; j < nm; j++) { - numaGetIValue(nam, j, &xval); - ptaAddPt(ptam, xval, y); - } - numaDestroy(&nah); - numaDestroy(&nam); - } - } - if (nvlines >= 1) { - delw = (l_float32)w / (l_float32)(nvlines + 1); - for (i = 0, x = 0; i < nvlines; i++) { - x += (l_int32)(delw + 0.5); - nah = pixGetRunCentersOnLine(pixfg, x, -1, minlength); - nam = pixGetRunCentersOnLine(pixbg, x, -1, minlength); - nh = numaGetCount(nah); - nm = numaGetCount(nam); - for (j = 0; j < nh; j++) { - numaGetIValue(nah, j, &yval); - ptaAddPt(ptah, x, yval); - } - for (j = 0; j < nm; j++) { - numaGetIValue(nam, j, &yval); - ptaAddPt(ptam, x, yval); - } - numaDestroy(&nah); - numaDestroy(&nam); - } - } - - /* Make the Sel with those points */ - sel = selCreateBrick(h, w, h / 2, w / 2, SEL_DONT_CARE); - nh = ptaGetCount(ptah); - for (i = 0; i < nh; i++) { - ptaGetIPt(ptah, i, &x, &y); - selSetElement(sel, y, x, SEL_HIT); - } - nm = ptaGetCount(ptam); - for (i = 0; i < nm; i++) { 
- ptaGetIPt(ptam, i, &x, &y); - selSetElement(sel, y, x, SEL_MISS); - } - - pixDestroy(&pixfg); - pixDestroy(&pixbg); - ptaDestroy(&ptah); - ptaDestroy(&ptam); - return sel; -} - - -/*! - * \brief pixGenerateSelRandom() - * - * \param[in] pixs 1 bpp, typically small, to be used as a pattern - * \param[in] hitfract fraction of allowable fg pixels that are hits - * \param[in] missfract fraction of allowable bg pixels that are misses - * \param[in] distance min distance from boundary pixel; use 0 for default - * \param[in] toppix number of extra pixels of bg added above - * \param[in] botpix number of extra pixels of bg added below - * \param[in] leftpix number of extra pixels of bg added to left - * \param[in] rightpix number of extra pixels of bg added to right - * \param[out] ppixe [optional] input pix expanded by extra pixels - * \return sel hit-miss for input pattern, or NULL on error - * - *
- * Notes:
- *    (1) Either of hitfract and missfract can be zero.  If both are zero,
- *        the sel would be empty, and NULL is returned.
- *    (2) No elements are selected that are less than 'distance' pixels away
- *        from a boundary pixel of the same color.  This makes the
- *        match much more robust to edge noise.  Valid inputs of
- *        'distance' are 0, 1, 2, 3 and 4.  If distance is either 0 or
- *        greater than 4, we reset it to the default value.
- *    (3) The 4 numbers for adding rectangles of pixels outside the fg
- *        can be use if the pattern is expected to be surrounded by bg
- *        (white) pixels.  On the other hand, if the pattern may be near
- *        other fg (black) components on some sides, use 0 for those sides.
- *    (4) The input pix, as extended by the extra pixels on selected sides,
- *        can optionally be returned.  For debugging, call
- *        pixDisplayHitMissSel() to visualize the hit-miss sel superimposed
- *        on the generating bitmap.
- * 
- */ -SEL * -pixGenerateSelRandom(PIX *pixs, - l_float32 hitfract, - l_float32 missfract, - l_int32 distance, - l_int32 toppix, - l_int32 botpix, - l_int32 leftpix, - l_int32 rightpix, - PIX **ppixe) -{ -l_int32 ws, hs, w, h, x, y, i, j, thresh; -l_uint32 val; -PIX *pixt1, *pixt2, *pixfg, *pixbg; -SEL *seld, *sel; - - PROCNAME("pixGenerateSelRandom"); - - if (ppixe) *ppixe = NULL; - if (!pixs) - return (SEL *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (SEL *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (hitfract <= 0.0 && missfract <= 0.0) - return (SEL *)ERROR_PTR("no hits or misses", procName, NULL); - if (hitfract > 1.0 || missfract > 1.0) - return (SEL *)ERROR_PTR("fraction can't be > 1.0", procName, NULL); - - if (distance <= 0) - distance = DefaultDistanceToBoundary; - if (distance > MaxDistanceToBoundary) { - L_WARNING("distance too large; setting to max value\n", procName); - distance = MaxDistanceToBoundary; - } - - /* Locate the foreground */ - pixClipToForeground(pixs, &pixt1, NULL); - if (!pixt1) - return (SEL *)ERROR_PTR("pixt1 not made", procName, NULL); - ws = pixGetWidth(pixt1); - hs = pixGetHeight(pixt1); - w = ws; - h = hs; - - /* Crop out a region including the foreground, and add pixels - * on sides depending on the side flags */ - if (toppix || botpix || leftpix || rightpix) { - x = y = 0; - if (toppix) { - h += toppix; - y = toppix; - } - if (botpix) - h += botpix; - if (leftpix) { - w += leftpix; - x = leftpix; - } - if (rightpix) - w += rightpix; - pixt2 = pixCreate(w, h, 1); - pixRasterop(pixt2, x, y, ws, hs, PIX_SRC, pixt1, 0, 0); - } else { - pixt2 = pixClone(pixt1); - } - if (ppixe) - *ppixe = pixClone(pixt2); - pixDestroy(&pixt1); - - /* Identify fg and bg pixels that are at least 'distance' pixels - * away from the boundary pixels in their set */ - seld = selCreateBrick(2 * distance + 1, 2 * distance + 1, - distance, distance, SEL_HIT); - pixfg = pixErode(NULL, pixt2, seld); - pixbg = 
pixDilate(NULL, pixt2, seld); - pixInvert(pixbg, pixbg); - selDestroy(&seld); - pixDestroy(&pixt2); - - /* Generate the sel from a random selection of these points */ - sel = selCreateBrick(h, w, h / 2, w / 2, SEL_DONT_CARE); - if (hitfract > 0.0) { - thresh = (l_int32)(hitfract * (l_float64)RAND_MAX); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pixfg, j, i, &val); - if (val) { - if (rand() < thresh) - selSetElement(sel, i, j, SEL_HIT); - } - } - } - } - if (missfract > 0.0) { - thresh = (l_int32)(missfract * (l_float64)RAND_MAX); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - pixGetPixel(pixbg, j, i, &val); - if (val) { - if (rand() < thresh) - selSetElement(sel, i, j, SEL_MISS); - } - } - } - } - - pixDestroy(&pixfg); - pixDestroy(&pixbg); - return sel; -} - - -/*! - * \brief pixGenerateSelBoundary() - * - * \param[in] pixs 1 bpp, typically small, to be used as a pattern - * \param[in] hitdist min distance from fg boundary pixel - * \param[in] missdist min distance from bg boundary pixel - * \param[in] hitskip number of boundary pixels skipped between hits - * \param[in] missskip number of boundary pixels skipped between misses - * \param[in] topflag flag for extra pixels of bg added above - * \param[in] botflag flag for extra pixels of bg added below - * \param[in] leftflag flag for extra pixels of bg added to left - * \param[in] rightflag flag for extra pixels of bg added to right - * \param[out] ppixe [optional] input pix expanded by extra pixels - * \return sel hit-miss for input pattern, or NULL on error - * - *
- * Notes:
- *    (1) All fg elements selected are exactly hitdist pixels away from
- *        the nearest fg boundary pixel, and ditto for bg elements.
- *        Valid inputs of hitdist and missdist are 0, 1, 2, 3 and 4.
- *        For example, a hitdist of 0 puts the hits at the fg boundary.
- *        Usually, the distances should be > 0 avoid the effect of
- *        noise at the boundary.
- *    (2) Set hitskip < 0 if no hits are to be used.  Ditto for missskip.
- *        If both hitskip and missskip are < 0, the sel would be empty,
- *        and NULL is returned.
- *    (3) The 4 flags determine whether the sel is increased on that side
- *        to allow bg misses to be placed all along that boundary.
- *        The increase in sel size on that side is the minimum necessary
- *        to allow the misses to be placed at mindist.  For text characters,
- *        the topflag and botflag are typically set to 1, and the leftflag
- *        and rightflag to 0.
- *    (4) The input pix, as extended by the extra pixels on selected sides,
- *        can optionally be returned.  For debugging, call
- *        pixDisplayHitMissSel() to visualize the hit-miss sel superimposed
- *        on the generating bitmap.
- *    (5) This is probably the best of the three sel generators, in the
- *        sense that you have the most flexibility with the smallest number
- *        of hits and misses.
- * 
- */ -SEL * -pixGenerateSelBoundary(PIX *pixs, - l_int32 hitdist, - l_int32 missdist, - l_int32 hitskip, - l_int32 missskip, - l_int32 topflag, - l_int32 botflag, - l_int32 leftflag, - l_int32 rightflag, - PIX **ppixe) -{ -l_int32 ws, hs, w, h, x, y, ix, iy, i, npt; -PIX *pixt1, *pixt2, *pixt3, *pixfg, *pixbg; -SEL *selh, *selm, *sel_3, *sel; -PTA *ptah, *ptam; - - PROCNAME("pixGenerateSelBoundary"); - - if (ppixe) *ppixe = NULL; - if (!pixs) - return (SEL *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (SEL *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (hitdist < 0 || hitdist > 4 || missdist < 0 || missdist > 4) - return (SEL *)ERROR_PTR("dist not in {0 .. 4}", procName, NULL); - if (hitskip < 0 && missskip < 0) - return (SEL *)ERROR_PTR("no hits or misses", procName, NULL); - - /* Locate the foreground */ - pixClipToForeground(pixs, &pixt1, NULL); - if (!pixt1) - return (SEL *)ERROR_PTR("pixt1 not made", procName, NULL); - ws = pixGetWidth(pixt1); - hs = pixGetHeight(pixt1); - w = ws; - h = hs; - - /* Crop out a region including the foreground, and add pixels - * on sides depending on the side flags */ - if (topflag || botflag || leftflag || rightflag) { - x = y = 0; - if (topflag) { - h += missdist + 1; - y = missdist + 1; - } - if (botflag) - h += missdist + 1; - if (leftflag) { - w += missdist + 1; - x = missdist + 1; - } - if (rightflag) - w += missdist + 1; - pixt2 = pixCreate(w, h, 1); - pixRasterop(pixt2, x, y, ws, hs, PIX_SRC, pixt1, 0, 0); - } else { - pixt2 = pixClone(pixt1); - } - if (ppixe) - *ppixe = pixClone(pixt2); - pixDestroy(&pixt1); - - /* Identify fg and bg pixels that are exactly hitdist and - * missdist (rsp) away from the boundary pixels in their set. - * Then get a subsampled set of these points. 
*/ - sel_3 = selCreateBrick(3, 3, 1, 1, SEL_HIT); - if (hitskip >= 0) { - selh = selCreateBrick(2 * hitdist + 1, 2 * hitdist + 1, - hitdist, hitdist, SEL_HIT); - pixt3 = pixErode(NULL, pixt2, selh); - pixfg = pixErode(NULL, pixt3, sel_3); - pixXor(pixfg, pixfg, pixt3); - ptah = pixSubsampleBoundaryPixels(pixfg, hitskip); - pixDestroy(&pixt3); - pixDestroy(&pixfg); - selDestroy(&selh); - } - if (missskip >= 0) { - selm = selCreateBrick(2 * missdist + 1, 2 * missdist + 1, - missdist, missdist, SEL_HIT); - pixt3 = pixDilate(NULL, pixt2, selm); - pixbg = pixDilate(NULL, pixt3, sel_3); - pixXor(pixbg, pixbg, pixt3); - ptam = pixSubsampleBoundaryPixels(pixbg, missskip); - pixDestroy(&pixt3); - pixDestroy(&pixbg); - selDestroy(&selm); - } - selDestroy(&sel_3); - pixDestroy(&pixt2); - - /* Generate the hit-miss sel from these point */ - sel = selCreateBrick(h, w, h / 2, w / 2, SEL_DONT_CARE); - if (hitskip >= 0) { - npt = ptaGetCount(ptah); - for (i = 0; i < npt; i++) { - ptaGetIPt(ptah, i, &ix, &iy); - selSetElement(sel, iy, ix, SEL_HIT); - } - } - if (missskip >= 0) { - npt = ptaGetCount(ptam); - for (i = 0; i < npt; i++) { - ptaGetIPt(ptam, i, &ix, &iy); - selSetElement(sel, iy, ix, SEL_MISS); - } - } - - ptaDestroy(&ptah); - ptaDestroy(&ptam); - return sel; -} - - -/*-----------------------------------------------------------------* - * Accumulate data on runs along lines * - *-----------------------------------------------------------------*/ -/*! - * \brief pixGetRunCentersOnLine() - * - * \param[in] pixs 1 bpp - * \param[in] x, y set one of these to -1; see notes - * \param[in] minlength minimum length of acceptable run - * \return numa of fg runs, or NULL on error - * - *
- * Notes:
- *      (1) Action: this function computes the fg (black) and bg (white)
- *          pixel runlengths along the specified horizontal or vertical line,
- *          and returns a Numa of the "center" pixels of each fg run
- *          whose length equals or exceeds the minimum length.
- *      (2) This only works on horizontal and vertical lines.
- *      (3) For horizontal runs, set x = -1 and y to the value
- *          for all points along the raster line.  For vertical runs,
- *          set y = -1 and x to the value for all points along the
- *          pixel column.
- *      (4) For horizontal runs, the points in the Numa are the x
- *          values in the center of fg runs that are of length at
- *          least 'minlength'.  For vertical runs, the points in the
- *          Numa are the y values in the center of fg runs, again
- *          of length 'minlength' or greater.
- *      (5) If there are no fg runs along the line that satisfy the
- *          minlength constraint, the returned Numa is empty.  This
- *          is not an error.
- * 
- */ -NUMA * -pixGetRunCentersOnLine(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 minlength) -{ -l_int32 w, h, i, r, nruns, len; -NUMA *naruns, *nad; - - PROCNAME("pixGetRunCentersOnLine"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (NUMA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (x != -1 && y != -1) - return (NUMA *)ERROR_PTR("x or y must be -1", procName, NULL); - if (x == -1 && y == -1) - return (NUMA *)ERROR_PTR("x or y cannot both be -1", procName, NULL); - - if ((nad = numaCreate(0)) == NULL) - return (NUMA *)ERROR_PTR("nad not made", procName, NULL); - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - if (x == -1) { /* horizontal run */ - if (y < 0 || y >= h) - return nad; - naruns = pixGetRunsOnLine(pixs, 0, y, w - 1, y); - } else { /* vertical run */ - if (x < 0 || x >= w) - return nad; - naruns = pixGetRunsOnLine(pixs, x, 0, x, h - 1); - } - nruns = numaGetCount(naruns); - - /* extract run center values; the first run is always bg */ - r = 0; /* cumulative distance along line */ - for (i = 0; i < nruns; i++) { - if (i % 2 == 0) { /* bg run */ - numaGetIValue(naruns, i, &len); - r += len; - continue; - } else { - numaGetIValue(naruns, i, &len); - if (len >= minlength) - numaAddNumber(nad, r + len / 2); - r += len; - } - } - - numaDestroy(&naruns); - return nad; -} - - -/*! - * \brief pixGetRunsOnLine() - * - * \param[in] pixs 1 bpp - * \param[in] x1, y1, x2, y2 - * \return numa, or NULL on error - * - *
- * Notes:
- *      (1) Action: this function uses the bresenham algorithm to compute
- *          the pixels along the specified line.  It returns a Numa of the
- *          runlengths of the fg (black) and bg (white) runs, always
- *          starting with a white run.
- *      (2) If the first pixel on the line is black, the length of the
- *          first returned run (which is white) is 0.
- * 
- */ -NUMA * -pixGetRunsOnLine(PIX *pixs, - l_int32 x1, - l_int32 y1, - l_int32 x2, - l_int32 y2) -{ -l_int32 w, h, x, y, npts; -l_int32 i, runlen, preval; -l_uint32 val; -NUMA *numa; -PTA *pta; - - PROCNAME("pixGetRunsOnLine"); - - if (!pixs) - return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (NUMA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - if (x1 < 0 || x1 >= w) - return (NUMA *)ERROR_PTR("x1 not valid", procName, NULL); - if (x2 < 0 || x2 >= w) - return (NUMA *)ERROR_PTR("x2 not valid", procName, NULL); - if (y1 < 0 || y1 >= h) - return (NUMA *)ERROR_PTR("y1 not valid", procName, NULL); - if (y2 < 0 || y2 >= h) - return (NUMA *)ERROR_PTR("y2 not valid", procName, NULL); - - if ((pta = generatePtaLine(x1, y1, x2, y2)) == NULL) - return (NUMA *)ERROR_PTR("pta not made", procName, NULL); - if ((npts = ptaGetCount(pta)) == 0) { - ptaDestroy(&pta); - return (NUMA *)ERROR_PTR("pta has no pts", procName, NULL); - } - if ((numa = numaCreate(0)) == NULL) { - ptaDestroy(&pta); - return (NUMA *)ERROR_PTR("numa not made", procName, NULL); - } - - for (i = 0; i < npts; i++) { - ptaGetIPt(pta, i, &x, &y); - pixGetPixel(pixs, x, y, &val); - if (i == 0) { - if (val == 1) { /* black pixel; append white run of size 0 */ - numaAddNumber(numa, 0); - } - preval = val; - runlen = 1; - continue; - } - if (val == preval) { /* extend current run */ - preval = val; - runlen++; - } else { /* end previous run */ - numaAddNumber(numa, runlen); - preval = val; - runlen = 1; - } - } - numaAddNumber(numa, runlen); /* append last run */ - - ptaDestroy(&pta); - return numa; -} - - -/*-----------------------------------------------------------------* - * Subsample boundary pixels in relatively ordered way * - *-----------------------------------------------------------------*/ -/*! 
- * \brief pixSubsampleBoundaryPixels() - * - * \param[in] pixs 1 bpp, with only boundary pixels in fg - * \param[in] skip number to skip between samples as you traverse boundary - * \return pta, or NULL on error - * - *
- * Notes:
- *      (1) If skip = 0, we take all the fg pixels.
- *      (2) We try to traverse the boundaries in a regular way.
- *          Some pixels may be missed, and these are then subsampled
- *          randomly with a fraction determined by 'skip'.
- *      (3) The most natural approach is to use a depth first (stack-based)
- *          method to find the fg pixels.  However, the pixel runs are
- *          4-connected and there are relatively few branches.  So
- *          instead of doing a proper depth-first search, we get nearly
- *          the same result using two nested while loops: the outer
- *          one continues a raster-based search for the next fg pixel,
- *          and the inner one does a reasonable job running along
- *          each 4-connected coutour.
- * 
- */ -PTA * -pixSubsampleBoundaryPixels(PIX *pixs, - l_int32 skip) -{ -l_int32 x, y, xn, yn, xs, ys, xa, ya, count; -PIX *pixt; -PTA *pta; - - PROCNAME("pixSubsampleBoundaryPixels"); - - if (!pixs) - return (PTA *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PTA *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (skip < 0) - return (PTA *)ERROR_PTR("skip < 0", procName, NULL); - - if (skip == 0) - return ptaGetPixelsFromPix(pixs, NULL); - - pta = ptaCreate(0); - pixt = pixCopy(NULL, pixs); - xs = ys = 0; - while (nextOnPixelInRaster(pixt, xs, ys, &xn, &yn)) { /* new series */ - xs = xn; - ys = yn; - - /* Add first point in this series */ - ptaAddPt(pta, xs, ys); - - /* Trace out boundary, erasing all and saving every (skip + 1)th */ - x = xs; - y = ys; - pixSetPixel(pixt, x, y, 0); - count = 0; - while (adjacentOnPixelInRaster(pixt, x, y, &xa, &ya)) { - x = xa; - y = ya; - pixSetPixel(pixt, x, y, 0); - if (count == skip) { - ptaAddPt(pta, x, y); - count = 0; - } else { - count++; - } - } - } - - pixDestroy(&pixt); - return pta; -} - - -/*! - * \brief adjacentOnPixelInRaster() - * - * \param[in] pixs 1 bpp - * \param[in] x, y current pixel - * \param[out] pxa, pya adjacent ON pixel, found by simple CCW search - * \return 1 if a pixel is found; 0 otherwise or on error - * - *
- * Notes:
- *      (1) Search is in 4-connected directions first; then on diagonals.
- *          This allows traversal along a 4-connected boundary.
- * 
- */ -l_int32 -adjacentOnPixelInRaster(PIX *pixs, - l_int32 x, - l_int32 y, - l_int32 *pxa, - l_int32 *pya) -{ -l_int32 w, h, i, xa, ya, found; -l_int32 xdel[] = {-1, 0, 1, 0, -1, 1, 1, -1}; -l_int32 ydel[] = {0, 1, 0, -1, 1, 1, -1, -1}; -l_uint32 val; - - PROCNAME("adjacentOnPixelInRaster"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 0); - if (pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not 1 bpp", procName, 0); - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - found = 0; - for (i = 0; i < 8; i++) { - xa = x + xdel[i]; - ya = y + ydel[i]; - if (xa < 0 || xa >= w || ya < 0 || ya >= h) - continue; - pixGetPixel(pixs, xa, ya, &val); - if (val == 1) { - found = 1; - *pxa = xa; - *pya = ya; - break; - } - } - return found; -} - - - -/*-----------------------------------------------------------------* - * Display generated sel with originating image * - *-----------------------------------------------------------------*/ -/*! - * \brief pixDisplayHitMissSel() - * - * \param[in] pixs 1 bpp - * \param[in] sel hit-miss in general - * \param[in] scalefactor an integer >= 1; use 0 for default - * \param[in] hitcolor RGB0 color for center of hit pixels - * \param[in] misscolor RGB0 color for center of miss pixels - * \return pixd RGB showing both pixs and sel, or NULL on error - *
- * Notes:
- *    (1) We don't allow scalefactor to be larger than MaxSelScalefactor
- *    (2) The colors are conveniently given as 4 bytes in hex format,
- *        such as 0xff008800.  The least significant byte is ignored.
- * 
- */ -PIX * -pixDisplayHitMissSel(PIX *pixs, - SEL *sel, - l_int32 scalefactor, - l_uint32 hitcolor, - l_uint32 misscolor) -{ -l_int32 i, j, type; -l_float32 fscale; -PIX *pixt, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixDisplayHitMissSel"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (pixGetDepth(pixs) != 1) - return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); - if (!sel) - return (PIX *)ERROR_PTR("sel not defined", procName, NULL); - - if (scalefactor <= 0) - scalefactor = DefaultSelScalefactor; - if (scalefactor > MaxSelScalefactor) { - L_WARNING("scalefactor too large; using max value\n", procName); - scalefactor = MaxSelScalefactor; - } - - /* Generate a version of pixs with a colormap */ - pixt = pixConvert1To8(NULL, pixs, 0, 1); - cmap = pixcmapCreate(8); - pixcmapAddColor(cmap, 255, 255, 255); - pixcmapAddColor(cmap, 0, 0, 0); - pixcmapAddColor(cmap, hitcolor >> 24, (hitcolor >> 16) & 0xff, - (hitcolor >> 8) & 0xff); - pixcmapAddColor(cmap, misscolor >> 24, (misscolor >> 16) & 0xff, - (misscolor >> 8) & 0xff); - pixSetColormap(pixt, cmap); - - /* Color the hits and misses */ - for (i = 0; i < sel->sy; i++) { - for (j = 0; j < sel->sx; j++) { - selGetElement(sel, i, j, &type); - if (type == SEL_DONT_CARE) - continue; - if (type == SEL_HIT) - pixSetPixel(pixt, j, i, 2); - else /* type == SEL_MISS */ - pixSetPixel(pixt, j, i, 3); - } - } - - /* Scale it up */ - fscale = (l_float32)scalefactor; - pixd = pixScaleBySampling(pixt, fscale, fscale); - - pixDestroy(&pixt); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/shear.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/shear.c deleted file mode 100644 index 41546a30..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/shear.c +++ /dev/null @@ -1,854 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file shear.c - *
- *
- *    About arbitrary lines
- *           PIX      *pixHShear()
- *           PIX      *pixVShear()
- *
- *    About special 'points': UL corner and center
- *           PIX      *pixHShearCorner()
- *           PIX      *pixVShearCorner()
- *           PIX      *pixHShearCenter()
- *           PIX      *pixVShearCenter()
- *
- *    In place about arbitrary lines
- *           l_int32   pixHShearIP()
- *           l_int32   pixVShearIP()
- *
- *    Linear interpolated shear about arbitrary lines
- *           PIX      *pixHShearLI()
- *           PIX      *pixVShearLI()
- *
- *    Static helper
- *      static l_float32  normalizeAngleForShear()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include "allheaders.h" - - /* Shear angle must not get too close to -pi/2 or pi/2 */ -static const l_float32 MinDiffFromHalfPi = 0.04; - -static l_float32 normalizeAngleForShear(l_float32 radang, l_float32 mindif); - - -#ifndef NO_CONSOLE_IO -#define DEBUG 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-------------------------------------------------------------* - * About arbitrary lines * - *-------------------------------------------------------------*/ -/*! - * \brief pixHShear() - * - * \param[in] pixd [optional] this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs any depth; cmap ok - * \param[in] yloc location of horizontal line, measured from origin - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, always - * - *
- * Notes:
- *      (1) There are 3 cases:
- *            (a) pixd == null (make a new pixd)
- *            (b) pixd == pixs (in-place)
- *            (c) pixd != pixs
- *      (2) For these three cases, use these patterns, respectively:
- *              pixd = pixHShear(NULL, pixs, ...);
- *              pixHShear(pixs, pixs, ...);
- *              pixHShear(pixd, pixs, ...);
- *      (3) This shear leaves the horizontal line of pixels at y = yloc
- *          invariant.  For a positive shear angle, pixels above this
- *          line are shoved to the right, and pixels below this line
- *          move to the left.
- *      (4) With positive shear angle, this can be used, along with
- *          pixVShear(), to perform a cw rotation, either with 2 shears
- *          (for small angles) or in the general case with 3 shears.
- *      (5) Changing the value of yloc is equivalent to translating
- *          the result horizontally.
- *      (6) This brings in %incolor pixels from outside the image.
- *      (7) In-place shears do not work on cmapped pix, because the
- *          in-place operation cannot initialize to the requested %incolor,
- *          so we shear from a copy.
- *      (8) The angle is brought into the range [-pi, -pi].  It is
- *          not permitted to be within MinDiffFromHalfPi radians
- *          from either -pi/2 or pi/2.
- * 
- */ -PIX * -pixHShear(PIX *pixd, - PIX *pixs, - l_int32 yloc, - l_float32 radang, - l_int32 incolor) -{ -l_int32 sign, w, h; -l_int32 y, yincr, inityincr, hshift; -l_float32 tanangle, invangle; - - PROCNAME("pixHShear"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor value", procName, pixd); - - if (pixd == pixs) { /* in place */ - if (!pixGetColormap(pixs)) { - pixHShearIP(pixd, yloc, radang, incolor); - } else { /* can't do in-place with a colormap */ - PIX *pix1 = pixCopy(NULL, pixs); - pixHShear(pixd, pix1, yloc, radang, incolor); - pixDestroy(&pix1); - } - return pixd; - } - - /* Make sure pixd exists and is same size as pixs */ - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } else { /* pixd != pixs */ - pixResizeImageData(pixd, pixs); - } - - /* Normalize angle. If no rotation, return a copy */ - radang = normalizeAngleForShear(radang, MinDiffFromHalfPi); - if (radang == 0.0 || tan(radang) == 0.0) - return pixCopy(pixd, pixs); - - /* Initialize to value of incoming pixels */ - pixSetBlackOrWhite(pixd, incolor); - - pixGetDimensions(pixs, &w, &h, NULL); - sign = L_SIGN(radang); - tanangle = tan(radang); - invangle = L_ABS(1. 
/ tanangle); - inityincr = (l_int32)(invangle / 2.); - yincr = (l_int32)invangle; - pixRasterop(pixd, 0, yloc - inityincr, w, 2 * inityincr, PIX_SRC, - pixs, 0, yloc - inityincr); - - for (hshift = 1, y = yloc + inityincr; y < h; hshift++) { - yincr = (l_int32)(invangle * (hshift + 0.5) + 0.5) - (y - yloc); - if (h - y < yincr) /* reduce for last one if req'd */ - yincr = h - y; - pixRasterop(pixd, -sign*hshift, y, w, yincr, PIX_SRC, pixs, 0, y); -#if DEBUG - lept_stderr("y = %d, hshift = %d, yincr = %d\n", y, hshift, yincr); -#endif /* DEBUG */ - y += yincr; - } - - for (hshift = -1, y = yloc - inityincr; y > 0; hshift--) { - yincr = (y - yloc) - (l_int32)(invangle * (hshift - 0.5) + 0.5); - if (y < yincr) /* reduce for last one if req'd */ - yincr = y; - pixRasterop(pixd, -sign*hshift, y - yincr, w, yincr, PIX_SRC, - pixs, 0, y - yincr); -#if DEBUG - lept_stderr("y = %d, hshift = %d, yincr = %d\n", - y - yincr, hshift, yincr); -#endif /* DEBUG */ - y -= yincr; - } - - return pixd; -} - - -/*! - * \brief pixVShear() - * - * \param[in] pixd [optional], this can be null, equal to pixs, - * or different from pixs - * \param[in] pixs any depth; cmap ok - * \param[in] xloc location of vertical line, measured from origin - * \param[in] radang angle in radians; not too close to +-(pi / 2) - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error - * - *
- * Notes:
- *      (1) There are 3 cases:
- *            (a) pixd == null (make a new pixd)
- *            (b) pixd == pixs (in-place)
- *            (c) pixd != pixs
- *      (2) For these three cases, use these patterns, respectively:
- *              pixd = pixVShear(NULL, pixs, ...);
- *              pixVShear(pixs, pixs, ...);
- *              pixVShear(pixd, pixs, ...);
- *      (3) This shear leaves the vertical line of pixels at x = xloc
- *          invariant.  For a positive shear angle, pixels to the right
- *          of this line are shoved downward, and pixels to the left
- *          of the line move upward.
- *      (4) With positive shear angle, this can be used, along with
- *          pixHShear(), to perform a cw rotation, either with 2 shears
- *          (for small angles) or in the general case with 3 shears.
- *      (5) Changing the value of xloc is equivalent to translating
- *          the result vertically.
- *      (6) This brings in %incolor pixels from outside the image.
- *      (7) In-place shears do not work on cmapped pix, because the
- *          in-place operation cannot initialize to the requested %incolor,
- *          so we shear from a copy.
- *      (8) The angle is brought into the range [-pi, -pi].  It is
- *          not permitted to be within MinDiffFromHalfPi radians
- *          from either -pi/2 or pi/2.
- * 
- */ -PIX * -pixVShear(PIX *pixd, - PIX *pixs, - l_int32 xloc, - l_float32 radang, - l_int32 incolor) -{ -l_int32 sign, w, h; -l_int32 x, xincr, initxincr, vshift; -l_float32 tanangle, invangle; - - PROCNAME("pixVShear"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor value", procName, NULL); - - if (pixd == pixs) { /* in place */ - if (!pixGetColormap(pixs)) { - pixVShearIP(pixd, xloc, radang, incolor); - } else { /* can't do in-place with a colormap */ - PIX *pix1 = pixCopy(NULL, pixs); - pixVShear(pixd, pix1, xloc, radang, incolor); - pixDestroy(&pix1); - } - return pixd; - } - - /* Make sure pixd exists and is same size as pixs */ - if (!pixd) { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - return (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } else { /* pixd != pixs */ - pixResizeImageData(pixd, pixs); - } - - /* Normalize angle. If no rotation, return a copy */ - radang = normalizeAngleForShear(radang, MinDiffFromHalfPi); - if (radang == 0.0 || tan(radang) == 0.0) - return pixCopy(pixd, pixs); - - /* Initialize to value of incoming pixels */ - pixSetBlackOrWhite(pixd, incolor); - - pixGetDimensions(pixs, &w, &h, NULL); - sign = L_SIGN(radang); - tanangle = tan(radang); - invangle = L_ABS(1. 
/ tanangle); - initxincr = (l_int32)(invangle / 2.); - xincr = (l_int32)invangle; - pixRasterop(pixd, xloc - initxincr, 0, 2 * initxincr, h, PIX_SRC, - pixs, xloc - initxincr, 0); - - for (vshift = 1, x = xloc + initxincr; x < w; vshift++) { - xincr = (l_int32)(invangle * (vshift + 0.5) + 0.5) - (x - xloc); - if (w - x < xincr) /* reduce for last one if req'd */ - xincr = w - x; - pixRasterop(pixd, x, sign*vshift, xincr, h, PIX_SRC, pixs, x, 0); -#if DEBUG - lept_stderr("x = %d, vshift = %d, xincr = %d\n", x, vshift, xincr); -#endif /* DEBUG */ - x += xincr; - } - - for (vshift = -1, x = xloc - initxincr; x > 0; vshift--) { - xincr = (x - xloc) - (l_int32)(invangle * (vshift - 0.5) + 0.5); - if (x < xincr) /* reduce for last one if req'd */ - xincr = x; - pixRasterop(pixd, x - xincr, sign*vshift, xincr, h, PIX_SRC, - pixs, x - xincr, 0); -#if DEBUG - lept_stderr("x = %d, vshift = %d, xincr = %d\n", - x - xincr, vshift, xincr); -#endif /* DEBUG */ - x -= xincr; - } - - return pixd; -} - - - -/*-------------------------------------------------------------* - * Shears about UL corner and center * - *-------------------------------------------------------------*/ -/*! - * \brief pixHShearCorner() - * - * \param[in] pixd [optional], if not null, must be equal to pixs - * \param[in] pixs any depth - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) See pixHShear() for usage.
- *      (2) This does a horizontal shear about the UL corner, with (+) shear
- *          pushing increasingly leftward (-x) with increasing y.
- * 
- */ -PIX * -pixHShearCorner(PIX *pixd, - PIX *pixs, - l_float32 radang, - l_int32 incolor) -{ - PROCNAME("pixHShearCorner"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - - return pixHShear(pixd, pixs, 0, radang, incolor); -} - - -/*! - * \brief pixVShearCorner() - * - * \param[in] pixd [optional], if not null, must be equal to pixs - * \param[in] pixs any depth - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) See pixVShear() for usage.
- *      (2) This does a vertical shear about the UL corner, with (+) shear
- *          pushing increasingly downward (+y) with increasing x.
- * 
- */ -PIX * -pixVShearCorner(PIX *pixd, - PIX *pixs, - l_float32 radang, - l_int32 incolor) -{ - PROCNAME("pixVShearCorner"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - - return pixVShear(pixd, pixs, 0, radang, incolor); -} - - -/*! - * \brief pixHShearCenter() - * - * \param[in] pixd [optional] if not null, must be equal to pixs - * \param[in] pixs any depth - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) See pixHShear() for usage.
- *      (2) This does a horizontal shear about the center, with (+) shear
- *          pushing increasingly leftward (-x) with increasing y.
- * 
- */ -PIX * -pixHShearCenter(PIX *pixd, - PIX *pixs, - l_float32 radang, - l_int32 incolor) -{ - PROCNAME("pixHShearCenter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - - return pixHShear(pixd, pixs, pixGetHeight(pixs) / 2, radang, incolor); -} - - -/*! - * \brief pixVShearCenter() - * - * \param[in] pixd [optional] if not null, must be equal to pixs - * \param[in] pixs any depth - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd, or NULL on error. - * - *
- * Notes:
- *      (1) See pixVShear() for usage.
- *      (2) This does a vertical shear about the center, with (+) shear
- *          pushing increasingly downward (+y) with increasing x.
- * 
- */ -PIX * -pixVShearCenter(PIX *pixd, - PIX *pixs, - l_float32 radang, - l_int32 incolor) -{ - PROCNAME("pixVShearCenter"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, pixd); - - return pixVShear(pixd, pixs, pixGetWidth(pixs) / 2, radang, incolor); -} - - - -/*--------------------------------------------------------------------------* - * In place about arbitrary lines * - *--------------------------------------------------------------------------*/ -/*! - * \brief pixHShearIP() - * - * \param[in] pixs any depth; no cmap - * \param[in] yloc location of horizontal line, measured from origin - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is an in-place version of pixHShear(); see comments there.
- *      (2) This brings in 'incolor' pixels from outside the image.
- *      (3) pixs cannot be colormapped, because the in-place operation
- *          only blits in 0 or 1 bits, not an arbitrary colormap index.
- *      (4) Does a horizontal full-band shear about the line with (+) shear
- *          pushing increasingly leftward (-x) with increasing y.
- * 
- */ -l_ok -pixHShearIP(PIX *pixs, - l_int32 yloc, - l_float32 radang, - l_int32 incolor) -{ -l_int32 sign, w, h; -l_int32 y, yincr, inityincr, hshift; -l_float32 tanangle, invangle; - - PROCNAME("pixHShearIP"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return ERROR_INT("invalid incolor value", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - - /* Normalize angle */ - radang = normalizeAngleForShear(radang, MinDiffFromHalfPi); - if (radang == 0.0 || tan(radang) == 0.0) - return 0; - - sign = L_SIGN(radang); - pixGetDimensions(pixs, &w, &h, NULL); - tanangle = tan(radang); - invangle = L_ABS(1. / tanangle); - inityincr = (l_int32)(invangle / 2.); - yincr = (l_int32)invangle; - - if (inityincr > 0) - pixRasteropHip(pixs, yloc - inityincr, 2 * inityincr, 0, incolor); - - for (hshift = 1, y = yloc + inityincr; y < h; hshift++) { - yincr = (l_int32)(invangle * (hshift + 0.5) + 0.5) - (y - yloc); - if (yincr == 0) continue; - if (h - y < yincr) /* reduce for last one if req'd */ - yincr = h - y; - pixRasteropHip(pixs, y, yincr, -sign*hshift, incolor); - y += yincr; - } - - for (hshift = -1, y = yloc - inityincr; y > 0; hshift--) { - yincr = (y - yloc) - (l_int32)(invangle * (hshift - 0.5) + 0.5); - if (yincr == 0) continue; - if (y < yincr) /* reduce for last one if req'd */ - yincr = y; - pixRasteropHip(pixs, y - yincr, yincr, -sign*hshift, incolor); - y -= yincr; - } - - return 0; -} - - -/*! - * \brief pixVShearIP() - * - * \param[in] pixs any depth; no cmap - * \param[in] xloc location of vertical line, measured from origin - * \param[in] radang angle in radians - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is an in-place version of pixVShear(); see comments there.
- *      (2) This brings in 'incolor' pixels from outside the image.
- *      (3) pixs cannot be colormapped, because the in-place operation
- *          only blits in 0 or 1 bits, not an arbitrary colormap index.
- *      (4) Does a vertical full-band shear about the line with (+) shear
- *          pushing increasingly downward (+y) with increasing x.
- * 
- */ -l_ok -pixVShearIP(PIX *pixs, - l_int32 xloc, - l_float32 radang, - l_int32 incolor) -{ -l_int32 sign, w, h; -l_int32 x, xincr, initxincr, vshift; -l_float32 tanangle, invangle; - - PROCNAME("pixVShearIP"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return ERROR_INT("invalid incolor value", procName, 1); - if (pixGetColormap(pixs)) - return ERROR_INT("pixs is colormapped", procName, 1); - - /* Normalize angle */ - radang = normalizeAngleForShear(radang, MinDiffFromHalfPi); - if (radang == 0.0 || tan(radang) == 0.0) - return 0; - - sign = L_SIGN(radang); - pixGetDimensions(pixs, &w, &h, NULL); - tanangle = tan(radang); - invangle = L_ABS(1. / tanangle); - initxincr = (l_int32)(invangle / 2.); - xincr = (l_int32)invangle; - - if (initxincr > 0) - pixRasteropVip(pixs, xloc - initxincr, 2 * initxincr, 0, incolor); - - for (vshift = 1, x = xloc + initxincr; x < w; vshift++) { - xincr = (l_int32)(invangle * (vshift + 0.5) + 0.5) - (x - xloc); - if (xincr == 0) continue; - if (w - x < xincr) /* reduce for last one if req'd */ - xincr = w - x; - pixRasteropVip(pixs, x, xincr, sign*vshift, incolor); - x += xincr; - } - - for (vshift = -1, x = xloc - initxincr; x > 0; vshift--) { - xincr = (x - xloc) - (l_int32)(invangle * (vshift - 0.5) + 0.5); - if (xincr == 0) continue; - if (x < xincr) /* reduce for last one if req'd */ - xincr = x; - pixRasteropVip(pixs, x - xincr, xincr, sign*vshift, incolor); - x -= xincr; - } - - return 0; -} - - -/*-------------------------------------------------------------------------* - * Linear interpolated shear about arbitrary lines * - *-------------------------------------------------------------------------*/ -/*! - * \brief pixHShearLI() - * - * \param[in] pixs 8 bpp or 32 bpp, or colormapped - * \param[in] yloc location of horizontal line, measured from origin - * \param[in] radang angle in radians, in range (-pi/2 ... 
pi/2) - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd sheared, or NULL on error - * - *
- * Notes:
- *      (1) This does horizontal shear with linear interpolation for
- *          accurate results on 8 bpp gray, 32 bpp rgb, or cmapped images.
- *          It is relatively slow compared to the sampled version
- *          implemented by rasterop, but the result is much smoother.
- *      (2) This shear leaves the horizontal line of pixels at y = yloc
- *          invariant.  For a positive shear angle, pixels above this
- *          line are shoved to the right, and pixels below this line
- *          move to the left.
- *      (3) Any colormap is removed.
- *      (4) The angle is brought into the range [-pi/2 + del, pi/2 - del],
- *          where del == MinDiffFromHalfPi.
- * 
- */ -PIX * -pixHShearLI(PIX *pixs, - l_int32 yloc, - l_float32 radang, - l_int32 incolor) -{ -l_int32 i, jd, x, xp, xf, w, h, d, wm, wpls, wpld, val, rval, gval, bval; -l_uint32 word0, word1; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 tanangle, xshift; -PIX *pix, *pixd; - - PROCNAME("pixHShearLI"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not 8, 32 bpp, or cmap", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor value", procName, NULL); - if (yloc < 0 || yloc >= h) - return (PIX *)ERROR_PTR("yloc not in [0 ... h-1]", procName, NULL); - - if (pixGetColormap(pixs)) - pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pix = pixClone(pixs); - - /* Normalize angle. If no rotation, return a copy */ - radang = normalizeAngleForShear(radang, MinDiffFromHalfPi); - if (radang == 0.0 || tan(radang) == 0.0) { - pixDestroy(&pix); - return pixCopy(NULL, pixs); - } - - /* Initialize to value of incoming pixels */ - pixd = pixCreateTemplate(pix); - pixSetBlackOrWhite(pixd, incolor); - - /* Standard linear interp: subdivide each pixel into 64 parts */ - d = pixGetDepth(pixd); /* 8 or 32 */ - datas = pixGetData(pix); - datad = pixGetData(pixd); - wpls = pixGetWpl(pix); - wpld = pixGetWpl(pixd); - tanangle = tan(radang); - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - xshift = (yloc - i) * tanangle; - for (jd = 0; jd < w; jd++) { - x = (l_int32)(64.0 * (-xshift + jd) + 0.5); - xp = x / 64; - xf = x & 63; - wm = w - 1; - if (xp < 0 || xp > wm) continue; - if (d == 8) { - if (xp < wm) { - val = ((63 - xf) * GET_DATA_BYTE(lines, xp) + - xf * GET_DATA_BYTE(lines, xp + 1) + 31) / 63; - } else { /* xp == wm */ - val = GET_DATA_BYTE(lines, xp); - } - SET_DATA_BYTE(lined, jd, val); - } else { /* d == 
32 */ - if (xp < wm) { - word0 = *(lines + xp); - word1 = *(lines + xp + 1); - rval = ((63 - xf) * ((word0 >> L_RED_SHIFT) & 0xff) + - xf * ((word1 >> L_RED_SHIFT) & 0xff) + 31) / 63; - gval = ((63 - xf) * ((word0 >> L_GREEN_SHIFT) & 0xff) + - xf * ((word1 >> L_GREEN_SHIFT) & 0xff) + 31) / 63; - bval = ((63 - xf) * ((word0 >> L_BLUE_SHIFT) & 0xff) + - xf * ((word1 >> L_BLUE_SHIFT) & 0xff) + 31) / 63; - composeRGBPixel(rval, gval, bval, lined + jd); - } else { /* xp == wm */ - lined[jd] = lines[xp]; - } - } - } - } - - pixDestroy(&pix); - return pixd; -} - - -/*! - * \brief pixVShearLI() - * - * \param[in] pixs 8 bpp or 32 bpp, or colormapped - * \param[in] xloc location of vertical line, measured from origin - * \param[in] radang angle in radians, in range (-pi/2 ... pi/2) - * \param[in] incolor L_BRING_IN_WHITE, L_BRING_IN_BLACK; - * \return pixd sheared, or NULL on error - * - *
- * Notes:
- *      (1) This does vertical shear with linear interpolation for
- *          accurate results on 8 bpp gray, 32 bpp rgb, or cmapped images.
- *          It is relatively slow compared to the sampled version
- *          implemented by rasterop, but the result is much smoother.
- *      (2) This shear leaves the vertical line of pixels at x = xloc
- *          invariant.  For a positive shear angle, pixels to the right
- *          of this line are shoved downward, and pixels to the left
- *          of the line move upward.
- *      (3) Any colormap is removed.
- *      (4) The angle is brought into the range [-pi/2 + del, pi/2 - del],
- *          where del == MinDiffFromHalfPi.
- * 
- */ -PIX * -pixVShearLI(PIX *pixs, - l_int32 xloc, - l_float32 radang, - l_int32 incolor) -{ -l_int32 id, y, yp, yf, j, w, h, d, hm, wpls, wpld, val, rval, gval, bval; -l_uint32 word0, word1; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 tanangle, yshift; -PIX *pix, *pixd; - - PROCNAME("pixVShearLI"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32 && !pixGetColormap(pixs)) - return (PIX *)ERROR_PTR("pixs not 8, 32 bpp, or cmap", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor value", procName, NULL); - if (xloc < 0 || xloc >= w) - return (PIX *)ERROR_PTR("xloc not in [0 ... w-1]", procName, NULL); - - if (pixGetColormap(pixs)) - pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pix = pixClone(pixs); - - /* Normalize angle. If no rotation, return a copy */ - radang = normalizeAngleForShear(radang, MinDiffFromHalfPi); - if (radang == 0.0 || tan(radang) == 0.0) { - pixDestroy(&pix); - return pixCopy(NULL, pixs); - } - - /* Initialize to value of incoming pixels */ - pixd = pixCreateTemplate(pix); - pixSetBlackOrWhite(pixd, incolor); - - /* Standard linear interp: subdivide each pixel into 64 parts */ - d = pixGetDepth(pixd); /* 8 or 32 */ - datas = pixGetData(pix); - datad = pixGetData(pixd); - wpls = pixGetWpl(pix); - wpld = pixGetWpl(pixd); - tanangle = tan(radang); - for (j = 0; j < w; j++) { - yshift = (j - xloc) * tanangle; - for (id = 0; id < h; id++) { - y = (l_int32)(64.0 * (-yshift + id) + 0.5); - yp = y / 64; - yf = y & 63; - hm = h - 1; - if (yp < 0 || yp > hm) continue; - lines = datas + yp * wpls; - lined = datad + id * wpld; - if (d == 8) { - if (yp < hm) { - val = ((63 - yf) * GET_DATA_BYTE(lines, j) + - yf * GET_DATA_BYTE(lines + wpls, j) + 31) / 63; - } else { /* yp == hm */ - val = GET_DATA_BYTE(lines, j); - } - SET_DATA_BYTE(lined, j, val); - } else { /* d 
== 32 */ - if (yp < hm) { - word0 = *(lines + j); - word1 = *(lines + wpls + j); - rval = ((63 - yf) * ((word0 >> L_RED_SHIFT) & 0xff) + - yf * ((word1 >> L_RED_SHIFT) & 0xff) + 31) / 63; - gval = ((63 - yf) * ((word0 >> L_GREEN_SHIFT) & 0xff) + - yf * ((word1 >> L_GREEN_SHIFT) & 0xff) + 31) / 63; - bval = ((63 - yf) * ((word0 >> L_BLUE_SHIFT) & 0xff) + - yf * ((word1 >> L_BLUE_SHIFT) & 0xff) + 31) / 63; - composeRGBPixel(rval, gval, bval, lined + j); - } else { /* yp == hm */ - lined[j] = lines[j]; - } - } - } - } - - pixDestroy(&pix); - return pixd; -} - - -/*-------------------------------------------------------------------------* - * Angle normalization * - *-------------------------------------------------------------------------*/ -static l_float32 -normalizeAngleForShear(l_float32 radang, - l_float32 mindif) -{ -l_float32 pi2; - - PROCNAME("normalizeAngleForShear"); - - /* Bring angle into range [-pi/2, pi/2] */ - pi2 = 3.14159265 / 2.0; - if (radang < -pi2 || radang > pi2) - radang = radang - (l_int32)(radang / pi2) * pi2; - - /* If angle is too close to pi/2 or -pi/2, move it */ - if (radang > pi2 - mindif) { - L_WARNING("angle close to pi/2; shifting away\n", procName); - radang = pi2 - mindif; - } else if (radang < -pi2 + mindif) { - L_WARNING("angle close to -pi/2; shifting away\n", procName); - radang = -pi2 + mindif; - } - - return radang; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/skew.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/skew.c deleted file mode 100644 index a0b8f16b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/skew.c +++ /dev/null @@ -1,1247 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. 
Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file skew.c - *
- *
- *      Top-level deskew interfaces
- *          PIX       *pixDeskewBoth()
- *          PIX       *pixDeskew()
- *          PIX       *pixFindSkewAndDeskew()
- *          PIX       *pixDeskewGeneral()
- *
- *      Top-level angle-finding interface
- *          l_int32    pixFindSkew()
- *
- *      Basic angle-finding functions
- *          l_int32    pixFindSkewSweep()
- *          l_int32    pixFindSkewSweepAndSearch()
- *          l_int32    pixFindSkewSweepAndSearchScore()
- *          l_int32    pixFindSkewSweepAndSearchScorePivot()
- *
- *      Search over arbitrary range of angles in orthogonal directions
- *          l_int32    pixFindSkewOrthogonalRange()
- *
- *      Differential square sum function for scoring
- *          l_int32    pixFindDifferentialSquareSum()
- *
- *      Measures of variance of row sums
- *          l_int32    pixFindNormalizedSquareSum()
- *
- *
- *      ==============================================================
- *      Page skew detection
- *
- *      Skew is determined by pixel profiles, which are computed
- *      as pixel sums along the raster line for each line in the
- *      image.  By vertically shearing the image by a given angle,
- *      the sums can be computed quickly along the raster lines
- *      rather than along lines at that angle.  The score is
- *      computed from these line sums by taking the square of
- *      the DIFFERENCE between adjacent line sums, summed over
- *      all lines.  The skew angle is then found as the angle
- *      that maximizes the score.  The actual computation for
- *      any sheared image is done in the function
- *      pixFindDifferentialSquareSum().
- *
- *      The search for the angle that maximizes this score is
- *      most efficiently performed by first sweeping coarsely
- *      over angles, using a significantly reduced image (say, 4x
- *      reduction), to find the approximate maximum within a half
- *      degree or so, and then doing an interval-halving binary
- *      search at higher resolution to get the skew angle to
- *      within 1/20 degree or better.
- *
- *      The differential signal is used (rather than just using
- *      that variance of line sums) because it rejects the
- *      background noise due to total number of black pixels,
- *      and has maximum contributions from the baselines and
- *      x-height lines of text when the textlines are aligned
- *      with the raster lines.  It also works well in multicolumn
- *      pages where the textlines do not line up across columns.
- *
- *      The method is fast, accurate to within an angle (in radians)
- *      of approximately the inverse width in pixels of the image,
- *      and will work on a surprisingly small amount of text data
- *      (just a couple of text lines).  Consequently, it can
- *      also be used to find local skew if the skew were to vary
- *      significantly over the page.  Local skew determination
- *      is not very important except for locating lines of
- *      handwritten text that may be mixed with printed text.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Default sweep angle parameters for pixFindSkew() */ -static const l_float32 DefaultSweepRange = 7.0; /* degrees */ -static const l_float32 DefaultSweepDelta = 1.0; /* degrees */ - - /* Default final angle difference parameter for binary - * search in pixFindSkew(). The expected accuracy is - * not better than the inverse image width in pixels, - * say, 1/2000 radians, or about 0.03 degrees. */ -static const l_float32 DefaultMinbsDelta = 0.01; /* degrees */ - - /* Default scale factors for pixFindSkew() */ -static const l_int32 DefaultSweepReduction = 4; /* sweep part; 4 is good */ -static const l_int32 DefaultBsReduction = 2; /* binary search part */ - - /* Minimum angle for deskewing in pixDeskew() */ -static const l_float32 MinDeskewAngle = 0.1; /* degree */ - - /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */ -static const l_float32 MinAllowedConfidence = 3.0; - - /* Minimum allowed maxscore to give nonzero confidence */ -static const l_int32 MinValidMaxscore = 10000; - - /* Constant setting threshold for minimum allowed minscore - * to give nonzero confidence; multiply this constant by - * (height * width^2) */ -static const l_float32 MinscoreThreshFactor = 0.000002; - - /* Default binarization threshold value */ -static const l_int32 DefaultBinaryThreshold = 130; - -#ifndef NO_CONSOLE_IO -#define DEBUG_PRINT_SCORES 0 -#define DEBUG_PRINT_SWEEP 0 -#define DEBUG_PRINT_BINARY 0 -#define DEBUG_PRINT_ORTH 0 -#define DEBUG_THRESHOLD 0 -#define DEBUG_PLOT_SCORES 0 /* requires the gnuplot executable */ -#endif /* ~NO_CONSOLE_IO */ - - - -/*-----------------------------------------------------------------------* - * Top-level deskew interfaces * - *-----------------------------------------------------------------------*/ -/*! 
- * \brief pixDeskewBoth() - * - * \param[in] pixs any depth - * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; - * use 0 for default - * \return pixd deskewed pix, or NULL on error - * - *
- * Notes:
- *      (1) This binarizes if necessary and does both horizontal
- *          and vertical deskewing, using the default parameters in
- *          the underlying pixDeskew().  See usage there.
- *      (2) This may return a clone.
- * 
- */ -PIX * -pixDeskewBoth(PIX *pixs, - l_int32 redsearch) -{ -PIX *pix1, *pix2, *pix3, *pix4; - - PROCNAME("pixDeskewBoth"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (redsearch == 0) - redsearch = DefaultBsReduction; - else if (redsearch != 1 && redsearch != 2 && redsearch != 4) - return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); - - pix1 = pixDeskew(pixs, redsearch); - pix2 = pixRotate90(pix1, 1); - pix3 = pixDeskew(pix2, redsearch); - pix4 = pixRotate90(pix3, -1); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - return pix4; -} - - -/*! - * \brief pixDeskew() - * - * \param[in] pixs any depth - * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; - * use 0 for default - * \return pixd deskewed pix, or NULL on error - * - *
- * Notes:
- *      (1) This binarizes if necessary and finds the skew angle.  If the
- *          angle is large enough and there is sufficient confidence,
- *          it returns a deskewed image; otherwise, it returns a clone.
- *      (2) Typical values at 300 ppi for %redsearch are 2 and 4.
- *          At 75 ppi, one should use %redsearch = 1.
- * 
- */ -PIX * -pixDeskew(PIX *pixs, - l_int32 redsearch) -{ - PROCNAME("pixDeskew"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (redsearch == 0) - redsearch = DefaultBsReduction; - else if (redsearch != 1 && redsearch != 2 && redsearch != 4) - return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); - - return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, NULL, NULL); -} - - -/*! - * \brief pixFindSkewAndDeskew() - * - * \param[in] pixs any depth - * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; - * use 0 for default - * \param[out] pangle [optional] angle required to deskew, - * in degrees; use NULL to skip - * \param[out] pconf [optional] conf value is ratio - * of max/min scores; use NULL to skip - * \return pixd deskewed pix, or NULL on error - * - *
- * Notes:
- *      (1) This binarizes if necessary and finds the skew angle.  If the
- *          angle is large enough and there is sufficient confidence,
- *          it returns a deskewed image; otherwise, it returns a clone.
- * 
- */ -PIX * -pixFindSkewAndDeskew(PIX *pixs, - l_int32 redsearch, - l_float32 *pangle, - l_float32 *pconf) -{ - PROCNAME("pixFindSkewAndDeskew"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (redsearch == 0) - redsearch = DefaultBsReduction; - else if (redsearch != 1 && redsearch != 2 && redsearch != 4) - return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); - - return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, pangle, pconf); -} - - -/*! - * \brief pixDeskewGeneral() - * - * \param[in] pixs any depth - * \param[in] redsweep for linear search: reduction factor = 1, 2 or 4; - * use 0 for default - * \param[in] sweeprange in degrees in each direction from 0; - * use 0.0 for default - * \param[in] sweepdelta in degrees; use 0.0 for default - * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; - * use 0 for default; - * \param[in] thresh for binarizing the image; use 0 for default - * \param[out] pangle [optional] angle required to deskew, - * in degrees; use NULL to skip - * \param[out] pconf [optional] conf value is ratio - * of max/min scores; use NULL to skip - * \return pixd deskewed pix, or NULL on error - * - *
- * Notes:
- *      (1) This binarizes if necessary and finds the skew angle.  If the
- *          angle is large enough and there is sufficient confidence,
- *          it returns a deskewed image; otherwise, it returns a clone.
- * 
- */ -PIX * -pixDeskewGeneral(PIX *pixs, - l_int32 redsweep, - l_float32 sweeprange, - l_float32 sweepdelta, - l_int32 redsearch, - l_int32 thresh, - l_float32 *pangle, - l_float32 *pconf) -{ -l_int32 ret, depth; -l_float32 angle, conf, deg2rad; -PIX *pixb, *pixd; - - PROCNAME("pixDeskewGeneral"); - - if (pangle) *pangle = 0.0; - if (pconf) *pconf = 0.0; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (redsweep == 0) - redsweep = DefaultSweepReduction; - else if (redsweep != 1 && redsweep != 2 && redsweep != 4) - return (PIX *)ERROR_PTR("redsweep not in {1,2,4}", procName, NULL); - if (sweeprange == 0.0) - sweeprange = DefaultSweepRange; - if (sweepdelta == 0.0) - sweepdelta = DefaultSweepDelta; - if (redsearch == 0) - redsearch = DefaultBsReduction; - else if (redsearch != 1 && redsearch != 2 && redsearch != 4) - return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); - if (thresh == 0) - thresh = DefaultBinaryThreshold; - - deg2rad = 3.1415926535 / 180.; - - /* Binarize if necessary */ - depth = pixGetDepth(pixs); - if (depth == 1) - pixb = pixClone(pixs); - else - pixb = pixConvertTo1(pixs, thresh); - - /* Use the 1 bpp image to find the skew */ - ret = pixFindSkewSweepAndSearch(pixb, &angle, &conf, redsweep, redsearch, - sweeprange, sweepdelta, - DefaultMinbsDelta); - pixDestroy(&pixb); - if (pangle) *pangle = angle; - if (pconf) *pconf = conf; - if (ret) - return pixClone(pixs); - - if (L_ABS(angle) < MinDeskewAngle || conf < MinAllowedConfidence) - return pixClone(pixs); - - if ((pixd = pixRotate(pixs, deg2rad * angle, L_ROTATE_AREA_MAP, - L_BRING_IN_WHITE, 0, 0)) == NULL) - return pixClone(pixs); - else - return pixd; -} - - -/*-----------------------------------------------------------------------* - * Simple top-level angle-finding interface * - *-----------------------------------------------------------------------*/ -/*! 
- * \brief pixFindSkew() - * - * \param[in] pixs 1 bpp - * \param[out] pangle angle required to deskew, in degrees - * \param[out] pconf confidence value is ratio max/min scores - * \return 0 if OK, 1 on error or if angle measurement not valid - * - *
- * Notes:
- *      (1) This is a simple high-level interface, that uses default
- *          values of the parameters for reasonable speed and accuracy.
- *      (2) The angle returned is the negative of the skew angle of
- *          the image.  It is the angle required for deskew.
- *          Clockwise rotations are positive angles.
- * 
- */ -l_ok -pixFindSkew(PIX *pixs, - l_float32 *pangle, - l_float32 *pconf) -{ - PROCNAME("pixFindSkew"); - - if (pangle) *pangle = 0.0; - if (pconf) *pconf = 0.0; - if (!pangle || !pconf) - return ERROR_INT("&angle and/or &conf not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not 1 bpp", procName, 1); - - return pixFindSkewSweepAndSearch(pixs, pangle, pconf, - DefaultSweepReduction, - DefaultBsReduction, - DefaultSweepRange, - DefaultSweepDelta, - DefaultMinbsDelta); -} - - -/*-----------------------------------------------------------------------* - * Basic angle-finding functions * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixFindSkewSweep() - * - * \param[in] pixs 1 bpp - * \param[out] pangle angle required to deskew, in degrees - * \param[in] reduction factor = 1, 2, 4 or 8 - * \param[in] sweeprange half the full range; assumed about 0; in degrees - * \param[in] sweepdelta angle increment of sweep; in degrees - * \return 0 if OK, 1 on error or if angle measurement not valid - * - *
- * Notes:
- *      (1) This examines the 'score' for skew angles with equal intervals.
- *      (2) Caller must check the return value for validity of the result.
- * 
- */ -l_ok -pixFindSkewSweep(PIX *pixs, - l_float32 *pangle, - l_int32 reduction, - l_float32 sweeprange, - l_float32 sweepdelta) -{ -l_int32 ret, bzero, i, nangles; -l_float32 deg2rad, theta; -l_float32 sum, maxscore, maxangle; -NUMA *natheta, *nascore; -PIX *pix, *pixt; - - PROCNAME("pixFindSkewSweep"); - - if (!pangle) - return ERROR_INT("&angle not defined", procName, 1); - *pangle = 0.0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not 1 bpp", procName, 1); - if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) - return ERROR_INT("reduction must be in {1,2,4,8}", procName, 1); - - deg2rad = 3.1415926535 / 180.; - ret = 0; - - /* Generate reduced image, if requested */ - if (reduction == 1) - pix = pixClone(pixs); - else if (reduction == 2) - pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); - else if (reduction == 4) - pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); - else /* reduction == 8 */ - pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); - - pixZero(pix, &bzero); - if (bzero) { - pixDestroy(&pix); - return 1; - } - - nangles = (l_int32)((2. 
* sweeprange) / sweepdelta + 1); - natheta = numaCreate(nangles); - nascore = numaCreate(nangles); - pixt = pixCreateTemplate(pix); - - if (!pix || !pixt) { - ret = ERROR_INT("pix and pixt not both made", procName, 1); - goto cleanup; - } - if (!natheta || !nascore) { - ret = ERROR_INT("natheta and nascore not both made", procName, 1); - goto cleanup; - } - - for (i = 0; i < nangles; i++) { - theta = -sweeprange + i * sweepdelta; /* degrees */ - - /* Shear pix about the UL corner and put the result in pixt */ - pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); - - /* Get score */ - pixFindDifferentialSquareSum(pixt, &sum); - -#if DEBUG_PRINT_SCORES - L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum); -#endif /* DEBUG_PRINT_SCORES */ - - /* Save the result in the output arrays */ - numaAddNumber(nascore, sum); - numaAddNumber(natheta, theta); - } - - /* Find the location of the maximum (i.e., the skew angle) - * by fitting the largest data point and its two neighbors - * to a quadratic, using lagrangian interpolation. */ - numaFitMax(nascore, &maxscore, natheta, &maxangle); - *pangle = maxangle; - -#if DEBUG_PRINT_SWEEP - L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName, - maxangle, maxscore); -#endif /* DEBUG_PRINT_SWEEP */ - -#if DEBUG_PLOT_SCORES - /* Plot the result -- the scores versus rotation angle -- - * using gnuplot with GPLOT_LINES (lines connecting data points). - * The GPLOT data structure is first created, with the - * appropriate data incorporated from the two input NUMAs, - * and then the function gplotMakeOutput() uses gnuplot to - * generate the output plot. This can be either a .png file - * or a .ps file, depending on whether you use GPLOT_PNG - * or GPLOT_PS. */ - {GPLOT *gplot; - gplot = gplotCreate("sweep_output", GPLOT_PNG, - "Sweep. Variance of difference of ON pixels vs. 
angle", - "angle (deg)", "score"); - gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); - gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - } -#endif /* DEBUG_PLOT_SCORES */ - -cleanup: - pixDestroy(&pix); - pixDestroy(&pixt); - numaDestroy(&nascore); - numaDestroy(&natheta); - return ret; -} - - -/*! - * \brief pixFindSkewSweepAndSearch() - * - * \param[in] pixs 1 bpp - * \param[out] pangle angle required to deskew; in degrees - * \param[out] pconf confidence given by ratio of max/min score - * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 - * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; - * and must not exceed redsweep - * \param[in] sweeprange half the full range, assumed about 0; in degrees - * \param[in] sweepdelta angle increment of sweep; in degrees - * \param[in] minbsdelta min binary search increment angle; in degrees - * \return 0 if OK, 1 on error or if angle measurement not valid - * - *
- * Notes:
- *      (1) This finds the skew angle, doing first a sweep through a set
- *          of equal angles, and then doing a binary search until
- *          convergence.
- *      (2) Caller must check the return value for validity of the result.
- *      (3) In computing the differential line sum variance score, we sum
- *          the result over scanlines, but we always skip:
- *           ~ at least one scanline
- *           ~ not more than 10% of the image height
- *           ~ not more than 5% of the image width
- *      (4) See also notes in pixFindSkewSweepAndSearchScore()
- * 
- */ -l_ok -pixFindSkewSweepAndSearch(PIX *pixs, - l_float32 *pangle, - l_float32 *pconf, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta) -{ - return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, - redsweep, redsearch, 0.0, sweeprange, - sweepdelta, minbsdelta); -} - - -/*! - * \brief pixFindSkewSweepAndSearchScore() - * - * \param[in] pixs 1 bpp - * \param[out] pangle angle required to deskew; in degrees - * \param[out] pconf confidence given by ratio of max/min score - * \param[out] pendscore [optional] max score; use NULL to ignore - * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 - * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; - * and must not exceed redsweep - * \param[in] sweepcenter angle about which sweep is performed; in degrees - * \param[in] sweeprange half the full range, taken about sweepcenter; - * in degrees - * \param[in] sweepdelta angle increment of sweep; in degrees - * \param[in] minbsdelta min binary search increment angle; in degrees - * \return 0 if OK, 1 on error or if angle measurement not valid - * - *
- * Notes:
- *      (1) This finds the skew angle, doing first a sweep through a set
- *          of equal angles, and then doing a binary search until convergence.
- *      (2) There are two built-in constants that determine if the
- *          returned confidence is nonzero:
- *            ~ MinValidMaxscore (minimum allowed maxscore)
- *            ~ MinscoreThreshFactor (determines minimum allowed
- *                 minscore, by multiplying by (height * width^2)
- *          If either of these conditions is not satisfied, the returned
- *          confidence value will be zero.  The maxscore is optionally
- *          returned in this function to allow evaluation of the
- *          resulting angle by a method that is independent of the
- *          returned confidence value.
- *      (3) The larger the confidence value, the greater the probability
- *          that the proper alignment is given by the angle that maximizes
- *          variance.  It should be compared to a threshold, which depends
- *          on the application.  Values between 3.0 and 6.0 are common.
- *      (4) By default, the shear is about the UL corner.
- * 
- */ -l_ok -pixFindSkewSweepAndSearchScore(PIX *pixs, - l_float32 *pangle, - l_float32 *pconf, - l_float32 *pendscore, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweepcenter, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta) -{ - return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, - redsweep, redsearch, 0.0, - sweeprange, sweepdelta, - minbsdelta, - L_SHEAR_ABOUT_CORNER); -} - - -/*! - * \brief pixFindSkewSweepAndSearchScorePivot() - * - * \param[in] pixs 1 bpp - * \param[out] pangle angle required to deskew; in degrees - * \param[out] pconf confidence given by ratio of max/min score - * \param[out] pendscore [optional] max score; use NULL to ignore - * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 - * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; - * and must not exceed redsweep - * \param[in] sweepcenter angle about which sweep is performed; in degrees - * \param[in] sweeprange half the full range, taken about sweepcenter; - * in degrees - * \param[in] sweepdelta angle increment of sweep; in degrees - * \param[in] minbsdelta min binary search increment angle; in degrees - * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER - * \return 0 if OK, 1 on error or if angle measurement not valid - * - *
- * Notes:
- *      (1) See notes in pixFindSkewSweepAndSearchScore().
- *      (2) This allows choice of shear pivoting from either the UL corner
- *          or the center.  For small angles, the ability to discriminate
- *          angles is better with shearing from the UL corner.  However,
- *          for large angles (say, greater than 20 degrees), it is better
- *          to shear about the center because a shear from the UL corner
- *          loses too much of the image.
- * 
- */ -l_ok -pixFindSkewSweepAndSearchScorePivot(PIX *pixs, - l_float32 *pangle, - l_float32 *pconf, - l_float32 *pendscore, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweepcenter, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta, - l_int32 pivot) -{ -l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; -l_int32 width, height; -l_float32 deg2rad, theta, delta; -l_float32 sum, maxscore, maxangle; -l_float32 centerangle, leftcenterangle, rightcenterangle; -l_float32 lefttemp, righttemp; -l_float32 bsearchscore[5]; -l_float32 minscore, minthresh; -l_float32 rangeleft; -NUMA *natheta, *nascore; -PIX *pixsw, *pixsch, *pixt1, *pixt2; - - PROCNAME("pixFindSkewSweepAndSearchScorePivot"); - - if (pendscore) *pendscore = 0.0; - if (pangle) *pangle = 0.0; - if (pconf) *pconf = 0.0; - if (!pangle || !pconf) - return ERROR_INT("&angle and/or &conf not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) - return ERROR_INT("redsweep must be in {1,2,4,8}", procName, 1); - if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) - return ERROR_INT("redsearch must be in {1,2,4,8}", procName, 1); - if (redsearch > redsweep) - return ERROR_INT("redsearch must not exceed redsweep", procName, 1); - if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) - return ERROR_INT("invalid pivot", procName, 1); - - deg2rad = 3.1415926535 / 180.; - ret = 0; - - /* Generate reduced image for binary search, if requested */ - if (redsearch == 1) - pixsch = pixClone(pixs); - else if (redsearch == 2) - pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); - else if (redsearch == 4) - pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); - else /* redsearch == 8 */ - pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); - - pixZero(pixsch, &bzero); - if (bzero) { - pixDestroy(&pixsch); - 
return 1; - } - - /* Generate reduced image for sweep, if requested */ - ratio = redsweep / redsearch; - if (ratio == 1) { - pixsw = pixClone(pixsch); - } else { /* ratio > 1 */ - if (ratio == 2) - pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); - else if (ratio == 4) - pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); - else /* ratio == 8 */ - pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); - } - - pixt1 = pixCreateTemplate(pixsw); - if (ratio == 1) - pixt2 = pixClone(pixt1); - else - pixt2 = pixCreateTemplate(pixsch); - - nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); - natheta = numaCreate(nangles); - nascore = numaCreate(nangles); - - if (!pixsch || !pixsw) { - ret = ERROR_INT("pixsch and pixsw not both made", procName, 1); - goto cleanup; - } - if (!pixt1 || !pixt2) { - ret = ERROR_INT("pixt1 and pixt2 not both made", procName, 1); - goto cleanup; - } - if (!natheta || !nascore) { - ret = ERROR_INT("natheta and nascore not both made", procName, 1); - goto cleanup; - } - - /* Do sweep */ - rangeleft = sweepcenter - sweeprange; - for (i = 0; i < nangles; i++) { - theta = rangeleft + i * sweepdelta; /* degrees */ - - /* Shear pix and put the result in pixt1 */ - if (pivot == L_SHEAR_ABOUT_CORNER) - pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); - else - pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); - - /* Get score */ - pixFindDifferentialSquareSum(pixt1, &sum); - -#if DEBUG_PRINT_SCORES - L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum); -#endif /* DEBUG_PRINT_SCORES */ - - /* Save the result in the output arrays */ - numaAddNumber(nascore, sum); - numaAddNumber(natheta, theta); - } - - /* Find the largest of the set (maxscore at maxangle) */ - numaGetMax(nascore, &maxscore, &maxindex); - numaGetFValue(natheta, maxindex, &maxangle); - -#if DEBUG_PRINT_SWEEP - L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName, - maxangle, maxscore); -#endif /* DEBUG_PRINT_SWEEP */ - -#if 
DEBUG_PLOT_SCORES - /* Plot the sweep result -- the scores versus rotation angle -- - * using gnuplot with GPLOT_LINES (lines connecting data points). */ - {GPLOT *gplot; - gplot = gplotCreate("sweep_output", GPLOT_PNG, - "Sweep. Variance of difference of ON pixels vs. angle", - "angle (deg)", "score"); - gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); - gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - } -#endif /* DEBUG_PLOT_SCORES */ - - /* Check if the max is at the end of the sweep. */ - n = numaGetCount(natheta); - if (maxindex == 0 || maxindex == n - 1) { - L_WARNING("max found at sweep edge\n", procName); - goto cleanup; - } - - /* Empty the numas for re-use */ - numaEmpty(nascore); - numaEmpty(natheta); - - /* Do binary search to find skew angle. - * First, set up initial three points. */ - centerangle = maxangle; - if (pivot == L_SHEAR_ABOUT_CORNER) { - pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); - pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), - L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); - pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), - L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); - } else { - pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); - pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), - L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); - pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), - L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); - } - - numaAddNumber(nascore, bsearchscore[2]); - numaAddNumber(natheta, centerangle); - numaAddNumber(nascore, bsearchscore[0]); - numaAddNumber(natheta, centerangle - 
sweepdelta); - numaAddNumber(nascore, bsearchscore[4]); - numaAddNumber(natheta, centerangle + sweepdelta); - - /* Start the search */ - delta = 0.5 * sweepdelta; - while (delta >= minbsdelta) - { - /* Get the left intermediate score */ - leftcenterangle = centerangle - delta; - if (pivot == L_SHEAR_ABOUT_CORNER) - pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, - L_BRING_IN_WHITE); - else - pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, - L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); - numaAddNumber(nascore, bsearchscore[1]); - numaAddNumber(natheta, leftcenterangle); - - /* Get the right intermediate score */ - rightcenterangle = centerangle + delta; - if (pivot == L_SHEAR_ABOUT_CORNER) - pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, - L_BRING_IN_WHITE); - else - pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, - L_BRING_IN_WHITE); - pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); - numaAddNumber(nascore, bsearchscore[3]); - numaAddNumber(natheta, rightcenterangle); - - /* Find the maximum of the five scores and its location. - * Note that the maximum must be in the center - * three values, not in the end two. 
*/ - maxscore = bsearchscore[1]; - maxindex = 1; - for (i = 2; i < 4; i++) { - if (bsearchscore[i] > maxscore) { - maxscore = bsearchscore[i]; - maxindex = i; - } - } - - /* Set up score array to interpolate for the next iteration */ - lefttemp = bsearchscore[maxindex - 1]; - righttemp = bsearchscore[maxindex + 1]; - bsearchscore[2] = maxscore; - bsearchscore[0] = lefttemp; - bsearchscore[4] = righttemp; - - /* Get new center angle and delta for next iteration */ - centerangle = centerangle + delta * (maxindex - 2); - delta = 0.5 * delta; - } - *pangle = centerangle; - -#if DEBUG_PRINT_SCORES - L_INFO(" Binary search score = %7.3f\n", procName, bsearchscore[2]); -#endif /* DEBUG_PRINT_SCORES */ - - if (pendscore) /* save if requested */ - *pendscore = bsearchscore[2]; - - /* Return the ratio of Max score over Min score - * as a confidence value. Don't trust if the Min score - * is too small, which can happen if the image is all black - * with only a few white pixels interspersed. In that case, - * we get a contribution from the top and bottom edges when - * vertically sheared, but this contribution becomes zero when - * the shear angle is zero. For zero shear angle, the only - * contribution will be from the white pixels. We expect that - * the signal goes as the product of the (height * width^2), - * so we compute a (hopefully) normalized minimum threshold as - * a function of these dimensions. 
*/ - numaGetMin(nascore, &minscore, &minloc); - width = pixGetWidth(pixsch); - height = pixGetHeight(pixsch); - minthresh = MinscoreThreshFactor * width * width * height; - -#if DEBUG_THRESHOLD - L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", procName, - minthresh, minscore); - L_INFO(" maxscore = %10.2f\n", procName, maxscore); -#endif /* DEBUG_THRESHOLD */ - - if (minscore > minthresh) - *pconf = maxscore / minscore; - else - *pconf = 0.0; - - /* Don't trust it if too close to the edge of the sweep - * range or if maxscore is small */ - if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || - (centerangle < rangeleft + sweepdelta) || - (maxscore < MinValidMaxscore)) - *pconf = 0.0; - -#if DEBUG_PRINT_BINARY - lept_stderr("Binary search: angle = %7.3f, score ratio = %6.2f\n", - *pangle, *pconf); - lept_stderr(" max score = %8.0f\n", maxscore); -#endif /* DEBUG_PRINT_BINARY */ - -#if DEBUG_PLOT_SCORES - /* Plot the result -- the scores versus rotation angle -- - * using gnuplot with GPLOT_POINTS. Because the data - * points are not ordered by theta (increasing or decreasing), - * using GPLOT_LINES would be confusing! */ - {GPLOT *gplot; - gplot = gplotCreate("search_output", GPLOT_PNG, - "Binary search. Variance of difference of ON pixels vs. 
angle", - "angle (deg)", "score"); - gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); - gplotMakeOutput(gplot); - gplotDestroy(&gplot); - } -#endif /* DEBUG_PLOT_SCORES */ - -cleanup: - pixDestroy(&pixsw); - pixDestroy(&pixsch); - pixDestroy(&pixt1); - pixDestroy(&pixt2); - numaDestroy(&nascore); - numaDestroy(&natheta); - return ret; -} - - -/*---------------------------------------------------------------------* - * Search over arbitrary range of angles in orthogonal directions * - *---------------------------------------------------------------------*/ -/* - * \brief pixFindSkewOrthogonalRange() - * - * \param[in] pixs 1 bpp - * \param[out] pangle angle required to deskew; in degrees cw - * \param[out] pconf confidence given by ratio of max/min score - * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 - * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; - * and must not exceed redsweep - * \param[in] sweeprange half the full range in each orthogonal - * direction, taken about 0, in degrees - * \param[in] sweepdelta angle increment of sweep; in degrees - * \param[in] minbsdelta min binary search increment angle; in degrees - * \param[in] confprior amount by which confidence of 90 degree rotated - * result is reduced when comparing with unrotated - * confidence value - * \return 0 if OK, 1 on error or if angle measurement not valid - * - *
- * Notes:
- *      (1) This searches for the skew angle, first in the range
- *          [-sweeprange, sweeprange], and then in
- *          [90 - sweeprange, 90 + sweeprange], with angles measured
- *          clockwise.  For exploring the full range of possibilities,
- *          suggest using sweeprange = 47.0 degrees, giving some overlap
- *          at 45 and 135 degrees.  From these results, and discounting
- *          the the second confidence by %confprior, it selects the
- *          angle for maximal differential variance.  If the angle
- *          is larger than pi/4, the angle found after 90 degree rotation
- *          is selected.
- *      (2) The larger the confidence value, the greater the probability
- *          that the proper alignment is given by the angle that maximizes
- *          variance.  It should be compared to a threshold, which depends
- *          on the application.  Values between 3.0 and 6.0 are common.
- *      (3) Allowing for both portrait and landscape searches is more
- *          difficult, because if the signal from the text lines is weak,
- *          a signal from vertical rules can be larger!
- *          The most difficult documents to deskew have some or all of:
- *            (a) Multiple columns, not aligned
- *            (b) Black lines along the vertical edges
- *            (c) Text from two pages, and at different angles
- *          Rule of thumb for resolution:
- *            (a) If the margins are clean, you can work at 75 ppi,
- *                although 100 ppi is safer.
- *            (b) If there are vertical lines in the margins, do not
- *                work below 150 ppi.  The signal from the text lines must
- *                exceed that from the margin lines.
- *      (4) Choosing the %confprior parameter depends on knowing something
- *          about the source of image.  However, we're not using
- *          real probabilities here, so its use is qualitative.
- *          If landscape and portrait are equally likely, use
- *          %confprior = 0.0.  If the likelihood of portrait (non-rotated)
- *          is 100 times higher than that of landscape, we want to reduce
- *          the chance that we rotate to landscape in a situation where
- *          the landscape signal is accidentally larger than the
- *          portrait signal.  To do this use a positive value of
- *          %confprior; say 1.5.
- * 
- */ -l_int32 -pixFindSkewOrthogonalRange(PIX *pixs, - l_float32 *pangle, - l_float32 *pconf, - l_int32 redsweep, - l_int32 redsearch, - l_float32 sweeprange, - l_float32 sweepdelta, - l_float32 minbsdelta, - l_float32 confprior) -{ -l_float32 angle1, conf1, score1, angle2, conf2, score2; -PIX *pixr; - - PROCNAME("pixFindSkewOrthogonalRange"); - - if (pangle) *pangle = 0.0; - if (pconf) *pconf = 0.0; - if (!pangle || !pconf) - return ERROR_INT("&angle and/or &conf not defined", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - - pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, - redsweep, redsearch, 0.0, - sweeprange, sweepdelta, minbsdelta, - L_SHEAR_ABOUT_CORNER); - pixr = pixRotateOrth(pixs, 1); - pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, - redsweep, redsearch, 0.0, - sweeprange, sweepdelta, minbsdelta, - L_SHEAR_ABOUT_CORNER); - pixDestroy(&pixr); - - if (conf1 > conf2 - confprior) { - *pangle = angle1; - *pconf = conf1; - } else { - *pangle = -90.0 + angle2; - *pconf = conf2; - } - -#if DEBUG_PRINT_ORTH - lept_stderr(" About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", - angle1, conf1, score1); - lept_stderr(" About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", - angle2, conf2, score2); - lept_stderr(" Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); -#endif /* DEBUG_PRINT_ORTH */ - - return 0; -} - - - -/*----------------------------------------------------------------* - * Differential square sum function * - *----------------------------------------------------------------*/ -/*! - * \brief pixFindDifferentialSquareSum() - * - * \param[in] pixs - * \param[out] psum result - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) At the top and bottom, we skip:
- *           ~ at least one scanline
- *           ~ not more than 10% of the image height
- *           ~ not more than 5% of the image width
- * 
- */ -l_ok -pixFindDifferentialSquareSum(PIX *pixs, - l_float32 *psum) -{ -l_int32 i, n; -l_int32 w, h, skiph, skip, nskip; -l_float32 val1, val2, diff, sum; -NUMA *na; - - PROCNAME("pixFindDifferentialSquareSum"); - - if (!psum) - return ERROR_INT("&sum not defined", procName, 1); - *psum = 0.0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - /* Generate a number array consisting of the sum - * of pixels in each row of pixs */ - if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) - return ERROR_INT("na not made", procName, 1); - - /* Compute the number of rows at top and bottom to omit. - * We omit these to avoid getting a spurious signal from - * the top and bottom of a (nearly) all black image. */ - w = pixGetWidth(pixs); - h = pixGetHeight(pixs); - skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ - skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ - nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ - - /* Sum the squares of differential row sums, on the - * allowed rows. Note that nskip must be >= 1. */ - n = numaGetCount(na); - sum = 0.0; - for (i = nskip; i < n - nskip; i++) { - numaGetFValue(na, i - 1, &val1); - numaGetFValue(na, i, &val2); - diff = val2 - val1; - sum += diff * diff; - } - numaDestroy(&na); - *psum = sum; - return 0; -} - - -/*----------------------------------------------------------------* - * Normalized square sum * - *----------------------------------------------------------------*/ -/*! - * \brief pixFindNormalizedSquareSum() - * - * \param[in] pixs - * \param[out] phratio [optional] ratio of normalized horiz square sum - * to result if the pixel distribution were uniform - * \param[out] pvratio [optional] ratio of normalized vert square sum - * to result if the pixel distribution were uniform - * \param[out] pfract [optional] ratio of fg pixels to total pixels - * \return 0 if OK, 1 on error or if there are no fg pixels - * - *
- * Notes:
- *      (1) Let the image have h scanlines and N fg pixels.
- *          If the pixels were uniformly distributed on scanlines,
- *          the sum of squares of fg pixels on each scanline would be
- *          h * (N / h)^2.  However, if the pixels are not uniformly
- *          distributed (e.g., for text), the sum of squares of fg
- *          pixels will be larger.  We return in hratio and vratio the
- *          ratio of these two values.
- *      (2) If there are no fg pixels, hratio and vratio are returned as 0.0.
- * 
- */ -l_ok -pixFindNormalizedSquareSum(PIX *pixs, - l_float32 *phratio, - l_float32 *pvratio, - l_float32 *pfract) -{ -l_int32 i, w, h, empty; -l_float32 sum, sumsq, uniform, val; -NUMA *na; -PIX *pixt; - - PROCNAME("pixFindNormalizedSquareSum"); - - if (phratio) *phratio = 0.0; - if (pvratio) *pvratio = 0.0; - if (pfract) *pfract = 0.0; - if (!phratio && !pvratio) - return ERROR_INT("nothing to do", procName, 1); - if (!pixs || pixGetDepth(pixs) != 1) - return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); - pixGetDimensions(pixs, &w, &h, NULL); - - empty = 0; - if (phratio) { - na = pixCountPixelsByRow(pixs, NULL); - numaGetSum(na, &sum); /* fg pixels */ - if (pfract) *pfract = sum / (l_float32)(w * h); - if (sum != 0.0) { - uniform = sum * sum / h; /* h*(sum / h)^2 */ - sumsq = 0.0; - for (i = 0; i < h; i++) { - numaGetFValue(na, i, &val); - sumsq += val * val; - } - *phratio = sumsq / uniform; - } else { - empty = 1; - } - numaDestroy(&na); - } - - if (pvratio) { - if (empty == 1) return 1; - pixt = pixRotateOrth(pixs, 1); - na = pixCountPixelsByRow(pixt, NULL); - numaGetSum(na, &sum); - if (pfract) *pfract = sum / (l_float32)(w * h); - if (sum != 0.0) { - uniform = sum * sum / w; - sumsq = 0.0; - for (i = 0; i < w; i++) { - numaGetFValue(na, i, &val); - sumsq += val * val; - } - *pvratio = sumsq / uniform; - } else { - empty = 1; - } - pixDestroy(&pixt); - numaDestroy(&na); - } - - return empty; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/spixio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/spixio.c deleted file mode 100644 index a21b4579..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/spixio.c +++ /dev/null @@ -1,497 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file spixio.c - *
- *
- *    This does fast serialization of a pix in memory to file,
- *    copying the raw data for maximum speed.  The underlying
- *    function serializes it to memory, and it is wrapped to be
- *    callable from standard pixRead() and pixWrite() file functions.
- *
- *      Reading spix from file
- *           PIX        *pixReadStreamSpix()
- *           l_int32     readHeaderSpix()
- *           l_int32     freadHeaderSpix()
- *           l_int32     sreadHeaderSpix()
- *
- *      Writing spix to file
- *           l_int32     pixWriteStreamSpix()
- *
- *      Low-level serialization of pix to/from memory (uncompressed)
- *           PIX        *pixReadMemSpix()
- *           l_int32     pixWriteMemSpix()
- *           l_int32     pixSerializeToMemory()
- *           PIX        *pixDeserializeFromMemory()
- *
- *    Note: these functions have not been extensively tested for fuzzing
- *    (bad input data that can result in, e.g., memory faults).
- *    The spix serialization format is only defined here, in leptonica.
- *    The image data is uncompressed and the serialization is not intended
- *    to be a secure file format from untrusted sources.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - - /* Image dimension limits */ -static const l_int32 MaxAllowedWidth = 1000000; -static const l_int32 MaxAllowedHeight = 1000000; -static const l_int64 MaxAllowedArea = 400000000LL; - -#ifndef NO_CONSOLE_IO -#define DEBUG_SERIALIZE 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*-----------------------------------------------------------------------* - * Reading spix from file * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixReadStreamSpix() - * - * \param[in] fp file stream - * \return pix, or NULL on error. - * - *
- * Notes:
- *      (1) If called from pixReadStream(), the stream is positioned
- *          at the beginning of the file.
- * 
- */ -PIX * -pixReadStreamSpix(FILE *fp) -{ -size_t nbytes; -l_uint8 *data; -PIX *pix; - - PROCNAME("pixReadStreamSpix"); - - if (!fp) - return (PIX *)ERROR_PTR("stream not defined", procName, NULL); - - if ((data = l_binaryReadStream(fp, &nbytes)) == NULL) - return (PIX *)ERROR_PTR("data not read", procName, NULL); - pix = pixReadMemSpix(data, nbytes); - LEPT_FREE(data); - if (!pix) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - return pix; -} - - -/*! - * \brief readHeaderSpix() - * - * \param[in] filename - * \param[out] pwidth width - * \param[out] pheight height - * \param[out] pbps bits/sample - * \param[out] pspp samples/pixel - * \param[out] piscmap [optional] input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there is a colormap, iscmap is returned as 1; else 0.
- * 
- */ -l_ok -readHeaderSpix(const char *filename, - l_int32 *pwidth, - l_int32 *pheight, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderSpix"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pwidth || !pheight || !pbps || !pspp) - return ERROR_INT("input ptr(s) not defined", procName, 1); - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = freadHeaderSpix(fp, pwidth, pheight, pbps, pspp, piscmap); - fclose(fp); - return ret; -} - - -/*! - * \brief freadHeaderSpix() - * - * \param[in] fp file stream - * \param[out] pwidth width - * \param[out] pheight height - * \param[out] pbps bits/sample - * \param[out] pspp samples/pixel - * \param[out] piscmap [optional] input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there is a colormap, iscmap is returned as 1; else 0.
- * 
- */ -l_ok -freadHeaderSpix(FILE *fp, - l_int32 *pwidth, - l_int32 *pheight, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -l_int32 nbytes, ret; -l_uint32 data[6]; - - PROCNAME("freadHeaderSpix"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pwidth || !pheight || !pbps || !pspp) - return ERROR_INT("input ptr(s) not defined", procName, 1); - - nbytes = fnbytesInFile(fp); - if (nbytes < 32) - return ERROR_INT("file too small to be spix", procName, 1); - if (fread(data, 4, 6, fp) != 6) - return ERROR_INT("error reading data", procName, 1); - ret = sreadHeaderSpix(data, pwidth, pheight, pbps, pspp, piscmap); - return ret; -} - - -/*! - * \brief sreadHeaderSpix() - * - * \param[in] data - * \param[out] pwidth width - * \param[out] pheight height - * \param[out] pbps bits/sample - * \param[out] pspp samples/pixel - * \param[out] piscmap [optional] input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there is a colormap, iscmap is returned as 1; else 0.
- * 
- */ -l_ok -sreadHeaderSpix(const l_uint32 *data, - l_int32 *pwidth, - l_int32 *pheight, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *piscmap) -{ -char *id; -l_int32 d, ncolors; - - PROCNAME("sreadHeaderSpix"); - - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (!pwidth || !pheight || !pbps || !pspp) - return ERROR_INT("input ptr(s) not defined", procName, 1); - *pwidth = *pheight = *pbps = *pspp = 0; - if (piscmap) - *piscmap = 0; - - /* Check file id */ - id = (char *)data; - if (id[0] != 's' || id[1] != 'p' || id[2] != 'i' || id[3] != 'x') - return ERROR_INT("not a valid spix file", procName, 1); - - *pwidth = data[1]; - *pheight = data[2]; - d = data[3]; - if (d <= 16) { - *pbps = d; - *pspp = 1; - } else { - *pbps = 8; - *pspp = d / 8; /* if the pix is 32 bpp, call it 4 samples */ - } - ncolors = data[5]; - if (piscmap) - *piscmap = (ncolors == 0) ? 0 : 1; - - return 0; -} - - -/*-----------------------------------------------------------------------* - * Writing spix to file * - *-----------------------------------------------------------------------*/ -/*! - * \brief pixWriteStreamSpix() - * - * \param[in] fp file stream - * \param[in] pix - * \return 0 if OK; 1 on error - */ -l_ok -pixWriteStreamSpix(FILE *fp, - PIX *pix) -{ -l_uint8 *data; -size_t size; - - PROCNAME("pixWriteStreamSpix"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if (pixWriteMemSpix(&data, &size, pix)) - return ERROR_INT("failure to write pix to memory", procName, 1); - fwrite(data, 1, size, fp); - LEPT_FREE(data); - return 0; -} - - -/*-----------------------------------------------------------------------* - * Low-level serialization of pix to/from memory (uncompressed) * - *-----------------------------------------------------------------------*/ -/*! 
- * \brief pixReadMemSpix() - * - * \param[in] data const; uncompressed - * \param[in] size bytes of data - * \return pix, or NULL on error - */ -PIX * -pixReadMemSpix(const l_uint8 *data, - size_t size) -{ - return pixDeserializeFromMemory((l_uint32 *)data, size); -} - - -/*! - * \brief pixWriteMemSpix() - * - * \param[out] pdata data of serialized, uncompressed pix - * \param[out] psize size of returned data - * \param[in] pix all depths; colormap OK - * \return 0 if OK, 1 on error - */ -l_ok -pixWriteMemSpix(l_uint8 **pdata, - size_t *psize, - PIX *pix) -{ - return pixSerializeToMemory(pix, (l_uint32 **)pdata, psize); -} - - -/*! - * \brief pixSerializeToMemory() - * - * \param[in] pixs all depths, colormap OK - * \param[out] pdata serialized data in memory - * \param[out] pnbytes number of bytes in data string - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This does a fast serialization of the principal elements
- *          of the pix, as follows:
- *            "spix"    (4 bytes) -- ID for file type
- *            w         (4 bytes)
- *            h         (4 bytes)
- *            d         (4 bytes)
- *            wpl       (4 bytes)
- *            ncolors   (4 bytes) -- in colormap; 0 if there is no colormap
- *            cdata     (4 * ncolors)  -- size of serialized colormap array
- *            rdatasize (4 bytes) -- size of serialized raster data
- *                                   = 4 * wpl * h
- *            rdata     (rdatasize)
- * 
- */ -l_ok -pixSerializeToMemory(PIX *pixs, - l_uint32 **pdata, - size_t *pnbytes) -{ -char *id; -l_int32 w, h, d, wpl, rdatasize, ncolors, nbytes, index; -l_uint8 *cdata; /* data in colormap array (4 bytes/color table entry) */ -l_uint32 *data; -l_uint32 *rdata; /* data in pix raster */ -PIXCMAP *cmap; - - PROCNAME("pixSerializeToMemory"); - - if (!pdata || !pnbytes) - return ERROR_INT("&data and &nbytes not both defined", procName, 1); - *pdata = NULL; - *pnbytes = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - pixGetDimensions(pixs, &w, &h, &d); - wpl = pixGetWpl(pixs); - rdata = pixGetData(pixs); - rdatasize = 4 * wpl * h; - ncolors = 0; - cdata = NULL; - if ((cmap = pixGetColormap(pixs)) != NULL) - pixcmapSerializeToMemory(cmap, 4, &ncolors, &cdata); - - nbytes = 24 + 4 * ncolors + 4 + rdatasize; - if ((data = (l_uint32 *)LEPT_CALLOC(nbytes / 4, sizeof(l_uint32))) - == NULL) { - LEPT_FREE(cdata); - return ERROR_INT("data not made", procName, 1); - } - *pdata = data; - *pnbytes = nbytes; - id = (char *)data; - id[0] = 's'; - id[1] = 'p'; - id[2] = 'i'; - id[3] = 'x'; - data[1] = w; - data[2] = h; - data[3] = d; - data[4] = wpl; - data[5] = ncolors; - if (ncolors > 0) - memcpy(data + 6, cdata, 4 * ncolors); - index = 6 + ncolors; - data[index] = rdatasize; - memcpy(data + index + 1, rdata, rdatasize); - -#if DEBUG_SERIALIZE - lept_stderr("Serialize: " - "raster size = %d, ncolors in cmap = %d, total bytes = %d\n", - rdatasize, ncolors, nbytes); -#endif /* DEBUG_SERIALIZE */ - - LEPT_FREE(cdata); - return 0; -} - - -/*! - * \brief pixDeserializeFromMemory() - * - * \param[in] data serialized data in memory - * \param[in] nbytes number of bytes in data string - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) See pixSerializeToMemory() for the binary format.
- *      (2) Note the image size limits.
- * 
- */ -PIX * -pixDeserializeFromMemory(const l_uint32 *data, - size_t nbytes) -{ -char *id; -l_int32 w, h, d, pixdata_size, memdata_size, imdata_size, ncolors; -l_uint32 *imdata; /* data in pix raster */ -PIX *pix1, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixDeserializeFromMemory"); - - if (!data) - return (PIX *)ERROR_PTR("data not defined", procName, NULL); - if (nbytes < 28 || nbytes > ((1LL << 31) - 1)) { - L_ERROR("invalid nbytes = %zu\n", procName, nbytes); - return NULL; - } - - id = (char *)data; - if (id[0] != 's' || id[1] != 'p' || id[2] != 'i' || id[3] != 'x') - return (PIX *)ERROR_PTR("invalid id string", procName, NULL); - w = data[1]; - h = data[2]; - d = data[3]; - ncolors = data[5]; - - /* Sanity checks on the amount of image data */ - if (w < 1 || w > MaxAllowedWidth) - return (PIX *)ERROR_PTR("invalid width", procName, NULL); - if (h < 1 || h > MaxAllowedHeight) - return (PIX *)ERROR_PTR("invalid height", procName, NULL); - if (1LL * w * h > MaxAllowedArea) - return (PIX *)ERROR_PTR("area too large", procName, NULL); - if (ncolors < 0 || ncolors > 256 || ncolors + 6 >= nbytes/sizeof(l_int32)) - return (PIX *)ERROR_PTR("invalid ncolors", procName, NULL); - if ((pix1 = pixCreateHeader(w, h, d)) == NULL) /* just make the header */ - return (PIX *)ERROR_PTR("failed to make header", procName, NULL); - pixdata_size = 4 * h * pixGetWpl(pix1); - memdata_size = nbytes - 24 - 4 * ncolors - 4; - imdata_size = data[6 + ncolors]; - pixDestroy(&pix1); - if (pixdata_size != memdata_size || pixdata_size != imdata_size) { - L_ERROR("pixdata_size = %d, memdata_size = %d, imdata_size = %d " - "not all equal!\n", procName, pixdata_size, memdata_size, - imdata_size); - return NULL; - } - - if ((pixd = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - if (ncolors > 0) { - cmap = pixcmapDeserializeFromMemory((l_uint8 *)(&data[6]), 4, ncolors); - if (!cmap) { - pixDestroy(&pixd); - return (PIX *)ERROR_PTR("cmap not made", procName, 
NULL); - } - pixSetColormap(pixd, cmap); - } - - imdata = pixGetData(pixd); - memcpy(imdata, data + 7 + ncolors, imdata_size); - -#if DEBUG_SERIALIZE - lept_stderr("Deserialize: " - "raster size = %d, ncolors in cmap = %d, total bytes = %zu\n", - imdata_size, ncolors, nbytes); -#endif /* DEBUG_SERIALIZE */ - - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stack.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stack.c deleted file mode 100644 index 4420edc2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stack.c +++ /dev/null @@ -1,291 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file stack.c - *
- *
- *      Generic stack
- *
- *      The lstack is an array of void * ptrs, onto which
- *      objects can be stored.  At any time, the number of
- *      stored objects is lstack->n.  The object at the bottom
- *      of the lstack is at array[0]; the object at the top of
- *      the lstack is at array[n-1].  New objects are added
- *      to the top of the lstack; i.e., the first available
- *      location, which is at array[n].  The lstack is expanded
- *      by doubling, when needed.  Objects are removed
- *      from the top of the lstack.  When an attempt is made
- *      to remove an object from an empty lstack, the result is null.
- *
- *      Create/Destroy
- *           L_STACK        *lstackCreate()
- *           void            lstackDestroy()
- *
- *      Accessors
- *           l_int32         lstackAdd()
- *           void           *lstackRemove()
- *           static l_int32  lstackExtendArray()
- *           l_int32         lstackGetCount()
- *
- *      Text description
- *           l_int32         lstackPrint()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - - /* Bounds on initial array size */ -static const l_uint32 MaxPtrArraySize = 100000; -static const l_int32 InitialPtrArraySize = 20; /*!< n'importe quoi */ - - /* Static function */ -static l_int32 lstackExtendArray(L_STACK *lstack); - -/*---------------------------------------------------------------------* - * Create/Destroy * - *---------------------------------------------------------------------*/ -/*! - * \brief lstackCreate() - * - * \param[in] n initial ptr array size; use 0 for default - * \return lstack, or NULL on error - */ -L_STACK * -lstackCreate(l_int32 n) -{ -L_STACK *lstack; - - PROCNAME("lstackCreate"); - - if (n <= 0 || n > MaxPtrArraySize) - n = InitialPtrArraySize; - - lstack = (L_STACK *)LEPT_CALLOC(1, sizeof(L_STACK)); - lstack->array = (void **)LEPT_CALLOC(n, sizeof(void *)); - if (!lstack->array) { - lstackDestroy(&lstack, FALSE); - return (L_STACK *)ERROR_PTR("lstack array not made", procName, NULL); - } - - lstack->nalloc = n; - lstack->n = 0; - return lstack; -} - - -/*! - * \brief lstackDestroy() - * - * \param[in,out] plstack will be set to null before returning - * \param[in] freeflag TRUE to free each remaining struct in the array - * \return void - * - *
- * Notes:
- *      (1) If %freeflag is TRUE, frees each struct in the array.
- *      (2) If %freeflag is FALSE but there are elements on the array,
- *          gives a warning and destroys the array.  This will
- *          cause a memory leak of all the items that were on the lstack.
- *          So if the items require their own destroy function, they
- *          must be destroyed before the lstack.
- *      (3) To destroy the lstack, we destroy the ptr array, then
- *          the lstack, and then null the contents of the input ptr.
- * 
- */ -void -lstackDestroy(L_STACK **plstack, - l_int32 freeflag) -{ -void *item; -L_STACK *lstack; - - PROCNAME("lstackDestroy"); - - if (plstack == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - if ((lstack = *plstack) == NULL) - return; - - if (freeflag) { - while(lstack->n > 0) { - item = lstackRemove(lstack); - LEPT_FREE(item); - } - } else if (lstack->n > 0) { - L_WARNING("memory leak of %d items in lstack\n", procName, lstack->n); - } - - if (lstack->auxstack) - lstackDestroy(&lstack->auxstack, freeflag); - - if (lstack->array) - LEPT_FREE(lstack->array); - LEPT_FREE(lstack); - *plstack = NULL; -} - - - -/*---------------------------------------------------------------------* - * Accessors * - *---------------------------------------------------------------------*/ -/*! - * \brief lstackAdd() - * - * \param[in] lstack - * \param[in] item to be added to the lstack - * \return 0 if OK; 1 on error. - */ -l_ok -lstackAdd(L_STACK *lstack, - void *item) -{ - PROCNAME("lstackAdd"); - - if (!lstack) - return ERROR_INT("lstack not defined", procName, 1); - if (!item) - return ERROR_INT("item not defined", procName, 1); - - /* Do we need to extend the array? */ - if (lstack->n >= lstack->nalloc) - lstackExtendArray(lstack); - - /* Store the new pointer */ - lstack->array[lstack->n] = (void *)item; - lstack->n++; - - return 0; -} - - -/*! - * \brief lstackRemove() - * - * \param[in] lstack - * \return ptr to item popped from the top of the lstack, - * or NULL if the lstack is empty or on error - */ -void * -lstackRemove(L_STACK *lstack) -{ -void *item; - - PROCNAME("lstackRemove"); - - if (!lstack) - return ERROR_PTR("lstack not defined", procName, NULL); - - if (lstack->n == 0) - return NULL; - - lstack->n--; - item = lstack->array[lstack->n]; - - return item; -} - - -/*! 
- * \brief lstackExtendArray() - * - * \param[in] lstack - * \return 0 if OK; 1 on error - */ -static l_int32 -lstackExtendArray(L_STACK *lstack) -{ - PROCNAME("lstackExtendArray"); - - if (!lstack) - return ERROR_INT("lstack not defined", procName, 1); - - if ((lstack->array = (void **)reallocNew((void **)&lstack->array, - sizeof(void *) * lstack->nalloc, - 2 * sizeof(void *) * lstack->nalloc)) == NULL) - return ERROR_INT("new lstack array not defined", procName, 1); - - lstack->nalloc = 2 * lstack->nalloc; - return 0; -} - - -/*! - * \brief lstackGetCount() - * - * \param[in] lstack - * \return count, or 0 on error - */ -l_int32 -lstackGetCount(L_STACK *lstack) -{ - PROCNAME("lstackGetCount"); - - if (!lstack) - return ERROR_INT("lstack not defined", procName, 1); - - return lstack->n; -} - - - -/*---------------------------------------------------------------------* - * Debug output * - *---------------------------------------------------------------------*/ -/*! - * \brief lstackPrint() - * - * \param[in] fp file stream - * \param[in] lstack - * \return 0 if OK; 1 on error - */ -l_ok -lstackPrint(FILE *fp, - L_STACK *lstack) -{ -l_int32 i; - - PROCNAME("lstackPrint"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!lstack) - return ERROR_INT("lstack not defined", procName, 1); - - fprintf(fp, "\n Stack: nalloc = %d, n = %d, array = %p\n", - lstack->nalloc, lstack->n, lstack->array); - for (i = 0; i < lstack->n; i++) - fprintf(fp, "array[%d] = %p\n", i, lstack->array[i]); - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stack.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stack.h deleted file mode 100644 index 4fa61141..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stack.h +++ /dev/null @@ -1,70 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_STACK_H -#define LEPTONICA_STACK_H - -/*! - * \file stack.h - * - *
- *       Expandable pointer stack for arbitrary void* data.
- *
- *       The L_Stack is an array of void * ptrs, onto which arbitrary
- *       objects can be stored.  At any time, the number of
- *       stored objects is stack->n.  The object at the bottom
- *       of the stack is at array[0]; the object at the top of
- *       the stack is at array[n-1].  New objects are added
- *       to the top of the stack, at the first available location,
- *       which is array[n].  Objects are removed from the top of the
- *       stack.  When an attempt is made to remove an object from an
- *       empty stack, the result is null.   When the stack becomes
- *       filled, so that n = nalloc, the size is doubled.
- *
- *       The auxiliary stack can be used to store and remove
- *       objects for re-use.  It must be created by a separate
- *       call to pstackCreate().  [Just imagine the chaos if
- *       pstackCreate() created the auxiliary stack!]
- *       pstackDestroy() checks for the auxiliary stack and removes it.
- * 
- */ - - - /*! Expandable pointer stack for arbitrary void* data. - * Note that array[n] is the first null ptr in the array - */ -struct L_Stack -{ - l_int32 nalloc; /*!< size of ptr array */ - l_int32 n; /*!< number of stored elements */ - void **array; /*!< ptr array */ - struct L_Stack *auxstack; /*!< auxiliary stack */ -}; -typedef struct L_Stack L_STACK; - - -#endif /* LEPTONICA_STACK_H */ - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stringcode.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stringcode.c deleted file mode 100644 index e6798238..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stringcode.c +++ /dev/null @@ -1,807 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file stringcode.c - *
- *
- *   Generation of code for storing and extracting serializable
- *   leptonica objects (such as pixa, recog, ...).
- *
- *   The input is a set of files with serialized data.
- *   The output is two files, that must be compiled and linked:
- *     ~ autogen.*.c: code for base64 unencoding the strings and
- *                    deserializing the result.
- *     ~ autogen.*.h: function prototypes and base64 encoded strings
- *                    of the input data
- *
- *   This should work for any data structures in leptonica that have
- *   *Write() and *Read() serialization functions.  An array of 20
- *   of these, including the Pix, is given below.  (The Pix is a special
- *   case, because it is serialized by standardized compression
- *   techniques, instead of a file format determined by leptonica.)
- *
- *   Each time the generator function is invoked, three sets of strings are
- *   produced, which are written into their respective string arrays:
- *     ~ string of serialized, gzipped and base 64 encoded data
- *     ~ case string for base64 decoding, gunzipping and deserialization,
- *       to return the data struct in memory
- *     ~ description string for selecting which struct to return
- *   To create the two output files, a finalize function is invoked.
- *
- *   There are two ways to do this, both shown in prog/autogentest1.c.
- *     ~ Explicitly call strcodeGenerate() for each file with the
- *       serialized data structure, followed by strcodeFinalize().
- *     ~ Put the filenames of the serialized data structures in a file,
- *       and call strcodeCreateFromFile().
- *
- *   The generated code in autogen.X.c and autogen.X.h (where X is an
- *   integer supplied to strcodeCreate()) is then compiled, and the
- *   original data can be regenerated using the function l_autodecode_X().
- *   A test example is found in the two prog files:
- *       prog/autogentest1.c  -- generates autogen.137.c, autogen.137.h
- *       prog/autogentest2.c  -- uses autogen.137.c, autogen.137.h
- *   In general, the generator (e.g., autogentest1) would be compiled and
- *   run before compiling and running the application (e.g., autogentest2).
- *
- *       L_STRCODE       *strcodeCreate()
- *       static void      strcodeDestroy()    (called as part of finalize)
- *       void             strcodeCreateFromFile()
- *       l_int32          strcodeGenerate()
- *       void             strcodeFinalize()
- *       l_int32          l_getStructStrFromFile()   (useful externally)
- *
- *   Static helpers
- *       static l_int32   l_getIndexFromType()
- *       static l_int32   l_getIndexFromStructname()
- *       static l_int32   l_getIndexFromFile()
- *       static char     *l_genDataString()
- *       static char     *l_genCaseString()
- *       static char     *l_genDescrString()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" -#include "stringcode.h" - -#define TEMPLATE1 "stringtemplate1.txt" /* for assembling autogen.*.c */ -#define TEMPLATE2 "stringtemplate2.txt" /* for assembling autogen.*.h */ - - /*! Associations between names and functions */ -struct L_GenAssoc -{ - l_int32 index; - char type[16]; /* e.g., "PIXA" */ - char structname[16]; /* e.g., "Pixa" */ - char reader[16]; /* e.g., "pixaRead" */ - char memreader[20]; /* e.g., "pixaReadMem" */ -}; - - /*! Number of serializable data types */ -static const l_int32 l_ntypes = 19; - /*! Serializable data types */ -static const struct L_GenAssoc l_assoc[] = { - {0, "INVALID", "invalid", "invalid", "invalid" }, - {1, "BOXA", "Boxa", "boxaRead", "boxaReadMem" }, - {2, "BOXAA", "Boxaa", "boxaaRead", "boxaaReadMem" }, - {3, "L_DEWARP", "Dewarp", "dewarpRead", "dewarpReadMem" }, - {4, "L_DEWARPA", "Dewarpa", "dewarpaRead", "dewarpaReadMem" }, - {5, "L_DNA", "L_Dna", "l_dnaRead", "l_dnaReadMem" }, - {6, "L_DNAA", "L_Dnaa", "l_dnaaRead", "l_dnaaReadMem" }, - {7, "DPIX", "DPix", "dpixRead", "dpixReadMem" }, - {8, "FPIX", "FPix", "fpixRead", "fpixReadMem" }, - {9, "NUMA", "Numa", "numaRead", "numaReadMem" }, - {10, "NUMAA", "Numaa", "numaaRead", "numaaReadMem" }, - {11, "PIX", "Pix", "pixRead", "pixReadMem" }, - {12, "PIXA", "Pixa", "pixaRead", "pixaReadMem" }, - {13, "PIXAA", "Pixaa", "pixaaRead", "pixaaReadMem" }, - {14, "PIXACOMP", "Pixacomp", "pixacompRead", "pixacompReadMem" }, - {15, "PIXCMAP", "Pixcmap", "pixcmapRead", "pixcmapReadMem" }, - {16, "PTA", "Pta", "ptaRead", "ptaReadMem" }, - {17, "PTAA", "Ptaa", "ptaaRead", "ptaaReadMem" }, - {18, "RECOG", "Recog", "recogRead", "recogReadMem" }, - {19, "SARRAY", "Sarray", "sarrayRead", "sarrayReadMem" } -}; - -static l_int32 l_getIndexFromType(const char *type, l_int32 *pindex); -static l_int32 l_getIndexFromStructname(const char *sn, l_int32 *pindex); -static l_int32 
l_getIndexFromFile(const char *file, l_int32 *pindex); -static char *l_genDataString(const char *filein, l_int32 ifunc); -static char *l_genCaseString(l_int32 ifunc, l_int32 itype); -static char *l_genDescrString(const char *filein, l_int32 ifunc, l_int32 itype); - -/*---------------------------------------------------------------------*/ -/* Stringcode functions */ -/*---------------------------------------------------------------------*/ -/*! - * \brief strcodeCreate() - * - * \param[in] fileno integer that labels the two output files - * \return initialized L_StrCode, or NULL on error - * - *
- * Notes:
- *      (1) This struct exists to build two files containing code for
- *          any number of data objects.  The two files are named
- *             autogen.[fileno].c
- *             autogen.[fileno].h
- * 
- */ -L_STRCODE * -strcodeCreate(l_int32 fileno) -{ -L_STRCODE *strcode; - - PROCNAME("strcodeCreate"); - - lept_mkdir("lept/auto"); - - if ((strcode = (L_STRCODE *)LEPT_CALLOC(1, sizeof(L_STRCODE))) == NULL) - return (L_STRCODE *)ERROR_PTR("strcode not made", procName, NULL); - - strcode->fileno = fileno; - strcode->function = sarrayCreate(0); - strcode->data = sarrayCreate(0); - strcode->descr = sarrayCreate(0); - return strcode; -} - - -/*! - * \brief strcodeDestroy() - * - * \param[out] pstrcode will be set to null after destroying the sarrays - * \return void - */ -static void -strcodeDestroy(L_STRCODE **pstrcode) -{ -L_STRCODE *strcode; - - PROCNAME("strcodeDestroy"); - - if (pstrcode == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((strcode = *pstrcode) == NULL) - return; - - sarrayDestroy(&strcode->function); - sarrayDestroy(&strcode->data); - sarrayDestroy(&strcode->descr); - LEPT_FREE(strcode); - *pstrcode = NULL; - return; -} - - -/*! - * \brief strcodeCreateFromFile() - * - * \param[in] filein containing filenames of serialized data - * \param[in] fileno integer that labels the two output files - * \param[in] outdir [optional] if null, files are made in /tmp/lept/auto - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The %filein has one filename on each line.
- *          Comment lines begin with "#".
- *      (2) The output is 2 files:
- *             autogen.[fileno].c
- *             autogen.[fileno].h
- * 
- */ -l_ok -strcodeCreateFromFile(const char *filein, - l_int32 fileno, - const char *outdir) -{ -char *fname; -const char *type; -l_uint8 *data; -size_t nbytes; -l_int32 i, n, index; -SARRAY *sa; -L_STRCODE *strcode; - - PROCNAME("strcodeCreateFromFile"); - - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - - if ((data = l_binaryRead(filein, &nbytes)) == NULL) - return ERROR_INT("data not read from file", procName, 1); - sa = sarrayCreateLinesFromString((char *)data, 0); - LEPT_FREE(data); - if (!sa) - return ERROR_INT("sa not made", procName, 1); - if ((n = sarrayGetCount(sa)) == 0) { - sarrayDestroy(&sa); - return ERROR_INT("no filenames in the file", procName, 1); - } - - strcode = strcodeCreate(fileno); - - for (i = 0; i < n; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - if (fname[0] == '#') continue; - if (l_getIndexFromFile(fname, &index)) { - L_ERROR("File %s has no recognizable type\n", procName, fname); - } else { - type = l_assoc[index].type; - L_INFO("File %s is type %s\n", procName, fname, type); - strcodeGenerate(strcode, fname, type); - } - } - strcodeFinalize(&strcode, outdir); - sarrayDestroy(&sa); - return 0; -} - - -/*! - * \brief strcodeGenerate() - * - * \param[in] strcode for accumulating data - * \param[in] filein input file with serialized data - * \param[in] type of data; use the typedef string - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) The generated function name is
- *            l_autodecode_[fileno]()
- *          where [fileno] is the index label for the pair of output files.
- *      (2) To deserialize this data, the function is called with the
- *          argument 'ifunc', which increments each time strcodeGenerate()
- *          is called.
- * 
- */ -l_ok -strcodeGenerate(L_STRCODE *strcode, - const char *filein, - const char *type) -{ -char *strdata, *strfunc, *strdescr; -l_int32 itype; - - PROCNAME("strcodeGenerate"); - - if (!strcode) - return ERROR_INT("strcode not defined", procName, 1); - if (!filein) - return ERROR_INT("filein not defined", procName, 1); - if (!type) - return ERROR_INT("type not defined", procName, 1); - - /* Get the index corresponding to type and validate */ - if (l_getIndexFromType(type, &itype) == 1) - return ERROR_INT("data type unknown", procName, 1); - - /* Generate the encoded data string */ - if ((strdata = l_genDataString(filein, strcode->ifunc)) == NULL) - return ERROR_INT("strdata not made", procName, 1); - sarrayAddString(strcode->data, strdata, L_INSERT); - - /* Generate the case data for the decoding function */ - strfunc = l_genCaseString(strcode->ifunc, itype); - sarrayAddString(strcode->function, strfunc, L_INSERT); - - /* Generate row of table for function type selection */ - strdescr = l_genDescrString(filein, strcode->ifunc, itype); - sarrayAddString(strcode->descr, strdescr, L_INSERT); - - strcode->n++; - strcode->ifunc++; - return 0; -} - - -/*! 
- * \brief strcodeFinalize() - * - * \param[in,out] pstrcode destroys and sets to null after .c and .h files - * have been generated - * \param[in] outdir [optional] if NULL, make files in /tmp/lept/auto - * \return void - */ -l_int32 -strcodeFinalize(L_STRCODE **pstrcode, - const char *outdir) -{ -char buf[256]; -char *filestr, *casestr, *descr, *datastr, *realoutdir; -l_int32 actstart, end, newstart, fileno, nbytes; -size_t size; -L_STRCODE *strcode; -SARRAY *sa1, *sa2, *sa3; - - PROCNAME("strcodeFinalize"); - - lept_mkdir("lept/auto"); - - if (!pstrcode || *pstrcode == NULL) - return ERROR_INT("No input data", procName, 1); - strcode = *pstrcode; - if (!outdir) { - L_INFO("no outdir specified; writing to /tmp/lept/auto\n", procName); - realoutdir = stringNew("/tmp/lept/auto"); - } else { - realoutdir = stringNew(outdir); - } - - /* ------------------------------------------------------- */ - /* Make the output autogen.*.c file */ - /* ------------------------------------------------------- */ - - /* Make array of textlines from TEMPLATE1 */ - filestr = (char *)l_binaryRead(TEMPLATE1, &size); - sa1 = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - sa3 = sarrayCreate(0); - - /* Copyright notice */ - sarrayParseRange(sa1, 0, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* File name comment */ - fileno = strcode->fileno; - snprintf(buf, sizeof(buf), " * autogen.%d.c", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* More text */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* Description of function types by index */ - descr = sarrayToString(strcode->descr, 1); - descr[strlen(descr) - 1] = '\0'; - sarrayAddString(sa3, descr, L_INSERT); - - /* Includes */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - snprintf(buf, sizeof(buf), "#include \"autogen.%d.h\"", 
fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Header for auto-generated deserializers */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* Function name (as comment) */ - snprintf(buf, sizeof(buf), " * \\brief l_autodecode_%d()", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Input and return values */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* Function name */ - snprintf(buf, sizeof(buf), "l_autodecode_%d(l_int32 index)", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Stack vars */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* Declaration of nfunc on stack */ - snprintf(buf, sizeof(buf), "l_int32 nfunc = %d;\n", strcode->n); - sarrayAddString(sa3, buf, L_COPY); - - /* Declaration of PROCNAME */ - snprintf(buf, sizeof(buf), " PROCNAME(\"l_autodecode_%d\");", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Test input variables */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* Insert case string */ - casestr = sarrayToString(strcode->function, 0); - casestr[strlen(casestr) - 1] = '\0'; - sarrayAddString(sa3, casestr, L_INSERT); - - /* End of function */ - sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa1, actstart, end); - - /* Flatten to string and output to autogen*.c file */ - filestr = sarrayToString(sa3, 1); - nbytes = strlen(filestr); - snprintf(buf, sizeof(buf), "%s/autogen.%d.c", realoutdir, fileno); - l_binaryWrite(buf, "w", filestr, nbytes); - LEPT_FREE(filestr); - sarrayDestroy(&sa1); - sarrayDestroy(&sa3); - - /* ------------------------------------------------------- */ - /* Make the output autogen.*.h file */ - /* ------------------------------------------------------- */ - 
- /* Make array of textlines from TEMPLATE2 */ - filestr = (char *)l_binaryRead(TEMPLATE2, &size); - sa2 = sarrayCreateLinesFromString(filestr, 1); - LEPT_FREE(filestr); - sa3 = sarrayCreate(0); - - /* Copyright notice */ - sarrayParseRange(sa2, 0, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* File name comment */ - snprintf(buf, sizeof(buf), " * autogen.%d.h", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* More text */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Beginning header protection */ - snprintf(buf, sizeof(buf), "#ifndef LEPTONICA_AUTOGEN_%d_H\n" - "#define LEPTONICA_AUTOGEN_%d_H", - fileno, fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Prototype header text */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Prototype declaration */ - snprintf(buf, sizeof(buf), "void *l_autodecode_%d(l_int32 index);", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Prototype trailer text */ - sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0); - sarrayAppendRange(sa3, sa2, actstart, end); - - /* Insert serialized data strings */ - datastr = sarrayToString(strcode->data, 1); - datastr[strlen(datastr) - 1] = '\0'; - sarrayAddString(sa3, datastr, L_INSERT); - - /* End header protection */ - snprintf(buf, sizeof(buf), "#endif /* LEPTONICA_AUTOGEN_%d_H */", fileno); - sarrayAddString(sa3, buf, L_COPY); - - /* Flatten to string and output to autogen*.h file */ - filestr = sarrayToString(sa3, 1); - nbytes = strlen(filestr); - snprintf(buf, sizeof(buf), "%s/autogen.%d.h", realoutdir, fileno); - l_binaryWrite(buf, "w", filestr, nbytes); - LEPT_FREE(filestr); - LEPT_FREE(realoutdir); - sarrayDestroy(&sa2); - sarrayDestroy(&sa3); - - /* Cleanup */ - strcodeDestroy(pstrcode); - return 0; -} - - -/*! 
- * \brief l_getStructStrFromFile() - * - * \param[in] filename - * \param[in] field (L_STR_TYPE, L_STR_NAME, L_STR_READER, L_STR_MEMREADER) - * \param[out] pstr struct string for this file - * \return 0 if found, 1 on error. - * - *
- * Notes:
- *      (1) For example, if %field == L_STR_NAME, and the file is a serialized
- *          pixa, this will return "Pixa", the name of the struct.
- *      (2) Caller must free the returned string.
- * 
- */ -l_int32 -l_getStructStrFromFile(const char *filename, - l_int32 field, - char **pstr) -{ -l_int32 index; - - PROCNAME("l_getStructStrFromFile"); - - if (!pstr) - return ERROR_INT("&str not defined", procName, 1); - *pstr = NULL; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (field != L_STR_TYPE && field != L_STR_NAME && - field != L_STR_READER && field != L_STR_MEMREADER) - return ERROR_INT("invalid field", procName, 1); - - if (l_getIndexFromFile(filename, &index)) - return ERROR_INT("index not retrieved", procName, 1); - if (field == L_STR_TYPE) - *pstr = stringNew(l_assoc[index].type); - else if (field == L_STR_NAME) - *pstr = stringNew(l_assoc[index].structname); - else if (field == L_STR_READER) - *pstr = stringNew(l_assoc[index].reader); - else /* field == L_STR_MEMREADER */ - *pstr = stringNew(l_assoc[index].memreader); - return 0; -} - - -/*---------------------------------------------------------------------*/ -/* Static helpers */ -/*---------------------------------------------------------------------*/ -/*! - * \brief l_getIndexFromType() - * - * \param[in] type e.g., "PIXA" - * \param[out] pindex found index - * \return 0 if found, 1 if not. - * - *
- * Notes:
- *      (1) For valid type, %found == true and %index > 0.
- * 
- */ -static l_int32 -l_getIndexFromType(const char *type, - l_int32 *pindex) -{ -l_int32 i, found; - - PROCNAME("l_getIndexFromType"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!type) - return ERROR_INT("type string not defined", procName, 1); - - found = 0; - for (i = 1; i <= l_ntypes; i++) { - if (strcmp(type, l_assoc[i].type) == 0) { - found = 1; - *pindex = i; - break; - } - } - return !found; -} - - -/*! - * \brief l_getIndexFromStructname() - * - * \param[in] sn structname e.g., "Pixa" - * \param[out] pindex found index - * \return 0 if found, 1 if not. - * - *
- * Notes:
- *      (1) This is used to identify the type of serialized file;
- *          the first word in the file is the structname.
- *      (2) For valid structname, %found == true and %index > 0.
- * 
- */ -static l_int32 -l_getIndexFromStructname(const char *sn, - l_int32 *pindex) -{ -l_int32 i, found; - - PROCNAME("l_getIndexFromStructname"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!sn) - return ERROR_INT("sn string not defined", procName, 1); - - found = 0; - for (i = 1; i <= l_ntypes; i++) { - if (strcmp(sn, l_assoc[i].structname) == 0) { - found = 1; - *pindex = i; - break; - } - } - return !found; -} - - -/*! - * \brief l_getIndexFromFile() - * - * \param[in] filename - * \param[out] pindex found index - * \return 0 if found, 1 on error. - */ -static l_int32 -l_getIndexFromFile(const char *filename, - l_int32 *pindex) -{ -char buf[256]; -char *word; -FILE *fp; -l_int32 notfound, format; -SARRAY *sa; - - PROCNAME("l_getIndexFromFile"); - - if (!pindex) - return ERROR_INT("&index not defined", procName, 1); - *pindex = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - /* Open the stream, read lines until you find one with more - * than a newline, and grab the first word. */ - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - do { - if ((fgets(buf, sizeof(buf), fp)) == NULL) { - fclose(fp); - return ERROR_INT("fgets read fail", procName, 1); - } - } while (buf[0] == '\n'); - fclose(fp); - sa = sarrayCreateWordsFromString(buf); - word = sarrayGetString(sa, 0, L_NOCOPY); - - /* Find the index associated with the word. If it is not - * found, test to see if the file is a compressed pix. */ - notfound = l_getIndexFromStructname(word, pindex); - sarrayDestroy(&sa); - if (notfound) { /* maybe a Pix */ - if (findFileFormat(filename, &format) == 0) { - l_getIndexFromStructname("Pix", pindex); - } else { - return ERROR_INT("no file type identified", procName, 1); - } - } - - return 0; -} - - -/*! 
- * \brief l_genDataString() - * - * \param[in] filein input file of serialized data - * \param[in] ifunc index into set of functions in output file - * \return encoded ascii data string, or NULL on error reading from file - */ -static char * -l_genDataString(const char *filein, - l_int32 ifunc) -{ -char buf[80]; -char *cdata1, *cdata2, *cdata3; -l_uint8 *data1, *data2; -l_int32 csize1, csize2; -size_t size1, size2; -SARRAY *sa; - - PROCNAME("l_genDataString"); - - if (!filein) - return (char *)ERROR_PTR("filein not defined", procName, NULL); - - /* Read it in, gzip it, encode, and reformat. We gzip because some - * serialized data has a significant amount of ascii content. */ - if ((data1 = l_binaryRead(filein, &size1)) == NULL) - return (char *)ERROR_PTR("bindata not returned", procName, NULL); - data2 = zlibCompress(data1, size1, &size2); - cdata1 = encodeBase64(data2, size2, &csize1); - cdata2 = reformatPacked64(cdata1, csize1, 4, 72, 1, &csize2); - LEPT_FREE(data1); - LEPT_FREE(data2); - LEPT_FREE(cdata1); - - /* Prepend the string declaration signature and put it together */ - sa = sarrayCreate(3); - snprintf(buf, sizeof(buf), "static const char *l_strdata_%d =\n", ifunc); - sarrayAddString(sa, buf, L_COPY); - sarrayAddString(sa, cdata2, L_INSERT); - sarrayAddString(sa, ";\n", L_COPY); - cdata3 = sarrayToString(sa, 0); - sarrayDestroy(&sa); - return cdata3; -} - - -/*! - * \brief l_genCaseString() - * - * \param[in] ifunc index into set of functions in generated file - * \param[in] itype index into type of function to be used - * \return case string for this decoding function - * - *
- * Notes:
- *      (1) %ifunc and %itype have been validated, so no error can occur
- * 
- */ -static char * -l_genCaseString(l_int32 ifunc, - l_int32 itype) -{ -char buf[256]; -char *code = NULL; - - snprintf(buf, sizeof(buf), " case %d:\n", ifunc); - stringJoinIP(&code, buf); - snprintf(buf, sizeof(buf), - " data1 = decodeBase64(l_strdata_%d, strlen(l_strdata_%d), " - "&size1);\n", ifunc, ifunc); - stringJoinIP(&code, buf); - stringJoinIP(&code, - " data2 = zlibUncompress(data1, size1, &size2);\n"); - snprintf(buf, sizeof(buf), - " result = (void *)%s(data2, size2);\n", - l_assoc[itype].memreader); - stringJoinIP(&code, buf); - stringJoinIP(&code, " lept_free(data1);\n"); - stringJoinIP(&code, " lept_free(data2);\n"); - stringJoinIP(&code, " break;\n"); - return code; -} - - -/*! - * \brief l_genDescrString() - * - * \param[in] filein input file of serialized data - * \param[in] ifunc index into set of functions in generated file - * \param[in] itype index into type of function to be used - * \return description string for this decoding function - */ -static char * -l_genDescrString(const char *filein, - l_int32 ifunc, - l_int32 itype) -{ -char buf[256]; -char *tail; - - PROCNAME("l_genDescrString"); - - if (!filein) - return (char *)ERROR_PTR("filein not defined", procName, NULL); - - splitPathAtDirectory(filein, NULL, &tail); - snprintf(buf, sizeof(buf), " * %-2d %-10s %-14s %s", - ifunc, l_assoc[itype].type, l_assoc[itype].reader, tail); - - LEPT_FREE(tail); - return stringNew(buf); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stringcode.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stringcode.h deleted file mode 100644 index 4510bdb0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/stringcode.h +++ /dev/null @@ -1,61 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_STRINGCODE_H -#define LEPTONICA_STRINGCODE_H - -/*! - * \file stringcode.h - * - * Data structure to hold accumulating generated code for storing - * and extracting serializable leptonica objects (e.g., pixa, recog). - * - * Also a flag for selecting a string from the L_GenAssoc struct - * in stringcode. 
- */ - -struct L_StrCode -{ - l_int32 fileno; /*!< index for function and output file names */ - l_int32 ifunc; /*!< index into struct currently being stored */ - SARRAY *function; /*!< store case code for extraction */ - SARRAY *data; /*!< store base64 encoded data as strings */ - SARRAY *descr; /*!< store line in description table */ - l_int32 n; /*!< number of data strings */ -}; -typedef struct L_StrCode L_STRCODE; - - - /*! Select string in stringcode for a specific serializable data type */ -/*! Stringcode Select */ -enum { - L_STR_TYPE = 0, /*!< typedef for the data type */ - L_STR_NAME = 1, /*!< name of the data type */ - L_STR_READER = 2, /*!< reader to get the data type from file */ - L_STR_MEMREADER = 3 /*!< reader to get the compressed string in memory */ -}; - -#endif /* LEPTONICA_STRINGCODE_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/strokes.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/strokes.c deleted file mode 100644 index d13c8857..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/strokes.c +++ /dev/null @@ -1,439 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file strokes.c - *
- *
- *      Operations on 1 bpp images to:
- *      (1) measure stroke parameters, such as length and average width
- *      (2) change the average stroke width to a given value by eroding
- *          or dilating the image.
- *
- *      These operations are intended to operate on a single text
- *      character, to regularize the stroke width. It is expected
- *      that character matching by correlation, as used in the recog
- *      application, can often be improved by pre-processing both
- *      template and character images to a fixed stroke width.
- *
- *      Stroke parameter measurement
- *            l_int32      pixFindStrokeLength()
- *            l_int32      pixFindStrokeWidth()
- *            NUMA        *pixaFindStrokeWidth()
- *
- *      Stroke width regulation
- *            PIXA        *pixaModifyStrokeWidth()
- *            PIX         *pixModifyStrokeWidth()
- *            PIXA        *pixaSetStrokeWidth()
- *            PIX         *pixSetStrokeWidth()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/*-----------------------------------------------------------------* - * Stroke parameter measurement * - *-----------------------------------------------------------------*/ -/*! - * \brief pixFindStrokeLength() - * - * \param[in] pixs 1 bpp - * \param[in] tab8 [optional] table for counting fg pixels; can be NULL - * \param[out] plength estimated length of the strokes - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Returns half the number of fg boundary pixels.
- * 
- */ -l_ok -pixFindStrokeLength(PIX *pixs, - l_int32 *tab8, - l_int32 *plength) -{ -l_int32 n; -l_int32 *tab; -PIX *pix1; - - PROCNAME("pixFindStrokeLength"); - - if (!plength) - return ERROR_INT("&length not defined", procName, 1); - *plength = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - pix1 = pixExtractBoundary(pixs, 1); - tab = (tab8) ? tab8 : makePixelSumTab8(); - pixCountPixels(pix1, &n, tab); - *plength = n / 2; - if (!tab8) LEPT_FREE(tab); - pixDestroy(&pix1); - return 0; -} - - -/*! - * \brief pixFindStrokeWidth() - * - * \param[in] pixs 1 bpp - * \param[in] thresh fractional count threshold relative to distance 1 - * \param[in] tab8 [optional] table for counting fg pixels; can be NULL - * \param[out] pwidth estimated width of the strokes - * \param[out] pnahisto [optional] histo of pixel distances from bg - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This uses two methods to estimate the stroke width:
- *          (a) half the fg boundary length
- *          (b) a value derived from the histogram of the fg distance transform
- *      (2) Distance is measured in 8-connected
- *      (3) %thresh is the minimum fraction N(dist=d)/N(dist=1) of pixels
- *          required to determine if the pixels at distance d are above
- *          the noise. It is typically about 0.15.
- * 
- */ -l_ok -pixFindStrokeWidth(PIX *pixs, - l_float32 thresh, - l_int32 *tab8, - l_float32 *pwidth, - NUMA **pnahisto) -{ -l_int32 i, n, count, length, first, last; -l_int32 *tab; -l_float32 width1, width2, ratio, extra; -l_float32 *fa; -NUMA *na1, *na2; -PIX *pix1; - - PROCNAME("pixFindStrokeWidth"); - - if (!pwidth) - return ERROR_INT("&width not defined", procName, 1); - *pwidth = 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - tab = (tab8) ? tab8 : makePixelSumTab8(); - - /* ------- Method 1: via boundary length ------- */ - /* The computed stroke length is a bit larger than that actual - * length, because of the addition of the 'caps' at the - * stroke ends. Therefore the computed width is a bit - * smaller than the average width. */ - pixFindStrokeLength(pixs, tab8, &length); - pixCountPixels(pixs, &count, tab8); - width1 = (l_float32)count / (l_float32)length; - - /* ------- Method 2: via distance transform ------- */ - /* First get the histogram of distances */ - pix1 = pixDistanceFunction(pixs, 8, 8, L_BOUNDARY_BG); - na1 = pixGetGrayHistogram(pix1, 1); - pixDestroy(&pix1); - numaGetNonzeroRange(na1, 0.1, &first, &last); - na2 = numaClipToInterval(na1, 0, last); - numaWriteStderr(na2); - - /* Find the bucket with the largest distance whose contents - * exceed the threshold. */ - fa = numaGetFArray(na2, L_NOCOPY); - n = numaGetCount(na2); - for (i = n - 1; i > 0; i--) { - ratio = fa[i] / fa[1]; - if (ratio > thresh) break; - } - /* Let the last skipped bucket contribute to the stop bucket. - * This is the 'extra' term below. The result may be a slight - * over-correction, so the computed width may be a bit larger - * than the average width. */ - extra = (i < n - 1) ? 
fa[i + 1] / fa[1] : 0; - width2 = 2.0 * (i - 1.0 + ratio + extra); - lept_stderr("width1 = %5.2f, width2 = %5.2f\n", width1, width2); - - /* Average the two results */ - *pwidth = (width1 + width2) / 2.0; - - if (!tab8) LEPT_FREE(tab); - numaDestroy(&na1); - if (pnahisto) - *pnahisto = na2; - else - numaDestroy(&na2); - return 0; -} - - -/*! - * \brief pixaFindStrokeWidth() - * - * \param[in] pixa of 1 bpp images - * \param[in] thresh fractional count threshold relative to distance 1 - * \param[in] tab8 [optional] table for counting fg pixels; can be NULL - * \param[in] debug 1 for debug output; 0 to skip - * \return na array of stroke widths for each pix in %pixa; NULL on error - * - *
- * Notes:
- *      (1) See pixFindStrokeWidth() for details.
- * 
- */ -NUMA * -pixaFindStrokeWidth(PIXA *pixa, - l_float32 thresh, - l_int32 *tab8, - l_int32 debug) -{ -l_int32 i, n, same, maxd; -l_int32 *tab; -l_float32 width; -NUMA *na; -PIX *pix; - - PROCNAME("pixaFindStrokeWidth"); - - if (!pixa) - return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); - pixaVerifyDepth(pixa, &same, &maxd); - if (maxd > 1) - return (NUMA *)ERROR_PTR("pix not all 1 bpp", procName, NULL); - - tab = (tab8) ? tab8 : makePixelSumTab8(); - - n = pixaGetCount(pixa); - na = numaCreate(n); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixFindStrokeWidth(pix, thresh, tab8, &width, NULL); - numaAddNumber(na, width); - pixDestroy(&pix); - } - - if (!tab8) LEPT_FREE(tab); - return na; -} - - -/*-----------------------------------------------------------------* - * Change stroke width * - *-----------------------------------------------------------------*/ -/*! - * \brief pixaModifyStrokeWidth() - * - * \param[in] pixas of 1 bpp pix - * \param[out] targetw desired width for strokes in each pix - * \return pixa with modified stroke widths, or NULL on error - */ -PIXA * -pixaModifyStrokeWidth(PIXA *pixas, - l_float32 targetw) -{ -l_int32 i, n, same, maxd; -l_float32 width; -NUMA *na; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaModifyStrokeWidth"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (targetw < 1) - return (PIXA *)ERROR_PTR("target width < 1", procName, NULL); - pixaVerifyDepth(pixas, &same, &maxd); - if (maxd > 1) - return (PIXA *)ERROR_PTR("pix not all 1 bpp", procName, NULL); - - na = pixaFindStrokeWidth(pixas, 0.1, NULL, 0); - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - numaGetFValue(na, i, &width); - pix2 = pixModifyStrokeWidth(pix1, width, targetw); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - numaDestroy(&na); - return pixad; -} - - -/*! 
- * \brief pixModifyStrokeWidth() - * - * \param[in] pixs of 1 bpp pix - * \param[in] width measured average stroke width - * \param[in] targetw desired stroke width - * \return pix with modified stroke width, or NULL on error - */ -PIX * -pixModifyStrokeWidth(PIX *pixs, - l_float32 width, - l_float32 targetw) -{ -char buf[32]; -l_int32 diff, size; - - PROCNAME("pixModifyStrokeWidth"); - - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (targetw < 1) - return (PIX *)ERROR_PTR("target width < 1", procName, NULL); - - diff = lept_roundftoi(targetw - width); - if (diff == 0) return pixCopy(NULL, pixs); - - size = L_ABS(diff) + 1; - if (diff < 0) /* erode */ - snprintf(buf, sizeof(buf), "e%d.%d", size, size); - else /* diff > 0; dilate */ - snprintf(buf, sizeof(buf), "d%d.%d", size, size); - return pixMorphSequence(pixs, buf, 0); -} - - -/*! - * \brief pixaSetStrokeWidth() - * - * \param[in] pixas of 1 bpp pix - * \param[in] width set stroke width to this value, in [1 ... 100]. - * \param[in] thinfirst 1 to thin all pix to a skeleton first; 0 to skip - * \param[in] connectivity 4 or 8, to be used if %thinfirst == 1 - * \return pixa with all stroke widths being %width, or NULL on error - * - *
- * Notes:
- *      (1) If %thinfirst == 1, thin to a skeleton using the specified
- *          %connectivity.  Use %thinfirst == 0 if all pix in pixas
- *          have already been thinned as far as possible.
- *      (2) The image is dilated to the required %width.  This dilation
- *          is not connectivity preserving, so this is typically
- *          used in a situation where merging of c.c. in the individual
- *          pix is not a problem; e.g., where each pix is a single c.c.
- * 
- */ -PIXA * -pixaSetStrokeWidth(PIXA *pixas, - l_int32 width, - l_int32 thinfirst, - l_int32 connectivity) -{ -l_int32 i, n, maxd, same; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaSetStrokeWidth"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (width < 1 || width > 100) - return (PIXA *)ERROR_PTR("width not in [1 ... 100]", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIXA *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - pixaVerifyDepth(pixas, &same, &maxd); - if (maxd > 1) - return (PIXA *)ERROR_PTR("pix are not all 1 bpp", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - pix2 = pixSetStrokeWidth(pix1, width, thinfirst, connectivity); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - return pixad; -} - - -/*! - * \brief pixSetStrokeWidth() - * - * \param[in] pixs 1 bpp - * \param[in] width set stroke width to this value, in [1 ... 100]. - * \param[in] thinfirst 1 to thin all pix to a skeleton first; 0 to skip - * \param[in] connectivity 4 or 8, to be used if %thinfirst == 1 - * \return pixd with stroke width set to %width, or NULL on error - * - *
- * Notes:
- *      (1) See notes in pixaSetStrokeWidth().
- *      (2) A white border of sufficient width to avoid boundary
- *          artifacts in the thickening step is added before thinning.
- *      (3) %connectivity == 8 usually gives a slightly smoother result.
- * 
- */ -PIX * -pixSetStrokeWidth(PIX *pixs, - l_int32 width, - l_int32 thinfirst, - l_int32 connectivity) -{ -char buf[16]; -l_int32 border; -PIX *pix1, *pix2, *pixd; - - PROCNAME("pixSetStrokeWidth"); - - if (!pixs || (pixGetDepth(pixs) != 1)) - return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); - if (width < 1 || width > 100) - return (PIX *)ERROR_PTR("width not in [1 ... 100]", procName, NULL); - if (connectivity != 4 && connectivity != 8) - return (PIX *)ERROR_PTR("connectivity not 4 or 8", procName, NULL); - - if (!thinfirst && width == 1) /* nothing to do */ - return pixCopy(NULL, pixs); - - /* Add a white border */ - border = width / 2; - pix1 = pixAddBorder(pixs, border, 0); - - /* Thin to a skeleton */ - if (thinfirst) - pix2 = pixThinConnected(pix1, L_THIN_FG, connectivity, 0); - else - pix2 = pixClone(pix1); - pixDestroy(&pix1); - - /* Dilate */ - snprintf(buf, sizeof(buf), "D%d.%d", width, width); - pixd = pixMorphSequence(pix2, buf, 0); - pixCopyText(pixd, pixs); - pixDestroy(&pix2); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sudoku.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sudoku.c deleted file mode 100644 index d4e31e24..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sudoku.c +++ /dev/null @@ -1,884 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file sudoku.c - *
- *
- *      Solve a sudoku by brute force search
- *
- *      Read input data from file or string
- *          l_int32         *sudokuReadFile()
- *          l_int32         *sudokuReadString()
- *
- *      Create/destroy
- *          L_SUDOKU        *sudokuCreate()
- *          void             sudokuDestroy()
- *
- *      Solve the puzzle
- *          l_int32          sudokuSolve()
- *          static l_int32   sudokuValidState()
- *          static l_int32   sudokuNewGuess()
- *          static l_int32   sudokuTestState()
- *
- *      Test for uniqueness
- *          l_int32          sudokuTestUniqueness()
- *          static l_int32   sudokuCompareState()
- *          static l_int32  *sudokuRotateArray()
- *
- *      Generation
- *          L_SUDOKU        *sudokuGenerate()
- *
- *      Output
- *          l_int32          sudokuOutput()
- *
- *  Solving sudokus is a somewhat addictive pastime.  The rules are
- *  simple but it takes just enough concentration to make it rewarding
- *  when you find a number.  And you get 50 to 60 such rewards each time
- *  you complete one.  The downside is that you could have been doing
- *  something more creative, like keying out a new plant, staining
- *  the deck, or even writing a computer program to discourage your
- *  wife from doing sudokus.
- *
- *  My original plan for the sudoku solver was somewhat grandiose.
- *  The program would model the way a person solves the problem.
- *  It would examine each empty position and determine how many possible
- *  numbers could fit.  The empty positions would be entered in a priority
- *  queue keyed on the number of possible numbers that could fit.
- *  If there existed a position where only a single number would work,
- *  it would greedily take it.  Otherwise it would consider a
- *  positions that could accept two and make a guess, with backtracking
- *  if an impossible state were reached.  And so on.
- *
- *  Then one of my colleagues announced she had solved the problem
- *  by brute force and it was fast.  At that point the original plan was
- *  dead in the water, because the two top requirements for a leptonica
- *  algorithm are (1) as simple as possible and (2) fast.  The brute
- *  force approach starts at the UL corner, and in succession at each
- *  blank position it finds the first valid number (testing in
- *  sequence from 1 to 9).  When no number will fit a blank position
- *  it backtracks, choosing the next valid number in the previous
- *  blank position.
- *
- *  This is an inefficient method for pruning the space of solutions
- *  (imagine backtracking from the LR corner back to the UL corner
- *  and starting over with a new guess), but it nevertheless gets
- *  the job done quickly.  I have made no effort to optimize
- *  it, because it is fast: a 5-star (highest difficulty) sudoku might
- *  require a million guesses and take 0.05 sec.  (This BF implementation
- *  does about 20M guesses/sec at 3 GHz.)
- *
- *  Proving uniqueness of a sudoku solution is tricker than finding
- *  a solution (or showing that no solution exists).  A good indication
- *  that a solution is unique is if we get the same result solving
- *  by brute force when the puzzle is also rotated by 90, 180 and 270
- *  degrees.  If there are multiple solutions, it seems unlikely
- *  that you would get the same solution four times in a row, using a
- *  brute force method that increments guesses and scans LR/TB.
- *  The function sudokuTestUniqueness() does this.
- *
- *  And given a function that can determine uniqueness, it is
- *  easy to generate valid sudokus.  We provide sudokuGenerate(),
- *  which starts with some valid initial solution, and randomly
- *  removes numbers, stopping either when a minimum number of non-zero
- *  elements are left, or when it becomes difficult to remove another
- *  element without destroying the uniqueness of the solution.
- *
- *  For further reading, see the Wikipedia articles:
- *     (1) http://en.wikipedia.org/wiki/Algorithmics_of_sudoku
- *     (2) http://en.wikipedia.org/wiki/Sudoku
- *
- *  How many 9x9 sudokus are there?  Here are the numbers.
- *   ~ From ref(1), there are about 6 x 10^27 "latin squares", where
- *     each row and column has all 9 digits.
- *   ~ There are 7.2 x 10^21 actual solutions, having the added
- *     constraint in each of the 9 3x3 squares.  (The constraint
- *     reduced the number by the fraction 1.2 x 10^(-6).)
- *   ~ There are a mere 5.5 billion essentially different solutions (EDS),
- *     when symmetries (rotation, reflection, permutation and relabelling)
- *     are removed.
- *   ~ Thus there are 1.3 x 10^12 solutions that can be derived by
- *     symmetry from each EDS.  Can we account for these?
- *   ~ Sort-of.  From an EDS, you can derive (3!)^8 = 1.7 million solutions
- *     by simply permuting rows and columns.  (Do you see why it is
- *     not (3!)^6 ?)
- *   ~ Also from an EDS, you can derive 9! solutions by relabelling,
- *     and 4 solutions by rotation, for a total of 1.45 million solutions
- *     by relabelling and rotation.  Then taking the product, by symmetry
- *     we can derive 1.7M x 1.45M = 2.45 trillion solutions from each EDS.
- *     (Something is off by about a factor of 2 -- close enough.)
- *
- *  Another interesting fact is that there are apparently 48K EDS sudokus
- *  (with unique solutions) that have only 17 givens.  No sudokus are known
- *  with less than 17, but there exists no proof that this is the minimum.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -static l_int32 sudokuValidState(l_int32 *state); -static l_int32 sudokuNewGuess(L_SUDOKU *sud); -static l_int32 sudokuTestState(l_int32 *state, l_int32 index); -static l_int32 sudokuCompareState(L_SUDOKU *sud1, L_SUDOKU *sud2, - l_int32 quads, l_int32 *psame); -static l_int32 *sudokuRotateArray(l_int32 *array, l_int32 quads); - -/* --------------------------------------------------------------- */ -/* An example of a valid solution */ -/* --------------------------------------------------------------- * -static const char valid_solution[] = "3 8 7 2 6 4 1 9 5 " - "2 6 5 8 9 1 4 3 7 " - "1 4 9 5 3 7 6 8 2 " - "5 2 3 7 1 6 8 4 9 " - "7 1 6 9 4 8 2 5 3 " - "8 9 4 3 5 2 7 1 6 " - "9 7 2 1 8 5 3 6 4 " - "4 3 1 6 7 9 5 2 8 " - "6 5 8 4 2 3 9 7 1 "; -*/ - - -/*---------------------------------------------------------------------* - * Read input data from file or string * - *---------------------------------------------------------------------*/ -/*! - * \brief sudokuReadFile() - * - * \param[in] filename formatted sudoku file - * \return array of 81 numbers, or NULL on error - * - *
- * Notes:
- *      (1) The file format has:
- *          * any number of comment lines beginning with '#'
- *          * a set of 9 lines, each having 9 digits (0-9) separated
- *            by a space
- * 
- */ -l_int32 * -sudokuReadFile(const char *filename) -{ -char *str, *strj; -l_uint8 *data; -l_int32 i, j, nlines, val, index, error; -l_int32 *array; -size_t size; -SARRAY *saline, *sa1, *sa2; - - PROCNAME("sudokuReadFile"); - - if (!filename) - return (l_int32 *)ERROR_PTR("filename not defined", procName, NULL); - data = l_binaryRead(filename, &size); - sa1 = sarrayCreateLinesFromString((char *)data, 0); - sa2 = sarrayCreate(9); - - /* Filter out the comment lines; verify that there are 9 data lines */ - nlines = sarrayGetCount(sa1); - for (i = 0; i < nlines; i++) { - str = sarrayGetString(sa1, i, L_NOCOPY); - if (str[0] != '#') - sarrayAddString(sa2, str, L_COPY); - } - LEPT_FREE(data); - sarrayDestroy(&sa1); - nlines = sarrayGetCount(sa2); - if (nlines != 9) { - sarrayDestroy(&sa2); - L_ERROR("file has %d lines\n", procName, nlines); - return (l_int32 *)ERROR_PTR("invalid file", procName, NULL); - } - - /* Read the data into the array, verifying that each data - * line has 9 numbers. */ - error = FALSE; - array = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32)); - for (i = 0, index = 0; i < 9; i++) { - str = sarrayGetString(sa2, i, L_NOCOPY); - saline = sarrayCreateWordsFromString(str); - if (sarrayGetCount(saline) != 9) { - error = TRUE; - sarrayDestroy(&saline); - break; - } - for (j = 0; j < 9; j++) { - strj = sarrayGetString(saline, j, L_NOCOPY); - if (sscanf(strj, "%d", &val) != 1) - error = TRUE; - else - array[index++] = val; - } - sarrayDestroy(&saline); - if (error) break; - } - sarrayDestroy(&sa2); - - if (error) { - LEPT_FREE(array); - return (l_int32 *)ERROR_PTR("invalid data", procName, NULL); - } - - return array; -} - - -/*! - * \brief sudokuReadString() - * - * \param[in] str formatted input data - * \return array of 81 numbers, or NULL on error - * - *
- * Notes:
- *      (1) The string is formatted as 81 single digits, each separated
- *          by 81 spaces.
- * 
- */ -l_int32 * -sudokuReadString(const char *str) -{ -l_int32 i; -l_int32 *array; - - PROCNAME("sudokuReadString"); - - if (!str) - return (l_int32 *)ERROR_PTR("str not defined", procName, NULL); - - /* Read in the initial solution */ - array = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32)); - for (i = 0; i < 81; i++) { - if (sscanf(str + 2 * i, "%d ", &array[i]) != 1) { - LEPT_FREE(array); - return (l_int32 *)ERROR_PTR("invalid format", procName, NULL); - } - } - - return array; -} - - -/*---------------------------------------------------------------------* - * Create/destroy sudoku * - *---------------------------------------------------------------------*/ -/*! - * \brief sudokuCreate() - * - * \param[in] array 81 numbers, 9 rows of 9 numbers each - * \return l_sudoku, or NULL on error - * - *
- * Notes:
- *      (1) The input array has 0 for the unknown values, and 1-9
- *          for the known initial values.  It is generated from
- *          a file using sudokuReadInput(), which checks that the file
- *          data has 81 numbers in 9 rows.
- * 
- */ -L_SUDOKU * -sudokuCreate(l_int32 *array) -{ -l_int32 i, val, locs_index; -L_SUDOKU *sud; - - PROCNAME("sudokuCreate"); - - if (!array) - return (L_SUDOKU *)ERROR_PTR("array not defined", procName, NULL); - - locs_index = 0; /* into locs array */ - sud = (L_SUDOKU *)LEPT_CALLOC(1, sizeof(L_SUDOKU)); - sud->locs = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32)); - sud->init = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32)); - sud->state = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32)); - for (i = 0; i < 81; i++) { - val = array[i]; - sud->init[i] = val; - sud->state[i] = val; - if (val == 0) - sud->locs[locs_index++] = i; - } - sud->num = locs_index; - sud->failure = FALSE; - sud->finished = FALSE; - return sud; -} - - -/*! - * \brief sudokuDestroy() - * - * \param[in,out] psud will be set to null before returning - * \return void - */ -void -sudokuDestroy(L_SUDOKU **psud) -{ -L_SUDOKU *sud; - - PROCNAME("sudokuDestroy"); - - if (psud == NULL) { - L_WARNING("ptr address is NULL\n", procName); - return; - } - if ((sud = *psud) == NULL) - return; - - LEPT_FREE(sud->locs); - LEPT_FREE(sud->init); - LEPT_FREE(sud->state); - LEPT_FREE(sud); - - *psud = NULL; - return; -} - - -/*---------------------------------------------------------------------* - * Solve the puzzle * - *---------------------------------------------------------------------*/ -/*! 
- * \brief sudokuSolve() - * - * \param[in] sud l_sudoku starting in initial state - * \return 1 on success, 0 on failure to solve note reversal of - * typical unix returns - */ -l_int32 -sudokuSolve(L_SUDOKU *sud) -{ - PROCNAME("sudokuSolve"); - - if (!sud) - return ERROR_INT("sud not defined", procName, 0); - - if (!sudokuValidState(sud->init)) - return ERROR_INT("initial state not valid", procName, 0); - - while (1) { - if (sudokuNewGuess(sud)) - break; - if (sud->finished == TRUE) - break; - } - - if (sud->failure == TRUE) { - lept_stderr("Failure after %d guesses\n", sud->nguess); - return 0; - } - - lept_stderr("Solved after %d guesses\n", sud->nguess); - return 1; -} - - -/*! - * \brief sudokuValidState() - * - * \param[in] state array of size 81 - * \return 1 if valid, 0 if invalid - * - *
- * Notes:
- *      (1) This can be used on either the initial state (init)
- *          or on the current state (state) of the l_soduku.
- *          All values of 0 are ignored.
- * 
- */ -static l_int32 -sudokuValidState(l_int32 *state) -{ -l_int32 i; - - PROCNAME("sudokuValidState"); - - if (!state) - return ERROR_INT("state not defined", procName, 0); - - for (i = 0; i < 81; i++) { - if (!sudokuTestState(state, i)) - return 0; - } - - return 1; -} - - -/*! - * \brief sudokuNewGuess() - * - * \param[in] sud l_sudoku - * \return 0 if OK; 1 if no solution is possible - * - *
- * Notes:
- *      (1) This attempts to increment the number in the current
- *          location.  If it can't, it backtracks (sets the number
- *          in the current location to zero and decrements the
- *          current location).  If it can, it tests that number,
- *          and if the number is valid, moves forward to the next
- *          empty location (increments the current location).
- *      (2) If there is no solution, backtracking will eventually
- *          exhaust possibilities for the first location.
- * 
- */ -static l_int32 -sudokuNewGuess(L_SUDOKU *sud) -{ -l_int32 index, val, valid; -l_int32 *locs, *state; - - locs = sud->locs; - state = sud->state; - index = locs[sud->current]; /* 0 to 80 */ - val = state[index]; - if (val == 9) { /* backtrack or give up */ - if (sud->current == 0) { - sud->failure = TRUE; - return 1; - } - state[index] = 0; - sud->current--; - } else { /* increment current value and test */ - sud->nguess++; - state[index]++; - valid = sudokuTestState(state, index); - if (valid) { - if (sud->current == sud->num - 1) { /* we're done */ - sud->finished = TRUE; - return 0; - } else { /* advance to next position */ - sud->current++; - } - } - } - - return 0; -} - - -/*! - * \brief sudokuTestState() - * - * \param[in] state current state: array of 81 values - * \param[in] index into state element that we are testing - * \return 1 if valid; 0 if invalid no error checking - */ -static l_int32 -sudokuTestState(l_int32 *state, - l_int32 index) -{ -l_int32 i, j, val, row, rowstart, rowend, col; -l_int32 blockrow, blockcol, blockstart, rowindex, locindex; - - if ((val = state[index]) == 0) /* automatically valid */ - return 1; - - /* Test row. 
Test val is at (x, y) = (index % 9, index / 9) */ - row = index / 9; - rowstart = 9 * row; - for (i = rowstart; i < index; i++) { - if (state[i] == val) - return 0; - } - rowend = rowstart + 9; - for (i = index + 1; i < rowend; i++) { - if (state[i] == val) - return 0; - } - - /* Test column */ - col = index % 9; - for (j = col; j < index; j += 9) { - if (state[j] == val) - return 0; - } - for (j = index + 9; j < 81; j += 9) { - if (state[j] == val) - return 0; - } - - /* Test local 3x3 block */ - blockrow = 3 * (row / 3); - blockcol = 3 * (col / 3); - blockstart = 9 * blockrow + blockcol; - for (i = 0; i < 3; i++) { - rowindex = blockstart + 9 * i; - for (j = 0; j < 3; j++) { - locindex = rowindex + j; - if (index == locindex) continue; - if (state[locindex] == val) - return 0; - } - } - - return 1; -} - - -/*---------------------------------------------------------------------* - * Test for uniqueness * - *---------------------------------------------------------------------*/ -/*! - * \brief sudokuTestUniqueness() - * - * \param[in] array of 81 numbers, 9 lines of 9 numbers each - * \param[out] punique 1 if unique, 0 if not - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This applies the brute force method to all four 90 degree
- *          rotations.  If there is more than one solution, it is highly
- *          unlikely that all four results will be the same;
- *          consequently, if they are the same, the solution is
- *          most likely to be unique.
- * 
- */ -l_ok -sudokuTestUniqueness(l_int32 *array, - l_int32 *punique) -{ -l_int32 same1, same2, same3; -l_int32 *array1, *array2, *array3; -L_SUDOKU *sud, *sud1, *sud2, *sud3; - - PROCNAME("sudokuTestUniqueness"); - - if (!punique) - return ERROR_INT("&unique not defined", procName, 1); - *punique = 0; - if (!array) - return ERROR_INT("array not defined", procName, 1); - - sud = sudokuCreate(array); - sudokuSolve(sud); - array1 = sudokuRotateArray(array, 1); - sud1 = sudokuCreate(array1); - sudokuSolve(sud1); - array2 = sudokuRotateArray(array, 2); - sud2 = sudokuCreate(array2); - sudokuSolve(sud2); - array3 = sudokuRotateArray(array, 3); - sud3 = sudokuCreate(array3); - sudokuSolve(sud3); - - sudokuCompareState(sud, sud1, 1, &same1); - sudokuCompareState(sud, sud2, 2, &same2); - sudokuCompareState(sud, sud3, 3, &same3); - *punique = (same1 && same2 && same3); - - sudokuDestroy(&sud); - sudokuDestroy(&sud1); - sudokuDestroy(&sud2); - sudokuDestroy(&sud3); - LEPT_FREE(array1); - LEPT_FREE(array2); - LEPT_FREE(array3); - return 0; -} - - -/*! - * \brief sudokuCompareState() - * - * \param[in] sud1, sud2 two l_Sudoku states (solutions) - * \param[in] quads rotation of sud2 input with respect to sud1, - * in units of 90 degrees cw - * \param[out] psame 1 if all 4 results are identical; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input to sud2 has been rotated by %quads relative to the
- *          input to sud1.  Therefore, we must rotate the solution to
- *          sud1 by the same amount before comparing it to the
- *          solution to sud2.
- * 
- */ -static l_int32 -sudokuCompareState(L_SUDOKU *sud1, - L_SUDOKU *sud2, - l_int32 quads, - l_int32 *psame) -{ -l_int32 i, same; -l_int32 *array; - - PROCNAME("sudokuCompareState"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - if (!sud1) - return ERROR_INT("sud1 not defined", procName, 1); - if (!sud2) - return ERROR_INT("sud1 not defined", procName, 1); - if (quads < 1 || quads > 3) - return ERROR_INT("valid quads in {1,2,3}", procName, 1); - - same = TRUE; - if ((array = sudokuRotateArray(sud1->state, quads)) == NULL) - return ERROR_INT("array not made", procName, 1); - for (i = 0; i < 81; i++) { - if (array[i] != sud2->state[i]) { - same = FALSE; - break; - } - } - *psame = same; - LEPT_FREE(array); - return 0; -} - - -/*! - * \brief sudokuRotateArray() - * - * \param[in] array 81 numbers; 9 lines of 9 numbers each - * \param[in] quads 1-3; number of 90 degree cw rotations - * \return rarray rotated array, or NULL on error - */ -static l_int32 * -sudokuRotateArray(l_int32 *array, - l_int32 quads) -{ -l_int32 i, j, sindex, dindex; -l_int32 *rarray; - - PROCNAME("sudokuRotateArray"); - - if (!array) - return (l_int32 *)ERROR_PTR("array not defined", procName, NULL); - if (quads < 1 || quads > 3) - return (l_int32 *)ERROR_PTR("valid quads in {1,2,3}", procName, NULL); - - rarray = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32)); - if (quads == 1) { - for (j = 0, dindex = 0; j < 9; j++) { - for (i = 8; i >= 0; i--) { - sindex = 9 * i + j; - rarray[dindex++] = array[sindex]; - } - } - } else if (quads == 2) { - for (i = 8, dindex = 0; i >= 0; i--) { - for (j = 8; j >= 0; j--) { - sindex = 9 * i + j; - rarray[dindex++] = array[sindex]; - } - } - } else { /* quads == 3 */ - for (j = 8, dindex = 0; j >= 0; j--) { - for (i = 0; i < 9; i++) { - sindex = 9 * i + j; - rarray[dindex++] = array[sindex]; - } - } - } - - return rarray; -} - - -/*---------------------------------------------------------------------* - * Generation * - 
*---------------------------------------------------------------------*/ -/*! - * \brief sudokuGenerate() - * - * \param[in] array 81 numbers, 9 rows of 9 numbers each - * \param[in] seed random number - * \param[in] minelems min non-zero elements allowed; <= 80 - * \param[in] maxtries max tries to remove a number and get a valid sudoku - * \return l_sudoku, or NULL on error - * - *
- * Notes:
- *      (1) This is a brute force generator.  It starts with a completed
- *          sudoku solution and, by removing elements (setting them to 0),
- *          generates a valid (unique) sudoku initial condition.
- *      (2) The process stops when either %minelems, the minimum
- *          number of non-zero elements, is reached, or when the
- *          number of attempts to remove the next element exceeds %maxtries.
- *      (3) No sudoku is known with less than 17 nonzero elements.
- * 
- */ -L_SUDOKU * -sudokuGenerate(l_int32 *array, - l_int32 seed, - l_int32 minelems, - l_int32 maxtries) -{ -l_int32 index, sector, nzeros, removefirst, tries, val, oldval, unique; -L_SUDOKU *sud, *testsud; - - PROCNAME("sudokuGenerate"); - - if (!array) - return (L_SUDOKU *)ERROR_PTR("array not defined", procName, NULL); - if (minelems > 80) - return (L_SUDOKU *)ERROR_PTR("minelems must be < 81", procName, NULL); - - /* Remove up to 30 numbers at random from the solution. - * Test if the solution is valid -- the initial 'solution' may - * have been invalid. Then test if the sudoku with 30 zeroes - * is unique -- it almost always will be. */ - srand(seed); - nzeros = 0; - sector = 0; - removefirst = L_MIN(30, 81 - minelems); - while (nzeros < removefirst) { - genRandomIntegerInRange(9, 0, &val); - index = 27 * (sector / 3) + 3 * (sector % 3) + - 9 * (val / 3) + (val % 3); - if (array[index] == 0) continue; - array[index] = 0; - nzeros++; - sector++; - sector %= 9; - } - testsud = sudokuCreate(array); - sudokuSolve(testsud); - if (testsud->failure) { - sudokuDestroy(&testsud); - L_ERROR("invalid initial solution\n", procName); - return NULL; - } - sudokuTestUniqueness(testsud->init, &unique); - sudokuDestroy(&testsud); - if (!unique) { - L_ERROR("non-unique result with 30 zeroes\n", procName); - return NULL; - } - - /* Remove more numbers, testing at each removal for uniqueness. */ - tries = 0; - sector = 0; - while (1) { - if (tries > maxtries) break; - if (81 - nzeros <= minelems) break; - - if (tries == 0) { - lept_stderr("Trying %d zeros\n", nzeros); - tries = 1; - } - - /* Choose an element to be zeroed. We choose one - * at random in succession from each of the nine sectors. */ - genRandomIntegerInRange(9, 0, &val); - index = 27 * (sector / 3) + 3 * (sector % 3) + - 9 * (val / 3) + (val % 3); - sector++; - sector %= 9; - if (array[index] == 0) continue; - - /* Save the old value in case we need to revert */ - oldval = array[index]; - - /* Is there a solution? 
If not, try again. */ - array[index] = 0; - testsud = sudokuCreate(array); - sudokuSolve(testsud); - if (testsud->failure == TRUE) { - sudokuDestroy(&testsud); - array[index] = oldval; /* revert */ - tries++; - continue; - } - - /* Is the solution unique? If not, try again. */ - sudokuTestUniqueness(testsud->init, &unique); - sudokuDestroy(&testsud); - if (!unique) { /* revert and try again */ - array[index] = oldval; - tries++; - } else { /* accept this */ - tries = 0; - lept_stderr("Have %d zeros\n", nzeros); - nzeros++; - } - } - lept_stderr("Final: nelems = %d\n", 81 - nzeros); - - /* Show that we can recover the solution */ - sud = sudokuCreate(array); - sudokuOutput(sud, L_SUDOKU_INIT); - sudokuSolve(sud); - sudokuOutput(sud, L_SUDOKU_STATE); - - return sud; -} - - -/*---------------------------------------------------------------------* - * Output * - *---------------------------------------------------------------------*/ -/*! - * \brief sudokuOutput() - * - * \param[in] sud l_sudoku at any stage - * \param[in] arraytype L_SUDOKU_INIT, L_SUDOKU_STATE - * \return void - * - *
- * Notes:
- *      (1) Prints either the initial array or the current state
- *          of the solution.
- * 
- */ -l_int32 -sudokuOutput(L_SUDOKU *sud, - l_int32 arraytype) -{ -l_int32 i, j; -l_int32 *array; - - PROCNAME("sudokuOutput"); - - if (!sud) - return ERROR_INT("sud not defined", procName, 1); - if (arraytype == L_SUDOKU_INIT) - array = sud->init; - else if (arraytype == L_SUDOKU_STATE) - array = sud->state; - else - return ERROR_INT("invalid arraytype", procName, 1); - - for (i = 0; i < 9; i++) { - for (j = 0; j < 9; j++) - lept_stderr("%d ", array[9 * i + j]); - lept_stderr("\n"); - } - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sudoku.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sudoku.h deleted file mode 100644 index 5abb7cbc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/sudoku.h +++ /dev/null @@ -1,77 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef SUDOKU_H_INCLUDED -#define SUDOKU_H_INCLUDED - -/*! - * \file sudoku.h - * - *
- *    The L_Sudoku holds all the information of the current state.
- *
- *    The input to sudokuCreate() is a file with any number of lines
- *    starting with '#', followed by 9 lines consisting of 9 numbers
- *    in each line.  These have the known values and use 0 for the unknowns.
- *    Blank lines are ignored.
- *
- *    The %locs array holds the indices of the unknowns, numbered
- *    left-to-right and top-to-bottom from 0 to 80.  The array size
- *    is initialized to %num.  %current is the index into the %locs
- *    array of the current guess: locs[current].
- *
- *    The %state array is used to determine the validity of each guess.
- *    It is of size 81, and is initialized by setting the unknowns to 0
- *    and the knowns to their input values.
- * 
- */ - -struct L_Sudoku -{ - l_int32 num; /*!< number of unknowns */ - l_int32 *locs; /*!< location of unknowns */ - l_int32 current; /*!< index into %locs of current location */ - l_int32 *init; /*!< initial state, with 0 representing */ - /*!< the unknowns */ - l_int32 *state; /*!< present state, including inits and */ - /*!< guesses of unknowns up to %current */ - l_int32 nguess; /*!< shows current number of guesses */ - l_int32 finished; /*!< set to 1 when solved */ - l_int32 failure; /*!< set to 1 if no solution is possible */ -}; -typedef struct L_Sudoku L_SUDOKU; - - - /*! For printing out array data */ -/*! Sudoku Output */ -enum { - L_SUDOKU_INIT = 0, - L_SUDOKU_STATE = 1 -}; - -#endif /* SUDOKU_H_INCLUDED */ - - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/textops.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/textops.c deleted file mode 100644 index 3e23c22b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/textops.c +++ /dev/null @@ -1,1129 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file textops.c - *
- *
- *    Font layout
- *       PIX             *pixAddSingleTextblock()
- *       PIX             *pixAddTextlines()
- *       l_int32          pixSetTextblock()
- *       l_int32          pixSetTextline()
- *       PIXA            *pixaAddTextNumber()
- *       PIXA            *pixaAddTextlines()
- *       l_int32          pixaAddPixWithText()
- *
- *    Text size estimation and partitioning
- *       SARRAY          *bmfGetLineStrings()
- *       NUMA            *bmfGetWordWidths()
- *       l_int32          bmfGetStringWidth()
- *
- *    Text splitting
- *       SARRAY          *splitStringToParagraphs()
- *       static l_int32   stringAllWhitespace()
- *       static l_int32   stringLeadingWhitespace()
- *
- *    This is a simple utility to put text on images.  One font and style
- *    is provided, with a variety of pt sizes.  For example, to put a
- *    line of green 10 pt text on an image, with the beginning baseline
- *    at (50, 50):
- *        L_Bmf  *bmf = bmfCreate(NULL, 10);
- *        const char *textstr = "This is a funny cat";
- *        pixSetTextline(pixs, bmf, textstr, 0x00ff0000, 50, 50, NULL, NULL);
- *
- *    The simplest interfaces for adding text to an image are
- *    pixAddTextlines() and pixAddSingleTextblock().
- *    For example, to add the same text in red, centered, below the image:
- *        Pix *pixd = pixAddTextlines(pixs, bmf, textstr, 0xff000000,
- *                                    L_ADD_BELOW);  // red text
- *
- *    To add text to all pix in a pixa, generating a new pixa, use
- *    either an sarray to hold the strings for each pix, or use the
- *    strings in the text field of each pix; e.g.,
- *        Pixa *pixa2 = pixaAddTextlines(pixa1, bmf, sa, 0x0000ff00,
- *                                    L_ADD_LEFT);  // blue text
- *        Pixa *pixa2 = pixaAddTextlines(pixa1, bmf, NULL, 0x00ff0000,
- *                                    L_ADD_RIGHT);  // green text
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_int32 stringAllWhitespace(char *textstr, l_int32 *pval); -static l_int32 stringLeadingWhitespace(char *textstr, l_int32 *pval); - - -/*---------------------------------------------------------------------* - * Font layout * - *---------------------------------------------------------------------*/ -/*! - * \brief pixAddSingleTextblock() - * - * \param[in] pixs input pix; colormap ok - * \param[in] bmf bitmap font data - * \param[in] textstr [optional] text string to be added - * \param[in] val color to set the text - * \param[in] location L_ADD_ABOVE, L_ADD_AT_TOP, - * L_ADD_AT_BOT, L_ADD_BELOW - * \param[out] poverflow [optional] 1 if text overflows allocated - * region and is clipped; 0 otherwise - * \return pixd new pix with rendered text, or either a copy, - * or NULL on error - * - *
- * Notes:
- *      (1) This function paints a set of lines of text over an image.
- *          If %location is L_ADD_ABOVE or L_ADD_BELOW, the pix size
- *          is expanded with a border and rendered over the border.
- *      (2) %val is the pixel value to be painted through the font mask.
- *          It should be chosen to agree with the depth of pixs.
- *          If it is out of bounds, an intermediate value is chosen.
- *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
- *          hex representation of the red intensity, etc.
- *      (3) If textstr == NULL, use the text field in the pix.
- *      (4) If there is a colormap, this does the best it can to use
- *          the requested color, or something similar to it.
- *      (5) Typical usage is for labelling a pix with some text data.
- * 
- */ -PIX * -pixAddSingleTextblock(PIX *pixs, - L_BMF *bmf, - const char *textstr, - l_uint32 val, - l_int32 location, - l_int32 *poverflow) -{ -char *linestr; -l_int32 w, h, d, i, y, xstart, ystart, extra, spacer, rval, gval, bval; -l_int32 nlines, htext, ovf, overflow, offset, index; -l_uint32 textcolor; -PIX *pixd; -PIXCMAP *cmap, *cmapd; -SARRAY *salines; - - PROCNAME("pixAddSingleTextblock"); - - if (poverflow) *poverflow = 0; - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (location != L_ADD_ABOVE && location != L_ADD_AT_TOP && - location != L_ADD_AT_BOT && location != L_ADD_BELOW) - return (PIX *)ERROR_PTR("invalid location", procName, NULL); - if (!bmf) { - L_ERROR("no bitmap fonts; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - if (!textstr) - textstr = pixGetText(pixs); - if (!textstr) { - L_WARNING("no textstring defined; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - - /* Make sure the "color" value for the text will work - * for the pix. If the pix is not colormapped and the - * value is out of range, set it to mid-range. 
*/ - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (d == 1 && val > 1) - val = 1; - else if (d == 2 && val > 3 && !cmap) - val = 2; - else if (d == 4 && val > 15 && !cmap) - val = 8; - else if (d == 8 && val > 0xff && !cmap) - val = 128; - else if (d == 16 && val > 0xffff) - val = 0x8000; - else if (d == 32 && val < 256) - val = 0x80808000; - - xstart = (l_int32)(0.1 * w); - salines = bmfGetLineStrings(bmf, textstr, w - 2 * xstart, 0, &htext); - if (!salines) - return (PIX *)ERROR_PTR("line string sa not made", procName, NULL); - nlines = sarrayGetCount(salines); - - /* Add white border if required */ - spacer = 10; /* pixels away from image boundary or added border */ - if (location == L_ADD_ABOVE || location == L_ADD_BELOW) { - extra = htext + 2 * spacer; - pixd = pixCreate(w, h + extra, d); - pixCopyColormap(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyText(pixd, pixs); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - if (location == L_ADD_ABOVE) - pixRasterop(pixd, 0, extra, w, h, PIX_SRC, pixs, 0, 0); - else /* add below */ - pixRasterop(pixd, 0, 0, w, h, PIX_SRC, pixs, 0, 0); - } else { - pixd = pixCopy(NULL, pixs); - } - cmapd = pixGetColormap(pixd); - - /* bmf->baselinetab[93] is the approximate distance from - * the top of the tallest character to the baseline. 93 was chosen - * at random, as all the baselines are essentially equal for - * each character in a font. */ - offset = bmf->baselinetab[93]; - if (location == L_ADD_ABOVE || location == L_ADD_AT_TOP) - ystart = offset + spacer; - else if (location == L_ADD_AT_BOT) - ystart = h - htext - spacer + offset; - else /* add below */ - ystart = h + offset + spacer; - - /* If cmapped, add the color if necessary to the cmap. If the - * cmap is full, use the nearest color to the requested color. 
*/ - if (cmapd) { - extractRGBValues(val, &rval, &gval, &bval); - pixcmapAddNearestColor(cmapd, rval, gval, bval, &index); - pixcmapGetColor(cmapd, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, &textcolor); - } else { - textcolor = val; - } - - /* Keep track of overflow condition on line width */ - overflow = 0; - for (i = 0, y = ystart; i < nlines; i++) { - linestr = sarrayGetString(salines, i, L_NOCOPY); - pixSetTextline(pixd, bmf, linestr, textcolor, - xstart, y, NULL, &ovf); - y += bmf->lineheight + bmf->vertlinesep; - if (ovf) - overflow = 1; - } - - /* Also consider vertical overflow where there is too much text to - * fit inside the image: the cases L_ADD_AT_TOP and L_ADD_AT_BOT. - * The text requires a total of htext + 2 * spacer vertical pixels. */ - if (location == L_ADD_AT_TOP || location == L_ADD_AT_BOT) { - if (h < htext + 2 * spacer) - overflow = 1; - } - if (poverflow) *poverflow = overflow; - - sarrayDestroy(&salines); - return pixd; -} - - -/*! - * \brief pixAddTextlines() - * - * \param[in] pixs input pix; colormap ok - * \param[in] bmf bitmap font data - * \param[in] textstr [optional] text string to be added - * \param[in] val color to set the text - * \param[in] location L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT - * \return pixd new pix with rendered text, or either a copy, - * or NULL on error - * - *
- * Notes:
- *      (1) This function expands an image as required to paint one or
- *          more lines of text adjacent to the image.  If %bmf == NULL,
- *          this returns a copy.  If above or below, the lines are
- *          centered with respect to the image; if left or right, they
- *          are left justified.
- *      (2) %val is the pixel value to be painted through the font mask.
- *          It should be chosen to agree with the depth of pixs.
- *          If it is out of bounds, an intermediate value is chosen.
- *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
- *          hex representation of the red intensity, etc.
- *      (3) If textstr == NULL, use the text field in the pix.  The
- *          text field contains one or most "lines" of text, where newlines
- *          are used as line separators.
- *      (4) If there is a colormap, this does the best it can to use
- *          the requested color, or something similar to it.
- *      (5) Typical usage is for labelling a pix with some text data.
- * 
- */ -PIX * -pixAddTextlines(PIX *pixs, - L_BMF *bmf, - const char *textstr, - l_uint32 val, - l_int32 location) -{ -char *str; -l_int32 i, w, h, d, rval, gval, bval, index; -l_int32 wline, wtext, htext, wadd, hadd, spacer, hbaseline, nlines; -l_uint32 textcolor; -PIX *pixd; -PIXCMAP *cmap, *cmapd; -SARRAY *sa; - - PROCNAME("pixAddTextlines"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (location != L_ADD_ABOVE && location != L_ADD_BELOW && - location != L_ADD_LEFT && location != L_ADD_RIGHT) - return (PIX *)ERROR_PTR("invalid location", procName, NULL); - if (!bmf) { - L_ERROR("no bitmap fonts; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - if (!textstr) { - textstr = pixGetText(pixs); - if (!textstr) { - L_WARNING("no textstring defined; returning a copy\n", procName); - return pixCopy(NULL, pixs); - } - } - - /* Make sure the "color" value for the text will work - * for the pix. If the pix is not colormapped and the - * value is out of range, set it to mid-range. 
*/ - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (d == 1 && val > 1) - val = 1; - else if (d == 2 && val > 3 && !cmap) - val = 2; - else if (d == 4 && val > 15 && !cmap) - val = 8; - else if (d == 8 && val > 0xff && !cmap) - val = 128; - else if (d == 16 && val > 0xffff) - val = 0x8000; - else if (d == 32 && val < 256) - val = 0x80808000; - - /* Get the text in each line */ - sa = sarrayCreateLinesFromString(textstr, 0); - nlines = sarrayGetCount(sa); - - /* Get the necessary text size */ - wtext = 0; - for (i = 0; i < nlines; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - bmfGetStringWidth(bmf, str, &wline); - if (wline > wtext) - wtext = wline; - } - hbaseline = bmf->baselinetab[93]; - htext = 1.5 * hbaseline * nlines; - - /* Add white border */ - spacer = 10; /* pixels away from the added border */ - if (location == L_ADD_ABOVE || location == L_ADD_BELOW) { - hadd = htext + 2 * spacer; - pixd = pixCreate(w, h + hadd, d); - pixCopyColormap(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyText(pixd, pixs); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - if (location == L_ADD_ABOVE) - pixRasterop(pixd, 0, hadd, w, h, PIX_SRC, pixs, 0, 0); - else /* add below */ - pixRasterop(pixd, 0, 0, w, h, PIX_SRC, pixs, 0, 0); - } else { /* L_ADD_LEFT or L_ADD_RIGHT */ - wadd = wtext + 2 * spacer; - pixd = pixCreate(w + wadd, h, d); - pixCopyColormap(pixd, pixs); - pixCopyResolution(pixd, pixs); - pixCopyText(pixd, pixs); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - if (location == L_ADD_LEFT) - pixRasterop(pixd, wadd, 0, w, h, PIX_SRC, pixs, 0, 0); - else /* add to right */ - pixRasterop(pixd, 0, 0, w, h, PIX_SRC, pixs, 0, 0); - } - - /* If cmapped, add the color if necessary to the cmap. If the - * cmap is full, use the nearest color to the requested color. 
*/ - cmapd = pixGetColormap(pixd); - if (cmapd) { - extractRGBValues(val, &rval, &gval, &bval); - pixcmapAddNearestColor(cmapd, rval, gval, bval, &index); - pixcmapGetColor(cmapd, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, &textcolor); - } else { - textcolor = val; - } - - /* Add the text */ - for (i = 0; i < nlines; i++) { - str = sarrayGetString(sa, i, L_NOCOPY); - bmfGetStringWidth(bmf, str, &wtext); - if (location == L_ADD_ABOVE) - pixSetTextline(pixd, bmf, str, textcolor, - (w - wtext) / 2, spacer + hbaseline * (1 + 1.5 * i), - NULL, NULL); - else if (location == L_ADD_BELOW) - pixSetTextline(pixd, bmf, str, textcolor, - (w - wtext) / 2, h + spacer + - hbaseline * (1 + 1.5 * i), NULL, NULL); - else if (location == L_ADD_LEFT) - pixSetTextline(pixd, bmf, str, textcolor, - spacer, (h - htext) / 2 + hbaseline * (1 + 1.5 * i), - NULL, NULL); - else /* location == L_ADD_RIGHT */ - pixSetTextline(pixd, bmf, str, textcolor, - w + spacer, (h - htext) / 2 + - hbaseline * (1 + 1.5 * i), NULL, NULL); - } - - sarrayDestroy(&sa); - return pixd; -} - - -/*! - * \brief pixSetTextblock() - * - * \param[in] pixs input image - * \param[in] bmf bitmap font data - * \param[in] textstr block text string to be set - * \param[in] val color to set the text - * \param[in] x0 left edge for each line of text - * \param[in] y0 baseline location for the first text line - * \param[in] wtext max width of each line of generated text - * \param[in] firstindent indentation of first line, in x-widths - * \param[out] poverflow [optional] 0 if text is contained in input pix; - * 1 if it is clipped - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function paints a set of lines of text over an image.
- *      (2) %val is the pixel value to be painted through the font mask.
- *          It should be chosen to agree with the depth of pixs.
- *          If it is out of bounds, an intermediate value is chosen.
- *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
- *          hex representation of the red intensity, etc.
- *          The last two hex digits are 00 (byte value 0), assigned to
- *          the A component.  Note that, as usual, RGBA proceeds from
- *          left to right in the order from MSB to LSB (see pix.h
- *          for details).
- *      (3) If there is a colormap, this does the best it can to use
- *          the requested color, or something similar to it.
- * 
- */ -l_ok -pixSetTextblock(PIX *pixs, - L_BMF *bmf, - const char *textstr, - l_uint32 val, - l_int32 x0, - l_int32 y0, - l_int32 wtext, - l_int32 firstindent, - l_int32 *poverflow) -{ -char *linestr; -l_int32 d, h, i, w, x, y, nlines, htext, xwidth, wline, ovf, overflow; -SARRAY *salines; -PIXCMAP *cmap; - - PROCNAME("pixSetTextblock"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!bmf) - return ERROR_INT("bmf not defined", procName, 1); - if (!textstr) - return ERROR_INT("textstr not defined", procName, 1); - - /* Make sure the "color" value for the text will work - * for the pix. If the pix is not colormapped and the - * value is out of range, set it to mid-range. */ - pixGetDimensions(pixs, &w, &h, &d); - cmap = pixGetColormap(pixs); - if (d == 1 && val > 1) - val = 1; - else if (d == 2 && val > 3 && !cmap) - val = 2; - else if (d == 4 && val > 15 && !cmap) - val = 8; - else if (d == 8 && val > 0xff && !cmap) - val = 128; - else if (d == 16 && val > 0xffff) - val = 0x8000; - else if (d == 32 && val < 256) - val = 0x80808000; - - if (w < x0 + wtext) { - L_WARNING("reducing width of textblock\n", procName); - wtext = w - x0 - w / 10; - if (wtext <= 0) - return ERROR_INT("wtext too small; no room for text", procName, 1); - } - - salines = bmfGetLineStrings(bmf, textstr, wtext, firstindent, &htext); - if (!salines) - return ERROR_INT("line string sa not made", procName, 1); - nlines = sarrayGetCount(salines); - bmfGetWidth(bmf, 'x', &xwidth); - - y = y0; - overflow = 0; - for (i = 0; i < nlines; i++) { - if (i == 0) - x = x0 + firstindent * xwidth; - else - x = x0; - linestr = sarrayGetString(salines, i, L_NOCOPY); - pixSetTextline(pixs, bmf, linestr, val, x, y, &wline, &ovf); - y += bmf->lineheight + bmf->vertlinesep; - if (ovf) - overflow = 1; - } - - /* (y0 - baseline) is the top of the printed text. Character - * 93 was chosen at random, as all the baselines are essentially - * equal for each character in a font. 
*/ - if (h < y0 - bmf->baselinetab[93] + htext) - overflow = 1; - if (poverflow) - *poverflow = overflow; - - sarrayDestroy(&salines); - return 0; -} - - -/*! - * \brief pixSetTextline() - * - * \param[in] pixs input image - * \param[in] bmf bitmap font data - * \param[in] textstr text string to be set on the line - * \param[in] val color to set the text - * \param[in] x0 left edge for first char - * \param[in] y0 baseline location for all text on line - * \param[out] pwidth [optional] width of generated text - * \param[out] poverflow [optional] 0 if text is contained in input pix; - * 1 if it is clipped - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This function paints a line of text over an image.
- *      (2) %val is the pixel value to be painted through the font mask.
- *          It should be chosen to agree with the depth of pixs.
- *          If it is out of bounds, an intermediate value is chosen.
- *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
- *          hex representation of the red intensity, etc.
- *          The last two hex digits are 00 (byte value 0), assigned to
- *          the A component.  Note that, as usual, RGBA proceeds from
- *          left to right in the order from MSB to LSB (see pix.h
- *          for details).
- *      (3) If there is a colormap, this does the best it can to use
- *          the requested color, or something similar to it.
- * 
- */ -l_ok -pixSetTextline(PIX *pixs, - L_BMF *bmf, - const char *textstr, - l_uint32 val, - l_int32 x0, - l_int32 y0, - l_int32 *pwidth, - l_int32 *poverflow) -{ -char chr; -l_int32 d, i, x, w, nchar, baseline, index, rval, gval, bval; -l_uint32 textcolor; -PIX *pix; -PIXCMAP *cmap; - - PROCNAME("pixSetTextline"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!bmf) - return ERROR_INT("bmf not defined", procName, 1); - if (!textstr) - return ERROR_INT("teststr not defined", procName, 1); - - d = pixGetDepth(pixs); - cmap = pixGetColormap(pixs); - if (d == 1 && val > 1) - val = 1; - else if (d == 2 && val > 3 && !cmap) - val = 2; - else if (d == 4 && val > 15 && !cmap) - val = 8; - else if (d == 8 && val > 0xff && !cmap) - val = 128; - else if (d == 16 && val > 0xffff) - val = 0x8000; - else if (d == 32 && val < 256) - val = 0x80808000; - - /* If cmapped, add the color if necessary to the cmap. If the - * cmap is full, use the nearest color to the requested color. */ - if (cmap) { - extractRGBValues(val, &rval, &gval, &bval); - pixcmapAddNearestColor(cmap, rval, gval, bval, &index); - pixcmapGetColor(cmap, index, &rval, &gval, &bval); - composeRGBPixel(rval, gval, bval, &textcolor); - } else - textcolor = val; - - nchar = strlen(textstr); - x = x0; - for (i = 0; i < nchar; i++) { - chr = textstr[i]; - if ((l_int32)chr == 10) continue; /* NL */ - pix = bmfGetPix(bmf, chr); - bmfGetBaseline(bmf, chr, &baseline); - pixPaintThroughMask(pixs, pix, x, y0 - baseline, textcolor); - w = pixGetWidth(pix); - x += w + bmf->kernwidth; - pixDestroy(&pix); - } - - if (pwidth) - *pwidth = x - bmf->kernwidth - x0; - if (poverflow) - *poverflow = (x > pixGetWidth(pixs) - 1) ? 1 : 0; - return 0; -} - - -/*! - * \brief pixaAddTextNumber() - * - * \param[in] pixas input pixa; colormap ok - * \param[in] bmf bitmap font data - * \param[in] na [optional] number array; use 1 ... 
n if null - * \param[in] val color to set the text - * \param[in] location L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT - * \return pixad new pixa with rendered numbers, or NULL on error - * - *
- * Notes:
- *      (1) Typical usage is for labelling each pix in a pixa with a number.
- *      (2) This function paints numbers external to each pix, in a position
- *          given by %location.  In all cases, the pix is expanded on
- *          on side and the number is painted over white in the added region.
- *      (3) %val is the pixel value to be painted through the font mask.
- *          It should be chosen to agree with the depth of pixs.
- *          If it is out of bounds, an intermediate value is chosen.
- *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
- *          hex representation of the red intensity, etc.
- *      (4) If na == NULL, number each pix sequentially, starting with 1.
- *      (5) If there is a colormap, this does the best it can to use
- *          the requested color, or something similar to it.
- * 
- */ -PIXA * -pixaAddTextNumber(PIXA *pixas, - L_BMF *bmf, - NUMA *na, - l_uint32 val, - l_int32 location) -{ -char textstr[128]; -l_int32 i, n, index; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaAddTextNumber"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!bmf) - return (PIXA *)ERROR_PTR("bmf not defined", procName, NULL); - if (location != L_ADD_ABOVE && location != L_ADD_BELOW && - location != L_ADD_LEFT && location != L_ADD_RIGHT) - return (PIXA *)ERROR_PTR("invalid location", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - if (na) - numaGetIValue(na, i, &index); - else - index = i + 1; - snprintf(textstr, sizeof(textstr), "%d", index); - pix2 = pixAddTextlines(pix1, bmf, textstr, val, location); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - return pixad; -} - - -/*! - * \brief pixaAddTextlines() - * - * \param[in] pixas input pixa; colormap ok - * \param[in] bmf bitmap font data - * \param[in] sa [optional] sarray; use text embedded in - * each pix if null - * \param[in] val color to set the text - * \param[in] location L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT - * \return pixad new pixa with rendered text, or NULL on error - * - *
- * Notes:
- *      (1) This function adds one or more lines of text externally to
- *          each pix, in a position given by %location.  In all cases,
- *          the pix is expanded as necessary to accommodate the text.
- *      (2) %val is the pixel value to be painted through the font mask.
- *          It should be chosen to agree with the depth of pixs.
- *          If it is out of bounds, an intermediate value is chosen.
- *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
- *          hex representation of the red intensity, etc.
- *      (3) If sa == NULL, use the text embedded in each pix.  In all
- *          cases, newlines in the text string are used to separate the
- *          lines of text that are added to the pix.
- *      (4) If sa has a smaller count than pixa, issue a warning
- *          and do not use any embedded text.
- *      (5) If there is a colormap, this does the best it can to use
- *          the requested color, or something similar to it.
- * 
- */ -PIXA * -pixaAddTextlines(PIXA *pixas, - L_BMF *bmf, - SARRAY *sa, - l_uint32 val, - l_int32 location) -{ -char *textstr; -l_int32 i, n, nstr; -PIX *pix1, *pix2; -PIXA *pixad; - - PROCNAME("pixaAddTextlines"); - - if (!pixas) - return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); - if (!bmf) - return (PIXA *)ERROR_PTR("bmf not defined", procName, NULL); - if (location != L_ADD_ABOVE && location != L_ADD_BELOW && - location != L_ADD_LEFT && location != L_ADD_RIGHT) - return (PIXA *)ERROR_PTR("invalid location", procName, NULL); - - n = pixaGetCount(pixas); - pixad = pixaCreate(n); - nstr = (sa) ? sarrayGetCount(sa) : 0; - if (nstr > 0 && nstr < n) - L_WARNING("There are %d strings and %d pix\n", procName, nstr, n); - for (i = 0; i < n; i++) { - pix1 = pixaGetPix(pixas, i, L_CLONE); - if (i < nstr) - textstr = sarrayGetString(sa, i, L_NOCOPY); - else - textstr = pixGetText(pix1); - pix2 = pixAddTextlines(pix1, bmf, textstr, val, location); - pixaAddPix(pixad, pix2, L_INSERT); - pixDestroy(&pix1); - } - - return pixad; -} - - -/*! - * \brief pixaAddPixWithText() - * - * \param[in] pixa - * \param[in] pixs any depth, colormap ok - * \param[in] reduction integer subsampling factor - * \param[in] bmf [optional] bitmap font data - * \param[in] textstr [optional] text string to be added - * \param[in] val color to set the text - * \param[in] location L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) This function generates a new pix with added text, and adds
- *          it by insertion into the pixa.
- *      (2) If the input pixs is not cmapped and not 32 bpp, it is
- *          converted to 32 bpp rgb.  %val is a standard 32 bpp pixel,
- *          expressed as 0xrrggbb00.  If there is a colormap, this does
- *          the best it can to use the requested color, or something close.
- *      (3) if %bmf == NULL, generate an 8 pt font; this takes about 5 msec.
- *      (4) If %textstr == NULL, use the text field in the pix.
- *      (5) In general, the text string can be written in multiple lines;
- *          use newlines as the separators.
- *      (6) Typical usage is for debugging, where the pixa of labeled images
- *          is used to generate a pdf.  Suggest using 1.0 for scalefactor.
- * 
- */ -l_ok -pixaAddPixWithText(PIXA *pixa, - PIX *pixs, - l_int32 reduction, - L_BMF *bmf, - const char *textstr, - l_uint32 val, - l_int32 location) -{ -l_int32 d; -L_BMF *bmf8; -PIX *pix1, *pix2, *pix3; -PIXCMAP *cmap; - - PROCNAME("pixaAddPixWithText"); - - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (location != L_ADD_ABOVE && location != L_ADD_BELOW && - location != L_ADD_LEFT && location != L_ADD_RIGHT) - return ERROR_INT("invalid location", procName, 1); - - if (!textstr) { - textstr = pixGetText(pixs); - if (!textstr) { - L_WARNING("no textstring defined; inserting copy", procName); - pixaAddPix(pixa, pixs, L_COPY); - return 0; - } - } - - /* Default font size is 8. */ - bmf8 = (bmf) ? bmf : bmfCreate(NULL, 8); - - if (reduction != 1) - pix1 = pixScaleByIntSampling(pixs, reduction); - else - pix1 = pixClone(pixs); - - /* We want the text to be rendered in color. This works - * automatically if pixs is cmapped or 32 bpp rgb; otherwise, - * we need to convert to rgb. */ - cmap = pixGetColormap(pix1); - d = pixGetDepth(pix1); - if (!cmap && d != 32) - pix2 = pixConvertTo32(pix1); - else - pix2 = pixClone(pix1); - - pix3 = pixAddTextlines(pix2, bmf, textstr, val, location); - pixDestroy(&pix1); - pixDestroy(&pix2); - if (!bmf) bmfDestroy(&bmf8); - if (!pix3) - return ERROR_INT("pix3 not made", procName, 1); - - pixaAddPix(pixa, pix3, L_INSERT); - return 0; -} - - -/*---------------------------------------------------------------------* - * Text size estimation and partitioning * - *---------------------------------------------------------------------*/ -/*! 
- * \brief bmfGetLineStrings() - * - * \param[in] bmf - * \param[in] textstr - * \param[in] maxw max width of a text line in pixels - * \param[in] firstindent indentation of first line, in x-widths - * \param[out] ph height required to hold text bitmap - * \return sarray of text strings for each line, or NULL on error - * - *
- * Notes:
- *      (1) Divides the input text string into an array of text strings,
- *          each of which will fit within maxw bits of width.
- * 
- */ -SARRAY * -bmfGetLineStrings(L_BMF *bmf, - const char *textstr, - l_int32 maxw, - l_int32 firstindent, - l_int32 *ph) -{ -char *linestr; -l_int32 i, ifirst, sumw, newsum, w, nwords, nlines, len, xwidth; -NUMA *na; -SARRAY *sa, *sawords; - - PROCNAME("bmfGetLineStrings"); - - if (!bmf) - return (SARRAY *)ERROR_PTR("bmf not defined", procName, NULL); - if (!textstr) - return (SARRAY *)ERROR_PTR("teststr not defined", procName, NULL); - - if ((sawords = sarrayCreateWordsFromString(textstr)) == NULL) - return (SARRAY *)ERROR_PTR("sawords not made", procName, NULL); - - if ((na = bmfGetWordWidths(bmf, textstr, sawords)) == NULL) { - sarrayDestroy(&sawords); - return (SARRAY *)ERROR_PTR("na not made", procName, NULL); - } - nwords = numaGetCount(na); - if (nwords == 0) { - sarrayDestroy(&sawords); - numaDestroy(&na); - return (SARRAY *)ERROR_PTR("no words in textstr", procName, NULL); - } - bmfGetWidth(bmf, 'x', &xwidth); - - sa = sarrayCreate(0); - ifirst = 0; - numaGetIValue(na, 0, &w); - sumw = firstindent * xwidth + w; - for (i = 1; i < nwords; i++) { - numaGetIValue(na, i, &w); - newsum = sumw + bmf->spacewidth + w; - if (newsum > maxw) { - linestr = sarrayToStringRange(sawords, ifirst, i - ifirst, 2); - if (!linestr) - continue; - len = strlen(linestr); - if (len > 0) /* it should always be */ - linestr[len - 1] = '\0'; /* remove the last space */ - sarrayAddString(sa, linestr, L_INSERT); - ifirst = i; - sumw = w; - } - else - sumw += bmf->spacewidth + w; - } - linestr = sarrayToStringRange(sawords, ifirst, nwords - ifirst, 2); - if (linestr) - sarrayAddString(sa, linestr, L_INSERT); - nlines = sarrayGetCount(sa); - *ph = nlines * bmf->lineheight + (nlines - 1) * bmf->vertlinesep; - - sarrayDestroy(&sawords); - numaDestroy(&na); - return sa; -} - - -/*! 
- * \brief bmfGetWordWidths() - * - * \param[in] bmf - * \param[in] textstr - * \param[in] sa of individual words - * \return numa of word lengths in pixels for the font represented - * by the bmf, or NULL on error - */ -NUMA * -bmfGetWordWidths(L_BMF *bmf, - const char *textstr, - SARRAY *sa) -{ -char *wordstr; -l_int32 i, nwords, width; -NUMA *na; - - PROCNAME("bmfGetWordWidths"); - - if (!bmf) - return (NUMA *)ERROR_PTR("bmf not defined", procName, NULL); - if (!textstr) - return (NUMA *)ERROR_PTR("teststr not defined", procName, NULL); - if (!sa) - return (NUMA *)ERROR_PTR("sa not defined", procName, NULL); - - nwords = sarrayGetCount(sa); - if ((na = numaCreate(nwords)) == NULL) - return (NUMA *)ERROR_PTR("na not made", procName, NULL); - - for (i = 0; i < nwords; i++) { - wordstr = sarrayGetString(sa, i, L_NOCOPY); - bmfGetStringWidth(bmf, wordstr, &width); - numaAddNumber(na, width); - } - - return na; -} - - -/*! - * \brief bmfGetStringWidth() - * - * \param[in] bmf - * \param[in] textstr - * \param[out] pw width of text string, in pixels for the - * font represented by the bmf - * \return 0 if OK, 1 on error - */ -l_ok -bmfGetStringWidth(L_BMF *bmf, - const char *textstr, - l_int32 *pw) -{ -char chr; -l_int32 i, w, width, nchar; - - PROCNAME("bmfGetStringWidth"); - - if (!bmf) - return ERROR_INT("bmf not defined", procName, 1); - if (!textstr) - return ERROR_INT("teststr not defined", procName, 1); - if (!pw) - return ERROR_INT("&w not defined", procName, 1); - - nchar = strlen(textstr); - w = 0; - for (i = 0; i < nchar; i++) { - chr = textstr[i]; - bmfGetWidth(bmf, chr, &width); - if (width != UNDEF) - w += width + bmf->kernwidth; - } - w -= bmf->kernwidth; /* remove last one */ - - *pw = w; - return 0; -} - - - -/*---------------------------------------------------------------------* - * Text splitting * - *---------------------------------------------------------------------*/ -/*! 
- * \brief splitStringToParagraphs() - * - * \param[in] textstr text string - * \param[in] splitflag see enum in bmf.h; valid values in {1,2,3} - * \return sarray where each string is a paragraph of the input, - * or NULL on error. - */ -SARRAY * -splitStringToParagraphs(char *textstr, - l_int32 splitflag) -{ -char *linestr, *parastring; -l_int32 nlines, i, allwhite, leadwhite; -SARRAY *salines, *satemp, *saout; - - PROCNAME("splitStringToParagraphs"); - - if (!textstr) - return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL); - - if ((salines = sarrayCreateLinesFromString(textstr, 1)) == NULL) - return (SARRAY *)ERROR_PTR("salines not made", procName, NULL); - nlines = sarrayGetCount(salines); - saout = sarrayCreate(0); - satemp = sarrayCreate(0); - - linestr = sarrayGetString(salines, 0, L_NOCOPY); - sarrayAddString(satemp, linestr, L_COPY); - for (i = 1; i < nlines; i++) { - linestr = sarrayGetString(salines, i, L_NOCOPY); - stringAllWhitespace(linestr, &allwhite); - stringLeadingWhitespace(linestr, &leadwhite); - if ((splitflag == SPLIT_ON_LEADING_WHITE && leadwhite) || - (splitflag == SPLIT_ON_BLANK_LINE && allwhite) || - (splitflag == SPLIT_ON_BOTH && (allwhite || leadwhite))) { - parastring = sarrayToString(satemp, 1); /* add nl to each line */ - sarrayAddString(saout, parastring, L_INSERT); - sarrayDestroy(&satemp); - satemp = sarrayCreate(0); - } - sarrayAddString(satemp, linestr, L_COPY); - } - parastring = sarrayToString(satemp, 1); /* add nl to each line */ - sarrayAddString(saout, parastring, L_INSERT); - sarrayDestroy(&satemp); - sarrayDestroy(&salines); - return saout; -} - - -/*! 
- * \brief stringAllWhitespace() - * - * \param[in] textstr text string - * \param[out] pval 1 if all whitespace; 0 otherwise - * \return 0 if OK, 1 on error - */ -static l_int32 -stringAllWhitespace(char *textstr, - l_int32 *pval) -{ -l_int32 len, i; - - PROCNAME("stringAllWhitespace"); - - if (!textstr) - return ERROR_INT("textstr not defined", procName, 1); - if (!pval) - return ERROR_INT("&va not defined", procName, 1); - - len = strlen(textstr); - *pval = 1; - for (i = 0; i < len; i++) { - if (textstr[i] != ' ' && textstr[i] != '\t' && textstr[i] != '\n') { - *pval = 0; - return 0; - } - } - return 0; -} - - -/*! - * \brief stringLeadingWhitespace() - * - * \param[in] textstr text string - * \param[out] pval 1 if leading char is [space] or [tab]; 0 otherwise - * \return 0 if OK, 1 on error - */ -static l_int32 -stringLeadingWhitespace(char *textstr, - l_int32 *pval) -{ - PROCNAME("stringLeadingWhitespace"); - - if (!textstr) - return ERROR_INT("textstr not defined", procName, 1); - if (!pval) - return ERROR_INT("&va not defined", procName, 1); - - *pval = 0; - if (textstr[0] == ' ' || textstr[0] == '\t') - *pval = 1; - - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/tiffio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/tiffio.c deleted file mode 100644 index 9c781ec4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/tiffio.c +++ /dev/null @@ -1,2851 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file tiffio.c - *
- *
- *     TIFFClientOpen() wrappers for FILE*:
- *      static tsize_t    lept_read_proc()
- *      static tsize_t    lept_write_proc()
- *      static toff_t     lept_seek_proc()
- *      static int        lept_close_proc()
- *      static toff_t     lept_size_proc()
- *
- *     Reading tiff:
- *             PIX       *pixReadTiff()             [ special top level ]
- *             PIX       *pixReadStreamTiff()
- *      static PIX       *pixReadFromTiffStream()
- *
- *     Writing tiff:
- *             l_int32    pixWriteTiff()            [ special top level ]
- *             l_int32    pixWriteTiffCustom()      [ special top level ]
- *             l_int32    pixWriteStreamTiff()
- *             l_int32    pixWriteStreamTiffWA()
- *      static l_int32    pixWriteToTiffStream()
- *      static l_int32    writeCustomTiffTags()
- *
- *     Reading and writing multipage tiff
- *             PIX       *pixReadFromMultipageTiff()
- *             PIXA      *pixaReadMultipageTiff()   [ special top level ]
- *             l_int32    pixaWriteMultipageTiff()  [ special top level ]
- *             l_int32    writeMultipageTiff()      [ special top level ]
- *             l_int32    writeMultipageTiffSA()
- *
- *     Information about tiff file
- *             l_int32    fprintTiffInfo()
- *             l_int32    tiffGetCount()
- *             l_int32    getTiffResolution()
- *      static l_int32    getTiffStreamResolution()
- *             l_int32    readHeaderTiff()
- *             l_int32    freadHeaderTiff()
- *             l_int32    readHeaderMemTiff()
- *      static l_int32    tiffReadHeaderTiff()
- *             l_int32    findTiffCompression()
- *      static l_int32    getTiffCompressedFormat()
- *
- *     Extraction of tiff g4 data:
- *             l_int32    extractG4DataFromFile()
- *
- *     Open tiff stream from file stream
- *      static TIFF      *fopenTiff()
- *
- *     Wrapper for TIFFOpen:
- *      static TIFF      *openTiff()
- *
- *     Memory I/O: reading memory --> pix and writing pix --> memory
- *             [10 static helper functions]
- *             PIX       *pixReadMemTiff();
- *             PIX       *pixReadMemFromMultipageTiff();
- *             PIXA      *pixaReadMemMultipageTiff()    [ special top level ]
- *             l_int32    pixaWriteMemMultipageTiff()   [ special top level ]
- *             l_int32    pixWriteMemTiff();
- *             l_int32    pixWriteMemTiffCustom();
- *
- *  Note:  To include all necessary functions, use libtiff version 3.7.4
- *         (or later)
- *  Note:  On Windows with 2 bpp or 4 bpp images, the bytes in the
- *         tiff-compressed file depend on the pad bits (but not the
- *         decoded raster image when read).  Because it is sometimes
- *         convenient to use a golden file with a byte-by-byte check
- *         to verify invariance, we set the pad bits to 0 before writing,
- *         in pixWriteToTiffStream().
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include /* for isnan */ -#include -#ifndef _MSC_VER -#include -#else /* _MSC_VER */ -#include -#endif /* _MSC_VER */ -#include -#include "allheaders.h" - -/* --------------------------------------------*/ -#if HAVE_LIBTIFF /* defined in environ.h */ -/* --------------------------------------------*/ - -#include "tiff.h" -#include "tiffio.h" - -static const l_int32 DefaultResolution = 300; /* ppi */ -static const l_int32 ManyPagesInTiffFile = 3000; /* warn if big */ -static const l_uint32 MaxTiffBufferSize = 1 << 24; /* 16MiB */ - - - /* All functions with TIFF interfaces are static. */ -static PIX *pixReadFromTiffStream(TIFF *tif); -static l_int32 getTiffStreamResolution(TIFF *tif, l_int32 *pxres, - l_int32 *pyres); -static l_int32 tiffReadHeaderTiff(TIFF *tif, l_int32 *pwidth, - l_int32 *pheight, l_int32 *pbps, - l_int32 *pspp, l_int32 *pres, - l_int32 *pcmap, l_int32 *pformat); -static l_int32 writeCustomTiffTags(TIFF *tif, NUMA *natags, - SARRAY *savals, SARRAY *satypes, - NUMA *nasizes); -static l_int32 pixWriteToTiffStream(TIFF *tif, PIX *pix, l_int32 comptype, - NUMA *natags, SARRAY *savals, - SARRAY *satypes, NUMA *nasizes); -static TIFF *fopenTiff(FILE *fp, const char *modestring); -static TIFF *openTiff(const char *filename, const char *modestring); - - /* Static helper for tiff compression type */ -static l_int32 getTiffCompressedFormat(l_uint16 tiffcomp); - - /* Static function for memory I/O */ -static TIFF *fopenTiffMemstream(const char *filename, const char *operation, - l_uint8 **pdata, size_t *pdatasize); - - /* This structure defines a transform to be performed on a TIFF image - * (note that the same transformation can be represented in - * several different ways using this structure since - * vflip + hflip + counterclockwise == clockwise). 
*/ -struct tiff_transform { - int vflip; /* if non-zero, image needs a vertical fip */ - int hflip; /* if non-zero, image needs a horizontal flip */ - int rotate; /* -1 -> counterclockwise 90-degree rotation, - 0 -> no rotation - 1 -> clockwise 90-degree rotation */ -}; - - /* This describes the transformations needed for a given orientation - * tag. The tag values start at 1, so you need to subtract 1 to get a - * valid index into this array. It is only valid when not using - * TIFFReadRGBAImageOriented(). */ -static struct tiff_transform tiff_orientation_transforms[] = { - {0, 0, 0}, - {0, 1, 0}, - {1, 1, 0}, - {1, 0, 0}, - {0, 1, -1}, - {0, 0, 1}, - {0, 1, 1}, - {0, 0, -1} -}; - - /* Same as above, except that test transformations are only valid - * when using TIFFReadRGBAImageOriented(). Transformations - * were determined empirically. See the libtiff mailing list for - * more discussion: http://www.asmail.be/msg0054683875.html */ -static struct tiff_transform tiff_partial_orientation_transforms[] = { - {0, 0, 0}, - {0, 0, 0}, - {0, 0, 0}, - {0, 0, 0}, - {0, 1, -1}, - {0, 1, 1}, - {1, 0, 1}, - {0, 1, -1} -}; - - -/*-----------------------------------------------------------------------* - * TIFFClientOpen() wrappers for FILE* * - * Provided by Jürgen Buchmüller * - * * - * We previously used TIFFFdOpen(), which used low-level file * - * descriptors. It had portability issues with Windows, along * - * with other limitations from lack of stream control operations. * - * These callbacks to TIFFClientOpen() avoid the problems. * - * * - * Jürgen made the functions use 64 bit file operations where possible * - * or required, namely for seek and size. On Windows there are specific * - * _fseeki64() and _ftelli64() functions. On unix it is common to look * - * for a macro _LARGEFILE64_SOURCE being defined, which makes available * - * the off64_t type, and to use fseeko() and ftello() in this case. 
* - *-----------------------------------------------------------------------*/ -static tsize_t -lept_read_proc(thandle_t cookie, - tdata_t buff, - tsize_t size) -{ - FILE* fp = (FILE *)cookie; - tsize_t done; - if (!buff || !cookie || !fp) - return (tsize_t)-1; - done = fread(buff, 1, size, fp); - return done; -} - -static tsize_t -lept_write_proc(thandle_t cookie, - tdata_t buff, - tsize_t size) -{ - FILE* fp = (FILE *)cookie; - tsize_t done; - if (!buff || !cookie || !fp) - return (tsize_t)-1; - done = fwrite(buff, 1, size, fp); - return done; -} - -static toff_t -lept_seek_proc(thandle_t cookie, - toff_t offs, - int whence) -{ - FILE* fp = (FILE *)cookie; -#if defined(_MSC_VER) - __int64 pos = 0; - if (!cookie || !fp) - return (tsize_t)-1; - switch (whence) { - case SEEK_SET: - pos = 0; - break; - case SEEK_CUR: - pos = ftell(fp); - break; - case SEEK_END: - _fseeki64(fp, 0, SEEK_END); - pos = _ftelli64(fp); - break; - } - pos = (__int64)(pos + offs); - _fseeki64(fp, pos, SEEK_SET); - if (pos == _ftelli64(fp)) - return (tsize_t)pos; -#elif defined(_LARGEFILE64_SOURCE) - off64_t pos = 0; - if (!cookie || !fp) - return (tsize_t)-1; - switch (whence) { - case SEEK_SET: - pos = 0; - break; - case SEEK_CUR: - pos = ftello(fp); - break; - case SEEK_END: - fseeko(fp, 0, SEEK_END); - pos = ftello(fp); - break; - } - pos = (off64_t)(pos + offs); - fseeko(fp, pos, SEEK_SET); - if (pos == ftello(fp)) - return (tsize_t)pos; -#else - off_t pos = 0; - if (!cookie || !fp) - return (tsize_t)-1; - switch (whence) { - case SEEK_SET: - pos = 0; - break; - case SEEK_CUR: - pos = ftell(fp); - break; - case SEEK_END: - fseek(fp, 0, SEEK_END); - pos = ftell(fp); - break; - } - pos = (off_t)(pos + offs); - fseek(fp, pos, SEEK_SET); - if (pos == ftell(fp)) - return (tsize_t)pos; -#endif - return (tsize_t)-1; -} - -static int -lept_close_proc(thandle_t cookie) -{ - FILE* fp = (FILE *)cookie; - if (!cookie || !fp) - return 0; - fseek(fp, 0, SEEK_SET); - return 0; -} - -static toff_t 
-lept_size_proc(thandle_t cookie) -{ - FILE* fp = (FILE *)cookie; -#if defined(_MSC_VER) - __int64 pos; - __int64 size; - if (!cookie || !fp) - return (tsize_t)-1; - pos = _ftelli64(fp); - _fseeki64(fp, 0, SEEK_END); - size = _ftelli64(fp); - _fseeki64(fp, pos, SEEK_SET); -#elif defined(_LARGEFILE64_SOURCE) - off64_t pos; - off64_t size; - if (!fp) - return (tsize_t)-1; - pos = ftello(fp); - fseeko(fp, 0, SEEK_END); - size = ftello(fp); - fseeko(fp, pos, SEEK_SET); -#else - off_t pos; - off_t size; - if (!cookie || !fp) - return (tsize_t)-1; - pos = ftell(fp); - fseek(fp, 0, SEEK_END); - size = ftell(fp); - fseek(fp, pos, SEEK_SET); -#endif - return (toff_t)size; -} - - -/*--------------------------------------------------------------* - * Reading from file * - *--------------------------------------------------------------*/ -/*! - * \brief pixReadTiff() - * - * \param[in] filename - * \param[in] n page number 0 based - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This is a version of pixRead(), specialized for tiff
- *          files, that allows specification of the page to be returned
- *      (2) No warning messages on failure, because of how multi-page
- *          TIFF reading works. You are supposed to keep trying until
- *          it stops working.
- * 
- */ -PIX * -pixReadTiff(const char *filename, - l_int32 n) -{ -FILE *fp; -PIX *pix; - - PROCNAME("pixReadTiff"); - - if (!filename) - return (PIX *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIX *)ERROR_PTR("image file not found", procName, NULL); - pix = pixReadStreamTiff(fp, n); - fclose(fp); - return pix; -} - - -/*--------------------------------------------------------------* - * Reading from stream * - *--------------------------------------------------------------*/ -/*! - * \brief pixReadStreamTiff() - * - * \param[in] fp file stream - * \param[in] n page number: 0 based - * \return pix, or NULL on error or if there are no more images in the file - * - *
- * Notes:
- *      (1) No warning messages on failure, because of how multi-page
- *          TIFF reading works. You are supposed to keep trying until
- *          it stops working.
- * 
- */ -PIX * -pixReadStreamTiff(FILE *fp, - l_int32 n) -{ -PIX *pix; -TIFF *tif; - - PROCNAME("pixReadStreamTiff"); - - if (!fp) - return (PIX *)ERROR_PTR("stream not defined", procName, NULL); - - if ((tif = fopenTiff(fp, "r")) == NULL) - return (PIX *)ERROR_PTR("tif not opened", procName, NULL); - - if (TIFFSetDirectory(tif, n) == 0) { - TIFFCleanup(tif); - return NULL; - } - if ((pix = pixReadFromTiffStream(tif)) == NULL) { - TIFFCleanup(tif); - return NULL; - } - TIFFCleanup(tif); - return pix; -} - - -/*! - * \brief pixReadFromTiffStream() - * - * \param[in] tif TIFF handle - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) We can read the following images (up to 32 bits/pixel):
- *          1 spp (grayscale): 1, 2, 4, 8, 16 bps
- *          1 spp (colormapped): 1, 2, 4, 8 bps
- *          2 spp (gray+alpha): 8 bps
- *          3 spp (rgb) and 4 spp (rgba): 8 or 16 bps
- *      (2) We do not handle 16 bps for spp == 2.
- *      (3) 2 bpp gray+alpha are rasterized as 32 bit/pixel rgba, with
- *          the gray value replicated in r, g and b.
- *      (4) For colormapped images, we support 8 bits/color in the palette.
- *          Tiff colormaps have 16 bits/color, and we reduce them to 8.
- *      (5) Quoting the libtiff documentation at
- *               http://libtiff.maptools.org/libtiff.html
- *          "libtiff provides a high-level interface for reading image data
- *          from a TIFF file. This interface handles the details of data
- *          organization and format for a wide variety of TIFF files;
- *          at least the large majority of those files that one would
- *          normally encounter. Image data is, by default, returned as
- *          ABGR pixels packed into 32-bit words (8 bits per sample).
- *          Rectangular rasters can be read or data can be intercepted
- *          at an intermediate level and packed into memory in a format
- *          more suitable to the application. The library handles all
- *          the details of the format of data stored on disk and,
- *          in most cases, if any colorspace conversions are required:
- *          bilevel to RGB, greyscale to RGB, CMYK to RGB, YCbCr to RGB,
- *          16-bit samples to 8-bit samples, associated/unassociated alpha,
- *          etc."
- * 
- */ -static PIX * -pixReadFromTiffStream(TIFF *tif) -{ -char *text; -l_uint8 *linebuf, *data, *rowptr; -l_uint16 spp, bps, photometry, tiffcomp, orientation, sample_fmt; -l_uint16 *redmap, *greenmap, *bluemap; -l_int32 d, wpl, bpl, comptype, i, j, k, ncolors, rval, gval, bval, aval; -l_int32 xres, yres; -l_uint32 w, h, tiffbpl, tiffword, read_oriented; -l_uint32 *line, *ppixel, *tiffdata, *pixdata; -PIX *pix, *pix1; -PIXCMAP *cmap; - - PROCNAME("pixReadFromTiffStream"); - - if (!tif) - return (PIX *)ERROR_PTR("tif not defined", procName, NULL); - - read_oriented = 0; - - /* Only accept uint image data: - * SAMPLEFORMAT_UINT = 1; - * SAMPLEFORMAT_INT = 2; - * SAMPLEFORMAT_IEEEFP = 3; - * SAMPLEFORMAT_VOID = 4; */ - TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLEFORMAT, &sample_fmt); - if (sample_fmt != SAMPLEFORMAT_UINT) { - L_ERROR("sample format = %d is not uint\n", procName, sample_fmt); - return NULL; - } - - /* Can't read tiff in tiled format. For what is involved, see, e.g: - * https://www.cs.rochester.edu/~nelson/courses/vision/\ - * resources/tiff/libtiff.html#Tiles - * A tiled tiff can be converted to a normal (strip) tif: - * tiffcp -s */ - if (TIFFIsTiled(tif)) { - L_ERROR("tiled format is not supported\n", procName); - return NULL; - } - - /* Use default fields for bps and spp */ - TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps); - TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); - if (bps != 1 && bps != 2 && bps != 4 && bps != 8 && bps != 16) { - L_ERROR("invalid bps = %d\n", procName, bps); - return NULL; - } - if (spp == 2 && bps != 8) { - L_WARNING("for 2 spp, only handle 8 bps\n", procName); - return NULL; - } - if (spp == 1) - d = bps; - else if (spp == 2) /* gray plus alpha */ - d = 32; /* will convert to RGBA */ - else if (spp == 3 || spp == 4) - d = 32; - else - return (PIX *)ERROR_PTR("spp not in set {1,2,3,4}", procName, NULL); - - TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w); - TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h); - tiffbpl = 
TIFFScanlineSize(tif); - if (tiffbpl < (bps * spp * w + 7) / 8) - return (PIX *)ERROR_PTR("bad tiff file: tiffbpl is too small", - procName, NULL); - if (tiffbpl > MaxTiffBufferSize) - return (PIX *)ERROR_PTR("bad tiff file: tiffbpl is too large", - procName, NULL); - - if ((pix = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); - pixSetInputFormat(pix, IFF_TIFF); - data = (l_uint8 *)pixGetData(pix); - wpl = pixGetWpl(pix); - bpl = 4 * wpl; - - TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp); - - /* Thanks to Jeff Breidenbach, we now support reading 8 bpp - * images encoded in the long-deprecated old jpeg format, - * COMPRESSION_OJPEG. TIFFReadScanline() fails on this format, - * so we use RGBA reading, which generates a 4 spp image, and - * pull out the red component. */ - if (spp == 1 && tiffcomp != COMPRESSION_OJPEG) { - linebuf = (l_uint8 *)LEPT_CALLOC(tiffbpl + 1, sizeof(l_uint8)); - for (i = 0; i < h; i++) { - if (TIFFReadScanline(tif, linebuf, i, 0) < 0) { - LEPT_FREE(linebuf); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("line read fail", procName, NULL); - } - memcpy(data, linebuf, tiffbpl); - data += bpl; - } - if (bps <= 8) - pixEndianByteSwap(pix); - else /* bps == 16 */ - pixEndianTwoByteSwap(pix); - LEPT_FREE(linebuf); - } else if (spp == 2 && bps == 8) { /* gray plus alpha */ - L_INFO("gray+alpha is not supported; converting to RGBA\n", procName); - pixSetSpp(pix, 4); - linebuf = (l_uint8 *)LEPT_CALLOC(tiffbpl + 1, sizeof(l_uint8)); - pixdata = pixGetData(pix); - for (i = 0; i < h; i++) { - if (TIFFReadScanline(tif, linebuf, i, 0) < 0) { - LEPT_FREE(linebuf); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("line read fail", procName, NULL); - } - rowptr = linebuf; - ppixel = pixdata + i * wpl; - for (j = k = 0; j < w; j++) { - /* Copy gray value into r, g and b */ - SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k]); - SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k]); - SET_DATA_BYTE(ppixel, COLOR_BLUE, 
rowptr[k++]); - SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]); - ppixel++; - } - } - LEPT_FREE(linebuf); - } else { /* rgb, rgba, or old jpeg */ - if ((tiffdata = (l_uint32 *)LEPT_CALLOC((size_t)w * h, - sizeof(l_uint32))) == NULL) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL); - } - /* TIFFReadRGBAImageOriented() converts to 8 bps */ - if (!TIFFReadRGBAImageOriented(tif, w, h, tiffdata, - ORIENTATION_TOPLEFT, 0)) { - LEPT_FREE(tiffdata); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL); - } else { - read_oriented = 1; - } - - if (spp == 1) { /* 8 bpp, old jpeg format */ - pixdata = pixGetData(pix); - for (i = 0; i < h; i++) { - line = pixdata + i * wpl; - for (j = 0; j < w; j++) { - tiffword = tiffdata[i * w + j]; - rval = TIFFGetR(tiffword); - SET_DATA_BYTE(line, j, rval); - } - } - } else { /* rgb or rgba */ - if (spp == 4) pixSetSpp(pix, 4); - line = pixGetData(pix); - for (i = 0; i < h; i++, line += wpl) { - for (j = 0, ppixel = line; j < w; j++) { - /* TIFFGet* are macros */ - tiffword = tiffdata[i * w + j]; - rval = TIFFGetR(tiffword); - gval = TIFFGetG(tiffword); - bval = TIFFGetB(tiffword); - if (spp == 3) { - composeRGBPixel(rval, gval, bval, ppixel); - } else { /* spp == 4 */ - aval = TIFFGetA(tiffword); - composeRGBAPixel(rval, gval, bval, aval, ppixel); - } - ppixel++; - } - } - } - LEPT_FREE(tiffdata); - } - - if (getTiffStreamResolution(tif, &xres, &yres) == 0) { - pixSetXRes(pix, xres); - pixSetYRes(pix, yres); - } - - /* Find and save the compression type */ - TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp); - comptype = getTiffCompressedFormat(tiffcomp); - pixSetInputFormat(pix, comptype); - - if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) { - /* Save the colormap as a pix cmap. 
Because the - * tiff colormap components are 16 bit unsigned, - * and go from black (0) to white (0xffff), the - * the pix cmap takes the most significant byte. */ - if (bps > 8) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("colormap size > 256", procName, NULL); - } - if ((cmap = pixcmapCreate(bps)) == NULL) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("colormap not made", procName, NULL); - } - ncolors = 1 << bps; - for (i = 0; i < ncolors; i++) - pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8, - bluemap[i] >> 8); - pixSetColormap(pix, cmap); - - /* Remove the colormap for 1 bpp. */ - if (bps == 1) { - pix1 = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC); - pixDestroy(&pix); - pix = pix1; - } - } else { /* No colormap: check photometry and invert if necessary */ - if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) { - /* Guess default photometry setting. Assume min_is_white - * if compressed 1 bpp; min_is_black otherwise. */ - if (tiffcomp == COMPRESSION_CCITTFAX3 || - tiffcomp == COMPRESSION_CCITTFAX4 || - tiffcomp == COMPRESSION_CCITTRLE || - tiffcomp == COMPRESSION_CCITTRLEW) { - photometry = PHOTOMETRIC_MINISWHITE; - } else { - photometry = PHOTOMETRIC_MINISBLACK; - } - } - if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) || - (d == 8 && photometry == PHOTOMETRIC_MINISWHITE)) - pixInvert(pix, pix); - } - - if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) { - if (orientation >= 1 && orientation <= 8) { - struct tiff_transform *transform = (read_oriented) ? 
- &tiff_partial_orientation_transforms[orientation - 1] : - &tiff_orientation_transforms[orientation - 1]; - if (transform->vflip) pixFlipTB(pix, pix); - if (transform->hflip) pixFlipLR(pix, pix); - if (transform->rotate) { - PIX *oldpix = pix; - pix = pixRotate90(oldpix, transform->rotate); - pixDestroy(&oldpix); - } - } - } - - text = NULL; - TIFFGetField(tif, TIFFTAG_IMAGEDESCRIPTION, &text); - if (text) pixSetText(pix, text); - return pix; -} - - - -/*--------------------------------------------------------------* - * Writing to file * - *--------------------------------------------------------------*/ -/*! - * \brief pixWriteTiff() - * - * \param[in] filename to write to - * \param[in] pix any depth, colormap will be removed - * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS, - * IFF_TIFF_G3, IFF_TIFF_G4, - * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG - * \param[in] modestr "a" or "w" - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For multipage tiff, write the first pix with mode "w" and
- *          all subsequent pix with mode "a".
- *      (2) For multipage tiff, there is considerable overhead in the
- *          machinery to append an image and add the directory entry,
- *          and the time required for each image increases linearly
- *          with the number of images in the file.
- * 
- */ -l_ok -pixWriteTiff(const char *filename, - PIX *pix, - l_int32 comptype, - const char *modestr) -{ - return pixWriteTiffCustom(filename, pix, comptype, modestr, - NULL, NULL, NULL, NULL); -} - - -/*! - * \brief pixWriteTiffCustom() - * - * \param[in] filename to write to - * \param[in] pix - * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS, - * IFF_TIFF_G3, IFF_TIFF_G4, - * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG - * \param[in] modestr "a" or "w" - * \param[in] natags [optional] NUMA of custom tiff tags - * \param[in] savals [optional] SARRAY of values - * \param[in] satypes [optional] SARRAY of types - * \param[in] nasizes [optional] NUMA of sizes - * \return 0 if OK, 1 on error - * - * Usage: - * 1 This writes a page image to a tiff file, with optional - * extra tags defined in tiff.h - * 2 For multipage tiff, write the first pix with mode "w" and - * all subsequent pix with mode "a". - * 3 For the custom tiff tags: - * a The three arrays {natags, savals, satypes} must all be - * either NULL or defined and of equal size. - * b If they are defined, the tags are an array of integers, - * the vals are an array of values in string format, and - * the types are an array of types in string format. - * c All valid tags are definined in tiff.h. - * d The types allowed are the set of strings: - * "char*" - * "l_uint8*" - * "l_uint16" - * "l_uint32" - * "l_int32" - * "l_float64" - * "l_uint16-l_uint16" note the dash; use it between the - * two l_uint16 vals in the val string - * Of these, "char*" and "l_uint16" are the most commonly used. - * e The last array, nasizes, is also optional. It is for - * tags that take an array of bytes for a value, a number of - * elements in the array, and a type that is either "char*" - * or "l_uint8*" probably either will work. - * Use NULL if there are no such tags. - * f VERY IMPORTANT: if there are any tags that require the - * extra size value, stored in nasizes, they must be - * written first! 
- */ -l_ok -pixWriteTiffCustom(const char *filename, - PIX *pix, - l_int32 comptype, - const char *modestr, - NUMA *natags, - SARRAY *savals, - SARRAY *satypes, - NUMA *nasizes) -{ -l_int32 ret; -TIFF *tif; - - PROCNAME("pixWriteTiffCustom"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if ((tif = openTiff(filename, modestr)) == NULL) - return ERROR_INT("tif not opened", procName, 1); - ret = pixWriteToTiffStream(tif, pix, comptype, natags, savals, - satypes, nasizes); - TIFFClose(tif); - return ret; -} - - -/*--------------------------------------------------------------* - * Writing to stream * - *--------------------------------------------------------------*/ -/*! - * \brief pixWriteStreamTiff() - * - * \param[in] fp file stream - * \param[in] pix - * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS, - * IFF_TIFF_G3, IFF_TIFF_G4, - * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This writes a single image to a file stream opened for writing.
- *      (2) If the pix has a colormap, it is preserved in the output file.
- *      (3) For images with bpp > 1, this resets the comptype, if
- *          necessary, to write uncompressed data.
- *      (4) G3 and G4 are only defined for 1 bpp.
- *      (5) We only allow PACKBITS for bpp = 1, because for bpp > 1
- *          it typically expands images that are not synthetically generated.
- *      (6) G4 compression is typically about twice as good as G3.
- *          G4 is excellent for binary compression of text/line-art,
- *          but terrible for halftones and dithered patterns.  (In
- *          fact, G4 on halftones can give a file that is larger
- *          than uncompressed!)  If a binary image has dithered
- *          regions, it is usually better to compress with png.
- * 
- */ -l_ok -pixWriteStreamTiff(FILE *fp, - PIX *pix, - l_int32 comptype) -{ - return pixWriteStreamTiffWA(fp, pix, comptype, "w"); -} - - -/*! - * \brief pixWriteStreamTiffWA() - * - * \param[in] fp file stream opened for append or write - * \param[in] pix - * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS, - * IFF_TIFF_G3, IFF_TIFF_G4, - * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG - * \param[in] modestr "w" or "a" - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteStreamTiff()
- * 
- */ -l_ok -pixWriteStreamTiffWA(FILE *fp, - PIX *pix, - l_int32 comptype, - const char *modestr) -{ -TIFF *tif; - - PROCNAME("pixWriteStreamTiffWA"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1 ); - if (!pix) - return ERROR_INT("pix not defined", procName, 1 ); - if (strcmp(modestr, "w") && strcmp(modestr, "a")) - return ERROR_INT("modestr not 'w' or 'a'", procName, 1 ); - - if (pixGetDepth(pix) != 1 && comptype != IFF_TIFF && - comptype != IFF_TIFF_LZW && comptype != IFF_TIFF_ZIP && - comptype != IFF_TIFF_JPEG) { - L_WARNING("invalid compression type for bpp > 1\n", procName); - comptype = IFF_TIFF_ZIP; - } - - if ((tif = fopenTiff(fp, modestr)) == NULL) - return ERROR_INT("tif not opened", procName, 1); - - if (pixWriteToTiffStream(tif, pix, comptype, NULL, NULL, NULL, NULL)) { - TIFFCleanup(tif); - return ERROR_INT("tif write error", procName, 1); - } - - TIFFCleanup(tif); - return 0; -} - - -/*! - * \brief pixWriteToTiffStream() - * - * \param[in] tif data structure, opened to a file - * \param[in] pix - * \param[in] comptype IFF_TIFF: for any image; no compression - * IFF_TIFF_RLE, IFF_TIFF_PACKBITS: for 1 bpp only - * IFF_TIFF_G4 and IFF_TIFF_G3: for 1 bpp only - * IFF_TIFF_LZW, IFF_TIFF_ZIP: lossless for any image - * IFF_TIFF_JPEG: lossy 8 bpp gray or rgb - * \param[in] natags [optional] NUMA of custom tiff tags - * \param[in] savals [optional] SARRAY of values - * \param[in] satypes [optional] SARRAY of types - * \param[in] nasizes [optional] NUMA of sizes - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This static function should only be called through higher
- *          level functions in this file; namely, pixWriteTiffCustom(),
- *          pixWriteTiff(), pixWriteStreamTiff(), pixWriteMemTiff()
- *          and pixWriteMemTiffCustom().
- *      (2) We only allow PACKBITS for bpp = 1, because for bpp > 1
- *          it typically expands images that are not synthetically generated.
- *      (3) See pixWriteTiffCustom() for details on how to use
- *          the last four parameters for customized tiff tags.
- *      (4) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16
- *          and 32.  However, it is possible, and in some cases desirable,
- *          to write out a tiff file using an rgb pix that has 24 bpp.
- *          This can be created by appending the raster data for a 24 bpp
- *          image (with proper scanline padding) directly to a 24 bpp
- *          pix that was created without a data array.  See note in
- *          pixWriteStreamPng() for an example.
- * 
- */ -static l_int32 -pixWriteToTiffStream(TIFF *tif, - PIX *pix, - l_int32 comptype, - NUMA *natags, - SARRAY *savals, - SARRAY *satypes, - NUMA *nasizes) -{ -l_uint8 *linebuf, *data; -l_uint16 redmap[256], greenmap[256], bluemap[256]; -l_int32 w, h, d, spp, i, j, k, wpl, bpl, tiffbpl, ncolors, cmapsize; -l_int32 *rmap, *gmap, *bmap; -l_int32 xres, yres; -l_uint32 *line, *ppixel; -PIX *pixt; -PIXCMAP *cmap; -char *text; - - PROCNAME("pixWriteToTiffStream"); - - if (!tif) - return ERROR_INT("tif stream not defined", procName, 1); - if (!pix) - return ERROR_INT( "pix not defined", procName, 1 ); - - pixSetPadBits(pix, 0); - pixGetDimensions(pix, &w, &h, &d); - spp = pixGetSpp(pix); - xres = pixGetXRes(pix); - yres = pixGetYRes(pix); - if (xres == 0) xres = DefaultResolution; - if (yres == 0) yres = DefaultResolution; - - /* ------------------ Write out the header ------------- */ - TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, (l_uint32)RESUNIT_INCH); - TIFFSetField(tif, TIFFTAG_XRESOLUTION, (l_float64)xres); - TIFFSetField(tif, TIFFTAG_YRESOLUTION, (l_float64)yres); - - TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (l_uint32)w); - TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (l_uint32)h); - TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); - - if ((text = pixGetText(pix)) != NULL) - TIFFSetField(tif, TIFFTAG_IMAGEDESCRIPTION, text); - - if (d == 1 && !pixGetColormap(pix)) { - /* If d == 1, preserve the colormap. Note that when - * d == 1 pix with colormaps are read, the colormaps - * are removed. The only pix in leptonica that have - * colormaps are made programmatically. 
*/ - TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISWHITE); - } else if ((d == 32 && spp == 3) || d == 24) { - TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB); - TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)3); - TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, - (l_uint16)8, (l_uint16)8, (l_uint16)8); - } else if (d == 32 && spp == 4) { - l_uint16 val[1]; - val[0] = EXTRASAMPLE_ASSOCALPHA; - TIFFSetField(tif, TIFFTAG_EXTRASAMPLES, (l_uint16)1, &val); - TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB); - TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)4); - TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, - (l_uint16)8, (l_uint16)8, (l_uint16)8, (l_uint16)8); - } else if (d == 16) { /* we only support spp = 1, bps = 16 */ - TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK); - } else if ((cmap = pixGetColormap(pix)) == NULL) { - TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK); - } else { /* Save colormap in the tiff; not more than 256 colors */ - if (d > 8) { - L_ERROR("d = %d > 8 with colormap!; reducing to 8\n", procName, d); - d = 8; - } - pixcmapToArrays(cmap, &rmap, &gmap, &bmap, NULL); - ncolors = pixcmapGetCount(cmap); - ncolors = L_MIN(256, ncolors); /* max 256 */ - cmapsize = 1 << d; - cmapsize = L_MIN(256, cmapsize); /* power of 2; max 256 */ - if (ncolors > cmapsize) { - L_WARNING("too many colors in cmap for tiff; truncating\n", - procName); - ncolors = cmapsize; - } - for (i = 0; i < ncolors; i++) { - redmap[i] = (rmap[i] << 8) | rmap[i]; - greenmap[i] = (gmap[i] << 8) | gmap[i]; - bluemap[i] = (bmap[i] << 8) | bmap[i]; - } - for (i = ncolors; i < cmapsize; i++) /* init, even though not used */ - redmap[i] = greenmap[i] = bluemap[i] = 0; - LEPT_FREE(rmap); - LEPT_FREE(gmap); - LEPT_FREE(bmap); - - TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_PALETTE); - TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)1); - TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (l_uint16)d); - TIFFSetField(tif, 
TIFFTAG_COLORMAP, redmap, greenmap, bluemap); - } - - if (d <= 16) { - TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (l_uint16)d); - TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)1); - } - - TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); - if (comptype == IFF_TIFF) { /* no compression */ - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE); - } else if (comptype == IFF_TIFF_G4) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX4); - } else if (comptype == IFF_TIFF_G3) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX3); - } else if (comptype == IFF_TIFF_RLE) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTRLE); - } else if (comptype == IFF_TIFF_PACKBITS) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS); - } else if (comptype == IFF_TIFF_LZW) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_LZW); - } else if (comptype == IFF_TIFF_ZIP) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_ADOBE_DEFLATE); - } else if (comptype == IFF_TIFF_JPEG) { - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_JPEG); - } else { - L_WARNING("unknown tiff compression; using none\n", procName); - TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE); - } - - /* This is a no-op if arrays are NULL */ - writeCustomTiffTags(tif, natags, savals, satypes, nasizes); - - /* ------------- Write out the image data ------------- */ - tiffbpl = TIFFScanlineSize(tif); - wpl = pixGetWpl(pix); - bpl = 4 * wpl; - if (tiffbpl > bpl) - lept_stderr("Big trouble: tiffbpl = %d, bpl = %d\n", tiffbpl, bpl); - if ((linebuf = (l_uint8 *)LEPT_CALLOC(1, bpl)) == NULL) - return ERROR_INT("calloc fail for linebuf", procName, 1); - - /* Use single strip for image */ - TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, h); - - if (d != 24 && d != 32) { - if (d == 16) - pixt = pixEndianTwoByteSwapNew(pix); - else - pixt = pixEndianByteSwapNew(pix); - data = (l_uint8 *)pixGetData(pixt); - for (i = 0; i < h; i++, data += bpl) { - 
memcpy(linebuf, data, tiffbpl); - if (TIFFWriteScanline(tif, linebuf, i, 0) < 0) - break; - } - pixDestroy(&pixt); - } else if (d == 24) { /* See note 4 above: special case of 24 bpp rgb */ - for (i = 0; i < h; i++) { - line = pixGetData(pix) + i * wpl; - if (TIFFWriteScanline(tif, (l_uint8 *)line, i, 0) < 0) - break; - } - } else { /* 32 bpp rgb or rgba */ - for (i = 0; i < h; i++) { - line = pixGetData(pix) + i * wpl; - for (j = 0, k = 0, ppixel = line; j < w; j++) { - linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_RED); - linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_GREEN); - linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_BLUE); - if (spp == 4) - linebuf[k++] = GET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL); - ppixel++; - } - if (TIFFWriteScanline(tif, linebuf, i, 0) < 0) - break; - } - } - -/* TIFFWriteDirectory(tif); */ - LEPT_FREE(linebuf); - - return 0; -} - - -/*! - * \brief writeCustomTiffTags() - * - * \param[in] tif - * \param[in] natags [optional] NUMA of custom tiff tags - * \param[in] savals [optional] SARRAY of values - * \param[in] satypes [optional] SARRAY of types - * \param[in] nasizes [optional] NUMA of sizes - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This static function should be called indirectly through
- *          higher level functions, such as pixWriteTiffCustom(),
- *          which call pixWriteToTiffStream().  See details in
- *          pixWriteTiffCustom() for using the 4 input arrays.
- *      (2) This is a no-op if the first 3 arrays are all NULL.
- *      (3) Otherwise, the first 3 arrays must be defined and all
- *          of equal size.
- *      (4) The fourth array is always optional.
- *      (5) The most commonly used types are "char*" and "u_int16".
- *          See tiff.h for a full listing of the tiff tags.
- *          Note that many of these tags, in particular the bit tags,
- *          are intended to be private, and cannot be set by this function.
- *          Examples are the STRIPOFFSETS and STRIPBYTECOUNTS tags,
- *          which are bit tags that are automatically set in the header,
- *          and can be extracted using tiffdump.
- * 
- */ -static l_int32 -writeCustomTiffTags(TIFF *tif, - NUMA *natags, - SARRAY *savals, - SARRAY *satypes, - NUMA *nasizes) -{ -char *sval, *type; -l_int32 i, n, ns, size, tagval, val; -l_float64 dval; -l_uint32 uval, uval2; - - PROCNAME("writeCustomTiffTags"); - - if (!tif) - return ERROR_INT("tif stream not defined", procName, 1); - if (!natags && !savals && !satypes) - return 0; - if (!natags || !savals || !satypes) - return ERROR_INT("not all arrays defined", procName, 1); - n = numaGetCount(natags); - if ((sarrayGetCount(savals) != n) || (sarrayGetCount(satypes) != n)) - return ERROR_INT("not all sa the same size", procName, 1); - - /* The sized arrays (4 args to TIFFSetField) are written first */ - if (nasizes) { - ns = numaGetCount(nasizes); - if (ns > n) - return ERROR_INT("too many 4-arg tag calls", procName, 1); - for (i = 0; i < ns; i++) { - numaGetIValue(natags, i, &tagval); - sval = sarrayGetString(savals, i, L_NOCOPY); - type = sarrayGetString(satypes, i, L_NOCOPY); - numaGetIValue(nasizes, i, &size); - if (strcmp(type, "char*") && strcmp(type, "l_uint8*")) - L_WARNING("array type not char* or l_uint8*; ignore\n", - procName); - TIFFSetField(tif, tagval, size, sval); - } - } else { - ns = 0; - } - - /* The typical tags (3 args to TIFFSetField) are now written */ - for (i = ns; i < n; i++) { - numaGetIValue(natags, i, &tagval); - sval = sarrayGetString(savals, i, L_NOCOPY); - type = sarrayGetString(satypes, i, L_NOCOPY); - if (!strcmp(type, "char*") || !strcmp(type, "const char*")) { - TIFFSetField(tif, tagval, sval); - } else if (!strcmp(type, "l_uint16")) { - if (sscanf(sval, "%u", &uval) == 1) { - TIFFSetField(tif, tagval, (l_uint16)uval); - } else { - lept_stderr("val %s not of type %s\n", sval, type); - return ERROR_INT("custom tag(s) not written", procName, 1); - } - } else if (!strcmp(type, "l_uint32")) { - if (sscanf(sval, "%u", &uval) == 1) { - TIFFSetField(tif, tagval, uval); - } else { - lept_stderr("val %s not of type %s\n", sval, type); - 
return ERROR_INT("custom tag(s) not written", procName, 1); - } - } else if (!strcmp(type, "l_int32")) { - if (sscanf(sval, "%d", &val) == 1) { - TIFFSetField(tif, tagval, val); - } else { - lept_stderr("val %s not of type %s\n", sval, type); - return ERROR_INT("custom tag(s) not written", procName, 1); - } - } else if (!strcmp(type, "l_float64")) { - if (sscanf(sval, "%lf", &dval) == 1) { - TIFFSetField(tif, tagval, dval); - } else { - lept_stderr("val %s not of type %s\n", sval, type); - return ERROR_INT("custom tag(s) not written", procName, 1); - } - } else if (!strcmp(type, "l_uint16-l_uint16")) { - if (sscanf(sval, "%u-%u", &uval, &uval2) == 2) { - TIFFSetField(tif, tagval, (l_uint16)uval, (l_uint16)uval2); - } else { - lept_stderr("val %s not of type %s\n", sval, type); - return ERROR_INT("custom tag(s) not written", procName, 1); - } - } else { - lept_stderr("unknown type %s\n",type); - return ERROR_INT("unknown type; tag(s) not written", procName, 1); - } - } - return 0; -} - - -/*--------------------------------------------------------------* - * Reading and writing multipage tiff * - *--------------------------------------------------------------*/ -/*! - * \brief pixReadFromMultipageTiff() - * - * \param[in] fname filename - * \param[in,out] poffset set offset to 0 for first image - * \return pix, or NULL on error or if previous call returned the last image - * - *
- * Notes:
- *      (1) This allows overhead for traversal of a multipage tiff file
- *          to be linear in the number of images.  This will also work
- *          with a singlepage tiff file.
- *      (2) No TIFF internal data structures are exposed to the caller
- *          (thanks to Jeff Breidenbach).
- *      (3) offset is the byte offset of a particular image in a multipage
- *          tiff file. To get the first image in the file, input the
- *          special offset value of 0.
- *      (4) The offset is updated to point to the next image, for a
- *          subsequent call.
- *      (5) On the last image, the offset returned is 0.  Exit the loop
- *          when the returned offset is 0.
- *      (6) For reading a multipage tiff from a memory buffer, see
- *            pixReadMemFromMultipageTiff()
- *      (7) Example usage for reading all the images in the tif file:
- *            size_t offset = 0;
- *            do {
- *                Pix *pix = pixReadFromMultipageTiff(filename, &offset);
- *                // do something with pix
- *            } while (offset != 0);
- * 
- */ -PIX * -pixReadFromMultipageTiff(const char *fname, - size_t *poffset) -{ -l_int32 retval; -size_t offset; -PIX *pix; -TIFF *tif; - - PROCNAME("pixReadFromMultipageTiff"); - - if (!fname) - return (PIX *)ERROR_PTR("fname not defined", procName, NULL); - if (!poffset) - return (PIX *)ERROR_PTR("&offset not defined", procName, NULL); - - if ((tif = openTiff(fname, "r")) == NULL) { - L_ERROR("tif open failed for %s\n", procName, fname); - return NULL; - } - - /* Set ptrs in the TIFF to the beginning of the image */ - offset = *poffset; - retval = (offset == 0) ? TIFFSetDirectory(tif, 0) - : TIFFSetSubDirectory(tif, offset); - if (retval == 0) { - TIFFCleanup(tif); - return NULL; - } - - if ((pix = pixReadFromTiffStream(tif)) == NULL) { - TIFFCleanup(tif); - return NULL; - } - - /* Advance to the next image and return the new offset */ - TIFFReadDirectory(tif); - *poffset = TIFFCurrentDirOffset(tif); - TIFFClose(tif); - return pix; -} - - -/*! - * \brief pixaReadMultipageTiff() - * - * \param[in] filename input tiff file - * \return pixa of page images, or NULL on error - */ -PIXA * -pixaReadMultipageTiff(const char *filename) -{ -l_int32 i, npages; -FILE *fp; -PIX *pix; -PIXA *pixa; -TIFF *tif; - - PROCNAME("pixaReadMultipageTiff"); - - if (!filename) - return (PIXA *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (PIXA *)ERROR_PTR("stream not opened", procName, NULL); - if (fileFormatIsTiff(fp)) { - tiffGetCount(fp, &npages); - L_INFO(" Tiff: %d pages\n", procName, npages); - } else { - return (PIXA *)ERROR_PTR("file not tiff", procName, NULL); - } - - if ((tif = fopenTiff(fp, "r")) == NULL) - return (PIXA *)ERROR_PTR("tif not opened", procName, NULL); - - pixa = pixaCreate(npages); - pix = NULL; - for (i = 0; i < npages; i++) { - if ((pix = pixReadFromTiffStream(tif)) != NULL) { - pixaAddPix(pixa, pix, L_INSERT); - } else { - L_WARNING("pix not read for page %d\n", procName, i); - } - - /* Advance 
to the next directory (i.e., the next image) */ - if (TIFFReadDirectory(tif) == 0) - break; - } - - fclose(fp); - TIFFCleanup(tif); - return pixa; -} - - -/*! - * \brief pixaWriteMultipageTiff() - * - * \param[in] fname input tiff file - * \param[in] pixa any depth; colormap will be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The tiff directory overhead is O(n^2).  I have not been
- *          able to reduce it to O(n).  The overhead for n = 2000 is
- *          about 1 second.
- * 
- */ -l_ok -pixaWriteMultipageTiff(const char *fname, - PIXA *pixa) -{ -const char *modestr; -l_int32 i, n; -PIX *pix1; - - PROCNAME("pixaWriteMultipageTiff"); - - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - modestr = (i == 0) ? "w" : "a"; - pix1 = pixaGetPix(pixa, i, L_CLONE); - if (pixGetDepth(pix1) == 1) - pixWriteTiff(fname, pix1, IFF_TIFF_G4, modestr); - else - pixWriteTiff(fname, pix1, IFF_TIFF_ZIP, modestr); - pixDestroy(&pix1); - } - - return 0; -} - - -/*! - * \brief writeMultipageTiff() - * - * \param[in] dirin input directory - * \param[in] substr [optional] substring filter on filenames; can be NULL - * \param[in] fileout output multipage tiff file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This writes a set of image files in a directory out
- *          as a multipage tiff file.  The images can be in any
- *          initial file format.
- *      (2) Images with a colormap have the colormap removed before
- *          re-encoding as tiff.
- *      (3) All images are encoded losslessly.  Those with 1 bpp are
- *          encoded 'g4'.  The rest are encoded as 'zip' (flate encoding).
- *          Because it is lossless, this is an expensive method for
- *          saving most rgb images.
- *      (4) The tiff directory overhead is quadratic in the number of
- *          images.  To avoid this for very large numbers of images to be
- *          written, apply the method used in pixaWriteMultipageTiff().
- * 
- */ -l_ok -writeMultipageTiff(const char *dirin, - const char *substr, - const char *fileout) -{ -SARRAY *sa; - - PROCNAME("writeMultipageTiff"); - - if (!dirin) - return ERROR_INT("dirin not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - /* Get all filtered and sorted full pathnames. */ - sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); - - /* Generate the tiff file */ - writeMultipageTiffSA(sa, fileout); - sarrayDestroy(&sa); - return 0; -} - - -/*! - * \brief writeMultipageTiffSA() - * - * \param[in] sa string array of full path names - * \param[in] fileout output ps file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See writeMultipageTiff()
- * 
- */ -l_ok -writeMultipageTiffSA(SARRAY *sa, - const char *fileout) -{ -char *fname; -const char *op; -l_int32 i, nfiles, firstfile, format; -PIX *pix; - - PROCNAME("writeMultipageTiffSA"); - - if (!sa) - return ERROR_INT("sa not defined", procName, 1); - if (!fileout) - return ERROR_INT("fileout not defined", procName, 1); - - nfiles = sarrayGetCount(sa); - firstfile = TRUE; - for (i = 0; i < nfiles; i++) { - op = (firstfile) ? "w" : "a"; - fname = sarrayGetString(sa, i, L_NOCOPY); - findFileFormat(fname, &format); - if (format == IFF_UNKNOWN) { - L_INFO("format of %s not known\n", procName, fname); - continue; - } - - if ((pix = pixRead(fname)) == NULL) { - L_WARNING("pix not made for file: %s\n", procName, fname); - continue; - } - if (pixGetDepth(pix) == 1) - pixWriteTiff(fileout, pix, IFF_TIFF_G4, op); - else - pixWriteTiff(fileout, pix, IFF_TIFF_ZIP, op); - firstfile = FALSE; - pixDestroy(&pix); - } - - return 0; -} - - -/*--------------------------------------------------------------* - * Print info to stream * - *--------------------------------------------------------------*/ -/*! - * \brief fprintTiffInfo() - * - * \param[in] fpout stream for output of tag data - * \param[in] tiffile input - * \return 0 if OK; 1 on error - */ -l_ok -fprintTiffInfo(FILE *fpout, - const char *tiffile) -{ -TIFF *tif; - - PROCNAME("fprintTiffInfo"); - - if (!tiffile) - return ERROR_INT("tiffile not defined", procName, 1); - if (!fpout) - return ERROR_INT("stream out not defined", procName, 1); - - if ((tif = openTiff(tiffile, "rb")) == NULL) - return ERROR_INT("tif not open for read", procName, 1); - - TIFFPrintDirectory(tif, fpout, 0); - TIFFClose(tif); - - return 0; -} - - -/*--------------------------------------------------------------* - * Get page count * - *--------------------------------------------------------------*/ -/*! 
- * \brief tiffGetCount() - * - * \param[in] fp file stream opened for read - * \param[out] pn number of images - * \return 0 if OK; 1 on error - */ -l_ok -tiffGetCount(FILE *fp, - l_int32 *pn) -{ -l_int32 i; -TIFF *tif; - - PROCNAME("tiffGetCount"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pn) - return ERROR_INT("&n not defined", procName, 1); - *pn = 0; - - if ((tif = fopenTiff(fp, "r")) == NULL) - return ERROR_INT("tif not open for read", procName, 1); - - for (i = 1; ; i++) { - if (TIFFReadDirectory(tif) == 0) - break; - if (i == ManyPagesInTiffFile + 1) { - L_WARNING("big file: more than %d pages\n", procName, - ManyPagesInTiffFile); - } - } - *pn = i; - TIFFCleanup(tif); - return 0; -} - - -/*--------------------------------------------------------------* - * Get resolution from tif * - *--------------------------------------------------------------*/ -/*! - * \brief getTiffResolution() - * - * \param[in] fp file stream opened for read - * \param[out] pxres, pyres resolution in ppi - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If neither resolution field is set, this is not an error;
- *          the returned resolution values are 0 (designating 'unknown').
- * 
- */ -l_ok -getTiffResolution(FILE *fp, - l_int32 *pxres, - l_int32 *pyres) -{ -TIFF *tif; - - PROCNAME("getTiffResolution"); - - if (!pxres || !pyres) - return ERROR_INT("&xres and &yres not both defined", procName, 1); - *pxres = *pyres = 0; - if (!fp) - return ERROR_INT("stream not opened", procName, 1); - - if ((tif = fopenTiff(fp, "r")) == NULL) - return ERROR_INT("tif not open for read", procName, 1); - getTiffStreamResolution(tif, pxres, pyres); - TIFFCleanup(tif); - return 0; -} - - -/*! - * \brief getTiffStreamResolution() - * - * \param[in] tif TIFF handle opened for read - * \param[out] pxres, pyres resolution in ppi - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) If neither resolution field is set, this is not an error;
- *          the returned resolution values are 0 (designating 'unknown').
- * 
- */ -static l_int32 -getTiffStreamResolution(TIFF *tif, - l_int32 *pxres, - l_int32 *pyres) -{ -l_uint16 resunit; -l_int32 foundxres, foundyres; -l_float32 fxres, fyres; - - PROCNAME("getTiffStreamResolution"); - - if (!tif) - return ERROR_INT("tif not opened", procName, 1); - if (!pxres || !pyres) - return ERROR_INT("&xres and &yres not both defined", procName, 1); - *pxres = *pyres = 0; - - TIFFGetFieldDefaulted(tif, TIFFTAG_RESOLUTIONUNIT, &resunit); - foundxres = TIFFGetField(tif, TIFFTAG_XRESOLUTION, &fxres); - foundyres = TIFFGetField(tif, TIFFTAG_YRESOLUTION, &fyres); - if (!foundxres && !foundyres) return 1; - if (isnan(fxres) || isnan(fyres)) return 1; - if (!foundxres && foundyres) - fxres = fyres; - else if (foundxres && !foundyres) - fyres = fxres; - - /* Avoid overflow into int32; set max fxres and fyres to 5 x 10^8 */ - if (fxres < 0 || fxres > (1L << 29) || fyres < 0 || fyres > (1L << 29)) - return ERROR_INT("fxres and/or fyres values are invalid", procName, 1); - - if (resunit == RESUNIT_CENTIMETER) { /* convert to ppi */ - *pxres = (l_int32)(2.54 * fxres + 0.5); - *pyres = (l_int32)(2.54 * fyres + 0.5); - } else { - *pxres = (l_int32)fxres; - *pyres = (l_int32)fyres; - } - - return 0; -} - - -/*--------------------------------------------------------------* - * Get some tiff header information * - *--------------------------------------------------------------*/ -/*! - * \brief readHeaderTiff() - * - * \param[in] filename - * \param[in] n page image number: 0-based - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8 - * \param[out] pspp [optional] samples per pixel -- 1 or 3 - * \param[out] pres [optional] resolution in x dir; NULL to ignore - * \param[out] pcmap [optional] colormap exists; input NULL to ignore - * \param[out] pformat [optional] tiff format; input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there is a colormap, cmap is returned as 1; else 0.
- *      (2) If %n is equal to or greater than the number of images, returns 1.
- * 
- */ -l_ok -readHeaderTiff(const char *filename, - l_int32 n, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *pres, - l_int32 *pcmap, - l_int32 *pformat) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("readHeaderTiff"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (pres) *pres = 0; - if (pcmap) *pcmap = 0; - if (pformat) *pformat = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat) - return ERROR_INT("no results requested", procName, 1); - - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - ret = freadHeaderTiff(fp, n, pw, ph, pbps, pspp, pres, pcmap, pformat); - fclose(fp); - return ret; -} - - -/*! - * \brief freadHeaderTiff() - * - * \param[in] fp file stream - * \param[in] n page image number: 0-based - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8 - * \param[out] pspp [optional] samples per pixel -- 1 or 3 - * \param[out] pres [optional] resolution in x dir; NULL to ignore - * \param[out] pcmap [optional] colormap exists; input NULL to ignore - * \param[out] pformat [optional] tiff format; input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If there is a colormap, cmap is returned as 1; else 0.
- *      (2) If %n is equal to or greater than the number of images, returns 1.
- * 
- */ -l_ok -freadHeaderTiff(FILE *fp, - l_int32 n, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *pres, - l_int32 *pcmap, - l_int32 *pformat) -{ -l_int32 i, ret, format; -TIFF *tif; - - PROCNAME("freadHeaderTiff"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (pres) *pres = 0; - if (pcmap) *pcmap = 0; - if (pformat) *pformat = 0; - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (n < 0) - return ERROR_INT("image index must be >= 0", procName, 1); - if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat) - return ERROR_INT("no results requested", procName, 1); - - findFileFormatStream(fp, &format); - if (!L_FORMAT_IS_TIFF(format)) - return ERROR_INT("file not tiff format", procName, 1); - - if ((tif = fopenTiff(fp, "r")) == NULL) - return ERROR_INT("tif not open for read", procName, 1); - - for (i = 0; i < n; i++) { - if (TIFFReadDirectory(tif) == 0) - return ERROR_INT("image n not found in file", procName, 1); - } - - ret = tiffReadHeaderTiff(tif, pw, ph, pbps, pspp, pres, pcmap, pformat); - TIFFCleanup(tif); - return ret; -} - - -/*! - * \brief readHeaderMemTiff() - * - * \param[in] cdata const; tiff-encoded - * \param[in] size size of data - * \param[in] n page image number: 0-based - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8 - * \param[out] pspp [optional] samples per pixel -- 1 or 3 - * \param[out] pres [optional] resolution in x dir; NULL to ignore - * \param[out] pcmap [optional] colormap exists; input NULL to ignore - * \param[out] pformat [optional] tiff format; input NULL to ignore - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
- * 
- */ -l_ok -readHeaderMemTiff(const l_uint8 *cdata, - size_t size, - l_int32 n, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *pres, - l_int32 *pcmap, - l_int32 *pformat) -{ -l_uint8 *data; -l_int32 i, ret; -TIFF *tif; - - PROCNAME("readHeaderMemTiff"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pbps) *pbps = 0; - if (pspp) *pspp = 0; - if (pres) *pres = 0; - if (pcmap) *pcmap = 0; - if (pformat) *pformat = 0; - if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat) - return ERROR_INT("no results requested", procName, 1); - if (!cdata) - return ERROR_INT("cdata not defined", procName, 1); - - /* Open a tiff stream to memory */ - data = (l_uint8 *)cdata; /* we're really not going to change this */ - if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL) - return ERROR_INT("tiff stream not opened", procName, 1); - - for (i = 0; i < n; i++) { - if (TIFFReadDirectory(tif) == 0) { - TIFFClose(tif); - return ERROR_INT("image n not found in file", procName, 1); - } - } - - ret = tiffReadHeaderTiff(tif, pw, ph, pbps, pspp, pres, pcmap, pformat); - TIFFClose(tif); - return ret; -} - - -/*! 
- * \brief tiffReadHeaderTiff() - * - * \param[in] tif - * \param[out] pw [optional] width - * \param[out] ph [optional] height - * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8 - * \param[out] pspp [optional] samples per pixel -- 1 or 3 - * \param[out] pres [optional] resolution in x dir; NULL to ignore - * \param[out] pcmap [optional] cmap exists; input NULL to ignore - * \param[out] pformat [optional] tiff format; input NULL to ignore - * \return 0 if OK, 1 on error - */ -static l_int32 -tiffReadHeaderTiff(TIFF *tif, - l_int32 *pw, - l_int32 *ph, - l_int32 *pbps, - l_int32 *pspp, - l_int32 *pres, - l_int32 *pcmap, - l_int32 *pformat) -{ -l_uint16 tiffcomp; -l_uint16 bps, spp; -l_uint16 *rmap, *gmap, *bmap; -l_int32 xres, yres; -l_uint32 w, h; - - PROCNAME("tiffReadHeaderTiff"); - - if (!tif) - return ERROR_INT("tif not opened", procName, 1); - - TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w); - TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h); - TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps); - TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); - if (w < 1 || h < 1) - return ERROR_INT("tif w and h not both > 0", procName, 1); - if (bps != 1 && bps != 2 && bps != 4 && bps != 8 && bps != 16) - return ERROR_INT("bps not in set {1,2,4,8,16}", procName, 1); - if (spp != 1 && spp != 2 && spp != 3 && spp != 4) - return ERROR_INT("spp not in set {1,2,3,4}", procName, 1); - if (pw) *pw = w; - if (ph) *ph = h; - if (pbps) *pbps = bps; - if (pspp) *pspp = spp; - if (pres) { - *pres = 300; /* default ppi */ - if (getTiffStreamResolution(tif, &xres, &yres) == 0) - *pres = (l_int32)xres; - } - if (pcmap) { - *pcmap = 0; - if (TIFFGetField(tif, TIFFTAG_COLORMAP, &rmap, &gmap, &bmap)) - *pcmap = 1; - } - if (pformat) { - TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp); - *pformat = getTiffCompressedFormat(tiffcomp); - } - return 0; -} - - -/*! 
- * \brief findTiffCompression() - * - * \param[in] fp file stream; must be rewound to BOF - * \param[out] pcomptype compression type - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The returned compression type is that defined in
- *          the enum in imageio.h.  It is not the tiff flag value.
- *      (2) The compression type is initialized to IFF_UNKNOWN.
- *          If it is not one of the specified types, the returned
- *          type is IFF_TIFF, which indicates no compression.
- *      (3) When this function is called, the stream must be at BOF.
- *          If the opened stream is to be used again to read the
- *          file, it must be rewound to BOF after calling this function.
- * 
- */ -l_ok -findTiffCompression(FILE *fp, - l_int32 *pcomptype) -{ -l_uint16 tiffcomp; -TIFF *tif; - - PROCNAME("findTiffCompression"); - - if (!pcomptype) - return ERROR_INT("&comptype not defined", procName, 1); - *pcomptype = IFF_UNKNOWN; /* init */ - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - - if ((tif = fopenTiff(fp, "r")) == NULL) - return ERROR_INT("tif not opened", procName, 1); - TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp); - *pcomptype = getTiffCompressedFormat(tiffcomp); - TIFFCleanup(tif); - return 0; -} - - -/*! - * \brief getTiffCompressedFormat() - * - * \param[in] tiffcomp defined in tiff.h - * \return compression format defined in imageio.h - * - *
- * Notes:
- *      (1) The input must be the actual tiff compression type
- *          returned by a tiff library call.  It should always be
- *          a valid tiff type.
- *      (2) The return type is defined in the enum in imageio.h.
- * 
- */ -static l_int32 -getTiffCompressedFormat(l_uint16 tiffcomp) -{ -l_int32 comptype; - - switch (tiffcomp) - { - case COMPRESSION_CCITTFAX4: - comptype = IFF_TIFF_G4; - break; - case COMPRESSION_CCITTFAX3: - comptype = IFF_TIFF_G3; - break; - case COMPRESSION_CCITTRLE: - comptype = IFF_TIFF_RLE; - break; - case COMPRESSION_PACKBITS: - comptype = IFF_TIFF_PACKBITS; - break; - case COMPRESSION_LZW: - comptype = IFF_TIFF_LZW; - break; - case COMPRESSION_ADOBE_DEFLATE: - comptype = IFF_TIFF_ZIP; - break; - case COMPRESSION_JPEG: - comptype = IFF_TIFF_JPEG; - break; - default: - comptype = IFF_TIFF; - break; - } - return comptype; -} - - -/*--------------------------------------------------------------* - * Extraction of tiff g4 data * - *--------------------------------------------------------------*/ -/*! - * \brief extractG4DataFromFile() - * - * \param[in] filein - * \param[out] pdata binary data of ccitt g4 encoded stream - * \param[out] pnbytes size of binary data - * \param[out] pw [optional] image width - * \param[out] ph [optional] image height - * \param[out] pminisblack [optional] boolean - * \return 0 if OK, 1 on error - */ -l_ok -extractG4DataFromFile(const char *filein, - l_uint8 **pdata, - size_t *pnbytes, - l_int32 *pw, - l_int32 *ph, - l_int32 *pminisblack) -{ -l_uint8 *inarray, *data; -l_uint16 minisblack, comptype; /* accessors require l_uint16 */ -l_int32 istiff; -l_uint32 w, h, rowsperstrip; /* accessors require l_uint32 */ -l_uint32 diroff; -size_t fbytes, nbytes; -FILE *fpin; -TIFF *tif; - - PROCNAME("extractG4DataFromFile"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!pnbytes) - return ERROR_INT("&nbytes not defined", procName, 1); - if (!pw && !ph && !pminisblack) - return ERROR_INT("no output data requested", procName, 1); - *pdata = NULL; - *pnbytes = 0; - - if ((fpin = fopenReadStream(filein)) == NULL) - return ERROR_INT("stream not opened to file", procName, 1); - istiff = fileFormatIsTiff(fpin); - 
fclose(fpin); - if (!istiff) - return ERROR_INT("filein not tiff", procName, 1); - - if ((inarray = l_binaryRead(filein, &fbytes)) == NULL) - return ERROR_INT("inarray not made", procName, 1); - - /* Get metadata about the image */ - if ((tif = openTiff(filein, "rb")) == NULL) { - LEPT_FREE(inarray); - return ERROR_INT("tif not open for read", procName, 1); - } - TIFFGetField(tif, TIFFTAG_COMPRESSION, &comptype); - if (comptype != COMPRESSION_CCITTFAX4) { - LEPT_FREE(inarray); - TIFFClose(tif); - return ERROR_INT("filein is not g4 compressed", procName, 1); - } - - TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w); - TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h); - TIFFGetField(tif, TIFFTAG_ROWSPERSTRIP, &rowsperstrip); - if (h != rowsperstrip) - L_WARNING("more than 1 strip\n", procName); - TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &minisblack); /* for 1 bpp */ -/* TIFFPrintDirectory(tif, stderr, 0); */ - TIFFClose(tif); - if (pw) *pw = (l_int32)w; - if (ph) *ph = (l_int32)h; - if (pminisblack) *pminisblack = (l_int32)minisblack; - - /* The header has 8 bytes: the first 2 are the magic number, - * the next 2 are the version, and the last 4 are the - * offset to the first directory. That's what we want here. - * We have to test the byte order before decoding 4 bytes! */ - if (inarray[0] == 0x4d) { /* big-endian */ - diroff = (inarray[4] << 24) | (inarray[5] << 16) | - (inarray[6] << 8) | inarray[7]; - } else { /* inarray[0] == 0x49 : little-endian */ - diroff = (inarray[7] << 24) | (inarray[6] << 16) | - (inarray[5] << 8) | inarray[4]; - } -/* lept_stderr(" diroff = %d, %x\n", diroff, diroff); */ - - /* Extract the ccittg4 encoded data from the tiff file. 
- * We skip the 8 byte header and take nbytes of data, - * up to the beginning of the directory (at diroff) */ - nbytes = diroff - 8; - *pnbytes = nbytes; - if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL) { - LEPT_FREE(inarray); - return ERROR_INT("data not allocated", procName, 1); - } - *pdata = data; - memcpy(data, inarray + 8, nbytes); - LEPT_FREE(inarray); - - return 0; -} - - -/*--------------------------------------------------------------* - * Open tiff stream from file stream * - *--------------------------------------------------------------*/ -/*! - * \brief fopenTiff() - * - * \param[in] fp file stream - * \param[in] modestring "r", "w", ... - * \return tiff data structure, opened for a file descriptor - * - *
- * Notes:
- *      (1) Why is this here?  Leffler did not provide a function that
- *          takes a stream and gives a TIFF.  He only gave one that
- *          generates a TIFF starting with a file descriptor.  So we
- *          need to make it here, because it is useful to have functions
- *          that take a stream as input.
- *      (2) We use TIFFClientOpen() together with a set of static wrapper
- *          functions which map TIFF read, write, seek, close and size.
- *          to functions expecting a cookie of type stream (i.e. FILE *).
- *          This implementation was contributed by Jürgen Buchmüller.
- * 
- */ -static TIFF * -fopenTiff(FILE *fp, - const char *modestring) -{ - PROCNAME("fopenTiff"); - - if (!fp) - return (TIFF *)ERROR_PTR("stream not opened", procName, NULL); - if (!modestring) - return (TIFF *)ERROR_PTR("modestring not defined", procName, NULL); - - TIFFSetWarningHandler(NULL); /* disable warnings */ - TIFFSetErrorHandler(NULL); /* disable error messages */ - - fseek(fp, 0, SEEK_SET); - return TIFFClientOpen("TIFFstream", modestring, (thandle_t)fp, - lept_read_proc, lept_write_proc, lept_seek_proc, - lept_close_proc, lept_size_proc, NULL, NULL); -} - - -/*--------------------------------------------------------------* - * Wrapper for TIFFOpen * - *--------------------------------------------------------------*/ -/*! - * \brief openTiff() - * - * \param[in] filename - * \param[in] modestring "r", "w", ... - * \return tiff data structure - * - *
- * Notes:
- *      (1) This handles multi-platform file naming.
- * 
- */ -static TIFF * -openTiff(const char *filename, - const char *modestring) -{ -char *fname; -TIFF *tif; - - PROCNAME("openTiff"); - - if (!filename) - return (TIFF *)ERROR_PTR("filename not defined", procName, NULL); - if (!modestring) - return (TIFF *)ERROR_PTR("modestring not defined", procName, NULL); - - TIFFSetWarningHandler(NULL); /* disable warnings */ - TIFFSetErrorHandler(NULL); /* disable error messages */ - - fname = genPathname(filename, NULL); - tif = TIFFOpen(fname, modestring); - LEPT_FREE(fname); - return tif; -} - - -/*----------------------------------------------------------------------* - * Memory I/O: reading memory --> pix and writing pix --> memory * - *----------------------------------------------------------------------*/ -/* It would be nice to use open_memstream() and fmemopen() - * for writing and reading to memory, rsp. These functions manage - * memory for writes and reads that use a file streams interface. - * Unfortunately, the tiff library only has an interface for reading - * and writing to file descriptors, not to file streams. The tiff - * library procedure is to open a "tiff stream" and read/write to it. - * The library provides a client interface for managing the I/O - * from memory, which requires seven callbacks. See the TIFFClientOpen - * man page for callback signatures. Adam Langley provided the code - * to do this. */ - -/*! - * \brief Memory stream buffer used with TIFFClientOpen() - * - * The L_Memstram %buffer has different functions in writing and reading. - * - * * In reading, it is assigned to the data and read from as - * the tiff library uncompresses the data and generates the pix. - * The %offset points to the current read position in the data, - * and the %hw always gives the number of bytes of data. - * The %outdata and %outsize ptrs are not used. - * When finished, tiffCloseCallback() simply frees the L_Memstream. 
- * - * * In writing, it accepts the data that the tiff library - * produces when a pix is compressed. the buffer points to a - * malloced area of %bufsize bytes. The current writing position - * in the buffer is %offset and the most ever written is %hw. - * The buffer is expanded as necessary. When finished, - * tiffCloseCallback() assigns the %outdata and %outsize ptrs - * to the %buffer and %bufsize results, and frees the L_Memstream. - */ -struct L_Memstream -{ - l_uint8 *buffer; /* expands to hold data when written to; */ - /* fixed size when read from. */ - size_t bufsize; /* current size allocated when written to; */ - /* fixed size of input data when read from. */ - size_t offset; /* byte offset from beginning of buffer. */ - size_t hw; /* high-water mark; max bytes in buffer. */ - l_uint8 **poutdata; /* input param for writing; data goes here. */ - size_t *poutsize; /* input param for writing; data size goes here. */ -}; -typedef struct L_Memstream L_MEMSTREAM; - - - /* These are static functions for memory I/O */ -static L_MEMSTREAM *memstreamCreateForRead(l_uint8 *indata, size_t pinsize); -static L_MEMSTREAM *memstreamCreateForWrite(l_uint8 **poutdata, - size_t *poutsize); -static tsize_t tiffReadCallback(thandle_t handle, tdata_t data, tsize_t length); -static tsize_t tiffWriteCallback(thandle_t handle, tdata_t data, - tsize_t length); -static toff_t tiffSeekCallback(thandle_t handle, toff_t offset, l_int32 whence); -static l_int32 tiffCloseCallback(thandle_t handle); -static toff_t tiffSizeCallback(thandle_t handle); -static l_int32 tiffMapCallback(thandle_t handle, tdata_t *data, toff_t *length); -static void tiffUnmapCallback(thandle_t handle, tdata_t data, toff_t length); - - -static L_MEMSTREAM * -memstreamCreateForRead(l_uint8 *indata, - size_t insize) -{ -L_MEMSTREAM *mstream; - - mstream = (L_MEMSTREAM *)LEPT_CALLOC(1, sizeof(L_MEMSTREAM)); - mstream->buffer = indata; /* handle to input data array */ - mstream->bufsize = insize; /* amount of 
input data */ - mstream->hw = insize; /* high-water mark fixed at input data size */ - mstream->offset = 0; /* offset always starts at 0 */ - return mstream; -} - - -static L_MEMSTREAM * -memstreamCreateForWrite(l_uint8 **poutdata, - size_t *poutsize) -{ -L_MEMSTREAM *mstream; - - mstream = (L_MEMSTREAM *)LEPT_CALLOC(1, sizeof(L_MEMSTREAM)); - mstream->buffer = (l_uint8 *)LEPT_CALLOC(8 * 1024, 1); - mstream->bufsize = 8 * 1024; - mstream->poutdata = poutdata; /* used only at end of write */ - mstream->poutsize = poutsize; /* ditto */ - mstream->hw = mstream->offset = 0; - return mstream; -} - - -static tsize_t -tiffReadCallback(thandle_t handle, - tdata_t data, - tsize_t length) -{ -L_MEMSTREAM *mstream; -size_t amount; - - mstream = (L_MEMSTREAM *)handle; - amount = L_MIN((size_t)length, mstream->hw - mstream->offset); - - /* Fuzzed files can create this condition! */ - if (mstream->offset + amount < amount || /* overflow */ - mstream->offset + amount > mstream->hw) { - lept_stderr("Bad file: amount too big: %zu\n", amount); - return 0; - } - - memcpy(data, mstream->buffer + mstream->offset, amount); - mstream->offset += amount; - return amount; -} - - -static tsize_t -tiffWriteCallback(thandle_t handle, - tdata_t data, - tsize_t length) -{ -L_MEMSTREAM *mstream; -size_t newsize; - - /* reallocNew() uses calloc to initialize the array. - * If malloc is used instead, for some of the encoding methods, - * not all the data in 'bufsize' bytes in the buffer will - * have been initialized by the end of the compression. 
*/ - mstream = (L_MEMSTREAM *)handle; - if (mstream->offset + length > mstream->bufsize) { - newsize = 2 * (mstream->offset + length); - mstream->buffer = (l_uint8 *)reallocNew((void **)&mstream->buffer, - mstream->hw, newsize); - mstream->bufsize = newsize; - } - - memcpy(mstream->buffer + mstream->offset, data, length); - mstream->offset += length; - mstream->hw = L_MAX(mstream->offset, mstream->hw); - return length; -} - - -static toff_t -tiffSeekCallback(thandle_t handle, - toff_t offset, - l_int32 whence) -{ -L_MEMSTREAM *mstream; - - PROCNAME("tiffSeekCallback"); - mstream = (L_MEMSTREAM *)handle; - switch (whence) { - case SEEK_SET: -/* lept_stderr("seek_set: offset = %d\n", offset); */ - if((size_t)offset != offset) { /* size_t overflow on uint32 */ - return (toff_t)ERROR_INT("too large offset value", procName, 1); - } - mstream->offset = offset; - break; - case SEEK_CUR: -/* lept_stderr("seek_cur: offset = %d\n", offset); */ - mstream->offset += offset; - break; - case SEEK_END: -/* lept_stderr("seek end: hw = %d, offset = %d\n", - mstream->hw, offset); */ - mstream->offset = mstream->hw - offset; /* offset >= 0 */ - break; - default: - return (toff_t)ERROR_INT("bad whence value", procName, - mstream->offset); - } - - return mstream->offset; -} - - -static l_int32 -tiffCloseCallback(thandle_t handle) -{ -L_MEMSTREAM *mstream; - - mstream = (L_MEMSTREAM *)handle; - if (mstream->poutdata) { /* writing: save the output data */ - *mstream->poutdata = mstream->buffer; - *mstream->poutsize = mstream->hw; - } - LEPT_FREE(mstream); /* never free the buffer! 
*/ - return 0; -} - - -static toff_t -tiffSizeCallback(thandle_t handle) -{ -L_MEMSTREAM *mstream; - - mstream = (L_MEMSTREAM *)handle; - return mstream->hw; -} - - -static l_int32 -tiffMapCallback(thandle_t handle, - tdata_t *data, - toff_t *length) -{ -L_MEMSTREAM *mstream; - - mstream = (L_MEMSTREAM *)handle; - *data = mstream->buffer; - *length = mstream->hw; - return 0; -} - - -static void -tiffUnmapCallback(thandle_t handle, - tdata_t data, - toff_t length) -{ - return; -} - - -/*! - * \brief fopenTiffMemstream() - * - * \param[in] filename for error output; can be "" - * \param[in] operation "w" for write, "r" for read - * \param[out] pdata written data - * \param[out] pdatasize size of written data - * \return tiff data structure, opened for write to memory - * - *
- * Notes:
- *      (1) This wraps up a number of callbacks for either:
- *            * reading from tiff in memory buffer --> pix
- *            * writing from pix --> tiff in memory buffer
- *      (2) After use, the memstream is automatically destroyed when
- *          TIFFClose() is called.  TIFFCleanup() doesn't free the memstream.
- *      (3) This does not work in append mode, and in write mode it
- *          does not append.
- * 
- */ -static TIFF * -fopenTiffMemstream(const char *filename, - const char *operation, - l_uint8 **pdata, - size_t *pdatasize) -{ -L_MEMSTREAM *mstream; -TIFF *tif; - - PROCNAME("fopenTiffMemstream"); - - if (!filename) - return (TIFF *)ERROR_PTR("filename not defined", procName, NULL); - if (!operation) - return (TIFF *)ERROR_PTR("operation not defined", procName, NULL); - if (!pdata) - return (TIFF *)ERROR_PTR("&data not defined", procName, NULL); - if (!pdatasize) - return (TIFF *)ERROR_PTR("&datasize not defined", procName, NULL); - if (strcmp(operation, "r") && strcmp(operation, "w")) - return (TIFF *)ERROR_PTR("op not 'r' or 'w'", procName, NULL); - - if (!strcmp(operation, "r")) - mstream = memstreamCreateForRead(*pdata, *pdatasize); - else - mstream = memstreamCreateForWrite(pdata, pdatasize); - - TIFFSetWarningHandler(NULL); /* disable warnings */ - TIFFSetErrorHandler(NULL); /* disable error messages */ - - tif = TIFFClientOpen(filename, operation, (thandle_t)mstream, - tiffReadCallback, tiffWriteCallback, - tiffSeekCallback, tiffCloseCallback, - tiffSizeCallback, tiffMapCallback, - tiffUnmapCallback); - if (!tif) - LEPT_FREE(mstream); - return tif; -} - - -/*! - * \brief pixReadMemTiff() - * - * \param[in] cdata const; tiff-encoded - * \param[in] size size of cdata - * \param[in] n page image number: 0-based - * \return pix, or NULL on error - * - *
- * Notes:
- *      (1) This is a version of pixReadTiff(), where the data is read
- *          from a memory buffer and uncompressed.
- *      (2) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
- *      (3) No warning messages on failure, because of how multi-page
- *          TIFF reading works. You are supposed to keep trying until
- *          it stops working.
- *      (4) Tiff directory overhead is linear in the input page number.
- *          If reading many images, use pixReadMemFromMultipageTiff().
- * 
- */ -PIX * -pixReadMemTiff(const l_uint8 *cdata, - size_t size, - l_int32 n) -{ -l_uint8 *data; -l_int32 i; -PIX *pix; -TIFF *tif; - - PROCNAME("pixReadMemTiff"); - - if (!cdata) - return (PIX *)ERROR_PTR("cdata not defined", procName, NULL); - - data = (l_uint8 *)cdata; /* we're really not going to change this */ - if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL) - return (PIX *)ERROR_PTR("tiff stream not opened", procName, NULL); - - pix = NULL; - for (i = 0; ; i++) { - if (i == n) { - if ((pix = pixReadFromTiffStream(tif)) == NULL) { - TIFFClose(tif); - return NULL; - } - pixSetInputFormat(pix, IFF_TIFF); - break; - } - if (TIFFReadDirectory(tif) == 0) - break; - if (i == ManyPagesInTiffFile + 1) { - L_WARNING("big file: more than %d pages\n", procName, - ManyPagesInTiffFile); - } - } - - TIFFClose(tif); - return pix; -} - - -/*! - * \brief pixReadMemFromMultipageTiff() - * - * \param[in] cdata const; tiff-encoded - * \param[in] size size of cdata - * \param[in,out] poffset set offset to 0 for first image - * \return pix, or NULL on error or if previous call returned the last image - * - *
- * Notes:
- *      (1) This is a read-from-memory version of pixReadFromMultipageTiff().
- *          See that function for usage.
- *      (2) If reading sequentially from the tiff data, this is more
- *          efficient than pixReadMemTiff(), which has an overhead
- *          proportional to the image index n.
- *      (3) Example usage for reading all the images:
- *            size_t offset = 0;
- *            do {
- *                Pix *pix = pixReadMemFromMultipageTiff(data, size, &offset);
- *                // do something with pix
- *            } while (offset != 0);
- * 
- */ -PIX * -pixReadMemFromMultipageTiff(const l_uint8 *cdata, - size_t size, - size_t *poffset) -{ -l_uint8 *data; -l_int32 retval; -size_t offset; -PIX *pix; -TIFF *tif; - - PROCNAME("pixReadMemFromMultipageTiff"); - - if (!cdata) - return (PIX *)ERROR_PTR("cdata not defined", procName, NULL); - if (!poffset) - return (PIX *)ERROR_PTR("&offset not defined", procName, NULL); - - data = (l_uint8 *)cdata; /* we're really not going to change this */ - if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL) - return (PIX *)ERROR_PTR("tiff stream not opened", procName, NULL); - - /* Set ptrs in the TIFF to the beginning of the image */ - offset = *poffset; - retval = (offset == 0) ? TIFFSetDirectory(tif, 0) - : TIFFSetSubDirectory(tif, offset); - if (retval == 0) { - TIFFClose(tif); - return NULL; - } - - if ((pix = pixReadFromTiffStream(tif)) == NULL) { - TIFFClose(tif); - return NULL; - } - - /* Advance to the next image and return the new offset */ - TIFFReadDirectory(tif); - *poffset = TIFFCurrentDirOffset(tif); - TIFFClose(tif); - return pix; -} - - -/*! - * \brief pixaReadMemMultipageTiff() - * - * \param[in] data const; multiple pages; tiff-encoded - * \param[in] size size of cdata - * \return pixa, or NULL on error - * - *
- * Notes:
- *      (1) This is an O(n) read-from-memory version of pixaReadMultipageTiff().
- * 
- */ -PIXA * -pixaReadMemMultipageTiff(const l_uint8 *data, - size_t size) -{ -size_t offset; -PIX *pix; -PIXA *pixa; - - PROCNAME("pixaReadMemMultipageTiff"); - - if (!data) - return (PIXA *)ERROR_PTR("data not defined", procName, NULL); - - offset = 0; - pixa = pixaCreate(0); - do { - pix = pixReadMemFromMultipageTiff(data, size, &offset); - pixaAddPix(pixa, pix, L_INSERT); - } while (offset != 0); - return pixa; -} - - -/*! - * \brief pixaWriteMemMultipageTiff() - * - * \param[out] pdata const; tiff-encoded - * \param[out] psize size of data - * \param[in] pixa any depth; colormap will be removed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) fopenTiffMemstream() does not work in append mode, so we
- *          must work-around with a temporary file.
- *      (2) Getting a file stream from
- *            open_memstream((char **)pdata, psize)
- *          does not work with the tiff directory.
- * 
- */ -l_ok -pixaWriteMemMultipageTiff(l_uint8 **pdata, - size_t *psize, - PIXA *pixa) -{ -const char *modestr; -l_int32 i, n; -FILE *fp; -PIX *pix1; - - PROCNAME("pixaWriteMemMultipageTiff"); - - if (pdata) *pdata = NULL; - if (!pdata) - return ERROR_INT("pdata not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - -#ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); -#else - if ((fp = tmpfile()) == NULL) - return ERROR_INT("tmpfile stream not opened", procName, 1); -#endif /* _WIN32 */ - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - modestr = (i == 0) ? "w" : "a"; - pix1 = pixaGetPix(pixa, i, L_CLONE); - if (pixGetDepth(pix1) == 1) - pixWriteStreamTiffWA(fp, pix1, IFF_TIFF_G4, modestr); - else - pixWriteStreamTiffWA(fp, pix1, IFF_TIFF_ZIP, modestr); - pixDestroy(&pix1); - } - - rewind(fp); - *pdata = l_binaryReadStream(fp, psize); - fclose(fp); - return 0; -} - - -/*! - * \brief pixWriteMemTiff() - * - * \param[out] pdata data of tiff compressed image - * \param[out] psize size of returned data - * \param[in] pix - * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS, - * IFF_TIFF_G3, IFF_TIFF_G4, - * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG - * \return 0 if OK, 1 on error - * - * Usage: - * 1) See pixWriteTiff(. This version writes to - * memory instead of to a file. - */ -l_ok -pixWriteMemTiff(l_uint8 **pdata, - size_t *psize, - PIX *pix, - l_int32 comptype) -{ - return pixWriteMemTiffCustom(pdata, psize, pix, comptype, - NULL, NULL, NULL, NULL); -} - - -/*! 
- * \brief pixWriteMemTiffCustom() - * - * \param[out] pdata data of tiff compressed image - * \param[out] psize size of returned data - * \param[in] pix - * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS, - * IFF_TIFF_G3, IFF_TIFF_G4, - * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG - * \param[in] natags [optional] NUMA of custom tiff tags - * \param[in] savals [optional] SARRAY of values - * \param[in] satypes [optional] SARRAY of types - * \param[in] nasizes [optional] NUMA of sizes - * \return 0 if OK, 1 on error - * - * Usage: - * 1) See pixWriteTiffCustom(. This version writes to - * memory instead of to a file. - * 2) Use TIFFClose(); TIFFCleanup( doesn't free internal memstream. - */ -l_ok -pixWriteMemTiffCustom(l_uint8 **pdata, - size_t *psize, - PIX *pix, - l_int32 comptype, - NUMA *natags, - SARRAY *savals, - SARRAY *satypes, - NUMA *nasizes) -{ -l_int32 ret; -TIFF *tif; - - PROCNAME("pixWriteMemTiffCustom"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1); - if (!psize) - return ERROR_INT("&size not defined", procName, 1); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1); - if (pixGetDepth(pix) != 1 && comptype != IFF_TIFF && - comptype != IFF_TIFF_LZW && comptype != IFF_TIFF_ZIP && - comptype != IFF_TIFF_JPEG) { - L_WARNING("invalid compression type for bpp > 1\n", procName); - comptype = IFF_TIFF_ZIP; - } - - if ((tif = fopenTiffMemstream("tifferror", "w", pdata, psize)) == NULL) - return ERROR_INT("tiff stream not opened", procName, 1); - ret = pixWriteToTiffStream(tif, pix, comptype, natags, savals, - satypes, nasizes); - - TIFFClose(tif); - return ret; -} - -/* --------------------------------------------*/ -#endif /* HAVE_LIBTIFF */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/tiffiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/tiffiostub.c deleted file mode 100644 index 181eff4b..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/tiffiostub.c +++ /dev/null @@ -1,242 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file tiffiostub.c - *
- *
- *     Stubs for tiffio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !HAVE_LIBTIFF /* defined in environ.h */ -/* --------------------------------------------*/ - -PIX * pixReadTiff(const char *filename, l_int32 n) -{ - return (PIX *)ERROR_PTR("function not present", "pixReadTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadStreamTiff(FILE *fp, l_int32 n) -{ - return (PIX *)ERROR_PTR("function not present", "pixReadStreamTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteTiff(const char *filename, PIX *pix, l_int32 comptype, - const char *modestring) -{ - return ERROR_INT("function not present", "pixWriteTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteTiffCustom(const char *filename, PIX *pix, l_int32 comptype, - const char *modestring, NUMA *natags, - SARRAY *savals, SARRAY *satypes, NUMA *nasizes) -{ - return ERROR_INT("function not present", "pixWriteTiffCustom", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamTiff(FILE *fp, PIX *pix, l_int32 comptype) -{ - return ERROR_INT("function not present", "pixWriteStreamTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamTiffWA(FILE *fp, PIX *pix, l_int32 comptype, - const char *modestr) -{ - return ERROR_INT("function not present", "pixWriteStreamTiffWA", 1); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadFromMultipageTiff(const char *filename, size_t *poffset) -{ - return (PIX *)ERROR_PTR("function not present", - "pixReadFromMultipageTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIXA * pixaReadMultipageTiff(const char *filename) -{ 
- return (PIXA *)ERROR_PTR("function not present", - "pixaReadMultipageTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaWriteMultipageTiff(const char *filename, PIXA *pixa) -{ - return ERROR_INT("function not present", "pixaWriteMultipageTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok writeMultipageTiff(const char *dirin, const char *substr, - const char *fileout) -{ - return ERROR_INT("function not present", "writeMultipageTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok writeMultipageTiffSA(SARRAY *sa, const char *fileout) -{ - return ERROR_INT("function not present", "writeMultipageTiffSA", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok fprintTiffInfo(FILE *fpout, const char *tiffile) -{ - return ERROR_INT("function not present", "fprintTiffInfo", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok tiffGetCount(FILE *fp, l_int32 *pn) -{ - return ERROR_INT("function not present", "tiffGetCount", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok getTiffResolution(FILE *fp, l_int32 *pxres, l_int32 *pyres) -{ - return ERROR_INT("function not present", "getTiffResolution", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderTiff(const char *filename, l_int32 n, l_int32 *pwidth, - l_int32 *pheight, l_int32 *pbps, l_int32 *pspp, - l_int32 *pres, l_int32 *pcmap, l_int32 *pformat) -{ - return ERROR_INT("function not present", "readHeaderTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok freadHeaderTiff(FILE *fp, l_int32 n, l_int32 *pwidth, - l_int32 *pheight, l_int32 *pbps, l_int32 *pspp, - l_int32 *pres, l_int32 *pcmap, l_int32 *pformat) -{ - return ERROR_INT("function not 
present", "freadHeaderTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderMemTiff(const l_uint8 *cdata, size_t size, l_int32 n, - l_int32 *pwidth, l_int32 *pheight, l_int32 *pbps, - l_int32 *pspp, l_int32 *pres, l_int32 *pcmap, - l_int32 *pformat) -{ - return ERROR_INT("function not present", "readHeaderMemTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok findTiffCompression(FILE *fp, l_int32 *pcomptype) -{ - return ERROR_INT("function not present", "findTiffCompression", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok extractG4DataFromFile(const char *filein, l_uint8 **pdata, - size_t *pnbytes, l_int32 *pw, - l_int32 *ph, l_int32 *pminisblack) -{ - return ERROR_INT("function not present", "extractG4DataFromFile", 1); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemTiff(const l_uint8 *cdata, size_t size, l_int32 n) -{ - return (PIX *)ERROR_PTR("function not present", "pixReadMemTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemFromMultipageTiff(const l_uint8 *cdata, size_t size, - size_t *poffset) -{ - return (PIX *)ERROR_PTR("function not present", - "pixReadMemFromMultipageTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIXA * pixaReadMemMultipageTiff(const l_uint8 *data, size_t size) -{ - return (PIXA *)ERROR_PTR("function not present", - "pixaReadMemMultipageTiff", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaWriteMemMultipageTiff(l_uint8 **pdata, size_t *psize, PIXA *pixa) -{ - return ERROR_INT("function not present", "pixaWriteMemMultipageTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemTiff(l_uint8 **pdata, size_t *psize, 
PIX *pix, - l_int32 comptype) -{ - return ERROR_INT("function not present", "pixWriteMemTiff", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemTiffCustom(l_uint8 **pdata, size_t *psize, PIX *pix, - l_int32 comptype, NUMA *natags, SARRAY *savals, - SARRAY *satypes, NUMA *nasizes) -{ - return ERROR_INT("function not present", "pixWriteMemTiffCustom", 1); -} - -/* --------------------------------------------*/ -#endif /* !HAVE_LIBTIFF */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/utils1.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/utils1.c deleted file mode 100644 index 716d12a4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/utils1.c +++ /dev/null @@ -1,1374 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file utils1.c - *
- *
- *       ------------------------------------------
- *       This file has these utilities:
- *         - error, warning and info messages
- *         - redirection of stderr
- *         - low-level endian conversions
- *         - file corruption operations
- *         - random and prime number operations
- *         - 64-bit hash functions
- *         - leptonica version number accessor
- *         - timing and date operations
- *       ------------------------------------------
- *
- *       Control of error, warning and info messages
- *           l_int32    setMsgSeverity()
- *
- *       Error return functions, invoked by macros
- *           l_int32    returnErrorInt()
- *           l_float32  returnErrorFloat()
- *           void      *returnErrorPtr()
- *
- *       Runtime redirection of stderr
- *           void leptSetStderrHandler()
- *           void lept_stderr()
- *
- *       Test files for equivalence
- *           l_int32    filesAreIdentical()
- *
- *       Byte-swapping data conversion
- *           l_uint16   convertOnBigEnd16()
- *           l_uint32   convertOnBigEnd32()
- *           l_uint16   convertOnLittleEnd16()
- *           l_uint32   convertOnLittleEnd32()
- *
- *       File corruption and byte replacement operations
- *           l_int32    fileCorruptByDeletion()
- *           l_int32    fileCorruptByMutation()
- *           l_int32    fileReplaceBytes()
- *
- *       Generate random integer in given range
- *           l_int32    genRandomIntegerInRange()
- *
- *       Simple math function
- *           l_int32    lept_roundftoi()
- *
- *       64-bit hash functions
- *           l_int32    l_hashStringToUint64()
- *           l_int32    l_hashPtToUint64()
- *           l_int32    l_hashFloat64ToUint64()
- *
- *       Prime finders
- *           l_int32    findNextLargerPrime()
- *           l_int32    lept_isPrime()
- *
- *       Gray code conversion
- *           l_uint32   convertIntToGrayCode()
- *           l_uint32   convertGrayCodeToInt()
- *
- *       Leptonica version number
- *           char      *getLeptonicaVersion()
- *
- *       Timing
- *           void       startTimer()
- *           l_float32  stopTimer()
- *           L_TIMER    startTimerNested()
- *           l_float32  stopTimerNested()
- *           void       l_getCurrentTime()
- *           L_WALLTIMER  *startWallTimer()
- *           l_float32  stopWallTimer()
- *           void       l_getFormattedDate()
- *
- *  For all issues with cross-platform development, see utils2.c.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#if WINAPI_FAMILY_APP -#include -#elif defined (_WIN32) -#include -#endif /* _WIN32 */ - -#include -#include "allheaders.h" -#include - - /* Global for controlling message output at runtime */ -LEPT_DLL l_int32 LeptMsgSeverity = DEFAULT_SEVERITY; - -#define DEBUG_SEV 0 - -/*----------------------------------------------------------------------* - * Control of error, warning and info messages * - *----------------------------------------------------------------------*/ -/*! - * \brief setMsgSeverity() - * - * \param[in] newsev - * \return oldsev - * - *
- * Notes:
- *      (1) setMsgSeverity() allows the user to specify the desired
- *          message severity threshold.  Messages of equal or greater
- *          severity will be output.  The previous message severity is
- *          returned when the new severity is set.
- *      (2) If L_SEVERITY_EXTERNAL is passed, then the severity will be
- *          obtained from the LEPT_MSG_SEVERITY environment variable.
- * 
- */ -l_int32 -setMsgSeverity(l_int32 newsev) -{ -l_int32 oldsev; -char *envsev; - - oldsev = LeptMsgSeverity; - if (newsev == L_SEVERITY_EXTERNAL) { -#if !WINAPI_FAMILY_APP - envsev = getenv("LEPT_MSG_SEVERITY"); - if (envsev) { - LeptMsgSeverity = atoi(envsev); -#if DEBUG_SEV - L_INFO("message severity set to external\n", "setMsgSeverity"); -#endif /* DEBUG_SEV */ - } else { -#if DEBUG_SEV - L_WARNING("environment var LEPT_MSG_SEVERITY not defined\n", - "setMsgSeverity"); -#endif /* DEBUG_SEV */ - } -#endif - } else { - LeptMsgSeverity = newsev; -#if DEBUG_SEV - L_INFO("message severity set to %d\n", "setMsgSeverity", newsev); -#endif /* DEBUG_SEV */ - } - - return oldsev; -} - - -/*----------------------------------------------------------------------* - * Error return functions, invoked by macros * - *----------------------------------------------------------------------* - * * - * (1) These error functions print messages to stderr and allow * - * exit from the function that called them. * - * (2) They must be invoked only by the macros ERROR_INT, * - * ERROR_FLOAT and ERROR_PTR, which are in environ.h * - * (3) The print output can be disabled at compile time, either * - * by using -DNO_CONSOLE_IO or by setting LeptMsgSeverity. * - *----------------------------------------------------------------------*/ -/*! - * \brief returnErrorInt() - * - * \param[in] msg error message - * \param[in] procname - * \param[in] ival return error val - * \return ival typically 1 for an error return - */ -l_int32 -returnErrorInt(const char *msg, - const char *procname, - l_int32 ival) -{ - lept_stderr("Error in %s: %s\n", procname, msg); - return ival; -} - - -/*! - * \brief returnErrorFloat() - * - * \param[in] msg error message - * \param[in] procname - * \param[in] fval return error val - * \return fval - */ -l_float32 -returnErrorFloat(const char *msg, - const char *procname, - l_float32 fval) -{ - lept_stderr("Error in %s: %s\n", procname, msg); - return fval; -} - - -/*! 
- * \brief returnErrorPtr() - * - * \param[in] msg error message - * \param[in] procname - * \param[in] pval return error val - * \return pval typically null for an error return - */ -void * -returnErrorPtr(const char *msg, - const char *procname, - void *pval) -{ - lept_stderr("Error in %s: %s\n", procname, msg); - return pval; -} - - -/*------------------------------------------------------------------------* - * Runtime redirection of stderr * - *------------------------------------------------------------------------* - * * - * The user can provide a callback function to redirect messages * - * that would otherwise go to stderr. Here are two examples: * - * (1) to stop all messages: * - * void send_to_devnull(const char *msg) {} * - * (2) to write to the system logger: * - * void send_to_syslog(const char *msg) { * - * syslog(1, msg); * - * } * - * These would then be registered using - * leptSetStderrHandler(send_to_devnull(); - * and - * leptSetStderrHandler(send_to_syslog(); - *------------------------------------------------------------------------*/ - /* By default, all messages go to stderr */ -static void lept_default_stderr_handler(const char *formatted_msg) -{ - if (formatted_msg) - fputs(formatted_msg, stderr); -} - - /* The stderr callback handler is private to leptonica. - * By default it writes to stderr. */ -void (*stderr_handler)(const char *) = lept_default_stderr_handler; - - -/*! - * \brief leptSetStderrHandler() - * - * \param[in] handler callback function for lept_stderr output - * \return void - * - *
- * Notes:
- *      (1) This registers a handler for redirection of output to stderr
- *          at runtime.
- *      (2) If called with NULL, the output goes to stderr.
- * 
- */ -void leptSetStderrHandler(void (*handler)(const char *)) -{ - if (handler) - stderr_handler = handler; - else - stderr_handler = lept_default_stderr_handler; -} - - -#define MAX_DEBUG_MESSAGE 2000 -/*! - * \brief lept_stderr() - * - * \param[in] fmt format string - * \param[in] ... varargs - * \return void - * - *
- * Notes:
- *      (1) This is a replacement for fprintf(), to allow redirection
- *          of output.  All calls to fprintf(stderr, ...) are replaced
- *          with calls to lept_stderr(...).
- *      (2) The message size is limited to 2K bytes.
-        (3) This utility was provided by jbarlow83.
- * 
- */ -void lept_stderr(const char *fmt, ...) -{ -va_list args; -char msg[MAX_DEBUG_MESSAGE]; -l_int32 n; - - va_start(args, fmt); - n = vsnprintf(msg, sizeof(msg), fmt, args); - va_end(args); - if (n < 0) - return; - (*stderr_handler)(msg); -} - - -/*--------------------------------------------------------------------* - * Test files for equivalence * - *--------------------------------------------------------------------*/ -/*! - * \brief filesAreIdentical() - * - * \param[in] fname1 - * \param[in] fname2 - * \param[out] psame 1 if identical; 0 if different - * \return 0 if OK, 1 on error - */ -l_ok -filesAreIdentical(const char *fname1, - const char *fname2, - l_int32 *psame) -{ -l_int32 i, same; -size_t nbytes1, nbytes2; -l_uint8 *array1, *array2; - - PROCNAME("filesAreIdentical"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = 0; - if (!fname1 || !fname2) - return ERROR_INT("both names not defined", procName, 1); - - nbytes1 = nbytesInFile(fname1); - nbytes2 = nbytesInFile(fname2); - if (nbytes1 != nbytes2) - return 0; - - if ((array1 = l_binaryRead(fname1, &nbytes1)) == NULL) - return ERROR_INT("array1 not read", procName, 1); - if ((array2 = l_binaryRead(fname2, &nbytes2)) == NULL) { - LEPT_FREE(array1); - return ERROR_INT("array2 not read", procName, 1); - } - same = 1; - for (i = 0; i < nbytes1; i++) { - if (array1[i] != array2[i]) { - same = 0; - break; - } - } - LEPT_FREE(array1); - LEPT_FREE(array2); - *psame = same; - - return 0; -} - - -/*--------------------------------------------------------------------------* - * 16 and 32 bit byte-swapping on big endian and little endian machines * - *--------------------------------------------------------------------------* - * * - * These are typically used for I/O conversions: * - * (1) endian conversion for data that was read from a file * - * (2) endian conversion on data before it is written to a file * - 
*--------------------------------------------------------------------------*/ - -/*--------------------------------------------------------------------* - * 16-bit byte swapping * - *--------------------------------------------------------------------*/ -#ifdef L_BIG_ENDIAN - -l_uint16 -convertOnBigEnd16(l_uint16 shortin) -{ - return ((shortin << 8) | (shortin >> 8)); -} - -l_uint16 -convertOnLittleEnd16(l_uint16 shortin) -{ - return shortin; -} - -#else /* L_LITTLE_ENDIAN */ - -l_uint16 -convertOnLittleEnd16(l_uint16 shortin) -{ - return ((shortin << 8) | (shortin >> 8)); -} - -l_uint16 -convertOnBigEnd16(l_uint16 shortin) -{ - return shortin; -} - -#endif /* L_BIG_ENDIAN */ - - -/*--------------------------------------------------------------------* - * 32-bit byte swapping * - *--------------------------------------------------------------------*/ -#ifdef L_BIG_ENDIAN - -l_uint32 -convertOnBigEnd32(l_uint32 wordin) -{ - return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) | - ((wordin >> 8) & 0x0000ff00) | (wordin >> 24)); -} - -l_uint32 -convertOnLittleEnd32(l_uint32 wordin) -{ - return wordin; -} - -#else /* L_LITTLE_ENDIAN */ - -l_uint32 -convertOnLittleEnd32(l_uint32 wordin) -{ - return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) | - ((wordin >> 8) & 0x0000ff00) | (wordin >> 24)); -} - -l_uint32 -convertOnBigEnd32(l_uint32 wordin) -{ - return wordin; -} - -#endif /* L_BIG_ENDIAN */ - - -/*---------------------------------------------------------------------* - * File corruption and byte replacement operations * - *---------------------------------------------------------------------*/ -/*! - * \brief fileCorruptByDeletion() - * - * \param[in] filein - * \param[in] loc fractional location of start of deletion - * \param[in] size fractional size of deletion - * \param[in] fileout corrupted file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) %loc and %size are expressed as a fraction of the file size.
- *      (2) This makes a copy of the data in %filein, where bytes in the
- *          specified region have deleted.
- *      (3) If (%loc + %size) >= 1.0, this deletes from the position
- *          represented by %loc to the end of the file.
- *      (4) It is useful for testing robustness of I/O wrappers when the
- *          data is corrupted, by simulating data corruption by deletion.
- * 
- */ -l_ok -fileCorruptByDeletion(const char *filein, - l_float32 loc, - l_float32 size, - const char *fileout) -{ -l_int32 i, locb, sizeb, rembytes; -size_t inbytes, outbytes; -l_uint8 *datain, *dataout; - - PROCNAME("fileCorruptByDeletion"); - - if (!filein || !fileout) - return ERROR_INT("filein and fileout not both specified", procName, 1); - if (loc < 0.0 || loc >= 1.0) - return ERROR_INT("loc must be in [0.0 ... 1.0)", procName, 1); - if (size <= 0.0) - return ERROR_INT("size must be > 0.0", procName, 1); - if (loc + size > 1.0) - size = 1.0 - loc; - - datain = l_binaryRead(filein, &inbytes); - locb = (l_int32)(loc * inbytes + 0.5); - locb = L_MIN(locb, inbytes - 1); - sizeb = (l_int32)(size * inbytes + 0.5); - sizeb = L_MAX(1, sizeb); - sizeb = L_MIN(sizeb, inbytes - locb); /* >= 1 */ - L_INFO("Removed %d bytes at location %d\n", procName, sizeb, locb); - rembytes = inbytes - locb - sizeb; /* >= 0; to be copied, after excision */ - - outbytes = inbytes - sizeb; - dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1); - for (i = 0; i < locb; i++) - dataout[i] = datain[i]; - for (i = 0; i < rembytes; i++) - dataout[locb + i] = datain[locb + sizeb + i]; - l_binaryWrite(fileout, "w", dataout, outbytes); - - LEPT_FREE(datain); - LEPT_FREE(dataout); - return 0; -} - - -/*! - * \brief fileCorruptByMutation() - * - * \param[in] filein - * \param[in] loc fractional location of start of randomization - * \param[in] size fractional size of randomization - * \param[in] fileout corrupted file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) %loc and %size are expressed as a fraction of the file size.
- *      (2) This makes a copy of the data in %filein, where bytes in the
- *          specified region have been replaced by random data.
- *      (3) If (%loc + %size) >= 1.0, this modifies data from the position
- *          represented by %loc to the end of the file.
- *      (4) It is useful for testing robustness of I/O wrappers when the
- *          data is corrupted, by simulating data corruption.
- * 
- */ -l_ok -fileCorruptByMutation(const char *filein, - l_float32 loc, - l_float32 size, - const char *fileout) -{ -l_int32 i, locb, sizeb; -size_t bytes; -l_uint8 *data; - - PROCNAME("fileCorruptByMutation"); - - if (!filein || !fileout) - return ERROR_INT("filein and fileout not both specified", procName, 1); - if (loc < 0.0 || loc >= 1.0) - return ERROR_INT("loc must be in [0.0 ... 1.0)", procName, 1); - if (size <= 0.0) - return ERROR_INT("size must be > 0.0", procName, 1); - if (loc + size > 1.0) - size = 1.0 - loc; - - data = l_binaryRead(filein, &bytes); - locb = (l_int32)(loc * bytes + 0.5); - locb = L_MIN(locb, bytes - 1); - sizeb = (l_int32)(size * bytes + 0.5); - sizeb = L_MAX(1, sizeb); - sizeb = L_MIN(sizeb, bytes - locb); /* >= 1 */ - L_INFO("Randomizing %d bytes at location %d\n", procName, sizeb, locb); - - /* Make an array of random bytes and do the substitution */ - for (i = 0; i < sizeb; i++) { - data[locb + i] = - (l_uint8)(255.9 * ((l_float64)rand() / (l_float64)RAND_MAX)); - } - - l_binaryWrite(fileout, "w", data, bytes); - LEPT_FREE(data); - return 0; -} - - -/*! - * \brief fileReplaceBytes() - * - * \param[in] filein input file - * \param[in] start start location for replacement - * \param[in] nbytes number of bytes to be removed - * \param[in] newdata replacement bytes - * \param[in] newsize size of replacement bytes - * \param[in] fileout output file - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) To remove %nbytes without replacement, set %newdata == NULL.
- *      (2) One use is for replacing the date/time in a pdf file by a
- *          string of 12 '0's, effectively removing the date without
- *          invalidating the byte counters in the pdf file:
- *              fileReplaceBytes(filein 86 12 (char *)"000000000000" 12 fileout
- * 
- */ -l_ok -fileReplaceBytes(const char *filein, - l_int32 start, - l_int32 nbytes, - l_uint8 *newdata, - size_t newsize, - const char *fileout) -{ -l_int32 i, index; -size_t inbytes, outbytes; -l_uint8 *datain, *dataout; - - PROCNAME("fileReplaceBytes"); - - if (!filein || !fileout) - return ERROR_INT("filein and fileout not both specified", procName, 1); - - datain = l_binaryRead(filein, &inbytes); - if (start + nbytes > inbytes) - L_WARNING("start + nbytes > length(filein) = %zu\n", procName, inbytes); - - if (!newdata) newsize = 0; - outbytes = inbytes - nbytes + newsize; - if ((dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1)) == NULL) { - LEPT_FREE(datain); - return ERROR_INT("calloc fail for dataout", procName, 1); - } - - for (i = 0; i < start; i++) - dataout[i] = datain[i]; - for (i = start; i < start + newsize; i++) - dataout[i] = newdata[i - start]; - index = start + nbytes; /* for datain */ - start += newsize; /* for dataout */ - for (i = start; i < outbytes; i++, index++) - dataout[i] = datain[index]; - l_binaryWrite(fileout, "w", dataout, outbytes); - - LEPT_FREE(datain); - LEPT_FREE(dataout); - return 0; -} - - -/*---------------------------------------------------------------------* - * Generate random integer in given range * - *---------------------------------------------------------------------*/ -/*! - * \brief genRandomIntegerInRange() - * - * \param[in] range size of range; must be >= 2 - * \param[in] seed use 0 to skip; otherwise call srand - * \param[out] pval random integer in range {0 ... range-1} - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) For example, to choose a rand integer between 0 and 99,
- *          use %range = 100.
- * 
- */ -l_ok -genRandomIntegerInRange(l_int32 range, - l_int32 seed, - l_int32 *pval) -{ - PROCNAME("genRandomIntegerInRange"); - - if (!pval) - return ERROR_INT("&val not defined", procName, 1); - *pval = 0; - if (range < 2) - return ERROR_INT("range must be >= 2", procName, 1); - - if (seed > 0) srand(seed); - *pval = (l_int32)((l_float64)range * - ((l_float64)rand() / (l_float64)RAND_MAX)); - return 0; -} - - -/*---------------------------------------------------------------------* - * Simple math function * - *---------------------------------------------------------------------*/ -/*! - * \brief lept_roundftoi() - * - * \param[in] fval - * \return value rounded to int - * - *
- * Notes:
- *      (1) For fval >= 0, fval --> round(fval) == floor(fval + 0.5)
- *          For fval < 0, fval --> -round(-fval))
- *          This is symmetric around 0.
- *          e.g., for fval in (-0.5 ... 0.5), fval --> 0
- * 
- */ -l_int32 -lept_roundftoi(l_float32 fval) -{ - return (fval >= 0.0) ? (l_int32)(fval + 0.5) : (l_int32)(fval - 0.5); -} - - -/*---------------------------------------------------------------------* - * 64-bit hash functions * - *---------------------------------------------------------------------*/ -/*! - * \brief l_hashStringToUint64() - * - * \param[in] str - * \param[out] phash hash value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The intent of the hash is to avoid collisions by mapping
- *          the string as randomly as possible into 64 bits.
- *      (2) To the extent that the hashes are random, the probability of
- *          a collision can be approximated by the square of the number
- *          of strings divided by 2^64.  For 1 million strings, the
- *          collision probability is about 1 in 16 million.
- *      (3) I expect non-randomness of the distribution to be most evident
- *          for small text strings.  This hash function has been tested
- *          for all 5-character text strings composed of 26 letters,
- *          of which there are 26^5 = 12356630.  There are no hash
- *          collisions for this set.
- * 
- */ -l_ok -l_hashStringToUint64(const char *str, - l_uint64 *phash) -{ -l_uint64 hash, mulp; - - PROCNAME("l_hashStringToUint64"); - - if (phash) *phash = 0; - if (!str || (str[0] == '\0')) - return ERROR_INT("str not defined or empty", procName, 1); - if (!phash) - return ERROR_INT("&hash not defined", procName, 1); - - mulp = 26544357894361247; /* prime, about 1/700 of the max uint64 */ - hash = 104395301; - while (*str) { - hash += (*str++ * mulp) ^ (hash >> 7); /* shift [1...23] are ok */ - } - *phash = hash ^ (hash << 37); - return 0; -} - - -/*! - * \brief l_hashPtToUint64() - * - * \param[in] x, y - * \param[out] phash hash value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This simple hash function has no collisions for
- *          any of 400 million points with x and y up to 20000.
- *      (2) Previously used a much more complicated and slower function:
- *            mulp = 26544357894361;
- *            hash = 104395301;
- *            hash += (x * mulp) ^ (hash >> 5);
- *            hash ^= (hash << 7);
- *            hash += (y * mulp) ^ (hash >> 7);
- *            hash = hash ^ (hash << 11);
- *          Such logical gymnastics to get coverage over the 2^64
- *          values are not required.
- * 
- */ -l_ok -l_hashPtToUint64(l_int32 x, - l_int32 y, - l_uint64 *phash) -{ - PROCNAME("l_hashPtToUint64"); - - if (!phash) - return ERROR_INT("&hash not defined", procName, 1); - - *phash = (l_uint64)(2173249142.3849 * x + 3763193258.6227 * y); - return 0; -} - - -/*! - * \brief l_hashFloat64ToUint64() - * - * \param[in] nbuckets - * \param[in] val - * \param[out] phash hash value - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Simple, fast hash for using dnaHash with 64-bit data
- *          (e.g., sets and histograms).
- *      (2) The resulting hash is called a "key" in a lookup
- *          operation.  The bucket for %val in a dnaHash is simply
- *          found by taking the mod of the hash with the number of
- *          buckets (which is prime).  What gets stored in the
- *          dna in that bucket could depend on use, but for the most
- *          flexibility, we store an index into the associated dna.
- *          This is all that is required for generating either a hash set
- *          or a histogram (an example of a hash map).
- *      (3) For example, to generate a histogram, the histogram dna,
- *          a histogram of unique values aligned with the histogram dna,
- *          and a dnahash hashmap are built.  See l_dnaMakeHistoByHash().
- * 
- */ -l_ok -l_hashFloat64ToUint64(l_int32 nbuckets, - l_float64 val, - l_uint64 *phash) -{ - PROCNAME("l_hashFloatToUint64"); - - if (!phash) - return ERROR_INT("&hash not defined", procName, 1); - *phash = (l_uint64)((21.732491 * nbuckets) * val); - return 0; -} - - -/*---------------------------------------------------------------------* - * Prime finders * - *---------------------------------------------------------------------*/ -/*! - * \brief findNextLargerPrime() - * - * \param[in] start - * \param[out] pprime first prime larger than %start - * \return 0 if OK, 1 on error - */ -l_ok -findNextLargerPrime(l_int32 start, - l_uint32 *pprime) -{ -l_int32 i, is_prime; - - PROCNAME("findNextLargerPrime"); - - if (!pprime) - return ERROR_INT("&prime not defined", procName, 1); - *pprime = 0; - if (start <= 0) - return ERROR_INT("start must be > 0", procName, 1); - - for (i = start + 1; ; i++) { - lept_isPrime(i, &is_prime, NULL); - if (is_prime) { - *pprime = i; - return 0; - } - } - - return ERROR_INT("prime not found!", procName, 1); -} - - -/*! 
- * \brief lept_isPrime() - * - * \param[in] n 64-bit unsigned - * \param[out] pis_prime 1 if prime, 0 otherwise - * \param[out] pfactor [optional] smallest divisor, or 0 on error - * or if prime - * \return 0 if OK, 1 on error - */ -l_ok -lept_isPrime(l_uint64 n, - l_int32 *pis_prime, - l_uint32 *pfactor) -{ -l_uint32 div; -l_uint64 limit, ratio; - - PROCNAME("lept_isPrime"); - - if (pis_prime) *pis_prime = 0; - if (pfactor) *pfactor = 0; - if (!pis_prime) - return ERROR_INT("&is_prime not defined", procName, 1); - if (n <= 0) - return ERROR_INT("n must be > 0", procName, 1); - - if (n % 2 == 0) { - if (pfactor) *pfactor = 2; - return 0; - } - - limit = (l_uint64)sqrt((l_float64)n); - for (div = 3; div < limit; div += 2) { - ratio = n / div; - if (ratio * div == n) { - if (pfactor) *pfactor = div; - return 0; - } - } - - *pis_prime = 1; - return 0; -} - - -/*---------------------------------------------------------------------* - * Gray code conversion * - *---------------------------------------------------------------------*/ -/*! - * \brief convertIntToGrayCode() - * - * \param[in] val integer value - * \return corresponding gray code value - * - *
- * Notes:
- *      (1) Gray code values corresponding to integers differ by
- *          only one bit transition between successive integers.
- * 
- */ -l_uint32 -convertIntToGrayCode(l_uint32 val) -{ - return (val >> 1) ^ val; -} - - -/*! - * \brief convertGrayCodeToInt() - * - * \param[in] val gray code value - * \return corresponding integer value - */ -l_uint32 -convertGrayCodeToInt(l_uint32 val) -{ -l_uint32 shift; - - for (shift = 1; shift < 32; shift <<= 1) - val ^= val >> shift; - return val; -} - - -/*---------------------------------------------------------------------* - * Leptonica version number * - *---------------------------------------------------------------------*/ -/*! - * \brief getLeptonicaVersion() - * - * Return: string of version number (e.g., 'leptonica-1.74.2') - * - * Notes: - * (1) The caller has responsibility to free the memory. - */ -char * -getLeptonicaVersion(void) -{ -size_t bufsize = 100; - - char *version = (char *)LEPT_CALLOC(bufsize, sizeof(char)); - -#ifdef _MSC_VER - #ifdef _USRDLL - char dllStr[] = "DLL"; - #else - char dllStr[] = "LIB"; - #endif - #ifdef _DEBUG - char debugStr[] = "Debug"; - #else - char debugStr[] = "Release"; - #endif - #ifdef _M_IX86 - char bitStr[] = " x86"; - #elif _M_X64 - char bitStr[] = " x64"; - #else - char bitStr[] = ""; - #endif - snprintf(version, bufsize, "leptonica-%d.%d.%d (%s, %s) [MSC v.%d %s %s%s]", - LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION, - __DATE__, __TIME__, _MSC_VER, dllStr, debugStr, bitStr); - -#else - - snprintf(version, bufsize, "leptonica-%d.%d.%d", LIBLEPT_MAJOR_VERSION, - LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION); - -#endif /* _MSC_VER */ - return version; -} - - -/*---------------------------------------------------------------------* - * Timing procs * - *---------------------------------------------------------------------*/ -#if !defined(_WIN32) && !defined(__Fuchsia__) - -#include -#include - -static struct rusage rusage_before; -static struct rusage rusage_after; - -/*! 
- * \brief startTimer(), stopTimer() - * - * Notes: - * (1) These measure the cpu time elapsed between the two calls: - * startTimer(); - * .... - * lept_stderr( "Elapsed time = %7.3f sec\n", stopTimer()); - */ -void -startTimer(void) -{ - getrusage(RUSAGE_SELF, &rusage_before); -} - -l_float32 -stopTimer(void) -{ -l_int32 tsec, tusec; - - getrusage(RUSAGE_SELF, &rusage_after); - - tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec; - tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec; - return (tsec + ((l_float32)tusec) / 1000000.0); -} - - -/*! - * \brief startTimerNested(), stopTimerNested() - * - * Example of usage: - * - * L_TIMER t1 = startTimerNested(); - * .... - * L_TIMER t2 = startTimerNested(); - * .... - * lept_stderr( "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2)); - * .... - * lept_stderr( "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1)); - */ -L_TIMER -startTimerNested(void) -{ -struct rusage *rusage_start; - - rusage_start = (struct rusage *)LEPT_CALLOC(1, sizeof(struct rusage)); - getrusage(RUSAGE_SELF, rusage_start); - return rusage_start; -} - -l_float32 -stopTimerNested(L_TIMER rusage_start) -{ -l_int32 tsec, tusec; -struct rusage rusage_stop; - - getrusage(RUSAGE_SELF, &rusage_stop); - - tsec = rusage_stop.ru_utime.tv_sec - - ((struct rusage *)rusage_start)->ru_utime.tv_sec; - tusec = rusage_stop.ru_utime.tv_usec - - ((struct rusage *)rusage_start)->ru_utime.tv_usec; - LEPT_FREE(rusage_start); - return (tsec + ((l_float32)tusec) / 1000000.0); -} - - -/*! 
- * \brief l_getCurrentTime() - * - * \param[out] sec [optional] in seconds since birth of Unix - * \param[out] usec [optional] in microseconds since birth of Unix - * \return void - */ -void -l_getCurrentTime(l_int32 *sec, - l_int32 *usec) -{ -struct timeval tv; - - gettimeofday(&tv, NULL); - if (sec) *sec = (l_int32)tv.tv_sec; - if (usec) *usec = (l_int32)tv.tv_usec; - return; -} - -#elif defined(__Fuchsia__) /* resource.h not implemented on Fuchsia. */ - - /* Timer functions are used for testing and debugging, and - * are stubbed out. If they are needed in the future, they - * can be implemented in Fuchsia using the zircon syscall - * zx_object_get_info() in ZX_INFOR_THREAD_STATS mode. */ - -void -startTimer(void) -{ -} - -l_float32 -stopTimer(void) -{ - return 0.0; -} - -L_TIMER -startTimerNested(void) -{ - return NULL; -} - -l_float32 -stopTimerNested(L_TIMER rusage_start) -{ - return 0.0; -} - -void -l_getCurrentTime(l_int32 *sec, - l_int32 *usec) -{ -} - -#else /* _WIN32 : resource.h not implemented under Windows */ - - /* Note: if division by 10^7 seems strange, the time is expressed - * as the number of 100-nanosecond intervals that have elapsed - * since 12:00 A.M. January 1, 1601. 
*/ - -static ULARGE_INTEGER utime_before; -static ULARGE_INTEGER utime_after; - -void -startTimer(void) -{ -#if !WINAPI_FAMILY_APP -HANDLE this_process; -FILETIME start, stop, kernel, user; - - this_process = GetCurrentProcess(); - - GetProcessTimes(this_process, &start, &stop, &kernel, &user); - - utime_before.LowPart = user.dwLowDateTime; - utime_before.HighPart = user.dwHighDateTime; -#endif -} - -l_float32 -stopTimer(void) -{ -#if WINAPI_FAMILY_APP - return 0; -#else -HANDLE this_process; -FILETIME start, stop, kernel, user; -ULONGLONG hnsec; /* in units of hecto-nanosecond (100 ns) intervals */ - - this_process = GetCurrentProcess(); - - GetProcessTimes(this_process, &start, &stop, &kernel, &user); - - utime_after.LowPart = user.dwLowDateTime; - utime_after.HighPart = user.dwHighDateTime; - hnsec = utime_after.QuadPart - utime_before.QuadPart; - return (l_float32)(signed)hnsec / 10000000.0; -#endif -} - -L_TIMER -startTimerNested(void) -{ -#if WINAPI_FAMILY_APP - return 0; -#else -HANDLE this_process; -FILETIME start, stop, kernel, user; -ULARGE_INTEGER *utime_start; - - this_process = GetCurrentProcess(); - - GetProcessTimes (this_process, &start, &stop, &kernel, &user); - - utime_start = (ULARGE_INTEGER *)LEPT_CALLOC(1, sizeof(ULARGE_INTEGER)); - utime_start->LowPart = user.dwLowDateTime; - utime_start->HighPart = user.dwHighDateTime; - return utime_start; -#endif -} - -l_float32 -stopTimerNested(L_TIMER utime_start) -{ -#if WINAPI_FAMILY_APP - return 0; -#else -HANDLE this_process; -FILETIME start, stop, kernel, user; -ULARGE_INTEGER utime_stop; -ULONGLONG hnsec; /* in units of 100 ns intervals */ - - this_process = GetCurrentProcess (); - - GetProcessTimes (this_process, &start, &stop, &kernel, &user); - - utime_stop.LowPart = user.dwLowDateTime; - utime_stop.HighPart = user.dwHighDateTime; - hnsec = utime_stop.QuadPart - ((ULARGE_INTEGER *)utime_start)->QuadPart; - LEPT_FREE(utime_start); - return (l_float32)(signed)hnsec / 10000000.0; -#endif -} - -void 
-l_getCurrentTime(l_int32 *sec, - l_int32 *usec) -{ -ULARGE_INTEGER utime, birthunix; -FILETIME systemtime; -LONGLONG birthunixhnsec = 116444736000000000; /*in units of 100 ns */ -LONGLONG usecs; - - GetSystemTimeAsFileTime(&systemtime); - utime.LowPart = systemtime.dwLowDateTime; - utime.HighPart = systemtime.dwHighDateTime; - - birthunix.LowPart = (DWORD) birthunixhnsec; - birthunix.HighPart = birthunixhnsec >> 32; - - usecs = (LONGLONG) ((utime.QuadPart - birthunix.QuadPart) / 10); - - if (sec) *sec = (l_int32) (usecs / 1000000); - if (usec) *usec = (l_int32) (usecs % 1000000); - return; -} - -#endif - - -/*! - * \brief startWallTimer() - * - * \return walltimer-ptr - * - *
- * Notes:
- *      (1) These measure the wall clock time  elapsed between the two calls:
- *            L_WALLTIMER *timer = startWallTimer();
- *            ....
- *            lept_stderr( "Elapsed time = %f sec\n", stopWallTimer(&timer);
- *      (2) Note that the timer object is destroyed by stopWallTimer().
- * 
- */ -L_WALLTIMER * -startWallTimer(void) -{ -L_WALLTIMER *timer; - - timer = (L_WALLTIMER *)LEPT_CALLOC(1, sizeof(L_WALLTIMER)); - l_getCurrentTime(&timer->start_sec, &timer->start_usec); - return timer; -} - -/*! - * \brief stopWallTimer() - * - * \param[in,out] ptimer walltimer pointer - * \return time wall time elapsed in seconds - */ -l_float32 -stopWallTimer(L_WALLTIMER **ptimer) -{ -l_int32 tsec, tusec; -L_WALLTIMER *timer; - - PROCNAME("stopWallTimer"); - - if (!ptimer) - return (l_float32)ERROR_FLOAT("&timer not defined", procName, 0.0); - timer = *ptimer; - if (!timer) - return (l_float32)ERROR_FLOAT("timer not defined", procName, 0.0); - - l_getCurrentTime(&timer->stop_sec, &timer->stop_usec); - tsec = timer->stop_sec - timer->start_sec; - tusec = timer->stop_usec - timer->start_usec; - LEPT_FREE(timer); - *ptimer = NULL; - return (tsec + ((l_float32)tusec) / 1000000.0); -} - - -/*! - * \brief l_getFormattedDate() - * - * \return formatted date string, or NULL on error - * - *
- * Notes:
- *      (1) This is used in pdf, in the form specified in section 3.8.2 of
- *          http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf
- *      (2) Contributed by Dave Bryan.  Works on all platforms.
- * 
- */ -char * -l_getFormattedDate(void) -{ -char buf[128] = "", sep = 'Z'; -l_int32 gmt_offset, relh, relm; -time_t ut, lt; -struct tm Tm; -struct tm *tptr = &Tm; - - ut = time(NULL); - - /* This generates a second "time_t" value by calling "gmtime" to - fill in a "tm" structure expressed as UTC and then calling - "mktime", which expects a "tm" structure expressed as the - local time. The result is a value that is offset from the - value returned by the "time" function by the local UTC offset. - "tm_isdst" is set to -1 to tell "mktime" to determine for - itself whether DST is in effect. This is necessary because - "gmtime" always sets "tm_isdst" to 0, which would tell - "mktime" to presume that DST is not in effect. */ -#ifdef _WIN32 - #ifdef _MSC_VER - gmtime_s(tptr, &ut); - #else /* mingw */ - tptr = gmtime(&ut); - #endif -#else - gmtime_r(&ut, tptr); -#endif - tptr->tm_isdst = -1; - lt = mktime(tptr); - - /* Calls "difftime" to obtain the resulting difference in seconds, - * because "time_t" is an opaque type, per the C standard. */ - gmt_offset = (l_int32) difftime(ut, lt); - if (gmt_offset > 0) - sep = '+'; - else if (gmt_offset < 0) - sep = '-'; - relh = L_ABS(gmt_offset) / 3600; - relm = (L_ABS(gmt_offset) % 3600) / 60; - -#ifdef _WIN32 - #ifdef _MSC_VER - localtime_s(tptr, &ut); - #else /* mingw */ - tptr = localtime(&ut); - #endif -#else - localtime_r(&ut, tptr); -#endif - strftime(buf, sizeof(buf), "%Y%m%d%H%M%S", tptr); - sprintf(buf + 14, "%c%02d'%02d'", sep, relh, relm); - return stringNew(buf); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/utils2.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/utils2.c deleted file mode 100644 index ba01e265..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/utils2.c +++ /dev/null @@ -1,3384 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file utils2.c - *
- *
- *      ------------------------------------------
- *      This file has these utilities:
- *         - safe string operations
- *         - find/replace operations on strings
- *         - read/write between file and memory
- *         - multi-platform file and directory operations
- *         - file name operations
- *      ------------------------------------------
- *
- *       Safe string procs
- *           char      *stringNew()
- *           l_int32    stringCopy()
- *           l_int32    stringCopySegment()
- *           l_int32    stringReplace()
- *           l_int32    stringLength()
- *           l_int32    stringCat()
- *           char      *stringConcatNew()
- *           char      *stringJoin()
- *           l_int32    stringJoinIP()
- *           char      *stringReverse()
- *           char      *strtokSafe()
- *           l_int32    stringSplitOnToken()
- *
- *       Find and replace string and array procs
- *           l_int32    stringCheckForChars()
- *           char      *stringRemoveChars()
- *           char      *stringReplaceEachSubstr()
- *           char      *stringReplaceSubstr()
- *           L_DNA     *stringFindEachSubstr()
- *           l_int32    stringFindSubstr()
- *           l_uint8   *arrayReplaceEachSequence()
- *           L_DNA     *arrayFindEachSequence()
- *           l_int32    arrayFindSequence()
- *
- *       Safe realloc
- *           void      *reallocNew()
- *
- *       Read and write between file and memory
- *           l_uint8   *l_binaryRead()
- *           l_uint8   *l_binaryReadStream()
- *           l_uint8   *l_binaryReadSelect()
- *           l_uint8   *l_binaryReadSelectStream()
- *           l_int32    l_binaryWrite()
- *           l_int32    nbytesInFile()
- *           l_int32    fnbytesInFile()
- *
- *       Copy and compare in memory
- *           l_uint8   *l_binaryCopy()
- *           l_uint8   *l_binaryCompare()
- *
- *       File copy operations
- *           l_int32    fileCopy()
- *           l_int32    fileConcatenate()
- *           l_int32    fileAppendString()
- *
- *       Multi-platform functions for opening file streams
- *           FILE      *fopenReadStream()
- *           FILE      *fopenWriteStream()
- *           FILE      *fopenReadFromMemory()
- *
- *       Opening a windows tmpfile for writing
- *           FILE      *fopenWriteWinTempfile()
- *
- *       Multi-platform functions that avoid C-runtime boundary crossing
- *       with Windows DLLs
- *           FILE      *lept_fopen()
- *           l_int32    lept_fclose()
- *           void       lept_calloc()
- *           void       lept_free()
- *
- *       Multi-platform file system operations in temp directories
- *           l_int32    lept_mkdir()
- *           l_int32    lept_rmdir()
- *           l_int32    lept_direxists()
- *           l_int32    lept_mv()
- *           l_int32    lept_rm_match()
- *           l_int32    lept_rm()
- *           l_int32    lept_rmfile()
- *           l_int32    lept_cp()
- *
- *       Special debug/test function for calling 'system'
- *           void       callSystemDebug()
- *
- *       General file name operations
- *           l_int32    splitPathAtDirectory()
- *           l_int32    splitPathAtExtension()
- *           char      *pathJoin()
- *           char      *appendSubdirs()
- *
- *       Special file name operations
- *           l_int32    convertSepCharsInPath()
- *           char      *genPathname()
- *           l_int32    makeTempDirname()
- *           l_int32    modifyTrailingSlash()
- *           char      *l_makeTempFilename()
- *           l_int32    extractNumberFromFilename()
- *
- *
- *  Notes on multi-platform development
- *  -----------------------------------
- *  This is important:
- *  (1) With the exception of splitPathAtDirectory(), splitPathAtExtension()
-  *     and genPathname(), all input pathnames must have unix separators.
- *  (2) On Windows, when you specify a read or write to "/tmp/...",
- *      the filename is rewritten to use the Windows temp directory:
- *         /tmp  ==>   [Temp]...    (windows)
- *  (3) This filename rewrite, along with the conversion from unix
- *      to windows pathnames, happens in genPathname().
- *  (4) Use fopenReadStream() and fopenWriteStream() to open files,
- *      because these use genPathname() to find the platform-dependent
- *      filenames.  Likewise for l_binaryRead() and l_binaryWrite().
- *  (5) For moving, copying and removing files and directories that are in
- *      subdirectories of /tmp, use the lept_*() file system shell wrappers:
- *         lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp().
- *  (6) Use the lept_*() C library wrappers.  These work properly on
- *      Windows, where the same DLL must perform complementary operations
- *      on file streams (open/close) and heap memory (malloc/free):
- *         lept_fopen(), lept_fclose(), lept_calloc() and lept_free().
- *  (7) Why read and write files to temp directories?
- *      The library needs the ability to read and write ephemeral
- *      files to default places, both for generating debugging output
- *      and for supporting regression tests.  Applications also need
- *      this ability for debugging.
- *  (8) Why do the pathname rewrite on Windows?
- *      The goal is to have the library, and programs using the library,
- *      run on multiple platforms without changes.  The location of
- *      temporary files depends on the platform as well as the user's
- *      configuration.  Temp files on Windows are in some directory
- *      not known a priori.  To make everything work seamlessly on
- *      Windows, every time you open a file for reading or writing,
- *      use a special function such as fopenReadStream() or
- *      fopenWriteStream(); these call genPathname() to ensure that
- *      if it is a temp file, the correct path is used.  To indicate
- *      that this is a temp file, the application is written with the
- *      root directory of the path in a canonical form: "/tmp".
- *  (9) Why is it that multi-platform directory functions like lept_mkdir()
- *      and lept_rmdir(), as well as associated file functions like
- *      lept_rm(), lept_mv() and lept_cp(), only work in the temp dir?
- *      These functions were designed to provide easy manipulation of
- *      temp files.  The restriction to temp files is for safety -- to
- *      prevent an accidental deletion of important files.  For example,
- *      lept_rmdir() first deletes all files in a specified subdirectory
- *      of temp, and then removes the directory.
- *
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#ifdef _MSC_VER -#include -#include -#define getcwd _getcwd /* fix MSVC warning */ -#else -#include -#endif /* _MSC_VER */ - -#ifdef _WIN32 -#include -#include /* _O_CREAT, ... */ -#include /* _open */ -#include /* _S_IREAD, _S_IWRITE */ -#else -#include /* for stat, mkdir(2) */ -#include -#endif - -#ifdef OS_IOS -#include -#include -#endif - -#include -#include -#include "allheaders.h" - - -/*--------------------------------------------------------------------* - * Safe string operations * - *--------------------------------------------------------------------*/ -/*! - * \brief stringNew() - * - * \param[in] src - * \return dest copy of %src string, or NULL on error - */ -char * -stringNew(const char *src) -{ -l_int32 len; -char *dest; - - PROCNAME("stringNew"); - - if (!src) { - L_WARNING("src not defined\n", procName); - return NULL; - } - - len = strlen(src); - if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("dest not made", procName, NULL); - - stringCopy(dest, src, len); - return dest; -} - - -/*! - * \brief stringCopy() - * - * \param[in] dest existing byte buffer - * \param[in] src string [optional] can be null - * \param[in] n max number of characters to copy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Relatively safe wrapper for strncpy, that checks the input,
- *          and does not complain if %src is null or %n < 1.
- *          If %n < 1, this is a no-op.
- *      (2) %dest needs to be at least %n bytes in size.
- *      (3) We don't call strncpy() because valgrind complains about
- *          use of uninitialized values.
- * 
- */ -l_ok -stringCopy(char *dest, - const char *src, - l_int32 n) -{ -l_int32 i; - - PROCNAME("stringCopy"); - - if (!dest) - return ERROR_INT("dest not defined", procName, 1); - if (!src || n < 1) - return 0; - - /* Implementation of strncpy that valgrind doesn't complain about */ - for (i = 0; i < n && src[i] != '\0'; i++) - dest[i] = src[i]; - for (; i < n; i++) - dest[i] = '\0'; - return 0; -} - - -/*! - * \brief stringCopySegment() - * - * - * \param[in] src string - * \param[in] start byte position at start of segment - * \param[in] nbytes number of bytes in the segment; use 0 to go to end - * \return copy of segment, or NULL on error - * - *
- * Notes:
- *      (1) This is a variant of stringNew() that makes a new string
- *          from a segment of the input string.  The segment is specified
- *          by the starting position and the number of bytes.
- *      (2) The start location %start must be within the string %src.
- *      (3) The copy is truncated to the end of the source string.
- *          Use %nbytes = 0 to copy to the end of %src.
- * 
- */ -char * -stringCopySegment(const char *src, - l_int32 start, - l_int32 nbytes) -{ -char *dest; -l_int32 len; - - PROCNAME("stringCopySegment"); - - if (!src) - return (char *)ERROR_PTR("src not defined", procName, NULL); - len = strlen(src); - if (start < 0 || start > len - 1) - return (char *)ERROR_PTR("invalid start", procName, NULL); - if (nbytes <= 0) /* copy to the end */ - nbytes = len - start; - if (start + nbytes > len) /* truncate to the end */ - nbytes = len - start; - if ((dest = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("dest not made", procName, NULL); - stringCopy(dest, src + start, nbytes); - return dest; -} - - -/*! - * \brief stringReplace() - * - * \param[out] pdest string copy - * \param[in] src [optional] string; can be null - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Frees any existing dest string
- *      (2) Puts a copy of src string in the dest
- *      (3) If either or both strings are null, does something reasonable.
- * 
- */ -l_ok -stringReplace(char **pdest, - const char *src) -{ - PROCNAME("stringReplace"); - - if (!pdest) - return ERROR_INT("pdest not defined", procName, 1); - - if (*pdest) - LEPT_FREE(*pdest); - - if (src) - *pdest = stringNew(src); - else - *pdest = NULL; - return 0; -} - - -/*! - * \brief stringLength() - * - * \param[in] src string can be null or NULL-terminated string - * \param[in] size size of src buffer - * \return length of src in bytes. - * - *
- * Notes:
- *      (1) Safe implementation of strlen that only checks size bytes
- *          for trailing NUL.
- *      (2) Valid returned string lengths are between 0 and size - 1.
- *          If size bytes are checked without finding a NUL byte, then
- *          an error is indicated by returning size.
- * 
- */ -l_int32 -stringLength(const char *src, - size_t size) -{ -l_int32 i; - - PROCNAME("stringLength"); - - if (!src) - return ERROR_INT("src not defined", procName, 0); - if (size < 1) - return 0; - - for (i = 0; i < size; i++) { - if (src[i] == '\0') - return i; - } - return size; /* didn't find a NUL byte */ -} - - -/*! - * \brief stringCat() - * - * \param[in] dest null-terminated byte buffer - * \param[in] size size of dest - * \param[in] src string can be null or NULL-terminated string - * \return number of bytes added to dest; -1 on error - * - *
- * Notes:
- *      (1) Alternative implementation of strncat, that checks the input,
- *          is easier to use (since the size of the dest buffer is specified
- *          rather than the number of bytes to copy), and does not complain
- *          if %src is null.
- *      (2) Never writes past end of dest.
- *      (3) If there is not enough room to append the src, which is an error,
- *          it does nothing.
- *      (4) N.B. The order of 2nd and 3rd args is reversed from that in
- *          strncat, as in the Windows function strcat_s().
- * 
- */ -l_int32 -stringCat(char *dest, - size_t size, - const char *src) -{ -l_int32 i, n; -l_int32 lendest, lensrc; - - PROCNAME("stringCat"); - - if (!dest) - return ERROR_INT("dest not defined", procName, -1); - if (size < 1) - return ERROR_INT("size < 1; too small", procName, -1); - if (!src) - return 0; - - lendest = stringLength(dest, size); - if (lendest == size) - return ERROR_INT("no terminating nul byte", procName, -1); - lensrc = stringLength(src, size); - if (lensrc == 0) - return 0; - n = (lendest + lensrc > size - 1 ? 0 : lensrc); - if (n < 1) - return ERROR_INT("dest too small for append", procName, -1); - - for (i = 0; i < n; i++) - dest[lendest + i] = src[i]; - dest[lendest + n] = '\0'; - return n; -} - - -/*! - * \brief stringConcatNew() - * - * \param[in] first first string in list - * \param[in] ... NULL-terminated list of strings - * \return result new string concatenating the input strings, or - * NULL if first == NULL - * - *
- * Notes:
- *      (1) The last arg in the list of strings must be NULL.
- *      (2) Caller must free the returned string.
- * 
- */ -char * -stringConcatNew(const char *first, ...) -{ -size_t len; -char *result, *ptr; -const char *arg; -va_list args; - - if (!first) return NULL; - - /* Find the length of the output string */ - va_start(args, first); - len = strlen(first); - while ((arg = va_arg(args, const char *)) != NULL) - len += strlen(arg); - va_end(args); - result = (char *)LEPT_CALLOC(len + 1, sizeof(char)); - - /* Concatenate the args */ - va_start(args, first); - ptr = result; - arg = first; - while (*arg) - *ptr++ = *arg++; - while ((arg = va_arg(args, const char *)) != NULL) { - while (*arg) - *ptr++ = *arg++; - } - va_end(args); - return result; -} - - -/*! - * \brief stringJoin() - * - * \param[in] src1 [optional] string; can be null - * \param[in] src2 [optional] string; can be null - * \return concatenated string, or NULL on error - * - *
- * Notes:
- *      (1) This is a safe version of strcat; it makes a new string.
- *      (2) It is not an error if either or both of the strings
- *          are empty, or if either or both of the pointers are null.
- * 
- */ -char * -stringJoin(const char *src1, - const char *src2) -{ -char *dest; -l_int32 srclen1, srclen2, destlen; - - PROCNAME("stringJoin"); - - srclen1 = (src1) ? strlen(src1) : 0; - srclen2 = (src2) ? strlen(src2) : 0; - destlen = srclen1 + srclen2 + 3; - - if ((dest = (char *)LEPT_CALLOC(destlen, sizeof(char))) == NULL) - return (char *)ERROR_PTR("calloc fail for dest", procName, NULL); - - if (src1) - stringCat(dest, destlen, src1); - if (src2) - stringCat(dest, destlen, src2); - return dest; -} - - -/*! - * \brief stringJoinIP() - * - * \param[in,out] psrc1 address of string src1; cannot be on the stack - * \param[in] src2 [optional] string; can be null - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a safe in-place version of strcat.  The contents of
- *          src1 is replaced by the concatenation of src1 and src2.
- *      (2) It is not an error if either or both of the strings
- *          are empty (""), or if the pointers to the strings (*psrc1, src2)
- *          are null.
- *      (3) src1 should be initialized to null or an empty string
- *          before the first call.  Use one of these:
- *              char *src1 = NULL;
- *              char *src1 = stringNew("");
- *          Then call with:
- *              stringJoinIP(&src1, src2);
- *      (4) This can also be implemented as a macro:
- * \code
- *              #define stringJoinIP(src1, src2) \
- *                  {tmpstr = stringJoin((src1),(src2)); \
- *                  LEPT_FREE(src1); \
- *                  (src1) = tmpstr;}
- * \endcode
- *      (5) Another function to consider for joining many strings is
- *          stringConcatNew().
- * 
- */ -l_ok -stringJoinIP(char **psrc1, - const char *src2) -{ -char *tmpstr; - - PROCNAME("stringJoinIP"); - - if (!psrc1) - return ERROR_INT("&src1 not defined", procName, 1); - - tmpstr = stringJoin(*psrc1, src2); - LEPT_FREE(*psrc1); - *psrc1 = tmpstr; - return 0; -} - - -/*! - * \brief stringReverse() - * - * \param[in] src string - * \return dest newly-allocated reversed string - */ -char * -stringReverse(const char *src) -{ -char *dest; -l_int32 i, len; - - PROCNAME("stringReverse"); - - if (!src) - return (char *)ERROR_PTR("src not defined", procName, NULL); - len = strlen(src); - if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("calloc fail for dest", procName, NULL); - for (i = 0; i < len; i++) - dest[i] = src[len - 1 - i]; - - return dest; -} - - -/*! - * \brief strtokSafe() - * - * \param[in] cstr input string to be sequentially parsed; - * use NULL after the first call - * \param[in] seps a string of character separators - * \param[out] psaveptr ptr to the next char after - * the last encountered separator - * \return substr a new string that is copied from the previous - * saveptr up to but not including the next - * separator character, or NULL if end of cstr. - * - *
- * Notes:
- *      (1) This is a thread-safe implementation of strtok.
- *      (2) It has the same interface as strtok_r.
- *      (3) It differs from strtok_r in usage in two respects:
- *          (a) the input string is not altered
- *          (b) each returned substring is newly allocated and must
- *              be freed after use.
- *      (4) Let me repeat that.  This is "safe" because the input
- *          string is not altered and because each returned string
- *          is newly allocated on the heap.
- *      (5) It is here because, surprisingly, some C libraries don't
- *          include strtok_r.
- *      (6) Important usage points:
- *          ~ Input the string to be parsed on the first invocation.
- *          ~ Then input NULL after that; the value returned in saveptr
- *            is used in all subsequent calls.
- *      (7) This is only slightly slower than strtok_r.
- * 
- */ -char * -strtokSafe(char *cstr, - const char *seps, - char **psaveptr) -{ -char nextc; -char *start, *substr; -l_int32 istart, i, j, nchars; - - PROCNAME("strtokSafe"); - - if (!seps) - return (char *)ERROR_PTR("seps not defined", procName, NULL); - if (!psaveptr) - return (char *)ERROR_PTR("&saveptr not defined", procName, NULL); - - if (!cstr) { - start = *psaveptr; - } else { - start = cstr; - *psaveptr = NULL; - } - if (!start) /* nothing to do */ - return NULL; - - /* First time, scan for the first non-sep character */ - istart = 0; - if (cstr) { - for (istart = 0;; istart++) { - if ((nextc = start[istart]) == '\0') { - *psaveptr = NULL; /* in case caller doesn't check ret value */ - return NULL; - } - if (!strchr(seps, nextc)) - break; - } - } - - /* Scan through, looking for a sep character; if none is - * found, 'i' will be at the end of the string. */ - for (i = istart;; i++) { - if ((nextc = start[i]) == '\0') - break; - if (strchr(seps, nextc)) - break; - } - - /* Save the substring */ - nchars = i - istart; - substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char)); - stringCopy(substr, start + istart, nchars); - - /* Look for the next non-sep character. - * If this is the last substring, return a null saveptr. */ - for (j = i;; j++) { - if ((nextc = start[j]) == '\0') { - *psaveptr = NULL; /* no more non-sep characters */ - break; - } - if (!strchr(seps, nextc)) { - *psaveptr = start + j; /* start here on next call */ - break; - } - } - - return substr; -} - - -/*! - * \brief stringSplitOnToken() - * - * \param[in] cstr input string to be split; not altered - * \param[in] seps a string of character separators - * \param[out] phead ptr to copy of the input string, up to - * the first separator token encountered - * \param[out] ptail ptr to copy of the part of the input string - * starting with the first non-separator character - * that occurs after the first separator is found - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The input string is not altered; all split parts are new strings.
- *      (2) The split occurs around the first consecutive sequence of
- *          tokens encountered.
- *      (3) The head goes from the beginning of the string up to
- *          but not including the first token found.
- *      (4) The tail contains the second part of the string, starting
- *          with the first char in that part that is NOT a token.
- *      (5) If no separator token is found, 'head' contains a copy
- *          of the input string and 'tail' is null.
- * 
- */ -l_ok -stringSplitOnToken(char *cstr, - const char *seps, - char **phead, - char **ptail) -{ -char *saveptr; - - PROCNAME("stringSplitOnToken"); - - if (!phead) - return ERROR_INT("&head not defined", procName, 1); - if (!ptail) - return ERROR_INT("&tail not defined", procName, 1); - *phead = *ptail = NULL; - if (!cstr) - return ERROR_INT("cstr not defined", procName, 1); - if (!seps) - return ERROR_INT("seps not defined", procName, 1); - - *phead = strtokSafe(cstr, seps, &saveptr); - if (saveptr) - *ptail = stringNew(saveptr); - return 0; -} - - -/*--------------------------------------------------------------------* - * Find and replace procs * - *--------------------------------------------------------------------*/ -/*! - * \brief stringCheckForChars() - * - * \param[in] src input string; can be of zero length - * \param[in] chars string of chars to be searched for in %src - * \param[out] pfound 1 if any characters are found; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This can be used to sanitize an operation by checking for
- *          special characters that don't belong in a string.
- * 
- */ -l_ok -stringCheckForChars(const char *src, - const char *chars, - l_int32 *pfound) -{ -char ch; -l_int32 i, n; - - PROCNAME("stringCheckForChars"); - - if (!pfound) - return ERROR_INT("&found not defined", procName, 1); - *pfound = FALSE; - if (!src || !chars) - return ERROR_INT("src and chars not both defined", procName, 1); - - n = strlen(src); - for (i = 0; i < n; i++) { - ch = src[i]; - if (strchr(chars, ch)) { - *pfound = TRUE; - break; - } - } - return 0; -} - - -/*! - * \brief stringRemoveChars() - * - * \param[in] src input string; can be of zero length - * \param[in] remchars string of chars to be removed from src - * \return dest string with specified chars removed, or NULL on error - */ -char * -stringRemoveChars(const char *src, - const char *remchars) -{ -char ch; -char *dest; -l_int32 nsrc, i, k; - - PROCNAME("stringRemoveChars"); - - if (!src) - return (char *)ERROR_PTR("src not defined", procName, NULL); - if (!remchars) - return stringNew(src); - - if ((dest = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("dest not made", procName, NULL); - nsrc = strlen(src); - for (i = 0, k = 0; i < nsrc; i++) { - ch = src[i]; - if (!strchr(remchars, ch)) - dest[k++] = ch; - } - - return dest; -} - - -/*! - * \brief stringReplaceEachSubstr() - * - * \param[in] src input string; can be of zero length - * \param[in] sub1 substring to be replaced - * \param[in] sub2 substring to put in; can be "" - * \param[out] pcount [optional] the number of times that sub1 - * is found in src; 0 if not found - * \return dest string with substring replaced, or NULL if the - * substring not found or on error. - * - *
- * Notes:
- *      (1) This is a wrapper for simple string substitution that uses
- *          the more general function arrayReplaceEachSequence().
- *      (2) This finds every non-overlapping occurrence of %sub1 in
- *          %src, and replaces it with %sub2.  By "non-overlapping"
- *          we mean that after it finds each match, it removes the
- *          matching characters, replaces with the substitution string
- *          (if not empty), and continues.  For example, if you replace
- *          'aa' by 'X' in 'baaabbb', you find one match at position 1
- *          and return 'bXabbb'.
- *      (3) To only remove each instance of sub1, use "" for sub2
- *      (4) Returns a copy of %src if sub1 and sub2 are the same.
- *      (5) If the input %src is binary data that can have null characters,
- *          use arrayReplaceEachSequence() directly.
- * 
- */ -char * -stringReplaceEachSubstr(const char *src, - const char *sub1, - const char *sub2, - l_int32 *pcount) -{ -size_t datalen; - - PROCNAME("stringReplaceEachSubstr"); - - if (pcount) *pcount = 0; - if (!src || !sub1 || !sub2) - return (char *)ERROR_PTR("src, sub1, sub2 not all defined", - procName, NULL); - - if (strlen(sub2) > 0) { - return (char *)arrayReplaceEachSequence( - (const l_uint8 *)src, strlen(src), - (const l_uint8 *)sub1, strlen(sub1), - (const l_uint8 *)sub2, strlen(sub2), - &datalen, pcount); - } else { /* empty replacement string; removal only */ - return (char *)arrayReplaceEachSequence( - (const l_uint8 *)src, strlen(src), - (const l_uint8 *)sub1, strlen(sub1), - NULL, 0, &datalen, pcount); - } -} - - -/*! - * \brief stringReplaceSubstr() - * - * \param[in] src input string; can be of zero length - * \param[in] sub1 substring to be replaced - * \param[in] sub2 substring to put in; can be "" - * \param[in,out] ploc [optional] input start location for search; - * returns the loc after replacement - * \param[out] pfound [optional] 1 if sub1 is found; 0 otherwise - * \return dest string with substring replaced, or NULL on error. - * - *
- * Notes:
- *      (1) Replaces the first instance.
- *      (2) To remove sub1 without replacement, use "" for sub2.
- *      (3) Returns a copy of %src if either no instance of %sub1 is found,
- *          or if %sub1 and %sub2 are the same.
- *      (4) If %ploc == NULL, the search will start at the beginning of %src.
- *          If %ploc != NULL, *ploc must be initialized to the byte offset
- *          within %src from which the search starts.  To search the
- *          string from the beginning, set %loc = 0 and input &loc.
- *          After finding %sub1 and replacing it with %sub2, %loc will be
- *          returned as the next position after %sub2 in the output string.
- *      (5) Note that the output string also includes all the characters
- *          from the input string that occur after the single substitution.
- * 
- */ -char * -stringReplaceSubstr(const char *src, - const char *sub1, - const char *sub2, - l_int32 *ploc, - l_int32 *pfound) -{ -const char *ptr; -char *dest; -l_int32 nsrc, nsub1, nsub2, len, npre, loc; - - PROCNAME("stringReplaceSubstr"); - - if (pfound) *pfound = 0; - if (!src || !sub1 || !sub2) - return (char *)ERROR_PTR("src, sub1, sub2 not all defined", - procName, NULL); - - if (ploc) - loc = *ploc; - else - loc = 0; - if (!strcmp(sub1, sub2)) - return stringNew(src); - if ((ptr = strstr(src + loc, sub1)) == NULL) - return stringNew(src); - if (pfound) *pfound = 1; - - nsrc = strlen(src); - nsub1 = strlen(sub1); - nsub2 = strlen(sub2); - len = nsrc + nsub2 - nsub1; - if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) - return (char *)ERROR_PTR("dest not made", procName, NULL); - npre = ptr - src; - memcpy(dest, src, npre); - strcpy(dest + npre, sub2); - strcpy(dest + npre + nsub2, ptr + nsub1); - if (ploc) *ploc = npre + nsub2; - return dest; -} - - -/*! - * \brief stringFindEachSubstr() - * - * \param[in] src input string; can be of zero length - * \param[in] sub substring to be searched for - * \return dna of offsets where the sequence is found, or NULL if - * none are found or on error - * - *
- * Notes:
- *      (1) This finds every non-overlapping occurrence in %src of %sub.
- *          After it finds each match, it moves forward in %src by the length
- *          of %sub before continuing the search.  So for example,
- *          if you search for the sequence 'aa' in the data 'baaabbb',
- *          you find one match at position 1.
-
- * 
- */ -L_DNA * -stringFindEachSubstr(const char *src, - const char *sub) -{ - PROCNAME("stringFindEachSubstr"); - - if (!src || !sub) - return (L_DNA *)ERROR_PTR("src, sub not both defined", procName, NULL); - - return arrayFindEachSequence((const l_uint8 *)src, strlen(src), - (const l_uint8 *)sub, strlen(sub)); -} - - -/*! - * \brief stringFindSubstr() - * - * \param[in] src input string; can be of zero length - * \param[in] sub substring to be searched for; must not be empty - * \param[out] ploc [optional] location of substring in src - * \return 1 if found; 0 if not found or on error - * - *
- * Notes:
- *      (1) This is a wrapper around strstr().  It finds the first
- *          instance of %sub in %src.  If the substring is not found
- *          and the location is returned, it has the value -1.
- *      (2) Both %src and %sub must be defined, and %sub must have
- *          length of at least 1.
- * 
- */ -l_int32 -stringFindSubstr(const char *src, - const char *sub, - l_int32 *ploc) -{ -const char *ptr; - - PROCNAME("stringFindSubstr"); - - if (ploc) *ploc = -1; - if (!src || !sub) - return ERROR_INT("src and sub not both defined", procName, 0); - if (strlen(sub) == 0) - return ERROR_INT("substring length 0", procName, 0); - if (strlen(src) == 0) - return 0; - - if ((ptr = strstr(src, sub)) == NULL) /* not found */ - return 0; - - if (ploc) - *ploc = ptr - src; - return 1; -} - - -/*! - * \brief arrayReplaceEachSequence() - * - * \param[in] datas source byte array - * \param[in] dataslen length of source data, in bytes - * \param[in] seq subarray of bytes to find in source data - * \param[in] seqlen length of subarray, in bytes - * \param[in] newseq replacement subarray; can be null - * \param[in] newseqlen length of replacement subarray, in bytes - * \param[out] pdatadlen length of dest byte array, in bytes - * \param[out] pcount [optional] the number of times that sub1 - * is found in src; 0 if not found - * \return datad with all all subarrays replaced (or removed) - * - *
- * Notes:
- *      (1) The byte arrays %datas, %seq and %newseq are not C strings,
- *          because they can contain null bytes.  Therefore, for each
- *          we must give the length of the array.
- *      (2) If %newseq == NULL, this just removes all instances of %seq.
- *          Otherwise, it replaces every non-overlapping occurrence of
- *          %seq in %datas with %newseq. A new array %datad and its
- *          size are returned.  See arrayFindEachSequence() for more
- *          details on finding non-overlapping occurrences.
- *      (3) If no instances of %seq are found, this returns a copy of %datas.
- *      (4) The returned %datad is null terminated.
- *      (5) Can use stringReplaceEachSubstr() if using C strings.
- * 
- */ -l_uint8 * -arrayReplaceEachSequence(const l_uint8 *datas, - size_t dataslen, - const l_uint8 *seq, - size_t seqlen, - const l_uint8 *newseq, - size_t newseqlen, - size_t *pdatadlen, - l_int32 *pcount) -{ -l_uint8 *datad; -size_t newsize; -l_int32 n, i, j, di, si, index, incr; -L_DNA *da; - - PROCNAME("arrayReplaceEachSequence"); - - if (pcount) *pcount = 0; - if (!datas || !seq) - return (l_uint8 *)ERROR_PTR("datas & seq not both defined", - procName, NULL); - if (!pdatadlen) - return (l_uint8 *)ERROR_PTR("&datadlen not defined", procName, NULL); - *pdatadlen = 0; - - /* Identify the locations of the sequence. If there are none, - * return a copy of %datas. */ - if ((da = arrayFindEachSequence(datas, dataslen, seq, seqlen)) == NULL) { - *pdatadlen = dataslen; - return l_binaryCopy(datas, dataslen); - } - - /* Allocate the output data; insure null termination */ - n = l_dnaGetCount(da); - if (pcount) *pcount = n; - if (!newseq) newseqlen = 0; - newsize = dataslen + n * (newseqlen - seqlen) + 4; - if ((datad = (l_uint8 *)LEPT_CALLOC(newsize, sizeof(l_uint8))) == NULL) { - l_dnaDestroy(&da); - return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL); - } - - /* Replace each sequence instance with a new sequence */ - l_dnaGetIValue(da, 0, &si); - for (i = 0, di = 0, index = 0; i < dataslen; i++) { - if (i == si) { - index++; - if (index < n) { - l_dnaGetIValue(da, index, &si); - incr = L_MIN(seqlen, si - i); /* amount to remove from datas */ - } else { - incr = seqlen; - } - i += incr - 1; /* jump over the matched sequence in datas */ - if (newseq) { /* add new sequence to datad */ - for (j = 0; j < newseqlen; j++) - datad[di++] = newseq[j]; - } - } else { - datad[di++] = datas[i]; - } - } - - *pdatadlen = di; - l_dnaDestroy(&da); - return datad; -} - - -/*! 
- * \brief arrayFindEachSequence() - * - * \param[in] data byte array - * \param[in] datalen length of data, in bytes - * \param[in] sequence subarray of bytes to find in data - * \param[in] seqlen length of sequence, in bytes - * \return dna of offsets where the sequence is found, or NULL if - * none are found or on error - * - *
- * Notes:
- *      (1) The byte arrays %data and %sequence are not C strings,
- *          because they can contain null bytes.  Therefore, for each
- *          we must give the length of the array.
- *      (2) This finds every non-overlapping occurrence in %data of %sequence.
- *          After it finds each match, it moves forward by the length
- *          of the sequence before continuing the search.  So for example,
- *          if you search for the sequence 'aa' in the data 'baaabbb',
- *          you find one match at position 1.
- * 
- */ -L_DNA * -arrayFindEachSequence(const l_uint8 *data, - size_t datalen, - const l_uint8 *sequence, - size_t seqlen) -{ -l_int32 start, offset, realoffset, found; -L_DNA *da; - - PROCNAME("arrayFindEachSequence"); - - if (!data || !sequence) - return (L_DNA *)ERROR_PTR("data & sequence not both defined", - procName, NULL); - - da = l_dnaCreate(0); - start = 0; - while (1) { - arrayFindSequence(data + start, datalen - start, sequence, seqlen, - &offset, &found); - if (found == FALSE) - break; - - realoffset = start + offset; - l_dnaAddNumber(da, realoffset); - start = realoffset + seqlen; - if (start >= datalen) - break; - } - - if (l_dnaGetCount(da) == 0) - l_dnaDestroy(&da); - return da; -} - - -/*! - * \brief arrayFindSequence() - * - * \param[in] data byte array - * \param[in] datalen length of data, in bytes - * \param[in] sequence subarray of bytes to find in data - * \param[in] seqlen length of sequence, in bytes - * \param[out] poffset offset from beginning of - * data where the sequence begins - * \param[out] pfound 1 if sequence is found; 0 otherwise - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The byte arrays 'data' and 'sequence' are not C strings,
- *          because they can contain null bytes.  Therefore, for each
- *          we must give the length of the array.
- *      (2) This searches for the first occurrence in %data of %sequence,
- *          which consists of %seqlen bytes.  The parameter %seqlen
- *          must not exceed the actual length of the %sequence byte array.
- *      (3) If the sequence is not found, the offset will be 0, so you
- *          must check %found.
- * 
- */ -l_ok -arrayFindSequence(const l_uint8 *data, - size_t datalen, - const l_uint8 *sequence, - size_t seqlen, - l_int32 *poffset, - l_int32 *pfound) -{ -l_int32 i, j, found, lastpos; - - PROCNAME("arrayFindSequence"); - - if (poffset) *poffset = 0; - if (pfound) *pfound = FALSE; - if (!data || !sequence) - return ERROR_INT("data & sequence not both defined", procName, 1); - if (!poffset || !pfound) - return ERROR_INT("&offset and &found not defined", procName, 1); - - lastpos = datalen - seqlen + 1; - found = FALSE; - for (i = 0; i < lastpos; i++) { - for (j = 0; j < seqlen; j++) { - if (data[i + j] != sequence[j]) - break; - if (j == seqlen - 1) - found = TRUE; - } - if (found == TRUE) - break; - } - - if (found == TRUE) { - *poffset = i; - *pfound = TRUE; - } - return 0; -} - - -/*--------------------------------------------------------------------* - * Safe realloc * - *--------------------------------------------------------------------*/ -/*! - * \brief reallocNew() - * - * \param[in,out] pindata nulls indata before reallocing - * \param[in] oldsize size of input data to be copied, in bytes - * \param[in] newsize size of buffer to be reallocated in bytes - * \return ptr to new data, or NULL on error - * - * Action: !N.B. 3) and (4! - * 1 Allocates memory, initialized to 0 - * 2 Copies as much of the input data as possible - * to the new block, truncating the copy if necessary - * 3 Frees the input data - * 4 Zeroes the input data ptr - * - *
- * Notes:
- *      (1) If newsize <=0, just frees input data and nulls ptr
- *      (2) If input data is null, just callocs new memory
- *      (3) This differs from realloc in that it always allocates
- *          new memory (if newsize > 0) and initializes it to 0,
- *          it requires the amount of old data to be copied,
- *          and it takes the address of the input ptr and
- *          nulls the handle.
- * 
- */ -void * -reallocNew(void **pindata, - l_int32 oldsize, - l_int32 newsize) -{ -l_int32 minsize; -void *indata; -void *newdata; - - PROCNAME("reallocNew"); - - if (!pindata) - return ERROR_PTR("input data not defined", procName, NULL); - indata = *pindata; - - if (newsize <= 0) { /* nonstandard usage */ - if (indata) { - LEPT_FREE(indata); - *pindata = NULL; - } - return NULL; - } - - if (!indata) { /* nonstandard usage */ - if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL) - return ERROR_PTR("newdata not made", procName, NULL); - return newdata; - } - - /* Standard usage */ - if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL) - return ERROR_PTR("newdata not made", procName, NULL); - minsize = L_MIN(oldsize, newsize); - memcpy(newdata, indata, minsize); - LEPT_FREE(indata); - *pindata = NULL; - - return newdata; -} - - -/*--------------------------------------------------------------------* - * Read and write between file and memory * - *--------------------------------------------------------------------*/ -/*! - * \brief l_binaryRead() - * - * \param[in] filename - * \param[out] pnbytes number of bytes read - * \return data, or NULL on error - */ -l_uint8 * -l_binaryRead(const char *filename, - size_t *pnbytes) -{ -l_uint8 *data; -FILE *fp; - - PROCNAME("l_binaryRead"); - - if (!pnbytes) - return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL); - *pnbytes = 0; - if (!filename) - return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL); - data = l_binaryReadStream(fp, pnbytes); - fclose(fp); - return data; -} - - -/*! - * \brief l_binaryReadStream() - * - * \param[in] fp file stream opened to read; can be stdin - * \param[out] pnbytes number of bytes read - * \return null-terminated array, or NULL on error; reading 0 bytes - * is not an error - * - *
- * Notes:
- *      (1) The returned array is terminated with a null byte so that it can
- *          be used to read ascii data from a file into a proper C string.
- *      (2) This can be used to capture data that is piped in via stdin,
- *          because it does not require seeking within the file.
- *      (3) For example, you can read an image from stdin into memory
- *          using shell redirection, with one of these shell commands:
- * \code
- *             cat  | readprog
- *             readprog < 
- * \endcode
- *          where readprog is:
- * \code
- *             l_uint8 *data = l_binaryReadStream(stdin, &nbytes);
- *             Pix *pix = pixReadMem(data, nbytes);
- * \endcode
- * 
- */ -l_uint8 * -l_binaryReadStream(FILE *fp, - size_t *pnbytes) -{ -l_uint8 *data; -l_int32 seekable, navail, nadd, nread; -L_BBUFFER *bb; - - PROCNAME("l_binaryReadStream"); - - if (!pnbytes) - return (l_uint8 *)ERROR_PTR("&nbytes not defined", procName, NULL); - *pnbytes = 0; - if (!fp) - return (l_uint8 *)ERROR_PTR("fp not defined", procName, NULL); - - /* Test if the stream is seekable, by attempting to seek to - * the start of data. This is a no-op. If it is seekable, use - * l_binaryReadSelectStream() to determine the size of the - * data to be read in advance. */ - seekable = (ftell(fp) == 0) ? 1 : 0; - if (seekable) - return l_binaryReadSelectStream(fp, 0, 0, pnbytes); - - /* If it is not seekable, use the bbuffer to realloc memory - * as needed during reading. */ - bb = bbufferCreate(NULL, 4096); - while (1) { - navail = bb->nalloc - bb->n; - if (navail < 4096) { - nadd = L_MAX(bb->nalloc, 4096); - bbufferExtendArray(bb, nadd); - } - nread = fread((void *)(bb->array + bb->n), 1, 4096, fp); - bb->n += nread; - if (nread != 4096) break; - } - - /* Copy the data to a new array sized for the data, because - * the bbuffer array can be nearly twice the size we need. */ - if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) { - memcpy(data, bb->array, bb->n); - *pnbytes = bb->n; - } else { - L_ERROR("calloc fail for data\n", procName); - } - - bbufferDestroy(&bb); - return data; -} - - -/*! - * \brief l_binaryReadSelect() - * - * \param[in] filename - * \param[in] start first byte to read - * \param[in] nbytes number of bytes to read; use 0 to read to end of file - * \param[out] pnread number of bytes actually read - * \return data, or NULL on error - * - *
- * Notes:
- *      (1) The returned array is terminated with a null byte so that it can
- *          be used to read ascii data from a file into a proper C string.
- * 
- */ -l_uint8 * -l_binaryReadSelect(const char *filename, - size_t start, - size_t nbytes, - size_t *pnread) -{ -l_uint8 *data; -FILE *fp; - - PROCNAME("l_binaryReadSelect"); - - if (!pnread) - return (l_uint8 *)ERROR_PTR("pnread not defined", procName, NULL); - *pnread = 0; - if (!filename) - return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL); - - if ((fp = fopenReadStream(filename)) == NULL) - return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL); - data = l_binaryReadSelectStream(fp, start, nbytes, pnread); - fclose(fp); - return data; -} - - -/*! - * \brief l_binaryReadSelectStream() - * - * \param[in] fp file stream - * \param[in] start first byte to read - * \param[in] nbytes number of bytes to read; use 0 to read to end of file - * \param[out] pnread number of bytes actually read - * \return null-terminated array, or NULL on error; reading 0 bytes - * is not an error - * - *
- * Notes:
- *      (1) The returned array is terminated with a null byte so that it can
- *          be used to read ascii data from a file into a proper C string.
- *          If the file to be read is empty and %start == 0, an array
- *          with a single null byte is returned.
- *      (2) Side effect: the stream pointer is re-positioned to the
- *          beginning of the file.
- * 
- */ -l_uint8 * -l_binaryReadSelectStream(FILE *fp, - size_t start, - size_t nbytes, - size_t *pnread) -{ -l_uint8 *data; -size_t bytesleft, bytestoread, nread, filebytes; - - PROCNAME("l_binaryReadSelectStream"); - - if (!pnread) - return (l_uint8 *)ERROR_PTR("&nread not defined", procName, NULL); - *pnread = 0; - if (!fp) - return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL); - - /* Verify and adjust the parameters if necessary */ - fseek(fp, 0, SEEK_END); /* EOF */ - filebytes = ftell(fp); - fseek(fp, 0, SEEK_SET); - if (start > filebytes) { - L_ERROR("start = %zu but filebytes = %zu\n", procName, - start, filebytes); - return NULL; - } - if (filebytes == 0) /* start == 0; nothing to read; return null byte */ - return (l_uint8 *)LEPT_CALLOC(1, 1); - bytesleft = filebytes - start; /* greater than 0 */ - if (nbytes == 0) nbytes = bytesleft; - bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft; - - /* Read the data */ - if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL) - return (l_uint8 *)ERROR_PTR("calloc fail for data", procName, NULL); - fseek(fp, start, SEEK_SET); - nread = fread(data, 1, bytestoread, fp); - if (nbytes != nread) - L_INFO("%zu bytes requested; %zu bytes read\n", procName, - nbytes, nread); - *pnread = nread; - fseek(fp, 0, SEEK_SET); - return data; -} - - -/*! 
- * \brief l_binaryWrite() - * - * \param[in] filename output file - * \param[in] operation "w" for write; "a" for append - * \param[in] data binary data to be written - * \param[in] nbytes size of data array - * \return 0 if OK; 1 on error - */ -l_ok -l_binaryWrite(const char *filename, - const char *operation, - const void *data, - size_t nbytes) -{ -char actualOperation[20]; -FILE *fp; - - PROCNAME("l_binaryWrite"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!operation) - return ERROR_INT("operation not defined", procName, 1); - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (nbytes <= 0) - return ERROR_INT("nbytes must be > 0", procName, 1); - - if (strcmp(operation, "w") && strcmp(operation, "a")) - return ERROR_INT("operation not one of {'w','a'}", procName, 1); - - /* The 'b' flag to fopen() is ignored for all POSIX - * conforming systems. However, Windows needs the 'b' flag. */ - stringCopy(actualOperation, operation, 2); - stringCat(actualOperation, 20, "b"); - - if ((fp = fopenWriteStream(filename, actualOperation)) == NULL) - return ERROR_INT("stream not opened", procName, 1); - fwrite(data, 1, nbytes, fp); - fclose(fp); - return 0; -} - - -/*! - * \brief nbytesInFile() - * - * \param[in] filename - * \return nbytes in file; 0 on error - */ -size_t -nbytesInFile(const char *filename) -{ -size_t nbytes; -FILE *fp; - - PROCNAME("nbytesInFile"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 0); - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("stream not opened", procName, 0); - nbytes = fnbytesInFile(fp); - fclose(fp); - return nbytes; -} - - -/*! 
- * \brief fnbytesInFile() - * - * \param[in] fp file stream - * \return nbytes in file; 0 on error - */ -size_t -fnbytesInFile(FILE *fp) -{ -l_int64 pos, nbytes; - - PROCNAME("fnbytesInFile"); - - if (!fp) - return ERROR_INT("stream not open", procName, 0); - - pos = ftell(fp); /* initial position */ - if (pos < 0) - return ERROR_INT("seek position must be > 0", procName, 0); - fseek(fp, 0, SEEK_END); /* EOF */ - nbytes = ftell(fp); - fseek(fp, pos, SEEK_SET); /* back to initial position */ - return nbytes; -} - - -/*--------------------------------------------------------------------* - * Copy and compare in memory * - *--------------------------------------------------------------------*/ -/*! - * \brief l_binaryCopy() - * - * \param[in] datas - * \param[in] size of data array - * \return datad on heap, or NULL on error - * - *
- * Notes:
- *      (1) We add 4 bytes to the zeroed output because in some cases
- *          (e.g., string handling) it is important to have the data
- *          be null terminated.  This guarantees that after the memcpy,
- *          the result is automatically null terminated.
- * 
- */ -l_uint8 * -l_binaryCopy(const l_uint8 *datas, - size_t size) -{ -l_uint8 *datad; - - PROCNAME("l_binaryCopy"); - - if (!datas) - return (l_uint8 *)ERROR_PTR("datas not defined", procName, NULL); - - if ((datad = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8))) == NULL) - return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL); - memcpy(datad, datas, size); - return datad; -} - - -l_ok -l_binaryCompare(const l_uint8 *data1, - size_t size1, - const l_uint8 *data2, - size_t size2, - l_int32 *psame) -{ -l_int32 i; - - PROCNAME("l_binaryCompare"); - - if (!psame) - return ERROR_INT("&same not defined", procName, 1); - *psame = FALSE; - if (!data1 || !data2) - return ERROR_INT("data1 and data2 not both defined", procName, 1); - if (size1 != size2) return 0; - for (i = 0; i < size1; i++) { - if (data1[i] != data2[i]) - return 0; - } - *psame = TRUE; - return 0; -} - -/*--------------------------------------------------------------------* - * File copy operations * - *--------------------------------------------------------------------*/ -/*! - * \brief fileCopy() - * - * \param[in] srcfile copy from this file - * \param[in] newfile copy to this file - * \return 0 if OK, 1 on error - */ -l_ok -fileCopy(const char *srcfile, - const char *newfile) -{ -l_int32 ret; -size_t nbytes; -l_uint8 *data; - - PROCNAME("fileCopy"); - - if (!srcfile) - return ERROR_INT("srcfile not defined", procName, 1); - if (!newfile) - return ERROR_INT("newfile not defined", procName, 1); - - if ((data = l_binaryRead(srcfile, &nbytes)) == NULL) - return ERROR_INT("data not returned", procName, 1); - ret = l_binaryWrite(newfile, "w", data, nbytes); - LEPT_FREE(data); - return ret; -} - - -/*! 
- * \brief fileConcatenate() - * - * \param[in] srcfile append data from this file - * \param[in] destfile add data to this file - * \return 0 if OK, 1 on error - */ -l_ok -fileConcatenate(const char *srcfile, - const char *destfile) -{ -size_t nbytes; -l_uint8 *data; - - PROCNAME("fileConcatenate"); - - if (!srcfile) - return ERROR_INT("srcfile not defined", procName, 1); - if (!destfile) - return ERROR_INT("destfile not defined", procName, 1); - - data = l_binaryRead(srcfile, &nbytes); - l_binaryWrite(destfile, "a", data, nbytes); - LEPT_FREE(data); - return 0; -} - - -/*! - * \brief fileAppendString() - * - * \param[in] filename - * \param[in] str string to append to file - * \return 0 if OK, 1 on error - */ -l_ok -fileAppendString(const char *filename, - const char *str) -{ -FILE *fp; - - PROCNAME("fileAppendString"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!str) - return ERROR_INT("str not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "a")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - fprintf(fp, "%s", str); - fclose(fp); - return 0; -} - - -/*--------------------------------------------------------------------* - * Multi-platform functions for opening file streams * - *--------------------------------------------------------------------*/ -/*! - * \brief fopenReadStream() - * - * \param[in] filename - * \return stream, or NULL on error - * - *
- * Notes:
- *      (1) This should be used whenever you want to run fopen() to
- *          read from a stream.  Never call fopen() directory.
- *      (2) This handles the temp directory pathname conversion on windows:
- *              /tmp  ==>  [Windows Temp directory]
- * 
- */ -FILE * -fopenReadStream(const char *filename) -{ -char *fname, *tail; -FILE *fp; - - PROCNAME("fopenReadStream"); - - if (!filename) - return (FILE *)ERROR_PTR("filename not defined", procName, NULL); - - /* Try input filename */ - fname = genPathname(filename, NULL); - fp = fopen(fname, "rb"); - LEPT_FREE(fname); - if (fp) return fp; - - /* Else, strip directory and try locally */ - splitPathAtDirectory(filename, NULL, &tail); - fp = fopen(tail, "rb"); - LEPT_FREE(tail); - - if (!fp) - return (FILE *)ERROR_PTR("file not found", procName, NULL); - return fp; -} - - -/*! - * \brief fopenWriteStream() - * - * \param[in] filename - * \param[in] modestring - * \return stream, or NULL on error - * - *
- * Notes:
- *      (1) This should be used whenever you want to run fopen() to
- *          write or append to a stream.  Never call fopen() directory.
- *      (2) This handles the temp directory pathname conversion on windows:
- *              /tmp  ==>  [Windows Temp directory]
- * 
- */ -FILE * -fopenWriteStream(const char *filename, - const char *modestring) -{ -char *fname; -FILE *fp; - - PROCNAME("fopenWriteStream"); - - if (!filename) - return (FILE *)ERROR_PTR("filename not defined", procName, NULL); - - fname = genPathname(filename, NULL); - fp = fopen(fname, modestring); - LEPT_FREE(fname); - if (!fp) - return (FILE *)ERROR_PTR("stream not opened", procName, NULL); - return fp; -} - - -/*! - * \brief fopenReadFromMemory() - * - * \param[in] data, size - * \return file stream, or NULL on error - * - *
- * Notes:
- *      (1) Work-around if fmemopen() not available.
- *      (2) Windows tmpfile() writes into the root C:\ directory, which
- *          requires admin privileges.  This also works around that.
- * 
- */ -FILE * -fopenReadFromMemory(const l_uint8 *data, - size_t size) -{ -FILE *fp; - - PROCNAME("fopenReadFromMemory"); - - if (!data) - return (FILE *)ERROR_PTR("data not defined", procName, NULL); - -#if HAVE_FMEMOPEN - if ((fp = fmemopen((void *)data, size, "rb")) == NULL) - return (FILE *)ERROR_PTR("stream not opened", procName, NULL); -#else /* write to tmp file */ - L_INFO("work-around: writing to a temp file\n", procName); - #ifdef _WIN32 - if ((fp = fopenWriteWinTempfile()) == NULL) - return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL); - #else - if ((fp = tmpfile()) == NULL) - return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL); - #endif /* _WIN32 */ - fwrite(data, 1, size, fp); - rewind(fp); -#endif /* HAVE_FMEMOPEN */ - - return fp; -} - - -/*--------------------------------------------------------------------* - * Opening a windows tmpfile for writing * - *--------------------------------------------------------------------*/ -/*! - * \brief fopenWriteWinTempfile() - * - * \return file stream, or NULL on error - * - *
- * Notes:
- *      (1) The Windows version of tmpfile() writes into the root
- *          C:\ directory, which requires admin privileges.  This
- *          function provides an alternative implementation.
- * 
- */ -FILE * -fopenWriteWinTempfile(void) -{ -#ifdef _WIN32 -l_int32 handle; -FILE *fp; -char *filename; - - PROCNAME("fopenWriteWinTempfile"); - - if ((filename = l_makeTempFilename()) == NULL) { - L_ERROR("l_makeTempFilename failed, %s\n", procName, strerror(errno)); - return NULL; - } - - handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED | - _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE); - lept_free(filename); - if (handle == -1) { - L_ERROR("_open failed, %s\n", procName, strerror(errno)); - return NULL; - } - - if ((fp = _fdopen(handle, "r+b")) == NULL) { - L_ERROR("_fdopen failed, %s\n", procName, strerror(errno)); - return NULL; - } - - return fp; -#else - return NULL; -#endif /* _WIN32 */ -} - - -/*--------------------------------------------------------------------* - * Multi-platform functions that avoid C-runtime boundary * - * crossing for applications with Windows DLLs * - *--------------------------------------------------------------------*/ -/* - * Problems arise when pointers to streams and data are passed - * between two Windows DLLs that have been generated with different - * C runtimes. To avoid this, leptonica provides wrappers for - * several C library calls. - */ -/*! - * \brief lept_fopen() - * - * \param[in] filename - * \param[in] mode same as for fopen(); e.g., "rb" - * \return stream or NULL on error - * - *
- * Notes:
- *      (1) This must be used by any application that passes
- *          a file handle to a leptonica Windows DLL.
- * 
- */ -FILE * -lept_fopen(const char *filename, - const char *mode) -{ - PROCNAME("lept_fopen"); - - if (!filename) - return (FILE *)ERROR_PTR("filename not defined", procName, NULL); - if (!mode) - return (FILE *)ERROR_PTR("mode not defined", procName, NULL); - - if (stringFindSubstr(mode, "r", NULL)) - return fopenReadStream(filename); - else - return fopenWriteStream(filename, mode); -} - - -/*! - * \brief lept_fclose() - * - * \param[in] fp file stream - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This should be used by any application that accepts
- *          a file handle generated by a leptonica Windows DLL.
- * 
- */ -l_ok -lept_fclose(FILE *fp) -{ - PROCNAME("lept_fclose"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - - return fclose(fp); -} - - -/*! - * \brief lept_calloc() - * - * \param[in] nmemb number of members - * \param[in] size of each member - * \return void ptr, or NULL on error - * - *
- * Notes:
- *      (1) For safety with windows DLLs, this can be used in conjunction
- *          with lept_free() to avoid C-runtime boundary problems.
- *          Just use these two functions throughout your application.
- * 
- */ -void * -lept_calloc(size_t nmemb, - size_t size) -{ - if (nmemb <= 0 || size <= 0) - return NULL; - return LEPT_CALLOC(nmemb, size); -} - - -/*! - * \brief lept_free() - * - * \param[in] ptr - * - *
- * Notes:
- *      (1) This should be used by any application that accepts
- *          heap data allocated by a leptonica Windows DLL.
- * 
- */ -void -lept_free(void *ptr) -{ - if (!ptr) return; - LEPT_FREE(ptr); - return; -} - - -/*--------------------------------------------------------------------* - * Multi-platform file system operations * - * [ These only write to /tmp or its subdirectories ] * - *--------------------------------------------------------------------*/ -/*! - * \brief lept_mkdir() - * - * \param[in] subdir of /tmp or its equivalent on Windows - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) %subdir is a partial path that can consist of one or more
- *          directories.
- *      (2) This makes any subdirectories of /tmp that are required.
- *      (3) The root temp directory is:
- *            /tmp    (unix)  [default]
- *            [Temp]  (windows)
- * 
- */ -l_int32 -lept_mkdir(const char *subdir) -{ -char *dir, *tmpdir; -l_int32 i, n; -l_int32 ret = 0; -SARRAY *sa; -#ifdef _WIN32 -l_uint32 attributes; -#endif /* _WIN32 */ - - PROCNAME("lept_mkdir"); - - if (!LeptDebugOK) { - L_INFO("making named temp subdirectory %s is disabled\n", - procName, subdir); - return 0; - } - - if (!subdir) - return ERROR_INT("subdir not defined", procName, 1); - if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) - return ERROR_INT("subdir not an actual subdirectory", procName, 1); - - sa = sarrayCreate(0); - sarraySplitString(sa, subdir, "/"); - n = sarrayGetCount(sa); - dir = genPathname("/tmp", NULL); - /* Make sure the tmp directory exists */ -#ifndef _WIN32 - ret = mkdir(dir, 0777); -#else - attributes = GetFileAttributes(dir); - if (attributes == INVALID_FILE_ATTRIBUTES) - ret = (CreateDirectory(dir, NULL) ? 0 : 1); -#endif - /* Make all the subdirectories */ - for (i = 0; i < n; i++) { - tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY)); -#ifndef _WIN32 - ret += mkdir(tmpdir, 0777); -#else - if (CreateDirectory(tmpdir, NULL) == 0) - ret += (GetLastError () != ERROR_ALREADY_EXISTS); -#endif - LEPT_FREE(dir); - dir = tmpdir; - } - LEPT_FREE(dir); - sarrayDestroy(&sa); - if (ret > 0) - L_ERROR("failure to create %d directories\n", procName, ret); - return ret; -} - - -/*! - * \brief lept_rmdir() - * - * \param[in] subdir of /tmp or its equivalent on Windows - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) %subdir is a partial path that can consist of one or more
- *          directories.
- *      (2) This removes all files from the specified subdirectory of
- *          the root temp directory:
- *            /tmp    (unix)
- *            [Temp]  (windows)
- *          and then removes the subdirectory.
- *      (3) The combination
- *            lept_rmdir(subdir);
- *            lept_mkdir(subdir);
- *          is guaranteed to give you an empty subdirectory.
- * 
- */ -l_int32 -lept_rmdir(const char *subdir) -{ -char *dir, *realdir, *fname, *fullname; -l_int32 exists, ret, i, nfiles; -SARRAY *sa; -#ifdef _WIN32 -char *newpath; -#endif /* _WIN32 */ - - PROCNAME("lept_rmdir"); - - if (!subdir) - return ERROR_INT("subdir not defined", procName, 1); - if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) - return ERROR_INT("subdir not an actual subdirectory", procName, 1); - - /* Find the temp subdirectory */ - dir = pathJoin("/tmp", subdir); - if (!dir) - return ERROR_INT("directory name not made", procName, 1); - lept_direxists(dir, &exists); - if (!exists) { /* fail silently */ - LEPT_FREE(dir); - return 0; - } - - /* List all the files in that directory */ - if ((sa = getFilenamesInDirectory(dir)) == NULL) { - L_ERROR("directory %s does not exist!\n", procName, dir); - LEPT_FREE(dir); - return 1; - } - nfiles = sarrayGetCount(sa); - - for (i = 0; i < nfiles; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - fullname = genPathname(dir, fname); - remove(fullname); - LEPT_FREE(fullname); - } - -#ifndef _WIN32 - realdir = genPathname("/tmp", subdir); - ret = rmdir(realdir); - LEPT_FREE(realdir); -#else - newpath = genPathname(dir, NULL); - ret = (RemoveDirectory(newpath) ? 0 : 1); - LEPT_FREE(newpath); -#endif /* !_WIN32 */ - - sarrayDestroy(&sa); - LEPT_FREE(dir); - return ret; -} - - -/*! - * \brief lept_direxists() - * - * \param[in] dir - * \param[out] pexists 1 if it exists; 0 otherwise - * \return void - * - *
- * Notes:
- *      (1) Always use unix pathname separators.
- *      (2) By calling genPathname(), if the pathname begins with "/tmp"
- *          this does an automatic directory translation on windows
- *          to a path in the windows [Temp] directory:
- *             "/tmp"  ==>  [Temp] (windows)
- * 
- */ -void -lept_direxists(const char *dir, - l_int32 *pexists) -{ -char *realdir; - - if (!pexists) return; - *pexists = 0; - if (!dir) return; - if ((realdir = genPathname(dir, NULL)) == NULL) - return; - -#ifndef _WIN32 - { - struct stat s; - l_int32 err = stat(realdir, &s); - if (err != -1 && S_ISDIR(s.st_mode)) - *pexists = 1; - } -#else /* _WIN32 */ - l_uint32 attributes; - attributes = GetFileAttributes(realdir); - if (attributes != INVALID_FILE_ATTRIBUTES && - (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - *pexists = 1; - } -#endif /* _WIN32 */ - - LEPT_FREE(realdir); - return; -} - - -/*! - * \brief lept_rm_match() - * - * \param[in] subdir [optional] if NULL, the removed files are in /tmp - * \param[in] substr [optional] pattern to match in filename - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) This removes the matched files in /tmp or a subdirectory of /tmp.
- *          Use NULL for %subdir if the files are in /tmp.
- *      (2) If %substr == NULL, this removes all files in the directory.
- *          If %substr == "" (empty), this removes no files.
- *          If both %subdir == NULL and %substr == NULL, this removes
- *          all files in /tmp.
- *      (3) Use unix pathname separators.
- *      (4) By calling genPathname(), if the pathname begins with "/tmp"
- *          this does an automatic directory translation on windows
- *          to a path in the windows [Temp] directory:
- *             "/tmp"  ==>  [Temp] (windows)
- *      (5) Error conditions:
- *            * returns -1 if the directory is not found
- *            * returns the number of files (> 0) that it was unable to remove.
- * 
- */ -l_int32 -lept_rm_match(const char *subdir, - const char *substr) -{ -char *path, *fname; -char tempdir[256]; -l_int32 i, n, ret; -SARRAY *sa; - - PROCNAME("lept_rm_match"); - - makeTempDirname(tempdir, sizeof(tempdir), subdir); - if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL) - return ERROR_INT("sa not made", procName, -1); - n = sarrayGetCount(sa); - if (n == 0) { - L_WARNING("no matching files found\n", procName); - sarrayDestroy(&sa); - return 0; - } - - ret = 0; - for (i = 0; i < n; i++) { - fname = sarrayGetString(sa, i, L_NOCOPY); - path = genPathname(fname, NULL); - if (lept_rmfile(path) != 0) { - L_ERROR("failed to remove %s\n", procName, path); - ret++; - } - LEPT_FREE(path); - } - sarrayDestroy(&sa); - return ret; -} - - -/*! - * \brief lept_rm() - * - * \param[in] subdir [optional] subdir of '/tmp'; can be NULL - * \param[in] tail filename without the directory - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) By calling genPathname(), this does an automatic directory
- *          translation on windows to a path in the windows [Temp] directory:
- *             "/tmp/..."  ==>  [Temp]/... (windows)
- * 
- */ -l_int32 -lept_rm(const char *subdir, - const char *tail) -{ -char *path; -char newtemp[256]; -l_int32 ret; - - PROCNAME("lept_rm"); - - if (!tail || strlen(tail) == 0) - return ERROR_INT("tail undefined or empty", procName, 1); - - if (makeTempDirname(newtemp, sizeof(newtemp), subdir)) - return ERROR_INT("temp dirname not made", procName, 1); - path = genPathname(newtemp, tail); - ret = lept_rmfile(path); - LEPT_FREE(path); - return ret; -} - - -/*! - * \brief - * - * lept_rmfile() - * - * \param[in] filepath full path to file including the directory - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) This removes the named file.
- *      (2) Use unix pathname separators.
- *      (3) There is no name translation.
- *      (4) Unlike the other lept_* functions in this section, this can remove
- *          any file -- it is not restricted to files that are in /tmp or a
- *          subdirectory of it.
- * 
- */ -l_int32 -lept_rmfile(const char *filepath) -{ -l_int32 ret; - - PROCNAME("lept_rmfile"); - - if (!filepath || strlen(filepath) == 0) - return ERROR_INT("filepath undefined or empty", procName, 1); - -#ifndef _WIN32 - ret = remove(filepath); -#else - /* Set attributes to allow deletion of read-only files */ - SetFileAttributes(filepath, FILE_ATTRIBUTE_NORMAL); - ret = DeleteFile(filepath) ? 0 : 1; -#endif /* !_WIN32 */ - - return ret; -} - - -/*! - * \brief lept_mv() - * - * \param[in] srcfile - * \param[in] newdir [optional]; can be NULL - * \param[in] newtail [optional]; can be NULL - * \param[out] pnewpath [optional] of actual path; can be NULL - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) This moves %srcfile to /tmp or to a subdirectory of /tmp.
- *      (2) %srcfile can either be a full path or relative to the
- *          current directory.
- *      (3) %newdir can either specify an existing subdirectory of /tmp
- *          or can be NULL.  In the latter case, the file will be written
- *          into /tmp.
- *      (4) %newtail can either specify a filename tail or, if NULL,
- *          the filename is taken from src-tail, the tail of %srcfile.
- *      (5) For debugging, the computed newpath can be returned.  It must
- *          be freed by the caller.
- *      (6) Reminders:
- *          (a) specify files using unix pathnames
- *          (b) for windows, translates
- *                 /tmp  ==>  [Temp]
- *              where [Temp] is the windows temp directory
- *      (7) Examples:
- *          * newdir = NULL,    newtail = NULL    ==> /tmp/src-tail
- *          * newdir = NULL,    newtail = abc     ==> /tmp/abc
- *          * newdir = def/ghi, newtail = NULL    ==> /tmp/def/ghi/src-tail
- *          * newdir = def/ghi, newtail = abc     ==> /tmp/def/ghi/abc
- * 
- */ -l_int32 -lept_mv(const char *srcfile, - const char *newdir, - const char *newtail, - char **pnewpath) -{ -char *srcpath, *newpath, *dir, *srctail; -char newtemp[256]; -l_int32 ret; - - PROCNAME("lept_mv"); - - if (!srcfile) - return ERROR_INT("srcfile not defined", procName, 1); - - /* Require output pathname to be in /tmp/ or a subdirectory */ - if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) - return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1); - - /* Get canonical src pathname */ - splitPathAtDirectory(srcfile, &dir, &srctail); - -#ifndef _WIN32 - srcpath = pathJoin(dir, srctail); - LEPT_FREE(dir); - - /* Generate output pathname */ - if (!newtail || newtail[0] == '\0') - newpath = pathJoin(newtemp, srctail); - else - newpath = pathJoin(newtemp, newtail); - LEPT_FREE(srctail); - - /* Overwrite any existing file at 'newpath' */ - ret = fileCopy(srcpath, newpath); - if (!ret) { /* and remove srcfile */ - char *realpath = genPathname(srcpath, NULL); - remove(realpath); - LEPT_FREE(realpath); - } -#elif (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - ret = -1; -#else - srcpath = genPathname(dir, srctail); - LEPT_FREE(dir); - - /* Generate output pathname */ - if (!newtail || newtail[0] == '\0') - newpath = genPathname(newtemp, srctail); - else - newpath = genPathname(newtemp, newtail); - LEPT_FREE(srctail); - - /* Overwrite any existing file at 'newpath' */ - ret = MoveFileEx(srcpath, newpath, - MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1; -#endif /* ! _WIN32 */ - - LEPT_FREE(srcpath); - if (pnewpath) - *pnewpath = newpath; - else - LEPT_FREE(newpath); - return ret; -} - - -/*! - * \brief lept_cp() - * - * \param[in] srcfile - * \param[in] newdir [optional]; can be NULL - * \param[in] newtail [optional]; can be NULL - * \param[out] pnewpath [optional] of actual path; can be NULL - * \return 0 on success, non-zero on failure - * - *
- * Notes:
- *      (1) This copies %srcfile to /tmp or to a subdirectory of /tmp.
- *      (2) %srcfile can either be a full path or relative to the
- *          current directory.
- *      (3) %newdir can either specify an existing subdirectory of /tmp,
- *          or can be NULL.  In the latter case, the file will be written
- *          into /tmp.
- *      (4) %newtail can either specify a filename tail or, if NULL,
- *          the filename is taken from src-tail, the tail of %srcfile.
- *      (5) For debugging, the computed newpath can be returned.  It must
- *          be freed by the caller.
- *      (6) Reminders:
- *          (a) specify files using unix pathnames
- *          (b) for windows, translates
- *                 /tmp  ==>  [Temp]
- *              where [Temp] is the windows temp directory
- *      (7) Examples:
- *          * newdir = NULL,    newtail = NULL    ==> /tmp/src-tail
- *          * newdir = NULL,    newtail = abc     ==> /tmp/abc
- *          * newdir = def/ghi, newtail = NULL    ==> /tmp/def/ghi/src-tail
- *          * newdir = def/ghi, newtail = abc     ==> /tmp/def/ghi/abc
- *
- * 
- */ -l_int32 -lept_cp(const char *srcfile, - const char *newdir, - const char *newtail, - char **pnewpath) -{ -char *srcpath, *newpath, *dir, *srctail; -char newtemp[256]; -l_int32 ret; - - PROCNAME("lept_cp"); - - if (!srcfile) - return ERROR_INT("srcfile not defined", procName, 1); - - /* Require output pathname to be in /tmp or a subdirectory */ - if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) - return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1); - - /* Get canonical src pathname */ - splitPathAtDirectory(srcfile, &dir, &srctail); - -#ifndef _WIN32 - srcpath = pathJoin(dir, srctail); - LEPT_FREE(dir); - - /* Generate output pathname */ - if (!newtail || newtail[0] == '\0') - newpath = pathJoin(newtemp, srctail); - else - newpath = pathJoin(newtemp, newtail); - LEPT_FREE(srctail); - - /* Overwrite any existing file at 'newpath' */ - ret = fileCopy(srcpath, newpath); -#else - srcpath = genPathname(dir, srctail); - LEPT_FREE(dir); - - /* Generate output pathname */ - if (!newtail || newtail[0] == '\0') - newpath = genPathname(newtemp, srctail); - else - newpath = genPathname(newtemp, newtail); - LEPT_FREE(srctail); - - /* Overwrite any existing file at 'newpath' */ -#if WINAPI_FAMILY_APP - ret = -1; -#else - ret = CopyFile(srcpath, newpath, FALSE) ? 0 : 1; -#endif -#endif /* !_WIN32 */ - - LEPT_FREE(srcpath); - if (pnewpath) - *pnewpath = newpath; - else - LEPT_FREE(newpath); - return ret; -} - - -/*--------------------------------------------------------------------* - * Special debug/test function for calling 'system' * - *--------------------------------------------------------------------*/ -#if defined(__APPLE__) - #include "TargetConditionals.h" -#endif /* __APPLE__ */ - -/*! - * \brief callSystemDebug() - * - * \param[in] cmd command to be exec'd - * \return void - * - *
- * Notes:
- *      (1) The C library 'system' call is only made through this function.
- *          It only works in debug/test mode, where the global variable
- *          LeptDebugOK == TRUE.  This variable is set to FALSE in the
- *          library as distributed, and calling this function will
- *          generate an error message.
- * 
- */ -void -callSystemDebug(const char *cmd) -{ -l_int32 ret; - - PROCNAME("callSystemDebug"); - - if (!cmd) { - L_ERROR("cmd not defined\n", procName); - return; - } - if (LeptDebugOK == FALSE) { - L_INFO("'system' calls are disabled\n", procName); - return; - } - -#if defined(__APPLE__) /* iOS 11 does not support system() */ - - #if TARGET_OS_OSX /* Mac OS X */ - ret = system(cmd); - #elif TARGET_OS_IPHONE || defined(OS_IOS) /* iOS */ - L_ERROR("iOS 11 does not support system()\n", procName); - #endif /* TARGET_OS_OSX */ - -#else /* ! __APPLE__ */ - - ret = system(cmd); - -#endif /* __APPLE__ */ -} - - -/*--------------------------------------------------------------------* - * General file name operations * - *--------------------------------------------------------------------*/ -/*! - * \brief splitPathAtDirectory() - * - * \param[in] pathname full path; can be a directory - * \param[out] pdir [optional] root directory name of - * input path, including trailing '/' - * \param[out] ptail [optional] path tail, which is either - * the file name within the root directory or - * the last sub-directory in the path - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If you only want the tail, input null for the root directory ptr.
- *      (2) If you only want the root directory name, input null for the
- *          tail ptr.
- *      (3) This function makes decisions based only on the lexical
- *          structure of the input.  Examples:
- *            /usr/tmp/abc.d  -->  dir: /usr/tmp/       tail: abc.d
- *            /usr/tmp/       -->  dir: /usr/tmp/       tail: [empty string]
- *            /usr/tmp        -->  dir: /usr/           tail: tmp
- *            abc.d           -->  dir: [empty string]  tail: abc.d
- *      (4  Consider the first example above: /usr/tmp/abc.d.
- *          Suppose you want the stem of the file, abc, without either
- *          the directory or the extension.  This can be extracted in two steps:
- *              splitPathAtDirectory("usr/tmp/abc.d", NULL, &tail);
- *                   [sets tail: "abc.d"]
- *              splitPathAtExtension(tail, &basename, NULL);
- *                   [sets basename: "abc"]
- *      (5) The input can have either forward (unix) or backward (win)
- *          slash separators.  The output has unix separators.
- *          Note that Win32 pathname functions generally accept both
- *          slash forms, but the windows command line interpreter
- *          only accepts backward slashes, because forward slashes are
- *          used to demarcate switches (vs. dashes in unix).
- * 
- */ -l_ok -splitPathAtDirectory(const char *pathname, - char **pdir, - char **ptail) -{ -char *cpathname, *lastslash; - - PROCNAME("splitPathAtDirectory"); - - if (!pdir && !ptail) - return ERROR_INT("null input for both strings", procName, 1); - if (pdir) *pdir = NULL; - if (ptail) *ptail = NULL; - if (!pathname) - return ERROR_INT("pathname not defined", procName, 1); - - cpathname = stringNew(pathname); - convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR); - lastslash = strrchr(cpathname, '/'); - if (lastslash) { - if (ptail) - *ptail = stringNew(lastslash + 1); - if (pdir) { - *(lastslash + 1) = '\0'; - *pdir = cpathname; - } else { - LEPT_FREE(cpathname); - } - } else { /* no directory */ - if (pdir) - *pdir = stringNew(""); - if (ptail) - *ptail = cpathname; - else - LEPT_FREE(cpathname); - } - - return 0; -} - - -/*! - * \brief splitPathAtExtension() - * - * \param[in] pathname full path; can be a directory - * \param[out] pbasename [optional] pathname not including the - * last dot and characters after that - * \param[out] pextension [optional] path extension, which is - * the last dot and the characters after it. If - * there is no extension, it returns the empty string - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) If you only want the extension, input null for the basename ptr.
- *      (2) If you only want the basename without extension, input null
- *          for the extension ptr.
- *      (3) This function makes decisions based only on the lexical
- *          structure of the input.  Examples:
- *            /usr/tmp/abc.jpg  -->  basename: /usr/tmp/abc    ext: .jpg
- *            /usr/tmp/.jpg     -->  basename: /usr/tmp/       ext: .jpg
- *            /usr/tmp.jpg/     -->  basename: /usr/tmp.jpg/   ext: [empty str]
- *            ./.jpg            -->  basename: ./              ext: .jpg
- *      (4) The input can have either forward (unix) or backward (win)
- *          slash separators.  The output has unix separators.
- *      (5) Note that basename, as used here, is different from the result
- *          of the unix program 'basename'.  Here, basename is the entire
- *          pathname up to a final extension and its preceding dot.
- * 
- */ -l_ok -splitPathAtExtension(const char *pathname, - char **pbasename, - char **pextension) -{ -char *tail, *dir, *lastdot; -char empty[4] = ""; - - PROCNAME("splitPathExtension"); - - if (!pbasename && !pextension) - return ERROR_INT("null input for both strings", procName, 1); - if (pbasename) *pbasename = NULL; - if (pextension) *pextension = NULL; - if (!pathname) - return ERROR_INT("pathname not defined", procName, 1); - - /* Split out the directory first */ - splitPathAtDirectory(pathname, &dir, &tail); - - /* Then look for a "." in the tail part. - * This way we ignore all "." in the directory. */ - if ((lastdot = strrchr(tail, '.'))) { - if (pextension) - *pextension = stringNew(lastdot); - if (pbasename) { - *lastdot = '\0'; - *pbasename = stringJoin(dir, tail); - } - } else { - if (pextension) - *pextension = stringNew(empty); - if (pbasename) - *pbasename = stringNew(pathname); - } - LEPT_FREE(dir); - LEPT_FREE(tail); - return 0; -} - - -/*! - * \brief pathJoin() - * - * \param[in] dir [optional] can be null - * \param[in] fname [optional] can be null - * \return specially concatenated path, or NULL on error - * - *
- * Notes:
- *      (1) Use unix-style pathname separators ('/').
- *      (2) %fname can be the entire path, or part of the path containing
- *          at least one directory, or a tail without a directory, or NULL.
- *      (3) It produces a path that strips multiple slashes to a single
- *          slash, joins %dir and %fname by a slash, and has no trailing
- *          slashes (except in the cases where %dir == "/" and
- *          %fname == NULL, or v.v.).
- *      (4) If both %dir and %fname are null, produces an empty string.
- *      (5) Neither %dir nor %fname can begin with '..'.
- *      (6) The result is not canonicalized or tested for correctness:
- *          garbage in (e.g., /&%), garbage out.
- *      (7) Examples:
- *             //tmp// + //abc/  -->  /tmp/abc
- *             tmp/ + /abc/      -->  tmp/abc
- *             tmp/ + abc/       -->  tmp/abc
- *             /tmp/ + ///       -->  /tmp
- *             /tmp/ + NULL      -->  /tmp
- *             // + /abc//       -->  /abc
- *             // + NULL         -->  /
- *             NULL + /abc/def/  -->  /abc/def
- *             NULL + abc//      -->  abc
- *             NULL + //         -->  /
- *             NULL + NULL       -->  (empty string)
- *             "" + ""           -->  (empty string)
- *             "" + /            -->  /
- *             ".." + /etc/foo   -->  NULL
- *             /tmp + ".."       -->  NULL
- * 
- */ -char * -pathJoin(const char *dir, - const char *fname) -{ -const char *slash = "/"; -char *str, *dest; -l_int32 i, n1, n2, emptydir; -size_t size; -SARRAY *sa1, *sa2; -L_BYTEA *ba; - - PROCNAME("pathJoin"); - - if (!dir && !fname) - return stringNew(""); - if (dir && strlen(dir) >= 2 && dir[0] == '.' && dir[1] == '.') - return (char *)ERROR_PTR("dir starts with '..'", procName, NULL); - if (fname && strlen(fname) >= 2 && fname[0] == '.' && fname[1] == '.') - return (char *)ERROR_PTR("fname starts with '..'", procName, NULL); - - sa1 = sarrayCreate(0); - sa2 = sarrayCreate(0); - ba = l_byteaCreate(4); - - /* Process %dir */ - if (dir && strlen(dir) > 0) { - if (dir[0] == '/') - l_byteaAppendString(ba, slash); - sarraySplitString(sa1, dir, "/"); /* removes all slashes */ - n1 = sarrayGetCount(sa1); - for (i = 0; i < n1; i++) { - str = sarrayGetString(sa1, i, L_NOCOPY); - l_byteaAppendString(ba, str); - l_byteaAppendString(ba, slash); - } - } - - /* Special case to add leading slash: dir NULL or empty string */ - emptydir = dir && strlen(dir) == 0; - if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/') - l_byteaAppendString(ba, slash); - - /* Process %fname */ - if (fname && strlen(fname) > 0) { - sarraySplitString(sa2, fname, "/"); - n2 = sarrayGetCount(sa2); - for (i = 0; i < n2; i++) { - str = sarrayGetString(sa2, i, L_NOCOPY); - l_byteaAppendString(ba, str); - l_byteaAppendString(ba, slash); - } - } - - /* Remove trailing slash */ - dest = (char *)l_byteaCopyData(ba, &size); - if (size > 1 && dest[size - 1] == '/') - dest[size - 1] = '\0'; - - sarrayDestroy(&sa1); - sarrayDestroy(&sa2); - l_byteaDestroy(&ba); - return dest; -} - - -/*! - * \brief appendSubdirs() - * - * \param[in] basedir - * \param[in] subdirs - * \return concatenated full directory path without trailing slash, - * or NULL on error - * - *
- * Notes:
- *      (1) Use unix pathname separators
- *      (2) Allocates a new string:  [basedir]/[subdirs]
- * 
- */ -char * -appendSubdirs(const char *basedir, - const char *subdirs) -{ -char *newdir; -size_t len1, len2, len3, len4; - - PROCNAME("appendSubdirs"); - - if (!basedir || !subdirs) - return (char *)ERROR_PTR("basedir and subdirs not both defined", - procName, NULL); - - len1 = strlen(basedir); - len2 = strlen(subdirs); - len3 = len1 + len2 + 8; - if ((newdir = (char *)LEPT_CALLOC(len3, 1)) == NULL) - return (char *)ERROR_PTR("newdir not made", procName, NULL); - stringCat(newdir, len3, basedir); - if (newdir[len1 - 1] != '/') /* add '/' if necessary */ - newdir[len1] = '/'; - if (subdirs[0] == '/') /* add subdirs, stripping leading '/' */ - stringCat(newdir, len3, subdirs + 1); - else - stringCat(newdir, len3, subdirs); - len4 = strlen(newdir); - if (newdir[len4 - 1] == '/') /* strip trailing '/' */ - newdir[len4 - 1] = '\0'; - - return newdir; -} - - -/*--------------------------------------------------------------------* - * Special file name operations * - *--------------------------------------------------------------------*/ -/*! - * \brief convertSepCharsInPath() - * - * \param[in] path - * \param[in] type UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) In-place conversion.
- *      (2) Type is the resulting type:
- *            * UNIX_PATH_SEPCHAR:  '\\' ==> '/'
- *            * WIN_PATH_SEPCHAR:   '/' ==> '\\'
- *      (3) Virtually all path operations in leptonica use unix separators.
- * 
- */ -l_ok -convertSepCharsInPath(char *path, - l_int32 type) -{ -l_int32 i; -size_t len; - - PROCNAME("convertSepCharsInPath"); - if (!path) - return ERROR_INT("path not defined", procName, 1); - if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR) - return ERROR_INT("invalid type", procName, 1); - - len = strlen(path); - if (type == UNIX_PATH_SEPCHAR) { - for (i = 0; i < len; i++) { - if (path[i] == '\\') - path[i] = '/'; - } - } else { /* WIN_PATH_SEPCHAR */ - for (i = 0; i < len; i++) { - if (path[i] == '/') - path[i] = '\\'; - } - } - return 0; -} - - -/*! - * \brief genPathname() - * - * \param[in] dir [optional] directory or full path name, - * with or without the trailing '/' - * \param[in] fname [optional] file name within a directory - * \return pathname either a directory or full path, or NULL on error - * - *
- * Notes:
- *      (1) This function generates actual paths in the following ways:
- *            * from two sub-parts (e.g., a directory and a file name).
- *            * from a single path full path, placed in %dir, with
- *              %fname == NULL.
- *            * from the name of a file in the local directory placed in
- *              %fname, with %dir == NULL.
- *            * if in a "/tmp" directory and on windows, the windows
- *              temp directory is used.
- *      (2) On windows, if the root of %dir is '/tmp', this does a name
- *          translation:
- *             "/tmp"  ==>  [Temp] (windows)
- *          where [Temp] is the windows temp directory.
- *      (3) On unix, the TMPDIR variable is ignored.  No rewriting
- *          of temp directories is permitted.
- *      (4) There are four cases for the input:
- *          (a) %dir is a directory and %fname is defined: result is a full path
- *          (b) %dir is a directory and %fname is null: result is a directory
- *          (c) %dir is a full path and %fname is null: result is a full path
- *          (d) %dir is null or an empty string: start in the current dir;
- *              result is a full path
- *      (5) In all cases, the resulting pathname is not terminated with a slash
- *      (6) The caller is responsible for freeing the returned pathname.
- * 
- */ -char * -genPathname(const char *dir, - const char *fname) -{ -l_int32 is_win32 = FALSE; -char *cdir, *pathout; -l_int32 dirlen, namelen; -size_t size; - - PROCNAME("genPathname"); - - if (!dir && !fname) - return (char *)ERROR_PTR("no input", procName, NULL); - - /* Handle the case where we start from the current directory */ - if (!dir || dir[0] == '\0') { -#if WINAPI_FAMILY_APP - return (char *)ERROR_PTR("no current dir found", procName, NULL); -#else - if ((cdir = getcwd(NULL, 0)) == NULL) - return (char *)ERROR_PTR("no current dir found", procName, NULL); -#endif - } else { - cdir = stringNew(dir); - } - - /* Convert to unix path separators, and remove the trailing - * slash in the directory, except when dir == "/" */ - convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR); - dirlen = strlen(cdir); - if (cdir[dirlen - 1] == '/' && dirlen != 1) { - cdir[dirlen - 1] = '\0'; - dirlen--; - } - - namelen = (fname) ? strlen(fname) : 0; - size = dirlen + namelen + 256; - if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) { - LEPT_FREE(cdir); - return (char *)ERROR_PTR("pathout not made", procName, NULL); - } - -#ifdef _WIN32 - is_win32 = TRUE; -#endif /* _WIN32 */ - - /* First handle %dir (which may be a full pathname). - * There is no path rewriting on unix, and on win32, we do not - * rewrite unless the specified directory is /tmp or - * a subdirectory of /tmp */ - if (!is_win32 || dirlen < 4 || - (dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) || /* not in "/tmp" */ - (dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) { /* not in "/tmp/" */ - stringCopy(pathout, cdir, dirlen); - } else { /* Rewrite for win32 with "/tmp" specified for the directory. 
*/ -#ifdef _WIN32 - l_int32 tmpdirlen; - char tmpdir[MAX_PATH]; -#if WINAPI_FAMILY_APP - wchar_t tmpdirw[MAX_PATH]; - GetTempPathW(MAX_PATH, tmpdirw); /* get the windows temp dir */ - wcstombs(tmpdir, tmpdirw, MAX_PATH); -#else - GetTempPath(sizeof(tmpdir), tmpdir); /* get the windows temp dir */ -#endif - tmpdirlen = strlen(tmpdir); - if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') { - tmpdir[tmpdirlen - 1] = '\0'; /* trim the trailing '\' */ - } - tmpdirlen = strlen(tmpdir); - stringCopy(pathout, tmpdir, tmpdirlen); - - /* Add the rest of cdir */ - if (dirlen > 4) - stringCat(pathout, size, cdir + 4); -#endif /* _WIN32 */ - } - - /* Now handle %fname */ - if (fname && strlen(fname) > 0) { - dirlen = strlen(pathout); - pathout[dirlen] = '/'; - stringCat(pathout, size, fname); - } - - LEPT_FREE(cdir); - return pathout; -} - - -/*! - * \brief makeTempDirname() - * - * \param[in] result preallocated on stack or heap and passed in - * \param[in] nbytes size of %result array, in bytes - * \param[in] subdir [optional]; can be NULL or an empty string - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This generates the directory path for output temp files,
- *          written into %result with unix separators.
- *      (2) Caller allocates %result, large enough to hold the path,
- *          which is:
- *            /tmp/%subdir       (unix)
- *            [Temp]/%subdir     (windows, mac, ios)
- *          where [Temp] is a path determined
- *             - on windows, mac: by GetTempPath()
- *             - on ios: by confstr() (see man page)
- *          and %subdir is in general a set of nested subdirectories:
- *            dir1/dir2/.../dirN
- *          which in use would not typically exceed 2 levels.
- *      (3) Usage example:
- * \code
- *           char  result[256];
- *           makeTempDirname(result, sizeof(result), "lept/golden");
- * \endcode
- * 
- */ -l_ok -makeTempDirname(char *result, - size_t nbytes, - const char *subdir) -{ -char *dir, *path; -l_int32 ret = 0; -size_t pathlen; - - PROCNAME("makeTempDirname"); - - if (!result) - return ERROR_INT("result not defined", procName, 1); - if (subdir && ((subdir[0] == '.') || (subdir[0] == '/'))) - return ERROR_INT("subdir not an actual subdirectory", procName, 1); - - memset(result, 0, nbytes); - -#ifdef OS_IOS - { - size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, result, nbytes); - if (n == 0) { - L_ERROR("failed to find tmp dir, %s\n", procName, strerror(errno)); - return 1; - } else if (n > nbytes) { - return ERROR_INT("result array too small for path\n", procName, 1); - } - dir = pathJoin(result, subdir); - } -#else - dir = pathJoin("/tmp", subdir); -#endif /* ~ OS_IOS */ - -#ifndef _WIN32 - path = stringNew(dir); -#else - path = genPathname(dir, NULL); -#endif /* ~ _WIN32 */ - pathlen = strlen(path); - if (pathlen < nbytes - 1) { - stringCat(result, nbytes, path); - } else { - L_ERROR("result array too small for path\n", procName); - ret = 1; - } - - LEPT_FREE(dir); - LEPT_FREE(path); - return ret; -} - - -/*! - * \brief modifyTrailingSlash() - * - * \param[in] path preallocated on stack or heap and passed in - * \param[in] nbytes size of %path array, in bytes - * \param[in] flag L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This carries out the requested action if necessary.
- * 
- */ -l_ok -modifyTrailingSlash(char *path, - size_t nbytes, - l_int32 flag) -{ -char lastchar; -size_t len; - - PROCNAME("modifyTrailingSlash"); - - if (!path) - return ERROR_INT("path not defined", procName, 1); - if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH) - return ERROR_INT("invalid flag", procName, 1); - - len = strlen(path); - lastchar = path[len - 1]; - if (flag == L_ADD_TRAIL_SLASH && lastchar != '/' && len < nbytes - 2) { - path[len] = '/'; - path[len + 1] = '\0'; - } else if (flag == L_REMOVE_TRAIL_SLASH && lastchar == '/') { - path[len - 1] = '\0'; - } - return 0; -} - - -/*! - * \brief l_makeTempFilename() - * - * \return fname : heap allocated filename; returns NULL on failure. - * - *
- * Notes:
- *      (1) On unix, this makes a filename of the form
- *               "/tmp/lept.XXXXXX",
- *          where each X is a random character.
- *      (2) On windows, this makes a filename of the form
- *               "/[Temp]/lp.XXXXXX".
- *      (3) On all systems, this fails if the file is not writable.
- *      (4) Safest usage is to write to a subdirectory in debug code.
- *      (5) The returned filename must be freed by the caller, using lept_free.
- *      (6) The tail of the filename has a '.', so that cygwin interprets
- *          the file as having an extension.  Otherwise, cygwin assumes it
- *          is an executable and appends ".exe" to the filename.
- *      (7) On unix, whenever possible use tmpfile() instead.  tmpfile()
- *          hides the file name, returns a stream opened for write,
- *          and deletes the temp file when the stream is closed.
- * 
- */ -char * -l_makeTempFilename(void) -{ -char dirname[240]; - - PROCNAME("l_makeTempFilename"); - - if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1) - return (char *)ERROR_PTR("failed to make dirname", procName, NULL); - -#ifndef _WIN32 -{ - char *pattern; - l_int32 fd; - pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL); - fd = mkstemp(pattern); - if (fd == -1) { - LEPT_FREE(pattern); - return (char *)ERROR_PTR("mkstemp failed", procName, NULL); - } - close(fd); - return pattern; -} -#elif (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - return (char *)ERROR_PTR("makeTempFilename failed", procName, NULL); -#else -{ - char fname[MAX_PATH]; - FILE *fp; - if (GetTempFileName(dirname, "lp.", 0, fname) == 0) - return (char *)ERROR_PTR("GetTempFileName failed", procName, NULL); - if ((fp = fopen(fname, "wb")) == NULL) - return (char *)ERROR_PTR("file cannot be written to", procName, NULL); - fclose(fp); - return stringNew(fname); -} -#endif /* ~ _WIN32 */ -} - - -/*! - * \brief extractNumberFromFilename() - * - * \param[in] fname - * \param[in] numpre number of characters before the digits to be found - * \param[in] numpost number of characters after the digits to be found - * \return num number embedded in the filename; -1 on error or if - * not found - * - *
- * Notes:
- *      (1) The number is to be found in the basename, which is the
- *          filename without either the directory or the last extension.
- *      (2) When a number is found, it is non-negative.  If no number
- *          is found, this returns -1, without an error message.  The
- *          caller needs to check.
- * 
- */ -l_int32 -extractNumberFromFilename(const char *fname, - l_int32 numpre, - l_int32 numpost) -{ -char *tail, *basename; -l_int32 len, nret, num; - - PROCNAME("extractNumberFromFilename"); - - if (!fname) - return ERROR_INT("fname not defined", procName, -1); - - splitPathAtDirectory(fname, NULL, &tail); - splitPathAtExtension(tail, &basename, NULL); - LEPT_FREE(tail); - - len = strlen(basename); - if (numpre + numpost > len - 1) { - LEPT_FREE(basename); - return ERROR_INT("numpre + numpost too big", procName, -1); - } - - basename[len - numpost] = '\0'; - nret = sscanf(basename + numpre, "%d", &num); - LEPT_FREE(basename); - - if (nret == 1) - return num; - else - return -1; /* not found */ -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/warper.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/warper.c deleted file mode 100644 index ad11f921..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/warper.c +++ /dev/null @@ -1,1394 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file warper.c - *
- *
- *      High-level captcha interface
- *          PIX               *pixSimpleCaptcha()
- *
- *      Random sinusoidal warping
- *          PIX               *pixRandomHarmonicWarp()
- *
- *      Helper functions
- *          static l_float64  *generateRandomNumberArray()
- *          static l_int32     applyWarpTransform()
- *
- *      Version using a LUT for sin
- *          PIX               *pixRandomHarmonicWarpLUT()
- *          static l_int32     applyWarpTransformLUT()
- *          static l_int32     makeSinLUT()
- *          static l_float32   getSinFromLUT()
- *
- *      Stereoscopic warping
- *          PIX               *pixWarpStereoscopic()
- *
- *      Linear and quadratic horizontal stretching
- *          PIX               *pixStretchHorizontal()
- *          PIX               *pixStretchHorizontalSampled()
- *          PIX               *pixStretchHorizontalLI()
- *
- *      Quadratic vertical shear
- *          PIX               *pixQuadraticVShear()
- *          PIX               *pixQuadraticVShearSampled()
- *          PIX               *pixQuadraticVShearLI()
- *
- *      Stereo from a pair of images
- *          PIX               *pixStereoFromPair()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -static l_float64 *generateRandomNumberArray(l_int32 size); -static l_int32 applyWarpTransform(l_float32 xmag, l_float32 ymag, - l_float32 xfreq, l_float32 yfreq, - l_float64 *randa, l_int32 nx, l_int32 ny, - l_int32 xp, l_int32 yp, - l_float32 *px, l_float32 *py); - -#define USE_SIN_TABLE 0 - - /* Suggested input to pixStereoFromPair(). These are weighting - * factors for input to the red channel from the left image. */ -static const l_float32 DefaultRedWeight = 0.0; -static const l_float32 DefaultGreenWeight = 0.7; -static const l_float32 DefaultBlueWeight = 0.3; - - -/*----------------------------------------------------------------------* - * High-level example captcha interface * - *----------------------------------------------------------------------*/ -/*! - * \brief pixSimpleCaptcha() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] border added white pixels on each side - * \param[in] nterms number of x and y harmonic terms - * \param[in] seed of random number generator - * \param[in] color for colorizing; in 0xrrggbb00 format; use 0 for black - * \param[in] cmapflag 1 for colormap output; 0 for rgb - * \return pixd 8 bpp cmap or 32 bpp rgb, or NULL on error - * - *
- * Notes:
- *      (1) This uses typical default values for generating captchas.
- *          The magnitudes of the harmonic warp are typically to be
- *          smaller when more terms are used, even though the phases
- *          are random.  See, for example, prog/warptest.c.
- * 
- */ -PIX * -pixSimpleCaptcha(PIX *pixs, - l_int32 border, - l_int32 nterms, - l_uint32 seed, - l_uint32 color, - l_int32 cmapflag) -{ -l_int32 k; -l_float32 xmag[] = {7.0f, 5.0f, 4.0f, 3.0f}; -l_float32 ymag[] = {10.0f, 8.0f, 6.0f, 5.0f}; -l_float32 xfreq[] = {0.12f, 0.10f, 0.10f, 0.11f}; -l_float32 yfreq[] = {0.15f, 0.13f, 0.13f, 0.11f}; -PIX *pixg, *pixgb, *pixw, *pixd; - - PROCNAME("pixSimpleCaptcha"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - if (nterms < 1 || nterms > 4) - return (PIX *)ERROR_PTR("nterms must be in {1,2,3,4}", procName, NULL); - - k = nterms - 1; - pixg = pixConvertTo8(pixs, 0); - pixgb = pixAddBorder(pixg, border, 255); - pixw = pixRandomHarmonicWarp(pixgb, xmag[k], ymag[k], xfreq[k], yfreq[k], - nterms, nterms, seed, 255); - pixd = pixColorizeGray(pixw, color, cmapflag); - - pixDestroy(&pixg); - pixDestroy(&pixgb); - pixDestroy(&pixw); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Random sinusoidal warping * - *----------------------------------------------------------------------*/ -/*! - * \brief pixRandomHarmonicWarp() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] xmag, ymag maximum magnitude of x and y distortion - * \param[in] xfreq, yfreq maximum magnitude of x and y frequency - * \param[in] nx, ny number of x and y harmonic terms - * \param[in] seed of random number generator - * \param[in] grayval color brought in from the outside; - * 0 for black, 255 for white - * \return pixd 8 bpp; no colormap, or NULL on error - * - *
- * Notes:
- *      (1) To generate the warped image p(x',y'), set up the transforms
- *          that are in getWarpTransform().  For each (x',y') in the
- *          dest, the warp function computes the originating location
- *          (x, y) in the src.  The differences (x - x') and (y - y')
- *          are given as a sum of products of sinusoidal terms.  Each
- *          term is multiplied by a maximum amplitude (in pixels), and the
- *          angle is determined by a frequency and phase, and depends
- *          on the (x', y') value of the dest.  Random numbers with
- *          a variable input seed are used to allow the warping to be
- *          unpredictable.  A linear interpolation is used to find
- *          the value for the source at (x, y); this value is written
- *          into the dest.
- *      (2) This can be used to generate 'captcha's, which are somewhat
- *          randomly distorted images of text.  A typical set of parameters
- *          for a captcha are:
- *                    xmag = 4.0     ymag = 6.0
- *                    xfreq = 0.10   yfreq = 0.13
- *                    nx = 3         ny = 3
- *          Other examples can be found in prog/warptest.c.
- * 
- */ -PIX * -pixRandomHarmonicWarp(PIX *pixs, - l_float32 xmag, - l_float32 ymag, - l_float32 xfreq, - l_float32 yfreq, - l_int32 nx, - l_int32 ny, - l_uint32 seed, - l_int32 grayval) -{ -l_int32 w, h, d, i, j, wpls, wpld, val; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -l_float64 *randa; -PIX *pixd; - - PROCNAME("pixRandomHarmonicWarp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - /* Compute filter output at each location. We iterate over - * the destination pixels. For each dest pixel, use the - * warp function to compute the four source pixels that - * contribute, at the location (x, y). Each source pixel - * is divided into 16 x 16 subpixels to get an approximate value. */ - srand(seed); - randa = generateRandomNumberArray(5 * (nx + ny)); - pixd = pixCreateTemplate(pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - applyWarpTransform(xmag, ymag, xfreq, yfreq, randa, nx, ny, - j, i, &x, &y); - linearInterpolatePixelGray(datas, wpls, w, h, x, y, grayval, &val); - SET_DATA_BYTE(lined, j, val); - } - } - - LEPT_FREE(randa); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Static helper functions * - *----------------------------------------------------------------------*/ -static l_float64 * -generateRandomNumberArray(l_int32 size) -{ -l_int32 i; -l_float64 *randa; - - PROCNAME("generateRandomNumberArray"); - - if ((randa = (l_float64 *)LEPT_CALLOC(size, sizeof(l_float64))) == NULL) - return (l_float64 *)ERROR_PTR("calloc fail for randa", procName, NULL); - - /* Return random values between 0.5 and 1.0 */ - for (i = 0; i < size; i++) - randa[i] = 0.5 * (1.0 + (l_float64)rand() / (l_float64)RAND_MAX); - return 
randa; -} - - -/*! - * \brief applyWarpTransform() - * - * Notes: - * (1) Uses the internal sin function. - */ -static l_int32 -applyWarpTransform(l_float32 xmag, - l_float32 ymag, - l_float32 xfreq, - l_float32 yfreq, - l_float64 *randa, - l_int32 nx, - l_int32 ny, - l_int32 xp, - l_int32 yp, - l_float32 *px, - l_float32 *py) -{ -l_int32 i; -l_float64 twopi, x, y, anglex, angley; - - twopi = 6.283185; - for (i = 0, x = xp; i < nx; i++) { - anglex = xfreq * randa[3 * i + 1] * xp + twopi * randa[3 * i + 2]; - angley = yfreq * randa[3 * i + 3] * yp + twopi * randa[3 * i + 4]; - x += xmag * randa[3 * i] * sin(anglex) * sin(angley); - } - for (i = nx, y = yp; i < nx + ny; i++) { - angley = yfreq * randa[3 * i + 1] * yp + twopi * randa[3 * i + 2]; - anglex = xfreq * randa[3 * i + 3] * xp + twopi * randa[3 * i + 4]; - y += ymag * randa[3 * i] * sin(angley) * sin(anglex); - } - - *px = (l_float32)x; - *py = (l_float32)y; - return 0; -} - - -#if USE_SIN_TABLE -/*----------------------------------------------------------------------* - * Version using a LUT for sin * - *----------------------------------------------------------------------*/ -static l_int32 applyWarpTransformLUT(l_float32 xmag, l_float32 ymag, - l_float32 xfreq, l_float32 yfreq, - l_float64 *randa, l_int32 nx, l_int32 ny, - l_int32 xp, l_int32 yp, l_float32 *lut, - l_int32 npts, l_float32 *px, l_float32 *py); -static l_int32 makeSinLUT(l_int32 npts, NUMA **pna); -static l_float32 getSinFromLUT(l_float32 *tab, l_int32 npts, - l_float32 radang); - -/*! - * \brief pixRandomHarmonicWarpLUT() - * - * \param[in] pixs 8 bpp; no colormap - * \param[in] xmag, ymag maximum magnitude of x and y distortion - * \param[in] xfreq, yfreq maximum magnitude of x and y frequency - * \param[in] nx, ny number of x and y harmonic terms - * \param[in] seed of random number generator - * \param[in] grayval color brought in from the outside; - * 0 for black, 255 for white - * \return pixd 8 bpp; no colormap, or NULL on error - * - *
- * Notes:
- *      (1) See notes and inline comments in pixRandomHarmonicWarp().
- *          This version uses a LUT for the sin function.  It is not
- *          appreciably faster than using the built-in sin function,
- *          and is here for comparison only.
- * 
- */ -PIX * -pixRandomHarmonicWarpLUT(PIX *pixs, - l_float32 xmag, - l_float32 ymag, - l_float32 xfreq, - l_float32 yfreq, - l_int32 nx, - l_int32 ny, - l_uint32 seed, - l_int32 grayval) -{ -l_int32 w, h, d, i, j, wpls, wpld, val, npts; -l_uint32 *datas, *datad, *lined; -l_float32 x, y; -l_float32 *lut; -l_float64 *randa; -NUMA *na; -PIX *pixd; - - PROCNAME("pixRandomHarmonicWarp"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8) - return (PIX *)ERROR_PTR("pixs not 8 bpp", procName, NULL); - - /* Compute filter output at each location. We iterate over - * the destination pixels. For each dest pixel, use the - * warp function to compute the four source pixels that - * contribute, at the location (x, y). Each source pixel - * is divided into 16 x 16 subpixels to get an approximate value. */ - srand(seed); - randa = generateRandomNumberArray(5 * (nx + ny)); - pixd = pixCreateTemplate(pixs); - datas = pixGetData(pixs); - wpls = pixGetWpl(pixs); - datad = pixGetData(pixd); - wpld = pixGetWpl(pixd); - - npts = 100; - makeSinLUT(npts, &na); - lut = numaGetFArray(na, L_NOCOPY); - for (i = 0; i < h; i++) { - lined = datad + i * wpld; - for (j = 0; j < w; j++) { - applyWarpTransformLUT(xmag, ymag, xfreq, yfreq, randa, nx, ny, - j, i, lut, npts, &x, &y); - linearInterpolatePixelGray(datas, wpls, w, h, x, y, grayval, &val); - SET_DATA_BYTE(lined, j, val); - } - } - - numaDestroy(&na); - LEPT_FREE(randa); - return pixd; -} - - -/*! - * \brief applyWarpTransformLUT() - * - * Notes: - * (1) Uses an LUT for computing sin(theta). There is little speed - * advantage to using the LUT. 
- */ -static l_int32 -applyWarpTransformLUT(l_float32 xmag, - l_float32 ymag, - l_float32 xfreq, - l_float32 yfreq, - l_float64 *randa, - l_int32 nx, - l_int32 ny, - l_int32 xp, - l_int32 yp, - l_float32 *lut, - l_int32 npts, - l_float32 *px, - l_float32 *py) -{ -l_int32 i; -l_float64 twopi, x, y, anglex, angley, sanglex, sangley; - - twopi = 6.283185; - for (i = 0, x = xp; i < nx; i++) { - anglex = xfreq * randa[3 * i + 1] * xp + twopi * randa[3 * i + 2]; - angley = yfreq * randa[3 * i + 3] * yp + twopi * randa[3 * i + 4]; - sanglex = getSinFromLUT(lut, npts, anglex); - sangley = getSinFromLUT(lut, npts, angley); - x += xmag * randa[3 * i] * sanglex * sangley; - } - for (i = nx, y = yp; i < nx + ny; i++) { - angley = yfreq * randa[3 * i + 1] * yp + twopi * randa[3 * i + 2]; - anglex = xfreq * randa[3 * i + 3] * xp + twopi * randa[3 * i + 4]; - sanglex = getSinFromLUT(lut, npts, anglex); - sangley = getSinFromLUT(lut, npts, angley); - y += ymag * randa[3 * i] * sangley * sanglex; - } - - *px = (l_float32)x; - *py = (l_float32)y; - return 0; -} - - -static l_int32 -makeSinLUT(l_int32 npts, - NUMA **pna) -{ -l_int32 i, n; -l_float32 delx, fval; -NUMA *na; - - PROCNAME("makeSinLUT"); - - if (!pna) - return ERROR_INT("&na not defined", procName, 1); - *pna = NULL; - if (npts < 2) - return ERROR_INT("npts < 2", procName, 1); - n = 2 * npts + 1; - na = numaCreate(n); - *pna = na; - delx = 3.14159265 / (l_float32)npts; - numaSetParameters(na, 0.0, delx); - for (i = 0; i < n / 2; i++) - numaAddNumber(na, (l_float32)sin((l_float64)i * delx)); - for (i = 0; i < n / 2; i++) { - numaGetFValue(na, i, &fval); - numaAddNumber(na, -fval); - } - numaAddNumber(na, 0); - - return 0; -} - - -static l_float32 -getSinFromLUT(l_float32 *tab, - l_int32 npts, - l_float32 radang) -{ -l_int32 index; -l_float32 twopi, invtwopi, findex, diff; - - /* Restrict radang to [0, 2pi] */ - twopi = 6.283185; - invtwopi = 0.1591549; - if (radang < 0.0) - radang += twopi * (1.0 - (l_int32)(-radang * 
invtwopi)); - else if (radang > 0.0) - radang -= twopi * (l_int32)(radang * invtwopi); - - /* Interpolate */ - findex = (2.0 * (l_float32)npts) * (radang * invtwopi); - index = (l_int32)findex; - if (index == 2 * npts) - return tab[index]; - diff = findex - index; - return (1.0 - diff) * tab[index] + diff * tab[index + 1]; -} -#endif /* USE_SIN_TABLE */ - - - -/*---------------------------------------------------------------------------* - * Stereoscopic warping * - *---------------------------------------------------------------------------*/ -/*! - * \brief pixWarpStereoscopic() - * - * \param[in] pixs any depth, colormap ok - * \param[in] zbend horizontal separation in pixels of red and cyan - * at the left and right sides, that gives rise to - * quadratic curvature out of the image plane - * \param[in] zshiftt uniform pixel translation difference between - * red and cyan, that pushes the top of the image - * plane away from the viewer (zshiftt > 0) or - * towards the viewer (zshiftt < 0) - * \param[in] zshiftb uniform pixel translation difference between - * red and cyan, that pushes the bottom of the image - * plane away from the viewer (zshiftb > 0) or - * towards the viewer (zshiftb < 0) - * \param[in] ybendt multiplicative parameter for in-plane vertical - * displacement at the left or right edge at the top: - * y = ybendt * (2x/w - 1)^2 - * \param[in] ybendb same as ybendt, except at the left or right edge - * at the bottom - * \param[in] redleft 1 if the red filter is on the left; 0 otherwise - * \return pixd 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) This function splits out the red channel, mucks around with
- *          it, then recombines with the unmolested cyan channel.
- *      (2) By using a quadratically increasing shift of the red
- *          pixels horizontally and away from the vertical centerline,
- *          the image appears to bend quadratically out of the image
- *          plane, symmetrically with respect to the vertical center
- *          line.  A positive value of %zbend causes the plane to be
- *          curved away from the viewer.  We use linearly interpolated
- *          stretching to avoid the appearance of kinks in the curve.
- *      (3) The parameters %zshiftt and %zshiftb tilt the image plane
- *          about a horizontal line through the center, and at the
- *          same time move that line either in toward the viewer or away.
- *          This is implemented by a combination of horizontal shear
- *          about the center line (for the tilt) and horizontal
- *          translation (to move the entire plane in or out).
- *          A positive value of %zshiftt moves the top of the plane
- *          away from the viewer, and a positive value of %zshiftb
- *          moves the bottom of the plane away.  We use linear interpolated
- *          shear to avoid visible vertical steps in the tilted image.
- *      (4) The image can be bent in the plane and about the vertical
- *          centerline.  The centerline does not shift, and the
- *          parameter %ybend gives the relative shift at left and right
- *          edges, with a downward shift for positive values of %ybend.
- *      (6) When writing out a steroscopic (red/cyan) image in jpeg,
- *          first call pixSetChromaSampling(pix, 0) to get sufficient
- *          resolution in the red channel.
- *      (7) Typical values are:
- *             zbend = 20
- *             zshiftt = 15
- *             zshiftb = -15
- *             ybendt = 30
- *             ybendb = 0
- *          If the disparity z-values are too large, it is difficult for
- *          the brain to register the two images.
- *      (8) This function has been cleverly reimplemented by Jeff Breidenbach.
- *          The original implementation used two 32 bpp rgb images,
- *          and merged them at the end.  The result is somewhat faded,
- *          and has a parameter "thresh" that controls the amount of
- *          color in the result.  (The present implementation avoids these
- *          two problems, skipping both the colorization and the alpha
- *          blending at the end, and is about 3x faster)
- *          The basic operations with 32 bpp are as follows:
- *               // Immediate conversion to 32 bpp
- *            Pix *pixt1 = pixConvertTo32(pixs);
- *               // Do vertical shear
- *            Pix *pixr = pixQuadraticVerticalShear(pixt1, L_WARP_TO_RIGHT,
- *                                                  ybendt, ybendb,
- *                                                  L_BRING_IN_WHITE);
- *               // Colorize two versions, toward red and cyan
- *            Pix *pixc = pixCopy(NULL, pixr);
- *            l_int32 thresh = 150;  // if higher, get less original color
- *            pixColorGray(pixr, NULL, L_PAINT_DARK, thresh, 255, 0, 0);
- *            pixColorGray(pixc, NULL, L_PAINT_DARK, thresh, 0, 255, 255);
- *               // Shift the red pixels; e.g., by stretching
- *            Pix *pixrs = pixStretchHorizontal(pixr, L_WARP_TO_RIGHT,
- *                                              L_QUADRATIC_WARP, zbend,
- *                                              L_INTERPOLATED,
- *                                              L_BRING_IN_WHITE);
- *               // Blend the shifted red and unshifted cyan 50:50
- *            Pix *pixg = pixCreate(w, h, 8);
- *            pixSetAllArbitrary(pixg, 128);
- *            pixd = pixBlendWithGrayMask(pixrs, pixc, pixg, 0, 0);
- * 
- */ -PIX * -pixWarpStereoscopic(PIX *pixs, - l_int32 zbend, - l_int32 zshiftt, - l_int32 zshiftb, - l_int32 ybendt, - l_int32 ybendb, - l_int32 redleft) -{ -l_int32 w, h, zshift; -l_float32 angle; -BOX *boxleft, *boxright; -PIX *pix1, *pix2, *pix3, *pix4, *pixr, *pixg, *pixb; -PIX *pixv1, *pixv2, *pixv3, *pixv4; -PIX *pixrs, *pixrss; -PIX *pixd; - - PROCNAME("pixWarpStereoscopic"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - - /* Convert to the output depth, 32 bpp. */ - pix1 = pixConvertTo32(pixs); - - /* If requested, do a quad vertical shearing, pushing pixels up - * or down, depending on their distance from the centerline. */ - pixGetDimensions(pixs, &w, &h, NULL); - boxleft = boxCreate(0, 0, w / 2, h); - boxright = boxCreate(w / 2, 0, w - w / 2, h); - if (ybendt != 0 || ybendb != 0) { - pixv1 = pixClipRectangle(pix1, boxleft, NULL); - pixv2 = pixClipRectangle(pix1, boxright, NULL); - pixv3 = pixQuadraticVShear(pixv1, L_WARP_TO_LEFT, ybendt, - ybendb, L_INTERPOLATED, - L_BRING_IN_WHITE); - pixv4 = pixQuadraticVShear(pixv2, L_WARP_TO_RIGHT, ybendt, - ybendb, L_INTERPOLATED, - L_BRING_IN_WHITE); - pix2 = pixCreate(w, h, 32); - pixRasterop(pix2, 0, 0, w / 2, h, PIX_SRC, pixv3, 0, 0); - pixRasterop(pix2, w / 2, 0, w - w / 2, h, PIX_SRC, pixv4, 0, 0); - pixDestroy(&pixv1); - pixDestroy(&pixv2); - pixDestroy(&pixv3); - pixDestroy(&pixv4); - } else { - pix2 = pixClone(pix1); - } - pixDestroy(&pix1); - - /* Split out the 3 components */ - pixr = pixGetRGBComponent(pix2, COLOR_RED); - pixg = pixGetRGBComponent(pix2, COLOR_GREEN); - pixb = pixGetRGBComponent(pix2, COLOR_BLUE); - pixDestroy(&pix2); - - /* The direction of the stereo disparity below is set - * for the red filter to be over the left eye. If the red - * filter is over the right eye, invert the horizontal shifts. 
*/ - if (redleft) { - zbend = -zbend; - zshiftt = -zshiftt; - zshiftb = -zshiftb; - } - - /* Shift the red pixels horizontally by an amount that - * increases quadratically from the centerline. */ - if (zbend == 0) { - pixrs = pixClone(pixr); - } else { - pix1 = pixClipRectangle(pixr, boxleft, NULL); - pix2 = pixClipRectangle(pixr, boxright, NULL); - pix3 = pixStretchHorizontal(pix1, L_WARP_TO_LEFT, L_QUADRATIC_WARP, - zbend, L_INTERPOLATED, L_BRING_IN_WHITE); - pix4 = pixStretchHorizontal(pix2, L_WARP_TO_RIGHT, L_QUADRATIC_WARP, - zbend, L_INTERPOLATED, L_BRING_IN_WHITE); - pixrs = pixCreate(w, h, 8); - pixRasterop(pixrs, 0, 0, w / 2, h, PIX_SRC, pix3, 0, 0); - pixRasterop(pixrs, w / 2, 0, w - w / 2, h, PIX_SRC, pix4, 0, 0); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - } - - /* Perform a combination of horizontal shift and shear of - * red pixels. The causes the plane of the image to tilt and - * also move forward or backward. */ - if (zshiftt == 0 && zshiftb == 0) { - pixrss = pixClone(pixrs); - } else if (zshiftt == zshiftb) { - pixrss = pixTranslate(NULL, pixrs, zshiftt, 0, L_BRING_IN_WHITE); - } else { - angle = (l_float32)(zshiftb - zshiftt) / - L_MAX(1.0, (l_float32)pixGetHeight(pixrs)); - zshift = (zshiftt + zshiftb) / 2; - pix1 = pixTranslate(NULL, pixrs, zshift, 0, L_BRING_IN_WHITE); - pixrss = pixHShearLI(pix1, h / 2, angle, L_BRING_IN_WHITE); - pixDestroy(&pix1); - } - - /* Combine the unchanged cyan (g,b) image with the shifted red */ - pixd = pixCreateRGBImage(pixrss, pixg, pixb); - - boxDestroy(&boxleft); - boxDestroy(&boxright); - pixDestroy(&pixrs); - pixDestroy(&pixrss); - pixDestroy(&pixr); - pixDestroy(&pixg); - pixDestroy(&pixb); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Linear and quadratic horizontal stretching * - *----------------------------------------------------------------------*/ -/*! 
- * \brief pixStretchHorizontal() - * - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] dir L_WARP_TO_LEFT or L_WARP_TO_RIGHT - * \param[in] type L_LINEAR_WARP or L_QUADRATIC_WARP - * \param[in] hmax horizontal displacement at edge - * \param[in] operation L_SAMPLED or L_INTERPOLATED - * \param[in] incolor L_BRING_IN_WHITE or L_BRING_IN_BLACK - * \return pixd stretched/compressed, or NULL on error - * - *
- * Notes:
- *      (1) If %hmax > 0, this is an increase in the coordinate value of
- *          pixels in pixd, relative to the same pixel in pixs.
- *      (2) If %dir == L_WARP_TO_LEFT, the pixels on the right edge of
- *          the image are not moved. So, for example, if %hmax > 0
- *          and %dir == L_WARP_TO_LEFT, the pixels in pixd are
- *          contracted toward the right edge of the image, relative
- *          to those in pixs.
- *      (3) If %type == L_LINEAR_WARP, the pixel positions are moved
- *          to the left or right by an amount that varies linearly with
- *          the horizontal location.
- *      (4) If %operation == L_SAMPLED, the dest pixels are taken from
- *          the nearest src pixel.  Otherwise, we use linear interpolation
- *          between pairs of sampled pixels.
- * 
- */ -PIX * -pixStretchHorizontal(PIX *pixs, - l_int32 dir, - l_int32 type, - l_int32 hmax, - l_int32 operation, - l_int32 incolor) -{ -l_int32 d; - - PROCNAME("pixStretchHorizontal"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - d = pixGetDepth(pixs); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 1, 8 or 32 bpp", procName, NULL); - if (dir != L_WARP_TO_LEFT && dir != L_WARP_TO_RIGHT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (type != L_LINEAR_WARP && type != L_QUADRATIC_WARP) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (operation != L_SAMPLED && operation != L_INTERPOLATED) - return (PIX *)ERROR_PTR("invalid operation", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - if (d == 1 && operation == L_INTERPOLATED) { - L_WARNING("Using sampling for 1 bpp\n", procName); - operation = L_INTERPOLATED; - } - - if (operation == L_SAMPLED) - return pixStretchHorizontalSampled(pixs, dir, type, hmax, incolor); - else - return pixStretchHorizontalLI(pixs, dir, type, hmax, incolor); -} - - -/*! - * \brief pixStretchHorizontalSampled() - * - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] dir L_WARP_TO_LEFT or L_WARP_TO_RIGHT - * \param[in] type L_LINEAR_WARP or L_QUADRATIC_WARP - * \param[in] hmax horizontal displacement at edge - * \param[in] incolor L_BRING_IN_WHITE or L_BRING_IN_BLACK - * \return pixd stretched/compressed, or NULL on error - * - *
- * Notes:
- *      (1) See pixStretchHorizontal() for details.
- * 
- */ -PIX * -pixStretchHorizontalSampled(PIX *pixs, - l_int32 dir, - l_int32 type, - l_int32 hmax, - l_int32 incolor) -{ -l_int32 i, j, jd, w, wm, h, d, wpls, wpld, val; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixStretchHorizontalSampled"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 1, 8 or 32 bpp", procName, NULL); - if (dir != L_WARP_TO_LEFT && dir != L_WARP_TO_RIGHT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (type != L_LINEAR_WARP && type != L_QUADRATIC_WARP) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - pixd = pixCreateTemplate(pixs); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - wm = w - 1; - for (jd = 0; jd < w; jd++) { - if (dir == L_WARP_TO_LEFT) { - if (type == L_LINEAR_WARP) - j = jd - (hmax * (wm - jd)) / wm; - else /* L_QUADRATIC_WARP */ - j = jd - (hmax * (wm - jd) * (wm - jd)) / (wm * wm); - } else if (dir == L_WARP_TO_RIGHT) { - if (type == L_LINEAR_WARP) - j = jd - (hmax * jd) / wm; - else /* L_QUADRATIC_WARP */ - j = jd - (hmax * jd * jd) / (wm * wm); - } - if (j < 0 || j > w - 1) continue; - - switch (d) - { - case 1: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - val = GET_DATA_BIT(lines, j); - if (val) - SET_DATA_BIT(lined, jd); - } - break; - case 8: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - val = GET_DATA_BYTE(lines, j); - SET_DATA_BYTE(lined, jd, val); - } - break; - case 32: - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - lined[jd] = lines[j]; - } - break; - default: - 
L_ERROR("invalid depth: %d\n", procName, d); - pixDestroy(&pixd); - return NULL; - } - } - - return pixd; -} - - -/*! - * \brief pixStretchHorizontalLI() - * - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] dir L_WARP_TO_LEFT or L_WARP_TO_RIGHT - * \param[in] type L_LINEAR_WARP or L_QUADRATIC_WARP - * \param[in] hmax horizontal displacement at edge - * \param[in] incolor L_BRING_IN_WHITE or L_BRING_IN_BLACK - * \return pixd stretched/compressed, or NULL on error - * - *
- * Notes:
- *      (1) See pixStretchHorizontal() for details.
- * 
- */ -PIX * -pixStretchHorizontalLI(PIX *pixs, - l_int32 dir, - l_int32 type, - l_int32 hmax, - l_int32 incolor) -{ -l_int32 i, j, jd, jp, jf, w, wm, h, d, wpls, wpld, val, rval, gval, bval; -l_uint32 word0, word1; -l_uint32 *datas, *datad, *lines, *lined; -PIX *pixd; - - PROCNAME("pixStretchHorizontalLI"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); - if (dir != L_WARP_TO_LEFT && dir != L_WARP_TO_RIGHT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (type != L_LINEAR_WARP && type != L_QUADRATIC_WARP) - return (PIX *)ERROR_PTR("invalid type", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - /* Standard linear interpolation, subdividing each pixel into 64 */ - pixd = pixCreateTemplate(pixs); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - wm = w - 1; - for (jd = 0; jd < w; jd++) { - if (dir == L_WARP_TO_LEFT) { - if (type == L_LINEAR_WARP) - j = 64 * jd - 64 * (hmax * (wm - jd)) / wm; - else /* L_QUADRATIC_WARP */ - j = 64 * jd - 64 * (hmax * (wm - jd) * (wm - jd)) / (wm * wm); - } else if (dir == L_WARP_TO_RIGHT) { - if (type == L_LINEAR_WARP) - j = 64 * jd - 64 * (hmax * jd) / wm; - else /* L_QUADRATIC_WARP */ - j = 64 * jd - 64 * (hmax * jd * jd) / (wm * wm); - } - jp = j / 64; - jf = j & 0x3f; - if (jp < 0 || jp > wm) continue; - - switch (d) - { - case 8: - if (jp < wm) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - val = ((63 - jf) * GET_DATA_BYTE(lines, jp) + - jf * GET_DATA_BYTE(lines, jp + 1) + 31) / 63; - SET_DATA_BYTE(lined, jd, val); - } - } else { /* jp == wm */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = 
datad + i * wpld; - val = GET_DATA_BYTE(lines, jp); - SET_DATA_BYTE(lined, jd, val); - } - } - break; - case 32: - if (jp < wm) { - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - word0 = *(lines + jp); - word1 = *(lines + jp + 1); - rval = ((63 - jf) * ((word0 >> L_RED_SHIFT) & 0xff) + - jf * ((word1 >> L_RED_SHIFT) & 0xff) + 31) / 63; - gval = ((63 - jf) * ((word0 >> L_GREEN_SHIFT) & 0xff) + - jf * ((word1 >> L_GREEN_SHIFT) & 0xff) + 31) / 63; - bval = ((63 - jf) * ((word0 >> L_BLUE_SHIFT) & 0xff) + - jf * ((word1 >> L_BLUE_SHIFT) & 0xff) + 31) / 63; - composeRGBPixel(rval, gval, bval, lined + jd); - } - } else { /* jp == wm */ - for (i = 0; i < h; i++) { - lines = datas + i * wpls; - lined = datad + i * wpld; - lined[jd] = lines[jp]; - } - } - break; - default: - L_ERROR("invalid depth: %d\n", procName, d); - pixDestroy(&pixd); - return NULL; - } - } - - return pixd; -} - - -/*----------------------------------------------------------------------* - * Quadratic vertical shear * - *----------------------------------------------------------------------*/ -/*! - * \brief pixQuadraticVShear() - * - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] dir L_WARP_TO_LEFT or L_WARP_TO_RIGHT - * \param[in] vmaxt max vertical displacement at edge and at top - * \param[in] vmaxb max vertical displacement at edge and at bottom - * \param[in] operation L_SAMPLED or L_INTERPOLATED - * \param[in] incolor L_BRING_IN_WHITE or L_BRING_IN_BLACK - * \return pixd stretched, or NULL on error - * - *
- * Notes:
- *      (1) This gives a quadratic bending, upward or downward, as you
- *          move to the left or right.
- *      (2) If %dir == L_WARP_TO_LEFT, the right edge is unchanged, and
- *          the left edge pixels are moved maximally up or down.
- *      (3) Parameters %vmaxt and %vmaxb control the maximum amount of
- *          vertical pixel shear at the top and bottom, respectively.
- *          If %vmaxt > 0, the vertical displacement of pixels at the
- *          top is downward.  Likewise, if %vmaxb > 0, the vertical
- *          displacement of pixels at the bottom is downward.
- *      (4) If %operation == L_SAMPLED, the dest pixels are taken from
- *          the nearest src pixel.  Otherwise, we use linear interpolation
- *          between pairs of sampled pixels.
- *      (5) This is for quadratic shear.  For uniform (linear) shear,
- *          use the standard shear operators.
- * 
- */ -PIX * -pixQuadraticVShear(PIX *pixs, - l_int32 dir, - l_int32 vmaxt, - l_int32 vmaxb, - l_int32 operation, - l_int32 incolor) -{ -l_int32 w, h, d; - - PROCNAME("pixQuadraticVShear"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 1, 8 or 32 bpp", procName, NULL); - if (dir != L_WARP_TO_LEFT && dir != L_WARP_TO_RIGHT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (operation != L_SAMPLED && operation != L_INTERPOLATED) - return (PIX *)ERROR_PTR("invalid operation", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - if (vmaxt == 0 && vmaxb == 0) - return pixCopy(NULL, pixs); - - if (operation == L_INTERPOLATED && d == 1) { - L_WARNING("no interpolation for 1 bpp; using sampling\n", procName); - operation = L_SAMPLED; - } - - if (operation == L_SAMPLED) - return pixQuadraticVShearSampled(pixs, dir, vmaxt, vmaxb, incolor); - else /* operation == L_INTERPOLATED */ - return pixQuadraticVShearLI(pixs, dir, vmaxt, vmaxb, incolor); -} - - -/*! - * \brief pixQuadraticVShearSampled() - * - * \param[in] pixs 1, 8 or 32 bpp - * \param[in] dir L_WARP_TO_LEFT or L_WARP_TO_RIGHT - * \param[in] vmaxt max vertical displacement at edge and at top - * \param[in] vmaxb max vertical displacement at edge and at bottom - * \param[in] incolor L_BRING_IN_WHITE or L_BRING_IN_BLACK - * \return pixd stretched, or NULL on error - * - *
- * Notes:
- *      (1) See pixQuadraticVShear() for details.
- * 
- */ -PIX * -pixQuadraticVShearSampled(PIX *pixs, - l_int32 dir, - l_int32 vmaxt, - l_int32 vmaxb, - l_int32 incolor) -{ -l_int32 i, j, id, w, h, d, wm, hm, wpls, wpld, val; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 delrowt, delrowb, denom1, denom2, dely; -PIX *pixd; - - PROCNAME("pixQuadraticVShearSampled"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d != 1 && d != 8 && d != 32) - return (PIX *)ERROR_PTR("pixs not 1, 8 or 32 bpp", procName, NULL); - if (dir != L_WARP_TO_LEFT && dir != L_WARP_TO_RIGHT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - if (vmaxt == 0 && vmaxb == 0) - return pixCopy(NULL, pixs); - - pixd = pixCreateTemplate(pixs); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - datas = pixGetData(pixs); - datad = pixGetData(pixd); - wpls = pixGetWpl(pixs); - wpld = pixGetWpl(pixd); - wm = w - 1; - hm = h - 1; - denom1 = 1. / (l_float32)h; - denom2 = 1. 
/ (l_float32)(wm * wm); - for (j = 0; j < w; j++) { - if (dir == L_WARP_TO_LEFT) { - delrowt = (l_float32)(vmaxt * (wm - j) * (wm - j)) * denom2; - delrowb = (l_float32)(vmaxb * (wm - j) * (wm - j)) * denom2; - } else if (dir == L_WARP_TO_RIGHT) { - delrowt = (l_float32)(vmaxt * j * j) * denom2; - delrowb = (l_float32)(vmaxb * j * j) * denom2; - } - switch (d) - { - case 1: - for (id = 0; id < h; id++) { - dely = (delrowt * (hm - id) + delrowb * id) * denom1; - i = id - (l_int32)(dely + 0.5); - if (i < 0 || i > hm) continue; - lines = datas + i * wpls; - lined = datad + id * wpld; - val = GET_DATA_BIT(lines, j); - if (val) - SET_DATA_BIT(lined, j); - } - break; - case 8: - for (id = 0; id < h; id++) { - dely = (delrowt * (hm - id) + delrowb * id) * denom1; - i = id - (l_int32)(dely + 0.5); - if (i < 0 || i > hm) continue; - lines = datas + i * wpls; - lined = datad + id * wpld; - val = GET_DATA_BYTE(lines, j); - SET_DATA_BYTE(lined, j, val); - } - break; - case 32: - for (id = 0; id < h; id++) { - dely = (delrowt * (hm - id) + delrowb * id) * denom1; - i = id - (l_int32)(dely + 0.5); - if (i < 0 || i > hm) continue; - lines = datas + i * wpls; - lined = datad + id * wpld; - lined[j] = lines[j]; - } - break; - default: - L_ERROR("invalid depth: %d\n", procName, d); - pixDestroy(&pixd); - return NULL; - } - } - - return pixd; -} - - -/*! - * \brief pixQuadraticVShearLI() - * - * \param[in] pixs 8 or 32 bpp, or colormapped - * \param[in] dir L_WARP_TO_LEFT or L_WARP_TO_RIGHT - * \param[in] vmaxt max vertical displacement at edge and at top - * \param[in] vmaxb max vertical displacement at edge and at bottom - * \param[in] incolor L_BRING_IN_WHITE or L_BRING_IN_BLACK - * \return pixd stretched, or NULL on error - * - *
- * Notes:
- *      (1) See pixQuadraticVShear() for details.
- * 
- */ -PIX * -pixQuadraticVShearLI(PIX *pixs, - l_int32 dir, - l_int32 vmaxt, - l_int32 vmaxb, - l_int32 incolor) -{ -l_int32 i, j, id, yp, yf, w, h, d, wm, hm, wpls, wpld; -l_int32 val, rval, gval, bval; -l_uint32 word0, word1; -l_uint32 *datas, *datad, *lines, *lined; -l_float32 delrowt, delrowb, denom1, denom2, dely; -PIX *pix, *pixd; -PIXCMAP *cmap; - - PROCNAME("pixQuadraticVShearLI"); - - if (!pixs) - return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); - pixGetDimensions(pixs, &w, &h, &d); - if (d == 1) - return (PIX *)ERROR_PTR("pixs is 1 bpp", procName, NULL); - cmap = pixGetColormap(pixs); - if (d != 8 && d != 32 && !cmap) - return (PIX *)ERROR_PTR("pixs not 8, 32 bpp, or cmap", procName, NULL); - if (dir != L_WARP_TO_LEFT && dir != L_WARP_TO_RIGHT) - return (PIX *)ERROR_PTR("invalid direction", procName, NULL); - if (incolor != L_BRING_IN_WHITE && incolor != L_BRING_IN_BLACK) - return (PIX *)ERROR_PTR("invalid incolor", procName, NULL); - - if (vmaxt == 0 && vmaxb == 0) - return pixCopy(NULL, pixs); - - /* Remove any existing colormap */ - if (cmap) - pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); - else - pix = pixClone(pixs); - d = pixGetDepth(pix); - if (d != 8 && d != 32) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("invalid depth", procName, NULL); - } - - /* Standard linear interp: subdivide each pixel into 64 parts */ - pixd = pixCreateTemplate(pix); - pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE); - datas = pixGetData(pix); - datad = pixGetData(pixd); - wpls = pixGetWpl(pix); - wpld = pixGetWpl(pixd); - wm = w - 1; - hm = h - 1; - denom1 = 1.0 / (l_float32)h; - denom2 = 1.0 / (l_float32)(wm * wm); - for (j = 0; j < w; j++) { - if (dir == L_WARP_TO_LEFT) { - delrowt = (l_float32)(vmaxt * (wm - j) * (wm - j)) * denom2; - delrowb = (l_float32)(vmaxb * (wm - j) * (wm - j)) * denom2; - } else if (dir == L_WARP_TO_RIGHT) { - delrowt = (l_float32)(vmaxt * j * j) * denom2; - delrowb = (l_float32)(vmaxb * j * j) * denom2; - } - switch (d) 
- { - case 8: - for (id = 0; id < h; id++) { - dely = (delrowt * (hm - id) + delrowb * id) * denom1; - i = 64 * id - (l_int32)(64.0 * dely); - yp = i / 64; - yf = i & 63; - if (yp < 0 || yp > hm) continue; - lines = datas + yp * wpls; - lined = datad + id * wpld; - if (yp < hm) { - val = ((63 - yf) * GET_DATA_BYTE(lines, j) + - yf * GET_DATA_BYTE(lines + wpls, j) + 31) / 63; - } else { /* yp == hm */ - val = GET_DATA_BYTE(lines, j); - } - SET_DATA_BYTE(lined, j, val); - } - break; - case 32: - for (id = 0; id < h; id++) { - dely = (delrowt * (hm - id) + delrowb * id) * denom1; - i = 64 * id - (l_int32)(64.0 * dely); - yp = i / 64; - yf = i & 63; - if (yp < 0 || yp > hm) continue; - lines = datas + yp * wpls; - lined = datad + id * wpld; - if (yp < hm) { - word0 = *(lines + j); - word1 = *(lines + wpls + j); - rval = ((63 - yf) * ((word0 >> L_RED_SHIFT) & 0xff) + - yf * ((word1 >> L_RED_SHIFT) & 0xff) + 31) / 63; - gval = ((63 - yf) * ((word0 >> L_GREEN_SHIFT) & 0xff) + - yf * ((word1 >> L_GREEN_SHIFT) & 0xff) + 31) / 63; - bval = ((63 - yf) * ((word0 >> L_BLUE_SHIFT) & 0xff) + - yf * ((word1 >> L_BLUE_SHIFT) & 0xff) + 31) / 63; - composeRGBPixel(rval, gval, bval, lined + j); - } else { /* yp == hm */ - lined[j] = lines[j]; - } - } - break; - default: - L_ERROR("invalid depth: %d\n", procName, d); - pixDestroy(&pix); - pixDestroy(&pixd); - return NULL; - } - } - - pixDestroy(&pix); - return pixd; -} - - -/*----------------------------------------------------------------------* - * Stereo from a pair of images * - *----------------------------------------------------------------------*/ -/*! - * \brief pixStereoFromPair() - * - * \param[in] pix1 32 bpp rgb - * \param[in] pix2 32 bpp rgb - * \param[in] rwt, gwt, bwt weighting factors used for each component in - pix1 to determine the output red channel - * \return pixd stereo enhanced, or NULL on error - * - *
- * Notes:
- *      (1) pix1 and pix2 are a pair of stereo images, ideally taken
- *          concurrently in the same plane, with some lateral translation.
- *      (2) The output red channel is determined from %pix1.
- *          The output green and blue channels are taken from the green
- *          and blue channels, respectively, of %pix2.
- *      (3) The weights determine how much of each component in %pix1
- *          goes into the output red channel.  The sum of weights
- *          must be 1.0.  If it's not, we scale the weights to
- *          satisfy this criterion.
- *      (4) The most general pixel mapping allowed here is:
- *            rval = rwt * r1 + gwt * g1 + bwt * b1  (from pix1)
- *            gval = g2   (from pix2)
- *            bval = b2   (from pix2)
- *      (5) The simplest method is to use rwt = 1.0, gwt = 0.0, bwt = 0.0,
- *          but this causes unpleasant visual artifacts with red in the image.
- *          Use of green and blue from %pix1 in the red channel,
- *          instead of red, tends to fix that problem.
- * 
- */ -PIX * -pixStereoFromPair(PIX *pix1, - PIX *pix2, - l_float32 rwt, - l_float32 gwt, - l_float32 bwt) -{ -l_int32 i, j, w, h, wpl1, wpl2, rval, gval, bval; -l_uint32 word1, word2; -l_uint32 *data1, *data2, *datad, *line1, *line2, *lined; -l_float32 sum; -PIX *pixd; - - PROCNAME("pixStereoFromPair"); - - if (!pix1 || !pix2) - return (PIX *)ERROR_PTR("pix1, pix2 not both defined", procName, NULL); - if (pixGetDepth(pix1) != 32 || pixGetDepth(pix2) != 32) - return (PIX *)ERROR_PTR("pix1, pix2 not both 32 bpp", procName, NULL); - - /* Make sure the sum of weights is 1.0; otherwise, you can get - * overflow in the gray value. */ - if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) { - rwt = DefaultRedWeight; - gwt = DefaultGreenWeight; - bwt = DefaultBlueWeight; - } - sum = rwt + gwt + bwt; - if (L_ABS(sum - 1.0) > 0.0001) { /* maintain ratios with sum == 1.0 */ - L_WARNING("weights don't sum to 1; maintaining ratios\n", procName); - rwt = rwt / sum; - gwt = gwt / sum; - bwt = bwt / sum; - } - - pixGetDimensions(pix1, &w, &h, NULL); - pixd = pixCreateTemplate(pix1); - data1 = pixGetData(pix1); - data2 = pixGetData(pix2); - datad = pixGetData(pixd); - wpl1 = pixGetWpl(pix1); - wpl2 = pixGetWpl(pix2); - for (i = 0; i < h; i++) { - line1 = data1 + i * wpl1; - line2 = data2 + i * wpl2; - lined = datad + i * wpl1; /* wpl1 works for pixd */ - for (j = 0; j < w; j++) { - word1 = *(line1 + j); - word2 = *(line2 + j); - rval = (l_int32)(rwt * ((word1 >> L_RED_SHIFT) & 0xff) + - gwt * ((word1 >> L_GREEN_SHIFT) & 0xff) + - bwt * ((word1 >> L_BLUE_SHIFT) & 0xff) + 0.5); - gval = (word2 >> L_GREEN_SHIFT) & 0xff; - bval = (word2 >> L_BLUE_SHIFT) & 0xff; - composeRGBPixel(rval, gval, bval, lined + j); - } - } - - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/watershed.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/watershed.c deleted file mode 100644 index d9c20fca..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/watershed.c +++ 
/dev/null @@ -1,1134 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file watershed.c - *
- *
- *      Top-level
- *            L_WSHED         *wshedCreate()
- *            void             wshedDestroy()
- *            l_int32          wshedApply()
- *
- *      Helpers
- *            static l_int32   identifyWatershedBasin()
- *            static l_int32   mergeLookup()
- *            static l_int32   wshedGetHeight()
- *            static void      pushNewPixel()
- *            static void      popNewPixel()
- *            static void      pushWSPixel()
- *            static void      popWSPixel()
- *            static void      debugPrintLUT()
- *            static void      debugWshedMerge()
- *
- *      Output
- *            l_int32          wshedBasins()
- *            PIX             *wshedRenderFill()
- *            PIX             *wshedRenderColors()
- *
- *  The watershed function identifies the "catch basins" of the input
- *  8 bpp image, with respect to the specified seeds or "markers".
- *  The use is in segmentation, but the selection of the markers is
- *  critical to getting meaningful results.
- *
- *  How are the markers selected?  You can't simply use the local
- *  minima, because a typical image has sufficient noise so that
- *  a useful catch basin can easily have multiple local minima.  However
- *  they are selected, the question for the watershed function is
- *  how to handle local minima that are not markers.  The reason
- *  this is important is because of the algorithm used to find the
- *  watersheds, which is roughly like this:
- *
- *    (1) Identify the markers and the local minima, and enter them
- *        into a priority queue based on the pixel value.  Each marker
- *        is shrunk to a single pixel, if necessary, before the
- *        operation starts.
- *    (2) Feed the priority queue with neighbors of pixels that are
- *        popped off the queue.  Each of these queue pixels is labeled
- *        with the index value of its parent.
- *    (3) Each pixel is also labeled, in a 32-bit image, with the marker
- *        or local minimum index, from which it was originally derived.
- *    (4) There are actually 3 classes of labels: seeds, minima, and
- *        fillers.  The fillers are labels of regions that have already
- *        been identified as watersheds and are continuing to fill, for
- *        the purpose of finding higher watersheds.
- *    (5) When a pixel is popped that has already been labeled in the
- *        32-bit image and that label differs from the label of its
- *        parent (stored in the queue pixel), a boundary has been crossed.
- *        There are several cases:
- *         (a) Both parents are derived from markers but at least one
- *             is not deep enough to become a watershed.  Absorb the
- *             shallower basin into the deeper one, fixing the LUT to
- *             redirect the shallower index to the deeper one.
- *         (b) Both parents are derived from markers and both are deep
- *             enough.  Identify and save the watershed for each marker.
- *         (c) One parent was derived from a marker and the other from
- *             a minima: absorb the minima basin into the marker basin.
- *         (d) One parent was derived from a marker and the other is
- *             a filler: identify and save the watershed for the marker.
- *         (e) Both parents are derived from minima: merge them.
- *         (f) One parent is a filler and the other is derived from a
- *             minima: merge the minima into the filler.
- *    (6) The output of the watershed operation consists of:
- *         ~ a pixa of the basins
- *         ~ a pta of the markers
- *         ~ a numa of the watershed levels
- *
- *  Typical usage:
- *      L_WShed *wshed = wshedCreate(pixs, pixseed, mindepth, 0);
- *      wshedApply(wshed);
- *
- *      wshedBasins(wshed, &pixa, &nalevels);
- *        ... do something with pixa, nalevels ...
- *      pixaDestroy(&pixa);
- *      numaDestroy(&nalevels);
- *
- *      Pix *pixd = wshedRenderFill(wshed);
- *
- *      wshedDestroy(&wshed);
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -#ifndef NO_CONSOLE_IO -#define DEBUG_WATERSHED 0 -#endif /* ~NO_CONSOLE_IO */ - -static const l_uint32 MAX_LABEL_VALUE = 0x7fffffff; /* largest l_int32 */ - -/*! New pixel coordinates */ -struct L_NewPixel -{ - l_int32 x; /*!< x coordinate */ - l_int32 y; /*!< y coordinate */ -}; -typedef struct L_NewPixel L_NEWPIXEL; - -/*! Wartshed pixel */ -struct L_WSPixel -{ - l_float32 val; /*!< pixel value */ - l_int32 x; /*!< x coordinate */ - l_int32 y; /*!< y coordinate */ - l_int32 index; /*!< label for set to which pixel belongs */ -}; -typedef struct L_WSPixel L_WSPIXEL; - - - /* Static functions for obtaining bitmap of watersheds */ -static void wshedSaveBasin(L_WSHED *wshed, l_int32 index, l_int32 level); - -static l_int32 identifyWatershedBasin(L_WSHED *wshed, - l_int32 index, l_int32 level, - BOX **pbox, PIX **ppixd); - - /* Static function for merging lut and backlink arrays */ -static l_int32 mergeLookup(L_WSHED *wshed, l_int32 sindex, l_int32 dindex); - - /* Static function for finding the height of the current pixel - above its seed or minima in the watershed. 
*/ -static l_int32 wshedGetHeight(L_WSHED *wshed, l_int32 val, l_int32 label, - l_int32 *pheight); - - /* Static accessors for NewPixel on a queue */ -static void pushNewPixel(L_QUEUE *lq, l_int32 x, l_int32 y, - l_int32 *pminx, l_int32 *pmaxx, - l_int32 *pminy, l_int32 *pmaxy); -static void popNewPixel(L_QUEUE *lq, l_int32 *px, l_int32 *py); - - /* Static accessors for WSPixel on a heap */ -static void pushWSPixel(L_HEAP *lh, L_STACK *stack, l_int32 val, - l_int32 x, l_int32 y, l_int32 index); -static void popWSPixel(L_HEAP *lh, L_STACK *stack, l_int32 *pval, - l_int32 *px, l_int32 *py, l_int32 *pindex); - - /* Static debug print output */ -static void debugPrintLUT(l_int32 *lut, l_int32 size, l_int32 debug); - -static void debugWshedMerge(L_WSHED *wshed, char *descr, l_int32 x, - l_int32 y, l_int32 label, l_int32 index); - - -/*-----------------------------------------------------------------------* - * Top-level watershed * - *-----------------------------------------------------------------------*/ -/*! - * \brief wshedCreate() - * - * \param[in] pixs 8 bpp source - * \param[in] pixm 1 bpp 'marker' seed - * \param[in] mindepth minimum depth; anything less is not saved - * \param[in] debugflag 1 for debug output - * \return WShed, or NULL on error - * - *
- * Notes:
- *      (1) It is not necessary for the fg pixels in the seed image
- *          be at minima, or that they be isolated.  We extract a
- *          single pixel from each connected component, and a seed
- *          anywhere in a watershed will eventually label the watershed
- *          when the filling level reaches it.
- *      (2) Set mindepth to some value to ignore noise in pixs that
- *          can create small local minima.  Any watershed shallower
- *          than mindepth, even if it has a seed, will not be saved;
- *          It will either be incorporated in another watershed or
- *          eliminated.
- * 
- */ -L_WSHED * -wshedCreate(PIX *pixs, - PIX *pixm, - l_int32 mindepth, - l_int32 debugflag) -{ -l_int32 w, h; -L_WSHED *wshed; - - PROCNAME("wshedCreate"); - - if (!pixs) - return (L_WSHED *)ERROR_PTR("pixs is not defined", procName, NULL); - if (pixGetDepth(pixs) != 8) - return (L_WSHED *)ERROR_PTR("pixs is not 8 bpp", procName, NULL); - if (!pixm) - return (L_WSHED *)ERROR_PTR("pixm is not defined", procName, NULL); - if (pixGetDepth(pixm) != 1) - return (L_WSHED *)ERROR_PTR("pixm is not 1 bpp", procName, NULL); - pixGetDimensions(pixs, &w, &h, NULL); - if (pixGetWidth(pixm) != w || pixGetHeight(pixm) != h) - return (L_WSHED *)ERROR_PTR("pixs/m sizes are unequal", procName, NULL); - - if ((wshed = (L_WSHED *)LEPT_CALLOC(1, sizeof(L_WSHED))) == NULL) - return (L_WSHED *)ERROR_PTR("wshed not made", procName, NULL); - - wshed->pixs = pixClone(pixs); - wshed->pixm = pixClone(pixm); - wshed->mindepth = L_MAX(1, mindepth); - wshed->pixlab = pixCreate(w, h, 32); - pixSetAllArbitrary(wshed->pixlab, MAX_LABEL_VALUE); - wshed->pixt = pixCreate(w, h, 1); - wshed->lines8 = pixGetLinePtrs(pixs, NULL); - wshed->linem1 = pixGetLinePtrs(pixm, NULL); - wshed->linelab32 = pixGetLinePtrs(wshed->pixlab, NULL); - wshed->linet1 = pixGetLinePtrs(wshed->pixt, NULL); - wshed->debug = debugflag; - return wshed; -} - - -/*! 
- * \brief wshedDestroy() - * - * \param[in,out] pwshed will be set to null before returning - * \return void - */ -void -wshedDestroy(L_WSHED **pwshed) -{ -l_int32 i; -L_WSHED *wshed; - - PROCNAME("wshedDestroy"); - - if (pwshed == NULL) { - L_WARNING("ptr address is null!\n", procName); - return; - } - - if ((wshed = *pwshed) == NULL) - return; - - pixDestroy(&wshed->pixs); - pixDestroy(&wshed->pixm); - pixDestroy(&wshed->pixlab); - pixDestroy(&wshed->pixt); - if (wshed->lines8) LEPT_FREE(wshed->lines8); - if (wshed->linem1) LEPT_FREE(wshed->linem1); - if (wshed->linelab32) LEPT_FREE(wshed->linelab32); - if (wshed->linet1) LEPT_FREE(wshed->linet1); - pixaDestroy(&wshed->pixad); - ptaDestroy(&wshed->ptas); - numaDestroy(&wshed->nash); - numaDestroy(&wshed->nasi); - numaDestroy(&wshed->namh); - numaDestroy(&wshed->nalevels); - if (wshed->lut) - LEPT_FREE(wshed->lut); - if (wshed->links) { - for (i = 0; i < wshed->arraysize; i++) - numaDestroy(&wshed->links[i]); - LEPT_FREE(wshed->links); - } - LEPT_FREE(wshed); - *pwshed = NULL; - return; -} - - -/*! - * \brief wshedApply() - * - * \param[in] wshed generated from wshedCreate() - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) N.B. This is buggy!  It seems to locate watersheds that are
- *          duplicates.  The watershed extraction after complete fill
- *          grabs some regions belonging to existing watersheds.
- *          See prog/watershedtest.c for testing.
- * 
- */ -l_ok -wshedApply(L_WSHED *wshed) -{ -char two_new_watersheds[] = "Two new watersheds"; -char seed_absorbed_into_seeded_basin[] = "Seed absorbed into seeded basin"; -char one_new_watershed_label[] = "One new watershed (label)"; -char one_new_watershed_index[] = "One new watershed (index)"; -char minima_absorbed_into_seeded_basin[] = - "Minima absorbed into seeded basin"; -char minima_absorbed_by_filler_or_another[] = - "Minima absorbed by filler or another"; -l_int32 nseeds, nother, nboth, arraysize; -l_int32 i, j, val, x, y, w, h, index, mindepth; -l_int32 imin, imax, jmin, jmax, cindex, clabel, nindex; -l_int32 hindex, hlabel, hmin, hmax, minhindex, maxhindex; -l_int32 *lut; -l_uint32 ulabel, uval; -void **lines8, **linelab32; -NUMA *nalut, *nalevels, *nash, *namh, *nasi; -NUMA **links; -L_HEAP *lh; -PIX *pixmin, *pixsd; -PIXA *pixad; -L_STACK *rstack; -PTA *ptas, *ptao; - - PROCNAME("wshedApply"); - - if (!wshed) - return ERROR_INT("wshed not defined", procName, 1); - - /* ------------------------------------------------------------ * - * Initialize priority queue and pixlab with seeds and minima * - * ------------------------------------------------------------ */ - - lh = lheapCreate(0, L_SORT_INCREASING); /* remove lowest values first */ - rstack = lstackCreate(0); /* for reusing the WSPixels */ - pixGetDimensions(wshed->pixs, &w, &h, NULL); - lines8 = wshed->lines8; /* wshed owns this */ - linelab32 = wshed->linelab32; /* ditto */ - - /* Identify seed (marker) pixels, 1 for each c.c. 
in pixm */ - pixSelectMinInConnComp(wshed->pixs, wshed->pixm, &ptas, &nash); - pixsd = pixGenerateFromPta(ptas, w, h); - nseeds = ptaGetCount(ptas); - for (i = 0; i < nseeds; i++) { - ptaGetIPt(ptas, i, &x, &y); - uval = GET_DATA_BYTE(lines8[y], x); - pushWSPixel(lh, rstack, (l_int32)uval, x, y, i); - } - wshed->ptas = ptas; - nasi = numaMakeConstant(1, nseeds); /* indicator array */ - wshed->nasi = nasi; - wshed->nash = nash; - wshed->nseeds = nseeds; - - /* Identify minima that are not seeds. Use these 4 steps: - * (1) Get the local minima, which can have components - * of arbitrary size. This will be a clipping mask. - * (2) Get the image of the actual seeds (pixsd) - * (3) Remove all elements of the clipping mask that have a seed. - * (4) Shrink each of the remaining elements of the minima mask - * to a single pixel. */ - pixLocalExtrema(wshed->pixs, 200, 0, &pixmin, NULL); - pixRemoveSeededComponents(pixmin, pixsd, pixmin, 8, 2); - pixSelectMinInConnComp(wshed->pixs, pixmin, &ptao, &namh); - nother = ptaGetCount(ptao); - for (i = 0; i < nother; i++) { - ptaGetIPt(ptao, i, &x, &y); - uval = GET_DATA_BYTE(lines8[y], x); - pushWSPixel(lh, rstack, (l_int32)uval, x, y, nseeds + i); - } - wshed->namh = namh; - - /* ------------------------------------------------------------ * - * Initialize merging lookup tables * - * ------------------------------------------------------------ */ - - /* nalut should always give the current after-merging index. - * links are effectively backpointers: they are numas associated with - * a dest index of all indices in nalut that point to that index. 
*/ - mindepth = wshed->mindepth; - nboth = nseeds + nother; - arraysize = 2 * nboth; - wshed->arraysize = arraysize; - nalut = numaMakeSequence(0, 1, arraysize); - lut = numaGetIArray(nalut); - wshed->lut = lut; /* wshed owns this */ - links = (NUMA **)LEPT_CALLOC(arraysize, sizeof(NUMA *)); - wshed->links = links; /* wshed owns this */ - nindex = nseeds + nother; /* the next unused index value */ - - /* ------------------------------------------------------------ * - * Fill the basins, using the priority queue * - * ------------------------------------------------------------ */ - - pixad = pixaCreate(nseeds); - wshed->pixad = pixad; /* wshed owns this */ - nalevels = numaCreate(nseeds); - wshed->nalevels = nalevels; /* wshed owns this */ - L_INFO("nseeds = %d, nother = %d\n", procName, nseeds, nother); - while (lheapGetCount(lh) > 0) { - popWSPixel(lh, rstack, &val, &x, &y, &index); -/* lept_stderr("x = %d, y = %d, index = %d\n", x, y, index); */ - ulabel = GET_DATA_FOUR_BYTES(linelab32[y], x); - if (ulabel == MAX_LABEL_VALUE) - clabel = ulabel; - else - clabel = lut[ulabel]; - cindex = lut[index]; - if (clabel == cindex) continue; /* have already seen this one */ - if (clabel == MAX_LABEL_VALUE) { /* new one; assign index and try to - * propagate to all neighbors */ - SET_DATA_FOUR_BYTES(linelab32[y], x, cindex); - imin = L_MAX(0, y - 1); - imax = L_MIN(h - 1, y + 1); - jmin = L_MAX(0, x - 1); - jmax = L_MIN(w - 1, x + 1); - for (i = imin; i <= imax; i++) { - for (j = jmin; j <= jmax; j++) { - if (i == y && j == x) continue; - uval = GET_DATA_BYTE(lines8[i], j); - pushWSPixel(lh, rstack, (l_int32)uval, j, i, cindex); - } - } - } else { /* pixel is already labeled (differently); must resolve */ - - /* If both indices are seeds, check if the min height is - * greater than mindepth. If so, we have two new watersheds; - * locate them and assign to both regions a new index - * for further waterfill. 
If not, absorb the shallower - * watershed into the deeper one and continue filling it. */ - pixGetPixel(pixsd, x, y, &uval); - if (clabel < nseeds && cindex < nseeds) { - wshedGetHeight(wshed, val, clabel, &hlabel); - wshedGetHeight(wshed, val, cindex, &hindex); - hmin = L_MIN(hlabel, hindex); - hmax = L_MAX(hlabel, hindex); - if (hmin == hmax) { - hmin = hlabel; - hmax = hindex; - } - if (wshed->debug) { - lept_stderr("clabel,hlabel = %d,%d\n", clabel, hlabel); - lept_stderr("hmin = %d, hmax = %d\n", hmin, hmax); - lept_stderr("cindex,hindex = %d,%d\n", cindex, hindex); - if (hmin < mindepth) - lept_stderr("Too shallow!\n"); - } - - if (hmin >= mindepth) { - debugWshedMerge(wshed, two_new_watersheds, - x, y, clabel, cindex); - wshedSaveBasin(wshed, cindex, val - 1); - wshedSaveBasin(wshed, clabel, val - 1); - numaSetValue(nasi, cindex, 0); - numaSetValue(nasi, clabel, 0); - - if (wshed->debug) lept_stderr("nindex = %d\n", nindex); - debugPrintLUT(lut, nindex, wshed->debug); - mergeLookup(wshed, clabel, nindex); - debugPrintLUT(lut, nindex, wshed->debug); - mergeLookup(wshed, cindex, nindex); - debugPrintLUT(lut, nindex, wshed->debug); - nindex++; - } else /* extraneous seed within seeded basin; absorb */ { - debugWshedMerge(wshed, seed_absorbed_into_seeded_basin, - x, y, clabel, cindex); - } - maxhindex = clabel; /* TODO: is this part of above 'else'? */ - minhindex = cindex; - if (hindex > hlabel) { - maxhindex = cindex; - minhindex = clabel; - } - mergeLookup(wshed, minhindex, maxhindex); - } else if (clabel < nseeds && cindex >= nboth) { - /* If one index is a seed and the other is a merge of - * 2 watersheds, generate a single watershed. 
*/ - debugWshedMerge(wshed, one_new_watershed_label, - x, y, clabel, cindex); - wshedSaveBasin(wshed, clabel, val - 1); - numaSetValue(nasi, clabel, 0); - mergeLookup(wshed, clabel, cindex); - } else if (cindex < nseeds && clabel >= nboth) { - debugWshedMerge(wshed, one_new_watershed_index, - x, y, clabel, cindex); - wshedSaveBasin(wshed, cindex, val - 1); - numaSetValue(nasi, cindex, 0); - mergeLookup(wshed, cindex, clabel); - } else if (clabel < nseeds) { /* cindex from minima; absorb */ - /* If one index is a seed and the other is from a minimum, - * merge the minimum wshed into the seed wshed. */ - debugWshedMerge(wshed, minima_absorbed_into_seeded_basin, - x, y, clabel, cindex); - mergeLookup(wshed, cindex, clabel); - } else if (cindex < nseeds) { /* clabel from minima; absorb */ - debugWshedMerge(wshed, minima_absorbed_into_seeded_basin, - x, y, clabel, cindex); - mergeLookup(wshed, clabel, cindex); - } else { /* If neither index is a seed, just merge */ - debugWshedMerge(wshed, minima_absorbed_by_filler_or_another, - x, y, clabel, cindex); - mergeLookup(wshed, clabel, cindex); - } - } - } - -#if 0 - /* Use the indicator array to save any watersheds that fill - * to the maximum value. This seems to screw things up! */ - for (i = 0; i < nseeds; i++) { - numaGetIValue(nasi, i, &ival); - if (ival == 1) { - wshedSaveBasin(wshed, lut[i], val - 1); - numaSetValue(nasi, i, 0); - } - } -#endif - - numaDestroy(&nalut); - pixDestroy(&pixmin); - pixDestroy(&pixsd); - ptaDestroy(&ptao); - lheapDestroy(&lh, TRUE); - lstackDestroy(&rstack, TRUE); - return 0; -} - - -/*-----------------------------------------------------------------------* - * Helpers * - *-----------------------------------------------------------------------*/ -/*! - * \brief wshedSaveBasin() - * - * \param[in] wshed - * \param[in] index index of basin to be located - * \param[in] level filling level reached at the time this function - * is called - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This identifies a single watershed.  It does not change
- *          the LUT, which must be done subsequently.
- *      (2) The fill level of a basin is taken to be %level - 1.
- * 
- */ -static void -wshedSaveBasin(L_WSHED *wshed, - l_int32 index, - l_int32 level) -{ -BOX *box; -PIX *pix; - - PROCNAME("wshedSaveBasin"); - - if (!wshed) { - L_ERROR("wshed not defined\n", procName); - return; - } - - if (identifyWatershedBasin(wshed, index, level, &box, &pix) == 0) { - pixaAddPix(wshed->pixad, pix, L_INSERT); - pixaAddBox(wshed->pixad, box, L_INSERT); - numaAddNumber(wshed->nalevels, level - 1); - } - return; -} - - -/*! - * \brief identifyWatershedBasin() - * - * \param[in] wshed - * \param[in] index index of basin to be located - * \param[in] level of basin at point at which the two basins met - * \param[out] pbox bounding box of basin - * \param[out] ppixd pix of basin, cropped to its bounding box - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) This is a static function, so we assume pixlab, pixs and pixt
- *          exist and are the same size.
- *      (2) It selects all pixels that have the label %index in pixlab
- *          and that have a value in pixs that is less than %level.
- *      (3) It is used whenever two seeded basins meet (typically at a saddle),
- *          or when one seeded basin meets a 'filler'.  All identified
- *          basins are saved as a watershed.
- * 
- */ -static l_int32 -identifyWatershedBasin(L_WSHED *wshed, - l_int32 index, - l_int32 level, - BOX **pbox, - PIX **ppixd) -{ -l_int32 imin, imax, jmin, jmax, minx, miny, maxx, maxy; -l_int32 bw, bh, i, j, w, h, x, y; -l_int32 *lut; -l_uint32 label, bval, lval; -void **lines8, **linelab32, **linet1; -BOX *box; -PIX *pixs, *pixt, *pixd; -L_QUEUE *lq; - - PROCNAME("identifyWatershedBasin"); - - if (!pbox) - return ERROR_INT("&box not defined", procName, 1); - *pbox = NULL; - if (!ppixd) - return ERROR_INT("&pixd not defined", procName, 1); - *ppixd = NULL; - if (!wshed) - return ERROR_INT("wshed not defined", procName, 1); - - /* Make a queue and an auxiliary stack */ - lq = lqueueCreate(0); - lq->stack = lstackCreate(0); - - pixs = wshed->pixs; - pixt = wshed->pixt; - lines8 = wshed->lines8; - linelab32 = wshed->linelab32; - linet1 = wshed->linet1; - lut = wshed->lut; - pixGetDimensions(pixs, &w, &h, NULL); - - /* Prime the queue with the seed pixel for this watershed. */ - minx = miny = 1000000; - maxx = maxy = 0; - ptaGetIPt(wshed->ptas, index, &x, &y); - pixSetPixel(pixt, x, y, 1); - pushNewPixel(lq, x, y, &minx, &maxx, &miny, &maxy); - if (wshed->debug) lept_stderr("prime: (x,y) = (%d, %d)\n", x, y); - - /* Each pixel in a spreading breadth-first search is inspected. - * It is accepted as part of this watershed, and pushed on - * the search queue, if: - * (1) It has a label value equal to %index - * (2) The pixel value is less than %level, the overflow - * height at which the two basins join. - * (3) It has not yet been seen in this search. 
*/ - while (lqueueGetCount(lq) > 0) { - popNewPixel(lq, &x, &y); - imin = L_MAX(0, y - 1); - imax = L_MIN(h - 1, y + 1); - jmin = L_MAX(0, x - 1); - jmax = L_MIN(w - 1, x + 1); - for (i = imin; i <= imax; i++) { - for (j = jmin; j <= jmax; j++) { - if (j == x && i == y) continue; /* parent */ - label = GET_DATA_FOUR_BYTES(linelab32[i], j); - if (label == MAX_LABEL_VALUE || lut[label] != index) continue; - bval = GET_DATA_BIT(linet1[i], j); - if (bval == 1) continue; /* already seen */ - lval = GET_DATA_BYTE(lines8[i], j); - if (lval >= level) continue; /* too high */ - SET_DATA_BIT(linet1[i], j); - pushNewPixel(lq, j, i, &minx, &maxx, &miny, &maxy); - } - } - } - - /* Extract the box and pix, and clear pixt */ - bw = maxx - minx + 1; - bh = maxy - miny + 1; - box = boxCreate(minx, miny, bw, bh); - pixd = pixClipRectangle(pixt, box, NULL); - pixRasterop(pixt, minx, miny, bw, bh, PIX_SRC ^ PIX_DST, pixd, 0, 0); - *pbox = box; - *ppixd = pixd; - - lqueueDestroy(&lq, 1); - return 0; -} - - -/*! - * \brief mergeLookup() - * - * \param[in] wshed - * \param[in] sindex primary index being changed in the merge - * \param[in] dindex index that %sindex will point to after the merge - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The links are a sparse array of Numas showing current back-links.
- *          The lut gives the current index (of the seed or the minima
- *          for the wshed  in which it is located.
- *      (2) Think of each entry in the lut.  There are two types:
- *             owner:     lut[index] = index
- *             redirect:  lut[index] != index
- *      (3) This is called each time a merge occurs.  It puts the lut
- *          and backlinks in a canonical form after the merge, where
- *          all entries in the lut point to the current "owner", which
- *          has all backlinks.  That is, every "redirect" in the lut
- *          points to an "owner".  The lut always gives the index of
- *          the current owner.
- * 
- */ -static l_int32 -mergeLookup(L_WSHED *wshed, - l_int32 sindex, - l_int32 dindex) -{ -l_int32 i, n, size, index; -l_int32 *lut; -NUMA *na; -NUMA **links; - - PROCNAME("mergeLookup"); - - if (!wshed) - return ERROR_INT("wshed not defined", procName, 1); - size = wshed->arraysize; - if (sindex < 0 || sindex >= size) - return ERROR_INT("invalid sindex", procName, 1); - if (dindex < 0 || dindex >= size) - return ERROR_INT("invalid dindex", procName, 1); - - /* Redirect links in the lut */ - n = 0; - links = wshed->links; - lut = wshed->lut; - if ((na = links[sindex]) != NULL) { - n = numaGetCount(na); - for (i = 0; i < n; i++) { - numaGetIValue(na, i, &index); - lut[index] = dindex; - } - } - lut[sindex] = dindex; - - /* Shift the backlink arrays from sindex to dindex. - * sindex should have no backlinks because all entries in the - * lut that were previously pointing to it have been redirected - * to dindex. */ - if (!links[dindex]) - links[dindex] = numaCreate(n); - numaJoin(links[dindex], links[sindex], 0, -1); - numaAddNumber(links[dindex], sindex); - numaDestroy(&links[sindex]); - - return 0; -} - - -/*! - * \brief wshedGetHeight() - * - * \param[in] wshed array of current indices - * \param[in] val value of current pixel popped off queue - * \param[in] label of pixel or 32 bpp label image - * \param[out] pheight height of current value from seed - * or minimum of watershed - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) It is only necessary to find the height for a watershed
- *          that is indexed by a seed or a minima.  This function should
- *          not be called on a finished watershed (that continues to fill).
- * 
- */ -static l_int32 -wshedGetHeight(L_WSHED *wshed, - l_int32 val, - l_int32 label, - l_int32 *pheight) -{ -l_int32 minval; - - PROCNAME("wshedGetHeight"); - - if (!pheight) - return ERROR_INT("&height not defined", procName, 1); - *pheight = 0; - if (!wshed) - return ERROR_INT("wshed not defined", procName, 1); - - if (label < wshed->nseeds) - numaGetIValue(wshed->nash, label, &minval); - else if (label < wshed->nseeds + wshed->nother) - numaGetIValue(wshed->namh, label, &minval); - else - return ERROR_INT("finished watershed; should not call", procName, 1); - - *pheight = val - minval; - return 0; -} - - -/* - * \brief pushNewPixel() - * - * \param[in] lqueue - * \param[in] x, y pixel coordinates - * \param[out] pminx, pmaxx, pminy, pmaxy bounding box update - * \return void - * - *
- * Notes:
- *      (1) This is a wrapper for adding a NewPixel to a queue, which
- *          updates the bounding box for all pixels on that queue and
- *          uses the storage stack to retrieve a NewPixel.
- * 
- */ -static void -pushNewPixel(L_QUEUE *lq, - l_int32 x, - l_int32 y, - l_int32 *pminx, - l_int32 *pmaxx, - l_int32 *pminy, - l_int32 *pmaxy) -{ -L_NEWPIXEL *np; - - PROCNAME("pushNewPixel"); - - if (!lq) { - L_ERROR("queue not defined\n", procName); - return; - } - - /* Adjust bounding box */ - *pminx = L_MIN(*pminx, x); - *pmaxx = L_MAX(*pmaxx, x); - *pminy = L_MIN(*pminy, y); - *pmaxy = L_MAX(*pmaxy, y); - - /* Get a newpixel to use */ - if (lstackGetCount(lq->stack) > 0) - np = (L_NEWPIXEL *)lstackRemove(lq->stack); - else - np = (L_NEWPIXEL *)LEPT_CALLOC(1, sizeof(L_NEWPIXEL)); - - np->x = x; - np->y = y; - lqueueAdd(lq, np); - return; -} - - -/* - * \brief popNewPixel() - * - * \param[in] lqueue - * \param[out] px, py pixel coordinates - * \return void - * - *
- * Notes:
- *      (1) This is a wrapper for removing a NewPixel from a queue,
- *          which returns the pixel coordinates and saves the NewPixel
- *          on the storage stack.
- * 
- */ -static void -popNewPixel(L_QUEUE *lq, - l_int32 *px, - l_int32 *py) -{ -L_NEWPIXEL *np; - - PROCNAME("popNewPixel"); - - if (!lq) { - L_ERROR("lqueue not defined\n", procName); - return; - } - - if ((np = (L_NEWPIXEL *)lqueueRemove(lq)) == NULL) - return; - *px = np->x; - *py = np->y; - lstackAdd(lq->stack, np); /* save for re-use */ - return; -} - - -/* - * \brief pushWSPixel() - * - * \param[in] lh priority queue - * \param[in] stack of reusable WSPixels - * \param[in] val pixel value: used for ordering the heap - * \param[in] x, y pixel coordinates - * \param[in] index label for set to which pixel belongs - * \return void - * - *
- * Notes:
- *      (1) This is a wrapper for adding a WSPixel to a heap.  It
- *          uses the storage stack to retrieve a WSPixel.
- * 
- */ -static void -pushWSPixel(L_HEAP *lh, - L_STACK *stack, - l_int32 val, - l_int32 x, - l_int32 y, - l_int32 index) -{ -L_WSPIXEL *wsp; - - PROCNAME("pushWSPixel"); - - if (!lh) { - L_ERROR("heap not defined\n", procName); - return; - } - if (!stack) { - L_ERROR("stack not defined\n", procName); - return; - } - - /* Get a wspixel to use */ - if (lstackGetCount(stack) > 0) - wsp = (L_WSPIXEL *)lstackRemove(stack); - else - wsp = (L_WSPIXEL *)LEPT_CALLOC(1, sizeof(L_WSPIXEL)); - - wsp->val = (l_float32)val; - wsp->x = x; - wsp->y = y; - wsp->index = index; - lheapAdd(lh, wsp); - return; -} - - -/* - * \brief popWSPixel() - * - * \param[in] lh priority queue - * \param[in] stack of reusable WSPixels - * \param[out] pval pixel value - * \param[out] px, py pixel coordinates - * \param[out] pindex label for set to which pixel belongs - * \return void - * - *
- * Notes:
- *      (1) This is a wrapper for removing a WSPixel from a heap,
- *          which returns the WSPixel data and saves the WSPixel
- *          on the storage stack.
- * 
- */ -static void -popWSPixel(L_HEAP *lh, - L_STACK *stack, - l_int32 *pval, - l_int32 *px, - l_int32 *py, - l_int32 *pindex) -{ -L_WSPIXEL *wsp; - - PROCNAME("popWSPixel"); - - if (!lh) { - L_ERROR("lheap not defined\n", procName); - return; - } - if (!stack) { - L_ERROR("stack not defined\n", procName); - return; - } - if (!pval || !px || !py || !pindex) { - L_ERROR("data can't be returned\n", procName); - return; - } - - if ((wsp = (L_WSPIXEL *)lheapRemove(lh)) == NULL) - return; - *pval = (l_int32)wsp->val; - *px = wsp->x; - *py = wsp->y; - *pindex = wsp->index; - lstackAdd(stack, wsp); /* save for re-use */ - return; -} - - -static void -debugPrintLUT(l_int32 *lut, - l_int32 size, - l_int32 debug) -{ -l_int32 i; - - if (!debug) return; - lept_stderr("lut: "); - for (i = 0; i < size; i++) - lept_stderr( "%d ", lut[i]); - lept_stderr("\n"); - return; -} - - -static void -debugWshedMerge(L_WSHED *wshed, - char *descr, - l_int32 x, - l_int32 y, - l_int32 label, - l_int32 index) -{ - if (!wshed || (wshed->debug == 0)) - return; - lept_stderr("%s:\n", descr); - lept_stderr(" (x, y) = (%d, %d)\n", x, y); - lept_stderr(" clabel = %d, cindex = %d\n", label, index); - return; -} - - -/*-----------------------------------------------------------------------* - * Output * - *-----------------------------------------------------------------------*/ -/*! - * \brief wshedBasins() - * - * \param[in] wshed - * \param[out] ppixa [optional] mask of watershed basins - * \param[out] pnalevels [optional] watershed levels - * \return 0 if OK, 1 on error - */ -l_ok -wshedBasins(L_WSHED *wshed, - PIXA **ppixa, - NUMA **pnalevels) -{ - PROCNAME("wshedBasins"); - - if (!wshed) - return ERROR_INT("wshed not defined", procName, 1); - - if (ppixa) - *ppixa = pixaCopy(wshed->pixad, L_CLONE); - if (pnalevels) - *pnalevels = numaClone(wshed->nalevels); - return 0; -} - - -/*! 
- * \brief wshedRenderFill() - * - * \param[in] wshed - * \return pixd initial image with all basins filled, or NULL on error - */ -PIX * -wshedRenderFill(L_WSHED *wshed) -{ -l_int32 i, n, level, bx, by; -NUMA *na; -PIX *pix, *pixd; -PIXA *pixa; - - PROCNAME("wshedRenderFill"); - - if (!wshed) - return (PIX *)ERROR_PTR("wshed not defined", procName, NULL); - - wshedBasins(wshed, &pixa, &na); - pixd = pixCopy(NULL, wshed->pixs); - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - pixaGetBoxGeometry(pixa, i, &bx, &by, NULL, NULL); - numaGetIValue(na, i, &level); - pixPaintThroughMask(pixd, pix, bx, by, level); - pixDestroy(&pix); - } - - pixaDestroy(&pixa); - numaDestroy(&na); - return pixd; -} - - -/*! - * \brief wshedRenderColors() - * - * \param[in] wshed - * \return pixd initial image with all basins filled, or null on error - */ -PIX * -wshedRenderColors(L_WSHED *wshed) -{ -l_int32 w, h; -PIX *pixg, *pixt, *pixc, *pixm, *pixd; -PIXA *pixa; - - PROCNAME("wshedRenderColors"); - - if (!wshed) - return (PIX *)ERROR_PTR("wshed not defined", procName, NULL); - - wshedBasins(wshed, &pixa, NULL); - pixg = pixCopy(NULL, wshed->pixs); - pixGetDimensions(wshed->pixs, &w, &h, NULL); - pixd = pixConvertTo32(pixg); - pixt = pixaDisplayRandomCmap(pixa, w, h); - pixc = pixConvertTo32(pixt); - pixm = pixaDisplay(pixa, w, h); - pixCombineMasked(pixd, pixc, pixm); - - pixDestroy(&pixg); - pixDestroy(&pixt); - pixDestroy(&pixc); - pixDestroy(&pixm); - pixaDestroy(&pixa); - return pixd; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/watershed.h b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/watershed.h deleted file mode 100644 index d6b20775..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/watershed.h +++ /dev/null @@ -1,64 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. 
- - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -#ifndef LEPTONICA_WATERSHED_H -#define LEPTONICA_WATERSHED_H - -/*! - * \file watershed.h - * - * Simple data structure to hold watershed data. - * All data here is owned by the L_WShed and must be freed. - */ - -/*! Simple data structure to hold watershed data. 
*/ -struct L_WShed -{ - struct Pix *pixs; /*!< clone of input 8 bpp pixs */ - struct Pix *pixm; /*!< clone of input 1 bpp seed (marker) pixm */ - l_int32 mindepth; /*!< minimum depth allowed for a watershed */ - struct Pix *pixlab; /*!< 16 bpp label pix */ - struct Pix *pixt; /*!< scratch pix for computing wshed regions */ - void **lines8; /*!< line ptrs for pixs */ - void **linem1; /*!< line ptrs for pixm */ - void **linelab32; /*!< line ptrs for pixlab */ - void **linet1; /*!< line ptrs for pixt */ - struct Pixa *pixad; /*!< result: 1 bpp pixa of watersheds */ - struct Pta *ptas; /*!< pta of initial seed pixels */ - struct Numa *nasi; /*!< numa of seed indicators; 0 if completed */ - struct Numa *nash; /*!< numa of initial seed heights */ - struct Numa *namh; /*!< numa of initial minima heights */ - struct Numa *nalevels; /*!< result: numa of watershed levels */ - l_int32 nseeds; /*!< number of seeds (markers) */ - l_int32 nother; /*!< number of minima different from seeds */ - l_int32 *lut; /*!< lut for pixel indices */ - struct Numa **links; /*!< back-links into lut, for updates */ - l_int32 arraysize; /*!< size of links array */ - l_int32 debug; /*!< set to 1 for debug output */ -}; -typedef struct L_WShed L_WSHED; - -#endif /* LEPTONICA_WATERSHED_H */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpanimio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpanimio.c deleted file mode 100644 index c47b5643..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpanimio.c +++ /dev/null @@ -1,273 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. 
- - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file webpanimio.c - *
- *
- *    Writing animated WebP
- *          l_int32          pixaWriteWebPAnim()
- *          l_int32          pixaWriteStreamWebPAnim()
- *          l_int32          pixaWriteMemWebPAnim()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* -----------------------------------------------*/ -#if HAVE_LIBWEBP_ANIM /* defined in environ.h */ -/* -----------------------------------------------*/ -#include "webp/decode.h" -#include "webp/encode.h" -#include "webp/mux.h" -#include "webp/demux.h" - -/*---------------------------------------------------------------------* - * Writing animated WebP * - *---------------------------------------------------------------------*/ -/*! - * \brief pixaWriteWebPAnim() - * - * \param[in] filename - * \param[in] pixa with images of all depths; cmap OK - * \param[in] loopcount [0 for infinite] - * \param[in] duration in ms, for each image - * \param[in] quality 0 - 100 for lossy; default ~80 - * \param[in] lossless use 1 for lossless; 0 for lossy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Special top-level function allowing specification of quality.
- * 
- */ -l_ok -pixaWriteWebPAnim(const char *filename, - PIXA *pixa, - l_int32 loopcount, - l_int32 duration, - l_int32 quality, - l_int32 lossless) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixaWriteWebPAnim"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixaWriteStreamWebPAnim(fp, pixa, loopcount, duration, - quality, lossless); - fclose(fp); - if (ret) - return ERROR_INT("pixs not compressed to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixaWriteStreamWebPAnim() - * - * \param[in] fp file stream - * \param[in] pixa with images of all depths; cmap OK - * \param[in] loopcount [0 for infinite] - * \param[in] duration in ms, for each image - * \param[in] quality 0 - 100 for lossy; default ~80 - * \param[in] lossless use 1 for lossless; 0 for lossy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteMemWebP() for details.
- *      (2) Use 'free', and not leptonica's 'LEPT_FREE', for all heap data
- *          that is returned from the WebP library.
- * 
- */ -l_ok -pixaWriteStreamWebPAnim(FILE *fp, - PIXA *pixa, - l_int32 loopcount, - l_int32 duration, - l_int32 quality, - l_int32 lossless) -{ -l_uint8 *filedata; -size_t filebytes, nbytes; - - PROCNAME("pixaWriteStreamWebpAnim"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - filedata = NULL; - pixaWriteMemWebPAnim(&filedata, &filebytes, pixa, loopcount, - duration, quality, lossless); - rewind(fp); - if (!filedata) - return ERROR_INT("filedata not made", procName, 1); - nbytes = fwrite(filedata, 1, filebytes, fp); - free(filedata); - if (nbytes != filebytes) - return ERROR_INT("Write error", procName, 1); - return 0; -} - - -/*! - * \brief pixaWriteMemWebPAnim() - * - * \param[out] pencdata webp encoded data of pixs - * \param[out] pencsize size of webp encoded data - * \param[in] pixa with images of any depth, cmapped OK - * \param[in] loopcount [0 for infinite] - * \param[in] duration in ms, for each image - * \param[in] quality 0 - 100 for lossy; default ~80 - * \param[in] lossless use 1 for lossless; 0 for lossy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteMemWebP() for details of webp encoding of images.
- * 
- */ -l_ok -pixaWriteMemWebPAnim(l_uint8 **pencdata, - size_t *pencsize, - PIXA *pixa, - l_int32 loopcount, - l_int32 duration, - l_int32 quality, - l_int32 lossless) -{ -l_int32 i, n, same, w, h, wpl, ret; -l_uint8 *data; -PIX *pix1, *pix2; -WebPAnimEncoder *enc; -WebPAnimEncoderOptions enc_options; -WebPConfig config; -WebPData webp_data; -WebPMux *mux = NULL; -WebPMuxAnimParams newparams; -WebPPicture frame; - - PROCNAME("pixaWriteMemWebPAnim"); - - if (!pencdata) - return ERROR_INT("&encdata not defined", procName, 1); - *pencdata = NULL; - if (!pencsize) - return ERROR_INT("&encsize not defined", procName, 1); - *pencsize = 0; - if (!pixa) - return ERROR_INT("&pixa not defined", procName, 1); - if ((n = pixaGetCount(pixa)) == 0) - return ERROR_INT("no images in pixa", procName, 1); - if (loopcount < 0) loopcount = 0; - if (lossless == 0 && (quality < 0 || quality > 100)) - return ERROR_INT("quality not in [0 ... 100]", procName, 1); - - pixaVerifyDimensions(pixa, &same, &w, &h); - if (!same) - return ERROR_INT("sizes of all pix are not the same", procName, 1); - - /* Set up the encoder */ - WebPAnimEncoderOptionsInit(&enc_options); - enc = WebPAnimEncoderNew(w, h, &enc_options); - - for (i = 0; i < n; i++) { - /* Make a frame for each image. Convert the pix to RGBA with - * an opaque alpha layer, and put the raster data in the frame. 
*/ - pix1 = pixaGetPix(pixa, i, L_CLONE); - pix2 = pixConvertTo32(pix1); - pixSetComponentArbitrary(pix2, L_ALPHA_CHANNEL, 255); - pixEndianByteSwap(pix2); - data = (l_uint8 *)pixGetData(pix2); - wpl = pixGetWpl(pix2); - WebPPictureInit(&frame); - frame.width = w; - frame.height = h; - WebPPictureImportRGBA(&frame, data, 4 * wpl); - pixDestroy(&pix1); - pixDestroy(&pix2); - - /* Add the frame data to the encoder, and clear its memory */ - WebPConfigInit(&config); - config.lossless = lossless; - config.quality = quality; - WebPAnimEncoderAdd(enc, &frame, duration * i, &config); - WebPPictureFree(&frame); - } - WebPAnimEncoderAdd(enc, NULL, duration * i, NULL); /* add a blank frame */ - WebPAnimEncoderAssemble(enc, &webp_data); /* encode the data */ - WebPAnimEncoderDelete(enc); - - /* Set the loopcount if requested. Note that when you make a mux, - * it imports the webp_data that was previously made, including - * the webp encoded images. Before you re-export that data using - * WebPMuxAssemble(), free the heap data in webp_data. There is an - * example for setting the loop count in the webp distribution; - * see gif2webp.c. 
*/ - if (loopcount > 0) { - mux = WebPMuxCreate(&webp_data, 1); - if (!mux) { - L_ERROR("could not re-mux to add loop count\n", procName); - } else { - ret = WebPMuxGetAnimationParams(mux, &newparams); - if (ret != WEBP_MUX_OK) { - L_ERROR("failed to get loop count\n", procName); - } else { - newparams.loop_count = loopcount; - ret = WebPMuxSetAnimationParams(mux, &newparams); - if (ret != WEBP_MUX_OK) - L_ERROR("failed to set loop count\n", procName); - } - WebPDataClear(&webp_data); - WebPMuxAssemble(mux, &webp_data); - WebPMuxDelete(mux); - } - } - - *pencdata = (l_uint8 *)webp_data.bytes; - *pencsize = webp_data.size; - L_INFO("data size = %zu\n", procName, webp_data.size); - return 0; -} - - -/* --------------------------------------------*/ -#endif /* HAVE_LIBWEBP_ANIM */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpanimiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpanimiostub.c deleted file mode 100644 index b186196f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpanimiostub.c +++ /dev/null @@ -1,71 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. 
- - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file webpanimiostub.c - *
- *
- *     Stubs for webpanimio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* -----------------------------------------------*/ -#if !HAVE_LIBWEBP_ANIM /* defined in environ.h */ -/* -----------------------------------------------*/ - -l_ok pixaWriteWebPAnim(const char *filename, PIXA *pixa, l_int32 loopcount, - l_int32 duration, l_int32 quality, l_int32 lossless) -{ - return ERROR_INT("function not present", "pixaWriteWebPAnim", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaWriteStreamWebPAnim(FILE *fp, PIXA *pixa, l_int32 loopcount, - l_int32 duration, l_int32 quality, - l_int32 lossless) -{ - return ERROR_INT("function not present", "pixaWriteStreamWebPAnim", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixaWriteMemWebPAnim(l_uint8 **pencdata, size_t *pencsize, PIXA *pixa, - l_int32 loopcount, l_int32 duration, - l_int32 quality, l_int32 lossless) -{ - return ERROR_INT("function not present", "pixaWriteMemWebPAnim", 1); -} - -/* --------------------------------------------*/ -#endif /* !HAVE_LIBWEBP_ANIM */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpio.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpio.c deleted file mode 100644 index db7f8b3e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpio.c +++ /dev/null @@ -1,417 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file webpio.c - *
- *
- *    Reading WebP
- *          PIX             *pixReadStreamWebP()
- *          PIX             *pixReadMemWebP()
- *
- *    Reading WebP header
- *          l_int32          readHeaderWebP()
- *          l_int32          readHeaderMemWebP()
- *
- *    Writing WebP
- *          l_int32          pixWriteWebP()  [ special top level ]
- *          l_int32          pixWriteStreamWebP()
- *          l_int32          pixWriteMemWebP()
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if HAVE_LIBWEBP /* defined in environ.h */ -/* --------------------------------------------*/ -#include "webp/decode.h" -#include "webp/encode.h" - -/*---------------------------------------------------------------------* - * Reading WebP * - *---------------------------------------------------------------------*/ -/*! - * \brief pixReadStreamWebP() - * - * \param[in] fp file stream corresponding to WebP image - * \return pix 32 bpp, or NULL on error - */ -PIX * -pixReadStreamWebP(FILE *fp) -{ -l_uint8 *filedata; -size_t filesize; -PIX *pix; - - PROCNAME("pixReadStreamWebP"); - - if (!fp) - return (PIX *)ERROR_PTR("fp not defined", procName, NULL); - - /* Read data from file and decode into Y,U,V arrays */ - rewind(fp); - if ((filedata = l_binaryReadStream(fp, &filesize)) == NULL) - return (PIX *)ERROR_PTR("filedata not read", procName, NULL); - - pix = pixReadMemWebP(filedata, filesize); - LEPT_FREE(filedata); - return pix; -} - - -/*! - * \brief pixReadMemWebP() - * - * \param[in] filedata webp compressed data in memory - * \param[in] filesize number of bytes in data - * \return pix 32 bpp, or NULL on error - * - *
- * Notes:
- *      (1) When the encoded data only has 3 channels (no alpha),
- *          WebPDecodeRGBAInto() generates a raster of 32-bit pixels, with
- *          the alpha channel set to opaque (255).
- *      (2) We don't need to use the gnu runtime functions like fmemopen()
- *          for redirecting data from a stream to memory, because
- *          the webp library has been written with memory-to-memory
- *          functions at the lowest level (which is good!).  And, in
- *          any event, fmemopen() doesn't work with l_binaryReadStream().
- * 
- */ -PIX * -pixReadMemWebP(const l_uint8 *filedata, - size_t filesize) -{ -l_uint8 *out = NULL; -l_int32 w, h, has_alpha, wpl, stride; -l_uint32 *data; -size_t size; -PIX *pix; -WebPBitstreamFeatures features; - - PROCNAME("pixReadMemWebP"); - - if (!filedata) - return (PIX *)ERROR_PTR("filedata not defined", procName, NULL); - - if (WebPGetFeatures(filedata, filesize, &features)) - return (PIX *)ERROR_PTR("Invalid WebP file", procName, NULL); - w = features.width; - h = features.height; - has_alpha = features.has_alpha; - - /* Write from compressed Y,U,V arrays to pix raster data */ - pix = pixCreate(w, h, 32); - pixSetInputFormat(pix, IFF_WEBP); - if (has_alpha) pixSetSpp(pix, 4); - data = pixGetData(pix); - wpl = pixGetWpl(pix); - stride = wpl * 4; - size = (size_t)stride * h; - out = WebPDecodeRGBAInto(filedata, filesize, (uint8_t *)data, size, - stride); - if (out == NULL) { /* error: out should also point to data */ - pixDestroy(&pix); - return (PIX *)ERROR_PTR("WebP decode failed", procName, NULL); - } - - /* The WebP API expects data in RGBA order. The pix stores - * in host-dependent order with R as the MSB and A as the LSB. - * On little-endian machines, the bytes in the word must - * be swapped; e.g., R goes from byte 0 (LSB) to byte 3 (MSB). - * No swapping is necessary for big-endians. */ - pixEndianByteSwap(pix); - return pix; -} - - -/*! 
- * \brief readHeaderWebP() - * - * \param[in] filename - * \param[out] pw width - * \param[out] ph height - * \param[out] pspp spp (3 or 4) - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderWebP(const char *filename, - l_int32 *pw, - l_int32 *ph, - l_int32 *pspp) -{ -l_uint8 data[100]; /* expect size info within the first 50 bytes or so */ -l_int32 nbytes, bytesread; -size_t filesize; -FILE *fp; - - PROCNAME("readHeaderWebP"); - - if (!pw || !ph || !pspp) - return ERROR_INT("input ptr(s) not defined", procName, 1); - *pw = *ph = *pspp = 0; - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - /* Read no more than 100 bytes from the file */ - if ((filesize = nbytesInFile(filename)) == 0) - return ERROR_INT("no file size found", procName, 1); - if (filesize < 100) - L_WARNING("very small webp file\n", procName); - nbytes = L_MIN(filesize, 100); - if ((fp = fopenReadStream(filename)) == NULL) - return ERROR_INT("image file not found", procName, 1); - bytesread = fread(data, 1, nbytes, fp); - fclose(fp); - if (bytesread != nbytes) - return ERROR_INT("failed to read requested data", procName, 1); - - return readHeaderMemWebP(data, nbytes, pw, ph, pspp); -} - - -/*! 
- * \brief readHeaderMemWebP() - * - * \param[in] data - * \param[in] size 100 bytes is sufficient - * \param[out] pw width - * \param[out] ph height - * \param[out] pspp spp (3 or 4) - * \return 0 if OK, 1 on error - */ -l_ok -readHeaderMemWebP(const l_uint8 *data, - size_t size, - l_int32 *pw, - l_int32 *ph, - l_int32 *pspp) -{ -WebPBitstreamFeatures features; - - PROCNAME("readHeaderWebP"); - - if (pw) *pw = 0; - if (ph) *ph = 0; - if (pspp) *pspp = 0; - if (!data) - return ERROR_INT("data not defined", procName, 1); - if (!pw || !ph || !pspp) - return ERROR_INT("input ptr(s) not defined", procName, 1); - - if (WebPGetFeatures(data, (l_int32)size, &features)) - return ERROR_INT("invalid WebP file", procName, 1); - *pw = features.width; - *ph = features.height; - *pspp = (features.has_alpha) ? 4 : 3; - return 0; -} - - -/*---------------------------------------------------------------------* - * Writing WebP * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWriteWebP() - * - * \param[in] filename - * \param[in] pixs - * \param[in] quality 0 - 100; default ~80 - * \param[in] lossless use 1 for lossless; 0 for lossy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Special top-level function allowing specification of quality.
- * 
- */ -l_ok -pixWriteWebP(const char *filename, - PIX *pixs, - l_int32 quality, - l_int32 lossless) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixWriteWebP"); - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if ((fp = fopenWriteStream(filename, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - ret = pixWriteStreamWebP(fp, pixs, quality, lossless); - fclose(fp); - if (ret) - return ERROR_INT("pixs not compressed to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteStreampWebP() - * - * \param[in] fp file stream - * \param[in] pixs all depths - * \param[in] quality 0 - 100; default ~80 - * \param[in] lossless use 1 for lossless; 0 for lossy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) See pixWriteMemWebP() for details.
- *      (2) Use 'free', and not leptonica's 'LEPT_FREE', for all heap data
- *          that is returned from the WebP library.
- * 
- */ -l_ok -pixWriteStreamWebP(FILE *fp, - PIX *pixs, - l_int32 quality, - l_int32 lossless) -{ -l_uint8 *filedata; -size_t filebytes, nbytes; - - PROCNAME("pixWriteStreamWebP"); - - if (!fp) - return ERROR_INT("stream not open", procName, 1); - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - - pixSetPadBits(pixs, 0); - pixWriteMemWebP(&filedata, &filebytes, pixs, quality, lossless); - rewind(fp); - nbytes = fwrite(filedata, 1, filebytes, fp); - free(filedata); - if (nbytes != filebytes) - return ERROR_INT("Write error", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteMemWebP() - * - * \param[out] pencdata webp encoded data of pixs - * \param[out] pencsize size of webp encoded data - * \param[in] pixs any depth, cmapped OK - * \param[in] quality 0 - 100; default ~80 - * \param[in] lossless use 1 for lossless; 0 for lossy - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) Lossless and lossy encoding are entirely different in webp.
- *          %quality applies to lossy, and is ignored for lossless.
- *      (2) The input image is converted to RGB if necessary.  If spp == 3,
- *          we set the alpha channel to fully opaque (255), and
- *          WebPEncodeRGBA() then removes the alpha chunk when encoding,
- *          setting the internal header field has_alpha to 0.
- * 
- */ -l_ok -pixWriteMemWebP(l_uint8 **pencdata, - size_t *pencsize, - PIX *pixs, - l_int32 quality, - l_int32 lossless) -{ -l_int32 w, h, d, wpl, stride; -l_uint32 *data; -PIX *pix1, *pix2; - - PROCNAME("pixWriteMemWebP"); - - if (!pencdata) - return ERROR_INT("&encdata not defined", procName, 1); - *pencdata = NULL; - if (!pencsize) - return ERROR_INT("&encsize not defined", procName, 1); - *pencsize = 0; - if (!pixs) - return ERROR_INT("&pixs not defined", procName, 1); - if (lossless == 0 && (quality < 0 || quality > 100)) - return ERROR_INT("quality not in [0 ... 100]", procName, 1); - - if ((pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_FULL_COLOR)) == NULL) - return ERROR_INT("failure to remove color map", procName, 1); - - /* Convert to rgb if not 32 bpp; pix2 must not be a clone of pixs. */ - if (pixGetDepth(pix1) != 32) - pix2 = pixConvertTo32(pix1); - else - pix2 = pixCopy(NULL, pix1); - pixDestroy(&pix1); - pixGetDimensions(pix2, &w, &h, &d); - if (w <= 0 || h <= 0 || d != 32) { - pixDestroy(&pix2); - return ERROR_INT("pix2 not 32 bpp or of 0 size", procName, 1); - } - - /* If spp == 3, need to set alpha layer to opaque (all 1s). */ - if (pixGetSpp(pix2) == 3) - pixSetComponentArbitrary(pix2, L_ALPHA_CHANNEL, 255); - - /* The WebP API expects data in RGBA order. The pix stores - * in host-dependent order with R as the MSB and A as the LSB. - * On little-endian machines, the bytes in the word must - * be swapped; e.g., R goes from byte 0 (LSB) to byte 3 (MSB). - * No swapping is necessary for big-endians. 
*/ - pixEndianByteSwap(pix2); - wpl = pixGetWpl(pix2); - data = pixGetData(pix2); - stride = wpl * 4; - if (lossless) { - *pencsize = WebPEncodeLosslessRGBA((uint8_t *)data, w, h, - stride, pencdata); - } else { - *pencsize = WebPEncodeRGBA((uint8_t *)data, w, h, stride, - quality, pencdata); - } - pixDestroy(&pix2); - - if (*pencsize == 0) { - free(*pencdata); - *pencdata = NULL; - return ERROR_INT("webp encoding failed", procName, 1); - } - - return 0; -} - -/* --------------------------------------------*/ -#endif /* HAVE_LIBWEBP */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpiostub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpiostub.c deleted file mode 100644 index 955ac085..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/webpiostub.c +++ /dev/null @@ -1,99 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file webpiostub.c - *
- *
- *     Stubs for webpio.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !HAVE_LIBWEBP /* defined in environ.h */ -/* --------------------------------------------*/ - -PIX * pixReadStreamWebP(FILE *fp) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadStreamWebP", NULL); -} - -/* ----------------------------------------------------------------------*/ - -PIX * pixReadMemWebP(const l_uint8 *filedata, size_t filesize) -{ - return (PIX * )ERROR_PTR("function not present", "pixReadMemWebP", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderWebP(const char *filename, l_int32 *pw, l_int32 *ph, - l_int32 *pspp) -{ - return ERROR_INT("function not present", "readHeaderWebP", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok readHeaderMemWebP(const l_uint8 *data, size_t size, - l_int32 *pw, l_int32 *ph, l_int32 *pspp) -{ - return ERROR_INT("function not present", "readHeaderMemWebP", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteWebP(const char *filename, PIX *pixs, l_int32 quality, - l_int32 lossless) -{ - return ERROR_INT("function not present", "pixWriteWebP", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteStreamWebP(FILE *fp, PIX *pixs, l_int32 quality, - l_int32 lossless) -{ - return ERROR_INT("function not present", "pixWriteStreamWebP", 1); -} - -/* ----------------------------------------------------------------------*/ - -l_ok pixWriteMemWebP(l_uint8 **pencdata, size_t *pencsize, PIX *pixs, - l_int32 quality, l_int32 lossless) -{ - return ERROR_INT("function not present", "pixWriteMemWebP", 1); -} - -/* --------------------------------------------*/ -#endif /* !HAVE_LIBWEBP */ -/* --------------------------------------------*/ diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/writefile.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/writefile.c deleted file mode 100644 index d66b6072..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/writefile.c +++ /dev/null @@ -1,1543 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001-2016 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *====================================================================*/ - -/* - * writefile.c - * - * Set jpeg quality for pixWrite() and pixWriteMem() - * l_int32 l_jpegSetQuality() - * - * Set global variable LeptDebugOK for writing to named temp files - * l_int32 setLeptDebugOK() - * - * High-level procedures for writing images to file: - * l_int32 pixaWriteFiles() - * l_int32 pixWriteDebug() - * l_int32 pixWrite() - * l_int32 pixWriteAutoFormat() - * l_int32 pixWriteStream() - * l_int32 pixWriteImpliedFormat() - * - * Selection of output format if default is requested - * l_int32 pixChooseOutputFormat() - * l_int32 getImpliedFileFormat() - * l_int32 pixGetAutoFormat() - * const char *getFormatExtension() - * - * Write to memory - * l_int32 pixWriteMem() - * - * Image display for debugging - * l_int32 l_fileDisplay() - * l_int32 pixDisplay() - * l_int32 pixDisplayWithTitle() - * PIX *pixMakeColorSquare() - * void l_chooseDisplayProg() - * - * Change format for missing library - * void changeFormatForMissingLib() - * - * Deprecated pix output for debugging - * l_int32 pixDisplayWrite() -- still used in tesseract 3.05 - * l_int32 pixSaveTiled() - * l_int32 pixSaveTiledOutline() - * l_int32 pixSaveTiledWithText() - * - * Supported file formats: - * (1) Writing is supported without any external libraries: - * bmp - * pnm (including pbm, pgm, etc) - * spix (raw serialized) - * (2) Writing is supported with installation of external libraries: - * png - * jpg (standard jfif version) - * tiff (including most varieties of compression) - * gif - * webp - * jp2 (jpeg2000) - * (3) Writing is supported through special interfaces: - * ps (PostScript, in psio1.c, psio2.c): - * level 1 (uncompressed) - * level 2 (g4 and dct encoding: requires tiff, jpg) - * level 3 (g4, dct and flate encoding: requires tiff, jpg, zlib) - * pdf (PDF, in pdfio.c): - * level 1 (g4 and dct encoding: requires tiff, jpg) - * level 2 (g4, dct and flate encoding: requires tiff, jpg, zlib) - */ - 
-#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include "allheaders.h" - -#if WINAPI_FAMILY_APP -#include -#endif - -#ifdef __APPLE__ -#include "TargetConditionals.h" -#endif - - /* Display program (xv, xli, xzgv, open) to be invoked by pixDisplay() */ -#ifdef _WIN32 -static l_int32 var_DISPLAY_PROG = L_DISPLAY_WITH_IV; /* default */ -#elif defined(__APPLE__) -static l_int32 var_DISPLAY_PROG = L_DISPLAY_WITH_OPEN; /* default */ -#else -static l_int32 var_DISPLAY_PROG = L_DISPLAY_WITH_XZGV; /* default */ -#endif /* _WIN32 */ - -static const l_int32 Bufsize = 512; -static const l_int32 MaxDisplayWidth = 1000; -static const l_int32 MaxDisplayHeight = 800; -static const l_int32 MaxSizeForPng = 200; - - /* PostScript output for printing */ -static const l_float32 DefaultScaling = 1.0; - - /* Global array of image file format extension names. */ - /* This is in 1-1 corrspondence with format enum in imageio.h. */ - /* The empty string at the end represents the serialized format, */ - /* which has no recognizable extension name, but the array must */ - /* be padded to agree with the format enum. */ - /* (Note on 'const': The size of the array can't be defined 'const' */ - /* because that makes it static. The 'const' in the definition of */ - /* the array refers to the strings in the array; the ptr to the */ - /* array is not const and can be used 'extern' in other files.) 
*/ -LEPT_DLL l_int32 NumImageFileFormatExtensions = 20; /* array size */ -LEPT_DLL const char *ImageFileFormatExtensions[] = - {"unknown", - "bmp", - "jpg", - "png", - "tif", - "tif", - "tif", - "tif", - "tif", - "tif", - "tif", - "pnm", - "ps", - "gif", - "jp2", - "webp", - "pdf", - "tif", - "default", - ""}; - - /* Local map of image file name extension to output format */ -struct ExtensionMap -{ - char extension[8]; - l_int32 format; -}; -static const struct ExtensionMap extension_map[] = - { { ".bmp", IFF_BMP }, - { ".jpg", IFF_JFIF_JPEG }, - { ".jpeg", IFF_JFIF_JPEG }, - { ".png", IFF_PNG }, - { ".tif", IFF_TIFF }, - { ".tiff", IFF_TIFF }, - { ".pnm", IFF_PNM }, - { ".gif", IFF_GIF }, - { ".jp2", IFF_JP2 }, - { ".ps", IFF_PS }, - { ".pdf", IFF_LPDF }, - { ".webp", IFF_WEBP } }; - - -/*---------------------------------------------------------------------* - * Set jpeg quality for pixWrite() and pixWriteMem() * - *---------------------------------------------------------------------*/ - /* Parameter that controls jpeg quality for high-level calls. */ -static l_int32 var_JPEG_QUALITY = 75; /* default */ - -/*! - * \brief l_jpegSetQuality() - * - * \param[in] new_quality 1 - 100; 75 is default; 0 defaults to 75 - * \return prev previous quality - * - *
- * Notes:
- *      (1) This variable is used in pixWriteStream() and pixWriteMem(),
- *          to control the jpeg quality.  The default is 75.
- *      (2) It returns the previous quality, so for example:
- *           l_int32  prev = l_jpegSetQuality(85);  //sets to 85
- *           pixWriteStream(...);
- *           l_jpegSetQuality(prev);   // resets to previous value
- *      (3) On error, logs a message and does not change the variable.
- */
-l_int32
-l_jpegSetQuality(l_int32  new_quality)
-{
-l_int32  prevq, newq;
-
-        /* Name handed to L_ERROR below; it must spell this function's
-         * name so the logged message points at the right place. */
-    PROCNAME("l_jpegSetQuality");
-
-        /* Save the current value first: it is returned in every case so
-         * the caller can restore it later. */
-    prevq = var_JPEG_QUALITY;
-
-        /* A request of 0 is shorthand for the default quality, 75. */
-    newq = (new_quality == 0) ? 75 : new_quality;
-
-        /* Values outside [1 ... 100] are reported and otherwise ignored;
-         * the stored quality is left as it was. */
-    if (newq < 1 || newq > 100)
-        L_ERROR("invalid jpeg quality; unchanged\n", procName);
-    else
-        var_JPEG_QUALITY = newq;
-    return prevq;
-}
-
-
-/*----------------------------------------------------------------------*
- *    Set global variable LeptDebugOK for writing to named temp files   *
- *----------------------------------------------------------------------*/
-l_int32 LeptDebugOK = 0;  /* default value */
-/*!
- * \brief   setLeptDebugOK()
- *
- * \param[in]    allow     any nonzero value enables; 0 disables
- * \return       void
- *
- * Notes:
- *      (1) Stores a normalized flag (exactly 0 or 1) in the global
- *          variable LeptDebugOK.  pixWriteDebug() consults that flag
- *          and only writes its file when the flag is nonzero.
- *      (2) The flag starts out as 0, so debug writes are disabled
- *          until this is called with a nonzero %allow.
- */
-void
-setLeptDebugOK(l_int32  allow)
-{
-        /* Collapse every nonzero input to 1 before storing it. */
-    LeptDebugOK = (allow != 0) ? 1 : 0;
-}
-
-
-/*---------------------------------------------------------------------*
- *           Top-level procedures for writing images to file           *
- *---------------------------------------------------------------------*/
-/*!
- * \brief   pixaWriteFiles()
- *
- * \param[in]    rootname
- * \param[in]    pixa
- * \param[in]    format  defined in imageio.h; see notes for default
- * \return  0 if OK; 1 on error
- *
- * 
- * Notes:
- *      (1) Use %format = IFF_DEFAULT to decide the output format
- *          individually for each pix.
- * 
- */ -l_ok -pixaWriteFiles(const char *rootname, - PIXA *pixa, - l_int32 format) -{ -char bigbuf[Bufsize]; -l_int32 i, n, pixformat; -PIX *pix; - - PROCNAME("pixaWriteFiles"); - - if (!rootname) - return ERROR_INT("rootname not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - if (format < 0 || format == IFF_UNKNOWN || - format >= NumImageFileFormatExtensions) - return ERROR_INT("invalid format", procName, 1); - - n = pixaGetCount(pixa); - for (i = 0; i < n; i++) { - pix = pixaGetPix(pixa, i, L_CLONE); - if (format == IFF_DEFAULT) - pixformat = pixChooseOutputFormat(pix); - else - pixformat = format; - snprintf(bigbuf, Bufsize, "%s%03d.%s", rootname, i, - ImageFileFormatExtensions[pixformat]); - pixWrite(bigbuf, pix, pixformat); - pixDestroy(&pix); - } - - return 0; -} - - -/*! - * \brief pixWriteDebug() - * - * \param[in] fname - * \param[in] pix - * \param[in] format defined in imageio.h - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Debug version, intended for use in the library when writing
- *          to files in a temp directory with names that are compiled in.
- *          This is used instead of pixWrite() for all such library calls.
- *      (2) The global variable LeptDebugOK defaults to 0, and can be set
- *          or cleared by the function setLeptDebugOK().
- * 
- */ -l_ok -pixWriteDebug(const char *fname, - PIX *pix, - l_int32 format) -{ - PROCNAME("pixWriteDebug"); - - if (LeptDebugOK) { - return pixWrite(fname, pix, format); - } else { - L_INFO("write to named temp file %s is disabled\n", procName, fname); - return 0; - } -} - - -/*! - * \brief pixWrite() - * - * \param[in] fname - * \param[in] pix - * \param[in] format defined in imageio.h - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) Open for write using binary mode (with the "b" flag)
- *          to avoid having Windows automatically translate the NL
- *          into CRLF, which corrupts image files.  On non-windows
- *          systems this flag should be ignored, per ISO C90.
- *          Thanks to Dave Bryan for pointing this out.
- *      (2) If the default image format IFF_DEFAULT is requested:
- *          use the input format if known; otherwise, use a lossless format.
- *      (3) The default jpeg quality is 75.  For some other value,
- *          Use l_jpegSetQuality().
- * 
- */ -l_ok -pixWrite(const char *fname, - PIX *pix, - l_int32 format) -{ -l_int32 ret; -FILE *fp; - - PROCNAME("pixWrite"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!fname) - return ERROR_INT("fname not defined", procName, 1); - - if ((fp = fopenWriteStream(fname, "wb+")) == NULL) - return ERROR_INT("stream not opened", procName, 1); - - ret = pixWriteStream(fp, pix, format); - fclose(fp); - if (ret) - return ERROR_INT("pix not written to stream", procName, 1); - return 0; -} - - -/*! - * \brief pixWriteAutoFormat() - * - * \param[in] filename - * \param[in] pix - * \return 0 if OK; 1 on error - */ -l_ok -pixWriteAutoFormat(const char *filename, - PIX *pix) -{ -l_int32 format; - - PROCNAME("pixWriteAutoFormat"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - - if (pixGetAutoFormat(pix, &format)) - return ERROR_INT("auto format not returned", procName, 1); - return pixWrite(filename, pix, format); -} - - -/*! - * \brief pixWriteStream() - * - * \param[in] fp file stream - * \param[in] pix - * \param[in] format - * \return 0 if OK; 1 on error. 
- */ -l_ok -pixWriteStream(FILE *fp, - PIX *pix, - l_int32 format) -{ - PROCNAME("pixWriteStream"); - - if (!fp) - return ERROR_INT("stream not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - if (format == IFF_DEFAULT) - format = pixChooseOutputFormat(pix); - - /* Use bmp format for testing if library for requested - * format for jpeg, png or tiff is not available */ - changeFormatForMissingLib(&format); - - switch(format) - { - case IFF_BMP: - pixWriteStreamBmp(fp, pix); - break; - - case IFF_JFIF_JPEG: /* default quality; baseline sequential */ - return pixWriteStreamJpeg(fp, pix, var_JPEG_QUALITY, 0); - - case IFF_PNG: /* no gamma value stored */ - return pixWriteStreamPng(fp, pix, 0.0); - - case IFF_TIFF: /* uncompressed */ - case IFF_TIFF_PACKBITS: /* compressed, binary only */ - case IFF_TIFF_RLE: /* compressed, binary only */ - case IFF_TIFF_G3: /* compressed, binary only */ - case IFF_TIFF_G4: /* compressed, binary only */ - case IFF_TIFF_LZW: /* compressed, all depths */ - case IFF_TIFF_ZIP: /* compressed, all depths */ - case IFF_TIFF_JPEG: /* compressed, 8 bpp gray and 32 bpp rgb */ - return pixWriteStreamTiff(fp, pix, format); - - case IFF_PNM: - return pixWriteStreamPnm(fp, pix); - - case IFF_PS: - return pixWriteStreamPS(fp, pix, NULL, 0, DefaultScaling); - - case IFF_GIF: - return pixWriteStreamGif(fp, pix); - - case IFF_JP2: - return pixWriteStreamJp2k(fp, pix, 34, 4, 0, 0); - - case IFF_WEBP: - return pixWriteStreamWebP(fp, pix, 80, 0); - - case IFF_LPDF: - return pixWriteStreamPdf(fp, pix, 0, NULL); - - case IFF_SPIX: - return pixWriteStreamSpix(fp, pix); - - default: - return ERROR_INT("unknown format", procName, 1); - } - - return 0; -} - - -/*! - * \brief pixWriteImpliedFormat() - * - * \param[in] filename - * \param[in] pix - * \param[in] quality iff JPEG; 1 - 100, 0 for default - * \param[in] progressive iff JPEG; 0 for baseline seq., 1 for progressive - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This determines the output format from the filename extension.
- *      (2) The last two args are ignored except for requests for jpeg files.
- *      (3) The jpeg default quality is 75.
- * 
- */ -l_ok -pixWriteImpliedFormat(const char *filename, - PIX *pix, - l_int32 quality, - l_int32 progressive) -{ -l_int32 format; - - PROCNAME("pixWriteImpliedFormat"); - - if (!filename) - return ERROR_INT("filename not defined", procName, 1); - if (!pix) - return ERROR_INT("pix not defined", procName, 1); - - /* Determine output format */ - format = getImpliedFileFormat(filename); - if (format == IFF_UNKNOWN) { - format = IFF_PNG; - } else if (format == IFF_TIFF) { - if (pixGetDepth(pix) == 1) - format = IFF_TIFF_G4; - else -#ifdef _WIN32 - format = IFF_TIFF_LZW; /* poor compression */ -#else - format = IFF_TIFF_ZIP; /* native windows tools can't handle this */ -#endif /* _WIN32 */ - } - - if (format == IFF_JFIF_JPEG) { - quality = L_MIN(quality, 100); - quality = L_MAX(quality, 0); - if (progressive != 0 && progressive != 1) { - progressive = 0; - L_WARNING("invalid progressive; setting to baseline\n", procName); - } - if (quality == 0) - quality = 75; - pixWriteJpeg (filename, pix, quality, progressive); - } else { - pixWrite(filename, pix, format); - } - - return 0; -} - - -/*---------------------------------------------------------------------* - * Selection of output format if default is requested * - *---------------------------------------------------------------------*/ -/*! - * \brief pixChooseOutputFormat() - * - * \param[in] pix - * \return output format, or 0 on error - * - *
- * Notes:
- *      (1) This should only be called if the requested format is IFF_DEFAULT.
- *      (2) If the pix wasn't read from a file, its input format value
- *          will be IFF_UNKNOWN, and in that case it is written out
- *          in a compressed but lossless format.
- * 
- */ -l_int32 -pixChooseOutputFormat(PIX *pix) -{ -l_int32 d, format; - - PROCNAME("pixChooseOutputFormat"); - - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - - d = pixGetDepth(pix); - format = pixGetInputFormat(pix); - if (format == IFF_UNKNOWN) { /* output lossless */ - if (d == 1) - format = IFF_TIFF_G4; - else - format = IFF_PNG; - } - - return format; -} - - -/*! - * \brief getImpliedFileFormat() - * - * \param[in] filename - * \return output format, or IFF_UNKNOWN on error or invalid extension. - * - *
- * Notes:
- *      (1) This determines the output file format from the extension
- *          of the input filename.
- * 
- */ -l_int32 -getImpliedFileFormat(const char *filename) -{ -char *extension; -int i, numext; -l_int32 format = IFF_UNKNOWN; - - if (splitPathAtExtension (filename, NULL, &extension)) - return IFF_UNKNOWN; - - numext = sizeof(extension_map) / sizeof(extension_map[0]); - for (i = 0; i < numext; i++) { - if (!strcmp(extension, extension_map[i].extension)) { - format = extension_map[i].format; - break; - } - } - - LEPT_FREE(extension); - return format; -} - - -/*! - * \brief pixGetAutoFormat() - * - * \param[in] pix - * \param[in] &format - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) The output formats are restricted to tiff, jpeg and png
- *          because these are the most commonly used image formats and
- *          the ones that are typically installed with leptonica.
- *      (2) This decides what compression to use based on the pix.
- *          It chooses tiff-g4 if 1 bpp without a colormap, jpeg with
- *          quality 75 if grayscale, rgb or rgba (where it loses
- *          the alpha layer), and lossless png for all other situations.
- * 
- */ -l_ok -pixGetAutoFormat(PIX *pix, - l_int32 *pformat) -{ -l_int32 d; -PIXCMAP *cmap; - - PROCNAME("pixGetAutoFormat"); - - if (!pformat) - return ERROR_INT("&format not defined", procName, 0); - *pformat = IFF_UNKNOWN; - if (!pix) - return ERROR_INT("pix not defined", procName, 0); - - d = pixGetDepth(pix); - cmap = pixGetColormap(pix); - if (d == 1 && !cmap) { - *pformat = IFF_TIFF_G4; - } else if ((d == 8 && !cmap) || d == 24 || d == 32) { - *pformat = IFF_JFIF_JPEG; - } else { - *pformat = IFF_PNG; - } - - return 0; -} - - -/*! - * \brief getFormatExtension() - * - * \param[in] format integer - * \return extension string, or NULL if format is out of range - * - *
- * Notes:
- *      (1) This string is NOT owned by the caller; it is just a pointer
- *          to a global string.  Do not free it.
- * 
- */ -const char * -getFormatExtension(l_int32 format) -{ - PROCNAME("getFormatExtension"); - - if (format < 0 || format >= NumImageFileFormatExtensions) - return (const char *)ERROR_PTR("invalid format", procName, NULL); - - return ImageFileFormatExtensions[format]; -} - - -/*---------------------------------------------------------------------* - * Write to memory * - *---------------------------------------------------------------------*/ -/*! - * \brief pixWriteMem() - * - * \param[out] pdata data of tiff compressed image - * \param[out] psize size of returned data - * \param[in] pix - * \param[in] format defined in imageio.h - * \return 0 if OK, 1 on error - * - *
- * Notes:
- *      (1) On windows, this will only write tiff and PostScript to memory.
- *          For other formats, it requires open_memstream(3).
- *      (2) PostScript output is uncompressed, in hex ascii.
- *          Most printers support level 2 compression (tiff_g4 for 1 bpp,
- *          jpeg for 8 and 32 bpp).
- *      (3) The default jpeg quality is 75.  For some other value,
- *          Use l_jpegSetQuality().
- * 
- */ -l_ok -pixWriteMem(l_uint8 **pdata, - size_t *psize, - PIX *pix, - l_int32 format) -{ -l_int32 ret; - - PROCNAME("pixWriteMem"); - - if (!pdata) - return ERROR_INT("&data not defined", procName, 1 ); - if (!psize) - return ERROR_INT("&size not defined", procName, 1 ); - if (!pix) - return ERROR_INT("&pix not defined", procName, 1 ); - - if (format == IFF_DEFAULT) - format = pixChooseOutputFormat(pix); - - /* Use bmp format for testing if library for requested - * format for jpeg, png or tiff is not available */ - changeFormatForMissingLib(&format); - - switch(format) - { - case IFF_BMP: - ret = pixWriteMemBmp(pdata, psize, pix); - break; - - case IFF_JFIF_JPEG: /* default quality; baseline sequential */ - ret = pixWriteMemJpeg(pdata, psize, pix, var_JPEG_QUALITY, 0); - break; - - case IFF_PNG: /* no gamma value stored */ - ret = pixWriteMemPng(pdata, psize, pix, 0.0); - break; - - case IFF_TIFF: /* uncompressed */ - case IFF_TIFF_PACKBITS: /* compressed, binary only */ - case IFF_TIFF_RLE: /* compressed, binary only */ - case IFF_TIFF_G3: /* compressed, binary only */ - case IFF_TIFF_G4: /* compressed, binary only */ - case IFF_TIFF_LZW: /* compressed, all depths */ - case IFF_TIFF_ZIP: /* compressed, all depths */ - ret = pixWriteMemTiff(pdata, psize, pix, format); - break; - - case IFF_PNM: - ret = pixWriteMemPnm(pdata, psize, pix); - break; - - case IFF_PS: - ret = pixWriteMemPS(pdata, psize, pix, NULL, 0, DefaultScaling); - break; - - case IFF_GIF: - ret = pixWriteMemGif(pdata, psize, pix); - break; - - case IFF_JP2: - ret = pixWriteMemJp2k(pdata, psize, pix, 34, 0, 0, 0); - break; - - case IFF_WEBP: - ret = pixWriteMemWebP(pdata, psize, pix, 80, 0); - break; - - case IFF_LPDF: - ret = pixWriteMemPdf(pdata, psize, pix, 0, NULL); - break; - - case IFF_SPIX: - ret = pixWriteMemSpix(pdata, psize, pix); - break; - - default: - return ERROR_INT("unknown format", procName, 1); - } - - return ret; -} - - 
-/*---------------------------------------------------------------------* - * Image display for debugging * - *---------------------------------------------------------------------*/ -/*! - * \brief l_fileDisplay() - * - * \param[in] fname - * \param[in] x, y location of display frame on the screen - * \param[in] scale scale factor (use 0 to skip display) - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is a convenient wrapper for displaying image files.
- *      (2) It does nothing unless LeptDebugOK == TRUE.
- *      (2) Set %scale = 0 to disable display.
- *      (3) This downscales 1 bpp to gray.
- * 
- */ -l_ok -l_fileDisplay(const char *fname, - l_int32 x, - l_int32 y, - l_float32 scale) -{ -PIX *pixs, *pixd; - - PROCNAME("l_fileDisplay"); - - if (!LeptDebugOK) { - L_INFO("displaying files is disabled; " - "use setLeptDebugOK(1) to enable\n", procName); - return 0; - } - if (scale == 0.0) - return 0; - if (scale < 0.0) - return ERROR_INT("invalid scale factor", procName, 1); - if ((pixs = pixRead(fname)) == NULL) - return ERROR_INT("pixs not read", procName, 1); - - if (scale == 1.0) { - pixd = pixClone(pixs); - } else { - if (scale < 1.0 && pixGetDepth(pixs) == 1) - pixd = pixScaleToGray(pixs, scale); - else - pixd = pixScale(pixs, scale, scale); - } - pixDisplay(pixd, x, y); - pixDestroy(&pixs); - pixDestroy(&pixd); - return 0; -} - - -/*! - * \brief pixDisplay() - * - * \param[in] pix 1, 2, 4, 8, 16, 32 bpp - * \param[in] x, y location of display frame on the screen - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) This is debugging code that displays an image on the screen.
- *          It uses a static internal variable to number the output files
- *          written by a single process.  Behavior with a shared library
- *          may be unpredictable.
- *      (2) It does nothing unless LeptDebugOK == TRUE.
- *      (3) It uses these programs to display the image:
- *             On Unix: xzgv, xli or xv
- *             On Windows: i_view
- *          The display program must be on your $PATH variable.  It is
- *          chosen by setting the global var_DISPLAY_PROG, using
- *          l_chooseDisplayProg().  Default on Unix is xzgv.
- *      (4) Images with dimensions larger than MaxDisplayWidth or
- *          MaxDisplayHeight are downscaled to fit those constraints.
- *          This is particularly important for displaying 1 bpp images
- *          with xv, because xv automatically downscales large images
- *          by subsampling, which looks poor.  For 1 bpp, we use
- *          scale-to-gray to get decent-looking anti-aliased images.
- *          In all cases, we write a temporary file to /tmp/lept/disp,
- *          that is read by the display program.
- *      (5) The temporary file is written as png if, after initial
- *          processing for special cases, any of these obtain:
- *            * pix dimensions are smaller than some thresholds
- *            * pix depth is less than 8 bpp
- *            * pix is colormapped
- *      (6) For spp == 4, we call pixDisplayLayersRGBA() to show 3
- *          versions of the image: the image with a fully opaque
- *          alpha, the alpha, and the image as it would appear with
- *          a white background.
- * 
- */ -l_ok -pixDisplay(PIX *pixs, - l_int32 x, - l_int32 y) -{ - return pixDisplayWithTitle(pixs, x, y, NULL, 1); -} - - -/*! - * \brief pixDisplayWithTitle() - * - * \param[in] pix 1, 2, 4, 8, 16, 32 bpp - * \param[in] x, y location of display frame - * \param[in] title [optional] on frame; can be NULL; - * \param[in] dispflag 1 to write, else disabled - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (1) See notes for pixDisplay().
- *      (2) This displays the image if dispflag == 1; otherwise it punts.
- * 
- */ -l_ok -pixDisplayWithTitle(PIX *pixs, - l_int32 x, - l_int32 y, - const char *title, - l_int32 dispflag) -{ -char *tempname; -char buffer[Bufsize]; -static l_int32 index = 0; /* caution: not .so or thread safe */ -l_int32 w, h, d, spp, maxheight, opaque, threeviews; -l_float32 ratw, rath, ratmin; -PIX *pix0, *pix1, *pix2; -PIXCMAP *cmap; -#ifndef _WIN32 -l_int32 wt, ht; -#else -char *pathname; -char fullpath[_MAX_PATH]; -#endif /* _WIN32 */ - - PROCNAME("pixDisplayWithTitle"); - - if (!LeptDebugOK) { - L_INFO("displaying images is disabled;\n " - "use setLeptDebugOK(1) to enable\n", procName); - return 0; - } - -#ifdef OS_IOS /* iOS 11 does not support system() */ - return ERROR_INT("iOS 11 does not support system()", procName, 1); -#endif /* OS_IOS */ - - if (dispflag != 1) return 0; - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (var_DISPLAY_PROG != L_DISPLAY_WITH_XZGV && - var_DISPLAY_PROG != L_DISPLAY_WITH_XLI && - var_DISPLAY_PROG != L_DISPLAY_WITH_XV && - var_DISPLAY_PROG != L_DISPLAY_WITH_IV && - var_DISPLAY_PROG != L_DISPLAY_WITH_OPEN) { - return ERROR_INT("no program chosen for display", procName, 1); - } - - /* Display with three views if either spp = 4 or if colormapped - * and the alpha component is not fully opaque */ - opaque = TRUE; - if ((cmap = pixGetColormap(pixs)) != NULL) - pixcmapIsOpaque(cmap, &opaque); - spp = pixGetSpp(pixs); - threeviews = (spp == 4 || !opaque) ? TRUE : FALSE; - - /* If colormapped and not opaque, remove the colormap to RGBA */ - if (!opaque) - pix0 = pixRemoveColormap(pixs, REMOVE_CMAP_WITH_ALPHA); - else - pix0 = pixClone(pixs); - - /* Scale if necessary; this will also remove a colormap */ - pixGetDimensions(pix0, &w, &h, &d); - maxheight = (threeviews) ? 
MaxDisplayHeight / 3 : MaxDisplayHeight; - if (w <= MaxDisplayWidth && h <= maxheight) { - if (d == 16) /* take MSB */ - pix1 = pixConvert16To8(pix0, L_MS_BYTE); - else - pix1 = pixClone(pix0); - } else { - ratw = (l_float32)MaxDisplayWidth / (l_float32)w; - rath = (l_float32)maxheight / (l_float32)h; - ratmin = L_MIN(ratw, rath); - if (ratmin < 0.125 && d == 1) - pix1 = pixScaleToGray8(pix0); - else if (ratmin < 0.25 && d == 1) - pix1 = pixScaleToGray4(pix0); - else if (ratmin < 0.33 && d == 1) - pix1 = pixScaleToGray3(pix0); - else if (ratmin < 0.5 && d == 1) - pix1 = pixScaleToGray2(pix0); - else - pix1 = pixScale(pix0, ratmin, ratmin); - } - pixDestroy(&pix0); - if (!pix1) - return ERROR_INT("pix1 not made", procName, 1); - - /* Generate the three views if required */ - if (threeviews) - pix2 = pixDisplayLayersRGBA(pix1, 0xffffff00, 0); - else - pix2 = pixClone(pix1); - - if (index == 0) { /* erase any existing images */ - lept_rmdir("lept/disp"); - lept_mkdir("lept/disp"); - } - - index++; - if (pixGetDepth(pix2) < 8 || pixGetColormap(pix2) || - (w < MaxSizeForPng && h < MaxSizeForPng)) { - snprintf(buffer, Bufsize, "/tmp/lept/disp/write.%03d.png", index); - pixWrite(buffer, pix2, IFF_PNG); - } else { - snprintf(buffer, Bufsize, "/tmp/lept/disp/write.%03d.jpg", index); - pixWrite(buffer, pix2, IFF_JFIF_JPEG); - } - tempname = genPathname(buffer, NULL); - -#if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE - return 1; -#elif ! 
defined( _WIN32 ) - - /* Unix */ - if (var_DISPLAY_PROG == L_DISPLAY_WITH_XZGV) { - /* no way to display title */ - pixGetDimensions(pix2, &wt, &ht, NULL); - snprintf(buffer, Bufsize, - "xzgv --geometry %dx%d+%d+%d %s &", wt + 10, ht + 10, - x, y, tempname); - } else if (var_DISPLAY_PROG == L_DISPLAY_WITH_XLI) { - if (title) { - snprintf(buffer, Bufsize, - "xli -dispgamma 1.0 -quiet -geometry +%d+%d -title \"%s\" %s &", - x, y, title, tempname); - } else { - snprintf(buffer, Bufsize, - "xli -dispgamma 1.0 -quiet -geometry +%d+%d %s &", - x, y, tempname); - } - } else if (var_DISPLAY_PROG == L_DISPLAY_WITH_XV) { - if (title) { - snprintf(buffer, Bufsize, - "xv -quit -geometry +%d+%d -name \"%s\" %s &", - x, y, title, tempname); - } else { - snprintf(buffer, Bufsize, - "xv -quit -geometry +%d+%d %s &", x, y, tempname); - } - } else if (var_DISPLAY_PROG == L_DISPLAY_WITH_OPEN) { - snprintf(buffer, Bufsize, "open %s &", tempname); - } - callSystemDebug(buffer); - -#else /* _WIN32 */ - - /* Windows: L_DISPLAY_WITH_IV */ - pathname = genPathname(tempname, NULL); - _fullpath(fullpath, pathname, sizeof(fullpath)); - if (title) { - snprintf(buffer, Bufsize, - "i_view32.exe \"%s\" /pos=(%d,%d) /title=\"%s\"", - fullpath, x, y, title); - } else { - snprintf(buffer, Bufsize, "i_view32.exe \"%s\" /pos=(%d,%d)", - fullpath, x, y); - } - callSystemDebug(buffer); - LEPT_FREE(pathname); - -#endif /* _WIN32 */ - - pixDestroy(&pix1); - pixDestroy(&pix2); - LEPT_FREE(tempname); - return 0; -} - - -/*! - * \brief pixMakeColorSquare() - * - * \param[in] color in 0xrrggbb00 format - * \param[in] size in pixels; >= 100; use 0 for default (min size) - * \param[in] addlabel use 1 to display the color component values - * \param[in] location of text: L_ADD_ABOVE, etc; ignored if %addlabel == 0 - * \param[in] textcolor of text label; in 0xrrggbb00 format - * \return 32 bpp rgb pixd if OK; NULL on error - * - *
- * Notes:
- *      (1) If %addlabel == 0, %location and %textcolor are ignored.
- * 
- */ -PIX * -pixMakeColorSquare(l_uint32 color, - l_int32 size, - l_int32 addlabel, - l_int32 location, - l_uint32 textcolor) -{ -char buf[32]; -l_int32 w, rval, gval, bval; -L_BMF *bmf; -PIX *pix1, *pix2; - - PROCNAME("pixMakeColorSquare"); - - w = (size <= 0) ? 100 : size; - if (addlabel && w < 100) { - L_WARNING("size too small for label; omitting label\n", procName); - addlabel = 0; - } - - if ((pix1 = pixCreate(w, w, 32)) == NULL) - return (PIX *)ERROR_PTR("pix1 not madel", procName, NULL); - pixSetAllArbitrary(pix1, color); - if (!addlabel) - return pix1; - - /* Adding text of color component values */ - if (location != L_ADD_ABOVE && location != L_ADD_AT_TOP && - location != L_ADD_AT_BOT && location != L_ADD_BELOW) { - L_ERROR("invalid location: adding below\n", procName); - location = L_ADD_BELOW; - } - bmf = bmfCreate(NULL, 4); - extractRGBValues(color, &rval, &gval, &bval); - snprintf(buf, sizeof(buf), "%d,%d,%d", rval, gval, bval); - pix2 = pixAddSingleTextblock(pix1, bmf, buf, textcolor, location, NULL); - pixDestroy(&pix1); - bmfDestroy(&bmf); - return pix2; -} - - -void -l_chooseDisplayProg(l_int32 selection) -{ - if (selection == L_DISPLAY_WITH_XLI || - selection == L_DISPLAY_WITH_XZGV || - selection == L_DISPLAY_WITH_XV || - selection == L_DISPLAY_WITH_IV || - selection == L_DISPLAY_WITH_OPEN) { - var_DISPLAY_PROG = selection; - } else { - L_ERROR("invalid display program\n", "l_chooseDisplayProg"); - } -} - - -/*---------------------------------------------------------------------* - * Change format for missing lib * - *---------------------------------------------------------------------*/ -/*! - * \brief changeFormatForMissingLib() - * - * \param[in,out] pformat addr of requested output image format - * \return void - * - *
- * Notes:
- *      (1) This is useful for testing functionality when the library for
- *          the requested output format (jpeg, png or tiff) is not linked.
- *          In that case, the output format is changed to bmp.
- * 
- */ -void -changeFormatForMissingLib(l_int32 *pformat) -{ - PROCNAME("changeFormatForMissingLib"); - -#if !defined(HAVE_LIBJPEG) - if (*pformat == IFF_JFIF_JPEG) { - L_WARNING("jpeg library missing; output bmp format\n", procName); - *pformat = IFF_BMP; - } -#endif /* !defined(HAVE_LIBJPEG) */ -#if !defined(HAVE_LIBPNG) - if (*pformat == IFF_PNG) { - L_WARNING("png library missing; output bmp format\n", procName); - *pformat = IFF_BMP; - } -#endif /* !defined(HAVE_LIBPNG) */ -#if !defined(HAVE_LIBTIFF) - if (L_FORMAT_IS_TIFF(*pformat)) { - L_WARNING("tiff library missing; output bmp format\n", procName); - *pformat = IFF_BMP; - } -#endif /* !defined(HAVE_LIBTIFF) */ -} - - -/*---------------------------------------------------------------------* - * Deprecated pix output for debugging * - *---------------------------------------------------------------------*/ -/*! - * \brief pixDisplayWrite() - * - * \param[in] pix 1, 2, 4, 8, 16, 32 bpp - * \param[in] reduction -1 to reset/erase; 0 to disable; - * otherwise this is a reduction factor - * \return 0 if OK; 1 on error - * - *
- * Notes:
- *      (0) Deprecated.
- *      (1) This is a simple interface for writing a set of files.
- *      (2) This uses jpeg output for pix that are 32 bpp or 8 bpp
- *          without a colormap; otherwise, it uses png.
- *      (3) To erase any previously written files in the output directory:
- *             pixDisplayWrite(NULL, -1);
- *      (4) If reduction > 1 and depth == 1, this does a scale-to-gray
- *          reduction.
- *      (5) This function uses a static internal variable to number
- *          output files written by a single process.  Behavior
- *          with a shared library may be unpredictable.
- *      (6) For 16 bpp, this displays the full dynamic range with log scale.
- *          Alternative image transforms to generate 8 bpp pix are:
- *             pix8 = pixMaxDynamicRange(pixt, L_LINEAR_SCALE);
- *             pix8 = pixConvert16To8(pixt, L_LS_BYTE);  // low order byte
- *             pix8 = pixConvert16To8(pixt, L_MS_BYTE);  // high order byte
- * 
- */ -l_ok -pixDisplayWrite(PIX *pixs, - l_int32 reduction) -{ -char buf[Bufsize]; -char *fname; -l_float32 scale; -PIX *pix1, *pix2; -static l_int32 index = 0; /* caution: not .so or thread safe */ - - PROCNAME("pixDisplayWrite"); - - lept_stderr("\n######################################################" - "\n Notice:\n" - " pixDisplayWrite() has been deprecated in leptonica \n" - " since version 1.74. It will become a non-functioning\n" - " stub in 1.80.\n" - "######################################################" - "\n\n\n"); - - if (reduction == 0) return 0; - if (reduction < 0) { /* initialize */ - lept_rmdir("lept/display"); - index = 0; - return 0; - } - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (index == 0) - lept_mkdir("lept/display"); - index++; - - if (reduction == 1) { - pix1 = pixClone(pixs); - } else { - scale = 1. / (l_float32)reduction; - if (pixGetDepth(pixs) == 1) - pix1 = pixScaleToGray(pixs, scale); - else - pix1 = pixScale(pixs, scale, scale); - } - - if (pixGetDepth(pix1) == 16) { - pix2 = pixMaxDynamicRange(pix1, L_LOG_SCALE); - snprintf(buf, Bufsize, "file.%03d.png", index); - fname = pathJoin("/tmp/lept/display", buf); - pixWrite(fname, pix2, IFF_PNG); - pixDestroy(&pix2); - } else if (pixGetDepth(pix1) < 8 || pixGetColormap(pix1)) { - snprintf(buf, Bufsize, "file.%03d.png", index); - fname = pathJoin("/tmp/lept/display", buf); - pixWrite(fname, pix1, IFF_PNG); - } else { - snprintf(buf, Bufsize, "file.%03d.jpg", index); - fname = pathJoin("/tmp/lept/display", buf); - pixWrite(fname, pix1, IFF_JFIF_JPEG); - } - LEPT_FREE(fname); - pixDestroy(&pix1); - return 0; -} - - -/*! 
- * \brief pixSaveTiled() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp - * \param[in] pixa the pix are accumulated here - * \param[in] scalefactor 0.0 to disable; otherwise this is a scale factor - * \param[in] newrow 0 if placed on the same row as previous; 1 otherwise - * \param[in] space horizontal and vertical spacing, in pixels - * \param[in] dp depth of pixa; 8 or 32 bpp; only used on first call - * \return 0 if OK, 1 on error. - */ -l_ok -pixSaveTiled(PIX *pixs, - PIXA *pixa, - l_float32 scalefactor, - l_int32 newrow, - l_int32 space, - l_int32 dp) -{ - lept_stderr("\n######################################################" - "\n Notice:\n" - " pixSaveTiled() has been deprecated in leptonica \n" - " since version 1.78. It will be removed in 1.80.\n" - "######################################################" - "\n\n\n"); - - /* Save without an outline */ - return pixSaveTiledOutline(pixs, pixa, scalefactor, newrow, space, 0, dp); -} - - -/*! - * \brief pixSaveTiledOutline() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp - * \param[in] pixa the pix are accumulated here - * \param[in] scalefactor 0.0 to disable; otherwise this is a scale factor - * \param[in] newrow 0 if placed on the same row as previous; 1 otherwise - * \param[in] space horizontal and vertical spacing, in pixels - * \param[in] linewidth width of added outline for image; 0 for no outline - * \param[in] dp depth of pixa; 8 or 32 bpp; only used on first call - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) Before calling this function for the first time, use
- *          pixaCreate() to make the %pixa that will accumulate the pix.
- *          This is passed in each time pixSaveTiled() is called.
- *      (2) %scalefactor scales the input image.  After scaling and
- *          possible depth conversion, the image is saved in the input
- *          pixa, along with a box that specifies the location to
- *          place it when tiled later.  Disable saving the pix by
- *          setting %scalefactor == 0.0.
- *      (3) %newrow and %space specify the location of the new pix
- *          with respect to the last one(s) that were entered.
- *      (4) %dp specifies the depth at which all pix are saved.  It can
- *          be only 8 or 32 bpp.  Any colormap is removed.  This is only
- *          used at the first invocation.
- *      (5) This function uses two variables from call to call.
- *          If they were static, the function would not be .so or thread
- *          safe, and furthermore, there would be interference with two or
- *          more pixa accumulating images at a time.  Consequently,
- *          we use the first pix in the pixa to store and obtain both
- *          the depth and the current position of the bottom (one pixel
- *          below the lowest image raster line when laid out using
- *          the boxa).  The bottom variable is stored in the input format
- *          field, which is the only field available for storing an int.
- * 
- */ -l_ok -pixSaveTiledOutline(PIX *pixs, - PIXA *pixa, - l_float32 scalefactor, - l_int32 newrow, - l_int32 space, - l_int32 linewidth, - l_int32 dp) -{ -l_int32 n, top, left, bx, by, bw, w, h, depth, bottom; -BOX *box; -PIX *pix1, *pix2, *pix3, *pix4; - - PROCNAME("pixSaveTiledOutline"); - - lept_stderr("\n######################################################" - "\n Notice:\n" - " pixSaveTiledOutline() has been deprecated in leptonica \n" - " since version 1.78. It will be removed in 1.80.\n" - "######################################################" - "\n\n\n"); - - if (scalefactor == 0.0) return 0; - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - n = pixaGetCount(pixa); - if (n == 0) { - bottom = 0; - if (dp != 8 && dp != 32) { - L_WARNING("dp not 8 or 32 bpp; using 32\n", procName); - depth = 32; - } else { - depth = dp; - } - } else { /* extract the depth and bottom params from the first pix */ - pix1 = pixaGetPix(pixa, 0, L_CLONE); - depth = pixGetDepth(pix1); - bottom = pixGetInputFormat(pix1); /* not typical usage! */ - pixDestroy(&pix1); - } - - /* Remove colormap if it exists; otherwise a copy. This - * guarantees that pix4 is not a clone of pixs. 
*/ - pix1 = pixRemoveColormapGeneral(pixs, REMOVE_CMAP_BASED_ON_SRC, L_COPY); - - /* Scale and convert to output depth */ - if (scalefactor == 1.0) { - pix2 = pixClone(pix1); - } else if (scalefactor > 1.0) { - pix2 = pixScale(pix1, scalefactor, scalefactor); - } else { /* scalefactor < 1.0) */ - if (pixGetDepth(pix1) == 1) - pix2 = pixScaleToGray(pix1, scalefactor); - else - pix2 = pixScale(pix1, scalefactor, scalefactor); - } - pixDestroy(&pix1); - if (depth == 8) - pix3 = pixConvertTo8(pix2, 0); - else - pix3 = pixConvertTo32(pix2); - pixDestroy(&pix2); - - /* Add black outline */ - if (linewidth > 0) - pix4 = pixAddBorder(pix3, linewidth, 0); - else - pix4 = pixClone(pix3); - pixDestroy(&pix3); - - /* Find position of current pix (UL corner plus size) */ - if (n == 0) { - top = 0; - left = 0; - } else if (newrow == 1) { - top = bottom + space; - left = 0; - } else { /* n > 0 */ - pixaGetBoxGeometry(pixa, n - 1, &bx, &by, &bw, NULL); - top = by; - left = bx + bw + space; - } - - pixGetDimensions(pix4, &w, &h, NULL); - bottom = L_MAX(bottom, top + h); - box = boxCreate(left, top, w, h); - pixaAddPix(pixa, pix4, L_INSERT); - pixaAddBox(pixa, box, L_INSERT); - - /* Save the new bottom value */ - pix1 = pixaGetPix(pixa, 0, L_CLONE); - pixSetInputFormat(pix1, bottom); /* not typical usage! */ - pixDestroy(&pix1); - return 0; -} - - -/*! 
- * \brief pixSaveTiledWithText() - * - * \param[in] pixs 1, 2, 4, 8, 32 bpp - * \param[in] pixa the pix are accumulated here; as 32 bpp - * \param[in] outwidth in pixels; use 0 to disable entirely - * \param[in] newrow 1 to start a new row; 0 to go on same row as previous - * \param[in] space horizontal and vertical spacing, in pixels - * \param[in] linewidth width of added outline for image; 0 for no outline - * \param[in] bmf [optional] font struct - * \param[in] textstr [optional] text string to be added - * \param[in] val color to set the text - * \param[in] location L_ADD_ABOVE, L_ADD_AT_TOP, L_ADD_AT_BOT, L_ADD_BELOW - * \return 0 if OK, 1 on error. - * - *
- * Notes:
- *      (1) Before calling this function for the first time, use
- *          pixaCreate() to make the %pixa that will accumulate the pix.
- *          This is passed in each time pixSaveTiled() is called.
- *      (2) %outwidth is the scaled width.  After scaling, the image is
- *          saved in the input pixa, along with a box that specifies
- *          the location to place it when tiled later.  Disable saving
- *          the pix by setting %outwidth == 0.
- *      (3) %newrow and %space specify the location of the new pix
- *          with respect to the last one(s) that were entered.
- *      (4) All pix are saved as 32 bpp RGB.
- *      (5) If both %bmf and %textstr are defined, this generates a pix
- *          with the additional text; otherwise, no text is written.
- *      (6) The text is written before scaling, so it is properly
- *          antialiased in the scaled pix.  However, if the pix on
- *          different calls have different widths, the size of the
- *          text will vary.
- *      (7) See pixSaveTiledOutline() for other implementation details.
- * 
- */ -l_ok -pixSaveTiledWithText(PIX *pixs, - PIXA *pixa, - l_int32 outwidth, - l_int32 newrow, - l_int32 space, - l_int32 linewidth, - L_BMF *bmf, - const char *textstr, - l_uint32 val, - l_int32 location) -{ -PIX *pix1, *pix2, *pix3, *pix4; - - PROCNAME("pixSaveTiledWithText"); - - lept_stderr("\n######################################################" - "\n Notice:\n" - " pixSaveTiledWithText() has been deprecated in leptonica \n" - " since version 1.78. It will be removed in 1.80.\n" - "######################################################" - "\n\n\n"); - - if (outwidth == 0) return 0; - - if (!pixs) - return ERROR_INT("pixs not defined", procName, 1); - if (!pixa) - return ERROR_INT("pixa not defined", procName, 1); - - pix1 = pixConvertTo32(pixs); - if (linewidth > 0) - pix2 = pixAddBorder(pix1, linewidth, 0); - else - pix2 = pixClone(pix1); - if (bmf && textstr) - pix3 = pixAddSingleTextblock(pix2, bmf, textstr, val, location, NULL); - else - pix3 = pixClone(pix2); - pix4 = pixScaleToSize(pix3, outwidth, 0); - pixSaveTiled(pix4, pixa, 1.0, newrow, space, 32); - pixDestroy(&pix1); - pixDestroy(&pix2); - pixDestroy(&pix3); - pixDestroy(&pix4); - return 0; -} - - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/zlibmem.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/zlibmem.c deleted file mode 100644 index d43e7140..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/zlibmem.c +++ /dev/null @@ -1,282 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - - -/*! - * \file zlibmem.c - *
- *
- *      zlib operations in memory, using bbuffer
- *          l_uint8   *zlibCompress()
- *          l_uint8   *zlibUncompress()
- *
- *
- *    This provides an example use of the byte buffer utility
- *    (see bbuffer.c for details of how the bbuffer works internally).
- *    We use zlib to compress and decompress a byte array from
- *    one memory buffer to another.  The standard method uses streams,
- *    but here we use the bbuffer as an expandable queue of pixels
- *    for both the reading and writing sides of each operation.
- *
- *    With memory mapping, one should be able to compress between
- *    memory buffers by using the file system to buffer everything in
- *    the background, but the bbuffer implementation is more portable.
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if HAVE_LIBZ /* defined in environ.h */ -/* --------------------------------------------*/ - -#include "zlib.h" - -static const l_int32 L_BUF_SIZE = 32768; -static const l_int32 ZLIB_COMPRESSION_LEVEL = 6; - -#ifndef NO_CONSOLE_IO -#define DEBUG 0 -#endif /* ~NO_CONSOLE_IO */ - - -/*! - * \brief zlibCompress() - * - * \param[in] datain byte buffer with input data - * \param[in] nin number of bytes of input data - * \param[out] pnout number of bytes of output data - * \return dataout compressed data, or NULL on error - * - *
- * Notes:
- *      (1) We repeatedly read in and fill up an input buffer,
- *          compress the data, and read it back out.  zlib
- *          uses two byte buffers internally in the z_stream
- *          data structure.  We use the bbuffers to feed data
- *          into the fixed bufferin, and feed it out of bufferout,
- *          in the same way that a pair of streams would normally
- *          be used if the data were being read from one file
- *          and written to another.  This is done iteratively,
- *          compressing L_BUF_SIZE bytes of input data at a time.
- * 
- */ -l_uint8 * -zlibCompress(l_uint8 *datain, - size_t nin, - size_t *pnout) -{ -l_uint8 *dataout; -l_int32 status, success; -l_int32 flush; -size_t nbytes; -l_uint8 *bufferin, *bufferout; -L_BBUFFER *bbin, *bbout; -z_stream z; - - PROCNAME("zlibCompress"); - - if (!datain) - return (l_uint8 *)ERROR_PTR("datain not defined", procName, NULL); - - /* Set up fixed size buffers used in z_stream */ - bufferin = (l_uint8 *)LEPT_CALLOC(L_BUF_SIZE, sizeof(l_uint8)); - bufferout = (l_uint8 *)LEPT_CALLOC(L_BUF_SIZE, sizeof(l_uint8)); - - /* Set up bbuffers and load bbin with the data */ - bbin = bbufferCreate(datain, nin); - bbout = bbufferCreate(NULL, 0); - - success = TRUE; - if (!bufferin || !bufferout || !bbin || !bbout) { - L_ERROR("calloc fail for buffer\n", procName); - success = FALSE; - goto cleanup_arrays; - } - - z.zalloc = (alloc_func)0; - z.zfree = (free_func)0; - z.opaque = (voidpf)0; - - z.next_in = bufferin; - z.avail_in = 0; - z.next_out = bufferout; - z.avail_out = L_BUF_SIZE; - - status = deflateInit(&z, ZLIB_COMPRESSION_LEVEL); - if (status != Z_OK) { - L_ERROR("deflateInit failed\n", procName); - success = FALSE; - goto cleanup_arrays; - } - - do { - if (z.avail_in == 0) { - z.next_in = bufferin; - bbufferWrite(bbin, bufferin, L_BUF_SIZE, &nbytes); -#if DEBUG - lept_stderr(" wrote %zu bytes to bufferin\n", nbytes); -#endif /* DEBUG */ - z.avail_in = nbytes; - } - flush = (bbin->n) ? 
Z_SYNC_FLUSH : Z_FINISH; - status = deflate(&z, flush); -#if DEBUG - lept_stderr(" status is %d, bytesleft = %u, totalout = %zu\n", - status, z.avail_out, z.total_out); -#endif /* DEBUG */ - nbytes = L_BUF_SIZE - z.avail_out; - if (nbytes) { - bbufferRead(bbout, bufferout, nbytes); -#if DEBUG - lept_stderr(" read %zu bytes from bufferout\n", nbytes); -#endif /* DEBUG */ - } - z.next_out = bufferout; - z.avail_out = L_BUF_SIZE; - } while (flush != Z_FINISH); - - deflateEnd(&z); - -cleanup_arrays: - if (success) { - dataout = bbufferDestroyAndSaveData(&bbout, pnout); - } else { - dataout = NULL; - bbufferDestroy(&bbout); - } - bbufferDestroy(&bbin); - LEPT_FREE(bufferin); - LEPT_FREE(bufferout); - return dataout; -} - - -/*! - * \brief zlibUncompress() - * - * \param[in] datain byte buffer with compressed input data - * \param[in] nin number of bytes of input data - * \param[out] pnout number of bytes of output data - * \return dataout uncompressed data, or NULL on error - * - *
- * Notes:
- *      (1) See zlibCompress().
- * 
- */ -l_uint8 * -zlibUncompress(l_uint8 *datain, - size_t nin, - size_t *pnout) -{ -l_uint8 *dataout; -l_uint8 *bufferin, *bufferout; -l_int32 status, success; -size_t nbytes; -L_BBUFFER *bbin, *bbout; -z_stream z; - - PROCNAME("zlibUncompress"); - - if (!datain) - return (l_uint8 *)ERROR_PTR("datain not defined", procName, NULL); - - /* Set up fixed size buffers used in z_stream */ - bufferin = (l_uint8 *)LEPT_CALLOC(L_BUF_SIZE, sizeof(l_uint8)); - bufferout = (l_uint8 *)LEPT_CALLOC(L_BUF_SIZE, sizeof(l_uint8)); - - /* Set up bbuffers and load bbin with the data */ - bbin = bbufferCreate(datain, nin); - bbout = bbufferCreate(NULL, 0); - - success = TRUE; - if (!bufferin || !bufferout || !bbin || !bbout) { - L_ERROR("calloc fail for buffer\n", procName); - success = FALSE; - goto cleanup_arrays; - } - - z.zalloc = (alloc_func)0; - z.zfree = (free_func)0; - - z.next_in = bufferin; - z.avail_in = 0; - z.next_out = bufferout; - z.avail_out = L_BUF_SIZE; - - inflateInit(&z); - - - for ( ; ; ) { - if (z.avail_in == 0) { - z.next_in = bufferin; - bbufferWrite(bbin, bufferin, L_BUF_SIZE, &nbytes); -#if DEBUG - lept_stderr(" wrote %d bytes to bufferin\n", nbytes); -#endif /* DEBUG */ - z.avail_in = nbytes; - } - if (z.avail_in == 0) - break; - status = inflate(&z, Z_SYNC_FLUSH); -#if DEBUG - lept_stderr(" status is %d, bytesleft = %d, totalout = %d\n", - status, z.avail_out, z.total_out); -#endif /* DEBUG */ - nbytes = L_BUF_SIZE - z.avail_out; - if (nbytes) { - bbufferRead(bbout, bufferout, nbytes); -#if DEBUG - lept_stderr(" read %d bytes from bufferout\n", nbytes); -#endif /* DEBUG */ - } - z.next_out = bufferout; - z.avail_out = L_BUF_SIZE; - } - - inflateEnd(&z); - -cleanup_arrays: - if (success) { - dataout = bbufferDestroyAndSaveData(&bbout, pnout); - } else { - dataout = NULL; - bbufferDestroy(&bbout); - } - bbufferDestroy(&bbin); - LEPT_FREE(bufferin); - LEPT_FREE(bufferout); - return dataout; -} - -/* --------------------------------------------*/ -#endif /* 
HAVE_LIBZ */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/zlibmemstub.c b/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/zlibmemstub.c deleted file mode 100644 index 1f1ac5c3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/leptonica/zlibmemstub.c +++ /dev/null @@ -1,59 +0,0 @@ -/*====================================================================* - - Copyright (C) 2001 Leptonica. All rights reserved. - - - - Redistribution and use in source and binary forms, with or without - - modification, are permitted provided that the following conditions - - are met: - - 1. Redistributions of source code must retain the above copyright - - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - - copyright notice, this list of conditions and the following - - disclaimer in the documentation and/or other materials - - provided with the distribution. - - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *====================================================================*/ - -/*! - * \file zlibmemstub.c - *
- *
- *     Stubs for zlibmem.c functions
- * 
- */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "allheaders.h" - -/* --------------------------------------------*/ -#if !HAVE_LIBZ /* defined in environ.h */ -/* --------------------------------------------*/ - -l_uint8 * zlibCompress(l_uint8 *datain, size_t nin, size_t *pnout) -{ - return (l_uint8 *)ERROR_PTR("function not present", "zlibCompress", NULL); -} - -/* ----------------------------------------------------------------------*/ - -l_uint8 * zlibUncompress(l_uint8 *datain, size_t nin, size_t *pnout) -{ - return (l_uint8 *)ERROR_PTR("function not present", "zlibUncompress", NULL); -} - -/* --------------------------------------------*/ -#endif /* !HAVE_LIBZ */ -/* --------------------------------------------*/ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/apitypes.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/apitypes.h deleted file mode 100644 index 2c0e85c9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/apitypes.h +++ /dev/null @@ -1,33 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: apitypes.h -// Description: Types used in both the API and internally -// Author: Ray Smith -// Created: Wed Mar 03 09:22:53 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_API_APITYPES_H_ -#define TESSERACT_API_APITYPES_H_ - -#include "publictypes.h" - -// The types used by the API and Page/ResultIterator can be found in: -// ccstruct/publictypes.h -// ccmain/resultiterator.h -// ccmain/pageiterator.h -// API interfaces and API users should be sure to include this file, rather -// than the lower-level one, and lower-level code should be sure to include -// only the lower-level file. - -#endif // TESSERACT_API_APITYPES_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/baseapi.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/baseapi.cpp deleted file mode 100644 index 6fa61017..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/baseapi.cpp +++ /dev/null @@ -1,3024 +0,0 @@ -/********************************************************************** - * File: baseapi.cpp - * Description: Simple API for calling tesseract. - * Author: Ray Smith - * - * (C) Copyright 2006, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "baseapi.h" -#ifdef __linux__ -#include // for sigaction, SA_RESETHAND, SIGBUS, SIGFPE -#endif - -#if defined(_WIN32) -#if defined(__MINGW32__) -// workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME -#undef __STRICT_ANSI__ -#endif // __MINGW32__ -#include -#include -#else -#include // for closedir, opendir, readdir, DIR, dirent -#include -#include -#include // for stat, S_IFDIR -#include -#endif // _WIN32 - -#include // for LC_ALL, LC_CTYPE, LC_NUMERIC -#include // for round, M_PI -#include // for int32_t -#include // for strcmp, strcpy -#include // for size_t -#include // for std::cin -#include // for std::unique_ptr -#include // for std::pair -#include // for std::vector -#include "allheaders.h" // for pixDestroy, boxCreate, boxaAddBox, box... -#include "blobclass.h" // for ExtractFontName -#include "boxword.h" // for BoxWord -#include "config_auto.h" // for PACKAGE_VERSION -#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST -#include "dawg_cache.h" // for DawgCache -#include "dict.h" // for Dict -#include "edgblob.h" // for extract_edges -#include "elst.h" // for ELIST_ITERATOR, ELISTIZE, ELISTIZEH -#include "environ.h" // for l_uint8, FALSE, TRUE -#include "equationdetect.h" // for EquationDetect -#include "errcode.h" // for ASSERT_HOST -#include "globaloc.h" // for SavePixForCrash, signal_exit -#include "helpers.h" // for IntCastRounded, chomp_string -#include "host.h" // for BOOL8 -#include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3 -#include "intfx.h" // for INT_FX_RESULT_STRUCT -#include "mutableiterator.h" // for MutableIterator -#include "normalis.h" // for kBlnBaselineOffset, kBlnXHeight -#include "ocrclass.h" // for ETEXT_DESC -#include "openclwrapper.h" // for PERF_COUNT_END, PERF_COUNT_START, PERF... -#include "osdetect.h" // for OSResults, OSBestResult, OrientationId... -#include "pageres.h" // for PAGE_RES_IT, WERD_RES, PAGE_RES, CR_DE... 
-#include "paragraphs.h" // for DetectParagraphs -#include "params.h" // for BoolParam, IntParam, DoubleParam, Stri... -#include "pdblock.h" // for PDBLK -#include "points.h" // for FCOORD -#include "polyblk.h" // for POLY_BLOCK -#include "rect.h" // for TBOX -#include "renderer.h" // for TessResultRenderer -#include "resultiterator.h" // for ResultIterator -#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST -#include "strngs.h" // for STRING -#include "tessdatamanager.h" // for TessdataManager, kTrainedDataSuffix -#include "tesseractclass.h" // for Tesseract -#include "thresholder.h" // for ImageThresholder -#include "tprintf.h" // for tprintf -#include "werd.h" // for WERD, WERD_IT, W_FUZZY_NON, W_FUZZY_SP - -BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin"); - -namespace tesseract { - -/** Minimum sensible image size to be worth running tesseract. */ -const int kMinRectSize = 10; -/** Character returned when Tesseract couldn't recognize as anything. */ -const char kTesseractReject = '~'; -/** Character used by UNLV error counter as a reject. */ -const char kUNLVReject = '~'; -/** Character used by UNLV as a suspect marker. */ -const char kUNLVSuspect = '^'; -/** - * Filename used for input image file, from which to derive a name to search - * for a possible UNLV zone file, if none is specified by SetInputName. - */ -const char* kInputFile = "noname.tif"; -/** - * Temp file used for storing current parameters before applying retry values. - */ -const char* kOldVarsFile = "failed_vars.txt"; -/** Max string length of an int. */ -const int kMaxIntSize = 22; - -/* Add all available languages recursively. -*/ -static void addAvailableLanguages(const STRING &datadir, const STRING &base, - GenericVector* langs) -{ - const STRING base2 = (base.string()[0] == '\0') ? 
base : base + "/"; - const size_t extlen = sizeof(kTrainedDataSuffix); -#ifdef _WIN32 - WIN32_FIND_DATA data; -#if (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - const int strBufferSize = 4096; - wchar_t* w_string = new wchar_t[strBufferSize]; - MultiByteToWideChar(CP_ACP, 0, (datadir + base2 + "*").string(), -1, w_string, strBufferSize); - HANDLE handle = FindFirstFile(w_string, &data); -#else - HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); -#endif - if (handle != INVALID_HANDLE_VALUE) { - BOOL result = TRUE; - for (; result;) { -#if (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - char cstr[strBufferSize]; - size_t charsConverted; - wcstombs_s(&charsConverted, cstr, data.cFileName, wcslen(data.cFileName)); - char *name = cstr; -#else - char *name = data.cFileName; -#endif - // Skip '.', '..', and hidden files - if (name[0] != '.') { - if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == - FILE_ATTRIBUTE_DIRECTORY) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' && - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } - } - } - result = FindNextFile(handle, &data); - } - FindClose(handle); - } -#else // _WIN32 - DIR* dir = opendir((datadir + base).string()); - if (dir != nullptr) { - dirent *de; - while ((de = readdir(dir))) { - char *name = de->d_name; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - struct stat st; - if (stat((datadir + base2 + name).string(), &st) == 0 && - (st.st_mode & S_IFDIR) == S_IFDIR) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' 
&& - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } - } - } - } - closedir(dir); - } -#endif -} - -// Compare two STRING values (used for sorting). -static int CompareSTRING(const void* p1, const void* p2) { - const STRING* s1 = static_cast(p1); - const STRING* s2 = static_cast(p2); - return strcmp(s1->c_str(), s2->c_str()); -} - -TessBaseAPI::TessBaseAPI() - : tesseract_(nullptr), - osd_tesseract_(nullptr), - equ_detect_(nullptr), - reader_(nullptr), - // Thresholder is initialized to nullptr here, but will be set before use by: - // A constructor of a derived API, SetThresholder(), or - // created implicitly when used in InternalSetImage. - thresholder_(nullptr), - paragraph_models_(nullptr), - block_list_(nullptr), - page_res_(nullptr), - input_file_(nullptr), - output_file_(nullptr), - datapath_(nullptr), - language_(nullptr), - last_oem_requested_(OEM_DEFAULT), - recognition_done_(false), - truth_cb_(nullptr), - rect_left_(0), - rect_top_(0), - rect_width_(0), - rect_height_(0), - image_width_(0), - image_height_(0) { - const char *locale; - locale = std::setlocale(LC_ALL, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_CTYPE, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_NUMERIC, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); -} - -TessBaseAPI::~TessBaseAPI() { - End(); -} - -/** - * Returns the version identifier as a static string. Do not delete. - */ -const char* TessBaseAPI::Version() { - return PACKAGE_VERSION; -} - -/** - * If compiled with OpenCL AND an available OpenCL - * device is deemed faster than serial code, then - * "device" is populated with the cl_device_id - * and returns sizeof(cl_device_id) - * otherwise *device=nullptr and returns 0. 
- */ -#ifdef USE_OPENCL -#ifdef USE_DEVICE_SELECTION -#include "opencl_device_selection.h" -#endif -#endif -size_t TessBaseAPI::getOpenCLDevice(void **data) { -#ifdef USE_OPENCL -#ifdef USE_DEVICE_SELECTION - ds_device device = OpenclDevice::getDeviceSelection(); - if (device.type == DS_DEVICE_OPENCL_DEVICE) { - *data = new cl_device_id; - memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id)); - return sizeof(cl_device_id); - } -#endif -#endif - - *data = nullptr; - return 0; -} - -/** - * Writes the thresholded image to stderr as a PBM file on receipt of a - * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). - */ -void TessBaseAPI::CatchSignals() { -#ifdef __linux__ - struct sigaction action; - memset(&action, 0, sizeof(action)); - action.sa_handler = &signal_exit; - action.sa_flags = SA_RESETHAND; - sigaction(SIGSEGV, &action, nullptr); - sigaction(SIGFPE, &action, nullptr); - sigaction(SIGBUS, &action, nullptr); -#else - // Warn API users that an implementation is needed. - tprintf("CatchSignals has no non-linux implementation!\n"); -#endif -} - -/** - * Set the name of the input file. Needed only for training and - * loading a UNLV zone file. - */ -void TessBaseAPI::SetInputName(const char* name) { - if (input_file_ == nullptr) - input_file_ = new STRING(name); - else - *input_file_ = name; -} - -/** Set the name of the output files. Needed only for debugging. 
*/ -void TessBaseAPI::SetOutputName(const char* name) { - if (output_file_ == nullptr) - output_file_ = new STRING(name); - else - *output_file_ = name; -} - -bool TessBaseAPI::SetVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, - tesseract_->params()); -} - -bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, - tesseract_->params()); -} - -bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { - IntParam *p = ParamUtils::FindParam( - name, GlobalParams()->int_params, tesseract_->params()->int_params); - if (p == nullptr) return false; - *value = (int32_t)(*p); - return true; -} - -bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { - BoolParam *p = ParamUtils::FindParam( - name, GlobalParams()->bool_params, tesseract_->params()->bool_params); - if (p == nullptr) return false; - *value = (BOOL8)(*p); - return true; -} - -const char *TessBaseAPI::GetStringVariable(const char *name) const { - StringParam *p = ParamUtils::FindParam( - name, GlobalParams()->string_params, tesseract_->params()->string_params); - return (p != nullptr) ? p->string() : nullptr; -} - -bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { - DoubleParam *p = ParamUtils::FindParam( - name, GlobalParams()->double_params, tesseract_->params()->double_params); - if (p == nullptr) return false; - *value = (double)(*p); - return true; -} - -/** Get value of named variable as a string, if it exists. */ -bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { - return ParamUtils::GetParamAsString(name, tesseract_->params(), val); -} - -/** Print Tesseract parameters to the given file. 
*/ -void TessBaseAPI::PrintVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params()); -} - -/** - * The datapath must be the name of the data directory (no ending /) or - * some other file in which the data directory resides (for instance argv[0].) - * The language is (usually) an ISO 639-3 string or nullptr will default to eng. - * If numeric_mode is true, then only digits and Roman numerals will - * be returned. - * @return: 0 on success and -1 on initialization failure. - */ -int TessBaseAPI::Init(const char* datapath, const char* language, - OcrEngineMode oem, char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params) { - return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, - vars_values, set_only_non_debug_params, nullptr); -} - -// In-memory version reads the traineddata file directly from the given -// data[data_size] array. Also implements the version with a datapath in data, -// flagged by data_size = 0. -int TessBaseAPI::Init(const char* data, int data_size, const char* language, - OcrEngineMode oem, char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_non_debug_params, FileReader reader) { - PERF_COUNT_START("TessBaseAPI::Init") - // Default language is "eng". - if (language == nullptr) language = "eng"; - STRING datapath = data_size == 0 ? data : language; - // If the datapath, OcrEngineMode or the language have changed - start again. - // Note that the language_ field stores the last requested language that was - // initialized successfully, while tesseract_->lang stores the language - // actually used. They differ only if the requested language was nullptr, in - // which case tesseract_->lang is set to the Tesseract default ("eng"). 
- if (tesseract_ != nullptr && - (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || - last_oem_requested_ != oem || - (*language_ != language && tesseract_->lang != language))) { - delete tesseract_; - tesseract_ = nullptr; - } - // PERF_COUNT_SUB("delete tesseract_") -#ifdef USE_OPENCL - OpenclDevice od; - od.InitEnv(); -#endif - PERF_COUNT_SUB("OD::InitEnv()") - bool reset_classifier = true; - if (tesseract_ == nullptr) { - reset_classifier = false; - tesseract_ = new Tesseract; - if (reader != nullptr) reader_ = reader; - TessdataManager mgr(reader_); - if (data_size != 0) { - mgr.LoadMemBuffer(language, data, data_size); - } - if (tesseract_->init_tesseract( - datapath.string(), - output_file_ != nullptr ? output_file_->string() : nullptr, - language, oem, configs, configs_size, vars_vec, vars_values, - set_only_non_debug_params, &mgr) != 0) { - return -1; - } - } - - PERF_COUNT_SUB("update tesseract_") - // Update datapath and language requested for the last valid initialization. - if (datapath_ == nullptr) - datapath_ = new STRING(datapath); - else - *datapath_ = datapath; - if ((strcmp(datapath_->string(), "") == 0) && - (strcmp(tesseract_->datadir.string(), "") != 0)) - *datapath_ = tesseract_->datadir; - - if (language_ == nullptr) - language_ = new STRING(language); - else - *language_ = language; - last_oem_requested_ = oem; - -#ifndef DISABLED_LEGACY_ENGINE - // PERF_COUNT_SUB("update last_oem_requested_") - // For same language and datapath, just reset the adaptive classifier. - if (reset_classifier) { - tesseract_->ResetAdaptiveClassifier(); - PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") - } -#endif // ndef DISABLED_LEGACY_ENGINE - PERF_COUNT_END - return 0; -} - -/** - * Returns the languages string used in the last valid initialization. - * If the last initialization specified "deu+hin" then that will be - * returned. If hin loaded eng automatically as well, then that will - * not be included in this list. 
To find the languages actually - * loaded use GetLoadedLanguagesAsVector. - * The returned string should NOT be deleted. - */ -const char* TessBaseAPI::GetInitLanguagesAsString() const { - return (language_ == nullptr || language_->string() == nullptr) ? - "" : language_->string(); -} - -/** - * Returns the loaded languages in the vector of STRINGs. - * Includes all languages loaded by the last Init, including those loaded - * as dependencies of other loaded languages. - */ -void TessBaseAPI::GetLoadedLanguagesAsVector( - GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - langs->push_back(tesseract_->lang); - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) - langs->push_back(tesseract_->get_sub_lang(i)->lang); - } -} - -/** - * Returns the available languages in the sorted vector of STRINGs. - */ -void TessBaseAPI::GetAvailableLanguagesAsVector( - GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir, "", langs); - langs->sort(CompareSTRING); - } -} - -//TODO(amit): Adapt to lstm -#ifndef DISABLED_LEGACY_ENGINE -/** - * Init only the lang model component of Tesseract. The only functions - * that work after this init are SetVariable and IsValidWord. - * WARNING: temporary! This function will be removed from here and placed - * in a separate API at some future time. - */ -int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - else - ParamUtils::ResetToDefaults(tesseract_->params()); - TessdataManager mgr; - return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); -} -#endif // ndef DISABLED_LEGACY_ENGINE - -/** - * Init only for page layout analysis. Use only for calls to SetImage and - * AnalysePage. Calls that attempt recognition will generate an error. 
- */ -void TessBaseAPI::InitForAnalysePage() { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; - #ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); - #endif - } -} - -/** - * Read a "config" file containing a set of parameter name, value pairs. - * Searches the standard places: tessdata/configs, tessdata/tessconfigs - * and also accepts a relative or absolute path name. - */ -void TessBaseAPI::ReadConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); -} - -/** Same as above, but only set debug params from the given config file. */ -void TessBaseAPI::ReadDebugConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); -} - -/** - * Set the current page segmentation mode. Defaults to PSM_AUTO. - * The mode is stored as an IntParam so it can also be modified by - * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). - */ -void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - tesseract_->tessedit_pageseg_mode.set_value(mode); -} - -/** Return the current page segmentation mode. */ -PageSegMode TessBaseAPI::GetPageSegMode() const { - if (tesseract_ == nullptr) - return PSM_SINGLE_BLOCK; - return static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); -} - -/** - * Recognize a rectangle from an image and return the result as a string. - * May be called many times for a single Init. - * Currently has no error checking. - * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. - * Palette color images will not work properly and must be converted to - * 24 bit. - * Binary images of 1 bit per pixel may also be given but they must be - * byte packed with the MSB of the first byte being the first pixel, and a - * one pixel is WHITE. For binary images set bytes_per_pixel=0. 
- * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - */ -char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, - int left, int top, - int width, int height) { - if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) - return nullptr; // Nothing worth doing. - - // Since this original api didn't give the exact size of the image, - // we have to invent a reasonable value. - int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8; - SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, - bytes_per_pixel, bytes_per_line); - SetRectangle(left, top, width, height); - - return GetUTF8Text(); -} - -#ifndef DISABLED_LEGACY_ENGINE -/** - * Call between pages or documents etc to free up memory and forget - * adaptive data. - */ -void TessBaseAPI::ClearAdaptiveClassifier() { - if (tesseract_ == nullptr) - return; - tesseract_->ResetAdaptiveClassifier(); - tesseract_->ResetDocumentDictionary(); -} -#endif // ndef DISABLED_LEGACY_ENGINE - -/** - * Provide an image for Tesseract to recognize. Format is as - * TesseractRect above. Copies the image buffer and converts to Pix. - * SetImage clears all recognition results, and sets the rectangle to the - * full image, so it may be followed immediately by a GetUTF8Text, and it - * will automatically perform recognition. - */ -void TessBaseAPI::SetImage(const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, int bytes_per_line) { - if (InternalSetImage()) { - thresholder_->SetImage(imagedata, width, height, - bytes_per_pixel, bytes_per_line); - SetInputImage(thresholder_->GetPixRect()); - } -} - -void TessBaseAPI::SetSourceResolution(int ppi) { - if (thresholder_) - thresholder_->SetSourceYResolution(ppi); - else - tprintf("Please call SetImage before SetSourceResolution.\n"); -} - -/** - * Provide an image for Tesseract to recognize. 
As with SetImage above, - * Tesseract takes its own copy of the image, so it need not persist until - * after Recognize. - * Pix vs raw, which to use? - * Use Pix where possible. Tesseract uses Pix as its internal representation - * and it is therefore more efficient to provide a Pix directly. - */ -void TessBaseAPI::SetImage(Pix* pix) { - if (InternalSetImage()) { - if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { - // remove alpha channel from png - PIX* p1 = pixRemoveAlpha(pix); - pixSetSpp(p1, 3); - pix = pixCopy(nullptr, p1); - pixDestroy(&p1); - } - thresholder_->SetImage(pix); - SetInputImage(thresholder_->GetPixRect()); - } -} - -/** - * Restrict recognition to a sub-rectangle of the image. Call after SetImage. - * Each SetRectangle clears the recogntion results so multiple rectangles - * can be recognized with the same image. - */ -void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) - return; - thresholder_->SetRectangle(left, top, width, height); - ClearResults(); -} - -/** - * ONLY available after SetImage if you have Leptonica installed. - * Get a copy of the internal thresholded image from Tesseract. - */ -Pix* TessBaseAPI::GetThresholdedImage() { - if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return nullptr; - } - return pixClone(tesseract_->pix_binary()); -} - -/** - * Get the result of page layout analysis as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ -Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { - return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); -} - -/** - * Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. 
- * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - * If paraids is not nullptr, the paragraph-id of each line within its block is - * also returned as an array of one element per line. delete [] after use. - */ -Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, - Pixa** pixa, int** blockids, int** paraids) { - return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, - pixa, blockids, paraids); -} - -/** - * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa - * pair, in reading order. Enables downstream handling of non-rectangular - * regions. - * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - */ -Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { - return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); -} - -/** - * Get the words as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ -Boxa* TessBaseAPI::GetWords(Pixa** pixa) { - return GetComponentImages(RIL_WORD, true, pixa, nullptr); -} - -/** - * Gets the individual connected (text) components (created - * after pages segmentation step, but before recognition) - * as a leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ -Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { - return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); -} - -/** - * Get the given level kind of components (block, textline, word etc.) as a - * leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each component is also returned - * as an array of one element per component. delete [] after use. 
- * If text_only is true, then only text components are returned. - */ -Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, - bool text_only, bool raw_image, - const int raw_padding, - Pixa** pixa, int** blockids, - int** paraids) { - PageIterator* page_it = GetIterator(); - if (page_it == nullptr) - page_it = AnalyseLayout(); - if (page_it == nullptr) - return nullptr; // Failed. - - // Count the components to get a size for the arrays. - int component_count = 0; - int left, top, right, bottom; - - TessResultCallback* get_bbox = nullptr; - if (raw_image) { - // Get bounding box in original raw image with padding. - get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, - level, raw_padding, - &left, &top, &right, &bottom); - } else { - // Get bounding box from binarized imaged. Note that this could be - // differently scaled from the original image. - get_bbox = NewPermanentTessCallback(page_it, - &PageIterator::BoundingBoxInternal, - level, &left, &top, &right, &bottom); - } - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) - ++component_count; - } while (page_it->Next(level)); - - Boxa* boxa = boxaCreate(component_count); - if (pixa != nullptr) - *pixa = pixaCreate(component_count); - if (blockids != nullptr) - *blockids = new int[component_count]; - if (paraids != nullptr) - *paraids = new int[component_count]; - - int blockid = 0; - int paraid = 0; - int component_index = 0; - page_it->Begin(); - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) { - Box* lbox = boxCreate(left, top, right - left, bottom - top); - boxaAddBox(boxa, lbox, L_INSERT); - if (pixa != nullptr) { - Pix* pix = nullptr; - if (raw_image) { - pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, - &top); - } else { - pix = page_it->GetBinaryImage(level); - } - pixaAddPix(*pixa, pix, L_INSERT); - pixaAddBox(*pixa, lbox, L_CLONE); - } - if (paraids != nullptr) { - 
(*paraids)[component_index] = paraid; - if (page_it->IsAtFinalElement(RIL_PARA, level)) - ++paraid; - } - if (blockids != nullptr) { - (*blockids)[component_index] = blockid; - if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { - ++blockid; - paraid = 0; - } - } - ++component_index; - } - } while (page_it->Next(level)); - delete page_it; - delete get_bbox; - return boxa; -} - -int TessBaseAPI::GetThresholdedImageScaleFactor() const { - if (thresholder_ == nullptr) { - return 0; - } - return thresholder_->GetScaleFactor(); -} - -/** - * Runs page layout analysis in the mode set by SetPageSegMode. - * May optionally be called prior to Recognize to get access to just - * the page layout results. Returns an iterator to the results. - * If merge_similar_words is true, words are combined where suitable for use - * with a line recognizer. Use if you want to use AnalyseLayout to find the - * textlines, and then want to process textline fragments with an external - * line recognizer. - * Returns nullptr on error or an empty page. - * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ -PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } - -PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { - if (FindLines() == 0) { - if (block_list_->empty()) - return nullptr; // The page was empty. 
- page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); - DetectParagraphs(false); - return new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - } - return nullptr; -} - -/** - * Recognize the tesseract global image and return the result as Tesseract - * internal structures. - */ -int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (FindLines() != 0) - return -1; - delete page_res_; - if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, - &tesseract_->prev_word_best_choice_); - return 0; // Empty page. - } - - tesseract_->SetBlackAndWhitelist(); - recognition_done_ = true; -#ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_resegment_from_line_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); - } else if (tesseract_->tessedit_resegment_from_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); - } else -#endif // ndef DISABLED_LEGACY_ENGINE - { - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), - block_list_, &tesseract_->prev_word_best_choice_); - } - - if (page_res_ == nullptr) { - return -1; - } - - if (tesseract_->tessedit_train_line_recognizer) { - tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } -#ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_make_boxes_from_boxes) { - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } -#endif // ndef DISABLED_LEGACY_ENGINE - - if (truth_cb_ != nullptr) { - tesseract_->wordrec_run_blamer.set_value(true); - PageIterator *page_it = new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - truth_cb_->Run(tesseract_->getDict().getUnicharset(), - 
image_height_, page_it, this->tesseract()->pix_grey()); - delete page_it; - } - - int result = 0; - if (tesseract_->interactive_display_mode) { - #ifndef GRAPHICS_DISABLED - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); - #endif // GRAPHICS_DISABLED - // The page_res is invalid after an interactive session, so cleanup - // in a way that lets us continue to the next page without crashing. - delete page_res_; - page_res_ = nullptr; - return -1; - #ifndef DISABLED_LEGACY_ENGINE - } else if (tesseract_->tessedit_train_from_boxes) { - STRING fontname; - ExtractFontName(*output_file_, &fontname); - tesseract_->ApplyBoxTraining(fontname, page_res_); - } else if (tesseract_->tessedit_ambigs_training) { - FILE *training_output_file = tesseract_->init_recog_training(*input_file_); - // OCR the page segmented into words by tesseract. - tesseract_->recog_training_segmented( - *input_file_, page_res_, monitor, training_output_file); - fclose(training_output_file); - #endif // ndef DISABLED_LEGACY_ENGINE - } else { - // Now run the main recognition. - bool wait_for_text = true; - GetBoolVariable("paragraph_text_based", &wait_for_text); - if (!wait_for_text) DetectParagraphs(false); - if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { - if (wait_for_text) DetectParagraphs(true); - } else { - result = -1; - } - } - return result; -} - -#ifndef DISABLED_LEGACY_ENGINE -/** Tests the chopper by exhaustively running chop_one_blob. 
*/ -int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (page_res_ != nullptr) - ClearResults(); - if (FindLines() != 0) - return -1; - // Additional conditions under which chopper test cannot be run - if (tesseract_->interactive_display_mode) return -1; - - recognition_done_ = true; - - page_res_ = new PAGE_RES(false, block_list_, - &(tesseract_->prev_word_best_choice_)); - - PAGE_RES_IT page_res_it(page_res_); - - while (page_res_it.word() != nullptr) { - WERD_RES *word_res = page_res_it.word(); - GenericVector boxes; - tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, - page_res_it.row()->row, word_res); - page_res_it.forward(); - } - return 0; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -// Takes ownership of the input pix. -void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } - -Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } - -const char * TessBaseAPI::GetInputName() { - if (input_file_) - return input_file_->c_str(); - return nullptr; -} - -const char * TessBaseAPI::GetDatapath() { - return tesseract_->datadir.c_str(); -} - -int TessBaseAPI::GetSourceYResolution() { - return thresholder_->GetSourceYResolution(); -} - -// If flist exists, get data from there. Otherwise get data from buf. -// Seems convoluted, but is the easiest way I know of to meet multiple -// goals. Support streaming from stdin, and also work on platforms -// lacking fmemopen. -bool TessBaseAPI::ProcessPagesFileList(FILE *flist, - STRING *buf, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number) { - if (!flist && !buf) return false; - int page = (tessedit_page_number >= 0) ? 
tessedit_page_number : 0; - char pagename[MAX_PATH]; - - GenericVector lines; - if (!flist) { - buf->split('\n', &lines); - if (lines.empty()) return false; - } - - // Skip to the requested page number. - for (int i = 0; i < page; i++) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } - } - - // Begin producing output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - return false; - } - - // Loop over all pages - or just the requested one - while (true) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } else { - if (page >= lines.size()) break; - snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); - } - chomp_string(pagename); - Pix *pix = pixRead(pagename); - if (pix == nullptr) { - tprintf("Image file %s cannot be read!\n", pagename); - return false; - } - tprintf("Page %d : %s\n", page, pagename); - bool r = ProcessPage(pix, page, pagename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - ++page; - } - - // Finish producing output - if (renderer && !renderer->EndDocument()) { - return false; - } - return true; -} - -bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, - size_t size, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number) { - return false; -/* -#ifndef ANDROID_BUILD - Pix *pix = nullptr; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - size_t offset = 0; - for (; ; ++page) { - if (tessedit_page_number >= 0) - page = tessedit_page_number; - pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) - : pixReadFromMultipageTiff(filename, &offset); - if (pix == nullptr) break; - tprintf("Page %d\n", page + 1); - char page_str[kMaxIntSize]; - snprintf(page_str, kMaxIntSize - 1, "%d", page); - SetVariable("applybox_page", page_str); - bool r = ProcessPage(pix, page, filename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - if (!offset) break; - } - return true; -#else - return false; -#endif -*/ -} - -// Master ProcessPages calls ProcessPagesInternal and then does any post- -// processing required due to being in a training mode. -bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer) { - bool result = - ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); - #ifndef DISABLED_LEGACY_ENGINE - if (result) { - if (tesseract_->tessedit_train_from_boxes && - !tesseract_->WriteTRFile(*output_file_)) { - tprintf("Write of TR file failed: %s\n", output_file_->string()); - return false; - } - } - #endif // ndef DISABLED_LEGACY_ENGINE - return result; -} - -// In the ideal scenario, Tesseract will start working on data as soon -// as it can. For example, if you stream a filelist through stdin, we -// should start the OCR process as soon as the first filename is -// available. This is particularly useful when hooking Tesseract up to -// slow hardware such as a book scanning machine. -// -// Unfortunately there are tradeoffs. You can't seek on stdin. That -// makes automatic detection of datatype (TIFF? filelist? PNG?) -// impractical. So we support a command line flag to explicitly -// identify the scenario that really matters: filelists on -// stdin. We'll still do our best if the user likes pipes. 
-bool TessBaseAPI::ProcessPagesInternal(const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPages") - bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); - if (stdInput) { -#ifdef WIN32 - if (_setmode(_fileno(stdin), _O_BINARY) == -1) - tprintf("ERROR: cin to binary: %s", strerror(errno)); -#endif // WIN32 - } - - if (stream_filelist) { - return ProcessPagesFileList(stdin, nullptr, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } - - // At this point we are officially in autodection territory. - // That means any data in stdin must be buffered, to make it - // seekable. - std::string buf; - const l_uint8 *data = nullptr; - if (stdInput) { - buf.assign((std::istreambuf_iterator(std::cin)), - (std::istreambuf_iterator())); - data = reinterpret_cast(buf.data()); - } else { - // Check whether the input file can be read. - if (FILE* file = fopen(filename, "rb")) { - fclose(file); - } else { -#ifdef WIN32 - fprintf(stderr, "Error, cannot read input file %s: %s\n", - filename, strerror(errno)); -#endif - return false; - } - } - - // Here is our autodetection - int format; - int r = (stdInput) ? 
- findFileFormatBuffer(data, &format) : - findFileFormat(filename, &format); - - // Maybe we have a filelist - if (r != 0 || format == IFF_UNKNOWN) { - STRING s; - if (stdInput) { - s = buf.c_str(); - } else { - std::ifstream t(filename); - std::string u((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - s = u.c_str(); - } - return ProcessPagesFileList(nullptr, &s, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } - - // Maybe we have a TIFF which is potentially multipage - bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || - format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || - format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || - format == IFF_TIFF_ZIP); - - // Fail early if we can, before producing any output - Pix *pix = nullptr; - if (!tiff) { - pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); - if (pix == nullptr) { - return false; - } - } - - // Begin the output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - pixDestroy(&pix); - return false; - } - - // Produce output - r = (tiff) ? 
- ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number) : - ProcessPage(pix, 0, filename, retry_config, - timeout_millisec, renderer); - - // Clean up memory as needed - pixDestroy(&pix); - - // End the output - if (!r || (renderer && !renderer->EndDocument())) { - return false; - } - PERF_COUNT_END - return true; -} - -bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPage") - SetInputName(filename); - SetImage(pix); - bool failed = false; - - if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { - // Disabled character recognition - PageIterator* it = AnalyseLayout(); - - if (it == nullptr) { - failed = true; - } else { - delete it; - } - } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { - failed = FindLines() != 0; - } else if (timeout_millisec > 0) { - // Running with a timeout. - ETEXT_DESC monitor; - monitor.cancel = nullptr; - monitor.cancel_this = nullptr; - monitor.set_deadline_msecs(timeout_millisec); - - // Now run the main recognition. - failed = Recognize(&monitor) < 0; - } else { - // Normal layout and character recognition with no timeout. - failed = Recognize(nullptr) < 0; - } - - if (tesseract_->tessedit_write_images) { -#ifndef ANDROID_BUILD - Pix* page_pix = GetThresholdedImage(); - pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); -#endif // ANDROID_BUILD - } - - if (failed && retry_config != nullptr && retry_config[0] != '\0') { - // Save current config variables before switching modes. - FILE* fp = fopen(kOldVarsFile, "wb"); - if (fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); - } else { - PrintVariables(fp); - fclose(fp); - } - // Switch to alternate mode for retry. - ReadConfigFile(retry_config); - SetImage(pix); - Recognize(nullptr); - // Restore saved config variables. 
- ReadConfigFile(kOldVarsFile); - } - - if (renderer && !failed) { - failed = !renderer->AddImage(this); - } - - PERF_COUNT_END - return !failed; -} - -/** - * Get a left-to-right iterator to the results of LayoutAnalysis and/or - * Recognize. The returned iterator must be deleted after use. - */ -LTRResultIterator* TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); -} - -/** - * Get a reading-order iterator to the results of LayoutAnalysis and/or - * Recognize. The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ -ResultIterator* TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return ResultIterator::StartOfParagraph(LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_)); -} - -/** - * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. - * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. 
- */ -MutableIterator* TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new MutableIterator(page_res_, tesseract_, - thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); -} - -/** Make a text string from the internal data structures. */ -char* TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - STRING text(""); - ResultIterator *it = GetIterator(); - do { - if (it->Empty(RIL_PARA)) continue; - const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); - text += para_text.get(); - } while (it->Next(RIL_PARA)); - char* result = new char[text.length() + 1]; - strncpy(result, text.string(), text.length() + 1); - delete it; - return result; -} - -/** - * Gets the block orientation at the current iterator position. - */ -static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { - tesseract::Orientation orientation; - tesseract::WritingDirection writing_direction; - tesseract::TextlineOrder textline_order; - float deskew_angle; - it->Orientation(&orientation, &writing_direction, &textline_order, - &deskew_angle); - return orientation; -} - -/** - * Fits a line to the baseline at the given level, and appends its coefficients - * to the hOCR string. - * NOTE: The hOCR spec is unclear on how to specify baseline coefficients for - * rotated textlines. For this reason, on textlines that are not upright, this - * method currently only inserts a 'textangle' property to indicate the rotation - * direction and does not add any baseline information to the hocr string. 
- */ -static void AddBaselineCoordsTohOCR(const PageIterator *it, - PageIteratorLevel level, - STRING* hocr_str) { - tesseract::Orientation orientation = GetBlockTextOrientation(it); - if (orientation != ORIENTATION_PAGE_UP) { - hocr_str->add_str_int("; textangle ", 360 - orientation * 90); - return; - } - - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - - // Try to get the baseline coordinates at this level. - int x1, y1, x2, y2; - if (!it->Baseline(level, &x1, &y1, &x2, &y2)) - return; - // Following the description of this field of the hOCR spec, we convert the - // baseline coordinates so that "the bottom left of the bounding box is the - // origin". - x1 -= left; - x2 -= left; - y1 -= bottom; - y2 -= bottom; - - // Now fit a line through the points so we can extract coefficients for the - // equation: y = p1 x + p0 - double p1 = 0; - double p0 = 0; - if (x1 == x2) { - // Problem computing the polynomial coefficients. - return; - } - p1 = (y2 - y1) / static_cast(x2 - x1); - p0 = y1 - static_cast(p1 * x1); - - hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); - hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); -} - -static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, - int num2) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - if (num2 >= 0) { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); - } else { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); - } - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; -} - -static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, - int num2, int num3) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; -} - -static void 
AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, - STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - // This is the only place we use double quotes instead of single quotes, - // but it may too late to change for consistency - hocr_str->add_str_int(" title=\"bbox ", left); - hocr_str->add_str_int(" ", top); - hocr_str->add_str_int(" ", right); - hocr_str->add_str_int(" ", bottom); - // Add baseline coordinates & heights for textlines only. - if (level == RIL_TEXTLINE) { - AddBaselineCoordsTohOCR(it, level, hocr_str); - // add custom height measures - float row_height, descenders, ascenders; // row attributes - it->RowAttributes(&row_height, &descenders, &ascenders); - // TODO(rays): Do we want to limit these to a single decimal place? - hocr_str->add_str_double("; x_size ", row_height); - hocr_str->add_str_double("; x_descenders ", descenders * -1); - hocr_str->add_str_double("; x_ascenders ", ascenders); - } - *hocr_str += "\">"; -} - -static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, - STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - hocr_str->add_str_int("\t", left); - hocr_str->add_str_int("\t", top); - hocr_str->add_str_int("\t", right - left); - hocr_str->add_str_int("\t", bottom - top); -} - -/** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Image name/input_file_ can be set by SetInputName before calling - * GetHOCRText - * STL removed from original patch submission and refactored by rays. - * Returned string must be freed with the delete [] operator. - */ -char* TessBaseAPI::GetHOCRText(int page_number) { - return GetHOCRText(nullptr, page_number); -} - -/** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. 
- * page_number is 0-based but will appear in the output as 1-based. - * Image name/input_file_ can be set by SetInputName before calling - * GetHOCRText - * STL removed from original patch submission and refactored by rays. - * Returned string must be freed with the delete [] operator. - */ -char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) - return nullptr; - - int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; - int page_id = page_number + 1; // hOCR uses 1-based page numbers. - bool para_is_ltr = true; // Default direction is LTR - const char* paragraph_lang = nullptr; - bool font_info = false; - GetBoolVariable("hocr_font_info", &font_info); - - STRING hocr_str(""); - - if (input_file_ == nullptr) - SetInputName(nullptr); - -#ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 - int str16_len = - MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, - uni16_str, str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, - nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, - utf8_len, nullptr, nullptr); - *input_file_ = utf8_str; - delete[] uni16_str; - delete[] utf8_str; -#endif - - hocr_str += "
string()); - } else { - hocr_str += "unknown"; - } - hocr_str.add_str_int("\"; bbox ", rect_left_); - hocr_str.add_str_int(" ", rect_top_); - hocr_str.add_str_int(" ", rect_width_); - hocr_str.add_str_int(" ", rect_height_); - hocr_str.add_str_int("; ppageno ", page_number); - hocr_str += "'>\n"; - - ResultIterator *res_it = GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Open any new block/paragraph/textline. - if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - para_is_ltr = true; // reset to default direction - hocr_str += "
IsAtBeginningOf(RIL_PARA)) { - hocr_str += "\n

ParagraphIsLtr(); - if (!para_is_ltr) { - hocr_str += " dir='rtl'"; - } - AddIdTohOCR(&hocr_str, "par", page_id, pcnt); - paragraph_lang = res_it->WordRecognitionLanguage(); - if (paragraph_lang) { - hocr_str += " lang='"; - hocr_str += paragraph_lang; - hocr_str += "'"; - } - AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - hocr_str += "\n >>* confidencemap = nullptr; - if (tesseract_->lstm_choice_mode) { - confidencemap = res_it->GetBestLSTMSymbolChoices(); - } - hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); - hocr_str.add_str_int(" title='bbox ", left); - hocr_str.add_str_int(" ", top); - hocr_str.add_str_int(" ", right); - hocr_str.add_str_int(" ", bottom); - hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); - if (font_info) { - if (font_name) { - hocr_str += "; x_font "; - hocr_str += HOcrEscape(font_name); - } - hocr_str.add_str_int("; x_fsize ", pointsize); - } - hocr_str += "'"; - const char* lang = res_it->WordRecognitionLanguage(); - if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { - hocr_str += " lang='"; - hocr_str += lang; - hocr_str += "'"; - } - switch (res_it->WordDirection()) { - // Only emit direction if different from current paragraph direction - case DIR_LEFT_TO_RIGHT: - if (!para_is_ltr) hocr_str += " dir='ltr'"; - break; - case DIR_RIGHT_TO_LEFT: - if (para_is_ltr) hocr_str += " dir='rtl'"; - break; - case DIR_MIX: - case DIR_NEUTRAL: - default: // Do nothing. 
- break; - } - hocr_str += ">"; - bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); - bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); - bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); - if (bold) hocr_str += ""; - if (italic) hocr_str += ""; - do { - const std::unique_ptr grapheme( - res_it->GetUTF8Text(RIL_SYMBOL)); - if (grapheme && grapheme[0] != 0) { - hocr_str += HOcrEscape(grapheme.get()); - } - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - if (italic) hocr_str += ""; - if (bold) hocr_str += ""; - // If the lstm choice mode is required it is added here - if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - hocr_str += "\n > timestep = (*confidencemap)[i]; - for (std::pair conf : timestep) { - hocr_str += "lstm_choice_mode == 2 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - std::vector> timestep = (*confidencemap)[i]; - if (timestep.size() > 0) { - hocr_str += "\n Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Add rows for any new block/paragraph/textline. 
- if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - block_num++; - par_num = 0; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("2\t", page_num); // level 2 - block - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for block - } - if (res_it->IsAtBeginningOf(RIL_PARA)) { - par_num++; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_PARA, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for para - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - line_num++; - word_num = 0; - tsv_str.add_str_int("4\t", page_num); // level 4 - line - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for line - } - - // Now, process the word... - int left, top, right, bottom; - res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - word_num++; - tsv_str.add_str_int("5\t", page_num); // level 5 - word - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - tsv_str.add_str_int("\t", left); - tsv_str.add_str_int("\t", top); - tsv_str.add_str_int("\t", right - left); - tsv_str.add_str_int("\t", bottom - top); - tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); - tsv_str += "\t"; - - // Increment counts if at end of block/paragraph/textline. 
- if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; - if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; - if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; - - do { - tsv_str += - std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - tsv_str += "\n"; // end of row - wcnt++; - } - - char* ret = new char[tsv_str.length() + 1]; - strcpy(ret, tsv_str.string()); - delete res_it; - return ret; -} - -/** The 5 numbers output for each box (the usual 4 and a page number.) */ -const int kNumbersPerBlob = 5; -/** - * The number of bytes taken by each number. Since we use int16_t for ICOORD, - * assume only 5 digits max. - */ -const int kBytesPerNumber = 5; -/** - * Multiplier for max expected textlength assumes (kBytesPerNumber + space) - * * kNumbersPerBlob plus the newline. Add to this the - * original UTF8 characters, and one kMaxBytesPerLine for safety. - */ -const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; -/** Max bytes in the decimal representation of int64_t. */ -const int kBytesPer64BitNumber = 20; -/** - * A maximal single box could occupy kNumbersPerBlob numbers at - * kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a - * space plus the newline and the maximum length of a UNICHAR. - * Test against this on each iteration for safety. - */ -const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + - UNICHAR_LEN; - -/** - * The recognized text is returned as a char* which is coded - * as a UTF8 box file. - * page_number is a 0-base page index that will appear in the box file. - * Returned string must be freed with the delete [] operator. 
- */ -char* TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int blob_count; - int utf8_length = TextLength(&blob_count); - int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + - kMaxBytesPerLine; - char* result = new char[total_length]; - result[0] = '\0'; - int output_length = 0; - LTRResultIterator* it = GetLTRIterator(); - do { - int left, top, right, bottom; - if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - const std::unique_ptr text( - it->GetUTF8Text(RIL_SYMBOL)); - // Tesseract uses space for recognition failure. Fix to a reject - // character, kTesseractReject so we don't create illegal box files. - for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') - text[i] = kTesseractReject; - } - snprintf(result + output_length, total_length - output_length, - "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, - right, image_height_ - top, page_number); - output_length += strlen(result + output_length); - // Just in case... - if (output_length + kMaxBytesPerLine > total_length) - break; - } - } while (it->Next(RIL_SYMBOL)); - delete it; - return result; -} - -/** - * Conversion table for non-latin characters. - * Maps characters out of the latin set into the latin set. - * TODO(rays) incorporate this translation into unicharset. - */ -const int kUniChs[] = { - 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 -}; -/** Latin chars corresponding to the unicode chars above. */ -const int kLatinChs[] = { - 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 -}; - -/** - * The recognized text is returned as a char* which is coded - * as UNLV format Latin-1 with specific reject and suspect codes. - * Returned string must be freed with the delete [] operator. 
- */ -char* TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - bool tilde_crunch_written = false; - bool last_char_was_newline = true; - bool last_char_was_tilde = false; - - int total_length = TextLength(nullptr); - PAGE_RES_IT page_res_it(page_res_); - char* result = new char[total_length]; - char* ptr = result; - for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - // Process the current word. - if (word->unlv_crunch_mode != CR_NONE) { - if (word->unlv_crunch_mode != CR_DELETE && - (!tilde_crunch_written || - (word->unlv_crunch_mode == CR_KEEP_SPACE && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)))) { - if (!word->word->flag(W_BOL) && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)) { - /* Write a space to separate from preceding good text */ - *ptr++ = ' '; - last_char_was_tilde = false; - } - if (!last_char_was_tilde) { - // Write a reject char. - last_char_was_tilde = true; - *ptr++ = kUNLVReject; - tilde_crunch_written = true; - last_char_was_newline = false; - } - } - } else { - // NORMAL PROCESSING of non tilde crunched words. - tilde_crunch_written = false; - tesseract_->set_unlv_suspects(word); - const char* wordstr = word->best_choice->unichar_string().string(); - const STRING& lengths = word->best_choice->unichar_lengths(); - int length = lengths.length(); - int i = 0; - int offset = 0; - - if (last_char_was_tilde && - word->word->space() == 0 && wordstr[offset] == ' ') { - // Prevent adjacent tilde across words - we know that adjacent tildes - // within words have been removed. - // Skip the first character. 
- offset = lengths[i++]; - } - if (i < length && wordstr[offset] != 0) { - if (!last_char_was_newline) - *ptr++ = ' '; - else - last_char_was_newline = false; - for (; i < length; offset += lengths[i++]) { - if (wordstr[offset] == ' ' || - wordstr[offset] == kTesseractReject) { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; - } else { - if (word->reject_map[i].rejected()) - *ptr++ = kUNLVSuspect; - UNICHAR ch(wordstr + offset, lengths[i]); - int uni_ch = ch.first_uni(); - for (int j = 0; kUniChs[j] != 0; ++j) { - if (kUniChs[j] == uni_ch) { - uni_ch = kLatinChs[j]; - break; - } - } - if (uni_ch <= 0xff) { - *ptr++ = static_cast(uni_ch); - last_char_was_tilde = false; - } else { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; - } - } - } - } - } - if (word->word->flag(W_EOL) && !last_char_was_newline) { - /* Add a new line output */ - *ptr++ = '\n'; - tilde_crunch_written = false; - last_char_was_newline = true; - last_char_was_tilde = false; - } - } - *ptr++ = '\n'; - *ptr = '\0'; - return result; -} - -#ifndef DISABLED_LEGACY_ENGINE - -/** - * Detect the orientation of the input image and apparent script (alphabet). - * orient_deg is the detected clockwise rotation of the input image in degrees - * (0, 90, 180, 270) - * orient_conf is the confidence (15.0 is reasonably confident) - * script_name is an ASCII string, the name of the script, e.g. 
"Latin" - * script_conf is confidence level in the script - * Returns true on success and writes values to each parameter as an output - */ -bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, - const char** script_name, - float* script_conf) { - OSResults osr; - - bool osd = DetectOS(&osr); - if (!osd) { - return false; - } - - int orient_id = osr.best_result.orientation_id; - int script_id = osr.get_best_script(orient_id); - if (orient_conf) *orient_conf = osr.best_result.oconfidence; - if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees - - if (script_name) { - const char* script = osr.unicharset->get_script_from_script_id(script_id); - - *script_name = script; - } - - if (script_conf) *script_conf = osr.best_result.sconfidence; - - return true; -} - -/** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ -char* TessBaseAPI::GetOsdText(int page_number) { - int orient_deg; - float orient_conf; - const char* script_name; - float script_conf; - - if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, - &script_conf)) - return nullptr; - - // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_deg / 90); - - const int kOsdBufsize = 255; - char* osd_buf = new char[kOsdBufsize]; - snprintf(osd_buf, kOsdBufsize, - "Page number: %d\n" - "Orientation in degrees: %d\n" - "Rotate: %d\n" - "Orientation confidence: %.2f\n" - "Script: %s\n" - "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name, - script_conf); - - return osd_buf; -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -/** Returns the average word confidence for Tesseract page result. 
*/ -int TessBaseAPI::MeanTextConf() { - int* conf = AllWordConfidences(); - if (!conf) return 0; - int sum = 0; - int *pt = conf; - while (*pt >= 0) sum += *pt++; - if (pt != conf) sum /= pt - conf; - delete [] conf; - return sum; -} - -/** Returns an array of all word confidences, terminated by -1. */ -int* TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int n_word = 0; - PAGE_RES_IT res_it(page_res_); - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) - n_word++; - - int* conf = new int[n_word+1]; - n_word = 0; - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { - WERD_RES *word = res_it.word(); - WERD_CHOICE* choice = word->best_choice; - int w_conf = static_cast(100 + 5 * choice->certainty()); - // This is the eq for converting Tesseract confidence to 1..100 - if (w_conf < 0) w_conf = 0; - if (w_conf > 100) w_conf = 100; - conf[n_word++] = w_conf; - } - conf[n_word] = -1; - return conf; -} - -#ifndef DISABLED_LEGACY_ENGINE -/** - * Applies the given word to the adaptive classifier if possible. - * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can - * tell the boundaries of the graphemes. - * Assumes that SetImage/SetRectangle have been used to set the image - * to the given word. The mode arg should be PSM_SINGLE_WORD or - * PSM_CIRCLE_WORD, as that will be used to control layout analysis. - * The currently set PageSegMode is preserved. - * Returns false if adaption was not possible for some reason. 
- */ -bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { - int debug = 0; - GetIntVariable("applybox_debug", &debug); - bool success = true; - PageSegMode current_psm = GetPageSegMode(); - SetPageSegMode(mode); - SetVariable("classify_enable_learning", "0"); - const std::unique_ptr text(GetUTF8Text()); - if (debug) { - tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); - } - if (text != nullptr) { - PAGE_RES_IT it(page_res_); - WERD_RES* word_res = it.word(); - if (word_res != nullptr) { - word_res->word->set_text(wordstr); - // Check to see if text matches wordstr. - int w = 0; - int t; - for (t = 0; text[t] != '\0'; ++t) { - if (text[t] == '\n' || text[t] == ' ') - continue; - while (wordstr[w] == ' ') ++w; - if (text[t] != wordstr[w]) - break; - ++w; - } - if (text[t] != '\0' || wordstr[w] != '\0') { - // No match. - delete page_res_; - GenericVector boxes; - page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); - tesseract_->ReSegmentByClassification(page_res_); - tesseract_->TidyUp(page_res_); - PAGE_RES_IT pr_it(page_res_); - if (pr_it.word() == nullptr) - success = false; - else - word_res = pr_it.word(); - } else { - word_res->BestChoiceToCorrectText(); - } - if (success) { - tesseract_->EnableLearning = true; - tesseract_->LearnWord(nullptr, word_res); - } - } else { - success = false; - } - } else { - success = false; - } - SetPageSegMode(current_psm); - return success; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -/** - * Free up recognition results and any stored image data, without actually - * freeing any recognition data that would be time-consuming to reload. - * Afterwards, you must call SetImage or TesseractRect before doing - * any Recognize or Get* operation. - */ -void TessBaseAPI::Clear() { - if (thresholder_ != nullptr) - thresholder_->Clear(); - ClearResults(); - if (tesseract_ != nullptr) SetInputImage(nullptr); -} - -/** - * Close down tesseract and free up all memory. 
End() is equivalent to - * destructing and reconstructing your TessBaseAPI. - * Once End() has been used, none of the other API functions may be used - * other than Init and anything declared above it in the class definition. - */ -void TessBaseAPI::End() { - Clear(); - delete thresholder_; - thresholder_ = nullptr; - delete page_res_; - page_res_ = nullptr; - delete block_list_; - block_list_ = nullptr; - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; - delete tesseract_; - tesseract_ = nullptr; - delete osd_tesseract_; - osd_tesseract_ = nullptr; - delete equ_detect_; - equ_detect_ = nullptr; - delete input_file_; - input_file_ = nullptr; - delete output_file_; - output_file_ = nullptr; - delete datapath_; - datapath_ = nullptr; - delete language_; - language_ = nullptr; -} - -// Clear any library-level memory caches. -// There are a variety of expensive-to-load constant data structures (mostly -// language dictionaries) that are cached globally -- surviving the Init() -// and End() of individual TessBaseAPI's. This function allows the clearing -// of these caches. -void TessBaseAPI::ClearPersistentCache() { - Dict::GlobalDawgCache()->DeleteUnusedDawgs(); -} - -/** - * Check whether a word is valid according to Tesseract's language model - * returns 0 if the word is invalid, non-zero if valid - */ -int TessBaseAPI::IsValidWord(const char *word) { - return tesseract_->getDict().valid_word(word); -} -// Returns true if utf8_character is defined in the UniCharset. -bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { - return tesseract_->unicharset.contains_unichar(utf8_character); -} - - -// TODO(rays) Obsolete this function and replace with a more aptly named -// function that returns image coordinates rather than tesseract coordinates. 
-bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { - PageIterator* it = AnalyseLayout(); - if (it == nullptr) { - return false; - } - int x1, x2, y1, y2; - it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); - // Calculate offset and slope (NOTE: Kind of ugly) - if (x2 <= x1) x2 = x1 + 1; - // Convert the point pair to slope/offset of the baseline (in image coords.) - *out_slope = static_cast(y2 - y1) / (x2 - x1); - *out_offset = static_cast(y1 - *out_slope * x1); - // Get the y-coord of the baseline at the left and right edges of the - // textline's bounding box. - int left, top, right, bottom; - if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { - delete it; - return false; - } - int left_y = IntCastRounded(*out_slope * left + *out_offset); - int right_y = IntCastRounded(*out_slope * right + *out_offset); - // Shift the baseline down so it passes through the nearest bottom-corner - // of the textline's bounding box. This is the difference between the y - // at the lowest (max) edge of the box and the actual box bottom. - *out_offset += bottom - std::max(left_y, right_y); - // Switch back to bottom-up tesseract coordinates. Requires negation of - // the slope and height - offset for the offset. - *out_slope = -*out_slope; - *out_offset = rect_height_ - *out_offset; - delete it; - - return true; -} - -/** Sets Dict::letter_is_okay_ function to point to the given function. */ -void TessBaseAPI::SetDictFunc(DictFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().letter_is_okay_ = f; - } -} - -/** - * Sets Dict::probability_in_context_ function to point to the given - * function. - * - * @param f A single function that returns the probability of the current - * "character" (in general a utf-8 string), given the context of a previous - * utf-8 string. 
- */ -void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().probability_in_context_ = f; - // Set it for the sublangs too. - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) { - tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; - } - } -} - -#ifndef DISABLED_LEGACY_ENGINE -/** Sets Wordrec::fill_lattice_ function to point to the given function. */ -void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { - if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -/** Common code for setting the image. */ -bool TessBaseAPI::InternalSetImage() { - if (tesseract_ == nullptr) { - tprintf("Please call Init before attempting to set an image.\n"); - return false; - } - if (thresholder_ == nullptr) - thresholder_ = new ImageThresholder; - ClearResults(); - return true; -} - -/** - * Run the thresholder to make the thresholded image, returned in pix, - * which must not be nullptr. *pix must be initialized to nullptr, or point - * to an existing pixDestroyable Pix. - * The usual argument to Threshold is Tesseract::mutable_pix_binary(). - */ -bool TessBaseAPI::Threshold(Pix** pix) { - ASSERT_HOST(pix != nullptr); - if (*pix != nullptr) - pixDestroy(pix); - // Zero resolution messes up the algorithms, so make sure it is credible. 
- int user_dpi = 0; - bool a = GetIntVariable("user_defined_dpi", &user_dpi); - int y_res = thresholder_->GetScaledYResolution(); - if (user_dpi && (user_dpi < kMinCredibleResolution || - user_dpi > kMaxCredibleResolution)) { - tprintf("Warning: User defined image dpi is outside of expected range " - "(%d - %d)!\n", - kMinCredibleResolution, kMaxCredibleResolution); - } - // Always use user defined dpi - if (user_dpi) { - thresholder_->SetSourceYResolution(user_dpi); - } else if (y_res < kMinCredibleResolution || - y_res > kMaxCredibleResolution) { - tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n", - y_res, kMinCredibleResolution); - thresholder_->SetSourceYResolution(kMinCredibleResolution); - } - PageSegMode pageseg_mode = - static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); - if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; - thresholder_->GetImageSizes(&rect_left_, &rect_top_, - &rect_width_, &rect_height_, - &image_width_, &image_height_); - if (!thresholder_->IsBinary()) { - tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); - } else { - tesseract_->set_pix_thresholds(nullptr); - tesseract_->set_pix_grey(nullptr); - } - // Set the internal resolution that is used for layout parameters from the - // estimated resolution, rather than the image resolution, which may be - // fabricated, but we will use the image resolution, if there is one, to - // report output point sizes. - int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), - kMinCredibleResolution, - kMaxCredibleResolution); - if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { - tprintf("Estimated internal resolution %d out of range! 
" - "Corrected to %d.\n", - thresholder_->GetScaledEstimatedResolution(), estimated_res); - } - tesseract_->set_source_resolution(estimated_res); - SavePixForCrash(estimated_res, *pix); - return true; -} - -/** Find lines from the image making the BLOCK_LIST. */ -int TessBaseAPI::FindLines() { - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (recognition_done_) - ClearResults(); - if (!block_list_->empty()) { - return 0; - } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; - #ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); - #endif - } - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return -1; - } - - tesseract_->PrepareForPageseg(); - -#ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->textord_equation_detect) { - if (equ_detect_ == nullptr && datapath_ != nullptr) { - equ_detect_ = new EquationDetect(datapath_->string(), nullptr); - } - if (equ_detect_ == nullptr) { - tprintf("Warning: Could not set equation detector\n"); - } else { - tesseract_->SetEquationDetect(equ_detect_); - } - } -#endif // ndef DISABLED_LEGACY_ENGINE - - Tesseract* osd_tess = osd_tesseract_; - OSResults osr; - if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && - osd_tess == nullptr) { - if (strcmp(language_->string(), "osd") == 0) { - osd_tess = tesseract_; - } else { - osd_tesseract_ = new Tesseract; - TessdataManager mgr(reader_); - if (datapath_ == nullptr) { - tprintf("Warning: Auto orientation and script detection requested," - " but data path is undefined\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, - "osd", OEM_TESSERACT_ONLY, - nullptr, 0, nullptr, nullptr, - false, &mgr) == 0) { - osd_tess = osd_tesseract_; - osd_tesseract_->set_source_resolution( - thresholder_->GetSourceYResolution()); - } else { - 
tprintf("Warning: Auto orientation and script detection requested," - " but osd language failed to load\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } - } - } - - if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) - return -1; - - // If Devanagari is being recognized, we use different images for page seg - // and for OCR. - tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); - return 0; -} - -/** Delete the pageres and clear the block list ready for a new page. */ -void TessBaseAPI::ClearResults() { - if (tesseract_ != nullptr) { - tesseract_->Clear(); - } - delete page_res_; - page_res_ = nullptr; - recognition_done_ = false; - if (block_list_ == nullptr) - block_list_ = new BLOCK_LIST; - else - block_list_->clear(); - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - SavePixForCrash(0, nullptr); -} - -/** - * Return the length of the output text string, as UTF8, assuming - * liberally two spacing marks after each word (as paragraphs end with two - * newlines), and assuming a single character reject marker for each rejected - * character. - * Also return the number of recognized blobs in blob_count. - */ -int TessBaseAPI::TextLength(int* blob_count) { - if (tesseract_ == nullptr || page_res_ == nullptr) - return 0; - - PAGE_RES_IT page_res_it(page_res_); - int total_length = 2; - int total_blobs = 0; - // Iterate over the data structures to extract the recognition result. 
- for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - WERD_CHOICE* choice = word->best_choice; - if (choice != nullptr) { - total_blobs += choice->length() + 2; - total_length += choice->unichar_string().length() + 2; - for (int i = 0; i < word->reject_map.length(); ++i) { - if (word->reject_map[i].rejected()) - ++total_length; - } - } - } - if (blob_count != nullptr) - *blob_count = total_blobs; - return total_length; -} - -#ifndef DISABLED_LEGACY_ENGINE -/** - * Estimates the Orientation And Script of the image. - * Returns true if the image was processed successfully. - */ -bool TessBaseAPI::DetectOS(OSResults* osr) { - if (tesseract_ == nullptr) - return false; - ClearResults(); - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return false; - } - - if (input_file_ == nullptr) - input_file_ = new STRING(kInputFile); - return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -void TessBaseAPI::set_min_orientation_margin(double margin) { - tesseract_->min_orientation_margin.set_value(margin); -} - -/** - * Return text orientation of each block as determined in an earlier page layout - * analysis operation. Orientation is returned as the number of ccw 90-degree - * rotations (in [0..3]) required to make the text in the block upright - * (readable). Note that this may not necessary be the block orientation - * preferred for recognition (such as the case of vertical CJK text). - * - * Also returns whether the text in the block is believed to have vertical - * writing direction (when in an upright page orientation). - * - * The returned array is of length equal to the number of text blocks, which may - * be less than the total number of blocks. The ordering is intended to be - * consistent with GetTextLines(). 
- */ -void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, - bool** vertical_writing) { - delete[] *block_orientation; - *block_orientation = nullptr; - delete[] *vertical_writing; - *vertical_writing = nullptr; - BLOCK_IT block_it(block_list_); - - block_it.move_to_first(); - int num_blocks = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - ++num_blocks; - } - if (!num_blocks) { - tprintf("WARNING: Found no blocks\n"); - return; - } - *block_orientation = new int[num_blocks]; - *vertical_writing = new bool[num_blocks]; - block_it.move_to_first(); - int i = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - FCOORD re_rotation = block_it.data()->re_rotation(); - float re_theta = re_rotation.angle(); - FCOORD classify_rotation = block_it.data()->classify_rotation(); - float classify_theta = classify_rotation.angle(); - double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; - if (rot_theta < 0) rot_theta += 4; - int num_rotations = static_cast(rot_theta + 0.5); - (*block_orientation)[i] = num_rotations; - // The classify_rotation is non-zero only if the text has vertical - // writing direction. 
- (*vertical_writing)[i] = classify_rotation.y() != 0.0f; - ++i; - } -} - - -void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { - int debug_level = 0; - GetIntVariable("paragraph_debug_level", &debug_level); - if (paragraph_models_ == nullptr) - paragraph_models_ = new GenericVector; - MutableIterator *result_it = GetMutableIterator(); - do { // Detect paragraphs for this block - GenericVector models; - ::tesseract::DetectParagraphs(debug_level, after_text_recognition, - result_it, &models); - *paragraph_models_ += models; - } while (result_it->Next(RIL_BLOCK)); - delete result_it; -} - -/** This method returns the string form of the specified unichar. */ -const char* TessBaseAPI::GetUnichar(int unichar_id) { - return tesseract_->unicharset.id_to_unichar(unichar_id); -} - -/** Return the pointer to the i-th dawg loaded into tesseract_ object. */ -const Dawg *TessBaseAPI::GetDawg(int i) const { - if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; - return tesseract_->getDict().GetDawg(i); -} - -/** Return the number of dawgs loaded into tesseract_ object. */ -int TessBaseAPI::NumDawgs() const { - return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); -} - -/** Escape a char string - remove <>&"' with HTML codes. */ -STRING HOcrEscape(const char* text) { - STRING ret; - const char *ptr; - for (ptr = text; *ptr; ptr++) { - switch (*ptr) { - case '<': ret += "<"; break; - case '>': ret += ">"; break; - case '&': ret += "&"; break; - case '"': ret += """; break; - case '\'': ret += "'"; break; - default: ret += *ptr; - } - } - return ret; -} - - -#ifndef DISABLED_LEGACY_ENGINE - - -// ____________________________________________________________________________ -// Ocropus add-ons. - -/** Find lines from the image making the BLOCK_LIST. 
*/ -BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { - ASSERT_HOST(FindLines() == 0); - BLOCK_LIST* result = block_list_; - block_list_ = nullptr; - return result; -} - -/** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ -void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { - delete block_list; -} - - -ROW *TessBaseAPI::MakeTessOCRRow(float baseline, - float xheight, - float descender, - float ascender) { - int32_t xstarts[] = {-32000}; - double quad_coeffs[] = {0, 0, baseline}; - return new ROW(1, - xstarts, - quad_coeffs, - xheight, - ascender - (baseline + xheight), - descender - baseline, - 0, - 0); -} - -/** Creates a TBLOB* from the whole pix. */ -TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); - - // Create C_BLOBs from the page - extract_edges(pix, &block); - - // Merge all C_BLOBs - C_BLOB_LIST *list = block.blob_list(); - C_BLOB_IT c_blob_it(list); - if (c_blob_it.empty()) - return nullptr; - // Move all the outlines to the first blob. - C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); - for (c_blob_it.forward(); - !c_blob_it.at_first(); - c_blob_it.forward()) { - C_BLOB *c_blob = c_blob_it.data(); - ol_it.add_list_after(c_blob->out_list()); - } - // Convert the first blob to the output TBLOB. - return TBLOB::PolygonalCopy(false, c_blob_it.data()); -} - -/** - * This method baseline normalizes a TBLOB in-place. The input row is used - * for normalization. The denorm is an optional parameter in which the - * normalization-antidote is returned. 
- */ -void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { - TBOX box = tblob->bounding_box(); - float x_center = (box.left() + box.right()) / 2.0f; - float baseline = row->base_line(x_center); - float scale = kBlnXHeight / row->x_height(); - tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); -} - -/** - * Return a TBLOB * from the whole pix. - * To be freed later with delete. - */ -static TBLOB *make_tesseract_blob(float baseline, float xheight, - float descender, float ascender, - bool numeric_mode, Pix* pix) { - TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); - - // Normalize TBLOB - ROW *row = - TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); - delete row; - return tblob; -} - -/** - * Adapt to recognize the current image as the given character. - * The image must be preloaded into pix_binary_ and be just an image - * of a single character. - */ -void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender) { - UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, - tesseract_->classify_bln_numeric_mode, - tesseract_->pix_binary()); - float threshold; - float best_rating = -100; - - - // Classify to get a raw choice. 
- BLOB_CHOICE_LIST choices; - tesseract_->AdaptiveClassifier(blob, &choices); - BLOB_CHOICE_IT choice_it; - choice_it.set_to_list(&choices); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - if (choice_it.data()->rating() > best_rating) { - best_rating = choice_it.data()->rating(); - } - } - - threshold = tesseract_->matcher_good_threshold; - - if (blob->outlines) - tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, - tesseract_->AdaptedTemplates); - delete blob; -} - - -PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - PAGE_RES *page_res = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); - return page_res; -} - -PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result) { - if (!pass1_result) - pass1_result = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); - return pass1_result; -} - -struct TESS_CHAR : ELIST_LINK { - char *unicode_repr; - int length; // of unicode_repr - float cost; - TBOX box; - - TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { - length = (len == -1 ? strlen(repr) : len); - unicode_repr = new char[length + 1]; - strncpy(unicode_repr, repr, length); - } - - TESS_CHAR() - : unicode_repr(nullptr), - length(0), - cost(0.0f) - { // Satisfies ELISTIZE. 
- } - ~TESS_CHAR() { - delete [] unicode_repr; - } -}; - -ELISTIZEH(TESS_CHAR) -ELISTIZE(TESS_CHAR) - -static void add_space(TESS_CHAR_IT* it) { - TESS_CHAR *t = new TESS_CHAR(0, " "); - it->add_after_then_move(t); -} - - -static float rating_to_cost(float rating) { - rating = 100 + rating; - // cuddled that to save from coverage profiler - // (I have never seen ratings worse than -100, - // but the check won't hurt) - if (rating < 0) rating = 0; - return rating; -} - -/** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ -static void extract_result(TESS_CHAR_IT* out, - PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - int word_count = 0; - while (page_res_it.word() != nullptr) { - WERD_RES *word = page_res_it.word(); - const char *str = word->best_choice->unichar_string().string(); - const char *len = word->best_choice->unichar_lengths().string(); - TBOX real_rect = word->word->bounding_box(); - - if (word_count) - add_space(out); - int n = strlen(len); - for (int i = 0; i < n; i++) { - TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), - str, *len); - tc->box = real_rect.intersection(word->box_word->BlobBox(i)); - out->add_after_then_move(tc); - str += *len; - len++; - } - page_res_it.forward(); - word_count++; - } -} - -/** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. 
- */ -int TessBaseAPI::TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res) { - TESS_CHAR_LIST tess_chars; - TESS_CHAR_IT tess_chars_it(&tess_chars); - extract_result(&tess_chars_it, page_res); - tess_chars_it.move_to_first(); - int n = tess_chars.length(); - int text_len = 0; - *lengths = new int[n]; - *costs = new float[n]; - *x0 = new int[n]; - *y0 = new int[n]; - *x1 = new int[n]; - *y1 = new int[n]; - int i = 0; - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward(), i++) { - TESS_CHAR *tc = tess_chars_it.data(); - text_len += (*lengths)[i] = tc->length; - (*costs)[i] = tc->cost; - (*x0)[i] = tc->box.left(); - (*y0)[i] = tc->box.bottom(); - (*x1)[i] = tc->box.right(); - (*y1)[i] = tc->box.top(); - } - char *p = *text = new char[text_len]; - - tess_chars_it.move_to_first(); - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward()) { - TESS_CHAR *tc = tess_chars_it.data(); - strncpy(p, tc->unicode_repr, tc->length); - p += tc->length; - } - return n; -} - -/** This method returns the features associated with the input blob. */ -// The resulting features are returned in int_features, which must be -// of size MAX_NUM_INT_FEATURES. The number of features is returned in -// num_features (or 0 if there was a failure). -// On return feature_outline_index is filled with an index of the outline -// corresponding to each feature in int_features. -// TODO(rays) Fix the caller to out outline_counts instead. 
-void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, - INT_FEATURE_STRUCT* int_features, - int* num_features, - int* feature_outline_index) { - GenericVector outline_counts; - GenericVector bl_features; - GenericVector cn_features; - INT_FX_RESULT_STRUCT fx_info; - tesseract_->ExtractFeatures(*blob, false, &bl_features, - &cn_features, &fx_info, &outline_counts); - if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { - *num_features = 0; - return; // Feature extraction failed. - } - *num_features = cn_features.size(); - memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); - // TODO(rays) Pass outline_counts back and simplify the calling code. - if (feature_outline_index != nullptr) { - int f = 0; - for (int i = 0; i < outline_counts.size(); ++i) { - while (f < outline_counts[i]) - feature_outline_index[f++] = i; - } - } -} - -// This method returns the row to which a box of specified dimensions would -// belong. If no good match is found, it returns nullptr. -ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, - int left, int top, int right, int bottom) { - TBOX box(left, bottom, right, top); - BLOCK_IT b_it(blocks); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (box.major_overlap(word->bounding_box())) - return row; - } - } - } - return nullptr; -} - -/** Method to run adaptive classifier on a blob. 
*/ -void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned) { - BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; - tesseract_->AdaptiveClassifier(blob, choices); - BLOB_CHOICE_IT choices_it(choices); - int& index = *num_matches_returned; - index = 0; - for (choices_it.mark_cycle_pt(); - !choices_it.cycled_list() && index < num_max_matches; - choices_it.forward()) { - BLOB_CHOICE* choice = choices_it.data(); - unichar_ids[index] = choice->unichar_id(); - ratings[index] = choice->rating(); - ++index; - } - *num_matches_returned = index; - delete choices; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/baseapi.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/baseapi.h deleted file mode 100644 index da12d647..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/baseapi.h +++ /dev/null @@ -1,926 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: baseapi.h -// Description: Simple API for calling tesseract. -// Author: Ray Smith -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_API_BASEAPI_H_ -#define TESSERACT_API_BASEAPI_H_ - -#include -// To avoid collision with other typenames include the ABSOLUTE MINIMUM -// complexity of includes here. Use forward declarations wherever possible -// and hide includes of complex types in baseapi.cpp. -#include "tess_version.h" -#include "apitypes.h" -#include "pageiterator.h" -#include "platform.h" -#include "publictypes.h" -#include "resultiterator.h" -#include "serialis.h" -#include "tesscallback.h" -#include "thresholder.h" -#include "unichar.h" - -template class GenericVector; -class PAGE_RES; -class PAGE_RES_IT; -class ParagraphModel; -struct BlamerBundle; -class BLOCK_LIST; -class DENORM; -class MATRIX; -class ROW; -class STRING; -class WERD; -struct Pix; -struct Box; -struct Pixa; -struct Boxa; -class ETEXT_DESC; -struct OSResults; -class TBOX; -class UNICHARSET; -class WERD_CHOICE_LIST; - -struct INT_FEATURE_STRUCT; -typedef INT_FEATURE_STRUCT *INT_FEATURE; -struct TBLOB; - -namespace tesseract { - -class Dawg; -class Dict; -class EquationDetect; -class PageIterator; -class LTRResultIterator; -class ResultIterator; -class MutableIterator; -class TessResultRenderer; -class Tesseract; -class Trie; -class Wordrec; - -typedef int (Dict::*DictFunc)(void* void_dawg_args, - const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, bool word_end) const; -typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, - const char* context, - int context_bytes, - const char* character, - int character_bytes); -typedef float (Dict::*ParamsModelClassifyFunc)( - const char *lang, void *path); -typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle); -typedef TessCallback4 - TruthCallback; - -/** - * Base class for all tesseract APIs. 
- * Specific classes can add ability to work on different inputs or produce - * different outputs. - * This class is mostly an interface layer on top of the Tesseract instance - * class to hide the data types so that users of this class don't have to - * include any other Tesseract headers. - */ -class TESS_API TessBaseAPI { - public: - TessBaseAPI(); - virtual ~TessBaseAPI(); - - /** - * Returns the version identifier as a static string. Do not delete. - */ - static const char* Version(); - - /** - * If compiled with OpenCL AND an available OpenCL - * device is deemed faster than serial code, then - * "device" is populated with the cl_device_id - * and returns sizeof(cl_device_id) - * otherwise *device=nullptr and returns 0. - */ - static size_t getOpenCLDevice(void **device); - - /** - * Writes the thresholded image to stderr as a PBM file on receipt of a - * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). - */ - static void CatchSignals(); - - /** - * Set the name of the input file. Needed for training and - * reading a UNLV zone file, and for searchable PDF output. - */ - void SetInputName(const char* name); - /** - * These functions are required for searchable PDF output. - * We need our hands on the input file so that we can include - * it in the PDF without transcoding. If that is not possible, - * we need the original image. Finally, resolution metadata - * is stored in the PDF so we need that as well. - */ - const char* GetInputName(); - // Takes ownership of the input pix. - void SetInputImage(Pix *pix); - Pix* GetInputImage(); - int GetSourceYResolution(); - const char* GetDatapath(); - - /** Set the name of the bonus output files. Needed only for debugging. */ - void SetOutputName(const char* name); - - /** - * Set the value of an internal "parameter." - * Supply the name of the parameter and the value as a string, just as - * you would in a config file. - * Returns false if the name lookup failed. 
- * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. - * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. - * SetVariable may be used before Init, but settings will revert to - * defaults on End(). - * - * Note: Must be called after Init(). Only works for non-init variables - * (init variables should be passed to Init()). - */ - bool SetVariable(const char* name, const char* value); - bool SetDebugVariable(const char* name, const char* value); - - /** - * Returns true if the parameter was found among Tesseract parameters. - * Fills in value with the value of the parameter. - */ - bool GetIntVariable(const char *name, int *value) const; - bool GetBoolVariable(const char *name, bool *value) const; - bool GetDoubleVariable(const char *name, double *value) const; - - /** - * Returns the pointer to the string that represents the value of the - * parameter if it was found among Tesseract parameters. - */ - const char *GetStringVariable(const char *name) const; - - /** - * Print Tesseract parameters to the given file. - */ - void PrintVariables(FILE *fp) const; - - /** - * Get value of named variable as a string, if it exists. - */ - bool GetVariableAsString(const char *name, STRING *val); - - /** - * Instances are now mostly thread-safe and totally independent, - * but some global parameters remain. Basically it is safe to use multiple - * TessBaseAPIs in different threads in parallel, UNLESS: - * you use SetVariable on some of the Params in classify and textord. - * If you do, then the effect will be to change it for all your instances. - * - * Start tesseract. Returns zero on success and -1 on failure. - * NOTE that the only members that may be called before Init are those - * listed above here in the class definition. - * - * The datapath must be the name of the parent directory of tessdata and - * must end in / . Any name after the last / will be stripped. 
- * The language is (usually) an ISO 639-3 string or nullptr will default to eng. - * It is entirely safe (and eventually will be efficient too) to call - * Init multiple times on the same instance to change language, or just - * to reset the classifier. - * The language may be a string of the form [~][+[~]]* indicating - * that multiple languages are to be loaded. Eg hin+eng will load Hindi and - * English. Languages may specify internally that they want to be loaded - * with one or more other languages, so the ~ sign is available to override - * that. Eg if hin were set to load eng by default, then hin+~eng would force - * loading only hin. The number of loaded languages is limited only by - * memory, with the caveat that loading additional languages will impact - * both speed and accuracy, as there is more work to do to decide on the - * applicable language, and there is more chance of hallucinating incorrect - * words. - * WARNING: On changing languages, all Tesseract parameters are reset - * back to their default values. (Which may vary between languages.) - * If you have a rare need to set a Variable that controls - * initialization for a second call to Init you should explicitly - * call End() and then use SetVariable before Init. This is only a very - * rare use case, since there are very few uses that require any parameters - * to be set before Init. - * - * If set_only_non_debug_params is true, only params that do not contain - * "debug" in the name will be set. 
- */ - int Init(const char* datapath, const char* language, OcrEngineMode mode, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params); - int Init(const char* datapath, const char* language, OcrEngineMode oem) { - return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); - } - int Init(const char* datapath, const char* language) { - return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); - } - // In-memory version reads the traineddata file directly from the given - // data[data_size] array, and/or reads data via a FileReader. - int Init(const char* data, int data_size, const char* language, - OcrEngineMode mode, char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_non_debug_params, FileReader reader); - - /** - * Returns the languages string used in the last valid initialization. - * If the last initialization specified "deu+hin" then that will be - * returned. If hin loaded eng automatically as well, then that will - * not be included in this list. To find the languages actually - * loaded use GetLoadedLanguagesAsVector. - * The returned string should NOT be deleted. - */ - const char* GetInitLanguagesAsString() const; - - /** - * Returns the loaded languages in the vector of STRINGs. - * Includes all languages loaded by the last Init, including those loaded - * as dependencies of other loaded languages. - */ - void GetLoadedLanguagesAsVector(GenericVector* langs) const; - - /** - * Returns the available languages in the sorted vector of STRINGs. - */ - void GetAvailableLanguagesAsVector(GenericVector* langs) const; - - /** - * Init only the lang model component of Tesseract. The only functions - * that work after this init are SetVariable and IsValidWord. - * WARNING: temporary! This function will be removed from here and placed - * in a separate API at some future time. 
- */ - int InitLangMod(const char* datapath, const char* language); - - /** - * Init only for page layout analysis. Use only for calls to SetImage and - * AnalysePage. Calls that attempt recognition will generate an error. - */ - void InitForAnalysePage(); - - /** - * Read a "config" file containing a set of param, value pairs. - * Searches the standard places: tessdata/configs, tessdata/tessconfigs - * and also accepts a relative or absolute path name. - * Note: only non-init params will be set (init params are set by Init()). - */ - void ReadConfigFile(const char* filename); - /** Same as above, but only set debug params from the given config file. */ - void ReadDebugConfigFile(const char* filename); - - /** - * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. - * The mode is stored as an IntParam so it can also be modified by - * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). - */ - void SetPageSegMode(PageSegMode mode); - - /** Return the current page segmentation mode. */ - PageSegMode GetPageSegMode() const; - - /** - * Recognize a rectangle from an image and return the result as a string. - * May be called many times for a single Init. - * Currently has no error checking. - * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. - * Palette color images will not work properly and must be converted to - * 24 bit. - * Binary images of 1 bit per pixel may also be given but they must be - * byte packed with the MSB of the first byte being the first pixel, and a - * 1 represents WHITE. For binary images set bytes_per_pixel=0. - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * - * Note that TesseractRect is the simplified convenience interface. - * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, - * and one or more of the Get*Text functions below. 
- */ - char* TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); - - /** - * Call between pages or documents etc to free up memory and forget - * adaptive data. - */ - void ClearAdaptiveClassifier(); - - /** - * @defgroup AdvancedAPI Advanced API - * The following methods break TesseractRect into pieces, so you can - * get hold of the thresholded image, get the text in different formats, - * get bounding boxes, confidences etc. - */ - /* @{ */ - - /** - * Provide an image for Tesseract to recognize. Format is as - * TesseractRect above. Copies the image buffer and converts to Pix. - * SetImage clears all recognition results, and sets the rectangle to the - * full image, so it may be followed immediately by a GetUTF8Text, and it - * will automatically perform recognition. - */ - void SetImage(const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line); - - /** - * Provide an image for Tesseract to recognize. As with SetImage above, - * Tesseract takes its own copy of the image, so it need not persist until - * after Recognize. - * Pix vs raw, which to use? - * Use Pix where possible. Tesseract uses Pix as its internal representation - * and it is therefore more efficient to provide a Pix directly. - */ - void SetImage(Pix* pix); - - /** - * Set the resolution of the source image in pixels per inch so font size - * information can be calculated in results. Call this after SetImage(). - */ - void SetSourceResolution(int ppi); - - /** - * Restrict recognition to a sub-rectangle of the image. Call after SetImage. - * Each SetRectangle clears the recogntion results so multiple rectangles - * can be recognized with the same image. - */ - void SetRectangle(int left, int top, int width, int height); - - /** - * In extreme cases only, usually with a subclass of Thresholder, it - * is possible to provide a different Thresholder. 
The Thresholder may - * be preloaded with an image, settings etc, or they may be set after. - * Note that Tesseract takes ownership of the Thresholder and will - * delete it when it it is replaced or the API is destructed. - */ - void SetThresholder(ImageThresholder* thresholder) { - delete thresholder_; - thresholder_ = thresholder; - ClearResults(); - } - - /** - * Get a copy of the internal thresholded image from Tesseract. - * Caller takes ownership of the Pix and must pixDestroy it. - * May be called any time after SetImage, or after TesseractRect. - */ - Pix* GetThresholdedImage(); - - /** - * Get the result of page layout analysis as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ - Boxa* GetRegions(Pixa** pixa); - - /** - * Get the textlines as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * If raw_image is true, then extract from the original image instead of the - * thresholded image and pad by raw_padding pixels. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - * If paraids is not nullptr, the paragraph-id of each line within its block is - * also returned as an array of one element per line. delete [] after use. - */ - Boxa* GetTextlines(const bool raw_image, const int raw_padding, - Pixa** pixa, int** blockids, int** paraids); - /* - Helper method to extract from the thresholded image. (most common usage) - */ - Boxa* GetTextlines(Pixa** pixa, int** blockids) { - return GetTextlines(false, 0, pixa, blockids, nullptr); - } - - /** - * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa - * pair, in reading order. Enables downstream handling of non-rectangular - * regions. - * Can be called before or after Recognize. 
- * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - */ - Boxa* GetStrips(Pixa** pixa, int** blockids); - - /** - * Get the words as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ - Boxa* GetWords(Pixa** pixa); - - /** - * Gets the individual connected (text) components (created - * after pages segmentation step, but before recognition) - * as a leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * Note: the caller is responsible for calling boxaDestroy() - * on the returned Boxa array and pixaDestroy() on cc array. - */ - Boxa* GetConnectedComponents(Pixa** cc); - - /** - * Get the given level kind of components (block, textline, word etc.) as a - * leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each component is also returned - * as an array of one element per component. delete [] after use. - * If blockids is not nullptr, the paragraph-id of each component with its block - * is also returned as an array of one element per component. delete [] after - * use. - * If raw_image is true, then portions of the original image are extracted - * instead of the thresholded image and padded with raw_padding. - * If text_only is true, then only text components are returned. - */ - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, const bool raw_image, - const int raw_padding, - Pixa** pixa, int** blockids, int** paraids); - // Helper function to get binary images with no padding (most common usage). 
- Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, - Pixa** pixa, int** blockids) { - return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); - } - - /** - * Returns the scale factor of the thresholded image that would be returned by - * GetThresholdedImage() and the various GetX() methods that call - * GetComponentImages(). - * Returns 0 if no thresholder has been set. - */ - int GetThresholdedImageScaleFactor() const; - - /** - * Runs page layout analysis in the mode set by SetPageSegMode. - * May optionally be called prior to Recognize to get access to just - * the page layout results. Returns an iterator to the results. - * If merge_similar_words is true, words are combined where suitable for use - * with a line recognizer. Use if you want to use AnalyseLayout to find the - * textlines, and then want to process textline fragments with an external - * line recognizer. - * Returns nullptr on error or an empty page. - * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - PageIterator* AnalyseLayout(); - PageIterator* AnalyseLayout(bool merge_similar_words); - - /** - * Recognize the image from SetAndThresholdImage, generating Tesseract - * internal structures. Returns 0 on success. - * Optional. The Get*Text functions below will call Recognize if needed. - * After Recognize, the output is kept internally until the next SetImage. - */ - int Recognize(ETEXT_DESC* monitor); - - /** - * Methods to retrieve information after SetAndThresholdImage(), - * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) - */ - - #ifndef DISABLED_LEGACY_ENGINE - /** Variant on Recognize used for testing chopper. 
*/ - int RecognizeForChopTest(ETEXT_DESC* monitor); - #endif - - /** - * Turns images into symbolic text. - * - * filename can point to a single image, a multi-page TIFF, - * or a plain text list of image filenames. - * - * retry_config is useful for debugging. If not nullptr, you can fall - * back to an alternate configuration if a page fails for some - * reason. - * - * timeout_millisec terminates processing if any single page - * takes too long. Set to 0 for unlimited time. - * - * renderer is responible for creating the output. For example, - * use the TessTextRenderer if you want plaintext output, or - * the TessPDFRender to produce searchable PDF. - * - * If tessedit_page_number is non-negative, will only process that - * single page. Works for multi-page tiff file, or filelist. - * - * Returns true if successful, false on error. - */ - bool ProcessPages(const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); - // Does the real work of ProcessPages. - bool ProcessPagesInternal(const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); - - /** - * Turn a single image into symbolic text. - * - * The pix is the image processed. filename and page_index are - * metadata used by side-effect processes, such as reading a box - * file or formatting as hOCR. - * - * See ProcessPages for desciptions of other parameters. - */ - bool ProcessPage(Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer); - - /** - * Get a reading-order iterator to the results of LayoutAnalysis and/or - * Recognize. The returned iterator must be deleted after use. - * WARNING! 
This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - ResultIterator* GetIterator(); - - /** - * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. - * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - MutableIterator* GetMutableIterator(); - - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - */ - char* GetUTF8Text(); - - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. - * page_number is 0-based but will appear in the output as 1-based. - * monitor can be used to - * cancel the recognition - * receive progress callbacks - * Returned string must be freed with the delete [] operator. - */ - char* GetHOCRText(ETEXT_DESC* monitor, int page_number); - - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Returned string must be freed with the delete [] operator. - */ - char* GetHOCRText(int page_number); - - /** - * Make a TSV-formatted string from the internal data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Returned string must be freed with the delete [] operator. - */ - char* GetTSVText(int page_number); - - /** - * The recognized text is returned as a char* which is coded in the same - * format as a box file used in training. 
- * Constructs coordinates in the original image - not just the rectangle. - * page_number is a 0-based page index that will appear in the box file. - * Returned string must be freed with the delete [] operator. - */ - char* GetBoxText(int page_number); - - /** - * The recognized text is returned as a char* which is coded - * as UNLV format Latin-1 with specific reject and suspect codes. - * Returned string must be freed with the delete [] operator. - */ - char* GetUNLVText(); - - /** - * Detect the orientation of the input image and apparent script (alphabet). - * orient_deg is the detected clockwise rotation of the input image in degrees - * (0, 90, 180, 270) - * orient_conf is the confidence (15.0 is reasonably confident) - * script_name is an ASCII string, the name of the script, e.g. "Latin" - * script_conf is confidence level in the script - * Returns true on success and writes values to each parameter as an output - */ - bool DetectOrientationScript(int* orient_deg, float* orient_conf, - const char** script_name, float* script_conf); - - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ - char* GetOsdText(int page_number); - - /** Returns the (average) confidence value between 0 and 100. */ - int MeanTextConf(); - /** - * Returns all word confidences (between 0 and 100) in an array, terminated - * by -1. The calling function must delete [] after use. - * The number of confidences should correspond to the number of space- - * delimited words in GetUTF8Text. - */ - int* AllWordConfidences(); - -#ifndef DISABLED_LEGACY_ENGINE - /** - * Applies the given word to the adaptive classifier if possible. - * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can - * tell the boundaries of the graphemes. - * Assumes that SetImage/SetRectangle have been used to set the image - * to the given word. 
The mode arg should be PSM_SINGLE_WORD or - * PSM_CIRCLE_WORD, as that will be used to control layout analysis. - * The currently set PageSegMode is preserved. - * Returns false if adaption was not possible for some reason. - */ - bool AdaptToWordStr(PageSegMode mode, const char* wordstr); -#endif // ndef DISABLED_LEGACY_ENGINE - - /** - * Free up recognition results and any stored image data, without actually - * freeing any recognition data that would be time-consuming to reload. - * Afterwards, you must call SetImage or TesseractRect before doing - * any Recognize or Get* operation. - */ - void Clear(); - - /** - * Close down tesseract and free up all memory. End() is equivalent to - * destructing and reconstructing your TessBaseAPI. - * Once End() has been used, none of the other API functions may be used - * other than Init and anything declared above it in the class definition. - */ - void End(); - - /** - * Clear any library-level memory caches. - * There are a variety of expensive-to-load constant data structures (mostly - * language dictionaries) that are cached globally -- surviving the Init() - * and End() of individual TessBaseAPI's. This function allows the clearing - * of these caches. - **/ - static void ClearPersistentCache(); - - /** - * Check whether a word is valid according to Tesseract's language model - * @return 0 if the word is invalid, non-zero if valid. - * @warning temporary! This function will be removed from here and placed - * in a separate API at some future time. - */ - int IsValidWord(const char *word); - // Returns true if utf8_character is defined in the UniCharset. - bool IsValidCharacter(const char *utf8_character); - - - bool GetTextDirection(int* out_offset, float* out_slope); - - /** Sets Dict::letter_is_okay_ function to point to the given function. */ - void SetDictFunc(DictFunc f); - - /** Sets Dict::probability_in_context_ function to point to the given - * function. 
- */ - void SetProbabilityInContextFunc(ProbabilityInContextFunc f); - - /** - * Estimates the Orientation And Script of the image. - * @return true if the image was processed successfully. - */ - bool DetectOS(OSResults*); - - /** - * Return text orientation of each block as determined by an earlier run - * of layout analysis. - */ - void GetBlockTextOrientations(int** block_orientation, - bool** vertical_writing); - - - #ifndef DISABLED_LEGACY_ENGINE - - /** Sets Wordrec::fill_lattice_ function to point to the given function. */ - void SetFillLatticeFunc(FillLatticeFunc f); - - /** Find lines from the image making the BLOCK_LIST. */ - BLOCK_LIST* FindLinesCreateBlockList(); - - /** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ - static void DeleteBlockList(BLOCK_LIST* block_list); - - /** Returns a ROW object created from the input row specification. */ - static ROW *MakeTessOCRRow(float baseline, float xheight, - float descender, float ascender); - - /** Returns a TBLOB corresponding to the entire input image. */ - static TBLOB *MakeTBLOB(Pix *pix); - - /** - * This method baseline normalizes a TBLOB in-place. The input row is used - * for normalization. The denorm is an optional parameter in which the - * normalization-antidote is returned. - */ - static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); - - /** This method returns the features associated with the input image. */ - void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* feature_outline_index); - - /** - * This method returns the row to which a box of specified dimensions would - * belong. If no good match is found, it returns nullptr. - */ - static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); - - /** - * Method to run adaptive classifier on a blob. - * It returns at max num_max_matches results. 
- */ - void RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned); -#endif // ndef DISABLED_LEGACY_ENGINE - - /** This method returns the string form of the specified unichar. */ - const char* GetUnichar(int unichar_id); - - /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ - const Dawg *GetDawg(int i) const; - - /** Return the number of dawgs loaded into tesseract_ object. */ - int NumDawgs() const; - - Tesseract* tesseract() const { return tesseract_; } - - OcrEngineMode oem() const { return last_oem_requested_; } - - void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } - - void set_min_orientation_margin(double margin); - /* @} */ - - protected: - - /** Common code for setting the image. Returns true if Init has been called. */ - TESS_LOCAL bool InternalSetImage(); - - /** - * Run the thresholder to make the thresholded image. If pix is not nullptr, - * the source is thresholded to pix instead of the internal IMAGE. - */ - TESS_LOCAL virtual bool Threshold(Pix** pix); - - /** - * Find lines from the image making the BLOCK_LIST. - * @return 0 on success. - */ - TESS_LOCAL int FindLines(); - - /** Delete the pageres and block list ready for a new page. */ - void ClearResults(); - - /** - * Return an LTR Result Iterator -- used only for training, as we really want - * to ignore all BiDi smarts at that point. - * delete once you're done with it. - */ - TESS_LOCAL LTRResultIterator* GetLTRIterator(); - - /** - * Return the length of the output text string, as UTF8, assuming - * one newline per line and one per block, with a terminator, - * and assuming a single character reject marker for each rejected character. - * Also return the number of recognized blobs in blob_count. 
- */ - TESS_LOCAL int TextLength(int* blob_count); - - //// paragraphs.cpp //////////////////////////////////////////////////// - TESS_LOCAL void DetectParagraphs(bool after_text_recognition); - - #ifndef DISABLED_LEGACY_ENGINE - - /** @defgroup ocropusAddOns ocropus add-ons */ - /* @{ */ - - /** - * Adapt to recognize the current image as the given character. - * The image must be preloaded and be just an image of a single character. - */ - TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender); - - /** Recognize text doing one pass only, using settings for a given pass. */ - TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); - - TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result); - - /** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ - TESS_LOCAL static int TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res); - - TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } - /* @} */ -#endif // ndef DISABLED_LEGACY_ENGINE - - protected: - Tesseract* tesseract_; ///< The underlying data object. - Tesseract* osd_tesseract_; ///< For orientation & script detection. - EquationDetect* equ_detect_; ///* paragraph_models_; - BLOCK_LIST* block_list_; ///< The page layout. - PAGE_RES* page_res_; ///< The page-level data. - STRING* input_file_; ///< Name used by training code. - STRING* output_file_; ///< Name used by debug code. - STRING* datapath_; ///< Current location of tessdata. - STRING* language_; ///< Last initialized language. - OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. 
- TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES - - /** - * @defgroup ThresholderParams Thresholder Parameters - * Parameters saved from the Thresholder. Needed to rebuild coordinates. - */ - /* @{ */ - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; - int image_width_; - int image_height_; - /* @} */ - - private: - // A list of image filenames gets special consideration - bool ProcessPagesFileList(FILE *fp, - STRING *buf, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number); - // TIFF supports multipage so gets special consideration. - bool ProcessPagesMultipageTiff(const unsigned char *data, - size_t size, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number); - // There's currently no way to pass a document title from the - // Tesseract command line, and we have multiple places that choose - // to set the title to an empty string. Using a single named - // variable will hopefully reduce confusion if the situation changes - // in the future. - const char *unknown_title_ = ""; -}; // class TessBaseAPI. - -/** Escape a char string - remove &<>"' with HTML codes. */ -STRING HOcrEscape(const char* text); -} // namespace tesseract. - -#endif // TESSERACT_API_BASEAPI_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/capi.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/capi.cpp deleted file mode 100644 index 2146e8c8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/capi.cpp +++ /dev/null @@ -1,861 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: capi.cpp -// Description: C-API TessBaseAPI -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESS_CAPI_INCLUDE_BASEAPI -# define TESS_CAPI_INCLUDE_BASEAPI -#endif -#include "capi.h" -#include "genericvector.h" -#include "strngs.h" - -TESS_API const char* TESS_CALL TessVersion() -{ - return TessBaseAPI::Version(); -} - -TESS_API void TESS_CALL TessDeleteText(char* text) -{ - delete [] text; -} - -TESS_API void TESS_CALL TessDeleteTextArray(char** arr) -{ - for (char** pos = arr; *pos != nullptr; ++pos) - delete [] *pos; - delete [] arr; -} - -TESS_API void TESS_CALL TessDeleteIntArray(int* arr) -{ - delete [] arr; -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list) -{ - TessBaseAPI::DeleteBlockList(block_list); -} -#endif - -TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase) -{ - return new TessTextRenderer(outputbase); -} - -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase) -{ - return new TessHOcrRenderer(outputbase); -} - -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info) -{ - return new TessHOcrRenderer(outputbase, font_info); -} - -TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, - BOOL textonly) -{ - return new TessPDFRenderer(outputbase, datadir, textonly); -} - -TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase) -{ - return new TessUnlvRenderer(outputbase); -} - -TESS_API 
TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase) -{ - return new TessBoxTextRenderer(outputbase); -} - -TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer) -{ - delete renderer; -} - -TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next) -{ - renderer->insert(next); -} - -TESS_API TessResultRenderer* TESS_CALL TessResultRendererNext(TessResultRenderer* renderer) -{ - return renderer->next(); -} - -TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title) -{ - return renderer->BeginDocument(title); -} - -TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api) -{ - return renderer->AddImage(api); -} - -TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer) -{ - return renderer->EndDocument(); -} - -TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer) -{ - return renderer->file_extension(); -} - -TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer) -{ - return renderer->title(); -} - -TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer) -{ - return renderer->imagenum(); -} - -TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() -{ - return new TessBaseAPI; -} - -TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle) -{ - delete handle; -} - -TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device) -{ - return handle->getOpenCLDevice(device); -} - -TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, const char* name) -{ - handle->SetInputName(name); -} - -TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle) -{ - return handle->GetInputName(); -} - -TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, Pix* pix) -{ - handle->SetInputImage(pix); -} - -TESS_API 
Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle) -{ - return handle->GetInputImage(); -} - -TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle) -{ - return handle->GetSourceYResolution(); -} - -TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle) -{ - return handle->GetDatapath(); -} - -TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name) -{ - handle->SetOutputName(name); -} - -TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value) -{ - return handle->SetVariable(name, value) ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value) -{ - return handle->SetVariable(name, value) ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, const char* name, int* value) -{ - return handle->GetIntVariable(name, value) ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value) -{ - bool boolValue; - if (handle->GetBoolVariable(name, &boolValue)) - { - *value = boolValue ? TRUE : FALSE; - return TRUE; - } - else - { - return FALSE; - } -} - -TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value) -{ - return handle->GetDoubleVariable(name, value) ? 
TRUE : FALSE; -} - -TESS_API const char* TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) -{ - return handle->GetStringVariable(name); -} - -TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp) -{ - handle->PrintVariables(fp); -} - -TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename) -{ - FILE* fp = fopen(filename, "w"); - if (fp != nullptr) - { - handle->PrintVariables(fp); - fclose(fp); - return TRUE; - } - return FALSE; -} - -TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val) -{ - return handle->GetVariableAsString(name, val) ? TRUE : FALSE; -} - -TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params) -{ - GenericVector varNames; - GenericVector varValues; - if (vars_vec != nullptr && vars_values != nullptr) { - for (size_t i = 0; i < vars_vec_size; i++) { - varNames.push_back(STRING(vars_vec[i])); - varValues.push_back(STRING(vars_values[i])); - } - } - - return handle->Init(datapath, language, mode, configs, configs_size, &varNames, &varValues, set_only_non_debug_params); -} - - -TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem, - char** configs, int configs_size) -{ - return handle->Init(datapath, language, oem, configs, configs_size, nullptr, nullptr, false); -} - -TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem) -{ - return handle->Init(datapath, language, oem); -} - -TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language) -{ - return handle->Init(datapath, language); -} - 
-TESS_API const char* TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) -{ - return handle->GetInitLanguagesAsString(); -} - -TESS_API char** TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle) -{ - GenericVector languages; - handle->GetLoadedLanguagesAsVector(&languages); - char** arr = new char*[languages.size() + 1]; - for (int index = 0; index < languages.size(); ++index) - arr[index] = languages[index].strdup(); - arr[languages.size()] = nullptr; - return arr; -} - -TESS_API char** TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle) -{ - GenericVector languages; - handle->GetAvailableLanguagesAsVector(&languages); - char** arr = new char*[languages.size() + 1]; - for (int index = 0; index < languages.size(); ++index) - arr[index] = languages[index].strdup(); - arr[languages.size()] = nullptr; - return arr; -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language) -{ - return handle->InitLangMod(datapath, language); -} -#endif - -TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) -{ - handle->InitForAnalysePage(); -} - -TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename) -{ - handle->ReadConfigFile(filename); -} - -TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename) -{ - handle->ReadDebugConfigFile(filename); -} - -TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode) -{ - handle->SetPageSegMode(mode); -} - -TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) -{ - return handle->GetPageSegMode(); -} - -TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height) -{ - return 
handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height); -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) -{ - handle->ClearAdaptiveClassifier(); -} -#endif - -TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line) -{ - handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); -} - -TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix) -{ - return handle->SetImage(pix); -} - -TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi) -{ - handle->SetSourceResolution(ppi); -} - -TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height) -{ - handle->SetRectangle(left, top, width, height); -} - -TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder) -{ - handle->SetThresholder(thresholder); -} - -TESS_API struct Pix* TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) -{ - return handle->GetThresholdedImage(); -} - -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa) -{ - return handle->GetRegions(pixa); -} - -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids) -{ - return handle->GetTextlines(pixa, blockids); -} - -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids) -{ - return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids); -} - -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids) -{ - return handle->GetStrips(pixa, blockids); -} - -TESS_API struct Boxa* TESS_CALL 
TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa) -{ - return handle->GetWords(pixa); -} - -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc) -{ - return handle->GetConnectedComponents(cc); -} - -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, struct Pixa** pixa, int** blockids) -{ - return handle->GetComponentImages(level, text_only != FALSE, pixa, blockids); -} - -TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids) -{ - return handle->GetComponentImages(level, text_only != FALSE, raw_image, raw_padding, pixa, blockids, paraids); -} - -TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) -{ - return handle->GetThresholdedImageScaleFactor(); -} - -TESS_API TessPageIterator* TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle) -{ - return handle->AnalyseLayout(); -} - -TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor) -{ - return handle->Recognize(monitor); -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor) -{ - return handle->RecognizeForChopTest(monitor); -} -#endif - -TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer) -{ - if (handle->ProcessPages(filename, retry_config, timeout_millisec, renderer)) - return TRUE; - else - return FALSE; -} - -TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, TessResultRenderer* renderer) -{ - if 
(handle->ProcessPage(pix, page_index, filename, retry_config, timeout_millisec, renderer)) - return TRUE; - else - return FALSE; -} - -TESS_API TessResultIterator* TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle) -{ - return handle->GetIterator(); -} - -TESS_API TessMutableIterator* TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle) -{ - return handle->GetMutableIterator(); -} - -TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) -{ - return handle->GetUTF8Text(); -} - -TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number) -{ - return handle->GetHOCRText(nullptr, page_number); -} - -TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number) -{ - return handle->GetBoxText(page_number); -} - -TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) -{ - return handle->GetUNLVText(); -} - -TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) -{ - return handle->MeanTextConf(); -} - -TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) -{ - return handle->AllWordConfidences(); -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr) -{ - return handle->AdaptToWordStr(mode, wordstr) ? TRUE : FALSE; -} -#endif - -TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) -{ - handle->Clear(); -} - -TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) -{ - handle->End(); -} - -TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word) -{ - return handle->IsValidWord(word); -} - -TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope) -{ - return handle->GetTextDirection(out_offset, out_slope) ? 
TRUE : FALSE; -} - -TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f) -{ - handle->SetDictFunc(f); -} - -TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle) -{ - handle->ClearPersistentCache(); -} - -TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f) -{ - handle->SetProbabilityInContextFunc(f); -} - -#ifndef DISABLED_LEGACY_ENGINE - -TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, - int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) -{ - bool success; - success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); - return (BOOL)success; -} - -TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex) -{ - handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex); -} - -TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom) -{ - return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom); -} - -TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches, - int* unichar_ids, float* ratings, int* num_matches_returned) -{ - handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned); -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id) -{ - return handle->GetUnichar(unichar_id); -} - -TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i) -{ - return handle->GetDawg(i); -} - -TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) -{ - return handle->NumDawgs(); -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float 
baseline, float xheight, float descender, float ascender) -{ - return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); -} - -TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix) -{ - return TessBaseAPI::MakeTBLOB(pix); -} - -TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode) -{ - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode != FALSE); -} -#endif // ndef DISABLED_LEGACY_ENGINE - -TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) -{ - return handle->oem(); -} - -TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb) -{ - handle->InitTruthCallback(cb); -} - -TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin) -{ - handle->set_min_orientation_margin(margin); -} - -TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing) -{ - handle->GetBlockTextOrientations(block_orientation, vertical_writing); -} - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API BLOCK_LIST* TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) -{ - return handle->FindLinesCreateBlockList(); -} -#endif - -TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle) -{ - delete handle; -} - -TESS_API TessPageIterator* TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle) -{ - return new TessPageIterator(*handle); -} - -TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) -{ - handle->Begin(); -} - -TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level) -{ - return handle->Next(level) ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level) -{ - return handle->IsAtBeginningOf(level) ? 
TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element) -{ - return handle->IsAtFinalElement(level, element) ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, - int* left, int* top, int* right, int* bottom) -{ - return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE; -} - -TESS_API TessPolyBlockType TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle) -{ - return handle->BlockType(); -} - -TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level) -{ - return handle->GetBinaryImage(level); -} - -TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top) -{ - return handle->GetImage(level, padding, original_image, left, top); -} - -TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) -{ - return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE; -} - -TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation, - TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, - float* deskew_angle) -{ - handle->Orientation(orientation, writing_direction, textline_order, deskew_angle); -} - -TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification, - BOOL *is_list_item, BOOL *is_crown, int *first_line_indent) -{ - bool bool_is_list_item, bool_is_crown; - handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, first_line_indent); - if (is_list_item) - *is_list_item = bool_is_list_item ? 
TRUE : FALSE; - if (is_crown) - *is_crown = bool_is_crown ? TRUE : FALSE; -} - - -TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle) -{ - delete handle; -} - -TESS_API TessResultIterator* TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle) -{ - return new TessResultIterator(*handle); -} - -TESS_API TessPageIterator* TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle) -{ - return handle; -} - -TESS_API const TessPageIterator* TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) -{ - return handle; -} - -TESS_API TessChoiceIterator* TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle) -{ - return new TessChoiceIterator(*handle); -} - -TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level) -{ - return handle->Next(level); -} - -TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level) -{ - return handle->GetUTF8Text(level); -} - -TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level) -{ - return handle->Confidence(level); -} - -TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle) -{ - return handle->WordRecognitionLanguage(); -} - -TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id) -{ - bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps; - const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif, - &bool_is_smallcaps, pointsize, font_id); - if (is_bold) - *is_bold = bool_is_bold ? 
TRUE : FALSE; - if (is_italic) - *is_italic = bool_is_italic ? TRUE : FALSE; - if (is_underlined) - *is_underlined = bool_is_underlined ? TRUE : FALSE; - if (is_monospace) - *is_monospace = bool_is_monospace ? TRUE : FALSE; - if (is_serif) - *is_serif = bool_is_serif ? TRUE : FALSE; - if (is_smallcaps) - *is_smallcaps = bool_is_smallcaps ? TRUE : FALSE; - return ret; -} - -TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) -{ - return handle->WordIsFromDictionary() ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle) -{ - return handle->WordIsNumeric() ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) -{ - return handle->SymbolIsSuperscript() ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) -{ - return handle->SymbolIsSubscript() ? TRUE : FALSE; -} - -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) -{ - return handle->SymbolIsDropcap() ? 
TRUE : FALSE; -} - -TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle) -{ - delete handle; -} - -TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle) -{ - return handle->Next(); -} - -TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle) -{ - return handle->GetUTF8Text(); -} - -TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle) -{ - return handle->Confidence(); -} - -TESS_API ETEXT_DESC* TESS_CALL TessMonitorCreate() -{ - return new ETEXT_DESC(); -} - -TESS_API void TESS_CALL TessMonitorDelete(ETEXT_DESC* monitor) -{ - delete monitor; -} - -TESS_API void TESS_CALL TessMonitorSetCancelFunc(ETEXT_DESC* monitor, TessCancelFunc cancelFunc) -{ - monitor->cancel = cancelFunc; -} - -TESS_API void TESS_CALL TessMonitorSetCancelThis(ETEXT_DESC* monitor, void* cancelThis) -{ - monitor->cancel_this = cancelThis; -} - -TESS_API void* TESS_CALL TessMonitorGetCancelThis(ETEXT_DESC* monitor) -{ - return monitor->cancel_this; -} - -TESS_API void TESS_CALL TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc) -{ - monitor->progress_callback2 = progressFunc; -} - -TESS_API int TESS_CALL TessMonitorGetProgress(ETEXT_DESC* monitor) -{ - return monitor->progress; -} - -TESS_API void TESS_CALL TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline) -{ - monitor->set_deadline_msecs(deadline); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/capi.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/capi.h deleted file mode 100644 index ba4445b5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/capi.h +++ /dev/null @@ -1,451 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: capi.h -// Description: C-API TessBaseAPI -// -// (C) Copyright 2012, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef API_CAPI_H_ -#define API_CAPI_H_ - -#if defined(TESSERACT_API_BASEAPI_H_) && !defined(TESS_CAPI_INCLUDE_BASEAPI) -# define TESS_CAPI_INCLUDE_BASEAPI -#endif - -#ifdef TESS_CAPI_INCLUDE_BASEAPI -# include "baseapi.h" -# include "ocrclass.h" -# include "pageiterator.h" -# include "resultiterator.h" -# include "renderer.h" -#else -# include "platform.h" -# include -# include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef TESS_CALL -# if defined(WIN32) -# define TESS_CALL __cdecl -# else -# define TESS_CALL -# endif -#endif - -#ifndef BOOL -# define BOOL int -# define TRUE 1 -# define FALSE 0 -#endif - -#ifdef TESS_CAPI_INCLUDE_BASEAPI -typedef tesseract::TessResultRenderer TessResultRenderer; -typedef tesseract::TessTextRenderer TessTextRenderer; -typedef tesseract::TessHOcrRenderer TessHOcrRenderer; -typedef tesseract::TessPDFRenderer TessPDFRenderer; -typedef tesseract::TessUnlvRenderer TessUnlvRenderer; -typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer; -typedef tesseract::TessBaseAPI TessBaseAPI; -typedef tesseract::PageIterator TessPageIterator; -typedef tesseract::ResultIterator TessResultIterator; -typedef tesseract::MutableIterator TessMutableIterator; -typedef tesseract::ChoiceIterator TessChoiceIterator; -typedef tesseract::OcrEngineMode TessOcrEngineMode; -typedef tesseract::PageSegMode 
TessPageSegMode; -typedef tesseract::ImageThresholder TessImageThresholder; -typedef tesseract::PageIteratorLevel TessPageIteratorLevel; -typedef tesseract::DictFunc TessDictFunc; -typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc; -// typedef tesseract::ParamsModelClassifyFunc TessParamsModelClassifyFunc; -typedef tesseract::FillLatticeFunc TessFillLatticeFunc; -typedef tesseract::Dawg TessDawg; -typedef tesseract::TruthCallback TessTruthCallback; -typedef tesseract::Orientation TessOrientation; -typedef tesseract::ParagraphJustification TessParagraphJustification; -typedef tesseract::WritingDirection TessWritingDirection; -typedef tesseract::TextlineOrder TessTextlineOrder; -typedef PolyBlockType TessPolyBlockType; -#else -typedef struct TessResultRenderer TessResultRenderer; -typedef struct TessTextRenderer TessTextRenderer; -typedef struct TessHOcrRenderer TessHOcrRenderer; -typedef struct TessPDFRenderer TessPDFRenderer; -typedef struct TessUnlvRenderer TessUnlvRenderer; -typedef struct TessBoxTextRenderer TessBoxTextRenderer; -typedef struct TessBaseAPI TessBaseAPI; -typedef struct TessPageIterator TessPageIterator; -typedef struct TessResultIterator TessResultIterator; -typedef struct TessMutableIterator TessMutableIterator; -typedef struct TessChoiceIterator TessChoiceIterator; -typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT } TessOcrEngineMode; -typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, - PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT, - PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode; -typedef enum TessPageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} TessPageIteratorLevel; -typedef enum TessPolyBlockType { PT_UNKNOWN, PT_FLOWING_TEXT, PT_HEADING_TEXT, PT_PULLOUT_TEXT, PT_EQUATION, PT_INLINE_EQUATION, - 
PT_TABLE, PT_VERTICAL_TEXT, PT_CAPTION_TEXT, PT_FLOWING_IMAGE, PT_HEADING_IMAGE, - PT_PULLOUT_IMAGE, PT_HORZ_LINE, PT_VERT_LINE, PT_NOISE, PT_COUNT } TessPolyBlockType; -typedef enum TessOrientation { ORIENTATION_PAGE_UP, ORIENTATION_PAGE_RIGHT, ORIENTATION_PAGE_DOWN, ORIENTATION_PAGE_LEFT } TessOrientation; -typedef enum TessParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT } TessParagraphJustification; -typedef enum TessWritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT, WRITING_DIRECTION_RIGHT_TO_LEFT, WRITING_DIRECTION_TOP_TO_BOTTOM } TessWritingDirection; -typedef enum TessTextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT, TEXTLINE_ORDER_RIGHT_TO_LEFT, TEXTLINE_ORDER_TOP_TO_BOTTOM } TessTextlineOrder; -typedef struct ETEXT_DESC ETEXT_DESC; -#endif - -typedef bool (*TessCancelFunc)(void* cancel_this, int words); -typedef bool (*TessProgressFunc)(ETEXT_DESC* ths, int left, int right, int top, - int bottom); - -struct Pix; -struct Boxa; -struct Pixa; - -/* General free functions */ - -TESS_API const char* - TESS_CALL TessVersion(); -TESS_API void TESS_CALL TessDeleteText(char* text); -TESS_API void TESS_CALL TessDeleteTextArray(char** arr); -TESS_API void TESS_CALL TessDeleteIntArray(int* arr); - -/* Renderer API */ -TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); -TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, - BOOL textonly); -TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase); - -TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer); -TESS_API void TESS_CALL 
TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next); -TESS_API TessResultRenderer* - TESS_CALL TessResultRendererNext(TessResultRenderer* renderer); -TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title); -TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api); -TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer); - -TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer); -TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer); -TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer); - -/* Base API */ - -TESS_API TessBaseAPI* - TESS_CALL TessBaseAPICreate(); -TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle); - -TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device); - -TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, const char* name); -TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix); -TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle); - -TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle); -TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name); - -TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value); -TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value); - -TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, const char* name, int* value); -TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value); 
-TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value); -TESS_API const char* - TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); - -TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp); -TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename); - -#ifdef TESS_CAPI_INCLUDE_BASEAPI - -TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val); - -TESS_API int TESS_CALL TessBaseAPIInit(TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - const STRING* vars_vec, size_t vars_vec_size, - const STRING* vars_values, size_t vars_values_size, BOOL set_only_init_params); - -#endif // def TESS_CAPI_INCLUDE_BASEAPI - -TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem, - char** configs, int configs_size); -TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem); -TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language); - -TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode, - char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params); - -TESS_API const char* - TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); -TESS_API char** - TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); -TESS_API char** - TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); - -TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language); -TESS_API 
void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename); -TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename); - -TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode); -TESS_API TessPageSegMode - TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); - -TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); - -TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line); -TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix); - -TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi); - -TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height); - -#ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder); -#endif - -TESS_API struct Pix* - TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetWords(TessBaseAPI* 
handle, struct Pixa** pixa); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - struct Pixa** pixa, int** blockids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); - -TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); - -TESS_API TessPageIterator* - TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); - -TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor); -#endif - -TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); -TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); - -TESS_API TessResultIterator* - TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); -TESS_API TessMutableIterator* - TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); - -TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle); -TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); - -TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number); - -TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle); -TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle); - -TESS_API int* TESS_CALL 
TessBaseAPIAllWordConfidences(TessBaseAPI* handle); - -#ifndef DISABLED_LEGACY_ENGINE -TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr); -#endif // ndef DISABLED_LEGACY_ENGINE - -TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle); -TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle); - -TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word); -TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope); - - -#ifdef TESS_CAPI_INCLUDE_BASEAPI - -TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f); - -TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f); - -// Call TessDeleteText(*best_script_name) to free memory allocated by this function -TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, - int* orient_deg, float* orient_conf, const char **script_name, float* script_conf); - -#endif // def TESS_CAPI_INCLUDE_BASEAPI - -TESS_API const char* - TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); - -TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin); - -#ifdef TESS_CAPI_INCLUDE_BASEAPI - -TESS_API const TessDawg* - TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i); - -TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); - -TESS_API TessOcrEngineMode - TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb); - -TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing); - -#endif - -/* Page iterator */ - -TESS_API void TESS_CALL 
TessPageIteratorDelete(TessPageIterator* handle); - -TESS_API TessPageIterator* - TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); - -TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); - -TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level); - -TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level); - -TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element); - -TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, - int* left, int* top, int* right, int* bottom); - -TESS_API TessPolyBlockType - TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); - -TESS_API struct Pix* - TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); - -TESS_API struct Pix* - TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top); - -TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2); - -TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation, - TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, - float* deskew_angle); - -TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification, - BOOL *is_list_item, BOOL *is_crown, int *first_line_indent); - -/* Result iterator */ - -TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle); -TESS_API TessResultIterator* - TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); -TESS_API TessPageIterator* - TESS_CALL 
TessResultIteratorGetPageIterator(TessResultIterator* handle); -TESS_API const TessPageIterator* - TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); -TESS_API TessChoiceIterator* - TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); - -TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API const char* - TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); -TESS_API const char* - TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id); - -TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle); - -TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle); -TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle); -TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle); -TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle); - -/* Progress monitor */ - -TESS_API ETEXT_DESC* TESS_CALL TessMonitorCreate(); -TESS_API void TESS_CALL TessMonitorDelete(ETEXT_DESC* monitor); -TESS_API void TESS_CALL 
TessMonitorSetCancelFunc(ETEXT_DESC* monitor, TessCancelFunc cancelFunc); -TESS_API void TESS_CALL TessMonitorSetCancelThis(ETEXT_DESC* monitor, void* cancelThis); -TESS_API void* TESS_CALL TessMonitorGetCancelThis(ETEXT_DESC* monitor); -TESS_API void TESS_CALL TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc); -TESS_API int TESS_CALL TessMonitorGetProgress(ETEXT_DESC* monitor); -TESS_API void TESS_CALL TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline); - - -#ifndef DISABLED_LEGACY_ENGINE - -#ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f); - -TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex); - -TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom); - -TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches, - int* unichar_ids, float* ratings, int* num_matches_returned); - -TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender); - -TESS_API TBLOB* - TESS_CALL TessMakeTBLOB(Pix* pix); - -TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode); - -TESS_API BLOCK_LIST* - TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); - -#endif // def TESS_CAPI_INCLUDE_BASEAPI - -#endif // ndef DISABLED_LEGACY_ENGINE - - -#ifdef __cplusplus -} -#endif - -#endif // API_CAPI_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/config_auto.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/config_auto.h deleted file mode 100644 index ef3cfbaa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/config_auto.h +++ 
/dev/null @@ -1 +0,0 @@ -#define PACKAGE_VERSION "4.0.0" \ No newline at end of file diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/pdfrenderer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/pdfrenderer.cpp deleted file mode 100644 index 99e114d2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/pdfrenderer.cpp +++ /dev/null @@ -1,1016 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: pdfrenderer.cpp -// Description: PDF rendering interface to inject into TessBaseAPI -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include // std::unique_ptr -#include "allheaders.h" -#include "baseapi.h" -#include -#include "renderer.h" -#include -#include "tprintf.h" - -/* - -Design notes from Ken Sharp, with light editing. - -We think one solution is a font with a single glyph (.notdef) and a -CIDToGIDMap which maps all the CIDs to 0. That map would then be -stored as a stream in the PDF file, and when flate compressed should -be pretty small. The font, of course, will be approximately the same -size as the one you currently use. 
- -I'm working on such a font now, the CIDToGIDMap is trivial, you just -create a stream object which contains 128k bytes (2 bytes per possible -CID and your CIDs range from 0 to 65535) and where you currently have -"/CIDToGIDMap /Identity" you would have "/CIDToGIDMap 0 R". - -Note that if, in future, you were to use a different (ie not 2 byte) -CMap for character codes you could trivially extend the CIDToGIDMap. - -The following is an explanation of how some of the font stuff works, -this may be too simple for you in which case please accept my -apologies, its hard to know how much knowledge someone has. You can -skip all this anyway, its just for information. - -The font embedded in a PDF file is usually intended just to be -rendered, but extensions allow for at least some ability to locate (or -copy) text from a document. This isn't something which was an original -goal of the PDF format, but its been retro-fitted, presumably due to -popular demand. - -To do this reliably the PDF file must contain a ToUnicode CMap, a -device for mapping character codes to Unicode code points. If one of -these is present, then this will be used to convert the character -codes into Unicode values. If its not present then the reader will -fall back through a series of heuristics to try and guess the -result. This is, as you would expect, prone to failure. - -This doesn't concern you of course, since you always write a ToUnicode -CMap, so because you are writing the text in text rendering mode 3 it -would seem that you don't really need to worry about this, but in the -PDF spec you cannot have an isolated ToUnicode CMap, it has to be -attached to a font, so in order to get even copy/paste to work you -need to define a font. 
- -This is what leads to problems, tools like pdfwrite assume that they -are going to be able to (or even have to) modify the font entries, so -they require that the font being embedded be valid, and to be honest -the font Tesseract embeds isn't valid (for this purpose). - - -To see why lets look at how text is specified in a PDF file: - -(Test) Tj - -Now that looks like text but actually it isn't. Each of those bytes is -a 'character code'. When it comes to rendering the text a complex -sequence of events takes place, which converts the character code into -'something' which the font understands. Its entirely possible via -character mappings to have that text render as 'Sftu' - -For simple fonts (PostScript type 1), we use the character code as the -index into an Encoding array (256 elements), each element of which is -a glyph name, so this gives us a glyph name. We then consult the -CharStrings dictionary in the font, that's a complex object which -contains pairs of keys and values, you can use the key to retrieve a -given value. So we have a glyph name, we then use that as the key to -the dictionary and retrieve the associated value. For a type 1 font, -the value is a glyph program that describes how to draw the glyph. - -For CIDFonts, its a little more complicated. Because CIDFonts can be -large, using a glyph name as the key is unreasonable (it would also -lead to unfeasibly large Encoding arrays), so instead we use a 'CID' -as the key. CIDs are just numbers. - -But.... We don't use the character code as the CID. What we do is use -a CMap to convert the character code into a CID. We then use the CID -to key the CharStrings dictionary and proceed as before. So the 'CMap' -is the equivalent of the Encoding array, but its a more compact and -flexible representation. - -Note that you have to use the CMap just to find out how many bytes -constitute a character code, and it can be variable. 
For example you -can say if the first byte is 0x00->0x7f then its just one byte, if its -0x80->0xf0 then its 2 bytes and if its 0xf0->0xff then its 3 bytes. I -have seen CMaps defining character codes up to 5 bytes wide. - -Now that's fine for 'PostScript' CIDFonts, but its not sufficient for -TrueType CIDFonts. The thing is that TrueType fonts are accessed using -a Glyph ID (GID) (and the LOCA table) which may well not be anything -like the CID. So for this case PDF includes a CIDToGIDMap. That maps -the CIDs to GIDs, and we can then use the GID to get the glyph -description from the GLYF table of the font. - -So for a TrueType CIDFont, character-code->CID->GID->glyf-program. - -Looking at the PDF file I was supplied with we see that it contains -text like : - -<0x0075> Tj - -So we start by taking the character code (117) and look it up in the -CMap. Well you don't supply a CMap, you just use the Identity-H one -which is predefined. So character code 117 maps to CID 117. Then we -use the CIDToGIDMap, again you don't supply one, you just use the -predefined 'Identity' map. So CID 117 maps to GID 117. But the font we -were supplied with only contains 116 glyphs. - -Now for Latin that's not a huge problem, you can just supply a bigger -font. But for more complex languages that *is* going to be more of a -problem. Either you need to supply a font which contains glyphs for -all the possible CID->GID mappings, or we need to think laterally. - -Our solution using a TrueType CIDFont is to intervene at the -CIDToGIDMap stage and convert all the CIDs to GID 0. Then we have a -font with just one glyph, the .notdef glyph at GID 0. This is what I'm -looking into now. - -It would also be possible to have a 'PostScript' (ie type 1 outlines) -CIDFont which contained 1 glyph, and a CMap which mapped all character -codes to CID 0. The effect would be the same. 
- -Its possible (I haven't checked) that the PostScript CIDFont and -associated CMap would be smaller than the TrueType font and associated -CIDToGIDMap. - ---- in a followup --- - -OK there is a small problem there, if I use GID 0 then Acrobat gets -upset about it and complains it cannot extract the font. If I set the -CIDToGIDMap so that all the entries are 1 instead, it's happy. Totally -mad...... - -*/ - -namespace tesseract { - -// Use for PDF object fragments. Must be large enough -// to hold a colormap with 256 colors in the verbose -// PDF representation. -static const int kBasicBufSize = 2048; - -// If the font is 10 pts, nominal character width is 5 pts -static const int kCharWidth = 2; - -// Used for memory allocation. A codepoint must take no more than this -// many bytes, when written in the PDF way. e.g. "<0063>" for the -// letter 'c' -static const int kMaxBytesPerCodepoint = 20; - -/********************************************************************** - * PDF Renderer interface implementation - **********************************************************************/ -TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, - bool textonly) - : TessResultRenderer(outputbase, "pdf"), - datadir_(datadir) { - obj_ = 0; - textonly_ = textonly; - offsets_.push_back(0); -} - -void TessPDFRenderer::AppendPDFObjectDIY(size_t objectsize) { - offsets_.push_back(objectsize + offsets_.back()); - obj_++; -} - -void TessPDFRenderer::AppendPDFObject(const char *data) { - AppendPDFObjectDIY(strlen(data)); - AppendString(data); -} - -// Helper function to prevent us from accidentally writing -// scientific notation to an HOCR or PDF file. Besides, three -// decimal points are all you really need. 
-static double prec(double x) { - double kPrecision = 1000.0; - double a = round(x * kPrecision) / kPrecision; - if (a == -0) - return 0; - return a; -} - -static long dist2(int x1, int y1, int x2, int y2) { - return (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1); -} - -// Viewers like evince can get really confused during copy-paste when -// the baseline wanders around. So I've decided to project every word -// onto the (straight) line baseline. All numbers are in the native -// PDF coordinate system, which has the origin in the bottom left and -// the unit is points, which is 1/72 inch. Tesseract reports baselines -// left-to-right no matter what the reading order is. We need the -// word baseline in reading order, so we do that conversion here. Returns -// the word's baseline origin and length. -static void GetWordBaseline(int writing_direction, int ppi, int height, - int word_x1, int word_y1, int word_x2, int word_y2, - int line_x1, int line_y1, int line_x2, int line_y2, - double *x0, double *y0, double *length) { - if (writing_direction == WRITING_DIRECTION_RIGHT_TO_LEFT) { - Swap(&word_x1, &word_x2); - Swap(&word_y1, &word_y2); - } - double word_length; - double x, y; - { - int px = word_x1; - int py = word_y1; - double l2 = dist2(line_x1, line_y1, line_x2, line_y2); - if (l2 == 0) { - x = line_x1; - y = line_y1; - } else { - double t = ((px - line_x2) * (line_x2 - line_x1) + - (py - line_y2) * (line_y2 - line_y1)) / l2; - x = line_x2 + t * (line_x2 - line_x1); - y = line_y2 + t * (line_y2 - line_y1); - } - word_length = sqrt(static_cast(dist2(word_x1, word_y1, - word_x2, word_y2))); - word_length = word_length * 72.0 / ppi; - x = x * 72 / ppi; - y = height - (y * 72.0 / ppi); - } - *x0 = x; - *y0 = y; - *length = word_length; -} - -// Compute coefficients for an affine matrix describing the rotation -// of the text. If the text is right-to-left such as Arabic or Hebrew, -// we reflect over the Y-axis. 
This matrix will set the coordinate -// system for placing text in the PDF file. -// -// RTL -// [ x' ] = [ a b ][ x ] = [-1 0 ] [ cos sin ][ x ] -// [ y' ] [ c d ][ y ] [ 0 1 ] [-sin cos ][ y ] -static void AffineMatrix(int writing_direction, - int line_x1, int line_y1, int line_x2, int line_y2, - double *a, double *b, double *c, double *d) { - double theta = atan2(static_cast(line_y1 - line_y2), - static_cast(line_x2 - line_x1)); - *a = cos(theta); - *b = sin(theta); - *c = -sin(theta); - *d = cos(theta); - switch(writing_direction) { - case WRITING_DIRECTION_RIGHT_TO_LEFT: - *a = -*a; - *b = -*b; - break; - case WRITING_DIRECTION_TOP_TO_BOTTOM: - // TODO(jbreiden) Consider using the vertical PDF writing mode. - break; - default: - break; - } -} - -// There are some really awkward PDF viewers in the wild, such as -// 'Preview' which ships with the Mac. They do a better job with text -// selection and highlighting when given perfectly flat baseline -// instead of very slightly tilted. We clip small tilts to appease -// these viewers. I chose this threshold large enough to absorb noise, -// but small enough that lines probably won't cross each other if the -// whole page is tilted at almost exactly the clipping threshold. 
-static void ClipBaseline(int ppi, int x1, int y1, int x2, int y2, - int *line_x1, int *line_y1, - int *line_x2, int *line_y2) { - *line_x1 = x1; - *line_y1 = y1; - *line_x2 = x2; - *line_y2 = y2; - int rise = abs(y2 - y1) * 72; - int run = abs(x2 - x1) * 72; - if (rise < 2 * ppi && 2 * ppi < run) - *line_y1 = *line_y2 = (y1 + y2) / 2; -} - -static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) { - if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) { - tprintf("Dropping invalid codepoint %d\n", code); - return false; - } - if (code < 0x10000) { - snprintf(utf16, kMaxBytesPerCodepoint, "%04X", code); - } else { - int a = code - 0x010000; - int high_surrogate = (0x03FF & (a >> 10)) + 0xD800; - int low_surrogate = (0x03FF & a) + 0xDC00; - snprintf(utf16, kMaxBytesPerCodepoint, - "%04X%04X", high_surrogate, low_surrogate); - } - return true; -} - -char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, - double width, double height) { - STRING pdf_str(""); - double ppi = api->GetSourceYResolution(); - - // These initial conditions are all arbitrary and will be overwritten - double old_x = 0.0, old_y = 0.0; - int old_fontsize = 0; - tesseract::WritingDirection old_writing_direction = - WRITING_DIRECTION_LEFT_TO_RIGHT; - bool new_block = true; - int fontsize = 0; - double a = 1; - double b = 0; - double c = 0; - double d = 1; - - // TODO(jbreiden) This marries the text and image together. - // Slightly cleaner from an abstraction standpoint if this were to - // live inside a separate text object. 
- pdf_str += "q "; - pdf_str.add_str_double("", prec(width)); - pdf_str += " 0 0 "; - pdf_str.add_str_double("", prec(height)); - pdf_str += " 0 0 cm"; - if (!textonly_) { - pdf_str += " /Im1 Do"; - } - pdf_str += " Q\n"; - - int line_x1 = 0; - int line_y1 = 0; - int line_x2 = 0; - int line_y2 = 0; - - ResultIterator *res_it = api->GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - pdf_str += "BT\n3 Tr"; // Begin text object, use invisible ink - old_fontsize = 0; // Every block will declare its fontsize - new_block = true; // Every block will declare its affine matrix - } - - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - int x1, y1, x2, y2; - res_it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); - ClipBaseline(ppi, x1, y1, x2, y2, &line_x1, &line_y1, &line_x2, &line_y2); - } - - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Writing direction changes at a per-word granularity - tesseract::WritingDirection writing_direction; - { - tesseract::Orientation orientation; - tesseract::TextlineOrder textline_order; - float deskew_angle; - res_it->Orientation(&orientation, &writing_direction, - &textline_order, &deskew_angle); - if (writing_direction != WRITING_DIRECTION_TOP_TO_BOTTOM) { - switch (res_it->WordDirection()) { - case DIR_LEFT_TO_RIGHT: - writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT; - break; - case DIR_RIGHT_TO_LEFT: - writing_direction = WRITING_DIRECTION_RIGHT_TO_LEFT; - break; - default: - writing_direction = old_writing_direction; - } - } - } - - // Where is word origin and how long is it? 
- double x, y, word_length; - { - int word_x1, word_y1, word_x2, word_y2; - res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2); - GetWordBaseline(writing_direction, ppi, height, - word_x1, word_y1, word_x2, word_y2, - line_x1, line_y1, line_x2, line_y2, - &x, &y, &word_length); - } - - if (writing_direction != old_writing_direction || new_block) { - AffineMatrix(writing_direction, - line_x1, line_y1, line_x2, line_y2, &a, &b, &c, &d); - pdf_str.add_str_double(" ", prec(a)); // . This affine matrix - pdf_str.add_str_double(" ", prec(b)); // . sets the coordinate - pdf_str.add_str_double(" ", prec(c)); // . system for all - pdf_str.add_str_double(" ", prec(d)); // . text that follows. - pdf_str.add_str_double(" ", prec(x)); // . - pdf_str.add_str_double(" ", prec(y)); // . - pdf_str += (" Tm "); // Place cursor absolutely - new_block = false; - } else { - double dx = x - old_x; - double dy = y - old_y; - pdf_str.add_str_double(" ", prec(dx * a + dy * b)); - pdf_str.add_str_double(" ", prec(dx * c + dy * d)); - pdf_str += (" Td "); // Relative moveto - } - old_x = x; - old_y = y; - old_writing_direction = writing_direction; - - // Adjust font size on a per word granularity. Pay attention to - // fontsize, old_fontsize, and pdf_str. We've found that for - // in Arabic, Tesseract will happily return a fontsize of zero, - // so we make up a default number to protect ourselves. 
- { - bool bold, italic, underlined, monospace, serif, smallcaps; - int font_id; - res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, - &serif, &smallcaps, &fontsize, &font_id); - const int kDefaultFontsize = 8; - if (fontsize <= 0) - fontsize = kDefaultFontsize; - if (fontsize != old_fontsize) { - char textfont[20]; - snprintf(textfont, sizeof(textfont), "/f-0-0 %d Tf ", fontsize); - pdf_str += textfont; - old_fontsize = fontsize; - } - } - - bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); - bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); - STRING pdf_word(""); - int pdf_word_len = 0; - do { - const std::unique_ptr grapheme( - res_it->GetUTF8Text(RIL_SYMBOL)); - if (grapheme && grapheme[0] != '\0') { - std::vector unicodes = UNICHAR::UTF8ToUTF32(grapheme.get()); - char utf16[kMaxBytesPerCodepoint]; - for (char32 code : unicodes) { - if (CodepointToUtf16be(code, utf16)) { - pdf_word += utf16; - pdf_word_len++; - } - } - } - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - if (word_length > 0 && pdf_word_len > 0) { - double h_stretch = - kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len)); - pdf_str.add_str_double("", h_stretch); - pdf_str += " Tz"; // horizontal stretch - pdf_str += " [ <"; - pdf_str += pdf_word; // UTF-16BE representation - pdf_str += "> ] TJ"; // show the text - } - if (last_word_in_line) { - pdf_str += " \n"; - } - if (last_word_in_block) { - pdf_str += "ET\n"; // end the text object - } - } - char *ret = new char[pdf_str.length() + 1]; - strcpy(ret, pdf_str.string()); - delete res_it; - return ret; -} - -bool TessPDFRenderer::BeginDocumentHandler() { - char buf[kBasicBufSize]; - size_t n; - - n = snprintf(buf, sizeof(buf), - "%%PDF-1.5\n" - "%%%c%c%c%c\n", - 0xDE, 0xAD, 0xBE, 0xEB); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - - // CATALOG - n = snprintf(buf, sizeof(buf), - "1 0 obj\n" - 
"<<\n" - " /Type /Catalog\n" - " /Pages %ld 0 R\n" - ">>\n" - "endobj\n", - 2L); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - - // We are reserving object #2 for the /Pages - // object, which I am going to create and write - // at the end of the PDF file. - AppendPDFObject(""); - - // TYPE0 FONT - n = snprintf(buf, sizeof(buf), - "3 0 obj\n" - "<<\n" - " /BaseFont /GlyphLessFont\n" - " /DescendantFonts [ %ld 0 R ]\n" - " /Encoding /Identity-H\n" - " /Subtype /Type0\n" - " /ToUnicode %ld 0 R\n" - " /Type /Font\n" - ">>\n" - "endobj\n", - 4L, // CIDFontType2 font - 6L // ToUnicode - ); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - - // CIDFONTTYPE2 - n = snprintf(buf, sizeof(buf), - "4 0 obj\n" - "<<\n" - " /BaseFont /GlyphLessFont\n" - " /CIDToGIDMap %ld 0 R\n" - " /CIDSystemInfo\n" - " <<\n" - " /Ordering (Identity)\n" - " /Registry (Adobe)\n" - " /Supplement 0\n" - " >>\n" - " /FontDescriptor %ld 0 R\n" - " /Subtype /CIDFontType2\n" - " /Type /Font\n" - " /DW %d\n" - ">>\n" - "endobj\n", - 5L, // CIDToGIDMap - 7L, // Font descriptor - 1000 / kCharWidth); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - - // CIDTOGIDMAP - const int kCIDToGIDMapSize = 2 * (1 << 16); - const std::unique_ptr cidtogidmap( - new unsigned char[kCIDToGIDMapSize]); - for (int i = 0; i < kCIDToGIDMapSize; i++) { - cidtogidmap[i] = (i % 2) ? 
1 : 0; - } - size_t len; - unsigned char *comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len); - n = snprintf(buf, sizeof(buf), - "5 0 obj\n" - "<<\n" - " /Length %lu /Filter /FlateDecode\n" - ">>\n" - "stream\n", - (unsigned long)len); - if (n >= sizeof(buf)) { - lept_free(comp); - return false; - } - AppendString(buf); - long objsize = strlen(buf); - AppendData(reinterpret_cast(comp), len); - objsize += len; - lept_free(comp); - const char *endstream_endobj = - "endstream\n" - "endobj\n"; - AppendString(endstream_endobj); - objsize += strlen(endstream_endobj); - AppendPDFObjectDIY(objsize); - - const char *stream = - "/CIDInit /ProcSet findresource begin\n" - "12 dict begin\n" - "begincmap\n" - "/CIDSystemInfo\n" - "<<\n" - " /Registry (Adobe)\n" - " /Ordering (UCS)\n" - " /Supplement 0\n" - ">> def\n" - "/CMapName /Adobe-Identify-UCS def\n" - "/CMapType 2 def\n" - "1 begincodespacerange\n" - "<0000> \n" - "endcodespacerange\n" - "1 beginbfrange\n" - "<0000> <0000>\n" - "endbfrange\n" - "endcmap\n" - "CMapName currentdict /CMap defineresource pop\n" - "end\n" - "end\n"; - - // TOUNICODE - n = snprintf(buf, sizeof(buf), - "6 0 obj\n" - "<< /Length %lu >>\n" - "stream\n" - "%s" - "endstream\n" - "endobj\n", (unsigned long) strlen(stream), stream); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - - // FONT DESCRIPTOR - n = snprintf(buf, sizeof(buf), - "7 0 obj\n" - "<<\n" - " /Ascent %d\n" - " /CapHeight %d\n" - " /Descent -1\n" // Spec says must be negative - " /Flags 5\n" // FixedPitch + Symbolic - " /FontBBox [ 0 0 %d %d ]\n" - " /FontFile2 %ld 0 R\n" - " /FontName /GlyphLessFont\n" - " /ItalicAngle 0\n" - " /StemV 80\n" - " /Type /FontDescriptor\n" - ">>\n" - "endobj\n", - 1000, - 1000, - 1000 / kCharWidth, - 1000, - 8L // Font data - ); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - - n = snprintf(buf, sizeof(buf), "%s/pdf.ttf", datadir_.c_str()); - if (n >= sizeof(buf)) return false; - FILE *fp = fopen(buf, "rb"); - 
if (!fp) { - tprintf("Can not open file \"%s\"!\n", buf); - return false; - } - fseek(fp, 0, SEEK_END); - long int size = ftell(fp); - if (size < 0) { - fclose(fp); - return false; - } - fseek(fp, 0, SEEK_SET); - const std::unique_ptr buffer(new char[size]); - if (!tesseract::DeSerialize(fp, buffer.get(), size)) { - fclose(fp); - return false; - } - fclose(fp); - // FONTFILE2 - n = snprintf(buf, sizeof(buf), - "8 0 obj\n" - "<<\n" - " /Length %ld\n" - " /Length1 %ld\n" - ">>\n" - "stream\n", size, size); - if (n >= sizeof(buf)) { - return false; - } - AppendString(buf); - objsize = strlen(buf); - AppendData(buffer.get(), size); - objsize += size; - AppendString(endstream_endobj); - objsize += strlen(endstream_endobj); - AppendPDFObjectDIY(objsize); - return true; -} - -bool TessPDFRenderer::imageToPDFObj(Pix *pix, - const char* filename, - long int objnum, - char **pdf_object, - long int* pdf_object_size, - const int jpg_quality) { - size_t n; - char b0[kBasicBufSize]; - char b1[kBasicBufSize]; - char b2[kBasicBufSize]; - if (!pdf_object_size || !pdf_object) - return false; - *pdf_object = nullptr; - *pdf_object_size = 0; - if (!filename && !pix) - return false; - - L_Compressed_Data *cid = nullptr; - - int sad = 0; - if (pixGetInputFormat(pix) == IFF_PNG) - sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid); - if (!cid) { - sad = l_generateCIDataForPdf(filename, pix, jpg_quality, &cid); - } - - if (sad || !cid) { - l_CIDataDestroy(&cid); - return false; - } - - const char *group4 = ""; - const char *filter; - switch(cid->type) { - case L_FLATE_ENCODE: - filter = "/FlateDecode"; - break; - case L_JPEG_ENCODE: - filter = "/DCTDecode"; - break; - case L_G4_ENCODE: - filter = "/CCITTFaxDecode"; - group4 = " /K -1\n"; - break; - case L_JP2K_ENCODE: - filter = "/JPXDecode"; - break; - default: - l_CIDataDestroy(&cid); - return false; - } - - // Maybe someday we will accept RGBA but today is not that day. - // It requires creating an /SMask for the alpha channel. 
- // http://stackoverflow.com/questions/14220221 - const char *colorspace; - if (cid->ncolors > 0) { - n = snprintf(b0, sizeof(b0), - " /ColorSpace [ /Indexed /DeviceRGB %d %s ]\n", - cid->ncolors - 1, cid->cmapdatahex); - if (n >= sizeof(b0)) { - l_CIDataDestroy(&cid); - return false; - } - colorspace = b0; - } else { - switch (cid->spp) { - case 1: - colorspace = " /ColorSpace /DeviceGray\n"; - break; - case 3: - colorspace = " /ColorSpace /DeviceRGB\n"; - break; - default: - l_CIDataDestroy(&cid); - return false; - } - } - - int predictor = (cid->predictor) ? 14 : 1; - - // IMAGE - n = snprintf(b1, sizeof(b1), - "%ld 0 obj\n" - "<<\n" - " /Length %ld\n" - " /Subtype /Image\n", - objnum, (unsigned long) cid->nbytescomp); - if (n >= sizeof(b1)) { - l_CIDataDestroy(&cid); - return false; - } - - n = snprintf(b2, sizeof(b2), - " /Width %d\n" - " /Height %d\n" - " /BitsPerComponent %d\n" - " /Filter %s\n" - " /DecodeParms\n" - " <<\n" - " /Predictor %d\n" - " /Colors %d\n" - "%s" - " /Columns %d\n" - " /BitsPerComponent %d\n" - " >>\n" - ">>\n" - "stream\n", - cid->w, cid->h, cid->bps, filter, predictor, cid->spp, - group4, cid->w, cid->bps); - if (n >= sizeof(b2)) { - l_CIDataDestroy(&cid); - return false; - } - - const char *b3 = - "endstream\n" - "endobj\n"; - - size_t b1_len = strlen(b1); - size_t b2_len = strlen(b2); - size_t b3_len = strlen(b3); - size_t colorspace_len = strlen(colorspace); - - *pdf_object_size = - b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len; - *pdf_object = new char[*pdf_object_size]; - - char *p = *pdf_object; - memcpy(p, b1, b1_len); - p += b1_len; - memcpy(p, colorspace, colorspace_len); - p += colorspace_len; - memcpy(p, b2, b2_len); - p += b2_len; - memcpy(p, cid->datacomp, cid->nbytescomp); - p += cid->nbytescomp; - memcpy(p, b3, b3_len); - l_CIDataDestroy(&cid); - return true; -} - -bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { - size_t n; - char buf[kBasicBufSize]; - char buf2[kBasicBufSize]; - Pix *pix = 
api->GetInputImage(); - const char* filename = api->GetInputName(); - int ppi = api->GetSourceYResolution(); - if (!pix || ppi <= 0) - return false; - double width = pixGetWidth(pix) * 72.0 / ppi; - double height = pixGetHeight(pix) * 72.0 / ppi; - - snprintf(buf2, sizeof(buf2), "/XObject << /Im1 %ld 0 R >>\n", obj_ + 2); - const char *xobject = (textonly_) ? "" : buf2; - - // PAGE - n = snprintf(buf, sizeof(buf), - "%ld 0 obj\n" - "<<\n" - " /Type /Page\n" - " /Parent %ld 0 R\n" - " /MediaBox [0 0 %.2f %.2f]\n" - " /Contents %ld 0 R\n" - " /Resources\n" - " <<\n" - " %s" - " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n" - " /Font << /f-0-0 %ld 0 R >>\n" - " >>\n" - ">>\n" - "endobj\n", - obj_, - 2L, // Pages object - width, height, - obj_ + 1, // Contents object - xobject, // Image object - 3L); // Type0 Font - if (n >= sizeof(buf)) return false; - pages_.push_back(obj_); - AppendPDFObject(buf); - - // CONTENTS - const std::unique_ptr pdftext(GetPDFTextObjects(api, width, height)); - const size_t pdftext_len = strlen(pdftext.get()); - size_t len; - unsigned char *comp_pdftext = zlibCompress( - reinterpret_cast(pdftext.get()), pdftext_len, &len); - long comp_pdftext_len = len; - n = snprintf(buf, sizeof(buf), - "%ld 0 obj\n" - "<<\n" - " /Length %ld /Filter /FlateDecode\n" - ">>\n" - "stream\n", obj_, comp_pdftext_len); - if (n >= sizeof(buf)) { - lept_free(comp_pdftext); - return false; - } - AppendString(buf); - long objsize = strlen(buf); - AppendData(reinterpret_cast(comp_pdftext), comp_pdftext_len); - objsize += comp_pdftext_len; - lept_free(comp_pdftext); - const char *b2 = - "endstream\n" - "endobj\n"; - AppendString(b2); - objsize += strlen(b2); - AppendPDFObjectDIY(objsize); - - if (!textonly_) { - char *pdf_object = nullptr; - int jpg_quality; - api->GetIntVariable("jpg_quality", &jpg_quality); - if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, - jpg_quality)) { - return false; - } - AppendData(pdf_object, objsize); - 
AppendPDFObjectDIY(objsize); - delete[] pdf_object; - } - return true; -} - - -bool TessPDFRenderer::EndDocumentHandler() { - size_t n; - char buf[kBasicBufSize]; - - // We reserved the /Pages object number early, so that the /Page - // objects could refer to their parent. We finally have enough - // information to go fill it in. Using lower level calls to manipulate - // the offset record in two spots, because we are placing objects - // out of order in the file. - - // PAGES - const long int kPagesObjectNumber = 2; - offsets_[kPagesObjectNumber] = offsets_.back(); // manipulation #1 - n = snprintf(buf, sizeof(buf), - "%ld 0 obj\n" - "<<\n" - " /Type /Pages\n" - " /Kids [ ", kPagesObjectNumber); - if (n >= sizeof(buf)) return false; - AppendString(buf); - size_t pages_objsize = strlen(buf); - for (size_t i = 0; i < pages_.unsigned_size(); i++) { - n = snprintf(buf, sizeof(buf), - "%ld 0 R ", pages_[i]); - if (n >= sizeof(buf)) return false; - AppendString(buf); - pages_objsize += strlen(buf); - } - n = snprintf(buf, sizeof(buf), - "]\n" - " /Count %d\n" - ">>\n" - "endobj\n", pages_.size()); - if (n >= sizeof(buf)) return false; - AppendString(buf); - pages_objsize += strlen(buf); - offsets_.back() += pages_objsize; // manipulation #2 - - // INFO - STRING utf16_title = "FEFF"; // byte_order_marker - std::vector unicodes = UNICHAR::UTF8ToUTF32(title()); - char utf16[kMaxBytesPerCodepoint]; - for (char32 code : unicodes) { - if (CodepointToUtf16be(code, utf16)) { - utf16_title += utf16; - } - } - - char* datestr = l_getFormattedDate(); - n = snprintf(buf, sizeof(buf), - "%ld 0 obj\n" - "<<\n" - " /Producer (Tesseract %s)\n" - " /CreationDate (D:%s)\n" - " /Title <%s>\n" - ">>\n" - "endobj\n", - obj_, tesseract::TessBaseAPI::Version(), - datestr, utf16_title.c_str()); - lept_free(datestr); - if (n >= sizeof(buf)) return false; - AppendPDFObject(buf); - n = snprintf(buf, sizeof(buf), - "xref\n" - "0 %ld\n" - "0000000000 65535 f \n", obj_); - if (n >= sizeof(buf)) 
return false; - AppendString(buf); - for (int i = 1; i < obj_; i++) { - n = snprintf(buf, sizeof(buf), "%010ld 00000 n \n", offsets_[i]); - if (n >= sizeof(buf)) return false; - AppendString(buf); - } - n = snprintf(buf, sizeof(buf), - "trailer\n" - "<<\n" - " /Size %ld\n" - " /Root %ld 0 R\n" - " /Info %ld 0 R\n" - ">>\n" - "startxref\n" - "%ld\n" - "%%%%EOF\n", - obj_, - 1L, // catalog - obj_ - 1, // info - offsets_.back()); - if (n >= sizeof(buf)) return false; - AppendString(buf); - return true; -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/renderer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/renderer.cpp deleted file mode 100644 index af31be8e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/renderer.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: renderer.cpp -// Description: Rendering interface to inject into TessBaseAPI -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include -#include // std::unique_ptr -#include "baseapi.h" -#include "genericvector.h" -#include "renderer.h" - -namespace tesseract { - -/********************************************************************** - * Base Renderer interface implementation - **********************************************************************/ -TessResultRenderer::TessResultRenderer(const char *outputbase, - const char* extension) - : file_extension_(extension), - title_(""), imagenum_(-1), - fout_(stdout), - next_(nullptr), - happy_(true) { - if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { - STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); - fout_ = fopen(outfile.string(), "wb"); - if (fout_ == nullptr) { - happy_ = false; - } - } -} - -TessResultRenderer::~TessResultRenderer() { - if (fout_ != nullptr) { - if (fout_ != stdout) - fclose(fout_); - else - clearerr(fout_); - } - delete next_; -} - -void TessResultRenderer::insert(TessResultRenderer* next) { - if (next == nullptr) return; - - TessResultRenderer* remainder = next_; - next_ = next; - if (remainder) { - while (next->next_ != nullptr) { - next = next->next_; - } - next->next_ = remainder; - } -} - -bool TessResultRenderer::BeginDocument(const char* title) { - if (!happy_) return false; - title_ = title; - imagenum_ = -1; - bool ok = BeginDocumentHandler(); - if (next_) { - ok = next_->BeginDocument(title) && ok; - } - return ok; -} - -bool TessResultRenderer::AddImage(TessBaseAPI* api) { - if (!happy_) return false; - ++imagenum_; - bool ok = AddImageHandler(api); - if (next_) { - ok = next_->AddImage(api) && ok; - } - return ok; -} - -bool TessResultRenderer::EndDocument() { - if (!happy_) return false; - bool ok = EndDocumentHandler(); - if (next_) { - ok = next_->EndDocument() && ok; - } - return ok; -} - -void 
TessResultRenderer::AppendString(const char* s) { - AppendData(s, strlen(s)); -} - -void TessResultRenderer::AppendData(const char* s, int len) { - if (!tesseract::Serialize(fout_, s, len)) happy_ = false; -} - -bool TessResultRenderer::BeginDocumentHandler() { - return happy_; -} - -bool TessResultRenderer::EndDocumentHandler() { - return happy_; -} - - -/********************************************************************** - * UTF8 Text Renderer interface implementation - **********************************************************************/ -TessTextRenderer::TessTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "txt") { -} - -bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr utf8(api->GetUTF8Text()); - if (utf8 == nullptr) { - return false; - } - - AppendString(utf8.get()); - - const char* pageSeparator = api->GetStringVariable("page_separator"); - if (pageSeparator != nullptr && *pageSeparator != '\0') { - AppendString(pageSeparator); - } - - return true; -} - -/********************************************************************** - * HOcr Text Renderer interface implementation - **********************************************************************/ -TessHOcrRenderer::TessHOcrRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "hocr") { - font_info_ = false; -} - -TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) - : TessResultRenderer(outputbase, "hocr") { - font_info_ = font_info; -} - -bool TessHOcrRenderer::BeginDocumentHandler() { - AppendString( - "\n" - "\n" - "\n \n "); - AppendString(title()); - AppendString( - "\n" - "\n" - " \n" - " \n" - "\n\n"); - - return true; -} - -bool TessHOcrRenderer::EndDocumentHandler() { - AppendString(" \n\n"); - - return true; -} - -bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr hocr(api->GetHOCRText(imagenum())); - if (hocr == nullptr) return false; - - AppendString(hocr.get()); - - 
return true; -} - -/********************************************************************** - * TSV Text Renderer interface implementation - **********************************************************************/ -TessTsvRenderer::TessTsvRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "tsv") { - font_info_ = false; -} - -TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) - : TessResultRenderer(outputbase, "tsv") { - font_info_ = font_info; -} - -bool TessTsvRenderer::BeginDocumentHandler() { - // Output TSV column headings - AppendString( - "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" - "num\tleft\ttop\twidth\theight\tconf\ttext\n"); - return true; -} - -bool TessTsvRenderer::EndDocumentHandler() { return true; } - -bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr tsv(api->GetTSVText(imagenum())); - if (tsv == nullptr) return false; - - AppendString(tsv.get()); - - return true; -} - -/********************************************************************** - * UNLV Text Renderer interface implementation - **********************************************************************/ -TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "unlv") { -} - -bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr unlv(api->GetUNLVText()); - if (unlv == nullptr) return false; - - AppendString(unlv.get()); - - return true; -} - -/********************************************************************** - * BoxText Renderer interface implementation - **********************************************************************/ -TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "box") { -} - -bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr text(api->GetBoxText(imagenum())); - if (text == nullptr) return false; - - AppendString(text.get()); - - return true; 
-} - -#ifndef DISABLED_LEGACY_ENGINE - -/********************************************************************** - * Osd Text Renderer interface implementation - **********************************************************************/ -TessOsdRenderer::TessOsdRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "osd") {} - -bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { - char* osd = api->GetOsdText(imagenum()); - if (osd == nullptr) return false; - - AppendString(osd); - delete[] osd; - - return true; -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/renderer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/renderer.h deleted file mode 100644 index 6c753403..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/renderer.h +++ /dev/null @@ -1,262 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: renderer.h -// Description: Rendering interface to inject into TessBaseAPI -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_API_RENDERER_H_ -#define TESSERACT_API_RENDERER_H_ - -// To avoid collision with other typenames include the ABSOLUTE MINIMUM -// complexity of includes here. 
Use forward declarations wherever possible -// and hide includes of complex types in baseapi.cpp. -#include // for std::string -#include "genericvector.h" -#include "platform.h" - -namespace tesseract { - -class TessBaseAPI; - -/** - * Interface for rendering tesseract results into a document, such as text, - * HOCR or pdf. This class is abstract. Specific classes handle individual - * formats. This interface is then used to inject the renderer class into - * tesseract when processing images. - * - * For simplicity implementing this with tesesract version 3.01, - * the renderer contains document state that is cleared from document - * to document just as the TessBaseAPI is. This way the base API can just - * delegate its rendering functionality to injected renderers, and the - * renderers can manage the associated state needed for the specific formats - * in addition to the heuristics for producing it. - */ -class TESS_API TessResultRenderer { - public: - virtual ~TessResultRenderer(); - - // Takes ownership of pointer so must be new'd instance. - // Renderers aren't ordered, but appends the sequences of next parameter - // and existing next(). The renderers should be unique across both lists. - void insert(TessResultRenderer* next); - - // Returns the next renderer or nullptr. - TessResultRenderer* next() { return next_; } - - /** - * Starts a new document with the given title. - * This clears the contents of the output data. - * Title should use UTF-8 encoding. - */ - bool BeginDocument(const char* title); - - /** - * Adds the recognized text from the source image to the current document. - * Invalid if BeginDocument not yet called. - * - * Note that this API is a bit weird but is designed to fit into the - * current TessBaseAPI implementation where the api has lots of state - * information that we might want to add in. 
- */ - bool AddImage(TessBaseAPI* api); - - /** - * Finishes the document and finalizes the output data - * Invalid if BeginDocument not yet called. - */ - bool EndDocument(); - - const char* file_extension() const { return file_extension_; } - const char* title() const { return title_.c_str(); } - - // Is everything fine? Otherwise something went wrong. - bool happy() { return happy_; } - - /** - * Returns the index of the last image given to AddImage - * (i.e. images are incremented whether the image succeeded or not) - * - * This is always defined. It means either the number of the - * current image, the last image ended, or in the completed document - * depending on when in the document lifecycle you are looking at it. - * Will return -1 if a document was never started. - */ - int imagenum() const { return imagenum_; } - - protected: - /** - * Called by concrete classes. - * - * outputbase is the name of the output file excluding - * extension. For example, "/path/to/chocolate-chip-cookie-recipe" - * - * extension indicates the file extension to be used for output - * files. For example "pdf" will produce a .pdf file, and "hocr" - * will produce .hocr files. - */ - TessResultRenderer(const char *outputbase, - const char* extension); - - // Hook for specialized handling in BeginDocument() - virtual bool BeginDocumentHandler(); - - // This must be overridden to render the OCR'd results - virtual bool AddImageHandler(TessBaseAPI* api) = 0; - - // Hook for specialized handling in EndDocument() - virtual bool EndDocumentHandler(); - - // Renderers can call this to append '\0' terminated strings into - // the output string returned by GetOutput. - // This method will grow the output buffer if needed. - void AppendString(const char* s); - - // Renderers can call this to append binary byte sequences into - // the output string returned by GetOutput. Note that s is not necessarily - // '\0' terminated (and can contain '\0' within it). 
- // This method will grow the output buffer if needed. - void AppendData(const char* s, int len); - - private: - const char* file_extension_; // standard extension for generated output - STRING title_; // title of document being renderered - int imagenum_; // index of last image added - - FILE* fout_; // output file pointer - TessResultRenderer* next_; // Can link multiple renderers together - bool happy_; // I get grumpy when the disk fills up, etc. -}; - -/** - * Renders tesseract output into a plain UTF-8 text string - */ -class TESS_API TessTextRenderer : public TessResultRenderer { - public: - explicit TessTextRenderer(const char *outputbase); - - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; - -/** - * Renders tesseract output into an hocr text string - */ -class TESS_API TessHOcrRenderer : public TessResultRenderer { - public: - explicit TessHOcrRenderer(const char *outputbase, bool font_info); - explicit TessHOcrRenderer(const char *outputbase); - - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); - - private: - bool font_info_; // whether to print font information -}; - -/** - * Renders Tesseract output into a TSV string - */ -class TESS_API TessTsvRenderer : public TessResultRenderer { - public: - explicit TessTsvRenderer(const char* outputbase, bool font_info); - explicit TessTsvRenderer(const char* outputbase); - - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); - - private: - bool font_info_; // whether to print font information -}; - -/** - * Renders tesseract output into searchable PDF - */ -class TESS_API TessPDFRenderer : public TessResultRenderer { - public: - // datadir is the location of the TESSDATA. We need it because - // we load a custom PDF font from this location. 
- TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false); - - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); - - private: - // We don't want to have every image in memory at once, - // so we store some metadata as we go along producing - // PDFs one page at a time. At the end, that metadata is - // used to make everything that isn't easily handled in a - // streaming fashion. - long int obj_; // counter for PDF objects - GenericVector offsets_; // offset of every PDF object in bytes - GenericVector pages_; // object number for every /Page object - std::string datadir_; // where to find the custom font - bool textonly_; // skip images if set - // Bookkeeping only. DIY = Do It Yourself. - void AppendPDFObjectDIY(size_t objectsize); - // Bookkeeping + emit data. - void AppendPDFObject(const char *data); - // Create the /Contents object for an entire page. - char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); - // Turn an image into a PDF object. Only transcode if we have to. 
- static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum, - char** pdf_object, long int* pdf_object_size, const int jpg_quality); -}; - - -/** - * Renders tesseract output into a plain UTF-8 text string - */ -class TESS_API TessUnlvRenderer : public TessResultRenderer { - public: - explicit TessUnlvRenderer(const char *outputbase); - - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; - -/** - * Renders tesseract output into a plain UTF-8 text string - */ -class TESS_API TessBoxTextRenderer : public TessResultRenderer { - public: - explicit TessBoxTextRenderer(const char *outputbase); - - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; - -#ifndef DISABLED_LEGACY_ENGINE - -/** - * Renders tesseract output into an osd text string - */ -class TESS_API TessOsdRenderer : public TessResultRenderer { - public: - explicit TessOsdRenderer(const char* outputbase); - - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; - -#endif // ndef DISABLED_LEGACY_ENGINE - -} // namespace tesseract. - -#endif // TESSERACT_API_RENDERER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/tess_version.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/tess_version.h deleted file mode 100644 index 88e91441..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/tess_version.h +++ /dev/null @@ -1,30 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: version.h -// Description: Version information -// -// (C) Copyright 2018, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_API_VERSION_H_ -#define TESSERACT_API_VERSION_H_ - -#define TESSERACT_MAJOR_VERSION 4 -#define TESSERACT_MINOR_VERSION 0 -#define TESSERACT_MICRO_VERSION 0 -#define TESSERACT_VERSION \ - (TESSERACT_MAJOR_VERSION << 16 | \ - TESSERACT_MINOR_VERSION << 8 | \ - TESSERACT_MICRO_VERSION) -#define TESSERACT_VERSION_STR "4.0.0" - -#endif // TESSERACT_API_VERSION_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/tesseractmain.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/tesseractmain.cpp deleted file mode 100644 index 5fd2cf63..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/api/tesseractmain.cpp +++ /dev/null @@ -1,696 +0,0 @@ -/********************************************************************** - * File: tesseractmain.cpp (Formerly tessedit.c) - * Description: Main program for merge of tess and editor. - * Author: Ray Smith - * Created: Tue Jan 07 15:21:46 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include // for errno -#include - -#include "allheaders.h" -#include "baseapi.h" -#include "basedir.h" -#include "dict.h" -#include "openclwrapper.h" -#include "osdetect.h" -#include "renderer.h" -#include "simddetect.h" -#include "strngs.h" -#include "tprintf.h" // for tprintf - -#if defined(_WIN32) -#include -#include -#if defined(HAVE_TIFFIO_H) - -#include - -static void Win32ErrorHandler(const char* module, const char* fmt, - va_list ap) { - if (module != nullptr) { - fprintf(stderr, "%s: ", module); - } - vfprintf(stderr, fmt, ap); - fprintf(stderr, ".\n"); -} - -static void Win32WarningHandler(const char* module, const char* fmt, - va_list ap) { - if (module != nullptr) { - fprintf(stderr, "%s: ", module); - } - fprintf(stderr, "Warning, "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, ".\n"); -} - -#endif /* HAVE_TIFFIO_H */ -#endif // _WIN32 - -static void PrintVersionInfo() { - char* versionStrP; - - printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); - - versionStrP = getLeptonicaVersion(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); - - versionStrP = getImagelibVersions(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); - -#ifdef USE_OPENCL - cl_platform_id platform[4]; - cl_uint num_platforms; - - printf(" OpenCL info:\n"); - if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) { - printf(" Found %u platform(s).\n", num_platforms); - for (unsigned n = 0; n < num_platforms; n++) { - char info[256]; - if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == - CL_SUCCESS) { - printf(" Platform %u name: %s.\n", n + 1, info); - } - if (clGetPlatformInfo(platform[n], 
CL_PLATFORM_VERSION, 256, info, 0) == - CL_SUCCESS) { - printf(" Version: %s.\n", info); - } - cl_device_id devices[2]; - cl_uint num_devices; - if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, - &num_devices) == CL_SUCCESS) { - printf(" Found %u device(s).\n", num_devices); - for (unsigned i = 0; i < num_devices; ++i) { - if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == - CL_SUCCESS) { - printf(" Device %u name: %s.\n", i + 1, info); - } - } - } - } - } -#endif - if (SIMDDetect::IsAVX512BWAvailable()) printf(" Found AVX512BW\n"); - if (SIMDDetect::IsAVX512FAvailable()) printf(" Found AVX512F\n"); - if (SIMDDetect::IsAVX2Available()) printf(" Found AVX2\n"); - if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n"); - if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n"); -} - -static void PrintHelpForPSM() { - const char* msg = - "Page segmentation modes:\n" - " 0 Orientation and script detection (OSD) only.\n" - " 1 Automatic page segmentation with OSD.\n" - " 2 Automatic page segmentation, but no OSD, or OCR.\n" - " 3 Fully automatic page segmentation, but no OSD. (Default)\n" - " 4 Assume a single column of text of variable sizes.\n" - " 5 Assume a single uniform block of vertically aligned text.\n" - " 6 Assume a single uniform block of text.\n" - " 7 Treat the image as a single text line.\n" - " 8 Treat the image as a single word.\n" - " 9 Treat the image as a single word in a circle.\n" - " 10 Treat the image as a single character.\n" - " 11 Sparse text. Find as much text as possible in no" - " particular order.\n" - " 12 Sparse text with OSD.\n" - " 13 Raw line. 
Treat the image as a single text line,\n" - " bypassing hacks that are Tesseract-specific.\n"; - -#ifdef DISABLED_LEGACY_ENGINE - const char* disabled_osd_msg = - "\nNOTE: The OSD modes are currently disabled.\n"; - printf("%s%s", msg, disabled_osd_msg); -#else - printf("%s", msg); -#endif -} - -#ifndef DISABLED_LEGACY_ENGINE -static void PrintHelpForOEM() { - const char* msg = - "OCR Engine modes:\n" - " 0 Legacy engine only.\n" - " 1 Neural nets LSTM engine only.\n" - " 2 Legacy + LSTM engines.\n" - " 3 Default, based on what is available.\n"; - - printf("%s", msg); -} -#endif // ndef DISABLED_LEGACY_ENGINE - -static void PrintHelpExtra(const char* program) { - printf( - "Usage:\n" - " %s --help | --help-extra | --help-psm | " -#ifndef DISABLED_LEGACY_ENGINE - "--help-oem | " -#endif - "--version\n" - " %s --list-langs [--tessdata-dir PATH]\n" - " %s --print-parameters [options...] [configfile...]\n" - " %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n" - "\n" - "OCR options:\n" - " --tessdata-dir PATH Specify the location of tessdata path.\n" - " --user-words PATH Specify the location of user words file.\n" - " --user-patterns PATH Specify the location of user patterns file.\n" - " --dpi VALUE Specify DPI for input image.\n" - " -l LANG[+LANG] Specify language(s) used for OCR.\n" - " -c VAR=VALUE Set value for config variables.\n" - " Multiple -c arguments are allowed.\n" - " --psm NUM Specify page segmentation mode.\n" -#ifndef DISABLED_LEGACY_ENGINE - " --oem NUM Specify OCR Engine mode.\n" -#endif - "NOTE: These options must occur before any configfile.\n" - "\n", - program, program, program, program - ); - - PrintHelpForPSM(); -#ifndef DISABLED_LEGACY_ENGINE - printf("\n"); - PrintHelpForOEM(); -#endif - - printf( - "\n" - "Single options:\n" - " -h, --help Show minimal help message.\n" - " --help-extra Show extra help for advanced users.\n" - " --help-psm Show page segmentation modes.\n" -#ifndef DISABLED_LEGACY_ENGINE - " 
--help-oem Show OCR Engine modes.\n" -#endif - " -v, --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n" - " --print-parameters Print tesseract parameters.\n" - ); -} - -static void PrintHelpMessage(const char* program) { - printf( - "Usage:\n" - " %s --help | --help-extra | --version\n" - " %s --list-langs\n" - " %s imagename outputbase [options...] [configfile...]\n" - "\n" - "OCR options:\n" - " -l LANG[+LANG] Specify language(s) used for OCR.\n" - "NOTE: These options must occur before any configfile.\n" - "\n" - "Single options:\n" - " --help Show this help message.\n" - " --help-extra Show extra help for advanced users.\n" - " --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n", - program, program, program - ); -} - -static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, - char** argv) { - char opt1[256], opt2[255]; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - strncpy(opt1, argv[i + 1], 255); - opt1[255] = '\0'; - char* p = strchr(opt1, '='); - if (!p) { - fprintf(stderr, "Missing = in configvar assignment\n"); - exit(EXIT_FAILURE); - } - *p = 0; - strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255); - opt2[254] = 0; - ++i; - - if (!api->SetVariable(opt1, opt2)) { - fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); - } - } - } -} - -static void PrintLangsList(tesseract::TessBaseAPI* api) { - GenericVector languages; - api->GetAvailableLanguagesAsVector(&languages); - printf("List of available languages (%d):\n", languages.size()); - for (int index = 0; index < languages.size(); ++index) { - STRING& string = languages[index]; - printf("%s\n", string.string()); - } - api->End(); -} - -static void PrintBanner() { - tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", - tesseract::TessBaseAPI::Version()); -} - -/** - * We have 2 possible sources of pagesegmode: a config file 
and - * the command line. For backwards compatibility reasons, the - * default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the - * default for this program is tesseract::PSM_AUTO. We will let - * the config file take priority, so the command-line default - * can take priority over the tesseract default, so we use the - * value from the command line only if the retrieved mode - * is still tesseract::PSM_SINGLE_BLOCK, indicating no change - * in any config file. Therefore the only way to force - * tesseract::PSM_SINGLE_BLOCK is from the command line. - * It would be simpler if we could set the value before Init, - * but that doesn't work. - */ -static void FixPageSegMode(tesseract::TessBaseAPI* api, - tesseract::PageSegMode pagesegmode) { - if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) - api->SetPageSegMode(pagesegmode); -} - -static void checkArgValues(int arg, const char* mode, int count) { - if (arg >= count || arg < 0) { - printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); - exit(EXIT_SUCCESS); - } -} - -// NOTE: arg_i is used here to avoid ugly *i so many times in this function -static void ParseArgs(const int argc, char** argv, const char** lang, - const char** image, const char** outputbase, - const char** datapath, l_int32* dpi, bool* list_langs, - bool* print_parameters, GenericVector* vars_vec, - GenericVector* vars_values, l_int32* arg_i, - tesseract::PageSegMode* pagesegmode, - tesseract::OcrEngineMode* enginemode) { - bool noocr = false; - int i; - for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) { - if (*image != nullptr && *outputbase == nullptr) { - // outputbase follows image, don't allow options at that position. 
- *outputbase = argv[i]; - } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { - PrintHelpMessage(argv[0]); - noocr = true; - } else if (strcmp(argv[i], "--help-extra") == 0) { - PrintHelpExtra(argv[0]); - noocr = true; - } else if ((strcmp(argv[i], "--help-psm") == 0)) { - PrintHelpForPSM(); - noocr = true; -#ifndef DISABLED_LEGACY_ENGINE - } else if ((strcmp(argv[i], "--help-oem") == 0)) { - PrintHelpForOEM(); - noocr = true; -#endif - } else if ((strcmp(argv[i], "-v") == 0) || - (strcmp(argv[i], "--version") == 0)) { - PrintVersionInfo(); - noocr = true; - } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) { - *lang = argv[i + 1]; - ++i; - } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) { - *datapath = argv[i + 1]; - ++i; - } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) { - *dpi = atoi(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) { - vars_vec->push_back("user_words_file"); - vars_values->push_back(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) { - vars_vec->push_back("user_patterns_file"); - vars_values->push_back(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--list-langs") == 0) { - noocr = true; - *list_langs = true; - } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { - checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT); - *pagesegmode = static_cast(atoi(argv[i + 1])); - ++i; - } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { -#ifndef DISABLED_LEGACY_ENGINE - int oem = atoi(argv[i + 1]); - checkArgValues(oem, "OEM", tesseract::OEM_COUNT); - *enginemode = static_cast(oem); -#endif - ++i; - } else if (strcmp(argv[i], "--print-parameters") == 0) { - noocr = true; - *print_parameters = true; - } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - // handled properly after api init - ++i; - } else if (*image == nullptr) { - *image = argv[i]; - } else { - // Unexpected argument. 
- fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]); - exit(EXIT_FAILURE); - } - } - - *arg_i = i; - - if (*pagesegmode == tesseract::PSM_OSD_ONLY) { - // OSD = orientation and script detection. - if (*lang != nullptr && strcmp(*lang, "osd")) { - // If the user explicitly specifies a language (other than osd) - // or a script, only orientation can be detected. - fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang); - } else { - // That mode requires osd.traineddata to detect orientation and script. - *lang = "osd"; - } - } - - if (*outputbase == nullptr && noocr == false) { - PrintHelpMessage(argv[0]); - exit(EXIT_FAILURE); - } -} - -static void PreloadRenderers( - tesseract::TessBaseAPI* api, - tesseract::PointerVector* renderers, - tesseract::PageSegMode pagesegmode, const char* outputbase) { - if (pagesegmode == tesseract::PSM_OSD_ONLY) { -#ifndef DISABLED_LEGACY_ENGINE - renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); -#endif // ndef DISABLED_LEGACY_ENGINE - } else { - bool b; - api->GetBoolVariable("tessedit_create_hocr", &b); - if (b) { - bool font_info; - api->GetBoolVariable("hocr_font_info", &font_info); - tesseract::TessHOcrRenderer* renderer = - new tesseract::TessHOcrRenderer(outputbase, font_info); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create hOCR output file: %s\n", - strerror(errno)); - } - } - - api->GetBoolVariable("tessedit_create_tsv", &b); - if (b) { - bool font_info; - api->GetBoolVariable("hocr_font_info", &font_info); - tesseract::TessTsvRenderer* renderer = - new tesseract::TessTsvRenderer(outputbase, font_info); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create TSV output file: %s\n", - strerror(errno)); - } - } - - api->GetBoolVariable("tessedit_create_pdf", &b); - if (b) { - #ifdef WIN32 - if (_setmode(_fileno(stdout), 
_O_BINARY) == -1) - tprintf("ERROR: cin to binary: %s", strerror(errno)); - #endif // WIN32 - bool textonly; - api->GetBoolVariable("textonly_pdf", &textonly); - tesseract::TessPDFRenderer* renderer = - new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(), - textonly); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create PDF output file: %s\n", - strerror(errno)); - } - } - - api->GetBoolVariable("tessedit_write_unlv", &b); - if (b) { - api->SetVariable("unlv_tilde_crunching", "true"); - tesseract::TessUnlvRenderer* renderer = - new tesseract::TessUnlvRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create UNLV output file: %s\n", - strerror(errno)); - } - } - - api->GetBoolVariable("tessedit_create_boxfile", &b); - if (b) { - tesseract::TessBoxTextRenderer* renderer = - new tesseract::TessBoxTextRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create BOX output file: %s\n", - strerror(errno)); - } - } - - api->GetBoolVariable("tessedit_create_txt", &b); - if (b || renderers->empty()) { - tesseract::TessTextRenderer* renderer = - new tesseract::TessTextRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create TXT output file: %s\n", - strerror(errno)); - } - } - } - - if (!renderers->empty()) { - // Since the PointerVector auto-deletes, null-out the renderers that are - // added to the root, and leave the root in the vector. 
- for (int r = 1; r < renderers->size(); ++r) { - (*renderers)[0]->insert((*renderers)[r]); - (*renderers)[r] = nullptr; - } - } -} - - -/********************************************************************** - * main() - * - **********************************************************************/ - -int main(int argc, char** argv) { - const char* lang = nullptr; - const char* image = nullptr; - const char* outputbase = nullptr; - const char* datapath = nullptr; - bool list_langs = false; - bool print_parameters = false; - l_int32 dpi = 0; - int arg_i = 1; - tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; -#ifdef DISABLED_LEGACY_ENGINE - auto enginemode = tesseract::OEM_LSTM_ONLY; -#else - tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT; -#endif - /* main() calls functions like ParseArgs which call exit(). - * This results in memory leaks if vars_vec and vars_values are - * declared as auto variables (destructor is not called then). */ - static GenericVector vars_vec; - static GenericVector vars_values; - -#if !defined(DEBUG) - // Disable debugging and informational messages from Leptonica. - setMsgSeverity(L_SEVERITY_ERROR); -#endif - -#if defined(HAVE_TIFFIO_H) && defined(_WIN32) - /* Show libtiff errors and warnings on console (not in GUI). */ - TIFFSetErrorHandler(Win32ErrorHandler); - TIFFSetWarningHandler(Win32WarningHandler); -#endif // HAVE_TIFFIO_H && _WIN32 - - ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, - &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i, - &pagesegmode, &enginemode); - - if (lang == nullptr) { - // Set default language if none was given. - lang = "eng"; - } - - if (image == nullptr && !list_langs && !print_parameters) - return EXIT_SUCCESS; - - PERF_COUNT_START("Tesseract:main") - - // Call GlobalDawgCache here to create the global DawgCache object before - // the TessBaseAPI object. 
This fixes the order of destructor calls: - // first TessBaseAPI must be destructed, DawgCache must be the last object. - tesseract::Dict::GlobalDawgCache(); - - // Avoid memory leak caused by auto variable when return is called. - static tesseract::TessBaseAPI api; - - api.SetOutputName(outputbase); - - const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), - argc - arg_i, &vars_vec, &vars_values, false); - - SetVariablesFromCLArgs(&api, argc, argv); - - if (list_langs) { - PrintLangsList(&api); - return EXIT_SUCCESS; - } - - if (init_failed) { - fprintf(stderr, "Could not initialize tesseract.\n"); - return EXIT_FAILURE; - } - - if (print_parameters) { - FILE* fout = stdout; - fprintf(stdout, "Tesseract parameters:\n"); - api.PrintVariables(fout); - api.End(); - return EXIT_SUCCESS; - } - - FixPageSegMode(&api, pagesegmode); - - if (dpi) { - char dpi_string[255]; - snprintf(dpi_string, 254, "%d", dpi); - api.SetVariable("user_defined_dpi", dpi_string); - } - - if (pagesegmode == tesseract::PSM_AUTO_ONLY) { - int ret_val = EXIT_SUCCESS; - - Pix* pixs = pixRead(image); - if (!pixs) { - fprintf(stderr, "Leptonica can't process input file: %s\n", image); - return 2; - } - - api.SetImage(pixs); - - tesseract::Orientation orientation; - tesseract::WritingDirection direction; - tesseract::TextlineOrder order; - float deskew_angle; - - const tesseract::PageIterator* it = api.AnalyseLayout(); - if (it) { - it->Orientation(&orientation, &direction, &order, &deskew_angle); - tprintf( - "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" - "Deskew angle: %.4f\n", - orientation, direction, order, deskew_angle); - } else { - ret_val = EXIT_FAILURE; - } - - delete it; - - pixDestroy(&pixs); - return ret_val; - } - - // set in_training_mode to true when using one of these configs: - // ambigs.train, box.train, box.train.stderr, linebox, rebox - bool b = false; - bool in_training_mode = - (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || 
- (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); - -#ifdef DISABLED_LEGACY_ENGINE - auto cur_psm = api.GetPageSegMode(); - auto osd_warning = std::string(""); - if (cur_psm == tesseract::PSM_OSD_ONLY) { - const char* disabled_osd_msg = - "\nERROR: The page segmentation mode 0 (OSD Only) is currently disabled.\n\n"; - fprintf(stderr, "%s", disabled_osd_msg); - return EXIT_FAILURE; - } else if (cur_psm == tesseract::PSM_AUTO_OSD) { - api.SetPageSegMode(tesseract::PSM_AUTO); - osd_warning += - "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently disabled. " - "Using PSM 3 (Auto) instead.\n\n"; - } else if (tesseract::PSM_SPARSE_TEXT_OSD) { - api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT); - osd_warning += - "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is currently disabled. " - "Using PSM 11 (Sparse text) instead.\n\n"; - } -#endif // def DISABLED_LEGACY_ENGINE - - // Avoid memory leak caused by auto variable when exit() is called. 
- static tesseract::PointerVector renderers; - - if (in_training_mode) { - renderers.push_back(nullptr); - } else { - PreloadRenderers(&api, &renderers, pagesegmode, outputbase); - } - - bool banner = false; - if (outputbase != nullptr && strcmp(outputbase, "-") && - strcmp(outputbase, "stdout")) { - banner = true; - } - - if (!renderers.empty()) { - if (banner) PrintBanner(); -#ifdef DISABLED_LEGACY_ENGINE - if (!osd_warning.empty()) { - fprintf(stderr, "%s",osd_warning.c_str()); - } -#endif - bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]); - if (!succeed) { - fprintf(stderr, "Error during processing.\n"); - return EXIT_FAILURE; - } - } - - PERF_COUNT_END - - return EXIT_SUCCESS; -} - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/Makefile.am deleted file mode 100644 index 602e4b65..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -AM_CPPFLAGS += -I$(top_srcdir)/src/ccstruct -I$(top_srcdir)/src/ccutil -I$(top_srcdir)/src/viewer - -SUBDIRS = -AM_CXXFLAGS = - -if VISIBILITY -AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -AM_CPPFLAGS += -DTESS_EXPORTS -endif - -pkginclude_HEADERS = - -noinst_HEADERS = dotproductavx.h dotproductsse.h -noinst_HEADERS += intsimdmatrix.h intsimdmatrixavx2.h intsimdmatrixsse.h -noinst_HEADERS += simddetect.h - -noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la -noinst_LTLIBRARIES += libtesseract_arch.la - -if AVX_OPT -libtesseract_avx_la_CXXFLAGS = -mavx -endif -if AVX2_OPT -libtesseract_avx2_la_CXXFLAGS = -mavx2 -endif -if SSE41_OPT -libtesseract_sse_la_CXXFLAGS = -msse4.1 -endif - -libtesseract_arch_la_SOURCES = intsimdmatrix.cpp simddetect.cpp - -libtesseract_avx_la_SOURCES = dotproductavx.cpp - -libtesseract_avx2_la_SOURCES = intsimdmatrixavx2.cpp - -libtesseract_sse_la_SOURCES = dotproductsse.cpp 
intsimdmatrixsse.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductavx.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductavx.cpp deleted file mode 100644 index 297e0270..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductavx.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dotproductavx.cpp -// Description: Architecture-specific dot-product function. -// Author: Ray Smith -// Created: Wed Jul 22 10:48:05 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#if !defined(__AVX__) -// Implementation for non-avx archs. - -#include "dotproductavx.h" -#include -#include - -namespace tesseract { -double DotProductAVX(const double* u, const double* v, int n) { - fprintf(stderr, "DotProductAVX can't be used on Android\n"); - abort(); -} -} // namespace tesseract - -#else // !defined(__AVX__) -// Implementation for avx capable archs. -#include -#include -#include "dotproductavx.h" - -namespace tesseract { - -// Computes and returns the dot product of the n-vectors u and v. -// Uses Intel AVX intrinsics to access the SIMD instruction set. 
-double DotProductAVX(const double* u, const double* v, int n) { - int max_offset = n - 4; - int offset = 0; - // Accumulate a set of 4 sums in sum, by loading pairs of 4 values from u and - // v, and multiplying them together in parallel. - __m256d sum = _mm256_setzero_pd(); - if (offset <= max_offset) { - offset = 4; - // Aligned load is reputedly faster but requires 32 byte aligned input. - if ((reinterpret_cast(u) & 31) == 0 && - (reinterpret_cast(v) & 31) == 0) { - // Use aligned load. - __m256d floats1 = _mm256_load_pd(u); - __m256d floats2 = _mm256_load_pd(v); - // Multiply. - sum = _mm256_mul_pd(floats1, floats2); - while (offset <= max_offset) { - floats1 = _mm256_load_pd(u + offset); - floats2 = _mm256_load_pd(v + offset); - offset += 4; - __m256d product = _mm256_mul_pd(floats1, floats2); - sum = _mm256_add_pd(sum, product); - } - } else { - // Use unaligned load. - __m256d floats1 = _mm256_loadu_pd(u); - __m256d floats2 = _mm256_loadu_pd(v); - // Multiply. - sum = _mm256_mul_pd(floats1, floats2); - while (offset <= max_offset) { - floats1 = _mm256_loadu_pd(u + offset); - floats2 = _mm256_loadu_pd(v + offset); - offset += 4; - __m256d product = _mm256_mul_pd(floats1, floats2); - sum = _mm256_add_pd(sum, product); - } - } - } - // Add the 4 product sums together horizontally. Not so easy as with sse, as - // there is no add across the upper/lower 128 bit boundary, so permute to - // move the upper 128 bits to lower in another register. - __m256d sum2 = _mm256_permute2f128_pd(sum, sum, 1); - sum = _mm256_hadd_pd(sum, sum2); - sum = _mm256_hadd_pd(sum, sum); - double result; - // _mm256_extract_f64 doesn't exist, but resist the temptation to use an sse - // instruction, as that introduces a 70 cycle delay. All this casting is to - // fool the intrinsics into thinking we are extracting the bottom int64. 
- auto cast_sum = _mm256_castpd_si256(sum); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wstrict-aliasing" - *(reinterpret_cast(&result)) = -#if defined(_WIN32) || defined(__i386__) - // This is a very simple workaround that is activated - // for all platforms that do not have _mm256_extract_epi64. - // _mm256_extract_epi64(X, Y) == ((uint64_t*)&X)[Y] - ((uint64_t*)&cast_sum)[0] -#else - _mm256_extract_epi64(cast_sum, 0) -#endif - ; -#pragma GCC diagnostic pop - while (offset < n) { - result += u[offset] * v[offset]; - ++offset; - } - return result; -} - -} // namespace tesseract. - -#endif // ANDROID_BUILD diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductavx.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductavx.h deleted file mode 100644 index ef00cdfb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductavx.h +++ /dev/null @@ -1,30 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dotproductavx.h -// Description: Architecture-specific dot-product function. -// Author: Ray Smith -// Created: Wed Jul 22 10:51:05 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_ -#define TESSERACT_ARCH_DOTPRODUCTAVX_H_ - -namespace tesseract { - -// Computes and returns the dot product of the n-vectors u and v. -// Uses Intel AVX intrinsics to access the SIMD instruction set. -double DotProductAVX(const double* u, const double* v, int n); - -} // namespace tesseract. - -#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductsse.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductsse.cpp deleted file mode 100644 index 149dc196..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductsse.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dotproductsse.cpp -// Description: Architecture-specific dot-product function. -// Author: Ray Smith -// Created: Wed Jul 22 10:57:45 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#if !defined(__SSE4_1__) -// This code can't compile with "-msse4.1", so use dummy stubs. 
- -#include "dotproductsse.h" -#include -#include - -namespace tesseract { -double DotProductSSE(const double* u, const double* v, int n) { - fprintf(stderr, "DotProductSSE can't be used on Android\n"); - abort(); -} -int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) { - fprintf(stderr, "IntDotProductSSE can't be used on Android\n"); - abort(); -} -} // namespace tesseract - -#else // !defined(__SSE4_1__) -// Non-Android code here - -#include -#include -#include -#include "dotproductsse.h" - -namespace tesseract { - -// Computes and returns the dot product of the n-vectors u and v. -// Uses Intel SSE intrinsics to access the SIMD instruction set. -double DotProductSSE(const double* u, const double* v, int n) { - int max_offset = n - 2; - int offset = 0; - // Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and - // v, and multiplying them together in parallel. - __m128d sum = _mm_setzero_pd(); - if (offset <= max_offset) { - offset = 2; - // Aligned load is reputedly faster but requires 16 byte aligned input. - if ((reinterpret_cast(u) & 15) == 0 && - (reinterpret_cast(v) & 15) == 0) { - // Use aligned load. - sum = _mm_load_pd(u); - __m128d floats2 = _mm_load_pd(v); - // Multiply. - sum = _mm_mul_pd(sum, floats2); - while (offset <= max_offset) { - __m128d floats1 = _mm_load_pd(u + offset); - floats2 = _mm_load_pd(v + offset); - offset += 2; - floats1 = _mm_mul_pd(floats1, floats2); - sum = _mm_add_pd(sum, floats1); - } - } else { - // Use unaligned load. - sum = _mm_loadu_pd(u); - __m128d floats2 = _mm_loadu_pd(v); - // Multiply. - sum = _mm_mul_pd(sum, floats2); - while (offset <= max_offset) { - __m128d floats1 = _mm_loadu_pd(u + offset); - floats2 = _mm_loadu_pd(v + offset); - offset += 2; - floats1 = _mm_mul_pd(floats1, floats2); - sum = _mm_add_pd(sum, floats1); - } - } - } - // Add the 2 sums in sum horizontally. - sum = _mm_hadd_pd(sum, sum); - // Extract the low result. 
- double result = _mm_cvtsd_f64(sum); - // Add on any left-over products. - while (offset < n) { - result += u[offset] * v[offset]; - ++offset; - } - return result; -} - -// Computes and returns the dot product of the n-vectors u and v. -// Uses Intel SSE intrinsics to access the SIMD instruction set. -int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) { - int max_offset = n - 8; - int offset = 0; - // Accumulate a set of 4 32-bit sums in sum, by loading 8 pairs of 8-bit - // values, extending to 16 bit, multiplying to make 32 bit results. - __m128i sum = _mm_setzero_si128(); - if (offset <= max_offset) { - offset = 8; - __m128i packed1 = _mm_loadl_epi64(reinterpret_cast(u)); - __m128i packed2 = _mm_loadl_epi64(reinterpret_cast(v)); - sum = _mm_cvtepi8_epi16(packed1); - packed2 = _mm_cvtepi8_epi16(packed2); - // The magic _mm_add_epi16 is perfect here. It multiplies 8 pairs of 16 bit - // ints to make 32 bit results, which are then horizontally added in pairs - // to make 4 32 bit results that still fit in a 128 bit register. - sum = _mm_madd_epi16(sum, packed2); - while (offset <= max_offset) { - packed1 = _mm_loadl_epi64(reinterpret_cast(u + offset)); - packed2 = _mm_loadl_epi64(reinterpret_cast(v + offset)); - offset += 8; - packed1 = _mm_cvtepi8_epi16(packed1); - packed2 = _mm_cvtepi8_epi16(packed2); - packed1 = _mm_madd_epi16(packed1, packed2); - sum = _mm_add_epi32(sum, packed1); - } - } - // Sum the 4 packed 32 bit sums and extract the low result. - sum = _mm_hadd_epi32(sum, sum); - sum = _mm_hadd_epi32(sum, sum); - int32_t result = _mm_cvtsi128_si32(sum); - while (offset < n) { - result += u[offset] * v[offset]; - ++offset; - } - return result; -} - -} // namespace tesseract. 
- -#endif // ANDROID_BUILD diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductsse.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductsse.h deleted file mode 100644 index 522f8dd5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/dotproductsse.h +++ /dev/null @@ -1,35 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dotproductsse.h -// Description: Architecture-specific dot-product function. -// Author: Ray Smith -// Created: Wed Jul 22 10:57:05 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_ -#define TESSERACT_ARCH_DOTPRODUCTSSE_H_ - -#include // for int32_t - -namespace tesseract { - -// Computes and returns the dot product of the n-vectors u and v. -// Uses Intel SSE intrinsics to access the SIMD instruction set. -double DotProductSSE(const double* u, const double* v, int n); -// Computes and returns the dot product of the n-vectors u and v. -// Uses Intel SSE intrinsics to access the SIMD instruction set. -int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n); - -} // namespace tesseract. 
- -#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrix.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrix.cpp deleted file mode 100644 index f08480b9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrix.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: intsimdmatrix.cpp -// Description: Base class for 8-bit int SIMD matrix multipliers. -// Author: Ray Smith -// Created: Tue Aug 15 08:01:32 PST 2017 -// -// (C) Copyright 2017, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "intsimdmatrix.h" -#include "genericvector.h" // for GenericVector -#include "intsimdmatrixavx2.h" // for IntSimdMatrixAVX2 -#include "intsimdmatrixsse.h" // for IntSimdMatrixSSE -#include "matrix.h" // for GENERIC_2D_ARRAY -#include "simddetect.h" // for SIMDDetect - -namespace tesseract { - -// Factory makes and returns an IntSimdMatrix (sub)class of the best -// available type for the current architecture. 
-/* static */ -IntSimdMatrix* IntSimdMatrix::GetFastestMultiplier() { - IntSimdMatrix* multiplier = nullptr; - if (SIMDDetect::IsAVX2Available()) { - multiplier = new IntSimdMatrixAVX2(); - } else if (SIMDDetect::IsSSEAvailable()) { - multiplier = new IntSimdMatrixSSE(); - } else { - // Default c++ implementation. - multiplier = new IntSimdMatrix(); - } - return multiplier; -} - -// Computes a reshaped copy of the weight matrix w. If there are no -// partial_funcs_, it does nothing. -void IntSimdMatrix::Init(const GENERIC_2D_ARRAY& w) { - if (partial_funcs_.empty()) return; - int num_out = w.dim1(); - int num_in = w.dim2() - 1; - // The rounded-up sizes of the reshaped weight matrix, excluding biases. - int rounded_num_in = Roundup(num_in, num_inputs_per_group_); - int rounded_num_out = RoundOutputs(num_out); - // Add the bias and compute the required size. - shaped_w_.resize((rounded_num_in + 1) * rounded_num_out, 0); - int shaped_index = 0; - int output = 0; - // Each number of registers needs a different format! Iterates over the - // different numbers of registers (each a power of 2). - for (int num_registers = max_output_registers_; num_registers >= 1; - num_registers /= 2) { - // The number of outputs that we will generate with this many registers. - int num_outputs_per_register_set = - num_registers * num_outputs_per_register_; - // Use the max number of registers until we have to go fewer. - while (output + num_outputs_per_register_set <= rounded_num_out) { - // Accumulating outputs in registers saves iterating over the inputs, so - // we only have to do it once per output register set. - for (int input = 0; input < num_in; input += num_inputs_per_group_) { - // Iterate over the number of outputs in a register set. - for (int j = 0; j < num_outputs_per_register_set; ++j) { - // Inner-most loop corresponds to the number of inputs in an input - // group. 
- for (int i = 0; i < num_inputs_per_group_; ++i) { - int8_t weight = 0; - if (output + j < num_out && input + i < num_in) - weight = w(output + j, input + i); - shaped_w_[shaped_index++] = weight; - } - } - } - // Append the bias weights for the register set. - for (int j = 0; j < num_outputs_per_register_set; ++j) { - int8_t weight = 0; - if (output + j < num_out) weight = w(output + j, num_in); - shaped_w_[shaped_index++] = weight; - } - output += num_outputs_per_register_set; - } - } -} - -// Computes matrix.vector v = Wu. -// u is of size W.dim2() - 1 and the output v is of size W.dim1(). -// u is imagined to have an extra element at the end with value 1, to -// implement the bias, but it doesn't actually have it. -void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY& w, - const GenericVector& scales, - const int8_t* u, double* v) const { - int num_out = w.dim1(); - int num_in = w.dim2() - 1; - if (partial_funcs_.empty()) { - // Base implementation. - for (int i = 0; i < num_out; ++i) { - const int8_t* wi = w[i]; - int total = 0; - for (int j = 0; j < num_in; ++j) total += wi[j] * u[j]; - // Add in the bias and correct for integer values. - v[i] = (static_cast(total) / INT8_MAX + wi[num_in]) * scales[i]; - } - } else { - const int8_t* w_data = shaped_w_.data(); - const double* scales_data = &scales[0]; - // Each call to a partial_func_ produces group_size outputs, except the - // last one, which can produce less. - int group_size = num_outputs_per_register_ * max_output_registers_; - int rounded_num_in = Roundup(num_in, num_inputs_per_group_); - int rounded_num_out = RoundOutputs(num_out); - int output = 0; - for (auto fn : partial_funcs_) { - // The amount of w_data consumed by each call to fn. - int w_step = (rounded_num_in + 1) * group_size; - // Run with this group size, until it would produce too much output, then - // switch to a smaller size. 
- for (; output + group_size <= rounded_num_out; output += group_size) { - (*fn)(w_data, scales_data, u, rounded_num_in, num_out - output, v); - w_data += w_step; - scales_data += group_size; - v += group_size; - } - group_size /= 2; - } - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrix.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrix.h deleted file mode 100644 index 5185eac8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrix.h +++ /dev/null @@ -1,136 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: intsimdmatrix.h -// Description: Base class for 8-bit int SIMD matrix multipliers. -// Author: Ray Smith -// Created: Tue Aug 15 07:37:20 PST 2017 -// -// (C) Copyright 2017, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_ARCH_INTSIMDMATRIX_H_ -#define TESSERACT_ARCH_INTSIMDMATRIX_H_ - -#include -#include - -template class GENERIC_2D_ARRAY; -template class GenericVector; - -namespace tesseract { - -// Base class for a SIMD function to multiply a matrix by a vector, with sources -// of 8-bit signed integer, and result in a double, after appropriate scaling. 
-// Assumes a specific method of multiplication that can be applied to any size -// and number of SIMD registers as follows: -// int32_t results are computed with num_outputs_per_register_ in each of -// max_output_registers_ result registers, repeatedly until it would make too -// many results, then the number of registers is halved, and so-on down to a -// single result register. The last calculation only outputs the required number -// of results instead of writing beyond the bounds. Eg: matrix has 75 outputs, -// num_outputs_per_register_ = 4, and max_output_registers_ = 8, -// Step 1: 8x4=32 results are computed, -// Step 2: 8x4=32 again, total 64, -// Step 3: 2x4=8 (since 8x4 is too many, so is 4x4), total 72, -// Step 4: 1x3, total 75. -// Each step above is computed using a PartialFunc, which runs over the input -// vector once. The input is read one registerful of num_inputs_per_register_ -// at a time (presumably 4x num_outputs_per_register_ since they are int8_t) -// so the inputs MUST BE PADDED to a multiple of num_inputs_per_register_. -// Since it is slow (on Intel at least) to horizontally add in a register, -// provision is made to process num_inputs_per_group_ inputs at a time, with -// the group being replicated num_input_groups_ times and multiplied by a -// num_inputs_per_group_ by num_input_groups_ rectangle of the weights matrix. -// This is most convenient if num_inputs_per_group_ is 4, and the product -// sign-extends and sums 8x8=16 bit results to 32 bits, adding 4 adjacent -// results in the process, but it doesn't have to be implemented that way. -// The weights are re-ordered by Init() to be used sequentially by the above -// algorithm, followed by the biases, so they can be added at the end. -// The base class computes the base C++ implementation. 
-// NOTE that, although the subclasses execute on different SIMD hardware, no -// virtual methods are needed, as the constructor sets up everything that -// is required to allow the base class implementation to do all the work. -class IntSimdMatrix { - public: - // Constructor should set the data members to indicate the sizes. - // NOTE: Base constructor public only for test purposes. - IntSimdMatrix() - : num_outputs_per_register_(1), - max_output_registers_(1), - num_inputs_per_register_(1), - num_inputs_per_group_(1), - num_input_groups_(1) {} - - // Factory makes and returns an IntSimdMatrix (sub)class of the best - // available type for the current architecture. - static IntSimdMatrix* GetFastestMultiplier(); - - // Computes a reshaped copy of the weight matrix w. If there are no - // partial_funcs_, it does nothing. - void Init(const GENERIC_2D_ARRAY& w); - - // Rounds the size up to a multiple of the input register size (in int8_t). - int RoundInputs(int size) const { - return Roundup(size, num_inputs_per_register_); - } - // Rounds the size up to a multiple of the output register size (in int32_t). - int RoundOutputs(int size) const { - return Roundup(size, num_outputs_per_register_); - } - - // Computes matrix.vector v = Wu. - // u is of size W.dim2() - 1 and the output v is of size W.dim1(). - // u is imagined to have an extra element at the end with value 1, to - // implement the bias, but it doesn't actually have it. - // Computes the base C++ implementation, if there are no partial_funcs_. - // NOTE: The size of the input vector (u) must be padded using - // RoundInputs above. - // The input will be over-read to the extent of the padding. There are no - // alignment requirements. - void MatrixDotVector(const GENERIC_2D_ARRAY& w, - const GenericVector& scales, const int8_t* u, - double* v) const; - - protected: - // Function to compute part of a matrix.vector multiplication. 
The weights - // are in a very specific order (see above) in w, which is multiplied by - // u of length num_in, to produce output v after scaling the integer results - // by the corresponding member of scales. - // The amount of w and scales consumed is fixed and not available to the - // caller. The number of outputs written to v will be at most num_out. - typedef void (*PartialFunc)(const int8_t* w, const double* scales, - const int8_t* u, int num_in, int num_out, - double* v); - - // Rounds the input up to a multiple of the given factor. - static int Roundup(int input, int factor) { - return (input + factor - 1) / factor * factor; - } - - // Number of 32 bit outputs held in each register. - int num_outputs_per_register_; - // Maximum number of registers that we will use to hold outputs. - int max_output_registers_; - // Number of 8 bit inputs in the inputs register. - int num_inputs_per_register_; - // Number of inputs in each weight group. - int num_inputs_per_group_; - // Number of groups of inputs to be broadcast. - int num_input_groups_; - // The weights matrix reorganized in whatever way suits this instance. - std::vector shaped_w_; - // A series of functions to compute a partial result. - std::vector partial_funcs_; -}; - -} // namespace tesseract - -#endif // TESSERACT_ARCH_INTSIMDMATRIX_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixavx2.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixavx2.cpp deleted file mode 100644 index c8707e73..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixavx2.cpp +++ /dev/null @@ -1,284 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: intsimdmatrixavx2.cpp -// Description: matrix-vector product for 8-bit data on avx2. -// Author: Ray Smith -// Created: Fri Aug 04 13:26:20 PST 2017 -// -// (C) Copyright 2017, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "intsimdmatrixavx2.h" - -#ifdef __AVX2__ -#include -#include -#include -#include - -namespace tesseract { - -// Number of outputs held in each register. 8 x 32 bit ints. -constexpr int kNumOutputsPerRegister = 8; -// Maximum number of registers that we will use. -constexpr int kMaxOutputRegisters = 8; -// Number of inputs in the inputs register. -constexpr int kNumInputsPerRegister = 32; -// Number of inputs in each weight group. -constexpr int kNumInputsPerGroup = 4; -// Number of groups of inputs to be broadcast. -constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup; - -// Computes one set of 4x8 products of inputs and weights, adding to result. -// Horizontally adds 4 adjacent results, making 8x32-bit results. -// rep_input is assumed to be an 8x replicated set of 4x8-bit signed integers. -// Note that wi must previously have been re-organized with blocks of 4x8 -// weights in contiguous memory. -// ones is a register of 16x16-bit values all equal to 1. -// Note: wi is incremented by the amount of data read. -// weights and reps are scratch registers. -// This function must be inlined with references in order for the compiler to -// correctly use the registers declared in the caller. 
-inline void MultiplyGroup(const __m256i& rep_input, const __m256i& ones, - const int8_t*& wi, __m256i& weights, __m256i& reps, - __m256i& result) { - // Load a 4x8 block of weights. - weights = _mm256_loadu_si256(reinterpret_cast(wi)); - wi += kNumInputsPerRegister; - // Normalize the signs on rep_input, weights, so weights is always +ve. - reps = _mm256_sign_epi8(rep_input, weights); - weights = _mm256_sign_epi8(weights, weights); - // Multiply 32x8-bit reps by 32x8-bit weights to make 16x16-bit results, - // with adjacent pairs added. - weights = _mm256_maddubs_epi16(weights, reps); - // Multiply 16x16-bit result by 16x16-bit ones to make 8x32-bit results, - // with adjacent pairs added. What we really want is a horizontal add of - // 16+16=32 bit result, but there is no such instruction, so multiply by - // 16-bit ones instead. It is probably faster than all the sign-extending, - // permuting and adding that would otherwise be required. - weights = _mm256_madd_epi16(weights, ones); - result = _mm256_add_epi32(result, weights); -} - -// Extracts and converts 8x32-bit results from result, adding the bias from wi -// and scaling by scales, before storing in *v. Note that wi, scales and v are -// expected to contain 8 consecutive elements or num_out if less. -inline void ExtractResults(__m256i& result, __m256i& shift_id, - const int8_t*& wi, const double*& scales, - int num_out, double*& v) { - for (int out = 0; out < num_out; ++out) { - int32_t res = -#ifndef _MSC_VER - _mm256_extract_epi32(result, 0) -#else - // Workaround MSVC's ICE - // _mm256_extract_epi32(X, Y) == ((int32_t*)&X)[Y] - ((int32_t*)&result)[0] -#endif - ; - *v++ = (static_cast(res) / INT8_MAX + *wi++) * *scales++; - // Rotate the results in int32_t units, so the next result is ready. - result = _mm256_permutevar8x32_epi32(result, shift_id); - } -} - -// Computes part of matrix.vector v = Wu. Computes N=64 results. 
-// The weights *must* be arranged so that consecutive reads from wi -// provides (num_in/kNumInputsPerGroup groups of (N output dim groups of -// (kNumInputsPerGroup inputs))). After that there must be N consecutive -// bias weights, before continuing with any more weights. -// u must be padded out with zeros to -// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements. -static void PartialMatrixDotVector64(const int8_t* wi, const double* scales, - const int8_t* u, int num_in, int num_out, - double* v) { - // Register containing 16-bit ones for horizontal add with 16->32 bit - // conversion. - __m256i ones = - _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); - // Initialize all the results to 0. - __m256i result0 = _mm256_setzero_si256(); - __m256i result1 = _mm256_setzero_si256(); - __m256i result2 = _mm256_setzero_si256(); - __m256i result3 = _mm256_setzero_si256(); - __m256i result4 = _mm256_setzero_si256(); - __m256i result5 = _mm256_setzero_si256(); - __m256i result6 = _mm256_setzero_si256(); - __m256i result7 = _mm256_setzero_si256(); - // Iterate over the input (u), one registerful at a time. - for (int j = 0; j < num_in;) { - __m256i inputs = - _mm256_loadu_si256(reinterpret_cast(u + j)); - // Inputs are processed in groups of kNumInputsPerGroup, replicated - // kNumInputGroups times. - for (int ig = 0; ig < kNumInputGroups && j < num_in; - ++ig, j += kNumInputsPerGroup) { - // Replicate the low 32 bits (4 inputs) 8 times. - __m256i rep_input = - _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); - // Rotate the inputs in groups of 4, so the next 4 inputs are ready. - inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); - __m256i weights, reps; - // Mul-add, with horizontal add of the 4 inputs to each of the results. 
- MultiplyGroup(rep_input, ones, wi, weights, reps, result0); - MultiplyGroup(rep_input, ones, wi, weights, reps, result1); - MultiplyGroup(rep_input, ones, wi, weights, reps, result2); - MultiplyGroup(rep_input, ones, wi, weights, reps, result3); - MultiplyGroup(rep_input, ones, wi, weights, reps, result4); - MultiplyGroup(rep_input, ones, wi, weights, reps, result5); - MultiplyGroup(rep_input, ones, wi, weights, reps, result6); - MultiplyGroup(rep_input, ones, wi, weights, reps, result7); - } - } - ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result3, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result4, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result5, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result6, shift_id, wi, scales, kNumOutputsPerRegister, v); - num_out -= kNumOutputsPerRegister * 7; - ExtractResults(result7, shift_id, wi, scales, - std::min(kNumOutputsPerRegister, num_out), v); -} - -// Computes part of matrix.vector v = Wu. Computes N=32 results. -// For details see PartialMatrixDotVector64 with N=32. -static void PartialMatrixDotVector32(const int8_t* wi, const double* scales, - const int8_t* u, int num_in, int num_out, - double* v) { - // Register containing 16-bit ones for horizontal add with 16->32 bit - // conversion. - __m256i ones = - _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); - // Initialize all the results to 0. - __m256i result0 = _mm256_setzero_si256(); - __m256i result1 = _mm256_setzero_si256(); - __m256i result2 = _mm256_setzero_si256(); - __m256i result3 = _mm256_setzero_si256(); - // Iterate over the input (u), one registerful at a time. 
- for (int j = 0; j < num_in;) { - __m256i inputs = - _mm256_loadu_si256(reinterpret_cast(u + j)); - // Inputs are processed in groups of kNumInputsPerGroup, replicated - // kNumInputGroups times. - for (int ig = 0; ig < kNumInputGroups && j < num_in; - ++ig, j += kNumInputsPerGroup) { - // Replicate the low 32 bits (4 inputs) 8 times. - __m256i rep_input = - _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); - // Rotate the inputs in groups of 4, so the next 4 inputs are ready. - inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); - __m256i weights, reps; - // Mul-add, with horizontal add of the 4 inputs to each of the results. - MultiplyGroup(rep_input, ones, wi, weights, reps, result0); - MultiplyGroup(rep_input, ones, wi, weights, reps, result1); - MultiplyGroup(rep_input, ones, wi, weights, reps, result2); - MultiplyGroup(rep_input, ones, wi, weights, reps, result3); - } - } - ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v); - ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v); - num_out -= kNumOutputsPerRegister * 3; - ExtractResults(result3, shift_id, wi, scales, - std::min(kNumOutputsPerRegister, num_out), v); -} - -// Computes part of matrix.vector v = Wu. Computes N=16 results. -// For details see PartialMatrixDotVector64 with N=16. -static void PartialMatrixDotVector16(const int8_t* wi, const double* scales, - const int8_t* u, int num_in, int num_out, - double* v) { - // Register containing 16-bit ones for horizontal add with 16->32 bit - // conversion. - __m256i ones = - _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); - // Initialize all the results to 0. - __m256i result0 = _mm256_setzero_si256(); - __m256i result1 = _mm256_setzero_si256(); - // Iterate over the input (u), one registerful at a time. 
- for (int j = 0; j < num_in;) { - __m256i inputs = - _mm256_loadu_si256(reinterpret_cast(u + j)); - // Inputs are processed in groups of kNumInputsPerGroup, replicated - // kNumInputGroups times. - for (int ig = 0; ig < kNumInputGroups && j < num_in; - ++ig, j += kNumInputsPerGroup) { - // Replicate the low 32 bits (4 inputs) 8 times. - __m256i rep_input = - _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); - // Rotate the inputs in groups of 4, so the next 4 inputs are ready. - inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); - __m256i weights, reps; - // Mul-add, with horizontal add of the 4 inputs to each of the results. - MultiplyGroup(rep_input, ones, wi, weights, reps, result0); - MultiplyGroup(rep_input, ones, wi, weights, reps, result1); - } - } - ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v); - num_out -= kNumOutputsPerRegister; - ExtractResults(result1, shift_id, wi, scales, - std::min(kNumOutputsPerRegister, num_out), v); -} - -// Computes part of matrix.vector v = Wu. Computes N=8 results. -// For details see PartialMatrixDotVector64 with N=8. -static void PartialMatrixDotVector8(const int8_t* wi, const double* scales, - const int8_t* u, int num_in, int num_out, - double* v) { - // Register containing 16-bit ones for horizontal add with 16->32 bit - // conversion. - __m256i ones = - _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); - // Initialize all the results to 0. - __m256i result0 = _mm256_setzero_si256(); - // Iterate over the input (u), one registerful at a time. - for (int j = 0; j < num_in;) { - __m256i inputs = - _mm256_loadu_si256(reinterpret_cast(u + j)); - // Inputs are processed in groups of kNumInputsPerGroup, replicated - // kNumInputGroups times. - for (int ig = 0; ig < kNumInputGroups && j < num_in; - ++ig, j += kNumInputsPerGroup) { - // Replicate the low 32 bits (4 inputs) 8 times. 
- __m256i rep_input = - _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); - // Rotate the inputs in groups of 4, so the next 4 inputs are ready. - inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); - __m256i weights, reps; - // Mul-add, with horizontal add of the 4 inputs to each of the results. - MultiplyGroup(rep_input, ones, wi, weights, reps, result0); - } - } - ExtractResults(result0, shift_id, wi, scales, num_out, v); -} -#else -namespace tesseract { -#endif // __AVX2__ - -IntSimdMatrixAVX2::IntSimdMatrixAVX2() { -#ifdef __AVX2__ - num_outputs_per_register_ = kNumOutputsPerRegister; - max_output_registers_ = kMaxOutputRegisters; - num_inputs_per_register_ = kNumInputsPerRegister; - num_inputs_per_group_ = kNumInputsPerGroup; - num_input_groups_ = kNumInputGroups; - partial_funcs_ = {PartialMatrixDotVector64, PartialMatrixDotVector32, - PartialMatrixDotVector16, PartialMatrixDotVector8}; -#endif // __AVX2__ -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixavx2.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixavx2.h deleted file mode 100644 index 280bf2f0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixavx2.h +++ /dev/null @@ -1,33 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: intsindmatrixavx2.h -// Description: AVX2 implementation of 8-bit int SIMD matrix multiply. -// Author: Ray Smith -// Created: Wed Aug 16 10:21:42 PST 2017 -// -// (C) Copyright 2017, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_ -#define TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_ - -#include "intsimdmatrix.h" - -namespace tesseract { - -// AVX2 implementation of IntSimdMatrix. -class IntSimdMatrixAVX2 : public IntSimdMatrix { - public: - IntSimdMatrixAVX2(); -}; - -} // namespace tesseract - -#endif // TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixsse.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixsse.cpp deleted file mode 100644 index 5ba57a7e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixsse.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: intsindmatrixsse.cpp -// Description: SSE implementation of 8-bit int SIMD matrix multiply. -// Author: Ray Smith -// Created: Tue Aug 23 13:58:49 PST 2017 -// -// (C) Copyright 2017, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "intsimdmatrixsse.h" - -#include -#include -#include "dotproductsse.h" - -namespace tesseract { - -#ifdef __SSE4_1__ -// Computes part of matrix.vector v = Wu. Computes 1 result. -static void PartialMatrixDotVector1(const int8_t* wi, const double* scales, - const int8_t* u, int num_in, int num_out, - double* v) { - int total = IntDotProductSSE(u, wi, num_in); - // Add in the bias and correct for integer values. - *v = (static_cast(total) / INT8_MAX + wi[num_in]) * *scales; -} -#endif // __SSE4_1__ - -IntSimdMatrixSSE::IntSimdMatrixSSE() { -#ifdef __SSE4_1__ - partial_funcs_ = {PartialMatrixDotVector1}; -#endif // __SSE4_1__ -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixsse.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixsse.h deleted file mode 100644 index 9ca2c890..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/intsimdmatrixsse.h +++ /dev/null @@ -1,33 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: intsindmatrixsse.h -// Description: SSE implementation of 8-bit int SIMD matrix multiply. -// Author: Ray Smith -// Created: Tue Aug 23 13:58:21 PST 2017 -// -// (C) Copyright 2017, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_ARCH_INTSIMDMATRIXSSE_H_ -#define TESSERACT_ARCH_INTSIMDMATRIXSSE_H_ - -#include "intsimdmatrix.h" - -namespace tesseract { - -// AVX2 implementation of IntSimdMatrix. -class IntSimdMatrixSSE : public IntSimdMatrix { - public: - IntSimdMatrixSSE(); -}; - -} // namespace tesseract - -#endif // TESSERACT_ARCH_INTSIMDMATRIXSSE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/simddetect.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/simddetect.cpp deleted file mode 100644 index bff14ea9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/simddetect.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: simddetect.cpp -// Description: Architecture detector. -// Author: Stefan Weil (based on code from Ray Smith) -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "simddetect.h" - -#undef X86_BUILD -#if defined(__x86_64__) || defined(__i386__) || (defined(_WIN32) && ! 
defined(WINAPI_FAMILY)) -# if !defined(ANDROID_BUILD) -# define X86_BUILD 1 -# endif // !ANDROID_BUILD -#endif // x86 target - -#if defined(X86_BUILD) -# if defined(__GNUC__) -# include -# elif defined(_WIN32) -# include -# endif -#endif - -SIMDDetect SIMDDetect::detector; - -// If true, then AVX has been detected. -bool SIMDDetect::avx_available_; -bool SIMDDetect::avx2_available_; -bool SIMDDetect::avx512F_available_; -bool SIMDDetect::avx512BW_available_; -// If true, then SSe4.1 has been detected. -bool SIMDDetect::sse_available_; - -// Constructor. -// Tests the architecture in a system-dependent way to detect AVX, SSE and -// any other available SIMD equipment. -// __GNUC__ is also defined by compilers that include GNU extensions such as -// clang. -SIMDDetect::SIMDDetect() { -#if defined(X86_BUILD) -# if defined(__GNUC__) - unsigned int eax, ebx, ecx, edx; - if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) { - // Note that these tests all use hex because the older compilers don't have - // the newer flags. - sse_available_ = (ecx & 0x00080000) != 0; - avx_available_ = (ecx & 0x10000000) != 0; - if (avx_available_) { - // There is supposed to be a __get_cpuid_count function, but this is all - // there is in my cpuid.h. It is a macro for an asm statement and cannot - // be used inside an if. - __cpuid_count(7, 0, eax, ebx, ecx, edx); - avx2_available_ = (ebx & 0x00000020) != 0; - avx512F_available_ = (ebx & 0x00010000) != 0; - avx512BW_available_ = (ebx & 0x40000000) != 0; - } - } -# elif (defined(_WIN32) && ! 
defined(WINAPI_FAMILY)) - int cpuInfo[4]; - __cpuid(cpuInfo, 0); - if (cpuInfo[0] >= 1) { - __cpuid(cpuInfo, 1); - sse_available_ = (cpuInfo[2] & 0x00080000) != 0; - avx_available_ = (cpuInfo[2] & 0x10000000) != 0; - } -# else -# error "I don't know how to test for SIMD with this compiler" -# endif -#endif // X86_BUILD -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/simddetect.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/simddetect.h deleted file mode 100644 index 26b6920c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/arch/simddetect.h +++ /dev/null @@ -1,58 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: simddetect.h -// Description: Architecture detector. -// Author: Stefan Weil (based on code from Ray Smith) -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_ARCH_SIMDDETECT_H_ -#define TESSERACT_ARCH_SIMDDETECT_H_ - -#include "platform.h" - -// Architecture detector. Add code here to detect any other architectures for -// SIMD-based faster dot product functions. Intended to be a single static -// object, but it does no real harm to have more than one. -class SIMDDetect { - public: - // Returns true if AVX is available on this system. 
- static inline bool IsAVXAvailable() { return detector.avx_available_; } - // Returns true if AVX2 (integer support) is available on this system. - static inline bool IsAVX2Available() { return detector.avx2_available_; } - // Returns true if AVX512 Foundation (float) is available on this system. - static inline bool IsAVX512FAvailable() { - return detector.avx512F_available_; - } - // Returns true if AVX512 integer is available on this system. - static inline bool IsAVX512BWAvailable() { - return detector.avx512BW_available_; - } - // Returns true if SSE4.1 is available on this system. - static inline bool IsSSEAvailable() { return detector.sse_available_; } - - private: - // Constructor, must set all static member variables. - SIMDDetect(); - - private: - // Singleton. - static SIMDDetect detector; - // If true, then AVX has been detected. - static TESS_API bool avx_available_; - static TESS_API bool avx2_available_; - static TESS_API bool avx512F_available_; - static TESS_API bool avx512BW_available_; - // If true, then SSe4.1 has been detected. 
- static TESS_API bool sse_available_; -}; - -#endif // TESSERACT_ARCH_SIMDDETECT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/Makefile.am deleted file mode 100644 index c3ee7328..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/Makefile.am +++ /dev/null @@ -1,86 +0,0 @@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/arch \ - -I$(top_srcdir)/src/lstm \ - -I$(top_srcdir)/src/viewer \ - -I$(top_srcdir)/src/classify \ - -I$(top_srcdir)/src/dict \ - -I$(top_srcdir)/src/wordrec \ - -I$(top_srcdir)/src/cutil \ - -I$(top_srcdir)/src/textord \ - -I$(top_srcdir)/src/opencl - -AM_CPPFLAGS += $(OPENCL_CPPFLAGS) -AM_CPPFLAGS += $(OPENMP_CXXFLAGS) - -if DISABLED_LEGACY_ENGINE -AM_CPPFLAGS += -DDISABLED_LEGACY_ENGINE -endif - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - -pkginclude_HEADERS = \ - thresholder.h \ - osdetect.h \ - ltrresultiterator.h \ - pageiterator.h \ - resultiterator.h - -noinst_HEADERS = \ - control.h \ - docqual.h \ - equationdetect.h \ - fixspace.h \ - mutableiterator.h \ - output.h \ - paragraphs.h \ - paragraphs_internal.h \ - paramsd.h \ - pgedit.h \ - reject.h \ - tessedit.h \ - tesseractclass.h \ - tessvars.h \ - werdit.h - -noinst_LTLIBRARIES = libtesseract_main.la - -libtesseract_main_la_SOURCES = \ - applybox.cpp \ - control.cpp \ - fixxht.cpp \ - linerec.cpp \ - ltrresultiterator.cpp \ - mutableiterator.cpp \ - output.cpp \ - pageiterator.cpp \ - pagesegmain.cpp \ - pagewalk.cpp \ - paragraphs.cpp \ - paramsd.cpp \ - pgedit.cpp \ - recogtraining.cpp \ - reject.cpp \ - resultiterator.cpp \ - tessedit.cpp \ - tesseractclass.cpp \ - tessvars.cpp \ - thresholder.cpp \ - werdit.cpp - -if !DISABLED_LEGACY_ENGINE -libtesseract_main_la_SOURCES += \ - adaptions.cpp \ - docqual.cpp \ - equationdetect.cpp \ - 
fixspace.cpp \ - osdetect.cpp \ - par_control.cpp \ - superscript.cpp \ - tessbox.cpp \ - tfacepp.cpp -endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/adaptions.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/adaptions.cpp deleted file mode 100644 index 8cf6344d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/adaptions.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/********************************************************************** - * File: adaptions.cpp (Formerly adaptions.c) - * Description: Functions used to adapt to blobs already confidently - * identified - * Author: Chris Newton - * Created: Thu Oct 7 10:17:28 BST 1993 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include -#include "tessvars.h" -#include "reject.h" -#include "control.h" -#include "stopper.h" -#include "tesseractclass.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -namespace tesseract { -bool Tesseract::word_adaptable( //should we adapt? 
- WERD_RES* word, - uint16_t mode) { - if (tessedit_adaption_debug) { - tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", - word->best_choice->unichar_string().string(), - word->best_choice->rating(), word->best_choice->certainty()); - } - - BOOL8 status = FALSE; - BITS16 flags(mode); - - enum MODES - { - ADAPTABLE_WERD, - ACCEPTABLE_WERD, - CHECK_DAWGS, - CHECK_SPACES, - CHECK_ONE_ELL_CONFLICT, - CHECK_AMBIG_WERD - }; - - /* - 0: NO adaption - */ - if (mode == 0) { - if (tessedit_adaption_debug) tprintf("adaption disabled\n"); - return false; - } - - if (flags.bit (ADAPTABLE_WERD)) { - status |= word->tess_would_adapt; // result of Classify::AdaptableWord() - if (tessedit_adaption_debug && !status) { - tprintf("tess_would_adapt bit is false\n"); - } - } - - if (flags.bit (ACCEPTABLE_WERD)) { - status |= word->tess_accepted; - if (tessedit_adaption_debug && !status) { - tprintf("tess_accepted bit is false\n"); - } - } - - if (!status) { // If not set then - return false; // ignore other checks - } - - if (flags.bit (CHECK_DAWGS) && - (word->best_choice->permuter () != SYSTEM_DAWG_PERM) && - (word->best_choice->permuter () != FREQ_DAWG_PERM) && - (word->best_choice->permuter () != USER_DAWG_PERM) && - (word->best_choice->permuter () != NUMBER_PERM)) { - if (tessedit_adaption_debug) tprintf("word not in dawgs\n"); - return false; - } - - if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, false)) { - if (tessedit_adaption_debug) tprintf("word has ell conflict\n"); - return false; - } - - if (flags.bit (CHECK_SPACES) && - (strchr(word->best_choice->unichar_string().string(), ' ') != nullptr)) { - if (tessedit_adaption_debug) tprintf("word contains spaces\n"); - return false; - } - - if (flags.bit (CHECK_AMBIG_WERD) && - word->best_choice->dangerous_ambig_found()) { - if (tessedit_adaption_debug) tprintf("word is ambiguous\n"); - return false; - } - - if (tessedit_adaption_debug) { - tprintf("returning status %d\n", status); - } - 
return status; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/applybox.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/applybox.cpp deleted file mode 100644 index 15395233..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/applybox.cpp +++ /dev/null @@ -1,824 +0,0 @@ -/********************************************************************** - * File: applybox.cpp (Formerly applybox.c) - * Description: Re segment rows according to box file data - * Author: Phil Cheatle - * Created: Wed Nov 24 09:11:23 GMT 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include -#include -#include "allheaders.h" -#include "boxread.h" -#ifndef DISABLED_LEGACY_ENGINE -#include "chopper.h" -#endif -#include "pageres.h" -#include "unichar.h" -#include "unicharset.h" -#include "tesseractclass.h" -#include "genericvector.h" - -/** Max number of blobs to classify together in FindSegmentation. */ -const int kMaxGroupSize = 4; -/// Max fraction of median allowed as deviation in xheight before switching -/// to median. 
-const double kMaxXHeightDeviationFraction = 0.125; - -/** - * The box file is assumed to contain box definitions, one per line, of the - * following format for blob-level boxes: - * @verbatim - * - * @endverbatim - * and for word/line-level boxes: - * @verbatim - * WordStr # - * @endverbatim - * NOTES: - * The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT. - * - * is 0-based, and the page number is used for multipage input (tiff). - * - * In the blob-level form, each line represents a recognizable unit, which may - * be several UTF-8 bytes, but there is a bounding box around each recognizable - * unit, and no classifier is needed to train in this mode (bootstrapping.) - * - * In the word/line-level form, the line begins with the literal "WordStr", and - * the bounding box bounds either a whole line or a whole word. The recognizable - * units in the word/line are listed after the # at the end of the line and - * are space delimited, ignoring any original spaces on the line. - * Eg. - * @verbatim - * word -> #w o r d - * multi word line -> #m u l t i w o r d l i n e - * @endverbatim - * The recognizable units must be space-delimited in order to allow multiple - * unicodes to be used for a single recognizable unit, eg Hindi. - * - * In this mode, the classifier must have been pre-trained with the desired - * character set, or it will not be able to find the character segmentations. 
- */ - -namespace tesseract { - -#ifndef DISABLED_LEGACY_ENGINE -static void clear_any_old_text(BLOCK_LIST *block_list) { - BLOCK_IT block_it(block_list); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - ROW_IT row_it(block_it.data()->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - WERD_IT word_it(row_it.data()->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) { - word_it.data()->set_text(""); - } - } - } -} - -// Applies the box file based on the image name fname, and resegments -// the words in the block_list (page), with: -// blob-mode: one blob per line in the box file, words as input. -// word/line-mode: one blob per space-delimited unit after the #, and one word -// per line in the box file. (See comment above for box file format.) -// If find_segmentation is true, (word/line mode) then the classifier is used -// to re-segment words/lines to match the space-delimited truth string for -// each box. In this case, the input box may be for a word or even a whole -// text line, and the output words will contain multiple blobs corresponding -// to the space-delimited input string. -// With find_segmentation false, no classifier is needed, but the chopper -// can still be used to correctly segment touching characters with the help -// of the input boxes. -// In the returned PAGE_RES, the WERD_RES are setup as they would be returned -// from normal classification, ie. with a word, chopped_word, rebuild_word, -// seam_array, denorm, box_word, and best_state, but NO best_choice or -// raw_choice, as they would require a UNICHARSET, which we aim to avoid. -// Instead, the correct_text member of WERD_RES is set, and this may be later -// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords -// is not required before calling ApplyBoxTraining. 
-PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, - bool find_segmentation, - BLOCK_LIST *block_list) { - GenericVector boxes; - GenericVector texts, full_texts; - if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts, - nullptr)) { - return nullptr; // Can't do it. - } - - const int box_count = boxes.size(); - int box_failures = 0; - - // In word mode, we use the boxes to make a word for each box, but - // in blob mode we use the existing words and maximally chop them first. - PAGE_RES* page_res = find_segmentation ? - nullptr : SetupApplyBoxes(boxes, block_list); - clear_any_old_text(block_list); - - for (int i = 0; i < box_count; i++) { - bool foundit = false; - if (page_res != nullptr) { - foundit = ResegmentCharBox(page_res, - (i == 0) ? nullptr : &boxes[i - 1], - boxes[i], - (i == box_count - 1) ? nullptr : &boxes[i + 1], - full_texts[i].string()); - } else { - foundit = ResegmentWordBox(block_list, boxes[i], - (i == box_count - 1) ? nullptr : &boxes[i + 1], - texts[i].string()); - } - if (!foundit) { - box_failures++; - ReportFailedBox(i, boxes[i], texts[i].string(), - "FAILURE! Couldn't find a matching blob"); - } - } - - if (page_res == nullptr) { - // In word/line mode, we now maximally chop all the words and resegment - // them with the classifier. - page_res = SetupApplyBoxes(boxes, block_list); - ReSegmentByClassification(page_res); - } - if (applybox_debug > 0) { - tprintf("APPLY_BOXES:\n"); - tprintf(" Boxes read from boxfile: %6d\n", box_count); - if (box_failures > 0) - tprintf(" Boxes failed resegmentation: %6d\n", box_failures); - } - TidyUp(page_res); - return page_res; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -// Helper computes median xheight in the image. 
-static double MedianXHeight(BLOCK_LIST *block_list) { - BLOCK_IT block_it(block_list); - STATS xheights(0, block_it.data()->pdblk.bounding_box().height()); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - ROW_IT row_it(block_it.data()->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - xheights.add(IntCastRounded(row_it.data()->x_height()), 1); - } - } - return xheights.median(); -} - -/// Any row xheight that is significantly different from the median is set -/// to the median. -void Tesseract::PreenXHeights(BLOCK_LIST *block_list) { - const double median_xheight = MedianXHeight(block_list); - const double max_deviation = kMaxXHeightDeviationFraction * median_xheight; - // Strip all fuzzy space markers to simplify the PAGE_RES. - BLOCK_IT b_it(block_list); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { - ROW* row = r_it.data(); - const double diff = fabs(row->x_height() - median_xheight); - if (diff > max_deviation) { - if (applybox_debug) { - tprintf("row xheight=%g, but median xheight = %g\n", - row->x_height(), median_xheight); - } - row->set_x_height(static_cast(median_xheight)); - } - } - } -} - -#ifndef DISABLED_LEGACY_ENGINE - -/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: -/// All fuzzy spaces are removed, and all the words are maximally chopped. -PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, - BLOCK_LIST *block_list) { - PreenXHeights(block_list); - // Strip all fuzzy space markers to simplify the PAGE_RES. 
- BLOCK_IT b_it(block_list); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { - ROW* row = r_it.data(); - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (word->cblob_list()->empty()) { - delete w_it.extract(); - } else { - word->set_flag(W_FUZZY_SP, false); - word->set_flag(W_FUZZY_NON, false); - } - } - } - } - PAGE_RES* page_res = new PAGE_RES(false, block_list, nullptr); - PAGE_RES_IT pr_it(page_res); - WERD_RES* word_res; - while ((word_res = pr_it.word()) != nullptr) { - MaximallyChopWord(boxes, pr_it.block()->block, - pr_it.row()->row, word_res); - pr_it.forward(); - } - return page_res; -} - -/// Tests the chopper by exhaustively running chop_one_blob. -/// The word_res will contain filled chopped_word, seam_array, denorm, -/// box_word and best_state for the maximally chopped word. -void Tesseract::MaximallyChopWord(const GenericVector& boxes, - BLOCK* block, ROW* row, - WERD_RES* word_res) { - if (!word_res->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - row, block)) { - word_res->CloneChoppedToRebuild(); - return; - } - if (chop_debug) { - tprintf("Maximally chopping word at:"); - word_res->word->bounding_box().print(); - } - GenericVector blob_choices; - ASSERT_HOST(!word_res->chopped_word->blobs.empty()); - float rating = static_cast(INT8_MAX); - for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) { - // The rating and certainty are not quite arbitrary. 
Since - // select_blob_to_chop uses the worst certainty to choose, they all have - // to be different, so starting with INT8_MAX, subtract 1/8 for each blob - // in here, and then divide by e each time they are chopped, which - // should guarantee a set of unequal values for the whole tree of blobs - // produced, however much chopping is required. The chops are thus only - // limited by the ability of the chopper to find suitable chop points, - // and not by the value of the certainties. - BLOB_CHOICE* choice = - new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE); - blob_choices.push_back(choice); - rating -= 0.125f; - } - const double e = exp(1.0); // The base of natural logs. - int blob_number; - int right_chop_index = 0; - if (!assume_fixed_pitch_char_segment) { - // We only chop if the language is not fixed pitch like CJK. - SEAM* seam = nullptr; - while ((seam = chop_one_blob(boxes, blob_choices, word_res, - &blob_number)) != nullptr) { - word_res->InsertSeam(blob_number, seam); - BLOB_CHOICE* left_choice = blob_choices[blob_number]; - rating = left_choice->rating() / e; - left_choice->set_rating(rating); - left_choice->set_certainty(-rating); - // combine confidence w/ serial # - BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index, - rating - 0.125f, -rating, -1, - 0.0f, 0.0f, 0.0f, BCC_FAKE); - blob_choices.insert(right_choice, blob_number + 1); - } - } - word_res->CloneChoppedToRebuild(); - word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]); -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -/// Helper to compute the dispute resolution metric. -/// Disputed blob resolution. The aim is to give the blob to the most -/// appropriate boxfile box. Most of the time it is obvious, but if -/// two boxfile boxes overlap significantly it is not. 
If a small boxfile -/// box takes most of the blob, and a large boxfile box does too, then -/// we want the small boxfile box to get it, but if the small box -/// is much smaller than the blob, we don't want it to get it. -/// Details of the disputed blob resolution: -/// Given a box with area A, and a blob with area B, with overlap area C, -/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum -/// miss metric gets the blob. -static double BoxMissMetric(const TBOX& box1, const TBOX& box2) { - const int overlap_area = box1.intersection(box2).area(); - const int a = box1.area(); - const int b = box2.area(); - ASSERT_HOST(a != 0 && b != 0); - return 1.0 * (a - overlap_area) * (b - overlap_area) / a / b; -} - -#ifndef DISABLED_LEGACY_ENGINE - -/// Gather consecutive blobs that match the given box into the best_state -/// and corresponding correct_text. -/// -/// Fights over which box owns which blobs are settled by pre-chopping and -/// applying the blobs to box or next_box with the least non-overlap. -/// @return false if the box was in error, which can only be caused by -/// failing to find an appropriate blob for a box. -/// -/// This means that occasionally, blobs may be incorrectly segmented if the -/// chopper fails to find a suitable chop point. 
-bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box, - const TBOX& box, const TBOX* next_box, - const char* correct_text) { - if (applybox_debug > 1) { - tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text); - } - PAGE_RES_IT page_res_it(page_res); - WERD_RES* word_res; - for (word_res = page_res_it.word(); word_res != nullptr; - word_res = page_res_it.forward()) { - if (!word_res->box_word->bounding_box().major_overlap(box)) - continue; - if (applybox_debug > 1) { - tprintf("Checking word box:"); - word_res->box_word->bounding_box().print(); - } - int word_len = word_res->box_word->length(); - for (int i = 0; i < word_len; ++i) { - TBOX char_box = TBOX(); - int blob_count = 0; - for (blob_count = 0; i + blob_count < word_len; ++blob_count) { - TBOX blob_box = word_res->box_word->BlobBox(i + blob_count); - if (!blob_box.major_overlap(box)) - break; - if (word_res->correct_text[i + blob_count].length() > 0) - break; // Blob is claimed already. - if (next_box != nullptr) { - const double current_box_miss_metric = BoxMissMetric(blob_box, box); - const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box); - if (applybox_debug > 2) { - tprintf("Checking blob:"); - blob_box.print(); - tprintf("Current miss metric = %g, next = %g\n", - current_box_miss_metric, next_box_miss_metric); - } - if (current_box_miss_metric > next_box_miss_metric) - break; // Blob is a better match for next box. - } - char_box += blob_box; - } - if (blob_count > 0) { - if (applybox_debug > 1) { - tprintf("Index [%d, %d) seem good.\n", i, i + blob_count); - } - if (!char_box.almost_equal(box, 3) && - ((next_box != nullptr && box.x_gap(*next_box) < -3)|| - (prev_box != nullptr && prev_box->x_gap(box) < -3))) { - return false; - } - // We refine just the box_word, best_state and correct_text here. - // The rebuild_word is made in TidyUp. - // blob_count blobs are put together to match the box. 
Merge the - // box_word boxes, save the blob_count in the state and the text. - word_res->box_word->MergeBoxes(i, i + blob_count); - word_res->best_state[i] = blob_count; - word_res->correct_text[i] = correct_text; - if (applybox_debug > 2) { - tprintf("%d Blobs match: blob box:", blob_count); - word_res->box_word->BlobBox(i).print(); - tprintf("Matches box:"); - box.print(); - if (next_box != nullptr) { - tprintf("With next box:"); - next_box->print(); - } - } - // Eliminated best_state and correct_text entries for the consumed - // blobs. - for (int j = 1; j < blob_count; ++j) { - word_res->best_state.remove(i + 1); - word_res->correct_text.remove(i + 1); - } - // Assume that no box spans multiple source words, so we are done with - // this box. - if (applybox_debug > 1) { - tprintf("Best state = "); - for (int j = 0; j < word_res->best_state.size(); ++j) { - tprintf("%d ", word_res->best_state[j]); - } - tprintf("\n"); - tprintf("Correct text = [[ "); - for (int j = 0; j < word_res->correct_text.size(); ++j) { - tprintf("%s ", word_res->correct_text[j].string()); - } - tprintf("]]\n"); - } - return true; - } - } - } - if (applybox_debug > 0) { - tprintf("FAIL!\n"); - } - return false; // Failure. -} - -/// Consume all source blobs that strongly overlap the given box, -/// putting them into a new word, with the correct_text label. -/// Fights over which box owns which blobs are settled by -/// applying the blobs to box or next_box with the least non-overlap. -/// @return false if the box was in error, which can only be caused by -/// failing to find an overlapping blob for a box. 
-bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, - const TBOX& box, const TBOX* next_box, - const char* correct_text) { - if (applybox_debug > 1) { - tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text); - } - WERD* new_word = nullptr; - BLOCK_IT b_it(block_list); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (applybox_debug > 2) { - tprintf("Checking word:"); - word->bounding_box().print(); - } - if (word->text() != nullptr && word->text()[0] != '\0') - continue; // Ignore words that are already done. - if (!box.major_overlap(word->bounding_box())) - continue; - C_BLOB_IT blob_it(word->cblob_list()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); - blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - TBOX blob_box = blob->bounding_box(); - if (!blob_box.major_overlap(box)) - continue; - if (next_box != nullptr) { - const double current_box_miss_metric = BoxMissMetric(blob_box, box); - const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box); - if (applybox_debug > 2) { - tprintf("Checking blob:"); - blob_box.print(); - tprintf("Current miss metric = %g, next = %g\n", - current_box_miss_metric, next_box_miss_metric); - } - if (current_box_miss_metric > next_box_miss_metric) - continue; // Blob is a better match for next box. 
- } - if (applybox_debug > 2) { - tprintf("Blob match: blob:"); - blob_box.print(); - tprintf("Matches box:"); - box.print(); - if (next_box != nullptr) { - tprintf("With next box:"); - next_box->print(); - } - } - if (new_word == nullptr) { - // Make a new word with a single blob. - new_word = word->shallow_copy(); - new_word->set_text(correct_text); - w_it.add_to_end(new_word); - } - C_BLOB_IT new_blob_it(new_word->cblob_list()); - new_blob_it.add_to_end(blob_it.extract()); - } - } - } - } - if (new_word == nullptr && applybox_debug > 0) tprintf("FAIL!\n"); - return new_word != nullptr; -} - -/// Resegments the words by running the classifier in an attempt to find the -/// correct segmentation that produces the required string. -void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) { - PAGE_RES_IT pr_it(page_res); - WERD_RES* word_res; - for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) { - const WERD* word = word_res->word; - if (word->text() == nullptr || word->text()[0] == '\0') - continue; // Ignore words that have no text. - // Convert the correct text to a vector of UNICHAR_ID - GenericVector target_text; - if (!ConvertStringToUnichars(word->text(), &target_text)) { - tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n", - word->text()); - pr_it.DeleteCurrentWord(); - continue; - } - if (!FindSegmentation(target_text, word_res)) { - tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n", - word->text()); - pr_it.DeleteCurrentWord(); - continue; - } - } -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -/// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID. -/// @return false if an invalid UNICHAR_ID is encountered. 
-bool Tesseract::ConvertStringToUnichars(const char* utf8, - GenericVector* class_ids) { - for (int step = 0; *utf8 != '\0'; utf8 += step) { - const char* next_space = strchr(utf8, ' '); - if (next_space == nullptr) - next_space = utf8 + strlen(utf8); - step = next_space - utf8; - UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step); - if (class_id == INVALID_UNICHAR_ID) { - return false; - } - while (utf8[step] == ' ') - ++step; - class_ids->push_back(class_id); - } - return true; -} - -#ifndef DISABLED_LEGACY_ENGINE - - -/// Resegments the word to achieve the target_text from the classifier. -/// Returns false if the re-segmentation fails. -/// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and -/// applies a full search on the classifier results to find the best classified -/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity -/// substitutions ARE used. -bool Tesseract::FindSegmentation(const GenericVector& target_text, - WERD_RES* word_res) { - // Classify all required combinations of blobs and save results in choices. - const int word_length = word_res->box_word->length(); - GenericVector* choices = - new GenericVector[word_length]; - for (int i = 0; i < word_length; ++i) { - for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) { - BLOB_CHOICE_LIST* match_result = classify_piece( - word_res->seam_array, i, i + j - 1, "Applybox", - word_res->chopped_word, word_res->blamer_bundle); - if (applybox_debug > 2) { - tprintf("%d+%d:", i, j); - print_ratings_list("Segment:", match_result, unicharset); - } - choices[i].push_back(match_result); - } - } - // Search the segmentation graph for the target text. Must be an exact - // match. Using wildcards makes it difficult to find the correct - // segmentation even when it is there. 
- word_res->best_state.clear(); - GenericVector search_segmentation; - float best_rating = 0.0f; - SearchForText(choices, 0, word_length, target_text, 0, 0.0f, - &search_segmentation, &best_rating, &word_res->best_state); - for (int i = 0; i < word_length; ++i) - choices[i].delete_data_pointers(); - delete [] choices; - if (word_res->best_state.empty()) { - // Build the original segmentation and if it is the same length as the - // truth, assume it will do. - int blob_count = 1; - for (int s = 0; s < word_res->seam_array.size(); ++s) { - SEAM* seam = word_res->seam_array[s]; - if (!seam->HasAnySplits()) { - word_res->best_state.push_back(blob_count); - blob_count = 1; - } else { - ++blob_count; - } - } - word_res->best_state.push_back(blob_count); - if (word_res->best_state.size() != target_text.size()) { - word_res->best_state.clear(); // No good. Original segmentation bad size. - return false; - } - } - word_res->correct_text.clear(); - for (int i = 0; i < target_text.size(); ++i) { - word_res->correct_text.push_back( - STRING(unicharset.id_to_unichar(target_text[i]))); - } - return true; -} - -/// Recursive helper to find a match to the target_text (from text_index -/// position) in the choices (from choices_pos position). -/// @param choices is an array of GenericVectors, of length choices_length, -/// with each element representing a starting position in the word, and the -/// #GenericVector holding classification results for a sequence of consecutive -/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc. 
-/// @param choices_pos -/// @param choices_length -/// @param target_text -/// @param text_index -/// @param rating -/// @param segmentation -/// @param best_rating -/// @param best_segmentation -void Tesseract::SearchForText(const GenericVector* choices, - int choices_pos, int choices_length, - const GenericVector& target_text, - int text_index, - float rating, GenericVector* segmentation, - float* best_rating, - GenericVector* best_segmentation) { - const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs(); - for (int length = 1; length <= choices[choices_pos].size(); ++length) { - // Rating of matching choice or worst choice if no match. - float choice_rating = 0.0f; - // Find the corresponding best BLOB_CHOICE. - BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - const BLOB_CHOICE* choice = choice_it.data(); - choice_rating = choice->rating(); - UNICHAR_ID class_id = choice->unichar_id(); - if (class_id == target_text[text_index]) { - break; - } - // Search ambigs table. - if (class_id < table.size() && table[class_id] != nullptr) { - AmbigSpec_IT spec_it(table[class_id]); - for (spec_it.mark_cycle_pt(); !spec_it.cycled_list(); - spec_it.forward()) { - const AmbigSpec *ambig_spec = spec_it.data(); - // We'll only do 1-1. - if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID && - ambig_spec->correct_ngram_id == target_text[text_index]) - break; - } - if (!spec_it.cycled_list()) - break; // Found an ambig. - } - } - if (choice_it.cycled_list()) - continue; // No match. - segmentation->push_back(length); - if (choices_pos + length == choices_length && - text_index + 1 == target_text.size()) { - // This is a complete match. If the rating is good record a new best. 
- if (applybox_debug > 2) { - tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n", - rating + choice_rating, *best_rating, segmentation->size(), - best_segmentation->size()); - } - if (best_segmentation->empty() || rating + choice_rating < *best_rating) { - *best_segmentation = *segmentation; - *best_rating = rating + choice_rating; - } - } else if (choices_pos + length < choices_length && - text_index + 1 < target_text.size()) { - if (applybox_debug > 3) { - tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n", - target_text[text_index], - unicharset.id_to_unichar(target_text[text_index]), - choice_it.data()->unichar_id() == target_text[text_index] - ? "Match" : "Ambig", - choices_pos, length); - } - SearchForText(choices, choices_pos + length, choices_length, target_text, - text_index + 1, rating + choice_rating, segmentation, - best_rating, best_segmentation); - if (applybox_debug > 3) { - tprintf("End recursion for %d=%s\n", target_text[text_index], - unicharset.id_to_unichar(target_text[text_index])); - } - } - segmentation->truncate(segmentation->size() - 1); - } -} - -/// - Counts up the labelled words and the blobs within. -/// - Deletes all unused or emptied words, counting the unused ones. -/// - Resets W_BOL and W_EOL flags correctly. -/// - Builds the rebuild_word and rebuilds the box_word and the best_choice. 
-void Tesseract::TidyUp(PAGE_RES* page_res) { - int ok_blob_count = 0; - int bad_blob_count = 0; - int ok_word_count = 0; - int unlabelled_words = 0; - PAGE_RES_IT pr_it(page_res); - WERD_RES* word_res; - for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) { - int ok_in_word = 0; - int blob_count = word_res->correct_text.size(); - WERD_CHOICE* word_choice = new WERD_CHOICE(word_res->uch_set, blob_count); - word_choice->set_permuter(TOP_CHOICE_PERM); - for (int c = 0; c < blob_count; ++c) { - if (word_res->correct_text[c].length() > 0) { - ++ok_in_word; - } - // Since we only need a fake word_res->best_choice, the actual - // unichar_ids do not matter. Which is fortunate, since TidyUp() - // can be called while training Tesseract, at the stage where - // unicharset is not meaningful yet. - word_choice->append_unichar_id_space_allocated( - INVALID_UNICHAR_ID, word_res->best_state[c], 1.0f, -1.0f); - } - if (ok_in_word > 0) { - ok_blob_count += ok_in_word; - bad_blob_count += word_res->correct_text.size() - ok_in_word; - word_res->LogNewRawChoice(word_choice); - word_res->LogNewCookedChoice(1, false, word_choice); - } else { - ++unlabelled_words; - if (applybox_debug > 0) { - tprintf("APPLY_BOXES: Unlabelled word at :"); - word_res->word->bounding_box().print(); - } - pr_it.DeleteCurrentWord(); - delete word_choice; - } - } - pr_it.restart_page(); - for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) { - // Denormalize back to a BoxWord. 
- word_res->RebuildBestState(); - word_res->SetupBoxWord(); - word_res->word->set_flag(W_BOL, pr_it.prev_row() != pr_it.row()); - word_res->word->set_flag(W_EOL, pr_it.next_row() != pr_it.row()); - } - if (applybox_debug > 0) { - tprintf(" Found %d good blobs.\n", ok_blob_count); - if (bad_blob_count > 0) { - tprintf(" Leaving %d unlabelled blobs in %d words.\n", - bad_blob_count, ok_word_count); - } - if (unlabelled_words > 0) - tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words); - } -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -/** Logs a bad box by line in the box file and box coords.*/ -void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box, - const char *box_ch, const char *err_msg) { - tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n", - boxfile_lineno + 1, box_ch, - box.left(), box.bottom(), box.right(), box.top(), err_msg); -} - -/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/ -void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) { - PAGE_RES_IT pr_it(page_res); - for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; - word_res = pr_it.forward()) { - WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set, - word_res->correct_text.size()); - for (int i = 0; i < word_res->correct_text.size(); ++i) { - // The part before the first space is the real ground truth, and the - // rest is the bounding box location and page number. - GenericVector tokens; - word_res->correct_text[i].split(' ', &tokens); - UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string()); - choice->append_unichar_id_space_allocated(char_id, - word_res->best_state[i], - 0.0f, 0.0f); - } - word_res->ClearWordChoices(); - word_res->LogNewRawChoice(choice); - word_res->LogNewCookedChoice(1, false, choice); - } -} - -#ifndef DISABLED_LEGACY_ENGINE - - -/// Calls #LearnWord to extract features for labelled blobs within each word. -/// Features are stored in an internal buffer. 
-void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) { - PAGE_RES_IT pr_it(page_res); - int word_count = 0; - for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; - word_res = pr_it.forward()) { - LearnWord(fontname.string(), word_res); - ++word_count; - } - tprintf("Generated training data for %d words\n", word_count); -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/control.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/control.cpp deleted file mode 100644 index 9f2b8256..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/control.cpp +++ /dev/null @@ -1,2147 +0,0 @@ -/****************************************************************** - * File: control.cpp (Formerly control.c) - * Description: Module-independent matcher controller. - * Author: Ray Smith - * Created: Thu Apr 23 11:09:58 BST 1992 - * ReHacked: Tue Sep 22 08:42:49 BST 1992 Phil Cheatle - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include -#include // for int16_t, int32_t -#include // for fclose, fopen, FILE -#include // for clock -#include -#include "callcpp.h" -#include "control.h" -#ifndef DISABLED_LEGACY_ENGINE -#include "docqual.h" -#include "drawfx.h" -#include "fixspace.h" -#endif -#include "globals.h" -#include "lstmrecognizer.h" -#include "ocrclass.h" -#include "output.h" -#include "pageres.h" // for WERD_RES, PAGE_RES_IT, PAGE_RES, BLO... -#include "pgedit.h" -#include "reject.h" -#include "sorthelper.h" -#include "tesseractclass.h" -#include "tessvars.h" -#include "werdit.h" - -#define MIN_FONT_ROW_COUNT 8 -#define MAX_XHEIGHT_DIFF 3 - -const char* const kBackUpConfigFile = "tempconfigdata.config"; -// Min believable x-height for any text when refitting as a fraction of -// original x-height -const double kMinRefitXHeightFraction = 0.5; - - -/** - * Make a word from the selected blobs and run Tess on them. - * - * @param page_res recognise blobs - * @param selection_box within this box - */ -namespace tesseract { - -void Tesseract::recog_pseudo_word(PAGE_RES* page_res, - TBOX &selection_box) { - PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box); - if (it != nullptr) { - recog_interactive(it); - it->DeleteCurrentWord(); - delete it; - } -} - -/** - * Recognize a single word in interactive mode. - * - * @param pr_it the page results iterator - */ -bool Tesseract::recog_interactive(PAGE_RES_IT* pr_it) { - int16_t char_qual; - int16_t good_char_qual; - - WordData word_data(*pr_it); - SetupWordPassN(2, &word_data); - // LSTM doesn't run on pass2, but we want to run pass2 for tesseract. 
- if (lstm_recognizer_ == nullptr) { -#ifndef DISABLED_LEGACY_ENGINE - classify_word_and_language(2, pr_it, &word_data); -#endif // ndef DISABLED_LEGACY_ENGINE - } else { - classify_word_and_language(1, pr_it, &word_data); - } -#ifndef DISABLED_LEGACY_ENGINE - if (tessedit_debug_quality_metrics) { - WERD_RES* word_res = pr_it->word(); - word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual); - tprintf("\n%d chars; word_blob_quality: %d; outline_errs: %d; " - "char_quality: %d; good_char_quality: %d\n", - word_res->reject_map.length(), - word_blob_quality(word_res, pr_it->row()->row), - word_outline_errs(word_res), char_qual, good_char_qual); - } -#endif // ndef DISABLED_LEGACY_ENGINE - return true; -} - -// Helper function to check for a target word and handle it appropriately. -// Inspired by Jetsoft's requirement to process only single words on pass2 -// and beyond. -// If word_config is not null: -// If the word_box and target_word_box overlap, read the word_config file -// else reset to previous config data. -// return true. -// else -// If the word_box and target_word_box overlap or pass <= 1, return true. -// Note that this function uses a fixed temporary file for storing the previous -// configs, so it is neither thread-safe, nor process-safe, but the assumption -// is that it will only be used for one debug window at a time. -// -// Since this function is used for debugging (and not to change OCR results) -// set only debug params from the word config file. 
-bool Tesseract::ProcessTargetWord(const TBOX& word_box, - const TBOX& target_word_box, - const char* word_config, - int pass) { - if (word_config != nullptr) { - if (word_box.major_overlap(target_word_box)) { - if (backup_config_file_ == nullptr) { - backup_config_file_ = kBackUpConfigFile; - FILE* config_fp = fopen(backup_config_file_, "wb"); - if (config_fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", backup_config_file_); - } else { - ParamUtils::PrintParams(config_fp, params()); - fclose(config_fp); - } - ParamUtils::ReadParamsFile(word_config, - SET_PARAM_CONSTRAINT_DEBUG_ONLY, - params()); - } - } else { - if (backup_config_file_ != nullptr) { - ParamUtils::ReadParamsFile(backup_config_file_, - SET_PARAM_CONSTRAINT_DEBUG_ONLY, - params()); - backup_config_file_ = nullptr; - } - } - } else if (pass > 1 && !word_box.major_overlap(target_word_box)) { - return false; - } - return true; -} - -/** If tesseract is to be run, sets the words up ready for it. */ -void Tesseract::SetupAllWordsPassN(int pass_n, - const TBOX* target_word_box, - const char* word_config, - PAGE_RES* page_res, - GenericVector* words) { - // Prepare all the words. - PAGE_RES_IT page_res_it(page_res); - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - if (target_word_box == nullptr || - ProcessTargetWord(page_res_it.word()->word->bounding_box(), - *target_word_box, word_config, 1)) { - words->push_back(WordData(page_res_it)); - } - } - // Setup all the words for recognition with polygonal approximation. - for (int w = 0; w < words->size(); ++w) { - SetupWordPassN(pass_n, &(*words)[w]); - if (w > 0) (*words)[w].prev_word = &(*words)[w - 1]; - } -} - -// Sets up the single word ready for whichever engine is to be run. 
-void Tesseract::SetupWordPassN(int pass_n, WordData* word) { - if (pass_n == 1 || !word->word->done) { - if (pass_n == 1) { - word->word->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - word->row, word->block); - } else if (pass_n == 2) { - // TODO(rays) Should we do this on pass1 too? - word->word->caps_height = 0.0; - if (word->word->x_height == 0.0f) - word->word->x_height = word->row->x_height(); - } - word->lang_words.truncate(0); - for (int s = 0; s <= sub_langs_.size(); ++s) { - // The sub_langs_.size() entry is for the master language. - Tesseract* lang_t = s < sub_langs_.size() ? sub_langs_[s] : this; - WERD_RES* word_res = new WERD_RES; - word_res->InitForRetryRecognition(*word->word); - word->lang_words.push_back(word_res); - // LSTM doesn't get setup for pass2. - if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) { - word_res->SetupForRecognition( - lang_t->unicharset, lang_t, BestPix(), - lang_t->tessedit_ocr_engine_mode, nullptr, - lang_t->classify_bln_numeric_mode, - lang_t->textord_use_cjk_fp_model, - lang_t->poly_allow_detailed_fx, word->row, word->block); - } - } - } -} - -// Runs word recognition on all the words. -bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, - PAGE_RES_IT* pr_it, - GenericVector* words) { - // TODO(rays) Before this loop can be parallelized (it would yield a massive - // speed-up) all remaining member globals need to be converted to local/heap - // (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be - // added. The results will be significantly different with adaption on, and - // deterioration will need investigation. 
- pr_it->restart_page(); - for (int w = 0; w < words->size(); ++w) { - WordData* word = &(*words)[w]; - if (w > 0) word->prev_word = &(*words)[w - 1]; - if (monitor != nullptr) { - monitor->ocr_alive = TRUE; - if (pass_n == 1) { - monitor->progress = 70 * w / words->size(); - if (monitor->progress_callback2 != nullptr) { - TBOX box = pr_it->word()->word->bounding_box(); - (*monitor->progress_callback2)(monitor, box.left(), - box.right(), box.top(), box.bottom()); - } - } else { - monitor->progress = 70 + 30 * w / words->size(); - if (monitor->progress_callback2 != nullptr) { - (*monitor->progress_callback2)(monitor, 0, 0, 0, 0); - } - } - if (monitor->deadline_exceeded() || - (monitor->cancel != nullptr && (*monitor->cancel)(monitor->cancel_this, - words->size()))) { - // Timeout. Fake out the rest of the words. - for (; w < words->size(); ++w) { - (*words)[w].word->SetupFake(unicharset); - } - return false; - } - } - if (word->word->tess_failed) { - int s; - for (s = 0; s < word->lang_words.size() && - word->lang_words[s]->tess_failed; ++s) {} - // If all are failed, skip it. Image words are skipped by this test. - if (s > word->lang_words.size()) continue; - } - // Sync pr_it with the wth WordData. - while (pr_it->word() != nullptr && pr_it->word() != word->word) - pr_it->forward(); - ASSERT_HOST(pr_it->word() != nullptr); - bool make_next_word_fuzzy = false; - if (!AnyLSTMLang() && - ReassignDiacritics(pass_n, pr_it, &make_next_word_fuzzy)) { - // Needs to be setup again to see the new outlines in the chopped_word. 
- SetupWordPassN(pass_n, word); - } - - classify_word_and_language(pass_n, pr_it, word); - if (tessedit_dump_choices || debug_noise_removal) { - tprintf("Pass%d: %s [%s]\n", pass_n, - word->word->best_choice->unichar_string().string(), - word->word->best_choice->debug_string().string()); - } - pr_it->forward(); - if (make_next_word_fuzzy && pr_it->word() != nullptr) { - pr_it->MakeCurrentWordFuzzy(); - } - } - return true; -} - -/** - * recog_all_words() - * - * Walk the page_res, recognizing all the words. - * If monitor is not null, it is used as a progress monitor/timeout/cancel. - * If dopasses is 0, all recognition passes are run, - * 1 just pass 1, 2 passes2 and higher. - * If target_word_box is not null, special things are done to words that - * overlap the target_word_box: - * if word_config is not null, the word config file is read for just the - * target word(s), otherwise, on pass 2 and beyond ONLY the target words - * are processed (Jetsoft modification.) - * Returns false if we cancelled prematurely. - * - * @param page_res page structure - * @param monitor progress monitor - * @param word_config word_config file - * @param target_word_box specifies just to extract a rectangle - * @param dopasses 0 - all, 1 just pass 1, 2 passes 2 and higher - */ - -bool Tesseract::recog_all_words(PAGE_RES* page_res, - ETEXT_DESC* monitor, - const TBOX* target_word_box, - const char* word_config, - int dopasses) { - PAGE_RES_IT page_res_it(page_res); - - if (tessedit_minimal_rej_pass1) { - tessedit_test_adaption.set_value (TRUE); - tessedit_minimal_rejection.set_value (TRUE); - } - - if (dopasses==0 || dopasses==1) { - page_res_it.restart_page(); - // ****************** Pass 1 ******************* - - #ifndef DISABLED_LEGACY_ENGINE - // If the adaptive classifier is full switch to one we prepared earlier, - // ie on the previous page. If the current adaptive classifier is non-empty, - // prepare a backup starting at this page, in case it fills up. 
Do all this - // independently for each language. - if (AdaptiveClassifierIsFull()) { - SwitchAdaptiveClassifier(); - } else if (!AdaptiveClassifierIsEmpty()) { - StartBackupAdaptiveClassifier(); - } - // Now check the sub-langs as well. - for (int i = 0; i < sub_langs_.size(); ++i) { - if (sub_langs_[i]->AdaptiveClassifierIsFull()) { - sub_langs_[i]->SwitchAdaptiveClassifier(); - } else if (!sub_langs_[i]->AdaptiveClassifierIsEmpty()) { - sub_langs_[i]->StartBackupAdaptiveClassifier(); - } - } - - #endif // ndef DISABLED_LEGACY_ENGINE - - // Set up all words ready for recognition, so that if parallelism is on - // all the input and output classes are ready to run the classifier. - GenericVector words; - SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words); - #ifndef DISABLED_LEGACY_ENGINE - if (tessedit_parallelize) { - PrerecAllWordsPar(words); - } - #endif // ndef DISABLED_LEGACY_ENGINE - - stats_.word_count = words.size(); - - stats_.dict_words = 0; - stats_.doc_blob_quality = 0; - stats_.doc_outline_errs = 0; - stats_.doc_char_quality = 0; - stats_.good_char_count = 0; - stats_.doc_good_char_quality = 0; - - most_recently_used_ = this; - // Run pass 1 word recognition. - if (!RecogAllWordsPassN(1, monitor, &page_res_it, &words)) return false; - // Pass 1 post-processing. - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - if (page_res_it.word()->word->flag(W_REP_CHAR)) { - fix_rep_char(&page_res_it); - continue; - } - - // Count dict words. - if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM) - ++(stats_.dict_words); - - // Update misadaption log (we only need to do it on pass 1, since - // adaption only happens on this pass). 
- if (page_res_it.word()->blamer_bundle != nullptr && - page_res_it.word()->blamer_bundle->misadaption_debug().length() > 0) { - page_res->misadaption_log.push_back( - page_res_it.word()->blamer_bundle->misadaption_debug()); - } - } - } - - if (dopasses == 1) return true; - - #ifndef DISABLED_LEGACY_ENGINE - - // ****************** Pass 2 ******************* - if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption && - AnyTessLang()) { - page_res_it.restart_page(); - GenericVector words; - SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words); - if (tessedit_parallelize) { - PrerecAllWordsPar(words); - } - most_recently_used_ = this; - // Run pass 2 word recognition. - if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false; - } - - // The next passes are only required for Tess-only. - if (AnyTessLang() && !AnyLSTMLang()) { - // ****************** Pass 3 ******************* - // Fix fuzzy spaces. - set_global_loc_code(LOC_FUZZY_SPACE); - - if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces - && !tessedit_word_for_word && !right_to_left()) - fix_fuzzy_spaces(monitor, stats_.word_count, page_res); - - // ****************** Pass 4 ******************* - if (tessedit_enable_dict_correction) dictionary_correction_pass(page_res); - if (tessedit_enable_bigram_correction) bigram_correction_pass(page_res); - - // ****************** Pass 5,6 ******************* - rejection_passes(page_res, monitor, target_word_box, word_config); - - // ****************** Pass 8 ******************* - font_recognition_pass(page_res); - - // ****************** Pass 9 ******************* - // Check the correctness of the final results. - blamer_pass(page_res); - script_pos_pass(page_res); - } - - #endif // ndef DISABLED_LEGACY_ENGINE - - // Write results pass. - set_global_loc_code(LOC_WRITE_RESULTS); - // This is now redundant, but retained commented so show how to obtain - // bounding boxes and style information. 
- - #ifndef DISABLED_LEGACY_ENGINE - // changed by jetsoft - // needed for dll to output memory structure - if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv)) - output_pass(page_res_it, target_word_box); - // end jetsoft - #endif //ndef DISABLED_LEGACY_ENGINE - - const PageSegMode pageseg_mode = static_cast( - static_cast(tessedit_pageseg_mode)); - textord_.CleanupSingleRowResult(pageseg_mode, page_res); - - // Remove empty words, as these mess up the result iterators. - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - const WERD_RES* word = page_res_it.word(); - const POLY_BLOCK* pb = page_res_it.block()->block != nullptr - ? page_res_it.block()->block->pdblk.poly_block() - : nullptr; - if (word->best_choice == nullptr || word->best_choice->length() == 0 || - (word->best_choice->IsAllSpaces() && (pb == nullptr || pb->IsText()))) { - page_res_it.DeleteCurrentWord(); - } - } - - if (monitor != nullptr) { - monitor->progress = 100; - } - return true; -} - -#ifndef DISABLED_LEGACY_ENGINE - -void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { - PAGE_RES_IT word_it(page_res); - - WERD_RES *w_prev = nullptr; - WERD_RES *w = word_it.word(); - while (true) { - w_prev = w; - while (word_it.forward() != nullptr && - (!word_it.word() || word_it.word()->part_of_combo)) { - // advance word_it, skipping over parts of combos - } - if (!word_it.word()) break; - w = word_it.word(); - if (!w || !w_prev || w->uch_set != w_prev->uch_set) { - continue; - } - if (w_prev->word->flag(W_REP_CHAR) || w->word->flag(W_REP_CHAR)) { - if (tessedit_bigram_debug) { - tprintf("Skipping because one of the words is W_REP_CHAR\n"); - } - continue; - } - // Two words sharing the same language model, excellent! 
- GenericVector overrides_word1; - GenericVector overrides_word2; - - const STRING orig_w1_str = w_prev->best_choice->unichar_string(); - const STRING orig_w2_str = w->best_choice->unichar_string(); - WERD_CHOICE prev_best(w->uch_set); - { - int w1start, w1end; - w_prev->best_choice->GetNonSuperscriptSpan(&w1start, &w1end); - prev_best = w_prev->best_choice->shallow_copy(w1start, w1end); - } - WERD_CHOICE this_best(w->uch_set); - { - int w2start, w2end; - w->best_choice->GetNonSuperscriptSpan(&w2start, &w2end); - this_best = w->best_choice->shallow_copy(w2start, w2end); - } - - if (w->tesseract->getDict().valid_bigram(prev_best, this_best)) { - if (tessedit_bigram_debug) { - tprintf("Top choice \"%s %s\" verified by bigram model.\n", - orig_w1_str.string(), orig_w2_str.string()); - } - continue; - } - if (tessedit_bigram_debug > 2) { - tprintf("Examining alt choices for \"%s %s\".\n", - orig_w1_str.string(), orig_w2_str.string()); - } - if (tessedit_bigram_debug > 1) { - if (!w_prev->best_choices.singleton()) { - w_prev->PrintBestChoices(); - } - if (!w->best_choices.singleton()) { - w->PrintBestChoices(); - } - } - float best_rating = 0.0; - int best_idx = 0; - WERD_CHOICE_IT prev_it(&w_prev->best_choices); - for (prev_it.mark_cycle_pt(); !prev_it.cycled_list(); prev_it.forward()) { - WERD_CHOICE *p1 = prev_it.data(); - WERD_CHOICE strip1(w->uch_set); - { - int p1start, p1end; - p1->GetNonSuperscriptSpan(&p1start, &p1end); - strip1 = p1->shallow_copy(p1start, p1end); - } - WERD_CHOICE_IT w_it(&w->best_choices); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD_CHOICE *p2 = w_it.data(); - WERD_CHOICE strip2(w->uch_set); - { - int p2start, p2end; - p2->GetNonSuperscriptSpan(&p2start, &p2end); - strip2 = p2->shallow_copy(p2start, p2end); - } - if (w->tesseract->getDict().valid_bigram(strip1, strip2)) { - overrides_word1.push_back(p1); - overrides_word2.push_back(p2); - if (overrides_word1.size() == 1 || - p1->rating() + p2->rating() < 
best_rating) { - best_rating = p1->rating() + p2->rating(); - best_idx = overrides_word1.size() - 1; - } - } - } - } - if (!overrides_word1.empty()) { - // Excellent, we have some bigram matches. - if (EqualIgnoringCaseAndTerminalPunct(*w_prev->best_choice, - *overrides_word1[best_idx]) && - EqualIgnoringCaseAndTerminalPunct(*w->best_choice, - *overrides_word2[best_idx])) { - if (tessedit_bigram_debug > 1) { - tprintf("Top choice \"%s %s\" verified (sans case) by bigram " - "model.\n", orig_w1_str.string(), orig_w2_str.string()); - } - continue; - } - const STRING new_w1_str = overrides_word1[best_idx]->unichar_string(); - const STRING new_w2_str = overrides_word2[best_idx]->unichar_string(); - if (new_w1_str != orig_w1_str) { - w_prev->ReplaceBestChoice(overrides_word1[best_idx]); - } - if (new_w2_str != orig_w2_str) { - w->ReplaceBestChoice(overrides_word2[best_idx]); - } - if (tessedit_bigram_debug > 0) { - STRING choices_description; - int num_bigram_choices - = overrides_word1.size() * overrides_word2.size(); - if (num_bigram_choices == 1) { - choices_description = "This was the unique bigram choice."; - } else { - if (tessedit_bigram_debug > 1) { - STRING bigrams_list; - const int kMaxChoicesToPrint = 20; - for (int i = 0; i < overrides_word1.size() && - i < kMaxChoicesToPrint; i++) { - if (i > 0) { bigrams_list += ", "; } - WERD_CHOICE *p1 = overrides_word1[i]; - WERD_CHOICE *p2 = overrides_word2[i]; - bigrams_list += p1->unichar_string() + " " + p2->unichar_string(); - } - choices_description = "There were many choices: {"; - choices_description += bigrams_list; - choices_description += "}"; - } else { - choices_description.add_str_int("There were ", num_bigram_choices); - choices_description += " compatible bigrams."; - } - } - tprintf("Replaced \"%s %s\" with \"%s %s\" with bigram model. 
%s\n", - orig_w1_str.string(), orig_w2_str.string(), - new_w1_str.string(), new_w2_str.string(), - choices_description.string()); - } - } - } -} - -void Tesseract::rejection_passes(PAGE_RES* page_res, - ETEXT_DESC* monitor, - const TBOX* target_word_box, - const char* word_config) { - PAGE_RES_IT page_res_it(page_res); - // ****************** Pass 5 ******************* - // Gather statistics on rejects. - int word_index = 0; - while (!tessedit_test_adaption && page_res_it.word() != nullptr) { - set_global_loc_code(LOC_MM_ADAPT); - WERD_RES* word = page_res_it.word(); - word_index++; - if (monitor != nullptr) { - monitor->ocr_alive = TRUE; - monitor->progress = 95 + 5 * word_index / stats_.word_count; - } - if (word->rebuild_word == nullptr) { - // Word was not processed by tesseract. - page_res_it.forward(); - continue; - } - check_debug_pt(word, 70); - - // changed by jetsoft - // specific to its needs to extract one word when need - if (target_word_box && - !ProcessTargetWord(word->word->bounding_box(), - *target_word_box, word_config, 4)) { - page_res_it.forward(); - continue; - } - // end jetsoft - - page_res_it.rej_stat_word(); - const int chars_in_word = word->reject_map.length(); - const int rejects_in_word = word->reject_map.reject_count(); - - const int blob_quality = word_blob_quality(word, page_res_it.row()->row); - stats_.doc_blob_quality += blob_quality; - const int outline_errs = word_outline_errs(word); - stats_.doc_outline_errs += outline_errs; - int16_t all_char_quality; - int16_t accepted_all_char_quality; - word_char_quality(word, page_res_it.row()->row, - &all_char_quality, &accepted_all_char_quality); - stats_.doc_char_quality += all_char_quality; - const uint8_t permuter_type = word->best_choice->permuter(); - if ((permuter_type == SYSTEM_DAWG_PERM) || - (permuter_type == FREQ_DAWG_PERM) || - (permuter_type == USER_DAWG_PERM)) { - stats_.good_char_count += chars_in_word - rejects_in_word; - stats_.doc_good_char_quality += 
accepted_all_char_quality; - } - check_debug_pt(word, 80); - if (tessedit_reject_bad_qual_wds && - (blob_quality == 0) && (outline_errs >= chars_in_word)) - word->reject_map.rej_word_bad_quality(); - check_debug_pt(word, 90); - page_res_it.forward(); - } - - if (tessedit_debug_quality_metrics) { - tprintf - ("QUALITY: num_chs= %d num_rejs= %d %5.3f blob_qual= %d %5.3f" - " outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n", - page_res->char_count, page_res->rej_count, - page_res->rej_count / static_cast(page_res->char_count), - stats_.doc_blob_quality, - stats_.doc_blob_quality / static_cast(page_res->char_count), - stats_.doc_outline_errs, - stats_.doc_outline_errs / static_cast(page_res->char_count), - stats_.doc_char_quality, - stats_.doc_char_quality / static_cast(page_res->char_count), - stats_.doc_good_char_quality, - (stats_.good_char_count > 0) ? - (stats_.doc_good_char_quality / - static_cast(stats_.good_char_count)) : 0.0); - } - bool good_quality_doc = - ((page_res->rej_count / static_cast(page_res->char_count)) <= - quality_rej_pc) && - (stats_.doc_blob_quality / static_cast(page_res->char_count) >= - quality_blob_pc) && - (stats_.doc_outline_errs / static_cast(page_res->char_count) <= - quality_outline_pc) && - (stats_.doc_char_quality / static_cast(page_res->char_count) >= - quality_char_pc); - - // ****************** Pass 6 ******************* - // Do whole document or whole block rejection pass - if (!tessedit_test_adaption) { - set_global_loc_code(LOC_DOC_BLK_REJ); - quality_based_rejection(page_res_it, good_quality_doc); - } -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -void Tesseract::blamer_pass(PAGE_RES* page_res) { - if (!wordrec_run_blamer) return; - PAGE_RES_IT page_res_it(page_res); - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - BlamerBundle::LastChanceBlame(wordrec_debug_blamer, word); - 
page_res->blame_reasons[word->blamer_bundle->incorrect_result_reason()]++; - } - tprintf("Blame reasons:\n"); - for (int bl = 0; bl < IRR_NUM_REASONS; ++bl) { - tprintf("%s %d\n", BlamerBundle::IncorrectReasonName( - static_cast(bl)), - page_res->blame_reasons[bl]); - } - if (page_res->misadaption_log.length() > 0) { - tprintf("Misadaption log:\n"); - for (int i = 0; i < page_res->misadaption_log.length(); ++i) { - tprintf("%s\n", page_res->misadaption_log[i].string()); - } - } -} - -// Sets script positions and detects smallcaps on all output words. -void Tesseract::script_pos_pass(PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - WERD_RES* word = page_res_it.word(); - if (word->word->flag(W_REP_CHAR)) { - page_res_it.forward(); - continue; - } - const float x_height = page_res_it.block()->block->x_height(); - float word_x_height = word->x_height; - if (word_x_height < word->best_choice->min_x_height() || - word_x_height > word->best_choice->max_x_height()) { - word_x_height = (word->best_choice->min_x_height() + - word->best_choice->max_x_height()) / 2.0f; - } - // Test for small caps. Word capheight must be close to block xheight, - // and word must contain no lower case letters, and at least one upper case. - const double small_cap_xheight = x_height * kXHeightCapRatio; - const double small_cap_delta = (x_height - small_cap_xheight) / 2.0; - if (word->uch_set->script_has_xheight() && - small_cap_xheight - small_cap_delta <= word_x_height && - word_x_height <= small_cap_xheight + small_cap_delta) { - // Scan for upper/lower. 
- int num_upper = 0; - int num_lower = 0; - for (int i = 0; i < word->best_choice->length(); ++i) { - if (word->uch_set->get_isupper(word->best_choice->unichar_id(i))) - ++num_upper; - else if (word->uch_set->get_islower(word->best_choice->unichar_id(i))) - ++num_lower; - } - if (num_upper > 0 && num_lower == 0) - word->small_caps = true; - } - word->SetScriptPositions(); - } -} - -// Helper finds the gap between the index word and the next. -static void WordGap(const PointerVector& words, int index, int* right, - int* next_left) { - *right = -INT32_MAX; - *next_left = INT32_MAX; - if (index < words.size()) { - *right = words[index]->word->bounding_box().right(); - if (index + 1 < words.size()) - *next_left = words[index + 1]->word->bounding_box().left(); - } -} - -// Factored helper computes the rating, certainty, badness and validity of -// the permuter of the words in [first_index, end_index). -static void EvaluateWordSpan(const PointerVector& words, - int first_index, int end_index, float* rating, - float* certainty, bool* bad, - bool* valid_permuter) { - if (end_index <= first_index) { - *bad = true; - *valid_permuter = false; - } - for (int index = first_index; index < end_index && index < words.size(); - ++index) { - WERD_CHOICE* choice = words[index]->best_choice; - if (choice == nullptr) { - *bad = true; - } else { - *rating += choice->rating(); - *certainty = std::min(*certainty, choice->certainty()); - if (!Dict::valid_word_permuter(choice->permuter(), false)) - *valid_permuter = false; - } - } -} - -// Helper chooses the best combination of words, transferring good ones from -// new_words to best_words. To win, a new word must have (better rating and -// certainty) or (better permuter status and rating within rating ratio and -// certainty within certainty margin) than current best. -// All the new_words are consumed (moved to best_words or deleted.) 
-// The return value is the number of new_words used minus the number of -// best_words that remain in the output. -static int SelectBestWords(double rating_ratio, - double certainty_margin, - bool debug, - PointerVector* new_words, - PointerVector* best_words) { - // Process the smallest groups of words that have an overlapping word - // boundary at the end. - GenericVector out_words; - // Index into each word vector (best, new). - int b = 0, n = 0; - int num_best = 0, num_new = 0; - while (b < best_words->size() || n < new_words->size()) { - // Start of the current run in each. - int start_b = b, start_n = n; - while (b < best_words->size() || n < new_words->size()) { - int b_right = -INT32_MAX; - int next_b_left = INT32_MAX; - WordGap(*best_words, b, &b_right, &next_b_left); - int n_right = -INT32_MAX; - int next_n_left = INT32_MAX; - WordGap(*new_words, n, &n_right, &next_n_left); - if (std::max(b_right, n_right) < std::min(next_b_left, next_n_left)) { - // The word breaks overlap. [start_b,b] and [start_n, n] match. - break; - } - // Keep searching for the matching word break. - if ((b_right < n_right && b < best_words->size()) || - n == new_words->size()) - ++b; - else - ++n; - } - // Rating of the current run in each. - float b_rating = 0.0f, n_rating = 0.0f; - // Certainty of the current run in each. - float b_certainty = 0.0f, n_certainty = 0.0f; - // True if any word is missing its best choice. - bool b_bad = false, n_bad = false; - // True if all words have a valid permuter. - bool b_valid_permuter = true, n_valid_permuter = true; - const int end_b = b < best_words->size() ? b + 1 : b; - const int end_n = n < new_words->size() ? 
n + 1 : n; - EvaluateWordSpan(*best_words, start_b, end_b, &b_rating, &b_certainty, - &b_bad, &b_valid_permuter); - EvaluateWordSpan(*new_words, start_n, end_n, &n_rating, &n_certainty, - &n_bad, &n_valid_permuter); - bool new_better = false; - if (!n_bad && (b_bad || (n_certainty > b_certainty && - n_rating < b_rating) || - (!b_valid_permuter && n_valid_permuter && - n_rating < b_rating * rating_ratio && - n_certainty > b_certainty - certainty_margin))) { - // New is better. - for (int i = start_n; i < end_n; ++i) { - out_words.push_back((*new_words)[i]); - (*new_words)[i] = nullptr; - ++num_new; - } - new_better = true; - } else if (!b_bad) { - // Current best is better. - for (int i = start_b; i < end_b; ++i) { - out_words.push_back((*best_words)[i]); - (*best_words)[i] = nullptr; - ++num_best; - } - } - if (debug) { - tprintf("%d new words %s than %d old words: r: %g v %g c: %g v %g" - " valid dict: %d v %d\n", - end_n - start_n, new_better ? "better" : "worse", - end_b - start_b, n_rating, b_rating, - n_certainty, b_certainty, n_valid_permuter, b_valid_permuter); - } - // Move on to the next group. - b = end_b; - n = end_n; - } - // Transfer from out_words to best_words. - best_words->clear(); - for (int i = 0; i < out_words.size(); ++i) - best_words->push_back(out_words[i]); - return num_new - num_best; -} - -// Helper to recognize the word using the given (language-specific) tesseract. -// Returns positive if this recognizer found more new best words than the -// number kept from best_words. -int Tesseract::RetryWithLanguage(const WordData& word_data, - WordRecognizer recognizer, bool debug, - WERD_RES** in_word, - PointerVector* best_words) { - if (debug) { - tprintf("Trying word using lang %s, oem %d\n", - lang.string(), static_cast(tessedit_ocr_engine_mode)); - } - // Run the recognizer on the word. 
- PointerVector new_words; - (this->*recognizer)(word_data, in_word, &new_words); - if (new_words.empty()) { - // Transfer input word to new_words, as the classifier must have put - // the result back in the input. - new_words.push_back(*in_word); - *in_word = nullptr; - } - if (debug) { - for (int i = 0; i < new_words.size(); ++i) - new_words[i]->DebugTopChoice("Lang result"); - } - // Initial version is a bit of a hack based on better certainty and rating - // or a dictionary vs non-dictionary word. - return SelectBestWords(classify_max_rating_ratio, - classify_max_certainty_margin, - debug, &new_words, best_words); -} - -// Helper returns true if all the words are acceptable. -static bool WordsAcceptable(const PointerVector& words) { - for (int w = 0; w < words.size(); ++w) { - if (words[w]->tess_failed || !words[w]->tess_accepted) return false; - } - return true; -} - -// Moves good-looking "noise"/diacritics from the reject list to the main -// blob list on the current word. Returns true if anything was done, and -// sets make_next_word_fuzzy if blob(s) were added to the end of the word. -bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT* pr_it, - bool* make_next_word_fuzzy) { -#ifdef DISABLED_LEGACY_ENGINE - return false; -#else - *make_next_word_fuzzy = false; - WERD* real_word = pr_it->word()->word; - if (real_word->rej_cblob_list()->empty() || - real_word->cblob_list()->empty() || - real_word->rej_cblob_list()->length() > noise_maxperword) - return false; - real_word->rej_cblob_list()->sort(&C_BLOB::SortByXMiddle); - // Get the noise outlines into a vector with matching bool map. 
- GenericVector outlines; - real_word->GetNoiseOutlines(&outlines); - GenericVector word_wanted; - GenericVector overlapped_any_blob; - GenericVector target_blobs; - AssignDiacriticsToOverlappingBlobs(outlines, pass, real_word, pr_it, - &word_wanted, &overlapped_any_blob, - &target_blobs); - // Filter the outlines that overlapped any blob and put them into the word - // now. This simplifies the remaining task and also makes it more accurate - // as it has more completed blobs to work on. - GenericVector wanted; - GenericVector wanted_blobs; - GenericVector wanted_outlines; - int num_overlapped = 0; - int num_overlapped_used = 0; - for (int i = 0; i < overlapped_any_blob.size(); ++i) { - if (overlapped_any_blob[i]) { - ++num_overlapped; - if (word_wanted[i]) ++num_overlapped_used; - wanted.push_back(word_wanted[i]); - wanted_blobs.push_back(target_blobs[i]); - wanted_outlines.push_back(outlines[i]); - outlines[i] = nullptr; - } - } - real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr); - AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, - &target_blobs); - int non_overlapped = 0; - int non_overlapped_used = 0; - for (int i = 0; i < word_wanted.size(); ++i) { - if (word_wanted[i]) ++non_overlapped_used; - if (outlines[i] != nullptr) ++non_overlapped_used; - } - if (debug_noise_removal) { - tprintf("Used %d/%d overlapped %d/%d non-overlaped diacritics on word:", - num_overlapped_used, num_overlapped, non_overlapped_used, - non_overlapped); - real_word->bounding_box().print(); - } - // Now we have decided which outlines we want, put them into the real_word. - if (real_word->AddSelectedOutlines(word_wanted, target_blobs, outlines, - make_next_word_fuzzy)) { - pr_it->MakeCurrentWordFuzzy(); - } - // TODO(rays) Parts of combos have a deep copy of the real word, and need - // to have their noise outlines moved/assigned in the same way!! 
- return num_overlapped_used != 0 || non_overlapped_used != 0; -#endif // ndef DISABLED_LEGACY_ENGINE -} - -// Attempts to put noise/diacritic outlines into the blobs that they overlap. -// Input: a set of noisy outlines that probably belong to the real_word. -// Output: word_wanted indicates which outlines are to be assigned to a blob, -// target_blobs indicates which to assign to, and overlapped_any_blob is -// true for all outlines that overlapped a blob. -void Tesseract::AssignDiacriticsToOverlappingBlobs( - const GenericVector& outlines, int pass, WERD* real_word, - PAGE_RES_IT* pr_it, GenericVector* word_wanted, - GenericVector* overlapped_any_blob, - GenericVector* target_blobs) { -#ifndef DISABLED_LEGACY_ENGINE - GenericVector blob_wanted; - word_wanted->init_to_size(outlines.size(), false); - overlapped_any_blob->init_to_size(outlines.size(), false); - target_blobs->init_to_size(outlines.size(), nullptr); - // For each real blob, find the outlines that seriously overlap it. - // A single blob could be several merged characters, so there can be quite - // a few outlines overlapping, and the full engine needs to be used to chop - // and join to get a sensible result. 
- C_BLOB_IT blob_it(real_word->cblob_list()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - const TBOX blob_box = blob->bounding_box(); - blob_wanted.init_to_size(outlines.size(), false); - int num_blob_outlines = 0; - for (int i = 0; i < outlines.size(); ++i) { - if (blob_box.major_x_overlap(outlines[i]->bounding_box()) && - !(*word_wanted)[i]) { - blob_wanted[i] = true; - (*overlapped_any_blob)[i] = true; - ++num_blob_outlines; - } - } - if (debug_noise_removal) { - tprintf("%d noise outlines overlap blob at:", num_blob_outlines); - blob_box.print(); - } - // If any outlines overlap the blob, and not too many, classify the blob - // (using the full engine, languages and all), and choose the maximal - // combination of outlines that doesn't hurt the end-result classification - // by too much. Mark them as wanted. - if (0 < num_blob_outlines && num_blob_outlines < noise_maxperblob) { - if (SelectGoodDiacriticOutlines(pass, noise_cert_basechar, pr_it, blob, - outlines, num_blob_outlines, - &blob_wanted)) { - for (int i = 0; i < blob_wanted.size(); ++i) { - if (blob_wanted[i]) { - // Claim the outline and record where it is going. - (*word_wanted)[i] = true; - (*target_blobs)[i] = blob; - } - } - } - } - } -#endif // ndef DISABLED_LEGACY_ENGINE -} - -// Attempts to assign non-overlapping outlines to their nearest blobs or -// make new blobs out of them. -void Tesseract::AssignDiacriticsToNewBlobs( - const GenericVector& outlines, int pass, WERD* real_word, - PAGE_RES_IT* pr_it, GenericVector* word_wanted, - GenericVector* target_blobs) { -#ifndef DISABLED_LEGACY_ENGINE - GenericVector blob_wanted; - word_wanted->init_to_size(outlines.size(), false); - target_blobs->init_to_size(outlines.size(), nullptr); - // Check for outlines that need to be turned into stand-alone blobs. 
- for (int i = 0; i < outlines.size(); ++i) { - if (outlines[i] == nullptr) continue; - // Get a set of adjacent outlines that don't overlap any existing blob. - blob_wanted.init_to_size(outlines.size(), false); - int num_blob_outlines = 0; - TBOX total_ol_box(outlines[i]->bounding_box()); - while (i < outlines.size() && outlines[i] != nullptr) { - blob_wanted[i] = true; - total_ol_box += outlines[i]->bounding_box(); - ++i; - ++num_blob_outlines; - } - // Find the insertion point. - C_BLOB_IT blob_it(real_word->cblob_list()); - while (!blob_it.at_last() && - blob_it.data_relative(1)->bounding_box().left() <= - total_ol_box.left()) { - blob_it.forward(); - } - // Choose which combination of them we actually want and where to put - // them. - if (debug_noise_removal) - tprintf("Num blobless outlines = %d\n", num_blob_outlines); - C_BLOB* left_blob = blob_it.data(); - TBOX left_box = left_blob->bounding_box(); - C_BLOB* right_blob = blob_it.at_last() ? nullptr : blob_it.data_relative(1); - if ((left_box.x_overlap(total_ol_box) || right_blob == nullptr || - !right_blob->bounding_box().x_overlap(total_ol_box)) && - SelectGoodDiacriticOutlines(pass, noise_cert_disjoint, pr_it, left_blob, - outlines, num_blob_outlines, - &blob_wanted)) { - if (debug_noise_removal) tprintf("Added to left blob\n"); - for (int j = 0; j < blob_wanted.size(); ++j) { - if (blob_wanted[j]) { - (*word_wanted)[j] = true; - (*target_blobs)[j] = left_blob; - } - } - } else if (right_blob != nullptr && - (!left_box.x_overlap(total_ol_box) || - right_blob->bounding_box().x_overlap(total_ol_box)) && - SelectGoodDiacriticOutlines(pass, noise_cert_disjoint, pr_it, - right_blob, outlines, - num_blob_outlines, &blob_wanted)) { - if (debug_noise_removal) tprintf("Added to right blob\n"); - for (int j = 0; j < blob_wanted.size(); ++j) { - if (blob_wanted[j]) { - (*word_wanted)[j] = true; - (*target_blobs)[j] = right_blob; - } - } - } else if (SelectGoodDiacriticOutlines(pass, noise_cert_punc, pr_it, nullptr, 
- outlines, num_blob_outlines, - &blob_wanted)) { - if (debug_noise_removal) tprintf("Fitted between blobs\n"); - for (int j = 0; j < blob_wanted.size(); ++j) { - if (blob_wanted[j]) { - (*word_wanted)[j] = true; - (*target_blobs)[j] = nullptr; - } - } - } - } -#endif // ndef DISABLED_LEGACY_ENGINE -} - -// Starting with ok_outlines set to indicate which outlines overlap the blob, -// chooses the optimal set (approximately) and returns true if any outlines -// are desired, in which case ok_outlines indicates which ones. -bool Tesseract::SelectGoodDiacriticOutlines( - int pass, float certainty_threshold, PAGE_RES_IT* pr_it, C_BLOB* blob, - const GenericVector& outlines, int num_outlines, - GenericVector* ok_outlines) { -#ifndef DISABLED_LEGACY_ENGINE - STRING best_str; - float target_cert = certainty_threshold; - if (blob != nullptr) { - float target_c2; - target_cert = ClassifyBlobAsWord(pass, pr_it, blob, &best_str, &target_c2); - if (debug_noise_removal) { - tprintf("No Noise blob classified as %s=%g(%g) at:", best_str.string(), - target_cert, target_c2); - blob->bounding_box().print(); - } - target_cert -= (target_cert - certainty_threshold) * noise_cert_factor; - } - GenericVector test_outlines = *ok_outlines; - // Start with all the outlines in. - STRING all_str; - GenericVector best_outlines = *ok_outlines; - float best_cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, - pr_it, blob, &all_str); - if (debug_noise_removal) { - TBOX ol_box; - for (int i = 0; i < test_outlines.size(); ++i) { - if (test_outlines[i]) ol_box += outlines[i]->bounding_box(); - } - tprintf("All Noise blob classified as %s=%g, delta=%g at:", - all_str.string(), best_cert, best_cert - target_cert); - ol_box.print(); - } - // Iteratively zero out the bit that improves the certainty the most, until - // we get past the threshold, have zero bits, or fail to improve. - int best_index = 0; // To zero out. 
- while (num_outlines > 1 && best_index >= 0 && - (blob == nullptr || best_cert < target_cert || blob != nullptr)) { - // Find the best bit to zero out. - best_index = -1; - for (int i = 0; i < outlines.size(); ++i) { - if (test_outlines[i]) { - test_outlines[i] = false; - STRING str; - float cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, - pr_it, blob, &str); - if (debug_noise_removal) { - TBOX ol_box; - for (int j = 0; j < outlines.size(); ++j) { - if (test_outlines[j]) ol_box += outlines[j]->bounding_box(); - tprintf("%d", test_outlines[j]); - } - tprintf(" blob classified as %s=%g, delta=%g) at:", str.string(), - cert, cert - target_cert); - ol_box.print(); - } - if (cert > best_cert) { - best_cert = cert; - best_index = i; - best_outlines = test_outlines; - } - test_outlines[i] = true; - } - } - if (best_index >= 0) { - test_outlines[best_index] = false; - --num_outlines; - } - } - if (best_cert >= target_cert) { - // Save the best combination. - *ok_outlines = best_outlines; - if (debug_noise_removal) { - tprintf("%s noise combination ", blob ? "Adding" : "New"); - for (int i = 0; i < best_outlines.size(); ++i) { - tprintf("%d", best_outlines[i]); - } - tprintf(" yields certainty %g, beating target of %g\n", best_cert, - target_cert); - } - return true; - } -#endif // ndef DISABLED_LEGACY_ENGINE - return false; -} - -// Classifies the given blob plus the outlines flagged by ok_outlines, undoes -// the inclusion of the outlines, and returns the certainty of the raw choice. -float Tesseract::ClassifyBlobPlusOutlines( - const GenericVector& ok_outlines, - const GenericVector& outlines, int pass_n, PAGE_RES_IT* pr_it, - C_BLOB* blob, STRING* best_str) { -#ifndef DISABLED_LEGACY_ENGINE - C_OUTLINE_IT ol_it; - C_OUTLINE* first_to_keep = nullptr; - C_BLOB* local_blob = nullptr; - if (blob != nullptr) { - // Add the required outlines to the blob. 
- ol_it.set_to_list(blob->out_list()); - first_to_keep = ol_it.data(); - } - for (int i = 0; i < ok_outlines.size(); ++i) { - if (ok_outlines[i]) { - // This outline is to be added. - if (blob == nullptr) { - local_blob = new C_BLOB(outlines[i]); - blob = local_blob; - ol_it.set_to_list(blob->out_list()); - } else { - ol_it.add_before_stay_put(outlines[i]); - } - } - } - float c2; - float cert = ClassifyBlobAsWord(pass_n, pr_it, blob, best_str, &c2); - ol_it.move_to_first(); - if (first_to_keep == nullptr) { - // We created blob. Empty its outlines and delete it. - for (; !ol_it.empty(); ol_it.forward()) ol_it.extract(); - delete local_blob; - cert = -c2; - } else { - // Remove the outlines that we put in. - for (; ol_it.data() != first_to_keep; ol_it.forward()) { - ol_it.extract(); - } - } - return cert; -#else - return 0.1; -#endif // ndef DISABLED_LEGACY_ENGINE -} - -// Classifies the given blob (part of word_data->word->word) as an individual -// word, using languages, chopper etc, returning only the certainty of the -// best raw choice, and undoing all the work done to fake out the word. -float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it, - C_BLOB* blob, STRING* best_str, float* c2) { -#ifndef DISABLED_LEGACY_ENGINE - WERD* real_word = pr_it->word()->word; - WERD* word = real_word->ConstructFromSingleBlob( - real_word->flag(W_BOL), real_word->flag(W_EOL), C_BLOB::deep_copy(blob)); - WERD_RES* word_res = pr_it->InsertSimpleCloneWord(*pr_it->word(), word); - // Get a new iterator that points to the new word. - PAGE_RES_IT it(pr_it->page_res); - while (it.word() != word_res && it.word() != nullptr) it.forward(); - ASSERT_HOST(it.word() == word_res); - WordData wd(it); - // Force full initialization. 
- SetupWordPassN(1, &wd); - classify_word_and_language(pass_n, &it, &wd); - if (debug_noise_removal) { - if (wd.word->raw_choice != NULL) { - tprintf("word xheight=%g, row=%g, range=[%g,%g]\n", word_res->x_height, - wd.row->x_height(), wd.word->raw_choice->min_x_height(), - wd.word->raw_choice->max_x_height()); - } else { - tprintf("Got word with null raw choice xheight=%g, row=%g\n", word_res->x_height, - wd.row->x_height()); - } - } - float cert = 0.0f; - if (wd.word->raw_choice != NULL) { // This probably shouldn't happen, but... - cert = wd.word->raw_choice->certainty(); - float rat = wd.word->raw_choice->rating(); - *c2 = rat > 0.0f ? cert * cert / rat : 0.0f; - *best_str = wd.word->raw_choice->unichar_string(); - } else { - *c2 = 0.0f; - *best_str = ""; - } - it.DeleteCurrentWord(); - pr_it->ResetWordIterator(); - return cert; -#else - return 0.1; -#endif // ndef DISABLED_LEGACY_ENGINE -} - -// Generic function for classifying a word. Can be used either for pass1 or -// pass2 according to the function passed to recognizer. -// word_data holds the word to be recognized, and its block and row, and -// pr_it points to the word as well, in case we are running LSTM and it wants -// to output multiple words. -// Recognizes in the current language, and if successful that is all. -// If recognition was not successful, tries all available languages until -// it gets a successful result or runs out of languages. Keeps the best result. -void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it, - WordData* word_data) { -#ifdef DISABLED_LEGACY_ENGINE - WordRecognizer recognizer = &Tesseract::classify_word_pass1; -#else - WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1 - : &Tesseract::classify_word_pass2; -#endif // def DISABLED_LEGACY_ENGINE - - // Best result so far. - PointerVector best_words; - // Points to the best result. May be word or in lang_words. 
- const WERD_RES* word = word_data->word; - clock_t start_t = clock(); - const bool debug = classify_debug_level > 0 || multilang_debug_level > 0; - if (debug) { - tprintf("%s word with lang %s at:", - word->done ? "Already done" : "Processing", - most_recently_used_->lang.string()); - word->word->bounding_box().print(); - } - if (word->done) { - // If done on pass1, leave it as-is. - if (!word->tess_failed) - most_recently_used_ = word->tesseract; - return; - } - int sub = sub_langs_.size(); - if (most_recently_used_ != this) { - // Get the index of the most_recently_used_. - for (sub = 0; sub < sub_langs_.size() && - most_recently_used_ != sub_langs_[sub]; ++sub) {} - } - most_recently_used_->RetryWithLanguage( - *word_data, recognizer, debug, &word_data->lang_words[sub], &best_words); - Tesseract* best_lang_tess = most_recently_used_; - if (!WordsAcceptable(best_words)) { - // Try all the other languages to see if they are any better. - if (most_recently_used_ != this && - this->RetryWithLanguage(*word_data, recognizer, debug, - &word_data->lang_words[sub_langs_.size()], - &best_words) > 0) { - best_lang_tess = this; - } - for (int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size(); - ++i) { - if (most_recently_used_ != sub_langs_[i] && - sub_langs_[i]->RetryWithLanguage(*word_data, recognizer, debug, - &word_data->lang_words[i], - &best_words) > 0) { - best_lang_tess = sub_langs_[i]; - } - } - } - most_recently_used_ = best_lang_tess; - if (!best_words.empty()) { - if (best_words.size() == 1 && !best_words[0]->combination) { - // Move the best single result to the main word. - word_data->word->ConsumeWordResults(best_words[0]); - } else { - // Words came from LSTM, and must be moved to the PAGE_RES properly. 
- word_data->word = best_words.back(); - pr_it->ReplaceCurrentWord(&best_words); - } - ASSERT_HOST(word_data->word->box_word != nullptr); - } else { - tprintf("no best words!!\n"); - } - clock_t ocr_t = clock(); - if (tessedit_timing_debug) { - tprintf("%s (ocr took %.2f sec)\n", - word->best_choice->unichar_string().string(), - static_cast(ocr_t-start_t)/CLOCKS_PER_SEC); - } -} - -/** - * classify_word_pass1 - * - * Baseline normalize the word and pass it to Tess. - */ - -void Tesseract::classify_word_pass1(const WordData& word_data, - WERD_RES** in_word, - PointerVector* out_words) { - ROW* row = word_data.row; - BLOCK* block = word_data.block; - prev_word_best_choice_ = word_data.prev_word != nullptr - ? word_data.prev_word->word->best_choice : nullptr; -#ifndef ANDROID_BUILD -#ifdef DISABLED_LEGACY_ENGINE - if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { -#else - if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY || - tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) { -#endif // def DISABLED_LEGACY_ENGINE - if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { - LSTMRecognizeWord(*block, row, *in_word, out_words); - if (!out_words->empty()) - return; // Successful lstm recognition. - } - if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { - // No fallback allowed, so use a fake. - (*in_word)->SetupFake(lstm_recognizer_->GetUnicharset()); - return; - } - - #ifndef DISABLED_LEGACY_ENGINE - // Fall back to tesseract for failed words or odd words. 
- (*in_word)->SetupForRecognition(unicharset, this, BestPix(), - OEM_TESSERACT_ONLY, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, row, block); -#endif // ndef DISABLED_LEGACY_ENGINE - } -#endif // ndef ANDROID_BUILD - -#ifndef DISABLED_LEGACY_ENGINE - WERD_RES* word = *in_word; - match_word_pass_n(1, word, row, block); - if (!word->tess_failed && !word->word->flag(W_REP_CHAR)) { - word->tess_would_adapt = AdaptableWord(word); - bool adapt_ok = word_adaptable(word, tessedit_tess_adaption_mode); - - if (adapt_ok) { - // Send word to adaptive classifier for training. - word->BestChoiceToCorrectText(); - LearnWord(nullptr, word); - // Mark misadaptions if running blamer. - if (word->blamer_bundle != nullptr) { - word->blamer_bundle->SetMisAdaptionDebug(word->best_choice, - wordrec_debug_blamer); - } - } - - if (tessedit_enable_doc_dict && !word->IsAmbiguous()) - tess_add_doc_word(word->best_choice); - } -#endif // ndef DISABLED_LEGACY_ENGINE -} - -// Helper to report the result of the xheight fix. -void Tesseract::ReportXhtFixResult(bool accept_new_word, float new_x_ht, - WERD_RES* word, WERD_RES* new_word) { - tprintf("New XHT Match:%s = %s ", - word->best_choice->unichar_string().string(), - word->best_choice->debug_string().string()); - word->reject_map.print(debug_fp); - tprintf(" -> %s = %s ", - new_word->best_choice->unichar_string().string(), - new_word->best_choice->debug_string().string()); - new_word->reject_map.print(debug_fp); - tprintf(" %s->%s %s %s\n", - word->guessed_x_ht ? "GUESS" : "CERT", - new_word->guessed_x_ht ? "GUESS" : "CERT", - new_x_ht > 0.1 ? "STILL DOUBT" : "OK", - accept_new_word ? "ACCEPTED" : ""); -} - -#ifndef DISABLED_LEGACY_ENGINE - -// Run the x-height fix-up, based on min/max top/bottom information in -// unicharset. -// Returns true if the word was changed. -// See the comment in fixxht.cpp for a description of the overall process. 
-bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { - int original_misfits = CountMisfitTops(word); - if (original_misfits == 0) - return false; - float baseline_shift = 0.0f; - float new_x_ht = ComputeCompatibleXheight(word, &baseline_shift); - if (baseline_shift != 0.0f) { - // Try the shift on its own first. - if (!TestNewNormalization(original_misfits, baseline_shift, word->x_height, - word, block, row)) - return false; - original_misfits = CountMisfitTops(word); - if (original_misfits > 0) { - float new_baseline_shift; - // Now recompute the new x_height. - new_x_ht = ComputeCompatibleXheight(word, &new_baseline_shift); - if (new_x_ht >= kMinRefitXHeightFraction * word->x_height) { - // No test of return value here, as we are definitely making a change - // to the word by shifting the baseline. - TestNewNormalization(original_misfits, baseline_shift, new_x_ht, - word, block, row); - } - } - return true; - } else if (new_x_ht >= kMinRefitXHeightFraction * word->x_height) { - return TestNewNormalization(original_misfits, 0.0f, new_x_ht, - word, block, row); - } else { - return false; - } -} - -// Runs recognition with the test baseline shift and x-height and returns true -// if there was an improvement in recognition result. 
-bool Tesseract::TestNewNormalization(int original_misfits, - float baseline_shift, float new_x_ht, - WERD_RES *word, BLOCK* block, ROW *row) { - bool accept_new_x_ht = false; - WERD_RES new_x_ht_word(word->word); - if (word->blamer_bundle != nullptr) { - new_x_ht_word.blamer_bundle = new BlamerBundle(); - new_x_ht_word.blamer_bundle->CopyTruth(*(word->blamer_bundle)); - } - new_x_ht_word.x_height = new_x_ht; - new_x_ht_word.baseline_shift = baseline_shift; - new_x_ht_word.caps_height = 0.0; - new_x_ht_word.SetupForRecognition( - unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, textord_use_cjk_fp_model, - poly_allow_detailed_fx, row, block); - match_word_pass_n(2, &new_x_ht_word, row, block); - if (!new_x_ht_word.tess_failed) { - int new_misfits = CountMisfitTops(&new_x_ht_word); - if (debug_x_ht_level >= 1) { - tprintf("Old misfits=%d with x-height %f, new=%d with x-height %f\n", - original_misfits, word->x_height, - new_misfits, new_x_ht); - tprintf("Old rating= %f, certainty=%f, new=%f, %f\n", - word->best_choice->rating(), word->best_choice->certainty(), - new_x_ht_word.best_choice->rating(), - new_x_ht_word.best_choice->certainty()); - } - // The misfits must improve and either the rating or certainty. - accept_new_x_ht = new_misfits < original_misfits && - (new_x_ht_word.best_choice->certainty() > - word->best_choice->certainty() || - new_x_ht_word.best_choice->rating() < - word->best_choice->rating()); - if (debug_x_ht_level >= 1) { - ReportXhtFixResult(accept_new_x_ht, new_x_ht, word, &new_x_ht_word); - } - } - if (accept_new_x_ht) { - word->ConsumeWordResults(&new_x_ht_word); - return true; - } - return false; -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -/** - * classify_word_pass2 - * - * Control what to do with the word in pass 2 - */ - -void Tesseract::classify_word_pass2(const WordData& word_data, - WERD_RES** in_word, - PointerVector* out_words) { - // Return if we do not want to run Tesseract. 
- if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { - return; - } -#ifndef DISABLED_LEGACY_ENGINE - ROW* row = word_data.row; - BLOCK* block = word_data.block; - WERD_RES* word = *in_word; - prev_word_best_choice_ = word_data.prev_word != nullptr - ? word_data.prev_word->word->best_choice : nullptr; - - set_global_subloc_code(SUBLOC_NORM); - check_debug_pt(word, 30); - if (!word->done) { - word->caps_height = 0.0; - if (word->x_height == 0.0f) - word->x_height = row->x_height(); - match_word_pass_n(2, word, row, block); - check_debug_pt(word, 40); - } - - SubAndSuperscriptFix(word); - - if (!word->tess_failed && !word->word->flag(W_REP_CHAR)) { - if (unicharset.top_bottom_useful() && unicharset.script_has_xheight() && - block->classify_rotation().y() == 0.0f) { - // Use the tops and bottoms since they are available. - TrainedXheightFix(word, block, row); - } - - set_global_subloc_code(SUBLOC_NORM); - } -#ifndef GRAPHICS_DISABLED - if (tessedit_display_outwords) { - if (fx_win == nullptr) - create_fx_win(); - clear_fx_win(); - word->rebuild_word->plot(fx_win); - TBOX wbox = word->rebuild_word->bounding_box(); - fx_win->ZoomToRectangle(wbox.left(), wbox.top(), - wbox.right(), wbox.bottom()); - ScrollView::Update(); - } -#endif - set_global_subloc_code(SUBLOC_NORM); - check_debug_pt(word, 50); -#endif // ndef DISABLED_LEGACY_ENGINE -} - -#ifndef DISABLED_LEGACY_ENGINE -/** - * match_word_pass2 - * - * Baseline normalize the word and pass it to Tess. - */ -void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, - ROW *row, BLOCK* block) { - if (word->tess_failed) return; - tess_segment_pass_n(pass_n, word); - - if (!word->tess_failed) { - if (!word->word->flag (W_REP_CHAR)) { - word->fix_quotes(); - if (tessedit_fix_hyphens) - word->fix_hyphens(); - /* Don't trust fix_quotes! 
- though I think I've fixed the bug */ - if (word->best_choice->length() != word->box_word->length()) { - tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" - " #Blobs=%d\n", - word->best_choice->debug_string().string(), - word->best_choice->length(), - word->box_word->length()); - - } - word->tess_accepted = tess_acceptable_word(word); - - // Also sets word->done flag - make_reject_map(word, row, pass_n); - } - } - set_word_fonts(word); - - ASSERT_HOST(word->raw_choice != nullptr); -} -#endif // ndef DISABLED_LEGACY_ENGINE - -// Helper to return the best rated BLOB_CHOICE in the whole word that matches -// the given char_id, or nullptr if none can be found. -static BLOB_CHOICE* FindBestMatchingChoice(UNICHAR_ID char_id, - WERD_RES* word_res) { - // Find the corresponding best BLOB_CHOICE from any position in the word_res. - BLOB_CHOICE* best_choice = nullptr; - for (int i = 0; i < word_res->best_choice->length(); ++i) { - BLOB_CHOICE* choice = FindMatchingChoice(char_id, - word_res->GetBlobChoices(i)); - if (choice != nullptr) { - if (best_choice == nullptr || choice->rating() < best_choice->rating()) - best_choice = choice; - } - } - return best_choice; -} - -// Helper to insert blob_choice in each location in the leader word if there is -// no matching BLOB_CHOICE there already, and correct any incorrect results -// in the best_choice. -static void CorrectRepcharChoices(BLOB_CHOICE* blob_choice, - WERD_RES* word_res) { - WERD_CHOICE* word = word_res->best_choice; - for (int i = 0; i < word_res->best_choice->length(); ++i) { - BLOB_CHOICE* choice = FindMatchingChoice(blob_choice->unichar_id(), - word_res->GetBlobChoices(i)); - if (choice == nullptr) { - BLOB_CHOICE_IT choice_it(word_res->GetBlobChoices(i)); - choice_it.add_before_stay_put(new BLOB_CHOICE(*blob_choice)); - } - } - // Correct any incorrect results in word. 
- for (int i = 0; i < word->length(); ++i) { - if (word->unichar_id(i) != blob_choice->unichar_id()) - word->set_unichar_id(blob_choice->unichar_id(), i); - } -} - -/** - * fix_rep_char() - * The word is a repeated char. (Leader.) Find the repeated char character. - * Create the appropriate single-word or multi-word sequence according to - * the size of spaces in between blobs, and correct the classifications - * where some of the characters disagree with the majority. - */ -void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) { - WERD_RES *word_res = page_res_it->word(); - const WERD_CHOICE &word = *(word_res->best_choice); - - // Find the frequency of each unique character in the word. - SortHelper rep_ch(word.length()); - for (int i = 0; i < word.length(); ++i) { - rep_ch.Add(word.unichar_id(i), 1); - } - - // Find the most frequent result. - UNICHAR_ID maxch_id = INVALID_UNICHAR_ID; // most common char - int max_count = rep_ch.MaxCount(&maxch_id); - // Find the best exemplar of a classifier result for maxch_id. - BLOB_CHOICE* best_choice = FindBestMatchingChoice(maxch_id, word_res); - if (best_choice == nullptr) { - tprintf("Failed to find a choice for %s, occurring %d times\n", - word_res->uch_set->debug_str(maxch_id).string(), max_count); - return; - } - word_res->done = TRUE; - - // Measure the mean space. - int gap_count = 0; - WERD* werd = word_res->word; - C_BLOB_IT blob_it(werd->cblob_list()); - C_BLOB* prev_blob = blob_it.data(); - for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - int gap = blob->bounding_box().left(); - gap -= prev_blob->bounding_box().right(); - ++gap_count; - prev_blob = blob; - } - // Just correct existing classification. 
- CorrectRepcharChoices(best_choice, word_res); - word_res->reject_map.initialise(word.length()); -} - -ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( - const UNICHARSET& char_set, const char *s, const char *lengths) { - int i = 0; - int offset = 0; - int leading_punct_count; - int upper_count = 0; - int hyphen_pos = -1; - ACCEPTABLE_WERD_TYPE word_type = AC_UNACCEPTABLE; - - if (strlen (lengths) > 20) - return word_type; - - /* Single Leading punctuation char*/ - - if (s[offset] != '\0' && STRING(chs_leading_punct).contains(s[offset])) - offset += lengths[i++]; - leading_punct_count = i; - - /* Initial cap */ - while (s[offset] != '\0' && char_set.get_isupper(s + offset, lengths[i])) { - offset += lengths[i++]; - upper_count++; - } - if (upper_count > 1) { - word_type = AC_UPPER_CASE; - } else { - /* Lower case word, possibly with an initial cap */ - while (s[offset] != '\0' && char_set.get_islower(s + offset, lengths[i])) { - offset += lengths[i++]; - } - if (i - leading_punct_count < quality_min_initial_alphas_reqd) - goto not_a_word; - /* - Allow a single hyphen in a lower case word - - don't trust upper case - I've seen several cases of "H" -> "I-I" - */ - if (lengths[i] == 1 && s[offset] == '-') { - hyphen_pos = i; - offset += lengths[i++]; - if (s[offset] != '\0') { - while ((s[offset] != '\0') && - char_set.get_islower(s + offset, lengths[i])) { - offset += lengths[i++]; - } - if (i < hyphen_pos + 3) - goto not_a_word; - } - } else { - /* Allow "'s" in NON hyphenated lower case words */ - if (lengths[i] == 1 && (s[offset] == '\'') && - lengths[i + 1] == 1 && (s[offset + lengths[i]] == 's')) { - offset += lengths[i++]; - offset += lengths[i++]; - } - } - if (upper_count > 0) - word_type = AC_INITIAL_CAP; - else - word_type = AC_LOWER_CASE; - } - - /* Up to two different, constrained trailing punctuation chars */ - if (lengths[i] == 1 && s[offset] != '\0' && - STRING(chs_trailing_punct1).contains(s[offset])) - offset += lengths[i++]; - if (lengths[i] 
== 1 && s[offset] != '\0' && i > 0 && - s[offset - lengths[i - 1]] != s[offset] && - STRING(chs_trailing_punct2).contains (s[offset])) - offset += lengths[i++]; - - if (s[offset] != '\0') - word_type = AC_UNACCEPTABLE; - - not_a_word: - - if (word_type == AC_UNACCEPTABLE) { - /* Look for abbreviation string */ - i = 0; - offset = 0; - if (s[0] != '\0' && char_set.get_isupper(s, lengths[0])) { - word_type = AC_UC_ABBREV; - while (s[offset] != '\0' && - char_set.get_isupper(s + offset, lengths[i]) && - lengths[i + 1] == 1 && s[offset + lengths[i]] == '.') { - offset += lengths[i++]; - offset += lengths[i++]; - } - } - else if (s[0] != '\0' && char_set.get_islower(s, lengths[0])) { - word_type = AC_LC_ABBREV; - while (s[offset] != '\0' && - char_set.get_islower(s + offset, lengths[i]) && - lengths[i + 1] == 1 && s[offset + lengths[i]] == '.') { - offset += lengths[i++]; - offset += lengths[i++]; - } - } - if (s[offset] != '\0') - word_type = AC_UNACCEPTABLE; - } - - return word_type; -} - -bool Tesseract::check_debug_pt(WERD_RES* word, int location) { - bool show_map_detail = false; - int16_t i; - - if (!test_pt) - return false; - - tessedit_rejection_debug.set_value (FALSE); - debug_x_ht_level.set_value(0); - - if (word->word->bounding_box().contains(FCOORD (test_pt_x, test_pt_y))) { - if (location < 0) - return true; // For breakpoint use - tessedit_rejection_debug.set_value(TRUE); - debug_x_ht_level.set_value(2); - tprintf ("\n\nTESTWD::"); - switch (location) { - case 0: - tprintf ("classify_word_pass1 start\n"); - word->word->print(); - break; - case 10: - tprintf ("make_reject_map: initial map"); - break; - case 20: - tprintf ("make_reject_map: after NN"); - break; - case 30: - tprintf ("classify_word_pass2 - START"); - break; - case 40: - tprintf ("classify_word_pass2 - Pre Xht"); - break; - case 50: - tprintf ("classify_word_pass2 - END"); - show_map_detail = true; - break; - case 60: - tprintf ("fixspace"); - break; - case 70: - tprintf ("MM pass START"); - 
break; - case 80: - tprintf ("MM pass END"); - break; - case 90: - tprintf ("After Poor quality rejection"); - break; - case 100: - tprintf ("unrej_good_quality_words - START"); - break; - case 110: - tprintf ("unrej_good_quality_words - END"); - break; - case 120: - tprintf ("Write results pass"); - show_map_detail = true; - break; - } - if (word->best_choice != nullptr) { - tprintf(" \"%s\" ", word->best_choice->unichar_string().string()); - word->reject_map.print(debug_fp); - tprintf("\n"); - if (show_map_detail) { - tprintf("\"%s\"\n", word->best_choice->unichar_string().string()); - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]); - word->reject_map[i].full_print(debug_fp); - } - } - } else { - tprintf("null best choice\n"); - } - tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); - tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); - return true; - } else { - return false; - } -} - -/** - * find_modal_font - * - * Find the modal font and remove from the stats. - */ -static void find_modal_font( // good chars in word - STATS* fonts, // font stats - int16_t* font_out, // output font - int8_t* font_count // output count -) { - int16_t font; //font index - int32_t count; //pile count - - if (fonts->get_total () > 0) { - font = (int16_t) fonts->mode (); - *font_out = font; - count = fonts->pile_count (font); - *font_count = count < INT8_MAX ? count : INT8_MAX; - fonts->add (font, -*font_count); - } - else { - *font_out = -1; - *font_count = 0; - } -} - -/** - * set_word_fonts - * - * Get the fonts for the word. - */ -void Tesseract::set_word_fonts(WERD_RES *word) { - // Don't try to set the word fonts for an lstm word, as the configs - // will be meaningless. 
- if (word->chopped_word == nullptr) return; - ASSERT_HOST(word->best_choice != nullptr); - -#ifndef DISABLED_LEGACY_ENGINE - const int fontinfo_size = get_fontinfo_table().size(); - if (fontinfo_size == 0) return; - GenericVector font_total_score; - font_total_score.init_to_size(fontinfo_size, 0); - - word->italic = 0; - word->bold = 0; - // Compute the font scores for the word - if (tessedit_debug_fonts) { - tprintf("Examining fonts in %s\n", - word->best_choice->debug_string().string()); - } - for (int b = 0; b < word->best_choice->length(); ++b) { - const BLOB_CHOICE* choice = word->GetBlobChoice(b); - if (choice == nullptr) continue; - const GenericVector& fonts = choice->fonts(); - for (int f = 0; f < fonts.size(); ++f) { - const int fontinfo_id = fonts[f].fontinfo_id; - if (0 <= fontinfo_id && fontinfo_id < fontinfo_size) { - font_total_score[fontinfo_id] += fonts[f].score; - } - } - } - // Find the top and 2nd choice for the word. - int score1 = 0, score2 = 0; - int16_t font_id1 = -1, font_id2 = -1; - for (int f = 0; f < fontinfo_size; ++f) { - if (tessedit_debug_fonts && font_total_score[f] > 0) { - tprintf("Font %s, total score = %d\n", - fontinfo_table_.get(f).name, font_total_score[f]); - } - if (font_total_score[f] > score1) { - score2 = score1; - font_id2 = font_id1; - score1 = font_total_score[f]; - font_id1 = f; - } else if (font_total_score[f] > score2) { - score2 = font_total_score[f]; - font_id2 = f; - } - } - word->fontinfo = font_id1 >= 0 ? &fontinfo_table_.get(font_id1) : nullptr; - word->fontinfo2 = font_id2 >= 0 ? &fontinfo_table_.get(font_id2) : nullptr; - // Each score has a limit of UINT16_MAX, so divide by that to get the number - // of "votes" for that font, ie number of perfect scores. 
- word->fontinfo_id_count = ClipToRange(score1 / UINT16_MAX, 1, INT8_MAX); - word->fontinfo_id2_count = ClipToRange(score2 / UINT16_MAX, 0, INT8_MAX); - if (score1 > 0) { - const FontInfo fi = fontinfo_table_.get(font_id1); - if (tessedit_debug_fonts) { - if (word->fontinfo_id2_count > 0 && font_id2 >= 0) { - tprintf("Word modal font=%s, score=%d, 2nd choice %s/%d\n", - fi.name, word->fontinfo_id_count, - fontinfo_table_.get(font_id2).name, - word->fontinfo_id2_count); - } else { - tprintf("Word modal font=%s, score=%d. No 2nd choice\n", - fi.name, word->fontinfo_id_count); - } - } - word->italic = (fi.is_italic() ? 1 : -1) * word->fontinfo_id_count; - word->bold = (fi.is_bold() ? 1 : -1) * word->fontinfo_id_count; - } -#endif // ndef DISABLED_LEGACY_ENGINE -} - - -/** - * font_recognition_pass - * - * Smooth the fonts for the document. - */ -void Tesseract::font_recognition_pass(PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - WERD_RES *word; // current word - STATS doc_fonts(0, font_table_size_); // font counters - - // Gather font id statistics. - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - word = page_res_it.word(); - if (word->fontinfo != nullptr) { - doc_fonts.add(word->fontinfo->universal_id, word->fontinfo_id_count); - } - if (word->fontinfo2 != nullptr) { - doc_fonts.add(word->fontinfo2->universal_id, word->fontinfo_id2_count); - } - } - int16_t doc_font; // modal font - int8_t doc_font_count; // modal font - find_modal_font(&doc_fonts, &doc_font, &doc_font_count); - if (doc_font_count == 0) - return; - // Get the modal font pointer. 
- const FontInfo* modal_font = nullptr; - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - word = page_res_it.word(); - if (word->fontinfo != nullptr && word->fontinfo->universal_id == doc_font) { - modal_font = word->fontinfo; - break; - } - if (word->fontinfo2 != nullptr && word->fontinfo2->universal_id == doc_font) { - modal_font = word->fontinfo2; - break; - } - } - ASSERT_HOST(modal_font != nullptr); - - // Assign modal font to weak words. - for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - word = page_res_it.word(); - const int length = word->best_choice->length(); - - const int count = word->fontinfo_id_count; - if (!(count == length || (length > 3 && count >= length * 3 / 4))) { - word->fontinfo = modal_font; - // Counts only get 1 as it came from the doc. - word->fontinfo_id_count = 1; - word->italic = modal_font->is_italic() ? 1 : -1; - word->bold = modal_font->is_bold() ? 1 : -1; - } - } -} - -// If a word has multiple alternates check if the best choice is in the -// dictionary. If not, replace it with an alternate that exists in the -// dictionary. -void Tesseract::dictionary_correction_pass(PAGE_RES *page_res) { - PAGE_RES_IT word_it(page_res); - for (WERD_RES* word = word_it.word(); word != nullptr; - word = word_it.forward()) { - if (word->best_choices.singleton()) - continue; // There are no alternates. - - const WERD_CHOICE* best = word->best_choice; - if (word->tesseract->getDict().valid_word(*best) != 0) - continue; // The best choice is in the dictionary. - - WERD_CHOICE_IT choice_it(&word->best_choices); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - WERD_CHOICE* alternate = choice_it.data(); - if (word->tesseract->getDict().valid_word(*alternate)) { - // The alternate choice is in the dictionary. 
- if (tessedit_bigram_debug) { - tprintf("Dictionary correction replaces best choice '%s' with '%s'\n", - best->unichar_string().string(), - alternate->unichar_string().string()); - } - // Replace the 'best' choice with a better choice. - word->ReplaceBestChoice(alternate); - break; - } - } - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/control.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/control.h deleted file mode 100644 index cd57ddba..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/control.h +++ /dev/null @@ -1,38 +0,0 @@ -/********************************************************************** - * File: control.h (Formerly control.h) - * Description: Module-independent matcher controller. - * Author: Ray Smith - * Created: Thu Apr 23 11:09:58 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -/** - * @file control.h - * Module-independent matcher controller. - */ - -#ifndef CONTROL_H -#define CONTROL_H - -enum ACCEPTABLE_WERD_TYPE -{ - AC_UNACCEPTABLE, ///< Unacceptable word - AC_LOWER_CASE, ///< ALL lower case - AC_UPPER_CASE, ///< ALL upper case - AC_INITIAL_CAP, ///< ALL but initial lc - AC_LC_ABBREV, ///< a.b.c. - AC_UC_ABBREV ///< A.B.C. 
-}; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/docqual.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/docqual.cpp deleted file mode 100644 index 95755d71..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/docqual.cpp +++ /dev/null @@ -1,1001 +0,0 @@ -/****************************************************************** - * File: docqual.cpp (Formerly docqual.c) - * Description: Document Quality Metrics - * Author: Phil Cheatle - * Created: Mon May 9 11:27:28 BST 1994 - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include "docqual.h" -#include "reject.h" -#include "tesscallback.h" -#include "tessvars.h" -#include "globals.h" -#include "tesseractclass.h" - -namespace tesseract{ - -// A little class to provide the callbacks as we have no pre-bound args. 
-struct DocQualCallbacks { - explicit DocQualCallbacks(WERD_RES* word0) - : word(word0), match_count(0), accepted_match_count(0) {} - - void CountMatchingBlobs(int index) { - ++match_count; - } - - void CountAcceptedBlobs(int index) { - if (word->reject_map[index].accepted()) - ++accepted_match_count; - ++match_count; - } - - void AcceptIfGoodQuality(int index) { - if (word->reject_map[index].accept_if_good_quality()) - word->reject_map[index].setrej_quality_accept(); - } - - WERD_RES* word; - int16_t match_count; - int16_t accepted_match_count; -}; - -/************************************************************************* - * word_blob_quality() - * How many blobs in the box_word are identical to those of the inword? - * ASSUME blobs in both initial word and box_word are in ascending order of - * left hand blob edge. - *************************************************************************/ -int16_t Tesseract::word_blob_quality(WERD_RES *word, ROW *row) { - if (word->bln_boxes == nullptr || - word->rebuild_word == nullptr || word->rebuild_word->blobs.empty()) - return 0; - - DocQualCallbacks cb(word); - word->bln_boxes->ProcessMatchedBlobs( - *word->rebuild_word, - NewPermanentTessCallback(&cb, &DocQualCallbacks::CountMatchingBlobs)); - return cb.match_count; -} - -int16_t Tesseract::word_outline_errs(WERD_RES *word) { - int16_t i = 0; - int16_t err_count = 0; - - if (word->rebuild_word != nullptr) { - for (int b = 0; b < word->rebuild_word->NumBlobs(); ++b) { - TBLOB* blob = word->rebuild_word->blobs[b]; - err_count += count_outline_errs(word->best_choice->unichar_string()[i], - blob->NumOutlines()); - i++; - } - } - return err_count; -} - -/************************************************************************* - * word_char_quality() - * Combination of blob quality and outline quality - how many good chars are - * there? - I.e chars which pass the blob AND outline tests. 
- *************************************************************************/ -void Tesseract::word_char_quality(WERD_RES *word, - ROW *row, - int16_t *match_count, - int16_t *accepted_match_count) { - if (word->bln_boxes == nullptr || word->rebuild_word == nullptr || - word->rebuild_word->blobs.empty()) { - *match_count = 0; - *accepted_match_count = 0; - return; - } - - DocQualCallbacks cb(word); - word->bln_boxes->ProcessMatchedBlobs( - *word->rebuild_word, - NewPermanentTessCallback(&cb, &DocQualCallbacks::CountAcceptedBlobs)); - *match_count = cb.match_count; - *accepted_match_count = cb.accepted_match_count; -} - -/************************************************************************* - * unrej_good_chs() - * Unreject POTENTIAL rejects if the blob passes the blob and outline checks - *************************************************************************/ -void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) { - if (word->bln_boxes == nullptr || - word->rebuild_word == nullptr || word->rebuild_word->blobs.empty()) - return; - - DocQualCallbacks cb(word); - word->bln_boxes->ProcessMatchedBlobs( - *word->rebuild_word, - NewPermanentTessCallback(&cb, &DocQualCallbacks::AcceptIfGoodQuality)); -} - -int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) { - int expected_outline_count; - - if (STRING (outlines_odd).contains (c)) - return 0; // Don't use this char - else if (STRING (outlines_2).contains (c)) - expected_outline_count = 2; - else - expected_outline_count = 1; - return abs (outline_count - expected_outline_count); -} - -void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, - bool good_quality_doc) { - if ((tessedit_good_quality_unrej && good_quality_doc)) - unrej_good_quality_words(page_res_it); - doc_and_block_rejection(page_res_it, good_quality_doc); - if (unlv_tilde_crunching) { - tilde_crunch(page_res_it); - tilde_delete(page_res_it); - } -} - 
-/************************************************************************* - * unrej_good_quality_words() - * Accept potential rejects in words which pass the following checks: - * - Contains a potential reject - * - Word looks like a sensible alpha word. - * - Word segmentation is the same as the original image - * - All characters have the expected number of outlines - * NOTE - the rejection counts are recalculated after unrejection - * - CAN'T do it in a single pass without a bit of fiddling - * - keep it simple but inefficient - *************************************************************************/ -void Tesseract::unrej_good_quality_words( //unreject potential - PAGE_RES_IT &page_res_it) { - WERD_RES *word; - ROW_RES *current_row; - BLOCK_RES *current_block; - int i; - - page_res_it.restart_page (); - while (page_res_it.word () != nullptr) { - check_debug_pt (page_res_it.word (), 100); - if (bland_unrej) { - word = page_res_it.word (); - for (i = 0; i < word->reject_map.length (); i++) { - if (word->reject_map[i].accept_if_good_quality ()) - word->reject_map[i].setrej_quality_accept (); - } - page_res_it.forward (); - } - else if ((page_res_it.row ()->char_count > 0) && - ((page_res_it.row ()->rej_count / - (float) page_res_it.row ()->char_count) <= - quality_rowrej_pc)) { - word = page_res_it.word (); - if (word->reject_map.quality_recoverable_rejects() && - (tessedit_unrej_any_wd || - acceptable_word_string(*word->uch_set, - word->best_choice->unichar_string().string(), - word->best_choice->unichar_lengths().string()) - != AC_UNACCEPTABLE)) { - unrej_good_chs(word, page_res_it.row ()->row); - } - page_res_it.forward (); - } - else { - /* Skip to end of dodgy row */ - current_row = page_res_it.row (); - while ((page_res_it.word () != nullptr) && - (page_res_it.row () == current_row)) - page_res_it.forward (); - } - check_debug_pt (page_res_it.word (), 110); - } - page_res_it.restart_page (); - page_res_it.page_res->char_count = 0; - 
page_res_it.page_res->rej_count = 0; - current_block = nullptr; - current_row = nullptr; - while (page_res_it.word () != nullptr) { - if (current_block != page_res_it.block ()) { - current_block = page_res_it.block (); - current_block->char_count = 0; - current_block->rej_count = 0; - } - if (current_row != page_res_it.row ()) { - current_row = page_res_it.row (); - current_row->char_count = 0; - current_row->rej_count = 0; - current_row->whole_word_rej_count = 0; - } - page_res_it.rej_stat_word (); - page_res_it.forward (); - } -} - - -/************************************************************************* - * doc_and_block_rejection() - * - * If the page has too many rejects - reject all of it. - * If any block has too many rejects - reject all words in the block - *************************************************************************/ - -void Tesseract::doc_and_block_rejection( //reject big chunks - PAGE_RES_IT &page_res_it, - bool good_quality_doc) { - int16_t block_no = 0; - int16_t row_no = 0; - BLOCK_RES *current_block; - ROW_RES *current_row; - - bool rej_word; - bool prev_word_rejected; - int16_t char_quality = 0; - int16_t accepted_char_quality; - - if (page_res_it.page_res->rej_count * 100.0 / - page_res_it.page_res->char_count > tessedit_reject_doc_percent) { - reject_whole_page(page_res_it); - if (tessedit_debug_doc_rejection) { - tprintf("REJECT ALL #chars: %d #Rejects: %d; \n", - page_res_it.page_res->char_count, - page_res_it.page_res->rej_count); - } - } else { - if (tessedit_debug_doc_rejection) { - tprintf("NO PAGE REJECTION #chars: %d # Rejects: %d; \n", - page_res_it.page_res->char_count, - page_res_it.page_res->rej_count); - } - - /* Walk blocks testing for block rejection */ - - page_res_it.restart_page(); - WERD_RES* word; - while ((word = page_res_it.word()) != nullptr) { - current_block = page_res_it.block(); - block_no = current_block->block->pdblk.index(); - if (current_block->char_count > 0 && - (current_block->rej_count * 100.0 / 
current_block->char_count) > - tessedit_reject_block_percent) { - if (tessedit_debug_block_rejection) { - tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", - block_no, current_block->char_count, - current_block->rej_count); - } - prev_word_rejected = false; - while ((word = page_res_it.word()) != nullptr && - (page_res_it.block() == current_block)) { - if (tessedit_preserve_blk_rej_perfect_wds) { - rej_word = word->reject_map.reject_count() > 0 || - word->reject_map.length () < tessedit_preserve_min_wd_len; - if (rej_word && tessedit_dont_blkrej_good_wds && - word->reject_map.length() >= tessedit_preserve_min_wd_len && - acceptable_word_string( - *word->uch_set, - word->best_choice->unichar_string().string(), - word->best_choice->unichar_lengths().string()) != - AC_UNACCEPTABLE) { - word_char_quality(word, page_res_it.row()->row, - &char_quality, - &accepted_char_quality); - rej_word = char_quality != word->reject_map.length(); - } - } else { - rej_word = true; - } - if (rej_word) { - /* - Reject spacing if both current and prev words are rejected. - NOTE - this is NOT restricted to FUZZY spaces. - When tried this - generated more space errors. 
- */ - if (tessedit_use_reject_spaces && - prev_word_rejected && - page_res_it.prev_row() == page_res_it.row() && - word->word->space() == 1) - word->reject_spaces = true; - word->reject_map.rej_word_block_rej(); - } - prev_word_rejected = rej_word; - page_res_it.forward(); - } - } else { - if (tessedit_debug_block_rejection) { - tprintf("NOT REJECTING BLOCK %d #chars: %d # Rejects: %d; \n", - block_no, page_res_it.block()->char_count, - page_res_it.block()->rej_count); - } - - /* Walk rows in block testing for row rejection */ - row_no = 0; - while (page_res_it.word() != nullptr && - page_res_it.block() == current_block) { - current_row = page_res_it.row(); - row_no++; - /* Reject whole row if: - fraction of chars on row which are rejected exceed a limit AND - fraction rejects which occur in WHOLE WERD rejects is LESS THAN a - limit - */ - if (current_row->char_count > 0 && - (current_row->rej_count * 100.0 / current_row->char_count) > - tessedit_reject_row_percent && - (current_row->whole_word_rej_count * 100.0 / - current_row->rej_count) < - tessedit_whole_wd_rej_row_percent) { - if (tessedit_debug_block_rejection) { - tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", - row_no, current_row->char_count, - current_row->rej_count); - } - prev_word_rejected = false; - while ((word = page_res_it.word()) != nullptr && - page_res_it.row () == current_row) { - /* Preserve words on good docs unless they are mostly rejected*/ - if (!tessedit_row_rej_good_docs && good_quality_doc) { - rej_word = word->reject_map.reject_count() / - static_cast(word->reject_map.length()) > - tessedit_good_doc_still_rowrej_wd; - } else if (tessedit_preserve_row_rej_perfect_wds) { - /* Preserve perfect words anyway */ - rej_word = word->reject_map.reject_count() > 0 || - word->reject_map.length () < tessedit_preserve_min_wd_len; - if (rej_word && tessedit_dont_rowrej_good_wds && - word->reject_map.length() >= tessedit_preserve_min_wd_len && - acceptable_word_string(*word->uch_set, - 
word->best_choice->unichar_string().string(), - word->best_choice->unichar_lengths().string()) != - AC_UNACCEPTABLE) { - word_char_quality(word, page_res_it.row()->row, - &char_quality, - &accepted_char_quality); - rej_word = char_quality != word->reject_map.length(); - } - } else { - rej_word = true; - } - if (rej_word) { - /* - Reject spacing if both current and prev words are rejected. - NOTE - this is NOT restricted to FUZZY spaces. - When tried - this generated more space errors. - */ - if (tessedit_use_reject_spaces && - prev_word_rejected && - page_res_it.prev_row() == page_res_it.row() && - word->word->space () == 1) - word->reject_spaces = true; - word->reject_map.rej_word_row_rej(); - } - prev_word_rejected = rej_word; - page_res_it.forward(); - } - } else { - if (tessedit_debug_block_rejection) { - tprintf("NOT REJECTING ROW %d #chars: %d # Rejects: %d; \n", - row_no, current_row->char_count, current_row->rej_count); - } - while (page_res_it.word() != nullptr && - page_res_it.row() == current_row) - page_res_it.forward(); - } - } - } - } - } -} - -} // namespace tesseract - -/************************************************************************* - * reject_whole_page() - * Don't believe any of it - set the reject map to 00..00 in all words - * - *************************************************************************/ - -void reject_whole_page(PAGE_RES_IT &page_res_it) { - page_res_it.restart_page (); - while (page_res_it.word () != nullptr) { - page_res_it.word ()->reject_map.rej_word_doc_rej (); - page_res_it.forward (); - } - //whole page is rejected - page_res_it.page_res->rejected = true; -} - -namespace tesseract { -void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) { - WERD_RES *word; - GARBAGE_LEVEL garbage_level; - PAGE_RES_IT copy_it; - bool prev_potential_marked = false; - bool found_terrible_word = false; - BOOL8 ok_dict_word; - - page_res_it.restart_page(); - while (page_res_it.word() != nullptr) { - POLY_BLOCK* pb = 
page_res_it.block()->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) { - page_res_it.forward(); - continue; - } - word = page_res_it.word(); - - if (crunch_early_convert_bad_unlv_chs) - convert_bad_unlv_chs(word); - - if (crunch_early_merge_tess_fails) - word->merge_tess_fails(); - - if (word->reject_map.accept_count () != 0) { - found_terrible_word = false; - //Forget earlier potential crunches - prev_potential_marked = false; - } - else { - ok_dict_word = safe_dict_word(word); - garbage_level = garbage_word(word, ok_dict_word); - - if ((garbage_level != G_NEVER_CRUNCH) && - (terrible_word_crunch (word, garbage_level))) { - if (crunch_debug > 0) { - tprintf ("T CRUNCHING: \"%s\"\n", - word->best_choice->unichar_string().string()); - } - word->unlv_crunch_mode = CR_KEEP_SPACE; - if (prev_potential_marked) { - while (copy_it.word () != word) { - if (crunch_debug > 0) { - tprintf ("P1 CRUNCHING: \"%s\"\n", - copy_it.word()->best_choice->unichar_string().string()); - } - copy_it.word ()->unlv_crunch_mode = CR_KEEP_SPACE; - copy_it.forward (); - } - prev_potential_marked = false; - } - found_terrible_word = true; - } - else if ((garbage_level != G_NEVER_CRUNCH) && - (potential_word_crunch (word, - garbage_level, ok_dict_word))) { - if (found_terrible_word) { - if (crunch_debug > 0) { - tprintf ("P2 CRUNCHING: \"%s\"\n", - word->best_choice->unichar_string().string()); - } - word->unlv_crunch_mode = CR_KEEP_SPACE; - } - else if (!prev_potential_marked) { - copy_it = page_res_it; - prev_potential_marked = true; - if (crunch_debug > 1) { - tprintf ("P3 CRUNCHING: \"%s\"\n", - word->best_choice->unichar_string().string()); - } - } - } - else { - found_terrible_word = false; - //Forget earlier potential crunches - prev_potential_marked = false; - if (crunch_debug > 2) { - tprintf ("NO CRUNCH: \"%s\"\n", - word->best_choice->unichar_string().string()); - } - } - } - page_res_it.forward (); - } -} - - -bool Tesseract::terrible_word_crunch(WERD_RES* word, - 
GARBAGE_LEVEL garbage_level) { - float rating_per_ch; - int adjusted_len; - int crunch_mode = 0; - - if ((word->best_choice->unichar_string().length() == 0) || - (strspn(word->best_choice->unichar_string().string(), " ") == - word->best_choice->unichar_string().unsigned_size())) - crunch_mode = 1; - else { - adjusted_len = word->reject_map.length (); - if (adjusted_len > crunch_rating_max) - adjusted_len = crunch_rating_max; - rating_per_ch = word->best_choice->rating () / adjusted_len; - - if (rating_per_ch > crunch_terrible_rating) - crunch_mode = 2; - else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE)) - crunch_mode = 3; - else if ((word->best_choice->certainty () < crunch_poor_garbage_cert) && - (garbage_level != G_OK)) - crunch_mode = 4; - else if ((rating_per_ch > crunch_poor_garbage_rate) && - (garbage_level != G_OK)) - crunch_mode = 5; - } - if (crunch_mode > 0) { - if (crunch_debug > 2) { - tprintf ("Terrible_word_crunch (%d) on \"%s\"\n", - crunch_mode, word->best_choice->unichar_string().string()); - } - return true; - } - else - return false; -} - -bool Tesseract::potential_word_crunch(WERD_RES* word, - GARBAGE_LEVEL garbage_level, - bool ok_dict_word) { - float rating_per_ch; - int adjusted_len; - const char *str = word->best_choice->unichar_string().string(); - const char *lengths = word->best_choice->unichar_lengths().string(); - bool word_crunchable; - int poor_indicator_count = 0; - - word_crunchable = !crunch_leave_accept_strings || - word->reject_map.length() < 3 || - (acceptable_word_string(*word->uch_set, - str, lengths) == AC_UNACCEPTABLE && - !ok_dict_word); - - adjusted_len = word->reject_map.length(); - if (adjusted_len > 10) - adjusted_len = 10; - rating_per_ch = word->best_choice->rating() / adjusted_len; - - if (rating_per_ch > crunch_pot_poor_rate) { - if (crunch_debug > 2) { - tprintf("Potential poor rating on \"%s\"\n", - word->best_choice->unichar_string().string()); - } - poor_indicator_count++; - } - - if 
(word_crunchable && - word->best_choice->certainty() < crunch_pot_poor_cert) { - if (crunch_debug > 2) { - tprintf("Potential poor cert on \"%s\"\n", - word->best_choice->unichar_string().string()); - } - poor_indicator_count++; - } - - if (garbage_level != G_OK) { - if (crunch_debug > 2) { - tprintf("Potential garbage on \"%s\"\n", - word->best_choice->unichar_string().string()); - } - poor_indicator_count++; - } - return poor_indicator_count >= crunch_pot_indicators; -} - -void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) { - WERD_RES *word; - PAGE_RES_IT copy_it; - bool deleting_from_bol = false; - bool marked_delete_point = false; - int16_t debug_delete_mode; - CRUNCH_MODE delete_mode; - int16_t x_debug_delete_mode; - CRUNCH_MODE x_delete_mode; - - page_res_it.restart_page(); - while (page_res_it.word() != nullptr) { - word = page_res_it.word(); - - delete_mode = word_deletable (word, debug_delete_mode); - if (delete_mode != CR_NONE) { - if (word->word->flag (W_BOL) || deleting_from_bol) { - if (crunch_debug > 0) { - tprintf ("BOL CRUNCH DELETING(%d): \"%s\"\n", - debug_delete_mode, - word->best_choice->unichar_string().string()); - } - word->unlv_crunch_mode = delete_mode; - deleting_from_bol = true; - } else if (word->word->flag(W_EOL)) { - if (marked_delete_point) { - while (copy_it.word() != word) { - x_delete_mode = word_deletable (copy_it.word (), - x_debug_delete_mode); - if (crunch_debug > 0) { - tprintf ("EOL CRUNCH DELETING(%d): \"%s\"\n", - x_debug_delete_mode, - copy_it.word()->best_choice->unichar_string().string()); - } - copy_it.word ()->unlv_crunch_mode = x_delete_mode; - copy_it.forward (); - } - } - if (crunch_debug > 0) { - tprintf ("EOL CRUNCH DELETING(%d): \"%s\"\n", - debug_delete_mode, - word->best_choice->unichar_string().string()); - } - word->unlv_crunch_mode = delete_mode; - deleting_from_bol = false; - marked_delete_point = false; - } - else { - if (!marked_delete_point) { - copy_it = page_res_it; - marked_delete_point = true; - 
} - } - } - else { - deleting_from_bol = false; - //Forget earlier potential crunches - marked_delete_point = false; - } - /* - The following step has been left till now as the tess fails are used to - determine if the word is deletable. - */ - if (!crunch_early_merge_tess_fails) - word->merge_tess_fails(); - page_res_it.forward (); - } -} - - -void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) { - int i; - UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-"); - UNICHAR_ID unichar_space = word_res->uch_set->unichar_to_id(" "); - UNICHAR_ID unichar_tilde = word_res->uch_set->unichar_to_id("~"); - UNICHAR_ID unichar_pow = word_res->uch_set->unichar_to_id("^"); - for (i = 0; i < word_res->reject_map.length(); ++i) { - if (word_res->best_choice->unichar_id(i) == unichar_tilde) { - word_res->best_choice->set_unichar_id(unichar_dash, i); - if (word_res->reject_map[i].accepted ()) - word_res->reject_map[i].setrej_unlv_rej (); - } - if (word_res->best_choice->unichar_id(i) == unichar_pow) { - word_res->best_choice->set_unichar_id(unichar_space, i); - if (word_res->reject_map[i].accepted ()) - word_res->reject_map[i].setrej_unlv_rej (); - } - } -} - -GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { - enum STATES - { - JUNK, - FIRST_UPPER, - FIRST_LOWER, - FIRST_NUM, - SUBSEQUENT_UPPER, - SUBSEQUENT_LOWER, - SUBSEQUENT_NUM - }; - const char *str = word->best_choice->unichar_string().string(); - const char *lengths = word->best_choice->unichar_lengths().string(); - STATES state = JUNK; - int len = 0; - int isolated_digits = 0; - int isolated_alphas = 0; - int bad_char_count = 0; - int tess_rejs = 0; - int dodgy_chars = 0; - int ok_chars; - UNICHAR_ID last_char = -1; - int alpha_repetition_count = 0; - int longest_alpha_repetition_count = 0; - int longest_lower_run_len = 0; - int lower_string_count = 0; - int longest_upper_run_len = 0; - int upper_string_count = 0; - int total_alpha_count = 0; - int total_digit_count = 0; - - for (; 
*str != '\0'; str += *(lengths++)) { - len++; - if (word->uch_set->get_isupper (str, *lengths)) { - total_alpha_count++; - switch (state) { - case SUBSEQUENT_UPPER: - case FIRST_UPPER: - state = SUBSEQUENT_UPPER; - upper_string_count++; - if (longest_upper_run_len < upper_string_count) - longest_upper_run_len = upper_string_count; - if (last_char == word->uch_set->unichar_to_id(str, *lengths)) { - alpha_repetition_count++; - if (longest_alpha_repetition_count < alpha_repetition_count) { - longest_alpha_repetition_count = alpha_repetition_count; - } - } - else { - last_char = word->uch_set->unichar_to_id(str, *lengths); - alpha_repetition_count = 1; - } - break; - case FIRST_NUM: - isolated_digits++; - default: - state = FIRST_UPPER; - last_char = word->uch_set->unichar_to_id(str, *lengths); - alpha_repetition_count = 1; - upper_string_count = 1; - break; - } - } - else if (word->uch_set->get_islower (str, *lengths)) { - total_alpha_count++; - switch (state) { - case SUBSEQUENT_LOWER: - case FIRST_LOWER: - state = SUBSEQUENT_LOWER; - lower_string_count++; - if (longest_lower_run_len < lower_string_count) - longest_lower_run_len = lower_string_count; - if (last_char == word->uch_set->unichar_to_id(str, *lengths)) { - alpha_repetition_count++; - if (longest_alpha_repetition_count < alpha_repetition_count) { - longest_alpha_repetition_count = alpha_repetition_count; - } - } - else { - last_char = word->uch_set->unichar_to_id(str, *lengths); - alpha_repetition_count = 1; - } - break; - case FIRST_NUM: - isolated_digits++; - default: - state = FIRST_LOWER; - last_char = word->uch_set->unichar_to_id(str, *lengths); - alpha_repetition_count = 1; - lower_string_count = 1; - break; - } - } - else if (word->uch_set->get_isdigit (str, *lengths)) { - total_digit_count++; - switch (state) { - case FIRST_NUM: - state = SUBSEQUENT_NUM; - case SUBSEQUENT_NUM: - break; - case FIRST_UPPER: - case FIRST_LOWER: - isolated_alphas++; - default: - state = FIRST_NUM; - break; - } - } - 
else { - if (*lengths == 1 && *str == ' ') - tess_rejs++; - else - bad_char_count++; - switch (state) { - case FIRST_NUM: - isolated_digits++; - break; - case FIRST_UPPER: - case FIRST_LOWER: - isolated_alphas++; - default: - break; - } - state = JUNK; - } - } - - switch (state) { - case FIRST_NUM: - isolated_digits++; - break; - case FIRST_UPPER: - case FIRST_LOWER: - isolated_alphas++; - default: - break; - } - - if (crunch_include_numerals) { - total_alpha_count += total_digit_count - isolated_digits; - } - - if (crunch_leave_ok_strings && len >= 4 && - 2 * (total_alpha_count - isolated_alphas) > len && - longest_alpha_repetition_count < crunch_long_repetitions) { - if ((crunch_accept_ok && - acceptable_word_string(*word->uch_set, str, lengths) != - AC_UNACCEPTABLE) || - longest_lower_run_len > crunch_leave_lc_strings || - longest_upper_run_len > crunch_leave_uc_strings) - return G_NEVER_CRUNCH; - } - if (word->reject_map.length() > 1 && - strpbrk(str, " ") == nullptr && - (word->best_choice->permuter() == SYSTEM_DAWG_PERM || - word->best_choice->permuter() == FREQ_DAWG_PERM || - word->best_choice->permuter() == USER_DAWG_PERM || - word->best_choice->permuter() == NUMBER_PERM || - acceptable_word_string(*word->uch_set, str, lengths) != - AC_UNACCEPTABLE || ok_dict_word)) - return G_OK; - - ok_chars = len - bad_char_count - isolated_digits - - isolated_alphas - tess_rejs; - - if (crunch_debug > 3) { - tprintf("garbage_word: \"%s\"\n", - word->best_choice->unichar_string().string()); - tprintf("LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n", - len, - bad_char_count, isolated_digits, isolated_alphas, tess_rejs); - } - if (bad_char_count == 0 && - tess_rejs == 0 && - (len > isolated_digits + isolated_alphas || len <= 2)) - return G_OK; - - if (tess_rejs > ok_chars || - (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len)) - return G_TERRIBLE; - - if (len > 4) { - dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits + - isolated_alphas; - if 
(dodgy_chars > 5 || (dodgy_chars / (float) len) > 0.5) - return G_DODGY; - else - return G_OK; - } else { - dodgy_chars = 2 * tess_rejs + bad_char_count; - if ((len == 4 && dodgy_chars > 2) || - (len == 3 && dodgy_chars > 2) || dodgy_chars >= len) - return G_DODGY; - else - return G_OK; - } -} - - -/************************************************************************* - * word_deletable() - * DELETE WERDS AT ENDS OF ROWS IF - * Word is crunched && - * ( string length = 0 OR - * > 50% of chars are "|" (before merging) OR - * certainty < -10 OR - * rating /char > 60 OR - * TOP of word is more than 0.5 xht BELOW baseline OR - * BOTTOM of word is more than 0.5 xht ABOVE xht OR - * length of word < 3xht OR - * height of word < 0.7 xht OR - * height of word > 3.0 xht OR - * >75% of the outline BBs have longest dimension < 0.5xht - *************************************************************************/ - -CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) { - int word_len = word->reject_map.length (); - float rating_per_ch; - TBOX box; //BB of word - - if (word->unlv_crunch_mode == CR_NONE) { - delete_mode = 0; - return CR_NONE; - } - - if (word_len == 0) { - delete_mode = 1; - return CR_DELETE; - } - - if (word->rebuild_word != nullptr) { - // Cube leaves rebuild_word nullptr. 
- box = word->rebuild_word->bounding_box(); - if (box.height () < crunch_del_min_ht * kBlnXHeight) { - delete_mode = 4; - return CR_DELETE; - } - - if (noise_outlines(word->rebuild_word)) { - delete_mode = 5; - return CR_DELETE; - } - } - - if ((failure_count (word) * 1.5) > word_len) { - delete_mode = 2; - return CR_LOOSE_SPACE; - } - - if (word->best_choice->certainty () < crunch_del_cert) { - delete_mode = 7; - return CR_LOOSE_SPACE; - } - - rating_per_ch = word->best_choice->rating () / word_len; - - if (rating_per_ch > crunch_del_rating) { - delete_mode = 8; - return CR_LOOSE_SPACE; - } - - if (box.top () < kBlnBaselineOffset - crunch_del_low_word * kBlnXHeight) { - delete_mode = 9; - return CR_LOOSE_SPACE; - } - - if (box.bottom () > - kBlnBaselineOffset + crunch_del_high_word * kBlnXHeight) { - delete_mode = 10; - return CR_LOOSE_SPACE; - } - - if (box.height () > crunch_del_max_ht * kBlnXHeight) { - delete_mode = 11; - return CR_LOOSE_SPACE; - } - - if (box.width () < crunch_del_min_width * kBlnXHeight) { - delete_mode = 3; - return CR_LOOSE_SPACE; - } - - delete_mode = 0; - return CR_NONE; -} - -int16_t Tesseract::failure_count(WERD_RES *word) { - const char *str = word->best_choice->unichar_string().string(); - int tess_rejs = 0; - - for (; *str != '\0'; str++) { - if (*str == ' ') - tess_rejs++; - } - return tess_rejs; -} - - -bool Tesseract::noise_outlines(TWERD* word) { - TBOX box; // BB of outline - int16_t outline_count = 0; - int16_t small_outline_count = 0; - int16_t max_dimension; - float small_limit = kBlnXHeight * crunch_small_outlines_size; - - for (int b = 0; b < word->NumBlobs(); ++b) { - TBLOB* blob = word->blobs[b]; - for (TESSLINE* ol = blob->outlines; ol != nullptr; ol = ol->next) { - outline_count++; - box = ol->bounding_box(); - if (box.height() > box.width()) - max_dimension = box.height(); - else - max_dimension = box.width(); - if (max_dimension < small_limit) - small_outline_count++; - } - } - return small_outline_count >= 
outline_count; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/docqual.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/docqual.h deleted file mode 100644 index 22f40e21..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/docqual.h +++ /dev/null @@ -1,40 +0,0 @@ -/****************************************************************** - * File: docqual.h (Formerly docqual.h) - * Description: Document Quality Metrics - * Author: Phil Cheatle - * Created: Mon May 9 11:27:28 BST 1994 - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef DOCQUAL_H -#define DOCQUAL_H - -#include // for int16_t - -class PAGE_RES_IT; -class ROW; -class WERD_RES; - -enum GARBAGE_LEVEL -{ - G_NEVER_CRUNCH, - G_OK, - G_DODGY, - G_TERRIBLE -}; - -int16_t word_blob_quality(WERD_RES *word, ROW *row); -void reject_whole_page(PAGE_RES_IT &page_res_it); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/equationdetect.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/equationdetect.cpp deleted file mode 100644 index 9c927531..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/equationdetect.cpp +++ /dev/null @@ -1,1520 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: equationdetect.cpp -// Description: Helper classes to detect equations. -// Author: Zongyi (Joe) Liu (joeliu@google.com) -// Created: Fri Aug 31 11:13:01 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef __MINGW32__ -#include -#endif - -#include -#include -#include -#include - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "equationdetect.h" - -#include "bbgrid.h" -#include "classify.h" -#include "colpartition.h" -#include "colpartitiongrid.h" -#include "colpartitionset.h" -#include "helpers.h" -#include "ratngs.h" -#include "tesseractclass.h" - -// Config variables. -BOOL_VAR(equationdetect_save_bi_image, false, "Save input bi image"); -BOOL_VAR(equationdetect_save_spt_image, false, "Save special character image"); -BOOL_VAR(equationdetect_save_seed_image, false, "Save the seed image"); -BOOL_VAR(equationdetect_save_merged_image, false, "Save the merged image"); - -namespace tesseract { - -/////////////////////////////////////////////////////////////////////////// -// Utility ColParition sort functions. -/////////////////////////////////////////////////////////////////////////// -static int SortCPByTopReverse(const void* p1, const void* p2) { - const ColPartition* cp1 = *static_cast(p1); - const ColPartition* cp2 = *static_cast(p2); - ASSERT_HOST(cp1 != nullptr && cp2 != nullptr); - const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box()); - return box2.top() - box1.top(); -} - -static int SortCPByBottom(const void* p1, const void* p2) { - const ColPartition* cp1 = *static_cast(p1); - const ColPartition* cp2 = *static_cast(p2); - ASSERT_HOST(cp1 != nullptr && cp2 != nullptr); - const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box()); - return box1.bottom() - box2.bottom(); -} - -static int SortCPByHeight(const void* p1, const void* p2) { - const ColPartition* cp1 = *static_cast(p1); - const ColPartition* cp2 = *static_cast(p2); - ASSERT_HOST(cp1 != nullptr && cp2 != nullptr); - const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box()); - return box1.height() - box2.height(); -} - -// TODO(joeliu): we may want to parameterize these constants. 
-const float kMathDigitDensityTh1 = 0.25; -const float kMathDigitDensityTh2 = 0.1; -const float kMathItalicDensityTh = 0.5; -const float kUnclearDensityTh = 0.25; -const int kSeedBlobsCountTh = 10; -const int kLeftIndentAlignmentCountTh = 1; - -// Returns true if PolyBlockType is of text type or equation type. -inline bool IsTextOrEquationType(PolyBlockType type) { - return PTIsTextType(type) || type == PT_EQUATION; -} - -inline bool IsLeftIndented(const EquationDetect::IndentType type) { - return type == EquationDetect::LEFT_INDENT || - type == EquationDetect::BOTH_INDENT; -} - -inline bool IsRightIndented(const EquationDetect::IndentType type) { - return type == EquationDetect::RIGHT_INDENT || - type == EquationDetect::BOTH_INDENT; -} - -EquationDetect::EquationDetect(const char* equ_datapath, - const char* equ_name) { - const char* default_name = "equ"; - if (equ_name == nullptr) { - equ_name = default_name; - } - lang_tesseract_ = nullptr; - resolution_ = 0; - page_count_ = 0; - - if (equ_tesseract_.init_tesseract(equ_datapath, equ_name, - OEM_TESSERACT_ONLY)) { - tprintf("Warning: equation region detection requested," - " but %s failed to load from %s\n", equ_name, equ_datapath); - } - - cps_super_bbox_ = nullptr; -} - -EquationDetect::~EquationDetect() { delete (cps_super_bbox_); } - -void EquationDetect::SetLangTesseract(Tesseract* lang_tesseract) { - lang_tesseract_ = lang_tesseract; -} - -void EquationDetect::SetResolution(const int resolution) { - resolution_ = resolution; -} - -int EquationDetect::LabelSpecialText(TO_BLOCK* to_block) { - if (to_block == nullptr) { - tprintf("Warning: input to_block is nullptr!\n"); - return -1; - } - - GenericVector blob_lists; - blob_lists.push_back(&(to_block->blobs)); - blob_lists.push_back(&(to_block->large_blobs)); - for (int i = 0; i < blob_lists.size(); ++i) { - BLOBNBOX_IT bbox_it(blob_lists[i]); - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); - bbox_it.forward()) { - 
bbox_it.data()->set_special_text_type(BSTT_NONE); - } - } - - return 0; -} - -void EquationDetect::IdentifySpecialText( - BLOBNBOX *blobnbox, const int height_th) { - ASSERT_HOST(blobnbox != nullptr); - if (blobnbox->bounding_box().height() < height_th && height_th > 0) { - // For small blob, we simply set to BSTT_NONE. - blobnbox->set_special_text_type(BSTT_NONE); - return; - } - - BLOB_CHOICE_LIST ratings_equ, ratings_lang; - C_BLOB* blob = blobnbox->cblob(); - // TODO(joeliu/rays) Fix this. We may have to normalize separately for - // each classifier here, as they may require different PolygonalCopy. - TBLOB* tblob = TBLOB::PolygonalCopy(false, blob); - const TBOX& box = tblob->bounding_box(); - - // Normalize the blob. Set the origin to the place we want to be the - // bottom-middle, and scaling is to make the height the x-height. - const float scaling = static_cast(kBlnXHeight) / box.height(); - const float x_orig = (box.left() + box.right()) / 2.0f, y_orig = box.bottom(); - std::unique_ptr normed_blob(new TBLOB(*tblob)); - normed_blob->Normalize(nullptr, nullptr, nullptr, x_orig, y_orig, scaling, scaling, - 0.0f, static_cast(kBlnBaselineOffset), - false, nullptr); - equ_tesseract_.AdaptiveClassifier(normed_blob.get(), &ratings_equ); - lang_tesseract_->AdaptiveClassifier(normed_blob.get(), &ratings_lang); - delete tblob; - - // Get the best choice from ratings_lang and rating_equ. As the choice in the - // list has already been sorted by the certainty, we simply use the first - // choice. - BLOB_CHOICE *lang_choice = nullptr, *equ_choice = nullptr; - if (ratings_lang.length() > 0) { - BLOB_CHOICE_IT choice_it(&ratings_lang); - lang_choice = choice_it.data(); - } - if (ratings_equ.length() > 0) { - BLOB_CHOICE_IT choice_it(&ratings_equ); - equ_choice = choice_it.data(); - } - - const float lang_score = lang_choice ? lang_choice->certainty() : -FLT_MAX; - const float equ_score = equ_choice ? 
equ_choice->certainty() : -FLT_MAX; - - const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8; - // The scores here are negative, so the max/min == fabs(min/max). - // float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score); - const float diff = fabs(lang_score - equ_score); - BlobSpecialTextType type = BSTT_NONE; - - // Classification. - if (fmax(lang_score, equ_score) < kConfScoreTh) { - // If both score are very small, then mark it as unclear. - type = BSTT_UNCLEAR; - } else if (diff > kConfDiffTh && equ_score > lang_score) { - // If equ_score is significantly higher, then we classify this character as - // math symbol. - type = BSTT_MATH; - } else if (lang_choice) { - // For other cases: lang_score is similar or significantly higher. - type = EstimateTypeForUnichar( - lang_tesseract_->unicharset, lang_choice->unichar_id()); - } - - if (type == BSTT_NONE && lang_tesseract_->get_fontinfo_table().get( - lang_choice->fontinfo_id()).is_italic()) { - // For text symbol, we still check if it is italic. - blobnbox->set_special_text_type(BSTT_ITALIC); - } else { - blobnbox->set_special_text_type(type); - } -} - -BlobSpecialTextType EquationDetect::EstimateTypeForUnichar( - const UNICHARSET& unicharset, const UNICHAR_ID id) const { - const STRING s = unicharset.id_to_unichar(id); - if (unicharset.get_isalpha(id)) { - return BSTT_NONE; - } - - if (unicharset.get_ispunctuation(id)) { - // Exclude some special texts that are likely to be confused as math symbol. - static GenericVector ids_to_exclude; - if (ids_to_exclude.empty()) { - static const STRING kCharsToEx[] = {"'", "`", "\"", "\\", ",", ".", - "", "", "", "", "", "", ""}; - int i = 0; - while (kCharsToEx[i] != "") { - ids_to_exclude.push_back( - unicharset.unichar_to_id(kCharsToEx[i++].string())); - } - ids_to_exclude.sort(); - } - return ids_to_exclude.bool_binary_search(id) ? BSTT_NONE : BSTT_MATH; - } - - // Check if it is digit. 
In addition to the isdigit attribute, we also check - // if this character belongs to those likely to be confused with a digit. - static const STRING kDigitsChars = "|"; - if (unicharset.get_isdigit(id) || - (s.length() == 1 && kDigitsChars.contains(s[0]))) { - return BSTT_DIGIT; - } else { - return BSTT_MATH; - } -} - -void EquationDetect::IdentifySpecialText() { - // Set configuration for Tesseract::AdaptiveClassifier. - equ_tesseract_.tess_cn_matching.set_value(1); // turn it on - equ_tesseract_.tess_bn_matching.set_value(0); - - // Set the multiplier to zero for lang_tesseract_ to improve the accuracy. - const int classify_class_pruner = lang_tesseract_->classify_class_pruner_multiplier; - const int classify_integer_matcher = - lang_tesseract_->classify_integer_matcher_multiplier; - lang_tesseract_->classify_class_pruner_multiplier.set_value(0); - lang_tesseract_->classify_integer_matcher_multiplier.set_value(0); - - ColPartitionGridSearch gsearch(part_grid_); - ColPartition *part = nullptr; - gsearch.StartFullSearch(); - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (!IsTextOrEquationType(part->type())) { - continue; - } - IdentifyBlobsToSkip(part); - BLOBNBOX_C_IT bbox_it(part->boxes()); - // Compute the height threshold. - GenericVector blob_heights; - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); - bbox_it.forward()) { - if (bbox_it.data()->special_text_type() != BSTT_SKIP) { - blob_heights.push_back(bbox_it.data()->bounding_box().height()); - } - } - blob_heights.sort(); - const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2; - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); - bbox_it.forward()) { - if (bbox_it.data()->special_text_type() != BSTT_SKIP) { - IdentifySpecialText(bbox_it.data(), height_th); - } - } - } - - // Set the multiplier values back. 
- lang_tesseract_->classify_class_pruner_multiplier.set_value( - classify_class_pruner); - lang_tesseract_->classify_integer_matcher_multiplier.set_value( - classify_integer_matcher); - - if (equationdetect_save_spt_image) { // For debug. - STRING outfile; - GetOutputTiffName("_spt", &outfile); - PaintSpecialTexts(outfile); - } -} - -void EquationDetect::IdentifyBlobsToSkip(ColPartition* part) { - ASSERT_HOST(part); - BLOBNBOX_C_IT blob_it(part->boxes()); - - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - // At this moment, no blob should have been joined. - ASSERT_HOST(!blob_it.data()->joined_to_prev()); - } - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->joined_to_prev() || blob->special_text_type() == BSTT_SKIP) { - continue; - } - TBOX blob_box = blob->bounding_box(); - - // Search if any blob can be merged into blob. If found, then we mark all - // these blobs as BSTT_SKIP. - BLOBNBOX_C_IT blob_it2 = blob_it; - bool found = false; - while (!blob_it2.at_last()) { - BLOBNBOX* nextblob = blob_it2.forward(); - const TBOX& nextblob_box = nextblob->bounding_box(); - if (nextblob_box.left() >= blob_box.right()) { - break; - } - const float kWidthR = 0.4, kHeightR = 0.3; - const bool xoverlap = blob_box.major_x_overlap(nextblob_box), - yoverlap = blob_box.y_overlap(nextblob_box); - const float widthR = static_cast( - std::min(nextblob_box.width(), blob_box.width())) / - std::max(nextblob_box.width(), blob_box.width()); - const float heightR = static_cast( - std::min(nextblob_box.height(), blob_box.height())) / - std::max(nextblob_box.height(), blob_box.height()); - - if (xoverlap && yoverlap && widthR > kWidthR && heightR > kHeightR) { - // Found one, set nextblob type and recompute blob_box. 
- found = true; - nextblob->set_special_text_type(BSTT_SKIP); - blob_box += nextblob_box; - } - } - if (found) { - blob->set_special_text_type(BSTT_SKIP); - } - } -} - -int EquationDetect::FindEquationParts( - ColPartitionGrid* part_grid, ColPartitionSet** best_columns) { - if (!lang_tesseract_) { - tprintf("Warning: lang_tesseract_ is nullptr!\n"); - return -1; - } - if (!part_grid || !best_columns) { - tprintf("part_grid/best_columns is nullptr!!\n"); - return -1; - } - cp_seeds_.clear(); - part_grid_ = part_grid; - best_columns_ = best_columns; - resolution_ = lang_tesseract_->source_resolution(); - STRING outfile; - page_count_++; - - if (equationdetect_save_bi_image) { - GetOutputTiffName("_bi", &outfile); - pixWrite(outfile.string(), lang_tesseract_->pix_binary(), IFF_TIFF_G4); - } - - // Pass 0: Compute special text type for blobs. - IdentifySpecialText(); - - // Pass 1: Merge parts by overlap. - MergePartsByLocation(); - - // Pass 2: compute the math blob density and find the seed partition. - IdentifySeedParts(); - // We still need separate seed into block seed and inline seed partition. - IdentifyInlineParts(); - - if (equationdetect_save_seed_image) { - GetOutputTiffName("_seed", &outfile); - PaintColParts(outfile); - } - - // Pass 3: expand block equation seeds. - while (!cp_seeds_.empty()) { - GenericVector seeds_expanded; - for (int i = 0; i < cp_seeds_.size(); ++i) { - if (ExpandSeed(cp_seeds_[i])) { - // If this seed is expanded, then we add it into seeds_expanded. Note - // this seed has been removed from part_grid_ if it is expanded. - seeds_expanded.push_back(cp_seeds_[i]); - } - } - // Add seeds_expanded back into part_grid_ and reset cp_seeds_. - for (int i = 0; i < seeds_expanded.size(); ++i) { - InsertPartAfterAbsorb(seeds_expanded[i]); - } - cp_seeds_ = seeds_expanded; - } - - // Pass 4: find math block satellite text partitions and merge them. - ProcessMathBlockSatelliteParts(); - - if (equationdetect_save_merged_image) { // For debug. 
- GetOutputTiffName("_merged", &outfile); - PaintColParts(outfile); - } - - return 0; -} - -void EquationDetect::MergePartsByLocation() { - while (true) { - ColPartition* part = nullptr; - // partitions that have been updated. - GenericVector parts_updated; - ColPartitionGridSearch gsearch(part_grid_); - gsearch.StartFullSearch(); - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (!IsTextOrEquationType(part->type())) { - continue; - } - GenericVector parts_to_merge; - SearchByOverlap(part, &parts_to_merge); - if (parts_to_merge.empty()) { - continue; - } - - // Merge parts_to_merge with part, and remove them from part_grid_. - part_grid_->RemoveBBox(part); - for (int i = 0; i < parts_to_merge.size(); ++i) { - ASSERT_HOST(parts_to_merge[i] != nullptr && parts_to_merge[i] != part); - part->Absorb(parts_to_merge[i], nullptr); - } - gsearch.RepositionIterator(); - - parts_updated.push_back(part); - } - - if (parts_updated.empty()) { // Exit the loop - break; - } - - // Re-insert parts_updated into part_grid_. - for (int i = 0; i < parts_updated.size(); ++i) { - InsertPartAfterAbsorb(parts_updated[i]); - } - } -} - -void EquationDetect::SearchByOverlap( - ColPartition* seed, - GenericVector* parts_overlap) { - ASSERT_HOST(seed != nullptr && parts_overlap != nullptr); - if (!IsTextOrEquationType(seed->type())) { - return; - } - ColPartitionGridSearch search(part_grid_); - const TBOX& seed_box(seed->bounding_box()); - const int kRadNeighborCells = 30; - search.StartRadSearch((seed_box.left() + seed_box.right()) / 2, - (seed_box.top() + seed_box.bottom()) / 2, - kRadNeighborCells); - search.SetUniqueMode(true); - - // Search iteratively. 
- ColPartition *part; - GenericVector parts; - const float kLargeOverlapTh = 0.95; - const float kEquXOverlap = 0.4, kEquYOverlap = 0.5; - while ((part = search.NextRadSearch()) != nullptr) { - if (part == seed || !IsTextOrEquationType(part->type())) { - continue; - } - const TBOX& part_box(part->bounding_box()); - bool merge = false; - - const float x_overlap_fraction = part_box.x_overlap_fraction(seed_box), - y_overlap_fraction = part_box.y_overlap_fraction(seed_box); - - // If part is large overlapped with seed, then set merge to true. - if (x_overlap_fraction >= kLargeOverlapTh && - y_overlap_fraction >= kLargeOverlapTh) { - merge = true; - } else if (seed->type() == PT_EQUATION && - IsTextOrEquationType(part->type())) { - if ((x_overlap_fraction > kEquXOverlap && y_overlap_fraction > 0.0) || - (x_overlap_fraction > 0.0 && y_overlap_fraction > kEquYOverlap)) { - merge = true; - } - } - - if (merge) { // Remove the part from search and put it into parts. - search.RemoveBBox(); - parts_overlap->push_back(part); - } - } -} - -void EquationDetect::InsertPartAfterAbsorb(ColPartition* part) { - ASSERT_HOST(part); - - // Before insert part back into part_grid_, we will need re-compute some - // of its attributes such as first_column_, last_column_. However, we still - // want to preserve its type. - BlobTextFlowType flow_type = part->flow(); - PolyBlockType part_type = part->type(); - BlobRegionType blob_type = part->blob_type(); - - // Call SetPartitionType to re-compute the attributes of part. - const TBOX& part_box(part->bounding_box()); - int grid_x, grid_y; - part_grid_->GridCoords( - part_box.left(), part_box.bottom(), &grid_x, &grid_y); - part->SetPartitionType(resolution_, best_columns_[grid_y]); - - // Reset the types back. - part->set_type(part_type); - part->set_blob_type(blob_type); - part->set_flow(flow_type); - part->SetBlobTypes(); - - // Insert into part_grid_. 
- part_grid_->InsertBBox(true, true, part); -} - -void EquationDetect::IdentifySeedParts() { - ColPartitionGridSearch gsearch(part_grid_); - ColPartition *part = nullptr; - gsearch.StartFullSearch(); - - GenericVector seeds1, seeds2; - // The left coordinates of indented text partitions. - GenericVector indented_texts_left; - // The foreground density of text partitions. - GenericVector texts_foreground_density; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (!IsTextOrEquationType(part->type())) { - continue; - } - part->ComputeSpecialBlobsDensity(); - const bool blobs_check = CheckSeedBlobsCount(part); - const int kTextBlobsTh = 20; - - if (CheckSeedDensity(kMathDigitDensityTh1, kMathDigitDensityTh2, part) && - blobs_check) { - // Passed high density threshold test, save into seeds1. - seeds1.push_back(part); - } else { - IndentType indent = IsIndented(part); - if (IsLeftIndented(indent) && blobs_check && - CheckSeedDensity(kMathDigitDensityTh2, kMathDigitDensityTh2, part)) { - // Passed low density threshold test and is indented, save into seeds2. - seeds2.push_back(part); - } else if (!IsRightIndented(indent) && - part->boxes_count() > kTextBlobsTh) { - // This is likely to be a text part, save the features. - const TBOX&box = part->bounding_box(); - if (IsLeftIndented(indent)) { - indented_texts_left.push_back(box.left()); - } - texts_foreground_density.push_back(ComputeForegroundDensity(box)); - } - } - } - - // Sort the features collected from text regions. - indented_texts_left.sort(); - texts_foreground_density.sort(); - float foreground_density_th = 0.15; // Default value. - if (!texts_foreground_density.empty()) { - // Use the median of the texts_foreground_density. 
- foreground_density_th = 0.8 * texts_foreground_density[ - texts_foreground_density.size() / 2]; - } - - for (int i = 0; i < seeds1.size(); ++i) { - const TBOX& box = seeds1[i]->bounding_box(); - if (CheckSeedFgDensity(foreground_density_th, seeds1[i]) && - !(IsLeftIndented(IsIndented(seeds1[i])) && - CountAlignment(indented_texts_left, box.left()) >= - kLeftIndentAlignmentCountTh)) { - // Mark as PT_EQUATION type. - seeds1[i]->set_type(PT_EQUATION); - cp_seeds_.push_back(seeds1[i]); - } else { // Mark as PT_INLINE_EQUATION type. - seeds1[i]->set_type(PT_INLINE_EQUATION); - } - } - - for (int i = 0; i < seeds2.size(); ++i) { - if (CheckForSeed2(indented_texts_left, foreground_density_th, seeds2[i])) { - seeds2[i]->set_type(PT_EQUATION); - cp_seeds_.push_back(seeds2[i]); - } - } -} - -float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) { - Pix *pix_bi = lang_tesseract_->pix_binary(); - const int pix_height = pixGetHeight(pix_bi); - Box* box = boxCreate(tbox.left(), pix_height - tbox.top(), - tbox.width(), tbox.height()); - Pix *pix_sub = pixClipRectangle(pix_bi, box, nullptr); - l_float32 fract; - pixForegroundFraction(pix_sub, &fract); - pixDestroy(&pix_sub); - boxDestroy(&box); - - return fract; -} - -bool EquationDetect::CheckSeedFgDensity(const float density_th, - ColPartition* part) { - ASSERT_HOST(part); - - // Split part horizontall, and check for each sub part. - GenericVector sub_boxes; - SplitCPHorLite(part, &sub_boxes); - float parts_passed = 0.0; - for (int i = 0; i < sub_boxes.size(); ++i) { - const float density = ComputeForegroundDensity(sub_boxes[i]); - if (density < density_th) { - parts_passed++; - } - } - - // If most sub parts passed, then we return true. 
- const float kSeedPartRatioTh = 0.3; - bool retval = (parts_passed / sub_boxes.size() >= kSeedPartRatioTh); - - return retval; -} - -void EquationDetect::SplitCPHor(ColPartition* part, - GenericVector* parts_splitted) { - ASSERT_HOST(part && parts_splitted); - if (part->median_width() == 0 || part->boxes_count() == 0) { - return; - } - - // Make a copy of part, and reset parts_splitted. - ColPartition* right_part = part->CopyButDontOwnBlobs(); - parts_splitted->delete_data_pointers(); - parts_splitted->clear(); - - const double kThreshold = part->median_width() * 3.0; - bool found_split = true; - while (found_split) { - found_split = false; - BLOBNBOX_C_IT box_it(right_part->boxes()); - // Blobs are sorted left side first. If blobs overlap, - // the previous blob may have a "more right" right side. - // Account for this by always keeping the largest "right" - // so far. - int previous_right = INT32_MIN; - - // Look for the next split in the partition. - for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { - const TBOX& box = box_it.data()->bounding_box(); - if (previous_right != INT32_MIN && - box.left() - previous_right > kThreshold) { - // We have a split position. Split the partition in two pieces. - // Insert the left piece in the grid and keep processing the right. - const int mid_x = (box.left() + previous_right) / 2; - ColPartition* left_part = right_part; - right_part = left_part->SplitAt(mid_x); - - parts_splitted->push_back(left_part); - left_part->ComputeSpecialBlobsDensity(); - found_split = true; - break; - } - - // The right side of the previous blobs. - previous_right = std::max(previous_right, static_cast(box.right())); - } - } - - // Add the last piece. 
- right_part->ComputeSpecialBlobsDensity(); - parts_splitted->push_back(right_part); -} - -void EquationDetect::SplitCPHorLite(ColPartition* part, - GenericVector* splitted_boxes) { - ASSERT_HOST(part && splitted_boxes); - splitted_boxes->clear(); - if (part->median_width() == 0) { - return; - } - - const double kThreshold = part->median_width() * 3.0; - - // Blobs are sorted left side first. If blobs overlap, - // the previous blob may have a "more right" right side. - // Account for this by always keeping the largest "right" - // so far. - TBOX union_box; - int previous_right = INT32_MIN; - BLOBNBOX_C_IT box_it(part->boxes()); - for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { - const TBOX& box = box_it.data()->bounding_box(); - if (previous_right != INT32_MIN && - box.left() - previous_right > kThreshold) { - // We have a split position. - splitted_boxes->push_back(union_box); - previous_right = INT32_MIN; - } - if (previous_right == INT32_MIN) { - union_box = box; - } else { - union_box += box; - } - // The right side of the previous blobs. - previous_right = std::max(previous_right, static_cast(box.right())); - } - - // Add the last piece. - if (previous_right != INT32_MIN) { - splitted_boxes->push_back(union_box); - } -} - -bool EquationDetect::CheckForSeed2( - const GenericVector& indented_texts_left, - const float foreground_density_th, - ColPartition* part) { - ASSERT_HOST(part); - const TBOX& box = part->bounding_box(); - - // Check if it is aligned with any indented_texts_left. - if (!indented_texts_left.empty() && - CountAlignment(indented_texts_left, box.left()) >= - kLeftIndentAlignmentCountTh) { - return false; - } - - // Check the foreground density. 
- if (ComputeForegroundDensity(box) > foreground_density_th) { - return false; - } - - return true; -} - -int EquationDetect::CountAlignment( - const GenericVector& sorted_vec, const int val) const { - if (sorted_vec.empty()) { - return 0; - } - const int kDistTh = static_cast(roundf(0.03 * resolution_)); - const int pos = sorted_vec.binary_search(val); - int count = 0; - - // Search left side. - int index = pos; - while (index >= 0 && abs(val - sorted_vec[index--]) < kDistTh) { - count++; - } - - // Search right side. - index = pos + 1; - while (index < sorted_vec.size() && sorted_vec[index++] - val < kDistTh) { - count++; - } - - return count; -} - -void EquationDetect::IdentifyInlineParts() { - ComputeCPsSuperBBox(); - IdentifyInlinePartsHorizontal(); - const int textparts_linespacing = EstimateTextPartLineSpacing(); - IdentifyInlinePartsVertical(true, textparts_linespacing); - IdentifyInlinePartsVertical(false, textparts_linespacing); -} - -void EquationDetect::ComputeCPsSuperBBox() { - ColPartitionGridSearch gsearch(part_grid_); - ColPartition *part = nullptr; - gsearch.StartFullSearch(); - delete cps_super_bbox_; - cps_super_bbox_ = new TBOX(); - while ((part = gsearch.NextFullSearch()) != nullptr) { - (*cps_super_bbox_) += part->bounding_box(); - } -} - -void EquationDetect::IdentifyInlinePartsHorizontal() { - ASSERT_HOST(cps_super_bbox_); - GenericVector new_seeds; - const int kMarginDiffTh = IntCastRounded( - 0.5 * lang_tesseract_->source_resolution()); - const int kGapTh = static_cast(roundf( - 1.0 * lang_tesseract_->source_resolution())); - ColPartitionGridSearch search(part_grid_); - search.SetUniqueMode(true); - // The center x coordinate of the cp_super_bbox_. 
- const int cps_cx = cps_super_bbox_->left() + cps_super_bbox_->width() / 2; - for (int i = 0; i < cp_seeds_.size(); ++i) { - ColPartition* part = cp_seeds_[i]; - const TBOX& part_box(part->bounding_box()); - const int left_margin = part_box.left() - cps_super_bbox_->left(), - right_margin = cps_super_bbox_->right() - part_box.right(); - bool right_to_left; - if (left_margin + kMarginDiffTh < right_margin && - left_margin < kMarginDiffTh) { - // part is left aligned, so we search if it has any right neighbor. - search.StartSideSearch( - part_box.right(), part_box.top(), part_box.bottom()); - right_to_left = false; - } else if (left_margin > cps_cx) { - // part locates on the right half on image, so search if it has any left - // neighbor. - search.StartSideSearch( - part_box.left(), part_box.top(), part_box.bottom()); - right_to_left = true; - } else { // part is not an inline equation. - new_seeds.push_back(part); - continue; - } - ColPartition* neighbor = nullptr; - bool side_neighbor_found = false; - while ((neighbor = search.NextSideSearch(right_to_left)) != nullptr) { - const TBOX& neighbor_box(neighbor->bounding_box()); - if (!IsTextOrEquationType(neighbor->type()) || - part_box.x_gap(neighbor_box) > kGapTh || - !part_box.major_y_overlap(neighbor_box) || - part_box.major_x_overlap(neighbor_box)) { - continue; - } - // We have found one. Set the side_neighbor_found flag. - side_neighbor_found = true; - break; - } - if (!side_neighbor_found) { // Mark part as PT_INLINE_EQUATION. - part->set_type(PT_INLINE_EQUATION); - } else { - // Check the geometric feature of neighbor. - const TBOX& neighbor_box(neighbor->bounding_box()); - if (neighbor_box.width() > part_box.width() && - neighbor->type() != PT_EQUATION) { // Mark as PT_INLINE_EQUATION. - part->set_type(PT_INLINE_EQUATION); - } else { // part is not an inline equation type. - new_seeds.push_back(part); - } - } - } - - // Reset the cp_seeds_ using the new_seeds. 
- cp_seeds_ = new_seeds; -} - -int EquationDetect::EstimateTextPartLineSpacing() { - ColPartitionGridSearch gsearch(part_grid_); - - // Get the y gap between text partitions; - ColPartition *current = nullptr, *prev = nullptr; - gsearch.StartFullSearch(); - GenericVector ygaps; - while ((current = gsearch.NextFullSearch()) != nullptr) { - if (!PTIsTextType(current->type())) { - continue; - } - if (prev != nullptr) { - const TBOX ¤t_box = current->bounding_box(); - const TBOX &prev_box = prev->bounding_box(); - // prev and current should be x major overlap and non y overlap. - if (current_box.major_x_overlap(prev_box) && - !current_box.y_overlap(prev_box)) { - int gap = current_box.y_gap(prev_box); - if (gap < std::min(current_box.height(), prev_box.height())) { - // The gap should be smaller than the height of the bounding boxes. - ygaps.push_back(gap); - } - } - } - prev = current; - } - - if (ygaps.size() < 8) { // We do not have enough data. - return -1; - } - - // Compute the line spacing from ygaps: use the mean of the first half. - ygaps.sort(); - int spacing = 0, count; - for (count = 0; count < ygaps.size() / 2; count++) { - spacing += ygaps[count]; - } - return spacing / count; -} - -void EquationDetect::IdentifyInlinePartsVertical( - const bool top_to_bottom, const int textparts_linespacing) { - if (cp_seeds_.empty()) { - return; - } - - // Sort cp_seeds_. - if (top_to_bottom) { // From top to bottom. - cp_seeds_.sort(&SortCPByTopReverse); - } else { // From bottom to top. - cp_seeds_.sort(&SortCPByBottom); - } - - GenericVector new_seeds; - for (int i = 0; i < cp_seeds_.size(); ++i) { - ColPartition* part = cp_seeds_[i]; - // If we sort cp_seeds_ from top to bottom, then for each cp_seeds_, we look - // for its top neighbors, so that if two/more inline regions are connected - // to each other, then we will identify the top one, and then use it to - // identify the bottom one. 
- if (IsInline(!top_to_bottom, textparts_linespacing, part)) { - part->set_type(PT_INLINE_EQUATION); - } else { - new_seeds.push_back(part); - } - } - cp_seeds_ = new_seeds; -} - -bool EquationDetect::IsInline(const bool search_bottom, - const int textparts_linespacing, - ColPartition* part) { - ASSERT_HOST(part != nullptr); - // Look for its nearest vertical neighbor that hardly overlaps in y but - // largely overlaps in x. - ColPartitionGridSearch search(part_grid_); - ColPartition *neighbor = nullptr; - const TBOX& part_box(part->bounding_box()); - const float kYGapRatioTh = 1.0; - - if (search_bottom) { - search.StartVerticalSearch(part_box.left(), part_box.right(), - part_box.bottom()); - } else { - search.StartVerticalSearch(part_box.left(), part_box.right(), - part_box.top()); - } - search.SetUniqueMode(true); - while ((neighbor = search.NextVerticalSearch(search_bottom)) != nullptr) { - const TBOX& neighbor_box(neighbor->bounding_box()); - if (part_box.y_gap(neighbor_box) > kYGapRatioTh * - std::min(part_box.height(), neighbor_box.height())) { - // Finished searching. - break; - } - if (!PTIsTextType(neighbor->type())) { - continue; - } - - // Check if neighbor and part is inline similar. - const float kHeightRatioTh = 0.5; - const int kYGapTh = textparts_linespacing > 0 ? - textparts_linespacing + static_cast(roundf(0.02 * resolution_)): - static_cast(roundf(0.05 * resolution_)); // Default value. - if (part_box.x_overlap(neighbor_box) && // Location feature. - part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing. - // Geo feature. 
- static_cast(std::min(part_box.height(), neighbor_box.height())) / - std::max(part_box.height(), neighbor_box.height()) > kHeightRatioTh) { - return true; - } - } - - return false; -} - -bool EquationDetect::CheckSeedBlobsCount(ColPartition* part) { - if (!part) { - return false; - } - const int kSeedMathBlobsCount = 2; - const int kSeedMathDigitBlobsCount = 5; - - const int blobs = part->boxes_count(), - math_blobs = part->SpecialBlobsCount(BSTT_MATH), - digit_blobs = part->SpecialBlobsCount(BSTT_DIGIT); - if (blobs < kSeedBlobsCountTh || math_blobs <= kSeedMathBlobsCount || - math_blobs + digit_blobs <= kSeedMathDigitBlobsCount) { - return false; - } - - return true; -} - -bool EquationDetect::CheckSeedDensity( - const float math_density_high, - const float math_density_low, - const ColPartition* part) const { - ASSERT_HOST(part); - float math_digit_density = part->SpecialBlobsDensity(BSTT_MATH) - + part->SpecialBlobsDensity(BSTT_DIGIT); - float italic_density = part->SpecialBlobsDensity(BSTT_ITALIC); - if (math_digit_density > math_density_high) { - return true; - } - if (math_digit_density + italic_density > kMathItalicDensityTh && - math_digit_density > math_density_low) { - return true; - } - - return false; -} - -EquationDetect::IndentType EquationDetect::IsIndented(ColPartition* part) { - ASSERT_HOST(part); - - ColPartitionGridSearch search(part_grid_); - ColPartition *neighbor = nullptr; - const TBOX& part_box(part->bounding_box()); - const int kXGapTh = static_cast(roundf(0.5 * resolution_)); - const int kRadiusTh = static_cast(roundf(3.0 * resolution_)); - const int kYGapTh = static_cast(roundf(0.5 * resolution_)); - - // Here we use a simple approximation algorithm: from the center of part, We - // perform the radius search, and check if we can find a neighboring partition - // that locates on the top/bottom left of part. 
- search.StartRadSearch((part_box.left() + part_box.right()) / 2, - (part_box.top() + part_box.bottom()) / 2, kRadiusTh); - search.SetUniqueMode(true); - bool left_indented = false, right_indented = false; - while ((neighbor = search.NextRadSearch()) != nullptr && - (!left_indented || !right_indented)) { - if (neighbor == part) { - continue; - } - const TBOX& neighbor_box(neighbor->bounding_box()); - - if (part_box.major_y_overlap(neighbor_box) && - part_box.x_gap(neighbor_box) < kXGapTh) { - // When this happens, it is likely part is a fragment of an - // over-segmented colpartition. So we return false. - return NO_INDENT; - } - - if (!IsTextOrEquationType(neighbor->type())) { - continue; - } - - // The neighbor should be above/below part, and overlap in x direction. - if (!part_box.x_overlap(neighbor_box) || part_box.y_overlap(neighbor_box)) { - continue; - } - - if (part_box.y_gap(neighbor_box) < kYGapTh) { - const int left_gap = part_box.left() - neighbor_box.left(); - const int right_gap = neighbor_box.right() - part_box.right(); - if (left_gap > kXGapTh) { - left_indented = true; - } - if (right_gap > kXGapTh) { - right_indented = true; - } - } - } - - if (left_indented && right_indented) { - return BOTH_INDENT; - } - if (left_indented) { - return LEFT_INDENT; - } - if (right_indented) { - return RIGHT_INDENT; - } - return NO_INDENT; -} - -bool EquationDetect::ExpandSeed(ColPartition* seed) { - if (seed == nullptr || // This seed has been absorbed by other seeds. - seed->IsVerticalType()) { // We skip vertical type right now. - return false; - } - - // Expand in four directions. - GenericVector parts_to_merge; - ExpandSeedHorizontal(true, seed, &parts_to_merge); - ExpandSeedHorizontal(false, seed, &parts_to_merge); - ExpandSeedVertical(true, seed, &parts_to_merge); - ExpandSeedVertical(false, seed, &parts_to_merge); - SearchByOverlap(seed, &parts_to_merge); - - if (parts_to_merge.empty()) { // We don't find any partition to merge. 
- return false; - } - - // Merge all partitions in parts_to_merge with seed. We first remove seed - // from part_grid_ as its bounding box is going to expand. Then we add it - // back after it aborbs all parts_to_merge parititions. - part_grid_->RemoveBBox(seed); - for (int i = 0; i < parts_to_merge.size(); ++i) { - ColPartition* part = parts_to_merge[i]; - if (part->type() == PT_EQUATION) { - // If part is in cp_seeds_, then we mark it as nullptr so that we won't - // process it again. - for (int j = 0; j < cp_seeds_.size(); ++j) { - if (part == cp_seeds_[j]) { - cp_seeds_[j] = nullptr; - break; - } - } - } - - // part has already been removed from part_grid_ in function - // ExpandSeedHorizontal/ExpandSeedVertical. - seed->Absorb(part, nullptr); - } - - return true; -} - -void EquationDetect::ExpandSeedHorizontal( - const bool search_left, - ColPartition* seed, - GenericVector* parts_to_merge) { - ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr); - const float kYOverlapTh = 0.6; - const int kXGapTh = static_cast(roundf(0.2 * resolution_)); - - ColPartitionGridSearch search(part_grid_); - const TBOX& seed_box(seed->bounding_box()); - const int x = search_left ? seed_box.left() : seed_box.right(); - search.StartSideSearch(x, seed_box.bottom(), seed_box.top()); - search.SetUniqueMode(true); - - // Search iteratively. - ColPartition *part = nullptr; - while ((part = search.NextSideSearch(search_left)) != nullptr) { - if (part == seed) { - continue; - } - const TBOX& part_box(part->bounding_box()); - if (part_box.x_gap(seed_box) > kXGapTh) { // Out of scope. - break; - } - - // Check part location. - if ((part_box.left() >= seed_box.left() && search_left) || - (part_box.right() <= seed_box.right() && !search_left)) { - continue; - } - - if (part->type() != PT_EQUATION) { // Non-equation type. - // Skip PT_LINLINE_EQUATION and non text type. 
- if (part->type() == PT_INLINE_EQUATION || - (!IsTextOrEquationType(part->type()) && - part->blob_type() != BRT_HLINE)) { - continue; - } - // For other types, it should be the near small neighbor of seed. - if (!IsNearSmallNeighbor(seed_box, part_box) || - !CheckSeedNeighborDensity(part)) { - continue; - } - } else { // Equation type, check the y overlap. - if (part_box.y_overlap_fraction(seed_box) < kYOverlapTh && - seed_box.y_overlap_fraction(part_box) < kYOverlapTh) { - continue; - } - } - - // Passed the check, delete it from search and add into parts_to_merge. - search.RemoveBBox(); - parts_to_merge->push_back(part); - } -} - -void EquationDetect::ExpandSeedVertical( - const bool search_bottom, - ColPartition* seed, - GenericVector* parts_to_merge) { - ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr && - cps_super_bbox_ != nullptr); - const float kXOverlapTh = 0.4; - const int kYGapTh = static_cast(roundf(0.2 * resolution_)); - - ColPartitionGridSearch search(part_grid_); - const TBOX& seed_box(seed->bounding_box()); - const int y = search_bottom ? seed_box.bottom() : seed_box.top(); - search.StartVerticalSearch( - cps_super_bbox_->left(), cps_super_bbox_->right(), y); - search.SetUniqueMode(true); - - // Search iteratively. - ColPartition *part = nullptr; - GenericVector parts; - int skipped_min_top = std::numeric_limits::max(), skipped_max_bottom = -1; - while ((part = search.NextVerticalSearch(search_bottom)) != nullptr) { - if (part == seed) { - continue; - } - const TBOX& part_box(part->bounding_box()); - - if (part_box.y_gap(seed_box) > kYGapTh) { // Out of scope. - break; - } - - // Check part location. - if ((part_box.bottom() >= seed_box.bottom() && search_bottom) || - (part_box.top() <= seed_box.top() && !search_bottom)) { - continue; - } - - bool skip_part = false; - if (part->type() != PT_EQUATION) { // Non-equation type. - // Skip PT_LINLINE_EQUATION and non text type. 
- if (part->type() == PT_INLINE_EQUATION || - (!IsTextOrEquationType(part->type()) && - part->blob_type() != BRT_HLINE)) { - skip_part = true; - } else if (!IsNearSmallNeighbor(seed_box, part_box) || - !CheckSeedNeighborDensity(part)) { - // For other types, it should be the near small neighbor of seed. - skip_part = true; - } - } else { // Equation type, check the x overlap. - if (part_box.x_overlap_fraction(seed_box) < kXOverlapTh && - seed_box.x_overlap_fraction(part_box) < kXOverlapTh) { - skip_part = true; - } - } - if (skip_part) { - if (part->type() != PT_EQUATION) { - if (skipped_min_top > part_box.top()) { - skipped_min_top = part_box.top(); - } - if (skipped_max_bottom < part_box.bottom()) { - skipped_max_bottom = part_box.bottom(); - } - } - } else { - parts.push_back(part); - } - } - - // For every part in parts, we need verify it is not above skipped_min_top - // when search top, or not below skipped_max_bottom when search bottom. I.e., - // we will skip a part if it looks like: - // search bottom | search top - // seed: ****************** | part: ********** - // skipped: xxx | skipped: xxx - // part: ********** | seed: *********** - for (int i = 0; i < parts.size(); i++) { - const TBOX& part_box(parts[i]->bounding_box()); - if ((search_bottom && part_box.top() <= skipped_max_bottom) || - (!search_bottom && part_box.bottom() >= skipped_min_top)) { - continue; - } - // Add parts[i] into parts_to_merge, and delete it from part_grid_. - parts_to_merge->push_back(parts[i]); - part_grid_->RemoveBBox(parts[i]); - } -} - -bool EquationDetect::IsNearSmallNeighbor(const TBOX& seed_box, - const TBOX& part_box) const { - const int kXGapTh = static_cast(roundf(0.25 * resolution_)); - const int kYGapTh = static_cast(roundf(0.05 * resolution_)); - - // Check geometric feature. - if (part_box.height() > seed_box.height() || - part_box.width() > seed_box.width()) { - return false; - } - - // Check overlap and distance. 
- if ((!part_box.major_x_overlap(seed_box) || - part_box.y_gap(seed_box) > kYGapTh) && - (!part_box.major_y_overlap(seed_box) || - part_box.x_gap(seed_box) > kXGapTh)) { - return false; - } - - return true; -} - -bool EquationDetect::CheckSeedNeighborDensity(const ColPartition* part) const { - ASSERT_HOST(part); - if (part->boxes_count() < kSeedBlobsCountTh) { - // Too few blobs, skip the check. - return true; - } - - // We check the math blobs density and the unclear blobs density. - if (part->SpecialBlobsDensity(BSTT_MATH) + - part->SpecialBlobsDensity(BSTT_DIGIT) > kMathDigitDensityTh1 || - part->SpecialBlobsDensity(BSTT_UNCLEAR) > kUnclearDensityTh) { - return true; - } - - return false; -} - -void EquationDetect::ProcessMathBlockSatelliteParts() { - // Iterate over part_grid_, and find all parts that are text type but not - // equation type. - ColPartition *part = nullptr; - GenericVector text_parts; - ColPartitionGridSearch gsearch(part_grid_); - gsearch.StartFullSearch(); - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() == PT_FLOWING_TEXT || part->type() == PT_HEADING_TEXT) { - text_parts.push_back(part); - } - } - if (text_parts.empty()) { - return; - } - - // Compute the medium height of the text_parts. - text_parts.sort(&SortCPByHeight); - const TBOX& text_box = text_parts[text_parts.size() / 2]->bounding_box(); - int med_height = text_box.height(); - if (text_parts.size() % 2 == 0 && text_parts.size() > 1) { - const TBOX& text_box = - text_parts[text_parts.size() / 2 - 1]->bounding_box(); - med_height = static_cast(roundf( - 0.5 * (text_box.height() + med_height))); - } - - // Iterate every text_parts and check if it is a math block satellite. - for (int i = 0; i < text_parts.size(); ++i) { - const TBOX& text_box(text_parts[i]->bounding_box()); - if (text_box.height() > med_height) { - continue; - } - GenericVector math_blocks; - if (!IsMathBlockSatellite(text_parts[i], &math_blocks)) { - continue; - } - - // Found. 
merge text_parts[i] with math_blocks. - part_grid_->RemoveBBox(text_parts[i]); - text_parts[i]->set_type(PT_EQUATION); - for (int j = 0; j < math_blocks.size(); ++j) { - part_grid_->RemoveBBox(math_blocks[j]); - text_parts[i]->Absorb(math_blocks[j], nullptr); - } - InsertPartAfterAbsorb(text_parts[i]); - } -} - -bool EquationDetect::IsMathBlockSatellite( - ColPartition* part, GenericVector* math_blocks) { - ASSERT_HOST(part != nullptr && math_blocks != nullptr); - math_blocks->clear(); - const TBOX& part_box(part->bounding_box()); - // Find the top/bottom nearest neighbor of part. - ColPartition *neighbors[2]; - int y_gaps[2] = {std::numeric_limits::max(), std::numeric_limits::max()}; - // The horizontal boundary of the neighbors. - int neighbors_left = std::numeric_limits::max(), neighbors_right = 0; - for (int i = 0; i < 2; ++i) { - neighbors[i] = SearchNNVertical(i != 0, part); - if (neighbors[i]) { - const TBOX& neighbor_box = neighbors[i]->bounding_box(); - y_gaps[i] = neighbor_box.y_gap(part_box); - if (neighbor_box.left() < neighbors_left) { - neighbors_left = neighbor_box.left(); - } - if (neighbor_box.right() > neighbors_right) { - neighbors_right = neighbor_box.right(); - } - } - } - if (neighbors[0] == neighbors[1]) { - // This happens when part is inside neighbor. - neighbors[1] = nullptr; - y_gaps[1] = std::numeric_limits::max(); - } - - // Check if part is within [neighbors_left, neighbors_right]. - if (part_box.left() < neighbors_left || part_box.right() > neighbors_right) { - return false; - } - - // Get the index of the near one in neighbors. - int index = y_gaps[0] < y_gaps[1] ? 0 : 1; - - // Check the near one. - if (IsNearMathNeighbor(y_gaps[index], neighbors[index])) { - math_blocks->push_back(neighbors[index]); - } else { - // If the near one failed the check, then we skip checking the far one. - return false; - } - - // Check the far one. 
- index = 1 - index; - if (IsNearMathNeighbor(y_gaps[index], neighbors[index])) { - math_blocks->push_back(neighbors[index]); - } - - return true; -} - -ColPartition* EquationDetect::SearchNNVertical( - const bool search_bottom, const ColPartition* part) { - ASSERT_HOST(part); - ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr; - const int kYGapTh = static_cast(roundf(resolution_ * 0.5)); - - ColPartitionGridSearch search(part_grid_); - search.SetUniqueMode(true); - const TBOX& part_box(part->bounding_box()); - int y = search_bottom ? part_box.bottom() : part_box.top(); - search.StartVerticalSearch(part_box.left(), part_box.right(), y); - int min_y_gap = std::numeric_limits::max(); - while ((neighbor = search.NextVerticalSearch(search_bottom)) != nullptr) { - if (neighbor == part || !IsTextOrEquationType(neighbor->type())) { - continue; - } - const TBOX& neighbor_box(neighbor->bounding_box()); - int y_gap = neighbor_box.y_gap(part_box); - if (y_gap > kYGapTh) { // Out of scope. 
- break; - } - if (!neighbor_box.major_x_overlap(part_box) || - (search_bottom && neighbor_box.bottom() > part_box.bottom()) || - (!search_bottom && neighbor_box.top() < part_box.top())) { - continue; - } - if (y_gap < min_y_gap) { - min_y_gap = y_gap; - nearest_neighbor = neighbor; - } - } - - return nearest_neighbor; -} - -bool EquationDetect::IsNearMathNeighbor( - const int y_gap, const ColPartition *neighbor) const { - if (!neighbor) { - return false; - } - const int kYGapTh = static_cast(roundf(resolution_ * 0.1)); - return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh; -} - -void EquationDetect::GetOutputTiffName(const char* name, - STRING* image_name) const { - ASSERT_HOST(image_name && name); - char page[50]; - snprintf(page, sizeof(page), "%04d", page_count_); - *image_name = STRING(lang_tesseract_->imagebasename) + page + name + ".tif"; -} - -void EquationDetect::PaintSpecialTexts(const STRING& outfile) const { - Pix *pix = nullptr, *pixBi = lang_tesseract_->pix_binary(); - pix = pixConvertTo32(pixBi); - ColPartitionGridSearch gsearch(part_grid_); - ColPartition* part = nullptr; - gsearch.StartFullSearch(); - while ((part = gsearch.NextFullSearch()) != nullptr) { - BLOBNBOX_C_IT blob_it(part->boxes()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - RenderSpecialText(pix, blob_it.data()); - } - } - - pixWrite(outfile.string(), pix, IFF_TIFF_LZW); - pixDestroy(&pix); -} - -void EquationDetect::PaintColParts(const STRING& outfile) const { - Pix *pix = pixConvertTo32(lang_tesseract_->BestPix()); - ColPartitionGridSearch gsearch(part_grid_); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - const TBOX& tbox = part->bounding_box(); - Box *box = boxCreate(tbox.left(), pixGetHeight(pix) - tbox.top(), - tbox.width(), tbox.height()); - if (part->type() == PT_EQUATION) { - pixRenderBoxArb(pix, box, 5, 255, 0, 0); - } else if (part->type() == 
PT_INLINE_EQUATION) { - pixRenderBoxArb(pix, box, 5, 0, 255, 0); - } else { - pixRenderBoxArb(pix, box, 5, 0, 0, 255); - } - boxDestroy(&box); - } - - pixWrite(outfile.string(), pix, IFF_TIFF_LZW); - pixDestroy(&pix); -} - -void EquationDetect::PrintSpecialBlobsDensity(const ColPartition* part) const { - ASSERT_HOST(part); - TBOX box(part->bounding_box()); - int h = pixGetHeight(lang_tesseract_->BestPix()); - tprintf("Printing special blobs density values for ColParition (t=%d,b=%d) ", - h - box.top(), h - box.bottom()); - box.print(); - tprintf("blobs count = %d, density = ", part->boxes_count()); - for (int i = 0; i < BSTT_COUNT; ++i) { - BlobSpecialTextType type = static_cast(i); - tprintf("%d:%f ", i, part->SpecialBlobsDensity(type)); - } - tprintf("\n"); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/equationdetect.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/equationdetect.h deleted file mode 100644 index 528f255b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/equationdetect.h +++ /dev/null @@ -1,274 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: equationdetect.h -// Description: The equation detection class that inherits equationdetectbase. -// Author: Zongyi (Joe) Liu (joeliu@google.com) -// Created: Fri Aug 31 11:13:01 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H_ -#define TESSERACT_CCMAIN_EQUATIONDETECT_H_ - -#include "blobbox.h" // for BLOBNBOX (ptr only), BlobSpecialText... -#include "equationdetectbase.h" // for EquationDetectBase -#include "genericvector.h" // for GenericVector -#include "tesseractclass.h" // for Tesseract -#include "unichar.h" // for UNICHAR_ID - -class TBOX; -class UNICHARSET; - -namespace tesseract { - -class Tesseract; -class ColPartition; -class ColPartitionGrid; -class ColPartitionSet; - -class EquationDetect : public EquationDetectBase { - public: - EquationDetect(const char* equ_datapath, - const char* equ_language); - ~EquationDetect(); - - enum IndentType { - NO_INDENT, - LEFT_INDENT, - RIGHT_INDENT, - BOTH_INDENT, - INDENT_TYPE_COUNT - }; - - // Reset the lang_tesseract_ pointer. This function should be called before we - // do any detector work. - void SetLangTesseract(Tesseract* lang_tesseract); - - // Iterate over the blobs inside to_block, and set the blobs that we want to - // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function - // returns 0 upon success. - int LabelSpecialText(TO_BLOCK* to_block); - - // Find possible equation partitions from part_grid. Should be called - // after the special_text_type of blobs are set. - // It returns 0 upon success. - int FindEquationParts(ColPartitionGrid* part_grid, - ColPartitionSet** best_columns); - - // Reset the resolution of the processing image. TEST only function. - void SetResolution(const int resolution); - - protected: - // Identify the special text type for one blob, and update its field. When - // height_th is set (> 0), we will label the blob as BSTT_NONE if its height - // is less than height_th. 
- void IdentifySpecialText(BLOBNBOX *blob, const int height_th); - - // Estimate the type for one unichar. - BlobSpecialTextType EstimateTypeForUnichar( - const UNICHARSET& unicharset, const UNICHAR_ID id) const; - - // Compute special text type for each blobs in part_grid_. - void IdentifySpecialText(); - - // Identify blobs that we want to skip during special blob type - // classification. - void IdentifyBlobsToSkip(ColPartition* part); - - // The ColPartitions in part_grid_ maybe over-segmented, particularly in the - // block equation regions. So we like to identify these partitions and merge - // them before we do the searching. - void MergePartsByLocation(); - - // Staring from the seed center, we do radius search. And for partitions that - // have large overlaps with seed, we remove them from part_grid_ and add into - // parts_overlap. Note: this function may update the part_grid_, so if the - // caller is also running ColPartitionGridSearch, use the RepositionIterator - // to continue. - void SearchByOverlap(ColPartition* seed, - GenericVector* parts_overlap); - - // Insert part back into part_grid_, after it absorbs some other parts. - void InsertPartAfterAbsorb(ColPartition* part); - - // Identify the colparitions in part_grid_, label them as PT_EQUATION, and - // save them into cp_seeds_. - void IdentifySeedParts(); - - // Check the blobs count for a seed region candidate. - bool CheckSeedBlobsCount(ColPartition* part); - - // Compute the foreground pixel density for a tbox area. - float ComputeForegroundDensity(const TBOX& tbox); - - // Check if part from seed2 label: with low math density and left indented. We - // are using two checks: - // 1. If its left is aligned with any coordinates in indented_texts_left, - // which we assume have been sorted. - // 2. If its foreground density is over foreground_density_th. 
- bool CheckForSeed2( - const GenericVector& indented_texts_left, - const float foreground_density_th, - ColPartition* part); - - // Count the number of values in sorted_vec that is close to val, used to - // check if a partition is aligned with text partitions. - int CountAlignment( - const GenericVector& sorted_vec, const int val) const; - - // Check for a seed candidate using the foreground pixel density. And we - // return true if the density is below a certain threshold, because characters - // in equation regions usually are apart with more white spaces. - bool CheckSeedFgDensity(const float density_th, ColPartition* part); - - // A light version of SplitCPHor: instead of really doing the part split, we - // simply compute the union bounding box of each split part. - void SplitCPHorLite(ColPartition* part, GenericVector* splitted_boxes); - - // Split the part (horizontally), and save the split result into - // parts_splitted. Note that it is caller's responsibility to release the - // memory owns by parts_splitted. On the other hand, the part is unchanged - // during this process and still owns the blobs, so do NOT call DeleteBoxes - // when freeing the colpartitions in parts_splitted. - void SplitCPHor(ColPartition* part, - GenericVector* parts_splitted); - - // Check the density for a seed candidate (part) using its math density and - // italic density, returns true if the check passed. - bool CheckSeedDensity(const float math_density_high, - const float math_density_low, - const ColPartition* part) const; - - // Check if part is indented. - IndentType IsIndented(ColPartition* part); - - // Identify inline partitions from cp_seeds_, and re-label them. - void IdentifyInlineParts(); - - // Compute the super bounding box for all colpartitions inside part_grid_. - void ComputeCPsSuperBBox(); - - // Identify inline partitions from cp_seeds_ using the horizontal search. 
- void IdentifyInlinePartsHorizontal(); - - // Estimate the line spacing between two text partitions. Returns -1 if not - // enough data. - int EstimateTextPartLineSpacing(); - - // Identify inline partitions from cp_seeds_ using vertical search. - void IdentifyInlinePartsVertical(const bool top_to_bottom, - const int textPartsLineSpacing); - - // Check if part is an inline equation zone. This should be called after we - // identified the seed regions. - bool IsInline(const bool search_bottom, - const int textPartsLineSpacing, - ColPartition* part); - - // For a given seed partition, we search the part_grid_ and see if there is - // any partition can be merged with it. It returns true if the seed has been - // expanded. - bool ExpandSeed(ColPartition* seed); - - // Starting from the seed position, we search the part_grid_ - // horizontally/vertically, find all parititions that can be - // merged with seed, remove them from part_grid_, and put them into - // parts_to_merge. - void ExpandSeedHorizontal(const bool search_left, - ColPartition* seed, - GenericVector* parts_to_merge); - void ExpandSeedVertical(const bool search_bottom, - ColPartition* seed, - GenericVector* parts_to_merge); - - // Check if a part_box is the small neighbor of seed_box. - bool IsNearSmallNeighbor(const TBOX& seed_box, - const TBOX& part_box) const; - - // Perform the density check for part, which we assume is nearing a seed - // partition. It returns true if the check passed. - bool CheckSeedNeighborDensity(const ColPartition* part) const; - - // After identify the math blocks, we do one more scanning on all text - // partitions, and check if any of them is the satellite of: - // math blocks: here a p is the satellite of q if: - // 1. q is the nearest vertical neighbor of p, and - // 2. y_gap(p, q) is less than a threshold, and - // 3. x_overlap(p, q) is over a threshold. - // Note that p can be the satellites of two blocks: its top neighbor and - // bottom neighbor. 
- void ProcessMathBlockSatelliteParts(); - - // Check if part is the satellite of one/two math blocks. If it is, we return - // true, and save the blocks into math_blocks. - bool IsMathBlockSatellite( - ColPartition* part, GenericVector* math_blocks); - - // Search the nearest neighbor of part in one vertical direction as defined in - // search_bottom. It returns the neighbor found that major x overlap with it, - // or nullptr when not found. - ColPartition* SearchNNVertical(const bool search_bottom, - const ColPartition* part); - - // Check if the neighbor with vertical distance of y_gap is a near and math - // block partition. - bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const; - - // Generate the tiff file name for output/debug file. - void GetOutputTiffName(const char* name, STRING* image_name) const; - - // Debugger function that renders ColPartitions on the input image, where: - // parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION - // will be painted in green, and other parts will be painted in blue. - void PaintColParts(const STRING& outfile) const; - - // Debugger function that renders the blobs in part_grid_ over the input - // image. - void PaintSpecialTexts(const STRING& outfile) const; - - // Debugger function that print the math blobs density values for a - // ColPartition object. - void PrintSpecialBlobsDensity(const ColPartition* part) const; - - // The tesseract engine initialized from equation training data. - Tesseract equ_tesseract_; - - // The tesseract engine used for OCR. This pointer is passed in by the caller, - // so do NOT destroy it in this class. - Tesseract* lang_tesseract_; - - // The ColPartitionGrid that we are processing. This pointer is passed in from - // the caller, so do NOT destroy it in the class. - ColPartitionGrid* part_grid_; - - // A simple array of pointers to the best assigned column division at - // each grid y coordinate. 
This pointer is passed in from the caller, so do - // NOT destroy it in the class. - ColPartitionSet** best_columns_; - - // The super bounding box of all cps in the part_grid_. - TBOX* cps_super_bbox_; - - // The seed ColPartition for equation region. - GenericVector cp_seeds_; - - // The resolution (dpi) of the processing image. - int resolution_; - - // The number of pages we have processed. - int page_count_; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCMAIN_EQUATIONDETECT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixspace.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixspace.cpp deleted file mode 100644 index 171efab2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixspace.cpp +++ /dev/null @@ -1,895 +0,0 @@ -/****************************************************************** - * File: fixspace.cpp (Formerly fixspace.c) - * Description: Implements a pass over the page res, exploring the alternative - * spacing possibilities, trying to use context to improve the - * word spacing - * Author: Phil Cheatle - * Created: Thu Oct 21 11:38:43 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "fixspace.h" -#include // for INT16_MAX, int16_t, int32_t -#include "blobs.h" // for TWERD, TBLOB, TESSLINE -#include "boxword.h" // for BoxWord -#include "errcode.h" // for ASSERT_HOST -#include "host.h" // for FALSE, TRUE -#include "normalis.h" // for kBlnXHeight, kBlnBaselineOffset -#include "ocrclass.h" // for ETEXT_DESC -#include "pageres.h" // for WERD_RES_IT, WERD_RES, WERD_RES_LIST -#include "params.h" // for IntParam, StringParam, BoolParam, Doub... -#include "ratngs.h" // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM -#include "rect.h" // for TBOX -#include "stepblob.h" // for C_BLOB_IT, C_BLOB_LIST, C_BLOB -#include "strngs.h" // for STRING -#include "tesseractclass.h" // for Tesseract, TesseractStats, WordData -#include "tessvars.h" // for debug_fp -#include "tprintf.h" // for tprintf -#include "unichar.h" // for UNICHAR_ID -#include "unicharset.h" // for UNICHARSET -#include "werd.h" // for WERD, W_EOL, W_FUZZY_NON, W_FUZZY_SP - -class BLOCK; -class ROW; - -#define PERFECT_WERDS 999 -#define MAXSPACING 128 /*max expected spacing in pix */ - -namespace tesseract { - -/********************************************************************** - * c_blob_comparator() - * - * Blob comparator used to sort a blob list so that blobs are in increasing - * order of left edge. - **********************************************************************/ - -static int c_blob_comparator( // sort blobs - const void *blob1p, // ptr to ptr to blob1 - const void *blob2p // ptr to ptr to blob2 - ) { - const C_BLOB *blob1 = *reinterpret_cast(blob1p); - const C_BLOB *blob2 = *reinterpret_cast(blob2p); - - return blob1->bounding_box ().left () - blob2->bounding_box ().left (); -} - -/** - * @name fix_fuzzy_spaces() - * Walk over the page finding sequences of words joined by fuzzy spaces. 
Extract - * them as a sublist, process the sublist to find the optimal arrangement of - * spaces then replace the sublist in the ROW_RES. - * - * @param monitor progress monitor - * @param word_count count of words in doc - * @param[out] page_res - */ -void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, - int32_t word_count, - PAGE_RES *page_res) { - BLOCK_RES_IT block_res_it; - ROW_RES_IT row_res_it; - WERD_RES_IT word_res_it_from; - WERD_RES_IT word_res_it_to; - WERD_RES *word_res; - WERD_RES_LIST fuzzy_space_words; - int16_t new_length; - bool prevent_null_wd_fixsp; // DON'T process blobless wds - int32_t word_index; // current word - - block_res_it.set_to_list(&page_res->block_res_list); - word_index = 0; - for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); - block_res_it.forward()) { - row_res_it.set_to_list(&block_res_it.data()->row_res_list); - for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); - row_res_it.forward()) { - word_res_it_from.set_to_list(&row_res_it.data()->word_res_list); - while (!word_res_it_from.at_last()) { - word_res = word_res_it_from.data(); - while (!word_res_it_from.at_last() && - !(word_res->combination || - word_res_it_from.data_relative(1)->word->flag(W_FUZZY_NON) || - word_res_it_from.data_relative(1)->word->flag(W_FUZZY_SP))) { - fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, - block_res_it.data()->block); - word_res = word_res_it_from.forward(); - word_index++; - if (monitor != nullptr) { - monitor->ocr_alive = TRUE; - monitor->progress = 90 + 5 * word_index / word_count; - if (monitor->deadline_exceeded() || - (monitor->cancel != nullptr && - (*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) - return; - } - } - - if (!word_res_it_from.at_last()) { - word_res_it_to = word_res_it_from; - prevent_null_wd_fixsp = - word_res->word->cblob_list()->empty(); - if (check_debug_pt(word_res, 60)) - debug_fix_space_level.set_value(10); - word_res_it_to.forward(); - word_index++; - if (monitor != 
nullptr) { - monitor->ocr_alive = TRUE; - monitor->progress = 90 + 5 * word_index / word_count; - if (monitor->deadline_exceeded() || - (monitor->cancel != nullptr && - (*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) - return; - } - while (!word_res_it_to.at_last () && - (word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) || - word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) { - if (check_debug_pt(word_res, 60)) - debug_fix_space_level.set_value(10); - if (word_res->word->cblob_list()->empty()) - prevent_null_wd_fixsp = true; - word_res = word_res_it_to.forward(); - } - if (check_debug_pt(word_res, 60)) - debug_fix_space_level.set_value(10); - if (word_res->word->cblob_list()->empty()) - prevent_null_wd_fixsp = true; - if (prevent_null_wd_fixsp) { - word_res_it_from = word_res_it_to; - } else { - fuzzy_space_words.assign_to_sublist(&word_res_it_from, - &word_res_it_to); - fix_fuzzy_space_list(fuzzy_space_words, - row_res_it.data()->row, - block_res_it.data()->block); - new_length = fuzzy_space_words.length(); - word_res_it_from.add_list_before(&fuzzy_space_words); - for (; - !word_res_it_from.at_last() && new_length > 0; - new_length--) { - word_res_it_from.forward(); - } - } - if (test_pt) - debug_fix_space_level.set_value(0); - } - fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, - block_res_it.data()->block); - // Last word in row - } - } - } -} - -void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, - ROW *row, - BLOCK* block) { - int16_t best_score; - WERD_RES_LIST current_perm; - int16_t current_score; - bool improved = false; - - best_score = eval_word_spacing(best_perm); // default score - dump_words(best_perm, best_score, 1, improved); - - if (best_score != PERFECT_WERDS) - initialise_search(best_perm, current_perm); - - while ((best_score != PERFECT_WERDS) && !current_perm.empty()) { - match_current_words(current_perm, row, block); - current_score = eval_word_spacing(current_perm); - dump_words(current_perm, 
current_score, 2, improved); - if (current_score > best_score) { - best_perm.clear(); - best_perm.deep_copy(¤t_perm, &WERD_RES::deep_copy); - best_score = current_score; - improved = true; - } - if (current_score < PERFECT_WERDS) - transform_to_next_perm(current_perm); - } - dump_words(best_perm, best_score, 3, improved); -} - -} // namespace tesseract - -void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) { - WERD_RES_IT src_it(&src_list); - WERD_RES_IT new_it(&new_list); - WERD_RES *src_wd; - WERD_RES *new_wd; - - for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - src_wd = src_it.data(); - if (!src_wd->combination) { - new_wd = WERD_RES::deep_copy(src_wd); - new_wd->combination = false; - new_wd->part_of_combo = false; - new_it.add_after_then_move(new_wd); - } - } -} - - -namespace tesseract { -void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, - BLOCK* block) { - WERD_RES_IT word_it(&words); - WERD_RES *word; - // Since we are not using PAGE_RES to iterate over words, we need to update - // prev_word_best_choice_ before calling classify_word_pass2(). - prev_word_best_choice_ = nullptr; - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - if ((!word->part_of_combo) && (word->box_word == nullptr)) { - WordData word_data(block, row, word); - SetupWordPassN(2, &word_data); - classify_word_and_language(2, nullptr, &word_data); - } - prev_word_best_choice_ = word->best_choice; - } -} - -/** - * @name eval_word_spacing() - * The basic measure is the number of characters in contextually confirmed - * words. (I.e the word is done) - * If all words are contextually confirmed the evaluation is deemed perfect. - * - * Some fiddles are done to handle "1"s as these are VERY frequent causes of - * fuzzy spaces. 
The problem with the basic measure is that "561 63" would score - * the same as "56163", though given our knowledge that the space is fuzzy, and - * that there is a "1" next to the fuzzy space, we need to ensure that "56163" - * is preferred. - * - * The solution is to NOT COUNT the score of any word which has a digit at one - * end and a "1Il" as the character the other side of the space. - * - * Conversely, any character next to a "1" within a word is counted as a positive - * score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of - * the "1" joined). "56163" would score 7 - all chars in a numeric word + 2 - * sides of a "1" joined. - * - * The joined 1 rule is applied to any word REGARDLESS of contextual - * confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contexutally - * confirmed. The only score is from the joined 1. "PS7a713/7a" scores 2. - * - */ -int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { - WERD_RES_IT word_res_it(&word_res_list); - int16_t total_score = 0; - int16_t word_count = 0; - int16_t done_word_count = 0; - int16_t word_len; - int16_t i; - int16_t offset; - WERD_RES *word; // current word - int16_t prev_word_score = 0; - bool prev_word_done = false; - bool prev_char_1 = false; // prev ch a "1/I/l"? - bool prev_char_digit = false; // prev ch 2..9 or 0 - bool current_char_1 = false; - bool current_word_ok_so_far; - STRING punct_chars = "!\"`',.:;"; - bool prev_char_punct = false; - bool current_char_punct = false; - bool word_done = false; - - do { - word = word_res_it.data(); - word_done = fixspace_thinks_word_done(word); - word_count++; - if (word->tess_failed) { - total_score += prev_word_score; - if (prev_word_done) - done_word_count++; - prev_word_score = 0; - prev_char_1 = false; - prev_char_digit = false; - prev_word_done = false; - } else { - /* - Can we add the prev word score and potentially count this word? 
- Yes IF it didn't end in a 1 when the first char of this word is a digit - AND it didn't end in a digit when the first char of this word is a 1 - */ - word_len = word->reject_map.length(); - current_word_ok_so_far = false; - if (!((prev_char_1 && digit_or_numeric_punct(word, 0)) || - (prev_char_digit && ( - (word_done && - word->best_choice->unichar_lengths().string()[0] == 1 && - word->best_choice->unichar_string()[0] == '1') || - (!word_done && STRING(conflict_set_I_l_1).contains( - word->best_choice->unichar_string()[0])))))) { - total_score += prev_word_score; - if (prev_word_done) - done_word_count++; - current_word_ok_so_far = word_done; - } - - if (current_word_ok_so_far) { - prev_word_done = true; - prev_word_score = word_len; - } else { - prev_word_done = false; - prev_word_score = 0; - } - - /* Add 1 to total score for every joined 1 regardless of context and - rejtn */ - for (i = 0, prev_char_1 = false; i < word_len; i++) { - current_char_1 = word->best_choice->unichar_string()[i] == '1'; - if (prev_char_1 || (current_char_1 && (i > 0))) - total_score++; - prev_char_1 = current_char_1; - } - - /* Add 1 to total score for every joined punctuation regardless of context - and rejtn */ - if (tessedit_prefer_joined_punct) { - for (i = 0, offset = 0, prev_char_punct = false; i < word_len; - offset += word->best_choice->unichar_lengths()[i++]) { - current_char_punct = - punct_chars.contains(word->best_choice->unichar_string()[offset]); - if (prev_char_punct || (current_char_punct && i > 0)) - total_score++; - prev_char_punct = current_char_punct; - } - } - prev_char_digit = digit_or_numeric_punct(word, word_len - 1); - for (i = 0, offset = 0; i < word_len - 1; - offset += word->best_choice->unichar_lengths()[i++]); - prev_char_1 = - ((word_done && (word->best_choice->unichar_string()[offset] == '1')) - || (!word_done && STRING(conflict_set_I_l_1).contains( - word->best_choice->unichar_string()[offset]))); - } - /* Find next word */ - do { - 
word_res_it.forward(); - } while (word_res_it.data()->part_of_combo); - } while (!word_res_it.at_first()); - total_score += prev_word_score; - if (prev_word_done) - done_word_count++; - if (done_word_count == word_count) - return PERFECT_WERDS; - else - return total_score; -} - -bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) { - int i; - int offset; - - for (i = 0, offset = 0; i < char_position; - offset += word->best_choice->unichar_lengths()[i++]); - return ( - word->uch_set->get_isdigit( - word->best_choice->unichar_string().string() + offset, - word->best_choice->unichar_lengths()[i]) || - (word->best_choice->permuter() == NUMBER_PERM && - STRING(numeric_punctuation).contains( - word->best_choice->unichar_string().string()[offset]))); -} - -} // namespace tesseract - - -/** - * @name transform_to_next_perm() - * Examines the current word list to find the smallest word gap size. Then walks - * the word list closing any gaps of this size by either inserted new - * combination words, or extending existing ones. - * - * The routine COULD be limited to stop it building words longer than N blobs. - * - * If there are no more gaps then it DELETES the entire list and returns the - * empty list to cause termination. 
- */ -void transform_to_next_perm(WERD_RES_LIST &words) { - WERD_RES_IT word_it(&words); - WERD_RES_IT prev_word_it(&words); - WERD_RES *word; - WERD_RES *prev_word; - WERD_RES *combo; - WERD *copy_word; - int16_t prev_right = -INT16_MAX; - TBOX box; - int16_t gap; - int16_t min_gap = INT16_MAX; - - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - if (!word->part_of_combo) { - box = word->word->bounding_box(); - if (prev_right > -INT16_MAX) { - gap = box.left() - prev_right; - if (gap < min_gap) - min_gap = gap; - } - prev_right = box.right(); - } - } - if (min_gap < INT16_MAX) { - prev_right = -INT16_MAX; // back to start - word_it.set_to_list(&words); - // Note: we can't use cycle_pt due to inserted combos at start of list. - for (; (prev_right == -INT16_MAX) || !word_it.at_first(); - word_it.forward()) { - word = word_it.data(); - if (!word->part_of_combo) { - box = word->word->bounding_box(); - if (prev_right > -INT16_MAX) { - gap = box.left() - prev_right; - if (gap <= min_gap) { - prev_word = prev_word_it.data(); - if (prev_word->combination) { - combo = prev_word; - } else { - /* Make a new combination and insert before - * the first word being joined. 
*/ - copy_word = new WERD; - *copy_word = *(prev_word->word); - // deep copy - combo = new WERD_RES(copy_word); - combo->combination = TRUE; - combo->x_height = prev_word->x_height; - prev_word->part_of_combo = true; - prev_word_it.add_before_then_move(combo); - } - combo->word->set_flag(W_EOL, word->word->flag(W_EOL)); - if (word->combination) { - combo->word->join_on(word->word); - // Move blobs to combo - // old combo no longer needed - delete word_it.extract(); - } else { - // Copy current wd to combo - combo->copy_on(word); - word->part_of_combo = true; - } - combo->done = FALSE; - combo->ClearResults(); - } else { - prev_word_it = word_it; // catch up - } - } - prev_right = box.right(); - } - } - } else { - words.clear(); // signal termination - } -} - -namespace tesseract { -void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score, - int16_t mode, bool improved) { - WERD_RES_IT word_res_it(&perm); - - if (debug_fix_space_level > 0) { - if (mode == 1) { - stats_.dump_words_str = ""; - for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); - word_res_it.forward()) { - if (!word_res_it.data()->part_of_combo) { - stats_.dump_words_str += - word_res_it.data()->best_choice->unichar_string(); - stats_.dump_words_str += ' '; - } - } - } - - if (debug_fix_space_level > 1) { - switch (mode) { - case 1: - tprintf("EXTRACTED (%d): \"", score); - break; - case 2: - tprintf("TESTED (%d): \"", score); - break; - case 3: - tprintf("RETURNED (%d): \"", score); - break; - } - - for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); - word_res_it.forward()) { - if (!word_res_it.data()->part_of_combo) { - tprintf("%s/%1d ", - word_res_it.data()->best_choice->unichar_string().string(), - (int)word_res_it.data()->best_choice->permuter()); - } - } - tprintf("\"\n"); - } else if (improved) { - tprintf("FIX SPACING \"%s\" => \"", stats_.dump_words_str.string()); - for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); - word_res_it.forward()) { - if 
(!word_res_it.data()->part_of_combo) { - tprintf("%s/%1d ", - word_res_it.data()->best_choice->unichar_string().string(), - (int)word_res_it.data()->best_choice->permuter()); - } - } - tprintf("\"\n"); - } - } -} - -bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) { - if (word->done) - return true; - - /* - Use all the standard pass 2 conditions for mode 5 in set_done() in - reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T - CARE WHETHER WE HAVE of/at on/an etc. - */ - if (fixsp_done_mode > 0 && - (word->tess_accepted || - (fixsp_done_mode == 2 && word->reject_map.reject_count() == 0) || - fixsp_done_mode == 3) && - (strchr(word->best_choice->unichar_string().string(), ' ') == nullptr) && - ((word->best_choice->permuter() == SYSTEM_DAWG_PERM) || - (word->best_choice->permuter() == FREQ_DAWG_PERM) || - (word->best_choice->permuter() == USER_DAWG_PERM) || - (word->best_choice->permuter() == NUMBER_PERM))) { - return true; - } else { - return false; - } -} - - -/** - * @name fix_sp_fp_word() - * Test the current word to see if it can be split by deleting noise blobs. If - * so, do the business. - * Return with the iterator pointing to the same place if the word is unchanged, - * or the last of the replacement words. 
- */ -void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, - BLOCK* block) { - WERD_RES *word_res; - WERD_RES_LIST sub_word_list; - WERD_RES_IT sub_word_list_it(&sub_word_list); - int16_t blob_index; - int16_t new_length; - float junk; - - word_res = word_res_it.data(); - if (word_res->word->flag(W_REP_CHAR) || - word_res->combination || - word_res->part_of_combo || - !word_res->word->flag(W_DONT_CHOP)) - return; - - blob_index = worst_noise_blob(word_res, &junk); - if (blob_index < 0) - return; - - if (debug_fix_space_level > 1) { - tprintf("FP fixspace working on \"%s\"\n", - word_res->best_choice->unichar_string().string()); - } - word_res->word->rej_cblob_list()->sort(c_blob_comparator); - sub_word_list_it.add_after_stay_put(word_res_it.extract()); - fix_noisy_space_list(sub_word_list, row, block); - new_length = sub_word_list.length(); - word_res_it.add_list_before(&sub_word_list); - for (; !word_res_it.at_last() && new_length > 1; new_length--) { - word_res_it.forward(); - } -} - -void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, - BLOCK* block) { - int16_t best_score; - WERD_RES_IT best_perm_it(&best_perm); - WERD_RES_LIST current_perm; - WERD_RES_IT current_perm_it(¤t_perm); - WERD_RES *old_word_res; - int16_t current_score; - bool improved = false; - - best_score = fp_eval_word_spacing(best_perm); // default score - - dump_words(best_perm, best_score, 1, improved); - - old_word_res = best_perm_it.data(); - // Even deep_copy doesn't copy the underlying WERD unless its combination - // flag is true!. 
- old_word_res->combination = true; // Kludge to force deep copy - current_perm_it.add_to_end(WERD_RES::deep_copy(old_word_res)); - old_word_res->combination = false; // Undo kludge - - break_noisiest_blob_word(current_perm); - - while (best_score != PERFECT_WERDS && !current_perm.empty()) { - match_current_words(current_perm, row, block); - current_score = fp_eval_word_spacing(current_perm); - dump_words(current_perm, current_score, 2, improved); - if (current_score > best_score) { - best_perm.clear(); - best_perm.deep_copy(¤t_perm, &WERD_RES::deep_copy); - best_score = current_score; - improved = true; - } - if (current_score < PERFECT_WERDS) { - break_noisiest_blob_word(current_perm); - } - } - dump_words(best_perm, best_score, 3, improved); -} - - -/** - * break_noisiest_blob_word() - * Find the word with the blob which looks like the worst noise. - * Break the word into two, deleting the noise blob. - */ -void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { - WERD_RES_IT word_it(&words); - WERD_RES_IT worst_word_it; - float worst_noise_score = 9999; - int worst_blob_index = -1; // Noisiest blob of noisiest wd - int blob_index; // of wds noisiest blob - float noise_score; // of wds noisiest blob - WERD_RES *word_res; - C_BLOB_IT blob_it; - C_BLOB_IT rej_cblob_it; - C_BLOB_LIST new_blob_list; - C_BLOB_IT new_blob_it; - C_BLOB_IT new_rej_cblob_it; - WERD *new_word; - int16_t start_of_noise_blob; - int16_t i; - - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - blob_index = worst_noise_blob(word_it.data(), &noise_score); - if (blob_index > -1 && worst_noise_score > noise_score) { - worst_noise_score = noise_score; - worst_blob_index = blob_index; - worst_word_it = word_it; - } - } - if (worst_blob_index < 0) { - words.clear(); // signal termination - return; - } - - /* Now split the worst_word_it */ - - word_res = worst_word_it.data(); - - /* Move blobs before noise blob to a new bloblist */ - - 
new_blob_it.set_to_list(&new_blob_list); - blob_it.set_to_list(word_res->word->cblob_list()); - for (i = 0; i < worst_blob_index; i++, blob_it.forward()) { - new_blob_it.add_after_then_move(blob_it.extract()); - } - start_of_noise_blob = blob_it.data()->bounding_box().left(); - delete blob_it.extract(); // throw out noise blob - - new_word = new WERD(&new_blob_list, word_res->word); - new_word->set_flag(W_EOL, FALSE); - word_res->word->set_flag(W_BOL, FALSE); - word_res->word->set_blanks(1); // After break - - new_rej_cblob_it.set_to_list(new_word->rej_cblob_list()); - rej_cblob_it.set_to_list(word_res->word->rej_cblob_list()); - for (; - (!rej_cblob_it.empty() && - (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob)); - rej_cblob_it.forward()) { - new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract()); - } - - WERD_RES* new_word_res = new WERD_RES(new_word); - new_word_res->combination = true; - worst_word_it.add_before_then_move(new_word_res); - - word_res->ClearResults(); -} - -int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, - float *worst_noise_score) { - float noise_score[512]; - int i; - int min_noise_blob; // 1st contender - int max_noise_blob; // last contender - int non_noise_count; - int worst_noise_blob; // Worst blob - float small_limit = kBlnXHeight * fixsp_small_outlines_size; - float non_noise_limit = kBlnXHeight * 0.8; - - if (word_res->rebuild_word == nullptr) - return -1; // Can't handle cube words. - - // Normalised. 
- int blob_count = word_res->box_word->length(); - ASSERT_HOST(blob_count <= 512); - if (blob_count < 5) - return -1; // too short to split - - /* Get the noise scores for all blobs */ - - #ifndef SECURE_NAMES - if (debug_fix_space_level > 5) - tprintf("FP fixspace Noise metrics for \"%s\": ", - word_res->best_choice->unichar_string().string()); - #endif - - for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) { - TBLOB* blob = word_res->rebuild_word->blobs[i]; - if (word_res->reject_map[i].accepted()) - noise_score[i] = non_noise_limit; - else - noise_score[i] = blob_noise_score(blob); - - if (debug_fix_space_level > 5) - tprintf("%1.1f ", noise_score[i]); - } - if (debug_fix_space_level > 5) - tprintf("\n"); - - /* Now find the worst one which is far enough away from the end of the word */ - - non_noise_count = 0; - for (i = 0; i < blob_count && non_noise_count < fixsp_non_noise_limit; i++) { - if (noise_score[i] >= non_noise_limit) { - non_noise_count++; - } - } - if (non_noise_count < fixsp_non_noise_limit) - return -1; - - min_noise_blob = i; - - non_noise_count = 0; - for (i = blob_count - 1; i >= 0 && non_noise_count < fixsp_non_noise_limit; - i--) { - if (noise_score[i] >= non_noise_limit) { - non_noise_count++; - } - } - if (non_noise_count < fixsp_non_noise_limit) - return -1; - - max_noise_blob = i; - - if (min_noise_blob > max_noise_blob) - return -1; - - *worst_noise_score = small_limit; - worst_noise_blob = -1; - for (i = min_noise_blob; i <= max_noise_blob; i++) { - if (noise_score[i] < *worst_noise_score) { - worst_noise_blob = i; - *worst_noise_score = noise_score[i]; - } - } - return worst_noise_blob; -} - -float Tesseract::blob_noise_score(TBLOB *blob) { - TBOX box; // BB of outline - int16_t outline_count = 0; - int16_t max_dimension; - int16_t largest_outline_dimension = 0; - - for (TESSLINE* ol = blob->outlines; ol != nullptr; ol= ol->next) { - outline_count++; - box = ol->bounding_box(); - if (box.height() > box.width()) 
{ - max_dimension = box.height(); - } else { - max_dimension = box.width(); - } - - if (largest_outline_dimension < max_dimension) - largest_outline_dimension = max_dimension; - } - - if (outline_count > 5) { - // penalise LOTS of blobs - largest_outline_dimension *= 2; - } - - box = blob->bounding_box(); - if (box.bottom() > kBlnBaselineOffset * 4 || - box.top() < kBlnBaselineOffset / 2) { - // Lax blob is if high or low - largest_outline_dimension /= 2; - } - - return largest_outline_dimension; -} -} // namespace tesseract - -void fixspace_dbg(WERD_RES *word) { - TBOX box = word->word->bounding_box(); - const bool show_map_detail = false; - int16_t i; - - box.print(); - tprintf(" \"%s\" ", word->best_choice->unichar_string().string()); - tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", - word->word->cblob_list()->length(), - word->rebuild_word->NumBlobs(), - word->box_word->length()); - word->reject_map.print(debug_fp); - tprintf("\n"); - if (show_map_detail) { - tprintf("\"%s\"\n", word->best_choice->unichar_string().string()); - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]); - word->reject_map[i].full_print(debug_fp); - } - } - - tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); - tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); -} - - -/** - * fp_eval_word_spacing() - * Evaluation function for fixed pitch word lists. - * - * Basically, count the number of "nice" characters - those which are in tess - * acceptable words or in dict words and are not rejected. 
- * Penalise any potential noise chars - */ -namespace tesseract { -int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { - WERD_RES_IT word_it(&word_res_list); - WERD_RES *word; - int16_t score = 0; - int16_t i; - float small_limit = kBlnXHeight * fixsp_small_outlines_size; - - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - if (word->rebuild_word == nullptr) - continue; // Can't handle cube words. - if (word->done || - word->tess_accepted || - word->best_choice->permuter() == SYSTEM_DAWG_PERM || - word->best_choice->permuter() == FREQ_DAWG_PERM || - word->best_choice->permuter() == USER_DAWG_PERM || - safe_dict_word(word) > 0) { - int num_blobs = word->rebuild_word->NumBlobs(); - UNICHAR_ID space = word->uch_set->unichar_to_id(" "); - for (i = 0; i < word->best_choice->length() && i < num_blobs; ++i) { - TBLOB* blob = word->rebuild_word->blobs[i]; - if (word->best_choice->unichar_id(i) == space || - blob_noise_score(blob) < small_limit) { - score -= 1; // penalise possibly erroneous non-space - } else if (word->reject_map[i].accepted()) { - score++; - } - } - } - } - if (score < 0) - score = 0; - return score; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixspace.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixspace.h deleted file mode 100644 index b60fb0e7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixspace.h +++ /dev/null @@ -1,32 +0,0 @@ -/****************************************************************** - * File: fixspace.h (Formerly fixspace.h) - * Description: Implements a pass over the page res, exploring the alternative - * spacing possibilities, trying to use context to improve the - * word spacing - * Author: Phil Cheatle - * Created: Thu Oct 21 11:38:43 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef FIXSPACE_H -#define FIXSPACE_H - -class WERD_RES; -class WERD_RES_LIST; - -void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list); -void transform_to_next_perm(WERD_RES_LIST &words); -void fixspace_dbg(WERD_RES *word); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixxht.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixxht.cpp deleted file mode 100644 index fe59bb1c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/fixxht.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/********************************************************************** - * File: fixxht.cpp (Formerly fixxht.c) - * Description: Improve x_ht and look out for case inconsistencies - * Author: Phil Cheatle - * Created: Thu Aug 5 14:11:08 BST 1993 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include -#include -#include "params.h" -#include "float2int.h" -#include "tesseractclass.h" - -namespace tesseract { - -// Fixxht overview. -// Premise: Initial estimate of x-height is adequate most of the time, but -// occasionally it is incorrect. Most notable causes of failure are: -// 1. Small caps, where the top of the caps is the same as the body text -// xheight. For small caps words the xheight needs to be reduced to correctly -// recognize the caps in the small caps word. -// 2. All xheight lines, such as summer. Here the initial estimate will have -// guessed that the blob tops are caps and will have placed the xheight too low. -// 3. Noise/logos beside words, or changes in font size on a line. Such -// things can blow the statistics and cause an incorrect estimate. -// 4. Incorrect baseline. Can happen when 2 columns are incorrectly merged. -// In this case the x-height is often still correct. -// -// Algorithm. -// Compare the vertical position (top only) of alphnumerics in a word with -// the range of positions in training data (in the unicharset). -// See CountMisfitTops. If any characters disagree sufficiently with the -// initial xheight estimate, then recalculate the xheight, re-run OCR on -// the word, and if the number of vertical misfits goes down, along with -// either the word rating or certainty, then keep the new xheight. 
-// The new xheight is calculated as follows:ComputeCompatibleXHeight -// For each alphanumeric character that has a vertically misplaced top -// (a misfit), yet its bottom is within the acceptable range (ie it is not -// likely a sub-or super-script) calculate the range of acceptable xheight -// positions from its range of tops, and give each value in the range a -// number of votes equal to the distance of its top from its acceptance range. -// The x-height position with the median of the votes becomes the new -// x-height. This assumes that most characters will be correctly recognized -// even if the x-height is incorrect. This is not a terrible assumption, but -// it is not great. An improvement would be to use a classifier that does -// not care about vertical position or scaling at all. -// Separately collect stats on shifted baselines and apply the same logic to -// computing a best-fit shift to fix the error. If the baseline needs to be -// shifted, but the x-height is OK, returns the original x-height along with -// the baseline shift to indicate that recognition needs to re-run. - -// If the max-min top of a unicharset char is bigger than kMaxCharTopRange -// then the char top cannot be used to judge misfits or suggest a new top. -const int kMaxCharTopRange = 48; - -// Returns the number of misfit blob tops in this word. 
-int Tesseract::CountMisfitTops(WERD_RES *word_res) { - int bad_blobs = 0; - int num_blobs = word_res->rebuild_word->NumBlobs(); - for (int blob_id = 0; blob_id < num_blobs; ++blob_id) { - TBLOB* blob = word_res->rebuild_word->blobs[blob_id]; - UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); - if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { - int top = blob->bounding_box().top(); - if (top >= INT_FEAT_RANGE) - top = INT_FEAT_RANGE - 1; - int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, - &min_top, &max_top); - if (max_top - min_top > kMaxCharTopRange) - continue; - bool bad = top < min_top - x_ht_acceptance_tolerance || - top > max_top + x_ht_acceptance_tolerance; - if (bad) - ++bad_blobs; - if (debug_x_ht_level >= 1) { - tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n", - unicharset.id_to_unichar(class_id), - bad ? "Misfit" : "OK", top, min_top, max_top, - static_cast(x_ht_acceptance_tolerance)); - } - } - } - return bad_blobs; -} - -// Returns a new x-height maximally compatible with the result in word_res. -// See comment above for overall algorithm. -float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, - float* baseline_shift) { - STATS top_stats(0, UINT8_MAX); - STATS shift_stats(-UINT8_MAX, UINT8_MAX); - int bottom_shift = 0; - int num_blobs = word_res->rebuild_word->NumBlobs(); - do { - top_stats.clear(); - shift_stats.clear(); - for (int blob_id = 0; blob_id < num_blobs; ++blob_id) { - TBLOB* blob = word_res->rebuild_word->blobs[blob_id]; - UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); - if (unicharset.get_isalpha(class_id) || - unicharset.get_isdigit(class_id)) { - int top = blob->bounding_box().top() + bottom_shift; - // Clip the top to the limit of normalized feature space. 
- if (top >= INT_FEAT_RANGE) - top = INT_FEAT_RANGE - 1; - int bottom = blob->bounding_box().bottom() + bottom_shift; - int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, - &min_top, &max_top); - // Chars with a wild top range would mess up the result so ignore them. - if (max_top - min_top > kMaxCharTopRange) - continue; - int misfit_dist = std::max((min_top - x_ht_acceptance_tolerance) - top, - top - (max_top + x_ht_acceptance_tolerance)); - int height = top - kBlnBaselineOffset; - if (debug_x_ht_level >= 2) { - tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ", - unicharset.id_to_unichar(class_id), - height, min_bottom, max_bottom, min_top, max_top, - bottom, top); - } - // Use only chars that fit in the expected bottom range, and where - // the range of tops is sensibly near the xheight. - if (min_bottom <= bottom + x_ht_acceptance_tolerance && - bottom - x_ht_acceptance_tolerance <= max_bottom && - min_top > kBlnBaselineOffset && - max_top - kBlnBaselineOffset >= kBlnXHeight && - misfit_dist > 0) { - // Compute the x-height position using proportionality between the - // actual height and expected height. - int min_xht = DivRounded(height * kBlnXHeight, - max_top - kBlnBaselineOffset); - int max_xht = DivRounded(height * kBlnXHeight, - min_top - kBlnBaselineOffset); - if (debug_x_ht_level >= 2) { - tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht); - } - // The range of expected heights gets a vote equal to the distance - // of the actual top from the expected top. - for (int y = min_xht; y <= max_xht; ++y) - top_stats.add(y, misfit_dist); - } else if ((min_bottom > bottom + x_ht_acceptance_tolerance || - bottom - x_ht_acceptance_tolerance > max_bottom) && - bottom_shift == 0) { - // Get the range of required bottom shift. 
- int min_shift = min_bottom - bottom; - int max_shift = max_bottom - bottom; - if (debug_x_ht_level >= 2) { - tprintf(" bottom shift min=%d, max=%d\n", min_shift, max_shift); - } - // The range of expected shifts gets a vote equal to the min distance - // of the actual bottom from the expected bottom, spread over the - // range of its acceptance. - int misfit_weight = abs(min_shift); - if (max_shift > min_shift) - misfit_weight /= max_shift - min_shift; - for (int y = min_shift; y <= max_shift; ++y) - shift_stats.add(y, misfit_weight); - } else { - if (bottom_shift == 0) { - // Things with bottoms that are already ok need to say so, on the - // 1st iteration only. - shift_stats.add(0, kBlnBaselineOffset); - } - if (debug_x_ht_level >= 2) { - tprintf(" already OK\n"); - } - } - } - } - if (shift_stats.get_total() > top_stats.get_total()) { - bottom_shift = IntCastRounded(shift_stats.median()); - if (debug_x_ht_level >= 2) { - tprintf("Applying bottom shift=%d\n", bottom_shift); - } - } - } while (bottom_shift != 0 && - top_stats.get_total() < shift_stats.get_total()); - // Baseline shift is opposite sign to the bottom shift. - *baseline_shift = -bottom_shift / word_res->denorm.y_scale(); - if (debug_x_ht_level >= 2) { - tprintf("baseline shift=%g\n", *baseline_shift); - } - if (top_stats.get_total() == 0) - return bottom_shift != 0 ? word_res->x_height : 0.0f; - // The new xheight is just the median vote, which is then scaled out - // of BLN space back to pixel space to get the x-height in pixel space. - float new_xht = top_stats.median(); - if (debug_x_ht_level >= 2) { - tprintf("Median xht=%f\n", new_xht); - tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", - new_xht, new_xht / word_res->denorm.y_scale()); - } - // The xheight must change by at least x_ht_min_change to be used. - if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) - return new_xht / word_res->denorm.y_scale(); - else - return bottom_shift != 0 ? 
word_res->x_height : 0.0f; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/linerec.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/linerec.cpp deleted file mode 100644 index 8633d8f3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/linerec.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: linerec.cpp -// Description: Top-level line-based recognition module for Tesseract. -// Author: Ray Smith -// Created: Thu May 02 09:47:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "tesseractclass.h" - -#include "allheaders.h" -#include "boxread.h" -#include "imagedata.h" -#ifndef ANDROID_BUILD -#include "lstmrecognizer.h" -#include "recodebeam.h" -#endif -#include "pageres.h" -#include "tprintf.h" - -#include - -namespace tesseract { - -// Scale factor to make certainty more comparable to Tesseract. -const float kCertaintyScale = 7.0f; -// Worst acceptable certainty for a dictionary word. -const float kWorstDictCertainty = -25.0f; - -// Generates training data for training a line recognizer, eg LSTM. -// Breaks the page into lines, according to the boxes, and writes them to a -// serialized DocumentData based on output_basename. 
-void Tesseract::TrainLineRecognizer(const STRING& input_imagename, - const STRING& output_basename, - BLOCK_LIST *block_list) { - STRING lstmf_name = output_basename + ".lstmf"; - DocumentData images(lstmf_name); - if (applybox_page > 0) { - // Load existing document for the previous pages. - if (!images.LoadDocument(lstmf_name.string(), 0, 0, nullptr)) { - tprintf("Failed to read training data from %s!\n", lstmf_name.string()); - return; - } - } - GenericVector boxes; - GenericVector texts; - // Get the boxes for this page, if there are any. - if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr, - nullptr) || - boxes.empty()) { - tprintf("Failed to read boxes from %s\n", input_imagename.string()); - return; - } - TrainFromBoxes(boxes, texts, block_list, &images); - images.Shuffle(); - if (!images.SaveDocument(lstmf_name.string(), nullptr)) { - tprintf("Failed to write training data to %s!\n", lstmf_name.string()); - } -} - -// Generates training data for training a line recognizer, eg LSTM. -// Breaks the boxes into lines, normalizes them, converts to ImageData and -// appends them to the given training_data. -void Tesseract::TrainFromBoxes(const GenericVector& boxes, - const GenericVector& texts, - BLOCK_LIST *block_list, - DocumentData* training_data) { - int box_count = boxes.size(); - // Process all the text lines in this page, as defined by the boxes. - int end_box = 0; - // Don't let \t, which marks newlines in the box file, get into the line - // content, as that makes the line unusable in training. - while (end_box < texts.size() && texts[end_box] == "\t") ++end_box; - for (int start_box = end_box; start_box < box_count; start_box = end_box) { - // Find the textline of boxes starting at start and their bounding box. 
- TBOX line_box = boxes[start_box]; - STRING line_str = texts[start_box]; - for (end_box = start_box + 1; end_box < box_count && texts[end_box] != "\t"; - ++end_box) { - line_box += boxes[end_box]; - line_str += texts[end_box]; - } - // Find the most overlapping block. - BLOCK* best_block = nullptr; - int best_overlap = 0; - BLOCK_IT b_it(block_list); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) - continue; // Not a text block. - TBOX block_box = block->pdblk.bounding_box(); - block_box.rotate(block->re_rotation()); - if (block_box.major_overlap(line_box)) { - TBOX overlap_box = line_box.intersection(block_box); - if (overlap_box.area() > best_overlap) { - best_overlap = overlap_box.area(); - best_block = block; - } - } - } - ImageData* imagedata = nullptr; - if (best_block == nullptr) { - tprintf("No block overlapping textline: %s\n", line_str.string()); - } else { - imagedata = GetLineData(line_box, boxes, texts, start_box, end_box, - *best_block); - } - if (imagedata != nullptr) - training_data->AddPageToDocument(imagedata); - // Don't let \t, which marks newlines in the box file, get into the line - // content, as that makes the line unusable in training. - while (end_box < texts.size() && texts[end_box] == "\t") ++end_box; - } -} - -// Returns an Imagedata containing the image of the given box, -// and ground truth boxes/truth text if available in the input. -// The image is not normalized in any way. 
-ImageData* Tesseract::GetLineData(const TBOX& line_box, - const GenericVector& boxes, - const GenericVector& texts, - int start_box, int end_box, - const BLOCK& block) { - TBOX revised_box; - ImageData* image_data = GetRectImage(line_box, block, kImagePadding, - &revised_box); - if (image_data == nullptr) return nullptr; - image_data->set_page_number(applybox_page); - // Copy the boxes and shift them so they are relative to the image. - FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y()); - ICOORD shift = -revised_box.botleft(); - GenericVector line_boxes; - GenericVector line_texts; - for (int b = start_box; b < end_box; ++b) { - TBOX box = boxes[b]; - box.rotate(block_rotation); - box.move(shift); - line_boxes.push_back(box); - line_texts.push_back(texts[b]); - } - GenericVector page_numbers; - page_numbers.init_to_size(line_boxes.size(), applybox_page); - image_data->AddBoxes(line_boxes, line_texts, page_numbers); - return image_data; -} - -// Helper gets the image of a rectangle, using the block.re_rotation() if -// needed to get to the image, and rotating the result back to horizontal -// layout. (CJK characters will be on their left sides) The vertical text flag -// is set in the returned ImageData if the text was originally vertical, which -// can be used to invoke a different CJK recognition engine. The revised_box -// is also returned to enable calculation of output bounding boxes. -ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block, - int padding, TBOX* revised_box) const { - TBOX wbox = box; - wbox.pad(padding, padding); - *revised_box = wbox; - // Number of clockwise 90 degree rotations needed to get back to tesseract - // coords from the clipped image. 
- int num_rotations = 0; - if (block.re_rotation().y() > 0.0f) - num_rotations = 1; - else if (block.re_rotation().x() < 0.0f) - num_rotations = 2; - else if (block.re_rotation().y() < 0.0f) - num_rotations = 3; - // Handle two cases automatically: 1 the box came from the block, 2 the box - // came from a box file, and refers to the image, which the block may not. - if (block.pdblk.bounding_box().major_overlap(*revised_box)) - revised_box->rotate(block.re_rotation()); - // Now revised_box always refers to the image. - // BestPix is never colormapped, but may be of any depth. - Pix* pix = BestPix(); - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - TBOX image_box(0, 0, width, height); - // Clip to image bounds; - *revised_box &= image_box; - if (revised_box->null_box()) return nullptr; - Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(), - revised_box->width(), revised_box->height()); - Pix* box_pix = pixClipRectangle(pix, clip_box, nullptr); - if (box_pix == nullptr) return nullptr; - boxDestroy(&clip_box); - if (num_rotations > 0) { - Pix* rot_pix = pixRotateOrth(box_pix, num_rotations); - pixDestroy(&box_pix); - box_pix = rot_pix; - } - // Convert sub-8-bit images to 8 bit. - int depth = pixGetDepth(box_pix); - if (depth < 8) { - Pix* grey; - grey = pixConvertTo8(box_pix, false); - pixDestroy(&box_pix); - box_pix = grey; - } - bool vertical_text = false; - if (num_rotations > 0) { - // Rotated the clipped revised box back to internal coordinates. - FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y()); - revised_box->rotate(rotation); - if (num_rotations != 2) - vertical_text = true; - } - return new ImageData(vertical_text, box_pix); -} - -#ifndef ANDROID_BUILD -// Recognizes a word or group of words, converting to WERD_RES in *words. -// Analogous to classify_word_pass1, but can handle a group of words as well. 
-void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word, - PointerVector* words) { - TBOX word_box = word->word->bounding_box(); - // Get the word image - no frills. - if (tessedit_pageseg_mode == PSM_SINGLE_WORD || - tessedit_pageseg_mode == PSM_RAW_LINE) { - // In single word mode, use the whole image without any other row/word - // interpretation. - word_box = TBOX(0, 0, ImageWidth(), ImageHeight()); - } else { - float baseline = row->base_line((word_box.left() + word_box.right()) / 2); - if (baseline + row->descenders() < word_box.bottom()) - word_box.set_bottom(baseline + row->descenders()); - if (baseline + row->x_height() + row->ascenders() > word_box.top()) - word_box.set_top(baseline + row->x_height() + row->ascenders()); - } - ImageData* im_data = GetRectImage(word_box, block, kImagePadding, &word_box); - if (im_data == nullptr) return; - lstm_recognizer_->RecognizeLine(*im_data, true, classify_debug_level > 0, - kWorstDictCertainty / kCertaintyScale, - word_box, words, lstm_choice_mode); - delete im_data; - SearchWords(words); -} - -// Apply segmentation search to the given set of words, within the constraints -// of the existing ratings matrix. If there is already a best_choice on a word -// leaves it untouched and just sets the done/accepted etc flags. -void Tesseract::SearchWords(PointerVector* words) { - // Run the segmentation search on the network outputs and make a BoxWord - // for each of the output words. - // If we drop a word as junk, then there is always a space in front of the - // next. 
- const Dict* stopper_dict = lstm_recognizer_->GetDict(); - if (stopper_dict == nullptr) stopper_dict = &getDict(); - bool any_nonspace_delimited = false; - for (int w = 0; w < words->size(); ++w) { - WERD_RES* word = (*words)[w]; - if (word->best_choice != nullptr && - word->best_choice->ContainsAnyNonSpaceDelimited()) { - any_nonspace_delimited = true; - break; - } - } - for (int w = 0; w < words->size(); ++w) { - WERD_RES* word = (*words)[w]; - if (word->best_choice == nullptr) { - // It is a dud. - word->SetupFake(lstm_recognizer_->GetUnicharset()); - } else { - // Set the best state. - for (int i = 0; i < word->best_choice->length(); ++i) { - int length = word->best_choice->state(i); - word->best_state.push_back(length); - } - word->reject_map.initialise(word->best_choice->length()); - word->tess_failed = false; - word->tess_accepted = true; - word->tess_would_adapt = false; - word->done = true; - word->tesseract = this; - float word_certainty = std::min(word->space_certainty, - word->best_choice->certainty()); - word_certainty *= kCertaintyScale; - if (getDict().stopper_debug_level >= 1) { - tprintf("Best choice certainty=%g, space=%g, scaled=%g, final=%g\n", - word->best_choice->certainty(), word->space_certainty, - std::min(word->space_certainty, word->best_choice->certainty()) * - kCertaintyScale, - word_certainty); - word->best_choice->print(); - } - word->best_choice->set_certainty(word_certainty); - - word->tess_accepted = stopper_dict->AcceptableResult(word); - } - } -} -#endif // ANDROID_BUILD - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/ltrresultiterator.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/ltrresultiterator.cpp deleted file mode 100644 index 0c08e081..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/ltrresultiterator.cpp +++ /dev/null @@ -1,395 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ltrresultiterator.cpp -// Description: Iterator for tesseract results in strict left-to-right -// order that avoids using tesseract internal data structures. -// Author: Ray Smith -// Created: Fri Feb 26 14:32:09 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "ltrresultiterator.h" - -#include "allheaders.h" -#include "pageres.h" -#include "strngs.h" -#include "tesseractclass.h" - -namespace tesseract { - -LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height) - : PageIterator(page_res, tesseract, scale, scaled_yres, - rect_left, rect_top, rect_width, rect_height), - line_separator_("\n"), - paragraph_separator_("\n") { -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. 
-LTRResultIterator::~LTRResultIterator() = default; - -// Returns the null terminated UTF-8 encoded text string for the current -// object at the given level. Use delete [] to free after use. -char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { - if (it_->word() == nullptr) return nullptr; // Already at the end! - STRING text; - PAGE_RES_IT res_it(*it_); - WERD_CHOICE* best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); - if (level == RIL_SYMBOL) { - text = res_it.word()->BestUTF8(blob_index_, false); - } else if (level == RIL_WORD) { - text = best_choice->unichar_string(); - } else { - bool eol = false; // end of line? - bool eop = false; // end of paragraph? - do { // for each paragraph in a block - do { // for each text line in a paragraph - do { // for each word in a text line - best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); - text += best_choice->unichar_string(); - text += " "; - res_it.forward(); - eol = res_it.row() != res_it.prev_row(); - } while (!eol); - text.truncate_at(text.length() - 1); - text += line_separator_; - eop = res_it.block() != res_it.prev_block() || - res_it.row()->row->para() != res_it.prev_row()->row->para(); - } while (level != RIL_TEXTLINE && !eop); - if (eop) text += paragraph_separator_; - } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); - } - int length = text.length() + 1; - char* result = new char[length]; - strncpy(result, text.string(), length); - return result; -} - -// Set the string inserted at the end of each text line. "\n" by default. -void LTRResultIterator::SetLineSeparator(const char *new_line) { - line_separator_ = new_line; -} - -// Set the string inserted at the end of each paragraph. "\n" by default. -void LTRResultIterator::SetParagraphSeparator(const char *new_para) { - paragraph_separator_ = new_para; -} - -// Returns the mean confidence of the current object at the given level. 
-// The number should be interpreted as a percent probability. (0.0f-100.0f) -float LTRResultIterator::Confidence(PageIteratorLevel level) const { - if (it_->word() == nullptr) return 0.0f; // Already at the end! - float mean_certainty = 0.0f; - int certainty_count = 0; - PAGE_RES_IT res_it(*it_); - WERD_CHOICE* best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); - switch (level) { - case RIL_BLOCK: - do { - best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); - mean_certainty += best_choice->certainty(); - ++certainty_count; - res_it.forward(); - } while (res_it.block() == res_it.prev_block()); - break; - case RIL_PARA: - do { - best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); - mean_certainty += best_choice->certainty(); - ++certainty_count; - res_it.forward(); - } while (res_it.block() == res_it.prev_block() && - res_it.row()->row->para() == res_it.prev_row()->row->para()); - break; - case RIL_TEXTLINE: - do { - best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); - mean_certainty += best_choice->certainty(); - ++certainty_count; - res_it.forward(); - } while (res_it.row() == res_it.prev_row()); - break; - case RIL_WORD: - mean_certainty += best_choice->certainty(); - ++certainty_count; - break; - case RIL_SYMBOL: - mean_certainty += best_choice->certainty(blob_index_); - ++certainty_count; - } - if (certainty_count > 0) { - mean_certainty /= certainty_count; - float confidence = 100 + 5 * mean_certainty; - if (confidence < 0.0f) confidence = 0.0f; - if (confidence > 100.0f) confidence = 100.0f; - return confidence; - } - return 0.0f; -} - -void LTRResultIterator::RowAttributes(float* row_height, float* descenders, - float* ascenders) const { - *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() - - it_->row()->row->descenders(); - *descenders = it_->row()->row->descenders(); - *ascenders = it_->row()->row->ascenders(); -} - 
-// Returns the font attributes of the current word. If iterating at a higher -// level object than words, eg textlines, then this will return the -// attributes of the first word in that textline. -// The actual return value is a string representing a font name. It points -// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as -// the iterator itself, ie rendered invalid by various members of -// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. -// Pointsize is returned in printers points (1/72 inch.) -const char* LTRResultIterator::WordFontAttributes(bool* is_bold, - bool* is_italic, - bool* is_underlined, - bool* is_monospace, - bool* is_serif, - bool* is_smallcaps, - int* pointsize, - int* font_id) const { - if (it_->word() == nullptr) return nullptr; // Already at the end! - float row_height = it_->row()->row->x_height() + - it_->row()->row->ascenders() - it_->row()->row->descenders(); - // Convert from pixels to printers points. - *pointsize = scaled_yres_ > 0 - ? static_cast(row_height * kPointsPerInch / scaled_yres_ + 0.5) - : 0; - if (it_->word()->fontinfo == nullptr) { - *font_id = -1; - return nullptr; // No font information. - } - const FontInfo& font_info = *it_->word()->fontinfo; - *font_id = font_info.universal_id; - *is_bold = font_info.is_bold(); - *is_italic = font_info.is_italic(); - *is_underlined = false; // TODO(rays) fix this! - *is_monospace = font_info.is_fixed_pitch(); - *is_serif = font_info.is_serif(); - *is_smallcaps = it_->word()->small_caps; - - return font_info.name; -} - -// Returns the name of the language used to recognize this word. -const char* LTRResultIterator::WordRecognitionLanguage() const { - if (it_->word() == nullptr || it_->word()->tesseract == nullptr) return nullptr; - return it_->word()->tesseract->lang.string(); -} - -// Return the overall directionality of this word. 
-StrongScriptDirection LTRResultIterator::WordDirection() const { - if (it_->word() == nullptr) return DIR_NEUTRAL; - bool has_rtl = it_->word()->AnyRtlCharsInWord(); - bool has_ltr = it_->word()->AnyLtrCharsInWord(); - if (has_rtl && !has_ltr) - return DIR_RIGHT_TO_LEFT; - if (has_ltr && !has_rtl) - return DIR_LEFT_TO_RIGHT; - if (!has_ltr && !has_rtl) - return DIR_NEUTRAL; - return DIR_MIX; -} - -// Returns true if the current word was found in a dictionary. -bool LTRResultIterator::WordIsFromDictionary() const { - if (it_->word() == nullptr) return false; // Already at the end! - int permuter = it_->word()->best_choice->permuter(); - return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || - permuter == USER_DAWG_PERM; -} - -// Returns the number of blanks before the current word. -int LTRResultIterator::BlanksBeforeWord() const { - if (it_->word() == nullptr) return 1; - return it_->word()->word->space(); -} - -// Returns true if the current word is numeric. -bool LTRResultIterator::WordIsNumeric() const { - if (it_->word() == nullptr) return false; // Already at the end! - int permuter = it_->word()->best_choice->permuter(); - return permuter == NUMBER_PERM; -} - -// Returns true if the word contains blamer information. -bool LTRResultIterator::HasBlamerInfo() const { - return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr && - it_->word()->blamer_bundle->HasDebugInfo(); -} - -// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle -// of the current word. -const void *LTRResultIterator::GetParamsTrainingBundle() const { - return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr) ? - &(it_->word()->blamer_bundle->params_training_bundle()) : nullptr; -} - -// Returns the pointer to the string with blamer information for this word. -// Assumes that the word's blamer_bundle is not nullptr. 
-const char *LTRResultIterator::GetBlamerDebug() const { - return it_->word()->blamer_bundle->debug().string(); -} - -// Returns the pointer to the string with misadaption information for this word. -// Assumes that the word's blamer_bundle is not nullptr. -const char *LTRResultIterator::GetBlamerMisadaptionDebug() const { - return it_->word()->blamer_bundle->misadaption_debug().string(); -} - -// Returns true if a truth string was recorded for the current word. -bool LTRResultIterator::HasTruthString() const { - if (it_->word() == nullptr) return false; // Already at the end! - if (it_->word()->blamer_bundle == nullptr || - it_->word()->blamer_bundle->NoTruth()) { - return false; // no truth information for this word - } - return true; -} - -// Returns true if the given string is equivalent to the truth string for -// the current word. -bool LTRResultIterator::EquivalentToTruth(const char *str) const { - if (!HasTruthString()) return false; - ASSERT_HOST(it_->word()->uch_set != nullptr); - WERD_CHOICE str_wd(str, *(it_->word()->uch_set)); - return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd); -} - -// Returns the null terminated UTF-8 encoded truth string for the current word. -// Use delete [] to free after use. -char* LTRResultIterator::WordTruthUTF8Text() const { - if (!HasTruthString()) return nullptr; - STRING truth_text = it_->word()->blamer_bundle->TruthString(); - int length = truth_text.length() + 1; - char* result = new char[length]; - strncpy(result, truth_text.string(), length); - return result; -} - -// Returns the null terminated UTF-8 encoded normalized OCR string for the -// current word. Use delete [] to free after use. -char* LTRResultIterator::WordNormedUTF8Text() const { - if (it_->word() == nullptr) return nullptr; // Already at the end! 
- STRING ocr_text; - WERD_CHOICE* best_choice = it_->word()->best_choice; - const UNICHARSET *unicharset = it_->word()->uch_set; - ASSERT_HOST(best_choice != nullptr); - for (int i = 0; i < best_choice->length(); ++i) { - ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); - } - int length = ocr_text.length() + 1; - char* result = new char[length]; - strncpy(result, ocr_text.string(), length); - return result; -} - -// Returns a pointer to serialized choice lattice. -// Fills lattice_size with the number of bytes in lattice data. -const char *LTRResultIterator::WordLattice(int *lattice_size) const { - if (it_->word() == nullptr) return nullptr; // Already at the end! - if (it_->word()->blamer_bundle == nullptr) return nullptr; - *lattice_size = it_->word()->blamer_bundle->lattice_size(); - return it_->word()->blamer_bundle->lattice_data(); -} - -// Returns true if the current symbol is a superscript. -// If iterating at a higher level object than symbols, eg words, then -// this will return the attributes of the first symbol in that word. -bool LTRResultIterator::SymbolIsSuperscript() const { - if (cblob_it_ == nullptr && it_->word() != nullptr) - return it_->word()->best_choice->BlobPosition(blob_index_) == - SP_SUPERSCRIPT; - return false; -} - -// Returns true if the current symbol is a subscript. -// If iterating at a higher level object than symbols, eg words, then -// this will return the attributes of the first symbol in that word. -bool LTRResultIterator::SymbolIsSubscript() const { - if (cblob_it_ == nullptr && it_->word() != nullptr) - return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT; - return false; -} - -// Returns true if the current symbol is a dropcap. -// If iterating at a higher level object than symbols, eg words, then -// this will return the attributes of the first symbol in that word. 
-bool LTRResultIterator::SymbolIsDropcap() const { - if (cblob_it_ == nullptr && it_->word() != nullptr) - return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP; - return false; -} - -ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) { - ASSERT_HOST(result_it.it_->word() != nullptr); - word_res_ = result_it.it_->word(); - BLOB_CHOICE_LIST* choices = nullptr; - if (word_res_->ratings != nullptr) - choices = word_res_->GetBlobChoices(result_it.blob_index_); - if (choices != nullptr && !choices->empty()) { - choice_it_ = new BLOB_CHOICE_IT(choices); - choice_it_->mark_cycle_pt(); - } else { - choice_it_ = nullptr; - } -} - -ChoiceIterator::~ChoiceIterator() { - delete choice_it_; -} - -// Moves to the next choice for the symbol and returns false if there -// are none left. -bool ChoiceIterator::Next() { - if (choice_it_ == nullptr) - return false; - choice_it_->forward(); - return !choice_it_->cycled_list(); -} - -// Returns the null terminated UTF-8 encoded text string for the current -// choice. Do NOT use delete [] to free after use. -const char* ChoiceIterator::GetUTF8Text() const { - if (choice_it_ == nullptr) - return nullptr; - UNICHAR_ID id = choice_it_->data()->unichar_id(); - return word_res_->uch_set->id_to_unichar_ext(id); -} - -// Returns the confidence of the current choice. -// The number should be interpreted as a percent probability. (0.0f-100.0f) -float ChoiceIterator::Confidence() const { - if (choice_it_ == nullptr) - return 0.0f; - float confidence = 100 + 5 * choice_it_->data()->certainty(); - if (confidence < 0.0f) confidence = 0.0f; - if (confidence > 100.0f) confidence = 100.0f; - return confidence; -} - - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/ltrresultiterator.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/ltrresultiterator.h deleted file mode 100644 index 665aa04a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/ltrresultiterator.h +++ /dev/null @@ -1,224 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ltrresultiterator.h -// Description: Iterator for tesseract results in strict left-to-right -// order that avoids using tesseract internal data structures. -// Author: Ray Smith -// Created: Fri Feb 26 11:01:06 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_ -#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_ - -#include "pageiterator.h" // for PageIterator -#include "platform.h" // for TESS_API -#include "publictypes.h" // for PageIteratorLevel -#include "unichar.h" // for StrongScriptDirection - -class BLOB_CHOICE_IT; -class PAGE_RES; -class WERD_RES; - -namespace tesseract { - -class Tesseract; - -// Class to iterate over tesseract results, providing access to all levels -// of the page hierarchy, without including any tesseract headers or having -// to handle any tesseract structures. -// WARNING! 
This class points to data held within the TessBaseAPI class, and -// therefore can only be used while the TessBaseAPI class still exists and -// has not been subjected to a call of Init, SetImage, Recognize, Clear, End -// DetectOS, or anything else that changes the internal PAGE_RES. -// See apitypes.h for the definition of PageIteratorLevel. -// See also base class PageIterator, which contains the bulk of the interface. -// LTRResultIterator adds text-specific methods for access to OCR output. - -class TESS_API LTRResultIterator : public PageIterator { - friend class ChoiceIterator; - public: - // page_res and tesseract come directly from the BaseAPI. - // The rectangle parameters are copied indirectly from the Thresholder, - // via the BaseAPI. They represent the coordinates of some rectangle in an - // original image (in top-left-origin coordinates) and therefore the top-left - // needs to be added to any output boxes in order to specify coordinates - // in the original image. See TessBaseAPI::SetRectangle. - // The scale and scaled_yres are in case the Thresholder scaled the image - // rectangle prior to thresholding. Any coordinates in tesseract's image - // must be divided by scale before adding (rect_left, rect_top). - // The scaled_yres indicates the effective resolution of the binary image - // that tesseract has been given by the Thresholder. - // After the constructor, Begin has already been called. - LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height); - - virtual ~LTRResultIterator(); - - // LTRResultIterators may be copied! This makes it possible to iterate over - // all the objects at a lower level, while maintaining an iterator to - // objects at a higher level. These constructors DO NOT CALL Begin, so - // iterations will continue from the location of src. 
- // TODO: For now the copy constructor and operator= only need the base class - // versions, but if new data members are added, don't forget to add them! - - // ============= Moving around within the page ============. - - // See PageIterator. - - // ============= Accessing data ==============. - - // Returns the null terminated UTF-8 encoded text string for the current - // object at the given level. Use delete [] to free after use. - char* GetUTF8Text(PageIteratorLevel level) const; - - // Set the string inserted at the end of each text line. "\n" by default. - void SetLineSeparator(const char *new_line); - - // Set the string inserted at the end of each paragraph. "\n" by default. - void SetParagraphSeparator(const char *new_para); - - // Returns the mean confidence of the current object at the given level. - // The number should be interpreted as a percent probability. (0.0f-100.0f) - float Confidence(PageIteratorLevel level) const; - - // Returns the attributes of the current row. - void RowAttributes(float* row_height, float* descenders, - float* ascenders) const; - - // ============= Functions that refer to words only ============. - - // Returns the font attributes of the current word. If iterating at a higher - // level object than words, eg textlines, then this will return the - // attributes of the first word in that textline. - // The actual return value is a string representing a font name. It points - // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as - // the iterator itself, ie rendered invalid by various members of - // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. - // Pointsize is returned in printers points (1/72 inch.) - const char* WordFontAttributes(bool* is_bold, - bool* is_italic, - bool* is_underlined, - bool* is_monospace, - bool* is_serif, - bool* is_smallcaps, - int* pointsize, - int* font_id) const; - - // Return the name of the language used to recognize this word. 
- // On error, nullptr. Do not delete this pointer. - const char* WordRecognitionLanguage() const; - - // Return the overall directionality of this word. - StrongScriptDirection WordDirection() const; - - // Returns true if the current word was found in a dictionary. - bool WordIsFromDictionary() const; - - // Returns the number of blanks before the current word. - int BlanksBeforeWord() const; - - // Returns true if the current word is numeric. - bool WordIsNumeric() const; - - // Returns true if the word contains blamer information. - bool HasBlamerInfo() const; - - // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle - // of the current word. - const void *GetParamsTrainingBundle() const; - - // Returns a pointer to the string with blamer information for this word. - // Assumes that the word's blamer_bundle is not nullptr. - const char *GetBlamerDebug() const; - - // Returns a pointer to the string with misadaption information for this word. - // Assumes that the word's blamer_bundle is not nullptr. - const char *GetBlamerMisadaptionDebug() const; - - // Returns true if a truth string was recorded for the current word. - bool HasTruthString() const; - - // Returns true if the given string is equivalent to the truth string for - // the current word. - bool EquivalentToTruth(const char *str) const; - - // Returns a null terminated UTF-8 encoded truth string for the current word. - // Use delete [] to free after use. - char* WordTruthUTF8Text() const; - - // Returns a null terminated UTF-8 encoded normalized OCR string for the - // current word. Use delete [] to free after use. - char* WordNormedUTF8Text() const; - - // Returns a pointer to serialized choice lattice. - // Fills lattice_size with the number of bytes in lattice data. - const char *WordLattice(int *lattice_size) const; - - // ============= Functions that refer to symbols only ============. - - // Returns true if the current symbol is a superscript. 
- // If iterating at a higher level object than symbols, eg words, then - // this will return the attributes of the first symbol in that word. - bool SymbolIsSuperscript() const; - // Returns true if the current symbol is a subscript. - // If iterating at a higher level object than symbols, eg words, then - // this will return the attributes of the first symbol in that word. - bool SymbolIsSubscript() const; - // Returns true if the current symbol is a dropcap. - // If iterating at a higher level object than symbols, eg words, then - // this will return the attributes of the first symbol in that word. - bool SymbolIsDropcap() const; - - protected: - const char *line_separator_; - const char *paragraph_separator_; -}; - -// Class to iterate over the classifier choices for a single RIL_SYMBOL. -class ChoiceIterator { - public: - // Construction is from a LTRResultIterator that points to the symbol of - // interest. The ChoiceIterator allows a one-shot iteration over the - // choices for this symbol and after that is is useless. - explicit ChoiceIterator(const LTRResultIterator& result_it); - ~ChoiceIterator(); - - // Moves to the next choice for the symbol and returns false if there - // are none left. - bool Next(); - - // ============= Accessing data ==============. - - // Returns the null terminated UTF-8 encoded text string for the current - // choice. - // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an - // internal structure and should NOT be delete[]ed to free after use. - const char* GetUTF8Text() const; - - // Returns the confidence of the current choice. - // The number should be interpreted as a percent probability. (0.0f-100.0f) - float Confidence() const; - - private: - // Pointer to the WERD_RES object owned by the API. - WERD_RES* word_res_; - // Iterator over the blob choices. - BLOB_CHOICE_IT* choice_it_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/mutableiterator.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/mutableiterator.cpp deleted file mode 100644 index a472df18..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/mutableiterator.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "mutableiterator.h" - -namespace tesseract { - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -MutableIterator::~MutableIterator() = default; - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/mutableiterator.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/mutableiterator.h deleted file mode 100644 index e68f0738..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/mutableiterator.h +++ /dev/null @@ -1,64 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: mutableiterator.h -// Description: Iterator for tesseract results providing access to -// both high-level API and Tesseract internal data structures. 
-// Author: David Eger -// Created: Thu Feb 24 19:01:06 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_ -#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_ - -#include "resultiterator.h" - -class BLOB_CHOICE_IT; - -namespace tesseract { - -class Tesseract; - -// Class to iterate over tesseract results, providing access to all levels -// of the page hierarchy, without including any tesseract headers or having -// to handle any tesseract structures. -// WARNING! This class points to data held within the TessBaseAPI class, and -// therefore can only be used while the TessBaseAPI class still exists and -// has not been subjected to a call of Init, SetImage, Recognize, Clear, End -// DetectOS, or anything else that changes the internal PAGE_RES. -// See apitypes.h for the definition of PageIteratorLevel. -// See also base class PageIterator, which contains the bulk of the interface. -// ResultIterator adds text-specific methods for access to OCR output. -// MutableIterator adds access to internal data structures. 
- -class MutableIterator : public ResultIterator { - public: - // See argument descriptions in ResultIterator() - MutableIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height) - : ResultIterator( - LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left, - rect_top, rect_width, rect_height)) {} - virtual ~MutableIterator(); - - // See PageIterator and ResultIterator for most calls. - - // Return access to Tesseract internals. - const PAGE_RES_IT *PageResIt() const { return it_; } -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/osdetect.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/osdetect.cpp deleted file mode 100644 index 3e0e793f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/osdetect.cpp +++ /dev/null @@ -1,581 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: osdetect.cpp -// Description: Orientation and script detection. -// Author: Samuel Charron -// Ranjith Unnikrishnan -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include -#include // for std::fabs -#include - -#include "osdetect.h" - -#include "blobbox.h" -#include "blread.h" -#include "colfind.h" -#include "fontinfo.h" -#include "imagefind.h" -#include "linefind.h" -#include "oldlist.h" -#include "qrsequence.h" -#include "ratngs.h" -#include "strngs.h" -#include "tabvector.h" -#include "tesseractclass.h" -#include "textord.h" - -const float kSizeRatioToReject = 2.0; -const int kMinAcceptableBlobHeight = 10; - -const float kScriptAcceptRatio = 1.3; - -const float kHanRatioInKorean = 0.7; -const float kHanRatioInJapanese = 0.3; - -const float kNonAmbiguousMargin = 1.0; - -// General scripts -static const char* han_script = "Han"; -static const char* latin_script = "Latin"; -static const char* katakana_script = "Katakana"; -static const char* hiragana_script = "Hiragana"; -static const char* hangul_script = "Hangul"; - -// Pseudo-scripts Name -const char* ScriptDetector::korean_script_ = "Korean"; -const char* ScriptDetector::japanese_script_ = "Japanese"; -const char* ScriptDetector::fraktur_script_ = "Fraktur"; - -void OSResults::update_best_orientation() { - float first = orientations[0]; - float second = orientations[1]; - best_result.orientation_id = 0; - if (orientations[0] < orientations[1]) { - first = orientations[1]; - second = orientations[0]; - best_result.orientation_id = 1; - } - for (int i = 2; i < 4; ++i) { - if (orientations[i] > first) { - second = first; - first = orientations[i]; - best_result.orientation_id = i; - } else if (orientations[i] > second) { - second = orientations[i]; - } - } - // Store difference of top two orientation scores. 
- best_result.oconfidence = first - second; -} - -void OSResults::set_best_orientation(int orientation_id) { - best_result.orientation_id = orientation_id; - best_result.oconfidence = 0; -} - -void OSResults::update_best_script(int orientation) { - // We skip index 0 to ignore the "Common" script. - float first = scripts_na[orientation][1]; - float second = scripts_na[orientation][2]; - best_result.script_id = 1; - if (scripts_na[orientation][1] < scripts_na[orientation][2]) { - first = scripts_na[orientation][2]; - second = scripts_na[orientation][1]; - best_result.script_id = 2; - } - for (int i = 3; i < kMaxNumberOfScripts; ++i) { - if (scripts_na[orientation][i] > first) { - best_result.script_id = i; - second = first; - first = scripts_na[orientation][i]; - } else if (scripts_na[orientation][i] > second) { - second = scripts_na[orientation][i]; - } - } - best_result.sconfidence = - (first / second - 1.0) / (kScriptAcceptRatio - 1.0); -} - -int OSResults::get_best_script(int orientation_id) const { - int max_id = -1; - for (int j = 0; j < kMaxNumberOfScripts; ++j) { - const char *script = unicharset->get_script_from_script_id(j); - if (strcmp(script, "Common") && strcmp(script, "NULL")) { - if (max_id == -1 || - scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id]) - max_id = j; - } - } - return max_id; -} - -// Print the script scores for all possible orientations. -void OSResults::print_scores(void) const { - for (int i = 0; i < 4; ++i) { - tprintf("Orientation id #%d", i); - print_scores(i); - } -} - -// Print the script scores for the given candidate orientation. -void OSResults::print_scores(int orientation_id) const { - for (int j = 0; j < kMaxNumberOfScripts; ++j) { - if (scripts_na[orientation_id][j]) { - tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j), - scripts_na[orientation_id][j]); - } - } -} - -// Accumulate scores with given OSResults instance and update the best script. 
-void OSResults::accumulate(const OSResults& osr) { - for (int i = 0; i < 4; ++i) { - orientations[i] += osr.orientations[i]; - for (int j = 0; j < kMaxNumberOfScripts; ++j) - scripts_na[i][j] += osr.scripts_na[i][j]; - } - unicharset = osr.unicharset; - update_best_orientation(); - update_best_script(best_result.orientation_id); -} - -// Detect and erase horizontal/vertical lines and picture regions from the -// image, so that non-text blobs are removed from consideration. -static void remove_nontext_regions(tesseract::Tesseract *tess, - BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks) { - Pix *pix = tess->pix_binary(); - ASSERT_HOST(pix != nullptr); - int vertical_x = 0; - int vertical_y = 1; - tesseract::TabVector_LIST v_lines; - tesseract::TabVector_LIST h_lines; - int resolution; - if (kMinCredibleResolution > pixGetXRes(pix)) { - resolution = kMinCredibleResolution; - tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", - pixGetXRes(pix), resolution); - } else { - resolution = pixGetXRes(pix); - } - - tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, - &vertical_x, &vertical_y, - nullptr, &v_lines, &h_lines); - Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr); - if (im_pix != nullptr) { - pixSubtract(pix, pix, im_pix); - pixDestroy(&im_pix); - } - tess->mutable_textord()->find_components(tess->pix_binary(), - blocks, to_blocks); -} - -// Find connected components in the page and process a subset until finished or -// a stopping criterion is met. -// Returns the number of blobs used in making the estimate. 0 implies failure. 
-int orientation_and_script_detection(STRING& filename, - OSResults* osr, - tesseract::Tesseract* tess) { - STRING name = filename; //truncated name - const char *lastdot; //of name - TBOX page_box; - - lastdot = strrchr (name.string (), '.'); - if (lastdot != nullptr) - name[lastdot-name.string()] = '\0'; - - ASSERT_HOST(tess->pix_binary() != nullptr) - int width = pixGetWidth(tess->pix_binary()); - int height = pixGetHeight(tess->pix_binary()); - - BLOCK_LIST blocks; - if (!read_unlv_file(name, width, height, &blocks)) - FullPageBlock(width, height, &blocks); - - // Try to remove non-text regions from consideration. - TO_BLOCK_LIST land_blocks, port_blocks; - remove_nontext_regions(tess, &blocks, &port_blocks); - - if (port_blocks.empty()) { - // page segmentation did not succeed, so we need to find_components first. - tess->mutable_textord()->find_components(tess->pix_binary(), - &blocks, &port_blocks); - } else { - page_box.set_left(0); - page_box.set_bottom(0); - page_box.set_right(width); - page_box.set_top(height); - // Filter_blobs sets up the TO_BLOCKs the same as find_components does. - tess->mutable_textord()->filter_blobs(page_box.topright(), - &port_blocks, true); - } - - return os_detect(&port_blocks, osr, tess); -} - -// Filter and sample the blobs. 
-// Returns a non-zero number of blobs if the page was successfully processed, or -// zero if the page had too few characters to be reliable -int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, - tesseract::Tesseract* tess) { - int blobs_total = 0; - TO_BLOCK_IT block_it; - block_it.set_to_list(port_blocks); - - BLOBNBOX_CLIST filtered_list; - BLOBNBOX_C_IT filtered_it(&filtered_list); - - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward ()) { - TO_BLOCK* to_block = block_it.data(); - if (to_block->block->pdblk.poly_block() && - !to_block->block->pdblk.poly_block()->IsText()) continue; - BLOBNBOX_IT bbox_it; - bbox_it.set_to_list(&to_block->blobs); - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list (); - bbox_it.forward ()) { - BLOBNBOX* bbox = bbox_it.data(); - C_BLOB* blob = bbox->cblob(); - TBOX box = blob->bounding_box(); - ++blobs_total; - - // Catch illegal value of box width and avoid division by zero. - if (box.width() == 0) continue; - // TODO: Can height and width be negative? If not, remove fabs. - float y_x = std::fabs((box.height() * 1.0f) / box.width()); - float x_y = 1.0f / y_x; - // Select a >= 1.0 ratio - float ratio = x_y > y_x ? x_y : y_x; - // Blob is ambiguous - if (ratio > kSizeRatioToReject) continue; - if (box.height() < kMinAcceptableBlobHeight) continue; - filtered_it.add_to_end(bbox); - } - } - return os_detect_blobs(nullptr, &filtered_list, osr, tess); -} - -// Detect orientation and script from a list of blobs. -// Returns a non-zero number of blobs if the list was successfully processed, or -// zero if the list had too few characters to be reliable. -// If allowed_scripts is non-null and non-empty, it is a list of scripts that -// constrains both orientation and script detection to consider only scripts -// from the list. 
-int os_detect_blobs(const GenericVector* allowed_scripts, - BLOBNBOX_CLIST* blob_list, OSResults* osr, - tesseract::Tesseract* tess) { - OSResults osr_; - int minCharactersToTry = tess->min_characters_to_try; - int maxCharactersToTry = 5 * minCharactersToTry; - if (osr == nullptr) - osr = &osr_; - - osr->unicharset = &tess->unicharset; - OrientationDetector o(allowed_scripts, osr); - ScriptDetector s(allowed_scripts, osr, tess); - - BLOBNBOX_C_IT filtered_it(blob_list); - int real_max = std::min(filtered_it.length(), maxCharactersToTry); - // tprintf("Total blobs found = %d\n", blobs_total); - // tprintf("Number of blobs post-filtering = %d\n", filtered_it.length()); - // tprintf("Number of blobs to try = %d\n", real_max); - - // If there are too few characters, skip this page entirely. - if (real_max < minCharactersToTry / 2) { - tprintf("Too few characters. Skipping this page\n"); - return 0; - } - - BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()]; - int number_of_blobs = 0; - for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list (); - filtered_it.forward ()) { - blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data(); - } - QRSequenceGenerator sequence(number_of_blobs); - int num_blobs_evaluated = 0; - for (int i = 0; i < real_max; ++i) { - if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) - && i > minCharactersToTry) { - break; - } - ++num_blobs_evaluated; - } - delete [] blobs; - - // Make sure the best_result is up-to-date - int orientation = o.get_orientation(); - osr->update_best_script(orientation); - return num_blobs_evaluated; -} - -// Processes a single blob to estimate script and orientation. -// Return true if estimate of orientation and script satisfies stopping -// criteria. 
-bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, - ScriptDetector* s, OSResults* osr, - tesseract::Tesseract* tess) { - tess->tess_cn_matching.set_value(true); // turn it on - tess->tess_bn_matching.set_value(false); - C_BLOB* blob = bbox->cblob(); - TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob); - TBOX box = tblob->bounding_box(); - FCOORD current_rotation(1.0f, 0.0f); - FCOORD rotation90(0.0f, 1.0f); - BLOB_CHOICE_LIST ratings[4]; - // Test the 4 orientations - for (int i = 0; i < 4; ++i) { - // Normalize the blob. Set the origin to the place we want to be the - // bottom-middle after rotation. - // Scaling is to make the rotated height the x-height. - float scaling = static_cast(kBlnXHeight) / box.height(); - float x_origin = (box.left() + box.right()) / 2.0f; - float y_origin = (box.bottom() + box.top()) / 2.0f; - if (i == 0 || i == 2) { - // Rotation is 0 or 180. - y_origin = i == 0 ? box.bottom() : box.top(); - } else { - // Rotation is 90 or 270. - scaling = static_cast(kBlnXHeight) / box.width(); - x_origin = i == 1 ? box.left() : box.right(); - } - std::unique_ptr rotated_blob(new TBLOB(*tblob)); - rotated_blob->Normalize(nullptr, ¤t_rotation, nullptr, - x_origin, y_origin, scaling, scaling, - 0.0f, static_cast(kBlnBaselineOffset), - false, nullptr); - tess->AdaptiveClassifier(rotated_blob.get(), ratings + i); - current_rotation.rotate(rotation90); - } - delete tblob; - - bool stop = o->detect_blob(ratings); - s->detect_blob(ratings); - int orientation = o->get_orientation(); - stop = s->must_stop(orientation) && stop; - return stop; -} - - -OrientationDetector::OrientationDetector( - const GenericVector* allowed_scripts, OSResults* osr) { - osr_ = osr; - allowed_scripts_ = allowed_scripts; -} - -// Score the given blob and return true if it is now sure of the orientation -// after adding this block. 
-bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) { - float blob_o_score[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float total_blob_o_score = 0.0f; - - for (int i = 0; i < 4; ++i) { - BLOB_CHOICE_IT choice_it(scores + i); - if (!choice_it.empty()) { - BLOB_CHOICE* choice = nullptr; - if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) { - // Find the top choice in an allowed script. - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() && - choice == nullptr; choice_it.forward()) { - int choice_script = choice_it.data()->script_id(); - int s = 0; - for (s = 0; s < allowed_scripts_->size(); ++s) { - if ((*allowed_scripts_)[s] == choice_script) { - choice = choice_it.data(); - break; - } - } - } - } else { - choice = choice_it.data(); - } - if (choice != nullptr) { - // The certainty score ranges between [-20,0]. This is converted here to - // [0,1], with 1 indicating best match. - blob_o_score[i] = 1 + 0.05 * choice->certainty(); - total_blob_o_score += blob_o_score[i]; - } - } - } - if (total_blob_o_score == 0.0) return false; - // Fill in any blanks with the worst score of the others. This is better than - // picking an arbitrary probability for it and way better than -inf. - float worst_score = 0.0f; - int num_good_scores = 0; - for (int i = 0; i < 4; ++i) { - if (blob_o_score[i] > 0.0f) { - ++num_good_scores; - if (worst_score == 0.0f || blob_o_score[i] < worst_score) - worst_score = blob_o_score[i]; - } - } - if (num_good_scores == 1) { - // Lower worst if there is only one. - worst_score /= 2.0f; - } - for (int i = 0; i < 4; ++i) { - if (blob_o_score[i] == 0.0f) { - blob_o_score[i] = worst_score; - total_blob_o_score += worst_score; - } - } - // Normalize the orientation scores for the blob and use them to - // update the aggregated orientation score. 
- for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) { - osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score); - } - - // TODO(ranjith) Add an early exit test, based on min_orientation_margin, - // as used in pagesegmain.cpp. - return false; -} - -int OrientationDetector::get_orientation() { - osr_->update_best_orientation(); - return osr_->best_result.orientation_id; -} - - -ScriptDetector::ScriptDetector(const GenericVector* allowed_scripts, - OSResults* osr, tesseract::Tesseract* tess) { - osr_ = osr; - tess_ = tess; - allowed_scripts_ = allowed_scripts; - katakana_id_ = tess_->unicharset.add_script(katakana_script); - hiragana_id_ = tess_->unicharset.add_script(hiragana_script); - han_id_ = tess_->unicharset.add_script(han_script); - hangul_id_ = tess_->unicharset.add_script(hangul_script); - japanese_id_ = tess_->unicharset.add_script(japanese_script_); - korean_id_ = tess_->unicharset.add_script(korean_script_); - latin_id_ = tess_->unicharset.add_script(latin_script); - fraktur_id_ = tess_->unicharset.add_script(fraktur_script_); -} - - -// Score the given blob and return true if it is now sure of the script after -// adding this blob. -void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { - bool done[kMaxNumberOfScripts]; - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < kMaxNumberOfScripts; ++j) - done[j] = false; - - BLOB_CHOICE_IT choice_it; - choice_it.set_to_list(scores + i); - - float prev_score = -1; - int script_count = 0; - int prev_id = -1; - int prev_fontinfo_id = -1; - const char* prev_unichar = ""; - const char* unichar = ""; - - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - BLOB_CHOICE* choice = choice_it.data(); - int id = choice->script_id(); - if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) { - // Check that the choice is in an allowed script. 
- int s = 0; - for (s = 0; s < allowed_scripts_->size(); ++s) { - if ((*allowed_scripts_)[s] == id) break; - } - if (s == allowed_scripts_->size()) continue; // Not found in list. - } - // Script already processed before. - if (done[id]) continue; - done[id] = true; - - unichar = tess_->unicharset.id_to_unichar(choice->unichar_id()); - // Save data from the first match - if (prev_score < 0) { - prev_score = -choice->certainty(); - script_count = 1; - prev_id = id; - prev_unichar = unichar; - prev_fontinfo_id = choice->fontinfo_id(); - } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) { - ++script_count; - } - - if (strlen(prev_unichar) == 1) - if (unichar[0] >= '0' && unichar[0] <= '9') - break; - - // if script_count is >= 2, character is ambiguous, skip other matches - // since they are useless. - if (script_count >= 2) - break; - } - // Character is non ambiguous - if (script_count == 1) { - // Update the score of the winning script - osr_->scripts_na[i][prev_id] += 1.0; - - // Workaround for Fraktur - if (prev_id == latin_id_) { - if (prev_fontinfo_id >= 0) { - const tesseract::FontInfo &fi = - tess_->get_fontinfo_table().get(prev_fontinfo_id); - //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name, - // fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(), - // fi.is_serif(), fi.is_fraktur(), - // prev_unichar); - if (fi.is_fraktur()) { - osr_->scripts_na[i][prev_id] -= 1.0; - osr_->scripts_na[i][fraktur_id_] += 1.0; - } - } - } - - // Update Japanese / Korean pseudo-scripts - if (prev_id == katakana_id_) - osr_->scripts_na[i][japanese_id_] += 1.0; - if (prev_id == hiragana_id_) - osr_->scripts_na[i][japanese_id_] += 1.0; - if (prev_id == hangul_id_) - osr_->scripts_na[i][korean_id_] += 1.0; - if (prev_id == han_id_) { - osr_->scripts_na[i][korean_id_] += kHanRatioInKorean; - osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese; - } - } - } // iterate over each orientation -} - -bool ScriptDetector::must_stop(int orientation) { - 
osr_->update_best_script(orientation); - return osr_->best_result.sconfidence > 1; -} - -// Helper method to convert an orientation index to its value in degrees. -// The value represents the amount of clockwise rotation in degrees that must be -// applied for the text to be upright (readable). -int OrientationIdToValue(const int& id) { - switch (id) { - case 0: - return 0; - case 1: - return 270; - case 2: - return 180; - case 3: - return 90; - default: - return -1; - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/osdetect.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/osdetect.h deleted file mode 100644 index c0864c40..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/osdetect.h +++ /dev/null @@ -1,140 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: osdetect.h -// Description: Orientation and script detection. -// Author: Samuel Charron -// Ranjith Unnikrishnan -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_OSDETECT_H_ -#define TESSERACT_CCMAIN_OSDETECT_H_ - -#include "platform.h" // for TESS_API - -class BLOBNBOX; -class BLOBNBOX_CLIST; -class BLOB_CHOICE_LIST; -class STRING; -class TO_BLOCK_LIST; -class UNICHARSET; -template class GenericVector; - -namespace tesseract { -class Tesseract; -} - -// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur -const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; - -struct OSBestResult { - OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), - oconfidence(0.0) {} - int orientation_id; - int script_id; - float sconfidence; - float oconfidence; -}; - -struct OSResults { - OSResults() : unicharset(nullptr) { - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < kMaxNumberOfScripts; ++j) - scripts_na[i][j] = 0; - orientations[i] = 0; - } - } - void update_best_orientation(); - // Set the estimate of the orientation to the given id. - void set_best_orientation(int orientation_id); - // Update/Compute the best estimate of the script assuming the given - // orientation id. - void update_best_script(int orientation_id); - // Return the index of the script with the highest score for this orientation. - TESS_API int get_best_script(int orientation_id) const; - // Accumulate scores with given OSResults instance and update the best script. - void accumulate(const OSResults& osr); - - // Print statistics. - void print_scores(void) const; - void print_scores(int orientation_id) const; - - // Array holding scores for each orientation id [0,3]. - // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the - // page respectively, where the values refer to the amount of clockwise - // rotation to be applied to the page for the text to be upright and readable. - float orientations[4]; - // Script confidence scores for each of 4 possible orientations. 
- float scripts_na[4][kMaxNumberOfScripts]; - - UNICHARSET* unicharset; - OSBestResult best_result; -}; - -class OrientationDetector { - public: - OrientationDetector(const GenericVector* allowed_scripts, - OSResults* results); - bool detect_blob(BLOB_CHOICE_LIST* scores); - int get_orientation(); - private: - OSResults* osr_; - const GenericVector* allowed_scripts_; -}; - -class ScriptDetector { - public: - ScriptDetector(const GenericVector* allowed_scripts, - OSResults* osr, tesseract::Tesseract* tess); - void detect_blob(BLOB_CHOICE_LIST* scores); - bool must_stop(int orientation); - private: - OSResults* osr_; - static const char* korean_script_; - static const char* japanese_script_; - static const char* fraktur_script_; - int korean_id_; - int japanese_id_; - int katakana_id_; - int hiragana_id_; - int han_id_; - int hangul_id_; - int latin_id_; - int fraktur_id_; - tesseract::Tesseract* tess_; - const GenericVector* allowed_scripts_; -}; - -int orientation_and_script_detection(STRING& filename, - OSResults*, - tesseract::Tesseract*); - -int os_detect(TO_BLOCK_LIST* port_blocks, - OSResults* osr, - tesseract::Tesseract* tess); - -int os_detect_blobs(const GenericVector* allowed_scripts, - BLOBNBOX_CLIST* blob_list, - OSResults* osr, - tesseract::Tesseract* tess); - -bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, - ScriptDetector* s, OSResults*, - tesseract::Tesseract* tess); - -// Helper method to convert an orientation index to its value in degrees. -// The value represents the amount of clockwise rotation in degrees that must be -// applied for the text to be upright (readable). 
-TESS_API int OrientationIdToValue(const int& id); - -#endif // TESSERACT_CCMAIN_OSDETECT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/output.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/output.cpp deleted file mode 100644 index bdd2e23c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/output.cpp +++ /dev/null @@ -1,424 +0,0 @@ -/****************************************************************** - * File: output.cpp (Formerly output.c) - * Description: Output pass - * Author: Phil Cheatle - * Created: Thu Aug 4 10:56:08 BST 1994 - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include -#include -#include -#include "helpers.h" -#include "tessvars.h" -#include "control.h" -#include "reject.h" -#include "docqual.h" -#include "output.h" -#include "globals.h" -#include "tesseractclass.h" - -#define EPAPER_EXT ".ep" -#define PAGE_YSIZE 3508 -#define CTRL_INSET '\024' //dc4=text inset -#define CTRL_FONT '\016' //so=font change -#define CTRL_DEFAULT '\017' //si=default font -#define CTRL_SHIFT '\022' //dc2=x shift -#define CTRL_TAB '\011' //tab -#define CTRL_NEWLINE '\012' //newline -#define CTRL_HARDLINE '\015' //cr - -namespace tesseract { -void Tesseract::output_pass( //Tess output pass //send to api - PAGE_RES_IT &page_res_it, - const TBOX *target_word_box) { - BLOCK_RES *block_of_last_word; - bool force_eol; //During output - BLOCK *nextblock; //block of next word - WERD *nextword; //next word - - page_res_it.restart_page (); - block_of_last_word = nullptr; - while (page_res_it.word () != nullptr) { - check_debug_pt (page_res_it.word (), 120); - - if (target_word_box) { - TBOX current_word_box = page_res_it.word()->word->bounding_box(); - FCOORD center_pt( - (current_word_box.right() + current_word_box.left()) / 2, - (current_word_box.bottom() + current_word_box.top()) / 2); - if (!target_word_box->contains(center_pt)) { - page_res_it.forward(); - continue; - } - } - if (tessedit_write_block_separators && - block_of_last_word != page_res_it.block ()) { - block_of_last_word = page_res_it.block (); - } - - force_eol = (tessedit_write_block_separators && - (page_res_it.block () != page_res_it.next_block ())) || - (page_res_it.next_word () == nullptr); - - if (page_res_it.next_word () != nullptr) - nextword = page_res_it.next_word ()->word; - else - nextword = nullptr; - if (page_res_it.next_block () != nullptr) - nextblock = page_res_it.next_block ()->block; - else - nextblock = nullptr; - //regardless of tilde crunching - write_results(page_res_it, - 
determine_newline_type(page_res_it.word()->word, - page_res_it.block()->block, - nextword, nextblock), force_eol); - page_res_it.forward(); - } -} - - -/************************************************************************* - * write_results() - * - * All recognition and rejection has now been done. Generate the following: - * .txt file - giving the final best choices with NO highlighting - * .raw file - giving the tesseract top choice output for each word - * .map file - showing how the .txt file has been rejected in the .ep file - * epchoice list - a list of one element per word, containing the text for the - * epaper. Reject strings are inserted. - * inset list - a list of bounding boxes of reject insets - indexed by the - * reject strings in the epchoice text. - *************************************************************************/ -void Tesseract::write_results(PAGE_RES_IT& page_res_it, - char newline_type, // type of newline - bool force_eol) { // override tilde crunch? - WERD_RES *word = page_res_it.word(); - const UNICHARSET &uchset = *word->uch_set; - int i; - bool need_reject = false; - UNICHAR_ID space = uchset.unichar_to_id(" "); - - if ((word->unlv_crunch_mode != CR_NONE || - word->best_choice->length() == 0) && - !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) { - if ((word->unlv_crunch_mode != CR_DELETE) && - (!stats_.tilde_crunch_written || - ((word->unlv_crunch_mode == CR_KEEP_SPACE) && - (word->word->space () > 0) && - !word->word->flag (W_FUZZY_NON) && - !word->word->flag (W_FUZZY_SP)))) { - if (!word->word->flag (W_BOL) && - (word->word->space () > 0) && - !word->word->flag (W_FUZZY_NON) && - !word->word->flag (W_FUZZY_SP)) { - stats_.last_char_was_tilde = false; - } - need_reject = true; - } - if ((need_reject && !stats_.last_char_was_tilde) || - (force_eol && stats_.write_results_empty_block)) { - /* Write a reject char - mark as rejected unless zero_rejection mode */ - stats_.last_char_was_tilde = TRUE; - 
stats_.tilde_crunch_written = true; - stats_.last_char_was_newline = false; - stats_.write_results_empty_block = false; - } - - if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) { - stats_.tilde_crunch_written = false; - stats_.last_char_was_newline = true; - stats_.last_char_was_tilde = false; - } - - if (force_eol) - stats_.write_results_empty_block = true; - return; - } - - /* NORMAL PROCESSING of non tilde crunched words */ - - stats_.tilde_crunch_written = false; - if (newline_type) - stats_.last_char_was_newline = true; - else - stats_.last_char_was_newline = false; - stats_.write_results_empty_block = force_eol; // about to write a real word - - if (unlv_tilde_crunching && - stats_.last_char_was_tilde && - (word->word->space() == 0) && - !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) && - (word->best_choice->unichar_id(0) == space)) { - /* Prevent adjacent tilde across words - we know that adjacent tildes within - words have been removed */ - word->MergeAdjacentBlobs(0); - } - if (newline_type || - (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes)) - stats_.last_char_was_tilde = false; - else { - if (word->reject_map.length () > 0) { - if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space) - stats_.last_char_was_tilde = true; - else - stats_.last_char_was_tilde = false; - } - else if (word->word->space () > 0) - stats_.last_char_was_tilde = false; - /* else it is unchanged as there are no output chars */ - } - - ASSERT_HOST (word->best_choice->length() == word->reject_map.length()); - - set_unlv_suspects(word); - check_debug_pt (word, 120); - if (tessedit_rejection_debug) { - tprintf ("Dict word: \"%s\": %d\n", - word->best_choice->debug_string().string(), - dict_word(*(word->best_choice))); - } - if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) { - if (tessedit_zero_rejection) { - /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */ - for (i = 0; i < 
word->best_choice->length(); ++i) { - if (word->reject_map[i].rejected()) - word->reject_map[i].setrej_minimal_rej_accept(); - } - } - if (tessedit_minimal_rejection) { - /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */ - for (i = 0; i < word->best_choice->length(); ++i) { - if ((word->best_choice->unichar_id(i) != space) && - word->reject_map[i].rejected()) - word->reject_map[i].setrej_minimal_rej_accept(); - } - } - } -} -} // namespace tesseract - -/********************************************************************** - * determine_newline_type - * - * Find whether we have a wrapping or hard newline. - * Return FALSE if not at end of line. - **********************************************************************/ - -char determine_newline_type( //test line ends - WERD *word, //word to do - BLOCK *block, //current block - WERD *next_word, //next word - BLOCK *next_block //block of next word - ) { - int16_t end_gap; //to right edge - int16_t width; //of next word - TBOX word_box; //bounding - TBOX next_box; //next word - TBOX block_box; //block bounding - - if (!word->flag (W_EOL)) - return FALSE; //not end of line - if (next_word == nullptr || next_block == nullptr || block != next_block) - return CTRL_NEWLINE; - if (next_word->space () > 0) - return CTRL_HARDLINE; //it is tabbed - word_box = word->bounding_box (); - next_box = next_word->bounding_box (); - block_box = block->pdblk.bounding_box (); - //gap to eol - end_gap = block_box.right () - word_box.right (); - end_gap -= (int32_t) block->space (); - width = next_box.right () - next_box.left (); - // tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n", - // block_box.right(),word_box.right(),end_gap, - // next_box.right(),next_box.left(),width, - // end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE); - return end_gap > width ? 
CTRL_HARDLINE : CTRL_NEWLINE; -} - -/************************************************************************* - * get_rep_char() - * Return the first accepted character from the repetition string. This is the - * character which is repeated - as determined earlier by fix_rep_char() - *************************************************************************/ -namespace tesseract { -UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated? - int i; - for (i = 0; ((i < word->reject_map.length()) && - (word->reject_map[i].rejected())); ++i); - - if (i < word->reject_map.length()) { - return word->best_choice->unichar_id(i); - } else { - return word->uch_set->unichar_to_id(unrecognised_char.string()); - } -} - -/************************************************************************* - * SUSPECT LEVELS - * - * 0 - don't reject ANYTHING - * 1,2 - partial rejection - * 3 - BEST - * - * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and - * tessedit_minimal_rejection. 
- *************************************************************************/ -void Tesseract::set_unlv_suspects(WERD_RES *word_res) { - int len = word_res->reject_map.length(); - const WERD_CHOICE &word = *(word_res->best_choice); - const UNICHARSET &uchset = *word.unicharset(); - int i; - float rating_per_ch; - - if (suspect_level == 0) { - for (i = 0; i < len; i++) { - if (word_res->reject_map[i].rejected()) - word_res->reject_map[i].setrej_minimal_rej_accept(); - } - return; - } - - if (suspect_level >= 3) - return; //Use defaults - - /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/ - - if (safe_dict_word(word_res) && - (count_alphas(word) > suspect_short_words)) { - /* Unreject alphas in dictionary words */ - for (i = 0; i < len; ++i) { - if (word_res->reject_map[i].rejected() && - uchset.get_isalpha(word.unichar_id(i))) - word_res->reject_map[i].setrej_minimal_rej_accept(); - } - } - - rating_per_ch = word.rating() / word_res->reject_map.length(); - - if (rating_per_ch >= suspect_rating_per_ch) - return; // Don't touch bad ratings - - if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { - /* Unreject any Tess Acceptable word - but NOT tess reject chs*/ - for (i = 0; i < len; ++i) { - if (word_res->reject_map[i].rejected() && - (!uchset.eq(word.unichar_id(i), " "))) - word_res->reject_map[i].setrej_minimal_rej_accept(); - } - } - - for (i = 0; i < len; i++) { - if (word_res->reject_map[i].rejected()) { - if (word_res->reject_map[i].flag(R_DOC_REJ)) - word_res->reject_map[i].setrej_minimal_rej_accept(); - if (word_res->reject_map[i].flag(R_BLOCK_REJ)) - word_res->reject_map[i].setrej_minimal_rej_accept(); - if (word_res->reject_map[i].flag(R_ROW_REJ)) - word_res->reject_map[i].setrej_minimal_rej_accept(); - } - } - - if (suspect_level == 2) - return; - - if (!suspect_constrain_1Il || - (word_res->reject_map.length() <= suspect_short_words)) { - for (i = 0; i < len; i++) { - if (word_res->reject_map[i].rejected()) { - if 
((word_res->reject_map[i].flag(R_1IL_CONFLICT) || - word_res->reject_map[i].flag(R_POSTNN_1IL))) - word_res->reject_map[i].setrej_minimal_rej_accept(); - - if (!suspect_constrain_1Il && - word_res->reject_map[i].flag(R_MM_REJECT)) - word_res->reject_map[i].setrej_minimal_rej_accept(); - } - } - } - - if (acceptable_word_string(*word_res->uch_set, - word.unichar_string().string(), - word.unichar_lengths().string()) != - AC_UNACCEPTABLE || - acceptable_number_string(word.unichar_string().string(), - word.unichar_lengths().string())) { - if (word_res->reject_map.length() > suspect_short_words) { - for (i = 0; i < len; i++) { - if (word_res->reject_map[i].rejected() && - (!word_res->reject_map[i].perm_rejected() || - word_res->reject_map[i].flag (R_1IL_CONFLICT) || - word_res->reject_map[i].flag (R_POSTNN_1IL) || - word_res->reject_map[i].flag (R_MM_REJECT))) { - word_res->reject_map[i].setrej_minimal_rej_accept(); - } - } - } - } -} - -int16_t Tesseract::count_alphas(const WERD_CHOICE &word) { - int count = 0; - for (int i = 0; i < word.length(); ++i) { - if (word.unicharset()->get_isalpha(word.unichar_id(i))) - count++; - } - return count; -} - - -int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) { - int count = 0; - for (int i = 0; i < word.length(); ++i) { - if (word.unicharset()->get_isalpha(word.unichar_id(i)) || - word.unicharset()->get_isdigit(word.unichar_id(i))) - count++; - } - return count; -} - - -bool Tesseract::acceptable_number_string(const char* s, - const char* lengths) { - bool prev_digit = false; - - if (*lengths == 1 && *s == '(') - s++; - - if (*lengths == 1 && - ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))) - s++; - - for (; *s != '\0'; s += *(lengths++)) { - if (unicharset.get_isdigit(s, *lengths)) - prev_digit = true; - else if (prev_digit && - (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-')))) - prev_digit = false; - else if (prev_digit && *lengths == 1 && - (*(s + *lengths) == '\0') && ((*s == '%') || (*s 
== ')'))) - return true; - else if (prev_digit && - *lengths == 1 && (*s == '%') && - (*(lengths + 1) == 1 && *(s + *lengths) == ')') && - (*(s + *lengths + *(lengths + 1)) == '\0')) - return true; - else - return false; - } - return true; -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/output.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/output.h deleted file mode 100644 index 7afc80b1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/output.h +++ /dev/null @@ -1,33 +0,0 @@ -/****************************************************************** - * File: output.h (Formerly output.h) - * Description: Output pass - * Author: Phil Cheatle - * Created: Thu Aug 4 10:56:08 BST 1994 - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef OUTPUT_H -#define OUTPUT_H - -class BLOCK; -class WERD; - -/** test line ends */ -char determine_newline_type(WERD *word, ///< word to do - BLOCK *block, ///< current block - WERD *next_word, ///< next word - BLOCK *next_block ///< block of next word - ); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pageiterator.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pageiterator.cpp deleted file mode 100644 index 6b346a2e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pageiterator.cpp +++ /dev/null @@ -1,627 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: pageiterator.cpp -// Description: Iterator for tesseract page structure that avoids using -// tesseract internal data structures. -// Author: Ray Smith -// Created: Fri Feb 26 14:32:09 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "pageiterator.h" -#include "allheaders.h" -#include "helpers.h" -#include "pageres.h" -#include "tesseractclass.h" - -#include - -namespace tesseract { - -PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, - int scaled_yres, int rect_left, int rect_top, - int rect_width, int rect_height) - : page_res_(page_res), - tesseract_(tesseract), - word_(nullptr), - word_length_(0), - blob_index_(0), - cblob_it_(nullptr), - include_upper_dots_(false), - include_lower_dots_(false), - scale_(scale), - scaled_yres_(scaled_yres), - rect_left_(rect_left), - rect_top_(rect_top), - rect_width_(rect_width), - rect_height_(rect_height) { - it_ = new PAGE_RES_IT(page_res); - PageIterator::Begin(); -} - -PageIterator::~PageIterator() { - delete it_; - delete cblob_it_; -} - -/** - * PageIterators may be copied! This makes it possible to iterate over - * all the objects at a lower level, while maintaining an iterator to - * objects at a higher level. 
- */ -PageIterator::PageIterator(const PageIterator& src) - : page_res_(src.page_res_), - tesseract_(src.tesseract_), - word_(nullptr), - word_length_(src.word_length_), - blob_index_(src.blob_index_), - cblob_it_(nullptr), - include_upper_dots_(src.include_upper_dots_), - include_lower_dots_(src.include_lower_dots_), - scale_(src.scale_), - scaled_yres_(src.scaled_yres_), - rect_left_(src.rect_left_), - rect_top_(src.rect_top_), - rect_width_(src.rect_width_), - rect_height_(src.rect_height_) { - it_ = new PAGE_RES_IT(*src.it_); - BeginWord(src.blob_index_); -} - -const PageIterator& PageIterator::operator=(const PageIterator& src) { - page_res_ = src.page_res_; - tesseract_ = src.tesseract_; - include_upper_dots_ = src.include_upper_dots_; - include_lower_dots_ = src.include_lower_dots_; - scale_ = src.scale_; - scaled_yres_ = src.scaled_yres_; - rect_left_ = src.rect_left_; - rect_top_ = src.rect_top_; - rect_width_ = src.rect_width_; - rect_height_ = src.rect_height_; - delete it_; - it_ = new PAGE_RES_IT(*src.it_); - BeginWord(src.blob_index_); - return *this; -} - -bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const { - return (it_ == nullptr && it_ == other) || - ((other != nullptr) && (it_ != nullptr) && (*it_ == *other)); -} - -// ============= Moving around within the page ============. - -/** Resets the iterator to point to the start of the page. */ -void PageIterator::Begin() { - it_->restart_page_with_empties(); - BeginWord(0); -} - -void PageIterator::RestartParagraph() { - if (it_->block() == nullptr) return; // At end of the document. 
- PAGE_RES_IT para(page_res_); - PAGE_RES_IT next_para(para); - next_para.forward_paragraph(); - while (next_para.cmp(*it_) <= 0) { - para = next_para; - next_para.forward_paragraph(); - } - *it_ = para; - BeginWord(0); -} - -bool PageIterator::IsWithinFirstTextlineOfParagraph() const { - PageIterator p_start(*this); - p_start.RestartParagraph(); - return p_start.it_->row() == it_->row(); -} - -void PageIterator::RestartRow() { - it_->restart_row(); - BeginWord(0); -} - -/** - * Moves to the start of the next object at the given level in the - * page hierarchy, and returns false if the end of the page was reached. - * NOTE (CHANGED!) that ALL PageIteratorLevel level values will visit each - * non-text block at least once. - * Think of non text blocks as containing a single para, with at least one - * line, with a single imaginary word, containing a single symbol. - * The bounding boxes mark out any polygonal nature of the block, and - * PTIsTextType(BLockType()) is false for non-text blocks. - * Calls to Next with different levels may be freely intermixed. - * This function iterates words in right-to-left scripts correctly, if - * the appropriate language has been loaded into Tesseract. - */ -bool PageIterator::Next(PageIteratorLevel level) { - if (it_->block() == nullptr) return false; // Already at the end! 
- if (it_->word() == nullptr) - level = RIL_BLOCK; - - switch (level) { - case RIL_BLOCK: - it_->forward_block(); - break; - case RIL_PARA: - it_->forward_paragraph(); - break; - case RIL_TEXTLINE: - for (it_->forward_with_empties(); it_->row() == it_->prev_row(); - it_->forward_with_empties()); - break; - case RIL_WORD: - it_->forward_with_empties(); - break; - case RIL_SYMBOL: - if (cblob_it_ != nullptr) - cblob_it_->forward(); - ++blob_index_; - if (blob_index_ >= word_length_) - it_->forward_with_empties(); - else - return true; - break; - } - BeginWord(0); - return it_->block() != nullptr; -} - -/** - * Returns true if the iterator is at the start of an object at the given - * level. Possible uses include determining if a call to Next(RIL_WORD) - * moved to the start of a RIL_PARA. - */ -bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { - if (it_->block() == nullptr) return false; // Already at the end! - if (it_->word() == nullptr) return true; // In an image block. - switch (level) { - case RIL_BLOCK: - return blob_index_ == 0 && it_->block() != it_->prev_block(); - case RIL_PARA: - return blob_index_ == 0 && - (it_->block() != it_->prev_block() || - it_->row()->row->para() != it_->prev_row()->row->para()); - case RIL_TEXTLINE: - return blob_index_ == 0 && it_->row() != it_->prev_row(); - case RIL_WORD: - return blob_index_ == 0; - case RIL_SYMBOL: - return true; - } - return false; -} - -/** - * Returns whether the iterator is positioned at the last element in a - * given level. (e.g. the last word in a line, the last line in a block) - */ -bool PageIterator::IsAtFinalElement(PageIteratorLevel level, - PageIteratorLevel element) const { - if (Empty(element)) return true; // Already at the end! - // The result is true if we step forward by element and find we are - // at the the end of the page or at beginning of *all* levels in: - // [level, element). 
- // When there is more than one level difference between element and level, - // we could for instance move forward one symbol and still be at the first - // word on a line, so we also have to be at the first symbol in a word. - PageIterator next(*this); - next.Next(element); - if (next.Empty(element)) return true; // Reached the end of the page. - while (element > level) { - element = static_cast(element - 1); - if (!next.IsAtBeginningOf(element)) - return false; - } - return true; -} - -/** - * Returns whether this iterator is positioned - * before other: -1 - * equal to other: 0 - * after other: 1 - */ -int PageIterator::Cmp(const PageIterator &other) const { - int word_cmp = it_->cmp(*other.it_); - if (word_cmp != 0) - return word_cmp; - if (blob_index_ < other.blob_index_) - return -1; - if (blob_index_ == other.blob_index_) - return 0; - return 1; -} - -// ============= Accessing data ==============. -// Coordinate system: -// Integer coordinates are at the cracks between the pixels. -// The top-left corner of the top-left pixel in the image is at (0,0). -// The bottom-right corner of the bottom-right pixel in the image is at -// (width, height). -// Every bounding box goes from the top-left of the top-left contained -// pixel to the bottom-right of the bottom-right contained pixel, so -// the bounding box of the single top-left pixel in the image is: -// (0,0)->(1,1). -// If an image rectangle has been set in the API, then returned coordinates -// relate to the original (full) image, rather than the rectangle. - -/** - * Returns the bounding rectangle of the current object at the given level in - * the coordinates of the working image that is pix_binary(). - * See comment on coordinate system above. - * Returns false if there is no such object at the current position. 
- */ -bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, - int* left, int* top, - int* right, int* bottom) const { - if (Empty(level)) - return false; - TBOX box; - PARA *para = nullptr; - switch (level) { - case RIL_BLOCK: - box = it_->block()->block->restricted_bounding_box(include_upper_dots_, - include_lower_dots_); - break; - case RIL_PARA: - para = it_->row()->row->para(); - // explicit fall-through. - case RIL_TEXTLINE: - box = it_->row()->row->restricted_bounding_box(include_upper_dots_, - include_lower_dots_); - break; - case RIL_WORD: - box = it_->word()->word->restricted_bounding_box(include_upper_dots_, - include_lower_dots_); - break; - case RIL_SYMBOL: - if (cblob_it_ == nullptr) - box = it_->word()->box_word->BlobBox(blob_index_); - else - box = cblob_it_->data()->bounding_box(); - } - if (level == RIL_PARA) { - PageIterator other = *this; - other.Begin(); - do { - if (other.it_->block() && - other.it_->block()->block == it_->block()->block && - other.it_->row() && other.it_->row()->row && - other.it_->row()->row->para() == para) { - box = box.bounding_union(other.it_->row()->row->bounding_box()); - } - } while (other.Next(RIL_TEXTLINE)); - } - if (level != RIL_SYMBOL || cblob_it_ != nullptr) - box.rotate(it_->block()->block->re_rotation()); - // Now we have a box in tesseract coordinates relative to the image rectangle, - // we have to convert the coords to a top-down system. - const int pix_height = pixGetHeight(tesseract_->pix_binary()); - const int pix_width = pixGetWidth(tesseract_->pix_binary()); - *left = ClipToRange(static_cast(box.left()), 0, pix_width); - *top = ClipToRange(pix_height - box.top(), 0, pix_height); - *right = ClipToRange(static_cast(box.right()), *left, pix_width); - *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); - return true; -} - -/** - * Returns the bounding rectangle of the current object at the given level in - * coordinates of the original image. 
- * See comment on coordinate system above. - * Returns false if there is no such object at the current position. - */ -bool PageIterator::BoundingBox(PageIteratorLevel level, - int* left, int* top, - int* right, int* bottom) const { - return BoundingBox(level, 0, left, top, right, bottom); -} - -bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, - int* left, int* top, - int* right, int* bottom) const { - if (!BoundingBoxInternal(level, left, top, right, bottom)) - return false; - // Convert to the coordinate system of the original image. - *left = ClipToRange(*left / scale_ + rect_left_ - padding, - rect_left_, rect_left_ + rect_width_); - *top = ClipToRange(*top / scale_ + rect_top_ - padding, - rect_top_, rect_top_ + rect_height_); - *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, - *left, rect_left_ + rect_width_); - *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, - *top, rect_top_ + rect_height_); - return true; -} - -/** Return that there is no such object at a given level. */ -bool PageIterator::Empty(PageIteratorLevel level) const { - if (it_->block() == nullptr) return true; // Already at the end! - if (it_->word() == nullptr && level != RIL_BLOCK) return true; // image block - if (level == RIL_SYMBOL && blob_index_ >= word_length_) - return true; // Zero length word, or already at the end of it. - return false; -} - -/** Returns the type of the current block. See apitypes.h for PolyBlockType. */ -PolyBlockType PageIterator::BlockType() const { - if (it_->block() == nullptr || it_->block()->block == nullptr) - return PT_UNKNOWN; // Already at the end! - if (it_->block()->block->pdblk.poly_block() == nullptr) - return PT_FLOWING_TEXT; // No layout analysis used - assume text. - return it_->block()->block->pdblk.poly_block()->isA(); -} - -/** Returns the polygon outline of the current block. The returned Pta must - * be ptaDestroy-ed after use. 
*/ -Pta* PageIterator::BlockPolygon() const { - if (it_->block() == nullptr || it_->block()->block == nullptr) - return nullptr; // Already at the end! - if (it_->block()->block->pdblk.poly_block() == nullptr) - return nullptr; // No layout analysis used - no polygon. - ICOORDELT_IT it(it_->block()->block->pdblk.poly_block()->points()); - Pta* pta = ptaCreate(it.length()); - int num_pts = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) { - ICOORD* pt = it.data(); - // Convert to top-down coords within the input image. - float x = static_cast(pt->x()) / scale_ + rect_left_; - float y = rect_top_ + rect_height_ - static_cast(pt->y()) / scale_; - ptaAddPt(pta, x, y); - } - return pta; -} - -/** - * Returns a binary image of the current object at the given level. - * The position and size match the return from BoundingBoxInternal, and so this - * could be upscaled with respect to the original input image. - * Use pixDestroy to delete the image after use. - * The following methods are used to generate the images: - * RIL_BLOCK: mask the page image with the block polygon. - * RIL_TEXTLINE: Clip the rectangle of the line box from the page image. - * TODO(rays) fix this to generate and use a line polygon. - * RIL_WORD: Clip the rectangle of the word box from the page image. - * RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior - * to recognition) or the bounding box otherwise. - * A reconstruction of the original image (using xor to check for double - * representation) should be reasonably accurate, - * apart from removed noise, at the block level. Below the block level, the - * reconstruction will be missing images and line separators. - * At the symbol level, kerned characters will be invade the bounding box - * if rendered after recognition, making an xor reconstruction inaccurate, but - * an or construction better. 
Before recognition, symbol-level reconstruction - * should be good, even with xor, since the images come from the connected - * components. - */ -Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const { - int left, top, right, bottom; - if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) - return nullptr; - if (level == RIL_SYMBOL && cblob_it_ != nullptr && - cblob_it_->data()->area() != 0) - return cblob_it_->data()->render(); - Box* box = boxCreate(left, top, right - left, bottom - top); - Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr); - boxDestroy(&box); - if (level == RIL_BLOCK || level == RIL_PARA) { - // Clip to the block polygon as well. - TBOX mask_box; - Pix* mask = it_->block()->block->render_mask(&mask_box); - int mask_x = left - mask_box.left(); - int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); - // AND the mask and pix, putting the result in pix. - pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix), - pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x), - std::max(0, mask_y)); - pixDestroy(&mask); - } - return pix; -} - -/** - * Returns an image of the current object at the given level in greyscale - * if available in the input. To guarantee a binary image use BinaryImage. - * NOTE that in order to give the best possible image, the bounds are - * expanded slightly over the binary connected component, by the supplied - * padding, so the top-left position of the returned image is returned - * in (left,top). These will most likely not match the coordinates - * returned by BoundingBox. - * If you do not supply an original image, you will get a binary one. - * Use pixDestroy to delete the image after use. 
- */ -Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, - Pix* original_img, - int* left, int* top) const { - int right, bottom; - if (!BoundingBox(level, left, top, &right, &bottom)) - return nullptr; - if (original_img == nullptr) - return GetBinaryImage(level); - - // Expand the box. - *left = std::max(*left - padding, 0); - *top = std::max(*top - padding, 0); - right = std::min(right + padding, rect_width_); - bottom = std::min(bottom + padding, rect_height_); - Box* box = boxCreate(*left, *top, right - *left, bottom - *top); - Pix* grey_pix = pixClipRectangle(original_img, box, nullptr); - boxDestroy(&box); - if (level == RIL_BLOCK || level == RIL_PARA) { - // Clip to the block polygon as well. - TBOX mask_box; - Pix* mask = it_->block()->block->render_mask(&mask_box); - // Copy the mask registered correctly into an image the size of grey_pix. - int mask_x = *left - mask_box.left(); - int mask_y = *top - (pixGetHeight(original_img) - mask_box.top()); - int width = pixGetWidth(grey_pix); - int height = pixGetHeight(grey_pix); - Pix* resized_mask = pixCreate(width, height, 1); - pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, - PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y)); - pixDestroy(&mask); - pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, - 2 * padding + 1); - pixInvert(resized_mask, resized_mask); - pixSetMasked(grey_pix, resized_mask, UINT32_MAX); - pixDestroy(&resized_mask); - } - return grey_pix; -} - -/** - * Returns the baseline of the current object at the given level. - * The baseline is the line that passes through (x1, y1) and (x2, y2). - * WARNING: with vertical text, baselines may be vertical! - */ -bool PageIterator::Baseline(PageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) const { - if (it_->word() == nullptr) return false; // Already at the end! 
- ROW* row = it_->row()->row; - WERD* word = it_->word()->word; - TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) - ? word->bounding_box() - : row->bounding_box(); - int left = box.left(); - ICOORD startpt(left, static_cast(row->base_line(left) + 0.5)); - int right = box.right(); - ICOORD endpt(right, static_cast(row->base_line(right) + 0.5)); - // Rotate to image coordinates and convert to global image coords. - startpt.rotate(it_->block()->block->re_rotation()); - endpt.rotate(it_->block()->block->re_rotation()); - *x1 = startpt.x() / scale_ + rect_left_; - *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; - *x2 = endpt.x() / scale_ + rect_left_; - *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; - return true; -} - -void PageIterator::Orientation(tesseract::Orientation *orientation, - tesseract::WritingDirection *writing_direction, - tesseract::TextlineOrder *textline_order, - float *deskew_angle) const { - BLOCK* block = it_->block()->block; - - // Orientation - FCOORD up_in_image(0.0, 1.0); - up_in_image.unrotate(block->classify_rotation()); - up_in_image.rotate(block->re_rotation()); - - if (up_in_image.x() == 0.0F) { - if (up_in_image.y() > 0.0F) { - *orientation = ORIENTATION_PAGE_UP; - } else { - *orientation = ORIENTATION_PAGE_DOWN; - } - } else if (up_in_image.x() > 0.0F) { - *orientation = ORIENTATION_PAGE_RIGHT; - } else { - *orientation = ORIENTATION_PAGE_LEFT; - } - - // Writing direction - bool is_vertical_text = (block->classify_rotation().x() == 0.0); - bool right_to_left = block->right_to_left(); - *writing_direction = - is_vertical_text - ? WRITING_DIRECTION_TOP_TO_BOTTOM - : (right_to_left - ? WRITING_DIRECTION_RIGHT_TO_LEFT - : WRITING_DIRECTION_LEFT_TO_RIGHT); - - // Textline Order - const bool is_mongolian = false; // TODO(eger): fix me - *textline_order = is_vertical_text - ? (is_mongolian - ? 
TEXTLINE_ORDER_LEFT_TO_RIGHT - : TEXTLINE_ORDER_RIGHT_TO_LEFT) - : TEXTLINE_ORDER_TOP_TO_BOTTOM; - - // Deskew angle - FCOORD skew = block->skew(); // true horizontal for textlines - *deskew_angle = -skew.angle(); -} - -void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, - bool *is_list_item, - bool *is_crown, - int *first_line_indent) const { - *just = tesseract::JUSTIFICATION_UNKNOWN; - if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || - !it_->row()->row->para()->model) - return; - - PARA *para = it_->row()->row->para(); - *is_list_item = para->is_list_item; - *is_crown = para->is_very_first_or_continuation; - *first_line_indent = para->model->first_indent() - - para->model->body_indent(); - *just = para->model->justification(); -} - -/** - * Sets up the internal data for iterating the blobs of a new word, then - * moves the iterator to the given offset. - */ -void PageIterator::BeginWord(int offset) { - WERD_RES* word_res = it_->word(); - if (word_res == nullptr) { - // This is a non-text block, so there is no word. - word_length_ = 0; - blob_index_ = 0; - word_ = nullptr; - return; - } - if (word_res->best_choice != nullptr) { - // Recognition has been done, so we are using the box_word, which - // is already baseline denormalized. - word_length_ = word_res->best_choice->length(); - if (word_res->box_word != nullptr) { - if (word_res->box_word->length() != word_length_) { - tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", - word_length_, word_res->best_choice->unichar_string().string(), - word_res->box_word->length()); - word_res->box_word->bounding_box().print(); - } - ASSERT_HOST(word_res->box_word->length() == word_length_); - } - word_ = nullptr; - // We will be iterating the box_word. - delete cblob_it_; - cblob_it_ = nullptr; - } else { - // No recognition yet, so a "symbol" is a cblob. 
- word_ = word_res->word; - ASSERT_HOST(word_->cblob_list() != nullptr); - word_length_ = word_->cblob_list()->length(); - if (cblob_it_ == nullptr) cblob_it_ = new C_BLOB_IT; - cblob_it_->set_to_list(word_->cblob_list()); - } - for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { - if (cblob_it_ != nullptr) - cblob_it_->forward(); - } -} - -bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) { - if (it_->word() != nullptr) { - it_->word()->blamer_bundle = blamer_bundle; - return true; - } else { - return false; - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pageiterator.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pageiterator.h deleted file mode 100644 index 69290808..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pageiterator.h +++ /dev/null @@ -1,364 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: pageiterator.h -// Description: Iterator for tesseract page structure that avoids using -// tesseract internal data structures. -// Author: Ray Smith -// Created: Fri Feb 26 11:01:06 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_ -#define TESSERACT_CCMAIN_PAGEITERATOR_H_ - -#include "publictypes.h" -#include "platform.h" - -struct BlamerBundle; -class C_BLOB_IT; -class PAGE_RES; -class PAGE_RES_IT; -class WERD; -struct Pix; -struct Pta; - -namespace tesseract { - -class Tesseract; - -/** - * Class to iterate over tesseract page structure, providing access to all - * levels of the page hierarchy, without including any tesseract headers or - * having to handle any tesseract structures. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - * See apitypes.h for the definition of PageIteratorLevel. - * See also ResultIterator, derived from PageIterator, which adds in the - * ability to access OCR output with text-specific methods. - */ - -class TESS_API PageIterator { - public: - /** - * page_res and tesseract come directly from the BaseAPI. - * The rectangle parameters are copied indirectly from the Thresholder, - * via the BaseAPI. They represent the coordinates of some rectangle in an - * original image (in top-left-origin coordinates) and therefore the top-left - * needs to be added to any output boxes in order to specify coordinates - * in the original image. See TessBaseAPI::SetRectangle. - * The scale and scaled_yres are in case the Thresholder scaled the image - * rectangle prior to thresholding. Any coordinates in tesseract's image - * must be divided by scale before adding (rect_left, rect_top). - * The scaled_yres indicates the effective resolution of the binary image - * that tesseract has been given by the Thresholder. - * After the constructor, Begin has already been called. 
- */ - PageIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height); - virtual ~PageIterator(); - - /** - * Page/ResultIterators may be copied! This makes it possible to iterate over - * all the objects at a lower level, while maintaining an iterator to - * objects at a higher level. These constructors DO NOT CALL Begin, so - * iterations will continue from the location of src. - */ - PageIterator(const PageIterator& src); - const PageIterator& operator=(const PageIterator& src); - - /** Are we positioned at the same location as other? */ - bool PositionedAtSameWord(const PAGE_RES_IT* other) const; - - // ============= Moving around within the page ============. - - /** - * Moves the iterator to point to the start of the page to begin an - * iteration. - */ - virtual void Begin(); - - /** - * Moves the iterator to the beginning of the paragraph. - * This class implements this functionality by moving it to the zero indexed - * blob of the first (leftmost) word on the first row of the paragraph. - */ - virtual void RestartParagraph(); - - /** - * Return whether this iterator points anywhere in the first textline of a - * paragraph. - */ - bool IsWithinFirstTextlineOfParagraph() const; - - /** - * Moves the iterator to the beginning of the text line. - * This class implements this functionality by moving it to the zero indexed - * blob of the first (leftmost) word of the row. - */ - virtual void RestartRow(); - - /** - * Moves to the start of the next object at the given level in the - * page hierarchy, and returns false if the end of the page was reached. - * NOTE that RIL_SYMBOL will skip non-text blocks, but all other - * PageIteratorLevel level values will visit each non-text block once. - * Think of non text blocks as containing a single para, with a single line, - * with a single imaginary word. - * Calls to Next with different levels may be freely intermixed. 
- * This function iterates words in right-to-left scripts correctly, if - * the appropriate language has been loaded into Tesseract. - */ - virtual bool Next(PageIteratorLevel level); - - /** - * Returns true if the iterator is at the start of an object at the given - * level. - * - * For instance, suppose an iterator it is pointed to the first symbol of the - * first word of the third line of the second paragraph of the first block in - * a page, then: - * it.IsAtBeginningOf(RIL_BLOCK) = false - * it.IsAtBeginningOf(RIL_PARA) = false - * it.IsAtBeginningOf(RIL_TEXTLINE) = true - * it.IsAtBeginningOf(RIL_WORD) = true - * it.IsAtBeginningOf(RIL_SYMBOL) = true - */ - virtual bool IsAtBeginningOf(PageIteratorLevel level) const; - - /** - * Returns whether the iterator is positioned at the last element in a - * given level. (e.g. the last word in a line, the last line in a block) - * - * Here's some two-paragraph example - * text. It starts off innocuously - * enough but quickly turns bizarre. - * The author inserts a cornucopia - * of words to guard against confused - * references. - * - * Now take an iterator it pointed to the start of "bizarre." - * it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false - * it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true - * it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false - */ - virtual bool IsAtFinalElement(PageIteratorLevel level, - PageIteratorLevel element) const; - - /** - * Returns whether this iterator is positioned - * before other: -1 - * equal to other: 0 - * after other: 1 - */ - int Cmp(const PageIterator &other) const; - - // ============= Accessing data ==============. - // Coordinate system: - // Integer coordinates are at the cracks between the pixels. - // The top-left corner of the top-left pixel in the image is at (0,0). - // The bottom-right corner of the bottom-right pixel in the image is at - // (width, height). 
- // Every bounding box goes from the top-left of the top-left contained - // pixel to the bottom-right of the bottom-right contained pixel, so - // the bounding box of the single top-left pixel in the image is: - // (0,0)->(1,1). - // If an image rectangle has been set in the API, then returned coordinates - // relate to the original (full) image, rather than the rectangle. - - /** - * Controls what to include in a bounding box. Bounding boxes of all levels - * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics. - * Between layout analysis and recognition, it isn't known where all - * diacritics belong, so this control is used to include or exclude some - * diacritics that are above or below the main body of the word. In most cases - * where the placement is obvious, and after recognition, it doesn't make as - * much difference, as the diacritics will already be included in the word. - */ - void SetBoundingBoxComponents(bool include_upper_dots, - bool include_lower_dots) { - include_upper_dots_ = include_upper_dots; - include_lower_dots_ = include_lower_dots; - } - - /** - * Returns the bounding rectangle of the current object at the given level. - * See comment on coordinate system above. - * Returns false if there is no such object at the current position. - * The returned bounding box is guaranteed to match the size and position - * of the image returned by GetBinaryImage, but may clip foreground pixels - * from a grey image. The padding argument to GetImage can be used to expand - * the image to include more foreground pixels. See GetImage below. 
- */ - bool BoundingBox(PageIteratorLevel level, - int* left, int* top, int* right, int* bottom) const; - bool BoundingBox(PageIteratorLevel level, const int padding, - int* left, int* top, int* right, int* bottom) const; - /** - * Returns the bounding rectangle of the object in a coordinate system of the - * working image rectangle having its origin at (rect_left_, rect_top_) with - * respect to the original image and is scaled by a factor scale_. - */ - bool BoundingBoxInternal(PageIteratorLevel level, - int* left, int* top, int* right, int* bottom) const; - - /** Returns whether there is no object of a given level. */ - bool Empty(PageIteratorLevel level) const; - - /** - * Returns the type of the current block. See apitypes.h for - * PolyBlockType. - */ - PolyBlockType BlockType() const; - - /** - * Returns the polygon outline of the current block. The returned Pta must - * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices - * of the polygon, and the last edge is the line segment between the last - * point and the first point. nullptr will be returned if the iterator is - * at the end of the document or layout analysis was not used. - */ - Pta* BlockPolygon() const; - - /** - * Returns a binary image of the current object at the given level. - * The position and size match the return from BoundingBoxInternal, and so - * this could be upscaled with respect to the original input image. - * Use pixDestroy to delete the image after use. - */ - Pix* GetBinaryImage(PageIteratorLevel level) const; - - /** - * Returns an image of the current object at the given level in greyscale - * if available in the input. To guarantee a binary image use BinaryImage. - * NOTE that in order to give the best possible image, the bounds are - * expanded slightly over the binary connected component, by the supplied - * padding, so the top-left position of the returned image is returned - * in (left,top). 
These will most likely not match the coordinates - * returned by BoundingBox. - * If you do not supply an original image, you will get a binary one. - * Use pixDestroy to delete the image after use. - */ - Pix* GetImage(PageIteratorLevel level, int padding, Pix* original_img, - int* left, int* top) const; - - /** - * Returns the baseline of the current object at the given level. - * The baseline is the line that passes through (x1, y1) and (x2, y2). - * WARNING: with vertical text, baselines may be vertical! - * Returns false if there is no baseline at the current position. - */ - bool Baseline(PageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) const; - - /** - * Returns orientation for the block the iterator points to. - * orientation, writing_direction, textline_order: see publictypes.h - * deskew_angle: after rotating the block so the text orientation is - * upright, how many radians does one have to rotate the - * block anti-clockwise for it to be level? - * -Pi/4 <= deskew_angle <= Pi/4 - */ - void Orientation(tesseract::Orientation *orientation, - tesseract::WritingDirection *writing_direction, - tesseract::TextlineOrder *textline_order, - float *deskew_angle) const; - - /** - * Returns information about the current paragraph, if available. - * - * justification - - * LEFT if ragged right, or fully justified and script is left-to-right. - * RIGHT if ragged left, or fully justified and script is right-to-left. - * unknown if it looks like source code or we have very few lines. - * is_list_item - - * true if we believe this is a member of an ordered or unordered list. - * is_crown - - * true if the first line of the paragraph is aligned with the other - * lines of the paragraph even though subsequent paragraphs have first - * line indents. This typically indicates that this is the continuation - * of a previous paragraph or that it is the very first paragraph in - * the chapter. 
- * first_line_indent - - * For LEFT aligned paragraphs, the first text line of paragraphs of - * this kind are indented this many pixels from the left edge of the - * rest of the paragraph. - * for RIGHT aligned paragraphs, the first text line of paragraphs of - * this kind are indented this many pixels from the right edge of the - * rest of the paragraph. - * NOTE 1: This value may be negative. - * NOTE 2: if *is_crown == true, the first line of this paragraph is - * actually flush, and first_line_indent is set to the "common" - * first_line_indent for subsequent paragraphs in this block - * of text. - */ - void ParagraphInfo(tesseract::ParagraphJustification *justification, - bool *is_list_item, - bool *is_crown, - int *first_line_indent) const; - - // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle - // of the current word to the given pointer (takes ownership of the pointer) - // and returns true. - // Can only be used when iterating on the word level. - bool SetWordBlamerBundle(BlamerBundle *blamer_bundle); - - protected: - /** - * Sets up the internal data for iterating the blobs of a new word, then - * moves the iterator to the given offset. - */ - TESS_LOCAL void BeginWord(int offset); - - /** Pointer to the page_res owned by the API. */ - PAGE_RES* page_res_; - /** Pointer to the Tesseract object owned by the API. */ - Tesseract* tesseract_; - /** - * The iterator to the page_res_. Owned by this ResultIterator. - * A pointer just to avoid dragging in Tesseract includes. - */ - PAGE_RES_IT* it_; - /** - * The current input WERD being iterated. If there is an output from OCR, - * then word_ is nullptr. Owned by the API - */ - WERD* word_; - /** The length of the current word_. */ - int word_length_; - /** The current blob index within the word. */ - int blob_index_; - /** - * Iterator to the blobs within the word. If nullptr, then we are iterating - * OCR results in the box_word. - * Owned by this ResultIterator. 
- */ - C_BLOB_IT* cblob_it_; - /** Control over what to include in bounding boxes. */ - bool include_upper_dots_; - bool include_lower_dots_; - /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/ - int scale_; - int scaled_yres_; - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pagesegmain.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pagesegmain.cpp deleted file mode 100644 index dc708b6e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pagesegmain.cpp +++ /dev/null @@ -1,410 +0,0 @@ -/********************************************************************** - * File: pagesegmain.cpp - * Description: Top-level page segmenter for Tesseract. - * Author: Ray Smith - * Created: Thu Sep 25 17:12:01 PDT 2008 - * - * (C) Copyright 2008, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifdef _WIN32 -#ifndef unlink -#include -#endif -#else -#include -#endif // _WIN32 - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "allheaders.h" -#include "blobbox.h" -#include "blread.h" -#include "colfind.h" -#include "debugpixa.h" -#include "equationdetect.h" -#include "imagefind.h" -#include "linefind.h" -#include "makerow.h" -#include "osdetect.h" -#include "tabvector.h" -#include "tesseractclass.h" -#include "tessvars.h" -#include "textord.h" -#include "tordmain.h" -#include "wordseg.h" - -namespace tesseract { - -// Max erosions to perform in removing an enclosing circle. -const int kMaxCircleErosions = 8; - -// Helper to remove an enclosing circle from an image. -// If there isn't one, then the image will most likely get badly mangled. -// The returned pix must be pixDestroyed after use. nullptr may be returned -// if the image doesn't meet the trivial conditions that it uses to determine -// success. -static Pix* RemoveEnclosingCircle(Pix* pixs) { - Pix* pixsi = pixInvert(nullptr, pixs); - Pix* pixc = pixCreateTemplate(pixs); - pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET); - pixSeedfillBinary(pixc, pixc, pixsi, 4); - pixInvert(pixc, pixc); - pixDestroy(&pixsi); - Pix* pixt = pixAnd(nullptr, pixs, pixc); - l_int32 max_count; - pixCountConnComp(pixt, 8, &max_count); - // The count has to go up before we start looking for the minimum. - l_int32 min_count = INT32_MAX; - Pix* pixout = nullptr; - for (int i = 1; i < kMaxCircleErosions; i++) { - pixDestroy(&pixt); - pixErodeBrick(pixc, pixc, 3, 3); - pixt = pixAnd(nullptr, pixs, pixc); - l_int32 count; - pixCountConnComp(pixt, 8, &count); - if (i == 1 || count > max_count) { - max_count = count; - min_count = count; - } else if (i > 1 && count < min_count) { - min_count = count; - pixDestroy(&pixout); - pixout = pixCopy(nullptr, pixt); // Save the best. - } else if (count >= min_count) { - break; // We have passed by the best. 
- } - } - pixDestroy(&pixt); - pixDestroy(&pixc); - return pixout; -} - -/** - * Segment the page according to the current value of tessedit_pageseg_mode. - * pix_binary_ is used as the source image and should not be nullptr. - * On return the blocks list owns all the constructed page layout. - */ -int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, - Tesseract* osd_tess, OSResults* osr) { - ASSERT_HOST(pix_binary_ != nullptr); - int width = pixGetWidth(pix_binary_); - int height = pixGetHeight(pix_binary_); - // Get page segmentation mode. - PageSegMode pageseg_mode = static_cast( - static_cast(tessedit_pageseg_mode)); - // If a UNLV zone file can be found, use that instead of segmentation. - if (!PSM_COL_FIND_ENABLED(pageseg_mode) && - input_file != nullptr && input_file->length() > 0) { - STRING name = *input_file; - const char* lastdot = strrchr(name.string(), '.'); - if (lastdot != nullptr) - name[lastdot - name.string()] = '\0'; - read_unlv_file(name, width, height, blocks); - } - if (blocks->empty()) { - // No UNLV file present. Work according to the PageSegMode. - // First make a single block covering the whole image. - BLOCK_IT block_it(blocks); - BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); - block->set_right_to_left(right_to_left()); - block_it.add_to_end(block); - } else { - // UNLV file present. Use PSM_SINGLE_BLOCK. - pageseg_mode = PSM_SINGLE_BLOCK; - } - // The diacritic_blobs holds noise blobs that may be diacritics. They - // are separated out on areas of the image that seem noisy and short-circuit - // the layout process, going straight from the initial partition creation - // right through to after word segmentation, where they are added to the - // rej_cblobs list of the most appropriate word. From there classification - // will determine whether they are used. 
- BLOBNBOX_LIST diacritic_blobs; - int auto_page_seg_ret_val = 0; - TO_BLOCK_LIST to_blocks; - if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || - PSM_SPARSE(pageseg_mode)) { - auto_page_seg_ret_val = AutoPageSeg( - pageseg_mode, blocks, &to_blocks, - enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr); - if (pageseg_mode == PSM_OSD_ONLY) - return auto_page_seg_ret_val; - // To create blobs from the image region bounds uncomment this line: - // to_blocks.clear(); // Uncomment to go back to the old mode. - } else { - deskew_ = FCOORD(1.0f, 0.0f); - reskew_ = FCOORD(1.0f, 0.0f); - if (pageseg_mode == PSM_CIRCLE_WORD) { - Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); - if (pixcleaned != nullptr) { - pixDestroy(&pix_binary_); - pix_binary_ = pixcleaned; - } - } - } - - if (auto_page_seg_ret_val < 0) { - return -1; - } - - if (blocks->empty()) { - if (textord_debug_tabfind) - tprintf("Empty page\n"); - return 0; // AutoPageSeg found an empty page. - } - bool splitting = - pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT; - bool cjk_mode = textord_use_cjk_fp_model; - - textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, - pix_thresholds_, pix_grey_, splitting || cjk_mode, - &diacritic_blobs, blocks, &to_blocks); - return auto_page_seg_ret_val; -} - -/** - * Auto page segmentation. Divide the page image into blocks of uniform - * text linespacing and images. - * - * Resolution (in ppi) is derived from the input image. - * - * The output goes in the blocks list with corresponding TO_BLOCKs in the - * to_blocks list. - * - * If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide - * the image into columns, but multiple blocks are still made if the text is - * of non-uniform linespacing. 
- * - * If diacritic_blobs is non-null, then diacritics/noise blobs, that would - * confuse layout analysis by causing textline overlap, are placed there, - * with the expectation that they will be reassigned to words later and - * noise/diacriticness determined via classification. - * - * If osd (orientation and script detection) is true then that is performed - * as well. If only_osd is true, then only orientation and script detection is - * performed. If osd is desired, (osd or only_osd) then osr_tess must be - * another Tesseract that was initialized especially for osd, and the results - * will be output into osr (orientation and script result). - */ -int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, - TO_BLOCK_LIST* to_blocks, - BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess, - OSResults* osr) { - Pix* photomask_pix = nullptr; - Pix* musicmask_pix = nullptr; - // The blocks made by the ColumnFinder. Moved to blocks before return. - BLOCK_LIST found_blocks; - TO_BLOCK_LIST temp_blocks; - - ColumnFinder* finder = SetupPageSegAndDetectOrientation( - pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix, - &musicmask_pix); - int result = 0; - if (finder != nullptr) { - TO_BLOCK_IT to_block_it(&temp_blocks); - TO_BLOCK* to_block = to_block_it.data(); - if (musicmask_pix != nullptr) { - // TODO(rays) pass the musicmask_pix into FindBlocks and mark music - // blocks separately. For now combine with photomask_pix. 
- pixOr(photomask_pix, photomask_pix, musicmask_pix); - } - if (equ_detect_) { - finder->SetEquationDetect(equ_detect_); - } - result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, - to_block, photomask_pix, pix_thresholds_, - pix_grey_, &pixa_debug_, &found_blocks, - diacritic_blobs, to_blocks); - if (result >= 0) - finder->GetDeskewVectors(&deskew_, &reskew_); - delete finder; - } - pixDestroy(&photomask_pix); - pixDestroy(&musicmask_pix); - if (result < 0) return result; - - blocks->clear(); - BLOCK_IT block_it(blocks); - // Move the found blocks to the input/output blocks. - block_it.add_list_after(&found_blocks); - return result; -} - -// Helper adds all the scripts from sid_set converted to ids from osd_set to -// allowed_ids. -static void AddAllScriptsConverted(const UNICHARSET& sid_set, - const UNICHARSET& osd_set, - GenericVector* allowed_ids) { - for (int i = 0; i < sid_set.get_script_table_size(); ++i) { - if (i != sid_set.null_sid()) { - const char* script = sid_set.get_script_from_script_id(i); - allowed_ids->push_back(osd_set.get_script_id_from_name(script)); - } - } -} - -/** - * Sets up auto page segmentation, determines the orientation, and corrects it. - * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to - * facilitate testing. - * photo_mask_pix is a pointer to a nullptr pointer that will be filled on return - * with the leptonica photo mask, which must be pixDestroyed by the caller. - * to_blocks is an empty list that will be filled with (usually a single) - * block that is used during layout analysis. This ugly API is required - * because of the possibility of a unlv zone file. - * TODO(rays) clean this up. - * See AutoPageSeg for other arguments. - * The returned ColumnFinder must be deleted after use. 
- */ -ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( - PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess, - OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, - Pix** music_mask_pix) { - int vertical_x = 0; - int vertical_y = 1; - TabVector_LIST v_lines; - TabVector_LIST h_lines; - ICOORD bleft(0, 0); - - ASSERT_HOST(pix_binary_ != nullptr); - if (tessedit_dump_pageseg_images) { - pixa_debug_.AddPix(pix_binary_, "PageSegInput"); - } - // Leptonica is used to find the rule/separator lines in the input. - LineFinder::FindAndRemoveLines(source_resolution_, - textord_tabfind_show_vlines, pix_binary_, - &vertical_x, &vertical_y, music_mask_pix, - &v_lines, &h_lines); - if (tessedit_dump_pageseg_images) { - pixa_debug_.AddPix(pix_binary_, "NoLines"); - } - // Leptonica is used to find a mask of the photo regions in the input. - *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_); - if (tessedit_dump_pageseg_images) { - pixa_debug_.AddPix(pix_binary_, "NoImages"); - } - if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear(); - - // The rest of the algorithm uses the usual connected components. - textord_.find_components(pix_binary_, blocks, to_blocks); - - TO_BLOCK_IT to_block_it(to_blocks); - // There must be exactly one input block. - // TODO(rays) handle new textline finding with a UNLV zone file. - ASSERT_HOST(to_blocks->singleton()); - TO_BLOCK* to_block = to_block_it.data(); - TBOX blkbox = to_block->block->pdblk.bounding_box(); - ColumnFinder* finder = nullptr; - int estimated_resolution = source_resolution_; - if (source_resolution_ == kMinCredibleResolution) { - // Try to estimate resolution from typical body text size. 
- int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor); - if (res > estimated_resolution && res < kMaxCredibleResolution) { - estimated_resolution = res; - tprintf("Estimating resolution as %d\n", estimated_resolution); - } - } - - if (to_block->line_size >= 2) { - finder = new ColumnFinder(static_cast(to_block->line_size), - blkbox.botleft(), blkbox.topright(), - estimated_resolution, textord_use_cjk_fp_model, - textord_tabfind_aligned_gap_fraction, &v_lines, - &h_lines, vertical_x, vertical_y); - - finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block); - -#ifndef DISABLED_LEGACY_ENGINE - - if (equ_detect_) { - equ_detect_->LabelSpecialText(to_block); - } - - BLOBNBOX_CLIST osd_blobs; - // osd_orientation is the number of 90 degree rotations to make the - // characters upright. (See osdetect.h for precise definition.) - // We want the text lines horizontal, (vertical text indicates vertical - // textlines) which may conflict (eg vertically written CJK). - int osd_orientation = 0; - bool vertical_text = textord_tabfind_force_vertical_text || - pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; - if (!vertical_text && textord_tabfind_vertical_text && - PSM_ORIENTATION_ENABLED(pageseg_mode)) { - vertical_text = - finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio, - to_block, &osd_blobs); - } - if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) { - GenericVector osd_scripts; - if (osd_tess != this) { - // We are running osd as part of layout analysis, so constrain the - // scripts to those allowed by *this. 
- AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts); - for (int s = 0; s < sub_langs_.size(); ++s) { - AddAllScriptsConverted(sub_langs_[s]->unicharset, - osd_tess->unicharset, &osd_scripts); - } - } - os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess); - if (pageseg_mode == PSM_OSD_ONLY) { - delete finder; - return nullptr; - } - osd_orientation = osr->best_result.orientation_id; - double osd_score = osr->orientations[osd_orientation]; - double osd_margin = min_orientation_margin * 2; - for (int i = 0; i < 4; ++i) { - if (i != osd_orientation && - osd_score - osr->orientations[i] < osd_margin) { - osd_margin = osd_score - osr->orientations[i]; - } - } - int best_script_id = osr->best_result.script_id; - const char* best_script_str = - osd_tess->unicharset.get_script_from_script_id(best_script_id); - bool cjk = best_script_id == osd_tess->unicharset.han_sid() || - best_script_id == osd_tess->unicharset.hiragana_sid() || - best_script_id == osd_tess->unicharset.katakana_sid() || - strcmp("Japanese", best_script_str) == 0 || - strcmp("Korean", best_script_str) == 0 || - strcmp("Hangul", best_script_str) == 0; - if (cjk) { - finder->set_cjk_script(true); - } - if (osd_margin < min_orientation_margin) { - // The margin is weak. - if (!cjk && !vertical_text && osd_orientation == 2) { - // upside down latin text is improbable with such a weak margin. - tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: " - "Don't rotate.\n", osd_margin); - osd_orientation = 0; - } else { - tprintf( - "OSD: Weak margin (%.2f) for %d blob text block, " - "but using orientation anyway: %d\n", - osd_margin, osd_blobs.length(), osd_orientation); - } - } - } - osd_blobs.shallow_clear(); - finder->CorrectOrientation(to_block, vertical_text, osd_orientation); - -#endif // ndef DISABLED_LEGACY_ENGINE - } - - return finder; -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pagewalk.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pagewalk.cpp deleted file mode 100644 index a02fe5f4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pagewalk.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/********************************************************************** - * File: pagewalk.cpp (Formerly walkers.c) - * Description: Block list processors - * Author: Phil Cheatle - * Created: Thu Oct 10 16:25:24 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "pageres.h" -#include "tesseractclass.h" - -namespace tesseract { -/** - * @name process_selected_words() - * - * Walk the current block list applying the specified word processor function - * to each word that overlaps the selection_box. 
- */ -void Tesseract::process_selected_words( - PAGE_RES* page_res, // blocks to check - TBOX& selection_box, - bool (tesseract::Tesseract::* word_processor)(PAGE_RES_IT* pr_it)) { - for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != nullptr; - page_res_it.forward()) { - WERD* word = page_res_it.word()->word; - if (word->bounding_box().overlap(selection_box)) { - if (!(this->*word_processor)(&page_res_it)) - return; - } - } -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/par_control.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/par_control.cpp deleted file mode 100644 index 06ab70cf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/par_control.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: par_control.cpp -// Description: Control code for parallel implementation. -// Author: Ray Smith -// Created: Mon Nov 04 13:23:15 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "tesseractclass.h" -#ifdef _OPENMP -#include -#endif // _OPENMP - -namespace tesseract { - -struct BlobData { - BlobData() : blob(nullptr), choices(nullptr) {} - BlobData(int index, Tesseract* tess, const WERD_RES& word) - : blob(word.chopped_word->blobs[index]), - tesseract(tess), - choices(&(*word.ratings)(index, index)) {} - - TBLOB* blob; - Tesseract* tesseract; - BLOB_CHOICE_LIST** choices; -}; - -void Tesseract::PrerecAllWordsPar(const GenericVector& words) { - // Prepare all the blobs. - GenericVector blobs; - for (int w = 0; w < words.size(); ++w) { - if (words[w].word->ratings != nullptr && - words[w].word->ratings->get(0, 0) == nullptr) { - for (int s = 0; s < words[w].lang_words.size(); ++s) { - Tesseract* sub = s < sub_langs_.size() ? sub_langs_[s] : this; - const WERD_RES& word = *words[w].lang_words[s]; - for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) { - blobs.push_back(BlobData(b, sub, word)); - } - } - } - } - // Pre-classify all the blobs. - if (tessedit_parallelize > 1) { -#ifdef _OPENMP -#pragma omp parallel for num_threads(10) -#endif // _OPENMP - for (int b = 0; b < blobs.size(); ++b) { - *blobs[b].choices = - blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, nullptr); - } - } else { - // TODO(AMD) parallelize this. - for (int b = 0; b < blobs.size(); ++b) { - *blobs[b].choices = - blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, nullptr); - } - } -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs.cpp deleted file mode 100644 index 5265980d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs.cpp +++ /dev/null @@ -1,2583 +0,0 @@ -/********************************************************************** - * File: paragraphs.cpp - * Description: Paragraph detection for tesseract. - * Author: David Eger - * Created: 25 February 2011 - * - * (C) Copyright 2011, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "paragraphs.h" -#include // for isspace -#include // for abs -#include // for snprintf -#include // for abs -#include // for strchr, strlen -#include // for max -#include // for unique_ptr -#include "genericvector.h" // for GenericVector, GenericVectorEqEq -#include "helpers.h" // for UpdateRange, ClipToRange -#include "host.h" // for NearlyEqual -#include "mutableiterator.h" // for MutableIterator -#include "ocrblock.h" // for BLOCK -#include "ocrpara.h" // for ParagraphModel, PARA, PARA_IT, PARA... -#include "ocrrow.h" // for ROW -#include "pageiterator.h" // for PageIterator -#include "pageres.h" // for PAGE_RES_IT, WERD_RES, ROW_RES, BLO... 
-#include "paragraphs_internal.h" // for RowScratchRegisters, SetOfModels -#include "pdblock.h" // for PDBLK -#include "polyblk.h" // for POLY_BLOCK -#include "publictypes.h" // for JUSTIFICATION_LEFT, JUSTIFICATION_R... -#include "ratngs.h" // for WERD_CHOICE -#include "rect.h" // for TBOX -#include "statistc.h" // for STATS -#include "strngs.h" // for STRING -#include "tprintf.h" // for tprintf -#include "unichar.h" // for UNICHAR, UNICHAR_ID -#include "unicharset.h" // for UNICHARSET -#include "unicodes.h" // for kPDF, kRLE -#include "werd.h" // for WERD, W_REP_CHAR - -namespace tesseract { - -// Special "weak" ParagraphModels. -const ParagraphModel *kCrownLeft - = reinterpret_cast(0xDEAD111F); -const ParagraphModel *kCrownRight - = reinterpret_cast(0xDEAD888F); - -// Do the text and geometry of two rows support a paragraph break between them? -static bool LikelyParagraphStart(const RowScratchRegisters &before, - const RowScratchRegisters &after, - tesseract::ParagraphJustification j); - -// Given the width of a typical space between words, what is the threshold -// by which by which we think left and right alignments for paragraphs -// can vary and still be aligned. -static int Epsilon(int space_pix) { - return space_pix * 4 / 5; -} - -static bool AcceptableRowArgs( - int debug_level, int min_num_rows, const char *function_name, - const GenericVector *rows, - int row_start, int row_end) { - if (row_start < 0 || row_end > rows->size() || row_start > row_end) { - tprintf("Invalid arguments rows[%d, %d) while rows is of size %d.\n", - row_start, row_end, rows->size()); - return false; - } - if (row_end - row_start < min_num_rows) { - if (debug_level > 1) { - tprintf("# Too few rows[%d, %d) for %s.\n", - row_start, row_end, function_name); - } - return false; - } - return true; -} - -// =============================== Debug Code ================================ - -// Convert an integer to a decimal string. 
-static STRING StrOf(int num) { - char buffer[30]; - snprintf(buffer, sizeof(buffer), "%d", num); - return STRING(buffer); -} - -// Given a row-major matrix of unicode text and a column separator, print -// a formatted table. For ASCII, we get good column alignment. -static void PrintTable(const GenericVector > &rows, - const STRING &colsep) { - GenericVector max_col_widths; - for (int r = 0; r < rows.size(); r++) { - int num_columns = rows[r].size(); - for (int c = 0; c < num_columns; c++) { - int num_unicodes = 0; - for (int i = 0; i < rows[r][c].size(); i++) { - if ((rows[r][c][i] & 0xC0) != 0x80) num_unicodes++; - } - if (c >= max_col_widths.size()) { - max_col_widths.push_back(num_unicodes); - } else { - if (num_unicodes > max_col_widths[c]) - max_col_widths[c] = num_unicodes; - } - } - } - - GenericVector col_width_patterns; - for (int c = 0; c < max_col_widths.size(); c++) { - col_width_patterns.push_back( - STRING("%-") + StrOf(max_col_widths[c]) + "s"); - } - - for (int r = 0; r < rows.size(); r++) { - for (int c = 0; c < rows[r].size(); c++) { - if (c > 0) - tprintf("%s", colsep.string()); - tprintf(col_width_patterns[c].string(), rows[r][c].string()); - } - tprintf("\n"); - } -} - -static STRING RtlEmbed(const STRING &word, bool rtlify) { - if (rtlify) - return STRING(kRLE) + word + STRING(kPDF); - return word; -} - -// Print the current thoughts of the paragraph detector. 
-static void PrintDetectorState(const ParagraphTheory &theory, - const GenericVector &rows) { - GenericVector > output; - output.push_back(GenericVector()); - output.back().push_back("#row"); - output.back().push_back("space"); - output.back().push_back(".."); - output.back().push_back("lword[widthSEL]"); - output.back().push_back("rword[widthSEL]"); - RowScratchRegisters::AppendDebugHeaderFields(&output.back()); - output.back().push_back("text"); - - for (int i = 0; i < rows.size(); i++) { - output.push_back(GenericVector()); - GenericVector &row = output.back(); - const RowInfo& ri = *rows[i].ri_; - row.push_back(StrOf(i)); - row.push_back(StrOf(ri.average_interword_space)); - row.push_back(ri.has_leaders ? ".." : " "); - row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + - "[" + StrOf(ri.lword_box.width()) + - (ri.lword_likely_starts_idea ? "S" : "s") + - (ri.lword_likely_ends_idea ? "E" : "e") + - (ri.lword_indicates_list_item ? "L" : "l") + - "]"); - row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + - "[" + StrOf(ri.rword_box.width()) + - (ri.rword_likely_starts_idea ? "S" : "s") + - (ri.rword_likely_ends_idea ? "E" : "e") + - (ri.rword_indicates_list_item ? 
"L" : "l") + - "]"); - rows[i].AppendDebugInfo(theory, &row); - row.push_back(RtlEmbed(ri.text, !ri.ltr)); - } - PrintTable(output, " "); - - tprintf("Active Paragraph Models:\n"); - for (int m = 0; m < theory.models().size(); m++) { - tprintf(" %d: %s\n", m + 1, theory.models()[m]->ToString().string()); - } -} - -static void DebugDump( - bool should_print, - const STRING &phase, - const ParagraphTheory &theory, - const GenericVector &rows) { - if (!should_print) - return; - tprintf("# %s\n", phase.string()); - PrintDetectorState(theory, rows); -} - -// Print out the text for rows[row_start, row_end) -static void PrintRowRange(const GenericVector &rows, - int row_start, int row_end) { - tprintf("======================================\n"); - for (int row = row_start; row < row_end; row++) { - tprintf("%s\n", rows[row].ri_->text.string()); - } - tprintf("======================================\n"); -} - -// ============= Brain Dead Language Model (ASCII Version) =================== - -static bool IsLatinLetter(int ch) { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); -} - -static bool IsDigitLike(int ch) { - return ch == 'o' || ch == 'O' || ch == 'l' || ch == 'I'; -} - -static bool IsOpeningPunct(int ch) { - return strchr("'\"({[", ch) != nullptr; -} - -static bool IsTerminalPunct(int ch) { - return strchr(":'\".?!]})", ch) != nullptr; -} - -// Return a pointer after consuming as much text as qualifies as roman numeral. -static const char *SkipChars(const char *str, const char *toskip) { - while (*str != '\0' && strchr(toskip, *str)) { str++; } - return str; -} - -static const char *SkipChars(const char *str, bool (*skip)(int)) { - while (*str != '\0' && skip(*str)) { str++; } - return str; -} - -static const char *SkipOne(const char *str, const char *toskip) { - if (*str != '\0' && strchr(toskip, *str)) return str + 1; - return str; -} - -// Return whether it is very likely that this is a numeral marker that could -// start a list item. 
Some examples include: -// A I iii. VI (2) 3.5. [C-4] -static bool LikelyListNumeral(const STRING &word) { - const char *kRomans = "ivxlmdIVXLMD"; - const char *kDigits = "012345789"; - const char *kOpen = "[{("; - const char *kSep = ":;-.,"; - const char *kClose = "]})"; - - int num_segments = 0; - const char *pos = word.string(); - while (*pos != '\0' && num_segments < 3) { - // skip up to two open parens. - const char *numeral_start = SkipOne(SkipOne(pos, kOpen), kOpen); - const char *numeral_end = SkipChars(numeral_start, kRomans); - if (numeral_end != numeral_start) { - // Got Roman Numeral. Great. - } else { - numeral_end = SkipChars(numeral_start, kDigits); - if (numeral_end == numeral_start) { - // If there's a single latin letter, we can use that. - numeral_end = SkipChars(numeral_start, IsLatinLetter); - if (numeral_end - numeral_start != 1) - break; - } - } - // We got some sort of numeral. - num_segments++; - // Skip any trailing parens or punctuation. - pos = SkipChars(SkipChars(numeral_end, kClose), kSep); - if (pos == numeral_end) - break; - } - return *pos == '\0'; -} - -static bool LikelyListMark(const STRING &word) { - const char *kListMarks = "0Oo*.,+."; - return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr; -} - -bool AsciiLikelyListItem(const STRING &word) { - return LikelyListMark(word) || LikelyListNumeral(word); -} - -// ========== Brain Dead Language Model (Tesseract Version) ================ - -// Return the first Unicode Codepoint from werd[pos]. -int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos) { - if (!u || !werd || pos > werd->length()) - return 0; - return UNICHAR(u->id_to_unichar(werd->unichar_id(pos)), -1).first_uni(); -} - -// A useful helper class for finding the first j >= i so that word[j] -// does not have given character type. 
-class UnicodeSpanSkipper { - public: - UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word) - : u_(unicharset), word_(word) { wordlen_ = word->length(); } - - // Given an input position, return the first position >= pos not punc. - int SkipPunc(int pos); - // Given an input position, return the first position >= pos not digit. - int SkipDigits(int pos); - // Given an input position, return the first position >= pos not roman. - int SkipRomans(int pos); - // Given an input position, return the first position >= pos not alpha. - int SkipAlpha(int pos); - - private: - const UNICHARSET *u_; - const WERD_CHOICE *word_; - int wordlen_; -}; - -int UnicodeSpanSkipper::SkipPunc(int pos) { - while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) pos++; - return pos; -} - -int UnicodeSpanSkipper::SkipDigits(int pos) { - while (pos < wordlen_ && (u_->get_isdigit(word_->unichar_id(pos)) || - IsDigitLike(UnicodeFor(u_, word_, pos)))) pos++; - return pos; -} - -int UnicodeSpanSkipper::SkipRomans(int pos) { - const char *kRomans = "ivxlmdIVXLMD"; - while (pos < wordlen_) { - int ch = UnicodeFor(u_, word_, pos); - if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) break; - pos++; - } - return pos; -} - -int UnicodeSpanSkipper::SkipAlpha(int pos) { - while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) pos++; - return pos; -} - -static bool LikelyListMarkUnicode(int ch) { - if (ch < 0x80) { - STRING single_ch; - single_ch += ch; - return LikelyListMark(single_ch); - } - switch (ch) { - // TODO(eger) expand this list of unicodes as needed. 
- case 0x00B0: // degree sign - case 0x2022: // bullet - case 0x25E6: // white bullet - case 0x00B7: // middle dot - case 0x25A1: // white square - case 0x25A0: // black square - case 0x25AA: // black small square - case 0x2B1D: // black very small square - case 0x25BA: // black right-pointing pointer - case 0x25CF: // black circle - case 0x25CB: // white circle - return true; - default: - break; // fall through - } - return false; -} - -// Return whether it is very likely that this is a numeral marker that could -// start a list item. Some examples include: -// A I iii. VI (2) 3.5. [C-4] -static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) { - if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0))) - return true; - - UnicodeSpanSkipper m(u, werd); - int num_segments = 0; - int pos = 0; - while (pos < werd->length() && num_segments < 3) { - int numeral_start = m.SkipPunc(pos); - if (numeral_start > pos + 1) break; - int numeral_end = m.SkipRomans(numeral_start); - if (numeral_end == numeral_start) { - numeral_end = m.SkipDigits(numeral_start); - if (numeral_end == numeral_start) { - // If there's a single latin letter, we can use that. - numeral_end = m.SkipAlpha(numeral_start); - if (numeral_end - numeral_start != 1) - break; - } - } - // We got some sort of numeral. - num_segments++; - // Skip any trailing punctuation. - pos = m.SkipPunc(numeral_end); - if (pos == numeral_end) - break; - } - return pos == werd->length(); -} - -// ========= Brain Dead Language Model (combined entry points) ================ - -// Given the leftmost word of a line either as a Tesseract unicharset + werd -// or a utf8 string, set the following attributes for it: -// is_list - this word might be a list number or bullet. -// starts_idea - this word is likely to start a sentence. -// ends_idea - this word is likely to end a sentence. 
-void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea) { - *is_list = false; - *starts_idea = false; - *ends_idea = false; - if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) { // Empty - *ends_idea = true; - return; - } - - if (unicharset && werd) { // We have a proper werd and unicharset so use it. - if (UniLikelyListItem(unicharset, werd)) { - *is_list = true; - *starts_idea = true; - *ends_idea = true; - } - if (unicharset->get_isupper(werd->unichar_id(0))) { - *starts_idea = true; - } - if (unicharset->get_ispunctuation(werd->unichar_id(0))) { - *starts_idea = true; - *ends_idea = true; - } - } else { // Assume utf8 is mostly ASCII - if (AsciiLikelyListItem(utf8)) { - *is_list = true; - *starts_idea = true; - } - int start_letter = utf8[0]; - if (IsOpeningPunct(start_letter)) { - *starts_idea = true; - } - if (IsTerminalPunct(start_letter)) { - *ends_idea = true; - } - if (start_letter >= 'A' && start_letter <= 'Z') { - *starts_idea = true; - } - } -} - -// Given the rightmost word of a line either as a Tesseract unicharset + werd -// or a utf8 string, set the following attributes for it: -// is_list - this word might be a list number or bullet. -// starts_idea - this word is likely to start a sentence. -// ends_idea - this word is likely to end a sentence. -void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea) { - *is_list = false; - *starts_idea = false; - *ends_idea = false; - if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) { // Empty - *ends_idea = true; - return; - } - - if (unicharset && werd) { // We have a proper werd and unicharset so use it. 
- if (UniLikelyListItem(unicharset, werd)) { - *is_list = true; - *starts_idea = true; - } - UNICHAR_ID last_letter = werd->unichar_id(werd->length() - 1); - if (unicharset->get_ispunctuation(last_letter)) { - *ends_idea = true; - } - } else { // Assume utf8 is mostly ASCII - if (AsciiLikelyListItem(utf8)) { - *is_list = true; - *starts_idea = true; - } - int last_letter = utf8[utf8.size() - 1]; - if (IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter)) { - *ends_idea = true; - } - } -} - -// =============== Implementation of RowScratchRegisters ===================== -/* static */ -void RowScratchRegisters::AppendDebugHeaderFields( - GenericVector *header) { - header->push_back("[lmarg,lind;rind,rmarg]"); - header->push_back("model"); -} - -void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory, - GenericVector *dbg) const { - char s[30]; - snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", - lmargin_, lindent_, rindent_, rmargin_); - dbg->push_back(s); - STRING model_string; - model_string += static_cast(GetLineType()); - model_string += ":"; - - int model_numbers = 0; - for (int h = 0; h < hypotheses_.size(); h++) { - if (hypotheses_[h].model == nullptr) - continue; - if (model_numbers > 0) - model_string += ","; - if (StrongModel(hypotheses_[h].model)) { - model_string += StrOf(1 + theory.IndexOf(hypotheses_[h].model)); - } else if (hypotheses_[h].model == kCrownLeft) { - model_string += "CrL"; - } else if (hypotheses_[h].model == kCrownRight) { - model_string += "CrR"; - } - model_numbers++; - } - if (model_numbers == 0) - model_string += "0"; - - dbg->push_back(model_string); -} - -void RowScratchRegisters::Init(const RowInfo &row) { - ri_ = &row; - lmargin_ = 0; - lindent_ = row.pix_ldistance; - rmargin_ = 0; - rindent_ = row.pix_rdistance; -} - -LineType RowScratchRegisters::GetLineType() const { - if (hypotheses_.empty()) - return LT_UNKNOWN; - bool has_start = false; - bool has_body = false; - for (int i = 0; i < hypotheses_.size(); i++) 
{ - switch (hypotheses_[i].ty) { - case LT_START: has_start = true; break; - case LT_BODY: has_body = true; break; - default: - tprintf("Encountered bad value in hypothesis list: %c\n", - hypotheses_[i].ty); - break; - } - } - if (has_start && has_body) - return LT_MULTIPLE; - return has_start ? LT_START : LT_BODY; -} - -LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const { - if (hypotheses_.empty()) - return LT_UNKNOWN; - bool has_start = false; - bool has_body = false; - for (int i = 0; i < hypotheses_.size(); i++) { - if (hypotheses_[i].model != model) - continue; - switch (hypotheses_[i].ty) { - case LT_START: has_start = true; break; - case LT_BODY: has_body = true; break; - default: - tprintf("Encountered bad value in hypothesis list: %c\n", - hypotheses_[i].ty); - break; - } - } - if (has_start && has_body) - return LT_MULTIPLE; - return has_start ? LT_START : LT_BODY; -} - -void RowScratchRegisters::SetStartLine() { - LineType current_lt = GetLineType(); - if (current_lt != LT_UNKNOWN && current_lt != LT_START) { - tprintf("Trying to set a line to be START when it's already BODY.\n"); - } - if (current_lt == LT_UNKNOWN || current_lt == LT_BODY) { - hypotheses_.push_back_new(LineHypothesis(LT_START, nullptr)); - } -} - -void RowScratchRegisters::SetBodyLine() { - LineType current_lt = GetLineType(); - if (current_lt != LT_UNKNOWN && current_lt != LT_BODY) { - tprintf("Trying to set a line to be BODY when it's already START.\n"); - } - if (current_lt == LT_UNKNOWN || current_lt == LT_START) { - hypotheses_.push_back_new(LineHypothesis(LT_BODY, nullptr)); - } -} - -void RowScratchRegisters::AddStartLine(const ParagraphModel *model) { - hypotheses_.push_back_new(LineHypothesis(LT_START, model)); - int old_idx = hypotheses_.get_index(LineHypothesis(LT_START, nullptr)); - if (old_idx >= 0) - hypotheses_.remove(old_idx); -} - -void RowScratchRegisters::AddBodyLine(const ParagraphModel *model) { - 
hypotheses_.push_back_new(LineHypothesis(LT_BODY, model)); - int old_idx = hypotheses_.get_index(LineHypothesis(LT_BODY, nullptr)); - if (old_idx >= 0) - hypotheses_.remove(old_idx); -} - -void RowScratchRegisters::StartHypotheses(SetOfModels *models) const { - for (int h = 0; h < hypotheses_.size(); h++) { - if (hypotheses_[h].ty == LT_START && StrongModel(hypotheses_[h].model)) - models->push_back_new(hypotheses_[h].model); - } -} - -void RowScratchRegisters::StrongHypotheses(SetOfModels *models) const { - for (int h = 0; h < hypotheses_.size(); h++) { - if (StrongModel(hypotheses_[h].model)) - models->push_back_new(hypotheses_[h].model); - } -} - -void RowScratchRegisters::NonNullHypotheses(SetOfModels *models) const { - for (int h = 0; h < hypotheses_.size(); h++) { - if (hypotheses_[h].model != nullptr) - models->push_back_new(hypotheses_[h].model); - } -} - -const ParagraphModel *RowScratchRegisters::UniqueStartHypothesis() const { - if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START) - return nullptr; - return hypotheses_[0].model; -} - -const ParagraphModel *RowScratchRegisters::UniqueBodyHypothesis() const { - if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY) - return nullptr; - return hypotheses_[0].model; -} - -// Discard any hypotheses whose model is not in the given list. -void RowScratchRegisters::DiscardNonMatchingHypotheses( - const SetOfModels &models) { - if (models.empty()) - return; - for (int h = hypotheses_.size() - 1; h >= 0; h--) { - if (!models.contains(hypotheses_[h].model)) { - hypotheses_.remove(h); - } - } -} - -// ============ Geometry based Paragraph Detection Algorithm ================= - -struct Cluster { - Cluster() : center(0), count(0) {} - Cluster(int cen, int num) : center(cen), count(num) {} - - int center; // The center of the cluster. - int count; // The number of entries within the cluster. 
-}; - -class SimpleClusterer { - public: - explicit SimpleClusterer(int max_cluster_width) - : max_cluster_width_(max_cluster_width) {} - void Add(int value) { values_.push_back(value); } - int size() const { return values_.size(); } - void GetClusters(GenericVector *clusters); - - private: - int max_cluster_width_; - GenericVectorEqEq values_; -}; - -// Return the index of the cluster closest to value. -static int ClosestCluster(const GenericVector &clusters, int value) { - int best_index = 0; - for (int i = 0; i < clusters.size(); i++) { - if (abs(value - clusters[i].center) < - abs(value - clusters[best_index].center)) - best_index = i; - } - return best_index; -} - -void SimpleClusterer::GetClusters(GenericVector *clusters) { - clusters->clear(); - values_.sort(); - for (int i = 0; i < values_.size();) { - int orig_i = i; - int lo = values_[i]; - int hi = lo; - while (++i < values_.size() && values_[i] <= lo + max_cluster_width_) { - hi = values_[i]; - } - clusters->push_back(Cluster((hi + lo) / 2, i - orig_i)); - } -} - -// Calculate left- and right-indent tab stop values seen in -// rows[row_start, row_end) given a tolerance of tolerance. -static void CalculateTabStops(GenericVector *rows, - int row_start, int row_end, int tolerance, - GenericVector *left_tabs, - GenericVector *right_tabs) { - if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end)) - return; - // First pass: toss all left and right indents into clusterers. 
- SimpleClusterer initial_lefts(tolerance); - SimpleClusterer initial_rights(tolerance); - GenericVector initial_left_tabs; - GenericVector initial_right_tabs; - for (int i = row_start; i < row_end; i++) { - initial_lefts.Add((*rows)[i].lindent_); - initial_rights.Add((*rows)[i].rindent_); - } - initial_lefts.GetClusters(&initial_left_tabs); - initial_rights.GetClusters(&initial_right_tabs); - - // Second pass: cluster only lines that are not "stray" - // An example of a stray line is a page number -- a line whose start - // and end tab-stops are far outside the typical start and end tab-stops - // for the block. - // Put another way, we only cluster data from lines whose start or end - // tab stop is frequent. - SimpleClusterer lefts(tolerance); - SimpleClusterer rights(tolerance); - - // Outlier elimination. We might want to switch this to test outlier-ness - // based on how strange a position an outlier is in instead of or in addition - // to how rare it is. These outliers get re-added if we end up having too - // few tab stops, to work with, however. - int infrequent_enough_to_ignore = 0; - if (row_end - row_start >= 8) infrequent_enough_to_ignore = 1; - if (row_end - row_start >= 20) infrequent_enough_to_ignore = 2; - - for (int i = row_start; i < row_end; i++) { - int lidx = ClosestCluster(initial_left_tabs, (*rows)[i].lindent_); - int ridx = ClosestCluster(initial_right_tabs, (*rows)[i].rindent_); - if (initial_left_tabs[lidx].count > infrequent_enough_to_ignore || - initial_right_tabs[ridx].count > infrequent_enough_to_ignore) { - lefts.Add((*rows)[i].lindent_); - rights.Add((*rows)[i].rindent_); - } - } - lefts.GetClusters(left_tabs); - rights.GetClusters(right_tabs); - - if ((left_tabs->size() == 1 && right_tabs->size() >= 4) || - (right_tabs->size() == 1 && left_tabs->size() >= 4)) { - // One side is really ragged, and the other only has one tab stop, - // so those "insignificant outliers" are probably important, actually. 
- // This often happens on a page of an index. Add back in the ones - // we omitted in the first pass. - for (int i = row_start; i < row_end; i++) { - int lidx = ClosestCluster(initial_left_tabs, (*rows)[i].lindent_); - int ridx = ClosestCluster(initial_right_tabs, (*rows)[i].rindent_); - if (!(initial_left_tabs[lidx].count > infrequent_enough_to_ignore || - initial_right_tabs[ridx].count > infrequent_enough_to_ignore)) { - lefts.Add((*rows)[i].lindent_); - rights.Add((*rows)[i].rindent_); - } - } - } - lefts.GetClusters(left_tabs); - rights.GetClusters(right_tabs); - - // If one side is almost a two-indent aligned side, and the other clearly - // isn't, try to prune out the least frequent tab stop from that side. - if (left_tabs->size() == 3 && right_tabs->size() >= 4) { - int to_prune = -1; - for (int i = left_tabs->size() - 1; i >= 0; i--) { - if (to_prune < 0 || - (*left_tabs)[i].count < (*left_tabs)[to_prune].count) { - to_prune = i; - } - } - if (to_prune >= 0 && - (*left_tabs)[to_prune].count <= infrequent_enough_to_ignore) { - left_tabs->remove(to_prune); - } - } - if (right_tabs->size() == 3 && left_tabs->size() >= 4) { - int to_prune = -1; - for (int i = right_tabs->size() - 1; i >= 0; i--) { - if (to_prune < 0 || - (*right_tabs)[i].count < (*right_tabs)[to_prune].count) { - to_prune = i; - } - } - if (to_prune >= 0 && - (*right_tabs)[to_prune].count <= infrequent_enough_to_ignore) { - right_tabs->remove(to_prune); - } - } -} - -// Given a paragraph model mark rows[row_start, row_end) as said model -// start or body lines. -// -// Case 1: model->first_indent_ != model->body_indent_ -// Differentiating the paragraph start lines from the paragraph body lines in -// this case is easy, we just see how far each line is indented. -// -// Case 2: model->first_indent_ == model->body_indent_ -// Here, we find end-of-paragraph lines by looking for "short lines." 
-// What constitutes a "short line" changes depending on whether the text -// ragged-right[left] or fully justified (aligned left and right). -// -// Case 2a: Ragged Right (or Left) text. (eop_threshold == 0) -// We have a new paragraph it the first word would have at the end -// of the previous line. -// -// Case 2b: Fully Justified. (eop_threshold > 0) -// We mark a line as short (end of paragraph) if the offside indent -// is greater than eop_threshold. -static void MarkRowsWithModel(GenericVector *rows, - int row_start, int row_end, - const ParagraphModel *model, - bool ltr, int eop_threshold) { - if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) - return; - for (int row = row_start; row < row_end; row++) { - bool valid_first = ValidFirstLine(rows, row, model); - bool valid_body = ValidBodyLine(rows, row, model); - if (valid_first && !valid_body) { - (*rows)[row].AddStartLine(model); - } else if (valid_body && !valid_first) { - (*rows)[row].AddBodyLine(model); - } else if (valid_body && valid_first) { - bool after_eop = (row == row_start); - if (row > row_start) { - if (eop_threshold > 0) { - if (model->justification() == JUSTIFICATION_LEFT) { - after_eop = (*rows)[row - 1].rindent_ > eop_threshold; - } else { - after_eop = (*rows)[row - 1].lindent_ > eop_threshold; - } - } else { - after_eop = FirstWordWouldHaveFit((*rows)[row - 1], (*rows)[row], - model->justification()); - } - } - if (after_eop) { - (*rows)[row].AddStartLine(model); - } else { - (*rows)[row].AddBodyLine(model); - } - } else { - // Do nothing. Stray row. 
- } - } -} - -// GeometricClassifierState holds all of the information we'll use while -// trying to determine a paragraph model for the text lines in a block of -// text: -// + the rows under consideration [row_start, row_end) -// + the common left- and right-indent tab stops -// + does the block start out left-to-right or right-to-left -// Further, this struct holds the data we amass for the (single) ParagraphModel -// we'll assign to the text lines (assuming we get that far). -struct GeometricClassifierState { - GeometricClassifierState(int dbg_level, - GenericVector *r, - int r_start, int r_end) - : debug_level(dbg_level), rows(r), row_start(r_start), row_end(r_end), - margin(0) { - tolerance = InterwordSpace(*r, r_start, r_end); - CalculateTabStops(r, r_start, r_end, tolerance, - &left_tabs, &right_tabs); - if (debug_level >= 3) { - tprintf("Geometry: TabStop cluster tolerance = %d; " - "%d left tabs; %d right tabs\n", - tolerance, left_tabs.size(), right_tabs.size()); - } - ltr = (*r)[r_start].ri_->ltr; - } - - void AssumeLeftJustification() { - just = tesseract::JUSTIFICATION_LEFT; - margin = (*rows)[row_start].lmargin_; - } - - void AssumeRightJustification() { - just = tesseract::JUSTIFICATION_RIGHT; - margin = (*rows)[row_start].rmargin_; - } - - // Align tabs are the tab stops the text is aligned to. - const GenericVector &AlignTabs() const { - if (just == tesseract::JUSTIFICATION_RIGHT) return right_tabs; - return left_tabs; - } - - // Offside tabs are the tab stops opposite the tabs used to align the text. - // - // Note that for a left-to-right text which is aligned to the right such as - // this function comment, the offside tabs are the horizontal tab stops - // marking the beginning of ("Note", "this" and "marking"). 
- const GenericVector &OffsideTabs() const { - if (just == tesseract::JUSTIFICATION_RIGHT) return left_tabs; - return right_tabs; - } - - // Return whether the i'th row extends from the leftmost left tab stop - // to the right most right tab stop. - bool IsFullRow(int i) const { - return ClosestCluster(left_tabs, (*rows)[i].lindent_) == 0 && - ClosestCluster(right_tabs, (*rows)[i].rindent_) == 0; - } - - int AlignsideTabIndex(int row_idx) const { - return ClosestCluster(AlignTabs(), (*rows)[row_idx].AlignsideIndent(just)); - } - - // Given what we know about the paragraph justification (just), would the - // first word of row_b have fit at the end of row_a? - bool FirstWordWouldHaveFit(int row_a, int row_b) { - return ::tesseract::FirstWordWouldHaveFit( - (*rows)[row_a], (*rows)[row_b], just); - } - - void PrintRows() const { PrintRowRange(*rows, row_start, row_end); } - - void Fail(int min_debug_level, const char *why) const { - if (debug_level < min_debug_level) return; - tprintf("# %s\n", why); - PrintRows(); - } - - ParagraphModel Model() const { - return ParagraphModel(just, margin, first_indent, body_indent, tolerance); - } - - // We print out messages with a debug level at least as great as debug_level. - int debug_level; - - // The Geometric Classifier was asked to find a single paragraph model - // to fit the text rows (*rows)[row_start, row_end) - GenericVector *rows; - int row_start; - int row_end; - - // The amount by which we expect the text edge can vary and still be aligned. - int tolerance; - - // Is the script in this text block left-to-right? - // HORRIBLE ROUGH APPROXIMATION. TODO(eger): Improve - bool ltr; - - // These left and right tab stops were determined to be the common tab - // stops for the given text. - GenericVector left_tabs; - GenericVector right_tabs; - - // These are parameters we must determine to create a ParagraphModel. 
- tesseract::ParagraphJustification just; - int margin; - int first_indent; - int body_indent; - - // eop_threshold > 0 if the text is fully justified. See MarkRowsWithModel() - int eop_threshold; -}; - -// Given a section of text where strong textual clues did not help identifying -// paragraph breaks, and for which the left and right indents have exactly -// three tab stops between them, attempt to find the paragraph breaks based -// solely on the outline of the text and whether the script is left-to-right. -// -// Algorithm Detail: -// The selected rows are in the form of a rectangle except -// for some number of "short lines" of the same length: -// -// (A1) xxxxxxxxxxxxx (B1) xxxxxxxxxxxx -// xxxxxxxxxxx xxxxxxxxxx # A "short" line. -// xxxxxxxxxxxxx xxxxxxxxxxxx -// xxxxxxxxxxxxx xxxxxxxxxxxx -// -// We have a slightly different situation if the only short -// line is at the end of the excerpt. -// -// (A2) xxxxxxxxxxxxx (B2) xxxxxxxxxxxx -// xxxxxxxxxxxxx xxxxxxxxxxxx -// xxxxxxxxxxxxx xxxxxxxxxxxx -// xxxxxxxxxxx xxxxxxxxxx # A "short" line. 
-// -// We'll interpret these as follows based on the reasoning in the comment for -// GeometricClassify(): -// [script direction: first indent, body indent] -// (A1) LtR: 2,0 RtL: 0,0 (B1) LtR: 0,0 RtL: 2,0 -// (A2) LtR: 2,0 RtL: CrR (B2) LtR: CrL RtL: 2,0 -static void GeometricClassifyThreeTabStopTextBlock( - int debug_level, - GeometricClassifierState &s, - ParagraphTheory *theory) { - int num_rows = s.row_end - s.row_start; - int num_full_rows = 0; - int last_row_full = 0; - for (int i = s.row_start; i < s.row_end; i++) { - if (s.IsFullRow(i)) { - num_full_rows++; - if (i == s.row_end - 1) last_row_full++; - } - } - - if (num_full_rows < 0.7 * num_rows) { - s.Fail(1, "Not enough full lines to know which lines start paras."); - return; - } - - // eop_threshold gets set if we're fully justified; see MarkRowsWithModel() - s.eop_threshold = 0; - - if (s.ltr) { - s.AssumeLeftJustification(); - } else { - s.AssumeRightJustification(); - } - - if (debug_level > 0) { - tprintf("# Not enough variety for clear outline classification. " - "Guessing these are %s aligned based on script.\n", - s.ltr ? "left" : "right"); - s.PrintRows(); - } - - if (s.AlignTabs().size() == 2) { // case A1 or A2 - s.first_indent = s.AlignTabs()[1].center; - s.body_indent = s.AlignTabs()[0].center; - } else { // case B1 or B2 - if (num_rows - 1 == num_full_rows - last_row_full) { - // case B2 - const ParagraphModel *model = s.ltr ? 
kCrownLeft : kCrownRight; - (*s.rows)[s.row_start].AddStartLine(model); - for (int i = s.row_start + 1; i < s.row_end; i++) { - (*s.rows)[i].AddBodyLine(model); - } - return; - } else { - // case B1 - s.first_indent = s.body_indent = s.AlignTabs()[0].center; - s.eop_threshold = (s.OffsideTabs()[0].center + - s.OffsideTabs()[1].center) / 2; - } - } - const ParagraphModel *model = theory->AddModel(s.Model()); - MarkRowsWithModel(s.rows, s.row_start, s.row_end, model, - s.ltr, s.eop_threshold); - return; -} - -// This function is called if strong textual clues were not available, but -// the caller hopes that the paragraph breaks will be super obvious just -// by the outline of the text. -// -// The particularly difficult case is figuring out what's going on if you -// don't have enough short paragraph end lines to tell us what's going on. -// -// For instance, let's say you have the following outline: -// -// (A1) xxxxxxxxxxxxxxxxxxxxxx -// xxxxxxxxxxxxxxxxxxxx -// xxxxxxxxxxxxxxxxxxxxxx -// xxxxxxxxxxxxxxxxxxxxxx -// -// Even if we know that the text is left-to-right and so will probably be -// left-aligned, both of the following are possible texts: -// -// (A1a) 1. Here our list item -// with two full lines. -// 2. Here a second item. -// 3. Here our third one. -// -// (A1b) so ends paragraph one. -// Here starts another -// paragraph we want to -// read. This continues -// -// These examples are obvious from the text and should have been caught -// by the StrongEvidenceClassify pass. However, for languages where we don't -// have capital letters to go on (e.g. Hebrew, Arabic, Hindi, Chinese), -// it's worth guessing that (A1b) is the correct interpretation if there are -// far more "full" lines than "short" lines. 
-static void GeometricClassify(int debug_level, - GenericVector *rows, - int row_start, int row_end, - ParagraphTheory *theory) { - if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end)) - return; - if (debug_level > 1) { - tprintf("###############################################\n"); - tprintf("##### GeometricClassify( rows[%d:%d) ) ####\n", - row_start, row_end); - tprintf("###############################################\n"); - } - RecomputeMarginsAndClearHypotheses(rows, row_start, row_end, 10); - - GeometricClassifierState s(debug_level, rows, row_start, row_end); - if (s.left_tabs.size() > 2 && s.right_tabs.size() > 2) { - s.Fail(2, "Too much variety for simple outline classification."); - return; - } - if (s.left_tabs.size() <= 1 && s.right_tabs.size() <= 1) { - s.Fail(1, "Not enough variety for simple outline classification."); - return; - } - if (s.left_tabs.size() + s.right_tabs.size() == 3) { - GeometricClassifyThreeTabStopTextBlock(debug_level, s, theory); - return; - } - - // At this point, we know that one side has at least two tab stops, and the - // other side has one or two tab stops. - // Left to determine: - // (1) Which is the body indent and which is the first line indent? - // (2) Is the text fully justified? - - // If one side happens to have three or more tab stops, assume that side - // is opposite of the aligned side. - if (s.right_tabs.size() > 2) { - s.AssumeLeftJustification(); - } else if (s.left_tabs.size() > 2) { - s.AssumeRightJustification(); - } else if (s.ltr) { // guess based on script direction - s.AssumeLeftJustification(); - } else { - s.AssumeRightJustification(); - } - - if (s.AlignTabs().size() == 2) { - // For each tab stop on the aligned side, how many of them appear - // to be paragraph start lines? [first lines] - int firsts[2] = {0, 0}; - // Count the first line as a likely paragraph start line. 
- firsts[s.AlignsideTabIndex(s.row_start)]++; - // For each line, if the first word would have fit on the previous - // line count it as a likely paragraph start line. - bool jam_packed = true; - for (int i = s.row_start + 1; i < s.row_end; i++) { - if (s.FirstWordWouldHaveFit(i - 1, i)) { - firsts[s.AlignsideTabIndex(i)]++; - jam_packed = false; - } - } - // Make an extra accounting for the last line of the paragraph just - // in case it's the only short line in the block. That is, take its - // first word as typical and see if this looks like the *last* line - // of a paragraph. If so, mark the *other* indent as probably a first. - if (jam_packed && s.FirstWordWouldHaveFit(s.row_end - 1, s.row_end - 1)) { - firsts[1 - s.AlignsideTabIndex(s.row_end - 1)]++; - } - - int percent0firsts, percent1firsts; - percent0firsts = (100 * firsts[0]) / s.AlignTabs()[0].count; - percent1firsts = (100 * firsts[1]) / s.AlignTabs()[1].count; - - // TODO(eger): Tune these constants if necessary. - if ((percent0firsts < 20 && 30 < percent1firsts) || - percent0firsts + 30 < percent1firsts) { - s.first_indent = s.AlignTabs()[1].center; - s.body_indent = s.AlignTabs()[0].center; - } else if ((percent1firsts < 20 && 30 < percent0firsts) || - percent1firsts + 30 < percent0firsts) { - s.first_indent = s.AlignTabs()[0].center; - s.body_indent = s.AlignTabs()[1].center; - } else { - // Ambiguous! Probably lineated (poetry) - if (debug_level > 1) { - tprintf("# Cannot determine %s indent likely to start paragraphs.\n", - s.just == tesseract::JUSTIFICATION_LEFT ? "left" : "right"); - tprintf("# Indent of %d looks like a first line %d%% of the time.\n", - s.AlignTabs()[0].center, percent0firsts); - tprintf("# Indent of %d looks like a first line %d%% of the time.\n", - s.AlignTabs()[1].center, percent1firsts); - s.PrintRows(); - } - return; - } - } else { - // There's only one tab stop for the "aligned to" side. 
- s.first_indent = s.body_indent = s.AlignTabs()[0].center; - } - - // At this point, we have our model. - const ParagraphModel *model = theory->AddModel(s.Model()); - - // Now all we have to do is figure out if the text is fully justified or not. - // eop_threshold: default to fully justified unless we see evidence below. - // See description on MarkRowsWithModel() - s.eop_threshold = - (s.OffsideTabs()[0].center + s.OffsideTabs()[1].center) / 2; - // If the text is not fully justified, re-set the eop_threshold to 0. - if (s.AlignTabs().size() == 2) { - // Paragraphs with a paragraph-start indent. - for (int i = s.row_start; i < s.row_end - 1; i++) { - if (ValidFirstLine(s.rows, i + 1, model) && - !NearlyEqual(s.OffsideTabs()[0].center, - (*s.rows)[i].OffsideIndent(s.just), s.tolerance)) { - // We found a non-end-of-paragraph short line: not fully justified. - s.eop_threshold = 0; - break; - } - } - } else { - // Paragraphs with no paragraph-start indent. - for (int i = s.row_start; i < s.row_end - 1; i++) { - if (!s.FirstWordWouldHaveFit(i, i + 1) && - !NearlyEqual(s.OffsideTabs()[0].center, - (*s.rows)[i].OffsideIndent(s.just), s.tolerance)) { - // We found a non-end-of-paragraph short line: not fully justified. 
- s.eop_threshold = 0; - break; - } - } - } - MarkRowsWithModel(rows, row_start, row_end, model, s.ltr, s.eop_threshold); -} - -// =============== Implementation of ParagraphTheory ===================== - -const ParagraphModel *ParagraphTheory::AddModel(const ParagraphModel &model) { - for (int i = 0; i < models_->size(); i++) { - if ((*models_)[i]->Comparable(model)) - return (*models_)[i]; - } - ParagraphModel *m = new ParagraphModel(model); - models_->push_back(m); - models_we_added_.push_back_new(m); - return m; -} - -void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) { - for (int i = models_->size() - 1; i >= 0; i--) { - ParagraphModel *m = (*models_)[i]; - if (!used_models.contains(m) && models_we_added_.contains(m)) { - models_->remove(i); - models_we_added_.remove(models_we_added_.get_index(m)); - delete m; - } - } -} - -// Examine rows[start, end) and try to determine if an existing non-centered -// paragraph model would fit them perfectly. If so, return a pointer to it. -// If not, return nullptr. 
-const ParagraphModel *ParagraphTheory::Fits( - const GenericVector *rows, int start, int end) const { - for (int m = 0; m < models_->size(); m++) { - const ParagraphModel *model = (*models_)[m]; - if (model->justification() != JUSTIFICATION_CENTER && - RowsFitModel(rows, start, end, model)) - return model; - } - return nullptr; -} - -void ParagraphTheory::NonCenteredModels(SetOfModels *models) { - for (int m = 0; m < models_->size(); m++) { - const ParagraphModel *model = (*models_)[m]; - if (model->justification() != JUSTIFICATION_CENTER) - models->push_back_new(model); - } -} - -int ParagraphTheory::IndexOf(const ParagraphModel *model) const { - for (int i = 0; i < models_->size(); i++) { - if ((*models_)[i] == model) - return i; - } - return -1; -} - -bool ValidFirstLine(const GenericVector *rows, - int row, const ParagraphModel *model) { - if (!StrongModel(model)) { - tprintf("ValidFirstLine() should only be called with strong models!\n"); - } - return StrongModel(model) && - model->ValidFirstLine( - (*rows)[row].lmargin_, (*rows)[row].lindent_, - (*rows)[row].rindent_, (*rows)[row].rmargin_); -} - -bool ValidBodyLine(const GenericVector *rows, - int row, const ParagraphModel *model) { - if (!StrongModel(model)) { - tprintf("ValidBodyLine() should only be called with strong models!\n"); - } - return StrongModel(model) && - model->ValidBodyLine( - (*rows)[row].lmargin_, (*rows)[row].lindent_, - (*rows)[row].rindent_, (*rows)[row].rmargin_); -} - -bool CrownCompatible(const GenericVector *rows, - int a, int b, const ParagraphModel *model) { - if (model != kCrownRight && model != kCrownLeft) { - tprintf("CrownCompatible() should only be called with crown models!\n"); - return false; - } - RowScratchRegisters &row_a = (*rows)[a]; - RowScratchRegisters &row_b = (*rows)[b]; - if (model == kCrownRight) { - return NearlyEqual(row_a.rindent_ + row_a.rmargin_, - row_b.rindent_ + row_b.rmargin_, - Epsilon(row_a.ri_->average_interword_space)); - } - return 
NearlyEqual(row_a.lindent_ + row_a.lmargin_, - row_b.lindent_ + row_b.lmargin_, - Epsilon(row_a.ri_->average_interword_space)); -} - - -// =============== Implementation of ParagraphModelSmearer ==================== - -ParagraphModelSmearer::ParagraphModelSmearer( - GenericVector *rows, - int row_start, int row_end, ParagraphTheory *theory) - : theory_(theory), rows_(rows), row_start_(row_start), - row_end_(row_end) { - if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) { - row_start_ = 0; - row_end_ = 0; - return; - } - SetOfModels no_models; - for (int row = row_start - 1; row <= row_end; row++) { - open_models_.push_back(no_models); - } -} - -// see paragraphs_internal.h -void ParagraphModelSmearer::CalculateOpenModels(int row_start, int row_end) { - SetOfModels no_models; - if (row_start < row_start_) row_start = row_start_; - if (row_end > row_end_) row_end = row_end_; - - for (int row = (row_start > 0) ? row_start - 1 : row_start; row < row_end; - row++) { - if ((*rows_)[row].ri_->num_words == 0) { - OpenModels(row + 1) = no_models; - } else { - SetOfModels &opened = OpenModels(row); - (*rows_)[row].StartHypotheses(&opened); - - // Which models survive the transition from row to row + 1? - SetOfModels still_open; - for (int m = 0; m < opened.size(); m++) { - if (ValidFirstLine(rows_, row, opened[m]) || - ValidBodyLine(rows_, row, opened[m])) { - // This is basic filtering; we check likely paragraph starty-ness down - // below in Smear() -- you know, whether the first word would have fit - // and such. - still_open.push_back_new(opened[m]); - } - } - OpenModels(row + 1) = still_open; - } - } -} - -// see paragraphs_internal.h -void ParagraphModelSmearer::Smear() { - CalculateOpenModels(row_start_, row_end_); - - // For each row which we're unsure about (that is, it is LT_UNKNOWN or - // we have multiple LT_START hypotheses), see if there's a model that - // was recently used (an "open" model) which might model it well. 
- for (int i = row_start_; i < row_end_; i++) { - RowScratchRegisters &row = (*rows_)[i]; - if (row.ri_->num_words == 0) - continue; - - // Step One: - // Figure out if there are "open" models which are left-alined or - // right-aligned. This is important for determining whether the - // "first" word in a row would fit at the "end" of the previous row. - bool left_align_open = false; - bool right_align_open = false; - for (int m = 0; m < OpenModels(i).size(); m++) { - switch (OpenModels(i)[m]->justification()) { - case JUSTIFICATION_LEFT: left_align_open = true; break; - case JUSTIFICATION_RIGHT: right_align_open = true; break; - default: left_align_open = right_align_open = true; - } - } - // Step Two: - // Use that knowledge to figure out if this row is likely to - // start a paragraph. - bool likely_start; - if (i == 0) { - likely_start = true; - } else { - if ((left_align_open && right_align_open) || - (!left_align_open && !right_align_open)) { - likely_start = LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_LEFT) || - LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_RIGHT); - } else if (left_align_open) { - likely_start = LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_LEFT); - } else { - likely_start = LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_RIGHT); - } - } - - // Step Three: - // If this text line seems like an obvious first line of an - // open model, or an obvious continuation of an existing - // modelled paragraph, mark it up. - if (likely_start) { - // Add Start Hypotheses for all Open models that fit. - for (int m = 0; m < OpenModels(i).size(); m++) { - if (ValidFirstLine(rows_, i, OpenModels(i)[m])) { - row.AddStartLine(OpenModels(i)[m]); - } - } - } else { - // Add relevant body line hypotheses. 
- SetOfModels last_line_models; - if (i > 0) { - (*rows_)[i - 1].StrongHypotheses(&last_line_models); - } else { - theory_->NonCenteredModels(&last_line_models); - } - for (int m = 0; m < last_line_models.size(); m++) { - const ParagraphModel *model = last_line_models[m]; - if (ValidBodyLine(rows_, i, model)) - row.AddBodyLine(model); - } - } - - // Step Four: - // If we're still quite unsure about this line, go through all - // models in our theory and see if this row could be the start - // of any of our models. - if (row.GetLineType() == LT_UNKNOWN || - (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) { - SetOfModels all_models; - theory_->NonCenteredModels(&all_models); - for (int m = 0; m < all_models.size(); m++) { - if (ValidFirstLine(rows_, i, all_models[m])) { - row.AddStartLine(all_models[m]); - } - } - } - // Step Five: - // Since we may have updated the hypotheses about this row, we need - // to recalculate the Open models for the rest of rows[i + 1, row_end) - if (row.GetLineType() != LT_UNKNOWN) { - CalculateOpenModels(i + 1, row_end_); - } - } -} - -// ================ Main Paragraph Detection Algorithm ======================= - -// Find out what ParagraphModels are actually used, and discard any -// that are not. -static void DiscardUnusedModels(const GenericVector &rows, - ParagraphTheory *theory) { - SetOfModels used_models; - for (int i = 0; i < rows.size(); i++) { - rows[i].StrongHypotheses(&used_models); - } - theory->DiscardUnusedModels(used_models); -} - -// DowngradeWeakestToCrowns: -// Forget any flush-{left, right} models unless we see two or more -// of them in sequence. -// -// In pass 3, we start to classify even flush-left paragraphs (paragraphs -// where the first line and body indent are the same) as having proper Models. 
-// This is generally dangerous, since if you start imagining that flush-left -// is a typical paragraph model when it is not, it will lead you to chop normal -// indented paragraphs in the middle whenever a sentence happens to start on a -// new line (see "This" above). What to do? -// What we do is to take any paragraph which is flush left and is not -// preceded by another paragraph of the same model and convert it to a "Crown" -// paragraph. This is a weak pseudo-ParagraphModel which is a placeholder -// for later. It means that the paragraph is flush, but it would be desirable -// to mark it as the same model as following text if it fits. This downgrade -// FlushLeft -> CrownLeft -> Model of following paragraph. Means that we -// avoid making flush left Paragraph Models whenever we see a top-of-the-page -// half-of-a-paragraph. and instead we mark it the same as normal body text. -// -// Implementation: -// -// Comb backwards through the row scratch registers, and turn any -// sequences of body lines of equivalent type abutted against the beginning -// or a body or start line of a different type into a crown paragraph. -static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory, - GenericVector *rows) { - int start; - for (int end = rows->size(); end > 0; end = start) { - // Search back for a body line of a unique type. - const ParagraphModel *model = nullptr; - while (end > 0 && - (model = (*rows)[end - 1].UniqueBodyHypothesis()) == nullptr) { - end--; - } - if (end == 0) break; - start = end - 1; - while (start >= 0 && (*rows)[start].UniqueBodyHypothesis() == model) { - start--; // walk back to the first line that is not the same body type. - } - if (start >= 0 && (*rows)[start].UniqueStartHypothesis() == model && - StrongModel(model) && - NearlyEqual(model->first_indent(), model->body_indent(), - model->tolerance())) { - start--; - } - start++; - // Now rows[start, end) is a sequence of unique body hypotheses of model. 
- if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER) - continue; - if (!StrongModel(model)) { - while (start > 0 && - CrownCompatible(rows, start - 1, start, model)) - start--; - } - if (start == 0 || - (!StrongModel(model)) || - (StrongModel(model) && !ValidFirstLine(rows, start - 1, model))) { - // crownify rows[start, end) - const ParagraphModel *crown_model = model; - if (StrongModel(model)) { - if (model->justification() == JUSTIFICATION_LEFT) - crown_model = kCrownLeft; - else - crown_model = kCrownRight; - } - (*rows)[start].SetUnknown(); - (*rows)[start].AddStartLine(crown_model); - for (int row = start + 1; row < end; row++) { - (*rows)[row].SetUnknown(); - (*rows)[row].AddBodyLine(crown_model); - } - } - } - DiscardUnusedModels(*rows, theory); -} - - -// Clear all hypotheses about lines [start, end) and reset margins. -// -// The empty space between the left of a row and the block boundary (and -// similarly for the right) is split into two pieces: margin and indent. -// In initial processing, we assume the block is tight and the margin for -// all lines is set to zero. However, if our first pass does not yield -// models for everything, it may be due to an inset paragraph like a -// block-quote. In that case, we make a second pass over that unmarked -// section of the page and reset the "margin" portion of the empty space -// to the common amount of space at the ends of the lines under consid- -// eration. This would be equivalent to percentile set to 0. However, -// sometimes we have a single character sticking out in the right margin -// of a text block (like the 'r' in 'for' on line 3 above), and we can -// really just ignore it as an outlier. To express this, we allow the -// user to specify the percentile (0..100) of indent values to use as -// the common margin for each row in the run of rows[start, end). 
-void RecomputeMarginsAndClearHypotheses( - GenericVector *rows, int start, int end, - int percentile) { - if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) - return; - - int lmin, lmax, rmin, rmax; - lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_; - rmin = rmax = (*rows)[start].rmargin_ + (*rows)[start].rindent_; - for (int i = start; i < end; i++) { - RowScratchRegisters &sr = (*rows)[i]; - sr.SetUnknown(); - if (sr.ri_->num_words == 0) - continue; - UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax); - UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax); - } - STATS lefts(lmin, lmax + 1); - STATS rights(rmin, rmax + 1); - for (int i = start; i < end; i++) { - RowScratchRegisters &sr = (*rows)[i]; - if (sr.ri_->num_words == 0) - continue; - lefts.add(sr.lmargin_ + sr.lindent_, 1); - rights.add(sr.rmargin_ + sr.rindent_, 1); - } - int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0); - int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0); - for (int i = start; i < end; i++) { - RowScratchRegisters &sr = (*rows)[i]; - int ldelta = ignorable_left - sr.lmargin_; - sr.lmargin_ += ldelta; - sr.lindent_ -= ldelta; - int rdelta = ignorable_right - sr.rmargin_; - sr.rmargin_ += rdelta; - sr.rindent_ -= rdelta; - } -} - -// Return the median inter-word space in rows[row_start, row_end). 
-int InterwordSpace(const GenericVector &rows, - int row_start, int row_end) { - if (row_end < row_start + 1) return 1; - int word_height = (rows[row_start].ri_->lword_box.height() + - rows[row_end - 1].ri_->lword_box.height()) / 2; - int word_width = (rows[row_start].ri_->lword_box.width() + - rows[row_end - 1].ri_->lword_box.width()) / 2; - STATS spacing_widths(0, 5 + word_width); - for (int i = row_start; i < row_end; i++) { - if (rows[i].ri_->num_words > 1) { - spacing_widths.add(rows[i].ri_->average_interword_space, 1); - } - } - int minimum_reasonable_space = word_height / 3; - if (minimum_reasonable_space < 2) - minimum_reasonable_space = 2; - int median = spacing_widths.median(); - return (median > minimum_reasonable_space) - ? median : minimum_reasonable_space; -} - -// Return whether the first word on the after line can fit in the space at -// the end of the before line (knowing which way the text is aligned and read). -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after, - tesseract::ParagraphJustification justification) { - if (before.ri_->num_words == 0 || after.ri_->num_words == 0) - return true; - - if (justification == JUSTIFICATION_UNKNOWN) { - tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n"); - } - int available_space; - if (justification == JUSTIFICATION_CENTER) { - available_space = before.lindent_ + before.rindent_; - } else { - available_space = before.OffsideIndent(justification); - } - available_space -= before.ri_->average_interword_space; - - if (before.ri_->ltr) - return after.ri_->lword_box.width() < available_space; - return after.ri_->rword_box.width() < available_space; -} - -// Return whether the first word on the after line can fit in the space at -// the end of the before line (not knowing which way the text goes) in a left -// or right alignment. 
-bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after) { - if (before.ri_->num_words == 0 || after.ri_->num_words == 0) - return true; - - int available_space = before.lindent_; - if (before.rindent_ > available_space) - available_space = before.rindent_; - available_space -= before.ri_->average_interword_space; - - if (before.ri_->ltr) - return after.ri_->lword_box.width() < available_space; - return after.ri_->rword_box.width() < available_space; -} - -static bool TextSupportsBreak(const RowScratchRegisters &before, - const RowScratchRegisters &after) { - if (before.ri_->ltr) { - return before.ri_->rword_likely_ends_idea && - after.ri_->lword_likely_starts_idea; - } else { - return before.ri_->lword_likely_ends_idea && - after.ri_->rword_likely_starts_idea; - } -} - -static bool LikelyParagraphStart(const RowScratchRegisters &before, - const RowScratchRegisters &after, - tesseract::ParagraphJustification j) { - return before.ri_->num_words == 0 || - (FirstWordWouldHaveFit(before, after, j) && - TextSupportsBreak(before, after)); -} - -// Examine rows[start, end) and try to determine what sort of ParagraphModel -// would fit them as a single paragraph. -// If we can't produce a unique model justification_ = JUSTIFICATION_UNKNOWN. -// If the rows given could be a consistent start to a paragraph, set *consistent -// true. -static ParagraphModel InternalParagraphModelByOutline( - const GenericVector *rows, - int start, int end, int tolerance, bool *consistent) { - int ltr_line_count = 0; - for (int i = start; i < end; i++) { - ltr_line_count += static_cast((*rows)[i].ri_->ltr); - } - bool ltr = (ltr_line_count >= (end - start) / 2); - - *consistent = true; - if (!AcceptableRowArgs(0, 2, __func__, rows, start, end)) - return ParagraphModel(); - - // Ensure the caller only passed us a region with a common rmargin and - // lmargin. 
- int lmargin = (*rows)[start].lmargin_; - int rmargin = (*rows)[start].rmargin_; - int lmin, lmax, rmin, rmax, cmin, cmax; - lmin = lmax = (*rows)[start + 1].lindent_; - rmin = rmax = (*rows)[start + 1].rindent_; - cmin = cmax = 0; - for (int i = start + 1; i < end; i++) { - if ((*rows)[i].lmargin_ != lmargin || (*rows)[i].rmargin_ != rmargin) { - tprintf("Margins don't match! Software error.\n"); - *consistent = false; - return ParagraphModel(); - } - UpdateRange((*rows)[i].lindent_, &lmin, &lmax); - UpdateRange((*rows)[i].rindent_, &rmin, &rmax); - UpdateRange((*rows)[i].rindent_ - (*rows)[i].lindent_, &cmin, &cmax); - } - int ldiff = lmax - lmin; - int rdiff = rmax - rmin; - int cdiff = cmax - cmin; - if (rdiff > tolerance && ldiff > tolerance) { - if (cdiff < tolerance * 2) { - if (end - start < 3) - return ParagraphModel(); - return ParagraphModel(JUSTIFICATION_CENTER, 0, 0, 0, tolerance); - } - *consistent = false; - return ParagraphModel(); - } - if (end - start < 3) // Don't return a model for two line paras. - return ParagraphModel(); - - // These booleans keep us from saying something is aligned left when the body - // left variance is too large. - bool body_admits_left_alignment = ldiff < tolerance; - bool body_admits_right_alignment = rdiff < tolerance; - - ParagraphModel left_model = - ParagraphModel(JUSTIFICATION_LEFT, lmargin, (*rows)[start].lindent_, - (lmin + lmax) / 2, tolerance); - ParagraphModel right_model = - ParagraphModel(JUSTIFICATION_RIGHT, rmargin, (*rows)[start].rindent_, - (rmin + rmax) / 2, tolerance); - - // These booleans keep us from having an indent on the "wrong side" for the - // first line. - bool text_admits_left_alignment = ltr || left_model.is_flush(); - bool text_admits_right_alignment = !ltr || right_model.is_flush(); - - // At least one of the edges is less than tolerance in variance. - // If the other is obviously ragged, it can't be the one aligned to. - // [Note the last line is included in this raggedness.] 
- if (tolerance < rdiff) { - if (body_admits_left_alignment && text_admits_left_alignment) - return left_model; - *consistent = false; - return ParagraphModel(); - } - if (tolerance < ldiff) { - if (body_admits_right_alignment && text_admits_right_alignment) - return right_model; - *consistent = false; - return ParagraphModel(); - } - - // At this point, we know the body text doesn't vary much on either side. - - // If the first line juts out oddly in one direction or the other, - // that likely indicates the side aligned to. - int first_left = (*rows)[start].lindent_; - int first_right = (*rows)[start].rindent_; - - if (ltr && body_admits_left_alignment && - (first_left < lmin || first_left > lmax)) - return left_model; - if (!ltr && body_admits_right_alignment && - (first_right < rmin || first_right > rmax)) - return right_model; - - *consistent = false; - return ParagraphModel(); -} - -// Examine rows[start, end) and try to determine what sort of ParagraphModel -// would fit them as a single paragraph. If nothing fits, -// justification_ = JUSTIFICATION_UNKNOWN and print the paragraph to debug -// output if we're debugging. -static ParagraphModel ParagraphModelByOutline( - int debug_level, - const GenericVector *rows, - int start, int end, int tolerance) { - bool unused_consistent; - ParagraphModel retval = InternalParagraphModelByOutline( - rows, start, end, tolerance, &unused_consistent); - if (debug_level >= 2 && retval.justification() == JUSTIFICATION_UNKNOWN) { - tprintf("Could not determine a model for this paragraph:\n"); - PrintRowRange(*rows, start, end); - } - return retval; -} - -// Do rows[start, end) form a single instance of the given paragraph model? 
-bool RowsFitModel(const GenericVector *rows, - int start, int end, const ParagraphModel *model) { - if (!AcceptableRowArgs(0, 1, __func__, rows, start, end)) - return false; - if (!ValidFirstLine(rows, start, model)) return false; - for (int i = start + 1 ; i < end; i++) { - if (!ValidBodyLine(rows, i, model)) return false; - } - return true; -} - -// Examine rows[row_start, row_end) as an independent section of text, -// and mark rows that are exceptionally clear as start-of-paragraph -// and paragraph-body lines. -// -// We presume that any lines surrounding rows[row_start, row_end) may -// have wildly different paragraph models, so we don't key any data off -// of those lines. -// -// We only take the very strongest signals, as we don't want to get -// confused and marking up centered text, poetry, or source code as -// clearly part of a typical paragraph. -static void MarkStrongEvidence(GenericVector *rows, - int row_start, int row_end) { - // Record patently obvious body text. - for (int i = row_start + 1; i < row_end; i++) { - const RowScratchRegisters &prev = (*rows)[i - 1]; - RowScratchRegisters &curr = (*rows)[i]; - tesseract::ParagraphJustification typical_justification = - prev.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; - if (!curr.ri_->rword_likely_starts_idea && - !curr.ri_->lword_likely_starts_idea && - !FirstWordWouldHaveFit(prev, curr, typical_justification)) { - curr.SetBodyLine(); - } - } - - // Record patently obvious start paragraph lines. - // - // It's an extremely good signal of the start of a paragraph that - // the first word would have fit on the end of the previous line. - // However, applying just that signal would have us mark random - // start lines of lineated text (poetry and source code) and some - // centered headings as paragraph start lines. 
Therefore, we use - // a second qualification for a paragraph start: Not only should - // the first word of this line have fit on the previous line, - // but also, this line should go full to the right of the block, - // disallowing a subsequent word from having fit on this line. - - // First row: - { - RowScratchRegisters &curr = (*rows)[row_start]; - RowScratchRegisters &next = (*rows)[row_start + 1]; - tesseract::ParagraphJustification j = - curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; - if (curr.GetLineType() == LT_UNKNOWN && - !FirstWordWouldHaveFit(curr, next, j) && - (curr.ri_->lword_likely_starts_idea || - curr.ri_->rword_likely_starts_idea)) { - curr.SetStartLine(); - } - } - // Middle rows - for (int i = row_start + 1; i < row_end - 1; i++) { - RowScratchRegisters &prev = (*rows)[i - 1]; - RowScratchRegisters &curr = (*rows)[i]; - RowScratchRegisters &next = (*rows)[i + 1]; - tesseract::ParagraphJustification j = - curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; - if (curr.GetLineType() == LT_UNKNOWN && - !FirstWordWouldHaveFit(curr, next, j) && - LikelyParagraphStart(prev, curr, j)) { - curr.SetStartLine(); - } - } - // Last row - { // the short circuit at the top means we have at least two lines. - RowScratchRegisters &prev = (*rows)[row_end - 2]; - RowScratchRegisters &curr = (*rows)[row_end - 1]; - tesseract::ParagraphJustification j = - curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; - if (curr.GetLineType() == LT_UNKNOWN && - !FirstWordWouldHaveFit(curr, curr, j) && - LikelyParagraphStart(prev, curr, j)) { - curr.SetStartLine(); - } - } -} - -// Look for sequences of a start line followed by some body lines in -// rows[row_start, row_end) and create ParagraphModels for them if -// they seem coherent. 
-static void ModelStrongEvidence(int debug_level, - GenericVector *rows, - int row_start, int row_end, - bool allow_flush_models, - ParagraphTheory *theory) { - if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) - return; - - int start = row_start; - while (start < row_end) { - while (start < row_end && (*rows)[start].GetLineType() != LT_START) - start++; - if (start >= row_end - 1) - break; - - int tolerance = Epsilon((*rows)[start + 1].ri_->average_interword_space); - int end = start; - ParagraphModel last_model; - bool next_consistent; - do { - ++end; - // rows[row, end) was consistent. - // If rows[row, end + 1) is not consistent, - // just model rows[row, end) - if (end < row_end - 1) { - RowScratchRegisters &next = (*rows)[end]; - LineType lt = next.GetLineType(); - next_consistent = lt == LT_BODY || - (lt == LT_UNKNOWN && - !FirstWordWouldHaveFit((*rows)[end - 1], (*rows)[end])); - } else { - next_consistent = false; - } - if (next_consistent) { - ParagraphModel next_model = InternalParagraphModelByOutline( - rows, start, end + 1, tolerance, &next_consistent); - if (((*rows)[start].ri_->ltr && - last_model.justification() == JUSTIFICATION_LEFT && - next_model.justification() != JUSTIFICATION_LEFT) || - (!(*rows)[start].ri_->ltr && - last_model.justification() == JUSTIFICATION_RIGHT && - next_model.justification() != JUSTIFICATION_RIGHT)) { - next_consistent = false; - } - last_model = next_model; - } else { - next_consistent = false; - } - } while (next_consistent && end < row_end); - // At this point, rows[start, end) looked like it could have been a - // single paragraph. If we can make a good ParagraphModel for it, - // do so and mark this sequence with that model. - if (end > start + 1) { - // emit a new paragraph if we have more than one line. 
- const ParagraphModel *model = nullptr; - ParagraphModel new_model = ParagraphModelByOutline( - debug_level, rows, start, end, - Epsilon(InterwordSpace(*rows, start, end))); - if (new_model.justification() == JUSTIFICATION_UNKNOWN) { - // couldn't create a good model, oh well. - } else if (new_model.is_flush()) { - if (end == start + 2) { - // It's very likely we just got two paragraph starts in a row. - end = start + 1; - } else if (start == row_start) { - // Mark this as a Crown. - if (new_model.justification() == JUSTIFICATION_LEFT) { - model = kCrownLeft; - } else { - model = kCrownRight; - } - } else if (allow_flush_models) { - model = theory->AddModel(new_model); - } - } else { - model = theory->AddModel(new_model); - } - if (model) { - (*rows)[start].AddStartLine(model); - for (int i = start + 1; i < end; i++) { - (*rows)[i].AddBodyLine(model); - } - } - } - start = end; - } -} - -// We examine rows[row_start, row_end) and do the following: -// (1) Clear all existing hypotheses for the rows being considered. -// (2) Mark up any rows as exceptionally likely to be paragraph starts -// or paragraph body lines as such using both geometric and textual -// clues. -// (3) Form models for any sequence of start + continuation lines. -// (4) Smear the paragraph models to cover surrounding text. -static void StrongEvidenceClassify(int debug_level, - GenericVector *rows, - int row_start, int row_end, - ParagraphTheory *theory) { - if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) - return; - - if (debug_level > 1) { - tprintf("#############################################\n"); - tprintf("# StrongEvidenceClassify( rows[%d:%d) )\n", row_start, row_end); - tprintf("#############################################\n"); - } - - RecomputeMarginsAndClearHypotheses(rows, row_start, row_end, 10); - MarkStrongEvidence(rows, row_start, row_end); - - DebugDump(debug_level > 2, "Initial strong signals.", *theory, *rows); - - // Create paragraph models. 
- ModelStrongEvidence(debug_level, rows, row_start, row_end, false, theory); - - DebugDump(debug_level > 2, "Unsmeared hypotheses.s.", *theory, *rows); - - // At this point, some rows are marked up as paragraphs with model numbers, - // and some rows are marked up as either LT_START or LT_BODY. Now let's - // smear any good paragraph hypotheses forward and backward. - ParagraphModelSmearer smearer(rows, row_start, row_end, theory); - smearer.Smear(); -} - -static void SeparateSimpleLeaderLines(GenericVector *rows, - int row_start, int row_end, - ParagraphTheory *theory) { - for (int i = row_start + 1; i < row_end - 1; i++) { - if ((*rows)[i - 1].ri_->has_leaders && - (*rows)[i].ri_->has_leaders && - (*rows)[i + 1].ri_->has_leaders) { - const ParagraphModel *model = theory->AddModel( - ParagraphModel(JUSTIFICATION_UNKNOWN, 0, 0, 0, 0)); - (*rows)[i].AddStartLine(model); - } - } -} - -// Collect sequences of unique hypotheses in row registers and create proper -// paragraphs for them, referencing the paragraphs in row_owners. -static void ConvertHypothesizedModelRunsToParagraphs( - int debug_level, - const GenericVector &rows, - GenericVector *row_owners, - ParagraphTheory *theory) { - int end = rows.size(); - int start; - for (; end > 0; end = start) { - start = end - 1; - const ParagraphModel *model = nullptr; - // TODO(eger): Be smarter about dealing with multiple hypotheses. - bool single_line_paragraph = false; - SetOfModels models; - rows[start].NonNullHypotheses(&models); - if (!models.empty()) { - model = models[0]; - if (rows[start].GetLineType(model) != LT_BODY) - single_line_paragraph = true; - } - if (model && !single_line_paragraph) { - // walk back looking for more body lines and then a start line. 
- while (--start > 0 && rows[start].GetLineType(model) == LT_BODY) { - // do nothing - } - if (start < 0 || rows[start].GetLineType(model) != LT_START) { - model = nullptr; - } - } - if (model == nullptr) { - continue; - } - // rows[start, end) should be a paragraph. - PARA *p = new PARA(); - if (model == kCrownLeft || model == kCrownRight) { - p->is_very_first_or_continuation = true; - // Crown paragraph. - // If we can find an existing ParagraphModel that fits, use it, - // else create a new one. - for (int row = end; row < rows.size(); row++) { - if ((*row_owners)[row] && - (ValidBodyLine(&rows, start, (*row_owners)[row]->model) && - (start == 0 || - ValidFirstLine(&rows, start, (*row_owners)[row]->model)))) { - model = (*row_owners)[row]->model; - break; - } - } - if (model == kCrownLeft) { - // No subsequent model fits, so cons one up. - model = theory->AddModel(ParagraphModel( - JUSTIFICATION_LEFT, rows[start].lmargin_ + rows[start].lindent_, - 0, 0, Epsilon(rows[start].ri_->average_interword_space))); - } else if (model == kCrownRight) { - // No subsequent model fits, so cons one up. - model = theory->AddModel(ParagraphModel( - JUSTIFICATION_RIGHT, rows[start].rmargin_ + rows[start].rmargin_, - 0, 0, Epsilon(rows[start].ri_->average_interword_space))); - } - } - rows[start].SetUnknown(); - rows[start].AddStartLine(model); - for (int i = start + 1; i < end; i++) { - rows[i].SetUnknown(); - rows[i].AddBodyLine(model); - } - p->model = model; - p->has_drop_cap = rows[start].ri_->has_drop_cap; - p->is_list_item = - model->justification() == JUSTIFICATION_RIGHT - ? rows[start].ri_->rword_indicates_list_item - : rows[start].ri_->lword_indicates_list_item; - for (int row = start; row < end; row++) { - if ((*row_owners)[row] != nullptr) { - tprintf("Memory leak! 
ConvertHypothesizeModelRunsToParagraphs() called " - "more than once!\n"); - delete (*row_owners)[row]; - } - (*row_owners)[row] = p; - } - } -} - -struct Interval { - Interval() : begin(0), end(0) {} - Interval(int b, int e) : begin(b), end(e) {} - - int begin; - int end; -}; - -// Return whether rows[row] appears to be stranded, meaning that the evidence -// for this row is very weak due to context. For instance, two lines of source -// code may happen to be indented at the same tab vector as body text starts, -// leading us to think they are two start-of-paragraph lines. This is not -// optimal. However, we also don't want to mark a sequence of short dialog -// as "weak," so our heuristic is: -// (1) If a line is surrounded by lines of unknown type, it's weak. -// (2) If two lines in a row are start lines for a given paragraph type, but -// after that the same paragraph type does not continue, they're weak. -static bool RowIsStranded(const GenericVector &rows, - int row) { - SetOfModels row_models; - rows[row].StrongHypotheses(&row_models); - - for (int m = 0; m < row_models.size(); m++) { - bool all_starts = rows[row].GetLineType(); - int run_length = 1; - bool continues = true; - for (int i = row - 1; i >= 0 && continues; i--) { - SetOfModels models; - rows[i].NonNullHypotheses(&models); - switch (rows[i].GetLineType(row_models[m])) { - case LT_START: run_length++; break; - case LT_MULTIPLE: // explicit fall-through - case LT_BODY: run_length++; all_starts = false; break; - case LT_UNKNOWN: // explicit fall-through - default: continues = false; - } - } - continues = true; - for (int i = row + 1; i < rows.size() && continues; i++) { - SetOfModels models; - rows[i].NonNullHypotheses(&models); - switch (rows[i].GetLineType(row_models[m])) { - case LT_START: run_length++; break; - case LT_MULTIPLE: // explicit fall-through - case LT_BODY: run_length++; all_starts = false; break; - case LT_UNKNOWN: // explicit fall-through - default: continues = false; - } - } - if 
(run_length > 2 || (!all_starts && run_length > 1)) return false; - } - return true; -} - -// Go through rows[row_start, row_end) and gather up sequences that need better -// classification. -// + Sequences of non-empty rows without hypotheses. -// + Crown paragraphs not immediately followed by a strongly modeled line. -// + Single line paragraphs surrounded by text that doesn't match the -// model. -static void LeftoverSegments(const GenericVector &rows, - GenericVector *to_fix, - int row_start, int row_end) { - to_fix->clear(); - for (int i = row_start; i < row_end; i++) { - bool needs_fixing = false; - - SetOfModels models; - SetOfModels models_w_crowns; - rows[i].StrongHypotheses(&models); - rows[i].NonNullHypotheses(&models_w_crowns); - if (models.empty() && !models_w_crowns.empty()) { - // Crown paragraph. Is it followed by a modeled line? - for (int end = i + 1; end < rows.size(); end++) { - SetOfModels end_models; - SetOfModels strong_end_models; - rows[end].NonNullHypotheses(&end_models); - rows[end].StrongHypotheses(&strong_end_models); - if (end_models.empty()) { - needs_fixing = true; - break; - } else if (!strong_end_models.empty()) { - needs_fixing = false; - break; - } - } - } else if (models.empty() && rows[i].ri_->num_words > 0) { - // No models at all. - needs_fixing = true; - } - - if (!needs_fixing && !models.empty()) { - needs_fixing = RowIsStranded(rows, i); - } - - if (needs_fixing) { - if (!to_fix->empty() && to_fix->back().end == i - 1) - to_fix->back().end = i; - else - to_fix->push_back(Interval(i, i)); - } - } - // Convert inclusive intervals to half-open intervals. - for (int i = 0; i < to_fix->size(); i++) { - (*to_fix)[i].end = (*to_fix)[i].end + 1; - } -} - -// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known), -// normalize each row_owner to point to an actual PARA, and output the -// paragraphs in order onto paragraphs. 
-void CanonicalizeDetectionResults( - GenericVector *row_owners, - PARA_LIST *paragraphs) { - GenericVector &rows = *row_owners; - paragraphs->clear(); - PARA_IT out(paragraphs); - PARA *formerly_null = nullptr; - for (int i = 0; i < rows.size(); i++) { - if (rows[i] == nullptr) { - if (i == 0 || rows[i - 1] != formerly_null) { - rows[i] = formerly_null = new PARA(); - } else { - rows[i] = formerly_null; - continue; - } - } else if (i > 0 && rows[i - 1] == rows[i]) { - continue; - } - out.add_after_then_move(rows[i]); - } -} - -// Main entry point for Paragraph Detection Algorithm. -// -// Given a set of equally spaced textlines (described by row_infos), -// Split them into paragraphs. -// -// Output: -// row_owners - one pointer for each row, to the paragraph it belongs to. -// paragraphs - this is the actual list of PARA objects. -// models - the list of paragraph models referenced by the PARA objects. -// caller is responsible for deleting the models. -void DetectParagraphs(int debug_level, - GenericVector *row_infos, - GenericVector *row_owners, - PARA_LIST *paragraphs, - GenericVector *models) { - GenericVector rows; - ParagraphTheory theory(models); - - // Initialize row_owners to be a bunch of nullptr pointers. - row_owners->init_to_size(row_infos->size(), nullptr); - - // Set up row scratch registers for the main algorithm. - rows.init_to_size(row_infos->size(), RowScratchRegisters()); - for (int i = 0; i < row_infos->size(); i++) { - rows[i].Init((*row_infos)[i]); - } - - // Pass 1: - // Detect sequences of lines that all contain leader dots (.....) - // These are likely Tables of Contents. If there are three text lines in - // a row with leader dots, it's pretty safe to say the middle one should - // be a paragraph of its own. 
- SeparateSimpleLeaderLines(&rows, 0, rows.size(), &theory); - - DebugDump(debug_level > 1, "End of Pass 1", theory, rows); - - GenericVector leftovers; - LeftoverSegments(rows, &leftovers, 0, rows.size()); - for (int i = 0; i < leftovers.size(); i++) { - // Pass 2a: - // Find any strongly evidenced start-of-paragraph lines. If they're - // followed by two lines that look like body lines, make a paragraph - // model for that and see if that model applies throughout the text - // (that is, "smear" it). - StrongEvidenceClassify(debug_level, &rows, - leftovers[i].begin, leftovers[i].end, &theory); - - // Pass 2b: - // If we had any luck in pass 2a, we got part of the page and didn't - // know how to classify a few runs of rows. Take the segments that - // didn't find a model and reprocess them individually. - GenericVector leftovers2; - LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end); - bool pass2a_was_useful = leftovers2.size() > 1 || - (leftovers2.size() == 1 && - (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size())); - if (pass2a_was_useful) { - for (int j = 0; j < leftovers2.size(); j++) { - StrongEvidenceClassify(debug_level, &rows, - leftovers2[j].begin, leftovers2[j].end, - &theory); - } - } - } - - DebugDump(debug_level > 1, "End of Pass 2", theory, rows); - - // Pass 3: - // These are the dregs for which we didn't have enough strong textual - // and geometric clues to form matching models for. Let's see if - // the geometric clues are simple enough that we could just use those. - LeftoverSegments(rows, &leftovers, 0, rows.size()); - for (int i = 0; i < leftovers.size(); i++) { - GeometricClassify(debug_level, &rows, - leftovers[i].begin, leftovers[i].end, &theory); - } - - // Undo any flush models for which there's little evidence. 
- DowngradeWeakestToCrowns(debug_level, &theory, &rows); - - DebugDump(debug_level > 1, "End of Pass 3", theory, rows); - - // Pass 4: - // Take everything that's still not marked up well and clear all markings. - LeftoverSegments(rows, &leftovers, 0, rows.size()); - for (int i = 0; i < leftovers.size(); i++) { - for (int j = leftovers[i].begin; j < leftovers[i].end; j++) { - rows[j].SetUnknown(); - } - } - - DebugDump(debug_level > 1, "End of Pass 4", theory, rows); - - // Convert all of the unique hypothesis runs to PARAs. - ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners, - &theory); - - DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows); - - // Finally, clean up any dangling nullptr row paragraph parents. - CanonicalizeDetectionResults(row_owners, paragraphs); -} - -// ============ Code interfacing with the rest of Tesseract ================== - -static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, - RowInfo *info) { - // Set up text, lword_text, and rword_text (mostly for debug printing). - STRING fake_text; - PageIterator pit(static_cast(it)); - bool first_word = true; - if (!pit.Empty(RIL_WORD)) { - do { - fake_text += "x"; - if (first_word) info->lword_text += "x"; - info->rword_text += "x"; - if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) && - !pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL)) { - fake_text += " "; - info->rword_text = ""; - first_word = false; - } - } while (!pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL) && - pit.Next(RIL_SYMBOL)); - } - if (fake_text.size() == 0) return; - - int lspaces = info->pix_ldistance / info->average_interword_space; - for (int i = 0; i < lspaces; i++) { - info->text += ' '; - } - info->text += fake_text; - - // Set up lword_box, rword_box, and num_words. 
- PAGE_RES_IT page_res_it = *it.PageResIt(); - WERD_RES *word_res = page_res_it.restart_row(); - ROW_RES *this_row = page_res_it.row(); - - WERD_RES *lword = nullptr; - WERD_RES *rword = nullptr; - info->num_words = 0; - do { - if (word_res) { - if (!lword) lword = word_res; - if (rword != word_res) info->num_words++; - rword = word_res; - } - word_res = page_res_it.forward(); - } while (page_res_it.row() == this_row); - - if (lword) info->lword_box = lword->word->bounding_box(); - if (rword) info->rword_box = rword->word->bounding_box(); -} - - -// Given a Tesseract Iterator pointing to a text line, fill in the paragraph -// detector RowInfo with all relevant information from the row. -static void InitializeRowInfo(bool after_recognition, - const MutableIterator &it, RowInfo *info) { - if (it.PageResIt()->row() != nullptr) { - ROW *row = it.PageResIt()->row()->row; - info->pix_ldistance = row->lmargin(); - info->pix_rdistance = row->rmargin(); - info->average_interword_space = - row->space() > 0 ? 
row->space() : std::max(static_cast(row->x_height()), 1); - info->pix_xheight = row->x_height(); - info->has_leaders = false; - info->has_drop_cap = row->has_drop_cap(); - info->ltr = true; // set below depending on word scripts - } else { - info->pix_ldistance = info->pix_rdistance = 0; - info->average_interword_space = 1; - info->pix_xheight = 1.0; - info->has_leaders = false; - info->has_drop_cap = false; - info->ltr = true; - } - - info->num_words = 0; - info->lword_indicates_list_item = false; - info->lword_likely_starts_idea = false; - info->lword_likely_ends_idea = false; - info->rword_indicates_list_item = false; - info->rword_likely_starts_idea = false; - info->rword_likely_ends_idea = false; - info->has_leaders = false; - info->ltr = 1; - - if (!after_recognition) { - InitializeTextAndBoxesPreRecognition(it, info); - return; - } - info->text = ""; - const std::unique_ptr text(it.GetUTF8Text(RIL_TEXTLINE)); - int trailing_ws_idx = strlen(text.get()); // strip trailing space - while (trailing_ws_idx > 0 && - // isspace() only takes ASCII - isascii(text[trailing_ws_idx - 1]) && - isspace(text[trailing_ws_idx - 1])) - trailing_ws_idx--; - if (trailing_ws_idx > 0) { - int lspaces = info->pix_ldistance / info->average_interword_space; - for (int i = 0; i < lspaces; i++) - info->text += ' '; - for (int i = 0; i < trailing_ws_idx; i++) - info->text += text[i]; - } - - if (info->text.size() == 0) { - return; - } - - PAGE_RES_IT page_res_it = *it.PageResIt(); - GenericVector werds; - WERD_RES *word_res = page_res_it.restart_row(); - ROW_RES *this_row = page_res_it.row(); - int num_leaders = 0; - int ltr = 0; - int rtl = 0; - do { - if (word_res && word_res->best_choice->unichar_string().length() > 0) { - werds.push_back(word_res); - ltr += word_res->AnyLtrCharsInWord() ? 1 : 0; - rtl += word_res->AnyRtlCharsInWord() ? 
1 : 0; - if (word_res->word->flag(W_REP_CHAR)) num_leaders++; - } - word_res = page_res_it.forward(); - } while (page_res_it.row() == this_row); - info->ltr = ltr >= rtl; - info->has_leaders = num_leaders > 3; - info->num_words = werds.size(); - if (!werds.empty()) { - WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1]; - info->lword_text = lword->best_choice->unichar_string().string(); - info->rword_text = rword->best_choice->unichar_string().string(); - info->lword_box = lword->word->bounding_box(); - info->rword_box = rword->word->bounding_box(); - LeftWordAttributes(lword->uch_set, lword->best_choice, - info->lword_text, - &info->lword_indicates_list_item, - &info->lword_likely_starts_idea, - &info->lword_likely_ends_idea); - RightWordAttributes(rword->uch_set, rword->best_choice, - info->rword_text, - &info->rword_indicates_list_item, - &info->rword_likely_starts_idea, - &info->rword_likely_ends_idea); - } -} - -// This is called after rows have been identified and words are recognized. -// Much of this could be implemented before word recognition, but text helps -// to identify bulleted lists and gives good signals for sentence boundaries. -void DetectParagraphs(int debug_level, - bool after_text_recognition, - const MutableIterator *block_start, - GenericVector *models) { - // Clear out any preconceived notions. - if (block_start->Empty(RIL_TEXTLINE)) { - return; - } - BLOCK *block = block_start->PageResIt()->block()->block; - block->para_list()->clear(); - bool is_image_block = block->pdblk.poly_block() && !block->pdblk.poly_block()->IsText(); - - // Convert the Tesseract structures to RowInfos - // for the paragraph detection algorithm. - MutableIterator row(*block_start); - if (row.Empty(RIL_TEXTLINE)) - return; // end of input already. - - GenericVector row_infos; - do { - if (!row.PageResIt()->row()) - continue; // empty row. 
- row.PageResIt()->row()->row->set_para(nullptr); - row_infos.push_back(RowInfo()); - RowInfo &ri = row_infos.back(); - InitializeRowInfo(after_text_recognition, row, &ri); - } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) && - row.Next(RIL_TEXTLINE)); - - // If we're called before text recognition, we might not have - // tight block bounding boxes, so trim by the minimum on each side. - if (!row_infos.empty()) { - int min_lmargin = row_infos[0].pix_ldistance; - int min_rmargin = row_infos[0].pix_rdistance; - for (int i = 1; i < row_infos.size(); i++) { - if (row_infos[i].pix_ldistance < min_lmargin) - min_lmargin = row_infos[i].pix_ldistance; - if (row_infos[i].pix_rdistance < min_rmargin) - min_rmargin = row_infos[i].pix_rdistance; - } - if (min_lmargin > 0 || min_rmargin > 0) { - for (int i = 0; i < row_infos.size(); i++) { - row_infos[i].pix_ldistance -= min_lmargin; - row_infos[i].pix_rdistance -= min_rmargin; - } - } - } - - // Run the paragraph detection algorithm. - GenericVector row_owners; - GenericVector the_paragraphs; - if (!is_image_block) { - DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(), - models); - } else { - row_owners.init_to_size(row_infos.size(), nullptr); - CanonicalizeDetectionResults(&row_owners, block->para_list()); - } - - // Now stitch in the row_owners into the rows. 
- row = *block_start; - for (int i = 0; i < row_owners.size(); i++) { - while (!row.PageResIt()->row()) - row.Next(RIL_TEXTLINE); - row.PageResIt()->row()->row->set_para(row_owners[i]); - row.Next(RIL_TEXTLINE); - } -} - -} // namespace diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs.h deleted file mode 100644 index 4a4f6f51..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs.h +++ /dev/null @@ -1,109 +0,0 @@ -/********************************************************************** - * File: paragraphs.h - * Description: Paragraph Detection data structures. - * Author: David Eger - * Created: 25 February 2011 - * - * (C) Copyright 2011, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_ -#define TESSERACT_CCMAIN_PARAGRAPHS_H_ - -#include "rect.h" // for TBOX -#include "strngs.h" // for STRING - -class PARA_LIST; -class ParagraphModel; - -struct PARA; - -template class GenericVector; - -namespace tesseract { - -class MutableIterator; - -// This structure captures all information needed about a text line for the -// purposes of paragraph detection. It is meant to be exceedingly light-weight -// so that we can easily test paragraph detection independent of the rest of -// Tesseract. 
-class RowInfo { - public: - // Constant data derived from Tesseract output. - STRING text; // the full UTF-8 text of the line. - bool ltr; // whether the majority of the text is left-to-right - // TODO(eger) make this more fine-grained. - - bool has_leaders; // does the line contain leader dots (.....)? - bool has_drop_cap; // does the line have a drop cap? - int pix_ldistance; // distance to the left pblock boundary in pixels - int pix_rdistance; // distance to the right pblock boundary in pixels - float pix_xheight; // guessed xheight for the line - int average_interword_space; // average space between words in pixels. - - int num_words; - TBOX lword_box; // in normalized (horiz text rows) space - TBOX rword_box; // in normalized (horiz text rows) space - - STRING lword_text; // the UTF-8 text of the leftmost werd - STRING rword_text; // the UTF-8 text of the rightmost werd - - // The text of a paragraph typically starts with the start of an idea and - // ends with the end of an idea. Here we define paragraph as something that - // may have a first line indent and a body indent which may be different. - // Typical words that start an idea are: - // 1. Words in western scripts that start with - // a capital letter, for example "The" - // 2. Bulleted or numbered list items, for - // example "2." - // Typical words which end an idea are words ending in punctuation marks. In - // this vocabulary, each list item is represented as a paragraph. - bool lword_indicates_list_item; - bool lword_likely_starts_idea; - bool lword_likely_ends_idea; - - bool rword_indicates_list_item; - bool rword_likely_starts_idea; - bool rword_likely_ends_idea; -}; - -// Main entry point for Paragraph Detection Algorithm. -// -// Given a set of equally spaced textlines (described by row_infos), -// Split them into paragraphs. See http://goto/paragraphstalk -// -// Output: -// row_owners - one pointer for each row, to the paragraph it belongs to. 
-// paragraphs - this is the actual list of PARA objects. -// models - the list of paragraph models referenced by the PARA objects. -// caller is responsible for deleting the models. -void DetectParagraphs(int debug_level, - GenericVector *row_infos, - GenericVector *row_owners, - PARA_LIST *paragraphs, - GenericVector *models); - -// Given a MutableIterator to the start of a block, run DetectParagraphs on -// that block and commit the results to the underlying ROW and BLOCK structs, -// saving the ParagraphModels in models. Caller owns the models. -// We use unicharset during the function to answer questions such as "is the -// first letter of this word upper case?" -void DetectParagraphs(int debug_level, - bool after_text_recognition, - const MutableIterator *block_start, - GenericVector *models); - -} // namespace - -#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs_internal.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs_internal.h deleted file mode 100644 index 558245af..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paragraphs_internal.h +++ /dev/null @@ -1,311 +0,0 @@ -/********************************************************************** - * File: paragraphs_internal.h - * Description: Paragraph Detection internal data structures. - * Author: David Eger - * - * (C) Copyright 2011, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_ -#define TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_ - -#include "paragraphs.h" -#include "publictypes.h" // for ParagraphJustification - -// NO CODE OUTSIDE OF paragraphs.cpp AND TESTS SHOULD NEED TO ACCESS -// DATA STRUCTURES OR FUNCTIONS IN THIS FILE. - -class UNICHARSET; -class WERD_CHOICE; - -namespace tesseract { - -// Return whether the given word is likely to be a list item start word. -bool AsciiLikelyListItem(const STRING &word); - -// Return the first Unicode Codepoint from werd[pos]. -int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos); - -// Set right word attributes given either a unicharset and werd or a utf8 -// string. -void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea); - -// Set left word attributes given either a unicharset and werd or a utf8 string. -void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea); - -enum LineType { - LT_START = 'S', // First line of a paragraph. - LT_BODY = 'C', // Continuation line of a paragraph. - LT_UNKNOWN = 'U', // No clues. - LT_MULTIPLE = 'M', // Matches for both LT_START and LT_BODY. -}; - -// The first paragraph in a page of body text is often un-indented. -// This is a typographic convention which is common to indicate either that: -// (1) The paragraph is the continuation of a previous paragraph, or -// (2) The paragraph is the first paragraph in a chapter. -// -// I refer to such paragraphs as "crown"s, and the output of the paragraph -// detection algorithm attempts to give them the same paragraph model as -// the rest of the body text. 
-// -// Nonetheless, while building hypotheses, it is useful to mark the lines -// of crown paragraphs temporarily as crowns, either aligned left or right. -extern const ParagraphModel *kCrownLeft; -extern const ParagraphModel *kCrownRight; - -inline bool StrongModel(const ParagraphModel *model) { - return model != nullptr && model != kCrownLeft && model != kCrownRight; -} - -struct LineHypothesis { - LineHypothesis() : ty(LT_UNKNOWN), model(nullptr) {} - LineHypothesis(LineType line_type, const ParagraphModel *m) - : ty(line_type), model(m) {} - LineHypothesis(const LineHypothesis &other) - : ty(other.ty), model(other.model) {} - - // Copy assignment operator. - LineHypothesis& operator=(const LineHypothesis& other) { - ty = other.ty; - model = other.model; - return *this; - } - - bool operator==(const LineHypothesis &other) const { - return ty == other.ty && model == other.model; - } - - LineType ty; - const ParagraphModel *model; -}; - -class ParagraphTheory; // Forward Declaration - -using SetOfModels = GenericVectorEqEq; - -// Row Scratch Registers are data generated by the paragraph detection -// algorithm based on a RowInfo input. -class RowScratchRegisters { - public: - // We presume row will outlive us. - void Init(const RowInfo &row); - - LineType GetLineType() const; - - LineType GetLineType(const ParagraphModel *model) const; - - // Mark this as a start line type, sans model. This is useful for the - // initial marking of probable body lines or paragraph start lines. - void SetStartLine(); - - // Mark this as a body line type, sans model. This is useful for the - // initial marking of probably body lines or paragraph start lines. - void SetBodyLine(); - - // Record that this row fits as a paragraph start line in the given model, - void AddStartLine(const ParagraphModel *model); - // Record that this row fits as a paragraph body line in the given model, - void AddBodyLine(const ParagraphModel *model); - - // Clear all hypotheses about this line. 
- void SetUnknown() { hypotheses_.truncate(0); } - - // Append all hypotheses of strong models that match this row as a start. - void StartHypotheses(SetOfModels *models) const; - - // Append all hypotheses of strong models matching this row. - void StrongHypotheses(SetOfModels *models) const; - - // Append all hypotheses for this row. - void NonNullHypotheses(SetOfModels *models) const; - - // Discard any hypotheses whose model is not in the given list. - void DiscardNonMatchingHypotheses(const SetOfModels &models); - - // If we have only one hypothesis and that is that this line is a paragraph - // start line of a certain model, return that model. Else return nullptr. - const ParagraphModel *UniqueStartHypothesis() const; - - // If we have only one hypothesis and that is that this line is a paragraph - // body line of a certain model, return that model. Else return nullptr. - const ParagraphModel *UniqueBodyHypothesis() const; - - // Return the indentation for the side opposite of the aligned side. - int OffsideIndent(tesseract::ParagraphJustification just) const { - switch (just) { - case tesseract::JUSTIFICATION_RIGHT: return lindent_; - case tesseract::JUSTIFICATION_LEFT: return rindent_; - default: return lindent_ > rindent_ ? lindent_ : rindent_; - } - } - - // Return the indentation for the side the text is aligned to. - int AlignsideIndent(tesseract::ParagraphJustification just) const { - switch (just) { - case tesseract::JUSTIFICATION_RIGHT: return rindent_; - case tesseract::JUSTIFICATION_LEFT: return lindent_; - default: return lindent_ > rindent_ ? lindent_ : rindent_; - } - } - - // Append header fields to a vector of row headings. - static void AppendDebugHeaderFields(GenericVector *header); - - // Append data for this row to a vector of debug strings. 
- void AppendDebugInfo(const ParagraphTheory &theory, - GenericVector *dbg) const; - - const RowInfo *ri_; - - // These four constants form a horizontal box model for the white space - // on the edges of each line. At each point in the algorithm, the following - // shall hold: - // ri_->pix_ldistance = lmargin_ + lindent_ - // ri_->pix_rdistance = rindent_ + rmargin_ - int lmargin_; - int lindent_; - int rindent_; - int rmargin_; - - private: - // Hypotheses of either LT_START or LT_BODY - GenericVectorEqEq hypotheses_; -}; - -// A collection of convenience functions for wrapping the set of -// Paragraph Models we believe correctly model the paragraphs in the image. -class ParagraphTheory { - public: - // We presume models will outlive us, and that models will take ownership - // of any ParagraphModel *'s we add. - explicit ParagraphTheory(GenericVector *models) - : models_(models) {} - GenericVector &models() { return *models_; } - const GenericVector &models() const { return *models_; } - - // Return an existing model if one that is Comparable() can be found. - // Else, allocate a new copy of model to save and return a pointer to it. - const ParagraphModel *AddModel(const ParagraphModel &model); - - // Discard any models we've made that are not in the list of used models. - void DiscardUnusedModels(const SetOfModels &used_models); - - // Return the set of all non-centered models. - void NonCenteredModels(SetOfModels *models); - - // If any of the non-centered paragraph models we know about fit - // rows[start, end), return it. Else nullptr. 
- const ParagraphModel *Fits(const GenericVector *rows, - int start, int end) const; - - int IndexOf(const ParagraphModel *model) const; - - private: - GenericVector *models_; - GenericVectorEqEq models_we_added_; -}; - -bool ValidFirstLine(const GenericVector *rows, - int row, const ParagraphModel *model); -bool ValidBodyLine(const GenericVector *rows, - int row, const ParagraphModel *model); -bool CrownCompatible(const GenericVector *rows, - int a, int b, const ParagraphModel *model); - -// A class for smearing Paragraph Model hypotheses to surrounding rows. -// The idea here is that StrongEvidenceClassify first marks only exceedingly -// obvious start and body rows and constructs models of them. Thereafter, -// we may have left over unmarked lines (mostly end-of-paragraph lines) which -// were too short to have much confidence about, but which fit the models we've -// constructed perfectly and which we ought to mark. This class is used to -// "smear" our models over the text. -class ParagraphModelSmearer { - public: - ParagraphModelSmearer(GenericVector *rows, - int row_start, int row_end, - ParagraphTheory *theory); - - // Smear forward paragraph models from existing row markings to subsequent - // text lines if they fit, and mark any thereafter still unmodeled rows - // with any model in the theory that fits them. - void Smear(); - - private: - // Record in open_models_ for rows [start_row, end_row) the list of models - // currently open at each row. - // A model is still open in a row if some previous row has said model as a - // start hypothesis, and all rows since (including this row) would fit as - // either a body or start line in that model. 
- void CalculateOpenModels(int row_start, int row_end); - - SetOfModels &OpenModels(int row) { - return open_models_[row - row_start_ + 1]; - } - - ParagraphTheory *theory_; - GenericVector *rows_; - int row_start_; - int row_end_; - - // open_models_ corresponds to rows[start_row_ - 1, end_row_] - // - // open_models_: Contains models which there was an active (open) paragraph - // as of the previous line and for which the left and right - // indents admit the possibility that this text line continues - // to fit the same model. - // TODO(eger): Think about whether we can get rid of "Open" models and just - // use the current hypotheses on RowScratchRegisters. - GenericVector open_models_; -}; - -// Clear all hypotheses about lines [start, end) and reset the margins to the -// percentile (0..100) value of the left and right row edges for this run of -// rows. -void RecomputeMarginsAndClearHypotheses( - GenericVector *rows, int start, int end, - int percentile); - -// Return the median inter-word space in rows[row_start, row_end). -int InterwordSpace(const GenericVector &rows, - int row_start, int row_end); - -// Return whether the first word on the after line can fit in the space at -// the end of the before line (knowing which way the text is aligned and read). -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after, - tesseract::ParagraphJustification justification); - -// Return whether the first word on the after line can fit in the space at -// the end of the before line (not knowing the text alignment). -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after); - -// Do rows[start, end) form a single instance of the given paragraph model? 
-bool RowsFitModel(const GenericVector *rows, - int start, int end, const ParagraphModel *model); - -// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known), -// normalize each row_owner to point to an actual PARA, and output the -// paragraphs in order onto paragraphs. -void CanonicalizeDetectionResults( - GenericVector *row_owners, - PARA_LIST *paragraphs); - -} // namespace - -#endif // TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paramsd.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paramsd.cpp deleted file mode 100644 index d554e4e6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paramsd.cpp +++ /dev/null @@ -1,355 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: paramsd.cpp -// Description: Tesseract parameter Editor -// Author: Joern Wanke -// Created: Wed Jul 18 10:05:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// The parameters editor is used to edit all the parameters used within -// tesseract from the ui. - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifndef GRAPHICS_DISABLED - -#include "paramsd.h" -#include // for fclose, fopen, fprintf, sprintf, FILE -#include // for atoi, strtod -#include // for strcmp, strcspn, strlen, strncpy -#include // for map, _Rb_tree_iterator, map<>::iterator -#include // for unique_ptr -#include // for pair -#include "genericvector.h" // for GenericVector -#include "params.h" // for ParamsVectors, StringParam, BoolParam -#include "scrollview.h" // for SVEvent, ScrollView, SVET_POPUP -#include "svmnode.h" // for SVMenuNode -#include "tesseractclass.h" // for Tesseract - -#define VARDIR "configs/" /*parameters files */ -#define MAX_ITEMS_IN_SUBMENU 30 - -// The following variables should remain static globals, since they -// are used by debug editor, which uses a single Tesseract instance. -// -// Contains the mappings from unique VC ids to their actual pointers. -static std::map vcMap; -static int nrParams = 0; -static int writeCommands[2]; - -ELISTIZE(ParamContent) - -// Constructors for the various ParamTypes. -ParamContent::ParamContent(tesseract::StringParam* it) { - my_id_ = nrParams; - nrParams++; - param_type_ = VT_STRING; - sIt = it; - vcMap[my_id_] = this; -} -// Constructors for the various ParamTypes. -ParamContent::ParamContent(tesseract::IntParam* it) { - my_id_ = nrParams; - nrParams++; - param_type_ = VT_INTEGER; - iIt = it; - vcMap[my_id_] = this; -} -// Constructors for the various ParamTypes. -ParamContent::ParamContent(tesseract::BoolParam* it) { - my_id_ = nrParams; - nrParams++; - param_type_ = VT_BOOLEAN; - bIt = it; - vcMap[my_id_] = this; -} -// Constructors for the various ParamTypes. -ParamContent::ParamContent(tesseract::DoubleParam* it) { - my_id_ = nrParams; - nrParams++; - param_type_ = VT_DOUBLE; - dIt = it; - vcMap[my_id_] = this; -} - -// Gets a VC object identified by its ID. 
-ParamContent* ParamContent::GetParamContentById(int id) { - return vcMap[id]; -} - -// Copy the first N words from the source string to the target string. -// Words are delimited by "_". -void ParamsEditor::GetFirstWords( - const char *s, // source string - int n, // number of words - char *t // target string - ) { - int full_length = strlen(s); - int reqd_len = 0; // No. of chars requird - const char *next_word = s; - - while ((n > 0) && reqd_len < full_length) { - reqd_len += strcspn(next_word, "_") + 1; - next_word += reqd_len; - n--; - } - strncpy(t, s, reqd_len); - t[reqd_len] = '\0'; // ensure null terminal -} - -// Getter for the name. -const char* ParamContent::GetName() const { - if (param_type_ == VT_INTEGER) { return iIt->name_str(); } - else if (param_type_ == VT_BOOLEAN) { return bIt->name_str(); } - else if (param_type_ == VT_DOUBLE) { return dIt->name_str(); } - else if (param_type_ == VT_STRING) { return sIt->name_str(); } - else - return "ERROR: ParamContent::GetName()"; -} - -// Getter for the description. -const char* ParamContent::GetDescription() const { - if (param_type_ == VT_INTEGER) { return iIt->info_str(); } - else if (param_type_ == VT_BOOLEAN) { return bIt->info_str(); } - else if (param_type_ == VT_DOUBLE) { return dIt->info_str(); } - else if (param_type_ == VT_STRING) { return sIt->info_str(); } - else return nullptr; -} - -// Getter for the value. -STRING ParamContent::GetValue() const { - STRING result; - if (param_type_ == VT_INTEGER) { - result.add_str_int("", *iIt); - } else if (param_type_ == VT_BOOLEAN) { - result.add_str_int("", *bIt); - } else if (param_type_ == VT_DOUBLE) { - result.add_str_double("", *dIt); - } else if (param_type_ == VT_STRING) { - if (((STRING) * (sIt)).string() != nullptr) { - result = sIt->string(); - } else { - result = "Null"; - } - } - return result; -} - -// Setter for the value. -void ParamContent::SetValue(const char* val) { -// TODO (wanke) Test if the values actually are properly converted. 
-// (Quickly visible impacts?) - changed_ = true; - if (param_type_ == VT_INTEGER) { - iIt->set_value(atoi(val)); - } else if (param_type_ == VT_BOOLEAN) { - bIt->set_value(atoi(val)); - } else if (param_type_ == VT_DOUBLE) { - dIt->set_value(strtod(val, nullptr)); - } else if (param_type_ == VT_STRING) { - sIt->set_value(val); - } -} - -// Gets the up to the first 3 prefixes from s (split by _). -// For example, tesseract_foo_bar will be split into tesseract,foo and bar. -void ParamsEditor::GetPrefixes(const char* s, STRING* level_one, - STRING* level_two, - STRING* level_three) { - std::unique_ptr p(new char[1024]); - GetFirstWords(s, 1, p.get()); - *level_one = p.get(); - GetFirstWords(s, 2, p.get()); - *level_two = p.get(); - GetFirstWords(s, 3, p.get()); - *level_three = p.get(); -} - -// Compare two VC objects by their name. -int ParamContent::Compare(const void* v1, const void* v2) { - const ParamContent* one = *static_cast(v1); - const ParamContent* two = *static_cast(v2); - return strcmp(one->GetName(), two->GetName()); -} - -// Find all editable parameters used within tesseract and create a -// SVMenuNode tree from it. -// TODO (wanke): This is actually sort of hackish. -SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { - SVMenuNode* mr = new SVMenuNode(); - ParamContent_LIST vclist; - ParamContent_IT vc_it(&vclist); - // Amount counts the number of entries for a specific char*. - // TODO(rays) get rid of the use of std::map. - std::map amount; - - // Add all parameters to a list. - int v, i; - int num_iterations = (tess->params() == nullptr) ? 1 : 2; - for (v = 0; v < num_iterations; ++v) { - tesseract::ParamsVectors *vec = (v == 0) ? 
GlobalParams() : tess->params(); - for (i = 0; i < vec->int_params.size(); ++i) { - vc_it.add_after_then_move(new ParamContent(vec->int_params[i])); - } - for (i = 0; i < vec->bool_params.size(); ++i) { - vc_it.add_after_then_move(new ParamContent(vec->bool_params[i])); - } - for (i = 0; i < vec->string_params.size(); ++i) { - vc_it.add_after_then_move(new ParamContent(vec->string_params[i])); - } - for (i = 0; i < vec->double_params.size(); ++i) { - vc_it.add_after_then_move(new ParamContent(vec->double_params[i])); - } - } - - // Count the # of entries starting with a specific prefix. - for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { - ParamContent* vc = vc_it.data(); - STRING tag; - STRING tag2; - STRING tag3; - - GetPrefixes(vc->GetName(), &tag, &tag2, &tag3); - amount[tag.string()]++; - amount[tag2.string()]++; - amount[tag3.string()]++; - } - - vclist.sort(ParamContent::Compare); // Sort the list alphabetically. - - SVMenuNode* other = mr->AddChild("OTHER"); - - // go through the list again and this time create the menu structure. - vc_it.move_to_first(); - for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { - ParamContent* vc = vc_it.data(); - STRING tag; - STRING tag2; - STRING tag3; - GetPrefixes(vc->GetName(), &tag, &tag2, &tag3); - - if (amount[tag.string()] == 1) { - other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(), - vc->GetDescription()); - } else { // More than one would use this submenu -> create submenu. - SVMenuNode* sv = mr->AddChild(tag.string()); - if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) || - (amount[tag2.string()] <= 1)) { - sv->AddChild(vc->GetName(), vc->GetId(), - vc->GetValue().string(), vc->GetDescription()); - } else { // Make subsubmenus. - SVMenuNode* sv2 = sv->AddChild(tag2.string()); - sv2->AddChild(vc->GetName(), vc->GetId(), - vc->GetValue().string(), vc->GetDescription()); - } - } - } - return mr; -} - -// Event listener. 
Waits for SVET_POPUP events and processes them. -void ParamsEditor::Notify(const SVEvent* sve) { - if (sve->type == SVET_POPUP) { // only catch SVET_POPUP! - char* param = sve->parameter; - if (sve->command_id == writeCommands[0]) { - WriteParams(param, false); - } else if (sve->command_id == writeCommands[1]) { - WriteParams(param, true); - } else { - ParamContent* vc = ParamContent::GetParamContentById( - sve->command_id); - vc->SetValue(param); - sv_window_->AddMessage("Setting %s to %s", - vc->GetName(), vc->GetValue().string()); - } - } -} - -// Integrate the parameters editor as popupmenu into the existing scrollview -// window (usually the pg editor). If sv == null, create a new empty -// empty window and attach the parameters editor to that window (ugly). -ParamsEditor::ParamsEditor(tesseract::Tesseract* tess, - ScrollView* sv) { - if (sv == nullptr) { - const char* name = "ParamEditorMAIN"; - sv = new ScrollView(name, 1, 1, 200, 200, 300, 200); - } - - sv_window_ = sv; - - //Only one event handler per window. - //sv->AddEventHandler((SVEventHandler*) this); - - SVMenuNode* svMenuRoot = BuildListOfAllLeaves(tess); - - STRING paramfile; - paramfile = tess->datadir; - paramfile += VARDIR; // parameters dir - paramfile += "edited"; // actual name - - SVMenuNode* std_menu = svMenuRoot->AddChild ("Build Config File"); - - writeCommands[0] = nrParams+1; - std_menu->AddChild("All Parameters", writeCommands[0], - paramfile.string(), "Config file name?"); - - writeCommands[1] = nrParams+2; - std_menu->AddChild ("changed_ Parameters Only", writeCommands[1], - paramfile.string(), "Config file name?"); - - svMenuRoot->BuildMenu(sv, false); -} - - -// Write all (changed_) parameters to a config file. -void ParamsEditor::WriteParams(char *filename, - bool changes_only) { - FILE *fp; // input file - char msg_str[255]; - // if file exists - if ((fp = fopen (filename, "rb")) != nullptr) { - fclose(fp); - sprintf (msg_str, "Overwrite file " "%s" "? 
(Y/N)", filename); - int a = sv_window_->ShowYesNoDialog(msg_str); - if (a == 'n') { - return; - } // don't write - } - - - fp = fopen (filename, "wb"); // can we write to it? - if (fp == nullptr) { - sv_window_->AddMessage( - "Can't write to file " - "%s" - "", - filename); - return; - } - - for (std::map::iterator iter = vcMap.begin(); - iter != vcMap.end(); - ++iter) { - ParamContent* cur = iter->second; - if (!changes_only || cur->HasChanged()) { - fprintf(fp, "%-25s %-12s # %s\n", - cur->GetName(), cur->GetValue().string(), cur->GetDescription()); - } - } - fclose(fp); -} -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paramsd.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paramsd.h deleted file mode 100644 index c913ca9f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/paramsd.h +++ /dev/null @@ -1,131 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: paramsd.h -// Description: Tesseract parameter editor -// Author: Joern Wanke -// Created: Wed Jul 18 10:05:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// Tesseract parameter editor is used to edit all the parameters used -// within tesseract from the ui. 
-#ifndef TESSERACT_CCMAIN_PARAMSD_H_ -#define TESSERACT_CCMAIN_PARAMSD_H_ - -#ifndef GRAPHICS_DISABLED - -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "scrollview.h" // for ScrollView (ptr only), SVEvent (ptr only) -#include "strngs.h" // for STRING - -class SVMenuNode; - -namespace tesseract { - class BoolParam; - class DoubleParam; - class IntParam; - class StringParam; - class Tesseract; -} - -// A list of all possible parameter types used. -enum ParamType { - VT_INTEGER, - VT_BOOLEAN, - VT_STRING, - VT_DOUBLE -}; - -// A rather hackish helper structure which can take any kind of parameter input -// (defined by ParamType) and do a couple of common operations on them, like -// comparisond or getting its value. It is used in the context of the -// ParamsEditor as a bridge from the internal tesseract parameters to the -// ones displayed by the ScrollView server. -class ParamContent : public ELIST_LINK { - public: - // Compare two VC objects by their name. - static int Compare(const void* v1, const void* v2); - - // Gets a VC object identified by its ID. - static ParamContent* GetParamContentById(int id); - - // Constructors for the various ParamTypes. - ParamContent() = default; - explicit ParamContent(tesseract::StringParam* it); - explicit ParamContent(tesseract::IntParam* it); - explicit ParamContent(tesseract::BoolParam* it); - explicit ParamContent(tesseract::DoubleParam* it); - - - // Getters and Setters. - void SetValue(const char* val); - STRING GetValue() const; - const char* GetName() const; - const char* GetDescription() const; - - int GetId() { return my_id_; } - bool HasChanged() { return changed_; } - - private: - // The unique ID of this VC object. - int my_id_; - // Whether the parameter was changed_ and thus needs to be rewritten. - bool changed_; - // The actual ParamType of this VC object. 
- ParamType param_type_; - - tesseract::StringParam* sIt; - tesseract::IntParam* iIt; - tesseract::BoolParam* bIt; - tesseract::DoubleParam* dIt; -}; - -ELISTIZEH(ParamContent) - -// The parameters editor enables the user to edit all the parameters used within -// tesseract. It can be invoked on its own, but is supposed to be invoked by -// the program editor. -class ParamsEditor : public SVEventHandler { - public: - // Integrate the parameters editor as popupmenu into the existing scrollview - // window (usually the pg editor). If sv == null, create a new empty - // empty window and attach the parameter editor to that window (ugly). - explicit ParamsEditor(tesseract::Tesseract*, ScrollView* sv = nullptr); - - // Event listener. Waits for SVET_POPUP events and processes them. - void Notify(const SVEvent* sve); - - private: - // Gets the up to the first 3 prefixes from s (split by _). - // For example, tesseract_foo_bar will be split into tesseract,foo and bar. - void GetPrefixes(const char* s, STRING* level_one, - STRING* level_two, STRING* level_three); - - // Gets the first n words (split by _) and puts them in t. - // For example, tesseract_foo_bar with N=2 will yield tesseract_foo_. - void GetFirstWords(const char *s, // source string - int n, // number of words - char *t); // target string - - // Find all editable parameters used within tesseract and create a - // SVMenuNode tree from it. - SVMenuNode *BuildListOfAllLeaves(tesseract::Tesseract *tess); - - // Write all (changed_) parameters to a config file. 
- void WriteParams(char* filename, bool changes_only); - - ScrollView* sv_window_; -}; - -#endif // GRAPHICS_DISABLED -#endif // TESSERACT_CCMAIN_PARAMSD_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pgedit.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pgedit.cpp deleted file mode 100644 index 83cf18b0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pgedit.cpp +++ /dev/null @@ -1,1001 +0,0 @@ -/********************************************************************** - * File: pgedit.cpp (Formerly pgeditor.c) - * Description: Page structure file editor - * Author: Phil Cheatle - * Created: Thu Oct 10 16:25:24 BST 1991 - * - *(C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0(the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http:// www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "pgedit.h" - -#include -#include - -#include "blread.h" -#include "control.h" -#include "paramsd.h" -#include "pageres.h" -#include "tordmain.h" -#include "scrollview.h" -#include "svmnode.h" -#include "statistc.h" -#include "tesseractclass.h" -#include "werdit.h" - -#ifndef GRAPHICS_DISABLED -#define ASC_HEIGHT (2 * kBlnBaselineOffset + kBlnXHeight) -#define X_HEIGHT (kBlnBaselineOffset + kBlnXHeight) -#define BL_HEIGHT kBlnBaselineOffset -#define DESC_HEIGHT 0 -#define MAXSPACING 128 /*max expected spacing in pix */ - -const ERRCODE EMPTYBLOCKLIST = "No blocks to edit"; - -enum CMD_EVENTS -{ - NULL_CMD_EVENT, - CHANGE_DISP_CMD_EVENT, - DUMP_WERD_CMD_EVENT, - SHOW_POINT_CMD_EVENT, - SHOW_BLN_WERD_CMD_EVENT, - DEBUG_WERD_CMD_EVENT, - BLAMER_CMD_EVENT, - BOUNDING_BOX_CMD_EVENT, - CORRECT_TEXT_CMD_EVENT, - POLYGONAL_CMD_EVENT, - BL_NORM_CMD_EVENT, - BITMAP_CMD_EVENT, - IMAGE_CMD_EVENT, - BLOCKS_CMD_EVENT, - BASELINES_CMD_EVENT, - UNIFORM_DISP_CMD_EVENT, - REFRESH_CMD_EVENT, - QUIT_CMD_EVENT, - RECOG_WERDS, - RECOG_PSEUDO, - SHOW_BLOB_FEATURES, - SHOW_SUBSCRIPT_CMD_EVENT, - SHOW_SUPERSCRIPT_CMD_EVENT, - SHOW_ITALIC_CMD_EVENT, - SHOW_BOLD_CMD_EVENT, - SHOW_UNDERLINE_CMD_EVENT, - SHOW_FIXEDPITCH_CMD_EVENT, - SHOW_SERIF_CMD_EVENT, - SHOW_SMALLCAPS_CMD_EVENT, - SHOW_DROPCAPS_CMD_EVENT, -}; - -enum ColorationMode { - CM_RAINBOW, - CM_SUBSCRIPT, - CM_SUPERSCRIPT, - CM_ITALIC, - CM_BOLD, - CM_UNDERLINE, - CM_FIXEDPITCH, - CM_SERIF, - CM_SMALLCAPS, - CM_DROPCAPS -}; - -/* - * - * Some global data - * - */ - -static ScrollView* image_win; -static ParamsEditor* pe; -static bool stillRunning = false; - -static ScrollView* bln_word_window = nullptr; // baseline norm words - -static CMD_EVENTS mode = CHANGE_DISP_CMD_EVENT; // selected words op - -static bool recog_done = false; // recog_all_words was called - -// These variables should remain global, since they are only used for the -// debug mode (in which only a 
single Tesseract thread/instance will exist). -BITS16 word_display_mode; -static ColorationMode color_mode = CM_RAINBOW; -BOOL8 display_image = FALSE; -BOOL8 display_blocks = FALSE; -BOOL8 display_baselines = FALSE; - -PAGE_RES *current_page_res = nullptr; - -STRING_VAR(editor_image_win_name, "EditorImage", - "Editor image window name"); -INT_VAR(editor_image_xpos, 590, "Editor image X Pos"); -INT_VAR(editor_image_ypos, 10, "Editor image Y Pos"); -INT_VAR(editor_image_menuheight, 50, "Add to image height for menu bar"); -INT_VAR(editor_image_word_bb_color, ScrollView::BLUE, - "Word bounding box colour"); -INT_VAR(editor_image_blob_bb_color, ScrollView::YELLOW, - "Blob bounding box colour"); -INT_VAR(editor_image_text_color, ScrollView::WHITE, - "Correct text colour"); - -STRING_VAR(editor_dbwin_name, "EditorDBWin", - "Editor debug window name"); -INT_VAR(editor_dbwin_xpos, 50, "Editor debug window X Pos"); -INT_VAR(editor_dbwin_ypos, 500, "Editor debug window Y Pos"); -INT_VAR(editor_dbwin_height, 24, "Editor debug window height"); -INT_VAR(editor_dbwin_width, 80, "Editor debug window width"); - -STRING_VAR(editor_word_name, "BlnWords", "BL normalized word window"); -INT_VAR(editor_word_xpos, 60, "Word window X Pos"); -INT_VAR(editor_word_ypos, 510, "Word window Y Pos"); -INT_VAR(editor_word_height, 240, "Word window height"); -INT_VAR(editor_word_width, 655, "Word window width"); - -STRING_VAR(editor_debug_config_file, "", "Config file to apply to single words"); - -class BlnEventHandler : public SVEventHandler { - public: - void Notify(const SVEvent* sv_event) { - if (sv_event->type == SVET_DESTROY) - bln_word_window = nullptr; - else if (sv_event->type == SVET_CLICK) - show_point(current_page_res, sv_event->x, sv_event->y); - } -}; - -/** - * bln_word_window_handle() - * - * @return a WINDOW for the word window, creating it if necessary - */ -ScrollView* bln_word_window_handle() { // return handle - // not opened yet - if (bln_word_window == nullptr) { - 
pgeditor_msg("Creating BLN word window..."); - bln_word_window = new ScrollView(editor_word_name.string(), - editor_word_xpos, editor_word_ypos, editor_word_width, - editor_word_height, 4000, 4000, true); - BlnEventHandler* a = new BlnEventHandler(); - bln_word_window->AddEventHandler(a); - pgeditor_msg("Creating BLN word window...Done"); - } - return bln_word_window; -} - -/** - * build_image_window() - * - * Destroy the existing image window if there is one. Work out how big the - * new window needs to be. Create it and re-display. - */ - -void build_image_window(int width, int height) { - delete image_win; - image_win = new ScrollView(editor_image_win_name.string(), - editor_image_xpos, editor_image_ypos, - width + 1, - height + editor_image_menuheight + 1, - width, - height, - true); -} - -/** - * display_bln_lines() - * - * Display normalized baseline, x-height, ascender limit and descender limit - */ - -static void display_bln_lines(ScrollView* window, ScrollView::Color colour, - float scale_factor, float y_offset, - float minx, float maxx) { - window->Pen(colour); - window->Line(minx, y_offset + scale_factor * DESC_HEIGHT, - maxx, y_offset + scale_factor * DESC_HEIGHT); - window->Line(minx, y_offset + scale_factor * BL_HEIGHT, - maxx, y_offset + scale_factor * BL_HEIGHT); - window->Line(minx, y_offset + scale_factor * X_HEIGHT, - maxx, y_offset + scale_factor * X_HEIGHT); - window->Line(minx, y_offset + scale_factor * ASC_HEIGHT, - maxx, y_offset + scale_factor * ASC_HEIGHT); -} - -/** - * notify() - * - * Event handler that processes incoming events, either forwarding - * them to process_cmd_win_event or process_image_event. 
- * - */ - -void PGEventHandler::Notify(const SVEvent* event) { - char myval = '0'; - if (event->type == SVET_POPUP) { - pe->Notify(event); - } // These are handled by ParamsEditor - else if (event->type == SVET_EXIT) { stillRunning = false; } - else if (event->type == SVET_MENU) { - if (strcmp(event->parameter, "true") == 0) { myval = 'T'; } - else if (strcmp(event->parameter, "false") == 0) { myval = 'F'; } - tess_->process_cmd_win_event(event->command_id, &myval); - } - else { - tess_->process_image_event(*event); - } -} - -/** - * build_menu() - * - * Construct the menu tree used by the command window - */ -namespace tesseract { -SVMenuNode *Tesseract::build_menu_new() { - SVMenuNode* parent_menu; - SVMenuNode* root_menu_item = new SVMenuNode(); - - SVMenuNode* modes_menu_item = root_menu_item->AddChild("MODES"); - - modes_menu_item->AddChild("Change Display", CHANGE_DISP_CMD_EVENT); - modes_menu_item->AddChild("Dump Word", DUMP_WERD_CMD_EVENT); - modes_menu_item->AddChild("Show Point", SHOW_POINT_CMD_EVENT); - modes_menu_item->AddChild("Show BL Norm Word", SHOW_BLN_WERD_CMD_EVENT); - modes_menu_item->AddChild("Config Words", DEBUG_WERD_CMD_EVENT); - modes_menu_item->AddChild("Recog Words", RECOG_WERDS); - modes_menu_item->AddChild("Recog Blobs", RECOG_PSEUDO); - modes_menu_item->AddChild("Show Blob Features", SHOW_BLOB_FEATURES); - - parent_menu = root_menu_item->AddChild("DISPLAY"); - - parent_menu->AddChild("Blamer", BLAMER_CMD_EVENT, FALSE); - parent_menu->AddChild("Bounding Boxes", BOUNDING_BOX_CMD_EVENT, FALSE); - parent_menu->AddChild("Correct Text", CORRECT_TEXT_CMD_EVENT, FALSE); - parent_menu->AddChild("Polygonal Approx", POLYGONAL_CMD_EVENT, FALSE); - parent_menu->AddChild("Baseline Normalized", BL_NORM_CMD_EVENT, FALSE); - parent_menu->AddChild("Edge Steps", BITMAP_CMD_EVENT, TRUE); - parent_menu->AddChild("Subscripts", SHOW_SUBSCRIPT_CMD_EVENT); - parent_menu->AddChild("Superscripts", SHOW_SUPERSCRIPT_CMD_EVENT); - parent_menu->AddChild("Italics", 
SHOW_ITALIC_CMD_EVENT); - parent_menu->AddChild("Bold", SHOW_BOLD_CMD_EVENT); - parent_menu->AddChild("Underline", SHOW_UNDERLINE_CMD_EVENT); - parent_menu->AddChild("FixedPitch", SHOW_FIXEDPITCH_CMD_EVENT); - parent_menu->AddChild("Serifs", SHOW_SERIF_CMD_EVENT); - parent_menu->AddChild("SmallCaps", SHOW_SMALLCAPS_CMD_EVENT); - parent_menu->AddChild("DropCaps", SHOW_DROPCAPS_CMD_EVENT); - - - parent_menu = root_menu_item->AddChild("OTHER"); - - parent_menu->AddChild("Quit", QUIT_CMD_EVENT); - parent_menu->AddChild("Show Image", IMAGE_CMD_EVENT, FALSE); - parent_menu->AddChild("ShowBlock Outlines", BLOCKS_CMD_EVENT, FALSE); - parent_menu->AddChild("Show Baselines", BASELINES_CMD_EVENT, FALSE); - parent_menu->AddChild("Uniform Display", UNIFORM_DISP_CMD_EVENT); - parent_menu->AddChild("Refresh Display", REFRESH_CMD_EVENT); - - return root_menu_item; -} - -/** - * do_re_display() - * - * Redisplay page - */ -void Tesseract::do_re_display( - bool (tesseract::Tesseract::* word_painter)(PAGE_RES_IT* pr_it)) { - int block_count = 1; - - image_win->Clear(); - if (display_image != 0) { - image_win->Image(pix_binary_, 0, 0); - } - - image_win->Brush(ScrollView::NONE); - PAGE_RES_IT pr_it(current_page_res); - for (WERD_RES* word = pr_it.word(); word != nullptr; word = pr_it.forward()) { - (this->*word_painter)(&pr_it); - if (display_baselines && pr_it.row() != pr_it.prev_row()) - pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN); - if (display_blocks && pr_it.block() != pr_it.prev_block()) - pr_it.block()->block->pdblk.plot(image_win, block_count++, ScrollView::RED); - } - image_win->Update(); -} - -/** - * pgeditor_main() - * - * Top level editor operation: - * Setup a new window and an according event handler - * - */ - -void Tesseract::pgeditor_main(int width, int height, PAGE_RES *page_res) { - current_page_res = page_res; - if (current_page_res->block_res_list.empty()) - return; - - recog_done = false; - stillRunning = true; - - build_image_window(width, 
height); - word_display_mode.turn_on_bit(DF_EDGE_STEP); - do_re_display(&tesseract::Tesseract::word_set_display); -#ifndef GRAPHICS_DISABLED - pe = new ParamsEditor(this, image_win); -#endif - PGEventHandler pgEventHandler(this); - - image_win->AddEventHandler(&pgEventHandler); - image_win->AddMessageBox(); - - SVMenuNode* svMenuRoot = build_menu_new(); - - svMenuRoot->BuildMenu(image_win); - image_win->SetVisible(true); - - image_win->AwaitEvent(SVET_DESTROY); - image_win->AddEventHandler(nullptr); -} -} // namespace tesseract - - -/** - * pgeditor_msg() - * - * Display a message - in the command window if there is one, or to stdout - */ - -void pgeditor_msg( // message display - const char *msg) { - image_win->AddMessage(msg); -} - -/** - * pgeditor_show_point() - * - * Display the coordinates of a point in the command window - */ - -void pgeditor_show_point( // display coords - SVEvent *event) { - image_win->AddMessage("Pointing at(%d, %d)", event->x, event->y); -} - -/** - * process_cmd_win_event() - * - * Process a command returned from the command window - * (Just call the appropriate command handler) - */ - -namespace tesseract { -bool Tesseract::process_cmd_win_event( // UI command semantics - int32_t cmd_event, // which menu item? - char* new_value // any prompt data -) { - char msg[160]; - bool exit = false; - - color_mode = CM_RAINBOW; - - // Run recognition on the full page if needed. 
- switch (cmd_event) { - case BLAMER_CMD_EVENT: - case SHOW_SUBSCRIPT_CMD_EVENT: - case SHOW_SUPERSCRIPT_CMD_EVENT: - case SHOW_ITALIC_CMD_EVENT: - case SHOW_BOLD_CMD_EVENT: - case SHOW_UNDERLINE_CMD_EVENT: - case SHOW_FIXEDPITCH_CMD_EVENT: - case SHOW_SERIF_CMD_EVENT: - case SHOW_SMALLCAPS_CMD_EVENT: - case SHOW_DROPCAPS_CMD_EVENT: - if (!recog_done) { - recog_all_words(current_page_res, nullptr, nullptr, nullptr, 0); - recog_done = true; - } - break; - default: - break; - } - - char* parameter; - - switch (cmd_event) { - case NULL_CMD_EVENT: - break; - - case CHANGE_DISP_CMD_EVENT: - case DUMP_WERD_CMD_EVENT: - case SHOW_POINT_CMD_EVENT: - case SHOW_BLN_WERD_CMD_EVENT: - case RECOG_WERDS: - case RECOG_PSEUDO: - case SHOW_BLOB_FEATURES: - mode =(CMD_EVENTS) cmd_event; - break; - case DEBUG_WERD_CMD_EVENT: - mode = DEBUG_WERD_CMD_EVENT; - parameter = image_win->ShowInputDialog("Config File Name"); - word_config_ = parameter; - delete[] parameter; - break; - case BOUNDING_BOX_CMD_EVENT: - if (new_value[0] == 'T') - word_display_mode.turn_on_bit(DF_BOX); - else - word_display_mode.turn_off_bit(DF_BOX); - mode = CHANGE_DISP_CMD_EVENT; - break; - case BLAMER_CMD_EVENT: - if (new_value[0] == 'T') - word_display_mode.turn_on_bit(DF_BLAMER); - else - word_display_mode.turn_off_bit(DF_BLAMER); - do_re_display(&tesseract::Tesseract::word_display); - mode = CHANGE_DISP_CMD_EVENT; - break; - case CORRECT_TEXT_CMD_EVENT: - if (new_value[0] == 'T') - word_display_mode.turn_on_bit(DF_TEXT); - else - word_display_mode.turn_off_bit(DF_TEXT); - mode = CHANGE_DISP_CMD_EVENT; - break; - case POLYGONAL_CMD_EVENT: - if (new_value[0] == 'T') - word_display_mode.turn_on_bit(DF_POLYGONAL); - else - word_display_mode.turn_off_bit(DF_POLYGONAL); - mode = CHANGE_DISP_CMD_EVENT; - break; - case BL_NORM_CMD_EVENT: - if (new_value[0] == 'T') - word_display_mode.turn_on_bit(DF_BN_POLYGONAL); - else - word_display_mode.turn_off_bit(DF_BN_POLYGONAL); - mode = CHANGE_DISP_CMD_EVENT; - break; - case 
BITMAP_CMD_EVENT: - if (new_value[0] == 'T') - word_display_mode.turn_on_bit(DF_EDGE_STEP); - else - word_display_mode.turn_off_bit(DF_EDGE_STEP); - mode = CHANGE_DISP_CMD_EVENT; - break; - case UNIFORM_DISP_CMD_EVENT: - do_re_display(&tesseract::Tesseract::word_set_display); - break; - case IMAGE_CMD_EVENT: - display_image =(new_value[0] == 'T'); - do_re_display(&tesseract::Tesseract::word_display); - break; - case BLOCKS_CMD_EVENT: - display_blocks =(new_value[0] == 'T'); - do_re_display(&tesseract::Tesseract::word_display); - break; - case BASELINES_CMD_EVENT: - display_baselines =(new_value[0] == 'T'); - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_SUBSCRIPT_CMD_EVENT: - color_mode = CM_SUBSCRIPT; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_SUPERSCRIPT_CMD_EVENT: - color_mode = CM_SUPERSCRIPT; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_ITALIC_CMD_EVENT: - color_mode = CM_ITALIC; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_BOLD_CMD_EVENT: - color_mode = CM_BOLD; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_UNDERLINE_CMD_EVENT: - color_mode = CM_UNDERLINE; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_FIXEDPITCH_CMD_EVENT: - color_mode = CM_FIXEDPITCH; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_SERIF_CMD_EVENT: - color_mode = CM_SERIF; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_SMALLCAPS_CMD_EVENT: - color_mode = CM_SMALLCAPS; - do_re_display(&tesseract::Tesseract::word_display); - break; - case SHOW_DROPCAPS_CMD_EVENT: - color_mode = CM_DROPCAPS; - do_re_display(&tesseract::Tesseract::word_display); - break; - case REFRESH_CMD_EVENT: - do_re_display(&tesseract::Tesseract::word_display); - break; - case QUIT_CMD_EVENT: - exit = true; - ScrollView::Exit(); - break; - - default: - snprintf(msg, sizeof(msg), "Unrecognised 
event %" PRId32 "(%s)", - cmd_event, new_value); - image_win->AddMessage(msg); - break; - } - return exit; -} - - -/** - * process_image_event() - * - * User has done something in the image window - mouse down or up. Work out - * what it is and do something with it. - * If DOWN - just remember where it was. - * If UP - for each word in the selected area do the operation defined by - * the current mode. - */ -void Tesseract::process_image_event( // action in image win - const SVEvent &event) { - // The following variable should remain static, since it is used by - // debug editor, which uses a single Tesseract instance. - static ICOORD down; - ICOORD up; - TBOX selection_box; - char msg[80]; - - switch(event.type) { - - case SVET_SELECTION: - if (event.type == SVET_SELECTION) { - down.set_x(event.x + event.x_size); - down.set_y(event.y + event.y_size); - if (mode == SHOW_POINT_CMD_EVENT) - show_point(current_page_res, event.x, event.y); - } - - up.set_x(event.x); - up.set_y(event.y); - - selection_box = TBOX(down, up); - - switch(mode) { - case CHANGE_DISP_CMD_EVENT: - process_selected_words( - current_page_res, - selection_box, - &tesseract::Tesseract::word_blank_and_set_display); - break; - case DUMP_WERD_CMD_EVENT: - process_selected_words(current_page_res, - selection_box, - &tesseract::Tesseract::word_dumper); - break; - case SHOW_BLN_WERD_CMD_EVENT: - process_selected_words(current_page_res, - selection_box, - &tesseract::Tesseract::word_bln_display); - break; - case DEBUG_WERD_CMD_EVENT: - debug_word(current_page_res, selection_box); - break; - case SHOW_POINT_CMD_EVENT: - break; // ignore up event - - case RECOG_WERDS: - #ifndef DISABLED_LEGACY_ENGINE - image_win->AddMessage("Recogging selected words"); - this->process_selected_words(current_page_res, - selection_box, - &Tesseract::recog_interactive); - #endif // ndef DISABLED_LEGACY_ENGINE - break; - case RECOG_PSEUDO: - image_win->AddMessage("Recogging selected blobs"); - 
recog_pseudo_word(current_page_res, selection_box); - break; - case SHOW_BLOB_FEATURES: - blob_feature_display(current_page_res, selection_box); - break; - - default: - sprintf(msg, "Mode %d not yet implemented", mode); - image_win->AddMessage(msg); - break; - } - default: - break; - } -} - -/** - * debug_word - * - * Process the whole image, but load word_config_ for the selected word(s). - */ -void Tesseract::debug_word(PAGE_RES* page_res, const TBOX &selection_box) { -#ifndef DISABLED_LEGACY_ENGINE - ResetAdaptiveClassifier(); -#endif - recog_all_words(page_res, nullptr, &selection_box, word_config_.string(), 0); -} -} // namespace tesseract - - -/** - * show_point() - * - * Show coords of point, blob bounding box, word bounding box and offset from - * row baseline - */ - -void show_point(PAGE_RES* page_res, float x, float y) { - FCOORD pt(x, y); - PAGE_RES_IT pr_it(page_res); - - const int kBufsize = 512; - char msg[kBufsize]; - char *msg_ptr = msg; - - msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y); - - for (WERD_RES* word = pr_it.word(); word != nullptr; word = pr_it.forward()) { - if (pr_it.row() != pr_it.prev_row() && - pr_it.row()->row->bounding_box().contains(pt)) { - msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", - pr_it.row()->row->base_line(x)); - } - if (word->word->bounding_box().contains(pt)) { - TBOX box = word->word->bounding_box(); - msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", - box.left(), box.bottom(), - box.right(), box.top()); - C_BLOB_IT cblob_it(word->word->cblob_list()); - for (cblob_it.mark_cycle_pt(); - !cblob_it.cycled_list(); - cblob_it.forward()) { - C_BLOB* cblob = cblob_it.data(); - box = cblob->bounding_box(); - if (box.contains(pt)) { - msg_ptr += sprintf(msg_ptr, - "CBlb(%d, %d)/(%d, %d) ", - box.left(), box.bottom(), - box.right(), box.top()); - } - } - } - } - image_win->AddMessage(msg); -} - - -/********************************************************************** - * WERD PROCESSOR FUNCTIONS - * 
======================== - * - * These routines are invoked by one or more of: - * process_all_words() - * process_selected_words() - * or - * process_all_words_it() - * process_selected_words_it() - * for each word to be processed - **********************************************************************/ - -/** - * word_blank_and_set_display() Word processor - * - * Blank display of word then redisplay word according to current display mode - * settings - */ -#endif // GRAPHICS_DISABLED -namespace tesseract { -#ifndef GRAPHICS_DISABLED -bool Tesseract::word_blank_and_set_display(PAGE_RES_IT* pr_it) { - pr_it->word()->word->bounding_box().plot(image_win, ScrollView::BLACK, - ScrollView::BLACK); - return word_set_display(pr_it); -} - - -/** - * word_bln_display() - * - * Normalize word and display in word window - */ -bool Tesseract::word_bln_display(PAGE_RES_IT* pr_it) { - WERD_RES* word_res = pr_it->word(); - if (word_res->chopped_word == nullptr) { - // Setup word normalization parameters. 
- word_res->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - pr_it->row()->row, pr_it->block()->block); - } - bln_word_window_handle()->Clear(); - display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, - 1.0, 0.0f, -1000.0f, 1000.0f); - C_BLOB_IT it(word_res->word->cblob_list()); - ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->plot_normed(word_res->denorm, color, ScrollView::BROWN, - bln_word_window_handle()); - color = WERD::NextColor(color); - } - bln_word_window_handle()->Update(); - return true; -} - - - -/** - * word_display() Word Processor - * - * Display a word according to its display modes - */ -bool Tesseract::word_display(PAGE_RES_IT* pr_it) { - WERD_RES* word_res = pr_it->word(); - WERD* word = word_res->word; - TBOX word_bb; // word bounding box - int word_height; // ht of word BB - bool displayed_something = false; - float shift; // from bot left - - if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) { - BoxWord* box_word = word_res->box_word; - WERD_CHOICE* best_choice = word_res->best_choice; - int length = box_word->length(); - if (word_res->fontinfo == nullptr) return false; - const FontInfo& font_info = *word_res->fontinfo; - for (int i = 0; i < length; ++i) { - ScrollView::Color color = ScrollView::GREEN; - switch (color_mode) { - case CM_SUBSCRIPT: - if (best_choice->BlobPosition(i) == SP_SUBSCRIPT) - color = ScrollView::RED; - break; - case CM_SUPERSCRIPT: - if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT) - color = ScrollView::RED; - break; - case CM_ITALIC: - if (font_info.is_italic()) - color = ScrollView::RED; - break; - case CM_BOLD: - if (font_info.is_bold()) - color = ScrollView::RED; - break; - case CM_FIXEDPITCH: - if (font_info.is_fixed_pitch()) - color = ScrollView::RED; - break; - case CM_SERIF: - if 
(font_info.is_serif()) - color = ScrollView::RED; - break; - case CM_SMALLCAPS: - if (word_res->small_caps) - color = ScrollView::RED; - break; - case CM_DROPCAPS: - if (best_choice->BlobPosition(i) == SP_DROPCAP) - color = ScrollView::RED; - break; - // TODO(rays) underline is currently completely unsupported. - case CM_UNDERLINE: - default: - break; - } - image_win->Pen(color); - TBOX box = box_word->BlobBox(i); - image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top()); - } - return true; - } - /* - Note the double coercions of(COLOUR)((int32_t)editor_image_word_bb_color) - etc. are to keep the compiler happy. - */ - // display bounding box - if (word->display_flag(DF_BOX)) { - word->bounding_box().plot(image_win, - (ScrollView::Color)((int32_t) - editor_image_word_bb_color), - (ScrollView::Color)((int32_t) - editor_image_word_bb_color)); - - ScrollView::Color c = (ScrollView::Color) - ((int32_t) editor_image_blob_bb_color); - image_win->Pen(c); - // cblob iterator - C_BLOB_IT c_it(word->cblob_list()); - for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) - c_it.data()->bounding_box().plot(image_win); - displayed_something = true; - } - - // display edge steps - if (word->display_flag(DF_EDGE_STEP)) { // edgesteps available - word->plot(image_win); // rainbow colors - displayed_something = true; - } - - // display poly approx - if (word->display_flag(DF_POLYGONAL)) { - // need to convert - TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word); - tword->plot(image_win); - delete tword; - displayed_something = true; - } - - // Display correct text and blamer information. 
- STRING text; - STRING blame; - if (word->display_flag(DF_TEXT) && word->text() != nullptr) { - text = word->text(); - } - if (word->display_flag(DF_BLAMER) && - !(word_res->blamer_bundle != nullptr && - word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) { - text = ""; - const BlamerBundle *blamer_bundle = word_res->blamer_bundle; - if (blamer_bundle == nullptr) { - text += "NULL"; - } else { - text = blamer_bundle->TruthString(); - } - text += " -> "; - STRING best_choice_str; - if (word_res->best_choice == nullptr) { - best_choice_str = "NULL"; - } else { - word_res->best_choice->string_and_lengths(&best_choice_str, nullptr); - } - text += best_choice_str; - IncorrectResultReason reason = (blamer_bundle == nullptr) ? - IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason(); - ASSERT_HOST(reason < IRR_NUM_REASONS) - blame += " ["; - blame += BlamerBundle::IncorrectReasonName(reason); - blame += "]"; - } - if (text.length() > 0) { - word_bb = word->bounding_box(); - image_win->Pen(ScrollView::RED); - word_height = word_bb.height(); - int text_height = 0.50 * word_height; - if (text_height > 20) text_height = 20; - image_win->TextAttributes("Arial", text_height, false, false, false); - shift = (word_height < word_bb.width()) ? 
0.25 * word_height : 0.0f; - image_win->Text(word_bb.left() + shift, - word_bb.bottom() + 0.25 * word_height, text.string()); - if (blame.length() > 0) { - image_win->Text(word_bb.left() + shift, - word_bb.bottom() + 0.25 * word_height - text_height, - blame.string()); - } - - displayed_something = true; - } - - if (!displayed_something) // display BBox anyway - word->bounding_box().plot(image_win, - (ScrollView::Color)((int32_t) editor_image_word_bb_color), - (ScrollView::Color)((int32_t) - editor_image_word_bb_color)); - return true; -} -#endif // GRAPHICS_DISABLED - -/** - * word_dumper() - * - * Dump members to the debug window - */ -bool Tesseract::word_dumper(PAGE_RES_IT* pr_it) { - if (pr_it->block()->block != nullptr) { - tprintf("\nBlock data...\n"); - pr_it->block()->block->print(nullptr, false); - } - tprintf("\nRow data...\n"); - pr_it->row()->row->print(nullptr); - tprintf("\nWord data...\n"); - WERD_RES* word_res = pr_it->word(); - word_res->word->print(); - if (word_res->blamer_bundle != nullptr && wordrec_debug_blamer && - word_res->blamer_bundle->incorrect_result_reason() != IRR_CORRECT) { - tprintf("Current blamer debug: %s\n", - word_res->blamer_bundle->debug().string()); - } - return true; -} - -#ifndef GRAPHICS_DISABLED -/** - * word_set_display() Word processor - * - * Display word according to current display mode settings - */ -bool Tesseract::word_set_display(PAGE_RES_IT* pr_it) { - WERD* word = pr_it->word()->word; - word->set_display_flag(DF_BOX, word_display_mode.bit(DF_BOX)); - word->set_display_flag(DF_TEXT, word_display_mode.bit(DF_TEXT)); - word->set_display_flag(DF_POLYGONAL, word_display_mode.bit(DF_POLYGONAL)); - word->set_display_flag(DF_EDGE_STEP, word_display_mode.bit(DF_EDGE_STEP)); - word->set_display_flag(DF_BN_POLYGONAL, - word_display_mode.bit(DF_BN_POLYGONAL)); - word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER)); - return word_display(pr_it); -} - - -// page_res is non-const because the iterator doesn't 
know if you are going -// to change the items it points to! Really a const here though. -void Tesseract::blob_feature_display(PAGE_RES* page_res, - const TBOX& selection_box) { -#ifndef DISABLED_LEGACY_ENGINE - PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box); - if (it != nullptr) { - WERD_RES* word_res = it->word(); - word_res->x_height = it->row()->row->x_height(); - word_res->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - it->row()->row, it->block()->block); - TWERD* bln_word = word_res->chopped_word; - TBLOB* bln_blob = bln_word->blobs[0]; - INT_FX_RESULT_STRUCT fx_info; - GenericVector bl_features; - GenericVector cn_features; - Classify::ExtractFeatures(*bln_blob, classify_nonlinear_norm, &bl_features, - &cn_features, &fx_info, nullptr); - // Display baseline features. - ScrollView* bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0); - ClearFeatureSpaceWindow(baseline, bl_win); - for (int f = 0; f < bl_features.size(); ++f) - RenderIntFeature(bl_win, &bl_features[f], ScrollView::GREEN); - bl_win->Update(); - // Display cn features. 
- ScrollView* cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0); - ClearFeatureSpaceWindow(character, cn_win); - for (int f = 0; f < cn_features.size(); ++f) - RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN); - cn_win->Update(); - - it->DeleteCurrentWord(); - delete it; - } -#endif // ndef DISABLED_LEGACY_ENGINE -} - - -#endif // GRAPHICS_DISABLED - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pgedit.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pgedit.h deleted file mode 100644 index 6a8f48f7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/pgedit.h +++ /dev/null @@ -1,79 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: pgedit.h -// Description: Page structure file editor -// Author: Joern Wanke -// Created: Wed Jul 18 10:05:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef PGEDIT_H -#define PGEDIT_H - -#include "params.h" // for INT_VAR_H, IntParam, STRING_VAR_H, StringParam -#include "scrollview.h" // for SVEvent (ptr only), SVEventHandler, ScrollView - -class BLOCK_LIST; -class PAGE_RES; - -namespace tesseract { - class Tesseract; -} - -// A small event handler class to process incoming events to -// this window. 
-class PGEventHandler : public SVEventHandler { - public: - PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) { - } - void Notify(const SVEvent* sve); - private: - tesseract::Tesseract* tess_; -}; - -extern BLOCK_LIST *current_block_list; -extern STRING_VAR_H (editor_image_win_name, "EditorImage", -"Editor image window name"); -extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos"); -extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos"); -extern INT_VAR_H (editor_image_height, 680, "Editor image height"); -extern INT_VAR_H (editor_image_width, 655, "Editor image width"); -extern INT_VAR_H (editor_image_word_bb_color, BLUE, -"Word bounding box colour"); -extern INT_VAR_H (editor_image_blob_bb_color, YELLOW, -"Blob bounding box colour"); -extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour"); -extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin", -"Editor debug window name"); -extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos"); -extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos"); -extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height"); -extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width"); -extern STRING_VAR_H (editor_word_name, "BlnWords", -"BL normalised word window"); -extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos"); -extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos"); -extern INT_VAR_H (editor_word_height, 240, "Word window height"); -extern INT_VAR_H (editor_word_width, 655, "Word window width"); -extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image"); - -ScrollView* bln_word_window_handle(); //return handle -void build_image_window(int width, int height); -void pgeditor_msg( //message display - const char *msg); -void pgeditor_show_point( //display coords - SVEvent *event); - //put bln word in box -void show_point(PAGE_RES* page_res, float x, float y); - -#endif diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/recogtraining.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/recogtraining.cpp deleted file mode 100644 index 13318057..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/recogtraining.cpp +++ /dev/null @@ -1,236 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: recogtraining.cpp -// Description: Functions for ambiguity and parameter training. -// Author: Daria Antonova -// Created: Mon Aug 13 11:26:43 PDT 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "tesseractclass.h" - -#include "boxread.h" -#include "control.h" -#include "host.h" -#include "ratngs.h" -#include "reject.h" -#include "stopper.h" - -namespace tesseract { - -const int16_t kMaxBoxEdgeDiff = 2; - -// Sets flags necessary for recognition in the training mode. -// Opens and returns the pointer to the output file. -FILE *Tesseract::init_recog_training(const STRING &fname) { - if (tessedit_ambigs_training) { - tessedit_tess_adaption_mode.set_value(0); // turn off adaption - tessedit_enable_doc_dict.set_value(0); // turn off document dictionary - // Explore all segmentations. 
- getDict().stopper_no_acceptable_choices.set_value(1); - } - - STRING output_fname = fname; - const char *lastdot = strrchr(output_fname.string(), '.'); - if (lastdot != nullptr) output_fname[lastdot - output_fname.string()] = '\0'; - output_fname += ".txt"; - FILE *output_file = fopen(output_fname.string(), "a+"); - if (output_file == nullptr) { - tprintf("Error: Could not open file %s\n", output_fname.string()); - ASSERT_HOST(output_file); - } - return output_file; -} - -// Copies the bounding box from page_res_it->word() to the given TBOX. -static bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) { - while (page_res_it->block() != nullptr && page_res_it->word() == nullptr) - page_res_it->forward(); - - if (page_res_it->word() != nullptr) { - *tbox = page_res_it->word()->word->bounding_box(); - - // If tbox->left() is negative, the training image has vertical text and - // all the coordinates of bounding boxes of page_res are rotated by 90 - // degrees in a counterclockwise direction. We need to rotate the TBOX back - // in order to compare with the TBOXes of box files. - if (tbox->left() < 0) { - tbox->rotate(FCOORD(0.0, -1.0)); - } - - return true; - } else { - return false; - } -} - -// This function takes tif/box pair of files and runs recognition on the image, -// while making sure that the word bounds that tesseract identified roughly -// match to those specified by the input box file. For each word (ngram in a -// single bounding box from the input box file) it outputs the ocred result, -// the correct label, rating and certainty. 
-void Tesseract::recog_training_segmented(const STRING &fname, - PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - FILE *output_file) { - STRING box_fname = fname; - const char *lastdot = strrchr(box_fname.string(), '.'); - if (lastdot != nullptr) box_fname[lastdot - box_fname.string()] = '\0'; - box_fname += ".box"; - // ReadNextBox() will close box_file - FILE *box_file = fopen(box_fname.string(), "r"); - if (box_file == nullptr) { - tprintf("Error: Could not open file %s\n", box_fname.string()); - ASSERT_HOST(box_file); - } - - PAGE_RES_IT page_res_it; - page_res_it.page_res = page_res; - page_res_it.restart_page(); - STRING label; - - // Process all the words on this page. - TBOX tbox; // tesseract-identified box - TBOX bbox; // box from the box file - bool keep_going; - int line_number = 0; - int examined_words = 0; - do { - keep_going = read_t(&page_res_it, &tbox); - keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label, - &bbox); - // Align bottom left points of the TBOXes. - while (keep_going && - !NearlyEqual(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { - if (bbox.bottom() < tbox.bottom()) { - page_res_it.forward(); - keep_going = read_t(&page_res_it, &tbox); - } else { - keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, - &bbox); - } - } - while (keep_going && - !NearlyEqual(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) { - if (bbox.left() > tbox.left()) { - page_res_it.forward(); - keep_going = read_t(&page_res_it, &tbox); - } else { - keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, - &bbox); - } - } - // OCR the word if top right points of the TBOXes are similar. 
- if (keep_going && - NearlyEqual(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && - NearlyEqual(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { - ambigs_classify_and_output(label.string(), &page_res_it, output_file); - examined_words++; - } - page_res_it.forward(); - } while (keep_going); - - // Set up scripts on all of the words that did not get sent to - // ambigs_classify_and_output. They all should have, but if all the - // werd_res's don't get uch_sets, tesseract will crash when you try - // to iterate over them. :-( - int total_words = 0; - for (page_res_it.restart_page(); page_res_it.block() != nullptr; - page_res_it.forward()) { - if (page_res_it.word()) { - if (page_res_it.word()->uch_set == nullptr) - page_res_it.word()->SetupFake(unicharset); - total_words++; - } - } - if (examined_words < 0.85 * total_words) { - tprintf("TODO(antonova): clean up recog_training_segmented; " - " It examined only a small fraction of the ambigs image.\n"); - } - tprintf("recog_training_segmented: examined %d / %d words.\n", - examined_words, total_words); -} - -// Helper prints the given set of blob choices. -static void PrintPath(int length, const BLOB_CHOICE** blob_choices, - const UNICHARSET& unicharset, - const char *label, FILE *output_file) { - float rating = 0.0f; - float certainty = 0.0f; - for (int i = 0; i < length; ++i) { - const BLOB_CHOICE* blob_choice = blob_choices[i]; - fprintf(output_file, "%s", - unicharset.id_to_unichar(blob_choice->unichar_id())); - rating += blob_choice->rating(); - if (certainty > blob_choice->certainty()) - certainty = blob_choice->certainty(); - } - fprintf(output_file, "\t%s\t%.4f\t%.4f\n", - label, rating, certainty); -} - -// Helper recursively prints all paths through the ratings matrix, starting -// at column col. 
-static void PrintMatrixPaths(int col, int dim, - const MATRIX& ratings, - int length, const BLOB_CHOICE** blob_choices, - const UNICHARSET& unicharset, - const char *label, FILE *output_file) { - for (int row = col; row < dim && row - col < ratings.bandwidth(); ++row) { - if (ratings.get(col, row) != NOT_CLASSIFIED) { - BLOB_CHOICE_IT bc_it(ratings.get(col, row)); - for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { - blob_choices[length] = bc_it.data(); - if (row + 1 < dim) { - PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices, - unicharset, label, output_file); - } else { - PrintPath(length + 1, blob_choices, unicharset, label, output_file); - } - } - } - } -} - -// Runs classify_word_pass1() on the current word. Outputs Tesseract's -// raw choice as a result of the classification. For words labeled with a -// single unichar also outputs all alternatives from blob_choices of the -// best choice. -void Tesseract::ambigs_classify_and_output(const char *label, - PAGE_RES_IT* pr_it, - FILE *output_file) { - // Classify word. - fflush(stdout); - WordData word_data(*pr_it); - SetupWordPassN(1, &word_data); - classify_word_and_language(1, pr_it, &word_data); - WERD_RES* werd_res = word_data.word; - WERD_CHOICE *best_choice = werd_res->best_choice; - ASSERT_HOST(best_choice != nullptr); - - // Compute the number of unichars in the label. - GenericVector encoding; - if (!unicharset.encode_string(label, true, &encoding, nullptr, nullptr)) { - tprintf("Not outputting illegal unichar %s\n", label); - return; - } - - // Dump all paths through the ratings matrix (which is normally small). 
- int dim = werd_res->ratings->dimension(); - const BLOB_CHOICE** blob_choices = new const BLOB_CHOICE*[dim]; - PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices, - unicharset, label, output_file); - delete [] blob_choices; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/reject.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/reject.cpp deleted file mode 100644 index bc4623a3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/reject.cpp +++ /dev/null @@ -1,799 +0,0 @@ -/********************************************************************** - * File: reject.cpp (Formerly reject.c) - * Description: Rejection functions used in tessedit - * Author: Phil Cheatle - * Created: Wed Sep 23 16:50:21 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifdef DISABLED_LEGACY_ENGINE - -#include "tesseractclass.h" - -namespace tesseract { - -int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) { - const WERD_CHOICE &word = *werd_res->best_choice; - int dict_word_type = werd_res->tesseract->dict_word(word); - return dict_word_type == DOC_DAWG_PERM ? 
0 : dict_word_type; -} -} // namespace tesseract - -#else - -#include "tessvars.h" -#include -#include -#include -#include "genericvector.h" -#include "reject.h" -#include "control.h" -#include "docqual.h" -#include "globaloc.h" // For err_exit. -#include "globals.h" -#include "helpers.h" - -#include "tesseractclass.h" - - -CLISTIZEH (STRING) CLISTIZE (STRING) - -/************************************************************************* - * set_done() - * - * Set the done flag based on the word acceptability criteria - *************************************************************************/ - -namespace tesseract { -void Tesseract::set_done(WERD_RES *word, int16_t pass) { - word->done = word->tess_accepted && - (strchr(word->best_choice->unichar_string().string(), ' ') == nullptr); - bool word_is_ambig = word->best_choice->dangerous_ambig_found(); - bool word_from_dict = word->best_choice->permuter() == SYSTEM_DAWG_PERM || - word->best_choice->permuter() == FREQ_DAWG_PERM || - word->best_choice->permuter() == USER_DAWG_PERM; - if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) && - one_ell_conflict(word, false)) { - if (tessedit_rejection_debug) tprintf("one_ell_conflict detected\n"); - word->done = FALSE; - } - if (word->done && ((!word_from_dict && - word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) { - if (tessedit_rejection_debug) tprintf("non-dict or ambig word detected\n"); - word->done = FALSE; - } - if (tessedit_rejection_debug) { - tprintf("set_done(): done=%d\n", word->done); - word->best_choice->print(""); - } -} - - -/************************************************************************* - * make_reject_map() - * - * Sets the done flag to indicate whether the resylt is acceptable. - * - * Sets a reject map for the word. 
- *************************************************************************/ -void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) { - int i; - int offset; - - flip_0O(word); - check_debug_pt(word, -1); // For trap only - set_done(word, pass); // Set acceptance - word->reject_map.initialise(word->best_choice->unichar_lengths().length()); - reject_blanks(word); - /* - 0: Rays original heuristic - the baseline - */ - if (tessedit_reject_mode == 0) { - if (!word->done) - reject_poor_matches(word); - } else if (tessedit_reject_mode == 5) { - /* - 5: Reject I/1/l from words where there is no strong contextual confirmation; - the whole of any unacceptable words (incl PERM rej of dubious 1/I/ls); - and the whole of any words which are very small - */ - if (kBlnXHeight / word->denorm.y_scale() <= min_sane_x_ht_pixels) { - word->reject_map.rej_word_small_xht(); - } else { - one_ell_conflict(word, true); - /* - Originally the code here just used the done flag. Now I have duplicated - and unpacked the conditions for setting the done flag so that each - mechanism can be turned on or off independently. This works WITHOUT - affecting the done flag setting. 
- */ - if (rej_use_tess_accepted && !word->tess_accepted) - word->reject_map.rej_word_not_tess_accepted (); - - if (rej_use_tess_blanks && - (strchr (word->best_choice->unichar_string().string (), ' ') != nullptr)) - word->reject_map.rej_word_contains_blanks (); - - WERD_CHOICE* best_choice = word->best_choice; - if (rej_use_good_perm) { - if ((best_choice->permuter() == SYSTEM_DAWG_PERM || - best_choice->permuter() == FREQ_DAWG_PERM || - best_choice->permuter() == USER_DAWG_PERM) && - (!rej_use_sensible_wd || - acceptable_word_string(*word->uch_set, - best_choice->unichar_string().string(), - best_choice->unichar_lengths().string()) != - AC_UNACCEPTABLE)) { - // PASSED TEST - } else if (best_choice->permuter() == NUMBER_PERM) { - if (rej_alphas_in_number_perm) { - for (i = 0, offset = 0; - best_choice->unichar_string()[offset] != '\0'; - offset += best_choice->unichar_lengths()[i++]) { - if (word->reject_map[i].accepted() && - word->uch_set->get_isalpha( - best_choice->unichar_string().string() + offset, - best_choice->unichar_lengths()[i])) - word->reject_map[i].setrej_bad_permuter(); - // rej alpha - } - } - } else { - word->reject_map.rej_word_bad_permuter(); - } - } - /* Ambig word rejection was here once !!*/ - } - } else { - tprintf("BAD tessedit_reject_mode\n"); - err_exit(); - } - - if (tessedit_image_border > -1) - reject_edge_blobs(word); - - check_debug_pt (word, 10); - if (tessedit_rejection_debug) { - tprintf("Permuter Type = %d\n", word->best_choice->permuter ()); - tprintf("Certainty: %f Rating: %f\n", - word->best_choice->certainty (), word->best_choice->rating ()); - tprintf("Dict word: %d\n", dict_word(*(word->best_choice))); - } - - flip_hyphens(word); - check_debug_pt(word, 20); -} -} // namespace tesseract - - -void reject_blanks(WERD_RES *word) { - int16_t i; - int16_t offset; - - for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0'; - offset += word->best_choice->unichar_lengths()[i], i += 1) { - if 
(word->best_choice->unichar_string()[offset] == ' ') - //rej unrecognised blobs - word->reject_map[i].setrej_tess_failure (); - } -} - -namespace tesseract { -void Tesseract::reject_I_1_L(WERD_RES *word) { - int16_t i; - int16_t offset; - - for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0'; - offset += word->best_choice->unichar_lengths()[i], i += 1) { - if (STRING (conflict_set_I_l_1). - contains (word->best_choice->unichar_string()[offset])) { - //rej 1Il conflict - word->reject_map[i].setrej_1Il_conflict (); - } - } -} -} // namespace tesseract - - -void reject_poor_matches(WERD_RES *word) { - float threshold = compute_reject_threshold(word->best_choice); - for (int i = 0; i < word->best_choice->length(); ++i) { - if (word->best_choice->unichar_id(i) == UNICHAR_SPACE) - word->reject_map[i].setrej_tess_failure(); - else if (word->best_choice->certainty(i) < threshold) - word->reject_map[i].setrej_poor_match(); - } -} - - -/********************************************************************** - * compute_reject_threshold - * - * Set a rejection threshold for this word. - * Initially this is a trivial function which looks for the largest - * gap in the certainty value. 
- **********************************************************************/ - -float compute_reject_threshold(WERD_CHOICE* word) { - float threshold; // rejection threshold - float bestgap = 0.0f; // biggest gap - float gapstart; // bottom of gap - - int blob_count = word->length(); - GenericVector ratings; - ratings.resize_no_init(blob_count); - for (int i = 0; i < blob_count; ++i) { - ratings[i] = word->certainty(i); - } - ratings.sort(); - gapstart = ratings[0] - 1; // all reject if none better - if (blob_count >= 3) { - for (int index = 0; index < blob_count - 1; index++) { - if (ratings[index + 1] - ratings[index] > bestgap) { - bestgap = ratings[index + 1] - ratings[index]; - // find biggest - gapstart = ratings[index]; - } - } - } - threshold = gapstart + bestgap / 2; - - return threshold; -} - - -/************************************************************************* - * reject_edge_blobs() - * - * If the word is perilously close to the edge of the image, reject those blobs - * in the word which are too close to the edge as they could be clipped. - *************************************************************************/ -namespace tesseract { -void Tesseract::reject_edge_blobs(WERD_RES *word) { - TBOX word_box = word->word->bounding_box(); - // Use the box_word as it is already denormed back to image coordinates. 
- int blobcount = word->box_word->length(); - - if (word_box.left() < tessedit_image_border || - word_box.bottom() < tessedit_image_border || - word_box.right() + tessedit_image_border > ImageWidth() - 1 || - word_box.top() + tessedit_image_border > ImageHeight() - 1) { - ASSERT_HOST(word->reject_map.length() == blobcount); - for (int blobindex = 0; blobindex < blobcount; blobindex++) { - TBOX blob_box = word->box_word->BlobBox(blobindex); - if (blob_box.left() < tessedit_image_border || - blob_box.bottom() < tessedit_image_border || - blob_box.right() + tessedit_image_border > ImageWidth() - 1 || - blob_box.top() + tessedit_image_border > ImageHeight() - 1) { - word->reject_map[blobindex].setrej_edge_char(); - // Close to edge - } - } - } -} - -/********************************************************************** - * one_ell_conflict() - * - * Identify words where there is a potential I/l/1 error. - * - A bundle of contextual heuristics! - **********************************************************************/ -bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { - const char *word; - const char *lengths; - int16_t word_len; //its length - int16_t first_alphanum_index_; - int16_t first_alphanum_offset_; - int16_t i; - int16_t offset; - bool non_conflict_set_char; //non conf set a/n? - bool conflict = false; - bool allow_1s; - ACCEPTABLE_WERD_TYPE word_type; - bool dict_perm_type; - bool dict_word_ok; - int dict_word_type; - - word = word_res->best_choice->unichar_string().string (); - lengths = word_res->best_choice->unichar_lengths().string(); - word_len = strlen(lengths); - /* - If there are no occurrences of the conflict set characters then the word - is OK. - */ - if (strpbrk(word, conflict_set_I_l_1.string ()) == nullptr) - return false; - - /* - There is a conflict if there are NO other (confirmed) alphanumerics apart - from those in the conflict set. 
- */ - - for (i = 0, offset = 0, non_conflict_set_char = false; - (i < word_len) && !non_conflict_set_char; offset += lengths[i++]) - non_conflict_set_char = - (word_res->uch_set->get_isalpha(word + offset, lengths[i]) || - word_res->uch_set->get_isdigit(word + offset, lengths[i])) && - !STRING (conflict_set_I_l_1).contains (word[offset]); - if (!non_conflict_set_char) { - if (update_map) - reject_I_1_L(word_res); - return true; - } - - /* - If the word is accepted by a dawg permuter, and the first alpha character - is "I" or "l", check to see if the alternative is also a dawg word. If it - is, then there is a potential error otherwise the word is ok. - */ - - dict_perm_type = (word_res->best_choice->permuter () == SYSTEM_DAWG_PERM) || - (word_res->best_choice->permuter () == USER_DAWG_PERM) || - (rej_trust_doc_dawg && - (word_res->best_choice->permuter () == DOC_DAWG_PERM)) || - (word_res->best_choice->permuter () == FREQ_DAWG_PERM); - dict_word_type = dict_word(*(word_res->best_choice)); - dict_word_ok = (dict_word_type > 0) && - (rej_trust_doc_dawg || (dict_word_type != DOC_DAWG_PERM)); - - if ((rej_1Il_use_dict_word && dict_word_ok) || - (rej_1Il_trust_permuter_type && dict_perm_type) || - (dict_perm_type && dict_word_ok)) { - first_alphanum_index_ = first_alphanum_index (word, lengths); - first_alphanum_offset_ = first_alphanum_offset (word, lengths); - if (lengths[first_alphanum_index_] == 1 && - word[first_alphanum_offset_] == 'I') { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; - if (safe_dict_word(word_res) > 0) { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; - if (update_map) - word_res->reject_map[first_alphanum_index_]. 
- setrej_1Il_conflict(); - return true; - } - else { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; - return false; - } - } - - if (lengths[first_alphanum_index_] == 1 && - word[first_alphanum_offset_] == 'l') { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; - if (safe_dict_word(word_res) > 0) { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; - if (update_map) - word_res->reject_map[first_alphanum_index_]. - setrej_1Il_conflict(); - return true; - } - else { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; - return false; - } - } - return false; - } - - /* - NEW 1Il code. The old code relied on permuter types too much. In fact, - tess will use TOP_CHOICE permute for good things like "palette". - In this code the string is examined independently to see if it looks like - a well formed word. - */ - - /* - REGARDLESS OF PERMUTER, see if flipping a leading I/l generates a - dictionary word. - */ - first_alphanum_index_ = first_alphanum_index (word, lengths); - first_alphanum_offset_ = first_alphanum_offset (word, lengths); - if (lengths[first_alphanum_index_] == 1 && - word[first_alphanum_offset_] == 'l') { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; - if (safe_dict_word(word_res) > 0) - return false; - else - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; - } - else if (lengths[first_alphanum_index_] == 1 && - word[first_alphanum_offset_] == 'I') { - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; - if (safe_dict_word(word_res) > 0) - return false; - else - word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; - } - /* - For strings containing digits: - If there are no alphas OR the numeric permuter liked the word, - reject any non 1 conflict chs - Else reject all conflict chs - */ - if (word_contains_non_1_digit (word, lengths)) { - allow_1s = (alpha_count (word, lengths) == 
0) || - (word_res->best_choice->permuter () == NUMBER_PERM); - - int16_t offset; - conflict = false; - for (i = 0, offset = 0; word[offset] != '\0'; - offset += word_res->best_choice->unichar_lengths()[i++]) { - if ((!allow_1s || (word[offset] != '1')) && - STRING (conflict_set_I_l_1).contains (word[offset])) { - if (update_map) - word_res->reject_map[i].setrej_1Il_conflict (); - conflict = true; - } - } - return conflict; - } - /* - For anything else. See if it conforms to an acceptable word type. If so, - treat accordingly. - */ - word_type = acceptable_word_string(*word_res->uch_set, word, lengths); - if ((word_type == AC_LOWER_CASE) || (word_type == AC_INITIAL_CAP)) { - first_alphanum_index_ = first_alphanum_index (word, lengths); - first_alphanum_offset_ = first_alphanum_offset (word, lengths); - if (STRING (conflict_set_I_l_1).contains (word[first_alphanum_offset_])) { - if (update_map) - word_res->reject_map[first_alphanum_index_]. - setrej_1Il_conflict (); - return true; - } - else - return false; - } - else if (word_type == AC_UPPER_CASE) { - return false; - } - else { - if (update_map) - reject_I_1_L(word_res); - return true; - } -} - - -int16_t Tesseract::first_alphanum_index(const char *word, - const char *word_lengths) { - int16_t i; - int16_t offset; - - for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) { - if (unicharset.get_isalpha(word + offset, word_lengths[i]) || - unicharset.get_isdigit(word + offset, word_lengths[i])) - return i; - } - return -1; -} - -int16_t Tesseract::first_alphanum_offset(const char *word, - const char *word_lengths) { - int16_t i; - int16_t offset; - - for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) { - if (unicharset.get_isalpha(word + offset, word_lengths[i]) || - unicharset.get_isdigit(word + offset, word_lengths[i])) - return offset; - } - return -1; -} - -int16_t Tesseract::alpha_count(const char *word, - const char *word_lengths) { - int16_t i; - int16_t offset; - 
int16_t count = 0; - - for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) { - if (unicharset.get_isalpha (word + offset, word_lengths[i])) - count++; - } - return count; -} - - -bool Tesseract::word_contains_non_1_digit(const char* word, - const char* word_lengths) { - int16_t i; - int16_t offset; - - for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) { - if (unicharset.get_isdigit (word + offset, word_lengths[i]) && - (word_lengths[i] != 1 || word[offset] != '1')) - return true; - } - return false; -} - -/************************************************************************* - * dont_allow_1Il() - * Don't unreject LONE accepted 1Il conflict set chars - *************************************************************************/ -void Tesseract::dont_allow_1Il(WERD_RES *word) { - int i = 0; - int offset; - int word_len = word->reject_map.length(); - const char *s = word->best_choice->unichar_string().string(); - const char *lengths = word->best_choice->unichar_lengths().string(); - bool accepted_1Il = false; - - for (i = 0, offset = 0; i < word_len; - offset += word->best_choice->unichar_lengths()[i++]) { - if (word->reject_map[i].accepted()) { - if (STRING(conflict_set_I_l_1).contains(s[offset])) { - accepted_1Il = true; - } else { - if (word->uch_set->get_isalpha(s + offset, lengths[i]) || - word->uch_set->get_isdigit(s + offset, lengths[i])) - return; // >=1 non 1Il ch accepted - } - } - } - if (!accepted_1Il) - return; //Nothing to worry about - - for (i = 0, offset = 0; i < word_len; - offset += word->best_choice->unichar_lengths()[i++]) { - if (STRING(conflict_set_I_l_1).contains(s[offset]) && - word->reject_map[i].accepted()) - word->reject_map[i].setrej_postNN_1Il(); - } -} - - -int16_t Tesseract::count_alphanums(WERD_RES *word_res) { - int count = 0; - const WERD_CHOICE *best_choice = word_res->best_choice; - for (int i = 0; i < word_res->reject_map.length(); ++i) { - if ((word_res->reject_map[i].accepted()) 
&& - (word_res->uch_set->get_isalpha(best_choice->unichar_id(i)) || - word_res->uch_set->get_isdigit(best_choice->unichar_id(i)))) { - count++; - } - } - return count; -} - - -// reject all if most rejected. -void Tesseract::reject_mostly_rejects(WERD_RES *word) { - /* Reject the whole of the word if the fraction of rejects exceeds a limit */ - - if ((float) word->reject_map.reject_count() / word->reject_map.length() >= - rej_whole_of_mostly_reject_word_fract) - word->reject_map.rej_word_mostly_rej(); -} - - -bool Tesseract::repeated_nonalphanum_wd(WERD_RES* word, ROW* row) { - int16_t char_quality; - int16_t accepted_char_quality; - - if (word->best_choice->unichar_lengths().length() <= 1) - return false; - - if (!STRING(ok_repeated_ch_non_alphanum_wds). - contains(word->best_choice->unichar_string()[0])) - return false; - - UNICHAR_ID uch_id = word->best_choice->unichar_id(0); - for (int i = 1; i < word->best_choice->length(); ++i) { - if (word->best_choice->unichar_id(i) != uch_id) return false; - } - - word_char_quality(word, row, &char_quality, &accepted_char_quality); - - if ((word->best_choice->unichar_lengths().length () == char_quality) && - (char_quality == accepted_char_quality)) - return true; - else - return false; -} - -int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) { - const WERD_CHOICE &word = *werd_res->best_choice; - int dict_word_type = werd_res->tesseract->dict_word(word); - return dict_word_type == DOC_DAWG_PERM ? 0 : dict_word_type; -} - -// Note: After running this function word_res->ratings -// might not contain the right BLOB_CHOICE corresponding to each character -// in word_res->best_choice. 
-void Tesseract::flip_hyphens(WERD_RES *word_res) { - WERD_CHOICE *best_choice = word_res->best_choice; - int i; - int prev_right = -9999; - int next_left; - TBOX out_box; - float aspect_ratio; - - if (tessedit_lower_flip_hyphen <= 1) - return; - - int num_blobs = word_res->rebuild_word->NumBlobs(); - UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-"); - for (i = 0; i < best_choice->length() && i < num_blobs; ++i) { - TBLOB* blob = word_res->rebuild_word->blobs[i]; - out_box = blob->bounding_box(); - if (i + 1 == num_blobs) - next_left = 9999; - else - next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left(); - // Don't touch small or touching blobs - it is too dangerous. - if ((out_box.width() > 8 * word_res->denorm.x_scale()) && - (out_box.left() > prev_right) && (out_box.right() < next_left)) { - aspect_ratio = out_box.width() / (float) out_box.height(); - if (word_res->uch_set->eq(best_choice->unichar_id(i), ".")) { - if (aspect_ratio >= tessedit_upper_flip_hyphen && - word_res->uch_set->contains_unichar_id(unichar_dash) && - word_res->uch_set->get_enabled(unichar_dash)) { - /* Certain HYPHEN */ - best_choice->set_unichar_id(unichar_dash, i); - if (word_res->reject_map[i].rejected()) - word_res->reject_map[i].setrej_hyphen_accept(); - } - if ((aspect_ratio > tessedit_lower_flip_hyphen) && - word_res->reject_map[i].accepted()) - //Suspected HYPHEN - word_res->reject_map[i].setrej_hyphen (); - } - else if (best_choice->unichar_id(i) == unichar_dash) { - if ((aspect_ratio >= tessedit_upper_flip_hyphen) && - (word_res->reject_map[i].rejected())) - word_res->reject_map[i].setrej_hyphen_accept(); - //Certain HYPHEN - - if ((aspect_ratio <= tessedit_lower_flip_hyphen) && - (word_res->reject_map[i].accepted())) - //Suspected HYPHEN - word_res->reject_map[i].setrej_hyphen(); - } - } - prev_right = out_box.right(); - } -} - -// Note: After running this function word_res->ratings -// might not contain the right BLOB_CHOICE corresponding to each 
character -// in word_res->best_choice. -void Tesseract::flip_0O(WERD_RES *word_res) { - WERD_CHOICE *best_choice = word_res->best_choice; - int i; - TBOX out_box; - - if (!tessedit_flip_0O) - return; - - int num_blobs = word_res->rebuild_word->NumBlobs(); - for (i = 0; i < best_choice->length() && i < num_blobs; ++i) { - TBLOB* blob = word_res->rebuild_word->blobs[i]; - if (word_res->uch_set->get_isupper(best_choice->unichar_id(i)) || - word_res->uch_set->get_isdigit(best_choice->unichar_id(i))) { - out_box = blob->bounding_box(); - if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) || - (out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4)) - return; //Beware words with sub/superscripts - } - } - UNICHAR_ID unichar_0 = word_res->uch_set->unichar_to_id("0"); - UNICHAR_ID unichar_O = word_res->uch_set->unichar_to_id("O"); - if (unichar_0 == INVALID_UNICHAR_ID || - !word_res->uch_set->get_enabled(unichar_0) || - unichar_O == INVALID_UNICHAR_ID || - !word_res->uch_set->get_enabled(unichar_O)) { - return; // 0 or O are not present/enabled in unicharset - } - for (i = 1; i < best_choice->length(); ++i) { - if (best_choice->unichar_id(i) == unichar_0 || - best_choice->unichar_id(i) == unichar_O) { - /* A0A */ - if ((i+1) < best_choice->length() && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-1)) && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i+1))) { - best_choice->set_unichar_id(unichar_O, i); - } - /* A00A */ - if (non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+1) < best_choice->length() && - (best_choice->unichar_id(i+1) == unichar_0 || - best_choice->unichar_id(i+1) == unichar_O) && - (i+2) < best_choice->length() && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i+2))) { - best_choice->set_unichar_id(unichar_O, i); - i++; - } - /* AA0 */ - if ((i > 1) && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-2)) && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (((i+1) 
< best_choice->length() && - !word_res->uch_set->get_isdigit(best_choice->unichar_id(i+1)) && - !word_res->uch_set->eq(best_choice->unichar_id(i+1), "l") && - !word_res->uch_set->eq(best_choice->unichar_id(i+1), "I")) || - (i == best_choice->length() - 1))) { - best_choice->set_unichar_id(unichar_O, i); - } - /* 9O9 */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+1) < best_choice->length() && - non_0_digit(*word_res->uch_set, best_choice->unichar_id(i+1))) { - best_choice->set_unichar_id(unichar_0, i); - } - /* 9OOO */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+2) < best_choice->length() && - (best_choice->unichar_id(i+1) == unichar_0 || - best_choice->unichar_id(i+1) == unichar_O) && - (best_choice->unichar_id(i+2) == unichar_0 || - best_choice->unichar_id(i+2) == unichar_O)) { - best_choice->set_unichar_id(unichar_0, i); - best_choice->set_unichar_id(unichar_0, i+1); - best_choice->set_unichar_id(unichar_0, i+2); - i += 2; - } - /* 9OO */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+2) < best_choice->length() && - (best_choice->unichar_id(i+1) == unichar_0 || - best_choice->unichar_id(i+1) == unichar_O) && - !word_res->uch_set->get_isupper(best_choice->unichar_id(i+2))) { - best_choice->set_unichar_id(unichar_0, i); - best_choice->set_unichar_id(unichar_0, i+1); - i++; - } - /* 9O */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+1) < best_choice->length() && - !word_res->uch_set->get_isupper(best_choice->unichar_id(i+1))) { - best_choice->set_unichar_id(unichar_0, i); - } - /* 9[.,]OOO.. 
*/ - if ((i > 1) && - (word_res->uch_set->eq(best_choice->unichar_id(i-1), ".") || - word_res->uch_set->eq(best_choice->unichar_id(i-1), ",")) && - (word_res->uch_set->get_isdigit(best_choice->unichar_id(i-2)) || - best_choice->unichar_id(i-2) == unichar_O)) { - if (best_choice->unichar_id(i-2) == unichar_O) { - best_choice->set_unichar_id(unichar_0, i-2); - } - while (i < best_choice->length() && - (best_choice->unichar_id(i) == unichar_O || - best_choice->unichar_id(i) == unichar_0)) { - best_choice->set_unichar_id(unichar_0, i); - i++; - } - i--; - } - } - } -} - -bool Tesseract::non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) { - return ch_set.get_isupper(unichar_id) && !ch_set.eq(unichar_id, "O"); -} - -bool Tesseract::non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) { - return ch_set.get_isdigit(unichar_id) && !ch_set.eq(unichar_id, "0"); -} -} // namespace tesseract - -#endif // def DISABLED_LEGACY_ENGINE diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/reject.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/reject.h deleted file mode 100644 index ba3fef38..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/reject.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************** - * File: reject.h - * Description: Rejection functions used in tessedit - * Author: Phil Cheatle - * Created: Wed Sep 23 16:50:21 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef REJECT_H -#define REJECT_H - -class WERD_CHOICE; -class WERD_RES; - -void reject_blanks(WERD_RES *word); -void reject_poor_matches(WERD_RES *word); -float compute_reject_threshold(WERD_CHOICE* word); -bool word_contains_non_1_digit(const char* word, const char* word_lengths); -void dont_allow_1Il(WERD_RES *word); -void flip_hyphens(WERD_RES *word); -void flip_0O(WERD_RES *word); -bool non_0_digit(const char* str, int length); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/resultiterator.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/resultiterator.cpp deleted file mode 100644 index 00b3f9ad..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/resultiterator.cpp +++ /dev/null @@ -1,695 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: resultiterator.cpp -// Description: Iterator for tesseract results that is capable of -// iterating in proper reading order over Bi Directional -// (e.g. mixed Hebrew and English) text. -// Author: David Eger -// Created: Fri May 27 13:58:06 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "resultiterator.h" - -#include "allheaders.h" -#include "pageres.h" -#include "strngs.h" -#include "tesseractclass.h" -#include "unicharset.h" -#include "unicodes.h" -#include -#include - -namespace tesseract { - -ResultIterator::ResultIterator(const LTRResultIterator &resit) - : LTRResultIterator(resit) { - in_minor_direction_ = false; - at_beginning_of_minor_run_ = false; - preserve_interword_spaces_ = false; - - BoolParam *p = ParamUtils::FindParam( - "preserve_interword_spaces", GlobalParams()->bool_params, - tesseract_->params()->bool_params); - if (p != nullptr) preserve_interword_spaces_ = (bool)(*p); - - current_paragraph_is_ltr_ = CurrentParagraphIsLtr(); - MoveToLogicalStartOfTextline(); -} - -ResultIterator *ResultIterator::StartOfParagraph( - const LTRResultIterator &resit) { - return new ResultIterator(resit); -} - -bool ResultIterator::ParagraphIsLtr() const { - return current_paragraph_is_ltr_; -} - -bool ResultIterator::CurrentParagraphIsLtr() const { - if (!it_->word()) - return true; // doesn't matter. - LTRResultIterator it(*this); - it.RestartParagraph(); - // Try to figure out the ltr-ness of the paragraph. The rules below - // make more sense in the context of a difficult paragraph example. - // Here we denote {ltr characters, RTL CHARACTERS}: - // - // "don't go in there!" DAIS EH - // EHT OTNI DEPMUJ FELSMIH NEHT DNA - // .GNIDLIUB GNINRUB - // - // On the first line, the left-most word is LTR and the rightmost word - // is RTL. Thus, we are better off taking the majority direction for - // the whole paragraph contents. So instead of "the leftmost word is LTR" - // indicating an LTR paragraph, we use a heuristic about what RTL paragraphs - // would not do: Typically an RTL paragraph would *not* start with an LTR - // word. So our heuristics are as follows: - // - // (1) If the first text line has an RTL word in the left-most position - // it is RTL. 
- // (2) If the first text line has an LTR word in the right-most position - // it is LTR. - // (3) If neither of the above is true, take the majority count for the - // paragraph -- if there are more rtl words, it is RTL. If there - // are more LTR words, it's LTR. - bool leftmost_rtl = it.WordDirection() == DIR_RIGHT_TO_LEFT; - bool rightmost_ltr = it.WordDirection() == DIR_LEFT_TO_RIGHT; - int num_ltr, num_rtl; - num_rtl = leftmost_rtl ? 1 : 0; - num_ltr = (it.WordDirection() == DIR_LEFT_TO_RIGHT) ? 1 : 0; - for (it.Next(RIL_WORD); - !it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_TEXTLINE); - it.Next(RIL_WORD)) { - StrongScriptDirection dir = it.WordDirection(); - rightmost_ltr = (dir == DIR_LEFT_TO_RIGHT); - num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0; - num_ltr += rightmost_ltr ? 1 : 0; - } - if (leftmost_rtl) - return false; - if (rightmost_ltr) - return true; - // First line is ambiguous. Take statistics on the whole paragraph. - if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)) do { - StrongScriptDirection dir = it.WordDirection(); - num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0; - num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0; - } while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)); - return num_ltr >= num_rtl; -} - -const int ResultIterator::kMinorRunStart = -1; -const int ResultIterator::kMinorRunEnd = -2; -const int ResultIterator::kComplexWord = -3; - -void ResultIterator::CalculateBlobOrder( - GenericVector *blob_indices) const { - bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_; - blob_indices->clear(); - if (Empty(RIL_WORD)) return; - if (context_is_ltr || it_->word()->UnicharsInReadingOrder()) { - // Easy! just return the blobs in order; - for (int i = 0; i < word_length_; i++) - blob_indices->push_back(i); - return; - } - - // The blobs are in left-to-right order, but the current reading context - // is right-to-left. 
- const int U_LTR = UNICHARSET::U_LEFT_TO_RIGHT; - const int U_RTL = UNICHARSET::U_RIGHT_TO_LEFT; - const int U_EURO_NUM = UNICHARSET::U_EUROPEAN_NUMBER; - const int U_EURO_NUM_SEP = UNICHARSET::U_EUROPEAN_NUMBER_SEPARATOR; - const int U_EURO_NUM_TERM = UNICHARSET::U_EUROPEAN_NUMBER_TERMINATOR; - const int U_COMMON_NUM_SEP = UNICHARSET::U_COMMON_NUMBER_SEPARATOR; - const int U_OTHER_NEUTRAL = UNICHARSET::U_OTHER_NEUTRAL; - - // Step 1: Scan for and mark European Number sequences - // [:ET:]*[:EN:]+(([:ES:]|[:CS:])?[:EN:]+)*[:ET:]* - GenericVector letter_types; - for (int i = 0; i < word_length_; i++) { - letter_types.push_back(it_->word()->SymbolDirection(i)); - } - // Convert a single separtor sandwiched between two EN's into an EN. - for (int i = 0; i + 2 < word_length_; i++) { - if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM && - (letter_types[i + 1] == U_EURO_NUM_SEP || - letter_types[i + 1] == U_COMMON_NUM_SEP)) { - letter_types[i + 1] = U_EURO_NUM; - } - } - // Scan for sequences of European Number Terminators around ENs and convert - // them to ENs. - for (int i = 0; i < word_length_; i++) { - if (letter_types[i] == U_EURO_NUM_TERM) { - int j = i + 1; - while (j < word_length_ && letter_types[j] == U_EURO_NUM_TERM) { j++; } - if (j < word_length_ && letter_types[j] == U_EURO_NUM) { - // The sequence [i..j] should be converted to all European Numbers. - for (int k = i; k < j; k++) letter_types[k] = U_EURO_NUM; - } - j = i - 1; - while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { j--; } - if (j > -1 && letter_types[j] == U_EURO_NUM) { - // The sequence [j..i] should be converted to all European Numbers. - for (int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM; - } - } - } - // Step 2: Convert all remaining types to either L or R. - // Sequences ([:L:]|[:EN:])+ (([:CS:]|[:ON:])+ ([:L:]|[:EN:])+)* -> L. - // All other are R. 
- for (int i = 0; i < word_length_;) { - int ti = letter_types[i]; - if (ti == U_LTR || ti == U_EURO_NUM) { - // Left to right sequence; scan to the end of it. - int last_good = i; - for (int j = i + 1; j < word_length_; j++) { - int tj = letter_types[j]; - if (tj == U_LTR || tj == U_EURO_NUM) { - last_good = j; - } else if (tj == U_COMMON_NUM_SEP || tj == U_OTHER_NEUTRAL) { - // do nothing. - } else { - break; - } - } - // [i..last_good] is the L sequence - for (int k = i; k <= last_good; k++) letter_types[k] = U_LTR; - i = last_good + 1; - } else { - letter_types[i] = U_RTL; - i++; - } - } - - // At this point, letter_types is entirely U_LTR or U_RTL. - for (int i = word_length_ - 1; i >= 0;) { - if (letter_types[i] == U_RTL) { - blob_indices->push_back(i); - i--; - } else { - // left to right sequence. scan to the beginning. - int j = i - 1; - for (; j >= 0 && letter_types[j] != U_RTL; j--) { } // pass - // Now (j, i] is LTR - for (int k = j + 1; k <= i; k++) blob_indices->push_back(k); - i = j; - } - } - ASSERT_HOST(blob_indices->size() == word_length_); -} - -static void PrintScriptDirs(const GenericVector &dirs) { - for (int i = 0; i < dirs.size(); i++) { - switch (dirs[i]) { - case DIR_NEUTRAL: tprintf ("N "); break; - case DIR_LEFT_TO_RIGHT: tprintf("L "); break; - case DIR_RIGHT_TO_LEFT: tprintf("R "); break; - case DIR_MIX: tprintf("Z "); break; - default: tprintf("? "); break; - } - } - tprintf("\n"); -} - -void ResultIterator::CalculateTextlineOrder( - bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVectorEqEq *word_indices) const { - GenericVector directions; - CalculateTextlineOrder(paragraph_is_ltr, resit, &directions, word_indices); -} - -void ResultIterator::CalculateTextlineOrder( - bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVector *dirs_arg, - GenericVectorEqEq *word_indices) const { - GenericVector dirs; - GenericVector *directions; - directions = (dirs_arg != nullptr) ? 
dirs_arg : &dirs; - directions->truncate(0); - - // A LTRResultIterator goes strictly left-to-right word order. - LTRResultIterator ltr_it(resit); - ltr_it.RestartRow(); - if (ltr_it.Empty(RIL_WORD)) return; - do { - directions->push_back(ltr_it.WordDirection()); - } while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE)); - - word_indices->truncate(0); - CalculateTextlineOrder(paragraph_is_ltr, *directions, word_indices); -} - -void ResultIterator::CalculateTextlineOrder( - bool paragraph_is_ltr, - const GenericVector &word_dirs, - GenericVectorEqEq *reading_order) { - reading_order->truncate(0); - if (word_dirs.size() == 0) return; - - // Take all of the runs of minor direction words and insert them - // in reverse order. - int minor_direction, major_direction, major_step, start, end; - if (paragraph_is_ltr) { - start = 0; - end = word_dirs.size(); - major_step = 1; - major_direction = DIR_LEFT_TO_RIGHT; - minor_direction = DIR_RIGHT_TO_LEFT; - } else { - start = word_dirs.size() - 1; - end = -1; - major_step = -1; - major_direction = DIR_RIGHT_TO_LEFT; - minor_direction = DIR_LEFT_TO_RIGHT; - // Special rule: if there are neutral words at the right most side - // of a line adjacent to a left-to-right word in the middle of the - // line, we interpret the end of the line as a single LTR sequence. - if (word_dirs[start] == DIR_NEUTRAL) { - int neutral_end = start; - while (neutral_end > 0 && word_dirs[neutral_end] == DIR_NEUTRAL) { - neutral_end--; - } - if (neutral_end >= 0 && word_dirs[neutral_end] == DIR_LEFT_TO_RIGHT) { - // LTR followed by neutrals. - // Scan for the beginning of the minor left-to-right run. 
- int left = neutral_end; - for (int i = left; i >= 0 && word_dirs[i] != DIR_RIGHT_TO_LEFT; i--) { - if (word_dirs[i] == DIR_LEFT_TO_RIGHT) left = i; - } - reading_order->push_back(kMinorRunStart); - for (int i = left; i < word_dirs.size(); i++) { - reading_order->push_back(i); - if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord); - } - reading_order->push_back(kMinorRunEnd); - start = left - 1; - } - } - } - for (int i = start; i != end;) { - if (word_dirs[i] == minor_direction) { - int j = i; - while (j != end && word_dirs[j] != major_direction) - j += major_step; - if (j == end) j -= major_step; - while (j != i && word_dirs[j] != minor_direction) - j -= major_step; - // [j..i] is a minor direction run. - reading_order->push_back(kMinorRunStart); - for (int k = j; k != i; k -= major_step) { - reading_order->push_back(k); - } - reading_order->push_back(i); - reading_order->push_back(kMinorRunEnd); - i = j + major_step; - } else { - reading_order->push_back(i); - if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord); - i += major_step; - } - } -} - -int ResultIterator::LTRWordIndex() const { - int this_word_index = 0; - LTRResultIterator textline(*this); - textline.RestartRow(); - while (!textline.PositionedAtSameWord(it_)) { - this_word_index++; - textline.Next(RIL_WORD); - } - return this_word_index; -} - -void ResultIterator::MoveToLogicalStartOfWord() { - if (word_length_ == 0) { - BeginWord(0); - return; - } - GenericVector blob_order; - CalculateBlobOrder(&blob_order); - if (blob_order.size() == 0 || blob_order[0] == 0) return; - BeginWord(blob_order[0]); -} - -bool ResultIterator::IsAtFinalSymbolOfWord() const { - if (!it_->word()) return true; - GenericVector blob_order; - CalculateBlobOrder(&blob_order); - return blob_order.size() == 0 || blob_order.back() == blob_index_; -} - -bool ResultIterator::IsAtFirstSymbolOfWord() const { - if (!it_->word()) return true; - GenericVector blob_order; - CalculateBlobOrder(&blob_order); - 
return blob_order.size() == 0 || blob_order[0] == blob_index_; -} - -void ResultIterator::AppendSuffixMarks(STRING *text) const { - if (!it_->word()) return; - bool reading_direction_is_ltr = - current_paragraph_is_ltr_ ^ in_minor_direction_; - // scan forward to see what meta-information the word ordering algorithm - // left us. - // If this word is at the *end* of a minor run, insert the other - // direction's mark; else if this was a complex word, insert the - // current reading order's mark. - GenericVectorEqEq textline_order; - CalculateTextlineOrder(current_paragraph_is_ltr_, - *this, &textline_order); - int this_word_index = LTRWordIndex(); - int i = textline_order.get_index(this_word_index); - if (i < 0) return; - - int last_non_word_mark = 0; - for (i++; i < textline_order.size() && textline_order[i] < 0; i++) { - last_non_word_mark = textline_order[i]; - } - if (last_non_word_mark == kComplexWord) { - *text += reading_direction_is_ltr ? kLRM : kRLM; - } else if (last_non_word_mark == kMinorRunEnd) { - if (current_paragraph_is_ltr_) { - *text += kLRM; - } else { - *text += kRLM; - } - } -} - -void ResultIterator::MoveToLogicalStartOfTextline() { - GenericVectorEqEq word_indices; - RestartRow(); - CalculateTextlineOrder(current_paragraph_is_ltr_, - dynamic_cast(*this), - &word_indices); - int i = 0; - for (; i < word_indices.size() && word_indices[i] < 0; i++) { - if (word_indices[i] == kMinorRunStart) in_minor_direction_ = true; - else if (word_indices[i] == kMinorRunEnd) in_minor_direction_ = false; - } - if (in_minor_direction_) at_beginning_of_minor_run_ = true; - if (i >= word_indices.size()) return; - int first_word_index = word_indices[i]; - for (int j = 0; j < first_word_index; j++) { - PageIterator::Next(RIL_WORD); - } - MoveToLogicalStartOfWord(); -} - -void ResultIterator::Begin() { - LTRResultIterator::Begin(); - current_paragraph_is_ltr_ = CurrentParagraphIsLtr(); - in_minor_direction_ = false; - at_beginning_of_minor_run_ = false; - 
MoveToLogicalStartOfTextline(); -} - -bool ResultIterator::Next(PageIteratorLevel level) { - if (it_->block() == nullptr) return false; // already at end! - switch (level) { - case RIL_BLOCK: // explicit fall-through - case RIL_PARA: // explicit fall-through - case RIL_TEXTLINE: - if (!PageIterator::Next(level)) return false; - if (IsWithinFirstTextlineOfParagraph()) { - // if we've advanced to a new paragraph, - // recalculate current_paragraph_is_ltr_ - current_paragraph_is_ltr_ = CurrentParagraphIsLtr(); - } - in_minor_direction_ = false; - MoveToLogicalStartOfTextline(); - return it_->block() != nullptr; - case RIL_SYMBOL: - { - GenericVector blob_order; - CalculateBlobOrder(&blob_order); - int next_blob = 0; - while (next_blob < blob_order.size() && - blob_index_ != blob_order[next_blob]) - next_blob++; - next_blob++; - if (next_blob < blob_order.size()) { - // we're in the same word; simply advance one blob. - BeginWord(blob_order[next_blob]); - at_beginning_of_minor_run_ = false; - return true; - } - level = RIL_WORD; // we've fallen through to the next word. - } - case RIL_WORD: // explicit fall-through. 
- { - if (it_->word() == nullptr) return Next(RIL_BLOCK); - GenericVectorEqEq word_indices; - int this_word_index = LTRWordIndex(); - CalculateTextlineOrder(current_paragraph_is_ltr_, - *this, - &word_indices); - int final_real_index = word_indices.size() - 1; - while (final_real_index > 0 && word_indices[final_real_index] < 0) - final_real_index--; - for (int i = 0; i < final_real_index; i++) { - if (word_indices[i] == this_word_index) { - int j = i + 1; - for (; j < final_real_index && word_indices[j] < 0; j++) { - if (word_indices[j] == kMinorRunStart) in_minor_direction_ = true; - if (word_indices[j] == kMinorRunEnd) in_minor_direction_ = false; - } - at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart); - // awesome, we move to word_indices[j] - if (BidiDebug(3)) { - tprintf("Next(RIL_WORD): %d -> %d\n", - this_word_index, word_indices[j]); - } - PageIterator::RestartRow(); - for (int k = 0; k < word_indices[j]; k++) { - PageIterator::Next(RIL_WORD); - } - MoveToLogicalStartOfWord(); - return true; - } - } - if (BidiDebug(3)) { - tprintf("Next(RIL_WORD): %d -> EOL\n", this_word_index); - } - // we're going off the end of the text line. - return Next(RIL_TEXTLINE); - } - } - ASSERT_HOST(false); // shouldn't happen. - return false; -} - -bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const { - if (it_->block() == nullptr) return false; // Already at the end! - if (it_->word() == nullptr) return true; // In an image block. - if (level == RIL_SYMBOL) return true; // Always at beginning of a symbol. - - bool at_word_start = IsAtFirstSymbolOfWord(); - if (level == RIL_WORD) return at_word_start; - - ResultIterator line_start(*this); - // move to the first word in the line... - line_start.MoveToLogicalStartOfTextline(); - - bool at_textline_start = at_word_start && *line_start.it_ == *it_; - if (level == RIL_TEXTLINE) return at_textline_start; - - // now we move to the left-most word... 
- line_start.RestartRow(); - bool at_block_start = at_textline_start && - line_start.it_->block() != line_start.it_->prev_block(); - if (level == RIL_BLOCK) return at_block_start; - - bool at_para_start = at_block_start || - (at_textline_start && - line_start.it_->row()->row->para() != - line_start.it_->prev_row()->row->para()); - if (level == RIL_PARA) return at_para_start; - - ASSERT_HOST(false); // shouldn't happen. - return false; -} - -/** - * NOTE! This is an exact copy of PageIterator::IsAtFinalElement with the - * change that the variable next is now a ResultIterator instead of a - * PageIterator. - */ -bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, - PageIteratorLevel element) const { - if (Empty(element)) return true; // Already at the end! - // The result is true if we step forward by element and find we are - // at the the end of the page or at beginning of *all* levels in: - // [level, element). - // When there is more than one level difference between element and level, - // we could for instance move forward one symbol and still be at the first - // word on a line, so we also have to be at the first symbol in a word. - ResultIterator next(*this); - next.Next(element); - if (next.Empty(element)) return true; // Reached the end of the page. - while (element > level) { - element = static_cast(element - 1); - if (!next.IsAtBeginningOf(element)) - return false; - } - return true; -} - -// Returns the number of blanks before the current word. -int ResultIterator::BlanksBeforeWord() const { - if (CurrentParagraphIsLtr()) return LTRResultIterator::BlanksBeforeWord(); - return IsAtBeginningOf(RIL_TEXTLINE) ? 0 : 1; -} - -/** - * Returns the null terminated UTF-8 encoded text string for the current - * object at the given level. Use delete [] to free after use. - */ -char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { - if (it_->word() == nullptr) return nullptr; // Already at the end! 
- STRING text; - switch (level) { - case RIL_BLOCK: - { - ResultIterator pp(*this); - do { - pp.AppendUTF8ParagraphText(&text); - } while (pp.Next(RIL_PARA) && pp.it_->block() == it_->block()); - } - break; - case RIL_PARA: - AppendUTF8ParagraphText(&text); - break; - case RIL_TEXTLINE: - { - ResultIterator it(*this); - it.MoveToLogicalStartOfTextline(); - it.IterateAndAppendUTF8TextlineText(&text); - } - break; - case RIL_WORD: - AppendUTF8WordText(&text); - break; - case RIL_SYMBOL: - { - bool reading_direction_is_ltr = - current_paragraph_is_ltr_ ^ in_minor_direction_; - if (at_beginning_of_minor_run_) { - text += reading_direction_is_ltr ? kLRM : kRLM; - } - text = it_->word()->BestUTF8(blob_index_, false); - if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text); - } - break; - } - int length = text.length() + 1; - char* result = new char[length]; - strncpy(result, text.string(), length); - return result; -} - -std::vector>>* ResultIterator::GetBestLSTMSymbolChoices() const { - if (it_->word() != nullptr) { - return &it_->word()->timesteps; - } else { - return nullptr; - } -} - -void ResultIterator::AppendUTF8WordText(STRING *text) const { - if (!it_->word()) return; - ASSERT_HOST(it_->word()->best_choice != nullptr); - bool reading_direction_is_ltr = - current_paragraph_is_ltr_ ^ in_minor_direction_; - if (at_beginning_of_minor_run_) { - *text += reading_direction_is_ltr ? 
kLRM : kRLM; - } - - GenericVector blob_order; - CalculateBlobOrder(&blob_order); - for (int i = 0; i < blob_order.size(); i++) { - *text += it_->word()->BestUTF8(blob_order[i], false); - } - AppendSuffixMarks(text); -} - -void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) { - if (Empty(RIL_WORD)) { - Next(RIL_WORD); - return; - } - if (BidiDebug(1)) { - GenericVectorEqEq textline_order; - GenericVector dirs; - CalculateTextlineOrder(current_paragraph_is_ltr_, - *this, &dirs, &textline_order); - tprintf("Strong Script dirs [%p/P=%s]: ", it_->row(), - current_paragraph_is_ltr_ ? "ltr" : "rtl"); - PrintScriptDirs(dirs); - tprintf("Logical textline order [%p/P=%s]: ", it_->row(), - current_paragraph_is_ltr_ ? "ltr" : "rtl"); - for (int i = 0; i < textline_order.size(); i++) { - tprintf("%d ", textline_order[i]); - } - tprintf("\n"); - } - - int words_appended = 0; - do { - int numSpaces = preserve_interword_spaces_ ? it_->word()->word->space() - : (words_appended > 0); - for (int i = 0; i < numSpaces; ++i) { - *text += " "; - } - AppendUTF8WordText(text); - words_appended++; - if (BidiDebug(2)) { - tprintf("Num spaces=%d, text=%s\n", numSpaces, text->string()); - } - } while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE)); - if (BidiDebug(1)) { - tprintf("%d words printed\n", words_appended); - } - *text += line_separator_; - // If we just finished a paragraph, add an extra newline. 
- if (IsAtBeginningOf(RIL_PARA)) { - *text += paragraph_separator_; - } -} - -void ResultIterator::AppendUTF8ParagraphText(STRING *text) const { - ResultIterator it(*this); - it.RestartParagraph(); - it.MoveToLogicalStartOfTextline(); - if (it.Empty(RIL_WORD)) return; - do { - it.IterateAndAppendUTF8TextlineText(text); - } while (it.it_->block() != nullptr && !it.IsAtBeginningOf(RIL_PARA)); -} - -bool ResultIterator::BidiDebug(int min_level) const { - int debug_level = 1; - IntParam *p = ParamUtils::FindParam( - "bidi_debug", GlobalParams()->int_params, - tesseract_->params()->int_params); - if (p != nullptr) debug_level = (int32_t)(*p); - return debug_level >= min_level; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/resultiterator.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/resultiterator.h deleted file mode 100644 index b658e5a0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/resultiterator.h +++ /dev/null @@ -1,256 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: resultiterator.h -// Description: Iterator for tesseract results that is capable of -// iterating in proper reading order over Bi Directional -// (e.g. mixed Hebrew and English) text. -// Author: David Eger -// Created: Fri May 27 13:58:06 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_ -#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_ - -#include // for std::pair -#include // for std::vector -#include "ltrresultiterator.h" // for LTRResultIterator -#include "platform.h" // for TESS_API, TESS_LOCAL -#include "publictypes.h" // for PageIteratorLevel -#include "unichar.h" // for StrongScriptDirection - -template class GenericVector; -template class GenericVectorEqEq; - -class STRING; - -namespace tesseract { - -class Tesseract; - -class TESS_API ResultIterator : public LTRResultIterator { - public: - static ResultIterator *StartOfParagraph(const LTRResultIterator &resit); - - /** - * ResultIterator is copy constructible! - * The default copy constructor works just fine for us. - */ - virtual ~ResultIterator() = default; - - // ============= Moving around within the page ============. - /** - * Moves the iterator to point to the start of the page to begin - * an iteration. - */ - virtual void Begin(); - - /** - * Moves to the start of the next object at the given level in the - * page hierarchy in the appropriate reading order and returns false if - * the end of the page was reached. - * NOTE that RIL_SYMBOL will skip non-text blocks, but all other - * PageIteratorLevel level values will visit each non-text block once. - * Think of non text blocks as containing a single para, with a single line, - * with a single imaginary word. - * Calls to Next with different levels may be freely intermixed. - * This function iterates words in right-to-left scripts correctly, if - * the appropriate language has been loaded into Tesseract. - */ - virtual bool Next(PageIteratorLevel level); - - /** - * IsAtBeginningOf() returns whether we're at the logical beginning of the - * given level. (as opposed to ResultIterator's left-to-right top-to-bottom - * order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf(). 
- * For a full description, see pageiterator.h - */ - virtual bool IsAtBeginningOf(PageIteratorLevel level) const; - - /** - * Implement PageIterator's IsAtFinalElement correctly in a BiDi context. - * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we - * point at the last word in a paragraph. See PageIterator for full comment. - */ - virtual bool IsAtFinalElement(PageIteratorLevel level, - PageIteratorLevel element) const; - - // ============= Functions that refer to words only ============. - // Returns the number of blanks before the current word. - int BlanksBeforeWord() const; - - // ============= Accessing data ==============. - - /** - * Returns the null terminated UTF-8 encoded text string for the current - * object at the given level. Use delete [] to free after use. - */ - virtual char* GetUTF8Text(PageIteratorLevel level) const; - - /** - * Returns the LSTM choices for every LSTM timestep for the current word. - */ - virtual std::vector>>* GetBestLSTMSymbolChoices() const; - - /** - * Return whether the current paragraph's dominant reading direction - * is left-to-right (as opposed to right-to-left). - */ - bool ParagraphIsLtr() const; - - // ============= Exposed only for testing =============. - - /** - * Yields the reading order as a sequence of indices and (optional) - * meta-marks for a set of words (given left-to-right). - * The meta marks are passed as negative values: - * kMinorRunStart Start of minor direction text. - * kMinorRunEnd End of minor direction text. - * kComplexWord The next indexed word contains both left-to-right and - * right-to-left characters and was treated as neutral. - * - * For example, suppose we have five words in a text line, - * indexed [0,1,2,3,4] from the leftmost side of the text line. 
- * The following are all believable reading_orders: - * - * Left-to-Right (in ltr paragraph): - * { 0, 1, 2, 3, 4 } - * Left-to-Right (in rtl paragraph): - * { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd } - * Right-to-Left (in rtl paragraph): - * { 4, 3, 2, 1, 0 } - * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph: - * { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 } - */ - static void CalculateTextlineOrder( - bool paragraph_is_ltr, - const GenericVector &word_dirs, - GenericVectorEqEq *reading_order); - - static const int kMinorRunStart; - static const int kMinorRunEnd; - static const int kComplexWord; - - protected: - /** - * We presume the data associated with the given iterator will outlive us. - * NB: This is private because it does something that is non-obvious: - * it resets to the beginning of the paragraph instead of staying wherever - * resit might have pointed. - */ - TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit); - - private: - /** - * Calculates the current paragraph's dominant writing direction. - * Typically, members should use current_paragraph_ltr_ instead. - */ - bool CurrentParagraphIsLtr() const; - - /** - * Returns word indices as measured from resit->RestartRow() = index 0 - * for the reading order of words within a textline given an iterator - * into the middle of the text line. - * In addition to non-negative word indices, the following negative values - * may be inserted: - * kMinorRunStart Start of minor direction text. - * kMinorRunEnd End of minor direction text. - * kComplexWord The previous word contains both left-to-right and - * right-to-left characters and was treated as neutral. - */ - void CalculateTextlineOrder(bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVectorEqEq *indices) const; - /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. 
*/ - void CalculateTextlineOrder(bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVector *ssd, - GenericVectorEqEq *indices) const; - - /** - * What is the index of the current word in a strict left-to-right reading - * of the row? - */ - int LTRWordIndex() const; - - /** - * Given an iterator pointing at a word, returns the logical reading order - * of blob indices for the word. - */ - void CalculateBlobOrder(GenericVector *blob_indices) const; - - /** Precondition: current_paragraph_is_ltr_ is set. */ - void MoveToLogicalStartOfTextline(); - - /** - * Precondition: current_paragraph_is_ltr_ and in_minor_direction_ - * are set. - */ - void MoveToLogicalStartOfWord(); - - /** Are we pointing at the final (reading order) symbol of the word? */ - bool IsAtFinalSymbolOfWord() const; - - /** Are we pointing at the first (reading order) symbol of the word? */ - bool IsAtFirstSymbolOfWord() const; - - /** - * Append any extra marks that should be appended to this word when printed. - * Mostly, these are Unicode BiDi control characters. - */ - void AppendSuffixMarks(STRING *text) const; - - /** Appends the current word in reading order to the given buffer.*/ - void AppendUTF8WordText(STRING *text) const; - - /** - * Appends the text of the current text line, *assuming this iterator is - * positioned at the beginning of the text line* This function - * updates the iterator to point to the first position past the text line. - * Each textline is terminated in a single newline character. - * If the textline ends a paragraph, it gets a second terminal newline. - */ - void IterateAndAppendUTF8TextlineText(STRING *text); - - /** - * Appends the text of the current paragraph in reading order - * to the given buffer. - * Each textline is terminated in a single newline character, and the - * paragraph gets an extra newline at the end. - */ - void AppendUTF8ParagraphText(STRING *text) const; - - /** Returns whether the bidi_debug flag is set to at least min_level. 
*/ - bool BidiDebug(int min_level) const; - - bool current_paragraph_is_ltr_; - - /** - * Is the currently pointed-at character at the beginning of - * a minor-direction run? - */ - bool at_beginning_of_minor_run_; - - /** Is the currently pointed-at character in a minor-direction sequence? */ - bool in_minor_direction_; - - /** - * Should detected inter-word spaces be preserved, or "compressed" to a single - * space character (default behavior). - */ - bool preserve_interword_spaces_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/superscript.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/superscript.cpp deleted file mode 100644 index 61f54629..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/superscript.cpp +++ /dev/null @@ -1,612 +0,0 @@ -/****************************************************************** - * File: superscript.cpp - * Description: Correction pass to fix superscripts and subscripts. - * Author: David Eger - * Created: Mon Mar 12 14:05:00 PDT 2012 - * - * (C) Copyright 2012, Google, Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "normalis.h" -#include "tesseractclass.h" - -static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) { - int num_chopped = 0; - for (int i = 0; i < num_unichars; i++) - num_chopped += word->best_state[i]; - return num_chopped; -} - -static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) { - int num_chopped = 0; - for (int i = 0; i < num_unichars; i++) - num_chopped += word->best_state[word->best_state.size() - 1 - i]; - return num_chopped; -} - - -namespace tesseract { - -/** - * Given a recognized blob, see if a contiguous collection of sub-pieces - * (chopped blobs) starting at its left might qualify as being a subscript - * or superscript letter based only on y position. Also do this for the - * right side. - */ -static void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, - int super_y_bottom, int sub_y_top, - ScriptPos *leading_pos, int *num_leading_outliers, - ScriptPos *trailing_pos, - int *num_trailing_outliers) { - ScriptPos sp_unused1, sp_unused2; - int unused1, unused2; - if (!leading_pos) leading_pos = &sp_unused1; - if (!num_leading_outliers) num_leading_outliers = &unused1; - if (!trailing_pos) trailing_pos = &sp_unused2; - if (!num_trailing_outliers) num_trailing_outliers = &unused2; - - *num_leading_outliers = *num_trailing_outliers = 0; - *leading_pos = *trailing_pos = SP_NORMAL; - - int chopped_start = LeadingUnicharsToChopped(word, rebuilt_blob_index); - int num_chopped_pieces = word->best_state[rebuilt_blob_index]; - ScriptPos last_pos = SP_NORMAL; - int trailing_outliers = 0; - for (int i = 0; i < num_chopped_pieces; i++) { - TBOX box = word->chopped_word->blobs[chopped_start + i]->bounding_box(); - ScriptPos pos = SP_NORMAL; - if (box.bottom() >= super_y_bottom) { - pos = SP_SUPERSCRIPT; - } else if (box.top() <= sub_y_top) { - pos = SP_SUBSCRIPT; - } - if (pos == SP_NORMAL) { - if (trailing_outliers == i) { - 
*num_leading_outliers = trailing_outliers; - *leading_pos = last_pos; - } - trailing_outliers = 0; - } else { - if (pos == last_pos) { - trailing_outliers++; - } else { - trailing_outliers = 1; - } - } - last_pos = pos; - } - *num_trailing_outliers = trailing_outliers; - *trailing_pos = last_pos; -} - -/** - * Attempt to split off any high (or low) bits at the ends of the word with poor - * certainty and recognize them separately. If the certainty gets much better - * and other sanity checks pass, accept. - * - * This superscript fix is meant to be called in the second pass of recognition - * when we have tried once and already have a preliminary answer for word. - * - * @return Whether we modified the given word. - */ -bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { - if (word->tess_failed || word->word->flag(W_REP_CHAR) || - !word->best_choice) { - return false; - } - int num_leading, num_trailing; - ScriptPos sp_leading, sp_trailing; - float leading_certainty, trailing_certainty; - float avg_certainty, unlikely_threshold; - - // Calculate the number of whole suspicious characters at the edges. - GetSubAndSuperscriptCandidates( - word, &num_leading, &sp_leading, &leading_certainty, - &num_trailing, &sp_trailing, &trailing_certainty, - &avg_certainty, &unlikely_threshold); - - const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super"; - const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super"; - - int num_blobs = word->best_choice->length(); - - // Calculate the remainder (partial characters) at the edges. - // This accounts for us having classified the best version of - // a word as [speaker?'] when it was instead [speaker.^{21}] - // (that is we accidentally thought the 2 was attached to the period). 
- int num_remainder_leading = 0, num_remainder_trailing = 0; - if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) { - int super_y_bottom = - kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom; - int sub_y_top = - kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top; - int last_word_char = num_blobs - 1 - num_trailing; - float last_char_certainty = word->best_choice->certainty(last_word_char); - if (word->best_choice->unichar_id(last_word_char) != 0 && - last_char_certainty <= unlikely_threshold) { - ScriptPos rpos; - YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, - nullptr, nullptr, &rpos, &num_remainder_trailing); - if (num_trailing > 0 && rpos != sp_trailing) num_remainder_trailing = 0; - if (num_remainder_trailing > 0 && - last_char_certainty < trailing_certainty) { - trailing_certainty = last_char_certainty; - } - } - bool another_blob_available = (num_remainder_trailing == 0) || - num_leading + num_trailing + 1 < num_blobs; - int first_char_certainty = word->best_choice->certainty(num_leading); - if (another_blob_available && - word->best_choice->unichar_id(num_leading) != 0 && - first_char_certainty <= unlikely_threshold) { - ScriptPos lpos; - YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, - &lpos, &num_remainder_leading, nullptr, nullptr); - if (num_leading > 0 && lpos != sp_leading) num_remainder_leading = 0; - if (num_remainder_leading > 0 && - first_char_certainty < leading_certainty) { - leading_certainty = first_char_certainty; - } - } - } - - // If nothing to do, bail now. 
- if (num_leading + num_trailing + - num_remainder_leading + num_remainder_trailing == 0) { - return false; - } - - if (superscript_debug >= 1) { - tprintf("Candidate for superscript detection: %s (", - word->best_choice->unichar_string().string()); - if (num_leading || num_remainder_leading) { - tprintf("%d.%d %s-leading ", num_leading, num_remainder_leading, - leading_pos); - } - if (num_trailing || num_remainder_trailing) { - tprintf("%d.%d %s-trailing ", num_trailing, num_remainder_trailing, - trailing_pos); - } - tprintf(")\n"); - } - if (superscript_debug >= 3) { - word->best_choice->print(); - } - if (superscript_debug >= 2) { - tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ", - avg_certainty, unlikely_threshold); - if (num_leading) - tprintf("Orig. leading (min): %.2f ", leading_certainty); - if (num_trailing) - tprintf("Orig. trailing (min): %.2f ", trailing_certainty); - tprintf("\n"); - } - - // We've now calculated the number of rebuilt blobs we want to carve off. - // However, split_word() works from TBLOBs in chopped_word, so we need to - // convert to those. 
- int num_chopped_leading = - LeadingUnicharsToChopped(word, num_leading) + num_remainder_leading; - int num_chopped_trailing = - TrailingUnicharsToChopped(word, num_trailing) + num_remainder_trailing; - - int retry_leading = 0; - int retry_trailing = 0; - bool is_good = false; - WERD_RES *revised = TrySuperscriptSplits( - num_chopped_leading, leading_certainty, sp_leading, - num_chopped_trailing, trailing_certainty, sp_trailing, - word, &is_good, &retry_leading, &retry_trailing); - if (is_good) { - word->ConsumeWordResults(revised); - } else if (retry_leading || retry_trailing) { - int retry_chopped_leading = - LeadingUnicharsToChopped(revised, retry_leading); - int retry_chopped_trailing = - TrailingUnicharsToChopped(revised, retry_trailing); - WERD_RES *revised2 = TrySuperscriptSplits( - retry_chopped_leading, leading_certainty, sp_leading, - retry_chopped_trailing, trailing_certainty, sp_trailing, - revised, &is_good, &retry_leading, &retry_trailing); - if (is_good) { - word->ConsumeWordResults(revised2); - } - delete revised2; - } - delete revised; - return is_good; -} - -/** - * Determine how many characters (rebuilt blobs) on each end of a given word - * might plausibly be superscripts so SubAndSuperscriptFix can try to - * re-recognize them. Even if we find no whole blobs at either end, - * we will set *unlikely_threshold to a certainty that might be used to - * select "bad enough" outlier characters. If *unlikely_threshold is set to 0, - * though, there's really no hope. - * - * @param[in] word The word to examine. - * @param[out] num_rebuilt_leading the number of rebuilt blobs at the start - * of the word which are all up or down and - * seem badly classified. - * @param[out] leading_pos "super" or "sub" (for debugging) - * @param[out] leading_certainty the worst certainty in the leading blobs. - * @param[out] num_rebuilt_trailing the number of rebuilt blobs at the end - * of the word which are all up or down and - * seem badly classified. 
- * @param[out] trailing_pos "super" or "sub" (for debugging) - * @param[out] trailing_certainty the worst certainty in the trailing blobs. - * @param[out] avg_certainty the average certainty of "normal" blobs in - * the word. - * @param[out] unlikely_threshold the threshold (on certainty) we used to - * select "bad enough" outlier characters. - */ -void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, - int *num_rebuilt_leading, - ScriptPos *leading_pos, - float *leading_certainty, - int *num_rebuilt_trailing, - ScriptPos *trailing_pos, - float *trailing_certainty, - float *avg_certainty, - float *unlikely_threshold) { - *avg_certainty = *unlikely_threshold = 0.0f; - *num_rebuilt_leading = *num_rebuilt_trailing = 0; - *leading_certainty = *trailing_certainty = 0.0f; - - int super_y_bottom = - kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom; - int sub_y_top = - kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top; - - // Step one: Get an average certainty for "normally placed" characters. - - // Counts here are of blobs in the rebuild_word / unichars in best_choice. 
- *leading_pos = *trailing_pos = SP_NORMAL; - int leading_outliers = 0; - int trailing_outliers = 0; - int num_normal = 0; - float normal_certainty_total = 0.0f; - float worst_normal_certainty = 0.0f; - ScriptPos last_pos = SP_NORMAL; - int num_blobs = word->rebuild_word->NumBlobs(); - for (int b = 0; b < num_blobs; ++b) { - TBOX box = word->rebuild_word->blobs[b]->bounding_box(); - ScriptPos pos = SP_NORMAL; - if (box.bottom() >= super_y_bottom) { - pos = SP_SUPERSCRIPT; - } else if (box.top() <= sub_y_top) { - pos = SP_SUBSCRIPT; - } - if (pos == SP_NORMAL) { - if (word->best_choice->unichar_id(b) != 0) { - float char_certainty = word->best_choice->certainty(b); - if (char_certainty < worst_normal_certainty) { - worst_normal_certainty = char_certainty; - } - num_normal++; - normal_certainty_total += char_certainty; - } - if (trailing_outliers == b) { - leading_outliers = trailing_outliers; - *leading_pos = last_pos; - } - trailing_outliers = 0; - } else { - if (last_pos == pos) { - trailing_outliers++; - } else { - trailing_outliers = 1; - } - } - last_pos = pos; - } - *trailing_pos = last_pos; - if (num_normal >= 3) { // throw out the worst as an outlier. - num_normal--; - normal_certainty_total -= worst_normal_certainty; - } - if (num_normal > 0) { - *avg_certainty = normal_certainty_total / num_normal; - *unlikely_threshold = superscript_worse_certainty * (*avg_certainty); - } - if (num_normal == 0 || - (leading_outliers == 0 && trailing_outliers == 0)) { - return; - } - - // Step two: Try to split off bits of the word that are both outliers - // and have much lower certainty than average - // Calculate num_leading and leading_certainty. 
- for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0; - *num_rebuilt_leading < leading_outliers; - (*num_rebuilt_leading)++) { - float char_certainty = word->best_choice->certainty(*num_rebuilt_leading); - if (char_certainty > *unlikely_threshold) { - break; - } - if (char_certainty < *leading_certainty) { - *leading_certainty = char_certainty; - } - } - - // Calculate num_trailing and trailing_certainty. - for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0; - *num_rebuilt_trailing < trailing_outliers; - (*num_rebuilt_trailing)++) { - int blob_idx = num_blobs - 1 - *num_rebuilt_trailing; - float char_certainty = word->best_choice->certainty(blob_idx); - if (char_certainty > *unlikely_threshold) { - break; - } - if (char_certainty < *trailing_certainty) { - *trailing_certainty = char_certainty; - } - } -} - - -/** - * Try splitting off the given number of (chopped) blobs from the front and - * back of the given word and recognizing the pieces. - * - * @param[in] num_chopped_leading how many chopped blobs from the left - * end of the word to chop off and try recognizing as a - * superscript (or subscript) - * @param[in] leading_certainty the (minimum) certainty had by the - * characters in the original leading section. - * @param[in] leading_pos "super" or "sub" (for debugging) - * @param[in] num_chopped_trailing how many chopped blobs from the right - * end of the word to chop off and try recognizing as a - * superscript (or subscript) - * @param[in] trailing_certainty the (minimum) certainty had by the - * characters in the original trailing section. - * @param[in] trailing_pos "super" or "sub" (for debugging) - * @param[in] word the word to try to chop up. - * @param[out] is_good do we believe our result? - * @param[out] retry_rebuild_leading, retry_rebuild_trailing - * If non-zero, and !is_good, then the caller may have luck trying - * to split the returned word with this number of (rebuilt) leading - * and trailing blobs / unichars. 
- * @return A word which is the result of re-recognizing as asked. - */ -WERD_RES *Tesseract::TrySuperscriptSplits( - int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, - int num_chopped_trailing, float trailing_certainty, - ScriptPos trailing_pos, - WERD_RES *word, - bool *is_good, - int *retry_rebuild_leading, int *retry_rebuild_trailing) { - int num_chopped = word->chopped_word->NumBlobs(); - - *retry_rebuild_leading = *retry_rebuild_trailing = 0; - - // Chop apart the word into up to three pieces. - - BlamerBundle *bb0 = nullptr; - BlamerBundle *bb1 = nullptr; - WERD_RES *prefix = nullptr; - WERD_RES *core = nullptr; - WERD_RES *suffix = nullptr; - if (num_chopped_leading > 0) { - prefix = new WERD_RES(*word); - split_word(prefix, num_chopped_leading, &core, &bb0); - } else { - core = new WERD_RES(*word); - } - - if (num_chopped_trailing > 0) { - int split_pt = num_chopped - num_chopped_trailing - num_chopped_leading; - split_word(core, split_pt, &suffix, &bb1); - } - - // Recognize the pieces in turn. - int saved_cp_multiplier = classify_class_pruner_multiplier; - int saved_im_multiplier = classify_integer_matcher_multiplier; - if (prefix) { - // Turn off Tesseract's y-position penalties for the leading superscript. - classify_class_pruner_multiplier.set_value(0); - classify_integer_matcher_multiplier.set_value(0); - - // Adjust our expectations about the baseline for this prefix. - if (superscript_debug >= 3) { - tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading); - } - recog_word_recursive(prefix); - if (superscript_debug >= 2) { - tprintf(" The leading bits look like %s %s\n", - ScriptPosToString(leading_pos), - prefix->best_choice->unichar_string().string()); - } - - // Restore the normal y-position penalties. 
- classify_class_pruner_multiplier.set_value(saved_cp_multiplier); - classify_integer_matcher_multiplier.set_value(saved_im_multiplier); - } - - if (superscript_debug >= 3) { - tprintf(" recognizing middle %d chopped blobs\n", - num_chopped - num_chopped_leading - num_chopped_trailing); - } - - if (suffix) { - // Turn off Tesseract's y-position penalties for the trailing superscript. - classify_class_pruner_multiplier.set_value(0); - classify_integer_matcher_multiplier.set_value(0); - - if (superscript_debug >= 3) { - tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing); - } - recog_word_recursive(suffix); - if (superscript_debug >= 2) { - tprintf(" The trailing bits look like %s %s\n", - ScriptPosToString(trailing_pos), - suffix->best_choice->unichar_string().string()); - } - - // Restore the normal y-position penalties. - classify_class_pruner_multiplier.set_value(saved_cp_multiplier); - classify_integer_matcher_multiplier.set_value(saved_im_multiplier); - } - - // Evaluate whether we think the results are believably better - // than what we already had. - bool good_prefix = !prefix || BelievableSuperscript( - superscript_debug >= 1, *prefix, - superscript_bettered_certainty * leading_certainty, - retry_rebuild_leading, nullptr); - bool good_suffix = !suffix || BelievableSuperscript( - superscript_debug >= 1, *suffix, - superscript_bettered_certainty * trailing_certainty, - nullptr, retry_rebuild_trailing); - - *is_good = good_prefix && good_suffix; - if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) { - // None of it is any good. Quit now. - delete core; - delete prefix; - delete suffix; - delete bb1; - return nullptr; - } - recog_word_recursive(core); - - // Now paste the results together into core. 
- if (suffix) { - suffix->SetAllScriptPositions(trailing_pos); - join_words(core, suffix, bb1); - } - if (prefix) { - prefix->SetAllScriptPositions(leading_pos); - join_words(prefix, core, bb0); - core = prefix; - prefix = nullptr; - } - - if (superscript_debug >= 1) { - tprintf("%s superscript fix: %s\n", *is_good ? "ACCEPT" : "REJECT", - core->best_choice->unichar_string().string()); - } - return core; -} - - -/** - * Return whether this is believable superscript or subscript text. - * - * We insist that: - * + there are no punctuation marks. - * + there are no italics. - * + no normal-sized character is smaller than superscript_scaledown_ratio - * of what it ought to be, and - * + each character is at least as certain as certainty_threshold. - * - * @param[in] debug If true, spew debug output - * @param[in] word The word whose best_choice we're evaluating - * @param[in] certainty_threshold If any of the characters have less - * certainty than this, reject. - * @param[out] left_ok How many left-side characters were ok? - * @param[out] right_ok How many right-side characters were ok? - * @return Whether the complete best choice is believable as a superscript. 
- */ -bool Tesseract::BelievableSuperscript(bool debug, - const WERD_RES &word, - float certainty_threshold, - int *left_ok, - int *right_ok) const { - int initial_ok_run_count = 0; - int ok_run_count = 0; - float worst_certainty = 0.0f; - const WERD_CHOICE &wc = *word.best_choice; - - const UnicityTable& fontinfo_table = get_fontinfo_table(); - for (int i = 0; i < wc.length(); i++) { - TBLOB *blob = word.rebuild_word->blobs[i]; - UNICHAR_ID unichar_id = wc.unichar_id(i); - float char_certainty = wc.certainty(i); - bool bad_certainty = char_certainty < certainty_threshold; - bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id); - bool is_italic = word.fontinfo && word.fontinfo->is_italic(); - BLOB_CHOICE *choice = word.GetBlobChoice(i); - if (choice && fontinfo_table.size() > 0) { - // Get better information from the specific choice, if available. - int font_id1 = choice->fontinfo_id(); - bool font1_is_italic = font_id1 >= 0 - ? fontinfo_table.get(font_id1).is_italic() : false; - int font_id2 = choice->fontinfo_id2(); - is_italic = font1_is_italic && - (font_id2 < 0 || fontinfo_table.get(font_id2).is_italic()); - } - - float height_fraction = 1.0f; - float char_height = blob->bounding_box().height(); - float normal_height = char_height; - if (wc.unicharset()->top_bottom_useful()) { - int min_bot, max_bot, min_top, max_top; - wc.unicharset()->get_top_bottom(unichar_id, - &min_bot, &max_bot, - &min_top, &max_top); - float hi_height = max_top - max_bot; - float lo_height = min_top - min_bot; - normal_height = (hi_height + lo_height) / 2; - if (normal_height >= kBlnXHeight) { - // Only ding characters that we have decent information for because - // they're supposed to be normal sized, not tiny specks or dashes. 
- height_fraction = char_height / normal_height; - } - } - bool bad_height = height_fraction < superscript_scaledown_ratio; - - if (debug) { - if (is_italic) { - tprintf(" Rejecting: superscript is italic.\n"); - } - if (is_punc) { - tprintf(" Rejecting: punctuation present.\n"); - } - const char *char_str = wc.unicharset()->id_to_unichar(unichar_id); - if (bad_certainty) { - tprintf(" Rejecting: don't believe character %s with certainty %.2f " - "which is less than threshold %.2f\n", char_str, - char_certainty, certainty_threshold); - } - if (bad_height) { - tprintf(" Rejecting: character %s seems too small @ %.2f versus " - "expected %.2f\n", char_str, char_height, normal_height); - } - } - if (bad_certainty || bad_height || is_punc || is_italic) { - if (ok_run_count == i) { - initial_ok_run_count = ok_run_count; - } - ok_run_count = 0; - } else { - ok_run_count++; - } - if (char_certainty < worst_certainty) { - worst_certainty = char_certainty; - } - } - bool all_ok = ok_run_count == wc.length(); - if (all_ok && debug) { - tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty); - } - if (!all_ok) { - if (left_ok) *left_ok = initial_ok_run_count; - if (right_ok) *right_ok = ok_run_count; - } - return all_ok; -} - - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessbox.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessbox.cpp deleted file mode 100644 index 80c5a9ad..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessbox.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/********************************************************************** - * File: tessbox.cpp (Formerly tessbox.c) - * Description: Black boxed Tess for developing a resaljet. - * Author: Ray Smith - * Created: Thu Apr 23 11:03:36 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "mfoutline.h" -#include "tesseractclass.h" - -/** - * @name tess_segment_pass_n - * - * Segment a word using the pass_n conditions of the tess segmenter. - * @param pass_n pass number - * @param word word to do - */ - -namespace tesseract { -void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) { - int saved_enable_assoc = 0; - int saved_chop_enable = 0; - - if (word->word->flag(W_DONT_CHOP)) { - saved_enable_assoc = wordrec_enable_assoc; - saved_chop_enable = chop_enable; - wordrec_enable_assoc.set_value(0); - chop_enable.set_value(0); - } - if (pass_n == 1) - set_pass1(); - else - set_pass2(); - recog_word(word); - if (word->best_choice == nullptr) - word->SetupFake(*word->uch_set); - if (word->word->flag(W_DONT_CHOP)) { - wordrec_enable_assoc.set_value(saved_enable_assoc); - chop_enable.set_value(saved_chop_enable); - } -} - -/** - * @name tess_acceptable_word - * - * @return true if the word is regarded as "good enough". 
- * @param word_choice after context - * @param raw_choice before context - */ -bool Tesseract::tess_acceptable_word(WERD_RES* word) { - return getDict().AcceptableResult(word); -} - - -/** - * @name tess_add_doc_word - * - * Add the given word to the document dictionary - */ -void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) { - getDict().add_document_word(*word_choice); -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessedit.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessedit.cpp deleted file mode 100644 index 4e0298de..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessedit.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/********************************************************************** - * File: tessedit.cpp (Formerly tessedit.c) - * Description: (Previously) Main program for merge of tess and editor. - * Now just code to load the language model and various - * engine-specific data files. - * Author: Ray Smith - * Created: Tue Jan 07 15:21:46 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "basedir.h" -#include "tessvars.h" -#include "control.h" -#include "reject.h" -#include "pageres.h" -#include "pgedit.h" -#include "tprintf.h" -#include "tessedit.h" -#include "stopper.h" -#ifndef DISABLED_LEGACY_ENGINE -#include "intmatcher.h" -#include "chop.h" -#endif -#include "globals.h" -#ifndef ANDROID_BUILD -#include "lstmrecognizer.h" -#endif -#include "tesseractclass.h" -#include "params.h" -#ifdef DISABLED_LEGACY_ENGINE -#include "matchdefs.h" -#endif - - // config under api -#define API_CONFIG "configs/api_config" - -ETEXT_DESC *global_monitor = nullptr; // progress monitor - -namespace tesseract { - -// Read a "config" file containing a set of variable, value pairs. -// Searches the standard places: tessdata/configs, tessdata/tessconfigs -// and also accepts a relative or absolute path name. -void Tesseract::read_config_file(const char *filename, - SetParamConstraint constraint) { - STRING path = datadir; - path += "configs/"; - path += filename; - FILE* fp; - if ((fp = fopen(path.string(), "rb")) != nullptr) { - fclose(fp); - } else { - path = datadir; - path += "tessconfigs/"; - path += filename; - if ((fp = fopen(path.string(), "rb")) != nullptr) { - fclose(fp); - } else { - path = filename; - } - } - ParamUtils::ReadParamsFile(path.string(), constraint, this->params()); -} - -// Returns false if a unicharset file for the specified language was not found -// or was invalid. -// This function initializes TessdataManager. After TessdataManager is -// no longer needed, TessdataManager::End() should be called. 
-// -// This function sets tessedit_oem_mode to the given OcrEngineMode oem, unless -// it is OEM_DEFAULT, in which case the value of the variable will be obtained -// from the language-specific config file (stored in [lang].traineddata), from -// the config files specified on the command line or left as the default -// OEM_TESSERACT_ONLY if none of the configs specify this variable. -bool Tesseract::init_tesseract_lang_data( - const char *arg0, const char *textbase, const char *language, - OcrEngineMode oem, char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, bool set_only_non_debug_params, - TessdataManager *mgr) { - // Set the basename, compute the data directory. - main_setup(arg0, textbase); - - // Set the language data path prefix - lang = language != nullptr ? language : "eng"; - language_data_path_prefix = datadir; - language_data_path_prefix += lang; - language_data_path_prefix += "."; - - // Initialize TessdataManager. - STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix; - if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string())) { - tprintf("Error opening data file %s\n", tessdata_path.string()); - tprintf("Please make sure the TESSDATA_PREFIX environment variable is set" - " to your \"tessdata\" directory.\n"); - return false; - } -#ifndef DISABLED_LEGACY_ENGINE - if (oem == OEM_DEFAULT) { - // Set the engine mode from availability, which can then be overridden by - // the config file when we read it below. - if (!mgr->IsLSTMAvailable()) { - tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY); - } else if (!mgr->IsBaseAvailable()) { - tessedit_ocr_engine_mode.set_value(OEM_LSTM_ONLY); - } else { - tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_LSTM_COMBINED); - } - } -#endif // ndef DISABLED_LEGACY_ENGINE - - // If a language specific config file (lang.config) exists, load it in. 
- TFile fp; - if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) { - ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp, - this->params()); - } - - SetParamConstraint set_params_constraint = set_only_non_debug_params ? - SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY : SET_PARAM_CONSTRAINT_NONE; - // Load tesseract variables from config files. This is done after loading - // language-specific variables from [lang].traineddata file, so that custom - // config files can override values in [lang].traineddata file. - for (int i = 0; i < configs_size; ++i) { - read_config_file(configs[i], set_params_constraint); - } - - // Set params specified in vars_vec (done after setting params from config - // files, so that params in vars_vec can override those from files). - if (vars_vec != nullptr && vars_values != nullptr) { - for (int i = 0; i < vars_vec->size(); ++i) { - if (!ParamUtils::SetParam((*vars_vec)[i].string(), - (*vars_values)[i].string(), - set_params_constraint, this->params())) { - tprintf("Error setting param %s\n", (*vars_vec)[i].string()); - exit(1); - } - } - } - - if (((STRING &)tessedit_write_params_to_file).length() > 0) { - FILE *params_file = fopen(tessedit_write_params_to_file.string(), "wb"); - if (params_file != nullptr) { - ParamUtils::PrintParams(params_file, this->params()); - fclose(params_file); - } else { - tprintf("Failed to open %s for writing params.\n", - tessedit_write_params_to_file.string()); - } - } - - // Determine which ocr engine(s) should be loaded and used for recognition. - if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem); - - // If we are only loading the config file (and so not planning on doing any - // recognition) then there's nothing else do here. - if (tessedit_init_config_only) { - return true; - } - -// The various OcrEngineMode settings (see publictypes.h) determine which -// engine-specific data files need to be loaded. -// If LSTM_ONLY is requested, the base Tesseract files are *Not* required. 
-#ifndef ANDROID_BUILD -#ifdef DISABLED_LEGACY_ENGINE - if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { -#else - if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY || - tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) { -#endif // ndef DISABLED_LEGACY_ENGINE - if (mgr->IsComponentAvailable(TESSDATA_LSTM)) { - lstm_recognizer_ = new LSTMRecognizer; - ASSERT_HOST( - lstm_recognizer_->Load(lstm_use_matrix ? language : nullptr, mgr)); - } else { - tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n"); - tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY); - } - } -#endif // ndef ANDROID_BUILD - - // Load the unicharset - if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { - // Avoid requiring a unicharset when we aren't running base tesseract. -#ifndef ANDROID_BUILD - unicharset.CopyFrom(lstm_recognizer_->GetUnicharset()); -#endif // ndef ANDROID_BUILD - } -#ifndef DISABLED_LEGACY_ENGINE - else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) || - !unicharset.load_from_file(&fp, false)) { - return false; - } -#endif // ndef DISABLED_LEGACY_ENGINE - if (unicharset.size() > MAX_NUM_CLASSES) { - tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n"); - return false; - } - right_to_left_ = unicharset.major_right_to_left(); - - // Setup initial unichar ambigs table and read universal ambigs. - UNICHARSET encoder_unicharset; - encoder_unicharset.CopyFrom(unicharset); - unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption); - unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset); - - if (!tessedit_ambigs_training && mgr->GetComponent(TESSDATA_AMBIGS, &fp)) { - unichar_ambigs.LoadUnicharAmbigs(encoder_unicharset, &fp, - ambigs_debug_level, - use_ambigs_for_adaption, &unicharset); - } -#ifndef DISABLED_LEGACY_ENGINE - // Init ParamsModel. - // Load pass1 and pass2 weights (for now these two sets are the same, but in - // the future separate sets of weights can be generated). 
- for (int p = ParamsModel::PTRAIN_PASS1; - p < ParamsModel::PTRAIN_NUM_PASSES; ++p) { - language_model_->getParamsModel().SetPass( - static_cast(p)); - if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) { - if (!language_model_->getParamsModel().LoadFromFp(lang.string(), &fp)) { - return false; - } - } - } -#endif // ndef DISABLED_LEGACY_ENGINE - - return true; -} - -// Helper returns true if the given string is in the vector of strings. -static bool IsStrInList(const STRING& str, - const GenericVector& str_list) { - for (int i = 0; i < str_list.size(); ++i) { - if (str_list[i] == str) - return true; - } - return false; -} - -// Parse a string of the form [~][+[~]]*. -// Langs with no prefix get appended to to_load, provided they -// are not in there already. -// Langs with ~ prefix get appended to not_to_load, provided they are not in -// there already. -void Tesseract::ParseLanguageString(const char* lang_str, - GenericVector* to_load, - GenericVector* not_to_load) { - STRING remains(lang_str); - while (remains.length() > 0) { - // Find the start of the lang code and which vector to add to. - const char* start = remains.string(); - while (*start == '+') - ++start; - GenericVector* target = to_load; - if (*start == '~') { - target = not_to_load; - ++start; - } - // Find the index of the end of the lang code in string start. - int end = strlen(start); - const char* plus = strchr(start, '+'); - if (plus != nullptr && plus - start < end) - end = plus - start; - STRING lang_code(start); - lang_code.truncate_at(end); - STRING next(start + end); - remains = next; - // Check whether lang_code is already in the target vector and add. - if (!IsStrInList(lang_code, *target)) { - target->push_back(lang_code); - } - } -} - -// Initialize for potentially a set of languages defined by the language -// string and recursively any additional languages required by any language -// traineddata file (via tessedit_load_sublangs in its config) that is loaded. 
-// See init_tesseract_internal for args. -int Tesseract::init_tesseract(const char *arg0, const char *textbase, - const char *language, OcrEngineMode oem, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params, - TessdataManager *mgr) { - GenericVector langs_to_load; - GenericVector langs_not_to_load; - ParseLanguageString(language, &langs_to_load, &langs_not_to_load); - - sub_langs_.delete_data_pointers(); - sub_langs_.clear(); - // Find the first loadable lang and load into this. - // Add any languages that this language requires - bool loaded_primary = false; - // Load the rest into sub_langs_. - for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { - if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) { - const char *lang_str = langs_to_load[lang_index].string(); - Tesseract *tess_to_init; - if (!loaded_primary) { - tess_to_init = this; - } else { - tess_to_init = new Tesseract; - } - - int result = tess_to_init->init_tesseract_internal( - arg0, textbase, lang_str, oem, configs, configs_size, vars_vec, - vars_values, set_only_non_debug_params, mgr); - // Forget that language, but keep any reader we were given. - mgr->Clear(); - - if (!loaded_primary) { - if (result < 0) { - tprintf("Failed loading language '%s'\n", lang_str); - } else { - ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), - &langs_to_load, &langs_not_to_load); - loaded_primary = true; - } - } else { - if (result < 0) { - tprintf("Failed loading language '%s'\n", lang_str); - delete tess_to_init; - } else { - sub_langs_.push_back(tess_to_init); - // Add any languages that this language requires - ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), - &langs_to_load, &langs_not_to_load); - } - } - } - } - if (!loaded_primary) { - tprintf("Tesseract couldn't load any languages!\n"); - return -1; // Couldn't load any language! 
- } -#ifndef DISABLED_LEGACY_ENGINE - if (!sub_langs_.empty()) { - // In multilingual mode word ratings have to be directly comparable, - // so use the same language model weights for all languages: - // use the primary language's params model if - // tessedit_use_primary_params_model is set, - // otherwise use default language model weights. - if (tessedit_use_primary_params_model) { - for (int s = 0; s < sub_langs_.size(); ++s) { - sub_langs_[s]->language_model_->getParamsModel().Copy( - this->language_model_->getParamsModel()); - } - tprintf("Using params model of the primary language\n"); - } else { - this->language_model_->getParamsModel().Clear(); - for (int s = 0; s < sub_langs_.size(); ++s) { - sub_langs_[s]->language_model_->getParamsModel().Clear(); - } - } - } - - SetupUniversalFontIds(); -#endif // ndef DISABLED_LEGACY_ENGINE - return 0; -} - -// Common initialization for a single language. -// arg0 is the datapath for the tessdata directory, which could be the -// path of the tessdata directory with no trailing /, or (if tessdata -// lives in the same directory as the executable, the path of the executable, -// hence the name arg0. -// textbase is an optional output file basename (used only for training) -// language is the language code to load. -// oem controls which engine(s) will operate on the image -// configs (argv) is an array of config filenames to load variables from. -// May be nullptr. -// configs_size (argc) is the number of elements in configs. -// vars_vec is an optional vector of variables to set. -// vars_values is an optional corresponding vector of values for the variables -// in vars_vec. -// If set_only_init_params is true, then only the initialization variables -// will be set. 
-int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase, - const char *language, OcrEngineMode oem, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params, - TessdataManager *mgr) { - if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs, - configs_size, vars_vec, vars_values, - set_only_non_debug_params, mgr)) { - return -1; - } - if (tessedit_init_config_only) { - return 0; - } - // If only LSTM will be used, skip loading Tesseract classifier's - // pre-trained templates and dictionary. - bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY; - program_editup(textbase, init_tesseract ? mgr : nullptr, - init_tesseract ? mgr : nullptr); - return 0; //Normal exit -} - -#ifndef DISABLED_LEGACY_ENGINE - -// Helper builds the all_fonts table by adding new fonts from new_fonts. -static void CollectFonts(const UnicityTable& new_fonts, - UnicityTable* all_fonts) { - for (int i = 0; i < new_fonts.size(); ++i) { - // UnicityTable uniques as we go. - all_fonts->push_back(new_fonts.get(i)); - } -} - -// Helper assigns an id to lang_fonts using the index in all_fonts table. -static void AssignIds(const UnicityTable& all_fonts, - UnicityTable* lang_fonts) { - for (int i = 0; i < lang_fonts->size(); ++i) { - int index = all_fonts.get_id(lang_fonts->get(i)); - lang_fonts->get_mutable(i)->universal_id = index; - } -} - -// Set the universal_id member of each font to be unique among all -// instances of the same font loaded. -void Tesseract::SetupUniversalFontIds() { - // Note that we can get away with bitwise copying FontInfo in - // all_fonts, as it is a temporary structure and we avoid setting the - // delete callback. - UnicityTable all_fonts; - all_fonts.set_compare_callback(NewPermanentTessCallback(CompareFontInfo)); - - // Create the universal ID table. 
- CollectFonts(get_fontinfo_table(), &all_fonts); - for (int i = 0; i < sub_langs_.size(); ++i) { - CollectFonts(sub_langs_[i]->get_fontinfo_table(), &all_fonts); - } - // Assign ids from the table to each font table. - AssignIds(all_fonts, &get_fontinfo_table()); - for (int i = 0; i < sub_langs_.size(); ++i) { - AssignIds(all_fonts, &sub_langs_[i]->get_fontinfo_table()); - } - font_table_size_ = all_fonts.size(); -} - -// init the LM component -int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, - const char *language, TessdataManager *mgr) { - if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, - nullptr, 0, nullptr, nullptr, false, mgr)) - return -1; - getDict().SetupForLoad(Dict::GlobalDawgCache()); - getDict().Load(lang, mgr); - getDict().FinishLoad(); - return 0; -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -void Tesseract::end_tesseract() { - end_recog(); -} - -/* Define command type identifiers */ - -enum CMD_EVENTS -{ - ACTION_1_CMD_EVENT, - RECOG_WERDS, - RECOG_PSEUDO, - ACTION_2_CMD_EVENT -}; -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessedit.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessedit.h deleted file mode 100644 index 750056b7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessedit.h +++ /dev/null @@ -1,28 +0,0 @@ -/********************************************************************** - * File: tessedit.h (Formerly tessedit.h) - * Description: Main program for merge of tess and editor. - * Author: Ray Smith - * Created: Tue Jan 07 15:21:46 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSEDIT_H -#define TESSEDIT_H - -class ETEXT_DESC; - - //progress monitor -extern ETEXT_DESC *global_monitor; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tesseractclass.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tesseractclass.cpp deleted file mode 100644 index c6aa8337..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tesseractclass.cpp +++ /dev/null @@ -1,692 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tesseractclass.cpp -// Description: The Tesseract class. It holds/owns everything needed -// to run Tesseract on a single language, and also a set of -// sub-Tesseracts to run sub-languages. For thread safety, *every* -// variable that was previously global or static (except for -// constant data, and some visual debugging flags) has been moved -// in here, directly, or indirectly. -// This makes it safe to run multiple Tesseracts in different -// threads in parallel, and keeps the different language -// instances separate. -// Some global functions remain, but they are isolated re-entrant -// functions that operate on their arguments. Functions that work -// on variable data have been moved to an appropriate class based -// mostly on the directory hierarchy. 
For more information see -// slide 6 of "2ArchitectureAndDataStructures" in -// https://drive.google.com/file/d/0B7l10Bj_LprhbUlIUFlCdGtDYkE/edit?usp=sharing -// Some global data and related functions still exist in the -// training-related code, but they don't interfere with normal -// recognition operation. -// Author: Ray Smith -// Created: Fri Mar 07 08:17:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tesseractclass.h" - -#include "allheaders.h" -#include "edgblob.h" -#include "equationdetect.h" -#include "globals.h" -#ifndef ANDROID_BUILD -#include "lstmrecognizer.h" -#endif - -namespace tesseract { - -Tesseract::Tesseract() - : BOOL_MEMBER(tessedit_resegment_from_boxes, false, - "Take segmentation and labeling from box file", - this->params()), - BOOL_MEMBER(tessedit_resegment_from_line_boxes, false, - "Conversion of word/line box file to char box file", - this->params()), - BOOL_MEMBER(tessedit_train_from_boxes, false, - "Generate training data from boxed chars", this->params()), - BOOL_MEMBER(tessedit_make_boxes_from_boxes, false, - "Generate more boxes from boxed chars", this->params()), - BOOL_MEMBER(tessedit_train_line_recognizer, false, - "Break input into lines and remap boxes if present", - this->params()), - BOOL_MEMBER(tessedit_dump_pageseg_images, false, - "Dump intermediate images made during page segmentation", - this->params()), - // The default for pageseg_mode is the old behaviour, so as not to - // upset anything that relies on that. - INT_MEMBER( - tessedit_pageseg_mode, PSM_SINGLE_BLOCK, - "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," - " 5=line, 6=word, 7=char" - " (Values from PageSegMode enum in publictypes.h)", - this->params()), - INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT, - "Which OCR engine(s) to run (Tesseract, LSTM, both)." 
- " Defaults to loading and running the most accurate" - " available.", - this->params()), - STRING_MEMBER(tessedit_char_blacklist, "", - "Blacklist of chars not to recognize", this->params()), - STRING_MEMBER(tessedit_char_whitelist, "", - "Whitelist of chars to recognize", this->params()), - STRING_MEMBER(tessedit_char_unblacklist, "", - "List of chars to override tessedit_char_blacklist", - this->params()), - BOOL_MEMBER(tessedit_ambigs_training, false, - "Perform training for ambiguities", this->params()), - INT_MEMBER(pageseg_devanagari_split_strategy, - tesseract::ShiroRekhaSplitter::NO_SPLIT, - "Whether to use the top-line splitting process for Devanagari " - "documents while performing page-segmentation.", - this->params()), - INT_MEMBER(ocr_devanagari_split_strategy, - tesseract::ShiroRekhaSplitter::NO_SPLIT, - "Whether to use the top-line splitting process for Devanagari " - "documents while performing ocr.", - this->params()), - STRING_MEMBER(tessedit_write_params_to_file, "", - "Write all parameters to the given file.", this->params()), - BOOL_MEMBER(tessedit_adaption_debug, false, - "Generate and print debug" - " information for adaption", - this->params()), - INT_MEMBER(bidi_debug, 0, "Debug level for BiDi", this->params()), - INT_MEMBER(applybox_debug, 1, "Debug level", this->params()), - INT_MEMBER(applybox_page, 0, "Page number to apply boxes from", - this->params()), - STRING_MEMBER(applybox_exposure_pattern, ".exp", - "Exposure value follows" - " this pattern in the image filename. The name of the image" - " files are expected to be in the form" - " [lang].[fontname].exp[num].tif", - this->params()), - BOOL_MEMBER(applybox_learn_chars_and_char_frags_mode, false, - "Learn both character fragments (as is done in the" - " special low exposure mode) as well as unfragmented" - " characters.", - this->params()), - BOOL_MEMBER(applybox_learn_ngrams_mode, false, - "Each bounding box" - " is assumed to contain ngrams. 
Only learn the ngrams" - " whose outlines overlap horizontally.", - this->params()), - BOOL_MEMBER(tessedit_display_outwords, false, "Draw output words", - this->params()), - BOOL_MEMBER(tessedit_dump_choices, false, "Dump char choices", - this->params()), - BOOL_MEMBER(tessedit_timing_debug, false, "Print timing stats", - this->params()), - BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true, - "Try to improve fuzzy spaces", this->params()), - BOOL_MEMBER(tessedit_unrej_any_wd, false, - "Don't bother with word plausibility", this->params()), - BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?", - this->params()), - BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height", - this->params()), - BOOL_MEMBER(tessedit_enable_doc_dict, true, - "Add words to the document dictionary", this->params()), - BOOL_MEMBER(tessedit_debug_fonts, false, "Output font info per char", - this->params()), - BOOL_MEMBER(tessedit_debug_block_rejection, false, "Block and Row stats", - this->params()), - BOOL_MEMBER(tessedit_enable_bigram_correction, true, - "Enable correction based on the word bigram dictionary.", - this->params()), - BOOL_MEMBER(tessedit_enable_dict_correction, false, - "Enable single word correction based on the dictionary.", - this->params()), - INT_MEMBER(tessedit_bigram_debug, 0, - "Amount of debug output for bigram correction.", - this->params()), - BOOL_MEMBER(enable_noise_removal, true, - "Remove and conditionally reassign small outlines when they" - " confuse layout analysis, determining diacritics vs noise", - this->params()), - INT_MEMBER(debug_noise_removal, 0, "Debug reassignment of small outlines", - this->params()), - // Worst (min) certainty, for which a diacritic is allowed to make the - // base - // character worse and still be included. 
- double_MEMBER(noise_cert_basechar, -8.0, - "Hingepoint for base char certainty", this->params()), - // Worst (min) certainty, for which a non-overlapping diacritic is allowed - // to make the base character worse and still be included. - double_MEMBER(noise_cert_disjoint, -1.0, - "Hingepoint for disjoint certainty", this->params()), - // Worst (min) certainty, for which a diacritic is allowed to make a new - // stand-alone blob. - double_MEMBER(noise_cert_punc, -3.0, - "Threshold for new punc char certainty", this->params()), - // Factor of certainty margin for adding diacritics to not count as worse. - double_MEMBER(noise_cert_factor, 0.375, - "Scaling on certainty diff from Hingepoint", - this->params()), - INT_MEMBER(noise_maxperblob, 8, "Max diacritics to apply to a blob", - this->params()), - INT_MEMBER(noise_maxperword, 16, "Max diacritics to apply to a word", - this->params()), - INT_MEMBER(debug_x_ht_level, 0, "Reestimate debug", this->params()), - BOOL_MEMBER(debug_acceptable_wds, false, "Dump word pass/fail chk", - this->params()), - STRING_MEMBER(chs_leading_punct, "('`\"", "Leading punctuation", - this->params()), - STRING_MEMBER(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation", - this->params()), - STRING_MEMBER(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation", - this->params()), - double_MEMBER(quality_rej_pc, 0.08, - "good_quality_doc lte rejection limit", this->params()), - double_MEMBER(quality_blob_pc, 0.0, - "good_quality_doc gte good blobs limit", this->params()), - double_MEMBER(quality_outline_pc, 1.0, - "good_quality_doc lte outline error limit", this->params()), - double_MEMBER(quality_char_pc, 0.95, - "good_quality_doc gte good char limit", this->params()), - INT_MEMBER(quality_min_initial_alphas_reqd, 2, "alphas in a good word", - this->params()), - INT_MEMBER(tessedit_tess_adaption_mode, 0x27, - "Adaptation decision algorithm for tess", this->params()), - BOOL_MEMBER(tessedit_minimal_rej_pass1, false, - "Do minimal 
rejection on pass 1 output", this->params()), - BOOL_MEMBER(tessedit_test_adaption, false, "Test adaption criteria", - this->params()), - BOOL_MEMBER(tessedit_matcher_log, false, "Log matcher activity", - this->params()), - INT_MEMBER(tessedit_test_adaption_mode, 3, - "Adaptation decision algorithm for tess", this->params()), - BOOL_MEMBER(test_pt, false, "Test for point", this->params()), - double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()), - double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()), - INT_MEMBER(multilang_debug_level, 0, "Print multilang debug info.", - this->params()), - INT_MEMBER(paragraph_debug_level, 0, "Print paragraph debug info.", - this->params()), - BOOL_MEMBER(paragraph_text_based, true, - "Run paragraph detection on the post-text-recognition " - "(more accurate)", - this->params()), - BOOL_MEMBER(lstm_use_matrix, 1, - "Use ratings matrix/beam search with lstm", this->params()), - STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines", - this->params()), - STRING_MEMBER(outlines_2, "ij!?%\":;", "Non standard number of outlines", - this->params()), - BOOL_MEMBER(docqual_excuse_outline_errs, false, - "Allow outline errs in unrejection?", this->params()), - BOOL_MEMBER(tessedit_good_quality_unrej, true, - "Reduce rejection on good docs", this->params()), - BOOL_MEMBER(tessedit_use_reject_spaces, true, "Reject spaces?", - this->params()), - double_MEMBER(tessedit_reject_doc_percent, 65.00, - "%rej allowed before rej whole doc", this->params()), - double_MEMBER(tessedit_reject_block_percent, 45.00, - "%rej allowed before rej whole block", this->params()), - double_MEMBER(tessedit_reject_row_percent, 40.00, - "%rej allowed before rej whole row", this->params()), - double_MEMBER(tessedit_whole_wd_rej_row_percent, 70.00, - "Number of row rejects in whole word rejects" - " which prevents whole row rejection", - this->params()), - BOOL_MEMBER(tessedit_preserve_blk_rej_perfect_wds, true, - "Only rej partially rejected 
words in block rejection", - this->params()), - BOOL_MEMBER(tessedit_preserve_row_rej_perfect_wds, true, - "Only rej partially rejected words in row rejection", - this->params()), - BOOL_MEMBER(tessedit_dont_blkrej_good_wds, false, - "Use word segmentation quality metric", this->params()), - BOOL_MEMBER(tessedit_dont_rowrej_good_wds, false, - "Use word segmentation quality metric", this->params()), - INT_MEMBER(tessedit_preserve_min_wd_len, 2, - "Only preserve wds longer than this", this->params()), - BOOL_MEMBER(tessedit_row_rej_good_docs, true, - "Apply row rejection to good docs", this->params()), - double_MEMBER(tessedit_good_doc_still_rowrej_wd, 1.1, - "rej good doc wd if more than this fraction rejected", - this->params()), - BOOL_MEMBER(tessedit_reject_bad_qual_wds, true, - "Reject all bad quality wds", this->params()), - BOOL_MEMBER(tessedit_debug_doc_rejection, false, "Page stats", - this->params()), - BOOL_MEMBER(tessedit_debug_quality_metrics, false, - "Output data to debug file", this->params()), - BOOL_MEMBER(bland_unrej, false, "unrej potential with no checks", - this->params()), - double_MEMBER(quality_rowrej_pc, 1.1, - "good_quality_doc gte good char limit", this->params()), - BOOL_MEMBER(unlv_tilde_crunching, false, - "Mark v.bad words for tilde crunch", this->params()), - BOOL_MEMBER(hocr_font_info, false, "Add font info to hocr output", - this->params()), - BOOL_MEMBER(crunch_early_merge_tess_fails, true, "Before word crunch?", - this->params()), - BOOL_MEMBER(crunch_early_convert_bad_unlv_chs, false, - "Take out ~^ early?", this->params()), - double_MEMBER(crunch_terrible_rating, 80.0, "crunch rating lt this", - this->params()), - BOOL_MEMBER(crunch_terrible_garbage, true, "As it says", this->params()), - double_MEMBER(crunch_poor_garbage_cert, -9.0, - "crunch garbage cert lt this", this->params()), - double_MEMBER(crunch_poor_garbage_rate, 60, - "crunch garbage rating lt this", this->params()), - double_MEMBER(crunch_pot_poor_rate, 40, 
"POTENTIAL crunch rating lt this", - this->params()), - double_MEMBER(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this", - this->params()), - BOOL_MEMBER(crunch_pot_garbage, true, "POTENTIAL crunch garbage", - this->params()), - double_MEMBER(crunch_del_rating, 60, "POTENTIAL crunch rating lt this", - this->params()), - double_MEMBER(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this", - this->params()), - double_MEMBER(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this", - this->params()), - double_MEMBER(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this", - this->params()), - double_MEMBER(crunch_del_min_width, 3.0, - "Del if word width lt xht x this", this->params()), - double_MEMBER(crunch_del_high_word, 1.5, - "Del if word gt xht x this above bl", this->params()), - double_MEMBER(crunch_del_low_word, 0.5, - "Del if word gt xht x this below bl", this->params()), - double_MEMBER(crunch_small_outlines_size, 0.6, "Small if lt xht x this", - this->params()), - INT_MEMBER(crunch_rating_max, 10, "For adj length in rating per ch", - this->params()), - INT_MEMBER(crunch_pot_indicators, 1, - "How many potential indicators needed", this->params()), - BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings", - this->params()), - BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring", - this->params()), - BOOL_MEMBER(crunch_leave_accept_strings, false, - "Don't pot crunch sensible strings", this->params()), - BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures", - this->params()), - INT_MEMBER(crunch_leave_lc_strings, 4, - "Don't crunch words with long lower case strings", - this->params()), - INT_MEMBER(crunch_leave_uc_strings, 4, - "Don't crunch words with long lower case strings", - this->params()), - INT_MEMBER(crunch_long_repetitions, 3, - "Crunch words with long repetitions", this->params()), - INT_MEMBER(crunch_debug, 0, "As it says", this->params()), - INT_MEMBER(fixsp_non_noise_limit, 1, - "How many 
non-noise blbs either side?", this->params()), - double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this", - this->params()), - BOOL_MEMBER(tessedit_prefer_joined_punct, false, - "Reward punctuation joins", this->params()), - INT_MEMBER(fixsp_done_mode, 1, "What constitues done for spacing", - this->params()), - INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug", - this->params()), - STRING_MEMBER(numeric_punctuation, ".,", - "Punct. chs expected WITHIN numbers", this->params()), - INT_MEMBER(x_ht_acceptance_tolerance, 8, - "Max allowed deviation of blob top outside of font data", - this->params()), - INT_MEMBER(x_ht_min_change, 8, - "Min change in xht before actually trying it", this->params()), - INT_MEMBER(superscript_debug, 0, - "Debug level for sub & superscript fixer", this->params()), - double_MEMBER( - superscript_worse_certainty, 2.0, - "How many times worse " - "certainty does a superscript position glyph need to be for " - "us to try classifying it as a char with a different " - "baseline?", - this->params()), - double_MEMBER( - superscript_bettered_certainty, 0.97, - "What reduction in " - "badness do we think sufficient to choose a superscript " - "over what we'd thought. For example, a value of 0.6 means " - "we want to reduce badness of certainty by at least 40%", - this->params()), - double_MEMBER(superscript_scaledown_ratio, 0.4, - "A superscript scaled down more than this is unbelievably " - "small. 
For example, 0.3 means we expect the font size to " - "be no smaller than 30% of the text line font size.", - this->params()), - double_MEMBER(subscript_max_y_top, 0.5, - "Maximum top of a character measured as a multiple of " - "x-height above the baseline for us to reconsider whether " - "it's a subscript.", - this->params()), - double_MEMBER(superscript_min_y_bottom, 0.3, - "Minimum bottom of a character measured as a multiple of " - "x-height above the baseline for us to reconsider whether " - "it's a superscript.", - this->params()), - BOOL_MEMBER(tessedit_write_block_separators, false, - "Write block separators in output", this->params()), - BOOL_MEMBER(tessedit_write_rep_codes, false, "Write repetition char code", - this->params()), - BOOL_MEMBER(tessedit_write_unlv, false, "Write .unlv output file", - this->params()), - BOOL_MEMBER(tessedit_create_txt, false, "Write .txt output file", - this->params()), - BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file", - this->params()), - BOOL_MEMBER(tessedit_create_tsv, false, "Write .tsv output file", - this->params()), - BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file", - this->params()), - BOOL_MEMBER(textonly_pdf, false, - "Create PDF with only one invisible text layer", - this->params()), - INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()), - INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", - this->params()), - INT_MEMBER(min_characters_to_try, 50, - "Specify minimum characters to try during OSD", - this->params()), - STRING_MEMBER(unrecognised_char, "|", - "Output char for unidentified blobs", this->params()), - INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()), - INT_MEMBER(suspect_space_level, 100, - "Min suspect level for rejecting spaces", this->params()), - INT_MEMBER(suspect_short_words, 2, - "Don't suspect dict wds longer than this", this->params()), - BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars 
rejected", - this->params()), - double_MEMBER(suspect_rating_per_ch, 999.9, - "Don't touch bad rating limit", this->params()), - double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit", - this->params()), - BOOL_MEMBER(tessedit_minimal_rejection, false, - "Only reject tess failures", this->params()), - BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING", - this->params()), - BOOL_MEMBER(tessedit_word_for_word, false, - "Make output have exactly one word per WERD", this->params()), - BOOL_MEMBER(tessedit_zero_kelvin_rejection, false, - "Don't reject ANYTHING AT ALL", this->params()), - BOOL_MEMBER(tessedit_consistent_reps, true, - "Force all rep chars the same", this->params()), - INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm", - this->params()), - BOOL_MEMBER(tessedit_rejection_debug, false, "Adaption debug", - this->params()), - BOOL_MEMBER(tessedit_flip_0O, true, "Contextual 0O O0 flips", - this->params()), - double_MEMBER(tessedit_lower_flip_hyphen, 1.5, - "Aspect ratio dot/hyphen test", this->params()), - double_MEMBER(tessedit_upper_flip_hyphen, 1.8, - "Aspect ratio dot/hyphen test", this->params()), - BOOL_MEMBER(rej_trust_doc_dawg, false, - "Use DOC dawg in 11l conf. 
detector", this->params()), - BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test", - this->params()), - BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check", - this->params()), - BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control", - this->params()), - BOOL_MEMBER(rej_use_tess_blanks, true, "Individual rejection control", - this->params()), - BOOL_MEMBER(rej_use_good_perm, true, "Individual rejection control", - this->params()), - BOOL_MEMBER(rej_use_sensible_wd, false, "Extend permuter check", - this->params()), - BOOL_MEMBER(rej_alphas_in_number_perm, false, "Extend permuter check", - this->params()), - double_MEMBER(rej_whole_of_mostly_reject_word_fract, 0.85, - "if >this fract", this->params()), - INT_MEMBER(tessedit_image_border, 2, "Rej blbs near image edge limit", - this->params()), - STRING_MEMBER(ok_repeated_ch_non_alphanum_wds, "-?*\075", - "Allow NN to unrej", this->params()), - STRING_MEMBER(conflict_set_I_l_1, "Il1[]", "Il1 conflict set", - this->params()), - INT_MEMBER(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this", - this->params()), - BOOL_MEMBER(tessedit_create_boxfile, false, "Output text with boxes", - this->params()), - INT_MEMBER(tessedit_page_number, -1, - "-1 -> All pages" - " , else specific page to process", - this->params()), - BOOL_MEMBER(tessedit_write_images, false, - "Capture the image from the IPE", this->params()), - BOOL_MEMBER(interactive_display_mode, false, "Run interactively?", - this->params()), - STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()), - BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word", - this->params()), - STRING_MEMBER(tessedit_load_sublangs, "", - "List of languages to load with this one", this->params()), - BOOL_MEMBER(tessedit_use_primary_params_model, false, - "In multilingual mode use params model of the" - " primary language", - this->params()), - double_MEMBER(min_orientation_margin, 7.0, - "Min acceptable 
orientation margin", this->params()), - BOOL_MEMBER(textord_tabfind_show_vlines, false, "Debug line finding", - this->params()), - BOOL_MEMBER(textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model", - this->params()), - BOOL_MEMBER(poly_allow_detailed_fx, false, - "Allow feature extractors to see the original outline", - this->params()), - BOOL_INIT_MEMBER(tessedit_init_config_only, false, - "Only initialize with the config file. Useful if the " - "instance is not going to be used for OCR but say only " - "for layout analysis.", - this->params()), - BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector", - this->params()), - BOOL_MEMBER(textord_tabfind_vertical_text, true, - "Enable vertical detection", this->params()), - BOOL_MEMBER(textord_tabfind_force_vertical_text, false, - "Force using vertical text page mode", this->params()), - double_MEMBER( - textord_tabfind_vertical_text_ratio, 0.5, - "Fraction of textlines deemed vertical to use vertical page " - "mode", - this->params()), - double_MEMBER( - textord_tabfind_aligned_gap_fraction, 0.75, - "Fraction of height used as a minimum gap for aligned blobs.", - this->params()), - INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible", - this->params()), - BOOL_MEMBER(preserve_interword_spaces, false, - "Preserve multiple interword spaces", this->params()), - STRING_MEMBER(page_separator, "\f", - "Page separator (default is form feed control character)", - this->params()), - INT_MEMBER(lstm_choice_mode, 0, - "Allows to include alternative symbols choices in the hOCR output. " - "Valid input values are 0, 1 and 2. 0 is the default value. " - "With 1 the alternative symbol choices per timestep are included. 
" - "With 2 the alternative symbol choices are accumulated per character.", - this->params()), - - backup_config_file_(nullptr), - pix_binary_(nullptr), - pix_grey_(nullptr), - pix_original_(nullptr), - pix_thresholds_(nullptr), - source_resolution_(0), - textord_(this), - right_to_left_(false), - scaled_color_(nullptr), - scaled_factor_(-1), - deskew_(1.0f, 0.0f), - reskew_(1.0f, 0.0f), - most_recently_used_(this), - font_table_size_(0), - equ_detect_(nullptr), -#ifndef ANDROID_BUILD - lstm_recognizer_(nullptr), -#endif - train_line_page_num_(0) { -} - -Tesseract::~Tesseract() { - Clear(); - pixDestroy(&pix_original_); - end_tesseract(); - sub_langs_.delete_data_pointers(); -#ifndef ANDROID_BUILD - delete lstm_recognizer_; - lstm_recognizer_ = nullptr; -#endif -} - -Dict& Tesseract::getDict() -{ - if (0 == Classify::getDict().NumDawgs() && AnyLSTMLang()) - { - if (lstm_recognizer_ && lstm_recognizer_->GetDict()) - { - return *const_cast(lstm_recognizer_->GetDict()); - } - } - return Classify::getDict(); - } - - -void Tesseract::Clear() { - STRING debug_name = imagebasename + "_debug.pdf"; - pixa_debug_.WritePDF(debug_name.string()); - pixDestroy(&pix_binary_); - pixDestroy(&pix_grey_); - pixDestroy(&pix_thresholds_); - pixDestroy(&scaled_color_); - deskew_ = FCOORD(1.0f, 0.0f); - reskew_ = FCOORD(1.0f, 0.0f); - splitter_.Clear(); - scaled_factor_ = -1; - for (int i = 0; i < sub_langs_.size(); ++i) - sub_langs_[i]->Clear(); -} - -#ifndef DISABLED_LEGACY_ENGINE - -void Tesseract::SetEquationDetect(EquationDetect* detector) { - equ_detect_ = detector; - equ_detect_->SetLangTesseract(this); -} - -// Clear all memory of adaption for this and all subclassifiers. -void Tesseract::ResetAdaptiveClassifier() { - ResetAdaptiveClassifierInternal(); - for (int i = 0; i < sub_langs_.size(); ++i) { - sub_langs_[i]->ResetAdaptiveClassifierInternal(); - } -} - -#endif //ndef DISABLED_LEGACY_ENGINE - -// Clear the document dictionary for this and all subclassifiers. 
-void Tesseract::ResetDocumentDictionary() { - getDict().ResetDocumentDictionary(); - for (int i = 0; i < sub_langs_.size(); ++i) { - sub_langs_[i]->getDict().ResetDocumentDictionary(); - } -} - -void Tesseract::SetBlackAndWhitelist() { - // Set the white and blacklists (if any) - unicharset.set_black_and_whitelist(tessedit_char_blacklist.string(), - tessedit_char_whitelist.string(), - tessedit_char_unblacklist.string()); - // Black and white lists should apply to all loaded classifiers. - for (int i = 0; i < sub_langs_.size(); ++i) { - sub_langs_[i]->unicharset.set_black_and_whitelist( - tessedit_char_blacklist.string(), tessedit_char_whitelist.string(), - tessedit_char_unblacklist.string()); - } -} - -// Perform steps to prepare underlying binary image/other data structures for -// page segmentation. -void Tesseract::PrepareForPageseg() { - textord_.set_use_cjk_fp_model(textord_use_cjk_fp_model); - // Find the max splitter strategy over all langs. - ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy = - static_cast( - static_cast(pageseg_devanagari_split_strategy)); - for (int i = 0; i < sub_langs_.size(); ++i) { - ShiroRekhaSplitter::SplitStrategy pageseg_strategy = - static_cast( - static_cast(sub_langs_[i]->pageseg_devanagari_split_strategy)); - if (pageseg_strategy > max_pageseg_strategy) - max_pageseg_strategy = pageseg_strategy; - pixDestroy(&sub_langs_[i]->pix_binary_); - sub_langs_[i]->pix_binary_ = pixClone(pix_binary()); - } - // Perform shiro-rekha (top-line) splitting and replace the current image by - // the newly split image. - splitter_.set_orig_pix(pix_binary()); - splitter_.set_pageseg_split_strategy(max_pageseg_strategy); - if (splitter_.Split(true, &pixa_debug_)) { - ASSERT_HOST(splitter_.splitted_image()); - pixDestroy(&pix_binary_); - pix_binary_ = pixClone(splitter_.splitted_image()); - } -} - -// Perform steps to prepare underlying binary image/other data structures for -// OCR. The current segmentation is required by this method. 
-// Note that this method resets pix_binary_ to the original binarized image, -// which may be different from the image actually used for OCR depending on the -// value of devanagari_ocr_split_strategy. -void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list, - Tesseract* osd_tess, OSResults* osr) { - // Find the max splitter strategy over all langs. - ShiroRekhaSplitter::SplitStrategy max_ocr_strategy = - static_cast( - static_cast(ocr_devanagari_split_strategy)); - for (int i = 0; i < sub_langs_.size(); ++i) { - ShiroRekhaSplitter::SplitStrategy ocr_strategy = - static_cast( - static_cast(sub_langs_[i]->ocr_devanagari_split_strategy)); - if (ocr_strategy > max_ocr_strategy) - max_ocr_strategy = ocr_strategy; - } - // Utilize the segmentation information available. - splitter_.set_segmentation_block_list(block_list); - splitter_.set_ocr_split_strategy(max_ocr_strategy); - // Run the splitter for OCR - bool split_for_ocr = splitter_.Split(false, &pixa_debug_); - // Restore pix_binary to the binarized original pix for future reference. - ASSERT_HOST(splitter_.orig_pix()); - pixDestroy(&pix_binary_); - pix_binary_ = pixClone(splitter_.orig_pix()); - // If the pageseg and ocr strategies are different, refresh the block list - // (from the last SegmentImage call) with blobs from the real image to be used - // for OCR. - if (splitter_.HasDifferentSplitStrategies()) { - BLOCK block("", TRUE, 0, 0, 0, 0, pixGetWidth(pix_binary_), - pixGetHeight(pix_binary_)); - Pix* pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : - splitter_.orig_pix(); - extract_edges(pix_for_ocr, &block); - splitter_.RefreshSegmentationWithNewBlobs(block.blob_list()); - } - // The splitter isn't needed any more after this, so save memory by clearing. 
- splitter_.Clear(); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tesseractclass.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tesseractclass.h deleted file mode 100644 index 8b821f86..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tesseractclass.h +++ /dev/null @@ -1,1187 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tesseractclass.h -// Description: The Tesseract class. It holds/owns everything needed -// to run Tesseract on a single language, and also a set of -// sub-Tesseracts to run sub-languages. For thread safety, *every* -// global variable goes in here, directly, or indirectly. -// This makes it safe to run multiple Tesseracts in different -// threads in parallel, and keeps the different language -// instances separate. -// Author: Ray Smith -// Created: Fri Mar 07 08:17:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H_ -#define TESSERACT_CCMAIN_TESSERACTCLASS_H_ - -#include // for int16_t, int32_t, uint16_t -#include // for FILE -#include "allheaders.h" // for pixDestroy, pixGetWidth, pixGetHe... 
-#include "control.h" // for ACCEPTABLE_WERD_TYPE -#include "debugpixa.h" // for DebugPixa -#include "devanagari_processing.h" // for ShiroRekhaSplitter -#include "docqual.h" // for GARBAGE_LEVEL -#include "genericvector.h" // for GenericVector, PointerVector -#include "host.h" // for BOOL8 -#include "pageres.h" // for WERD_RES (ptr only), PAGE_RES (pt... -#include "params.h" // for BOOL_VAR_H, BoolParam, DoubleParam -#include "points.h" // for FCOORD -#include "publictypes.h" // for OcrEngineMode, PageSegMode, OEM_L... -#include "ratngs.h" // for ScriptPos, WERD_CHOICE (ptr only) -#include "strngs.h" // for STRING -#include "tessdatamanager.h" // for TessdataManager -#include "textord.h" // for Textord -#include "unichar.h" // for UNICHAR_ID -#include "wordrec.h" // for Wordrec - -class BLOCK_LIST; -class ETEXT_DESC; -struct OSResults; -class PAGE_RES; -class PAGE_RES_IT; -struct Pix; -class ROW; -class SVMenuNode; -class TBOX; -class TO_BLOCK_LIST; -class WERD; -class WERD_CHOICE; -class WERD_RES; - - -// Top-level class for all tesseract global instance data. -// This class either holds or points to all data used by an instance -// of Tesseract, including the memory allocator. When this is -// complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT! -// -// NOTE to developers: Do not create cyclic dependencies through this class! -// The directory dependency tree must remain a tree! The keep this clean, -// lower-level code (eg in ccutil, the bottom level) must never need to -// know about the content of a higher-level directory. 
-// The following scheme will grant the easiest access to lower-level -// global members without creating a cyclic dependency: -// -// Class Hierarchy (^ = inheritance): -// -// CCUtil (ccutil/ccutil.h) -// ^ Members include: UNICHARSET -// CUtil (cutil/cutil_class.h) -// ^ Members include: TBLOB*, TEXTBLOCK* -// CCStruct (ccstruct/ccstruct.h) -// ^ Members include: Image -// Classify (classify/classify.h) -// ^ Members include: Dict -// WordRec (wordrec/wordrec.h) -// ^ Members include: WERD*, DENORM* -// Tesseract (ccmain/tesseractclass.h) -// Members include: Pix* -// -// Other important classes: -// -// TessBaseAPI (api/baseapi.h) -// Members include: BLOCK_LIST*, PAGE_RES*, -// Tesseract*, ImageThresholder* -// Dict (dict/dict.h) -// Members include: Image* (private) -// -// NOTE: that each level contains members that correspond to global -// data that is defined (and used) at that level, not necessarily where -// the type is defined so for instance: -// BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); -// goes inside the Textord class, not the cc_util class. - -namespace tesseract { - -class ColumnFinder; -class DocumentData; -class EquationDetect; -class ImageData; -class LSTMRecognizer; -class Tesseract; - -// A collection of various variables for statistics and debugging. 
-struct TesseractStats { - TesseractStats() - : adaption_word_number(0), - doc_blob_quality(0), - doc_outline_errs(0), - doc_char_quality(0), - good_char_count(0), - doc_good_char_quality(0), - word_count(0), - dict_words(0), - tilde_crunch_written(false), - last_char_was_newline(true), - last_char_was_tilde(false), - write_results_empty_block(true) {} - - int32_t adaption_word_number; - int16_t doc_blob_quality; - int16_t doc_outline_errs; - int16_t doc_char_quality; - int16_t good_char_count; - int16_t doc_good_char_quality; - int32_t word_count; // count of word in the document - int32_t dict_words; // number of dicitionary words in the document - STRING dump_words_str; // accumulator used by dump_words() - // Flags used by write_results() - bool tilde_crunch_written; - bool last_char_was_newline; - bool last_char_was_tilde; - bool write_results_empty_block; -}; - -// Struct to hold all the pointers to relevant data for processing a word. -struct WordData { - WordData() : word(nullptr), row(nullptr), block(nullptr), prev_word(nullptr) {} - explicit WordData(const PAGE_RES_IT& page_res_it) - : word(page_res_it.word()), row(page_res_it.row()->row), - block(page_res_it.block()->block), prev_word(nullptr) {} - WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res) - : word(word_res), row(row_in), block(block_in), prev_word(nullptr) {} - - WERD_RES* word; - ROW* row; - BLOCK* block; - WordData* prev_word; - PointerVector lang_words; -}; - -// Definition of a Tesseract WordRecognizer. The WordData provides the context -// of row/block, in_word holds an initialized, possibly pre-classified word, -// that the recognizer may or may not consume (but if so it sets *in_word=nullptr) -// and produces one or more output words in out_words, which may be the -// consumed in_word, or may be generated independently. -// This api allows both a conventional tesseract classifier to work, or a -// line-level classifier that generates multiple words from a merged input. 
-typedef void (Tesseract::*WordRecognizer)(const WordData& word_data, - WERD_RES** in_word, - PointerVector* out_words); - -class Tesseract : public Wordrec { - public: - Tesseract(); - ~Tesseract(); - - // Return appropriate dictionary - Dict& getDict() override; - - // Clear as much used memory as possible without resetting the adaptive - // classifier or losing any other classifier data. - void Clear(); - // Clear all memory of adaption for this and all subclassifiers. - void ResetAdaptiveClassifier(); - // Clear the document dictionary for this and all subclassifiers. - void ResetDocumentDictionary(); - - // Set the equation detector. - void SetEquationDetect(EquationDetect* detector); - - // Simple accessors. - const FCOORD& reskew() const { - return reskew_; - } - // Destroy any existing pix and return a pointer to the pointer. - Pix** mutable_pix_binary() { - pixDestroy(&pix_binary_); - return &pix_binary_; - } - Pix* pix_binary() const { - return pix_binary_; - } - Pix* pix_grey() const { - return pix_grey_; - } - void set_pix_grey(Pix* grey_pix) { - pixDestroy(&pix_grey_); - pix_grey_ = grey_pix; - } - Pix* pix_original() const { return pix_original_; } - // Takes ownership of the given original_pix. - void set_pix_original(Pix* original_pix) { - pixDestroy(&pix_original_); - pix_original_ = original_pix; - // Clone to sublangs as well. - for (int i = 0; i < sub_langs_.size(); ++i) - sub_langs_[i]->set_pix_original(original_pix ? pixClone(original_pix) - : nullptr); - } - // Returns a pointer to a Pix representing the best available resolution image - // of the page, with best available bit depth as second priority. Result can - // be of any bit depth, but never color-mapped, as that has always been - // removed. Note that in grey and color, 0 is black and 255 is - // white. If the input was binary, then black is 1 and white is 0. - // To tell the difference pixGetDepth() will return 32, 8 or 1. 
- // In any case, the return value is a borrowed Pix, and should not be - // deleted or pixDestroyed. - Pix* BestPix() const { - if (pixGetWidth(pix_original_) == ImageWidth()) - return pix_original_; - else if (pix_grey_ != nullptr) - return pix_grey_; - else - return pix_binary_; - } - void set_pix_thresholds(Pix* thresholds) { - pixDestroy(&pix_thresholds_); - pix_thresholds_ = thresholds; - } - int source_resolution() const { - return source_resolution_; - } - void set_source_resolution(int ppi) { - source_resolution_ = ppi; - } - int ImageWidth() const { - return pixGetWidth(pix_binary_); - } - int ImageHeight() const { - return pixGetHeight(pix_binary_); - } - Pix* scaled_color() const { - return scaled_color_; - } - int scaled_factor() const { - return scaled_factor_; - } - void SetScaledColor(int factor, Pix* color) { - scaled_factor_ = factor; - scaled_color_ = color; - } - const Textord& textord() const { - return textord_; - } - Textord* mutable_textord() { - return &textord_; - } - - bool right_to_left() const { - return right_to_left_; - } - int num_sub_langs() const { - return sub_langs_.size(); - } - Tesseract* get_sub_lang(int index) const { - return sub_langs_[index]; - } - // Returns true if any language uses Tesseract (as opposed to LSTM). - bool AnyTessLang() const { - if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true; - for (int i = 0; i < sub_langs_.size(); ++i) { - if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true; - } - return false; - } - // Returns true if any language uses the LSTM. - bool AnyLSTMLang() const { - if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) return true; - for (int i = 0; i < sub_langs_.size(); ++i) { - if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) - return true; - } - return false; - } - - void SetBlackAndWhitelist(); - - // Perform steps to prepare underlying binary image/other data structures for - // page segmentation. 
Uses the strategy specified in the global variable - // pageseg_devanagari_split_strategy for perform splitting while preparing for - // page segmentation. - void PrepareForPageseg(); - - // Perform steps to prepare underlying binary image/other data structures for - // Tesseract OCR. The current segmentation is required by this method. - // Uses the strategy specified in the global variable - // ocr_devanagari_split_strategy for performing splitting while preparing for - // Tesseract ocr. - void PrepareForTessOCR(BLOCK_LIST* block_list, - Tesseract* osd_tess, OSResults* osr); - - int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, - Tesseract* osd_tess, OSResults* osr); - void SetupWordScripts(BLOCK_LIST* blocks); - int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, - TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs, - Tesseract* osd_tess, OSResults* osr); - ColumnFinder* SetupPageSegAndDetectOrientation( - PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess, - OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, - Pix** music_mask_pix); - // par_control.cpp - void PrerecAllWordsPar(const GenericVector& words); - - //// linerec.cpp - // Generates training data for training a line recognizer, eg LSTM. - // Breaks the page into lines, according to the boxes, and writes them to a - // serialized DocumentData based on output_basename. - void TrainLineRecognizer(const STRING& input_imagename, - const STRING& output_basename, - BLOCK_LIST *block_list); - // Generates training data for training a line recognizer, eg LSTM. - // Breaks the boxes into lines, normalizes them, converts to ImageData and - // appends them to the given training_data. - void TrainFromBoxes(const GenericVector& boxes, - const GenericVector& texts, - BLOCK_LIST *block_list, - DocumentData* training_data); - - // Returns an Imagedata containing the image of the given textline, - // and ground truth boxes/truth text if available in the input. 
- // The image is not normalized in any way. - ImageData* GetLineData(const TBOX& line_box, - const GenericVector& boxes, - const GenericVector& texts, - int start_box, int end_box, - const BLOCK& block); - // Helper gets the image of a rectangle, using the block.re_rotation() if - // needed to get to the image, and rotating the result back to horizontal - // layout. (CJK characters will be on their left sides) The vertical text flag - // is set in the returned ImageData if the text was originally vertical, which - // can be used to invoke a different CJK recognition engine. The revised_box - // is also returned to enable calculation of output bounding boxes. - ImageData* GetRectImage(const TBOX& box, const BLOCK& block, int padding, - TBOX* revised_box) const; - // Recognizes a word or group of words, converting to WERD_RES in *words. - // Analogous to classify_word_pass1, but can handle a group of words as well. - void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word, - PointerVector* words); - // Apply segmentation search to the given set of words, within the constraints - // of the existing ratings matrix. If there is already a best_choice on a word - // leaves it untouched and just sets the done/accepted etc flags. - void SearchWords(PointerVector* words); - - //// control.h ///////////////////////////////////////////////////////// - bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box, - const char* word_config, int pass); - // Sets up the words ready for whichever engine is to be run - void SetupAllWordsPassN(int pass_n, - const TBOX* target_word_box, - const char* word_config, - PAGE_RES* page_res, - GenericVector* words); - // Sets up the single word ready for whichever engine is to be run. - void SetupWordPassN(int pass_n, WordData* word); - // Runs word recognition on all the words. 
- bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, - PAGE_RES_IT* pr_it, - GenericVector* words); - bool recog_all_words(PAGE_RES* page_res, - ETEXT_DESC* monitor, - const TBOX* target_word_box, - const char* word_config, - int dopasses); - void rejection_passes(PAGE_RES* page_res, - ETEXT_DESC* monitor, - const TBOX* target_word_box, - const char* word_config); - void bigram_correction_pass(PAGE_RES *page_res); - void blamer_pass(PAGE_RES* page_res); - // Sets script positions and detects smallcaps on all output words. - void script_pos_pass(PAGE_RES* page_res); - // Helper to recognize the word using the given (language-specific) tesseract. - // Returns positive if this recognizer found more new best words than the - // number kept from best_words. - int RetryWithLanguage(const WordData& word_data, WordRecognizer recognizer, - bool debug, WERD_RES** in_word, - PointerVector* best_words); - // Moves good-looking "noise"/diacritics from the reject list to the main - // blob list on the current word. Returns true if anything was done, and - // sets make_next_word_fuzzy if blob(s) were added to the end of the word. - bool ReassignDiacritics(int pass, PAGE_RES_IT* pr_it, - bool* make_next_word_fuzzy); - // Attempts to put noise/diacritic outlines into the blobs that they overlap. - // Input: a set of noisy outlines that probably belong to the real_word. - // Output: outlines that overlapped blobs are set to nullptr and put back into - // the word, either in the blobs or in the reject list. - void AssignDiacriticsToOverlappingBlobs( - const GenericVector& outlines, int pass, WERD* real_word, - PAGE_RES_IT* pr_it, GenericVector* word_wanted, - GenericVector* overlapped_any_blob, - GenericVector* target_blobs); - // Attempts to assign non-overlapping outlines to their nearest blobs or - // make new blobs out of them. 
- void AssignDiacriticsToNewBlobs(const GenericVector& outlines, - int pass, WERD* real_word, PAGE_RES_IT* pr_it, - GenericVector* word_wanted, - GenericVector* target_blobs); - // Starting with ok_outlines set to indicate which outlines overlap the blob, - // chooses the optimal set (approximately) and returns true if any outlines - // are desired, in which case ok_outlines indicates which ones. - bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold, - PAGE_RES_IT* pr_it, C_BLOB* blob, - const GenericVector& outlines, - int num_outlines, - GenericVector* ok_outlines); - // Classifies the given blob plus the outlines flagged by ok_outlines, undoes - // the inclusion of the outlines, and returns the certainty of the raw choice. - float ClassifyBlobPlusOutlines(const GenericVector& ok_outlines, - const GenericVector& outlines, - int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob, - STRING* best_str); - // Classifies the given blob (part of word_data->word->word) as an individual - // word, using languages, chopper etc, returning only the certainty of the - // best raw choice, and undoing all the work done to fake out the word. 
- float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob, - STRING* best_str, float* c2); - void classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it, - WordData* word_data); - void classify_word_pass1(const WordData& word_data, - WERD_RES** in_word, - PointerVector* out_words); - void recog_pseudo_word(PAGE_RES* page_res, // blocks to check - TBOX &selection_box); - - void fix_rep_char(PAGE_RES_IT* page_res_it); - - ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET& char_set, - const char *s, - const char *lengths); - void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block); - void classify_word_pass2(const WordData& word_data, - WERD_RES** in_word, - PointerVector* out_words); - void ReportXhtFixResult(bool accept_new_word, float new_x_ht, - WERD_RES* word, WERD_RES* new_word); - bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row); - bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row); - // Runs recognition with the test baseline shift and x-height and returns true - // if there was an improvement in recognition result. - bool TestNewNormalization(int original_misfits, float baseline_shift, - float new_x_ht, WERD_RES *word, BLOCK* block, - ROW *row); - bool recog_interactive(PAGE_RES_IT* pr_it); - - // Set fonts of this word. 
- void set_word_fonts(WERD_RES *word); - void font_recognition_pass(PAGE_RES* page_res); - void dictionary_correction_pass(PAGE_RES* page_res); - bool check_debug_pt(WERD_RES* word, int location); - - //// superscript.cpp //////////////////////////////////////////////////// - bool SubAndSuperscriptFix(WERD_RES *word_res); - void GetSubAndSuperscriptCandidates(const WERD_RES *word, - int *num_rebuilt_leading, - ScriptPos *leading_pos, - float *leading_certainty, - int *num_rebuilt_trailing, - ScriptPos *trailing_pos, - float *trailing_certainty, - float *avg_certainty, - float *unlikely_threshold); - WERD_RES *TrySuperscriptSplits(int num_chopped_leading, - float leading_certainty, - ScriptPos leading_pos, - int num_chopped_trailing, - float trailing_certainty, - ScriptPos trailing_pos, - WERD_RES *word, - bool *is_good, - int *retry_leading, - int *retry_trailing); - bool BelievableSuperscript(bool debug, - const WERD_RES &word, - float certainty_threshold, - int *left_ok, - int *right_ok) const; - - //// output.h ////////////////////////////////////////////////////////// - - void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box); - void write_results(PAGE_RES_IT& page_res_it, // full info - char newline_type, // type of newline - bool force_eol // override tilde crunch? - ); - void set_unlv_suspects(WERD_RES *word); - UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? - bool acceptable_number_string(const char* s, - const char* lengths); - int16_t count_alphanums(const WERD_CHOICE &word); - int16_t count_alphas(const WERD_CHOICE &word); - //// tessedit.h //////////////////////////////////////////////////////// - void read_config_file(const char *filename, SetParamConstraint constraint); - // Initialize for potentially a set of languages defined by the language - // string and recursively any additional languages required by any language - // traineddata file (via tessedit_load_sublangs in its config) that is loaded. 
- // See init_tesseract_internal for args. - int init_tesseract(const char* arg0, const char* textbase, - const char* language, OcrEngineMode oem, char** configs, - int configs_size, const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_init_params, TessdataManager* mgr); - int init_tesseract(const char *datapath, - const char *language, - OcrEngineMode oem) { - TessdataManager mgr; - return init_tesseract(datapath, nullptr, language, oem, nullptr, 0, nullptr, nullptr, - false, &mgr); - } - // Common initialization for a single language. - // arg0 is the datapath for the tessdata directory, which could be the - // path of the tessdata directory with no trailing /, or (if tessdata - // lives in the same directory as the executable, the path of the executable, - // hence the name arg0. - // textbase is an optional output file basename (used only for training) - // language is the language code to load. - // oem controls which engine(s) will operate on the image - // configs (argv) is an array of config filenames to load variables from. - // May be nullptr. - // configs_size (argc) is the number of elements in configs. - // vars_vec is an optional vector of variables to set. - // vars_values is an optional corresponding vector of values for the variables - // in vars_vec. - // If set_only_init_params is true, then only the initialization variables - // will be set. - int init_tesseract_internal(const char* arg0, const char* textbase, - const char* language, OcrEngineMode oem, - char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_init_params, TessdataManager* mgr); - - // Set the universal_id member of each font to be unique among all - // instances of the same font loaded. 
- void SetupUniversalFontIds(); - - int init_tesseract_lm(const char* arg0, const char* textbase, - const char* language, TessdataManager* mgr); - - void recognize_page(STRING& image_name); - void end_tesseract(); - - bool init_tesseract_lang_data(const char* arg0, const char* textbase, - const char* language, OcrEngineMode oem, - char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_init_params, - TessdataManager* mgr); - - void ParseLanguageString(const char* lang_str, - GenericVector* to_load, - GenericVector* not_to_load); - - //// pgedit.h ////////////////////////////////////////////////////////// - SVMenuNode *build_menu_new(); - #ifndef GRAPHICS_DISABLED - void pgeditor_main(int width, int height, PAGE_RES* page_res); - #endif // GRAPHICS_DISABLED - void process_image_event( // action in image win - const SVEvent &event); - bool process_cmd_win_event( // UI command semantics - int32_t cmd_event, // which menu item? - char* new_value // any prompt data - ); - void debug_word(PAGE_RES* page_res, const TBOX &selection_box); - void do_re_display( - bool (tesseract::Tesseract::* word_painter)(PAGE_RES_IT* pr_it)); - bool word_display(PAGE_RES_IT* pr_it); - bool word_bln_display(PAGE_RES_IT* pr_it); - bool word_blank_and_set_display(PAGE_RES_IT* pr_its); - bool word_set_display(PAGE_RES_IT* pr_it); - // #ifndef GRAPHICS_DISABLED - bool word_dumper(PAGE_RES_IT* pr_it); - // #endif // GRAPHICS_DISABLED - void blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box); - //// reject.h ////////////////////////////////////////////////////////// - // make rej map for word - void make_reject_map(WERD_RES *word, ROW *row, int16_t pass); - bool one_ell_conflict(WERD_RES* word_res, bool update_map); - int16_t first_alphanum_index(const char *word, - const char *word_lengths); - int16_t first_alphanum_offset(const char *word, - const char *word_lengths); - int16_t alpha_count(const char *word, - const 
char *word_lengths); - bool word_contains_non_1_digit(const char* word, - const char* word_lengths); - void dont_allow_1Il(WERD_RES *word); - int16_t count_alphanums( //how many alphanums - WERD_RES *word); - void flip_0O(WERD_RES *word); - bool non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id); - bool non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id); - bool repeated_nonalphanum_wd(WERD_RES* word, ROW* row); - void nn_match_word( //Match a word - WERD_RES *word, - ROW *row); - void nn_recover_rejects(WERD_RES *word, ROW *row); - void set_done( //set done flag - WERD_RES *word, - int16_t pass); - int16_t safe_dict_word(const WERD_RES *werd_res); // is best_choice in dict? - void flip_hyphens(WERD_RES *word); - void reject_I_1_L(WERD_RES *word); - void reject_edge_blobs(WERD_RES *word); - void reject_mostly_rejects(WERD_RES *word); - //// adaptions.h /////////////////////////////////////////////////////// - bool word_adaptable( //should we adapt? - WERD_RES* word, - uint16_t mode); - - //// tfacepp.cpp /////////////////////////////////////////////////////// - void recog_word_recursive(WERD_RES* word); - void recog_word(WERD_RES *word); - void split_and_recog_word(WERD_RES* word); - void split_word(WERD_RES *word, - int split_pt, - WERD_RES **right_piece, - BlamerBundle **orig_blamer_bundle) const; - void join_words(WERD_RES *word, - WERD_RES *word2, - BlamerBundle *orig_bb) const; - //// fixspace.cpp /////////////////////////////////////////////////////// - bool digit_or_numeric_punct(WERD_RES *word, int char_position); - int16_t eval_word_spacing(WERD_RES_LIST &word_res_list); - void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block); - int16_t fp_eval_word_spacing(WERD_RES_LIST &word_res_list); - void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); - void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); - void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block); - void 
fix_fuzzy_spaces( //find fuzzy words - ETEXT_DESC *monitor, //progress monitor - int32_t word_count, //count of words in doc - PAGE_RES *page_res); - void dump_words(WERD_RES_LIST &perm, int16_t score, - int16_t mode, bool improved); - bool fixspace_thinks_word_done(WERD_RES *word); - int16_t worst_noise_blob(WERD_RES *word_res, float *worst_noise_score); - float blob_noise_score(TBLOB *blob); - void break_noisiest_blob_word(WERD_RES_LIST &words); - //// docqual.cpp //////////////////////////////////////////////////////// - GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word); - bool potential_word_crunch(WERD_RES* word, - GARBAGE_LEVEL garbage_level, - bool ok_dict_word); - void tilde_crunch(PAGE_RES_IT &page_res_it); - void unrej_good_quality_words( //unreject potential - PAGE_RES_IT &page_res_it); - void doc_and_block_rejection( //reject big chunks - PAGE_RES_IT &page_res_it, - bool good_quality_doc); - void quality_based_rejection(PAGE_RES_IT &page_res_it, - bool good_quality_doc); - void convert_bad_unlv_chs(WERD_RES *word_res); - void tilde_delete(PAGE_RES_IT &page_res_it); - int16_t word_blob_quality(WERD_RES *word, ROW *row); - void word_char_quality(WERD_RES *word, ROW *row, int16_t *match_count, - int16_t *accepted_match_count); - void unrej_good_chs(WERD_RES *word, ROW *row); - int16_t count_outline_errs(char c, int16_t outline_count); - int16_t word_outline_errs(WERD_RES *word); - bool terrible_word_crunch(WERD_RES* word, GARBAGE_LEVEL garbage_level); - CRUNCH_MODE word_deletable(WERD_RES *word, int16_t &delete_mode); - int16_t failure_count(WERD_RES *word); - bool noise_outlines(TWERD* word); - //// pagewalk.cpp /////////////////////////////////////////////////////// - void - process_selected_words( - PAGE_RES* page_res, // blocks to check - //function to call - TBOX& selection_box, - bool (tesseract::Tesseract::* word_processor)(PAGE_RES_IT* pr_it)); - //// tessbox.cpp /////////////////////////////////////////////////////// - void 
tess_add_doc_word( //test acceptability - WERD_CHOICE *word_choice //after context - ); - void tess_segment_pass_n(int pass_n, WERD_RES *word); - bool tess_acceptable_word(WERD_RES *word); - - //// applybox.cpp ////////////////////////////////////////////////////// - // Applies the box file based on the image name fname, and resegments - // the words in the block_list (page), with: - // blob-mode: one blob per line in the box file, words as input. - // word/line-mode: one blob per space-delimited unit after the #, and one word - // per line in the box file. (See comment above for box file format.) - // If find_segmentation is true, (word/line mode) then the classifier is used - // to re-segment words/lines to match the space-delimited truth string for - // each box. In this case, the input box may be for a word or even a whole - // text line, and the output words will contain multiple blobs corresponding - // to the space-delimited input string. - // With find_segmentation false, no classifier is needed, but the chopper - // can still be used to correctly segment touching characters with the help - // of the input boxes. - // In the returned PAGE_RES, the WERD_RES are setup as they would be returned - // from normal classification, ie. with a word, chopped_word, rebuild_word, - // seam_array, denorm, box_word, and best_state, but NO best_choice or - // raw_choice, as they would require a UNICHARSET, which we aim to avoid. - // Instead, the correct_text member of WERD_RES is set, and this may be later - // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords - // is not required before calling ApplyBoxTraining. - PAGE_RES* ApplyBoxes(const STRING& fname, bool find_segmentation, - BLOCK_LIST *block_list); - - // Any row xheight that is significantly different from the median is set - // to the median. 
- void PreenXHeights(BLOCK_LIST *block_list); - - // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: - // All fuzzy spaces are removed, and all the words are maximally chopped. - PAGE_RES* SetupApplyBoxes(const GenericVector& boxes, - BLOCK_LIST *block_list); - // Tests the chopper by exhaustively running chop_one_blob. - // The word_res will contain filled chopped_word, seam_array, denorm, - // box_word and best_state for the maximally chopped word. - void MaximallyChopWord(const GenericVector& boxes, - BLOCK* block, ROW* row, WERD_RES* word_res); - // Gather consecutive blobs that match the given box into the best_state - // and corresponding correct_text. - // Fights over which box owns which blobs are settled by pre-chopping and - // applying the blobs to box or next_box with the least non-overlap. - // Returns false if the box was in error, which can only be caused by - // failing to find an appropriate blob for a box. - // This means that occasionally, blobs may be incorrectly segmented if the - // chopper fails to find a suitable chop point. - bool ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box, - const TBOX& box, const TBOX* next_box, - const char* correct_text); - // Consume all source blobs that strongly overlap the given box, - // putting them into a new word, with the correct_text label. - // Fights over which box owns which blobs are settled by - // applying the blobs to box or next_box with the least non-overlap. - // Returns false if the box was in error, which can only be caused by - // failing to find an overlapping blob for a box. - bool ResegmentWordBox(BLOCK_LIST* block_list, - const TBOX& box, const TBOX* next_box, - const char* correct_text); - // Resegments the words by running the classifier in an attempt to find the - // correct segmentation that produces the required string. 
- void ReSegmentByClassification(PAGE_RES* page_res); - // Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID. - // Returns false if an invalid UNICHAR_ID is encountered. - bool ConvertStringToUnichars(const char* utf8, - GenericVector* class_ids); - // Resegments the word to achieve the target_text from the classifier. - // Returns false if the re-segmentation fails. - // Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and - // applies a full search on the classifier results to find the best classified - // segmentation. As a compromise to obtain better recall, 1-1 ambigiguity - // substitutions ARE used. - bool FindSegmentation(const GenericVector& target_text, - WERD_RES* word_res); - // Recursive helper to find a match to the target_text (from text_index - // position) in the choices (from choices_pos position). - // Choices is an array of GenericVectors, of length choices_length, with each - // element representing a starting position in the word, and the - // GenericVector holding classification results for a sequence of consecutive - // blobs, with index 0 being a single blob, index 1 being 2 blobs etc. - void SearchForText(const GenericVector* choices, - int choices_pos, int choices_length, - const GenericVector& target_text, - int text_index, - float rating, GenericVector* segmentation, - float* best_rating, GenericVector* best_segmentation); - // Counts up the labelled words and the blobs within. - // Deletes all unused or emptied words, counting the unused ones. - // Resets W_BOL and W_EOL flags correctly. - // Builds the rebuild_word and rebuilds the box_word. - void TidyUp(PAGE_RES* page_res); - // Logs a bad box by line in the box file and box coords. - void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, - const char *err_msg); - // Creates a fake best_choice entry in each WERD_RES with the correct text. 
- void CorrectClassifyWords(PAGE_RES* page_res); - // Call LearnWord to extract features for labelled blobs within each word. - // Features are stored in an internal buffer. - void ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res); - - //// fixxht.cpp /////////////////////////////////////////////////////// - // Returns the number of misfit blob tops in this word. - int CountMisfitTops(WERD_RES *word_res); - // Returns a new x-height in pixels (original image coords) that is - // maximally compatible with the result in word_res. - // Returns 0.0f if no x-height is found that is better than the current - // estimate. - float ComputeCompatibleXheight(WERD_RES *word_res, float* baseline_shift); - //// Data members /////////////////////////////////////////////////////// - // TODO(ocr-team): Find and remove obsolete parameters. - BOOL_VAR_H(tessedit_resegment_from_boxes, false, - "Take segmentation and labeling from box file"); - BOOL_VAR_H(tessedit_resegment_from_line_boxes, false, - "Conversion of word/line box file to char box file"); - BOOL_VAR_H(tessedit_train_from_boxes, false, - "Generate training data from boxed chars"); - BOOL_VAR_H(tessedit_make_boxes_from_boxes, false, - "Generate more boxes from boxed chars"); - BOOL_VAR_H(tessedit_train_line_recognizer, false, - "Break input into lines and remap boxes if present"); - BOOL_VAR_H(tessedit_dump_pageseg_images, false, - "Dump intermediate images made during page segmentation"); - INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK, - "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," - " 5=line, 6=word, 7=char" - " (Values from PageSegMode enum in publictypes.h)"); - INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT, - "Which OCR engine(s) to run (Tesseract, LSTM, both). 
Defaults" - " to loading and running the most accurate available."); - STRING_VAR_H(tessedit_char_blacklist, "", - "Blacklist of chars not to recognize"); - STRING_VAR_H(tessedit_char_whitelist, "", - "Whitelist of chars to recognize"); - STRING_VAR_H(tessedit_char_unblacklist, "", - "List of chars to override tessedit_char_blacklist"); - BOOL_VAR_H(tessedit_ambigs_training, false, - "Perform training for ambiguities"); - INT_VAR_H(pageseg_devanagari_split_strategy, - tesseract::ShiroRekhaSplitter::NO_SPLIT, - "Whether to use the top-line splitting process for Devanagari " - "documents while performing page-segmentation."); - INT_VAR_H(ocr_devanagari_split_strategy, - tesseract::ShiroRekhaSplitter::NO_SPLIT, - "Whether to use the top-line splitting process for Devanagari " - "documents while performing ocr."); - STRING_VAR_H(tessedit_write_params_to_file, "", - "Write all parameters to the given file."); - BOOL_VAR_H(tessedit_adaption_debug, false, - "Generate and print debug information for adaption"); - INT_VAR_H(bidi_debug, 0, "Debug level for BiDi"); - INT_VAR_H(applybox_debug, 1, "Debug level"); - INT_VAR_H(applybox_page, 0, "Page number to apply boxes from"); - STRING_VAR_H(applybox_exposure_pattern, ".exp", - "Exposure value follows this pattern in the image" - " filename. The name of the image files are expected" - " to be in the form [lang].[fontname].exp[num].tif"); - BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode, false, - "Learn both character fragments (as is done in the" - " special low exposure mode) as well as unfragmented" - " characters."); - BOOL_VAR_H(applybox_learn_ngrams_mode, false, - "Each bounding box is assumed to contain ngrams. 
Only" - " learn the ngrams whose outlines overlap horizontally."); - BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words"); - BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices"); - BOOL_VAR_H(tessedit_timing_debug, false, "Print timing stats"); - BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, - "Try to improve fuzzy spaces"); - BOOL_VAR_H(tessedit_unrej_any_wd, false, - "Don't bother with word plausibility"); - BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?"); - BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height"); - BOOL_VAR_H(tessedit_enable_doc_dict, true, - "Add words to the document dictionary"); - BOOL_VAR_H(tessedit_debug_fonts, false, "Output font info per char"); - BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats"); - BOOL_VAR_H(tessedit_enable_bigram_correction, true, - "Enable correction based on the word bigram dictionary."); - BOOL_VAR_H(tessedit_enable_dict_correction, false, - "Enable single word correction based on the dictionary."); - INT_VAR_H(tessedit_bigram_debug, 0, "Amount of debug output for bigram " - "correction."); - BOOL_VAR_H(enable_noise_removal, true, - "Remove and conditionally reassign small outlines when they" - " confuse layout analysis, determining diacritics vs noise"); - INT_VAR_H(debug_noise_removal, 0, "Debug reassignment of small outlines"); - // Worst (min) certainty, for which a diacritic is allowed to make the base - // character worse and still be included. - double_VAR_H(noise_cert_basechar, -8.0, "Hingepoint for base char certainty"); - // Worst (min) certainty, for which a non-overlapping diacritic is allowed to - // make the base character worse and still be included. - double_VAR_H(noise_cert_disjoint, -2.5, "Hingepoint for disjoint certainty"); - // Worst (min) certainty, for which a diacritic is allowed to make a new - // stand-alone blob. 
- double_VAR_H(noise_cert_punc, -2.5, "Threshold for new punc char certainty"); - // Factor of certainty margin for adding diacritics to not count as worse. - double_VAR_H(noise_cert_factor, 0.375, - "Scaling on certainty diff from Hingepoint"); - INT_VAR_H(noise_maxperblob, 8, "Max diacritics to apply to a blob"); - INT_VAR_H(noise_maxperword, 16, "Max diacritics to apply to a word"); - INT_VAR_H(debug_x_ht_level, 0, "Reestimate debug"); - BOOL_VAR_H(debug_acceptable_wds, false, "Dump word pass/fail chk"); - STRING_VAR_H(chs_leading_punct, "('`\"", "Leading punctuation"); - STRING_VAR_H(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation"); - STRING_VAR_H(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation"); - double_VAR_H(quality_rej_pc, 0.08, "good_quality_doc lte rejection limit"); - double_VAR_H(quality_blob_pc, 0.0, "good_quality_doc gte good blobs limit"); - double_VAR_H(quality_outline_pc, 1.0, - "good_quality_doc lte outline error limit"); - double_VAR_H(quality_char_pc, 0.95, "good_quality_doc gte good char limit"); - INT_VAR_H(quality_min_initial_alphas_reqd, 2, "alphas in a good word"); - INT_VAR_H(tessedit_tess_adaption_mode, 0x27, - "Adaptation decision algorithm for tess"); - BOOL_VAR_H(tessedit_minimal_rej_pass1, false, - "Do minimal rejection on pass 1 output"); - BOOL_VAR_H(tessedit_test_adaption, false, "Test adaption criteria"); - BOOL_VAR_H(tessedit_matcher_log, false, "Log matcher activity"); - INT_VAR_H(tessedit_test_adaption_mode, 3, - "Adaptation decision algorithm for tess"); - BOOL_VAR_H(test_pt, false, "Test for point"); - double_VAR_H(test_pt_x, 99999.99, "xcoord"); - double_VAR_H(test_pt_y, 99999.99, "ycoord"); - INT_VAR_H(multilang_debug_level, 0, "Print multilang debug info."); - INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info."); - BOOL_VAR_H(paragraph_text_based, true, - "Run paragraph detection on the post-text-recognition " - "(more accurate)"); - BOOL_VAR_H(lstm_use_matrix, 1, "Use ratings matrix/beam 
searct with lstm"); - STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines"); - STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines"); - BOOL_VAR_H(docqual_excuse_outline_errs, false, - "Allow outline errs in unrejection?"); - BOOL_VAR_H(tessedit_good_quality_unrej, true, - "Reduce rejection on good docs"); - BOOL_VAR_H(tessedit_use_reject_spaces, true, "Reject spaces?"); - double_VAR_H(tessedit_reject_doc_percent, 65.00, - "%rej allowed before rej whole doc"); - double_VAR_H(tessedit_reject_block_percent, 45.00, - "%rej allowed before rej whole block"); - double_VAR_H(tessedit_reject_row_percent, 40.00, - "%rej allowed before rej whole row"); - double_VAR_H(tessedit_whole_wd_rej_row_percent, 70.00, - "Number of row rejects in whole word rejects" - "which prevents whole row rejection"); - BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds, true, - "Only rej partially rejected words in block rejection"); - BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds, true, - "Only rej partially rejected words in row rejection"); - BOOL_VAR_H(tessedit_dont_blkrej_good_wds, false, - "Use word segmentation quality metric"); - BOOL_VAR_H(tessedit_dont_rowrej_good_wds, false, - "Use word segmentation quality metric"); - INT_VAR_H(tessedit_preserve_min_wd_len, 2, - "Only preserve wds longer than this"); - BOOL_VAR_H(tessedit_row_rej_good_docs, true, - "Apply row rejection to good docs"); - double_VAR_H(tessedit_good_doc_still_rowrej_wd, 1.1, - "rej good doc wd if more than this fraction rejected"); - BOOL_VAR_H(tessedit_reject_bad_qual_wds, true, - "Reject all bad quality wds"); - BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats"); - BOOL_VAR_H(tessedit_debug_quality_metrics, false, - "Output data to debug file"); - BOOL_VAR_H(bland_unrej, false, "unrej potential with no checks"); - double_VAR_H(quality_rowrej_pc, 1.1, - "good_quality_doc gte good char limit"); - BOOL_VAR_H(unlv_tilde_crunching, false, - "Mark v.bad words for tilde crunch"); 
- BOOL_VAR_H(hocr_font_info, false, - "Add font info to hocr output"); - BOOL_VAR_H(crunch_early_merge_tess_fails, true, "Before word crunch?"); - BOOL_VAR_H(crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?"); - double_VAR_H(crunch_terrible_rating, 80.0, "crunch rating lt this"); - BOOL_VAR_H(crunch_terrible_garbage, true, "As it says"); - double_VAR_H(crunch_poor_garbage_cert, -9.0, - "crunch garbage cert lt this"); - double_VAR_H(crunch_poor_garbage_rate, 60, "crunch garbage rating lt this"); - double_VAR_H(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this"); - double_VAR_H(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this"); - BOOL_VAR_H(crunch_pot_garbage, true, "POTENTIAL crunch garbage"); - double_VAR_H(crunch_del_rating, 60, "POTENTIAL crunch rating lt this"); - double_VAR_H(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this"); - double_VAR_H(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this"); - double_VAR_H(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this"); - double_VAR_H(crunch_del_min_width, 3.0, "Del if word width lt xht x this"); - double_VAR_H(crunch_del_high_word, 1.5, - "Del if word gt xht x this above bl"); - double_VAR_H(crunch_del_low_word, 0.5, "Del if word gt xht x this below bl"); - double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this"); - INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch"); - INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed"); - BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings"); - BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring"); - BOOL_VAR_H(crunch_leave_accept_strings, false, - "Don't pot crunch sensible strings"); - BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures"); - INT_VAR_H(crunch_leave_lc_strings, 4, - "Don't crunch words with long lower case strings"); - INT_VAR_H(crunch_leave_uc_strings, 4, - "Don't crunch words with long lower case strings"); - 
INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions"); - INT_VAR_H(crunch_debug, 0, "As it says"); - INT_VAR_H(fixsp_non_noise_limit, 1, - "How many non-noise blbs either side?"); - double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this"); - BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctuation joins"); - INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing"); - INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug"); - STRING_VAR_H(numeric_punctuation, ".,", - "Punct. chs expected WITHIN numbers"); - INT_VAR_H(x_ht_acceptance_tolerance, 8, - "Max allowed deviation of blob top outside of font data"); - INT_VAR_H(x_ht_min_change, 8, "Min change in xht before actually trying it"); - INT_VAR_H(superscript_debug, 0, "Debug level for sub & superscript fixer"); - double_VAR_H(superscript_worse_certainty, 2.0, "How many times worse " - "certainty does a superscript position glyph need to be for us " - "to try classifying it as a char with a different baseline?"); - double_VAR_H(superscript_bettered_certainty, 0.97, "What reduction in " - "badness do we think sufficient to choose a superscript over " - "what we'd thought. For example, a value of 0.6 means we want " - "to reduce badness of certainty by 40%"); - double_VAR_H(superscript_scaledown_ratio, 0.4, - "A superscript scaled down more than this is unbelievably " - "small. 
For example, 0.3 means we expect the font size to " - "be no smaller than 30% of the text line font size."); - double_VAR_H(subscript_max_y_top, 0.5, - "Maximum top of a character measured as a multiple of x-height " - "above the baseline for us to reconsider whether it's a " - "subscript."); - double_VAR_H(superscript_min_y_bottom, 0.3, - "Minimum bottom of a character measured as a multiple of " - "x-height above the baseline for us to reconsider whether it's " - "a superscript."); - BOOL_VAR_H(tessedit_write_block_separators, false, - "Write block separators in output"); - BOOL_VAR_H(tessedit_write_rep_codes, false, - "Write repetition char code"); - BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file"); - BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file"); - BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file"); - BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file"); - BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file"); - BOOL_VAR_H(textonly_pdf, false, - "Create PDF with only one invisible text layer"); - INT_VAR_H(jpg_quality, 85, "Set JPEG quality level"); - INT_VAR_H(user_defined_dpi, 0, "Specify DPI for input image"); - INT_VAR_H(min_characters_to_try, 50, - "Specify minimum characters to try during OSD"); - STRING_VAR_H(unrecognised_char, "|", - "Output char for unidentified blobs"); - INT_VAR_H(suspect_level, 99, "Suspect marker level"); - INT_VAR_H(suspect_space_level, 100, - "Min suspect level for rejecting spaces"); - INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this"); - BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); - double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit"); - double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); - BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures"); - BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING"); - 
BOOL_VAR_H(tessedit_word_for_word, false, - "Make output have exactly one word per WERD"); - BOOL_VAR_H(tessedit_zero_kelvin_rejection, false, - "Don't reject ANYTHING AT ALL"); - BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same"); - INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm"); - BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug"); - BOOL_VAR_H(tessedit_flip_0O, true, "Contextual 0O O0 flips"); - double_VAR_H(tessedit_lower_flip_hyphen, 1.5, - "Aspect ratio dot/hyphen test"); - double_VAR_H(tessedit_upper_flip_hyphen, 1.8, - "Aspect ratio dot/hyphen test"); - BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector"); - BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test"); - BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check"); - BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control"); - BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control"); - BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control"); - BOOL_VAR_H(rej_use_sensible_wd, false, "Extend permuter check"); - BOOL_VAR_H(rej_alphas_in_number_perm, false, "Extend permuter check"); - double_VAR_H(rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract"); - INT_VAR_H(tessedit_image_border, 2, "Rej blbs near image edge limit"); - STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*\075", - "Allow NN to unrej"); - STRING_VAR_H(conflict_set_I_l_1, "Il1[]", "Il1 conflict set"); - INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this"); - BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes"); - INT_VAR_H(tessedit_page_number, -1, - "-1 -> All pages, else specific page to process"); - BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE"); - BOOL_VAR_H(interactive_display_mode, false, "Run interactively?"); - STRING_VAR_H(file_type, ".tif", "Filename extension"); - BOOL_VAR_H(tessedit_override_permuter, true, "According to 
dict_word"); - STRING_VAR_H(tessedit_load_sublangs, "", - "List of languages to load with this one"); - BOOL_VAR_H(tessedit_use_primary_params_model, false, - "In multilingual mode use params model of the primary language"); - // Min acceptable orientation margin (difference in scores between top and 2nd - // choice in OSResults::orientations) to believe the page orientation. - double_VAR_H(min_orientation_margin, 7.0, - "Min acceptable orientation margin"); - BOOL_VAR_H(textord_tabfind_show_vlines, false, "Debug line finding"); - BOOL_VAR_H(textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model"); - BOOL_VAR_H(poly_allow_detailed_fx, false, - "Allow feature extractors to see the original outline"); - BOOL_VAR_H(tessedit_init_config_only, false, - "Only initialize with the config file. Useful if the instance is " - "not going to be used for OCR but say only for layout analysis."); - BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector"); - BOOL_VAR_H(textord_tabfind_vertical_text, true, "Enable vertical detection"); - BOOL_VAR_H(textord_tabfind_force_vertical_text, false, - "Force using vertical text page mode"); - double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5, - "Fraction of textlines deemed vertical to use vertical page " - "mode"); - double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75, - "Fraction of height used as a minimum gap for aligned blobs."); - INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible"); - BOOL_VAR_H(preserve_interword_spaces, false, - "Preserve multiple interword spaces"); - STRING_VAR_H(page_separator, "\f", - "Page separator (default is form feed control character)"); - INT_VAR_H(lstm_choice_mode, 0, - "Allows to include alternative symbols choices in the hOCR output. " - "Valid input values are 0, 1 and 2. 0 is the default value. " - "With 1 the alternative symbol choices per timestep are included. 
" - "With 2 the alternative symbol choices are accumulated per character."); - - //// ambigsrecog.cpp ///////////////////////////////////////////////////////// - FILE *init_recog_training(const STRING &fname); - void recog_training_segmented(const STRING &fname, - PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - FILE *output_file); - void ambigs_classify_and_output(const char *label, - PAGE_RES_IT* pr_it, - FILE *output_file); - - private: - // The filename of a backup config file. If not null, then we currently - // have a temporary debug config file loaded, and backup_config_file_ - // will be loaded, and set to null when debug is complete. - const char* backup_config_file_; - // The filename of a config file to read when processing a debug word. - STRING word_config_; - // Image used for input to layout analysis and tesseract recognition. - // May be modified by the ShiroRekhaSplitter to eliminate the top-line. - Pix* pix_binary_; - // Grey-level input image if the input was not binary, otherwise nullptr. - Pix* pix_grey_; - // Original input image. Color if the input was color. - Pix* pix_original_; - // Thresholds that were used to generate the thresholded image from grey. - Pix* pix_thresholds_; - // Debug images. If non-empty, will be written on destruction. - DebugPixa pixa_debug_; - // Input image resolution after any scaling. The resolution is not well - // transmitted by operations on Pix, so we keep an independent record here. - int source_resolution_; - // The shiro-rekha splitter object which is used to split top-lines in - // Devanagari words to provide a better word and grapheme segmentation. - ShiroRekhaSplitter splitter_; - // Page segmentation/layout - Textord textord_; - // True if the primary language uses right_to_left reading order. - bool right_to_left_; - Pix* scaled_color_; - int scaled_factor_; - FCOORD deskew_; - FCOORD reskew_; - TesseractStats stats_; - // Sub-languages to be tried in addition to this. 
- GenericVector sub_langs_; - // Most recently used Tesseract out of this and sub_langs_. The default - // language for the next word. - Tesseract* most_recently_used_; - // The size of the font table, ie max possible font id + 1. - int font_table_size_; - // Equation detector. Note: this pointer is NOT owned by the class. - EquationDetect* equ_detect_; - // LSTM recognizer, if available. - LSTMRecognizer* lstm_recognizer_; - // Output "page" number (actually line number) using TrainLineRecognizer. - int train_line_page_num_; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessvars.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessvars.cpp deleted file mode 100644 index f72b0c27..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessvars.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/********************************************************************** - * File: tessvars.cpp (Formerly tessvars.c) - * Description: Variables and other globals for tessedit. - * Author: Ray Smith - * Created: Mon Apr 13 13:13:23 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include - -#include "tessvars.h" - -FILE *debug_fp = stderr; // write debug stuff here diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessvars.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessvars.h deleted file mode 100644 index 8c063a11..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tessvars.h +++ /dev/null @@ -1,27 +0,0 @@ -/********************************************************************** - * File: tessvars.h (Formerly tessvars.h) - * Description: Variables and other globals for tessedit. - * Author: Ray Smith - * Created: Mon Apr 13 13:13:23 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSVARS_H -#define TESSVARS_H - -#include - -extern FILE *debug_fp; // write debug stuff here - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tfacepp.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tfacepp.cpp deleted file mode 100644 index 746bf853..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/tfacepp.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/********************************************************************** - * File: tfacepp.cpp (Formerly tface++.c) - * Description: C++ side of the C/C++ Tess/Editor interface. - * Author: Ray Smith - * Created: Thu Apr 23 15:39:23 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include - -#include "blamer.h" -#include "errcode.h" -#include "ratngs.h" -#include "reject.h" -#include "tesseractclass.h" -#include "werd.h" - -#define MAX_UNDIVIDED_LENGTH 24 - - - -/********************************************************************** - * recog_word - * - * Convert the word to tess form and pass it to the tess segmenter. - * Convert the output back to editor form. 
- **********************************************************************/ -namespace tesseract { -void Tesseract::recog_word(WERD_RES *word) { - if (wordrec_skip_no_truth_words && (word->blamer_bundle == nullptr || - word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) { - if (classify_debug_level) tprintf("No truth for word - skipping\n"); - word->tess_failed = true; - return; - } - ASSERT_HOST(!word->chopped_word->blobs.empty()); - recog_word_recursive(word); - word->SetupBoxWord(); - if (word->best_choice->length() != word->box_word->length()) { - tprintf("recog_word ASSERT FAIL String:\"%s\"; " - "Strlen=%d; #Blobs=%d\n", - word->best_choice->debug_string().string(), - word->best_choice->length(), word->box_word->length()); - } - ASSERT_HOST(word->best_choice->length() == word->box_word->length()); - // Check that the ratings matrix size matches the sum of all the - // segmentation states. - if (!word->StatesAllValid()) { - tprintf("Not all words have valid states relative to ratings matrix!!"); - word->DebugWordChoices(true, nullptr); - ASSERT_HOST(word->StatesAllValid()); - } - if (tessedit_override_permuter) { - /* Override the permuter type if a straight dictionary check disagrees. 
*/ - uint8_t perm_type = word->best_choice->permuter(); - if ((perm_type != SYSTEM_DAWG_PERM) && - (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) { - uint8_t real_dict_perm_type = dict_word(*word->best_choice); - if (((real_dict_perm_type == SYSTEM_DAWG_PERM) || - (real_dict_perm_type == FREQ_DAWG_PERM) || - (real_dict_perm_type == USER_DAWG_PERM)) && - (alpha_count(word->best_choice->unichar_string().string(), - word->best_choice->unichar_lengths().string()) > 0)) { - word->best_choice->set_permuter(real_dict_perm_type); // use dict perm - } - } - if (tessedit_rejection_debug && - perm_type != word->best_choice->permuter()) { - tprintf("Permuter Type Flipped from %d to %d\n", - perm_type, word->best_choice->permuter()); - } - } - // Factored out from control.cpp - ASSERT_HOST((word->best_choice == nullptr) == (word->raw_choice == nullptr)); - if (word->best_choice == nullptr || word->best_choice->length() == 0 || - static_cast(strspn(word->best_choice->unichar_string().string(), - " ")) == word->best_choice->length()) { - word->tess_failed = true; - word->reject_map.initialise(word->box_word->length()); - word->reject_map.rej_word_tess_failure(); - } else { - word->tess_failed = false; - } -} - - -/********************************************************************** - * recog_word_recursive - * - * Convert the word to tess form and pass it to the tess segmenter. - * Convert the output back to editor form. - **********************************************************************/ -void Tesseract::recog_word_recursive(WERD_RES *word) { - int word_length = word->chopped_word->NumBlobs(); // no of blobs - if (word_length > MAX_UNDIVIDED_LENGTH) { - return split_and_recog_word(word); - } - cc_recog(word); - word_length = word->rebuild_word->NumBlobs(); // No of blobs in output. - - // Do sanity checks and minor fixes on best_choice. 
- if (word->best_choice->length() > word_length) { - word->best_choice->make_bad(); // should never happen - tprintf("recog_word: Discarded long string \"%s\"" - " (%d characters vs %d blobs)\n", - word->best_choice->unichar_string().string(), - word->best_choice->length(), word_length); - tprintf("Word is at:"); - word->word->bounding_box().print(); - } - if (word->best_choice->length() < word_length) { - UNICHAR_ID space_id = unicharset.unichar_to_id(" "); - while (word->best_choice->length() < word_length) { - word->best_choice->append_unichar_id(space_id, 1, 0.0, - word->best_choice->certainty()); - } - } -} - - -/********************************************************************** - * split_and_recog_word - * - * Split the word into 2 smaller pieces at the largest gap. - * Recognize the pieces and stick the results back together. - **********************************************************************/ -void Tesseract::split_and_recog_word(WERD_RES *word) { - // Find the biggest blob gap in the chopped_word. - int bestgap = -INT32_MAX; - int split_index = 0; - for (int b = 1; b < word->chopped_word->NumBlobs(); ++b) { - TBOX prev_box = word->chopped_word->blobs[b - 1]->bounding_box(); - TBOX blob_box = word->chopped_word->blobs[b]->bounding_box(); - int gap = blob_box.left() - prev_box.right(); - if (gap > bestgap) { - bestgap = gap; - split_index = b; - } - } - ASSERT_HOST(split_index > 0); - - WERD_RES *word2 = nullptr; - BlamerBundle *orig_bb = nullptr; - split_word(word, split_index, &word2, &orig_bb); - - // Recognize the first part of the word. - recog_word_recursive(word); - // Recognize the second part of the word. - recog_word_recursive(word2); - - join_words(word, word2, orig_bb); -} - - -/********************************************************************** - * split_word - * - * Split a given WERD_RES in place into two smaller words for recognition. - * split_pt is the index of the first blob to go in the second word. 
- * The underlying word is left alone, only the TWERD (and subsequent data) - * are split up. orig_blamer_bundle is set to the original blamer bundle, - * and will now be owned by the caller. New blamer bundles are forged for the - * two pieces. - **********************************************************************/ -void Tesseract::split_word(WERD_RES *word, - int split_pt, - WERD_RES **right_piece, - BlamerBundle **orig_blamer_bundle) const { - ASSERT_HOST(split_pt >0 && split_pt < word->chopped_word->NumBlobs()); - - // Save a copy of the blamer bundle so we can try to reconstruct it below. - BlamerBundle *orig_bb = - word->blamer_bundle ? new BlamerBundle(*word->blamer_bundle) : nullptr; - - WERD_RES *word2 = new WERD_RES(*word); - - // blow away the copied chopped_word, as we want to work with - // the blobs from the input chopped_word so seam_arrays can be merged. - TWERD *chopped = word->chopped_word; - TWERD *chopped2 = new TWERD; - chopped2->blobs.reserve(chopped->NumBlobs() - split_pt); - for (int i = split_pt; i < chopped->NumBlobs(); ++i) { - chopped2->blobs.push_back(chopped->blobs[i]); - } - chopped->blobs.truncate(split_pt); - word->chopped_word = nullptr; - delete word2->chopped_word; - word2->chopped_word = nullptr; - - const UNICHARSET &unicharset = *word->uch_set; - word->ClearResults(); - word2->ClearResults(); - word->chopped_word = chopped; - word2->chopped_word = chopped2; - word->SetupBasicsFromChoppedWord(unicharset); - word2->SetupBasicsFromChoppedWord(unicharset); - - // Try to adjust the blamer bundle. - if (orig_bb != nullptr) { - // TODO(rays) Looks like a leak to me. - // orig_bb should take, rather than copy. 
- word->blamer_bundle = new BlamerBundle(); - word2->blamer_bundle = new BlamerBundle(); - orig_bb->SplitBundle(chopped->blobs.back()->bounding_box().right(), - word2->chopped_word->blobs[0]->bounding_box().left(), - wordrec_debug_blamer, - word->blamer_bundle, word2->blamer_bundle); - } - - *right_piece = word2; - *orig_blamer_bundle = orig_bb; -} - - -/********************************************************************** - * join_words - * - * The opposite of split_word(): - * join word2 (including any recognized data / seam array / etc) - * onto the right of word and then delete word2. - * Also, if orig_bb is provided, stitch it back into word. - **********************************************************************/ -void Tesseract::join_words(WERD_RES *word, - WERD_RES *word2, - BlamerBundle *orig_bb) const { - TBOX prev_box = word->chopped_word->blobs.back()->bounding_box(); - TBOX blob_box = word2->chopped_word->blobs[0]->bounding_box(); - // Tack the word2 outputs onto the end of the word outputs. - word->chopped_word->blobs += word2->chopped_word->blobs; - word->rebuild_word->blobs += word2->rebuild_word->blobs; - word2->chopped_word->blobs.clear(); - word2->rebuild_word->blobs.clear(); - TPOINT split_pt; - split_pt.x = (prev_box.right() + blob_box.left()) / 2; - split_pt.y = (prev_box.top() + prev_box.bottom() + - blob_box.top() + blob_box.bottom()) / 4; - // Move the word2 seams onto the end of the word1 seam_array. - // Since the seam list is one element short, an empty seam marking the - // end of the last blob in the first word is needed first. - word->seam_array.push_back(new SEAM(0.0f, split_pt)); - word->seam_array += word2->seam_array; - word2->seam_array.truncate(0); - // Fix widths and gaps. - word->blob_widths += word2->blob_widths; - word->blob_gaps += word2->blob_gaps; - // Fix the ratings matrix. 
- int rat1 = word->ratings->dimension(); - int rat2 = word2->ratings->dimension(); - word->ratings->AttachOnCorner(word2->ratings); - ASSERT_HOST(word->ratings->dimension() == rat1 + rat2); - word->best_state += word2->best_state; - // Append the word choices. - *word->raw_choice += *word2->raw_choice; - - // How many alt choices from each should we try to get? - const int kAltsPerPiece = 2; - // When do we start throwing away extra alt choices? - const int kTooManyAltChoices = 100; - - // Construct the cartesian product of the best_choices of word(1) and word2. - WERD_CHOICE_LIST joined_choices; - WERD_CHOICE_IT jc_it(&joined_choices); - WERD_CHOICE_IT bc1_it(&word->best_choices); - WERD_CHOICE_IT bc2_it(&word2->best_choices); - int num_word1_choices = word->best_choices.length(); - int total_joined_choices = num_word1_choices; - // Nota Bene: For the main loop here, we operate only on the 2nd and greater - // word2 choices, and put them in the joined_choices list. The 1st word2 - // choice gets added to the original word1 choices in-place after we have - // finished with them. - int bc2_index = 1; - for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) { - if (total_joined_choices >= kTooManyAltChoices && - bc2_index > kAltsPerPiece) - break; - int bc1_index = 0; - for (bc1_it.move_to_first(); bc1_index < num_word1_choices; - ++bc1_index, bc1_it.forward()) { - if (total_joined_choices >= kTooManyAltChoices && - bc1_index > kAltsPerPiece) - break; - WERD_CHOICE *wc = new WERD_CHOICE(*bc1_it.data()); - *wc += *bc2_it.data(); - jc_it.add_after_then_move(wc); - ++total_joined_choices; - } - } - // Now that we've filled in as many alternates as we want, paste the best - // choice for word2 onto the original word alt_choices. 
- bc1_it.move_to_first(); - bc2_it.move_to_first(); - for (bc1_it.mark_cycle_pt(); !bc1_it.cycled_list(); bc1_it.forward()) { - *bc1_it.data() += *bc2_it.data(); - } - bc1_it.move_to_last(); - bc1_it.add_list_after(&joined_choices); - - // Restore the pointer to original blamer bundle and combine blamer - // information recorded in the splits. - if (orig_bb != nullptr) { - orig_bb->JoinBlames(*word->blamer_bundle, *word2->blamer_bundle, - wordrec_debug_blamer); - delete word->blamer_bundle; - word->blamer_bundle = orig_bb; - } - word->SetupBoxWord(); - word->reject_map.initialise(word->box_word->length()); - delete word2; -} - - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/thresholder.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/thresholder.cpp deleted file mode 100644 index 9d0a5a51..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/thresholder.cpp +++ /dev/null @@ -1,333 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: thresholder.cpp -// Description: Base API for thresolding images in tesseract. -// Author: Ray Smith -// Created: Mon May 12 11:28:15 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "allheaders.h" - -#include "thresholder.h" - -#include // for uint32_t -#include - -#include "otsuthr.h" - -#include "openclwrapper.h" - -namespace tesseract { - -ImageThresholder::ImageThresholder() - : pix_(nullptr), - image_width_(0), image_height_(0), - pix_channels_(0), pix_wpl_(0), - scale_(1), yres_(300), estimated_res_(300) { - SetRectangle(0, 0, 0, 0); -} - -ImageThresholder::~ImageThresholder() { - Clear(); -} - -// Destroy the Pix if there is one, freeing memory. -void ImageThresholder::Clear() { - pixDestroy(&pix_); -} - -// Return true if no image has been set. -bool ImageThresholder::IsEmpty() const { - return pix_ == nullptr; -} - -// SetImage makes a copy of all the image data, so it may be deleted -// immediately after this call. -// Greyscale of 8 and color of 24 or 32 bits per pixel may be given. -// Palette color images will not work properly and must be converted to -// 24 bit. -// Binary images of 1 bit per pixel may also be given but they must be -// byte packed with the MSB of the first byte being the first pixel, and a -// one pixel is WHITE. For binary images set bytes_per_pixel=0. -void ImageThresholder::SetImage(const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, int bytes_per_line) { - int bpp = bytes_per_pixel * 8; - if (bpp == 0) bpp = 1; - Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); - l_uint32* data = pixGetData(pix); - int wpl = pixGetWpl(pix); - switch (bpp) { - case 1: - for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { - for (int x = 0; x < width; ++x) { - if (imagedata[x / 8] & (0x80 >> (x % 8))) - CLEAR_DATA_BIT(data, x); - else - SET_DATA_BIT(data, x); - } - } - break; - - case 8: - // Greyscale just copies the bytes in the right order. 
- for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { - for (int x = 0; x < width; ++x) - SET_DATA_BYTE(data, x, imagedata[x]); - } - break; - - case 24: - // Put the colors in the correct places in the line buffer. - for (int y = 0; y < height; ++y, imagedata += bytes_per_line) { - for (int x = 0; x < width; ++x, ++data) { - SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); - SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); - SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); - } - } - break; - - case 32: - // Maintain byte order consistency across different endianness. - for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) { - for (int x = 0; x < width; ++x) { - data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | - (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]; - } - } - break; - - default: - tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); - } - pixSetYRes(pix, 300); - SetImage(pix); - pixDestroy(&pix); -} - -// Store the coordinates of the rectangle to process for later use. -// Doesn't actually do any thresholding. -void ImageThresholder::SetRectangle(int left, int top, int width, int height) { - rect_left_ = left; - rect_top_ = top; - rect_width_ = width; - rect_height_ = height; -} - -// Get enough parameters to be able to rebuild bounding boxes in the -// original image (not just within the rectangle). -// Left and top are enough with top-down coordinates, but -// the height of the rectangle and the image are needed for bottom-up. -void ImageThresholder::GetImageSizes(int* left, int* top, - int* width, int* height, - int* imagewidth, int* imageheight) { - *left = rect_left_; - *top = rect_top_; - *width = rect_width_; - *height = rect_height_; - *imagewidth = image_width_; - *imageheight = image_height_; -} - -// Pix vs raw, which to use? Pix is the preferred input for efficiency, -// since raw buffers are copied. 
-// SetImage for Pix clones its input, so the source pix may be pixDestroyed -// immediately after, but may not go away until after the Thresholder has -// finished with it. -void ImageThresholder::SetImage(const Pix* pix) { - if (pix_ != nullptr) - pixDestroy(&pix_); - Pix* src = const_cast(pix); - int depth; - pixGetDimensions(src, &image_width_, &image_height_, &depth); - // Convert the image as necessary so it is one of binary, plain RGB, or - // 8 bit with no colormap. Guarantee that we always end up with our own copy, - // not just a clone of the input. - if (pixGetColormap(src)) { - Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); - depth = pixGetDepth(tmp); - if (depth > 1 && depth < 8) { - pix_ = pixConvertTo8(tmp, false); - pixDestroy(&tmp); - } else { - pix_ = tmp; - } - } else if (depth > 1 && depth < 8) { - pix_ = pixConvertTo8(src, false); - } else { - pix_ = pixCopy(nullptr, src); - } - depth = pixGetDepth(pix_); - pix_channels_ = depth / 8; - pix_wpl_ = pixGetWpl(pix_); - scale_ = 1; - estimated_res_ = yres_ = pixGetYRes(pix_); - Init(); -} - -// Threshold the source image as efficiently as possible to the output Pix. -// Creates a Pix and sets pix to point to the resulting pointer. -// Caller must use pixDestroy to free the created Pix. -/// Returns false on error. -bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) { - if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) { - tprintf("Image too large: (%d, %d)\n", image_width_, image_height_); - return false; - } - if (pix_channels_ == 0) { - // We have a binary image, but it still has to be copied, as this API - // allows the caller to modify the output. - Pix* original = GetPixRect(); - *pix = pixCopy(nullptr, original); - pixDestroy(&original); - } else { - OtsuThresholdRectToPix(pix_, pix); - } - return true; -} - -// Gets a pix that contains an 8 bit threshold value at each pixel. 
The -// returned pix may be an integer reduction of the binary image such that -// the scale factor may be inferred from the ratio of the sizes, even down -// to the extreme of a 1x1 pixel thresholds image. -// Ideally the 8 bit threshold should be the exact threshold used to generate -// the binary image in ThresholdToPix, but this is not a hard constraint. -// Returns nullptr if the input is binary. PixDestroy after use. -Pix* ImageThresholder::GetPixRectThresholds() { - if (IsBinary()) return nullptr; - Pix* pix_grey = GetPixRectGrey(); - int width = pixGetWidth(pix_grey); - int height = pixGetHeight(pix_grey); - int* thresholds; - int* hi_values; - OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values); - pixDestroy(&pix_grey); - Pix* pix_thresholds = pixCreate(width, height, 8); - int threshold = thresholds[0] > 0 ? thresholds[0] : 128; - pixSetAllArbitrary(pix_thresholds, threshold); - delete [] thresholds; - delete [] hi_values; - return pix_thresholds; -} - -// Common initialization shared between SetImage methods. -void ImageThresholder::Init() { - SetRectangle(0, 0, image_width_, image_height_); -} - -// Get a clone/copy of the source image rectangle. -// The returned Pix must be pixDestroyed. -// This function will be used in the future by the page layout analysis, and -// the layout analysis that uses it will only be available with Leptonica, -// so there is no raw equivalent. -Pix* ImageThresholder::GetPixRect() { - if (IsFullImage()) { - // Just clone the whole thing. - return pixClone(pix_); - } else { - // Crop to the given rectangle. - Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); - Pix* cropped = pixClipRectangle(pix_, box, nullptr); - boxDestroy(&box); - return cropped; - } -} - -// Get a clone/copy of the source image rectangle, reduced to greyscale, -// and at the same resolution as the output binary. -// The returned Pix must be pixDestroyed. 
-// Provided to the classifier to extract features from the greyscale image. -Pix* ImageThresholder::GetPixRectGrey() { - Pix* pix = GetPixRect(); // May have to be reduced to grey. - int depth = pixGetDepth(pix); - if (depth != 8) { - Pix* result = depth < 8 ? pixConvertTo8(pix, false) - : pixConvertRGBToLuminance(pix); - pixDestroy(&pix); - return result; - } - return pix; -} - -// Otsu thresholds the rectangle, taking the rectangle from *this. -void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, - Pix** out_pix) const { - PERF_COUNT_START("OtsuThresholdRectToPix") - int* thresholds; - int* hi_values; - - int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, - rect_height_, &thresholds, &hi_values); - // only use opencl if compiled w/ OpenCL and selected device is opencl -#ifdef USE_OPENCL - OpenclDevice od; - if (num_channels == 4 && - od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) { - od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels, - pixGetWpl(src_pix) * 4, thresholds, hi_values, - out_pix /*pix_OCL*/, rect_height_, rect_width_, - rect_top_, rect_left_); - } else { -#endif - ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); -#ifdef USE_OPENCL - } -#endif - delete [] thresholds; - delete [] hi_values; - - PERF_COUNT_END -} - -/// Threshold the rectangle, taking everything except the src_pix -/// from the class, using thresholds/hi_values to the output pix. -/// NOTE that num_channels is the size of the thresholds and hi_values -// arrays and also the bytes per pixel in src_pix. 
-void ImageThresholder::ThresholdRectToPix(Pix* src_pix, - int num_channels, - const int* thresholds, - const int* hi_values, - Pix** pix) const { - PERF_COUNT_START("ThresholdRectToPix") - *pix = pixCreate(rect_width_, rect_height_, 1); - uint32_t* pixdata = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - int src_wpl = pixGetWpl(src_pix); - uint32_t* srcdata = pixGetData(src_pix); - for (int y = 0; y < rect_height_; ++y) { - const uint32_t* linedata = srcdata + (y + rect_top_) * src_wpl; - uint32_t* pixline = pixdata + y * wpl; - for (int x = 0; x < rect_width_; ++x) { - bool white_result = true; - for (int ch = 0; ch < num_channels; ++ch) { - int pixel = - GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch); - if (hi_values[ch] >= 0 && - (pixel > thresholds[ch]) == (hi_values[ch] == 0)) { - white_result = false; - break; - } - } - if (white_result) - CLEAR_DATA_BIT(pixline, x); - else - SET_DATA_BIT(pixline, x); - } - } - - PERF_COUNT_END -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/thresholder.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/thresholder.h deleted file mode 100644 index 30b1d37c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/thresholder.h +++ /dev/null @@ -1,190 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: thresholder.h -// Description: Base API for thresolding images in tesseract. -// Author: Ray Smith -// Created: Mon May 12 11:00:15 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_ -#define TESSERACT_CCMAIN_THRESHOLDER_H_ - -#include "platform.h" -#include "publictypes.h" - -struct Pix; - -namespace tesseract { - -/// Base class for all tesseract image thresholding classes. -/// Specific classes can add new thresholding methods by -/// overriding ThresholdToPix. -/// Each instance deals with a single image, but the design is intended to -/// be useful for multiple calls to SetRectangle and ThresholdTo* if -/// desired. -class TESS_API ImageThresholder { - public: - ImageThresholder(); - virtual ~ImageThresholder(); - - /// Destroy the Pix if there is one, freeing memory. - virtual void Clear(); - - /// Return true if no image has been set. - bool IsEmpty() const; - - /// SetImage makes a copy of all the image data, so it may be deleted - /// immediately after this call. - /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given. - /// Palette color images will not work properly and must be converted to - /// 24 bit. - /// Binary images of 1 bit per pixel may also be given but they must be - /// byte packed with the MSB of the first byte being the first pixel, and a - /// one pixel is WHITE. For binary images set bytes_per_pixel=0. - void SetImage(const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line); - - /// Store the coordinates of the rectangle to process for later use. - /// Doesn't actually do any thresholding. 
- void SetRectangle(int left, int top, int width, int height); - - /// Get enough parameters to be able to rebuild bounding boxes in the - /// original image (not just within the rectangle). - /// Left and top are enough with top-down coordinates, but - /// the height of the rectangle and the image are needed for bottom-up. - virtual void GetImageSizes(int* left, int* top, int* width, int* height, - int* imagewidth, int* imageheight); - - /// Return true if the source image is color. - bool IsColor() const { - return pix_channels_ >= 3; - } - - /// Returns true if the source image is binary. - bool IsBinary() const { - return pix_channels_ == 0; - } - - int GetScaleFactor() const { - return scale_; - } - - // Set the resolution of the source image in pixels per inch. - // This should be called right after SetImage(), and will let us return - // appropriate font sizes for the text. - void SetSourceYResolution(int ppi) { - yres_ = ppi; - estimated_res_ = ppi; - } - int GetSourceYResolution() const { - return yres_; - } - int GetScaledYResolution() const { - return scale_ * yres_; - } - // Set the resolution of the source image in pixels per inch, as estimated - // by the thresholder from the text size found during thresholding. - // This value will be used to set internal size thresholds during recognition - // and will not influence the output "point size." The default value is - // the same as the source resolution. (yres_) - void SetEstimatedResolution(int ppi) { - estimated_res_ = ppi; - } - // Returns the estimated resolution, including any active scaling. - // This value will be used to set internal size thresholds during recognition. - int GetScaledEstimatedResolution() const { - return scale_ * estimated_res_; - } - - /// Pix vs raw, which to use? Pix is the preferred input for efficiency, - /// since raw buffers are copied. 
- /// SetImage for Pix clones its input, so the source pix may be pixDestroyed - /// immediately after, but may not go away until after the Thresholder has - /// finished with it. - void SetImage(const Pix* pix); - - /// Threshold the source image as efficiently as possible to the output Pix. - /// Creates a Pix and sets pix to point to the resulting pointer. - /// Caller must use pixDestroy to free the created Pix. - /// Returns false on error. - virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix** pix); - - // Gets a pix that contains an 8 bit threshold value at each pixel. The - // returned pix may be an integer reduction of the binary image such that - // the scale factor may be inferred from the ratio of the sizes, even down - // to the extreme of a 1x1 pixel thresholds image. - // Ideally the 8 bit threshold should be the exact threshold used to generate - // the binary image in ThresholdToPix, but this is not a hard constraint. - // Returns nullptr if the input is binary. PixDestroy after use. - virtual Pix* GetPixRectThresholds(); - - /// Get a clone/copy of the source image rectangle. - /// The returned Pix must be pixDestroyed. - /// This function will be used in the future by the page layout analysis, and - /// the layout analysis that uses it will only be available with Leptonica, - /// so there is no raw equivalent. - Pix* GetPixRect(); - - // Get a clone/copy of the source image rectangle, reduced to greyscale, - // and at the same resolution as the output binary. - // The returned Pix must be pixDestroyed. - // Provided to the classifier to extract features from the greyscale image. - virtual Pix* GetPixRectGrey(); - - protected: - // ---------------------------------------------------------------------- - // Utility functions that may be useful components for other thresholders. - - /// Common initialization shared between SetImage methods. - virtual void Init(); - - /// Return true if we are processing the full image. 
- bool IsFullImage() const { - return rect_left_ == 0 && rect_top_ == 0 && - rect_width_ == image_width_ && rect_height_ == image_height_; - } - - // Otsu thresholds the rectangle, taking the rectangle from *this. - void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const; - - /// Threshold the rectangle, taking everything except the src_pix - /// from the class, using thresholds/hi_values to the output pix. - /// NOTE that num_channels is the size of the thresholds and hi_values - // arrays and also the bytes per pixel in src_pix. - void ThresholdRectToPix(Pix* src_pix, int num_channels, - const int* thresholds, const int* hi_values, - Pix** pix) const; - - protected: - /// Clone or other copy of the source Pix. - /// The pix will always be PixDestroy()ed on destruction of the class. - Pix* pix_; - - int image_width_; //< Width of source pix_. - int image_height_; //< Height of source pix_. - int pix_channels_; //< Number of 8-bit channels in pix_. - int pix_wpl_; //< Words per line of pix_. - // Limits of image rectangle to be processed. - int scale_; //< Scale factor from original image. - int yres_; //< y pixels/inch in source image. - int estimated_res_; //< Resolution estimate from text size. - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCMAIN_THRESHOLDER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/werdit.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/werdit.cpp deleted file mode 100644 index 45bb71cb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/werdit.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/********************************************************************** - * File: werdit.cpp (Formerly wordit.c) - * Description: An iterator for passing over all the words in a document. - * Author: Ray Smith - * Created: Mon Apr 27 08:51:22 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "werdit.h" -#include "errcode.h" // for ASSERT_HOST -#include "pageres.h" // for PAGE_RES_IT, PAGE_RES (ptr only), WERD_RES -#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST -#include "werd.h" // for WERD - -/********************************************************************** - * make_pseudo_word - * - * Make all the blobs inside a selection into a single word. - * The returned PAGE_RES_IT* it points to the new word. After use, call - * it->DeleteCurrentWord() to delete the fake word, and then - * delete it to get rid of the iterator itself. 
- **********************************************************************/ - -PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) { - PAGE_RES_IT pr_it(page_res); - C_BLOB_LIST new_blobs; // list of gathered blobs - C_BLOB_IT new_blob_it = &new_blobs; // iterator - - for (WERD_RES* word_res = pr_it.word(); word_res != nullptr; - word_res = pr_it.forward()) { - WERD* word = word_res->word; - if (word->bounding_box().overlap(selection_box)) { - C_BLOB_IT blob_it(word->cblob_list()); - for (blob_it.mark_cycle_pt(); - !blob_it.cycled_list(); blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - if (blob->bounding_box().overlap(selection_box)) { - new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob)); - } - } - if (!new_blobs.empty()) { - WERD* pseudo_word = new WERD(&new_blobs, 1, nullptr); - word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word); - PAGE_RES_IT* it = new PAGE_RES_IT(page_res); - while (it->word() != word_res && it->word() != nullptr) it->forward(); - ASSERT_HOST(it->word() == word_res); - return it; - } - } - } - return nullptr; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/werdit.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/werdit.h deleted file mode 100644 index 66caaef5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccmain/werdit.h +++ /dev/null @@ -1,29 +0,0 @@ -/********************************************************************** - * File: wordit.h - * Description: An iterator for passing over all the words in a document. - * Author: Ray Smith - * Created: Mon Apr 27 08:51:22 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef WERDIT_H -#define WERDIT_H - -#include "rect.h" // for TBOX -class PAGE_RES; -class PAGE_RES_IT; - -PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/Makefile.am deleted file mode 100644 index dddd1de2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/cutil \ - -I$(top_srcdir)/src/viewer \ - -I$(top_srcdir)/src/opencl -AM_CPPFLAGS += $(OPENCL_CPPFLAGS) - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - -pkginclude_HEADERS = publictypes.h -noinst_HEADERS = \ - blamer.h blobbox.h blobs.h blread.h boxread.h boxword.h \ - ccstruct.h coutln.h crakedge.h \ - debugpixa.h detlinefit.h dppoint.h fontinfo.h \ - imagedata.h \ - linlsq.h matrix.h mod128.h normalis.h \ - ocrblock.h ocrpara.h ocrrow.h otsuthr.h \ - pageres.h params_training_featdef.h \ - pdblock.h points.h polyaprx.h polyblk.h \ - quadlsq.h quadratc.h quspline.h ratngs.h rect.h rejctmap.h \ - seam.h split.h statistc.h stepblob.h vecfuncs.h werd.h - -noinst_LTLIBRARIES = libtesseract_ccstruct.la - -libtesseract_ccstruct_la_SOURCES = \ - blamer.cpp blobbox.cpp blobs.cpp blread.cpp boxread.cpp boxword.cpp ccstruct.cpp 
coutln.cpp \ - detlinefit.cpp dppoint.cpp fontinfo.cpp \ - imagedata.cpp \ - linlsq.cpp matrix.cpp mod128.cpp normalis.cpp \ - ocrblock.cpp ocrpara.cpp ocrrow.cpp otsuthr.cpp \ - pageres.cpp pdblock.cpp points.cpp polyaprx.cpp polyblk.cpp \ - params_training_featdef.cpp publictypes.cpp \ - quadlsq.cpp quspline.cpp ratngs.cpp rect.cpp rejctmap.cpp \ - seam.cpp split.cpp statistc.cpp stepblob.cpp \ - vecfuncs.cpp werd.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blamer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blamer.cpp deleted file mode 100644 index fd9b022c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blamer.cpp +++ /dev/null @@ -1,591 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: blamer.cpp -// Description: Module allowing precise error causes to be allocated. -// Author: Rike Antonova -// Refactored: Ray Smith -// Created: Mon Feb 04 14:37:01 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "blamer.h" -#include // for abs -#include // for abs -#include "blobs.h" // for TPOINT, TWERD, TBLOB -#include "errcode.h" // for ASSERT_HOST -#include "matrix.h" // for MATRIX -#include "normalis.h" // for DENORM -#include "pageres.h" // for WERD_RES -#include "tesscallback.h" // for TessResultCallback2 -#include "unicharset.h" // for UNICHARSET - -// Names for each value of IncorrectResultReason enum. Keep in sync. -const char kBlameCorrect[] = "corr"; -const char kBlameClassifier[] = "cl"; -const char kBlameChopper[] = "chop"; -const char kBlameClassLMTradeoff[] = "cl/LM"; -const char kBlamePageLayout[] = "pglt"; -const char kBlameSegsearchHeur[] = "ss_heur"; -const char kBlameSegsearchPP[] = "ss_pp"; -const char kBlameClassOldLMTradeoff[] = "cl/old_LM"; -const char kBlameAdaption[] = "adapt"; -const char kBlameNoTruthSplit[] = "no_tr_spl"; -const char kBlameNoTruth[] = "no_tr"; -const char kBlameUnknown[] = "unkn"; - -const char * const kIncorrectResultReasonNames[] = { - kBlameCorrect, - kBlameClassifier, - kBlameChopper, - kBlameClassLMTradeoff, - kBlamePageLayout, - kBlameSegsearchHeur, - kBlameSegsearchPP, - kBlameClassOldLMTradeoff, - kBlameAdaption, - kBlameNoTruthSplit, - kBlameNoTruth, - kBlameUnknown -}; - -const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) { - return kIncorrectResultReasonNames[irr]; -} - -const char *BlamerBundle::IncorrectReason() const { - return kIncorrectResultReasonNames[incorrect_result_reason_]; -} - -// Functions to setup the blamer. -// Whole word string, whole word bounding box. -void BlamerBundle::SetWordTruth(const UNICHARSET& unicharset, - const char* truth_str, const TBOX& word_box) { - truth_word_.InsertBox(0, word_box); - truth_has_char_boxes_ = false; - // Encode the string as UNICHAR_IDs. 
- GenericVector encoding; - GenericVector lengths; - unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr); - int total_length = 0; - for (int i = 0; i < encoding.size(); total_length += lengths[i++]) { - STRING uch(truth_str + total_length); - uch.truncate_at(lengths[i] - total_length); - UNICHAR_ID id = encoding[i]; - if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id); - truth_text_.push_back(uch); - } -} - -// Single "character" string, "character" bounding box. -// May be called multiple times to indicate the characters in a word. -void BlamerBundle::SetSymbolTruth(const UNICHARSET& unicharset, - const char* char_str, const TBOX& char_box) { - STRING symbol_str(char_str); - UNICHAR_ID id = unicharset.unichar_to_id(char_str); - if (id != INVALID_UNICHAR_ID) { - STRING normed_uch(unicharset.get_normed_unichar(id)); - if (normed_uch.length() > 0) symbol_str = normed_uch; - } - int length = truth_word_.length(); - truth_text_.push_back(symbol_str); - truth_word_.InsertBox(length, char_box); - if (length == 0) - truth_has_char_boxes_ = true; - else if (truth_word_.BlobBox(length - 1) == char_box) - truth_has_char_boxes_ = false; -} - -// Marks that there is something wrong with the truth text, like it contains -// reject characters. -void BlamerBundle::SetRejectedTruth() { - incorrect_result_reason_ = IRR_NO_TRUTH; - truth_has_char_boxes_ = false; -} - -// Returns true if the provided word_choice is correct. 
-bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const { - if (word_choice == nullptr) return false; - const UNICHARSET* uni_set = word_choice->unicharset(); - STRING normed_choice_str; - for (int i = 0; i < word_choice->length(); ++i) { - normed_choice_str += - uni_set->get_normed_unichar(word_choice->unichar_id(i)); - } - STRING truth_str = TruthString(); - return truth_str == normed_choice_str; -} - -void BlamerBundle::FillDebugString(const STRING &msg, - const WERD_CHOICE *choice, - STRING *debug) { - (*debug) += "Truth "; - for (int i = 0; i < this->truth_text_.length(); ++i) { - (*debug) += this->truth_text_[i]; - } - if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)"; - if (choice != nullptr) { - (*debug) += " Choice "; - STRING choice_str; - choice->string_and_lengths(&choice_str, nullptr); - (*debug) += choice_str; - } - if (msg.length() > 0) { - (*debug) += "\n"; - (*debug) += msg; - } - (*debug) += "\n"; -} - -// Sets up the norm_truth_word from truth_word using the given DENORM. -void BlamerBundle::SetupNormTruthWord(const DENORM& denorm) { - // TODO(rays) Is this the last use of denorm in WERD_RES and can it go? - norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale(); - TPOINT topleft; - TPOINT botright; - TPOINT norm_topleft; - TPOINT norm_botright; - for (int b = 0; b < truth_word_.length(); ++b) { - const TBOX &box = truth_word_.BlobBox(b); - topleft.x = box.left(); - topleft.y = box.top(); - botright.x = box.right(); - botright.y = box.bottom(); - denorm.NormTransform(nullptr, topleft, &norm_topleft); - denorm.NormTransform(nullptr, botright, &norm_botright); - TBOX norm_box(norm_topleft.x, norm_botright.y, - norm_botright.x, norm_topleft.y); - norm_truth_word_.InsertBox(b, norm_box); - } -} - -// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty -// bundles) where the right edge/ of the left-hand word is word1_right, -// and the left edge of the right-hand word is word2_left. 
-void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, - BlamerBundle* bundle1, - BlamerBundle* bundle2) const { - STRING debug_str; - // Find truth boxes that correspond to the split in the blobs. - int b; - int begin2_truth_index = -1; - if (incorrect_result_reason_ != IRR_NO_TRUTH && - truth_has_char_boxes_) { - debug_str = "Looking for truth split at"; - debug_str.add_str_int(" end1_x ", word1_right); - debug_str.add_str_int(" begin2_x ", word2_left); - debug_str += "\nnorm_truth_word boxes:\n"; - if (norm_truth_word_.length() > 1) { - norm_truth_word_.BlobBox(0).print_to_str(&debug_str); - for (b = 1; b < norm_truth_word_.length(); ++b) { - norm_truth_word_.BlobBox(b).print_to_str(&debug_str); - if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < - norm_box_tolerance_) && - (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < - norm_box_tolerance_)) { - begin2_truth_index = b; - debug_str += "Split found"; - break; - } - } - debug_str += '\n'; - } - } - // Populate truth information in word and word2 with the first and second - // part of the original truth. - if (begin2_truth_index > 0) { - bundle1->truth_has_char_boxes_ = true; - bundle1->norm_box_tolerance_ = norm_box_tolerance_; - bundle2->truth_has_char_boxes_ = true; - bundle2->norm_box_tolerance_ = norm_box_tolerance_; - BlamerBundle *curr_bb = bundle1; - for (b = 0; b < norm_truth_word_.length(); ++b) { - if (b == begin2_truth_index) curr_bb = bundle2; - curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b)); - curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b)); - curr_bb->truth_text_.push_back(truth_text_[b]); - } - } else if (incorrect_result_reason_ == IRR_NO_TRUTH) { - bundle1->incorrect_result_reason_ = IRR_NO_TRUTH; - bundle2->incorrect_result_reason_ = IRR_NO_TRUTH; - } else { - debug_str += "Truth split not found"; - debug_str += truth_has_char_boxes_ ? 
- "\n" : " (no truth char boxes)\n"; - bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug); - bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug); - } -} - -// "Joins" the blames from bundle1 and bundle2 into *this. -void BlamerBundle::JoinBlames(const BlamerBundle& bundle1, - const BlamerBundle& bundle2, bool debug) { - STRING debug_str; - IncorrectResultReason irr = incorrect_result_reason_; - if (irr != IRR_NO_TRUTH_SPLIT) debug_str = ""; - if (bundle1.incorrect_result_reason_ != IRR_CORRECT && - bundle1.incorrect_result_reason_ != IRR_NO_TRUTH && - bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) { - debug_str += "Blame from part 1: "; - debug_str += bundle1.debug_; - irr = bundle1.incorrect_result_reason_; - } - if (bundle2.incorrect_result_reason_ != IRR_CORRECT && - bundle2.incorrect_result_reason_ != IRR_NO_TRUTH && - bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) { - debug_str += "Blame from part 2: "; - debug_str += bundle2.debug_; - if (irr == IRR_CORRECT) { - irr = bundle2.incorrect_result_reason_; - } else if (irr != bundle2.incorrect_result_reason_) { - irr = IRR_UNKNOWN; - } - } - incorrect_result_reason_ = irr; - if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) { - SetBlame(irr, debug_str, nullptr, debug); - } -} - -// If a blob with the same bounding box as one of the truth character -// bounding boxes is not classified as the corresponding truth character -// blames character classifier for incorrect answer. -void BlamerBundle::BlameClassifier(const UNICHARSET& unicharset, - const TBOX& blob_box, - const BLOB_CHOICE_LIST& choices, - bool debug) { - if (!truth_has_char_boxes_ || - incorrect_result_reason_ != IRR_CORRECT) - return; // Nothing to do here. 
- - for (int b = 0; b < norm_truth_word_.length(); ++b) { - const TBOX &truth_box = norm_truth_word_.BlobBox(b); - // Note that we are more strict on the bounding box boundaries here - // than in other places (chopper, segmentation search), since we do - // not have the ability to check the previous and next bounding box. - if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) { - bool found = false; - bool incorrect_adapted = false; - UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID; - const char *truth_str = truth_text_[b].string(); - // We promise not to modify the list or its contents, using a - // const BLOB_CHOICE* below. - BLOB_CHOICE_IT choices_it(const_cast(&choices)); - for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); - choices_it.forward()) { - const BLOB_CHOICE* choice = choices_it.data(); - if (strcmp(truth_str, unicharset.get_normed_unichar( - choice->unichar_id())) == 0) { - found = true; - break; - } else if (choice->IsAdapted()) { - incorrect_adapted = true; - incorrect_adapted_id = choice->unichar_id(); - } - } // end choices_it for loop - if (!found) { - STRING debug_str = "unichar "; - debug_str += truth_str; - debug_str += " not found in classification list"; - SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug); - } else if (incorrect_adapted) { - STRING debug_str = "better rating for adapted "; - debug_str += unicharset.id_to_unichar(incorrect_adapted_id); - debug_str += " than for correct "; - debug_str += truth_str; - SetBlame(IRR_ADAPTION, debug_str, nullptr, debug); - } - break; - } - } // end iterating over blamer_bundle->norm_truth_word -} - -// Checks whether chops were made at all the character bounding box -// boundaries in word->truth_word. If not - blames the chopper for an -// incorrect answer. 
-void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) { - if (NoTruth() || !truth_has_char_boxes_ || - word->chopped_word->blobs.empty()) { - return; - } - STRING debug_str; - bool missing_chop = false; - int num_blobs = word->chopped_word->blobs.size(); - int box_index = 0; - int blob_index = 0; - int16_t truth_x = -1; - while (box_index < truth_word_.length() && blob_index < num_blobs) { - truth_x = norm_truth_word_.BlobBox(box_index).right(); - TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; - if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) { - ++blob_index; - continue; // encountered an extra chop, keep looking - } else if (curr_blob->bounding_box().right() > - truth_x + norm_box_tolerance_) { - missing_chop = true; - break; - } else { - ++blob_index; - } - } - if (missing_chop || box_index < norm_truth_word_.length()) { - STRING debug_str; - if (missing_chop) { - debug_str.add_str_int("Detected missing chop (tolerance=", - norm_box_tolerance_); - debug_str += ") at Bounding Box="; - TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; - curr_blob->bounding_box().print_to_str(&debug_str); - debug_str.add_str_int("\nNo chop for truth at x=", truth_x); - } else { - debug_str.add_str_int("Missing chops for last ", - norm_truth_word_.length() - box_index); - debug_str += " truth box(es)"; - } - debug_str += "\nMaximally chopped word boxes:\n"; - for (blob_index = 0; blob_index < num_blobs; ++blob_index) { - TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; - curr_blob->bounding_box().print_to_str(&debug_str); - debug_str += '\n'; - } - debug_str += "Truth bounding boxes:\n"; - for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) { - norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str); - debug_str += '\n'; - } - SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug); - } -} - -// Blames the classifier or the language model if, after running only the -// chopper, 
best_choice is incorrect and no blame has been yet set. -// Blames the classifier if best_choice is classifier's top choice and is a -// dictionary word (i.e. language model could not have helped). -// Otherwise, blames the language model (formerly permuter word adjustment). -void BlamerBundle::BlameClassifierOrLangModel( - const WERD_RES* word, - const UNICHARSET& unicharset, bool valid_permuter, bool debug) { - if (valid_permuter) { - // Find out whether best choice is a top choice. - best_choice_is_dict_and_top_choice_ = true; - for (int i = 0; i < word->best_choice->length(); ++i) { - BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i)); - ASSERT_HOST(!blob_choice_it.empty()); - BLOB_CHOICE *first_choice = nullptr; - for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); - blob_choice_it.forward()) { // find first non-fragment choice - if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) { - first_choice = blob_choice_it.data(); - break; - } - } - ASSERT_HOST(first_choice != nullptr); - if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) { - best_choice_is_dict_and_top_choice_ = false; - break; - } - } - } - STRING debug_str; - if (best_choice_is_dict_and_top_choice_) { - debug_str = "Best choice is: incorrect, top choice, dictionary word"; - debug_str += " with permuter "; - debug_str += word->best_choice->permuter_name(); - } else { - debug_str = "Classifier/Old LM tradeoff is to blame"; - } - SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER - : IRR_CLASS_OLD_LM_TRADEOFF, - debug_str, word->best_choice, debug); -} - -// Sets up the correct_segmentation_* to mark the correct bounding boxes. -void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) { - params_training_bundle_.StartHypothesisList(); - if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) - return; // Nothing to do here. 
- - STRING debug_str; - debug_str += "Blamer computing correct_segmentation_cols\n"; - int curr_box_col = 0; - int next_box_col = 0; - int num_blobs = word->NumBlobs(); - if (num_blobs == 0) return; // No blobs to play with. - int blob_index = 0; - int16_t next_box_x = word->blobs[blob_index]->bounding_box().right(); - for (int truth_idx = 0; blob_index < num_blobs && - truth_idx < norm_truth_word_.length(); - ++blob_index) { - ++next_box_col; - int16_t curr_box_x = next_box_x; - if (blob_index + 1 < num_blobs) - next_box_x = word->blobs[blob_index + 1]->bounding_box().right(); - int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right(); - debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x); - debug_str.add_str_int(" ", truth_x); - debug_str += "\n"; - if (curr_box_x > (truth_x + norm_box_tolerance_)) { - break; // failed to find a matching box - } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched - (blob_index + 1 >= num_blobs || // next box can't be included - next_box_x > truth_x + norm_box_tolerance_)) { - correct_segmentation_cols_.push_back(curr_box_col); - correct_segmentation_rows_.push_back(next_box_col-1); - ++truth_idx; - debug_str.add_str_int("col=", curr_box_col); - debug_str.add_str_int(" row=", next_box_col-1); - debug_str += "\n"; - curr_box_col = next_box_col; - } - } - if (blob_index < num_blobs || // trailing blobs - correct_segmentation_cols_.length() != norm_truth_word_.length()) { - debug_str.add_str_int("Blamer failed to find correct segmentation" - " (tolerance=", norm_box_tolerance_); - if (blob_index >= num_blobs) debug_str += " blob == nullptr"; - debug_str += ")\n"; - debug_str.add_str_int(" path length ", correct_segmentation_cols_.length()); - debug_str.add_str_int(" vs. 
truth ", norm_truth_word_.length()); - debug_str += "\n"; - SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug); - correct_segmentation_cols_.clear(); - correct_segmentation_rows_.clear(); - } -} - -// Returns true if a guided segmentation search is needed. -bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const { - return incorrect_result_reason_ == IRR_CORRECT && - !segsearch_is_looking_for_blame_ && - truth_has_char_boxes_ && - !ChoiceIsCorrect(best_choice); -} - -// Setup ready to guide the segmentation search to the correct segmentation. -// The callback pp_cb is used to avoid a cyclic dependency. -// It calls into LMPainPoints::GenerateForBlamer by pre-binding the -// WERD_RES, and the LMPainPoints itself. -// pp_cb must be a permanent callback, and should be deleted by the caller. -void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, - MATRIX* ratings, UNICHAR_ID wildcard_id, - bool debug, STRING *debug_str, - TessResultCallback2* cb) { - segsearch_is_looking_for_blame_ = true; - if (debug) { - tprintf("segsearch starting to look for blame\n"); - } - // Fill pain points for any unclassifed blob corresponding to the - // correct segmentation state. - *debug_str += "Correct segmentation:\n"; - for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) { - debug_str->add_str_int("col=", correct_segmentation_cols_[idx]); - debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]); - *debug_str += "\n"; - if (!ratings->Classified(correct_segmentation_cols_[idx], - correct_segmentation_rows_[idx], - wildcard_id) && - !cb->Run(correct_segmentation_cols_[idx], - correct_segmentation_rows_[idx])) { - segsearch_is_looking_for_blame_ = false; - *debug_str += "\nFailed to insert pain point\n"; - SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug); - break; - } - } // end for blamer_bundle->correct_segmentation_cols/rows -} -// Returns true if the guided segsearch is in progress. 
-bool BlamerBundle::GuidedSegsearchStillGoing() const { - return segsearch_is_looking_for_blame_; -} - -// The segmentation search has ended. Sets the blame appropriately. -void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, - bool debug, STRING *debug_str) { - // If we are still looking for blame (i.e. best_choice is incorrect, but a - // path representing the correct segmentation could be constructed), we can - // blame segmentation search pain point prioritization if the rating of the - // path corresponding to the correct segmentation is better than that of - // best_choice (i.e. language model would have done the correct thing, but - // because of poor pain point prioritization the correct segmentation was - // never explored). Otherwise we blame the tradeoff between the language model - // and the classifier, since even after exploring the path corresponding to - // the correct segmentation incorrect best_choice would have been chosen. - // One special case when we blame the classifier instead is when best choice - // is incorrect, but it is a dictionary word and it classifier's top choice. - if (segsearch_is_looking_for_blame_) { - segsearch_is_looking_for_blame_ = false; - if (best_choice_is_dict_and_top_choice_) { - *debug_str = "Best choice is: incorrect, top choice, dictionary word"; - *debug_str += " with permuter "; - *debug_str += best_choice->permuter_name(); - SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug); - } else if (best_correctly_segmented_rating_ < - best_choice->rating()) { - *debug_str += "Correct segmentation state was not explored"; - SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug); - } else { - if (best_correctly_segmented_rating_ >= - WERD_CHOICE::kBadRating) { - *debug_str += "Correct segmentation paths were pruned by LM\n"; - } else { - debug_str->add_str_double("Best correct segmentation rating ", - best_correctly_segmented_rating_); - debug_str->add_str_double(" vs. 
best choice rating ", - best_choice->rating()); - } - SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug); - } - } -} - -// If the bundle is null or still does not indicate the correct result, -// fix it and use some backup reason for the blame. -void BlamerBundle::LastChanceBlame(bool debug, WERD_RES* word) { - if (word->blamer_bundle == nullptr) { - word->blamer_bundle = new BlamerBundle(); - word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", - word->best_choice, debug); - } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) { - word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", - word->best_choice, debug); - } else { - bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice); - IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_; - if (irr == IRR_CORRECT && !correct) { - STRING debug_str = "Choice is incorrect after recognition"; - word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, - debug); - } else if (irr != IRR_CORRECT && correct) { - if (debug) { - tprintf("Corrected %s\n", word->blamer_bundle->debug_.string()); - } - word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT; - word->blamer_bundle->debug_ = ""; - } - } -} - -// Sets the misadaption debug if this word is incorrect, as this word is -// being adapted to. 
-void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, - bool debug) { - if (incorrect_result_reason_ != IRR_NO_TRUTH && - !ChoiceIsCorrect(best_choice)) { - misadaption_debug_ ="misadapt to word ("; - misadaption_debug_ += best_choice->permuter_name(); - misadaption_debug_ += "): "; - FillDebugString("", best_choice, &misadaption_debug_); - if (debug) { - tprintf("%s\n", misadaption_debug_.string()); - } - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blamer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blamer.h deleted file mode 100644 index b1b325b4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blamer.h +++ /dev/null @@ -1,344 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: blamer.h -// Description: Module allowing precise error causes to be allocated. -// Author: Rike Antonova -// Refactored: Ray Smith -// Created: Mon Feb 04 14:37:01 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCSTRUCT_BLAMER_H_ -#define TESSERACT_CCSTRUCT_BLAMER_H_ - -#include // for int16_t -#include // for memcpy -#include "boxword.h" // for BoxWord -#include "genericvector.h" // for GenericVector -#include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra... 
-#include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only) -#include "rect.h" // for TBOX -#include "strngs.h" // for STRING -#include "tprintf.h" // for tprintf -#include "unichar.h" // for UNICHAR_ID - -class DENORM; -class MATRIX; -class UNICHARSET; -class WERD_RES; - -struct MATRIX_COORD; -struct TWERD; - -template class TessResultCallback2; - -static const int16_t kBlamerBoxTolerance = 5; - -// Enum for expressing the source of error. -// Note: Please update kIncorrectResultReasonNames when modifying this enum. -enum IncorrectResultReason { - // The text recorded in best choice == truth text - IRR_CORRECT, - // Either: Top choice is incorrect and is a dictionary word (language model - // is unlikely to help correct such errors, so blame the classifier). - // Or: the correct unichar was not included in shortlist produced by the - // classifier at all. - IRR_CLASSIFIER, - // Chopper have not found one or more splits that correspond to the correct - // character bounding boxes recorded in BlamerBundle::truth_word. - IRR_CHOPPER, - // Classifier did include correct unichars for each blob in the correct - // segmentation, however its rating could have been too bad to allow the - // language model to pull out the correct choice. On the other hand the - // strength of the language model might have been too weak to favor the - // correct answer, this we call this case a classifier-language model - // tradeoff error. - IRR_CLASS_LM_TRADEOFF, - // Page layout failed to produce the correct bounding box. Blame page layout - // if the truth was not found for the word, which implies that the bounding - // box of the word was incorrect (no truth word had a similar bounding box). - IRR_PAGE_LAYOUT, - // SegSearch heuristic prevented one or more blobs from the correct - // segmentation state to be classified (e.g. the blob was too wide). - IRR_SEGSEARCH_HEUR, - // The correct segmentaiton state was not explored because of poor SegSearch - // pain point prioritization. 
We blame SegSearch pain point prioritization - // if the best rating of a choice constructed from correct segmentation is - // better than that of the best choice (i.e. if we got to explore the correct - // segmentation state, language model would have picked the correct choice). - IRR_SEGSEARCH_PP, - // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word, - // and thus use the old language model (permuters). - // TODO(antonova): integrate the new language mode with chopper - IRR_CLASS_OLD_LM_TRADEOFF, - // If there is an incorrect adaptive template match with a better score than - // a correct one (either pre-trained or adapted), mark this as adaption error. - IRR_ADAPTION, - // split_and_recog_word() failed to find a suitable split in truth. - IRR_NO_TRUTH_SPLIT, - // Truth is not available for this word (e.g. when words in corrected content - // file are turned into ~~~~ because an appropriate alignment was not found. - IRR_NO_TRUTH, - // The text recorded in best choice != truth text, but none of the above - // reasons are set. - IRR_UNKNOWN, - - IRR_NUM_REASONS -}; - -// Blamer-related information to determine the source of errors. -struct BlamerBundle { - static const char *IncorrectReasonName(IncorrectResultReason irr); - BlamerBundle() : truth_has_char_boxes_(false), - incorrect_result_reason_(IRR_CORRECT), - lattice_data_(nullptr) { ClearResults(); } - BlamerBundle(const BlamerBundle &other) { - this->CopyTruth(other); - this->CopyResults(other); - } - ~BlamerBundle() { delete[] lattice_data_; } - - // Accessors. 
- STRING TruthString() const { - STRING truth_str; - for (int i = 0; i < truth_text_.length(); ++i) - truth_str += truth_text_[i]; - return truth_str; - } - IncorrectResultReason incorrect_result_reason() const { - return incorrect_result_reason_; - } - bool NoTruth() const { - return incorrect_result_reason_ == IRR_NO_TRUTH || - incorrect_result_reason_ == IRR_PAGE_LAYOUT; - } - bool HasDebugInfo() const { - return debug_.length() > 0 || misadaption_debug_.length() > 0; - } - const STRING& debug() const { - return debug_; - } - const STRING& misadaption_debug() const { - return misadaption_debug_; - } - void UpdateBestRating(float rating) { - if (rating < best_correctly_segmented_rating_) - best_correctly_segmented_rating_ = rating; - } - int correct_segmentation_length() const { - return correct_segmentation_cols_.length(); - } - // Returns true if the given ratings matrix col,row position is included - // in the correct segmentation path at the given index. - bool MatrixPositionCorrect(int index, const MATRIX_COORD& coord) { - return correct_segmentation_cols_[index] == coord.col && - correct_segmentation_rows_[index] == coord.row; - } - void set_best_choice_is_dict_and_top_choice(bool value) { - best_choice_is_dict_and_top_choice_ = value; - } - const char* lattice_data() const { - return lattice_data_; - } - int lattice_size() const { - return lattice_size_; // size of lattice_data in bytes - } - void set_lattice_data(const char* data, int size) { - lattice_size_ = size; - delete [] lattice_data_; - lattice_data_ = new char[lattice_size_]; - memcpy(lattice_data_, data, lattice_size_); - } - const tesseract::ParamsTrainingBundle& params_training_bundle() const { - return params_training_bundle_; - } - // Adds a new ParamsTrainingHypothesis to the current hypothesis list. - void AddHypothesis(const tesseract::ParamsTrainingHypothesis& hypo) { - params_training_bundle_.AddHypothesis(hypo); - } - - // Functions to setup the blamer. 
- // Whole word string, whole word bounding box. - void SetWordTruth(const UNICHARSET& unicharset, - const char* truth_str, const TBOX& word_box); - // Single "character" string, "character" bounding box. - // May be called multiple times to indicate the characters in a word. - void SetSymbolTruth(const UNICHARSET& unicharset, - const char* char_str, const TBOX& char_box); - // Marks that there is something wrong with the truth text, like it contains - // reject characters. - void SetRejectedTruth(); - - // Returns true if the provided word_choice is correct. - bool ChoiceIsCorrect(const WERD_CHOICE* word_choice) const; - - void ClearResults() { - norm_truth_word_.DeleteAllBoxes(); - norm_box_tolerance_ = 0; - if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT; - debug_ = ""; - segsearch_is_looking_for_blame_ = false; - best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating; - correct_segmentation_cols_.clear(); - correct_segmentation_rows_.clear(); - best_choice_is_dict_and_top_choice_ = false; - delete[] lattice_data_; - lattice_data_ = nullptr; - lattice_size_ = 0; - } - void CopyTruth(const BlamerBundle &other) { - truth_has_char_boxes_ = other.truth_has_char_boxes_; - truth_word_ = other.truth_word_; - truth_text_ = other.truth_text_; - incorrect_result_reason_ = - (other.NoTruth() ? 
other.incorrect_result_reason_ : IRR_CORRECT); - } - void CopyResults(const BlamerBundle &other) { - norm_truth_word_ = other.norm_truth_word_; - norm_box_tolerance_ = other.norm_box_tolerance_; - incorrect_result_reason_ = other.incorrect_result_reason_; - segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_; - best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_; - correct_segmentation_cols_ = other.correct_segmentation_cols_; - correct_segmentation_rows_ = other.correct_segmentation_rows_; - best_choice_is_dict_and_top_choice_ = - other.best_choice_is_dict_and_top_choice_; - if (other.lattice_data_ != nullptr) { - lattice_data_ = new char[other.lattice_size_]; - memcpy(lattice_data_, other.lattice_data_, other.lattice_size_); - lattice_size_ = other.lattice_size_; - } else { - lattice_data_ = nullptr; - } - } - const char *IncorrectReason() const; - - // Appends choice and truth details to the given debug string. - void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, - STRING *debug); - - // Sets up the norm_truth_word from truth_word using the given DENORM. - void SetupNormTruthWord(const DENORM& denorm); - - // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty - // bundles) where the right edge/ of the left-hand word is word1_right, - // and the left edge of the right-hand word is word2_left. - void SplitBundle(int word1_right, int word2_left, bool debug, - BlamerBundle* bundle1, BlamerBundle* bundle2) const; - // "Joins" the blames from bundle1 and bundle2 into *this. - void JoinBlames(const BlamerBundle& bundle1, const BlamerBundle& bundle2, - bool debug); - - // If a blob with the same bounding box as one of the truth character - // bounding boxes is not classified as the corresponding truth character - // blames character classifier for incorrect answer. 
- void BlameClassifier(const UNICHARSET& unicharset, - const TBOX& blob_box, - const BLOB_CHOICE_LIST& choices, - bool debug); - - - // Checks whether chops were made at all the character bounding box - // boundaries in word->truth_word. If not - blames the chopper for an - // incorrect answer. - void SetChopperBlame(const WERD_RES* word, bool debug); - // Blames the classifier or the language model if, after running only the - // chopper, best_choice is incorrect and no blame has been yet set. - // Blames the classifier if best_choice is classifier's top choice and is a - // dictionary word (i.e. language model could not have helped). - // Otherwise, blames the language model (formerly permuter word adjustment). - void BlameClassifierOrLangModel( - const WERD_RES* word, - const UNICHARSET& unicharset, bool valid_permuter, bool debug); - // Sets up the correct_segmentation_* to mark the correct bounding boxes. - void SetupCorrectSegmentation(const TWERD* word, bool debug); - - // Returns true if a guided segmentation search is needed. - bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const; - // Setup ready to guide the segmentation search to the correct segmentation. - // The callback pp_cb is used to avoid a cyclic dependency. - // It calls into LMPainPoints::GenerateForBlamer by pre-binding the - // WERD_RES, and the LMPainPoints itself. - // pp_cb must be a permanent callback, and should be deleted by the caller. - void InitForSegSearch(const WERD_CHOICE *best_choice, - MATRIX* ratings, UNICHAR_ID wildcard_id, - bool debug, STRING *debug_str, - TessResultCallback2* pp_cb); - // Returns true if the guided segsearch is in progress. - bool GuidedSegsearchStillGoing() const; - // The segmentation search has ended. Sets the blame appropriately. 
- void FinishSegSearch(const WERD_CHOICE *best_choice, - bool debug, STRING *debug_str); - - // If the bundle is null or still does not indicate the correct result, - // fix it and use some backup reason for the blame. - static void LastChanceBlame(bool debug, WERD_RES* word); - - // Sets the misadaption debug if this word is incorrect, as this word is - // being adapted to. - void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug); - - private: - // Copy assignment operator (currently unused, therefore private). - BlamerBundle& operator=(const BlamerBundle& other); - void SetBlame(IncorrectResultReason irr, const STRING &msg, - const WERD_CHOICE *choice, bool debug) { - incorrect_result_reason_ = irr; - debug_ = IncorrectReason(); - debug_ += " to blame: "; - FillDebugString(msg, choice, &debug_); - if (debug) tprintf("SetBlame(): %s", debug_.string()); - } - - private: - // Set to true when bounding boxes for individual unichars are recorded. - bool truth_has_char_boxes_; - // The true_word (in the original image coordinate space) contains ground - // truth bounding boxes for this WERD_RES. - tesseract::BoxWord truth_word_; - // Same as above, but in normalized coordinates - // (filled in by WERD_RES::SetupForRecognition()). - tesseract::BoxWord norm_truth_word_; - // Tolerance for bounding box comparisons in normalized space. - int norm_box_tolerance_; - // Contains ground truth unichar for each of the bounding boxes in truth_word. - GenericVector truth_text_; - // The reason for incorrect OCR result. - IncorrectResultReason incorrect_result_reason_; - // Debug text associated with the blame. - STRING debug_; - // Misadaption debug information (filled in if this word was misadapted to). - STRING misadaption_debug_; - // Variables used by the segmentation search when looking for the blame. - // Set to true while segmentation search is continued after the usual - // termination condition in order to look for the blame. 
- bool segsearch_is_looking_for_blame_; - // Best rating for correctly segmented path - // (set and used by SegSearch when looking for blame). - float best_correctly_segmented_rating_; - // Vectors populated by SegSearch to indicate column and row indices that - // correspond to blobs with correct bounding boxes. - GenericVector correct_segmentation_cols_; - GenericVector correct_segmentation_rows_; - // Set to true if best choice is a dictionary word and - // classifier's top choice. - bool best_choice_is_dict_and_top_choice_; - // Serialized segmentation search lattice. - char *lattice_data_; - int lattice_size_; // size of lattice_data in bytes - // Information about hypotheses (paths) explored by the segmentation search. - tesseract::ParamsTrainingBundle params_training_bundle_; -}; - - -#endif // TESSERACT_CCSTRUCT_BLAMER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobbox.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobbox.cpp deleted file mode 100644 index c3e410e7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobbox.cpp +++ /dev/null @@ -1,1096 +0,0 @@ -/********************************************************************** - * File: blobbox.cpp (Formerly blobnbox.c) - * Description: Code for the textord blob class. - * Author: Ray Smith - * Created: Thu Jul 30 09:08:51 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "blobbox.h" -#include // for max, min -#include // for INT32_MAX, INT16_MAX -#include "allheaders.h" // for pixGetHeight, pixGetPixel -#include "blobs.h" // for TPOINT -#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE, C_OUTLINE_LIST -#include "environ.h" // for l_uint32 -#include "helpers.h" // for UpdateRange, IntCastRounded -#include "host.h" // for NearlyEqual, TRUE -#include "points.h" // for operator+=, ICOORD::rotate - -struct Pix; - -#define PROJECTION_MARGIN 10 //arbitrary - -ELISTIZE(BLOBNBOX) -ELIST2IZE(TO_ROW) -ELISTIZE(TO_BLOCK) - -// Up to 30 degrees is allowed for rotations of diacritic blobs. -const double kCosSmallAngle = 0.866; -// Min aspect ratio for a joined word to indicate an obvious flow direction. -const double kDefiniteAspectRatio = 2.0; -// Multiple of short length in perimeter to make a joined word. -const double kComplexShapePerimeterRatio = 1.5; -// Min multiple of linesize for medium-sized blobs in ReFilterBlobs. -const double kMinMediumSizeRatio = 0.25; -// Max multiple of linesize for medium-sized blobs in ReFilterBlobs. -const double kMaxMediumSizeRatio = 4.0; - -// Rotates the box and the underlying blob. -void BLOBNBOX::rotate(FCOORD rotation) { - cblob_ptr->rotate(rotation); - rotate_box(rotation); - compute_bounding_box(); -} - -// Reflect the box in the y-axis, leaving the underlying blob untouched. -void BLOBNBOX::reflect_box_in_y_axis() { - int left = -box.right(); - box.set_right(-box.left()); - box.set_left(left); -} - -// Rotates the box by the angle given by rotation. -// If the blob is a diacritic, then only small rotations for skew -// correction can be applied. 
-void BLOBNBOX::rotate_box(FCOORD rotation) { - if (IsDiacritic()) { - ASSERT_HOST(rotation.x() >= kCosSmallAngle) - ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_); - ICOORD bottom_pt(top_pt.x(), base_char_bottom_); - top_pt.rotate(rotation); - base_char_top_ = top_pt.y(); - bottom_pt.rotate(rotation); - base_char_bottom_ = bottom_pt.y(); - box.rotate(rotation); - } else { - box.rotate(rotation); - set_diacritic_box(box); - } -} - -/********************************************************************** - * BLOBNBOX::merge - * - * Merge this blob with the given blob, which should be after this. - **********************************************************************/ -void BLOBNBOX::merge( //merge blobs - BLOBNBOX *nextblob //blob to join with - ) { - box += nextblob->box; //merge boxes - set_diacritic_box(box); - nextblob->joined = TRUE; -} - - -// Merge this with other, taking the outlines from other. -// Other is not deleted, but left for the caller to handle. -void BLOBNBOX::really_merge(BLOBNBOX* other) { - if (other->cblob_ptr != nullptr) { - C_OUTLINE_IT ol_it(cblob_ptr->out_list()); - ol_it.add_list_after(other->cblob_ptr->out_list()); - } - compute_bounding_box(); -} - - -/********************************************************************** - * BLOBNBOX::chop - * - * Chop this blob into equal sized pieces using the x height as a guide. - * The blob is not actually chopped. Instead, fake blobs are inserted - * with the relevant bounding boxes. 
- **********************************************************************/ - -void BLOBNBOX::chop( //chop blobs - BLOBNBOX_IT *start_it, //location of this - BLOBNBOX_IT *end_it, //iterator - FCOORD rotation, //for landscape - float xheight //of line - ) { - int16_t blobcount; //no of blobs - BLOBNBOX *newblob; //fake blob - BLOBNBOX *blob; //current blob - int16_t blobindex; //number of chop - int16_t leftx; //left edge of blob - float blobwidth; //width of each - float rightx; //right edge to scan - float ymin, ymax; //limits of new blob - float test_ymin, test_ymax; //limits of part blob - ICOORD bl, tr; //corners of box - BLOBNBOX_IT blob_it; //blob iterator - - //get no of chops - blobcount = (int16_t) floor (box.width () / xheight); - if (blobcount > 1 && cblob_ptr != nullptr) { - //width of each - blobwidth = (float) (box.width () + 1) / blobcount; - for (blobindex = blobcount - 1, rightx = box.right (); - blobindex >= 0; blobindex--, rightx -= blobwidth) { - ymin = (float) INT32_MAX; - ymax = (float) -INT32_MAX; - blob_it = *start_it; - do { - blob = blob_it.data (); - find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, - rightx, - /*rotation, */ test_ymin, test_ymax); - blob_it.forward (); - UpdateRange(test_ymin, test_ymax, &ymin, &ymax); - } - while (blob != end_it->data ()); - if (ymin < ymax) { - leftx = (int16_t) floor (rightx - blobwidth); - if (leftx < box.left ()) - leftx = box.left (); //clip to real box - bl = ICOORD (leftx, (int16_t) floor (ymin)); - tr = ICOORD ((int16_t) ceil (rightx), (int16_t) ceil (ymax)); - if (blobindex == 0) - box = TBOX (bl, tr); //change box - else { - newblob = new BLOBNBOX; - //box is all it has - newblob->box = TBOX (bl, tr); - //stay on current - newblob->base_char_top_ = tr.y(); - newblob->base_char_bottom_ = bl.y(); - end_it->add_after_stay_put (newblob); - } - } - } - } -} - -// Returns the box gaps between this and its neighbours_ in an array -// indexed by BlobNeighbourDir. 
-void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { - for (int dir = 0; dir < BND_COUNT; ++dir) { - gaps[dir] = INT16_MAX; - BLOBNBOX* neighbour = neighbours_[dir]; - if (neighbour != nullptr) { - const TBOX& n_box = neighbour->bounding_box(); - if (dir == BND_LEFT || dir == BND_RIGHT) { - gaps[dir] = box.x_gap(n_box); - } else { - gaps[dir] = box.y_gap(n_box); - } - } - } -} -// Returns the min and max horizontal and vertical gaps (from NeighbourGaps) -// modified so that if the max exceeds the max dimension of the blob, and -// the min is less, the max is replaced with the min. -// The objective is to catch cases where there is only a single neighbour -// and avoid reporting the other gap as a ridiculously large number -void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max, - int* v_min, int* v_max) const { - int max_dimension = std::max(box.width(), box.height()); - int gaps[BND_COUNT]; - NeighbourGaps(gaps); - *h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]); - *h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]); - if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min; - *v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]); - *v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]); - if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min; -} - -// Nulls out any neighbours that are DeletableNoise to remove references. -void BLOBNBOX::CleanNeighbours() { - for (int dir = 0; dir < BND_COUNT; ++dir) { - BLOBNBOX* neighbour = neighbours_[dir]; - if (neighbour != nullptr && neighbour->DeletableNoise()) { - neighbours_[dir] = nullptr; - good_stroke_neighbours_[dir] = false; - } - } -} - -// Returns positive if there is at least one side neighbour that has a similar -// stroke width and is not on the other side of a rule line. 
-int BLOBNBOX::GoodTextBlob() const { - int score = 0; - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - if (good_stroke_neighbour(bnd)) - ++score; - } - return score; -} - -// Returns the number of side neighbours that are of type BRT_NOISE. -int BLOBNBOX::NoisyNeighbours() const { - int count = 0; - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - BLOBNBOX* blob = neighbour(bnd); - if (blob != nullptr && blob->region_type() == BRT_NOISE) - ++count; - } - return count; -} - -// Returns true, and sets vert_possible/horz_possible if the blob has some -// feature that makes it individually appear to flow one way. -// eg if it has a high aspect ratio, yet has a complex shape, such as a -// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc. -bool BLOBNBOX::DefiniteIndividualFlow() { - if (cblob() == nullptr) return false; - int box_perimeter = 2 * (box.height() + box.width()); - if (box.width() > box.height() * kDefiniteAspectRatio) { - // Attempt to distinguish a wide joined word from a dash. - // If it is a dash, then its perimeter is approximately - // 2 * (box width + stroke width), but more if the outline is noisy, - // so perimeter - 2*(box width + stroke width) should be close to zero. - // A complex shape such as a joined word should have a much larger value. - int perimeter = cblob()->perimeter(); - if (vert_stroke_width() > 0 || perimeter <= 0) - perimeter -= 2 * vert_stroke_width(); - else - perimeter -= 4 * cblob()->area() / perimeter; - perimeter -= 2 * box.width(); - // Use a multiple of the box perimeter as a threshold. - if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { - set_vert_possible(false); - set_horz_possible(true); - return true; - } - } - if (box.height() > box.width() * kDefiniteAspectRatio) { - // As above, but for a putative vertical word vs a I/1/l. 
- int perimeter = cblob()->perimeter(); - if (horz_stroke_width() > 0 || perimeter <= 0) - perimeter -= 2 * horz_stroke_width(); - else - perimeter -= 4 * cblob()->area() / perimeter; - perimeter -= 2 * box.height(); - if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { - set_vert_possible(true); - set_horz_possible(false); - return true; - } - } - return false; -} - -// Returns true if there is no tabstop violation in merging this and other. -bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const { - if (box.left() < other.box.left() && box.left() < other.left_rule_) - return false; - if (other.box.left() < box.left() && other.box.left() < left_rule_) - return false; - if (box.right() > other.box.right() && box.right() > other.right_rule_) - return false; - if (other.box.right() > box.right() && other.box.right() > right_rule_) - return false; - return true; -} - -// Returns true if other has a similar stroke width to this. -bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other, - double fractional_tolerance, - double constant_tolerance) const { - // The perimeter-based width is used as a backup in case there is - // no information in the blob. 
- double p_width = area_stroke_width(); - double n_p_width = other.area_stroke_width(); - float h_tolerance = horz_stroke_width_ * fractional_tolerance - + constant_tolerance; - float v_tolerance = vert_stroke_width_ * fractional_tolerance - + constant_tolerance; - double p_tolerance = p_width * fractional_tolerance - + constant_tolerance; - bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; - bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; - bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, - other.horz_stroke_width_, h_tolerance); - bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, - other.vert_stroke_width_, v_tolerance); - bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); - // For a match, at least one of the horizontal and vertical widths - // must match, and the other one must either match or be zero. - // Only if both are zero will we look at the perimeter metric. - return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); -} - -// Returns a bounding box of the outline contained within the -// given horizontal range. -TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { - FCOORD no_rotation(1.0f, 0.0f); - float top = box.top(); - float bottom = box.bottom(); - if (cblob_ptr != nullptr) { - find_cblob_limits(cblob_ptr, static_cast(left), - static_cast(right), no_rotation, - bottom, top); - } - - if (top < bottom) { - top = box.top(); - bottom = box.bottom(); - } - FCOORD bot_left(left, bottom); - FCOORD top_right(right, top); - TBOX shrunken_box(bot_left); - TBOX shrunken_box2(top_right); - shrunken_box += shrunken_box2; - return shrunken_box; -} - -// Estimates and stores the baseline position based on the shape of the -// outline. -void BLOBNBOX::EstimateBaselinePosition() { - baseline_y_ = box.bottom(); // The default. 
- if (cblob_ptr == nullptr) return; - baseline_y_ = cblob_ptr->EstimateBaselinePosition(); -} - -// Helper to call CleanNeighbours on all blobs on the list. -void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) { - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - blob_it.data()->CleanNeighbours(); - } -} - -// Helper to delete all the deletable blobs on the list. -void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) { - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->DeletableNoise()) { - delete blob->cblob(); - delete blob_it.extract(); - } - } -} - -// Helper to compute edge offsets for all the blobs on the list. -// See coutln.h for an explanation of edge offsets. -void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey, - BLOBNBOX_LIST* blobs) { - int grey_height = 0; - int thr_height = 0; - int scale_factor = 1; - if (thresholds != nullptr && grey != nullptr) { - grey_height = pixGetHeight(grey); - thr_height = pixGetHeight(thresholds); - scale_factor = - IntCastRounded(static_cast(grey_height) / thr_height); - } - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->cblob() != nullptr) { - // Get the threshold that applies to this blob. - l_uint32 threshold = 128; - if (thresholds != nullptr && grey != nullptr) { - const TBOX& box = blob->cblob()->bounding_box(); - // Transform the coordinates if required. - TPOINT pt((box.left() + box.right()) / 2, - (box.top() + box.bottom()) / 2); - pixGetPixel(thresholds, pt.x / scale_factor, - thr_height - 1 - pt.y / scale_factor, &threshold); - } - blob->cblob()->ComputeEdgeOffsets(threshold, grey); - } - } -} - - -#ifndef GRAPHICS_DISABLED -// Helper to draw all the blobs on the list in the given body_colour, -// with child outlines in the child_colour. 
-void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win) { - BLOBNBOX_IT it(list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->plot(win, body_colour, child_colour); - } -} - -// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the -// given list in the given body_colour, with child outlines in the -// child_colour. -void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win) { - BLOBNBOX_IT it(list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - if (blob->DeletableNoise()) - blob->plot(win, body_colour, child_colour); - } -} - -ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, - BlobTextFlowType flow_type) { - switch (region_type) { - case BRT_HLINE: - return ScrollView::BROWN; - case BRT_VLINE: - return ScrollView::DARK_GREEN; - case BRT_RECTIMAGE: - return ScrollView::RED; - case BRT_POLYIMAGE: - return ScrollView::ORANGE; - case BRT_UNKNOWN: - return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE; - case BRT_VERT_TEXT: - if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) - return ScrollView::GREEN; - if (flow_type == BTFT_CHAIN) - return ScrollView::LIME_GREEN; - return ScrollView::YELLOW; - case BRT_TEXT: - if (flow_type == BTFT_STRONG_CHAIN) - return ScrollView::BLUE; - if (flow_type == BTFT_TEXT_ON_IMAGE) - return ScrollView::LIGHT_BLUE; - if (flow_type == BTFT_CHAIN) - return ScrollView::MEDIUM_BLUE; - if (flow_type == BTFT_LEADER) - return ScrollView::WHEAT; - if (flow_type == BTFT_NONTEXT) - return ScrollView::PINK; - return ScrollView::MAGENTA; - default: - return ScrollView::GREY; - } -} - -// Keep in sync with BlobRegionType. 
-ScrollView::Color BLOBNBOX::BoxColor() const { - return TextlineColor(region_type_, flow_); -} - -void BLOBNBOX::plot(ScrollView* window, // window to draw in - ScrollView::Color blob_colour, // for outer bits - ScrollView::Color child_colour) { // for holes - if (cblob_ptr != nullptr) - cblob_ptr->plot(window, blob_colour, child_colour); -} -#endif -/********************************************************************** - * find_cblob_limits - * - * Scan the outlines of the cblob to locate the y min and max - * between the given x limits. - **********************************************************************/ - -void find_cblob_limits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - FCOORD rotation, //for landscape - float &ymin, //output y limits - float &ymax) { - int16_t stepindex; //current point - ICOORD pos; //current coords - ICOORD vec; //rotated step - C_OUTLINE *outline; //current outline - //outlines - C_OUTLINE_IT out_it = blob->out_list (); - - ymin = (float) INT32_MAX; - ymax = (float) -INT32_MAX; - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - outline = out_it.data (); - pos = outline->start_pos (); //get coords - pos.rotate (rotation); - for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { - //inside - if (pos.x () >= leftx && pos.x () <= rightx) { - UpdateRange(pos.y(), &ymin, &ymax); - } - vec = outline->step (stepindex); - vec.rotate (rotation); - pos += vec; //move to next - } - } -} - - -/********************************************************************** - * find_cblob_vlimits - * - * Scan the outlines of the cblob to locate the y min and max - * between the given x limits. 
- **********************************************************************/ - -void find_cblob_vlimits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - float &ymin, //output y limits - float &ymax) { - int16_t stepindex; //current point - ICOORD pos; //current coords - ICOORD vec; //rotated step - C_OUTLINE *outline; //current outline - //outlines - C_OUTLINE_IT out_it = blob->out_list (); - - ymin = (float) INT32_MAX; - ymax = (float) -INT32_MAX; - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - outline = out_it.data (); - pos = outline->start_pos (); //get coords - for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { - //inside - if (pos.x () >= leftx && pos.x () <= rightx) { - UpdateRange(pos.y(), &ymin, &ymax); - } - vec = outline->step (stepindex); - pos += vec; //move to next - } - } -} - - -/********************************************************************** - * find_cblob_hlimits - * - * Scan the outlines of the cblob to locate the x min and max - * between the given y limits. 
- **********************************************************************/ - -void find_cblob_hlimits( //get x limits - C_BLOB *blob, //blob to search - float bottomy, //y limits - float topy, - float &xmin, //output x limits - float &xmax) { - int16_t stepindex; //current point - ICOORD pos; //current coords - ICOORD vec; //rotated step - C_OUTLINE *outline; //current outline - //outlines - C_OUTLINE_IT out_it = blob->out_list (); - - xmin = (float) INT32_MAX; - xmax = (float) -INT32_MAX; - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - outline = out_it.data (); - pos = outline->start_pos (); //get coords - for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { - //inside - if (pos.y () >= bottomy && pos.y () <= topy) { - UpdateRange(pos.x(), &xmin, &xmax); - } - vec = outline->step (stepindex); - pos += vec; //move to next - } - } -} - -/********************************************************************** - * crotate_cblob - * - * Rotate the copy by the given vector and return a C_BLOB. - **********************************************************************/ - -C_BLOB *crotate_cblob( //rotate it - C_BLOB *blob, //blob to search - FCOORD rotation //for landscape - ) { - C_OUTLINE_LIST out_list; //output outlines - //input outlines - C_OUTLINE_IT in_it = blob->out_list (); - //output outlines - C_OUTLINE_IT out_it = &out_list; - - for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) { - out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation)); - } - return new C_BLOB (&out_list); -} - - -/********************************************************************** - * box_next - * - * Compute the bounding box of this blob with merging of x overlaps - * but no pre-chopping. - * Then move the iterator on to the start of the next blob. 
- **********************************************************************/ - -TBOX box_next( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ) { - BLOBNBOX *blob; //current blob - TBOX result; //total box - - blob = it->data (); - result = blob->bounding_box (); - do { - it->forward (); - blob = it->data (); - if (blob->cblob() == nullptr) - //was pre-chopped - result += blob->bounding_box (); - } - //until next real blob - while ((blob->cblob() == nullptr) || blob->joined_to_prev()); - return result; -} - - -/********************************************************************** - * box_next_pre_chopped - * - * Compute the bounding box of this blob with merging of x overlaps - * but WITH pre-chopping. - * Then move the iterator on to the start of the next pre-chopped blob. - **********************************************************************/ - -TBOX box_next_pre_chopped( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ) { - BLOBNBOX *blob; //current blob - TBOX result; //total box - - blob = it->data (); - result = blob->bounding_box (); - do { - it->forward (); - blob = it->data (); - } - //until next real blob - while (blob->joined_to_prev ()); - return result; -} - - -/********************************************************************** - * TO_ROW::TO_ROW - * - * Constructor to make a row from a blob. 
- **********************************************************************/ - -TO_ROW::TO_ROW ( //constructor -BLOBNBOX * blob, //first blob -float top, //corrected top -float bottom, //of row -float row_size //ideal -) { - clear(); - y_min = bottom; - y_max = top; - initial_y_min = bottom; - - float diff; //in size - BLOBNBOX_IT it = &blobs; //list of blobs - - it.add_to_end (blob); - diff = top - bottom - row_size; - if (diff > 0) { - y_max -= diff / 2; - y_min += diff / 2; - } - //very small object - else if ((top - bottom) * 3 < row_size) { - diff = row_size / 3 + bottom - top; - y_max += diff / 2; - y_min -= diff / 2; - } -} - -void TO_ROW::print() const { - tprintf("pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g," - " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g," - " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n", - pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, - spacing, xheight, y_origin, xheight_evidence, ascrise, descdrop, - body_size, min_space, max_nonspace, space_threshold, kern_size, - space_size); -} - -/********************************************************************** - * TO_ROW:add_blob - * - * Add the blob to the end of the row. - **********************************************************************/ - -void TO_ROW::add_blob( //constructor - BLOBNBOX *blob, //first blob - float top, //corrected top - float bottom, //of row - float row_size //ideal - ) { - float allowed; //allowed expansion - float available; //expansion - BLOBNBOX_IT it = &blobs; //list of blobs - - it.add_to_end (blob); - allowed = row_size + y_min - y_max; - if (allowed > 0) { - available = top > y_max ? 
top - y_max : 0; - if (bottom < y_min) - //total available - available += y_min - bottom; - if (available > 0) { - available += available; //do it gradually - if (available < allowed) - available = allowed; - if (bottom < y_min) - y_min -= (y_min - bottom) * allowed / available; - if (top > y_max) - y_max += (top - y_max) * allowed / available; - } - } -} - - -/********************************************************************** - * TO_ROW:insert_blob - * - * Add the blob to the row in the correct position. - **********************************************************************/ - -void TO_ROW::insert_blob( //constructor - BLOBNBOX *blob //first blob - ) { - BLOBNBOX_IT it = &blobs; //list of blobs - - if (it.empty ()) - it.add_before_then_move (blob); - else { - it.mark_cycle_pt (); - while (!it.cycled_list () - && it.data ()->bounding_box ().left () <= - blob->bounding_box ().left ()) - it.forward (); - if (it.cycled_list ()) - it.add_to_end (blob); - else - it.add_before_stay_put (blob); - } -} - - -/********************************************************************** - * TO_ROW::compute_vertical_projection - * - * Compute the vertical projection of a TO_ROW from its blobs. 
- **********************************************************************/ - -void TO_ROW::compute_vertical_projection() { //project whole row - TBOX row_box; //bound of row - BLOBNBOX *blob; //current blob - TBOX blob_box; //bounding box - BLOBNBOX_IT blob_it = blob_list (); - - if (blob_it.empty ()) - return; - row_box = blob_it.data ()->bounding_box (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) - row_box += blob_it.data ()->bounding_box (); - - projection.set_range (row_box.left () - PROJECTION_MARGIN, - row_box.right () + PROJECTION_MARGIN); - projection_left = row_box.left () - PROJECTION_MARGIN; - projection_right = row_box.right () + PROJECTION_MARGIN; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data(); - if (blob->cblob() != nullptr) - vertical_cblob_projection(blob->cblob(), &projection); - } -} - - -/********************************************************************** - * TO_ROW::clear - * - * Zero out all scalar members. - **********************************************************************/ -void TO_ROW::clear() { - all_caps = false; - used_dm_model = false; - projection_left = 0; - projection_right = 0; - pitch_decision = PITCH_DUNNO; - fixed_pitch = 0.0; - fp_space = 0.0; - fp_nonsp = 0.0; - pr_space = 0.0; - pr_nonsp = 0.0; - spacing = 0.0; - xheight = 0.0; - xheight_evidence = 0; - body_size = 0.0; - ascrise = 0.0; - descdrop = 0.0; - min_space = 0; - max_nonspace = 0; - space_threshold = 0; - kern_size = 0.0; - space_size = 0.0; - y_min = 0.0; - y_max = 0.0; - initial_y_min = 0.0; - m = 0.0; - c = 0.0; - error = 0.0; - para_c = 0.0; - para_error = 0.0; - y_origin = 0.0; - credibility = 0.0; - num_repeated_sets_ = -1; -} - - -/********************************************************************** - * vertical_cblob_projection - * - * Compute the vertical projection of a cblob from its outlines - * and add to the given STATS. 
- **********************************************************************/ - -void vertical_cblob_projection( //project outlines - C_BLOB *blob, //blob to project - STATS *stats //output - ) { - //outlines of blob - C_OUTLINE_IT out_it = blob->out_list (); - - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_coutline_projection (out_it.data (), stats); - } -} - - -/********************************************************************** - * vertical_coutline_projection - * - * Compute the vertical projection of a outline from its outlines - * and add to the given STATS. - **********************************************************************/ - -void vertical_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ) { - ICOORD pos; //current point - ICOORD step; //edge step - int32_t length; //of outline - int16_t stepindex; //current step - C_OUTLINE_IT out_it = outline->child (); - - pos = outline->start_pos (); - length = outline->pathlength (); - for (stepindex = 0; stepindex < length; stepindex++) { - step = outline->step (stepindex); - if (step.x () > 0) { - stats->add (pos.x (), -pos.y ()); - } else if (step.x () < 0) { - stats->add (pos.x () - 1, pos.y ()); - } - pos += step; - } - - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_coutline_projection (out_it.data (), stats); - } -} - - -/********************************************************************** - * TO_BLOCK::TO_BLOCK - * - * Constructor to make a TO_BLOCK from a real block. - **********************************************************************/ - -TO_BLOCK::TO_BLOCK( //make a block - BLOCK *src_block //real block - ) { - clear(); - block = src_block; -} - -static void clear_blobnboxes(BLOBNBOX_LIST* boxes) { - BLOBNBOX_IT it = boxes; - // A BLOBNBOX generally doesn't own its blobs, so if they do, you - // have to delete them explicitly. 
- for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* box = it.data(); - delete box->cblob(); - } -} - -/********************************************************************** - * TO_BLOCK::clear - * - * Zero out all scalar members. - **********************************************************************/ -void TO_BLOCK::clear() { - block = nullptr; - pitch_decision = PITCH_DUNNO; - line_spacing = 0.0; - line_size = 0.0; - max_blob_size = 0.0; - baseline_offset = 0.0; - xheight = 0.0; - fixed_pitch = 0.0; - kern_size = 0.0; - space_size = 0.0; - min_space = 0; - max_nonspace = 0; - fp_space = 0.0; - fp_nonsp = 0.0; - pr_space = 0.0; - pr_nonsp = 0.0; - key_row = nullptr; -} - - -TO_BLOCK::~TO_BLOCK() { - // Any residual BLOBNBOXes at this stage own their blobs, so delete them. - clear_blobnboxes(&blobs); - clear_blobnboxes(&underlines); - clear_blobnboxes(&noise_blobs); - clear_blobnboxes(&small_blobs); - clear_blobnboxes(&large_blobs); -} - -// Helper function to divide the input blobs over noise, small, medium -// and large lists. Blobs small in height and (small in width or large in width) -// go in the noise list. Dash (-) candidates go in the small list, and -// medium and large are by height. -// SIDE-EFFECT: reset all blobs to initial state by calling Init(). 
-static void SizeFilterBlobs(int min_height, int max_height, - BLOBNBOX_LIST* src_list, - BLOBNBOX_LIST* noise_list, - BLOBNBOX_LIST* small_list, - BLOBNBOX_LIST* medium_list, - BLOBNBOX_LIST* large_list) { - BLOBNBOX_IT noise_it(noise_list); - BLOBNBOX_IT small_it(small_list); - BLOBNBOX_IT medium_it(medium_list); - BLOBNBOX_IT large_it(large_list); - for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { - BLOBNBOX* blob = src_it.extract(); - blob->ReInit(); - int width = blob->bounding_box().width(); - int height = blob->bounding_box().height(); - if (height < min_height && - (width < min_height || width > max_height)) - noise_it.add_after_then_move(blob); - else if (height > max_height) - large_it.add_after_then_move(blob); - else if (height < min_height) - small_it.add_after_then_move(blob); - else - medium_it.add_after_then_move(blob); - } -} - -// Reorganize the blob lists with a different definition of small, medium -// and large, compared to the original definition. -// Height is still the primary filter key, but medium width blobs of small -// height become small, and very wide blobs of small height stay noise, along -// with small dot-shaped blobs. 
-void TO_BLOCK::ReSetAndReFilterBlobs() { - int min_height = IntCastRounded(kMinMediumSizeRatio * line_size); - int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size); - BLOBNBOX_LIST noise_list; - BLOBNBOX_LIST small_list; - BLOBNBOX_LIST medium_list; - BLOBNBOX_LIST large_list; - SizeFilterBlobs(min_height, max_height, &blobs, - &noise_list, &small_list, &medium_list, &large_list); - SizeFilterBlobs(min_height, max_height, &large_blobs, - &noise_list, &small_list, &medium_list, &large_list); - SizeFilterBlobs(min_height, max_height, &small_blobs, - &noise_list, &small_list, &medium_list, &large_list); - SizeFilterBlobs(min_height, max_height, &noise_blobs, - &noise_list, &small_list, &medium_list, &large_list); - BLOBNBOX_IT blob_it(&blobs); - blob_it.add_list_after(&medium_list); - blob_it.set_to_list(&large_blobs); - blob_it.add_list_after(&large_list); - blob_it.set_to_list(&small_blobs); - blob_it.add_list_after(&small_list); - blob_it.set_to_list(&noise_blobs); - blob_it.add_list_after(&noise_list); -} - -// Deletes noise blobs from all lists where not owned by a ColPartition. -void TO_BLOCK::DeleteUnownedNoise() { - BLOBNBOX::CleanNeighbours(&blobs); - BLOBNBOX::CleanNeighbours(&small_blobs); - BLOBNBOX::CleanNeighbours(&noise_blobs); - BLOBNBOX::CleanNeighbours(&large_blobs); - BLOBNBOX::DeleteNoiseBlobs(&blobs); - BLOBNBOX::DeleteNoiseBlobs(&small_blobs); - BLOBNBOX::DeleteNoiseBlobs(&noise_blobs); - BLOBNBOX::DeleteNoiseBlobs(&large_blobs); -} - -// Computes and stores the edge offsets on each blob for use in feature -// extraction, using greyscale if the supplied grey and thresholds pixes -// are 8-bit or otherwise (if nullptr or not 8 bit) the original binary -// edge step outlines. -// Thresholds must either be the same size as grey or an integer down-scale -// of grey. -// See coutln.h for an explanation of edge offsets. 
-void TO_BLOCK::ComputeEdgeOffsets(Pix* thresholds, Pix* grey) { - BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs); - BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs); - BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs); -} - -#ifndef GRAPHICS_DISABLED -// Draw the noise blobs from all lists in red. -void TO_BLOCK::plot_noise_blobs(ScrollView* win) { - BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win); - BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win); - BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win); - BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win); -} - -// Draw the blobs on the various lists in the block in different colors. -void TO_BLOCK::plot_graded_blobs(ScrollView* win) { - BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win); - BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, - win); - BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, - win); - BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win); -} - -/********************************************************************** - * plot_blob_list - * - * Draw a list of blobs. 
- **********************************************************************/ - -void plot_blob_list(ScrollView* win, // window to draw in - BLOBNBOX_LIST *list, // blob list - ScrollView::Color body_colour, // colour to draw - ScrollView::Color child_colour) { // colour of child - BLOBNBOX_IT it = list; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->plot(win, body_colour, child_colour); - } -} -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobbox.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobbox.h deleted file mode 100644 index d7e13ff5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobbox.h +++ /dev/null @@ -1,863 +0,0 @@ -/********************************************************************** - * File: blobbox.h (Formerly blobnbox.h) - * Description: Code for the textord blob class. - * Author: Ray Smith - * Created: Thu Jul 30 09:08:51 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef BLOBBOX_H -#define BLOBBOX_H - -#include // for PRId32 -#include // for sqrt -#include // for int16_t, int32_t -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "elst2.h" // for ELIST2_ITERATOR, ELIST2IZEH, ELIST2_LINK -#include "errcode.h" // for ASSERT_HOST -#include "ocrblock.h" // for BLOCK -#include "params.h" // for DoubleParam, double_VAR_H -#include "pdblock.h" // for PDBLK -#include "points.h" // for FCOORD, ICOORD, ICOORDELT_LIST -#include "quspline.h" // for QSPLINE -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::Color -#include "statistc.h" // for STATS -#include "stepblob.h" // for C_BLOB -#include "tprintf.h" // for tprintf -#include "werd.h" // for WERD_LIST - -class C_OUTLINE; - -struct Pix; - -enum PITCH_TYPE -{ - PITCH_DUNNO, // insufficient data - PITCH_DEF_FIXED, // definitely fixed - PITCH_MAYBE_FIXED, // could be - PITCH_DEF_PROP, - PITCH_MAYBE_PROP, - PITCH_CORR_FIXED, - PITCH_CORR_PROP -}; - -// The possible tab-stop types of each side of a BLOBNBOX. -// The ordering is important, as it is used for deleting dead-ends in the -// search. ALIGNED, CONFIRMED and VLINE should remain greater than the -// non-aligned, unset, or deleted members. -enum TabType { - TT_NONE, // Not a tab. - TT_DELETED, // Not a tab after detailed analysis. - TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate. - TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate. - TT_CONFIRMED, // Aligned with neighbours. - TT_VLINE // Detected as a vertical line. -}; - -// The possible region types of a BLOBNBOX. -// Note: keep all the text types > BRT_UNKNOWN and all the image types less. -// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the -// *Type static functions below. -enum BlobRegionType { - BRT_NOISE, // Neither text nor image. - BRT_HLINE, // Horizontal separator line. 
- BRT_VLINE, // Vertical separator line. - BRT_RECTIMAGE, // Rectangular image. - BRT_POLYIMAGE, // Non-rectangular image. - BRT_UNKNOWN, // Not determined yet. - BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented. - BRT_TEXT, // Convincing text. - - BRT_COUNT // Number of possibilities. -}; - -// enum for elements of arrays that refer to neighbours. -// NOTE: keep in this order, so ^2 can be used to flip direction. -enum BlobNeighbourDir { - BND_LEFT, - BND_BELOW, - BND_RIGHT, - BND_ABOVE, - BND_COUNT -}; - -// enum for special type of text characters, such as math symbol or italic. -enum BlobSpecialTextType { - BSTT_NONE, // No special. - BSTT_ITALIC, // Italic style. - BSTT_DIGIT, // Digit symbols. - BSTT_MATH, // Mathmatical symobls (not including digit). - BSTT_UNCLEAR, // Characters with low recognition rate. - BSTT_SKIP, // Characters that we skip labeling (usually too small). - BSTT_COUNT -}; - -inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) { - return static_cast(dir ^ 2); -} - -// BlobTextFlowType indicates the quality of neighbouring information -// related to a chain of connected components, either horizontally or -// vertically. Also used by ColPartition for the collection of blobs -// within, which should all have the same value in most cases. -enum BlobTextFlowType { - BTFT_NONE, // No text flow set yet. - BTFT_NONTEXT, // Flow too poor to be likely text. - BTFT_NEIGHBOURS, // Neighbours support flow in this direction. - BTFT_CHAIN, // There is a weak chain of text in this direction. - BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction. - BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image. - BTFT_LEADER, // Leader dots/dashes etc. - BTFT_COUNT -}; - -// Returns true if type1 dominates type2 in a merge. Mostly determined by the -// ordering of the enum, LEADER is weak and dominates nothing. 
-// The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that -// this cannot be true if t1 == t2, so the result is undefined. -inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) { - // LEADER always loses. - if (type1 == BTFT_LEADER) return false; - if (type2 == BTFT_LEADER) return true; - // With those out of the way, the ordering of the enum determines the result. - return type1 >= type2; -} - -namespace tesseract { -class ColPartition; -} - -class BLOBNBOX; -ELISTIZEH (BLOBNBOX) -class BLOBNBOX:public ELIST_LINK -{ - public: - BLOBNBOX() { - ConstructionInit(); - } - explicit BLOBNBOX(C_BLOB *srcblob) { - box = srcblob->bounding_box(); - ConstructionInit(); - cblob_ptr = srcblob; - area = static_cast(srcblob->area()); - } - ~BLOBNBOX() { - if (owns_cblob_) delete cblob_ptr; - } - static BLOBNBOX* RealBlob(C_OUTLINE* outline) { - C_BLOB* blob = new C_BLOB(outline); - return new BLOBNBOX(blob); - } - - // Rotates the box and the underlying blob. - void rotate(FCOORD rotation); - - // Methods that act on the box without touching the underlying blob. - // Reflect the box in the y-axis, leaving the underlying blob untouched. - void reflect_box_in_y_axis(); - // Rotates the box by the angle given by rotation. - // If the blob is a diacritic, then only small rotations for skew - // correction can be applied. - void rotate_box(FCOORD rotation); - // Moves just the box by the given vector. 
- void translate_box(ICOORD v) { - if (IsDiacritic()) { - box.move(v); - base_char_top_ += v.y(); - base_char_bottom_ += v.y(); - } else { - box.move(v); - set_diacritic_box(box); - } - } - void merge(BLOBNBOX *nextblob); - void really_merge(BLOBNBOX* other); - void chop( // fake chop blob - BLOBNBOX_IT *start_it, // location of this - BLOBNBOX_IT *blob_it, // iterator - FCOORD rotation, // for landscape - float xheight); // line height - - void NeighbourGaps(int gaps[BND_COUNT]) const; - void MinMaxGapsClipped(int* h_min, int* h_max, - int* v_min, int* v_max) const; - void CleanNeighbours(); - // Returns positive if there is at least one side neighbour that has a - // similar stroke width and is not on the other side of a rule line. - int GoodTextBlob() const; - // Returns the number of side neighbours that are of type BRT_NOISE. - int NoisyNeighbours() const; - - // Returns true if the blob is noise and has no owner. - bool DeletableNoise() const { - return owner() == nullptr && region_type() == BRT_NOISE; - } - - // Returns true, and sets vert_possible/horz_possible if the blob has some - // feature that makes it individually appear to flow one way. - // eg if it has a high aspect ratio, yet has a complex shape, such as a - // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1. - bool DefiniteIndividualFlow(); - - // Returns true if there is no tabstop violation in merging this and other. - bool ConfirmNoTabViolation(const BLOBNBOX& other) const; - - // Returns true if other has a similar stroke width to this. - bool MatchingStrokeWidth(const BLOBNBOX& other, - double fractional_tolerance, - double constant_tolerance) const; - - // Returns a bounding box of the outline contained within the - // given horizontal range. - TBOX BoundsWithinLimits(int left, int right); - - // Estimates and stores the baseline position based on the shape of the - // outline. - void EstimateBaselinePosition(); - - // Simple accessors. 
- const TBOX& bounding_box() const { - return box; - } - // Set the bounding box. Use with caution. - // Normally use compute_bounding_box instead. - void set_bounding_box(const TBOX& new_box) { - box = new_box; - base_char_top_ = box.top(); - base_char_bottom_ = box.bottom(); - } - void compute_bounding_box() { - box = cblob_ptr->bounding_box(); - base_char_top_ = box.top(); - base_char_bottom_ = box.bottom(); - baseline_y_ = box.bottom(); - } - const TBOX& reduced_box() const { - return red_box; - } - void set_reduced_box(TBOX new_box) { - red_box = new_box; - reduced = true; - } - int32_t enclosed_area() const { - return area; - } - bool joined_to_prev() const { - return joined != 0; - } - bool red_box_set() const { - return reduced != 0; - } - int repeated_set() const { - return repeated_set_; - } - void set_repeated_set(int set_id) { - repeated_set_ = set_id; - } - C_BLOB *cblob() const { - return cblob_ptr; - } - TabType left_tab_type() const { - return left_tab_type_; - } - void set_left_tab_type(TabType new_type) { - left_tab_type_ = new_type; - } - TabType right_tab_type() const { - return right_tab_type_; - } - void set_right_tab_type(TabType new_type) { - right_tab_type_ = new_type; - } - BlobRegionType region_type() const { - return region_type_; - } - void set_region_type(BlobRegionType new_type) { - region_type_ = new_type; - } - BlobSpecialTextType special_text_type() const { - return spt_type_; - } - void set_special_text_type(BlobSpecialTextType new_type) { - spt_type_ = new_type; - } - BlobTextFlowType flow() const { - return flow_; - } - void set_flow(BlobTextFlowType value) { - flow_ = value; - } - bool vert_possible() const { - return vert_possible_; - } - void set_vert_possible(bool value) { - vert_possible_ = value; - } - bool horz_possible() const { - return horz_possible_; - } - void set_horz_possible(bool value) { - horz_possible_ = value; - } - int left_rule() const { - return left_rule_; - } - void set_left_rule(int new_left) { - 
left_rule_ = new_left; - } - int right_rule() const { - return right_rule_; - } - void set_right_rule(int new_right) { - right_rule_ = new_right; - } - int left_crossing_rule() const { - return left_crossing_rule_; - } - void set_left_crossing_rule(int new_left) { - left_crossing_rule_ = new_left; - } - int right_crossing_rule() const { - return right_crossing_rule_; - } - void set_right_crossing_rule(int new_right) { - right_crossing_rule_ = new_right; - } - float horz_stroke_width() const { - return horz_stroke_width_; - } - void set_horz_stroke_width(float width) { - horz_stroke_width_ = width; - } - float vert_stroke_width() const { - return vert_stroke_width_; - } - void set_vert_stroke_width(float width) { - vert_stroke_width_ = width; - } - float area_stroke_width() const { - return area_stroke_width_; - } - tesseract::ColPartition* owner() const { - return owner_; - } - void set_owner(tesseract::ColPartition* new_owner) { - owner_ = new_owner; - } - bool leader_on_left() const { - return leader_on_left_; - } - void set_leader_on_left(bool flag) { - leader_on_left_ = flag; - } - bool leader_on_right() const { - return leader_on_right_; - } - void set_leader_on_right(bool flag) { - leader_on_right_ = flag; - } - BLOBNBOX* neighbour(BlobNeighbourDir n) const { - return neighbours_[n]; - } - bool good_stroke_neighbour(BlobNeighbourDir n) const { - return good_stroke_neighbours_[n]; - } - void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) { - neighbours_[n] = neighbour; - good_stroke_neighbours_[n] = good; - } - bool IsDiacritic() const { - return base_char_top_ != box.top() || base_char_bottom_ != box.bottom(); - } - int base_char_top() const { - return base_char_top_; - } - int base_char_bottom() const { - return base_char_bottom_; - } - int baseline_position() const { - return baseline_y_; - } - int line_crossings() const { - return line_crossings_; - } - void set_line_crossings(int value) { - line_crossings_ = value; - } - void 
set_diacritic_box(const TBOX& diacritic_box) { - base_char_top_ = diacritic_box.top(); - base_char_bottom_ = diacritic_box.bottom(); - } - BLOBNBOX* base_char_blob() const { - return base_char_blob_; - } - void set_base_char_blob(BLOBNBOX* blob) { - base_char_blob_ = blob; - } - void set_owns_cblob(bool value) { owns_cblob_ = value; } - - bool UniquelyVertical() const { - return vert_possible_ && !horz_possible_; - } - bool UniquelyHorizontal() const { - return horz_possible_ && !vert_possible_; - } - - // Returns true if the region type is text. - static bool IsTextType(BlobRegionType type) { - return type == BRT_TEXT || type == BRT_VERT_TEXT; - } - // Returns true if the region type is image. - static bool IsImageType(BlobRegionType type) { - return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE; - } - // Returns true if the region type is line. - static bool IsLineType(BlobRegionType type) { - return type == BRT_HLINE || type == BRT_VLINE; - } - // Returns true if the region type cannot be merged. - static bool UnMergeableType(BlobRegionType type) { - return IsLineType(type) || IsImageType(type); - } - // Helper to call CleanNeighbours on all blobs on the list. - static void CleanNeighbours(BLOBNBOX_LIST* blobs); - // Helper to delete all the deletable blobs on the list. - static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs); - // Helper to compute edge offsets for all the blobs on the list. - // See coutln.h for an explanation of edge offsets. - static void ComputeEdgeOffsets(Pix* thresholds, Pix* grey, - BLOBNBOX_LIST* blobs); - -#ifndef GRAPHICS_DISABLED - // Helper to draw all the blobs on the list in the given body_colour, - // with child outlines in the child_colour. - static void PlotBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win); - // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the - // given list in the given body_colour, with child outlines in the - // child_colour. 
- static void PlotNoiseBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win); - - static ScrollView::Color TextlineColor(BlobRegionType region_type, - BlobTextFlowType flow_type); - - // Keep in sync with BlobRegionType. - ScrollView::Color BoxColor() const; - - void plot(ScrollView* window, // window to draw in - ScrollView::Color blob_colour, // for outer bits - ScrollView::Color child_colour); // for holes -#endif - - // Initializes the bulk of the members to default values for use at - // construction time. - void ConstructionInit() { - cblob_ptr = nullptr; - owns_cblob_ = false; - area = 0; - area_stroke_width_ = 0.0f; - horz_stroke_width_ = 0.0f; - vert_stroke_width_ = 0.0f; - ReInit(); - } - // Initializes members set by StrokeWidth and beyond, without discarding - // stored area and strokewidth values, which are expensive to calculate. - void ReInit() { - joined = false; - reduced = false; - repeated_set_ = 0; - left_tab_type_ = TT_NONE; - right_tab_type_ = TT_NONE; - region_type_ = BRT_UNKNOWN; - flow_ = BTFT_NONE; - spt_type_ = BSTT_SKIP; - left_rule_ = 0; - right_rule_ = 0; - left_crossing_rule_ = 0; - right_crossing_rule_ = 0; - if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr - && cblob()->perimeter()!=0) - area_stroke_width_ = 2.0f * area / cblob()->perimeter(); - owner_ = nullptr; - base_char_top_ = box.top(); - base_char_bottom_ = box.bottom(); - baseline_y_ = box.bottom(); - line_crossings_ = 0; - base_char_blob_ = nullptr; - horz_possible_ = false; - vert_possible_ = false; - leader_on_left_ = false; - leader_on_right_ = false; - ClearNeighbours(); - } - - void ClearNeighbours() { - for (int n = 0; n < BND_COUNT; ++n) { - neighbours_[n] = nullptr; - good_stroke_neighbours_[n] = false; - } - } - - private: - C_BLOB *cblob_ptr; // edgestep blob - TBOX box; // bounding box - TBOX red_box; // bounding box - signed int area:30; // enclosed area - unsigned joined : 1; // joined 
to prev - unsigned reduced : 1; // reduced box set - int repeated_set_; // id of the set of repeated blobs - TabType left_tab_type_; // Indicates tab-stop assessment - TabType right_tab_type_; // Indicates tab-stop assessment - BlobRegionType region_type_; // Type of region this blob belongs to - BlobTextFlowType flow_; // Quality of text flow. - int16_t left_rule_; // x-coord of nearest but not crossing rule line - int16_t right_rule_; // x-coord of nearest but not crossing rule line - int16_t left_crossing_rule_; // x-coord of nearest or crossing rule line - int16_t right_crossing_rule_; // x-coord of nearest or crossing rule line - int16_t base_char_top_; // y-coord of top/bottom of diacritic base, - int16_t base_char_bottom_; // if it exists else top/bottom of this blob. - int16_t baseline_y_; // Estimate of baseline position. - int line_crossings_; // Number of line intersections touched. - BLOBNBOX* base_char_blob_; // The blob that was the base char. - float horz_stroke_width_; // Median horizontal stroke width - float vert_stroke_width_; // Median vertical stroke width - float area_stroke_width_; // Stroke width from area/perimeter ratio. - tesseract::ColPartition* owner_; // Who will delete me when I am not needed - BlobSpecialTextType spt_type_; // Special text type. - BLOBNBOX* neighbours_[BND_COUNT]; - bool good_stroke_neighbours_[BND_COUNT]; - bool horz_possible_; // Could be part of horizontal flow. - bool vert_possible_; // Could be part of vertical flow. - bool leader_on_left_; // There is a leader to the left. - bool leader_on_right_; // There is a leader to the right. - // Iff true, then the destructor should delete the cblob_ptr. - // TODO(rays) migrate all uses to correctly setting this flag instead of - // deleting the C_BLOB before deleting the BLOBNBOX. 
- bool owns_cblob_; -}; - -class TO_ROW: public ELIST2_LINK -{ - public: - static const int kErrorWeight = 3; - - TO_ROW() { - clear(); - } //empty - TO_ROW( //constructor - BLOBNBOX *blob, //from first blob - float top, //of row //target height - float bottom, - float row_size); - - void print() const; - float max_y() const { //access function - return y_max; - } - float min_y() const { - return y_min; - } - float mean_y() const { - return (y_min + y_max) / 2.0f; - } - float initial_min_y() const { - return initial_y_min; - } - float line_m() const { //access to line fit - return m; - } - float line_c() const { - return c; - } - float line_error() const { - return error; - } - float parallel_c() const { - return para_c; - } - float parallel_error() const { - return para_error; - } - float believability() const { //baseline goodness - return credibility; - } - float intercept() const { //real parallel_c - return y_origin; - } - void add_blob( //put in row - BLOBNBOX *blob, //blob to add - float top, //of row //target height - float bottom, - float row_size); - void insert_blob( //put in row in order - BLOBNBOX *blob); - - BLOBNBOX_LIST *blob_list() { //get list - return &blobs; - } - - void set_line( //set line spec - float new_m, //line to set - float new_c, - float new_error) { - m = new_m; - c = new_c; - error = new_error; - } - void set_parallel_line( //set fixed gradient line - float gradient, //page gradient - float new_c, - float new_error) { - para_c = new_c; - para_error = new_error; - credibility = - (float) (blobs.length () - kErrorWeight * new_error); - y_origin = (float) (new_c / sqrt (1 + gradient * gradient)); - //real intercept - } - void set_limits( //set min,max - float new_min, //bottom and - float new_max) { //top of row - y_min = new_min; - y_max = new_max; - } - void compute_vertical_projection(); - //get projection - - bool rep_chars_marked() const { - return num_repeated_sets_ != -1; - } - void clear_rep_chars_marked() { - num_repeated_sets_ 
= -1; - } - int num_repeated_sets() const { - return num_repeated_sets_; - } - void set_num_repeated_sets(int num_sets) { - num_repeated_sets_ = num_sets; - } - - // true when dead - bool merged; - bool all_caps; // had no ascenders - bool used_dm_model; // in guessing pitch - int16_t projection_left; // start of projection - int16_t projection_right; // start of projection - PITCH_TYPE pitch_decision; // how strong is decision - float fixed_pitch; // pitch or 0 - float fp_space; // sp if fixed pitch - float fp_nonsp; // nonsp if fixed pitch - float pr_space; // sp if prop - float pr_nonsp; // non sp if prop - float spacing; // to "next" row - float xheight; // of line - int xheight_evidence; // number of blobs of height xheight - float ascrise; // ascenders - float descdrop; // descenders - float body_size; // of CJK characters. Assumed to be - // xheight+ascrise for non-CJK text. - int32_t min_space; // min size for real space - int32_t max_nonspace; // max size of non-space - int32_t space_threshold; // space vs nonspace - float kern_size; // average non-space - float space_size; // average space - WERD_LIST rep_words; // repeated chars - ICOORDELT_LIST char_cells; // fixed pitch cells - QSPLINE baseline; // curved baseline - STATS projection; // vertical projection - - private: - void clear(); // clear all values to reasonable defaults - - BLOBNBOX_LIST blobs; //blobs in row - float y_min; //coords - float y_max; - float initial_y_min; - float m, c; //line spec - float error; //line error - float para_c; //constrained fit - float para_error; - float y_origin; //rotated para_c; - float credibility; //baseline believability - int num_repeated_sets_; // number of sets of repeated blobs - // set to -1 if we have not searched - // for repeated blobs in this row yet -}; - -ELIST2IZEH (TO_ROW) -class TO_BLOCK:public ELIST_LINK -{ - public: - TO_BLOCK() : pitch_decision(PITCH_DUNNO) { - clear(); - } //empty - TO_BLOCK( //constructor - BLOCK *src_block); //real block - 
~TO_BLOCK(); - - void clear(); // clear all scalar members. - - TO_ROW_LIST *get_rows() { //access function - return &row_list; - } - - // Rotate all the blobnbox lists and the underlying block. Then update the - // median size statistic from the blobs list. - void rotate(const FCOORD& rotation) { - BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs, - &small_blobs, &large_blobs, nullptr}; - for (BLOBNBOX_LIST** list = blobnbox_list; *list != nullptr; ++list) { - BLOBNBOX_IT it(*list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->rotate(rotation); - } - } - // Rotate the block - ASSERT_HOST(block->pdblk.poly_block() != nullptr); - block->rotate(rotation); - // Update the median size statistic from the blobs list. - STATS widths(0, block->pdblk.bounding_box().width()); - STATS heights(0, block->pdblk.bounding_box().height()); - BLOBNBOX_IT blob_it(&blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - widths.add(blob_it.data()->bounding_box().width(), 1); - heights.add(blob_it.data()->bounding_box().height(), 1); - } - block->set_median_size(static_cast(widths.median() + 0.5), - static_cast(heights.median() + 0.5)); - } - - void print_rows() { //debug info - TO_ROW_IT row_it = &row_list; - TO_ROW *row; - - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); - row_it.forward()) { - row = row_it.data(); - tprintf("Row range (%g,%g), para_c=%g, blobcount=%" PRId32 "\n", - row->min_y(), row->max_y(), row->parallel_c(), - row->blob_list()->length()); - } - } - - // Reorganizes the blob lists with a different definition of small, medium - // and large, compared to the original definition. - // Height is still the primary filter key, but medium width blobs of small - // height become medium, and very wide blobs of small height stay small. - void ReSetAndReFilterBlobs(); - - // Deletes noise blobs from all lists where not owned by a ColPartition. 
- void DeleteUnownedNoise(); - - // Computes and stores the edge offsets on each blob for use in feature - // extraction, using greyscale if the supplied grey and thresholds pixes - // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary - // edge step outlines. - // Thresholds must either be the same size as grey or an integer down-scale - // of grey. - // See coutln.h for an explanation of edge offsets. - void ComputeEdgeOffsets(Pix* thresholds, Pix* grey); - -#ifndef GRAPHICS_DISABLED - // Draw the noise blobs from all lists in red. - void plot_noise_blobs(ScrollView* to_win); - // Draw the blobs on on the various lists in the block in different colors. - void plot_graded_blobs(ScrollView* to_win); -#endif - - BLOBNBOX_LIST blobs; //medium size - BLOBNBOX_LIST underlines; //underline blobs - BLOBNBOX_LIST noise_blobs; //very small - BLOBNBOX_LIST small_blobs; //fairly small - BLOBNBOX_LIST large_blobs; //big blobs - BLOCK *block; //real block - PITCH_TYPE pitch_decision; //how strong is decision - float line_spacing; //estimate - // line_size is a lower-bound estimate of the font size in pixels of - // the text in the block (with ascenders and descenders), being a small - // (1.25) multiple of the median height of filtered blobs. - // In most cases the font size will be bigger, but it will be closer - // if the text is allcaps, or in a no-x-height script. 
- float line_size; //estimate - float max_blob_size; //line assignment limit - float baseline_offset; //phase shift - float xheight; //median blob size - float fixed_pitch; //pitch or 0 - float kern_size; //average non-space - float space_size; //average space - int32_t min_space; //min definite space - int32_t max_nonspace; //max definite - float fp_space; //sp if fixed pitch - float fp_nonsp; //nonsp if fixed pitch - float pr_space; //sp if prop - float pr_nonsp; //non sp if prop - TO_ROW *key_row; //starting row - - private: - TO_ROW_LIST row_list; //temporary rows -}; - -ELISTIZEH (TO_BLOCK) -extern double_VAR_H (textord_error_weight, 3, -"Weighting for error in believability"); -void find_cblob_limits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - FCOORD rotation, //for landscape - float &ymin, //output y limits - float &ymax); -void find_cblob_vlimits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - float &ymin, //output y limits - float &ymax); -void find_cblob_hlimits( //get x limits - C_BLOB *blob, //blob to search - float bottomy, //y limits - float topy, - float &xmin, //output x limits - float &xymax); -C_BLOB *crotate_cblob( //rotate it - C_BLOB *blob, //blob to search - FCOORD rotation //for landscape - ); -TBOX box_next( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ); -TBOX box_next_pre_chopped( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ); -void vertical_cblob_projection( //project outlines - C_BLOB *blob, //blob to project - STATS *stats //output - ); -void vertical_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ); -#ifndef GRAPHICS_DISABLED -void plot_blob_list(ScrollView* win, // window to draw in - BLOBNBOX_LIST *list, // blob list - ScrollView::Color body_colour, // colour to draw - ScrollView::Color child_colour); // colour of child -#endif // GRAPHICS_DISABLED 
-#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobs.cpp deleted file mode 100644 index 67595470..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobs.cpp +++ /dev/null @@ -1,1008 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: blobs.cpp (Formerly blobs.c) - * Description: Blob definition - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 27 15:39:52 1989 - * Modified: Thu Mar 28 15:33:26 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "blobs.h" -#include "ccstruct.h" -#include "clst.h" -#include "emalloc.h" -#include "helpers.h" -#include "linlsq.h" -#include "normalis.h" -#include "ocrblock.h" -#include "ocrrow.h" -#include "points.h" -#include "polyaprx.h" -#include "structures.h" -#include "werd.h" - -#include - -using tesseract::CCStruct; - -// A Vector representing the "vertical" direction when measuring the -// divisiblity of blobs into multiple blobs just by separating outlines. -// See divisible_blob below for the use. -const TPOINT kDivisibleVerticalUpright(0, 1); -// A vector representing the "vertical" direction for italic text for use -// when separating outlines. Using it actually deteriorates final accuracy, -// so it is only used for ApplyBoxes chopping to get a better segmentation. -const TPOINT kDivisibleVerticalItalic(1, 5); - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -CLISTIZE(EDGEPT) - -// Returns true when the two line segments cross each other. -// (Moved from outlines.cpp). -// Finds where the projected lines would cross and then checks to see if the -// point of intersection lies on both of the line segments. If it does -// then these two segments cross. 
-/* static */ -bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0, - const TPOINT& b1) { - int b0a1xb0b1, b0b1xb0a0; - int a1b1xa1a0, a1a0xa1b0; - - TPOINT b0a1, b0a0, a1b1, b0b1, a1a0; - - b0a1.x = a1.x - b0.x; - b0a0.x = a0.x - b0.x; - a1b1.x = b1.x - a1.x; - b0b1.x = b1.x - b0.x; - a1a0.x = a0.x - a1.x; - b0a1.y = a1.y - b0.y; - b0a0.y = a0.y - b0.y; - a1b1.y = b1.y - a1.y; - b0b1.y = b1.y - b0.y; - a1a0.y = a0.y - a1.y; - - b0a1xb0b1 = CROSS(b0a1, b0b1); - b0b1xb0a0 = CROSS(b0b1, b0a0); - a1b1xa1a0 = CROSS(a1b1, a1a0); - // For clarity, we want CROSS(a1a0,a1b0) here but we have b0a1 instead of a1b0 - // so use -CROSS(a1b0,b0a1) instead, which is the same. - a1a0xa1b0 = -CROSS(a1a0, b0a1); - - return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || - (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) && - ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0)); -} - -// Consume the circular list of EDGEPTs to make a TESSLINE. -TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { - TESSLINE* result = new TESSLINE; - result->loop = outline; - if (outline->src_outline != nullptr) { - // ASSUMPTION: This function is only ever called from ApproximateOutline - // and therefore either all points have a src_outline or all do not. - // Just as SetupFromPos sets the vectors from the vertices, setup the - // step_count members to indicate the (positive) number of original - // C_OUTLINE steps to the next vertex. - EDGEPT* pt = outline; - do { - pt->step_count = pt->next->start_step - pt->start_step; - if (pt->step_count < 0) pt->step_count += pt->src_outline->pathlength(); - pt = pt->next; - } while (pt != outline); - } - result->SetupFromPos(); - return result; -} - -// Copies the data and the outline, but leaves next untouched. 
-void TESSLINE::CopyFrom(const TESSLINE& src) { - Clear(); - topleft = src.topleft; - botright = src.botright; - start = src.start; - is_hole = src.is_hole; - if (src.loop != nullptr) { - EDGEPT* prevpt = nullptr; - EDGEPT* newpt = nullptr; - EDGEPT* srcpt = src.loop; - do { - newpt = new EDGEPT(*srcpt); - if (prevpt == nullptr) { - loop = newpt; - } else { - newpt->prev = prevpt; - prevpt->next = newpt; - } - prevpt = newpt; - srcpt = srcpt->next; - } while (srcpt != src.loop); - loop->prev = newpt; - newpt->next = loop; - } -} - -// Deletes owned data. -void TESSLINE::Clear() { - if (loop == nullptr) return; - - EDGEPT* this_edge = loop; - do { - EDGEPT* next_edge = this_edge->next; - delete this_edge; - this_edge = next_edge; - } while (this_edge != loop); - loop = nullptr; -} - -// Normalize in-place using the DENORM. -void TESSLINE::Normalize(const DENORM& denorm) { - EDGEPT* pt = loop; - do { - denorm.LocalNormTransform(pt->pos, &pt->pos); - pt = pt->next; - } while (pt != loop); - SetupFromPos(); -} - -// Rotates by the given rotation in place. -void TESSLINE::Rotate(const FCOORD rot) { - EDGEPT* pt = loop; - do { - int tmp = static_cast( - floor(pt->pos.x * rot.x() - pt->pos.y * rot.y() + 0.5)); - pt->pos.y = static_cast( - floor(pt->pos.y * rot.x() + pt->pos.x * rot.y() + 0.5)); - pt->pos.x = tmp; - pt = pt->next; - } while (pt != loop); - SetupFromPos(); -} - -// Moves by the given vec in place. -void TESSLINE::Move(const ICOORD vec) { - EDGEPT* pt = loop; - do { - pt->pos.x += vec.x(); - pt->pos.y += vec.y(); - pt = pt->next; - } while (pt != loop); - SetupFromPos(); -} - -// Scales by the given factor in place. -void TESSLINE::Scale(float factor) { - EDGEPT* pt = loop; - do { - pt->pos.x = static_cast(floor(pt->pos.x * factor + 0.5)); - pt->pos.y = static_cast(floor(pt->pos.y * factor + 0.5)); - pt = pt->next; - } while (pt != loop); - SetupFromPos(); -} - -// Sets up the start and vec members of the loop from the pos members. 
-void TESSLINE::SetupFromPos() { - EDGEPT* pt = loop; - do { - pt->vec.x = pt->next->pos.x - pt->pos.x; - pt->vec.y = pt->next->pos.y - pt->pos.y; - pt = pt->next; - } while (pt != loop); - start = pt->pos; - ComputeBoundingBox(); -} - -// Recomputes the bounding box from the points in the loop. -void TESSLINE::ComputeBoundingBox() { - int minx = INT32_MAX; - int miny = INT32_MAX; - int maxx = -INT32_MAX; - int maxy = -INT32_MAX; - - // Find boundaries. - start = loop->pos; - EDGEPT* this_edge = loop; - do { - if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { - if (this_edge->pos.x < minx) minx = this_edge->pos.x; - if (this_edge->pos.y < miny) miny = this_edge->pos.y; - if (this_edge->pos.x > maxx) maxx = this_edge->pos.x; - if (this_edge->pos.y > maxy) maxy = this_edge->pos.y; - } - this_edge = this_edge->next; - } while (this_edge != loop); - // Reset bounds. - topleft.x = minx; - topleft.y = maxy; - botright.x = maxx; - botright.y = miny; -} - -// Computes the min and max cross product of the outline points with the -// given vec and returns the results in min_xp and max_xp. Geometrically -// this is the left and right edge of the outline perpendicular to the -// given direction, but to get the distance units correct, you would -// have to divide by the modulus of vec. 
-void TESSLINE::MinMaxCrossProduct(const TPOINT vec, int* min_xp, - int* max_xp) const { - *min_xp = INT32_MAX; - *max_xp = INT32_MIN; - EDGEPT* this_edge = loop; - do { - if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { - int product = CROSS(this_edge->pos, vec); - UpdateRange(product, min_xp, max_xp); - } - this_edge = this_edge->next; - } while (this_edge != loop); -} - -TBOX TESSLINE::bounding_box() const { - return TBOX(topleft.x, botright.y, botright.x, topleft.y); -} - -#ifndef GRAPHICS_DISABLED -void TESSLINE::plot(ScrollView* window, ScrollView::Color color, - ScrollView::Color child_color) { - if (is_hole) - window->Pen(child_color); - else - window->Pen(color); - window->SetCursor(start.x, start.y); - EDGEPT* pt = loop; - do { - bool prev_hidden = pt->IsHidden(); - pt = pt->next; - if (prev_hidden) - window->SetCursor(pt->pos.x, pt->pos.y); - else - window->DrawTo(pt->pos.x, pt->pos.y); - } while (pt != loop); -} -#endif // GRAPHICS_DISABLED - -// Returns the first non-hidden EDGEPT that has a different src_outline to -// its predecessor, or, if all the same, the lowest indexed point. -EDGEPT* TESSLINE::FindBestStartPt() const { - EDGEPT* best_start = loop; - int best_step = loop->start_step; - // Iterate the polygon. - EDGEPT* pt = loop; - do { - if (pt->IsHidden()) continue; - if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline) - return pt; // Qualifies as the best. - if (pt->start_step < best_step) { - best_step = pt->start_step; - best_start = pt; - } - } while ((pt = pt->next) != loop); - return best_start; -} - -// Iterate the given list of outlines, converting to TESSLINE by polygonal -// approximation and recursively any children, returning the current tail -// of the resulting list of TESSLINEs. 
-static TESSLINE** ApproximateOutlineList(bool allow_detailed_fx, - C_OUTLINE_LIST* outlines, - bool children, TESSLINE** tail) { - C_OUTLINE_IT ol_it(outlines); - for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.data(); - if (outline->pathlength() > 0) { - TESSLINE* tessline = ApproximateOutline(allow_detailed_fx, outline); - tessline->is_hole = children; - *tail = tessline; - tail = &tessline->next; - } - if (!outline->child()->empty()) { - tail = ApproximateOutlineList(allow_detailed_fx, outline->child(), true, - tail); - } - } - return tail; -} - -// Factory to build a TBLOB from a C_BLOB with polygonal approximation along -// the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB -// contain pointers to the input C_OUTLINEs that enable higher-resolution -// feature extraction that does not use the polygonal approximation. -TBLOB* TBLOB::PolygonalCopy(bool allow_detailed_fx, C_BLOB* src) { - TBLOB* tblob = new TBLOB; - ApproximateOutlineList(allow_detailed_fx, src->out_list(), false, - &tblob->outlines); - return tblob; -} - -// Factory builds a blob with no outlines, but copies the other member data. -TBLOB* TBLOB::ShallowCopy(const TBLOB& src) { - TBLOB* blob = new TBLOB; - blob->denorm_ = src.denorm_; - return blob; -} - -// Normalizes the blob for classification only if needed. -// (Normally this means a non-zero classify rotation.) -// If no Normalization is needed, then nullptr is returned, and the input blob -// can be used directly. Otherwise a new TBLOB is returned which must be -// deleted after use. -TBLOB* TBLOB::ClassifyNormalizeIfNeeded() const { - TBLOB* rotated_blob = nullptr; - // If necessary, copy the blob and rotate it. The rotation is always - // +/- 90 degrees, as 180 was already taken care of. 
- if (denorm_.block() != nullptr && - denorm_.block()->classify_rotation().y() != 0.0) { - TBOX box = bounding_box(); - int x_middle = (box.left() + box.right()) / 2; - int y_middle = (box.top() + box.bottom()) / 2; - rotated_blob = new TBLOB(*this); - const FCOORD& rotation = denorm_.block()->classify_rotation(); - // Move the rotated blob back to the same y-position so that we - // can still distinguish similar glyphs with differeny y-position. - float target_y = - kBlnBaselineOffset + - (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); - rotated_blob->Normalize(nullptr, &rotation, &denorm_, x_middle, y_middle, - 1.0f, 1.0f, 0.0f, target_y, denorm_.inverse(), - denorm_.pix()); - } - return rotated_blob; -} - -// Copies the data and the outline, but leaves next untouched. -void TBLOB::CopyFrom(const TBLOB& src) { - Clear(); - TESSLINE* prev_outline = nullptr; - for (TESSLINE* srcline = src.outlines; srcline != nullptr; - srcline = srcline->next) { - TESSLINE* new_outline = new TESSLINE(*srcline); - if (outlines == nullptr) - outlines = new_outline; - else - prev_outline->next = new_outline; - prev_outline = new_outline; - } - denorm_ = src.denorm_; -} - -// Deletes owned data. -void TBLOB::Clear() { - for (TESSLINE* next_outline = nullptr; outlines != nullptr; - outlines = next_outline) { - next_outline = outlines->next; - delete outlines; - } -} - -// Sets up the built-in DENORM and normalizes the blob in-place. -// For parameters see DENORM::SetupNormalization, plus the inverse flag for -// this blob and the Pix for the full image. 
-void TBLOB::Normalize(const BLOCK* block, const FCOORD* rotation, - const DENORM* predecessor, float x_origin, float y_origin, - float x_scale, float y_scale, float final_xshift, - float final_yshift, bool inverse, Pix* pix) { - denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, - x_scale, y_scale, final_xshift, final_yshift); - denorm_.set_inverse(inverse); - denorm_.set_pix(pix); - // TODO(rays) outline->Normalize is more accurate, but breaks tests due - // the changes it makes. Reinstate this code with a retraining. - // The reason this change is troublesome is that it normalizes for the - // baseline value computed independently at each x-coord. If the baseline - // is not horizontal, this introduces shear into the normalized blob, which - // is useful on the rare occasions that the baseline is really curved, but - // the baselines need to be stabilized the rest of the time. -#if 0 - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { - outline->Normalize(denorm_); - } -#else - denorm_.LocalNormBlob(this); -#endif -} - -// Rotates by the given rotation in place. -void TBLOB::Rotate(const FCOORD rotation) { - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - outline->Rotate(rotation); - } -} - -// Moves by the given vec in place. -void TBLOB::Move(const ICOORD vec) { - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - outline->Move(vec); - } -} - -// Scales by the given factor in place. -void TBLOB::Scale(float factor) { - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - outline->Scale(factor); - } -} - -// Recomputes the bounding boxes of the outlines. -void TBLOB::ComputeBoundingBoxes() { - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - outline->ComputeBoundingBox(); - } -} - -// Returns the number of outlines. 
-int TBLOB::NumOutlines() const { - int result = 0; - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) - ++result; - return result; -} - -/********************************************************************** - * TBLOB::bounding_box() - * - * Compute the bounding_box of a compound blob, defined to be the - * bounding box of the union of all top-level outlines in the blob. - **********************************************************************/ -TBOX TBLOB::bounding_box() const { - if (outlines == nullptr) return TBOX(0, 0, 0, 0); - TESSLINE* outline = outlines; - TBOX box = outline->bounding_box(); - for (outline = outline->next; outline != nullptr; outline = outline->next) { - box += outline->bounding_box(); - } - return box; -} - -// Finds and deletes any duplicate outlines in this blob, without deleting -// their EDGEPTs. -void TBLOB::EliminateDuplicateOutlines() { - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - TESSLINE* last_outline = outline; - for (TESSLINE* other_outline = outline->next; other_outline != nullptr; - last_outline = other_outline, other_outline = other_outline->next) { - if (outline->SameBox(*other_outline)) { - last_outline->next = other_outline->next; - // This doesn't leak - the outlines share the EDGEPTs. - other_outline->loop = nullptr; - delete other_outline; - other_outline = last_outline; - // If it is part of a cut, then it can't be a hole any more. - outline->is_hole = false; - } - } - } -} - -// Swaps the outlines of *this and next if needed to keep the centers in -// increasing x. 
-void TBLOB::CorrectBlobOrder(TBLOB* next) { - TBOX box = bounding_box(); - TBOX next_box = next->bounding_box(); - if (box.x_middle() > next_box.x_middle()) { - Swap(&outlines, &next->outlines); - } -} - -#ifndef GRAPHICS_DISABLED -void TBLOB::plot(ScrollView* window, ScrollView::Color color, - ScrollView::Color child_color) { - for (TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) - outline->plot(window, color, child_color); -} -#endif // GRAPHICS_DISABLED - -// Computes the center of mass and second moments for the old baseline and -// 2nd moment normalizations. Returns the outline length. -// The input denorm should be the normalizations that have been applied from -// the image to the current state of this TBLOB. -int TBLOB::ComputeMoments(FCOORD* center, FCOORD* second_moments) const { - // Compute 1st and 2nd moments of the original outline. - LLSQ accumulator; - TBOX box = bounding_box(); - // Iterate the outlines, accumulating edges relative the box.botleft(). - CollectEdges(box, nullptr, &accumulator, nullptr, nullptr); - *center = accumulator.mean_point() + box.botleft(); - // The 2nd moments are just the standard deviation of the point positions. - double x2nd = sqrt(accumulator.x_variance()); - double y2nd = sqrt(accumulator.y_variance()); - if (x2nd < 1.0) x2nd = 1.0; - if (y2nd < 1.0) y2nd = 1.0; - second_moments->set_x(x2nd); - second_moments->set_y(y2nd); - return accumulator.count(); -} - -// Computes the precise bounding box of the coords that are generated by -// GetEdgeCoords. This may be different from the bounding box of the polygon. -void TBLOB::GetPreciseBoundingBox(TBOX* precise_box) const { - TBOX box = bounding_box(); - *precise_box = TBOX(); - CollectEdges(box, precise_box, nullptr, nullptr, nullptr); - precise_box->move(box.botleft()); -} - -// Adds edges to the given vectors. 
-// For all the edge steps in all the outlines, or polygonal approximation -// where there are no edge steps, collects the steps into x_coords/y_coords. -// x_coords is a collection of the x-coords of vertical edges for each -// y-coord starting at box.bottom(). -// y_coords is a collection of the y-coords of horizontal edges for each -// x-coord starting at box.left(). -// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. -// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. -void TBLOB::GetEdgeCoords(const TBOX& box, - GenericVector >* x_coords, - GenericVector >* y_coords) const { - GenericVector empty; - x_coords->init_to_size(box.height(), empty); - y_coords->init_to_size(box.width(), empty); - CollectEdges(box, nullptr, nullptr, x_coords, y_coords); - // Sort the output vectors. - for (int i = 0; i < x_coords->size(); ++i) (*x_coords)[i].sort(); - for (int i = 0; i < y_coords->size(); ++i) (*y_coords)[i].sort(); -} - -// Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over -// the integer coordinate grid to properly weight long vectors. -static void SegmentLLSQ(const FCOORD& pt1, const FCOORD& pt2, - LLSQ* accumulator) { - FCOORD step(pt2); - step -= pt1; - int xstart = IntCastRounded(std::min(pt1.x(), pt2.x())); - int xend = IntCastRounded(std::max(pt1.x(), pt2.x())); - int ystart = IntCastRounded(std::min(pt1.y(), pt2.y())); - int yend = IntCastRounded(std::max(pt1.y(), pt2.y())); - if (xstart == xend && ystart == yend) return; // Nothing to do. - double weight = step.length() / (xend - xstart + yend - ystart); - // Compute and save the y-position at the middle of each x-step. - for (int x = xstart; x < xend; ++x) { - double y = pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x(); - accumulator->add(x + 0.5, y, weight); - } - // Compute and save the x-position at the middle of each y-step. 
- for (int y = ystart; y < yend; ++y) { - double x = pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y(); - accumulator->add(x, y + 0.5, weight); - } -} - -// Adds any edges from a single segment of outline between pt1 and pt2 to -// the x_coords, y_coords vectors. pt1 and pt2 should be relative to the -// bottom-left of the bounding box, hence indices to x_coords, y_coords -// are clipped to ([0,x_limit], [0,y_limit]). -// See GetEdgeCoords above for a description of x_coords, y_coords. -static void SegmentCoords(const FCOORD& pt1, const FCOORD& pt2, int x_limit, - int y_limit, - GenericVector >* x_coords, - GenericVector >* y_coords) { - FCOORD step(pt2); - step -= pt1; - int start = - ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit); - int end = ClipToRange(IntCastRounded(std::max(pt1.x(), pt2.x())), 0, x_limit); - for (int x = start; x < end; ++x) { - int y = IntCastRounded(pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x()); - (*y_coords)[x].push_back(y); - } - start = ClipToRange(IntCastRounded(std::min(pt1.y(), pt2.y())), 0, y_limit); - end = ClipToRange(IntCastRounded(std::max(pt1.y(), pt2.y())), 0, y_limit); - for (int y = start; y < end; ++y) { - int x = IntCastRounded(pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y()); - (*x_coords)[y].push_back(x); - } -} - -// Adds any edges from a single segment of outline between pt1 and pt2 to -// the bbox such that it guarantees to contain anything produced by -// SegmentCoords. 
-static void SegmentBBox(const FCOORD& pt1, const FCOORD& pt2, TBOX* bbox) { - FCOORD step(pt2); - step -= pt1; - int x1 = IntCastRounded(std::min(pt1.x(), pt2.x())); - int x2 = IntCastRounded(std::max(pt1.x(), pt2.x())); - if (x2 > x1) { - int y1 = - IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / step.x()); - int y2 = - IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / step.x()); - TBOX point(x1, std::min(y1, y2), x2, std::max(y1, y2)); - *bbox += point; - } - int y1 = IntCastRounded(std::min(pt1.y(), pt2.y())); - int y2 = IntCastRounded(std::max(pt1.y(), pt2.y())); - if (y2 > y1) { - int x1 = - IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / step.y()); - int x2 = - IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / step.y()); - TBOX point(std::min(x1, x2), y1, std::max(x1, x2), y2); - *bbox += point; - } -} - -// Collects edges into the given bounding box, LLSQ accumulator and/or x_coords, -// y_coords vectors. -// For a description of x_coords/y_coords, see GetEdgeCoords above. -// Startpt to lastpt, inclusive, MUST have the same src_outline member, -// which may be nullptr. The vector from lastpt to its next is included in -// the accumulation. Hidden edges should be excluded by the caller. -// The input denorm should be the normalizations that have been applied from -// the image to the current state of the TBLOB from which startpt, lastpt come. -// box is the bounding box of the blob from which the EDGEPTs are taken and -// indices into x_coords, y_coords are offset by box.botleft(). -static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, - const DENORM& denorm, const TBOX& box, - TBOX* bounding_box, LLSQ* accumulator, - GenericVector >* x_coords, - GenericVector >* y_coords) { - const C_OUTLINE* outline = startpt->src_outline; - int x_limit = box.width() - 1; - int y_limit = box.height() - 1; - if (outline != nullptr) { - // Use higher-resolution edge points stored on the outline. 
- // The outline coordinates may not match the binary image because of the - // rotation for vertical text lines, but the root_denorm IS the matching - // start of the DENORM chain. - const DENORM* root_denorm = denorm.RootDenorm(); - int step_length = outline->pathlength(); - int start_index = startpt->start_step; - // Note that if this run straddles the wrap-around point of the outline, - // that lastpt->start_step may have a lower index than startpt->start_step, - // and we want to use an end_index that allows us to use a positive - // increment, so we add step_length if necessary, but that may be beyond the - // bounds of the outline steps/ due to wrap-around, so we use % step_length - // everywhere, except for start_index. - int end_index = lastpt->start_step + lastpt->step_count; - if (end_index <= start_index) end_index += step_length; - // pos is the integer coordinates of the binary image steps. - ICOORD pos = outline->position_at_index(start_index); - FCOORD origin(box.left(), box.bottom()); - // f_pos is a floating-point version of pos that offers improved edge - // positioning using greyscale information or smoothing of edge steps. - FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index); - // pos_normed is f_pos after the appropriate normalization, and relative - // to origin. - // prev_normed is the previous value of pos_normed. - FCOORD prev_normed; - denorm.NormTransform(root_denorm, f_pos, &prev_normed); - prev_normed -= origin; - for (int index = start_index; index < end_index; ++index) { - ICOORD step = outline->step(index % step_length); - // Only use the point if its edge strength is positive. This excludes - // points that don't provide useful information, eg - // ___________ - // |___________ - // The vertical step provides only noisy, damaging information, as even - // with a greyscale image, the positioning of the edge there may be a - // fictitious extrapolation, so previous processing has eliminated it. 
- if (outline->edge_strength_at_index(index % step_length) > 0) { - FCOORD f_pos = - outline->sub_pixel_pos_at_index(pos, index % step_length); - FCOORD pos_normed; - denorm.NormTransform(root_denorm, f_pos, &pos_normed); - pos_normed -= origin; - // Accumulate the information that is selected by the caller. - if (bounding_box != nullptr) { - SegmentBBox(pos_normed, prev_normed, bounding_box); - } - if (accumulator != nullptr) { - SegmentLLSQ(pos_normed, prev_normed, accumulator); - } - if (x_coords != nullptr && y_coords != nullptr) { - SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords, - y_coords); - } - prev_normed = pos_normed; - } - pos += step; - } - } else { - // There is no outline, so we are forced to use the polygonal approximation. - const EDGEPT* endpt = lastpt->next; - const EDGEPT* pt = startpt; - do { - FCOORD next_pos(pt->next->pos.x - box.left(), - pt->next->pos.y - box.bottom()); - FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom()); - if (bounding_box != nullptr) { - SegmentBBox(next_pos, pos, bounding_box); - } - if (accumulator != nullptr) { - SegmentLLSQ(next_pos, pos, accumulator); - } - if (x_coords != nullptr && y_coords != nullptr) { - SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords); - } - } while ((pt = pt->next) != endpt); - } -} - -// For all the edge steps in all the outlines, or polygonal approximation -// where there are no edge steps, collects the steps into the bounding_box, -// llsq and/or the x_coords/y_coords. Both are used in different kinds of -// normalization. -// For a description of x_coords, y_coords, see GetEdgeCoords above. -void TBLOB::CollectEdges(const TBOX& box, TBOX* bounding_box, LLSQ* llsq, - GenericVector >* x_coords, - GenericVector >* y_coords) const { - // Iterate the outlines. - for (const TESSLINE* ol = outlines; ol != nullptr; ol = ol->next) { - // Iterate the polygon. 
- EDGEPT* loop_pt = ol->FindBestStartPt(); - EDGEPT* pt = loop_pt; - if (pt == nullptr) continue; - do { - if (pt->IsHidden()) continue; - // Find a run of equal src_outline. - EDGEPT* last_pt = pt; - do { - last_pt = last_pt->next; - } while (last_pt != loop_pt && !last_pt->IsHidden() && - last_pt->src_outline == pt->src_outline); - last_pt = last_pt->prev; - CollectEdgesOfRun(pt, last_pt, denorm_, box, bounding_box, llsq, x_coords, - y_coords); - pt = last_pt; - } while ((pt = pt->next) != loop_pt); - } -} - -// Factory to build a TWERD from a (C_BLOB) WERD, with polygonal -// approximation along the way. -TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) { - TWERD* tessword = new TWERD; - tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN); - C_BLOB_IT b_it(src->cblob_list()); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - C_BLOB* blob = b_it.data(); - TBLOB* tblob = TBLOB::PolygonalCopy(allow_detailed_fx, blob); - tessword->blobs.push_back(tblob); - } - return tessword; -} - -// Baseline normalizes the blobs in-place, recording the normalization in the -// DENORMs in the blobs. 
-void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, - bool inverse, float x_height, float baseline_shift, - bool numeric_mode, tesseract::OcrEngineMode hint, - const TBOX* norm_box, DENORM* word_denorm) { - TBOX word_box = bounding_box(); - if (norm_box != nullptr) word_box = *norm_box; - float word_middle = (word_box.left() + word_box.right()) / 2.0f; - float input_y_offset = 0.0f; - float final_y_offset = static_cast(kBlnBaselineOffset); - float scale = kBlnXHeight / x_height; - if (row == nullptr) { - word_middle = word_box.left(); - input_y_offset = word_box.bottom(); - final_y_offset = 0.0f; - } else { - input_y_offset = row->base_line(word_middle) + baseline_shift; - } - for (int b = 0; b < blobs.size(); ++b) { - TBLOB* blob = blobs[b]; - TBOX blob_box = blob->bounding_box(); - float mid_x = (blob_box.left() + blob_box.right()) / 2.0f; - float baseline = input_y_offset; - float blob_scale = scale; - if (numeric_mode) { - baseline = blob_box.bottom(); - blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), - scale, scale * 1.5f); - } else if (row != nullptr) { - baseline = row->base_line(mid_x) + baseline_shift; - } - // The image will be 8-bit grey if the input was grey or color. Note that in - // a grey image 0 is black and 255 is white. If the input was binary, then - // the pix will be binary and 0 is white, with 1 being black. - // To tell the difference pixGetDepth() will return 8 or 1. - // The inverse flag will be true iff the word has been determined to be - // white on black, and is independent of whether the pix is 8 bit or 1 bit. 
- blob->Normalize(block, nullptr, nullptr, word_middle, baseline, blob_scale, - blob_scale, 0.0f, final_y_offset, inverse, pix); - } - if (word_denorm != nullptr) { - word_denorm->SetupNormalization(block, nullptr, nullptr, word_middle, - input_y_offset, scale, scale, 0.0f, - final_y_offset); - word_denorm->set_inverse(inverse); - word_denorm->set_pix(pix); - } -} - -// Copies the data and the blobs, but leaves next untouched. -void TWERD::CopyFrom(const TWERD& src) { - Clear(); - latin_script = src.latin_script; - for (int b = 0; b < src.blobs.size(); ++b) { - TBLOB* new_blob = new TBLOB(*src.blobs[b]); - blobs.push_back(new_blob); - } -} - -// Deletes owned data. -void TWERD::Clear() { - blobs.delete_data_pointers(); - blobs.clear(); -} - -// Recomputes the bounding boxes of the blobs. -void TWERD::ComputeBoundingBoxes() { - for (int b = 0; b < blobs.size(); ++b) { - blobs[b]->ComputeBoundingBoxes(); - } -} - -TBOX TWERD::bounding_box() const { - TBOX result; - for (int b = 0; b < blobs.size(); ++b) { - TBOX box = blobs[b]->bounding_box(); - result += box; - } - return result; -} - -// Merges the blobs from start to end, not including end, and deletes -// the blobs between start and end. -void TWERD::MergeBlobs(int start, int end) { - if (start >= blobs.size() - 1) return; // Nothing to do. - TESSLINE* outline = blobs[start]->outlines; - for (int i = start + 1; i < end && i < blobs.size(); ++i) { - TBLOB* next_blob = blobs[i]; - // Take the outlines from the next blob. - if (outline == nullptr) { - blobs[start]->outlines = next_blob->outlines; - outline = blobs[start]->outlines; - } else { - while (outline->next != nullptr) outline = outline->next; - outline->next = next_blob->outlines; - next_blob->outlines = nullptr; - } - // Delete the next blob and move on. - delete next_blob; - blobs[i] = nullptr; - } - // Remove dead blobs from the vector. 
- for (int i = start + 1; i < end && start + 1 < blobs.size(); ++i) { - blobs.remove(start + 1); - } -} - -#ifndef GRAPHICS_DISABLED -void TWERD::plot(ScrollView* window) { - ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); - for (int b = 0; b < blobs.size(); ++b) { - blobs[b]->plot(window, color, ScrollView::BROWN); - color = WERD::NextColor(color); - } -} -#endif // GRAPHICS_DISABLED - -/********************************************************************** - * divisible_blob - * - * Returns true if the blob contains multiple outlines than can be - * separated using divide_blobs. Sets the location to be used in the - * call to divide_blobs. - **********************************************************************/ -bool divisible_blob(TBLOB* blob, bool italic_blob, TPOINT* location) { - if (blob->outlines == nullptr || blob->outlines->next == nullptr) - return false; // Need at least 2 outlines for it to be possible. - int max_gap = 0; - TPOINT vertical = - italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright; - for (TESSLINE* outline1 = blob->outlines; outline1 != nullptr; - outline1 = outline1->next) { - if (outline1->is_hole) continue; // Holes do not count as separable. - TPOINT mid_pt1( - static_cast((outline1->topleft.x + outline1->botright.x) / 2), - static_cast((outline1->topleft.y + outline1->botright.y) / 2)); - int mid_prod1 = CROSS(mid_pt1, vertical); - int min_prod1, max_prod1; - outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1); - for (TESSLINE* outline2 = outline1->next; outline2 != nullptr; - outline2 = outline2->next) { - if (outline2->is_hole) continue; // Holes do not count as separable. 
- TPOINT mid_pt2(static_cast( - (outline2->topleft.x + outline2->botright.x) / 2), - static_cast( - (outline2->topleft.y + outline2->botright.y) / 2)); - int mid_prod2 = CROSS(mid_pt2, vertical); - int min_prod2, max_prod2; - outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2); - int mid_gap = abs(mid_prod2 - mid_prod1); - int overlap = - std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2); - if (mid_gap - overlap / 4 > max_gap) { - max_gap = mid_gap - overlap / 4; - *location = mid_pt1; - *location += mid_pt2; - *location /= 2; - } - } - } - // Use the y component of the vertical vector as an approximation to its - // length. - return max_gap > vertical.y; -} - -/********************************************************************** - * divide_blobs - * - * Create two blobs by grouping the outlines in the appropriate blob. - * The outlines that are beyond the location point are moved to the - * other blob. The ones whose x location is less than that point are - * retained in the original blob. - **********************************************************************/ -void divide_blobs(TBLOB* blob, TBLOB* other_blob, bool italic_blob, - const TPOINT& location) { - TPOINT vertical = - italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright; - TESSLINE* outline1 = nullptr; - TESSLINE* outline2 = nullptr; - - TESSLINE* outline = blob->outlines; - blob->outlines = nullptr; - int location_prod = CROSS(location, vertical); - - while (outline != nullptr) { - TPOINT mid_pt( - static_cast((outline->topleft.x + outline->botright.x) / 2), - static_cast((outline->topleft.y + outline->botright.y) / 2)); - int mid_prod = CROSS(mid_pt, vertical); - if (mid_prod < location_prod) { - // Outline is in left blob. - if (outline1) - outline1->next = outline; - else - blob->outlines = outline; - outline1 = outline; - } else { - // Outline is in right blob. 
- if (outline2) - outline2->next = outline; - else - other_blob->outlines = outline; - outline2 = outline; - } - outline = outline->next; - } - - if (outline1) outline1->next = nullptr; - if (outline2) outline2->next = nullptr; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobs.h deleted file mode 100644 index de34b23e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blobs.h +++ /dev/null @@ -1,456 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: blobs.h (Formerly blobs.h) - * Description: Blob definition - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 27 15:39:52 1989 - * Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ - -#ifndef BLOBS_H -#define BLOBS_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include // for int16_t -#include // for memcpy, memset -#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH -#include "genericvector.h" // for GenericVector -#include "normalis.h" // for DENORM -#include "points.h" // for FCOORD, ICOORD -#include "publictypes.h" // for OcrEngineMode -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::Color -#include "vecfuncs.h" // for CROSS - -class BLOCK; -class C_BLOB; -class C_OUTLINE; -class LLSQ; -class ROW; -class WERD; - -struct Pix; - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -#define EDGEPTFLAGS 4 /*concavity,length etc. */ - -struct TPOINT { - TPOINT(): x(0), y(0) {} - TPOINT(int16_t vx, int16_t vy) : x(vx), y(vy) {} - TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {} - - void operator+=(const TPOINT& other) { - x += other.x; - y += other.y; - } - void operator/=(int divisor) { - x /= divisor; - y /= divisor; - } - bool operator==(const TPOINT& other) const { - return x == other.x && y == other.y; - } - // Returns true when the two line segments cross each other. - // (Moved from outlines.cpp). - static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0, - const TPOINT& b1); - - int16_t x; // absolute x coord. - int16_t y; // absolute y coord. -}; -using VECTOR = TPOINT; // structure for coordinates. 
- -struct EDGEPT { - EDGEPT() - : next(nullptr), prev(nullptr), src_outline(nullptr), start_step(0), step_count(0) { - memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0])); - } - EDGEPT(const EDGEPT& src) : next(nullptr), prev(nullptr) { - CopyFrom(src); - } - EDGEPT& operator=(const EDGEPT& src) { - CopyFrom(src); - return *this; - } - // Copies the data elements, but leaves the pointers untouched. - void CopyFrom(const EDGEPT& src) { - pos = src.pos; - vec = src.vec; - memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0])); - src_outline = src.src_outline; - start_step = src.start_step; - step_count = src.step_count; - } - // Returns the squared distance between the points, with the x-component - // weighted by x_factor. - int WeightedDistance(const EDGEPT& other, int x_factor) const { - int x_dist = pos.x - other.pos.x; - int y_dist = pos.y - other.pos.y; - return x_dist * x_dist * x_factor + y_dist * y_dist; - } - // Returns true if the positions are equal. - bool EqualPos(const EDGEPT& other) const { return pos == other.pos; } - // Returns the bounding box of the outline segment from *this to *end. - // Ignores hidden edge flags. - TBOX SegmentBox(const EDGEPT* end) const { - TBOX box(pos.x, pos.y, pos.x, pos.y); - const EDGEPT* pt = this; - do { - pt = pt->next; - if (pt->pos.x < box.left()) box.set_left(pt->pos.x); - if (pt->pos.x > box.right()) box.set_right(pt->pos.x); - if (pt->pos.y < box.bottom()) box.set_bottom(pt->pos.y); - if (pt->pos.y > box.top()) box.set_top(pt->pos.y); - } while (pt != end && pt != this); - return box; - } - // Returns the area of the outline segment from *this to *end. - // Ignores hidden edge flags. 
- int SegmentArea(const EDGEPT* end) const { - int area = 0; - const EDGEPT* pt = this->next; - do { - TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y); - area += CROSS(origin_vec, pt->vec); - pt = pt->next; - } while (pt != end && pt != this); - return area; - } - // Returns true if the number of points in the outline segment from *this to - // *end is less that min_points and false if we get back to *this first. - // Ignores hidden edge flags. - bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const { - int count = 0; - const EDGEPT* pt = this; - do { - if (pt == end) return true; - pt = pt->next; - ++count; - } while (pt != this && count <= min_points); - return false; - } - - // Accessors to hide or reveal a cut edge from feature extractors. - void Hide() { - flags[0] = true; - } - void Reveal() { - flags[0] = false; - } - bool IsHidden() const { - return flags[0] != 0; - } - void MarkChop() { - flags[2] = true; - } - bool IsChopPt() const { - return flags[2] != 0; - } - - TPOINT pos; // position - VECTOR vec; // vector to next point - // TODO(rays) Remove flags and replace with - // is_hidden, runlength, dir, and fixed. The only use - // of the flags other than is_hidden is in polyaprx.cpp. - char flags[EDGEPTFLAGS]; // concavity, length etc - EDGEPT* next; // anticlockwise element - EDGEPT* prev; // clockwise element - C_OUTLINE* src_outline; // Outline it came from. - // The following fields are not used if src_outline is nullptr. - int start_step; // Location of pos in src_outline. - int step_count; // Number of steps used (may wrap around). -}; - -// For use in chop and findseam to keep a list of which EDGEPTs were inserted. 
-CLISTIZEH(EDGEPT) - -struct TESSLINE { - TESSLINE() : is_hole(false), loop(nullptr), next(nullptr) {} - TESSLINE(const TESSLINE& src) : loop(nullptr), next(nullptr) { - CopyFrom(src); - } - ~TESSLINE() { - Clear(); - } - TESSLINE& operator=(const TESSLINE& src) { - CopyFrom(src); - return *this; - } - // Consume the circular list of EDGEPTs to make a TESSLINE. - static TESSLINE* BuildFromOutlineList(EDGEPT* outline); - // Copies the data and the outline, but leaves next untouched. - void CopyFrom(const TESSLINE& src); - // Deletes owned data. - void Clear(); - // Normalize in-place using the DENORM. - void Normalize(const DENORM& denorm); - // Rotates by the given rotation in place. - void Rotate(const FCOORD rotation); - // Moves by the given vec in place. - void Move(const ICOORD vec); - // Scales by the given factor in place. - void Scale(float factor); - // Sets up the start and vec members of the loop from the pos members. - void SetupFromPos(); - // Recomputes the bounding box from the points in the loop. - void ComputeBoundingBox(); - // Computes the min and max cross product of the outline points with the - // given vec and returns the results in min_xp and max_xp. Geometrically - // this is the left and right edge of the outline perpendicular to the - // given direction, but to get the distance units correct, you would - // have to divide by the modulus of vec. - void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const; - - TBOX bounding_box() const; - // Returns true if *this and other have equal bounding boxes. - bool SameBox(const TESSLINE& other) const { - return topleft == other.topleft && botright == other.botright; - } - // Returns true if the given line segment crosses any outline of this blob. 
- bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const { - if (Contains(pt1) && Contains(pt2)) { - EDGEPT* pt = loop; - do { - if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true; - pt = pt->next; - } while (pt != loop); - } - return false; - } - // Returns true if the point is contained within the outline box. - bool Contains(const TPOINT& pt) const { - return topleft.x <= pt.x && pt.x <= botright.x && - botright.y <= pt.y && pt.y <= topleft.y; - } - - #ifndef GRAPHICS_DISABLED - void plot(ScrollView* window, ScrollView::Color color, - ScrollView::Color child_color); - #endif // GRAPHICS_DISABLED - - // Returns the first outline point that has a different src_outline to its - // predecessor, or, if all the same, the lowest indexed point. - EDGEPT* FindBestStartPt() const; - - - int BBArea() const { - return (botright.x - topleft.x) * (topleft.y - botright.y); - } - - TPOINT topleft; // Top left of loop. - TPOINT botright; // Bottom right of loop. - TPOINT start; // Start of loop. - bool is_hole; // True if this is a hole/child outline. - EDGEPT *loop; // Edgeloop. - TESSLINE *next; // Next outline in blob. -}; // Outline structure. - -struct TBLOB { - TBLOB() : outlines(nullptr) {} - TBLOB(const TBLOB& src) : outlines(nullptr) { - CopyFrom(src); - } - ~TBLOB() { - Clear(); - } - TBLOB& operator=(const TBLOB& src) { - CopyFrom(src); - return *this; - } - // Factory to build a TBLOB from a C_BLOB with polygonal approximation along - // the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB - // contain pointers to the input C_OUTLINEs that enable higher-resolution - // feature extraction that does not use the polygonal approximation. - static TBLOB* PolygonalCopy(bool allow_detailed_fx, C_BLOB* src); - // Factory builds a blob with no outlines, but copies the other member data. - static TBLOB* ShallowCopy(const TBLOB& src); - // Normalizes the blob for classification only if needed. 
- // (Normally this means a non-zero classify rotation.) - // If no Normalization is needed, then nullptr is returned, and the input blob - // can be used directly. Otherwise a new TBLOB is returned which must be - // deleted after use. - TBLOB* ClassifyNormalizeIfNeeded() const; - - // Copies the data and the outlines, but leaves next untouched. - void CopyFrom(const TBLOB& src); - // Deletes owned data. - void Clear(); - // Sets up the built-in DENORM and normalizes the blob in-place. - // For parameters see DENORM::SetupNormalization, plus the inverse flag for - // this blob and the Pix for the full image. - void Normalize(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, - float final_xshift, float final_yshift, - bool inverse, Pix* pix); - // Rotates by the given rotation in place. - void Rotate(const FCOORD rotation); - // Moves by the given vec in place. - void Move(const ICOORD vec); - // Scales by the given factor in place. - void Scale(float factor); - // Recomputes the bounding boxes of the outlines. - void ComputeBoundingBoxes(); - - // Returns the number of outlines. - int NumOutlines() const; - - TBOX bounding_box() const; - - // Returns true if the given line segment crosses any outline of this blob. - bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const { - for (const TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - if (outline->SegmentCrosses(pt1, pt2)) return true; - } - return false; - } - // Returns true if the point is contained within any of the outline boxes. - bool Contains(const TPOINT& pt) const { - for (const TESSLINE* outline = outlines; outline != nullptr; - outline = outline->next) { - if (outline->Contains(pt)) return true; - } - return false; - } - - // Finds and deletes any duplicate outlines in this blob, without deleting - // their EDGEPTs. 
- void EliminateDuplicateOutlines(); - - // Swaps the outlines of *this and next if needed to keep the centers in - // increasing x. - void CorrectBlobOrder(TBLOB* next); - - const DENORM& denorm() const { - return denorm_; - } - - #ifndef GRAPHICS_DISABLED - void plot(ScrollView* window, ScrollView::Color color, - ScrollView::Color child_color); - #endif // GRAPHICS_DISABLED - - int BBArea() const { - int total_area = 0; - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) - total_area += outline->BBArea(); - return total_area; - } - - // Computes the center of mass and second moments for the old baseline and - // 2nd moment normalizations. Returns the outline length. - // The input denorm should be the normalizations that have been applied from - // the image to the current state of this TBLOB. - int ComputeMoments(FCOORD* center, FCOORD* second_moments) const; - // Computes the precise bounding box of the coords that are generated by - // GetEdgeCoords. This may be different from the bounding box of the polygon. - void GetPreciseBoundingBox(TBOX* precise_box) const; - // Adds edges to the given vectors. - // For all the edge steps in all the outlines, or polygonal approximation - // where there are no edge steps, collects the steps into x_coords/y_coords. - // x_coords is a collection of the x-coords of vertical edges for each - // y-coord starting at box.bottom(). - // y_coords is a collection of the y-coords of horizontal edges for each - // x-coord starting at box.left(). - // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. - // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. - void GetEdgeCoords(const TBOX& box, - GenericVector >* x_coords, - GenericVector >* y_coords) const; - - TESSLINE *outlines; // List of outlines in blob. - - private: // TODO(rays) Someday the data members will be private too. 
- // For all the edge steps in all the outlines, or polygonal approximation - // where there are no edge steps, collects the steps into the bounding_box, - // llsq and/or the x_coords/y_coords. Both are used in different kinds of - // normalization. - // For a description of x_coords, y_coords, see GetEdgeCoords above. - void CollectEdges(const TBOX& box, - TBOX* bounding_box, LLSQ* llsq, - GenericVector >* x_coords, - GenericVector >* y_coords) const; - - private: - // DENORM indicating the transformations that this blob has undergone so far. - DENORM denorm_; -}; // Blob structure. - -struct TWERD { - TWERD() : latin_script(false) {} - TWERD(const TWERD& src) { - CopyFrom(src); - } - ~TWERD() { - Clear(); - } - TWERD& operator=(const TWERD& src) { - CopyFrom(src); - return *this; - } - // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal - // approximation along the way. - static TWERD* PolygonalCopy(bool allow_detailed_fx, WERD* src); - // Baseline normalizes the blobs in-place, recording the normalization in the - // DENORMs in the blobs. - void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse, - float x_height, float baseline_shift, bool numeric_mode, - tesseract::OcrEngineMode hint, - const TBOX* norm_box, - DENORM* word_denorm); - // Copies the data and the blobs, but leaves next untouched. - void CopyFrom(const TWERD& src); - // Deletes owned data. - void Clear(); - // Recomputes the bounding boxes of the blobs. - void ComputeBoundingBoxes(); - - // Returns the number of blobs in the word. - int NumBlobs() const { - return blobs.size(); - } - TBOX bounding_box() const; - - // Merges the blobs from start to end, not including end, and deletes - // the blobs between start and end. - void MergeBlobs(int start, int end); - - void plot(ScrollView* window); - - GenericVector blobs; // Blobs in word. - bool latin_script; // This word is in a latin-based script. 
-}; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB. -bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); - -void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - const TPOINT& location); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blread.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blread.cpp deleted file mode 100644 index 4b7012b0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blread.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/********************************************************************** - * File: blread.cpp (Formerly pdread.c) - * Description: Friend function of BLOCK to read the uscan pd file. - * Author: Ray Smith - * Created: Mon Mar 18 14:39:00 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "blread.h" -#include // for fclose, fopen, FILE -#include "host.h" // for TRUE -#include "ocrblock.h" // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only) -#include "scanutils.h" // for tfscanf - -#define UNLV_EXT ".uzn" // unlv zone file - -/********************************************************************** - * read_unlv_file - * - * Read a whole unlv zone file to make a list of blocks. - **********************************************************************/ - -bool read_unlv_file( //print list of sides - STRING name, //basename of file - int32_t xsize, //image size - int32_t ysize, //image size - BLOCK_LIST *blocks //output list - ) { - FILE *pdfp; //file pointer - BLOCK *block; //current block - int x; //current top-down coords - int y; - int width; //of current block - int height; - BLOCK_IT block_it = blocks; //block iterator - - name += UNLV_EXT; //add extension - if ((pdfp = fopen (name.string (), "rb")) == nullptr) { - return false; //didn't read one - } else { - while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) { - //make rect block - block = new BLOCK (name.string (), TRUE, 0, 0, - (int16_t) x, (int16_t) (ysize - y - height), - (int16_t) (x + width), (int16_t) (ysize - y)); - //on end of list - block_it.add_to_end (block); - } - fclose(pdfp); - } - return true; -} - -void FullPageBlock(int width, int height, BLOCK_LIST *blocks) { - BLOCK_IT block_it(blocks); - BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); - block_it.add_to_end(block); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blread.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blread.h deleted file mode 100644 index 210c3010..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/blread.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************** - * 
File: blread.h (Formerly pdread.h) - * Description: Friend function of BLOCK to read the uscan pd file. - * Author: Ray Smith - * Created: Mon Mar 18 14:39:00 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef BLREAD_H -#define BLREAD_H - -#include // for int32_t -#include "strngs.h" // for STRING - -class BLOCK_LIST; - -bool read_unlv_file( //print list of sides - STRING name, //basename of file - int32_t xsize, //image size - int32_t ysize, //image size - BLOCK_LIST *blocks //output list - ); -void FullPageBlock(int width, int height, BLOCK_LIST *blocks); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxread.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxread.cpp deleted file mode 100644 index b9bbf1da..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxread.cpp +++ /dev/null @@ -1,241 +0,0 @@ -/********************************************************************** - * File: boxread.cpp - * Description: Read data from a box file. - * Author: Ray Smith - * Created: Fri Aug 24 17:47:23 PDT 2007 - * - * (C) Copyright 2007, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "boxread.h" -#include // for strchr, strcmp, strrchr -#include "errcode.h" // for ERRCODE, TESSEXIT -#include "fileerr.h" // for CANTOPENFILE -#include "genericvector.h" // for GenericVector -#include "helpers.h" // for chomp_string -#include "rect.h" // for TBOX -#include "strngs.h" // for STRING -#include "tprintf.h" // for tprintf -#include "unichar.h" // for UNICHAR - -// Special char code used to identify multi-blob labels. -static const char* kMultiBlobLabelCode = "WordStr"; - -// Open the boxfile based on the given image filename. -FILE* OpenBoxFile(const STRING& fname) { - STRING filename = BoxFileName(fname); - FILE* box_file = nullptr; - if (!(box_file = fopen(filename.string(), "rb"))) { - CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", - filename.string()); - } - return box_file; -} - -// Reads all boxes from the given filename. -// Reads a specific target_page number if >= 0, or all pages otherwise. -// Skips blanks if skip_blanks is true. -// The UTF-8 label of the box is put in texts, and the full box definition as -// a string is put in box_texts, with the corresponding page number in pages. -// Each of the output vectors is optional (may be nullptr). -// Returns false if no boxes are found. 
-bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename, - GenericVector* boxes, - GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages) { - GenericVector box_data; - if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data)) - return false; - // Convert the array of bytes to a string, so it can be used by the parser. - box_data.push_back('\0'); - return ReadMemBoxes(target_page, skip_blanks, &box_data[0], - /*continue_on_failure*/ true, boxes, texts, box_texts, - pages); -} - -// Reads all boxes from the string. Otherwise, as ReadAllBoxes. -bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, - bool continue_on_failure, - GenericVector* boxes, - GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages) { - STRING box_str(box_data); - GenericVector lines; - box_str.split('\n', &lines); - if (lines.empty()) return false; - int num_boxes = 0; - for (int i = 0; i < lines.size(); ++i) { - int page = 0; - STRING utf8_str; - TBOX box; - if (!ParseBoxFileStr(lines[i].string(), &page, &utf8_str, &box)) { - if (continue_on_failure) - continue; - else - return false; - } - if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) continue; - if (target_page >= 0 && page != target_page) continue; - if (boxes != nullptr) boxes->push_back(box); - if (texts != nullptr) texts->push_back(utf8_str); - if (box_texts != nullptr) { - STRING full_text; - MakeBoxFileStr(utf8_str.string(), box, target_page, &full_text); - box_texts->push_back(full_text); - } - if (pages != nullptr) pages->push_back(page); - ++num_boxes; - } - return num_boxes > 0; -} - -// Returns the box file name corresponding to the given image_filename. 
-STRING BoxFileName(const STRING& image_filename) { - STRING box_filename = image_filename; - const char *lastdot = strrchr(box_filename.string(), '.'); - if (lastdot != nullptr) - box_filename.truncate_at(lastdot - box_filename.string()); - - box_filename += ".box"; - return box_filename; -} - -// TODO(rays) convert all uses of ReadNextBox to use the new ReadAllBoxes. -// Box files are used ONLY DURING TRAINING, but by both processes of -// creating tr files with tesseract, and unicharset_extractor. -// ReadNextBox factors out the code to interpret a line of a box -// file so that applybox and unicharset_extractor interpret the same way. -// This function returns the next valid box file utf8 string and coords -// and returns true, or false on eof (and closes the file). -// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks -// for valid utf-8 and allows space or tab between fields. -// utf8_str is set with the unichar string, and bounding box with the box. -// If there are page numbers in the file, it reads them all. -bool ReadNextBox(int *line_number, FILE* box_file, - STRING* utf8_str, TBOX* bounding_box) { - return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box); -} - -// As ReadNextBox above, but get a specific page number. (0-based) -// Use -1 to read any page number. Files without page number all -// read as if they are page 0. -bool ReadNextBox(int target_page, int *line_number, FILE* box_file, - STRING* utf8_str, TBOX* bounding_box) { - int page = 0; - char buff[kBoxReadBufSize]; // boxfile read buffer - char *buffptr = buff; - - while (fgets(buff, sizeof(buff) - 1, box_file)) { - (*line_number)++; - - buffptr = buff; - const unsigned char *ubuf = reinterpret_cast(buffptr); - if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) - buffptr += 3; // Skip unicode file designation. - // Check for blank lines in box file - if (*buffptr == '\n' || *buffptr == '\0') continue; - // Skip blank boxes. 
- if (*buffptr == ' ' || *buffptr == '\t') continue; - if (*buffptr != '\0') { - if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) { - tprintf("Box file format error on line %i; ignored\n", *line_number); - continue; - } - if (target_page >= 0 && target_page != page) - continue; // Not on the appropriate page. - return true; // Successfully read a box. - } - } - fclose(box_file); - return false; // EOF -} - -// Parses the given box file string into a page_number, utf8_str, and -// bounding_box. Returns true on a successful parse. -// The box file is assumed to contain box definitions, one per line, of the -// following format for blob-level boxes: -// -// and for word/line-level boxes: -// WordStr # -// See applyybox.cpp for more information. -bool ParseBoxFileStr(const char* boxfile_str, int* page_number, - STRING* utf8_str, TBOX* bounding_box) { - *bounding_box = TBOX(); // Initialize it to empty. - *utf8_str = ""; - char uch[kBoxReadBufSize]; - const char *buffptr = boxfile_str; - // Read the unichar without messing up on Tibetan. - // According to issue 253 the utf-8 surrogates 85 and A0 are treated - // as whitespace by sscanf, so it is more reliable to just find - // ascii space and tab. - int uch_len = 0; - // Skip unicode file designation, if present. - const unsigned char *ubuf = reinterpret_cast(buffptr); - if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) - buffptr += 3; - // Allow a single blank as the UTF-8 string. Check for empty string and - // then blindly eat the first character. 
- if (*buffptr == '\0') return false; - do { - uch[uch_len++] = *buffptr++; - } while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' && - uch_len < kBoxReadBufSize - 1); - uch[uch_len] = '\0'; - if (*buffptr != '\0') ++buffptr; - int x_min, y_min, x_max, y_max; - *page_number = 0; - int count = sscanf(buffptr, "%d %d %d %d %d", - &x_min, &y_min, &x_max, &y_max, page_number); - if (count != 5 && count != 4) { - tprintf("Bad box coordinates in boxfile string! %s\n", ubuf); - return false; - } - // Test for long space-delimited string label. - if (strcmp(uch, kMultiBlobLabelCode) == 0 && - (buffptr = strchr(buffptr, '#')) != nullptr) { - strncpy(uch, buffptr + 1, kBoxReadBufSize - 1); - uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun. - chomp_string(uch); - uch_len = strlen(uch); - } - // Validate UTF8 by making unichars with it. - int used = 0; - while (used < uch_len) { - tesseract::UNICHAR ch(uch + used, uch_len - used); - int new_used = ch.utf8_len(); - if (new_used == 0) { - tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", - uch + used, uch[used], used + 1); - return false; - } - used += new_used; - } - *utf8_str = uch; - if (x_min > x_max) Swap(&x_min, &x_max); - if (y_min > y_max) Swap(&y_min, &y_max); - bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max); - return true; // Successfully read a box. -} - -// Creates a box file string from a unichar string, TBOX and page number. 
-void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, - STRING* box_str) { - *box_str = unichar_str; - box_str->add_str_int(" ", box.left()); - box_str->add_str_int(" ", box.bottom()); - box_str->add_str_int(" ", box.right()); - box_str->add_str_int(" ", box.top()); - box_str->add_str_int(" ", page_num); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxread.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxread.h deleted file mode 100644 index 07295fd6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxread.h +++ /dev/null @@ -1,90 +0,0 @@ -/********************************************************************** - * File: boxread.h - * Description: Read data from a box file. - * Author: Ray Smith - * Created: Fri Aug 24 17:47:23 PDT 2007 - * - * (C) Copyright 2007, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCUTIL_BOXREAD_H_ -#define TESSERACT_CCUTIL_BOXREAD_H_ - -#include // for FILE -#include "strngs.h" // for STRING - -class TBOX; - -template class GenericVector; -template class GenericVector; - -// Size of buffer used to read a line from a box file. -const int kBoxReadBufSize = 1024; - -// Open the boxfile based on the given image filename. -// Returns nullptr if the box file cannot be opened. 
-FILE* OpenBoxFile(const STRING& fname); - -// Reads all boxes from the given filename. -// Reads a specific target_page number if >= 0, or all pages otherwise. -// Skips blanks if skip_blanks is true. -// The UTF-8 label of the box is put in texts, and the full box definition as -// a string is put in box_texts, with the corresponding page number in pages. -// Each of the output vectors is optional (may be nullptr). -// Returns false if no boxes are found. -bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename, - GenericVector* boxes, - GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages); - -// Reads all boxes from the string. Otherwise, as ReadAllBoxes. -// continue_on_failure allows reading to continue even if an invalid box is -// encountered and will return true if it succeeds in reading some boxes. -// It otherwise gives up and returns false on encountering an invalid box. -bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, - bool continue_on_failure, - GenericVector* boxes, - GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages); - -// Returns the box file name corresponding to the given image_filename. -STRING BoxFileName(const STRING& image_filename); - -// ReadNextBox factors out the code to interpret a line of a box -// file so that applybox and unicharset_extractor interpret the same way. -// This function returns the next valid box file utf8 string and coords -// and returns true, or false on eof (and closes the file). -// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks -// for valid utf-8 and allows space or tab between fields. -// utf8_str is set with the unichar string, and bounding box with the box. -// If there are page numbers in the file, it reads them all. -bool ReadNextBox(int *line_number, FILE* box_file, - STRING* utf8_str, TBOX* bounding_box); -// As ReadNextBox above, but get a specific page number. 
(0-based) -// Use -1 to read any page number. Files without page number all -// read as if they are page 0. -bool ReadNextBox(int target_page, int *line_number, FILE* box_file, - STRING* utf8_str, TBOX* bounding_box); - -// Parses the given box file string into a page_number, utf8_str, and -// bounding_box. Returns true on a successful parse. -bool ParseBoxFileStr(const char* boxfile_str, int* page_number, - STRING* utf8_str, TBOX* bounding_box); - -// Creates a box file string from a unichar string, TBOX and page number. -void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, - STRING* box_str); - -#endif // TESSERACT_CCUTIL_BOXREAD_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxword.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxword.cpp deleted file mode 100644 index 7d81edc6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxword.cpp +++ /dev/null @@ -1,200 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: boxword.cpp -// Description: Class to represent the bounding boxes of the output. -// Author: Ray Smith -// Created: Tue May 25 14:18:14 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "blobs.h" -#include "boxword.h" -#include "normalis.h" -#include "ocrblock.h" -#include "pageres.h" - -namespace tesseract { - -// Clip output boxes to input blob boxes for bounds that are within this -// tolerance. Otherwise, the blob may be chopped and we have to just use -// the word bounding box. -const int kBoxClipTolerance = 2; - -BoxWord::BoxWord() : length_(0) { -} - -BoxWord::BoxWord(const BoxWord& src) { - CopyFrom(src); -} - -BoxWord& BoxWord::operator=(const BoxWord& src) { - CopyFrom(src); - return *this; -} - -void BoxWord::CopyFrom(const BoxWord& src) { - bbox_ = src.bbox_; - length_ = src.length_; - boxes_.clear(); - boxes_.reserve(length_); - for (int i = 0; i < length_; ++i) - boxes_.push_back(src.boxes_[i]); -} - -// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to -// switch back to original image coordinates. -BoxWord* BoxWord::CopyFromNormalized(TWERD* tessword) { - BoxWord* boxword = new BoxWord(); - // Count the blobs. - boxword->length_ = tessword->NumBlobs(); - // Allocate memory. - boxword->boxes_.reserve(boxword->length_); - - for (int b = 0; b < boxword->length_; ++b) { - TBLOB* tblob = tessword->blobs[b]; - TBOX blob_box; - for (TESSLINE* outline = tblob->outlines; outline != nullptr; - outline = outline->next) { - EDGEPT* edgept = outline->loop; - // Iterate over the edges. 
- do { - if (!edgept->IsHidden() || !edgept->prev->IsHidden()) { - ICOORD pos(edgept->pos.x, edgept->pos.y); - TPOINT denormed; - tblob->denorm().DenormTransform(nullptr, edgept->pos, &denormed); - pos.set_x(denormed.x); - pos.set_y(denormed.y); - TBOX pt_box(pos, pos); - blob_box += pt_box; - } - edgept = edgept->next; - } while (edgept != outline->loop); - } - boxword->boxes_.push_back(blob_box); - } - boxword->ComputeBoundingBox(); - return boxword; -} - -// Clean up the bounding boxes from the polygonal approximation by -// expanding slightly, then clipping to the blobs from the original_word -// that overlap. If not null, the block provides the inverse rotation. -void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) { - for (int i = 0; i < length_; ++i) { - TBOX box = boxes_[i]; - // Expand by a single pixel, as the poly approximation error is 1 pixel. - box = TBOX(box.left() - 1, box.bottom() - 1, - box.right() + 1, box.top() + 1); - // Now find the original box that matches. 
- TBOX original_box; - C_BLOB_IT b_it(original_word->cblob_list()); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - TBOX blob_box = b_it.data()->bounding_box(); - if (block != nullptr) - blob_box.rotate(block->re_rotation()); - if (blob_box.major_overlap(box)) { - original_box += blob_box; - } - } - if (!original_box.null_box()) { - if (NearlyEqual(original_box.left(), box.left(), kBoxClipTolerance)) - box.set_left(original_box.left()); - if (NearlyEqual(original_box.right(), box.right(), - kBoxClipTolerance)) - box.set_right(original_box.right()); - if (NearlyEqual(original_box.top(), box.top(), kBoxClipTolerance)) - box.set_top(original_box.top()); - if (NearlyEqual(original_box.bottom(), box.bottom(), - kBoxClipTolerance)) - box.set_bottom(original_box.bottom()); - } - original_box = original_word->bounding_box(); - if (block != nullptr) - original_box.rotate(block->re_rotation()); - boxes_[i] = box.intersection(original_box); - } - ComputeBoundingBox(); -} - -// Merges the boxes from start to end, not including end, and deletes -// the boxes between start and end. -void BoxWord::MergeBoxes(int start, int end) { - start = ClipToRange(start, 0, length_); - end = ClipToRange(end, 0, length_); - if (end <= start + 1) - return; - for (int i = start + 1; i < end; ++i) { - boxes_[start] += boxes_[i]; - } - int shrinkage = end - 1 - start; - length_ -= shrinkage; - for (int i = start + 1; i < length_; ++i) - boxes_[i] = boxes_[i + shrinkage]; - boxes_.truncate(length_); -} - -// Inserts a new box before the given index. -// Recomputes the bounding box. -void BoxWord::InsertBox(int index, const TBOX& box) { - if (index < length_) - boxes_.insert(box, index); - else - boxes_.push_back(box); - length_ = boxes_.size(); - ComputeBoundingBox(); -} - -// Changes the box at the given index to the new box. -// Recomputes the bounding box. 
-void BoxWord::ChangeBox(int index, const TBOX& box) { - boxes_[index] = box; - ComputeBoundingBox(); -} - -// Deletes the box with the given index, and shuffles up the rest. -// Recomputes the bounding box. -void BoxWord::DeleteBox(int index) { - ASSERT_HOST(0 <= index && index < length_); - boxes_.remove(index); - --length_; - ComputeBoundingBox(); -} - -// Deletes all the boxes stored in BoxWord. -void BoxWord::DeleteAllBoxes() { - length_ = 0; - boxes_.clear(); - bbox_ = TBOX(); -} - -// Computes the bounding box of the word. -void BoxWord::ComputeBoundingBox() { - bbox_ = TBOX(); - for (int i = 0; i < length_; ++i) - bbox_ += boxes_[i]; -} - -// This and other putatively are the same, so call the (permanent) callback -// for each blob index where the bounding boxes match. -// The callback is deleted on completion. -void BoxWord::ProcessMatchedBlobs(const TWERD& other, - TessCallback1* cb) const { - for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) { - TBOX blob_box = other.blobs[i]->bounding_box(); - if (blob_box == boxes_[i]) - cb->Run(i); - } - delete cb; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxword.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxword.h deleted file mode 100644 index a1e6352c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/boxword.h +++ /dev/null @@ -1,98 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: boxword.h -// Description: Class to represent the bounding boxes of the output. -// Author: Ray Smith -// Created: Tue May 25 14:18:14 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CSTRUCT_BOXWORD_H_ -#define TESSERACT_CSTRUCT_BOXWORD_H_ - -#include "genericvector.h" // for GenericVector -#include "rect.h" // for TBOX - -class BLOCK; -class WERD; - -struct TWERD; - -template class TessCallback1; - -namespace tesseract { - -// Class to hold an array of bounding boxes for an output word and -// the bounding box of the whole word. -class BoxWord { - public: - BoxWord(); - explicit BoxWord(const BoxWord& src); - ~BoxWord() = default; - - BoxWord& operator=(const BoxWord& src); - - void CopyFrom(const BoxWord& src); - - // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to - // switch back to original image coordinates. - static BoxWord* CopyFromNormalized(TWERD* tessword); - - // Clean up the bounding boxes from the polygonal approximation by - // expanding slightly, then clipping to the blobs from the original_word - // that overlap. If not null, the block provides the inverse rotation. - void ClipToOriginalWord(const BLOCK* block, WERD* original_word); - - // Merges the boxes from start to end, not including end, and deletes - // the boxes between start and end. - void MergeBoxes(int start, int end); - - // Inserts a new box before the given index. - // Recomputes the bounding box. - void InsertBox(int index, const TBOX& box); - - // Changes the box at the given index to the new box. - // Recomputes the bounding box. 
- void ChangeBox(int index, const TBOX& box); - - // Deletes the box with the given index, and shuffles up the rest. - // Recomputes the bounding box. - void DeleteBox(int index); - - // Deletes all the boxes stored in BoxWord. - void DeleteAllBoxes(); - - // This and other putatively are the same, so call the (permanent) callback - // for each blob index where the bounding boxes match. - // The callback is deleted on completion. - void ProcessMatchedBlobs(const TWERD& other, TessCallback1* cb) const; - - const TBOX& bounding_box() const { - return bbox_; - } - int length() const { return length_; } - const TBOX& BlobBox(int index) const { - return boxes_[index]; - } - - private: - void ComputeBoundingBox(); - - TBOX bbox_; - int length_; - GenericVector boxes_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CSTRUCT_BOXWORD_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ccstruct.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ccstruct.cpp deleted file mode 100644 index 4d188c41..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ccstruct.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ccstruct.cpp -// Description: ccstruct class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "ccstruct.h" - -namespace tesseract { - -// APPROXIMATIONS of the fractions of the character cell taken by -// the descenders, ascenders, and x-height. -const double CCStruct::kDescenderFraction = 0.25; -const double CCStruct::kXHeightFraction = 0.5; -const double CCStruct::kAscenderFraction = 0.25; -const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction / - (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction); - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -CCStruct::~CCStruct() = default; - -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ccstruct.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ccstruct.h deleted file mode 100644 index 8979ea33..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ccstruct.h +++ /dev/null @@ -1,41 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ccstruct.h -// Description: ccstruct class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_ -#define TESSERACT_CCSTRUCT_CCSTRUCT_H_ - -#include "cutil_class.h" // for CUtil - -namespace tesseract { -class CCStruct : public CUtil { - public: - CCStruct() = default; - virtual ~CCStruct(); - - // Globally accessible constants. - // APPROXIMATIONS of the fractions of the character cell taken by - // the descenders, ascenders, and x-height. - static const double kDescenderFraction; // = 0.25; - static const double kXHeightFraction; // = 0.5; - static const double kAscenderFraction; // = 0.25; - // Derived value giving the x-height as a fraction of cap-height. - static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender). -}; -} // namespace tesseract - -#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/coutln.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/coutln.cpp deleted file mode 100644 index a1dea5d0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/coutln.cpp +++ /dev/null @@ -1,1052 +0,0 @@ -/********************************************************************** - * File: coutln.cpp (Formerly coutline.c) - * Description: Code for the C_OUTLINE class. - * Author: Ray Smith - * Created: Mon Oct 07 16:01:57 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "coutln.h" -#include // for max, min -#include // for abs -#include // for abs -#include // for memset, memcpy, memmove -#include "allheaders.h" // for pixSetPixel, pixGetData, pixRasterop, pixGe... -#include "arrayaccess.h" // for GET_DATA_BYTE -#include "blobs.h" // for TPOINT -#include "crakedge.h" // for CRACKEDGE -#include "environ.h" // for l_uint32 -#include "errcode.h" // for ASSERT_HOST -#include "helpers.h" // for ClipToRange, IntCastRounded, Modulo -#include "normalis.h" // for DENORM -#include "pix.h" // for Pix (ptr only), PIX_DST, PIX_NOT - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -ELISTIZE (C_OUTLINE) -ICOORD C_OUTLINE::step_coords[4] = { - ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1) -}; - -/** - * @name C_OUTLINE::C_OUTLINE - * - * Constructor to build a C_OUTLINE from a CRACKEDGE LOOP. - * @param startpt outline to convert - * @param bot_left bounding box - * @param top_right bounding box - * @param length length of loop - */ - -C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right, - int16_t length) - : box(bot_left, top_right), start(startpt->pos), offsets(nullptr) { - int16_t stepindex; //index to step - CRACKEDGE *edgept; //current point - - stepcount = length; //no of steps - if (length == 0) { - steps = nullptr; - return; - } - //get memory - steps = (uint8_t *)calloc(step_mem(), 1); - edgept = startpt; - - for (stepindex = 0; stepindex < length; stepindex++) { - //set compact step - set_step (stepindex, edgept->stepdir); - edgept = edgept->next; - } -} - -/** - * @name C_OUTLINE::C_OUTLINE - * - * Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG. 
- */ -C_OUTLINE::C_OUTLINE ( -//constructor - //steps to copy -ICOORD startpt, DIR128 * new_steps, -int16_t length //length of loop -):start (startpt), offsets(nullptr) { - int8_t dirdiff; //direction difference - DIR128 prevdir; //previous direction - DIR128 dir; //current direction - DIR128 lastdir; //dir of last step - TBOX new_box; //easy bounding - int16_t stepindex; //index to step - int16_t srcindex; //source steps - ICOORD pos; //current position - - pos = startpt; - stepcount = length; // No. of steps. - ASSERT_HOST(length >= 0); - steps = static_cast(calloc(step_mem(), 1)); // Get memory. - - lastdir = new_steps[length - 1]; - prevdir = lastdir; - for (stepindex = 0, srcindex = 0; srcindex < length; - stepindex++, srcindex++) { - new_box = TBOX (pos, pos); - box += new_box; - //copy steps - dir = new_steps[srcindex]; - set_step(stepindex, dir); - dirdiff = dir - prevdir; - pos += step (stepindex); - if ((dirdiff == 64 || dirdiff == -64) && stepindex > 0) { - stepindex -= 2; //cancel there-and-back - prevdir = stepindex >= 0 ? step_dir (stepindex) : lastdir; - } - else - prevdir = dir; - } - ASSERT_HOST (pos.x () == startpt.x () && pos.y () == startpt.y ()); - do { - dirdiff = step_dir (stepindex - 1) - step_dir (0); - if (dirdiff == 64 || dirdiff == -64) { - start += step (0); - stepindex -= 2; //cancel there-and-back - for (int i = 0; i < stepindex; ++i) - set_step(i, step_dir(i + 1)); - } - } - while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64)); - stepcount = stepindex; - ASSERT_HOST (stepcount >= 4); -} - -/** - * @name C_OUTLINE::C_OUTLINE - * - * Constructor to build a C_OUTLINE from a rotation of a C_OUTLINE. 
- * @param srcline outline to rotate - * @param rotation rotate to coord - */ - -C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) { - TBOX new_box; //easy bounding - int16_t stepindex; //index to step - int16_t dirdiff; //direction change - ICOORD pos; //current position - ICOORD prevpos; //previous dest point - - ICOORD destpos; //destination point - int16_t destindex = INT16_MAX; //index to step - DIR128 dir; //coded direction - uint8_t new_step; - - stepcount = srcline->stepcount * 2; - if (stepcount == 0) { - steps = nullptr; - box = srcline->box; - box.rotate(rotation); - return; - } - //get memory - steps = (uint8_t *)calloc(step_mem(), 1); - - for (int iteration = 0; iteration < 2; ++iteration) { - DIR128 round1 = iteration == 0 ? 32 : 0; - DIR128 round2 = iteration != 0 ? 32 : 0; - pos = srcline->start; - prevpos = pos; - prevpos.rotate (rotation); - start = prevpos; - box = TBOX (start, start); - destindex = 0; - for (stepindex = 0; stepindex < srcline->stepcount; stepindex++) { - pos += srcline->step (stepindex); - destpos = pos; - destpos.rotate (rotation); - // tprintf("%i %i %i %i ", destpos.x(), destpos.y(), pos.x(), pos.y()); - while (destpos.x () != prevpos.x () || destpos.y () != prevpos.y ()) { - dir = DIR128 (FCOORD (destpos - prevpos)); - dir += 64; //turn to step style - new_step = dir.get_dir (); - // tprintf(" %i\n", new_step); - if (new_step & 31) { - set_step(destindex++, dir + round1); - prevpos += step(destindex - 1); - if (destindex < 2 - || ((dirdiff = - step_dir (destindex - 1) - step_dir (destindex - 2)) != - -64 && dirdiff != 64)) { - set_step(destindex++, dir + round2); - prevpos += step(destindex - 1); - } else { - prevpos -= step(destindex - 1); - destindex--; - prevpos -= step(destindex - 1); - set_step(destindex - 1, dir + round2); - prevpos += step(destindex - 1); - } - } - else { - set_step(destindex++, dir); - prevpos += step(destindex - 1); - } - while (destindex >= 2 && - ((dirdiff = - step_dir 
(destindex - 1) - step_dir (destindex - 2)) == -64 || - dirdiff == 64)) { - prevpos -= step(destindex - 1); - prevpos -= step(destindex - 2); - destindex -= 2; // Forget u turn - } - //ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == destpos.y()); - new_box = TBOX (destpos, destpos); - box += new_box; - } - } - ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ()); - dirdiff = step_dir (destindex - 1) - step_dir (0); - while ((dirdiff == 64 || dirdiff == -64) && destindex > 1) { - start += step (0); - destindex -= 2; - for (int i = 0; i < destindex; ++i) - set_step(i, step_dir(i + 1)); - dirdiff = step_dir (destindex - 1) - step_dir (0); - } - if (destindex >= 4) - break; - } - ASSERT_HOST(destindex <= stepcount); - stepcount = destindex; - destpos = start; - for (stepindex = 0; stepindex < stepcount; stepindex++) { - destpos += step (stepindex); - } - ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ()); -} - -// Build a fake outline, given just a bounding box and append to the list. -void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) { - C_OUTLINE_IT ol_it(outlines); - // Make a C_OUTLINE from the bounds. This is a bit of a hack, - // as there is no outline, just a bounding box, but it works nicely. - CRACKEDGE start; - start.pos = box.topleft(); - C_OUTLINE* outline = new C_OUTLINE(&start, box.topleft(), box.botright(), 0); - ol_it.add_to_end(outline); -} - -/** - * @name C_OUTLINE::area - * - * Compute the area of the outline. - */ - -int32_t C_OUTLINE::area() const { - int stepindex; //current step - int32_t total_steps; //steps to do - int32_t total; //total area - ICOORD pos; //position of point - ICOORD next_step; //step to next pix - // We aren't going to modify the list, or its contents, but there is - // no const iterator. 
- C_OUTLINE_IT it(const_cast(&children)); - - pos = start_pos (); - total_steps = pathlength (); - total = 0; - for (stepindex = 0; stepindex < total_steps; stepindex++) { - //all intersected - next_step = step (stepindex); - if (next_step.x () < 0) - total += pos.y (); - else if (next_step.x () > 0) - total -= pos.y (); - pos += next_step; - } - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - total += it.data ()->area ();//add areas of children - - return total; -} - -/** - * @name C_OUTLINE::perimeter - * - * Compute the perimeter of the outline and its first level children. - */ - -int32_t C_OUTLINE::perimeter() const { - int32_t total_steps; // Return value. - // We aren't going to modify the list, or its contents, but there is - // no const iterator. - C_OUTLINE_IT it(const_cast(&children)); - - total_steps = pathlength(); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) - total_steps += it.data()->pathlength(); // Add perimeters of children. - - return total_steps; -} - -/** - * @name C_OUTLINE::outer_area - * - * Compute the area of the outline. - */ - -int32_t C_OUTLINE::outer_area() const { - int stepindex; //current step - int32_t total_steps; //steps to do - int32_t total; //total area - ICOORD pos; //position of point - ICOORD next_step; //step to next pix - - pos = start_pos (); - total_steps = pathlength (); - if (total_steps == 0) - return box.area(); - total = 0; - for (stepindex = 0; stepindex < total_steps; stepindex++) { - //all intersected - next_step = step (stepindex); - if (next_step.x () < 0) - total += pos.y (); - else if (next_step.x () > 0) - total -= pos.y (); - pos += next_step; - } - - return total; -} - -/** - * @name C_OUTLINE::count_transitions - * - * Compute the number of x and y maxes and mins in the outline. 
- * @param threshold winding number on size - */ - -int32_t C_OUTLINE::count_transitions(int32_t threshold) { - bool first_was_max_x; //what was first - bool first_was_max_y; - bool looking_for_max_x; //what is next - bool looking_for_min_x; - bool looking_for_max_y; //what is next - bool looking_for_min_y; - int stepindex; //current step - int32_t total_steps; //steps to do - //current limits - int32_t max_x, min_x, max_y, min_y; - int32_t initial_x, initial_y; //initial limits - int32_t total; //total changes - ICOORD pos; //position of point - ICOORD next_step; //step to next pix - - pos = start_pos(); - total_steps = pathlength(); - total = 0; - max_x = min_x = pos.x(); - max_y = min_y = pos.y(); - looking_for_max_x = true; - looking_for_min_x = true; - looking_for_max_y = true; - looking_for_min_y = true; - first_was_max_x = false; - first_was_max_y = false; - initial_x = pos.x(); - initial_y = pos.y(); //stop uninit warning - for (stepindex = 0; stepindex < total_steps; stepindex++) { - //all intersected - next_step = step(stepindex); - pos += next_step; - if (next_step.x() < 0) { - if (looking_for_max_x && pos.x() < min_x) - min_x = pos.x(); - if (looking_for_min_x && max_x - pos.x() > threshold) { - if (looking_for_max_x) { - initial_x = max_x; - first_was_max_x = false; - } - total++; - looking_for_max_x = true; - looking_for_min_x = false; - min_x = pos.x(); //reset min - } - } - else if (next_step.x() > 0) { - if (looking_for_min_x && pos.x() > max_x) - max_x = pos.x(); - if (looking_for_max_x && pos.x() - min_x > threshold) { - if (looking_for_min_x) { - initial_x = min_x; //remember first min - first_was_max_x = true; - } - total++; - looking_for_max_x = false; - looking_for_min_x = true; - max_x = pos.x(); - } - } - else if (next_step.y() < 0) { - if (looking_for_max_y && pos.y() < min_y) - min_y = pos.y(); - if (looking_for_min_y && max_y - pos.y() > threshold) { - if (looking_for_max_y) { - initial_y = max_y; //remember first max - first_was_max_y = 
false; - } - total++; - looking_for_max_y = true; - looking_for_min_y = false; - min_y = pos.y(); //reset min - } - } - else { - if (looking_for_min_y && pos.y() > max_y) - max_y = pos.y(); - if (looking_for_max_y && pos.y() - min_y > threshold) { - if (looking_for_min_y) { - initial_y = min_y; //remember first min - first_was_max_y = true; - } - total++; - looking_for_max_y = false; - looking_for_min_y = true; - max_y = pos.y(); - } - } - - } - if (first_was_max_x && looking_for_min_x) { - if (max_x - initial_x > threshold) - total++; - else - total--; - } - else if (!first_was_max_x && looking_for_max_x) { - if (initial_x - min_x > threshold) - total++; - else - total--; - } - if (first_was_max_y && looking_for_min_y) { - if (max_y - initial_y > threshold) - total++; - else - total--; - } - else if (!first_was_max_y && looking_for_max_y) { - if (initial_y - min_y > threshold) - total++; - else - total--; - } - - return total; -} - -/** - * @name C_OUTLINE::operator< - * - * @return TRUE if the left operand is inside the right one. 
- * @param other other outline - */ - -bool -C_OUTLINE::operator<(const C_OUTLINE& other) const { - int16_t count = 0; //winding count - ICOORD pos; //position of point - int32_t stepindex; //index to cstep - - if (!box.overlap (other.box)) - return false; //can't be contained - if (stepcount == 0) - return other.box.contains(this->box); - - pos = start; - for (stepindex = 0; stepindex < stepcount - && (count = other.winding_number (pos)) == INTERSECTING; stepindex++) - pos += step (stepindex); //try all points - if (count == INTERSECTING) { - //all intersected - pos = other.start; - for (stepindex = 0; stepindex < other.stepcount - && (count = winding_number (pos)) == INTERSECTING; stepindex++) - //try other way round - pos += other.step (stepindex); - return count == INTERSECTING || count == 0; - } - return count != 0; -} - -/** - * @name C_OUTLINE::winding_number - * - * @return the winding number of the outline around the given point. - * @param point point to wind around - */ - -int16_t C_OUTLINE::winding_number(ICOORD point) const { - int16_t stepindex; //index to cstep - int16_t count; //winding count - ICOORD vec; //to current point - ICOORD stepvec; //step vector - int32_t cross; //cross product - - vec = start - point; //vector to it - count = 0; - for (stepindex = 0; stepindex < stepcount; stepindex++) { - stepvec = step (stepindex); //get the step - //crossing the line - if (vec.y () <= 0 && vec.y () + stepvec.y () > 0) { - cross = vec * stepvec; //cross product - if (cross > 0) - count++; //crossing right half - else if (cross == 0) - return INTERSECTING; //going through point - } - else if (vec.y () > 0 && vec.y () + stepvec.y () <= 0) { - cross = vec * stepvec; - if (cross < 0) - count--; //crossing back - else if (cross == 0) - return INTERSECTING; //illegal - } - vec += stepvec; //sum vectors - } - return count; //winding number -} - -/** - * C_OUTLINE::turn_direction - * - * @return the sum direction delta of the outline. 
- */ - -int16_t C_OUTLINE::turn_direction() const { //winding number - DIR128 prevdir; //previous direction - DIR128 dir; //current direction - int16_t stepindex; //index to cstep - int8_t dirdiff; //direction difference - int16_t count; //winding count - - if (stepcount == 0) - return 128; - count = 0; - prevdir = step_dir (stepcount - 1); - for (stepindex = 0; stepindex < stepcount; stepindex++) { - dir = step_dir (stepindex); - dirdiff = dir - prevdir; - ASSERT_HOST (dirdiff == 0 || dirdiff == 32 || dirdiff == -32); - count += dirdiff; - prevdir = dir; - } - ASSERT_HOST (count == 128 || count == -128); - return count; //winding number -} - -/** - * @name C_OUTLINE::reverse - * - * Reverse the direction of an outline. - */ - -void C_OUTLINE::reverse() { //reverse drection - DIR128 halfturn = MODULUS / 2; //amount to shift - DIR128 stepdir; //direction of step - int16_t stepindex; //index to cstep - int16_t farindex; //index to other side - int16_t halfsteps; //half of stepcount - - halfsteps = (stepcount + 1) / 2; - for (stepindex = 0; stepindex < halfsteps; stepindex++) { - farindex = stepcount - stepindex - 1; - stepdir = step_dir (stepindex); - set_step (stepindex, step_dir (farindex) + halfturn); - set_step (farindex, stepdir + halfturn); - } -} - -/** - * @name C_OUTLINE::move - * - * Move C_OUTLINE by vector - * @param vec vector to reposition OUTLINE by - */ - -void C_OUTLINE::move(const ICOORD vec) { - C_OUTLINE_IT it(&children); // iterator - - box.move (vec); - start += vec; - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - it.data ()->move (vec); // move child outlines -} - -/** - * Returns true if *this and its children are legally nested. - * The outer area of a child should have the opposite sign to the - * parent. If not, it means we have discarded an outline in between - * (probably due to excessive length). 
- */ -bool C_OUTLINE::IsLegallyNested() const { - if (stepcount == 0) return true; - int64_t parent_area = outer_area(); - // We aren't going to modify the list, or its contents, but there is - // no const iterator. - C_OUTLINE_IT child_it(const_cast(&children)); - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { - const C_OUTLINE* child = child_it.data(); - if (child->outer_area() * parent_area > 0 || !child->IsLegallyNested()) - return false; - } - return true; -} - -/** - * If this outline is smaller than the given min_size, delete this and - * remove from its list, via *it, after checking that *it points to this. - * Otherwise, if any children of this are too small, delete them. - * On entry, *it must be an iterator pointing to this. If this gets deleted - * then this is extracted from *it, so an iteration can continue. - * @param min_size minimum size for outline - * @param it outline iterator - */ -void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) { - if (box.width() < min_size || box.height() < min_size) { - ASSERT_HOST(this == it->data()); - delete it->extract(); // Too small so get rid of it and any children. - } else if (!children.empty()) { - // Search the children of this, deleting any that are too small. - C_OUTLINE_IT child_it(&children); - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); - child_it.forward()) { - C_OUTLINE* child = child_it.data(); - child->RemoveSmallRecursive(min_size, &child_it); - } - } -} - -// Factored out helpers below are used only by ComputeEdgeOffsets to operate -// on data from an 8-bit Pix, and assume that any input x and/or y are already -// constrained to be legal Pix coordinates. - -/** - * Helper computes the local 2-D gradient (dx, dy) from the 2x2 cell centered - * on the given (x,y). If the cell would go outside the image, it is padded - * with white. 
- */ -static void ComputeGradient(const l_uint32* data, int wpl, - int x, int y, int width, int height, - ICOORD* gradient) { - const l_uint32* line = data + y * wpl; - int pix_x_y = x < width && y < height ? GET_DATA_BYTE(line, x) : 255; - int pix_x_prevy = x < width && y > 0 ? GET_DATA_BYTE(line - wpl, x) : 255; - int pix_prevx_prevy = x > 0 && y > 0 ? GET_DATA_BYTE(line - wpl, x - 1) : 255; - int pix_prevx_y = x > 0 && y < height ? GET_DATA_BYTE(line, x - 1) : 255; - gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy)); - gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y)); -} - -/** - * Helper evaluates a vertical difference, (x,y) - (x,y-1), returning true if - * the difference, matches diff_sign and updating the best_diff, best_sum, - * best_y if a new max. - */ -static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign, - int x, int y, int height, - int* best_diff, int* best_sum, int* best_y) { - if (y <= 0 || y >= height) - return false; - const l_uint32* line = data + y * wpl; - int pixel1 = GET_DATA_BYTE(line - wpl, x); - int pixel2 = GET_DATA_BYTE(line, x); - int diff = (pixel2 - pixel1) * diff_sign; - if (diff > *best_diff) { - *best_diff = diff; - *best_sum = pixel1 + pixel2; - *best_y = y; - } - return diff > 0; -} - -/** - * Helper evaluates a horizontal difference, (x,y) - (x-1,y), where y is implied - * by the input image line, returning true if the difference matches diff_sign - * and updating the best_diff, best_sum, best_x if a new max. 
- */ -static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign, - int x, int width, - int* best_diff, int* best_sum, int* best_x) { - if (x <= 0 || x >= width) - return false; - int pixel1 = GET_DATA_BYTE(line, x - 1); - int pixel2 = GET_DATA_BYTE(line, x); - int diff = (pixel2 - pixel1) * diff_sign; - if (diff > *best_diff) { - *best_diff = diff; - *best_sum = pixel1 + pixel2; - *best_x = x; - } - return diff > 0; -} - -/** - * Adds sub-pixel resolution EdgeOffsets for the outline if the supplied - * pix is 8-bit. Does nothing otherwise. - * Operation: Consider the following near-horizontal line: - * @verbatim - * _________ - * |________ - * |________ - * @endverbatim - * At *every* position along this line, the gradient direction will be close - * to vertical. Extrapoaltion/interpolation of the position of the threshold - * that was used to binarize the image gives a more precise vertical position - * for each horizontal step, and the conflict in step direction and gradient - * direction can be used to ignore the vertical steps. - */ -void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { - if (pixGetDepth(pix) != 8) return; - const l_uint32* data = pixGetData(pix); - int wpl = pixGetWpl(pix); - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - bool negative = flag(COUT_INVERSE); - delete [] offsets; - offsets = new EdgeOffset[stepcount]; - ICOORD pos = start; - ICOORD prev_gradient; - ComputeGradient(data, wpl, pos.x(), height - pos.y(), width, height, - &prev_gradient); - for (int s = 0; s < stepcount; ++s) { - ICOORD step_vec = step(s); - TPOINT pt1(pos); - pos += step_vec; - TPOINT pt2(pos); - ICOORD next_gradient; - ComputeGradient(data, wpl, pos.x(), height - pos.y(), width, height, - &next_gradient); - // Use the sum of the prev and next as the working gradient. - ICOORD gradient = prev_gradient + next_gradient; - // best_diff will be manipulated to be always positive. 
- int best_diff = 0; - // offset will be the extrapolation of the location of the greyscale - // threshold from the edge with the largest difference, relative to the - // location of the binary edge. - int offset = 0; - if (pt1.y == pt2.y && abs(gradient.y()) * 2 >= abs(gradient.x())) { - // Horizontal step. diff_sign == 1 indicates black above. - int diff_sign = (pt1.x > pt2.x) == negative ? 1 : -1; - int x = std::min(pt1.x, pt2.x); - int y = height - pt1.y; - int best_sum = 0; - int best_y = y; - EvaluateVerticalDiff(data, wpl, diff_sign, x, y, height, - &best_diff, &best_sum, &best_y); - // Find the strongest edge. - int test_y = y; - do { - ++test_y; - } while (EvaluateVerticalDiff(data, wpl, diff_sign, x, test_y, height, - &best_diff, &best_sum, &best_y)); - test_y = y; - do { - --test_y; - } while (EvaluateVerticalDiff(data, wpl, diff_sign, x, test_y, height, - &best_diff, &best_sum, &best_y)); - offset = diff_sign * (best_sum / 2 - threshold) + - (y - best_y) * best_diff; - } else if (pt1.x == pt2.x && abs(gradient.x()) * 2 >= abs(gradient.y())) { - // Vertical step. diff_sign == 1 indicates black on the left. - int diff_sign = (pt1.y > pt2.y) == negative ? 1 : -1; - int x = pt1.x; - int y = height - std::max(pt1.y, pt2.y); - const l_uint32* line = pixGetData(pix) + y * wpl; - int best_sum = 0; - int best_x = x; - EvaluateHorizontalDiff(line, diff_sign, x, width, - &best_diff, &best_sum, &best_x); - // Find the strongest edge. 
- int test_x = x; - do { - ++test_x; - } while (EvaluateHorizontalDiff(line, diff_sign, test_x, width, - &best_diff, &best_sum, &best_x)); - test_x = x; - do { - --test_x; - } while (EvaluateHorizontalDiff(line, diff_sign, test_x, width, - &best_diff, &best_sum, &best_x)); - offset = diff_sign * (threshold - best_sum / 2) + - (best_x - x) * best_diff; - } - offsets[s].offset_numerator = - ClipToRange(offset, -INT8_MAX, INT8_MAX); - offsets[s].pixel_diff = ClipToRange(best_diff, 0, UINT8_MAX); - if (negative) gradient = -gradient; - // Compute gradient angle quantized to 256 directions, rotated by 64 (pi/2) - // to convert from gradient direction to edge direction. - offsets[s].direction = - Modulo(FCOORD::binary_angle_plus_pi(gradient.angle()) + 64, 256); - prev_gradient = next_gradient; - } -} - -/** - * Adds sub-pixel resolution EdgeOffsets for the outline using only - * a binary image source. - * - * Runs a sliding window of 5 edge steps over the outline, maintaining a count - * of the number of steps in each of the 4 directions in the window, and a - * sum of the x or y position of each step (as appropriate to its direction.) - * Ignores single-count steps EXCEPT the sharp U-turn and smoothes out the - * perpendicular direction. Eg - * @verbatim - * ___ ___ Chain code from the left: - * |___ ___ ___| 222122212223221232223000 - * |___| |_| Corresponding counts of each direction: - * 0 00000000000000000123 - * 1 11121111001111100000 - * 2 44434443443333343321 - * 3 00000001111111112111 - * Count of direction at center 41434143413313143313 - * Step gets used? 
YNYYYNYYYNYYNYNYYYyY (y= U-turn exception) - * Path redrawn showing only the used points: - * ___ ___ - * ___ ___ ___| - * ___ _ - * @endverbatim - * Sub-pixel edge position cannot be shown well with ASCII-art, but each - * horizontal step's y position is the mean of the y positions of the steps - * in the same direction in the sliding window, which makes a much smoother - * outline, without losing important detail. - */ -void C_OUTLINE::ComputeBinaryOffsets() { - delete [] offsets; - offsets = new EdgeOffset[stepcount]; - // Count of the number of steps in each direction in the sliding window. - int dir_counts[4]; - // Sum of the positions (y for a horizontal step, x for vertical) in each - // direction in the sliding window. - int pos_totals[4]; - memset(dir_counts, 0, sizeof(dir_counts)); - memset(pos_totals, 0, sizeof(pos_totals)); - ICOORD pos = start; - ICOORD tail_pos = pos; - // tail_pos is the trailing position, with the next point to be lost from - // the window. - tail_pos -= step(stepcount - 1); - tail_pos -= step(stepcount - 2); - // head_pos is the leading position, with the next point to be added to the - // window. - ICOORD head_pos = tail_pos; - // Set up the initial window with 4 points in [-2, 2) - for (int s = -2; s < 2; ++s) { - increment_step(s, 1, &head_pos, dir_counts, pos_totals); - } - for (int s = 0; s < stepcount; pos += step(s++)) { - // At step s, s in in the middle of [s-2, s+2]. - increment_step(s + 2, 1, &head_pos, dir_counts, pos_totals); - int dir_index = chain_code(s); - ICOORD step_vec = step(s); - int best_diff = 0; - int offset = 0; - // Use only steps that have a count of >=2 OR the strong U-turn with a - // single d and 2 at d-1 and 2 at d+1 (mod 4). - if (dir_counts[dir_index] >= 2 || (dir_counts[dir_index] == 1 && - dir_counts[Modulo(dir_index - 1, 4)] == 2 && - dir_counts[Modulo(dir_index + 1, 4)] == 2)) { - // Valid step direction. - best_diff = dir_counts[dir_index]; - int edge_pos = step_vec.x() == 0 ? 
pos.x() : pos.y(); - // The offset proposes that the actual step should be positioned at - // the mean position of the steps in the window of the same direction. - // See ASCII art above. - offset = pos_totals[dir_index] - best_diff * edge_pos; - } - offsets[s].offset_numerator = - ClipToRange(offset, -INT8_MAX, INT8_MAX); - offsets[s].pixel_diff = ClipToRange(best_diff, 0, UINT8_MAX); - // The direction is just the vector from start to end of the window. - FCOORD direction(head_pos.x() - tail_pos.x(), head_pos.y() - tail_pos.y()); - offsets[s].direction = direction.to_direction(); - increment_step(s - 2, -1, &tail_pos, dir_counts, pos_totals); - } -} - -/** - * Renders the outline to the given pix, with left and top being - * the coords of the upper-left corner of the pix. - */ -void C_OUTLINE::render(int left, int top, Pix* pix) const { - ICOORD pos = start; - for (int stepindex = 0; stepindex < stepcount; ++stepindex) { - ICOORD next_step = step(stepindex); - if (next_step.y() < 0) { - pixRasterop(pix, 0, top - pos.y(), pos.x() - left, 1, - PIX_NOT(PIX_DST), nullptr, 0, 0); - } else if (next_step.y() > 0) { - pixRasterop(pix, 0, top - pos.y() - 1, pos.x() - left, 1, - PIX_NOT(PIX_DST), nullptr, 0, 0); - } - pos += next_step; - } -} - -/** - * Renders just the outline to the given pix (no fill), with left and top - * being the coords of the upper-left corner of the pix. 
- * @param left coord - * @param top coord - * @param pix the pix to outline - */ -void C_OUTLINE::render_outline(int left, int top, Pix* pix) const { - ICOORD pos = start; - for (int stepindex = 0; stepindex < stepcount; ++stepindex) { - ICOORD next_step = step(stepindex); - if (next_step.y() < 0) { - pixSetPixel(pix, pos.x() - left, top - pos.y(), 1); - } else if (next_step.y() > 0) { - pixSetPixel(pix, pos.x() - left - 1, top - pos.y() - 1, 1); - } else if (next_step.x() < 0) { - pixSetPixel(pix, pos.x() - left - 1, top - pos.y(), 1); - } else if (next_step.x() > 0) { - pixSetPixel(pix, pos.x() - left, top - pos.y() - 1, 1); - } - pos += next_step; - } -} - -/** - * @name C_OUTLINE::plot - * - * Draw the outline in the given colour. - * @param window window to draw in - * @param colour colour to draw in - */ - -#ifndef GRAPHICS_DISABLED -void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const { - int16_t stepindex; // index to cstep - ICOORD pos; // current position - DIR128 stepdir; // direction of step - - pos = start; // current position - window->Pen(colour); - if (stepcount == 0) { - window->Rectangle(box.left(), box.top(), box.right(), box.bottom()); - return; - } - window->SetCursor(pos.x(), pos.y()); - - stepindex = 0; - while (stepindex < stepcount) { - pos += step(stepindex); // step to next - stepdir = step_dir(stepindex); - stepindex++; // count steps - // merge straight lines - while (stepindex < stepcount && - stepdir.get_dir() == step_dir(stepindex).get_dir()) { - pos += step(stepindex); - stepindex++; - } - window->DrawTo(pos.x(), pos.y()); - } -} - -/** - * Draws the outline in the given colour, normalized using the given denorm, - * making use of sub-pixel accurate information if available. 
- */ -void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, - ScrollView* window) const { - window->Pen(colour); - if (stepcount == 0) { - window->Rectangle(box.left(), box.top(), box.right(), box.bottom()); - return; - } - const DENORM* root_denorm = denorm.RootDenorm(); - ICOORD pos = start; // current position - FCOORD f_pos = sub_pixel_pos_at_index(pos, 0); - FCOORD pos_normed; - denorm.NormTransform(root_denorm, f_pos, &pos_normed); - window->SetCursor(IntCastRounded(pos_normed.x()), - IntCastRounded(pos_normed.y())); - for (int s = 0; s < stepcount; pos += step(s++)) { - int edge_weight = edge_strength_at_index(s); - if (edge_weight == 0) { - // This point has conflicting gradient and step direction, so ignore it. - continue; - } - FCOORD f_pos = sub_pixel_pos_at_index(pos, s); - FCOORD pos_normed; - denorm.NormTransform(root_denorm, f_pos, &pos_normed); - window->DrawTo(IntCastRounded(pos_normed.x()), - IntCastRounded(pos_normed.y())); - } -} -#endif - -/** - * @name C_OUTLINE::operator= - * - * Assignment - deep copy data - * @param source assign from this - */ - -C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) { - box = source.box; - start = source.start; - free(steps); - stepcount = source.stepcount; - steps = (uint8_t *)malloc(step_mem()); - memmove (steps, source.steps, step_mem()); - if (!children.empty ()) - children.clear (); - children.deep_copy(&source.children, &deep_copy); - delete [] offsets; - if (source.offsets != nullptr) { - offsets = new EdgeOffset[stepcount]; - memcpy(offsets, source.offsets, stepcount * sizeof(*offsets)); - } else { - offsets = nullptr; - } - return *this; -} - -/** - * Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals - * by the step, increment, and vertical step ? x : y position * increment - * at step s Mod stepcount respectively. Used to add or subtract the - * direction and position to/from accumulators of a small neighbourhood. 
- */ -void C_OUTLINE::increment_step(int s, int increment, ICOORD* pos, - int* dir_counts, int* pos_totals) const { - int step_index = Modulo(s, stepcount); - int dir_index = chain_code(step_index); - dir_counts[dir_index] += increment; - ICOORD step_vec = step(step_index); - if (step_vec.x() == 0) - pos_totals[dir_index] += pos->x() * increment; - else - pos_totals[dir_index] += pos->y() * increment; - *pos += step_vec; -} - -ICOORD C_OUTLINE::chain_step(int chaindir) { - return step_coords[chaindir % 4]; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/coutln.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/coutln.h deleted file mode 100644 index 4cb4fc9e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/coutln.h +++ /dev/null @@ -1,293 +0,0 @@ -/********************************************************************** - * File: coutln.h - * Description: Code for the C_OUTLINE class. - * Author: Ray Smith - * Created: Mon Oct 07 16:01:57 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef COUTLN_H -#define COUTLN_H - -#include // for int16_t, int32_t -#include "bits16.h" // for BITS16 -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "mod128.h" // for DIR128, DIRBITS -#include "platform.h" // for DLLSYM -#include "points.h" // for ICOORD, FCOORD -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::Color - -class CRACKEDGE; -class DENORM; - -#define INTERSECTING INT16_MAX//no winding number - - //mask to get step -#define STEP_MASK 3 - -enum C_OUTLINE_FLAGS -{ - COUT_INVERSE //White on black blob -}; - -// Simple struct to hold the 3 values needed to compute a more precise edge -// position and direction. The offset_numerator is the difference between the -// grey threshold and the mean pixel value. pixel_diff is the difference between -// the pixels in the edge. Consider the following row of pixels: p1 p2 p3 p4 p5 -// Say the image was thresholded at threshold t, making p1, p2, p3 black -// and p4, p5 white (p1, p2, p3 < t, and p4, p5 >= t), but suppose that -// max(p[i+1] - p[i]) is p3 - p2. Then the extrapolated position of the edge, -// based on the maximum gradient, is at the crack between p2 and p3 plus the -// offset (t - (p2+p3)/2)/(p3 - p2). We store the pixel difference p3-p2 -// denominator in pixel_diff and the offset numerator, relative to the original -// binary edge (t - (p2+p3)/2) - (p3 -p2) in offset_numerator. -// The sign of offset_numerator and pixel_diff are manipulated to ensure -// that the pixel_diff, which will be used as a weight, is always positive. -// The direction stores the quantized feature direction for the given step -// computed from the edge gradient. (Using binary_angle_plus_pi.) -// If the pixel_diff is zero, it means that the direction of the gradient -// is in conflict with the step direction, so this step is to be ignored. 
-struct EdgeOffset { - int8_t offset_numerator; - uint8_t pixel_diff; - uint8_t direction; -}; - -class DLLSYM C_OUTLINE; //forward declaration -struct Pix; - -ELISTIZEH (C_OUTLINE) -class DLLSYM C_OUTLINE:public ELIST_LINK { - public: - C_OUTLINE() { - stepcount = 0; - steps = nullptr; - offsets = nullptr; - } - C_OUTLINE( //constructor - CRACKEDGE *startpt, //from edge detector - ICOORD bot_left, //bounding box //length of loop - ICOORD top_right, - int16_t length); - C_OUTLINE(ICOORD startpt, //start of loop - DIR128 *new_steps, //steps in loop - int16_t length); //length of loop - //outline to copy - C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation); //and rotate - - // Build a fake outline, given just a bounding box and append to the list. - static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines); - - ~C_OUTLINE () { //destructor - free(steps); - delete [] offsets; - } - - bool flag( //test flag - C_OUTLINE_FLAGS mask) const { //flag to test - return flags.bit(mask); - } - void set_flag( //set flag value - C_OUTLINE_FLAGS mask, //flag to test - bool value) { //value to set - flags.set_bit(mask, value); - } - - C_OUTLINE_LIST *child() { //get child list - return &children; - } - - //access function - const TBOX &bounding_box() const { - return box; - } - void set_step( //set a step - int16_t stepindex, //index of step - int8_t stepdir) { //chain code - int shift = stepindex%4 * 2; - uint8_t mask = 3 << shift; - steps[stepindex/4] = ((stepdir << shift) & mask) | - (steps[stepindex/4] & ~mask); - //squeeze 4 into byte - } - void set_step( //set a step - int16_t stepindex, //index of step - DIR128 stepdir) { //direction - //clean it - int8_t chaindir = stepdir.get_dir() >> (DIRBITS - 2); - //difference - set_step(stepindex, chaindir); - //squeeze 4 into byte - } - - int32_t pathlength() const { //get path length - return stepcount; - } - // Return step at a given index as a DIR128. 
- DIR128 step_dir(int index) const { - return DIR128((int16_t)(((steps[index/4] >> (index%4 * 2)) & STEP_MASK) << - (DIRBITS - 2))); - } - // Return the step vector for the given outline position. - ICOORD step(int index) const { // index of step - return step_coords[chain_code(index)]; - } - // get start position - const ICOORD &start_pos() const { - return start; - } - // Returns the position at the given index on the outline. - // NOT to be used lightly, as it has to iterate the outline to find out. - ICOORD position_at_index(int index) const { - ICOORD pos = start; - for (int i = 0; i < index; ++i) - pos += step(i); - return pos; - } - // Returns the sub-pixel accurate position given the integer position pos - // at the given index on the outline. pos may be a return value of - // position_at_index, or computed by repeatedly adding step to the - // start_pos() in the usual way. - FCOORD sub_pixel_pos_at_index(const ICOORD& pos, int index) const { - const ICOORD& step_to_next(step(index)); - FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f, - pos.y() + step_to_next.y() / 2.0f); - if (offsets != nullptr && offsets[index].pixel_diff > 0) { - float offset = offsets[index].offset_numerator; - offset /= offsets[index].pixel_diff; - if (step_to_next.x() != 0) - f_pos.set_y(f_pos.y() + offset); - else - f_pos.set_x(f_pos.x() + offset); - } - return f_pos; - } - // Returns the step direction for the given index or -1 if there is none. - int direction_at_index(int index) const { - if (offsets != nullptr && offsets[index].pixel_diff > 0) - return offsets[index].direction; - return -1; - } - // Returns the edge strength for the given index. - // If there are no recorded edge strengths, returns 1 (assuming the image - // is binary). Returns 0 if the gradient direction conflicts with the - // step direction, indicating that this position could be skipped. 
- int edge_strength_at_index(int index) const { - if (offsets != nullptr) - return offsets[index].pixel_diff; - return 1; - } - // Return the step as a chain code (0-3) related to the standard feature - // direction of binary_angle_plus_pi by: - // chain_code * 64 = feature direction. - int chain_code(int index) const { // index of step - return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK; - } - - int32_t area() const; // Returns area of self and 1st level children. - int32_t perimeter() const; // Total perimeter of self and 1st level children. - int32_t outer_area() const; // Returns area of self only. - int32_t count_transitions( //count maxima - int32_t threshold); //size threshold - - bool operator< ( //containment test - const C_OUTLINE & other) const; - bool operator> ( //containment test - C_OUTLINE & other) const - { - return other < *this; //use the < to do it - } - int16_t winding_number( //get winding number - ICOORD testpt) const; //around this point - //get direction - int16_t turn_direction() const; - void reverse(); //reverse direction - - void move( // reposition outline - const ICOORD vec); // by vector - - // Returns true if *this and its children are legally nested. - // The outer area of a child should have the opposite sign to the - // parent. If not, it means we have discarded an outline in between - // (probably due to excessive length). - bool IsLegallyNested() const; - - // If this outline is smaller than the given min_size, delete this and - // remove from its list, via *it, after checking that *it points to this. - // Otherwise, if any children of this are too small, delete them. - // On entry, *it must be an iterator pointing to this. If this gets deleted - // then this is extracted from *it, so an iteration can continue. - void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it); - - // Adds sub-pixel resolution EdgeOffsets for the outline if the supplied - // pix is 8-bit. Does nothing otherwise. 
- void ComputeEdgeOffsets(int threshold, Pix* pix); - // Adds sub-pixel resolution EdgeOffsets for the outline using only - // a binary image source. - void ComputeBinaryOffsets(); - - // Renders the outline to the given pix, with left and top being - // the coords of the upper-left corner of the pix. - void render(int left, int top, Pix* pix) const; - - // Renders just the outline to the given pix (no fill), with left and top - // being the coords of the upper-left corner of the pix. - void render_outline(int left, int top, Pix* pix) const; - - #ifndef GRAPHICS_DISABLED - void plot( //draw one - ScrollView* window, //window to draw in - ScrollView::Color colour) const; //colour to draw it - // Draws the outline in the given colour, normalized using the given denorm, - // making use of sub-pixel accurate information if available. - void plot_normed(const DENORM& denorm, ScrollView::Color colour, - ScrollView* window) const; - #endif // GRAPHICS_DISABLED - - C_OUTLINE& operator=(const C_OUTLINE& source); - - static C_OUTLINE* deep_copy(const C_OUTLINE* src) { - C_OUTLINE* outline = new C_OUTLINE; - *outline = *src; - return outline; - } - - static ICOORD chain_step(int chaindir); - - // The maximum length of any outline. The stepcount is stored as 16 bits, - // but it is probably not a good idea to increase this constant by much - // and switch to 32 bits, as it plays an important role in keeping huge - // outlines invisible, which prevents bad speed behavior. - static const int kMaxOutlineLength = 16000; - - private: - // Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals - // by the step, increment, and vertical step ? x : y position * increment - // at step s Mod stepcount respectively. Used to add or subtract the - // direction and position to/from accumulators of a small neighbourhood. 
- void increment_step(int s, int increment, ICOORD* pos, int* dir_counts, - int* pos_totals) const; - int step_mem() const { return (stepcount+3) / 4; } - - TBOX box; // bounding box - ICOORD start; // start coord - int16_t stepcount; // no of steps - BITS16 flags; // flags about outline - uint8_t *steps; // step array - EdgeOffset* offsets; // Higher precision edge. - C_OUTLINE_LIST children; // child elements - static ICOORD step_coords[4]; -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/crakedge.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/crakedge.h deleted file mode 100644 index 4ce568b2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/crakedge.h +++ /dev/null @@ -1,37 +0,0 @@ -/********************************************************************** - * File: crakedge.h (Formerly: crkedge.h) - * Description: Structures for the Crack following edge detector. - * Author: Ray Smith - * Created: Fri Mar 22 16:06:38 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef CRAKEDGE_H -#define CRAKEDGE_H - -#include "points.h" -#include "mod128.h" - -class CRACKEDGE { - public: - CRACKEDGE() = default; - - ICOORD pos; /*position of crack */ - int8_t stepx; //edge step - int8_t stepy; - int8_t stepdir; //chaincode - CRACKEDGE *prev; /*previous point */ - CRACKEDGE *next; /*next point */ -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/debugpixa.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/debugpixa.h deleted file mode 100644 index 37e63215..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/debugpixa.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_ -#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_ - -#include "allheaders.h" - -namespace tesseract { - -// Class to hold a Pixa collection of debug images with captions and save them -// to a PDF file. -class DebugPixa { - public: - // TODO(rays) add another constructor with size control. - DebugPixa() { - pixa_ = pixaCreate(0); - fonts_ = bmfCreate(nullptr, 14); - } - // If the filename_ has been set and there are any debug images, they are - // written to the set filename_. - ~DebugPixa() { - pixaDestroy(&pixa_); - bmfDestroy(&fonts_); - } - - // Adds the given pix to the set of pages in the PDF file, with the given - // caption added to the top. - void AddPix(const Pix* pix, const char* caption) { - int depth = pixGetDepth(const_cast(pix)); - int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80); - Pix* pix_debug = pixAddSingleTextblock( - const_cast(pix), fonts_, caption, color, L_ADD_BELOW, nullptr); - pixaAddPix(pixa_, pix_debug, L_INSERT); - } - - // Sets the destination filename and enables images to be written to a PDF - // on destruction. 
- void WritePDF(const char* filename) { - if (pixaGetCount(pixa_) > 0) { - pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename); - pixaClear(pixa_); - } - } - - private: - // The collection of images to put in the PDF. - Pixa* pixa_; - // The fonts used to draw text captions. - L_Bmf* fonts_; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/detlinefit.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/detlinefit.cpp deleted file mode 100644 index c4093569..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/detlinefit.cpp +++ /dev/null @@ -1,293 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: detlinefit.cpp -// Description: Deterministic least median squares line fitting. -// Author: Ray Smith -// Created: Thu Feb 28 14:45:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "detlinefit.h" -#include "statistc.h" -#include "tprintf.h" - -#include -#include // for FLT_MAX - -namespace tesseract { - -// The number of points to consider at each end. -const int kNumEndPoints = 3; -// The minimum number of points at which to switch to number of points -// for badly fitted lines. 
-// To ensure a sensible error metric, kMinPointsForErrorCount should be at -// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in -// ComputeUpperQuartileError. -const int kMinPointsForErrorCount = 16; -// The maximum real distance to use before switching to number of -// mis-fitted points, which will get square-rooted for true distance. -const int kMaxRealDistance = 2.0; - -DetLineFit::DetLineFit() : square_length_(0.0) { -} - -// Delete all Added points. -void DetLineFit::Clear() { - pts_.clear(); - distances_.clear(); -} - -// Add a new point. Takes a copy - the pt doesn't need to stay in scope. -void DetLineFit::Add(const ICOORD& pt) { - pts_.push_back(PointWidth(pt, 0)); -} -// Associates a half-width with the given point if a point overlaps the -// previous point by more than half the width, and its distance is further -// than the previous point, then the more distant point is ignored in the -// distance calculation. Useful for ignoring i dots and other diacritics. -void DetLineFit::Add(const ICOORD& pt, int halfwidth) { - pts_.push_back(PointWidth(pt, halfwidth)); -} - -// Fits a line to the points, ignoring the skip_first initial points and the -// skip_last final points, returning the fitted line as a pair of points, -// and the upper quartile error. -double DetLineFit::Fit(int skip_first, int skip_last, - ICOORD* pt1, ICOORD* pt2) { - // Do something sensible with no points. - if (pts_.empty()) { - pt1->set_x(0); - pt1->set_y(0); - *pt2 = *pt1; - return 0.0; - } - // Count the points and find the first and last kNumEndPoints. 
- int pt_count = pts_.size(); - ICOORD* starts[kNumEndPoints]; - if (skip_first >= pt_count) skip_first = pt_count - 1; - int start_count = 0; - int end_i = std::min(skip_first + kNumEndPoints, pt_count); - for (int i = skip_first; i < end_i; ++i) { - starts[start_count++] = &pts_[i].pt; - } - ICOORD* ends[kNumEndPoints]; - if (skip_last >= pt_count) skip_last = pt_count - 1; - int end_count = 0; - end_i = std::max(0, pt_count - kNumEndPoints - skip_last); - for (int i = pt_count - 1 - skip_last; i >= end_i; --i) { - ends[end_count++] = &pts_[i].pt; - } - // 1 or 2 points need special treatment. - if (pt_count <= 2) { - *pt1 = *starts[0]; - if (pt_count > 1) - *pt2 = *ends[0]; - else - *pt2 = *pt1; - return 0.0; - } - // Although with between 2 and 2*kNumEndPoints-1 points, there will be - // overlap in the starts, ends sets, this is OK and taken care of by the - // if (*start != *end) test below, which also tests for equal input points. - double best_uq = -1.0; - // Iterate each pair of points and find the best fitting line. - for (int i = 0; i < start_count; ++i) { - ICOORD* start = starts[i]; - for (int j = 0; j < end_count; ++j) { - ICOORD* end = ends[j]; - if (*start != *end) { - ComputeDistances(*start, *end); - // Compute the upper quartile error from the line. - double dist = EvaluateLineFit(); - if (dist < best_uq || best_uq < 0.0) { - best_uq = dist; - *pt1 = *start; - *pt2 = *end; - } - } - } - } - // Finally compute the square root to return the true distance. - return best_uq > 0.0 ? sqrt(best_uq) : best_uq; -} - -// Constrained fit with a supplied direction vector. Finds the best line_pt, -// that is one of the supplied points having the median cross product with -// direction, ignoring points that have a cross product outside of the range -// [min_dist, max_dist]. Returns the resulting error metric using the same -// reduced set of points. 
-// *Makes use of floating point arithmetic* -double DetLineFit::ConstrainedFit(const FCOORD& direction, - double min_dist, double max_dist, - bool debug, ICOORD* line_pt) { - ComputeConstrainedDistances(direction, min_dist, max_dist); - // Do something sensible with no points or computed distances. - if (pts_.empty() || distances_.empty()) { - line_pt->set_x(0); - line_pt->set_y(0); - return 0.0; - } - int median_index = distances_.choose_nth_item(distances_.size() / 2); - *line_pt = distances_[median_index].data; - if (debug) { - tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n", - direction.x(), direction.y(), - line_pt->x(), line_pt->y(), distances_.size()); - for (int i = 0; i < distances_.size(); ++i) { - tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(), - distances_[i].data.y(), distances_[i].key); - } - tprintf("Result = %d\n", median_index); - } - // Center distances on the fitted point. - double dist_origin = direction * *line_pt; - for (int i = 0; i < distances_.size(); ++i) { - distances_[i].key -= dist_origin; - } - return sqrt(EvaluateLineFit()); -} - -// Returns true if there were enough points at the last call to Fit or -// ConstrainedFit for the fitted points to be used on a badly fitted line. -bool DetLineFit::SufficientPointsForIndependentFit() const { - return distances_.size() >= kMinPointsForErrorCount; -} - -// Backwards compatible fit returning a gradient and constant. -// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this -// function in preference to the LMS class. -double DetLineFit::Fit(float* m, float* c) { - ICOORD start, end; - double error = Fit(&start, &end); - if (end.x() != start.x()) { - *m = static_cast(end.y() - start.y()) / (end.x() - start.x()); - *c = start.y() - *m * start.x(); - } else { - *m = 0.0f; - *c = 0.0f; - } - return error; -} - -// Backwards compatible constrained fit with a supplied gradient. -// Deprecated. 
Use ConstrainedFit(const FCOORD& direction) where possible -// to avoid potential difficulties with infinite gradients. -double DetLineFit::ConstrainedFit(double m, float* c) { - // Do something sensible with no points. - if (pts_.empty()) { - *c = 0.0f; - return 0.0; - } - double cos = 1.0 / sqrt(1.0 + m * m); - FCOORD direction(cos, m * cos); - ICOORD line_pt; - double error = ConstrainedFit(direction, -FLT_MAX, FLT_MAX, false, &line_pt); - *c = line_pt.y() - line_pt.x() * m; - return error; -} - -// Computes and returns the squared evaluation metric for a line fit. -double DetLineFit::EvaluateLineFit() { - // Compute the upper quartile error from the line. - double dist = ComputeUpperQuartileError(); - if (distances_.size() >= kMinPointsForErrorCount && - dist > kMaxRealDistance * kMaxRealDistance) { - // Use the number of mis-fitted points as the error metric, as this - // gives a better measure of fit for badly fitted lines where more - // than a quarter are badly fitted. - double threshold = kMaxRealDistance * sqrt(square_length_); - dist = NumberOfMisfittedPoints(threshold); - } - return dist; -} - -// Computes the absolute error distances of the points from the line, -// and returns the squared upper-quartile error distance. -double DetLineFit::ComputeUpperQuartileError() { - int num_errors = distances_.size(); - if (num_errors == 0) return 0.0; - // Get the absolute values of the errors. - for (int i = 0; i < num_errors; ++i) { - if (distances_[i].key < 0) distances_[i].key = -distances_[i].key; - } - // Now get the upper quartile distance. - int index = distances_.choose_nth_item(3 * num_errors / 4); - double dist = distances_[index].key; - // The true distance is the square root of the dist squared / square_length. - // Don't bother with the square root. Just return the square distance. - return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0; -} - -// Returns the number of sample points that have an error more than threshold. 
-int DetLineFit::NumberOfMisfittedPoints(double threshold) const { - int num_misfits = 0; - int num_dists = distances_.size(); - // Get the absolute values of the errors. - for (int i = 0; i < num_dists; ++i) { - if (distances_[i].key > threshold) - ++num_misfits; - } - return num_misfits; -} - -// Computes all the cross product distances of the points from the line, -// storing the actual (signed) cross products in distances. -// Ignores distances of points that are further away than the previous point, -// and overlaps the previous point by at least half. -void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) { - distances_.truncate(0); - ICOORD line_vector = end; - line_vector -= start; - square_length_ = line_vector.sqlength(); - int line_length = IntCastRounded(sqrt(square_length_)); - // Compute the distance of each point from the line. - int prev_abs_dist = 0; - int prev_dot = 0; - for (int i = 0; i < pts_.size(); ++i) { - ICOORD pt_vector = pts_[i].pt; - pt_vector -= start; - int dot = line_vector % pt_vector; - // Compute |line_vector||pt_vector|sin(angle between) - int dist = line_vector * pt_vector; - int abs_dist = dist < 0 ? -dist : dist; - if (abs_dist > prev_abs_dist && i > 0) { - // Ignore this point if it overlaps the previous one. - int separation = abs(dot - prev_dot); - if (separation < line_length * pts_[i].halfwidth || - separation < line_length * pts_[i - 1].halfwidth) - continue; - } - distances_.push_back(DistPointPair(dist, pts_[i].pt)); - prev_abs_dist = abs_dist; - prev_dot = dot; - } -} - -// Computes all the cross product distances of the points perpendicular to -// the given direction, ignoring distances outside of the give distance range, -// storing the actual (signed) cross products in distances_. 
-void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction, - double min_dist, double max_dist) { - distances_.truncate(0); - square_length_ = direction.sqlength(); - // Compute the distance of each point from the line. - for (int i = 0; i < pts_.size(); ++i) { - FCOORD pt_vector = pts_[i].pt; - // Compute |line_vector||pt_vector|sin(angle between) - double dist = direction * pt_vector; - if (min_dist <= dist && dist <= max_dist) - distances_.push_back(DistPointPair(dist, pts_[i].pt)); - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/detlinefit.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/detlinefit.h deleted file mode 100644 index 82940437..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/detlinefit.h +++ /dev/null @@ -1,162 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: detlinefit.h -// Description: Deterministic least upper-quartile squares line fitting. -// Author: Ray Smith -// Created: Thu Feb 28 14:35:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_ -#define TESSERACT_CCSTRUCT_DETLINEFIT_H_ - -#include "genericvector.h" -#include "kdpair.h" -#include "points.h" - -namespace tesseract { - -// This class fits a line to a set of ICOORD points. -// There is no restriction on the direction of the line, as it -// uses a vector method, ie no concern over infinite gradients. -// The fitted line has the least upper quartile of squares of perpendicular -// distances of all source points from the line, subject to the constraint -// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}] -// i.e. the 9 combinations of one of the first 3 and last 3 points. -// A fundamental assumption of this algorithm is that one of the first 3 and -// one of the last 3 points are near the best line fit. -// The points must be Added in line order for the algorithm to work properly. -// No floating point calculations are needed* to make an accurate fit, -// and no random numbers are needed** so the algorithm is deterministic, -// architecture-stable, and compiler-stable as well as stable to minor -// changes in the input. -// *A single floating point division is used to compute each line's distance. -// This is unlikely to result in choice of a different line, but if it does, -// it would be easy to replace with a 64 bit integer calculation. -// **Random numbers are used in the nth_item function, but the worst -// non-determinism that can result is picking a different result among equals, -// and that wouldn't make any difference to the end-result distance, so the -// randomness does not affect the determinism of the algorithm. The random -// numbers are only there to guarantee average linear time. -// Fitting time is linear, but with a high constant, as it tries 9 different -// lines and computes the distance of all points each time. 
-// This class is aimed at replacing the LLSQ (linear least squares) and -// LMS (least median of squares) classes that are currently used for most -// of the line fitting in Tesseract. -class DetLineFit { - public: - DetLineFit(); - ~DetLineFit() = default; - - // Delete all Added points. - void Clear(); - - // Adds a new point. Takes a copy - the pt doesn't need to stay in scope. - // Add must be called on points in sequence along the line. - void Add(const ICOORD& pt); - // Associates a half-width with the given point if a point overlaps the - // previous point by more than half the width, and its distance is further - // than the previous point, then the more distant point is ignored in the - // distance calculation. Useful for ignoring i dots and other diacritics. - void Add(const ICOORD& pt, int halfwidth); - - // Fits a line to the points, returning the fitted line as a pair of - // points, and the upper quartile error. - double Fit(ICOORD* pt1, ICOORD* pt2) { - return Fit(0, 0, pt1, pt2); - } - // Fits a line to the points, ignoring the skip_first initial points and the - // skip_last final points, returning the fitted line as a pair of points, - // and the upper quartile error. - double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2); - - // Constrained fit with a supplied direction vector. Finds the best line_pt, - // that is one of the supplied points having the median cross product with - // direction, ignoring points that have a cross product outside of the range - // [min_dist, max_dist]. Returns the resulting error metric using the same - // reduced set of points. - // *Makes use of floating point arithmetic* - double ConstrainedFit(const FCOORD& direction, - double min_dist, double max_dist, - bool debug, ICOORD* line_pt); - - // Returns true if there were enough points at the last call to Fit or - // ConstrainedFit for the fitted points to be used on a badly fitted line. 
- bool SufficientPointsForIndependentFit() const; - - // Backwards compatible fit returning a gradient and constant. - // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this - // function in preference to the LMS class. - double Fit(float* m, float* c); - - // Backwards compatible constrained fit with a supplied gradient. - // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible - // to avoid potential difficulties with infinite gradients. - double ConstrainedFit(double m, float* c); - - private: - // Simple struct to hold an ICOORD point and a halfwidth representing half - // the "width" (supposedly approximately parallel to the direction of the - // line) of each point, such that distant points can be discarded when they - // overlap nearer points. (Think i dot and other diacritics or noise.) - struct PointWidth { - PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {} - PointWidth(const ICOORD& pt0, int halfwidth0) - : pt(pt0), halfwidth(halfwidth0) {} - - ICOORD pt; - int halfwidth; - }; - // Type holds the distance of each point from the fitted line and the point - // itself. Use of double allows integer distances from ICOORDs to be stored - // exactly, and also the floating point results from ConstrainedFit. - using DistPointPair = KDPairInc; - - // Computes and returns the squared evaluation metric for a line fit. - double EvaluateLineFit(); - - // Computes the absolute values of the precomputed distances_, - // and returns the squared upper-quartile error distance. - double ComputeUpperQuartileError(); - - // Returns the number of sample points that have an error more than threshold. - int NumberOfMisfittedPoints(double threshold) const; - - // Computes all the cross product distances of the points from the line, - // storing the actual (signed) cross products in distances_. - // Ignores distances of points that are further away than the previous point, - // and overlaps the previous point by at least half. 
- void ComputeDistances(const ICOORD& start, const ICOORD& end); - - // Computes all the cross product distances of the points perpendicular to - // the given direction, ignoring distances outside of the give distance range, - // storing the actual (signed) cross products in distances_. - void ComputeConstrainedDistances(const FCOORD& direction, - double min_dist, double max_dist); - - // Stores all the source points in the order they were given and their - // halfwidths, if any. - GenericVector pts_; - // Stores the computed perpendicular distances of (some of) the pts_ from a - // given vector (assuming it goes through the origin, making it a line). - // Since the distances may be a subset of the input points, and get - // re-ordered by the nth_item function, the original point is stored - // along side the distance. - GenericVector distances_; // Distances of points. - // The squared length of the vector used to compute distances_. - double square_length_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/dppoint.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/dppoint.cpp deleted file mode 100644 index 1f9e48bc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/dppoint.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************** - * File: dppoint.cpp - * Description: Simple generic dynamic programming class. - * Author: Ray Smith - * Created: Wed Mar 25 19:08:01 PDT 2009 - * - * (C) Copyright 2009, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "dppoint.h" -#include "errcode.h" -#include "tprintf.h" - -namespace tesseract { - -// Solve the dynamic programming problem for the given array of points, with -// the given size and cost function. -// Steps backwards are limited to being between min_step and max_step -// inclusive. -// The return value is the tail of the best path. -DPPoint* DPPoint::Solve(int min_step, int max_step, bool debug, - CostFunc cost_func, int size, DPPoint* points) { - if (size <= 0 || max_step < min_step || min_step >= size) - return nullptr; // Degenerate, but not necessarily an error. - ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true. - if (debug) - tprintf("min = %d, max=%d\n", - min_step, max_step); - // Evaluate the total cost at each point. - for (int i = 0; i < size; ++i) { - for (int offset = min_step; offset <= max_step; ++offset) { - DPPoint* prev = offset <= i ? points + i - offset : nullptr; - int64_t new_cost = (points[i].*cost_func)(prev); - if (points[i].best_prev_ != nullptr && offset > min_step * 2 && - new_cost > points[i].total_cost_) - break; // Find only the first minimum if going over twice the min. - } - points[i].total_cost_ += points[i].local_cost_; - if (debug) { - tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n", - i, points[i].local_cost_, points[i].total_cost_, - points[i].total_steps_); - } - } - // Now find the end of the best path and return it. 
- int best_cost = points[size - 1].total_cost_; - int best_end = size - 1; - for (int end = best_end - 1; end >= size - min_step; --end) { - int cost = points[end].total_cost_; - if (cost < best_cost) { - best_cost = cost; - best_end = end; - } - } - return points + best_end; -} - -// A CostFunc that takes the variance of step into account in the cost. -int64_t DPPoint::CostWithVariance(const DPPoint* prev) { - if (prev == nullptr || prev == this) { - UpdateIfBetter(0, 1, nullptr, 0, 0, 0); - return 0; - } - - int delta = this - prev; - int32_t n = prev->n_ + 1; - int32_t sig_x = prev->sig_x_ + delta; - int64_t sig_xsq = prev->sig_xsq_ + delta * delta; - int64_t cost = (sig_xsq - sig_x * sig_x / n) / n; - cost += prev->total_cost_; - UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq); - return cost; -} - -// Update the other members if the cost is lower. -void DPPoint::UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint* prev, - int32_t n, int32_t sig_x, int64_t sig_xsq) { - if (cost < total_cost_) { - total_cost_ = cost; - total_steps_ = steps; - best_prev_ = prev; - n_ = n; - sig_x_ = sig_x; - sig_xsq_ = sig_xsq; - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/dppoint.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/dppoint.h deleted file mode 100644 index 4694ef9d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/dppoint.h +++ /dev/null @@ -1,101 +0,0 @@ -/********************************************************************** - * File: dppoint.h - * Description: Simple generic dynamic programming class. - * Author: Ray Smith - * Created: Wed Mar 25 18:57:01 PDT 2009 - * - * (C) Copyright 2009, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCSTRUCT_DPPOINT_H_ -#define TESSERACT_CCSTRUCT_DPPOINT_H_ - -#include - -namespace tesseract { - -// A simple class to provide a dynamic programming solution to a class of -// 1st-order problems in which the cost is dependent only on the current -// step and the best cost to that step, with a possible special case -// of using the variance of the steps, and only the top choice is required. -// Useful for problems such as finding the optimal cut points in a fixed-pitch -// (vertical or horizontal) situation. -// Skeletal Example: -// DPPoint* array = new DPPoint[width]; -// for (int i = 0; i < width; i++) { -// array[i].AddLocalCost(cost_at_i) -// } -// DPPoint* best_end = DPPoint::Solve(..., array); -// while (best_end != nullptr) { -// int cut_index = best_end - array; -// best_end = best_end->best_prev(); -// } -// delete [] array; -class DPPoint { - public: - // The cost function evaluates the total cost at this (excluding this's - // local_cost) and if it beats this's total_cost, then - // replace the appropriate values in this. - typedef int64_t (DPPoint::*CostFunc)(const DPPoint* prev); - - DPPoint() - : local_cost_(0), total_cost_(INT32_MAX), total_steps_(1), best_prev_(nullptr), - n_(0), sig_x_(0), sig_xsq_(0) { - } - - // Solve the dynamic programming problem for the given array of points, with - // the given size and cost function. - // Steps backwards are limited to being between min_step and max_step - // inclusive. 
- // The return value is the tail of the best path. - static DPPoint* Solve(int min_step, int max_step, bool debug, - CostFunc cost_func, int size, DPPoint* points); - - // A CostFunc that takes the variance of step into account in the cost. - int64_t CostWithVariance(const DPPoint* prev); - - // Accessors. - int total_cost() const { - return total_cost_; - } - int Pathlength() const { - return total_steps_; - } - const DPPoint* best_prev() const { - return best_prev_; - } - void AddLocalCost(int new_cost) { - local_cost_ += new_cost; - } - - private: - // Code common to different cost functions. - - // Update the other members if the cost is lower. - void UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint* prev, - int32_t n, int32_t sig_x, int64_t sig_xsq); - - int32_t local_cost_; // Cost of this point on its own. - int32_t total_cost_; // Sum of all costs in best path to here. - // During cost calculations local_cost is excluded. - int32_t total_steps_; // Number of steps in best path to here. - const DPPoint* best_prev_; // Pointer to prev point in best path from here. - // Information for computing the variance part of the cost. - int32_t n_; // Number of steps in best path to here for variance. - int32_t sig_x_; // Sum of step sizes for computing variance. - int64_t sig_xsq_; // Sum of squares of steps for computing variance. -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCSTRUCT_DPPOINT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/fontinfo.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/fontinfo.cpp deleted file mode 100644 index 493d42d7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/fontinfo.cpp +++ /dev/null @@ -1,236 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: fontinfo.cpp -// Description: Font information classes abstracted from intproto.h/cpp. 
-// Author: rays@google.com (Ray Smith) -// Created: Wed May 18 10:39:01 PDT 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "fontinfo.h" -#include "bitvector.h" -#include "unicity_table.h" - -namespace tesseract { - -// Writes to the given file. Returns false in case of error. -bool FontInfo::Serialize(FILE* fp) const { - if (!write_info(fp, *this)) return false; - if (!write_spacing_info(fp, *this)) return false; - return true; -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool FontInfo::DeSerialize(TFile* fp) { - if (!read_info(fp, this)) return false; - if (!read_spacing_info(fp, this)) return false; - return true; -} - -FontInfoTable::FontInfoTable() { - set_compare_callback(NewPermanentTessCallback(CompareFontInfo)); - set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback)); -} - -FontInfoTable::~FontInfoTable() { -} - -// Writes to the given file. Returns false in case of error. -bool FontInfoTable::Serialize(FILE* fp) const { - return this->SerializeClasses(fp); -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. 
-bool FontInfoTable::DeSerialize(TFile* fp) { - truncate(0); - return this->DeSerializeClasses(fp); -} - -// Returns true if the given set of fonts includes one with the same -// properties as font_id. -bool FontInfoTable::SetContainsFontProperties( - int font_id, const GenericVector& font_set) const { - uint32_t properties = get(font_id).properties; - for (int f = 0; f < font_set.size(); ++f) { - if (get(font_set[f].fontinfo_id).properties == properties) - return true; - } - return false; -} - -// Returns true if the given set of fonts includes multiple properties. -bool FontInfoTable::SetContainsMultipleFontProperties( - const GenericVector& font_set) const { - if (font_set.empty()) return false; - int first_font = font_set[0].fontinfo_id; - uint32_t properties = get(first_font).properties; - for (int f = 1; f < font_set.size(); ++f) { - if (get(font_set[f].fontinfo_id).properties != properties) - return true; - } - return false; -} - -// Moves any non-empty FontSpacingInfo entries from other to this. -void FontInfoTable::MoveSpacingInfoFrom(FontInfoTable* other) { - set_compare_callback(NewPermanentTessCallback(CompareFontInfo)); - set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback)); - for (int i = 0; i < other->size(); ++i) { - GenericVector* spacing_vec = other->get(i).spacing_vec; - if (spacing_vec != nullptr) { - int target_index = get_index(other->get(i)); - if (target_index < 0) { - // Bit copy the FontInfo and steal all the pointers. - push_back(other->get(i)); - other->get(i).name = nullptr; - } else { - delete [] get(target_index).spacing_vec; - get(target_index).spacing_vec = other->get(i).spacing_vec; - } - other->get(i).spacing_vec = nullptr; - } - } -} - -// Moves this to the target unicity table. 
-void FontInfoTable::MoveTo(UnicityTable* target) { - target->clear(); - target->set_compare_callback(NewPermanentTessCallback(CompareFontInfo)); - target->set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback)); - for (int i = 0; i < size(); ++i) { - // Bit copy the FontInfo and steal all the pointers. - target->push_back(get(i)); - get(i).name = nullptr; - get(i).spacing_vec = nullptr; - } -} - - -// Compare FontInfo structures. -bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2) { - // The font properties are required to be the same for two font with the same - // name, so there is no need to test them. - // Consequently, querying the table with only its font name as information is - // enough to retrieve its properties. - return strcmp(fi1.name, fi2.name) == 0; -} -// Compare FontSet structures. -bool CompareFontSet(const FontSet& fs1, const FontSet& fs2) { - if (fs1.size != fs2.size) - return false; - for (int i = 0; i < fs1.size; ++i) { - if (fs1.configs[i] != fs2.configs[i]) - return false; - } - return true; -} - -// Callbacks for GenericVector. -void FontInfoDeleteCallback(FontInfo f) { - if (f.spacing_vec != nullptr) { - f.spacing_vec->delete_data_pointers(); - delete f.spacing_vec; - } - delete[] f.name; -} -void FontSetDeleteCallback(FontSet fs) { - delete[] fs.configs; -} - -/*---------------------------------------------------------------------------*/ -// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. 
-bool read_info(TFile* f, FontInfo* fi) { - uint32_t size; - if (!f->DeSerialize(&size)) return false; - char* font_name = new char[size + 1]; - fi->name = font_name; - if (!f->DeSerialize(font_name, size)) return false; - font_name[size] = '\0'; - return f->DeSerialize(&fi->properties); -} - -bool write_info(FILE* f, const FontInfo& fi) { - int32_t size = strlen(fi.name); - return tesseract::Serialize(f, &size) && - tesseract::Serialize(f, &fi.name[0], size) && - tesseract::Serialize(f, &fi.properties); -} - -bool read_spacing_info(TFile* f, FontInfo* fi) { - int32_t vec_size, kern_size; - if (!f->DeSerialize(&vec_size)) return false; - ASSERT_HOST(vec_size >= 0); - if (vec_size == 0) return true; - fi->init_spacing(vec_size); - for (int i = 0; i < vec_size; ++i) { - FontSpacingInfo *fs = new FontSpacingInfo(); - if (!f->DeSerialize(&fs->x_gap_before) || - !f->DeSerialize(&fs->x_gap_after) || - !f->DeSerialize(&kern_size)) { - delete fs; - return false; - } - if (kern_size < 0) { // indication of a nullptr entry in fi->spacing_vec - delete fs; - continue; - } - if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) || - !fs->kerned_x_gaps.DeSerialize(f))) { - delete fs; - return false; - } - fi->add_spacing(i, fs); - } - return true; -} - -bool write_spacing_info(FILE* f, const FontInfo& fi) { - int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size(); - if (!tesseract::Serialize(f, &vec_size)) return false; - int16_t x_gap_invalid = -1; - for (int i = 0; i < vec_size; ++i) { - FontSpacingInfo *fs = fi.spacing_vec->get(i); - int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size(); - if (fs == nullptr) { - // Writing two invalid x-gaps. 
- if (!tesseract::Serialize(f, &x_gap_invalid, 2) || - !tesseract::Serialize(f, &kern_size)) { - return false; - } - } else { - if (!tesseract::Serialize(f, &fs->x_gap_before) || - !tesseract::Serialize(f, &fs->x_gap_after) || - !tesseract::Serialize(f, &kern_size)) { - return false; - } - } - if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) || - !fs->kerned_x_gaps.Serialize(f))) { - return false; - } - } - return true; -} - -bool read_set(TFile* f, FontSet* fs) { - if (!f->DeSerialize(&fs->size)) return false; - fs->configs = new int[fs->size]; - return f->DeSerialize(&fs->configs[0], fs->size); -} - -bool write_set(FILE* f, const FontSet& fs) { - return tesseract::Serialize(f, &fs.size) && - tesseract::Serialize(f, &fs.configs[0], fs.size); -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/fontinfo.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/fontinfo.h deleted file mode 100644 index fd641a4f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/fontinfo.h +++ /dev/null @@ -1,189 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: fontinfo.h -// Description: Font information classes abstracted from intproto.h/cpp. -// Author: rays@google.com (Ray Smith) -// Created: Tue May 17 17:08:01 PDT 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - - -#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_ -#define TESSERACT_CCSTRUCT_FONTINFO_H_ - -#include // for uint16_t, uint32_t -#include // for FILE -#include "errcode.h" -#include "genericvector.h" -#include "unichar.h" - -template class UnicityTable; - -namespace tesseract { - -// Simple struct to hold a font and a score. The scores come from the low-level -// integer matcher, so they are in the uint16_t range. Fonts are an index to -// fontinfo_table. -// These get copied around a lot, so best to keep them small. -struct ScoredFont { - ScoredFont() : fontinfo_id(-1), score(0) {} - ScoredFont(int font_id, uint16_t classifier_score) - : fontinfo_id(font_id), score(classifier_score) {} - - // Index into fontinfo table, but inside the classifier, may be a shapetable - // index. - int32_t fontinfo_id; - // Raw score from the low-level classifier. - uint16_t score; -}; - -// Struct for information about spacing between characters in a particular font. -struct FontSpacingInfo { - int16_t x_gap_before; - int16_t x_gap_after; - GenericVector kerned_unichar_ids; - GenericVector kerned_x_gaps; -}; - -/* - * font_properties contains properties about boldness, italicness, fixed pitch, - * serif, fraktur - */ -struct FontInfo { - FontInfo() : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {} - ~FontInfo() = default; - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(TFile* fp); - - // Reserves unicharset_size spots in spacing_vec. - void init_spacing(int unicharset_size) { - spacing_vec = new GenericVector(); - spacing_vec->init_to_size(unicharset_size, nullptr); - } - // Adds the given pointer to FontSpacingInfo to spacing_vec member - // (FontInfo class takes ownership of the pointer). 
- // Note: init_spacing should be called before calling this function. - void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) { - ASSERT_HOST(spacing_vec != nullptr && spacing_vec->size() > uch_id); - (*spacing_vec)[uch_id] = spacing_info; - } - - // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID. - const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const { - return (spacing_vec == nullptr || spacing_vec->size() <= uch_id) ? - nullptr : (*spacing_vec)[uch_id]; - } - - // Fills spacing with the value of the x gap expected between the two given - // UNICHAR_IDs. Returns true on success. - bool get_spacing(UNICHAR_ID prev_uch_id, - UNICHAR_ID uch_id, - int *spacing) const { - const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id); - const FontSpacingInfo *fsi = this->get_spacing(uch_id); - if (prev_fsi == nullptr || fsi == nullptr) return false; - int i = 0; - for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) { - if (prev_fsi->kerned_unichar_ids[i] == uch_id) break; - } - if (i < prev_fsi->kerned_unichar_ids.size()) { - *spacing = prev_fsi->kerned_x_gaps[i]; - } else { - *spacing = prev_fsi->x_gap_after + fsi->x_gap_before; - } - return true; - } - - bool is_italic() const { return properties & 1; } - bool is_bold() const { return (properties & 2) != 0; } - bool is_fixed_pitch() const { return (properties & 4) != 0; } - bool is_serif() const { return (properties & 8) != 0; } - bool is_fraktur() const { return (properties & 16) != 0; } - - char* name; - uint32_t properties; - // The universal_id is a field reserved for the initialization process - // to assign a unique id number to all fonts loaded for the current - // combination of languages. This id will then be returned by - // ResultIterator::WordFontAttributes. - int32_t universal_id; - // Horizontal spacing between characters (indexed by UNICHAR_ID). 
- GenericVector *spacing_vec; -}; - -// Every class (character) owns a FontSet that represents all the fonts that can -// render this character. -// Since almost all the characters from the same script share the same set of -// fonts, the sets are shared over multiple classes (see -// Classify::fontset_table_). Thus, a class only store an id to a set. -// Because some fonts cannot render just one character of a set, there are a -// lot of FontSet that differ only by one font. Rather than storing directly -// the FontInfo in the FontSet structure, it's better to share FontInfos among -// FontSets (Classify::fontinfo_table_). -struct FontSet { - int size; - int* configs; // FontInfo ids -}; - -// Class that adds a bit of functionality on top of GenericVector to -// implement a table of FontInfo that replaces UniCityTable. -// TODO(rays) change all references once all existing traineddata files -// are replaced. -class FontInfoTable : public GenericVector { - public: - FontInfoTable(); - ~FontInfoTable(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(TFile* fp); - - // Returns true if the given set of fonts includes one with the same - // properties as font_id. - bool SetContainsFontProperties( - int font_id, const GenericVector& font_set) const; - // Returns true if the given set of fonts includes multiple properties. - bool SetContainsMultipleFontProperties( - const GenericVector& font_set) const; - - // Moves any non-empty FontSpacingInfo entries from other to this. - void MoveSpacingInfoFrom(FontInfoTable* other); - // Moves this to the target unicity table. - void MoveTo(UnicityTable* target); -}; - -// Compare FontInfo structures. -bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2); -// Compare FontSet structures. 
-bool CompareFontSet(const FontSet& fs1, const FontSet& fs2); -// Deletion callbacks for GenericVector. -void FontInfoDeleteCallback(FontInfo f); -void FontSetDeleteCallback(FontSet fs); - -// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. -bool read_info(TFile* f, FontInfo* fi); -bool write_info(FILE* f, const FontInfo& fi); -bool read_spacing_info(TFile* f, FontInfo* fi); -bool write_spacing_info(FILE* f, const FontInfo& fi); -bool read_set(TFile* f, FontSet* fs); -bool write_set(FILE* f, const FontSet& fs); - -} // namespace tesseract. - -#endif /* THIRD_PARTY_TESSERACT_CCSTRUCT_FONTINFO_H_ */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/imagedata.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/imagedata.cpp deleted file mode 100644 index 5783955d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/imagedata.cpp +++ /dev/null @@ -1,711 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: imagedata.cpp -// Description: Class to hold information about a single multi-page tiff -// training file and its corresponding boxes or text file. -// Author: Ray Smith -// Created: Tue May 28 08:56:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "imagedata.h" - -#if defined(__MINGW32__) -#include -#else -#include -#endif - -#include "allheaders.h" // for pixDestroy, pixGetHeight, pixGetWidth, lept_... -#include "boxread.h" // for ReadMemBoxes -#include "callcpp.h" // for window_wait -#include "helpers.h" // for IntCastRounded, TRand, ClipToRange, Modulo -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::CYAN, ScrollView::NONE -#include "serialis.h" // for TFile -#include "tprintf.h" // for tprintf - -// Number of documents to read ahead while training. Doesn't need to be very -// large. -const int kMaxReadAhead = 8; - -namespace tesseract { - -WordFeature::WordFeature() : x_(0), y_(0), dir_(0) { -} - -WordFeature::WordFeature(const FCOORD& fcoord, uint8_t dir) - : x_(IntCastRounded(fcoord.x())), - y_(ClipToRange(IntCastRounded(fcoord.y()), 0, UINT8_MAX)), - dir_(dir) { -} - -// Computes the maximum x and y value in the features. -void WordFeature::ComputeSize(const GenericVector& features, - int* max_x, int* max_y) { - *max_x = 0; - *max_y = 0; - for (int f = 0; f < features.size(); ++f) { - if (features[f].x_ > *max_x) *max_x = features[f].x_; - if (features[f].y_ > *max_y) *max_y = features[f].y_; - } -} - -// Draws the features in the given window. -void WordFeature::Draw(const GenericVector& features, - ScrollView* window) { -#ifndef GRAPHICS_DISABLED - for (int f = 0; f < features.size(); ++f) { - FCOORD pos(features[f].x_, features[f].y_); - FCOORD dir; - dir.from_direction(features[f].dir_); - dir *= 8.0f; - window->SetCursor(IntCastRounded(pos.x() - dir.x()), - IntCastRounded(pos.y() - dir.y())); - window->DrawTo(IntCastRounded(pos.x() + dir.x()), - IntCastRounded(pos.y() + dir.y())); - } -#endif -} - -// Writes to the given file. 
Returns false in case of error. -bool WordFeature::Serialize(FILE* fp) const { - return tesseract::Serialize(fp, &x_) && - tesseract::Serialize(fp, &y_) && - tesseract::Serialize(fp, &dir_); -} - -// Reads from the given file. Returns false in case of error. -bool WordFeature::DeSerialize(bool swap, FILE* fp) { - if (!tesseract::DeSerialize(fp, &x_)) return false; - if (swap) ReverseN(&x_, sizeof(x_)); - return tesseract::DeSerialize(fp, &y_) && - tesseract::DeSerialize(fp, &dir_); -} - -void FloatWordFeature::FromWordFeatures( - const GenericVector& word_features, - GenericVector* float_features) { - for (int i = 0; i < word_features.size(); ++i) { - FloatWordFeature f; - f.x = word_features[i].x(); - f.y = word_features[i].y(); - f.dir = word_features[i].dir(); - f.x_bucket = 0; // Will set it later. - float_features->push_back(f); - } -} - -// Sort function to sort first by x-bucket, then by y. -/* static */ -int FloatWordFeature::SortByXBucket(const void* v1, const void* v2) { - const FloatWordFeature* f1 = static_cast(v1); - const FloatWordFeature* f2 = static_cast(v2); - int x_diff = f1->x_bucket - f2->x_bucket; - if (x_diff == 0) return f1->y - f2->y; - return x_diff; -} - -ImageData::ImageData() : page_number_(-1), vertical_text_(false) { -} -// Takes ownership of the pix and destroys it. -ImageData::ImageData(bool vertical, Pix* pix) - : page_number_(0), vertical_text_(vertical) { - SetPix(pix); -} -ImageData::~ImageData() { -} - -// Builds and returns an ImageData from the basic data. Note that imagedata, -// truth_text, and box_text are all the actual file data, NOT filenames. -ImageData* ImageData::Build(const char* name, int page_number, const char* lang, - const char* imagedata, int imagedatasize, - const char* truth_text, const char* box_text) { - ImageData* image_data = new ImageData(); - image_data->imagefilename_ = name; - image_data->page_number_ = page_number; - image_data->language_ = lang; - // Save the imagedata. 
- image_data->image_data_.resize_no_init(imagedatasize); - memcpy(&image_data->image_data_[0], imagedata, imagedatasize); - if (!image_data->AddBoxes(box_text)) { - if (truth_text == nullptr || truth_text[0] == '\0') { - tprintf("Error: No text corresponding to page %d from image %s!\n", - page_number, name); - delete image_data; - return nullptr; - } - image_data->transcription_ = truth_text; - // If we have no boxes, the transcription is in the 0th box_texts_. - image_data->box_texts_.push_back(truth_text); - // We will create a box for the whole image on PreScale, to save unpacking - // the image now. - } else if (truth_text != nullptr && truth_text[0] != '\0' && - image_data->transcription_ != truth_text) { - // Save the truth text as it is present and disagrees with the box text. - image_data->transcription_ = truth_text; - } - return image_data; -} - -// Writes to the given file. Returns false in case of error. -bool ImageData::Serialize(TFile* fp) const { - if (!imagefilename_.Serialize(fp)) return false; - if (!fp->Serialize(&page_number_)) return false; - if (!image_data_.Serialize(fp)) return false; - if (!language_.Serialize(fp)) return false; - if (!transcription_.Serialize(fp)) return false; - // WARNING: Will not work across different endian machines. - if (!boxes_.Serialize(fp)) return false; - if (!box_texts_.SerializeClasses(fp)) return false; - int8_t vertical = vertical_text_; - return fp->Serialize(&vertical); -} - -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool ImageData::DeSerialize(TFile* fp) { - if (!imagefilename_.DeSerialize(fp)) return false; - if (!fp->DeSerialize(&page_number_)) return false; - if (!image_data_.DeSerialize(fp)) return false; - if (!language_.DeSerialize(fp)) return false; - if (!transcription_.DeSerialize(fp)) return false; - // WARNING: Will not work across different endian machines. 
- if (!boxes_.DeSerialize(fp)) return false; - if (!box_texts_.DeSerializeClasses(fp)) return false; - int8_t vertical = 0; - if (!fp->DeSerialize(&vertical)) return false; - vertical_text_ = vertical != 0; - return true; -} - -// As DeSerialize, but only seeks past the data - hence a static method. -bool ImageData::SkipDeSerialize(TFile* fp) { - if (!STRING::SkipDeSerialize(fp)) return false; - int32_t page_number; - if (!fp->DeSerialize(&page_number)) return false; - if (!GenericVector::SkipDeSerialize(fp)) return false; - if (!STRING::SkipDeSerialize(fp)) return false; - if (!STRING::SkipDeSerialize(fp)) return false; - if (!GenericVector::SkipDeSerialize(fp)) return false; - if (!GenericVector::SkipDeSerializeClasses(fp)) return false; - int8_t vertical = 0; - return fp->DeSerialize(&vertical); -} - -// Saves the given Pix as a PNG-encoded string and destroys it. -void ImageData::SetPix(Pix* pix) { - SetPixInternal(pix, &image_data_); -} - -// Returns the Pix image for *this. Must be pixDestroyed after use. -Pix* ImageData::GetPix() const { - return GetPixInternal(image_data_); -} - -// Gets anything and everything with a non-nullptr pointer, prescaled to a -// given target_height (if 0, then the original image height), and aligned. -// Also returns (if not nullptr) the width and height of the scaled image. -// The return value is the scaled Pix, which must be pixDestroyed after use, -// and scale_factor (if not nullptr) is set to the scale factor that was applied -// to the image to achieve the target_height. 
-Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor, - int* scaled_width, int* scaled_height, - GenericVector* boxes) const { - int input_width = 0; - int input_height = 0; - Pix* src_pix = GetPix(); - ASSERT_HOST(src_pix != nullptr); - input_width = pixGetWidth(src_pix); - input_height = pixGetHeight(src_pix); - if (target_height == 0) { - target_height = std::min(input_height, max_height); - } - float im_factor = static_cast(target_height) / input_height; - if (scaled_width != nullptr) - *scaled_width = IntCastRounded(im_factor * input_width); - if (scaled_height != nullptr) - *scaled_height = target_height; - // Get the scaled image. - Pix* pix = pixScale(src_pix, im_factor, im_factor); - if (pix == nullptr) { - tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", - input_width, input_height, im_factor); - } - if (scaled_width != nullptr) *scaled_width = pixGetWidth(pix); - if (scaled_height != nullptr) *scaled_height = pixGetHeight(pix); - pixDestroy(&src_pix); - if (boxes != nullptr) { - // Get the boxes. - boxes->truncate(0); - for (int b = 0; b < boxes_.size(); ++b) { - TBOX box = boxes_[b]; - box.scale(im_factor); - boxes->push_back(box); - } - if (boxes->empty()) { - // Make a single box for the whole image. - TBOX box(0, 0, im_factor * input_width, target_height); - boxes->push_back(box); - } - } - if (scale_factor != nullptr) *scale_factor = im_factor; - return pix; -} - -int ImageData::MemoryUsed() const { - return image_data_.size(); -} - -// Draws the data in a new window. -void ImageData::Display() const { -#ifndef GRAPHICS_DISABLED - const int kTextSize = 64; - // Draw the image. 
- Pix* pix = GetPix(); - if (pix == nullptr) return; - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - ScrollView* win = new ScrollView("Imagedata", 100, 100, - 2 * (width + 2 * kTextSize), - 2 * (height + 4 * kTextSize), - width + 10, height + 3 * kTextSize, true); - win->Image(pix, 0, height - 1); - pixDestroy(&pix); - // Draw the boxes. - win->Pen(ScrollView::RED); - win->Brush(ScrollView::NONE); - int text_size = kTextSize; - if (!boxes_.empty() && boxes_[0].height() * 2 < text_size) - text_size = boxes_[0].height() * 2; - win->TextAttributes("Arial", text_size, false, false, false); - if (!boxes_.empty()) { - for (int b = 0; b < boxes_.size(); ++b) { - boxes_[b].plot(win); - win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string()); - } - } else { - // The full transcription. - win->Pen(ScrollView::CYAN); - win->Text(0, height + kTextSize * 2, transcription_.string()); - } - win->Update(); - window_wait(win); -#endif -} - -// Adds the supplied boxes and transcriptions that correspond to the correct -// page number. -void ImageData::AddBoxes(const GenericVector& boxes, - const GenericVector& texts, - const GenericVector& box_pages) { - // Copy the boxes and make the transcription. - for (int i = 0; i < box_pages.size(); ++i) { - if (page_number_ >= 0 && box_pages[i] != page_number_) continue; - transcription_ += texts[i]; - boxes_.push_back(boxes[i]); - box_texts_.push_back(texts[i]); - } -} - -// Saves the given Pix as a PNG-encoded string and destroys it. -void ImageData::SetPixInternal(Pix* pix, GenericVector* image_data) { - l_uint8* data; - size_t size; - pixWriteMem(&data, &size, pix, IFF_PNG); - pixDestroy(&pix); - image_data->resize_no_init(size); - memcpy(&(*image_data)[0], data, size); - lept_free(data); -} - -// Returns the Pix image for the image_data. Must be pixDestroyed after use. 
-Pix* ImageData::GetPixInternal(const GenericVector& image_data) { - Pix* pix = nullptr; - if (!image_data.empty()) { - // Convert the array to an image. - const unsigned char* u_data = - reinterpret_cast(&image_data[0]); - pix = pixReadMem(u_data, image_data.size()); - } - return pix; -} - -// Parses the text string as a box file and adds any discovered boxes that -// match the page number. Returns false on error. -bool ImageData::AddBoxes(const char* box_text) { - if (box_text != nullptr && box_text[0] != '\0') { - GenericVector boxes; - GenericVector texts; - GenericVector box_pages; - if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text, - /*continue_on_failure*/ true, &boxes, &texts, nullptr, - &box_pages)) { - AddBoxes(boxes, texts, box_pages); - return true; - } else { - tprintf("Error: No boxes for page %d from image %s!\n", - page_number_, imagefilename_.string()); - } - } - return false; -} - -// Thread function to call ReCachePages. -void* ReCachePagesFunc(void* data) { - DocumentData* document_data = static_cast(data); - document_data->ReCachePages(); - return nullptr; -} - -DocumentData::DocumentData(const STRING& name) - : document_name_(name), - pages_offset_(-1), - total_pages_(-1), - memory_used_(0), - max_memory_(0), - reader_(nullptr) {} - -DocumentData::~DocumentData() { - SVAutoLock lock_p(&pages_mutex_); - SVAutoLock lock_g(&general_mutex_); -} - -// Reads all the pages in the given lstmf filename to the cache. The reader -// is used to read the file. -bool DocumentData::LoadDocument(const char* filename, int start_page, - int64_t max_memory, FileReader reader) { - SetDocument(filename, max_memory, reader); - pages_offset_ = start_page; - return ReCachePages(); -} - -// Sets up the document, without actually loading it. 
-void DocumentData::SetDocument(const char* filename, int64_t max_memory, - FileReader reader) { - SVAutoLock lock_p(&pages_mutex_); - SVAutoLock lock(&general_mutex_); - document_name_ = filename; - pages_offset_ = -1; - max_memory_ = max_memory; - reader_ = reader; -} - -// Writes all the pages to the given filename. Returns false on error. -bool DocumentData::SaveDocument(const char* filename, FileWriter writer) { - SVAutoLock lock(&pages_mutex_); - TFile fp; - fp.OpenWrite(nullptr); - if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) { - tprintf("Serialize failed: %s\n", filename); - return false; - } - return true; -} -bool DocumentData::SaveToBuffer(GenericVector* buffer) { - SVAutoLock lock(&pages_mutex_); - TFile fp; - fp.OpenWrite(buffer); - return pages_.Serialize(&fp); -} - -// Adds the given page data to this document, counting up memory. -void DocumentData::AddPageToDocument(ImageData* page) { - SVAutoLock lock(&pages_mutex_); - pages_.push_back(page); - set_memory_used(memory_used() + page->MemoryUsed()); -} - -// If the given index is not currently loaded, loads it using a separate -// thread. -void DocumentData::LoadPageInBackground(int index) { - ImageData* page = nullptr; - if (IsPageAvailable(index, &page)) return; - SVAutoLock lock(&pages_mutex_); - if (pages_offset_ == index) return; - pages_offset_ = index; - pages_.clear(); - SVSync::StartThread(ReCachePagesFunc, this); -} - -// Returns a pointer to the page with the given index, modulo the total -// number of pages. Blocks until the background load is completed. -const ImageData* DocumentData::GetPage(int index) { - ImageData* page = nullptr; - while (!IsPageAvailable(index, &page)) { - // If there is no background load scheduled, schedule one now. 
- pages_mutex_.Lock(); - bool needs_loading = pages_offset_ != index; - pages_mutex_.Unlock(); - if (needs_loading) LoadPageInBackground(index); - // We can't directly load the page, or the background load will delete it - // while the caller is using it, so give it a chance to work. -#if defined(__MINGW32__) - sleep(1); -#else - std::this_thread::sleep_for(std::chrono::seconds(1)); -#endif - } - return page; -} - -// Returns true if the requested page is available, and provides a pointer, -// which may be nullptr if the document is empty. May block, even though it -// doesn't guarantee to return true. -bool DocumentData::IsPageAvailable(int index, ImageData** page) { - SVAutoLock lock(&pages_mutex_); - int num_pages = NumPages(); - if (num_pages == 0 || index < 0) { - *page = nullptr; // Empty Document. - return true; - } - if (num_pages > 0) { - index = Modulo(index, num_pages); - if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) { - *page = pages_[index - pages_offset_]; // Page is available already. - return true; - } - } - return false; -} - -// Removes all pages from memory and frees the memory, but does not forget -// the document metadata. -int64_t DocumentData::UnCache() { - SVAutoLock lock(&pages_mutex_); - int64_t memory_saved = memory_used(); - pages_.clear(); - pages_offset_ = -1; - set_total_pages(-1); - set_memory_used(0); - tprintf("Unloaded document %s, saving %" PRId64 " memory\n", - document_name_.string(), memory_saved); - return memory_saved; -} - -// Shuffles all the pages in the document. -void DocumentData::Shuffle() { - TRand random; - // Different documents get shuffled differently, but the same for the same - // name. - random.set_seed(document_name_.string()); - int num_pages = pages_.size(); - // Execute one random swap for each page in the document. 
- for (int i = 0; i < num_pages; ++i) { - int src = random.IntRand() % num_pages; - int dest = random.IntRand() % num_pages; - std::swap(pages_[src], pages_[dest]); - } -} - -// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_ -// starting at index pages_offset_. -bool DocumentData::ReCachePages() { - SVAutoLock lock(&pages_mutex_); - // Read the file. - set_total_pages(0); - set_memory_used(0); - int loaded_pages = 0; - pages_.truncate(0); - TFile fp; - if (!fp.Open(document_name_, reader_) || - !PointerVector::DeSerializeSize(&fp, &loaded_pages) || - loaded_pages <= 0) { - tprintf("Deserialize header failed: %s\n", document_name_.string()); - return false; - } - pages_offset_ %= loaded_pages; - // Skip pages before the first one we want, and load the rest until max - // memory and skip the rest after that. - int page; - for (page = 0; page < loaded_pages; ++page) { - if (page < pages_offset_ || - (max_memory_ > 0 && memory_used() > max_memory_)) { - if (!PointerVector::DeSerializeSkip(&fp)) { - tprintf("Deserializeskip failed\n"); - break; - } - } else { - if (!pages_.DeSerializeElement(&fp)) break; - ImageData* image_data = pages_.back(); - if (image_data->imagefilename().length() == 0) { - image_data->set_imagefilename(document_name_); - image_data->set_page_number(page); - } - set_memory_used(memory_used() + image_data->MemoryUsed()); - } - } - if (page < loaded_pages) { - tprintf("Deserialize failed: %s read %d/%d pages\n", - document_name_.string(), page, loaded_pages); - pages_.truncate(0); - } else { - tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(), - loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(), - document_name_.string()); - } - set_total_pages(loaded_pages); - return !pages_.empty(); -} - -// A collection of DocumentData that knows roughly how much memory it is using. 
-DocumentCache::DocumentCache(int64_t max_memory) - : num_pages_per_doc_(0), max_memory_(max_memory) {} -DocumentCache::~DocumentCache() {} - -// Adds all the documents in the list of filenames, counting memory. -// The reader is used to read the files. -bool DocumentCache::LoadDocuments(const GenericVector& filenames, - CachingStrategy cache_strategy, - FileReader reader) { - cache_strategy_ = cache_strategy; - int64_t fair_share_memory = 0; - // In the round-robin case, each DocumentData handles restricting its content - // to its fair share of memory. In the sequential case, DocumentCache - // determines which DocumentDatas are held entirely in memory. - if (cache_strategy_ == CS_ROUND_ROBIN) - fair_share_memory = max_memory_ / filenames.size(); - for (int arg = 0; arg < filenames.size(); ++arg) { - STRING filename = filenames[arg]; - DocumentData* document = new DocumentData(filename); - document->SetDocument(filename.string(), fair_share_memory, reader); - AddToCache(document); - } - if (!documents_.empty()) { - // Try to get the first page now to verify the list of filenames. - if (GetPageBySerial(0) != nullptr) return true; - tprintf("Load of page 0 failed!\n"); - } - return false; -} - -// Adds document to the cache. -bool DocumentCache::AddToCache(DocumentData* data) { - documents_.push_back(data); - return true; -} - -// Finds and returns a document by name. -DocumentData* DocumentCache::FindDocument(const STRING& document_name) const { - for (int i = 0; i < documents_.size(); ++i) { - if (documents_[i]->document_name() == document_name) - return documents_[i]; - } - return nullptr; -} - -// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache -// strategy, could take a long time. -int DocumentCache::TotalPages() { - if (cache_strategy_ == CS_SEQUENTIAL) { - // In sequential mode, we assume each doc has the same number of pages - // whether it is true or not. 
- if (num_pages_per_doc_ == 0) GetPageSequential(0); - return num_pages_per_doc_ * documents_.size(); - } - int total_pages = 0; - int num_docs = documents_.size(); - for (int d = 0; d < num_docs; ++d) { - // We have to load a page to make NumPages() valid. - documents_[d]->GetPage(0); - total_pages += documents_[d]->NumPages(); - } - return total_pages; -} - -// Returns a page by serial number, selecting them in a round-robin fashion -// from all the documents. Highly disk-intensive, but doesn't need samples -// to be shuffled between files to begin with. -const ImageData* DocumentCache::GetPageRoundRobin(int serial) { - int num_docs = documents_.size(); - int doc_index = serial % num_docs; - const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs); - for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) { - doc_index = (serial + offset) % num_docs; - int page = (serial + offset) / num_docs; - documents_[doc_index]->LoadPageInBackground(page); - } - return doc; -} - -// Returns a page by serial number, selecting them in sequence from each file. -// Requires the samples to be shuffled between the files to give a random or -// uniform distribution of data. Less disk-intensive than GetPageRoundRobin. -const ImageData* DocumentCache::GetPageSequential(int serial) { - int num_docs = documents_.size(); - ASSERT_HOST(num_docs > 0); - if (num_pages_per_doc_ == 0) { - // Use the pages in the first doc as the number of pages in each doc. - documents_[0]->GetPage(0); - num_pages_per_doc_ = documents_[0]->NumPages(); - if (num_pages_per_doc_ == 0) { - tprintf("First document cannot be empty!!\n"); - ASSERT_HOST(num_pages_per_doc_ > 0); - } - // Get rid of zero now if we don't need it. - if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache(); - } - int doc_index = serial / num_pages_per_doc_ % num_docs; - const ImageData* doc = - documents_[doc_index]->GetPage(serial % num_pages_per_doc_); - // Count up total memory. 
Background loading makes it more complicated to - // keep a running count. - int64_t total_memory = 0; - for (int d = 0; d < num_docs; ++d) { - total_memory += documents_[d]->memory_used(); - } - if (total_memory >= max_memory_) { - // Find something to un-cache. - // If there are more than 3 in front, then serial is from the back reader - // of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then - // we create a hole between them and then un-caching the backmost occupied - // will work for both. - int num_in_front = CountNeighbourDocs(doc_index, 1); - for (int offset = num_in_front - 2; - offset > 1 && total_memory >= max_memory_; --offset) { - int next_index = (doc_index + offset) % num_docs; - total_memory -= documents_[next_index]->UnCache(); - } - // If that didn't work, the best solution is to un-cache from the back. If - // we take away the document that a 2nd reader is using, it will put it - // back and make a hole between. - int num_behind = CountNeighbourDocs(doc_index, -1); - for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; - ++offset) { - int next_index = (doc_index + offset + num_docs) % num_docs; - total_memory -= documents_[next_index]->UnCache(); - } - } - int next_index = (doc_index + 1) % num_docs; - if (!documents_[next_index]->IsCached() && total_memory < max_memory_) { - documents_[next_index]->LoadPageInBackground(0); - } - return doc; -} - -// Helper counts the number of adjacent cached neighbours of index looking in -// direction dir, ie index+dir, index+2*dir etc. -int DocumentCache::CountNeighbourDocs(int index, int dir) { - int num_docs = documents_.size(); - for (int offset = dir; abs(offset) < num_docs; offset += dir) { - int offset_index = (index + offset + num_docs) % num_docs; - if (!documents_[offset_index]->IsCached()) return offset - dir; - } - return num_docs; -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/imagedata.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/imagedata.h deleted file mode 100644 index 6679db4c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/imagedata.h +++ /dev/null @@ -1,379 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: imagedata.h -// Description: Class to hold information about a single image and its -// corresponding boxes or text file. -// Author: Ray Smith -// Created: Mon Jul 22 14:17:06 PDT 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_IMAGE_IMAGEDATA_H_ -#define TESSERACT_IMAGE_IMAGEDATA_H_ - -#include "genericvector.h" // for GenericVector, PointerVector, FileReader -#include "points.h" // for FCOORD -#include "strngs.h" // for STRING -#include "svutil.h" // for SVAutoLock, SVMutex - -class ScrollView; -class TBOX; -struct Pix; - -namespace tesseract { - -class TFile; - -// Amount of padding to apply in output pixels in feature mode. -const int kFeaturePadding = 2; -// Number of pixels to pad around text boxes. -const int kImagePadding = 4; - -// Enum to determine the caching and data sequencing strategy. -enum CachingStrategy { - // Reads all of one file before moving on to the next. 
Requires samples to be - // shuffled across files. Uses the count of samples in the first file as - // the count in all the files to achieve high-speed random access. As a - // consequence, if subsequent files are smaller, they get entries used more - // than once, and if subsequent files are larger, some entries are not used. - // Best for larger data sets that don't fit in memory. - CS_SEQUENTIAL, - // Reads one sample from each file in rotation. Does not require shuffled - // samples, but is extremely disk-intensive. Samples in smaller files also - // get used more often than samples in larger files. - // Best for smaller data sets that mostly fit in memory. - CS_ROUND_ROBIN, -}; - -class WordFeature { - public: - WordFeature(); - WordFeature(const FCOORD& fcoord, uint8_t dir); - - // Computes the maximum x and y value in the features. - static void ComputeSize(const GenericVector& features, - int* max_x, int* max_y); - // Draws the features in the given window. - static void Draw(const GenericVector& features, - ScrollView* window); - - // Accessors. - int x() const { return x_; } - int y() const { return y_; } - int dir() const { return dir_; } - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - private: - int16_t x_; - uint8_t y_; - uint8_t dir_; -}; - -// A floating-point version of WordFeature, used as an intermediate during -// scaling. -struct FloatWordFeature { - static void FromWordFeatures(const GenericVector& word_features, - GenericVector* float_features); - // Sort function to sort first by x-bucket, then by y. 
- static int SortByXBucket(const void*, const void*); - - float x; - float y; - float dir; - int x_bucket; -}; - -// Class to hold information on a single image: -// Filename, cached image as a Pix*, character boxes, text transcription. -// The text transcription is the ground truth UTF-8 text for the image. -// Character boxes are optional and indicate the desired segmentation of -// the text into recognition units. -class ImageData { - public: - ImageData(); - // Takes ownership of the pix. - ImageData(bool vertical, Pix* pix); - ~ImageData(); - - // Builds and returns an ImageData from the basic data. Note that imagedata, - // truth_text, and box_text are all the actual file data, NOT filenames. - static ImageData* Build(const char* name, int page_number, const char* lang, - const char* imagedata, int imagedatasize, - const char* truth_text, const char* box_text); - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp); - // As DeSerialize, but only seeks past the data - hence a static method. - static bool SkipDeSerialize(TFile* fp); - - // Other accessors. 
- const STRING& imagefilename() const { - return imagefilename_; - } - void set_imagefilename(const STRING& name) { - imagefilename_ = name; - } - int page_number() const { - return page_number_; - } - void set_page_number(int num) { - page_number_ = num; - } - const GenericVector& image_data() const { - return image_data_; - } - const STRING& language() const { - return language_; - } - void set_language(const STRING& lang) { - language_ = lang; - } - const STRING& transcription() const { - return transcription_; - } - const GenericVector& boxes() const { - return boxes_; - } - const GenericVector& box_texts() const { - return box_texts_; - } - const STRING& box_text(int index) const { - return box_texts_[index]; - } - // Saves the given Pix as a PNG-encoded string and destroys it. - void SetPix(Pix* pix); - // Returns the Pix image for *this. Must be pixDestroyed after use. - Pix* GetPix() const; - // Gets anything and everything with a non-nullptr pointer, prescaled to a - // given target_height (if 0, then the original image height), and aligned. - // Also returns (if not nullptr) the width and height of the scaled image. - // The return value is the scaled Pix, which must be pixDestroyed after use, - // and scale_factor (if not nullptr) is set to the scale factor that was applied - // to the image to achieve the target_height. - Pix* PreScale(int target_height, int max_height, float* scale_factor, - int* scaled_width, int* scaled_height, - GenericVector* boxes) const; - - int MemoryUsed() const; - - // Draws the data in a new window. - void Display() const; - - // Adds the supplied boxes and transcriptions that correspond to the correct - // page number. - void AddBoxes(const GenericVector& boxes, - const GenericVector& texts, - const GenericVector& box_pages); - - private: - // Saves the given Pix as a PNG-encoded string and destroys it. - static void SetPixInternal(Pix* pix, GenericVector* image_data); - // Returns the Pix image for the image_data. 
Must be pixDestroyed after use. - static Pix* GetPixInternal(const GenericVector& image_data); - // Parses the text string as a box file and adds any discovered boxes that - // match the page number. Returns false on error. - bool AddBoxes(const char* box_text); - - private: - STRING imagefilename_; // File to read image from. - int32_t page_number_; // Page number if multi-page tif or -1. - GenericVector image_data_; // PNG file data. - STRING language_; // Language code for image. - STRING transcription_; // UTF-8 ground truth of image. - GenericVector boxes_; // If non-empty boxes of the image. - GenericVector box_texts_; // String for text in each box. - bool vertical_text_; // Image has been rotated from vertical. -}; - -// A collection of ImageData that knows roughly how much memory it is using. -class DocumentData { - friend void* ReCachePagesFunc(void* data); - - public: - explicit DocumentData(const STRING& name); - ~DocumentData(); - - // Reads all the pages in the given lstmf filename to the cache. The reader - // is used to read the file. - bool LoadDocument(const char* filename, int start_page, int64_t max_memory, - FileReader reader); - // Sets up the document, without actually loading it. - void SetDocument(const char* filename, int64_t max_memory, FileReader reader); - // Writes all the pages to the given filename. Returns false on error. - bool SaveDocument(const char* filename, FileWriter writer); - bool SaveToBuffer(GenericVector* buffer); - - // Adds the given page data to this document, counting up memory. - void AddPageToDocument(ImageData* page); - - const STRING& document_name() const { - SVAutoLock lock(&general_mutex_); - return document_name_; - } - int NumPages() const { - SVAutoLock lock(&general_mutex_); - return total_pages_; - } - int64_t memory_used() const { - SVAutoLock lock(&general_mutex_); - return memory_used_; - } - // If the given index is not currently loaded, loads it using a separate - // thread. 
Note: there are 4 cases: - // Document uncached: IsCached() returns false, total_pages_ < 0. - // Required page is available: IsPageAvailable returns true. In this case, - // total_pages_ > 0 and - // pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size() - // Pages are loaded, but the required one is not. - // The requested page is being loaded by LoadPageInBackground. In this case, - // index == pages_offset_. Once the loading starts, the pages lock is held - // until it completes, at which point IsPageAvailable will unblock and return - // true. - void LoadPageInBackground(int index); - // Returns a pointer to the page with the given index, modulo the total - // number of pages. Blocks until the background load is completed. - const ImageData* GetPage(int index); - // Returns true if the requested page is available, and provides a pointer, - // which may be nullptr if the document is empty. May block, even though it - // doesn't guarantee to return true. - bool IsPageAvailable(int index, ImageData** page); - // Takes ownership of the given page index. The page is made nullptr in *this. - ImageData* TakePage(int index) { - SVAutoLock lock(&pages_mutex_); - ImageData* page = pages_[index]; - pages_[index] = nullptr; - return page; - } - // Returns true if the document is currently loaded or in the process of - // loading. - bool IsCached() const { return NumPages() >= 0; } - // Removes all pages from memory and frees the memory, but does not forget - // the document metadata. Returns the memory saved. - int64_t UnCache(); - // Shuffles all the pages in the document. - void Shuffle(); - - private: - // Sets the value of total_pages_ behind a mutex. 
- void set_total_pages(int total) { - SVAutoLock lock(&general_mutex_); - total_pages_ = total; - } - void set_memory_used(int64_t memory_used) { - SVAutoLock lock(&general_mutex_); - memory_used_ = memory_used; - } - // Locks the pages_mutex_ and Loads as many pages can fit in max_memory_ - // starting at index pages_offset_. - bool ReCachePages(); - - private: - // A name for this document. - STRING document_name_; - // A group of pages that corresponds in some loose way to a document. - PointerVector pages_; - // Page number of the first index in pages_. - int pages_offset_; - // Total number of pages in document (may exceed size of pages_.) - int total_pages_; - // Total of all pix sizes in the document. - int64_t memory_used_; - // Max memory to use at any time. - int64_t max_memory_; - // Saved reader from LoadDocument to allow re-caching. - FileReader reader_; - // Mutex that protects pages_ and pages_offset_ against multiple parallel - // loads, and provides a wait for page. - SVMutex pages_mutex_; - // Mutex that protects other data members that callers want to access without - // waiting for a load operation. - mutable SVMutex general_mutex_; -}; - -// A collection of DocumentData that knows roughly how much memory it is using. -// Note that while it supports background read-ahead, it assumes that a single -// thread is accessing documents, ie it is not safe for multiple threads to -// access different documents in parallel, as one may de-cache the other's -// content. -class DocumentCache { - public: - explicit DocumentCache(int64_t max_memory); - ~DocumentCache(); - - // Deletes all existing documents from the cache. - void Clear() { - documents_.clear(); - num_pages_per_doc_ = 0; - } - // Adds all the documents in the list of filenames, counting memory. - // The reader is used to read the files. - bool LoadDocuments(const GenericVector& filenames, - CachingStrategy cache_strategy, FileReader reader); - - // Adds document to the cache. 
- bool AddToCache(DocumentData* data); - - // Finds and returns a document by name. - DocumentData* FindDocument(const STRING& document_name) const; - - // Returns a page by serial number using the current cache_strategy_ to - // determine the mapping from serial number to page. - const ImageData* GetPageBySerial(int serial) { - if (cache_strategy_ == CS_SEQUENTIAL) - return GetPageSequential(serial); - else - return GetPageRoundRobin(serial); - } - - const PointerVector& documents() const { - return documents_; - } - // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache - // strategy, could take a long time. - int TotalPages(); - - private: - // Returns a page by serial number, selecting them in a round-robin fashion - // from all the documents. Highly disk-intensive, but doesn't need samples - // to be shuffled between files to begin with. - const ImageData* GetPageRoundRobin(int serial); - // Returns a page by serial number, selecting them in sequence from each file. - // Requires the samples to be shuffled between the files to give a random or - // uniform distribution of data. Less disk-intensive than GetPageRoundRobin. - const ImageData* GetPageSequential(int serial); - - // Helper counts the number of adjacent cached neighbour documents_ of index - // looking in direction dir, ie index+dir, index+2*dir etc. - int CountNeighbourDocs(int index, int dir); - - // A group of pages that corresponds in some loose way to a document. - PointerVector documents_; - // Strategy to use for caching and serializing data samples. - CachingStrategy cache_strategy_; - // Number of pages in the first document, used as a divisor in - // GetPageSequential to determine the document index. - int num_pages_per_doc_; - // Max memory allowed in this cache. 
- int64_t max_memory_; -}; - -} // namespace tesseract - - -#endif // TESSERACT_IMAGE_IMAGEDATA_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/linlsq.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/linlsq.cpp deleted file mode 100644 index a1f5c279..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/linlsq.cpp +++ /dev/null @@ -1,258 +0,0 @@ -/********************************************************************** - * File: linlsq.cpp (Formerly llsq.c) - * Description: Linear Least squares fitting code. - * Author: Ray Smith - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include // for std::sqrt -#include "errcode.h" -#include "linlsq.h" - -const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ"; - -/********************************************************************** - * LLSQ::clear - * - * Function to initialize a LLSQ. - **********************************************************************/ - -void LLSQ::clear() { // initialize - total_weight = 0.0; // no elements - sigx = 0.0; // update accumulators - sigy = 0.0; - sigxx = 0.0; - sigxy = 0.0; - sigyy = 0.0; -} - - -/********************************************************************** - * LLSQ::add - * - * Add an element to the accumulator. 
- **********************************************************************/ - -void LLSQ::add(double x, double y) { // add an element - total_weight++; // count elements - sigx += x; // update accumulators - sigy += y; - sigxx += x * x; - sigxy += x * y; - sigyy += y * y; -} -// Adds an element with a specified weight. -void LLSQ::add(double x, double y, double weight) { - total_weight += weight; - sigx += x * weight; // update accumulators - sigy += y * weight; - sigxx += x * x * weight; - sigxy += x * y * weight; - sigyy += y * y * weight; -} -// Adds a whole LLSQ. -void LLSQ::add(const LLSQ& other) { - total_weight += other.total_weight; - sigx += other.sigx; // update accumulators - sigy += other.sigy; - sigxx += other.sigxx; - sigxy += other.sigxy; - sigyy += other.sigyy; -} - - -/********************************************************************** - * LLSQ::remove - * - * Delete an element from the acculuator. - **********************************************************************/ - -void LLSQ::remove(double x, double y) { // delete an element - if (total_weight <= 0.0) // illegal - EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr); - total_weight--; // count elements - sigx -= x; // update accumulators - sigy -= y; - sigxx -= x * x; - sigxy -= x * y; - sigyy -= y * y; -} - - -/********************************************************************** - * LLSQ::m - * - * Return the gradient of the line fit. - **********************************************************************/ - -double LLSQ::m() const { // get gradient - double covar = covariance(); - double x_var = x_variance(); - if (x_var != 0.0) - return covar / x_var; - else - return 0.0; // too little -} - - -/********************************************************************** - * LLSQ::c - * - * Return the constant of the line fit. 
- **********************************************************************/ - -double LLSQ::c(double m) const { // get constant - if (total_weight > 0.0) - return (sigy - m * sigx) / total_weight; - else - return 0; // too little -} - - -/********************************************************************** - * LLSQ::rms - * - * Return the rms error of the fit. - **********************************************************************/ - -double LLSQ::rms(double m, double c) const { // get error - double error; // total error - - if (total_weight > 0) { - error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * - (total_weight * c - 2 * sigy); - if (error >= 0) - error = std::sqrt(error / total_weight); // sqrt of mean - else - error = 0; - } else { - error = 0; // too little - } - return error; -} - - -/********************************************************************** - * LLSQ::pearson - * - * Return the pearson product moment correlation coefficient. - **********************************************************************/ - -double LLSQ::pearson() const { // get correlation - double r = 0.0; // Correlation is 0 if insufficient data. - - double covar = covariance(); - if (covar != 0.0) { - double var_product = x_variance() * y_variance(); - if (var_product > 0.0) - r = covar / std::sqrt(var_product); - } - return r; -} - -// Returns the x,y means as an FCOORD. -FCOORD LLSQ::mean_point() const { - if (total_weight > 0.0) { - return FCOORD(sigx / total_weight, sigy / total_weight); - } else { - return FCOORD(0.0f, 0.0f); - } -} - -// Returns the sqrt of the mean squared error measured perpendicular from the -// line through mean_point() in the direction dir. -// -// Derivation: -// Lemma: Let v and x_i (i=1..N) be a k-dimensional vectors (1xk matrices). -// Let % be dot product and ' be transpose. Note that: -// Sum[i=1..N] (v % x_i)^2 -// = v * [x_1' x_2' ... x_N'] * [x_1' x_2' .. 
x_N']' * v' -// If x_i have average 0 we have: -// = v * (N * COVARIANCE_MATRIX(X)) * v' -// Expanded for the case that k = 2, where we treat the dimensions -// as x_i and y_i, this is: -// = v * (N * [VAR(X), COV(X,Y); COV(X,Y) VAR(Y)]) * v' -// Now, we are trying to calculate the mean squared error, where v is -// perpendicular to our line of interest: -// Mean squared error -// = E [ (v % (x_i - x_avg))) ^2 ] -// = Sum (v % (x_i - x_avg))^2 / N -// = v * N * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] / N * v' -// = v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v' -// = code below -double LLSQ::rms_orth(const FCOORD &dir) const { - FCOORD v = !dir; - v.normalise(); - return std::sqrt(v.x() * v.x() * x_variance() + - 2 * v.x() * v.y() * covariance() + - v.y() * v.y() * y_variance()); -} - -// Returns the direction of the fitted line as a unit vector, using the -// least mean squared perpendicular distance. The line runs through the -// mean_point, i.e. a point p on the line is given by: -// p = mean_point() + lambda * vector_fit() for some real number lambda. -// Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous -// and may be negated without changing its meaning. -// Fitting a line m + 𝜆v to a set of N points Pi = (xi, yi), where -// m is the mean point (𝝁, 𝝂) and -// v is the direction vector (cos𝜃, sin𝜃) -// The perpendicular distance of each Pi from the line is: -// (Pi - m) x v, where x is the scalar cross product. -// Total squared error is thus: -// E = ∑((xi - 𝝁)sin𝜃 - (yi - 𝝂)cos𝜃)² -// = ∑(xi - 𝝁)²sin²𝜃 - 2∑(xi - 𝝁)(yi - 𝝂)sin𝜃 cos𝜃 + ∑(yi - 𝝂)²cos²𝜃 -// = NVar(xi)sin²𝜃 - 2NCovar(xi, yi)sin𝜃 cos𝜃 + NVar(yi)cos²𝜃 (Eq 1) -// where Var(xi) is the variance of xi, -// and Covar(xi, yi) is the covariance of xi, yi. 
-// Taking the derivative wrt 𝜃 and setting to 0 to obtain the min/max: -// 0 = 2NVar(xi)sin𝜃 cos𝜃 -2NCovar(xi, yi)(cos²𝜃 - sin²𝜃) -2NVar(yi)sin𝜃 cos𝜃 -// => Covar(xi, yi)(cos²𝜃 - sin²𝜃) = (Var(xi) - Var(yi))sin𝜃 cos𝜃 -// Using double angles: -// 2Covar(xi, yi)cos2𝜃 = (Var(xi) - Var(yi))sin2𝜃 (Eq 2) -// So 𝜃 = 0.5 atan2(2Covar(xi, yi), Var(xi) - Var(yi)) (Eq 3) - -// Because it involves 2𝜃 , Eq 2 has 2 solutions 90 degrees apart, but which -// is the min and which is the max? From Eq1: -// E/N = Var(xi)sin²𝜃 - 2Covar(xi, yi)sin𝜃 cos𝜃 + Var(yi)cos²𝜃 -// and 90 degrees away, using sin/cos equivalences: -// E'/N = Var(xi)cos²𝜃 + 2Covar(xi, yi)sin𝜃 cos𝜃 + Var(yi)sin²𝜃 -// The second error is smaller (making it the minimum) iff -// E'/N < E/N ie: -// (Var(xi) - Var(yi))(cos²𝜃 - sin²𝜃) < -4Covar(xi, yi)sin𝜃 cos𝜃 -// Using double angles: -// (Var(xi) - Var(yi))cos2𝜃 < -2Covar(xi, yi)sin2𝜃 (InEq 1) -// But atan2(2Covar(xi, yi), Var(xi) - Var(yi)) picks 2𝜃 such that: -// sgn(cos2𝜃) = sgn(Var(xi) - Var(yi)) and sgn(sin2𝜃) = sgn(Covar(xi, yi)) -// so InEq1 can *never* be true, making the atan2 result *always* the min! -// In the degenerate case, where Covar(xi, yi) = 0 AND Var(xi) = Var(yi), -// the 2 solutions have equal error and the inequality is still false. -// Therefore the solution really is as trivial as Eq 3. - -// This is equivalent to returning the Principal Component in PCA, or the -// eigenvector corresponding to the largest eigenvalue in the covariance -// matrix. However, atan2 is much simpler! The one reference I found that -// uses this formula is http://web.mit.edu/18.06/www/Essays/tlsfit.pdf but -// that is still a much more complex derivation. It seems Pearson had already -// found this simple solution in 1901. 
-// http://books.google.com/books?id=WXwvAQAAIAAJ&pg=PA559 -FCOORD LLSQ::vector_fit() const { - double x_var = x_variance(); - double y_var = y_variance(); - double covar = covariance(); - double theta = 0.5 * atan2(2.0 * covar, x_var - y_var); - FCOORD result(cos(theta), sin(theta)); - return result; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/linlsq.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/linlsq.h deleted file mode 100644 index 4a7f09dd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/linlsq.h +++ /dev/null @@ -1,136 +0,0 @@ -/********************************************************************** - * File: linlsq.h (Formerly llsq.h) - * Description: Linear Least squares fitting code. - * Author: Ray Smith - * Created: Thu Sep 12 08:44:51 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCSTRUCT_LINLSQ_H_ -#define TESSERACT_CCSTRUCT_LINLSQ_H_ - -#include // for int32_t -#include "points.h" // for FCOORD - -template class GenericVector; - -class LLSQ { - public: - LLSQ() { // constructor - clear(); // set to zeros - } - void clear(); // initialize - - // Adds an element with a weight of 1. - void add(double x, double y); - // Adds an element with a specified weight. 
- void add(double x, double y, double weight); - // Adds a whole LLSQ. - void add(const LLSQ& other); - // Deletes an element with a weight of 1. - void remove(double x, double y); - int32_t count() const { // no of elements - return static_cast(total_weight + 0.5); - } - - double m() const; // get gradient - double c(double m) const; // get constant - double rms(double m, double c) const; // get error - double pearson() const; // get correlation coefficient. - - // Returns the x,y means as an FCOORD. - FCOORD mean_point() const; - - // Returns the average sum of squared perpendicular error from a line - // through mean_point() in the direction dir. - double rms_orth(const FCOORD &dir) const; - - // Returns the direction of the fitted line as a unit vector, using the - // least mean squared perpendicular distance. The line runs through the - // mean_point, i.e. a point p on the line is given by: - // p = mean_point() + lambda * vector_fit() for some real number lambda. - // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous - // and may be negated without changing its meaning, since a line is only - // unique to a range of pi radians. - // Modernists prefer to think of this as an Eigenvalue problem, but - // Pearson had the simple solution in 1901. - // - // Note that this is equivalent to returning the Principal Component in PCA, - // or the eigenvector corresponding to the largest eigenvalue in the - // covariance matrix. - FCOORD vector_fit() const; - - // Returns the covariance. 
- double covariance() const { - if (total_weight > 0.0) - return (sigxy - sigx * sigy / total_weight) / total_weight; - else - return 0.0; - } - double x_variance() const { - if (total_weight > 0.0) - return (sigxx - sigx * sigx / total_weight) / total_weight; - else - return 0.0; - } - double y_variance() const { - if (total_weight > 0.0) - return (sigyy - sigy * sigy / total_weight) / total_weight; - else - return 0.0; - } - - private: - double total_weight; // no of elements or sum of weights. - double sigx; // sum of x - double sigy; // sum of y - double sigxx; // sum x squared - double sigxy; // sum of xy - double sigyy; // sum y squared -}; - - -// Returns the median value of the vector, given that the values are -// circular, with the given modulus. Values may be signed or unsigned, -// eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi). -// NOTE that the array is shuffled, but the time taken is linear. -// An assumption is made that most of the values are spread over no more than -// half the range, but wrap-around is accounted for if the median is near -// the wrap-around point. -// Cannot be a member of GenericVector, as it makes heavy used of LLSQ. -// T must be an integer or float/double type. 
-template T MedianOfCircularValues(T modulus, GenericVector* v) { - LLSQ stats; - T halfrange = static_cast(modulus / 2); - int num_elements = v->size(); - for (int i = 0; i < num_elements; ++i) { - stats.add((*v)[i], (*v)[i] + halfrange); - } - bool offset_needed = stats.y_variance() < stats.x_variance(); - if (offset_needed) { - for (int i = 0; i < num_elements; ++i) { - (*v)[i] += halfrange; - } - } - int median_index = v->choose_nth_item(num_elements / 2); - if (offset_needed) { - for (int i = 0; i < num_elements; ++i) { - (*v)[i] -= halfrange; - } - } - return (*v)[median_index]; -} - - -#endif // TESSERACT_CCSTRUCT_LINLSQ_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/matrix.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/matrix.cpp deleted file mode 100644 index 678d412c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/matrix.cpp +++ /dev/null @@ -1,164 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: matrix.cpp (Formerly matrix.c) - * Description: Ratings matrix code. (Used by associator) - * Author: Mark Seaman, OCR Technology - * Created: Wed May 16 13:18:47 1990 - * Modified: Wed Mar 20 09:44:47 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "matrix.h" - -#include "callcpp.h" -#include "ratngs.h" -#include "tprintf.h" -#include "unicharset.h" - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -MATRIX::~MATRIX() = default; - -// Returns true if there are any real classification results. -bool MATRIX::Classified(int col, int row, int wildcard_id) const { - if (get(col, row) == NOT_CLASSIFIED) return false; - BLOB_CHOICE_IT b_it(get(col, row)); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOB_CHOICE* choice = b_it.data(); - if (choice->IsClassified()) - return true; - } - return false; -} - -// Expands the existing matrix in-place to make the band wider, without -// losing any existing data. -void MATRIX::IncreaseBandSize(int bandwidth) { - ResizeWithCopy(dimension(), bandwidth); -} - -// Returns a bigger MATRIX with a new column and row in the matrix in order -// to split the blob at the given (ind,ind) diagonal location. -// Entries are relocated to the new MATRIX using the transformation defined -// by MATRIX_COORD::MapForSplit. -// Transfers the pointer data to the new MATRIX and deletes *this. -MATRIX* MATRIX::ConsumeAndMakeBigger(int ind) { - int dim = dimension(); - int band_width = bandwidth(); - // Check to see if bandwidth needs expanding. 
- for (int col = ind; col >= 0 && col > ind - band_width; --col) { - if (array_[col * band_width + band_width - 1] != empty_) { - ++band_width; - break; - } - } - MATRIX* result = new MATRIX(dim + 1, band_width); - - for (int col = 0; col < dim; ++col) { - for (int row = col; row < dim && row < col + bandwidth(); ++row) { - MATRIX_COORD coord(col, row); - coord.MapForSplit(ind); - BLOB_CHOICE_LIST* choices = get(col, row); - if (choices != nullptr) { - // Correct matrix location on each choice. - BLOB_CHOICE_IT bc_it(choices); - for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { - BLOB_CHOICE* choice = bc_it.data(); - choice->set_matrix_cell(coord.col, coord.row); - } - ASSERT_HOST(coord.Valid(*result)); - result->put(coord.col, coord.row, choices); - } - } - } - delete this; - return result; -} - -// Makes and returns a deep copy of *this, including all the BLOB_CHOICEs -// on the lists, but not any LanguageModelState that may be attached to the -// BLOB_CHOICEs. -MATRIX* MATRIX::DeepCopy() const { - int dim = dimension(); - int band_width = bandwidth(); - MATRIX* result = new MATRIX(dim, band_width); - for (int col = 0; col < dim; ++col) { - for (int row = col; row < dim && row < col + band_width; ++row) { - BLOB_CHOICE_LIST* choices = get(col, row); - if (choices != nullptr) { - BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST; - copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy); - result->put(col, row, copy_choices); - } - } - } - return result; -} - -// Print the best guesses out of the match rating matrix. 
-void MATRIX::print(const UNICHARSET &unicharset) const { - tprintf("Ratings Matrix (top 3 choices)\n"); - int dim = dimension(); - int band_width = bandwidth(); - int row, col; - for (col = 0; col < dim; ++col) { - for (row = col; row < dim && row < col + band_width; ++row) { - BLOB_CHOICE_LIST *rating = this->get(col, row); - if (rating == NOT_CLASSIFIED) continue; - BLOB_CHOICE_IT b_it(rating); - tprintf("col=%d row=%d ", col, row); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - tprintf("%s rat=%g cert=%g " , - unicharset.id_to_unichar(b_it.data()->unichar_id()), - b_it.data()->rating(), b_it.data()->certainty()); - } - tprintf("\n"); - } - tprintf("\n"); - } - tprintf("\n"); - for (col = 0; col < dim; ++col) tprintf("\t%d", col); - tprintf("\n"); - for (row = 0; row < dim; ++row) { - for (col = 0; col <= row; ++col) { - if (col == 0) tprintf("%d\t", row); - if (row >= col + band_width) { - tprintf(" \t"); - continue; - } - BLOB_CHOICE_LIST *rating = this->get(col, row); - if (rating != NOT_CLASSIFIED) { - BLOB_CHOICE_IT b_it(rating); - int counter = 0; - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - tprintf("%s ", - unicharset.id_to_unichar(b_it.data()->unichar_id())); - ++counter; - if (counter == 3) break; - } - tprintf("\t"); - } else { - tprintf(" \t"); - } - } - tprintf("\n"); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/matrix.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/matrix.h deleted file mode 100644 index 7dc1bb15..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/matrix.h +++ /dev/null @@ -1,640 +0,0 @@ -/* -*-C-*- - ****************************************************************************** - * File: matrix.h - * Description: Generic 2-d array/matrix and banded triangular matrix class. 
- * Author: Ray Smith - * TODO(rays) Separate from ratings matrix, which it also contains: - * - * Description: Ratings matrix class (specialization of banded matrix). - * Segmentation search matrix of lists of BLOB_CHOICE. - * Author: Mark Seaman, OCR Technology - * Created: Wed May 16 13:22:06 1990 - * Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#ifndef TESSERACT_CCSTRUCT_MATRIX_H_ -#define TESSERACT_CCSTRUCT_MATRIX_H_ - -#include // for max, min -#include // for sqrt, fabs, isfinite -#include // for int32_t -#include // for FILE -#include // for memcpy -#include "errcode.h" // for ASSERT_HOST -#include "helpers.h" // for ReverseN, ClipToRange -#include "kdpair.h" // for KDPairInc -#include "points.h" // for ICOORD -#include "serialis.h" // for TFile - -class BLOB_CHOICE_LIST; -class UNICHARSET; - -#define NOT_CLASSIFIED static_cast(nullptr) - -// A generic class to hold a 2-D matrix with entries of type T, but can also -// act as a base class for other implementations, such as a triangular or -// banded matrix. 
-template -class GENERIC_2D_ARRAY { - public: - // Initializes the array size, and empty element, but cannot allocate memory - // for the subclasses or initialize because calls to the num_elements - // member will be routed to the base class implementation. Subclasses can - // either pass the memory in, or allocate after by calling Resize(). - GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array) - : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) { - size_allocated_ = dim1 * dim2; - } - // Original constructor for a full rectangular matrix DOES allocate memory - // and initialize it to empty. - GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty) - : empty_(empty), dim1_(dim1), dim2_(dim2) { - int new_size = dim1 * dim2; - array_ = new T[new_size]; - size_allocated_ = new_size; - for (int i = 0; i < size_allocated_; ++i) - array_[i] = empty_; - } - // Default constructor for array allocation. Use Resize to set the size. - GENERIC_2D_ARRAY() - : array_(nullptr), empty_(static_cast(0)), dim1_(0), dim2_(0), - size_allocated_(0) { - } - GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY& src) - : array_(nullptr), empty_(static_cast(0)), dim1_(0), dim2_(0), - size_allocated_(0) { - *this = src; - } - virtual ~GENERIC_2D_ARRAY() { delete[] array_; } - - void operator=(const GENERIC_2D_ARRAY& src) { - ResizeNoInit(src.dim1(), src.dim2()); - memcpy(array_, src.array_, num_elements() * sizeof(array_[0])); - } - - // Reallocates the array to the given size. Does not keep old data, but does - // not initialize the array either. - // The allocated memory is expanded on the end by pad, allowing deliberate - // access beyond the bounds of the array. - void ResizeNoInit(int size1, int size2, int pad = 0) { - int new_size = size1 * size2 + pad; - if (new_size > size_allocated_) { - delete [] array_; - array_ = new T[new_size]; - size_allocated_ = new_size; - } - dim1_ = size1; - dim2_ = size2; - // Fill the padding data so it isn't uninitialized. 
- for (int i = size1 * size2; i < new_size; ++i) array_[i] = empty_; - } - - // Reallocate the array to the given size. Does not keep old data. - void Resize(int size1, int size2, const T& empty) { - empty_ = empty; - ResizeNoInit(size1, size2); - Clear(); - } - - // Reallocate the array to the given size, keeping old data. - void ResizeWithCopy(int size1, int size2) { - if (size1 != dim1_ || size2 != dim2_) { - int new_size = size1 * size2; - T* new_array = new T[new_size]; - for (int col = 0; col < size1; ++col) { - for (int row = 0; row < size2; ++row) { - int old_index = col * dim2() + row; - int new_index = col * size2 + row; - if (col < dim1_ && row < dim2_) { - new_array[new_index] = array_[old_index]; - } else { - new_array[new_index] = empty_; - } - } - } - delete[] array_; - array_ = new_array; - dim1_ = size1; - dim2_ = size2; - size_allocated_ = new_size; - } - } - - // Sets all the elements of the array to the empty value. - void Clear() { - int total_size = num_elements(); - for (int i = 0; i < total_size; ++i) - array_[i] = empty_; - } - - // Writes to the given file. Returns false in case of error. - // Only works with bitwise-serializeable types! - bool Serialize(FILE* fp) const { - if (!SerializeSize(fp)) return false; - if (!tesseract::Serialize(fp, &empty_)) return false; - int size = num_elements(); - return tesseract::Serialize(fp, &array_[0], size); - } - - bool Serialize(tesseract::TFile* fp) const { - if (!SerializeSize(fp)) return false; - if (!fp->Serialize(&empty_)) return false; - int size = num_elements(); - return fp->Serialize(&array_[0], size); - } - - // Reads from the given file. Returns false in case of error. - // Only works with bitwise-serializeable types! - // If swap is true, assumes a big/little-endian swap is needed. 
- bool DeSerialize(bool swap, FILE* fp) { - if (!DeSerializeSize(swap, fp)) return false; - if (!tesseract::DeSerialize(fp, &empty_)) return false; - if (swap) ReverseN(&empty_, sizeof(empty_)); - int size = num_elements(); - if (!tesseract::DeSerialize(fp, &array_[0], size)) return false; - if (swap) { - for (int i = 0; i < size; ++i) - ReverseN(&array_[i], sizeof(array_[i])); - } - return true; - } - - bool DeSerialize(tesseract::TFile* fp) { - return DeSerializeSize(fp) && - fp->DeSerialize(&empty_) && - fp->DeSerialize(&array_[0], num_elements()); - } - - // Writes to the given file. Returns false in case of error. - // Assumes a T::Serialize(FILE*) const function. - bool SerializeClasses(FILE* fp) const { - if (!SerializeSize(fp)) return false; - if (!empty_.Serialize(fp)) return false; - int size = num_elements(); - for (int i = 0; i < size; ++i) { - if (!array_[i].Serialize(fp)) return false; - } - return true; - } - - // Reads from the given file. Returns false in case of error. - // Assumes a T::DeSerialize(bool swap, FILE*) function. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerializeClasses(bool swap, FILE* fp) { - if (!DeSerializeSize(swap, fp)) return false; - if (!empty_.DeSerialize(swap, fp)) return false; - int size = num_elements(); - for (int i = 0; i < size; ++i) { - if (!array_[i].DeSerialize(swap, fp)) return false; - } - return true; - } - - // Provide the dimensions of this rectangular matrix. - int dim1() const { return dim1_; } - int dim2() const { return dim2_; } - // Returns the number of elements in the array. - // Banded/triangular matrices may override. - virtual int num_elements() const { return dim1_ * dim2_; } - - // Expression to select a specific location in the matrix. The matrix is - // stored COLUMN-major, so the left-most index is the most significant. - // This allows [][] access to use indices in the same order as (,). 
- virtual int index(int column, int row) const { - return (column * dim2_ + row); - } - - // Put a list element into the matrix at a specific location. - void put(ICOORD pos, const T& thing) { - array_[this->index(pos.x(), pos.y())] = thing; - } - void put(int column, int row, const T& thing) { - array_[this->index(column, row)] = thing; - } - - // Get the item at a specified location from the matrix. - T get(ICOORD pos) const { - return array_[this->index(pos.x(), pos.y())]; - } - T get(int column, int row) const { - return array_[this->index(column, row)]; - } - // Return a reference to the element at the specified location. - const T& operator()(int column, int row) const { - return array_[this->index(column, row)]; - } - T& operator()(int column, int row) { - return array_[this->index(column, row)]; - } - // Allow access using array[column][row]. NOTE that the indices are - // in the same left-to-right order as the () indexing. - T* operator[](int column) { - return &array_[this->index(column, 0)]; - } - const T* operator[](int column) const { - return &array_[this->index(column, 0)]; - } - - // Adds addend to *this, element-by-element. - void operator+=(const GENERIC_2D_ARRAY& addend) { - if (dim2_ == addend.dim2_) { - // Faster if equal size in the major dimension. - int size = std::min(num_elements(), addend.num_elements()); - for (int i = 0; i < size; ++i) { - array_[i] += addend.array_[i]; - } - } else { - for (int x = 0; x < dim1_; x++) { - for (int y = 0; y < dim2_; y++) { - (*this)(x, y) += addend(x, y); - } - } - } - } - // Subtracts minuend from *this, element-by-element. - void operator-=(const GENERIC_2D_ARRAY& minuend) { - if (dim2_ == minuend.dim2_) { - // Faster if equal size in the major dimension. 
- int size = std::min(num_elements(), minuend.num_elements()); - for (int i = 0; i < size; ++i) { - array_[i] -= minuend.array_[i]; - } - } else { - for (int x = 0; x < dim1_; x++) { - for (int y = 0; y < dim2_; y++) { - (*this)(x, y) -= minuend(x, y); - } - } - } - } - // Adds addend to all elements. - void operator+=(const T& addend) { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - array_[i] += addend; - } - } - // Multiplies *this by factor, element-by-element. - void operator*=(const T& factor) { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - array_[i] *= factor; - } - } - // Clips *this to the given range. - void Clip(const T& rangemin, const T& rangemax) { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - array_[i] = ClipToRange(array_[i], rangemin, rangemax); - } - } - // Returns true if all elements of *this are within the given range. - // Only uses operator< - bool WithinBounds(const T& rangemin, const T& rangemax) const { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - const T& value = array_[i]; - if (value < rangemin || rangemax < value) - return false; - } - return true; - } - // Normalize the whole array. - double Normalize() { - int size = num_elements(); - if (size <= 0) return 0.0; - // Compute the mean. - double mean = 0.0; - for (int i = 0; i < size; ++i) { - mean += array_[i]; - } - mean /= size; - // Subtract the mean and compute the standard deviation. - double sd = 0.0; - for (int i = 0; i < size; ++i) { - double normed = array_[i] - mean; - array_[i] = normed; - sd += normed * normed; - } - sd = sqrt(sd / size); - if (sd > 0.0) { - // Divide by the sd. - for (int i = 0; i < size; ++i) { - array_[i] /= sd; - } - } - return sd; - } - - // Returns the maximum value of the array. - T Max() const { - int size = num_elements(); - if (size <= 0) return empty_; - // Compute the max. 
- T max_value = array_[0]; - for (int i = 1; i < size; ++i) { - const T& value = array_[i]; - if (value > max_value) max_value = value; - } - return max_value; - } - - // Returns the maximum absolute value of the array. - T MaxAbs() const { - int size = num_elements(); - if (size <= 0) return empty_; - // Compute the max. - T max_abs = static_cast(0); - for (int i = 0; i < size; ++i) { - T value = static_cast(fabs(array_[i])); - if (value > max_abs) max_abs = value; - } - return max_abs; - } - - // Accumulates the element-wise sums of squares of src into *this. - void SumSquares(const GENERIC_2D_ARRAY& src, const T& decay_factor) { - T update_factor = 1.0 - decay_factor; - int size = num_elements(); - for (int i = 0; i < size; ++i) { - array_[i] = array_[i] * decay_factor + - update_factor * src.array_[i] * src.array_[i]; - } - } - - // Scales each element using the adam algorithm, ie array_[i] by - // sqrt(sqsum[i] + epsilon)). - void AdamUpdate(const GENERIC_2D_ARRAY& sum, - const GENERIC_2D_ARRAY& sqsum, const T& epsilon) { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - array_[i] += sum.array_[i] / (sqrt(sqsum.array_[i]) + epsilon); - } - } - - void AssertFinite() const { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - ASSERT_HOST(isfinite(array_[i])); - } - } - - // REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a - // num_dims-dimensional array/tensor with dimensions given by dims, (ordered - // from most significant to least significant, the same as standard C arrays) - // and moves src_dim to dest_dim, with the initial dest_dim and any dimensions - // in between shifted towards the hole left by src_dim. Example: - // Current data content: array_=[0, 1, 2, ....119] - // perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]... - // but the current dimensions are irrelevant. 
- // num_dims = 4, dims=[5, 4, 3, 2] - // src_dim=3, dest_dim=1 - // tensor=[[[[0, 1][2, 3][4, 5]] - // [[6, 7][8, 9][10, 11]] - // [[12, 13][14, 15][16, 17]] - // [[18, 19][20, 21][22, 23]]] - // [[[24, 25]... - // output dims =[5, 2, 4, 3] - // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]] - // [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]] - // [[[24, 26, 28]... - // which is stored in the array_ as: - // [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...] - // NOTE: the 2 stored matrix dimensions are simply copied from *this. To - // change the dimensions after the transpose, use ResizeNoInit. - // Higher dimensions above 2 are strictly the responsibility of the caller. - void RotatingTranspose(const int* dims, int num_dims, int src_dim, - int dest_dim, GENERIC_2D_ARRAY* result) const { - int max_d = std::max(src_dim, dest_dim); - int min_d = std::min(src_dim, dest_dim); - // In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the - // ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1] - // being contiguous blocks of data that will move together, and - // [d0, min_d -1] being replicas of the transpose operation. - // num_replicas represents the large dimensions unchanged by the operation. - // move_size represents the small dimensions unchanged by the operation. - // src_step represents the stride in the src between each adjacent group - // in the destination. - int num_replicas = 1, move_size = 1, src_step = 1; - for (int d = 0; d < min_d; ++d) num_replicas *= dims[d]; - for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d]; - for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d]; - if (src_dim > dest_dim) src_step *= dims[src_dim]; - // wrap_size is the size of a single replica, being the amount that is - // handled num_replicas times. 
- int wrap_size = move_size; - for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d]; - result->ResizeNoInit(dim1_, dim2_); - result->empty_ = empty_; - const T* src = array_; - T* dest = result->array_; - for (int replica = 0; replica < num_replicas; ++replica) { - for (int start = 0; start < src_step; start += move_size) { - for (int pos = start; pos < wrap_size; pos += src_step) { - memcpy(dest, src + pos, sizeof(*dest) * move_size); - dest += move_size; - } - } - src += wrap_size; - } - } - - // Delete objects pointed to by array_[i]. - void delete_matrix_pointers() { - int size = num_elements(); - for (int i = 0; i < size; ++i) { - T matrix_cell = array_[i]; - if (matrix_cell != empty_) - delete matrix_cell; - } - } - - protected: - // Factored helper to serialize the size. - bool SerializeSize(FILE* fp) const { - uint32_t size = dim1_; - if (!tesseract::Serialize(fp, &size)) return false; - size = dim2_; - return tesseract::Serialize(fp, &size); - } - bool SerializeSize(tesseract::TFile* fp) const { - uint32_t size = dim1_; - if (!fp->Serialize(&size)) return false; - size = dim2_; - return fp->Serialize(&size); - } - // Factored helper to deserialize the size. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerializeSize(bool swap, FILE* fp) { - uint32_t size1, size2; - if (!tesseract::DeSerialize(fp, &size1)) return false; - if (!tesseract::DeSerialize(fp, &size2)) return false; - if (swap) { - ReverseN(&size1, sizeof(size1)); - ReverseN(&size2, sizeof(size2)); - } - // Arbitrarily limit the number of elements to protect against bad data. - if (size1 > UINT16_MAX) return false; - if (size2 > UINT16_MAX) return false; - Resize(size1, size2, empty_); - return true; - } - bool DeSerializeSize(tesseract::TFile* fp) { - int32_t size1, size2; - if (!fp->DeSerialize(&size1)) return false; - if (!fp->DeSerialize(&size2)) return false; - // Arbitrarily limit the number of elements to protect against bad data. 
- if (size1 > UINT16_MAX) return false; - if (size2 > UINT16_MAX) return false; - Resize(size1, size2, empty_); - return true; - } - - T* array_; - T empty_; // The unused cell. - int dim1_; // Size of the 1st dimension in indexing functions. - int dim2_; // Size of the 2nd dimension in indexing functions. - // The total size to which the array can be expanded before a realloc is - // needed. If Resize is used, memory is retained so it can be re-expanded - // without a further alloc, and this stores the allocated size. - int size_allocated_; -}; - -// A generic class to store a banded triangular matrix with entries of type T. -// In this array, the nominally square matrix is dim1_ x dim1_, and dim2_ is -// the number of bands, INCLUDING the diagonal. The storage is thus of size -// dim1_ * dim2_ and index(col, row) = col * dim2_ + row - col, and an -// assert will fail if row < col or row - col >= dim2. -template -class BandTriMatrix : public GENERIC_2D_ARRAY { - public: - // Allocate a piece of memory to hold a 2d-array of the given dimension. - // Initialize all the elements of the array to empty instead of assuming - // that a default constructor can be used. - BandTriMatrix(int dim1, int dim2, const T& empty) - : GENERIC_2D_ARRAY(dim1, dim2, empty) { - } - // The default destructor will do. - - // Provide the dimensions of this matrix. - // dimension is the size of the nominally square matrix. - int dimension() const { return this->dim1_; } - // bandwidth is the number of bands in the matrix, INCLUDING the diagonal. - int bandwidth() const { return this->dim2_; } - - // Expression to select a specific location in the matrix. The matrix is - // stored COLUMN-major, so the left-most index is the most significant. - // This allows [][] access to use indices in the same order as (,). 
- virtual int index(int column, int row) const { - ASSERT_HOST(row >= column); - ASSERT_HOST(row - column < this->dim2_); - return column * this->dim2_ + row - column; - } - - // Appends array2 corner-to-corner to *this, making an array of dimension - // equal to the sum of the individual dimensions. - // array2 is not destroyed, but is left empty, as all elements are moved - // to *this. - void AttachOnCorner(BandTriMatrix* array2) { - int new_dim1 = this->dim1_ + array2->dim1_; - int new_dim2 = std::max(this->dim2_, array2->dim2_); - T* new_array = new T[new_dim1 * new_dim2]; - for (int col = 0; col < new_dim1; ++col) { - for (int j = 0; j < new_dim2; ++j) { - int new_index = col * new_dim2 + j; - if (col < this->dim1_ && j < this->dim2_) { - new_array[new_index] = this->get(col, col + j); - } else if (col >= this->dim1_ && j < array2->dim2_) { - new_array[new_index] = array2->get(col - this->dim1_, - col - this->dim1_ + j); - array2->put(col - this->dim1_, col - this->dim1_ + j, nullptr); - } else { - new_array[new_index] = this->empty_; - } - } - } - delete[] this->array_; - this->array_ = new_array; - this->dim1_ = new_dim1; - this->dim2_ = new_dim2; - } -}; - -class MATRIX : public BandTriMatrix { - public: - MATRIX(int dimension, int bandwidth) - : BandTriMatrix(dimension, bandwidth, NOT_CLASSIFIED) {} - - virtual ~MATRIX(); - - // Returns true if there are any real classification results. - bool Classified(int col, int row, int wildcard_id) const; - - // Expands the existing matrix in-place to make the band wider, without - // losing any existing data. - void IncreaseBandSize(int bandwidth); - - // Returns a bigger MATRIX with a new column and row in the matrix in order - // to split the blob at the given (ind,ind) diagonal location. - // Entries are relocated to the new MATRIX using the transformation defined - // by MATRIX_COORD::MapForSplit. - // Transfers the pointer data to the new MATRIX and deletes *this. 
- MATRIX* ConsumeAndMakeBigger(int ind); - - // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs - // on the lists, but not any LanguageModelState that may be attached to the - // BLOB_CHOICEs. - MATRIX* DeepCopy() const; - - // Print a shortened version of the contents of the matrix. - void print(const UNICHARSET &unicharset) const; -}; - -struct MATRIX_COORD { - static void Delete(void *arg) { - MATRIX_COORD *c = static_cast(arg); - delete c; - } - // Default constructor required by GenericHeap. - MATRIX_COORD() : col(0), row(0) {} - MATRIX_COORD(int c, int r): col(c), row(r) {} - ~MATRIX_COORD() {} - - bool Valid(const MATRIX &m) const { - return 0 <= col && col < m.dimension() && - col <= row && row < col + m.bandwidth() && row < m.dimension(); - } - - // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal - // location. - // Entries at (i,j) for i in [0,ind] and j in [ind,dim) move to (i,j+1), - // making a new row at ind. - // Entries at (i,j) for i in [ind+1,dim) and j in [i,dim) move to (i+i,j+1), - // making a new column at ind+1. - void MapForSplit(int ind) { - ASSERT_HOST(row >= col); - if (col > ind) ++col; - if (row >= ind) ++row; - ASSERT_HOST(row >= col); - } - - int col; - int row; -}; - -// The MatrixCoordPair contains a MATRIX_COORD and its priority. -using MatrixCoordPair = tesseract::KDPairInc; - -#endif // TESSERACT_CCSTRUCT_MATRIX_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/mod128.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/mod128.cpp deleted file mode 100644 index 8e128783..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/mod128.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/********************************************************************** - * File: mod128.cpp (Formerly dir128.c) - * Description: Code to convert a DIR128 to an ICOORD. 
- * Author: Ray Smith - * Created: Tue Oct 22 11:56:09 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "mod128.h" - -static const int16_t idirtab[] = { - 1000, 0, 998, 49, 995, 98, 989, 146, - 980, 195, 970, 242, 956, 290, 941, 336, - 923, 382, 903, 427, 881, 471, 857, 514, - 831, 555, 803, 595, 773, 634, 740, 671, - 707, 707, 671, 740, 634, 773, 595, 803, - 555, 831, 514, 857, 471, 881, 427, 903, - 382, 923, 336, 941, 290, 956, 242, 970, - 195, 980, 146, 989, 98, 995, 49, 998, - 0, 1000, -49, 998, -98, 995, -146, 989, - -195, 980, -242, 970, -290, 956, -336, 941, - -382, 923, -427, 903, -471, 881, -514, 857, - -555, 831, -595, 803, -634, 773, -671, 740, - -707, 707, -740, 671, -773, 634, -803, 595, - -831, 555, -857, 514, -881, 471, -903, 427, - -923, 382, -941, 336, -956, 290, -970, 242, - -980, 195, -989, 146, -995, 98, -998, 49, - -1000, 0, -998, -49, -995, -98, -989, -146, - -980, -195, -970, -242, -956, -290, -941, -336, - -923, -382, -903, -427, -881, -471, -857, -514, - -831, -555, -803, -595, -773, -634, -740, -671, - -707, -707, -671, -740, -634, -773, -595, -803, - -555, -831, -514, -857, -471, -881, -427, -903, - -382, -923, -336, -941, -290, -956, -242, -970, - -195, -980, -146, -989, -98, -995, -49, -998, - 0, -1000, 49, -998, 98, -995, 146, -989, - 195, -980, 242, -970, 290, -956, 336, -941, - 382, 
-923, 427, -903, 471, -881, 514, -857, - 555, -831, 595, -803, 634, -773, 671, -740, - 707, -707, 740, -671, 773, -634, 803, -595, - 831, -555, 857, -514, 881, -471, 903, -427, - 923, -382, 941, -336, 956, -290, 970, -242, - 980, -195, 989, -146, 995, -98, 998, -49 -}; - -const ICOORD* dirtab = reinterpret_cast(idirtab); - -/********************************************************************** - * DIR128::DIR128 - * - * Quantize the direction of an FCOORD to make a DIR128. - **********************************************************************/ - -DIR128::DIR128( //from fcoord - const FCOORD fc //vector to quantize - ) { - int high, low, current; //binary search - - low = 0; - if (fc.y () == 0) { - if (fc.x () >= 0) - dir = 0; - else - dir = MODULUS / 2; - return; - } - high = MODULUS; - do { - current = (high + low) / 2; - if (dirtab[current] * fc >= 0) - low = current; - else - high = current; - } - while (high - low > 1); - dir = low; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/mod128.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/mod128.h deleted file mode 100644 index 7afa1453..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/mod128.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************** - * File: mod128.h (Formerly dir128.h) - * Description: Header for class which implements modulo arithmetic. - * Author: Ray Smith - * Created: Tue Mar 26 17:48:13 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef MOD128_H -#define MOD128_H - -#include "points.h" - -#define MODULUS 128 /*range of directions */ -#define DIRBITS 7 //no of bits used -#define DIRSCALE 1000 //length of vector - -class DLLSYM DIR128 -{ - public: - DIR128() = default; - - DIR128( //constructor - int16_t value) { //value to assign - value %= MODULUS; //modulo arithmetic - if (value < 0) - value += MODULUS; //done properly - dir = (int8_t) value; - } - DIR128(const FCOORD fc); //quantize vector - - DIR128 & operator= ( //assign of int16_t - int16_t value) { //value to assign - value %= MODULUS; //modulo arithmetic - if (value < 0) - value += MODULUS; //done properly - dir = (int8_t) value; - return *this; - } - int8_t operator- ( //subtraction - const DIR128 & minus) const//for signed result - { - //result - int16_t result = dir - minus.dir; - - if (result > MODULUS / 2) - result -= MODULUS; //get in range - else if (result < -MODULUS / 2) - result += MODULUS; - return (int8_t) result; - } - DIR128 operator+ ( //addition - const DIR128 & add) const //of itself - { - DIR128 result; //sum - - result = dir + add.dir; //let = do the work - return result; - } - DIR128 & operator+= ( //same as + - const DIR128 & add) { - *this = dir + add.dir; //let = do the work - return *this; - } - int8_t get_dir() const { //access function - return dir; - } - - private: - int8_t dir; //a direction -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/normalis.cpp 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/normalis.cpp deleted file mode 100644 index fc6c728a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/normalis.cpp +++ /dev/null @@ -1,568 +0,0 @@ -/********************************************************************** - * File: normalis.cpp (Formerly denorm.c) - * Description: Code for the DENORM class. - * Author: Ray Smith - * Created: Thu Apr 23 09:22:43 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "normalis.h" - -#include // for FLT_MAX -#include - -#include "allheaders.h" -#include "blobs.h" -#include "helpers.h" -#include "matrix.h" -#include "ocrblock.h" -#include "unicharset.h" -#include "werd.h" - -// Tolerance in pixels used for baseline and xheight on non-upper/lower scripts. -const int kSloppyTolerance = 4; -// Final tolerance in pixels added to the computed xheight range. 
-const float kFinalPixelTolerance = 0.125f; - -DENORM::DENORM() { - Init(); -} - -DENORM::DENORM(const DENORM &src) { - rotation_ = nullptr; - *this = src; -} - - -DENORM & DENORM::operator=(const DENORM & src) { - Clear(); - inverse_ = src.inverse_; - predecessor_ = src.predecessor_; - pix_ = src.pix_; - block_ = src.block_; - if (src.rotation_ == nullptr) - rotation_ = nullptr; - else - rotation_ = new FCOORD(*src.rotation_); - x_origin_ = src.x_origin_; - y_origin_ = src.y_origin_; - x_scale_ = src.x_scale_; - y_scale_ = src.y_scale_; - final_xshift_ = src.final_xshift_; - final_yshift_ = src.final_yshift_; - return *this; -} - -DENORM::~DENORM() { - Clear(); -} - -// Initializes the denorm for a transformation. For details see the large -// comment in normalis.h. -// Arguments: -// block: if not nullptr, then this is the first transformation, and -// block->re_rotation() needs to be used after the Denorm -// transformation to get back to the image coords. -// rotation: if not nullptr, apply this rotation after translation to the -// origin and scaling. (Usually a classify rotation.) -// predecessor: if not nullptr, then predecessor has been applied to the -// input space and needs to be undone to complete the inverse. -// The above pointers are not owned by this DENORM and are assumed to live -// longer than this denorm, except rotation, which is deep copied on input. -// -// x_origin: The x origin which will be mapped to final_xshift in the result. -// y_origin: The y origin which will be mapped to final_yshift in the result. -// Added to result of row->baseline(x) if not nullptr. -// -// x_scale: scale factor for the x-coordinate. -// y_scale: scale factor for the y-coordinate. Ignored if segs is given. -// Note that these scale factors apply to the same x and y system as the -// x-origin and y-origin apply, ie after any block rotation, but before -// the rotation argument is applied. -// -// final_xshift: The x component of the final translation. 
-// final_yshift: The y component of the final translation. -void DENORM::SetupNormalization(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, - float final_xshift, float final_yshift) { - Clear(); - block_ = block; - if (rotation == nullptr) - rotation_ = nullptr; - else - rotation_ = new FCOORD(*rotation); - predecessor_ = predecessor; - x_origin_ = x_origin; - y_origin_ = y_origin; - x_scale_ = x_scale; - y_scale_ = y_scale; - final_xshift_ = final_xshift; - final_yshift_ = final_yshift; -} - -// Helper for SetupNonLinear computes an image of shortest run-lengths from -// the x/y edges provided. -// Based on "A nonlinear normalization method for handprinted Kanji character -// recognition -- line density equalization" by Hiromitsu Yamada et al. -// Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding -// ______________ input x_coords and y_coords. -// | _________ | -// | | _ | | 1, 6 -// | | | | | | 1, 3, 4, 6 -// | | | | | | 1, 3, 4, 6 -// | | | | | | 1, 3, 4, 6 -// | | |_| | | 1, 3, 4, 6 -// | |_________| | 1, 6 -// |_____________| -// E 1 1 1 1 1 E -// m 7 7 2 7 7 m -// p 6 p -// t 7 t -// y y -// The output image contains the min of the x and y run-length (distance -// between edges) at each coordinate in the image thus: -// ______________ -// |7 1_1_1_1_1 7| -// |1|5 5 1 5 5|1| -// |1|2 2|1|2 2|1| -// |1|2 2|1|2 2|1| -// |1|2 2|1|2 2|1| -// |1|2 2|1|2 2|1| -// |1|5_5_1_5_5|1| -// |7_1_1_1_1_1_7| -// Note that the input coords are all integer, so all partial pixels are dealt -// with elsewhere. Although it is nice for outlines to be properly connected -// and continuous, there is no requirement that they be as such, so they could -// have been derived from a flaky source, such as greyscale. 
-// This function works only within the provided box, and it is assumed that the -// input x_coords and y_coords have already been translated to have the bottom- -// left of box as the origin. Although an output, the minruns should have been -// pre-initialized to be the same size as box. Each element will contain the -// minimum of x and y run-length as shown above. -static void ComputeRunlengthImage( - const TBOX& box, - const GenericVector >& x_coords, - const GenericVector >& y_coords, - GENERIC_2D_ARRAY* minruns) { - int width = box.width(); - int height = box.height(); - ASSERT_HOST(minruns->dim1() == width); - ASSERT_HOST(minruns->dim2() == height); - // Set a 2-d image array to the run lengths at each pixel. - for (int ix = 0; ix < width; ++ix) { - int y = 0; - for (int i = 0; i < y_coords[ix].size(); ++i) { - int y_edge = ClipToRange(y_coords[ix][i], 0, height); - int gap = y_edge - y; - // Every pixel between the last and current edge get set to the gap. - while (y < y_edge) { - (*minruns)(ix, y) = gap; - ++y; - } - } - // Pretend there is a bounding box of edges all around the image. - int gap = height - y; - while (y < height) { - (*minruns)(ix, y) = gap; - ++y; - } - } - // Now set the image pixels the the MIN of the x and y runlengths. 
- for (int iy = 0; iy < height; ++iy) { - int x = 0; - for (int i = 0; i < x_coords[iy].size(); ++i) { - int x_edge = ClipToRange(x_coords[iy][i], 0, width); - int gap = x_edge - x; - while (x < x_edge) { - if (gap < (*minruns)(x, iy)) - (*minruns)(x, iy) = gap; - ++x; - } - } - int gap = width - x; - while (x < width) { - if (gap < (*minruns)(x, iy)) - (*minruns)(x, iy) = gap; - ++x; - } - } -} -// Converts the run-length image (see above to the edge density profiles used -// for scaling, thus: -// ______________ -// |7 1_1_1_1_1 7| = 5.28 -// |1|5 5 1 5 5|1| = 3.8 -// |1|2 2|1|2 2|1| = 5 -// |1|2 2|1|2 2|1| = 5 -// |1|2 2|1|2 2|1| = 5 -// |1|2 2|1|2 2|1| = 5 -// |1|5_5_1_5_5|1| = 3.8 -// |7_1_1_1_1_1_7| = 5.28 -// 6 4 4 8 4 4 6 -// . . . . . . . -// 2 4 4 0 4 4 2 -// 8 8 -// Each profile is the sum of the reciprocals of the pixels in the image in -// the appropriate row or column, and these are then normalized to sum to 1. -// On output hx, hy contain an extra element, which will eventually be used -// to guarantee that the top/right edge of the box (and anything beyond) always -// gets mapped to the maximum target coordinate. -static void ComputeEdgeDensityProfiles(const TBOX& box, - const GENERIC_2D_ARRAY& minruns, - GenericVector* hx, - GenericVector* hy) { - int width = box.width(); - int height = box.height(); - hx->init_to_size(width + 1, 0.0); - hy->init_to_size(height + 1, 0.0); - double total = 0.0; - for (int iy = 0; iy < height; ++iy) { - for (int ix = 0; ix < width; ++ix) { - int run = minruns(ix, iy); - if (run == 0) run = 1; - float density = 1.0f / run; - (*hx)[ix] += density; - (*hy)[iy] += density; - } - total += (*hy)[iy]; - } - // Normalize each profile to sum to 1. - if (total > 0.0) { - for (int ix = 0; ix < width; ++ix) { - (*hx)[ix] /= total; - } - for (int iy = 0; iy < height; ++iy) { - (*hy)[iy] /= total; - } - } - // There is an extra element in each array, so initialize to 1. 
- (*hx)[width] = 1.0f; - (*hy)[height] = 1.0f; -} - -// Sets up the DENORM to execute a non-linear transformation based on -// preserving an even distribution of stroke edges. The transformation -// operates only within the given box. -// x_coords is a collection of the x-coords of vertical edges for each -// y-coord starting at box.bottom(). -// y_coords is a collection of the y-coords of horizontal edges for each -// x-coord starting at box.left(). -// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. -// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. -// The second-level vectors must all be sorted in ascending order. -// See comments on the helper functions above for more details. -void DENORM::SetupNonLinear( - const DENORM* predecessor, const TBOX& box, float target_width, - float target_height, float final_xshift, float final_yshift, - const GenericVector >& x_coords, - const GenericVector >& y_coords) { - Clear(); - predecessor_ = predecessor; - // x_map_ and y_map_ store a mapping from input x and y coordinate to output - // x and y coordinate, based on scaling to the supplied target_width and - // target_height. - x_map_ = new GenericVector; - y_map_ = new GenericVector; - // Set a 2-d image array to the run lengths at each pixel. - int width = box.width(); - int height = box.height(); - GENERIC_2D_ARRAY minruns(width, height, 0); - ComputeRunlengthImage(box, x_coords, y_coords, &minruns); - // Edge density is the sum of the inverses of the run lengths. Compute - // edge density projection profiles. - ComputeEdgeDensityProfiles(box, minruns, x_map_, y_map_); - // Convert the edge density profiles to the coordinates by multiplying by - // the desired size and accumulating. 
- (*x_map_)[width] = target_width; - for (int x = width - 1; x >= 0; --x) { - (*x_map_)[x] = (*x_map_)[x + 1] - (*x_map_)[x] * target_width; - } - (*y_map_)[height] = target_height; - for (int y = height - 1; y >= 0; --y) { - (*y_map_)[y] = (*y_map_)[y + 1] - (*y_map_)[y] * target_height; - } - x_origin_ = box.left(); - y_origin_ = box.bottom(); - final_xshift_ = final_xshift; - final_yshift_ = final_yshift; -} - -// Transforms the given coords one step forward to normalized space, without -// using any block rotation or predecessor. -void DENORM::LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const { - FCOORD src_pt(pt.x, pt.y); - FCOORD float_result; - LocalNormTransform(src_pt, &float_result); - transformed->x = IntCastRounded(float_result.x()); - transformed->y = IntCastRounded(float_result.y()); -} -void DENORM::LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const { - FCOORD translated(pt.x() - x_origin_, pt.y() - y_origin_); - if (x_map_ != nullptr && y_map_ != nullptr) { - int x = ClipToRange(IntCastRounded(translated.x()), 0, x_map_->size()-1); - translated.set_x((*x_map_)[x]); - int y = ClipToRange(IntCastRounded(translated.y()), 0, y_map_->size()-1); - translated.set_y((*y_map_)[y]); - } else { - translated.set_x(translated.x() * x_scale_); - translated.set_y(translated.y() * y_scale_); - if (rotation_ != nullptr) - translated.rotate(*rotation_); - } - transformed->set_x(translated.x() + final_xshift_); - transformed->set_y(translated.y() + final_yshift_); -} - -// Transforms the given coords forward to normalized space using the -// full transformation sequence defined by the block rotation, the -// predecessors, deepest first, and finally this. If first_norm is not nullptr, -// then the first and deepest transformation used is first_norm, ending -// with this, and the block rotation will not be applied. 
-void DENORM::NormTransform(const DENORM* first_norm, const TPOINT& pt, - TPOINT* transformed) const { - FCOORD src_pt(pt.x, pt.y); - FCOORD float_result; - NormTransform(first_norm, src_pt, &float_result); - transformed->x = IntCastRounded(float_result.x()); - transformed->y = IntCastRounded(float_result.y()); -} -void DENORM::NormTransform(const DENORM* first_norm, const FCOORD& pt, - FCOORD* transformed) const { - FCOORD src_pt(pt); - if (first_norm != this) { - if (predecessor_ != nullptr) { - predecessor_->NormTransform(first_norm, pt, &src_pt); - } else if (block_ != nullptr) { - FCOORD fwd_rotation(block_->re_rotation().x(), - -block_->re_rotation().y()); - src_pt.rotate(fwd_rotation); - } - } - LocalNormTransform(src_pt, transformed); -} - -// Transforms the given coords one step back to source space, without -// using to any block rotation or predecessor. -void DENORM::LocalDenormTransform(const TPOINT& pt, TPOINT* original) const { - FCOORD src_pt(pt.x, pt.y); - FCOORD float_result; - LocalDenormTransform(src_pt, &float_result); - original->x = IntCastRounded(float_result.x()); - original->y = IntCastRounded(float_result.y()); -} -void DENORM::LocalDenormTransform(const FCOORD& pt, FCOORD* original) const { - FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_); - if (x_map_ != nullptr && y_map_ != nullptr) { - int x = x_map_->binary_search(rotated.x()); - original->set_x(x + x_origin_); - int y = y_map_->binary_search(rotated.y()); - original->set_y(y + y_origin_); - } else { - if (rotation_ != nullptr) { - FCOORD inverse_rotation(rotation_->x(), -rotation_->y()); - rotated.rotate(inverse_rotation); - } - original->set_x(rotated.x() / x_scale_ + x_origin_); - float y_scale = y_scale_; - original->set_y(rotated.y() / y_scale + y_origin_); - } -} - -// Transforms the given coords all the way back to source image space using -// the full transformation sequence defined by this and its predecessors -// recursively, shallowest first, and finally any 
block re_rotation. -// If last_denorm is not nullptr, then the last transformation used will -// be last_denorm, and the block re_rotation will never be executed. -void DENORM::DenormTransform(const DENORM* last_denorm, const TPOINT& pt, - TPOINT* original) const { - FCOORD src_pt(pt.x, pt.y); - FCOORD float_result; - DenormTransform(last_denorm, src_pt, &float_result); - original->x = IntCastRounded(float_result.x()); - original->y = IntCastRounded(float_result.y()); -} -void DENORM::DenormTransform(const DENORM* last_denorm, const FCOORD& pt, - FCOORD* original) const { - LocalDenormTransform(pt, original); - if (last_denorm != this) { - if (predecessor_ != nullptr) { - predecessor_->DenormTransform(last_denorm, *original, original); - } else if (block_ != nullptr) { - original->rotate(block_->re_rotation()); - } - } -} - -// Normalize a blob using blob transformations. Less accurate, but -// more accurately copies the old way. -void DENORM::LocalNormBlob(TBLOB* blob) const { - TBOX blob_box = blob->bounding_box(); - ICOORD translation(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_)); - blob->Move(translation); - if (y_scale_ != 1.0f) - blob->Scale(y_scale_); - if (rotation_ != nullptr) - blob->Rotate(*rotation_); - translation.set_x(IntCastRounded(final_xshift_)); - translation.set_y(IntCastRounded(final_yshift_)); - blob->Move(translation); -} - -// Fills in the x-height range accepted by the given unichar_id, given its -// bounding box in the usual baseline-normalized coordinates, with some -// initial crude x-height estimate (such as word size) and this denoting the -// transformation that was used. -void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset, - const TBOX& bbox, - float* min_xht, float* max_xht, float* yshift) const { - // Default return -- accept anything. 
- *yshift = 0.0f; - *min_xht = 0.0f; - *max_xht = FLT_MAX; - - if (!unicharset.top_bottom_useful()) - return; - - // Clip the top and bottom to the limit of normalized feature space. - int top = ClipToRange(bbox.top(), 0, kBlnCellHeight - 1); - int bottom = ClipToRange(bbox.bottom(), 0, kBlnCellHeight - 1); - // A tolerance of yscale corresponds to 1 pixel in the image. - double tolerance = y_scale(); - // If the script doesn't have upper and lower-case characters, widen the - // tolerance to allow sloppy baseline/x-height estimates. - if (!unicharset.script_has_upper_lower()) - tolerance = y_scale() * kSloppyTolerance; - - int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, - &min_top, &max_top); - - // Calculate the scale factor we'll use to get to image y-pixels - double midx = (bbox.left() + bbox.right()) / 2.0; - double ydiff = (bbox.top() - bbox.bottom()) + 2.0; - FCOORD mid_bot(midx, bbox.bottom()), tmid_bot; - FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high; - DenormTransform(nullptr, mid_bot, &tmid_bot); - DenormTransform(nullptr, mid_high, &tmid_high); - - // bln_y_measure * yscale = image_y_measure - double yscale = tmid_high.pt_to_pt_dist(tmid_bot) / ydiff; - - // Calculate y-shift - int bln_yshift = 0, bottom_shift = 0, top_shift = 0; - if (bottom < min_bottom - tolerance) { - bottom_shift = bottom - min_bottom; - } else if (bottom > max_bottom + tolerance) { - bottom_shift = bottom - max_bottom; - } - if (top < min_top - tolerance) { - top_shift = top - min_top; - } else if (top > max_top + tolerance) { - top_shift = top - max_top; - } - if ((top_shift >= 0 && bottom_shift > 0) || - (top_shift < 0 && bottom_shift < 0)) { - bln_yshift = (top_shift + bottom_shift) / 2; - } - *yshift = bln_yshift * yscale; - - // To help very high cap/xheight ratio fonts accept the correct x-height, - // and to allow the large caps in small caps to accept the xheight of the - // small caps, add 
kBlnBaselineOffset to chars with a maximum max, and have - // a top already at a significantly high position. - if (max_top == kBlnCellHeight - 1 && - top > kBlnCellHeight - kBlnBaselineOffset / 2) - max_top += kBlnBaselineOffset; - top -= bln_yshift; - int height = top - kBlnBaselineOffset; - double min_height = min_top - kBlnBaselineOffset - tolerance; - double max_height = max_top - kBlnBaselineOffset + tolerance; - - // We shouldn't try calculations if the characters are very short (for example - // for punctuation). - if (min_height > kBlnXHeight / 8 && height > 0) { - float result = height * kBlnXHeight * yscale / min_height; - *max_xht = result + kFinalPixelTolerance; - result = height * kBlnXHeight * yscale / max_height; - *min_xht = result - kFinalPixelTolerance; - } -} - -// Prints the content of the DENORM for debug purposes. -void DENORM::Print() const { - if (pix_ != nullptr) { - tprintf("Pix dimensions %d x %d x %d\n", - pixGetWidth(pix_), pixGetHeight(pix_), pixGetDepth(pix_)); - } - if (inverse_) - tprintf("Inverse\n"); - if (block_ && block_->re_rotation().x() != 1.0f) { - tprintf("Block rotation %g, %g\n", - block_->re_rotation().x(), block_->re_rotation().y()); - } - tprintf("Input Origin = (%g, %g)\n", x_origin_, y_origin_); - if (x_map_ != nullptr && y_map_ != nullptr) { - tprintf("x map:\n"); - for (int x = 0; x < x_map_->size(); ++x) { - tprintf("%g ", (*x_map_)[x]); - } - tprintf("\ny map:\n"); - for (int y = 0; y < y_map_->size(); ++y) { - tprintf("%g ", (*y_map_)[y]); - } - tprintf("\n"); - } else { - tprintf("Scale = (%g, %g)\n", x_scale_, y_scale_); - if (rotation_ != nullptr) - tprintf("Rotation = (%g, %g)\n", rotation_->x(), rotation_->y()); - } - tprintf("Final Origin = (%g, %g)\n", final_xshift_, final_xshift_); - if (predecessor_ != nullptr) { - tprintf("Predecessor:\n"); - predecessor_->Print(); - } -} - - -// ============== Private Code ====================== - -// Free allocated memory and clear pointers. 
-void DENORM::Clear() { - delete x_map_; - x_map_ = nullptr; - delete y_map_; - y_map_ = nullptr; - delete rotation_; - rotation_ = nullptr; -} - -// Setup default values. -void DENORM::Init() { - inverse_ = false; - pix_ = nullptr; - block_ = nullptr; - rotation_ = nullptr; - predecessor_ = nullptr; - x_map_ = nullptr; - y_map_ = nullptr; - x_origin_ = 0.0f; - y_origin_ = 0.0f; - x_scale_ = 1.0f; - y_scale_ = 1.0f; - final_xshift_ = 0.0f; - final_yshift_ = static_cast(kBlnBaselineOffset); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/normalis.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/normalis.h deleted file mode 100644 index b163c05d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/normalis.h +++ /dev/null @@ -1,316 +0,0 @@ -/********************************************************************** - * File: normalis.h (Formerly denorm.h) - * Description: Code for the DENORM class. - * Author: Ray Smith - * Created: Thu Apr 23 09:22:43 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef NORMALIS_H -#define NORMALIS_H - -const int kBlnCellHeight = 256; // Full-height for baseline normalization. -const int kBlnXHeight = 128; // x-height for baseline normalization. 
-const int kBlnBaselineOffset = 64; // offset for baseline normalization. - -class BLOCK; -class FCOORD; -class TBOX; -class UNICHARSET; - -struct Pix; -struct TBLOB; -struct TPOINT; - -template class GenericVector; - -namespace tesseract { - -// Possible normalization methods. Use NEGATIVE values as these also -// double up as markers for the last sub-classifier. -enum NormalizationMode { - NM_BASELINE = -3, // The original BL normalization mode. - NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic. - NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. -}; - -} // namespace tesseract. - -class DENORM { - public: - DENORM(); - - // Copying a DENORM is allowed. - DENORM(const DENORM &); - DENORM& operator=(const DENORM&); - ~DENORM(); - - // Setup the normalization transformation parameters. - // The normalizations applied to a blob are as follows: - // 1. An optional block layout rotation that was applied during layout - // analysis to make the textlines horizontal. - // 2. A normalization transformation (LocalNormTransform): - // Subtract the "origin" - // Apply an x,y scaling. - // Apply an optional rotation. - // Add back a final translation. - // The origin is in the block-rotated space, and is usually something like - // the x-middle of the word at the baseline. - // 3. Zero or more further normalization transformations that are applied - // in sequence, with a similar pattern to the first normalization transform. - // - // A DENORM holds the parameters of a single normalization, and can execute - // both the LocalNormTransform (a forwards normalization), and the - // LocalDenormTransform which is an inverse transform or de-normalization. - // A DENORM may point to a predecessor DENORM, which is actually the earlier - // normalization, so the full normalization sequence involves executing all - // predecessors first and then the transform in "this". 
- // Let x be image co-ordinates and that we have normalization classes A, B, C - // where we first apply A then B then C to get normalized x': - // x' = CBAx - // Then the backwards (to original coordinates) would be: - // x = A^-1 B^-1 C^-1 x' - // and A = B->predecessor_ and B = C->predecessor_ - // NormTransform executes all predecessors recursively, and then this. - // NormTransform would be used to transform an image-based feature to - // normalized space for use in a classifier - // DenormTransform inverts this and then all predecessors. It can be - // used to get back to the original image coordinates from normalized space. - // The LocalNormTransform member executes just the transformation - // in "this" without the layout rotation or any predecessors. It would be - // used to run each successive normalization, eg the word normalization, - // and later the character normalization. - - // Arguments: - // block: if not nullptr, then this is the first transformation, and - // block->re_rotation() needs to be used after the Denorm - // transformation to get back to the image coords. - // rotation: if not nullptr, apply this rotation after translation to the - // origin and scaling. (Usually a classify rotation.) - // predecessor: if not nullptr, then predecessor has been applied to the - // input space and needs to be undone to complete the inverse. - // The above pointers are not owned by this DENORM and are assumed to live - // longer than this denorm, except rotation, which is deep copied on input. - // - // x_origin: The x origin which will be mapped to final_xshift in the result. - // y_origin: The y origin which will be mapped to final_yshift in the result. - // Added to result of row->baseline(x) if not nullptr. - // - // x_scale: scale factor for the x-coordinate. - // y_scale: scale factor for the y-coordinate. Ignored if segs is given. 
- // Note that these scale factors apply to the same x and y system as the - // x-origin and y-origin apply, ie after any block rotation, but before - // the rotation argument is applied. - // - // final_xshift: The x component of the final translation. - // final_yshift: The y component of the final translation. - // - // In theory, any of the commonly used normalizations can be setup here: - // * Traditional baseline normalization on a word: - // SetupNormalization(block, nullptr, nullptr, - // box.x_middle(), baseline, - // kBlnXHeight / x_height, kBlnXHeight / x_height, - // 0, kBlnBaselineOffset); - // * "Numeric mode" baseline normalization on a word, in which the blobs - // are positioned with the bottom as the baseline is achieved by making - // a separate DENORM for each blob. - // SetupNormalization(block, nullptr, nullptr, - // box.x_middle(), box.bottom(), - // kBlnXHeight / x_height, kBlnXHeight / x_height, - // 0, kBlnBaselineOffset); - // * Anisotropic character normalization used by IntFx. 
- // SetupNormalization(nullptr, nullptr, denorm, - // centroid_x, centroid_y, - // 51.2 / ry, 51.2 / rx, 128, 128); - // * Normalize blob height to x-height (current OSD): - // SetupNormalization(nullptr, &rotation, nullptr, - // box.rotational_x_middle(rotation), - // box.rotational_y_middle(rotation), - // kBlnXHeight / box.rotational_height(rotation), - // kBlnXHeight / box.rotational_height(rotation), - // 0, kBlnBaselineOffset); - // * Secondary normalization for classification rotation (current): - // FCOORD rotation = block->classify_rotation(); - // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; - // SetupNormalization(nullptr, &rotation, denorm, - // box.rotational_x_middle(rotation), - // box.rotational_y_middle(rotation), - // target_height / box.rotational_height(rotation), - // target_height / box.rotational_height(rotation), - // 0, kBlnBaselineOffset); - // * Proposed new normalizations for CJK: Between them there is then - // no need for further normalization at all, and the character fills the cell. - // ** Replacement for baseline normalization on a word: - // Scales height and width independently so that modal height and pitch - // fill the cell respectively. - // float cap_height = x_height / CCStruct::kXHeightCapRatio; - // SetupNormalization(block, nullptr, nullptr, - // box.x_middle(), cap_height / 2.0f, - // kBlnCellHeight / fixed_pitch, - // kBlnCellHeight / cap_height, - // 0, 0); - // ** Secondary normalization for classification (with rotation) (proposed): - // Requires a simple translation to the center of the appropriate character - // cell, no further scaling and a simple rotation (or nothing) about the - // cell center. 
- // FCOORD rotation = block->classify_rotation(); - // SetupNormalization(nullptr, &rotation, denorm, - // fixed_pitch_cell_center, - // 0.0f, - // 1.0f, - // 1.0f, - // 0, 0); - void SetupNormalization(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, - float final_xshift, float final_yshift); - - // Sets up the DENORM to execute a non-linear transformation based on - // preserving an even distribution of stroke edges. The transformation - // operates only within the given box, scaling input coords within the box - // non-linearly to a box of target_width by target_height, with all other - // coords being clipped to the box edge. As with SetupNormalization above, - // final_xshift and final_yshift are applied after scaling, and the bottom- - // left of box is used as a pre-scaling origin. - // x_coords is a collection of the x-coords of vertical edges for each - // y-coord starting at box.bottom(). - // y_coords is a collection of the y-coords of horizontal edges for each - // x-coord starting at box.left(). - // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. - // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. - // The second-level vectors must all be sorted in ascending order. - void SetupNonLinear(const DENORM* predecessor, const TBOX& box, - float target_width, float target_height, - float final_xshift, float final_yshift, - const GenericVector >& x_coords, - const GenericVector >& y_coords); - - // Transforms the given coords one step forward to normalized space, without - // using any block rotation or predecessor. 
- void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const; - void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const; - // Transforms the given coords forward to normalized space using the - // full transformation sequence defined by the block rotation, the - // predecessors, deepest first, and finally this. If first_norm is not nullptr, - // then the first and deepest transformation used is first_norm, ending - // with this, and the block rotation will not be applied. - void NormTransform(const DENORM* first_norm, const TPOINT& pt, - TPOINT* transformed) const; - void NormTransform(const DENORM* first_norm, const FCOORD& pt, - FCOORD* transformed) const; - // Transforms the given coords one step back to source space, without - // using to any block rotation or predecessor. - void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const; - void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const; - // Transforms the given coords all the way back to source image space using - // the full transformation sequence defined by this and its predecessors - // recursively, shallowest first, and finally any block re_rotation. - // If last_denorm is not nullptr, then the last transformation used will - // be last_denorm, and the block re_rotation will never be executed. - void DenormTransform(const DENORM* last_denorm, const TPOINT& pt, - TPOINT* original) const; - void DenormTransform(const DENORM* last_denorm, const FCOORD& pt, - FCOORD* original) const; - - // Normalize a blob using blob transformations. Less accurate, but - // more accurately copies the old way. - void LocalNormBlob(TBLOB* blob) const; - - // Fills in the x-height range accepted by the given unichar_id in blob - // coordinates, given its bounding box in the usual baseline-normalized - // coordinates, with some initial crude x-height estimate (such as word - // size) and this denoting the transformation that was used. 
- // Also returns the amount the character must have shifted up or down. - void XHeightRange(int unichar_id, const UNICHARSET& unicharset, - const TBOX& bbox, - float* min_xht, - float* max_xht, - float* yshift) const; - - // Prints the content of the DENORM for debug purposes. - void Print() const; - - Pix* pix() const { - return pix_; - } - void set_pix(Pix* pix) { - pix_ = pix; - } - bool inverse() const { - return inverse_; - } - void set_inverse(bool value) { - inverse_ = value; - } - const DENORM* RootDenorm() const { - if (predecessor_ != nullptr) - return predecessor_->RootDenorm(); - return this; - } - const DENORM* predecessor() const { - return predecessor_; - } - // Accessors - perhaps should not be needed. - float x_scale() const { - return x_scale_; - } - float y_scale() const { - return y_scale_; - } - const BLOCK* block() const { - return block_; - } - void set_block(const BLOCK* block) { - block_ = block; - } - - private: - // Free allocated memory and clear pointers. - void Clear(); - // Setup default values. - void Init(); - - // Best available image. - Pix* pix_; - // True if the source image is white-on-black. - bool inverse_; - // Block the word came from. If not null, block->re_rotation() takes the - // "untransformed" coordinates even further back to the original image. - // Used only on the first DENORM in a chain. - const BLOCK* block_; - // Rotation to apply between translation to the origin and scaling. - const FCOORD* rotation_; - // Previous transformation in a chain. - const DENORM* predecessor_; - // Non-linear transformation maps directly from each integer offset from the - // origin to the corresponding x-coord. Owned by the DENORM. - GenericVector* x_map_; - // Non-linear transformation maps directly from each integer offset from the - // origin to the corresponding y-coord. Owned by the DENORM. - GenericVector* y_map_; - // x-coordinate to be mapped to final_xshift_ in the result. 
- float x_origin_; - // y-coordinate to be mapped to final_yshift_ in the result. - float y_origin_; - // Scale factors for x and y coords. Applied to pre-rotation system. - float x_scale_; - float y_scale_; - // Destination coords of the x_origin_ and y_origin_. - float final_xshift_; - float final_yshift_; -}; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrblock.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrblock.cpp deleted file mode 100644 index 7e3e72a3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrblock.cpp +++ /dev/null @@ -1,517 +0,0 @@ -/********************************************************************** - * File: ocrblock.cpp (Formerly block.c) - * Description: BLOCK member functions and iterator functions. - * Author: Ray Smith - * Created: Fri Mar 15 09:41:28 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "ocrblock.h" -#include -#include // std::unique_ptr -#include "stepblob.h" -#include "tprintf.h" - -#define BLOCK_LABEL_HEIGHT 150 //char height of block id - -ELISTIZE (BLOCK) -/** - * BLOCK::BLOCK - * - * Constructor for a simple rectangular block. 
- */ -BLOCK::BLOCK(const char *name, //< filename - BOOL8 prop, //< proportional - int16_t kern, //< kerning - int16_t space, //< spacing - int16_t xmin, //< bottom left - int16_t ymin, int16_t xmax, //< top right - int16_t ymax) - : pdblk(xmin, ymin, xmax, ymax), - filename(name), - re_rotation_(1.0f, 0.0f), - classify_rotation_(1.0f, 0.0f), - skew_(1.0f, 0.0f) { - ICOORDELT_IT left_it = &pdblk.leftside; - ICOORDELT_IT right_it = &pdblk.rightside; - - proportional = prop; - right_to_left_ = false; - kerning = kern; - spacing = space; - font_class = -1; //not assigned - cell_over_xheight_ = 2.0f; - pdblk.hand_poly = nullptr; - left_it.set_to_list (&pdblk.leftside); - right_it.set_to_list (&pdblk.rightside); - //make default box - left_it.add_to_end (new ICOORDELT (xmin, ymin)); - left_it.add_to_end (new ICOORDELT (xmin, ymax)); - right_it.add_to_end (new ICOORDELT (xmax, ymin)); - right_it.add_to_end (new ICOORDELT (xmax, ymax)); -} - -/** - * decreasing_top_order - * - * Sort Comparator: Return <0 if row1 top < row2 top - */ - -static int decreasing_top_order(const void *row1, const void *row2) { - return (*reinterpret_cast(row2))->bounding_box().top() - - (*reinterpret_cast(row1))->bounding_box().top(); -} - - -/** - * BLOCK::rotate - * - * Rotate the polygon by the given rotation and recompute the bounding_box. - */ -void BLOCK::rotate(const FCOORD& rotation) { - pdblk.poly_block()->rotate(rotation); - pdblk.box = *pdblk.poly_block()->bounding_box(); -} - -// Returns the bounding box including the desired combination of upper and -// lower noise/diacritic elements. -TBOX BLOCK::restricted_bounding_box(bool upper_dots, bool lower_dots) const { - TBOX box; - // This is a read-only iteration of the rows in the block. 
- ROW_IT it(const_cast(&rows)); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - box += it.data()->restricted_bounding_box(upper_dots, lower_dots); - } - return box; -} - -/** - * BLOCK::reflect_polygon_in_y_axis - * - * Reflects the polygon in the y-axis and recompute the bounding_box. - * Does nothing to any contained rows/words/blobs etc. - */ -void BLOCK::reflect_polygon_in_y_axis() { - pdblk.poly_block()->reflect_in_y_axis(); - pdblk.box = *pdblk.poly_block()->bounding_box(); -} - -/** - * BLOCK::sort_rows - * - * Order rows so that they are in order of decreasing Y coordinate - */ - -void BLOCK::sort_rows() { // order on "top" - ROW_IT row_it(&rows); - - row_it.sort (decreasing_top_order); -} - - -/** - * BLOCK::compress - * - * Delete space between the rows. (And maybe one day, compress the rows) - * Fill space of block from top down, left aligning rows. - */ - -void BLOCK::compress() { // squash it up - #define ROW_SPACING 5 - - ROW_IT row_it(&rows); - ROW *row; - ICOORD row_spacing (0, ROW_SPACING); - - ICOORDELT_IT icoordelt_it; - - sort_rows(); - - pdblk.box = TBOX (pdblk.box.topleft (), pdblk.box.topleft ()); - pdblk.box.move_bottom_edge (ROW_SPACING); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row->move (pdblk.box.botleft () - row_spacing - - row->bounding_box ().topleft ()); - pdblk.box += row->bounding_box (); - } - - pdblk.leftside.clear (); - icoordelt_it.set_to_list (&pdblk.leftside); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.left (), pdblk.box.bottom ())); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.left (), pdblk.box.top ())); - pdblk.rightside.clear (); - icoordelt_it.set_to_list (&pdblk.rightside); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.right (), pdblk.box.bottom ())); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.right (), pdblk.box.top ())); -} - - -/** - * BLOCK::check_pitch - * - * Check whether the block is fixed or prop, set 
the flag, and set - * the pitch if it is fixed. - */ - -void BLOCK::check_pitch() { // check prop - // tprintf("Missing FFT fixed pitch stuff!\n"); - pitch = -1; -} - - -/** - * BLOCK::compress - * - * Compress and move in a single operation. - */ - -void BLOCK::compress( // squash it up - const ICOORD vec // and move - ) { - pdblk.box.move (vec); - compress(); -} - - -/** - * BLOCK::print - * - * Print the info on a block - */ - -void BLOCK::print( //print list of sides - FILE*, //< file to print on - bool dump //< print full detail -) { - ICOORDELT_IT it = &pdblk.leftside; //iterator - - pdblk.box.print (); - tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); - tprintf ("Kerning= %d\n", kerning); - tprintf ("Spacing= %d\n", spacing); - tprintf ("Fixed_pitch=%d\n", pitch); - tprintf ("Filename= %s\n", filename.string ()); - - if (dump) { - tprintf ("Left side coords are:\n"); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); - tprintf ("\n"); - tprintf ("Right side coords are:\n"); - it.set_to_list (&pdblk.rightside); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); - tprintf ("\n"); - } -} - -/** - * BLOCK::operator= - * - * Assignment - duplicate the block structure, but with an EMPTY row list. 
- */ - -BLOCK & BLOCK::operator= ( //assignment -const BLOCK & source //from this -) { - this->ELIST_LINK::operator= (source); - pdblk = source.pdblk; - proportional = source.proportional; - kerning = source.kerning; - spacing = source.spacing; - filename = source.filename; //STRINGs assign ok - if (!rows.empty ()) - rows.clear (); - re_rotation_ = source.re_rotation_; - classify_rotation_ = source.classify_rotation_; - skew_ = source.skew_; - return *this; -} - -// This function is for finding the approximate (horizontal) distance from -// the x-coordinate of the left edge of a symbol to the left edge of the -// text block which contains it. We are passed: -// segments - output of PB_LINE_IT::get_line() which contains x-coordinate -// intervals for the scan line going through the symbol's y-coordinate. -// Each element of segments is of the form (x()=start_x, y()=length). -// x - the x coordinate of the symbol we're interested in. -// margin - return value, the distance from x,y to the left margin of the -// block containing it. -// If all segments were to the right of x, we return false and 0. -static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) { - bool found = false; - *margin = 0; - if (segments->empty()) - return found; - ICOORDELT_IT seg_it(segments); - for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { - int cur_margin = x - seg_it.data()->x(); - if (cur_margin >= 0) { - if (!found) { - *margin = cur_margin; - } else if (cur_margin < *margin) { - *margin = cur_margin; - } - found = true; - } - } - return found; -} - -// This function is for finding the approximate (horizontal) distance from -// the x-coordinate of the right edge of a symbol to the right edge of the -// text block which contains it. We are passed: -// segments - output of PB_LINE_IT::get_line() which contains x-coordinate -// intervals for the scan line going through the symbol's y-coordinate. 
-// Each element of segments is of the form (x()=start_x, y()=length). -// x - the x coordinate of the symbol we're interested in. -// margin - return value, the distance from x,y to the right margin of the -// block containing it. -// If all segments were to the left of x, we return false and 0. -static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) { - bool found = false; - *margin = 0; - if (segments->empty()) - return found; - ICOORDELT_IT seg_it(segments); - for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { - int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x; - if (cur_margin >= 0) { - if (!found) { - *margin = cur_margin; - } else if (cur_margin < *margin) { - *margin = cur_margin; - } - found = true; - } - } - return found; -} - -// Compute the distance from the left and right ends of each row to the -// left and right edges of the block's polyblock. Illustration: -// ____________________________ _______________________ -// | Howdy neighbor! | |rectangular blocks look| -// | This text is written to| |more like stacked pizza| -// |illustrate how useful poly- |boxes. | -// |blobs are in ----------- ------ The polyblob| -// |dealing with| _________ |for a BLOCK rec-| -// |harder layout| /===========\ |ords the possibly| -// |issues. | | _ _ | |skewed pseudo-| -// | You see this| | |_| \|_| | |rectangular | -// |text is flowed| | } | |boundary that| -// |around a mid-| \ ____ | |forms the ideal-| -// |cloumn portrait._____ \ / __|ized text margin| -// | Polyblobs exist| \ / |from which we should| -// |to account for insets| | | |measure paragraph| -// |which make otherwise| ----- |indentation. | -// ----------------------- ---------------------- -// -// If we identify a drop-cap, we measure the left margin for the lines -// below the first line relative to one space past the drop cap. The -// first line's margin and those past the drop cap area are measured -// relative to the enclosing polyblock. 
-// -// TODO(rays): Before this will work well, we'll need to adjust the -// polyblob tighter around the text near images, as in: -// UNLV_AUTO:mag.3G0 page 2 -// UNLV_AUTO:mag.3G4 page 16 -void BLOCK::compute_row_margins() { - if (row_list()->empty() || row_list()->singleton()) { - return; - } - - // If Layout analysis was not called, default to this. - POLY_BLOCK rect_block(pdblk.bounding_box(), PT_FLOWING_TEXT); - POLY_BLOCK *pblock = &rect_block; - if (pdblk.poly_block() != nullptr) { - pblock = pdblk.poly_block(); - } - - // Step One: Determine if there is a drop-cap. - // TODO(eger): Fix up drop cap code for RTL languages. - ROW_IT r_it(row_list()); - ROW *first_row = r_it.data(); - ROW *second_row = r_it.data_relative(1); - - // initialize the bottom of a fictitious drop cap far above the first line. - int drop_cap_bottom = first_row->bounding_box().top() + - first_row->bounding_box().height(); - int drop_cap_right = first_row->bounding_box().left(); - int mid_second_line = second_row->bounding_box().top() - - second_row->bounding_box().height() / 2; - WERD_IT werd_it(r_it.data()->word_list()); // words of line one - if (!werd_it.empty()) { - C_BLOB_IT cblob_it(werd_it.data()->cblob_list()); - for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); - cblob_it.forward()) { - TBOX bbox = cblob_it.data()->bounding_box(); - if (bbox.bottom() <= mid_second_line) { - // we found a real drop cap - first_row->set_has_drop_cap(true); - if (drop_cap_bottom > bbox.bottom()) - drop_cap_bottom = bbox.bottom(); - if (drop_cap_right < bbox.right()) - drop_cap_right = bbox.right(); - } - } - } - - // Step Two: Calculate the margin from the text of each row to the block - // (or drop-cap) boundaries. 
- PB_LINE_IT lines(pblock); - r_it.set_to_list(row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW *row = r_it.data(); - TBOX row_box = row->bounding_box(); - int left_y = row->base_line(row_box.left()) + row->x_height(); - int left_margin; - const std::unique_ptr segments_left( - lines.get_line(left_y)); - LeftMargin(segments_left.get(), row_box.left(), &left_margin); - - if (row_box.top() >= drop_cap_bottom) { - int drop_cap_distance = row_box.left() - row->space() - drop_cap_right; - if (drop_cap_distance < 0) - drop_cap_distance = 0; - if (drop_cap_distance < left_margin) - left_margin = drop_cap_distance; - } - - int right_y = row->base_line(row_box.right()) + row->x_height(); - int right_margin; - const std::unique_ptr segments_right( - lines.get_line(right_y)); - RightMargin(segments_right.get(), row_box.right(), &right_margin); - row->set_lmargin(left_margin); - row->set_rmargin(right_margin); - } -} - -/********************************************************************** - * PrintSegmentationStats - * - * Prints segmentation stats for the given block list. - **********************************************************************/ - -void PrintSegmentationStats(BLOCK_LIST* block_list) { - int num_blocks = 0; - int num_rows = 0; - int num_words = 0; - int num_blobs = 0; - BLOCK_IT block_it(block_list); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - BLOCK* block = block_it.data(); - ++num_blocks; - ROW_IT row_it(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ++num_rows; - ROW* row = row_it.data(); - // Iterate over all werds in the row. 
- WERD_IT werd_it(row->word_list()); - for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { - WERD* werd = werd_it.data(); - ++num_words; - num_blobs += werd->cblob_list()->length(); - } - } - } - tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", - num_blocks, num_rows, num_words, num_blobs); -} - -/********************************************************************** - * ExtractBlobsFromSegmentation - * - * Extracts blobs from the given block list and adds them to the output list. - * The block list must have been created by performing a page segmentation. - **********************************************************************/ - -void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks, - C_BLOB_LIST* output_blob_list) { - C_BLOB_IT return_list_it(output_blob_list); - BLOCK_IT block_it(blocks); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - BLOCK* block = block_it.data(); - ROW_IT row_it(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ROW* row = row_it.data(); - // Iterate over all werds in the row. - WERD_IT werd_it(row->word_list()); - for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { - WERD* werd = werd_it.data(); - return_list_it.move_to_last(); - return_list_it.add_list_after(werd->cblob_list()); - return_list_it.move_to_last(); - return_list_it.add_list_after(werd->rej_cblob_list()); - } - } - } -} - -/********************************************************************** - * RefreshWordBlobsFromNewBlobs() - * - * Refreshes the words in the block_list by using blobs in the - * new_blobs list. - * Block list must have word segmentation in it. - * It consumes the blobs provided in the new_blobs list. The blobs leftover in - * the new_blobs list after the call weren't matched to any blobs of the words - * in block list. 
- * The output not_found_blobs is a list of blobs from the original segmentation - * in the block_list for which no corresponding new blobs were found. - **********************************************************************/ - -void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, - C_BLOB_LIST* new_blobs, - C_BLOB_LIST* not_found_blobs) { - // Now iterate over all the blobs in the segmentation_block_list_, and just - // replace the corresponding c-blobs inside the werds. - BLOCK_IT block_it(block_list); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - BLOCK* block = block_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) - continue; // Don't touch non-text blocks. - // Iterate over all rows in the block. - ROW_IT row_it(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ROW* row = row_it.data(); - // Iterate over all werds in the row. - WERD_IT werd_it(row->word_list()); - WERD_LIST new_words; - WERD_IT new_words_it(&new_words); - for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { - WERD* werd = werd_it.extract(); - WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, - not_found_blobs); - if (new_werd) { - // Insert this new werd into the actual row's werd-list. Remove the - // existing one. - new_words_it.add_after_then_move(new_werd); - delete werd; - } else { - // Reinsert the older word back, for lack of better options. - // This is critical since dropping the words messes up segmentation: - // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on. - new_words_it.add_after_then_move(werd); - } - } - // Get rid of the old word list & replace it with the new one. 
- row->word_list()->clear(); - werd_it.move_to_first(); - werd_it.add_list_after(&new_words); - } - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrblock.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrblock.h deleted file mode 100644 index e27a48b0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrblock.h +++ /dev/null @@ -1,234 +0,0 @@ -/********************************************************************** - * File: ocrblock.h (Formerly block.h) - * Description: Page block class definition. - * Author: Ray Smith - * Created: Thu Mar 14 17:32:01 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef OCRBLOCK_H -#define OCRBLOCK_H - -#include "ocrpara.h" -#include "ocrrow.h" -#include "pdblock.h" - -class BLOCK; //forward decl - -ELISTIZEH (BLOCK) -class BLOCK:public ELIST_LINK -//page block -{ - friend class BLOCK_RECT_IT; //block iterator - public: - BLOCK() - : re_rotation_(1.0f, 0.0f), - classify_rotation_(1.0f, 0.0f), - skew_(1.0f, 0.0f) { - right_to_left_ = false; - pdblk.hand_poly = nullptr; - } - BLOCK(const char *name, //< filename - BOOL8 prop, //< proportional - int16_t kern, //< kerning - int16_t space, //< spacing - int16_t xmin, //< bottom left - int16_t ymin, - int16_t xmax, //< top right - int16_t ymax); - - ~BLOCK () = default; - - /** - * set space size etc. - * @param prop proportional - * @param kern inter char size - * @param space inter word size - * @param ch_pitch pitch if fixed - */ - void set_stats(BOOL8 prop, - int16_t kern, - int16_t space, - int16_t ch_pitch) { - proportional = prop; - kerning = (int8_t) kern; - spacing = space; - pitch = ch_pitch; - } - /// set char size - void set_xheight(int32_t height) { - xheight = height; - } - /// set font class - void set_font_class(int16_t font) { - font_class = font; - } - /// return proportional - BOOL8 prop() const { - return proportional; - } - bool right_to_left() const { - return right_to_left_; - } - void set_right_to_left(bool value) { - right_to_left_ = value; - } - /// return pitch - int32_t fixed_pitch() const { - return pitch; - } - /// return kerning - int16_t kern() const { - return kerning; - } - /// return font class - int16_t font() const { - return font_class; - } - /// return spacing - int16_t space() const { - return spacing; - } - /// return filename - const char *name() const { - return filename.string (); - } - /// return xheight - int32_t x_height() const { - return xheight; - } - float cell_over_xheight() const { - return cell_over_xheight_; - } - void 
set_cell_over_xheight(float ratio) { - cell_over_xheight_ = ratio; - } - /// get rows - ROW_LIST *row_list() { - return &rows; - } - // Compute the margins between the edges of each row and this block's - // polyblock, and store the results in the rows. - void compute_row_margins(); - - // get paragraphs - PARA_LIST *para_list() { - return ¶s_; - } - /// get blobs - C_BLOB_LIST *blob_list() { - return &c_blobs; - } - C_BLOB_LIST *reject_blobs() { - return &rej_blobs; - } - FCOORD re_rotation() const { - return re_rotation_; // How to transform coords back to image. - } - void set_re_rotation(const FCOORD& rotation) { - re_rotation_ = rotation; - } - FCOORD classify_rotation() const { - return classify_rotation_; // Apply this before classifying. - } - void set_classify_rotation(const FCOORD& rotation) { - classify_rotation_ = rotation; - } - FCOORD skew() const { - return skew_; // Direction of true horizontal. - } - void set_skew(const FCOORD& skew) { - skew_ = skew; - } - const ICOORD& median_size() const { - return median_size_; - } - void set_median_size(int x, int y) { - median_size_.set_x(x); - median_size_.set_y(y); - } - - Pix* render_mask(TBOX* mask_box) { - return pdblk.render_mask(re_rotation_, mask_box); - } - - // Returns the bounding box including the desired combination of upper and - // lower noise/diacritic elements. - TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; - - // Reflects the polygon in the y-axis and recomputes the bounding_box. - // Does nothing to any contained rows/words/blobs etc. 
- void reflect_polygon_in_y_axis(); - - void rotate(const FCOORD& rotation); - - /// decreasing y order - void sort_rows(); - - /// shrink white space - void compress(); - - /// check proportional - void check_pitch(); - - /// shrink white space and move by vector - void compress(const ICOORD vec); - - /// dump whole table - void print(FILE* fp, bool dump); - - BLOCK& operator=(const BLOCK & source); - PDBLK pdblk; //< Page Description Block - - private: - BOOL8 proportional; //< proportional - bool right_to_left_; //< major script is right to left. - int8_t kerning; //< inter blob gap - int16_t spacing; //< inter word gap - int16_t pitch; //< pitch of non-props - int16_t font_class; //< correct font class - int32_t xheight; //< height of chars - float cell_over_xheight_; //< Ratio of cell height to xheight. - STRING filename; //< name of block - ROW_LIST rows; //< rows in block - PARA_LIST paras_; //< paragraphs of block - C_BLOB_LIST c_blobs; //< before textord - C_BLOB_LIST rej_blobs; //< duff stuff - FCOORD re_rotation_; //< How to transform coords back to image. - FCOORD classify_rotation_; //< Apply this before classifying. - FCOORD skew_; //< Direction of true horizontal. - ICOORD median_size_; //< Median size of blobs. -}; - -// A function to print segmentation stats for the given block list. -void PrintSegmentationStats(BLOCK_LIST* block_list); - -// Extracts blobs fromo the given block list and adds them to the output list. -// The block list must have been created by performing a page segmentation. -void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks, - C_BLOB_LIST* output_blob_list); - -// Refreshes the words in the block_list by using blobs in the -// new_blobs list. -// Block list must have word segmentation in it. -// It consumes the blobs provided in the new_blobs list. The blobs leftover in -// the new_blobs list after the call weren't matched to any blobs of the words -// in block list. 
-// The output not_found_blobs is a list of blobs from the original segmentation -// in the block_list for which no corresponding new blobs were found. -void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, - C_BLOB_LIST* new_blobs, - C_BLOB_LIST* not_found_blobs); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrpara.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrpara.cpp deleted file mode 100644 index 022d25c8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrpara.cpp +++ /dev/null @@ -1,100 +0,0 @@ -///////////////////////////////////////////////////////////////////// -// File: ocrpara.cpp -// Description: OCR Paragraph Output Type -// Author: David Eger -// Created: 2010-11-15 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include - -#include "ocrpara.h" -#include "host.h" // For NearlyEqual() - -ELISTIZE(PARA) - -using tesseract::JUSTIFICATION_LEFT; -using tesseract::JUSTIFICATION_RIGHT; -using tesseract::JUSTIFICATION_CENTER; -using tesseract::JUSTIFICATION_UNKNOWN; - -static STRING ParagraphJustificationToString( - tesseract::ParagraphJustification justification) { - switch (justification) { - case JUSTIFICATION_LEFT: - return "LEFT"; - case JUSTIFICATION_RIGHT: - return "RIGHT"; - case JUSTIFICATION_CENTER: - return "CENTER"; - default: - return "UNKNOWN"; - } -} - -bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, - int rindent, int rmargin) const { - switch (justification_) { - case JUSTIFICATION_LEFT: - return NearlyEqual(lmargin + lindent, margin_ + first_indent_, - tolerance_); - case JUSTIFICATION_RIGHT: - return NearlyEqual(rmargin + rindent, margin_ + first_indent_, - tolerance_); - case JUSTIFICATION_CENTER: - return NearlyEqual(lindent, rindent, tolerance_ * 2); - default: - // shouldn't happen - return false; - } -} - -bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, - int rindent, int rmargin) const { - switch (justification_) { - case JUSTIFICATION_LEFT: - return NearlyEqual(lmargin + lindent, margin_ + body_indent_, - tolerance_); - case JUSTIFICATION_RIGHT: - return NearlyEqual(rmargin + rindent, margin_ + body_indent_, - tolerance_); - case JUSTIFICATION_CENTER: - return NearlyEqual(lindent, rindent, tolerance_ * 2); - default: - // shouldn't happen - return false; - } -} - -bool ParagraphModel::Comparable(const ParagraphModel &other) const { - if (justification_ != other.justification_) - return false; - if (justification_ == JUSTIFICATION_CENTER || - justification_ == JUSTIFICATION_UNKNOWN) - return true; - int tolerance = (tolerance_ + other.tolerance_) / 4; - return NearlyEqual(margin_ + first_indent_, - other.margin_ + other.first_indent_, tolerance) && - 
NearlyEqual(margin_ + body_indent_, - other.margin_ + other.body_indent_, tolerance); -} - -STRING ParagraphModel::ToString() const { - char buffer[200]; - const STRING &alignment = ParagraphJustificationToString(justification_); - snprintf(buffer, sizeof(buffer), - "margin: %d, first_indent: %d, body_indent: %d, alignment: %s", - margin_, first_indent_, body_indent_, alignment.string()); - return STRING(buffer); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrpara.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrpara.h deleted file mode 100644 index cbe05217..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrpara.h +++ /dev/null @@ -1,191 +0,0 @@ -///////////////////////////////////////////////////////////////////// -// File: ocrpara.h -// Description: OCR Paragraph Output Type -// Author: David Eger -// Created: 2010-11-15 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCSTRUCT_OCRPARA_H_ -#define TESSERACT_CCSTRUCT_OCRPARA_H_ - -#include "publictypes.h" -#include "elst.h" -#include "strngs.h" - -class ParagraphModel; - -struct PARA : public ELIST_LINK { - public: - PARA() : model(nullptr), is_list_item(false), - is_very_first_or_continuation(false), has_drop_cap(false) {} - - // We do not own the model, we just reference it. 
- // model may be nullptr if there is not a good model for this paragraph. - const ParagraphModel *model; - - bool is_list_item; - - // The first paragraph on a page often lacks a first line indent, but should - // still be modeled by the same model as other body text paragraphs on the - // page. - bool is_very_first_or_continuation; - - // Does this paragraph begin with a drop cap? - bool has_drop_cap; -}; - -ELISTIZEH(PARA) - -// A geometric model of paragraph indentation and alignment. -// -// Measurements are in pixels. The meaning of the integer arguments changes -// depending upon the value of justification. Distances less than or equal -// to tolerance apart we take as "equivalent" for the purpose of model -// matching, and in the examples below, we assume tolerance is zero. -// -// justification = LEFT: -// margin the "ignored" margin to the left block edge. -// first_indent indent from the left margin to a typical first text line. -// body_indent indent from the left margin of a typical body text line. -// -// justification = RIGHT: -// margin the "ignored" margin to the right block edge. -// first_indent indent from the right margin to a typical first text line. -// body_indent indent from the right margin of a typical body text line. -// -// justification = CENTER: -// margin ignored -// first_indent ignored -// body_indent ignored -// -// ====== Extended example, assuming each letter is ten pixels wide: ======= -// -// +--------------------------------+ -// | Awesome | ParagraphModel(CENTER, 0, 0, 0) -// | Centered Title | -// | Paragraph Detection | -// | OCR TEAM | -// | 10 November 2010 | -// | | -// | Look here, I have a paragraph.| ParagraphModel(LEFT, 0, 20, 0) -// |This paragraph starts at the top| -// |of the page and takes 3 lines. 
| -// | Here I have a second paragraph| ParagraphModel(LEFT, 0, 20, 0) -// |which indicates that the first | -// |paragraph is not a continuation | -// |from a previous page, as it is | -// |indented just like this second | -// |paragraph. | -// | Here is a block quote. It | ParagraphModel(LEFT, 30, 0, 0) -// | looks like the prior text | -// | but it is indented more | -// | and is fully justified. | -// | So how does one deal with | ParagraphModel(LEFT, 0, 20, 0) -// |centered text, block quotes, | -// |normal paragraphs, and lists | -// |like what follows? | -// |1. Make a plan. | ParagraphModel(LEFT, 0, 0, 30) -// |2. Use a heuristic, for example,| ParagraphModel(LEFT, 0, 0, 30) -// | looking for lines where the | -// | first word of the next line | -// | would fit on the previous | -// | line. | -// |8. Try to implement the plan in | ParagraphModel(LEFT, 0, 0, 30) -// | Python and try it out. | -// |4. Determine how to fix the | ParagraphModel(LEFT, 0, 0, 30) -// | mistakes. | -// |5. Repeat. | ParagraphModel(LEFT, 0, 0, 30) -// | For extra painful penalty work| ParagraphModel(LEFT, 0, 20, 0) -// |you can try to identify source | -// |code. Ouch! | -// +--------------------------------+ -class ParagraphModel { - public: - ParagraphModel(tesseract::ParagraphJustification justification, - int margin, - int first_indent, - int body_indent, - int tolerance) - : justification_(justification), - margin_(margin), - first_indent_(first_indent), - body_indent_(body_indent), - tolerance_(tolerance) { - // Make one of {first_indent, body_indent} is 0. 
- int added_margin = first_indent; - if (body_indent < added_margin) - added_margin = body_indent; - margin_ += added_margin; - first_indent_ -= added_margin; - body_indent_ -= added_margin; - } - - ParagraphModel() - : justification_(tesseract::JUSTIFICATION_UNKNOWN), - margin_(0), - first_indent_(0), - body_indent_(0), - tolerance_(0) { } - - // ValidFirstLine() and ValidBodyLine() take arguments describing a text line - // in a block of text which we are trying to model: - // lmargin, lindent: these add up to the distance from the leftmost ink - // in the text line to the surrounding text block's left - // edge. - // rmargin, rindent: these add up to the distance from the rightmost ink - // in the text line to the surrounding text block's right - // edge. - // The caller determines the division between "margin" and "indent", which - // only actually affect whether we think the line may be centered. - // - // If the amount of whitespace matches the amount of whitespace expected on - // the relevant side of the line (within tolerance_) we say it matches. - - // Return whether a given text line could be a first paragraph line according - // to this paragraph model. - bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const; - - // Return whether a given text line could be a first paragraph line according - // to this paragraph model. 
- bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const; - - tesseract::ParagraphJustification justification() const { - return justification_; - } - int margin() const { return margin_; } - int first_indent() const { return first_indent_; } - int body_indent() const { return body_indent_; } - int tolerance() const { return tolerance_; } - bool is_flush() const { - return (justification_ == tesseract::JUSTIFICATION_LEFT || - justification_ == tesseract::JUSTIFICATION_RIGHT) && - abs(first_indent_ - body_indent_) <= tolerance_; - } - - // Return whether this model is likely to agree with the other model on most - // paragraphs they are marked. - bool Comparable(const ParagraphModel &other) const; - - STRING ToString() const; - - private: - tesseract::ParagraphJustification justification_; - int margin_; - int first_indent_; - int body_indent_; - int tolerance_; -}; - -#endif // TESSERACT_CCSTRUCT_OCRPARA_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrrow.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrrow.cpp deleted file mode 100644 index f6f8dbdb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrrow.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/********************************************************************** - * File: ocrrow.cpp (Formerly row.c) - * Description: Code for the ROW class. - * Author: Ray Smith - * Created: Tue Oct 08 15:58:04 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "ocrrow.h" -#include "blobbox.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -ELISTIZE (ROW) -/********************************************************************** - * ROW::ROW - * - * Constructor to build a ROW. Only the stats stuff are given here. - * The words are added directly. - **********************************************************************/ -ROW::ROW ( //constructor -int32_t spline_size, //no of segments -int32_t * xstarts, //segment boundaries -double *coeffs, //coefficients -float x_height, //line height -float ascenders, //ascender size -float descenders, //descender drop -int16_t kern, //char gap -int16_t space //word gap -) - : baseline(spline_size, xstarts, coeffs), - para_(nullptr) { - kerning = kern; //just store stuff - spacing = space; - xheight = x_height; - ascrise = ascenders; - bodysize = 0.0f; - descdrop = descenders; - has_drop_cap_ = false; - lmargin_ = 0; - rmargin_ = 0; -} - - -/********************************************************************** - * ROW::ROW - * - * Constructor to build a ROW. Only the stats stuff are given here. - * The words are added directly. 
- **********************************************************************/ - -ROW::ROW( //constructor - TO_ROW *to_row, //source row - int16_t kern, //char gap - int16_t space //word gap - ) : para_(nullptr) { - kerning = kern; //just store stuff - spacing = space; - xheight = to_row->xheight; - bodysize = to_row->body_size; - ascrise = to_row->ascrise; - descdrop = to_row->descdrop; - baseline = to_row->baseline; - has_drop_cap_ = false; - lmargin_ = 0; - rmargin_ = 0; -} - -// Returns the bounding box including the desired combination of upper and -// lower noise/diacritic elements. -TBOX ROW::restricted_bounding_box(bool upper_dots, bool lower_dots) const { - TBOX box; - // This is a read-only iteration of the words in the row. - WERD_IT it(const_cast(&words)); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - box += it.data()->restricted_bounding_box(upper_dots, lower_dots); - } - return box; -} - -/********************************************************************** - * ROW::recalc_bounding_box - * - * Set the bounding box correctly - **********************************************************************/ - -void ROW::recalc_bounding_box() { //recalculate BB - WERD *word; //current word - WERD_IT it = &words; //words of ROW - int16_t left; //of word - int16_t prev_left; //old left - - if (!it.empty ()) { - word = it.data (); - prev_left = word->bounding_box ().left (); - it.forward (); - while (!it.at_first ()) { - word = it.data (); - left = word->bounding_box ().left (); - if (left < prev_left) { - it.move_to_first (); - //words in BB order - it.sort (word_comparator); - break; - } - prev_left = left; - it.forward (); - } - } - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - word = it.data (); - if (it.at_first ()) - word->set_flag (W_BOL, TRUE); - else - //not start of line - word->set_flag (W_BOL, FALSE); - if (it.at_last ()) - word->set_flag (W_EOL, TRUE); - else - //not end of line - word->set_flag (W_EOL, FALSE); - 
//extend BB as reqd - bound_box += word->bounding_box (); - } -} - - -/********************************************************************** - * ROW::move - * - * Reposition row by vector - **********************************************************************/ - -void ROW::move( // reposition row - const ICOORD vec // by vector - ) { - WERD_IT it(&words); // word iterator - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - it.data ()->move (vec); - - bound_box.move (vec); - baseline.move (vec); -} - - -/********************************************************************** - * ROW::print - * - * Display members - **********************************************************************/ - -void ROW::print( //print - FILE *fp //file to print on - ) { - tprintf("Kerning= %d\n", kerning); - tprintf("Spacing= %d\n", spacing); - bound_box.print(); - tprintf("Xheight= %f\n", xheight); - tprintf("Ascrise= %f\n", ascrise); - tprintf("Descdrop= %f\n", descdrop); - tprintf("has_drop_cap= %d\n", has_drop_cap_); - tprintf("lmargin= %d, rmargin= %d\n", lmargin_, rmargin_); -} - - -/********************************************************************** - * ROW::plot - * - * Draw the ROW in the given colour. - **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void ROW::plot( //draw it - ScrollView* window, //window to draw in - ScrollView::Color colour //colour to draw in - ) { - WERD *word; //current word - WERD_IT it = &words; //words of ROW - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - word = it.data (); - word->plot (window, colour); //all in one colour - } -} - -/********************************************************************** - * ROW::plot - * - * Draw the ROW in rainbow colours. 
- **********************************************************************/ - -void ROW::plot( //draw it - ScrollView* window //window to draw in - ) { - WERD *word; //current word - WERD_IT it = &words; //words of ROW - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - word = it.data (); - word->plot (window); //in rainbow colours - } -} -#endif // GRAPHICS_DISABLED - -/********************************************************************** - * ROW::operator= - * - * Assign rows by duplicating the row structure but NOT the WERDLIST - **********************************************************************/ - -ROW & ROW::operator= (const ROW & source) { - this->ELIST_LINK::operator= (source); - kerning = source.kerning; - spacing = source.spacing; - xheight = source.xheight; - bodysize = source.bodysize; - ascrise = source.ascrise; - descdrop = source.descdrop; - if (!words.empty ()) - words.clear (); - baseline = source.baseline; //QSPLINES must do = - bound_box = source.bound_box; - has_drop_cap_ = source.has_drop_cap_; - lmargin_ = source.lmargin_; - rmargin_ = source.rmargin_; - para_ = source.para_; - return *this; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrrow.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrrow.h deleted file mode 100644 index 3ab22c36..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ocrrow.h +++ /dev/null @@ -1,171 +0,0 @@ -/********************************************************************** - * File: ocrrow.h (Formerly row.h) - * Description: Code for the ROW class. - * Author: Ray Smith - * Created: Tue Oct 08 15:58:04 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef OCRROW_H -#define OCRROW_H - -#include // for int16_t, int32_t -#include // for FILE -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "quspline.h" // for QSPLINE -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::Color -#include "werd.h" // for WERD_LIST - -class ICOORD; -class TO_ROW; - -struct PARA; - -class ROW:public ELIST_LINK -{ - friend void tweak_row_baseline(ROW *, double, double); - public: - ROW() = default; - ROW( //constructor - int32_t spline_size, //no of segments - int32_t *xstarts, //segment boundaries - double *coeffs, //coefficients //ascender size - float x_height, - float ascenders, - float descenders, //descender size - int16_t kern, //char gap - int16_t space); //word gap - ROW( //constructor - TO_ROW *row, //textord row - int16_t kern, //char gap - int16_t space); //word gap - - WERD_LIST *word_list() { //get words - return &words; - } - - float base_line( //compute baseline - float xpos) const { //at the position - //get spline value - return (float) baseline.y (xpos); - } - float x_height() const { //return x height - return xheight; - } - void set_x_height(float new_xheight) { // set x height - xheight = new_xheight; - } - int32_t kern() const { //return kerning - return kerning; - } - float body_size() const { //return body size - return bodysize; - } - void set_body_size(float new_size) { // set body size - bodysize = new_size; - } - int32_t space() const { //return spacing 
- return spacing; - } - float ascenders() const { //return size - return ascrise; - } - float descenders() const { //return size - return descdrop; - } - TBOX bounding_box() const { //return bounding box - return bound_box; - } - // Returns the bounding box including the desired combination of upper and - // lower noise/diacritic elements. - TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; - - void set_lmargin(int16_t lmargin) { - lmargin_ = lmargin; - } - void set_rmargin(int16_t rmargin) { - rmargin_ = rmargin; - } - int16_t lmargin() const { - return lmargin_; - } - int16_t rmargin() const { - return rmargin_; - } - - void set_has_drop_cap(bool has) { - has_drop_cap_ = has; - } - bool has_drop_cap() const { - return has_drop_cap_; - } - - void set_para(PARA *p) { - para_ = p; - } - PARA *para() const { - return para_; - } - - void recalc_bounding_box(); //recalculate BB - - void move( // reposition row - const ICOORD vec); // by vector - - void print( //print - FILE *fp); //file to print on - - #ifndef GRAPHICS_DISABLED - void plot( //draw one - ScrollView* window, //window to draw in - ScrollView::Color colour); //uniform colour - void plot( //draw one - ScrollView* window); //in rainbow colours - - void plot_baseline( //draw the baseline - ScrollView* window, //window to draw in - ScrollView::Color colour) { //colour to draw - //draw it - baseline.plot (window, colour); - } - #endif // GRAPHICS_DISABLED - ROW& operator= (const ROW & source); - - private: - // Copy constructor (currently unused, therefore private). - ROW(const ROW& source); - - int32_t kerning; //inter char gap - int32_t spacing; //inter word gap - TBOX bound_box; //bounding box - float xheight; //height of line - float ascrise; //size of ascenders - float descdrop; //-size of descenders - float bodysize; //CJK character size. 
(equals to - //xheight+ascrise by default) - WERD_LIST words; //words - QSPLINE baseline; //baseline spline - - // These get set after blocks have been determined. - bool has_drop_cap_; - int16_t lmargin_; // Distance to left polyblock margin. - int16_t rmargin_; // Distance to right polyblock margin. - - // This gets set during paragraph analysis. - PARA *para_; // Paragraph of which this row is part. -}; - -ELISTIZEH (ROW) -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/otsuthr.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/otsuthr.cpp deleted file mode 100644 index 032a7cdb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/otsuthr.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************************** - * File: otsuthr.cpp - * Description: Simple Otsu thresholding for binarizing images. - * Author: Ray Smith - * Created: Fri Mar 07 12:31:01 PST 2008 - * - * (C) Copyright 2008, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "otsuthr.h" - -#include -#include "allheaders.h" -#include "helpers.h" -#include "openclwrapper.h" - - -namespace tesseract { - -// Computes the Otsu threshold(s) for the given image rectangle, making one -// for each channel. Each channel is always one byte per pixel. 
-// Returns an array of threshold values and an array of hi_values, such -// that a pixel value >threshold[channel] is considered foreground if -// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates -// that there is no apparent foreground. At least one hi_value will not be -1. -// Delete thresholds and hi_values with delete [] after use. -// The return value is the number of channels in the input image, being -// the size of the output thresholds and hi_values arrays. -int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, - int** thresholds, int** hi_values) { - int num_channels = pixGetDepth(src_pix) / 8; - // Of all channels with no good hi_value, keep the best so we can always - // produce at least one answer. - PERF_COUNT_START("OtsuThreshold") - int best_hi_value = 1; - int best_hi_index = 0; - bool any_good_hivalue = false; - double best_hi_dist = 0.0; - *thresholds = new int[num_channels]; - *hi_values = new int[num_channels]; - - // only use opencl if compiled w/ OpenCL and selected device is opencl -#ifdef USE_OPENCL - // all of channel 0 then all of channel 1... - int* histogramAllChannels = new int[kHistogramSize * num_channels]; - - // Calculate Histogram on GPU - OpenclDevice od; - if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && - top == 0 && left == 0) { - od.HistogramRectOCL(pixGetData(src_pix), num_channels, - pixGetWpl(src_pix) * 4, left, top, width, height, - kHistogramSize, histogramAllChannels); - - // Calculate Threshold from Histogram on cpu - for (int ch = 0; ch < num_channels; ++ch) { - (*thresholds)[ch] = -1; - (*hi_values)[ch] = -1; - int *histogram = &histogramAllChannels[kHistogramSize * ch]; - int H; - int best_omega_0; - int best_t = OtsuStats(histogram, &H, &best_omega_0); - if (best_omega_0 == 0 || best_omega_0 == H) { - // This channel is empty. 
- continue; - } - // To be a convincing foreground we must have a small fraction of H - // or to be a convincing background we must have a large fraction of H. - // In between we assume this channel contains no thresholding information. - int hi_value = best_omega_0 < H * 0.5; - (*thresholds)[ch] = best_t; - if (best_omega_0 > H * 0.75) { - any_good_hivalue = true; - (*hi_values)[ch] = 0; - } else if (best_omega_0 < H * 0.25) { - any_good_hivalue = true; - (*hi_values)[ch] = 1; - } else { - // In case all channels are like this, keep the best of the bad lot. - double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; - if (hi_dist > best_hi_dist) { - best_hi_dist = hi_dist; - best_hi_value = hi_value; - best_hi_index = ch; - } - } - } - } else { -#endif - for (int ch = 0; ch < num_channels; ++ch) { - (*thresholds)[ch] = -1; - (*hi_values)[ch] = -1; - // Compute the histogram of the image rectangle. - int histogram[kHistogramSize]; - HistogramRect(src_pix, ch, left, top, width, height, histogram); - int H; - int best_omega_0; - int best_t = OtsuStats(histogram, &H, &best_omega_0); - if (best_omega_0 == 0 || best_omega_0 == H) { - // This channel is empty. - continue; - } - // To be a convincing foreground we must have a small fraction of H - // or to be a convincing background we must have a large fraction of H. - // In between we assume this channel contains no thresholding information. - int hi_value = best_omega_0 < H * 0.5; - (*thresholds)[ch] = best_t; - if (best_omega_0 > H * 0.75) { - any_good_hivalue = true; - (*hi_values)[ch] = 0; - } else if (best_omega_0 < H * 0.25) { - any_good_hivalue = true; - (*hi_values)[ch] = 1; - } else { - // In case all channels are like this, keep the best of the bad lot. - double hi_dist = hi_value ? 
(H - best_omega_0) : best_omega_0; - if (hi_dist > best_hi_dist) { - best_hi_dist = hi_dist; - best_hi_value = hi_value; - best_hi_index = ch; - } - } - } -#ifdef USE_OPENCL - } - delete[] histogramAllChannels; -#endif // USE_OPENCL - - if (!any_good_hivalue) { - // Use the best of the ones that were not good enough. - (*hi_values)[best_hi_index] = best_hi_value; - } - PERF_COUNT_END - return num_channels; -} - -// Computes the histogram for the given image rectangle, and the given -// single channel. Each channel is always one byte per pixel. -// Histogram is always a kHistogramSize(256) element array to count -// occurrences of each pixel value. -void HistogramRect(Pix* src_pix, int channel, - int left, int top, int width, int height, - int* histogram) { - PERF_COUNT_START("HistogramRect") - int num_channels = pixGetDepth(src_pix) / 8; - channel = ClipToRange(channel, 0, num_channels - 1); - int bottom = top + height; - memset(histogram, 0, sizeof(*histogram) * kHistogramSize); - int src_wpl = pixGetWpl(src_pix); - l_uint32* srcdata = pixGetData(src_pix); - for (int y = top; y < bottom; ++y) { - const l_uint32* linedata = srcdata + y * src_wpl; - for (int x = 0; x < width; ++x) { - int pixel = GET_DATA_BYTE(linedata, (x + left) * num_channels + channel); - ++histogram[pixel]; - } - } - PERF_COUNT_END -} - -// Computes the Otsu threshold(s) for the given histogram. -// Also returns H = total count in histogram, and -// omega0 = count of histogram below threshold. -int OtsuStats(const int* histogram, int* H_out, int* omega0_out) { - int H = 0; - double mu_T = 0.0; - for (int i = 0; i < kHistogramSize; ++i) { - H += histogram[i]; - mu_T += static_cast(i) * histogram[i]; - } - - // Now maximize sig_sq_B over t. 
- // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf - int best_t = -1; - int omega_0, omega_1; - int best_omega_0 = 0; - double best_sig_sq_B = 0.0; - double mu_0, mu_1, mu_t; - omega_0 = 0; - mu_t = 0.0; - for (int t = 0; t < kHistogramSize - 1; ++t) { - omega_0 += histogram[t]; - mu_t += t * static_cast(histogram[t]); - if (omega_0 == 0) - continue; - omega_1 = H - omega_0; - if (omega_1 == 0) - break; - mu_0 = mu_t / omega_0; - mu_1 = (mu_T - mu_t) / omega_1; - double sig_sq_B = mu_1 - mu_0; - sig_sq_B *= sig_sq_B * omega_0 * omega_1; - if (best_t < 0 || sig_sq_B > best_sig_sq_B) { - best_sig_sq_B = sig_sq_B; - best_t = t; - best_omega_0 = omega_0; - } - } - if (H_out != nullptr) *H_out = H; - if (omega0_out != nullptr) *omega0_out = best_omega_0; - return best_t; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/otsuthr.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/otsuthr.h deleted file mode 100644 index dd35d23f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/otsuthr.h +++ /dev/null @@ -1,56 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: otsuthr.h -// Description: Simple Otsu thresholding for binarizing images. -// Author: Ray Smith -// Created: Fri Mar 07 12:14:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCMAIN_OTSUTHR_H_ -#define TESSERACT_CCMAIN_OTSUTHR_H_ - -struct Pix; - -namespace tesseract { - -const int kHistogramSize = 256; // The size of a histogram of pixel values. - -// Computes the Otsu threshold(s) for the given image rectangle, making one -// for each channel. Each channel is always one byte per pixel. -// Returns an array of threshold values and an array of hi_values, such -// that a pixel value >threshold[channel] is considered foreground if -// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates -// that there is no apparent foreground. At least one hi_value will not be -1. -// Delete thresholds and hi_values with delete [] after use. -// The return value is the number of channels in the input image, being -// the size of the output thresholds and hi_values arrays. -int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, - int** thresholds, int** hi_values); - -// Computes the histogram for the given image rectangle, and the given -// single channel. Each channel is always one byte per pixel. -// Histogram is always a kHistogramSize(256) element array to count -// occurrences of each pixel value. -void HistogramRect(Pix* src_pix, int channel, - int left, int top, int width, int height, - int* histogram); - -// Computes the Otsu threshold(s) for the given histogram. -// Also returns H = total count in histogram, and -// omega0 = count of histogram below threshold. -int OtsuStats(const int* histogram, int* H_out, int* omega0_out); - -} // namespace tesseract. 
- -#endif // TESSERACT_CCMAIN_OTSUTHR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pageres.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pageres.cpp deleted file mode 100644 index 923ad30a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pageres.cpp +++ /dev/null @@ -1,1690 +0,0 @@ -/********************************************************************** - * File: pageres.cpp (Formerly page_res.c) - * Description: Hierarchy of results classes from PAGE_RES to WERD_RES - * and an iterator class to iterate over the words. - * Main purposes: - * Easy way to iterate over the words without a 3-nested loop. - * Holds data used during word recognition. - * Holds information about alternative spacing paths. - * Author: Phil Cheatle - * Created: Tue Sep 22 08:42:49 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "pageres.h" -#include // for assert -#include // for INT32_MAX -#include // for strlen -#include "blamer.h" // for BlamerBundle -#include "blobs.h" // for TWERD, TBLOB -#include "boxword.h" // for BoxWord -#include "errcode.h" // for ASSERT_HOST -#include "host.h" // for TRUE, FALSE -#include "ocrblock.h" // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only) -#include "ocrrow.h" // for ROW, ROW_IT -#include "pdblock.h" // for PDBLK -#include "polyblk.h" // for POLY_BLOCK -#include "publictypes.h" // for OcrEngineMode, OEM_LSTM_ONLY -#include "seam.h" // for SEAM, start_seam_list -#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST -#include "tesscallback.h" // for NewPermanentTessCallback, TessResultCallback2 -#include "tprintf.h" // for tprintf - -struct Pix; - -ELISTIZE (BLOCK_RES) -CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES) - -// Gain factor for computing thresholds that determine the ambiguity of a word. -static const double kStopperAmbiguityThresholdGain = 8.0; -// Constant offset for computing thresholds that determine the ambiguity of a -// word. -static const double kStopperAmbiguityThresholdOffset = 1.5; -// Max number of broken pieces to associate. -const int kWordrecMaxNumJoinChunks = 4; -// Max ratio of word box height to line size to allow it to be processed as -// a line with other words. -const double kMaxWordSizeRatio = 1.25; -// Max ratio of line box height to line size to allow a new word to be added. -const double kMaxLineSizeRatio = 1.25; -// Max ratio of word gap to line size to allow a new word to be added. -const double kMaxWordGapRatio = 2.0; - -// Computes and returns a threshold of certainty difference used to determine -// which words to keep, based on the adjustment factors of the two words. -// TODO(rays) This is horrible. Replace with an enhance params training model. 
-static double StopperAmbigThreshold(double f1, double f2) { - return (f2 - f1) * kStopperAmbiguityThresholdGain - - kStopperAmbiguityThresholdOffset; -} - -/************************************************************************* - * PAGE_RES::PAGE_RES - * - * Constructor for page results - *************************************************************************/ -PAGE_RES::PAGE_RES( - bool merge_similar_words, - BLOCK_LIST *the_block_list, - WERD_CHOICE **prev_word_best_choice_ptr) { - Init(); - BLOCK_IT block_it(the_block_list); - BLOCK_RES_IT block_res_it(&block_res_list); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block_res_it.add_to_end(new BLOCK_RES(merge_similar_words, - block_it.data())); - } - prev_word_best_choice = prev_word_best_choice_ptr; -} - -/************************************************************************* - * BLOCK_RES::BLOCK_RES - * - * Constructor for BLOCK results - *************************************************************************/ - -BLOCK_RES::BLOCK_RES(bool merge_similar_words, BLOCK *the_block) { - ROW_IT row_it (the_block->row_list ()); - ROW_RES_IT row_res_it(&row_res_list); - - char_count = 0; - rej_count = 0; - font_class = -1; //not assigned - x_height = -1.0; - font_assigned = false; - bold = false; - italic = false; - row_count = 0; - - block = the_block; - - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row_res_it.add_to_end(new ROW_RES(merge_similar_words, row_it.data())); - } -} - -/************************************************************************* - * ROW_RES::ROW_RES - * - * Constructor for ROW results - *************************************************************************/ - -ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { - WERD_IT word_it(the_row->word_list()); - WERD_RES_IT word_res_it(&word_res_list); - WERD_RES *combo = nullptr; // current combination of fuzzies - WERD *copy_word; - - char_count = 0; - rej_count = 
0; - whole_word_rej_count = 0; - - row = the_row; - bool add_next_word = false; - TBOX union_box; - float line_height = the_row->x_height() + the_row->ascenders() - - the_row->descenders(); - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - WERD_RES* word_res = new WERD_RES(word_it.data()); - word_res->x_height = the_row->x_height(); - if (add_next_word) { - ASSERT_HOST(combo != nullptr); - // We are adding this word to the combination. - word_res->part_of_combo = TRUE; - combo->copy_on(word_res); - } else if (merge_similar_words) { - union_box = word_res->word->bounding_box(); - add_next_word = !word_res->word->flag(W_REP_CHAR) && - union_box.height() <= line_height * kMaxWordSizeRatio; - word_res->odd_size = !add_next_word; - } - WERD* next_word = word_it.data_relative(1); - if (merge_similar_words) { - if (add_next_word && !next_word->flag(W_REP_CHAR)) { - // Next word will be added on if all of the following are true: - // Not a rep char. - // Box height small enough. - // Union box height small enough. - // Horizontal gap small enough. 
- TBOX next_box = next_word->bounding_box(); - int prev_right = union_box.right(); - union_box += next_box; - if (next_box.height() > line_height * kMaxWordSizeRatio || - union_box.height() > line_height * kMaxLineSizeRatio || - next_box.left() > prev_right + line_height * kMaxWordGapRatio) { - add_next_word = false; - } - } - next_word->set_flag(W_FUZZY_NON, add_next_word); - } else { - add_next_word = next_word->flag(W_FUZZY_NON); - } - if (add_next_word) { - if (combo == nullptr) { - copy_word = new WERD; - *copy_word = *(word_it.data()); // deep copy - combo = new WERD_RES(copy_word); - combo->x_height = the_row->x_height(); - combo->combination = TRUE; - word_res_it.add_to_end(combo); - } - word_res->part_of_combo = TRUE; - } else { - combo = nullptr; - } - word_res_it.add_to_end(word_res); - } -} - - -WERD_RES& WERD_RES::operator=(const WERD_RES & source) { - this->ELIST_LINK::operator=(source); - Clear(); - if (source.combination) { - word = new WERD; - *word = *(source.word); // deep copy - } else { - word = source.word; // pt to same word - } - if (source.bln_boxes != nullptr) - bln_boxes = new tesseract::BoxWord(*source.bln_boxes); - if (source.chopped_word != nullptr) - chopped_word = new TWERD(*source.chopped_word); - if (source.rebuild_word != nullptr) - rebuild_word = new TWERD(*source.rebuild_word); - // TODO(rays) Do we ever need to copy the seam_array? - blob_row = source.blob_row; - denorm = source.denorm; - if (source.box_word != nullptr) - box_word = new tesseract::BoxWord(*source.box_word); - best_state = source.best_state; - correct_text = source.correct_text; - blob_widths = source.blob_widths; - blob_gaps = source.blob_gaps; - // None of the uses of operator= require the ratings matrix to be copied, - // so don't as it would be really slow. - - // Copy the cooked choices. 
- WERD_CHOICE_IT wc_it(const_cast(&source.best_choices)); - WERD_CHOICE_IT wc_dest_it(&best_choices); - for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) { - const WERD_CHOICE *choice = wc_it.data(); - wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice)); - } - if (!wc_dest_it.empty()) { - wc_dest_it.move_to_first(); - best_choice = wc_dest_it.data(); - } else { - best_choice = nullptr; - } - - if (source.raw_choice != nullptr) { - raw_choice = new WERD_CHOICE(*source.raw_choice); - } else { - raw_choice = nullptr; - } - if (source.ep_choice != nullptr) { - ep_choice = new WERD_CHOICE(*source.ep_choice); - } else { - ep_choice = nullptr; - } - reject_map = source.reject_map; - combination = source.combination; - part_of_combo = source.part_of_combo; - CopySimpleFields(source); - if (source.blamer_bundle != nullptr) { - blamer_bundle = new BlamerBundle(*(source.blamer_bundle)); - } - return *this; -} - -// Copies basic fields that don't involve pointers that might be useful -// to copy when making one WERD_RES from another. -void WERD_RES::CopySimpleFields(const WERD_RES& source) { - tess_failed = source.tess_failed; - tess_accepted = source.tess_accepted; - tess_would_adapt = source.tess_would_adapt; - done = source.done; - unlv_crunch_mode = source.unlv_crunch_mode; - small_caps = source.small_caps; - odd_size = source.odd_size; - italic = source.italic; - bold = source.bold; - fontinfo = source.fontinfo; - fontinfo2 = source.fontinfo2; - fontinfo_id_count = source.fontinfo_id_count; - fontinfo_id2_count = source.fontinfo_id2_count; - x_height = source.x_height; - caps_height = source.caps_height; - baseline_shift = source.baseline_shift; - guessed_x_ht = source.guessed_x_ht; - guessed_caps_ht = source.guessed_caps_ht; - reject_spaces = source.reject_spaces; - uch_set = source.uch_set; - tesseract = source.tesseract; -} - -// Initializes a blank (default constructed) WERD_RES from one that has -// already been recognized. 
-// Use SetupFor*Recognition afterwards to complete the setup and make -// it ready for a retry recognition. -void WERD_RES::InitForRetryRecognition(const WERD_RES& source) { - word = source.word; - CopySimpleFields(source); - if (source.blamer_bundle != nullptr) { - blamer_bundle = new BlamerBundle(); - blamer_bundle->CopyTruth(*source.blamer_bundle); - } -} - -// Sets up the members used in recognition: bln_boxes, chopped_word, -// seam_array, denorm. Returns false if -// the word is empty and sets up fake results. If use_body_size is -// true and row->body_size is set, then body_size will be used for -// blob normalization instead of xheight + ascrise. This flag is for -// those languages that are using CJK pitch model and thus it has to -// be true if and only if tesseract->textord_use_cjk_fp_model is -// true. -// If allow_detailed_fx is true, the feature extractor will receive fine -// precision outline information, allowing smoother features and better -// features on low resolution images. -// The norm_mode_hint sets the default mode for normalization in absence -// of any of the above flags. -// norm_box is used to override the word bounding box to determine the -// normalization scale and offset. -// Returns false if the word is empty and sets up fake results. -bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in, - tesseract::Tesseract* tess, Pix* pix, - int norm_mode, - const TBOX* norm_box, - bool numeric_mode, - bool use_body_size, - bool allow_detailed_fx, - ROW *row, const BLOCK* block) { - tesseract::OcrEngineMode norm_mode_hint = - static_cast(norm_mode); - tesseract = tess; - POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr; - if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY && - word->cblob_list()->empty()) || - (pb != nullptr && !pb->IsText())) { - // Empty words occur when all the blobs have been moved to the rej_blobs - // list, which seems to occur frequently in junk. 
- SetupFake(unicharset_in); - word->set_flag(W_REP_CHAR, false); - return false; - } - ClearResults(); - SetupWordScript(unicharset_in); - chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word); - float word_xheight = use_body_size && row != nullptr && row->body_size() > 0.0f - ? row->body_size() : x_height; - chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), - word_xheight, baseline_shift, numeric_mode, - norm_mode_hint, norm_box, &denorm); - blob_row = row; - SetupBasicsFromChoppedWord(unicharset_in); - SetupBlamerBundle(); - int num_blobs = chopped_word->NumBlobs(); - ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks); - tess_failed = false; - return true; -} - -// Set up the seam array, bln_boxes, best_choice, and raw_choice to empty -// accumulators from a made chopped word. We presume the fields are already -// empty. -void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in) { - bln_boxes = tesseract::BoxWord::CopyFromNormalized(chopped_word); - start_seam_list(chopped_word, &seam_array); - SetupBlobWidthsAndGaps(); - ClearWordChoices(); -} - -// Sets up the members used in recognition for an empty recognition result: -// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. -void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) { - ClearResults(); - SetupWordScript(unicharset_in); - chopped_word = new TWERD; - rebuild_word = new TWERD; - bln_boxes = new tesseract::BoxWord; - box_word = new tesseract::BoxWord; - int blob_count = word->cblob_list()->length(); - if (blob_count > 0) { - BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count]; - // For non-text blocks, just pass any blobs through to the box_word - // and call the word failed with a fake classification. 
- C_BLOB_IT b_it(word->cblob_list()); - int blob_id = 0; - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - TBOX box = b_it.data()->bounding_box(); - box_word->InsertBox(box_word->length(), box); - fake_choices[blob_id++] = new BLOB_CHOICE; - } - FakeClassifyWord(blob_count, fake_choices); - delete [] fake_choices; - } else { - WERD_CHOICE* word = new WERD_CHOICE(&unicharset_in); - word->make_bad(); - LogNewRawChoice(word); - // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice. - LogNewCookedChoice(1, false, word); - } - tess_failed = true; - done = true; -} - -void WERD_RES::SetupWordScript(const UNICHARSET& uch) { - uch_set = &uch; - int script = uch.default_sid(); - word->set_script_id(script); - word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight()); - word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid()); -} - -// Sets up the blamer_bundle if it is not null, using the initialized denorm. -void WERD_RES::SetupBlamerBundle() { - if (blamer_bundle != nullptr) { - blamer_bundle->SetupNormTruthWord(denorm); - } -} - -// Computes the blob_widths and blob_gaps from the chopped_word. -void WERD_RES::SetupBlobWidthsAndGaps() { - blob_widths.truncate(0); - blob_gaps.truncate(0); - int num_blobs = chopped_word->NumBlobs(); - for (int b = 0; b < num_blobs; ++b) { - TBLOB *blob = chopped_word->blobs[b]; - TBOX box = blob->bounding_box(); - blob_widths.push_back(box.width()); - if (b + 1 < num_blobs) { - blob_gaps.push_back( - chopped_word->blobs[b + 1]->bounding_box().left() - box.right()); - } - } -} - -// Updates internal data to account for a new SEAM (chop) at the given -// blob_number. Fixes the ratings matrix and states in the choices, as well -// as the blob widths and gaps. -void WERD_RES::InsertSeam(int blob_number, SEAM* seam) { - // Insert the seam into the SEAMS array. 
- seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true); - seam_array.insert(seam, blob_number); - if (ratings != nullptr) { - // Expand the ratings matrix. - ratings = ratings->ConsumeAndMakeBigger(blob_number); - // Fix all the segmentation states. - if (raw_choice != nullptr) - raw_choice->UpdateStateForSplit(blob_number); - WERD_CHOICE_IT wc_it(&best_choices); - for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) { - WERD_CHOICE* choice = wc_it.data(); - choice->UpdateStateForSplit(blob_number); - } - SetupBlobWidthsAndGaps(); - } -} - -// Returns true if all the word choices except the first have adjust_factors -// worse than the given threshold. -bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan(float threshold) const { - // The choices are not changed by this iteration. - WERD_CHOICE_IT wc_it(const_cast(&best_choices)); - for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) { - WERD_CHOICE* choice = wc_it.data(); - if (choice->adjust_factor() <= threshold) - return false; - } - return true; -} - -// Returns true if the current word is ambiguous (by number of answers or -// by dangerous ambigs.) -bool WERD_RES::IsAmbiguous() { - return !best_choices.singleton() || best_choice->dangerous_ambig_found(); -} - -// Returns true if the ratings matrix size matches the sum of each of the -// segmentation states. 
-bool WERD_RES::StatesAllValid() { - int ratings_dim = ratings->dimension(); - if (raw_choice->TotalOfStates() != ratings_dim) { - tprintf("raw_choice has total of states = %d vs ratings dim of %d\n", - raw_choice->TotalOfStates(), ratings_dim); - return false; - } - WERD_CHOICE_IT it(&best_choices); - int index = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) { - WERD_CHOICE* choice = it.data(); - if (choice->TotalOfStates() != ratings_dim) { - tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n", - index, choice->TotalOfStates(), ratings_dim); - return false; - } - } - return true; -} - -// Prints a list of words found if debug is true or the word result matches -// the word_to_debug. -void WERD_RES::DebugWordChoices(bool debug, const char* word_to_debug) { - if (debug || - (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr && - best_choice->unichar_string() == STRING(word_to_debug))) { - if (raw_choice != nullptr) - raw_choice->print("\nBest Raw Choice"); - - WERD_CHOICE_IT it(&best_choices); - int index = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) { - WERD_CHOICE* choice = it.data(); - STRING label; - label.add_str_int("\nCooked Choice #", index); - choice->print(label.string()); - } - } -} - -// Prints the top choice along with the accepted/done flags. -void WERD_RES::DebugTopChoice(const char* msg) const { - tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", - tess_accepted, tess_would_adapt, done); - if (best_choice == nullptr) - tprintf("\n"); - else - best_choice->print(msg); -} - -// Removes from best_choices all choices which are not within a reasonable -// range of the best choice. -// TODO(rays) incorporate the information used here into the params training -// re-ranker, in place of this heuristic that is based on the previous -// adjustment factor. 
-void WERD_RES::FilterWordChoices(int debug_level) { - if (best_choice == nullptr || best_choices.singleton()) - return; - - if (debug_level >= 2) - best_choice->print("\nFiltering against best choice"); - WERD_CHOICE_IT it(&best_choices); - int index = 0; - for (it.forward(); !it.at_first(); it.forward(), ++index) { - WERD_CHOICE* choice = it.data(); - float threshold = StopperAmbigThreshold(best_choice->adjust_factor(), - choice->adjust_factor()); - // i, j index the blob choice in choice, best_choice. - // chunk is an index into the chopped_word blobs (AKA chunks). - // Since the two words may use different segmentations of the chunks, we - // iterate over the chunks to find out whether a comparable blob - // classification is much worse than the best result. - int i = 0, j = 0, chunk = 0; - // Each iteration of the while deals with 1 chunk. On entry choice_chunk - // and best_chunk are the indices of the first chunk in the NEXT blob, - // i.e. we don't have to increment i, j while chunk < choice_chunk and - // best_chunk respectively. - int choice_chunk = choice->state(0), best_chunk = best_choice->state(0); - while (i < choice->length() && j < best_choice->length()) { - if (choice->unichar_id(i) != best_choice->unichar_id(j) && - choice->certainty(i) - best_choice->certainty(j) < threshold) { - if (debug_level >= 2) { - choice->print("WorstCertaintyDiffWorseThan"); - tprintf( - "i %d j %d Choice->Blob[i].Certainty %.4g" - " WorstOtherChoiceCertainty %g Threshold %g\n", - i, j, choice->certainty(i), best_choice->certainty(j), threshold); - tprintf("Discarding bad choice #%d\n", index); - } - delete it.extract(); - break; - } - ++chunk; - // If needed, advance choice_chunk to keep up with chunk. - while (choice_chunk < chunk && ++i < choice->length()) - choice_chunk += choice->state(i); - // If needed, advance best_chunk to keep up with chunk. 
- while (best_chunk < chunk && ++j < best_choice->length()) - best_chunk += best_choice->state(j); - } - } -} - -void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, - float min_rating, - float max_rating, - float rating_margin, - float* thresholds) { - int chunk = 0; - int end_chunk = best_choice->state(0); - int end_raw_chunk = raw_choice->state(0); - int raw_blob = 0; - for (int i = 0; i < best_choice->length(); i++, thresholds++) { - float avg_rating = 0.0f; - int num_error_chunks = 0; - - // For each chunk in best choice blob i, count non-matching raw results. - while (chunk < end_chunk) { - if (chunk >= end_raw_chunk) { - ++raw_blob; - end_raw_chunk += raw_choice->state(raw_blob); - } - if (best_choice->unichar_id(i) != - raw_choice->unichar_id(raw_blob)) { - avg_rating += raw_choice->certainty(raw_blob); - ++num_error_chunks; - } - ++chunk; - } - - if (num_error_chunks > 0) { - avg_rating /= num_error_chunks; - *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin); - } else { - *thresholds = max_rating; - } - - if (*thresholds > max_rating) - *thresholds = max_rating; - if (*thresholds < min_rating) - *thresholds = min_rating; - } -} - -// Saves a copy of the word_choice if it has the best unadjusted rating. -// Returns true if the word_choice was the new best. -bool WERD_RES::LogNewRawChoice(WERD_CHOICE* word_choice) { - if (raw_choice == nullptr || word_choice->rating() < raw_choice->rating()) { - delete raw_choice; - raw_choice = new WERD_CHOICE(*word_choice); - raw_choice->set_permuter(TOP_CHOICE_PERM); - return true; - } - return false; -} - -// Consumes word_choice by adding it to best_choices, (taking ownership) if -// the certainty for word_choice is some distance of the best choice in -// best_choices, or by deleting the word_choice and returning false. -// The best_choices list is kept in sorted order by rating. Duplicates are -// removed, and the list is kept no longer than max_num_choices in length. 
-// Returns true if the word_choice is still a valid pointer. -bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, - WERD_CHOICE* word_choice) { - if (best_choice != nullptr) { - // Throw out obviously bad choices to save some work. - // TODO(rays) Get rid of this! This piece of code produces different - // results according to the order in which words are found, which is an - // undesirable behavior. It would be better to keep all the choices and - // prune them later when more information is available. - float max_certainty_delta = - StopperAmbigThreshold(best_choice->adjust_factor(), - word_choice->adjust_factor()); - if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) - max_certainty_delta = -kStopperAmbiguityThresholdOffset; - if (word_choice->certainty() - best_choice->certainty() < - max_certainty_delta) { - if (debug) { - STRING bad_string; - word_choice->string_and_lengths(&bad_string, nullptr); - tprintf("Discarding choice \"%s\" with an overly low certainty" - " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n", - bad_string.string(), word_choice->certainty(), - best_choice->certainty(), - max_certainty_delta + best_choice->certainty()); - } - delete word_choice; - return false; - } - } - - // Insert in the list in order of increasing rating, but knock out worse - // string duplicates. - WERD_CHOICE_IT it(&best_choices); - const STRING& new_str = word_choice->unichar_string(); - bool inserted = false; - int num_choices = 0; - if (!it.empty()) { - do { - WERD_CHOICE* choice = it.data(); - if (choice->rating() > word_choice->rating() && !inserted) { - // Time to insert. - it.add_before_stay_put(word_choice); - inserted = true; - if (num_choices == 0) - best_choice = word_choice; // This is the new best. - ++num_choices; - } - if (choice->unichar_string() == new_str) { - if (inserted) { - // New is better. - delete it.extract(); - } else { - // Old is better. 
- if (debug) { - tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n", - new_str.string(), word_choice->rating(), choice->rating()); - } - delete word_choice; - return false; - } - } else { - ++num_choices; - if (num_choices > max_num_choices) - delete it.extract(); - } - it.forward(); - } while (!it.at_first()); - } - if (!inserted && num_choices < max_num_choices) { - it.add_to_end(word_choice); - inserted = true; - if (num_choices == 0) - best_choice = word_choice; // This is the new best. - } - if (debug) { - if (inserted) - tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary"); - else - tprintf("Poor"); - word_choice->print(" Word Choice"); - } - if (!inserted) { - delete word_choice; - return false; - } - return true; -} - - -// Simple helper moves the ownership of the pointer data from src to dest, -// first deleting anything in dest, and nulling out src afterwards. -template static void MovePointerData(T** dest, T**src) { - delete *dest; - *dest = *src; - *src = nullptr; -} - -// Prints a brief list of all the best choices. -void WERD_RES::PrintBestChoices() const { - STRING alternates_str; - WERD_CHOICE_IT it(const_cast(&best_choices)); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (!it.at_first()) alternates_str += "\", \""; - alternates_str += it.data()->unichar_string(); - } - tprintf("Alternates for \"%s\": {\"%s\"}\n", - best_choice->unichar_string().string(), alternates_str.string()); -} - -// Returns the sum of the widths of the blob between start_blob and last_blob -// inclusive. -int WERD_RES::GetBlobsWidth(int start_blob, int last_blob) { - int result = 0; - for (int b = start_blob; b <= last_blob; ++b) { - result += blob_widths[b]; - if (b < last_blob) - result += blob_gaps[b]; - } - return result; -} -// Returns the width of a gap between the specified blob and the next one. 
-int WERD_RES::GetBlobsGap(int blob_index) { - if (blob_index < 0 || blob_index >= blob_gaps.size()) - return 0; - return blob_gaps[blob_index]; -} - -// Returns the BLOB_CHOICE corresponding to the given index in the -// best choice word taken from the appropriate cell in the ratings MATRIX. -// Borrowed pointer, so do not delete. May return nullptr if there is no -// BLOB_CHOICE matching the unichar_id at the given index. -BLOB_CHOICE* WERD_RES::GetBlobChoice(int index) const { - if (index < 0 || index >= best_choice->length()) return nullptr; - BLOB_CHOICE_LIST* choices = GetBlobChoices(index); - return FindMatchingChoice(best_choice->unichar_id(index), choices); -} - -// Returns the BLOB_CHOICE_LIST corresponding to the given index in the -// best choice word taken from the appropriate cell in the ratings MATRIX. -// Borrowed pointer, so do not delete. -BLOB_CHOICE_LIST* WERD_RES::GetBlobChoices(int index) const { - return best_choice->blob_choices(index, ratings); -} - -// Moves the results fields from word to this. This takes ownership of all -// the data, so src can be destructed. 
-void WERD_RES::ConsumeWordResults(WERD_RES* word) { - denorm = word->denorm; - blob_row = word->blob_row; - MovePointerData(&chopped_word, &word->chopped_word); - MovePointerData(&rebuild_word, &word->rebuild_word); - MovePointerData(&box_word, &word->box_word); - seam_array.delete_data_pointers(); - seam_array = word->seam_array; - word->seam_array.clear(); - best_state.move(&word->best_state); - correct_text.move(&word->correct_text); - blob_widths.move(&word->blob_widths); - blob_gaps.move(&word->blob_gaps); - if (ratings != nullptr) ratings->delete_matrix_pointers(); - MovePointerData(&ratings, &word->ratings); - best_choice = word->best_choice; - MovePointerData(&raw_choice, &word->raw_choice); - best_choices.clear(); - WERD_CHOICE_IT wc_it(&best_choices); - wc_it.add_list_after(&word->best_choices); - reject_map = word->reject_map; - if (word->blamer_bundle != nullptr) { - assert(blamer_bundle != nullptr); - blamer_bundle->CopyResults(*(word->blamer_bundle)); - } - CopySimpleFields(*word); -} - -// Replace the best choice and rebuild box word. -// choice must be from the current best_choices list. -void WERD_RES::ReplaceBestChoice(WERD_CHOICE* choice) { - best_choice = choice; - RebuildBestState(); - SetupBoxWord(); - // Make up a fake reject map of the right length to keep the - // rejection pass happy. - reject_map.initialise(best_state.length()); - done = tess_accepted = tess_would_adapt = true; - SetScriptPositions(); -} - -// Builds the rebuild_word and sets the best_state from the chopped_word and -// the best_choice->state. 
-void WERD_RES::RebuildBestState() { - ASSERT_HOST(best_choice != nullptr); - delete rebuild_word; - rebuild_word = new TWERD; - if (seam_array.empty()) - start_seam_list(chopped_word, &seam_array); - best_state.truncate(0); - int start = 0; - for (int i = 0; i < best_choice->length(); ++i) { - int length = best_choice->state(i); - best_state.push_back(length); - if (length > 1) { - SEAM::JoinPieces(seam_array, chopped_word->blobs, start, - start + length - 1); - } - TBLOB* blob = chopped_word->blobs[start]; - rebuild_word->blobs.push_back(new TBLOB(*blob)); - if (length > 1) { - SEAM::BreakPieces(seam_array, chopped_word->blobs, start, - start + length - 1); - } - start += length; - } -} - -// Copies the chopped_word to the rebuild_word, faking a best_state as well. -// Also sets up the output box_word. -void WERD_RES::CloneChoppedToRebuild() { - delete rebuild_word; - rebuild_word = new TWERD(*chopped_word); - SetupBoxWord(); - int word_len = box_word->length(); - best_state.reserve(word_len); - correct_text.reserve(word_len); - for (int i = 0; i < word_len; ++i) { - best_state.push_back(1); - correct_text.push_back(STRING("")); - } -} - -// Sets/replaces the box_word with one made from the rebuild_word. -void WERD_RES::SetupBoxWord() { - delete box_word; - rebuild_word->ComputeBoundingBoxes(); - box_word = tesseract::BoxWord::CopyFromNormalized(rebuild_word); - box_word->ClipToOriginalWord(denorm.block(), word); -} - -// Sets up the script positions in the output best_choice using the best_choice -// to get the unichars, and the unicharset to get the target positions. -void WERD_RES::SetScriptPositions() { - best_choice->SetScriptPositions(small_caps, chopped_word); -} -// Sets all the blobs in all the words (raw choice and best choices) to be -// the given position. (When a sub/superscript is recognized as a separate -// word, it falls victim to the rule that a whole word cannot be sub or -// superscript, so this function overrides that problem.) 
-void WERD_RES::SetAllScriptPositions(tesseract::ScriptPos position) { - raw_choice->SetAllScriptPositions(position); - WERD_CHOICE_IT wc_it(&best_choices); - for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) - wc_it.data()->SetAllScriptPositions(position); -} - -// Classifies the word with some already-calculated BLOB_CHOICEs. -// The choices are an array of blob_count pointers to BLOB_CHOICE, -// providing a single classifier result for each blob. -// The BLOB_CHOICEs are consumed and the word takes ownership. -// The number of blobs in the box_word must match blob_count. -void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) { - // Setup the WERD_RES. - ASSERT_HOST(box_word != nullptr); - ASSERT_HOST(blob_count == box_word->length()); - ClearWordChoices(); - ClearRatings(); - ratings = new MATRIX(blob_count, 1); - for (int c = 0; c < blob_count; ++c) { - BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST; - BLOB_CHOICE_IT choice_it(choice_list); - choice_it.add_after_then_move(choices[c]); - ratings->put(c, c, choice_list); - } - FakeWordFromRatings(TOP_CHOICE_PERM); - reject_map.initialise(blob_count); - best_state.init_to_size(blob_count, 1); - done = true; -} - -// Creates a WERD_CHOICE for the word using the top choices from the leading -// diagonal of the ratings matrix. 
-void WERD_RES::FakeWordFromRatings(PermuterType permuter) { - int num_blobs = ratings->dimension(); - WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs); - word_choice->set_permuter(permuter); - for (int b = 0; b < num_blobs; ++b) { - UNICHAR_ID unichar_id = UNICHAR_SPACE; - float rating = INT32_MAX; - float certainty = -INT32_MAX; - BLOB_CHOICE_LIST* choices = ratings->get(b, b); - if (choices != nullptr && !choices->empty()) { - BLOB_CHOICE_IT bc_it(choices); - BLOB_CHOICE* choice = bc_it.data(); - unichar_id = choice->unichar_id(); - rating = choice->rating(); - certainty = choice->certainty(); - } - word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating, - certainty); - } - LogNewRawChoice(word_choice); - // Ownership of word_choice taken by word here. - LogNewCookedChoice(1, false, word_choice); -} - -// Copies the best_choice strings to the correct_text for adaption/training. -void WERD_RES::BestChoiceToCorrectText() { - correct_text.clear(); - ASSERT_HOST(best_choice != nullptr); - for (int i = 0; i < best_choice->length(); ++i) { - UNICHAR_ID choice_id = best_choice->unichar_id(i); - const char* blob_choice = uch_set->id_to_unichar(choice_id); - correct_text.push_back(STRING(blob_choice)); - } -} - -// Merges 2 adjacent blobs in the result if the permanent callback -// class_cb returns other than INVALID_UNICHAR_ID, AND the permanent -// callback box_cb is nullptr or returns true, setting the merged blob -// result to the class returned from class_cb. -// Returns true if anything was merged. 
-bool WERD_RES::ConditionalBlobMerge( - TessResultCallback2* class_cb, - TessResultCallback2* box_cb) { - ASSERT_HOST(best_choice->length() == 0 || ratings != nullptr); - bool modified = false; - for (int i = 0; i + 1 < best_choice->length(); ++i) { - UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i), - best_choice->unichar_id(i+1)); - if (new_id != INVALID_UNICHAR_ID && - (box_cb == nullptr || box_cb->Run(box_word->BlobBox(i), - box_word->BlobBox(i + 1)))) { - // Raw choice should not be fixed. - best_choice->set_unichar_id(new_id, i); - modified = true; - MergeAdjacentBlobs(i); - const MATRIX_COORD& coord = best_choice->MatrixCoord(i); - if (!coord.Valid(*ratings)) { - ratings->IncreaseBandSize(coord.row + 1 - coord.col); - } - BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i); - if (FindMatchingChoice(new_id, blob_choices) == nullptr) { - // Insert a fake result. - BLOB_CHOICE* blob_choice = new BLOB_CHOICE; - blob_choice->set_unichar_id(new_id); - BLOB_CHOICE_IT bc_it(blob_choices); - bc_it.add_before_then_move(blob_choice); - } - } - } - delete class_cb; - delete box_cb; - return modified; -} - -// Merges 2 adjacent blobs in the result (index and index+1) and corrects -// all the data to account for the change. -void WERD_RES::MergeAdjacentBlobs(int index) { - if (reject_map.length() == best_choice->length()) - reject_map.remove_pos(index); - best_choice->remove_unichar_id(index + 1); - rebuild_word->MergeBlobs(index, index + 2); - box_word->MergeBoxes(index, index + 2); - if (index + 1 < best_state.length()) { - best_state[index] += best_state[index + 1]; - best_state.remove(index + 1); - } -} - -// TODO(tkielbus) Decide between keeping this behavior here or modifying the -// training data. - -// Utility function for fix_quotes -// Return true if the next character in the string (given the UTF8 length in -// bytes) is a quote character. 
-static int is_simple_quote(const char* signed_str, int length) { - const unsigned char* str = - reinterpret_cast(signed_str); - // Standard 1 byte quotes. - return (length == 1 && (*str == '\'' || *str == '`')) || - // UTF-8 3 bytes curved quotes. - (length == 3 && ((*str == 0xe2 && - *(str + 1) == 0x80 && - *(str + 2) == 0x98) || - (*str == 0xe2 && - *(str + 1) == 0x80 && - *(str + 2) == 0x99))); -} - -// Callback helper for fix_quotes returns a double quote if both -// arguments are quote, otherwise INVALID_UNICHAR_ID. -UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) { - const char *ch = uch_set->id_to_unichar(id1); - const char *next_ch = uch_set->id_to_unichar(id2); - if (is_simple_quote(ch, strlen(ch)) && - is_simple_quote(next_ch, strlen(next_ch))) - return uch_set->unichar_to_id("\""); - return INVALID_UNICHAR_ID; -} - -// Change pairs of quotes to double quotes. -void WERD_RES::fix_quotes() { - if (!uch_set->contains_unichar("\"") || - !uch_set->get_enabled(uch_set->unichar_to_id("\""))) - return; // Don't create it if it is disallowed. - - ConditionalBlobMerge( - NewPermanentTessCallback(this, &WERD_RES::BothQuotes), - nullptr); -} - -// Callback helper for fix_hyphens returns UNICHAR_ID of - if both -// arguments are hyphen, otherwise INVALID_UNICHAR_ID. -UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) { - const char *ch = uch_set->id_to_unichar(id1); - const char *next_ch = uch_set->id_to_unichar(id2); - if (strlen(ch) == 1 && strlen(next_ch) == 1 && - (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~')) - return uch_set->unichar_to_id("-"); - return INVALID_UNICHAR_ID; -} - -// Callback helper for fix_hyphens returns true if box1 and box2 overlap -// (assuming both on the same textline, are in order and a chopped em dash.) 
-bool WERD_RES::HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2) { - return box1.right() >= box2.left(); -} - -// Change pairs of hyphens to a single hyphen if the bounding boxes touch -// Typically a long dash which has been segmented. -void WERD_RES::fix_hyphens() { - if (!uch_set->contains_unichar("-") || - !uch_set->get_enabled(uch_set->unichar_to_id("-"))) - return; // Don't create it if it is disallowed. - - ConditionalBlobMerge( - NewPermanentTessCallback(this, &WERD_RES::BothHyphens), - NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap)); -} - -// Callback helper for merge_tess_fails returns a space if both -// arguments are space, otherwise INVALID_UNICHAR_ID. -UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) { - if (id1 == id2 && id1 == uch_set->unichar_to_id(" ")) - return id1; - else - return INVALID_UNICHAR_ID; -} - -// Change pairs of tess failures to a single one -void WERD_RES::merge_tess_fails() { - if (ConditionalBlobMerge( - NewPermanentTessCallback(this, &WERD_RES::BothSpaces), nullptr)) { - int len = best_choice->length(); - ASSERT_HOST(reject_map.length() == len); - ASSERT_HOST(box_word->length() == len); - } -} - -// Returns true if the collection of count pieces, starting at start, are all -// natural connected components, ie there are no real chops involved. -bool WERD_RES::PiecesAllNatural(int start, int count) const { - // all seams must have no splits. 
- for (int index = start; index < start + count - 1; ++index) { - if (index >= 0 && index < seam_array.size()) { - SEAM* seam = seam_array[index]; - if (seam != nullptr && seam->HasAnySplits()) return false; - } - } - return true; -} - - -WERD_RES::~WERD_RES () { - Clear(); -} - -void WERD_RES::InitNonPointers() { - tess_failed = false; - tess_accepted = false; - tess_would_adapt = false; - done = false; - unlv_crunch_mode = CR_NONE; - small_caps = false; - odd_size = false; - italic = FALSE; - bold = FALSE; - // The fontinfos and tesseract count as non-pointers as they point to - // data owned elsewhere. - fontinfo = nullptr; - fontinfo2 = nullptr; - tesseract = nullptr; - fontinfo_id_count = 0; - fontinfo_id2_count = 0; - x_height = 0.0; - caps_height = 0.0; - baseline_shift = 0.0f; - space_certainty = 0.0f; - guessed_x_ht = true; - guessed_caps_ht = true; - combination = false; - part_of_combo = false; - reject_spaces = false; -} - -void WERD_RES::InitPointers() { - word = nullptr; - bln_boxes = nullptr; - blob_row = nullptr; - uch_set = nullptr; - chopped_word = nullptr; - rebuild_word = nullptr; - box_word = nullptr; - ratings = nullptr; - best_choice = nullptr; - raw_choice = nullptr; - ep_choice = nullptr; - blamer_bundle = nullptr; -} - -void WERD_RES::Clear() { - if (combination) { - delete word; - } - word = nullptr; - delete blamer_bundle; - blamer_bundle = nullptr; - ClearResults(); -} - -void WERD_RES::ClearResults() { - done = false; - fontinfo = nullptr; - fontinfo2 = nullptr; - fontinfo_id_count = 0; - fontinfo_id2_count = 0; - delete bln_boxes; - bln_boxes = nullptr; - blob_row = nullptr; - delete chopped_word; - chopped_word = nullptr; - delete rebuild_word; - rebuild_word = nullptr; - delete box_word; - box_word = nullptr; - best_state.clear(); - correct_text.clear(); - seam_array.delete_data_pointers(); - seam_array.clear(); - blob_widths.clear(); - blob_gaps.clear(); - ClearRatings(); - ClearWordChoices(); - if (blamer_bundle != nullptr) 
blamer_bundle->ClearResults(); -} -void WERD_RES::ClearWordChoices() { - best_choice = nullptr; - delete raw_choice; - raw_choice = nullptr; - best_choices.clear(); - delete ep_choice; - ep_choice = nullptr; -} -void WERD_RES::ClearRatings() { - if (ratings != nullptr) { - ratings->delete_matrix_pointers(); - delete ratings; - ratings = nullptr; - } -} - - -bool PAGE_RES_IT::operator ==(const PAGE_RES_IT &other) const { - return word_res == other.word_res && - row_res == other.row_res && - block_res == other.block_res; -} - -int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { - ASSERT_HOST(page_res == other.page_res); - if (other.block_res == nullptr) { - // other points to the end of the page. - if (block_res == nullptr) - return 0; - return -1; - } - if (block_res == nullptr) { - return 1; // we point to the end of the page. - } - if (block_res == other.block_res) { - if (other.row_res == nullptr || row_res == nullptr) { - // this should only happen if we hit an image block. - return 0; - } - if (row_res == other.row_res) { - // we point to the same block and row. - ASSERT_HOST(other.word_res != nullptr && word_res != nullptr); - if (word_res == other.word_res) { - // we point to the same word! - return 0; - } - - WERD_RES_IT word_res_it(&row_res->word_res_list); - for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); - word_res_it.forward()) { - if (word_res_it.data() == word_res) { - return -1; - } else if (word_res_it.data() == other.word_res) { - return 1; - } - } - ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr); - } - - // we both point to the same block, but different rows. - ROW_RES_IT row_res_it(&block_res->row_res_list); - for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); - row_res_it.forward()) { - if (row_res_it.data() == row_res) { - return -1; - } else if (row_res_it.data() == other.row_res) { - return 1; - } - } - ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr); - } - - // We point to different blocks. 
- BLOCK_RES_IT block_res_it(&page_res->block_res_list); - for (block_res_it.mark_cycle_pt(); - !block_res_it.cycled_list(); block_res_it.forward()) { - if (block_res_it.data() == block_res) { - return -1; - } else if (block_res_it.data() == other.block_res) { - return 1; - } - } - // Shouldn't happen... - ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr); - return 0; -} - -// Inserts the new_word as a combination owned by a corresponding WERD_RES -// before the current position. The simple fields of the WERD_RES are copied -// from clone_res and the resulting WERD_RES is returned for further setup -// with best_choice etc. -WERD_RES* PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES& clone_res, - WERD* new_word) { - // Make a WERD_RES for the new_word. - WERD_RES* new_res = new WERD_RES(new_word); - new_res->CopySimpleFields(clone_res); - new_res->combination = true; - // Insert into the appropriate place in the ROW_RES. - WERD_RES_IT wr_it(&row()->word_res_list); - for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { - WERD_RES* word = wr_it.data(); - if (word == word_res) - break; - } - ASSERT_HOST(!wr_it.cycled_list()); - wr_it.add_before_then_move(new_res); - if (wr_it.at_first()) { - // This is the new first word, so reset the member iterator so it - // detects the cycled_list state correctly. - ResetWordIterator(); - } - return new_res; -} - -// Helper computes the boundaries between blobs in the word. The blob bounds -// are likely very poor, if they come from LSTM, where it only outputs the -// character at one pixel within it, so we find the midpoints between them. 
-static void ComputeBlobEnds(const WERD_RES& word, C_BLOB_LIST* next_word_blobs, - GenericVector* blob_ends) { - C_BLOB_IT blob_it(word.word->cblob_list()); - for (int i = 0; i < word.best_state.size(); ++i) { - int length = word.best_state[i]; - // Get the bounding box of the fake blobs - TBOX blob_box = blob_it.data()->bounding_box(); - blob_it.forward(); - for (int b = 1; b < length; ++b) { - blob_box += blob_it.data()->bounding_box(); - blob_it.forward(); - } - // This blob_box is crap, so for now we are only looking for the - // boundaries between them. - int blob_end = INT32_MAX; - if (!blob_it.at_first() || next_word_blobs != nullptr) { - if (blob_it.at_first()) - blob_it.set_to_list(next_word_blobs); - blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2; - } - blob_ends->push_back(blob_end); - } -} - -// Replaces the current WERD/WERD_RES with the given words. The given words -// contain fake blobs that indicate the position of the characters. These are -// replaced with real blobs from the current word as much as possible. -void PAGE_RES_IT::ReplaceCurrentWord( - tesseract::PointerVector* words) { - if (words->empty()) { - DeleteCurrentWord(); - return; - } - WERD_RES* input_word = word(); - // Set the BOL/EOL flags on the words from the input word. - if (input_word->word->flag(W_BOL)) { - (*words)[0]->word->set_flag(W_BOL, true); - } else { - (*words)[0]->word->set_blanks(input_word->word->space()); - } - words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL)); - - // Move the blobs from the input word to the new set of words. - // If the input word_res is a combination, then the replacements will also be - // combinations, and will own their own words. If the input word_res is not a - // combination, then the final replacements will not be either, (although it - // is allowed for the input words to be combinations) and their words - // will get put on the row list. This maintains the ownership rules. 
- WERD_IT w_it(row()->row->word_list()); - if (!input_word->combination) { - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (word == input_word->word) - break; - } - // w_it is now set to the input_word's word. - ASSERT_HOST(!w_it.cycled_list()); - } - // Insert into the appropriate place in the ROW_RES. - WERD_RES_IT wr_it(&row()->word_res_list); - for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { - WERD_RES* word = wr_it.data(); - if (word == input_word) - break; - } - ASSERT_HOST(!wr_it.cycled_list()); - // Since we only have an estimate of the bounds between blobs, use the blob - // x-middle as the determiner of where to put the blobs - C_BLOB_IT src_b_it(input_word->word->cblob_list()); - src_b_it.sort(&C_BLOB::SortByXMiddle); - C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list()); - rej_b_it.sort(&C_BLOB::SortByXMiddle); - for (int w = 0; w < words->size(); ++w) { - WERD_RES* word_w = (*words)[w]; - // Compute blob boundaries. - GenericVector blob_ends; - C_BLOB_LIST* next_word_blobs = - w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : nullptr; - ComputeBlobEnds(*word_w, next_word_blobs, &blob_ends); - // Delete the fake blobs on the current word. - word_w->word->cblob_list()->clear(); - C_BLOB_IT dest_it(word_w->word->cblob_list()); - // Build the box word as we move the blobs. - tesseract::BoxWord* box_word = new tesseract::BoxWord; - for (int i = 0; i < blob_ends.size(); ++i) { - int end_x = blob_ends[i]; - TBOX blob_box; - // Add the blobs up to end_x. 
- while (!src_b_it.empty() && - src_b_it.data()->bounding_box().x_middle() < end_x) { - blob_box += src_b_it.data()->bounding_box(); - dest_it.add_after_then_move(src_b_it.extract()); - src_b_it.forward(); - } - while (!rej_b_it.empty() && - rej_b_it.data()->bounding_box().x_middle() < end_x) { - blob_box += rej_b_it.data()->bounding_box(); - dest_it.add_after_then_move(rej_b_it.extract()); - rej_b_it.forward(); - } - // Clip to the previously computed bounds. Although imperfectly accurate, - // it is good enough, and much more complicated to determine where else - // to clip. - if (i > 0 && blob_box.left() < blob_ends[i - 1]) - blob_box.set_left(blob_ends[i - 1]); - if (blob_box.right() > end_x) - blob_box.set_right(end_x); - box_word->InsertBox(i, blob_box); - } - // Fix empty boxes. If a very joined blob sits over multiple characters, - // then we will have some empty boxes from using the middle, so look for - // overlaps. - for (int i = 0; i < box_word->length(); ++i) { - TBOX box = box_word->BlobBox(i); - if (box.null_box()) { - // Nothing has its middle in the bounds of this blob, so use anything - // that overlaps. - for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); - dest_it.forward()) { - TBOX blob_box = dest_it.data()->bounding_box(); - if (blob_box.left() < blob_ends[i] && - (i == 0 || blob_box.right() >= blob_ends[i - 1])) { - if (i > 0 && blob_box.left() < blob_ends[i - 1]) - blob_box.set_left(blob_ends[i - 1]); - if (blob_box.right() > blob_ends[i]) - blob_box.set_right(blob_ends[i]); - box_word->ChangeBox(i, blob_box); - break; - } - } - } - } - delete word_w->box_word; - word_w->box_word = box_word; - if (!input_word->combination) { - // Insert word_w->word into the ROW. It doesn't own its word, so the - // ROW needs to own it. - w_it.add_before_stay_put(word_w->word); - word_w->combination = false; - } - (*words)[w] = nullptr; // We are taking ownership. - wr_it.add_before_stay_put(word_w); - } - // We have taken ownership of the words. 
- words->clear(); - // Delete the current word, which has been replaced. We could just call - // DeleteCurrentWord, but that would iterate both lists again, and we know - // we are already in the right place. - if (!input_word->combination) - delete w_it.extract(); - delete wr_it.extract(); - ResetWordIterator(); -} - -// Deletes the current WERD_RES and its underlying WERD. -void PAGE_RES_IT::DeleteCurrentWord() { - // Check that this word is as we expect. part_of_combos are NEVER iterated - // by the normal iterator, so we should never be trying to delete them. - ASSERT_HOST(!word_res->part_of_combo); - if (!word_res->combination) { - // Combinations own their own word, so we won't find the word on the - // row's word_list, but it is legitimate to try to delete them. - // Delete word from the ROW when not a combination. - WERD_IT w_it(row()->row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - if (w_it.data() == word_res->word) { - break; - } - } - ASSERT_HOST(!w_it.cycled_list()); - delete w_it.extract(); - } - // Remove the WERD_RES for the new_word. - // Remove the WORD_RES from the ROW_RES. - WERD_RES_IT wr_it(&row()->word_res_list); - for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { - if (wr_it.data() == word_res) { - word_res = nullptr; - break; - } - } - ASSERT_HOST(!wr_it.cycled_list()); - delete wr_it.extract(); - ResetWordIterator(); -} - -// Makes the current word a fuzzy space if not already fuzzy. Updates -// corresponding part of combo if required. -void PAGE_RES_IT::MakeCurrentWordFuzzy() { - WERD* real_word = word_res->word; - if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) { - real_word->set_flag(W_FUZZY_SP, true); - if (word_res->combination) { - // The next word should be the corresponding part of combo, but we have - // already stepped past it, so find it by search. 
- WERD_RES_IT wr_it(&row()->word_res_list); - for (wr_it.mark_cycle_pt(); - !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) { - } - wr_it.forward(); - ASSERT_HOST(wr_it.data()->part_of_combo); - real_word = wr_it.data()->word; - ASSERT_HOST(!real_word->flag(W_FUZZY_SP) && - !real_word->flag(W_FUZZY_NON)); - real_word->set_flag(W_FUZZY_SP, true); - } - } -} - -/************************************************************************* - * PAGE_RES_IT::restart_page - * - * Set things up at the start of the page - *************************************************************************/ - -WERD_RES *PAGE_RES_IT::start_page(bool empty_ok) { - block_res_it.set_to_list(&page_res->block_res_list); - block_res_it.mark_cycle_pt(); - prev_block_res = nullptr; - prev_row_res = nullptr; - prev_word_res = nullptr; - block_res = nullptr; - row_res = nullptr; - word_res = nullptr; - next_block_res = nullptr; - next_row_res = nullptr; - next_word_res = nullptr; - internal_forward(true, empty_ok); - return internal_forward(false, empty_ok); -} - -// Recovers from operations on the current word, such as in InsertCloneWord -// and DeleteCurrentWord. -// Resets the word_res_it so that it is one past the next_word_res, as -// it should be after internal_forward. If next_row_res != row_res, -// then the next_word_res is in the next row, so there is no need to do -// anything to word_res_it, but it is still a good idea to reset the pointers -// word_res and prev_word_res, which are still in the current row. -void PAGE_RES_IT::ResetWordIterator() { - if (row_res == next_row_res) { - // Reset the member iterator so it can move forward and detect the - // cycled_list state correctly. 
- word_res_it.move_to_first(); - for (word_res_it.mark_cycle_pt(); - !word_res_it.cycled_list() && word_res_it.data() != next_word_res; - word_res_it.forward()) { - if (!word_res_it.data()->part_of_combo) { - if (prev_row_res == row_res) prev_word_res = word_res; - word_res = word_res_it.data(); - } - } - ASSERT_HOST(!word_res_it.cycled_list()); - word_res_it.forward(); - } else { - // word_res_it is OK, but reset word_res and prev_word_res if needed. - WERD_RES_IT wr_it(&row_res->word_res_list); - for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { - if (!wr_it.data()->part_of_combo) { - if (prev_row_res == row_res) prev_word_res = word_res; - word_res = wr_it.data(); - } - } - } -} - -/************************************************************************* - * PAGE_RES_IT::internal_forward - * - * Find the next word on the page. If empty_ok is true, then non-text blocks - * and text blocks with no text are visited as if they contain a single - * imaginary word in a single imaginary row. (word() and row() both return nullptr - * in such a block and the return value is nullptr.) - * If empty_ok is false, the old behaviour is maintained. Each real word - * is visited and empty and non-text blocks and rows are skipped. - * new_block is used to initialize the iterators for a new block. - * The iterator maintains pointers to block, row and word for the previous, - * current and next words. These are correct, regardless of block/row - * boundaries. nullptr values denote start and end of the page. 
- *************************************************************************/ - -WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { - bool new_row = false; - - prev_block_res = block_res; - prev_row_res = row_res; - prev_word_res = word_res; - block_res = next_block_res; - row_res = next_row_res; - word_res = next_word_res; - next_block_res = nullptr; - next_row_res = nullptr; - next_word_res = nullptr; - - while (!block_res_it.cycled_list()) { - if (new_block) { - new_block = false; - row_res_it.set_to_list(&block_res_it.data()->row_res_list); - row_res_it.mark_cycle_pt(); - if (row_res_it.empty() && empty_ok) { - next_block_res = block_res_it.data(); - break; - } - new_row = true; - } - while (!row_res_it.cycled_list()) { - if (new_row) { - new_row = false; - word_res_it.set_to_list(&row_res_it.data()->word_res_list); - word_res_it.mark_cycle_pt(); - } - // Skip any part_of_combo words. - while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo) - word_res_it.forward(); - if (!word_res_it.cycled_list()) { - next_block_res = block_res_it.data(); - next_row_res = row_res_it.data(); - next_word_res = word_res_it.data(); - word_res_it.forward(); - goto foundword; - } - // end of row reached - row_res_it.forward(); - new_row = true; - } - // end of block reached - block_res_it.forward(); - new_block = true; - } - foundword: - // Update prev_word_best_choice pointer. - if (page_res != nullptr && page_res->prev_word_best_choice != nullptr) { - *page_res->prev_word_best_choice = - (new_block || prev_word_res == nullptr) ? nullptr : prev_word_res->best_choice; - } - return word_res; -} - -/************************************************************************* - * PAGE_RES_IT::restart_row() - * - * Move to the beginning (leftmost word) of the current row. 
- *************************************************************************/ -WERD_RES *PAGE_RES_IT::restart_row() { - ROW_RES *row = this->row(); - if (!row) return nullptr; - for (restart_page(); this->row() != row; forward()) { - // pass - } - return word(); -} - -/************************************************************************* - * PAGE_RES_IT::forward_paragraph - * - * Move to the beginning of the next paragraph, allowing empty blocks. - *************************************************************************/ - -WERD_RES *PAGE_RES_IT::forward_paragraph() { - while (block_res == next_block_res && - (next_row_res != nullptr && next_row_res->row != nullptr && - row_res->row->para() == next_row_res->row->para())) { - internal_forward(false, true); - } - return internal_forward(false, true); -} - -/************************************************************************* - * PAGE_RES_IT::forward_block - * - * Move to the beginning of the next block, allowing empty blocks. - *************************************************************************/ - -WERD_RES *PAGE_RES_IT::forward_block() { - while (block_res == next_block_res) { - internal_forward(false, true); - } - return internal_forward(false, true); -} - -void PAGE_RES_IT::rej_stat_word() { - int16_t chars_in_word; - int16_t rejects_in_word = 0; - - chars_in_word = word_res->reject_map.length (); - page_res->char_count += chars_in_word; - block_res->char_count += chars_in_word; - row_res->char_count += chars_in_word; - - rejects_in_word = word_res->reject_map.reject_count (); - - page_res->rej_count += rejects_in_word; - block_res->rej_count += rejects_in_word; - row_res->rej_count += rejects_in_word; - if (chars_in_word == rejects_in_word) - row_res->whole_word_rej_count += rejects_in_word; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pageres.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pageres.h deleted file mode 100644 index 
6aea3283..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pageres.h +++ /dev/null @@ -1,791 +0,0 @@ -/********************************************************************** - * File: pageres.h (Formerly page_res.h) - * Description: Results classes used by control.c - * Author: Phil Cheatle - * Created: Tue Sep 22 08:42:49 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef PAGERES_H -#define PAGERES_H - -#include // for int32_t, int16_t -#include // for std::pair -#include // for std::vector -#include // for int8_t -#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS -#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH -#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH -#include "genericvector.h" // for GenericVector, PointerVector (ptr only) -#include "matrix.h" // for MATRIX -#include "normalis.h" // for DENORM -#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only) -#include "rect.h" // for TBOX -#include "rejctmap.h" // for REJMAP -#include "strngs.h" // for STRING -#include "unichar.h" // for UNICHAR_ID, INVALID_UNICHAR_ID -#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI... 
-#include "werd.h" // for WERD, W_BOL, W_EOL - -class BLOCK; -class BLOCK_LIST; -class BLOCK_RES; -class ROW; -class ROW_RES; -class SEAM; -class WERD_RES; - -struct Pix; -struct TWERD; - -template class TessResultCallback2; - -namespace tesseract { - class BoxWord; - class Tesseract; - struct FontInfo; -} -using tesseract::FontInfo; - -/* Forward declarations */ - -class BLOCK_RES; - -ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES) -class -ROW_RES; - -ELISTIZEH (ROW_RES) -class WERD_RES; - -ELISTIZEH (WERD_RES) - -/************************************************************************* - * PAGE_RES - Page results - *************************************************************************/ -class PAGE_RES { // page result - public: - int32_t char_count; - int32_t rej_count; - BLOCK_RES_LIST block_res_list; - bool rejected; - // Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to - // the next word. This pointer is not owned by PAGE_RES class. - WERD_CHOICE **prev_word_best_choice; - // Sums of blame reasons computed by the blamer. - GenericVector blame_reasons; - // Debug information about all the misadaptions on this page. - // Each BlamerBundle contains an index into this vector, so that words that - // caused misadaption could be marked. However, since words could be - // deleted/split/merged, the log is stored on the PAGE_RES level. 
- GenericVector misadaption_log; - - inline void Init() { - char_count = 0; - rej_count = 0; - rejected = false; - prev_word_best_choice = nullptr; - blame_reasons.init_to_size(IRR_NUM_REASONS, 0); - } - - PAGE_RES() { Init(); } // empty constructor - - PAGE_RES(bool merge_similar_words, - BLOCK_LIST *block_list, // real blocks - WERD_CHOICE **prev_word_best_choice_ptr); - - ~PAGE_RES () = default; -}; - -/************************************************************************* - * BLOCK_RES - Block results - *************************************************************************/ - -class BLOCK_RES:public ELIST_LINK { - public: - BLOCK * block; // real block - int32_t char_count; // chars in block - int32_t rej_count; // rejected chars - int16_t font_class; // - int16_t row_count; - float x_height; - bool font_assigned; // block already - // processed - bool bold; // all bold - bool italic; // all italic - - ROW_RES_LIST row_res_list; - - BLOCK_RES() = default; - - BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block - - ~BLOCK_RES () = default; -}; - -/************************************************************************* - * ROW_RES - Row results - *************************************************************************/ - -class ROW_RES:public ELIST_LINK { - public: - ROW * row; // real row - int32_t char_count; // chars in block - int32_t rej_count; // rejected chars - int32_t whole_word_rej_count; // rejs in total rej wds - WERD_RES_LIST word_res_list; - - ROW_RES() = default; - - ROW_RES(bool merge_similar_words, ROW *the_row); // real row - - ~ROW_RES() = default; -}; - -/************************************************************************* - * WERD_RES - Word results - *************************************************************************/ -enum CRUNCH_MODE -{ - CR_NONE, - CR_KEEP_SPACE, - CR_LOOSE_SPACE, - CR_DELETE -}; - -// WERD_RES is a collection of publicly accessible members that gathers -// information about a word 
result. -class WERD_RES : public ELIST_LINK { - public: - // Which word is which? - // There are 3 coordinate spaces in use here: a possibly rotated pixel space, - // the original image coordinate space, and the BLN space in which the - // baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight, - // and the x-middle of the word is at 0. - // In the rotated pixel space, coordinates correspond to the input image, - // but may be rotated about the origin by a multiple of 90 degrees, - // and may therefore be negative. - // In any case a rotation by denorm.block()->re_rotation() will take them - // back to the original image. - // The other differences between words all represent different stages of - // processing during recognition. - - // ---------------------------INPUT------------------------------------- - - // The word is the input C_BLOBs in the rotated pixel space. - // word is NOT owned by the WERD_RES unless combination is true. - // All the other word pointers ARE owned by the WERD_RES. - WERD* word; // Input C_BLOB word. - - // -------------SETUP BY SetupFor*Recognition---READONLY-INPUT------------ - - // The bln_boxes contains the bounding boxes (only) of the input word, in the - // BLN space. The lengths of word and bln_boxes - // match as they are both before any chopping. - // TODO(rays) determine if docqual does anything useful and delete bln_boxes - // if it doesn't. - tesseract::BoxWord* bln_boxes; // BLN input bounding boxes. - // The ROW that this word sits in. NOT owned by the WERD_RES. - ROW* blob_row; - // The denorm provides the transformation to get back to the rotated image - // coords from the chopped_word/rebuild_word BLN coords, but each blob also - // has its own denorm. - DENORM denorm; // For use on chopped_word. - // Unicharset used by the classifier output in best_choice and raw_choice. - const UNICHARSET* uch_set; // For converting back to utf8. 
- - // ----Initialized by SetupFor*Recognition---BUT OUTPUT FROM RECOGNITION---- - // ----Setup to a (different!) state expected by the various classifiers---- - // TODO(rays) Tidy and make more consistent. - - // The chopped_word is also in BLN space, and represents the fully chopped - // character fragments that make up the word. - // The length of chopped_word matches length of seam_array + 1 (if set). - TWERD* chopped_word; // BLN chopped fragments output. - // Vector of SEAM* holding chopping points matching chopped_word. - GenericVector seam_array; - // Widths of blobs in chopped_word. - GenericVector blob_widths; - // Gaps between blobs in chopped_word. blob_gaps[i] is the gap between - // blob i and blob i+1. - GenericVector blob_gaps; - // Stores the lstm choices of every timestep - std::vector>> timesteps; - // Ratings matrix contains classifier choices for each classified combination - // of blobs. The dimension is the same as the number of blobs in chopped_word - // and the leading diagonal corresponds to classifier results of the blobs - // in chopped_word. The state_ members of best_choice, raw_choice and - // best_choices all correspond to this ratings matrix and allow extraction - // of the blob choices for any given WERD_CHOICE. - MATRIX* ratings; // Owned pointer. - // Pointer to the first WERD_CHOICE in best_choices. This is the result that - // will be output from Tesseract. Note that this is now a borrowed pointer - // and should NOT be deleted. - WERD_CHOICE* best_choice; // Borrowed pointer. - // The best raw_choice found during segmentation search. Differs from the - // best_choice by being the best result according to just the character - // classifier, not taking any language model information into account. - // Unlike best_choice, the pointer IS owned by this WERD_RES. - WERD_CHOICE* raw_choice; // Owned pointer. - // Alternative results found during chopping/segmentation search stages. 
- // Note that being an ELIST, best_choices owns the WERD_CHOICEs. - WERD_CHOICE_LIST best_choices; - - // Truth bounding boxes, text and incorrect choice reason. - BlamerBundle *blamer_bundle; - - // --------------OUTPUT FROM RECOGNITION------------------------------- - // --------------Not all fields are necessarily set.------------------- - // ---best_choice, raw_choice *must* end up set, with a box_word------- - // ---In complete output, the number of blobs in rebuild_word matches--- - // ---the number of boxes in box_word, the number of unichar_ids in--- - // ---best_choice, the number of ints in best_state, and the number--- - // ---of strings in correct_text-------------------------------------- - // ---SetupFake Sets everything to appropriate values if the word is--- - // ---known to be bad before recognition.------------------------------ - - // The rebuild_word is also in BLN space, but represents the final best - // segmentation of the word. Its length is therefore the same as box_word. - TWERD* rebuild_word; // BLN best segmented word. - // The box_word is in the original image coordinate space. It is the - // bounding boxes of the rebuild_word, after denormalization. - // The length of box_word matches rebuild_word, best_state (if set) and - // correct_text (if set), as well as best_choice and represents the - // number of classified units in the output. - tesseract::BoxWord* box_word; // Denormalized output boxes. - // The best_state stores the relationship between chopped_word and - // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] - // adjacent blobs in chopped_word. The seams in seam_array are hidden - // within a rebuild_word blob and revealed between them. - GenericVector best_state; // Number of blobs in each best blob. - // The correct_text is used during training and adaption to carry the - // text to the training system without the need for a unicharset. 
There - // is one entry in the vector for each blob in rebuild_word and box_word. - GenericVector correct_text; - // The Tesseract that was used to recognize this word. Just a borrowed - // pointer. Note: Tesseract's class definition is in a higher-level library. - // We avoid introducing a cyclic dependency by not using the Tesseract - // within WERD_RES. We are just storing it to provide access to it - // for the top-level multi-language controller, and maybe for output of - // the recognized language. - tesseract::Tesseract* tesseract; - - // Less-well documented members. - // TODO(rays) Add more documentation here. - WERD_CHOICE *ep_choice; // ep text TODO(rays) delete this. - REJMAP reject_map; // best_choice rejects - bool tess_failed; - /* - If tess_failed is TRUE, one of the following tests failed when Tess - returned: - - The outword blob list was not the same length as the best_choice string; - - The best_choice string contained ALL blanks; - - The best_choice string was zero length - */ - bool tess_accepted; // Tess thinks its ok? - bool tess_would_adapt; // Tess would adapt? - bool done; // ready for output? - bool small_caps; // word appears to be small caps - bool odd_size; // word is bigger than line or leader dots. - int8_t italic; - int8_t bold; - // The fontinfos are pointers to data owned by the classifier. - const FontInfo* fontinfo; - const FontInfo* fontinfo2; - int8_t fontinfo_id_count; // number of votes - int8_t fontinfo_id2_count; // number of votes - bool guessed_x_ht; - bool guessed_caps_ht; - CRUNCH_MODE unlv_crunch_mode; - float x_height; // post match estimate - float caps_height; // post match estimate - float baseline_shift; // post match estimate. - // Certainty score for the spaces either side of this word (LSTM mode). - // MIN this value with the actual word certainty. 
- float space_certainty; - - /* - To deal with fuzzy spaces we need to be able to combine "words" to form - combinations when we suspect that the gap is a non-space. The (new) text - ord code generates separate words for EVERY fuzzy gap - flags in the word - indicate whether the gap is below the threshold (fuzzy kern) and is thus - NOT a real word break by default, or above the threshold (fuzzy space) and - this is a real word break by default. - - The WERD_RES list contains all these words PLUS "combination" words built - out of (copies of) the words split by fuzzy kerns. The separate parts have - their "part_of_combo" flag set true and should be IGNORED on a default - reading of the list. - - Combination words are FOLLOWED by the sequence of part_of_combo words - which they combine. - */ - bool combination; //of two fuzzy gap wds - bool part_of_combo; //part of a combo - bool reject_spaces; //Reject spacing? - - WERD_RES() { - InitNonPointers(); - InitPointers(); - } - WERD_RES(WERD *the_word) { - InitNonPointers(); - InitPointers(); - word = the_word; - } - // Deep copies everything except the ratings MATRIX. - // To get that use deep_copy below. - WERD_RES(const WERD_RES& source) : ELIST_LINK(source) { - InitPointers(); - *this = source; // see operator= - } - - ~WERD_RES(); - - // Returns the UTF-8 string for the given blob index in the best_choice word, - // given that we know whether we are in a right-to-left reading context. - // This matters for mirrorable characters such as parentheses. We recognize - // characters purely based on their shape on the page, and by default produce - // the corresponding unicode for a left-to-right context. 
- const char* BestUTF8(int blob_index, bool in_rtl_context) const { - if (blob_index < 0 || best_choice == nullptr || - blob_index >= best_choice->length()) - return nullptr; - UNICHAR_ID id = best_choice->unichar_id(blob_index); - if (id < 0 || id >= uch_set->size()) - return nullptr; - UNICHAR_ID mirrored = uch_set->get_mirror(id); - if (in_rtl_context && mirrored > 0) - id = mirrored; - return uch_set->id_to_unichar_ext(id); - } - // Returns the UTF-8 string for the given blob index in the raw_choice word. - const char* RawUTF8(int blob_index) const { - if (blob_index < 0 || blob_index >= raw_choice->length()) - return nullptr; - UNICHAR_ID id = raw_choice->unichar_id(blob_index); - if (id < 0 || id >= uch_set->size()) - return nullptr; - return uch_set->id_to_unichar(id); - } - - UNICHARSET::Direction SymbolDirection(int blob_index) const { - if (best_choice == nullptr || - blob_index >= best_choice->length() || - blob_index < 0) - return UNICHARSET::U_OTHER_NEUTRAL; - return uch_set->get_direction(best_choice->unichar_id(blob_index)); - } - - bool AnyRtlCharsInWord() const { - if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) - return false; - for (int id = 0; id < best_choice->length(); id++) { - int unichar_id = best_choice->unichar_id(id); - if (unichar_id < 0 || unichar_id >= uch_set->size()) - continue; // Ignore illegal chars. - UNICHARSET::Direction dir = - uch_set->get_direction(unichar_id); - if (dir == UNICHARSET::U_RIGHT_TO_LEFT || - dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || - dir == UNICHARSET::U_ARABIC_NUMBER) - return true; - } - return false; - } - - bool AnyLtrCharsInWord() const { - if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) - return false; - for (int id = 0; id < best_choice->length(); id++) { - int unichar_id = best_choice->unichar_id(id); - if (unichar_id < 0 || unichar_id >= uch_set->size()) - continue; // Ignore illegal chars. 
- UNICHARSET::Direction dir = uch_set->get_direction(unichar_id); - if (dir == UNICHARSET::U_LEFT_TO_RIGHT) - return true; - } - return false; - } - - // Return whether the blobs in this WERD_RES 0, 1,... come from an engine - // that gave us the unichars in reading order (as opposed to strict left - // to right). - bool UnicharsInReadingOrder() const { - return best_choice->unichars_in_script_order(); - } - - void InitNonPointers(); - void InitPointers(); - void Clear(); - void ClearResults(); - void ClearWordChoices(); - void ClearRatings(); - - // Deep copies everything except the ratings MATRIX. - // To get that use deep_copy below. - WERD_RES& operator=(const WERD_RES& source); //from this - - void CopySimpleFields(const WERD_RES& source); - - // Initializes a blank (default constructed) WERD_RES from one that has - // already been recognized. - // Use SetupFor*Recognition afterwards to complete the setup and make - // it ready for a retry recognition. - void InitForRetryRecognition(const WERD_RES& source); - - // Sets up the members used in recognition: bln_boxes, chopped_word, - // seam_array, denorm. Returns false if - // the word is empty and sets up fake results. If use_body_size is - // true and row->body_size is set, then body_size will be used for - // blob normalization instead of xheight + ascrise. This flag is for - // those languages that are using CJK pitch model and thus it has to - // be true if and only if tesseract->textord_use_cjk_fp_model is - // true. - // If allow_detailed_fx is true, the feature extractor will receive fine - // precision outline information, allowing smoother features and better - // features on low resolution images. - // The norm_mode sets the default mode for normalization in absence - // of any of the above flags. It should really be a tesseract::OcrEngineMode - // but is declared as int for ease of use with tessedit_ocr_engine_mode. - // Returns false if the word is empty and sets up fake results. 
- bool SetupForRecognition(const UNICHARSET& unicharset_in, - tesseract::Tesseract* tesseract, Pix* pix, - int norm_mode, - const TBOX* norm_box, bool numeric_mode, - bool use_body_size, bool allow_detailed_fx, - ROW *row, const BLOCK* block); - - // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty - // accumulators from a made chopped word. We presume the fields are already - // empty. - void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in); - - // Sets up the members used in recognition for an empty recognition result: - // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. - void SetupFake(const UNICHARSET& uch); - - // Set the word as having the script of the input unicharset. - void SetupWordScript(const UNICHARSET& unicharset_in); - - // Sets up the blamer_bundle if it is not null, using the initialized denorm. - void SetupBlamerBundle(); - - // Computes the blob_widths and blob_gaps from the chopped_word. - void SetupBlobWidthsAndGaps(); - - // Updates internal data to account for a new SEAM (chop) at the given - // blob_number. Fixes the ratings matrix and states in the choices, as well - // as the blob widths and gaps. - void InsertSeam(int blob_number, SEAM* seam); - - // Returns true if all the word choices except the first have adjust_factors - // worse than the given threshold. - bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const; - - // Returns true if the current word is ambiguous (by number of answers or - // by dangerous ambigs.) - bool IsAmbiguous(); - - // Returns true if the ratings matrix size matches the sum of each of the - // segmentation states. - bool StatesAllValid(); - - // Prints a list of words found if debug is true or the word result matches - // the word_to_debug. - void DebugWordChoices(bool debug, const char* word_to_debug); - - // Prints the top choice along with the accepted/done flags. 
- void DebugTopChoice(const char* msg) const; - - // Removes from best_choices all choices which are not within a reasonable - // range of the best choice. - void FilterWordChoices(int debug_level); - - // Computes a set of distance thresholds used to control adaption. - // Compares the best choice for the current word to the best raw choice - // to determine which characters were classified incorrectly by the - // classifier. Then places a separate threshold into thresholds for each - // character in the word. If the classifier was correct, max_rating is placed - // into thresholds. If the classifier was incorrect, the mean match rating - // (error percentage) of the classifier's incorrect choice minus some margin - // is placed into thresholds. This can then be used by the caller to try to - // create a new template for the desired class that will classify the - // character with a rating better than the threshold value. The match rating - // placed into thresholds is never allowed to be below min_rating in order to - // prevent trying to make overly tight templates. - // min_rating limits how tight to make a template. - // max_rating limits how loose to make a template. - // rating_margin denotes the amount of margin to put in template. - void ComputeAdaptionThresholds(float certainty_scale, - float min_rating, - float max_rating, - float rating_margin, - float* thresholds); - - // Saves a copy of the word_choice if it has the best unadjusted rating. - // Returns true if the word_choice was the new best. - bool LogNewRawChoice(WERD_CHOICE* word_choice); - // Consumes word_choice by adding it to best_choices, (taking ownership) if - // the certainty for word_choice is some distance of the best choice in - // best_choices, or by deleting the word_choice and returning false. - // The best_choices list is kept in sorted order by rating. Duplicates are - // removed, and the list is kept no longer than max_num_choices in length. 
- // Returns true if the word_choice is still a valid pointer. - bool LogNewCookedChoice(int max_num_choices, bool debug, - WERD_CHOICE* word_choice); - - // Prints a brief list of all the best choices. - void PrintBestChoices() const; - - // Returns the sum of the widths of the blob between start_blob and last_blob - // inclusive. - int GetBlobsWidth(int start_blob, int last_blob); - // Returns the width of a gap between the specified blob and the next one. - int GetBlobsGap(int blob_index); - - // Returns the BLOB_CHOICE corresponding to the given index in the - // best choice word taken from the appropriate cell in the ratings MATRIX. - // Borrowed pointer, so do not delete. May return nullptr if there is no - // BLOB_CHOICE matching the unichar_id at the given index. - BLOB_CHOICE* GetBlobChoice(int index) const; - - // Returns the BLOB_CHOICE_LIST corresponding to the given index in the - // best choice word taken from the appropriate cell in the ratings MATRIX. - // Borrowed pointer, so do not delete. - BLOB_CHOICE_LIST* GetBlobChoices(int index) const; - - // Moves the results fields from word to this. This takes ownership of all - // the data, so src can be destructed. - // word1.ConsumeWordResult(word); - // delete word; - // is simpler and faster than: - // word1 = *word; - // delete word; - // as it doesn't need to copy and reallocate anything. - void ConsumeWordResults(WERD_RES* word); - - // Replace the best choice and rebuild box word. - // choice must be from the current best_choices list. - void ReplaceBestChoice(WERD_CHOICE* choice); - - // Builds the rebuild_word and sets the best_state from the chopped_word and - // the best_choice->state. - void RebuildBestState(); - - // Copies the chopped_word to the rebuild_word, faking a best_state as well. - // Also sets up the output box_word. - void CloneChoppedToRebuild(); - - // Sets/replaces the box_word with one made from the rebuild_word. 
- void SetupBoxWord(); - - // Sets up the script positions in the best_choice using the best_choice - // to get the unichars, and the unicharset to get the target positions. - void SetScriptPositions(); - // Sets all the blobs in all the words (best choice and alternates) to be - // the given position. (When a sub/superscript is recognized as a separate - // word, it falls victim to the rule that a whole word cannot be sub or - // superscript, so this function overrides that problem.) - void SetAllScriptPositions(tesseract::ScriptPos position); - - // Classifies the word with some already-calculated BLOB_CHOICEs. - // The choices are an array of blob_count pointers to BLOB_CHOICE, - // providing a single classifier result for each blob. - // The BLOB_CHOICEs are consumed and the word takes ownership. - // The number of blobs in the box_word must match blob_count. - void FakeClassifyWord(int blob_count, BLOB_CHOICE** choices); - - // Creates a WERD_CHOICE for the word using the top choices from the leading - // diagonal of the ratings matrix. - void FakeWordFromRatings(PermuterType permuter); - - // Copies the best_choice strings to the correct_text for adaption/training. - void BestChoiceToCorrectText(); - - // Merges 2 adjacent blobs in the result if the permanent callback - // class_cb returns other than INVALID_UNICHAR_ID, AND the permanent - // callback box_cb is nullptr or returns true, setting the merged blob - // result to the class returned from class_cb. - // Returns true if anything was merged. - bool ConditionalBlobMerge( - TessResultCallback2* class_cb, - TessResultCallback2* box_cb); - - // Merges 2 adjacent blobs in the result (index and index+1) and corrects - // all the data to account for the change. - void MergeAdjacentBlobs(int index); - - // Callback helper for fix_quotes returns a double quote if both - // arguments are quote, otherwise INVALID_UNICHAR_ID. 
- UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2); - void fix_quotes(); - - // Callback helper for fix_hyphens returns UNICHAR_ID of - if both - // arguments are hyphen, otherwise INVALID_UNICHAR_ID. - UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2); - // Callback helper for fix_hyphens returns true if box1 and box2 overlap - // (assuming both on the same textline, are in order and a chopped em dash.) - bool HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2); - void fix_hyphens(); - - // Callback helper for merge_tess_fails returns a space if both - // arguments are space, otherwise INVALID_UNICHAR_ID. - UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2); - void merge_tess_fails(); - - // Returns a really deep copy of *src, including the ratings MATRIX. - static WERD_RES* deep_copy(const WERD_RES* src) { - WERD_RES* result = new WERD_RES(*src); - // That didn't copy the ratings, but we want a copy if there is one to - // begin with. - if (src->ratings != nullptr) - result->ratings = src->ratings->DeepCopy(); - return result; - } - - // Copy blobs from word_res onto this word (eliminating spaces between). - // Since this may be called bidirectionally OR both the BOL and EOL flags. - void copy_on(WERD_RES *word_res) { //from this word - word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL)); - word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL)); - word->copy_on(word_res->word); - } - - // Returns true if the collection of count pieces, starting at start, are all - // natural connected components, ie there are no real chops involved. 
- bool PiecesAllNatural(int start, int count) const; -}; - -/************************************************************************* - * PAGE_RES_IT - Page results iterator - *************************************************************************/ - -class PAGE_RES_IT { - public: - PAGE_RES * page_res; // page being iterated - - PAGE_RES_IT() = default; - - PAGE_RES_IT(PAGE_RES *the_page_res) { // page result - page_res = the_page_res; - restart_page(); // ready to scan - } - - // Do two PAGE_RES_ITs point at the same word? - // This is much cheaper than cmp(). - bool operator ==(const PAGE_RES_IT &other) const; - - bool operator !=(const PAGE_RES_IT &other) const {return !(*this == other); } - - // Given another PAGE_RES_IT to the same page, - // this before other: -1 - // this equal to other: 0 - // this later than other: 1 - int cmp(const PAGE_RES_IT &other) const; - - WERD_RES *restart_page() { - return start_page(false); // Skip empty blocks. - } - WERD_RES *restart_page_with_empties() { - return start_page(true); // Allow empty blocks. - } - WERD_RES *start_page(bool empty_ok); - - WERD_RES *restart_row(); - - // ============ Methods that mutate the underling structures =========== - // Note that these methods will potentially invalidate other PAGE_RES_ITs - // and are intended to be used only while a single PAGE_RES_IT is active. - // This problem needs to be taken into account if these mutation operators - // are ever provided to PageIterator or its subclasses. - - // Inserts the new_word and a corresponding WERD_RES before the current - // position. The simple fields of the WERD_RES are copied from clone_res and - // the resulting WERD_RES is returned for further setup with best_choice etc. - WERD_RES* InsertSimpleCloneWord(const WERD_RES& clone_res, WERD* new_word); - - // Replaces the current WERD/WERD_RES with the given words. The given words - // contain fake blobs that indicate the position of the characters. 
These are - // replaced with real blobs from the current word as much as possible. - void ReplaceCurrentWord(tesseract::PointerVector* words); - - // Deletes the current WERD_RES and its underlying WERD. - void DeleteCurrentWord(); - - // Makes the current word a fuzzy space if not already fuzzy. Updates - // corresponding part of combo if required. - void MakeCurrentWordFuzzy(); - - WERD_RES *forward() { // Get next word. - return internal_forward(false, false); - } - // Move forward, but allow empty blocks to show as single nullptr words. - WERD_RES *forward_with_empties() { - return internal_forward(false, true); - } - - WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph - WERD_RES *forward_block(); // get first word in next non-empty block - - WERD_RES *prev_word() const { // previous word - return prev_word_res; - } - ROW_RES *prev_row() const { // row of prev word - return prev_row_res; - } - BLOCK_RES *prev_block() const { // block of prev word - return prev_block_res; - } - WERD_RES *word() const { // current word - return word_res; - } - ROW_RES *row() const { // row of current word - return row_res; - } - BLOCK_RES *block() const { // block of cur. word - return block_res; - } - WERD_RES *next_word() const { // next word - return next_word_res; - } - ROW_RES *next_row() const { // row of next word - return next_row_res; - } - BLOCK_RES *next_block() const { // block of next word - return next_block_res; - } - void rej_stat_word(); // for page/block/row - void ResetWordIterator(); - - private: - WERD_RES *internal_forward(bool new_block, bool empty_ok); - - WERD_RES * prev_word_res; // previous word - ROW_RES *prev_row_res; // row of prev word - BLOCK_RES *prev_block_res; // block of prev word - - WERD_RES *word_res; // current word - ROW_RES *row_res; // row of current word - BLOCK_RES *block_res; // block of cur. 
word - - WERD_RES *next_word_res; // next word - ROW_RES *next_row_res; // row of next word - BLOCK_RES *next_block_res; // block of next word - - BLOCK_RES_IT block_res_it; // iterators - ROW_RES_IT row_res_it; - WERD_RES_IT word_res_it; -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/params_training_featdef.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/params_training_featdef.cpp deleted file mode 100644 index d8617657..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/params_training_featdef.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: params_training_featdef.cpp -// Description: Utility functions for params training features. -// Author: David Eger -// Created: Mon Jun 11 11:26:42 PDT 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include - -#include "params_training_featdef.h" - -namespace tesseract { - -int ParamsTrainingFeatureByName(const char *name) { - if (name == nullptr) - return -1; - int array_size = sizeof(kParamsTrainingFeatureTypeName) / - sizeof(kParamsTrainingFeatureTypeName[0]); - for (int i = 0; i < array_size; i++) { - if (kParamsTrainingFeatureTypeName[i] == nullptr) - continue; - if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0) - return i; - } - return -1; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/params_training_featdef.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/params_training_featdef.h deleted file mode 100644 index 16a20e4e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/params_training_featdef.h +++ /dev/null @@ -1,154 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: params_training_featdef.h -// Description: Feature definitions for params training. -// Author: Rika Antonova -// Created: Mon Nov 28 11:26:42 PDT 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_ -#define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_ - -#include "genericvector.h" -#include "strngs.h" - -namespace tesseract { - -// Maximum number of unichars in the small and medium sized words -static const int kMaxSmallWordUnichars = 3; -static const int kMaxMediumWordUnichars = 6; - -// Raw features extracted from a single OCR hypothesis. -// The features are normalized (by outline length or number of unichars as -// appropriate) real-valued quantities with unbounded range and -// unknown distribution. -// Normalization / binarization of these features is done at a later stage. -// Note: when adding new fields to this enum make sure to modify -// kParamsTrainingFeatureTypeName -enum kParamsTrainingFeatureType { - // Digits - PTRAIN_DIGITS_SHORT, // 0 - PTRAIN_DIGITS_MED, // 1 - PTRAIN_DIGITS_LONG, // 2 - // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM) - PTRAIN_NUM_SHORT, // 3 - PTRAIN_NUM_MED, // 4 - PTRAIN_NUM_LONG, // 5 - // Document word (DOC_DAWG_PERM) - PTRAIN_DOC_SHORT, // 6 - PTRAIN_DOC_MED, // 7 - PTRAIN_DOC_LONG, // 8 - // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM) - PTRAIN_DICT_SHORT, // 9 - PTRAIN_DICT_MED, // 10 - PTRAIN_DICT_LONG, // 11 - // Frequent word (FREQ_DAWG_PERM) - PTRAIN_FREQ_SHORT, // 12 - PTRAIN_FREQ_MED, // 13 - PTRAIN_FREQ_LONG, // 14 - PTRAIN_SHAPE_COST_PER_CHAR, // 15 - PTRAIN_NGRAM_COST_PER_CHAR, // 16 - PTRAIN_NUM_BAD_PUNC, // 17 - PTRAIN_NUM_BAD_CASE, // 18 - PTRAIN_XHEIGHT_CONSISTENCY, // 19 - PTRAIN_NUM_BAD_CHAR_TYPE, // 20 - PTRAIN_NUM_BAD_SPACING, // 21 - PTRAIN_NUM_BAD_FONT, // 22 - PTRAIN_RATING_PER_CHAR, // 23 - - PTRAIN_NUM_FEATURE_TYPES -}; - -static const char * const kParamsTrainingFeatureTypeName[] = { - "PTRAIN_DIGITS_SHORT", // 0 - "PTRAIN_DIGITS_MED", // 1 - "PTRAIN_DIGITS_LONG", // 2 - "PTRAIN_NUM_SHORT", // 3 - "PTRAIN_NUM_MED", // 4 - "PTRAIN_NUM_LONG", // 5 - 
"PTRAIN_DOC_SHORT", // 6 - "PTRAIN_DOC_MED", // 7 - "PTRAIN_DOC_LONG", // 8 - "PTRAIN_DICT_SHORT", // 9 - "PTRAIN_DICT_MED", // 10 - "PTRAIN_DICT_LONG", // 11 - "PTRAIN_FREQ_SHORT", // 12 - "PTRAIN_FREQ_MED", // 13 - "PTRAIN_FREQ_LONG", // 14 - "PTRAIN_SHAPE_COST_PER_CHAR", // 15 - "PTRAIN_NGRAM_COST_PER_CHAR", // 16 - "PTRAIN_NUM_BAD_PUNC", // 17 - "PTRAIN_NUM_BAD_CASE", // 18 - "PTRAIN_XHEIGHT_CONSISTENCY", // 19 - "PTRAIN_NUM_BAD_CHAR_TYPE", // 20 - "PTRAIN_NUM_BAD_SPACING", // 21 - "PTRAIN_NUM_BAD_FONT", // 22 - "PTRAIN_RATING_PER_CHAR", // 23 -}; - -// Returns the index of the given feature (by name), -// or -1 meaning the feature is unknown. -int ParamsTrainingFeatureByName(const char *name); - - -// Entry with features extracted from a single OCR hypothesis for a word. -struct ParamsTrainingHypothesis { - ParamsTrainingHypothesis() : cost(0.0) { - memset(features, 0, sizeof(features)); - } - ParamsTrainingHypothesis(const ParamsTrainingHypothesis &other) { - memcpy(features, other.features, sizeof(features)); - str = other.str; - cost = other.cost; - } - ParamsTrainingHypothesis& operator=(const ParamsTrainingHypothesis& other) { - memcpy(features, other.features, sizeof(features)); - str = other.str; - cost = other.cost; - return *this; - } - float features[PTRAIN_NUM_FEATURE_TYPES]; - STRING str; // string corresponding to word hypothesis (for debugging) - float cost; // path cost computed by segsearch -}; - -// A list of hypotheses explored during one run of segmentation search. -using ParamsTrainingHypothesisList = GenericVector; - -// A bundle that accumulates all of the hypothesis lists explored during all -// of the runs of segmentation search on a word (e.g. a list of hypotheses -// explored on PASS1, PASS2, fix xheight pass, etc). -class ParamsTrainingBundle { - public: - ParamsTrainingBundle() = default; - // Starts a new hypothesis list. - // Should be called at the beginning of a new run of the segmentation search. 
- void StartHypothesisList() { - hyp_list_vec.push_back(ParamsTrainingHypothesisList()); - } - // Adds a new ParamsTrainingHypothesis to the current hypothesis list - // and returns the reference to the newly added entry. - ParamsTrainingHypothesis &AddHypothesis( - const ParamsTrainingHypothesis &other) { - if (hyp_list_vec.empty()) StartHypothesisList(); - hyp_list_vec.back().push_back(ParamsTrainingHypothesis(other)); - return hyp_list_vec.back().back(); - } - - GenericVector hyp_list_vec; -}; - -} // namespace tesseract - -#endif // TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pdblock.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pdblock.cpp deleted file mode 100644 index bbb1836b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pdblock.cpp +++ /dev/null @@ -1,381 +0,0 @@ -/********************************************************************** - * File: pdblock.cpp - * Description: PDBLK member functions and iterator functions. - * Author: Ray Smith - * Created: Fri Mar 15 09:41:28 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "pdblock.h" -#include -#include // std::unique_ptr -#include "allheaders.h" - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define BLOCK_LABEL_HEIGHT 150 //char height of block id - -const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds"; -const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line"; - -CLISTIZE (PDBLK) -/********************************************************************** - * PDBLK::PDBLK - * - * Constructor for a simple rectangular block. - **********************************************************************/ -PDBLK::PDBLK ( //rectangular block -int16_t xmin, //bottom left -int16_t ymin, int16_t xmax, //top right -int16_t ymax): box (ICOORD (xmin, ymin), ICOORD (xmax, ymax)) { - //boundaries - ICOORDELT_IT left_it = &leftside; - ICOORDELT_IT right_it = &rightside; - - hand_poly = nullptr; - left_it.set_to_list (&leftside); - right_it.set_to_list (&rightside); - //make default box - left_it.add_to_end (new ICOORDELT (xmin, ymin)); - left_it.add_to_end (new ICOORDELT (xmin, ymax)); - right_it.add_to_end (new ICOORDELT (xmax, ymin)); - right_it.add_to_end (new ICOORDELT (xmax, ymax)); - index_ = 0; -} - - -/********************************************************************** - * PDBLK::set_sides - * - * Sets left and right vertex lists - **********************************************************************/ - -void PDBLK::set_sides( //set vertex lists - ICOORDELT_LIST *left, //left vertices - ICOORDELT_LIST *right //right vertices - ) { - //boundaries - ICOORDELT_IT left_it = &leftside; - ICOORDELT_IT right_it = &rightside; - - leftside.clear(); - left_it.move_to_first(); - left_it.add_list_before(left); - rightside.clear(); - right_it.move_to_first(); - right_it.add_list_before(right); -} - -/********************************************************************** - * PDBLK::contains - * - * Return TRUE if the given point is within the block. 
- **********************************************************************/ - -bool PDBLK::contains( //test containment - ICOORD pt //point to test -) { - BLOCK_RECT_IT it = this; //rectangle iterator - ICOORD bleft, tright; //corners of rectangle - - for (it.start_block(); !it.cycled_rects(); it.forward()) { - //get rectangle - it.bounding_box (bleft, tright); - //inside rect - if (pt.x() >= bleft.x() && pt.x() <= tright.x() - && pt.y() >= bleft.y() && pt.y() <= tright.y()) - return true; //is inside - } - return false; //not inside -} - - -/********************************************************************** - * PDBLK::move - * - * Reposition block - **********************************************************************/ - -void PDBLK::move( // reposition block - const ICOORD vec // by vector - ) { - ICOORDELT_IT it(&leftside); - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - *(it.data ()) += vec; - - it.set_to_list (&rightside); - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - *(it.data ()) += vec; - - box.move (vec); -} - -// Returns a binary Pix mask with a 1 pixel for every pixel within the -// block. Rotates the coordinate system by rerotation prior to rendering. -Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) { - TBOX rotated_box(box); - rotated_box.rotate(rerotation); - Pix* pix = pixCreate(rotated_box.width(), rotated_box.height(), 1); - if (hand_poly != nullptr) { - // We are going to rotate, so get a deep copy of the points and - // make a new POLY_BLOCK with it. - ICOORDELT_LIST polygon; - polygon.deep_copy(hand_poly->points(), ICOORDELT::deep_copy); - POLY_BLOCK image_block(&polygon, hand_poly->isA()); - image_block.rotate(rerotation); - // Block outline is a polygon, so use a PB_LINE_IT to get the - // rasterized interior. (Runs of interior pixels on a line.) 
- PB_LINE_IT *lines = new PB_LINE_IT(&image_block); - for (int y = box.bottom(); y < box.top(); ++y) { - const std::unique_ptr segments( - lines->get_line(y)); - if (!segments->empty()) { - ICOORDELT_IT s_it(segments.get()); - // Each element of segments is a start x and x size of the - // run of interior pixels. - for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) { - int start = s_it.data()->x(); - int xext = s_it.data()->y(); - // Set the run of pixels to 1. - pixRasterop(pix, start - rotated_box.left(), - rotated_box.height() - 1 - (y - rotated_box.bottom()), - xext, 1, PIX_SET, nullptr, 0, 0); - } - } - } - delete lines; - } else { - // Just fill the whole block as there is only a bounding box. - pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), - PIX_SET, nullptr, 0, 0); - } - if (mask_box != nullptr) *mask_box = rotated_box; - return pix; -} - - -/********************************************************************** - * PDBLK::plot - * - * Plot the outline of a block in the given colour. 
- **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void PDBLK::plot( //draw outline - ScrollView* window, //window to draw in - int32_t serial, //serial number - ScrollView::Color colour //colour to draw in - ) { - ICOORD startpt; //start of outline - ICOORD endpt; //end of outline - ICOORD prevpt; //previous point - ICOORDELT_IT it = &leftside; //iterator - - //set the colour - window->Pen(colour); - window->TextAttributes("Times", BLOCK_LABEL_HEIGHT, false, false, false); - - if (hand_poly != nullptr) { - hand_poly->plot(window, serial); - } else if (!leftside.empty ()) { - startpt = *(it.data ()); //bottom left corner - // tprintf("Block %d bottom left is (%d,%d)\n", - // serial,startpt.x(),startpt.y()); - char temp_buff[34]; -#if !defined(_WIN32) || defined(__MINGW32__) - snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, serial); -#else - ultoa (serial, temp_buff, 10); -#endif - window->Text(startpt.x (), startpt.y (), temp_buff); - - window->SetCursor(startpt.x (), startpt.y ()); - do { - prevpt = *(it.data ()); //previous point - it.forward (); //move to next point - //draw round corner - window->DrawTo(prevpt.x (), it.data ()->y ()); - window->DrawTo(it.data ()->x (), it.data ()->y ()); - } - while (!it.at_last ()); //until end of list - endpt = *(it.data ()); //end point - - //other side of boundary - window->SetCursor(startpt.x (), startpt.y ()); - it.set_to_list (&rightside); - prevpt = startpt; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - //draw round corner - window->DrawTo(prevpt.x (), it.data ()->y ()); - window->DrawTo(it.data ()->x (), it.data ()->y ()); - prevpt = *(it.data ()); //previous point - } - //close boundary - window->DrawTo(endpt.x(), endpt.y()); - } -} -#endif - -/********************************************************************** - * PDBLK::operator= - * - * Assignment - duplicate the block structure, but with an EMPTY row list. 
- **********************************************************************/ - -PDBLK & PDBLK::operator= ( //assignment -const PDBLK & source //from this -) { - // this->ELIST_LINK::operator=(source); - if (!leftside.empty ()) - leftside.clear (); - if (!rightside.empty ()) - rightside.clear (); - leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy); - rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy); - box = source.box; - return *this; -} - - -/********************************************************************** - * BLOCK_RECT_IT::BLOCK_RECT_IT - * - * Construct a block rectangle iterator. - **********************************************************************/ - -BLOCK_RECT_IT::BLOCK_RECT_IT ( -//iterate rectangles -PDBLK * blkptr //from block -):left_it (&blkptr->leftside), right_it (&blkptr->rightside) { - block = blkptr; //remember block - //non empty list - if (!blkptr->leftside.empty ()) { - start_block(); //ready for iteration - } -} - - -/********************************************************************** - * BLOCK_RECT_IT::set_to_block - * - * Start a new block. - **********************************************************************/ - -void BLOCK_RECT_IT::set_to_block( //start (new) block - PDBLK *blkptr) { //block to start - block = blkptr; //remember block - //set iterators - left_it.set_to_list (&blkptr->leftside); - right_it.set_to_list (&blkptr->rightside); - if (!blkptr->leftside.empty ()) - start_block(); //ready for iteration -} - - -/********************************************************************** - * BLOCK_RECT_IT::start_block - * - * Restart a block. 
- **********************************************************************/ - -void BLOCK_RECT_IT::start_block() { //start (new) block - left_it.move_to_first (); - right_it.move_to_first (); - left_it.mark_cycle_pt (); - right_it.mark_cycle_pt (); - ymin = left_it.data ()->y (); //bottom of first box - ymax = left_it.data_relative (1)->y (); - if (right_it.data_relative (1)->y () < ymax) - //smallest step - ymax = right_it.data_relative (1)->y (); -} - - -/********************************************************************** - * BLOCK_RECT_IT::forward - * - * Move to the next rectangle in the block. - **********************************************************************/ - -void BLOCK_RECT_IT::forward() { //next rectangle - if (!left_it.empty ()) { //non-empty list - if (left_it.data_relative (1)->y () == ymax) - left_it.forward (); //move to meet top - if (right_it.data_relative (1)->y () == ymax) - right_it.forward (); - //last is special - if (left_it.at_last () || right_it.at_last ()) { - left_it.move_to_first (); //restart - right_it.move_to_first (); - //now at bottom - ymin = left_it.data ()->y (); - } - else { - ymin = ymax; //new bottom - } - //next point - ymax = left_it.data_relative (1)->y (); - if (right_it.data_relative (1)->y () < ymax) - //least step forward - ymax = right_it.data_relative (1)->y (); - } -} - - -/********************************************************************** - * BLOCK_LINE_IT::get_line - * - * Get the the start and width of a line in the block. 
- **********************************************************************/ - -int16_t BLOCK_LINE_IT::get_line( //get a line - int16_t y, //line to get - int16_t &xext //output extent - ) { - ICOORD bleft; //bounding box - ICOORD tright; //of block & rect - - //get block box - block->bounding_box (bleft, tright); - if (y < bleft.y () || y >= tright.y ()) { - // block->print(stderr,FALSE); - BADBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); - } - - //get rectangle box - rect_it.bounding_box (bleft, tright); - //inside rectangle - if (y >= bleft.y () && y < tright.y ()) { - //width of line - xext = tright.x () - bleft.x (); - return bleft.x (); //start of line - } - for (rect_it.start_block (); !rect_it.cycled_rects (); rect_it.forward ()) { - //get rectangle box - rect_it.bounding_box (bleft, tright); - //inside rectangle - if (y >= bleft.y () && y < tright.y ()) { - //width of line - xext = tright.x () - bleft.x (); - return bleft.x (); //start of line - } - } - LOSTBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); - return 0; //dummy to stop warning -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pdblock.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pdblock.h deleted file mode 100644 index 15609d26..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/pdblock.h +++ /dev/null @@ -1,174 +0,0 @@ -/********************************************************************** - * File: pdblock.h (Formerly pdblk.h) - * Description: Page block class definition. - * Author: Ray Smith - * Created: Thu Mar 14 17:32:01 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef PDBLOCK_H -#define PDBLOCK_H - -#include "clst.h" -#include "strngs.h" -#include "polyblk.h" - -class DLLSYM PDBLK; //forward decl -struct Pix; - -CLISTIZEH (PDBLK) -///page block -class PDBLK { - friend class BLOCK_RECT_IT; //< block iterator - friend class BLOCK; //< Page Block - - public: - /// empty constructor - PDBLK() { - hand_poly = nullptr; - index_ = 0; - } - /// simple constructor - PDBLK(int16_t xmin, //< bottom left - int16_t ymin, - int16_t xmax, //< top right - int16_t ymax); - - /// set vertex lists - ///@param left list of left vertices - ///@param right list of right vertices - void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right); - - /// destructor - ~PDBLK() { delete hand_poly; } - - POLY_BLOCK *poly_block() const { return hand_poly; } - /// set the poly block - void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; } - /// get box - void bounding_box(ICOORD &bottom_left, // bottom left - ICOORD &top_right) const { // topright - bottom_left = box.botleft(); - top_right = box.topright(); - } - /// get real box - const TBOX &bounding_box() const { return box; } - - int index() const { return index_; } - void set_index(int value) { index_ = value; } - - /// is pt inside block - bool contains(ICOORD pt); - - /// reposition block - void move(const ICOORD vec); // by vector - - // Returns a binary Pix mask with a 1 pixel for every pixel within the - // block. Rotates the coordinate system by rerotation prior to rendering. 
- // If not nullptr, mask_box is filled with the position box of the returned - // mask image. - Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); - -#ifndef GRAPHICS_DISABLED - /// draw histogram - ///@param window window to draw in - ///@param serial serial number - ///@param colour colour to draw in - void plot(ScrollView *window, int32_t serial, ScrollView::Color colour); -#endif // GRAPHICS_DISABLED - - /// assignment - ///@param source from this - PDBLK &operator=(const PDBLK &source); - - protected: - POLY_BLOCK *hand_poly; //< weird as well - ICOORDELT_LIST leftside; //< left side vertices - ICOORDELT_LIST rightside; //< right side vertices - TBOX box; //< bounding box - int index_; //< Serial number of this block. -}; - -class DLLSYM BLOCK_RECT_IT //rectangle iterator -{ - public: - ///constructor - ///@param blkptr block to iterate - BLOCK_RECT_IT(PDBLK *blkptr); - - ///start (new) block - void set_to_block ( - PDBLK * blkptr); //block to iterate - - ///start iteration - void start_block(); - - ///next rectangle - void forward(); - - ///test end - bool cycled_rects() { - return left_it.cycled_list() && right_it.cycled_list(); - } - - ///current rectangle - ///@param bleft bottom left - ///@param tright top right - void bounding_box(ICOORD &bleft, - ICOORD &tright) { - //bottom left - bleft = ICOORD (left_it.data ()->x (), ymin); - //top right - tright = ICOORD (right_it.data ()->x (), ymax); - } - - private: - int16_t ymin; //< bottom of rectangle - int16_t ymax; //< top of rectangle - PDBLK *block; //< block to iterate - ICOORDELT_IT left_it; //< boundary iterators - ICOORDELT_IT right_it; -}; - -///rectangle iterator -class DLLSYM BLOCK_LINE_IT -{ - public: - ///constructor - ///@param blkptr from block - BLOCK_LINE_IT (PDBLK * blkptr) - :rect_it (blkptr) { - block = blkptr; //remember block - } - - ///start (new) block - ///@param blkptr block to start - void set_to_block (PDBLK * blkptr) { - block = blkptr; //remember block - //set iterator 
- rect_it.set_to_block (blkptr); - } - - ///get a line - ///@param y line to get - ///@param xext output extent - int16_t get_line(int16_t y, - int16_t &xext); - - private: - PDBLK * block; //< block to iterate - BLOCK_RECT_IT rect_it; //< rectangle iterator -}; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/points.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/points.cpp deleted file mode 100644 index f7096015..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/points.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/********************************************************************** - * File: points.cpp (Formerly coords.c) - * Description: Member functions for coordinate classes. - * Author: Ray Smith - * Created: Fri Mar 15 08:58:17 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifdef _MSC_VER -#define _USE_MATH_DEFINES -#endif // _MSC_VER - -#include -#include -#include "helpers.h" -#include "serialis.h" -#include "points.h" - -ELISTIZE (ICOORDELT) //turn to list -bool FCOORD::normalise() { //Convert to unit vec - float len = length (); - - if (len < 0.0000000001) { - return false; - } - xcoord /= len; - ycoord /= len; - return true; -} - -// Set from the given x,y, shrinking the vector to fit if needed. 
-void ICOORD::set_with_shrink(int x, int y) { - // Fit the vector into an ICOORD, which is 16 bit. - int factor = 1; - int max_extent = std::max(abs(x), abs(y)); - if (max_extent > INT16_MAX) - factor = max_extent / INT16_MAX + 1; - xcoord = x / factor; - ycoord = y / factor; -} - -// The fortran/basic sgn function returns -1, 0, 1 if x < 0, x == 0, x > 0 -// respectively. -static int sign(int x) { - if (x < 0) - return -1; - else - return x > 0 ? 1 : 0; -} - -// Writes to the given file. Returns false in case of error. -bool ICOORD::Serialize(FILE* fp) const { - return tesseract::Serialize(fp, &xcoord) && - tesseract::Serialize(fp, &ycoord); -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool ICOORD::DeSerialize(bool swap, FILE* fp) { - if (!tesseract::DeSerialize(fp, &xcoord)) return false; - if (!tesseract::DeSerialize(fp, &ycoord)) return false; - if (swap) { - ReverseN(&xcoord, sizeof(xcoord)); - ReverseN(&ycoord, sizeof(ycoord)); - } - return true; -} - -// Setup for iterating over the pixels in a vector by the well-known -// Bresenham rendering algorithm. -// Starting with major/2 in the accumulator, on each step add major_step, -// and then add minor to the accumulator. When the accumulator >= major -// subtract major and step a minor step. - -void ICOORD::setup_render(ICOORD* major_step, ICOORD* minor_step, - int* major, int* minor) const { - int abs_x = abs(xcoord); - int abs_y = abs(ycoord); - if (abs_x >= abs_y) { - // X-direction is major. - major_step->xcoord = sign(xcoord); - major_step->ycoord = 0; - minor_step->xcoord = 0; - minor_step->ycoord = sign(ycoord); - *major = abs_x; - *minor = abs_y; - } else { - // Y-direction is major. - major_step->xcoord = 0; - major_step->ycoord = sign(ycoord); - minor_step->xcoord = sign(xcoord); - minor_step->ycoord = 0; - *major = abs_y; - *minor = abs_x; - } -} - -// Returns the standard feature direction corresponding to this. 
-// See binary_angle_plus_pi below for a description of the direction. -uint8_t FCOORD::to_direction() const { - return binary_angle_plus_pi(angle()); -} -// Sets this with a unit vector in the given standard feature direction. -void FCOORD::from_direction(uint8_t direction) { - double radians = angle_from_direction(direction); - xcoord = cos(radians); - ycoord = sin(radians); -} - -// Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a -// standard feature direction as an unsigned angle in 256ths of a circle -// measured anticlockwise from (-1, 0). -uint8_t FCOORD::binary_angle_plus_pi(double radians) { - return Modulo(IntCastRounded((radians + M_PI) * 128.0 / M_PI), 256); -} -// Inverse of binary_angle_plus_pi returns an angle in radians for the -// given standard feature direction. -double FCOORD::angle_from_direction(uint8_t direction) { - return direction * M_PI / 128.0 - M_PI; -} - -// Returns the point on the given line nearest to this, ie the point such -// that the vector point->this is perpendicular to the line. -// The line is defined as a line_point and a dir_vector for its direction. -FCOORD FCOORD::nearest_pt_on_line(const FCOORD& line_point, - const FCOORD& dir_vector) const { - FCOORD point_vector(*this - line_point); - // The dot product (%) is |dir_vector||point_vector|cos theta, so dividing by - // the square of the length of dir_vector gives us the fraction of dir_vector - // to add to line1 to get the appropriate point, so - // result = line1 + lambda dir_vector. 
- double lambda = point_vector % dir_vector / dir_vector.sqlength(); - return line_point + (dir_vector * lambda); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/points.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/points.h deleted file mode 100644 index 7bccbf5b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/points.h +++ /dev/null @@ -1,777 +0,0 @@ -/********************************************************************** - * File: points.h (Formerly coords.h) - * Description: Coordinate class definitions. - * Author: Ray Smith - * Created: Fri Mar 15 08:32:45 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef POINTS_H -#define POINTS_H - -#include // for sqrt, atan2 -#include -#include "elst.h" -#include "errcode.h" // for ASSERT_HOST -#include "platform.h" // for DLLSYM - -class FCOORD; - -///integer coordinate -class ICOORD -{ - friend class FCOORD; - - public: - ///empty constructor - ICOORD() { - xcoord = ycoord = 0; //default zero - } - ///constructor - ///@param xin x value - ///@param yin y value - ICOORD(int16_t xin, - int16_t yin) { - xcoord = xin; - ycoord = yin; - } - ///destructor - ~ICOORD () = default; - - ///access function - int16_t x() const { - return xcoord; - } - ///access_function - int16_t y() const { - return ycoord; - } - - ///rewrite function - void set_x(int16_t xin) { - xcoord = xin; //write new value - } - ///rewrite function - void set_y(int16_t yin) { //value to set - ycoord = yin; - } - - /// Set from the given x,y, shrinking the vector to fit if needed. - void set_with_shrink(int x, int y); - - ///find sq length - float sqlength() const { - return (float) (xcoord * xcoord + ycoord * ycoord); - } - - ///find length - float length() const { - return (float) sqrt (sqlength ()); - } - - ///sq dist between pts - float pt_to_pt_sqdist(const ICOORD &pt) const { - ICOORD gap; - - gap.xcoord = xcoord - pt.xcoord; - gap.ycoord = ycoord - pt.ycoord; - return gap.sqlength (); - } - - ///Distance between pts - float pt_to_pt_dist(const ICOORD &pt) const { - return (float) sqrt (pt_to_pt_sqdist (pt)); - } - - ///find angle - float angle() const { - return (float) atan2 ((double) ycoord, (double) xcoord); - } - - ///test equality - bool operator== (const ICOORD & other) const { - return xcoord == other.xcoord && ycoord == other.ycoord; - } - ///test inequality - bool operator!= (const ICOORD & other) const { - return xcoord != other.xcoord || ycoord != other.ycoord; - } - ///rotate 90 deg anti - friend ICOORD operator! 
(const ICOORD &); - ///unary minus - friend ICOORD operator- (const ICOORD &); - ///add - friend ICOORD operator+ (const ICOORD &, const ICOORD &); - ///add - friend ICOORD & operator+= (ICOORD &, const ICOORD &); - ///subtract - friend ICOORD operator- (const ICOORD &, const ICOORD &); - ///subtract - friend ICOORD & operator-= (ICOORD &, const ICOORD &); - ///scalar product - friend int32_t operator% (const ICOORD &, const ICOORD &); - ///cross product - friend int32_t operator *(const ICOORD &, - const ICOORD &); - ///multiply - friend ICOORD operator *(const ICOORD &, - int16_t); - ///multiply - friend ICOORD operator *(int16_t, - const ICOORD &); - ///multiply - friend ICOORD & operator*= (ICOORD &, int16_t); - ///divide - friend ICOORD operator/ (const ICOORD &, int16_t); - ///divide - friend ICOORD & operator/= (ICOORD &, int16_t); - ///rotate - ///@param vec by vector - void rotate(const FCOORD& vec); - - /// Setup for iterating over the pixels in a vector by the well-known - /// Bresenham rendering algorithm. - /// Starting with major/2 in the accumulator, on each step move by - /// major_step, and then add minor to the accumulator. When - /// accumulator >= major subtract major and also move by minor_step. - void setup_render(ICOORD* major_step, ICOORD* minor_step, - int* major, int* minor) const; - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. 
- bool DeSerialize(bool swap, FILE* fp); - - protected: - int16_t xcoord; //< x value - int16_t ycoord; //< y value -}; - -class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD - //embedded coord list -{ - public: - ///empty constructor - ICOORDELT() = default; - ///constructor from ICOORD - ICOORDELT (ICOORD icoord):ICOORD (icoord) { - } - ///constructor - ///@param xin x value - ///@param yin y value - ICOORDELT(int16_t xin, - int16_t yin) { - xcoord = xin; - ycoord = yin; - } - - static ICOORDELT* deep_copy(const ICOORDELT* src) { - ICOORDELT* elt = new ICOORDELT; - *elt = *src; - return elt; - } - -}; - -ELISTIZEH (ICOORDELT) -class DLLSYM FCOORD -{ - public: - ///empty constructor - FCOORD() = default; - ///constructor - ///@param xvalue x value - ///@param yvalue y value - FCOORD(float xvalue, - float yvalue) { - xcoord = xvalue; //set coords - ycoord = yvalue; - } - FCOORD( //make from ICOORD - ICOORD icoord) { //coords to set - xcoord = icoord.xcoord; - ycoord = icoord.ycoord; - } - - float x() const { //get coords - return xcoord; - } - float y() const { - return ycoord; - } - ///rewrite function - void set_x(float xin) { - xcoord = xin; //write new value - } - ///rewrite function - void set_y(float yin) { //value to set - ycoord = yin; - } - - ///find sq length - float sqlength() const { - return xcoord * xcoord + ycoord * ycoord; - } - - ///find length - float length() const { - return (float) sqrt (sqlength ()); - } - - ///sq dist between pts - float pt_to_pt_sqdist(const FCOORD &pt) const { - FCOORD gap; - - gap.xcoord = xcoord - pt.xcoord; - gap.ycoord = ycoord - pt.ycoord; - return gap.sqlength (); - } - - ///Distance between pts - float pt_to_pt_dist(const FCOORD &pt) const { - return (float) sqrt (pt_to_pt_sqdist (pt)); - } - - ///find angle - float angle() const { - return (float) atan2 (ycoord, xcoord); - } - // Returns the standard feature direction corresponding to this. - // See binary_angle_plus_pi below for a description of the direction. 
- uint8_t to_direction() const; - // Sets this with a unit vector in the given standard feature direction. - void from_direction(uint8_t direction); - - // Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a - // standard feature direction as an unsigned angle in 256ths of a circle - // measured anticlockwise from (-1, 0). - static uint8_t binary_angle_plus_pi(double angle); - // Inverse of binary_angle_plus_pi returns an angle in radians for the - // given standard feature direction. - static double angle_from_direction(uint8_t direction); - // Returns the point on the given line nearest to this, ie the point such - // that the vector point->this is perpendicular to the line. - // The line is defined as a line_point and a dir_vector for its direction. - // dir_vector need not be a unit vector. - FCOORD nearest_pt_on_line(const FCOORD& line_point, - const FCOORD& dir_vector) const; - - ///Convert to unit vec - bool normalise(); - - ///test equality - bool operator== (const FCOORD & other) { - return xcoord == other.xcoord && ycoord == other.ycoord; - } - ///test inequality - bool operator!= (const FCOORD & other) { - return xcoord != other.xcoord || ycoord != other.ycoord; - } - ///rotate 90 deg anti - friend FCOORD operator! 
(const FCOORD &); - ///unary minus - friend FCOORD operator- (const FCOORD &); - ///add - friend FCOORD operator+ (const FCOORD &, const FCOORD &); - ///add - friend FCOORD & operator+= (FCOORD &, const FCOORD &); - ///subtract - friend FCOORD operator- (const FCOORD &, const FCOORD &); - ///subtract - friend FCOORD & operator-= (FCOORD &, const FCOORD &); - ///scalar product - friend float operator% (const FCOORD &, const FCOORD &); - ///cross product - friend float operator *(const FCOORD &, const FCOORD &); - ///multiply - friend FCOORD operator *(const FCOORD &, float); - ///multiply - friend FCOORD operator *(float, const FCOORD &); - - ///multiply - friend FCOORD & operator*= (FCOORD &, float); - ///divide - friend FCOORD operator/ (const FCOORD &, float); - ///rotate - ///@param vec by vector - void rotate(const FCOORD vec); - // unrotate - undo a rotate(vec) - // @param vec by vector - void unrotate(const FCOORD &vec); - ///divide - friend FCOORD & operator/= (FCOORD &, float); - - private: - float xcoord; //2 floating coords - float ycoord; -}; - -/********************************************************************** - * operator! - * - * Rotate an ICOORD 90 degrees anticlockwise. - **********************************************************************/ - -inline ICOORD -operator! ( //rotate 90 deg anti -const ICOORD & src //thing to rotate -) { - ICOORD result; //output - - result.xcoord = -src.ycoord; - result.ycoord = src.xcoord; - return result; -} - - -/********************************************************************** - * operator- - * - * Unary minus of an ICOORD. - **********************************************************************/ - -inline ICOORD -operator- ( //unary minus -const ICOORD & src //thing to minus -) { - ICOORD result; //output - - result.xcoord = -src.xcoord; - result.ycoord = -src.ycoord; - return result; -} - - -/********************************************************************** - * operator+ - * - * Add 2 ICOORDS. 
- **********************************************************************/ - -inline ICOORD -operator+ ( //sum vectors -const ICOORD & op1, //operands -const ICOORD & op2) { - ICOORD sum; //result - - sum.xcoord = op1.xcoord + op2.xcoord; - sum.ycoord = op1.ycoord + op2.ycoord; - return sum; -} - - -/********************************************************************** - * operator+= - * - * Add 2 ICOORDS. - **********************************************************************/ - -inline ICOORD & -operator+= ( //sum vectors -ICOORD & op1, //operands -const ICOORD & op2) { - op1.xcoord += op2.xcoord; - op1.ycoord += op2.ycoord; - return op1; -} - - -/********************************************************************** - * operator- - * - * Subtract 2 ICOORDS. - **********************************************************************/ - -inline ICOORD -operator- ( //subtract vectors -const ICOORD & op1, //operands -const ICOORD & op2) { - ICOORD sum; //result - - sum.xcoord = op1.xcoord - op2.xcoord; - sum.ycoord = op1.ycoord - op2.ycoord; - return sum; -} - - -/********************************************************************** - * operator-= - * - * Subtract 2 ICOORDS. - **********************************************************************/ - -inline ICOORD & -operator-= ( //subtract vectors -ICOORD & op1, //operands -const ICOORD & op2) { - op1.xcoord -= op2.xcoord; - op1.ycoord -= op2.ycoord; - return op1; -} - - -/********************************************************************** - * operator% - * - * Scalar product of 2 ICOORDS. - **********************************************************************/ - -inline int32_t -operator% ( //scalar product -const ICOORD & op1, //operands -const ICOORD & op2) { - return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord; -} - - -/********************************************************************** - * operator* - * - * Cross product of 2 ICOORDS. 
- **********************************************************************/ - -inline int32_t operator *( //cross product - const ICOORD &op1, //operands - const ICOORD &op2) { - return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord; -} - - -/********************************************************************** - * operator* - * - * Scalar multiply of an ICOORD. - **********************************************************************/ - -inline ICOORD operator *( //scalar multiply - const ICOORD &op1, //operands - int16_t scale) { - ICOORD result; //output - - result.xcoord = op1.xcoord * scale; - result.ycoord = op1.ycoord * scale; - return result; -} - - -inline ICOORD operator *( //scalar multiply - int16_t scale, - const ICOORD &op1 //operands - ) { - ICOORD result; //output - - result.xcoord = op1.xcoord * scale; - result.ycoord = op1.ycoord * scale; - return result; -} - - -/********************************************************************** - * operator*= - * - * Scalar multiply of an ICOORD. - **********************************************************************/ - -inline ICOORD & -operator*= ( //scalar multiply -ICOORD & op1, //operands -int16_t scale) { - op1.xcoord *= scale; - op1.ycoord *= scale; - return op1; -} - - -/********************************************************************** - * operator/ - * - * Scalar divide of an ICOORD. - **********************************************************************/ - -inline ICOORD -operator/ ( //scalar divide -const ICOORD & op1, //operands -int16_t scale) { - ICOORD result; //output - - result.xcoord = op1.xcoord / scale; - result.ycoord = op1.ycoord / scale; - return result; -} - - -/********************************************************************** - * operator/= - * - * Scalar divide of an ICOORD. 
- **********************************************************************/ - -inline ICOORD & -operator/= ( //scalar divide -ICOORD & op1, //operands -int16_t scale) { - op1.xcoord /= scale; - op1.ycoord /= scale; - return op1; -} - - -/********************************************************************** - * ICOORD::rotate - * - * Rotate an ICOORD by the given (normalized) (cos,sin) vector. - **********************************************************************/ - -inline void ICOORD::rotate( //rotate by vector - const FCOORD& vec) { - int16_t tmp; - - tmp = (int16_t) floor (xcoord * vec.x () - ycoord * vec.y () + 0.5); - ycoord = (int16_t) floor (ycoord * vec.x () + xcoord * vec.y () + 0.5); - xcoord = tmp; -} - - -/********************************************************************** - * operator! - * - * Rotate an FCOORD 90 degrees anticlockwise. - **********************************************************************/ - -inline FCOORD -operator! ( //rotate 90 deg anti -const FCOORD & src //thing to rotate -) { - FCOORD result; //output - - result.xcoord = -src.ycoord; - result.ycoord = src.xcoord; - return result; -} - - -/********************************************************************** - * operator- - * - * Unary minus of an FCOORD. - **********************************************************************/ - -inline FCOORD -operator- ( //unary minus -const FCOORD & src //thing to minus -) { - FCOORD result; //output - - result.xcoord = -src.xcoord; - result.ycoord = -src.ycoord; - return result; -} - - -/********************************************************************** - * operator+ - * - * Add 2 FCOORDS. 
- **********************************************************************/ - -inline FCOORD -operator+ ( //sum vectors -const FCOORD & op1, //operands -const FCOORD & op2) { - FCOORD sum; //result - - sum.xcoord = op1.xcoord + op2.xcoord; - sum.ycoord = op1.ycoord + op2.ycoord; - return sum; -} - - -/********************************************************************** - * operator+= - * - * Add 2 FCOORDS. - **********************************************************************/ - -inline FCOORD & -operator+= ( //sum vectors -FCOORD & op1, //operands -const FCOORD & op2) { - op1.xcoord += op2.xcoord; - op1.ycoord += op2.ycoord; - return op1; -} - - -/********************************************************************** - * operator- - * - * Subtract 2 FCOORDS. - **********************************************************************/ - -inline FCOORD -operator- ( //subtract vectors -const FCOORD & op1, //operands -const FCOORD & op2) { - FCOORD sum; //result - - sum.xcoord = op1.xcoord - op2.xcoord; - sum.ycoord = op1.ycoord - op2.ycoord; - return sum; -} - - -/********************************************************************** - * operator-= - * - * Subtract 2 FCOORDS. - **********************************************************************/ - -inline FCOORD & -operator-= ( //subtract vectors -FCOORD & op1, //operands -const FCOORD & op2) { - op1.xcoord -= op2.xcoord; - op1.ycoord -= op2.ycoord; - return op1; -} - - -/********************************************************************** - * operator% - * - * Scalar product of 2 FCOORDS. - **********************************************************************/ - -inline float -operator% ( //scalar product -const FCOORD & op1, //operands -const FCOORD & op2) { - return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord; -} - - -/********************************************************************** - * operator* - * - * Cross product of 2 FCOORDS. 
- **********************************************************************/ - -inline float operator *( //cross product - const FCOORD &op1, //operands - const FCOORD &op2) { - return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord; -} - - -/********************************************************************** - * operator* - * - * Scalar multiply of an FCOORD. - **********************************************************************/ - -inline FCOORD operator *( //scalar multiply - const FCOORD &op1, //operands - float scale) { - FCOORD result; //output - - result.xcoord = op1.xcoord * scale; - result.ycoord = op1.ycoord * scale; - return result; -} - - -inline FCOORD operator *( //scalar multiply - float scale, - const FCOORD &op1 //operands - ) { - FCOORD result; //output - - result.xcoord = op1.xcoord * scale; - result.ycoord = op1.ycoord * scale; - return result; -} - - -/********************************************************************** - * operator*= - * - * Scalar multiply of an FCOORD. - **********************************************************************/ - -inline FCOORD & -operator*= ( //scalar multiply -FCOORD & op1, //operands -float scale) { - op1.xcoord *= scale; - op1.ycoord *= scale; - return op1; -} - - -/********************************************************************** - * operator/ - * - * Scalar divide of an FCOORD. - **********************************************************************/ - -inline FCOORD -operator/ ( //scalar divide -const FCOORD & op1, //operands -float scale) { - FCOORD result; //output - ASSERT_HOST(scale != 0.0f); - result.xcoord = op1.xcoord / scale; - result.ycoord = op1.ycoord / scale; - return result; -} - - -/********************************************************************** - * operator/= - * - * Scalar divide of an FCOORD. 
- **********************************************************************/ - -inline FCOORD & -operator/= ( //scalar divide -FCOORD & op1, //operands -float scale) { - ASSERT_HOST(scale != 0.0f); - op1.xcoord /= scale; - op1.ycoord /= scale; - return op1; -} - - -/********************************************************************** - * rotate - * - * Rotate an FCOORD by the given (normalized) (cos,sin) vector. - **********************************************************************/ - -inline void FCOORD::rotate( //rotate by vector - const FCOORD vec) { - float tmp; - - tmp = xcoord * vec.x () - ycoord * vec.y (); - ycoord = ycoord * vec.x () + xcoord * vec.y (); - xcoord = tmp; -} - -inline void FCOORD::unrotate(const FCOORD& vec) { - rotate(FCOORD(vec.x(), -vec.y())); -} - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyaprx.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyaprx.cpp deleted file mode 100644 index 43282176..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyaprx.cpp +++ /dev/null @@ -1,589 +0,0 @@ -/********************************************************************** - * File: polyaprx.cpp (Formerly polygon.c) - * Description: Code for polygonal approximation from old edgeprog. - * Author: Ray Smith - * Created: Thu Nov 25 11:42:04 GMT 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "polyaprx.h" -#include // for INT16_MAX, int8_t -#include "blobs.h" // for EDGEPT, TPOINT, VECTOR, TESSLINE -#include "coutln.h" // for C_OUTLINE -#include "errcode.h" // for ASSERT_HOST -#include "host.h" // for FALSE, TRUE -#include "mod128.h" // for DIR128 -#include "params.h" // for BoolParam, BOOL_VAR -#include "points.h" // for ICOORD -#include "rect.h" // for TBOX -#include "tprintf.h" // for tprintf -#include "vecfuncs.h" // for LENGTH, point_diff, CROSS - -#define EXTERN -#define FASTEDGELENGTH 256 - -EXTERN BOOL_VAR(poly_debug, FALSE, "Debug old poly"); -EXTERN BOOL_VAR(poly_wide_objects_better, TRUE, - "More accurate approx on wide things"); - -#define FIXED 4 /*OUTLINE point is fixed */ - -#define RUNLENGTH 1 /*length of run */ - -#define DIR 2 /*direction of run */ - -#define FLAGS 0 - -#define fixed_dist 20 //really an int_variable -#define approx_dist 15 //really an int_variable - -const int par1 = 4500 / (approx_dist * approx_dist); -const int par2 = 6750 / (approx_dist * approx_dist); - - -/********************************************************************** - * tesspoly_outline - * - * Approximate an outline from chain codes form using the old tess algorithm. - * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB - * contain pointers to the input C_OUTLINEs that enable higher-resolution - * feature extraction that does not use the polygonal approximation. - **********************************************************************/ - - -TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) { - TBOX loop_box; // bounding box - int32_t area; // loop area - EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path - EDGEPT* edgepts = stack_edgepts; - - // Use heap memory if the stack buffer is not big enough. 
- if (c_outline->pathlength() > FASTEDGELENGTH) - edgepts = new EDGEPT[c_outline->pathlength()]; - - loop_box = c_outline->bounding_box(); - area = loop_box.height(); - if (!poly_wide_objects_better && loop_box.width() > area) - area = loop_box.width(); - area *= area; - edgesteps_to_edgepts(c_outline, edgepts); - fix2(edgepts, area); - EDGEPT* edgept = poly2(edgepts, area); // 2nd approximation. - EDGEPT* startpt = edgept; - EDGEPT* result = nullptr; - EDGEPT* prev_result = nullptr; - do { - EDGEPT* new_pt = new EDGEPT; - new_pt->pos = edgept->pos; - new_pt->prev = prev_result; - if (prev_result == nullptr) { - result = new_pt; - } else { - prev_result->next = new_pt; - new_pt->prev = prev_result; - } - if (allow_detailed_fx) { - new_pt->src_outline = edgept->src_outline; - new_pt->start_step = edgept->start_step; - new_pt->step_count = edgept->step_count; - } - prev_result = new_pt; - edgept = edgept->next; - } - while (edgept != startpt); - prev_result->next = result; - result->prev = prev_result; - if (edgepts != stack_edgepts) - delete [] edgepts; - return TESSLINE::BuildFromOutlineList(result); -} - - -/********************************************************************** - * edgesteps_to_edgepts - * - * Convert a C_OUTLINE to EDGEPTs. 
- **********************************************************************/ - -EDGEPT * -edgesteps_to_edgepts ( //convert outline -C_OUTLINE * c_outline, //input -EDGEPT edgepts[] //output is array -) { - int32_t length; //steps in path - ICOORD pos; //current coords - int32_t stepindex; //current step - int32_t stepinc; //increment - int32_t epindex; //current EDGEPT - int32_t count; //repeated steps - ICOORD vec; //for this 8 step - ICOORD prev_vec; - int8_t epdir; //of this step - DIR128 prevdir; //prvious dir - DIR128 dir; //of this step - - pos = c_outline->start_pos (); //start of loop - length = c_outline->pathlength (); - stepindex = 0; - epindex = 0; - prevdir = -1; - count = 0; - int prev_stepindex = 0; - do { - dir = c_outline->step_dir (stepindex); - vec = c_outline->step (stepindex); - if (stepindex < length - 1 - && c_outline->step_dir (stepindex + 1) - dir == -32) { - dir += 128 - 16; - vec += c_outline->step (stepindex + 1); - stepinc = 2; - } - else - stepinc = 1; - if (count == 0) { - prevdir = dir; - prev_vec = vec; - } - if (prevdir.get_dir () != dir.get_dir ()) { - edgepts[epindex].pos.x = pos.x (); - edgepts[epindex].pos.y = pos.y (); - prev_vec *= count; - edgepts[epindex].vec.x = prev_vec.x (); - edgepts[epindex].vec.y = prev_vec.y (); - pos += prev_vec; - edgepts[epindex].flags[RUNLENGTH] = count; - edgepts[epindex].prev = &edgepts[epindex - 1]; - edgepts[epindex].flags[FLAGS] = 0; - edgepts[epindex].next = &edgepts[epindex + 1]; - prevdir += 64; - epdir = (DIR128) 0 - prevdir; - epdir >>= 4; - epdir &= 7; - edgepts[epindex].flags[DIR] = epdir; - edgepts[epindex].src_outline = c_outline; - edgepts[epindex].start_step = prev_stepindex; - edgepts[epindex].step_count = stepindex - prev_stepindex; - epindex++; - prevdir = dir; - prev_vec = vec; - count = 1; - prev_stepindex = stepindex; - } - else - count++; - stepindex += stepinc; - } - while (stepindex < length); - edgepts[epindex].pos.x = pos.x (); - edgepts[epindex].pos.y = pos.y (); - 
prev_vec *= count; - edgepts[epindex].vec.x = prev_vec.x (); - edgepts[epindex].vec.y = prev_vec.y (); - pos += prev_vec; - edgepts[epindex].flags[RUNLENGTH] = count; - edgepts[epindex].flags[FLAGS] = 0; - edgepts[epindex].src_outline = c_outline; - edgepts[epindex].start_step = prev_stepindex; - edgepts[epindex].step_count = stepindex - prev_stepindex; - edgepts[epindex].prev = &edgepts[epindex - 1]; - edgepts[epindex].next = &edgepts[0]; - prevdir += 64; - epdir = (DIR128) 0 - prevdir; - epdir >>= 4; - epdir &= 7; - edgepts[epindex].flags[DIR] = epdir; - edgepts[0].prev = &edgepts[epindex]; - ASSERT_HOST (pos.x () == c_outline->start_pos ().x () - && pos.y () == c_outline->start_pos ().y ()); - return &edgepts[0]; -} - - -/********************************************************************** - *fix2(start,area) fixes points on the outline according to a trial method* - **********************************************************************/ - -//#pragma OPT_LEVEL 1 /*stop compiler bugs*/ - -void fix2( //polygonal approx - EDGEPT *start, /*loop to approimate */ - int area) { - EDGEPT *edgept; /*current point */ - EDGEPT *edgept1; - EDGEPT *loopstart; /*modified start of loop */ - EDGEPT *linestart; /*start of line segment */ - int dir1, dir2; /*directions of line */ - int sum1, sum2; /*lengths in dir1,dir2 */ - int stopped; /*completed flag */ - int fixed_count; //no of fixed points - int d01, d12, d23, gapmin; - TPOINT d01vec, d12vec, d23vec; - EDGEPT *edgefix, *startfix; - EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3; - - edgept = start; /*start of loop */ - while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 - && (dir1 = - (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 - && dir1 != 6) - edgept = edgept->next; /*find suitable start */ - loopstart = edgept; /*remember start */ - - stopped = 0; /*not finished yet */ - edgept->flags[FLAGS] |= FIXED; /*fix it */ - do { - linestart = edgept; /*possible start of line */ - dir1 = 
edgept->flags[DIR]; /*first direction */ - /*length of dir1 */ - sum1 = edgept->flags[RUNLENGTH]; - edgept = edgept->next; - dir2 = edgept->flags[DIR]; /*2nd direction */ - /*length in dir2 */ - sum2 = edgept->flags[RUNLENGTH]; - if (((dir1 - dir2 + 1) & 7) < 3) { - while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) { - edgept = edgept->next; /*look at next */ - if (edgept->flags[DIR] == dir1) - /*sum lengths */ - sum1 += edgept->flags[RUNLENGTH]; - else - sum2 += edgept->flags[RUNLENGTH]; - } - - if (edgept == loopstart) - stopped = 1; /*finished */ - if (sum2 + sum1 > 2 - && linestart->prev->flags[DIR] == dir2 - && (linestart->prev->flags[RUNLENGTH] > - linestart->flags[RUNLENGTH] || sum2 > sum1)) { - /*start is back one */ - linestart = linestart->prev; - linestart->flags[FLAGS] |= FIXED; - } - - if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3 - || (edgept->flags[DIR] == dir1 && sum1 >= sum2) - || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] - || (edgept->flags[DIR] == dir2 && sum2 >= sum1)) - && linestart->next != edgept)) - edgept = edgept->next; - } - /*sharp bend */ - edgept->flags[FLAGS] |= FIXED; - } - /*do whole loop */ - while (edgept != loopstart && !stopped); - - edgept = start; - do { - if (((edgept->flags[RUNLENGTH] >= 8) && - (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) || - ((edgept->flags[RUNLENGTH] >= 8) && - ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) { - edgept->flags[FLAGS] |= FIXED; - edgept1 = edgept->next; - edgept1->flags[FLAGS] |= FIXED; - } - edgept = edgept->next; - } - while (edgept != start); - - edgept = start; - do { - /*single fixed step */ - if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1 - /*and neighours free */ - && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0 - /*same pair of dirs */ - && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && 
edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR] - && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) { - /*unfix it */ - edgept->flags[FLAGS] &= ~FIXED; - edgept->next->flags[FLAGS] &= ~FIXED; - } - edgept = edgept->next; /*do all points */ - } - while (edgept != start); /*until finished */ - - stopped = 0; - if (area < 450) - area = 450; - - gapmin = area * fixed_dist * fixed_dist / 44000; - - edgept = start; - fixed_count = 0; - do { - if (edgept->flags[FLAGS] & FIXED) - fixed_count++; - edgept = edgept->next; - } - while (edgept != start); - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; - edgefix0 = edgept; - - edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; - edgefix1 = edgept; - - edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; - edgefix2 = edgept; - - edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; - edgefix3 = edgept; - - startfix = edgefix2; - - do { - if (fixed_count <= 3) - break; //already too few - point_diff (d12vec, edgefix1->pos, edgefix2->pos); - d12 = LENGTH (d12vec); - // TODO(rays) investigate this change: - // Only unfix a point if it is part of a low-curvature section - // of outline and the total angle change of the outlines is - // less than 90 degrees, ie the scalar product is positive. 
- // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) { - if (d12 <= gapmin) { - point_diff (d01vec, edgefix0->pos, edgefix1->pos); - d01 = LENGTH (d01vec); - point_diff (d23vec, edgefix2->pos, edgefix3->pos); - d23 = LENGTH (d23vec); - if (d01 > d23) { - edgefix2->flags[FLAGS] &= ~FIXED; - fixed_count--; - } - else { - edgefix1->flags[FLAGS] &= ~FIXED; - fixed_count--; - edgefix1 = edgefix2; - } - } - else { - edgefix0 = edgefix1; - edgefix1 = edgefix2; - } - edgefix2 = edgefix3; - edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) { - if (edgept == startfix) - stopped = 1; - edgept = edgept->next; - } - edgefix3 = edgept; - edgefix = edgefix2; - } - while ((edgefix != startfix) && (!stopped)); -} - - -//#pragma OPT_LEVEL 2 /*stop compiler bugs*/ - -/********************************************************************** - *poly2(startpt,area,path) applies a second approximation to the outline - *using the points which have been fixed by the first approximation* - **********************************************************************/ - -EDGEPT *poly2( //second poly - EDGEPT *startpt, /*start of loop */ - int area /*area of blob box */ - ) { - EDGEPT *edgept; /*current outline point */ - EDGEPT *loopstart; /*starting point */ - EDGEPT *linestart; /*start of line */ - int edgesum; /*correction count */ - - if (area < 1200) - area = 1200; /*minimum value */ - - loopstart = nullptr; /*not found it yet */ - edgept = startpt; /*start of loop */ - - do { - /*current point fixed */ - if (edgept->flags[FLAGS] & FIXED - /*and next not */ - && (edgept->next->flags[FLAGS] & FIXED) == 0) { - loopstart = edgept; /*start of repoly */ - break; - } - edgept = edgept->next; /*next point */ - } - while (edgept != startpt); /*until found or finished */ - - if (loopstart == nullptr && (startpt->flags[FLAGS] & FIXED) == 0) { - /*fixed start of loop */ - startpt->flags[FLAGS] |= FIXED; - loopstart = startpt; /*or start of loop */ - } - if (loopstart) { - 
do { - edgept = loopstart; /*first to do */ - do { - linestart = edgept; - edgesum = 0; /*sum of lengths */ - do { - /*sum lengths */ - edgesum += edgept->flags[RUNLENGTH]; - edgept = edgept->next; /*move on */ - } - while ((edgept->flags[FLAGS] & FIXED) == 0 - && edgept != loopstart && edgesum < 126); - if (poly_debug) - tprintf - ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", - linestart->pos.x, linestart->pos.y, linestart->flags[DIR], - linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x, - edgept->pos.y); - /*reapproximate */ - cutline(linestart, edgept, area); - - while ((edgept->next->flags[FLAGS] & FIXED) - && edgept != loopstart) - edgept = edgept->next; /*look for next non-fixed */ - } - /*do all the loop */ - while (edgept != loopstart); - edgesum = 0; - do { - if (edgept->flags[FLAGS] & FIXED) - edgesum++; - edgept = edgept->next; - } - //count fixed pts - while (edgept != loopstart); - if (edgesum < 3) - area /= 2; //must have 3 pts - } - while (edgesum < 3); - do { - linestart = edgept; - do { - edgept = edgept->next; - } - while ((edgept->flags[FLAGS] & FIXED) == 0); - linestart->next = edgept; - edgept->prev = linestart; - linestart->vec.x = edgept->pos.x - linestart->pos.x; - linestart->vec.y = edgept->pos.y - linestart->pos.y; - } - while (edgept != loopstart); - } - else - edgept = startpt; /*start of loop */ - - loopstart = edgept; /*new start */ - return loopstart; /*correct exit */ -} - - -/********************************************************************** - *cutline(first,last,area) straightens out a line by partitioning - *and joining the ends by a straight line* - **********************************************************************/ - -void cutline( //recursive refine - EDGEPT *first, /*ends of line */ - EDGEPT *last, - int area /*area of object */ - ) { - EDGEPT *edge; /*current edge */ - TPOINT vecsum; /*vector sum */ - int vlen; /*approx length of vecsum */ - TPOINT vec; /*accumulated vector */ - EDGEPT *maxpoint; 
/*worst point */ - int maxperp; /*max deviation */ - int perp; /*perp distance */ - int ptcount; /*no of points */ - int squaresum; /*sum of perps */ - - edge = first; /*start of line */ - if (edge->next == last) - return; /*simple line */ - - /*vector sum */ - vecsum.x = last->pos.x - edge->pos.x; - vecsum.y = last->pos.y - edge->pos.y; - if (vecsum.x == 0 && vecsum.y == 0) { - /*special case */ - vecsum.x = -edge->prev->vec.x; - vecsum.y = -edge->prev->vec.y; - } - /*absolute value */ - vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x; - if (vecsum.y > vlen) - vlen = vecsum.y; /*maximum */ - else if (-vecsum.y > vlen) - vlen = -vecsum.y; /*absolute value */ - - vec.x = edge->vec.x; /*accumulated vector */ - vec.y = edge->vec.y; - maxperp = 0; /*none yet */ - squaresum = ptcount = 0; - edge = edge->next; /*move to actual point */ - maxpoint = edge; /*in case there isn't one */ - do { - perp = CROSS (vec, vecsum); /*get perp distance */ - if (perp != 0) { - perp *= perp; /*squared deviation */ - } - squaresum += perp; /*sum squares */ - ptcount++; /*count points */ - if (poly_debug) - tprintf ("Cutline:Final perp=%d\n", perp); - if (perp > maxperp) { - maxperp = perp; - maxpoint = edge; /*find greatest deviation */ - } - vec.x += edge->vec.x; /*accumulate vectors */ - vec.y += edge->vec.y; - edge = edge->next; - } - while (edge != last); /*test all line */ - - perp = LENGTH (vecsum); - ASSERT_HOST (perp != 0); - - if (maxperp < 256 * INT16_MAX) { - maxperp <<= 8; - maxperp /= perp; /*true max perp */ - } - else { - maxperp /= perp; - maxperp <<= 8; /*avoid overflow */ - } - if (squaresum < 256 * INT16_MAX) - /*mean squared perp */ - perp = (squaresum << 8) / (perp * ptcount); - else - /*avoid overflow */ - perp = (squaresum / perp << 8) / ptcount; - - if (poly_debug) - tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", - area, maxperp / 256.0, maxperp * 200.0 / area, - perp / 256.0, perp * 300.0 / area); - if (maxperp * par1 >= 10 * area || perp * par2 >= 10 
* area || vlen >= 126) { - maxpoint->flags[FLAGS] |= FIXED; - /*partitions */ - cutline(first, maxpoint, area); - cutline(maxpoint, last, area); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyaprx.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyaprx.h deleted file mode 100644 index 2542d67b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyaprx.h +++ /dev/null @@ -1,45 +0,0 @@ -/********************************************************************** - * File: polyaprx.h (Formerly polygon.h) - * Description: Code for polygonal approximation from old edgeprog. - * Author: Ray Smith - * Created: Thu Nov 25 11:42:04 GMT 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef POLYAPRX_H -#define POLYAPRX_H - -class C_OUTLINE; -struct EDGEPT; -struct TESSLINE; - -// convert a chain-coded input to the old OUTLINE approximation -TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline); -EDGEPT *edgesteps_to_edgepts ( //convert outline -C_OUTLINE * c_outline, //input -EDGEPT edgepts[] //output is array -); -void fix2( //polygonal approx - EDGEPT *start, /*loop to approimate */ - int area); -EDGEPT *poly2( //second poly - EDGEPT *startpt, /*start of loop */ - int area /*area of blob box */ - ); -void cutline( //recursive refine - EDGEPT *first, /*ends of line */ - EDGEPT *last, - int area /*area of object */ - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyblk.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyblk.cpp deleted file mode 100644 index e3585d45..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyblk.cpp +++ /dev/null @@ -1,416 +0,0 @@ -/********************************************************************** - * File: polyblk.cpp (Formerly poly_block.c) - * Description: Polygonal blocks - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "polyblk.h" -#include -#include -#include -#include // std::unique_ptr -#include "elst.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define INTERSECTING INT16_MAX - -int lessthan(const void *first, const void *second); - -POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) { - ICOORDELT_IT v = &vertices; - - vertices.clear(); - v.move_to_first(); - v.add_list_before(points); - compute_bb(); - type = t; -} - -// Initialize from box coordinates. -POLY_BLOCK::POLY_BLOCK(const TBOX& tbox, PolyBlockType t) { - vertices.clear(); - ICOORDELT_IT v = &vertices; - v.move_to_first(); - v.add_to_end(new ICOORDELT(tbox.left(), tbox.top())); - v.add_to_end(new ICOORDELT(tbox.left(), tbox.bottom())); - v.add_to_end(new ICOORDELT(tbox.right(), tbox.bottom())); - v.add_to_end(new ICOORDELT(tbox.right(), tbox.top())); - compute_bb(); - type = t; -} - -/** - * @name POLY_BLOCK::compute_bb - * - * Compute the bounding box from the outline points. 
- */ - -void POLY_BLOCK::compute_bb() { //constructor - ICOORD ibl, itr; //integer bb - ICOORD botleft; //bounding box - ICOORD topright; - ICOORD pos; //current pos; - ICOORDELT_IT pts = &vertices; //iterator - - botleft = *pts.data (); - topright = botleft; - do { - pos = *pts.data (); - if (pos.x () < botleft.x ()) - //get bounding box - botleft = ICOORD (pos.x (), botleft.y ()); - if (pos.y () < botleft.y ()) - botleft = ICOORD (botleft.x (), pos.y ()); - if (pos.x () > topright.x ()) - topright = ICOORD (pos.x (), topright.y ()); - if (pos.y () > topright.y ()) - topright = ICOORD (topright.x (), pos.y ()); - pts.forward (); - } - while (!pts.at_first ()); - ibl = ICOORD (botleft.x (), botleft.y ()); - itr = ICOORD (topright.x (), topright.y ()); - box = TBOX (ibl, itr); -} - - -/** - * @name POLY_BLOCK::winding_number - * - * Return the winding number of the outline around the given point. - * @param point point to wind around - */ - -int16_t POLY_BLOCK::winding_number(const ICOORD &point) { - int16_t count; //winding count - ICOORD pt; //current point - ICOORD vec; //point to current point - ICOORD vvec; //current point to next point - int32_t cross; //cross product - ICOORDELT_IT it = &vertices; //iterator - - count = 0; - do { - pt = *it.data (); - vec = pt - point; - vvec = *it.data_relative (1) - pt; - //crossing the line - if (vec.y () <= 0 && vec.y () + vvec.y () > 0) { - cross = vec * vvec; //cross product - if (cross > 0) - count++; //crossing right half - else if (cross == 0) - return INTERSECTING; //going through point - } - else if (vec.y () > 0 && vec.y () + vvec.y () <= 0) { - cross = vec * vvec; - if (cross < 0) - count--; //crossing back - else if (cross == 0) - return INTERSECTING; //illegal - } - else if (vec.y () == 0 && vec.x () == 0) - return INTERSECTING; - it.forward (); - } - while (!it.at_first ()); - return count; //winding number -} - - -/// @return true if other is inside this. 
-bool POLY_BLOCK::contains(POLY_BLOCK *other) { - int16_t count; // winding count - ICOORDELT_IT it = &vertices; // iterator - ICOORD vertex; - - if (!box.overlap (*(other->bounding_box ()))) - return false; // can't be contained - - /* check that no vertex of this is inside other */ - - do { - vertex = *it.data (); - // get winding number - count = other->winding_number (vertex); - if (count != INTERSECTING) - if (count != 0) - return false; - it.forward (); - } - while (!it.at_first ()); - - /* check that all vertices of other are inside this */ - - //switch lists - it.set_to_list (other->points ()); - do { - vertex = *it.data (); - //try other way round - count = winding_number (vertex); - if (count != INTERSECTING) - if (count == 0) - return false; - it.forward (); - } - while (!it.at_first ()); - return true; -} - - -/** - * @name POLY_BLOCK::rotate - * - * Rotate the POLY_BLOCK. - * @param rotation cos, sin of angle - */ - -void POLY_BLOCK::rotate(FCOORD rotation) { - FCOORD pos; //current pos; - ICOORDELT *pt; //current point - ICOORDELT_IT pts = &vertices; //iterator - - do { - pt = pts.data (); - pos.set_x (pt->x ()); - pos.set_y (pt->y ()); - pos.rotate (rotation); - pt->set_x(static_cast(floor(pos.x() + 0.5))); - pt->set_y(static_cast(floor(pos.y() + 0.5))); - pts.forward (); - } - while (!pts.at_first ()); - compute_bb(); -} - -/** - * @name POLY_BLOCK::reflect_in_y_axis - * - * Reflect the coords of the polygon in the y-axis. (Flip the sign of x.) - */ - -void POLY_BLOCK::reflect_in_y_axis() { - ICOORDELT *pt; // current point - ICOORDELT_IT pts = &vertices; // Iterator. - - do { - pt = pts.data(); - pt->set_x(-pt->x()); - pts.forward(); - } - while (!pts.at_first()); - compute_bb(); -} - - -/** - * POLY_BLOCK::move - * - * Move the POLY_BLOCK. 
- * @param shift x,y translation vector - */ - -void POLY_BLOCK::move(ICOORD shift) { - ICOORDELT *pt; //current point - ICOORDELT_IT pts = &vertices; //iterator - - do { - pt = pts.data (); - *pt += shift; - pts.forward (); - } - while (!pts.at_first ()); - compute_bb(); -} - - -#ifndef GRAPHICS_DISABLED -void POLY_BLOCK::plot(ScrollView* window, int32_t num) { - ICOORDELT_IT v = &vertices; - - window->Pen(ColorForPolyBlockType(type)); - - v.move_to_first (); - - if (num > 0) { - window->TextAttributes("Times", 80, false, false, false); - char temp_buff[34]; -#if !defined(_WIN32) || defined(__MINGW32__) - snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, num); -#else - ltoa (num, temp_buff, 10); -#endif - window->Text(v.data ()->x (), v.data ()->y (), temp_buff); - } - - window->SetCursor(v.data ()->x (), v.data ()->y ()); - for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) { - window->DrawTo(v.data ()->x (), v.data ()->y ()); - } - v.move_to_first (); - window->DrawTo(v.data ()->x (), v.data ()->y ()); -} - - -void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) { - int16_t y; - int16_t width; - PB_LINE_IT *lines; - ICOORDELT_IT s_it; - - lines = new PB_LINE_IT (this); - window->Pen(colour); - - for (y = this->bounding_box ()->bottom (); - y <= this->bounding_box ()->top (); y++) { - const std::unique_ptr segments( - lines->get_line(y)); - if (!segments->empty ()) { - s_it.set_to_list(segments.get()); - for (s_it.mark_cycle_pt (); !s_it.cycled_list (); s_it.forward ()) { - // Note different use of ICOORDELT, x coord is x coord of pixel - // at the start of line segment, y coord is length of line segment - // Last pixel is start pixel + length. - width = s_it.data ()->y (); - window->SetCursor(s_it.data ()->x (), y); - window->DrawTo(s_it.data()->x() + static_cast(width), y); - } - } - } - - delete lines; -} -#endif - - -/// @return true if the polygons of other and this overlap. 
-bool POLY_BLOCK::overlap(POLY_BLOCK *other) { - int16_t count; // winding count - ICOORDELT_IT it = &vertices; // iterator - ICOORD vertex; - - if (!box.overlap(*(other->bounding_box()))) - return false; // can't be any overlap. - - /* see if a vertex of this is inside other */ - - do { - vertex = *it.data (); - // get winding number - count = other->winding_number (vertex); - if (count != INTERSECTING) - if (count != 0) - return true; - it.forward (); - } - while (!it.at_first ()); - - /* see if a vertex of other is inside this */ - - // switch lists - it.set_to_list (other->points ()); - do { - vertex = *it.data(); - // try other way round - count = winding_number (vertex); - if (count != INTERSECTING) - if (count != 0) - return true; - it.forward (); - } - while (!it.at_first ()); - return false; -} - - -ICOORDELT_LIST *PB_LINE_IT::get_line(int16_t y) { - ICOORDELT_IT v, r; - ICOORDELT_LIST *result; - ICOORDELT *x, *current, *previous; - float fy = y + 0.5f; - result = new ICOORDELT_LIST (); - r.set_to_list (result); - v.set_to_list (block->points ()); - - for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) { - if (((v.data_relative (-1)->y () > y) && (v.data ()->y () <= y)) - || ((v.data_relative (-1)->y () <= y) && (v.data ()->y () > y))) { - previous = v.data_relative (-1); - current = v.data (); - float fx = 0.5f + previous->x() + - (current->x() - previous->x()) * (fy - previous->y()) / - (current->y() - previous->y()); - x = new ICOORDELT(static_cast(fx), 0); - r.add_to_end (x); - } - } - - if (!r.empty ()) { - r.sort (lessthan); - for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) - x = r.data (); - for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) { - r.data ()->set_y (r.data_relative (1)->x () - r.data ()->x ()); - r.forward (); - delete (r.extract ()); - } - } - - return result; -} - - -int lessthan(const void *first, const void *second) { - const ICOORDELT *p1 = *reinterpret_cast(first); - const ICOORDELT *p2 = 
*reinterpret_cast(second); - - if (p1->x () < p2->x ()) - return (-1); - else if (p1->x () > p2->x ()) - return (1); - else - return (0); -} - -#ifndef GRAPHICS_DISABLED -/// Returns a color to draw the given type. -ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) { - // Keep kPBColors in sync with PolyBlockType. - const ScrollView::Color kPBColors[PT_COUNT] = { - ScrollView::WHITE, // Type is not yet known. Keep as the 1st element. - ScrollView::BLUE, // Text that lives inside a column. - ScrollView::CYAN, // Text that spans more than one column. - ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out region. - ScrollView::AQUAMARINE, // Partition belonging to an equation region. - ScrollView::SKY_BLUE, // Partition belonging to an inline equation region. - ScrollView::MAGENTA, // Partition belonging to a table region. - ScrollView::GREEN, // Text-line runs vertically. - ScrollView::LIGHT_BLUE, // Text that belongs to an image. - ScrollView::RED, // Image that lives inside a column. - ScrollView::YELLOW, // Image that spans more than one column. - ScrollView::ORANGE, // Image in a cross-column pull-out region. - ScrollView::BROWN, // Horizontal Line. - ScrollView::DARK_GREEN, // Vertical Line. - ScrollView::GREY // Lies outside of any column. - }; - if (type >= 0 && type < PT_COUNT) { - return kPBColors[type]; - } - return ScrollView::WHITE; -} -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyblk.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyblk.h deleted file mode 100644 index 62baa222..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/polyblk.h +++ /dev/null @@ -1,110 +0,0 @@ -/********************************************************************** - * File: polyblk.h (Formerly poly_block.h) - * Description: Polygonal blocks - * - * (C) Copyright 1993, Hewlett-Packard Ltd. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef POLYBLK_H -#define POLYBLK_H - -#include "publictypes.h" -#include "elst.h" -#include "points.h" -#include "rect.h" -#include "scrollview.h" - -class DLLSYM POLY_BLOCK { - public: - POLY_BLOCK() = default; - // Initialize from box coordinates. - POLY_BLOCK(const TBOX& tbox, PolyBlockType type); - POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type); - ~POLY_BLOCK () = default; - - TBOX *bounding_box() { // access function - return &box; - } - - ICOORDELT_LIST *points() { // access function - return &vertices; - } - - void compute_bb(); - - PolyBlockType isA() const { - return type; - } - - bool IsText() const { - return PTIsTextType(type); - } - - // Rotate about the origin by the given rotation. (Analogous to - // multiplying by a complex number. - void rotate(FCOORD rotation); - // Reflect the coords of the polygon in the y-axis. (Flip the sign of x.) - void reflect_in_y_axis(); - // Move by adding shift to all coordinates. - void move(ICOORD shift); - - void plot(ScrollView* window, int32_t num); - - #ifndef GRAPHICS_DISABLED - void fill(ScrollView* window, ScrollView::Color colour); - #endif // GRAPHICS_DISABLED - - // Returns true if other is inside this. - bool contains(POLY_BLOCK *other); - - // Returns true if the polygons of other and this overlap. 
- bool overlap(POLY_BLOCK *other); - - // Returns the winding number of this around the test_pt. - // Positive for anticlockwise, negative for clockwise, and zero for - // test_pt outside this. - int16_t winding_number(const ICOORD &test_pt); - - #ifndef GRAPHICS_DISABLED - // Static utility functions to handle the PolyBlockType. - // Returns a color to draw the given type. - static ScrollView::Color ColorForPolyBlockType(PolyBlockType type); - #endif // GRAPHICS_DISABLED - - private: - ICOORDELT_LIST vertices; // vertices - TBOX box; // bounding box - PolyBlockType type; // Type of this region. -}; - -// Class to iterate the scanlines of a polygon. -class DLLSYM PB_LINE_IT { - public: - PB_LINE_IT(POLY_BLOCK *blkptr) { - block = blkptr; - } - - void set_to_block(POLY_BLOCK * blkptr) { - block = blkptr; - } - - // Returns a list of runs of pixels for the given y coord. - // Each element of the returned list is the start (x) and extent(y) of - // a run inside the region. - // Delete the returned list after use. - ICOORDELT_LIST *get_line(int16_t y); - - private: - POLY_BLOCK * block; -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/publictypes.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/publictypes.cpp deleted file mode 100644 index 47a18009..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/publictypes.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: publictypes.cpp -// Description: Types used in both the API and internally -// Author: Ray Smith -// Created: Wed Mar 03 11:17:09 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "publictypes.h" - -/** String name for each block type. Keep in sync with PolyBlockType. */ -const char* kPolyBlockNames[] = { - "Unknown", - "Flowing Text", - "Heading Text", - "Pullout Text", - "Equation", - "Inline Equation", - "Table", - "Vertical Text", - "Caption Text", - "Flowing Image", - "Heading Image", - "Pullout Image", - "Horizontal Line", - "Vertical Line", - "Noise", - "" // End marker for testing that sizes match. -}; diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/publictypes.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/publictypes.h deleted file mode 100644 index 775af291..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/publictypes.h +++ /dev/null @@ -1,286 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: publictypes.h -// Description: Types used in both the API and internally -// Author: Ray Smith -// Created: Wed Mar 03 09:22:53 PST 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_ -#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_ - -// This file contains types that are used both by the API and internally -// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic -// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT. -// Restated: It is OK for low-level Tesseract files to include publictypes.h, -// but not for the low-level tesseract code to include top-level API code. -// This file should not use other Tesseract types, as that would drag -// their includes into the API-level. -// API-level code should include apitypes.h in preference to this file. - -/** Number of printers' points in an inch. The unit of the pointsize return. */ -constexpr int kPointsPerInch = 72; -/** - * Minimum believable resolution. Used as a default if there is no other - * information, as it is safer to under-estimate than over-estimate. - */ -constexpr int kMinCredibleResolution = 70; -/** Maximum believable resolution. */ -constexpr int kMaxCredibleResolution = 2400; -/** - * Ratio between median blob size and likely resolution. Used to estimate - * resolution when none is provided. This is basically 1/usual text size in - * inches. */ -constexpr int kResolutionEstimationFactor = 10; - -/** - * Possible types for a POLY_BLOCK or ColPartition. - * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions - * below, as well as kPolyBlockNames in publictypes.cpp. - * Used extensively by ColPartition, and POLY_BLOCK. -*/ -enum PolyBlockType { - PT_UNKNOWN, // Type is not yet known. Keep as the first element. - PT_FLOWING_TEXT, // Text that lives inside a column. - PT_HEADING_TEXT, // Text that spans more than one column. - PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. 
- PT_EQUATION, // Partition belonging to an equation region. - PT_INLINE_EQUATION, // Partition has inline equation. - PT_TABLE, // Partition belonging to a table region. - PT_VERTICAL_TEXT, // Text-line runs vertically. - PT_CAPTION_TEXT, // Text that belongs to an image. - PT_FLOWING_IMAGE, // Image that lives inside a column. - PT_HEADING_IMAGE, // Image that spans more than one column. - PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. - PT_HORZ_LINE, // Horizontal Line. - PT_VERT_LINE, // Vertical Line. - PT_NOISE, // Lies outside of any column. - PT_COUNT -}; - -/** Returns true if PolyBlockType is of horizontal line type */ -inline bool PTIsLineType(PolyBlockType type) { - return type == PT_HORZ_LINE || type == PT_VERT_LINE; -} -/** Returns true if PolyBlockType is of image type */ -inline bool PTIsImageType(PolyBlockType type) { - return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE || - type == PT_PULLOUT_IMAGE; -} -/** Returns true if PolyBlockType is of text type */ -inline bool PTIsTextType(PolyBlockType type) { - return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT || - type == PT_PULLOUT_TEXT || type == PT_TABLE || - type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT || - type == PT_INLINE_EQUATION; -} -// Returns true if PolyBlockType is of pullout(inter-column) type -inline bool PTIsPulloutType(PolyBlockType type) { - return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT; -} - -/** String name for each block type. Keep in sync with PolyBlockType. */ -extern const char* kPolyBlockNames[]; - -namespace tesseract { -/** - * +------------------+ Orientation Example: - * | 1 Aaaa Aaaa Aaaa | ==================== - * | Aaa aa aaa aa | To left is a diagram of some (1) English and - * | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit. - * | 2 | - * | ####### c c C | Upright Latin characters are represented as A and a. 
- * | ####### c c c | '<' represents a latin character rotated - * | < ####### c c c | anti-clockwise 90 degrees. - * | < ####### c c | - * | < ####### . c | Upright Chinese characters are represented C and c. - * | 3 ####### c | - * +------------------+ NOTA BENE: enum values here should match goodoc.proto - - * If you orient your head so that "up" aligns with Orientation, - * then the characters will appear "right side up" and readable. - * - * In the example above, both the English and Chinese paragraphs are oriented - * so their "up" is the top of the page (page up). The photo credit is read - * with one's head turned leftward ("up" is to page left). - * - * The values of this enum match the convention of Tesseract's osdetect.h -*/ -enum Orientation { - ORIENTATION_PAGE_UP = 0, - ORIENTATION_PAGE_RIGHT = 1, - ORIENTATION_PAGE_DOWN = 2, - ORIENTATION_PAGE_LEFT = 3, -}; - -/** - * The grapheme clusters within a line of text are laid out logically - * in this direction, judged when looking at the text line rotated so that - * its Orientation is "page up". - * - * For English text, the writing direction is left-to-right. For the - * Chinese text in the above example, the writing direction is top-to-bottom. -*/ -enum WritingDirection { - WRITING_DIRECTION_LEFT_TO_RIGHT = 0, - WRITING_DIRECTION_RIGHT_TO_LEFT = 1, - WRITING_DIRECTION_TOP_TO_BOTTOM = 2, -}; - -/** - * The text lines are read in the given sequence. - * - * In English, the order is top-to-bottom. - * In Chinese, vertical text lines are read right-to-left. Mongolian is - * written in vertical columns top to bottom like Chinese, but the lines - * order left-to right. - * - * Note that only some combinations make sense. For example, - * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM -*/ -enum TextlineOrder { - TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, - TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, - TEXTLINE_ORDER_TOP_TO_BOTTOM = 2, -}; - -/** - * Possible modes for page layout analysis. 
These *must* be kept in order - * of decreasing amount of layout analysis to be done, except for OSD_ONLY, - * so that the inequality test macros below work. -*/ -enum PageSegMode { - PSM_OSD_ONLY, ///< Orientation and script detection only. - PSM_AUTO_OSD, ///< Automatic page segmentation with orientation and - ///< script detection. (OSD) - PSM_AUTO_ONLY, ///< Automatic page segmentation, but no OSD, or OCR. - PSM_AUTO, ///< Fully automatic page segmentation, but no OSD. - PSM_SINGLE_COLUMN, ///< Assume a single column of text of variable sizes. - PSM_SINGLE_BLOCK_VERT_TEXT, ///< Assume a single uniform block of vertically - ///< aligned text. - PSM_SINGLE_BLOCK, ///< Assume a single uniform block of text. (Default.) - PSM_SINGLE_LINE, ///< Treat the image as a single text line. - PSM_SINGLE_WORD, ///< Treat the image as a single word. - PSM_CIRCLE_WORD, ///< Treat the image as a single word in a circle. - PSM_SINGLE_CHAR, ///< Treat the image as a single character. - PSM_SPARSE_TEXT, ///< Find as much text as possible in no particular order. - PSM_SPARSE_TEXT_OSD, ///< Sparse text with orientation and script det. - PSM_RAW_LINE, ///< Treat the image as a single text line, bypassing - ///< hacks that are Tesseract-specific. - - PSM_COUNT ///< Number of enum entries. -}; - -/** - * Inline functions that act on a PageSegMode to determine whether components of - * layout analysis are enabled. - * *Depend critically on the order of elements of PageSegMode.* - * NOTE that arg is an int for compatibility with INT_PARAM. 
-*/ -inline bool PSM_OSD_ENABLED(int pageseg_mode) { - return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD; -} -inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) { - return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD; -} -inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) { - return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO; -} -inline bool PSM_SPARSE(int pageseg_mode) { - return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD; -} -inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) { - return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN; -} -inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) { - return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK; -} -inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) { - return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) || - pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD; -} - -/** - * enum of the elements of the page hierarchy, used in ResultIterator - * to provide functions that operate on each level without having to - * have 5x as many functions. -*/ -enum PageIteratorLevel { - RIL_BLOCK, // Block of text/image/separator line. - RIL_PARA, // Paragraph within a block. - RIL_TEXTLINE, // Line within a paragraph. - RIL_WORD, // Word within a textline. - RIL_SYMBOL // Symbol/character within a word. -}; - -/** - * JUSTIFICATION_UNKNOWN - * The alignment is not clearly one of the other options. This could happen - * for example if there are only one or two lines of text or the text looks - * like source code or poetry. - * - * NOTA BENE: Fully justified paragraphs (text aligned to both left and right - * margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text - * is written with a left-to-right script and with JUSTIFICATION_RIGHT if - * their text is written in a right-to-left script. 
- * - * Interpretation for text read in vertical lines: - * "Left" is wherever the starting reading position is. - * - * JUSTIFICATION_LEFT - * Each line, except possibly the first, is flush to the same left tab stop. - * - * JUSTIFICATION_CENTER - * The text lines of the paragraph are centered about a line going - * down through their middle of the text lines. - * - * JUSTIFICATION_RIGHT - * Each line, except possibly the first, is flush to the same right tab stop. - */ -enum ParagraphJustification { - JUSTIFICATION_UNKNOWN, - JUSTIFICATION_LEFT, - JUSTIFICATION_CENTER, - JUSTIFICATION_RIGHT, -}; - -/** - * When Tesseract/Cube is initialized we can choose to instantiate/load/run - * only the Tesseract part, only the Cube part or both along with the combiner. - * The preference of which engine to use is stored in tessedit_ocr_engine_mode. - * - * ATTENTION: When modifying this enum, please make sure to make the - * appropriate changes to all the enums mirroring it (e.g. OCREngine in - * cityblock/workflow/detection/detection_storage.proto). Such enums will - * mention the connection to OcrEngineMode in the comments. -*/ -enum OcrEngineMode { - OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated - OEM_LSTM_ONLY, // Run just the LSTM line recognizer. - OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback - // to Tesseract when things get difficult. - // deprecated - OEM_DEFAULT, // Specify this mode when calling init_*(), - // to indicate that any of the above modes - // should be automatically inferred from the - // variables in the language-specific config, - // command-line configs, or if not specified - // in any of the above should be set to the - // default OEM_TESSERACT_ONLY. - OEM_COUNT // Number of OEMs -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadlsq.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadlsq.cpp deleted file mode 100644 index 7501a9cd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadlsq.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/********************************************************************** - * File: quadlsq.cpp (Formerly qlsq.c) - * Description: Code for least squares approximation of quadratics. - * Author: Ray Smith - * Created: Wed Oct 6 15:14:23 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include -#include "quadlsq.h" -#include "tprintf.h" - -// Minimum variance in least squares before backing off to a lower degree. -const double kMinVariance = 1.0 / 1024; - -/********************************************************************** - * QLSQ::clear - * - * Function to initialize a QLSQ. - **********************************************************************/ - -void QLSQ::clear() { // initialize - a = 0.0; - b = 0.0; - c = 0.0; - n = 0; // No elements. - sigx = 0.0; // Zero accumulators. 
- sigy = 0.0; - sigxx = 0.0; - sigxy = 0.0; - sigyy = 0.0; - sigxxx = 0.0; - sigxxy = 0.0; - sigxxxx = 0.0; -} - - -/********************************************************************** - * QLSQ::add - * - * Add an element to the accumulator. - **********************************************************************/ - -void QLSQ::add(double x, double y) { - n++; // Count elements. - sigx += x; // Update accumulators. - sigy += y; - sigxx += x * x; - sigxy += x * y; - sigyy += y * y; - sigxxx += static_cast(x) * x * x; - sigxxy += static_cast(x) * x * y; - sigxxxx += static_cast(x) * x * x * x; -} - - -/********************************************************************** - * QLSQ::remove - * - * Delete an element from the accumulator. - **********************************************************************/ - -void QLSQ::remove(double x, double y) { - if (n <= 0) { - tprintf("Can't remove an element from an empty QLSQ accumulator!\n"); - return; - } - n--; // Count elements. - sigx -= x; // Update accumulators. - sigy -= y; - sigxx -= x * x; - sigxy -= x * y; - sigyy -= y * y; - sigxxx -= static_cast(x) * x * x; - sigxxy -= static_cast(x) * x * y; - sigxxxx -= static_cast(x) * x * x * x; -} - - -/********************************************************************** - * QLSQ::fit - * - * Fit the given degree of polynomial and store the result. - * This creates a quadratic of the form axx + bx + c, but limited to - * the given degree. - **********************************************************************/ - -void QLSQ::fit(int degree) { - long double x_variance = static_cast(sigxx) * n - - static_cast(sigx) * sigx; - - // Note: for computational efficiency, we do not normalize the variance, - // covariance and cube variance here as they are in the same order in both - // nominators and denominators. However, we need be careful in value range - // check. 
- if (x_variance < kMinVariance * n * n || degree < 1 || n < 2) { - // We cannot calculate b reliably so forget a and b, and just work on c. - a = b = 0.0; - if (n >= 1 && degree >= 0) { - c = sigy / n; - } else { - c = 0.0; - } - return; - } - long double top96 = 0.0; // Accurate top. - long double bottom96 = 0.0; // Accurate bottom. - long double cubevar = sigxxx * n - static_cast(sigxx) * sigx; - long double covariance = static_cast(sigxy) * n - - static_cast(sigx) * sigy; - - if (n >= 4 && degree >= 2) { - top96 = cubevar * covariance; - top96 += x_variance * (static_cast(sigxx) * sigy - sigxxy * n); - - bottom96 = cubevar * cubevar; - bottom96 -= x_variance * - (sigxxxx * n - static_cast(sigxx) * sigxx); - } - if (bottom96 >= kMinVariance * n * n * n * n) { - // Denominators looking good - a = top96 / bottom96; - top96 = covariance - cubevar * a; - b = top96 / x_variance; - } else { - // Forget a, and concentrate on b. - a = 0.0; - b = covariance / x_variance; - } - c = (sigy - a * sigxx - b * sigx) / n; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadlsq.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadlsq.h deleted file mode 100644 index a10cedf8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadlsq.h +++ /dev/null @@ -1,67 +0,0 @@ -/********************************************************************** - * File: quadlsq.h (Formerly qlsq.h) - * Description: Code for least squares approximation of quadratics. - * Author: Ray Smith - * Created: Wed Oct 6 15:14:23 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef QUADLSQ_H -#define QUADLSQ_H - -#include "points.h" - -class QLSQ -{ - public: - QLSQ() { //constructor - clear(); //set to zeros - } - void clear(); //initialize - - void add( //add element - double x, //coords to add - double y); - void remove( //delete element - double x, //coords to delete - double y); - int32_t count() { //no of elements - return n; - } - - void fit( //fit the given - int degree); //return actual - double get_a() { //get x squard - return a; - } - double get_b() { //get x squard - return b; - } - double get_c() { //get x squard - return c; - } - - private: - int32_t n; //no of elements - double a, b, c; //result - double sigx; //sum of x - double sigy; //sum of y - double sigxx; //sum x squared - double sigxy; //sum of xy - double sigyy; //sum y squared - long double sigxxx; //sum x cubed - long double sigxxy; //sum xsquared y - long double sigxxxx; //sum x fourth -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadratc.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadratc.h deleted file mode 100644 index 19749401..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quadratc.h +++ /dev/null @@ -1,62 +0,0 @@ -/********************************************************************** - * File: quadratc.h (Formerly quadrtic.h) - * Description: Code for the QUAD_COEFFS class. 
- * Author: Ray Smith - * Created: Tue Oct 08 17:24:40 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef QUADRATC_H -#define QUADRATC_H - -#include "points.h" - -class QUAD_COEFFS -{ - public: - QUAD_COEFFS() = default; - QUAD_COEFFS( //constructor - double xsq, //coefficients - float x, - float constant) { - a = xsq; - b = x; - c = constant; - } - - float y( //evaluate - float x) const { //at x - return (float) ((a * x + b) * x + c); - } - - void move( // reposition word - ICOORD vec) { // by vector - /************************************************************ - y - q = a (x - p)^2 + b (x - p) + c - y - q = ax^2 - 2apx + ap^2 + bx - bp + c - y = ax^2 + (b - 2ap)x + (c - bp + ap^2 + q) - ************************************************************/ - int16_t p = vec.x (); - int16_t q = vec.y (); - - c = (float) (c - b * p + a * p * p + q); - b = (float) (b - 2 * a * p); - } - - double a; //x squared - float b; //x - float c; //constant - private: -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quspline.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quspline.cpp deleted file mode 100644 index a3e82ba3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quspline.cpp +++ /dev/null @@ -1,409 +0,0 @@ 
-/********************************************************************** - * File: quspline.cpp (Formerly qspline.c) - * Description: Code for the QSPLINE class. - * Author: Ray Smith - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "quspline.h" -#include "allheaders.h" // for pixRenderPolyline, pixGetDepth, pixGetHeight -#include "pix.h" // for L_CLEAR_PIXELS, L_SET_PIXELS, Pix (ptr only) -#include "points.h" // for ICOORD -#include "quadlsq.h" // for QLSQ -#include "quadratc.h" // for QUAD_COEFFS - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define QSPLINE_PRECISION 16 //no of steps to draw - -/********************************************************************** - * QSPLINE::QSPLINE - * - * Constructor to build a QSPLINE given the components used in the old code. 
- **********************************************************************/ - -QSPLINE::QSPLINE( //constructor - int32_t count, //no of segments - int32_t *xstarts, //start coords - double *coeffs //coefficients - ) { - int32_t index; //segment index - - //get memory - xcoords = new int32_t[count + 1]; - quadratics = new QUAD_COEFFS[count]; - segments = count; - for (index = 0; index < segments; index++) { - //copy them - xcoords[index] = xstarts[index]; - quadratics[index] = QUAD_COEFFS (coeffs[index * 3], - coeffs[index * 3 + 1], - coeffs[index * 3 + 2]); - } - //right edge - xcoords[index] = xstarts[index]; -} - - -/********************************************************************** - * QSPLINE::QSPLINE - * - * Constructor to build a QSPLINE by appproximation of points. - **********************************************************************/ - -QSPLINE::QSPLINE ( //constructor -int xstarts[], //spline boundaries -int segcount, //no of segments -int xpts[], //points to fit -int ypts[], int pointcount, //no of pts -int degree //fit required -) { - int pointindex; /*no along text line */ - int segment; /*segment no */ - int32_t *ptcounts; //no in each segment - QLSQ qlsq; /*accumulator */ - - segments = segcount; - xcoords = new int32_t[segcount + 1]; - ptcounts = new int32_t[segcount + 1]; - quadratics = new QUAD_COEFFS[segcount]; - memmove (xcoords, xstarts, (segcount + 1) * sizeof (int32_t)); - ptcounts[0] = 0; /*none in any yet */ - for (segment = 0, pointindex = 0; pointindex < pointcount; pointindex++) { - while (segment < segcount && xpts[pointindex] >= xstarts[segment]) { - segment++; /*try next segment */ - /*cumulative counts */ - ptcounts[segment] = ptcounts[segment - 1]; - } - ptcounts[segment]++; /*no in previous partition */ - } - while (segment < segcount) { - segment++; - /*zero the rest */ - ptcounts[segment] = ptcounts[segment - 1]; - } - - for (segment = 0; segment < segcount; segment++) { - qlsq.clear (); - /*first blob */ - pointindex = 
ptcounts[segment]; - if (pointindex > 0 - && xpts[pointindex] != xpts[pointindex - 1] - && xpts[pointindex] != xstarts[segment]) - qlsq.add (xstarts[segment], - ypts[pointindex - 1] - + (ypts[pointindex] - ypts[pointindex - 1]) - * (xstarts[segment] - xpts[pointindex - 1]) - / (xpts[pointindex] - xpts[pointindex - 1])); - for (; pointindex < ptcounts[segment + 1]; pointindex++) { - qlsq.add (xpts[pointindex], ypts[pointindex]); - } - if (pointindex > 0 && pointindex < pointcount - && xpts[pointindex] != xstarts[segment + 1]) - qlsq.add (xstarts[segment + 1], - ypts[pointindex - 1] - + (ypts[pointindex] - ypts[pointindex - 1]) - * (xstarts[segment + 1] - xpts[pointindex - 1]) - / (xpts[pointindex] - xpts[pointindex - 1])); - qlsq.fit (degree); - quadratics[segment].a = qlsq.get_a (); - quadratics[segment].b = qlsq.get_b (); - quadratics[segment].c = qlsq.get_c (); - } - delete[] ptcounts; -} - - -/********************************************************************** - * QSPLINE::QSPLINE - * - * Constructor to build a QSPLINE from another. - **********************************************************************/ - -QSPLINE::QSPLINE( //constructor - const QSPLINE &src) { - segments = 0; - xcoords = nullptr; - quadratics = nullptr; - *this = src; -} - - -/********************************************************************** - * QSPLINE::~QSPLINE - * - * Destroy a QSPLINE. 
- **********************************************************************/ - -QSPLINE::~QSPLINE () { - delete[] xcoords; - delete[] quadratics; -} - - -/********************************************************************** - * QSPLINE::operator= - * - * Copy a QSPLINE - **********************************************************************/ - -QSPLINE & QSPLINE::operator= ( //assignment -const QSPLINE & source) { - delete[] xcoords; - delete[] quadratics; - - segments = source.segments; - xcoords = new int32_t[segments + 1]; - quadratics = new QUAD_COEFFS[segments]; - memmove (xcoords, source.xcoords, (segments + 1) * sizeof (int32_t)); - memmove (quadratics, source.quadratics, segments * sizeof (QUAD_COEFFS)); - return *this; -} - - -/********************************************************************** - * QSPLINE::step - * - * Return the total of the step functions between the given coords. - **********************************************************************/ - -double QSPLINE::step( //find step functions - double x1, //between coords - double x2) { - int index1, index2; //indices of coords - double total; /*total steps */ - - index1 = spline_index (x1); - index2 = spline_index (x2); - total = 0; - while (index1 < index2) { - total += - (double) quadratics[index1 + 1].y ((float) xcoords[index1 + 1]); - total -= (double) quadratics[index1].y ((float) xcoords[index1 + 1]); - index1++; /*next segment */ - } - return total; /*total steps */ -} - - -/********************************************************************** - * QSPLINE::y - * - * Return the y value at the given x value. 
- **********************************************************************/ - -double QSPLINE::y( //evaluate - double x //coord to evaluate at - ) const { - int32_t index; //segment index - - index = spline_index (x); - return quadratics[index].y (x);//in correct segment -} - - -/********************************************************************** - * QSPLINE::spline_index - * - * Return the index to the largest xcoord not greater than x. - **********************************************************************/ - -int32_t QSPLINE::spline_index( //evaluate - double x //coord to evaluate at - ) const { - int32_t index; //segment index - int32_t bottom; //bottom of range - int32_t top; //top of range - - bottom = 0; - top = segments; - while (top - bottom > 1) { - index = (top + bottom) / 2; //centre of range - if (x >= xcoords[index]) - bottom = index; //new min - else - top = index; //new max - } - return bottom; -} - - -/********************************************************************** - * QSPLINE::move - * - * Reposition spline by vector - **********************************************************************/ - -void QSPLINE::move( // reposition spline - ICOORD vec // by vector - ) { - int32_t segment; //index of segment - int16_t x_shift = vec.x (); - - for (segment = 0; segment < segments; segment++) { - xcoords[segment] += x_shift; - quadratics[segment].move (vec); - } - xcoords[segment] += x_shift; -} - - -/********************************************************************** - * QSPLINE::overlap - * - * Return TRUE if spline2 overlaps this by no more than fraction less - * than the bounds of this. 
- **********************************************************************/ - -bool QSPLINE::overlap( //test overlap - QSPLINE* spline2, //2 cannot be smaller - double fraction //by more than this -) { - int leftlimit = xcoords[1]; /*common left limit */ - int rightlimit = xcoords[segments - 1]; /*common right limit */ - /*or too non-overlap */ - return !(spline2->segments < 3 || spline2->xcoords[1] > leftlimit + fraction * (rightlimit - leftlimit) || - spline2->xcoords[spline2->segments - 1] < rightlimit - fraction * (rightlimit - leftlimit)); -} - - -/********************************************************************** - * extrapolate_spline - * - * Extrapolates the spline linearly using the same gradient as the - * quadratic has at either end. - **********************************************************************/ - -void QSPLINE::extrapolate( //linear extrapolation - double gradient, //gradient to use - int xmin, //new left edge - int xmax //new right edge - ) { - int segment; /*current segment of spline */ - int dest_segment; //dest index - int32_t* xstarts; //new boundaries - QUAD_COEFFS *quads; //new ones - int increment; //in size - - increment = xmin < xcoords[0] ? 
1 : 0; - if (xmax > xcoords[segments]) - increment++; - if (increment == 0) - return; - xstarts = new int32_t[segments + 1 + increment]; - quads = new QUAD_COEFFS[segments + increment]; - if (xmin < xcoords[0]) { - xstarts[0] = xmin; - quads[0].a = 0; - quads[0].b = gradient; - quads[0].c = y (xcoords[0]) - quads[0].b * xcoords[0]; - dest_segment = 1; - } - else - dest_segment = 0; - for (segment = 0; segment < segments; segment++) { - xstarts[dest_segment] = xcoords[segment]; - quads[dest_segment] = quadratics[segment]; - dest_segment++; - } - xstarts[dest_segment] = xcoords[segment]; - if (xmax > xcoords[segments]) { - quads[dest_segment].a = 0; - quads[dest_segment].b = gradient; - quads[dest_segment].c = y (xcoords[segments]) - - quads[dest_segment].b * xcoords[segments]; - dest_segment++; - xstarts[dest_segment] = xmax + 1; - } - segments = dest_segment; - delete[] xcoords; - delete[] quadratics; - xcoords = xstarts; - quadratics = quads; -} - - -/********************************************************************** - * QSPLINE::plot - * - * Draw the QSPLINE in the given colour. 
- **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void QSPLINE::plot( //draw it - ScrollView* window, //window to draw in - ScrollView::Color colour //colour to draw in - ) const { - int32_t segment; //index of segment - int16_t step; //index of poly piece - double increment; //x increment - double x; //x coord - - window->Pen(colour); - for (segment = 0; segment < segments; segment++) { - increment = - (double) (xcoords[segment + 1] - - xcoords[segment]) / QSPLINE_PRECISION; - x = xcoords[segment]; - for (step = 0; step <= QSPLINE_PRECISION; step++) { - if (segment == 0 && step == 0) - window->SetCursor(x, quadratics[segment].y (x)); - else - window->DrawTo(x, quadratics[segment].y (x)); - x += increment; - } - } -} -#endif - -void QSPLINE::plot(Pix *pix) const { - if (pix == nullptr) { - return; - } - - int32_t segment; // Index of segment - int16_t step; // Index of poly piece - double increment; // x increment - double x; // x coord - double height = static_cast(pixGetHeight(pix)); - Pta* points = ptaCreate(QSPLINE_PRECISION * segments); - const int kLineWidth = 5; - - for (segment = 0; segment < segments; segment++) { - increment = static_cast((xcoords[segment + 1] - - xcoords[segment])) / QSPLINE_PRECISION; - x = xcoords[segment]; - for (step = 0; step <= QSPLINE_PRECISION; step++) { - double y = height - quadratics[segment].y(x); - ptaAddPt(points, x, y); - x += increment; - } - } - - switch (pixGetDepth(pix)) { - case 1: - pixRenderPolyline(pix, points, kLineWidth, L_SET_PIXELS, 1); - break; - case 32: - pixRenderPolylineArb(pix, points, kLineWidth, 255, 0, 0, 1); - break; - default: - pixRenderPolyline(pix, points, kLineWidth, L_CLEAR_PIXELS, 1); - break; - } - ptaDestroy(&points); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quspline.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quspline.h deleted file mode 100644 index 58e60d3b..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/quspline.h +++ /dev/null @@ -1,100 +0,0 @@ -/********************************************************************** - * File: quspline.h (Formerly qspline.h) - * Description: Code for the QSPLINE class. - * Author: Ray Smith - * Created: Tue Oct 08 17:16:12 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef QUSPLINE_H -#define QUSPLINE_H - -#include // for int32_t -#include "scrollview.h" // for ScrollView, ScrollView::Color - -class ICOORD; -class QUAD_COEFFS; -class ROW; -class TBOX; -struct Pix; - -class QSPLINE -{ - friend void make_first_baseline(TBOX *, - int, - int *, - int *, - QSPLINE *, - QSPLINE *, - float); - friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float); - friend void tweak_row_baseline(ROW *, double, double); - public: - QSPLINE() { //empty constructor - segments = 0; - xcoords = nullptr; //everything empty - quadratics = nullptr; - } - QSPLINE( //copy constructor - const QSPLINE &src); - QSPLINE( //constructor - int32_t count, //number of segments - int32_t *xstarts, //segment starts - double *coeffs); //coefficients - ~QSPLINE (); //destructor - QSPLINE ( //least squares fit - int xstarts[], //spline boundaries - int segcount, //no of segments - int xcoords[], //points to fit - int ycoords[], int blobcount,//no 
of coords - int degree); //function - - double step( //step change - double x1, //between coords - double x2); - double y( //evaluate - double x) const; //at x - - void move( // reposition spline - ICOORD vec); // by vector - bool overlap( //test overlap - QSPLINE* spline2, //2 cannot be smaller - double fraction); //by more than this - void extrapolate( //linear extrapolation - double gradient, //gradient to use - int left, //new left edge - int right); //new right edge - -#ifndef GRAPHICS_DISABLED - void plot( //draw it - ScrollView* window, //in window - ScrollView::Color colour) const; //in colour -#endif - - // Paint the baseline over pix. If pix has depth of 32, then the line will - // be painted in red. Otherwise it will be painted in black. - void plot(Pix* pix) const; - - QSPLINE & operator= ( - const QSPLINE & source); //from this - - private: - - int32_t spline_index( //binary search - double x) const; //for x - int32_t segments; //no of segments - int32_t *xcoords; //no of coords - QUAD_COEFFS *quadratics; //spline pieces -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ratngs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ratngs.cpp deleted file mode 100644 index aa3788a3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ratngs.cpp +++ /dev/null @@ -1,854 +0,0 @@ -/********************************************************************** - * File: ratngs.cpp (Formerly ratings.c) - * Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes. - * Author: Ray Smith - * Created: Thu Apr 23 13:23:29 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "ratngs.h" - -#include -#include -#include "blobs.h" -#include "callcpp.h" -#include "genericvector.h" -#include "matrix.h" -#include "normalis.h" // kBlnBaselineOffset. -#include "unicharset.h" - -using tesseract::ScriptPos; - -ELISTIZE(BLOB_CHOICE) -ELISTIZE(WERD_CHOICE) - -const float WERD_CHOICE::kBadRating = 100000.0; -// Min offset in baseline-normalized coords to make a character a subscript. -const int kMinSubscriptOffset = 20; -// Min offset in baseline-normalized coords to make a character a superscript. -const int kMinSuperscriptOffset = 20; -// Max y of bottom of a drop-cap blob. -const int kMaxDropCapBottom = -128; -// Max fraction of x-height to use as denominator in measuring x-height overlap. -const double kMaxOverlapDenominator = 0.125; -// Min fraction of x-height range that should be in agreement for matching -// x-heights. -const double kMinXHeightMatch = 0.5; -// Max tolerance on baseline position as a fraction of x-height for matching -// baselines. 
-const double kMaxBaselineDrift = 0.0625; - -static const char kPermuterTypeNoPerm[] = "None"; -static const char kPermuterTypePuncPerm[] = "Punctuation"; -static const char kPermuterTypeTopPerm[] = "Top Choice"; -static const char kPermuterTypeLowerPerm[] = "Top Lower Case"; -static const char kPermuterTypeUpperPerm[] = "Top Upper Case"; -static const char kPermuterTypeNgramPerm[] = "Ngram"; -static const char kPermuterTypeNumberPerm[] = "Number"; -static const char kPermuterTypeUserPatPerm[] = "User Pattern"; -static const char kPermuterTypeSysDawgPerm[] = "System Dictionary"; -static const char kPermuterTypeDocDawgPerm[] = "Document Dictionary"; -static const char kPermuterTypeUserDawgPerm[] = "User Dictionary"; -static const char kPermuterTypeFreqDawgPerm[] = "Frequent Words Dictionary"; -static const char kPermuterTypeCompoundPerm[] = "Compound"; - -static const char * const kPermuterTypeNames[] = { - kPermuterTypeNoPerm, // 0 - kPermuterTypePuncPerm, // 1 - kPermuterTypeTopPerm, // 2 - kPermuterTypeLowerPerm, // 3 - kPermuterTypeUpperPerm, // 4 - kPermuterTypeNgramPerm, // 5 - kPermuterTypeNumberPerm, // 6 - kPermuterTypeUserPatPerm, // 7 - kPermuterTypeSysDawgPerm, // 8 - kPermuterTypeDocDawgPerm, // 9 - kPermuterTypeUserDawgPerm, // 10 - kPermuterTypeFreqDawgPerm, // 11 - kPermuterTypeCompoundPerm // 12 -}; - -/** - * BLOB_CHOICE::BLOB_CHOICE - * - * Constructor to build a BLOB_CHOICE from a char, rating and certainty. 
- */ -BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id - float src_rating, // rating - float src_cert, // certainty - int src_script_id, // script - float min_xheight, // min xheight allowed - float max_xheight, // max xheight by this char - float yshift, // yshift out of position - BlobChoiceClassifier c) { // adapted match or other - unichar_id_ = src_unichar_id; - rating_ = src_rating; - certainty_ = src_cert; - fontinfo_id_ = -1; - fontinfo_id2_ = -1; - script_id_ = src_script_id; - min_xheight_ = min_xheight; - max_xheight_ = max_xheight; - yshift_ = yshift; - classifier_ = c; -} - -/** - * BLOB_CHOICE::BLOB_CHOICE - * - * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. - */ -BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST_LINK(other) { - unichar_id_ = other.unichar_id(); - rating_ = other.rating(); - certainty_ = other.certainty(); - fontinfo_id_ = other.fontinfo_id(); - fontinfo_id2_ = other.fontinfo_id2(); - script_id_ = other.script_id(); - matrix_cell_ = other.matrix_cell_; - min_xheight_ = other.min_xheight_; - max_xheight_ = other.max_xheight_; - yshift_ = other.yshift(); - classifier_ = other.classifier_; - fonts_ = other.fonts_; -} - -// Copy assignment operator. -BLOB_CHOICE& BLOB_CHOICE::operator=(const BLOB_CHOICE& other) { - ELIST_LINK::operator=(other); - unichar_id_ = other.unichar_id(); - rating_ = other.rating(); - certainty_ = other.certainty(); - fontinfo_id_ = other.fontinfo_id(); - fontinfo_id2_ = other.fontinfo_id2(); - script_id_ = other.script_id(); - matrix_cell_ = other.matrix_cell_; - min_xheight_ = other.min_xheight_; - max_xheight_ = other.max_xheight_; - yshift_ = other.yshift(); - classifier_ = other.classifier_; - fonts_ = other.fonts_; - return *this; -} - -// Returns true if *this and other agree on the baseline and x-height -// to within some tolerance based on a given estimate of the x-height. 
-bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, - bool debug) const { - double baseline_diff = fabs(yshift() - other.yshift()); - if (baseline_diff > kMaxBaselineDrift * x_height) { - if (debug) { - tprintf("Baseline diff %g for %d v %d\n", - baseline_diff, unichar_id_, other.unichar_id_); - } - return false; - } - double this_range = max_xheight() - min_xheight(); - double other_range = other.max_xheight() - other.min_xheight(); - double denominator = ClipToRange(std::min(this_range, other_range), - 1.0, kMaxOverlapDenominator * x_height); - double overlap = std::min(max_xheight(), other.max_xheight()) - - std::max(min_xheight(), other.min_xheight()); - overlap /= denominator; - if (debug) { - tprintf("PosAndSize for %d v %d: bl diff = %g, ranges %g, %g / %g ->%g\n", - unichar_id_, other.unichar_id_, baseline_diff, - this_range, other_range, denominator, overlap); - } - - return overlap >= kMinXHeightMatch; -} - -// Helper to find the BLOB_CHOICE in the bc_list that matches the given -// unichar_id, or nullptr if there is no match. -BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id, - BLOB_CHOICE_LIST* bc_list) { - // Find the corresponding best BLOB_CHOICE. - BLOB_CHOICE_IT choice_it(bc_list); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - BLOB_CHOICE* choice = choice_it.data(); - if (choice->unichar_id() == char_id) { - return choice; - } - } - return nullptr; -} - -const char *WERD_CHOICE::permuter_name(uint8_t permuter) { - return kPermuterTypeNames[permuter]; -} - -namespace tesseract { - -const char *ScriptPosToString(enum ScriptPos script_pos) { - switch (script_pos) { - case SP_NORMAL: return "NORM"; - case SP_SUBSCRIPT: return "SUB"; - case SP_SUPERSCRIPT: return "SUPER"; - case SP_DROPCAP: return "DROPC"; - } - return "SP_UNKNOWN"; -} - -} // namespace tesseract. - -/** - * WERD_CHOICE::WERD_CHOICE - * - * Constructor to build a WERD_CHOICE from the given string. 
- * The function assumes that src_string is not nullptr. - */ -WERD_CHOICE::WERD_CHOICE(const char *src_string, - const UNICHARSET &unicharset) - : unicharset_(&unicharset){ - GenericVector encoding; - GenericVector lengths; - std::string cleaned = unicharset.CleanupString(src_string); - if (unicharset.encode_string(cleaned.c_str(), true, &encoding, &lengths, - nullptr)) { - lengths.push_back('\0'); - STRING src_lengths = &lengths[0]; - this->init(cleaned.c_str(), src_lengths.string(), 0.0, 0.0, NO_PERM); - } else { // There must have been an invalid unichar in the string. - this->init(8); - this->make_bad(); - } -} - -/** - * WERD_CHOICE::init - * - * Helper function to build a WERD_CHOICE from the given string, - * fragment lengths, rating, certainty and permuter. - * - * The function assumes that src_string is not nullptr. - * src_lengths argument could be nullptr, in which case the unichars - * in src_string are assumed to all be of length 1. - */ -void WERD_CHOICE::init(const char *src_string, - const char *src_lengths, - float src_rating, - float src_certainty, - uint8_t src_permuter) { - int src_string_len = strlen(src_string); - if (src_string_len == 0) { - this->init(8); - } else { - this->init(src_lengths ? strlen(src_lengths): src_string_len); - length_ = reserved_; - int offset = 0; - for (int i = 0; i < length_; ++i) { - int unichar_length = src_lengths ? 
src_lengths[i] : 1; - unichar_ids_[i] = - unicharset_->unichar_to_id(src_string+offset, unichar_length); - state_[i] = 1; - certainties_[i] = src_certainty; - offset += unichar_length; - } - } - adjust_factor_ = 1.0f; - rating_ = src_rating; - certainty_ = src_certainty; - permuter_ = src_permuter; - dangerous_ambig_found_ = false; -} - -/** - * WERD_CHOICE::~WERD_CHOICE - */ -WERD_CHOICE::~WERD_CHOICE() { - delete[] unichar_ids_; - delete[] script_pos_; - delete[] state_; - delete[] certainties_; -} - -const char *WERD_CHOICE::permuter_name() const { - return kPermuterTypeNames[permuter_]; -} - -// Returns the BLOB_CHOICE_LIST corresponding to the given index in the word, -// taken from the appropriate cell in the ratings MATRIX. -// Borrowed pointer, so do not delete. -BLOB_CHOICE_LIST* WERD_CHOICE::blob_choices(int index, MATRIX* ratings) const { - MATRIX_COORD coord = MatrixCoord(index); - BLOB_CHOICE_LIST* result = ratings->get(coord.col, coord.row); - if (result == nullptr) { - result = new BLOB_CHOICE_LIST; - ratings->put(coord.col, coord.row, result); - } - return result; -} - -// Returns the MATRIX_COORD corresponding to the location in the ratings -// MATRIX for the given index into the word. -MATRIX_COORD WERD_CHOICE::MatrixCoord(int index) const { - int col = 0; - for (int i = 0; i < index; ++i) - col += state_[i]; - int row = col + state_[index] - 1; - return MATRIX_COORD(col, row); -} - -// Sets the entries for the given index from the BLOB_CHOICE, assuming -// unit fragment lengths, but setting the state for this index to blob_count. -void WERD_CHOICE::set_blob_choice(int index, int blob_count, - const BLOB_CHOICE* blob_choice) { - unichar_ids_[index] = blob_choice->unichar_id(); - script_pos_[index] = tesseract::SP_NORMAL; - state_[index] = blob_count; - certainties_[index] = blob_choice->certainty(); -} - - -/** - * contains_unichar_id - * - * Returns true if unichar_ids_ contain the given unichar_id, false otherwise. 
- */ -bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const { - for (int i = 0; i < length_; ++i) { - if (unichar_ids_[i] == unichar_id) { - return true; - } - } - return false; -} - -/** - * remove_unichar_ids - * - * Removes num unichar ids starting from index start from unichar_ids_ - * and updates length_ and fragment_lengths_ to reflect this change. - * Note: this function does not modify rating_ and certainty_. - */ -void WERD_CHOICE::remove_unichar_ids(int start, int num) { - ASSERT_HOST(start >= 0 && start + num <= length_); - // Accumulate the states to account for the merged blobs. - for (int i = 0; i < num; ++i) { - if (start > 0) - state_[start - 1] += state_[start + i]; - else if (start + num < length_) - state_[start + num] += state_[start + i]; - } - for (int i = start; i + num < length_; ++i) { - unichar_ids_[i] = unichar_ids_[i + num]; - script_pos_[i] = script_pos_[i + num]; - state_[i] = state_[i + num]; - certainties_[i] = certainties_[i + num]; - } - length_ -= num; -} - -/** - * reverse_and_mirror_unichar_ids - * - * Reverses and mirrors unichars in unichar_ids. - */ -void WERD_CHOICE::reverse_and_mirror_unichar_ids() { - for (int i = 0; i < length_ / 2; ++i) { - UNICHAR_ID tmp_id = unichar_ids_[i]; - unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]); - unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id); - } - if (length_ % 2 != 0) { - unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]); - } -} - -/** - * punct_stripped - * - * Returns the half-open interval of unichar_id indices [start, end) which - * enclose the core portion of this word -- the part after stripping - * punctuation from the left and right. 
- */ -void WERD_CHOICE::punct_stripped(int *start, int *end) const { - *start = 0; - *end = length() - 1; - while (*start < length() && - unicharset()->get_ispunctuation(unichar_id(*start))) { - (*start)++; - } - while (*end > -1 && - unicharset()->get_ispunctuation(unichar_id(*end))) { - (*end)--; - } - (*end)++; -} - -void WERD_CHOICE::GetNonSuperscriptSpan(int *pstart, int *pend) const { - int end = length(); - while (end > 0 && - unicharset_->get_isdigit(unichar_ids_[end - 1]) && - BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) { - end--; - } - int start = 0; - while (start < end && - unicharset_->get_isdigit(unichar_ids_[start]) && - BlobPosition(start) == tesseract::SP_SUPERSCRIPT) { - start++; - } - *pstart = start; - *pend = end; -} - -WERD_CHOICE WERD_CHOICE::shallow_copy(int start, int end) const { - ASSERT_HOST(start >= 0 && start <= length_); - ASSERT_HOST(end >= 0 && end <= length_); - if (end < start) { end = start; } - WERD_CHOICE retval(unicharset_, end - start); - for (int i = start; i < end; i++) { - retval.append_unichar_id_space_allocated( - unichar_ids_[i], state_[i], 0.0f, certainties_[i]); - } - return retval; -} - -/** - * has_rtl_unichar_id - * - * Returns true if unichar_ids contain at least one "strongly" RTL unichar. - */ -bool WERD_CHOICE::has_rtl_unichar_id() const { - int i; - for (i = 0; i < length_; ++i) { - UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]); - if (dir == UNICHARSET::U_RIGHT_TO_LEFT || - dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) { - return true; - } - } - return false; -} - -/** - * string_and_lengths - * - * Populates the given word_str with unichars from unichar_ids and - * and word_lengths_str with the corresponding unichar lengths. 
- */ -void WERD_CHOICE::string_and_lengths(STRING *word_str, - STRING *word_lengths_str) const { - *word_str = ""; - if (word_lengths_str != nullptr) *word_lengths_str = ""; - for (int i = 0; i < length_; ++i) { - const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]); - *word_str += ch; - if (word_lengths_str != nullptr) { - *word_lengths_str += strlen(ch); - } - } -} - -/** - * append_unichar_id - * - * Make sure there is enough space in the word for the new unichar id - * and call append_unichar_id_space_allocated(). - */ -void WERD_CHOICE::append_unichar_id( - UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty) { - if (length_ == reserved_) { - this->double_the_size(); - } - this->append_unichar_id_space_allocated(unichar_id, blob_count, - rating, certainty); -} - -/** - * WERD_CHOICE::operator+= - * - * Cat a second word rating on the end of this current one. - * The ratings are added and the confidence is the min. - * If the permuters are NOT the same the permuter is set to COMPOUND_PERM - */ -WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) { - ASSERT_HOST(unicharset_ == second.unicharset_); - while (reserved_ < length_ + second.length()) { - this->double_the_size(); - } - const UNICHAR_ID *other_unichar_ids = second.unichar_ids(); - for (int i = 0; i < second.length(); ++i) { - unichar_ids_[length_ + i] = other_unichar_ids[i]; - state_[length_ + i] = second.state_[i]; - certainties_[length_ + i] = second.certainties_[i]; - script_pos_[length_ + i] = second.BlobPosition(i); - } - length_ += second.length(); - if (second.adjust_factor_ > adjust_factor_) - adjust_factor_ = second.adjust_factor_; - rating_ += second.rating(); // add ratings - if (second.certainty() < certainty_) // take min - certainty_ = second.certainty(); - if (second.dangerous_ambig_found_) - dangerous_ambig_found_ = true; - if (permuter_ == NO_PERM) { - permuter_ = second.permuter(); - } else if (second.permuter() != NO_PERM && - 
second.permuter() != permuter_) { - permuter_ = COMPOUND_PERM; - } - return *this; -} - - -/** - * WERD_CHOICE::operator= - * - * Allocate enough memory to hold a copy of source and copy over - * all the information from source to this WERD_CHOICE. - */ -WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) { - while (reserved_ < source.length()) { - this->double_the_size(); - } - - unicharset_ = source.unicharset_; - const UNICHAR_ID *other_unichar_ids = source.unichar_ids(); - for (int i = 0; i < source.length(); ++i) { - unichar_ids_[i] = other_unichar_ids[i]; - state_[i] = source.state_[i]; - certainties_[i] = source.certainties_[i]; - script_pos_[i] = source.BlobPosition(i); - } - length_ = source.length(); - adjust_factor_ = source.adjust_factor_; - rating_ = source.rating(); - certainty_ = source.certainty(); - min_x_height_ = source.min_x_height(); - max_x_height_ = source.max_x_height(); - permuter_ = source.permuter(); - dangerous_ambig_found_ = source.dangerous_ambig_found_; - return *this; -} - -// Sets up the script_pos_ member using the blobs_list to get the bln -// bounding boxes, *this to get the unichars, and this->unicharset -// to get the target positions. If small_caps is true, sub/super are not -// considered, but dropcaps are. -// NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.) -void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word, int debug) { - // Initialize to normal. 
- for (int i = 0; i < length_; ++i) - script_pos_[i] = tesseract::SP_NORMAL; - if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) { - return; - } - - int position_counts[4]; - for (int i = 0; i < 4; i++) { - position_counts[i] = 0; - } - - int chunk_index = 0; - for (int blob_index = 0; blob_index < length_; ++blob_index, ++chunk_index) { - TBLOB* tblob = word->blobs[chunk_index]; - int uni_id = unichar_id(blob_index); - TBOX blob_box = tblob->bounding_box(); - if (state_ != nullptr) { - for (int i = 1; i < state_[blob_index]; ++i) { - ++chunk_index; - tblob = word->blobs[chunk_index]; - blob_box += tblob->bounding_box(); - } - } - script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box, - uni_id); - if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) { - script_pos_[blob_index] = tesseract::SP_NORMAL; - } - position_counts[script_pos_[blob_index]]++; - } - // If almost everything looks like a superscript or subscript, - // we most likely just got the baseline wrong. 
- if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ || - position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) { - if (debug >= 2) { - tprintf("Most characters of %s are subscript or superscript.\n" - "That seems wrong, so I'll assume we got the baseline wrong\n", - unichar_string().string()); - } - for (int i = 0; i < length_; i++) { - ScriptPos sp = script_pos_[i]; - if (sp == tesseract::SP_SUBSCRIPT || sp == tesseract::SP_SUPERSCRIPT) { - position_counts[sp]--; - position_counts[tesseract::SP_NORMAL]++; - script_pos_[i] = tesseract::SP_NORMAL; - } - } - } - - if ((debug >= 1 && position_counts[tesseract::SP_NORMAL] < length_) || - debug >= 2) { - tprintf("SetScriptPosition on %s\n", unichar_string().string()); - int chunk_index = 0; - for (int blob_index = 0; blob_index < length_; ++blob_index) { - if (debug >= 2 || script_pos_[blob_index] != tesseract::SP_NORMAL) { - TBLOB* tblob = word->blobs[chunk_index]; - ScriptPositionOf(true, *unicharset_, tblob->bounding_box(), - unichar_id(blob_index)); - } - chunk_index += state_ != nullptr ? state_[blob_index] : 1; - } - } -} -// Sets the script_pos_ member from some source positions with a given length. -void WERD_CHOICE::SetScriptPositions(const tesseract::ScriptPos* positions, - int length) { - ASSERT_HOST(length == length_); - if (positions != script_pos_) { - delete [] script_pos_; - script_pos_ = new ScriptPos[length]; - memcpy(script_pos_, positions, sizeof(positions[0]) * length); - } -} -// Sets all the script_pos_ positions to the given position. 
-void WERD_CHOICE::SetAllScriptPositions(tesseract::ScriptPos position) { - for (int i = 0; i < length_; ++i) - script_pos_[i] = position; -} - -/* static */ -ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug, - const UNICHARSET& unicharset, - const TBOX& blob_box, - UNICHAR_ID unichar_id) { - ScriptPos retval = tesseract::SP_NORMAL; - int top = blob_box.top(); - int bottom = blob_box.bottom(); - int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(unichar_id, - &min_bottom, &max_bottom, - &min_top, &max_top); - - int sub_thresh_top = min_top - kMinSubscriptOffset; - int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset; - int sup_thresh_bot = max_bottom + kMinSuperscriptOffset; - if (bottom <= kMaxDropCapBottom) { - retval = tesseract::SP_DROPCAP; - } else if (top < sub_thresh_top && bottom < sub_thresh_bot) { - retval = tesseract::SP_SUBSCRIPT; - } else if (bottom > sup_thresh_bot) { - retval = tesseract::SP_SUPERSCRIPT; - } - - if (print_debug) { - const char *pos = ScriptPosToString(retval); - tprintf("%s Character %s[bot:%d top: %d] " - "bot_range[%d,%d] top_range[%d, %d] " - "sub_thresh[bot:%d top:%d] sup_thresh_bot %d\n", - pos, unicharset.id_to_unichar(unichar_id), - bottom, top, - min_bottom, max_bottom, min_top, max_top, - sub_thresh_bot, sub_thresh_top, - sup_thresh_bot); - } - return retval; -} - -// Returns the script-id (eg Han) of the dominant script in the word. -int WERD_CHOICE::GetTopScriptID() const { - int max_script = unicharset_->get_script_table_size(); - int *sid = new int[max_script]; - int x; - for (x = 0; x < max_script; x++) sid[x] = 0; - for (x = 0; x < length_; ++x) { - int script_id = unicharset_->get_script(unichar_id(x)); - sid[script_id]++; - } - if (unicharset_->han_sid() != unicharset_->null_sid()) { - // Add the Hiragana & Katakana counts to Han and zero them out. 
- if (unicharset_->hiragana_sid() != unicharset_->null_sid()) { - sid[unicharset_->han_sid()] += sid[unicharset_->hiragana_sid()]; - sid[unicharset_->hiragana_sid()] = 0; - } - if (unicharset_->katakana_sid() != unicharset_->null_sid()) { - sid[unicharset_->han_sid()] += sid[unicharset_->katakana_sid()]; - sid[unicharset_->katakana_sid()] = 0; - } - } - // Note that high script ID overrides lower one on a tie, thus biasing - // towards non-Common script (if sorted that way in unicharset file). - int max_sid = 0; - for (x = 1; x < max_script; x++) - if (sid[x] >= sid[max_sid]) max_sid = x; - if (sid[max_sid] < length_ / 2) - max_sid = unicharset_->null_sid(); - delete[] sid; - return max_sid; -} - -// Fixes the state_ for a chop at the given blob_posiiton. -void WERD_CHOICE::UpdateStateForSplit(int blob_position) { - int total_chunks = 0; - for (int i = 0; i < length_; ++i) { - total_chunks += state_[i]; - if (total_chunks > blob_position) { - ++state_[i]; - return; - } - } -} - -// Returns the sum of all the state elements, being the total number of blobs. -int WERD_CHOICE::TotalOfStates() const { - int total_chunks = 0; - for (int i = 0; i < length_; ++i) { - total_chunks += state_[i]; - } - return total_chunks; -} - -/** - * WERD_CHOICE::print - * - * Print WERD_CHOICE to stdout. 
- */ -void WERD_CHOICE::print(const char *msg) const { - tprintf("%s : ", msg); - for (int i = 0; i < length_; ++i) { - tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[i])); - } - tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n", - rating_, certainty_, adjust_factor_, permuter_, - min_x_height_, max_x_height_, dangerous_ambig_found_); - tprintf("pos"); - for (int i = 0; i < length_; ++i) { - tprintf("\t%s", ScriptPosToString(script_pos_[i])); - } - tprintf("\nstr"); - for (int i = 0; i < length_; ++i) { - tprintf("\t%s", unicharset_->id_to_unichar(unichar_ids_[i])); - } - tprintf("\nstate:"); - for (int i = 0; i < length_; ++i) { - tprintf("\t%d ", state_[i]); - } - tprintf("\nC"); - for (int i = 0; i < length_; ++i) { - tprintf("\t%.3f", certainties_[i]); - } - tprintf("\n"); -} - -// Prints the segmentation state with an introductory message. -void WERD_CHOICE::print_state(const char *msg) const { - tprintf("%s", msg); - for (int i = 0; i < length_; ++i) - tprintf(" %d", state_[i]); - tprintf("\n"); -} - -// Displays the segmentation state of *this (if not the same as the last -// one displayed) and waits for a click in the window. -void WERD_CHOICE::DisplaySegmentation(TWERD* word) { -#ifndef GRAPHICS_DISABLED - // Number of different colors to draw with. - const int kNumColors = 6; - static ScrollView *segm_window = nullptr; - // Check the state against the static prev_drawn_state. - static GenericVector prev_drawn_state; - bool already_done = prev_drawn_state.size() == length_; - if (!already_done) prev_drawn_state.init_to_size(length_, 0); - for (int i = 0; i < length_; ++i) { - if (prev_drawn_state[i] != state_[i]) { - already_done = false; - } - prev_drawn_state[i] = state_[i]; - } - if (already_done || word->blobs.empty()) return; - - // Create the window if needed. 
- if (segm_window == nullptr) { - segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, - 2000.0, 256.0, true); - } else { - segm_window->Clear(); - } - - TBOX bbox; - int blob_index = 0; - for (int c = 0; c < length_; ++c) { - ScrollView::Color color = - static_cast(c % kNumColors + 3); - for (int i = 0; i < state_[c]; ++i, ++blob_index) { - TBLOB* blob = word->blobs[blob_index]; - bbox += blob->bounding_box(); - blob->plot(segm_window, color, color); - } - } - segm_window->ZoomToRectangle(bbox.left(), bbox.top(), - bbox.right(), bbox.bottom()); - segm_window->Update(); - window_wait(segm_window); -#endif -} - - -bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, - const WERD_CHOICE &word2) { - const UNICHARSET *uchset = word1.unicharset(); - if (word2.unicharset() != uchset) return false; - int w1start, w1end; - word1.punct_stripped(&w1start, &w1end); - int w2start, w2end; - word2.punct_stripped(&w2start, &w2end); - if (w1end - w1start != w2end - w2start) return false; - for (int i = 0; i < w1end - w1start; i++) { - if (uchset->to_lower(word1.unichar_id(w1start + i)) != - uchset->to_lower(word2.unichar_id(w2start + i))) { - return false; - } - } - return true; -} - -/** - * print_ratings_list - * - * Send all the ratings out to the logfile. 
- * - * @param msg intro message - * @param ratings list of ratings - * @param current_unicharset unicharset that can be used - * for id-to-unichar conversion - */ -void print_ratings_list(const char *msg, - BLOB_CHOICE_LIST *ratings, - const UNICHARSET ¤t_unicharset) { - if (ratings->length() == 0) { - tprintf("%s:\n", msg); - return; - } - if (*msg != '\0') { - tprintf("%s\n", msg); - } - BLOB_CHOICE_IT c_it; - c_it.set_to_list(ratings); - for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - c_it.data()->print(¤t_unicharset); - if (!c_it.at_last()) tprintf("\n"); - } - tprintf("\n"); - fflush(stdout); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ratngs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ratngs.h deleted file mode 100644 index 4e3c6772..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/ratngs.h +++ /dev/null @@ -1,659 +0,0 @@ -/********************************************************************** - * File: ratngs.h (Formerly ratings.h) - * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes. - * Author: Ray Smith - * Created: Thu Apr 23 11:40:38 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef RATNGS_H -#define RATNGS_H - -#include -#include // for FLT_MAX - -#include "clst.h" -#include "elst.h" -#include "fontinfo.h" -#include "genericvector.h" -#include "matrix.h" -#include "unichar.h" -#include "unicharset.h" -#include "werd.h" - -class MATRIX; -struct TBLOB; -struct TWERD; - -// Enum to describe the source of a BLOB_CHOICE to make it possible to determine -// whether a blob has been classified by inspecting the BLOB_CHOICEs. -enum BlobChoiceClassifier { - BCC_STATIC_CLASSIFIER, // From the char_norm classifier. - BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier. - BCC_SPECKLE_CLASSIFIER, // Backup for failed classification. - BCC_AMBIG, // Generated by ambiguity detection. - BCC_FAKE, // From some other process. -}; - -class BLOB_CHOICE: public ELIST_LINK -{ - public: - BLOB_CHOICE() { - unichar_id_ = UNICHAR_SPACE; - fontinfo_id_ = -1; - fontinfo_id2_ = -1; - rating_ = 10.0; - certainty_ = -1.0; - script_id_ = -1; - xgap_before_ = 0; - xgap_after_ = 0; - min_xheight_ = 0.0f; - max_xheight_ = 0.0f; - yshift_ = 0.0f; - classifier_ = BCC_FAKE; - } - BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id - float src_rating, // rating - float src_cert, // certainty - int script_id, // script - float min_xheight, // min xheight in image pixel units - float max_xheight, // max xheight allowed by this char - float yshift, // the larger of y shift (top or bottom) - BlobChoiceClassifier c); // adapted match or other - BLOB_CHOICE(const BLOB_CHOICE &other); - ~BLOB_CHOICE() = default; - - UNICHAR_ID unichar_id() const { - return unichar_id_; - } - float rating() const { - return rating_; - } - float certainty() const { - return certainty_; - } - int16_t fontinfo_id() const { - return fontinfo_id_; - } - int16_t fontinfo_id2() const { - return fontinfo_id2_; - } - const GenericVector& fonts() const { - return fonts_; - } - void set_fonts(const GenericVector& fonts) { 
- fonts_ = fonts; - int score1 = 0, score2 = 0; - fontinfo_id_ = -1; - fontinfo_id2_ = -1; - for (int f = 0; f < fonts_.size(); ++f) { - if (fonts_[f].score > score1) { - score2 = score1; - fontinfo_id2_ = fontinfo_id_; - score1 = fonts_[f].score; - fontinfo_id_ = fonts_[f].fontinfo_id; - } else if (fonts_[f].score > score2) { - score2 = fonts_[f].score; - fontinfo_id2_ = fonts_[f].fontinfo_id; - } - } - } - int script_id() const { - return script_id_; - } - const MATRIX_COORD& matrix_cell() { - return matrix_cell_; - } - int16_t xgap_before() const { - return xgap_before_; - } - int16_t xgap_after() const { - return xgap_after_; - } - float min_xheight() const { - return min_xheight_; - } - float max_xheight() const { - return max_xheight_; - } - float yshift() const { - return yshift_; - } - BlobChoiceClassifier classifier() const { - return classifier_; - } - bool IsAdapted() const { - return classifier_ == BCC_ADAPTED_CLASSIFIER; - } - bool IsClassified() const { - return classifier_ == BCC_STATIC_CLASSIFIER || - classifier_ == BCC_ADAPTED_CLASSIFIER || - classifier_ == BCC_SPECKLE_CLASSIFIER; - } - - void set_unichar_id(UNICHAR_ID newunichar_id) { - unichar_id_ = newunichar_id; - } - void set_rating(float newrat) { - rating_ = newrat; - } - void set_certainty(float newrat) { - certainty_ = newrat; - } - void set_script(int newscript_id) { - script_id_ = newscript_id; - } - void set_matrix_cell(int col, int row) { - matrix_cell_.col = col; - matrix_cell_.row = row; - } - void set_xgap_before(int16_t gap) { - xgap_before_ = gap; - } - void set_xgap_after(int16_t gap) { - xgap_after_ = gap; - } - void set_classifier(BlobChoiceClassifier classifier) { - classifier_ = classifier; - } - static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { - BLOB_CHOICE* choice = new BLOB_CHOICE; - *choice = *src; - return choice; - } - // Returns true if *this and other agree on the baseline and x-height - // to within some tolerance based on a given estimate of the x-height. 
- bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, - bool debug) const; - - void print(const UNICHARSET *unicharset) const { - tprintf("r%.2f c%.2f x[%g,%g]: %d %s", - rating_, certainty_, - min_xheight_, max_xheight_, unichar_id_, - (unicharset == nullptr) ? "" : - unicharset->debug_str(unichar_id_).string()); - } - void print_full() const { - print(nullptr); - tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", - script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_); - } - // Sort function for sorting BLOB_CHOICEs in increasing order of rating. - static int SortByRating(const void *p1, const void *p2) { - const BLOB_CHOICE *bc1 = *static_cast(p1); - const BLOB_CHOICE *bc2 = *static_cast(p2); - return (bc1->rating_ < bc2->rating_) ? -1 : 1; - } - - private: - // Copy assignment operator. - BLOB_CHOICE& operator=(const BLOB_CHOICE& other); - - UNICHAR_ID unichar_id_; // unichar id - // Fonts and scores. Allowed to be empty. - GenericVector fonts_; - int16_t fontinfo_id_; // char font information - int16_t fontinfo_id2_; // 2nd choice font information - // Rating is the classifier distance weighted by the length of the outline - // in the blob. In terms of probability, classifier distance is -klog p such - // that the resulting distance is in the range [0, 1] and then - // rating = w (-k log p) where w is the weight for the length of the outline. - // Sums of ratings may be compared meaningfully for words of different - // segmentation. - float rating_; // size related - // Certainty is a number in [-20, 0] indicating the classifier certainty - // of the choice. In terms of probability, certainty = 20 (k log p) where - // k is defined as above to normalize -klog p to the range [0, 1]. - float certainty_; // absolute - int script_id_; - // Holds the position of this choice in the ratings matrix. - // Used to location position in the matrix during path backtracking. 
- MATRIX_COORD matrix_cell_; - int16_t xgap_before_; - int16_t xgap_after_; - // X-height range (in image pixels) that this classification supports. - float min_xheight_; - float max_xheight_; - // yshift_ - The vertical distance (in image pixels) the character is - // shifted (up or down) from an acceptable y position. - float yshift_; - BlobChoiceClassifier classifier_; // What generated *this. -}; - -// Make BLOB_CHOICE listable. -ELISTIZEH(BLOB_CHOICE) - -// Return the BLOB_CHOICE in bc_list matching a given unichar_id, -// or nullptr if there is no match. -BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list); - -// Permuter codes used in WERD_CHOICEs. -enum PermuterType { - NO_PERM, // 0 - PUNC_PERM, // 1 - TOP_CHOICE_PERM, // 2 - LOWER_CASE_PERM, // 3 - UPPER_CASE_PERM, // 4 - NGRAM_PERM, // 5 - NUMBER_PERM, // 6 - USER_PATTERN_PERM, // 7 - SYSTEM_DAWG_PERM, // 8 - DOC_DAWG_PERM, // 9 - USER_DAWG_PERM, // 10 - FREQ_DAWG_PERM, // 11 - COMPOUND_PERM, // 12 - - NUM_PERMUTER_TYPES -}; - -namespace tesseract { -// ScriptPos tells whether a character is subscript, superscript or normal. -enum ScriptPos { - SP_NORMAL, - SP_SUBSCRIPT, - SP_SUPERSCRIPT, - SP_DROPCAP -}; - -const char *ScriptPosToString(tesseract::ScriptPos script_pos); - -} // namespace tesseract. 
- -class WERD_CHOICE : public ELIST_LINK { - public: - static const float kBadRating; - static const char *permuter_name(uint8_t permuter); - - WERD_CHOICE(const UNICHARSET *unicharset) - : unicharset_(unicharset) { this->init(8); } - WERD_CHOICE(const UNICHARSET *unicharset, int reserved) - : unicharset_(unicharset) { this->init(reserved); } - WERD_CHOICE(const char *src_string, - const char *src_lengths, - float src_rating, - float src_certainty, - uint8_t src_permuter, - const UNICHARSET &unicharset) - : unicharset_(&unicharset) { - this->init(src_string, src_lengths, src_rating, - src_certainty, src_permuter); - } - WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); - WERD_CHOICE(const WERD_CHOICE &word) - : ELIST_LINK(word), unicharset_(word.unicharset_) { - this->init(word.length()); - this->operator=(word); - } - ~WERD_CHOICE(); - - const UNICHARSET *unicharset() const { - return unicharset_; - } - inline int length() const { - return length_; - } - float adjust_factor() const { - return adjust_factor_; - } - void set_adjust_factor(float factor) { - adjust_factor_ = factor; - } - inline const UNICHAR_ID *unichar_ids() const { - return unichar_ids_; - } - inline UNICHAR_ID unichar_id(int index) const { - assert(index < length_); - return unichar_ids_[index]; - } - inline int state(int index) const { - return state_[index]; - } - tesseract::ScriptPos BlobPosition(int index) const { - if (index < 0 || index >= length_) - return tesseract::SP_NORMAL; - return script_pos_[index]; - } - inline float rating() const { - return rating_; - } - inline float certainty() const { - return certainty_; - } - inline float certainty(int index) const { - return certainties_[index]; - } - inline float min_x_height() const { - return min_x_height_; - } - inline float max_x_height() const { - return max_x_height_; - } - inline void set_x_heights(float min_height, float max_height) { - min_x_height_ = min_height; - max_x_height_ = max_height; - } - inline uint8_t 
permuter() const { - return permuter_; - } - const char *permuter_name() const; - // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word, - // taken from the appropriate cell in the ratings MATRIX. - // Borrowed pointer, so do not delete. - BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const; - - // Returns the MATRIX_COORD corresponding to the location in the ratings - // MATRIX for the given index into the word. - MATRIX_COORD MatrixCoord(int index) const; - - inline void set_unichar_id(UNICHAR_ID unichar_id, int index) { - assert(index < length_); - unichar_ids_[index] = unichar_id; - } - bool dangerous_ambig_found() const { - return dangerous_ambig_found_; - } - void set_dangerous_ambig_found_(bool value) { - dangerous_ambig_found_ = value; - } - inline void set_rating(float new_val) { - rating_ = new_val; - } - inline void set_certainty(float new_val) { - certainty_ = new_val; - } - inline void set_permuter(uint8_t perm) { - permuter_ = perm; - } - // Note: this function should only be used if all the fields - // are populated manually with set_* functions (rather than - // (copy)constructors and append_* functions). - inline void set_length(int len) { - ASSERT_HOST(reserved_ >= len); - length_ = len; - } - - /// Make more space in unichar_id_ and fragment_lengths_ arrays. 
- inline void double_the_size() { - if (reserved_ > 0) { - unichar_ids_ = GenericVector::double_the_size_memcpy( - reserved_, unichar_ids_); - script_pos_ = GenericVector::double_the_size_memcpy( - reserved_, script_pos_); - state_ = GenericVector::double_the_size_memcpy( - reserved_, state_); - certainties_ = GenericVector::double_the_size_memcpy( - reserved_, certainties_); - reserved_ *= 2; - } else { - unichar_ids_ = new UNICHAR_ID[1]; - script_pos_ = new tesseract::ScriptPos[1]; - state_ = new int[1]; - certainties_ = new float[1]; - reserved_ = 1; - } - } - - /// Initializes WERD_CHOICE - reserves length slots in unichar_ids_ and - /// fragment_length_ arrays. Sets other values to default (blank) values. - inline void init(int reserved) { - reserved_ = reserved; - if (reserved > 0) { - unichar_ids_ = new UNICHAR_ID[reserved]; - script_pos_ = new tesseract::ScriptPos[reserved]; - state_ = new int[reserved]; - certainties_ = new float[reserved]; - } else { - unichar_ids_ = nullptr; - script_pos_ = nullptr; - state_ = nullptr; - certainties_ = nullptr; - } - length_ = 0; - adjust_factor_ = 1.0f; - rating_ = 0.0; - certainty_ = FLT_MAX; - min_x_height_ = 0.0f; - max_x_height_ = FLT_MAX; - permuter_ = NO_PERM; - unichars_in_script_order_ = false; // Tesseract is strict left-to-right. - dangerous_ambig_found_ = false; - } - - /// Helper function to build a WERD_CHOICE from the given string, - /// fragment lengths, rating, certainty and permuter. - /// The function assumes that src_string is not nullptr. - /// src_lengths argument could be nullptr, in which case the unichars - /// in src_string are assumed to all be of length 1. - void init(const char *src_string, const char *src_lengths, - float src_rating, float src_certainty, - uint8_t src_permuter); - - /// Set the fields in this choice to be default (bad) values. 
- inline void make_bad() { - length_ = 0; - rating_ = kBadRating; - certainty_ = -FLT_MAX; - } - - /// This function assumes that there is enough space reserved - /// in the WERD_CHOICE for adding another unichar. - /// This is an efficient alternative to append_unichar_id(). - inline void append_unichar_id_space_allocated( - UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty) { - assert(reserved_ > length_); - length_++; - this->set_unichar_id(unichar_id, blob_count, - rating, certainty, length_-1); - } - - void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty); - - inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty, int index) { - assert(index < length_); - unichar_ids_[index] = unichar_id; - state_[index] = blob_count; - certainties_[index] = certainty; - script_pos_[index] = tesseract::SP_NORMAL; - rating_ += rating; - if (certainty < certainty_) { - certainty_ = certainty; - } - } - // Sets the entries for the given index from the BLOB_CHOICE, assuming - // unit fragment lengths, but setting the state for this index to blob_count. - void set_blob_choice(int index, int blob_count, - const BLOB_CHOICE* blob_choice); - - bool contains_unichar_id(UNICHAR_ID unichar_id) const; - void remove_unichar_ids(int index, int num); - inline void remove_last_unichar_id() { --length_; } - inline void remove_unichar_id(int index) { - this->remove_unichar_ids(index, 1); - } - bool has_rtl_unichar_id() const; - void reverse_and_mirror_unichar_ids(); - - // Returns the half-open interval of unichar_id indices [start, end) which - // enclose the core portion of this word -- the part after stripping - // punctuation from the left and right. - void punct_stripped(int *start_core, int *end_core) const; - - // Returns the indices [start, end) containing the core of the word, stripped - // of any superscript digits on either side. (i.e., the non-footnote part - // of the word). 
There is no guarantee that the output range is non-empty. - void GetNonSuperscriptSpan(int *start, int *end) const; - - // Return a copy of this WERD_CHOICE with the choices [start, end). - // The result is useful only for checking against a dictionary. - WERD_CHOICE shallow_copy(int start, int end) const; - - void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const; - const STRING debug_string() const { - STRING word_str; - for (int i = 0; i < length_; ++i) { - word_str += unicharset_->debug_str(unichar_ids_[i]); - word_str += " "; - } - return word_str; - } - // Returns true if any unichar_id in the word is a non-space-delimited char. - bool ContainsAnyNonSpaceDelimited() const { - for (int i = 0; i < length_; ++i) { - if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true; - } - return false; - } - // Returns true if the word is all spaces. - bool IsAllSpaces() const { - for (int i = 0; i < length_; ++i) { - if (unichar_ids_[i] != UNICHAR_SPACE) return false; - } - return true; - } - - // Call this to override the default (strict left to right graphemes) - // with the fact that some engine produces a "reading order" set of - // Graphemes for each word. - bool set_unichars_in_script_order(bool in_script_order) { - return unichars_in_script_order_ = in_script_order; - } - - bool unichars_in_script_order() const { - return unichars_in_script_order_; - } - - // Returns a UTF-8 string equivalent to the current choice - // of UNICHAR IDs. - const STRING &unichar_string() const { - this->string_and_lengths(&unichar_string_, &unichar_lengths_); - return unichar_string_; - } - - // Returns the lengths, one byte each, representing the number of bytes - // required in the unichar_string for each UNICHAR_ID. 
- const STRING &unichar_lengths() const { - this->string_and_lengths(&unichar_string_, &unichar_lengths_); - return unichar_lengths_; - } - - // Sets up the script_pos_ member using the blobs_list to get the bln - // bounding boxes, *this to get the unichars, and this->unicharset - // to get the target positions. If small_caps is true, sub/super are not - // considered, but dropcaps are. - // NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.) - void SetScriptPositions(bool small_caps, TWERD* word, int debug = 0); - // Sets the script_pos_ member from some source positions with a given length. - void SetScriptPositions(const tesseract::ScriptPos* positions, int length); - // Sets all the script_pos_ positions to the given position. - void SetAllScriptPositions(tesseract::ScriptPos position); - - static tesseract::ScriptPos ScriptPositionOf(bool print_debug, - const UNICHARSET& unicharset, - const TBOX& blob_box, - UNICHAR_ID unichar_id); - - // Returns the "dominant" script ID for the word. By "dominant", the script - // must account for at least half the characters. Otherwise, it returns 0. - // Note that for Japanese, Hiragana and Katakana are simply treated as Han. - int GetTopScriptID() const; - - // Fixes the state_ for a chop at the given blob_posiiton. - void UpdateStateForSplit(int blob_position); - - // Returns the sum of all the state elements, being the total number of blobs. - int TotalOfStates() const; - - void print() const { this->print(""); } - void print(const char *msg) const; - // Prints the segmentation state with an introductory message. - void print_state(const char *msg) const; - - // Displays the segmentation state of *this (if not the same as the last - // one displayed) and waits for a click in the window. 
- void DisplaySegmentation(TWERD* word); - - WERD_CHOICE& operator+= ( // concatanate - const WERD_CHOICE & second);// second on first - - WERD_CHOICE& operator= (const WERD_CHOICE& source); - - private: - const UNICHARSET *unicharset_; - // TODO(rays) Perhaps replace the multiple arrays with an array of structs? - // unichar_ids_ is an array of classifier "results" that make up a word. - // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position - // of each unichar_id. - // state_[i] indicates the number of blobs in WERD_RES::chopped_word that - // were put together to make the classification results in the ith position - // in unichar_ids_, and certainties_[i] is the certainty of the choice that - // was used in this word. - // == Change from before == - // Previously there was fragment_lengths_ that allowed a word to be - // artificially composed of multiple fragment results. Since the new - // segmentation search doesn't do fragments, treatment of fragments has - // been moved to a lower level, augmenting the ratings matrix with the - // combined fragments, and allowing the language-model/segmentation-search - // to deal with only the combined unichar_ids. - UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word - tesseract::ScriptPos* script_pos_; // Normal/Sub/Superscript of each unichar. - int* state_; // Number of blobs in each unichar. - float* certainties_; // Certainty of each unichar. - int reserved_; // size of the above arrays - int length_; // word length - // Factor that was used to adjust the rating. - float adjust_factor_; - // Rating is the sum of the ratings of the individual blobs in the word. - float rating_; // size related - // certainty is the min (worst) certainty of the individual blobs in the word. - float certainty_; // absolute - // xheight computed from the result, or 0 if inconsistent. 
- float min_x_height_; - float max_x_height_; - uint8_t permuter_; // permuter code - - // Normally, the ratings_ matrix represents the recognition results in order - // from left-to-right. However, some engines (say Cube) may return - // recognition results in the order of the script's major reading direction - // (for Arabic, that is right-to-left). - bool unichars_in_script_order_; - // True if NoDangerousAmbig found an ambiguity. - bool dangerous_ambig_found_; - - // The following variables are populated and passed by reference any - // time unichar_string() or unichar_lengths() are called. - mutable STRING unichar_string_; - mutable STRING unichar_lengths_; -}; - -// Make WERD_CHOICE listable. -ELISTIZEH(WERD_CHOICE) -using BLOB_CHOICE_LIST_VECTOR = GenericVector; - -// Utilities for comparing WERD_CHOICEs - -bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, - const WERD_CHOICE &word2); - -// Utilities for debug printing. -void print_ratings_list( - const char *msg, // intro message - BLOB_CHOICE_LIST *ratings, // list of results - const UNICHARSET ¤t_unicharset // unicharset that can be used - // for id-to-unichar conversion - ); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rect.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rect.cpp deleted file mode 100644 index f0049e07..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rect.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/********************************************************************** - * File: rect.cpp (Formerly box.c) - * Description: Bounding box class definition. - * Author: Phil Cheatle - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "rect.h" -#include "strngs.h" // for STRING - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/********************************************************************** - * TBOX::TBOX() Constructor from 2 ICOORDS - * - **********************************************************************/ - -TBOX::TBOX( // constructor - const ICOORD pt1, // one corner - const ICOORD pt2 // the other corner - ) { - if (pt1.x () <= pt2.x ()) { - if (pt1.y () <= pt2.y ()) { - bot_left = pt1; - top_right = pt2; - } - else { - bot_left = ICOORD (pt1.x (), pt2.y ()); - top_right = ICOORD (pt2.x (), pt1.y ()); - } - } - else { - if (pt1.y () <= pt2.y ()) { - bot_left = ICOORD (pt2.x (), pt1.y ()); - top_right = ICOORD (pt1.x (), pt2.y ()); - } - else { - bot_left = pt2; - top_right = pt1; - } - } -} - -/********************************************************************** - * TBOX::TBOX() Constructor from 4 integer values. - * Note: It is caller's responsibility to provide values in the right - * order. - **********************************************************************/ - -TBOX::TBOX( //constructor - int16_t left, int16_t bottom, int16_t right, int16_t top) - : bot_left(left, bottom), top_right(right, top) { -} - -// rotate_large constructs the containing bounding box of all 4 -// corners after rotating them. 
It therefore guarantees that all -// original content is contained within, but also slightly enlarges the box. -void TBOX::rotate_large(const FCOORD& vec) { - ICOORD top_left(bot_left.x(), top_right.y()); - ICOORD bottom_right(top_right.x(), bot_left.y()); - top_left.rotate(vec); - bottom_right.rotate(vec); - rotate(vec); - TBOX box2(top_left, bottom_right); - *this += box2; -} - -/********************************************************************** - * TBOX::intersection() Build the largest box contained in both boxes - * - **********************************************************************/ - -TBOX TBOX::intersection( //shared area box - const TBOX &box) const { - int16_t left; - int16_t bottom; - int16_t right; - int16_t top; - if (overlap (box)) { - if (box.bot_left.x () > bot_left.x ()) - left = box.bot_left.x (); - else - left = bot_left.x (); - - if (box.top_right.x () < top_right.x ()) - right = box.top_right.x (); - else - right = top_right.x (); - - if (box.bot_left.y () > bot_left.y ()) - bottom = box.bot_left.y (); - else - bottom = bot_left.y (); - - if (box.top_right.y () < top_right.y ()) - top = box.top_right.y (); - else - top = top_right.y (); - } - else { - left = INT16_MAX; - bottom = INT16_MAX; - top = -INT16_MAX; - right = -INT16_MAX; - } - return TBOX (left, bottom, right, top); -} - - -/********************************************************************** - * TBOX::bounding_union() Build the smallest box containing both boxes - * - **********************************************************************/ - -TBOX TBOX::bounding_union( //box enclosing both - const TBOX &box) const { - ICOORD bl; //bottom left - ICOORD tr; //top right - - if (box.bot_left.x () < bot_left.x ()) - bl.set_x (box.bot_left.x ()); - else - bl.set_x (bot_left.x ()); - - if (box.top_right.x () > top_right.x ()) - tr.set_x (box.top_right.x ()); - else - tr.set_x (top_right.x ()); - - if (box.bot_left.y () < bot_left.y ()) - bl.set_y (box.bot_left.y ()); - else - 
bl.set_y (bot_left.y ()); - - if (box.top_right.y () > top_right.y ()) - tr.set_y (box.top_right.y ()); - else - tr.set_y (top_right.y ()); - return TBOX (bl, tr); -} - - -/********************************************************************** - * TBOX::plot() Paint a box using specified settings - * - **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void TBOX::plot( //paint box - ScrollView* fd, //where to paint - ScrollView::Color fill_colour, //colour for inside - ScrollView::Color border_colour //colour for border - ) const { - fd->Brush(fill_colour); - fd->Pen(border_colour); - plot(fd); -} -#endif - -// Appends the bounding box as (%d,%d)->(%d,%d) to a STRING. -void TBOX::print_to_str(STRING *str) const { - // "(%d,%d)->(%d,%d)", left(), bottom(), right(), top() - str->add_str_int("(", left()); - str->add_str_int(",", bottom()); - str->add_str_int(")->(", right()); - str->add_str_int(",", top()); - *str += ')'; -} - -// Writes to the given file. Returns false in case of error. -bool TBOX::Serialize(FILE* fp) const { - if (!bot_left.Serialize(fp)) return false; - if (!top_right.Serialize(fp)) return false; - return true; -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. 
-bool TBOX::DeSerialize(bool swap, FILE* fp) { - if (!bot_left.DeSerialize(swap, fp)) return false; - if (!top_right.DeSerialize(swap, fp)) return false; - return true; -} - -/********************************************************************** - * operator+= - * - * Extend one box to include the other (In place union) - **********************************************************************/ - -DLLSYM TBOX & -operator+= ( //bounding bounding bx -TBOX & op1, //operands -const TBOX & op2) { - if (op2.bot_left.x () < op1.bot_left.x ()) - op1.bot_left.set_x (op2.bot_left.x ()); - - if (op2.top_right.x () > op1.top_right.x ()) - op1.top_right.set_x (op2.top_right.x ()); - - if (op2.bot_left.y () < op1.bot_left.y ()) - op1.bot_left.set_y (op2.bot_left.y ()); - - if (op2.top_right.y () > op1.top_right.y ()) - op1.top_right.set_y (op2.top_right.y ()); - - return op1; -} - - -/********************************************************************** - * operator&= - * - * Reduce one box to intersection with the other (In place intersection) - **********************************************************************/ - -TBOX& operator&=(TBOX& op1, const TBOX& op2) { - if (op1.overlap (op2)) { - if (op2.bot_left.x () > op1.bot_left.x ()) - op1.bot_left.set_x (op2.bot_left.x ()); - - if (op2.top_right.x () < op1.top_right.x ()) - op1.top_right.set_x (op2.top_right.x ()); - - if (op2.bot_left.y () > op1.bot_left.y ()) - op1.bot_left.set_y (op2.bot_left.y ()); - - if (op2.top_right.y () < op1.top_right.y ()) - op1.top_right.set_y (op2.top_right.y ()); - } - else { - op1.bot_left.set_x (INT16_MAX); - op1.bot_left.set_y (INT16_MAX); - op1.top_right.set_x (-INT16_MAX); - op1.top_right.set_y (-INT16_MAX); - } - return op1; -} - -bool TBOX::x_almost_equal(const TBOX &box, int tolerance) const { - return (abs(left() - box.left()) <= tolerance && - abs(right() - box.right()) <= tolerance); -} - -bool TBOX::almost_equal(const TBOX &box, int tolerance) const { - return (abs(left() - 
box.left()) <= tolerance && - abs(right() - box.right()) <= tolerance && - abs(top() - box.top()) <= tolerance && - abs(bottom() - box.bottom()) <= tolerance); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rect.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rect.h deleted file mode 100644 index 344559a0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rect.h +++ /dev/null @@ -1,494 +0,0 @@ -/********************************************************************** - * File: rect.h (Formerly box.h) - * Description: Bounding box class definition. - * Author: Phil Cheatle - * Created: Wed Oct 16 15:18:45 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef RECT_H -#define RECT_H - -#include // for std::max, std::min -#include // for std::ceil, std::floor -#include // for INT16_MAX -#include // for FILE -#include "platform.h" // for DLLSYM -#include "points.h" // for ICOORD, FCOORD -#include "scrollview.h" // for ScrollView, ScrollView::Color -#include "tprintf.h" // for tprintf - -class STRING; - -class DLLSYM TBOX { // bounding box - public: - TBOX (): // empty constructor making a null box - bot_left (INT16_MAX, INT16_MAX), top_right (-INT16_MAX, -INT16_MAX) { - } - - TBOX( // constructor - const ICOORD pt1, // one corner - const ICOORD pt2); // the other corner - - TBOX( // constructor - int16_t left, int16_t bottom, int16_t right, int16_t top); - - TBOX( // box around FCOORD - const FCOORD pt); - - bool null_box() const { // Is box null - return ((left () >= right ()) || (top () <= bottom ())); - } - - bool operator==(const TBOX& other) const { - return bot_left == other.bot_left && top_right == other.top_right; - } - - int16_t top() const { // coord of top - return top_right.y (); - } - void set_top(int y) { - top_right.set_y(y); - } - - int16_t bottom() const { // coord of bottom - return bot_left.y (); - } - void set_bottom(int y) { - bot_left.set_y(y); - } - - int16_t left() const { // coord of left - return bot_left.x (); - } - void set_left(int x) { - bot_left.set_x(x); - } - - int16_t right() const { // coord of right - return top_right.x (); - } - void set_right(int x) { - top_right.set_x(x); - } - int x_middle() const { - return (bot_left.x() + top_right.x()) / 2; - } - int y_middle() const { - return (bot_left.y() + top_right.y()) / 2; - } - - const ICOORD &botleft() const { // access function - return bot_left; - } - - ICOORD botright() const { // ~ access function - return ICOORD (top_right.x (), bot_left.y ()); - } - - ICOORD topleft() const { // ~ access function - return ICOORD (bot_left.x (), top_right.y ()); 
- } - - const ICOORD &topright() const { // access function - return top_right; - } - - int16_t height() const { // how high is it? - if (!null_box ()) - return top_right.y () - bot_left.y (); - else - return 0; - } - - int16_t width() const { // how high is it? - if (!null_box ()) - return top_right.x () - bot_left.x (); - else - return 0; - } - - int32_t area() const { // what is the area? - if (!null_box ()) - return width () * height (); - else - return 0; - } - - // Pads the box on either side by the supplied x,y pad amounts. - // NO checks for exceeding any bounds like 0 or an image size. - void pad(int xpad, int ypad) { - ICOORD pad(xpad, ypad); - bot_left -= pad; - top_right += pad; - } - - void move_bottom_edge( // move one edge - const int16_t y) { // by +/- y - bot_left += ICOORD (0, y); - } - - void move_left_edge( // move one edge - const int16_t x) { // by +/- x - bot_left += ICOORD (x, 0); - } - - void move_right_edge( // move one edge - const int16_t x) { // by +/- x - top_right += ICOORD (x, 0); - } - - void move_top_edge( // move one edge - const int16_t y) { // by +/- y - top_right += ICOORD (0, y); - } - - void move( // move box - const ICOORD vec) { // by vector - bot_left += vec; - top_right += vec; - } - - void move( // move box - const FCOORD vec) { // by float vector - bot_left.set_x(static_cast(std::floor(bot_left.x() + vec.x()))); - // round left - bot_left.set_y(static_cast(std::floor(bot_left.y() + vec.y()))); - // round down - top_right.set_x(static_cast(std::ceil(top_right.x() + vec.x()))); - // round right - top_right.set_y(static_cast(std::ceil(top_right.y() + vec.y()))); - // round up - } - - void scale( // scale box - const float f) { // by multiplier - // round left - bot_left.set_x(static_cast(std::floor(bot_left.x() * f))); - // round down - bot_left.set_y(static_cast(std::floor(bot_left.y() * f))); - // round right - top_right.set_x(static_cast(std::ceil(top_right.x() * f))); - // round up - 
top_right.set_y(static_cast(std::ceil(top_right.y() * f))); - } - void scale( // scale box - const FCOORD vec) { // by float vector - bot_left.set_x(static_cast(std::floor(bot_left.x() * vec.x()))); - bot_left.set_y(static_cast(std::floor(bot_left.y() * vec.y()))); - top_right.set_x(static_cast(std::ceil(top_right.x() * vec.x()))); - top_right.set_y(static_cast(std::ceil(top_right.y() * vec.y()))); - } - - // rotate doesn't enlarge the box - it just rotates the bottom-left - // and top-right corners. Use rotate_large if you want to guarantee - // that all content is contained within the rotated box. - void rotate(const FCOORD& vec) { // by vector - bot_left.rotate (vec); - top_right.rotate (vec); - *this = TBOX (bot_left, top_right); - } - // rotate_large constructs the containing bounding box of all 4 - // corners after rotating them. It therefore guarantees that all - // original content is contained within, but also slightly enlarges the box. - void rotate_large(const FCOORD& vec); - - bool contains( // is pt inside box - const FCOORD pt) const; - - bool contains( // is box inside box - const TBOX &box) const; - - bool overlap( // do boxes overlap - const TBOX &box) const; - - bool major_overlap( // do boxes overlap more than half - const TBOX &box) const; - - // Do boxes overlap on x axis. - bool x_overlap(const TBOX &box) const; - - // Return the horizontal gap between the boxes. If the boxes - // overlap horizontally then the return value is negative, indicating - // the amount of the overlap. - int x_gap(const TBOX& box) const { - return std::max(bot_left.x(), box.bot_left.x()) - - std::min(top_right.x(), box.top_right.x()); - } - - // Return the vertical gap between the boxes. If the boxes - // overlap vertically then the return value is negative, indicating - // the amount of the overlap. 
- int y_gap(const TBOX& box) const { - return std::max(bot_left.y(), box.bot_left.y()) - - std::min(top_right.y(), box.top_right.y()); - } - - // Do boxes overlap on x axis by more than - // half of the width of the narrower box. - bool major_x_overlap(const TBOX &box) const; - - // Do boxes overlap on y axis. - bool y_overlap(const TBOX &box) const; - - // Do boxes overlap on y axis by more than - // half of the height of the shorter box. - bool major_y_overlap(const TBOX &box) const; - - // fraction of current box's area covered by other - double overlap_fraction(const TBOX &box) const; - - // fraction of the current box's projected area covered by the other's - double x_overlap_fraction(const TBOX& box) const; - - // fraction of the current box's projected area covered by the other's - double y_overlap_fraction(const TBOX& box) const; - - // Returns true if the boxes are almost equal on x axis. - bool x_almost_equal(const TBOX &box, int tolerance) const; - - // Returns true if the boxes are almost equal - bool almost_equal(const TBOX &box, int tolerance) const; - - TBOX intersection( // shared area box - const TBOX &box) const; - - TBOX bounding_union( // box enclosing both - const TBOX &box) const; - - // Sets the box boundaries to the given coordinates. - void set_to_given_coords(int x_min, int y_min, int x_max, int y_max) { - bot_left.set_x(x_min); - bot_left.set_y(y_min); - top_right.set_x(x_max); - top_right.set_y(y_max); - } - - void print() const { // print - tprintf("Bounding box=(%d,%d)->(%d,%d)\n", - left(), bottom(), right(), top()); - } - // Appends the bounding box as (%d,%d)->(%d,%d) to a STRING. 
- void print_to_str(STRING *str) const; - -#ifndef GRAPHICS_DISABLED - void plot( // use current settings - ScrollView* fd) const { // where to paint - fd->Rectangle(bot_left.x (), bot_left.y (), top_right.x (), - top_right.y ()); - } - - void plot( // paint box - ScrollView* fd, // where to paint - ScrollView::Color fill_colour, // colour for inside - ScrollView::Color border_colour) const; // colour for border -#endif - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - friend TBOX& operator+=(TBOX&, const TBOX&); - // in place union - friend TBOX& operator&=(TBOX&, const TBOX&); - // in place intersection - - private: - ICOORD bot_left; // bottom left corner - ICOORD top_right; // top right corner -}; - -/********************************************************************** - * TBOX::TBOX() Constructor from 1 FCOORD - * - **********************************************************************/ - -inline TBOX::TBOX( // constructor - const FCOORD pt // floating centre - ) { - bot_left = ICOORD(static_cast(std::floor(pt.x())), - static_cast(std::floor(pt.y()))); - top_right = ICOORD(static_cast(std::ceil(pt.x())), - static_cast(std::ceil(pt.y()))); -} - - -/********************************************************************** - * TBOX::contains() Is point within box - * - **********************************************************************/ - -inline bool TBOX::contains(const FCOORD pt) const { - return ((pt.x () >= bot_left.x ()) && - (pt.x () <= top_right.x ()) && - (pt.y () >= bot_left.y ()) && (pt.y () <= top_right.y ())); -} - - -/********************************************************************** - * TBOX::contains() Is box within box - * - **********************************************************************/ - -inline bool 
TBOX::contains(const TBOX &box) const { - return (contains (box.bot_left) && contains (box.top_right)); -} - - -/********************************************************************** - * TBOX::overlap() Do two boxes overlap? - * - **********************************************************************/ - -inline bool TBOX::overlap( // do boxes overlap - const TBOX &box) const { - return ((box.bot_left.x () <= top_right.x ()) && - (box.top_right.x () >= bot_left.x ()) && - (box.bot_left.y () <= top_right.y ()) && - (box.top_right.y () >= bot_left.y ())); -} - -/********************************************************************** - * TBOX::major_overlap() Do two boxes overlap by at least half of the smallest? - * - **********************************************************************/ - -inline bool TBOX::major_overlap( // Do boxes overlap more that half. - const TBOX &box) const { - int overlap = std::min(box.top_right.x(), top_right.x()); - overlap -= std::max(box.bot_left.x(), bot_left.x()); - overlap += overlap; - if (overlap < std::min(box.width(), width())) - return false; - overlap = std::min(box.top_right.y(), top_right.y()); - overlap -= std::max(box.bot_left.y(), bot_left.y()); - overlap += overlap; - if (overlap < std::min(box.height(), height())) - return false; - return true; -} - -/********************************************************************** - * TBOX::overlap_fraction() Fraction of area covered by the other box - * - **********************************************************************/ - -inline double TBOX::overlap_fraction(const TBOX &box) const { - double fraction = 0.0; - if (this->area()) { - fraction = this->intersection(box).area() * 1.0 / this->area(); - } - return fraction; -} - -/********************************************************************** - * TBOX::x_overlap() Do two boxes overlap on x-axis - * - **********************************************************************/ - -inline bool TBOX::x_overlap(const TBOX &box) 
const { - return ((box.bot_left.x() <= top_right.x()) && - (box.top_right.x() >= bot_left.x())); -} - -/********************************************************************** - * TBOX::major_x_overlap() Do two boxes overlap by more than half the - * width of the narrower box on the x-axis - * - **********************************************************************/ - -inline bool TBOX::major_x_overlap(const TBOX &box) const { - int16_t overlap = box.width(); - if (this->left() > box.left()) { - overlap -= this->left() - box.left(); - } - if (this->right() < box.right()) { - overlap -= box.right() - this->right(); - } - return (overlap >= box.width() / 2 || overlap >= this->width() / 2); -} - -/********************************************************************** - * TBOX::y_overlap() Do two boxes overlap on y-axis - * - **********************************************************************/ - -inline bool TBOX::y_overlap(const TBOX &box) const { - return ((box.bot_left.y() <= top_right.y()) && - (box.top_right.y() >= bot_left.y())); -} - -/********************************************************************** - * TBOX::major_y_overlap() Do two boxes overlap by more than half the - * height of the shorter box on the y-axis - * - **********************************************************************/ - -inline bool TBOX::major_y_overlap(const TBOX &box) const { - int16_t overlap = box.height(); - if (this->bottom() > box.bottom()) { - overlap -= this->bottom() - box.bottom(); - } - if (this->top() < box.top()) { - overlap -= box.top() - this->top(); - } - return (overlap >= box.height() / 2 || overlap >= this->height() / 2); -} - -/********************************************************************** - * TBOX::x_overlap_fraction() Calculates the horizontal overlap of the - * given boxes as a fraction of this boxes - * width. 
- * - **********************************************************************/ - -inline double TBOX::x_overlap_fraction(const TBOX& other) const { - int low = std::max(left(), other.left()); - int high = std::min(right(), other.right()); - int width = right() - left(); - if (width == 0) { - int x = left(); - if (other.left() <= x && x <= other.right()) - return 1.0; - else - return 0.0; - } else { - return std::max(0.0, static_cast(high - low) / width); - } -} - -/********************************************************************** - * TBOX::y_overlap_fraction() Calculates the vertical overlap of the - * given boxes as a fraction of this boxes - * height. - * - **********************************************************************/ - -inline double TBOX::y_overlap_fraction(const TBOX& other) const { - int low = std::max(bottom(), other.bottom()); - int high = std::min(top(), other.top()); - int height = top() - bottom(); - if (height == 0) { - int y = bottom(); - if (other.bottom() <= y && y <= other.top()) - return 1.0; - else - return 0.0; - } else { - return std::max(0.0, static_cast(high - low) / height); - } -} - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rejctmap.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rejctmap.cpp deleted file mode 100644 index 85a3cd98..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rejctmap.cpp +++ /dev/null @@ -1,450 +0,0 @@ -/********************************************************************** - * File: rejctmap.cpp (Formerly rejmap.c) - * Description: REJ and REJMAP class functions. - * Author: Phil Cheatle - * Created: Thu Jun 9 13:46:38 BST 1994 - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "host.h" -#include "rejctmap.h" -#include "params.h" - -bool REJ::perm_rejected() { //Is char perm reject? - return (flag (R_TESS_FAILURE) || - flag (R_SMALL_XHT) || - flag (R_EDGE_CHAR) || - flag (R_1IL_CONFLICT) || - flag (R_POSTNN_1IL) || - flag (R_REJ_CBLOB) || - flag (R_BAD_REPETITION) || flag (R_MM_REJECT)); -} - - -bool REJ::rej_before_nn_accept() { - return flag (R_POOR_MATCH) || - flag (R_NOT_TESS_ACCEPTED) || - flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER); -} - - -bool REJ::rej_between_nn_and_mm() { - return flag (R_HYPHEN) || - flag (R_DUBIOUS) || - flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP); -} - - -bool REJ::rej_between_mm_and_quality_accept() { - return flag (R_BAD_QUALITY); -} - - -bool REJ::rej_between_quality_and_minimal_rej_accept() { - return flag (R_DOC_REJ) || - flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ); -} - - -bool REJ::rej_before_mm_accept() { - return rej_between_nn_and_mm () || - (rej_before_nn_accept () && - !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT)); -} - - -bool REJ::rej_before_quality_accept() { - return rej_between_mm_and_quality_accept () || - (!flag (R_MM_ACCEPT) && rej_before_mm_accept ()); -} - - -bool REJ::rejected() { //Is char rejected? 
- if (flag (R_MINIMAL_REJ_ACCEPT)) - return false; - else - return (perm_rejected () || - rej_between_quality_and_minimal_rej_accept () || - (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ())); -} - - -bool REJ::accept_if_good_quality() { //potential rej? - return (rejected () && - !perm_rejected () && - flag (R_BAD_PERMUTER) && - !flag (R_POOR_MATCH) && - !flag (R_NOT_TESS_ACCEPTED) && - !flag (R_CONTAINS_BLANKS) && - (!rej_between_nn_and_mm () && - !rej_between_mm_and_quality_accept () && - !rej_between_quality_and_minimal_rej_accept ())); -} - - -void REJ::setrej_tess_failure() { //Tess generated blank - set_flag(R_TESS_FAILURE); -} - - -void REJ::setrej_small_xht() { //Small xht char/wd - set_flag(R_SMALL_XHT); -} - - -void REJ::setrej_edge_char() { //Close to image edge - set_flag(R_EDGE_CHAR); -} - - -void REJ::setrej_1Il_conflict() { //Initial reject map - set_flag(R_1IL_CONFLICT); -} - - -void REJ::setrej_postNN_1Il() { //1Il after NN - set_flag(R_POSTNN_1IL); -} - - -void REJ::setrej_rej_cblob() { //Insert duff blob - set_flag(R_REJ_CBLOB); -} - - -void REJ::setrej_mm_reject() { //Matrix matcher - set_flag(R_MM_REJECT); -} - - -void REJ::setrej_bad_repetition() { //Odd repeated char - set_flag(R_BAD_REPETITION); -} - - -void REJ::setrej_poor_match() { //Failed Rays heuristic - set_flag(R_POOR_MATCH); -} - - -void REJ::setrej_not_tess_accepted() { - //TEMP reject_word - set_flag(R_NOT_TESS_ACCEPTED); -} - - -void REJ::setrej_contains_blanks() { - //TEMP reject_word - set_flag(R_CONTAINS_BLANKS); -} - - -void REJ::setrej_bad_permuter() { //POTENTIAL reject_word - set_flag(R_BAD_PERMUTER); -} - - -void REJ::setrej_hyphen() { //PostNN dubious hyphen or . 
- set_flag(R_HYPHEN); -} - - -void REJ::setrej_dubious() { //PostNN dubious limit - set_flag(R_DUBIOUS); -} - - -void REJ::setrej_no_alphanums() { //TEMP reject_word - set_flag(R_NO_ALPHANUMS); -} - - -void REJ::setrej_mostly_rej() { //TEMP reject_word - set_flag(R_MOSTLY_REJ); -} - - -void REJ::setrej_xht_fixup() { //xht fixup - set_flag(R_XHT_FIXUP); -} - - -void REJ::setrej_bad_quality() { //TEMP reject_word - set_flag(R_BAD_QUALITY); -} - - -void REJ::setrej_doc_rej() { //TEMP reject_word - set_flag(R_DOC_REJ); -} - - -void REJ::setrej_block_rej() { //TEMP reject_word - set_flag(R_BLOCK_REJ); -} - - -void REJ::setrej_row_rej() { //TEMP reject_word - set_flag(R_ROW_REJ); -} - - -void REJ::setrej_unlv_rej() { //TEMP reject_word - set_flag(R_UNLV_REJ); -} - - -void REJ::setrej_hyphen_accept() { //NN Flipped a char - set_flag(R_HYPHEN_ACCEPT); -} - - -void REJ::setrej_nn_accept() { //NN Flipped a char - set_flag(R_NN_ACCEPT); -} - - -void REJ::setrej_mm_accept() { //Matrix matcher - set_flag(R_MM_ACCEPT); -} - - -void REJ::setrej_quality_accept() { //Quality flip a char - set_flag(R_QUALITY_ACCEPT); -} - - -void REJ::setrej_minimal_rej_accept() { - //Accept all except blank - set_flag(R_MINIMAL_REJ_ACCEPT); -} - - -void REJ::full_print(FILE *fp) { - fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F"); - fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F"); - fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F"); - fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F"); - fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F"); - fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F"); - fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F"); - fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F"); - fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F"); - fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n", - flag (R_NOT_TESS_ACCEPTED) ? 
"T" : "F"); - fprintf (fp, "R_CONTAINS_BLANKS: %s\n", - flag (R_CONTAINS_BLANKS) ? "T" : "F"); - fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F"); - fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F"); - fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F"); - fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F"); - fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F"); - fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F"); - fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F"); - fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F"); - fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F"); - fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F"); - fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F"); - fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n", - flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); -} - -REJMAP &REJMAP::operator=(const REJMAP &source) { - initialise(source.len); - for (int i = 0; i < len; i++) { - ptr[i] = source.ptr[i]; - } - return *this; -} - -void REJMAP::initialise(int16_t length) { - ptr.reset(new REJ[length]); - len = length; -} - - -int16_t REJMAP::accept_count() { //How many accepted? - int i; - int16_t count = 0; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted ()) - count++; - } - return count; -} - - -bool REJMAP::recoverable_rejects() { //Any non perm rejs? - for (int i = 0; i < len; i++) { - if (ptr[i].recoverable ()) - return true; - } - return false; -} - - -bool REJMAP::quality_recoverable_rejects() { //Any potential rejs? 
- for (int i = 0; i < len; i++) { - if (ptr[i].accept_if_good_quality ()) - return true; - } - return false; -} - - -void REJMAP::remove_pos( //Cut out an element - int16_t pos //element to remove - ) { - ASSERT_HOST (pos >= 0); - ASSERT_HOST (pos < len); - ASSERT_HOST (len > 0); - - len--; - for (; pos < len; pos++) ptr[pos] = ptr[pos + 1]; -} - - -void REJMAP::print(FILE *fp) { - int i; - char buff[512]; - - for (i = 0; i < len; i++) { - buff[i] = ptr[i].display_char (); - } - buff[i] = '\0'; - fprintf (fp, "\"%s\"", buff); -} - - -void REJMAP::full_print(FILE *fp) { - int i; - - for (i = 0; i < len; i++) { - ptr[i].full_print (fp); - fprintf (fp, "\n"); - } -} - - -void REJMAP::rej_word_small_xht() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - ptr[i].setrej_small_xht (); - } -} - - -void REJMAP::rej_word_tess_failure() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - ptr[i].setrej_tess_failure (); - } -} - - -void REJMAP::rej_word_not_tess_accepted() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted(); - } -} - - -void REJMAP::rej_word_contains_blanks() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_contains_blanks(); - } -} - - -void REJMAP::rej_word_bad_permuter() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_bad_permuter (); - } -} - - -void REJMAP::rej_word_xht_fixup() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_xht_fixup(); - } -} - - -void REJMAP::rej_word_no_alphanums() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_no_alphanums(); - } -} - - -void REJMAP::rej_word_mostly_rej() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_mostly_rej(); - } -} - - -void 
REJMAP::rej_word_bad_quality() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_bad_quality(); - } -} - - -void REJMAP::rej_word_doc_rej() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_doc_rej(); - } -} - - -void REJMAP::rej_word_block_rej() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_block_rej(); - } -} - - -void REJMAP::rej_word_row_rej() { //Reject whole word - int i; - - for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_row_rej(); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rejctmap.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rejctmap.h deleted file mode 100644 index 338843d3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/rejctmap.h +++ /dev/null @@ -1,263 +0,0 @@ -/********************************************************************** - * File: rejctmap.h (Formerly rejmap.h) - * Description: REJ and REJMAP class functions. - * Author: Phil Cheatle - * Created: Thu Jun 9 13:46:38 BST 1994 - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - -This module may look unnecessarily verbose, but here's the philosophy... - -ALL processing of the reject map is done in this module. 
There are lots of -separate calls to set reject/accept flags. These have DELIBERATELY been kept -distinct so that this module can decide what to do. - -Basically, there is a flag for each sort of rejection or acceptance. This -provides a history of what has happened to EACH character. - -Determining whether a character is CURRENTLY rejected depends on implicit -understanding of the SEQUENCE of possible calls. The flags are defined and -grouped in the REJ_FLAGS enum. These groupings are used in determining a -characters CURRENT rejection status. Basically, a character is ACCEPTED if - - none of the permanent rej flags are set - AND ( the character has never been rejected - OR an accept flag is set which is LATER than the latest reject flag ) - -IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE -OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! -**********************************************************************/ - -#ifndef REJCTMAP_H -#define REJCTMAP_H - -#include -#include "bits16.h" -#include "errcode.h" -#include "params.h" - -enum REJ_FLAGS { - /* Reject modes which are NEVER overridden */ - R_TESS_FAILURE, // PERM Tess didn't classify - R_SMALL_XHT, // PERM Xht too small - R_EDGE_CHAR, // PERM Too close to edge of image - R_1IL_CONFLICT, // PERM 1Il confusion - R_POSTNN_1IL, // PERM 1Il unrejected by NN - R_REJ_CBLOB, // PERM Odd blob - R_MM_REJECT, // PERM Matrix match rejection (m's) - R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend - - /* Initial reject modes (pre NN_ACCEPT) */ - R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) - R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD - R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD - R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD - - /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */ - R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop - R_DUBIOUS, // TEMP Post NN dodgy chars - R_NO_ALPHANUMS, // TEMP No alphanumerics 
in word after NN - R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest - R_XHT_FIXUP, // TEMP Xht tests unsure - - /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */ - R_BAD_QUALITY, // TEMP Quality metrics bad for WERD - - /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/ - R_DOC_REJ, // TEMP Document rejection - R_BLOCK_REJ, // TEMP Block rejection - R_ROW_REJ, // TEMP Row rejection - R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space - - /* Accept modes which occur between the above rejection groups */ - R_NN_ACCEPT, // NN acceptance - R_HYPHEN_ACCEPT, // Hyphen acceptance - R_MM_ACCEPT, // Matrix match acceptance - R_QUALITY_ACCEPT, // Accept word in good quality doc - R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures -}; - -/* REJECT MAP VALUES */ - -#define MAP_ACCEPT '1' -#define MAP_REJECT_PERM '0' -#define MAP_REJECT_TEMP '2' -#define MAP_REJECT_POTENTIAL '3' - -class REJ -{ - BITS16 flags1; - BITS16 flags2; - - void set_flag(REJ_FLAGS rej_flag) { - if (rej_flag < 16) - flags1.turn_on_bit (rej_flag); - else - flags2.turn_on_bit (rej_flag - 16); - } - - bool rej_before_nn_accept(); - bool rej_between_nn_and_mm(); - bool rej_between_mm_and_quality_accept(); - bool rej_between_quality_and_minimal_rej_accept(); - bool rej_before_mm_accept(); - bool rej_before_quality_accept(); - - public: - REJ() = default; - - REJ( //classwise copy - const REJ &source) { - flags1 = source.flags1; - flags2 = source.flags2; - } - - REJ & operator= ( //assign REJ - const REJ & source) { //from this - flags1 = source.flags1; - flags2 = source.flags2; - return *this; - } - - bool flag(REJ_FLAGS rej_flag) { - if (rej_flag < 16) - return flags1.bit (rej_flag); - else - return flags2.bit (rej_flag - 16); - } - - char display_char() { - if (perm_rejected ()) - return MAP_REJECT_PERM; - else if (accept_if_good_quality ()) - return MAP_REJECT_POTENTIAL; - else if (rejected ()) - return MAP_REJECT_TEMP; - else - 
return MAP_ACCEPT; - } - - bool perm_rejected(); //Is char perm reject? - - bool rejected(); //Is char rejected? - - bool accepted() { //Is char accepted? - return !rejected (); - } - - //potential rej? - bool accept_if_good_quality(); - - bool recoverable() { - return (rejected () && !perm_rejected ()); - } - - void setrej_tess_failure(); //Tess generated blank - void setrej_small_xht(); //Small xht char/wd - void setrej_edge_char(); //Close to image edge - void setrej_1Il_conflict(); //Initial reject map - void setrej_postNN_1Il(); //1Il after NN - void setrej_rej_cblob(); //Insert duff blob - void setrej_mm_reject(); //Matrix matcher - //Odd repeated char - void setrej_bad_repetition(); - void setrej_poor_match(); //Failed Rays heuristic - //TEMP reject_word - void setrej_not_tess_accepted(); - //TEMP reject_word - void setrej_contains_blanks(); - void setrej_bad_permuter(); //POTENTIAL reject_word - void setrej_hyphen(); //PostNN dubious hyph or . - void setrej_dubious(); //PostNN dubious limit - void setrej_no_alphanums(); //TEMP reject_word - void setrej_mostly_rej(); //TEMP reject_word - void setrej_xht_fixup(); //xht fixup - void setrej_bad_quality(); //TEMP reject_word - void setrej_doc_rej(); //TEMP reject_word - void setrej_block_rej(); //TEMP reject_word - void setrej_row_rej(); //TEMP reject_word - void setrej_unlv_rej(); //TEMP reject_word - void setrej_nn_accept(); //NN Flipped a char - void setrej_hyphen_accept(); //Good aspect ratio - void setrej_mm_accept(); //Matrix matcher - //Quality flip a char - void setrej_quality_accept(); - //Accept all except blank - void setrej_minimal_rej_accept(); - - void full_print(FILE *fp); -}; - -class REJMAP -{ - std::unique_ptr ptr; // ptr to the chars - int16_t len; //Number of chars - - public: - REJMAP() : len(0) {} - - REJMAP(const REJMAP &rejmap) { *this = rejmap; } - - REJMAP &operator=(const REJMAP &source); - - // Sets up the ptr array to length, whatever it was before. 
- void initialise(int16_t length); - - REJ &operator[]( // access function - int16_t index) const // map index - { - ASSERT_HOST(index < len); - return ptr[index]; // no bounds checks - } - - int32_t length() const { //map length - return len; - } - - int16_t accept_count(); //How many accepted? - - int16_t reject_count() { //How many rejects? - return len - accept_count (); - } - - void remove_pos( //Cut out an element - int16_t pos); //element to remove - - void print(FILE *fp); - - void full_print(FILE *fp); - - bool recoverable_rejects(); //Any non perm rejs? - - bool quality_recoverable_rejects(); - //Any potential rejs? - - void rej_word_small_xht(); //Reject whole word - //Reject whole word - void rej_word_tess_failure(); - void rej_word_not_tess_accepted(); - //Reject whole word - //Reject whole word - void rej_word_contains_blanks(); - //Reject whole word - void rej_word_bad_permuter(); - void rej_word_xht_fixup(); //Reject whole word - //Reject whole word - void rej_word_no_alphanums(); - void rej_word_mostly_rej(); //Reject whole word - void rej_word_bad_quality(); //Reject whole word - void rej_word_doc_rej(); //Reject whole word - void rej_word_block_rej(); //Reject whole word - void rej_word_row_rej(); //Reject whole word -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/seam.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/seam.cpp deleted file mode 100644 index 5a2f1b34..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/seam.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: seam.cpp (Formerly seam.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Fri May 17 16:30:13 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, 
Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "seam.h" -#include "blobs.h" -#include "tprintf.h" - -/*---------------------------------------------------------------------- - Public Function Code -----------------------------------------------------------------------*/ - -// Returns the bounding box of all the points in the seam. -TBOX SEAM::bounding_box() const { - TBOX box(location_.x, location_.y, location_.x, location_.y); - for (int s = 0; s < num_splits_; ++s) { - box += splits_[s].bounding_box(); - } - return box; -} - -// Returns true if other can be combined into *this. -bool SEAM::CombineableWith(const SEAM& other, int max_x_dist, - float max_total_priority) const { - int dist = location_.x - other.location_.x; - if (-max_x_dist < dist && dist < max_x_dist && - num_splits_ + other.num_splits_ <= kMaxNumSplits && - priority_ + other.priority_ < max_total_priority && - !OverlappingSplits(other) && !SharesPosition(other)) { - return true; - } else { - return false; - } -} - -// Combines other into *this. Only works if CombinableWith returned true. 
-void SEAM::CombineWith(const SEAM& other) { - priority_ += other.priority_; - location_ += other.location_; - location_ /= 2; - - for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) - splits_[num_splits_++] = other.splits_[s]; -} - -// Returns true if the splits in *this SEAM appear OK in the sense that they -// do not cross any outlines and do not chop off any ridiculously small -// pieces. -bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const { - // TODO(rays) Try testing all the splits. Duplicating original code for now, - // which tested only the first. - return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area); -} - -// Computes the widthp_/widthn_ range for all existing SEAMs and for *this -// seam, which is about to be inserted at insert_index. Returns false if -// any of the computations fails, as this indicates an invalid chop. -// widthn_/widthp_ are only changed if modify is true. -bool SEAM::PrepareToInsertSeam(const GenericVector& seams, - const GenericVector& blobs, - int insert_index, bool modify) { - for (int s = 0; s < insert_index; ++s) { - if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false; - } - if (!FindBlobWidth(blobs, insert_index, modify)) return false; - for (int s = insert_index; s < seams.size(); ++s) { - if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false; - } - return true; -} - -// Computes the widthp_/widthn_ range. Returns false if not all the splits -// are accounted for. widthn_/widthp_ are only changed if modify is true. -bool SEAM::FindBlobWidth(const GenericVector& blobs, int index, - bool modify) { - int num_found = 0; - if (modify) { - widthp_ = 0; - widthn_ = 0; - } - for (int s = 0; s < num_splits_; ++s) { - const SPLIT& split = splits_[s]; - bool found_split = split.ContainedByBlob(*blobs[index]); - // Look right. 
- for (int b = index + 1; !found_split && b < blobs.size(); ++b) { - found_split = split.ContainedByBlob(*blobs[b]); - if (found_split && b - index > widthp_ && modify) widthp_ = b - index; - } - // Look left. - for (int b = index - 1; !found_split && b >= 0; --b) { - found_split = split.ContainedByBlob(*blobs[b]); - if (found_split && index - b > widthn_ && modify) widthn_ = index - b; - } - if (found_split) ++num_found; - } - return num_found == num_splits_; -} - -// Splits this blob into two blobs by applying the splits included in -// *this SEAM -void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const { - for (int s = 0; s < num_splits_; ++s) { - splits_[s].SplitOutlineList(blob->outlines); - } - blob->ComputeBoundingBoxes(); - - divide_blobs(blob, other_blob, italic_blob, location_); - - blob->EliminateDuplicateOutlines(); - other_blob->EliminateDuplicateOutlines(); - - blob->CorrectBlobOrder(other_blob); -} - -// Undoes ApplySeam by removing the seam between these two blobs. -// Produces one blob as a result, and deletes other_blob. -void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const { - if (blob->outlines == nullptr) { - blob->outlines = other_blob->outlines; - other_blob->outlines = nullptr; - } - - TESSLINE* outline = blob->outlines; - while (outline->next) outline = outline->next; - outline->next = other_blob->outlines; - other_blob->outlines = nullptr; - delete other_blob; - - for (int s = 0; s < num_splits_; ++s) { - splits_[s].UnsplitOutlineList(blob); - } - blob->ComputeBoundingBoxes(); - blob->EliminateDuplicateOutlines(); -} - -// Prints everything in *this SEAM. -void SEAM::Print(const char* label) const { - tprintf(label); - tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, - widthp_, widthn_); - for (int s = 0; s < num_splits_; ++s) { - splits_[s].Print(); - if (s + 1 < num_splits_) tprintf(", "); - } - tprintf("\n"); -} - -// Prints a collection of SEAMs. 
-/* static */ -void SEAM::PrintSeams(const char* label, const GenericVector& seams) { - if (!seams.empty()) { - tprintf("%s\n", label); - for (int x = 0; x < seams.size(); ++x) { - tprintf("%2d: ", x); - seams[x]->Print(""); - } - tprintf("\n"); - } -} - -#ifndef GRAPHICS_DISABLED -// Draws the seam in the given window. -void SEAM::Mark(ScrollView* window) const { - for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window); -} -#endif - -// Break up the blobs in this chain so that they are all independent. -// This operation should undo the affect of join_pieces. -/* static */ -void SEAM::BreakPieces(const GenericVector& seams, - const GenericVector& blobs, int first, - int last) { - for (int x = first; x < last; ++x) seams[x]->Reveal(); - - TESSLINE* outline = blobs[first]->outlines; - int next_blob = first + 1; - - while (outline != nullptr && next_blob <= last) { - if (outline->next == blobs[next_blob]->outlines) { - outline->next = nullptr; - outline = blobs[next_blob]->outlines; - ++next_blob; - } else { - outline = outline->next; - } - } -} - -// Join a group of base level pieces into a single blob that can then -// be classified. -/* static */ -void SEAM::JoinPieces(const GenericVector& seams, - const GenericVector& blobs, int first, int last) { - TESSLINE* outline = blobs[first]->outlines; - if (!outline) - return; - - for (int x = first; x < last; ++x) { - SEAM *seam = seams[x]; - if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide(); - while (outline->next) outline = outline->next; - outline->next = blobs[x + 1]->outlines; - } -} - -// Hides the seam so the outlines appear not to be cut by it. -void SEAM::Hide() const { - for (int s = 0; s < num_splits_; ++s) { - splits_[s].Hide(); - } -} - -// Undoes hide, so the outlines are cut by the seam. -void SEAM::Reveal() const { - for (int s = 0; s < num_splits_; ++s) { - splits_[s].Reveal(); - } -} - -// Computes and returns, but does not set, the full priority of *this SEAM. 
-float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, - int centered_maxwidth, double center_knob, - double width_change_knob) const { - if (num_splits_ == 0) return 0.0f; - for (int s = 1; s < num_splits_; ++s) { - splits_[s].SplitOutline(); - } - float full_priority = - priority_ + - splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, - center_knob, width_change_knob); - for (int s = num_splits_ - 1; s >= 1; --s) { - splits_[s].UnsplitOutlines(); - } - return full_priority; -} - -/** - * @name start_seam_list - * - * Initialize a list of seams that match the original number of blobs - * present in the starting segmentation. Each of the seams created - * by this routine have location information only. - */ -void start_seam_list(TWERD* word, GenericVector* seam_array) { - seam_array->truncate(0); - TPOINT location; - - for (int b = 1; b < word->NumBlobs(); ++b) { - TBOX bbox = word->blobs[b - 1]->bounding_box(); - TBOX nbox = word->blobs[b]->bounding_box(); - location.x = (bbox.right() + nbox.left()) / 2; - location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4; - seam_array->push_back(new SEAM(0.0f, location)); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/seam.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/seam.h deleted file mode 100644 index 7179bd8a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/seam.h +++ /dev/null @@ -1,203 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: seam.h (Formerly seam.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Thu May 16 17:05:52 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -#ifndef SEAM_H -#define SEAM_H - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "blobs.h" -#include "split.h" - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -using PRIORITY = float; /* PRIORITY */ - -class SEAM { - public: - // A seam with no splits - SEAM(float priority, const TPOINT& location) - : priority_(priority), - location_(location), - widthp_(0), - widthn_(0), - num_splits_(0) {} - // A seam with a single split point. - SEAM(float priority, const TPOINT& location, const SPLIT& split) - : priority_(priority), - location_(location), - widthp_(0), - widthn_(0), - num_splits_(1) { - splits_[0] = split; - } - // Default copy constructor, operator= and destructor are OK! - - // Accessors. - float priority() const { return priority_; } - void set_priority(float priority) { priority_ = priority; } - bool HasAnySplits() const { return num_splits_ > 0; } - - // Returns the bounding box of all the points in the seam. 
- TBOX bounding_box() const; - - // Returns true if other can be combined into *this. - bool CombineableWith(const SEAM& other, int max_x_dist, - float max_total_priority) const; - // Combines other into *this. Only works if CombinableWith returned true. - void CombineWith(const SEAM& other); - - // Returns true if the given blob contains all splits of *this SEAM. - bool ContainedByBlob(const TBLOB& blob) const { - for (int s = 0; s < num_splits_; ++s) { - if (!splits_[s].ContainedByBlob(blob)) return false; - } - return true; - } - - // Returns true if the given EDGEPT is used by this SEAM, checking only - // the EDGEPT pointer, not the coordinates. - bool UsesPoint(const EDGEPT* point) const { - for (int s = 0; s < num_splits_; ++s) { - if (splits_[s].UsesPoint(point)) return true; - } - return false; - } - // Returns true if *this and other share any common point, by coordinates. - bool SharesPosition(const SEAM& other) const { - for (int s = 0; s < num_splits_; ++s) { - for (int t = 0; t < other.num_splits_; ++t) - if (splits_[s].SharesPosition(other.splits_[t])) return true; - } - return false; - } - // Returns true if *this and other have any vertically overlapping splits. - bool OverlappingSplits(const SEAM& other) const { - for (int s = 0; s < num_splits_; ++s) { - TBOX split1_box = splits_[s].bounding_box(); - for (int t = 0; t < other.num_splits_; ++t) { - TBOX split2_box = other.splits_[t].bounding_box(); - if (split1_box.y_overlap(split2_box)) return true; - } - } - return false; - } - - // Marks the edgepts used by the seam so the segments made by the cut - // never get split further by another seam in the future. - void Finalize() { - for (int s = 0; s < num_splits_; ++s) { - splits_[s].point1->MarkChop(); - splits_[s].point2->MarkChop(); - } - } - - // Returns true if the splits in *this SEAM appear OK in the sense that they - // do not cross any outlines and do not chop off any ridiculously small - // pieces. 
- bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const; - - // Computes the widthp_/widthn_ range for all existing SEAMs and for *this - // seam, which is about to be inserted at insert_index. Returns false if - // any of the computations fails, as this indicates an invalid chop. - // widthn_/widthp_ are only changed if modify is true. - bool PrepareToInsertSeam(const GenericVector& seams, - const GenericVector& blobs, int insert_index, - bool modify); - // Computes the widthp_/widthn_ range. Returns false if not all the splits - // are accounted for. widthn_/widthp_ are only changed if modify is true. - bool FindBlobWidth(const GenericVector& blobs, int index, - bool modify); - - // Splits this blob into two blobs by applying the splits included in - // *this SEAM - void ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const; - // Undoes ApplySeam by removing the seam between these two blobs. - // Produces one blob as a result, and deletes other_blob. - void UndoSeam(TBLOB* blob, TBLOB* other_blob) const; - - // Prints everything in *this SEAM. - void Print(const char* label) const; - // Prints a collection of SEAMs. - static void PrintSeams(const char* label, const GenericVector& seams); -#ifndef GRAPHICS_DISABLED - // Draws the seam in the given window. - void Mark(ScrollView* window) const; -#endif - - // Break up the blobs in this chain so that they are all independent. - // This operation should undo the affect of join_pieces. - static void BreakPieces(const GenericVector& seams, - const GenericVector& blobs, int first, - int last); - // Join a group of base level pieces into a single blob that can then - // be classified. - static void JoinPieces(const GenericVector& seams, - const GenericVector& blobs, int first, - int last); - - // Hides the seam so the outlines appear not to be cut by it. - void Hide() const; - // Undoes hide, so the outlines are cut by the seam. 
- void Reveal() const; - - // Computes and returns, but does not set, the full priority of *this SEAM. - // The arguments here are config parameters defined in Wordrec. Add chop_ - // to the beginning of the name. - float FullPriority(int xmin, int xmax, double overlap_knob, - int centered_maxwidth, double center_knob, - double width_change_knob) const; - - private: - // Maximum number of splits that a SEAM can hold. - static const uint8_t kMaxNumSplits = 3; - // Priority of this split. Lower is better. - float priority_; - // Position of the middle of the seam. - TPOINT location_; - // A range such that all splits in *this SEAM are contained within blobs in - // the range [index - widthn_,index + widthp_] where index is the index of - // this SEAM in the seams vector. - int8_t widthp_; - int8_t widthn_; - // Number of splits_ that are used. - uint8_t num_splits_; - // Set of pairs of points that are the ends of each split in the SEAM. - SPLIT splits_[kMaxNumSplits]; -}; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -void start_seam_list(TWERD* word, GenericVector* seam_array); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/split.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/split.cpp deleted file mode 100644 index eeb728b5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/split.cpp +++ /dev/null @@ -1,328 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: split.cpp (Formerly split.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Fri May 17 16:27:49 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "split.h" -#include "coutln.h" -#include "tprintf.h" - -#include - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -// Limit on the amount of penalty for the chop being off-center. -const int kCenterGradeCap = 25; -// Ridiculously large priority for splits that are no use. -const double kBadPriority = 999.0; - -BOOL_VAR(wordrec_display_splits, 0, "Display splits"); - -// Returns the bounding box of all the points in the split. -TBOX SPLIT::bounding_box() const { - return TBOX( - std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y), - std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y)); -} - -// Hides the SPLIT so the outlines appear not to be cut by it. 
-void SPLIT::Hide() const { - EDGEPT* edgept = point1; - do { - edgept->Hide(); - edgept = edgept->next; - } while (!edgept->EqualPos(*point2) && edgept != point1); - edgept = point2; - do { - edgept->Hide(); - edgept = edgept->next; - } while (!edgept->EqualPos(*point1) && edgept != point2); -} - -// Undoes hide, so the outlines are cut by the SPLIT. -void SPLIT::Reveal() const { - EDGEPT* edgept = point1; - do { - edgept->Reveal(); - edgept = edgept->next; - } while (!edgept->EqualPos(*point2) && edgept != point1); - edgept = point2; - do { - edgept->Reveal(); - edgept = edgept->next; - } while (!edgept->EqualPos(*point1) && edgept != point2); -} - -// Compute a split priority based on the bounding boxes of the parts. -// The arguments here are config parameters defined in Wordrec. Add chop_ -// to the beginning of the name. -float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, - int centered_maxwidth, double center_knob, - double width_change_knob) const { - TBOX box1 = Box12(); - TBOX box2 = Box21(); - int min_left = std::min(box1.left(), box2.left()); - int max_right = std::max(box1.right(), box2.right()); - if (xmin < min_left && xmax > max_right) return kBadPriority; - - float grade = 0.0f; - // grade_overlap. - int width1 = box1.width(); - int width2 = box2.width(); - int min_width = std::min(width1, width2); - int overlap = -box1.x_gap(box2); - if (overlap == min_width) { - grade += 100.0f; // Total overlap. - } else { - if (2 * overlap > min_width) overlap += 2 * overlap - min_width; - if (overlap > 0) grade += overlap_knob * overlap; - } - // grade_center_of_blob. - if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) { - grade += std::min(static_cast(kCenterGradeCap), center_knob * abs(width1 - width2)); - } - // grade_width_change. 
- float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2)); - if (width_change_grade > 0.0f) - grade += width_change_grade * width_change_knob; - return grade; -} - -// Returns true if *this SPLIT appears OK in the sense that it does not cross -// any outlines and does not chop off any ridiculously small pieces. -bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const { - return !IsLittleChunk(min_points, min_area) && - !blob.SegmentCrossesOutline(point1->pos, point2->pos); -} - -// Returns true if the split generates a small chunk in terms of either area -// or number of points. -bool SPLIT::IsLittleChunk(int min_points, int min_area) const { - if (point1->ShortNonCircularSegment(min_points, point2) && - point1->SegmentArea(point2) < min_area) { - return true; - } - if (point2->ShortNonCircularSegment(min_points, point1) && - point2->SegmentArea(point1) < min_area) { - return true; - } - return false; -} - -/********************************************************************** - * make_edgept - * - * Create an EDGEPT and hook it into an existing list of edge points. - **********************************************************************/ -EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) { - EDGEPT *this_edgept; - /* Create point */ - this_edgept = new EDGEPT; - this_edgept->pos.x = x; - this_edgept->pos.y = y; - // Now deal with the src_outline steps. - C_OUTLINE* prev_ol = prev->src_outline; - if (prev_ol != nullptr && prev->next == next) { - // Compute the fraction of the segment that is being cut. - FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y); - FCOORD target_vec(x - prev->pos.x, y - prev->pos.y); - double cut_fraction = target_vec.length() / segment_vec.length(); - // Get the start and end at the step level. 
- ICOORD step_start = prev_ol->position_at_index(prev->start_step); - int end_step = prev->start_step + prev->step_count; - int step_length = prev_ol->pathlength(); - ICOORD step_end = prev_ol->position_at_index(end_step % step_length); - ICOORD step_vec = step_end - step_start; - double target_length = step_vec.length() * cut_fraction; - // Find the point on the segment that gives the length nearest to target. - int best_step = prev->start_step; - ICOORD total_step(0, 0); - double best_dist = target_length; - for (int s = prev->start_step; s < end_step; ++s) { - total_step += prev_ol->step(s % step_length); - double dist = fabs(target_length - total_step.length()); - if (dist < best_dist) { - best_dist = dist; - best_step = s + 1; - } - } - // The new point is an intermediate point. - this_edgept->src_outline = prev_ol; - this_edgept->step_count = end_step - best_step; - this_edgept->start_step = best_step % step_length; - prev->step_count = best_step - prev->start_step; - } else { - // The new point is poly only. - this_edgept->src_outline = nullptr; - this_edgept->step_count = 0; - this_edgept->start_step = 0; - } - /* Hook it up */ - this_edgept->next = next; - this_edgept->prev = prev; - prev->next = this_edgept; - next->prev = this_edgept; - /* Set up vec entries */ - this_edgept->vec.x = this_edgept->next->pos.x - x; - this_edgept->vec.y = this_edgept->next->pos.y - y; - this_edgept->prev->vec.x = x - this_edgept->prev->pos.x; - this_edgept->prev->vec.y = y - this_edgept->prev->pos.y; - return this_edgept; -} - -/********************************************************************** - * remove_edgept - * - * Remove a given EDGEPT from its list and delete it. - **********************************************************************/ -void remove_edgept(EDGEPT *point) { - EDGEPT *prev = point->prev; - EDGEPT *next = point->next; - // Add point's steps onto prev's steps if they are from the same outline. 
- if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) { - prev->step_count += point->step_count; - } - prev->next = next; - next->prev = prev; - prev->vec.x = next->pos.x - prev->pos.x; - prev->vec.y = next->pos.y - prev->pos.y; - delete point; -} - -/********************************************************************** - * Print - * - * Shows the coordinates of both points in a split. - **********************************************************************/ -void SPLIT::Print() const { - tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x, - point2->pos.y); -} - -#ifndef GRAPHICS_DISABLED -// Draws the split in the given window. -void SPLIT::Mark(ScrollView* window) const { - window->Pen(ScrollView::GREEN); - window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); - window->UpdateWindow(); -} -#endif - -// Creates two outlines out of one by splitting the original one in half. -// Inserts the resulting outlines into the given list. -void SPLIT::SplitOutlineList(TESSLINE* outlines) const { - SplitOutline(); - while (outlines->next != nullptr) outlines = outlines->next; - - outlines->next = new TESSLINE; - outlines->next->loop = point1; - outlines->next->ComputeBoundingBox(); - - outlines = outlines->next; - - outlines->next = new TESSLINE; - outlines->next->loop = point2; - outlines->next->ComputeBoundingBox(); - - outlines->next->next = nullptr; -} - -// Makes a split between these two edge points, but does not affect the -// outlines to which they belong. -void SPLIT::SplitOutline() const { - EDGEPT* temp2 = point2->next; - EDGEPT* temp1 = point1->next; - /* Create two new points */ - EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2); - EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1); - // point1 and 2 are now cross-over points, so they must have nullptr - // src_outlines and give their src_outline information their new - // replacements. 
- new_point1->src_outline = point1->src_outline; - new_point1->start_step = point1->start_step; - new_point1->step_count = point1->step_count; - new_point2->src_outline = point2->src_outline; - new_point2->start_step = point2->start_step; - new_point2->step_count = point2->step_count; - point1->src_outline = nullptr; - point1->start_step = 0; - point1->step_count = 0; - point2->src_outline = nullptr; - point2->start_step = 0; - point2->step_count = 0; -} - -// Undoes the effect of SplitOutlineList, correcting the outlines for undoing -// the split, but possibly leaving some duplicate outlines. -void SPLIT::UnsplitOutlineList(TBLOB* blob) const { - /* Modify edge points */ - UnsplitOutlines(); - - TESSLINE* outline1 = new TESSLINE; - outline1->next = blob->outlines; - blob->outlines = outline1; - outline1->loop = point1; - - TESSLINE* outline2 = new TESSLINE; - outline2->next = blob->outlines; - blob->outlines = outline2; - outline2->loop = point2; -} - -// Removes the split that was put between these two points. -void SPLIT::UnsplitOutlines() const { - EDGEPT* tmp1 = point1->next; - EDGEPT* tmp2 = point2->next; - - tmp1->next->prev = point2; - tmp2->next->prev = point1; - - // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is - // deleted. - point1->next = tmp2->next; - point1->src_outline = tmp2->src_outline; - point1->start_step = tmp2->start_step; - point1->step_count = tmp2->step_count; - // Likewise point2 takes tmp1's place. 
- point2->next = tmp1->next; - point2->src_outline = tmp1->src_outline; - point2->start_step = tmp1->start_step; - point2->step_count = tmp1->step_count; - - delete tmp1; - delete tmp2; - - point1->vec.x = point1->next->pos.x - point1->pos.x; - point1->vec.y = point1->next->pos.y - point1->pos.y; - - point2->vec.x = point2->next->pos.x - point2->pos.x; - point2->vec.y = point2->next->pos.y - point2->pos.y; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/split.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/split.h deleted file mode 100644 index ef5848eb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/split.h +++ /dev/null @@ -1,120 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: split.h - * Description: - * Author: Mark Seaman, SW Productivity - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *****************************************************************************/ -#ifndef SPLIT_H -#define SPLIT_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "blobs.h" // for EDGEPT, TBLOB, TESSLINE -#include "params.h" // for BOOL_VAR_H, BoolParam -#include "rect.h" // for TBOX - -class ScrollView; - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -struct SPLIT { - SPLIT() : point1(nullptr), point2(nullptr) {} - SPLIT(EDGEPT* pt1, EDGEPT* pt2) : point1(pt1), point2(pt2) {} - - // Returns the bounding box of all the points in the split. - TBOX bounding_box() const; - // Returns the bounding box of the outline from point1 to point2. - TBOX Box12() const { return point1->SegmentBox(point2); } - // Returns the bounding box of the outline from point1 to point1. - TBOX Box21() const { return point2->SegmentBox(point1); } - // Returns the bounding box of the out - - // Hides the SPLIT so the outlines appear not to be cut by it. - void Hide() const; - // Undoes hide, so the outlines are cut by the SPLIT. - void Reveal() const; - - // Returns true if the given EDGEPT is used by this SPLIT, checking only - // the EDGEPT pointer, not the coordinates. - bool UsesPoint(const EDGEPT* point) const { - return point1 == point || point2 == point; - } - // Returns true if the other SPLIT has any position shared with *this. - bool SharesPosition(const SPLIT& other) const { - return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) || - point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2); - } - // Returns true if both points are contained within the blob. 
- bool ContainedByBlob(const TBLOB& blob) const { - return blob.Contains(point1->pos) && blob.Contains(point2->pos); - } - // Returns true if both points are contained within the outline. - bool ContainedByOutline(const TESSLINE& outline) const { - return outline.Contains(point1->pos) && outline.Contains(point2->pos); - } - // Compute a split priority based on the bounding boxes of the parts. - // The arguments here are config parameters defined in Wordrec. Add chop_ - // to the beginning of the name. - float FullPriority(int xmin, int xmax, double overlap_knob, - int centered_maxwidth, double center_knob, - double width_change_knob) const; - // Returns true if *this SPLIT appears OK in the sense that it does not cross - // any outlines and does not chop off any ridiculously small pieces. - bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const; - // Returns true if the split generates a small chunk in terms of either area - // or number of points. - bool IsLittleChunk(int min_points, int min_area) const; - - void Print() const; -#ifndef GRAPHICS_DISABLED - // Draws the split in the given window. - void Mark(ScrollView* window) const; -#endif - - // Creates two outlines out of one by splitting the original one in half. - // Inserts the resulting outlines into the given list. - void SplitOutlineList(TESSLINE* outlines) const; - // Makes a split between these two edge points, but does not affect the - // outlines to which they belong. - void SplitOutline() const; - // Undoes the effect of SplitOutlineList, correcting the outlines for undoing - // the split, but possibly leaving some duplicate outlines. - void UnsplitOutlineList(TBLOB* blob) const; - // Removes the split that was put between these two points. 
- void UnsplitOutlines() const; - - EDGEPT *point1; - EDGEPT *point2; -}; - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ - -extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits"); - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev); - -void remove_edgept(EDGEPT *point); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/statistc.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/statistc.cpp deleted file mode 100644 index f530e811..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/statistc.cpp +++ /dev/null @@ -1,781 +0,0 @@ -/********************************************************************** - * File: statistc.cpp (Formerly stats.c) - * Description: Simple statistical package for integer values. - * Author: Ray Smith - * Created: Mon Feb 04 16:56:05 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "statistc.h" -#include -#include -#include -#include "errcode.h" -#include "helpers.h" -#include "scrollview.h" -#include "tprintf.h" - -using tesseract::KDPairInc; - -/********************************************************************** - * STATS::STATS - * - * Construct a new stats element by allocating and zeroing the memory. - **********************************************************************/ -STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { - if (max_bucket_value_plus_1 <= min_bucket_value) { - min_bucket_value = 0; - max_bucket_value_plus_1 = 1; - } - rangemin_ = min_bucket_value; // setup - rangemax_ = max_bucket_value_plus_1; - buckets_ = new int32_t[rangemax_ - rangemin_]; - clear(); -} - -STATS::STATS() { - rangemax_ = 0; - rangemin_ = 0; - buckets_ = nullptr; -} - -/********************************************************************** - * STATS::set_range - * - * Alter the range on an existing stats element. - **********************************************************************/ -bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { - if (max_bucket_value_plus_1 <= min_bucket_value) { - return false; - } - if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) { - delete [] buckets_; - buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value]; - } - rangemin_ = min_bucket_value; // setup - rangemax_ = max_bucket_value_plus_1; - clear(); // zero it - return true; -} - -/********************************************************************** - * STATS::clear - * - * Clear out the STATS class by zeroing all the buckets. 
- **********************************************************************/ -void STATS::clear() { // clear out buckets - total_count_ = 0; - if (buckets_ != nullptr) - memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0])); -} - -/********************************************************************** - * STATS::~STATS - * - * Destructor for a stats class. - **********************************************************************/ -STATS::~STATS() { delete[] buckets_; } - -/********************************************************************** - * STATS::add - * - * Add a set of samples to (or delete from) a pile. - **********************************************************************/ -void STATS::add(int32_t value, int32_t count) { - if (buckets_ == nullptr) { - return; - } - value = ClipToRange(value, rangemin_, rangemax_ - 1); - buckets_[value - rangemin_] += count; - total_count_ += count; // keep count of total -} - -/********************************************************************** - * STATS::mode - * - * Find the mode of a stats class. - **********************************************************************/ -int32_t STATS::mode() const { // get mode of samples - if (buckets_ == nullptr) { - return rangemin_; - } - int32_t max = buckets_[0]; // max cell count - int32_t maxindex = 0; // index of max - for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) { - if (buckets_[index] > max) { - max = buckets_[index]; // find biggest - maxindex = index; - } - } - return maxindex + rangemin_; // index of biggest -} - -/********************************************************************** - * STATS::mean - * - * Find the mean of a stats class. 
- **********************************************************************/ -double STATS::mean() const { //get mean of samples - if (buckets_ == nullptr || total_count_ <= 0) { - return static_cast(rangemin_); - } - int64_t sum = 0; - for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { - sum += static_cast(index) * buckets_[index]; - } - return static_cast(sum) / total_count_ + rangemin_; -} - -/********************************************************************** - * STATS::sd - * - * Find the standard deviation of a stats class. - **********************************************************************/ -double STATS::sd() const { //standard deviation - if (buckets_ == nullptr || total_count_ <= 0) { - return 0.0; - } - int64_t sum = 0; - double sqsum = 0.0; - for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { - sum += static_cast(index) * buckets_[index]; - sqsum += static_cast(index) * index * buckets_[index]; - } - double variance = static_cast(sum) / total_count_; - variance = sqsum / total_count_ - variance * variance; - if (variance > 0.0) - return sqrt(variance); - return 0.0; -} - -/********************************************************************** - * STATS::ile - * - * Returns the fractile value such that frac fraction (in [0,1]) of samples - * has a value less than the return value. - **********************************************************************/ -double STATS::ile(double frac) const { - if (buckets_ == nullptr || total_count_ == 0) { - return static_cast(rangemin_); - } -#if 0 - // TODO(rays) The existing code doesn't seem to be doing the right thing - // with target a double but this substitute crashes the code that uses it. - // Investigate and fix properly. 
- int target = IntCastRounded(frac * total_count_); - target = ClipToRange(target, 1, total_count_); -#else - double target = frac * total_count_; - target = ClipToRange(target, 1.0, static_cast(total_count_)); -#endif - int sum = 0; - int index = 0; - for (index = 0; index < rangemax_ - rangemin_ && sum < target; - sum += buckets_[index++]); - if (index > 0) { - ASSERT_HOST(buckets_[index - 1] > 0); - return rangemin_ + index - - static_cast(sum - target) / buckets_[index - 1]; - } else { - return static_cast(rangemin_); - } -} - -/********************************************************************** - * STATS::min_bucket - * - * Find REAL minimum bucket - ile(0.0) isn't necessarily correct - **********************************************************************/ -int32_t STATS::min_bucket() const { // Find min - if (buckets_ == nullptr || total_count_ == 0) { - return rangemin_; - } - int32_t min = 0; - for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++); - return rangemin_ + min; -} - -/********************************************************************** - * STATS::max_bucket - * - * Find REAL maximum bucket - ile(1.0) isn't necessarily correct - **********************************************************************/ - -int32_t STATS::max_bucket() const { // Find max - if (buckets_ == nullptr || total_count_ == 0) { - return rangemin_; - } - int32_t max; - for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--); - return rangemin_ + max; -} - -/********************************************************************** - * STATS::median - * - * Finds a more useful estimate of median than ile(0.5). 
- * - * Overcomes a problem with ile() - if the samples are, for example, - * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway - * between 6 and 13 = 9.5 - **********************************************************************/ -double STATS::median() const { //get median - if (buckets_ == nullptr) { - return static_cast(rangemin_); - } - double median = ile(0.5); - int median_pile = static_cast(floor(median)); - if ((total_count_ > 1) && (pile_count(median_pile) == 0)) { - int32_t min_pile; - int32_t max_pile; - /* Find preceding non zero pile */ - for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--); - /* Find following non zero pile */ - for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++); - median = (min_pile + max_pile) / 2.0; - } - return median; -} - -/********************************************************************** - * STATS::local_min - * - * Return TRUE if this point is a local min. - **********************************************************************/ -bool STATS::local_min(int32_t x) const { - if (buckets_ == nullptr) { - return false; - } - x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_; - if (buckets_[x] == 0) - return true; - int32_t index; // table index - for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index); - if (index >= 0 && buckets_[index] < buckets_[x]) - return false; - for (index = x + 1; index < rangemax_ - rangemin_ && - buckets_[index] == buckets_[x]; ++index); - if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) - return false; - else - return true; -} - -/********************************************************************** - * STATS::smooth - * - * Apply a triangular smoothing filter to the stats. - * This makes the modes a bit more useful. - * The factor gives the height of the triangle, i.e. the weight of the - * centre. 
- **********************************************************************/ -void STATS::smooth(int32_t factor) { - if (buckets_ == nullptr || factor < 2) { - return; - } - STATS result(rangemin_, rangemax_); - int entrycount = rangemax_ - rangemin_; - for (int entry = 0; entry < entrycount; entry++) { - //centre weight - int count = buckets_[entry] * factor; - for (int offset = 1; offset < factor; offset++) { - if (entry - offset >= 0) - count += buckets_[entry - offset] * (factor - offset); - if (entry + offset < entrycount) - count += buckets_[entry + offset] * (factor - offset); - } - result.add(entry + rangemin_, count); - } - total_count_ = result.total_count_; - memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0])); -} - -/********************************************************************** - * STATS::cluster - * - * Cluster the samples into max_cluster clusters. - * Each call runs one iteration. The array of clusters must be - * max_clusters+1 in size as cluster 0 is used to indicate which samples - * have been used. - * The return value is the current number of clusters. 
- **********************************************************************/ - -int32_t STATS::cluster(float lower, // thresholds - float upper, - float multiple, // distance threshold - int32_t max_clusters, // max no to make - STATS *clusters) { // array of clusters - bool new_cluster; // added one - float *centres; // cluster centres - int32_t entry; // bucket index - int32_t cluster; // cluster index - int32_t best_cluster; // one to assign to - int32_t new_centre = 0; // residual mode - int32_t new_mode; // pile count of new_centre - int32_t count; // pile to place - float dist; // from cluster - float min_dist; // from best_cluster - int32_t cluster_count; // no of clusters - - if (buckets_ == nullptr || max_clusters < 1) - return 0; - centres = new float[max_clusters + 1]; - for (cluster_count = 1; cluster_count <= max_clusters - && clusters[cluster_count].buckets_ != nullptr - && clusters[cluster_count].total_count_ > 0; - cluster_count++) { - centres[cluster_count] = - static_cast(clusters[cluster_count].ile(0.5)); - new_centre = clusters[cluster_count].mode(); - for (entry = new_centre - 1; centres[cluster_count] - entry < lower - && entry >= rangemin_ - && pile_count(entry) <= pile_count(entry + 1); - entry--) { - count = pile_count(entry) - clusters[0].pile_count(entry); - if (count > 0) { - clusters[cluster_count].add(entry, count); - clusters[0].add (entry, count); - } - } - for (entry = new_centre + 1; entry - centres[cluster_count] < lower - && entry < rangemax_ - && pile_count(entry) <= pile_count(entry - 1); - entry++) { - count = pile_count(entry) - clusters[0].pile_count(entry); - if (count > 0) { - clusters[cluster_count].add(entry, count); - clusters[0].add(entry, count); - } - } - } - cluster_count--; - - if (cluster_count == 0) { - clusters[0].set_range(rangemin_, rangemax_); - } - do { - new_cluster = false; - new_mode = 0; - for (entry = 0; entry < rangemax_ - rangemin_; entry++) { - count = buckets_[entry] - clusters[0].buckets_[entry]; - 
//remaining pile - if (count > 0) { //any to handle - min_dist = static_cast(INT32_MAX); - best_cluster = 0; - for (cluster = 1; cluster <= cluster_count; cluster++) { - dist = entry + rangemin_ - centres[cluster]; - //find distance - if (dist < 0) - dist = -dist; - if (dist < min_dist) { - min_dist = dist; //find least - best_cluster = cluster; - } - } - if (min_dist > upper //far enough for new - && (best_cluster == 0 - || entry + rangemin_ > centres[best_cluster] * multiple - || entry + rangemin_ < centres[best_cluster] / multiple)) { - if (count > new_mode) { - new_mode = count; - new_centre = entry + rangemin_; - } - } - } - } - // need new and room - if (new_mode > 0 && cluster_count < max_clusters) { - cluster_count++; - new_cluster = true; - if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) { - delete [] centres; - return 0; - } - centres[cluster_count] = static_cast(new_centre); - clusters[cluster_count].add(new_centre, new_mode); - clusters[0].add(new_centre, new_mode); - for (entry = new_centre - 1; centres[cluster_count] - entry < lower - && entry >= rangemin_ - && pile_count (entry) <= pile_count(entry + 1); entry--) { - count = pile_count(entry) - clusters[0].pile_count(entry); - if (count > 0) { - clusters[cluster_count].add(entry, count); - clusters[0].add(entry, count); - } - } - for (entry = new_centre + 1; entry - centres[cluster_count] < lower - && entry < rangemax_ - && pile_count (entry) <= pile_count(entry - 1); entry++) { - count = pile_count(entry) - clusters[0].pile_count(entry); - if (count > 0) { - clusters[cluster_count].add(entry, count); - clusters[0].add (entry, count); - } - } - centres[cluster_count] = - static_cast(clusters[cluster_count].ile(0.5)); - } - } while (new_cluster && cluster_count < max_clusters); - delete [] centres; - return cluster_count; -} - -// Helper tests that the current index is still part of the peak and gathers -// the data into the peak, returning false when the peak is ended. 
-// src_buckets[index] - used_buckets[index] is the unused part of the histogram. -// prev_count is the histogram count of the previous index on entry and is -// updated to the current index on return. -// total_count and total_value are accumulating the mean of the peak. -static bool GatherPeak(int index, const int* src_buckets, int* used_buckets, - int* prev_count, int* total_count, double* total_value) { - int pile_count = src_buckets[index] - used_buckets[index]; - if (pile_count <= *prev_count && pile_count > 0) { - // Accumulate count and index.count product. - *total_count += pile_count; - *total_value += index * pile_count; - // Mark this index as used - used_buckets[index] = src_buckets[index]; - *prev_count = pile_count; - return true; - } else { - return false; - } -} - -// Finds (at most) the top max_modes modes, well actually the whole peak around -// each mode, returning them in the given modes vector as a pair in order of decreasing total count. -// Since the mean is the key and the count the data in the pair, a single call -// to sort on the output will re-sort by increasing mean of peak if that is -// more useful than decreasing total count. -// Returns the actual number of modes found. -int STATS::top_n_modes(int max_modes, - GenericVector >* modes) const { - if (max_modes <= 0) return 0; - int src_count = rangemax_ - rangemin_; - // Used copies the counts in buckets_ as they get used. - STATS used(rangemin_, rangemax_); - modes->truncate(0); - // Total count of the smallest peak found so far. - int least_count = 1; - // Mode that is used as a seed for each peak - int max_count = 0; - do { - // Find an unused mode. 
- max_count = 0; - int max_index = 0; - for (int src_index = 0; src_index < src_count; src_index++) { - int pile_count = buckets_[src_index] - used.buckets_[src_index]; - if (pile_count > max_count) { - max_count = pile_count; - max_index = src_index; - } - } - if (max_count > 0) { - // Copy the bucket count to used so it doesn't get found again. - used.buckets_[max_index] = max_count; - // Get the entire peak. - double total_value = max_index * max_count; - int total_count = max_count; - int prev_pile = max_count; - for (int offset = 1; max_index + offset < src_count; ++offset) { - if (!GatherPeak(max_index + offset, buckets_, used.buckets_, - &prev_pile, &total_count, &total_value)) - break; - } - prev_pile = buckets_[max_index]; - for (int offset = 1; max_index - offset >= 0; ++offset) { - if (!GatherPeak(max_index - offset, buckets_, used.buckets_, - &prev_pile, &total_count, &total_value)) - break; - } - if (total_count > least_count || modes->size() < max_modes) { - // We definitely want this mode, so if we have enough discard the least. - if (modes->size() == max_modes) - modes->truncate(max_modes - 1); - int target_index = 0; - // Linear search for the target insertion point. - while (target_index < modes->size() && - (*modes)[target_index].data >= total_count) - ++target_index; - float peak_mean = - static_cast(total_value / total_count + rangemin_); - modes->insert(KDPairInc(peak_mean, total_count), - target_index); - least_count = modes->back().data; - } - } - } while (max_count > 0); - return modes->size(); -} - -/********************************************************************** - * STATS::print - * - * Prints a summary and table of the histogram. 
- **********************************************************************/ -void STATS::print() const { - if (buckets_ == nullptr) { - return; - } - int32_t min = min_bucket() - rangemin_; - int32_t max = max_bucket() - rangemin_; - - int num_printed = 0; - for (int index = min; index <= max; index++) { - if (buckets_[index] != 0) { - tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]); - if (++num_printed % 8 == 0) - tprintf ("\n"); - } - } - tprintf ("\n"); - print_summary(); -} - - - -/********************************************************************** - * STATS::print_summary - * - * Print a summary of the stats. - **********************************************************************/ -void STATS::print_summary() const { - if (buckets_ == nullptr) { - return; - } - int32_t min = min_bucket(); - int32_t max = max_bucket(); - tprintf("Total count=%d\n", total_count_); - tprintf("Min=%.2f Really=%d\n", ile(0.0), min); - tprintf("Lower quartile=%.2f\n", ile(0.25)); - tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5)); - tprintf("Upper quartile=%.2f\n", ile(0.75)); - tprintf("Max=%.2f Really=%d\n", ile(1.0), max); - tprintf("Range=%d\n", max + 1 - min); - tprintf("Mean= %.2f\n", mean()); - tprintf("SD= %.2f\n", sd()); -} - - -/********************************************************************** - * STATS::plot - * - * Draw a histogram of the stats table. 
- **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void STATS::plot(ScrollView* window, // to draw in - float xorigin, // bottom left - float yorigin, - float xscale, // one x unit - float yscale, // one y unit - ScrollView::Color colour) const { // colour to draw in - if (buckets_ == nullptr) { - return; - } - window->Pen(colour); - - for (int index = 0; index < rangemax_ - rangemin_; index++) { - window->Rectangle(xorigin + xscale * index, yorigin, - xorigin + xscale * (index + 1), - yorigin + yscale * buckets_[index]); - } -} -#endif - - -/********************************************************************** - * STATS::plotline - * - * Draw a histogram of the stats table. (Line only) - **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void STATS::plotline(ScrollView* window, // to draw in - float xorigin, // bottom left - float yorigin, - float xscale, // one x unit - float yscale, // one y unit - ScrollView::Color colour) const { // colour to draw in - if (buckets_ == nullptr) { - return; - } - window->Pen(colour); - window->SetCursor(xorigin, yorigin + yscale * buckets_[0]); - for (int index = 0; index < rangemax_ - rangemin_; index++) { - window->DrawTo(xorigin + xscale * index, - yorigin + yscale * buckets_[index]); - } -} -#endif - - -/********************************************************************** - * choose_nth_item - * - * Returns the index of what would b the nth item in the array - * if the members were sorted, without actually sorting. 
- **********************************************************************/ - -int32_t choose_nth_item(int32_t index, float *array, int32_t count) { - int32_t next_sample; // next one to do - int32_t next_lesser; // space for new - int32_t prev_greater; // last one saved - int32_t equal_count; // no of equal ones - float pivot; // proposed median - float sample; // current sample - - if (count <= 1) - return 0; - if (count == 2) { - if (array[0] < array[1]) { - return index >= 1 ? 1 : 0; - } - else { - return index >= 1 ? 0 : 1; - } - } - else { - if (index < 0) - index = 0; // ensure legal - else if (index >= count) - index = count - 1; - equal_count = (int32_t) (rand() % count); - pivot = array[equal_count]; - // fill gap - array[equal_count] = array[0]; - next_lesser = 0; - prev_greater = count; - equal_count = 1; - for (next_sample = 1; next_sample < prev_greater;) { - sample = array[next_sample]; - if (sample < pivot) { - // shuffle - array[next_lesser++] = sample; - next_sample++; - } - else if (sample > pivot) { - prev_greater--; - // juggle - array[next_sample] = array[prev_greater]; - array[prev_greater] = sample; - } - else { - equal_count++; - next_sample++; - } - } - for (next_sample = next_lesser; next_sample < prev_greater;) - array[next_sample++] = pivot; - if (index < next_lesser) - return choose_nth_item (index, array, next_lesser); - else if (index < prev_greater) - return next_lesser; // in equal bracket - else - return choose_nth_item (index - prev_greater, - array + prev_greater, - count - prev_greater) + prev_greater; - } -} - -/********************************************************************** - * choose_nth_item - * - * Returns the index of what would be the nth item in the array - * if the members were sorted, without actually sorting. 
- **********************************************************************/ -int32_t choose_nth_item(int32_t index, void *array, int32_t count, size_t size, - int (*compar)(const void*, const void*)) { - int result; // of compar - int32_t next_sample; // next one to do - int32_t next_lesser; // space for new - int32_t prev_greater; // last one saved - int32_t equal_count; // no of equal ones - int32_t pivot; // proposed median - - if (count <= 1) - return 0; - if (count == 2) { - if (compar (array, (char *) array + size) < 0) { - return index >= 1 ? 1 : 0; - } - else { - return index >= 1 ? 0 : 1; - } - } - if (index < 0) - index = 0; // ensure legal - else if (index >= count) - index = count - 1; - pivot = (int32_t) (rand () % count); - swap_entries (array, size, pivot, 0); - next_lesser = 0; - prev_greater = count; - equal_count = 1; - for (next_sample = 1; next_sample < prev_greater;) { - result = - compar ((char *) array + size * next_sample, - (char *) array + size * next_lesser); - if (result < 0) { - swap_entries (array, size, next_lesser++, next_sample++); - // shuffle - } - else if (result > 0) { - prev_greater--; - swap_entries(array, size, prev_greater, next_sample); - } - else { - equal_count++; - next_sample++; - } - } - if (index < next_lesser) - return choose_nth_item (index, array, next_lesser, size, compar); - else if (index < prev_greater) - return next_lesser; // in equal bracket - else - return choose_nth_item (index - prev_greater, - (char *) array + size * prev_greater, - count - prev_greater, size, - compar) + prev_greater; -} - -/********************************************************************** - * swap_entries - * - * Swap 2 entries of arbitrary size in-place in a table. 
- **********************************************************************/ -void swap_entries(void *array, // array of entries - size_t size, // size of entry - int32_t index1, // entries to swap - int32_t index2) { - char tmp; - char *ptr1; // to entries - char *ptr2; - size_t count; // of bytes - - ptr1 = static_cast(array) + index1 * size; - ptr2 = static_cast(array) + index2 * size; - for (count = 0; count < size; count++) { - tmp = *ptr1; - *ptr1++ = *ptr2; - *ptr2++ = tmp; // tedious! - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/statistc.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/statistc.h deleted file mode 100644 index b2a036dd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/statistc.h +++ /dev/null @@ -1,169 +0,0 @@ -/********************************************************************** - * File: statistc.h (Formerly stats.h) - * Description: Class description for STATS class. - * Author: Ray Smith - * Created: Mon Feb 04 16:19:07 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSERACT_CCSTRUCT_STATISTC_H_ -#define TESSERACT_CCSTRUCT_STATISTC_H_ - -#include -#include "host.h" -#include "kdpair.h" -#include "scrollview.h" - -template class GenericVector; - - -// Simple histogram-based statistics for integer values in a known -// range, such that the range is small compared to the number of samples. -class STATS { - public: - // The histogram buckets are in the range - // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e. - // [min_bucket_value, max_bucket_value]. - // Any data under min_bucket value is silently mapped to min_bucket_value, - // and likewise, any data over max_bucket_value is silently mapped to - // max_bucket_value. - // In the internal array, min_bucket_value maps to 0 and - // max_bucket_value_plus_1 - min_bucket_value to the array size. - // TODO(rays) This is ugly. Convert the second argument to - // max_bucket_value and all the code that uses it. - STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1); - STATS(); // empty for arrays - - ~STATS(); - - // (Re)Sets the range and clears the counts. - // See the constructor for info on max and min values. - bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1); - - void clear(); // empty buckets - - void add(int32_t value, int32_t count); - - // "Accessors" return various statistics on the data. - int32_t mode() const; // get mode of samples - double mean() const; // get mean of samples - double sd() const; // standard deviation - // Returns the fractile value such that frac fraction (in [0,1]) of samples - // has a value less than the return value. - double ile(double frac) const; - // Returns the minimum used entry in the histogram (ie the minimum of the - // data, NOT the minimum of the supplied range, nor is it an index.) - // Would normally be called min(), but that is a reserved word in VC++. 
- int32_t min_bucket() const; // Find min - // Returns the maximum used entry in the histogram (ie the maximum of the - // data, NOT the maximum of the supplied range, nor is it an index.) - int32_t max_bucket() const; // Find max - // Finds a more useful estimate of median than ile(0.5). - // Overcomes a problem with ile() - if the samples are, for example, - // 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway - // between 6 and 13 = 9.5 - double median() const; // get median of samples - // Returns the count of the given value. - int32_t pile_count(int32_t value) const { - if (value <= rangemin_) - return buckets_[0]; - if (value >= rangemax_ - 1) - return buckets_[rangemax_ - rangemin_ - 1]; - return buckets_[value - rangemin_]; - } - // Returns the total count of all buckets. - int32_t get_total() const { - return total_count_; // total of all piles - } - // Returns true if x is a local min. - bool local_min(int32_t x) const; - - // Apply a triangular smoothing filter to the stats. - // This makes the modes a bit more useful. - // The factor gives the height of the triangle, i.e. the weight of the - // centre. - void smooth(int32_t factor); - - // Cluster the samples into max_cluster clusters. - // Each call runs one iteration. The array of clusters must be - // max_clusters+1 in size as cluster 0 is used to indicate which samples - // have been used. - // The return value is the current number of clusters. - int32_t cluster(float lower, // thresholds - float upper, - float multiple, // distance threshold - int32_t max_clusters, // max no to make - STATS *clusters); // array of clusters - -// Finds (at most) the top max_modes modes, well actually the whole peak around -// each mode, returning them in the given modes vector as a pair in order of decreasing total count. 
-// Since the mean is the key and the count the data in the pair, a single call -// to sort on the output will re-sort by increasing mean of peak if that is -// more useful than decreasing total count. -// Returns the actual number of modes found. - int top_n_modes( - int max_modes, - GenericVector >* modes) const; - - // Prints a summary and table of the histogram. - void print() const; - // Prints summary stats only of the histogram. - void print_summary() const; - - #ifndef GRAPHICS_DISABLED - // Draws the histogram as a series of rectangles. - void plot(ScrollView* window, // window to draw in - float xorigin, // origin of histo - float yorigin, // gram - float xscale, // size of one unit - float yscale, // size of one uint - ScrollView::Color colour) const; // colour to draw in - - // Draws a line graph of the histogram. - void plotline(ScrollView* window, // window to draw in - float xorigin, // origin of histo - float yorigin, // gram - float xscale, // size of one unit - float yscale, // size of one uint - ScrollView::Color colour) const; // colour to draw in - #endif // GRAPHICS_DISABLED - - private: - int32_t rangemin_; // min of range - // rangemax_ is not well named as it is really one past the max. - int32_t rangemax_; // max of range - int32_t total_count_; // no of samples - int32_t* buckets_; // array of cells -}; - -// Returns the nth ordered item from the array, as if they were -// ordered, but without ordering them, in linear time. -// The array does get shuffled! -int32_t choose_nth_item(int32_t index, // index to choose - float *array, // array of items - int32_t count); // no of items -// Generic version uses a defined comparator (with qsort semantics). -int32_t choose_nth_item(int32_t index, // index to choose - void *array, // array of items - int32_t count, // no of items - size_t size, // element size - int (*compar)(const void*, const void*)); // comparator -// Swaps 2 entries in an array in-place. 
-void swap_entries(void *array, // array of entries - size_t size, // size of entry - int32_t index1, // entries to swap - int32_t index2); - -#endif // TESSERACT_CCSTRUCT_STATISTC_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/stepblob.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/stepblob.cpp deleted file mode 100644 index d0c4a0d2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/stepblob.cpp +++ /dev/null @@ -1,553 +0,0 @@ -/********************************************************************** - * File: stepblob.cpp (Formerly cblob.c) - * Description: Code for C_BLOB class. - * Author: Ray Smith - * Created: Tue Oct 08 10:41:13 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "stepblob.h" -#include "allheaders.h" // for pixCreate, pixGetDepth -#include "genericvector.h" // for GenericVector -#include "host.h" // for TRUE, FALSE -#include "points.h" // for operator+=, FCOORD, ICOORD - -class DENORM; - -// Max perimeter to width ratio for a baseline position above box bottom. 
-const double kMaxPerimeterWidthRatio = 8.0; - -ELISTIZE (C_BLOB) -/********************************************************************** - * position_outline - * - * Position the outline in the given list at the relevant place - * according to its nesting. - **********************************************************************/ -static void position_outline( //put in place - C_OUTLINE *outline, //thing to place - C_OUTLINE_LIST *destlist //desstination list - ) { - C_OUTLINE *dest_outline; //outline from dest list - C_OUTLINE_IT it = destlist; //iterator - //iterator on children - C_OUTLINE_IT child_it = outline->child (); - - if (!it.empty ()) { - do { - dest_outline = it.data (); //get destination - //encloses dest - if (*dest_outline < *outline) { - //take off list - dest_outline = it.extract (); - //put this in place - it.add_after_then_move (outline); - //make it a child - child_it.add_to_end (dest_outline); - while (!it.at_last ()) { - it.forward (); //do rest of list - //check for other children - dest_outline = it.data (); - if (*dest_outline < *outline) { - //take off list - dest_outline = it.extract (); - child_it.add_to_end (dest_outline); - //make it a child - if (it.empty ()) - break; - } - } - return; //finished - } - //enclosed by dest - else if (*outline < *dest_outline) { - position_outline (outline, dest_outline->child ()); - //place in child list - return; //finished - } - it.forward (); - } - while (!it.at_first ()); - } - it.add_to_end (outline); //at outer level -} - - -/********************************************************************** - * plot_outline_list - * - * Draw a list of outlines in the given colour and their children - * in the child colour. 
- **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -static void plot_outline_list( //draw outlines - C_OUTLINE_LIST *list, //outline to draw - ScrollView* window, //window to draw in - ScrollView::Color colour, //colour to use - ScrollView::Color child_colour //colour of children - ) { - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = list; //iterator - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - //draw it - outline->plot (window, colour); - if (!outline->child ()->empty ()) - plot_outline_list (outline->child (), window, - child_colour, child_colour); - } -} -// Draws the outlines in the given colour, and child_colour, normalized -// using the given denorm, making use of sub-pixel accurate information -// if available. -static void plot_normed_outline_list(const DENORM& denorm, - C_OUTLINE_LIST *list, - ScrollView::Color colour, - ScrollView::Color child_colour, - ScrollView* window) { - C_OUTLINE_IT it(list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - C_OUTLINE* outline = it.data(); - outline->plot_normed(denorm, colour, window); - if (!outline->child()->empty()) - plot_normed_outline_list(denorm, outline->child(), child_colour, - child_colour, window); - } -} -#endif - - -/********************************************************************** - * reverse_outline_list - * - * Reverse a list of outlines and their children. 
- **********************************************************************/ - -static void reverse_outline_list(C_OUTLINE_LIST *list) { - C_OUTLINE_IT it = list; // iterator - - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - C_OUTLINE* outline = it.data(); - outline->reverse(); // reverse it - outline->set_flag(COUT_INVERSE, TRUE); - if (!outline->child()->empty()) - reverse_outline_list(outline->child()); - } -} - - -/********************************************************************** - * C_BLOB::C_BLOB - * - * Constructor to build a C_BLOB from a list of C_OUTLINEs. - * The C_OUTLINEs are not copied so the source list is emptied. - * The C_OUTLINEs are nested correctly in the blob. - **********************************************************************/ - -C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) { - for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.extract(); - // Position this outline in appropriate position in the hierarchy. - position_outline(outline, &outlines); - } - CheckInverseFlagAndDirection(); -} - -// Simpler constructor to build a blob from a single outline that has -// already been fully initialized. -C_BLOB::C_BLOB(C_OUTLINE* outline) { - C_OUTLINE_IT it(&outlines); - it.add_to_end(outline); -} - -// Builds a set of one or more blobs from a list of outlines. -// Input: one outline on outline_list contains all the others, but the -// nesting and order are undefined. -// If good_blob is true, the blob is added to good_blobs_it, unless -// an illegal (generation-skipping) parent-child relationship is found. -// If so, the parent blob goes to bad_blobs_it, and the immediate children -// are promoted to the top level, recursively being sent to good_blobs_it. -// If good_blob is false, all created blobs will go to the bad_blobs_it. -// Output: outline_list is empty. One or more blobs are added to -// good_blobs_it and/or bad_blobs_it. 
-void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, - C_OUTLINE_LIST* outline_list, - C_BLOB_IT* good_blobs_it, - C_BLOB_IT* bad_blobs_it) { - // List of top-level outlines with correctly nested children. - C_OUTLINE_LIST nested_outlines; - for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.extract(); - // Position this outline in appropriate position in the hierarchy. - position_outline(outline, &nested_outlines); - } - // Check for legal nesting and reassign as required. - for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.extract(); - bool blob_is_good = good_blob; - if (!outline->IsLegallyNested()) { - // The blob is illegally nested. - // Mark it bad, and add all its children to the top-level list. - blob_is_good = false; - ol_it.add_list_after(outline->child()); - } - C_BLOB* blob = new C_BLOB(outline); - // Set inverse flag and reverse if needed. - blob->CheckInverseFlagAndDirection(); - // Put on appropriate list. - if (!blob_is_good && bad_blobs_it != nullptr) - bad_blobs_it->add_after_then_move(blob); - else - good_blobs_it->add_after_then_move(blob); - } -} - -// Sets the COUT_INVERSE flag appropriately on the outlines and their -// children recursively, reversing the outlines if needed so that -// everything has an anticlockwise top-level. -void C_BLOB::CheckInverseFlagAndDirection() { - C_OUTLINE_IT ol_it(&outlines); - for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.data(); - if (outline->turn_direction() < 0) { - outline->reverse(); - reverse_outline_list(outline->child()); - outline->set_flag(COUT_INVERSE, TRUE); - } else { - outline->set_flag(COUT_INVERSE, FALSE); - } - } -} - - -// Build and return a fake blob containing a single fake outline with no -// steps. 
-C_BLOB* C_BLOB::FakeBlob(const TBOX& box) { - C_OUTLINE_LIST outlines; - C_OUTLINE::FakeOutline(box, &outlines); - return new C_BLOB(&outlines); -} - -/********************************************************************** - * C_BLOB::bounding_box - * - * Return the bounding box of the blob. - **********************************************************************/ - -TBOX C_BLOB::bounding_box() const { // bounding box - C_OUTLINE *outline; // current outline - // This is a read-only iteration of the outlines. - C_OUTLINE_IT it = const_cast(&outlines); - TBOX box; // bounding box - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - box += outline->bounding_box (); - } - return box; -} - - -/********************************************************************** - * C_BLOB::area - * - * Return the area of the blob. - **********************************************************************/ - -int32_t C_BLOB::area() { //area - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = &outlines; //outlines of blob - int32_t total; //total area - - total = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - total += outline->area (); - } - return total; -} - -/********************************************************************** - * C_BLOB::perimeter - * - * Return the perimeter of the top and 2nd level outlines. - **********************************************************************/ - -int32_t C_BLOB::perimeter() { - C_OUTLINE *outline; // current outline - C_OUTLINE_IT it = &outlines; // outlines of blob - int32_t total; // total perimeter - - total = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - outline = it.data(); - total += outline->perimeter(); - } - return total; -} - - -/********************************************************************** - * C_BLOB::outer_area - * - * Return the area of the blob. 
- **********************************************************************/ - -int32_t C_BLOB::outer_area() { //area - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = &outlines; //outlines of blob - int32_t total; //total area - - total = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - total += outline->outer_area (); - } - return total; -} - - -/********************************************************************** - * C_BLOB::count_transitions - * - * Return the total x and y maxes and mins in the blob. - * Chlid outlines are not counted. - **********************************************************************/ - -int32_t C_BLOB::count_transitions( //area - int32_t threshold //on size - ) { - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = &outlines; //outlines of blob - int32_t total; //total area - - total = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - total += outline->count_transitions (threshold); - } - return total; -} - - -/********************************************************************** - * C_BLOB::move - * - * Move C_BLOB by vector - **********************************************************************/ - -void C_BLOB::move( // reposition blob - const ICOORD vec // by vector - ) { - C_OUTLINE_IT it(&outlines); // iterator - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - it.data ()->move (vec); // move each outline -} - -// Static helper for C_BLOB::rotate to allow recursion of child outlines. 
-static void RotateOutlineList(const FCOORD& rotation, - C_OUTLINE_LIST* outlines) { - C_OUTLINE_LIST new_outlines; - C_OUTLINE_IT src_it(outlines); - C_OUTLINE_IT dest_it(&new_outlines); - while (!src_it.empty()) { - C_OUTLINE* old_outline = src_it.extract(); - src_it.forward(); - C_OUTLINE* new_outline = new C_OUTLINE(old_outline, rotation); - if (!old_outline->child()->empty()) { - RotateOutlineList(rotation, old_outline->child()); - C_OUTLINE_IT child_it(new_outline->child()); - child_it.add_list_after(old_outline->child()); - } - delete old_outline; - dest_it.add_to_end(new_outline); - } - src_it.add_list_after(&new_outlines); -} - -/********************************************************************** - * C_BLOB::rotate - * - * Rotate C_BLOB by rotation. - * Warning! has to rebuild all the C_OUTLINEs. - **********************************************************************/ -void C_BLOB::rotate(const FCOORD& rotation) { - RotateOutlineList(rotation, &outlines); -} - -// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the -// outline list and its children. -static void ComputeEdgeOffsetsOutlineList(int threshold, Pix* pix, - C_OUTLINE_LIST *list) { - C_OUTLINE_IT it(list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - C_OUTLINE* outline = it.data(); - if (pix != nullptr && pixGetDepth(pix) == 8) - outline->ComputeEdgeOffsets(threshold, pix); - else - outline->ComputeBinaryOffsets(); - if (!outline->child()->empty()) - ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child()); - } -} - -// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale -// if the supplied pix is 8-bit or the binary edges if nullptr. -void C_BLOB::ComputeEdgeOffsets(int threshold, Pix* pix) { - ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines); -} - -// Estimates and returns the baseline position based on the shape of the -// outlines. -// We first find the minimum y-coord (y_mins) at each x-coord within the blob. 
-// If there is a run of some y or y+1 in y_mins that is longer than the total -// number of positions at bottom or bottom+1, subject to the additional -// condition that at least one side of the y/y+1 run is higher than y+1, so it -// is not a local minimum, then y, not the bottom, makes a good candidate -// baseline position for this blob. Eg -// | ---| -// | | -// |- -----------| <= Good candidate baseline position. -// |- -| -// | -| -// |---| <= Bottom of blob -int16_t C_BLOB::EstimateBaselinePosition() { - TBOX box = bounding_box(); - int left = box.left(); - int width = box.width(); - int bottom = box.bottom(); - if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) - return bottom; // This is only for non-CJK blobs. - // Get the minimum y coordinate at each x-coordinate. - GenericVector y_mins; - y_mins.init_to_size(width + 1, box.top()); - C_OUTLINE_IT it(&outlines); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - C_OUTLINE* outline = it.data(); - ICOORD pos = outline->start_pos(); - for (int s = 0; s < outline->pathlength(); ++s) { - if (pos.y() < y_mins[pos.x() - left]) - y_mins[pos.x() - left] = pos.y(); - pos += outline->step(s); - } - } - // Find the total extent of the bottom or bottom + 1. - int bottom_extent = 0; - for (int x = 0; x <= width; ++x) { - if (y_mins[x] == bottom || y_mins[x] == bottom + 1) - ++bottom_extent; - } - // Find the lowest run longer than the bottom extent that is not the bottom. - int best_min = box.top(); - int prev_run = 0; - int prev_y = box.top(); - int prev_prev_y = box.top(); - for (int x = 0; x < width; x += prev_run) { - // Find the length of the current run. - int y_at_x = y_mins[x]; - int run = 1; - while (x + run <= width && y_mins[x + run] == y_at_x) ++run; - if (y_at_x > bottom + 1) { - // Possible contender. - int total_run = run; - // Find extent of current value or +1 to the right of x. 
- while (x + total_run <= width && - (y_mins[x + total_run] == y_at_x || - y_mins[x + total_run] == y_at_x + 1)) ++total_run; - // At least one end has to be higher so it is not a local max. - if (prev_prev_y > y_at_x + 1 || x + total_run > width || - y_mins[x + total_run] > y_at_x + 1) { - // If the prev_run is at y + 1, then we can add that too. There cannot - // be a suitable run at y before that or we would have found it already. - if (prev_run > 0 && prev_y == y_at_x + 1) total_run += prev_run; - if (total_run > bottom_extent && y_at_x < best_min) { - best_min = y_at_x; - } - } - } - prev_run = run; - prev_prev_y = prev_y; - prev_y = y_at_x; - } - return best_min == box.top() ? bottom : best_min; -} - -static void render_outline_list(C_OUTLINE_LIST *list, - int left, int top, Pix* pix) { - C_OUTLINE_IT it(list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - C_OUTLINE* outline = it.data(); - outline->render(left, top, pix); - if (!outline->child()->empty()) - render_outline_list(outline->child(), left, top, pix); - } -} - -static void render_outline_list_outline(C_OUTLINE_LIST *list, - int left, int top, Pix* pix) { - C_OUTLINE_IT it(list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - C_OUTLINE* outline = it.data(); - outline->render_outline(left, top, pix); - } -} - -// Returns a Pix rendering of the blob. pixDestroy after use. -Pix* C_BLOB::render() { - TBOX box = bounding_box(); - Pix* pix = pixCreate(box.width(), box.height(), 1); - render_outline_list(&outlines, box.left(), box.top(), pix); - return pix; -} - -// Returns a Pix rendering of the outline of the blob. (no fill). -// pixDestroy after use. 
-Pix* C_BLOB::render_outline() { - TBOX box = bounding_box(); - Pix* pix = pixCreate(box.width(), box.height(), 1); - render_outline_list_outline(&outlines, box.left(), box.top(), pix); - return pix; -} - -/********************************************************************** - * C_BLOB::plot - * - * Draw the C_BLOB in the given colour. - **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void C_BLOB::plot(ScrollView* window, // window to draw in - ScrollView::Color blob_colour, // main colour - ScrollView::Color child_colour) { // for holes - plot_outline_list(&outlines, window, blob_colour, child_colour); -} -// Draws the blob in the given colour, and child_colour, normalized -// using the given denorm, making use of sub-pixel accurate information -// if available. -void C_BLOB::plot_normed(const DENORM& denorm, - ScrollView::Color blob_colour, - ScrollView::Color child_colour, - ScrollView* window) { - plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, - window); -} -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/stepblob.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/stepblob.h deleted file mode 100644 index eb798fac..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/stepblob.h +++ /dev/null @@ -1,137 +0,0 @@ -/********************************************************************** - * File: stepblob.h (Formerly cblob.h) - * Description: Code for C_BLOB class. - * Author: Ray Smith - * Created: Tue Oct 08 10:41:13 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef STEPBLOB_H -#define STEPBLOB_H - -#include // for int32_t, int16_t -#include "coutln.h" // for C_OUTLINE_LIST, C_OUTLINE -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "points.h" // for FCOORD, ICOORD (ptr only) -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::Color - -class C_BLOB; -class DENORM; - -struct Pix; - -ELISTIZEH(C_BLOB) - -class C_BLOB:public ELIST_LINK -{ - public: - C_BLOB() = default; - explicit C_BLOB(C_OUTLINE_LIST *outline_list); - // Simpler constructor to build a blob from a single outline that has - // already been fully initialized. - explicit C_BLOB(C_OUTLINE* outline); - - // Builds a set of one or more blobs from a list of outlines. - // Input: one outline on outline_list contains all the others, but the - // nesting and order are undefined. - // If good_blob is true, the blob is added to good_blobs_it, unless - // an illegal (generation-skipping) parent-child relationship is found. - // If so, the parent blob goes to bad_blobs_it, and the immediate children - // are promoted to the top level, recursively being sent to good_blobs_it. - // If good_blob is false, all created blobs will go to the bad_blobs_it. - // Output: outline_list is empty. One or more blobs are added to - // good_blobs_it and/or bad_blobs_it. 
- static void ConstructBlobsFromOutlines(bool good_blob, - C_OUTLINE_LIST* outline_list, - C_BLOB_IT* good_blobs_it, - C_BLOB_IT* bad_blobs_it); - - // Sets the COUT_INVERSE flag appropriately on the outlines and their - // children recursively, reversing the outlines if needed so that - // everything has an anticlockwise top-level. - void CheckInverseFlagAndDirection(); - - // Build and return a fake blob containing a single fake outline with no - // steps. - static C_BLOB* FakeBlob(const TBOX& box); - - C_OUTLINE_LIST *out_list() { //get outline list - return &outlines; - } - - TBOX bounding_box() const; // compute bounding box - int32_t area(); //compute area - int32_t perimeter(); // Total perimeter of outlines and 1st level children. - int32_t outer_area(); //compute area - int32_t count_transitions( //count maxima - int32_t threshold); //size threshold - - void move(const ICOORD vec); // repostion blob by vector - void rotate(const FCOORD& rotation); // Rotate by given vector. - - // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale - // if the supplied pix is 8-bit or the binary edges if nullptr. - void ComputeEdgeOffsets(int threshold, Pix* pix); - - // Estimates and returns the baseline position based on the shape of the - // outlines. - int16_t EstimateBaselinePosition(); - - // Returns a Pix rendering of the blob. pixDestroy after use. - Pix* render(); - // Returns a Pix rendering of the outline of the blob. (no fill). - // pixDestroy after use. - Pix* render_outline(); - - #ifndef GRAPHICS_DISABLED - void plot( //draw one - ScrollView* window, //window to draw in - ScrollView::Color blob_colour, //for outer bits - ScrollView::Color child_colour); //for holes - // Draws the blob in the given colour, and child_colour, normalized - // using the given denorm, making use of sub-pixel accurate information - // if available. 
- void plot_normed(const DENORM& denorm, - ScrollView::Color blob_colour, - ScrollView::Color child_colour, - ScrollView* window); - #endif // GRAPHICS_DISABLED - - C_BLOB& operator= (const C_BLOB & source) { - if (!outlines.empty ()) - outlines.clear(); - outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy); - return *this; - } - - static C_BLOB* deep_copy(const C_BLOB* src) { - C_BLOB* blob = new C_BLOB; - *blob = *src; - return blob; - } - - static int SortByXMiddle(const void *v1, const void *v2) { - const C_BLOB* blob1 = *static_cast(v1); - const C_BLOB* blob2 = *static_cast(v2); - return blob1->bounding_box().x_middle() - - blob2->bounding_box().x_middle(); - } - - - private: - C_OUTLINE_LIST outlines; //master elements -}; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/vecfuncs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/vecfuncs.cpp deleted file mode 100644 index 86add656..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/vecfuncs.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: vecfuncs.cpp (Formerly vecfuncs.c) - * Description: Blob definition - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 27 15:39:52 1989 - * Modified: Tue Jul 9 17:44:12 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - * Revision 5.1 89/07/27 11:47:50 11:47:50 ray () - * Added ratings access methods. - * This version ready for independent development. - */ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "vecfuncs.h" -#include "blobs.h" - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * direction - * - * Show if the line is going in the positive or negative X direction. - **********************************************************************/ -int direction(EDGEPT *point) { - int dir; /** direction to return **/ - EDGEPT *prev; /** prev point **/ - EDGEPT *next; /** next point **/ - - dir = 0; - prev = point->prev; - next = point->next; - - if (((prev->pos.x <= point->pos.x) && - (point->pos.x < next->pos.x)) || - ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) - dir = 1; - - if (((prev->pos.x >= point->pos.x) && - (point->pos.x > next->pos.x)) || - ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) - dir = -1; - - return dir; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/vecfuncs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/vecfuncs.h deleted file mode 100644 index ac86a3fe..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/vecfuncs.h +++ /dev/null @@ -1,78 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: vecfuncs.h (Formerly vecfuncs.h) - * Description: Vector calculations - * Author: 
Mark Seaman, OCR Technology - * Created: Wed Dec 20 09:37:18 1989 - * Modified: Tue Jul 9 17:44:37 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -#ifndef VECFUNCS_H -#define VECFUNCS_H - -#include - -struct EDGEPT; - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/********************************************************************** - * point_diff - * - * Return the difference from point (p1) to point (p2). Put the value - * into point (p). 
- **********************************************************************/ - -#define point_diff(p,p1,p2) \ -((p).x = (p1).x - (p2).x, \ - (p).y = (p1).y - (p2).y) - -/********************************************************************** - * CROSS - * - * cross product - **********************************************************************/ - -#define CROSS(a,b) \ -((a).x * (b).y - (a).y * (b).x) - -/********************************************************************** - * SCALAR - * - * scalar vector product - **********************************************************************/ - -#define SCALAR(a,b) \ -((a).x * (b).x + (a).y * (b).y) - -/********************************************************************** - * LENGTH - * - * length of vector - **********************************************************************/ - -#define LENGTH(a) \ -((a).x * (a).x + (a).y * (a).y) - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -int direction(EDGEPT *point); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/werd.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/werd.cpp deleted file mode 100644 index 87002809..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/werd.cpp +++ /dev/null @@ -1,599 +0,0 @@ -/********************************************************************** - * File: werd.cpp (Formerly word.c) - * Description: Code for the WERD class. - * Author: Ray Smith - * Created: Tue Oct 08 14:32:12 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "helpers.h" -#include "linlsq.h" -#include "werd.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define FIRST_COLOUR ScrollView::RED //< first rainbow colour -#define LAST_COLOUR ScrollView::AQUAMARINE //< last rainbow colour -#define CHILD_COLOUR ScrollView::BROWN //< colour of children - -const ERRCODE CANT_SCALE_EDGESTEPS = - "Attempted to scale an edgestep format word"; - -ELIST2IZE(WERD) - -/** - * WERD::WERD - * - * Constructor to build a WERD from a list of C_BLOBs. - * blob_list The C_BLOBs (in word order) are not copied; - * we take its elements and put them in our lists. - * blank_count blanks in front of the word - * text correct text, outlives this WERD - */ -WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) - : blanks(blank_count), - flags(0), - script_id_(0), - correct(text) { - C_BLOB_IT start_it = &cblobs; - C_BLOB_IT rej_cblob_it = &rej_cblobs; - C_OUTLINE_IT c_outline_it; - int16_t inverted_vote = 0; - int16_t non_inverted_vote = 0; - - // Move blob_list's elements into cblobs. - start_it.add_list_after(blob_list); - - /* - Set white on black flag for the WERD, moving any duff blobs onto the - rej_cblobs list. - First, walk the cblobs checking the inverse flag for each outline of each - cblob. If a cblob has inconsistent flag settings for its different - outlines, move the blob to the reject list. 
Otherwise, increment the - appropriate w-on-b or b-on-w vote for the word. - - Now set the inversion flag for the WERD by maximum vote. - - Walk the blobs again, moving any blob whose inversion flag does not agree - with the concencus onto the reject list. - */ - start_it.set_to_list(&cblobs); - if (start_it.empty()) - return; - for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) { - bool reject_blob = false; - bool blob_inverted; - - c_outline_it.set_to_list(start_it.data()->out_list()); - blob_inverted = c_outline_it.data()->flag(COUT_INVERSE); - for (c_outline_it.mark_cycle_pt(); - !c_outline_it.cycled_list() && !reject_blob; - c_outline_it.forward()) { - reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted; - } - if (reject_blob) { - rej_cblob_it.add_after_then_move(start_it.extract()); - } else { - if (blob_inverted) - inverted_vote++; - else - non_inverted_vote++; - } - } - - flags.set_bit(W_INVERSE, (inverted_vote > non_inverted_vote)); - - start_it.set_to_list(&cblobs); - if (start_it.empty()) - return; - for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) { - c_outline_it.set_to_list(start_it.data()->out_list()); - if (c_outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE)) - rej_cblob_it.add_after_then_move(start_it.extract()); - } -} - - -/** - * WERD::WERD - * - * Constructor to build a WERD from a list of C_BLOBs. - * The C_BLOBs are not copied so the source list is emptied. 
- */ - -WERD::WERD(C_BLOB_LIST * blob_list, //< In word order - WERD * clone) //< Source of flags - : flags(clone->flags), - script_id_(clone->script_id_), - correct(clone->correct) { - C_BLOB_IT start_it = blob_list; // iterator - C_BLOB_IT end_it = blob_list; // another - - while (!end_it.at_last ()) - end_it.forward (); //move to last - (reinterpret_cast(&cblobs))->assign_to_sublist(&start_it, &end_it); - //move to our list - blanks = clone->blanks; - // fprintf(stderr,"Wrong constructor!!!!\n"); -} - -// Construct a WERD from a single_blob and clone the flags from this. -// W_BOL and W_EOL flags are set according to the given values. -WERD* WERD::ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob) { - C_BLOB_LIST temp_blobs; - C_BLOB_IT temp_it(&temp_blobs); - temp_it.add_after_then_move(blob); - WERD* blob_word = new WERD(&temp_blobs, this); - blob_word->set_flag(W_BOL, bol); - blob_word->set_flag(W_EOL, eol); - return blob_word; -} - -/** - * WERD::bounding_box - * - * Return the bounding box of the WERD. - * This is quite a mess to compute! - * ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the - * words on the row were re-sorted. The original words were built with reject - * blobs included. The FUZZY SPACE flags were set accordingly. If ALL the - * blobs in a word are rejected the BB for the word is nullptr, causing the sort - * to screw up, leading to the erroneous possibility of the first word in a - * row being marked as FUZZY space. - */ - -TBOX WERD::bounding_box() const { return restricted_bounding_box(true, true); } - -// Returns the bounding box including the desired combination of upper and -// lower noise/diacritic elements. -TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const { - TBOX box = true_bounding_box(); - int bottom = box.bottom(); - int top = box.top(); - // This is a read-only iteration of the rejected blobs. 
- C_BLOB_IT it(const_cast(&rej_cblobs)); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TBOX dot_box = it.data()->bounding_box(); - if ((upper_dots || dot_box.bottom() <= top) && - (lower_dots || dot_box.top() >= bottom)) { - box += dot_box; - } - } - return box; -} - -// Returns the bounding box of only the good blobs. -TBOX WERD::true_bounding_box() const { - TBOX box; // box being built - // This is a read-only iteration of the good blobs. - C_BLOB_IT it(const_cast(&cblobs)); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - box += it.data()->bounding_box(); - } - return box; -} - -/** - * WERD::move - * - * Reposition WERD by vector - * NOTE!! REJECT CBLOBS ARE NOT MOVED - */ - -void WERD::move(const ICOORD vec) { - C_BLOB_IT cblob_it(&cblobs); // cblob iterator - - for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) - cblob_it.data()->move(vec); -} - -/** - * WERD::join_on - * - * Join other word onto this one. Delete the old word. - */ - -void WERD::join_on(WERD* other) { - C_BLOB_IT blob_it(&cblobs); - C_BLOB_IT src_it(&other->cblobs); - C_BLOB_IT rej_cblob_it(&rej_cblobs); - C_BLOB_IT src_rej_it(&other->rej_cblobs); - - while (!src_it.empty()) { - blob_it.add_to_end(src_it.extract()); - src_it.forward(); - } - while (!src_rej_it.empty()) { - rej_cblob_it.add_to_end(src_rej_it.extract()); - src_rej_it.forward(); - } -} - - -/** - * WERD::copy_on - * - * Copy blobs from other word onto this one. 
- */ - -void WERD::copy_on(WERD* other) { - bool reversed = other->bounding_box().left() < bounding_box().left(); - C_BLOB_IT c_blob_it(&cblobs); - C_BLOB_LIST c_blobs; - - c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy); - if (reversed) { - c_blob_it.add_list_before(&c_blobs); - } else { - c_blob_it.move_to_last(); - c_blob_it.add_list_after(&c_blobs); - } - if (!other->rej_cblobs.empty()) { - C_BLOB_IT rej_c_blob_it(&rej_cblobs); - C_BLOB_LIST new_rej_c_blobs; - - new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy); - if (reversed) { - rej_c_blob_it.add_list_before(&new_rej_c_blobs); - } else { - rej_c_blob_it.move_to_last(); - rej_c_blob_it.add_list_after(&new_rej_c_blobs); - } - } -} - -/** - * WERD::print - * - * Display members - */ - -void WERD::print() { - tprintf("Blanks= %d\n", blanks); - bounding_box().print(); - tprintf("Flags = %d = 0%o\n", flags.val, flags.val); - tprintf(" W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? "TRUE" : "FALSE "); - tprintf(" W_ITALIC = %s\n", flags.bit(W_ITALIC) ? "TRUE" : "FALSE "); - tprintf(" W_BOL = %s\n", flags.bit(W_BOL) ? "TRUE" : "FALSE "); - tprintf(" W_EOL = %s\n", flags.bit(W_EOL) ? "TRUE" : "FALSE "); - tprintf(" W_NORMALIZED = %s\n", - flags.bit(W_NORMALIZED) ? "TRUE" : "FALSE "); - tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n", - flags.bit(W_SCRIPT_HAS_XHEIGHT) ? "TRUE" : "FALSE "); - tprintf(" W_SCRIPT_IS_LATIN = %s\n", - flags.bit(W_SCRIPT_IS_LATIN) ? "TRUE" : "FALSE "); - tprintf(" W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? "TRUE" : "FALSE "); - tprintf(" W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? "TRUE" : "FALSE "); - tprintf(" W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? "TRUE" : "FALSE "); - tprintf(" W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? "TRUE" : "FALSE "); - tprintf("Correct= %s\n", correct.string()); - tprintf("Rejected cblob count = %d\n", rej_cblobs.length()); - tprintf("Script = %d\n", script_id_); -} - - -/** - * WERD::plot - * - * Draw the WERD in the given colour. 
- */ - -#ifndef GRAPHICS_DISABLED -void WERD::plot(ScrollView *window, ScrollView::Color colour) { - C_BLOB_IT it = &cblobs; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->plot(window, colour, colour); - } - plot_rej_blobs(window); -} - -// Get the next color in the (looping) rainbow. -ScrollView::Color WERD::NextColor(ScrollView::Color colour) { - ScrollView::Color next = static_cast(colour + 1); - if (next >= LAST_COLOUR || next < FIRST_COLOUR) - next = FIRST_COLOUR; - return next; -} - -/** - * WERD::plot - * - * Draw the WERD in rainbow colours in window. - */ - -void WERD::plot(ScrollView* window) { - ScrollView::Color colour = FIRST_COLOUR; - C_BLOB_IT it = &cblobs; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->plot(window, colour, CHILD_COLOUR); - colour = NextColor(colour); - } - plot_rej_blobs(window); -} - - -/** - * WERD::plot_rej_blobs - * - * Draw the WERD rejected blobs in window - ALWAYS GREY - */ - - -void WERD::plot_rej_blobs(ScrollView *window) { - C_BLOB_IT it = &rej_cblobs; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->plot(window, ScrollView::GREY, ScrollView::GREY); - } -} -#endif // GRAPHICS_DISABLED - - -/** - * WERD::shallow_copy() - * - * Make a shallow copy of a word - */ - -WERD *WERD::shallow_copy() { - WERD *new_word = new WERD; - - new_word->blanks = blanks; - new_word->flags = flags; - new_word->dummy = dummy; - new_word->correct = correct; - return new_word; -} - - -/** - * WERD::operator= - * - * Assign a word, DEEP copying the blob list - */ - -WERD & WERD::operator= (const WERD & source) { - this->ELIST2_LINK::operator= (source); - blanks = source.blanks; - flags = source.flags; - script_id_ = source.script_id_; - dummy = source.dummy; - correct = source.correct; - if (!cblobs.empty()) - cblobs.clear(); - cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy); - - if (!rej_cblobs.empty()) - rej_cblobs.clear(); - 
rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy); - return *this; -} - - -/** - * word_comparator() - * - * word comparator used to sort a word list so that words are in increasing - * order of left edge. - */ - -int word_comparator(const void *word1p, const void *word2p) { - const WERD *word1 = *reinterpret_cast(word1p); - const WERD *word2 = *reinterpret_cast(word2p); - return word1->bounding_box().left() - word2->bounding_box().left(); -} - -/** - * WERD::ConstructWerdWithNewBlobs() - * - * This method returns a new werd constructed using the blobs in the input - * all_blobs list, which correspond to the blobs in this werd object. The - * blobs used to construct the new word are consumed and removed from the - * input all_blobs list. - * Returns nullptr if the word couldn't be constructed. - * Returns original blobs for which no matches were found in the output list - * orphan_blobs (appends). - */ - -WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs, - C_BLOB_LIST* orphan_blobs) { - C_BLOB_LIST current_blob_list; - C_BLOB_IT werd_blobs_it(¤t_blob_list); - // Add the word's c_blobs. - werd_blobs_it.add_list_after(cblob_list()); - - // New blob list. These contain the blobs which will form the new word. - C_BLOB_LIST new_werd_blobs; - C_BLOB_IT new_blobs_it(&new_werd_blobs); - - // not_found_blobs contains the list of current word's blobs for which a - // corresponding blob wasn't found in the input all_blobs list. - C_BLOB_LIST not_found_blobs; - C_BLOB_IT not_found_it(¬_found_blobs); - not_found_it.move_to_last(); - - werd_blobs_it.move_to_first(); - for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list(); - werd_blobs_it.forward()) { - C_BLOB* werd_blob = werd_blobs_it.extract(); - TBOX werd_blob_box = werd_blob->bounding_box(); - bool found = false; - // Now find the corresponding blob for this blob in the all_blobs - // list. For now, follow the inefficient method of pairwise - // comparisons. 
Ideally, one can pre-bucket the blobs by row. - C_BLOB_IT all_blobs_it(all_blobs); - for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); - all_blobs_it.forward()) { - C_BLOB* a_blob = all_blobs_it.data(); - // Compute the overlap of the two blobs. If major, a_blob should - // be added to the new blobs list. - TBOX a_blob_box = a_blob->bounding_box(); - if (a_blob_box.null_box()) { - tprintf("Bounding box couldn't be ascertained\n"); - } - if (werd_blob_box.contains(a_blob_box) || - werd_blob_box.major_overlap(a_blob_box)) { - // Old blobs are from minimal splits, therefore are expected to be - // bigger. The new small blobs should cover a significant portion. - // This is it. - all_blobs_it.extract(); - new_blobs_it.add_after_then_move(a_blob); - found = true; - } - } - if (!found) { - not_found_it.add_after_then_move(werd_blob); - } else { - delete werd_blob; - } - } - // Iterate over all not found blobs. Some of them may be due to - // under-segmentation (which is OK, since the corresponding blob is already - // in the list in that case. - not_found_it.move_to_first(); - for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); - not_found_it.forward()) { - C_BLOB* not_found = not_found_it.data(); - TBOX not_found_box = not_found->bounding_box(); - C_BLOB_IT existing_blobs_it(new_blobs_it); - for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list(); - existing_blobs_it.forward()) { - C_BLOB* a_blob = existing_blobs_it.data(); - TBOX a_blob_box = a_blob->bounding_box(); - if ((not_found_box.major_overlap(a_blob_box) || - a_blob_box.major_overlap(not_found_box)) && - not_found_box.y_overlap_fraction(a_blob_box) > 0.8) { - // Already taken care of. - delete not_found_it.extract(); - break; - } - } - } - if (orphan_blobs) { - C_BLOB_IT orphan_blobs_it(orphan_blobs); - orphan_blobs_it.move_to_last(); - orphan_blobs_it.add_list_after(¬_found_blobs); - } - - // New blobs are ready. Create a new werd object with these. 
- WERD* new_werd = nullptr; - if (!new_werd_blobs.empty()) { - new_werd = new WERD(&new_werd_blobs, this); - } else { - // Add the blobs back to this word so that it can be reused. - C_BLOB_IT this_list_it(cblob_list()); - this_list_it.add_list_after(¬_found_blobs); - } - return new_werd; -} - -// Removes noise from the word by moving small outlines to the rej_cblobs -// list, based on the size_threshold. -void WERD::CleanNoise(float size_threshold) { - C_BLOB_IT blob_it(&cblobs); - C_BLOB_IT rej_it(&rej_cblobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - C_OUTLINE_IT ol_it(blob->out_list()); - for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.data(); - TBOX ol_box = outline->bounding_box(); - int ol_size = - ol_box.width() > ol_box.height() ? ol_box.width() : ol_box.height(); - if (ol_size < size_threshold) { - // This outline is too small. Move it to a separate blob in the - // reject blobs list. - C_BLOB* rej_blob = new C_BLOB(ol_it.extract()); - rej_it.add_after_then_move(rej_blob); - } - } - if (blob->out_list()->empty()) delete blob_it.extract(); - } -} - -// Extracts all the noise outlines and stuffs the pointers into the given -// vector of outlines. Afterwards, the outlines vector owns the pointers. -void WERD::GetNoiseOutlines(GenericVector* outlines) { - C_BLOB_IT rej_it(&rej_cblobs); - for (rej_it.mark_cycle_pt(); !rej_it.empty(); rej_it.forward()) { - C_BLOB* blob = rej_it.extract(); - C_OUTLINE_IT ol_it(blob->out_list()); - outlines->push_back(ol_it.extract()); - delete blob; - } -} - -// Adds the selected outlines to the indcated real blobs, and puts the rest -// back in rej_cblobs where they came from. Where the target_blobs entry is -// nullptr, a run of wanted outlines is put into a single new blob. -// Ownership of the outlines is transferred back to the word. (Hence -// GenericVector and not PointerVector.) 
-// Returns true if any new blob was added to the start of the word, which -// suggests that it might need joining to the word before it, and likewise -// sets make_next_word_fuzzy true if any new blob was added to the end. -bool WERD::AddSelectedOutlines(const GenericVector& wanted, - const GenericVector& target_blobs, - const GenericVector& outlines, - bool* make_next_word_fuzzy) { - bool outline_added_to_start = false; - if (make_next_word_fuzzy != nullptr) *make_next_word_fuzzy = false; - C_BLOB_IT rej_it(&rej_cblobs); - for (int i = 0; i < outlines.size(); ++i) { - C_OUTLINE* outline = outlines[i]; - if (outline == nullptr) continue; // Already used it. - if (wanted[i]) { - C_BLOB* target_blob = target_blobs[i]; - TBOX noise_box = outline->bounding_box(); - if (target_blob == nullptr) { - target_blob = new C_BLOB(outline); - // Need to find the insertion point. - C_BLOB_IT blob_it(&cblobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); - blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - TBOX blob_box = blob->bounding_box(); - if (blob_box.left() > noise_box.left()) { - if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) { - // We might want to join this word to its predecessor. - outline_added_to_start = true; - } - blob_it.add_before_stay_put(target_blob); - break; - } - } - if (blob_it.cycled_list()) { - blob_it.add_to_end(target_blob); - if (make_next_word_fuzzy != nullptr) *make_next_word_fuzzy = true; - } - // Add all consecutive wanted, but null-blob outlines to same blob. - C_OUTLINE_IT ol_it(target_blob->out_list()); - while (i + 1 < outlines.size() && wanted[i + 1] && - target_blobs[i + 1] == nullptr) { - ++i; - ol_it.add_to_end(outlines[i]); - } - } else { - // Insert outline into this blob. - C_OUTLINE_IT ol_it(target_blob->out_list()); - ol_it.add_to_end(outline); - } - } else { - // Put back on noise list. 
- rej_it.add_to_end(new C_BLOB(outline)); - } - } - return outline_added_to_start; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/werd.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/werd.h deleted file mode 100644 index b9e87e65..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccstruct/werd.h +++ /dev/null @@ -1,197 +0,0 @@ -/********************************************************************** - * File: werd.h - * Description: Code for the WERD class. - * Author: Ray Smith - * Created: Tue Oct 08 14:32:12 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef WERD_H -#define WERD_H - -#include "params.h" -#include "bits16.h" -#include "elst2.h" -#include "strngs.h" -#include "stepblob.h" - -enum WERD_FLAGS -{ - W_SEGMENTED, //< correctly segmented - W_ITALIC, //< italic text - W_BOLD, //< bold text - W_BOL, //< start of line - W_EOL, //< end of line - W_NORMALIZED, //< flags - W_SCRIPT_HAS_XHEIGHT, //< x-height concept makes sense. - W_SCRIPT_IS_LATIN, //< Special case latin for y. splitting. 
- W_DONT_CHOP, //< fixed pitch chopped - W_REP_CHAR, //< repeated character - W_FUZZY_SP, //< fuzzy space - W_FUZZY_NON, //< fuzzy nonspace - W_INVERSE //< white on black -}; - -enum DISPLAY_FLAGS -{ - /* Display flags bit number allocations */ - DF_BOX, //< Bounding box - DF_TEXT, //< Correct ascii - DF_POLYGONAL, //< Polyg approx - DF_EDGE_STEP, //< Edge steps - DF_BN_POLYGONAL, //< BL normalisd polyapx - DF_BLAMER //< Blamer information -}; - -class ROW; //forward decl - -class WERD : public ELIST2_LINK { - public: - WERD() = default; - // WERD constructed with: - // blob_list - blobs of the word (we take this list's contents) - // blanks - number of blanks before the word - // text - correct text (outlives WERD) - WERD(C_BLOB_LIST *blob_list, uint8_t blanks, const char *text); - - // WERD constructed from: - // blob_list - blobs in the word - // clone - werd to clone flags, etc from. - WERD(C_BLOB_LIST *blob_list, WERD *clone); - - // Construct a WERD from a single_blob and clone the flags from this. - // W_BOL and W_EOL flags are set according to the given values. - WERD* ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob); - - ~WERD() = default; - - // assignment - WERD & operator= (const WERD &source); - - // This method returns a new werd constructed using the blobs in the input - // all_blobs list, which correspond to the blobs in this werd object. The - // blobs used to construct the new word are consumed and removed from the - // input all_blobs list. - // Returns nullptr if the word couldn't be constructed. - // Returns original blobs for which no matches were found in the output list - // orphan_blobs (appends). - WERD *ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, - C_BLOB_LIST *orphan_blobs); - - // Accessors for reject / DUFF blobs in various formats - C_BLOB_LIST *rej_cblob_list() { // compact format - return &rej_cblobs; - } - - // Accessors for good blobs in various formats. 
- C_BLOB_LIST *cblob_list() { // get compact blobs - return &cblobs; - } - - uint8_t space() { // access function - return blanks; - } - void set_blanks(uint8_t new_blanks) { - blanks = new_blanks; - } - int script_id() const { - return script_id_; - } - void set_script_id(int id) { - script_id_ = id; - } - - // Returns the (default) bounding box including all the dots. - TBOX bounding_box() const; // compute bounding box - // Returns the bounding box including the desired combination of upper and - // lower noise/diacritic elements. - TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; - // Returns the bounding box of only the good blobs. - TBOX true_bounding_box() const; - - const char *text() const { return correct.string(); } - void set_text(const char *new_text) { correct = new_text; } - - bool flag(WERD_FLAGS mask) const { return flags.bit(mask); } - void set_flag(WERD_FLAGS mask, bool value) { flags.set_bit(mask, value); } - - bool display_flag(uint8_t flag) const { return disp_flags.bit(flag); } - void set_display_flag(uint8_t flag, bool value) { - disp_flags.set_bit(flag, value); - } - - WERD *shallow_copy(); // shallow copy word - - // reposition word by vector - void move(const ICOORD vec); - - // join other's blobs onto this werd, emptying out other. - void join_on(WERD* other); - - // copy other's blobs onto this word, leaving other intact. - void copy_on(WERD* other); - - // tprintf word metadata (but not blob innards) - void print(); - - #ifndef GRAPHICS_DISABLED - // plot word on window in a uniform colour - void plot(ScrollView *window, ScrollView::Color colour); - - // Get the next color in the (looping) rainbow. 
- static ScrollView::Color NextColor(ScrollView::Color colour); - - // plot word on window in a rainbow of colours - void plot(ScrollView *window); - - // plot rejected blobs in a rainbow of colours - void plot_rej_blobs(ScrollView *window); - #endif // GRAPHICS_DISABLED - - // Removes noise from the word by moving small outlines to the rej_cblobs - // list, based on the size_threshold. - void CleanNoise(float size_threshold); - - // Extracts all the noise outlines and stuffs the pointers into the given - // vector of outlines. Afterwards, the outlines vector owns the pointers. - void GetNoiseOutlines(GenericVector *outlines); - // Adds the selected outlines to the indcated real blobs, and puts the rest - // back in rej_cblobs where they came from. Where the target_blobs entry is - // nullptr, a run of wanted outlines is put into a single new blob. - // Ownership of the outlines is transferred back to the word. (Hence - // GenericVector and not PointerVector.) - // Returns true if any new blob was added to the start of the word, which - // suggests that it might need joining to the word before it, and likewise - // sets make_next_word_fuzzy true if any new blob was added to the end. - bool AddSelectedOutlines(const GenericVector &wanted, - const GenericVector &target_blobs, - const GenericVector &outlines, - bool *make_next_word_fuzzy); - - private: - uint8_t blanks; // no of blanks - uint8_t dummy; // padding - BITS16 flags; // flags about word - BITS16 disp_flags; // display flags - int16_t script_id_; // From unicharset. 
- STRING correct; // correct text - C_BLOB_LIST cblobs; // compacted blobs - C_BLOB_LIST rej_cblobs; // DUFF blobs -}; - -ELIST2IZEH (WERD) -#include "ocrrow.h" // placed here due to -// compare words by increasing order of left edge, suitable for qsort(3) -int word_comparator(const void *word1p, const void *word2p); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/Makefile.am deleted file mode 100644 index 05d80b62..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/Makefile.am +++ /dev/null @@ -1,45 +0,0 @@ -SUBDIRS = -AM_CXXFLAGS = -AM_CPPFLAGS = - -if !NO_TESSDATA_PREFIX -AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@ -endif - -if VISIBILITY -AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -AM_CPPFLAGS += -DTESS_EXPORTS -endif - -pkginclude_HEADERS = \ - genericvector.h helpers.h host.h \ - ocrclass.h platform.h serialis.h strngs.h \ - tesscallback.h unichar.h - -noinst_HEADERS = \ - ambigs.h basedir.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \ - elst.h errcode.h fileerr.h fileio.h \ - genericheap.h globaloc.h \ - indexmapbidi.h kdpair.h lsterr.h \ - object_cache.h params.h qrsequence.h sorthelper.h \ - scanutils.h tessdatamanager.h tprintf.h \ - unicharcompress.h unicharmap.h unicharset.h unicity_table.h unicodes.h \ - universalambigs.h - -noinst_LTLIBRARIES = libtesseract_ccutil.la - -libtesseract_ccutil_la_SOURCES = \ - ambigs.cpp basedir.cpp bitvector.cpp \ - ccutil.cpp clst.cpp \ - elst2.cpp elst.cpp errcode.cpp \ - fileio.cpp \ - globaloc.cpp indexmapbidi.cpp \ - mainblk.cpp \ - serialis.cpp strngs.cpp scanutils.cpp \ - tessdatamanager.cpp tprintf.cpp \ - unichar.cpp unicharcompress.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \ - params.cpp universalambigs.cpp - -if T_WIN -AM_CPPFLAGS += -DWINDLLNAME=\"lib@GENERIC_LIBRARY_NAME@\" -endif diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ambigs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ambigs.cpp deleted file mode 100644 index 402064ae..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ambigs.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ambigs.cpp -// Description: Functions for dealing with ambiguities -// (training and recognition). -// Author: Daria Antonova -// Created: Mon Feb 5 11:26:43 PDT 2009 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "ambigs.h" - -#include -#include "helpers.h" -#include "universalambigs.h" - -#if defined(_WIN32) && !defined(__GNUC__) -#define strtok_r(str, delim, saveptr) strtok_s(str, delim, saveptr) -#endif /* _WIN32 && !__GNUC__ */ - -namespace tesseract { - -// Maximum line size: -// 10 for sizes of ambigs, tabs, abmig type and newline -// UNICHAR_LEN * (MAX_AMBIG_SIZE + 1) for each part of the ambig -const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1); - -AmbigSpec::AmbigSpec() { - wrong_ngram[0] = INVALID_UNICHAR_ID; - correct_fragments[0] = INVALID_UNICHAR_ID; - correct_ngram_id = INVALID_UNICHAR_ID; - type = NOT_AMBIG; - wrong_ngram_size = 0; -} - -ELISTIZE(AmbigSpec) - -// Initializes the ambigs by adding a nullptr pointer to each table. -void UnicharAmbigs::InitUnicharAmbigs(const UNICHARSET& unicharset, - bool use_ambigs_for_adaption) { - for (int i = 0; i < unicharset.size(); ++i) { - replace_ambigs_.push_back(nullptr); - dang_ambigs_.push_back(nullptr); - one_to_one_definite_ambigs_.push_back(nullptr); - if (use_ambigs_for_adaption) { - ambigs_for_adaption_.push_back(nullptr); - reverse_ambigs_for_adaption_.push_back(nullptr); - } - } -} - -// Loads the universal ambigs that are useful for any language. 
-void UnicharAmbigs::LoadUniversal(const UNICHARSET& encoder_set, - UNICHARSET* unicharset) { - TFile file; - if (!file.Open(kUniversalAmbigsFile, ksizeofUniversalAmbigsFile)) return; - LoadUnicharAmbigs(encoder_set, &file, 0, false, unicharset); -} - -void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, - TFile *ambig_file, - int debug_level, - bool use_ambigs_for_adaption, - UNICHARSET *unicharset) { - int i, j; - UnicharIdVector *adaption_ambigs_entry; - if (debug_level) tprintf("Reading ambiguities\n"); - - int test_ambig_part_size; - int replacement_ambig_part_size; - // The space for buffer is allocated on the heap to avoid - // GCC frame size warning. - const int kBufferSize = 10 + 2 * kMaxAmbigStringSize; - char *buffer = new char[kBufferSize]; - char replacement_string[kMaxAmbigStringSize]; - UNICHAR_ID test_unichar_ids[MAX_AMBIG_SIZE + 1]; - int line_num = 0; - int type = NOT_AMBIG; - - // Determine the version of the ambigs file. - int version = 0; - ASSERT_HOST(ambig_file->FGets(buffer, kBufferSize) != nullptr && - strlen(buffer) > 0); - if (*buffer == 'v') { - version = static_cast(strtol(buffer+1, nullptr, 10)); - ++line_num; - } else { - ambig_file->Rewind(); - } - while (ambig_file->FGets(buffer, kBufferSize) != nullptr) { - chomp_string(buffer); - if (debug_level > 2) tprintf("read line %s\n", buffer); - ++line_num; - if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, - buffer, &test_ambig_part_size, test_unichar_ids, - &replacement_ambig_part_size, - replacement_string, &type)) continue; - // Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST. - AmbigSpec *ambig_spec = new AmbigSpec(); - if (!InsertIntoTable((type == REPLACE_AMBIG) ? replace_ambigs_ - : dang_ambigs_, - test_ambig_part_size, test_unichar_ids, - replacement_ambig_part_size, replacement_string, type, - ambig_spec, unicharset)) - continue; - - // Update one_to_one_definite_ambigs_. 
- if (test_ambig_part_size == 1 && - replacement_ambig_part_size == 1 && type == DEFINITE_AMBIG) { - if (one_to_one_definite_ambigs_[test_unichar_ids[0]] == nullptr) { - one_to_one_definite_ambigs_[test_unichar_ids[0]] = new UnicharIdVector(); - } - one_to_one_definite_ambigs_[test_unichar_ids[0]]->push_back( - ambig_spec->correct_ngram_id); - } - // Update ambigs_for_adaption_. - if (use_ambigs_for_adaption) { - GenericVector encoding; - // Silently ignore invalid strings, as before, so it is safe to use a - // universal ambigs file. - if (unicharset->encode_string(replacement_string, true, &encoding, - nullptr, nullptr)) { - for (i = 0; i < test_ambig_part_size; ++i) { - if (ambigs_for_adaption_[test_unichar_ids[i]] == nullptr) { - ambigs_for_adaption_[test_unichar_ids[i]] = new UnicharIdVector(); - } - adaption_ambigs_entry = ambigs_for_adaption_[test_unichar_ids[i]]; - for (int r = 0; r < encoding.size(); ++r) { - UNICHAR_ID id_to_insert = encoding[r]; - ASSERT_HOST(id_to_insert != INVALID_UNICHAR_ID); - // Add the new unichar id to adaption_ambigs_entry (only if the - // vector does not already contain it) keeping it in sorted order. - for (j = 0; j < adaption_ambigs_entry->size() && - (*adaption_ambigs_entry)[j] > id_to_insert; ++j); - if (j < adaption_ambigs_entry->size()) { - if ((*adaption_ambigs_entry)[j] != id_to_insert) { - adaption_ambigs_entry->insert(id_to_insert, j); - } - } else { - adaption_ambigs_entry->push_back(id_to_insert); - } - } - } - } - } - } - delete[] buffer; - - // Fill in reverse_ambigs_for_adaption from ambigs_for_adaption vector. 
- if (use_ambigs_for_adaption) { - for (i = 0; i < ambigs_for_adaption_.size(); ++i) { - adaption_ambigs_entry = ambigs_for_adaption_[i]; - if (adaption_ambigs_entry == nullptr) continue; - for (j = 0; j < adaption_ambigs_entry->size(); ++j) { - UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j]; - if (reverse_ambigs_for_adaption_[ambig_id] == nullptr) { - reverse_ambigs_for_adaption_[ambig_id] = new UnicharIdVector(); - } - reverse_ambigs_for_adaption_[ambig_id]->push_back(i); - } - } - } - - // Print what was read from the input file. - if (debug_level > 1) { - for (int tbl = 0; tbl < 2; ++tbl) { - const UnicharAmbigsVector &print_table = - (tbl == 0) ? replace_ambigs_ : dang_ambigs_; - for (i = 0; i < print_table.size(); ++i) { - AmbigSpec_LIST *lst = print_table[i]; - if (lst == nullptr) continue; - if (!lst->empty()) { - tprintf("%s Ambiguities for %s:\n", - (tbl == 0) ? "Replaceable" : "Dangerous", - unicharset->debug_str(i).string()); - } - AmbigSpec_IT lst_it(lst); - for (lst_it.mark_cycle_pt(); !lst_it.cycled_list(); lst_it.forward()) { - AmbigSpec *ambig_spec = lst_it.data(); - tprintf("wrong_ngram:"); - UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, *unicharset); - tprintf("correct_fragments:"); - UnicharIdArrayUtils::print(ambig_spec->correct_fragments, *unicharset); - } - } - } - if (use_ambigs_for_adaption) { - for (int vec_id = 0; vec_id < 2; ++vec_id) { - const GenericVector &vec = (vec_id == 0) ? - ambigs_for_adaption_ : reverse_ambigs_for_adaption_; - for (i = 0; i < vec.size(); ++i) { - adaption_ambigs_entry = vec[i]; - if (adaption_ambigs_entry != nullptr) { - tprintf("%sAmbigs for adaption for %s:\n", - (vec_id == 0) ? 
"" : "Reverse ", - unicharset->debug_str(i).string()); - for (j = 0; j < adaption_ambigs_entry->size(); ++j) { - tprintf("%s ", unicharset->debug_str( - (*adaption_ambigs_entry)[j]).string()); - } - tprintf("\n"); - } - } - } - } - } -} - -bool UnicharAmbigs::ParseAmbiguityLine( - int line_num, int version, int debug_level, const UNICHARSET &unicharset, - char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids, - int *replacement_ambig_part_size, char *replacement_string, int *type) { - if (version > 1) { - // Simpler format is just wrong-string correct-string type\n. - STRING input(buffer); - GenericVector fields; - input.split(' ', &fields); - if (fields.size() != 3) { - if (debug_level) tprintf(kIllegalMsg, line_num); - return false; - } - // Encode wrong-string. - GenericVector unichars; - if (!unicharset.encode_string(fields[0].string(), true, &unichars, nullptr, - nullptr)) { - return false; - } - *test_ambig_part_size = unichars.size(); - if (*test_ambig_part_size > MAX_AMBIG_SIZE) { - if (debug_level) - tprintf("Too many unichars in ambiguity on line %d\n", line_num); - return false; - } - // Copy encoded string to output. - for (int i = 0; i < unichars.size(); ++i) - test_unichar_ids[i] = unichars[i]; - test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID; - // Encode replacement-string to check validity. 
- if (!unicharset.encode_string(fields[1].string(), true, &unichars, nullptr, - nullptr)) { - return false; - } - *replacement_ambig_part_size = unichars.size(); - if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) { - if (debug_level) - tprintf("Too many unichars in ambiguity on line %d\n", line_num); - return false; - } - if (sscanf(fields[2].string(), "%d", type) != 1) { - if (debug_level) tprintf(kIllegalMsg, line_num); - return false; - } - snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string()); - return true; - } - int i; - char *token; - char *next_token; - if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) || - !sscanf(token, "%d", test_ambig_part_size) || - *test_ambig_part_size <= 0) { - if (debug_level) tprintf(kIllegalMsg, line_num); - return false; - } - if (*test_ambig_part_size > MAX_AMBIG_SIZE) { - if (debug_level) - tprintf("Too many unichars in ambiguity on line %d\n", line_num); - return false; - } - for (i = 0; i < *test_ambig_part_size; ++i) { - if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token))) break; - if (!unicharset.contains_unichar(token)) { - if (debug_level) tprintf(kIllegalUnicharMsg, token); - break; - } - test_unichar_ids[i] = unicharset.unichar_to_id(token); - } - test_unichar_ids[i] = INVALID_UNICHAR_ID; - - if (i != *test_ambig_part_size || - !(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) || - !sscanf(token, "%d", replacement_ambig_part_size) || - *replacement_ambig_part_size <= 0) { - if (debug_level) tprintf(kIllegalMsg, line_num); - return false; - } - if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) { - if (debug_level) - tprintf("Too many unichars in ambiguity on line %d\n", line_num); - return false; - } - replacement_string[0] = '\0'; - for (i = 0; i < *replacement_ambig_part_size; ++i) { - if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token))) break; - strcat(replacement_string, token); - if (!unicharset.contains_unichar(token)) { - if (debug_level) 
tprintf(kIllegalUnicharMsg, token); - break; - } - } - if (i != *replacement_ambig_part_size) { - if (debug_level) tprintf(kIllegalMsg, line_num); - return false; - } - if (version > 0) { - // The next field being true indicates that the abiguity should - // always be substituted (e.g. '' should always be changed to "). - // For such "certain" n -> m ambigs tesseract will insert character - // fragments for the n pieces in the unicharset. AmbigsFound() - // will then replace the incorrect ngram with the character - // fragments of the correct character (or ngram if m > 1). - // Note that if m > 1, an ngram will be inserted into the - // modified word, not the individual unigrams. Tesseract - // has limited support for ngram unichar (e.g. dawg permuter). - if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) || - !sscanf(token, "%d", type)) { - if (debug_level) tprintf(kIllegalMsg, line_num); - return false; - } - } - return true; -} - -bool UnicharAmbigs::InsertIntoTable( - UnicharAmbigsVector &table, int test_ambig_part_size, - UNICHAR_ID *test_unichar_ids, int replacement_ambig_part_size, - const char *replacement_string, int type, - AmbigSpec *ambig_spec, UNICHARSET *unicharset) { - ambig_spec->type = static_cast(type); - if (test_ambig_part_size == 1 && replacement_ambig_part_size == 1 && - unicharset->to_lower(test_unichar_ids[0]) == - unicharset->to_lower(unicharset->unichar_to_id(replacement_string))) { - ambig_spec->type = CASE_AMBIG; - } - - ambig_spec->wrong_ngram_size = - UnicharIdArrayUtils::copy(test_unichar_ids, ambig_spec->wrong_ngram); - - // Since we need to maintain a constant number of unichar positions in - // order to construct ambig_blob_choices vector in NoDangerousAmbig(), for - // each n->m ambiguity we will have to place n character fragments of the - // correct ngram into the corresponding positions in the vector (e.g. 
given - // "vvvvw" and vvvv->ww we will place v and |ww|0|4 into position 0, v and - // |ww|1|4 into position 1 and so on. The correct ngram is reconstructed - // from fragments by dawg_permute_and_select(). - - // Insert the corresponding correct ngram into the unicharset. - // Unicharset code assumes that the "base" ngram is inserted into - // the unicharset before fragments of this ngram are inserted. - unicharset->unichar_insert(replacement_string, OldUncleanUnichars::kTrue); - ambig_spec->correct_ngram_id = - unicharset->unichar_to_id(replacement_string); - if (replacement_ambig_part_size > 1) { - unicharset->set_isngram(ambig_spec->correct_ngram_id, true); - } - // Add the corresponding fragments of the wrong ngram to unicharset. - int i; - for (i = 0; i < test_ambig_part_size; ++i) { - UNICHAR_ID unichar_id; - if (test_ambig_part_size == 1) { - unichar_id = ambig_spec->correct_ngram_id; - } else { - STRING frag_str = CHAR_FRAGMENT::to_string( - replacement_string, i, test_ambig_part_size, false); - unicharset->unichar_insert(frag_str.string(), OldUncleanUnichars::kTrue); - unichar_id = unicharset->unichar_to_id(frag_str.string()); - } - ambig_spec->correct_fragments[i] = unichar_id; - } - ambig_spec->correct_fragments[i] = INVALID_UNICHAR_ID; - - // Add AmbigSpec for this ambiguity to the corresponding AmbigSpec_LIST. - // Keep AmbigSpec_LISTs sorted by AmbigSpec.wrong_ngram. 
- if (table[test_unichar_ids[0]] == nullptr) { - table[test_unichar_ids[0]] = new AmbigSpec_LIST(); - } - if (table[test_unichar_ids[0]]->add_sorted( - AmbigSpec::compare_ambig_specs, true, ambig_spec)) - return true; - delete ambig_spec; - return false; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ambigs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ambigs.h deleted file mode 100644 index cedf4cfc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ambigs.h +++ /dev/null @@ -1,227 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ambigs.h -// Description: Constants, flags, functions for dealing with -// ambiguities (training and recognition). -// Author: Daria Antonova -// Created: Mon Aug 23 11:26:43 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_AMBIGS_H_ -#define TESSERACT_CCUTIL_AMBIGS_H_ - -#include "elst.h" -#include "tprintf.h" -#include "unichar.h" -#include "unicharset.h" -#include "genericvector.h" - -#define MAX_AMBIG_SIZE 10 - -namespace tesseract { - -using UnicharIdVector = GenericVector; - -static const int kUnigramAmbigsBufferSize = 1000; -static const char kAmbigNgramSeparator[] = { ' ', '\0' }; -static const char kAmbigDelimiters[] = "\t "; -static const char kIllegalMsg[] = - "Illegal ambiguity specification on line %d\n"; -static const char kIllegalUnicharMsg[] = - "Illegal unichar %s in ambiguity specification\n"; - -enum AmbigType { - NOT_AMBIG, // the ngram pair is not ambiguous - REPLACE_AMBIG, // ocred ngram should always be substituted with correct - DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1) - SIMILAR_AMBIG, // use pairwise classifier for ocred/correct pair (1-1) - CASE_AMBIG, // this is a case ambiguity (1-1) - - AMBIG_TYPE_COUNT // number of enum entries -}; - -// A collection of utility functions for arrays of UNICHAR_IDs that are -// terminated by INVALID_UNICHAR_ID. -class UnicharIdArrayUtils { - public: - // Compares two arrays of unichar ids. Returns -1 if the length of array1 is - // less than length of array2, if any array1[i] is less than array2[i]. - // Returns 0 if the arrays are equal, 1 otherwise. - // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID. - static inline int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2) { - for (;;) { - const UNICHAR_ID val1 = *ptr1++; - const UNICHAR_ID val2 = *ptr2++; - if (val1 != val2) { - if (val1 == INVALID_UNICHAR_ID) return -1; - if (val2 == INVALID_UNICHAR_ID) return 1; - if (val1 < val2) return -1; - return 1; - } - if (val1 == INVALID_UNICHAR_ID) return 0; - } - } - - // Look uid in the vector of uids. If found, the index of the matched - // element is returned. 
Otherwise, it returns -1. - static inline int find_in(const UnicharIdVector& uid_vec, - const UNICHAR_ID uid) { - for (int i = 0; i < uid_vec.size(); ++i) - if (uid_vec[i] == uid) return i; - return -1; - } - - // Copies UNICHAR_IDs from dst to src. Returns the number of ids copied. - // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID - // and that dst has enough space for all the elements from src. - static inline int copy(const UNICHAR_ID src[], UNICHAR_ID dst[]) { - int i = 0; - do { - dst[i] = src[i]; - } while (dst[i++] != INVALID_UNICHAR_ID); - return i - 1; - } - - // Prints unichars corresponding to the unichar_ids in the given array. - // The function assumes that array is terminated by INVALID_UNICHAR_ID. - static inline void print(const UNICHAR_ID array[], - const UNICHARSET &unicharset) { - const UNICHAR_ID *ptr = array; - if (*ptr == INVALID_UNICHAR_ID) tprintf("[Empty]"); - while (*ptr != INVALID_UNICHAR_ID) { - tprintf("%s ", unicharset.id_to_unichar(*ptr++)); - } - tprintf("( "); - ptr = array; - while (*ptr != INVALID_UNICHAR_ID) tprintf("%d ", *ptr++); - tprintf(")\n"); - } -}; - -// AMBIG_SPEC_LIST stores a list of dangerous ambigs that -// start with the same unichar (e.g. r->t rn->m rr1->m). -class AmbigSpec : public ELIST_LINK { - public: - AmbigSpec(); - ~AmbigSpec() = default; - - // Comparator function for sorting AmbigSpec_LISTs. The lists will - // be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors - // in a a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1]. 
- static int compare_ambig_specs(const void *spec1, const void *spec2) { - const AmbigSpec *s1 = *static_cast(spec1); - const AmbigSpec *s2 = *static_cast(spec2); - int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram); - if (result != 0) return result; - return UnicharIdArrayUtils::compare(s1->correct_fragments, - s2->correct_fragments); - } - - UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1]; - UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE + 1]; - UNICHAR_ID correct_ngram_id; - AmbigType type; - int wrong_ngram_size; -}; -ELISTIZEH(AmbigSpec) - -// AMBIG_TABLE[i] stores a set of ambiguities whose -// wrong ngram starts with unichar id i. -using UnicharAmbigsVector = GenericVector; - -class UnicharAmbigs { - public: - UnicharAmbigs() = default; - ~UnicharAmbigs() { - replace_ambigs_.delete_data_pointers(); - dang_ambigs_.delete_data_pointers(); - one_to_one_definite_ambigs_.delete_data_pointers(); - } - - const UnicharAmbigsVector &dang_ambigs() const { return dang_ambigs_; } - const UnicharAmbigsVector &replace_ambigs() const { return replace_ambigs_; } - - // Initializes the ambigs by adding a nullptr pointer to each table. - void InitUnicharAmbigs(const UNICHARSET& unicharset, - bool use_ambigs_for_adaption); - - // Loads the universal ambigs that are useful for any language. - void LoadUniversal(const UNICHARSET& encoder_set, UNICHARSET* unicharset); - - // Fills in two ambiguity tables (replaceable and dangerous) with information - // read from the ambigs file. An ambiguity table is an array of lists. - // The array is indexed by a class id. Each entry in the table provides - // a list of potential ambiguities which can start with the corresponding - // character. For example the ambiguity "rn -> m", would be located in the - // table at index of unicharset.unichar_to_id('r'). - // In 1-1 ambiguities (e.g. s -> S, 1 -> I) are recorded in - // one_to_one_definite_ambigs_. 
This vector is also indexed by the class id - // of the wrong part of the ambiguity and each entry contains a vector of - // unichar ids that are ambiguous to it. - // encoder_set is used to encode the ambiguity strings, undisturbed by new - // unichar_ids that may be created by adding the ambigs. - void LoadUnicharAmbigs(const UNICHARSET& encoder_set, - TFile *ambigs_file, int debug_level, - bool use_ambigs_for_adaption, UNICHARSET *unicharset); - - // Returns definite 1-1 ambigs for the given unichar id. - inline const UnicharIdVector *OneToOneDefiniteAmbigs( - UNICHAR_ID unichar_id) const { - if (one_to_one_definite_ambigs_.empty()) return nullptr; - return one_to_one_definite_ambigs_[unichar_id]; - } - - // Returns a pointer to the vector with all unichar ids that appear in the - // 'correct' part of the ambiguity pair when the given unichar id appears - // in the 'wrong' part of the ambiguity. E.g. if DangAmbigs file consist of - // m->rn,rn->m,m->iii, UnicharAmbigsForAdaption() called with unichar id of - // m will return a pointer to a vector with unichar ids of r,n,i. - inline const UnicharIdVector *AmbigsForAdaption( - UNICHAR_ID unichar_id) const { - if (ambigs_for_adaption_.empty()) return nullptr; - return ambigs_for_adaption_[unichar_id]; - } - - // Similar to the above, but return the vector of unichar ids for which - // the given unichar_id is an ambiguity (appears in the 'wrong' part of - // some ambiguity pair). 
- inline const UnicharIdVector *ReverseAmbigsForAdaption( - UNICHAR_ID unichar_id) const { - if (reverse_ambigs_for_adaption_.empty()) return nullptr; - return reverse_ambigs_for_adaption_[unichar_id]; - } - - private: - bool ParseAmbiguityLine(int line_num, int version, int debug_level, - const UNICHARSET &unicharset, char *buffer, - int *test_ambig_part_size, - UNICHAR_ID *test_unichar_ids, - int *replacement_ambig_part_size, - char *replacement_string, int *type); - bool InsertIntoTable(UnicharAmbigsVector &table, - int test_ambig_part_size, UNICHAR_ID *test_unichar_ids, - int replacement_ambig_part_size, - const char *replacement_string, int type, - AmbigSpec *ambig_spec, UNICHARSET *unicharset); - - UnicharAmbigsVector dang_ambigs_; - UnicharAmbigsVector replace_ambigs_; - GenericVector one_to_one_definite_ambigs_; - GenericVector ambigs_for_adaption_; - GenericVector reverse_ambigs_for_adaption_; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_AMBIGS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/basedir.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/basedir.cpp deleted file mode 100644 index ad2fd70e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/basedir.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/********************************************************************** - * File: basedir.cpp (Formerly getpath.c) - * Description: Find the directory location of the current executable using - *PATH. Author: Ray Smith Created: Mon Jul 09 09:06:39 BST 1990 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "basedir.h" - -#include - -// Assuming that code_path is the name of some file in a desired directory, -// returns the given code_path stripped back to the last slash, leaving -// the last slash in place. If there is no slash, returns ./ assuming that -// the input was the name of something in the current directory. -// Useful for getting to the directory of argv[0], but does not search -// any paths. -TESS_API void truncate_path(const char *code_path, STRING* trunc_path) { - int trunc_index = -1; - if (code_path != nullptr) { - const char* last_slash = strrchr(code_path, '/'); - if (last_slash != nullptr && last_slash + 1 - code_path > trunc_index) - trunc_index = last_slash + 1 - code_path; - last_slash = strrchr(code_path, '\\'); - if (last_slash != nullptr && last_slash + 1 - code_path > trunc_index) - trunc_index = last_slash + 1 - code_path; - } - *trunc_path = code_path; - if (trunc_index >= 0) - trunc_path->truncate_at(trunc_index); - else - *trunc_path = "./"; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/basedir.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/basedir.h deleted file mode 100644 index 60d56ba6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/basedir.h +++ /dev/null @@ -1,31 +0,0 @@ -/********************************************************************** - * File: basedir.h (Formerly getpath.h) - * Description: Header file for getpath.c. 
Provides relocatability of data. - * Author: Ray Smith - * Created: Mon Jul 09 09:13:03 BST 1990 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef BASEDIR_H -#define BASEDIR_H - -#include "platform.h" -#include "strngs.h" - -// Returns the given code_path truncated to the last slash. -// Useful for getting to the directory of argv[0], but does not search -// any paths. -TESS_API void truncate_path(const char *code_path, STRING* trunc_path); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bits16.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bits16.h deleted file mode 100644 index 9c0d0a77..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bits16.h +++ /dev/null @@ -1,62 +0,0 @@ -/********************************************************************** - * File: bits16.h (Formerly bits8.h) - * Description: Code for 8 bit field class. - * Author: Phil Cheatle - * Created: Thu Oct 17 10:10:05 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef BITS16_H -#define BITS16_H - -#include "host.h" - -class DLLSYM BITS16 -{ - public: - uint16_t val; - - BITS16() { - val = 0; - } // constructor - - BITS16(uint16_t init) { - val = init; - } - - void turn_on_bit( // flip specified bit - uint8_t bit_num) { // bit to flip 0..7 - val = val | 01 << bit_num; - } - - void turn_off_bit( // flip specified bit - uint8_t bit_num) { // bit to flip 0..7 - val = val & ~(01 << bit_num); - } - - void set_bit( // flip specified bit - uint8_t bit_num, // bit to flip 0..7 - bool value) { // value to flip to - if (value) - val = val | 01 << bit_num; - else - val = val & ~(01 << bit_num); - } - - bool bit( // access bit - uint8_t bit_num) const { // bit to access - return (val >> bit_num) & 01; - } -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bitvector.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bitvector.cpp deleted file mode 100644 index 71df6d8d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bitvector.cpp +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: bitvector.cpp -// Description: Class replacement for BITVECTOR. -// Author: Ray Smith -// Created: Mon Jan 10 17:45:01 PST 2011 -// -// (C) Copyright 2011, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "bitvector.h" -#include -#include -#include "helpers.h" -#include "serialis.h" // for tesseract::Serialize - -namespace tesseract { - -// Fast lookup table to get the first least significant set bit in a byte. -// For zero, the table has 255, but since it is a special case, most code -// that uses this table will check for zero before looking up lsb_index_. -const uint8_t BitVector::lsb_index_[256] = { - 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -// Fast lookup table to get the residual bits after zeroing the first (lowest) -// set bit in a byte. 
-const uint8_t BitVector::lsb_eroded_[256] = { - 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, - 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, - 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, - 0x10, 0x18, 0x18, 0x1a, 0x18, 0x1c, 0x1c, 0x1e, - 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, - 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, - 0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, - 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, - 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, - 0x40, 0x48, 0x48, 0x4a, 0x48, 0x4c, 0x4c, 0x4e, - 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, - 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, - 0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, - 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, - 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, - 0x70, 0x78, 0x78, 0x7a, 0x78, 0x7c, 0x7c, 0x7e, - 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, - 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, - 0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, - 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, - 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, - 0xa0, 0xa8, 0xa8, 0xaa, 0xa8, 0xac, 0xac, 0xae, - 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, - 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, - 0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, - 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, - 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, - 0xd0, 0xd8, 0xd8, 0xda, 0xd8, 0xdc, 0xdc, 0xde, - 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, - 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, - 0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, - 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe -}; - -// Fast lookup table to give the number of set bits in a byte. 
-const int BitVector::hamming_table_[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - - -BitVector::BitVector() : bit_size_(0), array_(nullptr) {} - -BitVector::BitVector(int length) : bit_size_(length) { - array_ = new uint32_t[WordLength()]; - SetAllFalse(); -} - -BitVector::BitVector(const BitVector& src) : bit_size_(src.bit_size_) { - array_ = new uint32_t[WordLength()]; - memcpy(array_, src.array_, ByteLength()); -} - -BitVector& BitVector::operator=(const BitVector& src) { - Alloc(src.bit_size_); - memcpy(array_, src.array_, ByteLength()); - return *this; -} - -BitVector::~BitVector() { - delete [] array_; -} - -// Initializes the array to length * false. -void BitVector::Init(int length) { - Alloc(length); - SetAllFalse(); -} - -// Writes to the given file. Returns false in case of error. -bool BitVector::Serialize(FILE* fp) const { - if (!tesseract::Serialize(fp, &bit_size_)) return false; - int wordlen = WordLength(); - return tesseract::Serialize(fp, &array_[0], wordlen); -} - -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. 
-bool BitVector::DeSerialize(bool swap, FILE* fp) { - uint32_t new_bit_size; - if (!tesseract::DeSerialize(fp, &new_bit_size)) return false; - if (swap) { - ReverseN(&new_bit_size, sizeof(new_bit_size)); - } - Alloc(new_bit_size); - int wordlen = WordLength(); - if (!tesseract::DeSerialize(fp, &array_[0], wordlen)) return false; - if (swap) { - for (int i = 0; i < wordlen; ++i) - ReverseN(&array_[i], sizeof(array_[i])); - } - return true; -} - -void BitVector::SetAllFalse() { - memset(array_, 0, ByteLength()); -} -void BitVector::SetAllTrue() { - memset(array_, ~0, ByteLength()); -} - -// Returns the index of the next set bit after the given index. -// Useful for quickly iterating through the set bits in a sparse vector. -int BitVector::NextSetBit(int prev_bit) const { - // Move on to the next bit. - int next_bit = prev_bit + 1; - if (next_bit >= bit_size_) return -1; - // Check the remains of the word containing the next_bit first. - int next_word = WordIndex(next_bit); - int bit_index = next_word * kBitFactor; - int word_end = bit_index + kBitFactor; - uint32_t word = array_[next_word]; - uint8_t byte = word & 0xff; - while (bit_index < word_end) { - if (bit_index + 8 > next_bit && byte != 0) { - while (bit_index + lsb_index_[byte] < next_bit && byte != 0) - byte = lsb_eroded_[byte]; - if (byte != 0) - return bit_index + lsb_index_[byte]; - } - word >>= 8; - bit_index += 8; - byte = word & 0xff; - } - // next_word didn't contain a 1, so find the next word with set bit. - ++next_word; - int wordlen = WordLength(); - while (next_word < wordlen && (word = array_[next_word]) == 0) { - ++next_word; - bit_index += kBitFactor; - } - if (bit_index >= bit_size_) return -1; - // Find the first non-zero byte within the word. - while ((word & 0xff) == 0) { - word >>= 8; - bit_index += 8; - } - return bit_index + lsb_index_[word & 0xff]; -} - -// Returns the number of set bits in the vector. 
-int BitVector::NumSetBits() const { - int wordlen = WordLength(); - int total_bits = 0; - for (int w = 0; w < wordlen; ++w) { - uint32_t word = array_[w]; - for (int i = 0; i < 4; ++i) { - total_bits += hamming_table_[word & 0xff]; - word >>= 8; - } - } - return total_bits; -} - -// Logical in-place operations on whole bit vectors. Tries to do something -// sensible if they aren't the same size, but they should be really. -void BitVector::operator|=(const BitVector& other) { - int length = std::min(WordLength(), other.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] |= other.array_[w]; -} -void BitVector::operator&=(const BitVector& other) { - int length = std::min(WordLength(), other.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] &= other.array_[w]; - for (int w = WordLength() - 1; w >= length; --w) - array_[w] = 0; -} -void BitVector::operator^=(const BitVector& other) { - int length = std::min(WordLength(), other.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] ^= other.array_[w]; -} -// Set subtraction *this = v1 - v2. -void BitVector::SetSubtract(const BitVector& v1, const BitVector& v2) { - Alloc(v1.size()); - int length = std::min(v1.WordLength(), v2.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] = v1.array_[w] ^ (v1.array_[w] & v2.array_[w]); - for (int w = WordLength() - 1; w >= length; --w) - array_[w] = v1.array_[w]; -} - -// Allocates memory for a vector of the given length. -// Reallocates if the array is a different size, larger or smaller. -void BitVector::Alloc(int length) { - int initial_wordlength = WordLength(); - bit_size_ = length; - int new_wordlength = WordLength(); - if (new_wordlength != initial_wordlength) { - delete [] array_; - array_ = new uint32_t[new_wordlength]; - } -} - - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bitvector.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bitvector.h deleted file mode 100644 index 9f105145..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/bitvector.h +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: bitvector.h -// Description: Class replacement for BITVECTOR. -// Author: Ray Smith -// Created: Mon Jan 10 17:44:01 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_BITVECTOR_H_ -#define TESSERACT_CCUTIL_BITVECTOR_H_ - -#include -#include -#include "host.h" - -namespace tesseract { - -// Trivial class to encapsulate a fixed-length array of bits, with -// Serialize/DeSerialize. Replaces the old macros. -class BitVector { - public: - // Fast lookup table to get the first least significant set bit in a byte. - // For zero, the table has 255, but since it is a special case, most code - // that uses this table will check for zero before looking up lsb_index_. - static const uint8_t lsb_index_[256]; - // Fast lookup table to get the residual bits after zeroing the least - // significant set bit in a byte. 
- static const uint8_t lsb_eroded_[256]; - // Fast lookup table to give the number of set bits in a byte. - static const int hamming_table_[256]; - - BitVector(); - // Initializes the array to length * false. - explicit BitVector(int length); - BitVector(const BitVector& src); - BitVector& operator=(const BitVector& src); - ~BitVector(); - - // Initializes the array to length * false. - void Init(int length); - - // Returns the number of bits that are accessible in the vector. - int size() const { - return bit_size_; - } - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - void SetAllFalse(); - void SetAllTrue(); - - // Accessors to set/reset/get bits. - // The range of index is [0, size()-1]. - // There is debug-only bounds checking. - void SetBit(int index) { - array_[WordIndex(index)] |= BitMask(index); - } - void ResetBit(int index) { - array_[WordIndex(index)] &= ~BitMask(index); - } - void SetValue(int index, bool value) { - if (value) - SetBit(index); - else - ResetBit(index); - } - bool At(int index) const { - return (array_[WordIndex(index)] & BitMask(index)) != 0; - } - bool operator[](int index) const { - return (array_[WordIndex(index)] & BitMask(index)) != 0; - } - - // Returns the index of the next set bit after the given index. - // Useful for quickly iterating through the set bits in a sparse vector. - int NextSetBit(int prev_bit) const; - - // Returns the number of set bits in the vector. - int NumSetBits() const; - - // Logical in-place operations on whole bit vectors. Tries to do something - // sensible if they aren't the same size, but they should be really. - void operator|=(const BitVector& other); - void operator&=(const BitVector& other); - void operator^=(const BitVector& other); - // Set subtraction *this = v1 - v2. 
- void SetSubtract(const BitVector& v1, const BitVector& v2); - - private: - // Allocates memory for a vector of the given length. - void Alloc(int length); - - // Computes the index to array_ for the given index, with debug range - // checking. - int WordIndex(int index) const { - assert(0 <= index && index < bit_size_); - return index / kBitFactor; - } - // Returns a mask to select the appropriate bit for the given index. - uint32_t BitMask(int index) const { - return 1 << (index & (kBitFactor - 1)); - } - // Returns the number of array elements needed to represent the current - // bit_size_. - int WordLength() const { - return (bit_size_ + kBitFactor - 1) / kBitFactor; - } - // Returns the number of bytes consumed by the array_. - int ByteLength() const { - return WordLength() * sizeof(*array_); - } - - // Number of bits in this BitVector. - int32_t bit_size_; - // Array of words used to pack the bits. - // Bits are stored little-endian by uint32_t word, ie by word first and then - // starting with the least significant bit in each word. - uint32_t* array_; - // Number of bits in an array_ element. - static const int kBitFactor = sizeof(uint32_t) * 8; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCUTIL_BITVECTOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ccutil.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ccutil.cpp deleted file mode 100644 index 32e82c75..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ccutil.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// Author: scharron@google.com (Samuel Charron) -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ccutil.h" - -namespace tesseract { -CCUtil::CCUtil() : - params_(), -#ifdef _WIN32 - STRING_INIT_MEMBER(tessedit_module_name, WINDLLNAME, - "Module colocated with tessdata dir", ¶ms_), -#endif - INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", - ¶ms_), - BOOL_MEMBER(use_definite_ambigs_for_classifier, 0, "Use definite" - " ambiguities when running character classifier", ¶ms_), - BOOL_MEMBER(use_ambigs_for_adaption, 0, "Use ambigs for deciding" - " whether to adapt to a character", ¶ms_) { -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. 
-CCUtil::~CCUtil() = default; - -CCUtilMutex::CCUtilMutex() { -#ifdef _WIN32 - mutex_ = CreateMutex(0, FALSE, 0); -#else - pthread_mutex_init(&mutex_, nullptr); -#endif -} - -void CCUtilMutex::Lock() { -#ifdef _WIN32 - WaitForSingleObject(mutex_, INFINITE); -#else - pthread_mutex_lock(&mutex_); -#endif -} - -void CCUtilMutex::Unlock() { -#ifdef _WIN32 - ReleaseMutex(mutex_); -#else - pthread_mutex_unlock(&mutex_); -#endif -} - -CCUtilMutex tprintfMutex; // should remain global -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ccutil.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ccutil.h deleted file mode 100644 index 0cf35ff7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ccutil.h +++ /dev/null @@ -1,94 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ccutil.h -// Description: ccutil class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_CCUTIL_H_ -#define TESSERACT_CCUTIL_CCUTIL_H_ - -#include "ambigs.h" -#include "errcode.h" -#include "strngs.h" -#include "params.h" -#include "unicharset.h" - -#ifndef _WIN32 -#include -#include -#endif - -namespace tesseract { - -class CCUtilMutex { - public: - CCUtilMutex(); - - void Lock(); - - void Unlock(); - private: -#ifdef _WIN32 - HANDLE mutex_; -#else - pthread_mutex_t mutex_; -#endif -}; - - -class CCUtil { - public: - CCUtil(); - virtual ~CCUtil(); - - public: - // Read the arguments and set up the data path. - void main_setup( - const char *argv0, // program name - const char *basename // name of image - ); - ParamsVectors *params() { return ¶ms_; } - - STRING datadir; // dir for data files - STRING imagebasename; // name of image - STRING lang; - STRING language_data_path_prefix; - UNICHARSET unicharset; - UnicharAmbigs unichar_ambigs; - STRING imagefile; // image file name - STRING directory; // main directory - - private: - ParamsVectors params_; - - public: - // Member parameters. - // These have to be declared and initialized after params_ member, since - // params_ should be initialized before parameters are added to it. 
- #ifdef _WIN32 - STRING_VAR_H(tessedit_module_name, WINDLLNAME, - "Module colocated with tessdata dir"); - #endif - INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities"); - BOOL_VAR_H(use_definite_ambigs_for_classifier, 0, - "Use definite ambiguities when running character classifier"); - BOOL_VAR_H(use_ambigs_for_adaption, 0, - "Use ambigs for deciding whether to adapt to a character"); -}; - -extern CCUtilMutex tprintfMutex; // should remain global -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_CCUTIL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/clst.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/clst.cpp deleted file mode 100644 index 688b0a7f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/clst.cpp +++ /dev/null @@ -1,510 +0,0 @@ -/********************************************************************** - * File: clst.cpp (Formerly clist.c) - * Description: CONS cell list handling code which is not in the include file. - * Author: Phil Cheatle - * Created: Mon Jan 28 08:33:13 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include -#include "clst.h" - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: CLIST - * ================================ - **********************************************************************/ - -/*********************************************************************** - * CLIST::internal_deep_clear - * - * Used by the "deep_clear" member function of derived list - * classes to destroy all the elements on the list. - * The calling function passes a "zapper" function which can be called to - * delete each data element of the list, regardless of its class. This - * technique permits a generic clear function to destroy elements of - * different derived types correctly, without requiring virtual functions and - * the consequential memory overhead. - **********************************************************************/ - -void -CLIST::internal_deep_clear ( //destroy all links -void (*zapper) (void *)) { //ptr to zapper functn - CLIST_LINK *ptr; - CLIST_LINK *next; - - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty - while (ptr) { - next = ptr->next; - zapper (ptr->data); - delete(ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * CLIST::shallow_clear - * - * Used by the destructor and the "shallow_clear" member function of derived - * list classes to destroy the list. - * The data elements are NOT destroyed. 
- * - **********************************************************************/ - -void CLIST::shallow_clear() { //destroy all links - CLIST_LINK *ptr; - CLIST_LINK *next; - - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty - while (ptr) { - next = ptr->next; - delete(ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * CLIST::assign_to_sublist - * - * The list is set to a sublist of another list. "This" list must be empty - * before this function is invoked. The two iterators passed must refer to - * the same list, different from "this" one. The sublist removed is the - * inclusive list from start_it's current position to end_it's current - * position. If this range passes over the end of the source list then the - * source list has its end set to the previous element of start_it. The - * extracted sublist is unaffected by the end point of the source list, its - * end point is always the end_it position. 
- **********************************************************************/ - -void CLIST::assign_to_sublist( //to this list - CLIST_ITERATOR *start_it, //from list start - CLIST_ITERATOR *end_it) { //from list end - const ERRCODE LIST_NOT_EMPTY = - "Destination list must be empty before extracting a sublist"; - - if (!empty ()) - LIST_NOT_EMPTY.error ("CLIST.assign_to_sublist", ABORT, nullptr); - - last = start_it->extract_sublist (end_it); -} - -/*********************************************************************** - * CLIST::length - * - * Return count of elements on list - **********************************************************************/ - -int32_t CLIST::length() const { //count elements - CLIST_ITERATOR it(const_cast(this)); - int32_t count = 0; - - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) - count++; - return count; -} - -/*********************************************************************** - * CLIST::sort - * - * Sort elements on list - **********************************************************************/ - -void -CLIST::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - CLIST_ITERATOR it(this); - int32_t count; - void **base; //ptr array to sort - void **current; - int32_t i; - - /* Allocate an array of pointers, one per list element */ - count = length (); - base = (void **) malloc (count * sizeof (void *)); - - /* Extract all elements, putting the pointers in the array */ - current = base; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - *current = it.extract (); - current++; - } - - /* Sort the pointer array */ - qsort(base, count, sizeof(*base), comparator); - - /* Rebuild the list from the sorted pointers */ - current = base; - for (i = 0; i < count; i++) { - it.add_to_end (*current); - current++; - } - free(base); -} - -// Assuming list has been sorted already, insert new_data to -// keep the list sorted according to the same comparison function. 
-// Comparison function is the same as used by sort, i.e. uses double -// indirection. Time is O(1) to add to beginning or end. -// Time is linear to add pre-sorted items to an empty list. -// If unique, then don't add duplicate entries. -// Returns true if the element was added to the list. -bool CLIST::add_sorted(int comparator(const void*, const void*), - bool unique, void* new_data) { - // Check for adding at the end. - if (last == nullptr || comparator(&last->data, &new_data) < 0) { - CLIST_LINK* new_element = new CLIST_LINK; - new_element->data = new_data; - if (last == nullptr) { - new_element->next = new_element; - } else { - new_element->next = last->next; - last->next = new_element; - } - last = new_element; - return true; - } else if (!unique || last->data != new_data) { - // Need to use an iterator. - CLIST_ITERATOR it(this); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - void* data = it.data(); - if (data == new_data && unique) - return false; - if (comparator(&data, &new_data) > 0) - break; - } - if (it.cycled_list()) - it.add_to_end(new_data); - else - it.add_before_then_move(new_data); - return true; - } - return false; -} - -// Assuming that the minuend and subtrahend are already sorted with -// the same comparison function, shallow clears this and then copies -// the set difference minuend - subtrahend to this, being the elements -// of minuend that do not compare equal to anything in subtrahend. -// If unique is true, any duplicates in minuend are also eliminated. -void CLIST::set_subtract(int comparator(const void*, const void*), - bool unique, - CLIST* minuend, CLIST* subtrahend) { - shallow_clear(); - CLIST_ITERATOR m_it(minuend); - CLIST_ITERATOR s_it(subtrahend); - // Since both lists are sorted, finding the subtras that are not - // minus is a case of a parallel iteration. 
- for (m_it.mark_cycle_pt(); !m_it.cycled_list(); m_it.forward()) { - void* minu = m_it.data(); - void* subtra = nullptr; - if (!s_it.empty()) { - subtra = s_it.data(); - while (!s_it.at_last() && - comparator(&subtra, &minu) < 0) { - s_it.forward(); - subtra = s_it.data(); - } - } - if (subtra == nullptr || comparator(&subtra, &minu) != 0) - add_sorted(comparator, unique, minu); - } -} - - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: CLIST_ITERATOR - * ========================================= - **********************************************************************/ - -/*********************************************************************** - * CLIST_ITERATOR::forward - * - * Move the iterator to the next element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -void *CLIST_ITERATOR::forward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::forward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; - - if (current) { //not removed so - //set previous - prev = current; - started_cycling = TRUE; - // In case next is deleted by another iterator, get next from current. - current = current->next; - } else { - if (ex_current_was_cycle_pt) - cycle_pt = next; - current = next; - } - - #ifndef NDEBUG - if (!current) - NULL_DATA.error ("CLIST_ITERATOR::forward", ABORT, nullptr); - if (!next) - NULL_NEXT.error ("CLIST_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", this, current); - #endif - - next = current->next; - return current->data; -} - -/*********************************************************************** - * CLIST_ITERATOR::data_relative - * - * Return the data pointer to the element "offset" elements from current. - * "offset" must not be less than -1. 
- * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -void *CLIST_ITERATOR::data_relative( //get data + or - ... - int8_t offset) { //offset from current - CLIST_LINK *ptr; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::data_relative", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("CLIST_ITERATOR::data_relative", ABORT, nullptr); - if (offset < -1) - BAD_PARAMETER.error ("CLIST_ITERATOR::data_relative", ABORT, - "offset < -l"); - #endif - - if (offset == -1) - ptr = prev; - else - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next); - - #ifndef NDEBUG - if (!ptr) - NULL_DATA.error ("CLIST_ITERATOR::data_relative", ABORT, nullptr); - #endif - - return ptr->data; -} - -/*********************************************************************** - * CLIST_ITERATOR::move_to_last() - * - * Move current so that it is set to the end of the list. - * Return data just in case anyone wants it. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -void *CLIST_ITERATOR::move_to_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::move_to_last", ABORT, nullptr); - #endif - - while (current != list->last) - forward(); - - if (current == nullptr) - return nullptr; - else - return current->data; -} - -/*********************************************************************** - * CLIST_ITERATOR::exchange() - * - * Given another iterator, whose current element is a different element on - * the same list list OR an element of another list, exchange the two current - * elements. On return, each iterator points to the element which was the - * other iterators current on entry. - * (This function hasn't been in-lined because its a bit big!) 
- **********************************************************************/ - -void CLIST_ITERATOR::exchange( //positions of 2 links - CLIST_ITERATOR *other_it) { //other iterator - const ERRCODE DONT_EXCHANGE_DELETED = - "Can't exchange deleted elements of lists"; - - CLIST_LINK *old_current; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::exchange", ABORT, nullptr); - if (!other_it) - BAD_PARAMETER.error ("CLIST_ITERATOR::exchange", ABORT, "other_it nullptr"); - if (!(other_it->list)) - NO_LIST.error ("CLIST_ITERATOR::exchange", ABORT, "other_it"); - #endif - - /* Do nothing if either list is empty or if both iterators reference the same - link */ - - if ((list->empty ()) || - (other_it->list->empty ()) || (current == other_it->current)) - return; - - /* Error if either current element is deleted */ - - if (!current || !other_it->current) - DONT_EXCHANGE_DELETED.error ("CLIST_ITERATOR.exchange", ABORT, nullptr); - - /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements - (other before this); non-doubleton adjacent elements (this before other); - non-adjacent elements. 
*/ - - //adjacent links - if ((next == other_it->current) || - (other_it->next == current)) { - //doubleton list - if ((next == other_it->current) && - (other_it->next == current)) { - prev = next = current; - other_it->prev = other_it->next = other_it->current; - } - else { //non-doubleton with - //adjacent links - //other before this - if (other_it->next == current) { - other_it->prev->next = current; - other_it->current->next = next; - current->next = other_it->current; - other_it->next = other_it->current; - prev = current; - } - else { //this before other - prev->next = other_it->current; - current->next = other_it->next; - other_it->current->next = current; - next = current; - other_it->prev = other_it->current; - } - } - } - else { //no overlap - prev->next = other_it->current; - current->next = other_it->next; - other_it->prev->next = current; - other_it->current->next = next; - } - - /* update end of list pointer when necessary (remember that the 2 iterators - may iterate over different lists!) */ - - if (list->last == current) - list->last = other_it->current; - if (other_it->list->last == other_it->current) - other_it->list->last = current; - - if (current == cycle_pt) - cycle_pt = other_it->cycle_pt; - if (other_it->current == other_it->cycle_pt) - other_it->cycle_pt = cycle_pt; - - /* The actual exchange - in all cases*/ - - old_current = current; - current = other_it->current; - other_it->current = old_current; -} - -/*********************************************************************** - * CLIST_ITERATOR::extract_sublist() - * - * This is a private member, used only by CLIST::assign_to_sublist. - * Given another iterator for the same list, extract the links from THIS to - * OTHER inclusive, link them into a new circular list, and return a - * pointer to the last element. 
- * (Can't inline this function because it contains a loop) - **********************************************************************/ - -CLIST_LINK *CLIST_ITERATOR::extract_sublist( //from this current - CLIST_ITERATOR *other_it) { //to other current - CLIST_ITERATOR temp_it = *this; - CLIST_LINK *end_of_new_list; - - const ERRCODE BAD_SUBLIST = "Can't find sublist end point in original list"; - #ifndef NDEBUG - const ERRCODE BAD_EXTRACTION_PTS = - "Can't extract sublist from points on different lists"; - const ERRCODE DONT_EXTRACT_DELETED = - "Can't extract a sublist marked by deleted points"; - - if (!other_it) - BAD_PARAMETER.error ("CLIST_ITERATOR::extract_sublist", ABORT, - "other_it nullptr"); - if (!list) - NO_LIST.error ("CLIST_ITERATOR::extract_sublist", ABORT, nullptr); - if (list != other_it->list) - BAD_EXTRACTION_PTS.error ("CLIST_ITERATOR.extract_sublist", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("CLIST_ITERATOR::extract_sublist", ABORT, nullptr); - - if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error ("CLIST_ITERATOR.extract_sublist", ABORT, - nullptr); - #endif - - ex_current_was_last = other_it->ex_current_was_last = false; - ex_current_was_cycle_pt = false; - other_it->ex_current_was_cycle_pt = false; - - temp_it.mark_cycle_pt (); - do { //walk sublist - if (temp_it.cycled_list()) // can't find end pt - BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, nullptr); - - if (temp_it.at_last ()) { - list->last = prev; - ex_current_was_last = other_it->ex_current_was_last = true; - } - - if (temp_it.current == cycle_pt) - ex_current_was_cycle_pt = true; - - if (temp_it.current == other_it->cycle_pt) - other_it->ex_current_was_cycle_pt = true; - - temp_it.forward (); - } - while (temp_it.prev != other_it->current); - - //circularise sublist - other_it->current->next = current; - end_of_new_list = other_it->current; - - //sublist = whole list - if (prev == other_it->current) { - list->last = nullptr; - prev = 
current = next = nullptr; - other_it->prev = other_it->current = other_it->next = nullptr; - } - else { - prev->next = other_it->next; - current = other_it->current = nullptr; - next = other_it->next; - other_it->prev = prev; - } - return end_of_new_list; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/clst.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/clst.h deleted file mode 100644 index cad132b0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/clst.h +++ /dev/null @@ -1,931 +0,0 @@ -/********************************************************************** - * File: clst.h (Formerly clist.h) - * Description: CONS cell list module include file. - * Author: Phil Cheatle - * Created: Mon Jan 28 08:33:13 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef CLST_H -#define CLST_H - -#include -#include "host.h" -#include "serialis.h" -#include "lsterr.h" - -class CLIST_ITERATOR; - -/********************************************************************** - * CLASS - CLIST_LINK - * - * Generic link class for singly linked CONS cell lists - * - * Note: No destructor - elements are assumed to be destroyed EITHER after - * they have been extracted from a list OR by the CLIST destructor which - * walks the list. 
- **********************************************************************/ - -class DLLSYM CLIST_LINK -{ - friend class CLIST_ITERATOR; - friend class CLIST; - - CLIST_LINK *next; - void *data; - - public: - CLIST_LINK() { //constructor - data = next = nullptr; - } - - CLIST_LINK( // copy constructor - const CLIST_LINK &) { // don't copy link - data = next = nullptr; - } - - void operator=( // don't copy links - const CLIST_LINK &) { - data = next = nullptr; - } -}; - -/********************************************************************** - * CLASS - CLIST - * - * Generic list class for singly linked CONS cell lists - **********************************************************************/ - -class DLLSYM CLIST -{ - friend class CLIST_ITERATOR; - - CLIST_LINK *last; //End of list - //(Points to head) - CLIST_LINK *First() { // return first - return last != nullptr ? last->next : nullptr; - } - - public: - CLIST() { //constructor - last = nullptr; - } - - ~CLIST () { //destructor - shallow_clear(); - } - - void internal_deep_clear ( //destroy all links - void (*zapper) (void *)); //ptr to zapper functn - - void shallow_clear(); // clear list but don't - // delete data elements - - bool empty() const { //is list empty? - return !last; - } - - bool singleton() const { - return last != nullptr ? (last == last->next) : false; - } - - void shallow_copy( //dangerous!! - CLIST *from_list) { //beware destructors!! - last = from_list->last; - } - - void assign_to_sublist( //to this list - CLIST_ITERATOR *start_it, //from list start - CLIST_ITERATOR *end_it); //from list end - - int32_t length() const; //# elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_data to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. 
Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - // If unique, then don't add duplicate entries. - // Returns true if the element was added to the list. - bool add_sorted(int comparator(const void*, const void*), - bool unique, void* new_data); - - // Assuming that the minuend and subtrahend are already sorted with - // the same comparison function, shallow clears this and then copies - // the set difference minuend - subtrahend to this, being the elements - // of minuend that do not compare equal to anything in subtrahend. - // If unique is true, any duplicates in minuend are also eliminated. - void set_subtract(int comparator(const void*, const void*), bool unique, - CLIST* minuend, CLIST* subtrahend); - -}; - -/*********************************************************************** - * CLASS - CLIST_ITERATOR - * - * Generic iterator class for singly linked lists with embedded - *links - **********************************************************************/ - -class DLLSYM CLIST_ITERATOR -{ - friend void CLIST::assign_to_sublist(CLIST_ITERATOR *, CLIST_ITERATOR *); - - CLIST *list; //List being iterated - CLIST_LINK *prev; //prev element - CLIST_LINK *current; //current element - CLIST_LINK *next; //next element - bool ex_current_was_last; //current extracted - //was end of list - bool ex_current_was_cycle_pt; //current extracted - //was cycle point - CLIST_LINK *cycle_pt; //point we are cycling - //the list to. - bool started_cycling; //Have we moved off - //the start? - - CLIST_LINK *extract_sublist( //from this current... 
- CLIST_ITERATOR *other_it); //to other current - - public: - CLIST_ITERATOR() { //constructor - list = nullptr; - } //unassigned list - - CLIST_ITERATOR( //constructor - CLIST *list_to_iterate); - - void set_to_list( //change list - CLIST *list_to_iterate); - - void add_after_then_move( //add after current & - void *new_data); //move to new - - void add_after_stay_put( //add after current & - void *new_data); //stay at current - - void add_before_then_move( //add before current & - void *new_data); //move to new - - void add_before_stay_put( //add before current & - void *new_data); //stay at current - - void add_list_after( //add a list & - CLIST *list_to_add); //stay at current - - void add_list_before( //add a list & - CLIST *list_to_add); //move to it 1st item - - void *data() { //get current data - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::data", ABORT, nullptr); - if (!current) - NULL_DATA.error ("CLIST_ITERATOR::data", ABORT, nullptr); - #endif - return current->data; - } - - void *data_relative( //get data + or - ... - int8_t offset); //offset from current - - void *forward(); //move to next element - - void *extract(); //remove from list - - void *move_to_first(); //go to start of list - - void *move_to_last(); //go to end of list - - void mark_cycle_pt(); //remember current - - bool empty() { //is list empty? - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::empty", ABORT, nullptr); - #endif - return list->empty (); - } - - bool current_extracted() { //current extracted? - return !current; - } - - bool at_first(); //Current is first? - - bool at_last(); //Current is last? - - bool cycled_list(); //Completed a cycle? 
- - void add_to_end( // add at end & - void *new_data); // don't move - - void exchange( //positions of 2 links - CLIST_ITERATOR *other_it); //other iterator - - int32_t length(); //# elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - -}; - -/*********************************************************************** - * CLIST_ITERATOR::set_to_list - * - * (Re-)initialise the iterator to point to the start of the list_to_iterate - * over. - **********************************************************************/ - -inline void CLIST_ITERATOR::set_to_list( //change list - CLIST *list_to_iterate) { - #ifndef NDEBUG - if (!list_to_iterate) - BAD_PARAMETER.error ("CLIST_ITERATOR::set_to_list", ABORT, - "list_to_iterate is nullptr"); - #endif - - list = list_to_iterate; - prev = list->last; - current = list->First (); - next = current != nullptr ? current->next : nullptr; - cycle_pt = nullptr; //await explicit set - started_cycling = false; - ex_current_was_last = false; - ex_current_was_cycle_pt = false; -} - -/*********************************************************************** - * CLIST_ITERATOR::CLIST_ITERATOR - * - * CONSTRUCTOR - set iterator to specified list; - **********************************************************************/ - -inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) { - set_to_list(list_to_iterate); -} - -/*********************************************************************** - * CLIST_ITERATOR::add_after_then_move - * - * Add a new element to the list after the current element and move the - * iterator to the new element. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::add_after_then_move( // element to add - void *new_data) { - CLIST_LINK *new_element; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_after_then_move", ABORT, nullptr); - if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_after_then_move", ABORT, - "new_data is nullptr"); - #endif - - new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } - else { - new_element->next = next; - - if (current) { //not extracted - current->next = new_element; - prev = current; - if (current == list->last) - list->last = new_element; - } - else { //current extracted - prev->next = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; - } - } - current = new_element; -} - -/*********************************************************************** - * CLIST_ITERATOR::add_after_stay_put - * - * Add a new element to the list after the current element but do not move - * the iterator to the new element. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::add_after_stay_put( // element to add - void *new_data) { - CLIST_LINK *new_element; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_after_stay_put", ABORT, nullptr); - if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_after_stay_put", ABORT, - "new_data is nullptr"); - #endif - - new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = false; - current = nullptr; - } - else { - new_element->next = next; - - if (current) { //not extracted - current->next = new_element; - if (prev == current) - prev = new_element; - if (current == list->last) - list->last = new_element; - } - else { //current extracted - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - ex_current_was_last = false; - } - } - next = new_element; - } -} - -/*********************************************************************** - * CLIST_ITERATOR::add_before_then_move - * - * Add a new element to the list before the current element and move the - * iterator to the new element. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::add_before_then_move( // element to add - void *new_data) { - CLIST_LINK *new_element; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_before_then_move", ABORT, nullptr); - if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_before_then_move", ABORT, - "new_data is nullptr"); - #endif - - new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } - else { - prev->next = new_element; - if (current) { //not extracted - new_element->next = current; - next = current; - } - else { //current extracted - new_element->next = next; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; - } - } - current = new_element; -} - -/*********************************************************************** - * CLIST_ITERATOR::add_before_stay_put - * - * Add a new element to the list before the current element but don't move the - * iterator to the new element. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::add_before_stay_put( // element to add - void *new_data) { - CLIST_LINK *new_element; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_before_stay_put", ABORT, nullptr); - if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_before_stay_put", ABORT, - "new_data is nullptr"); - #endif - - new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = true; - current = nullptr; - } - else { - prev->next = new_element; - if (current) { //not extracted - new_element->next = current; - if (next == current) - next = new_element; - } - else { //current extracted - new_element->next = next; - if (ex_current_was_last) - list->last = new_element; - } - prev = new_element; - } -} - -/*********************************************************************** - * CLIST_ITERATOR::add_list_after - * - * Insert another list to this list after the current element but don't move - *the - * iterator. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_list_after", ABORT, nullptr); - if (!list_to_add) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_list_after", ABORT, - "list_to_add is nullptr"); - #endif - - if (!list_to_add->empty ()) { - if (list->empty ()) { - list->last = list_to_add->last; - prev = list->last; - next = list->First (); - ex_current_was_last = true; - current = nullptr; - } - else { - if (current) { //not extracted - current->next = list_to_add->First (); - if (current == list->last) - list->last = list_to_add->last; - list_to_add->last->next = next; - next = current->next; - } - else { //current extracted - prev->next = list_to_add->First (); - if (ex_current_was_last) { - list->last = list_to_add->last; - ex_current_was_last = false; - } - list_to_add->last->next = next; - next = prev->next; - } - } - list_to_add->last = nullptr; - } -} - -/*********************************************************************** - * CLIST_ITERATOR::add_list_before - * - * Insert another list to this list before the current element. Move the - * iterator to the start of the inserted elements - * iterator. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_list_before", ABORT, nullptr); - if (!list_to_add) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_list_before", ABORT, - "list_to_add is nullptr"); - #endif - - if (!list_to_add->empty ()) { - if (list->empty ()) { - list->last = list_to_add->last; - prev = list->last; - current = list->First (); - next = current->next; - ex_current_was_last = false; - } - else { - prev->next = list_to_add->First (); - if (current) { //not extracted - list_to_add->last->next = current; - } - else { //current extracted - list_to_add->last->next = next; - if (ex_current_was_last) - list->last = list_to_add->last; - if (ex_current_was_cycle_pt) - cycle_pt = prev->next; - } - current = prev->next; - next = current->next; - } - list_to_add->last = nullptr; - } -} - -/*********************************************************************** - * CLIST_ITERATOR::extract - * - * Do extraction by removing current from the list, deleting the cons cell - * and returning the data to the caller, but NOT updating the iterator. (So - * that any calling loop can do this.) The iterator's current points to - * nullptr. If the data is to be deleted, this is the callers responsibility. - **********************************************************************/ - -inline void *CLIST_ITERATOR::extract() { - void *extracted_data; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::extract", ABORT, nullptr); - if (!current) //list empty or - //element extracted - NULL_CURRENT.error ("CLIST_ITERATOR::extract", - ABORT, nullptr); - #endif - - if (list->singleton()) { - // Special case where we do need to change the iterator. 
- prev = next = list->last = nullptr; - } else { - prev->next = next; //remove from list - - if (current == list->last) { - list->last = prev; - ex_current_was_last = true; - } else { - ex_current_was_last = false; - } - } - // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. - ex_current_was_cycle_pt = (current == cycle_pt); - extracted_data = current->data; - delete(current); //destroy CONS cell - current = nullptr; - return extracted_data; -} - -/*********************************************************************** - * CLIST_ITERATOR::move_to_first() - * - * Move current so that it is set to the start of the list. - * Return data just in case anyone wants it. - **********************************************************************/ - -inline void *CLIST_ITERATOR::move_to_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::move_to_first", ABORT, nullptr); - #endif - - current = list->First (); - prev = list->last; - next = current != nullptr ? current->next : nullptr; - return current != nullptr ? current->data : nullptr; -} - -/*********************************************************************** - * CLIST_ITERATOR::mark_cycle_pt() - * - * Remember the current location so that we can tell whether we've returned - * to this point later. - * - * If the current point is deleted either now, or in the future, the cycle - * point will be set to the next item which is set to current. This could be - * by a forward, add_after_then_move or add_after_then_move. 
- **********************************************************************/ - -inline void CLIST_ITERATOR::mark_cycle_pt() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr); - #endif - - if (current) - cycle_pt = current; - else - ex_current_was_cycle_pt = TRUE; - started_cycling = FALSE; -} - -/*********************************************************************** - * CLIST_ITERATOR::at_first() - * - * Are we at the start of the list? - * - **********************************************************************/ - -inline bool CLIST_ITERATOR::at_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::at_first", ABORT, nullptr); - #endif - - //we're at a deleted - return ((list->empty ()) || (current == list->First ()) || ((current == nullptr) && - (prev == list->last) && //NON-last pt between - !ex_current_was_last)); //first and last -} - -/*********************************************************************** - * CLIST_ITERATOR::at_last() - * - * Are we at the end of the list? - * - **********************************************************************/ - -inline bool CLIST_ITERATOR::at_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::at_last", ABORT, nullptr); - #endif - - //we're at a deleted - return ((list->empty ()) || (current == list->last) || ((current == nullptr) && - (prev == list->last) && //last point between - ex_current_was_last)); //first and last -} - -/*********************************************************************** - * CLIST_ITERATOR::cycled_list() - * - * Have we returned to the cycle_pt since it was set? 
- * - **********************************************************************/ - -inline bool CLIST_ITERATOR::cycled_list() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::cycled_list", ABORT, nullptr); - #endif - - return ((list->empty ()) || ((current == cycle_pt) && started_cycling)); - -} - -/*********************************************************************** - * CLIST_ITERATOR::length() - * - * Return the length of the list - * - **********************************************************************/ - -inline int32_t CLIST_ITERATOR::length() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::length", ABORT, nullptr); - #endif - - return list->length (); -} - -/*********************************************************************** - * CLIST_ITERATOR::sort() - * - * Sort the elements of the list, then reposition at the start. - * - **********************************************************************/ - -inline void -CLIST_ITERATOR::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::sort", ABORT, nullptr); - #endif - - list->sort (comparator); - move_to_first(); -} - -/*********************************************************************** - * CLIST_ITERATOR::add_to_end - * - * Add a new element to the end of the list without moving the iterator. - * This is provided because a single linked list cannot move to the last as - * the iterator couldn't set its prev pointer. Adding to the end is - * essential for implementing - queues. 
-**********************************************************************/ - -inline void CLIST_ITERATOR::add_to_end( // element to add - void *new_data) { - CLIST_LINK *new_element; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_to_end", ABORT, nullptr); - if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_to_end", ABORT, - "new_data is nullptr"); - #endif - - if (this->at_last ()) { - this->add_after_stay_put (new_data); - } - else { - if (this->at_first ()) { - this->add_before_stay_put (new_data); - list->last = prev; - } - else { //Iteratr is elsewhere - new_element = new CLIST_LINK; - new_element->data = new_data; - - new_element->next = list->last->next; - list->last->next = new_element; - list->last = new_element; - } - } -} - - -/*********************************************************************** - QUOTE_IT MACRO DEFINITION - =========================== -Replace with "". may be an arbitrary number of tokens -***********************************************************************/ - -#define QUOTE_IT(parm) #parm - -/*********************************************************************** - CLISTIZE(CLASSNAME) MACRO DEFINITION - ====================================== - -CLASSNAME is assumed to be the name of a class to be used in a CONS list - -NOTE: Because we don't use virtual functions in the list code, the list code -will NOT work correctly for classes derived from this. 
- -The macro generates: - - An element deletion function: CLASSNAME##_c1_zapper - - An element copier function: - CLASSNAME##_c1_copier - - A CLIST subclass: CLASSNAME##_CLIST - - A CLIST_ITERATOR subclass: - CLASSNAME##_C_IT - -NOTE: Generated names do NOT clash with those generated by ELISTIZE, -ELIST2ISE and CLIST2IZE - -Two macros are provided: CLISTIZE and CLISTIZEH -The ...IZEH macros just define the class names for use in .h files -The ...IZE macros define the code use in .c files -***********************************************************************/ - -/*********************************************************************** - CLISTIZEH(CLASSNAME) MACRO - -CLISTIZEH is a concatenation of 3 fragments CLISTIZEH_A, CLISTIZEH_B and -CLISTIZEH_C. -***********************************************************************/ - -#define CLISTIZEH_A(CLASSNAME) \ - \ - extern DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ - void *link); /*link to delete*/ \ - \ - extern DLLSYM void \ - *CLASSNAME##_c1_copier( /*deep copy a link*/ \ - void *old_element); /*source link */ - -#define CLISTIZEH_B(CLASSNAME) \ - \ - /*********************************************************************** \ - * CLASS - \ - *CLASSNAME##_CLIST \ - * \ - * List class for class \ - *CLASSNAME \ - * \ - **********************************************************************/ \ - \ - class DLLSYM CLASSNAME##_CLIST : public CLIST { \ - public: \ - CLASSNAME##_CLIST() : CLIST() {} \ - /* constructor */ \ - \ - CLASSNAME##_CLIST( /* don't construct */ \ - const CLASSNAME##_CLIST &) /*by initial assign*/ \ - { \ - DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, \ - nullptr); \ - } \ - \ - void deep_clear() /* delete elements */ \ - { \ - CLIST::internal_deep_clear(&CLASSNAME##_c1_zapper); \ - } \ - \ - void operator=(/* prevent assign */ \ - const CLASSNAME##_CLIST &) { \ - DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, nullptr); \ - } - -#define 
CLISTIZEH_C(CLASSNAME) \ - } \ - ; \ - \ - /*********************************************************************** \ - * CLASS - CLASSNAME##_C_IT \ - * \ - * Iterator class for class CLASSNAME##_CLIST \ - * \ - * Note: We don't need to coerce pointers to member functions input \ - * parameters as these are automatically converted to the type of the base \ - * type. ("A ptr to a class may be converted to a pointer to a public base \ - * class of that class") \ - **********************************************************************/ \ - \ - class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR { \ - public: \ - CLASSNAME##_C_IT() : CLIST_ITERATOR() {} \ - \ - CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : CLIST_ITERATOR(list) {} \ - \ - CLASSNAME *data() { return (CLASSNAME *)CLIST_ITERATOR::data(); } \ - \ - CLASSNAME *data_relative(int8_t offset) { \ - return (CLASSNAME *)CLIST_ITERATOR::data_relative(offset); \ - } \ - \ - CLASSNAME *forward() { return (CLASSNAME *)CLIST_ITERATOR::forward(); } \ - \ - CLASSNAME *extract() { return (CLASSNAME *)CLIST_ITERATOR::extract(); } \ - \ - CLASSNAME *move_to_first() { \ - return (CLASSNAME *)CLIST_ITERATOR::move_to_first(); \ - } \ - \ - CLASSNAME *move_to_last() { \ - return (CLASSNAME *)CLIST_ITERATOR::move_to_last(); \ - } \ - }; - -#define CLISTIZEH(CLASSNAME) \ - \ - CLISTIZEH_A(CLASSNAME) \ - \ - CLISTIZEH_B(CLASSNAME) \ - \ - CLISTIZEH_C(CLASSNAME) - -/*********************************************************************** - CLISTIZE(CLASSNAME) MACRO -***********************************************************************/ - -#define CLISTIZE(CLASSNAME) \ - \ - /*********************************************************************** \ - * CLASSNAME##_c1_zapper \ - * \ - * A function which can delete a CLASSNAME element. 
This is passed to the \ - * generic deep_clear list member function so that when a list is cleared \ - *the \ - * elements on the list are properly destroyed from the base class, even \ - * though we don't use a virtual destructor function. \ - **********************************************************************/ \ - \ - DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ - void *link) /*link to delete*/ \ - { \ - delete (CLASSNAME *)link; \ - } - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/doubleptr.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/doubleptr.h deleted file mode 100644 index 3c59e210..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/doubleptr.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2012 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: doubleptr.h -// Description: Double-ended pointer that keeps pointing correctly even -// when reallocated or copied. -// Author: Ray Smith -// Created: Wed Mar 14 12:22:57 PDT 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_DOUBLEPTR_H_ -#define TESSERACT_CCUTIL_DOUBLEPTR_H_ - -#include "errcode.h" - -namespace tesseract { - -// A smart pointer class that implements a double-ended pointer. Each end -// points to the other end. The copy constructor and operator= have MOVE -// semantics, meaning that the relationship with the other end moves to the -// destination of the copy, leaving the source unattached. -// For this reason both the copy constructor and the operator= take a non-const -// reference argument, and the const reference versions cannot be used. -// DoublePtr is useful to incorporate into structures that are part of a -// collection such as GenericVector or STL containers, where reallocs can -// relocate the members. DoublePtr is also useful in a GenericHeap, where it -// can correctly maintain the pointer to an element of the heap despite it -// getting moved around on the heap. -class DoublePtr { - public: - DoublePtr() : other_end_(nullptr) {} - // Copy constructor steals the partner off src and is therefore a non - // const reference arg. - // Copying a const DoublePtr generates a compiler error. - DoublePtr(DoublePtr& src) { - other_end_ = src.other_end_; - if (other_end_ != nullptr) { - other_end_->other_end_ = this; - src.other_end_ = nullptr; - } - } - // Operator= steals the partner off src, and therefore needs src to be a non- - // const reference. - // Assigning from a const DoublePtr generates a compiler error. - void operator=(DoublePtr& src) { - Disconnect(); - other_end_ = src.other_end_; - if (other_end_ != nullptr) { - other_end_->other_end_ = this; - src.other_end_ = nullptr; - } - } - - // Connects this and other, discarding any existing connections. - void Connect(DoublePtr* other) { - other->Disconnect(); - Disconnect(); - other->other_end_ = this; - other_end_ = other; - } - // Disconnects this and other, making OtherEnd() return nullptr for both. 
- void Disconnect() { - if (other_end_ != nullptr) { - other_end_->other_end_ = nullptr; - other_end_ = nullptr; - } - } - // Returns the pointer to the other end of the double pointer. - DoublePtr* OtherEnd() const { - return other_end_; - } - - private: - // Pointer to the other end of the link. It is always true that either - // other_end_ == nullptr or other_end_->other_end_ == this. - DoublePtr* other_end_; -}; - -} // namespace tesseract. - -#endif // THIRD_PARTY_TESSERACT_CCUTIL_DOUBLEPTR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst.cpp deleted file mode 100644 index 550554d0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst.cpp +++ /dev/null @@ -1,459 +0,0 @@ -/********************************************************************** - * File: elst.cpp (Formerly elist.c) - * Description: Embedded list handling code which is not in the include file. - * Author: Phil Cheatle - * Created: Fri Jan 04 13:55:49 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include -#include "elst.h" - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: ELIST - * ================================ - **********************************************************************/ - -/*********************************************************************** - * ELIST::internal_clear - * - * Used by the destructor and the "clear" member function of derived list - * classes to destroy all the elements on the list. - * The calling function passes a "zapper" function which can be called to - * delete each element of the list, regardless of its derived type. This - * technique permits a generic clear function to destroy elements of - * different derived types correctly, without requiring virtual functions and - * the consequential memory overhead. - **********************************************************************/ - -void -ELIST::internal_clear ( //destroy all links -void (*zapper) (ELIST_LINK *)) { - //ptr to zapper functn - ELIST_LINK *ptr; - ELIST_LINK *next; - - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty - while (ptr) { - next = ptr->next; - zapper(ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * ELIST::assign_to_sublist - * - * The list is set to a sublist of another list. "This" list must be empty - * before this function is invoked. The two iterators passed must refer to - * the same list, different from "this" one. The sublist removed is the - * inclusive list from start_it's current position to end_it's current - * position. If this range passes over the end of the source list then the - * source list has its end set to the previous element of start_it. 
The - * extracted sublist is unaffected by the end point of the source list, its - * end point is always the end_it position. - **********************************************************************/ - -void ELIST::assign_to_sublist( //to this list - ELIST_ITERATOR *start_it, //from list start - ELIST_ITERATOR *end_it) { //from list end - const ERRCODE LIST_NOT_EMPTY = - "Destination list must be empty before extracting a sublist"; - - if (!empty ()) - LIST_NOT_EMPTY.error ("ELIST.assign_to_sublist", ABORT, nullptr); - - last = start_it->extract_sublist (end_it); -} - -/*********************************************************************** - * ELIST::length - * - * Return count of elements on list - **********************************************************************/ - -int32_t ELIST::length() const { // count elements - ELIST_ITERATOR it(const_cast(this)); - int32_t count = 0; - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - count++; - return count; -} - -/*********************************************************************** - * ELIST::sort - * - * Sort elements on list - * NB If you don't like the const declarations in the comparator, coerce yours: - * ( int (*)(const void *, const void *) - **********************************************************************/ - -void -ELIST::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - ELIST_ITERATOR it(this); - int32_t count; - ELIST_LINK **base; //ptr array to sort - ELIST_LINK **current; - int32_t i; - - /* Allocate an array of pointers, one per list element */ - count = length (); - base = (ELIST_LINK **) malloc (count * sizeof (ELIST_LINK *)); - - /* Extract all elements, putting the pointers in the array */ - current = base; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - *current = it.extract (); - current++; - } - - /* Sort the pointer array */ - qsort(base, count, sizeof(*base), comparator); - - /* Rebuild the list from the 
sorted pointers */ - current = base; - for (i = 0; i < count; i++) { - it.add_to_end (*current); - current++; - } - free(base); -} - -// Assuming list has been sorted already, insert new_link to -// keep the list sorted according to the same comparison function. -// Comparison function is the same as used by sort, i.e. uses double -// indirection. Time is O(1) to add to beginning or end. -// Time is linear to add pre-sorted items to an empty list. -// If unique is set to true and comparator() returns 0 (an entry with the -// same information as the one contained in new_link is already in the -// list) - new_link is not added to the list and the function returns the -// pointer to the identical entry that already exists in the list -// (otherwise the function returns new_link). -ELIST_LINK *ELIST::add_sorted_and_find( - int comparator(const void*, const void*), - bool unique, ELIST_LINK* new_link) { - // Check for adding at the end. - if (last == nullptr || comparator(&last, &new_link) < 0) { - if (last == nullptr) { - new_link->next = new_link; - } else { - new_link->next = last->next; - last->next = new_link; - } - last = new_link; - } else { - // Need to use an iterator. - ELIST_ITERATOR it(this); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ELIST_LINK* link = it.data(); - int compare = comparator(&link, &new_link); - if (compare > 0) { - break; - } else if (unique && compare == 0) { - return link; - } - } - if (it.cycled_list()) - it.add_to_end(new_link); - else - it.add_before_then_move(new_link); - } - return new_link; -} - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: ELIST_ITERATOR - * ========================================= - **********************************************************************/ - -/*********************************************************************** - * ELIST_ITERATOR::forward - * - * Move the iterator to the next element of the list. 
- * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::forward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::forward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; - - if (current) { //not removed so - //set previous - prev = current; - started_cycling = TRUE; - // In case next is deleted by another iterator, get next from current. - current = current->next; - } else { - if (ex_current_was_cycle_pt) - cycle_pt = next; - current = next; - } -#ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST_ITERATOR::forward", ABORT, nullptr); -#endif - next = current->next; - - #ifndef NDEBUG - if (!next) - NULL_NEXT.error ("ELIST_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", this, current); - #endif - return current; -} - -/*********************************************************************** - * ELIST_ITERATOR::data_relative - * - * Return the data pointer to the element "offset" elements from current. - * "offset" must not be less than -1. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::data_relative( //get data + or - ... - int8_t offset) { //offset from current - ELIST_LINK *ptr; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::data_relative", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST_ITERATOR::data_relative", ABORT, nullptr); - if (offset < -1) - BAD_PARAMETER.error ("ELIST_ITERATOR::data_relative", ABORT, - "offset < -l"); - #endif - - if (offset == -1) - ptr = prev; - else - for (ptr = current ? 
current : prev; offset-- > 0; ptr = ptr->next); - - #ifndef NDEBUG - if (!ptr) - NULL_DATA.error ("ELIST_ITERATOR::data_relative", ABORT, nullptr); - #endif - - return ptr; -} - -/*********************************************************************** - * ELIST_ITERATOR::move_to_last() - * - * Move current so that it is set to the end of the list. - * Return data just in case anyone wants it. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::move_to_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::move_to_last", ABORT, nullptr); - #endif - - while (current != list->last) - forward(); - - return current; -} - -/*********************************************************************** - * ELIST_ITERATOR::exchange() - * - * Given another iterator, whose current element is a different element on - * the same list list OR an element of another list, exchange the two current - * elements. On return, each iterator points to the element which was the - * other iterators current on entry. - * (This function hasn't been in-lined because its a bit big!) 
- **********************************************************************/ - -void ELIST_ITERATOR::exchange( //positions of 2 links - ELIST_ITERATOR *other_it) { //other iterator - const ERRCODE DONT_EXCHANGE_DELETED = - "Can't exchange deleted elements of lists"; - - ELIST_LINK *old_current; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::exchange", ABORT, nullptr); - if (!other_it) - BAD_PARAMETER.error ("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr"); - if (!(other_it->list)) - NO_LIST.error ("ELIST_ITERATOR::exchange", ABORT, "other_it"); - #endif - - /* Do nothing if either list is empty or if both iterators reference the same - link */ - - if ((list->empty ()) || - (other_it->list->empty ()) || (current == other_it->current)) - return; - - /* Error if either current element is deleted */ - - if (!current || !other_it->current) - DONT_EXCHANGE_DELETED.error ("ELIST_ITERATOR.exchange", ABORT, nullptr); - - /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements - (other before this); non-doubleton adjacent elements (this before other); - non-adjacent elements. 
*/ - - //adjacent links - if ((next == other_it->current) || - (other_it->next == current)) { - //doubleton list - if ((next == other_it->current) && - (other_it->next == current)) { - prev = next = current; - other_it->prev = other_it->next = other_it->current; - } - else { //non-doubleton with - //adjacent links - //other before this - if (other_it->next == current) { - other_it->prev->next = current; - other_it->current->next = next; - current->next = other_it->current; - other_it->next = other_it->current; - prev = current; - } - else { //this before other - prev->next = other_it->current; - current->next = other_it->next; - other_it->current->next = current; - next = current; - other_it->prev = other_it->current; - } - } - } - else { //no overlap - prev->next = other_it->current; - current->next = other_it->next; - other_it->prev->next = current; - other_it->current->next = next; - } - - /* update end of list pointer when necessary (remember that the 2 iterators - may iterate over different lists!) */ - - if (list->last == current) - list->last = other_it->current; - if (other_it->list->last == other_it->current) - other_it->list->last = current; - - if (current == cycle_pt) - cycle_pt = other_it->cycle_pt; - if (other_it->current == other_it->cycle_pt) - other_it->cycle_pt = cycle_pt; - - /* The actual exchange - in all cases*/ - - old_current = current; - current = other_it->current; - other_it->current = old_current; -} - -/*********************************************************************** - * ELIST_ITERATOR::extract_sublist() - * - * This is a private member, used only by ELIST::assign_to_sublist. - * Given another iterator for the same list, extract the links from THIS to - * OTHER inclusive, link them into a new circular list, and return a - * pointer to the last element. 
- * (Can't inline this function because it contains a loop) - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::extract_sublist( //from this current - ELIST_ITERATOR *other_it) { //to other current - #ifndef NDEBUG - const ERRCODE BAD_EXTRACTION_PTS = - "Can't extract sublist from points on different lists"; - const ERRCODE DONT_EXTRACT_DELETED = - "Can't extract a sublist marked by deleted points"; - #endif - const ERRCODE BAD_SUBLIST = "Can't find sublist end point in original list"; - - ELIST_ITERATOR temp_it = *this; - ELIST_LINK *end_of_new_list; - - #ifndef NDEBUG - if (!other_it) - BAD_PARAMETER.error ("ELIST_ITERATOR::extract_sublist", ABORT, - "other_it nullptr"); - if (!list) - NO_LIST.error ("ELIST_ITERATOR::extract_sublist", ABORT, nullptr); - if (list != other_it->list) - BAD_EXTRACTION_PTS.error ("ELIST_ITERATOR.extract_sublist", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST_ITERATOR::extract_sublist", ABORT, nullptr); - - if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error ("ELIST_ITERATOR.extract_sublist", ABORT, - nullptr); - #endif - - ex_current_was_last = other_it->ex_current_was_last = FALSE; - ex_current_was_cycle_pt = FALSE; - other_it->ex_current_was_cycle_pt = FALSE; - - temp_it.mark_cycle_pt (); - do { //walk sublist - if (temp_it.cycled_list()) // can't find end pt - BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, nullptr); - - if (temp_it.at_last ()) { - list->last = prev; - ex_current_was_last = other_it->ex_current_was_last = TRUE; - } - - if (temp_it.current == cycle_pt) - ex_current_was_cycle_pt = TRUE; - - if (temp_it.current == other_it->cycle_pt) - other_it->ex_current_was_cycle_pt = TRUE; - - temp_it.forward (); - } - while (temp_it.prev != other_it->current); - - //circularise sublist - other_it->current->next = current; - end_of_new_list = other_it->current; - - //sublist = whole list - if (prev == other_it->current) { - list->last 
= nullptr; - prev = current = next = nullptr; - other_it->prev = other_it->current = other_it->next = nullptr; - } - else { - prev->next = other_it->next; - current = other_it->current = nullptr; - next = other_it->next; - other_it->prev = prev; - } - return end_of_new_list; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst.h deleted file mode 100644 index 78714ef1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst.h +++ /dev/null @@ -1,986 +0,0 @@ -/********************************************************************** - * File: elst.h (Formerly elist.h) - * Description: Embedded list module include file. - * Author: Phil Cheatle - * Created: Mon Jan 07 08:35:34 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef ELST_H -#define ELST_H - -#include -#include "host.h" -#include "serialis.h" -#include "lsterr.h" - -class ELIST_ITERATOR; - -/********************************************************************** -This module implements list classes and iterators. 
-The following list types and iterators are provided: - - List type List Class Iterator Class Element Class - --------- ---------- -------------- ------------- - - Embedded list ELIST - ELIST_ITERATOR - ELIST_LINK - (Single linked) - - Embedded list ELIST2 - ELIST2_ITERATOR - ELIST2_LINK - (Double linked) - - Cons List CLIST - CLIST_ITERATOR - CLIST_LINK - (Single linked) - - Cons List CLIST2 - CLIST2_ITERATOR - CLIST2_LINK - (Double linked) - -An embedded list is where the list pointers are provided by a generic class. -Data types to be listed inherit from the generic class. Data is thus linked -in only ONE list at any one time. - -A cons list has a separate structure for a "cons cell". This contains the -list pointer(s) AND a pointer to the data structure held on the list. A -structure can be on many cons lists at the same time, and the structure does -not need to inherit from any generic class in order to be on the list. - -The implementation of lists is very careful about space and speed overheads. -This is why many embedded lists are provided. The same concerns mean that -in-line type coercion is done, rather than use virtual functions. This is -cumbersome in that each data type to be listed requires its own iterator and -list class - though macros can generate these. It also prevents heterogeneous -lists. -**********************************************************************/ - -/********************************************************************** - * CLASS - ELIST_LINK - * - * Generic link class for singly linked lists with embedded links - * - * Note: No destructor - elements are assumed to be destroyed EITHER after - * they have been extracted from a list OR by the ELIST destructor which - * walks the list. 
- **********************************************************************/ - -class DLLSYM ELIST_LINK -{ - friend class ELIST_ITERATOR; - friend class ELIST; - - ELIST_LINK *next; - - public: - ELIST_LINK() { - next = nullptr; - } - //constructor - - ELIST_LINK(const ELIST_LINK &) { // don't copy link. - next = nullptr; - } - - void operator=( // don't copy links - const ELIST_LINK &) { - next = nullptr; - } -}; - -/********************************************************************** - * CLASS - ELIST - * - * Generic list class for singly linked lists with embedded links - **********************************************************************/ - -class DLLSYM ELIST -{ - friend class ELIST_ITERATOR; - - ELIST_LINK *last; //End of list - //(Points to head) - ELIST_LINK *First() { // return first - return last ? last->next : nullptr; - } - - public: - ELIST() { //constructor - last = nullptr; - } - - void internal_clear ( //destroy all links - //ptr to zapper functn - void (*zapper) (ELIST_LINK *)); - - bool empty() const { //is list empty? - return !last; - } - - bool singleton() const { - return last ? (last == last->next) : false; - } - - void shallow_copy( //dangerous!! - ELIST *from_list) { //beware destructors!! - last = from_list->last; - } - - //ptr to copier functn - void internal_deep_copy (ELIST_LINK * (*copier) (ELIST_LINK *), - const ELIST * list); //list being copied - - void assign_to_sublist( //to this list - ELIST_ITERATOR *start_it, //from list start - ELIST_ITERATOR *end_it); //from list end - - int32_t length() const; // # elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_link to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. 
- // Time is linear to add pre-sorted items to an empty list. - // If unique is set to true and comparator() returns 0 (an entry with the - // same information as the one contained in new_link is already in the - // list) - new_link is not added to the list and the function returns the - // pointer to the identical entry that already exists in the list - // (otherwise the function returns new_link). - ELIST_LINK *add_sorted_and_find(int comparator(const void*, const void*), - bool unique, ELIST_LINK* new_link); - - // Same as above, but returns true if the new entry was inserted, false - // if the identical entry already existed in the list. - bool add_sorted(int comparator(const void*, const void*), - bool unique, ELIST_LINK* new_link) { - return (add_sorted_and_find(comparator, unique, new_link) == new_link); - } - -}; - -/*********************************************************************** - * CLASS - ELIST_ITERATOR - * - * Generic iterator class for singly linked lists with embedded links - **********************************************************************/ - -class DLLSYM ELIST_ITERATOR -{ - friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); - - ELIST *list; //List being iterated - ELIST_LINK *prev; //prev element - ELIST_LINK *current; //current element - ELIST_LINK *next; //next element - bool ex_current_was_last; //current extracted - //was end of list - bool ex_current_was_cycle_pt; //current extracted - //was cycle point - ELIST_LINK *cycle_pt; //point we are cycling - //the list to. - bool started_cycling; //Have we moved off - //the start? - - ELIST_LINK *extract_sublist( //from this current... 
- ELIST_ITERATOR *other_it); //to other current - - public: - ELIST_ITERATOR() { //constructor - list = nullptr; - } //unassigned list - - explicit ELIST_ITERATOR(ELIST *list_to_iterate); - - void set_to_list( //change list - ELIST *list_to_iterate); - - void add_after_then_move( //add after current & - ELIST_LINK *new_link); //move to new - - void add_after_stay_put( //add after current & - ELIST_LINK *new_link); //stay at current - - void add_before_then_move( //add before current & - ELIST_LINK *new_link); //move to new - - void add_before_stay_put( //add before current & - ELIST_LINK *new_link); //stay at current - - void add_list_after( //add a list & - ELIST *list_to_add); //stay at current - - void add_list_before( //add a list & - ELIST *list_to_add); //move to it 1st item - - ELIST_LINK *data() { //get current data - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::data", ABORT, nullptr); - if (!current) - NULL_DATA.error ("ELIST_ITERATOR::data", ABORT, nullptr); - #endif - return current; - } - - ELIST_LINK *data_relative( //get data + or - ... - int8_t offset); //offset from current - - ELIST_LINK *forward(); //move to next element - - ELIST_LINK *extract(); //remove from list - - ELIST_LINK *move_to_first(); //go to start of list - - ELIST_LINK *move_to_last(); //go to end of list - - void mark_cycle_pt(); //remember current - - bool empty() { //is list empty? - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::empty", ABORT, nullptr); - #endif - return list->empty (); - } - - bool current_extracted() { //current extracted? - return !current; - } - - bool at_first(); //Current is first? - - bool at_last(); //Current is last? - - bool cycled_list(); //Completed a cycle? 
- - void add_to_end( // add at end & - ELIST_LINK *new_link); // don't move - - void exchange( //positions of 2 links - ELIST_ITERATOR *other_it); //other iterator - - int32_t length(); //# elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - -}; - -/*********************************************************************** - * ELIST_ITERATOR::set_to_list - * - * (Re-)initialise the iterator to point to the start of the list_to_iterate - * over. - **********************************************************************/ - -inline void ELIST_ITERATOR::set_to_list( //change list - ELIST *list_to_iterate) { - #ifndef NDEBUG - if (!list_to_iterate) - BAD_PARAMETER.error ("ELIST_ITERATOR::set_to_list", ABORT, - "list_to_iterate is nullptr"); - #endif - - list = list_to_iterate; - prev = list->last; - current = list->First (); - next = current ? current->next : nullptr; - cycle_pt = nullptr; //await explicit set - started_cycling = FALSE; - ex_current_was_last = FALSE; - ex_current_was_cycle_pt = FALSE; -} - - -/*********************************************************************** - * ELIST_ITERATOR::ELIST_ITERATOR - * - * CONSTRUCTOR - set iterator to specified list; - **********************************************************************/ - -inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST *list_to_iterate) { - set_to_list(list_to_iterate); -} - - -/*********************************************************************** - * ELIST_ITERATOR::add_after_then_move - * - * Add a new element to the list after the current element and move the - * iterator to the new element. 
- **********************************************************************/ - -inline void ELIST_ITERATOR::add_after_then_move( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_after_then_move", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } - else { - new_element->next = next; - - if (current) { //not extracted - current->next = new_element; - prev = current; - if (current == list->last) - list->last = new_element; - } - else { //current extracted - prev->next = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; - } - } - current = new_element; -} - - -/*********************************************************************** - * ELIST_ITERATOR::add_after_stay_put - * - * Add a new element to the list after the current element but do not move - * the iterator to the new element. 
- **********************************************************************/ - -inline void ELIST_ITERATOR::add_after_stay_put( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_after_stay_put", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = FALSE; - current = nullptr; - } - else { - new_element->next = next; - - if (current) { //not extracted - current->next = new_element; - if (prev == current) - prev = new_element; - if (current == list->last) - list->last = new_element; - } - else { //current extracted - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - ex_current_was_last = FALSE; - } - } - next = new_element; - } -} - - -/*********************************************************************** - * ELIST_ITERATOR::add_before_then_move - * - * Add a new element to the list before the current element and move the - * iterator to the new element. 
- **********************************************************************/ - -inline void ELIST_ITERATOR::add_before_then_move( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_before_then_move", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } - else { - prev->next = new_element; - if (current) { //not extracted - new_element->next = current; - next = current; - } - else { //current extracted - new_element->next = next; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; - } - } - current = new_element; -} - -/*********************************************************************** - * ELIST_ITERATOR::add_before_stay_put - * - * Add a new element to the list before the current element but don't move the - * iterator to the new element. 
- **********************************************************************/ - -inline void ELIST_ITERATOR::add_before_stay_put( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_before_stay_put", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = TRUE; - current = nullptr; - } - else { - prev->next = new_element; - if (current) { //not extracted - new_element->next = current; - if (next == current) - next = new_element; - } - else { //current extracted - new_element->next = next; - if (ex_current_was_last) - list->last = new_element; - } - prev = new_element; - } -} - -/*********************************************************************** - * ELIST_ITERATOR::add_list_after - * - * Insert another list to this list after the current element but don't move - *the - * iterator. 
- **********************************************************************/ - -inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_list_after", ABORT, nullptr); - if (!list_to_add) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_list_after", ABORT, - "list_to_add is nullptr"); - #endif - - if (!list_to_add->empty ()) { - if (list->empty ()) { - list->last = list_to_add->last; - prev = list->last; - next = list->First (); - ex_current_was_last = TRUE; - current = nullptr; - } - else { - if (current) { //not extracted - current->next = list_to_add->First (); - if (current == list->last) - list->last = list_to_add->last; - list_to_add->last->next = next; - next = current->next; - } - else { //current extracted - prev->next = list_to_add->First (); - if (ex_current_was_last) { - list->last = list_to_add->last; - ex_current_was_last = FALSE; - } - list_to_add->last->next = next; - next = prev->next; - } - } - list_to_add->last = nullptr; - } -} - - -/*********************************************************************** - * ELIST_ITERATOR::add_list_before - * - * Insert another list to this list before the current element. Move the - * iterator to the start of the inserted elements - * iterator. 
- **********************************************************************/ - -inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_list_before", ABORT, nullptr); - if (!list_to_add) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_list_before", ABORT, - "list_to_add is nullptr"); - #endif - - if (!list_to_add->empty ()) { - if (list->empty ()) { - list->last = list_to_add->last; - prev = list->last; - current = list->First (); - next = current->next; - ex_current_was_last = FALSE; - } - else { - prev->next = list_to_add->First (); - if (current) { //not extracted - list_to_add->last->next = current; - } - else { //current extracted - list_to_add->last->next = next; - if (ex_current_was_last) - list->last = list_to_add->last; - if (ex_current_was_cycle_pt) - cycle_pt = prev->next; - } - current = prev->next; - next = current->next; - } - list_to_add->last = nullptr; - } -} - - -/*********************************************************************** - * ELIST_ITERATOR::extract - * - * Do extraction by removing current from the list, returning it to the - * caller, but NOT updating the iterator. (So that any calling loop can do - * this.) The iterator's current points to nullptr. If the extracted element - * is to be deleted, this is the callers responsibility. - **********************************************************************/ - -inline ELIST_LINK *ELIST_ITERATOR::extract() { - ELIST_LINK *extracted_link; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::extract", ABORT, nullptr); - if (!current) //list empty or - //element extracted - NULL_CURRENT.error ("ELIST_ITERATOR::extract", - ABORT, nullptr); - #endif - - if (list->singleton()) { - // Special case where we do need to change the iterator. 
- prev = next = list->last = nullptr; - } else { - prev->next = next; //remove from list - - if (current == list->last) { - list->last = prev; - ex_current_was_last = TRUE; - } else { - ex_current_was_last = FALSE; - } - } - // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. - ex_current_was_cycle_pt = (current == cycle_pt) ? TRUE : FALSE; - extracted_link = current; - extracted_link->next = nullptr; //for safety - current = nullptr; - return extracted_link; -} - - -/*********************************************************************** - * ELIST_ITERATOR::move_to_first() - * - * Move current so that it is set to the start of the list. - * Return data just in case anyone wants it. - **********************************************************************/ - -inline ELIST_LINK *ELIST_ITERATOR::move_to_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::move_to_first", ABORT, nullptr); - #endif - - current = list->First (); - prev = list->last; - next = current ? current->next : nullptr; - return current; -} - - -/*********************************************************************** - * ELIST_ITERATOR::mark_cycle_pt() - * - * Remember the current location so that we can tell whether we've returned - * to this point later. - * - * If the current point is deleted either now, or in the future, the cycle - * point will be set to the next item which is set to current. This could be - * by a forward, add_after_then_move or add_after_then_move. - **********************************************************************/ - -inline void ELIST_ITERATOR::mark_cycle_pt() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::mark_cycle_pt", ABORT, nullptr); - #endif - - if (current) - cycle_pt = current; - else - ex_current_was_cycle_pt = TRUE; - started_cycling = FALSE; -} - - -/*********************************************************************** - * ELIST_ITERATOR::at_first() - * - * Are we at the start of the list? 
- * - **********************************************************************/ - -inline bool ELIST_ITERATOR::at_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::at_first", ABORT, nullptr); - #endif - - //we're at a deleted - return ((list->empty ()) || (current == list->First ()) || ((current == nullptr) && - (prev == list->last) && //NON-last pt between - !ex_current_was_last)); //first and last -} - - -/*********************************************************************** - * ELIST_ITERATOR::at_last() - * - * Are we at the end of the list? - * - **********************************************************************/ - -inline bool ELIST_ITERATOR::at_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::at_last", ABORT, nullptr); - #endif - - //we're at a deleted - return ((list->empty ()) || (current == list->last) || ((current == nullptr) && - (prev == list->last) && //last point between - ex_current_was_last)); //first and last -} - - -/*********************************************************************** - * ELIST_ITERATOR::cycled_list() - * - * Have we returned to the cycle_pt since it was set? 
- * - **********************************************************************/ - -inline bool ELIST_ITERATOR::cycled_list() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::cycled_list", ABORT, nullptr); - #endif - - return ((list->empty ()) || ((current == cycle_pt) && started_cycling)); - -} - - -/*********************************************************************** - * ELIST_ITERATOR::length() - * - * Return the length of the list - * - **********************************************************************/ - -inline int32_t ELIST_ITERATOR::length() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::length", ABORT, nullptr); - #endif - - return list->length (); -} - - -/*********************************************************************** - * ELIST_ITERATOR::sort() - * - * Sort the elements of the list, then reposition at the start. - * - **********************************************************************/ - -inline void -ELIST_ITERATOR::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::sort", ABORT, nullptr); - #endif - - list->sort (comparator); - move_to_first(); -} - - -/*********************************************************************** - * ELIST_ITERATOR::add_to_end - * - * Add a new element to the end of the list without moving the iterator. - * This is provided because a single linked list cannot move to the last as - * the iterator couldn't set its prev pointer. Adding to the end is - * essential for implementing - queues. 
-**********************************************************************/ - -inline void ELIST_ITERATOR::add_to_end( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_to_end", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_to_end", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_to_end", ABORT, nullptr); - #endif - - if (this->at_last ()) { - this->add_after_stay_put (new_element); - } - else { - if (this->at_first ()) { - this->add_before_stay_put (new_element); - list->last = new_element; - } - else { //Iteratr is elsewhere - new_element->next = list->last->next; - list->last->next = new_element; - list->last = new_element; - } - } -} - - -/*********************************************************************** - ******************** MACROS ************************************** - ***********************************************************************/ - -/*********************************************************************** - QUOTE_IT MACRO DEFINITION - =========================== -Replace with "". may be an arbitrary number of tokens -***********************************************************************/ - -#define QUOTE_IT(parm) #parm - -/*********************************************************************** - ELISTIZE(CLASSNAME) MACRO - ============================ - -CLASSNAME is assumed to be the name of a class which has a baseclass of -ELIST_LINK. - -NOTE: Because we don't use virtual functions in the list code, the list code -will NOT work correctly for classes derived from this. 
- -The macros generate: - - An element deletion function: CLASSNAME##_zapper - - An E_LIST subclass: CLASSNAME##_LIST - - An E_LIST_ITERATOR subclass: CLASSNAME##_IT - -NOTE: Generated names are DELIBERATELY designed to clash with those for -ELIST2IZE but NOT with those for CLISTIZE and CLIST2IZE - -Two macros are provided: ELISTIZE and ELISTIZEH. -The ...IZEH macros just define the class names for use in .h files -The ...IZE macros define the code use in .c files -***********************************************************************/ - -/*********************************************************************** - ELISTIZEH(CLASSNAME) MACRO - -ELISTIZEH is a concatenation of 3 fragments ELISTIZEH_A, ELISTIZEH_B and -ELISTIZEH_C. -***********************************************************************/ - -#define ELISTIZEH_A(CLASSNAME) \ - \ -extern DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link); - -#define ELISTIZEH_B(CLASSNAME) \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_LIST \ -* \ -* List class for class CLASSNAME \ -* \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_LIST : public ELIST { \ - public: \ - CLASSNAME##_LIST():ELIST() {} \ - \ - void clear() { /* delete elements */\ - ELIST::internal_clear(&CLASSNAME##_zapper); \ - } \ - \ - ~CLASSNAME##_LIST() { \ - clear(); \ - } \ - \ - /* Become a deep copy of src_list*/ \ - void deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)); \ - \ -private: \ - /* Prevent assign and copy construction. 
*/ \ - CLASSNAME##_LIST(const CLASSNAME##_LIST&) { \ - DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr);\ - } \ - void operator=(const CLASSNAME##_LIST&) { \ - DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr); \ - } \ - -#define ELISTIZEH_C(CLASSNAME) \ -}; \ - \ - \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_IT \ -* \ -* Iterator class for class CLASSNAME##_LIST \ -* \ -* Note: We don't need to coerce pointers to member functions input \ -* parameters as these are automatically converted to the type of the base \ -* type. ("A ptr to a class may be converted to a pointer to a public base \ -* class of that class") \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_IT : public ELIST_ITERATOR { \ - public: \ - CLASSNAME##_IT():ELIST_ITERATOR(){} \ - \ - /* TODO(rays) This constructor should be explicit, but that means changing \ - hundreds of incorrect initializations of iterators that use = over () */ \ - CLASSNAME##_IT(CLASSNAME##_LIST* list) : ELIST_ITERATOR(list) {} \ - \ - CLASSNAME* data() { \ - return reinterpret_cast(ELIST_ITERATOR::data()); \ - } \ - \ - CLASSNAME* data_relative(int8_t offset) { \ - return reinterpret_cast(ELIST_ITERATOR::data_relative(offset));\ - } \ - \ - CLASSNAME* forward() { \ - return reinterpret_cast(ELIST_ITERATOR::forward()); \ - } \ - \ - CLASSNAME* extract() { \ - return reinterpret_cast(ELIST_ITERATOR::extract()); \ - } \ - \ - CLASSNAME* move_to_first() { \ - return reinterpret_cast(ELIST_ITERATOR::move_to_first()); \ - } \ - \ - CLASSNAME* move_to_last() { \ - return reinterpret_cast(ELIST_ITERATOR::move_to_last()); \ - } \ -}; - -#define ELISTIZEH(CLASSNAME) \ - \ -ELISTIZEH_A(CLASSNAME) \ - \ -ELISTIZEH_B(CLASSNAME) \ - \ -ELISTIZEH_C(CLASSNAME) - - -/*********************************************************************** - ELISTIZE(CLASSNAME) MACRO 
-***********************************************************************/ - -#define ELISTIZE(CLASSNAME) \ - \ - /*********************************************************************** \ - * CLASSNAME##_zapper \ - * \ - * A function which can delete a CLASSNAME element. This is passed to the \ - * generic clear list member function so that when a list is cleared the \ - * elements on the list are properly destroyed from the base class, even \ - * though we don't use a virtual destructor function. \ - **********************************************************************/ \ - \ - DLLSYM void CLASSNAME##_zapper(ELIST_LINK *link) { \ - delete reinterpret_cast(link); \ - } \ - \ - /* Become a deep copy of src_list*/ \ - void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ - CLASSNAME *(*copier)(const CLASSNAME *)) { \ - CLASSNAME##_IT from_it(const_cast(src_list)); \ - CLASSNAME##_IT to_it(this); \ - \ - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ - to_it.add_after_then_move((*copier)(from_it.data())); \ - } - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst2.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst2.cpp deleted file mode 100644 index cb719078..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst2.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/********************************************************************** - * File: elst2.cpp (Formerly elist2.c) - * Description: Doubly linked embedded list code not in the include file. - * Author: Phil Cheatle - * Created: Wed Jan 23 11:04:47 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include "host.h" -#include "elst2.h" - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: ELIST2 - * ================================= - **********************************************************************/ - -/*********************************************************************** - * ELIST2::internal_clear - * - * Used by the destructor and the "clear" member function of derived list - * classes to destroy all the elements on the list. - * The calling function passes a "zapper" function which can be called to - * delete each element of the list, regardless of its derived type. This - * technique permits a generic clear function to destroy elements of - * different derived types correctly, without requiring virtual functions and - * the consequential memory overhead. - **********************************************************************/ - -void -ELIST2::internal_clear ( //destroy all links -void (*zapper) (ELIST2_LINK *)) { - //ptr to zapper functn - ELIST2_LINK *ptr; - ELIST2_LINK *next; - - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty - while (ptr) { - next = ptr->next; - zapper(ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * ELIST2::assign_to_sublist - * - * The list is set to a sublist of another list. 
"This" list must be empty - * before this function is invoked. The two iterators passed must refer to - * the same list, different from "this" one. The sublist removed is the - * inclusive list from start_it's current position to end_it's current - * position. If this range passes over the end of the source list then the - * source list has its end set to the previous element of start_it. The - * extracted sublist is unaffected by the end point of the source list, its - * end point is always the end_it position. - **********************************************************************/ - -void ELIST2::assign_to_sublist( //to this list - ELIST2_ITERATOR *start_it, //from list start - ELIST2_ITERATOR *end_it) { //from list end - const ERRCODE LIST_NOT_EMPTY = - "Destination list must be empty before extracting a sublist"; - - if (!empty ()) - LIST_NOT_EMPTY.error ("ELIST2.assign_to_sublist", ABORT, nullptr); - - last = start_it->extract_sublist (end_it); -} - -/*********************************************************************** - * ELIST2::length - * - * Return count of elements on list - **********************************************************************/ - -int32_t ELIST2::length() const { // count elements - ELIST2_ITERATOR it(const_cast(this)); - int32_t count = 0; - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - count++; - return count; -} - -/*********************************************************************** - * ELIST2::sort - * - * Sort elements on list - * NB If you don't like the const declarations in the comparator, coerce yours: - * (int (*)(const void *, const void *) - **********************************************************************/ - -void -ELIST2::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - ELIST2_ITERATOR it(this); - int32_t count; - ELIST2_LINK **base; //ptr array to sort - ELIST2_LINK **current; - int32_t i; - - /* Allocate an array of pointers, one per list 
element */ - count = length (); - base = (ELIST2_LINK **) malloc (count * sizeof (ELIST2_LINK *)); - - /* Extract all elements, putting the pointers in the array */ - current = base; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - *current = it.extract (); - current++; - } - - /* Sort the pointer array */ - qsort(base, count, sizeof(*base), comparator); - - /* Rebuild the list from the sorted pointers */ - current = base; - for (i = 0; i < count; i++) { - it.add_to_end (*current); - current++; - } - free(base); -} - -// Assuming list has been sorted already, insert new_link to -// keep the list sorted according to the same comparison function. -// Comparison function is the same as used by sort, i.e. uses double -// indirection. Time is O(1) to add to beginning or end. -// Time is linear to add pre-sorted items to an empty list. -void ELIST2::add_sorted(int comparator(const void*, const void*), - ELIST2_LINK* new_link) { - // Check for adding at the end. - if (last == nullptr || comparator(&last, &new_link) < 0) { - if (last == nullptr) { - new_link->next = new_link; - new_link->prev = new_link; - } else { - new_link->next = last->next; - new_link->prev = last; - last->next = new_link; - new_link->next->prev = new_link; - } - last = new_link; - } else { - // Need to use an iterator. 
- ELIST2_ITERATOR it(this); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ELIST2_LINK* link = it.data(); - if (comparator(&link, &new_link) > 0) - break; - } - if (it.cycled_list()) - it.add_to_end(new_link); - else - it.add_before_then_move(new_link); - } -} - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: ELIST2_ITERATOR - * ========================================== - **********************************************************************/ - -/*********************************************************************** - * ELIST2_ITERATOR::forward - * - * Move the iterator to the next element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::forward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::forward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; - - if (current) { //not removed so - //set previous - prev = current; - started_cycling = TRUE; - // In case next is deleted by another iterator, get it from the current. - current = current->next; - } - else { - if (ex_current_was_cycle_pt) - cycle_pt = next; - current = next; - } - -#ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST2_ITERATOR::forward", ABORT, nullptr); -#endif - - next = current->next; - -#ifndef NDEBUG - if (!next) - NULL_NEXT.error ("ELIST2_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", this, current); -#endif - - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::backward - * - * Move the iterator to the previous element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. 
- **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::backward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::backward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; - - if (current) { //not removed so - //set previous - next = current; - started_cycling = TRUE; - // In case prev is deleted by another iterator, get it from current. - current = current->prev; - } else { - if (ex_current_was_cycle_pt) - cycle_pt = prev; - current = prev; - } - - #ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST2_ITERATOR::backward", ABORT, nullptr); - if (!prev) - NULL_PREV.error ("ELIST2_ITERATOR::backward", ABORT, - "This is: %p Current is: %p", this, current); - #endif - - prev = current->prev; - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::data_relative - * - * Return the data pointer to the element "offset" elements from current. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::data_relative( //get data + or - .. - int8_t offset) { //offset from current - ELIST2_LINK *ptr; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::data_relative", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST2_ITERATOR::data_relative", ABORT, nullptr); - #endif - - if (offset < 0) - for (ptr = current ? current : next; offset++ < 0; ptr = ptr->prev); - else - for (ptr = current ? 
current : prev; offset-- > 0; ptr = ptr->next); - - #ifndef NDEBUG - if (!ptr) - NULL_DATA.error ("ELIST2_ITERATOR::data_relative", ABORT, nullptr); - #endif - - return ptr; -} - -/*********************************************************************** - * ELIST2_ITERATOR::exchange() - * - * Given another iterator, whose current element is a different element on - * the same list list OR an element of another list, exchange the two current - * elements. On return, each iterator points to the element which was the - * other iterators current on entry. - * (This function hasn't been in-lined because its a bit big!) - **********************************************************************/ - -void ELIST2_ITERATOR::exchange( //positions of 2 links - ELIST2_ITERATOR *other_it) { //other iterator - const ERRCODE DONT_EXCHANGE_DELETED = - "Can't exchange deleted elements of lists"; - - ELIST2_LINK *old_current; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::exchange", ABORT, nullptr); - if (!other_it) - BAD_PARAMETER.error ("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr"); - if (!(other_it->list)) - NO_LIST.error ("ELIST2_ITERATOR::exchange", ABORT, "other_it"); - #endif - - /* Do nothing if either list is empty or if both iterators reference the same - link */ - - if ((list->empty ()) || - (other_it->list->empty ()) || (current == other_it->current)) - return; - - /* Error if either current element is deleted */ - - if (!current || !other_it->current) - DONT_EXCHANGE_DELETED.error ("ELIST2_ITERATOR.exchange", ABORT, nullptr); - - /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements - (other before this); non-doubleton adjacent elements (this before other); - non-adjacent elements. 
*/ - - //adjacent links - if ((next == other_it->current) || - (other_it->next == current)) { - //doubleton list - if ((next == other_it->current) && - (other_it->next == current)) { - prev = next = current; - other_it->prev = other_it->next = other_it->current; - } - else { //non-doubleton with - //adjacent links - //other before this - if (other_it->next == current) { - other_it->prev->next = current; - other_it->current->next = next; - other_it->current->prev = current; - current->next = other_it->current; - current->prev = other_it->prev; - next->prev = other_it->current; - - other_it->next = other_it->current; - prev = current; - } - else { //this before other - prev->next = other_it->current; - current->next = other_it->next; - current->prev = other_it->current; - other_it->current->next = current; - other_it->current->prev = prev; - other_it->next->prev = current; - - next = current; - other_it->prev = other_it->current; - } - } - } - else { //no overlap - prev->next = other_it->current; - current->next = other_it->next; - current->prev = other_it->prev; - next->prev = other_it->current; - other_it->prev->next = current; - other_it->current->next = next; - other_it->current->prev = prev; - other_it->next->prev = current; - } - - /* update end of list pointer when necessary (remember that the 2 iterators - may iterate over different lists!) */ - - if (list->last == current) - list->last = other_it->current; - if (other_it->list->last == other_it->current) - other_it->list->last = current; - - if (current == cycle_pt) - cycle_pt = other_it->cycle_pt; - if (other_it->current == other_it->cycle_pt) - other_it->cycle_pt = cycle_pt; - - /* The actual exchange - in all cases*/ - - old_current = current; - current = other_it->current; - other_it->current = old_current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::extract_sublist() - * - * This is a private member, used only by ELIST2::assign_to_sublist. 
- * Given another iterator for the same list, extract the links from THIS to - * OTHER inclusive, link them into a new circular list, and return a - * pointer to the last element. - * (Can't inline this function because it contains a loop) - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( //from this current - ELIST2_ITERATOR *other_it) { //to other current - #ifndef NDEBUG - const ERRCODE BAD_EXTRACTION_PTS = - "Can't extract sublist from points on different lists"; - const ERRCODE DONT_EXTRACT_DELETED = - "Can't extract a sublist marked by deleted points"; - #endif - const ERRCODE BAD_SUBLIST = "Can't find sublist end point in original list"; - - ELIST2_ITERATOR temp_it = *this; - ELIST2_LINK *end_of_new_list; - - #ifndef NDEBUG - if (!other_it) - BAD_PARAMETER.error ("ELIST2_ITERATOR::extract_sublist", ABORT, - "other_it nullptr"); - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr); - if (list != other_it->list) - BAD_EXTRACTION_PTS.error ("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr); - - if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error ("ELIST2_ITERATOR.extract_sublist", ABORT, - nullptr); - #endif - - ex_current_was_last = other_it->ex_current_was_last = false; - ex_current_was_cycle_pt = false; - other_it->ex_current_was_cycle_pt = false; - - temp_it.mark_cycle_pt (); - do { //walk sublist - if (temp_it.cycled_list()) // can't find end pt - BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr); - - if (temp_it.at_last ()) { - list->last = prev; - ex_current_was_last = other_it->ex_current_was_last = true; - } - - if (temp_it.current == cycle_pt) - ex_current_was_cycle_pt = true; - - if (temp_it.current == other_it->cycle_pt) - other_it->ex_current_was_cycle_pt = true; - - temp_it.forward (); - } - //do INCLUSIVE list - while 
(temp_it.prev != other_it->current); - - //circularise sublist - other_it->current->next = current; - //circularise sublist - current->prev = other_it->current; - end_of_new_list = other_it->current; - - //sublist = whole list - if (prev == other_it->current) { - list->last = nullptr; - prev = current = next = nullptr; - other_it->prev = other_it->current = other_it->next = nullptr; - } - else { - prev->next = other_it->next; - other_it->next->prev = prev; - - current = other_it->current = nullptr; - next = other_it->next; - other_it->prev = prev; - } - return end_of_new_list; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst2.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst2.h deleted file mode 100644 index f119ed6a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/elst2.h +++ /dev/null @@ -1,988 +0,0 @@ -/********************************************************************** - * File: elst2.h (Formerly elist2.h) - * Description: Double linked embedded list module include file. - * Author: Phil Cheatle - * Created: Wed Jan 23 11:04:47 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef ELST2_H -#define ELST2_H - -#include -#include "host.h" -#include "serialis.h" -#include "lsterr.h" - -class ELIST2_ITERATOR; - -/********************************************************************** -DESIGN NOTE -=========== - -It would probably be possible to implement the ELIST2 classes as derived -classes from ELIST. I haven't done this because: - -a) I think it would be harder to understand the code -(Though the problem with not inheriting is that changes to ELIST must be - reflected in ELIST2 and vice versa) - -b) Most of the code is inline so: -i) The duplication in source does not affect the run time code size - the - code is copied inline anyway! - - ii) The compiler should have a bit less work to do! -**********************************************************************/ - -/********************************************************************** - * CLASS - ELIST2_LINK - * - * Generic link class for doubly linked lists with embedded links - * - * Note: No destructor - elements are assumed to be destroyed EITHER after - * they have been extracted from a list OR by the ELIST2 destructor which - * walks the list. 
- **********************************************************************/ - -class DLLSYM ELIST2_LINK -{ - friend class ELIST2_ITERATOR; - friend class ELIST2; - - ELIST2_LINK *prev; - ELIST2_LINK *next; - - public: - ELIST2_LINK() { //constructor - prev = next = nullptr; - } - - ELIST2_LINK( // copy constructor - const ELIST2_LINK &) { // don't copy link - prev = next = nullptr; - } - - void operator=( // don't copy links - const ELIST2_LINK &) { - prev = next = nullptr; - } -}; - -/********************************************************************** - * CLASS - ELIST2 - * - * Generic list class for doubly linked lists with embedded links - **********************************************************************/ - -class DLLSYM ELIST2 -{ - friend class ELIST2_ITERATOR; - - ELIST2_LINK *last; //End of list - //(Points to head) - ELIST2_LINK *First() { // return first - return last ? last->next : nullptr; - } - - public: - ELIST2() { //constructor - last = nullptr; - } - - void internal_clear ( //destroy all links - void (*zapper) (ELIST2_LINK *)); - //ptr to zapper functn - - bool empty() const { //is list empty? - return !last; - } - - bool singleton() const { - return last ? (last == last->next) : false; - } - - void shallow_copy( //dangerous!! - ELIST2 *from_list) { //beware destructors!! - last = from_list->last; - } - - //ptr to copier functn - void internal_deep_copy (ELIST2_LINK * (*copier) (ELIST2_LINK *), - const ELIST2 * list); //list being copied - - void assign_to_sublist( //to this list - ELIST2_ITERATOR *start_it, //from list start - ELIST2_ITERATOR *end_it); //from list end - - int32_t length() const; // # elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_link to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. 
Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - void add_sorted(int comparator(const void*, const void*), - ELIST2_LINK* new_link); - -}; - -/*********************************************************************** - * CLASS - ELIST2_ITERATOR - * - * Generic iterator class for doubly linked lists with embedded - *links - **********************************************************************/ - -class DLLSYM ELIST2_ITERATOR -{ - friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR *, ELIST2_ITERATOR *); - - ELIST2 *list; //List being iterated - ELIST2_LINK *prev; //prev element - ELIST2_LINK *current; //current element - ELIST2_LINK *next; //next element - bool ex_current_was_last; //current extracted - //was end of list - bool ex_current_was_cycle_pt; //current extracted - //was cycle point - ELIST2_LINK *cycle_pt; //point we are cycling - //the list to. - bool started_cycling; //Have we moved off - //the start? - - ELIST2_LINK *extract_sublist( //from this current... 
- ELIST2_ITERATOR *other_it); //to other current - - public: - ELIST2_ITERATOR( //constructor - ELIST2 *list_to_iterate); - - void set_to_list( //change list - ELIST2 *list_to_iterate); - - void add_after_then_move( //add after current & - ELIST2_LINK *new_link); //move to new - - void add_after_stay_put( //add after current & - ELIST2_LINK *new_link); //stay at current - - void add_before_then_move( //add before current & - ELIST2_LINK *new_link); //move to new - - void add_before_stay_put( //add before current & - ELIST2_LINK *new_link); //stay at current - - void add_list_after( //add a list & - ELIST2 *list_to_add); //stay at current - - void add_list_before( //add a list & - ELIST2 *list_to_add); //move to it 1st item - - ELIST2_LINK *data() { //get current data - #ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST2_ITERATOR::data", ABORT, nullptr); - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::data", ABORT, nullptr); - #endif - return current; - } - - ELIST2_LINK *data_relative( //get data + or - ... - int8_t offset); //offset from current - - ELIST2_LINK *forward(); //move to next element - - ELIST2_LINK *backward(); //move to prev element - - ELIST2_LINK *extract(); //remove from list - - //go to start of list - ELIST2_LINK *move_to_first(); - - ELIST2_LINK *move_to_last(); //go to end of list - - void mark_cycle_pt(); //remember current - - bool empty() { //is list empty? - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::empty", ABORT, nullptr); - #endif - return list->empty (); - } - - bool current_extracted() { //current extracted? - return !current; - } - - bool at_first(); //Current is first? - - bool at_last(); //Current is last? - - bool cycled_list(); //Completed a cycle? 
- - void add_to_end( // add at end & - ELIST2_LINK *new_link); // don't move - - void exchange( //positions of 2 links - ELIST2_ITERATOR *other_it); //other iterator - - int32_t length(); //# elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - - private: - // Don't use the following constructor. - ELIST2_ITERATOR(); -}; - -/*********************************************************************** - * ELIST2_ITERATOR::set_to_list - * - * (Re-)initialise the iterator to point to the start of the list_to_iterate - * over. - **********************************************************************/ - -inline void ELIST2_ITERATOR::set_to_list( //change list - ELIST2 *list_to_iterate) { - #ifndef NDEBUG - if (!list_to_iterate) - BAD_PARAMETER.error ("ELIST2_ITERATOR::set_to_list", ABORT, - "list_to_iterate is nullptr"); - #endif - - list = list_to_iterate; - prev = list->last; - current = list->First (); - next = current ? current->next : nullptr; - cycle_pt = nullptr; //await explicit set - started_cycling = false; - ex_current_was_last = false; - ex_current_was_cycle_pt = false; -} - -/*********************************************************************** - * ELIST2_ITERATOR::ELIST2_ITERATOR - * - * CONSTRUCTOR - set iterator to specified list; - **********************************************************************/ - -inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) { - set_to_list(list_to_iterate); -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_after_then_move - * - * Add a new element to the list after the current element and move the - * iterator to the new element. 
- **********************************************************************/ - -inline void ELIST2_ITERATOR::add_after_then_move( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_after_then_move", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - } - else { - new_element->next = next; - next->prev = new_element; - - if (current) { //not extracted - new_element->prev = current; - current->next = new_element; - prev = current; - if (current == list->last) - list->last = new_element; - } - else { //current extracted - new_element->prev = prev; - prev->next = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; - } - } - current = new_element; -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_after_stay_put - * - * Add a new element to the list after the current element but do not move - * the iterator to the new element. 
- **********************************************************************/ - -inline void ELIST2_ITERATOR::add_after_stay_put( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_after_stay_put", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = false; - current = nullptr; - } - else { - new_element->next = next; - next->prev = new_element; - - if (current) { //not extracted - new_element->prev = current; - current->next = new_element; - if (prev == current) - prev = new_element; - if (current == list->last) - list->last = new_element; - } - else { //current extracted - new_element->prev = prev; - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - ex_current_was_last = false; - } - } - next = new_element; - } -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_before_then_move - * - * Add a new element to the list before the current element and move the - * iterator to the new element. 
- **********************************************************************/ - -inline void ELIST2_ITERATOR::add_before_then_move( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_before_then_move", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - } - else { - prev->next = new_element; - new_element->prev = prev; - - if (current) { //not extracted - new_element->next = current; - current->prev = new_element; - next = current; - } - else { //current extracted - new_element->next = next; - next->prev = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; - } - } - current = new_element; -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_before_stay_put - * - * Add a new element to the list before the current element but don't move the - * iterator to the new element. 
- **********************************************************************/ - -inline void ELIST2_ITERATOR::add_before_stay_put( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_before_stay_put", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr); - #endif - - if (list->empty ()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = true; - current = nullptr; - } - else { - prev->next = new_element; - new_element->prev = prev; - - if (current) { //not extracted - new_element->next = current; - current->prev = new_element; - if (next == current) - next = new_element; - } - else { //current extracted - new_element->next = next; - next->prev = new_element; - if (ex_current_was_last) - list->last = new_element; - } - prev = new_element; - } -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_list_after - * - * Insert another list to this list after the current element but don't move - *the - * iterator. 
- **********************************************************************/ - -inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_list_after", ABORT, nullptr); - if (!list_to_add) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_list_after", ABORT, - "list_to_add is nullptr"); - #endif - - if (!list_to_add->empty ()) { - if (list->empty ()) { - list->last = list_to_add->last; - prev = list->last; - next = list->First (); - ex_current_was_last = true; - current = nullptr; - } - else { - if (current) { //not extracted - current->next = list_to_add->First (); - current->next->prev = current; - if (current == list->last) - list->last = list_to_add->last; - list_to_add->last->next = next; - next->prev = list_to_add->last; - next = current->next; - } - else { //current extracted - prev->next = list_to_add->First (); - prev->next->prev = prev; - if (ex_current_was_last) { - list->last = list_to_add->last; - ex_current_was_last = false; - } - list_to_add->last->next = next; - next->prev = list_to_add->last; - next = prev->next; - } - } - list_to_add->last = nullptr; - } -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_list_before - * - * Insert another list to this list before the current element. Move the - * iterator to the start of the inserted elements - * iterator. 
- **********************************************************************/ - -inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_list_before", ABORT, nullptr); - if (!list_to_add) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_list_before", ABORT, - "list_to_add is nullptr"); - #endif - - if (!list_to_add->empty ()) { - if (list->empty ()) { - list->last = list_to_add->last; - prev = list->last; - current = list->First (); - next = current->next; - ex_current_was_last = false; - } - else { - prev->next = list_to_add->First (); - prev->next->prev = prev; - - if (current) { //not extracted - list_to_add->last->next = current; - current->prev = list_to_add->last; - } - else { //current extracted - list_to_add->last->next = next; - next->prev = list_to_add->last; - if (ex_current_was_last) - list->last = list_to_add->last; - if (ex_current_was_cycle_pt) - cycle_pt = prev->next; - } - current = prev->next; - next = current->next; - } - list_to_add->last = nullptr; - } -} - -/*********************************************************************** - * ELIST2_ITERATOR::extract - * - * Do extraction by removing current from the list, returning it to the - * caller, but NOT updating the iterator. (So that any calling loop can do - * this.) The iterator's current points to nullptr. If the extracted element - * is to be deleted, this is the callers responsibility. - **********************************************************************/ - -inline ELIST2_LINK *ELIST2_ITERATOR::extract() { - ELIST2_LINK *extracted_link; - - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::extract", ABORT, nullptr); - if (!current) //list empty or - //element extracted - NULL_CURRENT.error ("ELIST2_ITERATOR::extract", - ABORT, nullptr); - #endif - - if (list->singleton()) { - // Special case where we do need to change the iterator. 
- prev = next = list->last = nullptr; - } else { - prev->next = next; //remove from list - next->prev = prev; - - if (current == list->last) { - list->last = prev; - ex_current_was_last = true; - } else { - ex_current_was_last = false; - } - } - // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. - ex_current_was_cycle_pt = (current == cycle_pt); - extracted_link = current; - extracted_link->next = nullptr; //for safety - extracted_link->prev = nullptr; //for safety - current = nullptr; - return extracted_link; -} - -/*********************************************************************** - * ELIST2_ITERATOR::move_to_first() - * - * Move current so that it is set to the start of the list. - * Return data just in case anyone wants it. - **********************************************************************/ - -inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::move_to_first", ABORT, nullptr); - #endif - - current = list->First (); - prev = list->last; - next = current ? current->next : nullptr; - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::move_to_last() - * - * Move current so that it is set to the end of the list. - * Return data just in case anyone wants it. - **********************************************************************/ - -inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::move_to_last", ABORT, nullptr); - #endif - - current = list->last; - prev = current ? current->prev : nullptr; - next = current ? current->next : nullptr; - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::mark_cycle_pt() - * - * Remember the current location so that we can tell whether we've returned - * to this point later. 
- * - * If the current point is deleted either now, or in the future, the cycle - * point will be set to the next item which is set to current. This could be - * by a forward, add_after_then_move or add_after_then_move. - **********************************************************************/ - -inline void ELIST2_ITERATOR::mark_cycle_pt() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::mark_cycle_pt", ABORT, nullptr); - #endif - - if (current) - cycle_pt = current; - else - ex_current_was_cycle_pt = TRUE; - started_cycling = FALSE; -} - -/*********************************************************************** - * ELIST2_ITERATOR::at_first() - * - * Are we at the start of the list? - * - **********************************************************************/ - -inline bool ELIST2_ITERATOR::at_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::at_first", ABORT, nullptr); - #endif - - //we're at a deleted - return ((list->empty ()) || (current == list->First ()) || ((current == nullptr) && - (prev == list->last) && //NON-last pt between - !ex_current_was_last)); //first and last -} - -/*********************************************************************** - * ELIST2_ITERATOR::at_last() - * - * Are we at the end of the list? - * - **********************************************************************/ - -inline bool ELIST2_ITERATOR::at_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::at_last", ABORT, nullptr); - #endif - - //we're at a deleted - return ((list->empty ()) || (current == list->last) || ((current == nullptr) && - (prev == list->last) && //last point between - ex_current_was_last)); //first and last -} - -/*********************************************************************** - * ELIST2_ITERATOR::cycled_list() - * - * Have we returned to the cycle_pt since it was set? 
- * - **********************************************************************/ - -inline bool ELIST2_ITERATOR::cycled_list() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::cycled_list", ABORT, nullptr); - #endif - - return ((list->empty ()) || ((current == cycle_pt) && started_cycling)); - -} - -/*********************************************************************** - * ELIST2_ITERATOR::length() - * - * Return the length of the list - * - **********************************************************************/ - -inline int32_t ELIST2_ITERATOR::length() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::length", ABORT, nullptr); - #endif - - return list->length (); -} - -/*********************************************************************** - * ELIST2_ITERATOR::sort() - * - * Sort the elements of the list, then reposition at the start. - * - **********************************************************************/ - -inline void -ELIST2_ITERATOR::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::sort", ABORT, nullptr); - #endif - - list->sort (comparator); - move_to_first(); -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_to_end - * - * Add a new element to the end of the list without moving the iterator. - * This is provided because a single linked list cannot move to the last as - * the iterator couldn't set its prev pointer. Adding to the end is - * essential for implementing - queues. 
-**********************************************************************/ - -inline void ELIST2_ITERATOR::add_to_end( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_to_end", ABORT, nullptr); - if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_to_end", ABORT, - "new_element is nullptr"); - if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_to_end", ABORT, nullptr); - #endif - - if (this->at_last ()) { - this->add_after_stay_put (new_element); - } - else { - if (this->at_first ()) { - this->add_before_stay_put (new_element); - list->last = new_element; - } - else { //Iteratr is elsewhere - new_element->next = list->last->next; - new_element->prev = list->last; - list->last->next->prev = new_element; - list->last->next = new_element; - list->last = new_element; - } - } -} - - -/*********************************************************************** - QUOTE_IT MACRO DEFINITION - =========================== -Replace with "". may be an arbitrary number of tokens -***********************************************************************/ - -#define QUOTE_IT(parm) #parm - -/*********************************************************************** - ELIST2IZE(CLASSNAME) MACRO DEFINITION - ====================================== - -CLASSNAME is assumed to be the name of a class which has a baseclass of -ELIST2_LINK. - -NOTE: Because we don't use virtual functions in the list code, the list code -will NOT work correctly for classes derived from this. 
- -The macro generates: - - An element deletion function: CLASSNAME##_zapper - - An E_LIST2 subclass: CLASSNAME##_LIST - - An E_LIST2_ITERATOR subclass: - CLASSNAME##_IT - -NOTE: Generated names are DELIBERATELY designed to clash with those for -ELISTIZE but NOT with those for CLISTIZE and CLIST2IZE - -Two macros are provided: ELIST2IZE and ELIST2IZEH -The ...IZEH macros just define the class names for use in .h files -The ...IZE macros define the code use in .c files -***********************************************************************/ - -/*********************************************************************** - ELIST2IZEH(CLASSNAME) MACRO - -ELIST2IZEH is a concatenation of 3 fragments ELIST2IZEH_A, ELIST2IZEH_B and -ELIST2IZEH_C. -***********************************************************************/ - -#define ELIST2IZEH_A(CLASSNAME) \ - \ - extern DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ - ELIST2_LINK *link); /*link to delete*/ - -#define ELIST2IZEH_B(CLASSNAME) \ - \ - /*********************************************************************** \ - * CLASS - \ - *CLASSNAME##_LIST \ - * \ - * List class for class \ - *CLASSNAME \ - * \ - **********************************************************************/ \ - \ - class DLLSYM CLASSNAME##_LIST : public ELIST2 { \ - public: \ - CLASSNAME##_LIST() : ELIST2() {} \ - /* constructor */ \ - \ - CLASSNAME##_LIST( /* don't construct */ \ - const CLASSNAME##_LIST &) /*by initial assign*/ \ - { \ - DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, \ - nullptr); \ - } \ - \ - void clear() /* delete elements */ \ - { \ - ELIST2::internal_clear(&CLASSNAME##_zapper); \ - } \ - \ - ~CLASSNAME##_LIST() /* destructor */ \ - { \ - clear(); \ - } \ - \ - /* Become a deep copy of src_list*/ \ - void deep_copy(const CLASSNAME##_LIST *src_list, \ - CLASSNAME *(*copier)(const CLASSNAME *)); \ - \ - void operator=(/* prevent assign */ \ - const CLASSNAME##_LIST &) { \ - 
DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr); \ - } - -#define ELIST2IZEH_C(CLASSNAME) \ - } \ - ; \ - \ - /*********************************************************************** \ - * CLASS - CLASSNAME##_IT \ - * \ - * Iterator class for class CLASSNAME##_LIST \ - * \ - * Note: We don't need to coerce pointers to member functions input \ - * parameters as these are automatically converted to the type of the base \ - * type. ("A ptr to a class may be converted to a pointer to a public base \ - * class of that class") \ - **********************************************************************/ \ - \ - class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR { \ - public: \ - CLASSNAME##_IT(CLASSNAME##_LIST *list) : ELIST2_ITERATOR(list) {} \ - \ - CLASSNAME *data() { return (CLASSNAME *)ELIST2_ITERATOR::data(); } \ - \ - CLASSNAME *data_relative(int8_t offset) { \ - return (CLASSNAME *)ELIST2_ITERATOR::data_relative(offset); \ - } \ - \ - CLASSNAME *forward() { return (CLASSNAME *)ELIST2_ITERATOR::forward(); } \ - \ - CLASSNAME *backward() { return (CLASSNAME *)ELIST2_ITERATOR::backward(); } \ - \ - CLASSNAME *extract() { return (CLASSNAME *)ELIST2_ITERATOR::extract(); } \ - \ - CLASSNAME *move_to_first() { \ - return (CLASSNAME *)ELIST2_ITERATOR::move_to_first(); \ - } \ - \ - CLASSNAME *move_to_last() { \ - return (CLASSNAME *)ELIST2_ITERATOR::move_to_last(); \ - } \ - private: \ - CLASSNAME##_IT(); \ - }; - -#define ELIST2IZEH(CLASSNAME) \ - \ - ELIST2IZEH_A(CLASSNAME) \ - \ - ELIST2IZEH_B(CLASSNAME) \ - \ - ELIST2IZEH_C(CLASSNAME) - -/*********************************************************************** - ELIST2IZE(CLASSNAME) MACRO -***********************************************************************/ - -#define ELIST2IZE(CLASSNAME) \ - \ - /*********************************************************************** \ - * CLASSNAME##_zapper \ - * \ - * A function which can delete a CLASSNAME element. 
This is passed to the \ - * generic clear list member function so that when a list is cleared the \ - * elements on the list are properly destroyed from the base class, even \ - * though we don't use a virtual destructor function. \ - **********************************************************************/ \ - \ - DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ - ELIST2_LINK *link) /*link to delete*/ \ - { \ - delete (CLASSNAME *)link; \ - } \ - \ - /* Become a deep copy of src_list*/ \ - void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ - CLASSNAME *(*copier)(const CLASSNAME *)) { \ - CLASSNAME##_IT from_it(const_cast(src_list)); \ - CLASSNAME##_IT to_it(this); \ - \ - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ - to_it.add_after_then_move((*copier)(from_it.data())); \ - } - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/errcode.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/errcode.cpp deleted file mode 100644 index ce4377de..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/errcode.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/********************************************************************** - * File: errcode.cpp (Formerly error.c) - * Description: Generic error handler function - * Author: Ray Smith - * Created: Tue May 1 16:28:39 BST 1990 - * - * (C) Copyright 1989, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include -#include -#include -#include "tprintf.h" -#include "errcode.h" - -const ERRCODE BADERRACTION = "Illegal error action"; -#define MAX_MSG 1024 - -/********************************************************************** - * error - * - * Print an error message and continue, exit or abort according to action. - * Makes use of error messages and numbers in a common place. - * - **********************************************************************/ -void ERRCODE::error( // handle error -const char *caller, // name of caller -TessErrorLogCode action, // action to take -const char *format, ... // special message -) const { - va_list args; // variable args - char msg[MAX_MSG]; - char *msgptr = msg; - - if (caller != nullptr) - //name of caller - msgptr += sprintf (msgptr, "%s:", caller); - //actual message - msgptr += sprintf (msgptr, "Error:%s", message); - if (format != nullptr) { - msgptr += sprintf (msgptr, ":"); - va_start(args, format); //variable list - #ifdef _WIN32 - //print remainder - msgptr += _vsnprintf (msgptr, MAX_MSG - 2 - (msgptr - msg), format, args); - msg[MAX_MSG - 2] = '\0'; //ensure termination - strcat (msg, "\n"); - #else - //print remainder - msgptr += vsprintf (msgptr, format, args); - //no specific - msgptr += sprintf (msgptr, "\n"); - #endif - va_end(args); - } - else - //no specific - msgptr += sprintf (msgptr, "\n"); - - // %s is needed here so msg is printed correctly! - fprintf(stderr, "%s", msg); - - switch (action) { - case DBG: - case TESSLOG: - return; //report only - case TESSEXIT: - //err_exit(); - case ABORT: -#if !defined(NDEBUG) - // Create a deliberate segv as the stack trace is more useful that way. - // This is done only in debug builds, because the error message - // "segmentation fault" confuses most normal users. 
- *reinterpret_cast(0) = 0; -#endif - abort(); - default: - BADERRACTION.error ("error", ABORT, nullptr); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/errcode.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/errcode.h deleted file mode 100644 index 2f31a7b9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/errcode.h +++ /dev/null @@ -1,103 +0,0 @@ -/********************************************************************** - * File: errcode.h (Formerly error.h) - * Description: Header file for generic error handler class - * Author: Ray Smith - * Created: Tue May 1 16:23:36 BST 1990 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef ERRCODE_H -#define ERRCODE_H - -#include "host.h" - -/*Control parameters for error()*/ -enum TessErrorLogCode { - DBG = -1, /*log without alert */ - TESSLOG = 0, /*alert user */ - TESSEXIT = 1, /*exit after erro */ - ABORT = 2 /*abort after error */ -}; - -/* Explicit Error Abort codes */ -#define NO_ABORT_CODE 0 -#define LIST_ABORT 1 -#define MEMORY_ABORT 2 -#define FILE_ABORT 3 - -/* Location of code at error codes Reserve 0..2 (status codes 0..23 for UNLV)*/ -#define LOC_UNUSED0 0 -#define LOC_UNUSED1 1 -#define LOC_UNUSED2 2 -#define LOC_INIT 3 -#define LOC_EDGE_PROG 4 -#define LOC_TEXT_ORD_ROWS 5 -#define LOC_TEXT_ORD_WORDS 6 -#define LOC_PASS1 7 -#define LOC_PASS2 8 -/* Reserve up to 8..13 for adding subloc 0/3 plus subsubloc 0/1/2 */ -#define LOC_FUZZY_SPACE 14 -/* Reserve up to 14..20 for adding subloc 0/3 plus subsubloc 0/1/2 */ -#define LOC_MM_ADAPT 21 -#define LOC_DOC_BLK_REJ 22 -#define LOC_WRITE_RESULTS 23 -#define LOC_ADAPTIVE 24 -/* DON'T DEFINE ANY LOCATION > 31 !!! */ - -/* Sub locatation determines whether pass2 was in normal mode or fix xht mode*/ -#define SUBLOC_NORM 0 -#define SUBLOC_FIX_XHT 3 - -/* Sub Sub locatation determines whether match_word_pass2 was in Tess - matcher, NN matcher or somewhere else */ - -#define SUBSUBLOC_OTHER 0 -#define SUBSUBLOC_TESS 1 -#define SUBSUBLOC_NN 2 - -class TESS_API ERRCODE { // error handler class - const char *message; // error message - public: - void error( // error print function - const char *caller, // function location - TessErrorLogCode action, // action to take - const char *format, ... 
// fprintf format - ) const; - ERRCODE(const char *string) { - message = string; - } // initialize with string -}; - -const ERRCODE ASSERT_FAILED = "Assert failed"; - -#define ASSERT_HOST(x) if (!(x)) \ - { \ - ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", \ - __FILE__, __LINE__); \ - } - -#define ASSERT_HOST_MSG(x, ...) \ - if (!(x)) { \ - tprintf(__VA_ARGS__); \ - ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", __FILE__, __LINE__); \ - } - -void signal_exit(int signal_code); - -void set_global_loc_code(int loc_code); - -void set_global_subloc_code(int loc_code); - -void set_global_subsubloc_code(int loc_code); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileerr.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileerr.h deleted file mode 100644 index d3b6993d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileerr.h +++ /dev/null @@ -1,34 +0,0 @@ -/********************************************************************** - * File: fileerr.h (Formerly filerr.h) - * Description: Errors for file utilities. - * Author: Ray Smith - * Created: Tue Aug 14 15:45:16 BST 1990 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef FILEERR_H -#define FILEERR_H - -#include "errcode.h" - -const ERRCODE CANTOPENFILE = "Can't open file"; -const ERRCODE CANTCREATEFILE = "Can't create file"; -const ERRCODE CANTMAKEPIPE = "Can't create pipe"; -const ERRCODE CANTCONNECTPIPE = "Can't reconnect pipes to stdin/stdout"; -const ERRCODE READFAILED = "Read of file failed"; -const ERRCODE WRITEFAILED = "Write of file failed"; -const ERRCODE SELECTFAILED = "Select failed"; - -const ERRCODE EXECFAILED = "Could not exec new process"; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileio.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileio.cpp deleted file mode 100644 index 7a4c27ab..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileio.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************************** - * File: fileio.cpp - * Description: File I/O utilities. - * Author: Samuel Charron - * Created: Tuesday, July 9, 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy - * of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - * by applicable law or agreed to in writing, software distributed under the - * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - * OF ANY KIND, either express or implied. See the License for the specific - * language governing permissions and limitations under the License. 
- * - **********************************************************************/ -#ifdef _WIN32 -#ifndef unlink -#include -#endif -#else -#include -#include -#endif - -#include -#include -#include - -#include "errcode.h" -#include "fileio.h" -#include "tprintf.h" - -namespace tesseract { - -/////////////////////////////////////////////////////////////////////////////// -// File:: -/////////////////////////////////////////////////////////////////////////////// -FILE* File::Open(const std::string& filename, const std::string& mode) { - return fopen(filename.c_str(), mode.c_str()); -} - -FILE* File::OpenOrDie(const std::string& filename, - const std::string& mode) { - FILE* stream = fopen(filename.c_str(), mode.c_str()); - if (stream == nullptr) { - tprintf("Unable to open '%s' in mode '%s'\n", filename.c_str(), - mode.c_str()); - } - return stream; -} - -void File::WriteStringToFileOrDie(const std::string& str, - const std::string& filename) { - FILE* stream = fopen(filename.c_str(), "wb"); - if (stream == nullptr) { - tprintf("Unable to open '%s' for writing\n", filename.c_str()); - return; - } - fputs(str.c_str(), stream); - ASSERT_HOST(fclose(stream) == 0); -} - -bool File::Readable(const std::string& filename) { - FILE* stream = fopen(filename.c_str(), "rb"); - if (stream == nullptr) { - return false; - } - fclose(stream); - return true; -} - -bool File::ReadFileToString(const std::string& filename, std::string* out) { - FILE* stream = File::Open(filename.c_str(), "rb"); - if (stream == nullptr) return false; - InputBuffer in(stream); - *out = ""; - in.Read(out); - return in.CloseFile(); -} - -std::string File::JoinPath(const std::string& prefix, const std::string& suffix) { - return (prefix.empty() || prefix[prefix.size() - 1] == '/') - ? 
prefix + suffix - : prefix + "/" + suffix; -} - -bool File::Delete(const char* pathname) { -#if (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - const int status = _unlink(pathname); -#else - const int status = unlink(pathname); -#endif - if (status != 0) { - tprintf("ERROR: Unable to delete file %s\n", pathname); - return false; - } - return true; -} - -#ifdef _WIN32 -bool File::DeleteMatchingFiles(const char* pattern) { - WIN32_FIND_DATA data; - BOOL result = TRUE; -#if (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - const int strBufferSize = 4096; - wchar_t w_string[strBufferSize]; - MultiByteToWideChar(CP_ACP, 0, pattern, -1, w_string, strBufferSize); - HANDLE handle = FindFirstFile(w_string, &data); -#else - HANDLE handle = FindFirstFile(pattern, &data); -#endif - bool all_deleted = true; - if (handle != INVALID_HANDLE_VALUE) { - for (; result; result = FindNextFile(handle, &data)) { -#if (defined WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP) /* windows but not desktop environment */ - char datacFileName[strBufferSize]; - sprintf(datacFileName, "%ws", data.cFileName); - all_deleted &= File::Delete(datacFileName); -#else - all_deleted &= File::Delete(data.cFileName); -#endif - } - FindClose(handle); - } - return all_deleted; -} -#else -bool File::DeleteMatchingFiles(const char* pattern) { - glob_t pglob; - char **paths; - bool all_deleted = true; - if (glob(pattern, 0, nullptr, &pglob) == 0) { - for (paths = pglob.gl_pathv; *paths != nullptr; paths++) { - all_deleted &= File::Delete(*paths); - } - globfree(&pglob); - } - return all_deleted; -} -#endif - -/////////////////////////////////////////////////////////////////////////////// -// InputBuffer:: -/////////////////////////////////////////////////////////////////////////////// -InputBuffer::InputBuffer(FILE* stream) - : stream_(stream) { - fseek(stream_, 0, 
SEEK_END); - filesize_ = ftell(stream_); - fseek(stream_, 0, SEEK_SET); -} - -InputBuffer::InputBuffer(FILE* stream, size_t) - : stream_(stream) { - fseek(stream_, 0, SEEK_END); - filesize_ = ftell(stream_); - fseek(stream_, 0, SEEK_SET); -} - -InputBuffer::~InputBuffer() { - if (stream_ != nullptr) { - fclose(stream_); - } -} - -bool InputBuffer::Read(std::string* out) { - char buf[BUFSIZ + 1]; - int l; - while ((l = fread(buf, 1, BUFSIZ, stream_)) > 0) { - if (ferror(stream_)) { - clearerr(stream_); - return false; - } - buf[l] = 0; - out->append(buf); - } - return true; -} - -bool InputBuffer::CloseFile() { - int ret = fclose(stream_); - stream_ = nullptr; - return ret == 0; -} - -/////////////////////////////////////////////////////////////////////////////// -// OutputBuffer:: -/////////////////////////////////////////////////////////////////////////////// - -OutputBuffer::OutputBuffer(FILE* stream) - : stream_(stream) { -} - -OutputBuffer::OutputBuffer(FILE* stream, size_t) - : stream_(stream) { -} - -OutputBuffer::~OutputBuffer() { - if (stream_ != nullptr) { - fclose(stream_); - } -} - -void OutputBuffer::WriteString(const std::string& str) { - fputs(str.c_str(), stream_); -} - -bool OutputBuffer::CloseFile() { - int ret = fclose(stream_); - stream_ = nullptr; - return ret == 0; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileio.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileio.h deleted file mode 100644 index e72439a9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/fileio.h +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************** - * File: fileio.h - * Description: File I/O utilities. - * Author: Samuel Charron - * Created: Tuesday, July 9, 2013 - * - * (C) Copyright 2013, Google Inc. 
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy - * of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - * by applicable law or agreed to in writing, software distributed under the - * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - * OF ANY KIND, either express or implied. See the License for the specific - * language governing permissions and limitations under the License. - * - **********************************************************************/ -#ifndef TESSERACT_TRAINING_FILEIO_H_ -#define TESSERACT_TRAINING_FILEIO_H_ - -#include -#include -#include - -#include "platform.h" - -namespace tesseract { - -// A class to manipulate FILE*s. -class File { - public: - // Try to open the file 'filename' in mode 'mode'. - // Stop the program if it cannot open it. - static FILE* OpenOrDie(const std::string& filename, const std::string& mode); - static FILE* Open(const std::string& filename, const std::string& mode); - - // Try to open the file 'filename' and to write 'str' in it. - // Stop the program if it fails. - static void WriteStringToFileOrDie(const std::string& str, const std::string& filename); - - // Return true if the file 'filename' is readable. - static bool Readable(const std::string& filename); - - static bool ReadFileToString(const std::string& filename, std::string* out); - - // Helper methods - - // Concatenate file paths removing any extra intervening '/' symbols. - static std::string JoinPath(const std::string& prefix, const std::string& suffix); - // Delete a filename or all filenames matching a glob pattern. - static bool Delete(const char* pathname); - static bool DeleteMatchingFiles(const char* pattern); -}; - -// A class to manipulate Files for reading. -class InputBuffer { - public: - explicit InputBuffer(FILE* stream); - // 'size' is ignored. 
- InputBuffer(FILE* stream, size_t size); - - ~InputBuffer(); - - // Read data until end-of-file. - // The data is stored in '*out'. - // Return false if an error occurs, true otherwise. - bool Read(std::string* out); - - // Close the FILE* used by InputBuffer. - // Return false if an error occurs, true otherwise. - bool CloseFile(); - - private: - FILE* stream_; - int filesize_; -}; - -// A class to manipulate Files for writing. -class OutputBuffer { - public: - explicit OutputBuffer(FILE* stream); - // 'size' is ignored. - OutputBuffer(FILE* stream, size_t size); - - ~OutputBuffer(); - - // Write string 'str' to the open FILE*. - void WriteString(const std::string& str); - - // Close the FILE* used by InputBuffer. - // Return false if an error occurs, true otherwise. - bool CloseFile(); - - private: - FILE* stream_; -}; - -} // namespace tesseract -#endif // TESSERACT_TRAINING_FILEIO_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/genericheap.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/genericheap.h deleted file mode 100644 index c78500d5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/genericheap.h +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2012 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: genericheap.h -// Description: Template heap class. -// Author: Ray Smith, based on Dan Johnson's original code. -// Created: Wed Mar 14 08:13:00 PDT 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_GENERICHEAP_H_ -#define TESSERACT_CCUTIL_GENERICHEAP_H_ - -#include "errcode.h" -#include "genericvector.h" - -namespace tesseract { - -// GenericHeap requires 1 template argument: -// Pair will normally be either KDPairInc or KDPairDec -// for some arbitrary Key and scalar, smart pointer, or non-ownership pointer -// Data type, according to whether a MIN heap or a MAX heap is desired, -// respectively. Using KDPtrPairInc or KDPtrPairDec, -// GenericHeap can also handle simple Data pointers and own them. -// If no additional data is required, Pair can also be a scalar, since -// GenericHeap doesn't look inside it except for operator<. -// -// The heap is stored as a packed binary tree in an array hosted by a -// GenericVector, with the invariant that the children of each node are -// both NOT Pair::operator< the parent node. KDPairInc defines Pair::operator< -// to use Key::operator< to generate a MIN heap and KDPairDec defines -// Pair::operator< to use Key::operator> to generate a MAX heap by reversing -// all the comparisons. -// See http://en.wikipedia.org/wiki/Heap_(data_structure) for more detail on -// the basic heap implementation. -// -// Insertion and removal are both O(log n) and, unlike the STL heap, an -// explicit Reshuffle function allows a node to be repositioned in time O(log n) -// after changing its value. 
-// -// Accessing the element for revaluation is a more complex matter, since the -// index and pointer can be changed arbitrarily by heap operations. -// Revaluation can be done by making the Data type in the Pair derived from or -// contain a DoublePtr as its first data element, making it possible to convert -// the pointer to a Pair using KDPairInc::RecastDataPointer. -template -class GenericHeap { - public: - GenericHeap() = default; - // The initial size is only a GenericVector::reserve. It is not enforced as - // the size limit of the heap. Caller must implement their own enforcement. - explicit GenericHeap(int initial_size) { - heap_.reserve(initial_size); - } - - // Simple accessors. - bool empty() const { - return heap_.empty(); - } - int size() const { - return heap_.size(); - } - int size_reserved() const { - return heap_.size_reserved(); - } - void clear() { - // Clear truncates to 0 to keep the number reserved in tact. - heap_.truncate(0); - } - // Provides access to the underlying vector. - // Caution! any changes that modify the keys will invalidate the heap! - GenericVector* heap() { - return &heap_; - } - // Provides read-only access to an element of the underlying vector. - const Pair& get(int index) const { - return heap_[index]; - } - - // Add entry to the heap, keeping the smallest item at the top, by operator<. - // Note that *entry is used as the source of operator=, but it is non-const - // to allow for a smart pointer to be contained within. - // Time = O(log n). - void Push(Pair* entry) { - int hole_index = heap_.size(); - // Make a hole in the end of heap_ and sift it up to be the correct - // location for the new *entry. To avoid needing a default constructor - // for primitive types, and to allow for use of DoublePtr in the Pair - // somewhere, we have to incur a double copy here. 
- heap_.push_back(*entry); - *entry = heap_.back(); - hole_index = SiftUp(hole_index, *entry); - heap_[hole_index] = *entry; - } - - // Get the value of the top (smallest, defined by operator< ) element. - const Pair& PeekTop() const { - return heap_[0]; - } - // Get the value of the worst (largest, defined by operator< ) element. - const Pair& PeekWorst() const { return heap_[IndexOfWorst()]; } - - // Removes the top element of the heap. If entry is not nullptr, the element - // is copied into *entry, otherwise it is discarded. - // Returns false if the heap was already empty. - // Time = O(log n). - bool Pop(Pair* entry) { - int new_size = heap_.size() - 1; - if (new_size < 0) - return false; // Already empty. - if (entry != nullptr) - *entry = heap_[0]; - if (new_size > 0) { - // Sift the hole at the start of the heap_ downwards to match the last - // element. - Pair hole_pair = heap_[new_size]; - heap_.truncate(new_size); - int hole_index = SiftDown(0, hole_pair); - heap_[hole_index] = hole_pair; - } else { - heap_.truncate(new_size); - } - return true; - } - - // Removes the MAXIMUM element of the heap. (MIN from a MAX heap.) If entry is - // not nullptr, the element is copied into *entry, otherwise it is discarded. - // Time = O(n). Returns false if the heap was already empty. - bool PopWorst(Pair* entry) { - int worst_index = IndexOfWorst(); - if (worst_index < 0) return false; // It cannot be empty! - // Extract the worst element from the heap, leaving a hole at worst_index. - if (entry != nullptr) - *entry = heap_[worst_index]; - int heap_size = heap_.size() - 1; - if (heap_size > 0) { - // Sift the hole upwards to match the last element of the heap_ - Pair hole_pair = heap_[heap_size]; - int hole_index = SiftUp(worst_index, hole_pair); - heap_[hole_index] = hole_pair; - } - heap_.truncate(heap_size); - return true; - } - - // Returns the index of the worst element. Time = O(n/2). 
- int IndexOfWorst() const { - int heap_size = heap_.size(); - if (heap_size == 0) return -1; // It cannot be empty! - - // Find the maximum element. Its index is guaranteed to be greater than - // the index of the parent of the last element, since by the heap invariant - // the parent must be less than or equal to the children. - int worst_index = heap_size - 1; - int end_parent = ParentNode(worst_index); - for (int i = worst_index - 1; i > end_parent; --i) { - if (heap_[worst_index] < heap_[i]) worst_index = i; - } - return worst_index; - } - - // The pointed-to Pair has changed its key value, so the location of pair - // is reshuffled to maintain the heap invariant. - // Must be a valid pointer to an element of the heap_! - // Caution! Since GenericHeap is based on GenericVector, reallocs may occur - // whenever the vector is extended and elements may get shuffled by any - // Push or Pop operation. Therefore use this function only if Data in Pair is - // of type DoublePtr, derived (first) from DoublePtr, or has a DoublePtr as - // its first element. Reshuffles the heap to maintain the invariant. - // Time = O(log n). - void Reshuffle(Pair* pair) { - int index = pair - &heap_[0]; - Pair hole_pair = heap_[index]; - index = SiftDown(index, hole_pair); - index = SiftUp(index, hole_pair); - heap_[index] = hole_pair; - } - - private: - // A hole in the heap exists at hole_index, and we want to fill it with the - // given pair. SiftUp sifts the hole upward to the correct position and - // returns the destination index without actually putting pair there. - int SiftUp(int hole_index, const Pair& pair) { - int parent; - while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) { - heap_[hole_index] = heap_[parent]; - hole_index = parent; - } - return hole_index; - } - - // A hole in the heap exists at hole_index, and we want to fill it with the - // given pair. 
SiftDown sifts the hole downward to the correct position and - // returns the destination index without actually putting pair there. - int SiftDown(int hole_index, const Pair& pair) { - int heap_size = heap_.size(); - int child; - while ((child = LeftChild(hole_index)) < heap_size) { - if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) - ++child; - if (heap_[child] < pair) { - heap_[hole_index] = heap_[child]; - hole_index = child; - } else { - break; - } - } - return hole_index; - } - - // Functions to navigate the tree. Unlike the original implementation, we - // store the root at index 0. - int ParentNode(int index) const { - return (index + 1) / 2 - 1; - } - int LeftChild(int index) const { - return index * 2 + 1; - } - - private: - GenericVector heap_; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_GENERICHEAP_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/genericvector.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/genericvector.h deleted file mode 100644 index e0d31b47..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/genericvector.h +++ /dev/null @@ -1,1130 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: genericvector.h -// Description: Generic vector class -// Author: Daria Antonova -// Created: Mon Jun 23 11:26:43 PDT 2008 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// -// -#ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_ -#define TESSERACT_CCUTIL_GENERICVECTOR_H_ - -#include -#include -#include -#include - -#include "tesscallback.h" -#include "helpers.h" -#include "serialis.h" -#include "strngs.h" - -// Use PointerVector below in preference to GenericVector, as that -// provides automatic deletion of pointers, [De]Serialize that works, and -// sort that works. -template -class GenericVector { - public: - GenericVector() { - init(kDefaultVectorSize); - } - GenericVector(int size, const T& init_val) { - init(size); - init_to_size(size, init_val); - } - - // Copy - GenericVector(const GenericVector& other) { - this->init(other.size()); - this->operator+=(other); - } - GenericVector &operator+=(const GenericVector& other); - GenericVector &operator=(const GenericVector& other); - - ~GenericVector(); - - // Reserve some memory. - void reserve(int size); - // Double the size of the internal array. - void double_the_size(); - - // Resizes to size and sets all values to t. - void init_to_size(int size, const T& t); - // Resizes to size without any initialization. - void resize_no_init(int size) { - reserve(size); - size_used_ = size; - } - - // Return the size used. - int size() const { - return size_used_; - } - // Workaround to avoid g++ -Wsign-compare warnings. - size_t unsigned_size() const { - static_assert(sizeof(size_used_) <= sizeof(size_t), - "Wow! sizeof(size_t) < sizeof(int32_t)!!"); - assert(0 <= size_used_); - return static_cast(size_used_); - } - int size_reserved() const { - return size_reserved_; - } - - int length() const { - return size_used_; - } - - // Return true if empty. - bool empty() const { - return size_used_ == 0; - } - - // Return the object from an index. - T &get(int index) const; - T &back() const; - T &operator[](int index) const; - // Returns the last object and removes it. - T pop_back(); - - // Return the index of the T object. 
- // This method NEEDS a compare_callback to be passed to - // set_compare_callback. - int get_index(const T& object) const; - - // Return true if T is in the array - bool contains(const T& object) const; - - // Return true if the index is valid - T contains_index(int index) const; - - // Push an element in the end of the array - int push_back(T object); - void operator+=(const T& t); - - // Push an element in the end of the array if the same - // element is not already contained in the array. - int push_back_new(const T& object); - - // Push an element in the front of the array - // Note: This function is O(n) - int push_front(const T& object); - - // Set the value at the given index - void set(const T& t, int index); - - // Insert t at the given index, push other elements to the right. - void insert(const T& t, int index); - - // Removes an element at the given index and - // shifts the remaining elements to the left. - void remove(int index); - - // Truncates the array to the given size by removing the end. - // If the current size is less, the array is not expanded. - void truncate(int size) { - if (size < size_used_) - size_used_ = size; - } - - // Add a callback to be called to delete the elements when the array took - // their ownership. - void set_clear_callback(TessCallback1* cb); - - // Add a callback to be called to compare the elements when needed (contains, - // get_id, ...) - void set_compare_callback(TessResultCallback2* cb); - - // Clear the array, calling the clear callback function if any. - // All the owned callbacks are also deleted. - // If you don't want the callbacks to be deleted, before calling clear, set - // the callback to nullptr. - void clear(); - - // Delete objects pointed to by data_[i] - void delete_data_pointers(); - - // This method clears the current object, then, does a shallow copy of - // its argument, and finally invalidates its argument. 
- // Callbacks are moved to the current object; - void move(GenericVector* from); - - // Read/Write the array to a file. This does _NOT_ read/write the callbacks. - // The callback given must be permanent since they will be called more than - // once. The given callback will be deleted at the end. - // If the callbacks are nullptr, then the data is simply read/written using - // fread (and swapping)/fwrite. - // Returns false on error or if the callback returns false. - // DEPRECATED. Use [De]Serialize[Classes] instead. - bool write(FILE* f, TessResultCallback2* cb) const; - bool read(tesseract::TFile* f, - TessResultCallback2* cb); - // Writes a vector of simple types to the given file. Assumes that bitwise - // read/write of T will work. Returns false in case of error. - // TODO(rays) Change all callers to use TFile and remove deprecated methods. - bool Serialize(FILE* fp) const; - bool Serialize(tesseract::TFile* fp) const; - // Reads a vector of simple types from the given file. Assumes that bitwise - // read/write will work with ReverseN according to sizeof(T). - // Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - // TFile is assumed to know about swapping. - bool DeSerialize(bool swap, FILE* fp); - bool DeSerialize(tesseract::TFile* fp); - // Skips the deserialization of the vector. - static bool SkipDeSerialize(tesseract::TFile* fp); - // Writes a vector of classes to the given file. Assumes the existence of - // bool T::Serialize(FILE* fp) const that returns false in case of error. - // Returns false in case of error. - bool SerializeClasses(FILE* fp) const; - bool SerializeClasses(tesseract::TFile* fp) const; - // Reads a vector of classes from the given file. Assumes the existence of - // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of - // error. Also needs T::T() and T::T(constT&), as init_to_size is used in - // this function. Returns false in case of error. 
- // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerializeClasses(bool swap, FILE* fp); - bool DeSerializeClasses(tesseract::TFile* fp); - // Calls SkipDeSerialize on the elements of the vector. - static bool SkipDeSerializeClasses(tesseract::TFile* fp); - - // Allocates a new array of double the current_size, copies over the - // information from data to the new location, deletes data and returns - // the pointed to the new larger array. - // This function uses memcpy to copy the data, instead of invoking - // operator=() for each element like double_the_size() does. - static T *double_the_size_memcpy(int current_size, T *data) { - T *data_new = new T[current_size * 2]; - memcpy(data_new, data, sizeof(T) * current_size); - delete[] data; - return data_new; - } - - // Reverses the elements of the vector. - void reverse() { - for (int i = 0; i < size_used_ / 2; ++i) - Swap(&data_[i], &data_[size_used_ - 1 - i]); - } - - // Sorts the members of this vector using the less than comparator (cmp_lt), - // which compares the values. Useful for GenericVectors to primitive types. - // Will not work so great for pointers (unless you just want to sort some - // pointers). You need to provide a specialization to sort_cmp to use - // your type. - void sort(); - - // Sort the array into the order defined by the qsort function comparator. - // The comparator function is as defined by qsort, ie. it receives pointers - // to two Ts and returns negative if the first element is to appear earlier - // in the result and positive if it is to appear later, with 0 for equal. - void sort(int (*comparator)(const void*, const void*)) { - qsort(data_, size_used_, sizeof(*data_), comparator); - } - - // Searches the array (assuming sorted in ascending order, using sort()) for - // an element equal to target and returns true if it is present. - // Use binary_search to get the index of target, or its nearest candidate. 
- bool bool_binary_search(const T& target) const { - int index = binary_search(target); - if (index >= size_used_) - return false; - return data_[index] == target; - } - // Searches the array (assuming sorted in ascending order, using sort()) for - // an element equal to target and returns the index of the best candidate. - // The return value is conceptually the largest index i such that - // data_[i] <= target or 0 if target < the whole vector. - // NOTE that this function uses operator> so really the return value is - // the largest index i such that data_[i] > target is false. - int binary_search(const T& target) const { - int bottom = 0; - int top = size_used_; - while (top - bottom > 1) { - int middle = (bottom + top) / 2; - if (data_[middle] > target) - top = middle; - else - bottom = middle; - } - return bottom; - } - - // Compact the vector by deleting elements using operator!= on basic types. - // The vector must be sorted. - void compact_sorted() { - if (size_used_ == 0) - return; - - // First element is in no matter what, hence the i = 1. - int last_write = 0; - for (int i = 1; i < size_used_; ++i) { - // Finds next unique item and writes it. - if (data_[last_write] != data_[i]) - data_[++last_write] = data_[i]; - } - // last_write is the index of a valid data cell, so add 1. - size_used_ = last_write + 1; - } - - // Compact the vector by deleting elements for which delete_cb returns - // true. delete_cb is a permanent callback and will be deleted. - void compact(TessResultCallback1* delete_cb) { - int new_size = 0; - int old_index = 0; - // Until the callback returns true, the elements stay the same. - while (old_index < size_used_ && !delete_cb->Run(old_index++)) - ++new_size; - // Now just copy anything else that gets false from delete_cb. 
- for (; old_index < size_used_; ++old_index) { - if (!delete_cb->Run(old_index)) { - data_[new_size++] = data_[old_index]; - } - } - size_used_ = new_size; - delete delete_cb; - } - - T dot_product(const GenericVector& other) const { - T result = static_cast(0); - for (int i = std::min(size_used_, other.size_used_) - 1; i >= 0; --i) - result += data_[i] * other.data_[i]; - return result; - } - - // Returns the index of what would be the target_index_th item in the array - // if the members were sorted, without actually sorting. Members are - // shuffled around, but it takes O(n) time. - // NOTE: uses operator< and operator== on the members. - int choose_nth_item(int target_index) { - // Make sure target_index is legal. - if (target_index < 0) - target_index = 0; // ensure legal - else if (target_index >= size_used_) - target_index = size_used_ - 1; - unsigned int seed = 1; - return choose_nth_item(target_index, 0, size_used_, &seed); - } - - // Swaps the elements with the given indices. - void swap(int index1, int index2) { - if (index1 != index2) { - T tmp = data_[index1]; - data_[index1] = data_[index2]; - data_[index2] = tmp; - } - } - // Returns true if all elements of *this are within the given range. - // Only uses operator< - bool WithinBounds(const T& rangemin, const T& rangemax) const { - for (int i = 0; i < size_used_; ++i) { - if (data_[i] < rangemin || rangemax < data_[i]) - return false; - } - return true; - } - - protected: - // Internal recursive version of choose_nth_item. - int choose_nth_item(int target_index, int start, int end, unsigned int* seed); - - // Init the object, allocating size memory. - void init(int size); - - // We are assuming that the object generally placed in the - // vector are small enough that for efficiency it makes sense - // to start with a larger initial size. 
- static const int kDefaultVectorSize = 4; - int32_t size_used_; - int32_t size_reserved_; - T* data_; - TessCallback1* clear_cb_; - // Mutable because Run method is not const - mutable TessResultCallback2* compare_cb_; -}; - -namespace tesseract { - -// Function to read a GenericVector from a whole file. -// Returns false on failure. -typedef bool (*FileReader)(const STRING& filename, GenericVector* data); -// Function to write a GenericVector to a whole file. -// Returns false on failure. -typedef bool (*FileWriter)(const GenericVector& data, - const STRING& filename); -// The default FileReader loads the whole file into the vector of char, -// returning false on error. -inline bool LoadDataFromFile(const char* filename, GenericVector* data) { - bool result = false; - FILE* fp = fopen(filename, "rb"); - if (fp != nullptr) { - fseek(fp, 0, SEEK_END); - long size = ftell(fp); - fseek(fp, 0, SEEK_SET); - // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here. - if (size > 0 && size < LONG_MAX) { - // reserve an extra byte in case caller wants to append a '\0' character - data->reserve(size + 1); - data->resize_no_init(size); - result = static_cast(fread(&(*data)[0], 1, size, fp)) == size; - } - fclose(fp); - } - return result; -} - -inline bool LoadDataFromFile(const STRING& filename, - GenericVector* data) { - return LoadDataFromFile(filename.string(), data); -} - -// The default FileWriter writes the vector of char to the filename file, -// returning false on error. -inline bool SaveDataToFile(const GenericVector& data, - const STRING& filename) { - FILE* fp = fopen(filename.string(), "wb"); - if (fp == nullptr) return false; - bool result = - static_cast(fwrite(&data[0], 1, data.size(), fp)) == data.size(); - fclose(fp); - return result; -} -// Reads a file as a vector of STRING. 
-inline bool LoadFileLinesToStrings(const STRING& filename, - GenericVector* lines) { - GenericVector data; - if (!LoadDataFromFile(filename.string(), &data)) { - return false; - } - STRING lines_str(&data[0], data.size()); - lines_str.split('\n', lines); - return true; -} - -template -bool cmp_eq(T const & t1, T const & t2) { - return t1 == t2; -} - -// Used by sort() -// return < 0 if t1 < t2 -// return 0 if t1 == t2 -// return > 0 if t1 > t2 -template -int sort_cmp(const void* t1, const void* t2) { - const T* a = static_cast (t1); - const T* b = static_cast (t2); - if (*a < *b) { - return -1; - } else if (*b < *a) { - return 1; - } else { - return 0; - } -} - -// Used by PointerVector::sort() -// return < 0 if t1 < t2 -// return 0 if t1 == t2 -// return > 0 if t1 > t2 -template -int sort_ptr_cmp(const void* t1, const void* t2) { - const T* a = *static_cast(t1); - const T* b = *static_cast(t2); - if (*a < *b) { - return -1; - } else if (*b < *a) { - return 1; - } else { - return 0; - } -} - -// Subclass for a vector of pointers. Use in preference to GenericVector -// as it provides automatic deletion and correct serialization, with the -// corollary that all copy operations are deep copies of the pointed-to objects. -template -class PointerVector : public GenericVector { - public: - PointerVector() : GenericVector() { } - explicit PointerVector(int size) : GenericVector(size) { } - ~PointerVector() { - // Clear must be called here, even though it is called again by the base, - // as the base will call the wrong clear. - clear(); - } - // Copy must be deep, as the pointers will be automatically deleted on - // destruction. 
- PointerVector(const PointerVector& other) : GenericVector(other) { - this->init(other.size()); - this->operator+=(other); - } - PointerVector& operator+=(const PointerVector& other) { - this->reserve(this->size_used_ + other.size_used_); - for (int i = 0; i < other.size(); ++i) { - this->push_back(new T(*other.data_[i])); - } - return *this; - } - - PointerVector& operator=(const PointerVector& other) { - if (&other != this) { - this->truncate(0); - this->operator+=(other); - } - return *this; - } - - // Removes an element at the given index and - // shifts the remaining elements to the left. - void remove(int index) { - delete GenericVector::data_[index]; - GenericVector::remove(index); - } - - // Truncates the array to the given size by removing the end. - // If the current size is less, the array is not expanded. - void truncate(int size) { - for (int i = size; i < GenericVector::size_used_; ++i) - delete GenericVector::data_[i]; - GenericVector::truncate(size); - } - - // Compact the vector by deleting elements for which delete_cb returns - // true. delete_cb is a permanent callback and will be deleted. - void compact(TessResultCallback1* delete_cb) { - int new_size = 0; - int old_index = 0; - // Until the callback returns true, the elements stay the same. - while (old_index < GenericVector::size_used_ && - !delete_cb->Run(GenericVector::data_[old_index++])) - ++new_size; - // Now just copy anything else that gets false from delete_cb. - for (; old_index < GenericVector::size_used_; ++old_index) { - if (!delete_cb->Run(GenericVector::data_[old_index])) { - GenericVector::data_[new_size++] = - GenericVector::data_[old_index]; - } else { - delete GenericVector::data_[old_index]; - } - } - GenericVector::size_used_ = new_size; - delete delete_cb; - } - - // Clear the array, calling the clear callback function if any. - // All the owned callbacks are also deleted. 
- // If you don't want the callbacks to be deleted, before calling clear, set - // the callback to nullptr. - void clear() { - GenericVector::delete_data_pointers(); - GenericVector::clear(); - } - - // Writes a vector of (pointers to) classes to the given file. Assumes the - // existence of bool T::Serialize(FILE*) const that returns false in case of - // error. There is no Serialize for simple types, as you would have a - // normal GenericVector of those. - // Returns false in case of error. - bool Serialize(FILE* fp) const { - int32_t used = GenericVector::size_used_; - if (fwrite(&used, sizeof(used), 1, fp) != 1) return false; - for (int i = 0; i < used; ++i) { - int8_t non_null = GenericVector::data_[i] != nullptr; - if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false; - if (non_null && !GenericVector::data_[i]->Serialize(fp)) return false; - } - return true; - } - bool Serialize(TFile* fp) const { - int32_t used = GenericVector::size_used_; - if (fp->FWrite(&used, sizeof(used), 1) != 1) return false; - for (int i = 0; i < used; ++i) { - int8_t non_null = GenericVector::data_[i] != nullptr; - if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false; - if (non_null && !GenericVector::data_[i]->Serialize(fp)) return false; - } - return true; - } - // Reads a vector of (pointers to) classes to the given file. Assumes the - // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in - // case of error. There is no Serialize for simple types, as you would have a - // normal GenericVector of those. - // If swap is true, assumes a big/little-endian swap is needed. - // Also needs T::T(), as new T is used in this function. - // Returns false in case of error. - bool DeSerialize(bool swap, FILE* fp) { - uint32_t reserved; - if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; - if (swap) Reverse32(&reserved); - // Arbitrarily limit the number of elements to protect against bad data. 
- assert(reserved <= UINT16_MAX); - if (reserved > UINT16_MAX) { - return false; - } - GenericVector::reserve(reserved); - truncate(0); - for (uint32_t i = 0; i < reserved; ++i) { - int8_t non_null; - if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false; - T* item = nullptr; - if (non_null) { - item = new T; - if (!item->DeSerialize(swap, fp)) { - delete item; - return false; - } - this->push_back(item); - } else { - // Null elements should keep their place in the vector. - this->push_back(nullptr); - } - } - return true; - } - bool DeSerialize(TFile* fp) { - int32_t reserved; - if (!DeSerializeSize(fp, &reserved)) return false; - GenericVector::reserve(reserved); - truncate(0); - for (int i = 0; i < reserved; ++i) { - if (!DeSerializeElement(fp)) return false; - } - return true; - } - // Enables deserialization of a selection of elements. Note that in order to - // retain the integrity of the stream, the caller must call some combination - // of DeSerializeElement and DeSerializeSkip of the exact number returned in - // *size, assuming a true return. - static bool DeSerializeSize(TFile* fp, int32_t* size) { - return fp->FReadEndian(size, sizeof(*size), 1) == 1; - } - // Reads and appends to the vector the next element of the serialization. - bool DeSerializeElement(TFile* fp) { - int8_t non_null; - if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; - T* item = nullptr; - if (non_null) { - item = new T; - if (!item->DeSerialize(fp)) { - delete item; - return false; - } - this->push_back(item); - } else { - // Null elements should keep their place in the vector. - this->push_back(nullptr); - } - return true; - } - // Skips the next element of the serialization. 
- static bool DeSerializeSkip(TFile* fp) { - int8_t non_null; - if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; - if (non_null) { - if (!T::SkipDeSerialize(fp)) return false; - } - return true; - } - - // Sorts the items pointed to by the members of this vector using - // t::operator<(). - void sort() { this->GenericVector::sort(&sort_ptr_cmp); } -}; - -} // namespace tesseract - -// A useful vector that uses operator== to do comparisons. -template -class GenericVectorEqEq : public GenericVector { - public: - GenericVectorEqEq() { - GenericVector::set_compare_callback( - NewPermanentTessCallback(tesseract::cmp_eq)); - } - GenericVectorEqEq(int size) : GenericVector(size) { - GenericVector::set_compare_callback( - NewPermanentTessCallback(tesseract::cmp_eq)); - } -}; - -template -void GenericVector::init(int size) { - size_used_ = 0; - if (size <= 0) { - data_ = nullptr; - size_reserved_ = 0; - } else { - if (size < kDefaultVectorSize) size = kDefaultVectorSize; - data_ = new T[size]; - size_reserved_ = size; - } - clear_cb_ = nullptr; - compare_cb_ = nullptr; -} - -template -GenericVector::~GenericVector() { - clear(); -} - -// Reserve some memory. If the internal array contains elements, they are -// copied. -template -void GenericVector::reserve(int size) { - if (size_reserved_ >= size || size <= 0) - return; - if (size < kDefaultVectorSize) size = kDefaultVectorSize; - T* new_array = new T[size]; - for (int i = 0; i < size_used_; ++i) - new_array[i] = data_[i]; - delete[] data_; - data_ = new_array; - size_reserved_ = size; -} - -template -void GenericVector::double_the_size() { - if (size_reserved_ == 0) { - reserve(kDefaultVectorSize); - } - else { - reserve(2 * size_reserved_); - } -} - -// Resizes to size and sets all values to t. -template -void GenericVector::init_to_size(int size, const T& t) { - reserve(size); - size_used_ = size; - for (int i = 0; i < size; ++i) - data_[i] = t; -} - - -// Return the object from an index. 
-template -T &GenericVector::get(int index) const { - assert(index >= 0 && index < size_used_); - return data_[index]; -} - -template -T &GenericVector::operator[](int index) const { - assert(index >= 0 && index < size_used_); - return data_[index]; -} - -template -T &GenericVector::back() const { - assert(size_used_ > 0); - return data_[size_used_ - 1]; -} -// Returns the last object and removes it. -template -T GenericVector::pop_back() { - assert(size_used_ > 0); - return data_[--size_used_]; -} - -// Return the object from an index. -template -void GenericVector::set(const T& t, int index) { - assert(index >= 0 && index < size_used_); - data_[index] = t; -} - -// Shifts the rest of the elements to the right to make -// space for the new elements and inserts the given element -// at the specified index. -template -void GenericVector::insert(const T& t, int index) { - assert(index >= 0 && index <= size_used_); - if (size_reserved_ == size_used_) - double_the_size(); - for (int i = size_used_; i > index; --i) { - data_[i] = data_[i-1]; - } - data_[index] = t; - size_used_++; -} - -// Removes an element at the given index and -// shifts the remaining elements to the left. -template -void GenericVector::remove(int index) { - assert(index >= 0 && index < size_used_); - for (int i = index; i < size_used_ - 1; ++i) { - data_[i] = data_[i+1]; - } - size_used_--; -} - -// Return true if the index is valindex -template -T GenericVector::contains_index(int index) const { - return index >= 0 && index < size_used_; -} - -// Return the index of the T object. 
-template -int GenericVector::get_index(const T& object) const { - for (int i = 0; i < size_used_; ++i) { - assert(compare_cb_ != nullptr); - if (compare_cb_->Run(object, data_[i])) - return i; - } - return -1; -} - -// Return true if T is in the array -template -bool GenericVector::contains(const T& object) const { - return get_index(object) != -1; -} - -// Add an element in the array -template -int GenericVector::push_back(T object) { - int index = 0; - if (size_used_ == size_reserved_) - double_the_size(); - index = size_used_++; - data_[index] = object; - return index; -} - -template -int GenericVector::push_back_new(const T& object) { - int index = get_index(object); - if (index >= 0) - return index; - return push_back(object); -} - -// Add an element in the array (front) -template -int GenericVector::push_front(const T& object) { - if (size_used_ == size_reserved_) - double_the_size(); - for (int i = size_used_; i > 0; --i) - data_[i] = data_[i-1]; - data_[0] = object; - ++size_used_; - return 0; -} - -template -void GenericVector::operator+=(const T& t) { - push_back(t); -} - -template -GenericVector &GenericVector::operator+=(const GenericVector& other) { - this->reserve(size_used_ + other.size_used_); - for (int i = 0; i < other.size(); ++i) { - this->operator+=(other.data_[i]); - } - return *this; -} - -template -GenericVector &GenericVector::operator=(const GenericVector& other) { - if (&other != this) { - this->truncate(0); - this->operator+=(other); - } - return *this; -} - -// Add a callback to be called to delete the elements when the array took -// their ownership. -template -void GenericVector::set_clear_callback(TessCallback1* cb) { - clear_cb_ = cb; -} - -// Add a callback to be called to delete the elements when the array took -// their ownership. -template -void GenericVector::set_compare_callback( - TessResultCallback2* cb) { - compare_cb_ = cb; -} - -// Clear the array, calling the callback function if any. 
-template -void GenericVector::clear() { - if (size_reserved_ > 0 && clear_cb_ != nullptr) { - for (int i = 0; i < size_used_; ++i) - clear_cb_->Run(data_[i]); - } - delete[] data_; - data_ = nullptr; - size_used_ = 0; - size_reserved_ = 0; - delete clear_cb_; - clear_cb_ = nullptr; - delete compare_cb_; - compare_cb_ = nullptr; -} - -template -void GenericVector::delete_data_pointers() { - for (int i = 0; i < size_used_; ++i) { - delete data_[i]; - } -} - - -template -bool GenericVector::write( - FILE* f, TessResultCallback2* cb) const { - if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false; - if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false; - if (cb != nullptr) { - for (int i = 0; i < size_used_; ++i) { - if (!cb->Run(f, data_[i])) { - delete cb; - return false; - } - } - delete cb; - } else { - if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) - return false; - } - return true; -} - -template -bool GenericVector::read( - tesseract::TFile* f, TessResultCallback2* cb) { - int32_t reserved; - if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; - reserve(reserved); - if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false; - if (cb != nullptr) { - for (int i = 0; i < size_used_; ++i) { - if (!cb->Run(f, data_ + i)) { - delete cb; - return false; - } - } - delete cb; - } else { - if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) - return false; - } - return true; -} - -// Writes a vector of simple types to the given file. Assumes that bitwise -// read/write of T will work. Returns false in case of error. 
-template -bool GenericVector::Serialize(FILE* fp) const { - if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; - if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) - return false; - return true; -} -template -bool GenericVector::Serialize(tesseract::TFile* fp) const { - if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false; - if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false; - return true; -} - -// Reads a vector of simple types from the given file. Assumes that bitwise -// read/write will work with ReverseN according to sizeof(T). -// Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -template -bool GenericVector::DeSerialize(bool swap, FILE* fp) { - uint32_t reserved; - if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; - if (swap) Reverse32(&reserved); - // Arbitrarily limit the number of elements to protect against bad data. - assert(reserved <= UINT16_MAX); - if (reserved > UINT16_MAX) return false; - reserve(reserved); - size_used_ = reserved; - if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false; - if (swap) { - for (int i = 0; i < size_used_; ++i) - ReverseN(&data_[i], sizeof(data_[i])); - } - return true; -} -template -bool GenericVector::DeSerialize(tesseract::TFile* fp) { - uint32_t reserved; - if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; - // Arbitrarily limit the number of elements to protect against bad data. 
- const uint32_t limit = 50000000; - assert(reserved <= limit); - if (reserved > limit) return false; - reserve(reserved); - size_used_ = reserved; - return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_; -} -template -bool GenericVector::SkipDeSerialize(tesseract::TFile* fp) { - uint32_t reserved; - if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; - return fp->FRead(nullptr, sizeof(T), reserved) == reserved; -} - -// Writes a vector of classes to the given file. Assumes the existence of -// bool T::Serialize(FILE* fp) const that returns false in case of error. -// Returns false in case of error. -template -bool GenericVector::SerializeClasses(FILE* fp) const { - if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; - for (int i = 0; i < size_used_; ++i) { - if (!data_[i].Serialize(fp)) return false; - } - return true; -} -template -bool GenericVector::SerializeClasses(tesseract::TFile* fp) const { - if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false; - for (int i = 0; i < size_used_; ++i) { - if (!data_[i].Serialize(fp)) return false; - } - return true; -} - -// Reads a vector of classes from the given file. Assumes the existence of -// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of -// error. Also needs T::T() and T::T(constT&), as init_to_size is used in -// this function. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. 
-template -bool GenericVector::DeSerializeClasses(bool swap, FILE* fp) { - int32_t reserved; - if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; - if (swap) Reverse32(&reserved); - T empty; - init_to_size(reserved, empty); - for (int i = 0; i < reserved; ++i) { - if (!data_[i].DeSerialize(swap, fp)) return false; - } - return true; -} -template -bool GenericVector::DeSerializeClasses(tesseract::TFile* fp) { - int32_t reserved; - if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; - T empty; - init_to_size(reserved, empty); - for (int i = 0; i < reserved; ++i) { - if (!data_[i].DeSerialize(fp)) return false; - } - return true; -} -template -bool GenericVector::SkipDeSerializeClasses(tesseract::TFile* fp) { - int32_t reserved; - if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; - for (int i = 0; i < reserved; ++i) { - if (!T::SkipDeSerialize(fp)) return false; - } - return true; -} - -// This method clear the current object, then, does a shallow copy of -// its argument, and finally invalidates its argument. -template -void GenericVector::move(GenericVector* from) { - this->clear(); - this->data_ = from->data_; - this->size_reserved_ = from->size_reserved_; - this->size_used_ = from->size_used_; - this->compare_cb_ = from->compare_cb_; - this->clear_cb_ = from->clear_cb_; - from->data_ = nullptr; - from->clear_cb_ = nullptr; - from->compare_cb_ = nullptr; - from->size_used_ = 0; - from->size_reserved_ = 0; -} - -template -void GenericVector::sort() { - sort(&tesseract::sort_cmp); -} - -// Internal recursive version of choose_nth_item. 
-// The algorithm used comes from "Algorithms" by Sedgewick: -// http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC -// The principle is to choose a random pivot, and move everything less than -// the pivot to its left, and everything greater than the pivot to the end -// of the array, then recurse on the part that contains the desired index, or -// just return the answer if it is in the equal section in the middle. -// The random pivot guarantees average linear time for the same reason that -// n times vector::push_back takes linear time on average. -// target_index, start and and end are all indices into the full array. -// Seed is a seed for rand_r for thread safety purposes. Its value is -// unimportant as the random numbers do not affect the result except -// between equal answers. -template -int GenericVector::choose_nth_item(int target_index, int start, int end, - unsigned int* seed) { - // Number of elements to process. - int num_elements = end - start; - // Trivial cases. - if (num_elements <= 1) - return start; - if (num_elements == 2) { - if (data_[start] < data_[start + 1]) { - return target_index > start ? start + 1 : start; - } else { - return target_index > start ? start : start + 1; - } - } - // Place the pivot at start. - #ifndef rand_r // _MSC_VER, ANDROID - srand(*seed); - #define rand_r(seed) rand() - #endif // _MSC_VER - int pivot = rand_r(seed) % num_elements + start; - swap(pivot, start); - // The invariant condition here is that items [start, next_lesser) are less - // than the pivot (which is at index next_lesser) and items - // [prev_greater, end) are greater than the pivot, with items - // [next_lesser, prev_greater) being equal to the pivot. 
- int next_lesser = start; - int prev_greater = end; - for (int next_sample = start + 1; next_sample < prev_greater;) { - if (data_[next_sample] < data_[next_lesser]) { - swap(next_lesser++, next_sample++); - } else if (data_[next_sample] == data_[next_lesser]) { - ++next_sample; - } else { - swap(--prev_greater, next_sample); - } - } - // Now the invariant is set up, we recurse on just the section that contains - // the desired index. - if (target_index < next_lesser) - return choose_nth_item(target_index, start, next_lesser, seed); - else if (target_index < prev_greater) - return next_lesser; // In equal bracket. - else - return choose_nth_item(target_index, prev_greater, end, seed); -} - - -#endif // TESSERACT_CCUTIL_GENERICVECTOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/globaloc.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/globaloc.cpp deleted file mode 100644 index 36475448..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/globaloc.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/********************************************************************** - * File: errcode.cpp (Formerly error.c) - * Description: Generic error handler function - * Author: Ray Smith - * Created: Tue May 1 16:28:39 BST 1990 - * - * (C) Copyright 1989, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "globaloc.h" -#include -#ifdef __linux__ -#include // For SYS_gettid. -#include // For syscall itself. -#endif -#include "allheaders.h" -#include "errcode.h" -#include "tprintf.h" - -// Size of thread-id array of pixes to keep in case of crash. -const int kMaxNumThreadPixes = 32768; - -static Pix* global_crash_pixes[kMaxNumThreadPixes]; - -void SavePixForCrash(int resolution, Pix* pix) { -#ifdef __linux__ -#ifndef ANDROID - int thread_id = syscall(SYS_gettid) % kMaxNumThreadPixes; -#else - int thread_id = gettid() % kMaxNumThreadPixes; -#endif - pixDestroy(&global_crash_pixes[thread_id]); - if (pix != nullptr) { - Pix* clone = pixClone(pix); - pixSetXRes(clone, resolution); - pixSetYRes(clone, resolution); - global_crash_pixes[thread_id] = clone; - } -#endif -} - -// CALL ONLY from a signal handler! Writes a crash image to stderr. -void signal_exit(int signal_code) { - tprintf("Received signal %d!\n", signal_code); -#ifdef __linux__ -#ifndef ANDROID - int thread_id = syscall(SYS_gettid) % kMaxNumThreadPixes; -#else - int thread_id = gettid() % kMaxNumThreadPixes; -#endif - if (global_crash_pixes[thread_id] != nullptr) { - fprintf(stderr, "Crash caused by image with resolution %d\n", - pixGetYRes(global_crash_pixes[thread_id])); - fprintf(stderr, "\n"); - pixWriteStreamPng(stderr, global_crash_pixes[thread_id], 0.0); - fprintf(stderr, "\n\n"); - } - // Raise an uncaught signal, so as to get a useful stack trace. - raise(SIGILL); -#else - abort(); -#endif -} - -void err_exit() { - ASSERT_HOST("Fatal error encountered!" == nullptr); -} - -// TODO: remove empty function? -void set_global_loc_code(int loc_code) { - // global_loc_code = loc_code; -} - -// TODO: remove empty function? -void set_global_subloc_code(int loc_code) { - // global_subloc_code = loc_code; -} - -// TODO: remove empty function? 
-void set_global_subsubloc_code(int loc_code) { - // global_subsubloc_code = loc_code; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/globaloc.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/globaloc.h deleted file mode 100644 index 1118fbc9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/globaloc.h +++ /dev/null @@ -1,39 +0,0 @@ -/********************************************************************** - * File: globaloc.h (Formerly error.h) - * Description: Header file for generic error handler class - * Author: Ray Smith - * Created: Tue May 1 16:23:36 BST 1990 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef GLOBALOC_H -#define GLOBALOC_H - -#include "host.h" - -// Saves a clone of the given pix, and notes its resolution in thread-specific -// data, so that the image can be written prior to a crash. 
-struct Pix; -void SavePixForCrash(int resolution, Pix* pix); - -void signal_exit(int signal_code); - -void err_exit(); - -void set_global_loc_code(int loc_code); - -void set_global_subloc_code(int loc_code); - -void set_global_subsubloc_code(int loc_code); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/helpers.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/helpers.h deleted file mode 100644 index c5a8dfa8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/helpers.h +++ /dev/null @@ -1,213 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: helpers.h - * Description: General utility functions - * Author: Daria Antonova - * Created: Wed Apr 8 14:37:00 2009 - * Language: C++ - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 2009, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ********************************************************************************/ - -#ifndef TESSERACT_CCUTIL_HELPERS_H_ -#define TESSERACT_CCUTIL_HELPERS_H_ - -#include -#include -#include -#if ANDROID -#include -#else -#include -#endif -#include - -#include "host.h" - -// TODO(rays) Put the rest of the helpers in the namespace. 
-namespace tesseract { - -// A simple linear congruential random number generator, using Knuth's -// constants from: -// http://en.wikipedia.org/wiki/Linear_congruential_generator. -class TRand { - public: - TRand() : seed_(1) {} - // Sets the seed to the given value. - void set_seed(uint64_t seed) { - seed_ = seed; - } - // Sets the seed using a hash of a string. - void set_seed(const std::string& str) { -#if ANDROID - std::tr1::hash hasher; -#else - std::hash hasher; -#endif - set_seed(static_cast(hasher(str))); - } - - // Returns an integer in the range 0 to INT32_MAX. - int32_t IntRand() { - Iterate(); - return seed_ >> 33; - } - // Returns a floating point value in the range [-range, range]. - double SignedRand(double range) { - return range * 2.0 * IntRand() / INT32_MAX - range; - } - // Returns a floating point value in the range [0, range]. - double UnsignedRand(double range) { - return range * IntRand() / INT32_MAX; - } - - private: - // Steps the generator to the next value. - void Iterate() { - seed_ *= 6364136223846793005ULL; - seed_ += 1442695040888963407ULL; - } - - // The current value of the seed. - uint64_t seed_; -}; - -} // namespace tesseract - -// Remove newline (if any) at the end of the string. -inline void chomp_string(char *str) { - int last_index = static_cast(strlen(str)) - 1; - while (last_index >= 0 && - (str[last_index] == '\n' || str[last_index] == '\r')) { - str[last_index--] = '\0'; - } -} - -// Advance the current pointer of the file if it points to a newline character. -inline void SkipNewline(FILE *file) { - if (fgetc(file) != '\n') fseek(file, -1, SEEK_CUR); -} - -// Swaps the two args pointed to by the pointers. -// Operator= and copy constructor must work on T. -template inline void Swap(T* p1, T* p2) { - T tmp(*p2); - *p2 = *p1; - *p1 = tmp; -} - -// return the smallest multiple of block_size greater than or equal to n. 
-inline int RoundUp(int n, int block_size) { - return block_size * ((n + block_size - 1) / block_size); -} - -// Clip a numeric value to the interval [lower_bound, upper_bound]. -template -inline T ClipToRange(const T& x, const T& lower_bound, const T& upper_bound) { - if (x < lower_bound) - return lower_bound; - if (x > upper_bound) - return upper_bound; - return x; -} - -// Extend the range [lower_bound, upper_bound] to include x. -template -inline void UpdateRange(const T1& x, T2* lower_bound, T2* upper_bound) { - if (x < *lower_bound) - *lower_bound = x; - if (x > *upper_bound) - *upper_bound = x; -} - -// Decrease lower_bound to be <= x_lo AND increase upper_bound to be >= x_hi. -template -inline void UpdateRange(const T1& x_lo, const T1& x_hi, - T2* lower_bound, T2* upper_bound) { - if (x_lo < *lower_bound) - *lower_bound = x_lo; - if (x_hi > *upper_bound) - *upper_bound = x_hi; -} - -// Intersect the range [*lower2, *upper2] with the range [lower1, upper1], -// putting the result back in [*lower2, *upper2]. -// If non-intersecting ranges are given, we end up with *lower2 > *upper2. -template -inline void IntersectRange(const T& lower1, const T& upper1, - T* lower2, T* upper2) { - if (lower1 > *lower2) - *lower2 = lower1; - if (upper1 < *upper2) - *upper2 = upper1; -} - -// Proper modulo arithmetic operator. Returns a mod b that works for -ve a. -// For any integer a and positive b, returns r : 0<=r= 0 ? (a + b / 2) / b : (a - b / 2) / b; -} - -// Return a double cast to int with rounding. -inline int IntCastRounded(double x) { - return x >= 0.0 ? static_cast(x + 0.5) : -static_cast(-x + 0.5); -} - -// Return a float cast to int with rounding. -inline int IntCastRounded(float x) { - return x >= 0.0f ? static_cast(x + 0.5f) : -static_cast(-x + 0.5f); -} - -// Reverse the order of bytes in a n byte quantity for big/little-endian switch. 
-inline void ReverseN(void* ptr, int num_bytes) { - assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8); - char* cptr = static_cast(ptr); - int halfsize = num_bytes / 2; - for (int i = 0; i < halfsize; ++i) { - char tmp = cptr[i]; - cptr[i] = cptr[num_bytes - 1 - i]; - cptr[num_bytes - 1 - i] = tmp; - } -} - -// Reverse the order of bytes in a 16 bit quantity for big/little-endian switch. -inline void Reverse16(void *ptr) { - ReverseN(ptr, 2); -} - -// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch. -inline void Reverse32(void *ptr) { - ReverseN(ptr, 4); -} - -// Reverse the order of bytes in a 64 bit quantity for big/little-endian switch. -inline void Reverse64(void* ptr) { - ReverseN(ptr, 8); -} - - -#endif // TESSERACT_CCUTIL_HELPERS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/host.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/host.h deleted file mode 100644 index 148399f2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/host.h +++ /dev/null @@ -1,65 +0,0 @@ -/****************************************************************************** - ** Filename: host.h - ** Purpose: This is the system independent typedefs and defines - ** Author: MN, JG, MD - ** - ** (c) Copyright Hewlett-Packard Company, 1988-1996. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- */ - -#ifndef TESSERACT_CCUTIL_HOST_H_ -#define TESSERACT_CCUTIL_HOST_H_ - -#include -#include "platform.h" -/* _WIN32 */ -#ifdef _WIN32 -#include -#undef min -#undef max -#endif - -#include // PRId32, ... -#include // int32_t, ... - -// definitions of portable data types (numbers and characters) -using BOOL8 = unsigned char; - -#if defined(_WIN32) - -/* MinGW defines the standard PRI... macros, but MSVS doesn't. */ - -#if !defined(PRId32) -#define PRId32 "d" -#endif - -#if !defined(PRId64) -#define PRId64 "I64d" -#endif - -#endif /* _WIN32 */ - -// Defines -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -// Return true if x is within tolerance of y -template bool NearlyEqual(T x, T y, T tolerance) { - T diff = x - y; - return diff <= tolerance && -diff <= tolerance; -} - -#endif // TESSERACT_CCUTIL_HOST_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/indexmapbidi.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/indexmapbidi.cpp deleted file mode 100644 index 8670e321..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/indexmapbidi.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: indexmapbidi.cpp -// Description: Bi-directional mapping between a sparse and compact space. -// Author: rays@google.com (Ray Smith) -// Created: Tue Apr 06 11:33:59 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "indexmapbidi.h" - -namespace tesseract { - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -IndexMap::~IndexMap() = default; - -// SparseToCompact takes a sparse index to an index in the compact space. -// Uses a binary search to find the result. For faster speed use -// IndexMapBiDi, but that takes more memory. -int IndexMap::SparseToCompact(int sparse_index) const { - int result = compact_map_.binary_search(sparse_index); - return compact_map_[result] == sparse_index ? result : -1; -} - -// Copy from the input. -void IndexMap::CopyFrom(const IndexMap& src) { - sparse_size_ = src.sparse_size_; - compact_map_ = src.compact_map_; -} -void IndexMap::CopyFrom(const IndexMapBiDi& src) { - sparse_size_ = src.SparseSize(); - compact_map_ = src.compact_map_; -} - -// Writes to the given file. Returns false in case of error. -bool IndexMap::Serialize(FILE* fp) const { - return tesseract::Serialize(fp, &sparse_size_) && compact_map_.Serialize(fp); -} - -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool IndexMap::DeSerialize(bool swap, FILE* fp) { - uint32_t sparse_size; - if (!tesseract::DeSerialize(fp, &sparse_size)) return false; - if (swap) - ReverseN(&sparse_size, sizeof(sparse_size)); - // Arbitrarily limit the number of elements to protect against bad data. - if (sparse_size > UINT16_MAX) return false; - sparse_size_ = sparse_size; - return compact_map_.DeSerialize(swap, fp); -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. 
-IndexMapBiDi::~IndexMapBiDi() = default; - -// Top-level init function in a single call to initialize a map to select -// a single contiguous subrange [start, end) of the sparse space to be mapped -// 1 to 1 to the compact space, with all other elements of the sparse space -// left unmapped. -// No need to call Setup after this. -void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) { - Init(sparse_size, false); - for (int i = start; i < end; ++i) - SetMap(i, true); - Setup(); -} - -// Initializes just the sparse_map_ to the given size with either all -// forward indices mapped (all_mapped = true) or none (all_mapped = false). -// Call Setup immediately after, or make calls to SetMap first to adjust the -// mapping and then call Setup before using the map. -void IndexMapBiDi::Init(int size, bool all_mapped) { - sparse_map_.init_to_size(size, -1); - if (all_mapped) { - for (int i = 0; i < size; ++i) - sparse_map_[i] = i; - } -} - -// Sets a given index in the sparse_map_ to be mapped or not. -void IndexMapBiDi::SetMap(int sparse_index, bool mapped) { - sparse_map_[sparse_index] = mapped ? 0 : -1; -} - -// Sets up the sparse_map_ and compact_map_ properly after Init and -// some calls to SetMap. Assumes an ordered 1-1 map from set indices -// in the forward map to the compact space. -void IndexMapBiDi::Setup() { - int compact_size = 0; - for (int i = 0; i < sparse_map_.size(); ++i) { - if (sparse_map_[i] >= 0) { - sparse_map_[i] = compact_size++; - } - } - compact_map_.init_to_size(compact_size, -1); - for (int i = 0; i < sparse_map_.size(); ++i) { - if (sparse_map_[i] >= 0) { - compact_map_[sparse_map_[i]] = i; - } - } - sparse_size_ = sparse_map_.size(); -} - -// Copy from the input. -void IndexMapBiDi::CopyFrom(const IndexMapBiDi& src) { - sparse_map_ = src.sparse_map_; - compact_map_ = src.compact_map_; - sparse_size_ = sparse_map_.size(); -} - -// Merges the two compact space indices. 
May be called many times, but -// the merges must be concluded by a call to CompleteMerges. -// Returns true if a merge was actually performed. -bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) { - // Find the current master index for index1 and index2. - compact_index1 = MasterCompactIndex(compact_index1); - compact_index2 = MasterCompactIndex(compact_index2); - // Be sure that index1 < index2. - if (compact_index1 > compact_index2) { - int tmp = compact_index1; - compact_index1 = compact_index2; - compact_index2 = tmp; - } else if (compact_index1 == compact_index2) { - return false; - } - // To save iterating over all sparse_map_ entries, simply make the master - // entry for index2 point to index1. - // This leaves behind a potential chain of parents that needs to be chased, - // as above. - sparse_map_[compact_map_[compact_index2]] = compact_index1; - if (compact_index1 >= 0) - compact_map_[compact_index2] = compact_map_[compact_index1]; - return true; -} - -// Completes one or more Merge operations by further compacting the -// compact space. Unused compact space indices are removed, and the used -// ones above shuffled down to fill the gaps. -// Example: -// Input sparse_map_: (x indicates -1) -// x x 0 x 2 x x 4 x 0 x 2 x -// Output sparse_map_: -// x x 0 x 1 x x 2 x 0 x 1 x -// Output compact_map_: -// 2 4 7. -void IndexMapBiDi::CompleteMerges() { - // Ensure each sparse_map_entry contains a master compact_map_ index. - int compact_size = 0; - for (int i = 0; i < sparse_map_.size(); ++i) { - int compact_index = MasterCompactIndex(sparse_map_[i]); - sparse_map_[i] = compact_index; - if (compact_index >= compact_size) - compact_size = compact_index + 1; - } - // Re-generate the compact_map leaving holes for unused indices. 
- compact_map_.init_to_size(compact_size, -1); - for (int i = 0; i < sparse_map_.size(); ++i) { - if (sparse_map_[i] >= 0) { - if (compact_map_[sparse_map_[i]] == -1) - compact_map_[sparse_map_[i]] = i; - } - } - // Compact the compact_map, leaving tmp_compact_map saying where each - // index went to in the compacted map. - GenericVector tmp_compact_map; - tmp_compact_map.init_to_size(compact_size, -1); - compact_size = 0; - for (int i = 0; i < compact_map_.size(); ++i) { - if (compact_map_[i] >= 0) { - tmp_compact_map[i] = compact_size; - compact_map_[compact_size++] = compact_map_[i]; - } - } - compact_map_.truncate(compact_size); - // Now modify the entries in the sparse map to point to the new locations. - for (int i = 0; i < sparse_map_.size(); ++i) { - if (sparse_map_[i] >= 0) { - sparse_map_[i] = tmp_compact_map[sparse_map_[i]]; - } - } -} - -// Writes to the given file. Returns false in case of error. -bool IndexMapBiDi::Serialize(FILE* fp) const { - if (!IndexMap::Serialize(fp)) return false; - // Make a vector containing the rest of the map. If the map is many-to-one - // then each additional sparse entry needs to be stored. - // Normally we store only the compact map to save space. - GenericVector remaining_pairs; - for (int i = 0; i < sparse_map_.size(); ++i) { - if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) { - remaining_pairs.push_back(i); - remaining_pairs.push_back(sparse_map_[i]); - } - } - if (!remaining_pairs.Serialize(fp)) return false; - return true; -} - -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. 
-bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) { - if (!IndexMap::DeSerialize(swap, fp)) return false; - GenericVector remaining_pairs; - if (!remaining_pairs.DeSerialize(swap, fp)) return false; - sparse_map_.init_to_size(sparse_size_, -1); - for (int i = 0; i < compact_map_.size(); ++i) { - sparse_map_[compact_map_[i]] = i; - } - for (int i = 0; i < remaining_pairs.size(); ++i) { - int sparse_index = remaining_pairs[i++]; - sparse_map_[sparse_index] = remaining_pairs[i]; - } - return true; -} - -// Bulk calls to SparseToCompact. -// Maps the given array of sparse indices to an array of compact indices. -// Assumes the input is sorted. The output indices are sorted and uniqued. -// Return value is the number of "missed" features, being features that -// don't map to the compact feature space. -int IndexMapBiDi::MapFeatures(const GenericVector& sparse, - GenericVector* compact) const { - compact->truncate(0); - int num_features = sparse.size(); - int missed_features = 0; - int prev_good_feature = -1; - for (int f = 0; f < num_features; ++f) { - int feature = sparse_map_[sparse[f]]; - if (feature >= 0) { - if (feature != prev_good_feature) { - compact->push_back(feature); - prev_good_feature = feature; - } - } else { - ++missed_features; - } - } - return missed_features; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/indexmapbidi.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/indexmapbidi.h deleted file mode 100644 index dac963cd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/indexmapbidi.h +++ /dev/null @@ -1,180 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: indexmapbidi.h -// Description: Bi-directional mapping between a sparse and compact space. -// Author: rays@google.com (Ray Smith) -// Created: Tue Apr 06 11:33:59 PDT 2010 -// -// (C) Copyright 2010, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_ -#define TESSERACT_CCUTIL_INDEXMAPBIDI_H_ - -#include -#include "genericvector.h" - -namespace tesseract { - -class IndexMapBiDi; - -// Bidirectional one-to-one mapping between a sparse and a compact discrete -// space. Many entries in the sparse space are unmapped, but those that are -// mapped have a 1-1 mapping to (and from) the compact space, where all -// values are used. This is useful for forming subsets of larger collections, -// such as subsets of character sets, or subsets of binary feature spaces. -// -// This base class provides basic functionality with binary search for the -// SparseToCompact mapping to save memory. -// For a faster inverse mapping, or to allow a many-to-one mapping, use -// IndexMapBiDi below. -// NOTE: there are currently no methods to setup an IndexMap on its own! -// It must be initialized by copying from an IndexMapBiDi or by DeSerialize. -class IndexMap { - public: - virtual ~IndexMap(); - - // SparseToCompact takes a sparse index to an index in the compact space. - // Uses a binary search to find the result. For faster speed use - // IndexMapBiDi, but that takes more memory. - virtual int SparseToCompact(int sparse_index) const; - - // CompactToSparse takes a compact index to the corresponding index in the - // sparse space. 
- int CompactToSparse(int compact_index) const { - return compact_map_[compact_index]; - } - // The size of the sparse space. - virtual int SparseSize() const { - return sparse_size_; - } - // The size of the compact space. - int CompactSize() const { - return compact_map_.size(); - } - - // Copy from the input. - void CopyFrom(const IndexMap& src); - void CopyFrom(const IndexMapBiDi& src); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - protected: - // The sparse space covers integers in the range [0, sparse_size_-1]. - int32_t sparse_size_; - // The compact space covers integers in the range [0, compact_map_.size()-1]. - // Each element contains the corresponding sparse index. - GenericVector compact_map_; -}; - -// Bidirectional many-to-one mapping between a sparse and a compact discrete -// space. As with IndexMap, many entries may be unmapped, but unlike IndexMap, -// of those that are, many may be mapped to the same compact index. -// If the map is many-to-one, it is not possible to directly obtain all the -// sparse indices that map to a single compact index. -// This map is time- rather than space-efficient. It stores the entire sparse -// space. -// IndexMapBiDi may be initialized in one of 3 ways: -// 1. Init(size, true); -// Setup(); -// Sets a complete 1:1 mapping with no unmapped elements. -// 2. Init(size, false); -// for ... SetMap(index, true); -// Setup(); -// Specifies precisely which sparse indices are mapped. The mapping is 1:1. -// 3. Either of the above, followed by: -// for ... Merge(index1, index2); -// CompleteMerges(); -// Allows a many-to-one mapping by merging compact space indices. 
-class IndexMapBiDi : public IndexMap { - public: - virtual ~IndexMapBiDi(); - - // Top-level init function in a single call to initialize a map to select - // a single contiguous subrange [start, end) of the sparse space to be mapped - // 1 to 1 to the compact space, with all other elements of the sparse space - // left unmapped. - // No need to call Setup after this. - void InitAndSetupRange(int sparse_size, int start, int end); - - // Initializes just the sparse_map_ to the given size with either all - // forward indices mapped (all_mapped = true) or none (all_mapped = false). - // Call Setup immediately after, or make calls to SetMap first to adjust the - // mapping and then call Setup before using the map. - void Init(int size, bool all_mapped); - // Sets a given index in the sparse_map_ to be mapped or not. - void SetMap(int sparse_index, bool mapped); - // Sets up the sparse_map_ and compact_map_ properly after Init and - // some calls to SetMap. Assumes an ordered 1-1 map from set indices - // in the sparse space to the compact space. - void Setup(); - - // Merges the two compact space indices. May be called many times, but - // the merges must be concluded by a call to CompleteMerges. - // Returns true if a merge was actually performed. - bool Merge(int compact_index1, int compact_index2); - // Returns true if the given compact index has been deleted. - bool IsCompactDeleted(int index) const { - return MasterCompactIndex(index) < 0; - } - // Completes one or more Merge operations by further compacting the - // compact space. - void CompleteMerges(); - - // SparseToCompact takes a sparse index to an index in the compact space. - virtual int SparseToCompact(int sparse_index) const { - return sparse_map_[sparse_index]; - } - // The size of the sparse space. - virtual int SparseSize() const { - return sparse_map_.size(); - } - - // Copy from the input. - void CopyFrom(const IndexMapBiDi& src); - - // Writes to the given file. Returns false in case of error. 
- bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - // Bulk calls to SparseToCompact. - // Maps the given array of sparse indices to an array of compact indices. - // Assumes the input is sorted. The output indices are sorted and uniqued. - // Return value is the number of "missed" features, being features that - // don't map to the compact feature space. - int MapFeatures(const GenericVector& sparse, - GenericVector* compact) const; - - private: - // Returns the master compact index for a given compact index. - // During a multiple merge operation, several compact indices may be - // combined, so we need to be able to find the master of all. - int MasterCompactIndex(int compact_index) const { - while (compact_index >= 0 && - sparse_map_[compact_map_[compact_index]] != compact_index) - compact_index = sparse_map_[compact_map_[compact_index]]; - return compact_index; - } - - // Direct look-up of the compact index for each element in sparse space. - GenericVector sparse_map_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/kdpair.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/kdpair.h deleted file mode 100644 index 8a3052b7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/kdpair.h +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2012 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: kdpair.h -// Description: Template pair class like STL pair but geared towards -// the Key+Data design pattern in which some data needs -// to be sorted or kept in a heap sorted on some separate key. -// Author: Ray Smith. 
-// Created: Thu Mar 15 14:48:05 PDT 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_KDPAIR_H_ -#define TESSERACT_CCUTIL_KDPAIR_H_ - -#include "genericvector.h" - -namespace tesseract { - -// A useful base struct to facilitate the common operation of sorting a vector -// of simple or smart-pointer data using a separate key. Similar to STL pair. -template -struct KDPair { - KDPair() = default; - KDPair(Key k, Data d) : data(d), key(k) {} - - int operator==(const KDPair& other) const { - return key == other.key; - } - - // WARNING! Keep data as the first element! KDPairInc and KDPairDec depend - // on the order of these elements so they can downcast pointers appropriately - // for use by GenericHeap::Reshuffle. - Data data; - Key key; -}; -// Specialization of KDPair to provide operator< for sorting in increasing order -// and recasting of data pointers for use with DoublePtr. -template -struct KDPairInc : public KDPair { - KDPairInc() = default; - KDPairInc(Key k, Data d) : KDPair(k, d) {} - // Operator< facilitates sorting in increasing order. - int operator<(const KDPairInc& other) const { - return this->key < other.key; - } - // Returns the input Data pointer recast to a KDPairInc pointer. - // Just casts a pointer to the first element to a pointer to the whole struct. 
- static KDPairInc* RecastDataPointer(Data* data_ptr) { - return reinterpret_cast(data_ptr); - } -}; -// Specialization of KDPair to provide operator< for sorting in decreasing order -// and recasting of data pointers for use with DoublePtr. -template -struct KDPairDec : public KDPair { - KDPairDec() = default; - KDPairDec(Key k, Data d) : KDPair(k, d) {} - // Operator< facilitates sorting in decreasing order by using operator> on - // the key values. - int operator<(const KDPairDec& other) const { - return this->key > other.key; - } - // Returns the input Data pointer recast to a KDPairDec pointer. - // Just casts a pointer to the first element to a pointer to the whole struct. - static KDPairDec* RecastDataPointer(Data* data_ptr) { - return reinterpret_cast(data_ptr); - } -}; - -// A useful base class to facilitate the common operation of sorting a vector -// of owned pointer data using a separate key. This class owns its data pointer, -// deleting it when it has finished with it, and providing copy constructor and -// operator= that have move semantics so that the data does not get copied and -// only a single instance of KDPtrPair holds a specific data pointer. -template -class KDPtrPair { - public: - KDPtrPair() : data_(nullptr) {} - KDPtrPair(Key k, Data* d) : data_(d), key_(k) {} - // Copy constructor steals the pointer from src and nulls it in src, thereby - // moving the (single) ownership of the data. - KDPtrPair(KDPtrPair& src) : data_(src.data_), key_(src.key_) { - src.data_ = nullptr; - } - // Destructor deletes data, assuming it is the sole owner. - ~KDPtrPair() { - delete this->data_; - this->data_ = nullptr; - } - // Operator= steals the pointer from src and nulls it in src, thereby - // moving the (single) ownership of the data. 
- void operator=(KDPtrPair& src) { - delete this->data_; - this->data_ = src.data_; - src.data_ = nullptr; - this->key_ = src.key_; - } - - int operator==(const KDPtrPair& other) const { - return key_ == other.key_; - } - - // Accessors. - const Key& key() const { - return key_; - } - void set_key(const Key& new_key) { - key_ = new_key; - } - const Data* data() const { - return data_; - } - // Sets the data pointer, taking ownership of the data. - void set_data(Data* new_data) { - delete data_; - data_ = new_data; - } - // Relinquishes ownership of the data pointer (setting it to nullptr). - Data* extract_data() { - Data* result = data_; - data_ = nullptr; - return result; - } - - private: - // Data members are private to keep deletion of data_ encapsulated. - Data* data_; - Key key_; -}; -// Specialization of KDPtrPair to provide operator< for sorting in increasing -// order. -template -struct KDPtrPairInc : public KDPtrPair { - // Since we are doing non-standard stuff we have to duplicate *all* the - // constructors and operator=. - KDPtrPairInc() : KDPtrPair() {} - KDPtrPairInc(Key k, Data* d) : KDPtrPair(k, d) {} - KDPtrPairInc(KDPtrPairInc& src) : KDPtrPair(src) {} - void operator=(KDPtrPairInc& src) { - KDPtrPair::operator=(src); - } - // Operator< facilitates sorting in increasing order. - int operator<(const KDPtrPairInc& other) const { - return this->key() < other.key(); - } -}; -// Specialization of KDPtrPair to provide operator< for sorting in decreasing -// order. -template -struct KDPtrPairDec : public KDPtrPair { - // Since we are doing non-standard stuff we have to duplicate *all* the - // constructors and operator=. - KDPtrPairDec() : KDPtrPair() {} - KDPtrPairDec(Key k, Data* d) : KDPtrPair(k, d) {} - KDPtrPairDec(KDPtrPairDec& src) : KDPtrPair(src) {} - void operator=(KDPtrPairDec& src) { - KDPtrPair::operator=(src); - } - // Operator< facilitates sorting in decreasing order by using operator> on - // the key values. 
- int operator<(const KDPtrPairDec& other) const { - return this->key() > other.key(); - } -}; - -// Specialization for a pair of ints in increasing order. -using IntKDPair = KDPairInc; - -// Vector of IntKDPair. -class KDVector : public GenericVector { - // TODO(rays) Add some code to manipulate a KDVector. For now there - // is nothing and this class is effectively a specialization typedef. -}; - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_KDPAIR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/lsterr.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/lsterr.h deleted file mode 100644 index c585c3a4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/lsterr.h +++ /dev/null @@ -1,44 +0,0 @@ -/********************************************************************** - * File: lsterr.h (Formerly listerr.h) - * Description: Errors shared by list modules - * Author: Phil Cheatle - * Created: Wed Jan 23 09:10:35 GMT 1991 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSERACT_CCUTIL_LSTERR_H_ -#define TESSERACT_CCUTIL_LSTERR_H_ - -#include "errcode.h" //must be last include - -const ERRCODE DONT_CONSTRUCT_LIST_BY_COPY = -"Can't create a list by assignment"; -const ERRCODE DONT_ASSIGN_LISTS = "Can't assign to lists"; -const ERRCODE SERIALISE_LINKS = "Attempted to (de)serialise a link element"; - -#ifndef NDEBUG - -const ERRCODE NO_LIST = "Iterator not set to a list"; -const ERRCODE NULL_OBJECT = "List found this = nullptr!"; -const ERRCODE NULL_DATA = "List would have returned a nullptr data pointer"; -const ERRCODE NULL_CURRENT = "List current position is nullptr"; -const ERRCODE NULL_NEXT = "Next element on the list is nullptr"; -const ERRCODE NULL_PREV = "Previous element on the list is nullptr"; -const ERRCODE EMPTY_LIST = "List is empty"; -const ERRCODE BAD_PARAMETER = "List parameter error"; -const ERRCODE STILL_LINKED = - "Attempting to add an element with non nullptr links, to a list"; - -#endif // !NDEBUG -#endif // TESSERACT_CCUTIL_LSTERR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/mainblk.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/mainblk.cpp deleted file mode 100644 index 079b0b58..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/mainblk.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/********************************************************************** - * File: mainblk.cpp (Formerly main.c) - * Description: Function to call from main() to setup. - * Author: Ray Smith - * Created: Tue Oct 22 11:09:40 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#if defined(_WIN32) -#include // for _access -#endif - -#include "fileerr.h" -#include "ccutil.h" - -#ifdef WINAPI_FAMILY -#include -#include -#endif - -const ERRCODE NO_PATH = -"Warning:explicit path for executable will not be used for configs"; -static const ERRCODE USAGE = "Usage"; - -namespace tesseract { -/********************************************************************** - * main_setup - * - * Main for mithras demo program. Read the arguments and set up globals. - **********************************************************************/ - -/** - * @brief CCUtil::main_setup - set location of tessdata and name of image - * - * @param argv0 - paths to the directory with language files and config files. - * An actual value of argv0 is used if not nullptr, otherwise TESSDATA_PREFIX is - * used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If previous - * is not successful - use current directory. - * @param basename - name of image - */ -void CCUtil::main_setup(const char *argv0, const char *basename) { - imagebasename = basename; /**< name of image */ - -#if WINAPI_FAMILY_APP - char *tessdata_prefix = 0; -#else - char *tessdata_prefix = getenv("TESSDATA_PREFIX"); -#endif - - if (argv0 != nullptr && *argv0 != '\0') { - /* Use tessdata prefix from the command line. */ - datadir = argv0; - } else if (tessdata_prefix) { - /* Use tessdata prefix from the environment. 
*/ - datadir = tessdata_prefix; -#if defined(_WIN32) || defined(WINAPI_FAMILY) - } else if (datadir == nullptr || _access(datadir.string(), 0) != 0) { - /* Look for tessdata in directory of executable. */ - char drive[_MAX_DRIVE]; - char dir[_MAX_DIR]; - char path[_MAX_PATH]; -#ifdef WINAPI_FAMILY - DWORD length = 0; -#else - DWORD length = GetModuleFileName(nullptr, path, sizeof(path)); -#endif - if (length > 0 && length < sizeof(path)) { - errno_t result = _splitpath_s(path, drive, sizeof(drive), - dir, sizeof(dir), nullptr, 0, nullptr, 0); - if (result == ERANGE) { - tprintf("Error: Path too long: %s\n", path); - } - - datadir = drive; - datadir += dir; - datadir += "/tessdata"; - } -#endif - -#if defined(TESSDATA_PREFIX) - } else { -/* Use tessdata prefix which was compiled in. */ -#define _STR(a) #a -#define _XSTR(a) _STR(a) - datadir = _XSTR(TESSDATA_PREFIX) "/tessdata"; -#undef _XSTR -#undef _STR -#endif - } - - // datadir may still be empty: - if (datadir.length() == 0) { - datadir = "./"; - } - - // check for missing directory separator - const char *lastchar = datadir.string(); - lastchar += datadir.length() - 1; - if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) - datadir += "/"; -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/object_cache.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/object_cache.h deleted file mode 100644 index c03f9140..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/object_cache.h +++ /dev/null @@ -1,125 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: object_cache.h -// Description: A string indexed object cache. -// Author: David Eger -// Created: Fri Jan 27 12:08:00 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_OBJECT_CACHE_H_ -#define TESSERACT_CCUTIL_OBJECT_CACHE_H_ - -#include "ccutil.h" -#include "errcode.h" -#include "genericvector.h" -#include "tesscallback.h" - -namespace tesseract { - -// A simple object cache which maps a string to an object of type T. -// Usually, these are expensive objects that are loaded from disk. -// Reference counting is performed, so every Get() needs to be followed later -// by a Free(). Actual deletion is accomplished by DeleteUnusedObjects(). -template -class ObjectCache { - public: - ObjectCache() = default; - ~ObjectCache() { - mu_.Lock(); - for (int i = 0; i < cache_.size(); i++) { - if (cache_[i].count > 0) { - tprintf("ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p " - "still has count %d (id %s)\n", - this, cache_[i].object, cache_[i].count, - cache_[i].id.string()); - } else { - delete cache_[i].object; - cache_[i].object = nullptr; - } - } - mu_.Unlock(); - } - - // Return a pointer to the object identified by id. - // If we haven't yet loaded the object, use loader to load it. - // If loader fails to load it, record a nullptr entry in the cache - // and return nullptr -- further attempts to load will fail (even - // with a different loader) until DeleteUnusedObjects() is called. - // We delete the given loader. 
- T *Get(STRING id, - TessResultCallback *loader) { - T *retval = nullptr; - mu_.Lock(); - for (int i = 0; i < cache_.size(); i++) { - if (id == cache_[i].id) { - retval = cache_[i].object; - if (cache_[i].object != nullptr) { - cache_[i].count++; - } - mu_.Unlock(); - delete loader; - return retval; - } - } - cache_.push_back(ReferenceCount()); - ReferenceCount &rc = cache_.back(); - rc.id = id; - retval = rc.object = loader->Run(); - rc.count = (retval != nullptr) ? 1 : 0; - mu_.Unlock(); - return retval; - } - - // Decrement the count for t. - // Return whether we knew about the given pointer. - bool Free(T *t) { - if (t == nullptr) return false; - mu_.Lock(); - for (int i = 0; i < cache_.size(); i++) { - if (cache_[i].object == t) { - --cache_[i].count; - mu_.Unlock(); - return true; - } - } - mu_.Unlock(); - return false; - } - - void DeleteUnusedObjects() { - mu_.Lock(); - for (int i = cache_.size() - 1; i >= 0; i--) { - if (cache_[i].count <= 0) { - delete cache_[i].object; - cache_.remove(i); - } - } - mu_.Unlock(); - } - - private: - struct ReferenceCount { - STRING id; // A unique ID to identify the object (think path on disk) - T *object; // A copy of the object in memory. Can be delete'd. - int count; // A count of the number of active users of this object. - }; - - CCUtilMutex mu_; - GenericVector cache_; -}; - -} // namespace tesseract - - -#endif // TESSERACT_CCUTIL_OBJECT_CACHE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ocrclass.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ocrclass.h deleted file mode 100644 index 77272c4b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/ocrclass.h +++ /dev/null @@ -1,184 +0,0 @@ -/********************************************************************** - * File: ocrclass.h - * Description: Class definitions and constants for the OCR API. - * Author: Hewlett-Packard Co - * - * (C) Copyright 1996, Hewlett-Packard Co. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -/********************************************************************** - * This file contains typedefs for all the structures used by - * the HP OCR interface. - * The code is designed to be used with either a C or C++ compiler. - * The structures are designed to allow them to be used with any - * structure alignment up to 8. - **********************************************************************/ - -#ifndef CCUTIL_OCRCLASS_H_ -#define CCUTIL_OCRCLASS_H_ - -#ifndef __GNUC__ -#ifdef _WIN32 -#include "gettimeofday.h" -#endif -#else -#include -#endif -#include -#include "host.h" - -/*Maximum lengths of various strings*/ -#define MAX_FONT_NAME 34 /*name of font */ -#define MAX_OCR_NAME 32 /*name of engine */ -#define MAX_OCR_VERSION 17 /*version code of engine */ - -/*pitch set definitions are identical to RTF*/ -#define PITCH_DEF 0 /*default */ -#define PITCH_FIXED 1 /*fixed pitch */ -#define PITCH_VAR 2 /*variable pitch */ - -/********************************************************************** - * EANYCODE_CHAR - * Description of a single character. The character code is defined by - * the character set of the current font. - * Output text is sent as an array of these structures. - * Spaces and line endings in the output are represented in the - * structures of the surrounding characters. 
They are not directly - * represented as characters. - * The first character in a word has a positive value of blanks. - * Missing information should be set to the defaults in the comments. - * If word bounds are known, but not character bounds, then the top and - * bottom of each character should be those of the word. The left of the - * first and right of the last char in each word should be set. All other - * lefts and rights should be set to -1. - * If set, the values of right and bottom are left+width and top+height. - * Most of the members come directly from the parameters to ocr_append_char. - * The formatting member uses the enhancement parameter and combines the - * line direction stuff into the top 3 bits. - * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para, - * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what - * the coding is, only that it is backwards compatible with the previous - * version. - **********************************************************************/ - -typedef struct { /*single character */ -// It should be noted that the format for char_code for version 2.0 and beyond -// is UTF8 which means that ASCII characters will come out as one structure but -// other characters will be returned in two or more instances of this structure -// with a single byte of the UTF8 code in each, but each will have the same -// bounding box. Programs which want to handle languagues with different -// characters sets will need to handle extended characters appropriately, but -// *all* code needs to be prepared to receive UTF8 coded characters for -// characters such as bullet and fancy quotes. 
- uint16_t char_code; /*character itself */ - int16_t left; /*of char (-1) */ - int16_t right; /*of char (-1) */ - int16_t top; /*of char (-1) */ - int16_t bottom; /*of char (-1) */ - int16_t font_index; /*what font (0) */ - uint8_t confidence; /*0=perfect, 100=reject (0/100) */ - uint8_t point_size; /*of char, 72=i inch, (10) */ - int8_t blanks; /*no of spaces before this char (1) */ - uint8_t formatting; /*char formatting (0) */ -} EANYCODE_CHAR; /*single character */ - -/********************************************************************** - * ETEXT_DESC - * Description of the output of the OCR engine. - * This structure is used as both a progress monitor and the final - * output header, since it needs to be a valid progress monitor while - * the OCR engine is storing its output to shared memory. - * During progress, all the buffer info is -1. - * Progress starts at 0 and increases to 100 during OCR. No other constraint. - * Additionally the progress callback contains the bounding box of the word that - * is currently being processed. - * Every progress callback, the OCR engine must set ocr_alive to 1. - * The HP side will set ocr_alive to 0. Repeated failure to reset - * to 1 indicates that the OCR engine is dead. - * If the cancel function is not null then it is called with the number of - * user words found. If it returns true then operation is cancelled. - **********************************************************************/ -class ETEXT_DESC; - -typedef bool (*CANCEL_FUNC)(void* cancel_this, int words); -typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top, - int bottom); -typedef bool (*PROGRESS_FUNC2)(ETEXT_DESC* ths, int left, int right, int top, - int bottom); - -class ETEXT_DESC { // output header - public: - int16_t count; /// chars in this buffer(0) - int16_t progress; /// percent complete increasing (0-100) - /** Progress monitor covers word recognition and it does not cover layout - * analysis. 
- * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */ - int8_t more_to_come; /// true if not last - volatile int8_t ocr_alive; /// ocr sets to 1, HP 0 - int8_t err_code; /// for errcode use - CANCEL_FUNC cancel; /// returns true to cancel - PROGRESS_FUNC progress_callback; /// called whenever progress increases - PROGRESS_FUNC2 progress_callback2;/// monitor-aware progress callback - void* cancel_this; /// this or other data for cancel - struct timeval end_time; /// Time to stop. Expected to be set only - /// by call to set_deadline_msecs(). - EANYCODE_CHAR text[1]; /// character data - - ETEXT_DESC() - : count(0), - progress(0), - more_to_come(0), - ocr_alive(0), - err_code(0), - cancel(nullptr), - progress_callback(nullptr), - progress_callback2(&default_progress_func), - cancel_this(nullptr) { - end_time.tv_sec = 0; - end_time.tv_usec = 0; - } - - // Sets the end time to be deadline_msecs milliseconds from now. - void set_deadline_msecs(int32_t deadline_msecs) { - gettimeofday(&end_time, nullptr); - int32_t deadline_secs = deadline_msecs / 1000; - end_time.tv_sec += deadline_secs; - end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000; - if (end_time.tv_usec > 1000000) { - end_time.tv_usec -= 1000000; - ++end_time.tv_sec; - } - } - - // Returns false if we've not passed the end_time, or have not set a deadline. 
- bool deadline_exceeded() const { - if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false; - struct timeval now; - gettimeofday(&now, nullptr); - return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec && - now.tv_usec > end_time.tv_usec)); - } - -private: - static bool default_progress_func(ETEXT_DESC* ths, int left, int right, int top, - int bottom) - { - if (ths->progress_callback) { - return (*(ths->progress_callback))(ths->progress, left, right, top, bottom); - } - return true; - } - -}; - -#endif // CCUTIL_OCRCLASS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/params.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/params.cpp deleted file mode 100644 index 99e559aa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/params.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/********************************************************************** - * File: params.cpp - * Description: Initialization and setting of Tesseract parameters. - * Author: Ray Smith - * Created: Fri Feb 22 16:22:34 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include -#include -#include - -#include "genericvector.h" -#include "tprintf.h" -#include "params.h" - -#define PLUS '+' //flag states -#define MINUS '-' -#define EQUAL '=' - -tesseract::ParamsVectors *GlobalParams() { - static tesseract::ParamsVectors global_params = tesseract::ParamsVectors(); - return &global_params; -} - -namespace tesseract { - -bool ParamUtils::ReadParamsFile(const char *file, - SetParamConstraint constraint, - ParamsVectors *member_params) { - int16_t nameoffset; // offset for real name - - if (*file == PLUS) { - nameoffset = 1; - } else if (*file == MINUS) { - nameoffset = 1; - } else { - nameoffset = 0; - } - - TFile fp; - if (!fp.Open(file + nameoffset, nullptr)) { - tprintf("read_params_file: Can't open %s\n", file + nameoffset); - return true; - } - return ReadParamsFromFp(constraint, &fp, member_params); -} - -bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, - ParamsVectors *member_params) { - char line[MAX_PATH]; // input line - bool anyerr = false; // true if any error - bool foundit; // found parameter - char *valptr; // value field - - while (fp->FGets(line, MAX_PATH) != nullptr) { - if (line[0] != '\r' && line[0] != '\n' && line[0] != '#') { - chomp_string(line); // remove newline - for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t'; - valptr++); - if (*valptr) { // found blank - *valptr = '\0'; // make name a string - do - valptr++; // find end of blanks - while (*valptr == ' ' || *valptr == '\t'); - } - foundit = SetParam(line, valptr, constraint, member_params); - - if (!foundit) { - anyerr = true; // had an error - tprintf("Warning: Parameter not found: %s\n", line); - } - } - } - return anyerr; -} - -bool ParamUtils::SetParam(const char *name, const char* value, - SetParamConstraint constraint, - ParamsVectors *member_params) { - // Look for the parameter among string parameters. 
- StringParam *sp = FindParam(name, GlobalParams()->string_params, - member_params->string_params); - if (sp != nullptr && sp->constraint_ok(constraint)) sp->set_value(value); - if (*value == '\0') return (sp != nullptr); - - // Look for the parameter among int parameters. - int intval; - IntParam *ip = FindParam(name, GlobalParams()->int_params, - member_params->int_params); - if (ip && ip->constraint_ok(constraint) && sscanf(value, "%d", &intval) == 1) - ip->set_value(intval); - - // Look for the parameter among bool parameters. - BoolParam *bp = FindParam(name, GlobalParams()->bool_params, - member_params->bool_params); - if (bp != nullptr && bp->constraint_ok(constraint)) { - if (*value == 'T' || *value == 't' || - *value == 'Y' || *value == 'y' || *value == '1') { - bp->set_value(true); - } else if (*value == 'F' || *value == 'f' || - *value == 'N' || *value == 'n' || *value == '0') { - bp->set_value(false); - } - } - - // Look for the parameter among double parameters. - double doubleval; - DoubleParam *dp = FindParam(name, GlobalParams()->double_params, - member_params->double_params); - if (dp != nullptr && dp->constraint_ok(constraint)) { -#ifdef EMBEDDED - doubleval = strtofloat(value); -#else - if (sscanf(value, "%lf", &doubleval) == 1) -#endif - dp->set_value(doubleval); - } - return (sp || ip || bp || dp); -} - -bool ParamUtils::GetParamAsString(const char *name, - const ParamsVectors* member_params, - STRING *value) { - // Look for the parameter among string parameters. - StringParam *sp = FindParam(name, GlobalParams()->string_params, - member_params->string_params); - if (sp) { - *value = sp->string(); - return true; - } - // Look for the parameter among int parameters. - IntParam *ip = FindParam(name, GlobalParams()->int_params, - member_params->int_params); - if (ip) { - char buf[128]; - snprintf(buf, sizeof(buf), "%d", int32_t(*ip)); - *value = buf; - return true; - } - // Look for the parameter among bool parameters. 
- BoolParam *bp = FindParam(name, GlobalParams()->bool_params, - member_params->bool_params); - if (bp != nullptr) { - *value = BOOL8(*bp) ? "1": "0"; - return true; - } - // Look for the parameter among double parameters. - DoubleParam *dp = FindParam(name, GlobalParams()->double_params, - member_params->double_params); - if (dp != nullptr) { - char buf[128]; - snprintf(buf, sizeof(buf), "%g", double(*dp)); - *value = buf; - return true; - } - return false; -} - -void ParamUtils::PrintParams(FILE *fp, const ParamsVectors *member_params) { - int v, i; - int num_iterations = (member_params == nullptr) ? 1 : 2; - for (v = 0; v < num_iterations; ++v) { - const ParamsVectors *vec = (v == 0) ? GlobalParams() : member_params; - for (i = 0; i < vec->int_params.size(); ++i) { - fprintf(fp, "%s\t%d\t%s\n", vec->int_params[i]->name_str(), - (int32_t)(*vec->int_params[i]), vec->int_params[i]->info_str()); - } - for (i = 0; i < vec->bool_params.size(); ++i) { - fprintf(fp, "%s\t%d\t%s\n", vec->bool_params[i]->name_str(), - (BOOL8)(*vec->bool_params[i]), vec->bool_params[i]->info_str()); - } - for (int i = 0; i < vec->string_params.size(); ++i) { - fprintf(fp, "%s\t%s\t%s\n", vec->string_params[i]->name_str(), - vec->string_params[i]->string(), vec->string_params[i]->info_str()); - } - for (int i = 0; i < vec->double_params.size(); ++i) { - fprintf(fp, "%s\t%g\t%s\n", vec->double_params[i]->name_str(), - (double)(*vec->double_params[i]), vec->double_params[i]->info_str()); - } - } -} - -// Resets all parameters back to default values; -void ParamUtils::ResetToDefaults(ParamsVectors* member_params) { - int v, i; - int num_iterations = (member_params == nullptr) ? 1 : 2; - for (v = 0; v < num_iterations; ++v) { - ParamsVectors *vec = (v == 0) ? 
GlobalParams() : member_params; - for (i = 0; i < vec->int_params.size(); ++i) { - vec->int_params[i]->ResetToDefault(); - } - for (i = 0; i < vec->bool_params.size(); ++i) { - vec->bool_params[i]->ResetToDefault(); - } - for (int i = 0; i < vec->string_params.size(); ++i) { - vec->string_params[i]->ResetToDefault(); - } - for (int i = 0; i < vec->double_params.size(); ++i) { - vec->double_params[i]->ResetToDefault(); - } - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/params.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/params.h deleted file mode 100644 index 742c8984..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/params.h +++ /dev/null @@ -1,312 +0,0 @@ -/********************************************************************** - * File: params.h - * Description: Class definitions of the *_VAR classes for tunable constants. - * Author: Ray Smith - * Created: Fri Feb 22 11:26:25 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef PARAMS_H -#define PARAMS_H - -#include - -#include "genericvector.h" -#include "strngs.h" - -namespace tesseract { - -class IntParam; -class BoolParam; -class StringParam; -class DoubleParam; - -// Enum for constraints on what kind of params should be set by SetParam(). 
-enum SetParamConstraint { - SET_PARAM_CONSTRAINT_NONE, - SET_PARAM_CONSTRAINT_DEBUG_ONLY, - SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, - SET_PARAM_CONSTRAINT_NON_INIT_ONLY, -}; - -struct ParamsVectors { - GenericVector int_params; - GenericVector bool_params; - GenericVector string_params; - GenericVector double_params; -}; - -// Utility functions for working with Tesseract parameters. -class ParamUtils { - public: - // Reads a file of parameter definitions and set/modify the values therein. - // If the filename begins with a + or -, the BoolVariables will be - // ORed or ANDed with any current values. - // Blank lines and lines beginning # are ignored. - // Values may have any whitespace after the name and are the rest of line. - static bool ReadParamsFile( - const char *file, // filename to read - SetParamConstraint constraint, - ParamsVectors *member_params); - - // Read parameters from the given file pointer. - static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, - ParamsVectors *member_params); - - // Set a parameters to have the given value. - static bool SetParam(const char *name, const char* value, - SetParamConstraint constraint, - ParamsVectors *member_params); - - // Returns the pointer to the parameter with the given name (of the - // appropriate type) if it was found in the vector obtained from - // GlobalParams() or in the given member_params. - template - static T *FindParam(const char *name, - const GenericVector &global_vec, - const GenericVector &member_vec) { - int i; - for (i = 0; i < global_vec.size(); ++i) { - if (strcmp(global_vec[i]->name_str(), name) == 0) return global_vec[i]; - } - for (i = 0; i < member_vec.size(); ++i) { - if (strcmp(member_vec[i]->name_str(), name) == 0) return member_vec[i]; - } - return nullptr; - } - // Removes the given pointer to the param from the given vector. 
- template - static void RemoveParam(T *param_ptr, GenericVector *vec) { - for (int i = 0; i < vec->size(); ++i) { - if ((*vec)[i] == param_ptr) { - vec->remove(i); - return; - } - } - } - // Fetches the value of the named param as a STRING. Returns false if not - // found. - static bool GetParamAsString(const char *name, - const ParamsVectors* member_params, - STRING *value); - - // Print parameters to the given file. - static void PrintParams(FILE *fp, const ParamsVectors *member_params); - - // Resets all parameters back to default values; - static void ResetToDefaults(ParamsVectors* member_params); -}; - -// Definition of various parameter types. -class Param { - public: - ~Param() = default; - - const char *name_str() const { return name_; } - const char *info_str() const { return info_; } - bool is_init() const { return init_; } - bool is_debug() const { return debug_; } - bool constraint_ok(SetParamConstraint constraint) const { - return (constraint == SET_PARAM_CONSTRAINT_NONE || - (constraint == SET_PARAM_CONSTRAINT_DEBUG_ONLY && - this->is_debug()) || - (constraint == SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY && - !this->is_debug()) || - (constraint == SET_PARAM_CONSTRAINT_NON_INIT_ONLY && - !this->is_init())); - } - - protected: - Param(const char *name, const char *comment, bool init) : - name_(name), info_(comment), init_(init) { - debug_ = (strstr(name, "debug") != nullptr) || (strstr(name, "display")); - } - - const char *name_; // name of this parameter - const char *info_; // for menus - bool init_; // needs to be set before init - bool debug_; -}; - -class IntParam : public Param { - public: - IntParam(int32_t value, const char *name, const char *comment, bool init, - ParamsVectors *vec) : Param(name, comment, init) { - value_ = value; - default_ = value; - params_vec_ = &(vec->int_params); - vec->int_params.push_back(this); - } - ~IntParam() { ParamUtils::RemoveParam(this, params_vec_); } - operator int32_t() const { return value_; } - void 
operator=(int32_t value) { value_ = value; } - void set_value(int32_t value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } - - private: - int32_t value_; - int32_t default_; - // Pointer to the vector that contains this param (not owned by this class). - GenericVector *params_vec_; -}; - -class BoolParam : public Param { - public: - BoolParam(bool value, const char *name, const char *comment, bool init, - ParamsVectors *vec) : Param(name, comment, init) { - value_ = value; - default_ = value; - params_vec_ = &(vec->bool_params); - vec->bool_params.push_back(this); - } - ~BoolParam() { ParamUtils::RemoveParam(this, params_vec_); } - operator BOOL8() const { return value_; } - void operator=(BOOL8 value) { value_ = value; } - void set_value(BOOL8 value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } - - private: - BOOL8 value_; - BOOL8 default_; - // Pointer to the vector that contains this param (not owned by this class). - GenericVector *params_vec_; -}; - -class StringParam : public Param { - public: - StringParam(const char *value, const char *name, - const char *comment, bool init, - ParamsVectors *vec) : Param(name, comment, init) { - value_ = value; - default_ = value; - params_vec_ = &(vec->string_params); - vec->string_params.push_back(this); - } - ~StringParam() { ParamUtils::RemoveParam(this, params_vec_); } - operator STRING &() { return value_; } - const char *string() const { return value_.string(); } - const char *c_str() const { return value_.string(); } - bool empty() { return value_.length() <= 0; } - bool operator==(const STRING& other) { return value_ == other; } - void operator=(const STRING& value) { value_ = value; } - void set_value(const STRING& value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } - - private: - STRING value_; - STRING default_; - // Pointer to the vector that contains this param (not owned by this class). 
- GenericVector *params_vec_; -}; - -class DoubleParam : public Param { - public: - DoubleParam(double value, const char *name, const char *comment, - bool init, ParamsVectors *vec) : Param(name, comment, init) { - value_ = value; - default_ = value; - params_vec_ = &(vec->double_params); - vec->double_params.push_back(this); - } - ~DoubleParam() { ParamUtils::RemoveParam(this, params_vec_); } - operator double() const { return value_; } - void operator=(double value) { value_ = value; } - void set_value(double value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } - - private: - double value_; - double default_; - // Pointer to the vector that contains this param (not owned by this class). - GenericVector *params_vec_; -}; - -} // namespace tesseract - -// Global parameter lists. -// -// To avoid the problem of undetermined order of static initialization -// global_params are accessed through the GlobalParams function that -// initializes the static pointer to global_params only on the first time -// GlobalParams() is called. -// -// TODO(daria): remove GlobalParams() when all global Tesseract -// parameters are converted to members. -tesseract::ParamsVectors *GlobalParams(); - -/************************************************************************* - * Note on defining parameters. - * - * The values of the parameters defined with *_INIT_* macros are guaranteed - * to be loaded from config files before Tesseract initialization is done - * (there is no such guarantee for parameters defined with the other macros). 
- *************************************************************************/ - -#define INT_VAR_H(name,val,comment)\ - tesseract::IntParam name - -#define BOOL_VAR_H(name,val,comment)\ - tesseract::BoolParam name - -#define STRING_VAR_H(name,val,comment)\ - tesseract::StringParam name - -#define double_VAR_H(name,val,comment)\ - tesseract::DoubleParam name - -#define INT_VAR(name,val,comment)\ - tesseract::IntParam name(val,#name,comment,false,GlobalParams()) - -#define BOOL_VAR(name,val,comment)\ - tesseract::BoolParam name(val,#name,comment,false,GlobalParams()) - -#define STRING_VAR(name,val,comment)\ - tesseract::StringParam name(val,#name,comment,false,GlobalParams()) - -#define double_VAR(name,val,comment)\ - tesseract::DoubleParam name(val,#name,comment,false,GlobalParams()) - -#define INT_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, false, vec) - -#define BOOL_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, false, vec) - -#define STRING_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, false, vec) - -#define double_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, false, vec) - -#define INT_INIT_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, true, vec) - -#define BOOL_INIT_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, true, vec) - -#define STRING_INIT_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, true, vec) - -#define double_INIT_MEMBER(name, val, comment, vec)\ - name(val, #name, comment, true, vec) - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/platform.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/platform.h deleted file mode 100644 index 09ac39bb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/platform.h +++ /dev/null @@ -1,79 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: platform.h -// Description: Place holder -// Author: -// 
Created: -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_PLATFORM_H_ -#define TESSERACT_CCUTIL_PLATFORM_H_ - -#define DLLSYM -#ifdef _WIN32 -#ifndef NOMINMAX -#define NOMINMAX -#endif /* NOMINMAX */ -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#ifdef __GNUC__ -#define ultoa _ultoa -#endif /* __GNUC__ */ -#define SIGNED -#else -#ifdef __cplusplus -#include -#else /* C compiler*/ -#include -#endif /* __cplusplus */ -#ifndef PATH_MAX -#define MAX_PATH 4096 -#else -#define MAX_PATH PATH_MAX -#endif -#define SIGNED signed -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) - #if defined(TESS_EXPORTS) - #define TESS_API __declspec(dllexport) - #elif defined(TESS_IMPORTS) - #define TESS_API __declspec(dllimport) - #else - #define TESS_API - #endif - #define TESS_LOCAL -#else - #if __GNUC__ >= 4 - #if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) - #define TESS_API __attribute__ ((visibility ("default"))) - #define TESS_LOCAL __attribute__ ((visibility ("hidden"))) - #else - #define TESS_API - #define TESS_LOCAL - #endif - #else - #define TESS_API - #define TESS_LOCAL - #endif -#endif - -#endif // TESSERACT_CCUTIL_PLATFORM_H_ diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/qrsequence.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/qrsequence.h deleted file mode 100644 index 991004a0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/qrsequence.h +++ /dev/null @@ -1,80 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: qrsequence.h -// Description: Quasi-random sequence generator class. -// Author: Ranjith Unnikrishnan -// Created: Wed May 20 2009 -// -// Class to generate a (deterministic) quasi-random Van der Corput sequence that -// covers the interval [0,N) without repetition. -// -// The sequence is generated by reversing the base-2 representation of the -// sequence of natural numbers {0, 1,... M-1}, where M is 2^{num_bits_} and -// num_bits is the minimum number of bits required to represent N. If a reversed -// numbers is >= N it is rejected and the next natural number is considered -// until a valid output number is found. -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy -// of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -// by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -// OF ANY KIND, either express or implied. See the License for the specific -// language governing permissions and limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_QRSEQUENCE_H_ -#define TESSERACT_CCUTIL_QRSEQUENCE_H_ - -#include - -class QRSequenceGenerator { - public: - // Object is initialized with the size of the output range. 
- explicit QRSequenceGenerator(int N) : N_(N), next_num_(0) { - num_bits_ = static_cast(ceil(log(static_cast(N)) / log(2.0))); - } - - // Main worker method that retrieves the next number in the sequence. - // Returns kInvalidVal if called more than N times after object initialization - int GetVal() { - const int kInvalidVal = -1; - const int kMaxNaturalNumberValue = 1 << num_bits_; - if (next_num_ >= kMaxNaturalNumberValue) - return kInvalidVal; - int n = next_num_; - - while (next_num_ < kMaxNaturalNumberValue) { - n = GetBinaryReversedInteger(next_num_++); - if (n < N_) break; - } - return (next_num_ > kMaxNaturalNumberValue) ? kInvalidVal : n; - } - - protected: - // Outputs the integer formed by reversing the bits of the input integer. Only - // the lowest num_bits_ bits of the input integer are reversed. - int GetBinaryReversedInteger(int in_val) const { - int bit_pos = num_bits_; - int out_val = 0; - while(bit_pos--) { - // Set the value of the last bit. - out_val |= (in_val & 0x1); - if (bit_pos > 0) { - // Left-shift output value to prepare for storing the next bit. - out_val <<= 1; - } - // Right-shift input value to prepare for retrieving the next bit. - in_val >>= 1; - } - return out_val; - } - int N_; - // Next number to be considered for reversal and output. - int next_num_; - // number of bits required to represent the numbers of the sequence - int num_bits_; -}; - -#endif // TESSERACT_CCUTIL_QRSEQUENCE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/scanutils.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/scanutils.cpp deleted file mode 100644 index c978616e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/scanutils.cpp +++ /dev/null @@ -1,552 +0,0 @@ -// Copyright 2006 Google Inc. -// All Rights Reserved. -// Author: renn -// -// The fscanf, vfscanf and creat functions are implemented so that their -// functionality is mostly like their stdio counterparts. 
However, currently -// these functions do not use any buffering, making them rather slow. -// File streams are thus processed one character at a time. -// Although the implementations of the scanf functions do lack a few minor -// features, they should be sufficient for their use in tesseract. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "scanutils.h" -#include "tprintf.h" - -// workaround for "'off_t' was not declared in this scope" with -std=c++11 -// OSX has off_t defined, but HAVE_OFF_T is false... 
-#if !defined(HAVE_OFF_T) && !__APPLE__ -typedef long off_t; -#endif // off_t - -enum Flags { - FL_SPLAT = 0x01, // Drop the value, do not assign - FL_INV = 0x02, // Character-set with inverse - FL_WIDTH = 0x04, // Field width specified - FL_MINUS = 0x08, // Negative number -}; - -enum Ranks { - RANK_CHAR = -2, - RANK_SHORT = -1, - RANK_INT = 0, - RANK_LONG = 1, - RANK_LONGLONG = 2, - RANK_PTR = std::numeric_limits::max() // Special value used for pointers -}; - -const enum Ranks kMinRank = RANK_CHAR; -const enum Ranks kMaxRank = RANK_LONGLONG; - -const enum Ranks kIntMaxRank = RANK_LONGLONG; -const enum Ranks kSizeTRank = RANK_LONG; -const enum Ranks kPtrDiffRank = RANK_LONG; - -enum Bail { - BAIL_NONE = 0, // No error condition - BAIL_EOF, // Hit EOF - BAIL_ERR // Conversion mismatch -}; - -// Helper functions ------------------------------------------------------------ -inline size_t LongBit() { - return CHAR_BIT * sizeof(long); -} - -static inline int -SkipSpace(FILE *s) { - int p; - while (isascii(p = fgetc(s)) && isspace(p)); - ungetc(p, s); // Make sure next char is available for reading - return p; -} - -static inline void -SetBit(unsigned long *bitmap, unsigned int bit) { - bitmap[bit/LongBit()] |= 1UL << (bit%LongBit()); -} - -static inline int -TestBit(unsigned long *bitmap, unsigned int bit) { - return static_cast(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1; -} - -static inline int DigitValue(int ch, int base) { - if (ch >= '0' && ch <= '9') { - if (base >= 10 || ch <= '7') - return ch-'0'; - } else if (ch >= 'A' && ch <= 'Z' && base == 16) { - return ch-'A'+10; - } else if (ch >= 'a' && ch <= 'z' && base == 16) { - return ch-'a'+10; - } - return -1; -} - -// IO (re-)implementations ----------------------------------------------------- -static uintmax_t streamtoumax(FILE* s, int base) { - int minus = 0; - uintmax_t v = 0; - int d, c = 0; - - for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s)); - - // Single optional + or - - if (c == '-' || c 
== '+') { - minus = (c == '-'); - c = fgetc(s); - } - - // Assign correct base - if (base == 0) { - if (c == '0') { - c = fgetc(s); - if (c == 'x' || c == 'X') { - base = 16; - c = fgetc(s); - } else { - base = 8; - } - } - } else if (base == 16) { - if (c == '0') { - c = fgetc(s); - if (c == 'x' || c == 'X') c = fgetc(s); - } - } - - // Actual number parsing - for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s)) - v = v*base + d; - - ungetc(c, s); - return minus ? 0 : v; -} - -static double streamtofloat(FILE* s) { - int minus = 0; - int v = 0; - int d, c = 0; - int k = 1; - int w = 0; - - for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s)); - - // Single optional + or - - if (c == '-' || c == '+') { - minus = (c == '-'); - c = fgetc(s); - } - - // Actual number parsing - for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) - v = v*10 + d; - if (c == '.') { - for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) { - w = w*10 + d; - k *= 10; - } - } - double f = static_cast(v) - + static_cast(w) / static_cast(k); - if (c == 'e' || c == 'E') { - c = fgetc(s); - int expsign = 1; - if (c == '-' || c == '+') { - expsign = (c == '-') ? -1 : 1; - c = fgetc(s); - } - int exponent = 0; - for (; (c != EOF) && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) { - exponent = exponent * 10 + d; - } - exponent *= expsign; - f *= pow(10.0, static_cast(exponent)); - } - ungetc(c, s); - - return minus ? -f : f; -} - -static int tvfscanf(FILE* stream, const char *format, va_list ap); - -int tfscanf(FILE* stream, const char *format, ...) { - va_list ap; - int rv; - - va_start(ap, format); - rv = tvfscanf(stream, format, ap); - va_end(ap); - - return rv; -} - -#ifdef EMBEDDED - -int fscanf(FILE* stream, const char *format, ...) { - va_list ap; - int rv; - - va_start(ap, format); - rv = tvfscanf(stream, format, ap); - va_end(ap); - - return rv; -} - -int vfscanf(FILE* stream, const char *format, ...) 
{ - va_list ap; - int rv; - - va_start(ap, format); - rv = tvfscanf(stream, format, ap); - va_end(ap); - - return rv; -} -#endif - -static int tvfscanf(FILE* stream, const char *format, va_list ap) { - const char *p = format; - char ch; - int q = 0; - uintmax_t val = 0; - int rank = RANK_INT; // Default rank - unsigned int width = UINT_MAX; - int base; - int flags = 0; - enum { - ST_NORMAL, // Ground state - ST_FLAGS, // Special flags - ST_WIDTH, // Field width - ST_MODIFIERS, // Length or conversion modifiers - ST_MATCH_INIT, // Initial state of %[ sequence - ST_MATCH, // Main state of %[ sequence - ST_MATCH_RANGE, // After - in a %[ sequence - } state = ST_NORMAL; - char *sarg = nullptr; // %s %c or %[ string argument - enum Bail bail = BAIL_NONE; - int converted = 0; // Successful conversions - unsigned long matchmap[((1 << CHAR_BIT)+(CHAR_BIT * sizeof(long) - 1)) / - (CHAR_BIT * sizeof(long))]; - int matchinv = 0; // Is match map inverted? - unsigned char range_start = 0; - off_t start_off = ftell(stream); - - // Skip leading spaces - SkipSpace(stream); - - while ((ch = *p++) && !bail) { - switch (state) { - case ST_NORMAL: - if (ch == '%') { - state = ST_FLAGS; - flags = 0; rank = RANK_INT; width = UINT_MAX; - } else if (isascii(ch) && isspace(ch)) { - SkipSpace(stream); - } else { - if (fgetc(stream) != ch) - bail = BAIL_ERR; // Match failure - } - break; - - case ST_FLAGS: - if (ch == '*') { - flags |= FL_SPLAT; - } else if ('0' <= ch && ch <= '9') { - width = (ch-'0'); - state = ST_WIDTH; - flags |= FL_WIDTH; - } else { - state = ST_MODIFIERS; - p--; // Process this character again - } - break; - - case ST_WIDTH: - if (ch >= '0' && ch <= '9') { - width = width*10+(ch-'0'); - } else { - state = ST_MODIFIERS; - p--; // Process this character again - } - break; - - case ST_MODIFIERS: - switch (ch) { - // Length modifiers - nonterminal sequences - case 'h': - rank--; // Shorter rank - break; - case 'l': - rank++; // Longer rank - break; - case 'j': - rank = 
kIntMaxRank; - break; - case 'z': - rank = kSizeTRank; - break; - case 't': - rank = kPtrDiffRank; - break; - case 'L': - case 'q': - rank = RANK_LONGLONG; // long double/long long - break; - - default: - // Output modifiers - terminal sequences - state = ST_NORMAL; // Next state will be normal - if (rank < kMinRank) // Canonicalize rank - rank = kMinRank; - else if (rank > kMaxRank) - rank = kMaxRank; - - switch (ch) { - case 'P': // Upper case pointer - case 'p': // Pointer - rank = RANK_PTR; - base = 0; - goto scan_int; - - case 'i': // Base-independent integer - base = 0; - goto scan_int; - - case 'd': // Decimal integer - base = 10; - goto scan_int; - - case 'o': // Octal integer - base = 8; - goto scan_int; - - case 'u': // Unsigned decimal integer - base = 10; - goto scan_int; - - case 'x': // Hexadecimal integer - case 'X': - base = 16; - goto scan_int; - - case 'n': // Number of characters consumed - val = ftell(stream) - start_off; - goto set_integer; - - scan_int: - q = SkipSpace(stream); - if (q <= 0) { - bail = BAIL_EOF; - break; - } - val = streamtoumax(stream, base); - // fall through - - set_integer: - if (!(flags & FL_SPLAT)) { - converted++; - switch(rank) { - case RANK_CHAR: - *va_arg(ap, unsigned char *) - = static_cast(val); - break; - case RANK_SHORT: - *va_arg(ap, unsigned short *) - = static_cast(val); - break; - case RANK_INT: - *va_arg(ap, unsigned int *) - = static_cast(val); - break; - case RANK_LONG: - *va_arg(ap, unsigned long *) - = static_cast(val); - break; - case RANK_LONGLONG: - *va_arg(ap, unsigned long long *) - = static_cast(val); - break; - case RANK_PTR: - *va_arg(ap, void **) - = reinterpret_cast(static_cast(val)); - break; - } - } - break; - - case 'f': // Preliminary float value parsing - case 'g': - case 'G': - case 'e': - case 'E': - q = SkipSpace(stream); - if (q <= 0) { - bail = BAIL_EOF; - break; - } - - { - double fval = streamtofloat(stream); - if (!(flags & FL_SPLAT)) { - if (rank == RANK_INT) - *va_arg(ap, float 
*) = static_cast(fval); - else if (rank == RANK_LONG) - *va_arg(ap, double *) = static_cast(fval); - converted++; - } - } - break; - - case 'c': // Character - width = (flags & FL_WIDTH) ? width : 1; // Default width == 1 - sarg = va_arg(ap, char *); - while (width--) { - if ((q = fgetc(stream)) <= 0) { - bail = BAIL_EOF; - break; - } - if (!(flags & FL_SPLAT)) { - *sarg++ = q; - converted++; - } - } - break; - - case 's': // String - { - if (!(flags & FL_SPLAT)) { - sarg = va_arg(ap, char *); - } - unsigned length = 0; - while (width--) { - q = fgetc(stream); - if ((isascii(q) && isspace(q)) || (q <= 0)) { - ungetc(q, stream); - break; - } - if (!(flags & FL_SPLAT)) { - sarg[length] = q; - } - length++; - } - if (length == 0) { - bail = BAIL_EOF; - } else if (!(flags & FL_SPLAT)) { - sarg[length] = '\0'; // Terminate output - converted++; - } - } - break; - - case '[': // Character range - sarg = va_arg(ap, char *); - state = ST_MATCH_INIT; - matchinv = 0; - memset(matchmap, 0, sizeof matchmap); - break; - - case '%': // %% sequence - if (fgetc(stream) != '%') - bail = BAIL_ERR; - break; - - default: // Anything else - bail = BAIL_ERR; // Unknown sequence - break; - } - } - break; - - case ST_MATCH_INIT: // Initial state for %[ match - if (ch == '^' && !(flags & FL_INV)) { - matchinv = 1; - } else { - SetBit(matchmap, static_cast(ch)); - state = ST_MATCH; - } - break; - - case ST_MATCH: // Main state for %[ match - if (ch == ']') { - goto match_run; - } else if (ch == '-') { - range_start = static_cast(ch); - state = ST_MATCH_RANGE; - } else { - SetBit(matchmap, static_cast(ch)); - } - break; - - case ST_MATCH_RANGE: // %[ match after - - if (ch == ']') { - SetBit(matchmap, static_cast('-')); - goto match_run; - } else { - int i; - for (i = range_start ; i < (static_cast(ch)) ; i++) - SetBit(matchmap, i); - state = ST_MATCH; - } - break; - - match_run: // Match expression finished - char* oarg = sarg; - while (width) { - q = fgetc(stream); - unsigned char qc = 
static_cast(q); - if (q <= 0 || !(TestBit(matchmap, qc)^matchinv)) { - ungetc(q, stream); - break; - } - if (!(flags & FL_SPLAT)) *sarg = q; - sarg++; - } - if (oarg == sarg) { - bail = (q <= 0) ? BAIL_EOF : BAIL_ERR; - } else if (!(flags & FL_SPLAT)) { - *sarg = '\0'; - converted++; - } - break; - } - } - - if (bail == BAIL_EOF && !converted) - converted = -1; // Return EOF (-1) - - return converted; -} - -#ifdef EMBEDDED -int creat(const char *pathname, mode_t mode) { - return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode); -} - -#endif // EMBEDDED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/scanutils.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/scanutils.h deleted file mode 100644 index b2f15580..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/scanutils.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2006 Google Inc. -// All Rights Reserved. -// Author: renn -// -// Contains file io functions (mainly for file parsing), that might not be -// available, on embedded devices, or that have an incomplete implementation -// there. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_CCUTIL_SCANUTILS_H_ -#define TESSERACT_CCUTIL_SCANUTILS_H_ - -#include -#include -#include -#include - -/** - * fscanf variant to ensure correct reading regardless of locale. - * - * tfscanf parse a file stream according to the given format. 
See the fscanf - * manpage for more information, as this function attempts to mimic its - * behavior. - * - * @note Note that scientific floating-point notation is not supported. - * - */ -int tfscanf(FILE* stream, const char *format, ...); - -#ifdef EMBEDDED - -// Attempts to parse the given file stream s as an integer of the base -// 'base'. Returns the first successfully parsed integer as a uintmax_t, or -// 0, if none was found. -uintmax_t streamtoumax(FILE* s, int base); - -// Parse a file stream according to the given format. See the fscanf manpage -// for more information, as this function attempts to mimic its behavior. -// Note that scientific floating-point notation is not supported. -int fscanf(FILE* stream, const char *format, ...); - -// Parse a file stream according to the given format. See the fscanf manpage -// for more information, as this function attempts to mimic its behavior. -// Note that scientific floating-point notation is not supported. -int vfscanf(FILE* stream, const char *format, va_list ap); - -// Create a file at the specified path. See the creat manpage for more -// information, as this function attempts to mimic its behavior. -int creat(const char *pathname, mode_t mode); - -// Convert the specified C-String to a float. Returns the first parsed float, -// or 0.0 if no floating point value could be found. Note that scientific -// floating-point notation is not supported. 
-double strtofloat(const char* s); - -#endif // EMBEDDED - -#endif // TESSERACT_CCUTIL_SCANUTILS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/serialis.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/serialis.cpp deleted file mode 100644 index 7def011f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/serialis.cpp +++ /dev/null @@ -1,332 +0,0 @@ -/********************************************************************** - * File: serialis.cpp (Formerly serialmac.h) - * Description: Inline routines and macros for serialisation functions - * Author: Phil Cheatle - * Created: Tue Oct 08 08:33:12 BST 1991 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "serialis.h" -#include -#include "errcode.h" -#include "genericvector.h" - -namespace tesseract { - -bool DeSerialize(FILE* fp, char* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, float* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, int8_t* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, int16_t* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, int32_t* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, uint8_t* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, uint16_t* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool DeSerialize(FILE* fp, uint32_t* data, size_t n) { - return fread(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const char* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const float* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const int8_t* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const int16_t* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const int32_t* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const uint8_t* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const uint16_t* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -bool Serialize(FILE* fp, const uint32_t* data, size_t n) { - return fwrite(data, sizeof(*data), n, fp) == n; -} - -TFile::TFile() - : offset_(0), - 
data_(nullptr), - data_is_owned_(false), - is_writing_(false), - swap_(false) {} - -TFile::~TFile() { - if (data_is_owned_) - delete data_; -} - -bool TFile::DeSerialize(char* buffer, size_t count) { - return FRead(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(double* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(float* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(int8_t* buffer, size_t count) { - return FRead(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(int16_t* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(int32_t* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(int64_t* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(uint8_t* buffer, size_t count) { - return FRead(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(uint16_t* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(uint32_t* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::DeSerialize(uint64_t* buffer, size_t count) { - return FReadEndian(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const char* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const double* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const float* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const int8_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} 
- -bool TFile::Serialize(const int16_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const int32_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const int64_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const uint8_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const uint16_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const uint32_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Serialize(const uint64_t* buffer, size_t count) { - return FWrite(buffer, sizeof(*buffer), count) == count; -} - -bool TFile::Skip(size_t count) { - offset_ += count; - return true; -} - -bool TFile::Open(const STRING& filename, FileReader reader) { - if (!data_is_owned_) { - data_ = new GenericVector; - data_is_owned_ = true; - } - offset_ = 0; - is_writing_ = false; - swap_ = false; - if (reader == nullptr) - return LoadDataFromFile(filename, data_); - else - return (*reader)(filename, data_); -} - -bool TFile::Open(const char* data, int size) { - offset_ = 0; - if (!data_is_owned_) { - data_ = new GenericVector; - data_is_owned_ = true; - } - is_writing_ = false; - swap_ = false; - data_->resize_no_init(size); - memcpy(&(*data_)[0], data, size); - return true; -} - -bool TFile::Open(FILE* fp, int64_t end_offset) { - offset_ = 0; - long current_pos = ftell(fp); - if (current_pos < 0) { - // ftell failed. 
- return false; - } - if (end_offset < 0) { - if (fseek(fp, 0, SEEK_END)) - return false; - end_offset = ftell(fp); - if (fseek(fp, current_pos, SEEK_SET)) - return false; - } - int size = end_offset - current_pos; - is_writing_ = false; - swap_ = false; - if (!data_is_owned_) { - data_ = new GenericVector; - data_is_owned_ = true; - } - data_->resize_no_init(size); - return static_cast(fread(&(*data_)[0], 1, size, fp)) == size; -} - -char* TFile::FGets(char* buffer, int buffer_size) { - ASSERT_HOST(!is_writing_); - int size = 0; - while (size + 1 < buffer_size && offset_ < data_->size()) { - buffer[size++] = (*data_)[offset_++]; - if ((*data_)[offset_ - 1] == '\n') break; - } - if (size < buffer_size) buffer[size] = '\0'; - return size > 0 ? buffer : nullptr; -} - -int TFile::FReadEndian(void* buffer, size_t size, int count) { - int num_read = FRead(buffer, size, count); - if (swap_) { - char* char_buffer = static_cast(buffer); - for (int i = 0; i < num_read; ++i, char_buffer += size) { - ReverseN(char_buffer, size); - } - } - return num_read; -} - -int TFile::FRead(void* buffer, size_t size, int count) { - ASSERT_HOST(!is_writing_); - ASSERT_HOST(size > 0); - ASSERT_HOST(count >= 0); - size_t required_size; - if (SIZE_MAX / size <= count) { - // Avoid integer overflow. 
- required_size = data_->size() - offset_; - } else { - required_size = size * count; - if (data_->size() - offset_ < required_size) { - required_size = data_->size() - offset_; - } - } - if (required_size > 0 && buffer != nullptr) - memcpy(buffer, &(*data_)[offset_], required_size); - offset_ += required_size; - return required_size / size; -} - -void TFile::Rewind() { - ASSERT_HOST(!is_writing_); - offset_ = 0; -} - -void TFile::OpenWrite(GenericVector* data) { - offset_ = 0; - if (data != nullptr) { - if (data_is_owned_) delete data_; - data_ = data; - data_is_owned_ = false; - } else if (!data_is_owned_) { - data_ = new GenericVector; - data_is_owned_ = true; - } - is_writing_ = true; - swap_ = false; - data_->truncate(0); -} - -bool TFile::CloseWrite(const STRING& filename, FileWriter writer) { - ASSERT_HOST(is_writing_); - if (writer == nullptr) - return SaveDataToFile(*data_, filename); - else - return (*writer)(*data_, filename); -} - -int TFile::FWrite(const void* buffer, size_t size, int count) { - ASSERT_HOST(is_writing_); - ASSERT_HOST(size > 0); - ASSERT_HOST(count >= 0); - ASSERT_HOST(SIZE_MAX / size > count); - size_t total = size * count; - const char* buf = static_cast(buffer); - // This isn't very efficient, but memory is so fast compared to disk - // that it is relatively unimportant, and very simple. - for (size_t i = 0; i < total; ++i) - data_->push_back(buf[i]); - return count; -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/serialis.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/serialis.h deleted file mode 100644 index 9f7f6a6a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/serialis.h +++ /dev/null @@ -1,162 +0,0 @@ -/********************************************************************** - * File: serialis.h (Formerly serialmac.h) - * Description: Inline routines and macros for serialisation functions - * Author: Phil Cheatle - * Created: Tue Oct 08 08:33:12 BST 1991 - * - * (C) Copyright 1990, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef SERIALIS_H -#define SERIALIS_H - -#include -#include -#include -#include "host.h" - -template class GenericVector; -class STRING; - -/*********************************************************************** - QUOTE_IT MACRO DEFINITION - =========================== -Replace with "". may be an arbitrary number of tokens -***********************************************************************/ - -#define QUOTE_IT(parm) #parm - -namespace tesseract { - -// Return number of elements of an array. -template -constexpr size_t countof(T const (&)[N]) noexcept { - return N; -} - -// Function to read a GenericVector from a whole file. -// Returns false on failure. 
-typedef bool (*FileReader)(const STRING& filename, GenericVector* data); -// Function to write a GenericVector to a whole file. -// Returns false on failure. -typedef bool (*FileWriter)(const GenericVector& data, - const STRING& filename); - -// Deserialize data from file. -bool DeSerialize(FILE* fp, char* data, size_t n = 1); -bool DeSerialize(FILE* fp, float* data, size_t n = 1); -bool DeSerialize(FILE* fp, int8_t* data, size_t n = 1); -bool DeSerialize(FILE* fp, int16_t* data, size_t n = 1); -bool DeSerialize(FILE* fp, int32_t* data, size_t n = 1); -bool DeSerialize(FILE* fp, uint8_t* data, size_t n = 1); -bool DeSerialize(FILE* fp, uint16_t* data, size_t n = 1); -bool DeSerialize(FILE* fp, uint32_t* data, size_t n = 1); - -// Serialize data to file. -bool Serialize(FILE* fp, const char* data, size_t n = 1); -bool Serialize(FILE* fp, const float* data, size_t n = 1); -bool Serialize(FILE* fp, const int8_t* data, size_t n = 1); -bool Serialize(FILE* fp, const int16_t* data, size_t n = 1); -bool Serialize(FILE* fp, const int32_t* data, size_t n = 1); -bool Serialize(FILE* fp, const uint8_t* data, size_t n = 1); -bool Serialize(FILE* fp, const uint16_t* data, size_t n = 1); -bool Serialize(FILE* fp, const uint32_t* data, size_t n = 1); - -// Simple file class. -// Allows for portable file input from memory and from foreign file systems. -class TFile { - public: - TFile(); - ~TFile(); - - // All the Open methods load the whole file into memory for reading. - // Opens a file with a supplied reader, or nullptr to use the default. - // Note that mixed read/write is not supported. - bool Open(const STRING& filename, FileReader reader); - // From an existing memory buffer. - bool Open(const char* data, int size); - // From an open file and an end offset. - bool Open(FILE* fp, int64_t end_offset); - // Sets the value of the swap flag, so that FReadEndian does the right thing. - void set_swap(bool value) { swap_ = value; } - - // Deserialize data. 
- bool DeSerialize(char* data, size_t count = 1); - bool DeSerialize(double* data, size_t count = 1); - bool DeSerialize(float* data, size_t count = 1); - bool DeSerialize(int8_t* data, size_t count = 1); - bool DeSerialize(int16_t* data, size_t count = 1); - bool DeSerialize(int32_t* data, size_t count = 1); - bool DeSerialize(int64_t* data, size_t count = 1); - bool DeSerialize(uint8_t* data, size_t count = 1); - bool DeSerialize(uint16_t* data, size_t count = 1); - bool DeSerialize(uint32_t* data, size_t count = 1); - bool DeSerialize(uint64_t* data, size_t count = 1); - - // Serialize data. - bool Serialize(const char* data, size_t count = 1); - bool Serialize(const double* data, size_t count = 1); - bool Serialize(const float* data, size_t count = 1); - bool Serialize(const int8_t* data, size_t count = 1); - bool Serialize(const int16_t* data, size_t count = 1); - bool Serialize(const int32_t* data, size_t count = 1); - bool Serialize(const int64_t* data, size_t count = 1); - bool Serialize(const uint8_t* data, size_t count = 1); - bool Serialize(const uint16_t* data, size_t count = 1); - bool Serialize(const uint32_t* data, size_t count = 1); - bool Serialize(const uint64_t* data, size_t count = 1); - - // Skip data. - bool Skip(size_t count); - - // Reads a line like fgets. Returns nullptr on EOF, otherwise buffer. - // Reads at most buffer_size bytes, including '\0' terminator, even if - // the line is longer. Does nothing if buffer_size <= 0. - // To use fscanf use FGets and sscanf. - char* FGets(char* buffer, int buffer_size); - // Replicates fread, followed by a swap of the bytes if needed, returning the - // number of items read. If swap_ is true then the count items will each have - // size bytes reversed. - int FReadEndian(void* buffer, size_t size, int count); - // Replicates fread, returning the number of items read. - int FRead(void* buffer, size_t size, int count); - // Resets the TFile as if it has been Opened, but nothing read. 
- // Only allowed while reading! - void Rewind(); - - // Open for writing. Either supply a non-nullptr data with OpenWrite before - // calling FWrite, (no close required), or supply a nullptr data to OpenWrite - // and call CloseWrite to write to a file after the FWrites. - void OpenWrite(GenericVector* data); - bool CloseWrite(const STRING& filename, FileWriter writer); - - // Replicates fwrite, returning the number of items written. - // To use fprintf, use snprintf and FWrite. - int FWrite(const void* buffer, size_t size, int count); - - private: - // The number of bytes used so far. - int offset_; - // The buffered data from the file. - GenericVector* data_; - // True if the data_ pointer is owned by *this. - bool data_is_owned_; - // True if the TFile is open for writing. - bool is_writing_; - // True if bytes need to be swapped in FReadEndian. - bool swap_; -}; - -} // namespace tesseract. - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/sorthelper.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/sorthelper.h deleted file mode 100644 index d5b67db2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/sorthelper.h +++ /dev/null @@ -1,108 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: sorthelper.h -// Description: Generic sort and maxfinding class. -// Author: Ray Smith -// Created: Thu May 20 17:48:21 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_SORTHELPER_H_ -#define TESSERACT_CCUTIL_SORTHELPER_H_ - -#include -#include "genericvector.h" - -// Generic class to provide functions based on a pair. -// T is the value type. -// The class keeps a count of each value and can return the most frequent -// value or a sorted array of the values with counts. -// Note that this class uses linear search for adding. It is better -// to use the STATS class to get the mode of a large number of values -// in a small space. SortHelper is better to get the mode of a small number -// of values from a large space. -// T must have a copy constructor. -template -class SortHelper { - public: - // Simple pair class to hold the values and counts. - template struct SortPair { - PairT value; - int count; - }; - // qsort function to sort by decreasing count. - static int SortPairsByCount(const void* v1, const void* v2) { - const SortPair* p1 = static_cast*>(v1); - const SortPair* p2 = static_cast*>(v2); - return p2->count - p1->count; - } - // qsort function to sort by decreasing value. - static int SortPairsByValue(const void* v1, const void* v2) { - const SortPair* p1 = static_cast*>(v1); - const SortPair* p2 = static_cast*>(v2); - if (p2->value - p1->value < 0) return -1; - if (p2->value - p1->value > 0) return 1; - return 0; - } - - // Constructor takes a hint of the array size, but it need not be accurate. - explicit SortHelper(int sizehint) { - counts_.reserve(sizehint); - } - - // Add a value that may be a duplicate of an existing value. - // Uses a linear search. - void Add(T value, int count) { - // Linear search for value. 
- for (int i = 0; i < counts_.size(); ++i) { - if (counts_[i].value == value) { - counts_[i].count += count; - return; - } - } - SortPair new_pair = {value, count}; - counts_.push_back(SortPair(new_pair)); - } - - // Returns the frequency of the most frequent value. - // If max_value is not nullptr, returns the most frequent value. - // If the array is empty, returns -INT32_MAX and max_value is unchanged. - int MaxCount(T* max_value) const { - int best_count = -INT32_MAX; - for (int i = 0; i < counts_.size(); ++i) { - if (counts_[i].count > best_count) { - best_count = counts_[i].count; - if (max_value != nullptr) - *max_value = counts_[i].value; - } - } - return best_count; - } - - // Returns the data array sorted by decreasing frequency. - const GenericVector >& SortByCount() { - counts_.sort(&SortPairsByCount); - return counts_; - } - // Returns the data array sorted by decreasing value. - const GenericVector >& SortByValue() { - counts_.sort(&SortPairsByValue); - return counts_; - } - - private: - GenericVector > counts_; -}; - - -#endif // TESSERACT_CCUTIL_SORTHELPER_H_. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/strngs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/strngs.cpp deleted file mode 100644 index 49562af4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/strngs.cpp +++ /dev/null @@ -1,505 +0,0 @@ -/********************************************************************** - * File: strngs.cpp (Formerly strings.c) - * Description: STRING class functions. - * Author: Ray Smith - * Created: Fri Feb 15 09:13:30 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "strngs.h" -#include // for assert -#include // for malloc, free -#include "errcode.h" // for ASSERT_HOST -#include "genericvector.h" // for GenericVector -#include "helpers.h" // for ReverseN -#include "serialis.h" // for TFile - -using tesseract::TFile; - -// Size of buffer needed to host the decimal representation of the maximum -// possible length of an int (in 64 bits), being -<20 digits>. -const int kMaxIntSize = 22; -// Size of buffer needed to host the decimal representation of the maximum -// possible length of a %.8g being -1.2345678e+999 = 16. -const int kMaxDoubleSize = 16; - -/********************************************************************** - * STRING_HEADER provides metadata about the allocated buffer, - * including total capacity and how much used (strlen with '\0'). - * - * The implementation hides this header at the start of the data - * buffer and appends the string on the end to keep sizeof(STRING) - * unchanged from earlier versions so serialization is not affected. 
- * - * The collection of MACROS provide different implementations depending - * on whether the string keeps track of its strlen or not so that this - * feature can be added in later when consumers don't modify the string - **********************************************************************/ - -// Smallest string to allocate by default -const int kMinCapacity = 16; - -char* STRING::AllocData(int used, int capacity) { - data_ = (STRING_HEADER *)malloc(capacity + sizeof(STRING_HEADER)); - - // header is the metadata for this memory block - STRING_HEADER* header = GetHeader(); - header->capacity_ = capacity; - header->used_ = used; - return GetCStr(); -} - -void STRING::DiscardData() { - free(data_); - data_ = nullptr; -} - -// This is a private method; ensure FixHeader is called (or used_ is well defined) -// beforehand -char* STRING::ensure_cstr(int32_t min_capacity) { - STRING_HEADER* orig_header = GetHeader(); - if (min_capacity <= orig_header->capacity_) - return ((char *)this->data_) + sizeof(STRING_HEADER); - - // if we are going to grow bigger, than double our existing - // size, but if that still is not big enough then keep the - // requested capacity - if (min_capacity < 2 * orig_header->capacity_) - min_capacity = 2 * orig_header->capacity_; - - int alloc = sizeof(STRING_HEADER) + min_capacity; - STRING_HEADER* new_header = (STRING_HEADER*)(malloc(alloc)); - - memcpy(&new_header[1], GetCStr(), orig_header->used_); - new_header->capacity_ = min_capacity; - new_header->used_ = orig_header->used_; - - // free old memory, then rebind to new memory - DiscardData(); - data_ = new_header; - - assert(InvariantOk()); - return ((char *)data_) + sizeof(STRING_HEADER); -} - -// This is const, but is modifying a mutable field -// this way it can be used on const or non-const instances. 
-void STRING::FixHeader() const { - const STRING_HEADER* header = GetHeader(); - if (header->used_ < 0) - header->used_ = strlen(GetCStr()) + 1; -} - - -STRING::STRING() { - // Empty STRINGs contain just the "\0". - memcpy(AllocData(1, kMinCapacity), "", 1); -} - -STRING::STRING(const STRING& str) { - str.FixHeader(); - const STRING_HEADER* str_header = str.GetHeader(); - const int str_used = str_header->used_; - char *this_cstr = AllocData(str_used, str_used); - memcpy(this_cstr, str.GetCStr(), str_used); - assert(InvariantOk()); -} - -STRING::STRING(const char* cstr) { - if (cstr == nullptr) { - // Empty STRINGs contain just the "\0". - memcpy(AllocData(1, kMinCapacity), "", 1); - } else { - const int len = strlen(cstr) + 1; - char* this_cstr = AllocData(len, len); - memcpy(this_cstr, cstr, len); - } - assert(InvariantOk()); -} - -STRING::STRING(const char *data, int length) { - if (data == nullptr) { - // Empty STRINGs contain just the "\0". - memcpy(AllocData(1, kMinCapacity), "", 1); - } else { - char* this_cstr = AllocData(length + 1, length + 1); - memcpy(this_cstr, data, length); - this_cstr[length] = '\0'; - } -} - -STRING::~STRING() { - DiscardData(); -} - -// TODO(rays) Change all callers to use TFile and remove the old functions. -// Writes to the given file. Returns false in case of error. -bool STRING::Serialize(FILE* fp) const { - uint32_t len = length(); - return tesseract::Serialize(fp, &len) && - tesseract::Serialize(fp, GetCStr(), len); -} -// Writes to the given file. Returns false in case of error. -bool STRING::Serialize(TFile* fp) const { - uint32_t len = length(); - return fp->Serialize(&len) && - fp->Serialize(GetCStr(), len); -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. 
-bool STRING::DeSerialize(bool swap, FILE* fp) { - uint32_t len; - if (!tesseract::DeSerialize(fp, &len)) return false; - if (swap) - ReverseN(&len, sizeof(len)); - // Arbitrarily limit the number of characters to protect against bad data. - if (len > UINT16_MAX) return false; - truncate_at(len); - return tesseract::DeSerialize(fp, GetCStr(), len); -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool STRING::DeSerialize(TFile* fp) { - uint32_t len; - if (!fp->DeSerialize(&len)) return false; - truncate_at(len); - return fp->DeSerialize(GetCStr(), len); -} - -// As DeSerialize, but only seeks past the data - hence a static method. -bool STRING::SkipDeSerialize(TFile* fp) { - uint32_t len; - if (!fp->DeSerialize(&len)) return false; - return fp->Skip(len); -} - -bool STRING::contains(const char c) const { - return (c != '\0') && (strchr (GetCStr(), c) != nullptr); -} - -int32_t STRING::length() const { - FixHeader(); - return GetHeader()->used_ - 1; -} - -const char* STRING::string() const { - const STRING_HEADER* header = GetHeader(); - if (header->used_ == 0) - return nullptr; - - // mark header length unreliable because tesseract might - // cast away the const and mutate the string directly. - header->used_ = -1; - return GetCStr(); -} - -const char* STRING::c_str() const { - return string(); -} - -/****** - * The STRING_IS_PROTECTED interface adds additional support to migrate - * code that needs to modify the STRING in ways not otherwise supported - * without violating encapsulation. - * - * Also makes the [] operator return a const so it is immutable - */ -#if STRING_IS_PROTECTED -const char& STRING::operator[](int32_t index) const { - return GetCStr()[index]; -} - -void STRING::insert_range(int32_t index, const char* str, int len) { - // if index is outside current range, then also grow size of string - // to accmodate the requested range. 
- STRING_HEADER* this_header = GetHeader(); - int used = this_header->used_; - if (index > used) - used = index; - - char* this_cstr = ensure_cstr(used + len + 1); - if (index < used) { - // move existing string from index to '\0' inclusive. - memmove(this_cstr + index + len, - this_cstr + index, - this_header->used_ - index); - } else if (len > 0) { - // We are going to overwrite previous null terminator, so write the new one. - this_cstr[this_header->used_ + len - 1] = '\0'; - - // If the old header did not have the terminator, - // then we need to account for it now that we've added it. - // Otherwise it was already accounted for; we just moved it. - if (this_header->used_ == 0) - ++this_header->used_; - } - - // Write new string to index. - // The string is already terminated from the conditions above. - memcpy(this_cstr + index, str, len); - this_header->used_ += len; - - assert(InvariantOk()); -} - -void STRING::erase_range(int32_t index, int len) { - char* this_cstr = GetCStr(); - STRING_HEADER* this_header = GetHeader(); - - memcpy(this_cstr+index, this_cstr+index+len, - this_header->used_ - index - len); - this_header->used_ -= len; - assert(InvariantOk()); -} - -#else -void STRING::truncate_at(int32_t index) { - ASSERT_HOST(index >= 0); - FixHeader(); - char* this_cstr = ensure_cstr(index + 1); - this_cstr[index] = '\0'; - GetHeader()->used_ = index + 1; - assert(InvariantOk()); -} - -char& STRING::operator[](int32_t index) const { - // Code is casting away this const and mutating the string, - // so mark used_ as -1 to flag it unreliable. 
- GetHeader()->used_ = -1; - return ((char *)GetCStr())[index]; -} -#endif - -void STRING::split(const char c, GenericVector *splited) { - int start_index = 0; - const int len = length(); - for (int i = 0; i < len; i++) { - if ((*this)[i] == c) { - if (i != start_index) { - (*this)[i] = '\0'; - splited->push_back(STRING(GetCStr() + start_index, i - start_index)); - (*this)[i] = c; - } - start_index = i + 1; - } - } - - if (len != start_index) { - splited->push_back(STRING(GetCStr() + start_index, len - start_index)); - } -} - -bool STRING::operator==(const STRING& str) const { - FixHeader(); - str.FixHeader(); - const STRING_HEADER* str_header = str.GetHeader(); - const STRING_HEADER* this_header = GetHeader(); - const int this_used = this_header->used_; - const int str_used = str_header->used_; - - return (this_used == str_used) - && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0); -} - -bool STRING::operator!=(const STRING& str) const { - FixHeader(); - str.FixHeader(); - const STRING_HEADER* str_header = str.GetHeader(); - const STRING_HEADER* this_header = GetHeader(); - const int this_used = this_header->used_; - const int str_used = str_header->used_; - - return (this_used != str_used) - || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0); -} - -bool STRING::operator!=(const char* cstr) const { - FixHeader(); - const STRING_HEADER* this_header = GetHeader(); - - if (cstr == nullptr) - return this_header->used_ > 1; // either '\0' or nullptr - else { - const int32_t length = strlen(cstr) + 1; - return (this_header->used_ != length) - || (memcmp(GetCStr(), cstr, length) != 0); - } -} - -STRING& STRING::operator=(const STRING& str) { - str.FixHeader(); - const STRING_HEADER* str_header = str.GetHeader(); - const int str_used = str_header->used_; - - GetHeader()->used_ = 0; // clear since ensure doesn't need to copy data - char* this_cstr = ensure_cstr(str_used); - STRING_HEADER* this_header = GetHeader(); - - memcpy(this_cstr, str.GetCStr(), str_used); - 
this_header->used_ = str_used; - - assert(InvariantOk()); - return *this; -} - -STRING & STRING::operator+=(const STRING& str) { - FixHeader(); - str.FixHeader(); - const STRING_HEADER* str_header = str.GetHeader(); - const char* str_cstr = str.GetCStr(); - const int str_used = str_header->used_; - const int this_used = GetHeader()->used_; - char* this_cstr = ensure_cstr(this_used + str_used); - - STRING_HEADER* this_header = GetHeader(); // after ensure for realloc - - if (this_used > 1) { - memcpy(this_cstr + this_used - 1, str_cstr, str_used); - this_header->used_ += str_used - 1; // overwrite '\0' - } else { - memcpy(this_cstr, str_cstr, str_used); - this_header->used_ = str_used; - } - - assert(InvariantOk()); - return *this; -} - -void STRING::add_str_int(const char* str, int number) { - if (str != nullptr) - *this += str; - // Allow space for the maximum possible length of int64_t. - char num_buffer[kMaxIntSize]; - snprintf(num_buffer, kMaxIntSize - 1, "%d", number); - num_buffer[kMaxIntSize - 1] = '\0'; - *this += num_buffer; -} -// Appends the given string and double (as a %.8g) to this. -void STRING::add_str_double(const char* str, double number) { - if (str != nullptr) - *this += str; - // Allow space for the maximum possible length of %8g. - char num_buffer[kMaxDoubleSize]; - snprintf(num_buffer, kMaxDoubleSize - 1, "%.8g", number); - num_buffer[kMaxDoubleSize - 1] = '\0'; - *this += num_buffer; -} - -STRING & STRING::operator=(const char* cstr) { - STRING_HEADER* this_header = GetHeader(); - if (cstr) { - const int len = strlen(cstr) + 1; - - this_header->used_ = 0; // don't bother copying data if need to realloc - char* this_cstr = ensure_cstr(len); - this_header = GetHeader(); // for realloc - memcpy(this_cstr, cstr, len); - this_header->used_ = len; - } else { - // Reallocate to same state as default constructor. - DiscardData(); - // Empty STRINGs contain just the "\0". 
- memcpy(AllocData(1, kMinCapacity), "", 1); - } - - assert(InvariantOk()); - return *this; -} - -void STRING::assign(const char *cstr, int len) { - STRING_HEADER* this_header = GetHeader(); - this_header->used_ = 0; // don't bother copying data if need to realloc - char* this_cstr = ensure_cstr(len + 1); // +1 for '\0' - - this_header = GetHeader(); // for realloc - memcpy(this_cstr, cstr, len); - this_cstr[len] = '\0'; - this_header->used_ = len + 1; - - assert(InvariantOk()); -} - -STRING STRING::operator+(const STRING& str) const { - STRING result(*this); - result += str; - - assert(InvariantOk()); - return result; -} - - -STRING STRING::operator+(const char ch) const { - STRING result; - FixHeader(); - const STRING_HEADER* this_header = GetHeader(); - const int this_used = this_header->used_; - char* result_cstr = result.ensure_cstr(this_used + 1); - STRING_HEADER* result_header = result.GetHeader(); - const int result_used = result_header->used_; - - // copies '\0' but we'll overwrite that - memcpy(result_cstr, GetCStr(), this_used); - result_cstr[result_used] = ch; // overwrite old '\0' - result_cstr[result_used + 1] = '\0'; // append on '\0' - ++result_header->used_; - - assert(InvariantOk()); - return result; -} - - -STRING& STRING::operator+=(const char *str) { - if (!str || !*str) // empty string has no effect - return *this; - - FixHeader(); - const int len = strlen(str) + 1; - const int this_used = GetHeader()->used_; - char* this_cstr = ensure_cstr(this_used + len); - STRING_HEADER* this_header = GetHeader(); // after ensure for realloc - - // if we had non-empty string then append overwriting old '\0' - // otherwise replace - if (this_used > 0) { - memcpy(this_cstr + this_used - 1, str, len); - this_header->used_ += len - 1; - } else { - memcpy(this_cstr, str, len); - this_header->used_ = len; - } - - assert(InvariantOk()); - return *this; -} - - -STRING& STRING::operator+=(const char ch) { - if (ch == '\0') - return *this; - - FixHeader(); - int 
this_used = GetHeader()->used_; - char* this_cstr = ensure_cstr(this_used + 1); - STRING_HEADER* this_header = GetHeader(); - - if (this_used > 0) - --this_used; // undo old empty null if there was one - - this_cstr[this_used++] = ch; // append ch to end - this_cstr[this_used++] = '\0'; // append '\0' after ch - this_header->used_ = this_used; - - assert(InvariantOk()); - return *this; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/strngs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/strngs.h deleted file mode 100644 index 244b4534..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/strngs.h +++ /dev/null @@ -1,181 +0,0 @@ -/********************************************************************** - * File: strngs.h (Formerly strings.h) - * Description: STRING class definition. - * Author: Ray Smith - * Created: Fri Feb 15 09:15:01 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef STRNGS_H -#define STRNGS_H - -#include // for assert -#include // for uint32_t -#include // for FILE -#include // for strncpy -#include "platform.h" // for TESS_API - -namespace tesseract { -class TFile; -} // namespace tesseract. - -// STRING_IS_PROTECTED means that string[index] = X is invalid -// because you have to go through strings interface to modify it. 
-// This allows the string to ensure internal integrity and maintain -// its own string length. Unfortunately this is not possible because -// STRINGS are used as direct-manipulation data buffers for things -// like length arrays and many places cast away the const on string() -// to mutate the string. Turning this off means that internally we -// cannot assume we know the strlen. -#define STRING_IS_PROTECTED 0 - -template class GenericVector; - -class TESS_API STRING -{ - public: - STRING(); - STRING(const STRING &string); - STRING(const char *string); - STRING(const char *data, int length); - ~STRING(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - // Writes to the given file. Returns false in case of error. - bool Serialize(tesseract::TFile* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(tesseract::TFile* fp); - // As DeSerialize, but only seeks past the data - hence a static method. - static bool SkipDeSerialize(tesseract::TFile* fp); - - bool contains(const char c) const; - int32_t length() const; - int32_t size() const { return length(); } - // Workaround to avoid g++ -Wsign-compare warnings. 
- uint32_t unsigned_size() const { - const int32_t len = length(); - assert(0 <= len); - return static_cast(len); - } - const char *string() const; - const char *c_str() const; - - inline char* strdup() const { - int32_t len = length() + 1; - return strncpy(new char[len], GetCStr(), len); - } - -#if STRING_IS_PROTECTED - const char &operator[] (int32_t index) const; - // len is number of chars in s to insert starting at index in this string - void insert_range(int32_t index, const char*s, int len); - void erase_range(int32_t index, int len); -#else - char &operator[] (int32_t index) const; -#endif - void split(const char c, GenericVector *splited); - void truncate_at(int32_t index); - - bool operator== (const STRING & string) const; - bool operator!= (const STRING & string) const; - bool operator!= (const char *string) const; - - STRING & operator= (const char *string); - STRING & operator= (const STRING & string); - - STRING operator+ (const STRING & string) const; - STRING operator+ (const char ch) const; - - STRING & operator+= (const char *string); - STRING & operator+= (const STRING & string); - STRING & operator+= (const char ch); - - // Assignment for strings which are not null-terminated. - void assign(const char *cstr, int len); - - // Appends the given string and int (as a %d) to this. - // += cannot be used for ints as there as a char += operator that would - // be ambiguous, and ints usually need a string before or between them - // anyway. - void add_str_int(const char* str, int number); - // Appends the given string and double (as a %.8g) to this. - void add_str_double(const char* str, double number); - - // ensure capacity but keep pointer encapsulated - inline void ensure(int32_t min_capacity) { ensure_cstr(min_capacity); } - - private: - typedef struct STRING_HEADER { - // How much space was allocated in the string buffer for char data. 
- int capacity_; - - // used_ is how much of the capacity is currently being used, - // including a '\0' terminator. - // - // If used_ is 0 then string is nullptr (not even the '\0') - // else if used_ > 0 then it is strlen() + 1 (because it includes '\0') - // else strlen is >= 0 (not nullptr) but needs to be computed. - // this condition is set when encapsulation is violated because - // an API returned a mutable string. - // - // capacity_ - used_ = excess capacity that the string can grow - // without reallocating - mutable int used_; - } STRING_HEADER; - - // To preserve the behavior of the old serialization, we only have space - // for one pointer in this structure. So we are embedding a data structure - // at the start of the storage that will hold additional state variables, - // then storing the actual string contents immediately after. - STRING_HEADER* data_; - - // returns the header part of the storage - inline STRING_HEADER* GetHeader() { - return data_; - } - inline const STRING_HEADER* GetHeader() const { - return data_; - } - - // returns the string data part of storage - inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); } - - inline const char* GetCStr() const { - return ((const char *)data_) + sizeof(STRING_HEADER); - } - inline bool InvariantOk() const { -#if STRING_IS_PROTECTED - return (GetHeader()->used_ == 0) ? - (string() == nullptr) : (GetHeader()->used_ == (strlen(string()) + 1)); -#else - return true; -#endif - } - - // Ensure string has requested capacity as optimization - // to avoid unnecessary reallocations. 
- // The return value is a cstr buffer with at least requested capacity - char* ensure_cstr(int32_t min_capacity); - - void FixHeader() const; // make used_ non-negative, even if const - - char* AllocData(int used, int capacity); - void DiscardData(); -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tesscallback.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tesscallback.h deleted file mode 100644 index 6d67027f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tesscallback.h +++ /dev/null @@ -1,9721 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tesscallback.h -// Description: classes and functions to replace pointer-to-functions -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESS_CALLBACK_SPECIALIZATIONS_H_ -#define TESS_CALLBACK_SPECIALIZATIONS_H_ - -#include "host.h" // For nullptr. 
- -struct TessCallbackUtils_ { - static void FailIsRepeatable(const char* name); -}; - - -class TessClosure { - public: - virtual ~TessClosure(); - virtual void Run() = 0; -}; - -template -class TessResultCallback { - public: - virtual ~TessResultCallback() { } - virtual R Run() = 0; -}; - -template -class _ConstTessMemberResultCallback_0_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)() const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_0( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(); - return result; - } else { - R result = (object_->*member_)(); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_0_0 - : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)() const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_0( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run() { - if (!del) { - (object_->*member_)(); - } else { - (object_->*member_)(); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_0::base* -NewTessCallback( - const T1* obj, R (T2::*member)() const) { - return new _ConstTessMemberResultCallback_0_0( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_0::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)() const) { - return new _ConstTessMemberResultCallback_0_0( - obj, member); -} -#endif - -template -class 
_TessMemberResultCallback_0_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)() ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_0( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(); - return result; - } else { - R result = (object_->*member_)(); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_0_0 - : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)() ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_0( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run() { - if (!del) { - (object_->*member_)(); - } else { - (object_->*member_)(); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_0::base* -NewTessCallback( - T1* obj, R (T2::*member)()) { - return new _TessMemberResultCallback_0_0( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_0::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)()) { - return new _TessMemberResultCallback_0_0( - obj, member); -} -#endif - -template -class _TessFunctionResultCallback_0_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_0( - FunctionSignature function) - : function_(function) { - } - - virtual R Run() { - if (!del) { - R result = (*function_)(); - return result; - } else { - R result = (*function_)(); - // 
zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_0_0 - : public TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_0( - FunctionSignature function) - : function_(function) { - } - - virtual void Run() { - if (!del) { - (*function_)(); - } else { - (*function_)(); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_0_0::base* -NewTessCallback(R (*function)()) { - return new _TessFunctionResultCallback_0_0(function); -} - -template -inline typename _TessFunctionResultCallback_0_0::base* -NewPermanentTessCallback(R (*function)()) { - return new _TessFunctionResultCallback_0_0(function); -} - - - -// Specified by TR1 [4.7.2] Reference modifications. -template struct remove_reference; -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; - -// Identity::type is a typedef of T. Useful for preventing the -// compiler from inferring the type of an argument in templates. 
-template -struct Identity { - typedef T type; -}; - -template -class _ConstTessMemberResultCallback_1_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_0(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_); - return result; - } else { - R result = (object_->*member_)(p1_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_1_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_0(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_); - } else { - (object_->*member_)(p1_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_0(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_0(obj, member, p1); -} -#endif - -template -class _TessMemberResultCallback_1_0 : public TessResultCallback { - public: - typedef 
TessResultCallback base; - typedef R (T::*MemberSignature)(P1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_0(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_); - return result; - } else { - R result = (object_->*member_)(p1_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_1_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_0(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_); - } else { - (object_->*member_)(p1_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_0::base* -NewTessCallback(T1* obj, R (T2::*member)(P1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_0(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_0::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_0(obj, member, p1); -} -#endif - -template -class _TessFunctionResultCallback_1_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_0(FunctionSignature function, P1 p1) - : 
function_(function), p1_(p1) { } - - virtual R Run() { - if (!del) { - R result = (*function_)(p1_); - return result; - } else { - R result = (*function_)(p1_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_1_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(P1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_0(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual void Run() { - if (!del) { - (*function_)(p1_); - } else { - (*function_)(p1_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_1_0::base* -NewTessCallback(R (*function)(P1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_0(function, p1); -} - -template -inline typename _TessFunctionResultCallback_1_0::base* -NewPermanentTessCallback(R (*function)(P1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_0(function, p1); -} - -template -class _ConstTessMemberResultCallback_2_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_0(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return 
result; - } - } -}; - -template -class _ConstTessMemberResultCallback_2_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_0(const T* object, MemberSignature member, P1 p1, P2 p2) - : - object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_); - } else { - (object_->*member_)(p1_,p2_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_0(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_0(obj, member, p1, p2); -} -#endif - -template -class _TessMemberResultCallback_2_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_0(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete 
this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_2_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_0(T* object, MemberSignature member, P1 p1, P2 p2) - : - object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_); - } else { - (object_->*member_)(p1_,p2_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_0::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_0(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_0::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_0(obj, member, p1, p2); -} -#endif - -template -class _TessFunctionResultCallback_2_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_0(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual R Run() { - if (!del) { - R result = (*function_)(p1_,p2_); - return result; - } else { - R result = (*function_)(p1_,p2_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_2_0 : public 
TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_0(FunctionSignature function, P1 p1, P2 p2) - : - function_(function), p1_(p1), p2_(p2) { } - - virtual void Run() { - if (!del) { - (*function_)(p1_,p2_); - } else { - (*function_)(p1_,p2_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_2_0::base* -NewTessCallback(R (*function)(P1,P2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_0(function, p1, p2); -} - -template -inline typename _TessFunctionResultCallback_2_0::base* -NewPermanentTessCallback(R (*function)(P1,P2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_0(function, p1, p2); -} - -template -class _ConstTessMemberResultCallback_3_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : - object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_3_0 : public TessClosure { - public: - typedef TessClosure base; - typedef 
void (T::*MemberSignature)(P1,P2,P3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_); - } else { - (object_->*member_)(p1_,p2_,p3_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_0(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_0(obj, member, p1, p2, p3); -} -#endif - -template -class _TessMemberResultCallback_3_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_); - // zero out the pointer to 
ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_3_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_); - } else { - (object_->*member_)(p1_,p2_,p3_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_0::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_0(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_0::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_0(obj, member, p1, p2, p3); -} -#endif - -template -class _TessFunctionResultCallback_3_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_0(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run() { - if (!del) { - R result = 
(*function_)(p1_,p2_,p3_); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_3_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_0(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run() { - if (!del) { - (*function_)(p1_,p2_,p3_); - } else { - (*function_)(p1_,p2_,p3_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_3_0::base* -NewTessCallback(R (*function)(P1,P2,P3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_0(function, p1, p2, p3); -} - -template -inline typename _TessFunctionResultCallback_3_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_0(function, p1, p2, p3); -} - -template -class _ConstTessMemberResultCallback_4_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : 
object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_4_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessMemberResultCallback_4_0 : public TessResultCallback { - 
public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_4_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_0::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename 
_TessMemberResultCallback_4_0::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessFunctionResultCallback_4_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3,P4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run() { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_4_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3,P4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run() { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_); - } else { - (*function_)(p1_,p2_,p3_,p4_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_4_0::base* -NewTessCallback(R (*function)(P1,P2,P3,P4), 
typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_0(function, p1, p2, p3, p4); -} - -template -inline typename _TessFunctionResultCallback_4_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_0(function, p1, p2, p3, p4); -} - -template -class _ConstTessMemberResultCallback_5_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_5_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_0(const T* object, MemberSignature member, P1 p1, P2 p2, 
P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessMemberResultCallback_5_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - // zero out the pointer to ensure segfault 
if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_5_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_0::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_0::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessFunctionResultCallback_5_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - 
typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run() { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_5_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run() { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_5_0::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_0(function, p1, p2, p3, p4, p5); -} - -template -inline typename _TessFunctionResultCallback_5_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename 
Identity::type p5) { - return new _TessFunctionResultCallback_5_0(function, p1, p2, p3, p4, p5); -} - -template -class _ConstTessMemberResultCallback_6_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_6_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - // zero out the 
pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessMemberResultCallback_6_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run() { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_6_0 : 
public TessClosure { - public: - typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_0(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run() { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_0::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_0::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessFunctionResultCallback_6_0 : public TessResultCallback { - public: - typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type 
p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run() { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_6_0 : public TessClosure { - public: - typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run() { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_6_0::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_0(function, p1, p2, p3, p4, p5, p6); -} - -template -inline typename _TessFunctionResultCallback_6_0::base* 
-NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_0(function, p1, p2, p3, p4, p5, p6); -} - -template -class TessCallback1 { - public: - virtual ~TessCallback1() { } - virtual void Run(A1) = 0; -}; - -template -class TessResultCallback1 { - public: - virtual ~TessResultCallback1() { } - virtual R Run(A1) = 0; -}; - - -template -class TessCallback2 { - public: - virtual ~TessCallback2() { } - virtual void Run(A1,A2) = 0; -}; - -template -class TessResultCallback2 { - public: - virtual ~TessResultCallback2() { } - virtual R Run(A1,A2) = 0; -}; - -template -class TessCallback3 { - public: - virtual ~TessCallback3() { } - virtual void Run(A1,A2,A3) = 0; -}; - -template -class TessResultCallback3 { - public: - virtual ~TessResultCallback3() { } - virtual R Run(A1,A2,A3) = 0; -}; - -template -class TessCallback4 { - public: - virtual ~TessCallback4() { } - virtual void Run(A1,A2,A3,A4) = 0; -}; - -template -class TessResultCallback4 { - public: - virtual ~TessResultCallback4() { } - virtual R Run(A1,A2,A3,A4) = 0; -}; - -template -class _ConstTessMemberResultCallback_0_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(A1) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_1( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(a1); - return result; - } else { - R result = (object_->*member_)(a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_0_1 - : public TessCallback1 { - public: - 
typedef TessCallback1 base; - typedef void (T::*MemberSignature)(A1) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_1( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(a1); - } else { - (object_->*member_)(a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_1::base* -NewTessCallback( - const T1* obj, R (T2::*member)(A1) const) { - return new _ConstTessMemberResultCallback_0_1( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_1::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)(A1) const) { - return new _ConstTessMemberResultCallback_0_1( - obj, member); -} -#endif - -template -class _TessMemberResultCallback_0_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(A1) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_1( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(a1); - return result; - } else { - R result = (object_->*member_)(a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_0_1 - : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(A1) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_1( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(a1); - 
} else { - (object_->*member_)(a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_1::base* -NewTessCallback( - T1* obj, R (T2::*member)(A1)) { - return new _TessMemberResultCallback_0_1( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_1::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)(A1)) { - return new _TessMemberResultCallback_0_1( - obj, member); -} -#endif - -template -class _TessFunctionResultCallback_0_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(A1); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_1( - FunctionSignature function) - : function_(function) { - } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(a1); - return result; - } else { - R result = (*function_)(a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_0_1 - : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(A1); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_1( - FunctionSignature function) - : function_(function) { - } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(a1); - } else { - (*function_)(a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_0_1::base* -NewTessCallback(R (*function)(A1)) { - return new _TessFunctionResultCallback_0_1(function); -} - -template -inline typename _TessFunctionResultCallback_0_1::base* -NewPermanentTessCallback(R (*function)(A1)) { - return new _TessFunctionResultCallback_0_1(function); -} 
- -template -class _ConstTessMemberResultCallback_1_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_1(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_1_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_1(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,a1); - } else { - (object_->*member_)(p1_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_1(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_1(obj, member, p1); -} -#endif - -template -class _TessMemberResultCallback_1_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 
base; - typedef R (T::*MemberSignature)(P1,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_1(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_1_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_1(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,a1); - } else { - (object_->*member_)(p1_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_1::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,A1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_1(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_1::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,A1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_1(obj, member, p1); -} -#endif - -template -class _TessFunctionResultCallback_1_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_1(FunctionSignature function, P1 
p1) - : function_(function), p1_(p1) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(p1_,a1); - return result; - } else { - R result = (*function_)(p1_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_1_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_1(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(p1_,a1); - } else { - (*function_)(p1_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_1_1::base* -NewTessCallback(R (*function)(P1,A1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_1(function, p1); -} - -template -inline typename _TessFunctionResultCallback_1_1::base* -NewPermanentTessCallback(R (*function)(P1,A1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_1(function, p1); -} - -template -class _ConstTessMemberResultCallback_2_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_1(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1); - // zero out the pointer to ensure segfault if 
used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_2_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_1(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,a1); - } else { - (object_->*member_)(p1_,p2_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_1(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_1(obj, member, p1, p2); -} -#endif - -template -class _TessMemberResultCallback_2_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_1(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1); - return result; - } else { - R result = 
(object_->*member_)(p1_,p2_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_2_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_1(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,a1); - } else { - (object_->*member_)(p1_,p2_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_1::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,A1) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_1(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_1::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,A1) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_1(obj, member, p1, p2); -} -#endif - -template -class _TessFunctionResultCallback_2_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_1(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(p1_,p2_,a1); - return result; - } else { - R result = (*function_)(p1_,p2_,a1); - // zero out the pointer 
to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_2_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_1(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(p1_,p2_,a1); - } else { - (*function_)(p1_,p2_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_2_1::base* -NewTessCallback(R (*function)(P1,P2,A1), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_1(function, p1, p2); -} - -template -inline typename _TessFunctionResultCallback_2_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_1(function, p1, p2); -} - -template -class _ConstTessMemberResultCallback_3_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1); - // zero out the pointer to ensure segfault if used 
again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_3_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_1(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_1(obj, member, p1, p2, p3); -} -#endif - -template -class _TessMemberResultCallback_3_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - 
member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_3_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_1::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_1(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_1::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_1(obj, member, p1, p2, p3); -} -#endif - -template -class _TessFunctionResultCallback_3_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename 
remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_1(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_3_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_1(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(p1_,p2_,p3_,a1); - } else { - (*function_)(p1_,p2_,p3_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_3_1::base* -NewTessCallback(R (*function)(P1,P2,P3,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_1(function, p1, p2, p3); -} - -template -inline typename _TessFunctionResultCallback_3_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_1(function, p1, p2, p3); -} - -template -class _ConstTessMemberResultCallback_4_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1) const; - - private: - const T* object_; - 
MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_4_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_1::base* 
-NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessMemberResultCallback_4_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_4_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef 
SWIG -template -inline typename _TessMemberResultCallback_4_1::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_1::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessFunctionResultCallback_4_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_4_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : 
function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1); - } else { - (*function_)(p1_,p2_,p3_,p4_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_4_1::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_1(function, p1, p2, p3, p4); -} - -template -inline typename _TessFunctionResultCallback_4_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_1(function, p1, p2, p3, p4); -} - -template -class _ConstTessMemberResultCallback_5_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_5_1 : public TessCallback1 { - public: - typedef 
TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessMemberResultCallback_5_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type 
p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_5_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_1::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_1::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) , typename Identity::type 
p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessFunctionResultCallback_5_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_5_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename 
_TessFunctionResultCallback_5_1::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_1(function, p1, p2, p3, p4, p5); -} - -template -inline typename _TessFunctionResultCallback_5_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_1(function, p1, p2, p3, p4, p5); -} - -template -class _ConstTessMemberResultCallback_6_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_6_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename 
remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessMemberResultCallback_6_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename 
remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_6_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_1(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_1::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename 
_TessMemberResultCallback_6_1::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessFunctionResultCallback_6_1 : public TessResultCallback1 { - public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_6_1 : public TessCallback1 { - public: - typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) 
{ } - - virtual void Run(A1 a1) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_6_1::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_1(function, p1, p2, p3, p4, p5, p6); -} - -template -inline typename _TessFunctionResultCallback_6_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_1(function, p1, p2, p3, p4, p5, p6); -} - -template -class _ConstTessMemberResultCallback_0_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_2( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(a1,a2); - return result; - } else { - R result = (object_->*member_)(a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_0_2 - : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_2( - const T* object, 
MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(a1,a2); - } else { - (object_->*member_)(a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_2::base* -NewTessCallback( - const T1* obj, R (T2::*member)(A1,A2) const) { - return new _ConstTessMemberResultCallback_0_2( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_2::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)(A1,A2) const) { - return new _ConstTessMemberResultCallback_0_2( - obj, member); -} -#endif - -template -class _TessMemberResultCallback_0_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(A1,A2) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_2( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(a1,a2); - return result; - } else { - R result = (object_->*member_)(a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_0_2 - : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(A1,A2) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_2( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(a1,a2); - } else { - (object_->*member_)(a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG 
-template -inline typename _TessMemberResultCallback_0_2::base* -NewTessCallback( - T1* obj, R (T2::*member)(A1,A2)) { - return new _TessMemberResultCallback_0_2( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_2::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)(A1,A2)) { - return new _TessMemberResultCallback_0_2( - obj, member); -} -#endif - -template -class _TessFunctionResultCallback_0_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(A1,A2); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_2( - FunctionSignature function) - : function_(function) { - } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(a1,a2); - return result; - } else { - R result = (*function_)(a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_0_2 - : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(A1,A2); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_2( - FunctionSignature function) - : function_(function) { - } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(a1,a2); - } else { - (*function_)(a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_0_2::base* -NewTessCallback(R (*function)(A1,A2)) { - return new _TessFunctionResultCallback_0_2(function); -} - -template -inline typename _TessFunctionResultCallback_0_2::base* -NewPermanentTessCallback(R (*function)(A1,A2)) { - return new _TessFunctionResultCallback_0_2(function); -} - -template -class _ConstTessMemberResultCallback_1_2 : public TessResultCallback2 { - public: - typedef 
TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_2(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_1_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_2(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,a1,a2); - } else { - (object_->*member_)(p1_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_2(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_2(obj, member, p1); -} -#endif - -template -class _TessMemberResultCallback_1_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,A1,A2) ; - - private: - T* 
object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_2(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_1_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_2(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,a1,a2); - } else { - (object_->*member_)(p1_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_2::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_2(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_2::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_2(obj, member, p1); -} -#endif - -template -class _TessFunctionResultCallback_1_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_2(FunctionSignature function, P1 p1) - : function_(function), 
p1_(p1) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(p1_,a1,a2); - return result; - } else { - R result = (*function_)(p1_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_1_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_2(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(p1_,a1,a2); - } else { - (*function_)(p1_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_1_2::base* -NewTessCallback(R (*function)(P1,A1,A2), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_2(function, p1); -} - -template -inline typename _TessFunctionResultCallback_1_2::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_2(function, p1); -} - -template -class _ConstTessMemberResultCallback_2_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_2(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2); - // zero out the pointer to 
ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_2_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_2(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_2(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_2(obj, member, p1, p2); -} -#endif - -template -class _TessMemberResultCallback_2_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_2(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2); - return result; 
- } else { - R result = (object_->*member_)(p1_,p2_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_2_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_2(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_2::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_2(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_2::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_2(obj, member, p1, p2); -} -#endif - -template -class _TessFunctionResultCallback_2_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_2(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(p1_,p2_,a1,a2); - return result; - } else { - R 
result = (*function_)(p1_,p2_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_2_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_2(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(p1_,p2_,a1,a2); - } else { - (*function_)(p1_,p2_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_2_2::base* -NewTessCallback(R (*function)(P1,P2,A1,A2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_2(function, p1, p2); -} - -template -inline typename _TessFunctionResultCallback_2_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_2(function, p1, p2); -} - -template -class _ConstTessMemberResultCallback_3_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); - return result; - } else { - R 
result = (object_->*member_)(p1_,p2_,p3_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_3_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_2(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_2(obj, member, p1, p2, p3); -} -#endif - -template -class _TessMemberResultCallback_3_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - 
inline _TessMemberResultCallback_3_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_3_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_2::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_2(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_2::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_2(obj, member, p1, p2, p3); -} -#endif - -template -class _TessFunctionResultCallback_3_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R 
(*FunctionSignature)(P1,P2,P3,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_2(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_3_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_2(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2); - } else { - (*function_)(p1_,p2_,p3_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_3_2::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_2(function, p1, p2, p3); -} - -template -inline typename _TessFunctionResultCallback_3_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_2(function, p1, p2, p3); -} - -template -class _ConstTessMemberResultCallback_4_2 : public 
TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_4_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - 
return new _ConstTessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessMemberResultCallback_4_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_4_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - 
(object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_2::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_2::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessFunctionResultCallback_4_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_4_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2); - - private: - FunctionSignature function_; - typename 
remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2); - } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_4_2::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_2(function, p1, p2, p3, p4); -} - -template -inline typename _TessFunctionResultCallback_4_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_2(function, p1, p2, p3, p4); -} - -template -class _ConstTessMemberResultCallback_5_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - return result; - } else { - R 
result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_5_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessMemberResultCallback_5_2 : public TessResultCallback2 { - 
public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_5_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_2::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename 
Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_2::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessFunctionResultCallback_5_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_5_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 
p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_5_2::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_2(function, p1, p2, p3, p4, p5); -} - -template -inline typename _TessFunctionResultCallback_5_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_2(function, p1, p2, p3, p4, p5); -} - -template -class _ConstTessMemberResultCallback_6_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - // zero out the pointer to ensure 
segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_6_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class 
_TessMemberResultCallback_6_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_6_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_2(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG 
-template -inline typename _TessMemberResultCallback_6_2::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_2::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessFunctionResultCallback_6_2 : public TessResultCallback2 { - public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_6_2 : public TessCallback2 { - public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2); - - private: - 
FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_6_2::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_2(function, p1, p2, p3, p4, p5, p6); -} - -template -inline typename _TessFunctionResultCallback_6_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_2(function, p1, p2, p3, p4, p5, p6); -} - -template -class _ConstTessMemberResultCallback_0_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_3( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(a1,a2,a3); - return result; - 
} else { - R result = (object_->*member_)(a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_0_3 - : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_3( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(a1,a2,a3); - } else { - (object_->*member_)(a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_3::base* -NewTessCallback( - const T1* obj, R (T2::*member)(A1,A2,A3) const) { - return new _ConstTessMemberResultCallback_0_3( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_3::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)(A1,A2,A3) const) { - return new _ConstTessMemberResultCallback_0_3( - obj, member); -} -#endif - -template -class _TessMemberResultCallback_0_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_3( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_0_3 - : public TessCallback3 { - 
public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_3( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(a1,a2,a3); - } else { - (object_->*member_)(a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_3::base* -NewTessCallback( - T1* obj, R (T2::*member)(A1,A2,A3)) { - return new _TessMemberResultCallback_0_3( - obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_3::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)(A1,A2,A3)) { - return new _TessMemberResultCallback_0_3( - obj, member); -} -#endif - -template -class _TessFunctionResultCallback_0_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(A1,A2,A3); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_3( - FunctionSignature function) - : function_(function) { - } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (*function_)(a1,a2,a3); - return result; - } else { - R result = (*function_)(a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_0_3 - : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(A1,A2,A3); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_3( - FunctionSignature function) - : function_(function) { - } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (*function_)(a1,a2,a3); - } else { - (*function_)(a1,a2,a3); - // 
zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_0_3::base* -NewTessCallback(R (*function)(A1,A2,A3)) { - return new _TessFunctionResultCallback_0_3(function); -} - -template -inline typename _TessFunctionResultCallback_0_3::base* -NewPermanentTessCallback(R (*function)(A1,A2,A3)) { - return new _TessFunctionResultCallback_0_3(function); -} - -template -class _ConstTessMemberResultCallback_1_3 - : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3) const; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } - - virtual R Run(A1 a1, A2 a2, A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_1_3 - : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3) const; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } - - virtual void Run(A1 a1, A2 a2, A3 a3) { - if (!del) { - (object_->*member_)(p1_,a1,a2,a3); - } else { - (object_->*member_)(p1_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3) 
, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_3(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_3::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_3(obj, member, p1); -} -#endif - -template -class _TessMemberResultCallback_1_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } - - virtual R Run(A1 a1, A2 a2, A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_1_3 - : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } - - virtual void Run(A1 a1, A2 a2, A3 a3) { - if (!del) { - (object_->*member_)(p1_,a1,a2,a3); - } else { - (object_->*member_)(p1_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_3(obj, member, p1); -} -#endif - -#ifndef SWIG 
-template -inline typename _TessMemberResultCallback_1_3::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_3(obj, member, p1); -} -#endif - -template -class _TessFunctionResultCallback_1_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef R (*FunctionSignature)(P1,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_3(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual R Run(A1 a1, A2 a2, A3 a3) { - if (!del) { - R result = (*function_)(p1_,a1,a2,a3); - return result; - } else { - R result = (*function_)(p1_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_1_3 - : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_3(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual void Run(A1 a1, A2 a2, A3 a3) { - if (!del) { - (*function_)(p1_,a1,a2,a3); - } else { - (*function_)(p1_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_1_3::base* -NewTessCallback(R (*function)(P1,A1,A2,A3), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_3(function, p1); -} - -template -inline typename _TessFunctionResultCallback_1_3::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2,A3), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_3(function, p1); -} - -template -class _ConstTessMemberResultCallback_2_3 : public 
TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_3(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_2_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_3(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_3(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) const, typename Identity::type p1, typename 
Identity::type p2) { - return new _ConstTessMemberResultCallback_2_3(obj, member, p1, p2); -} -#endif - -template -class _TessMemberResultCallback_2_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_3(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_2_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_3(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_3(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_3::base* -NewPermanentTessCallback(T1* obj, R 
(T2::*member)(P1,P2,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_3(obj, member, p1, p2); -} -#endif - -template -class _TessFunctionResultCallback_2_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_3(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (*function_)(p1_,p2_,a1,a2,a3); - return result; - } else { - R result = (*function_)(p1_,p2_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_2_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_3(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (*function_)(p1_,p2_,a1,a2,a3); - } else { - (*function_)(p1_,p2_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_2_3::base* -NewTessCallback(R (*function)(P1,P2,A1,A2,A3), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_3(function, p1, p2); -} - -template -inline typename _TessFunctionResultCallback_2_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2,A3), typename Identity::type p1, typename Identity::type 
p2) { - return new _TessFunctionResultCallback_2_3(function, p1, p2); -} - -template -class _ConstTessMemberResultCallback_3_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_3_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) 
{ - return new _ConstTessMemberResultCallback_3_3(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_3(obj, member, p1, p2, p3); -} -#endif - -template -class _TessMemberResultCallback_3_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_3_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); - // zero out the pointer to ensure segfault if 
used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_3(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_3::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_3(obj, member, p1, p2, p3); -} -#endif - -template -class _TessFunctionResultCallback_3_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_3(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_3_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_3(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void 
Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2,a3); - } else { - (*function_)(p1_,p2_,p3_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_3_3::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_3(function, p1, p2, p3); -} - -template -inline typename _TessFunctionResultCallback_3_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_3(function, p1, p2, p3); -} - -template -class _ConstTessMemberResultCallback_4_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_4_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename 
remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessMemberResultCallback_4_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { 
- R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_4_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_3::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessFunctionResultCallback_4_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R 
(*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_4_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); - } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_4_3::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_3(function, p1, p2, p3, p4); -} - -template -inline typename _TessFunctionResultCallback_4_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3), typename 
Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_3(function, p1, p2, p3, p4); -} - -template -class _ConstTessMemberResultCallback_5_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_5_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - } else { - 
(object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessMemberResultCallback_5_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_5_3 : 
public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_3::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessFunctionResultCallback_5_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename 
remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_5_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_5_3::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_3(function, p1, p2, p3, p4, p5); -} - -template -inline typename _TessFunctionResultCallback_5_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename 
Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_3(function, p1, p2, p3, p4, p5); -} - -template -class _ConstTessMemberResultCallback_6_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_6_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if 
(!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessMemberResultCallback_6_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - return result; - } else { - R result = 
(object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_6_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_3(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_3::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_3::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - 
-template -class _TessFunctionResultCallback_6_3 : public TessResultCallback3 { - public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_6_3 : public TessCallback3 { - public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename 
_TessFunctionResultCallback_6_3::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_3(function, p1, p2, p3, p4, p5, p6); -} - -template -inline typename _TessFunctionResultCallback_6_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_3(function, p1, p2, p3, p4, p5, p6); -} - -template -class _ConstTessMemberResultCallback_0_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_4(const T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_0_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_4(const T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(a1,a2,a3,a4); - } else { - (object_->*member_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault 
if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4) const) { - return new _ConstTessMemberResultCallback_0_4(obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4) const) { - return new _ConstTessMemberResultCallback_0_4(obj, member); -} -#endif - -template -class _TessMemberResultCallback_0_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_4(T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_0_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_4(T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(a1,a2,a3,a4); - } else { - (object_->*member_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_4::base* -NewTessCallback(T1* obj, R (T2::*member)(A1,A2,A3,A4)) { - return new _TessMemberResultCallback_0_4(obj, member); -} -#endif - 
-#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(A1,A2,A3,A4)) { - return new _TessMemberResultCallback_0_4(obj, member); -} -#endif - -template -class _TessFunctionResultCallback_0_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(A1,A2,A3,A4); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_4(FunctionSignature function) - : function_(function) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_0_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(A1,A2,A3,A4); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_4(FunctionSignature function) - : function_(function) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(a1,a2,a3,a4); - } else { - (*function_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_0_4::base* -NewTessCallback(R (*function)(A1,A2,A3,A4)) { - return new _TessFunctionResultCallback_0_4(function); -} - -template -inline typename _TessFunctionResultCallback_0_4::base* -NewPermanentTessCallback(R (*function)(A1,A2,A3,A4)) { - return new _TessFunctionResultCallback_0_4(function); -} - -template -class _ConstTessMemberResultCallback_1_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - 
typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_4(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_1_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_4(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_4(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_4(obj, member, p1); -} -#endif - -template -class _TessMemberResultCallback_1_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename 
remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_4(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_1_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_4(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_4::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_4(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_4(obj, member, p1); -} -#endif - -template -class _TessFunctionResultCallback_1_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_4(FunctionSignature function, P1 p1) - : 
function_(function), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(p1_,a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(p1_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_1_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_4(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(p1_,a1,a2,a3,a4); - } else { - (*function_)(p1_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_1_4::base* -NewTessCallback(R (*function)(P1,A1,A2,A3,A4), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_4(function, p1); -} - -template -inline typename _TessFunctionResultCallback_1_4::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2,A3,A4), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_4(function, p1); -} - -template -class _ConstTessMemberResultCallback_2_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_4(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = 
(object_->*member_)(p1_,p2_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_2_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_4(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_4(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_4(obj, member, p1, p2); -} -#endif - -template -class _TessMemberResultCallback_2_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_4(T* object, 
MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_2_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_4(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_4::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_4(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_4(obj, member, p1, p2); -} -#endif - -template -class _TessFunctionResultCallback_2_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename 
remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_4(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_2_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_4(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(p1_,p2_,a1,a2,a3,a4); - } else { - (*function_)(p1_,p2_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_2_4::base* -NewTessCallback(R (*function)(P1,P2,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_4(function, p1, p2); -} - -template -inline typename _TessFunctionResultCallback_2_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_4(function, p1, p2); -} - -template -class _ConstTessMemberResultCallback_3_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - 
typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_3_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_4(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new 
_ConstTessMemberResultCallback_3_4(obj, member, p1, p2, p3); -} -#endif - -template -class _TessMemberResultCallback_3_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_3_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_4::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new 
_TessMemberResultCallback_3_4(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_4(obj, member, p1, p2, p3); -} -#endif - -template -class _TessFunctionResultCallback_3_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_4(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_3_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_4(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); - } else { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename 
_TessFunctionResultCallback_3_4::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_4(function, p1, p2, p3); -} - -template -inline typename _TessFunctionResultCallback_3_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_4(function, p1, p2, p3); -} - -template -class _ConstTessMemberResultCallback_4_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_4_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_4(const T* object, MemberSignature member, P1 p1, 
P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessMemberResultCallback_4_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - 
member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_4_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_4::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessFunctionResultCallback_4_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename 
remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_4_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_4_4::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_4(function, p1, p2, p3, p4); -} - -template -inline typename _TessFunctionResultCallback_4_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_4(function, p1, p2, p3, p4); -} - -template 
-class _ConstTessMemberResultCallback_5_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_5_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline 
typename _ConstTessMemberResultCallback_5_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessMemberResultCallback_5_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_5_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature 
member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_4::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessFunctionResultCallback_5_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_4(FunctionSignature function, P1 p1, P2 
p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_5_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_5_4::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_4(function, p1, p2, p3, p4, p5); -} - -template -inline typename _TessFunctionResultCallback_5_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new 
_TessFunctionResultCallback_5_4(function, p1, p2, p3, p4, p5); -} - -template -class _ConstTessMemberResultCallback_6_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_6_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - 
} else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessMemberResultCallback_6_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - 
// zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_6_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_4(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_4::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_4::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class 
_TessFunctionResultCallback_6_4 : public TessResultCallback4 { - public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_6_4 : public TessCallback4 { - public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename 
_TessFunctionResultCallback_6_4::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_4(function, p1, p2, p3, p4, p5, p6); -} - -template -inline typename _TessFunctionResultCallback_6_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_4(function, p1, p2, p3, p4, p5, p6); -} - -template -class TessCallback5 { - public: - virtual ~TessCallback5() { } - virtual void Run(A1,A2,A3,A4,A5) = 0; -}; - -template -class TessResultCallback5 { - public: - virtual ~TessResultCallback5() { } - virtual R Run(A1,A2,A3,A4,A5) = 0; -}; - -template -class _ConstTessMemberResultCallback_0_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline _ConstTessMemberResultCallback_0_5(const T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_0_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - - public: - inline 
_ConstTessMemberResultCallback_0_5(const T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(a1,a2,a3,a4,a5); - } else { - (object_->*member_)(a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4,A5) const) { - return new _ConstTessMemberResultCallback_0_5(obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4,A5) const) { - return new _ConstTessMemberResultCallback_0_5(obj, member); -} -#endif - -template -class _TessMemberResultCallback_0_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_5(T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_0_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - - public: - inline _TessMemberResultCallback_0_5(T* object, MemberSignature member) - : object_(object), - member_(member) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(a1,a2,a3,a4,a5); - } else 
{ - (object_->*member_)(a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_5::base* -NewTessCallback(T1* obj, R (T2::*member)(A1,A2,A3,A4,A5)) { - return new _TessMemberResultCallback_0_5(obj, member); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_0_5::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(A1,A2,A3,A4,A5)) { - return new _TessMemberResultCallback_0_5(obj, member); -} -#endif - -template -class _TessFunctionResultCallback_0_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_5(FunctionSignature function) - : function_(function) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_0_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - - public: - inline _TessFunctionResultCallback_0_5(FunctionSignature function) - : function_(function) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(a1,a2,a3,a4,a5); - } else { - (*function_)(a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_0_5::base* -NewTessCallback(R (*function)(A1,A2,A3,A4,A5)) { - return new _TessFunctionResultCallback_0_5(function); -} - -template -inline typename 
_TessFunctionResultCallback_0_5::base* -NewPermanentTessCallback(R (*function)(A1,A2,A3,A4,A5)) { - return new _TessFunctionResultCallback_0_5(function); -} - -template -class _ConstTessMemberResultCallback_1_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_5(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_1_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _ConstTessMemberResultCallback_1_5(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_5(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_5::base* 
-NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_5(obj, member, p1); -} -#endif - -template -class _TessMemberResultCallback_1_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_5(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_1_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - - public: - inline _TessMemberResultCallback_1_5(T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_5::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_5(obj, member, p1); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_1_5::base* -NewPermanentTessCallback(T1* obj, R 
(T2::*member)(P1,A1,A2,A3,A4,A5) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_5(obj, member, p1); -} -#endif - -template -class _TessFunctionResultCallback_1_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_5(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(p1_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(p1_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_1_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - - public: - inline _TessFunctionResultCallback_1_5(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(p1_,a1,a2,a3,a4,a5); - } else { - (*function_)(p1_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_1_5::base* -NewTessCallback(R (*function)(P1,A1,A2,A3,A4,A5), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_5(function, p1); -} - -template -inline typename _TessFunctionResultCallback_1_5::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2,A3,A4,A5), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_5(function, p1); -} - -template -class _ConstTessMemberResultCallback_2_5 : public TessResultCallback5 { - 
public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_5(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_2_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _ConstTessMemberResultCallback_2_5(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_5(obj, member, p1, p2); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) const, 
typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_5(obj, member, p1, p2); -} -#endif - -template -class _TessMemberResultCallback_2_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_5(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_2_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessMemberResultCallback_2_5(T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_2_5::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_5(obj, member, p1, p2); -} -#endif - -#ifndef SWIG 
-template -inline typename _TessMemberResultCallback_2_5::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_5(obj, member, p1, p2); -} -#endif - -template -class _TessFunctionResultCallback_2_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_5(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_2_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - - public: - inline _TessFunctionResultCallback_2_5(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(p1_,p2_,a1,a2,a3,a4,a5); - } else { - (*function_)(p1_,p2_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_2_5::base* -NewTessCallback(R (*function)(P1,P2,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_5(function, p1, p2); -} - 
-template -inline typename _TessFunctionResultCallback_2_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_5(function, p1, p2); -} - -template -class _ConstTessMemberResultCallback_3_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_3_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _ConstTessMemberResultCallback_3_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - 
-#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_5(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_5(obj, member, p1, p2, p3); -} -#endif - -template -class _TessMemberResultCallback_3_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_3_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessMemberResultCallback_3_5(T* object, MemberSignature member, P1 p1, P2 
p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_5::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_5(obj, member, p1, p2, p3); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_3_5::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_5(obj, member, p1, p2, p3); -} -#endif - -template -class _TessFunctionResultCallback_3_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_5(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_3_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void 
(*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - - public: - inline _TessFunctionResultCallback_3_5(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - } else { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_3_5::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_5(function, p1, p2, p3); -} - -template -inline typename _TessFunctionResultCallback_3_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_5(function, p1, p2, p3); -} - -template -class _ConstTessMemberResultCallback_4_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - return result; - } else { - R 
result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_4_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _ConstTessMemberResultCallback_4_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessMemberResultCallback_4_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R 
(T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_4_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessMemberResultCallback_4_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_5::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_5(obj, 
member, p1, p2, p3, p4); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_4_5::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); -} -#endif - -template -class _TessFunctionResultCallback_4_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_4_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - - public: - inline _TessFunctionResultCallback_4_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - } else { - 
(*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_4_5::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_5(function, p1, p2, p3, p4); -} - -template -inline typename _TessFunctionResultCallback_4_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_5(function, p1, p2, p3, p4); -} - -template -class _ConstTessMemberResultCallback_5_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_5_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void 
(T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _ConstTessMemberResultCallback_5_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessMemberResultCallback_5_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename 
remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_5_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessMemberResultCallback_5_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_5::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); -} 
-#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_5_5::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); -} -#endif - -template -class _TessFunctionResultCallback_5_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_5_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - - public: - inline _TessFunctionResultCallback_5_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } - - virtual void 
Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_5_5::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_5(function, p1, p2, p3, p4, p5); -} - -template -inline typename _TessFunctionResultCallback_5_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_5(function, p1, p2, p3, p4, p5); -} - -template -class _ConstTessMemberResultCallback_6_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - // zero out the pointer to 
ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _ConstTessMemberResultCallback_6_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const; - - private: - const T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _ConstTessMemberResultCallback_6_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_5(obj, member, p1, p2, 
p3, p4, p5, p6); -} -#endif - -template -class _TessMemberResultCallback_6_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - return result; - } - } -}; - -template -class _TessMemberResultCallback_6_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) ; - - private: - T* object_; - MemberSignature member_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessMemberResultCallback_6_5(T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - } else { - 
(object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - member_ = nullptr; - delete this; - } - } -}; - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_5::base* -NewTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -#ifndef SWIG -template -inline typename _TessMemberResultCallback_6_5::base* -NewPermanentTessCallback(T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); -} -#endif - -template -class _TessFunctionResultCallback_6_5 : public TessResultCallback5 { - public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - return result; - } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = 
nullptr; - delete this; - return result; - } - } -}; - -template -class _TessFunctionResultCallback_6_5 : public TessCallback5 { - public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5); - - private: - FunctionSignature function_; - typename remove_reference::type p1_; - typename remove_reference::type p2_; - typename remove_reference::type p3_; - typename remove_reference::type p4_; - typename remove_reference::type p5_; - typename remove_reference::type p6_; - - public: - inline _TessFunctionResultCallback_6_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } - - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { - if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); - // zero out the pointer to ensure segfault if used again - function_ = nullptr; - delete this; - } - } -}; - -template -inline typename _TessFunctionResultCallback_6_5::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_5(function, p1, p2, p3, p4, p5, p6); -} - -template -inline typename _TessFunctionResultCallback_6_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_5(function, p1, p2, p3, p4, p5, p6); -} - -#endif // TESS_CALLBACK_SPECIALIZATIONS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tessdatamanager.cpp 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tessdatamanager.cpp deleted file mode 100644 index 9a7a75c9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tessdatamanager.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tessdatamanager.cpp -// Description: Functions to handle loading/combining tesseract data files. -// Author: Daria Antonova -// Created: Wed Jun 03 11:26:43 PST 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tessdatamanager.h" - -#include - -#include "errcode.h" -#include "helpers.h" -#include "serialis.h" -#include "strngs.h" -#include "tprintf.h" -#include "params.h" - -namespace tesseract { - -TessdataManager::TessdataManager() : reader_(nullptr), is_loaded_(false), swap_(false) { - SetVersionString(PACKAGE_VERSION); -} - -TessdataManager::TessdataManager(FileReader reader) - : reader_(reader), - is_loaded_(false), - swap_(false) { - SetVersionString(PACKAGE_VERSION); -} - -// Lazily loads from the the given filename. Won't actually read the file -// until it needs it. 
-void TessdataManager::LoadFileLater(const char *data_file_name) { - Clear(); - data_file_name_ = data_file_name; -} - -bool TessdataManager::Init(const char *data_file_name) { - GenericVector data; - if (reader_ == nullptr) { - if (!LoadDataFromFile(data_file_name, &data)) return false; - } else { - if (!(*reader_)(data_file_name, &data)) return false; - } - return LoadMemBuffer(data_file_name, &data[0], data.size()); -} - -// Loads from the given memory buffer as if a file. -bool TessdataManager::LoadMemBuffer(const char *name, const char *data, - int size) { - Clear(); - data_file_name_ = name; - TFile fp; - fp.Open(data, size); - uint32_t num_entries; - if (!fp.DeSerialize(&num_entries)) return false; - swap_ = num_entries > kMaxNumTessdataEntries; - fp.set_swap(swap_); - if (swap_) ReverseN(&num_entries, sizeof(num_entries)); - if (num_entries > kMaxNumTessdataEntries) return false; - GenericVector offset_table; - offset_table.resize_no_init(num_entries); - if (!fp.DeSerialize(&offset_table[0], num_entries)) return false; - for (int i = 0; i < num_entries && i < TESSDATA_NUM_ENTRIES; ++i) { - if (offset_table[i] >= 0) { - int64_t entry_size = size - offset_table[i]; - int j = i + 1; - while (j < num_entries && offset_table[j] == -1) ++j; - if (j < num_entries) entry_size = offset_table[j] - offset_table[i]; - entries_[i].resize_no_init(entry_size); - if (!fp.DeSerialize(&entries_[i][0], entry_size)) return false; - } - } - if (entries_[TESSDATA_VERSION].empty()) { - SetVersionString("Pre-4.0.0"); - } - is_loaded_ = true; - return true; -} - -// Overwrites a single entry of the given type. -void TessdataManager::OverwriteEntry(TessdataType type, const char *data, - int size) { - is_loaded_ = true; - entries_[type].resize_no_init(size); - memcpy(&entries_[type][0], data, size); -} - -// Saves to the given filename. 
-bool TessdataManager::SaveFile(const STRING &filename, - FileWriter writer) const { - ASSERT_HOST(is_loaded_); - GenericVector data; - Serialize(&data); - if (writer == nullptr) - return SaveDataToFile(data, filename); - else - return (*writer)(data, filename); -} - -// Serializes to the given vector. -void TessdataManager::Serialize(GenericVector *data) const { - ASSERT_HOST(is_loaded_); - // Compute the offset_table and total size. - int64_t offset_table[TESSDATA_NUM_ENTRIES]; - int64_t offset = sizeof(int32_t) + sizeof(offset_table); - for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { - if (entries_[i].empty()) { - offset_table[i] = -1; - } else { - offset_table[i] = offset; - offset += entries_[i].size(); - } - } - data->init_to_size(offset, 0); - int32_t num_entries = TESSDATA_NUM_ENTRIES; - TFile fp; - fp.OpenWrite(data); - fp.Serialize(&num_entries); - fp.Serialize(&offset_table[0], countof(offset_table)); - for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { - if (!entries_[i].empty()) { - fp.Serialize(&entries_[i][0], entries_[i].size()); - } - } -} - -// Resets to the initial state, keeping the reader. -void TessdataManager::Clear() { - for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { - entries_[i].clear(); - } - is_loaded_ = false; -} - -// Prints a directory of contents. -void TessdataManager::Directory() const { - tprintf("Version string:%s\n", VersionString().c_str()); - int offset = TESSDATA_NUM_ENTRIES * sizeof(int64_t); - for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { - if (!entries_[i].empty()) { - tprintf("%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i], - entries_[i].size(), offset); - offset += entries_[i].size(); - } - } -} - -// Opens the given TFile pointer to the given component type. -// Returns false in case of failure. 
-bool TessdataManager::GetComponent(TessdataType type, TFile *fp) { - if (!is_loaded_ && !Init(data_file_name_.string())) return false; - const TessdataManager *const_this = this; - return const_this->GetComponent(type, fp); -} - -// As non-const version except it can't load the component if not already -// loaded. -bool TessdataManager::GetComponent(TessdataType type, TFile *fp) const { - ASSERT_HOST(is_loaded_); - if (entries_[type].empty()) return false; - fp->Open(&entries_[type][0], entries_[type].size()); - fp->set_swap(swap_); - return true; -} - -// Returns the current version string. -std::string TessdataManager::VersionString() const { - return std::string(&entries_[TESSDATA_VERSION][0], - entries_[TESSDATA_VERSION].size()); -} - -// Sets the version string to the given v_str. -void TessdataManager::SetVersionString(const std::string &v_str) { - entries_[TESSDATA_VERSION].resize_no_init(v_str.size()); - memcpy(&entries_[TESSDATA_VERSION][0], v_str.data(), v_str.size()); -} - -bool TessdataManager::CombineDataFiles( - const char *language_data_path_prefix, - const char *output_filename) { - // Load individual tessdata components from files. - for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { - TessdataType type; - ASSERT_HOST(TessdataTypeFromFileSuffix(kTessdataFileSuffixes[i], &type)); - STRING filename = language_data_path_prefix; - filename += kTessdataFileSuffixes[i]; - FILE *fp = fopen(filename.string(), "rb"); - if (fp != nullptr) { - fclose(fp); - if (!LoadDataFromFile(filename, &entries_[type])) { - tprintf("Load of file %s failed!\n", filename.string()); - return false; - } - } - } - is_loaded_ = true; - - // Make sure that the required components are present. - if (!IsBaseAvailable() && !IsLSTMAvailable()) { - tprintf( - "Error: traineddata file must contain at least (a unicharset file" - "and inttemp) OR an lstm file.\n"); - return false; - } - // Write updated data to the output traineddata file. 
- return SaveFile(output_filename, nullptr); -} - -bool TessdataManager::OverwriteComponents( - const char *new_traineddata_filename, - char **component_filenames, - int num_new_components) { - // Open the files with the new components. - for (int i = 0; i < num_new_components; ++i) { - TessdataType type; - if (TessdataTypeFromFileName(component_filenames[i], &type)) { - if (!LoadDataFromFile(component_filenames[i], &entries_[type])) { - tprintf("Failed to read component file:%s\n", component_filenames[i]); - return false; - } - } - } - - // Write updated data to the output traineddata file. - return SaveFile(new_traineddata_filename, nullptr); -} - -bool TessdataManager::ExtractToFile(const char *filename) { - TessdataType type = TESSDATA_NUM_ENTRIES; - ASSERT_HOST( - tesseract::TessdataManager::TessdataTypeFromFileName(filename, &type)); - if (entries_[type].empty()) return false; - return SaveDataToFile(entries_[type], filename); -} - -bool TessdataManager::TessdataTypeFromFileSuffix(const char *suffix, - TessdataType *type) { - for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { - if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) { - *type = static_cast(i); - return true; - } - } - tprintf("TessdataManager can't determine which tessdata" - " component is represented by %s\n", suffix); - return false; -} - -bool TessdataManager::TessdataTypeFromFileName(const char *filename, - TessdataType *type) { - // Get the file suffix (extension) - const char *suffix = strrchr(filename, '.'); - if (suffix == nullptr || *(++suffix) == '\0') return false; - return TessdataTypeFromFileSuffix(suffix, type); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tessdatamanager.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tessdatamanager.h deleted file mode 100644 index f003adb4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tessdatamanager.h +++ /dev/null @@ -1,248 +0,0 @@ 
-/////////////////////////////////////////////////////////////////////// -// File: tessdatamanager.h -// Description: Functions to handle loading/combining tesseract data files. -// Author: Daria Antonova -// Created: Wed Jun 03 11:26:43 PST 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_ -#define TESSERACT_CCUTIL_TESSDATAMANAGER_H_ - -#include "genericvector.h" - -static const char kTrainedDataSuffix[] = "traineddata"; - -// When adding new tessdata types and file suffixes, please make sure to -// update TessdataType enum, kTessdataFileSuffixes and kTessdataFileIsText. 
-static const char kLangConfigFileSuffix[] = "config"; -static const char kUnicharsetFileSuffix[] = "unicharset"; -static const char kAmbigsFileSuffix[] = "unicharambigs"; -static const char kBuiltInTemplatesFileSuffix[] = "inttemp"; -static const char kBuiltInCutoffsFileSuffix[] = "pffmtable"; -static const char kNormProtoFileSuffix[] = "normproto"; -static const char kPuncDawgFileSuffix[] = "punc-dawg"; -static const char kSystemDawgFileSuffix[] = "word-dawg"; -static const char kNumberDawgFileSuffix[] = "number-dawg"; -static const char kFreqDawgFileSuffix[] = "freq-dawg"; -static const char kFixedLengthDawgsFileSuffix[] = "fixed-length-dawgs"; -static const char kCubeUnicharsetFileSuffix[] = "cube-unicharset"; -static const char kCubeSystemDawgFileSuffix[] = "cube-word-dawg"; -static const char kShapeTableFileSuffix[] = "shapetable"; -static const char kBigramDawgFileSuffix[] = "bigram-dawg"; -static const char kUnambigDawgFileSuffix[] = "unambig-dawg"; -static const char kParamsModelFileSuffix[] = "params-model"; -static const char kLSTMModelFileSuffix[] = "lstm"; -static const char kLSTMPuncDawgFileSuffix[] = "lstm-punc-dawg"; -static const char kLSTMSystemDawgFileSuffix[] = "lstm-word-dawg"; -static const char kLSTMNumberDawgFileSuffix[] = "lstm-number-dawg"; -static const char kLSTMUnicharsetFileSuffix[] = "lstm-unicharset"; -static const char kLSTMRecoderFileSuffix[] = "lstm-recoder"; -static const char kVersionFileSuffix[] = "version"; - -namespace tesseract { - -enum TessdataType { - TESSDATA_LANG_CONFIG, // 0 - TESSDATA_UNICHARSET, // 1 - TESSDATA_AMBIGS, // 2 - TESSDATA_INTTEMP, // 3 - TESSDATA_PFFMTABLE, // 4 - TESSDATA_NORMPROTO, // 5 - TESSDATA_PUNC_DAWG, // 6 - TESSDATA_SYSTEM_DAWG, // 7 - TESSDATA_NUMBER_DAWG, // 8 - TESSDATA_FREQ_DAWG, // 9 - TESSDATA_FIXED_LENGTH_DAWGS, // 10 // deprecated - TESSDATA_CUBE_UNICHARSET, // 11 // deprecated - TESSDATA_CUBE_SYSTEM_DAWG, // 12 // deprecated - TESSDATA_SHAPE_TABLE, // 13 - TESSDATA_BIGRAM_DAWG, // 14 - 
TESSDATA_UNAMBIG_DAWG, // 15 - TESSDATA_PARAMS_MODEL, // 16 - TESSDATA_LSTM, // 17 - TESSDATA_LSTM_PUNC_DAWG, // 18 - TESSDATA_LSTM_SYSTEM_DAWG, // 19 - TESSDATA_LSTM_NUMBER_DAWG, // 20 - TESSDATA_LSTM_UNICHARSET, // 21 - TESSDATA_LSTM_RECODER, // 22 - TESSDATA_VERSION, // 23 - - TESSDATA_NUM_ENTRIES -}; - -/** - * kTessdataFileSuffixes[i] indicates the file suffix for - * tessdata of type i (from TessdataType enum). - */ -static const char *const kTessdataFileSuffixes[] = { - kLangConfigFileSuffix, // 0 - kUnicharsetFileSuffix, // 1 - kAmbigsFileSuffix, // 2 - kBuiltInTemplatesFileSuffix, // 3 - kBuiltInCutoffsFileSuffix, // 4 - kNormProtoFileSuffix, // 5 - kPuncDawgFileSuffix, // 6 - kSystemDawgFileSuffix, // 7 - kNumberDawgFileSuffix, // 8 - kFreqDawgFileSuffix, // 9 - kFixedLengthDawgsFileSuffix, // 10 // deprecated - kCubeUnicharsetFileSuffix, // 11 // deprecated - kCubeSystemDawgFileSuffix, // 12 // deprecated - kShapeTableFileSuffix, // 13 - kBigramDawgFileSuffix, // 14 - kUnambigDawgFileSuffix, // 15 - kParamsModelFileSuffix, // 16 - kLSTMModelFileSuffix, // 17 - kLSTMPuncDawgFileSuffix, // 18 - kLSTMSystemDawgFileSuffix, // 19 - kLSTMNumberDawgFileSuffix, // 20 - kLSTMUnicharsetFileSuffix, // 21 - kLSTMRecoderFileSuffix, // 22 - kVersionFileSuffix, // 23 -}; - -/** - * TessdataType could be updated to contain more entries, however - * we do not expect that number to be astronomically high. - * In order to automatically detect endianness TessdataManager will - * flip the bits if actual_tessdata_num_entries_ is larger than - * kMaxNumTessdataEntries. - */ -static const int kMaxNumTessdataEntries = 1000; - - -class TessdataManager { - public: - TessdataManager(); - explicit TessdataManager(FileReader reader); - - ~TessdataManager() = default; - - bool swap() const { return swap_; } - bool is_loaded() const { return is_loaded_; } - - // Lazily loads from the the given filename. Won't actually read the file - // until it needs it. 
- void LoadFileLater(const char *data_file_name); - /** - * Opens and reads the given data file right now. - * @return true on success. - */ - bool Init(const char *data_file_name); - // Loads from the given memory buffer as if a file, remembering name as some - // arbitrary source id for caching. - bool LoadMemBuffer(const char *name, const char *data, int size); - // Overwrites a single entry of the given type. - void OverwriteEntry(TessdataType type, const char *data, int size); - - // Saves to the given filename. - bool SaveFile(const STRING &filename, FileWriter writer) const; - // Serializes to the given vector. - void Serialize(GenericVector *data) const; - // Resets to the initial state, keeping the reader. - void Clear(); - - // Prints a directory of contents. - void Directory() const; - - // Returns true if the component requested is present. - bool IsComponentAvailable(TessdataType type) const { - return !entries_[type].empty(); - } - // Opens the given TFile pointer to the given component type. - // Returns false in case of failure. - bool GetComponent(TessdataType type, TFile *fp); - // As non-const version except it can't load the component if not already - // loaded. - bool GetComponent(TessdataType type, TFile *fp) const; - - // Returns the current version string. - std::string VersionString() const; - // Sets the version string to the given v_str. - void SetVersionString(const std::string &v_str); - - // Returns true if the base Tesseract components are present. - bool IsBaseAvailable() const { - return !entries_[TESSDATA_UNICHARSET].empty() && - !entries_[TESSDATA_INTTEMP].empty(); - } - - // Returns true if the LSTM components are present. - bool IsLSTMAvailable() const { return !entries_[TESSDATA_LSTM].empty(); } - - // Return the name of the underlying data file. 
- const STRING &GetDataFileName() const { return data_file_name_; } - - /** - * Reads all the standard tesseract config and data files for a language - * at the given path and bundles them up into one binary data file. - * Returns true if the combined traineddata file was successfully written. - */ - bool CombineDataFiles(const char *language_data_path_prefix, - const char *output_filename); - - /** - * Gets the individual components from the data_file_ with which the class was - * initialized. Overwrites the components specified by component_filenames. - * Writes the updated traineddata file to new_traineddata_filename. - */ - bool OverwriteComponents(const char *new_traineddata_filename, - char **component_filenames, - int num_new_components); - - /** - * Extracts tessdata component implied by the name of the input file from - * the combined traineddata loaded into TessdataManager. - * Writes the extracted component to the file indicated by the file name. - * E.g. if the filename given is somepath/somelang.unicharset, unicharset - * will be extracted from the data loaded into the TessdataManager and will - * be written to somepath/somelang.unicharset. - * @return true if the component was successfully extracted, false if the - * component was not present in the traineddata loaded into TessdataManager. - */ - bool ExtractToFile(const char *filename); - - /** - * Fills type with TessdataType of the tessdata component represented by the - * given file name. E.g. tessdata/eng.unicharset -> TESSDATA_UNICHARSET. - * @return true if the tessdata component type could be determined - * from the given file name. - */ - static bool TessdataTypeFromFileSuffix(const char *suffix, - TessdataType *type); - - /** - * Tries to determine tessdata component file suffix from filename, - * returns true on success. - */ - static bool TessdataTypeFromFileName(const char *filename, - TessdataType *type); - - private: - // Name of file it came from. 
- STRING data_file_name_; - // Function to load the file when we need it. - FileReader reader_; - // True if the file has been loaded. - bool is_loaded_; - // True if the bytes need swapping. - bool swap_; - // Contents of each element of the traineddata file. - GenericVector entries_[TESSDATA_NUM_ENTRIES]; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_TESSDATAMANAGER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tprintf.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tprintf.cpp deleted file mode 100644 index a51d040d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tprintf.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/********************************************************************** - * File: tprintf.cpp - * Description: Trace version of printf - portable between UX and NT - * Author: Phil Cheatle - * Created: Wed Jun 28 15:01:15 BST 1995 - * - * (C) Copyright 1995, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include -#include -#include "ccutil.h" -#include "params.h" -#include "strngs.h" -#include "tprintf.h" - -#define MAX_MSG_LEN 65536 - -static STRING_VAR(debug_file, "", "File to send tprintf output to"); - -// Trace printf -DLLSYM void tprintf(const char *format, ...) -{ - tesseract::tprintfMutex.Lock(); - va_list args; // variable args - static FILE *debugfp = nullptr; // debug file - // debug window - int32_t offset = 0; // into message - static char msg[MAX_MSG_LEN + 1]; - - va_start(args, format); // variable list - // Format into msg - #ifdef _WIN32 - offset += _vsnprintf(msg + offset, MAX_MSG_LEN - offset, format, args); - if (strcmp(debug_file.string(), "/dev/null") == 0) - debug_file.set_value("nul"); - #else - offset += vsnprintf(msg + offset, MAX_MSG_LEN - offset, format, args); - #endif - va_end(args); - - if (debugfp == nullptr && strlen(debug_file.string()) > 0) { - debugfp = fopen(debug_file.string(), "wb"); - } else if (debugfp != nullptr && strlen(debug_file.string()) == 0) { - fclose(debugfp); - debugfp = nullptr; - } - if (debugfp != nullptr) - fprintf(debugfp, "%s", msg); - else - fprintf(stderr, "%s", msg); - tesseract::tprintfMutex.Unlock(); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tprintf.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tprintf.h deleted file mode 100644 index 6622a3a2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/tprintf.h +++ /dev/null @@ -1,29 +0,0 @@ -/********************************************************************** - * File: tprintf.h - * Description: Trace version of printf - portable between UX and NT - * Author: Phil Cheatle - * Created: Wed Jun 28 15:01:15 BST 1995 - * - * (C) Copyright 1995, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCUTIL_TPRINTF_H -#define TESSERACT_CCUTIL_TPRINTF_H - -#include "platform.h" // for TESS_API - -// Main logging function. -extern TESS_API void tprintf( // Trace printf - const char *format, ...); // Message - -#endif // define TESSERACT_CCUTIL_TPRINTF_H diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unichar.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unichar.cpp deleted file mode 100644 index 9ac853be..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unichar.cpp +++ /dev/null @@ -1,244 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unichar.cpp -// Description: Unicode character/ligature class. -// Author: Ray Smith -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "unichar.h" -#include "errcode.h" -#include "genericvector.h" -#include "tprintf.h" - -#define UNI_MAX_LEGAL_UTF32 0x0010FFFF - -namespace tesseract { - -// Construct from a utf8 string. If len<0 then the string is null terminated. -// If the string is too long to fit in the UNICHAR then it takes only what -// will fit. Checks for illegal input and stops at an illegal sequence. -// The resulting UNICHAR may be empty. -UNICHAR::UNICHAR(const char* utf8_str, int len) { - int total_len = 0; - int step = 0; - if (len < 0) { - for (len = 0; len < UNICHAR_LEN && utf8_str[len] != 0; ++len); - } - for (total_len = 0; total_len < len; total_len += step) { - step = utf8_step(utf8_str + total_len); - if (total_len + step > UNICHAR_LEN) - break; // Too long. - if (step == 0) - break; // Illegal first byte. - int i; - for (i = 1; i < step; ++i) - if ((utf8_str[total_len + i] & 0xc0) != 0x80) - break; - if (i < step) - break; // Illegal surrogate - } - memcpy(chars, utf8_str, total_len); - if (total_len < UNICHAR_LEN) { - chars[UNICHAR_LEN - 1] = total_len; - while (total_len < UNICHAR_LEN - 1) - chars[total_len++] = 0; - } -} - -// Construct from a single UCS4 character. Illegal values are ignored, -// resulting in an empty UNICHAR. 
-UNICHAR::UNICHAR(int unicode) { - const int bytemask = 0xBF; - const int bytemark = 0x80; - - if (unicode < 0x80) { - chars[UNICHAR_LEN - 1] = 1; - chars[2] = 0; - chars[1] = 0; - chars[0] = static_cast(unicode); - } else if (unicode < 0x800) { - chars[UNICHAR_LEN - 1] = 2; - chars[2] = 0; - chars[1] = static_cast((unicode | bytemark) & bytemask); - unicode >>= 6; - chars[0] = static_cast(unicode | 0xc0); - } else if (unicode < 0x10000) { - chars[UNICHAR_LEN - 1] = 3; - chars[2] = static_cast((unicode | bytemark) & bytemask); - unicode >>= 6; - chars[1] = static_cast((unicode | bytemark) & bytemask); - unicode >>= 6; - chars[0] = static_cast(unicode | 0xe0); - } else if (unicode <= UNI_MAX_LEGAL_UTF32) { - chars[UNICHAR_LEN - 1] = 4; - chars[3] = static_cast((unicode | bytemark) & bytemask); - unicode >>= 6; - chars[2] = static_cast((unicode | bytemark) & bytemask); - unicode >>= 6; - chars[1] = static_cast((unicode | bytemark) & bytemask); - unicode >>= 6; - chars[0] = static_cast(unicode | 0xf0); - } else { - memset(chars, 0, UNICHAR_LEN); - } -} - -// Get the first character as UCS-4. -int UNICHAR::first_uni() const { - static const int utf8_offsets[5] = { - 0, 0, 0x3080, 0xE2080, 0x3C82080 - }; - int uni = 0; - int len = utf8_step(chars); - const char* src = chars; - - switch (len) { - default: - break; - case 4: - uni += static_cast(*src++); - uni <<= 6; - case 3: - uni += static_cast(*src++); - uni <<= 6; - case 2: - uni += static_cast(*src++); - uni <<= 6; - case 1: - uni += static_cast(*src++); - } - uni -= utf8_offsets[len]; - return uni; -} - -// Get a terminated UTF8 string: Must delete[] it after use. -char* UNICHAR::utf8_str() const { - int len = utf8_len(); - char* str = new char[len + 1]; - memcpy(str, chars, len); - str[len] = 0; - return str; -} - -// Get the number of bytes in the first character of the given utf8 string. 
-int UNICHAR::utf8_step(const char* utf8_str) { - static const char utf8_bytes[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0 - }; - - return utf8_bytes[static_cast(*utf8_str)]; -} - -UNICHAR::const_iterator& UNICHAR::const_iterator::operator++() { - ASSERT_HOST(it_ != nullptr); - int step = utf8_step(it_); - if (step == 0) { - tprintf("ERROR: Illegal UTF8 encountered.\n"); - for (int i = 0; i < 5 && it_[i] != '\0'; ++i) { - tprintf("Index %d char = 0x%x\n", i, it_[i]); - } - step = 1; - } - it_ += step; - return *this; -} - -int UNICHAR::const_iterator::operator*() const { - ASSERT_HOST(it_ != nullptr); - const int len = utf8_step(it_); - if (len == 0) { - tprintf("WARNING: Illegal UTF8 encountered\n"); - return ' '; - } - UNICHAR uch(it_, len); - return uch.first_uni(); -} - -int UNICHAR::const_iterator::get_utf8(char* utf8_output) const { - ASSERT_HOST(it_ != nullptr); - const int len = utf8_step(it_); - if (len == 0) { - tprintf("WARNING: Illegal UTF8 encountered\n"); - utf8_output[0] = ' '; - return 1; - } - strncpy(utf8_output, it_, len); - return len; -} - -int UNICHAR::const_iterator::utf8_len() const { - ASSERT_HOST(it_ != nullptr); - const int len = utf8_step(it_); - if (len == 0) { - tprintf("WARNING: Illegal UTF8 encountered\n"); - return 1; - } - return len; -} - -bool UNICHAR::const_iterator::is_legal() const { - return utf8_step(it_) > 0; -} - -UNICHAR::const_iterator UNICHAR::begin(const char* utf8_str, const int len) { - return 
UNICHAR::const_iterator(utf8_str); -} - -UNICHAR::const_iterator UNICHAR::end(const char* utf8_str, const int len) { - return UNICHAR::const_iterator(utf8_str + len); -} - -// Converts a utf-8 string to a vector of unicodes. -// Returns an empty vector if the input contains invalid UTF-8. -/* static */ -std::vector UNICHAR::UTF8ToUTF32(const char* utf8_str) { - const int utf8_length = strlen(utf8_str); - std::vector unicodes; - unicodes.reserve(utf8_length); - const_iterator end_it(end(utf8_str, utf8_length)); - for (const_iterator it(begin(utf8_str, utf8_length)); it != end_it; ++it) { - if (it.is_legal()) { - unicodes.push_back(*it); - } else { - unicodes.clear(); - return unicodes; - } - } - return unicodes; -} - -// Returns an empty string if the input contains an invalid unicode. -std::string UNICHAR::UTF32ToUTF8(const std::vector& str32) { - std::string utf8_str; - for (char32 ch : str32) { - UNICHAR uni_ch(ch); - int step; - if (uni_ch.utf8_len() > 0 && (step = utf8_step(uni_ch.utf8())) > 0) { - utf8_str.append(uni_ch.utf8(), step); - } else { - return ""; - } - } - return utf8_str; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unichar.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unichar.h deleted file mode 100644 index 80e5b7f7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unichar.h +++ /dev/null @@ -1,175 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unichar.h -// Description: Unicode character/ligature class. -// Author: Ray Smith -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_UNICHAR_H_ -#define TESSERACT_CCUTIL_UNICHAR_H_ - -#include -#include -#include -#include -#include "platform.h" - -// Maximum number of characters that can be stored in a UNICHAR. Must be -// at least 4. Must not exceed 31 without changing the coding of length. -#define UNICHAR_LEN 30 - -// TODO(rays) Move these to the tesseract namespace. -// A UNICHAR_ID is the unique id of a unichar. -using UNICHAR_ID = int; - -// A variable to indicate an invalid or uninitialized unichar id. -static const int INVALID_UNICHAR_ID = -1; -// A special unichar that corresponds to INVALID_UNICHAR_ID. -static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__"; - -enum StrongScriptDirection { - DIR_NEUTRAL = 0, // Text contains only neutral characters. - DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters. - DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters. - DIR_MIX = 3, // Text contains a mixture of left-to-right - // and right-to-left characters. -}; - -namespace tesseract { - -using char32 = signed int; - -// The UNICHAR class holds a single classification result. This may be -// a single Unicode character (stored as between 1 and 4 utf8 bytes) or -// multiple Unicode characters representing the NFKC expansion of a ligature -// such as fi, ffl etc. These are also stored as utf8. -class UNICHAR { - public: - UNICHAR() { - memset(chars, 0, UNICHAR_LEN); - } - - // Construct from a utf8 string. 
If len<0 then the string is null terminated. - // If the string is too long to fit in the UNICHAR then it takes only what - // will fit. - UNICHAR(const char* utf8_str, int len); - - // Construct from a single UCS4 character. - explicit UNICHAR(int unicode); - - // Default copy constructor and operator= are OK. - - // Get the first character as UCS-4. - int first_uni() const; - - // Get the length of the UTF8 string. - int utf8_len() const { - int len = chars[UNICHAR_LEN - 1]; - return len >=0 && len < UNICHAR_LEN ? len : UNICHAR_LEN; - } - - // Get a UTF8 string, but NOT nullptr terminated. - const char* utf8() const { - return chars; - } - - // Get a terminated UTF8 string: Must delete[] it after use. - char* utf8_str() const; - - // Get the number of bytes in the first character of the given utf8 string. - static int utf8_step(const char* utf8_str); - - // A class to simplify iterating over and accessing elements of a UTF8 - // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or - // take ownership of the underlying byte array. It also does not permit - // modification of the array (as the name suggests). - // - // Example: - // for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len); - // it != UNICHAR::end(str, len); - // ++it) { - // tprintf("UCS-4 symbol code = %d\n", *it); - // char buf[5]; - // int char_len = it.get_utf8(buf); buf[char_len] = '\0'; - // tprintf("Char = %s\n", buf); - // } - class const_iterator { - using CI = const_iterator ; - - public: - // Step to the next UTF8 character. - // If the current position is at an illegal UTF8 character, then print an - // error message and step by one byte. If the current position is at a nullptr - // value, don't step past it. - const_iterator& operator++(); - - // Return the UCS-4 value at the current position. - // If the current position is at an illegal UTF8 value, return a single - // space character. 
- int operator*() const; - - // Store the UTF-8 encoding of the current codepoint into buf, which must be - // at least 4 bytes long. Return the number of bytes written. - // If the current position is at an illegal UTF8 value, writes a single - // space character and returns 1. - // Note that this method does not null-terminate the buffer. - int get_utf8(char* buf) const; - // Returns the number of bytes of the current codepoint. Returns 1 if the - // current position is at an illegal UTF8 value. - int utf8_len() const; - // Returns true if the UTF-8 encoding at the current position is legal. - bool is_legal() const; - - // Return the pointer into the string at the current position. - const char* utf8_data() const { return it_; } - - // Iterator equality operators. - friend bool operator==(const CI& lhs, const CI& rhs) { - return lhs.it_ == rhs.it_; - } - friend bool operator!=(const CI& lhs, const CI& rhs) { - return !(lhs == rhs); - } - - private: - friend class UNICHAR; - explicit const_iterator(const char* it) : it_(it) {} - - const char* it_; // Pointer into the string. - }; - - // Create a start/end iterator pointing to a string. Note that these methods - // are static and do NOT create a copy or take ownership of the underlying - // array. - static const_iterator begin(const char* utf8_str, const int byte_length); - static const_iterator end(const char* utf8_str, const int byte_length); - - // Converts a utf-8 string to a vector of unicodes. - // Returns an empty vector if the input contains invalid UTF-8. - static std::vector UTF8ToUTF32(const char* utf8_str); - // Converts a vector of unicodes to a utf8 string. - // Returns an empty string if the input contains an invalid unicode. - static std::string UTF32ToUTF8(const std::vector& str32); - - private: - // A UTF-8 representation of 1 or more Unicode characters. - // The last element (chars[UNICHAR_LEN - 1]) is a length if - // its value < UNICHAR_LEN, otherwise it is a genuine character. 
- char chars[UNICHAR_LEN]; -}; - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_UNICHAR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharcompress.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharcompress.cpp deleted file mode 100644 index 64b238fc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharcompress.cpp +++ /dev/null @@ -1,426 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharcompress.cpp -// Description: Unicode re-encoding using a sequence of smaller numbers in -// place of a single large code for CJK, similarly for Indic, -// and dissection of ligatures for other scripts. -// Author: Ray Smith -// Created: Wed Mar 04 14:45:01 PST 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "unicharcompress.h" -#include -#include -#include "tprintf.h" - -namespace tesseract { - -// String used to represent the null_id in direct_set. -const char* kNullChar = ""; -// Radix to make unique values from the stored radical codes. -const int kRadicalRadix = 29; - -// "Hash" function for const std::vector computes the sum of elements. -// Build a unique number for each code sequence that we can use as the index in -// a hash map of ints instead of trying to hash the vectors. 
-static int RadicalPreHash(const std::vector& rs) { - size_t result = 0; - for (int radical : rs) { - result *= kRadicalRadix; - result += radical; - } - return result; -} - -// A hash map to convert unicodes to radical encoding. -using RSMap = std::unordered_map>>; -// A hash map to count occurrences of each radical encoding. -using RSCounts = std::unordered_map; - -static bool DecodeRadicalLine(STRING* radical_data_line, RSMap* radical_map) { - if (radical_data_line->length() == 0 || (*radical_data_line)[0] == '#') - return true; - GenericVector entries; - radical_data_line->split(' ', &entries); - if (entries.size() < 2) return false; - char* end = nullptr; - int unicode = strtol(&entries[0][0], &end, 10); - if (*end != '\0') return false; - std::unique_ptr> radicals(new std::vector); - for (int i = 1; i < entries.size(); ++i) { - int radical = strtol(&entries[i][0], &end, 10); - if (*end != '\0') return false; - radicals->push_back(radical); - } - (*radical_map)[unicode] = std::move(radicals); - return true; -} - -// Helper function builds the RSMap from the radical-stroke file, which has -// already been read into a STRING. Returns false on error. -// The radical_stroke_table is non-const because it gets split and the caller -// is unlikely to want to use it again. 
-static bool DecodeRadicalTable(STRING* radical_data, RSMap* radical_map) { - GenericVector lines; - radical_data->split('\n', &lines); - for (int i = 0; i < lines.size(); ++i) { - if (!DecodeRadicalLine(&lines[i], radical_map)) { - tprintf("Invalid format in radical table at line %d: %s\n", i, - lines[i].string()); - return false; - } - } - return true; -} - -UnicharCompress::UnicharCompress() : code_range_(0) {} -UnicharCompress::UnicharCompress(const UnicharCompress& src) { *this = src; } -UnicharCompress::~UnicharCompress() { Cleanup(); } -UnicharCompress& UnicharCompress::operator=(const UnicharCompress& src) { - Cleanup(); - encoder_ = src.encoder_; - code_range_ = src.code_range_; - SetupDecoder(); - return *this; -} - -// Computes the encoding for the given unicharset. It is a requirement that -// the file training/langdata/radical-stroke.txt have been read into the -// input string radical_stroke_table. -// Returns false if the encoding cannot be constructed. -bool UnicharCompress::ComputeEncoding(const UNICHARSET& unicharset, int null_id, - STRING* radical_stroke_table) { - RSMap radical_map; - if (radical_stroke_table != nullptr && - !DecodeRadicalTable(radical_stroke_table, &radical_map)) - return false; - encoder_.clear(); - UNICHARSET direct_set; - // To avoid unused codes, clear the special codes from the direct_set. - direct_set.clear(); - // Always keep space as 0; - direct_set.unichar_insert(" ", OldUncleanUnichars::kTrue); - // Null char is next if we have one. - if (null_id >= 0) { - direct_set.unichar_insert(kNullChar); - } - RSCounts radical_counts; - // In the initial map, codes [0, unicharset.size()) are - // reserved for non-han/hangul sequences of 1 or more unicodes. - int hangul_offset = unicharset.size(); - // Hangul takes the next range [hangul_offset, hangul_offset + kTotalJamos). - const int kTotalJamos = kLCount + kVCount + kTCount; - // Han takes the codes beyond hangul_offset + kTotalJamos. 
Since it is hard - // to measure the number of radicals and strokes, initially we use the same - // code range for all 3 Han code positions, and fix them after. - int han_offset = hangul_offset + kTotalJamos; - for (int u = 0; u <= unicharset.size(); ++u) { - // We special-case allow null_id to be equal to unicharset.size() in case - // there is no space in unicharset for it. - if (u == unicharset.size() && u != null_id) break; // Finished - RecodedCharID code; - // Convert to unicodes. - std::vector unicodes; - std::string cleaned; - if (u < unicharset.size()) - cleaned = UNICHARSET::CleanupString(unicharset.id_to_unichar(u)); - if (u < unicharset.size() && - (unicodes = UNICHAR::UTF8ToUTF32(cleaned.c_str())).size() == 1) { - // Check single unicodes for Hangul/Han and encode if so. - int unicode = unicodes[0]; - int leading, vowel, trailing; - auto it = radical_map.find(unicode); - if (it != radical_map.end()) { - // This is Han. Use the radical codes directly. - int num_radicals = it->second->size(); - for (int c = 0; c < num_radicals; ++c) { - code.Set(c, han_offset + (*it->second)[c]); - } - int pre_hash = RadicalPreHash(*it->second); - int num_samples = radical_counts[pre_hash]++; - if (num_samples > 0) - code.Set(num_radicals, han_offset + num_samples + kRadicalRadix); - } else if (DecomposeHangul(unicode, &leading, &vowel, &trailing)) { - // This is Hangul. Since we know the exact size of each part at compile - // time, it gets the bottom set of codes. - code.Set3(leading + hangul_offset, vowel + kLCount + hangul_offset, - trailing + kLCount + kVCount + hangul_offset); - } - } - // If the code is still empty, it wasn't Han or Hangul. - if (code.length() == 0) { - // Special cases. - if (u == UNICHAR_SPACE) { - code.Set(0, 0); // Space. 
- } else if (u == null_id || (unicharset.has_special_codes() && - u < SPECIAL_UNICHAR_CODES_COUNT)) { - code.Set(0, direct_set.unichar_to_id(kNullChar)); - } else { - // Add the direct_set unichar-ids of the unicodes in sequence to the - // code. - for (int i = 0; i < unicodes.size(); ++i) { - int position = code.length(); - if (position >= RecodedCharID::kMaxCodeLen) { - tprintf("Unichar %d=%s is too long to encode!!\n", u, - unicharset.id_to_unichar(u)); - return false; - } - int uni = unicodes[i]; - UNICHAR unichar(uni); - char* utf8 = unichar.utf8_str(); - if (!direct_set.contains_unichar(utf8)) - direct_set.unichar_insert(utf8); - code.Set(position, direct_set.unichar_to_id(utf8)); - delete[] utf8; - if (direct_set.size() > - unicharset.size() + !unicharset.has_special_codes()) { - // Code space got bigger! - tprintf("Code space expanded from original unicharset!!\n"); - return false; - } - } - } - } - encoder_.push_back(code); - } - // Now renumber Han to make all codes unique. We already added han_offset to - // all Han. Now separate out the radical, stroke, and count codes for Han. - int code_offset = 0; - for (int i = 0; i < RecodedCharID::kMaxCodeLen; ++i) { - int max_offset = 0; - for (int u = 0; u < unicharset.size(); ++u) { - RecodedCharID* code = &encoder_[u]; - if (code->length() <= i) continue; - max_offset = std::max(max_offset, (*code)(i)-han_offset); - code->Set(i, (*code)(i) + code_offset); - } - if (max_offset == 0) break; - code_offset += max_offset + 1; - } - DefragmentCodeValues(null_id >= 0 ? 1 : -1); - SetupDecoder(); - return true; -} - -// Sets up an encoder that doesn't change the unichars at all, so it just -// passes them through unchanged. 
-void UnicharCompress::SetupPassThrough(const UNICHARSET& unicharset) { - GenericVector codes; - for (int u = 0; u < unicharset.size(); ++u) { - RecodedCharID code; - code.Set(0, u); - codes.push_back(code); - } - if (!unicharset.has_special_codes()) { - RecodedCharID code; - code.Set(0, unicharset.size()); - codes.push_back(code); - } - SetupDirect(codes); -} - -// Sets up an encoder directly using the given encoding vector, which maps -// unichar_ids to the given codes. -void UnicharCompress::SetupDirect(const GenericVector& codes) { - encoder_ = codes; - ComputeCodeRange(); - SetupDecoder(); -} - -// Renumbers codes to eliminate unused values. -void UnicharCompress::DefragmentCodeValues(int encoded_null) { - // There may not be any Hangul, but even if there is, it is possible that not - // all codes are used. Likewise with the Han encoding, it is possible that not - // all numbers of strokes are used. - ComputeCodeRange(); - GenericVector offsets; - offsets.init_to_size(code_range_, 0); - // Find which codes are used - for (int c = 0; c < encoder_.size(); ++c) { - const RecodedCharID& code = encoder_[c]; - for (int i = 0; i < code.length(); ++i) { - offsets[code(i)] = 1; - } - } - // Compute offsets based on code use. - int offset = 0; - for (int i = 0; i < offsets.size(); ++i) { - // If not used, decrement everything above here. - // We are moving encoded_null to the end, so it is not "used". - if (offsets[i] == 0 || i == encoded_null) { - --offset; - } else { - offsets[i] = offset; - } - } - if (encoded_null >= 0) { - // The encoded_null is moving to the end, for the benefit of TensorFlow, - // which is offsets.size() + offsets.back(). - offsets[encoded_null] = offsets.size() + offsets.back() - encoded_null; - } - // Now apply the offsets. 
- for (int c = 0; c < encoder_.size(); ++c) { - RecodedCharID* code = &encoder_[c]; - for (int i = 0; i < code->length(); ++i) { - int value = (*code)(i); - code->Set(i, value + offsets[value]); - } - } - ComputeCodeRange(); -} - -// Encodes a single unichar_id. Returns the length of the code, or zero if -// invalid input, and the encoding itself -int UnicharCompress::EncodeUnichar(int unichar_id, RecodedCharID* code) const { - if (unichar_id < 0 || unichar_id >= encoder_.size()) return 0; - *code = encoder_[unichar_id]; - return code->length(); -} - -// Decodes code, returning the original unichar-id, or -// INVALID_UNICHAR_ID if the input is invalid. -int UnicharCompress::DecodeUnichar(const RecodedCharID& code) const { - int len = code.length(); - if (len <= 0 || len > RecodedCharID::kMaxCodeLen) return INVALID_UNICHAR_ID; - auto it = decoder_.find(code); - if (it == decoder_.end()) return INVALID_UNICHAR_ID; - return it->second; -} - -// Writes to the given file. Returns false in case of error. -bool UnicharCompress::Serialize(TFile* fp) const { - return encoder_.SerializeClasses(fp); -} - -// Reads from the given file. Returns false in case of error. -bool UnicharCompress::DeSerialize(TFile* fp) { - if (!encoder_.DeSerializeClasses(fp)) return false; - ComputeCodeRange(); - SetupDecoder(); - return true; -} - -// Returns a STRING containing a text file that describes the encoding thus: -// [,]* -// In words, a comma-separated list of one or more indices, followed by a tab -// and the UTF-8 string that the code represents per line. Most simple scripts -// will encode a single index to a UTF8-string, but Chinese, Japanese, Korean -// and the Indic scripts will contain a many-to-many mapping. -// See the class comment above for details. 
-STRING UnicharCompress::GetEncodingAsString( - const UNICHARSET& unicharset) const { - STRING encoding; - for (int c = 0; c < encoder_.size(); ++c) { - const RecodedCharID& code = encoder_[c]; - if (0 < c && c < SPECIAL_UNICHAR_CODES_COUNT && code == encoder_[c - 1]) { - // Don't show the duplicate entry. - continue; - } - encoding.add_str_int("", code(0)); - for (int i = 1; i < code.length(); ++i) { - encoding.add_str_int(",", code(i)); - } - encoding += "\t"; - if (c >= unicharset.size() || (0 < c && c < SPECIAL_UNICHAR_CODES_COUNT && - unicharset.has_special_codes())) { - encoding += kNullChar; - } else { - encoding += unicharset.id_to_unichar(c); - } - encoding += "\n"; - } - return encoding; -} - -// Helper decomposes a Hangul unicode to 3 parts, leading, vowel, trailing. -// Note that the returned values are 0-based indices, NOT unicode Jamo. -// Returns false if the input is not in the Hangul unicode range. -/* static */ -bool UnicharCompress::DecomposeHangul(int unicode, int* leading, int* vowel, - int* trailing) { - if (unicode < kFirstHangul) return false; - int offset = unicode - kFirstHangul; - if (offset >= kNumHangul) return false; - const int kNCount = kVCount * kTCount; - *leading = offset / kNCount; - *vowel = (offset % kNCount) / kTCount; - *trailing = offset % kTCount; - return true; -} - -// Computes the value of code_range_ from the encoder_. -void UnicharCompress::ComputeCodeRange() { - code_range_ = -1; - for (int c = 0; c < encoder_.size(); ++c) { - const RecodedCharID& code = encoder_[c]; - for (int i = 0; i < code.length(); ++i) { - if (code(i) > code_range_) code_range_ = code(i); - } - } - ++code_range_; -} - -// Initializes the decoding hash_map from the encoding array. 
-void UnicharCompress::SetupDecoder() { - Cleanup(); - is_valid_start_.init_to_size(code_range_, false); - for (int c = 0; c < encoder_.size(); ++c) { - const RecodedCharID& code = encoder_[c]; - decoder_[code] = c; - is_valid_start_[code(0)] = true; - RecodedCharID prefix = code; - int len = code.length() - 1; - prefix.Truncate(len); - auto final_it = final_codes_.find(prefix); - if (final_it == final_codes_.end()) { - GenericVectorEqEq* code_list = new GenericVectorEqEq; - code_list->push_back(code(len)); - final_codes_[prefix] = code_list; - while (--len >= 0) { - prefix.Truncate(len); - auto next_it = next_codes_.find(prefix); - if (next_it == next_codes_.end()) { - GenericVectorEqEq* code_list = new GenericVectorEqEq; - code_list->push_back(code(len)); - next_codes_[prefix] = code_list; - } else { - // We still have to search the list as we may get here via multiple - // lengths of code. - if (!next_it->second->contains(code(len))) - next_it->second->push_back(code(len)); - break; // This prefix has been processed. - } - } - } else { - if (!final_it->second->contains(code(len))) - final_it->second->push_back(code(len)); - } - } -} - -// Frees allocated memory. -void UnicharCompress::Cleanup() { - decoder_.clear(); - is_valid_start_.clear(); - for (auto it = next_codes_.begin(); it != next_codes_.end(); ++it) { - delete it->second; - } - for (auto it = final_codes_.begin(); it != final_codes_.end(); ++it) { - delete it->second; - } - next_codes_.clear(); - final_codes_.clear(); -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharcompress.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharcompress.h deleted file mode 100644 index 66dd2943..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharcompress.h +++ /dev/null @@ -1,239 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharcompress.h -// Description: Unicode re-encoding using a sequence of smaller numbers in -// place of a single large code for CJK, similarly for Indic, -// and dissection of ligatures for other scripts. -// Author: Ray Smith -// Created: Wed Mar 04 14:45:01 PST 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ -#define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ - -#include - -#include "serialis.h" -#include "strngs.h" -#include "unicharset.h" - -namespace tesseract { - -// Trivial class to hold the code for a recoded unichar-id. -class RecodedCharID { - public: - // The maximum length of a code. - static const int kMaxCodeLen = 9; - - RecodedCharID() : self_normalized_(1), length_(0) { - memset(code_, 0, sizeof(code_)); - } - void Truncate(int length) { length_ = length; } - // Sets the code value at the given index in the code. 
- void Set(int index, int value) { - code_[index] = value; - if (length_ <= index) length_ = index + 1; - } - // Shorthand for setting codes of length 3, as all Hangul and Han codes are - // length 3. - void Set3(int code0, int code1, int code2) { - length_ = 3; - code_[0] = code0; - code_[1] = code1; - code_[2] = code2; - } - // Accessors - int length() const { return length_; } - int operator()(int index) const { return code_[index]; } - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const { - return fp->Serialize(&self_normalized_) && - fp->Serialize(&length_) && - fp->Serialize(&code_[0], length_); - } - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) { - return fp->DeSerialize(&self_normalized_) && - fp->DeSerialize(&length_) && - fp->DeSerialize(&code_[0], length_); - } - bool operator==(const RecodedCharID& other) const { - if (length_ != other.length_) return false; - for (int i = 0; i < length_; ++i) { - if (code_[i] != other.code_[i]) return false; - } - return true; - } - // Hash functor for RecodedCharID. - struct RecodedCharIDHash { - size_t operator()(const RecodedCharID& code) const { - size_t result = 0; - for (int i = 0; i < code.length_; ++i) { - result ^= code(i) << (7 * i); - } - return result; - } - }; - - private: - // True if this code is self-normalizing, ie is the master entry for indices - // that map to the same code. Has boolean value, but int8_t for serialization. - int8_t self_normalized_; - // The number of elements in use in code_; - int32_t length_; - // The re-encoded form of the unichar-id to which this RecodedCharID relates. - int32_t code_[kMaxCodeLen]; -}; - -// Class holds a "compression" of a unicharset to simplify the learning problem -// for a neural-network-based classifier. -// Objectives: -// 1 (CJK): Ids of a unicharset with a large number of classes are expressed as -// a sequence of 3 codes with much fewer values. 
-// This is achieved using the Jamo coding for Hangul and the Unicode -// Radical-Stroke-index for Han. -// 2 (Indic): Instead of thousands of codes with one for each grapheme, re-code -// as the unicode sequence (but coded in a more compact space). -// 3 (the rest): Eliminate multi-path problems with ligatures and fold confusing -// and not significantly distinct shapes (quotes) together, ie -// represent the fi ligature as the f-i pair, and fold u+2019 and -// friends all onto ascii single ' -// 4 The null character and mapping to target activations: -// To save horizontal coding space, the compressed codes are generally mapped -// to target network activations without intervening null characters, BUT -// in the case of ligatures, such as ff, null characters have to be included -// so existence of repeated codes is detected at codebook-building time, and -// null characters are embedded directly into the codes, so the rest of the -// system doesn't need to worry about the problem (much). There is still an -// effect on the range of ways in which the target activations can be -// generated. -// -// The computed code values are compact (no unused values), and, for CJK, -// unique (each code position uses a disjoint set of values from each other code -// position). For non-CJK, the same code value CAN be used in multiple -// positions, eg the ff ligature is converted to , where -// is the same code as is used for the single f. -class UnicharCompress { - public: - UnicharCompress(); - UnicharCompress(const UnicharCompress& src); - ~UnicharCompress(); - UnicharCompress& operator=(const UnicharCompress& src); - - // The 1st Hangul unicode. - static const int kFirstHangul = 0xac00; - // The number of Hangul unicodes. - static const int kNumHangul = 11172; - // The number of Jamos for each of the 3 parts of a Hangul character, being - // the Leading consonant, Vowel and Trailing consonant. 
- static const int kLCount = 19; - static const int kVCount = 21; - static const int kTCount = 28; - - // Computes the encoding for the given unicharset. It is a requirement that - // the file training/langdata/radical-stroke.txt have been read into the - // input string radical_stroke_table. - // Returns false if the encoding cannot be constructed. - bool ComputeEncoding(const UNICHARSET& unicharset, int null_id, - STRING* radical_stroke_table); - // Sets up an encoder that doesn't change the unichars at all, so it just - // passes them through unchanged. - void SetupPassThrough(const UNICHARSET& unicharset); - // Sets up an encoder directly using the given encoding vector, which maps - // unichar_ids to the given codes. - void SetupDirect(const GenericVector& codes); - - // Returns the number of different values that can be used in a code, ie - // 1 + the maximum value that will ever be used by an RecodedCharID code in - // any position in its array. - int code_range() const { return code_range_; } - - // Encodes a single unichar_id. Returns the length of the code, (or zero if - // invalid input), and the encoding itself in code. - int EncodeUnichar(int unichar_id, RecodedCharID* code) const; - // Decodes code, returning the original unichar-id, or - // INVALID_UNICHAR_ID if the input is invalid. - int DecodeUnichar(const RecodedCharID& code) const; - // Returns true if the given code is a valid start or single code. - bool IsValidFirstCode(int code) const { return is_valid_start_[code]; } - // Returns a list of valid non-final next codes for a given prefix code, - // which may be empty. - const GenericVector* GetNextCodes(const RecodedCharID& code) const { - auto it = next_codes_.find(code); - return it == next_codes_.end() ? nullptr : it->second; - } - // Returns a list of valid final codes for a given prefix code, which may - // be empty. 
- const GenericVector* GetFinalCodes(const RecodedCharID& code) const { - auto it = final_codes_.find(code); - return it == final_codes_.end() ? nullptr : it->second; - } - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const; - // Reads from the given file. Returns false in case of error. - - bool DeSerialize(TFile* fp); - - // Returns a STRING containing a text file that describes the encoding thus: - // [,]* - // In words, a comma-separated list of one or more indices, followed by a tab - // and the UTF-8 string that the code represents per line. Most simple scripts - // will encode a single index to a UTF8-string, but Chinese, Japanese, Korean - // and the Indic scripts will contain a many-to-many mapping. - // See the class comment above for details. - STRING GetEncodingAsString(const UNICHARSET& unicharset) const; - - // Helper decomposes a Hangul unicode to 3 parts, leading, vowel, trailing. - // Note that the returned values are 0-based indices, NOT unicode Jamo. - // Returns false if the input is not in the Hangul unicode range. - static bool DecomposeHangul(int unicode, int* leading, int* vowel, - int* trailing); - - private: - // Renumbers codes to eliminate unused values. - void DefragmentCodeValues(int encoded_null); - // Computes the value of code_range_ from the encoder_. - void ComputeCodeRange(); - // Initializes the decoding hash_map from the encoder_ array. - void SetupDecoder(); - // Frees allocated memory. - void Cleanup(); - - // The encoder that maps a unichar-id to a sequence of small codes. - // encoder_ is the only part that is serialized. The rest is computed on load. - GenericVector encoder_; - // Decoder converts the output of encoder back to a unichar-id. - std::unordered_map - decoder_; - // True if the index is a valid single or start code. - GenericVector is_valid_start_; - // Maps a prefix code to a list of valid next codes. - // The map owns the vectors. 
- std::unordered_map*, - RecodedCharID::RecodedCharIDHash> - next_codes_; - // Maps a prefix code to a list of valid final codes. - // The map owns the vectors. - std::unordered_map*, - RecodedCharID::RecodedCharIDHash> - final_codes_; - // Max of any value in encoder_ + 1. - int code_range_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharmap.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharmap.cpp deleted file mode 100644 index 4f7b266f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharmap.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharmap.cpp -// Description: Unicode character/ligature to integer id class. -// Author: Thomas Kielbus -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include -#include "unichar.h" -#include "host.h" -#include "unicharmap.h" - -UNICHARMAP::UNICHARMAP() : -nodes(nullptr) { -} - -UNICHARMAP::~UNICHARMAP() { - delete[] nodes; -} - -// Search the given unichar representation in the tree, using length characters -// from it maximum. Each character in the string is interpreted as an index in -// an array of nodes. 
-UNICHAR_ID UNICHARMAP::unichar_to_id(const char* const unichar_repr, - int length) const { - UNICHARMAP_NODE* current_nodes = nodes; - - assert(*unichar_repr != '\0'); - assert(length > 0 && length <= UNICHAR_LEN); - - int index = 0; - if (index >= length || unichar_repr[index] == '\0') return INVALID_UNICHAR_ID; - do { - if (index + 1 >= length || unichar_repr[index + 1] == '\0') - return current_nodes[static_cast(unichar_repr[index])].id; - current_nodes = - current_nodes[static_cast(unichar_repr[index])].children; - ++index; - } while (true); -} - -// Search the given unichar representation in the tree, creating the possibly -// missing nodes. Once the right place has been found, insert the given id and -// update the inserted flag to keep track of the insert. Each character in the -// string is interpreted as an index in an array of nodes. -void UNICHARMAP::insert(const char* const unichar_repr, UNICHAR_ID id) { - const char* current_char = unichar_repr; - if (*current_char == '\0') return; - UNICHARMAP_NODE** current_nodes_pointer = &nodes; - do { - if (*current_nodes_pointer == nullptr) - *current_nodes_pointer = new UNICHARMAP_NODE[256]; - if (current_char[1] == '\0') { - (*current_nodes_pointer) - [static_cast(*current_char)].id = id; - return; - } - current_nodes_pointer = - &((*current_nodes_pointer) - [static_cast(*current_char)].children); - ++current_char; - } while (true); -} - -// Search the given unichar representation in the tree, using length characters -// from it maximum. Each character in the string is interpreted as an index in -// an array of nodes. Stop once the tree does not have anymore nodes or once we -// found the right unichar_repr. 
-bool UNICHARMAP::contains(const char* const unichar_repr, - int length) const { - if (unichar_repr == nullptr || *unichar_repr == '\0') return false; - if (length <= 0 || length > UNICHAR_LEN) return false; - int index = 0; - if (unichar_repr[index] == '\0') return false; - UNICHARMAP_NODE* current_nodes = nodes; - - while (current_nodes != nullptr && index + 1 < length && - unichar_repr[index + 1] != '\0') { - current_nodes = - current_nodes[static_cast(unichar_repr[index])].children; - ++index; - } - return current_nodes != nullptr && - (index + 1 >= length || unichar_repr[index + 1] == '\0') && - current_nodes[static_cast(unichar_repr[index])].id >= 0; -} - -// Return the minimum number of characters that must be used from this string -// to obtain a match in the UNICHARMAP. -int UNICHARMAP::minmatch(const char* const unichar_repr) const { - const char* current_char = unichar_repr; - if (*current_char == '\0') return 0; - UNICHARMAP_NODE* current_nodes = nodes; - - while (current_nodes != nullptr && *current_char != '\0') { - if (current_nodes[static_cast(*current_char)].id >= 0) - return current_char + 1 - unichar_repr; - current_nodes = - current_nodes[static_cast(*current_char)].children; - ++current_char; - } - return 0; -} - -void UNICHARMAP::clear() { - delete[] nodes; - nodes = nullptr; -} - -UNICHARMAP::UNICHARMAP_NODE::UNICHARMAP_NODE() : -children(nullptr), -id(-1) { -} - -// Recursively delete the children -UNICHARMAP::UNICHARMAP_NODE::~UNICHARMAP_NODE() { - delete[] children; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharmap.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharmap.h deleted file mode 100644 index 45170c4f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharmap.h +++ /dev/null @@ -1,73 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharmap.h -// Description: Unicode character/ligature to integer 
id class. -// Author: Thomas Kielbus -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_UNICHARMAP_H_ -#define TESSERACT_CCUTIL_UNICHARMAP_H_ - -#include "unichar.h" - -// A UNICHARMAP stores unique unichars. Each of them is associated with one -// UNICHAR_ID. -class UNICHARMAP { - public: - - // Create an empty UNICHARMAP - UNICHARMAP(); - - ~UNICHARMAP(); - - // Insert the given unichar represention in the UNICHARMAP and associate it - // with the given id. The length of the representation MUST be non-zero. - void insert(const char* const unichar_repr, UNICHAR_ID id); - - // Return the id associated with the given unichar representation, - // this representation MUST exist within the UNICHARMAP. The first - // length characters (maximum) from unichar_repr are used. The length - // MUST be non-zero. - UNICHAR_ID unichar_to_id(const char* const unichar_repr, int length) const; - - // Return true if the given unichar representation is already present in the - // UNICHARMAP. The first length characters (maximum) from unichar_repr are - // used. The length MUST be non-zero. - bool contains(const char* const unichar_repr, int length) const; - - // Return the minimum number of characters that must be used from this string - // to obtain a match in the UNICHARMAP. 
- int minmatch(const char* const unichar_repr) const; - - // Clear the UNICHARMAP. All previous data is lost. - void clear(); - - private: - - // The UNICHARMAP is represented as a tree whose nodes are of type - // UNICHARMAP_NODE. - struct UNICHARMAP_NODE { - - UNICHARMAP_NODE(); - ~UNICHARMAP_NODE(); - - UNICHARMAP_NODE* children; - UNICHAR_ID id; - }; - - UNICHARMAP_NODE* nodes; -}; - -#endif // TESSERACT_CCUTIL_UNICHARMAP_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharset.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharset.cpp deleted file mode 100644 index 875c5667..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharset.cpp +++ /dev/null @@ -1,1144 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharset.cpp -// Description: Unicode character/ligature set class. -// Author: Thomas Kielbus -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "unicharset.h" - -#include -#include -#include -#include - -#include "params.h" -#include "serialis.h" -#include "tesscallback.h" -#include "tprintf.h" -#include "unichar.h" - -// TODO(rays) Move UNICHARSET to tesseract namespace. 
-using tesseract::char32; -using tesseract::UNICHAR; - -// Special character used in representing character fragments. -static const char kSeparator = '|'; -// Special character used in representing 'natural' character fragments. -static const char kNaturalFlag = 'n'; - -static const int ISALPHA_MASK = 0x1; -static const int ISLOWER_MASK = 0x2; -static const int ISUPPER_MASK = 0x4; -static const int ISDIGIT_MASK = 0x8; -static const int ISPUNCTUATION_MASK = 0x10; - -// Y coordinate threshold for determining cap-height vs x-height. -// TODO(rays) Bring the global definition down to the ccutil library level, -// so this constant is relative to some other constants. -static const int kMeanlineThreshold = 220; -// Let C be the number of alpha chars for which all tops exceed -// kMeanlineThreshold, and X the number of alpha chars for which all -// tops are below kMeanlineThreshold, then if X > C * -// kMinXHeightFraction and C > X * kMinCapHeightFraction or more than -// half the alpha characters have upper or lower case, then the -// unicharset "has x-height". -const double kMinXHeightFraction = 0.25; -const double kMinCapHeightFraction = 0.05; - -/*static */ -const char* UNICHARSET::kCustomLigatures[][2] = { - {"ct", "\uE003"}, // c + t -> U+E003 - {"ſh", "\uE006"}, // long-s + h -> U+E006 - {"ſi", "\uE007"}, // long-s + i -> U+E007 - {"ſl", "\uE008"}, // long-s + l -> U+E008 - {"ſſ", "\uE009"}, // long-s + long-s -> U+E009 - {nullptr, nullptr} -}; - -// List of mappings to make when ingesting strings from the outside. -// The substitutions clean up text that should exist for rendering of -// synthetic data, but not in the recognition set. -const char* UNICHARSET::kCleanupMaps[][2] = { - {"\u0640", ""}, // TATWEEL is deleted. - {"\ufb01", "fi"}, // fi ligature->fi pair. - {"\ufb02", "fl"}, // fl ligature->fl pair. - {nullptr, nullptr}}; - -// List of strings for the SpecialUnicharCodes. Keep in sync with the enum. 
-const char* UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = { - " ", - "Joined", - "|Broken|0|1" -}; - -const char* UNICHARSET::null_script = "NULL"; - -UNICHARSET::UNICHAR_PROPERTIES::UNICHAR_PROPERTIES() { - Init(); -} - -// Initialize all properties to sensible default values. -void UNICHARSET::UNICHAR_PROPERTIES::Init() { - isalpha = false; - islower = false; - isupper = false; - isdigit = false; - ispunctuation = false; - isngram = false; - enabled = false; - SetRangesOpen(); - script_id = 0; - other_case = 0; - mirror = 0; - normed = ""; - direction = UNICHARSET::U_LEFT_TO_RIGHT; - fragment = nullptr; -} - -// Sets all ranges wide open. Initialization default in case there are -// no useful values available. -void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() { - min_bottom = 0; - max_bottom = UINT8_MAX; - min_top = 0; - max_top = UINT8_MAX; - width = 0.0f; - width_sd = 0.0f; - bearing = 0.0f; - bearing_sd = 0.0f; - advance = 0.0f; - advance_sd = 0.0f; -} - -// Sets all ranges to empty. Used before expanding with font-based data. -void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() { - min_bottom = UINT8_MAX; - max_bottom = 0; - min_top = UINT8_MAX; - max_top = 0; - width = 0.0f; - width_sd = 0.0f; - bearing = 0.0f; - bearing_sd = 0.0f; - advance = 0.0f; - advance_sd = 0.0f; -} - -// Returns true if any of the top/bottom/width/bearing/advance ranges/stats -// is empty. -bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const { - return width == 0.0f || advance == 0.0f; -} - -// Expands the ranges with the ranges from the src properties. 
-void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom( - const UNICHAR_PROPERTIES& src) { - UpdateRange(src.min_bottom, &min_bottom, &max_bottom); - UpdateRange(src.max_bottom, &min_bottom, &max_bottom); - UpdateRange(src.min_top, &min_top, &max_top); - UpdateRange(src.max_top, &min_top, &max_top); - if (src.width_sd > width_sd) { - width = src.width; - width_sd = src.width_sd; - } - if (src.bearing_sd > bearing_sd) { - bearing = src.bearing; - bearing_sd = src.bearing_sd; - } - if (src.advance_sd > advance_sd) { - advance = src.advance; - advance_sd = src.advance_sd; - } -} - -// Copies the properties from src into this. -void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(const UNICHAR_PROPERTIES& src) { - // Apart from the fragment, everything else can be done with a default copy. - CHAR_FRAGMENT* saved_fragment = fragment; - *this = src; // Bitwise copy. - fragment = saved_fragment; -} - -UNICHARSET::UNICHARSET() : - unichars(nullptr), - ids(), - size_used(0), - size_reserved(0), - script_table(nullptr), - script_table_size_used(0) { - clear(); - for (int i = 0; i < SPECIAL_UNICHAR_CODES_COUNT; ++i) { - unichar_insert(kSpecialUnicharCodes[i]); - if (i == UNICHAR_JOINED) - set_isngram(i, true); - } -} - -UNICHARSET::~UNICHARSET() { - clear(); -} - -void UNICHARSET::reserve(int unichars_number) { - if (unichars_number > size_reserved) { - UNICHAR_SLOT* unichars_new = new UNICHAR_SLOT[unichars_number]; - for (int i = 0; i < size_used; ++i) - unichars_new[i] = unichars[i]; - for (int j = size_used; j < unichars_number; ++j) { - unichars_new[j].properties.script_id = add_script(null_script); - } - delete[] unichars; - unichars = unichars_new; - size_reserved = unichars_number; - } -} - -UNICHAR_ID -UNICHARSET::unichar_to_id(const char* const unichar_repr) const { - std::string cleaned = - old_style_included_ ? unichar_repr : CleanupString(unichar_repr); - return ids.contains(cleaned.data(), cleaned.size()) - ? 
ids.unichar_to_id(cleaned.data(), cleaned.size()) - : INVALID_UNICHAR_ID; -} - -UNICHAR_ID UNICHARSET::unichar_to_id(const char* const unichar_repr, - int length) const { - assert(length > 0 && length <= UNICHAR_LEN); - std::string cleaned(unichar_repr, length); - if (!old_style_included_) cleaned = CleanupString(unichar_repr, length); - return ids.contains(cleaned.data(), cleaned.size()) - ? ids.unichar_to_id(cleaned.data(), cleaned.size()) - : INVALID_UNICHAR_ID; -} - -// Return the minimum number of bytes that matches a legal UNICHAR_ID, -// while leaving the rest of the string encodable. Returns 0 if the -// beginning of the string is not encodable. -// WARNING: this function now encodes the whole string for precision. -// Use encode_string in preference to repeatedly calling step. -int UNICHARSET::step(const char* str) const { - GenericVector encoding; - GenericVector lengths; - encode_string(str, true, &encoding, &lengths, nullptr); - if (encoding.empty() || encoding[0] == INVALID_UNICHAR_ID) return 0; - return lengths[0]; -} - -// Return whether the given UTF-8 string is encodable with this UNICHARSET. -// If not encodable, write the first byte offset which cannot be converted -// into the second (return) argument. -bool UNICHARSET::encodable_string(const char *str, - int *first_bad_position) const { - GenericVector encoding; - return encode_string(str, true, &encoding, nullptr, first_bad_position); -} - -// Encodes the given UTF-8 string with this UNICHARSET. -// Returns true if the encoding succeeds completely, false if there is at -// least one INVALID_UNICHAR_ID in the returned encoding, but in this case -// the rest of the string is still encoded. -// If lengths is not nullptr, then it is filled with the corresponding -// byte length of each encoded UNICHAR_ID. -// WARNING: Caller must guarantee that str has already been cleaned of codes -// that do not belong in the unicharset, or encoding may fail. -// Use CleanupString to perform the cleaning. 
-bool UNICHARSET::encode_string(const char* str, bool give_up_on_failure, - GenericVector* encoding, - GenericVector* lengths, - int* encoded_length) const { - GenericVector working_encoding; - GenericVector working_lengths; - GenericVector best_lengths; - encoding->truncate(0); // Just in case str is empty. - int str_length = strlen(str); - int str_pos = 0; - bool perfect = true; - while (str_pos < str_length) { - encode_string(str, str_pos, str_length, &working_encoding, &working_lengths, - &str_pos, encoding, &best_lengths); - if (str_pos < str_length) { - // This is a non-match. Skip one utf-8 character. - perfect = false; - if (give_up_on_failure) break; - int step = UNICHAR::utf8_step(str + str_pos); - if (step == 0) step = 1; - encoding->push_back(INVALID_UNICHAR_ID); - best_lengths.push_back(step); - str_pos += step; - working_encoding = *encoding; - working_lengths = best_lengths; - } - } - if (lengths != nullptr) *lengths = best_lengths; - if (encoded_length != nullptr) *encoded_length = str_pos; - return perfect; -} - -const char* UNICHARSET::id_to_unichar(UNICHAR_ID id) const { - if (id == INVALID_UNICHAR_ID) { - return INVALID_UNICHAR; - } - ASSERT_HOST(id < this->size()); - return unichars[id].representation; -} - -const char* UNICHARSET::id_to_unichar_ext(UNICHAR_ID id) const { - if (id == INVALID_UNICHAR_ID) { - return INVALID_UNICHAR; - } - ASSERT_HOST(id < this->size()); - // Resolve from the kCustomLigatures table if this is a private encoding. - if (get_isprivate(id)) { - const char* ch = id_to_unichar(id); - for (int i = 0; kCustomLigatures[i][0] != nullptr; ++i) { - if (!strcmp(ch, kCustomLigatures[i][1])) { - return kCustomLigatures[i][0]; - } - } - } - // Otherwise return the stored representation. - return unichars[id].representation; -} - -// Return a STRING that reformats the utf8 str into the str followed -// by its hex unicodes. 
-STRING UNICHARSET::debug_utf8_str(const char* str) { - STRING result = str; - result += " ["; - int step = 1; - // Chop into unicodes and code each as hex. - for (int i = 0; str[i] != '\0'; i += step) { - char hex[sizeof(int) * 2 + 1]; - step = UNICHAR::utf8_step(str + i); - if (step == 0) { - step = 1; - sprintf(hex, "%x", str[i]); - } else { - UNICHAR ch(str + i, step); - sprintf(hex, "%x", ch.first_uni()); - } - result += hex; - result += " "; - } - result += "]"; - return result; -} - -// Return a STRING containing debug information on the unichar, including -// the id_to_unichar, its hex unicodes and the properties. -STRING UNICHARSET::debug_str(UNICHAR_ID id) const { - if (id == INVALID_UNICHAR_ID) return STRING(id_to_unichar(id)); - const CHAR_FRAGMENT *fragment = this->get_fragment(id); - if (fragment) { - return fragment->to_string(); - } - const char* str = id_to_unichar(id); - STRING result = debug_utf8_str(str); - // Append a for lower alpha, A for upper alpha, and x if alpha but neither. - if (get_isalpha(id)) { - if (get_islower(id)) - result += "a"; - else if (get_isupper(id)) - result += "A"; - else - result += "x"; - } - // Append 0 if a digit. - if (get_isdigit(id)) { - result += "0"; - } - // Append p is a punctuation symbol. - if (get_ispunctuation(id)) { - result += "p"; - } - return result; -} - -// Sets the normed_ids vector from the normed string. normed_ids is not -// stored in the file, and needs to be set when the UNICHARSET is loaded. 
-void UNICHARSET::set_normed_ids(UNICHAR_ID unichar_id) { - unichars[unichar_id].properties.normed_ids.truncate(0); - if (unichar_id == UNICHAR_SPACE && id_to_unichar(unichar_id)[0] == ' ') { - unichars[unichar_id].properties.normed_ids.push_back(UNICHAR_SPACE); - } else if (!encode_string(unichars[unichar_id].properties.normed.string(), - true, &unichars[unichar_id].properties.normed_ids, - nullptr, nullptr)) { - unichars[unichar_id].properties.normed_ids.truncate(0); - unichars[unichar_id].properties.normed_ids.push_back(unichar_id); - } -} - -// Returns whether the unichar id represents a unicode value in the private use -// area. We use this range only internally to represent uncommon ligatures -// (eg. 'ct') that do not have regular unicode values. -bool UNICHARSET::get_isprivate(UNICHAR_ID unichar_id) const { - UNICHAR uc(id_to_unichar(unichar_id), -1); - int uni = uc.first_uni(); - return (uni >= 0xE000 && uni <= 0xF8FF); -} - - -// Sets all ranges to empty, so they can be expanded to set the values. -void UNICHARSET::set_ranges_empty() { - for (int id = 0; id < size_used; ++id) { - unichars[id].properties.SetRangesEmpty(); - } -} - -// Sets all the properties for this unicharset given a src unicharset with -// everything set. The unicharsets don't have to be the same, and graphemes -// are correctly accounted for. -void UNICHARSET::PartialSetPropertiesFromOther(int start_index, - const UNICHARSET& src) { - for (int ch = start_index; ch < size_used; ++ch) { - const char* utf8 = id_to_unichar(ch); - UNICHAR_PROPERTIES properties; - if (src.GetStrProperties(utf8, &properties)) { - // Setup the script_id, other_case, and mirror properly. 
- const char* script = src.get_script_from_script_id(properties.script_id); - properties.script_id = add_script(script); - const char* other_case = src.id_to_unichar(properties.other_case); - if (contains_unichar(other_case)) { - properties.other_case = unichar_to_id(other_case); - } else { - properties.other_case = ch; - } - const char* mirror_str = src.id_to_unichar(properties.mirror); - if (contains_unichar(mirror_str)) { - properties.mirror = unichar_to_id(mirror_str); - } else { - properties.mirror = ch; - } - unichars[ch].properties.CopyFrom(properties); - set_normed_ids(ch); - } - } -} - -// Expands the tops and bottoms and widths for this unicharset given a -// src unicharset with ranges in it. The unicharsets don't have to be the -// same, and graphemes are correctly accounted for. -void UNICHARSET::ExpandRangesFromOther(const UNICHARSET& src) { - for (int ch = 0; ch < size_used; ++ch) { - const char* utf8 = id_to_unichar(ch); - UNICHAR_PROPERTIES properties; - if (src.GetStrProperties(utf8, &properties)) { - // Expand just the ranges from properties. - unichars[ch].properties.ExpandRangesFrom(properties); - } - } -} - -// Makes this a copy of src. Clears this completely first, so the automatic -// ids will not be present in this if not in src. Does NOT reorder the set! -void UNICHARSET::CopyFrom(const UNICHARSET& src) { - clear(); - for (int ch = 0; ch < src.size_used; ++ch) { - const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties; - const char* utf8 = src.id_to_unichar(ch); - unichar_insert_backwards_compatible(utf8); - unichars[ch].properties.ExpandRangesFrom(src_props); - } - // Set properties, including mirror and other_case, WITHOUT reordering - // the unicharset. - PartialSetPropertiesFromOther(0, src); -} - -// For each id in src, if it does not occur in this, add it, as in -// SetPropertiesFromOther, otherwise expand the ranges, as in -// ExpandRangesFromOther. 
-void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) { - int initial_used = size_used; - for (int ch = 0; ch < src.size_used; ++ch) { - const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties; - const char* utf8 = src.id_to_unichar(ch); - int id = size_used; - if (contains_unichar(utf8)) { - id = unichar_to_id(utf8); - // Just expand current ranges. - unichars[id].properties.ExpandRangesFrom(src_props); - } else { - unichar_insert_backwards_compatible(utf8); - unichars[id].properties.SetRangesEmpty(); - } - } - // Set properties, including mirror and other_case, WITHOUT reordering - // the unicharset. - PartialSetPropertiesFromOther(initial_used, src); -} - -// Returns true if the acceptable ranges of the tops of the characters do -// not overlap, making their x-height calculations distinct. -bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const { - int overlap = std::min(unichars[id1].properties.max_top, - unichars[id2].properties.max_top) - - std::max(unichars[id1].properties.min_top, - unichars[id2].properties.min_top); - return overlap <= 0; -} - -// Internal recursive version of encode_string above. -// Seeks to encode the given string as a sequence of UNICHAR_IDs such that -// each UNICHAR_ID uses the least possible part of the utf8 str. -// It does this by depth-first tail recursion on increasing length matches -// to the UNICHARSET, saving the first encountered result that encodes the -// maximum total length of str. It stops on a failure to encode to make -// the overall process of encoding a partially failed string more efficient. -// See unicharset.h for definition of the args. -void UNICHARSET::encode_string(const char* str, int str_index, int str_length, - GenericVector* encoding, - GenericVector* lengths, - int* best_total_length, - GenericVector* best_encoding, - GenericVector* best_lengths) const { - if (str_index > *best_total_length) { - // This is the best result so far. 
- *best_total_length = str_index; - *best_encoding = *encoding; - if (best_lengths != nullptr) - *best_lengths = *lengths; - } - if (str_index == str_length) return; - int encoding_index = encoding->size(); - // Find the length of the first matching unicharset member. - int length = ids.minmatch(str + str_index); - if (length == 0 || str_index + length > str_length) return; - do { - if (ids.contains(str + str_index, length)) { - // Successful encoding so far. - UNICHAR_ID id = ids.unichar_to_id(str + str_index, length); - encoding->push_back(id); - lengths->push_back(length); - encode_string(str, str_index + length, str_length, encoding, lengths, - best_total_length, best_encoding, best_lengths); - if (*best_total_length == str_length) - return; // Tail recursion success! - // Failed with that length, truncate back and try again. - encoding->truncate(encoding_index); - lengths->truncate(encoding_index); - } - int step = UNICHAR::utf8_step(str + str_index + length); - if (step == 0) step = 1; - length += step; - } while (length <= UNICHAR_LEN && str_index + length <= str_length); -} - -// Gets the properties for a grapheme string, combining properties for -// multiple characters in a meaningful way where possible. -// Returns false if no valid match was found in the unicharset. -// NOTE that script_id, mirror, and other_case refer to this unicharset on -// return and will need translation if the target unicharset is different. -bool UNICHARSET::GetStrProperties(const char* utf8_str, - UNICHAR_PROPERTIES* props) const { - props->Init(); - props->SetRangesEmpty(); - int total_unicodes = 0; - GenericVector encoding; - if (!encode_string(utf8_str, true, &encoding, nullptr, nullptr)) - return false; // Some part was invalid. - for (int i = 0; i < encoding.size(); ++i) { - int id = encoding[i]; - const UNICHAR_PROPERTIES& src_props = unichars[id].properties; - // Logical OR all the bools. 
- if (src_props.isalpha) props->isalpha = true; - if (src_props.islower) props->islower = true; - if (src_props.isupper) props->isupper = true; - if (src_props.isdigit) props->isdigit = true; - if (src_props.ispunctuation) props->ispunctuation = true; - if (src_props.isngram) props->isngram = true; - if (src_props.enabled) props->enabled = true; - // Min/max the tops/bottoms. - UpdateRange(src_props.min_bottom, &props->min_bottom, &props->max_bottom); - UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom); - UpdateRange(src_props.min_top, &props->min_top, &props->max_top); - UpdateRange(src_props.max_top, &props->min_top, &props->max_top); - float bearing = props->advance + src_props.bearing; - if (total_unicodes == 0 || bearing < props->bearing) { - props->bearing = bearing; - props->bearing_sd = props->advance_sd + src_props.bearing_sd; - } - props->advance += src_props.advance; - props->advance_sd += src_props.advance_sd; - // With a single width, just use the widths stored in the unicharset. - props->width = src_props.width; - props->width_sd = src_props.width_sd; - // Use the first script id, other_case, mirror, direction. - // Note that these will need translation, except direction. - if (total_unicodes == 0) { - props->script_id = src_props.script_id; - props->other_case = src_props.other_case; - props->mirror = src_props.mirror; - props->direction = src_props.direction; - } - // The normed string for the compound character is the concatenation of - // the normed versions of the individual characters. - props->normed += src_props.normed; - ++total_unicodes; - } - if (total_unicodes > 1) { - // Estimate the total widths from the advance - bearing. - props->width = props->advance - props->bearing; - props->width_sd = props->advance_sd + props->bearing_sd; - } - return total_unicodes > 0; -} - -// TODO(rays) clean-up the order of functions to match unicharset.h. 
- -unsigned int UNICHARSET::get_properties(UNICHAR_ID id) const { - unsigned int properties = 0; - if (this->get_isalpha(id)) - properties |= ISALPHA_MASK; - if (this->get_islower(id)) - properties |= ISLOWER_MASK; - if (this->get_isupper(id)) - properties |= ISUPPER_MASK; - if (this->get_isdigit(id)) - properties |= ISDIGIT_MASK; - if (this->get_ispunctuation(id)) - properties |= ISPUNCTUATION_MASK; - return properties; -} - -char UNICHARSET::get_chartype(UNICHAR_ID id) const { - if (this->get_isupper(id)) return 'A'; - if (this->get_islower(id)) return 'a'; - if (this->get_isalpha(id)) return 'x'; - if (this->get_isdigit(id)) return '0'; - if (this->get_ispunctuation(id)) return 'p'; - return 0; -} - -void UNICHARSET::unichar_insert(const char* const unichar_repr, - OldUncleanUnichars old_style) { - if (old_style == OldUncleanUnichars::kTrue) old_style_included_ = true; - std::string cleaned = - old_style_included_ ? unichar_repr : CleanupString(unichar_repr); - if (!cleaned.empty() && !ids.contains(cleaned.data(), cleaned.size())) { - const char* str = cleaned.c_str(); - GenericVector encoding; - if (!old_style_included_ && - encode_string(str, true, &encoding, nullptr, nullptr)) - return; - if (size_used == size_reserved) { - if (size_used == 0) - reserve(8); - else - reserve(2 * size_used); - } - int index = 0; - do { - if (index >= UNICHAR_LEN) { - fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN, - unichar_repr); - return; - } - unichars[size_used].representation[index++] = *str++; - } while (*str != '\0'); - unichars[size_used].representation[index] = '\0'; - this->set_script(size_used, null_script); - // If the given unichar_repr represents a fragmented character, set - // fragment property to a pointer to CHAR_FRAGMENT class instance with - // information parsed from the unichar representation. Use the script - // of the base unichar for the fragmented character if possible. 
- CHAR_FRAGMENT* frag = - CHAR_FRAGMENT::parse_from_string(unichars[size_used].representation); - this->unichars[size_used].properties.fragment = frag; - if (frag != nullptr && this->contains_unichar(frag->get_unichar())) { - this->unichars[size_used].properties.script_id = - this->get_script(frag->get_unichar()); - } - this->unichars[size_used].properties.enabled = true; - ids.insert(unichars[size_used].representation, size_used); - ++size_used; - } -} - -bool UNICHARSET::contains_unichar(const char* const unichar_repr) const { - std::string cleaned = - old_style_included_ ? unichar_repr : CleanupString(unichar_repr); - return ids.contains(cleaned.data(), cleaned.size()); -} - -bool UNICHARSET::contains_unichar(const char* const unichar_repr, - int length) const { - if (length == 0) { - return false; - } - std::string cleaned(unichar_repr, length); - if (!old_style_included_) cleaned = CleanupString(unichar_repr, length); - return ids.contains(cleaned.data(), cleaned.size()); -} - -bool UNICHARSET::eq(UNICHAR_ID unichar_id, - const char* const unichar_repr) const { - return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0; -} - -bool UNICHARSET::save_to_string(STRING *str) const { - const int kFileBufSize = 1024; - char buffer[kFileBufSize + 1]; - snprintf(buffer, kFileBufSize, "%d\n", this->size()); - *str = buffer; - for (UNICHAR_ID id = 0; id < this->size(); ++id) { - int min_bottom, max_bottom, min_top, max_top; - get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top); - float width, width_sd; - get_width_stats(id, &width, &width_sd); - float bearing, bearing_sd; - get_bearing_stats(id, &bearing, &bearing_sd); - float advance, advance_sd; - get_advance_stats(id, &advance, &advance_sd); - unsigned int properties = this->get_properties(id); - if (strcmp(this->id_to_unichar(id), " ") == 0) { - snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties, - this->get_script_from_script_id(this->get_script(id)), - 
this->get_other_case(id)); - } else { - snprintf(buffer, kFileBufSize, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n", - this->id_to_unichar(id), properties, - min_bottom, max_bottom, min_top, max_top, width, width_sd, - bearing, bearing_sd, advance, advance_sd, - this->get_script_from_script_id(this->get_script(id)), - this->get_other_case(id), this->get_direction(id), - this->get_mirror(id), this->get_normed_unichar(id), - this->debug_str(id).string()); - } - *str += buffer; - } - return true; -} - -// TODO(rays) Replace with TFile everywhere. -class InMemoryFilePointer { - public: - InMemoryFilePointer(const char *memory, int mem_size) - : memory_(memory), fgets_ptr_(memory), mem_size_(mem_size) { } - - char *fgets(char *orig_dst, int size) { - const char *src_end = memory_ + mem_size_; - char *dst_end = orig_dst + size - 1; - if (size < 1) { - return fgets_ptr_ < src_end ? orig_dst : nullptr; - } - - char *dst = orig_dst; - char ch = '^'; - while (fgets_ptr_ < src_end && dst < dst_end && ch != '\n') { - ch = *dst++ = *fgets_ptr_++; - } - *dst = 0; - return (dst == orig_dst) ? 
nullptr : orig_dst; - } - - private: - const char *memory_; - const char *fgets_ptr_; - const int mem_size_; -}; - -bool UNICHARSET::load_from_inmemory_file( - const char *memory, int mem_size, bool skip_fragments) { - InMemoryFilePointer mem_fp(memory, mem_size); - TessResultCallback2 *fgets_cb = - NewPermanentTessCallback(&mem_fp, &InMemoryFilePointer::fgets); - bool success = load_via_fgets(fgets_cb, skip_fragments); - delete fgets_cb; - return success; -} - -class LocalFilePointer { - public: - LocalFilePointer(FILE *stream) : fp_(stream) {} - char *fgets(char *dst, int size) { - return ::fgets(dst, size, fp_); - } - private: - FILE *fp_; -}; - -bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) { - LocalFilePointer lfp(file); - TessResultCallback2 *fgets_cb = - NewPermanentTessCallback(&lfp, &LocalFilePointer::fgets); - bool success = load_via_fgets(fgets_cb, skip_fragments); - delete fgets_cb; - return success; -} - -bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) { - TessResultCallback2 *fgets_cb = - NewPermanentTessCallback(file, &tesseract::TFile::FGets); - bool success = load_via_fgets(fgets_cb, skip_fragments); - delete fgets_cb; - return success; -} - -bool UNICHARSET::load_via_fgets( - TessResultCallback2 *fgets_cb, - bool skip_fragments) { - int unicharset_size; - char buffer[256]; - - this->clear(); - if (fgets_cb->Run(buffer, sizeof(buffer)) == nullptr || - sscanf(buffer, "%d", &unicharset_size) != 1) { - return false; - } - this->reserve(unicharset_size); - for (UNICHAR_ID id = 0; id < unicharset_size; ++id) { - char unichar[256]; - unsigned int properties; - char script[64]; - - strncpy(script, null_script, sizeof(script)); - int min_bottom = 0; - int max_bottom = UINT8_MAX; - int min_top = 0; - int max_top = UINT8_MAX; - float width = 0.0f; - float width_sd = 0.0f; - float bearing = 0.0f; - float bearing_sd = 0.0f; - float advance = 0.0f; - float advance_sd = 0.0f; - // TODO(eger): check that this 
default it ok - // after enabling BiDi iterator for Arabic+Cube. - int direction = UNICHARSET::U_LEFT_TO_RIGHT; - UNICHAR_ID other_case = id; - UNICHAR_ID mirror = id; - char normed[64]; - int v = -1; - if (fgets_cb->Run(buffer, sizeof (buffer)) == nullptr || - ((v = sscanf(buffer, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - &width, &width_sd, &bearing, &bearing_sd, - &advance, &advance_sd, script, &other_case, - &direction, &mirror, normed)) != 17 && - (v = sscanf(buffer, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - &width, &width_sd, &bearing, &bearing_sd, - &advance, &advance_sd, script, &other_case, - &direction, &mirror)) != 16 && - (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - script, &other_case, &direction, &mirror)) != 10 && - (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - script, &other_case)) != 8 && - (v = sscanf(buffer, "%s %x %63s %d", unichar, &properties, - script, &other_case)) != 4 && - (v = sscanf(buffer, "%s %x %63s", - unichar, &properties, script)) != 3 && - (v = sscanf(buffer, "%s %x", unichar, &properties)) != 2)) { - return false; - } - - // Skip fragments if needed. - CHAR_FRAGMENT *frag = nullptr; - if (skip_fragments && (frag = CHAR_FRAGMENT::parse_from_string(unichar))) { - int num_pieces = frag->get_total(); - delete frag; - // Skip multi-element fragments, but keep singles like UNICHAR_BROKEN in. - if (num_pieces > 1) - continue; - } - // Insert unichar into unicharset and set its properties. 
- if (strcmp(unichar, "NULL") == 0) - this->unichar_insert(" "); - else - this->unichar_insert_backwards_compatible(unichar); - - this->set_isalpha(id, properties & ISALPHA_MASK); - this->set_islower(id, properties & ISLOWER_MASK); - this->set_isupper(id, properties & ISUPPER_MASK); - this->set_isdigit(id, properties & ISDIGIT_MASK); - this->set_ispunctuation(id, properties & ISPUNCTUATION_MASK); - this->set_isngram(id, false); - this->set_script(id, script); - this->unichars[id].properties.enabled = true; - this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top); - this->set_width_stats(id, width, width_sd); - this->set_bearing_stats(id, bearing, bearing_sd); - this->set_advance_stats(id, advance, advance_sd); - this->set_direction(id, static_cast(direction)); - this->set_other_case( - id, (v > 3 && other_case < unicharset_size) ? other_case : id); - this->set_mirror(id, (v > 8 && mirror < unicharset_size) ? mirror : id); - this->set_normed(id, (v>16) ? normed : unichar); - } - post_load_setup(); - return true; -} - -// Sets up internal data after loading the file, based on the char -// properties. Called from load_from_file, but also needs to be run -// during set_unicharset_properties. -void UNICHARSET::post_load_setup() { - // Number of alpha chars with the case property minus those without, - // in order to determine that half the alpha chars have case. 
- int net_case_alphas = 0; - int x_height_alphas = 0; - int cap_height_alphas = 0; - top_bottom_set_ = false; - for (UNICHAR_ID id = 0; id < size_used; ++id) { - int min_bottom = 0; - int max_bottom = UINT8_MAX; - int min_top = 0; - int max_top = UINT8_MAX; - get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top); - if (min_top > 0) - top_bottom_set_ = true; - if (get_isalpha(id)) { - if (get_islower(id) || get_isupper(id)) - ++net_case_alphas; - else - --net_case_alphas; - if (min_top < kMeanlineThreshold && max_top < kMeanlineThreshold) - ++x_height_alphas; - else if (min_top > kMeanlineThreshold && max_top > kMeanlineThreshold) - ++cap_height_alphas; - } - set_normed_ids(id); - } - - script_has_upper_lower_ = net_case_alphas > 0; - script_has_xheight_ = script_has_upper_lower_ || - (x_height_alphas > cap_height_alphas * kMinXHeightFraction && - cap_height_alphas > x_height_alphas * kMinCapHeightFraction); - - null_sid_ = get_script_id_from_name(null_script); - ASSERT_HOST(null_sid_ == 0); - common_sid_ = get_script_id_from_name("Common"); - latin_sid_ = get_script_id_from_name("Latin"); - cyrillic_sid_ = get_script_id_from_name("Cyrillic"); - greek_sid_ = get_script_id_from_name("Greek"); - han_sid_ = get_script_id_from_name("Han"); - hiragana_sid_ = get_script_id_from_name("Hiragana"); - katakana_sid_ = get_script_id_from_name("Katakana"); - thai_sid_ = get_script_id_from_name("Thai"); - hangul_sid_ = get_script_id_from_name("Hangul"); - - // Compute default script. Use the highest-counting alpha script, that is - // not the common script, as that still contains some "alphas". 
- int* script_counts = new int[script_table_size_used]; - memset(script_counts, 0, sizeof(*script_counts) * script_table_size_used); - for (int id = 0; id < size_used; ++id) { - if (get_isalpha(id)) { - ++script_counts[get_script(id)]; - } - } - default_sid_ = 0; - for (int s = 1; s < script_table_size_used; ++s) { - if (script_counts[s] > script_counts[default_sid_] && s != common_sid_) - default_sid_ = s; - } - delete [] script_counts; -} - -// Returns true if right_to_left scripts are significant in the unicharset, -// but without being so sensitive that "universal" unicharsets containing -// characters from many scripts, like orientation and script detection, -// look like they are right_to_left. -bool UNICHARSET::major_right_to_left() const { - int ltr_count = 0; - int rtl_count = 0; - for (int id = 0; id < size_used; ++id) { - int dir = get_direction(id); - if (dir == UNICHARSET::U_LEFT_TO_RIGHT) ltr_count++; - if (dir == UNICHARSET::U_RIGHT_TO_LEFT || - dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || - dir == UNICHARSET::U_ARABIC_NUMBER) rtl_count++; - } - return rtl_count > ltr_count; -} - -// Set a whitelist and/or blacklist of characters to recognize. -// An empty or nullptr whitelist enables everything (minus any blacklist). -// An empty or nullptr blacklist disables nothing. -// An empty or nullptr blacklist has no effect. -void UNICHARSET::set_black_and_whitelist(const char* blacklist, - const char* whitelist, - const char* unblacklist) { - bool def_enabled = whitelist == nullptr || whitelist[0] == '\0'; - // Set everything to default - for (int ch = 0; ch < size_used; ++ch) - unichars[ch].properties.enabled = def_enabled; - if (!def_enabled) { - // Enable the whitelist. 
- GenericVector encoding; - encode_string(whitelist, false, &encoding, nullptr, nullptr); - for (int i = 0; i < encoding.size(); ++i) { - if (encoding[i] != INVALID_UNICHAR_ID) - unichars[encoding[i]].properties.enabled = true; - } - } - if (blacklist != nullptr && blacklist[0] != '\0') { - // Disable the blacklist. - GenericVector encoding; - encode_string(blacklist, false, &encoding, nullptr, nullptr); - for (int i = 0; i < encoding.size(); ++i) { - if (encoding[i] != INVALID_UNICHAR_ID) - unichars[encoding[i]].properties.enabled = false; - } - } - if (unblacklist != nullptr && unblacklist[0] != '\0') { - // Re-enable the unblacklist. - GenericVector encoding; - encode_string(unblacklist, false, &encoding, nullptr, nullptr); - for (int i = 0; i < encoding.size(); ++i) { - if (encoding[i] != INVALID_UNICHAR_ID) - unichars[encoding[i]].properties.enabled = true; - } - } -} - -// Returns true if there are any repeated unicodes in the normalized -// text of any unichar-id in the unicharset. -bool UNICHARSET::AnyRepeatedUnicodes() const { - int start_id = 0; - if (has_special_codes()) start_id = SPECIAL_UNICHAR_CODES_COUNT; - for (int id = start_id; id < size_used; ++id) { - // Convert to unicodes. 
- std::vector unicodes = UNICHAR::UTF8ToUTF32(get_normed_unichar(id)); - for (int u = 1; u < unicodes.size(); ++u) { - if (unicodes[u - 1] == unicodes[u]) return true; - } - } - return false; -} - -int UNICHARSET::add_script(const char* script) { - for (int i = 0; i < script_table_size_used; ++i) { - if (strcmp(script, script_table[i]) == 0) - return i; - } - if (script_table_size_reserved == 0) { - script_table_size_reserved = 8; - script_table = new char*[script_table_size_reserved]; - } else if (script_table_size_used >= script_table_size_reserved) { - assert(script_table_size_used == script_table_size_reserved); - script_table_size_reserved += script_table_size_reserved; - char** new_script_table = new char*[script_table_size_reserved]; - memcpy(new_script_table, script_table, - script_table_size_used * sizeof(char*)); - delete[] script_table; - script_table = new_script_table; - } - script_table[script_table_size_used] = new char[strlen(script) + 1]; - strcpy(script_table[script_table_size_used], script); - return script_table_size_used++; -} - -// Returns the string that represents a fragment -// with the given unichar, pos and total. -STRING CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, - bool natural) { - if (total == 1) return STRING(unichar); - STRING result = ""; - result += kSeparator; - result += unichar; - char buffer[kMaxLen]; - snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos, - natural ? 
kNaturalFlag : kSeparator, total); - result += buffer; - return result; -} - -CHAR_FRAGMENT *CHAR_FRAGMENT::parse_from_string(const char *string) { - const char *ptr = string; - int len = strlen(string); - if (len < kMinLen || *ptr != kSeparator) { - return nullptr; // this string can not represent a fragment - } - ptr++; // move to the next character - int step = 0; - while ((ptr + step) < (string + len) && *(ptr + step) != kSeparator) { - step += UNICHAR::utf8_step(ptr + step); - } - if (step == 0 || step > UNICHAR_LEN) { - return nullptr; // no character for unichar or the character is too long - } - char unichar[UNICHAR_LEN + 1]; - strncpy(unichar, ptr, step); - unichar[step] = '\0'; // null terminate unichar - ptr += step; // move to the next fragment separator - int pos = 0; - int total = 0; - bool natural = false; - char *end_ptr = nullptr; - for (int i = 0; i < 2; i++) { - if (ptr > string + len || *ptr != kSeparator) { - if (i == 1 && *ptr == kNaturalFlag) - natural = true; - else - return nullptr; // Failed to parse fragment representation. - } - ptr++; // move to the next character - i == 0 ? pos = static_cast(strtol(ptr, &end_ptr, 10)) - : total = static_cast(strtol(ptr, &end_ptr, 10)); - ptr = end_ptr; - } - if (ptr != string + len) { - return nullptr; // malformed fragment representation - } - CHAR_FRAGMENT *fragment = new CHAR_FRAGMENT(); - fragment->set_all(unichar, pos, total, natural); - return fragment; -} - -int UNICHARSET::get_script_id_from_name(const char* script_name) const { - for (int i = 0; i < script_table_size_used; ++i) { - if (strcmp(script_name, script_table[i]) == 0) - return i; - } - return 0; // 0 is always the null_script -} - -// Removes/replaces content that belongs in rendered text, but not in the -// unicharset. 
-/* static */ -std::string UNICHARSET::CleanupString(const char* utf8_str, size_t length) { - std::string result; - result.reserve(length); - char ch; - while ((ch = *utf8_str) != '\0' && length-- > 0) { - int key_index = 0; - const char* key; - while ((key = kCleanupMaps[key_index][0]) != nullptr) { - int match = 0; - while (key[match] != '\0' && key[match] == utf8_str[match]) ++match; - if (key[match] == '\0') { - utf8_str += match; - break; - } - ++key_index; - } - if (key == nullptr) { - result.push_back(ch); - ++utf8_str; - } else { - result.append(kCleanupMaps[key_index][1]); - } - } - return result; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharset.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharset.h deleted file mode 100644 index be9db935..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicharset.h +++ /dev/null @@ -1,1043 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharset.h -// Description: Unicode character/ligature set class. -// Author: Thomas Kielbus -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_UNICHARSET_H_ -#define TESSERACT_CCUTIL_UNICHARSET_H_ - -#include "errcode.h" -#include "genericvector.h" -#include "helpers.h" -#include "serialis.h" -#include "strngs.h" -#include "tesscallback.h" -#include "unichar.h" -#include "unicharmap.h" - -// Enum holding special values of unichar_id. Every unicharset has these. -// Warning! Keep in sync with kSpecialUnicharCodes. -enum SpecialUnicharCodes { - UNICHAR_SPACE, - UNICHAR_JOINED, - UNICHAR_BROKEN, - - SPECIAL_UNICHAR_CODES_COUNT -}; - -// Boolean flag for unichar_insert. It's a bit of a double negative to allow -// the default value to be false. -enum class OldUncleanUnichars { - kFalse, - kTrue, -}; - -class CHAR_FRAGMENT { - public: - // Minimum number of characters used for fragment representation. - static const int kMinLen = 6; - // Maximum number of characters used for fragment representation. - static const int kMaxLen = 3 + UNICHAR_LEN + 2; - // Maximum number of fragments per character. - static const int kMaxChunks = 5; - - // Setters and Getters. - inline void set_all(const char *unichar, int pos, int total, bool natural) { - set_unichar(unichar); - set_pos(pos); - set_total(total); - set_natural(natural); - } - inline void set_unichar(const char *uch) { - strncpy(this->unichar, uch, UNICHAR_LEN); - this->unichar[UNICHAR_LEN] = '\0'; - } - inline void set_pos(int p) { this->pos = p; } - inline void set_total(int t) { this->total = t; } - inline const char* get_unichar() const { return this->unichar; } - inline int get_pos() const { return this->pos; } - inline int get_total() const { return this->total; } - - // Returns the string that represents a fragment - // with the given unichar, pos and total. - static STRING to_string(const char *unichar, int pos, int total, - bool natural); - // Returns the string that represents this fragment. 
- STRING to_string() const { - return to_string(unichar, pos, total, natural); - } - - // Checks whether a fragment has the same unichar, - // position and total as the given inputs. - inline bool equals(const char *other_unichar, - int other_pos, int other_total) const { - return (strcmp(this->unichar, other_unichar) == 0 && - this->pos == other_pos && this->total == other_total); - } - inline bool equals(const CHAR_FRAGMENT *other) const { - return this->equals(other->get_unichar(), - other->get_pos(), - other->get_total()); - } - - // Checks whether a given fragment is a continuation of this fragment. - // Assumes that the given fragment pointer is not nullptr. - inline bool is_continuation_of(const CHAR_FRAGMENT *fragment) const { - return (strcmp(this->unichar, fragment->get_unichar()) == 0 && - this->total == fragment->get_total() && - this->pos == fragment->get_pos() + 1); - } - - // Returns true if this fragment is a beginning fragment. - inline bool is_beginning() const { return this->pos == 0; } - - // Returns true if this fragment is an ending fragment. - inline bool is_ending() const { return this->pos == this->total-1; } - - // Returns true if the fragment was a separate component to begin with, - // ie did not need chopping to be isolated, but may have been separated - // out from a multi-outline blob. - inline bool is_natural() const { return natural; } - void set_natural(bool value) { natural = value; } - - // Parses the string to see whether it represents a character fragment - // (rather than a regular character). If so, allocates memory for a new - // CHAR_FRAGMENT instance and fills it in with the corresponding fragment - // information. Fragments are of the form: - // |m|1|2, meaning chunk 1 of 2 of character m, or - // |:|1n2, meaning chunk 1 of 2 of character :, and no chopping was needed - // to divide the parts, as they were already separate connected components. 
- // - // If parsing succeeded returns the pointer to the allocated CHAR_FRAGMENT - // instance, otherwise (if the string does not represent a fragment or it - // looks like it does, but parsing it as a fragment fails) returns nullptr. - // - // Note: The caller is responsible for deallocating memory - // associated with the returned pointer. - static CHAR_FRAGMENT *parse_from_string(const char *str); - - private: - char unichar[UNICHAR_LEN + 1]; - // True if the fragment was a separate component to begin with, - // ie did not need chopping to be isolated, but may have been separated - // out from a multi-outline blob. - bool natural; - int16_t pos; // fragment position in the character - int16_t total; // total number of fragments in the character -}; - -// The UNICHARSET class is an utility class for Tesseract that holds the -// set of characters that are used by the engine. Each character is identified -// by a unique number, from 0 to (size - 1). -class UNICHARSET { - public: - // Custom list of characters and their ligature forms (UTF8) - // These map to unicode values in the private use area (PUC) and are supported - // by only few font families (eg. Wyld, Adobe Caslon Pro). - static TESS_API const char* kCustomLigatures[][2]; - - // List of strings for the SpecialUnicharCodes. Keep in sync with the enum. 
- static TESS_API const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]; - - // ICU 2.0 UCharDirection enum (from third_party/icu/include/unicode/uchar.h) - enum Direction { - U_LEFT_TO_RIGHT = 0, - U_RIGHT_TO_LEFT = 1, - U_EUROPEAN_NUMBER = 2, - U_EUROPEAN_NUMBER_SEPARATOR = 3, - U_EUROPEAN_NUMBER_TERMINATOR = 4, - U_ARABIC_NUMBER = 5, - U_COMMON_NUMBER_SEPARATOR = 6, - U_BLOCK_SEPARATOR = 7, - U_SEGMENT_SEPARATOR = 8, - U_WHITE_SPACE_NEUTRAL = 9, - U_OTHER_NEUTRAL = 10, - U_LEFT_TO_RIGHT_EMBEDDING = 11, - U_LEFT_TO_RIGHT_OVERRIDE = 12, - U_RIGHT_TO_LEFT_ARABIC = 13, - U_RIGHT_TO_LEFT_EMBEDDING = 14, - U_RIGHT_TO_LEFT_OVERRIDE = 15, - U_POP_DIRECTIONAL_FORMAT = 16, - U_DIR_NON_SPACING_MARK = 17, - U_BOUNDARY_NEUTRAL = 18, - U_CHAR_DIRECTION_COUNT - }; - - // Create an empty UNICHARSET - UNICHARSET(); - - ~UNICHARSET(); - - // Return the UNICHAR_ID of a given unichar representation within the - // UNICHARSET. - UNICHAR_ID unichar_to_id(const char* const unichar_repr) const; - - // Return the UNICHAR_ID of a given unichar representation within the - // UNICHARSET. Only the first length characters from unichar_repr are used. - UNICHAR_ID unichar_to_id(const char* const unichar_repr, int length) const; - - // Return the minimum number of bytes that matches a legal UNICHAR_ID, - // while leaving the rest of the string encodable. Returns 0 if the - // beginning of the string is not encodable. - // WARNING: this function now encodes the whole string for precision. - // Use encode_string in preference to repeatedly calling step. - int step(const char* str) const; - - // Returns true if the given UTF-8 string is encodable with this UNICHARSET. - // If not encodable, write the first byte offset which cannot be converted - // into the second (return) argument. - bool encodable_string(const char *str, int *first_bad_position) const; - - // Encodes the given UTF-8 string with this UNICHARSET. 
- // Any part of the string that cannot be encoded (because the utf8 can't - // be broken up into pieces that are in the unicharset) then: - // if give_up_on_failure, stops and returns a partial encoding, - // else continues and inserts an INVALID_UNICHAR_ID in the returned encoding. - // Returns true if the encoding succeeds completely, false if there is at - // least one failure. - // If lengths is not nullptr, then it is filled with the corresponding - // byte length of each encoded UNICHAR_ID. - // If encoded_length is not nullptr then on return it contains the length of - // str that was encoded. (if give_up_on_failure the location of the first - // failure, otherwise strlen(str).) - // WARNING: Caller must guarantee that str has already been cleaned of codes - // that do not belong in the unicharset, or encoding may fail. - // Use CleanupString to perform the cleaning. - bool encode_string(const char* str, bool give_up_on_failure, - GenericVector* encoding, - GenericVector* lengths, - int* encoded_length) const; - - // Return the unichar representation corresponding to the given UNICHAR_ID - // within the UNICHARSET. - const char* id_to_unichar(UNICHAR_ID id) const; - - // Return the UTF8 representation corresponding to the given UNICHAR_ID after - // resolving any private encodings internal to Tesseract. This method is - // preferable to id_to_unichar for outputting text that will be visible to - // external applications. - const char* id_to_unichar_ext(UNICHAR_ID id) const; - - // Return a STRING that reformats the utf8 str into the str followed - // by its hex unicodes. - static STRING debug_utf8_str(const char* str); - - // Removes/replaces content that belongs in rendered text, but not in the - // unicharset. 
- static std::string CleanupString(const char* utf8_str) { - return CleanupString(utf8_str, strlen(utf8_str)); - } - static std::string CleanupString(const char* utf8_str, size_t length); - - // Return a STRING containing debug information on the unichar, including - // the id_to_unichar, its hex unicodes and the properties. - STRING debug_str(UNICHAR_ID id) const; - STRING debug_str(const char * unichar_repr) const { - return debug_str(unichar_to_id(unichar_repr)); - } - - // Adds a unichar representation to the set. If old_style is true, then - // TATWEEL characters are kept and n-grams are allowed. Otherwise TATWEEL - // characters are ignored/skipped as if they don't exist and n-grams that - // can already be encoded are not added. - void unichar_insert(const char* const unichar_repr, - OldUncleanUnichars old_style); - void unichar_insert(const char* const unichar_repr) { - unichar_insert(unichar_repr, OldUncleanUnichars::kFalse); - } - // Adds a unichar representation to the set. Avoids setting old_style to true, - // unless it is necessary to make the new unichar get added. - void unichar_insert_backwards_compatible(const char* const unichar_repr) { - std::string cleaned = CleanupString(unichar_repr); - if (cleaned != unichar_repr) { - unichar_insert(unichar_repr, OldUncleanUnichars::kTrue); - } else { - int old_size = size(); - unichar_insert(unichar_repr, OldUncleanUnichars::kFalse); - if (size() == old_size) { - unichar_insert(unichar_repr, OldUncleanUnichars::kTrue); - } - } - } - - // Return true if the given unichar id exists within the set. - // Relies on the fact that unichar ids are contiguous in the unicharset. - bool contains_unichar_id(UNICHAR_ID unichar_id) const { - return unichar_id != INVALID_UNICHAR_ID && unichar_id < size_used && - unichar_id >= 0; - } - - // Return true if the given unichar representation exists within the set. 
- bool contains_unichar(const char* const unichar_repr) const; - bool contains_unichar(const char* const unichar_repr, int length) const; - - // Return true if the given unichar representation corresponds to the given - // UNICHAR_ID within the set. - bool eq(UNICHAR_ID unichar_id, const char* const unichar_repr) const; - - // Delete CHAR_FRAGMENTs stored in properties of unichars array. - void delete_pointers_in_unichars() { - for (int i = 0; i < size_used; ++i) { - delete unichars[i].properties.fragment; - unichars[i].properties.fragment = nullptr; - } - } - - // Clear the UNICHARSET (all the previous data is lost). - void clear() { - if (script_table != nullptr) { - for (int i = 0; i < script_table_size_used; ++i) - delete[] script_table[i]; - delete[] script_table; - script_table = nullptr; - script_table_size_used = 0; - } - if (unichars != nullptr) { - delete_pointers_in_unichars(); - delete[] unichars; - unichars = nullptr; - } - script_table_size_reserved = 0; - size_reserved = 0; - size_used = 0; - ids.clear(); - top_bottom_set_ = false; - script_has_upper_lower_ = false; - script_has_xheight_ = false; - old_style_included_ = false; - null_sid_ = 0; - common_sid_ = 0; - latin_sid_ = 0; - cyrillic_sid_ = 0; - greek_sid_ = 0; - han_sid_ = 0; - hiragana_sid_ = 0; - katakana_sid_ = 0; - thai_sid_ = 0; - hangul_sid_ = 0; - default_sid_ = 0; - } - - // Return the size of the set (the number of different UNICHAR it holds). - int size() const { - return size_used; - } - - // Reserve enough memory space for the given number of UNICHARS - void reserve(int unichars_number); - - // Opens the file indicated by filename and saves unicharset to that file. - // Returns true if the operation is successful. - bool save_to_file(const char * const filename) const { - FILE* file = fopen(filename, "w+b"); - if (file == nullptr) return false; - bool result = save_to_file(file); - fclose(file); - return result; - } - - // Saves the content of the UNICHARSET to the given file. 
- // Returns true if the operation is successful. - bool save_to_file(FILE *file) const { - STRING str; - return save_to_string(&str) && - tesseract::Serialize(file, &str[0], str.length()); - } - - bool save_to_file(tesseract::TFile *file) const { - STRING str; - return save_to_string(&str) && file->Serialize(&str[0], str.length()); - } - - // Saves the content of the UNICHARSET to the given STRING. - // Returns true if the operation is successful. - bool save_to_string(STRING *str) const; - - // Load a unicharset from a unicharset file that has been loaded into - // the given memory buffer. - // Returns true if the operation is successful. - bool load_from_inmemory_file(const char* const memory, int mem_size, - bool skip_fragments); - // Returns true if the operation is successful. - bool load_from_inmemory_file(const char* const memory, int mem_size) { - return load_from_inmemory_file(memory, mem_size, false); - } - - // Opens the file indicated by filename and loads the UNICHARSET - // from the given file. The previous data is lost. - // Returns true if the operation is successful. - bool load_from_file(const char* const filename, bool skip_fragments) { - FILE* file = fopen(filename, "rb"); - if (file == nullptr) return false; - bool result = load_from_file(file, skip_fragments); - fclose(file); - return result; - } - // returns true if the operation is successful. - bool load_from_file(const char* const filename) { - return load_from_file(filename, false); - } - - // Loads the UNICHARSET from the given file. The previous data is lost. - // Returns true if the operation is successful. - bool load_from_file(FILE *file, bool skip_fragments); - bool load_from_file(FILE *file) { return load_from_file(file, false); } - bool load_from_file(tesseract::TFile *file, bool skip_fragments); - - - // Sets up internal data after loading the file, based on the char - // properties. Called from load_from_file, but also needs to be run - // during set_unicharset_properties. 
- void post_load_setup(); - - // Returns true if right_to_left scripts are significant in the unicharset, - // but without being so sensitive that "universal" unicharsets containing - // characters from many scripts, like orientation and script detection, - // look like they are right_to_left. - bool major_right_to_left() const; - - // Set a whitelist and/or blacklist of characters to recognize. - // An empty or nullptr whitelist enables everything (minus any blacklist). - // An empty or nullptr blacklist disables nothing. - // An empty or nullptr unblacklist has no effect. - // The blacklist overrides the whitelist. - // The unblacklist overrides the blacklist. - // Each list is a string of utf8 character strings. Boundaries between - // unicharset units are worked out automatically, and characters not in - // the unicharset are silently ignored. - void set_black_and_whitelist(const char* blacklist, const char* whitelist, - const char* unblacklist); - - // Set the isalpha property of the given unichar to the given value. - void set_isalpha(UNICHAR_ID unichar_id, bool value) { - unichars[unichar_id].properties.isalpha = value; - } - - // Set the islower property of the given unichar to the given value. - void set_islower(UNICHAR_ID unichar_id, bool value) { - unichars[unichar_id].properties.islower = value; - } - - // Set the isupper property of the given unichar to the given value. - void set_isupper(UNICHAR_ID unichar_id, bool value) { - unichars[unichar_id].properties.isupper = value; - } - - // Set the isdigit property of the given unichar to the given value. - void set_isdigit(UNICHAR_ID unichar_id, bool value) { - unichars[unichar_id].properties.isdigit = value; - } - - // Set the ispunctuation property of the given unichar to the given value. - void set_ispunctuation(UNICHAR_ID unichar_id, bool value) { - unichars[unichar_id].properties.ispunctuation = value; - } - - // Set the isngram property of the given unichar to the given value. 
- void set_isngram(UNICHAR_ID unichar_id, bool value) { - unichars[unichar_id].properties.isngram = value; - } - - // Set the script name of the given unichar to the given value. - // Value is copied and thus can be a temporary; - void set_script(UNICHAR_ID unichar_id, const char* value) { - unichars[unichar_id].properties.script_id = add_script(value); - } - - // Set other_case unichar id in the properties for the given unichar id. - void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case) { - unichars[unichar_id].properties.other_case = other_case; - } - - // Set the direction property of the given unichar to the given value. - void set_direction(UNICHAR_ID unichar_id, UNICHARSET::Direction value) { - unichars[unichar_id].properties.direction = value; - } - - // Set mirror unichar id in the properties for the given unichar id. - void set_mirror(UNICHAR_ID unichar_id, UNICHAR_ID mirror) { - unichars[unichar_id].properties.mirror = mirror; - } - - // Record normalized version of unichar with the given unichar_id. - void set_normed(UNICHAR_ID unichar_id, const char* normed) { - unichars[unichar_id].properties.normed = normed; - unichars[unichar_id].properties.normed_ids.truncate(0); - } - // Sets the normed_ids vector from the normed string. normed_ids is not - // stored in the file, and needs to be set when the UNICHARSET is loaded. - void set_normed_ids(UNICHAR_ID unichar_id); - - // Return the isalpha property of the given unichar. - bool get_isalpha(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return false; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.isalpha; - } - - // Return the islower property of the given unichar. - bool get_islower(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return false; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.islower; - } - - // Return the isupper property of the given unichar. 
- bool get_isupper(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return false; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.isupper; - } - - // Return the isdigit property of the given unichar. - bool get_isdigit(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return false; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.isdigit; - } - - // Return the ispunctuation property of the given unichar. - bool get_ispunctuation(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return false; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.ispunctuation; - } - - // Return the isngram property of the given unichar. - bool get_isngram(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return false; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.isngram; - } - - // Returns whether the unichar id represents a unicode value in the private - // use area. - bool get_isprivate(UNICHAR_ID unichar_id) const; - - // Returns true if the ids have useful min/max top/bottom values. - bool top_bottom_useful() const { - return top_bottom_set_; - } - // Sets all ranges to empty, so they can be expanded to set the values. - void set_ranges_empty(); - // Sets all the properties for this unicharset given a src_unicharset with - // everything set. The unicharsets don't have to be the same, and graphemes - // are correctly accounted for. - void SetPropertiesFromOther(const UNICHARSET& src) { - PartialSetPropertiesFromOther(0, src); - } - // Sets properties from Other, starting only at the given index. - void PartialSetPropertiesFromOther(int start_index, const UNICHARSET& src); - // Expands the tops and bottoms and widths for this unicharset given a - // src_unicharset with ranges in it. 
The unicharsets don't have to be the - // same, and graphemes are correctly accounted for. - void ExpandRangesFromOther(const UNICHARSET& src); - // Makes this a copy of src. Clears this completely first, so the automattic - // ids will not be present in this if not in src. - void CopyFrom(const UNICHARSET& src); - // For each id in src, if it does not occur in this, add it, as in - // SetPropertiesFromOther, otherwise expand the ranges, as in - // ExpandRangesFromOther. - void AppendOtherUnicharset(const UNICHARSET& src); - // Returns true if the acceptable ranges of the tops of the characters do - // not overlap, making their x-height calculations distinct. - bool SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const; - // Returns the min and max bottom and top of the given unichar in - // baseline-normalized coordinates, ie, where the baseline is - // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight - // (See normalis.h for the definitions). - void get_top_bottom(UNICHAR_ID unichar_id, - int* min_bottom, int* max_bottom, - int* min_top, int* max_top) const { - if (INVALID_UNICHAR_ID == unichar_id) { - *min_bottom = *min_top = 0; - *max_bottom = *max_top = 256; // kBlnCellHeight - return; - } - ASSERT_HOST(contains_unichar_id(unichar_id)); - *min_bottom = unichars[unichar_id].properties.min_bottom; - *max_bottom = unichars[unichar_id].properties.max_bottom; - *min_top = unichars[unichar_id].properties.min_top; - *max_top = unichars[unichar_id].properties.max_top; - } - void set_top_bottom(UNICHAR_ID unichar_id, - int min_bottom, int max_bottom, - int min_top, int max_top) { - unichars[unichar_id].properties.min_bottom = - ClipToRange(min_bottom, 0, UINT8_MAX); - unichars[unichar_id].properties.max_bottom = - ClipToRange(max_bottom, 0, UINT8_MAX); - unichars[unichar_id].properties.min_top = - ClipToRange(min_top, 0, UINT8_MAX); - unichars[unichar_id].properties.max_top = - ClipToRange(max_top, 0, UINT8_MAX); - } - // Returns the width stats 
(as mean, sd) of the given unichar relative to the - // median advance of all characters in the character set. - void get_width_stats(UNICHAR_ID unichar_id, - float* width, float* width_sd) const { - if (INVALID_UNICHAR_ID == unichar_id) { - *width = 0.0f; - *width_sd = 0.0f;; - return; - } - ASSERT_HOST(contains_unichar_id(unichar_id)); - *width = unichars[unichar_id].properties.width; - *width_sd = unichars[unichar_id].properties.width_sd; - } - void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd) { - unichars[unichar_id].properties.width = width; - unichars[unichar_id].properties.width_sd = width_sd; - } - // Returns the stats of the x-bearing (as mean, sd) of the given unichar - // relative to the median advance of all characters in the character set. - void get_bearing_stats(UNICHAR_ID unichar_id, - float* bearing, float* bearing_sd) const { - if (INVALID_UNICHAR_ID == unichar_id) { - *bearing = *bearing_sd = 0.0f; - return; - } - ASSERT_HOST(contains_unichar_id(unichar_id)); - *bearing = unichars[unichar_id].properties.bearing; - *bearing_sd = unichars[unichar_id].properties.bearing_sd; - } - void set_bearing_stats(UNICHAR_ID unichar_id, - float bearing, float bearing_sd) { - unichars[unichar_id].properties.bearing = bearing; - unichars[unichar_id].properties.bearing_sd = bearing_sd; - } - // Returns the stats of the x-advance of the given unichar (as mean, sd) - // relative to the median advance of all characters in the character set. 
- void get_advance_stats(UNICHAR_ID unichar_id, - float* advance, float* advance_sd) const { - if (INVALID_UNICHAR_ID == unichar_id) { - *advance = *advance_sd = 0; - return; - } - ASSERT_HOST(contains_unichar_id(unichar_id)); - *advance = unichars[unichar_id].properties.advance; - *advance_sd = unichars[unichar_id].properties.advance_sd; - } - void set_advance_stats(UNICHAR_ID unichar_id, - float advance, float advance_sd) { - unichars[unichar_id].properties.advance = advance; - unichars[unichar_id].properties.advance_sd = advance_sd; - } - // Returns true if the font metrics properties are empty. - bool PropertiesIncomplete(UNICHAR_ID unichar_id) const { - return unichars[unichar_id].properties.AnyRangeEmpty(); - } - - // Returns true if the script of the given id is space delimited. - // Returns false for Han and Thai scripts. - bool IsSpaceDelimited(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return true; - int script_id = get_script(unichar_id); - return script_id != han_sid_ && script_id != thai_sid_ && - script_id != hangul_sid_ && script_id != hiragana_sid_ && - script_id != katakana_sid_; - } - - // Return the script name of the given unichar. - // The returned pointer will always be the same for the same script, it's - // managed by unicharset and thus MUST NOT be deleted - int get_script(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return null_sid_; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.script_id; - } - - // Return the character properties, eg. alpha/upper/lower/digit/punct, - // as a bit field of unsigned int. - unsigned int get_properties(UNICHAR_ID unichar_id) const; - - // Return the character property as a single char. 
If a character has - // multiple attributes, the main property is defined by the following order: - // upper_case : 'A' - // lower_case : 'a' - // alpha : 'x' - // digit : '0' - // punctuation: 'p' - char get_chartype(UNICHAR_ID unichar_id) const; - - // Get other_case unichar id in the properties for the given unichar id. - UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return INVALID_UNICHAR_ID; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.other_case; - } - - // Returns the direction property of the given unichar. - Direction get_direction(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return UNICHARSET::U_OTHER_NEUTRAL; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.direction; - } - - // Get mirror unichar id in the properties for the given unichar id. - UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return INVALID_UNICHAR_ID; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.mirror; - } - - // Returns UNICHAR_ID of the corresponding lower-case unichar. - UNICHAR_ID to_lower(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return INVALID_UNICHAR_ID; - ASSERT_HOST(contains_unichar_id(unichar_id)); - if (unichars[unichar_id].properties.islower) return unichar_id; - return unichars[unichar_id].properties.other_case; - } - - // Returns UNICHAR_ID of the corresponding upper-case unichar. - UNICHAR_ID to_upper(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return INVALID_UNICHAR_ID; - ASSERT_HOST(contains_unichar_id(unichar_id)); - if (unichars[unichar_id].properties.isupper) return unichar_id; - return unichars[unichar_id].properties.other_case; - } - - // Returns true if this UNICHARSET has the special codes in - // SpecialUnicharCodes available. 
If false then there are normal unichars - // at these codes and they should not be used. - bool has_special_codes() const { - return get_fragment(UNICHAR_BROKEN) != nullptr && - strcmp(id_to_unichar(UNICHAR_BROKEN), - kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0; - } - - // Returns true if there are any repeated unicodes in the normalized - // text of any unichar-id in the unicharset. - bool AnyRepeatedUnicodes() const; - - // Return a pointer to the CHAR_FRAGMENT class if the given - // unichar id represents a character fragment. - const CHAR_FRAGMENT *get_fragment(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return nullptr; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.fragment; - } - - // Return the isalpha property of the given unichar representation. - bool get_isalpha(const char* const unichar_repr) const { - return get_isalpha(unichar_to_id(unichar_repr)); - } - - // Return the islower property of the given unichar representation. - bool get_islower(const char* const unichar_repr) const { - return get_islower(unichar_to_id(unichar_repr)); - } - - // Return the isupper property of the given unichar representation. - bool get_isupper(const char* const unichar_repr) const { - return get_isupper(unichar_to_id(unichar_repr)); - } - - // Return the isdigit property of the given unichar representation. - bool get_isdigit(const char* const unichar_repr) const { - return get_isdigit(unichar_to_id(unichar_repr)); - } - - // Return the ispunctuation property of the given unichar representation. - bool get_ispunctuation(const char* const unichar_repr) const { - return get_ispunctuation(unichar_to_id(unichar_repr)); - } - - // Return the character properties, eg. 
alpha/upper/lower/digit/punct, - // of the given unichar representation - unsigned int get_properties(const char* const unichar_repr) const { - return get_properties(unichar_to_id(unichar_repr)); - } - - char get_chartype(const char* const unichar_repr) const { - return get_chartype(unichar_to_id(unichar_repr)); - } - - // Return the script name of the given unichar representation. - // The returned pointer will always be the same for the same script, it's - // managed by unicharset and thus MUST NOT be deleted - int get_script(const char* const unichar_repr) const { - return get_script(unichar_to_id(unichar_repr)); - } - - // Return a pointer to the CHAR_FRAGMENT class struct if the given - // unichar representation represents a character fragment. - const CHAR_FRAGMENT *get_fragment(const char* const unichar_repr) const { - if (unichar_repr == nullptr || unichar_repr[0] == '\0' || - !ids.contains(unichar_repr, false)) { - return nullptr; - } - return get_fragment(unichar_to_id(unichar_repr)); - } - - // Return the isalpha property of the given unichar representation. - // Only the first length characters from unichar_repr are used. - bool get_isalpha(const char* const unichar_repr, - int length) const { - return get_isalpha(unichar_to_id(unichar_repr, length)); - } - - // Return the islower property of the given unichar representation. - // Only the first length characters from unichar_repr are used. - bool get_islower(const char* const unichar_repr, - int length) const { - return get_islower(unichar_to_id(unichar_repr, length)); - } - - // Return the isupper property of the given unichar representation. - // Only the first length characters from unichar_repr are used. - bool get_isupper(const char* const unichar_repr, - int length) const { - return get_isupper(unichar_to_id(unichar_repr, length)); - } - - // Return the isdigit property of the given unichar representation. - // Only the first length characters from unichar_repr are used. 
- bool get_isdigit(const char* const unichar_repr, - int length) const { - return get_isdigit(unichar_to_id(unichar_repr, length)); - } - - // Return the ispunctuation property of the given unichar representation. - // Only the first length characters from unichar_repr are used. - bool get_ispunctuation(const char* const unichar_repr, - int length) const { - return get_ispunctuation(unichar_to_id(unichar_repr, length)); - } - - // Returns normalized version of unichar with the given unichar_id. - const char *get_normed_unichar(UNICHAR_ID unichar_id) const { - if (unichar_id == UNICHAR_SPACE) return " "; - return unichars[unichar_id].properties.normed.string(); - } - // Returns a vector of UNICHAR_IDs that represent the ids of the normalized - // version of the given id. There may be more than one UNICHAR_ID in the - // vector if unichar_id represents a ligature. - const GenericVector& normed_ids(UNICHAR_ID unichar_id) const { - return unichars[unichar_id].properties.normed_ids; - } - - // Return the script name of the given unichar representation. - // Only the first length characters from unichar_repr are used. - // The returned pointer will always be the same for the same script, it's - // managed by unicharset and thus MUST NOT be deleted - int get_script(const char* const unichar_repr, - int length) const { - return get_script(unichar_to_id(unichar_repr, length)); - } - - // Return the (current) number of scripts in the script table - int get_script_table_size() const { - return script_table_size_used; - } - - // Return the script string from its id - const char* get_script_from_script_id(int id) const { - if (id >= script_table_size_used || id < 0) - return null_script; - return script_table[id]; - } - - // Returns the id from the name of the script, or 0 if script is not found. - // Note that this is an expensive operation since it involves iteratively - // comparing strings in the script table. To avoid dependency on STL, we - // won't use a hash. 
Instead, the calling function can use this to lookup - // and save the ID for relevant scripts for fast comparisons later. - int get_script_id_from_name(const char* script_name) const; - - // Return true if the given script is the null script - bool is_null_script(const char* script) const { - return script == null_script; - } - - // Uniquify the given script. For two scripts a and b, if strcmp(a, b) == 0, - // then the returned pointer will be the same. - // The script parameter is copied and thus can be a temporary. - int add_script(const char* script); - - // Return the enabled property of the given unichar. - bool get_enabled(UNICHAR_ID unichar_id) const { - return unichars[unichar_id].properties.enabled; - } - - - int null_sid() const { return null_sid_; } - int common_sid() const { return common_sid_; } - int latin_sid() const { return latin_sid_; } - int cyrillic_sid() const { return cyrillic_sid_; } - int greek_sid() const { return greek_sid_; } - int han_sid() const { return han_sid_; } - int hiragana_sid() const { return hiragana_sid_; } - int katakana_sid() const { return katakana_sid_; } - int thai_sid() const { return thai_sid_; } - int hangul_sid() const { return hangul_sid_; } - int default_sid() const { return default_sid_; } - - // Returns true if the unicharset has the concept of upper/lower case. - bool script_has_upper_lower() const { - return script_has_upper_lower_; - } - // Returns true if the unicharset has the concept of x-height. - // script_has_xheight can be true even if script_has_upper_lower is not, - // when the script has a sufficiently predominant top line with ascenders, - // such as Devanagari and Thai. - bool script_has_xheight() const { - return script_has_xheight_; - } - - private: - - struct UNICHAR_PROPERTIES { - UNICHAR_PROPERTIES(); - // Initializes all properties to sensible default values. - void Init(); - // Sets all ranges wide open. Initialization default in case there are - // no useful values available. 
- void SetRangesOpen(); - // Sets all ranges to empty. Used before expanding with font-based data. - void SetRangesEmpty(); - // Returns true if any of the top/bottom/width/bearing/advance ranges/stats - // is empty. - bool AnyRangeEmpty() const; - // Expands the ranges with the ranges from the src properties. - void ExpandRangesFrom(const UNICHAR_PROPERTIES& src); - // Copies the properties from src into this. - void CopyFrom(const UNICHAR_PROPERTIES& src); - - bool isalpha; - bool islower; - bool isupper; - bool isdigit; - bool ispunctuation; - bool isngram; - bool enabled; - // Possible limits of the top and bottom of the bounding box in - // baseline-normalized coordinates, ie, where the baseline is - // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight - // (See normalis.h for the definitions). - uint8_t min_bottom; - uint8_t max_bottom; - uint8_t min_top; - uint8_t max_top; - // Statstics of the widths of bounding box, relative to the median advance. - float width; - float width_sd; - // Stats of the x-bearing and advance, also relative to the median advance. - float bearing; - float bearing_sd; - float advance; - float advance_sd; - int script_id; - UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar - Direction direction; // direction of this unichar - // Mirror property is useful for reverse DAWG lookup for words in - // right-to-left languages (e.g. "(word)" would be in - // '[open paren]' 'w' 'o' 'r' 'd' '[close paren]' in a UTF8 string. - // However, what we want in our DAWG is - // '[open paren]', 'd', 'r', 'o', 'w', '[close paren]' not - // '[close paren]', 'd', 'r', 'o', 'w', '[open paren]'. - UNICHAR_ID mirror; - // A string of unichar_ids that represent the corresponding normed string. - // For awkward characters like em-dash, this gives hyphen. - // For ligatures, this gives the string of normal unichars. 
- GenericVector normed_ids; - STRING normed; // normalized version of this unichar - // Contains meta information about the fragment if a unichar represents - // a fragment of a character, otherwise should be set to nullptr. - // It is assumed that character fragments are added to the unicharset - // after the corresponding 'base' characters. - CHAR_FRAGMENT *fragment; - }; - - struct UNICHAR_SLOT { - char representation[UNICHAR_LEN + 1]; - UNICHAR_PROPERTIES properties; - }; - - // Internal recursive version of encode_string above. - // str is the start of the whole string. - // str_index is the current position in str. - // str_length is the length of str. - // encoding is a working encoding of str. - // lengths is a working set of lengths of each element of encoding. - // best_total_length is the longest length of str that has been successfully - // encoded so far. - // On return: - // best_encoding contains the encoding that used the longest part of str. - // best_lengths (may be null) contains the lengths of best_encoding. - void encode_string(const char* str, int str_index, int str_length, - GenericVector* encoding, - GenericVector* lengths, - int* best_total_length, - GenericVector* best_encoding, - GenericVector* best_lengths) const; - - // Gets the properties for a grapheme string, combining properties for - // multiple characters in a meaningful way where possible. - // Returns false if no valid match was found in the unicharset. - // NOTE that script_id, mirror, and other_case refer to this unicharset on - // return and will need redirecting if the target unicharset is different. - bool GetStrProperties(const char* utf8_str, - UNICHAR_PROPERTIES* props) const; - - // Load ourselves from a "file" where our only interface to the file is - // an implementation of fgets(). This is the parsing primitive accessed by - // the public routines load_from_file() and load_from_inmemory_file(). 
- bool load_via_fgets(TessResultCallback2 *fgets_cb, - bool skip_fragments); - - // List of mappings to make when ingesting strings from the outside. - // The substitutions clean up text that should exists for rendering of - // synthetic data, but not in the recognition set. - static const char* kCleanupMaps[][2]; - static TESS_API const char* null_script; - - UNICHAR_SLOT* unichars; - UNICHARMAP ids; - int size_used; - int size_reserved; - char** script_table; - int script_table_size_used; - int script_table_size_reserved; - // True if the unichars have their tops/bottoms set. - bool top_bottom_set_; - // True if the unicharset has significant upper/lower case chars. - bool script_has_upper_lower_; - // True if the unicharset has a significant mean-line with significant - // ascenders above that. - bool script_has_xheight_; - // True if the set contains chars that would be changed by the cleanup. - bool old_style_included_; - - // A few convenient script name-to-id mapping without using hash. - // These are initialized when unicharset file is loaded. Anything - // missing from this list can be looked up using get_script_id_from_name. - int null_sid_; - int common_sid_; - int latin_sid_; - int cyrillic_sid_; - int greek_sid_; - int han_sid_; - int hiragana_sid_; - int katakana_sid_; - int thai_sid_; - int hangul_sid_; - // The most frequently occurring script in the charset. - int default_sid_; -}; - -#endif // TESSERACT_CCUTIL_UNICHARSET_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicity_table.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicity_table.h deleted file mode 100644 index 3aef73fc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicity_table.h +++ /dev/null @@ -1,208 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicity_table.h -// Description: a class to uniquify objects, manipulating them using integers -// ids. 
-// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_UNICITY_TABLE_H_ -#define TESSERACT_CCUTIL_UNICITY_TABLE_H_ - -#include "tesscallback.h" -#include "errcode.h" -#include "genericvector.h" - -// A class to uniquify objects, manipulating them using integers ids. -// T requirements: -// operator= to add an element -// default-constructible: allocating the internal table will call the default -// constructor. -template -class UnicityTable { - public: - UnicityTable(); - /// Clear the structures and deallocate internal structures. - ~UnicityTable(); - - /// Reserve some memory. If there is size or more elements, the table will - /// then allocate size * 2 elements. - void reserve(int size); - - /// Return the size used. - int size() const; - - /// Return the object from an id. - const T &get(int id) const; - - // Return the pointer to an object with the given id. - T *get_mutable(int id); - - /// Return the id of the T object. - /// This method NEEDS a compare_callback to be passed to - /// set_compare_callback. 
- int get_id(T object) const; - - /// Return true if T is in the table - bool contains(T object) const; - - /// Return true if the id is valid - T contains_id(int id) const; - - /// Add an element in the table - int push_back(T object); - - /// Add a callback to be called to delete the elements when the table took - /// their ownership. - void set_clear_callback(TessCallback1* cb); - - /// Add a callback to be called to compare the elements when needed (contains, - /// get_id, ...) - void set_compare_callback(TessResultCallback2* cb); - - /// Clear the table, calling the callback function if any. - /// All the owned Callbacks are also deleted. - /// If you don't want the Callbacks to be deleted, before calling clear, set - /// the callback to nullptr. - void clear(); - - /// This method clear the current object, then, does a shallow copy of - /// its argument, and finally invalidate its argument. - void move(UnicityTable* from); - - /// Read/Write the table to a file. This does _NOT_ read/write the callbacks. - /// The Callback given must be permanent since they will be called more than - /// once. The given callback will be deleted at the end. - /// Returns false on read/write error. - bool write(FILE* f, TessResultCallback2* cb) const; - bool read(tesseract::TFile* f, - TessResultCallback2* cb); - - private: - GenericVector table_; - // Mutable because Run method is not const - mutable TessResultCallback2* compare_cb_; -}; - -template -class UnicityTableEqEq : public UnicityTable { - public: - UnicityTableEqEq() { - UnicityTable::set_compare_callback( - NewPermanentTessCallback(tesseract::cmp_eq)); - } -}; - -template -UnicityTable::UnicityTable() : - compare_cb_(nullptr) { -} - - -template -UnicityTable::~UnicityTable() { - clear(); -} - -template -int UnicityTable::size() const{ - return table_.size(); -} - -// Reserve some memory. If there is size or more elements, the table will -// then allocate size * 2 elements. 
-template -void UnicityTable::reserve(int size) { - table_.reserve(size); -} - -// Return the object from an id. -template -const T &UnicityTable::get(int id) const { - return table_.get(id); -} -// Returns the pointer to the object with the given id. -template -T *UnicityTable::get_mutable(int id) { - return &(table_.get(id)); -} -// Return true if the id is valid -template -T UnicityTable::contains_id(int id) const { - return table_.contains_index(id); -} - -// Return the id of the T object. -template -int UnicityTable::get_id(T object) const { - return table_.get_index(object); -} - -// Return true if T is in the table -template -bool UnicityTable::contains(T object) const { - return get_id(object) != -1; -} - -// Add an element in the table -template -int UnicityTable::push_back(T object) { - int idx = get_id(object); - if (idx == -1) { - idx = table_.push_back(object); - } - return idx; -} - -// Add a callback to be called to delete the elements when the table took -// their ownership. -template -void UnicityTable::set_clear_callback(TessCallback1* cb) { - table_.set_clear_callback(cb); -} - -// Add a callback to be called to delete the elements when the table took -// their ownership. -template -void UnicityTable::set_compare_callback(TessResultCallback2* cb) { - table_.set_compare_callback(cb); - compare_cb_ = cb; -} - -// Clear the table, calling the callback function if any. -template -void UnicityTable::clear() { - table_.clear(); -} - -template -bool UnicityTable::write( - FILE* f, TessResultCallback2* cb) const { - return table_.write(f, cb); -} - -template -bool UnicityTable::read( - tesseract::TFile* f, TessResultCallback2* cb) { - return table_.read(f, cb); -} - -// This method clear the current object, then, does a shallow copy of -// its argument, and finally invalidate its argument. 
-template -void UnicityTable::move(UnicityTable* from) { - table_.move(&from->table_); -} - -#endif // TESSERACT_CCUTIL_UNICITY_TABLE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicodes.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicodes.cpp deleted file mode 100644 index a5c7b8bd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicodes.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/********************************************************************** - * File: unicodes.cpp - * Description: Unicode related machinery - * Author: David Eger - * Created: Wed Jun 15 16:37:50 PST 2011 - * - * (C) Copyright 2011, Google, Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "unicodes.h" -#include "host.h" // for nullptr - -namespace tesseract { - -const char *kUTF8LineSeparator = "\u2028"; // "\xe2\x80\xa8"; -const char *kUTF8ParagraphSeparator = "\u2029"; // "\xe2\x80\xa9"; -const char *kLRM = "\u200E"; // Left-to-Right Mark -const char *kRLM = "\u200F"; // Right-to-Left Mark -const char *kRLE = "\u202A"; // Right-to-Left Embedding -const char *kPDF = "\u202C"; // Pop Directional Formatting - -const char *kHyphenLikeUTF8[] = { - "-", // ASCII hyphen-minus - "\u05BE", // word hyphen in hybrew - "\u2010", // hyphen - "\u2011", // non-breaking hyphen - "\u2012", // a hyphen the same width as digits - "\u2013", // en dash - "\u2014", // em dash - "\u2015", // horizontal bar - "\u2212", // arithmetic minus sign - "\uFE58", // small em dash - "\uFE63", // small hyphen-minus - "\uFF0D", // fullwidth hyphen-minus - nullptr, // end of our list -}; - -const char *kApostropheLikeUTF8[] = { - "'", // ASCII apostrophe - "`", // ASCII backtick - "\u2018", // opening single quote - "\u2019", // closing single quote - "\u2032", // mathematical prime mark - nullptr, // end of our list. -}; - -} // namespace diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicodes.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicodes.h deleted file mode 100644 index 7bab9b00..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/unicodes.h +++ /dev/null @@ -1,39 +0,0 @@ -/********************************************************************** - * File: unicodes.h - * Description: Unicode related machinery - * Author: David Eger - * Created: Wed Jun 15 16:37:50 PST 2011 - * - * (C) Copyright 2011, Google, Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCUTIL_UNICODES_H_ -#define TESSERACT_CCUTIL_UNICODES_H_ - -namespace tesseract { - -extern const char *kUTF8LineSeparator; -extern const char *kUTF8ParagraphSeparator; -extern const char *kLRM; //< Left-to-Right Mark -extern const char *kRLM; //< Right-to-Left Mark -extern const char *kRLE; //< Right-to-Left Embedding -extern const char *kPDF; //< Pop Directional Formatting - -/// The following are confusable internal word punctuation symbols -/// which we normalize to the first variant when matching in dawgs. -extern const char *kHyphenLikeUTF8[]; -extern const char *kApostropheLikeUTF8[]; - -} // namespace - -#endif // TESSERACT_CCUTIL_UNICODES_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/universalambigs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/universalambigs.cpp deleted file mode 100644 index aa03c845..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/universalambigs.cpp +++ /dev/null @@ -1,19021 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: universalambigs.cpp -// Description: Data for a universal ambigs file that is useful for -// any language. -// Author: Ray Smith -// Created: Mon Mar 18 11:26:00 PDT 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -namespace tesseract { - -extern const char kUniversalAmbigsFile[] = { - '\166', '\062', '\012', '\047', '\047', '\040', '\042', '\040', '\061', - '\012', '\140', '\047', '\040', '\042', '\040', '\061', '\012', '\047', - '\140', '\040', '\042', '\040', '\061', '\012', '\342', '\200', '\230', - '\047', '\040', '\042', '\040', '\061', '\012', '\047', '\342', '\200', - '\230', '\040', '\042', '\040', '\061', '\012', '\342', '\200', '\231', - '\047', '\040', '\042', '\040', '\061', '\012', '\047', '\342', '\200', - '\231', '\040', '\042', '\040', '\061', '\012', '\140', '\140', '\040', - '\042', '\040', '\061', '\012', '\140', '\342', '\200', '\230', '\040', - '\042', '\040', '\061', '\012', '\342', '\200', '\230', '\140', '\040', - '\042', '\040', '\061', '\012', '\140', '\342', '\200', '\231', '\040', - '\042', '\040', '\061', '\012', '\342', '\200', '\231', '\140', '\040', - '\042', '\040', '\061', '\012', '\342', '\200', '\230', '\342', '\200', - '\230', '\040', '\342', '\200', '\234', '\040', '\061', '\012', '\342', - '\200', '\230', '\342', '\200', '\231', '\040', '\042', '\040', '\061', - '\012', '\342', '\200', '\231', '\342', '\200', '\230', '\040', '\042', - '\040', '\061', '\012', '\342', '\200', '\231', '\342', '\200', '\231', - '\040', '\342', '\200', '\235', '\040', '\061', '\012', '\054', '\054', - '\040', '\342', '\200', '\236', '\040', '\061', '\012', '\155', '\040', - '\162', '\156', '\040', '\060', '\012', '\162', '\156', '\040', '\155', - '\040', '\060', '\012', 
'\155', '\040', '\151', '\156', '\040', '\060', - '\012', '\151', '\156', '\040', '\155', '\040', '\060', '\012', '\144', - '\040', '\143', '\154', '\040', '\060', '\012', '\143', '\154', '\040', - '\144', '\040', '\060', '\012', '\156', '\156', '\040', '\162', '\155', - '\040', '\060', '\012', '\162', '\155', '\040', '\156', '\156', '\040', - '\060', '\012', '\156', '\040', '\162', '\151', '\040', '\060', '\012', - '\162', '\151', '\040', '\156', '\040', '\060', '\012', '\154', '\151', - '\040', '\150', '\040', '\060', '\012', '\154', '\162', '\040', '\150', - '\040', '\060', '\012', '\151', '\151', '\040', '\165', '\040', '\060', - '\012', '\151', '\151', '\040', '\156', '\040', '\060', '\012', '\156', - '\151', '\040', '\155', '\040', '\060', '\012', '\151', '\151', '\151', - '\040', '\155', '\040', '\060', '\012', '\154', '\154', '\040', '\110', - '\040', '\060', '\012', '\111', '\055', '\111', '\040', '\110', '\040', - '\060', '\012', '\166', '\166', '\040', '\167', '\040', '\060', '\012', - '\126', '\126', '\040', '\127', '\040', '\060', '\012', '\164', '\040', - '\146', '\040', '\060', '\012', '\146', '\040', '\164', '\040', '\060', - '\012', '\141', '\040', '\157', '\040', '\060', '\012', '\157', '\040', - '\141', '\040', '\060', '\012', '\145', '\040', '\143', '\040', '\060', - '\012', '\143', '\040', '\145', '\040', '\060', '\012', '\162', '\162', - '\040', '\156', '\040', '\060', '\012', '\105', '\040', '\146', '\151', - '\040', '\060', '\012', '\154', '\074', '\040', '\153', '\040', '\060', - '\012', '\154', '\144', '\040', '\153', '\151', '\040', '\060', '\012', - '\154', '\170', '\040', '\150', '\040', '\060', '\012', '\170', '\156', - '\040', '\155', '\040', '\060', '\012', '\165', '\170', '\040', '\151', - '\156', '\040', '\060', '\012', '\162', '\040', '\164', '\040', '\060', - '\012', '\144', '\040', '\164', '\154', '\040', '\060', '\012', '\144', - '\151', '\040', '\164', '\150', '\040', '\060', '\012', '\165', '\162', - '\040', '\151', '\156', 
'\040', '\060', '\012', '\165', '\156', '\040', - '\151', '\155', '\040', '\060', '\012', '\165', '\040', '\141', '\040', - '\060', '\012', '\157', '\040', '\303', '\263', '\040', '\060', '\012', - '\303', '\263', '\040', '\157', '\040', '\060', '\012', '\151', '\040', - '\303', '\255', '\040', '\060', '\012', '\303', '\255', '\040', '\151', - '\040', '\060', '\012', '\141', '\040', '\303', '\241', '\040', '\060', - '\012', '\303', '\241', '\040', '\141', '\040', '\060', '\012', '\145', - '\040', '\303', '\251', '\040', '\060', '\012', '\303', '\251', '\040', - '\145', '\040', '\060', '\012', '\165', '\040', '\303', '\272', '\040', - '\060', '\012', '\303', '\272', '\040', '\165', '\040', '\060', '\012', - '\156', '\040', '\303', '\261', '\040', '\060', '\012', '\303', '\261', - '\040', '\156', '\040', '\060', '\012', '\060', '\040', '\157', '\040', - '\060', '\012', '\144', '\040', '\164', '\162', '\040', '\060', '\012', - '\156', '\040', '\164', '\162', '\040', '\060', '\012', '\303', '\261', - '\040', '\146', '\151', '\040', '\060', '\012', '\165', '\040', '\164', - '\151', '\040', '\060', '\012', '\303', '\261', '\040', '\164', '\151', - '\040', '\060', '\012', '\144', '\040', '\164', '\151', '\040', '\060', - '\012', '\144', '\040', '\164', '\303', '\255', '\040', '\060', '\012', - '\144', '\040', '\162', '\303', '\255', '\040', '\060', '\012', '\141', - '\040', '\303', '\240', '\040', '\060', '\012', '\145', '\040', '\303', - '\250', '\040', '\060', '\012', '\156', '\040', '\151', '\152', '\040', - '\060', '\012', '\147', '\040', '\151', '\152', '\040', '\060', '\012', - '\157', '\040', '\303', '\262', '\040', '\060', '\012', '\105', '\040', - '\303', '\211', '\040', '\060', '\012', '\105', '\040', '\303', '\210', - '\040', '\060', '\012', '\165', '\040', '\303', '\274', '\040', '\060', - '\012', '\170', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\131', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\164', 
'\105', '\040', '\156', '\164', '\040', '\061', - '\012', '\124', '\154', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\170', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\152', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\160', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\162', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\141', '\161', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\166', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\142', '\114', '\040', '\142', '\145', '\040', '\061', - '\012', '\116', '\166', '\153', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\112', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\170', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\165', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\172', '\164', '\040', '\164', '\141', '\040', '\061', - '\012', '\161', '\113', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\143', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\130', '\160', '\040', '\160', '\157', '\040', '\061', - '\012', '\126', '\161', '\151', '\040', '\164', '\151', '\040', '\061', - '\012', '\125', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\112', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\153', '\144', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\160', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\102', '\166', '\040', '\164', '\151', '\040', '\061', - '\012', '\172', '\122', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\124', '\155', '\040', '\155', '\151', '\040', '\061', - '\012', '\155', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\164', 
'\160', '\040', '\164', '\151', '\040', '\061', - '\012', '\155', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\104', '\161', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\170', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\102', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\166', '\143', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\103', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\101', '\157', '\040', '\166', '\157', '\040', '\061', - '\012', '\161', '\165', '\102', '\040', '\164', '\165', '\040', '\061', - '\012', '\142', '\164', '\126', '\040', '\164', '\151', '\040', '\061', - '\012', '\114', '\155', '\143', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\126', '\167', '\040', '\164', '\151', '\040', '\061', - '\012', '\131', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\120', '\152', '\040', '\164', '\165', '\040', '\061', - '\012', '\146', '\124', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\122', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\144', '\101', '\040', '\144', '\151', '\040', '\061', - '\012', '\152', '\172', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\170', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\166', '\147', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\165', '\126', '\040', '\164', '\165', '\040', '\061', - '\012', '\163', '\127', 
'\153', '\040', '\153', '\165', '\040', '\061', - '\012', '\120', '\147', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\110', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\153', '\125', '\040', '\153', '\165', '\040', '\061', - '\012', '\147', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\144', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\126', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\147', '\144', '\040', '\144', '\151', '\040', '\061', - '\012', '\172', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\161', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\163', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\146', '\116', '\040', '\144', '\151', '\040', '\061', - '\012', '\144', '\147', '\127', '\040', '\144', '\151', '\040', '\061', - '\012', '\167', '\116', '\162', '\040', '\162', '\151', '\040', '\061', - '\012', '\172', '\166', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\131', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\110', '\171', '\040', '\164', '\165', '\040', '\061', - '\012', '\164', '\116', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\170', '\112', '\040', '\154', '\151', '\040', '\061', - '\012', '\110', '\142', '\153', '\040', '\153', '\165', '\040', '\061', - '\012', '\170', '\163', '\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\123', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\106', '\142', '\040', '\142', '\165', '\040', '\061', - '\012', '\116', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\153', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\152', 
'\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\132', '\146', '\040', '\164', '\157', '\040', '\061', - '\012', '\153', '\143', '\125', '\040', '\153', '\157', '\040', '\061', - '\012', '\146', '\106', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\161', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\122', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\166', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\156', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\170', '\115', '\040', '\160', '\157', '\040', '\061', - '\012', '\145', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\112', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\156', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\103', '\161', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\146', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\161', '\156', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\123', '\163', '\040', '\151', '\163', '\040', '\061', - '\012', '\163', '\102', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\106', '\150', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\116', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\115', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\147', 
'\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\170', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\144', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\162', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\105', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\126', '\143', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\124', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\101', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\154', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\142', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\126', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\121', '\154', '\040', '\164', '\145', '\040', '\061', - '\012', '\163', '\127', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\102', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\130', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\125', '\143', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\117', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\110', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\116', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\106', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\154', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\155', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\146', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\170', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\131', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\146', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\144', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\116', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\120', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\126', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\112', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\160', '\110', '\040', '\160', '\157', '\040', '\061', - '\012', '\170', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\155', '\102', '\040', '\156', '\164', '\040', '\061', - '\012', '\172', '\143', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\146', 
'\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\146', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\132', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\122', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\165', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\142', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\166', '\152', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\103', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\167', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\112', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\107', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\142', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\103', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\157', '\121', '\040', '\160', '\157', '\040', '\061', - '\012', '\161', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\161', 
'\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\166', '\147', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\101', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\146', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\147', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\146', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\160', '\113', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\121', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\152', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\170', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\120', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\116', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\107', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\165', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\131', '\145', '\040', '\164', '\145', '\040', '\061', - '\012', '\146', '\132', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\131', '\040', '\146', '\157', '\040', '\061', - '\012', '\171', '\120', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\107', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\155', 
'\124', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\146', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\170', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\172', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\141', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\153', '\126', '\144', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\152', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\153', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\121', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\166', '\152', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\160', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\153', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\112', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\167', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\162', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\147', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\155', 
'\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\150', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\103', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\167', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\107', '\146', '\040', '\156', '\164', '\040', '\061', - '\012', '\152', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\127', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\170', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\161', '\156', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\127', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\144', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\116', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\117', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\102', '\171', '\040', '\146', '\157', '\040', '\061', - '\012', '\156', '\125', '\152', '\040', '\156', '\164', '\040', '\061', - '\012', '\154', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\154', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\130', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\126', '\167', '\040', '\166', '\145', '\040', '\061', - '\012', '\172', '\127', 
'\156', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\112', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\115', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\155', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\123', '\163', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\127', '\154', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\152', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\113', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\143', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\153', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\125', '\145', '\040', '\164', '\145', '\040', '\061', - '\012', '\154', '\125', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\102', '\147', '\040', '\156', '\164', '\040', '\061', - '\012', '\144', '\110', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\160', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\102', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\144', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\146', 
'\126', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\147', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\153', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\121', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\160', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\102', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\155', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\127', '\146', '\040', '\166', '\145', '\040', '\061', - '\012', '\152', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\171', '\111', '\040', '\156', '\171', '\040', '\061', - '\012', '\132', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\117', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\153', '\112', '\040', '\153', '\157', '\040', '\061', - '\012', '\144', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\142', '\127', '\040', '\151', '\163', '\040', '\061', - '\012', '\172', '\115', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\112', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\115', '\143', '\040', '\153', '\157', '\040', '\061', - '\012', '\172', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\153', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\161', 
'\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\106', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\132', '\170', '\040', '\170', '\145', '\040', '\061', - '\012', '\161', '\166', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\162', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\152', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\167', '\111', '\040', '\167', '\141', '\040', '\061', - '\012', '\151', '\104', '\167', '\040', '\164', '\151', '\040', '\061', - '\012', '\102', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\120', '\172', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\106', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\121', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\102', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\166', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\152', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\172', 
'\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\146', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\110', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\142', '\115', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\127', '\147', '\040', '\156', '\164', '\040', '\061', - '\012', '\131', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\170', '\113', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\142', '\121', '\040', '\142', '\145', '\040', '\061', - '\012', '\127', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\164', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\156', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\166', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\152', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\171', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\106', '\146', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\157', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\162', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\167', '\132', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\156', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\142', '\167', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\156', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\160', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\172', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\160', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\124', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\144', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\167', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\125', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\113', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\144', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\152', 
'\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\124', '\156', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\164', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\147', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\144', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\155', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\144', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\142', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\163', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\170', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\170', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\121', '\164', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\170', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\144', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\167', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\155', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\126', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\166', '\154', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\110', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\115', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\117', '\141', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\131', '\160', '\040', '\164', '\151', '\040', '\061', - '\012', '\166', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\157', '\126', '\040', '\162', '\157', '\040', '\061', - '\012', '\146', '\132', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\161', '\121', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\127', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\167', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\163', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\143', 
'\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\156', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\171', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\122', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\165', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\162', '\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\167', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\126', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\132', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\162', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\163', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\166', '\163', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\103', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\166', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\167', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\107', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\127', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\113', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\127', 
'\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\155', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\160', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\172', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\155', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\116', '\153', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\160', '\115', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\167', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\110', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\172', '\103', '\040', '\152', '\157', '\040', '\061', - '\012', '\157', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\130', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\105', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\165', '\127', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\166', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\163', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\123', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\113', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\171', '\145', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\110', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\155', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\165', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\144', 
'\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\116', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\172', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\152', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\160', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\114', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\142', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\170', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\126', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\120', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\121', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\117', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\155', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\106', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\141', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\107', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\124', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\166', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\116', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\116', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\167', '\131', '\040', '\164', '\151', '\040', '\061', - '\012', '\116', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\124', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\172', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\152', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\166', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\127', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\114', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\161', '\172', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\172', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\126', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\132', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\125', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\107', '\170', '\040', '\164', '\151', '\040', '\061', - '\012', '\170', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\170', '\101', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\152', '\110', '\040', '\163', '\164', '\040', '\061', - '\012', '\107', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\147', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\156', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\146', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\165', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\132', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\104', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\163', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\126', 
'\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\104', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\101', '\157', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\172', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\155', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\127', '\156', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\122', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\166', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\151', '\131', '\040', '\164', '\151', '\040', '\061', - '\012', '\170', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\144', '\156', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\170', '\132', '\040', '\170', '\145', '\040', '\061', - '\012', '\130', '\144', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\116', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\156', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\127', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\121', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\167', '\107', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\155', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\107', '\171', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\172', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\107', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\152', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\126', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\167', '\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\150', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\155', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\156', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\127', 
'\143', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\117', '\156', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\154', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\156', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\164', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\127', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\101', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\132', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\116', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\124', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\131', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\110', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\167', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\172', 
'\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\121', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\126', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\154', '\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\102', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\111', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\172', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\121', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\107', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\142', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\125', '\157', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\126', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\144', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\161', '\103', '\040', '\166', '\157', '\040', '\061', - '\012', '\152', '\153', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\166', '\172', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\120', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\103', 
'\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\146', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\167', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\124', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\154', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\164', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\114', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\121', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\166', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\147', '\145', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\112', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\167', 
'\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\116', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\167', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\113', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\147', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\131', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\152', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\101', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\155', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\156', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\107', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\153', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\126', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\127', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\160', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\167', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\127', 
'\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\152', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\111', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\142', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\161', '\143', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\145', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\120', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\167', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\110', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\167', '\120', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\166', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\123', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\131', '\160', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\104', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\102', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\116', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\125', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\122', '\154', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\102', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\154', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\167', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\172', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\101', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\104', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\152', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\102', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\146', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\157', '\116', '\040', '\157', '\156', '\040', '\061', - '\012', '\131', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\126', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\162', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\120', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\165', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\147', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\170', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\130', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\146', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\127', '\144', '\040', '\144', '\157', '\040', '\061', - '\012', '\170', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\117', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\153', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\155', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\101', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\110', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\120', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\113', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\115', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\101', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\110', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\170', 
'\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\111', '\171', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\157', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\115', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\141', '\120', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\167', '\107', '\040', '\167', '\141', '\040', '\061', - '\012', '\103', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\172', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\157', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\121', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\161', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\120', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\156', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\146', 
'\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\171', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\114', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\152', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\155', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\145', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\171', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\163', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\167', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\156', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\103', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\160', '\102', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\117', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\106', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\142', '\104', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\113', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\110', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\162', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\114', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\141', '\131', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\103', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\167', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\114', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\115', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\113', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\115', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\143', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\170', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\132', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\167', '\104', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\146', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\170', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\171', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\120', 
'\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\122', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\147', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\101', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\113', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\103', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\167', '\110', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\166', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\125', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\120', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\152', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\102', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\160', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\172', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\132', '\163', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\113', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\143', 
'\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\146', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\126', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\127', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\130', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\114', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\113', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\147', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\132', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\112', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\153', '\146', '\115', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\113', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\115', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\147', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\107', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\124', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\122', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\117', '\141', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\167', '\104', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\130', '\163', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\154', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\155', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\127', '\155', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\126', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\152', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\120', '\170', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\166', '\163', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\152', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\126', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\170', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\120', 
'\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\170', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\146', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\120', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\160', '\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\106', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\167', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\117', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\132', '\162', '\040', '\151', '\156', '\040', '\061', - '\012', '\126', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\114', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\106', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\115', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\171', '\125', '\040', '\156', '\171', '\040', '\061', - '\012', '\155', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\112', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\113', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\115', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\161', 
'\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\111', '\152', '\040', '\164', '\145', '\040', '\061', - '\012', '\126', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\103', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\125', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\132', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\131', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\104', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\163', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\110', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\102', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\152', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\117', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\152', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\146', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', 
'\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\117', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\130', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\115', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\111', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\120', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\167', '\127', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\115', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\124', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\111', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\152', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\120', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\172', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\154', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\131', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\126', 
'\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\160', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\122', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\144', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\114', '\153', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\144', '\105', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\102', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\162', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\124', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\131', '\160', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\115', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\143', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\103', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\120', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\160', '\125', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\153', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\142', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\170', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\123', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\105', 
'\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\130', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\155', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\153', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\167', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\125', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\146', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\160', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\124', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\132', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\102', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\107', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\103', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\116', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\161', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\130', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\123', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\152', 
'\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\130', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\147', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\143', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\155', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\170', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\106', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\162', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\121', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\116', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\170', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\167', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\101', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\114', '\164', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\160', 
'\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\111', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\120', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\160', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\104', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\146', '\112', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\102', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\110', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\101', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\116', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\152', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\123', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\115', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\113', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\155', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\172', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\150', 
'\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\110', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\112', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\127', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\166', '\153', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\153', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\105', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\125', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\114', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\107', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\107', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\106', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\106', '\171', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\146', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\123', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\130', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\110', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\165', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\112', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\127', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\160', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\161', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\146', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\162', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\147', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\171', '\120', '\040', '\156', '\171', '\040', '\061', - '\012', '\132', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\106', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\156', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\120', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\106', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\160', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\146', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\104', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\162', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\146', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\107', '\153', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\122', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\127', 
'\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\131', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\105', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\110', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\106', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\104', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\114', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\113', '\163', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\167', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\126', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\165', '\111', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\154', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\167', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\127', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\120', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\122', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\146', '\104', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\103', 
'\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\112', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\103', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\142', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\103', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\155', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\145', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\156', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\167', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\111', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\152', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\167', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\112', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\156', '\101', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\102', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\106', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\147', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\125', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\104', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\110', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\130', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\171', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\153', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\114', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\170', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\115', 
'\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\167', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\146', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\164', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\124', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\103', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\103', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\146', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\111', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\167', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\116', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\167', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\132', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\107', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\126', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\124', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\163', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\115', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\110', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\152', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\167', '\117', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\154', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\113', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\146', '\107', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\146', '\102', '\040', '\167', '\141', '\040', '\061', - '\012', '\112', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\167', '\113', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\125', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\153', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\155', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\155', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\164', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\171', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\171', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\122', 
'\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\115', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\167', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\143', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\120', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\167', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\171', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\130', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\163', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\156', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\124', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\155', '\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\160', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\102', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\165', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\105', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\172', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\114', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\146', '\116', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\125', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\103', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\103', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\125', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\102', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\155', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\164', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\121', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\172', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\162', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\166', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\114', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\170', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\131', '\153', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\153', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\120', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\165', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\120', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\167', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\167', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\170', '\120', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\155', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\141', '\167', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\126', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\142', '\167', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\131', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\103', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\146', '\121', '\040', '\156', '\171', '\040', '\061', - '\012', '\172', '\107', 
'\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\166', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\103', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\120', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\130', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\150', '\166', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\172', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\146', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\162', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\131', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\116', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\101', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\170', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\117', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\167', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\121', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\127', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\170', '\105', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\130', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\166', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\130', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\103', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\172', 
'\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\122', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\141', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\110', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\114', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\172', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\120', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\152', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\132', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\156', '\161', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\126', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\110', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\112', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\112', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\153', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\124', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\153', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\167', '\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\155', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\146', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\155', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\117', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\165', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\104', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\126', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\122', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\155', '\115', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\170', '\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\164', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\106', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\126', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\107', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\144', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\115', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\167', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\153', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\122', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\166', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\167', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\167', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\121', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\162', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\125', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\167', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\147', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\160', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\110', 
'\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\121', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\132', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\171', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\105', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\114', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\170', '\125', '\040', '\156', '\171', '\040', '\061', - '\012', '\147', '\166', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\163', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\142', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\115', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\153', '\130', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\162', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\117', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\107', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\107', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\103', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\161', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\104', 
'\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\147', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\105', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\132', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\131', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\120', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\147', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\170', '\113', '\040', '\146', '\157', '\040', '\061', - '\012', '\110', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\122', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\155', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\170', '\122', '\040', '\142', '\145', '\040', '\061', - '\012', '\114', '\163', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\122', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\167', '\121', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\146', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\167', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\106', '\143', '\040', '\164', '\151', '\040', '\061', - '\012', '\167', '\172', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\142', 
'\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\146', '\123', '\040', '\146', '\157', '\040', '\061', - '\012', '\120', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\131', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\104', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\103', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\170', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\115', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\147', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\161', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\156', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\164', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\125', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\156', '\116', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\124', 
'\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\163', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\112', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\155', '\115', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\152', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\170', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\144', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\101', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\125', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\130', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\102', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\147', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\132', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\112', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\155', '\124', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\131', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\167', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\131', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\110', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\154', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\103', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\104', 
'\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\103', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\170', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\145', '\130', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\153', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\122', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\156', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\167', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\121', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\162', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\106', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\145', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\167', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\167', '\104', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\160', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\165', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\162', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\163', 
'\156', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\120', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\122', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\152', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\143', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\104', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\145', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\143', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\106', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\142', '\127', '\040', '\142', '\145', '\040', '\061', - '\012', '\165', '\125', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\164', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\160', '\124', '\040', '\151', '\156', '\040', '\061', - '\012', '\130', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\167', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\130', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\114', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\170', 
'\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\106', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\114', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\142', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\155', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\156', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\116', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\160', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\154', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\110', '\161', '\040', '\151', '\156', '\040', '\061', - '\012', '\163', '\167', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\152', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\120', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\122', 
'\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\112', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\132', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\126', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\166', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\120', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\116', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\155', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\154', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\151', '\130', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\154', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\146', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\170', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\167', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\144', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\132', '\156', 
'\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\146', '\112', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\121', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\101', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\161', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\114', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\152', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\132', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\131', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\126', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\167', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\167', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\145', '\107', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\123', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\163', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\155', 
'\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\146', '\121', '\040', '\146', '\157', '\040', '\061', - '\012', '\126', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\155', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\101', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\142', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\112', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\170', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\142', '\103', '\040', '\142', '\145', '\040', '\061', - '\012', '\122', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\112', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\171', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\153', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\157', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\132', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\120', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\107', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\122', '\172', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\114', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\146', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\170', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\143', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\167', '\160', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\124', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\146', '\107', '\040', '\156', '\171', '\040', '\061', - '\012', '\160', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\152', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\144', '\147', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\167', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\156', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\167', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\165', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\152', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\124', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\113', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\122', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\131', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\123', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\154', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\125', '\165', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\117', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\102', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\122', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\127', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\116', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\110', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\146', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\162', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\152', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\170', '\107', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\160', '\127', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\172', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\102', '\153', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\170', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\111', '\152', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\105', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\106', 
'\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\111', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\106', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\172', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\111', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\153', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\146', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\153', '\111', '\040', '\153', '\165', '\040', '\061', - '\012', '\146', '\111', '\157', '\040', '\162', '\157', '\040', '\061', - '\012', '\154', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\101', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\104', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\110', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\112', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\103', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\166', 
'\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\114', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\132', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\124', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\126', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\106', '\144', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\167', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\103', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\107', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\165', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\106', 
'\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\124', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\110', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\121', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\104', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\153', '\116', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\102', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\102', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\155', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\144', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\153', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\123', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\144', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\147', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\116', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\146', '\101', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\127', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\170', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\170', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\121', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\167', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\114', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\170', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\167', '\122', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\111', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\172', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\161', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\115', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\160', '\122', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\117', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\153', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\152', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\165', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\155', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\165', '\112', '\040', '\157', '\165', '\040', '\061', - '\012', '\171', '\127', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\125', 
'\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\172', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\123', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\147', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\152', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\123', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\146', '\101', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\153', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\144', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\142', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\102', '\155', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\170', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\155', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\111', '\161', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\147', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\122', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\166', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\160', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\106', '\167', '\040', '\166', '\157', '\040', '\061', - '\012', '\146', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\167', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\112', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\170', 
'\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\132', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\110', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\165', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\160', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\121', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\106', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\107', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\163', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\167', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\171', '\121', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\113', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\155', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\102', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\132', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\111', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\171', '\143', 
'\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\104', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\112', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\111', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\152', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\144', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\102', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\167', '\142', '\040', '\157', '\167', '\040', '\061', - '\012', '\126', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\152', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\107', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\102', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\160', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\131', 
'\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\170', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\150', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\160', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\104', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\152', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\116', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\122', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\115', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\152', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\160', '\104', '\040', '\160', '\162', '\040', '\061', - '\012', '\104', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\171', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\113', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\153', 
'\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\167', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\104', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\155', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\144', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\112', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\125', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\124', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\110', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\155', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\106', '\150', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\163', '\122', '\040', '\163', '\164', '\040', '\061', - '\012', '\151', '\127', '\147', '\040', '\151', '\156', '\040', '\061', - '\012', '\130', '\171', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\152', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\172', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\126', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\111', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\104', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\146', '\125', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\157', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\113', '\143', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\127', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\121', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\145', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\152', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\124', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\122', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\142', 
'\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\121', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\111', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\170', '\116', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\103', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\144', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\113', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\124', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\155', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\124', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\114', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\111', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\143', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\165', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\163', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\103', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\142', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\161', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\152', '\172', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\151', '\130', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\156', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\155', '\116', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\142', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\153', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\172', '\103', '\040', '\160', '\157', '\040', '\061', - '\012', '\154', '\146', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\102', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\155', '\114', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\155', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\152', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\111', '\165', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\146', '\104', '\040', '\156', '\171', '\040', '\061', - '\012', '\143', '\154', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\144', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\124', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\130', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\160', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\155', 
'\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\153', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\125', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\170', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\124', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\132', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\126', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\146', '\104', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\120', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\155', '\172', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\130', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\105', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\142', 
'\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\157', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\153', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\142', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\160', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\146', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\104', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\115', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\172', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\155', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\152', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\153', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\144', '\105', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\131', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\150', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\142', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\170', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\131', 
'\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\150', '\112', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\114', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\102', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\144', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\145', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\167', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\120', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\111', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\142', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\144', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\115', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\170', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\106', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\167', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\162', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\131', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\126', '\143', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\122', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\126', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\160', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\116', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\113', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\166', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\167', '\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\171', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\102', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\123', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\115', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\152', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\131', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\106', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\153', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\167', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\152', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\121', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\115', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\112', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\124', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\155', 
'\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\131', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\112', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\124', '\153', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\146', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\170', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\121', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\122', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\142', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\170', '\102', '\040', '\167', '\141', '\040', '\061', - '\012', '\114', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\153', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\167', '\117', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\166', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\166', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\153', '\106', '\040', '\153', '\165', '\040', '\061', - '\012', '\151', '\171', '\124', '\040', '\151', '\156', '\040', '\061', - '\012', '\125', '\146', 
'\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\156', '\172', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\142', '\110', '\040', '\142', '\165', '\040', '\061', - '\012', '\154', '\123', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\125', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\171', '\106', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\170', '\120', '\040', '\146', '\157', '\040', '\061', - '\012', '\152', '\131', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\170', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\167', '\111', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\125', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\163', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\144', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\131', '\163', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\111', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\146', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\122', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\160', 
'\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\170', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\142', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\153', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\113', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\121', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\127', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\167', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\143', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\146', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\130', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\147', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\167', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\152', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\153', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\107', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\142', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\160', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\115', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\104', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\167', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\170', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\116', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\103', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\154', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\102', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\106', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\104', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\146', '\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\114', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\106', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\170', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\147', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\106', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\170', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\152', '\172', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\156', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\116', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\166', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\171', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\106', '\150', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\107', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\114', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\114', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\113', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\112', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\121', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\160', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\151', '\117', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\117', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\150', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\172', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\152', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\126', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\166', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\164', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\111', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\121', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\171', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\160', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\170', 
'\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\142', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\166', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\160', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\156', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\147', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\146', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\110', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\144', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\124', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\147', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\170', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\120', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\160', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\170', '\127', '\040', '\156', '\171', '\040', '\061', - '\012', '\110', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\116', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\110', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\166', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\147', '\156', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\142', '\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\143', 
'\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\161', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\171', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\122', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\146', '\101', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\130', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\155', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\121', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\163', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\111', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\154', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\160', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\155', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\115', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\121', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\153', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\104', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\131', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\155', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\153', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\106', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\171', '\112', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\111', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\131', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\144', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\142', '\116', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\167', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\164', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\121', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\112', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\147', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\170', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\115', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\172', 
'\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\146', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\153', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\110', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\141', '\112', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\172', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\127', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\155', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\102', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\116', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\101', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\110', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\122', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\127', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\131', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\124', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\170', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\113', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\124', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\156', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\144', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\157', '\145', '\117', '\040', '\157', '\156', '\040', '\061', - '\012', '\156', '\103', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\130', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\101', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\107', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\170', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\110', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\122', 
'\172', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\124', '\163', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\110', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\114', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\116', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\107', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\171', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\104', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\172', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\105', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\142', '\106', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\163', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\156', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\113', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\165', '\121', 
'\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\170', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\113', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\143', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\146', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\102', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\112', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\131', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\123', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\121', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\160', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\146', '\113', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\120', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\120', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\130', '\162', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\152', 
'\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\172', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\155', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\112', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\111', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\130', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\146', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\126', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\112', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\114', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\172', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\167', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\167', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\106', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\110', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\127', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\114', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\153', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\147', 
'\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\160', '\101', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\170', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\146', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\120', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\115', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\144', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\167', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\115', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\120', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\172', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\146', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\106', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\113', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\153', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\170', 
'\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\107', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\132', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\153', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\122', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\154', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\116', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\131', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\132', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\164', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\120', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\127', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\146', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\105', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\152', '\172', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\144', 
'\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\154', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\170', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\154', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\131', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\153', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\115', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\115', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\122', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\114', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\106', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\116', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\106', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\113', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\112', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\172', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\113', 
'\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\170', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\120', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\172', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\110', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\106', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\166', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\130', '\145', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\146', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\111', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\142', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\132', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\113', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\160', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\113', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\121', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\106', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\127', '\166', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\172', 
'\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\153', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\152', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\146', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\115', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\142', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\167', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\127', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\123', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\162', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\166', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\161', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\142', '\107', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\172', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\106', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\101', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\170', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\141', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\124', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\102', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\126', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\146', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\170', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\110', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\126', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\147', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\115', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\121', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\167', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\110', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\172', 
'\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\146', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\127', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\161', '\105', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\160', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\127', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\154', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\131', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\166', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\146', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\142', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\166', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\110', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\160', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\146', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\145', 
'\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\142', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\127', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\144', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\166', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\167', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\144', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\115', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\143', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\162', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\164', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\160', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\153', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\165', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\172', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\142', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\123', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\132', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\155', '\107', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\141', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\167', '\110', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\104', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\107', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\150', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\153', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\114', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\146', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\155', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\156', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\115', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\131', 
'\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\167', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\170', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\142', '\123', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\162', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\112', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\167', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\130', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\104', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\160', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\147', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\141', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\162', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\112', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\115', 
'\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\143', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\153', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\116', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\132', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\170', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\126', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\142', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\123', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\112', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\121', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\126', '\167', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\124', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\144', 
'\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\162', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\112', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\114', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\153', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\142', '\121', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\153', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\160', '\105', '\040', '\153', '\141', '\040', '\061', - '\012', '\107', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\132', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\111', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\167', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\103', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\162', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\155', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\146', 
'\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\102', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\142', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\144', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\102', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\121', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\154', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\164', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\122', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\162', '\161', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\155', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\127', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\172', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\107', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\127', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\126', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\127', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\153', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\172', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\165', '\120', '\040', '\165', '\156', '\040', '\061', - '\012', '\144', '\110', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\147', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\115', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\110', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\132', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\132', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\153', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\116', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\160', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\152', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\170', '\126', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\147', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\153', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\115', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\107', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\142', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\114', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\166', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\162', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\165', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\153', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\170', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\126', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\113', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\154', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\102', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\121', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\131', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\166', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\107', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\164', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\166', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\152', '\172', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\161', 
'\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\167', '\165', '\040', '\153', '\165', '\040', '\061', - '\012', '\121', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\155', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\111', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\132', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\114', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\115', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\120', '\155', '\040', '\165', '\155', '\040', '\061', - '\012', '\160', '\115', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\172', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\172', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\142', '\117', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\166', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\143', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\155', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\106', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\170', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\102', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\126', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\157', 
'\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\157', '\110', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\127', '\147', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\131', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\104', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\167', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\104', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\144', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\113', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\166', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\165', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\103', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\144', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\142', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\142', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\142', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\146', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\142', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\144', 
'\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\142', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\103', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\122', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\132', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\112', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\132', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\142', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\112', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\126', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\167', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\167', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\146', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\154', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\164', '\130', '\040', '\164', '\151', '\040', '\061', - '\012', '\144', '\113', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\121', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\104', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\126', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\160', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\153', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\144', '\104', '\040', '\144', '\151', '\040', '\061', - '\012', '\146', '\121', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\150', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\102', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\105', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\146', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\132', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\164', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\170', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\102', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\143', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\122', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\126', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', 
'\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\102', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\117', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\107', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\166', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\147', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\162', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\106', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\126', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\146', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\144', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\107', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\156', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\144', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\172', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\101', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\101', '\167', '\040', '\153', '\157', '\040', '\061', - '\012', '\170', '\102', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\116', 
'\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\160', '\144', '\040', '\144', '\151', '\040', '\061', - '\012', '\157', '\125', '\171', '\040', '\153', '\157', '\040', '\061', - '\012', '\146', '\160', '\104', '\040', '\160', '\162', '\040', '\061', - '\012', '\122', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\154', '\130', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\127', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\127', '\166', '\040', '\166', '\151', '\040', '\061', - '\012', '\106', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\147', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\127', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\130', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\144', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\126', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\120', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\167', '\101', '\040', '\144', '\145', '\040', '\061', - '\012', '\117', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\151', '\132', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\172', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\167', '\114', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\127', 
'\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\124', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\142', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\165', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\165', '\117', '\040', '\151', '\156', '\040', '\061', - '\012', '\121', '\144', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\165', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\163', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\111', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\147', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\123', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\170', '\121', '\040', '\146', '\157', '\040', '\061', - '\012', '\150', '\143', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\142', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\122', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\132', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\154', 
'\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\103', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\155', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\132', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\104', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\125', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\142', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\123', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\126', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\121', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\115', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\103', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\156', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\114', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\166', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\146', '\112', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\103', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\163', '\121', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\165', '\103', '\040', '\165', '\156', '\040', '\061', - '\012', '\103', '\164', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\152', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\155', '\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\130', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\163', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\142', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\167', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\170', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\150', '\151', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\113', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\151', '\161', '\116', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\153', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\121', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\116', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\154', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\154', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\167', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\107', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\162', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\153', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\122', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\112', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\161', '\106', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\170', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\163', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\146', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\162', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\130', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\126', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\115', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\110', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\144', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\153', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\113', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\111', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\116', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\167', '\114', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\132', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\130', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\117', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\127', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\162', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\125', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\155', '\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\172', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\166', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\170', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\104', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\113', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\123', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\110', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\116', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\157', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\123', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\111', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\146', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\172', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\102', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\102', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\112', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\152', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\141', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\104', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\114', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\121', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\112', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\122', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\112', 
'\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\153', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\142', '\101', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\102', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\126', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\131', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\126', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\172', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\113', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\120', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\170', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\142', '\124', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\166', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\155', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\170', '\114', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\123', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\116', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\124', 
'\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\114', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\166', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\106', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\147', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\104', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\117', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\146', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\120', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\160', '\117', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\170', '\117', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\126', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\106', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\170', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\167', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\144', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\127', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\124', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\155', 
'\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\166', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\153', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\103', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\164', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\110', '\150', '\040', '\154', '\157', '\040', '\061', - '\012', '\131', '\166', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\126', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\127', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\115', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\131', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\106', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\121', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\113', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\114', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\111', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\147', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\152', 
'\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\106', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\170', '\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\152', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\120', '\147', '\040', '\151', '\156', '\040', '\061', - '\012', '\130', '\156', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\153', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\146', '\120', '\040', '\160', '\162', '\040', '\061', - '\012', '\104', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\172', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\167', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\142', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\167', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\132', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\146', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\126', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\125', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\172', 
'\124', '\040', '\152', '\157', '\040', '\061', - '\012', '\153', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\127', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\142', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\141', '\102', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\101', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\144', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\102', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\130', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\155', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\171', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\107', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\104', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\114', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\112', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\155', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\142', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\167', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\101', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\121', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\157', '\132', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\120', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\171', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\130', '\152', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\167', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\156', '\101', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\113', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\142', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\107', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\123', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\127', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\131', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\104', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\121', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\121', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\142', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\165', '\122', '\040', '\165', '\156', '\040', '\061', - '\012', '\143', '\115', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\130', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\116', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\110', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\112', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\103', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\146', 
'\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\117', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\155', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\160', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\131', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\146', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\162', '\123', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\155', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\167', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\130', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\131', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\126', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\103', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\115', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', '\113', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\126', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\117', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\114', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\131', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\160', '\106', '\040', '\157', '\156', '\040', '\061', - '\012', '\162', '\127', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\143', 
'\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\132', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\131', '\142', '\040', '\146', '\157', '\040', '\061', - '\012', '\172', '\142', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\102', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\111', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\167', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\164', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\122', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\171', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\155', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\111', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\111', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\142', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\146', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\127', '\160', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\115', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\123', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\131', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\154', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\103', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\172', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\144', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\144', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\146', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\117', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\146', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\107', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\101', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\146', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\104', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\106', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\124', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\116', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\113', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\162', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\155', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\146', '\122', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\125', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\151', '\125', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\163', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\164', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\120', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\163', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\170', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\151', '\125', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\141', '\104', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\170', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\141', '\107', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\120', '\147', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\144', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\142', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\126', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\170', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\126', '\144', '\040', '\157', '\156', '\040', '\061', - '\012', '\110', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\101', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\111', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\155', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\155', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\154', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\153', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\156', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\166', 
'\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\161', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\114', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\121', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\167', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\171', '\122', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\171', '\111', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\147', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\156', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\121', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\152', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\160', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\172', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\111', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\130', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\127', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\160', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\153', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\153', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\155', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\162', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\142', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\160', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\103', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\106', '\155', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\121', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\152', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\153', '\127', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\126', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\105', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\161', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\154', '\131', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\107', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\104', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\117', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\162', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\155', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\146', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\120', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\126', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\126', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\142', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\122', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\121', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\170', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\104', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\147', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\167', '\120', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\162', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\126', '\161', '\040', '\144', '\151', '\040', '\061', - '\012', '\170', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\147', '\103', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\117', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\166', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\116', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\130', '\161', '\040', '\151', '\156', '\040', '\061', - '\012', '\121', '\156', '\154', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\120', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\111', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\120', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\166', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\161', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\167', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\105', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\167', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\155', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\122', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\170', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\142', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\146', '\116', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\160', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\155', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\113', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\142', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\116', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\154', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\102', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\141', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\146', '\115', '\040', '\146', '\157', '\040', '\061', - '\012', '\162', '\132', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\153', 
'\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\121', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\166', '\121', '\040', '\151', '\156', '\040', '\061', - '\012', '\125', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\160', '\126', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\120', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\107', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\106', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\165', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\155', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\102', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\111', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\102', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\125', '\167', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\162', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\154', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\172', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\132', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\126', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\170', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\143', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\111', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\142', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\102', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\113', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\171', '\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\157', '\102', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\127', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\146', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\121', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\146', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\130', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\112', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\141', '\123', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\122', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\115', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\170', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\107', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\163', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\142', '\115', '\040', '\167', '\141', '\040', '\061', - '\012', '\165', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\156', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\172', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\150', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\142', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\132', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\171', 
'\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\125', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\102', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\152', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\130', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\124', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\162', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\116', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\150', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\126', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\124', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\150', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\124', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\170', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\103', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\167', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\126', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\122', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\117', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\154', 
'\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\110', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\152', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\116', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\154', '\167', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\155', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\160', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\106', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\110', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\105', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\167', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\127', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\146', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\160', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\166', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\130', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\105', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\116', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\115', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\171', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\160', 
'\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\157', '\101', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\154', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\170', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\164', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\114', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\125', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\106', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\163', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\166', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\106', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\156', '\101', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\142', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\120', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\163', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\146', '\120', '\040', '\156', '\171', '\040', '\061', - '\012', '\147', '\131', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\146', '\103', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\115', 
'\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\154', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\152', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\121', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\124', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\125', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\167', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\115', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\172', '\153', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\154', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\141', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\113', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\126', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\156', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\170', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\172', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\172', '\170', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\166', 
'\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\167', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\105', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\172', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\120', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\101', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\172', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\146', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\147', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\163', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\121', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\153', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\117', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\131', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\115', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\152', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\105', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\145', '\121', 
'\171', '\040', '\145', '\162', '\040', '\061', - '\012', '\123', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\125', '\170', '\040', '\155', '\142', '\040', '\061', - '\012', '\172', '\144', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\160', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\122', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\147', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\146', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\154', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\142', '\126', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\155', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\131', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\155', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\154', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\116', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\166', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\166', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\114', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\161', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\170', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\161', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\161', 
'\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\166', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\110', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\146', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\163', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\122', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\142', '\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\110', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\125', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\143', '\131', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\144', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\153', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\167', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\142', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\166', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\115', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\156', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\142', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\120', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\160', '\111', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\117', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\120', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\101', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\153', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\153', 
'\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\120', '\156', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\116', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\130', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\155', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\125', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\163', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\160', '\127', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\145', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\122', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\112', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\163', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\167', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\120', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\101', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\167', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\152', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\132', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\104', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\123', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\121', 
'\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\102', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\114', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\132', '\162', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\120', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\110', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\130', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\142', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\116', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\112', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\112', '\144', '\040', '\157', '\156', '\040', '\061', - '\012', '\122', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\166', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\166', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\167', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\110', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\124', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\121', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\104', '\154', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\150', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\117', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\155', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\121', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\166', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\146', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\167', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\130', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\154', '\101', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\170', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\167', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\166', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\102', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\146', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\101', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\150', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\155', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\115', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\110', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\120', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\112', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\127', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\126', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\146', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\106', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\127', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\166', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\111', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\106', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\170', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\114', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\122', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\152', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\122', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\170', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\152', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\112', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\106', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\105', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\122', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\146', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\167', '\126', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\146', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\127', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\152', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\106', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\126', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\165', '\107', '\040', '\165', '\156', '\040', '\061', - '\012', '\154', '\103', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\170', 
'\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\107', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\166', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\131', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\162', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\160', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\113', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\160', '\115', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\114', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\127', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\162', '\123', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\126', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\116', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\120', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\144', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\125', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\164', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\107', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\104', 
'\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\146', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\155', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\110', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\146', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\115', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\152', '\123', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\154', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\143', '\161', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\127', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\165', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\146', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\160', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\171', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\162', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\110', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\171', '\116', '\040', '\156', '\171', '\040', '\061', - '\012', '\121', '\162', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\113', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\146', '\125', '\040', '\142', '\145', '\040', '\061', - '\012', '\121', '\146', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\117', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\152', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\146', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\130', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\142', '\126', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\155', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\127', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\167', '\124', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\132', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\106', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\160', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\146', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\167', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\160', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\147', 
'\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\147', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\114', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\170', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\127', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\112', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\162', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\132', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\114', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\116', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\115', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\146', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\125', '\165', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\107', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\160', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\127', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\152', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\172', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\165', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\163', '\111', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\144', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\162', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\153', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\127', '\165', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\111', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\155', '\116', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\112', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\115', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\123', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\172', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\166', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\155', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\156', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\160', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\114', 
'\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\165', '\171', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\145', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\105', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\155', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\152', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\115', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\167', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\161', '\115', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\166', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\142', '\120', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\115', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\165', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\170', '\130', '\040', '\146', '\157', '\040', '\061', - '\012', '\150', '\166', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\120', 
'\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\172', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\163', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\146', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\121', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\167', '\146', '\040', '\157', '\167', '\040', '\061', - '\012', '\154', '\152', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\166', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\110', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\142', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\122', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\154', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\132', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\123', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\126', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\127', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\152', 
'\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\103', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\167', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\105', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\125', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\143', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\104', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\152', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\154', '\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\171', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\142', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\117', '\141', '\145', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\142', '\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\172', '\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\131', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\102', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\122', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\152', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\131', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\142', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\120', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\166', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\107', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\116', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\125', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\152', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\115', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\155', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\105', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\110', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\107', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\146', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\132', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\171', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\144', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\141', 
'\157', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\123', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\101', '\157', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\114', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\103', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\153', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\131', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\153', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\147', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\127', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\114', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\160', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\107', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\144', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\115', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\167', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\154', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\147', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\143', 
'\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\142', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\147', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\161', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\106', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\102', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\127', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\116', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\115', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\155', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\146', '\103', '\040', '\156', '\171', '\040', '\061', - '\012', '\105', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\152', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\125', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\147', '\157', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\102', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\146', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\170', '\124', '\040', '\167', '\141', '\040', '\061', - '\012', '\126', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\126', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\127', 
'\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\123', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\167', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\126', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\154', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\170', '\120', '\040', '\156', '\171', '\040', '\061', - '\012', '\131', '\171', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\120', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\147', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\117', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\121', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\106', '\040', '\146', '\157', '\040', '\061', - '\012', '\144', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\164', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\152', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\160', 
'\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\147', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\167', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\131', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\123', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\101', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\105', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\113', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\155', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\130', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\130', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\121', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\157', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\122', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\171', '\114', '\040', '\156', '\171', '\040', '\061', - '\012', '\153', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\171', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\155', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\147', 
'\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\111', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\115', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\130', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\121', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\115', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\127', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\127', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\127', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\116', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\142', '\132', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\124', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\113', '\144', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\146', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\103', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\146', '\132', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\146', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\126', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\124', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\166', '\166', '\040', '\166', '\151', '\040', '\061', - '\012', '\120', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\110', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\142', '\122', '\040', '\142', '\145', '\040', '\061', - '\012', '\143', '\106', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\167', '\120', '\040', '\167', '\141', '\040', '\061', - '\012', '\126', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\106', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\102', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\142', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\112', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\162', '\123', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\153', 
'\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\113', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\120', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\114', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\162', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\166', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\127', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\127', '\165', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\161', '\111', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\160', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\120', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\155', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\166', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\144', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\131', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\104', '\167', '\142', '\040', '\157', '\167', '\040', '\061', - '\012', '\127', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\144', 
'\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\166', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\104', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\107', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\167', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\124', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\103', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\167', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\160', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\165', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\103', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\170', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\146', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\167', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\144', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\147', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\166', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\144', 
'\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\163', '\166', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\165', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\106', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\126', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\132', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\112', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\111', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\167', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\113', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\167', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\121', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\142', '\126', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\160', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\125', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\172', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\141', 
'\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\154', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\147', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\127', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\155', '\123', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\132', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\152', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\154', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\167', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\122', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\105', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\160', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\112', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\117', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\160', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\120', '\172', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\124', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\146', '\161', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\132', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\110', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\143', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\126', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\116', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\131', '\171', '\040', '\157', '\156', '\040', '\061', - '\012', '\106', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\167', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\167', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\130', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\130', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\153', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\162', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\147', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', 
'\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\144', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\110', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\166', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\151', '\101', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\171', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\171', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\157', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\121', '\153', '\172', '\040', '\153', '\141', '\040', '\061', - '\012', '\114', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\122', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\106', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\132', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\103', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\154', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\113', 
'\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\142', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\142', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\146', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\126', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\132', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\132', '\142', '\040', '\147', '\141', '\040', '\061', - '\012', '\167', '\164', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\106', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\104', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\113', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\153', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\123', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\171', '\103', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\126', 
'\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\110', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\120', '\154', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\105', '\167', '\040', '\150', '\141', '\040', '\061', - '\012', '\172', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\111', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\172', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\163', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\142', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\171', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\167', '\122', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\121', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\154', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\126', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\144', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\117', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\132', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\153', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\160', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\152', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\102', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\103', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\107', '\155', '\040', '\155', '\141', '\040', '\061', - '\012', '\160', '\141', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\125', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\114', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\146', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\167', '\110', '\040', '\167', '\141', '\040', '\061', - '\012', '\120', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\170', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\142', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\143', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\161', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\172', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\161', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\171', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\172', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\132', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\166', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\154', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\171', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\110', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\164', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\107', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\110', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\106', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\144', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\103', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\152', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\154', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\111', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\130', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\142', '\105', '\040', '\142', '\145', '\040', '\061', - '\012', '\110', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\142', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\125', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\153', '\145', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\157', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\103', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\114', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\147', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\117', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\117', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\127', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\116', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\146', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\126', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\172', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\146', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\162', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\104', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\117', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\167', '\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\121', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\156', '\161', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\166', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\163', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\144', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\121', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\115', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\142', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\152', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\142', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\116', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\132', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\103', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\110', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\115', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\120', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\111', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\154', '\110', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\156', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\105', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\112', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\122', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\112', 
'\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\106', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\110', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\167', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\121', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\126', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\112', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\162', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\115', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\144', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\117', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\162', 
'\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\114', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\172', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\162', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\155', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\170', '\105', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\130', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\142', '\125', '\040', '\142', '\145', '\040', '\061', - '\012', '\141', '\145', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\166', '\126', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\122', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\170', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\121', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\130', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\112', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\166', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\126', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\127', 
'\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\130', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\102', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\153', '\115', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\110', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\142', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\122', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\166', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\102', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\157', '\120', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\164', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\167', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\106', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\153', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\157', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\103', '\147', 
'\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\166', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\131', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\152', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\127', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\142', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\165', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\153', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\144', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\154', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\146', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\142', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\131', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\147', 
'\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\144', '\124', '\040', '\144', '\141', '\040', '\061', - '\012', '\166', '\124', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\116', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\142', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\125', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\130', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\122', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\161', '\101', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\117', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\104', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\112', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\163', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\145', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\105', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\155', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\111', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\172', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\156', 
'\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\132', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\167', '\122', '\040', '\151', '\156', '\040', '\061', - '\012', '\157', '\112', '\166', '\040', '\153', '\157', '\040', '\061', - '\012', '\165', '\146', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\127', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\103', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\165', '\126', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\130', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\112', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\105', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\112', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\105', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\161', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\126', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\120', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\127', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\143', 
'\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\153', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\141', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\126', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\123', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\122', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\142', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\167', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\146', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\147', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\167', '\127', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\153', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\172', '\112', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\131', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\121', '\154', '\040', '\151', '\156', '\040', '\061', - '\012', '\121', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\106', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\161', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\156', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\131', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\111', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\130', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\162', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\152', '\111', '\040', '\163', '\164', '\040', '\061', - '\012', '\151', '\171', '\130', '\040', '\151', '\156', '\040', '\061', - '\012', '\132', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\164', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\132', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\130', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\112', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\142', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\165', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\101', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\157', '\125', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\156', '\127', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\167', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\113', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\150', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\152', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\171', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\111', '\157', '\171', '\040', '\157', '\156', '\040', '\061', - '\012', '\130', '\160', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\112', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\166', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\144', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\126', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\152', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\142', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\113', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\163', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\153', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\142', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\114', 
'\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\166', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\113', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\132', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\162', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\122', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\115', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\125', '\170', '\040', '\141', '\162', '\040', '\061', - '\012', '\161', '\110', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\153', 
'\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\170', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\122', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\172', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\161', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\172', '\127', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\155', '\102', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\172', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\111', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\157', '\113', '\040', '\157', '\156', '\040', '\061', - '\012', '\107', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\107', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\132', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\120', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\167', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\146', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\111', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\105', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\161', 
'\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\114', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\122', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\153', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\166', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\161', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\167', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\126', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\103', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\171', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\147', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\114', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\116', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\112', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\130', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\114', 
'\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\167', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\166', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\160', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\126', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\142', '\105', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\121', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\144', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\111', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\110', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\163', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\166', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\146', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\114', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\172', '\153', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\152', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\150', 
'\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\151', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\114', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\112', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\117', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\127', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\101', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\111', '\171', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\112', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\114', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\122', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\114', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\146', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\127', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\160', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\111', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\107', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\143', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\170', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\115', 
'\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\147', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\155', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\104', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\157', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\102', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\127', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\155', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\131', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\125', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\126', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\165', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\120', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\165', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\114', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\147', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\153', 
'\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\112', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\115', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\146', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\117', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\153', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\161', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\112', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\155', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\163', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\144', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\156', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\155', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\115', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\142', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\104', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\152', 
'\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\144', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\125', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\127', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\107', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\165', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\103', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\144', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\164', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\112', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\146', '\130', '\040', '\146', '\157', '\040', '\061', - '\012', '\116', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\102', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\172', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\125', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\142', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\171', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\130', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\121', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\146', '\127', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\146', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\154', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\111', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\113', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\124', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\121', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\155', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\160', '\105', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\123', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\147', '\151', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\111', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\167', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\117', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\167', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\163', '\120', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\132', 
'\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\154', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\116', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\170', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\132', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\171', '\123', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\132', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\154', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\152', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\171', '\106', '\040', '\151', '\156', '\040', '\061', - '\012', '\103', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\167', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\146', '\126', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\142', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\165', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\154', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\114', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\114', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\155', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\131', '\157', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\162', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\171', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\165', '\171', '\127', '\040', '\165', '\156', '\040', '\061', - '\012', '\153', '\107', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\167', '\113', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\153', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\130', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\103', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\121', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\120', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\102', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\102', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\105', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\105', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\121', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\110', '\167', '\040', '\165', '\163', '\040', '\061', - '\012', '\106', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\153', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\151', '\131', '\040', '\151', '\156', '\040', '\061', - '\012', '\163', '\120', '\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\121', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\125', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\114', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\122', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\153', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\131', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\167', '\131', '\040', '\151', '\163', '\040', '\061', - '\012', '\162', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\110', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\127', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\157', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\146', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\104', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\142', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\126', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\126', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\101', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\160', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\170', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\160', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\157', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\122', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\131', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\125', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\104', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\104', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\172', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\107', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\146', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\122', 
'\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\146', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\162', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\132', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\123', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\120', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\154', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\147', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\127', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\111', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\113', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\164', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\122', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\163', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\142', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\150', 
'\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\103', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\172', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\162', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\166', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\146', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\161', '\130', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\116', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\143', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\155', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\127', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\141', '\111', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\155', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\155', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\171', '\116', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\132', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\125', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\103', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\153', '\162', '\040', '\162', '\151', '\040', '\061', - '\012', '\146', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\122', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\103', 
'\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\142', '\103', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\115', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\120', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\171', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\145', '\130', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\126', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\104', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\103', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\164', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\143', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\146', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\142', '\132', '\040', '\151', '\156', '\040', '\061', - '\012', '\116', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\130', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\126', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\170', 
'\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\170', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\115', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\160', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\132', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\130', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\166', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\126', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\103', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\146', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\121', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\146', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\121', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\126', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\107', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\115', 
'\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\170', '\166', '\167', '\040', '\167', '\151', '\040', '\061', - '\012', '\172', '\111', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\146', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\127', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\155', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\132', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\147', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\141', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\123', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\124', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\130', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\123', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\104', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\147', 
'\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\167', '\166', '\040', '\167', '\151', '\040', '\061', - '\012', '\162', '\160', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\120', '\163', '\040', '\151', '\163', '\040', '\061', - '\012', '\113', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\104', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\162', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\142', '\121', '\040', '\142', '\145', '\040', '\061', - '\012', '\121', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\131', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\170', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\163', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\130', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\167', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\163', '\122', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\146', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\111', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\144', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\103', 
'\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\172', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\121', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\152', '\130', '\040', '\157', '\156', '\040', '\061', - '\012', '\126', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\127', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\153', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\112', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\122', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\130', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\145', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\131', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\170', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\132', '\162', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\146', '\126', '\040', '\165', '\163', '\040', '\061', - '\012', '\162', '\130', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\132', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\124', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\154', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\161', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\127', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\152', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\146', 
'\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\170', '\115', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\152', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\115', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\153', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\155', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\131', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\166', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\167', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\167', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\121', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\130', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\117', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\105', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\110', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\110', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\170', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\164', 
'\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\160', '\110', '\040', '\160', '\162', '\040', '\061', - '\012', '\111', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\155', '\105', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\123', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\146', '\130', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\106', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\156', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\153', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\152', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\157', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\127', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\115', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\115', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\142', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\112', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\104', '\146', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\152', '\114', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\151', '\107', '\040', '\151', '\156', '\040', '\061', - '\012', '\132', '\154', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\126', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\106', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\155', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\104', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\162', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\114', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\142', '\115', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\156', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\163', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\161', '\154', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\112', '\171', '\040', '\157', '\156', '\040', '\061', - '\012', '\107', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\157', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\163', '\111', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\106', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\131', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\130', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\105', '\157', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\143', '\115', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\167', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\153', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\110', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\145', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\165', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\167', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\131', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\107', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\166', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\102', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\130', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\161', 
'\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\163', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\113', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\123', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\142', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\141', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\164', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\142', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\110', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\131', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\160', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\152', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\107', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\121', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\154', '\112', '\040', '\154', '\151', '\040', '\061', - '\012', '\143', '\161', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\131', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\121', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\145', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\102', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\142', '\124', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\121', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\155', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\104', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\146', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\123', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\112', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', 
'\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\170', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\113', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\143', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\156', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\142', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\116', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\145', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\130', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\113', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\103', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\147', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\113', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\161', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\144', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\172', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\152', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\143', 
'\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\152', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\147', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\157', '\111', '\040', '\157', '\156', '\040', '\061', - '\012', '\131', '\171', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\146', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\162', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\121', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\142', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\155', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\142', '\106', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\127', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\170', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\116', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\146', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\162', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\157', '\130', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\152', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\114', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\152', 
'\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\143', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\147', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\113', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\151', '\132', '\040', '\151', '\156', '\040', '\061', - '\012', '\162', '\130', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\166', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\132', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\126', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\123', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\172', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\161', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\121', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\167', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\116', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\105', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\163', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\114', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\124', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\162', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\111', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\111', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\114', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\161', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\127', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\131', '\143', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\152', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\130', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\161', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\170', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\154', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\150', 
'\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\151', '\104', '\170', '\040', '\154', '\151', '\040', '\061', - '\012', '\132', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\142', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\122', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\154', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\123', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\145', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\113', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\127', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\160', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\105', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\163', '\113', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\115', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\150', 
'\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\146', '\132', '\040', '\153', '\165', '\040', '\061', - '\012', '\127', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\172', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\172', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\155', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\116', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\102', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\103', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\156', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\167', '\116', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\146', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\103', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\165', '\110', 
'\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\167', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\152', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\155', '\126', '\040', '\144', '\151', '\040', '\061', - '\012', '\143', '\103', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\130', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\155', '\122', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\170', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\162', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\152', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\170', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\154', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\103', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\146', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\107', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\142', '\105', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\166', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\127', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\170', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\157', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\122', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\147', '\151', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\104', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\127', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\127', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\122', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\164', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\167', '\127', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\162', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\146', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\146', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\113', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\127', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\165', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\153', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\131', 
'\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\166', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\101', '\157', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\122', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\127', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\107', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\143', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\143', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\103', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\155', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\121', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\127', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\124', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\141', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\167', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\166', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\160', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\154', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\154', 
'\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\114', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\163', '\114', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\106', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\155', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\143', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\154', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\170', '\101', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\106', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\146', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\160', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\104', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\124', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\127', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\123', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\160', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\124', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\122', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\102', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\156', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\161', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\167', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\154', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\122', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\126', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\165', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\162', '\102', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\171', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\120', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\106', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\155', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\120', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\112', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\121', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\162', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\102', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\124', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\167', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\127', '\147', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\117', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\142', '\102', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\157', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\166', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\142', '\106', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\127', '\165', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\152', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\142', 
'\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\145', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\155', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\130', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\110', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\146', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\167', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\110', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\107', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\167', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\115', '\154', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\112', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\156', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\107', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\153', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\142', '\113', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\124', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\122', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\106', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\127', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\172', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\167', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\110', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\161', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\124', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\120', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\120', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\144', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\166', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\141', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\167', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\122', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\154', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\154', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\114', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\172', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\171', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\130', '\154', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\161', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\160', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\153', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\146', '\102', '\040', '\142', '\145', '\040', '\061', - '\012', '\127', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\130', '\145', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\130', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\170', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\170', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\142', '\142', '\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\120', 
'\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\103', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\151', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\107', '\160', '\040', '\160', '\157', '\040', '\061', - '\012', '\150', '\120', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\111', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\103', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\172', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\110', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\167', '\110', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\103', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\170', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', '\117', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\121', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\142', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\124', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\130', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\153', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\171', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\107', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\172', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\166', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\130', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\130', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\171', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\155', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\142', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\171', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\155', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\116', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\161', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\107', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\131', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\160', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\171', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\116', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\101', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\102', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\160', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\130', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\115', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\147', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\114', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\106', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\166', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\144', 
'\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\170', '\113', '\040', '\142', '\145', '\040', '\061', - '\012', '\102', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\160', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\112', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\113', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\110', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\112', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\132', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\156', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\170', '\131', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\125', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\160', '\166', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\160', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\111', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\132', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\160', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\127', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\155', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\146', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\127', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\142', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\104', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\144', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\167', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\143', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\172', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\105', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\170', '\110', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\153', 
'\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\130', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\103', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\122', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\147', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\147', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\125', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\112', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\123', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\153', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\112', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\162', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\126', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\147', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\171', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\112', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\170', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\146', 
'\131', '\040', '\146', '\157', '\040', '\061', - '\012', '\130', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\147', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\171', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\142', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\124', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\163', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\154', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\160', '\121', '\040', '\160', '\157', '\040', '\061', - '\012', '\161', '\112', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\150', '\131', '\151', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\154', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\166', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\116', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\113', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\113', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\110', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\125', '\171', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\170', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\111', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\124', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\146', 
'\120', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\170', '\111', '\040', '\146', '\157', '\040', '\061', - '\012', '\166', '\121', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\166', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\167', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\141', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\170', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\160', '\117', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\157', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\160', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\116', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\114', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\113', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\125', '\171', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\146', '\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\163', '\126', '\040', '\163', '\164', '\040', '\061', - '\012', '\130', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\155', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\121', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\132', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\117', '\141', 
'\171', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\125', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\170', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\103', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\111', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\121', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\112', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\102', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\126', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\106', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\152', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\165', 
'\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\167', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\103', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\112', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\144', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\161', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\127', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\163', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\114', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\144', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\147', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\131', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\132', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\146', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\166', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\126', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\115', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\121', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\125', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\107', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\166', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\162', '\101', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\162', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\115', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\171', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\114', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\105', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\130', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\110', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\157', '\120', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\111', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\103', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\104', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\107', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\103', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\120', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\162', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\150', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\154', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\104', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\154', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\121', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\144', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\121', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\121', 
'\171', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\167', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\144', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\156', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\151', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\113', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\153', '\105', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\132', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\142', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\142', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\111', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\107', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\160', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\155', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\141', '\112', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\146', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\130', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\103', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\170', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\107', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\152', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\146', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\160', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\124', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\132', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\105', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\131', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\122', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\123', 
'\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\172', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\112', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\103', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\166', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\167', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\155', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\164', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\124', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\153', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\111', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\163', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\123', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\126', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\155', '\111', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\127', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\113', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\157', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\147', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\167', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\127', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\146', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\150', '\153', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\167', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\121', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\125', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\113', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\121', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\126', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\167', '\124', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\154', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\115', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\160', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\153', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\142', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\132', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\147', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\124', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\167', 
'\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\162', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\167', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\167', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\172', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\107', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\132', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\106', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\166', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\111', '\171', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\104', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\131', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\162', 
'\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\105', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\144', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\107', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\107', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\155', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\104', '\155', '\040', '\160', '\157', '\040', '\061', - '\012', '\161', '\155', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\126', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\101', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\105', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\110', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\141', '\161', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\120', '\166', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\104', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\144', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\164', 
'\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\124', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\132', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\146', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\152', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\153', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\123', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\104', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\112', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\114', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\106', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\155', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\166', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\131', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\116', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\112', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\146', 
'\115', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\155', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\107', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\153', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\144', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\111', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\111', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\102', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\171', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\167', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\120', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\124', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\172', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\160', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\132', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\142', '\101', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\124', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\170', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\106', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\116', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\115', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\117', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\170', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\163', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\157', '\152', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\105', '\157', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\167', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\152', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\124', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\146', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\143', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\167', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\151', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\167', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\121', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\106', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\166', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\131', '\145', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\167', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\110', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\170', '\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\111', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\146', '\105', '\040', '\142', '\145', '\040', '\061', - '\012', '\147', '\122', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\170', '\116', '\040', '\142', '\145', '\040', '\061', - '\012', '\153', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\103', 
'\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\170', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\103', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\114', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\142', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\170', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\120', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\131', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\124', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\126', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\167', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\124', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\105', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\160', '\120', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\152', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\171', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\117', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\152', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\147', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\115', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\167', '\101', '\040', '\160', '\162', '\040', '\061', - '\012', '\114', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\153', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\111', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\146', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\163', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\125', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\153', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\171', 
'\110', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\167', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\103', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\144', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\144', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\124', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\171', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\171', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\127', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\115', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\110', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\126', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\172', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\115', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\155', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\160', '\115', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\152', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\154', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\121', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\102', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\160', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\130', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\121', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\164', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\142', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\160', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\155', '\130', 
'\163', '\040', '\155', '\145', '\040', '\061', - '\012', '\132', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\153', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\155', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\155', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\167', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\154', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\142', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\160', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\122', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\171', '\104', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\120', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\144', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\142', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\170', '\121', '\040', '\170', '\145', '\040', '\061', - '\012', '\104', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\150', 
'\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\153', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\127', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\155', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\162', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\167', '\113', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\167', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\121', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\155', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\152', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\172', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\131', '\162', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\170', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\125', '\170', '\040', '\170', '\145', '\040', '\061', - '\012', '\167', '\155', '\124', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\131', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\143', 
'\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\126', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\131', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\116', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\164', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\114', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\153', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\127', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\147', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\120', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\147', '\161', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\157', '\125', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\167', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\167', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\155', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\167', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\113', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\154', '\122', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\110', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\112', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\155', '\111', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\103', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\131', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\146', '\131', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\142', '\131', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\155', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\110', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\152', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\131', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\160', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\170', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\144', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\152', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\160', 
'\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\106', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\123', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\104', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\162', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\131', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\144', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\166', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\153', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\110', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\126', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\153', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\106', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\127', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\160', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\116', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\155', 
'\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\115', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\132', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\116', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\142', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\113', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\132', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\163', '\152', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\152', '\131', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\164', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\110', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\147', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\117', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\130', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\113', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\123', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\111', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\113', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\124', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\146', '\103', '\040', '\146', '\157', '\040', '\061', - '\012', '\150', '\113', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\123', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\121', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\151', '\107', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\120', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\153', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\110', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\130', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\155', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\126', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\170', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\121', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\166', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\110', 
'\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\170', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\112', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\115', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\103', '\171', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\160', '\130', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\130', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\111', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\152', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\170', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\104', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\130', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\152', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\172', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\147', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\156', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\126', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\142', '\111', '\040', '\142', '\145', '\040', '\061', - '\012', '\132', '\160', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\146', '\117', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\123', 
'\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\141', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\130', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\166', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\123', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\144', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\124', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\113', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\164', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\157', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\113', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\121', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\114', 
'\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\107', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\155', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\144', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\116', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\130', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\104', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\115', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\112', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\144', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\142', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\103', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\130', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\161', '\124', '\040', '\165', '\156', '\040', '\061', - '\012', '\102', '\170', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\102', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\107', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\130', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\142', '\106', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\164', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\126', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\164', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\120', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\157', '\110', '\040', '\157', '\156', '\040', '\061', - '\012', '\130', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\130', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\124', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\167', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\132', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\125', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\103', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\115', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\142', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\107', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\107', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\167', 
'\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\172', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\153', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\172', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\170', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\126', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\155', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\156', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\152', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\104', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\146', '\121', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\170', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\170', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\121', '\156', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\153', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\167', '\104', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\106', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\142', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\110', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\126', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\117', '\161', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\127', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\115', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\167', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\156', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\121', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\122', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\113', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\152', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\160', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\153', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\142', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\153', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\161', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\120', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\131', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\107', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\111', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\125', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\161', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\146', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\146', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\106', '\143', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\130', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\161', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\102', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\124', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\161', 
'\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\144', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\152', '\112', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\132', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\172', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\146', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\125', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\144', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\112', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\170', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\154', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\104', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\166', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\162', 
'\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\171', '\131', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\141', '\120', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\167', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\171', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\124', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\102', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\113', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\126', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\101', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\170', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\166', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\150', '\155', '\040', '\155', '\141', '\040', '\061', - '\012', '\132', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\166', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\144', 
'\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\167', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\131', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\130', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\126', '\170', '\040', '\166', '\151', '\040', '\061', - '\012', '\122', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\104', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\171', '\166', '\040', '\166', '\151', '\040', '\061', - '\012', '\103', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\122', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\121', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\142', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\154', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\166', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\167', '\123', '\040', '\160', '\162', '\040', '\061', - '\012', '\113', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\144', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\160', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\166', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\167', '\107', '\040', '\151', '\156', '\040', '\061', - '\012', '\162', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\102', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\131', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\105', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\163', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\166', '\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\153', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\151', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\107', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\101', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\126', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\170', '\124', '\040', '\142', '\145', '\040', '\061', - '\012', '\121', '\150', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\154', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\142', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\145', 
'\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\113', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\124', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\153', '\167', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\106', '\161', '\040', '\151', '\156', '\040', '\061', - '\012', '\155', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\114', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\163', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\104', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\144', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\170', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\112', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\103', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\104', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\121', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\110', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\172', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\127', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\115', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\156', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\117', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\146', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\115', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\131', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\153', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\131', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\161', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\107', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\113', '\147', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\111', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\104', 
'\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\146', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\146', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\124', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\125', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\171', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\124', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\170', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\144', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\147', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\162', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\131', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\115', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\115', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\110', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\146', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\147', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\155', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\167', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\150', 
'\143', '\040', '\151', '\143', '\040', '\061', - '\012', '\170', '\111', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\106', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\147', '\162', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\147', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\111', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\170', '\116', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\101', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\103', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\126', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\157', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\130', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\121', '\164', 
'\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\127', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\130', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\127', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\166', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\145', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\112', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\155', '\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\154', '\122', '\040', '\151', '\163', '\040', '\061', - '\012', '\125', '\141', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\142', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\116', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\126', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\167', '\125', '\040', '\151', '\156', '\040', '\061', - '\012', '\103', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\130', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\107', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\142', '\131', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\172', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\127', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\115', 
'\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\172', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\145', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\170', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\121', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\166', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\124', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\167', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\146', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\145', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\102', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\123', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\171', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\152', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\164', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\164', 
'\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\167', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\127', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\170', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\110', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\143', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\102', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\153', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\166', '\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\107', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\155', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\107', '\154', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\155', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\121', '\167', '\040', '\167', '\151', '\040', '\061', - '\012', '\170', '\141', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\146', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\107', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\166', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\167', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\166', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\155', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\112', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\110', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\121', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\102', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\160', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\125', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\144', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\115', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\101', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\167', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\120', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\104', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\132', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\102', '\147', 
'\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\121', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\157', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\131', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\163', '\104', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\165', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\122', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\104', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\146', '\170', '\101', '\040', '\146', '\157', '\040', '\061', - '\012', '\170', '\120', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\112', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\160', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\170', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\114', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\163', 
'\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\123', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\125', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\127', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\113', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\131', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\152', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\153', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\167', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\120', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\101', '\157', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\166', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\117', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\155', 
'\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\153', '\111', '\040', '\153', '\165', '\040', '\061', - '\012', '\160', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\144', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\104', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\122', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\111', '\165', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\105', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\146', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\165', '\112', '\040', '\165', '\156', '\040', '\061', - '\012', '\156', '\122', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\170', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\146', '\132', '\040', '\156', '\171', '\040', '\061', - '\012', '\157', '\161', '\124', '\040', '\150', '\157', '\040', '\061', - '\012', '\143', '\147', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\142', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\130', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\126', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\146', 
'\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\167', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\120', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\103', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\172', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\147', '\152', '\040', '\152', '\157', '\040', '\061', - '\012', '\107', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\107', '\040', '\146', '\157', '\040', '\061', - '\012', '\155', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\144', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\110', '\153', '\040', '\151', '\156', '\040', '\061', - '\012', '\107', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\104', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\172', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\106', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\142', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\166', 
'\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\144', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\103', '\154', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\162', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\155', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\130', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\117', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\172', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\160', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\132', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\106', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\156', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\130', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\142', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\120', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\132', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\144', 
'\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\160', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\144', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\153', '\117', '\040', '\153', '\157', '\040', '\061', - '\012', '\147', '\163', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\153', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\167', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\165', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\127', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\170', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\124', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\126', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\152', 
'\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\161', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\166', '\105', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\110', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\101', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\146', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\115', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\167', '\117', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\142', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\152', '\132', '\040', '\157', '\156', '\040', '\061', - '\012', '\163', '\165', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\170', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\171', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\126', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\105', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\117', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\124', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\127', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\110', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\102', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\167', '\123', '\040', '\167', '\141', '\040', '\061', - '\012', '\103', '\142', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\167', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\156', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\111', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\107', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\152', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\171', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\170', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\166', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\162', 
'\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\154', '\162', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\103', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\105', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\114', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\166', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\154', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\166', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\142', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\116', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\131', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\146', '\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\146', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\102', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\162', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\171', 
'\112', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\155', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\130', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\131', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\170', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\116', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\172', '\125', '\040', '\146', '\157', '\040', '\061', - '\012', '\122', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\154', '\111', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\115', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\150', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\167', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\116', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\155', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\125', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\110', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\123', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\153', 
'\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\102', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\112', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\124', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\130', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\167', '\102', '\040', '\157', '\167', '\040', '\061', - '\012', '\153', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\146', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\132', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\132', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\155', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\114', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\113', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\161', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\113', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\146', '\122', '\040', '\142', '\145', '\040', '\061', - '\012', '\122', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\116', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\143', 
'\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\144', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\160', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\127', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\101', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\162', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\155', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\114', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\167', '\102', '\040', '\151', '\156', '\040', '\061', - '\012', '\145', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\157', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\142', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\160', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\172', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\127', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\152', 
'\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\167', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\110', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\166', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\127', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\162', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\171', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\123', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\153', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\167', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\107', '\142', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\102', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\127', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\155', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\107', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\112', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\152', 
'\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\116', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\104', '\144', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\162', '\102', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\113', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\161', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\112', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\102', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\165', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\104', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\147', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\144', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\171', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\117', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\121', 
'\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\110', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\111', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\121', '\153', '\040', '\153', '\157', '\040', '\061', - '\012', '\107', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\132', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\166', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\167', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\115', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\152', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\102', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\102', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\143', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\121', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\160', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\155', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\146', '\110', '\040', '\146', '\157', '\040', '\061', - '\012', '\160', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\127', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\170', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\155', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\132', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\116', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\142', '\116', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\153', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\126', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\112', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\147', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\170', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\166', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\101', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\155', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\116', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\113', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\122', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\154', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\121', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\155', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\142', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\126', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\126', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\146', '\105', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\126', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\113', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\143', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\124', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\166', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\107', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\166', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\147', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\160', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\172', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\170', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\115', 
'\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\152', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\146', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\152', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\150', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\125', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\127', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\107', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\167', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\165', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\104', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\171', '\116', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\155', 
'\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\130', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\155', '\167', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\111', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\107', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\131', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\126', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\156', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\155', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\121', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\113', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\125', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\123', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\126', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\172', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\146', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\147', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\116', 
'\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\124', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\102', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\167', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\121', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\104', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\124', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\127', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\152', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\152', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\103', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\171', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\127', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\156', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\102', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\111', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\162', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\132', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\172', 
'\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\116', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\106', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\127', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\142', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\163', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\154', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\127', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\103', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\146', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\126', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\143', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\124', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\147', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\165', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\146', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\102', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\142', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\126', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\150', 
'\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\146', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\120', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\132', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\122', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\103', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\155', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\161', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\147', '\165', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\110', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\145', '\121', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\154', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\107', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\111', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\132', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\153', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\170', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\121', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\117', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\172', '\142', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\153', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\112', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\155', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\143', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\167', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\132', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\162', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\130', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\143', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\167', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\111', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\165', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\104', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\110', '\152', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\121', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\166', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\110', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\170', '\111', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\117', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\160', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\155', '\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\152', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\127', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\107', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\123', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\132', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\113', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\122', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\167', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\160', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\155', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\150', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\127', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\110', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\146', '\113', '\040', '\142', '\145', '\040', '\061', - '\012', '\112', '\147', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\124', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\113', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\153', '\172', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\167', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\164', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\130', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\172', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\127', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\154', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\146', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\122', '\172', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\163', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\103', 
'\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\106', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\151', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\107', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\123', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\104', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\161', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\110', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\144', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\156', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\146', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\171', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\112', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\170', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\103', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\113', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\167', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\110', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\142', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\125', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\147', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\171', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\160', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\152', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\154', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\167', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\126', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\163', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\156', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\160', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\172', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\104', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\120', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\102', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\151', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\153', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\165', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\157', '\110', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\167', '\110', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\102', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\157', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\170', '\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\167', '\122', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\112', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\156', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\115', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\170', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\172', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\160', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\124', 
'\144', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\163', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\167', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\130', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\146', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\123', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\170', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\113', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\127', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\162', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\155', '\107', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\143', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\150', 
'\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\111', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\123', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\145', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\132', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\107', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\154', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\155', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\154', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\167', '\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\152', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\102', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\102', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\130', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\107', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\130', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\157', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\151', '\126', '\164', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\150', 
'\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\132', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\166', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\103', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\121', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\154', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\103', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\166', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\126', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\120', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\146', '\110', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\142', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\157', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\123', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\122', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\110', '\143', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\170', '\112', '\040', '\156', '\171', '\040', '\061', - '\012', '\154', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\131', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\127', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\162', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\107', 
'\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\172', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\147', '\126', '\040', '\147', '\151', '\040', '\061', - '\012', '\122', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\106', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\121', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\116', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\146', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\102', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\147', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\123', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\103', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\155', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\102', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\166', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\141', '\104', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\107', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\152', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\124', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\115', '\144', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\160', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\142', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\110', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\132', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\121', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\153', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\171', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\155', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\146', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\167', 
'\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\166', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\115', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\160', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\113', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\154', '\111', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\155', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\132', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\122', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\160', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\167', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\107', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\122', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\153', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\125', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\152', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\113', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\160', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\157', '\125', 
'\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\152', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\122', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\150', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\164', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\152', '\161', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\167', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\171', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\122', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\106', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\105', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\120', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\146', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\115', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\113', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\162', 
'\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\110', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\106', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\160', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\116', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\104', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\160', '\131', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\156', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\146', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\172', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\116', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\102', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\143', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\115', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\112', '\152', '\040', '\162', '\151', '\040', '\061', - '\012', '\141', '\130', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\163', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\121', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\113', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\166', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\107', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\142', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\103', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\122', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\130', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\151', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\106', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\105', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\172', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\104', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\170', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\107', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\166', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\160', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\170', '\104', '\040', '\160', '\162', '\040', '\061', - '\012', '\132', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\157', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\143', 
'\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\115', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\107', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\116', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\143', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\111', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\114', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\122', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\156', '\162', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\171', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\143', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\155', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\105', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\123', '\156', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\162', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\126', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\166', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\144', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\155', '\124', '\040', '\155', '\145', '\040', '\061', - '\012', '\114', '\142', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\160', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\167', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\125', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\144', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\132', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\167', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\142', '\115', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\147', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\113', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\112', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\160', '\126', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\161', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\125', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\124', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\115', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\152', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\141', '\126', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\110', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\162', 
'\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\126', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\154', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\162', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\114', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\166', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\145', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\116', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\172', '\142', '\040', '\142', '\151', '\040', '\061', - '\012', '\127', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\142', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\156', '\143', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\126', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\123', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\166', 
'\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\161', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\104', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\171', '\111', '\040', '\151', '\156', '\040', '\061', - '\012', '\145', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\161', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\163', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\124', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\155', '\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\106', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\156', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\127', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\172', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\121', '\145', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\154', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\107', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\147', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\115', '\040', '\146', '\157', '\040', '\061', - '\012', '\152', '\123', 
'\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\147', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\114', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\166', '\132', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\155', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\146', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\103', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\165', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\132', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\111', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\132', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\170', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\144', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\167', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\102', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\131', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\171', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\107', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\116', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\106', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\166', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\167', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\166', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\122', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\172', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\160', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\143', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\120', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\171', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\155', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\154', 
'\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\105', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\144', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\114', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\101', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\172', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\117', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\144', '\163', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\166', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\120', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\132', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\103', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\146', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\146', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\154', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\127', 
'\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\126', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\155', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\144', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\155', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\121', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\150', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\166', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\104', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\144', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\166', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\166', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\166', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\171', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\112', '\146', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\141', '\102', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\127', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\107', '\142', '\040', '\142', '\151', '\040', '\061', - '\012', '\147', '\152', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\162', 
'\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\152', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\166', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\124', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\127', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\127', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\113', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\115', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\153', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\167', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\167', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\167', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\114', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\115', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\160', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\166', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\143', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\152', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\126', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\161', 
'\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\126', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\172', '\156', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\171', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\113', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\105', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\160', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\104', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\165', '\113', '\040', '\165', '\156', '\040', '\061', - '\012', '\166', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\127', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\120', '\156', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\114', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\160', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\114', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\160', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\104', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\167', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\102', 
'\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\107', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\146', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\154', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\125', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\146', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\121', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\107', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\102', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\104', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\115', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\146', 
'\115', '\040', '\156', '\171', '\040', '\061', - '\012', '\107', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\172', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\116', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\162', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\146', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\124', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\166', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\120', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\167', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\115', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\170', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\132', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\106', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\110', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\163', '\114', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\122', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\163', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\102', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\172', 
'\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\144', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\127', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\153', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\161', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\153', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\144', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\170', '\114', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\130', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\152', '\113', '\040', '\151', '\156', '\040', '\061', - '\012', '\163', '\106', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\110', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\120', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\132', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\144', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\164', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\171', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\156', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\112', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\127', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\164', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\172', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\170', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\161', 
'\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\155', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\160', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\107', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\152', '\170', '\040', '\172', '\152', '\040', '\061', - '\012', '\164', '\166', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\104', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\171', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\116', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\166', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\120', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\167', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\160', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\104', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\105', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\132', '\143', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\150', 
'\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\152', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\104', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\127', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\166', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\164', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\151', '\121', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\106', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\125', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\146', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\123', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\155', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\106', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\120', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\161', '\121', '\040', '\164', '\151', '\040', '\061', - '\012', '\155', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\170', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\166', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\144', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\147', 
'\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\154', '\101', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\106', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\146', '\132', '\040', '\146', '\157', '\040', '\061', - '\012', '\152', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\116', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\110', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\103', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\143', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\104', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\130', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\115', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\107', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\131', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\103', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\132', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\157', '\121', '\144', '\040', '\157', '\156', '\040', '\061', - '\012', '\106', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\167', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\157', 
'\111', '\040', '\157', '\156', '\040', '\061', - '\012', '\163', '\112', '\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\164', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\126', '\146', '\040', '\147', '\151', '\040', '\061', - '\012', '\160', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\107', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\172', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\166', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\130', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\115', '\154', '\040', '\154', '\151', '\040', '\061', - '\012', '\142', '\131', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\172', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\113', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\146', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\154', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\120', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\164', 
'\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\154', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\110', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\121', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\114', '\144', '\143', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\125', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\112', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\115', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\141', '\167', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\164', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\172', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\172', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\102', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\163', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\125', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\163', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\163', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\130', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\154', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\126', 
'\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\146', '\101', '\040', '\153', '\141', '\040', '\061', - '\012', '\126', '\155', '\153', '\040', '\155', '\107', '\040', '\061', - '\012', '\152', '\113', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\120', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\120', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\131', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\163', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\170', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\153', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\143', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\132', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\142', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\141', '\121', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\153', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\153', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\114', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\116', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\142', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\145', '\115', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\132', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\166', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\103', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\172', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\154', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\112', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\106', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\115', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\153', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\146', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\110', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\170', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\112', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\172', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\146', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\127', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\130', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\162', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\170', '\132', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\130', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\101', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\151', '\107', '\153', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\105', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\121', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\112', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\153', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\156', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\167', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\160', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\167', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\145', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\144', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\127', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\154', '\111', '\040', '\154', '\145', '\040', '\061', - '\012', '\102', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\146', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\164', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\115', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\117', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\104', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\155', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\163', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\163', '\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\130', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\145', 
'\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\106', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\130', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\101', '\157', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\153', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\142', '\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\157', '\103', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\131', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\116', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\156', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\167', '\112', '\040', '\143', '\155', '\040', '\061', - '\012', '\161', '\112', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\116', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\167', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\143', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\102', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\114', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\162', 
'\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\111', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\102', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\172', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\152', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\170', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\146', '\115', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\121', '\155', '\040', '\121', '\117', '\040', '\061', - '\012', '\172', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\142', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\125', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\152', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\131', '\151', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\116', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\116', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\102', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\132', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\107', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\103', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\167', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\142', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\105', '\161', '\164', '\040', '\145', '\161', '\040', '\061', - '\012', '\130', '\150', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\125', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\170', '\116', '\040', '\163', '\164', '\040', '\061', - '\012', '\117', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\103', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\147', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\152', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\163', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\107', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\115', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\153', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\122', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\167', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\112', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\126', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\131', '\154', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\114', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\154', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\104', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\113', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\120', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\131', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\146', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\146', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\113', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\111', '\171', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\165', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\113', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\132', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\160', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\146', '\114', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\152', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\130', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\167', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\131', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\131', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\161', 
'\131', '\040', '\145', '\161', '\040', '\061', - '\012', '\165', '\111', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\107', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\167', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\110', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\146', '\127', '\040', '\150', '\127', '\040', '\061', - '\012', '\151', '\171', '\107', '\040', '\151', '\156', '\040', '\061', - '\012', '\172', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\145', '\106', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\152', '\151', '\040', '\152', '\123', '\040', '\061', - '\012', '\155', '\164', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\110', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\170', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\127', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\123', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\172', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\160', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\166', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\131', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\123', '\161', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\147', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\107', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\125', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\107', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\122', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\124', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\132', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\117', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\152', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\120', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\167', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\167', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\124', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\107', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\160', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\124', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\117', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\160', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\131', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\152', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\172', '\156', '\040', '\114', '\107', '\040', '\061', - '\012', '\131', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\131', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\130', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\111', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\112', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\170', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\104', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\163', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\152', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\125', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\154', 
'\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\102', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\142', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\154', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\162', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\103', '\160', '\040', '\107', '\103', '\040', '\061', - '\012', '\156', '\126', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\142', '\107', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\144', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\172', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\125', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\132', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\122', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\167', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\167', '\123', '\040', '\163', '\164', '\040', '\061', - '\012', '\105', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\105', 
'\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\153', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\152', '\127', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\130', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\166', '\131', '\040', '\166', '\113', '\040', '\061', - '\012', '\154', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\170', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\126', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\113', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\167', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\160', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\147', '\145', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\104', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\160', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\155', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\125', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\127', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\162', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\121', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\132', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\152', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\121', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\107', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\167', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\160', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\115', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\164', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\160', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\104', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\113', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\143', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\170', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\120', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\155', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\155', '\124', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\170', '\103', '\040', '\107', '\103', '\040', '\061', - '\012', '\154', '\122', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\153', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\154', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\120', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\166', '\121', '\040', '\121', '\117', '\040', '\061', - '\012', '\152', '\107', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\115', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\117', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\144', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\166', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\142', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\146', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\160', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\170', '\106', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\132', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\107', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\120', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\107', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\142', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\152', 
'\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\123', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\111', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\111', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\160', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\125', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\125', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\144', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\167', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\110', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\152', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\120', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\162', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\157', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\131', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\117', '\144', 
'\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\126', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\106', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\154', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\166', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\142', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\120', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\121', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\105', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\163', '\102', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\155', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\164', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\144', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\167', '\111', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\160', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\130', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\131', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\106', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\102', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\103', 
'\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\162', '\171', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\113', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\116', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\122', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\141', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\141', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\170', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\125', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\117', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\120', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\146', '\116', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\154', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\132', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\105', '\161', '\040', '\166', '\113', '\040', '\061', - '\012', '\130', '\166', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\120', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\143', 
'\132', '\040', '\143', '\155', '\040', '\061', - '\012', '\154', '\104', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\167', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\162', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\113', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\115', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\144', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\146', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\144', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\166', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\120', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\172', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\106', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\122', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\130', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\110', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\145', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', 
'\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\153', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\132', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\124', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\110', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\154', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\113', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\142', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\142', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\165', '\106', '\040', '\165', '\156', '\040', '\061', - '\012', '\161', '\172', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\141', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\155', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\141', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\153', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\157', '\103', '\161', '\040', '\107', '\103', '\040', '\061', - '\012', '\161', '\121', '\150', '\040', '\121', '\117', '\040', '\061', - '\012', '\143', '\167', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\115', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\113', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\113', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\161', 
'\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\107', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\130', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\115', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\132', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\171', '\105', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\125', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\126', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\115', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\127', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\132', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\157', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\122', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\120', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\161', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\131', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\145', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\152', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\161', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\166', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\106', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\110', '\144', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\132', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\162', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\104', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\145', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\172', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\172', '\143', '\040', '\143', '\155', '\040', '\061', - '\012', '\170', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\167', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\170', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\126', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\147', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\152', 
'\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\132', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\110', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\116', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\154', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\160', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\164', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\121', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\122', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\160', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\126', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\121', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\121', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\162', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\161', 
'\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\161', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\112', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\170', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\146', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\126', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\126', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\102', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\130', '\154', '\040', '\130', '\155', '\040', '\061', - '\012', '\150', '\167', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\162', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\154', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\117', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\103', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\106', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\127', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\123', 
'\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\102', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\166', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\146', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\113', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\153', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\111', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\101', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\146', '\110', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\142', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\104', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\110', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\126', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\162', '\113', '\040', '\110', '\113', '\040', '\061', - '\012', '\127', '\170', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\126', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\142', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\152', 
'\143', '\040', '\152', '\123', '\040', '\061', - '\012', '\152', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\166', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\171', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\146', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\142', '\114', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\153', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\166', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\114', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\120', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\170', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\146', 
'\125', '\040', '\146', '\157', '\040', '\061', - '\012', '\151', '\116', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\120', '\142', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\151', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\122', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\142', '\102', '\040', '\142', '\145', '\040', '\061', - '\012', '\163', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\170', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\115', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\170', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\142', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\141', '\157', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\147', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\155', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\151', '\117', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\117', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\142', '\146', '\123', '\040', '\142', '\145', '\040', '\061', - '\012', '\121', '\150', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\155', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\131', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\145', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\164', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\123', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\146', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\123', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\123', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\103', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\167', '\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\107', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\102', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\163', '\127', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\132', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\167', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\115', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\163', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\162', 
'\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\127', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\170', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\126', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\162', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\142', '\125', '\040', '\160', '\162', '\040', '\061', - '\012', '\124', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\162', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\130', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\112', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\132', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\125', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\106', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\142', '\116', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\115', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\123', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\163', 
'\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\113', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\170', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\152', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\162', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\166', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\170', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\166', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\106', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\163', '\146', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\111', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\170', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\111', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\110', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\130', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\127', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\145', '\113', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\150', 
'\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\142', '\126', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\130', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\167', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\166', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\142', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\125', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\155', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\170', '\126', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\152', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\155', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\161', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\161', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\112', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\144', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\122', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\126', '\143', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\111', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\142', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\155', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\121', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\146', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\142', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\106', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\127', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\156', '\110', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\170', '\132', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\155', '\110', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\172', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\155', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\166', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\121', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\110', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\110', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\115', 
'\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\167', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\146', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\106', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\106', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\130', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\126', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\110', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\125', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\107', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\167', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\172', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\164', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\167', '\115', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\162', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\155', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\155', 
'\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\151', '\131', '\040', '\151', '\156', '\040', '\061', - '\012', '\160', '\124', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\160', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\150', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\117', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\146', '\106', '\040', '\146', '\157', '\040', '\061', - '\012', '\143', '\124', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\160', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\147', '\167', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\103', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\115', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\116', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\124', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\155', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\170', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\101', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\110', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\147', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\146', '\125', '\040', '\160', '\162', '\040', '\061', - '\012', '\157', '\111', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\104', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\151', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\146', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\170', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\155', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\155', '\115', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\126', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\146', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\171', 
'\121', '\040', '\156', '\171', '\040', '\061', - '\012', '\155', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\121', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\106', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\150', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\143', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\167', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\127', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\112', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\157', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\171', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\152', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\142', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\130', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\131', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\120', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\103', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\126', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\170', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\132', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\142', '\111', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\101', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\167', '\146', '\124', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\146', '\113', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\165', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\161', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\154', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\162', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\170', '\127', '\040', '\146', '\157', '\040', '\061', - '\012', '\123', '\167', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\166', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\116', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\130', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\104', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\113', '\172', '\040', '\165', '\163', '\040', '\061', - '\012', '\161', '\152', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\153', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\106', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\115', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\121', '\145', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\131', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\142', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\142', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\126', '\153', '\040', '\156', '\144', '\040', '\061', - '\012', '\142', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\144', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\161', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\143', 
'\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\123', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\172', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\121', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\107', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\127', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\130', '\162', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\112', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\127', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\142', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\167', '\120', '\040', '\151', '\156', '\040', '\061', - '\012', '\154', '\127', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\110', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\143', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\144', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\115', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\156', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\171', '\131', '\040', '\156', '\171', '\040', '\061', - '\012', '\141', '\106', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\171', 
'\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\142', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\152', '\101', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\156', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\154', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\155', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\121', '\157', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\170', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\122', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\116', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\151', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\131', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\170', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\152', '\101', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\103', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\112', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\143', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\102', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\131', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\110', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\166', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\121', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\142', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\170', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\170', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\155', '\106', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\102', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\110', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\126', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\146', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\166', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\115', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\156', 
'\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\121', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\157', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\153', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\110', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\167', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\160', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\125', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\170', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\126', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\112', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\102', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\155', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\113', 
'\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\156', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\103', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\125', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\115', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\107', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\152', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\110', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\132', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\166', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\154', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\155', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\106', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\166', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\161', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\166', '\116', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\143', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\153', 
'\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\105', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\167', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\147', '\130', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\123', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\167', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\172', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\162', '\123', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\110', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\142', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\146', '\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\167', '\102', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\155', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\172', 
'\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\126', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\167', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\111', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\160', '\125', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\167', '\115', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\153', '\101', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\125', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\113', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\170', '\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\141', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\146', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\132', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\130', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\104', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\146', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\141', '\121', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\106', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\152', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\160', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\115', '\155', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\113', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\167', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\160', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\170', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\115', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\167', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\102', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\101', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\123', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\127', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\142', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\157', '\124', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\154', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\143', 
'\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\111', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\172', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\172', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\121', '\040', '\153', '\165', '\040', '\061', - '\012', '\154', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\167', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\122', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\117', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\165', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\132', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\142', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\104', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\155', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\167', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\117', '\161', 
'\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\167', '\111', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\152', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\117', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\146', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\161', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\104', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\155', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\126', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\165', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\146', '\116', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\146', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\142', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\170', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\130', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\172', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\150', 
'\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\125', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\171', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\111', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\166', '\116', '\040', '\163', '\164', '\040', '\061', - '\012', '\112', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\126', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\142', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\146', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\107', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\117', '\166', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\152', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\120', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\160', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\163', '\102', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\170', 
'\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\106', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\157', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\155', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\142', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\106', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\154', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\171', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\152', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\131', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\165', '\112', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\145', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\162', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\156', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\155', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\166', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\155', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\172', '\117', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\121', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\120', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\120', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\127', 
'\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\103', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\145', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\132', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\103', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\166', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\122', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\172', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\154', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\127', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\132', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\113', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\143', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\103', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\101', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\125', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\115', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\125', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\132', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\167', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\164', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\122', 
'\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\102', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\147', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\127', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\167', '\120', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\166', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\111', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\167', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\114', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\151', '\130', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\161', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\163', '\114', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\165', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\105', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\107', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\152', 
'\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\125', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\155', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\153', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\167', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\155', '\123', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\171', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\142', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\127', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\167', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\104', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\123', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\121', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\162', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\107', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\162', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\154', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\106', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\142', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\116', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\121', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\126', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\160', '\120', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\154', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\153', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\106', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\121', '\157', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\154', '\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\154', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\155', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\127', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\155', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\114', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\154', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\161', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\147', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\107', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\167', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\146', 
'\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\155', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\142', '\105', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\153', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\117', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\146', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\146', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\116', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\104', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\142', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\153', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\101', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\106', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\167', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\127', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\125', '\157', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\104', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\107', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\122', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\142', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\146', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\146', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\104', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\106', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\163', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\152', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\126', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\166', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\116', 
'\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\122', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\107', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\132', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\132', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\115', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\120', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\172', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\172', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\103', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\146', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\167', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\153', '\115', '\040', '\153', '\157', '\040', '\061', - '\012', '\166', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\120', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\112', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\155', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\115', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\116', 
'\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\162', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\163', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\123', '\144', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\127', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\167', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\152', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\132', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\113', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\144', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\166', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\160', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\164', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\171', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\113', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\126', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\120', 
'\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\106', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\170', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\131', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\167', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\121', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\142', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\131', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\170', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\122', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\160', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\142', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\121', '\171', '\040', '\164', '\157', '\040', '\061', - '\012', '\166', '\170', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\160', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\120', 
'\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\156', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\153', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\110', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\146', '\123', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\103', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\147', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\106', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\104', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\111', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\164', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\122', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\103', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\152', '\124', '\040', '\162', '\157', '\040', '\061', - '\012', '\162', '\152', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\153', 
'\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\101', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\114', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\152', '\123', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\126', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\105', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\107', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\117', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\120', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\163', '\113', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\114', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\103', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\116', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\124', 
'\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\120', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\106', '\166', '\040', '\162', '\157', '\040', '\061', - '\012', '\122', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\161', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\155', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\165', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\147', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\144', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\101', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\167', '\117', '\040', '\167', '\141', '\040', '\061', - '\012', '\145', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\121', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\141', '\106', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\146', '\102', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\160', '\101', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\107', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\127', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\172', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\163', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\121', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\152', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\146', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\142', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\113', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\130', '\141', '\040', '\141', '\162', '\040', '\061', - '\012', '\167', '\152', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\127', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\152', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\122', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\147', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\172', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\145', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\152', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\155', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\107', 
'\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\113', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\121', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\127', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\103', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\113', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\157', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\171', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\112', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\107', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\123', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\106', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\155', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\144', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\124', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\152', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\110', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\122', '\156', '\040', '\141', '\162', '\040', '\061', - '\012', '\130', '\154', 
'\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\116', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\106', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\116', '\154', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\120', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\172', '\112', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\111', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\123', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\167', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\127', '\147', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\114', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\111', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\157', '\113', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\114', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\110', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\170', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\127', 
'\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\107', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\152', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\146', '\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\132', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\157', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\170', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\131', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\132', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\130', '\154', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\121', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\142', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\105', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\116', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\162', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\170', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\121', 
'\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\160', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\116', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\142', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\147', '\115', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\145', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\126', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\126', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\115', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\124', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\120', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\103', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\166', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\115', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\152', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\155', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\154', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\110', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\163', '\116', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\117', '\154', 
'\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\167', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\120', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\122', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\147', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\106', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\170', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\110', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\113', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\160', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\131', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\170', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\157', '\153', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\160', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\144', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\122', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\104', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\161', 
'\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\147', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\113', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\104', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\167', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\104', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\147', '\146', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\125', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\142', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\153', '\106', '\040', '\153', '\157', '\040', '\061', - '\012', '\120', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\142', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\123', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\167', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\146', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\167', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\172', 
'\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\121', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\114', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\125', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\112', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\122', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\130', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\114', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\115', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\166', '\107', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\164', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\170', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\147', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\154', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\143', '\117', '\040', '\152', '\141', '\040', '\061', - '\012', '\163', '\103', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\102', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\132', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\117', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\112', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\152', 
'\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\103', '\167', '\040', '\155', '\141', '\040', '\061', - '\012', '\150', '\170', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\124', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\144', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\115', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\166', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\151', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\167', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\164', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\103', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\111', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\131', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\146', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\121', 
'\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\130', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\105', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\120', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\142', '\127', '\040', '\142', '\145', '\040', '\061', - '\012', '\153', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\110', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\130', '\157', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\170', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\170', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\127', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\132', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\117', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\124', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\162', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\116', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\105', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\112', 
'\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\110', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\160', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\104', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\170', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\154', '\126', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\113', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\163', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\166', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\146', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\170', '\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\126', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\121', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\130', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\150', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\153', 
'\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\150', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\120', '\163', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\104', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\114', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\154', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\124', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\144', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\110', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\104', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\120', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\110', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\160', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\120', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\116', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\171', 
'\125', '\040', '\160', '\162', '\040', '\061', - '\012', '\123', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\121', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\144', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\146', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\155', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\107', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\106', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\113', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\121', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\157', '\116', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\165', '\130', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\146', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\111', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\153', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\116', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\165', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\120', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\113', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\141', '\114', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\120', 
'\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\160', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\152', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\170', '\102', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\143', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\126', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\155', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\152', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\172', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\164', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\113', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\130', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\122', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\147', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\160', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\127', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\167', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\170', 
'\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\132', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\172', '\115', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\142', '\112', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\160', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\145', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\146', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\170', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\102', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\164', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\146', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\102', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\127', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\123', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\161', 
'\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\124', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\162', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\112', '\142', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\130', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\156', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\163', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\126', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\111', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\126', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\122', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\150', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\124', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\160', '\131', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\130', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\131', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\132', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\167', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\106', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\167', '\122', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\111', '\157', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\167', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\106', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\127', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\105', '\141', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\153', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\154', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\156', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\146', '\105', '\040', '\153', '\141', '\040', '\061', - '\012', '\111', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\162', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\120', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\120', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\121', '\171', 
'\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\121', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\144', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\103', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\132', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\127', '\154', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\125', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\142', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\142', '\124', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\170', '\115', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\110', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\153', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\142', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\152', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\130', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\116', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\112', 
'\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\104', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\125', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\127', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\120', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\116', '\142', '\040', '\153', '\157', '\040', '\061', - '\012', '\127', '\144', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\112', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\155', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\124', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\155', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\121', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\120', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\170', '\172', '\040', '\172', '\145', '\040', '\061', - '\012', '\104', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\115', 
'\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\115', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\112', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\146', '\116', '\040', '\146', '\157', '\040', '\061', - '\012', '\144', '\121', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\165', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\120', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\146', '\115', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\167', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\141', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\155', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\146', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\152', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\162', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\170', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\170', '\110', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\122', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\152', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\123', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\162', 
'\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\155', '\110', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\146', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\112', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\167', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\122', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\161', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\172', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\154', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\142', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\160', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\101', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\124', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\120', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\147', 
'\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\171', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\102', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\111', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\104', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\112', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\101', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\106', '\163', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\104', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\144', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\167', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\131', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\155', '\107', '\040', '\155', '\145', '\040', '\061', - '\012', '\113', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\164', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\112', 
'\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\167', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\126', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\115', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\170', '\121', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\112', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\156', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\146', '\123', '\040', '\156', '\171', '\040', '\061', - '\012', '\115', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\163', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\112', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\167', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\170', '\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\112', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\107', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\123', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\126', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\112', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\154', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\105', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\161', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\112', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\127', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\166', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\152', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\153', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\166', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\171', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\116', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\114', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\126', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\117', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\170', '\114', '\040', '\146', '\157', '\040', '\061', - '\012', '\163', '\156', 
'\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\127', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\124', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\126', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\132', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\126', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\167', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\123', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\116', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\146', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\143', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\124', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\141', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\152', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\115', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\164', '\102', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\146', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\170', '\117', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\172', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\115', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\132', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\103', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\160', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\130', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\153', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\113', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\160', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\116', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\160', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\130', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\146', '\113', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\160', '\127', '\040', '\160', '\162', '\040', '\061', - '\012', '\165', '\167', 
'\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\156', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\166', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\107', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\132', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\142', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\166', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\154', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\167', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\154', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\116', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\146', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\103', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\127', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\166', 
'\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\167', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\107', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\166', '\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\122', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\121', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\153', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\155', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\172', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\104', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\125', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\130', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\130', '\171', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\142', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\131', '\144', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\157', '\130', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\147', 
'\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\121', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\153', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\126', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\171', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\102', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\170', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\120', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\127', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\111', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\156', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\142', '\120', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\101', '\157', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\146', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\163', '\110', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\170', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\115', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\122', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\152', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\117', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\167', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\114', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\120', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\103', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\162', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\104', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\106', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\160', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\155', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\103', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\104', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\106', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\160', '\117', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\146', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\132', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\122', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\104', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\120', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\166', '\120', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\153', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\116', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\113', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\121', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\170', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\166', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\107', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\152', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\161', 
'\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\151', '\106', '\040', '\164', '\151', '\040', '\061', - '\012', '\132', '\172', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\131', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\152', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\167', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\153', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\103', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\171', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\151', '\171', '\102', '\040', '\151', '\156', '\040', '\061', - '\012', '\150', '\121', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\160', '\121', '\040', '\151', '\156', '\040', '\061', - '\012', '\125', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\153', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\160', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\156', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\167', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\144', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\121', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\167', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\104', 
'\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\144', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\105', '\157', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\141', '\166', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\172', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\110', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\106', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\116', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\102', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\122', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\154', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\172', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\131', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\155', '\122', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\130', '\171', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\171', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\130', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\112', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\131', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\172', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\152', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\172', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\161', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\167', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\152', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\146', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\122', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\144', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\101', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\153', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\116', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\116', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\155', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\146', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\161', 
'\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\107', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\163', '\110', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\167', '\106', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\120', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\104', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\121', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\111', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\103', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\106', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\112', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\151', '\111', '\040', '\151', '\156', '\040', '\061', - '\012', '\122', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\116', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\126', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\120', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\113', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\104', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\106', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\126', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\152', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\164', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\116', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\141', '\123', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\167', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\111', '\157', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\150', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\102', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\107', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\132', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\142', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\132', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\172', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\104', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\146', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', 
'\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\161', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\172', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\123', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\170', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\130', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\142', '\124', '\040', '\142', '\145', '\040', '\061', - '\012', '\163', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\120', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\113', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\124', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\125', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\104', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\125', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\171', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\103', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\130', 
'\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\107', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\156', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\120', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\146', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\126', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\104', '\172', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\146', '\107', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\130', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\147', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\170', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\141', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\130', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\166', 
'\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\121', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\166', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\170', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\112', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\123', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\122', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\103', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\107', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\116', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\110', '\153', '\040', '\157', '\156', '\040', '\061', - '\012', '\127', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\166', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\153', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\131', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\157', '\132', '\040', '\157', '\156', '\040', '\061', - '\012', '\156', '\107', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\155', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\155', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\103', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\130', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\113', 
'\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\105', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\167', '\123', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\171', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\121', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\127', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\144', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\155', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\144', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\162', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\102', '\160', '\040', '\160', '\157', '\040', '\061', - '\012', '\146', '\153', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\145', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\107', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\146', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\123', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\121', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\160', 
'\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\115', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\141', '\161', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\153', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\127', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\171', '\112', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\131', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\162', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\113', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\113', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\172', '\163', '\124', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\111', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\146', '\125', '\040', '\146', '\157', '\040', '\061', - '\012', '\127', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\127', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\116', '\167', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\146', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\162', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\126', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\126', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\171', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\163', '\105', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\103', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\167', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\146', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\105', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\117', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\160', '\123', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\111', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\164', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\107', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\166', 
'\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\116', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\146', '\113', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\131', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\115', '\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\157', '\102', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\163', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\155', '\111', '\040', '\155', '\145', '\040', '\061', - '\012', '\164', '\155', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\116', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\165', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\152', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\117', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\107', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\110', '\160', '\040', '\141', '\154', '\040', '\061', - '\012', '\161', '\147', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\142', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\161', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', 
'\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\153', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\104', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\121', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\112', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\115', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\152', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\153', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\116', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\157', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\152', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\106', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\152', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\156', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\112', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\132', '\162', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\167', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\111', '\161', 
'\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\167', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\102', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\167', '\106', '\040', '\151', '\156', '\040', '\061', - '\012', '\162', '\110', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\123', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\113', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\102', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\155', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\131', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\107', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\121', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\166', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\170', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\116', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\154', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\154', 
'\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\142', '\104', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\101', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\110', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\170', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\126', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\160', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\152', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\107', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\151', '\171', '\120', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\155', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\155', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\102', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\105', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\103', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\172', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\111', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\131', 
'\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\161', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\155', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\132', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\123', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\113', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\127', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\143', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\166', '\102', '\040', '\160', '\157', '\040', '\061', - '\012', '\164', '\147', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\125', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\132', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\144', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\157', '\111', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\167', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\112', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\170', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\152', 
'\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\111', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\171', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\142', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\153', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\145', '\157', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\130', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\106', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\112', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\163', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\115', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\156', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\161', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\103', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\117', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\154', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\142', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\152', '\116', '\040', '\163', '\164', '\040', '\061', - '\012', '\125', '\152', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\126', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\155', '\115', '\040', '\155', '\145', '\040', '\061', - '\012', '\126', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\132', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\116', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\142', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\155', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\106', '\143', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\123', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\155', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\156', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\171', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\172', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\147', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\126', 
'\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\150', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\127', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\164', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\111', '\157', '\040', '\150', '\157', '\040', '\061', - '\012', '\153', '\146', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\112', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\145', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\165', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\142', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\130', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\124', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\172', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\146', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\153', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\105', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\167', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\121', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\104', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\164', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\164', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\167', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\130', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\146', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\116', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\154', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\146', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\125', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\124', '\144', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\131', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\143', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\102', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\112', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\162', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\161', 
'\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\131', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\114', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\156', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\106', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\120', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\116', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\154', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\114', '\170', '\040', '\152', '\157', '\040', '\061', - '\012', '\152', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\167', '\124', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\107', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\164', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\156', '\127', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\153', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\111', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\132', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\117', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\144', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\142', 
'\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\153', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\172', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\127', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\170', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\160', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\163', '\110', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\111', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\106', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\107', '\153', '\040', '\157', '\156', '\040', '\061', - '\012', '\110', '\156', '\143', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\122', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\107', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\131', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\121', '\157', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\116', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\144', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\107', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\162', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\166', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\132', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\104', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\120', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\147', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\127', '\167', '\040', '\157', '\167', '\040', '\061', - '\012', '\155', '\112', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\130', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\131', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\110', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\144', '\120', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\106', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\163', '\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\101', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\154', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\171', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\170', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\126', '\162', '\040', '\151', '\156', '\040', '\061', - '\012', '\172', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\104', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\102', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\161', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\153', '\163', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\120', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\146', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\112', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\127', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\120', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\170', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\120', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\162', '\152', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\167', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\163', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\107', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\127', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\102', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\142', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\160', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\155', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\131', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\142', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\171', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\102', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\107', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\170', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\142', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\153', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\127', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\117', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\120', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\163', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\164', 
'\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\157', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\167', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\164', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\162', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\126', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\164', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\110', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\147', '\163', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\154', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\152', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\166', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\111', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\131', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\126', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\156', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\155', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\152', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\121', 
'\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\116', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\146', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\102', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\112', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\107', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\132', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\107', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\127', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\163', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\170', '\123', '\040', '\156', '\171', '\040', '\061', - '\012', '\162', '\170', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\116', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\116', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\172', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\160', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\130', 
'\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\154', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\152', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\112', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\167', '\130', '\040', '\151', '\156', '\040', '\061', - '\012', '\156', '\126', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\172', '\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\167', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\161', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\116', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\165', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\113', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\111', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\115', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\127', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\115', '\143', '\040', '\156', '\144', '\040', '\061', - '\012', '\132', '\150', 
'\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\154', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\125', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\110', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\124', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\152', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\170', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\116', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\114', '\147', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\144', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\112', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\130', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\167', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\145', '\166', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\160', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\101', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\142', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\146', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\130', 
'\144', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\101', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\142', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\155', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\172', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\147', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\112', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\132', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\146', '\101', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\155', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\116', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\126', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\122', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\147', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\152', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\152', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\155', '\111', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\115', '\160', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\153', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\101', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\113', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\102', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\105', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\170', '\110', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\166', '\101', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\143', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\123', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\102', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\127', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\167', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\153', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\116', '\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\125', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\146', '\120', '\040', '\146', '\157', '\040', '\061', - '\012', '\142', '\131', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\170', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\166', 
'\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\125', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\170', '\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\120', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\170', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\105', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\142', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\167', '\123', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\115', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\106', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\146', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\122', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\156', '\161', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\160', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\110', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\124', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\152', 
'\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\141', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\120', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\154', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\172', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\102', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\170', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\166', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\103', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\115', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\122', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\153', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\131', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\120', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\107', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\114', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\107', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\107', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\144', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\152', 
'\110', '\040', '\151', '\156', '\040', '\061', - '\012', '\155', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\116', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\115', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\127', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\142', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\107', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\145', '\170', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\127', '\152', '\040', '\152', '\157', '\040', '\061', - '\012', '\160', '\121', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\117', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\164', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\162', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\130', '\160', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\125', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\113', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\127', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\161', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\112', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\125', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\112', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\166', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\141', '\120', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\111', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\146', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\132', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\155', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\155', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\103', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\106', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\113', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\146', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\166', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\142', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\121', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\121', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\131', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\120', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\117', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\116', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\110', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\144', '\105', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\126', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\120', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\122', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\157', '\105', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\156', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\166', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\157', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\155', 
'\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\104', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\104', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\153', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\170', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\131', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\124', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\154', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\163', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\146', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\142', '\107', '\040', '\142', '\145', '\040', '\061', - '\012', '\112', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\127', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\104', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\127', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\130', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\166', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\166', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\104', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\114', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\155', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\146', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\101', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\146', '\123', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\171', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\121', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\160', '\130', '\040', '\160', '\162', '\040', '\061', - '\012', '\132', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\105', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\121', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\160', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\132', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\167', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\122', '\152', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\151', '\127', '\040', '\151', '\156', '\040', '\061', - '\012', '\150', '\161', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\102', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\160', '\127', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\110', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\150', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\115', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\127', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\107', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\152', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\166', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\166', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\155', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\153', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\116', 
'\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\147', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\167', '\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\131', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\124', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\112', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\172', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\171', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\171', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\102', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\166', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\142', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\142', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\167', '\125', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\112', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\111', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\166', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\122', 
'\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\117', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\105', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\162', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\170', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\111', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\102', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\124', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\103', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\147', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\160', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\146', '\105', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\170', '\122', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\106', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\143', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\102', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\126', 
'\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\166', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\131', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\116', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\121', '\151', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\167', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\120', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\166', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\153', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\155', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\144', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\163', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\166', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\127', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\131', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\131', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\110', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\142', '\115', '\040', '\160', '\162', '\040', '\061', - '\012', '\110', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\161', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\143', 
'\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\127', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\154', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\156', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\115', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\113', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\157', '\126', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\117', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\154', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\146', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\113', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\151', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\123', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\147', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\143', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\116', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', 
'\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\120', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\155', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\170', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\166', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\162', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\120', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\155', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\161', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\103', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\155', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\167', '\120', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\126', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\116', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\166', '\107', '\040', '\166', '\145', '\040', '\061', - '\012', '\126', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\154', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\131', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\142', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\121', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\164', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\145', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\161', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\114', '\146', '\040', '\160', '\157', '\040', '\061', - '\012', '\170', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\146', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\126', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\151', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\163', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\156', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\162', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\122', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\172', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\142', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\153', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\130', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\106', '\167', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\110', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\162', '\102', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\116', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\107', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\105', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\126', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\167', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\111', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\161', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\152', '\166', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\123', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\170', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\110', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\126', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\125', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\170', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\126', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\144', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\147', 
'\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\150', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\146', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\142', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\146', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\172', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\110', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\170', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\155', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\155', '\110', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\172', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\155', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\130', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\146', '\104', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\103', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\142', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\154', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\160', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\155', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\131', 
'\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\113', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\144', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\110', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\130', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\166', '\124', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\151', '\103', '\040', '\151', '\156', '\040', '\061', - '\012', '\147', '\153', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\112', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\160', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\120', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\122', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\147', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\115', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\110', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\111', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\132', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\161', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\157', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\121', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\121', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\106', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\110', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\147', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\103', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\142', '\123', '\040', '\142', '\145', '\040', '\061', - '\012', '\151', '\110', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\153', '\107', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\167', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\104', 
'\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\166', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\116', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\131', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\110', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\106', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\155', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\102', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\147', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\131', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\167', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\167', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\105', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\107', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\153', 
'\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\112', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\161', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\146', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\126', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\167', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\156', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\125', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\102', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\115', '\167', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\126', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\166', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\131', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\102', 
'\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\111', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\117', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\104', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\124', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\112', '\162', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\167', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\144', '\126', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\152', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\127', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\142', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\144', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\167', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\101', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\171', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\120', 
'\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\153', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\125', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\120', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\157', '\106', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\131', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\131', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\167', '\127', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\141', '\102', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\104', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\113', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\146', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\115', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\114', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\167', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\172', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\111', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\144', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\147', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\167', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\171', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\102', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\117', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\114', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\144', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\146', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\161', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\165', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\110', '\142', '\040', '\160', '\157', '\040', '\061', - '\012', '\166', '\122', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\160', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\127', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\142', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\132', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\102', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\152', '\111', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\166', 
'\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\167', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\102', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\170', '\110', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\170', '\110', '\040', '\146', '\157', '\040', '\061', - '\012', '\164', '\130', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\102', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\112', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\152', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\126', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\122', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\104', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\153', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\155', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\141', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\111', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\155', '\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\155', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\154', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\151', '\126', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\132', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\120', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\125', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\144', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\157', '\125', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\112', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\125', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\166', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\121', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\122', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\111', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\107', '\147', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\116', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\166', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\155', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\160', '\117', '\040', '\160', '\157', '\040', '\061', - '\012', '\164', '\105', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\146', '\114', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\131', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\127', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\172', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\124', '\164', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\126', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\143', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\167', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\157', '\112', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\104', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\112', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\106', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\154', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\142', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\102', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\111', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\160', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\116', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\123', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\102', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\157', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\113', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\154', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\144', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\160', '\112', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\147', '\161', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\150', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\165', '\131', '\040', '\165', '\156', '\040', '\061', - '\012', '\152', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\165', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\172', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\106', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\146', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\111', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\161', 
'\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\131', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\124', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\165', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\110', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\122', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\120', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\113', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\160', '\101', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\153', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\123', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\170', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\152', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\117', '\151', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\171', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\104', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\171', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\166', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\126', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\127', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\101', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\132', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\121', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\142', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\170', '\104', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\170', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\110', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\167', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\142', '\101', '\040', '\142', '\145', '\040', '\061', - '\012', '\122', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\123', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\126', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\121', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\155', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\156', '\101', '\040', '\141', '\156', '\040', '\061', - '\012', '\120', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\163', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\146', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\161', '\130', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\124', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\160', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\131', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\102', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\105', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\111', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\116', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\117', '\146', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\130', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\106', '\155', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\153', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\146', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\142', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\166', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\155', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\106', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\106', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\106', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\142', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\155', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\106', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\104', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\106', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\107', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\164', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\172', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\112', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\115', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\147', 
'\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\167', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\156', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\142', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\144', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\110', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\166', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\125', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\152', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\104', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\142', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\123', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\117', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\164', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\167', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\115', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\153', 
'\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\152', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\120', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\170', '\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\131', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\162', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\143', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\112', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\125', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\130', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\104', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\170', '\107', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\117', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\147', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\160', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\116', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\121', 
'\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\106', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\104', '\142', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\163', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\132', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\103', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\170', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\121', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\113', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\117', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\170', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\167', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\107', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\122', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\166', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\126', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\155', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\144', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\112', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\150', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\126', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\164', '\121', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\171', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\132', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\113', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\152', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\152', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\147', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\116', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\112', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\160', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\167', '\107', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\145', '\112', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\162', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\154', 
'\111', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\107', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\103', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\121', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\114', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\167', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\121', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\146', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\122', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\125', '\157', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\160', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\110', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\127', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\102', '\161', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\127', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\146', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\127', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\162', '\166', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\154', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\142', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\156', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\106', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\112', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\155', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\156', '\111', '\040', '\156', '\164', '\040', '\061', - '\012', '\161', '\117', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\171', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\125', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\116', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\105', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\160', '\104', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\170', '\114', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\145', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\125', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\167', '\117', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\162', 
'\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\124', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\170', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\110', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\112', '\163', '\040', '\157', '\156', '\040', '\061', - '\012', '\163', '\122', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\121', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\144', '\116', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\170', '\122', '\040', '\155', '\145', '\040', '\061', - '\012', '\130', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\153', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\160', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\132', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\156', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\132', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\162', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\127', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\143', 
'\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\120', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\125', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\144', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\172', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\162', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\155', '\116', '\040', '\155', '\145', '\040', '\061', - '\012', '\117', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\114', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\112', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\107', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\115', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\124', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\110', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\127', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\170', 
'\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\170', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\166', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\103', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\146', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\160', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\154', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\170', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\132', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\153', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\127', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\161', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\154', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\113', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\107', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\103', 
'\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\171', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\117', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\155', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\132', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\117', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\127', '\164', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\170', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\102', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\116', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\124', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\104', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\122', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\125', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\102', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\152', 
'\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\111', '\171', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\111', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\132', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\162', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\117', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\162', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\147', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\105', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\153', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\152', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\155', '\127', '\040', '\155', '\145', '\040', '\061', - '\012', '\107', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\124', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\114', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\167', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\144', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\163', 
'\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\125', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\111', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\162', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\142', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\166', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\162', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\170', '\111', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\103', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\130', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\104', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\144', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\142', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\130', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\165', '\121', '\040', '\165', '\156', '\040', '\061', - '\012', '\153', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\127', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\103', 
'\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\132', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\144', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\131', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\110', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\126', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\141', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\170', '\127', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\156', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\106', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\167', '\115', '\040', '\167', '\141', '\040', '\061', - '\012', '\104', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\167', '\111', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\154', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\103', 
'\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\162', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\131', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\154', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\114', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\154', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\143', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\162', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\152', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\112', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\114', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\121', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\150', '\153', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\114', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\130', '\147', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\113', 
'\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\112', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\156', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\153', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\127', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\144', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\164', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\111', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\145', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\162', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\104', '\160', '\040', '\160', '\157', '\040', '\061', - '\012', '\103', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\170', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\102', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\130', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\102', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\115', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\170', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\114', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\160', 
'\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\113', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\167', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\102', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\156', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\155', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\113', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\107', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\114', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\120', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\115', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\105', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\155', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\103', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\166', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\116', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\126', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\121', '\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\112', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\172', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\130', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\162', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\102', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\122', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\111', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\106', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\113', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\102', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\104', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\106', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\166', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\122', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\152', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\104', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\146', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\107', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\107', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\170', '\126', '\040', '\146', '\157', '\040', '\061', - '\012', '\151', '\120', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\147', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\111', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\150', '\165', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\166', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\104', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\164', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\106', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\152', '\123', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\160', '\113', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\104', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\113', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\132', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\170', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\124', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\153', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\105', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\110', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\104', '\153', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\161', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\170', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\170', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\124', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\121', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\125', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\163', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\107', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\113', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\167', '\102', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\167', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\162', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\160', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\106', 
'\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\171', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\167', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\103', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\147', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\164', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\161', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\155', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\121', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\154', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\105', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\166', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\166', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\112', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\171', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\164', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\132', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\105', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\172', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\146', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\113', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\170', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\124', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\107', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\127', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\162', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\160', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\121', '\163', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\142', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\126', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\114', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\162', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\163', '\122', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\167', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\156', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\120', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\125', '\143', 
'\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\167', '\162', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\146', '\104', '\040', '\146', '\157', '\040', '\061', - '\012', '\167', '\171', '\110', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\102', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\115', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\163', '\171', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\160', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\147', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\156', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\126', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\113', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\142', '\144', '\127', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\116', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\111', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\110', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\147', '\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\146', 
'\103', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\153', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\125', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\103', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\154', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\102', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\122', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\107', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\166', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\155', '\102', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\170', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\115', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\127', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\121', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\127', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\156', '\165', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\166', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\155', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\110', '\154', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\155', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\132', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\150', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\143', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\102', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\127', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\130', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\152', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\172', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\170', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\143', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\157', '\130', '\040', '\160', '\157', '\040', '\061', - '\012', '\170', '\162', '\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\127', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\161', 
'\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\127', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\123', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\146', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\152', '\144', '\040', '\144', '\157', '\040', '\061', - '\012', '\121', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\171', '\113', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\155', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\165', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\157', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\114', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\147', '\115', '\162', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\103', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\130', '\157', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\153', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\124', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\116', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\130', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\126', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\111', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\156', 
'\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\167', '\103', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\157', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\104', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\144', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\116', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\131', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\123', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\111', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\141', '\152', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\131', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\113', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\111', '\171', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\146', '\113', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\146', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\154', '\155', '\040', '\154', '\145', '\040', '\061', - '\012', '\103', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\161', 
'\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\106', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\126', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\103', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\145', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\110', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\116', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\155', '\130', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\106', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\142', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\131', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\147', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\167', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\171', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\172', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\121', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\142', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\123', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\131', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\161', '\147', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\131', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\160', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\126', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\166', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\146', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\167', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\121', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\127', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\161', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\143', '\112', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\167', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\144', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\160', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\143', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\111', 
'\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\144', '\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\126', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\106', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\114', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\160', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\112', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\112', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\113', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\145', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\170', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\103', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\122', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\115', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\113', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\154', 
'\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\153', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\154', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\122', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\166', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\170', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\103', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\131', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\153', '\116', '\040', '\153', '\141', '\040', '\061', - '\012', '\142', '\121', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\162', '\104', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\152', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\155', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\112', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\143', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\172', '\126', '\040', '\157', '\156', '\040', '\061', - '\012', '\155', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\132', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\130', 
'\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\147', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\121', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\104', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\110', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\111', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\164', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\144', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\103', '\155', '\040', '\157', '\167', '\040', '\061', - '\012', '\166', '\126', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\142', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\170', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\126', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\147', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\127', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\167', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\131', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\164', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\130', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\151', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\160', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\131', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\155', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\102', '\160', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\130', '\166', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\112', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\172', '\160', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\143', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\167', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\117', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\117', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\146', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\163', 
'\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\122', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\104', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\146', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\166', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\127', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\172', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\160', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\121', '\145', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\131', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\141', '\121', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\166', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\125', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\151', '\142', '\110', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\166', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\167', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\106', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\131', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\160', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\114', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\167', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\171', '\113', '\040', '\156', '\171', '\040', '\061', - '\012', '\123', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\170', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\153', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\112', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\164', '\152', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\120', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\162', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\170', '\131', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\163', '\105', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\153', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\130', '\166', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\156', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\155', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\155', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\106', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\126', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\146', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\155', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\144', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\127', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\131', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\157', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\152', '\102', '\040', '\145', '\162', '\040', '\061', - '\012', '\104', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\127', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\154', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\144', 
'\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\167', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\132', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\121', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\167', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\131', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\101', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\157', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\167', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\112', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\164', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\167', '\131', '\040', '\160', '\162', '\040', '\061', - '\012', '\115', '\152', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\162', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\130', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\105', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\156', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\121', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\152', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\116', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\115', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\126', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\166', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\110', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\142', '\113', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\127', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\124', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\156', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\170', '\117', '\040', '\156', '\171', '\040', '\061', - '\012', '\106', '\161', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\106', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\104', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\125', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\110', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\120', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\110', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\111', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\106', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\166', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\171', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\105', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\144', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\161', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\142', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\110', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\126', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\110', '\167', '\040', '\165', '\156', '\040', '\061', - '\012', '\132', '\143', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\110', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\104', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\164', '\154', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\161', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\162', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\123', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\155', 
'\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\103', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\154', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\165', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\115', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\127', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\160', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\121', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\170', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\146', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\152', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\162', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\103', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\132', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\103', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\127', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\162', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\157', '\111', '\171', '\040', '\157', '\156', '\040', '\061', - '\012', '\162', '\146', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\102', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\171', '\171', '\126', '\040', '\156', '\171', '\040', '\061', - '\012', '\121', '\151', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\104', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\147', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\116', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\144', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\166', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\142', '\132', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\151', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\166', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\142', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\115', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\110', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\127', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\120', 
'\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\114', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\157', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\160', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\124', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\143', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\120', '\171', '\040', '\153', '\165', '\040', '\061', - '\012', '\146', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\131', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\123', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\104', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\142', '\112', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\146', '\117', '\040', '\156', '\171', '\040', '\061', - '\012', '\165', '\121', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\160', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\144', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\167', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\124', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\112', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\127', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\143', '\125', 
'\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\142', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\125', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\116', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\132', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\163', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\131', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\131', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\122', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\107', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\170', '\112', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\114', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\102', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\112', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\146', 
'\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\142', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\110', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\113', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\102', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\160', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\156', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\144', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\115', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\132', '\146', '\040', '\160', '\151', '\040', '\061', - '\012', '\145', '\131', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\124', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\104', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\170', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\146', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\160', 
'\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\103', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\120', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\103', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\116', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\122', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\141', '\145', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\102', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\117', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\155', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\120', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\142', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\130', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\147', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\130', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\116', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\141', '\117', 
'\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\146', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\167', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\152', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\152', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\115', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\106', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\146', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\132', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\102', '\142', '\040', '\142', '\151', '\040', '\061', - '\012', '\163', '\152', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\104', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\152', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\162', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\152', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\106', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\121', 
'\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\142', '\105', '\040', '\166', '\151', '\040', '\061', - '\012', '\125', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\111', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\123', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\105', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\146', '\110', '\040', '\142', '\145', '\040', '\061', - '\012', '\116', '\162', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\166', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\151', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\142', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\170', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\166', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\162', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\131', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\153', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\103', '\170', '\040', '\166', '\151', '\040', '\061', - '\012', '\132', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\127', 
'\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\104', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\146', '\105', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\166', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\157', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\142', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\116', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\124', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\152', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\126', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\112', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\153', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\130', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\113', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\120', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\110', '\143', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\152', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\171', 
'\105', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\102', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\120', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\155', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\147', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\144', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\166', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\162', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\166', '\146', '\040', '\166', '\151', '\040', '\061', - '\012', '\171', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\167', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\161', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\113', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\112', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\114', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\144', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\116', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\112', 
'\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\141', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\113', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\116', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\120', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\122', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\156', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\156', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\132', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\132', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\115', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\166', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\170', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\120', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\167', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\120', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\146', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\167', 
'\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\142', '\107', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\172', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\120', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\132', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\103', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\124', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\123', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\154', '\116', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\127', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\167', '\102', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\116', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\124', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\106', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\112', 
'\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\143', '\122', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\142', '\124', '\040', '\142', '\145', '\040', '\061', - '\012', '\106', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\167', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\142', '\126', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\123', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\154', '\142', '\102', '\040', '\154', '\145', '\040', '\061', - '\012', '\117', '\143', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\142', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\163', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\171', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\160', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\117', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\162', '\122', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\155', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\103', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\150', 
'\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\152', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\106', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\123', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\142', '\113', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\144', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\106', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\155', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\103', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\110', '\172', '\040', '\164', '\172', '\040', '\061', - '\012', '\150', '\152', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\164', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\155', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\122', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\103', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\111', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\163', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\116', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\142', 
'\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\106', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\122', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\112', '\162', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\167', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\126', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\147', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\101', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\120', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\160', '\120', '\040', '\151', '\156', '\040', '\061', - '\012', '\112', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\112', '\142', '\040', '\142', '\151', '\040', '\061', - '\012', '\152', '\170', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\167', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\122', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\146', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\124', '\144', '\160', '\040', '\160', '\157', '\040', '\061', - '\012', '\167', '\105', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\161', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\170', 
'\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\125', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\121', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\124', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\154', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\121', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\113', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\167', '\123', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\153', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\121', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\114', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\101', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\144', '\155', '\107', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\113', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\125', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\172', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\116', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\147', '\131', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\146', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\132', 
'\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\124', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\130', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\104', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\144', '\122', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\115', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\170', '\110', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\146', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\117', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\116', '\166', '\170', '\040', '\166', '\151', '\040', '\061', - '\012', '\161', '\141', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\107', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\132', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\106', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\160', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\113', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\161', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\171', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\155', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\103', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\126', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\126', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\121', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\164', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\110', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\172', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\131', '\171', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\170', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\164', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\127', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\103', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\106', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\145', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\120', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\152', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\150', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\103', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\153', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\124', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\170', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\166', '\113', '\040', '\166', '\151', '\040', '\061', - '\012', '\114', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\167', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\124', '\153', '\040', '\144', '\151', '\040', '\061', - '\012', '\146', '\163', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\152', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\121', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\120', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\155', '\103', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\163', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\104', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\110', 
'\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\106', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\144', '\114', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\131', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\113', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\103', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\161', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\116', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\157', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\144', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\160', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\164', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\127', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\102', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\154', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\146', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\166', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\155', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\107', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\162', '\171', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\110', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\114', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\154', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\142', '\102', '\040', '\142', '\151', '\040', '\061', - '\012', '\151', '\131', '\162', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\104', '\172', '\040', '\164', '\172', '\040', '\061', - '\012', '\170', '\163', '\112', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\115', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\125', '\165', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\170', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\167', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\120', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\104', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\126', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\113', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\157', '\132', 
'\152', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\101', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\115', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\166', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\106', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\126', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\114', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\130', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\150', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\154', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\142', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\120', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\120', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\104', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\152', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\152', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\115', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\131', '\142', '\040', '\151', '\156', '\040', '\061', - '\012', '\106', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\157', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\114', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\112', '\160', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\125', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\112', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\113', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\132', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\103', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\165', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', '\126', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\144', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\123', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\123', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\104', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\142', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\146', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\166', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\130', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\106', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\160', 
'\122', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\143', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\124', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\120', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\151', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\106', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\170', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\103', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\126', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\154', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\126', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\103', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\167', '\126', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\142', '\132', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\166', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\142', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\130', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\131', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\131', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\123', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\143', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\103', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\156', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\167', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\130', '\165', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\102', '\167', '\040', '\155', '\142', '\040', '\061', - '\012', '\167', '\155', '\106', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\112', '\170', '\040', '\170', '\145', '\040', '\061', - '\012', '\144', '\130', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\102', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\142', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\153', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\117', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\121', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\166', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\103', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\113', 
'\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\126', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\132', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\102', '\166', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\170', '\122', '\040', '\146', '\157', '\040', '\061', - '\012', '\166', '\155', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\120', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\106', '\153', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\131', '\171', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\125', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\172', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\155', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\166', '\132', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\142', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\131', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\106', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\160', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\166', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\156', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\170', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\115', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\162', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\107', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\143', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\113', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\170', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\121', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\142', '\105', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\131', '\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\113', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\160', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\103', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\122', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\161', 
'\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\111', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\116', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\166', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\107', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\102', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\115', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\126', '\170', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\103', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\166', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\170', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\115', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\142', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\103', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\124', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\142', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\152', 
'\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\131', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\166', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\157', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\126', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\110', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\142', '\123', '\040', '\142', '\145', '\040', '\061', - '\012', '\110', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\170', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\153', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\155', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\142', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\122', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\147', '\126', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\102', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\130', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\162', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\166', '\117', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\104', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\121', 
'\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\146', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\132', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\156', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\130', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\145', '\116', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\170', '\123', '\040', '\146', '\157', '\040', '\061', - '\012', '\163', '\116', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\125', '\165', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\172', '\127', '\040', '\157', '\156', '\040', '\061', - '\012', '\130', '\172', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\146', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\106', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\172', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\132', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\157', '\110', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\166', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\157', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\170', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\115', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\150', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\152', 
'\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\122', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\152', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\155', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\111', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\153', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\124', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\126', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\121', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\146', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\113', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\125', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\165', '\124', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\144', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\170', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\147', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\144', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\161', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\145', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\107', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\166', '\155', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\113', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\125', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\166', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\110', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\115', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\163', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\113', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\120', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\147', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\167', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\131', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\172', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\152', '\040', '\152', '\157', '\040', '\061', - '\012', '\113', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\160', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\144', '\101', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\127', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\123', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\166', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\142', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\162', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\125', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\105', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\152', '\110', '\040', '\152', '\157', '\040', '\061', - '\012', '\163', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\125', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\156', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\117', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\142', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\120', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\122', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\166', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\171', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\167', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\164', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\112', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\166', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', 
'\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\106', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\116', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\142', '\160', '\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\131', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\107', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\146', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\124', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\146', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\172', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\125', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\144', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\130', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\115', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\124', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\113', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\115', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\115', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\154', '\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\155', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\117', 
'\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\142', '\111', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\160', '\111', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\105', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\127', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\166', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\131', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\170', '\115', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\120', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\152', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\167', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\143', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\112', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\150', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\170', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\156', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\171', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\102', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\123', 
'\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\161', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\131', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\162', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\110', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\142', '\110', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\121', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\170', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\147', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\144', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\131', '\144', '\172', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\120', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\121', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\167', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\131', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\163', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\126', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\170', 
'\114', '\040', '\156', '\171', '\040', '\061', - '\012', '\131', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\115', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\141', '\111', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\121', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\161', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\146', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\124', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\146', '\121', '\040', '\142', '\145', '\040', '\061', - '\012', '\113', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\130', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\131', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\157', '\143', '\040', '\162', '\157', '\040', '\061', - '\012', '\166', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\132', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\144', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\147', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\156', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\146', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\142', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\142', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\160', 
'\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\157', '\127', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\112', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\121', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\166', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\104', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\167', '\106', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\113', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\111', '\165', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\147', '\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\112', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\147', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\116', '\150', '\040', '\150', '\157', '\040', '\061', - '\012', '\143', '\166', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\147', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\116', '\163', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\132', '\156', '\040', '\157', '\156', '\040', '\061', - '\012', '\165', '\125', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\132', 
'\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\107', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\123', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\143', '\107', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\161', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\142', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\145', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\165', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\167', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\120', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\123', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\120', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\172', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\143', '\110', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\154', '\131', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\166', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\166', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\122', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\116', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\113', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\112', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\117', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\162', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\127', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\124', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\103', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\141', '\117', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\162', '\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\110', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\153', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\130', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\170', '\111', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\132', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\154', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\167', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\110', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\127', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\121', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\145', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\121', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\122', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\102', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\170', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\115', '\166', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\143', '\122', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\102', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\127', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\171', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\165', '\103', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\162', 
'\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\171', '\121', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\163', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\166', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\162', '\114', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\166', '\130', '\040', '\157', '\156', '\040', '\061', - '\012', '\125', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\126', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\160', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\121', '\153', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\171', '\127', '\040', '\156', '\171', '\040', '\061', - '\012', '\162', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\130', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\143', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\164', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\156', 
'\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\155', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\152', '\102', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\144', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\153', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\116', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\146', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\127', '\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\127', '\164', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\146', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\132', '\142', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\167', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\125', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\107', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\167', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\142', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\121', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\132', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\115', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\106', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\104', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\104', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\125', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\110', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\124', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\132', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\106', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\154', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\153', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\166', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\111', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\114', '\154', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\145', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\154', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\143', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\164', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\153', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\112', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\121', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\120', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\155', '\117', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\164', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\161', 
'\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\102', '\153', '\040', '\151', '\156', '\040', '\061', - '\012', '\165', '\172', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\116', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\122', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\166', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\145', '\126', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\107', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\160', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\104', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\165', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\126', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\155', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\166', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\146', '\126', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\121', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\124', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\120', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\170', '\112', '\040', '\146', '\157', '\040', '\061', - '\012', '\161', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\112', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\163', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\122', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\143', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\126', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\161', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\152', '\113', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\153', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\115', '\152', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\142', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\114', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\121', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\155', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\113', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\161', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\126', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\142', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\130', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\117', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\110', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\121', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\106', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\131', '\142', '\040', '\157', '\156', '\040', '\061', - '\012', '\106', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\111', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\115', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\161', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\142', '\132', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\163', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\152', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\120', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\102', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\142', '\111', '\040', '\160', '\162', '\040', '\061', - '\012', '\131', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\170', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\171', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\172', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\115', 
'\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\150', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\117', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\111', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\131', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\170', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\146', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\153', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\131', '\153', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\122', '\147', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\117', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\126', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\101', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\113', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\103', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\151', '\126', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\162', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\105', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\122', '\162', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\142', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\115', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\132', 
'\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\106', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\172', '\153', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\113', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\142', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\110', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\172', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\115', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\153', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\113', '\155', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\107', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\160', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\143', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\127', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\144', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\155', '\111', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\143', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\167', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\144', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\132', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\113', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\172', 
'\101', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\171', '\124', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\145', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\160', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\171', '\107', '\040', '\156', '\171', '\040', '\061', - '\012', '\154', '\114', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\166', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\154', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\155', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\155', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\113', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\122', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\114', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\120', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\153', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\170', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\127', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\113', '\143', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\112', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\167', 
'\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\122', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\167', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\114', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\130', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\146', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\152', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\172', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\125', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\123', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\170', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\170', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\126', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\125', '\143', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\141', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\110', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\151', '\171', '\103', '\040', '\151', '\156', '\040', '\061', - '\012', '\124', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\112', '\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\112', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\112', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\116', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\144', 
'\101', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\152', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\161', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\107', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\121', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\164', '\114', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\126', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\113', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\141', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\111', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\130', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\106', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\167', '\115', '\040', '\151', '\156', '\040', '\061', - '\012', '\107', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\130', '\154', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\146', 
'\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\116', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\127', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\156', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\123', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\103', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\152', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\124', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\104', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\164', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\102', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\172', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\150', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\160', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\103', 
'\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\122', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\153', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\104', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\142', '\104', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\154', '\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\122', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\106', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\120', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\152', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\123', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\104', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\165', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\120', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\112', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\160', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\107', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\130', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\143', 
'\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\121', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\150', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\131', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\166', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\167', '\132', '\040', '\151', '\156', '\040', '\061', - '\012', '\172', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\155', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\113', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\146', '\131', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\125', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\161', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\167', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\130', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\120', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\111', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\161', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\146', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\160', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\116', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\130', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\172', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\155', '\102', '\040', '\155', '\145', '\040', '\061', - '\012', '\127', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\130', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\131', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\121', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\144', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\162', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\146', '\114', '\040', '\156', '\171', '\040', '\061', - '\012', '\171', '\131', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\142', '\110', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\113', 
'\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\146', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\112', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\102', '\172', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\141', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\112', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\110', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\141', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\150', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\131', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\155', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\150', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\147', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\155', '\171', '\040', '\155', '\145', '\040', '\061', - '\012', '\122', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\163', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\150', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\111', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\111', '\142', 
'\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\143', '\106', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\156', '\126', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\145', '\161', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\153', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\166', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\115', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\167', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\101', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\155', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\167', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\156', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\146', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\170', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\143', 
'\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\146', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\150', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\161', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\172', '\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\162', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\161', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\147', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\121', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\167', '\102', '\040', '\167', '\141', '\040', '\061', - '\012', '\154', '\107', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\144', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\104', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\114', '\163', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\116', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\114', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\127', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\152', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\131', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\145', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\166', 
'\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\153', '\101', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\152', '\115', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\147', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\130', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\124', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\122', '\150', '\172', '\040', '\150', '\141', '\040', '\061', - '\012', '\167', '\153', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\150', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\154', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\170', '\113', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\120', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\106', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\166', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\161', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\115', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\142', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\160', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\102', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\155', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\142', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\115', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', 
'\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\146', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\121', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\105', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\155', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\121', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\113', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\106', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\120', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\115', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\172', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\106', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\112', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\126', '\172', '\040', '\151', '\156', '\040', '\061', - '\012', '\157', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\143', '\116', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\124', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\111', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\132', 
'\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\172', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\131', '\154', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\110', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\163', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\172', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\102', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\124', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\113', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\142', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\166', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\146', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\103', '\163', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\107', '\146', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\142', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\166', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\110', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\154', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\124', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\107', 
'\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\124', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\160', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\167', '\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\107', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\156', '\161', '\124', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\115', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\113', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\116', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\154', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\126', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\115', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\167', '\103', '\040', '\154', '\145', '\040', '\061', - '\012', '\104', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\152', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\124', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\170', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\167', '\154', 
'\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\162', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\167', '\132', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\156', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\112', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\153', '\112', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\126', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\101', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\150', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\162', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\122', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\154', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\106', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\167', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\103', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\130', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\124', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\106', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\142', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\166', '\123', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\102', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\116', '\172', 
'\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\121', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\114', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\126', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\125', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\132', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\111', '\171', '\040', '\145', '\147', '\040', '\061', - '\012', '\150', '\126', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\121', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\146', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\113', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\150', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\107', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\142', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\131', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\166', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\110', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\130', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\163', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\126', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\160', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\127', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\103', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\155', '\146', 
'\110', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\111', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\146', '\130', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\152', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\155', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\147', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\105', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\117', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\110', '\152', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\165', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\132', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\125', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\114', '\163', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\153', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\167', '\121', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\162', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\142', '\110', '\040', '\142', '\145', '\040', '\061', - '\012', '\147', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\166', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\122', 
'\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\150', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\160', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\106', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\123', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\144', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\107', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\126', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\166', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\167', '\117', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\127', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\121', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\156', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\146', '\111', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\123', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\160', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\146', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\131', 
'\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\102', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\112', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\144', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\121', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\146', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\164', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\113', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\172', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\142', '\167', '\111', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\163', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\154', '\122', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\154', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\142', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\163', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\156', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\172', '\132', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\107', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\147', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\167', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\117', '\171', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\122', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\155', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\132', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\112', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\153', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\107', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\122', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\166', '\114', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\107', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\151', '\111', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\107', '\172', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\114', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\125', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\166', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\156', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\130', 
'\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\103', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\156', '\114', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\146', '\110', '\040', '\146', '\157', '\040', '\061', - '\012', '\151', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\110', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\131', '\167', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\104', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\123', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\103', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\141', '\132', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\142', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\172', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\110', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\161', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\105', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\146', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\107', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\166', '\123', '\040', '\166', '\141', '\040', '\061', - '\012', '\160', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\112', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\103', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\144', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\122', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\154', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\154', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\141', '\157', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\154', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\120', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\164', '\111', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\155', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\112', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\126', '\147', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\125', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\164', 
'\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\164', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\172', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\170', '\121', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\162', '\120', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\143', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\165', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\120', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\106', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\152', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\107', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\131', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\124', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\117', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\114', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\115', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\157', '\126', '\154', '\040', '\157', '\156', '\040', '\061', - '\012', '\143', '\167', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\147', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\172', 
'\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\142', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\167', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\145', '\121', '\157', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\121', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\113', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\166', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\126', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\163', '\120', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\121', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\132', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\150', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\127', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\154', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\160', '\114', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\105', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\115', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\122', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\163', '\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\154', 
'\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\154', '\172', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\165', '\112', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\126', '\146', '\040', '\156', '\171', '\040', '\061', - '\012', '\132', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\142', '\123', '\040', '\142', '\145', '\040', '\061', - '\012', '\157', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\143', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\167', '\125', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\120', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\112', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\155', '\116', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\144', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\155', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\110', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\101', '\171', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\170', '\113', '\040', '\156', '\171', '\040', '\061', - '\012', '\110', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\111', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\147', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\114', 
'\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\115', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\106', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\102', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\110', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\172', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\166', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\147', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\154', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\115', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\123', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\105', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\146', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\103', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\110', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\153', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\165', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\142', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\111', '\160', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\106', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\120', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\160', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\160', '\116', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\172', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\157', '\121', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\154', '\103', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\115', '\150', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\125', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\150', '\147', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\143', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\160', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\111', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\120', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\130', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\162', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\110', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\146', '\127', '\040', '\156', '\171', '\040', '\061', - '\012', '\131', '\171', 
'\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\166', '\122', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\122', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\113', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\156', '\170', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\144', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\142', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\145', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\123', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\111', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\113', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\160', '\120', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\121', '\171', '\040', '\153', '\141', '\040', '\061', - '\012', '\102', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\146', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\120', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\101', '\157', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\131', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\121', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\146', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\166', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\145', '\110', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\121', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\145', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\160', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\146', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\144', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\116', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\102', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\165', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\105', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\146', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\157', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\145', '\106', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\120', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\104', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\132', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\125', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\121', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\132', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\152', 
'\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\152', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\113', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\115', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\115', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\172', '\171', '\040', '\163', '\172', '\040', '\061', - '\012', '\116', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\167', '\124', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\163', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\125', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\154', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\102', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\146', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\126', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\107', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\146', '\107', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\126', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\144', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\172', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\172', 
'\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\172', '\121', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\126', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\114', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\124', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\155', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\166', '\131', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\121', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\111', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\107', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\110', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\103', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\112', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\143', '\127', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\130', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\150', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\152', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\154', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\170', 
'\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\172', '\162', '\105', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\153', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\110', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\146', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\114', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\125', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\153', '\104', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\114', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\131', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\113', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\111', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\162', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\106', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\142', '\103', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\130', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\120', '\153', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\103', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\167', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\112', '\147', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\102', 
'\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\111', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\144', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\121', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\160', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\170', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\153', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\153', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\122', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\115', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\145', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\110', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\170', '\125', '\040', '\142', '\145', '\040', '\061', - '\012', '\170', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\131', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\154', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\122', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\107', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\106', '\172', '\040', '\172', '\145', '\040', '\061', - '\012', '\161', '\117', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\147', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\157', '\107', '\155', '\040', '\157', '\156', '\040', '\061', - '\012', '\130', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\131', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\165', 
'\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\103', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\150', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\121', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\167', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\127', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\160', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\143', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\146', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\141', '\130', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\123', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\107', '\161', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\125', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\144', '\113', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\132', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\167', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\166', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\154', '\142', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\172', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\144', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\112', 
'\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\127', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\130', '\171', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\165', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\130', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\115', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\116', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\121', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\106', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\160', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\116', '\167', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\150', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\131', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\166', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\111', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\142', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\115', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\124', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\150', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\127', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\114', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\163', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\115', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\114', '\155', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\153', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\101', '\170', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\172', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\113', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\121', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\171', '\130', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\146', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\160', '\125', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\167', '\112', '\040', '\167', '\141', '\040', '\061', - '\012', '\101', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\165', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\164', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\122', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\124', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\125', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\125', '\151', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\112', '\154', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\120', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\103', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\117', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\154', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\146', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\112', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\153', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\107', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\154', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\171', '\104', '\040', '\156', '\171', '\040', '\061', - '\012', '\152', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\162', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\160', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\155', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\120', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\125', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\142', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\144', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\121', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\155', '\104', '\040', '\155', '\145', '\040', '\061', - '\012', '\112', '\153', '\152', '\040', '\153', '\141', '\040', '\061', - '\012', '\152', '\124', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\131', 
'\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\132', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\153', '\121', '\040', '\145', '\162', '\040', '\061', - '\012', '\142', '\104', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\123', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\130', '\162', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\132', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\161', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\112', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\144', '\112', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\167', '\105', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\170', '\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\152', '\170', '\124', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\143', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\115', '\146', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\161', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\122', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\131', '\171', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\126', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\155', '\122', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\106', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\171', '\121', '\040', '\156', '\171', '\040', '\061', - '\012', '\170', '\145', '\111', '\040', '\145', '\162', '\040', '\061', - '\012', '\127', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\154', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\104', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\131', '\172', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\170', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\147', '\113', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\163', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\130', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\103', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\130', '\162', '\144', '\040', '\145', '\162', '\040', '\061', - '\012', '\122', '\172', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\146', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\164', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\124', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\165', '\146', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\154', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\161', 
'\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\150', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\156', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\113', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\106', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\121', '\165', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\130', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\126', '\153', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\106', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\111', '\165', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\124', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\115', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\166', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\113', '\160', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\122', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\130', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\166', '\172', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\112', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\124', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\122', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\112', '\162', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\122', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\143', 
'\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\160', '\111', '\040', '\160', '\162', '\040', '\061', - '\012', '\151', '\116', '\167', '\040', '\151', '\156', '\040', '\061', - '\012', '\165', '\152', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\110', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\110', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\166', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\161', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\160', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\110', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\172', '\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\103', '\147', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\127', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\165', '\126', '\040', '\165', '\156', '\040', '\061', - '\012', '\142', '\152', '\116', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\121', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\170', '\105', '\040', '\142', '\145', '\040', '\061', - '\012', '\165', '\126', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\162', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\114', '\162', '\170', '\040', '\145', '\162', '\040', '\061', - '\012', '\111', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\141', '\161', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\107', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\120', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\147', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\163', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\126', '\170', '\163', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\123', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\157', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\160', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\113', '\143', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\167', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\131', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\112', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\127', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\166', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\106', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\152', '\170', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\160', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\126', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\116', '\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\166', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\170', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\160', '\165', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\112', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\170', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\101', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\155', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\154', '\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\155', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\103', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\106', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\103', '\144', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\114', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\115', '\170', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\143', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\126', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\153', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\170', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\166', '\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\115', '\154', 
'\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\107', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\152', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\152', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\167', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\113', '\170', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\150', '\132', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\172', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\166', '\113', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\120', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\161', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\171', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\103', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\152', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\122', '\142', '\040', '\154', '\145', '\040', '\061', - '\012', '\164', '\146', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\160', 
'\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\160', '\146', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\164', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\157', '\124', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\123', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\170', '\123', '\040', '\167', '\141', '\040', '\061', - '\012', '\127', '\162', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\117', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\130', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\144', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\161', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\130', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\102', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\152', '\130', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\142', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\167', '\123', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\126', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\167', 
'\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\163', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\120', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\167', '\107', '\040', '\167', '\141', '\040', '\061', - '\012', '\130', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\155', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\166', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\146', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\142', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\160', '\110', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\161', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\121', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\166', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\114', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\143', '\145', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\102', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\144', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\156', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\115', 
'\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\146', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\146', '\117', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\141', '\112', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\114', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\155', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\131', '\146', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\144', '\112', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\105', '\141', '\171', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\123', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\116', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\112', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\166', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\160', '\110', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\170', '\117', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\120', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\142', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\157', '\105', '\040', '\157', '\156', '\040', '\061', - '\012', '\147', '\164', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\146', 
'\106', '\040', '\142', '\145', '\040', '\061', - '\012', '\155', '\166', '\127', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\163', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\167', '\110', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\103', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\114', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\153', '\130', '\167', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\126', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\150', '\103', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\125', '\153', '\040', '\157', '\156', '\040', '\061', - '\012', '\172', '\143', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\115', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\162', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\146', '\117', '\040', '\167', '\141', '\040', '\061', - '\012', '\171', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\130', '\141', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\115', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\146', '\103', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\167', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\145', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\152', '\107', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\107', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\155', 
'\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\131', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\153', '\111', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\166', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\160', '\116', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\116', '\162', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\156', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\153', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\111', '\161', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\146', '\116', '\040', '\167', '\141', '\040', '\061', - '\012', '\126', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\153', '\121', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\111', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\161', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\163', '\170', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\161', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\146', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\171', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\166', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\123', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\131', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\147', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\130', '\141', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\102', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\131', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\152', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\152', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\141', '\152', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\130', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\150', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\107', '\155', '\040', '\145', '\162', '\040', '\061', - '\012', '\121', '\164', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\120', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\122', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\117', '\147', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\114', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\121', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\150', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\170', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\156', '\160', '\111', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\156', 
'\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\103', '\144', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\146', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\142', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\167', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\114', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\143', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\126', '\166', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\153', '\170', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\155', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\107', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\112', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\106', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\103', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\166', '\127', '\040', '\154', '\145', '\040', '\061', - '\012', '\123', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\132', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\111', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\124', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\104', 
'\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\124', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\160', '\103', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\153', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\160', '\131', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\121', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\151', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\121', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\125', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\152', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\130', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\130', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\147', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\162', '\112', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\167', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\164', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\110', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\110', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\142', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\146', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\122', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\155', '\131', '\040', '\155', '\145', '\040', '\061', - '\012', '\167', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\106', 
'\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\127', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\171', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\147', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\155', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\146', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\172', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\147', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\163', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\157', '\127', '\170', '\040', '\157', '\156', '\040', '\061', - '\012', '\120', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\144', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\130', '\155', '\160', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\147', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\103', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\164', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\142', '\121', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\165', '\115', '\040', '\165', '\156', '\040', '\061', - '\012', '\146', '\114', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\150', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\156', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\144', '\123', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\106', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\106', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\127', '\167', 
'\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\161', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\155', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\142', '\153', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\121', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\120', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\171', '\114', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\170', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\162', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\172', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\146', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\153', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\156', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\125', '\161', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\165', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\125', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\102', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\116', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\150', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\127', 
'\160', '\040', '\144', '\145', '\040', '\061', - '\012', '\131', '\166', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\122', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\172', '\107', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\165', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\152', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\132', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\112', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\156', '\117', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\143', '\101', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\146', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\152', '\123', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\146', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\163', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\144', '\130', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\122', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\105', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\107', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\110', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\166', '\101', '\040', '\166', '\141', '\040', '\061', - '\012', '\102', '\146', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\163', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\126', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\152', 
'\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\130', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\151', '\113', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\141', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\103', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\115', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\147', '\101', '\040', '\156', '\147', '\040', '\061', - '\012', '\151', '\167', '\112', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\107', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\164', '\146', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\152', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\172', '\107', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\155', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\156', '\125', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\107', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\126', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\123', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\161', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\156', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\126', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\126', '\163', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\116', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\116', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\156', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\112', '\163', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\166', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\154', '\115', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\172', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\143', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\126', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\127', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\110', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\117', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\125', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\127', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\161', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\156', '\103', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\126', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\164', '\115', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\150', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\164', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\143', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\144', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\163', '\117', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\156', 
'\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\155', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\107', '\166', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\126', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\144', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\144', '\105', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\132', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\102', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\110', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\153', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\170', '\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\154', '\162', '\101', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\162', '\124', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\152', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\142', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\124', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\155', '\126', '\040', '\155', '\145', '\040', '\061', - '\012', '\162', '\104', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\144', '\116', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\141', '\126', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\116', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\130', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\162', '\107', '\163', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\141', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\162', 
'\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\112', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\146', '\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\170', '\105', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\166', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\122', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\160', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\112', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\142', '\121', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\172', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\106', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\167', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\141', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\163', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\147', '\155', '\117', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\107', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\122', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\101', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\156', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\104', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\107', 
'\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\124', '\166', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\155', '\116', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\164', '\105', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\172', '\120', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\163', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\107', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\171', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\170', '\106', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\132', '\040', '\146', '\157', '\040', '\061', - '\012', '\163', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\155', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\110', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\150', '\155', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\144', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\167', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\165', '\112', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\120', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\130', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\125', '\161', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\147', 
'\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\106', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\116', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\170', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\123', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\122', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\167', '\113', '\040', '\167', '\141', '\040', '\061', - '\012', '\146', '\155', '\102', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\123', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\120', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\110', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\115', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\153', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\130', '\144', '\163', '\040', '\144', '\145', '\040', '\061', - '\012', '\171', '\142', '\102', '\040', '\142', '\145', '\040', '\061', - '\012', '\147', '\160', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\170', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\147', '\120', '\155', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\160', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\160', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\154', 
'\112', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\171', '\160', '\120', '\040', '\160', '\162', '\040', '\061', - '\012', '\116', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\147', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\105', '\161', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\122', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\125', '\142', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\112', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\166', '\116', '\040', '\166', '\141', '\040', '\061', - '\012', '\121', '\146', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\163', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\153', '\130', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\160', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\160', '\152', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\153', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\115', '\146', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\163', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\117', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\104', '\161', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\156', '\142', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\166', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\106', '\156', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\160', '\126', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\164', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\105', 
'\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\150', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\150', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\130', '\171', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\144', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\155', '\104', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\151', '\126', '\153', '\040', '\151', '\156', '\040', '\061', - '\012', '\110', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\160', '\132', '\040', '\160', '\157', '\040', '\061', - '\012', '\141', '\145', '\125', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\152', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\163', '\107', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\127', '\161', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\161', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\120', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\130', '\172', '\040', '\163', '\164', '\040', '\061', - '\012', '\170', '\166', '\120', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\142', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\152', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\150', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\161', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\131', '\146', '\040', '\144', '\145', '\040', '\061', - '\012', '\160', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\163', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\150', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\154', 
'\105', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\161', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\156', '\162', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\110', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\170', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\110', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\161', '\112', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\142', '\171', '\040', '\142', '\145', '\040', '\061', - '\012', '\164', '\142', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\123', '\146', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\110', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\160', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\121', '\172', '\160', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\151', '\125', '\040', '\151', '\156', '\040', '\061', - '\012', '\162', '\152', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\164', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\131', '\147', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\121', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\127', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\126', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\121', '\170', '\040', '\160', '\162', '\040', '\061', - '\012', '\114', '\156', 
'\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\127', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\110', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\166', '\160', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\170', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\110', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\166', '\125', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\161', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\126', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\132', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\165', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\170', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\110', '\154', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\104', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\144', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\166', '\115', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\127', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\151', '\117', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\104', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\110', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\120', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\130', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\146', '\114', '\040', '\146', '\157', '\040', '\061', - '\012', '\171', '\107', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\102', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\103', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\114', '\154', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\115', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\162', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\144', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\170', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\155', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\122', '\172', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\102', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\127', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\165', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\171', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\126', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\106', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\122', '\155', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\126', '\146', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\113', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\114', '\150', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\123', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\162', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\102', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\103', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\171', 
'\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\146', '\115', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\144', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\143', '\130', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\152', '\111', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\147', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\167', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\172', '\125', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\132', '\162', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\112', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\104', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\125', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\170', '\105', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\170', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\143', '\167', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\160', '\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\122', '\167', '\040', '\163', '\164', '\040', '\061', - '\012', '\113', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\170', '\101', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\121', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\120', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\110', '\167', '\165', '\040', '\153', '\165', '\040', '\061', - '\012', '\163', '\165', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\161', 
'\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\170', '\127', '\040', '\163', '\164', '\040', '\061', - '\012', '\141', '\106', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\142', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\161', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\164', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\115', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\107', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\162', '\153', '\040', '\145', '\162', '\040', '\061', - '\012', '\117', '\143', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\113', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\162', '\101', '\040', '\145', '\162', '\040', '\061', - '\012', '\147', '\170', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\127', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\170', '\121', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\130', '\157', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\155', '\120', '\040', '\155', '\145', '\040', '\061', - '\012', '\153', '\144', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\102', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\160', '\101', '\040', '\160', '\162', '\040', '\061', - '\012', '\156', '\115', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\110', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\123', '\166', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\155', '\130', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\143', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\162', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\141', '\130', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\144', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\142', '\131', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\146', '\132', '\040', '\160', '\162', '\040', '\061', - '\012', '\126', '\155', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\132', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\114', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\161', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\160', '\113', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\146', '\107', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\152', '\122', '\040', '\151', '\156', '\040', '\061', - '\012', '\151', '\112', '\171', '\040', '\151', '\156', '\040', '\061', - '\012', '\161', '\146', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\162', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\147', '\124', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\117', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\156', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\127', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\160', 
'\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\144', '\117', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\131', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\162', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\155', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\110', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\172', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\105', '\171', '\040', '\156', '\171', '\040', '\061', - '\012', '\150', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\160', '\121', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\131', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\144', '\170', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\146', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\142', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\142', '\117', '\040', '\142', '\145', '\040', '\061', - '\012', '\130', '\143', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\103', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\155', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\112', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\104', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\172', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\162', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\163', '\170', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\113', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\123', 
'\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\144', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\172', '\127', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\167', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\115', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\142', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\163', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\132', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\164', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\161', '\160', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\154', '\104', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\166', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\132', '\146', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\164', '\150', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\164', '\114', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\117', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\111', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\150', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\166', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\147', 
'\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\152', '\103', '\040', '\151', '\152', '\040', '\061', - '\012', '\117', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\166', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\110', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\170', '\112', '\040', '\151', '\152', '\040', '\061', - '\012', '\107', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\121', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\104', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\121', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\112', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\172', '\142', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\122', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\105', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\141', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\152', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\123', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\112', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\127', '\162', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\113', '\160', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\141', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\103', '\166', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\142', 
'\122', '\040', '\142', '\145', '\040', '\061', - '\012', '\160', '\124', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\144', '\111', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\146', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\122', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\142', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\172', '\106', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\167', '\117', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\162', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\164', '\167', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\114', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\156', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\165', '\123', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\111', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\124', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\113', '\144', '\040', '\141', '\156', '\040', '\061', - '\012', '\104', '\153', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\102', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\117', '\172', '\040', '\153', '\141', '\040', '\061', - '\012', '\172', '\117', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\172', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\115', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\146', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\147', '\104', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\164', 
'\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\152', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\142', '\130', '\040', '\142', '\145', '\040', '\061', - '\012', '\172', '\146', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\167', '\110', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\121', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\163', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\114', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\155', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\116', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\115', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\107', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\103', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\160', '\166', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\116', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\160', '\120', '\040', '\160', '\162', '\040', '\061', - '\012', '\154', '\130', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\114', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\144', '\130', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\172', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\170', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\166', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\162', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\105', '\164', 
'\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\131', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\163', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\146', '\153', '\103', '\040', '\153', '\141', '\040', '\061', - '\012', '\155', '\170', '\113', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\162', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\155', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\102', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\103', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\163', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\152', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\164', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\102', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\152', '\106', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\147', '\125', '\040', '\164', '\150', '\040', '\061', - '\012', '\127', '\162', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\106', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\143', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\145', '\161', '\101', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\142', '\107', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\167', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\104', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\124', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\162', 
'\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\121', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\167', '\115', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\103', '\156', '\040', '\156', '\144', '\040', '\061', - '\012', '\145', '\107', '\160', '\040', '\145', '\162', '\040', '\061', - '\012', '\165', '\120', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\151', '\111', '\040', '\151', '\156', '\040', '\061', - '\012', '\162', '\161', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\152', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\167', '\113', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\152', '\121', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\111', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\170', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\107', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\114', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\122', '\144', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\171', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\122', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\113', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\150', '\142', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\161', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\155', '\106', '\040', '\155', '\145', '\040', '\061', - '\012', '\166', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\161', '\116', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\114', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\166', 
'\112', '\040', '\166', '\141', '\040', '\061', - '\012', '\142', '\147', '\112', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\166', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\110', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\126', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\150', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\154', '\114', '\040', '\154', '\145', '\040', '\061', - '\012', '\153', '\144', '\110', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\146', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\104', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\103', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\121', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\127', '\156', '\172', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\112', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\167', '\122', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\160', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\120', '\152', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\160', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\143', '\154', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\103', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\162', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\103', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\141', '\102', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\165', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\143', 
'\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\132', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\107', '\164', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\142', '\127', '\040', '\167', '\141', '\040', '\061', - '\012', '\166', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\127', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\142', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\127', '\155', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\170', '\131', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\121', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\116', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\144', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\131', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\154', '\130', '\040', '\154', '\145', '\040', '\061', - '\012', '\162', '\167', '\106', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\132', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\142', '\112', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\141', '\102', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\126', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\125', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\146', '\103', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\170', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\124', '\142', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\157', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\124', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\102', '\153', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\121', 
'\145', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\102', '\145', '\040', '\144', '\145', '\040', '\061', - '\012', '\144', '\160', '\103', '\040', '\144', '\145', '\040', '\061', - '\012', '\153', '\160', '\127', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\167', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\162', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\165', '\130', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\157', '\171', '\040', '\157', '\156', '\040', '\061', - '\012', '\132', '\146', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\113', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\150', '\123', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\142', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\143', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\102', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\147', '\132', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\120', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\102', '\146', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\162', '\170', '\103', '\040', '\145', '\162', '\040', '\061', - '\012', '\163', '\114', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\150', '\107', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\166', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\122', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\116', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\104', '\146', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\122', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\150', 
'\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\116', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\167', '\101', '\040', '\167', '\141', '\040', '\061', - '\012', '\167', '\115', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\123', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\146', '\104', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\107', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\130', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\121', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\117', '\171', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\166', '\102', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\126', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\110', '\143', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\163', '\142', '\125', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\106', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\153', '\146', '\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\162', '\166', '\127', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\106', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\114', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\157', '\121', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\146', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\162', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\112', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\120', '\156', 
'\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\116', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\146', '\105', '\040', '\156', '\171', '\040', '\061', - '\012', '\153', '\155', '\111', '\040', '\153', '\141', '\040', '\061', - '\012', '\107', '\155', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\170', '\123', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\165', '\125', '\040', '\165', '\156', '\040', '\061', - '\012', '\161', '\131', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\113', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\150', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\157', '\146', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\160', '\162', '\110', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\130', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\121', '\155', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\127', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\172', '\103', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\131', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\141', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\147', '\142', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\123', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\162', '\121', '\172', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\153', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\126', '\156', '\154', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\164', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\115', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\171', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\161', 
'\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\156', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\106', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\153', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\127', '\143', '\153', '\040', '\143', '\150', '\040', '\061', - '\012', '\146', '\115', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\172', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\112', '\172', '\040', '\157', '\156', '\040', '\061', - '\012', '\170', '\166', '\110', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\121', '\171', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\131', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\170', '\104', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\104', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\120', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\162', '\110', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\171', '\115', '\040', '\151', '\156', '\040', '\061', - '\012', '\171', '\170', '\104', '\040', '\156', '\171', '\040', '\061', - '\012', '\153', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\130', '\166', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\155', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\153', '\116', '\040', '\153', '\141', '\040', '\061', - '\012', '\154', '\106', '\152', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\155', '\125', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\132', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\132', 
'\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\101', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\102', '\143', '\171', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\161', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\122', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\162', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\101', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\132', '\152', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\170', '\106', '\040', '\156', '\171', '\040', '\061', - '\012', '\166', '\132', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\120', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\103', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\131', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\102', '\160', '\040', '\164', '\150', '\040', '\061', - '\012', '\112', '\142', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\146', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\155', '\142', '\112', '\040', '\155', '\145', '\040', '\061', - '\012', '\146', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\167', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\165', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\131', '\172', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\104', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\110', 
'\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\155', '\111', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\165', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\130', '\146', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\120', '\171', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\117', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\116', '\155', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\104', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\103', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\152', '\120', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\162', '\105', '\040', '\141', '\156', '\040', '\061', - '\012', '\113', '\155', '\167', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\147', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\172', '\122', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\125', '\151', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\156', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\132', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\123', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\101', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\132', 
'\166', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\160', '\122', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\126', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\116', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\102', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\115', '\152', '\171', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\152', '\132', '\040', '\151', '\152', '\040', '\061', - '\012', '\164', '\114', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\131', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\167', '\142', '\117', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\130', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\113', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\165', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\154', '\121', '\040', '\154', '\145', '\040', '\061', - '\012', '\171', '\146', '\102', '\040', '\156', '\171', '\040', '\061', - '\012', '\121', '\163', '\153', '\040', '\163', '\164', '\040', '\061', - '\012', '\125', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\132', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\155', '\131', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\130', '\167', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\126', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\111', '\167', '\040', '\156', '\147', '\040', '\061', - '\012', '\110', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\120', '\147', '\171', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\121', 
'\166', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\170', '\164', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\143', '\145', '\040', '\143', '\150', '\040', '\061', - '\012', '\116', '\152', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\107', '\164', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\112', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\104', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\114', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\157', '\145', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\166', '\131', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\142', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\124', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\141', '\124', '\160', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\144', '\124', '\040', '\144', '\145', '\040', '\061', - '\012', '\127', '\153', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\170', '\101', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\104', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\146', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\162', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\110', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\126', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\115', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\126', 
'\155', '\040', '\163', '\164', '\040', '\061', - '\012', '\156', '\172', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\166', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\132', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\156', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\132', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\120', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\116', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\162', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\114', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\126', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\105', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\161', '\103', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\132', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\116', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\106', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\170', '\120', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\120', '\152', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\131', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\106', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\166', '\114', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\112', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\126', 
'\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\143', '\132', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\143', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\114', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\171', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\150', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\164', '\113', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\122', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\103', '\170', '\040', '\142', '\145', '\040', '\061', - '\012', '\156', '\112', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\152', '\167', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\120', '\144', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\170', '\105', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\114', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\172', '\126', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\107', '\161', '\040', '\154', '\145', '\040', '\061', - '\012', '\121', '\142', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\142', '\131', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\123', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\160', '\122', '\040', '\160', '\162', '\040', '\061', - '\012', '\147', '\103', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\151', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\146', 
'\170', '\040', '\146', '\157', '\040', '\061', - '\012', '\156', '\152', '\111', '\040', '\156', '\144', '\040', '\061', - '\012', '\131', '\160', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\154', '\170', '\124', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\126', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\112', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\170', '\101', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\104', '\154', '\040', '\156', '\147', '\040', '\061', - '\012', '\105', '\141', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\143', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\107', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\114', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\153', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\142', '\113', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\116', '\170', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\122', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\162', '\125', '\040', '\145', '\162', '\040', '\061', - '\012', '\146', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\172', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\143', '\156', '\040', '\143', '\150', '\040', '\061', - '\012', '\161', '\142', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\126', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\166', '\106', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\112', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\170', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\151', 
'\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\115', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\142', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\147', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\155', '\123', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\132', '\152', '\155', '\040', '\151', '\152', '\040', '\061', - '\012', '\116', '\152', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\144', '\161', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\131', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\113', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\170', '\125', '\040', '\143', '\150', '\040', '\061', - '\012', '\103', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\146', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\164', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\162', '\120', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\105', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\170', '\117', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\132', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\142', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\130', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\143', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\153', '\117', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\116', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\130', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\126', '\153', 
'\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\166', '\106', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\106', '\170', '\040', '\170', '\145', '\040', '\061', - '\012', '\144', '\123', '\152', '\040', '\144', '\145', '\040', '\061', - '\012', '\170', '\120', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\157', '\106', '\160', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\101', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\107', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\172', '\103', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\111', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\114', '\150', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\167', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\160', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\101', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\102', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\113', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\120', '\146', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\165', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\124', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\164', '\127', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\144', '\116', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\162', '\116', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\154', '\123', '\040', '\154', '\145', '\040', '\061', - '\012', '\161', '\105', 
'\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\161', '\122', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\156', '\155', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\130', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\116', '\040', '\146', '\157', '\040', '\061', - '\012', '\142', '\166', '\114', '\040', '\166', '\141', '\040', '\061', - '\012', '\157', '\107', '\146', '\040', '\157', '\156', '\040', '\061', - '\012', '\150', '\132', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\146', '\110', '\040', '\156', '\171', '\040', '\061', - '\012', '\144', '\143', '\105', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\147', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\167', '\162', '\102', '\040', '\145', '\162', '\040', '\061', - '\012', '\153', '\127', '\155', '\040', '\153', '\141', '\040', '\061', - '\012', '\123', '\150', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\167', '\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\166', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\121', '\147', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\116', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\110', '\160', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\154', '\106', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\161', '\172', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\147', '\107', '\040', '\156', '\147', '\040', '\061', - '\012', '\153', '\144', '\132', '\040', '\144', '\145', '\040', '\061', - '\012', '\145', '\152', '\130', '\040', '\145', '\162', '\040', '\061', - '\012', '\120', '\170', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\166', 
'\124', '\040', '\166', '\141', '\040', '\061', - '\012', '\113', '\161', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\155', '\142', '\040', '\155', '\145', '\040', '\061', - '\012', '\170', '\106', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\121', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\120', '\147', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\160', '\114', '\040', '\160', '\162', '\040', '\061', - '\012', '\142', '\167', '\105', '\040', '\167', '\141', '\040', '\061', - '\012', '\170', '\110', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\155', '\106', '\040', '\151', '\152', '\040', '\061', - '\012', '\111', '\170', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\171', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\126', '\166', '\040', '\145', '\162', '\040', '\061', - '\012', '\131', '\164', '\167', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\160', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\160', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\152', '\130', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\150', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\146', '\126', '\040', '\161', '\165', '\040', '\061', - '\012', '\112', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\124', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\102', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\152', '\122', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\147', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\155', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\103', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\160', '\131', 
'\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\153', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\166', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\126', '\146', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\154', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\116', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\142', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\161', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\162', '\104', '\040', '\145', '\162', '\040', '\061', - '\012', '\154', '\142', '\107', '\040', '\154', '\145', '\040', '\061', - '\012', '\170', '\150', '\106', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\170', '\132', '\040', '\153', '\141', '\040', '\061', - '\012', '\111', '\165', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\106', '\170', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\126', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\143', '\107', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\127', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\102', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\171', '\112', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\143', '\172', '\114', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\125', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\160', '\132', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\164', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\170', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\144', '\131', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\151', '\161', 
'\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\146', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\126', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\146', '\121', '\040', '\166', '\141', '\040', '\061', - '\012', '\150', '\166', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\144', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\110', '\172', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\131', '\163', '\040', '\143', '\150', '\040', '\061', - '\012', '\106', '\164', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\160', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\114', '\154', '\144', '\040', '\154', '\145', '\040', '\061', - '\012', '\107', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\144', '\122', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\130', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\163', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\116', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\152', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\126', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\112', '\155', '\170', '\040', '\155', '\145', '\040', '\061', - '\012', '\160', '\104', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\151', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\114', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\156', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\124', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\116', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\161', 
'\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\125', '\141', '\167', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\172', '\116', '\040', '\163', '\172', '\040', '\061', - '\012', '\147', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\152', '\115', '\040', '\151', '\152', '\040', '\061', - '\012', '\154', '\156', '\113', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\170', '\142', '\040', '\163', '\172', '\040', '\061', - '\012', '\153', '\143', '\123', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\152', '\115', '\040', '\141', '\156', '\040', '\061', - '\012', '\107', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\154', '\156', '\132', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\150', '\113', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\160', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\161', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\152', '\130', '\040', '\151', '\156', '\040', '\061', - '\012', '\152', '\107', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\142', '\170', '\111', '\040', '\142', '\145', '\040', '\061', - '\012', '\166', '\130', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\162', '\167', '\040', '\145', '\162', '\040', '\061', - '\012', '\103', '\167', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\156', '\102', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\166', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\170', '\102', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\126', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\103', '\172', '\170', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\171', '\126', '\040', '\156', '\171', '\040', '\061', - '\012', '\143', '\130', 
'\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\121', '\156', '\146', '\040', '\141', '\156', '\040', '\061', - '\012', '\131', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\161', '\110', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\142', '\131', '\040', '\144', '\145', '\040', '\061', - '\012', '\123', '\161', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\113', '\161', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\160', '\112', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\142', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\106', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\113', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\162', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\146', '\161', '\116', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\146', '\101', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\157', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\117', '\167', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\156', '\154', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\111', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\161', '\162', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\167', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\161', '\141', '\127', '\040', '\141', '\156', '\040', '\061', - '\012', '\150', '\143', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\153', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\116', '\144', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\172', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\170', 
'\102', '\040', '\156', '\147', '\040', '\061', - '\012', '\102', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\124', '\146', '\040', '\166', '\141', '\040', '\061', - '\012', '\152', '\106', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\115', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\165', '\146', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\160', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\165', '\132', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\124', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\107', '\154', '\167', '\040', '\154', '\145', '\040', '\061', - '\012', '\113', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\103', '\170', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\152', '\132', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\123', '\161', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\171', '\120', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\145', '\121', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\111', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\104', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\111', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\116', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\117', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\153', '\115', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\106', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\143', '\146', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\113', '\152', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\153', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\112', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\120', 
'\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\157', '\172', '\121', '\040', '\157', '\156', '\040', '\061', - '\012', '\104', '\154', '\153', '\040', '\154', '\145', '\040', '\061', - '\012', '\166', '\130', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\164', '\131', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\127', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\121', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\124', '\160', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\150', '\143', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\165', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\156', '\142', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\121', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\147', '\132', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\165', '\127', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\115', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\132', '\143', '\144', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\102', '\160', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\172', '\131', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\167', '\103', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\103', '\161', '\171', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\152', '\106', '\040', '\143', '\150', '\040', '\061', - '\012', '\107', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\155', '\143', '\127', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\161', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\165', '\112', 
'\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\125', '\152', '\040', '\151', '\156', '\040', '\061', - '\012', '\166', '\153', '\122', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\147', '\111', '\040', '\156', '\147', '\040', '\061', - '\012', '\166', '\125', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\144', '\156', '\040', '\144', '\145', '\040', '\061', - '\012', '\163', '\152', '\106', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\120', '\166', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\122', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\154', '\126', '\040', '\154', '\145', '\040', '\061', - '\012', '\163', '\142', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\155', '\146', '\124', '\040', '\155', '\145', '\040', '\061', - '\012', '\144', '\142', '\126', '\040', '\144', '\145', '\040', '\061', - '\012', '\106', '\155', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\147', '\146', '\125', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\142', '\102', '\040', '\143', '\150', '\040', '\061', - '\012', '\131', '\170', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\104', '\167', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\167', '\147', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\163', '\120', '\166', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\110', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\156', '\142', '\110', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\106', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\161', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\106', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\105', '\142', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\106', 
'\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\105', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\143', '\111', '\040', '\143', '\150', '\040', '\061', - '\012', '\142', '\115', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\132', '\167', '\040', '\163', '\172', '\040', '\061', - '\012', '\150', '\152', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\113', '\170', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', '\103', '\040', '\156', '\147', '\040', '\061', - '\012', '\143', '\156', '\114', '\040', '\141', '\156', '\040', '\061', - '\012', '\106', '\144', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\107', '\146', '\040', '\142', '\145', '\040', '\061', - '\012', '\123', '\152', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\142', '\115', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\130', '\167', '\040', '\166', '\141', '\040', '\061', - '\012', '\107', '\146', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\103', '\167', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\163', '\121', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\154', '\120', '\146', '\040', '\154', '\145', '\040', '\061', - '\012', '\156', '\155', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\126', '\144', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\143', '\130', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\152', '\124', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\167', '\105', '\040', '\155', '\145', '\040', '\061', - '\012', '\161', '\114', '\155', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\110', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\164', '\156', '\040', '\164', '\150', '\040', '\061', - '\012', '\116', '\164', 
'\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\147', '\127', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\120', '\161', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\160', '\120', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\122', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\160', '\114', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\156', '\104', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\160', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\144', '\172', '\123', '\040', '\163', '\172', '\040', '\061', - '\012', '\164', '\132', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\171', '\147', '\115', '\040', '\156', '\147', '\040', '\061', - '\012', '\142', '\170', '\103', '\040', '\142', '\145', '\040', '\061', - '\012', '\144', '\146', '\125', '\040', '\144', '\145', '\040', '\061', - '\012', '\142', '\155', '\102', '\040', '\155', '\145', '\040', '\061', - '\012', '\154', '\102', '\172', '\040', '\154', '\145', '\040', '\061', - '\012', '\147', '\112', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\131', '\153', '\166', '\040', '\153', '\141', '\040', '\061', - '\012', '\132', '\144', '\153', '\040', '\144', '\145', '\040', '\061', - '\012', '\167', '\156', '\121', '\040', '\141', '\156', '\040', '\061', - '\012', '\164', '\132', '\152', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\146', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\115', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\162', '\125', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\161', '\167', '\160', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\143', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\146', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\165', '\157', 
'\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\103', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\151', '\121', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\102', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\126', '\142', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\152', '\125', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\160', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\166', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\132', '\160', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\160', '\151', '\126', '\040', '\151', '\156', '\040', '\061', - '\012', '\153', '\142', '\120', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\161', '\115', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\126', '\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\161', '\132', '\162', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\170', '\117', '\040', '\164', '\150', '\040', '\061', - '\012', '\167', '\124', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\112', '\172', '\146', '\040', '\163', '\172', '\040', '\061', - '\012', '\121', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\165', '\131', '\166', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\167', '\113', '\040', '\160', '\162', '\040', '\061', - '\012', '\150', '\166', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\104', '\161', '\145', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\146', '\111', '\040', '\160', '\162', '\040', '\061', - '\012', '\155', '\150', '\126', '\040', '\164', '\150', '\040', '\061', - '\012', '\152', '\147', '\105', '\040', '\156', '\147', '\040', '\061', - '\012', '\162', '\143', '\121', '\040', '\143', '\150', '\040', '\061', - '\012', '\153', '\155', 
'\124', '\040', '\153', '\141', '\040', '\061', - '\012', '\127', '\172', '\152', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\116', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\120', '\142', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\172', '\166', '\102', '\040', '\163', '\172', '\040', '\061', - '\012', '\170', '\150', '\112', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\166', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\116', '\166', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\163', '\167', '\132', '\040', '\163', '\164', '\040', '\061', - '\012', '\152', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\155', '\146', '\114', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\153', '\114', '\040', '\163', '\172', '\040', '\061', - '\012', '\152', '\126', '\160', '\040', '\151', '\152', '\040', '\061', - '\012', '\104', '\153', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\170', '\165', '\131', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\110', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\123', '\146', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\172', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\154', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\115', '\144', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\147', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\146', '\170', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\164', '\122', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\106', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\105', '\157', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\157', '\131', '\040', '\157', '\156', '\040', '\061', - '\012', '\101', '\167', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\124', '\170', 
'\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\143', '\111', '\147', '\040', '\143', '\150', '\040', '\061', - '\012', '\170', '\125', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\122', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\170', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\151', '\120', '\146', '\040', '\151', '\156', '\040', '\061', - '\012', '\145', '\152', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\130', '\164', '\163', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\146', '\124', '\040', '\160', '\162', '\040', '\061', - '\012', '\120', '\161', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\163', '\126', '\040', '\163', '\164', '\040', '\061', - '\012', '\171', '\160', '\103', '\040', '\160', '\162', '\040', '\061', - '\012', '\167', '\115', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\105', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\170', '\131', '\040', '\166', '\141', '\040', '\061', - '\012', '\146', '\125', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\104', '\146', '\146', '\040', '\146', '\157', '\040', '\061', - '\012', '\147', '\161', '\121', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\115', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\166', '\112', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\146', '\120', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\114', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\144', '\115', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\116', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\141', '\107', '\166', '\040', '\141', '\156', '\040', '\061', - '\012', '\166', '\166', '\104', '\040', '\166', '\141', '\040', '\061', - '\012', '\144', '\112', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\170', 
'\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\127', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\120', '\166', '\170', '\040', '\166', '\141', '\040', '\061', - '\012', '\162', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\122', '\144', '\040', '\163', '\172', '\040', '\061', - '\012', '\113', '\147', '\166', '\040', '\156', '\147', '\040', '\061', - '\012', '\130', '\166', '\171', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\132', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\153', '\160', '\113', '\040', '\153', '\141', '\040', '\061', - '\012', '\120', '\146', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\167', '\125', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\167', '\127', '\170', '\040', '\167', '\141', '\040', '\061', - '\012', '\152', '\120', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\147', '\114', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\112', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\147', '\120', '\170', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\110', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\166', '\112', '\142', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\150', '\102', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\121', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\105', '\157', '\141', '\040', '\141', '\156', '\040', '\061', - '\012', '\160', '\152', '\117', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\106', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\163', '\130', '\157', '\040', '\157', '\156', '\040', '\061', - '\012', '\167', '\142', '\131', '\040', '\167', '\141', '\040', '\061', - '\012', '\143', '\152', '\117', '\040', '\143', '\150', '\040', '\061', - '\012', '\155', '\154', '\132', '\040', '\154', '\145', '\040', '\061', - '\012', '\142', '\116', 
'\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\153', '\152', '\120', '\040', '\151', '\152', '\040', '\061', - '\012', '\171', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\126', '\152', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\116', '\166', '\040', '\166', '\141', '\040', '\061', - '\012', '\147', '\152', '\127', '\040', '\156', '\147', '\040', '\061', - '\012', '\156', '\130', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\144', '\161', '\112', '\040', '\161', '\165', '\040', '\061', - '\012', '\110', '\156', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\171', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\153', '\166', '\102', '\040', '\153', '\141', '\040', '\061', - '\012', '\161', '\171', '\102', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\172', '\147', '\120', '\040', '\156', '\147', '\040', '\061', - '\012', '\132', '\172', '\153', '\040', '\163', '\172', '\040', '\061', - '\012', '\146', '\115', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\172', '\131', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\142', '\124', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\117', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\163', '\101', '\040', '\163', '\164', '\040', '\061', - '\012', '\147', '\114', '\152', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\170', '\110', '\040', '\163', '\172', '\040', '\061', - '\012', '\143', '\114', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\104', '\156', '\153', '\040', '\141', '\156', '\040', '\061', - '\012', '\172', '\111', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\153', '\160', '\112', '\040', '\153', '\141', '\040', '\061', - '\012', '\170', '\162', '\113', '\040', '\145', '\162', '\040', '\061', - '\012', '\145', '\111', 
'\142', '\040', '\145', '\162', '\040', '\061', - '\012', '\112', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\102', '\161', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\130', '\147', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\152', '\153', '\040', '\151', '\152', '\040', '\061', - '\012', '\144', '\122', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\164', '\152', '\132', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\121', '\154', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\171', '\127', '\040', '\151', '\156', '\040', '\061', - '\012', '\112', '\167', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\112', '\160', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\102', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\172', '\162', '\107', '\040', '\145', '\162', '\040', '\061', - '\012', '\150', '\127', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\144', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\161', '\163', '\132', '\040', '\161', '\165', '\040', '\061', - '\012', '\143', '\121', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\143', '\143', '\116', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\167', '\115', '\040', '\167', '\141', '\040', '\061', - '\012', '\147', '\142', '\130', '\040', '\156', '\147', '\040', '\061', - '\012', '\164', '\146', '\124', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\167', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\121', '\142', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\171', '\145', '\131', '\040', '\145', '\162', '\040', '\061', - '\012', '\141', '\125', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\161', '\110', '\167', '\040', '\161', '\165', '\040', '\061', - '\012', '\106', '\150', 
'\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\106', '\156', '\147', '\040', '\141', '\156', '\040', '\061', - '\012', '\154', '\166', '\111', '\040', '\154', '\145', '\040', '\061', - '\012', '\152', '\103', '\146', '\040', '\151', '\152', '\040', '\061', - '\012', '\150', '\161', '\110', '\040', '\164', '\150', '\040', '\061', - '\012', '\164', '\124', '\161', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\146', '\111', '\040', '\163', '\164', '\040', '\061', - '\012', '\166', '\163', '\115', '\040', '\163', '\164', '\040', '\061', - '\012', '\154', '\104', '\160', '\040', '\154', '\145', '\040', '\061', - '\012', '\167', '\112', '\142', '\040', '\167', '\141', '\040', '\061', - '\012', '\142', '\150', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\122', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\164', '\123', '\040', '\164', '\150', '\040', '\061', - '\012', '\132', '\167', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\112', '\142', '\150', '\040', '\164', '\150', '\040', '\061', - '\012', '\150', '\110', '\142', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\104', '\171', '\040', '\160', '\162', '\040', '\061', - '\012', '\163', '\152', '\104', '\040', '\163', '\164', '\040', '\061', - '\012', '\117', '\171', '\160', '\040', '\160', '\162', '\040', '\061', - '\012', '\161', '\167', '\104', '\040', '\161', '\165', '\040', '\061', - '\012', '\152', '\142', '\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\160', '\107', '\040', '\166', '\141', '\040', '\061', - '\012', '\127', '\152', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\160', '\102', '\040', '\166', '\141', '\040', '\061', - '\012', '\141', '\130', '\161', '\040', '\141', '\156', '\040', '\061', - '\012', '\155', '\127', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\110', '\151', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\171', 
'\116', '\040', '\156', '\171', '\040', '\061', - '\012', '\155', '\142', '\121', '\040', '\155', '\145', '\040', '\061', - '\012', '\171', '\167', '\103', '\040', '\167', '\141', '\040', '\061', - '\012', '\157', '\126', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\155', '\132', '\040', '\155', '\145', '\040', '\061', - '\012', '\163', '\154', '\117', '\040', '\154', '\145', '\040', '\061', - '\012', '\146', '\130', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\153', '\131', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\160', '\126', '\165', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\153', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\102', '\162', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\103', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\143', '\170', '\040', '\143', '\150', '\040', '\061', - '\012', '\172', '\115', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\122', '\167', '\040', '\143', '\150', '\040', '\061', - '\012', '\147', '\172', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\142', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\152', '\165', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\170', '\123', '\172', '\040', '\163', '\172', '\040', '\061', - '\012', '\126', '\147', '\172', '\040', '\156', '\147', '\040', '\061', - '\012', '\157', '\115', '\167', '\040', '\157', '\156', '\040', '\061', - '\012', '\146', '\160', '\105', '\040', '\160', '\162', '\040', '\061', - '\012', '\170', '\152', '\130', '\040', '\151', '\152', '\040', '\061', - '\012', '\161', '\103', '\147', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\167', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\165', '\121', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\161', '\120', '\153', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\152', 
'\104', '\040', '\151', '\152', '\040', '\061', - '\012', '\121', '\172', '\155', '\040', '\163', '\172', '\040', '\061', - '\012', '\163', '\111', '\160', '\040', '\163', '\164', '\040', '\061', - '\012', '\165', '\157', '\107', '\040', '\161', '\165', '\040', '\061', - '\012', '\162', '\126', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\143', '\142', '\113', '\040', '\143', '\150', '\040', '\061', - '\012', '\150', '\130', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\113', '\163', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\153', '\142', '\106', '\040', '\153', '\141', '\040', '\061', - '\012', '\167', '\102', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\151', '\131', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\163', '\147', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\107', '\172', '\166', '\040', '\163', '\172', '\040', '\061', - '\012', '\171', '\166', '\105', '\040', '\166', '\141', '\040', '\061', - '\012', '\170', '\113', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\127', '\146', '\040', '\163', '\164', '\040', '\061', - '\012', '\172', '\102', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\171', '\153', '\110', '\040', '\153', '\141', '\040', '\061', - '\012', '\166', '\152', '\110', '\040', '\151', '\152', '\040', '\061', - '\012', '\167', '\150', '\111', '\040', '\164', '\150', '\040', '\061', - '\012', '\166', '\120', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\132', '\150', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\151', '\112', '\170', '\040', '\151', '\156', '\040', '\061', - '\012', '\143', '\132', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\115', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\143', '\125', '\152', '\040', '\143', '\150', '\040', '\061', - '\012', '\166', '\115', 
'\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\143', '\112', '\040', '\143', '\150', '\040', '\061', - '\012', '\102', '\143', '\155', '\040', '\143', '\150', '\040', '\061', - '\012', '\152', '\130', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\170', '\157', '\111', '\040', '\157', '\156', '\040', '\061', - '\012', '\132', '\153', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\130', '\172', '\162', '\040', '\145', '\162', '\040', '\061', - '\012', '\171', '\172', '\115', '\040', '\163', '\172', '\040', '\061', - '\012', '\161', '\152', '\130', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\116', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\160', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\164', '\130', '\144', '\040', '\164', '\150', '\040', '\061', - '\012', '\130', '\153', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\110', '\163', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\142', '\161', '\125', '\040', '\161', '\165', '\040', '\061', - '\012', '\163', '\147', '\106', '\040', '\156', '\147', '\040', '\061', - '\012', '\144', '\120', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\112', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\125', '\147', '\160', '\040', '\156', '\147', '\040', '\061', - '\012', '\122', '\170', '\151', '\040', '\151', '\156', '\040', '\061', - '\012', '\113', '\167', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\172', '\153', '\104', '\040', '\163', '\172', '\040', '\061', - '\012', '\122', '\161', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\160', '\112', '\142', '\040', '\160', '\162', '\040', '\061', - '\012', '\146', '\143', '\126', '\040', '\143', '\150', '\040', '\061', - '\012', '\151', '\126', '\144', '\040', '\151', '\156', '\040', '\061', - '\012', '\142', '\102', 
'\160', '\040', '\142', '\145', '\040', '\061', - '\012', '\117', '\152', '\167', '\040', '\151', '\152', '\040', '\061', - '\012', '\166', '\132', '\154', '\040', '\154', '\145', '\040', '\061', - '\012', '\111', '\171', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\146', '\153', '\125', '\040', '\153', '\141', '\040', '\061', - '\012', '\113', '\143', '\161', '\040', '\143', '\150', '\040', '\061', - '\012', '\144', '\102', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\115', '\161', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\115', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\127', '\167', '\163', '\040', '\163', '\164', '\040', '\061', - '\012', '\164', '\161', '\130', '\040', '\164', '\150', '\040', '\061', - '\012', '\170', '\150', '\104', '\040', '\164', '\150', '\040', '\061', - '\012', '\162', '\116', '\154', '\040', '\145', '\162', '\040', '\061', - '\012', '\160', '\127', '\144', '\040', '\144', '\145', '\040', '\061', - '\012', '\152', '\162', '\126', '\040', '\145', '\162', '\040', '\061', - '\012', '\102', '\155', '\152', '\040', '\151', '\152', '\040', '\061', - '\012', '\110', '\155', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\166', '\154', '\110', '\040', '\154', '\145', '\040', '\061', - '\012', '\115', '\170', '\142', '\040', '\142', '\145', '\040', '\061', - '\012', '\171', '\171', '\123', '\040', '\156', '\171', '\040', '\061', - '\012', '\161', '\166', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\146', '\166', '\130', '\040', '\166', '\141', '\040', '\061', - '\012', '\126', '\146', '\145', '\040', '\145', '\162', '\040', '\061', - '\012', '\103', '\144', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\113', '\147', '\145', '\040', '\156', '\147', '\040', '\061', - '\012', '\121', '\145', '\152', '\040', '\145', '\162', '\040', '\061', - '\012', '\162', '\166', '\132', '\040', '\145', '\162', '\040', '\061', - '\012', '\166', '\172', 
'\111', '\040', '\163', '\172', '\040', '\061', - '\012', '\144', '\104', '\156', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\167', '\123', '\040', '\141', '\156', '\040', '\061', - '\012', '\121', '\143', '\142', '\040', '\143', '\150', '\040', '\061', - '\012', '\167', '\153', '\126', '\040', '\153', '\141', '\040', '\061', - '\012', '\165', '\103', '\170', '\040', '\161', '\165', '\040', '\061', - '\012', '\111', '\147', '\153', '\040', '\156', '\147', '\040', '\061', - '\012', '\126', '\160', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\102', '\155', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\144', '\121', '\040', '\144', '\145', '\040', '\061', - '\012', '\146', '\147', '\121', '\040', '\156', '\147', '\040', '\061', - '\012', '\171', '\121', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\147', '\170', '\110', '\040', '\156', '\147', '\040', '\061', - '\012', '\160', '\161', '\113', '\040', '\161', '\165', '\040', '\061', - '\012', '\154', '\122', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\130', '\144', '\166', '\040', '\144', '\145', '\040', '\061', - '\012', '\150', '\104', '\172', '\040', '\164', '\150', '\040', '\061', - '\012', '\144', '\106', '\167', '\040', '\144', '\145', '\040', '\061', - '\012', '\161', '\121', '\165', '\040', '\165', '\156', '\040', '\061', - '\012', '\170', '\142', '\104', '\040', '\142', '\145', '\040', '\061', - '\012', '\161', '\155', '\105', '\040', '\161', '\165', '\040', '\061', - '\012', '\155', '\127', '\155', '\040', '\155', '\145', '\040', '\061', - '\012', '\152', '\102', '\142', '\040', '\151', '\152', '\040', '\061', - '\012', '\152', '\130', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\146', '\170', '\125', '\040', '\146', '\157', '\040', '\061', - '\012', '\130', '\167', '\143', '\040', '\143', '\150', '\040', '\061', - '\012', '\114', '\161', '\146', '\040', '\161', '\165', '\040', '\061', - '\012', '\150', '\143', 
'\120', '\040', '\164', '\150', '\040', '\061', - '\012', '\160', '\146', '\102', '\040', '\160', '\162', '\040', '\061', - '\012', '\166', '\123', '\147', '\040', '\156', '\147', '\040', '\061', - '\012', '\170', '\112', '\167', '\040', '\167', '\141', '\040', '\061', - '\012', '\155', '\122', '\146', '\040', '\155', '\145', '\040', '\061', - '\012', '\150', '\161', '\127', '\040', '\164', '\150', '\040', '\061', - '\012', '\156', '\126', '\142', '\040', '\141', '\156', '\040', '\061', - '\012', '\143', '\105', '\165', '\040', '\143', '\150', '\040', '\061', - '\012', '\156', '\146', '\116', '\040', '\141', '\156', '\040', '\061', - '\012', '\156', '\126', '\152', '\040', '\141', '\156', '\040', '\061', - '\012', '\122', '\167', '\153', '\040', '\153', '\141', '\040', '\061', - '\012', '\156', '\155', '\107', '\040', '\141', '\156', '\040', '\061', - '\012', '\157', '\104', '\164', '\040', '\164', '\150', '\040', '\061', - '\012', '\153', '\120', '\142', '\040', '\153', '\141', '\040', '\061', - '\012', '\147', '\161', '\127', '\040', '\161', '\165', '\040', '\061', - '\012', '\121', '\150', '\146', '\040', '\164', '\150', '\040', '\061', - '\012', '\161', '\132', '\154', '\040', '\161', '\165', '\040', '\061', - '\012', '\172', '\110', '\161', '\040', '\161', '\165', '\040', '\061', - '\012', '\151', '\130', '\154', '\040', '\151', '\156', '\040', '\061', - '\012', -}; - -extern const int ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile); - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/universalambigs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/universalambigs.h deleted file mode 100644 index f3f2fa1a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/ccutil/universalambigs.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: universalambigs.h -// Description: Data for a universal ambigs file that 
is useful for -// any language. -// Author: Ray Smith -// Created: Mon Mar 18 11:26:00 PDT 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CCUTIL_UNIVERSALAMBIGS_H_ -#define TESSERACT_CCUTIL_UNIVERSALAMBIGS_H_ - -namespace tesseract { - -extern const char kUniversalAmbigsFile[]; -extern const int ksizeofUniversalAmbigsFile; - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_UNIVERSALAMBIGS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/Makefile.am deleted file mode 100644 index 933ff1d6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/Makefile.am +++ /dev/null @@ -1,94 +0,0 @@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/cutil \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/dict \ - -I$(top_srcdir)/src/viewer - -if DISABLED_LEGACY_ENGINE -AM_CPPFLAGS += -DDISABLED_LEGACY_ENGINE -endif - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - -noinst_HEADERS = \ - blobclass.h \ - classify.h - -if !DISABLED_LEGACY_ENGINE -noinst_HEADERS += \ - adaptive.h \ - cluster.h \ - clusttool.h \ - cutoffs.h \ - errorcounter.h \ - featdefs.h \ - float2int.h \ - fpoint.h \ - intfeaturedist.h \ - intfeaturemap.h 
\ - intfeaturespace.h \ - intfx.h intmatcher.h \ - intproto.h kdtree.h \ - mastertrainer.h mf.h \ - mfdefs.h \ - mfoutline.h \ - mfx.h \ - normfeat.h \ - normmatch.h \ - ocrfeatures.h \ - outfeat.h \ - picofeat.h \ - protos.h \ - sampleiterator.h \ - shapeclassifier.h \ - shapetable.h \ - tessclassifier.h \ - trainingsample.h \ - trainingsampleset.h -endif - -noinst_LTLIBRARIES = libtesseract_classify.la - -libtesseract_classify_la_SOURCES = \ - blobclass.cpp \ - classify.cpp - -if !DISABLED_LEGACY_ENGINE -libtesseract_classify_la_SOURCES += \ - adaptive.cpp \ - adaptmatch.cpp \ - cluster.cpp \ - clusttool.cpp \ - cutoffs.cpp \ - errorcounter.cpp \ - featdefs.cpp \ - float2int.cpp \ - fpoint.cpp \ - intfeaturedist.cpp \ - intfeaturemap.cpp \ - intfeaturespace.cpp \ - intfx.cpp \ - intmatcher.cpp \ - intproto.cpp \ - kdtree.cpp \ - mastertrainer.cpp \ - mf.cpp mfdefs.cpp \ - mfoutline.cpp \ - mfx.cpp \ - normfeat.cpp \ - normmatch.cpp \ - ocrfeatures.cpp \ - outfeat.cpp \ - picofeat.cpp \ - protos.cpp \ - sampleiterator.cpp \ - shapeclassifier.cpp \ - shapetable.cpp \ - tessclassifier.cpp \ - trainingsample.cpp \ - trainingsampleset.cpp -endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptive.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptive.cpp deleted file mode 100644 index 14762559..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptive.cpp +++ /dev/null @@ -1,510 +0,0 @@ -/****************************************************************************** - ** Filename: adaptive.c - ** Purpose: Adaptive matcher. - ** Author: Dan Johnson - ** History: Fri Mar 8 10:00:21 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "adaptive.h" -#include "emalloc.h" -#include "globals.h" -#include "classify.h" - -#include -#include - -using tesseract::TFile; - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine adds a new adapted class to an existing - * set of adapted templates. 
- * - * @param Templates set of templates to add new class to - * @param Class new class to add to templates - * @param ClassId class id to associate with new class - * - * @note Globals: none - */ -void AddAdaptedClass(ADAPT_TEMPLATES Templates, - ADAPT_CLASS Class, - CLASS_ID ClassId) { - INT_CLASS IntClass; - - assert (Templates != nullptr); - assert (Class != nullptr); - assert (LegalClassId (ClassId)); - assert (UnusedClassIdIn (Templates->Templates, ClassId)); - assert (Class->NumPermConfigs == 0); - - IntClass = NewIntClass (1, 1); - AddIntClass (Templates->Templates, ClassId, IntClass); - - assert (Templates->Class[ClassId] == nullptr); - Templates->Class[ClassId] = Class; - -} /* AddAdaptedClass */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine frees all memory consumed by a temporary - * configuration. - * - * @param Config config to be freed - * - * @note Globals: none - */ -void FreeTempConfig(TEMP_CONFIG Config) { - assert (Config != nullptr); - FreeBitVector (Config->Protos); - free(Config); -} /* FreeTempConfig */ - -/*---------------------------------------------------------------------------*/ -void FreeTempProto(void *arg) { - PROTO proto = (PROTO) arg; - - free(proto); -} - -static void FreePermConfig(PERM_CONFIG Config) { - assert(Config != nullptr); - delete [] Config->Ambigs; - free(Config); -} - -/*---------------------------------------------------------------------------*/ -/** - * This operation allocates and initializes a new adapted - * class data structure and returns a ptr to it. - * - * @return Ptr to new class data structure. 
- * - * @note Globals: none - */ -ADAPT_CLASS NewAdaptedClass() { - ADAPT_CLASS Class; - - Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); - Class->NumPermConfigs = 0; - Class->MaxNumTimesSeen = 0; - Class->TempProtos = NIL_LIST; - - Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); - Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); - zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS)); - zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS)); - - for (int i = 0; i < MAX_NUM_CONFIGS; i++) - TempConfigFor (Class, i) = nullptr; - - return (Class); - -} /* NewAdaptedClass */ - - -/*-------------------------------------------------------------------------*/ -void free_adapted_class(ADAPT_CLASS adapt_class) { - for (int i = 0; i < MAX_NUM_CONFIGS; i++) { - if (ConfigIsPermanent (adapt_class, i) - && PermConfigFor (adapt_class, i) != nullptr) - FreePermConfig (PermConfigFor (adapt_class, i)); - else if (!ConfigIsPermanent (adapt_class, i) - && TempConfigFor (adapt_class, i) != nullptr) - FreeTempConfig (TempConfigFor (adapt_class, i)); - } - FreeBitVector (adapt_class->PermProtos); - FreeBitVector (adapt_class->PermConfigs); - destroy_nodes (adapt_class->TempProtos, FreeTempProto); - Efree(adapt_class); -} - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * Allocates memory for adapted tempates. - * each char in unicharset to the newly created templates - * - * @param InitFromUnicharset if true, add an empty class for - * @return Ptr to new adapted templates. 
- * - * @note Globals: none - */ -ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { - ADAPT_TEMPLATES Templates; - - Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)); - - Templates->Templates = NewIntTemplates (); - Templates->NumPermClasses = 0; - Templates->NumNonEmptyClasses = 0; - - /* Insert an empty class for each unichar id in unicharset */ - for (int i = 0; i < MAX_NUM_CLASSES; i++) { - Templates->Class[i] = nullptr; - if (InitFromUnicharset && i < unicharset.size()) { - AddAdaptedClass(Templates, NewAdaptedClass(), i); - } - } - - return (Templates); - -} /* NewAdaptedTemplates */ - -// Returns FontinfoId of the given config of the given adapted class. -int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) { - return (ConfigIsPermanent(Class, ConfigId) ? - PermConfigFor(Class, ConfigId)->FontinfoId : - TempConfigFor(Class, ConfigId)->FontinfoId); -} - -} // namespace tesseract - -/*----------------------------------------------------------------------------*/ -void free_adapted_templates(ADAPT_TEMPLATES templates) { - - if (templates != nullptr) { - for (int i = 0; i < (templates->Templates)->NumClasses; i++) - free_adapted_class (templates->Class[i]); - free_int_templates (templates->Templates); - Efree(templates); - } -} - - -/*---------------------------------------------------------------------------*/ -/** - * This routine allocates and returns a new temporary config. - * - * @param MaxProtoId max id of any proto in new config - * @param FontinfoId font information from pre-trained templates - * @return Ptr to new temp config. 
- * - * @note Globals: none - */ -TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { - int NumProtos = MaxProtoId + 1; - - TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT)); - Config->Protos = NewBitVector (NumProtos); - - Config->NumTimesSeen = 1; - Config->MaxProtoId = MaxProtoId; - Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); - zero_all_bits (Config->Protos, Config->ProtoVectorSize); - Config->FontinfoId = FontinfoId; - - return (Config); - -} /* NewTempConfig */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine allocates and returns a new temporary proto. - * - * @return Ptr to new temporary proto. - * - * @note Globals: none - */ -TEMP_PROTO NewTempProto() { - return (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT)); -} /* NewTempProto */ - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine prints a summary of the adapted templates - * in Templates to File. 
- * - * @param File open text file to print Templates to - * @param Templates adapted templates to print to File - * - * @note Globals: none - */ -void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { - INT_CLASS IClass; - ADAPT_CLASS AClass; - - fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); - fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n", - Templates->NumNonEmptyClasses, Templates->NumPermClasses); - fprintf (File, " Id NC NPC NP NPP\n"); - fprintf (File, "------------------------\n"); - - for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { - IClass = Templates->Templates->Class[i]; - AClass = Templates->Class[i]; - if (!IsEmptyAdaptedClass (AClass)) { - fprintf (File, "%5d %s %3d %3d %3d %3d\n", - i, unicharset.id_to_unichar(i), - IClass->NumConfigs, AClass->NumPermConfigs, - IClass->NumProtos, - IClass->NumProtos - count (AClass->TempProtos)); - } - } - fprintf (File, "\n"); - -} /* PrintAdaptedTemplates */ -} // namespace tesseract - - -/*---------------------------------------------------------------------------*/ -/** - * Read an adapted class description from file and return - * a ptr to the adapted class. - * - * @param fp open file to read adapted class from - * @return Ptr to new adapted class. 
- * - * @note Globals: none - */ -ADAPT_CLASS ReadAdaptedClass(TFile *fp) { - int NumTempProtos; - int NumConfigs; - int i; - ADAPT_CLASS Class; - - /* first read high level adapted class structure */ - Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); - fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1); - - /* then read in the definitions of the permanent protos and configs */ - Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); - Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); - fp->FRead(Class->PermProtos, sizeof(uint32_t), - WordsInVectorOfSize(MAX_NUM_PROTOS)); - fp->FRead(Class->PermConfigs, sizeof(uint32_t), - WordsInVectorOfSize(MAX_NUM_CONFIGS)); - - /* then read in the list of temporary protos */ - fp->FRead(&NumTempProtos, sizeof(int), 1); - Class->TempProtos = NIL_LIST; - for (i = 0; i < NumTempProtos; i++) { - TEMP_PROTO TempProto = (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT)); - fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1); - Class->TempProtos = push_last (Class->TempProtos, TempProto); - } - - /* then read in the adapted configs */ - fp->FRead(&NumConfigs, sizeof(int), 1); - for (i = 0; i < NumConfigs; i++) - if (test_bit (Class->PermConfigs, i)) - Class->Config[i].Perm = ReadPermConfig(fp); - else - Class->Config[i].Temp = ReadTempConfig(fp); - - return (Class); - -} /* ReadAdaptedClass */ - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * Read a set of adapted templates from file and return - * a ptr to the templates. - * - * @param fp open text file to read adapted templates from - * @return Ptr to adapted templates read from file. 
- * - * @note Globals: none - */ -ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { - ADAPT_TEMPLATES Templates; - - /* first read the high level adaptive template struct */ - Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)); - fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); - - /* then read in the basic integer templates */ - Templates->Templates = ReadIntTemplates(fp); - - /* then read in the adaptive info for each class */ - for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { - Templates->Class[i] = ReadAdaptedClass(fp); - } - return (Templates); - -} /* ReadAdaptedTemplates */ -} // namespace tesseract - - -/*---------------------------------------------------------------------------*/ -/** - * Read a permanent configuration description from file - * and return a ptr to it. - * - * @param fp open file to read permanent config from - * @return Ptr to new permanent configuration description. - * - * @note Globals: none - */ -PERM_CONFIG ReadPermConfig(TFile *fp) { - PERM_CONFIG Config = (PERM_CONFIG)malloc(sizeof(PERM_CONFIG_STRUCT)); - uint8_t NumAmbigs; - fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1); - Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1]; - fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs); - Config->Ambigs[NumAmbigs] = -1; - fp->FRead(&(Config->FontinfoId), sizeof(int), 1); - - return (Config); - -} /* ReadPermConfig */ - - -/*---------------------------------------------------------------------------*/ -/** - * Read a temporary configuration description from file - * and return a ptr to it. - * - * @param fp open file to read temporary config from - * @return Ptr to new temporary configuration description. 
- * - * @note Globals: none - */ -TEMP_CONFIG ReadTempConfig(TFile *fp) { - TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT)); - fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1); - - Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG); - fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize); - - return (Config); - -} /* ReadTempConfig */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine writes a binary representation of Class - * to File. - * - * @param File open file to write Class to - * @param Class adapted class to write to File - * @param NumConfigs number of configs in Class - * - * @note Globals: none - */ -void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { - int NumTempProtos; - LIST TempProtos; - int i; - - /* first write high level adapted class structure */ - fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File); - - /* then write out the definitions of the permanent protos and configs */ - fwrite(Class->PermProtos, sizeof(uint32_t), - WordsInVectorOfSize(MAX_NUM_PROTOS), File); - fwrite(Class->PermConfigs, sizeof(uint32_t), - WordsInVectorOfSize(MAX_NUM_CONFIGS), File); - - /* then write out the list of temporary protos */ - NumTempProtos = count (Class->TempProtos); - fwrite(&NumTempProtos, sizeof(int), 1, File); - TempProtos = Class->TempProtos; - iterate (TempProtos) { - void* proto = first_node(TempProtos); - fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File); - } - - /* then write out the adapted configs */ - fwrite(&NumConfigs, sizeof(int), 1, File); - for (i = 0; i < NumConfigs; i++) - if (test_bit (Class->PermConfigs, i)) - WritePermConfig (File, Class->Config[i].Perm); - else - WriteTempConfig (File, Class->Config[i].Temp); - -} /* WriteAdaptedClass */ - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine saves Templates to File in a binary 
format. - * - * @param File open text file to write Templates to - * @param Templates set of adapted templates to write to File - * - * @note Globals: none - */ -void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { - int i; - - /* first write the high level adaptive template struct */ - fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File); - - /* then write out the basic integer templates */ - WriteIntTemplates (File, Templates->Templates, unicharset); - - /* then write out the adaptive info for each class */ - for (i = 0; i < (Templates->Templates)->NumClasses; i++) { - WriteAdaptedClass (File, Templates->Class[i], - Templates->Templates->Class[i]->NumConfigs); - } -} /* WriteAdaptedTemplates */ -} // namespace tesseract - - -/*---------------------------------------------------------------------------*/ -/** - * This routine writes a binary representation of a - * permanent configuration to File. - * - * @param File open file to write Config to - * @param Config permanent config to write to File - * - * @note Globals: none - */ -void WritePermConfig(FILE *File, PERM_CONFIG Config) { - uint8_t NumAmbigs = 0; - - assert (Config != nullptr); - while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs; - - fwrite(&NumAmbigs, sizeof(uint8_t), 1, File); - fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); - fwrite(&(Config->FontinfoId), sizeof(int), 1, File); -} /* WritePermConfig */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine writes a binary representation of a - * temporary configuration to File. 
- * - * @param File open file to write Config to - * @param Config temporary config to write to File - * - * @note Globals: none - */ -void WriteTempConfig(FILE *File, TEMP_CONFIG Config) { - assert (Config != nullptr); - - fwrite(Config, sizeof (TEMP_CONFIG_STRUCT), 1, File); - fwrite(Config->Protos, sizeof (uint32_t), Config->ProtoVectorSize, File); - -} /* WriteTempConfig */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptive.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptive.h deleted file mode 100644 index 45840256..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptive.h +++ /dev/null @@ -1,140 +0,0 @@ -/****************************************************************************** - ** Filename: adaptive.h - ** Purpose: Interface to adaptive matcher. - ** Author: Dan Johnson - ** History: Fri Mar 8 10:00:49 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef ADAPTIVE_H -#define ADAPTIVE_H - -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "oldlist.h" -#include "intproto.h" -#include - -typedef struct -{ - uint16_t ProtoId; - uint16_t dummy; - PROTO_STRUCT Proto; -} - - -TEMP_PROTO_STRUCT; -typedef TEMP_PROTO_STRUCT *TEMP_PROTO; - -typedef struct -{ - uint8_t NumTimesSeen; - uint8_t ProtoVectorSize; - PROTO_ID MaxProtoId; - BIT_VECTOR Protos; - int FontinfoId; // font information inferred from pre-trained templates -} TEMP_CONFIG_STRUCT; -typedef TEMP_CONFIG_STRUCT *TEMP_CONFIG; - -typedef struct -{ - UNICHAR_ID *Ambigs; - int FontinfoId; // font information inferred from pre-trained templates -} PERM_CONFIG_STRUCT; -typedef PERM_CONFIG_STRUCT *PERM_CONFIG; - -typedef union -{ - TEMP_CONFIG Temp; - PERM_CONFIG Perm; -} ADAPTED_CONFIG; - -typedef struct -{ - uint8_t NumPermConfigs; - uint8_t MaxNumTimesSeen; // maximum number of times any TEMP_CONFIG was seen - uint8_t dummy[2]; // (cut at matcher_min_examples_for_prototyping) - BIT_VECTOR PermProtos; - BIT_VECTOR PermConfigs; - LIST TempProtos; - ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]; -} ADAPT_CLASS_STRUCT; -typedef ADAPT_CLASS_STRUCT *ADAPT_CLASS; - -typedef struct -{ - INT_TEMPLATES Templates; - int NumNonEmptyClasses; - uint8_t NumPermClasses; - uint8_t dummy[3]; - ADAPT_CLASS Class[MAX_NUM_CLASSES]; -} ADAPT_TEMPLATES_STRUCT; -typedef ADAPT_TEMPLATES_STRUCT *ADAPT_TEMPLATES; - -/*---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------*/ -#define NumNonEmptyClassesIn(Template) ((Template)->NumNonEmptyClasses) - -#define IsEmptyAdaptedClass(Class) ((Class)->NumPermConfigs == 0 && \ 
-(Class)->TempProtos == NIL_LIST) - -#define ConfigIsPermanent(Class,ConfigId) \ -(test_bit ((Class)->PermConfigs, ConfigId)) - -#define MakeConfigPermanent(Class,ConfigId) \ -(SET_BIT ((Class)->PermConfigs, ConfigId)) - -#define MakeProtoPermanent(Class,ProtoId) \ -(SET_BIT ((Class)->PermProtos, ProtoId)) - -#define TempConfigFor(Class,ConfigId) \ -((Class)->Config[ConfigId].Temp) - -#define PermConfigFor(Class,ConfigId) \ -((Class)->Config[ConfigId].Perm) - -#define IncreaseConfidence(TempConfig) \ -((TempConfig)->NumTimesSeen++) - -void AddAdaptedClass(ADAPT_TEMPLATES Templates, - ADAPT_CLASS Class, - CLASS_ID ClassId); - -void FreeTempProto(void *arg); - -void FreeTempConfig(TEMP_CONFIG Config); - -ADAPT_CLASS NewAdaptedClass(); - -void free_adapted_class(ADAPT_CLASS adapt_class); - -void free_adapted_templates(ADAPT_TEMPLATES templates); - -TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId); - -TEMP_PROTO NewTempProto(); - -ADAPT_CLASS ReadAdaptedClass(tesseract::TFile *File); - -PERM_CONFIG ReadPermConfig(tesseract::TFile *File); - -TEMP_CONFIG ReadTempConfig(tesseract::TFile *File); - -void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs); - -void WritePermConfig(FILE *File, PERM_CONFIG Config); - -void WriteTempConfig(FILE *File, TEMP_CONFIG Config); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptmatch.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptmatch.cpp deleted file mode 100644 index ba7a922f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/adaptmatch.cpp +++ /dev/null @@ -1,2305 +0,0 @@ -/****************************************************************************** - ** Filename: adaptmatch.cpp - ** Purpose: High level adaptive matcher. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include // for max, min -#include // for assert -#include // for fabs -#include // for INT32_MAX, UINT8_MAX -#include // for fflush, fclose, fopen, stdout, FILE -#include // for malloc -#include // for strstr, memset, strcmp -#include "adaptive.h" // for ADAPT_CLASS, free_adapted_templates -#include "ambigs.h" // for UnicharIdVector, UnicharAmbigs -#include "bitvec.h" // for FreeBitVector, NewBitVector, BIT_VECTOR -#include "blobs.h" // for TBLOB, TWERD -#include "callcpp.h" // for cprintf, window_wait -#include "classify.h" // for Classify, CST_FRAGMENT, CST_WHOLE -#include "dict.h" // for Dict -#include "errcode.h" // for ASSERT_HOST -#include "featdefs.h" // for CharNormDesc -#include "float2int.h" // for BASELINE_Y_SHIFT -#include "fontinfo.h" // for ScoredFont, FontSet -#include "genericvector.h" // for GenericVector -#include "helpers.h" // for IntCastRounded, ClipToRange -#include "host.h" // for FALSE, TRUE -#include "intfx.h" // for BlobToTrainingSample, INT_FX_RESULT_S... 
-#include "intmatcher.h" // for CP_RESULT_STRUCT, IntegerMatcher -#include "intproto.h" // for INT_FEATURE_STRUCT, (anonymous), Clas... -#include "matchdefs.h" // for CLASS_ID, FEATURE_ID, PROTO_ID, NO_PROTO -#include "mfoutline.h" // for baseline, character, MF_SCALE_FACTOR -#include "normalis.h" // for DENORM, kBlnBaselineOffset, kBlnXHeight -#include "normfeat.h" // for ActualOutlineLength, CharNormLength -#include "ocrfeatures.h" // for FEATURE_STRUCT, FreeFeatureSet, FEATURE -#include "oldlist.h" // for push, delete_d -#include "outfeat.h" // for OutlineFeatDir, OutlineFeatLength -#include "pageres.h" // for WERD_RES -#include "params.h" // for IntParam, BoolParam, DoubleParam, Str... -#include "picofeat.h" // for PicoFeatDir, PicoFeatX, PicoFeatY -#include "protos.h" // for PROTO_STRUCT, FillABC, PROTO -#include "ratngs.h" // for BLOB_CHOICE_IT, BLOB_CHOICE_LIST, BLO... -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::BROWN, Scroll... -#include "seam.h" // for SEAM -#include "serialis.h" // for TFile -#include "shapeclassifier.h" // for ShapeClassifier -#include "shapetable.h" // for UnicharRating, ShapeTable, Shape, Uni... 
-#include "strngs.h" // for STRING -#include "tessclassifier.h" // for TessClassifier -#include "tessdatamanager.h" // for TessdataManager, TESSDATA_INTTEMP -#include "tprintf.h" // for tprintf -#include "trainingsample.h" // for TrainingSample -#include "unichar.h" // for UNICHAR_ID, INVALID_UNICHAR_ID -#include "unicharset.h" // for UNICHARSET, CHAR_FRAGMENT, UNICHAR_SPACE -#include "unicity_table.h" // for UnicityTable - -#define ADAPT_TEMPLATE_SUFFIX ".a" - -#define MAX_MATCHES 10 -#define UNLIKELY_NUM_FEAT 200 -#define NO_DEBUG 0 -#define MAX_ADAPTABLE_WERD_SIZE 40 - -#define ADAPTABLE_WERD_ADJUSTMENT (0.05) - -#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) - -#define WORST_POSSIBLE_RATING (0.0f) - -using tesseract::UnicharRating; -using tesseract::ScoredFont; - -struct ADAPT_RESULTS { - int32_t BlobLength; - bool HasNonfragment; - UNICHAR_ID best_unichar_id; - int best_match_index; - float best_rating; - GenericVector match; - GenericVector CPResults; - - /// Initializes data members to the default values. Sets the initial - /// rating of each class to be the worst possible rating (1.0). - inline void Initialize() { - BlobLength = INT32_MAX; - HasNonfragment = false; - ComputeBest(); - } - // Computes best_unichar_id, best_match_index and best_rating. 
- void ComputeBest() { - best_unichar_id = INVALID_UNICHAR_ID; - best_match_index = -1; - best_rating = WORST_POSSIBLE_RATING; - for (int i = 0; i < match.size(); ++i) { - if (match[i].rating > best_rating) { - best_rating = match[i].rating; - best_unichar_id = match[i].unichar_id; - best_match_index = i; - } - } - } -}; - -struct PROTO_KEY { - ADAPT_TEMPLATES Templates; - CLASS_ID ClassId; - int ConfigId; -}; - -/*----------------------------------------------------------------------------- - Private Macros ------------------------------------------------------------------------------*/ -inline bool MarginalMatch(float confidence, float matcher_great_threshold) { - return (1.0f - confidence) > matcher_great_threshold; -} - -/*----------------------------------------------------------------------------- - Private Function Prototypes ------------------------------------------------------------------------------*/ -// Returns the index of the given id in results, if present, or the size of the -// vector (index it will go at) if not present. -static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { - for (int i = 0; i < results.match.size(); i++) { - if (results.match[i].unichar_id == id) - return i; - } - return results.match.size(); -} - -// Returns the current rating for a unichar id if we have rated it, defaulting -// to WORST_POSSIBLE_RATING. 
-static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { - int index = FindScoredUnichar(id, results); - if (index >= results.match.size()) return WORST_POSSIBLE_RATING; - return results.match[index].rating; -} - -void InitMatcherRatings(float *Rating); - -int MakeTempProtoPerm(void *item1, void *item2); - -void SetAdaptiveThreshold(float Threshold); - - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine calls the adaptive matcher - * which returns (in an array) the class id of each - * class matched. - * - * It also returns the number of classes matched. - * For each class matched it places the best rating - * found for that class into the Ratings array. - * - * Bad matches are then removed so that they don't - * need to be sorted. The remaining good matches are - * then sorted and converted to choices. - * - * This routine also performs some simple speckle - * filtering. - * - * @param Blob blob to be classified - * @param[out] Choices List of choices found by adaptive matcher. - * filled on return with the choices found by the - * class pruner and the ratings therefrom. Also - * contains the detailed results of the integer matcher. - * - */ -void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) { - assert(Choices != nullptr); - ADAPT_RESULTS *Results = new ADAPT_RESULTS; - Results->Initialize(); - - ASSERT_HOST(AdaptedTemplates != nullptr); - - DoAdaptiveMatch(Blob, Results); - - RemoveBadMatches(Results); - Results->match.sort(&UnicharRating::SortDescendingRating); - RemoveExtraPuncs(Results); - Results->ComputeBest(); - ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results, - Choices); - - // TODO(rays) Move to before ConvertMatchesToChoices! 
- if (LargeSpeckle(*Blob) || Choices->length() == 0) - AddLargeSpeckleTo(Results->BlobLength, Choices); - - if (matcher_debug_level >= 1) { - tprintf("AD Matches = "); - PrintAdaptiveMatchResults(*Results); - } - -#ifndef GRAPHICS_DISABLED - if (classify_enable_adaptive_debugger) - DebugAdaptiveClassifier(Blob, Results); -#endif - - delete Results; -} /* AdaptiveClassifier */ - -// If *win is nullptr, sets it to a new ScrollView() object with title msg. -// Clears the window and draws baselines. -void Classify::RefreshDebugWindow(ScrollView **win, const char *msg, - int y_offset, const TBOX &wbox) { - #ifndef GRAPHICS_DISABLED - const int kSampleSpaceWidth = 500; - if (*win == nullptr) { - *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200, - kSampleSpaceWidth * 2, 200, true); - } - (*win)->Clear(); - (*win)->Pen(64, 64, 64); - (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset, - kSampleSpaceWidth, kBlnBaselineOffset); - (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset, - kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset); - (*win)->ZoomToRectangle(wbox.left(), wbox.top(), - wbox.right(), wbox.bottom()); - #endif // GRAPHICS_DISABLED -} - -// Learns the given word using its chopped_word, seam_array, denorm, -// box_word, best_state, and correct_text to learn both correctly and -// incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob -// is called and the data will be saved in an internal buffer. -// Otherwise AdaptToBlob is called for adaption within a document. -void Classify::LearnWord(const char* fontname, WERD_RES* word) { - int word_len = word->correct_text.size(); - if (word_len == 0) return; - - float* thresholds = nullptr; - if (fontname == nullptr) { - // Adaption mode. - if (!EnableLearning || word->best_choice == nullptr) - return; // Can't or won't adapt. 
- - if (classify_learning_debug_level >= 1) - tprintf("\n\nAdapting to word = %s\n", - word->best_choice->debug_string().string()); - thresholds = new float[word_len]; - word->ComputeAdaptionThresholds(certainty_scale, - matcher_perfect_threshold, - matcher_good_threshold, - matcher_rating_margin, thresholds); - } - int start_blob = 0; - - #ifndef GRAPHICS_DISABLED - if (classify_debug_character_fragments) { - if (learn_fragmented_word_debug_win_ != nullptr) { - window_wait(learn_fragmented_word_debug_win_); - } - RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400, - word->chopped_word->bounding_box()); - RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200, - word->chopped_word->bounding_box()); - word->chopped_word->plot(learn_fragmented_word_debug_win_); - ScrollView::Update(); - } - #endif // GRAPHICS_DISABLED - - for (int ch = 0; ch < word_len; ++ch) { - if (classify_debug_character_fragments) { - tprintf("\nLearning %s\n", word->correct_text[ch].string()); - } - if (word->correct_text[ch].length() > 0) { - float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f; - - LearnPieces(fontname, start_blob, word->best_state[ch], threshold, - CST_WHOLE, word->correct_text[ch].string(), word); - - if (word->best_state[ch] > 1 && !disable_character_fragments) { - // Check that the character breaks into meaningful fragments - // that each match a whole character with at least - // classify_character_fragments_garbage_certainty_threshold - bool garbage = false; - int frag; - for (frag = 0; frag < word->best_state[ch]; ++frag) { - TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag]; - if (classify_character_fragments_garbage_certainty_threshold < 0) { - garbage |= LooksLikeGarbage(frag_blob); - } - } - // Learn the fragments. 
- if (!garbage) { - bool pieces_all_natural = word->PiecesAllNatural(start_blob, - word->best_state[ch]); - if (pieces_all_natural || !prioritize_division) { - for (frag = 0; frag < word->best_state[ch]; ++frag) { - GenericVector tokens; - word->correct_text[ch].split(' ', &tokens); - - tokens[0] = CHAR_FRAGMENT::to_string( - tokens[0].string(), frag, word->best_state[ch], - pieces_all_natural); - - STRING full_string; - for (int i = 0; i < tokens.size(); i++) { - full_string += tokens[i]; - if (i != tokens.size() - 1) - full_string += ' '; - } - LearnPieces(fontname, start_blob + frag, 1, threshold, - CST_FRAGMENT, full_string.string(), word); - } - } - } - } - - // TODO(rays): re-enable this part of the code when we switch to the - // new classifier that needs to see examples of garbage. - /* - if (word->best_state[ch] > 1) { - // If the next blob is good, make junk with the rightmost fragment. - if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { - LearnPieces(fontname, start_blob + word->best_state[ch] - 1, - word->best_state[ch + 1] + 1, - threshold, CST_IMPROPER, INVALID_UNICHAR, word); - } - // If the previous blob is good, make junk with the leftmost fragment. - if (ch > 0 && word->correct_text[ch - 1].length() > 0) { - LearnPieces(fontname, start_blob - word->best_state[ch - 1], - word->best_state[ch - 1] + 1, - threshold, CST_IMPROPER, INVALID_UNICHAR, word); - } - } - // If the next blob is good, make a join with it. - if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { - STRING joined_text = word->correct_text[ch]; - joined_text += word->correct_text[ch + 1]; - LearnPieces(fontname, start_blob, - word->best_state[ch] + word->best_state[ch + 1], - threshold, CST_NGRAM, joined_text.string(), word); - } - */ - } - start_blob += word->best_state[ch]; - } - delete [] thresholds; -} // LearnWord. - -// Builds a blob of length fragments, from the word, starting at start, -// and then learns it, as having the given correct_text. 
-// If fontname is not nullptr, then LearnBlob is called and the data will be -// saved in an internal buffer for static training. -// Otherwise AdaptToBlob is called for adaption within a document. -// threshold is a magic number required by AdaptToChar and generated by -// ComputeAdaptionThresholds. -// Although it can be partly inferred from the string, segmentation is -// provided to explicitly clarify the character segmentation. -void Classify::LearnPieces(const char* fontname, int start, int length, - float threshold, CharSegmentationType segmentation, - const char* correct_text, WERD_RES* word) { - // TODO(daria) Remove/modify this if/when we want - // to train and/or adapt to n-grams. - if (segmentation != CST_WHOLE && - (segmentation != CST_FRAGMENT || disable_character_fragments)) - return; - - if (length > 1) { - SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start, - start + length - 1); - } - TBLOB* blob = word->chopped_word->blobs[start]; - // Rotate the blob if needed for classification. - TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded(); - if (rotated_blob == nullptr) - rotated_blob = blob; - - #ifndef GRAPHICS_DISABLED - // Draw debug windows showing the blob that is being learned if needed. 
- if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) { - RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600, - word->chopped_word->bounding_box()); - rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN); - learn_debug_win_->Update(); - window_wait(learn_debug_win_); - } - if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) { - ASSERT_HOST(learn_fragments_debug_win_ != nullptr); // set up in LearnWord - blob->plot(learn_fragments_debug_win_, - ScrollView::BLUE, ScrollView::BROWN); - learn_fragments_debug_win_->Update(); - } - #endif // GRAPHICS_DISABLED - - if (fontname != nullptr) { - classify_norm_method.set_value(character); // force char norm spc 30/11/93 - tess_bn_matching.set_value(false); // turn it off - tess_cn_matching.set_value(false); - DENORM bl_denorm, cn_denorm; - INT_FX_RESULT_STRUCT fx_info; - SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm, - &bl_denorm, &cn_denorm, &fx_info); - LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text); - } else if (unicharset.contains_unichar(correct_text)) { - UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text); - int font_id = word->fontinfo != nullptr - ? fontinfo_table_.get_id(*word->fontinfo) - : 0; - if (classify_learning_debug_level >= 1) - tprintf("Adapting to char = %s, thr= %g font_id= %d\n", - unicharset.id_to_unichar(class_id), threshold, font_id); - // If filename is not nullptr we are doing recognition - // (as opposed to training), so we must have already set word fonts. - AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates); - if (BackupAdaptedTemplates != nullptr) { - // Adapt the backup templates too. They will be used if the primary gets - // too full. 
- AdaptToChar(rotated_blob, class_id, font_id, threshold, - BackupAdaptedTemplates); - } - } else if (classify_debug_level >= 1) { - tprintf("Can't adapt to %s not in unicharset\n", correct_text); - } - if (rotated_blob != blob) { - delete rotated_blob; - } - - SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start, - start + length - 1); -} // LearnPieces. - -/*---------------------------------------------------------------------------*/ -/** - * This routine performs cleanup operations - * on the adaptive classifier. It should be called - * before the program is terminated. Its main function - * is to save the adapted templates to a file. - * - * Globals: - * - #AdaptedTemplates current set of adapted templates - * - #classify_save_adapted_templates TRUE if templates should be saved - * - #classify_enable_adaptive_matcher TRUE if adaptive matcher is enabled - */ -void Classify::EndAdaptiveClassifier() { - STRING Filename; - FILE *File; - - if (AdaptedTemplates != nullptr && - classify_enable_adaptive_matcher && classify_save_adapted_templates) { - Filename = imagefile + ADAPT_TEMPLATE_SUFFIX; - File = fopen (Filename.string(), "wb"); - if (File == nullptr) - cprintf ("Unable to save adapted templates to %s!\n", Filename.string()); - else { - cprintf ("\nSaving adapted templates to %s ...", Filename.string()); - fflush(stdout); - WriteAdaptedTemplates(File, AdaptedTemplates); - cprintf ("\n"); - fclose(File); - } - } - - if (AdaptedTemplates != nullptr) { - free_adapted_templates(AdaptedTemplates); - AdaptedTemplates = nullptr; - } - if (BackupAdaptedTemplates != nullptr) { - free_adapted_templates(BackupAdaptedTemplates); - BackupAdaptedTemplates = nullptr; - } - - if (PreTrainedTemplates != nullptr) { - free_int_templates(PreTrainedTemplates); - PreTrainedTemplates = nullptr; - } - getDict().EndDangerousAmbigs(); - FreeNormProtos(); - if (AllProtosOn != nullptr) { - FreeBitVector(AllProtosOn); - FreeBitVector(AllConfigsOn); - 
FreeBitVector(AllConfigsOff); - FreeBitVector(TempProtoMask); - AllProtosOn = nullptr; - AllConfigsOn = nullptr; - AllConfigsOff = nullptr; - TempProtoMask = nullptr; - } - delete shape_table_; - shape_table_ = nullptr; - delete static_classifier_; - static_classifier_ = nullptr; -} /* EndAdaptiveClassifier */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine reads in the training - * information needed by the adaptive classifier - * and saves it into global variables. - * Parameters: - * load_pre_trained_templates Indicates whether the pre-trained - * templates (inttemp, normproto and pffmtable components) - * should be loaded. Should only be set to true if the - * necessary classifier components are present in the - * [lang].traineddata file. - * Globals: - * BuiltInTemplatesFile file to get built-in temps from - * BuiltInCutoffsFile file to get avg. feat per class from - * classify_use_pre_adapted_templates - * enables use of pre-adapted templates - */ -void Classify::InitAdaptiveClassifier(TessdataManager* mgr) { - if (!classify_enable_adaptive_matcher) - return; - if (AllProtosOn != nullptr) - EndAdaptiveClassifier(); // Don't leak with multiple inits. - - // If there is no language_data_path_prefix, the classifier will be - // adaptive only. 
- if (language_data_path_prefix.length() > 0 && mgr != nullptr) { - TFile fp; - ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp)); - PreTrainedTemplates = ReadIntTemplates(&fp); - - if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) { - shape_table_ = new ShapeTable(unicharset); - if (!shape_table_->DeSerialize(&fp)) { - tprintf("Error loading shape table!\n"); - delete shape_table_; - shape_table_ = nullptr; - } - } - - ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp)); - ReadNewCutoffs(&fp, CharNormCutoffs); - - ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp)); - NormProtos = ReadNormProtos(&fp); - static_classifier_ = new TessClassifier(false, this); - } - - InitIntegerFX(); - - AllProtosOn = NewBitVector(MAX_NUM_PROTOS); - AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS); - AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS); - TempProtoMask = NewBitVector(MAX_NUM_PROTOS); - set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS)); - set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS)); - zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS)); - - for (int i = 0; i < MAX_NUM_CLASSES; i++) { - BaselineCutoffs[i] = 0; - } - - if (classify_use_pre_adapted_templates) { - TFile fp; - STRING Filename; - - Filename = imagefile; - Filename += ADAPT_TEMPLATE_SUFFIX; - if (!fp.Open(Filename.string(), nullptr)) { - AdaptedTemplates = NewAdaptedTemplates(true); - } else { - cprintf("\nReading pre-adapted templates from %s ...\n", - Filename.string()); - fflush(stdout); - AdaptedTemplates = ReadAdaptedTemplates(&fp); - cprintf("\n"); - PrintAdaptedTemplates(stdout, AdaptedTemplates); - - for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) { - BaselineCutoffs[i] = CharNormCutoffs[i]; - } - } - } else { - if (AdaptedTemplates != nullptr) - free_adapted_templates(AdaptedTemplates); - AdaptedTemplates = NewAdaptedTemplates(true); - } -} /* InitAdaptiveClassifier */ - -void Classify::ResetAdaptiveClassifierInternal() { - if 
(classify_learning_debug_level > 0) { - tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n", - NumAdaptationsFailed); - } - free_adapted_templates(AdaptedTemplates); - AdaptedTemplates = NewAdaptedTemplates(true); - if (BackupAdaptedTemplates != nullptr) - free_adapted_templates(BackupAdaptedTemplates); - BackupAdaptedTemplates = nullptr; - NumAdaptationsFailed = 0; -} - -// If there are backup adapted templates, switches to those, otherwise resets -// the main adaptive classifier (because it is full.) -void Classify::SwitchAdaptiveClassifier() { - if (BackupAdaptedTemplates == nullptr) { - ResetAdaptiveClassifierInternal(); - return; - } - if (classify_learning_debug_level > 0) { - tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n", - NumAdaptationsFailed); - } - free_adapted_templates(AdaptedTemplates); - AdaptedTemplates = BackupAdaptedTemplates; - BackupAdaptedTemplates = nullptr; - NumAdaptationsFailed = 0; -} - -// Resets the backup adaptive classifier to empty. -void Classify::StartBackupAdaptiveClassifier() { - if (BackupAdaptedTemplates != nullptr) - free_adapted_templates(BackupAdaptedTemplates); - BackupAdaptedTemplates = NewAdaptedTemplates(true); -} - -/*---------------------------------------------------------------------------*/ -/** - * This routine prepares the adaptive - * matcher for the start - * of the first pass. Learning is enabled (unless it - * is disabled for the whole program). - * - * @note this is somewhat redundant, it simply says that if learning is - * enabled then it will remain enabled on the first pass. If it is - * disabled, then it will remain disabled. This is only put here to - * make it very clear that learning is controlled directly by the global - * setting of EnableLearning. 
- * - * Globals: - * - #EnableLearning - * set to TRUE by this routine - */ -void Classify::SettupPass1() { - EnableLearning = classify_enable_learning; - - getDict().SettupStopperPass1(); - -} /* SettupPass1 */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine prepares the adaptive - * matcher for the start of the second pass. Further - * learning is disabled. - * - * Globals: - * - #EnableLearning set to FALSE by this routine - */ -void Classify::SettupPass2() { - EnableLearning = FALSE; - getDict().SettupStopperPass2(); - -} /* SettupPass2 */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine creates a new adapted - * class and uses Blob as the model for the first - * config in that class. - * - * @param Blob blob to model new class after - * @param ClassId id of the class to be initialized - * @param FontinfoId font information inferred from pre-trained templates - * @param Class adapted class to be initialized - * @param Templates adapted templates to add new class to - * - * Globals: - * - #AllProtosOn dummy mask with all 1's - * - BaselineCutoffs kludge needed to get cutoffs - * - #PreTrainedTemplates kludge needed to get cutoffs - */ -void Classify::InitAdaptedClass(TBLOB *Blob, - CLASS_ID ClassId, - int FontinfoId, - ADAPT_CLASS Class, - ADAPT_TEMPLATES Templates) { - FEATURE_SET Features; - int Fid, Pid; - FEATURE Feature; - int NumFeatures; - TEMP_PROTO TempProto; - PROTO Proto; - INT_CLASS IClass; - TEMP_CONFIG Config; - - classify_norm_method.set_value(baseline); - Features = ExtractOutlineFeatures(Blob); - NumFeatures = Features->NumFeatures; - if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) { - FreeFeatureSet(Features); - return; - } - - Config = NewTempConfig(NumFeatures - 1, FontinfoId); - TempConfigFor(Class, 0) = Config; - - /* this is a kludge to construct cutoffs for adapted templates */ - if (Templates == AdaptedTemplates) 
- BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId]; - - IClass = ClassForClassId (Templates->Templates, ClassId); - - for (Fid = 0; Fid < Features->NumFeatures; Fid++) { - Pid = AddIntProto (IClass); - assert (Pid != NO_PROTO); - - Feature = Features->Features[Fid]; - TempProto = NewTempProto (); - Proto = &(TempProto->Proto); - - /* compute proto params - NOTE that Y_DIM_OFFSET must be used because - ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 - instead of the -0.25 to 0.75 used in baseline normalization */ - Proto->Angle = Feature->Params[OutlineFeatDir]; - Proto->X = Feature->Params[OutlineFeatX]; - Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET; - Proto->Length = Feature->Params[OutlineFeatLength]; - FillABC(Proto); - - TempProto->ProtoId = Pid; - SET_BIT (Config->Protos, Pid); - - ConvertProto(Proto, Pid, IClass); - AddProtoToProtoPruner(Proto, Pid, IClass, - classify_learning_debug_level >= 2); - - Class->TempProtos = push (Class->TempProtos, TempProto); - } - FreeFeatureSet(Features); - - AddIntConfig(IClass); - ConvertConfig (AllProtosOn, 0, IClass); - - if (classify_learning_debug_level >= 1) { - tprintf("Added new class '%s' with class id %d and %d protos.\n", - unicharset.id_to_unichar(ClassId), ClassId, NumFeatures); - if (classify_learning_debug_level > 1) - DisplayAdaptedChar(Blob, IClass); - } - - if (IsEmptyAdaptedClass(Class)) - (Templates->NumNonEmptyClasses)++; -} /* InitAdaptedClass */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine sets up the feature - * extractor to extract baseline normalized - * pico-features. - * - * The extracted pico-features are converted - * to integer form and placed in IntFeatures. The - * original floating-pt. features are returned in - * FloatFeatures. 
- * - * Globals: none - * @param Blob blob to extract features from - * @param[out] IntFeatures array to fill with integer features - * @param[out] FloatFeatures place to return actual floating-pt features - * - * @return Number of pico-features returned (0 if - * an error occurred) - */ -int Classify::GetAdaptiveFeatures(TBLOB *Blob, - INT_FEATURE_ARRAY IntFeatures, - FEATURE_SET *FloatFeatures) { - FEATURE_SET Features; - int NumFeatures; - - classify_norm_method.set_value(baseline); - Features = ExtractPicoFeatures(Blob); - - NumFeatures = Features->NumFeatures; - if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) { - FreeFeatureSet(Features); - return 0; - } - - ComputeIntFeatures(Features, IntFeatures); - *FloatFeatures = Features; - - return NumFeatures; -} /* GetAdaptiveFeatures */ - - -/*----------------------------------------------------------------------------- - Private Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * Return TRUE if the specified word is - * acceptable for adaptation. - * - * Globals: none - * - * @param word current word - * - * @return true or false - */ -bool Classify::AdaptableWord(WERD_RES* word) { - if (word->best_choice == nullptr) return false; - int BestChoiceLength = word->best_choice->length(); - float adaptable_score = - getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT; - return // rules that apply in general - simplest to compute first - BestChoiceLength > 0 && - BestChoiceLength == word->rebuild_word->NumBlobs() && - BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && - // This basically ensures that the word is at least a dictionary match - // (freq word, user word, system dawg word, etc). 
- // Since all the other adjustments will make adjust factor higher - // than higher than adaptable_score=1.1+0.05=1.15 - // Since these are other flags that ensure that the word is dict word, - // this check could be at times redundant. - word->best_choice->adjust_factor() <= adaptable_score && - // Make sure that alternative choices are not dictionary words. - word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score); -} - -/*---------------------------------------------------------------------------*/ -/** - * @param Blob blob to add to templates for ClassId - * @param ClassId class to add blob to - * @param FontinfoId font information from pre-trained templates - * @param Threshold minimum match rating to existing template - * @param adaptive_templates current set of adapted templates - * - * Globals: - * - AllProtosOn dummy mask to match against all protos - * - AllConfigsOn dummy mask to match against all configs - * - * @return none - */ -void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, - float Threshold, - ADAPT_TEMPLATES adaptive_templates) { - int NumFeatures; - INT_FEATURE_ARRAY IntFeatures; - UnicharRating int_result; - INT_CLASS IClass; - ADAPT_CLASS Class; - TEMP_CONFIG TempConfig; - FEATURE_SET FloatFeatures; - int NewTempConfigId; - - if (!LegalClassId (ClassId)) - return; - - int_result.unichar_id = ClassId; - Class = adaptive_templates->Class[ClassId]; - assert(Class != nullptr); - if (IsEmptyAdaptedClass(Class)) { - InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates); - } else { - IClass = ClassForClassId(adaptive_templates->Templates, ClassId); - - NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures); - if (NumFeatures <= 0) { - return; // Features already freed by GetAdaptiveFeatures. - } - - // Only match configs with the matching font. 
- BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS); - for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) { - if (GetFontinfoId(Class, cfg) == FontinfoId) { - SET_BIT(MatchingFontConfigs, cfg); - } else { - reset_bit(MatchingFontConfigs, cfg); - } - } - im_.Match(IClass, AllProtosOn, MatchingFontConfigs, - NumFeatures, IntFeatures, - &int_result, classify_adapt_feature_threshold, - NO_DEBUG, matcher_debug_separate_windows); - FreeBitVector(MatchingFontConfigs); - - SetAdaptiveThreshold(Threshold); - - if (1.0f - int_result.rating <= Threshold) { - if (ConfigIsPermanent(Class, int_result.config)) { - if (classify_learning_debug_level >= 1) - tprintf("Found good match to perm config %d = %4.1f%%.\n", - int_result.config, int_result.rating * 100.0); - FreeFeatureSet(FloatFeatures); - return; - } - - TempConfig = TempConfigFor(Class, int_result.config); - IncreaseConfidence(TempConfig); - if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) { - Class->MaxNumTimesSeen = TempConfig->NumTimesSeen; - } - if (classify_learning_debug_level >= 1) - tprintf("Increasing reliability of temp config %d to %d.\n", - int_result.config, TempConfig->NumTimesSeen); - - if (TempConfigReliable(ClassId, TempConfig)) { - MakePermanent(adaptive_templates, ClassId, int_result.config, Blob); - UpdateAmbigsGroup(ClassId, Blob); - } - } else { - if (classify_learning_debug_level >= 1) { - tprintf("Found poor match to temp config %d = %4.1f%%.\n", - int_result.config, int_result.rating * 100.0); - if (classify_learning_debug_level > 2) - DisplayAdaptedChar(Blob, IClass); - } - NewTempConfigId = - MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId, - NumFeatures, IntFeatures, FloatFeatures); - if (NewTempConfigId >= 0 && - TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) { - MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob); - UpdateAmbigsGroup(ClassId, Blob); - } - -#ifndef GRAPHICS_DISABLED - if (classify_learning_debug_level > 1) { - 
DisplayAdaptedChar(Blob, IClass); - } -#endif - } - FreeFeatureSet(FloatFeatures); - } -} /* AdaptToChar */ - -void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { -#ifndef GRAPHICS_DISABLED - INT_FX_RESULT_STRUCT fx_info; - GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info, - &bl_features); - if (sample == nullptr) return; - - UnicharRating int_result; - im_.Match(int_class, AllProtosOn, AllConfigsOn, - bl_features.size(), &bl_features[0], - &int_result, classify_adapt_feature_threshold, - NO_DEBUG, matcher_debug_separate_windows); - tprintf("Best match to temp config %d = %4.1f%%.\n", - int_result.config, int_result.rating * 100.0); - if (classify_learning_debug_level >= 2) { - uint32_t ConfigMask; - ConfigMask = 1 << int_result.config; - ShowMatchDisplay(); - im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask, - bl_features.size(), &bl_features[0], - &int_result, classify_adapt_feature_threshold, - 6 | 0x19, matcher_debug_separate_windows); - UpdateMatchDisplay(); - } - - delete sample; -#endif -} - -/** - * This routine adds the result of a classification into - * Results. If the new rating is much worse than the current - * best rating, it is not entered into results because it - * would end up being stripped later anyway. If the new rating - * is better than the old rating for the class, it replaces the - * old rating. If this is the first rating for the class, the - * class is added to the list of matched classes in Results. - * If the new rating is better than the best so far, it - * becomes the best so far. 
- * - * Globals: - * - #matcher_bad_match_pad defines limits of an acceptable match - * - * @param new_result new result to add - * @param[out] results results to add new result to - */ -void Classify::AddNewResult(const UnicharRating& new_result, - ADAPT_RESULTS *results) { - int old_match = FindScoredUnichar(new_result.unichar_id, *results); - - if (new_result.rating + matcher_bad_match_pad < results->best_rating || - (old_match < results->match.size() && - new_result.rating <= results->match[old_match].rating)) - return; // New one not good enough. - - if (!unicharset.get_fragment(new_result.unichar_id)) - results->HasNonfragment = true; - - if (old_match < results->match.size()) { - results->match[old_match].rating = new_result.rating; - } else { - results->match.push_back(new_result); - } - - if (new_result.rating > results->best_rating && - // Ensure that fragments do not affect best rating, class and config. - // This is needed so that at least one non-fragmented character is - // always present in the results. - // TODO(daria): verify that this helps accuracy and does not - // hurt performance. - !unicharset.get_fragment(new_result.unichar_id)) { - results->best_match_index = old_match; - results->best_rating = new_result.rating; - results->best_unichar_id = new_result.unichar_id; - } -} /* AddNewResult */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine is identical to CharNormClassifier() - * except that it does no class pruning. It simply matches - * the unknown blob against the classes listed in - * Ambiguities. 
- * - * Globals: - * - #AllProtosOn mask that enables all protos - * - #AllConfigsOn mask that enables all configs - * - * @param blob blob to be classified - * @param templates built-in templates to classify against - * @param classes adapted class templates - * @param ambiguities array of unichar id's to match against - * @param[out] results place to put match results - * @param int_features - * @param fx_info - */ -void Classify::AmbigClassifier( - const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - const TBLOB *blob, - INT_TEMPLATES templates, - ADAPT_CLASS *classes, - UNICHAR_ID *ambiguities, - ADAPT_RESULTS *results) { - if (int_features.empty()) return; - uint8_t* CharNormArray = new uint8_t[unicharset.size()]; - UnicharRating int_result; - - results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr, - CharNormArray); - bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; - if (debug) - tprintf("AM Matches = "); - - int top = blob->bounding_box().top(); - int bottom = blob->bounding_box().bottom(); - while (*ambiguities >= 0) { - CLASS_ID class_id = *ambiguities; - - int_result.unichar_id = class_id; - im_.Match(ClassForClassId(templates, class_id), - AllProtosOn, AllConfigsOn, - int_features.size(), &int_features[0], - &int_result, - classify_adapt_feature_threshold, NO_DEBUG, - matcher_debug_separate_windows); - - ExpandShapesAndApplyCorrections(nullptr, debug, class_id, bottom, top, 0, - results->BlobLength, - classify_integer_matcher_multiplier, - CharNormArray, &int_result, results); - ambiguities++; - } - delete [] CharNormArray; -} /* AmbigClassifier */ - -/*---------------------------------------------------------------------------*/ -/// Factored-out calls to IntegerMatcher based on class pruner results. -/// Returns integer matcher results inside CLASS_PRUNER_RESULTS structure. 
-void Classify::MasterMatcher(INT_TEMPLATES templates, - int16_t num_features, - const INT_FEATURE_STRUCT* features, - const uint8_t* norm_factors, - ADAPT_CLASS* classes, - int debug, - int matcher_multiplier, - const TBOX& blob_box, - const GenericVector& results, - ADAPT_RESULTS* final_results) { - int top = blob_box.top(); - int bottom = blob_box.bottom(); - UnicharRating int_result; - for (int c = 0; c < results.size(); c++) { - CLASS_ID class_id = results[c].Class; - BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos - : AllProtosOn; - BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs - : AllConfigsOn; - - int_result.unichar_id = class_id; - im_.Match(ClassForClassId(templates, class_id), - protos, configs, - num_features, features, - &int_result, classify_adapt_feature_threshold, debug, - matcher_debug_separate_windows); - bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1; - ExpandShapesAndApplyCorrections(classes, is_debug, class_id, bottom, top, - results[c].Rating, - final_results->BlobLength, - matcher_multiplier, norm_factors, - &int_result, final_results); - } -} - -// Converts configs to fonts, and if the result is not adapted, and a -// shape_table_ is present, the shape is expanded to include all -// unichar_ids represented, before applying a set of corrections to the -// distance rating in int_result, (see ComputeCorrectedRating.) -// The results are added to the final_results output. -void Classify::ExpandShapesAndApplyCorrections( - ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top, - float cp_rating, int blob_length, int matcher_multiplier, - const uint8_t* cn_factors, - UnicharRating* int_result, ADAPT_RESULTS* final_results) { - if (classes != nullptr) { - // Adapted result. Convert configs to fontinfo_ids. 
- int_result->adapted = true; - for (int f = 0; f < int_result->fonts.size(); ++f) { - int_result->fonts[f].fontinfo_id = - GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id); - } - } else { - // Pre-trained result. Map fonts using font_sets_. - int_result->adapted = false; - for (int f = 0; f < int_result->fonts.size(); ++f) { - int_result->fonts[f].fontinfo_id = - ClassAndConfigIDToFontOrShapeID(class_id, - int_result->fonts[f].fontinfo_id); - } - if (shape_table_ != nullptr) { - // Two possible cases: - // 1. Flat shapetable. All unichar-ids of the shapes referenced by - // int_result->fonts are the same. In this case build a new vector of - // mapped fonts and replace the fonts in int_result. - // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced - // by int_result. In this case, build a vector of UnicharRating to - // gather together different font-ids for each unichar. Also covers case1. - GenericVector mapped_results; - for (int f = 0; f < int_result->fonts.size(); ++f) { - int shape_id = int_result->fonts[f].fontinfo_id; - const Shape& shape = shape_table_->GetShape(shape_id); - for (int c = 0; c < shape.size(); ++c) { - int unichar_id = shape[c].unichar_id; - if (!unicharset.get_enabled(unichar_id)) continue; - // Find the mapped_result for unichar_id. 
- int r = 0; - for (r = 0; r < mapped_results.size() && - mapped_results[r].unichar_id != unichar_id; ++r) {} - if (r == mapped_results.size()) { - mapped_results.push_back(*int_result); - mapped_results[r].unichar_id = unichar_id; - mapped_results[r].fonts.truncate(0); - } - for (int i = 0; i < shape[c].font_ids.size(); ++i) { - mapped_results[r].fonts.push_back( - ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score)); - } - } - } - for (int m = 0; m < mapped_results.size(); ++m) { - mapped_results[m].rating = - ComputeCorrectedRating(debug, mapped_results[m].unichar_id, - cp_rating, int_result->rating, - int_result->feature_misses, bottom, top, - blob_length, matcher_multiplier, cn_factors); - AddNewResult(mapped_results[m], final_results); - } - return; - } - } - if (unicharset.get_enabled(class_id)) { - int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating, - int_result->rating, - int_result->feature_misses, - bottom, top, blob_length, - matcher_multiplier, cn_factors); - AddNewResult(*int_result, final_results); - } -} - -// Applies a set of corrections to the confidence im_rating, -// including the cn_correction, miss penalty and additional penalty -// for non-alnums being vertical misfits. Returns the corrected confidence. -double Classify::ComputeCorrectedRating(bool debug, int unichar_id, - double cp_rating, double im_rating, - int feature_misses, - int bottom, int top, - int blob_length, int matcher_multiplier, - const uint8_t* cn_factors) { - // Compute class feature corrections. - double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length, - cn_factors[unichar_id], - matcher_multiplier); - double miss_penalty = tessedit_class_miss_scale * feature_misses; - double vertical_penalty = 0.0; - // Penalize non-alnums for being vertical misfits. 
- if (!unicharset.get_isalpha(unichar_id) && - !unicharset.get_isdigit(unichar_id) && - cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) { - int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, - &min_top, &max_top); - if (debug) { - tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n", - top, min_top, max_top, bottom, min_bottom, max_bottom); - } - if (top < min_top || top > max_top || - bottom < min_bottom || bottom > max_bottom) { - vertical_penalty = classify_misfit_junk_penalty; - } - } - double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty); - if (result < WORST_POSSIBLE_RATING) - result = WORST_POSSIBLE_RATING; - if (debug) { - tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n", - unicharset.id_to_unichar(unichar_id), - result * 100.0, - cp_rating * 100.0, - (1.0 - im_rating) * 100.0, - (cn_corrected - (1.0 - im_rating)) * 100.0, - cn_factors[unichar_id], - miss_penalty * 100.0, - vertical_penalty * 100.0); - } - return result; -} - -/*---------------------------------------------------------------------------*/ -/** - * This routine extracts baseline normalized features - * from the unknown character and matches them against the - * specified set of templates. The classes which match - * are added to Results. - * - * Globals: - * - BaselineCutoffs expected num features for each class - * - * @param Blob blob to be classified - * @param Templates current set of adapted templates - * @param Results place to put match results - * @param int_features - * @param fx_info - * - * @return Array of possible ambiguous chars that should be checked. 
- */ -UNICHAR_ID *Classify::BaselineClassifier( - TBLOB *Blob, const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { - if (int_features.empty()) return nullptr; - uint8_t* CharNormArray = new uint8_t[unicharset.size()]; - ClearCharNormArray(CharNormArray); - - Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength); - PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0], - CharNormArray, BaselineCutoffs, &Results->CPResults); - - if (matcher_debug_level >= 2 || classify_debug_level > 1) - tprintf("BL Matches = "); - - MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], - CharNormArray, - Templates->Class, matcher_debug_flags, 0, - Blob->bounding_box(), Results->CPResults, Results); - - delete [] CharNormArray; - CLASS_ID ClassId = Results->best_unichar_id; - if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0) - return nullptr; - - return Templates->Class[ClassId]-> - Config[Results->match[Results->best_match_index].config].Perm->Ambigs; -} /* BaselineClassifier */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine extracts character normalized features - * from the unknown character and matches them against the - * specified set of templates. The classes which match - * are added to Results. - * - * @param blob blob to be classified - * @param sample templates to classify unknown against - * @param adapt_results place to put match results - * - * Globals: - * - CharNormCutoffs expected num features for each class - * - AllProtosOn mask that enables all protos - * - AllConfigsOn mask that enables all configs - */ -int Classify::CharNormClassifier(TBLOB *blob, - const TrainingSample& sample, - ADAPT_RESULTS *adapt_results) { - // This is the length that is used for scaling ratings vs certainty. 
- adapt_results->BlobLength = - IntCastRounded(sample.outline_length() / kStandardFeatureLength); - GenericVector unichar_results; - static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, - -1, &unichar_results); - // Convert results to the format used internally by AdaptiveClassifier. - for (int r = 0; r < unichar_results.size(); ++r) { - AddNewResult(unichar_results[r], adapt_results); - } - return sample.num_features(); -} /* CharNormClassifier */ - -// As CharNormClassifier, but operates on a TrainingSample and outputs to -// a GenericVector of ShapeRating without conversion to classes. -int Classify::CharNormTrainingSample(bool pruner_only, - int keep_this, - const TrainingSample& sample, - GenericVector* results) { - results->clear(); - ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS(); - adapt_results->Initialize(); - // Compute the bounding box of the features. - uint32_t num_features = sample.num_features(); - // Only the top and bottom of the blob_box are used by MasterMatcher, so - // fabricate right and left using top and bottom. - TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom), - sample.geo_feature(GeoTop), sample.geo_feature(GeoTop)); - // Compute the char_norm_array from the saved cn_feature. - FEATURE norm_feature = sample.GetCNFeature(); - uint8_t* char_norm_array = new uint8_t[unicharset.size()]; - int num_pruner_classes = std::max(unicharset.size(), - PreTrainedTemplates->NumClasses); - uint8_t* pruner_norm_array = new uint8_t[num_pruner_classes]; - adapt_results->BlobLength = - static_cast(ActualOutlineLength(norm_feature) * 20 + 0.5); - ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array, - pruner_norm_array); - - PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(), - pruner_norm_array, - shape_table_ != nullptr ? 
&shapetable_cutoffs_[0] : CharNormCutoffs, - &adapt_results->CPResults); - delete [] pruner_norm_array; - if (keep_this >= 0) { - adapt_results->CPResults[0].Class = keep_this; - adapt_results->CPResults.truncate(1); - } - if (pruner_only) { - // Convert pruner results to output format. - for (int i = 0; i < adapt_results->CPResults.size(); ++i) { - int class_id = adapt_results->CPResults[i].Class; - results->push_back( - UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); - } - } else { - MasterMatcher(PreTrainedTemplates, num_features, sample.features(), - char_norm_array, - nullptr, matcher_debug_flags, - classify_integer_matcher_multiplier, - blob_box, adapt_results->CPResults, adapt_results); - // Convert master matcher results to output format. - for (int i = 0; i < adapt_results->match.size(); i++) { - results->push_back(adapt_results->match[i]); - } - results->sort(&UnicharRating::SortDescendingRating); - } - delete [] char_norm_array; - delete adapt_results; - return num_features; -} /* CharNormTrainingSample */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine computes a rating which reflects the - * likelihood that the blob being classified is a noise - * blob. NOTE: assumes that the blob length has already been - * computed and placed into Results. - * - * @param results results to add noise classification to - * - * Globals: - * - matcher_avg_noise_size avg. length of a noise blob - */ -void Classify::ClassifyAsNoise(ADAPT_RESULTS *results) { - float rating = results->BlobLength / matcher_avg_noise_size; - rating *= rating; - rating /= 1.0 + rating; - - AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results); -} /* ClassifyAsNoise */ - -/// The function converts the given match ratings to the list of blob -/// choices with ratings and certainties (used by the context checkers). 
-/// If character fragments are present in the results, this function also makes -/// sure that there is at least one non-fragmented classification included. -/// For each classification result check the unicharset for "definite" -/// ambiguities and modify the resulting Choices accordingly. -void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, - ADAPT_RESULTS *Results, - BLOB_CHOICE_LIST *Choices) { - assert(Choices != nullptr); - float Rating; - float Certainty; - BLOB_CHOICE_IT temp_it; - bool contains_nonfrag = false; - temp_it.set_to_list(Choices); - int choices_length = 0; - // With no shape_table_ maintain the previous MAX_MATCHES as the maximum - // number of returned results, but with a shape_table_ we want to have room - // for at least the biggest shape (which might contain hundreds of Indic - // grapheme fragments) and more, so use double the size of the biggest shape - // if that is more than the default. - int max_matches = MAX_MATCHES; - if (shape_table_ != nullptr) { - max_matches = shape_table_->MaxNumUnichars() * 2; - if (max_matches < MAX_MATCHES) - max_matches = MAX_MATCHES; - } - - float best_certainty = -FLT_MAX; - for (int i = 0; i < Results->match.size(); i++) { - const UnicharRating& result = Results->match[i]; - bool adapted = result.adapted; - bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr); - if (temp_it.length()+1 == max_matches && - !contains_nonfrag && current_is_frag) { - continue; // look for a non-fragmented character to fill the - // last spot in Choices if only fragments are present - } - // BlobLength can never be legally 0, this means recognition failed. - // But we must return a classification result because some invoking - // functions (chopper/permuter) do not anticipate a null blob choice. - // So we need to assign a poor, but not infinitely bad score. 
- if (Results->BlobLength == 0) { - Certainty = -20; - Rating = 100; // should be -certainty * real_blob_length - } else { - Rating = Certainty = (1.0f - result.rating); - Rating *= rating_scale * Results->BlobLength; - Certainty *= -(getDict().certainty_scale); - } - // Adapted results, by their very nature, should have good certainty. - // Those that don't are at best misleading, and often lead to errors, - // so don't accept adapted results that are too far behind the best result, - // whether adapted or static. - // TODO(rays) find some way of automatically tuning these constants. - if (Certainty > best_certainty) { - best_certainty = std::min(Certainty, static_cast(classify_adapted_pruning_threshold)); - } else if (adapted && - Certainty / classify_adapted_pruning_factor < best_certainty) { - continue; // Don't accept bad adapted results. - } - - float min_xheight, max_xheight, yshift; - denorm.XHeightRange(result.unichar_id, unicharset, box, - &min_xheight, &max_xheight, &yshift); - BLOB_CHOICE* choice = - new BLOB_CHOICE(result.unichar_id, Rating, Certainty, - unicharset.get_script(result.unichar_id), - min_xheight, max_xheight, yshift, - adapted ? 
BCC_ADAPTED_CLASSIFIER - : BCC_STATIC_CLASSIFIER); - choice->set_fonts(result.fonts); - temp_it.add_to_end(choice); - contains_nonfrag |= !current_is_frag; // update contains_nonfrag - choices_length++; - if (choices_length >= max_matches) break; - } - Results->match.truncate(choices_length); -} // ConvertMatchesToChoices - - -/*---------------------------------------------------------------------------*/ -#ifndef GRAPHICS_DISABLED -/** - * - * @param blob blob whose classification is being debugged - * @param Results results of match being debugged - * - * Globals: none - */ -void Classify::DebugAdaptiveClassifier(TBLOB *blob, - ADAPT_RESULTS *Results) { - if (static_classifier_ == nullptr) return; - INT_FX_RESULT_STRUCT fx_info; - GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*blob, false, &fx_info, &bl_features); - if (sample == nullptr) return; - static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), - Results->best_unichar_id); -} /* DebugAdaptiveClassifier */ -#endif - -/*---------------------------------------------------------------------------*/ -/** - * This routine performs an adaptive classification. - * If we have not yet adapted to enough classes, a simple - * classification to the pre-trained templates is performed. - * Otherwise, we match the blob against the adapted templates. - * If the adapted templates do not match well, we try a - * match against the pre-trained templates. If an adapted - * template match is found, we do a match to any pre-trained - * templates which could be ambiguous. The results from all - * of these classifications are merged together into Results. 
- * - * @param Blob blob to be classified - * @param Results place to put match results - * - * Globals: - * - PreTrainedTemplates built-in training templates - * - AdaptedTemplates templates adapted for this page - * - matcher_reliable_adaptive_result rating limit for a great match - */ -void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { - UNICHAR_ID *Ambiguities; - - INT_FX_RESULT_STRUCT fx_info; - GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, - &bl_features); - if (sample == nullptr) return; - - // TODO: With LSTM, static_classifier_ is nullptr. - // Return to avoid crash in CharNormClassifier. - if (static_classifier_ == nullptr) { - delete sample; - return; - } - - if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || - tess_cn_matching) { - CharNormClassifier(Blob, *sample, Results); - } else { - Ambiguities = BaselineClassifier(Blob, bl_features, fx_info, - AdaptedTemplates, Results); - if ((!Results->match.empty() && - MarginalMatch(Results->best_rating, - matcher_reliable_adaptive_result) && - !tess_bn_matching) || - Results->match.empty()) { - CharNormClassifier(Blob, *sample, Results); - } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { - AmbigClassifier(bl_features, fx_info, Blob, - PreTrainedTemplates, - AdaptedTemplates->Class, - Ambiguities, - Results); - } - } - - // Force the blob to be classified as noise - // if the results contain only fragments. - // TODO(daria): verify that this is better than - // just adding a nullptr classification. - if (!Results->HasNonfragment || Results->match.empty()) - ClassifyAsNoise(Results); - delete sample; -} /* DoAdaptiveMatch */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine matches blob to the built-in templates - * to find out if there are any classes other than the correct - * class which are potential ambiguities. 
- * - * @param Blob blob to get classification ambiguities for - * @param CorrectClass correct class for Blob - * - * Globals: - * - CurrentRatings used by qsort compare routine - * - PreTrainedTemplates built-in templates - * - * @return String containing all possible ambiguous classes. - */ -UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, - CLASS_ID CorrectClass) { - ADAPT_RESULTS *Results = new ADAPT_RESULTS(); - UNICHAR_ID *Ambiguities; - int i; - - Results->Initialize(); - INT_FX_RESULT_STRUCT fx_info; - GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, - &bl_features); - if (sample == nullptr) { - delete Results; - return nullptr; - } - - CharNormClassifier(Blob, *sample, Results); - delete sample; - RemoveBadMatches(Results); - Results->match.sort(&UnicharRating::SortDescendingRating); - - /* copy the class id's into an string of ambiguities - don't copy if - the correct class is the only class id matched */ - Ambiguities = new UNICHAR_ID[Results->match.size() + 1]; - if (Results->match.size() > 1 || - (Results->match.size() == 1 && - Results->match[0].unichar_id != CorrectClass)) { - for (i = 0; i < Results->match.size(); i++) - Ambiguities[i] = Results->match[i].unichar_id; - Ambiguities[i] = -1; - } else { - Ambiguities[0] = -1; - } - - delete Results; - return Ambiguities; -} /* GetAmbiguities */ - -// Returns true if the given blob looks too dissimilar to any character -// present in the classifier templates. 
-bool Classify::LooksLikeGarbage(TBLOB *blob) { - BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); - AdaptiveClassifier(blob, ratings); - BLOB_CHOICE_IT ratings_it(ratings); - const UNICHARSET &unicharset = getDict().getUnicharset(); - if (classify_debug_character_fragments) { - print_ratings_list("======================\nLooksLikeGarbage() got ", - ratings, unicharset); - } - for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list(); - ratings_it.forward()) { - if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != nullptr) { - continue; - } - float certainty = ratings_it.data()->certainty(); - delete ratings; - return certainty < - classify_character_fragments_garbage_certainty_threshold; - } - delete ratings; - return true; // no whole characters in ratings -} - -/*---------------------------------------------------------------------------*/ -/** - * This routine calls the integer (Hardware) feature - * extractor if it has not been called before for this blob. - * - * The results from the feature extractor are placed into - * globals so that they can be used in other routines without - * re-extracting the features. - * - * It then copies the char norm features into the IntFeatures - * array provided by the caller. - * - * @param templates used to compute char norm adjustments - * @param pruner_norm_array Array of factors from blob normalization - * process - * @param char_norm_array array to fill with dummy char norm adjustments - * @param fx_info - * - * Globals: - * - * @return Number of features extracted or 0 if an error occurred. 
- */ -int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, - INT_TEMPLATES templates, - uint8_t* pruner_norm_array, - uint8_t* char_norm_array) { - FEATURE norm_feature = NewFeature(&CharNormDesc); - float baseline = kBlnBaselineOffset; - float scale = MF_SCALE_FACTOR; - norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale; - norm_feature->Params[CharNormLength] = - fx_info.Length * scale / LENGTH_COMPRESSION; - norm_feature->Params[CharNormRx] = fx_info.Rx * scale; - norm_feature->Params[CharNormRy] = fx_info.Ry * scale; - // Deletes norm_feature. - ComputeCharNormArrays(norm_feature, templates, char_norm_array, - pruner_norm_array); - return IntCastRounded(fx_info.Length / kStandardFeatureLength); -} /* GetCharNormFeature */ - -// Computes the char_norm_array for the unicharset and, if not nullptr, the -// pruner_array as appropriate according to the existence of the shape_table. -void Classify::ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, - INT_TEMPLATES_STRUCT* templates, - uint8_t* char_norm_array, - uint8_t* pruner_array) { - ComputeIntCharNormArray(*norm_feature, char_norm_array); - if (pruner_array != nullptr) { - if (shape_table_ == nullptr) { - ComputeIntCharNormArray(*norm_feature, pruner_array); - } else { - memset(pruner_array, UINT8_MAX, - templates->NumClasses * sizeof(pruner_array[0])); - // Each entry in the pruner norm array is the MIN of all the entries of - // the corresponding unichars in the CharNormArray. 
- for (int id = 0; id < templates->NumClasses; ++id) { - int font_set_id = templates->Class[id]->font_set_id; - const FontSet &fs = fontset_table_.get(font_set_id); - for (int config = 0; config < fs.size; ++config) { - const Shape& shape = shape_table_->GetShape(fs.configs[config]); - for (int c = 0; c < shape.size(); ++c) { - if (char_norm_array[shape[c].unichar_id] < pruner_array[id]) - pruner_array[id] = char_norm_array[shape[c].unichar_id]; - } - } - } - } - } - FreeFeature(norm_feature); -} - -/*---------------------------------------------------------------------------*/ -/** - * - * @param Templates adapted templates to add new config to - * @param ClassId class id to associate with new config - * @param FontinfoId font information inferred from pre-trained templates - * @param NumFeatures number of features in IntFeatures - * @param Features features describing model for new config - * @param FloatFeatures floating-pt representation of features - * - * @return The id of the new config created, a negative integer in - * case of error. 
- */ -int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int FontinfoId, - int NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_SET FloatFeatures) { - INT_CLASS IClass; - ADAPT_CLASS Class; - PROTO_ID OldProtos[MAX_NUM_PROTOS]; - FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES]; - int NumOldProtos; - int NumBadFeatures; - int MaxProtoId, OldMaxProtoId; - int BlobLength = 0; - int MaskSize; - int ConfigId; - TEMP_CONFIG Config; - int i; - int debug_level = NO_DEBUG; - - if (classify_learning_debug_level >= 3) - debug_level = - PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES; - - IClass = ClassForClassId(Templates->Templates, ClassId); - Class = Templates->Class[ClassId]; - - if (IClass->NumConfigs >= MAX_NUM_CONFIGS) { - ++NumAdaptationsFailed; - if (classify_learning_debug_level >= 1) - cprintf("Cannot make new temporary config: maximum number exceeded.\n"); - return -1; - } - - OldMaxProtoId = IClass->NumProtos - 1; - - NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, - BlobLength, NumFeatures, Features, - OldProtos, classify_adapt_proto_threshold, - debug_level); - - MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS); - zero_all_bits(TempProtoMask, MaskSize); - for (i = 0; i < NumOldProtos; i++) - SET_BIT(TempProtoMask, OldProtos[i]); - - NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, - BlobLength, NumFeatures, Features, - BadFeatures, - classify_adapt_feature_threshold, - debug_level); - - MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, - IClass, Class, TempProtoMask); - if (MaxProtoId == NO_PROTO) { - ++NumAdaptationsFailed; - if (classify_learning_debug_level >= 1) - cprintf("Cannot make new temp protos: maximum number exceeded.\n"); - return -1; - } - - ConfigId = AddIntConfig(IClass); - ConvertConfig(TempProtoMask, ConfigId, IClass); - Config = NewTempConfig(MaxProtoId, FontinfoId); - TempConfigFor(Class, ConfigId) = Config; - 
copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize); - - if (classify_learning_debug_level >= 1) - cprintf("Making new temp config %d fontinfo id %d" - " using %d old and %d new protos.\n", - ConfigId, Config->FontinfoId, - NumOldProtos, MaxProtoId - OldMaxProtoId); - - return ConfigId; -} /* MakeNewTemporaryConfig */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine finds sets of sequential bad features - * that all have the same angle and converts each set into - * a new temporary proto. The temp proto is added to the - * proto pruner for IClass, pushed onto the list of temp - * protos in Class, and added to TempProtoMask. - * - * @param Features floating-pt features describing new character - * @param NumBadFeat number of bad features to turn into protos - * @param BadFeat feature id's of bad features - * @param IClass integer class templates to add new protos to - * @param Class adapted class templates to add new protos to - * @param TempProtoMask proto mask to add new protos to - * - * Globals: none - * - * @return Max proto id in class after all protos have been added. 
- */ -PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, - int NumBadFeat, - FEATURE_ID BadFeat[], - INT_CLASS IClass, - ADAPT_CLASS Class, - BIT_VECTOR TempProtoMask) { - FEATURE_ID *ProtoStart; - FEATURE_ID *ProtoEnd; - FEATURE_ID *LastBad; - TEMP_PROTO TempProto; - PROTO Proto; - FEATURE F1, F2; - float X1, X2, Y1, Y2; - float A1, A2, AngleDelta; - float SegmentLength; - PROTO_ID Pid; - - for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; - ProtoStart < LastBad; ProtoStart = ProtoEnd) { - F1 = Features->Features[*ProtoStart]; - X1 = F1->Params[PicoFeatX]; - Y1 = F1->Params[PicoFeatY]; - A1 = F1->Params[PicoFeatDir]; - - for (ProtoEnd = ProtoStart + 1, - SegmentLength = GetPicoFeatureLength(); - ProtoEnd < LastBad; - ProtoEnd++, SegmentLength += GetPicoFeatureLength()) { - F2 = Features->Features[*ProtoEnd]; - X2 = F2->Params[PicoFeatX]; - Y2 = F2->Params[PicoFeatY]; - A2 = F2->Params[PicoFeatDir]; - - AngleDelta = fabs(A1 - A2); - if (AngleDelta > 0.5) - AngleDelta = 1.0 - AngleDelta; - - if (AngleDelta > matcher_clustering_max_angle_delta || - fabs(X1 - X2) > SegmentLength || - fabs(Y1 - Y2) > SegmentLength) - break; - } - - F2 = Features->Features[*(ProtoEnd - 1)]; - X2 = F2->Params[PicoFeatX]; - Y2 = F2->Params[PicoFeatY]; - A2 = F2->Params[PicoFeatDir]; - - Pid = AddIntProto(IClass); - if (Pid == NO_PROTO) - return (NO_PROTO); - - TempProto = NewTempProto(); - Proto = &(TempProto->Proto); - - /* compute proto params - NOTE that Y_DIM_OFFSET must be used because - ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 - instead of the -0.25 to 0.75 used in baseline normalization */ - Proto->Length = SegmentLength; - Proto->Angle = A1; - Proto->X = (X1 + X2) / 2.0; - Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET; - FillABC(Proto); - - TempProto->ProtoId = Pid; - SET_BIT(TempProtoMask, Pid); - - ConvertProto(Proto, Pid, IClass); - AddProtoToProtoPruner(Proto, Pid, IClass, - classify_learning_debug_level >= 2); - - Class->TempProtos = 
push(Class->TempProtos, TempProto); - } - return IClass->NumProtos - 1; -} /* MakeNewTempProtos */ - -/*---------------------------------------------------------------------------*/ -/** - * - * @param Templates current set of adaptive templates - * @param ClassId class containing config to be made permanent - * @param ConfigId config to be made permanent - * @param Blob current blob being adapted to - * - * Globals: none - */ -void Classify::MakePermanent(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int ConfigId, - TBLOB *Blob) { - UNICHAR_ID *Ambigs; - TEMP_CONFIG Config; - ADAPT_CLASS Class; - PROTO_KEY ProtoKey; - - Class = Templates->Class[ClassId]; - Config = TempConfigFor(Class, ConfigId); - - MakeConfigPermanent(Class, ConfigId); - if (Class->NumPermConfigs == 0) - Templates->NumPermClasses++; - Class->NumPermConfigs++; - - // Initialize permanent config. - Ambigs = GetAmbiguities(Blob, ClassId); - PERM_CONFIG Perm = (PERM_CONFIG)malloc(sizeof(PERM_CONFIG_STRUCT)); - Perm->Ambigs = Ambigs; - Perm->FontinfoId = Config->FontinfoId; - - // Free memory associated with temporary config (since ADAPTED_CONFIG - // is a union we need to clean up before we record permanent config). - ProtoKey.Templates = Templates; - ProtoKey.ClassId = ClassId; - ProtoKey.ConfigId = ConfigId; - Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm); - FreeTempConfig(Config); - - // Record permanent config. 
- PermConfigFor(Class, ConfigId) = Perm; - - if (classify_learning_debug_level >= 1) { - tprintf("Making config %d for %s (ClassId %d) permanent:" - " fontinfo id %d, ambiguities '", - ConfigId, getDict().getUnicharset().debug_str(ClassId).string(), - ClassId, PermConfigFor(Class, ConfigId)->FontinfoId); - for (UNICHAR_ID *AmbigsPointer = Ambigs; - *AmbigsPointer >= 0; ++AmbigsPointer) - tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer)); - tprintf("'.\n"); - } -} /* MakePermanent */ -} // namespace tesseract - -/*---------------------------------------------------------------------------*/ -/** - * This routine converts TempProto to be permanent if - * its proto id is used by the configuration specified in - * ProtoKey. - * - * @param item1 (TEMP_PROTO) temporary proto to compare to key - * @param item2 (PROTO_KEY) defines which protos to make permanent - * - * Globals: none - * - * @return TRUE if TempProto is converted, FALSE otherwise - */ -int MakeTempProtoPerm(void *item1, void *item2) { - ADAPT_CLASS Class; - TEMP_CONFIG Config; - TEMP_PROTO TempProto; - PROTO_KEY *ProtoKey; - - TempProto = (TEMP_PROTO) item1; - ProtoKey = (PROTO_KEY *) item2; - - Class = ProtoKey->Templates->Class[ProtoKey->ClassId]; - Config = TempConfigFor(Class, ProtoKey->ConfigId); - - if (TempProto->ProtoId > Config->MaxProtoId || - !test_bit (Config->Protos, TempProto->ProtoId)) - return FALSE; - - MakeProtoPermanent(Class, TempProto->ProtoId); - AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId, - ProtoKey->Templates->Templates); - FreeTempProto(TempProto); - - return TRUE; -} /* MakeTempProtoPerm */ - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine writes the matches in Results to File. 
- * - * @param results match results to write to File - * - * Globals: none - */ -void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) { - for (int i = 0; i < results.match.size(); ++i) { - tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string()); - results.match[i].Print(); - } -} /* PrintAdaptiveMatchResults */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine steps through each matching class in Results - * and removes it from the match list if its rating - * is worse than the BestRating plus a pad. In other words, - * all good matches get moved to the front of the classes - * array. - * - * @param Results contains matches to be filtered - * - * Globals: - * - matcher_bad_match_pad defines a "bad match" - */ -void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { - int Next, NextGood; - float BadMatchThreshold; - static const char* romans = "i v x I V X"; - BadMatchThreshold = Results->best_rating - matcher_bad_match_pad; - - if (classify_bln_numeric_mode) { - UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? - unicharset.unichar_to_id("1") : -1; - UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? 
- unicharset.unichar_to_id("0") : -1; - float scored_one = ScoredUnichar(unichar_id_one, *Results); - float scored_zero = ScoredUnichar(unichar_id_zero, *Results); - - for (Next = NextGood = 0; Next < Results->match.size(); Next++) { - const UnicharRating& match = Results->match[Next]; - if (match.rating >= BadMatchThreshold) { - if (!unicharset.get_isalpha(match.unichar_id) || - strstr(romans, - unicharset.id_to_unichar(match.unichar_id)) != nullptr) { - } else if (unicharset.eq(match.unichar_id, "l") && - scored_one < BadMatchThreshold) { - Results->match[Next].unichar_id = unichar_id_one; - } else if (unicharset.eq(match.unichar_id, "O") && - scored_zero < BadMatchThreshold) { - Results->match[Next].unichar_id = unichar_id_zero; - } else { - Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy. - } - if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) { - if (NextGood == Next) { - ++NextGood; - } else { - Results->match[NextGood++] = Results->match[Next]; - } - } - } - } - } else { - for (Next = NextGood = 0; Next < Results->match.size(); Next++) { - if (Results->match[Next].rating >= BadMatchThreshold) { - if (NextGood == Next) { - ++NextGood; - } else { - Results->match[NextGood++] = Results->match[Next]; - } - } - } - } - Results->match.truncate(NextGood); -} /* RemoveBadMatches */ - -/*----------------------------------------------------------------------------*/ -/** - * This routine discards extra digits or punctuation from the results. - * We keep only the top 2 punctuation answers and the top 1 digit answer if - * present. - * - * @param Results contains matches to be filtered - */ -void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { - int Next, NextGood; - int punc_count; /*no of garbage characters */ - int digit_count; - /*garbage characters */ - static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! 
_ ^"; - static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9"; - - punc_count = 0; - digit_count = 0; - for (Next = NextGood = 0; Next < Results->match.size(); Next++) { - const UnicharRating& match = Results->match[Next]; - bool keep = true; - if (strstr(punc_chars, - unicharset.id_to_unichar(match.unichar_id)) != nullptr) { - if (punc_count >= 2) - keep = false; - punc_count++; - } else { - if (strstr(digit_chars, - unicharset.id_to_unichar(match.unichar_id)) != nullptr) { - if (digit_count >= 1) - keep = false; - digit_count++; - } - } - if (keep) { - if (NextGood == Next) { - ++NextGood; - } else { - Results->match[NextGood++] = match; - } - } - } - Results->match.truncate(NextGood); -} /* RemoveExtraPuncs */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine resets the internal thresholds inside - * the integer matcher to correspond to the specified - * threshold. - * - * @param Threshold threshold for creating new templates - * - * Globals: - * - matcher_good_threshold default good match rating - */ -void Classify::SetAdaptiveThreshold(float Threshold) { - Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold); - classify_adapt_proto_threshold.set_value( - ClipToRange(255 * Threshold, 0, 255)); - classify_adapt_feature_threshold.set_value( - ClipToRange(255 * Threshold, 0, 255)); -} /* SetAdaptiveThreshold */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine displays debug information for the best config - * of the given shape_id for the given set of features. - * - * @param shape_id classifier id to work with - * @param features features of the unknown character - * @param num_features Number of features in the features array. 
- */ - -void Classify::ShowBestMatchFor(int shape_id, - const INT_FEATURE_STRUCT* features, - int num_features) { -#ifndef GRAPHICS_DISABLED - uint32_t config_mask; - if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) { - tprintf("No built-in templates for class/shape %d\n", shape_id); - return; - } - if (num_features <= 0) { - tprintf("Illegal blob (char norm features)!\n"); - return; - } - UnicharRating cn_result; - classify_norm_method.set_value(character); - im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), - AllProtosOn, AllConfigsOn, - num_features, features, &cn_result, - classify_adapt_feature_threshold, NO_DEBUG, - matcher_debug_separate_windows); - tprintf("\n"); - config_mask = 1 << cn_result.config; - - tprintf("Static Shape ID: %d\n", shape_id); - ShowMatchDisplay(); - im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn, - &config_mask, num_features, features, &cn_result, - classify_adapt_feature_threshold, matcher_debug_flags, - matcher_debug_separate_windows); - UpdateMatchDisplay(); -#endif // GRAPHICS_DISABLED -} /* ShowBestMatchFor */ - -// Returns a string for the classifier class_id: either the corresponding -// unicharset debug_str or the shape_table_ debug str. -STRING Classify::ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, - int class_id, int config_id) const { - STRING class_string; - if (templates == PreTrainedTemplates && shape_table_ != nullptr) { - int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id); - class_string = shape_table_->DebugStr(shape_id); - } else { - class_string = unicharset.debug_str(class_id); - } - return class_string; -} - -// Converts a classifier class_id index to a shape_table_ index -int Classify::ClassAndConfigIDToFontOrShapeID(int class_id, - int int_result_config) const { - int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id; - // Older inttemps have no font_ids. 
- if (font_set_id < 0) - return kBlankFontinfoId; - const FontSet &fs = fontset_table_.get(font_set_id); - ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size); - return fs.configs[int_result_config]; -} - -// Converts a shape_table_ index to a classifier class_id index (not a -// unichar-id!). Uses a search, so not fast. -int Classify::ShapeIDToClassID(int shape_id) const { - for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) { - int font_set_id = PreTrainedTemplates->Class[id]->font_set_id; - ASSERT_HOST(font_set_id >= 0); - const FontSet &fs = fontset_table_.get(font_set_id); - for (int config = 0; config < fs.size; ++config) { - if (fs.configs[config] == shape_id) - return id; - } - } - tprintf("Shape %d not found\n", shape_id); - return -1; -} - -// Returns true if the given TEMP_CONFIG is good enough to make it -// a permanent config. -bool Classify::TempConfigReliable(CLASS_ID class_id, - const TEMP_CONFIG &config) { - if (classify_learning_debug_level >= 1) { - tprintf("NumTimesSeen for config of %s is %d\n", - getDict().getUnicharset().debug_str(class_id).string(), - config->NumTimesSeen); - } - if (config->NumTimesSeen >= matcher_sufficient_examples_for_prototyping) { - return true; - } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) { - return false; - } else if (use_ambigs_for_adaption) { - // Go through the ambigs vector and see whether we have already seen - // enough times all the characters represented by the ambigs vector. - const UnicharIdVector *ambigs = - getDict().getUnicharAmbigs().AmbigsForAdaption(class_id); - int ambigs_size = (ambigs == nullptr) ? 
0 : ambigs->size(); - for (int ambig = 0; ambig < ambigs_size; ++ambig) { - ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]]; - assert(ambig_class != nullptr); - if (ambig_class->NumPermConfigs == 0 && - ambig_class->MaxNumTimesSeen < - matcher_min_examples_for_prototyping) { - if (classify_learning_debug_level >= 1) { - tprintf("Ambig %s has not been seen enough times," - " not making config for %s permanent\n", - getDict().getUnicharset().debug_str( - (*ambigs)[ambig]).string(), - getDict().getUnicharset().debug_str(class_id).string()); - } - return false; - } - } - } - return true; -} - -void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) { - const UnicharIdVector *ambigs = - getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id); - int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size(); - if (classify_learning_debug_level >= 1) { - tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n", - getDict().getUnicharset().debug_str(class_id).string(), class_id); - } - for (int ambig = 0; ambig < ambigs_size; ++ambig) { - CLASS_ID ambig_class_id = (*ambigs)[ambig]; - const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id]; - for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) { - if (ConfigIsPermanent(ambigs_class, cfg)) continue; - const TEMP_CONFIG config = - TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg); - if (config != nullptr && TempConfigReliable(ambig_class_id, config)) { - if (classify_learning_debug_level >= 1) { - tprintf("Making config %d of %s permanent\n", cfg, - getDict().getUnicharset().debug_str( - ambig_class_id).string()); - } - MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob); - } - } - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/blobclass.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/blobclass.cpp deleted file mode 100644 index 0ec953f3..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/blobclass.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/****************************************************************************** - ** Filename: blobclass.c - ** Purpose: High level blob classification and training routines. - ** Author: Dan Johnson - ** History: 7/21/89, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "blobclass.h" - -#include - -#include "classify.h" -#ifndef DISABLED_LEGACY_ENGINE -#include "featdefs.h" -#include "mf.h" -#include "normfeat.h" -#endif // ndef DISABLED_LEGACY_ENGINE - -static const char kUnknownFontName[] = "UnknownFont"; - -STRING_VAR(classify_font_name, kUnknownFontName, - "Default font name to be used in training"); - -namespace tesseract { -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -// Finds the name of the training font and returns it in fontname, by cutting -// it out based on the expectation that the filename is of the form: -// /path/to/dir/[lang].[fontname].exp[num] -// The 
[lang], [fontname] and [num] fields should not have '.' characters. -// If the global parameter classify_font_name is set, its value is used instead. -void ExtractFontName(const STRING& filename, STRING* fontname) { - *fontname = classify_font_name; - if (*fontname == kUnknownFontName) { - // filename is expected to be of the form [lang].[fontname].exp[num] - // The [lang], [fontname] and [num] fields should not have '.' characters. - const char *basename = strrchr(filename.string(), '/'); - const char *firstdot = strchr(basename ? basename : filename.string(), '.'); - const char *lastdot = strrchr(filename.string(), '.'); - if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) { - ++firstdot; - *fontname = firstdot; - fontname->truncate_at(lastdot - firstdot); - } - } -} - - -/*---------------------------------------------------------------------------*/ - -#ifndef DISABLED_LEGACY_ENGINE - -// Extracts features from the given blob and saves them in the tr_file_data_ -// member variable. -// fontname: Name of font that this blob was printed in. -// cn_denorm: Character normalization transformation to apply to the blob. -// fx_info: Character normalization parameters computed with cn_denorm. -// blob_text: Ground truth text for the blob. -void Classify::LearnBlob(const STRING& fontname, TBLOB* blob, - const DENORM& cn_denorm, - const INT_FX_RESULT_STRUCT& fx_info, - const char* blob_text) { - CHAR_DESC CharDesc = NewCharDescription(feature_defs_); - CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm); - CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info); - CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info); - CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info); - - if (ValidCharDescription(feature_defs_, CharDesc)) { - // Label the features with a class name and font name. 
- tr_file_data_ += "\n"; - tr_file_data_ += fontname; - tr_file_data_ += " "; - tr_file_data_ += blob_text; - tr_file_data_ += "\n"; - - // write micro-features to file and clean up - WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_); - } else { - tprintf("Blob learned was invalid!\n"); - } - FreeCharDescription(CharDesc); -} // LearnBlob - -// Writes stored training data to a .tr file based on the given filename. -// Returns false on error. -bool Classify::WriteTRFile(const STRING& filename) { - bool result = false; - STRING tr_filename = filename + ".tr"; - FILE* fp = fopen(tr_filename.string(), "wb"); - if (fp) { - result = - tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length()); - fclose(fp); - } - tr_file_data_.truncate_at(0); - return result; -} - -#endif // ndef DISABLED_LEGACY_ENGINE - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/blobclass.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/blobclass.h deleted file mode 100644 index bb839e9c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/blobclass.h +++ /dev/null @@ -1,50 +0,0 @@ -/****************************************************************************** - ** Filename: blobclass.h - ** Purpose: Interface to high level classification and training. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef BLOBCLASS_H -#define BLOBCLASS_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "strngs.h" - -/*--------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------*/ -/* macros for controlling the display of recognized characters */ -#define EnableCharDisplay() (DisplayCharacters = TRUE) -#define DisableCharDisplay() (DisplayCharacters = FALSE) - -/* macros for controlling the display of the entire match list */ -#define EnableMatchDisplay() (DisplayMatchList = TRUE) -#define DisableMatchDisplay() (DisplayMatchList = FALSE) - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -namespace tesseract { -// Finds the name of the training font and returns it in fontname, by cutting -// it out based on the expectation that the filename is of the form: -// /path/to/dir/[lang].[fontname].exp[num] -// The [lang], [fontname] and [num] fields should not have '.' characters. -// If the global parameter classify_font_name is set, its value is used instead. -void ExtractFontName(const STRING& filename, STRING* fontname); - -} // namespace tesseract. 
- -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/classify.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/classify.cpp deleted file mode 100644 index 546013b0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/classify.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: classify.cpp -// Description: classify class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "classify.h" - -#ifdef DISABLED_LEGACY_ENGINE - -#include - -namespace tesseract { - -Classify::Classify() - : - INT_MEMBER(classify_debug_level, 0, "Classify debug level", - this->params()), - - BOOL_MEMBER(classify_bln_numeric_mode, 0, -"Assume the input is numbers [0-9].", this->params()), - - double_MEMBER(classify_max_rating_ratio, 1.5, - "Veto ratio between classifier ratings", this->params()), - - double_MEMBER(classify_max_certainty_margin, 5.5, - "Veto difference between classifier certainties", - this->params()), - - dict_(this) {} - -Classify::~Classify() {} - -} // namespace tesseract - -#else // DISABLED_LEGACY_ENGINE not defined - -#include "fontinfo.h" -#include "intproto.h" -#include "mfoutline.h" -#include "scrollview.h" -#include "shapeclassifier.h" -#include "shapetable.h" -#include "unicity_table.h" -#include - -namespace tesseract { -Classify::Classify() - : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping", - this->params()), - BOOL_MEMBER(prioritize_division, FALSE, - "Prioritize blob division over chopping", this->params()), - INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP", - this->params()), - BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier", - this->params()), - INT_MEMBER(classify_debug_level, 0, "Classify debug level", - this->params()), - INT_MEMBER(classify_norm_method, character, "Normalization Method ...", - this->params()), - double_MEMBER(classify_char_norm_range, 0.2, - "Character Normalization Range ...", this->params()), - double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...", - this->params()), /* PREV DEFAULT 0.1 */ - double_MEMBER(classify_max_norm_scale_x, 0.325, - "Max char x-norm scale ...", - this->params()), /* PREV DEFAULT 0.3 */ - double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...", - this->params()), /* PREV DEFAULT 0.1 */ - 
double_MEMBER(classify_max_norm_scale_y, 0.325, - "Max char y-norm scale ...", - this->params()), /* PREV DEFAULT 0.3 */ - double_MEMBER(classify_max_rating_ratio, 1.5, - "Veto ratio between classifier ratings", this->params()), - double_MEMBER(classify_max_certainty_margin, 5.5, - "Veto difference between classifier certainties", - this->params()), - BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", - this->params()), - BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", - this->params()), - BOOL_MEMBER(classify_enable_adaptive_matcher, 1, - "Enable adaptive classifier", this->params()), - BOOL_MEMBER(classify_use_pre_adapted_templates, 0, - "Use pre-adapted classifier templates", this->params()), - BOOL_MEMBER(classify_save_adapted_templates, 0, - "Save adapted templates to a file", this->params()), - BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", - this->params()), - BOOL_MEMBER(classify_nonlinear_norm, 0, - "Non-linear stroke-density normalization", this->params()), - INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), - INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), - INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", - this->params()), - double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", - this->params()), - double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", - this->params()), - double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", - this->params()), - double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", - this->params()), - double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", - this->params()), - double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length", - this->params()), - INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", - this->params()), - INT_MEMBER(matcher_min_examples_for_prototyping, 3, - "Reliable Config Threshold", this->params()), - INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5, - "Enable adaption even if the ambiguities have not been seen", - this->params()), - double_MEMBER(matcher_clustering_max_angle_delta, 0.015, - "Maximum angle delta for prototype clustering", - this->params()), - double_MEMBER(classify_misfit_junk_penalty, 0.0, - "Penalty to apply when a non-alnum is vertically out of " - "its expected textline position", - this->params()), - double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()), - double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", - this->params()), - double_MEMBER(tessedit_class_miss_scale, 0.00390625, - "Scale factor for features not used", this->params()), - double_MEMBER( - classify_adapted_pruning_factor, 2.5, - "Prune poor adapted results this much worse than best result", - this->params()), - double_MEMBER(classify_adapted_pruning_threshold, -1.0, - "Threshold at which classify_adapted_pruning_factor starts", - this->params()), - INT_MEMBER(classify_adapt_proto_threshold, 230, - "Threshold for good protos during adaptive 0-255", - this->params()), - INT_MEMBER(classify_adapt_feature_threshold, 230, - "Threshold for good features during adaptive 0-255", - this->params()), - BOOL_MEMBER(disable_character_fragments, TRUE, - "Do not include character fragments in the" - " results of the classifier", - this->params()), - double_MEMBER(classify_character_fragments_garbage_certainty_threshold, - -3.0, - "Exclude fragments that do not look like whole" - " characters from training and adaption", - this->params()), - BOOL_MEMBER(classify_debug_character_fragments, FALSE, - "Bring up graphical debugging windows for fragments training", - this->params()), - 
BOOL_MEMBER(matcher_debug_separate_windows, FALSE, - "Use two different windows for debugging the matching: " - "One for the protos and one for the features.", - this->params()), - STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", - this->params()), - INT_MEMBER(classify_class_pruner_threshold, 229, - "Class Pruner Threshold 0-255", this->params()), - INT_MEMBER(classify_class_pruner_multiplier, 15, - "Class Pruner Multiplier 0-255: ", this->params()), - INT_MEMBER(classify_cp_cutoff_strength, 7, - "Class Pruner CutoffStrength: ", this->params()), - INT_MEMBER(classify_integer_matcher_multiplier, 10, - "Integer Matcher Multiplier 0-255: ", this->params()), - EnableLearning(true), - INT_MEMBER(il1_adaption_test, 0, - "Don't adapt to i/I at beginning of word", this->params()), - BOOL_MEMBER(classify_bln_numeric_mode, 0, - "Assume the input is numbers [0-9].", this->params()), - double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", - this->params()), - double_MEMBER(speckle_rating_penalty, 10.0, - "Penalty to add to worst rating for noise", this->params()), - im_(&classify_debug_level), - shape_table_(nullptr), - dict_(this), - static_classifier_(nullptr) { - fontinfo_table_.set_compare_callback( - NewPermanentTessCallback(CompareFontInfo)); - fontinfo_table_.set_clear_callback( - NewPermanentTessCallback(FontInfoDeleteCallback)); - fontset_table_.set_compare_callback( - NewPermanentTessCallback(CompareFontSet)); - fontset_table_.set_clear_callback( - NewPermanentTessCallback(FontSetDeleteCallback)); - AdaptedTemplates = nullptr; - BackupAdaptedTemplates = nullptr; - PreTrainedTemplates = nullptr; - AllProtosOn = nullptr; - AllConfigsOn = nullptr; - AllConfigsOff = nullptr; - TempProtoMask = nullptr; - NormProtos = nullptr; - - NumAdaptationsFailed = 0; - - learn_debug_win_ = nullptr; - learn_fragmented_word_debug_win_ = nullptr; - learn_fragments_debug_win_ = nullptr; -} - -Classify::~Classify() { - EndAdaptiveClassifier(); - 
delete learn_debug_win_; - delete learn_fragmented_word_debug_win_; - delete learn_fragments_debug_win_; -} - - -// Takes ownership of the given classifier, and uses it for future calls -// to CharNormClassifier. -void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) { - delete static_classifier_; - static_classifier_ = static_classifier; -} - -// Moved from speckle.cpp -// Adds a noise classification result that is a bit worse than the worst -// current result, or the worst possible result if no current results. -void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) { - BLOB_CHOICE_IT bc_it(choices); - // If there is no classifier result, we will use the worst possible certainty - // and corresponding rating. - float certainty = -getDict().certainty_scale; - float rating = rating_scale * blob_length; - if (!choices->empty() && blob_length > 0) { - bc_it.move_to_last(); - BLOB_CHOICE* worst_choice = bc_it.data(); - // Add speckle_rating_penalty to worst rating, matching old value. - rating = worst_choice->rating() + speckle_rating_penalty; - // Compute the rating to correspond to the certainty. (Used to be kept - // the same, but that messes up the language model search.) - certainty = -rating * getDict().certainty_scale / - (rating_scale * blob_length); - } - BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, - -1, 0.0f, FLT_MAX, 0, - BCC_SPECKLE_CLASSIFIER); - bc_it.add_to_end(blob_choice); -} - -// Returns true if the blob is small enough to be a large speckle. 
-bool Classify::LargeSpeckle(const TBLOB &blob) { - double speckle_size = kBlnXHeight * speckle_large_max_size; - TBOX bbox = blob.bounding_box(); - return bbox.width() < speckle_size && bbox.height() < speckle_size; -} - -} // namespace tesseract - -#endif // def DISABLED_LEGACY_ENGINE diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/classify.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/classify.h deleted file mode 100644 index 03a2ff07..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/classify.h +++ /dev/null @@ -1,585 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: classify.h -// Description: classify class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CLASSIFY_CLASSIFY_H_ -#define TESSERACT_CLASSIFY_CLASSIFY_H_ - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - - -#ifdef DISABLED_LEGACY_ENGINE - -#include "ccstruct.h" -#include "dict.h" - -namespace tesseract { - -class Classify : public CCStruct { - public: - Classify(); - virtual ~Classify(); - virtual Dict& getDict() { - return dict_; - } - - // Member variables. 
- - INT_VAR_H(classify_debug_level, 0, "Classify debug level"); - - BOOL_VAR_H(classify_bln_numeric_mode, 0, - "Assume the input is numbers [0-9]."); - - double_VAR_H(classify_max_rating_ratio, 1.5, - "Veto ratio between classifier ratings"); - - double_VAR_H(classify_max_certainty_margin, 5.5, - "Veto difference between classifier certainties"); - - private: - Dict dict_; -}; - -} // namespace tesseract - - -#else // DISABLED_LEGACY_ENGINE not defined - -#include "adaptive.h" -#include "ccstruct.h" -#include "dict.h" -#include "featdefs.h" -#include "fontinfo.h" -#include "imagedata.h" -#include "intfx.h" -#include "intmatcher.h" -#include "normalis.h" -#include "ratngs.h" -#include "ocrfeatures.h" -#include "unicity_table.h" - -class ScrollView; -class WERD_CHOICE; -class WERD_RES; -struct ADAPT_RESULTS; -struct NORM_PROTOS; - -static const int kUnknownFontinfoId = -1; -static const int kBlankFontinfoId = -2; - -namespace tesseract { - -class ShapeClassifier; -struct ShapeRating; -class ShapeTable; -struct UnicharRating; - -// How segmented is a blob. In this enum, character refers to a classifiable -// unit, but that is too long and character is usually easier to understand. -enum CharSegmentationType { - CST_FRAGMENT, // A partial character. - CST_WHOLE, // A correctly segmented character. - CST_IMPROPER, // More than one but less than 2 characters. - CST_NGRAM // Multiple characters. -}; - -class Classify : public CCStruct { - public: - Classify(); - virtual ~Classify(); - virtual Dict& getDict() { - return dict_; - } - - const ShapeTable* shape_table() const { - return shape_table_; - } - - // Takes ownership of the given classifier, and uses it for future calls - // to CharNormClassifier. - void SetStaticClassifier(ShapeClassifier* static_classifier); - - // Adds a noise classification result that is a bit worse than the worst - // current result, or the worst possible result if no current results. 
- void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices); - - // Returns true if the blob is small enough to be a large speckle. - bool LargeSpeckle(const TBLOB &blob); - - /* adaptive.cpp ************************************************************/ - ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); - int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId); - // Runs the class pruner from int_templates on the given features, returning - // the number of classes output in results. - // int_templates Class pruner tables - // num_features Number of features in blob - // features Array of features - // normalization_factors (input) Array of int_templates->NumClasses fudge - // factors from blob normalization process. - // (Indexed by CLASS_INDEX) - // expected_num_features (input) Array of int_templates->NumClasses - // expected number of features for each class. - // (Indexed by CLASS_INDEX) - // results (output) Sorted Array of pruned classes. - // Array must be sized to take the maximum possible - // number of outputs : int_templates->NumClasses. 
- int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int num_features, - int keep_this, const INT_FEATURE_STRUCT* features, - const uint8_t* normalization_factors, - const uint16_t* expected_num_features, - GenericVector* results); - void ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs); - void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); - void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); - ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File); - /* normmatch.cpp ************************************************************/ - float ComputeNormMatch(CLASS_ID ClassId, - const FEATURE_STRUCT& feature, bool DebugMatch); - void FreeNormProtos(); - NORM_PROTOS* ReadNormProtos(TFile* fp); - /* protos.cpp ***************************************************************/ - void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class); - INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, - const UNICHARSET& target_unicharset); - /* adaptmatch.cpp ***********************************************************/ - - // Learns the given word using its chopped_word, seam_array, denorm, - // box_word, best_state, and correct_text to learn both correctly and - // incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob - // is called and the data will be saved in an internal buffer. - // Otherwise AdaptToBlob is called for adaption within a document. - void LearnWord(const char* fontname, WERD_RES* word); - - // Builds a blob of length fragments, from the word, starting at start, - // and then learns it, as having the given correct_text. - // If fontname is not nullptr, then LearnBlob is called and the data will be - // saved in an internal buffer for static training. - // Otherwise AdaptToBlob is called for adaption within a document. - // threshold is a magic number required by AdaptToChar and generated by - // ComputeAdaptionThresholds. 
- // Although it can be partly inferred from the string, segmentation is - // provided to explicitly clarify the character segmentation. - void LearnPieces(const char* fontname, int start, int length, float threshold, - CharSegmentationType segmentation, const char* correct_text, - WERD_RES* word); - void InitAdaptiveClassifier(TessdataManager* mgr); - void InitAdaptedClass(TBLOB *Blob, - CLASS_ID ClassId, - int FontinfoId, - ADAPT_CLASS Class, - ADAPT_TEMPLATES Templates); - void AmbigClassifier(const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - const TBLOB *blob, - INT_TEMPLATES templates, - ADAPT_CLASS *classes, - UNICHAR_ID *ambiguities, - ADAPT_RESULTS *results); - void MasterMatcher(INT_TEMPLATES templates, - int16_t num_features, - const INT_FEATURE_STRUCT* features, - const uint8_t* norm_factors, - ADAPT_CLASS* classes, - int debug, - int matcher_multiplier, - const TBOX& blob_box, - const GenericVector& results, - ADAPT_RESULTS* final_results); - // Converts configs to fonts, and if the result is not adapted, and a - // shape_table_ is present, the shape is expanded to include all - // unichar_ids represented, before applying a set of corrections to the - // distance rating in int_result, (see ComputeCorrectedRating.) - // The results are added to the final_results output. - void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes, - bool debug, - int class_id, - int bottom, int top, - float cp_rating, - int blob_length, - int matcher_multiplier, - const uint8_t* cn_factors, - UnicharRating* int_result, - ADAPT_RESULTS* final_results); - // Applies a set of corrections to the distance im_rating, - // including the cn_correction, miss penalty and additional penalty - // for non-alnums being vertical misfits. Returns the corrected distance. 
- double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, - double im_rating, int feature_misses, - int bottom, int top, - int blob_length, int matcher_multiplier, - const uint8_t* cn_factors); - void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, - ADAPT_RESULTS *Results, - BLOB_CHOICE_LIST *Choices); - void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS *results); - int GetAdaptiveFeatures(TBLOB *Blob, - INT_FEATURE_ARRAY IntFeatures, - FEATURE_SET *FloatFeatures); - -#ifndef GRAPHICS_DISABLED - void DebugAdaptiveClassifier(TBLOB *Blob, - ADAPT_RESULTS *Results); -#endif - PROTO_ID MakeNewTempProtos(FEATURE_SET Features, - int NumBadFeat, - FEATURE_ID BadFeat[], - INT_CLASS IClass, - ADAPT_CLASS Class, - BIT_VECTOR TempProtoMask); - int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int FontinfoId, - int NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_SET FloatFeatures); - void MakePermanent(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int ConfigId, - TBLOB *Blob); - void PrintAdaptiveMatchResults(const ADAPT_RESULTS& results); - void RemoveExtraPuncs(ADAPT_RESULTS *Results); - void RemoveBadMatches(ADAPT_RESULTS *Results); - void SetAdaptiveThreshold(float Threshold); - void ShowBestMatchFor(int shape_id, - const INT_FEATURE_STRUCT* features, - int num_features); - // Returns a string for the classifier class_id: either the corresponding - // unicharset debug_str or the shape_table_ debug str. - STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, - int class_id, int config_id) const; - // Converts a classifier class_id index with a config ID to: - // shape_table_ present: a shape_table_ index OR - // No shape_table_: a font ID. - // Without shape training, each class_id, config pair represents a single - // unichar id/font combination, so this function looks up the corresponding - // font id. 
- // With shape training, each class_id, config pair represents a single - // shape table index, so the fontset_table stores the shape table index, - // and the shape_table_ must be consulted to obtain the actual unichar_id/ - // font combinations that the shape represents. - int ClassAndConfigIDToFontOrShapeID(int class_id, - int int_result_config) const; - // Converts a shape_table_ index to a classifier class_id index (not a - // unichar-id!). Uses a search, so not fast. - int ShapeIDToClassID(int shape_id) const; - UNICHAR_ID *BaselineClassifier( - TBLOB *Blob, const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); - int CharNormClassifier(TBLOB *blob, - const TrainingSample& sample, - ADAPT_RESULTS *adapt_results); - - // As CharNormClassifier, but operates on a TrainingSample and outputs to - // a GenericVector of ShapeRating without conversion to classes. - int CharNormTrainingSample(bool pruner_only, int keep_this, - const TrainingSample& sample, - GenericVector* results); - UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass); - void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results); - void AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, - float Threshold, ADAPT_TEMPLATES adaptive_templates); - void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); - bool AdaptableWord(WERD_RES* word); - void EndAdaptiveClassifier(); - void SettupPass1(); - void SettupPass2(); - void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices); - void ClassifyAsNoise(ADAPT_RESULTS *Results); - void ResetAdaptiveClassifierInternal(); - void SwitchAdaptiveClassifier(); - void StartBackupAdaptiveClassifier(); - - int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, - INT_TEMPLATES templates, - uint8_t* pruner_norm_array, - uint8_t* char_norm_array); - // Computes the char_norm_array for the unicharset and, if not nullptr, the - // pruner_array as appropriate 
according to the existence of the shape_table. - // The norm_feature is deleted as it is almost certainly no longer needed. - void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, - INT_TEMPLATES_STRUCT* templates, - uint8_t* char_norm_array, - uint8_t* pruner_array); - - bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); - void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); - - bool AdaptiveClassifierIsFull() const { return NumAdaptationsFailed > 0; } - bool AdaptiveClassifierIsEmpty() const { - return AdaptedTemplates->NumPermClasses == 0; - } - bool LooksLikeGarbage(TBLOB *blob); - void RefreshDebugWindow(ScrollView **win, const char *msg, - int y_offset, const TBOX &wbox); - // intfx.cpp - // Computes the DENORMS for bl(baseline) and cn(character) normalization - // during feature extraction. The input denorm describes the current state - // of the blob, which is usually a baseline-normalized word. - // The Transforms setup are as follows: - // Baseline Normalized (bl) Output: - // We center the grapheme by aligning the x-coordinate of its centroid with - // x=128 and leaving the already-baseline-normalized y as-is. - // - // Character Normalized (cn) Output: - // We align the grapheme's centroid at the origin and scale it - // asymmetrically in x and y so that the 2nd moments are a standard value - // (51.2) ie the result is vaguely square. - // If classify_nonlinear_norm is true: - // A non-linear normalization is setup that attempts to evenly distribute - // edges across x and y. - // - // Some of the fields of fx_info are also setup: - // Length: Total length of outline. - // Rx: Rounded y second moment. (Reversed by convention.) - // Ry: rounded x second moment. - // Xmean: Rounded x center of mass of the blob. - // Ymean: Rounded y center of mass of the blob. 
- static void SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, - DENORM* bl_denorm, DENORM* cn_denorm, - INT_FX_RESULT_STRUCT* fx_info); - - // Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as - // (x,y) position and angle as measured counterclockwise from the vector - // <-1, 0>, from blob using two normalizations defined by bl_denorm and - // cn_denorm. See SetpuBLCNDenorms for definitions. - // If outline_cn_counts is not nullptr, on return it contains the cumulative - // number of cn features generated for each outline in the blob (in order). - // Thus after the first outline, there were (*outline_cn_counts)[0] features, - // after the second outline, there were (*outline_cn_counts)[1] features etc. - static void ExtractFeatures(const TBLOB& blob, - bool nonlinear_norm, - GenericVector* bl_features, - GenericVector* cn_features, - INT_FX_RESULT_STRUCT* results, - GenericVector* outline_cn_counts); - /* float2int.cpp ************************************************************/ - void ClearCharNormArray(uint8_t* char_norm_array); - void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, - uint8_t* char_norm_array); - void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); - /* intproto.cpp *************************************************************/ - INT_TEMPLATES ReadIntTemplates(TFile* fp); - void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, - const UNICHARSET& target_unicharset); - CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on, - bool* pretrained_on, int* shape_id); - void ShowMatchDisplay(); - /* font detection ***********************************************************/ - UnicityTable& get_fontinfo_table() { - return fontinfo_table_; - } - const UnicityTable& get_fontinfo_table() const { - return fontinfo_table_; - } - UnicityTable& get_fontset_table() { - return fontset_table_; - } - /* mfoutline.cpp ***********************************************************/ - 
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale); - /* outfeat.cpp ***********************************************************/ - FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob); - /* picofeat.cpp ***********************************************************/ - FEATURE_SET ExtractPicoFeatures(TBLOB *Blob); - FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob, - const INT_FX_RESULT_STRUCT& fx_info); - FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob, - const INT_FX_RESULT_STRUCT& fx_info); - /* blobclass.cpp ***********************************************************/ - // Extracts features from the given blob and saves them in the tr_file_data_ - // member variable. - // fontname: Name of font that this blob was printed in. - // cn_denorm: Character normalization transformation to apply to the blob. - // fx_info: Character normalization parameters computed with cn_denorm. - // blob_text: Ground truth text for the blob. - void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm, - const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text); - // Writes stored training data to a .tr file based on the given filename. - // Returns false on error. - bool WriteTRFile(const STRING& filename); - - // Member variables. - - // Parameters. - // Set during training (in lang.config) to indicate whether the divisible - // blobs chopper should be used (true for latin script.) - BOOL_VAR_H(allow_blob_division, true, "Use divisible blobs chopping"); - // Set during training (in lang.config) to indicate whether the divisible - // blobs chopper should be used in preference to chopping. Set to true for - // southern Indic scripts. 
- BOOL_VAR_H(prioritize_division, FALSE, - "Prioritize blob division over chopping"); - INT_VAR_H(tessedit_single_match, FALSE, "Top choice only from CP"); - BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier"); - INT_VAR_H(classify_debug_level, 0, "Classify debug level"); - - /* mfoutline.cpp ***********************************************************/ - /* control knobs used to control normalization of outlines */ - INT_VAR_H(classify_norm_method, character, "Normalization Method ..."); - double_VAR_H(classify_char_norm_range, 0.2, - "Character Normalization Range ..."); - double_VAR_H(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ..."); - double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ..."); - double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ..."); - double_VAR_H(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ..."); - double_VAR_H(classify_max_rating_ratio, 1.5, - "Veto ratio between classifier ratings"); - double_VAR_H(classify_max_certainty_margin, 5.5, - "Veto difference between classifier certainties"); - - /* adaptmatch.cpp ***********************************************************/ - BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching"); - BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching"); - BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier"); - BOOL_VAR_H(classify_use_pre_adapted_templates, 0, - "Use pre-adapted classifier templates"); - BOOL_VAR_H(classify_save_adapted_templates, 0, - "Save adapted templates to a file"); - BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger"); - BOOL_VAR_H(classify_nonlinear_norm, 0, - "Non-linear stroke-density normalization"); - INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level"); - INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags"); - INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); - double_VAR_H(matcher_good_threshold, 0.125, "Good 
Match (0-1)"); - double_VAR_H(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)"); - double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)"); - double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)"); - double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)"); - double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. noise blob length: "); - INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes"); - INT_VAR_H(matcher_min_examples_for_prototyping, 3, - "Reliable Config Threshold"); - INT_VAR_H(matcher_sufficient_examples_for_prototyping, 5, - "Enable adaption even if the ambiguities have not been seen"); - double_VAR_H(matcher_clustering_max_angle_delta, 0.015, - "Maximum angle delta for prototype clustering"); - double_VAR_H(classify_misfit_junk_penalty, 0.0, - "Penalty to apply when a non-alnum is vertically out of " - "its expected textline position"); - double_VAR_H(rating_scale, 1.5, "Rating scaling factor"); - double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); - double_VAR_H(tessedit_class_miss_scale, 0.00390625, - "Scale factor for features not used"); - double_VAR_H(classify_adapted_pruning_factor, 2.5, - "Prune poor adapted results this much worse than best result"); - double_VAR_H(classify_adapted_pruning_threshold, -1.0, - "Threshold at which classify_adapted_pruning_factor starts"); - INT_VAR_H(classify_adapt_proto_threshold, 230, - "Threshold for good protos during adaptive 0-255"); - INT_VAR_H(classify_adapt_feature_threshold, 230, - "Threshold for good features during adaptive 0-255"); - BOOL_VAR_H(disable_character_fragments, TRUE, - "Do not include character fragments in the" - " results of the classifier"); - double_VAR_H(classify_character_fragments_garbage_certainty_threshold, -3.0, - "Exclude fragments that do not match any whole character" - " with at least this certainty"); - BOOL_VAR_H(classify_debug_character_fragments, FALSE, - "Bring up graphical debugging windows for 
fragments training"); - BOOL_VAR_H(matcher_debug_separate_windows, FALSE, - "Use two different windows for debugging the matching: " - "One for the protos and one for the features."); - STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning"); - - /* intmatcher.cpp **********************************************************/ - INT_VAR_H(classify_class_pruner_threshold, 229, - "Class Pruner Threshold 0-255"); - INT_VAR_H(classify_class_pruner_multiplier, 15, - "Class Pruner Multiplier 0-255: "); - INT_VAR_H(classify_cp_cutoff_strength, 7, - "Class Pruner CutoffStrength: "); - INT_VAR_H(classify_integer_matcher_multiplier, 10, - "Integer Matcher Multiplier 0-255: "); - - // Use class variables to hold onto built-in templates and adapted templates. - INT_TEMPLATES PreTrainedTemplates; - ADAPT_TEMPLATES AdaptedTemplates; - // The backup adapted templates are created from the previous page (only) - // so they are always ready and reasonably well trained if the primary - // adapted templates become full. - ADAPT_TEMPLATES BackupAdaptedTemplates; - - // Create dummy proto and config masks for use with the built-in templates. - BIT_VECTOR AllProtosOn; - BIT_VECTOR AllConfigsOn; - BIT_VECTOR AllConfigsOff; - BIT_VECTOR TempProtoMask; - bool EnableLearning; - /* normmatch.cpp */ - NORM_PROTOS *NormProtos; - /* font detection ***********************************************************/ - UnicityTable fontinfo_table_; - // Without shape training, each class_id, config pair represents a single - // unichar id/font combination, so each fontset_table_ entry holds font ids - // for each config in the class. - // With shape training, each class_id, config pair represents a single - // shape_table_ index, so the fontset_table_ stores the shape_table_ index, - // and the shape_table_ must be consulted to obtain the actual unichar_id/ - // font combinations that the shape represents. 
- UnicityTable fontset_table_; - - INT_VAR_H(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word"); - BOOL_VAR_H(classify_bln_numeric_mode, 0, - "Assume the input is numbers [0-9]."); - double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size"); - double_VAR_H(speckle_rating_penalty, 10.0, - "Penalty to add to worst rating for noise"); - - protected: - IntegerMatcher im_; - FEATURE_DEFS_STRUCT feature_defs_; - // If a shape_table_ is present, it is used to remap classifier output in - // ExpandShapesAndApplyCorrections. font_ids referenced by configs actually - // mean an index to the shape_table_ and the choices returned are *all* the - // shape_table_ entries at that index. - ShapeTable* shape_table_; - - private: - Dict dict_; - // The currently active static classifier. - ShapeClassifier* static_classifier_; - - /* variables used to hold performance statistics */ - int NumAdaptationsFailed; - - // Training data gathered here for all the images in a document. - STRING tr_file_data_; - - // Expected number of features in the class pruner, used to penalize - // unknowns that have too few features (like a c being classified as e) so - // it doesn't recognize everything as '@' or '#'. - // CharNormCutoffs is for the static classifier (with no shapetable). - // BaselineCutoffs gets a copy of CharNormCutoffs as an estimate of the real - // value in the adaptive classifier. Both are indexed by unichar_id. 
- // shapetable_cutoffs_ provides a similar value for each shape in the - // shape_table_ - uint16_t CharNormCutoffs[MAX_NUM_CLASSES]; - uint16_t BaselineCutoffs[MAX_NUM_CLASSES]; - GenericVector shapetable_cutoffs_; - ScrollView* learn_debug_win_; - ScrollView* learn_fragmented_word_debug_win_; - ScrollView* learn_fragments_debug_win_; -}; -} // namespace tesseract - -#endif // DISABLED_LEGACY_ENGINE - -#endif // TESSERACT_CLASSIFY_CLASSIFY_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cluster.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cluster.cpp deleted file mode 100644 index c11c4a61..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cluster.cpp +++ /dev/null @@ -1,2494 +0,0 @@ -/****************************************************************************** - ** Filename: cluster.c - ** Purpose: Routines for clustering points in N-D space - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- *****************************************************************************/ - -#include // for FLT_MAX -#include -#include // for std::vector - -#include "cluster.h" -#include "cutil.h" // for void_proc -#include "emalloc.h" -#include "genericheap.h" -#include "helpers.h" -#include "kdpair.h" -#include "matrix.h" -#include "tprintf.h" - -#define HOTELLING 1 // If true use Hotelling's test to decide where to split. -#define FTABLE_X 10 // Size of FTable. -#define FTABLE_Y 100 // Size of FTable. - -// Table of values approximating the cumulative F-distribution for a confidence of 1%. -const double FTable[FTABLE_Y][FTABLE_X] = { - {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,}, - {98.502, 99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,}, - {34.116, 30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,}, - {21.198, 18.000, 16.694, 15.977, 15.522, 15.207, 14.976, 14.799, 14.659, 14.546,}, - {16.258, 13.274, 12.060, 11.392, 10.967, 10.672, 10.456, 10.289, 10.158, 10.051,}, - {13.745, 10.925, 9.780, 9.148, 8.746, 8.466, 8.260, 8.102, 7.976, 7.874,}, - {12.246, 9.547, 8.451, 7.847, 7.460, 7.191, 6.993, 6.840, 6.719, 6.620,}, - {11.259, 8.649, 7.591, 7.006, 6.632, 6.371, 6.178, 6.029, 5.911, 5.814,}, - {10.561, 8.022, 6.992, 6.422, 6.057, 5.802, 5.613, 5.467, 5.351, 5.257,}, - {10.044, 7.559, 6.552, 5.994, 5.636, 5.386, 5.200, 5.057, 4.942, 4.849,}, - { 9.646, 7.206, 6.217, 5.668, 5.316, 5.069, 4.886, 4.744, 4.632, 4.539,}, - { 9.330, 6.927, 5.953, 5.412, 5.064, 4.821, 4.640, 4.499, 4.388, 4.296,}, - { 9.074, 6.701, 5.739, 5.205, 4.862, 4.620, 4.441, 4.302, 4.191, 4.100,}, - { 8.862, 6.515, 5.564, 5.035, 4.695, 4.456, 4.278, 4.140, 4.030, 3.939,}, - { 8.683, 6.359, 5.417, 4.893, 4.556, 4.318, 4.142, 4.004, 3.895, 3.805,}, - { 8.531, 6.226, 5.292, 4.773, 4.437, 4.202, 4.026, 3.890, 3.780, 3.691,}, - { 8.400, 6.112, 5.185, 4.669, 4.336, 4.102, 3.927, 3.791, 3.682, 3.593,}, - { 8.285, 
6.013, 5.092, 4.579, 4.248, 4.015, 3.841, 3.705, 3.597, 3.508,}, - { 8.185, 5.926, 5.010, 4.500, 4.171, 3.939, 3.765, 3.631, 3.523, 3.434,}, - { 8.096, 5.849, 4.938, 4.431, 4.103, 3.871, 3.699, 3.564, 3.457, 3.368,}, - { 8.017, 5.780, 4.874, 4.369, 4.042, 3.812, 3.640, 3.506, 3.398, 3.310,}, - { 7.945, 5.719, 4.817, 4.313, 3.988, 3.758, 3.587, 3.453, 3.346, 3.258,}, - { 7.881, 5.664, 4.765, 4.264, 3.939, 3.710, 3.539, 3.406, 3.299, 3.211,}, - { 7.823, 5.614, 4.718, 4.218, 3.895, 3.667, 3.496, 3.363, 3.256, 3.168,}, - { 7.770, 5.568, 4.675, 4.177, 3.855, 3.627, 3.457, 3.324, 3.217, 3.129,}, - { 7.721, 5.526, 4.637, 4.140, 3.818, 3.591, 3.421, 3.288, 3.182, 3.094,}, - { 7.677, 5.488, 4.601, 4.106, 3.785, 3.558, 3.388, 3.256, 3.149, 3.062,}, - { 7.636, 5.453, 4.568, 4.074, 3.754, 3.528, 3.358, 3.226, 3.120, 3.032,}, - { 7.598, 5.420, 4.538, 4.045, 3.725, 3.499, 3.330, 3.198, 3.092, 3.005,}, - { 7.562, 5.390, 4.510, 4.018, 3.699, 3.473, 3.305, 3.173, 3.067, 2.979,}, - { 7.530, 5.362, 4.484, 3.993, 3.675, 3.449, 3.281, 3.149, 3.043, 2.955,}, - { 7.499, 5.336, 4.459, 3.969, 3.652, 3.427, 3.258, 3.127, 3.021, 2.934,}, - { 7.471, 5.312, 4.437, 3.948, 3.630, 3.406, 3.238, 3.106, 3.000, 2.913,}, - { 7.444, 5.289, 4.416, 3.927, 3.611, 3.386, 3.218, 3.087, 2.981, 2.894,}, - { 7.419, 5.268, 4.396, 3.908, 3.592, 3.368, 3.200, 3.069, 2.963, 2.876,}, - { 7.396, 5.248, 4.377, 3.890, 3.574, 3.351, 3.183, 3.052, 2.946, 2.859,}, - { 7.373, 5.229, 4.360, 3.873, 3.558, 3.334, 3.167, 3.036, 2.930, 2.843,}, - { 7.353, 5.211, 4.343, 3.858, 3.542, 3.319, 3.152, 3.021, 2.915, 2.828,}, - { 7.333, 5.194, 4.327, 3.843, 3.528, 3.305, 3.137, 3.006, 2.901, 2.814,}, - { 7.314, 5.179, 4.313, 3.828, 3.514, 3.291, 3.124, 2.993, 2.888, 2.801,}, - { 7.296, 5.163, 4.299, 3.815, 3.501, 3.278, 3.111, 2.980, 2.875, 2.788,}, - { 7.280, 5.149, 4.285, 3.802, 3.488, 3.266, 3.099, 2.968, 2.863, 2.776,}, - { 7.264, 5.136, 4.273, 3.790, 3.476, 3.254, 3.087, 2.957, 2.851, 2.764,}, - { 7.248, 5.123, 4.261, 3.778, 
3.465, 3.243, 3.076, 2.946, 2.840, 2.754,}, - { 7.234, 5.110, 4.249, 3.767, 3.454, 3.232, 3.066, 2.935, 2.830, 2.743,}, - { 7.220, 5.099, 4.238, 3.757, 3.444, 3.222, 3.056, 2.925, 2.820, 2.733,}, - { 7.207, 5.087, 4.228, 3.747, 3.434, 3.213, 3.046, 2.916, 2.811, 2.724,}, - { 7.194, 5.077, 4.218, 3.737, 3.425, 3.204, 3.037, 2.907, 2.802, 2.715,}, - { 7.182, 5.066, 4.208, 3.728, 3.416, 3.195, 3.028, 2.898, 2.793, 2.706,}, - { 7.171, 5.057, 4.199, 3.720, 3.408, 3.186, 3.020, 2.890, 2.785, 2.698,}, - { 7.159, 5.047, 4.191, 3.711, 3.400, 3.178, 3.012, 2.882, 2.777, 2.690,}, - { 7.149, 5.038, 4.182, 3.703, 3.392, 3.171, 3.005, 2.874, 2.769, 2.683,}, - { 7.139, 5.030, 4.174, 3.695, 3.384, 3.163, 2.997, 2.867, 2.762, 2.675,}, - { 7.129, 5.021, 4.167, 3.688, 3.377, 3.156, 2.990, 2.860, 2.755, 2.668,}, - { 7.119, 5.013, 4.159, 3.681, 3.370, 3.149, 2.983, 2.853, 2.748, 2.662,}, - { 7.110, 5.006, 4.152, 3.674, 3.363, 3.143, 2.977, 2.847, 2.742, 2.655,}, - { 7.102, 4.998, 4.145, 3.667, 3.357, 3.136, 2.971, 2.841, 2.736, 2.649,}, - { 7.093, 4.991, 4.138, 3.661, 3.351, 3.130, 2.965, 2.835, 2.730, 2.643,}, - { 7.085, 4.984, 4.132, 3.655, 3.345, 3.124, 2.959, 2.829, 2.724, 2.637,}, - { 7.077, 4.977, 4.126, 3.649, 3.339, 3.119, 2.953, 2.823, 2.718, 2.632,}, - { 7.070, 4.971, 4.120, 3.643, 3.333, 3.113, 2.948, 2.818, 2.713, 2.626,}, - { 7.062, 4.965, 4.114, 3.638, 3.328, 3.108, 2.942, 2.813, 2.708, 2.621,}, - { 7.055, 4.959, 4.109, 3.632, 3.323, 3.103, 2.937, 2.808, 2.703, 2.616,}, - { 7.048, 4.953, 4.103, 3.627, 3.318, 3.098, 2.932, 2.803, 2.698, 2.611,}, - { 7.042, 4.947, 4.098, 3.622, 3.313, 3.093, 2.928, 2.798, 2.693, 2.607,}, - { 7.035, 4.942, 4.093, 3.618, 3.308, 3.088, 2.923, 2.793, 2.689, 2.602,}, - { 7.029, 4.937, 4.088, 3.613, 3.304, 3.084, 2.919, 2.789, 2.684, 2.598,}, - { 7.023, 4.932, 4.083, 3.608, 3.299, 3.080, 2.914, 2.785, 2.680, 2.593,}, - { 7.017, 4.927, 4.079, 3.604, 3.295, 3.075, 2.910, 2.781, 2.676, 2.589,}, - { 7.011, 4.922, 4.074, 3.600, 3.291, 3.071, 2.906, 
2.777, 2.672, 2.585,}, - { 7.006, 4.917, 4.070, 3.596, 3.287, 3.067, 2.902, 2.773, 2.668, 2.581,}, - { 7.001, 4.913, 4.066, 3.591, 3.283, 3.063, 2.898, 2.769, 2.664, 2.578,}, - { 6.995, 4.908, 4.062, 3.588, 3.279, 3.060, 2.895, 2.765, 2.660, 2.574,}, - { 6.990, 4.904, 4.058, 3.584, 3.275, 3.056, 2.891, 2.762, 2.657, 2.570,}, - { 6.985, 4.900, 4.054, 3.580, 3.272, 3.052, 2.887, 2.758, 2.653, 2.567,}, - { 6.981, 4.896, 4.050, 3.577, 3.268, 3.049, 2.884, 2.755, 2.650, 2.563,}, - { 6.976, 4.892, 4.047, 3.573, 3.265, 3.046, 2.881, 2.751, 2.647, 2.560,}, - { 6.971, 4.888, 4.043, 3.570, 3.261, 3.042, 2.877, 2.748, 2.644, 2.557,}, - { 6.967, 4.884, 4.040, 3.566, 3.258, 3.039, 2.874, 2.745, 2.640, 2.554,}, - { 6.963, 4.881, 4.036, 3.563, 3.255, 3.036, 2.871, 2.742, 2.637, 2.551,}, - { 6.958, 4.877, 4.033, 3.560, 3.252, 3.033, 2.868, 2.739, 2.634, 2.548,}, - { 6.954, 4.874, 4.030, 3.557, 3.249, 3.030, 2.865, 2.736, 2.632, 2.545,}, - { 6.950, 4.870, 4.027, 3.554, 3.246, 3.027, 2.863, 2.733, 2.629, 2.542,}, - { 6.947, 4.867, 4.024, 3.551, 3.243, 3.025, 2.860, 2.731, 2.626, 2.539,}, - { 6.943, 4.864, 4.021, 3.548, 3.240, 3.022, 2.857, 2.728, 2.623, 2.537,}, - { 6.939, 4.861, 4.018, 3.545, 3.238, 3.019, 2.854, 2.725, 2.621, 2.534,}, - { 6.935, 4.858, 4.015, 3.543, 3.235, 3.017, 2.852, 2.723, 2.618, 2.532,}, - { 6.932, 4.855, 4.012, 3.540, 3.233, 3.014, 2.849, 2.720, 2.616, 2.529,}, - { 6.928, 4.852, 4.010, 3.538, 3.230, 3.012, 2.847, 2.718, 2.613, 2.527,}, - { 6.925, 4.849, 4.007, 3.535, 3.228, 3.009, 2.845, 2.715, 2.611, 2.524,}, - { 6.922, 4.846, 4.004, 3.533, 3.225, 3.007, 2.842, 2.713, 2.609, 2.522,}, - { 6.919, 4.844, 4.002, 3.530, 3.223, 3.004, 2.840, 2.711, 2.606, 2.520,}, - { 6.915, 4.841, 3.999, 3.528, 3.221, 3.002, 2.838, 2.709, 2.604, 2.518,}, - { 6.912, 4.838, 3.997, 3.525, 3.218, 3.000, 2.835, 2.706, 2.602, 2.515,}, - { 6.909, 4.836, 3.995, 3.523, 3.216, 2.998, 2.833, 2.704, 2.600, 2.513,}, - { 6.906, 4.833, 3.992, 3.521, 3.214, 2.996, 2.831, 2.702, 2.598, 2.511,}, 
- { 6.904, 4.831, 3.990, 3.519, 3.212, 2.994, 2.829, 2.700, 2.596, 2.509,}, - { 6.901, 4.829, 3.988, 3.517, 3.210, 2.992, 2.827, 2.698, 2.594, 2.507,}, - { 6.898, 4.826, 3.986, 3.515, 3.208, 2.990, 2.825, 2.696, 2.592, 2.505,}, - { 6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503} -}; - -/** define the variance which will be used as a minimum variance for any - dimension of any feature. Since most features are calculated from numbers - with a precision no better than 1 in 128, the variance should never be - less than the square of this number for parameters whose range is 1. */ -#define MINVARIANCE 0.0004 - -/** define the absolute minimum number of samples which must be present in - order to accurately test hypotheses about underlying probability - distributions. Define separately the minimum samples that are needed - before a statistical analysis is attempted; this number should be - equal to MINSAMPLES but can be set to a lower number for early testing - when very few samples are available. */ -#define MINSAMPLESPERBUCKET 5 -#define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET) -#define MINSAMPLESNEEDED 1 - -/** define the size of the table which maps normalized samples to - histogram buckets. Also define the number of standard deviations - in a normal distribution which are considered to be significant. - The mapping table will be defined in such a way that it covers - the specified number of standard deviations on either side of - the mean. BUCKETTABLESIZE should always be even. 
*/ -#define BUCKETTABLESIZE 1024 -#define NORMALEXTENT 3.0 - -struct TEMPCLUSTER { - CLUSTER *Cluster; - CLUSTER *Neighbor; -}; - -using ClusterPair = tesseract::KDPairInc; -using ClusterHeap = tesseract::GenericHeap; - -struct STATISTICS { - float AvgVariance; - float *CoVariance; - float *Min; // largest negative distance from the mean - float *Max; // largest positive distance from the mean -}; - -struct BUCKETS { - DISTRIBUTION Distribution; // distribution being tested for - uint32_t SampleCount; // # of samples in histogram - double Confidence; // confidence level of test - double ChiSquared; // test threshold - uint16_t NumberOfBuckets; // number of cells in histogram - uint16_t Bucket[BUCKETTABLESIZE]; // mapping to histogram buckets - uint32_t *Count; // frequency of occurrence histogram - float *ExpectedCount; // expected histogram -}; - -struct CHISTRUCT{ - uint16_t DegreesOfFreedom; - double Alpha; - double ChiSquared; -}; - -// For use with KDWalk / MakePotentialClusters -struct ClusteringContext { - ClusterHeap *heap; // heap used to hold temp clusters, "best" on top - TEMPCLUSTER *candidates; // array of potential clusters - KDTREE *tree; // kd-tree to be searched for neighbors - int32_t next; // next candidate to be used -}; - -typedef double (*DENSITYFUNC) (int32_t); -typedef double (*SOLVEFUNC) (CHISTRUCT *, double); - -#define Odd(N) ((N)%2) -#define Mirror(N,R) ((R) - (N) - 1) -#define Abs(N) (((N) < 0) ? (-(N)) : (N)) - -//--------------Global Data Definitions and Declarations---------------------- -/** the following variables describe a discrete normal distribution - which is used by NormalDensity() and NormalBucket(). The - constant NORMALEXTENT determines how many standard - deviations of the distribution are mapped onto the fixed - discrete range of x. x=0 is mapped to -NORMALEXTENT standard - deviations and x=BUCKETTABLESIZE is mapped to - +NORMALEXTENT standard deviations. 
*/ -#define SqrtOf2Pi 2.506628275 -static const double kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT); -static const double kNormalVariance = - (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT); -static const double kNormalMagnitude = - (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE); -static const double kNormalMean = BUCKETTABLESIZE / 2; - -/** define lookup tables used to compute the number of histogram buckets - that should be used for a given number of samples. */ -#define LOOKUPTABLESIZE 8 -#define MAXDEGREESOFFREEDOM MAXBUCKETS - -static const uint32_t kCountTable[LOOKUPTABLESIZE] = { - MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000 -}; // number of samples - -static const uint16_t kBucketsTable[LOOKUPTABLESIZE] = { - MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS -}; // number of buckets - -/*------------------------------------------------------------------------- - Private Function Prototypes ---------------------------------------------------------------------------*/ -void CreateClusterTree(CLUSTERER *Clusterer); - -void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, - int32_t Level); - -CLUSTER *FindNearestNeighbor(KDTREE *Tree, - CLUSTER *Cluster, - float *Distance); - -CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster); - -int32_t MergeClusters (int16_t N, -PARAM_DESC ParamDesc[], -int32_t n1, -int32_t n2, -float m[], -float m1[], float m2[]); - -void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); - -PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster); - -PROTOTYPE *MakeDegenerateProto(uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics, - PROTOSTYLE Style, - int32_t MinSamples); - -PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster, - STATISTICS *Statistics); - -PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - 
BUCKETS *Buckets); - -PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets); - -PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *NormalBuckets, - double Confidence); - -void MakeDimRandom(uint16_t i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc); - -void MakeDimUniform(uint16_t i, PROTOTYPE *Proto, STATISTICS *Statistics); - -STATISTICS *ComputeStatistics (int16_t N, -PARAM_DESC ParamDesc[], CLUSTER * Cluster); - -PROTOTYPE *NewSphericalProto(uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics); - -PROTOTYPE *NewEllipticalProto(int16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics); - -PROTOTYPE *NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics); - -PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster); - -bool Independent(PARAM_DESC* ParamDesc, - int16_t N, float* CoVariance, float Independence); - -BUCKETS *GetBuckets(CLUSTERER* clusterer, - DISTRIBUTION Distribution, - uint32_t SampleCount, - double Confidence); - -BUCKETS *MakeBuckets(DISTRIBUTION Distribution, - uint32_t SampleCount, - double Confidence); - -uint16_t OptimumNumberOfBuckets(uint32_t SampleCount); - -double ComputeChiSquared(uint16_t DegreesOfFreedom, double Alpha); - -double NormalDensity(int32_t x); - -double UniformDensity(int32_t x); - -double Integral(double f1, double f2, double Dx); - -void FillBuckets(BUCKETS *Buckets, - CLUSTER *Cluster, - uint16_t Dim, - PARAM_DESC *ParamDesc, - float Mean, - float StdDev); - -uint16_t NormalBucket(PARAM_DESC *ParamDesc, - float x, - float Mean, - float StdDev); - -uint16_t UniformBucket(PARAM_DESC *ParamDesc, - float x, - float Mean, - float StdDev); - -bool DistributionOK(BUCKETS* Buckets); - -void FreeStatistics(STATISTICS *Statistics); - -void FreeBuckets(BUCKETS *Buckets); - -void FreeCluster(CLUSTER *Cluster); - -uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets); - -int 
NumBucketsMatch(void *arg1, // BUCKETS *Histogram, - void *arg2); // uint16_t *DesiredNumberOfBuckets); - -int ListEntryMatch(void *arg1, void *arg2); - -void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount); - -void InitBuckets(BUCKETS *Buckets); - -int AlphaMatch(void *arg1, // CHISTRUCT *ChiStruct, - void *arg2); // CHISTRUCT *SearchKey); - -CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, double Alpha); - -double Solve(SOLVEFUNC Function, - void *FunctionParams, - double InitialGuess, - double Accuracy); - -double ChiArea(CHISTRUCT *ChiParams, double x); - -bool MultipleCharSamples(CLUSTERER* Clusterer, - CLUSTER* Cluster, - float MaxIllegal); - -double InvertMatrix(const float* input, int size, float* inv); - -//--------------------------Public Code-------------------------------------- -/** - * This routine creates a new clusterer data structure, - * initializes it, and returns a pointer to it. - * - * @param SampleSize number of dimensions in feature space - * @param ParamDesc description of each dimension - * @return pointer to the new clusterer data structure - */ -CLUSTERER * -MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { - CLUSTERER *Clusterer; - int i; - - // allocate main clusterer data structure and init simple fields - Clusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER)); - Clusterer->SampleSize = SampleSize; - Clusterer->NumberOfSamples = 0; - Clusterer->NumChar = 0; - - // init fields which will not be used initially - Clusterer->Root = nullptr; - Clusterer->ProtoList = NIL_LIST; - - // maintain a copy of param descriptors in the clusterer data structure - Clusterer->ParamDesc = - (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC)); - for (i = 0; i < SampleSize; i++) { - Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular; - Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential; - Clusterer->ParamDesc[i].Min = ParamDesc[i].Min; - Clusterer->ParamDesc[i].Max = ParamDesc[i].Max; - 
Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; - Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2; - Clusterer->ParamDesc[i].MidRange = - (ParamDesc[i].Max + ParamDesc[i].Min) / 2; - } - - // allocate a kd tree to hold the samples - Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc); - - // Initialize cache of histogram buckets to minimize recomputing them. - for (int d = 0; d < DISTRIBUTION_COUNT; ++d) { - for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c) - Clusterer->bucket_cache[d][c] = nullptr; - } - - return Clusterer; -} // MakeClusterer - -/** - * This routine creates a new sample data structure to hold - * the specified feature. This sample is added to the clusterer - * data structure (so that it knows which samples are to be - * clustered later), and a pointer to the sample is returned to - * the caller. - * - * @param Clusterer clusterer data structure to add sample to - * @param Feature feature to be added to clusterer - * @param CharID unique ident. of char that sample came from - * - * @return Pointer to the new sample data structure - */ -SAMPLE* MakeSample(CLUSTERER * Clusterer, const float* Feature, - int32_t CharID) { - SAMPLE *Sample; - int i; - - // see if the samples have already been clustered - if so trap an error - // Can't add samples after they have been clustered. 
- ASSERT_HOST(Clusterer->Root == nullptr); - - // allocate the new sample and initialize it - Sample = (SAMPLE *) Emalloc (sizeof (SAMPLE) + - (Clusterer->SampleSize - - 1) * sizeof (float)); - Sample->Clustered = FALSE; - Sample->Prototype = FALSE; - Sample->SampleCount = 1; - Sample->Left = nullptr; - Sample->Right = nullptr; - Sample->CharID = CharID; - - for (i = 0; i < Clusterer->SampleSize; i++) - Sample->Mean[i] = Feature[i]; - - // add the sample to the KD tree - keep track of the total # of samples - Clusterer->NumberOfSamples++; - KDStore(Clusterer->KDTree, Sample->Mean, Sample); - if (CharID >= Clusterer->NumChar) - Clusterer->NumChar = CharID + 1; - - // execute hook for monitoring clustering operation - // (*SampleCreationHook)(Sample); - - return (Sample); -} // MakeSample - -/** - * This routine first checks to see if the samples in this - * clusterer have already been clustered before; if so, it does - * not bother to recreate the cluster tree. It simply recomputes - * the prototypes based on the new Config info. - * - * If the samples have not been clustered before, the - * samples in the KD tree are formed into a cluster tree and then - * the prototypes are computed from the cluster tree. - * - * In either case this routine returns a pointer to a - * list of prototypes that best represent the samples given - * the constraints specified in Config. 
- * - * @param Clusterer data struct containing samples to be clustered - * @param Config parameters which control clustering process - * - * @return Pointer to a list of prototypes - */ -LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { - //only create cluster tree if samples have never been clustered before - if (Clusterer->Root == nullptr) - CreateClusterTree(Clusterer); - - //deallocate the old prototype list if one exists - FreeProtoList (&Clusterer->ProtoList); - Clusterer->ProtoList = NIL_LIST; - - //compute prototypes starting at the root node in the tree - ComputePrototypes(Clusterer, Config); - // We don't need the cluster pointers in the protos any more, so null them - // out, which makes it safe to delete the clusterer. - LIST proto_list = Clusterer->ProtoList; - iterate(proto_list) { - PROTOTYPE *proto = reinterpret_cast(first_node(proto_list)); - proto->Cluster = nullptr; - } - return Clusterer->ProtoList; -} // ClusterSamples - -/** - * This routine frees all of the memory allocated to the - * specified data structure. It will not, however, free - * the memory used by the prototype list. The pointers to - * the clusters for each prototype in the list will be set - * to nullptr to indicate that the cluster data structures no - * longer exist. Any sample lists that have been obtained - * via calls to GetSamples are no longer valid. - * @param Clusterer pointer to data structure to be freed - * @return None - */ -void FreeClusterer(CLUSTERER *Clusterer) { - if (Clusterer != nullptr) { - free(Clusterer->ParamDesc); - if (Clusterer->KDTree != nullptr) - FreeKDTree (Clusterer->KDTree); - if (Clusterer->Root != nullptr) - FreeCluster (Clusterer->Root); - // Free up all used buckets structures. 
- for (int d = 0; d < DISTRIBUTION_COUNT; ++d) { - for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c) - if (Clusterer->bucket_cache[d][c] != nullptr) - FreeBuckets(Clusterer->bucket_cache[d][c]); - } - - free(Clusterer); - } -} // FreeClusterer - -/** - * This routine frees all of the memory allocated to the - * specified list of prototypes. The clusters which are - * pointed to by the prototypes are not freed. - * @param ProtoList pointer to list of prototypes to be freed - * @return None - */ -void FreeProtoList(LIST *ProtoList) { - destroy_nodes(*ProtoList, FreePrototype); -} // FreeProtoList - -/** - * This routine deallocates the memory consumed by the specified - * prototype and modifies the corresponding cluster so that it - * is no longer marked as a prototype. The cluster is NOT - * deallocated by this routine. - * @param arg prototype data structure to be deallocated - * @return None - */ -void FreePrototype(void *arg) { //PROTOTYPE *Prototype) - PROTOTYPE *Prototype = (PROTOTYPE *) arg; - - // unmark the corresponding cluster (if there is one - if (Prototype->Cluster != nullptr) - Prototype->Cluster->Prototype = FALSE; - - // deallocate the prototype statistics and then the prototype itself - free(Prototype->Distrib); - free(Prototype->Mean); - if (Prototype->Style != spherical) { - free(Prototype->Variance.Elliptical); - free(Prototype->Magnitude.Elliptical); - free(Prototype->Weight.Elliptical); - } - free(Prototype); -} // FreePrototype - -/** - * This routine is used to find all of the samples which - * belong to a cluster. It starts by removing the top - * cluster on the cluster list (SearchState). If this cluster is - * a leaf it is returned. Otherwise, the right subcluster - * is pushed on the list and we continue the search in the - * left subcluster. This continues until a leaf is found. - * If all samples have been found, nullptr is returned. - * InitSampleSearch() must be called - * before NextSample() to initialize the search. 
- * @param SearchState ptr to list containing clusters to be searched - * @return Pointer to the next leaf cluster (sample) or nullptr. - */ -CLUSTER *NextSample(LIST *SearchState) { - CLUSTER *Cluster; - - if (*SearchState == NIL_LIST) - return (nullptr); - Cluster = (CLUSTER *) first_node (*SearchState); - *SearchState = pop (*SearchState); - while (TRUE) { - if (Cluster->Left == nullptr) - return (Cluster); - *SearchState = push (*SearchState, Cluster->Right); - Cluster = Cluster->Left; - } -} // NextSample - -/** - * This routine returns the mean of the specified - * prototype in the indicated dimension. - * @param Proto prototype to return mean of - * @param Dimension dimension whose mean is to be returned - * @return Mean of Prototype in Dimension - */ -float Mean(PROTOTYPE *Proto, uint16_t Dimension) { - return (Proto->Mean[Dimension]); -} // Mean - -/** - * This routine returns the standard deviation of the - * prototype in the indicated dimension. - * @param Proto prototype to return standard deviation of - * @param Dimension dimension whose stddev is to be returned - * @return Standard deviation of Prototype in Dimension - */ -float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { - switch (Proto->Style) { - case spherical: - return ((float) sqrt ((double) Proto->Variance.Spherical)); - case elliptical: - return ((float) - sqrt ((double) Proto->Variance.Elliptical[Dimension])); - case mixed: - switch (Proto->Distrib[Dimension]) { - case normal: - return ((float) - sqrt ((double) Proto->Variance.Elliptical[Dimension])); - case uniform: - case D_random: - return (Proto->Variance.Elliptical[Dimension]); - case DISTRIBUTION_COUNT: - ASSERT_HOST(!"Distribution count not allowed!"); - } - } - return 0.0f; -} // StandardDeviation - - -/*--------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -/** - * This routine performs a bottoms-up 
clustering on the samples - * held in the kd-tree of the Clusterer data structure. The - * result is a cluster tree. Each node in the tree represents - * a cluster which conceptually contains a subset of the samples. - * More precisely, the cluster contains all of the samples which - * are contained in its two sub-clusters. The leaves of the - * tree are the individual samples themselves; they have no - * sub-clusters. The root node of the tree conceptually contains - * all of the samples. - * @param Clusterer data structure holdings samples to be clustered - * @return None (the Clusterer data structure is changed) - */ -void CreateClusterTree(CLUSTERER *Clusterer) { - ClusteringContext context; - ClusterPair HeapEntry; - TEMPCLUSTER *PotentialCluster; - - // each sample and its nearest neighbor form a "potential" cluster - // save these in a heap with the "best" potential clusters on top - context.tree = Clusterer->KDTree; - context.candidates = (TEMPCLUSTER *) - Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER)); - context.next = 0; - context.heap = new ClusterHeap(Clusterer->NumberOfSamples); - KDWalk(context.tree, (void_proc)MakePotentialClusters, &context); - - // form potential clusters into actual clusters - always do "best" first - while (context.heap->Pop(&HeapEntry)) { - PotentialCluster = HeapEntry.data; - - // if main cluster of potential cluster is already in another cluster - // then we don't need to worry about it - if (PotentialCluster->Cluster->Clustered) { - continue; - } - - // if main cluster is not yet clustered, but its nearest neighbor is - // then we must find a new nearest neighbor - else if (PotentialCluster->Neighbor->Clustered) { - PotentialCluster->Neighbor = - FindNearestNeighbor(context.tree, PotentialCluster->Cluster, - &HeapEntry.key); - if (PotentialCluster->Neighbor != nullptr) { - context.heap->Push(&HeapEntry); - } - } - - // if neither cluster is already clustered, form permanent cluster - else { - 
PotentialCluster->Cluster = - MakeNewCluster(Clusterer, PotentialCluster); - PotentialCluster->Neighbor = - FindNearestNeighbor(context.tree, PotentialCluster->Cluster, - &HeapEntry.key); - if (PotentialCluster->Neighbor != nullptr) { - context.heap->Push(&HeapEntry); - } - } - } - - // the root node in the cluster tree is now the only node in the kd-tree - Clusterer->Root = (CLUSTER *) RootOf(Clusterer->KDTree); - - // free up the memory used by the K-D tree, heap, and temp clusters - FreeKDTree(context.tree); - Clusterer->KDTree = nullptr; - delete context.heap; - free(context.candidates); -} // CreateClusterTree - -/** - * This routine is designed to be used in concert with the - * KDWalk routine. It will create a potential cluster for - * each sample in the kd-tree that is being walked. This - * potential cluster will then be pushed on the heap. - * @param context ClusteringContext (see definition above) - * @param Cluster current cluster being visited in kd-tree walk - * @param Level level of this cluster in the kd-tree - */ -void MakePotentialClusters(ClusteringContext *context, - CLUSTER *Cluster, int32_t Level) { - ClusterPair HeapEntry; - int next = context->next; - context->candidates[next].Cluster = Cluster; - HeapEntry.data = &(context->candidates[next]); - context->candidates[next].Neighbor = - FindNearestNeighbor(context->tree, - context->candidates[next].Cluster, - &HeapEntry.key); - if (context->candidates[next].Neighbor != nullptr) { - context->heap->Push(&HeapEntry); - context->next++; - } -} // MakePotentialClusters - -/** - * This routine searches the specified kd-tree for the nearest - * neighbor of the specified cluster. It actually uses the - * kd routines to find the 2 nearest neighbors since one of them - * will be the original cluster. A pointer to the nearest - * neighbor is returned, if it can be found, otherwise nullptr is - * returned. The distance between the 2 nodes is placed - * in the specified variable. 
- * @param Tree kd-tree to search in for nearest neighbor - * @param Cluster cluster whose nearest neighbor is to be found - * @param Distance ptr to variable to report distance found - * @return Pointer to the nearest neighbor of Cluster, or nullptr - */ -CLUSTER * -FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, float * Distance) -#define MAXNEIGHBORS 2 -#define MAXDISTANCE FLT_MAX -{ - CLUSTER *Neighbor[MAXNEIGHBORS]; - float Dist[MAXNEIGHBORS]; - int NumberOfNeighbors; - int32_t i; - CLUSTER *BestNeighbor; - - // find the 2 nearest neighbors of the cluster - KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, - &NumberOfNeighbors, (void **)Neighbor, Dist); - - // search for the nearest neighbor that is not the cluster itself - *Distance = MAXDISTANCE; - BestNeighbor = nullptr; - for (i = 0; i < NumberOfNeighbors; i++) { - if ((Dist[i] < *Distance) && (Neighbor[i] != Cluster)) { - *Distance = Dist[i]; - BestNeighbor = Neighbor[i]; - } - } - return BestNeighbor; -} // FindNearestNeighbor - -/** - * This routine creates a new permanent cluster from the - * clusters specified in TempCluster. The 2 clusters in - * TempCluster are marked as "clustered" and deleted from - * the kd-tree. The new cluster is then added to the kd-tree. 
- * @param Clusterer current clustering environment - * @param TempCluster potential cluster to make permanent - * @return Pointer to the new permanent cluster - */ -CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { - CLUSTER *Cluster; - - // allocate the new cluster and initialize it - Cluster = (CLUSTER *) Emalloc( - sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(float)); - Cluster->Clustered = FALSE; - Cluster->Prototype = FALSE; - Cluster->Left = TempCluster->Cluster; - Cluster->Right = TempCluster->Neighbor; - Cluster->CharID = -1; - - // mark the old clusters as "clustered" and delete them from the kd-tree - Cluster->Left->Clustered = TRUE; - Cluster->Right->Clustered = TRUE; - KDDelete(Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left); - KDDelete(Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right); - - // compute the mean and sample count for the new cluster - Cluster->SampleCount = - MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, - Cluster->Left->SampleCount, Cluster->Right->SampleCount, - Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean); - - // add the new cluster to the KD tree - KDStore(Clusterer->KDTree, Cluster->Mean, Cluster); - return Cluster; -} // MakeNewCluster - -/** - * This routine merges two clusters into one larger cluster. - * To do this it computes the number of samples in the new - * cluster and the mean of the new cluster. The ParamDesc - * information is used to ensure that circular dimensions - * are handled correctly. - * @param N # of dimensions (size of arrays) - * @param ParamDesc array of dimension descriptions - * @param n1, n2 number of samples in each old cluster - * @param m array to hold mean of new cluster - * @param m1, m2 arrays containing means of old clusters - * @return The number of samples in the new cluster. 
- */ -int32_t MergeClusters(int16_t N, - PARAM_DESC ParamDesc[], - int32_t n1, - int32_t n2, - float m[], - float m1[], float m2[]) { - int32_t i, n; - - n = n1 + n2; - for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) { - if (ParamDesc->Circular) { - // if distance between means is greater than allowed - // reduce upper point by one "rotation" to compute mean - // then normalize the mean back into the accepted range - if ((*m2 - *m1) > ParamDesc->HalfRange) { - *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n; - if (*m < ParamDesc->Min) - *m += ParamDesc->Range; - } - else if ((*m1 - *m2) > ParamDesc->HalfRange) { - *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n; - if (*m < ParamDesc->Min) - *m += ParamDesc->Range; - } - else - *m = (n1 * *m1 + n2 * *m2) / n; - } - else - *m = (n1 * *m1 + n2 * *m2) / n; - } - return n; -} // MergeClusters - -/** - * This routine decides which clusters in the cluster tree - * should be represented by prototypes, forms a list of these - * prototypes, and places the list in the Clusterer data - * structure. 
- * @param Clusterer data structure holding cluster tree - * @param Config parameters used to control prototype generation - * @return None - */ -void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { - LIST ClusterStack = NIL_LIST; - CLUSTER *Cluster; - PROTOTYPE *Prototype; - - // use a stack to keep track of clusters waiting to be processed - // initially the only cluster on the stack is the root cluster - if (Clusterer->Root != nullptr) - ClusterStack = push (NIL_LIST, Clusterer->Root); - - // loop until we have analyzed all clusters which are potential prototypes - while (ClusterStack != NIL_LIST) { - // remove the next cluster to be analyzed from the stack - // try to make a prototype from the cluster - // if successful, put it on the proto list, else split the cluster - Cluster = (CLUSTER *) first_node (ClusterStack); - ClusterStack = pop (ClusterStack); - Prototype = MakePrototype(Clusterer, Config, Cluster); - if (Prototype != nullptr) { - Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype); - } - else { - ClusterStack = push (ClusterStack, Cluster->Right); - ClusterStack = push (ClusterStack, Cluster->Left); - } - } -} // ComputePrototypes - -/** - * This routine attempts to create a prototype from the - * specified cluster that conforms to the distribution - * specified in Config. If there are too few samples in the - * cluster to perform a statistical analysis, then a prototype - * is generated but labelled as insignificant. If the - * dimensions of the cluster are not independent, no prototype - * is generated and nullptr is returned. If a prototype can be - * found that matches the desired distribution then a pointer - * to it is returned, otherwise nullptr is returned. 
- * @param Clusterer data structure holding cluster tree - * @param Config parameters used to control prototype generation - * @param Cluster cluster to be made into a prototype - * @return Pointer to new prototype or nullptr - */ -PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster) { - STATISTICS *Statistics; - PROTOTYPE *Proto; - BUCKETS *Buckets; - - // filter out clusters which contain samples from the same character - if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal)) - return nullptr; - - // compute the covariance matrix and ranges for the cluster - Statistics = - ComputeStatistics(Clusterer->SampleSize, Clusterer->ParamDesc, Cluster); - - // check for degenerate clusters which need not be analyzed further - // note that the MinSamples test assumes that all clusters with multiple - // character samples have been removed (as above) - Proto = MakeDegenerateProto( - Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle, - (int32_t) (Config->MinSamples * Clusterer->NumChar)); - if (Proto != nullptr) { - FreeStatistics(Statistics); - return Proto; - } - // check to ensure that all dimensions are independent - if (!Independent(Clusterer->ParamDesc, Clusterer->SampleSize, - Statistics->CoVariance, Config->Independence)) { - FreeStatistics(Statistics); - return nullptr; - } - - if (HOTELLING && Config->ProtoStyle == elliptical) { - Proto = TestEllipticalProto(Clusterer, Config, Cluster, Statistics); - if (Proto != nullptr) { - FreeStatistics(Statistics); - return Proto; - } - } - - // create a histogram data structure used to evaluate distributions - Buckets = GetBuckets(Clusterer, normal, Cluster->SampleCount, - Config->Confidence); - - // create a prototype based on the statistics and test it - switch (Config->ProtoStyle) { - case spherical: - Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); - break; - case elliptical: - Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, 
Buckets); - break; - case mixed: - Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, - Config->Confidence); - break; - case automatic: - Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); - if (Proto != nullptr) - break; - Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets); - if (Proto != nullptr) - break; - Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, - Config->Confidence); - break; - } - FreeStatistics(Statistics); - return Proto; -} // MakePrototype - -/** - * This routine checks for clusters which are degenerate and - * therefore cannot be analyzed in a statistically valid way. - * A cluster is defined as degenerate if it does not have at - * least MINSAMPLESNEEDED samples in it. If the cluster is - * found to be degenerate, a prototype of the specified style - * is generated and marked as insignificant. A cluster is - * also degenerate if it does not have at least MinSamples - * samples in it. - * - * If the cluster is not degenerate, nullptr is returned. - * - * @param N number of dimensions - * @param Cluster cluster being analyzed - * @param Statistics statistical info about cluster - * @param Style type of prototype to be generated - * @param MinSamples minimum number of samples in a cluster - * @return Pointer to degenerate prototype or nullptr. 
- */ -PROTOTYPE *MakeDegenerateProto( //this was MinSample - uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics, - PROTOSTYLE Style, - int32_t MinSamples) { - PROTOTYPE *Proto = nullptr; - - if (MinSamples < MINSAMPLESNEEDED) - MinSamples = MINSAMPLESNEEDED; - - if (Cluster->SampleCount < MinSamples) { - switch (Style) { - case spherical: - Proto = NewSphericalProto (N, Cluster, Statistics); - break; - case elliptical: - case automatic: - Proto = NewEllipticalProto (N, Cluster, Statistics); - break; - case mixed: - Proto = NewMixedProto (N, Cluster, Statistics); - break; - } - Proto->Significant = FALSE; - } - return (Proto); -} // MakeDegenerateProto - -/** - * This routine tests the specified cluster to see if ** - * there is a statistically significant difference between - * the sub-clusters that would be made if the cluster were to - * be split. If not, then a new prototype is formed and - * returned to the caller. If there is, then nullptr is returned - * to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Config provides the magic number of samples that make a good cluster - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about cluster - * @return Pointer to new elliptical prototype or nullptr. - */ -PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster, - STATISTICS *Statistics) { - // Fraction of the number of samples used as a range around 1 within - // which a cluster has the magic size that allows a boost to the - // FTable by kFTableBoostMargin, thus allowing clusters near the - // magic size (equal to the number of sample characters) to be more - // likely to stay together. 
- const double kMagicSampleMargin = 0.0625; - const double kFTableBoostMargin = 2.0; - - int N = Clusterer->SampleSize; - CLUSTER* Left = Cluster->Left; - CLUSTER* Right = Cluster->Right; - if (Left == nullptr || Right == nullptr) - return nullptr; - int TotalDims = Left->SampleCount + Right->SampleCount; - if (TotalDims < N + 1 || TotalDims < 2) - return nullptr; - std::vector Covariance(static_cast(N) * N); - std::vector Inverse(static_cast(N) * N); - std::vector Delta(N); - // Compute a new covariance matrix that only uses essential features. - for (int i = 0; i < N; ++i) { - int row_offset = i * N; - if (!Clusterer->ParamDesc[i].NonEssential) { - for (int j = 0; j < N; ++j) { - if (!Clusterer->ParamDesc[j].NonEssential) - Covariance[j + row_offset] = Statistics->CoVariance[j + row_offset]; - else - Covariance[j + row_offset] = 0.0f; - } - } else { - for (int j = 0; j < N; ++j) { - if (i == j) - Covariance[j + row_offset] = 1.0f; - else - Covariance[j + row_offset] = 0.0f; - } - } - } - double err = InvertMatrix(&Covariance[0], N, &Inverse[0]); - if (err > 1) { - tprintf("Clustering error: Matrix inverse failed with error %g\n", err); - } - int EssentialN = 0; - for (int dim = 0; dim < N; ++dim) { - if (!Clusterer->ParamDesc[dim].NonEssential) { - Delta[dim] = Left->Mean[dim] - Right->Mean[dim]; - ++EssentialN; - } else { - Delta[dim] = 0.0f; - } - } - // Compute Hotelling's T-squared. - double Tsq = 0.0; - for (int x = 0; x < N; ++x) { - double temp = 0.0; - for (int y = 0; y < N; ++y) { - temp += static_cast(Inverse[y + N * x]) * Delta[y]; - } - Tsq += Delta[x] * temp; - } - // Changed this function to match the formula in - // Statistical Methods in Medical Research p 473 - // By Peter Armitage, Geoffrey Berry, J. N. S. Matthews. 
- // Tsq *= Left->SampleCount * Right->SampleCount / TotalDims; - double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN); - int Fx = EssentialN; - if (Fx > FTABLE_X) - Fx = FTABLE_X; - --Fx; - int Fy = TotalDims - EssentialN - 1; - if (Fy > FTABLE_Y) - Fy = FTABLE_Y; - --Fy; - double FTarget = FTable[Fy][Fx]; - if (Config->MagicSamples > 0 && - TotalDims >= Config->MagicSamples * (1.0 - kMagicSampleMargin) && - TotalDims <= Config->MagicSamples * (1.0 + kMagicSampleMargin)) { - // Give magic-sized clusters a magic FTable boost. - FTarget += kFTableBoostMargin; - } - if (F < FTarget) { - return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); - } - return nullptr; -} - -/** - * This routine tests the specified cluster to see if it can - * be approximated by a spherical normal distribution. If it - * can be, then a new prototype is formed and returned to the - * caller. If it can't be, then nullptr is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into a spherical prototype - * @param Statistics statistical info about cluster - * @param Buckets histogram struct used to analyze distribution - * @return Pointer to new spherical prototype or nullptr. 
- */ -PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets) { - PROTOTYPE *Proto = nullptr; - int i; - - // check that each dimension is a normal distribution - for (i = 0; i < Clusterer->SampleSize; i++) { - if (Clusterer->ParamDesc[i].NonEssential) - continue; - - FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Cluster->Mean[i], - sqrt ((double) (Statistics->AvgVariance))); - if (!DistributionOK (Buckets)) - break; - } - // if all dimensions matched a normal distribution, make a proto - if (i >= Clusterer->SampleSize) - Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics); - return (Proto); -} // MakeSphericalProto - -/** - * This routine tests the specified cluster to see if it can - * be approximated by an elliptical normal distribution. If it - * can be, then a new prototype is formed and returned to the - * caller. If it can't be, then nullptr is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about cluster - * @param Buckets histogram struct used to analyze distribution - * @return Pointer to new elliptical prototype or nullptr. 
- */ -PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets) { - PROTOTYPE *Proto = nullptr; - int i; - - // check that each dimension is a normal distribution - for (i = 0; i < Clusterer->SampleSize; i++) { - if (Clusterer->ParamDesc[i].NonEssential) - continue; - - FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Cluster->Mean[i], - sqrt ((double) Statistics-> - CoVariance[i * (Clusterer->SampleSize + 1)])); - if (!DistributionOK (Buckets)) - break; - } - // if all dimensions matched a normal distribution, make a proto - if (i >= Clusterer->SampleSize) - Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); - return (Proto); -} // MakeEllipticalProto - -/** - * This routine tests each dimension of the specified cluster to - * see what distribution would best approximate that dimension. - * Each dimension is compared to the following distributions - * in order: normal, random, uniform. If each dimension can - * be represented by one of these distributions, - * then a new prototype is formed and returned to the - * caller. If it can't be, then nullptr is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into a prototype - * @param Statistics statistical info about cluster - * @param NormalBuckets histogram struct used to analyze distribution - * @param Confidence confidence level for alternate distributions - * @return Pointer to new mixed prototype or nullptr. 
- */ -PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *NormalBuckets, - double Confidence) { - PROTOTYPE *Proto; - int i; - BUCKETS *UniformBuckets = nullptr; - BUCKETS *RandomBuckets = nullptr; - - // create a mixed proto to work on - initially assume all dimensions normal*/ - Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics); - - // find the proper distribution for each dimension - for (i = 0; i < Clusterer->SampleSize; i++) { - if (Clusterer->ParamDesc[i].NonEssential) - continue; - - FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Proto->Mean[i], - sqrt ((double) Proto->Variance.Elliptical[i])); - if (DistributionOK (NormalBuckets)) - continue; - - if (RandomBuckets == nullptr) - RandomBuckets = - GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence); - MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i])); - FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Proto->Mean[i], Proto->Variance.Elliptical[i]); - if (DistributionOK (RandomBuckets)) - continue; - - if (UniformBuckets == nullptr) - UniformBuckets = - GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence); - MakeDimUniform(i, Proto, Statistics); - FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Proto->Mean[i], Proto->Variance.Elliptical[i]); - if (DistributionOK (UniformBuckets)) - continue; - break; - } - // if any dimension failed to match a distribution, discard the proto - if (i < Clusterer->SampleSize) { - FreePrototype(Proto); - Proto = nullptr; - } - return (Proto); -} // MakeMixedProto - -/** - * This routine alters the ith dimension of the specified - * mixed prototype to be D_random. 
- * @param i index of dimension to be changed - * @param Proto prototype whose dimension is to be altered - * @param ParamDesc description of specified dimension - * @return None - */ -void MakeDimRandom(uint16_t i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { - Proto->Distrib[i] = D_random; - Proto->Mean[i] = ParamDesc->MidRange; - Proto->Variance.Elliptical[i] = ParamDesc->HalfRange; - - // subtract out the previous magnitude of this dimension from the total - Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; - Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range; - Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); - - // note that the proto Weight is irrelevant for D_random protos -} // MakeDimRandom - -/** - * This routine alters the ith dimension of the specified - * mixed prototype to be uniform. - * @param i index of dimension to be changed - * @param Proto prototype whose dimension is to be altered - * @param Statistics statistical info about prototype - * @return None - */ -void MakeDimUniform(uint16_t i, PROTOTYPE *Proto, STATISTICS *Statistics) { - Proto->Distrib[i] = uniform; - Proto->Mean[i] = Proto->Cluster->Mean[i] + - (Statistics->Min[i] + Statistics->Max[i]) / 2; - Proto->Variance.Elliptical[i] = - (Statistics->Max[i] - Statistics->Min[i]) / 2; - if (Proto->Variance.Elliptical[i] < MINVARIANCE) - Proto->Variance.Elliptical[i] = MINVARIANCE; - - // subtract out the previous magnitude of this dimension from the total - Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; - Proto->Magnitude.Elliptical[i] = - 1.0 / (2.0 * Proto->Variance.Elliptical[i]); - Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); - - // note that the proto Weight is irrelevant for uniform protos -} // MakeDimUniform - -/** - * This routine searches the cluster tree for all leaf nodes - * which are samples in the specified cluster. 
It computes - * a full covariance matrix for these samples as well as - * keeping track of the ranges (min and max) for each - * dimension. A special data structure is allocated to - * return this information to the caller. An incremental - * algorithm for computing statistics is not used because - * it will not work with circular dimensions. - * @param N number of dimensions - * @param ParamDesc array of dimension descriptions - * @param Cluster cluster whose stats are to be computed - * @return Pointer to new data structure containing statistics - */ -STATISTICS * -ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { - STATISTICS *Statistics; - int i, j; - float *CoVariance; - float *Distance; - LIST SearchState; - SAMPLE *Sample; - uint32_t SampleCountAdjustedForBias; - - // allocate memory to hold the statistics results - Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS)); - Statistics->CoVariance = (float *)Emalloc(sizeof(float) * N * N); - Statistics->Min = (float *) Emalloc (N * sizeof (float)); - Statistics->Max = (float *) Emalloc (N * sizeof (float)); - - // allocate temporary memory to hold the sample to mean distances - Distance = (float *) Emalloc (N * sizeof (float)); - - // initialize the statistics - Statistics->AvgVariance = 1.0; - CoVariance = Statistics->CoVariance; - for (i = 0; i < N; i++) { - Statistics->Min[i] = 0.0; - Statistics->Max[i] = 0.0; - for (j = 0; j < N; j++, CoVariance++) - *CoVariance = 0; - } - // find each sample in the cluster and merge it into the statistics - InitSampleSearch(SearchState, Cluster); - while ((Sample = NextSample (&SearchState)) != nullptr) { - for (i = 0; i < N; i++) { - Distance[i] = Sample->Mean[i] - Cluster->Mean[i]; - if (ParamDesc[i].Circular) { - if (Distance[i] > ParamDesc[i].HalfRange) - Distance[i] -= ParamDesc[i].Range; - if (Distance[i] < -ParamDesc[i].HalfRange) - Distance[i] += ParamDesc[i].Range; - } - if (Distance[i] < Statistics->Min[i]) - Statistics->Min[i] = 
Distance[i]; - if (Distance[i] > Statistics->Max[i]) - Statistics->Max[i] = Distance[i]; - } - CoVariance = Statistics->CoVariance; - for (i = 0; i < N; i++) - for (j = 0; j < N; j++, CoVariance++) - *CoVariance += Distance[i] * Distance[j]; - } - // normalize the variances by the total number of samples - // use SampleCount-1 instead of SampleCount to get an unbiased estimate - // also compute the geometic mean of the diagonal variances - // ensure that clusters with only 1 sample are handled correctly - if (Cluster->SampleCount > 1) - SampleCountAdjustedForBias = Cluster->SampleCount - 1; - else - SampleCountAdjustedForBias = 1; - CoVariance = Statistics->CoVariance; - for (i = 0; i < N; i++) - for (j = 0; j < N; j++, CoVariance++) { - *CoVariance /= SampleCountAdjustedForBias; - if (j == i) { - if (*CoVariance < MINVARIANCE) - *CoVariance = MINVARIANCE; - Statistics->AvgVariance *= *CoVariance; - } - } - Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance, - 1.0 / N); - - // release temporary memory and return - free(Distance); - return (Statistics); -} // ComputeStatistics - -/** - * This routine creates a spherical prototype data structure to - * approximate the samples in the specified cluster. - * Spherical prototypes have a single variance which is - * common across all dimensions. All dimensions are normally - * distributed and independent. 
- * @param N number of dimensions - * @param Cluster cluster to be made into a spherical prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new spherical prototype data structure - */ -PROTOTYPE *NewSphericalProto(uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics) { - PROTOTYPE *Proto; - - Proto = NewSimpleProto (N, Cluster); - - Proto->Variance.Spherical = Statistics->AvgVariance; - if (Proto->Variance.Spherical < MINVARIANCE) - Proto->Variance.Spherical = MINVARIANCE; - - Proto->Magnitude.Spherical = - 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); - Proto->TotalMagnitude = (float)pow((double)Proto->Magnitude.Spherical, - (double) N); - Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); - - return (Proto); -} // NewSphericalProto - -/** - * This routine creates an elliptical prototype data structure to - * approximate the samples in the specified cluster. - * Elliptical prototypes have a variance for each dimension. - * All dimensions are normally distributed and independent. 
- * @param N number of dimensions - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new elliptical prototype data structure - */ -PROTOTYPE *NewEllipticalProto(int16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics) { - PROTOTYPE *Proto; - float *CoVariance; - int i; - - Proto = NewSimpleProto (N, Cluster); - Proto->Variance.Elliptical = (float *) Emalloc (N * sizeof (float)); - Proto->Magnitude.Elliptical = (float *) Emalloc (N * sizeof (float)); - Proto->Weight.Elliptical = (float *) Emalloc (N * sizeof (float)); - - CoVariance = Statistics->CoVariance; - Proto->TotalMagnitude = 1.0; - for (i = 0; i < N; i++, CoVariance += N + 1) { - Proto->Variance.Elliptical[i] = *CoVariance; - if (Proto->Variance.Elliptical[i] < MINVARIANCE) - Proto->Variance.Elliptical[i] = MINVARIANCE; - - Proto->Magnitude.Elliptical[i] = - 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]); - Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; - Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; - } - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); - Proto->Style = elliptical; - return (Proto); -} // NewEllipticalProto - -/** - * This routine creates a mixed prototype data structure to - * approximate the samples in the specified cluster. - * Mixed prototypes can have different distributions for - * each dimension. All dimensions are independent. The - * structure is initially filled in as though it were an - * elliptical prototype. The actual distributions of the - * dimensions can be altered by other routines. 
- * @param N number of dimensions - * @param Cluster cluster to be made into a mixed prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new mixed prototype data structure - */ -PROTOTYPE *NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics) { - PROTOTYPE *Proto; - int i; - - Proto = NewEllipticalProto (N, Cluster, Statistics); - Proto->Distrib = (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION)); - - for (i = 0; i < N; i++) { - Proto->Distrib[i] = normal; - } - Proto->Style = mixed; - return (Proto); -} // NewMixedProto - -/** - * This routine allocates memory to hold a simple prototype - * data structure, i.e. one without independent distributions - * and variances for each dimension. - * @param N number of dimensions - * @param Cluster cluster to be made into a prototype - * @return Pointer to new simple prototype - */ -PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster) { - PROTOTYPE *Proto; - int i; - - Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE)); - Proto->Mean = (float *) Emalloc (N * sizeof (float)); - - for (i = 0; i < N; i++) - Proto->Mean[i] = Cluster->Mean[i]; - Proto->Distrib = nullptr; - - Proto->Significant = TRUE; - Proto->Merged = FALSE; - Proto->Style = spherical; - Proto->NumSamples = Cluster->SampleCount; - Proto->Cluster = Cluster; - Proto->Cluster->Prototype = TRUE; - return (Proto); -} // NewSimpleProto - -/** - * This routine returns TRUE if the specified covariance - * matrix indicates that all N dimensions are independent of - * one another. One dimension is judged to be independent of - * another when the magnitude of the corresponding correlation - * coefficient is - * less than the specified Independence factor. The - * correlation coefficient is calculated as: (see Duda and - * Hart, pg. 247) - * coeff[ij] = stddev[ij] / sqrt (stddev[ii] * stddev[jj]) - * The covariance matrix is assumed to be symmetric (which - * should always be true). 
- * @param ParamDesc descriptions of each feature space dimension - * @param N number of dimensions - * @param CoVariance ptr to a covariance matrix - * @param Independence max off-diagonal correlation coefficient - * @return TRUE if dimensions are independent, FALSE otherwise - */ -bool -Independent(PARAM_DESC* ParamDesc, - int16_t N, float* CoVariance, float Independence) { - int i, j; - float *VARii; // points to ith on-diagonal element - float *VARjj; // points to jth on-diagonal element - float CorrelationCoeff; - - VARii = CoVariance; - for (i = 0; i < N; i++, VARii += N + 1) { - if (ParamDesc[i].NonEssential) - continue; - - VARjj = VARii + N + 1; - CoVariance = VARii + 1; - for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) { - if (ParamDesc[j].NonEssential) - continue; - - if ((*VARii == 0.0) || (*VARjj == 0.0)) - CorrelationCoeff = 0.0; - else - CorrelationCoeff = - sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj))); - if (CorrelationCoeff > Independence) - return false; - } - } - return true; -} // Independent - -/** - * This routine returns a histogram data structure which can - * be used by other routines to place samples into histogram - * buckets, and then apply a goodness of fit test to the - * histogram data to determine if the samples belong to the - * specified probability distribution. The routine keeps - * a list of bucket data structures which have already been - * created so that it minimizes the computation time needed - * to create a new bucket. - * @param clusterer which keeps a bucket_cache for us. - * @param Distribution type of probability distribution to test for - * @param SampleCount number of samples that are available - * @param Confidence probability of a Type I error - * @return Bucket data structure - */ -BUCKETS *GetBuckets(CLUSTERER* clusterer, - DISTRIBUTION Distribution, - uint32_t SampleCount, - double Confidence) { - // Get an old bucket structure with the same number of buckets. 
- uint16_t NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); - BUCKETS *Buckets = - clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS]; - - // If a matching bucket structure is not found, make one and save it. - if (Buckets == nullptr) { - Buckets = MakeBuckets(Distribution, SampleCount, Confidence); - clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS] = - Buckets; - } else { - // Just adjust the existing buckets. - if (SampleCount != Buckets->SampleCount) - AdjustBuckets(Buckets, SampleCount); - if (Confidence != Buckets->Confidence) { - Buckets->Confidence = Confidence; - Buckets->ChiSquared = ComputeChiSquared( - DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), - Confidence); - } - InitBuckets(Buckets); - } - return Buckets; -} // GetBuckets - -/** - * This routine creates a histogram data structure which can - * be used by other routines to place samples into histogram - * buckets, and then apply a goodness of fit test to the - * histogram data to determine if the samples belong to the - * specified probability distribution. The buckets are - * allocated in such a way that the expected frequency of - * samples in each bucket is approximately the same. In - * order to make this possible, a mapping table is - * computed which maps "normalized" samples into the - * appropriate bucket. 
- * @param Distribution type of probability distribution to test for - * @param SampleCount number of samples that are available - * @param Confidence probability of a Type I error - * @return Pointer to new histogram data structure - */ -BUCKETS *MakeBuckets(DISTRIBUTION Distribution, - uint32_t SampleCount, - double Confidence) { - const DENSITYFUNC DensityFunction[] = - { NormalDensity, UniformDensity, UniformDensity }; - int i, j; - BUCKETS *Buckets; - double BucketProbability; - double NextBucketBoundary; - double Probability; - double ProbabilityDelta; - double LastProbDensity; - double ProbDensity; - uint16_t CurrentBucket; - bool Symmetrical; - - // allocate memory needed for data structure - Buckets = static_cast(Emalloc(sizeof(BUCKETS))); - Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); - Buckets->SampleCount = SampleCount; - Buckets->Confidence = Confidence; - Buckets->Count = - static_cast(Emalloc(Buckets->NumberOfBuckets * sizeof(uint32_t))); - Buckets->ExpectedCount = static_cast( - Emalloc(Buckets->NumberOfBuckets * sizeof(float))); - - // initialize simple fields - Buckets->Distribution = Distribution; - for (i = 0; i < Buckets->NumberOfBuckets; i++) { - Buckets->Count[i] = 0; - Buckets->ExpectedCount[i] = 0.0; - } - - // all currently defined distributions are symmetrical - Symmetrical = true; - Buckets->ChiSquared = ComputeChiSquared( - DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence); - - if (Symmetrical) { - // allocate buckets so that all have approx. 
equal probability - BucketProbability = 1.0 / (double) (Buckets->NumberOfBuckets); - - // distribution is symmetric so fill in upper half then copy - CurrentBucket = Buckets->NumberOfBuckets / 2; - if (Odd (Buckets->NumberOfBuckets)) - NextBucketBoundary = BucketProbability / 2; - else - NextBucketBoundary = BucketProbability; - - Probability = 0.0; - LastProbDensity = - (*DensityFunction[(int) Distribution]) (BUCKETTABLESIZE / 2); - for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) { - ProbDensity = (*DensityFunction[(int) Distribution]) (i + 1); - ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0); - Probability += ProbabilityDelta; - if (Probability > NextBucketBoundary) { - if (CurrentBucket < Buckets->NumberOfBuckets - 1) - CurrentBucket++; - NextBucketBoundary += BucketProbability; - } - Buckets->Bucket[i] = CurrentBucket; - Buckets->ExpectedCount[CurrentBucket] += - (float) (ProbabilityDelta * SampleCount); - LastProbDensity = ProbDensity; - } - // place any leftover probability into the last bucket - Buckets->ExpectedCount[CurrentBucket] += - (float) ((0.5 - Probability) * SampleCount); - - // copy upper half of distribution to lower half - for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--) - Buckets->Bucket[i] = - Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets); - - // copy upper half of expected counts to lower half - for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--) - Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j]; - } - return Buckets; -} // MakeBuckets - -/** - * This routine computes the optimum number of histogram - * buckets that should be used in a chi-squared goodness of - * fit test for the specified number of samples. The optimum - * number is computed based on Table 4.1 on pg. 147 of - * "Measurement and Analysis of Random Data" by Bendat & Piersol. - * Linear interpolation is used to interpolate between table - * values. The table is intended for a 0.05 level of - * significance (alpha). 
This routine assumes that it is - * equally valid for other alpha's, which may not be true. - * @param SampleCount number of samples to be tested - * @return Optimum number of histogram buckets - */ -uint16_t OptimumNumberOfBuckets(uint32_t SampleCount) { - uint8_t Last, Next; - float Slope; - - if (SampleCount < kCountTable[0]) - return kBucketsTable[0]; - - for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) { - if (SampleCount <= kCountTable[Next]) { - Slope = (float) (kBucketsTable[Next] - kBucketsTable[Last]) / - (float) (kCountTable[Next] - kCountTable[Last]); - return ((uint16_t) (kBucketsTable[Last] + - Slope * (SampleCount - kCountTable[Last]))); - } - } - return kBucketsTable[Last]; -} // OptimumNumberOfBuckets - -/** - * This routine computes the chi-squared value which will - * leave a cumulative probability of Alpha in the right tail - * of a chi-squared distribution with the specified number of - * degrees of freedom. Alpha must be between 0 and 1. - * DegreesOfFreedom must be even. The routine maintains an - * array of lists. Each list corresponds to a different - * number of degrees of freedom. Each entry in the list - * corresponds to a different alpha value and its corresponding - * chi-squared value. Therefore, once a particular chi-squared - * value is computed, it is stored in the list and never - * needs to be computed again. - * @param DegreesOfFreedom determines shape of distribution - * @param Alpha probability of right tail - * @return Desired chi-squared value - */ -double -ComputeChiSquared (uint16_t DegreesOfFreedom, double Alpha) -#define CHIACCURACY 0.01 -#define MINALPHA (1e-200) -{ - static LIST ChiWith[MAXDEGREESOFFREEDOM + 1]; - - CHISTRUCT *OldChiSquared; - CHISTRUCT SearchKey; - - // limit the minimum alpha that can be used - if alpha is too small - // it may not be possible to compute chi-squared. 
- Alpha = ClipToRange(Alpha, MINALPHA, 1.0); - if (Odd (DegreesOfFreedom)) - DegreesOfFreedom++; - - /* find the list of chi-squared values which have already been computed - for the specified number of degrees of freedom. Search the list for - the desired chi-squared. */ - SearchKey.Alpha = Alpha; - OldChiSquared = (CHISTRUCT *) first_node (search (ChiWith[DegreesOfFreedom], - &SearchKey, AlphaMatch)); - - if (OldChiSquared == nullptr) { - OldChiSquared = NewChiStruct (DegreesOfFreedom, Alpha); - OldChiSquared->ChiSquared = Solve (ChiArea, OldChiSquared, - (double) DegreesOfFreedom, - (double) CHIACCURACY); - ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom], - OldChiSquared); - } - else { - // further optimization might move OldChiSquared to front of list - } - - return (OldChiSquared->ChiSquared); - -} // ComputeChiSquared - -/** - * This routine computes the probability density function - * of a discrete normal distribution defined by the global - * variables kNormalMean, kNormalVariance, and kNormalMagnitude. - * Normal magnitude could, of course, be computed in terms of - * the normal variance but it is precomputed for efficiency. - * @param x number to compute the normal probability density for - * @note Globals: - * kNormalMean mean of a discrete normal distribution - * kNormalVariance variance of a discrete normal distribution - * kNormalMagnitude magnitude of a discrete normal distribution - * @return The value of the normal distribution at x. - */ -double NormalDensity(int32_t x) { - double Distance; - - Distance = x - kNormalMean; - return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); -} // NormalDensity - -/** - * This routine computes the probability density function - * of a uniform distribution at the specified point. The - * range of the distribution is from 0 to BUCKETTABLESIZE. - * @param x number to compute the uniform probability density for - * @return The value of the uniform distribution at x. 
- */ -double UniformDensity(int32_t x) { - static double UniformDistributionDensity = (double) 1.0 / BUCKETTABLESIZE; - - if ((x >= 0.0) && (x <= BUCKETTABLESIZE)) - return UniformDistributionDensity; - else - return (double) 0.0; -} // UniformDensity - -/** - * This routine computes a trapezoidal approximation to the - * integral of a function over a small delta in x. - * @param f1 value of function at x1 - * @param f2 value of function at x2 - * @param Dx x2 - x1 (should always be positive) - * @return Approximation of the integral of the function from x1 to x2. - */ -double Integral(double f1, double f2, double Dx) { - return (f1 + f2) * Dx / 2.0; -} // Integral - -/** - * This routine counts the number of cluster samples which - * fall within the various histogram buckets in Buckets. Only - * one dimension of each sample is examined. The exact meaning - * of the Mean and StdDev parameters depends on the - * distribution which is being analyzed (this info is in the - * Buckets data structure). For normal distributions, Mean - * and StdDev have the expected meanings. For uniform and - * random distributions the Mean is the center point of the - * range and the StdDev is 1/2 the range. A dimension with - * zero standard deviation cannot be statistically analyzed. - * In this case, a pseudo-analysis is used. 
- * @param Buckets histogram buckets to count samples - * @param Cluster cluster whose samples are being analyzed - * @param Dim dimension of samples which is being analyzed - * @param ParamDesc description of the dimension - * @param Mean "mean" of the distribution - * @param StdDev "standard deviation" of the distribution - * @return None (the Buckets data structure is filled in) - */ -void FillBuckets(BUCKETS *Buckets, - CLUSTER *Cluster, - uint16_t Dim, - PARAM_DESC *ParamDesc, - float Mean, - float StdDev) { - uint16_t BucketID; - int i; - LIST SearchState; - SAMPLE *Sample; - - // initialize the histogram bucket counts to 0 - for (i = 0; i < Buckets->NumberOfBuckets; i++) - Buckets->Count[i] = 0; - - if (StdDev == 0.0) { - /* if the standard deviation is zero, then we can't statistically - analyze the cluster. Use a pseudo-analysis: samples exactly on - the mean are distributed evenly across all buckets. Samples greater - than the mean are placed in the last bucket; samples less than the - mean are placed in the first bucket. 
*/ - - InitSampleSearch(SearchState, Cluster); - i = 0; - while ((Sample = NextSample (&SearchState)) != nullptr) { - if (Sample->Mean[Dim] > Mean) - BucketID = Buckets->NumberOfBuckets - 1; - else if (Sample->Mean[Dim] < Mean) - BucketID = 0; - else - BucketID = i; - Buckets->Count[BucketID] += 1; - i++; - if (i >= Buckets->NumberOfBuckets) - i = 0; - } - } - else { - // search for all samples in the cluster and add to histogram buckets - InitSampleSearch(SearchState, Cluster); - while ((Sample = NextSample (&SearchState)) != nullptr) { - switch (Buckets->Distribution) { - case normal: - BucketID = NormalBucket (ParamDesc, Sample->Mean[Dim], - Mean, StdDev); - break; - case D_random: - case uniform: - BucketID = UniformBucket (ParamDesc, Sample->Mean[Dim], - Mean, StdDev); - break; - default: - BucketID = 0; - } - Buckets->Count[Buckets->Bucket[BucketID]] += 1; - } - } -} // FillBuckets - -/** - * This routine determines which bucket x falls into in the - * discrete normal distribution defined by kNormalMean - * and kNormalStdDev. x values which exceed the range of - * the discrete distribution are clipped. 
- * @param ParamDesc used to identify circular dimensions - * @param x value to be normalized - * @param Mean mean of normal distribution - * @param StdDev standard deviation of normal distribution - * @return Bucket number into which x falls - */ -uint16_t NormalBucket(PARAM_DESC *ParamDesc, - float x, - float Mean, - float StdDev) { - float X; - - // wraparound circular parameters if necessary - if (ParamDesc->Circular) { - if (x - Mean > ParamDesc->HalfRange) - x -= ParamDesc->Range; - else if (x - Mean < -ParamDesc->HalfRange) - x += ParamDesc->Range; - } - - X = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean; - if (X < 0) - return 0; - if (X > BUCKETTABLESIZE - 1) - return ((uint16_t) (BUCKETTABLESIZE - 1)); - return (uint16_t) floor((double) X); -} // NormalBucket - -/** - * This routine determines which bucket x falls into in the - * discrete uniform distribution defined by - * BUCKETTABLESIZE. x values which exceed the range of - * the discrete distribution are clipped. - * @param ParamDesc used to identify circular dimensions - * @param x value to be normalized - * @param Mean center of range of uniform distribution - * @param StdDev 1/2 the range of the uniform distribution - * @return Bucket number into which x falls - */ -uint16_t UniformBucket(PARAM_DESC *ParamDesc, - float x, - float Mean, - float StdDev) { - float X; - - // wraparound circular parameters if necessary - if (ParamDesc->Circular) { - if (x - Mean > ParamDesc->HalfRange) - x -= ParamDesc->Range; - else if (x - Mean < -ParamDesc->HalfRange) - x += ParamDesc->Range; - } - - X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0); - if (X < 0) - return 0; - if (X > BUCKETTABLESIZE - 1) - return (uint16_t) (BUCKETTABLESIZE - 1); - return (uint16_t) floor((double) X); -} // UniformBucket - -/** - * This routine performs a chi-square goodness of fit test - * on the histogram data in the Buckets data structure. 
TRUE - * is returned if the histogram matches the probability - * distribution which was specified when the Buckets - * structure was originally created. Otherwise FALSE is - * returned. - * @param Buckets histogram data to perform chi-square test on - * @return TRUE if samples match distribution, FALSE otherwise - */ -bool DistributionOK(BUCKETS* Buckets) { - float FrequencyDifference; - float TotalDifference; - int i; - - // compute how well the histogram matches the expected histogram - TotalDifference = 0.0; - for (i = 0; i < Buckets->NumberOfBuckets; i++) { - FrequencyDifference = Buckets->Count[i] - Buckets->ExpectedCount[i]; - TotalDifference += (FrequencyDifference * FrequencyDifference) / - Buckets->ExpectedCount[i]; - } - - // test to see if the difference is more than expected - if (TotalDifference > Buckets->ChiSquared) - return false; - else - return true; -} // DistributionOK - -/** - * This routine frees the memory used by the statistics - * data structure. - * @param Statistics pointer to data structure to be freed - * @return None - */ -void FreeStatistics(STATISTICS *Statistics) { - free(Statistics->CoVariance); - free(Statistics->Min); - free(Statistics->Max); - free(Statistics); -} // FreeStatistics - -/** - * This routine properly frees the memory used by a BUCKETS. - * - * @param buckets pointer to data structure to be freed - */ -void FreeBuckets(BUCKETS *buckets) { - Efree(buckets->Count); - Efree(buckets->ExpectedCount); - Efree(buckets); -} // FreeBuckets - -/** - * This routine frees the memory consumed by the specified - * cluster and all of its subclusters. This is done by - * recursive calls to FreeCluster(). 
- * - * @param Cluster pointer to cluster to be freed - * - * @return None - */ -void FreeCluster(CLUSTER *Cluster) { - if (Cluster != nullptr) { - FreeCluster (Cluster->Left); - FreeCluster (Cluster->Right); - free(Cluster); - } -} // FreeCluster - -/** - * This routine computes the degrees of freedom that should - * be used in a chi-squared test with the specified number of - * histogram buckets. The result is always rounded up to - * the next even number so that the value of chi-squared can be - * computed more easily. This will cause the value of - * chi-squared to be higher than the optimum value, resulting - * in the chi-square test being more lenient than optimum. - * @param Distribution distribution being tested for - * @param HistogramBuckets number of buckets in chi-square test - * @return The number of degrees of freedom for a chi-square test - */ -uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets) { - static uint8_t DegreeOffsets[] = { 3, 3, 1 }; - - uint16_t AdjustedNumBuckets; - - AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[(int) Distribution]; - if (Odd (AdjustedNumBuckets)) - AdjustedNumBuckets++; - return (AdjustedNumBuckets); - -} // DegreesOfFreedom - -/** - * This routine is used to search a list of histogram data - * structures to find one with the specified number of - * buckets. It is called by the list search routines. - * @param arg1 current histogram being tested for a match - * @param arg2 match key - * @return TRUE if arg1 matches arg2 - */ -int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, - void *arg2) { // uint16_t *DesiredNumberOfBuckets) - BUCKETS *Histogram = (BUCKETS *) arg1; - uint16_t *DesiredNumberOfBuckets = (uint16_t *) arg2; - - return (*DesiredNumberOfBuckets == Histogram->NumberOfBuckets); - -} // NumBucketsMatch - -/** - * This routine is used to search a list for a list node - * whose contents match Key. It is called by the list - * delete_d routine. 
- * @return TRUE if ListNode matches Key - */ -int ListEntryMatch(void *arg1, //ListNode - void *arg2) { //Key - return (arg1 == arg2); - -} // ListEntryMatch - -/** - * This routine multiplies each ExpectedCount histogram entry - * by NewSampleCount/OldSampleCount so that the histogram - * is now adjusted to the new sample count. - * @param Buckets histogram data structure to adjust - * @param NewSampleCount new sample count to adjust to - * @return none - */ -void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount) { - int i; - double AdjustFactor; - - AdjustFactor = (((double) NewSampleCount) / - ((double) Buckets->SampleCount)); - - for (i = 0; i < Buckets->NumberOfBuckets; i++) { - Buckets->ExpectedCount[i] *= AdjustFactor; - } - - Buckets->SampleCount = NewSampleCount; - -} // AdjustBuckets - -/** - * This routine sets the bucket counts in the specified histogram - * to zero. - * @param Buckets histogram data structure to init - * @return none - */ -void InitBuckets(BUCKETS *Buckets) { - int i; - - for (i = 0; i < Buckets->NumberOfBuckets; i++) { - Buckets->Count[i] = 0; - } - -} // InitBuckets - -/** - * This routine is used to search a list of structures which - * hold pre-computed chi-squared values for a chi-squared - * value whose corresponding alpha field matches the alpha - * field of SearchKey. - * - * It is called by the list search routines. 
- * - * @param arg1 chi-squared struct being tested for a match - * @param arg2 chi-squared struct that is the search key - * @return TRUE if ChiStruct's Alpha matches SearchKey's Alpha - */ -int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct, - void *arg2) { //CHISTRUCT *SearchKey) - CHISTRUCT *ChiStruct = (CHISTRUCT *) arg1; - CHISTRUCT *SearchKey = (CHISTRUCT *) arg2; - - return (ChiStruct->Alpha == SearchKey->Alpha); - -} // AlphaMatch - -/** - * This routine allocates a new data structure which is used - * to hold a chi-squared value along with its associated - * number of degrees of freedom and alpha value. - * - * @param DegreesOfFreedom degrees of freedom for new chi value - * @param Alpha confidence level for new chi value - * @return none - */ -CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, double Alpha) { - CHISTRUCT *NewChiStruct; - - NewChiStruct = (CHISTRUCT *) Emalloc (sizeof (CHISTRUCT)); - NewChiStruct->DegreesOfFreedom = DegreesOfFreedom; - NewChiStruct->Alpha = Alpha; - return (NewChiStruct); - -} // NewChiStruct - -/** - * This routine attempts to find an x value at which Function - * goes to zero (i.e. a root of the function). It will only - * work correctly if a solution actually exists and there - * are no extrema between the solution and the InitialGuess. - * The algorithms used are extremely primitive. - * - * @param Function function whose zero is to be found - * @param FunctionParams arbitrary data to pass to function - * @param InitialGuess point to start solution search at - * @param Accuracy maximum allowed error - * @return Solution of function (x for which f(x) = 0). 
- */ -double -Solve (SOLVEFUNC Function, -void *FunctionParams, double InitialGuess, double Accuracy) -#define INITIALDELTA 0.1 -#define DELTARATIO 0.1 -{ - double x; - double f; - double Slope; - double Delta; - double NewDelta; - double xDelta; - double LastPosX, LastNegX; - - x = InitialGuess; - Delta = INITIALDELTA; - LastPosX = FLT_MAX; - LastNegX = -FLT_MAX; - f = (*Function) ((CHISTRUCT *) FunctionParams, x); - while (Abs (LastPosX - LastNegX) > Accuracy) { - // keep track of outer bounds of current estimate - if (f < 0) - LastNegX = x; - else - LastPosX = x; - - // compute the approx. slope of f(x) at the current point - Slope = - ((*Function) ((CHISTRUCT *) FunctionParams, x + Delta) - f) / Delta; - - // compute the next solution guess */ - xDelta = f / Slope; - x -= xDelta; - - // reduce the delta used for computing slope to be a fraction of - //the amount moved to get to the new guess - NewDelta = Abs (xDelta) * DELTARATIO; - if (NewDelta < Delta) - Delta = NewDelta; - - // compute the value of the function at the new guess - f = (*Function) ((CHISTRUCT *) FunctionParams, x); - } - return (x); - -} // Solve - -/** - * This routine computes the area under a chi density curve - * from 0 to x, minus the desired area under the curve. The - * number of degrees of freedom of the chi curve is specified - * in the ChiParams structure. The desired area is also - * specified in the ChiParams structure as Alpha (or 1 minus - * the desired area). This routine is intended to be passed - * to the Solve() function to find the value of chi-squared - * which will yield a desired area under the right tail of - * the chi density curve. The function will only work for - * even degrees of freedom. The equations are based on - * integrating the chi density curve in parts to obtain - * a series that can be used to compute the area under the - * curve. 
- * @param ChiParams contains degrees of freedom and alpha - * @param x value of chi-squared to evaluate - * @return Error between actual and desired area under the chi curve. - */ -double ChiArea(CHISTRUCT *ChiParams, double x) { - int i, N; - double SeriesTotal; - double Denominator; - double PowerOfx; - - N = ChiParams->DegreesOfFreedom / 2 - 1; - SeriesTotal = 1; - Denominator = 1; - PowerOfx = 1; - for (i = 1; i <= N; i++) { - Denominator *= 2 * i; - PowerOfx *= x; - SeriesTotal += PowerOfx / Denominator; - } - return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->Alpha); - -} // ChiArea - -/** - * This routine looks at all samples in the specified cluster. - * It computes a running estimate of the percentage of the - * characters which have more than 1 sample in the cluster. - * When this percentage exceeds MaxIllegal, TRUE is returned. - * Otherwise FALSE is returned. The CharID - * fields must contain integers which identify the training - * characters which were used to generate the sample. One - * integer is used for each sample. The NumChar field in - * the Clusterer must contain the number of characters in the - * training set. All CharID fields must be between 0 and - * NumChar-1. The main function of this routine is to help - * identify clusters which need to be split further, i.e. if - * numerous training characters have 2 or more features which are - * contained in the same cluster, then the cluster should be - * split. - * - * @param Clusterer data structure holding cluster tree - * @param Cluster cluster containing samples to be tested - * @param MaxIllegal max percentage of samples allowed to have - * more than 1 feature in the cluster - * @return TRUE if the cluster should be split, FALSE otherwise. 
- */ -bool -MultipleCharSamples(CLUSTERER* Clusterer, - CLUSTER* Cluster, float MaxIllegal) -#define ILLEGAL_CHAR 2 -{ - static BOOL8 *CharFlags = nullptr; - static int32_t NumFlags = 0; - int i; - LIST SearchState; - SAMPLE *Sample; - int32_t CharID; - int32_t NumCharInCluster; - int32_t NumIllegalInCluster; - float PercentIllegal; - - // initial estimate assumes that no illegal chars exist in the cluster - NumCharInCluster = Cluster->SampleCount; - NumIllegalInCluster = 0; - - if (Clusterer->NumChar > NumFlags) { - free(CharFlags); - NumFlags = Clusterer->NumChar; - CharFlags = (BOOL8 *) Emalloc (NumFlags * sizeof (BOOL8)); - } - - for (i = 0; i < NumFlags; i++) - CharFlags[i] = FALSE; - - // find each sample in the cluster and check if we have seen it before - InitSampleSearch(SearchState, Cluster); - while ((Sample = NextSample (&SearchState)) != nullptr) { - CharID = Sample->CharID; - if (CharFlags[CharID] == FALSE) { - CharFlags[CharID] = TRUE; - } - else { - if (CharFlags[CharID] == TRUE) { - NumIllegalInCluster++; - CharFlags[CharID] = ILLEGAL_CHAR; - } - NumCharInCluster--; - PercentIllegal = (float) NumIllegalInCluster / NumCharInCluster; - if (PercentIllegal > MaxIllegal) { - destroy(SearchState); - return true; - } - } - } - return false; - -} // MultipleCharSamples - -/** - * Compute the inverse of a matrix using LU decomposition with partial pivoting. - * The return value is the sum of norms of the off-diagonal terms of the - * product of a and inv. (A measure of the error.) - */ -double InvertMatrix(const float* input, int size, float* inv) { - // Allocate memory for the 2D arrays. - GENERIC_2D_ARRAY U(size, size, 0.0); - GENERIC_2D_ARRAY U_inv(size, size, 0.0); - GENERIC_2D_ARRAY L(size, size, 0.0); - - // Initialize the working matrices. U starts as input, L as I and U_inv as O. - int row; - int col; - for (row = 0; row < size; row++) { - for (col = 0; col < size; col++) { - U[row][col] = input[row*size + col]; - L[row][col] = row == col ? 
1.0 : 0.0; - U_inv[row][col] = 0.0; - } - } - - // Compute forward matrix by inversion by LU decomposition of input. - for (col = 0; col < size; ++col) { - // Find best pivot - int best_row = 0; - double best_pivot = -1.0; - for (row = col; row < size; ++row) { - if (Abs(U[row][col]) > best_pivot) { - best_pivot = Abs(U[row][col]); - best_row = row; - } - } - // Exchange pivot rows. - if (best_row != col) { - for (int k = 0; k < size; ++k) { - double tmp = U[best_row][k]; - U[best_row][k] = U[col][k]; - U[col][k] = tmp; - tmp = L[best_row][k]; - L[best_row][k] = L[col][k]; - L[col][k] = tmp; - } - } - // Now do the pivot itself. - for (row = col + 1; row < size; ++row) { - double ratio = -U[row][col] / U[col][col]; - for (int j = col; j < size; ++j) { - U[row][j] += U[col][j] * ratio; - } - for (int k = 0; k < size; ++k) { - L[row][k] += L[col][k] * ratio; - } - } - } - // Next invert U. - for (col = 0; col < size; ++col) { - U_inv[col][col] = 1.0 / U[col][col]; - for (row = col - 1; row >= 0; --row) { - double total = 0.0; - for (int k = col; k > row; --k) { - total += U[row][k] * U_inv[k][col]; - } - U_inv[row][col] = -total / U[row][row]; - } - } - // Now the answer is U_inv.L. - for (row = 0; row < size; row++) { - for (col = 0; col < size; col++) { - double sum = 0.0; - for (int k = row; k < size; ++k) { - sum += U_inv[row][k] * L[k][col]; - } - inv[row*size + col] = sum; - } - } - // Check matrix product. 
- double error_sum = 0.0; - for (row = 0; row < size; row++) { - for (col = 0; col < size; col++) { - double sum = 0.0; - for (int k = 0; k < size; ++k) { - sum += static_cast(input[row * size + k]) * inv[k * size + col]; - } - if (row != col) { - error_sum += Abs(sum); - } - } - } - return error_sum; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cluster.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cluster.h deleted file mode 100644 index cccab455..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cluster.h +++ /dev/null @@ -1,131 +0,0 @@ -/****************************************************************************** - ** Filename: cluster.h - ** Purpose: Definition of feature space clustering routines - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- *****************************************************************************/ - -#ifndef CLUSTER_H -#define CLUSTER_H - -#include "kdtree.h" -#include "oldlist.h" - -struct BUCKETS; - -#define MINBUCKETS 5 -#define MAXBUCKETS 39 - -/*---------------------------------------------------------------------- - Types -----------------------------------------------------------------------*/ -typedef struct sample { - unsigned Clustered:1; // TRUE if included in a higher cluster - unsigned Prototype:1; // TRUE if cluster represented by a proto - unsigned SampleCount:30; // number of samples in this cluster - struct sample *Left; // ptr to left sub-cluster - struct sample *Right; // ptr to right sub-cluster - int32_t CharID; // identifier of char sample came from - float Mean[1]; // mean of cluster - SampleSize floats -} CLUSTER; - -typedef CLUSTER SAMPLE; // can refer to as either sample or cluster - -typedef enum { - spherical, elliptical, mixed, automatic -} PROTOSTYLE; - -typedef struct { // parameters to control clustering - PROTOSTYLE ProtoStyle; // specifies types of protos to be made - float MinSamples; // min # of samples per proto - % of total - float MaxIllegal; // max percentage of samples in a cluster which - // have more than 1 feature in that cluster - float Independence; // desired independence between dimensions - double Confidence; // desired confidence in prototypes created - int MagicSamples; // Ideal number of samples in a cluster. -} CLUSTERCONFIG; - -typedef enum { - normal, uniform, D_random, DISTRIBUTION_COUNT -} DISTRIBUTION; - -typedef union { - float Spherical; - float *Elliptical; -} FLOATUNION; - -typedef struct { - unsigned Significant:1; // TRUE if prototype is significant - unsigned Merged:1; // Merged after clustering so do not output - // but kept for display purposes. If it has no - // samples then it was actually merged. - // Otherwise it matched an already significant - // cluster. 
- unsigned Style:2; // spherical, elliptical, or mixed - unsigned NumSamples:28; // number of samples in the cluster - CLUSTER *Cluster; // ptr to cluster which made prototype - DISTRIBUTION *Distrib; // different distribution for each dimension - float *Mean; // prototype mean - float TotalMagnitude; // total magnitude over all dimensions - float LogMagnitude; // log base e of TotalMagnitude - FLOATUNION Variance; // prototype variance - FLOATUNION Magnitude; // magnitude of density function - FLOATUNION Weight; // weight of density function -} PROTOTYPE; - -typedef struct { - int16_t SampleSize; // number of parameters per sample - PARAM_DESC *ParamDesc; // description of each parameter - int32_t NumberOfSamples; // total number of samples being clustered - KDTREE *KDTree; // for optimal nearest neighbor searching - CLUSTER *Root; // ptr to root cluster of cluster tree - LIST ProtoList; // list of prototypes - int32_t NumChar; // # of characters represented by samples - // cache of reusable histograms by distribution type and number of buckets. - BUCKETS* bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS + 1 - MINBUCKETS]; -} CLUSTERER; - -typedef struct { - int32_t NumSamples; // number of samples in list - int32_t MaxNumSamples; // maximum size of list - SAMPLE *Sample[1]; // array of ptrs to sample data structures -} SAMPLELIST; - -// low level cluster tree analysis routines. 
-#define InitSampleSearch(S,C) (((C)==nullptr)?(S=NIL_LIST):(S=push(NIL_LIST,(C)))) - -/*-------------------------------------------------------------------------- - Public Function Prototypes ---------------------------------------------------------------------------*/ -CLUSTERER *MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]); - -SAMPLE *MakeSample(CLUSTERER* Clusterer, const float* Feature, int32_t CharID); - -LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); - -void FreeClusterer(CLUSTERER *Clusterer); - -void FreeProtoList(LIST *ProtoList); - -void FreePrototype(void *arg); // PROTOTYPE *Prototype); - -CLUSTER *NextSample(LIST *SearchState); - -float Mean(PROTOTYPE *Proto, uint16_t Dimension); - -float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension); - -int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, - float m[], float m1[], float m2[]); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/clusttool.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/clusttool.cpp deleted file mode 100644 index 5bf77502..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/clusttool.cpp +++ /dev/null @@ -1,362 +0,0 @@ -/****************************************************************************** - ** Filename: clustertool.c - ** Purpose: Misc. tools for use with the clustering routines - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - *****************************************************************************/ - -//--------------------------Include Files---------------------------------- -#include "clusttool.h" -#include "emalloc.h" -#include -#include - -using tesseract::TFile; - -//---------------Global Data Definitions and Declarations-------------------- -#define TOKENSIZE 80 //< max size of tokens read from an input file -#define QUOTED_TOKENSIZE "79" -#define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space -//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block -// size) - -/** - * This routine reads a single integer from the specified - * file and checks to ensure that it is between 0 and - * MAXSAMPLESIZE. - * @param fp open text file to read sample size from - * @return Sample size - * @note Globals: None - */ -uint16_t ReadSampleSize(TFile *fp) { - int SampleSize = 0; - - const int kMaxLineSize = 100; - char line[kMaxLineSize]; - ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); - ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1); - ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE); - return SampleSize; -} - -/** - * This routine reads textual descriptions of sets of parameters - * which describe the characteristics of feature dimensions. - * - * @param fp open text file to read N parameter descriptions from - * @param N number of parameter descriptions to read - * @return Pointer to an array of parameter descriptors. 
- * @note Globals: None - */ -PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) { - PARAM_DESC *ParamDesc; - char linear_token[TOKENSIZE], essential_token[TOKENSIZE]; - - ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC)); - for (int i = 0; i < N; i++) { - const int kMaxLineSize = TOKENSIZE * 4; - char line[kMaxLineSize]; - ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); - ASSERT_HOST(sscanf(line, - "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %f %f", - linear_token, essential_token, &ParamDesc[i].Min, - &ParamDesc[i].Max) == 4); - if (linear_token[0] == 'c') - ParamDesc[i].Circular = TRUE; - else - ParamDesc[i].Circular = FALSE; - - if (linear_token[0] == 'e') - ParamDesc[i].NonEssential = FALSE; - else - ParamDesc[i].NonEssential = TRUE; - ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; - ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; - ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; - } - return (ParamDesc); -} - -/** - * This routine reads a textual description of a prototype from - * the specified file. 
- * - * @param fp open text file to read prototype from - * @param N number of dimensions used in prototype - * @return List of prototypes - * @note Globals: None - */ -PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { - char sig_token[TOKENSIZE], shape_token[TOKENSIZE]; - PROTOTYPE *Proto; - int SampleCount; - int i; - - const int kMaxLineSize = TOKENSIZE * 4; - char line[kMaxLineSize]; - if (fp->FGets(line, kMaxLineSize) == nullptr || - sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", - sig_token, shape_token, &SampleCount) != 3) { - tprintf("Invalid prototype: %s\n", line); - return nullptr; - } - Proto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); - Proto->Cluster = nullptr; - if (sig_token[0] == 's') - Proto->Significant = TRUE; - else - Proto->Significant = FALSE; - - switch (shape_token[0]) { - case 's': - Proto->Style = spherical; - break; - case 'e': - Proto->Style = elliptical; - break; - case 'a': - Proto->Style = automatic; - break; - default: - tprintf("Invalid prototype style specification:%s\n", shape_token); - Proto->Style = elliptical; - } - - ASSERT_HOST(SampleCount >= 0); - Proto->NumSamples = SampleCount; - - Proto->Mean = ReadNFloats(fp, N, nullptr); - ASSERT_HOST(Proto->Mean != nullptr); - - switch (Proto->Style) { - case spherical: - ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr); - Proto->Magnitude.Spherical = - 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); - Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, (float)N); - Proto->LogMagnitude = log((double)Proto->TotalMagnitude); - Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; - Proto->Distrib = nullptr; - break; - case elliptical: - Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr); - ASSERT_HOST(Proto->Variance.Elliptical != nullptr); - Proto->Magnitude.Elliptical = (float *)Emalloc(N * sizeof(float)); - Proto->Weight.Elliptical = (float *)Emalloc(N * sizeof(float)); - Proto->TotalMagnitude = 1.0; - for (i = 0; i < N; 
i++) { - Proto->Magnitude.Elliptical[i] = - 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]); - Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; - Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; - } - Proto->LogMagnitude = log((double)Proto->TotalMagnitude); - Proto->Distrib = nullptr; - break; - default: - Efree(Proto); - tprintf("Invalid prototype style\n"); - return nullptr; - } - return Proto; -} - -/** - * This routine reads N floats from the specified text file - * and places them into Buffer. If Buffer is nullptr, a buffer - * is created and passed back to the caller. If EOF is - * encountered before any floats can be read, nullptr is - * returned. - * @param fp open text file to read floats from - * @param N number of floats to read - * @param Buffer pointer to buffer to place floats into - * @return Pointer to buffer holding floats or nullptr if EOF - * @note Globals: None - */ -float *ReadNFloats(TFile *fp, uint16_t N, float Buffer[]) { - const int kMaxLineSize = 1024; - char line[kMaxLineSize]; - if (fp->FGets(line, kMaxLineSize) == nullptr) { - tprintf("Hit EOF in ReadNFloats!\n"); - return nullptr; - } - bool needs_free = false; - - if (Buffer == nullptr) { - Buffer = static_cast(Emalloc(N * sizeof(float))); - needs_free = true; - } - - char *startptr = line; - for (int i = 0; i < N; i++) { - char *endptr; - Buffer[i] = strtof(startptr, &endptr); - if (endptr == startptr) { - tprintf("Read of %d floats failed!\n", N); - if (needs_free) Efree(Buffer); - return nullptr; - } - startptr = endptr; - } - return Buffer; -} - -/** - * This routine writes an array of dimension descriptors to - * the specified text file. 
- * @param File open text file to write param descriptors to - * @param N number of param descriptors to write - * @param ParamDesc array of param descriptors to write - * @return None - * @note Globals: None - */ -void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) { - int i; - - for (i = 0; i < N; i++) { - if (ParamDesc[i].Circular) - fprintf (File, "circular "); - else - fprintf (File, "linear "); - - if (ParamDesc[i].NonEssential) - fprintf (File, "non-essential "); - else - fprintf (File, "essential "); - - fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); - } -} - -/** - * This routine writes a textual description of a prototype - * to the specified text file. - * @param File open text file to write prototype to - * @param N number of dimensions in feature space - * @param Proto prototype to write out - * @return None - * @note Globals: None - */ -void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) { - int i; - - if (Proto->Significant) - fprintf (File, "significant "); - else - fprintf (File, "insignificant "); - WriteProtoStyle (File, (PROTOSTYLE) Proto->Style); - fprintf (File, "%6d\n\t", Proto->NumSamples); - WriteNFloats (File, N, Proto->Mean); - fprintf (File, "\t"); - - switch (Proto->Style) { - case spherical: - WriteNFloats (File, 1, &(Proto->Variance.Spherical)); - break; - case elliptical: - WriteNFloats (File, N, Proto->Variance.Elliptical); - break; - case mixed: - for (i = 0; i < N; i++) - switch (Proto->Distrib[i]) { - case normal: - fprintf (File, " %9s", "normal"); - break; - case uniform: - fprintf (File, " %9s", "uniform"); - break; - case D_random: - fprintf (File, " %9s", "random"); - break; - case DISTRIBUTION_COUNT: - ASSERT_HOST(!"Distribution count not allowed!"); - } - fprintf (File, "\n\t"); - WriteNFloats (File, N, Proto->Variance.Elliptical); - } -} - -/** - * This routine writes a text representation of N floats from - * an array to a file. 
All of the floats are placed on one line. - * @param File open text file to write N floats to - * @param N number of floats to write - * @param Array array of floats to write - * @return None - * @note Globals: None - */ -void WriteNFloats(FILE * File, uint16_t N, float Array[]) { - for (int i = 0; i < N; i++) - fprintf(File, " %9.6f", Array[i]); - fprintf(File, "\n"); -} - -/** - * This routine writes to the specified text file a word - * which represents the ProtoStyle. It does not append - * a carriage return to the end. - * @param File open text file to write prototype style to - * @param ProtoStyle prototype style to write - * @return None - * @note Globals: None - */ -void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { - switch (ProtoStyle) { - case spherical: - fprintf (File, "spherical"); - break; - case elliptical: - fprintf (File, "elliptical"); - break; - case mixed: - fprintf (File, "mixed"); - break; - case automatic: - fprintf (File, "automatic"); - break; - } -} - -/** - * This routine writes a textual description of each prototype - * in the prototype list to the specified file. It also - * writes a file header which includes the number of dimensions - * in feature space and the descriptions for each dimension. 
- * @param File open text file to write prototypes to - * @param N number of dimensions in feature space - * @param ParamDesc descriptions for each dimension - * @param ProtoList list of prototypes to be written - * @param WriteSigProtos TRUE to write out significant prototypes - * @param WriteInsigProtos TRUE to write out insignificants - * @note Globals: None - * @return None - */ - -void WriteProtoList(FILE* File, uint16_t N, PARAM_DESC* ParamDesc, - LIST ProtoList, bool WriteSigProtos, - bool WriteInsigProtos) { - PROTOTYPE *Proto; - - /* write file header */ - fprintf(File,"%0d\n",N); - WriteParamDesc(File,N,ParamDesc); - - /* write prototypes */ - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node (ProtoList); - if ((Proto->Significant && WriteSigProtos) || - (!Proto->Significant && WriteInsigProtos)) - WritePrototype(File, N, Proto); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/clusttool.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/clusttool.h deleted file mode 100644 index 5103cc86..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/clusttool.h +++ /dev/null @@ -1,50 +0,0 @@ -/****************************************************************************** - ** Filename: clusttool.h - ** Purpose: Definition of clustering utility tools - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef TESSERACT_CLASSIFY_CLUSTTOOL_H_ -#define TESSERACT_CLASSIFY_CLUSTTOOL_H_ - -//--------------------------Include Files--------------------------------------- -#include -#include "cluster.h" -#include "host.h" -#include "serialis.h" - -/*------------------------------------------------------------------------- - Public Function Prototype ---------------------------------------------------------------------------*/ -uint16_t ReadSampleSize(tesseract::TFile *fp); - -PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N); - -PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N); - -float *ReadNFloats(tesseract::TFile *fp, uint16_t N, float Buffer[]); - -void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]); - -void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto); - -void WriteNFloats (FILE * File, uint16_t N, float Array[]); - -void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle); - -void WriteProtoList(FILE* File, uint16_t N, PARAM_DESC* ParamDesc, - LIST ProtoList, bool WriteSigProtos, - bool WriteInsigProtos); - -#endif // TESSERACT_CLASSIFY_CLUSTTOOL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cutoffs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cutoffs.cpp deleted file mode 100644 index 1daa40cc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cutoffs.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/****************************************************************************** - ** Filename: cutoffs.c - ** Purpose: Routines to manipulate an array of class cutoffs. - ** Author: Dan Johnson - ** History: Wed Feb 20 09:28:51 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "cutoffs.h" - -#include - -#include "classify.h" -#include "globals.h" -#include "helpers.h" -#include "serialis.h" -#include "unichar.h" - -#define REALLY_QUOTE_IT(x) QUOTE_IT(x) - -#define MAX_CUTOFF 1000 - -namespace tesseract { -/** - * Open file, read in all of the class-id/cutoff pairs - * and insert them into the Cutoffs array. Cutoffs are - * indexed in the array by class id. Unused entries in the - * array are set to an arbitrarily high cutoff value. 
- * @param fp file containing cutoff definitions - * @param Cutoffs array to put cutoffs into - * @return none - * @note Globals: none - */ -void Classify::ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs) { - char Class[UNICHAR_LEN + 1]; - CLASS_ID ClassId; - int Cutoff; - - if (shape_table_ != nullptr) { - if (!shapetable_cutoffs_.DeSerialize(fp)) { - tprintf("Error during read of shapetable pffmtable!\n"); - } - } - for (int i = 0; i < MAX_NUM_CLASSES; i++) - Cutoffs[i] = MAX_CUTOFF; - - const int kMaxLineSize = 100; - char line[kMaxLineSize]; - while (fp->FGets(line, kMaxLineSize) != nullptr && - sscanf(line, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d", Class, - &Cutoff) == 2) { - if (strcmp(Class, "NULL") == 0) { - ClassId = unicharset.unichar_to_id(" "); - } else { - ClassId = unicharset.unichar_to_id(Class); - } - Cutoffs[ClassId] = Cutoff; - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cutoffs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cutoffs.h deleted file mode 100644 index ebc67e27..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/cutoffs.h +++ /dev/null @@ -1,45 +0,0 @@ -/****************************************************************************** - ** Filename: cutoffs.h - ** Purpose: Routines to manipulate an array of class cutoffs. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef CUTOFFS_H -#define CUTOFFS_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "matchdefs.h" - -typedef uint16_t CLASS_CUTOFF_ARRAY[MAX_NUM_CLASSES]; - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* cutoffs.c -void ReadNewCutoffs - _ARGS((char *Filename, - CLASS_CUTOFF_ARRAY Cutoffs)); -#undef _ARGS -*/ -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/errorcounter.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/errorcounter.cpp deleted file mode 100644 index 9e079649..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/errorcounter.cpp +++ /dev/null @@ -1,500 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// -#include -#include - -#include "errorcounter.h" - -#include "fontinfo.h" -#include "sampleiterator.h" -#include "shapeclassifier.h" -#include "shapetable.h" -#include "trainingsample.h" -#include "trainingsampleset.h" -#include "unicity_table.h" - -namespace tesseract { - -// Difference in result rating to be thought of as an "equal" choice. -const double kRatingEpsilon = 1.0 / 32; - -// Tests a classifier, computing its error rate. -// See errorcounter.h for description of arguments. -// Iterates over the samples, calling the classifier in normal/silent mode. -// If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate -// report_level is set (4 or greater), it will then call the classifier again -// with a debug flag and a keep_this argument to find out what is going on. -double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, - int report_level, CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const GenericVector& page_images, SampleIterator* it, - double* unichar_error, double* scaled_error, STRING* fonts_report) { - const int fontsize = it->sample_set()->NumFonts(); - ErrorCounter counter(classifier->GetUnicharset(), fontsize); - GenericVector results; - - clock_t start = clock(); - unsigned total_samples = 0; - double unscaled_error = 0.0; - // Set a number of samples on which to run the classify debug mode. - int error_samples = report_level > 3 ? report_level * report_level : 0; - // Iterate over all the samples, accumulating errors. - for (it->Begin(); !it->AtEnd(); it->Next()) { - TrainingSample* mutable_sample = it->MutableSample(); - int page_index = mutable_sample->page_num(); - Pix* page_pix = 0 <= page_index && page_index < page_images.size() - ? page_images[page_index] : nullptr; - // No debug, no keep this. 
- classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, - INVALID_UNICHAR_ID, &results); - bool debug_it = false; - int correct_id = mutable_sample->class_id(); - if (counter.unicharset_.has_special_codes() && - (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED || - correct_id == UNICHAR_BROKEN)) { - // This is junk so use the special counter. - debug_it = counter.AccumulateJunk(report_level > 3, - results, - mutable_sample); - } else { - debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, - fontinfo_table, - results, mutable_sample); - } - if (debug_it && error_samples > 0) { - // Running debug, keep the correct answer, and debug the classifier. - tprintf("Error on sample %d: %s Classifier debug output:\n", - it->GlobalSampleIndex(), - it->sample_set()->SampleToString(*mutable_sample).string()); - classifier->DebugDisplay(*mutable_sample, page_pix, correct_id); - --error_samples; - } - ++total_samples; - } - const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC; - // Create the appropriate error report. - unscaled_error = counter.ReportErrors(report_level, boosting_mode, - fontinfo_table, - *it, unichar_error, fonts_report); - if (scaled_error != nullptr) *scaled_error = counter.scaled_error_; - if (report_level > 1 && total_samples > 0) { - // It is useful to know the time in microseconds/char. - tprintf("Errors computed in %.2fs at %.1f μs/char\n", - total_time, 1000000.0 * total_time / total_samples); - } - return unscaled_error; -} - -// Tests a pair of classifiers, debugging errors of the new against the old. -// See errorcounter.h for description of arguments. -// Iterates over the samples, calling the classifiers in normal/silent mode. -// If the new_classifier makes a boosting_mode error that the old_classifier -// does not, it will then call the new_classifier again with a debug flag -// and a keep_this argument to find out what is going on. 
-void ErrorCounter::DebugNewErrors( - ShapeClassifier* new_classifier, ShapeClassifier* old_classifier, - CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const GenericVector& page_images, SampleIterator* it) { - int fontsize = it->sample_set()->NumFonts(); - ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize); - ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize); - GenericVector results; - - int total_samples = 0; - int error_samples = 25; - int total_new_errors = 0; - // Iterate over all the samples, accumulating errors. - for (it->Begin(); !it->AtEnd(); it->Next()) { - TrainingSample* mutable_sample = it->MutableSample(); - int page_index = mutable_sample->page_num(); - Pix* page_pix = 0 <= page_index && page_index < page_images.size() - ? page_images[page_index] : nullptr; - // No debug, no keep this. - old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, - INVALID_UNICHAR_ID, &results); - int correct_id = mutable_sample->class_id(); - if (correct_id != 0 && - !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table, - results, mutable_sample)) { - // old classifier was correct, check the new one. - new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, - INVALID_UNICHAR_ID, &results); - if (correct_id != 0 && - new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table, - results, mutable_sample)) { - tprintf("New Error on sample %d: Classifier debug output:\n", - it->GlobalSampleIndex()); - ++total_new_errors; - new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1, - correct_id, &results); - if (results.size() > 0 && error_samples > 0) { - new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id); - --error_samples; - } - } - } - ++total_samples; - } - tprintf("Total new errors = %d\n", total_new_errors); -} - -// Constructor is private. Only anticipated use of ErrorCounter is via -// the static ComputeErrorRate. 
-ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize) - : scaled_error_(0.0), rating_epsilon_(kRatingEpsilon), - unichar_counts_(unicharset.size(), unicharset.size(), 0), - ok_score_hist_(0, 101), bad_score_hist_(0, 101), - unicharset_(unicharset) { - Counts empty_counts; - font_counts_.init_to_size(fontsize, empty_counts); - multi_unichar_counts_.init_to_size(unicharset.size(), 0); -} - -// Accumulates the errors from the classifier results on a single sample. -// Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred. -// boosting_mode selects the type of error to be used for boosting and the -// is_error_ member of sample is set according to whether the required type -// of error occurred. The font_table provides access to font properties -// for error counting and shape_table is used to understand the relationship -// between unichar_ids and shape_ids in the results -bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode, - const FontInfoTable& font_table, - const GenericVector& results, - TrainingSample* sample) { - int num_results = results.size(); - int answer_actual_rank = -1; - int font_id = sample->font_id(); - int unichar_id = sample->class_id(); - sample->set_is_error(false); - if (num_results == 0) { - // Reject. We count rejects as a separate category, but still mark the - // sample as an error in case any training module wants to use that to - // improve the classifier. - sample->set_is_error(true); - ++font_counts_[font_id].n[CT_REJECT]; - } else { - // Find rank of correct unichar answer, using rating_epsilon_ to allow - // different answers to score as equal. (Ignoring the font.) 
- int epsilon_rank = 0; - int answer_epsilon_rank = -1; - int num_top_answers = 0; - double prev_rating = results[0].rating; - bool joined = false; - bool broken = false; - int res_index = 0; - while (res_index < num_results) { - if (results[res_index].rating < prev_rating - rating_epsilon_) { - ++epsilon_rank; - prev_rating = results[res_index].rating; - } - if (results[res_index].unichar_id == unichar_id && - answer_epsilon_rank < 0) { - answer_epsilon_rank = epsilon_rank; - answer_actual_rank = res_index; - } - if (results[res_index].unichar_id == UNICHAR_JOINED && - unicharset_.has_special_codes()) - joined = true; - else if (results[res_index].unichar_id == UNICHAR_BROKEN && - unicharset_.has_special_codes()) - broken = true; - else if (epsilon_rank == 0) - ++num_top_answers; - ++res_index; - } - if (answer_actual_rank != 0) { - // Correct result is not absolute top. - ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR]; - if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) sample->set_is_error(true); - } - if (answer_epsilon_rank == 0) { - ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK]; - // Unichar OK, but count if multiple unichars. - if (num_top_answers > 1) { - ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR]; - ++multi_unichar_counts_[unichar_id]; - } - // Check to see if any font in the top choice has attributes that match. - // TODO(rays) It is easy to add counters for individual font attributes - // here if we want them. - if (font_table.SetContainsFontProperties( - font_id, results[answer_actual_rank].fonts)) { - // Font attributes were matched. - // Check for multiple properties. - if (font_table.SetContainsMultipleFontProperties( - results[answer_actual_rank].fonts)) - ++font_counts_[font_id].n[CT_OK_MULTI_FONT]; - } else { - // Font attributes weren't matched. - ++font_counts_[font_id].n[CT_FONT_ATTR_ERR]; - } - } else { - // This is a top unichar error. 
- ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR]; - if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true); - // Count maps from unichar id to wrong unichar id. - ++unichar_counts_(unichar_id, results[0].unichar_id); - if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) { - // It is also a 2nd choice unichar error. - ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR]; - if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true); - } - if (answer_epsilon_rank < 0) { - // It is also a top-n choice unichar error. - ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR]; - if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true); - answer_epsilon_rank = epsilon_rank; - } - } - // Compute mean number of return values and mean rank of correct answer. - font_counts_[font_id].n[CT_NUM_RESULTS] += num_results; - font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank; - if (joined) - ++font_counts_[font_id].n[CT_OK_JOINED]; - if (broken) - ++font_counts_[font_id].n[CT_OK_BROKEN]; - } - // If it was an error for boosting then sum the weight. - if (sample->is_error()) { - scaled_error_ += sample->weight(); - if (debug) { - tprintf("%d results for char %s font %d :", - num_results, unicharset_.id_to_unichar(unichar_id), - font_id); - for (int i = 0; i < num_results; ++i) { - tprintf(" %.3f : %s\n", - results[i].rating, - unicharset_.id_to_unichar(results[i].unichar_id)); - } - return true; - } - int percent = 0; - if (num_results > 0) - percent = IntCastRounded(results[0].rating * 100); - bad_score_hist_.add(percent, 1); - } else { - int percent = 0; - if (answer_actual_rank >= 0) - percent = IntCastRounded(results[answer_actual_rank].rating * 100); - ok_score_hist_.add(percent, 1); - } - return false; -} - -// Accumulates counts for junk. Counts only whether the junk was correctly -// rejected or not. 
-bool ErrorCounter::AccumulateJunk(bool debug, - const GenericVector& results, - TrainingSample* sample) { - // For junk we accept no answer, or an explicit shape answer matching the - // class id of the sample. - const int num_results = results.size(); - const int font_id = sample->font_id(); - const int unichar_id = sample->class_id(); - int percent = 0; - if (num_results > 0) - percent = IntCastRounded(results[0].rating * 100); - if (num_results > 0 && results[0].unichar_id != unichar_id) { - // This is a junk error. - ++font_counts_[font_id].n[CT_ACCEPTED_JUNK]; - sample->set_is_error(true); - // It counts as an error for boosting too so sum the weight. - scaled_error_ += sample->weight(); - bad_score_hist_.add(percent, 1); - return debug; - } else { - // Correctly rejected. - ++font_counts_[font_id].n[CT_REJECTED_JUNK]; - sample->set_is_error(false); - ok_score_hist_.add(percent, 1); - } - return false; -} - -// Creates a report of the error rate. The report_level controls the detail -// that is reported to stderr via tprintf: -// 0 -> no output. -// >=1 -> bottom-line error rate. -// >=3 -> font-level error rate. -// boosting_mode determines the return value. It selects which (un-weighted) -// error rate to return. -// The fontinfo_table from MasterTrainer provides the names of fonts. -// The it determines the current subset of the training samples. -// If not nullptr, the top-choice unichar error rate is saved in unichar_error. -// If not nullptr, the report string is saved in fonts_report. -// (Ignoring report_level). -double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const SampleIterator& it, - double* unichar_error, - STRING* fonts_report) { - // Compute totals over all the fonts and report individual font results - // when required. - Counts totals; - int fontsize = font_counts_.size(); - for (int f = 0; f < fontsize; ++f) { - // Accumulate counts over fonts. 
- totals += font_counts_[f]; - STRING font_report; - if (ReportString(false, font_counts_[f], &font_report)) { - if (fonts_report != nullptr) { - *fonts_report += fontinfo_table.get(f).name; - *fonts_report += ": "; - *fonts_report += font_report; - *fonts_report += "\n"; - } - if (report_level > 2) { - // Report individual font error rates. - tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string()); - } - } - } - // Report the totals. - STRING total_report; - bool any_results = ReportString(true, totals, &total_report); - if (fonts_report != nullptr && fonts_report->length() == 0) { - // Make sure we return something even if there were no samples. - *fonts_report = "NoSamplesFound: "; - *fonts_report += total_report; - *fonts_report += "\n"; - } - if (report_level > 0) { - // Report the totals. - STRING total_report; - if (any_results) { - tprintf("TOTAL Scaled Err=%.4g%%, %s\n", - scaled_error_ * 100.0, total_report.string()); - } - // Report the worst substitution error only for now. 
- if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) { - int charsetsize = unicharset_.size(); - int worst_uni_id = 0; - int worst_result_id = 0; - int worst_err = 0; - for (int u = 0; u < charsetsize; ++u) { - for (int v = 0; v < charsetsize; ++v) { - if (unichar_counts_(u, v) > worst_err) { - worst_err = unichar_counts_(u, v); - worst_uni_id = u; - worst_result_id = v; - } - } - } - if (worst_err > 0) { - tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", - worst_uni_id, unicharset_.id_to_unichar(worst_uni_id), - unicharset_.id_to_unichar(worst_result_id), - worst_err, totals.n[CT_UNICHAR_TOP1_ERR], - 100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]); - } - } - tprintf("Multi-unichar shape use:\n"); - for (int u = 0; u < multi_unichar_counts_.size(); ++u) { - if (multi_unichar_counts_[u] > 0) { - tprintf("%d multiple answers for unichar: %s\n", - multi_unichar_counts_[u], - unicharset_.id_to_unichar(u)); - } - } - tprintf("OK Score histogram:\n"); - ok_score_hist_.print(); - tprintf("ERROR Score histogram:\n"); - bad_score_hist_.print(); - } - - double rates[CT_SIZE]; - if (!ComputeRates(totals, rates)) - return 0.0; - // Set output values if asked for. - if (unichar_error != nullptr) - *unichar_error = rates[CT_UNICHAR_TOP1_ERR]; - return rates[boosting_mode]; -} - -// Sets the report string to a combined human and machine-readable report -// string of the error rates. -// Returns false if there is no data, leaving report unchanged, unless -// even_if_empty is true. -bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts, - STRING* report) { - // Compute the error rates. - double rates[CT_SIZE]; - if (!ComputeRates(counts, rates) && !even_if_empty) - return false; - // Using %.4g%%, the length of the output string should exactly match the - // length of the format string, but in case of overflow, allow for +eddd - // on each number. - const int kMaxExtraLength = 5; // Length of +eddd. 
- // Keep this format string and the snprintf in sync with the CountTypes enum. - const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] " - "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, " - "FontAttr=%.4g%%, Multi=%.4g%%, " - "Answers=%.3g, Rank=%.3g, " - "OKjunk=%.4g%%, Badjunk=%.4g%%"; - const size_t max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1; - char* formatted_str = new char[max_str_len]; - snprintf(formatted_str, max_str_len, format_str, - rates[CT_UNICHAR_TOP1_ERR] * 100.0, - rates[CT_UNICHAR_TOP2_ERR] * 100.0, - rates[CT_UNICHAR_TOPN_ERR] * 100.0, - rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, - rates[CT_OK_MULTI_UNICHAR] * 100.0, - rates[CT_OK_JOINED] * 100.0, - rates[CT_OK_BROKEN] * 100.0, - rates[CT_REJECT] * 100.0, - rates[CT_FONT_ATTR_ERR] * 100.0, - rates[CT_OK_MULTI_FONT] * 100.0, - rates[CT_NUM_RESULTS], - rates[CT_RANK], - 100.0 * rates[CT_REJECTED_JUNK], - 100.0 * rates[CT_ACCEPTED_JUNK]); - *report = formatted_str; - delete [] formatted_str; - // Now append each field of counts with a tab in front so the result can - // be loaded into a spreadsheet. - for (int ct = 0; ct < CT_SIZE; ++ct) - report->add_str_int("\t", counts.n[ct]); - return true; -} - -// Computes the error rates and returns in rates which is an array of size -// CT_SIZE. Returns false if there is no data, leaving rates unchanged. -bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) { - const int ok_samples = counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] + - counts.n[CT_REJECT]; - const int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK]; - // Compute rates for normal chars. - double denominator = static_cast(std::max(ok_samples, 1)); - for (int ct = 0; ct <= CT_RANK; ++ct) - rates[ct] = counts.n[ct] / denominator; - // Compute rates for junk. 
- denominator = static_cast(std::max(junk_samples, 1)); - for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct) - rates[ct] = counts.n[ct] / denominator; - return ok_samples != 0 || junk_samples != 0; -} - -ErrorCounter::Counts::Counts() { - memset(n, 0, sizeof(n[0]) * CT_SIZE); -} -// Adds other into this for computing totals. -void ErrorCounter::Counts::operator+=(const Counts& other) { - for (int ct = 0; ct < CT_SIZE; ++ct) - n[ct] += other.n[ct]; -} - - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/errorcounter.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/errorcounter.h deleted file mode 100644 index a2d3d6ae..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/errorcounter.h +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef THIRD_PARTY_TESSERACT_CLASSIFY_ERRORCOUNTER_H_ -#define THIRD_PARTY_TESSERACT_CLASSIFY_ERRORCOUNTER_H_ - -#include "genericvector.h" -#include "matrix.h" -#include "statistc.h" - -struct Pix; -template class UnicityTable; - -namespace tesseract { - -struct FontInfo; -class FontInfoTable; -class SampleIterator; -class ShapeClassifier; -class TrainingSample; -struct UnicharRating; - -// Enumeration of the different types of error count. -// Error counts work as follows: -// -// Ground truth is a valid unichar-id / font-id pair: -// Number of classifier answers? -// 0 >0 -// CT_REJECT unichar-id matches top shape? -// __________ yes! no -// CT_UNICHAR_TOP_OK CT_UNICHAR_TOP1_ERR -// Top shape-id has multiple unichars? 2nd shape unichar id matches? -// yes! no yes! no -// CT_OK_MULTI_UNICHAR | _____ CT_UNICHAR_TOP2_ERR -// Font attributes match? Any unichar-id matches? -// yes! no yes! no -// CT_FONT_ATTR_OK CT_FONT_ATTR_ERR ______ CT_UNICHAR_TOPN_ERR -// | __________________ _________________ -// Top shape-id has multiple font attrs? -// yes! no -// CT_OK_MULTI_FONT -// _____________________________ -// -// Note that multiple counts may be activated for a single sample! -// -// Ground truth is for a fragment/n-gram that is NOT in the unicharset. -// This is called junk and is expected to be rejected: -// Number of classifier answers? -// 0 >0 -// CT_REJECTED_JUNK CT_ACCEPTED_JUNK -// -// Also, CT_NUM_RESULTS stores the mean number of results, and CT_RANK stores -// the mean rank of the correct result, counting from 0, and with an error -// receiving the number of answers as the correct rank. -// -// Keep in sync with the ReportString function. -enum CountTypes { - CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id. - // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of - // kRatingEpsilon from the first result in each group. 
The real top choice - // is measured using TOPTOP. - CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id. - CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id. - CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id. - CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct. - CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others. - CT_OK_JOINED, // Top shape id is correct but marked joined. - CT_OK_BROKEN, // Top shape id is correct but marked broken. - CT_REJECT, // Classifier hates this. - CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect. - CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs. - CT_NUM_RESULTS, // Number of answers produced. - CT_RANK, // Rank of correct answer. - CT_REJECTED_JUNK, // Junk that was correctly rejected. - CT_ACCEPTED_JUNK, // Junk that was incorrectly classified otherwise. - - CT_SIZE // Number of types for array sizing. -}; - -// Class to encapsulate all the functionality and sub-structures required -// to count errors for an isolated character classifier (ShapeClassifier). -class ErrorCounter { - public: - // Computes and returns the unweighted boosting_mode error rate of the given - // classifier. Can be used for testing, or inside an iterative training - // system, including one that uses boosting. - // report_levels: - // 0 = no output. - // 1 = bottom-line error rate. - // 2 = bottom-line error rate + time. - // 3 = font-level error rate + time. - // 4 = list of all errors + short classifier debug output on 16 errors. - // 5 = list of all errors + short classifier debug output on 25 errors. - // * The boosting_mode determines which error type is used for computing the - // scaled_error output, and setting the is_error flag in the samples. - // * The fontinfo_table is used to get string font names for the debug - // output, and also to count font attributes errors. 
- // * The page_images vector may contain a Pix* (which may be nullptr) for each - // page index assigned to the samples. - // * The it provides encapsulated iteration over some sample set. - // * The outputs unichar_error, scaled_error and totals_report are all - // optional. - // * If not nullptr, unichar error gets the top1 unichar error rate. - // * Scaled_error gets the error chosen by boosting_mode weighted by the - // weights on the samples. - // * Fonts_report gets a string summarizing the error rates for each font in - // both human-readable form and as a tab-separated list of error counts. - // The human-readable form is all before the first tab. - // * The return value is the un-weighted version of the scaled_error. - static double ComputeErrorRate(ShapeClassifier* classifier, - int report_level, CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const GenericVector& page_images, - SampleIterator* it, - double* unichar_error, - double* scaled_error, - STRING* fonts_report); - // Tests a pair of classifiers, debugging errors of the new against the old. - // See errorcounter.h for description of arguments. - // Iterates over the samples, calling the classifiers in normal/silent mode. - // If the new_classifier makes a boosting_mode error that the old_classifier - // does not, and the appropriate, it will then call the new_classifier again - // with a debug flag and a keep_this argument to find out what is going on. - static void DebugNewErrors(ShapeClassifier* new_classifier, - ShapeClassifier* old_classifier, - CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const GenericVector& page_images, - SampleIterator* it); - - private: - // Simple struct to hold an array of counts. - struct Counts { - Counts(); - // Adds other into this for computing totals. - void operator+=(const Counts& other); - - int n[CT_SIZE]; - }; - - // Constructor is private. Only anticipated use of ErrorCounter is via - // the static ComputeErrorRate. 
- ErrorCounter(const UNICHARSET& unicharset, int fontsize); - ~ErrorCounter() = default; - - // Accumulates the errors from the classifier results on a single sample. - // Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred. - // boosting_mode selects the type of error to be used for boosting and the - // is_error_ member of sample is set according to whether the required type - // of error occurred. The font_table provides access to font properties - // for error counting and shape_table is used to understand the relationship - // between unichar_ids and shape_ids in the results - bool AccumulateErrors(bool debug, CountTypes boosting_mode, - const FontInfoTable& font_table, - const GenericVector& results, - TrainingSample* sample); - - // Accumulates counts for junk. Counts only whether the junk was correctly - // rejected or not. - bool AccumulateJunk(bool debug, const GenericVector& results, - TrainingSample* sample); - - // Creates a report of the error rate. The report_level controls the detail - // that is reported to stderr via tprintf: - // 0 -> no output. - // >=1 -> bottom-line error rate. - // >=3 -> font-level error rate. - // boosting_mode determines the return value. It selects which (un-weighted) - // error rate to return. - // The fontinfo_table from MasterTrainer provides the names of fonts. - // The it determines the current subset of the training samples. - // If not nullptr, the top-choice unichar error rate is saved in unichar_error. - // If not nullptr, the report string is saved in fonts_report. - // (Ignoring report_level). - double ReportErrors(int report_level, CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const SampleIterator& it, - double* unichar_error, - STRING* fonts_report); - - // Sets the report string to a combined human and machine-readable report - // string of the error rates. - // Returns false if there is no data, leaving report unchanged, unless - // even_if_empty is true. 
- static bool ReportString(bool even_if_empty, const Counts& counts, - STRING* report); - - // Computes the error rates and returns in rates which is an array of size - // CT_SIZE. Returns false if there is no data, leaving rates unchanged. - static bool ComputeRates(const Counts& counts, double rates[CT_SIZE]); - - - // Total scaled error used by boosting algorithms. - double scaled_error_; - // Difference in result rating to be thought of as an "equal" choice. - double rating_epsilon_; - // Vector indexed by font_id from the samples of error accumulators. - GenericVector font_counts_; - // Counts of the results that map each unichar_id (from samples) to an - // incorrect shape_id. - GENERIC_2D_ARRAY unichar_counts_; - // Count of the number of times each shape_id occurs, is correct, and multi- - // unichar. - GenericVector multi_unichar_counts_; - // Histogram of scores (as percent) for correct answers. - STATS ok_score_hist_; - // Histogram of scores (as percent) for incorrect answers. - STATS bad_score_hist_; - // Unicharset for printing character ids in results. - const UNICHARSET& unicharset_; -}; - -} // namespace tesseract. - -#endif /* THIRD_PARTY_TESSERACT_CLASSIFY_ERRORCOUNTER_H_ */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/featdefs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/featdefs.cpp deleted file mode 100644 index dc4e3735..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/featdefs.cpp +++ /dev/null @@ -1,277 +0,0 @@ -/****************************************************************************** - ** Filename: featdefs.cpp - ** Purpose: Definitions of currently defined feature types. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "featdefs.h" -#include "emalloc.h" -#include "scanutils.h" - -#include -#include - -#define PICO_FEATURE_LENGTH 0.05 - -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -const char* kMicroFeatureType = "mf"; -const char* kCNFeatureType = "cn"; -const char* kIntFeatureType = "if"; -const char* kGeoFeatureType = "tb"; - -// Define all of the parameters for the MicroFeature type. -StartParamDesc(MicroFeatureParams) -DefineParam(0, 0, -0.5, 0.5) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(0, 1, 0.0, 1.0) -DefineParam(1, 0, 0.0, 1.0) -DefineParam (0, 1, -0.5, 0.5) -DefineParam (0, 1, -0.5, 0.5) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) - -// Define all of the parameters for the NormFeat type. -StartParamDesc (CharNormParams) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(0, 1, 0.0, 1.0) -DefineParam(0, 0, 0.0, 1.0) -DefineParam(0, 0, 0.0, 1.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). 
-DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) - -// Define all of the parameters for the IntFeature type -StartParamDesc(IntFeatParams) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(1, 0, 0.0, 255.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) - -// Define all of the parameters for the GeoFeature type -StartParamDesc(GeoFeatParams) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(0, 0, 0.0, 255.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) - -// Other features used for training the adaptive classifier, but not used -// during normal training, therefore not in the DescDefs array. - -// Define all of the parameters for the PicoFeature type -// define knob that can be used to adjust pico-feature length. -float PicoFeatureLength = PICO_FEATURE_LENGTH; -StartParamDesc(PicoFeatParams) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(1, 0, 0.0, 1.0) -DefineParam(0, 0, -0.5, 0.5) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams) - -// Define all of the parameters for the OutlineFeature type. -StartParamDesc(OutlineFeatParams) -DefineParam(0, 0, -0.5, 0.5) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(0, 0, 0.0, 1.0) -DefineParam(1, 0, 0.0, 1.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams) - -// MUST be kept in-sync with ExtractorDefs in fxdefs.cpp. 
-static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = { - &MicroFeatureDesc, - &CharNormDesc, - &IntFeatDesc, - &GeoFeatDesc -}; - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { - featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES; - for (int i = 0; i < NUM_FEATURE_TYPES; ++i) { - featuredefs->FeatureDesc[i] = DescDefs[i]; - } -} - -/*---------------------------------------------------------------------------*/ -/** - * Release the memory consumed by the specified character - * description and all of the features in that description. - * - * @param CharDesc character description to be deallocated - * - * Globals: - * - none - */ -void FreeCharDescription(CHAR_DESC CharDesc) { - if (CharDesc) { - for (size_t i = 0; i < CharDesc->NumFeatureSets; i++) - FreeFeatureSet (CharDesc->FeatureSets[i]); - Efree(CharDesc); - } -} /* FreeCharDescription */ - - -/*---------------------------------------------------------------------------*/ -/** - * Allocate a new character description, initialize its - * feature sets to be empty, and return it. - * - * Globals: - * - none - * - * @return New character description structure. - */ -CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) { - CHAR_DESC CharDesc; - CharDesc = (CHAR_DESC) Emalloc (sizeof (CHAR_DESC_STRUCT)); - CharDesc->NumFeatureSets = FeatureDefs.NumFeatureTypes; - - for (size_t i = 0; i < CharDesc->NumFeatureSets; i++) - CharDesc->FeatureSets[i] = nullptr; - - return (CharDesc); -} /* NewCharDescription */ - -/*---------------------------------------------------------------------------*/ -/** - * Appends a textual representation of CharDesc to str. - * The format used is to write out the number of feature - * sets which will be written followed by a representation of - * each feature set. 
- * - * Each set starts with the short name for that feature followed - * by a description of the feature set. Feature sets which are - * not present are not written. - * - * @param FeatureDefs definitions of feature types/extractors - * @param str string to append CharDesc to - * @param CharDesc character description to write to File - */ -void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, - CHAR_DESC CharDesc, STRING* str) { - int NumSetsToWrite = 0; - - for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) - if (CharDesc->FeatureSets[Type]) - NumSetsToWrite++; - - str->add_str_int(" ", NumSetsToWrite); - *str += "\n"; - for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { - if (CharDesc->FeatureSets[Type]) { - *str += FeatureDefs.FeatureDesc[Type]->ShortName; - *str += " "; - WriteFeatureSet(CharDesc->FeatureSets[Type], str); - } - } -} /* WriteCharDescription */ - -// Return whether all of the fields of the given feature set -// are well defined (not inf or nan). -bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, - CHAR_DESC CharDesc) { - bool anything_written = false; - bool well_formed = true; - for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { - if (CharDesc->FeatureSets[Type]) { - for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) { - FEATURE feat = CharDesc->FeatureSets[Type]->Features[i]; - for (int p = 0; p < feat->Type->NumParams; p++) { - if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p])) - well_formed = false; - else - anything_written = true; - } - } - } else { - return false; - } - } - return anything_written && well_formed; -} /* ValidCharDescription */ - -/*---------------------------------------------------------------------------*/ -/** - * Read a character description from File, and return - * a data structure containing this information. 
The data - * is formatted as follows: - * @verbatim - NumberOfSets - ShortNameForSet1 Set1 - ShortNameForSet2 Set2 - ... - @endverbatim - * - * Globals: - * - none - * - * @param FeatureDefs definitions of feature types/extractors - * @param File open text file to read character description from - * @return Character description read from File. - */ -CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, - FILE *File) { - int NumSetsToRead; - char ShortName[FEAT_NAME_SIZE]; - CHAR_DESC CharDesc; - int Type; - - ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1); - ASSERT_HOST(NumSetsToRead >= 0); - ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes); - - CharDesc = NewCharDescription(FeatureDefs); - for (; NumSetsToRead > 0; NumSetsToRead--) { - tfscanf(File, "%s", ShortName); - Type = ShortNameToFeatureType(FeatureDefs, ShortName); - CharDesc->FeatureSets[Type] = - ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]); - } - return CharDesc; -} - -/*---------------------------------------------------------------------------*/ -/** - * Search through all features currently defined and return - * the feature type for the feature with the specified short - * name. Trap an error if the specified name is not found. - * - * Globals: - * - none - * - * @param FeatureDefs definitions of feature types/extractors - * @param ShortName short name of a feature type - * @return Feature type which corresponds to ShortName. 
- */ -uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, - const char *ShortName) { - for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++) - if (!strcmp ((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) - return static_cast(i); - ASSERT_HOST(!"Illegal short name for a feature"); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/featdefs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/featdefs.h deleted file mode 100644 index 6996898c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/featdefs.h +++ /dev/null @@ -1,83 +0,0 @@ -/****************************************************************************** - ** Filename: featdefs.h - ** Purpose: Definitions of currently defined feature types. - ** Author: Dan Johnson - ** History: Mon May 21 08:28:01 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef FEATDEFS_H -#define FEATDEFS_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" - -/* Enumerate the different types of features currently defined. 
*/ -#define NUM_FEATURE_TYPES 4 -extern const char* kMicroFeatureType; -extern const char* kCNFeatureType; -extern const char* kIntFeatureType; -extern const char* kGeoFeatureType; - -/* A character is described by multiple sets of extracted features. Each - set contains a number of features of a particular type, for example, a - set of bays, or a set of closures, or a set of microfeatures. Each - feature consists of a number of parameters. All features within a - feature set contain the same number of parameters.*/ - -struct CHAR_DESC_STRUCT { - uint32_t NumFeatureSets; - FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]; -}; -using CHAR_DESC = CHAR_DESC_STRUCT *; - -struct FEATURE_DEFS_STRUCT { - int32_t NumFeatureTypes; - const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES]; - int FeatureEnabled[NUM_FEATURE_TYPES]; -}; -using FEATURE_DEFS = FEATURE_DEFS_STRUCT *; - -/*---------------------------------------------------------------------- - Generic functions for manipulating character descriptions -----------------------------------------------------------------------*/ -void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); - -void FreeCharDescription(CHAR_DESC CharDesc); - -CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); - -bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, - CHAR_DESC CharDesc); - -void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, - CHAR_DESC CharDesc, STRING* str); - -CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, - FILE *File); - -uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, - const char *ShortName); - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern const FEATURE_DESC_STRUCT MicroFeatureDesc; -extern TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc; -extern const 
FEATURE_DESC_STRUCT CharNormDesc; -extern const FEATURE_DESC_STRUCT OutlineFeatDesc; -extern const FEATURE_DESC_STRUCT IntFeatDesc; -extern const FEATURE_DESC_STRUCT GeoFeatDesc; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/float2int.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/float2int.cpp deleted file mode 100644 index 6d4f6ac9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/float2int.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/****************************************************************************** - ** Filename: float2int.cpp - ** Purpose: Routines for converting float features to int features - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "float2int.h" -#include "normmatch.h" -#include "mfoutline.h" -#include "classify.h" -#include "helpers.h" -#include "picofeat.h" - -#define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1) - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -namespace tesseract { - -/** - * For each class in the unicharset, clears the corresponding - * entry in char_norm_array. char_norm_array is indexed by unichar_id. - * - * Globals: - * - none - * - * @param char_norm_array array to be cleared - */ -void Classify::ClearCharNormArray(uint8_t* char_norm_array) { - memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size()); -} /* ClearCharNormArray */ - - -/*---------------------------------------------------------------------------*/ -/** - * For each class in unicharset, computes the match between - * norm_feature and the normalization protos for that class. - * Converts this number to the range from 0 - 255 and stores it - * into char_norm_array. CharNormArray is indexed by unichar_id. 
- * - * Globals: - * - PreTrainedTemplates current set of built-in templates - * - * @param norm_feature character normalization feature - * @param[out] char_norm_array place to put results of size unicharset.size() - */ -void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, - uint8_t* char_norm_array) { - for (int i = 0; i < unicharset.size(); i++) { - if (i < PreTrainedTemplates->NumClasses) { - int norm_adjust = static_cast(INT_CHAR_NORM_RANGE * - ComputeNormMatch(i, norm_feature, false)); - char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM); - } else { - // Classes with no templates (eg. ambigs & ligatures) default - // to worst match. - char_norm_array[i] = MAX_INT_CHAR_NORM; - } - } -} /* ComputeIntCharNormArray */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine converts each floating point pico-feature - * in Features into integer format and saves it into - * IntFeatures. - * - * Globals: - * - none - * - * @param Features floating point pico-features to be converted - * @param[out] IntFeatures array to put converted features into - */ -void Classify::ComputeIntFeatures(FEATURE_SET Features, - INT_FEATURE_ARRAY IntFeatures) { - float YShift; - - if (classify_norm_method == baseline) - YShift = BASELINE_Y_SHIFT; - else - YShift = Y_SHIFT; - - for (int Fid = 0; Fid < Features->NumFeatures; Fid++) { - FEATURE Feature = Features->Features[Fid]; - - IntFeatures[Fid].X = - Bucket8For(Feature->Params[PicoFeatX], X_SHIFT, INT_FEAT_RANGE); - IntFeatures[Fid].Y = - Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE); - IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir], - ANGLE_SHIFT, INT_FEAT_RANGE); - IntFeatures[Fid].CP_misses = 0; - } -} /* ComputeIntFeatures */ -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/float2int.h 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/float2int.h deleted file mode 100644 index 70a05ab6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/float2int.h +++ /dev/null @@ -1,30 +0,0 @@ -/****************************************************************************** - ** Filename: float2int.h - ** Purpose: Routines for converting float features to int features - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#ifndef FLOAT2INT_H -#define FLOAT2INT_H - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "intmatcher.h" -#include "ocrfeatures.h" - -#define INT_FEAT_RANGE 256 -#define BASELINE_Y_SHIFT (0.25) - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/fpoint.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/fpoint.cpp deleted file mode 100644 index e13231e5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/fpoint.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/****************************************************************************** - ** Filename: fpoint.cpp - ** Purpose: Abstract data type for a 2D point (floating point coords) - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "fpoint.h" -#include -#include - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ - -float DistanceBetween(FPOINT A, FPOINT B) { - const double xd = XDelta(A, B); - const double yd = YDelta(A, B); - return sqrt(static_cast(xd * xd + yd * yd)); -} - -/** - * Return the angle from Point1 to Point2 normalized to - * lie in the range 0 to FullScale (where FullScale corresponds - * to 2*pi or 360 degrees). - * @param Point1 points to compute angle between - * @param Point2 points to compute angle between - * @param FullScale value to associate with 2*pi - * @return none - * @note Globals: none - */ -float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale) { - float NumRadsInCircle = 2.0 * M_PI; - - float Angle = AngleFrom (*Point1, *Point2); - if (Angle < 0.0) - Angle += NumRadsInCircle; - Angle *= FullScale / NumRadsInCircle; - if (Angle < 0.0 || Angle >= FullScale) - Angle = 0.0; - return (Angle); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/fpoint.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/fpoint.h deleted file mode 100644 index bce12e65..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/fpoint.h +++ /dev/null @@ -1,54 +0,0 @@ -/****************************************************************************** - ** Filename: fpoint.h - ** Purpose: Abstract data type for 2D points (floating point coords) - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** (c) Copyright Hewlett-Packard Company, 1988. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef FPOINT_H -#define FPOINT_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include -#include -#include "host.h" - -/* define data structure to hold 2D points or vectors using floating point */ -typedef struct { - float x, y; -} FPOINT; -using FVECTOR = FPOINT; - -/**---------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------**/ -/* macros for computing miscellaneous functions of 2 points */ -#define XDelta(A, B) ((B).x - (A).x) -#define YDelta(A, B) ((B).y - (A).y) -#define SlopeFrom(A, B) (YDelta(A, B) / XDelta(A, B)) -#define AngleFrom(A, B) (atan2((double)YDelta(A, B), (double)XDelta(A, B))) - -#define XIntersectionOf(A, B, X) (SlopeFrom(A, B) * ((X)-A.x) + A.y) - -/*------------------------------------------------------------------------- - Public Function Prototypes ----------------------------------------------------------------------------*/ - -float DistanceBetween(FPOINT A, FPOINT B); - -float NormalizedAngleFrom(FPOINT* Point1, FPOINT* Point2, float FullScale); - -#endif diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturedist.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturedist.cpp deleted file mode 100644 index 53deef1b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturedist.cpp +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: intfeaturedist.cpp -// Description: Fast set-difference-based feature distance calculator. -// Created: Thu Sep 01 13:07:30 PDT 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "intfeaturedist.h" -#include "intfeaturemap.h" - -namespace tesseract { - -IntFeatureDist::IntFeatureDist() - : size_(0), total_feature_weight_(0.0), - feature_map_(nullptr), features_(nullptr), - features_delta_one_(nullptr), features_delta_two_(nullptr) { -} - -IntFeatureDist::~IntFeatureDist() { - Clear(); -} - -// Initialize the table to the given size of feature space. 
-void IntFeatureDist::Init(const IntFeatureMap* feature_map) { - size_ = feature_map->sparse_size(); - Clear(); - feature_map_ = feature_map; - features_ = new bool[size_]; - features_delta_one_ = new bool[size_]; - features_delta_two_ = new bool[size_]; - memset(features_, false, size_ * sizeof(features_[0])); - memset(features_delta_one_, false, size_ * sizeof(features_delta_one_[0])); - memset(features_delta_two_, false, size_ * sizeof(features_delta_two_[0])); - total_feature_weight_ = 0.0; -} - -// Setup the map for the given indexed_features that have been indexed by -// feature_map. -void IntFeatureDist::Set(const GenericVector& indexed_features, - int canonical_count, bool value) { - total_feature_weight_ = canonical_count; - for (int i = 0; i < indexed_features.size(); ++i) { - const int f = indexed_features[i]; - features_[f] = value; - for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) { - if (dir == 0) continue; - const int mapped_f = feature_map_->OffsetFeature(f, dir); - if (mapped_f >= 0) { - features_delta_one_[mapped_f] = value; - for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) { - if (dir2 == 0) continue; - const int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2); - if (mapped_f2 >= 0) - features_delta_two_[mapped_f2] = value; - } - } - } - } -} - -// Compute the distance between the given feature vector and the last -// Set feature vector. -double IntFeatureDist::FeatureDistance( - const GenericVector& features) const { - const int num_test_features = features.size(); - const double denominator = total_feature_weight_ + num_test_features; - double misses = denominator; - for (int i = 0; i < num_test_features; ++i) { - const int index = features[i]; - const double weight = 1.0; - if (features_[index]) { - // A perfect match. - misses -= 2.0 * weight; - } else if (features_delta_one_[index]) { - misses -= 1.5 * weight; - } else if (features_delta_two_[index]) { - // A near miss. 
- misses -= 1.0 * weight; - } - } - return misses / denominator; -} - -// Compute the distance between the given feature vector and the last -// Set feature vector. -double IntFeatureDist::DebugFeatureDistance( - const GenericVector& features) const { - const int num_test_features = features.size(); - const double denominator = total_feature_weight_ + num_test_features; - double misses = denominator; - for (int i = 0; i < num_test_features; ++i) { - const int index = features[i]; - const double weight = 1.0; - INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(features[i]); - tprintf("Testing feature weight %g:", weight); - f.print(); - if (features_[index]) { - // A perfect match. - misses -= 2.0 * weight; - tprintf("Perfect hit\n"); - } else if (features_delta_one_[index]) { - misses -= 1.5 * weight; - tprintf("-1 hit\n"); - } else if (features_delta_two_[index]) { - // A near miss. - misses -= 1.0 * weight; - tprintf("-2 hit\n"); - } else { - tprintf("Total miss\n"); - } - } - tprintf("Features present:"); - for (int i = 0; i < size_; ++i) { - if (features_[i]) { - INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i); - f.print(); - } - } - tprintf("\nMinus one features:"); - for (int i = 0; i < size_; ++i) { - if (features_delta_one_[i]) { - INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i); - f.print(); - } - } - tprintf("\nMinus two features:"); - for (int i = 0; i < size_; ++i) { - if (features_delta_two_[i]) { - INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i); - f.print(); - } - } - tprintf("\n"); - return misses / denominator; -} - -// Clear all data. 
-void IntFeatureDist::Clear() { - delete [] features_; - features_ = nullptr; - delete [] features_delta_one_; - features_delta_one_ = nullptr; - delete [] features_delta_two_; - features_delta_two_ = nullptr; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturedist.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturedist.h deleted file mode 100644 index 8cfcb3be..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturedist.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: intfeaturedist.h -// Description: Fast set-difference-based feature distance calculator. -// Created: Thu Sep 01 12:14:30 PDT 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CLASSIFY_INTFEATUREDIST_H_ -#define TESSERACT_CLASSIFY_INTFEATUREDIST_H_ - -#include "genericvector.h" - -namespace tesseract { - -class IntFeatureMap; - -// Feature distance calculator designed to provide a fast distance calculation -// based on set difference between a given feature set and many other feature -// sets in turn. 
-// Representation of a feature set as an array of bools that are sparsely -// true, and companion arrays that allow fast feature set distance -// calculations with allowance of offsets in position. -// Init is expensive, so for greatest efficiency, to re-initialize for a new -// feature set, use Set(..., false) on the SAME feature set as was used to -// setup with Set(..., true), to return to its initialized state before -// reuse with Set(..., true) on a new feature set. -class IntFeatureDist { - public: - IntFeatureDist(); - ~IntFeatureDist(); - - // Initialize the bool array to the given size of feature space. - // The feature_map is just borrowed, and must exist for the entire - // lifetime of the IntFeatureDist. - void Init(const IntFeatureMap* feature_map); - - // Setup the map for the given indexed_features that have been indexed by - // feature_map. After use, use Set(..., false) to reset to the initial state - // as this is faster than calling Init for sparse spaces. - void Set(const GenericVector& indexed_features, - int canonical_count, bool value); - - // Compute the distance between the given feature vector and the last - // Set feature vector. - double FeatureDistance(const GenericVector& features) const; - double DebugFeatureDistance(const GenericVector& features) const; - - private: - // Clear all data. - void Clear(); - - // Size of the indexed feature space. - int size_; - // Total weight of features currently stored in the maps. - double total_feature_weight_; - // Pointer to IntFeatureMap given at Init to find offset features. - const IntFeatureMap* feature_map_; - // Array of bools indicating presence of a feature. - bool* features_; - // Array indicating the presence of a feature offset by one unit. - bool* features_delta_one_; - // Array indicating the presence of a feature offset by two units. 
- bool* features_delta_two_; -}; - -} // namespace tesseract - -#endif // TESSERACT_CLASSIFY_INTFEATUREDIST_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturemap.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturemap.cpp deleted file mode 100644 index 53829c87..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturemap.cpp +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: intfeaturemap.cpp -// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi -// to provide a subspace mapping and fast feature lookup. -// Created: Tue Oct 26 08:58:30 PDT 2010 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "intfeaturemap.h" - -#include "intfeaturespace.h" -#include "intfx.h" -// These includes do not exist yet, but will be coming soon. 
-//#include "sampleiterator.h" -//#include "trainingsample.h" -//#include "trainingsampleset.h" - -namespace tesseract { - -const int kMaxOffsetDist = 32; - -IntFeatureMap::IntFeatureMap() - : mapping_changed_(true), compact_size_(0) { - for (int dir = 0; dir < kNumOffsetMaps; ++dir) { - offset_plus_[dir] = nullptr; - offset_minus_[dir] = nullptr; - } -} - -IntFeatureMap::~IntFeatureMap() { - Clear(); -} - -// Pseudo-accessors. -int IntFeatureMap::IndexFeature(const INT_FEATURE_STRUCT& f) const { - return feature_space_.Index(f); -} -int IntFeatureMap::MapFeature(const INT_FEATURE_STRUCT& f) const { - return feature_map_.SparseToCompact(feature_space_.Index(f)); -} -int IntFeatureMap::MapIndexFeature(int index_feature) const { - return feature_map_.SparseToCompact(index_feature); -} -INT_FEATURE_STRUCT IntFeatureMap::InverseIndexFeature(int index_feature) const { - return feature_space_.PositionFromIndex(index_feature); -} -INT_FEATURE_STRUCT IntFeatureMap::InverseMapFeature(int map_feature) const { - int index = feature_map_.CompactToSparse(map_feature); - return feature_space_.PositionFromIndex(index); -} -void IntFeatureMap::DeleteMapFeature(int map_feature) { - feature_map_.Merge(-1, map_feature); - mapping_changed_ = true; -} -bool IntFeatureMap::IsMapFeatureDeleted(int map_feature) const { - return feature_map_.IsCompactDeleted(map_feature); -} - -// Copies the given feature_space and uses it as the index feature map -// from INT_FEATURE_STRUCT. -void IntFeatureMap::Init(const IntFeatureSpace& feature_space) { - feature_space_ = feature_space; - mapping_changed_ = false; - int sparse_size = feature_space_.Size(); - feature_map_.Init(sparse_size, true); - feature_map_.Setup(); - compact_size_ = feature_map_.CompactSize(); - // Initialize look-up tables if needed. - FCOORD dir = FeatureDirection(0); - if (dir.x() == 0.0f && dir.y() == 0.0f) - InitIntegerFX(); - // Compute look-up tables to generate offset features. 
- for (int dir = 0; dir < kNumOffsetMaps; ++dir) { - delete [] offset_plus_[dir]; - delete [] offset_minus_[dir]; - offset_plus_[dir] = new int[sparse_size]; - offset_minus_[dir] = new int[sparse_size]; - } - for (int dir = 1; dir <= kNumOffsetMaps; ++dir) { - for (int i = 0; i < sparse_size; ++i) { - int offset_index = ComputeOffsetFeature(i, dir); - offset_plus_[dir - 1][i] = offset_index; - offset_index = ComputeOffsetFeature(i, -dir); - offset_minus_[dir - 1][i] = offset_index; - } - } -} - -// Helper to return an offset index feature. In this context an offset -// feature with a dir of +/-1 is a feature of a similar direction, -// but shifted perpendicular to the direction of the feature. An offset -// feature with a dir of +/-2 is feature at the same position, but rotated -// by +/- one [compact] quantum. Returns the index of the generated offset -// feature, or -1 if it doesn't exist. Dir should be in -// [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. -// A dir of 0 is an identity transformation. -// Both input and output are from the index(sparse) feature space, not -// the mapped/compact feature space, but the offset feature is the minimum -// distance moved from the input to guarantee that it maps to the next -// available quantum in the mapped/compact space. -int IntFeatureMap::OffsetFeature(int index_feature, int dir) const { - if (dir > 0 && dir <= kNumOffsetMaps) - return offset_plus_[dir - 1][index_feature]; - else if (dir < 0 && -dir <= kNumOffsetMaps) - return offset_minus_[-dir - 1][index_feature]; - else if (dir == 0) - return index_feature; - else - return -1; -} - - -//#define EXPERIMENT_ON -#ifdef EXPERIMENT_ON // This code is commented out as SampleIterator and -// TrainingSample are not reviewed/checked in yet, but these functions are a -// useful indicator of how an IntFeatureMap is setup. - -// Computes the features used by the subset of samples defined by -// the iterator and sets up the feature mapping. 
-// Returns the size of the compacted feature space. -int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) { - feature_map_.Init(feature_space_.Size(), false); - int total_samples = 0; - for (it->Begin(); !it->AtEnd(); it->Next()) { - const TrainingSample& sample = it->GetSample(); - GenericVector features; - feature_space_.IndexAndSortFeatures(sample.features(), - sample.num_features(), - &features); - int num_features = features.size(); - for (int f = 0; f < num_features; ++f) - feature_map_.SetMap(features[f], true); - ++total_samples; - } - feature_map_.Setup(); - compact_size_ = feature_map_.CompactSize(); - mapping_changed_ = true; - FinalizeMapping(it); - tprintf("%d non-zero features found in %d samples\n", - compact_size_, total_samples); - return compact_size_; -} -#endif - -// After deleting some features, finish setting up the mapping, and map -// all the samples. Returns the size of the compacted feature space. -int IntFeatureMap::FinalizeMapping(SampleIterator* it) { - if (mapping_changed_) { - feature_map_.CompleteMerges(); - compact_size_ = feature_map_.CompactSize(); -#ifdef EXPERIMENT_ON - it->MapSampleFeatures(*this); -#endif - mapping_changed_ = false; - } - return compact_size_; -} - -// Prints the map features from the set in human-readable form. -void IntFeatureMap::DebugMapFeatures( - const GenericVector& map_features) const { - for (int i = 0; i < map_features.size(); ++i) { - INT_FEATURE_STRUCT f = InverseMapFeature(map_features[i]); - f.print(); - } -} - -void IntFeatureMap::Clear() { - for (int dir = 0; dir < kNumOffsetMaps; ++dir) { - delete [] offset_plus_[dir]; - delete [] offset_minus_[dir]; - offset_plus_[dir] = nullptr; - offset_minus_[dir] = nullptr; - } -} - -// Helper to compute an offset index feature. In this context an offset -// feature with a dir of +/-1 is a feature of a similar direction, -// but shifted perpendicular to the direction of the feature. 
An offset -// feature with a dir of +/-2 is feature at the same position, but rotated -// by +/- one [compact] quantum. Returns the index of the generated offset -// feature, or -1 if it doesn't exist. Dir should be in -// [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. -// A dir of 0 is an identity transformation. -// Both input and output are from the index(sparse) feature space, not -// the mapped/compact feature space, but the offset feature is the minimum -// distance moved from the input to guarantee that it maps to the next -// available quantum in the mapped/compact space. -int IntFeatureMap::ComputeOffsetFeature(int index_feature, int dir) const { - INT_FEATURE_STRUCT f = InverseIndexFeature(index_feature); - ASSERT_HOST(IndexFeature(f) == index_feature); - if (dir == 0) { - return index_feature; - } else if (dir == 1 || dir == -1) { - FCOORD feature_dir = FeatureDirection(f.Theta); - FCOORD rotation90(0.0f, 1.0f); - feature_dir.rotate(rotation90); - // Find the nearest existing feature. - for (int m = 1; m < kMaxOffsetDist; ++m) { - double x_pos = f.X + feature_dir.x() * (m * dir); - double y_pos = f.Y + feature_dir.y() * (m * dir); - int x = IntCastRounded(x_pos); - int y = IntCastRounded(y_pos); - if (x >= 0 && x <= UINT8_MAX && y >= 0 && y <= UINT8_MAX) { - INT_FEATURE_STRUCT offset_f; - offset_f.X = x; - offset_f.Y = y; - offset_f.Theta = f.Theta; - int offset_index = IndexFeature(offset_f); - if (offset_index != index_feature && offset_index >= 0) - return offset_index; // Found one. - } else { - return -1; // Hit the edge of feature space. - } - } - } else if (dir == 2 || dir == -2) { - // Find the nearest existing index_feature. 
- for (int m = 1; m < kMaxOffsetDist; ++m) { - int theta = f.Theta + m * dir / 2; - INT_FEATURE_STRUCT offset_f; - offset_f.X = f.X; - offset_f.Y = f.Y; - offset_f.Theta = Modulo(theta, 256); - int offset_index = IndexFeature(offset_f); - if (offset_index != index_feature && offset_index >= 0) - return offset_index; // Found one. - } - } - return -1; // Nothing within the max distance. -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturemap.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturemap.h deleted file mode 100644 index 5c5a54b8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturemap.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: intfeaturemap.h -// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi -// to provide a subspace mapping and fast feature lookup. -// Created: Tue Oct 26 08:58:30 PDT 2010 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_ -#define TESSERACT_CLASSIFY_INTFEATUREMAP_H_ - -#include "intfeaturespace.h" -#include "indexmapbidi.h" -#include "intproto.h" - -namespace tesseract { - -class SampleIterator; - -// Number of positive and negative offset maps. -static const int kNumOffsetMaps = 2; - -// Class to map a feature space defined by INT_FEATURE_STRUCT to a compact -// down-sampled subspace of actually used features. -// The IntFeatureMap copes with 2 stages of transformation: -// The first step is down-sampling (re-quantization) and converting to a -// single index value from the 3-D input: -// INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and -// the second is a feature-space compaction to map only the feature indices -// that are actually used. This saves space in classifiers that are built -// using the mapped feature space. -// index (sparse) feature <-> map (compact) feature via IndexMapBiDi. -// Although the transformations are reversible, the inverses are lossy and do -// not return the exact input INT_FEATURE_STRUCT, due to the many->one nature -// of both transformations. -class IntFeatureMap { - public: - IntFeatureMap(); - ~IntFeatureMap(); - - // Accessors. - int sparse_size() const { - return feature_space_.Size(); - } - int compact_size() const { - return compact_size_; - } - const IntFeatureSpace& feature_space() const { - return feature_space_; - } - const IndexMapBiDi& feature_map() const { - return feature_map_; - } - - // Pseudo-accessors. 
- int IndexFeature(const INT_FEATURE_STRUCT& f) const; - int MapFeature(const INT_FEATURE_STRUCT& f) const; - int MapIndexFeature(int index_feature) const; - INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const; - INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const; - void DeleteMapFeature(int map_feature); - bool IsMapFeatureDeleted(int map_feature) const; - - // Copies the given feature_space and uses it as the index feature map - // from INT_FEATURE_STRUCT. - void Init(const IntFeatureSpace& feature_space); - - // Helper to return an offset index feature. In this context an offset - // feature with a dir of +/-1 is a feature of a similar direction, - // but shifted perpendicular to the direction of the feature. An offset - // feature with a dir of +/-2 is feature at the same position, but rotated - // by +/- one [compact] quantum. Returns the index of the generated offset - // feature, or -1 if it doesn't exist. Dir should be in - // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. - // A dir of 0 is an identity transformation. - // Both input and output are from the index(sparse) feature space, not - // the mapped/compact feature space, but the offset feature is the minimum - // distance moved from the input to guarantee that it maps to the next - // available quantum in the mapped/compact space. - int OffsetFeature(int index_feature, int dir) const; - - // Computes the features used by the subset of samples defined by - // the iterator and sets up the feature mapping. - // Returns the size of the compacted feature space. - int FindNZFeatureMapping(SampleIterator* it); - - // After deleting some features, finish setting up the mapping, and map - // all the samples. Returns the size of the compacted feature space. - int FinalizeMapping(SampleIterator* it); - - // Indexes the given array of features to a vector of sorted indices. 
- void IndexAndSortFeatures(const INT_FEATURE_STRUCT* features, - int num_features, - GenericVector* sorted_features) const { - feature_space_.IndexAndSortFeatures(features, num_features, - sorted_features); - } - // Maps the given array of index/sparse features to an array of map/compact - // features. - // Assumes the input is sorted. The output indices are sorted and uniqued. - // Returns the number of "missed" features, being features that - // don't map to the compact feature space. - int MapIndexedFeatures(const GenericVector& index_features, - GenericVector* map_features) const { - return feature_map_.MapFeatures(index_features, map_features); - } - - // Prints the map features from the set in human-readable form. - void DebugMapFeatures(const GenericVector& map_features) const; - - private: - void Clear(); - - // Helper to compute an offset index feature. In this context an offset - // feature with a dir of +/-1 is a feature of a similar direction, - // but shifted perpendicular to the direction of the feature. An offset - // feature with a dir of +/-2 is feature at the same position, but rotated - // by +/- one [compact] quantum. Returns the index of the generated offset - // feature, or -1 if it doesn't exist. Dir should be in - // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. - // A dir of 0 is an identity transformation. - // Both input and output are from the index(sparse) feature space, not - // the mapped/compact feature space, but the offset feature is the minimum - // distance moved from the input to guarantee that it maps to the next - // available quantum in the mapped/compact space. - int ComputeOffsetFeature(int index_feature, int dir) const; - - // True if the mapping has changed since it was last finalized. - bool mapping_changed_; - // Size of the compacted feature space, after unused features are removed. - int compact_size_; - // Feature space quantization definition and indexing from INT_FEATURE_STRUCT. 
- IntFeatureSpace feature_space_; - // Mapping from indexed feature space to the compacted space with unused - // features mapping to -1. - IndexMapBiDi feature_map_; - // Index tables to map a feature index to the corresponding feature after a - // shift perpendicular to the feature direction, or a rotation in place. - // An entry of -1 indicates that there is no corresponding feature. - // Array of arrays of size feature_space_.Size() owned by this class. - int* offset_plus_[kNumOffsetMaps]; - int* offset_minus_[kNumOffsetMaps]; - - // Don't use default copy and assign! - IntFeatureMap(const IntFeatureMap&); - void operator=(const IntFeatureMap&); -}; - -} // namespace tesseract. - -#endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturespace.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturespace.cpp deleted file mode 100644 index 0e14d928..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturespace.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: intfeaturespace.cpp -// Description: Indexed feature space based on INT_FEATURE_STRUCT. -// Created: Wed Mar 24 11:21:27 PDT 2010 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "intfeaturespace.h" -#include "intfx.h" - -namespace tesseract { - -IntFeatureSpace::IntFeatureSpace() - : x_buckets_(0), y_buckets_(0), theta_buckets_(0) { -} - -void IntFeatureSpace::Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets) { - x_buckets_ = xbuckets; - y_buckets_ = ybuckets; - theta_buckets_ = thetabuckets; -} - -// Serializes the feature space definition to the given file. -// Returns false on error. -bool IntFeatureSpace::Serialize(FILE* fp) const { - if (fwrite(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1) - return false; - if (fwrite(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1) - return false; - if (fwrite(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1) - return false; - return true; -} - -// Returns an INT_FEATURE_STRUCT corresponding to the given index. -// This is the inverse of the Index member. -INT_FEATURE_STRUCT IntFeatureSpace::PositionFromIndex(int index) const { - return PositionFromBuckets(index / (y_buckets_ * theta_buckets_), - index / theta_buckets_ % y_buckets_, - index % theta_buckets_); -} - -// Bulk calls to Index. Maps the given array of features to a vector of -// int32_t indices in the same order as the input. -void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT* features, - int num_features, - GenericVector* mapped_features) const { - mapped_features->truncate(0); - for (int f = 0; f < num_features; ++f) - mapped_features->push_back(Index(features[f])); -} - -// Bulk calls to Index. Maps the given array of features to a vector of -// sorted int32_t indices. 
-void IntFeatureSpace::IndexAndSortFeatures( - const INT_FEATURE_STRUCT* features, int num_features, - GenericVector* sorted_features) const { - sorted_features->truncate(0); - for (int f = 0; f < num_features; ++f) - sorted_features->push_back(Index(features[f])); - sorted_features->sort(); -} - -// Returns a feature space index for the given x,y position in a display -// window, or -1 if the feature is a miss. -int IntFeatureSpace::XYToFeatureIndex(int x, int y) const { - // Round the x,y position to a feature. Search for a valid theta. - INT_FEATURE_STRUCT feature(x, y, 0); - int index = -1; - for (int theta = 0; theta <= UINT8_MAX && index < 0; ++theta) { - feature.Theta = theta; - index = Index(feature); - } - if (index < 0) { - tprintf("(%d,%d) does not exist in feature space!\n", x, y); - return -1; - } - feature = PositionFromIndex(index); - tprintf("Click at (%d, %d) ->(%d, %d), ->(%d, %d)\n", - x, y, feature.X, feature.Y, x - feature.X, y - feature.Y); - // Get the relative position of x,y from the rounded feature. - x -= feature.X; - y -= feature.Y; - if (x != 0 || y != 0) { - double angle = atan2(static_cast(y), static_cast(x)) + M_PI; - angle *= kIntFeatureExtent / (2.0 * M_PI); - feature.Theta = static_cast(angle + 0.5); - index = Index(feature); - if (index < 0) { - tprintf("Feature failed to map to a valid index:"); - feature.print(); - return -1; - } - feature = PositionFromIndex(index); - } - feature.print(); - return index; -} - -// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords. -INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x, - int y, - int theta) const { - INT_FEATURE_STRUCT pos( - (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_, - (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_, - DivRounded(theta * kIntFeatureExtent, theta_buckets_)); - return pos; -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturespace.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturespace.h deleted file mode 100644 index af23af9a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfeaturespace.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: intfeaturespace.h -// Description: Indexed feature space based on INT_FEATURE_STRUCT. -// Created: Wed Mar 24 10:55:30 PDT 2010 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CLASSIFY_INTFEATURESPACE_H_ -#define TESSERACT_CLASSIFY_INTFEATURESPACE_H_ - -#include "genericvector.h" -#include "intproto.h" - -// Extent of x,y,theta in the input feature space. [0,255]. -const int kIntFeatureExtent = 256; -// Extent of x,y,theta dimensions in the quantized feature space. -const int kBoostXYBuckets = 16; -const int kBoostDirBuckets = 16; - -namespace tesseract { - -class IndexMap; - -// Down-sampling quantization of the INT_FEATURE_STRUCT feature space and -// conversion to a single scalar index value, used as a binary feature space. -class IntFeatureSpace { - public: - IntFeatureSpace(); - // Default copy constructors and assignment OK! 
- - // Setup the feature space with the given dimensions. - void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets); - - // Serializes the feature space definition to the given file. - // Returns false on error. - bool Serialize(FILE* fp) const; - - // Returns the total size of the feature space. - int Size() const { - return static_cast(x_buckets_) * y_buckets_ * theta_buckets_; - } - // Returns an INT_FEATURE_STRUCT corresponding to the given index. - // This is the inverse of the Index member. - INT_FEATURE_STRUCT PositionFromIndex(int index) const; - - // Returns a 1-dimensional index corresponding to the given feature value. - // Range is [0, Size()-1]. Inverse of PositionFromIndex member. - int Index(const INT_FEATURE_STRUCT& f) const { - return (XBucket(f.X) * y_buckets_ + YBucket(f.Y)) * theta_buckets_ + - ThetaBucket(f.Theta); - } - // Bulk calls to Index. Maps the given array of features to a vector of - // int32_t indices in the same order as the input. - void IndexFeatures(const INT_FEATURE_STRUCT* features, int num_features, - GenericVector* mapped_features) const; - // Bulk calls to Index. Maps the given array of features to a vector of - // sorted int32_t indices. - void IndexAndSortFeatures(const INT_FEATURE_STRUCT* features, - int num_features, - GenericVector* sorted_features) const; - // Returns a feature space index for the given x,y position in a display - // window, or -1 if the feature is a miss. - int XYToFeatureIndex(int x, int y) const; - - protected: - // Converters to generate indices for individual feature dimensions. - int XBucket(int x) const { - int bucket = x * x_buckets_ / kIntFeatureExtent; - return ClipToRange(bucket, 0, static_cast(x_buckets_) - 1); - } - int YBucket(int y) const { - int bucket = y * y_buckets_ / kIntFeatureExtent; - return ClipToRange(bucket, 0, static_cast(y_buckets_) - 1); - } - // Use DivRounded for theta so that exactly vertical and horizontal are in - // the middle of a bucket. 
The Modulo takes care of the wrap-around. - int ThetaBucket(int theta) const { - int bucket = DivRounded(theta * theta_buckets_, kIntFeatureExtent); - return Modulo(bucket, theta_buckets_); - } - // Returns an INT_FEATURE_STRUCT corresponding to the given buckets. - INT_FEATURE_STRUCT PositionFromBuckets(int x, int y, int theta) const; - - // Feature space definition - serialized. - uint8_t x_buckets_; - uint8_t y_buckets_; - uint8_t theta_buckets_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CLASSIFY_INTFEATURESPACE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfx.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfx.cpp deleted file mode 100644 index 93311819..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfx.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/****************************************************************************** - ** Filename: intfx.c - ** Purpose: Integer character normalization & feature extraction - ** Author: Robert Moss, rays@google.com (Ray Smith) - ** History: Tue May 21 15:51:57 MDT 1991, RWM, Created. - ** Tue Feb 28 10:42:00 PST 2012, vastly rewritten to allow - greyscale fx and non-linear - normalization. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- *****************************************************************************/ -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "intfx.h" -#include "allheaders.h" -#include "ccutil.h" -#include "classify.h" -#include "helpers.h" -#include "intmatcher.h" -#include "linlsq.h" -#include "normalis.h" -#include "statistc.h" -#include "trainingsample.h" - -using tesseract::TrainingSample; - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -// Look up table for cos and sin to turn the intfx feature angle to a vector. -// Protected by atan_table_mutex. -// The entries are in binary degrees where a full circle is 256 binary degrees. -static float cos_table[INT_CHAR_NORM_RANGE]; -static float sin_table[INT_CHAR_NORM_RANGE]; -// Guards write access to AtanTable so we don't create it more than once. -tesseract::CCUtilMutex atan_table_mutex; - - -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void InitIntegerFX() { - static bool atan_table_init = false; - atan_table_mutex.Lock(); - if (!atan_table_init) { - for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) { - cos_table[i] = cos(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI); - sin_table[i] = sin(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI); - } - atan_table_init = true; - } - atan_table_mutex.Unlock(); -} - -// Returns a vector representing the direction of a feature with the given -// theta direction in an INT_FEATURE_STRUCT. 
-FCOORD FeatureDirection(uint8_t theta) { - return FCOORD(cos_table[theta], sin_table[theta]); -} - -namespace tesseract { - -// Generates a TrainingSample from a TBLOB. Extracts features and sets -// the bounding box, so classifiers that operate on the image can work. -// TODO(rays) Make BlobToTrainingSample a member of Classify now that -// the FlexFx and FeatureDescription code have been removed and LearnBlob -// is now a member of Classify. -TrainingSample* BlobToTrainingSample( - const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, - GenericVector* bl_features) { - GenericVector cn_features; - Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, - &cn_features, fx_info, nullptr); - // TODO(rays) Use blob->PreciseBoundingBox() instead. - TBOX box = blob.bounding_box(); - TrainingSample* sample = nullptr; - int num_features = fx_info->NumCN; - if (num_features > 0) { - sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], - num_features); - } - if (sample != nullptr) { - // Set the bounding box (in original image coordinates) in the sample. - TPOINT topleft, botright; - topleft.x = box.left(); - topleft.y = box.top(); - botright.x = box.right(); - botright.y = box.bottom(); - TPOINT original_topleft, original_botright; - blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); - blob.denorm().DenormTransform(nullptr, botright, &original_botright); - sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, - original_botright.x, original_topleft.y)); - } - return sample; -} - -// Computes the DENORMS for bl(baseline) and cn(character) normalization -// during feature extraction. The input denorm describes the current state -// of the blob, which is usually a baseline-normalized word. 
-// The Transforms setup are as follows: -// Baseline Normalized (bl) Output: -// We center the grapheme by aligning the x-coordinate of its centroid with -// x=128 and leaving the already-baseline-normalized y as-is. -// -// Character Normalized (cn) Output: -// We align the grapheme's centroid at the origin and scale it -// asymmetrically in x and y so that the 2nd moments are a standard value -// (51.2) ie the result is vaguely square. -// If classify_nonlinear_norm is true: -// A non-linear normalization is setup that attempts to evenly distribute -// edges across x and y. -// -// Some of the fields of fx_info are also setup: -// Length: Total length of outline. -// Rx: Rounded y second moment. (Reversed by convention.) -// Ry: rounded x second moment. -// Xmean: Rounded x center of mass of the blob. -// Ymean: Rounded y center of mass of the blob. -void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, - DENORM* bl_denorm, DENORM* cn_denorm, - INT_FX_RESULT_STRUCT* fx_info) { - // Compute 1st and 2nd moments of the original outline. - FCOORD center, second_moments; - int length = blob.ComputeMoments(¢er, &second_moments); - if (fx_info != nullptr) { - fx_info->Length = length; - fx_info->Rx = IntCastRounded(second_moments.y()); - fx_info->Ry = IntCastRounded(second_moments.x()); - - fx_info->Xmean = IntCastRounded(center.x()); - fx_info->Ymean = IntCastRounded(center.y()); - } - // Setup the denorm for Baseline normalization. - bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f, - 1.0f, 1.0f, 128.0f, 128.0f); - // Setup the denorm for character normalization. 
- if (nonlinear_norm) { - GenericVector > x_coords; - GenericVector > y_coords; - TBOX box; - blob.GetPreciseBoundingBox(&box); - box.pad(1, 1); - blob.GetEdgeCoords(box, &x_coords, &y_coords); - cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, - 0.0f, 0.0f, x_coords, y_coords); - } else { - cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), - center.x(), center.y(), - 51.2f / second_moments.x(), - 51.2f / second_moments.y(), - 128.0f, 128.0f); - } -} - -// Helper normalizes the direction, assuming that it is at the given -// unnormed_pos, using the given denorm, starting at the root_denorm. -static uint8_t NormalizeDirection(uint8_t dir, const FCOORD& unnormed_pos, - const DENORM& denorm, - const DENORM* root_denorm) { - // Convert direction to a vector. - FCOORD unnormed_end; - unnormed_end.from_direction(dir); - unnormed_end += unnormed_pos; - FCOORD normed_pos, normed_end; - denorm.NormTransform(root_denorm, unnormed_pos, &normed_pos); - denorm.NormTransform(root_denorm, unnormed_end, &normed_end); - normed_end -= normed_pos; - return normed_end.to_direction(); -} - -// Helper returns the mean direction vector from the given stats. Use the -// mean direction from dirs if there is information available, otherwise, use -// the fit_vector from point_diffs. -static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs, - const FCOORD& start_pt, - const FCOORD& end_pt) { - FCOORD fit_vector; - if (dirs.count() > 0) { - // There were directions, so use them. To avoid wrap-around problems, we - // have 2 accumulators in dirs: x for normal directions and y for - // directions offset by 128. We will use the one with the least variance. 
- FCOORD mean_pt = dirs.mean_point(); - double mean_dir = 0.0; - if (dirs.x_variance() <= dirs.y_variance()) { - mean_dir = mean_pt.x(); - } else { - mean_dir = mean_pt.y() + 128; - } - fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256)); - } else { - // There were no directions, so we rely on the vector_fit to the points. - // Since the vector_fit is 180 degrees ambiguous, we align with the - // supplied feature_dir by making the scalar product non-negative. - FCOORD feature_dir(end_pt - start_pt); - fit_vector = point_diffs.vector_fit(); - if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) { - // There was only a single point. Use feature_dir directly. - fit_vector = feature_dir; - } else { - // Sometimes the least mean squares fit is wrong, due to the small sample - // of points and scaling. Use a 90 degree rotated vector if that matches - // feature_dir better. - FCOORD fit_vector2 = !fit_vector; - // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by - // insisting that the scalar product with the feature_dir should be +ve. - if (fit_vector % feature_dir < 0.0) - fit_vector = -fit_vector; - if (fit_vector2 % feature_dir < 0.0) - fit_vector2 = -fit_vector2; - // Even though fit_vector2 has a higher mean squared error, it might be - // a better fit, so use it if the dot product with feature_dir is bigger. - if (fit_vector2 % feature_dir > fit_vector % feature_dir) - fit_vector = fit_vector2; - } - } - return fit_vector; -} - -// Helper computes one or more features corresponding to the given points. -// Emitted features are on the line defined by: -// start_pt + lambda * (end_pt - start_pt) for scalar lambda. -// Features are spaced at feature_length intervals. 
-static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt, - double feature_length, - GenericVector* features) { - FCOORD feature_vector(end_pt - start_pt); - if (feature_vector.x() == 0.0f && feature_vector.y() == 0.0f) return 0; - // Compute theta for the feature based on its direction. - uint8_t theta = feature_vector.to_direction(); - // Compute the number of features and lambda_step. - double target_length = feature_vector.length(); - int num_features = IntCastRounded(target_length / feature_length); - if (num_features == 0) return 0; - // Divide the length evenly into num_features pieces. - double lambda_step = 1.0 / num_features; - double lambda = lambda_step / 2.0; - for (int f = 0; f < num_features; ++f, lambda += lambda_step) { - FCOORD feature_pt(start_pt); - feature_pt += feature_vector * lambda; - INT_FEATURE_STRUCT feature(feature_pt, theta); - features->push_back(feature); - } - return num_features; -} - -// Gathers outline points and their directions from start_index into dirs by -// stepping along the outline and normalizing the coordinates until the -// required feature_length has been collected or end_index is reached. -// On input pos must point to the position corresponding to start_index and on -// return pos is updated to the current raw position, and pos_normed is set to -// the normed version of pos. -// Since directions wrap-around, they need special treatment to get the mean. -// Provided the cluster of directions doesn't straddle the wrap-around point, -// the simple mean works. If they do, then, unless the directions are wildly -// varying, the cluster rotated by 180 degrees will not straddle the wrap- -// around point, so mean(dir + 180 degrees) - 180 degrees will work. Since -// LLSQ conveniently stores the mean of 2 variables, we use it to store -// dir and dir+128 (128 is 180 degrees) and then use the resulting mean -// with the least variance. 
-static int GatherPoints(const C_OUTLINE* outline, double feature_length, - const DENORM& denorm, const DENORM* root_denorm, - int start_index, int end_index, - ICOORD* pos, FCOORD* pos_normed, - LLSQ* points, LLSQ* dirs) { - int step_length = outline->pathlength(); - ICOORD step = outline->step(start_index % step_length); - // Prev_normed is the start point of this collection and will be set on the - // first iteration, and on later iterations used to determine the length - // that has been collected. - FCOORD prev_normed; - points->clear(); - dirs->clear(); - int num_points = 0; - int index; - for (index = start_index; index <= end_index; ++index, *pos += step) { - step = outline->step(index % step_length); - int edge_weight = outline->edge_strength_at_index(index % step_length); - if (edge_weight == 0) { - // This point has conflicting gradient and step direction, so ignore it. - continue; - } - // Get the sub-pixel precise location and normalize. - FCOORD f_pos = outline->sub_pixel_pos_at_index(*pos, index % step_length); - denorm.NormTransform(root_denorm, f_pos, pos_normed); - if (num_points == 0) { - // The start of this segment. - prev_normed = *pos_normed; - } else { - FCOORD offset = *pos_normed - prev_normed; - float length = offset.length(); - if (length > feature_length) { - // We have gone far enough from the start. We will use this point in - // the next set so return what we have so far. - return index; - } - } - points->add(pos_normed->x(), pos_normed->y(), edge_weight); - int direction = outline->direction_at_index(index % step_length); - if (direction >= 0) { - direction = NormalizeDirection(direction, f_pos, denorm, root_denorm); - // Use both the direction and direction +128 so we are not trying to - // take the mean of something straddling the wrap-around point. - dirs->add(direction, Modulo(direction + 128, 256)); - } - ++num_points; - } - return index; -} - -// Extracts Tesseract features and appends them to the features vector. 
-// Startpt to lastpt, inclusive, MUST have the same src_outline member, -// which may be nullptr. The vector from lastpt to its next is included in -// the feature extraction. Hidden edges should be excluded by the caller. -// If force_poly is true, the features will be extracted from the polygonal -// approximation even if more accurate data is available. -static void ExtractFeaturesFromRun( - const EDGEPT* startpt, const EDGEPT* lastpt, - const DENORM& denorm, double feature_length, bool force_poly, - GenericVector* features) { - const EDGEPT* endpt = lastpt->next; - const C_OUTLINE* outline = startpt->src_outline; - if (outline != nullptr && !force_poly) { - // Detailed information is available. We have to normalize only from - // the root_denorm to denorm. - const DENORM* root_denorm = denorm.RootDenorm(); - int total_features = 0; - // Get the features from the outline. - int step_length = outline->pathlength(); - int start_index = startpt->start_step; - // pos is the integer coordinates of the binary image steps. - ICOORD pos = outline->position_at_index(start_index); - // We use an end_index that allows us to use a positive increment, but that - // may be beyond the bounds of the outline steps/ due to wrap-around, to - // so we use % step_length everywhere, except for start_index. 
- int end_index = lastpt->start_step + lastpt->step_count; - if (end_index <= start_index) - end_index += step_length; - LLSQ prev_points; - LLSQ prev_dirs; - FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index); - denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos); - LLSQ points; - LLSQ dirs; - FCOORD normed_pos(0.0f, 0.0f); - int index = GatherPoints(outline, feature_length, denorm, root_denorm, - start_index, end_index, &pos, &normed_pos, - &points, &dirs); - while (index <= end_index) { - // At each iteration we nominally have 3 accumulated sets of points and - // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them - // into sum_points/dirs, but we don't necessarily get any features out, - // so if that is the case, we keep accumulating instead of rotating the - // accumulators. - LLSQ next_points; - LLSQ next_dirs; - FCOORD next_normed_pos(0.0f, 0.0f); - index = GatherPoints(outline, feature_length, denorm, root_denorm, - index, end_index, &pos, &next_normed_pos, - &next_points, &next_dirs); - LLSQ sum_points(prev_points); - // TODO(rays) find out why it is better to use just dirs and next_dirs - // in sum_dirs, instead of using prev_dirs as well. - LLSQ sum_dirs(dirs); - sum_points.add(points); - sum_points.add(next_points); - sum_dirs.add(next_dirs); - bool made_features = false; - // If we have some points, we can try making some features. - if (sum_points.count() > 0) { - // We have gone far enough from the start. Make a feature and restart. - FCOORD fit_pt = sum_points.mean_point(); - FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs, - prev_normed_pos, normed_pos); - // The segment to which we fit features is the line passing through - // fit_pt in direction of fit_vector that starts nearest to - // prev_normed_pos and ends nearest to normed_pos. 
- FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt, - fit_vector); - FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector); - // Possible correction to match the adjacent polygon segment. - if (total_features == 0 && startpt != endpt) { - FCOORD poly_pos(startpt->pos.x, startpt->pos.y); - denorm.LocalNormTransform(poly_pos, &start_pos); - } - if (index > end_index && startpt != endpt) { - FCOORD poly_pos(endpt->pos.x, endpt->pos.y); - denorm.LocalNormTransform(poly_pos, &end_pos); - } - int num_features = ComputeFeatures(start_pos, end_pos, feature_length, - features); - if (num_features > 0) { - // We made some features so shuffle the accumulators. - prev_points = points; - prev_dirs = dirs; - prev_normed_pos = normed_pos; - points = next_points; - dirs = next_dirs; - made_features = true; - total_features += num_features; - } - // The end of the next set becomes the end next time around. - normed_pos = next_normed_pos; - } - if (!made_features) { - // We didn't make any features, so keep the prev accumulators and - // add the next ones into the current. - points.add(next_points); - dirs.add(next_dirs); - } - } - } else { - // There is no outline, so we are forced to use the polygonal approximation. - const EDGEPT* pt = startpt; - do { - FCOORD start_pos(pt->pos.x, pt->pos.y); - FCOORD end_pos(pt->next->pos.x, pt->next->pos.y); - denorm.LocalNormTransform(start_pos, &start_pos); - denorm.LocalNormTransform(end_pos, &end_pos); - ComputeFeatures(start_pos, end_pos, feature_length, features); - } while ((pt = pt->next) != endpt); - } -} - -// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as -// (x,y) position and angle as measured counterclockwise from the vector -// <-1, 0>, from blob using two normalizations defined by bl_denorm and -// cn_denorm. See SetpuBLCNDenorms for definitions. 
-// If outline_cn_counts is not nullptr, on return it contains the cumulative -// number of cn features generated for each outline in the blob (in order). -// Thus after the first outline, there were (*outline_cn_counts)[0] features, -// after the second outline, there were (*outline_cn_counts)[1] features etc. -void Classify::ExtractFeatures(const TBLOB& blob, - bool nonlinear_norm, - GenericVector* bl_features, - GenericVector* cn_features, - INT_FX_RESULT_STRUCT* results, - GenericVector* outline_cn_counts) { - DENORM bl_denorm, cn_denorm; - tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, - &bl_denorm, &cn_denorm, results); - if (outline_cn_counts != nullptr) - outline_cn_counts->truncate(0); - // Iterate the outlines. - for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) { - // Iterate the polygon. - EDGEPT* loop_pt = ol->FindBestStartPt(); - EDGEPT* pt = loop_pt; - if (pt == nullptr) continue; - do { - if (pt->IsHidden()) continue; - // Find a run of equal src_outline. - EDGEPT* last_pt = pt; - do { - last_pt = last_pt->next; - } while (last_pt != loop_pt && !last_pt->IsHidden() && - last_pt->src_outline == pt->src_outline); - last_pt = last_pt->prev; - // Until the adaptive classifier can be weaned off polygon segments, - // we have to force extraction from the polygon for the bl_features. 
- ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength, - true, bl_features); - ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength, - false, cn_features); - pt = last_pt; - } while ((pt = pt->next) != loop_pt); - if (outline_cn_counts != nullptr) - outline_cn_counts->push_back(cn_features->size()); - } - results->NumBL = bl_features->size(); - results->NumCN = cn_features->size(); - results->YBottom = blob.bounding_box().bottom(); - results->YTop = blob.bounding_box().top(); - results->Width = blob.bounding_box().width(); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfx.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfx.h deleted file mode 100644 index 0494aae4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intfx.h +++ /dev/null @@ -1,68 +0,0 @@ -/****************************************************************************** - ** Filename: intfx.h - ** Purpose: Interface to high level integer feature extractor. - ** Author: Robert Moss - ** History: Tue May 21 15:51:57 MDT 1991, RWM, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef INTFX_H -#define INTFX_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "blobs.h" -#include "intproto.h" -#include "normalis.h" -#include - -class DENORM; - -namespace tesseract { -class TrainingSample; -} - -struct INT_FX_RESULT_STRUCT { - int32_t Length; // total length of all outlines - int16_t Xmean, Ymean; // center of mass of all outlines - int16_t Rx, Ry; // radius of gyration - int16_t NumBL, NumCN; // number of features extracted - int16_t Width; // Width of blob in BLN coords. - uint8_t YBottom; // Bottom of blob in BLN coords. - uint8_t YTop; // Top of blob in BLN coords. -}; - -// The standard feature length -const double kStandardFeatureLength = 64.0 / 5; - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -void InitIntegerFX(); - -// Returns a vector representing the direction of a feature with the given -// theta direction in an INT_FEATURE_STRUCT. -FCOORD FeatureDirection(uint8_t theta); - -namespace tesseract { - // Generates a TrainingSample from a TBLOB. Extracts features and sets - // the bounding box, so classifiers that operate on the image can work. - // TODO(rays) BlobToTrainingSample must remain a global function until - // the FlexFx and FeatureDescription code can be removed and LearnBlob - // made a member of Classify. 
- TrainingSample* BlobToTrainingSample( - const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, - GenericVector* bl_features); -} - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intmatcher.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intmatcher.cpp deleted file mode 100644 index 7740d269..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intmatcher.cpp +++ /dev/null @@ -1,1251 +0,0 @@ -/****************************************************************************** - ** Filename: intmatcher.cpp - ** Purpose: Generic high level classification routines. - ** Author: Robert Moss - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "intmatcher.h" - -#include "fontinfo.h" -#include "intproto.h" -#include "callcpp.h" -#include "scrollview.h" -#include "float2int.h" -#include "globals.h" -#include "helpers.h" -#include "classify.h" -#include "shapetable.h" -#include - -using tesseract::ScoredFont; -using tesseract::UnicharRating; - -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -// Parameters of the sigmoid used to convert similarity to evidence in the -// similarity_evidence_table_ that is used to convert distance metric to an -// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init). -const float IntegerMatcher::kSEExponentialMultiplier = 0.0; -const float IntegerMatcher::kSimilarityCenter = 0.0075; - -#define offset_table_entries \ - 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \ - 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \ - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \ - 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \ - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \ - 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 - -#define INTMATCHER_OFFSET_TABLE_SIZE 
256 - -#define next_table_entries \ - 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \ - 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \ - 0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \ - 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \ - 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \ - 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \ - 0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \ - 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \ - 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \ - 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \ - 0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \ - 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \ - 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \ - 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \ - 0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \ - 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \ - 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \ - 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \ - 0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \ - 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \ - 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe - -// See http://b/19318793 (#6) for a complete discussion. Merging arrays -// offset_table and next_table helps improve performance of PIE code. 
-static const uint8_t data_table[512] = {offset_table_entries, next_table_entries}; - -static const uint8_t* const offset_table = &data_table[0]; -static const uint8_t* const next_table = - &data_table[INTMATCHER_OFFSET_TABLE_SIZE]; - -namespace tesseract { - -// Encapsulation of the intermediate data and computations made by the class -// pruner. The class pruner implements a simple linear classifier on binary -// features by heavily quantizing the feature space, and applying -// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in -// weights is compensated by a non-constant bias that is dependent on the -// number of features present. -class ClassPruner { - public: - ClassPruner(int max_classes) { - // The unrolled loop in ComputeScores means that the array sizes need to - // be rounded up so that the array is big enough to accommodate the extra - // entries accessed by the unrolling. Each pruner word is of sized - // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are - // BITS_PER_WERD / NUM_BITS_PER_CLASS entries. - // See ComputeScores. - max_classes_ = max_classes; - rounded_classes_ = RoundUp( - max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS); - class_count_ = new int[rounded_classes_]; - norm_count_ = new int[rounded_classes_]; - sort_key_ = new int[rounded_classes_ + 1]; - sort_index_ = new int[rounded_classes_ + 1]; - for (int i = 0; i < rounded_classes_; i++) { - class_count_[i] = 0; - } - pruning_threshold_ = 0; - num_features_ = 0; - num_classes_ = 0; - } - - ~ClassPruner() { - delete []class_count_; - delete []norm_count_; - delete []sort_key_; - delete []sort_index_; - } - - /// Computes the scores for every class in the character set, by summing the - /// weights for each feature and stores the sums internally in class_count_. 
- void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates, - int num_features, const INT_FEATURE_STRUCT* features) { - num_features_ = num_features; - int num_pruners = int_templates->NumClassPruners; - for (int f = 0; f < num_features; ++f) { - const INT_FEATURE_STRUCT* feature = &features[f]; - // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS. - int x = feature->X * NUM_CP_BUCKETS >> 8; - int y = feature->Y * NUM_CP_BUCKETS >> 8; - int theta = feature->Theta * NUM_CP_BUCKETS >> 8; - int class_id = 0; - // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so - // we need a collection of them, indexed by pruner_set. - for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) { - // Look up quantized feature in a 3-D array, an array of weights for - // each class. - const uint32_t* pruner_word_ptr = - int_templates->ClassPruners[pruner_set]->p[x][y][theta]; - for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) { - uint32_t pruner_word = *pruner_word_ptr++; - // This inner loop is unrolled to speed up the ClassPruner. - // Currently gcc would not unroll it unless it is set to O3 - // level of optimization or -funroll-loops is specified. 
- /* - uint32_t class_mask = (1 << NUM_BITS_PER_CLASS) - 1; - for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) { - class_count_[class_id++] += pruner_word & class_mask; - pruner_word >>= NUM_BITS_PER_CLASS; - } - */ - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - pruner_word >>= NUM_BITS_PER_CLASS; - class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; - } - } - } - } - - /// Adjusts the scores according to the number of expected features. 
Used - /// in lieu of a constant bias, this penalizes classes that expect more - /// features than there are present. Thus an actual c will score higher for c - /// than e, even though almost all the features match e as well as c, because - /// e expects more features to be present. - void AdjustForExpectedNumFeatures(const uint16_t* expected_num_features, - int cutoff_strength) { - for (int class_id = 0; class_id < max_classes_; ++class_id) { - if (num_features_ < expected_num_features[class_id]) { - int deficit = expected_num_features[class_id] - num_features_; - class_count_[class_id] -= class_count_[class_id] * deficit / - (num_features_ * cutoff_strength + deficit); - } - } - } - - /// Zeros the scores for classes disabled in the unicharset. - /// Implements the black-list to recognize a subset of the character set. - void DisableDisabledClasses(const UNICHARSET& unicharset) { - for (int class_id = 0; class_id < max_classes_; ++class_id) { - if (!unicharset.get_enabled(class_id)) - class_count_[class_id] = 0; // This char is disabled! - } - } - - /** Zeros the scores of fragments. */ - void DisableFragments(const UNICHARSET& unicharset) { - for (int class_id = 0; class_id < max_classes_; ++class_id) { - // Do not include character fragments in the class pruner - // results if disable_character_fragments is true. - if (unicharset.get_fragment(class_id)) { - class_count_[class_id] = 0; - } - } - } - - /// Normalizes the counts for xheight, putting the normalized result in - /// norm_count_. Applies a simple subtractive penalty for incorrect vertical - /// position provided by the normalization_factors array, indexed by - /// character class, and scaled by the norm_multiplier. 
- void NormalizeForXheight(int norm_multiplier, - const uint8_t* normalization_factors) { - for (int class_id = 0; class_id < max_classes_; class_id++) { - norm_count_[class_id] = class_count_[class_id] - - ((norm_multiplier * normalization_factors[class_id]) >> 8); - } - } - - /** The nop normalization copies the class_count_ array to norm_count_. */ - void NoNormalization() { - for (int class_id = 0; class_id < max_classes_; class_id++) { - norm_count_[class_id] = class_count_[class_id]; - } - } - - /// Prunes the classes using <the maximum count> * pruning_factor/256 as a - /// threshold for keeping classes. If max_of_non_fragments, then ignore - /// fragments in computing the maximum count. - void PruneAndSort(int pruning_factor, int keep_this, - bool max_of_non_fragments, const UNICHARSET& unicharset) { - int max_count = 0; - for (int c = 0; c < max_classes_; ++c) { - if (norm_count_[c] > max_count && - // This additional check is added in order to ensure that - // the classifier will return at least one non-fragmented - // character match. - // TODO(daria): verify that this helps accuracy and does not - // hurt performance. - (!max_of_non_fragments || !unicharset.get_fragment(c))) { - max_count = norm_count_[c]; - } - } - // Prune Classes. - pruning_threshold_ = (max_count * pruning_factor) >> 8; - // Select Classes. - if (pruning_threshold_ < 1) - pruning_threshold_ = 1; - num_classes_ = 0; - for (int class_id = 0; class_id < max_classes_; class_id++) { - if (norm_count_[class_id] >= pruning_threshold_ || - class_id == keep_this) { - ++num_classes_; - sort_index_[num_classes_] = class_id; - sort_key_[num_classes_] = norm_count_[class_id]; - } - } - - // Sort Classes using Heapsort Algorithm. - if (num_classes_ > 1) - HeapSort(num_classes_, sort_key_, sort_index_); - } - - /** Prints debug info on the class pruner matches for the pruned classes only. 
- */ - void DebugMatch(const Classify& classify, - const INT_TEMPLATES_STRUCT* int_templates, - const INT_FEATURE_STRUCT* features) const { - int num_pruners = int_templates->NumClassPruners; - int max_num_classes = int_templates->NumClasses; - for (int f = 0; f < num_features_; ++f) { - const INT_FEATURE_STRUCT* feature = &features[f]; - tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta); - // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS. - int x = feature->X * NUM_CP_BUCKETS >> 8; - int y = feature->Y * NUM_CP_BUCKETS >> 8; - int theta = feature->Theta * NUM_CP_BUCKETS >> 8; - int class_id = 0; - for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) { - // Look up quantized feature in a 3-D array, an array of weights for - // each class. - const uint32_t* pruner_word_ptr = - int_templates->ClassPruners[pruner_set]->p[x][y][theta]; - for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) { - uint32_t pruner_word = *pruner_word_ptr++; - for (int word_class = 0; word_class < 16 && - class_id < max_num_classes; ++word_class, ++class_id) { - if (norm_count_[class_id] >= pruning_threshold_) { - tprintf(" %s=%d,", - classify.ClassIDToDebugStr(int_templates, - class_id, 0).string(), - pruner_word & CLASS_PRUNER_CLASS_MASK); - } - pruner_word >>= NUM_BITS_PER_CLASS; - } - } - tprintf("\n"); - } - } - } - - /** Prints a summary of the pruner result. 
*/ - void SummarizeResult(const Classify& classify, - const INT_TEMPLATES_STRUCT* int_templates, - const uint16_t* expected_num_features, - int norm_multiplier, - const uint8_t* normalization_factors) const { - tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_); - for (int i = 0; i < num_classes_; ++i) { - int class_id = sort_index_[num_classes_ - i]; - STRING class_string = classify.ClassIDToDebugStr(int_templates, - class_id, 0); - tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n", - class_string.string(), - class_count_[class_id], - expected_num_features[class_id], - (norm_multiplier * normalization_factors[class_id]) >> 8, - sort_key_[num_classes_ - i], - 100.0 - 100.0 * sort_key_[num_classes_ - i] / - (CLASS_PRUNER_CLASS_MASK * num_features_)); - } - } - - /// Copies the pruned, sorted classes into the output results and returns - /// the number of classes. - int SetupResults(GenericVector* results) const { - CP_RESULT_STRUCT empty; - results->init_to_size(num_classes_, empty); - for (int c = 0; c < num_classes_; ++c) { - (*results)[c].Class = sort_index_[num_classes_ - c]; - (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] / - (static_cast(CLASS_PRUNER_CLASS_MASK) * num_features_); - } - return num_classes_; - } - - private: - /** Array[rounded_classes_] of initial counts for each class. */ - int *class_count_; - /// Array[rounded_classes_] of modified counts for each class after - /// normalizing for expected number of features, disabled classes, fragments, - /// and xheights. - int *norm_count_; - /** Array[rounded_classes_ +1] of pruned counts that gets sorted */ - int *sort_key_; - /** Array[rounded_classes_ +1] of classes corresponding to sort_key_. */ - int *sort_index_; - /** Number of classes in this class pruner. */ - int max_classes_; - /** Rounded up number of classes used for array sizes. */ - int rounded_classes_; - /** Threshold count applied to prune classes. 
*/ - int pruning_threshold_; - /** The number of features used to compute the scores. */ - int num_features_; - /** Final number of pruned classes. */ - int num_classes_; -}; - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/** - * Runs the class pruner from int_templates on the given features, returning - * the number of classes output in results. - * @param int_templates Class pruner tables - * @param num_features Number of features in blob - * @param features Array of features - * @param normalization_factors Array of fudge factors from blob - * normalization process (by CLASS_INDEX) - * @param expected_num_features Array of expected number of features - * for each class (by CLASS_INDEX) - * @param results Sorted Array of pruned classes. Must be an - * array of size at least - * int_templates->NumClasses. - * @param keep_this - */ -int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, - int num_features, int keep_this, - const INT_FEATURE_STRUCT* features, - const uint8_t* normalization_factors, - const uint16_t* expected_num_features, - GenericVector* results) { - ClassPruner pruner(int_templates->NumClasses); - // Compute initial match scores for all classes. - pruner.ComputeScores(int_templates, num_features, features); - // Adjust match scores for number of expected features. - pruner.AdjustForExpectedNumFeatures(expected_num_features, - classify_cp_cutoff_strength); - // Apply disabled classes in unicharset - only works without a shape_table. - if (shape_table_ == nullptr) - pruner.DisableDisabledClasses(unicharset); - // If fragments are disabled, remove them, also only without a shape table. - if (disable_character_fragments && shape_table_ == nullptr) - pruner.DisableFragments(unicharset); - - // If we have good x-heights, apply the given normalization factors. 
- if (normalization_factors != nullptr) { - pruner.NormalizeForXheight(classify_class_pruner_multiplier, - normalization_factors); - } else { - pruner.NoNormalization(); - } - // Do the actual pruning and sort the short-list. - pruner.PruneAndSort(classify_class_pruner_threshold, keep_this, - shape_table_ == nullptr, unicharset); - - if (classify_debug_level > 2) { - pruner.DebugMatch(*this, int_templates, features); - } - if (classify_debug_level > 1) { - pruner.SummarizeResult(*this, int_templates, expected_num_features, - classify_class_pruner_multiplier, - normalization_factors); - } - // Convert to the expected output format. - return pruner.SetupResults(results); -} - -} // namespace tesseract - -/** - * IntegerMatcher returns the best configuration and rating - * for a single class. The class matched against is determined - * by the uniqueness of the ClassTemplate parameter. The - * best rating and its associated configuration are returned. - * - * Globals: - * - local_matcher_multiplier_ Normalization factor multiplier - * param ClassTemplate Prototypes & tables for a class - * param BlobLength Length of unormalized blob - * param NumFeatures Number of features in blob - * param Features Array of features - * param NormalizationFactor Fudge factor from blob normalization process - * param Result Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good - * param Debug Debugger flag: 1=debugger on - * @return none - */ -void IntegerMatcher::Match(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, - const INT_FEATURE_STRUCT* Features, - UnicharRating* Result, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows) { - ScratchEvidence *tables = new ScratchEvidence(); - int Feature; - - if (MatchDebuggingOn (Debug)) - cprintf ("Integer Matcher -------------------------------------------\n"); - - tables->Clear(ClassTemplate); - Result->feature_misses = 0; - - for (Feature = 0; Feature < NumFeatures; 
Feature++) { - int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, - Feature, &Features[Feature], - tables, Debug); - // Count features that were missed over all configs. - if (csum == 0) - ++Result->feature_misses; - } - -#ifndef GRAPHICS_DISABLED - if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) { - DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, - NumFeatures, Debug); - } - - if (DisplayProtoMatchesOn(Debug)) { - DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask, - *tables, SeparateDebugWindows); - } - - if (DisplayFeatureMatchesOn(Debug)) { - DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures, - Features, AdaptFeatureThreshold, Debug, - SeparateDebugWindows); - } -#endif - - tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures); - tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures); - - FindBestMatch(ClassTemplate, *tables, Result); - -#ifndef GRAPHICS_DISABLED - if (PrintMatchSummaryOn(Debug)) - Result->Print(); - - if (MatchDebuggingOn(Debug)) - cprintf("Match Complete --------------------------------------------\n"); -#endif - - delete tables; -} - -/** - * FindGoodProtos finds all protos whose normalized proto-evidence - * exceed classify_adapt_proto_thresh. The list is ordered by increasing - * proto id number. - * - * Globals: - * - local_matcher_multiplier_ Normalization factor multiplier - * param ClassTemplate Prototypes & tables for a class - * param ProtoMask AND Mask for proto word - * param ConfigMask AND Mask for config word - * param BlobLength Length of unormalized blob - * param NumFeatures Number of features in blob - * param Features Array of features - * param ProtoArray Array of good protos - * param AdaptProtoThreshold Threshold for good protos - * param Debug Debugger flag: 1=debugger on - * @return Number of good protos in ProtoArray. 
- */ -int IntegerMatcher::FindGoodProtos( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - PROTO_ID *ProtoArray, - int AdaptProtoThreshold, - int Debug) { - ScratchEvidence *tables = new ScratchEvidence(); - int NumGoodProtos = 0; - - /* DEBUG opening heading */ - if (MatchDebuggingOn (Debug)) - cprintf - ("Find Good Protos -------------------------------------------\n"); - - tables->Clear(ClassTemplate); - - for (int Feature = 0; Feature < NumFeatures; Feature++) - UpdateTablesForFeature( - ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]), - tables, Debug); - -#ifndef GRAPHICS_DISABLED - if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug)) - DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, - NumFeatures, Debug); -#endif - - /* Average Proto Evidences & Find Good Protos */ - for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) { - /* Compute Average for Actual Proto */ - int Temp = 0; - for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++) - Temp += tables->proto_evidence_[proto][i]; - - Temp /= ClassTemplate->ProtoLengths[proto]; - - /* Find Good Protos */ - if (Temp >= AdaptProtoThreshold) { - *ProtoArray = proto; - ProtoArray++; - NumGoodProtos++; - } - } - - if (MatchDebuggingOn (Debug)) - cprintf ("Match Complete --------------------------------------------\n"); - delete tables; - - return NumGoodProtos; -} - -/** - * FindBadFeatures finds all features with maximum feature-evidence < - * AdaptFeatureThresh. The list is ordered by increasing feature number. 
- * @param ClassTemplate Prototypes & tables for a class - * @param ProtoMask AND Mask for proto word - * @param ConfigMask AND Mask for config word - * @param BlobLength Length of unormalized blob - * @param NumFeatures Number of features in blob - * @param Features Array of features - * @param FeatureArray Array of bad features - * @param AdaptFeatureThreshold Threshold for bad features - * @param Debug Debugger flag: 1=debugger on - * @return Number of bad features in FeatureArray. - */ -int IntegerMatcher::FindBadFeatures( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_ID *FeatureArray, - int AdaptFeatureThreshold, - int Debug) { - ScratchEvidence *tables = new ScratchEvidence(); - int NumBadFeatures = 0; - - /* DEBUG opening heading */ - if (MatchDebuggingOn(Debug)) - cprintf("Find Bad Features -------------------------------------------\n"); - - tables->Clear(ClassTemplate); - - for (int Feature = 0; Feature < NumFeatures; Feature++) { - UpdateTablesForFeature( - ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], - tables, Debug); - - /* Find Best Evidence for Current Feature */ - int best = 0; - for (int i = 0; i < ClassTemplate->NumConfigs; i++) - if (tables->feature_evidence_[i] > best) - best = tables->feature_evidence_[i]; - - /* Find Bad Features */ - if (best < AdaptFeatureThreshold) { - *FeatureArray = Feature; - FeatureArray++; - NumBadFeatures++; - } - } - -#ifndef GRAPHICS_DISABLED - if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) - DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, - NumFeatures, Debug); -#endif - - if (MatchDebuggingOn(Debug)) - cprintf("Match Complete --------------------------------------------\n"); - - delete tables; - return NumBadFeatures; -} - - -IntegerMatcher::IntegerMatcher(tesseract::IntParam *classify_debug_level) - : 
classify_debug_level_(classify_debug_level) -{ - /* Initialize table for evidence to similarity lookup */ - for (int i = 0; i < SE_TABLE_SIZE; i++) { - uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS); - double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0; - double evidence = Similarity / kSimilarityCenter; - evidence = 255.0 / (evidence * evidence + 1.0); - - if (kSEExponentialMultiplier > 0.0) { - double scale = 1.0 - exp(-kSEExponentialMultiplier) * - exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE)); - evidence *= ClipToRange(scale, 0.0, 1.0); - } - - similarity_evidence_table_[i] = (uint8_t) (evidence + 0.5); - } - - /* Initialize evidence computation variables */ - evidence_table_mask_ = - ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits); - mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits); - table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1)); - evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1); -} - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -void ScratchEvidence::Clear(const INT_CLASS class_template) { - memset(sum_feature_evidence_, 0, - class_template->NumConfigs * sizeof(sum_feature_evidence_[0])); - memset(proto_evidence_, 0, - class_template->NumProtos * sizeof(proto_evidence_[0])); -} - -void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) { - memset(feature_evidence_, 0, - class_template->NumConfigs * sizeof(feature_evidence_[0])); -} - -/** - * Print debugging information for Configurations - * @return none - */ -static void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum, - uint8_t Evidence, BIT_VECTOR ConfigMask, - uint32_t ConfigWord) { - cprintf ("F = %3d, P = %3d, E = %3d, Configs = ", - FeatureNum, (int) ActualProtoNum, (int) Evidence); - while (ConfigWord) { - if (ConfigWord & 1) - cprintf ("1"); - else 
- cprintf ("0"); - ConfigWord >>= 1; - } - cprintf ("\n"); -} - -/** - * Print debugging information for Configurations - * @return none - */ -static void IMDebugConfigurationSum(int FeatureNum, uint8_t *FeatureEvidence, - int32_t ConfigCount) { - cprintf("F=%3d, C=", FeatureNum); - for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { - cprintf("%4d", FeatureEvidence[ConfigNum]); - } - cprintf("\n"); -} - -/** - * For the given feature: prune protos, compute evidence, - * update Feature Evidence, Proto Evidence, and Sum of Feature - * Evidence tables. - * @param ClassTemplate Prototypes & tables for a class - * @param FeatureNum Current feature number (for DEBUG only) - * @param Feature Pointer to a feature struct - * @param tables Evidence tables - * @param Debug Debugger flag: 1=debugger on - * @return none - */ -int IntegerMatcher::UpdateTablesForFeature( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int FeatureNum, - const INT_FEATURE_STRUCT* Feature, - ScratchEvidence *tables, - int Debug) { - uint32_t ConfigWord; - uint32_t ProtoWord; - uint32_t ProtoNum; - uint32_t ActualProtoNum; - uint8_t proto_byte; - int32_t proto_word_offset; - int32_t proto_offset; - uint8_t config_byte; - int32_t config_offset; - PROTO_SET ProtoSet; - uint32_t *ProtoPrunerPtr; - INT_PROTO Proto; - int ProtoSetIndex; - uint8_t Evidence; - uint32_t XFeatureAddress; - uint32_t YFeatureAddress; - uint32_t ThetaFeatureAddress; - uint8_t* UINT8Pointer; - int ProtoIndex; - uint8_t Temp; - int* IntPointer; - int ConfigNum; - int32_t M3; - int32_t A3; - uint32_t A4; - - tables->ClearFeatureEvidence(ClassTemplate); - - /* Precompute Feature Address offset for Proto Pruning */ - XFeatureAddress = ((Feature->X >> 2) << 1); - YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1); - ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1); - - for (ProtoSetIndex = 0, ActualProtoNum = 0; - ProtoSetIndex < 
ClassTemplate->NumProtoSets; ProtoSetIndex++) { - ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ProtoPrunerPtr = (uint32_t *) ((*ProtoSet).ProtoPruner); - for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET; - ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum += - (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) { - /* Prune Protos of current Proto Set */ - ProtoWord = *(ProtoPrunerPtr + XFeatureAddress); - ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress); - ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress); - ProtoWord &= *ProtoMask; - - if (ProtoWord != 0) { - proto_byte = ProtoWord & 0xff; - ProtoWord >>= 8; - proto_word_offset = 0; - while (ProtoWord != 0 || proto_byte != 0) { - while (proto_byte == 0) { - proto_byte = ProtoWord & 0xff; - ProtoWord >>= 8; - proto_word_offset += 8; - } - proto_offset = offset_table[proto_byte] + proto_word_offset; - proto_byte = next_table[proto_byte]; - Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]); - ConfigWord = Proto->Configs[0]; - A3 = (((Proto->A * (Feature->X - 128)) << 1) - - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9)); - M3 = - (((int8_t) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1; - - if (A3 < 0) - A3 = ~A3; - if (M3 < 0) - M3 = ~M3; - A3 >>= mult_trunc_shift_bits_; - M3 >>= mult_trunc_shift_bits_; - if (static_cast(A3) > evidence_mult_mask_) - A3 = evidence_mult_mask_; - if (static_cast(M3) > evidence_mult_mask_) - M3 = evidence_mult_mask_; - - A4 = (A3 * A3) + (M3 * M3); - A4 >>= table_trunc_shift_bits_; - if (A4 > evidence_table_mask_) - Evidence = 0; - else - Evidence = similarity_evidence_table_[A4]; - - if (PrintFeatureMatchesOn (Debug)) - IMDebugConfiguration (FeatureNum, - ActualProtoNum + proto_offset, - Evidence, ConfigMask, ConfigWord); - - ConfigWord &= *ConfigMask; - - UINT8Pointer = tables->feature_evidence_ - 8; - config_byte = 0; - while (ConfigWord != 0 || config_byte != 0) { - while (config_byte == 0) { - config_byte = ConfigWord & 0xff; - 
ConfigWord >>= 8; - UINT8Pointer += 8; - } - config_offset = offset_table[config_byte]; - config_byte = next_table[config_byte]; - if (Evidence > UINT8Pointer[config_offset]) - UINT8Pointer[config_offset] = Evidence; - } - - UINT8Pointer = - &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]); - for (ProtoIndex = - ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset]; - ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) { - if (Evidence > *UINT8Pointer) { - Temp = *UINT8Pointer; - *UINT8Pointer = Evidence; - Evidence = Temp; - } - else if (Evidence == 0) - break; - } - } - } - } - } - - if (PrintFeatureMatchesOn(Debug)) { - IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_, - ClassTemplate->NumConfigs); - } - - IntPointer = tables->sum_feature_evidence_; - UINT8Pointer = tables->feature_evidence_; - int SumOverConfigs = 0; - for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) { - int evidence = *UINT8Pointer++; - SumOverConfigs += evidence; - *IntPointer++ += evidence; - } - return SumOverConfigs; -} - -/** - * Print debugging information for Configurations - * @return none - */ -#ifndef GRAPHICS_DISABLED -void IntegerMatcher::DebugFeatureProtoError( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence& tables, - int16_t NumFeatures, - int Debug) { - float ProtoConfigs[MAX_NUM_CONFIGS]; - int ConfigNum; - uint32_t ConfigWord; - int ProtoSetIndex; - uint16_t ProtoNum; - uint8_t ProtoWordNum; - PROTO_SET ProtoSet; - uint16_t ActualProtoNum; - - if (PrintMatchSummaryOn(Debug)) { - cprintf("Configuration Mask:\n"); - for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1)); - cprintf("\n"); - - cprintf("Feature Error for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { - cprintf( - " %5.1f", - 100.0 * (1.0 - (float)tables.sum_feature_evidence_[ConfigNum] - / 
NumFeatures / 256.0)); - } - cprintf("\n\n\n"); - } - - if (PrintMatchSummaryOn (Debug)) { - cprintf ("Proto Mask:\n"); - for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoWordNum = 0; ProtoWordNum < 2; - ProtoWordNum++, ProtoMask++) { - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoNum = 0; - ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) - && (ActualProtoNum < ClassTemplate->NumProtos)); - ProtoNum++, ActualProtoNum++) - cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1)); - cprintf ("\n"); - } - } - cprintf ("\n"); - } - - for (int i = 0; i < ClassTemplate->NumConfigs; i++) - ProtoConfigs[i] = 0; - - if (PrintProtoMatchesOn (Debug)) { - cprintf ("Proto Evidence:\n"); - for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { - ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) && - (ActualProtoNum < ClassTemplate->NumProtos)); - ProtoNum++, ActualProtoNum++) { - cprintf ("P %3d =", ActualProtoNum); - int temp = 0; - for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) { - uint8_t data = tables.proto_evidence_[ActualProtoNum][j]; - cprintf(" %d", data); - temp += data; - } - - cprintf(" = %6.4f%%\n", - temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]); - - ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; - ConfigNum = 0; - while (ConfigWord) { - cprintf ("%5d", ConfigWord & 1 ? 
temp : 0); - if (ConfigWord & 1) - ProtoConfigs[ConfigNum] += temp; - ConfigNum++; - ConfigWord >>= 1; - } - cprintf("\n"); - } - } - } - - if (PrintMatchSummaryOn (Debug)) { - cprintf ("Proto Error for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf (" %5.1f", - 100.0 * (1.0 - - ProtoConfigs[ConfigNum] / - ClassTemplate->ConfigLengths[ConfigNum] / 256.0)); - cprintf ("\n\n"); - } - - if (PrintProtoMatchesOn (Debug)) { - cprintf ("Proto Sum for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0); - cprintf ("\n\n"); - - cprintf ("Proto Length for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf (" %4.1f", - (float) ClassTemplate->ConfigLengths[ConfigNum]); - cprintf ("\n\n"); - } - -} - -void IntegerMatcher::DisplayProtoDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence& tables, - bool SeparateDebugWindows) { - uint16_t ProtoNum; - uint16_t ActualProtoNum; - PROTO_SET ProtoSet; - int ProtoSetIndex; - - InitIntMatchWindowIfReqd(); - if (SeparateDebugWindows) { - InitFeatureDisplayWindowIfReqd(); - InitProtoDisplayWindowIfReqd(); - } - - - for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { - ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; - for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) && - (ActualProtoNum < ClassTemplate->NumProtos)); - ProtoNum++, ActualProtoNum++) { - /* Compute Average for Actual Proto */ - int temp = 0; - for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) - temp += tables.proto_evidence_[ActualProtoNum][i]; - - temp /= ClassTemplate->ProtoLengths[ActualProtoNum]; - - if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) { - 
DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0); - } - } - } -} - - -void IntegerMatcher::DisplayFeatureDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, - const INT_FEATURE_STRUCT* Features, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows) { - ScratchEvidence *tables = new ScratchEvidence(); - - tables->Clear(ClassTemplate); - - InitIntMatchWindowIfReqd(); - if (SeparateDebugWindows) { - InitFeatureDisplayWindowIfReqd(); - InitProtoDisplayWindowIfReqd(); - } - - for (int Feature = 0; Feature < NumFeatures; Feature++) { - UpdateTablesForFeature( - ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], - tables, 0); - - /* Find Best Evidence for Current Feature */ - int best = 0; - for (int i = 0; i < ClassTemplate->NumConfigs; i++) - if (tables->feature_evidence_[i] > best) - best = tables->feature_evidence_[i]; - - /* Update display for current feature */ - if (ClipMatchEvidenceOn(Debug)) { - if (best < AdaptFeatureThreshold) - DisplayIntFeature(&Features[Feature], 0.0); - else - DisplayIntFeature(&Features[Feature], 1.0); - } else { - DisplayIntFeature(&Features[Feature], best / 255.0); - } - } - - delete tables; -} -#endif - -/** - * Add sum of Proto Evidences into Sum Of Feature Evidence Array - */ -void ScratchEvidence::UpdateSumOfProtoEvidences( - INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures) { - - int *IntPointer; - uint32_t ConfigWord; - int ProtoSetIndex; - uint16_t ProtoNum; - PROTO_SET ProtoSet; - int NumProtos; - uint16_t ActualProtoNum; - - NumProtos = ClassTemplate->NumProtos; - - for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { - ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos)); - ProtoNum++, ActualProtoNum++) { - int temp = 0; - 
for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) - temp += proto_evidence_[ActualProtoNum] [i]; - - ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; - ConfigWord &= *ConfigMask; - IntPointer = sum_feature_evidence_; - while (ConfigWord) { - if (ConfigWord & 1) - *IntPointer += temp; - IntPointer++; - ConfigWord >>= 1; - } - } - } -} - -/** - * Normalize Sum of Proto and Feature Evidence by dividing by the sum of - * the Feature Lengths and the Proto Lengths for each configuration. - */ -void ScratchEvidence::NormalizeSums( - INT_CLASS ClassTemplate, int16_t NumFeatures, int32_t used_features) { - - for (int i = 0; i < ClassTemplate->NumConfigs; i++) { - sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) / - (NumFeatures + ClassTemplate->ConfigLengths[i]); - } -} - -/** - * Find the best match for the current class and update the Result - * with the configuration and match rating. - * @return The best normalized sum of evidences - */ -int IntegerMatcher::FindBestMatch( - INT_CLASS class_template, - const ScratchEvidence &tables, - UnicharRating* result) { - int best_match = 0; - result->config = 0; - result->fonts.truncate(0); - result->fonts.reserve(class_template->NumConfigs); - - /* Find best match */ - for (int c = 0; c < class_template->NumConfigs; ++c) { - int rating = tables.sum_feature_evidence_[c]; - if (*classify_debug_level_ > 2) - tprintf("Config %d, rating=%d\n", c, rating); - if (rating > best_match) { - result->config = c; - best_match = rating; - } - result->fonts.push_back(ScoredFont(c, rating)); - } - - // Compute confidence on a Probability scale. - result->rating = best_match / 65536.0f; - - return best_match; -} - -/** - * Applies the CN normalization factor to the given rating and returns - * the modified rating. 
- */ -float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, - int normalization_factor, - int matcher_multiplier) { - return (rating * blob_length + - matcher_multiplier * normalization_factor / 256.0) / - (blob_length + matcher_multiplier); -} - -/** - * Sort Key array in ascending order using heap sort - * algorithm. Also sort Index array that is tied to - * the key array. - * @param n Number of elements to sort - * @param ra Key array [1..n] - * @param rb Index array [1..n] - * @return none - */ -void -HeapSort (int n, int ra[], int rb[]) { - int i, rra, rrb; - int l, j, ir; - - l = (n >> 1) + 1; - ir = n; - for (;;) { - if (l > 1) { - rra = ra[--l]; - rrb = rb[l]; - } - else { - rra = ra[ir]; - rrb = rb[ir]; - ra[ir] = ra[1]; - rb[ir] = rb[1]; - if (--ir == 1) { - ra[1] = rra; - rb[1] = rrb; - return; - } - } - i = l; - j = l << 1; - while (j <= ir) { - if (j < ir && ra[j] < ra[j + 1]) - ++j; - if (rra < ra[j]) { - ra[i] = ra[j]; - rb[i] = rb[j]; - j += (i = j); - } - else - j = ir + 1; - } - ra[i] = rra; - rb[i] = rrb; - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intmatcher.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intmatcher.h deleted file mode 100644 index 47c9248d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intmatcher.h +++ /dev/null @@ -1,201 +0,0 @@ -/****************************************************************************** - ** Filename: intmatcher.h - ** Purpose: Interface to high level generic classifier routines. - ** Author: Robert Moss - ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -#ifndef INTMATCHER_H -#define INTMATCHER_H - -#include "params.h" - -// Character fragments could be present in the trained templaes -// but turned on/off on the language-by-language basis or depending -// on particular properties of the corpus (e.g. when we expect the -// images to have low exposure). -extern BOOL_VAR_H(disable_character_fragments, FALSE, - "Do not include character fragments in the" - " results of the classifier"); - -extern INT_VAR_H(classify_integer_matcher_multiplier, 10, - "Integer Matcher Multiplier 0-255: "); - - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "intproto.h" -#include "cutoffs.h" - -namespace tesseract { -struct UnicharRating; -} - -struct CP_RESULT_STRUCT { - CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} - - float Rating; - CLASS_ID Class; -}; - -/*---------------------------------------------------------------------------- - Variables ------------------------------------------------------------------------------*/ - -extern INT_VAR_H(classify_adapt_proto_thresh, 230, - "Threshold for good protos during adaptive 0-255: "); - -extern INT_VAR_H(classify_adapt_feature_thresh, 230, - "Threshold for good features during adaptive 0-255: "); - -/**---------------------------------------------------------------------------- - Public Function Prototypes 
-----------------------------------------------------------------------------**/ - -#define SE_TABLE_BITS 9 -#define SE_TABLE_SIZE 512 - -struct ScratchEvidence { - uint8_t feature_evidence_[MAX_NUM_CONFIGS]; - int sum_feature_evidence_[MAX_NUM_CONFIGS]; - uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; - - void Clear(const INT_CLASS class_template); - void ClearFeatureEvidence(const INT_CLASS class_template); - void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures, - int32_t used_features); - void UpdateSumOfProtoEvidences( - INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures); -}; - - -class IntegerMatcher { - public: - // Integer Matcher Theta Fudge (0-255). - static const int kIntThetaFudge = 128; - // Bits in Similarity to Evidence Lookup (8-9). - static const int kEvidenceTableBits = 9; - // Integer Evidence Truncation Bits (8-14). - static const int kIntEvidenceTruncBits = 14; - // Similarity to Evidence Table Exponential Multiplier. - static const float kSEExponentialMultiplier; - // Center of Similarity Curve. - static const float kSimilarityCenter; - - IntegerMatcher(tesseract::IntParam *classify_debug_level); - - void Match(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, - const INT_FEATURE_STRUCT* Features, - tesseract::UnicharRating* Result, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows); - - // Applies the CN normalization factor to the given rating and returns - // the modified rating. 
- float ApplyCNCorrection(float rating, int blob_length, - int normalization_factor, int matcher_multiplier); - - int FindGoodProtos(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - PROTO_ID *ProtoArray, - int AdaptProtoThreshold, - int Debug); - - int FindBadFeatures(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_ID *FeatureArray, - int AdaptFeatureThreshold, - int Debug); - - private: - int UpdateTablesForFeature( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int FeatureNum, - const INT_FEATURE_STRUCT* Feature, - ScratchEvidence *evidence, - int Debug); - - int FindBestMatch(INT_CLASS ClassTemplate, - const ScratchEvidence &tables, - tesseract::UnicharRating* Result); - -#ifndef GRAPHICS_DISABLED - void DebugFeatureProtoError( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence &tables, - int16_t NumFeatures, - int Debug); - - void DisplayProtoDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence &tables, - bool SeparateDebugWindows); - - void DisplayFeatureDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, - const INT_FEATURE_STRUCT* Features, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows); -#endif - - private: - tesseract::IntParam *classify_debug_level_; - uint8_t similarity_evidence_table_[SE_TABLE_SIZE]; - uint32_t evidence_table_mask_; - uint32_t mult_trunc_shift_bits_; - uint32_t table_trunc_shift_bits_; - uint32_t evidence_mult_mask_; -}; - -/**---------------------------------------------------------------------------- - Private Function Prototypes 
-----------------------------------------------------------------------------**/ -void IMDebugConfiguration(INT_FEATURE FeatureNum, - uint16_t ActualProtoNum, - uint8_t Evidence, - BIT_VECTOR ConfigMask, - uint32_t ConfigWord); - -void IMDebugConfigurationSum(INT_FEATURE FeatureNum, - uint8_t *FeatureEvidence, - int32_t ConfigCount); - -void HeapSort (int n, int ra[], int rb[]); - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intproto.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intproto.cpp deleted file mode 100644 index a34d6fd5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intproto.cpp +++ /dev/null @@ -1,1792 +0,0 @@ -/****************************************************************************** - ** Filename: intproto.c - ** Purpose: Definition of data structures for integer protos. - ** Author: Dan Johnson - ** History: Thu Feb 7 14:38:16 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ - -#include -#include // for std::floor -#include -#include - -#include "classify.h" -#include "callcpp.h" // for cprintf -#include "emalloc.h" -#include "fontinfo.h" -#include "genericvector.h" -#include "globals.h" -#include "helpers.h" -#include "intproto.h" -#include "mfoutline.h" -#include "picofeat.h" -#include "points.h" -#include "shapetable.h" -#include "svmnode.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -using tesseract::FontSet; - -/* match debug display constants*/ -#define PROTO_PRUNER_SCALE (4.0) - -#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE) -#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE) -#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE) -#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE) - -#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE) -#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE) -#define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE) -#define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE) -#define INT_MIN_X 0 -#define INT_MIN_Y 0 -#define INT_MAX_X INT_CHAR_NORM_RANGE -#define INT_MAX_Y INT_CHAR_NORM_RANGE - -/** define pad used to snap near horiz/vertical protos to horiz/vertical */ -#define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */ - -typedef enum -{ StartSwitch, EndSwitch, LastSwitch } -SWITCH_TYPE; -#define MAX_NUM_SWITCHES 3 - -typedef struct -{ - SWITCH_TYPE Type; - int8_t X, Y; - int16_t YInit; - int16_t Delta; -} - - -FILL_SWITCH; - -typedef struct -{ - uint8_t NextSwitch; - uint8_t AngleStart, AngleEnd; - int8_t X; - int16_t YStart, YEnd; - int16_t StartDelta, EndDelta; - FILL_SWITCH Switch[MAX_NUM_SWITCHES]; -} - - -TABLE_FILLER; - -typedef struct -{ - int8_t X; - 
int8_t YStart, YEnd; - uint8_t AngleStart, AngleEnd; -} - - -FILL_SPEC; - - -/* constants for conversion from old inttemp format */ -#define OLD_MAX_NUM_CONFIGS 32 -#define OLD_WERDS_PER_CONFIG_VEC ((OLD_MAX_NUM_CONFIGS + BITS_PER_WERD - 1) /\ - BITS_PER_WERD) - -/*----------------------------------------------------------------------------- - Macros ------------------------------------------------------------------------------*/ -/** macro for performing circular increments of bucket indices */ -#define CircularIncrement(i,r) (((i) < (r) - 1)?((i)++):((i) = 0)) - -/** macro for mapping floats to ints without bounds checking */ -#define MapParam(P,O,N) (std::floor(((P) + (O)) * (N))) - -/*--------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------*/ -float BucketStart(int Bucket, float Offset, int NumBuckets); - -float BucketEnd(int Bucket, float Offset, int NumBuckets); - -void DoFill(FILL_SPEC *FillSpec, - CLASS_PRUNER_STRUCT* Pruner, - uint32_t ClassMask, - uint32_t ClassCount, - uint32_t WordIndex); - -bool FillerDone(TABLE_FILLER* Filler); - -void FillPPCircularBits(uint32_t - ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, float Center, float Spread, bool debug); - -void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, float Center, float Spread, bool debug); - -void GetCPPadsForLevel(int Level, - float *EndPad, - float *SidePad, - float *AnglePad); - -ScrollView::Color GetMatchColorFor(float Evidence); - -void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill); - -void InitTableFiller(float EndPad, - float SidePad, - float AnglePad, - PROTO Proto, - TABLE_FILLER *Filler); - -#ifndef GRAPHICS_DISABLED -void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, - ScrollView::Color color); - -void RenderIntProto(ScrollView *window, - INT_CLASS Class, - PROTO_ID ProtoId, - 
ScrollView::Color color); -#endif // GRAPHICS_DISABLED - -int TruncateParam(float Param, int Min, int Max, char *Id); - -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ - -/* global display lists used to display proto and feature match information*/ -ScrollView *IntMatchWindow = nullptr; -ScrollView *FeatureDisplayWindow = nullptr; -ScrollView *ProtoDisplayWindow = nullptr; - -/*----------------------------------------------------------------------------- - Variables ------------------------------------------------------------------------------*/ - -/* control knobs */ -INT_VAR(classify_num_cp_levels, 3, "Number of Class Pruner Levels"); -double_VAR(classify_cp_angle_pad_loose, 45.0, - "Class Pruner Angle Pad Loose"); -double_VAR(classify_cp_angle_pad_medium, 20.0, - "Class Pruner Angle Pad Medium"); -double_VAR(classify_cp_angle_pad_tight, 10.0, - "CLass Pruner Angle Pad Tight"); -double_VAR(classify_cp_end_pad_loose, 0.5, "Class Pruner End Pad Loose"); -double_VAR(classify_cp_end_pad_medium, 0.5, "Class Pruner End Pad Medium"); -double_VAR(classify_cp_end_pad_tight, 0.5, "Class Pruner End Pad Tight"); -double_VAR(classify_cp_side_pad_loose, 2.5, "Class Pruner Side Pad Loose"); -double_VAR(classify_cp_side_pad_medium, 1.2, "Class Pruner Side Pad Medium"); -double_VAR(classify_cp_side_pad_tight, 0.6, "Class Pruner Side Pad Tight"); -double_VAR(classify_pp_angle_pad, 45.0, "Proto Pruner Angle Pad"); -double_VAR(classify_pp_end_pad, 0.5, "Proto Prune End Pad"); -double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad"); - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/// Builds a feature from an FCOORD for position with all the necessary -/// clipping and rounding. 
-INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta) - : X(ClipToRange(static_cast(pos.x() + 0.5), 0, 255)), - Y(ClipToRange(static_cast(pos.y() + 0.5), 0, 255)), - Theta(theta), - CP_misses(0) { -} -/** Builds a feature from ints with all the necessary clipping and casting. */ -INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta) - : X(static_cast(ClipToRange(x, 0, UINT8_MAX))), - Y(static_cast(ClipToRange(y, 0, UINT8_MAX))), - Theta(static_cast(ClipToRange(theta, 0, UINT8_MAX))), - CP_misses(0) { -} - -/** - * This routine adds a new class structure to a set of - * templates. Classes have to be added to Templates in - * the order of increasing ClassIds. - * - * @param Templates templates to add new class to - * @param ClassId class id to associate new class with - * @param Class class data structure to add to templates - * - * Globals: none - */ -void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) { - int Pruner; - - assert (LegalClassId (ClassId)); - if (ClassId != Templates->NumClasses) { - fprintf(stderr, "Please make sure that classes are added to templates"); - fprintf(stderr, " in increasing order of ClassIds\n"); - exit(1); - } - ClassForClassId (Templates, ClassId) = Class; - Templates->NumClasses++; - - if (Templates->NumClasses > MaxNumClassesIn (Templates)) { - Pruner = Templates->NumClassPruners++; - Templates->ClassPruners[Pruner] = new CLASS_PRUNER_STRUCT; - memset(Templates->ClassPruners[Pruner], 0, sizeof(CLASS_PRUNER_STRUCT)); - } -} /* AddIntClass */ - - -/** - * This routine returns the index of the next free config - * in Class. - * - * @param Class class to add new configuration to - * - * Globals: none - * - * @return Index of next free config. 
- */ -int AddIntConfig(INT_CLASS Class) { - int Index; - - assert(Class->NumConfigs < MAX_NUM_CONFIGS); - - Index = Class->NumConfigs++; - Class->ConfigLengths[Index] = 0; - return Index; -} /* AddIntConfig */ - - -/** - * This routine allocates the next free proto in Class and - * returns its index. - * - * @param Class class to add new proto to - * - * Globals: none - * - * @return Proto index of new proto. - */ -int AddIntProto(INT_CLASS Class) { - int Index; - int ProtoSetId; - PROTO_SET ProtoSet; - INT_PROTO Proto; - uint32_t *Word; - - if (Class->NumProtos >= MAX_NUM_PROTOS) - return (NO_PROTO); - - Index = Class->NumProtos++; - - if (Class->NumProtos > MaxNumIntProtosIn(Class)) { - ProtoSetId = Class->NumProtoSets++; - - ProtoSet = (PROTO_SET) Emalloc(sizeof(PROTO_SET_STRUCT)); - Class->ProtoSets[ProtoSetId] = ProtoSet; - memset(ProtoSet, 0, sizeof(*ProtoSet)); - - /* reallocate space for the proto lengths and install in class */ - Class->ProtoLengths = - (uint8_t *)Erealloc(Class->ProtoLengths, - MaxNumIntProtosIn(Class) * sizeof(uint8_t)); - memset(&Class->ProtoLengths[Index], 0, - sizeof(*Class->ProtoLengths) * (MaxNumIntProtosIn(Class) - Index)); - } - - /* initialize proto so its length is zero and it isn't in any configs */ - Class->ProtoLengths[Index] = 0; - Proto = ProtoForProtoId (Class, Index); - for (Word = Proto->Configs; - Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0); - - return (Index); -} - -/** - * This routine adds Proto to the class pruning tables - * for the specified class in Templates. 
- * - * Globals: - * - classify_num_cp_levels number of levels used in the class pruner - * @param Proto floating-pt proto to add to class pruner - * @param ClassId class id corresponding to Proto - * @param Templates set of templates containing class pruner - * @return none - */ -void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, - INT_TEMPLATES Templates) -#define MAX_LEVEL 2 -{ - CLASS_PRUNER_STRUCT* Pruner; - uint32_t ClassMask; - uint32_t ClassCount; - uint32_t WordIndex; - int Level; - float EndPad, SidePad, AnglePad; - TABLE_FILLER TableFiller; - FILL_SPEC FillSpec; - - Pruner = CPrunerFor (Templates, ClassId); - WordIndex = CPrunerWordIndexFor (ClassId); - ClassMask = CPrunerMaskFor (MAX_LEVEL, ClassId); - - for (Level = classify_num_cp_levels - 1; Level >= 0; Level--) { - GetCPPadsForLevel(Level, &EndPad, &SidePad, &AnglePad); - ClassCount = CPrunerMaskFor (Level, ClassId); - InitTableFiller(EndPad, SidePad, AnglePad, Proto, &TableFiller); - - while (!FillerDone (&TableFiller)) { - GetNextFill(&TableFiller, &FillSpec); - DoFill(&FillSpec, Pruner, ClassMask, ClassCount, WordIndex); - } - } -} /* AddProtoToClassPruner */ - -/** - * This routine updates the proto pruner lookup tables - * for Class to include a new proto identified by ProtoId - * and described by Proto. 
- * @param Proto floating-pt proto to be added to proto pruner - * @param ProtoId id of proto - * @param Class integer class that contains desired proto pruner - * @param debug debug flag - * @note Globals: none - * @return none - */ -void AddProtoToProtoPruner(PROTO Proto, int ProtoId, - INT_CLASS Class, bool debug) { - float Angle, X, Y, Length; - float Pad; - int Index; - PROTO_SET ProtoSet; - - if (ProtoId >= Class->NumProtos) - cprintf("AddProtoToProtoPruner:assert failed: %d < %d", - ProtoId, Class->NumProtos); - assert(ProtoId < Class->NumProtos); - - Index = IndexForProto (ProtoId); - ProtoSet = Class->ProtoSets[SetForProto (ProtoId)]; - - Angle = Proto->Angle; -#ifndef _WIN32 - assert(!std::isnan(Angle)); -#endif - - FillPPCircularBits (ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, - Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0, - debug); - - Angle *= 2.0 * M_PI; - Length = Proto->Length; - - X = Proto->X + X_SHIFT; - Pad = std::max(fabs (cos (Angle)) * (Length / 2.0 + - classify_pp_end_pad * - GetPicoFeatureLength ()), - fabs (sin (Angle)) * (classify_pp_side_pad * - GetPicoFeatureLength ())); - - FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug); - - Y = Proto->Y + Y_SHIFT; - Pad = std::max(fabs (sin (Angle)) * (Length / 2.0 + - classify_pp_end_pad * - GetPicoFeatureLength ()), - fabs (cos (Angle)) * (classify_pp_side_pad * - GetPicoFeatureLength ())); - - FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug); -} /* AddProtoToProtoPruner */ - -/** - * Returns a quantized bucket for the given param shifted by offset, - * notionally (param + offset) * num_buckets, but clipped and casted to the - * appropriate type. 
- */ -uint8_t Bucket8For(float param, float offset, int num_buckets) { - int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); - return static_cast(ClipToRange(bucket, 0, num_buckets - 1)); -} -uint16_t Bucket16For(float param, float offset, int num_buckets) { - int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); - return static_cast(ClipToRange(bucket, 0, num_buckets - 1)); -} - -/** - * Returns a quantized bucket for the given circular param shifted by offset, - * notionally (param + offset) * num_buckets, but modded and casted to the - * appropriate type. - */ -uint8_t CircBucketFor(float param, float offset, int num_buckets) { - int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); - return static_cast(Modulo(bucket, num_buckets)); -} /* CircBucketFor */ - - -#ifndef GRAPHICS_DISABLED -/** - * This routine clears the global feature and proto - * display lists. - * - * Globals: - * - FeatureShapes display list for features - * - ProtoShapes display list for protos - * @return none - */ -void UpdateMatchDisplay() { - if (IntMatchWindow != nullptr) - IntMatchWindow->Update(); -} /* ClearMatchDisplay */ -#endif - -/** - * This operation updates the config vectors of all protos - * in Class to indicate that the protos with 1's in Config - * belong to a new configuration identified by ConfigId. - * It is assumed that the length of the Config bit vector is - * equal to the number of protos in Class. 
- * @param Config config to be added to class - * @param ConfigId id to be used for new config - * @param Class class to add new config to - * @return none - * @note Globals: none - */ -void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { - int ProtoId; - INT_PROTO Proto; - int TotalLength; - - for (ProtoId = 0, TotalLength = 0; - ProtoId < Class->NumProtos; ProtoId++) { - if (test_bit(Config, ProtoId)) { - Proto = ProtoForProtoId(Class, ProtoId); - SET_BIT(Proto->Configs, ConfigId); - TotalLength += Class->ProtoLengths[ProtoId]; - } - } - Class->ConfigLengths[ConfigId] = TotalLength; -} /* ConvertConfig */ - - -namespace tesseract { -/** - * This routine converts Proto to integer format and - * installs it as ProtoId in Class. - * @param Proto floating-pt proto to be converted to integer format - * @param ProtoId id of proto - * @param Class integer class to add converted proto to - * @return none - * @note Globals: none - */ -void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { - INT_PROTO P; - float Param; - - assert(ProtoId < Class->NumProtos); - - P = ProtoForProtoId(Class, ProtoId); - - Param = Proto->A * 128; - P->A = TruncateParam(Param, -128, 127, nullptr); - - Param = -Proto->B * 256; - P->B = TruncateParam(Param, 0, 255, nullptr); - - Param = Proto->C * 128; - P->C = TruncateParam(Param, -128, 127, nullptr); - - Param = Proto->Angle * 256; - if (Param < 0 || Param >= 256) - P->Angle = 0; - else - P->Angle = (uint8_t) Param; - - /* round proto length to nearest integer number of pico-features */ - Param = (Proto->Length / GetPicoFeatureLength()) + 0.5; - Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, nullptr); - if (classify_learning_debug_level >= 2) - cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)", - P->A, P->B, P->C, Class->ProtoLengths[ProtoId]); -} /* ConvertProto */ - -/** - * This routine converts from the old floating point format - * to the new integer format. 
- * @param FloatProtos prototypes in old floating pt format - * @param target_unicharset the UNICHARSET to use - * @return New set of training templates in integer format. - * @note Globals: none - */ -INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, - const UNICHARSET& - target_unicharset) { - INT_TEMPLATES IntTemplates; - CLASS_TYPE FClass; - INT_CLASS IClass; - int ClassId; - int ProtoId; - int ConfigId; - - IntTemplates = NewIntTemplates(); - - for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) { - FClass = &(FloatProtos[ClassId]); - if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 && - strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) { - cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n", - target_unicharset.id_to_unichar(ClassId)); - } - assert(UnusedClassIdIn(IntTemplates, ClassId)); - IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs); - FontSet fs; - fs.size = FClass->font_set.size(); - fs.configs = new int[fs.size]; - for (int i = 0; i < fs.size; ++i) { - fs.configs[i] = FClass->font_set.get(i); - } - if (this->fontset_table_.contains(fs)) { - IClass->font_set_id = this->fontset_table_.get_id(fs); - delete[] fs.configs; - } else { - IClass->font_set_id = this->fontset_table_.push_back(fs); - } - AddIntClass(IntTemplates, ClassId, IClass); - - for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) { - AddIntProto(IClass); - ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass); - AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass, - classify_learning_debug_level >= 2); - AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates); - } - - for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) { - AddIntConfig(IClass); - ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass); - } - } - return (IntTemplates); -} /* CreateIntTemplates */ -} // namespace tesseract - - -#ifndef GRAPHICS_DISABLED -/** - * This routine renders the specified 
feature into a - * global display list. - * - * Globals: - * - FeatureShapes global display list for features - * @param Feature pico-feature to be displayed - * @param Evidence best evidence for this feature (0-1) - * @return none - */ -void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, float Evidence) { - ScrollView::Color color = GetMatchColorFor(Evidence); - RenderIntFeature(IntMatchWindow, Feature, color); - if (FeatureDisplayWindow) { - RenderIntFeature(FeatureDisplayWindow, Feature, color); - } -} /* DisplayIntFeature */ - -/** - * This routine renders the specified proto into a - * global display list. - * - * Globals: - * - ProtoShapes global display list for protos - * @param Class class to take proto from - * @param ProtoId id of proto in Class to be displayed - * @param Evidence total evidence for proto (0-1) - * @return none - */ -void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence) { - ScrollView::Color color = GetMatchColorFor(Evidence); - RenderIntProto(IntMatchWindow, Class, ProtoId, color); - if (ProtoDisplayWindow) { - RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color); - } -} /* DisplayIntProto */ -#endif - -/** - * This routine creates a new integer class data structure - * and returns it. Sufficient space is allocated - * to handle the specified number of protos and configs. - * @param MaxNumProtos number of protos to allocate space for - * @param MaxNumConfigs number of configs to allocate space for - * @return New class created. 
- * @note Globals: none - */ -INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { - INT_CLASS Class; - PROTO_SET ProtoSet; - int i; - - assert(MaxNumConfigs <= MAX_NUM_CONFIGS); - - Class = (INT_CLASS) Emalloc(sizeof(INT_CLASS_STRUCT)); - Class->NumProtoSets = ((MaxNumProtos + PROTOS_PER_PROTO_SET - 1) / - PROTOS_PER_PROTO_SET); - - assert(Class->NumProtoSets <= MAX_NUM_PROTO_SETS); - - Class->NumProtos = 0; - Class->NumConfigs = 0; - - for (i = 0; i < Class->NumProtoSets; i++) { - /* allocate space for a proto set, install in class, and initialize */ - ProtoSet = (PROTO_SET) Emalloc(sizeof(PROTO_SET_STRUCT)); - memset(ProtoSet, 0, sizeof(*ProtoSet)); - Class->ProtoSets[i] = ProtoSet; - - /* allocate space for the proto lengths and install in class */ - } - if (MaxNumIntProtosIn (Class) > 0) { - Class->ProtoLengths = - (uint8_t *)Emalloc(MaxNumIntProtosIn (Class) * sizeof (uint8_t)); - memset(Class->ProtoLengths, 0, - MaxNumIntProtosIn(Class) * sizeof(*Class->ProtoLengths)); - } else { - Class->ProtoLengths = nullptr; - } - memset(Class->ConfigLengths, 0, sizeof(Class->ConfigLengths)); - - return (Class); - -} /* NewIntClass */ - -static void free_int_class(INT_CLASS int_class) { - int i; - - for (i = 0; i < int_class->NumProtoSets; i++) { - Efree (int_class->ProtoSets[i]); - } - if (int_class->ProtoLengths != nullptr) { - Efree (int_class->ProtoLengths); - } - Efree(int_class); -} - -/** - * This routine allocates a new set of integer templates - * initialized to hold 0 classes. - * @return The integer templates created. 
- * @note Globals: none - */ -INT_TEMPLATES NewIntTemplates() { - INT_TEMPLATES T; - int i; - - T = (INT_TEMPLATES) Emalloc (sizeof (INT_TEMPLATES_STRUCT)); - T->NumClasses = 0; - T->NumClassPruners = 0; - - for (i = 0; i < MAX_NUM_CLASSES; i++) - ClassForClassId (T, i) = nullptr; - - return (T); -} /* NewIntTemplates */ - - -/*---------------------------------------------------------------------------*/ -void free_int_templates(INT_TEMPLATES templates) { - int i; - - for (i = 0; i < templates->NumClasses; i++) - free_int_class(templates->Class[i]); - for (i = 0; i < templates->NumClassPruners; i++) - delete templates->ClassPruners[i]; - Efree(templates); -} - - -namespace tesseract { -/** - * This routine reads a set of integer templates from - * File. File must already be open and must be in the - * correct binary format. - * @param fp open file to read templates from - * @return Pointer to integer templates read from File. - * @note Globals: none - */ -INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { - int i, j, w, x, y, z; - int unicharset_size; - int version_id = 0; - INT_TEMPLATES Templates; - CLASS_PRUNER_STRUCT* Pruner; - INT_CLASS Class; - uint8_t *Lengths; - PROTO_SET ProtoSet; - - /* variables for conversion from older inttemp formats */ - int b, bit_number, last_cp_bit_number, new_b, new_i, new_w; - CLASS_ID class_id, max_class_id; - int16_t *IndexFor = new int16_t[MAX_NUM_CLASSES]; - CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES]; - CLASS_PRUNER_STRUCT **TempClassPruner = - new CLASS_PRUNER_STRUCT*[MAX_NUM_CLASS_PRUNERS]; - uint32_t SetBitsForMask = // word with NUM_BITS_PER_CLASS - (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0 - uint32_t Mask, NewMask, ClassBits; - int MaxNumConfigs = MAX_NUM_CONFIGS; - int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC; - - /* first read the high level template struct */ - Templates = NewIntTemplates(); - // Read Templates in parts for 64 bit compatibility. 
- if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1) - tprintf("Bad read of inttemp!\n"); - if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), - 1) != 1 || - fp->FReadEndian(&Templates->NumClassPruners, - sizeof(Templates->NumClassPruners), 1) != 1) - tprintf("Bad read of inttemp!\n"); - if (Templates->NumClasses < 0) { - // This file has a version id! - version_id = -Templates->NumClasses; - if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), - 1) != 1) - tprintf("Bad read of inttemp!\n"); - } - - if (version_id < 3) { - MaxNumConfigs = OLD_MAX_NUM_CONFIGS; - WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC; - } - - if (version_id < 2) { - if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) != - unicharset_size) { - tprintf("Bad read of inttemp!\n"); - } - if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]), - Templates->NumClasses) != Templates->NumClasses) { - tprintf("Bad read of inttemp!\n"); - } - } - - /* then read in the class pruners */ - const int kNumBuckets = - NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR; - for (i = 0; i < Templates->NumClassPruners; i++) { - Pruner = new CLASS_PRUNER_STRUCT; - if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) != - kNumBuckets) { - tprintf("Bad read of inttemp!\n"); - } - if (version_id < 2) { - TempClassPruner[i] = Pruner; - } else { - Templates->ClassPruners[i] = Pruner; - } - } - - /* fix class pruners if they came from an old version of inttemp */ - if (version_id < 2) { - // Allocate enough class pruners to cover all the class ids. 
- max_class_id = 0; - for (i = 0; i < Templates->NumClasses; i++) - if (ClassIdFor[i] > max_class_id) - max_class_id = ClassIdFor[i]; - for (i = 0; i <= CPrunerIdFor(max_class_id); i++) { - Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT; - memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT)); - } - // Convert class pruners from the old format (indexed by class index) - // to the new format (indexed by class id). - last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1; - for (i = 0; i < Templates->NumClassPruners; i++) { - for (x = 0; x < NUM_CP_BUCKETS; x++) - for (y = 0; y < NUM_CP_BUCKETS; y++) - for (z = 0; z < NUM_CP_BUCKETS; z++) - for (w = 0; w < WERDS_PER_CP_VECTOR; w++) { - if (TempClassPruner[i]->p[x][y][z][w] == 0) - continue; - for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) { - bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b; - if (bit_number > last_cp_bit_number) - break; // the rest of the bits in this word are not used - class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS]; - // Single out NUM_BITS_PER_CLASS bits relating to class_id. - Mask = SetBitsForMask << b; - ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask; - // Move these bits to the new position in which they should - // appear (indexed corresponding to the class_id). - new_i = CPrunerIdFor(class_id); - new_w = CPrunerWordIndexFor(class_id); - new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS; - if (new_b > b) { - ClassBits <<= (new_b - b); - } else { - ClassBits >>= (b - new_b); - } - // Copy bits relating to class_id to the correct position - // in Templates->ClassPruner. 
- NewMask = SetBitsForMask << new_b; - Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask; - Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits; - } - } - } - for (i = 0; i < Templates->NumClassPruners; i++) { - delete TempClassPruner[i]; - } - } - - /* then read in each class */ - for (i = 0; i < Templates->NumClasses; i++) { - /* first read in the high level struct for the class */ - Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT)); - if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 || - fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 || - fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1) - tprintf("Bad read of inttemp!\n"); - if (version_id == 0) { - // Only version 0 writes 5 pointless pointers to the file. - for (j = 0; j < 5; ++j) { - int32_t junk; - if (fp->FRead(&junk, sizeof(junk), 1) != 1) - tprintf("Bad read of inttemp!\n"); - } - } - int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs; - ASSERT_HOST(num_configs <= MaxNumConfigs); - if (fp->FReadEndian(Class->ConfigLengths, sizeof(uint16_t), num_configs) != - num_configs) { - tprintf("Bad read of inttemp!\n"); - } - if (version_id < 2) { - ClassForClassId (Templates, ClassIdFor[i]) = Class; - } else { - ClassForClassId (Templates, i) = Class; - } - - /* then read in the proto lengths */ - Lengths = nullptr; - if (MaxNumIntProtosIn (Class) > 0) { - Lengths = (uint8_t *)Emalloc(sizeof(uint8_t) * MaxNumIntProtosIn(Class)); - if (fp->FRead(Lengths, sizeof(uint8_t), MaxNumIntProtosIn(Class)) != - MaxNumIntProtosIn(Class)) - tprintf("Bad read of inttemp!\n"); - } - Class->ProtoLengths = Lengths; - - /* then read in the proto sets */ - for (j = 0; j < Class->NumProtoSets; j++) { - ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT)); - int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR; - if (fp->FReadEndian(&ProtoSet->ProtoPruner, - sizeof(ProtoSet->ProtoPruner[0][0][0]), - 
num_buckets) != num_buckets) - tprintf("Bad read of inttemp!\n"); - for (x = 0; x < PROTOS_PER_PROTO_SET; x++) { - if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A), - 1) != 1 || - fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B), - 1) != 1 || - fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C), - 1) != 1 || - fp->FRead(&ProtoSet->Protos[x].Angle, - sizeof(ProtoSet->Protos[x].Angle), 1) != 1) - tprintf("Bad read of inttemp!\n"); - if (fp->FReadEndian(&ProtoSet->Protos[x].Configs, - sizeof(ProtoSet->Protos[x].Configs[0]), - WerdsPerConfigVec) != WerdsPerConfigVec) - cprintf("Bad read of inttemp!\n"); - } - Class->ProtoSets[j] = ProtoSet; - } - if (version_id < 4) { - Class->font_set_id = -1; - } else { - fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1); - } - } - - if (version_id < 2) { - /* add an empty nullptr class with class id 0 */ - assert(UnusedClassIdIn (Templates, 0)); - ClassForClassId (Templates, 0) = NewIntClass (1, 1); - ClassForClassId (Templates, 0)->font_set_id = -1; - Templates->NumClasses++; - /* make sure the classes are contiguous */ - for (i = 0; i < MAX_NUM_CLASSES; i++) { - if (i < Templates->NumClasses) { - if (ClassForClassId (Templates, i) == nullptr) { - fprintf(stderr, "Non-contiguous class ids in inttemp\n"); - exit(1); - } - } else { - if (ClassForClassId (Templates, i) != nullptr) { - fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n", - i, Templates->NumClasses); - exit(1); - } - } - } - } - if (version_id >= 4) { - this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info)); - if (version_id >= 5) { - this->fontinfo_table_.read(fp, - NewPermanentTessCallback(read_spacing_info)); - } - this->fontset_table_.read(fp, NewPermanentTessCallback(read_set)); - } - - // Clean up. 
- delete[] IndexFor; - delete[] ClassIdFor; - delete[] TempClassPruner; - - return (Templates); -} /* ReadIntTemplates */ - - -#ifndef GRAPHICS_DISABLED -/** - * This routine sends the shapes in the global display - * lists to the match debugger window. - * - * Globals: - * - FeatureShapes display list containing feature matches - * - ProtoShapes display list containing proto matches - * @return none - */ -void Classify::ShowMatchDisplay() { - InitIntMatchWindowIfReqd(); - if (ProtoDisplayWindow) { - ProtoDisplayWindow->Clear(); - } - if (FeatureDisplayWindow) { - FeatureDisplayWindow->Clear(); - } - ClearFeatureSpaceWindow( - static_cast(static_cast(classify_norm_method)), - IntMatchWindow); - IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, - INT_MAX_X, INT_MAX_Y); - if (ProtoDisplayWindow) { - ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, - INT_MAX_X, INT_MAX_Y); - } - if (FeatureDisplayWindow) { - FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, - INT_MAX_X, INT_MAX_Y); - } -} /* ShowMatchDisplay */ - -/// Clears the given window and draws the featurespace guides for the -/// appropriate normalization method. -void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) { - window->Clear(); - - window->Pen(ScrollView::GREY); - // Draw the feature space limit rectangle. - window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y); - if (norm_method == baseline) { - window->SetCursor(0, INT_DESCENDER); - window->DrawTo(INT_MAX_X, INT_DESCENDER); - window->SetCursor(0, INT_BASELINE); - window->DrawTo(INT_MAX_X, INT_BASELINE); - window->SetCursor(0, INT_XHEIGHT); - window->DrawTo(INT_MAX_X, INT_XHEIGHT); - window->SetCursor(0, INT_CAPHEIGHT); - window->DrawTo(INT_MAX_X, INT_CAPHEIGHT); - } else { - window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS, - INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS); - } -} -#endif - -/** - * This routine writes Templates to File. The format - * is an efficient binary format. 
File must already be open - * for writing. - * @param File open file to write templates to - * @param Templates templates to save into File - * @param target_unicharset the UNICHARSET to use - * @return none - * @note Globals: none - */ -void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, - const UNICHARSET& target_unicharset) { - int i, j; - INT_CLASS Class; - int unicharset_size = target_unicharset.size(); - int version_id = -5; // When negated by the reader -1 becomes +1 etc. - - if (Templates->NumClasses != unicharset_size) { - cprintf("Warning: executing WriteIntTemplates() with %d classes in" - " Templates, while target_unicharset size is %d\n", - Templates->NumClasses, unicharset_size); - } - - /* first write the high level template struct */ - fwrite(&unicharset_size, sizeof(unicharset_size), 1, File); - fwrite(&version_id, sizeof(version_id), 1, File); - fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), - 1, File); - fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File); - - /* then write out the class pruners */ - for (i = 0; i < Templates->NumClassPruners; i++) - fwrite(Templates->ClassPruners[i], - sizeof(CLASS_PRUNER_STRUCT), 1, File); - - /* then write out each class */ - for (i = 0; i < Templates->NumClasses; i++) { - Class = Templates->Class[i]; - - /* first write out the high level struct for the class */ - fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File); - fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File); - ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size); - fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File); - for (j = 0; j < Class->NumConfigs; ++j) { - fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File); - } - - /* then write out the proto lengths */ - if (MaxNumIntProtosIn (Class) > 0) { - fwrite(Class->ProtoLengths, sizeof(uint8_t), - MaxNumIntProtosIn(Class), File); - } - - /* then write out the proto 
sets */ - for (j = 0; j < Class->NumProtoSets; j++) - fwrite(Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File); - - /* then write the fonts info */ - fwrite(&Class->font_set_id, sizeof(int), 1, File); - } - - /* Write the fonts info tables */ - this->fontinfo_table_.write(File, NewPermanentTessCallback(write_info)); - this->fontinfo_table_.write(File, - NewPermanentTessCallback(write_spacing_info)); - this->fontset_table_.write(File, NewPermanentTessCallback(write_set)); -} /* WriteIntTemplates */ -} // namespace tesseract - - -/*----------------------------------------------------------------------------- - Private Code ------------------------------------------------------------------------------*/ -/** - * This routine returns the parameter value which - * corresponds to the beginning of the specified bucket. - * The bucket number should have been generated using the - * BucketFor() function with parameters Offset and NumBuckets. - * @param Bucket bucket whose start is to be computed - * @param Offset offset used to map params to buckets - * @param NumBuckets total number of buckets - * @return Param value corresponding to start position of Bucket. - * @note Globals: none - */ -float BucketStart(int Bucket, float Offset, int NumBuckets) { - return (((float) Bucket / NumBuckets) - Offset); - -} /* BucketStart */ - -/** - * This routine returns the parameter value which - * corresponds to the end of the specified bucket. - * The bucket number should have been generated using the - * BucketFor() function with parameters Offset and NumBuckets. - * @param Bucket bucket whose end is to be computed - * @param Offset offset used to map params to buckets - * @param NumBuckets total number of buckets - * @return Param value corresponding to end position of Bucket. 
- * @note Globals: none - */ -float BucketEnd(int Bucket, float Offset, int NumBuckets) { - return (((float) (Bucket + 1) / NumBuckets) - Offset); -} /* BucketEnd */ - -/** - * This routine fills in the section of a class pruner - * corresponding to a single x value for a single proto of - * a class. - * @param FillSpec specifies which bits to fill in pruner - * @param Pruner class pruner to be filled - * @param ClassMask indicates which bits to change in each word - * @param ClassCount indicates what to change bits to - * @param WordIndex indicates which word to change - * @return none - * @note Globals: none - */ -void DoFill(FILL_SPEC *FillSpec, - CLASS_PRUNER_STRUCT* Pruner, - uint32_t ClassMask, - uint32_t ClassCount, - uint32_t WordIndex) { - int X, Y, Angle; - uint32_t OldWord; - - X = FillSpec->X; - if (X < 0) - X = 0; - if (X >= NUM_CP_BUCKETS) - X = NUM_CP_BUCKETS - 1; - - if (FillSpec->YStart < 0) - FillSpec->YStart = 0; - if (FillSpec->YEnd >= NUM_CP_BUCKETS) - FillSpec->YEnd = NUM_CP_BUCKETS - 1; - - for (Y = FillSpec->YStart; Y <= FillSpec->YEnd; Y++) - for (Angle = FillSpec->AngleStart; - TRUE; CircularIncrement (Angle, NUM_CP_BUCKETS)) { - OldWord = Pruner->p[X][Y][Angle][WordIndex]; - if (ClassCount > (OldWord & ClassMask)) { - OldWord &= ~ClassMask; - OldWord |= ClassCount; - Pruner->p[X][Y][Angle][WordIndex] = OldWord; - } - if (Angle == FillSpec->AngleEnd) - break; - } -} /* DoFill */ - -/** - * Return TRUE if the specified table filler is done, i.e. - * if it has no more lines to fill. - * @param Filler table filler to check if done - * @return TRUE if no more lines to fill, FALSE otherwise. - * @note Globals: none - */ -bool FillerDone(TABLE_FILLER* Filler) { - FILL_SWITCH *Next; - - Next = &(Filler->Switch[Filler->NextSwitch]); - - return Filler->X > Next->X && Next->Type == LastSwitch; - -} /* FillerDone */ - -/** - * This routine sets Bit in each bit vector whose - * bucket lies within the range Center +- Spread. 
The fill - * is done for a circular dimension, i.e. bucket 0 is adjacent - * to the last bucket. It is assumed that Center and Spread - * are expressed in a circular coordinate system whose range - * is 0 to 1. - * @param ParamTable table of bit vectors, one per param bucket - * @param Bit bit position in vectors to be filled - * @param Center center of filled area - * @param Spread spread of filled area - * @param debug debug flag - * @return none - * @note Globals: none - */ -void FillPPCircularBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, float Center, float Spread, bool debug) { - int i, FirstBucket, LastBucket; - - if (Spread > 0.5) - Spread = 0.5; - - FirstBucket = static_cast(std::floor((Center - Spread) * NUM_PP_BUCKETS)); - if (FirstBucket < 0) - FirstBucket += NUM_PP_BUCKETS; - - LastBucket = static_cast(std::floor((Center + Spread) * NUM_PP_BUCKETS)); - if (LastBucket >= NUM_PP_BUCKETS) - LastBucket -= NUM_PP_BUCKETS; - if (debug) tprintf("Circular fill from %d to %d", FirstBucket, LastBucket); - for (i = FirstBucket; TRUE; CircularIncrement (i, NUM_PP_BUCKETS)) { - SET_BIT (ParamTable[i], Bit); - - /* exit loop after we have set the bit for the last bucket */ - if (i == LastBucket) - break; - } - -} /* FillPPCircularBits */ - -/** - * This routine sets Bit in each bit vector whose - * bucket lies within the range Center +- Spread. The fill - * is done for a linear dimension, i.e. there is no wrap-around - * for this dimension. It is assumed that Center and Spread - * are expressed in a linear coordinate system whose range - * is approximately 0 to 1. Values outside this range will - * be clipped. 
- * @param ParamTable table of bit vectors, one per param bucket - * @param Bit bit number being filled - * @param Center center of filled area - * @param Spread spread of filled area - * @param debug debug flag - * @return none - * @note Globals: none - */ -void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, float Center, float Spread, bool debug) { - int i, FirstBucket, LastBucket; - - FirstBucket = static_cast(std::floor((Center - Spread) * NUM_PP_BUCKETS)); - if (FirstBucket < 0) - FirstBucket = 0; - - LastBucket = static_cast(std::floor((Center + Spread) * NUM_PP_BUCKETS)); - if (LastBucket >= NUM_PP_BUCKETS) - LastBucket = NUM_PP_BUCKETS - 1; - - if (debug) tprintf("Linear fill from %d to %d", FirstBucket, LastBucket); - for (i = FirstBucket; i <= LastBucket; i++) - SET_BIT (ParamTable[i], Bit); - -} /* FillPPLinearBits */ - - -/*---------------------------------------------------------------------------*/ -#ifndef GRAPHICS_DISABLED -namespace tesseract { -/** - * This routine prompts the user with Prompt and waits - * for the user to enter something in the debug window. - * @param Prompt prompt to print while waiting for input from window - * @param adaptive_on - * @param pretrained_on - * @param shape_id - * @return Character entered in the debug window. - * @note Globals: none - */ -CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on, - bool* pretrained_on, int* shape_id) { - tprintf("%s\n", Prompt); - SVEvent* ev; - SVEventType ev_type; - int unichar_id = INVALID_UNICHAR_ID; - // Wait until a click or popup event. 
- do { - ev = IntMatchWindow->AwaitEvent(SVET_ANY); - ev_type = ev->type; - if (ev_type == SVET_POPUP) { - if (ev->command_id == IDA_SHAPE_INDEX) { - if (shape_table_ != nullptr) { - *shape_id = atoi(ev->parameter); - *adaptive_on = false; - *pretrained_on = true; - if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) { - int font_id; - shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id, - &font_id); - tprintf("Shape %d, first unichar=%d, font=%d\n", - *shape_id, unichar_id, font_id); - return unichar_id; - } - tprintf("Shape index '%s' not found in shape table\n", ev->parameter); - } else { - tprintf("No shape table loaded!\n"); - } - } else { - if (unicharset.contains_unichar(ev->parameter)) { - unichar_id = unicharset.unichar_to_id(ev->parameter); - if (ev->command_id == IDA_ADAPTIVE) { - *adaptive_on = true; - *pretrained_on = false; - *shape_id = -1; - } else if (ev->command_id == IDA_STATIC) { - *adaptive_on = false; - *pretrained_on = true; - } else { - *adaptive_on = true; - *pretrained_on = true; - } - if (ev->command_id == IDA_ADAPTIVE || shape_table_ == nullptr) { - *shape_id = -1; - return unichar_id; - } - for (int s = 0; s < shape_table_->NumShapes(); ++s) { - if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) { - tprintf("%s\n", shape_table_->DebugStr(s).string()); - } - } - } else { - tprintf("Char class '%s' not found in unicharset", - ev->parameter); - } - } - } - delete ev; - } while (ev_type != SVET_CLICK); - return 0; -} /* GetClassToDebug */ - -} // namespace tesseract -#endif - -/** - * This routine copies the appropriate global pad variables - * into EndPad, SidePad, and AnglePad. This is a kludge used - * to get around the fact that global control variables cannot - * be arrays. If the specified level is illegal, the tightest - * possible pads are returned. 
- * @param Level "tightness" level to return pads for - * @param EndPad place to put end pad for Level - * @param SidePad place to put side pad for Level - * @param AnglePad place to put angle pad for Level - * @return none (results are returned in EndPad, SidePad, and AnglePad. - * @note Globals: none - */ -void GetCPPadsForLevel(int Level, - float *EndPad, - float *SidePad, - float *AnglePad) { - switch (Level) { - case 0: - *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength (); - *AnglePad = classify_cp_angle_pad_loose / 360.0; - break; - - case 1: - *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength (); - *AnglePad = classify_cp_angle_pad_medium / 360.0; - break; - - case 2: - *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); - *AnglePad = classify_cp_angle_pad_tight / 360.0; - break; - - default: - *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); - *AnglePad = classify_cp_angle_pad_tight / 360.0; - break; - } - if (*AnglePad > 0.5) - *AnglePad = 0.5; - -} /* GetCPPadsForLevel */ - -/** - * @param Evidence evidence value to return color for - * @return Color which corresponds to specified Evidence value. - * @note Globals: none - */ -ScrollView::Color GetMatchColorFor(float Evidence) { - assert (Evidence >= 0.0); - assert (Evidence <= 1.0); - - if (Evidence >= 0.90) - return ScrollView::WHITE; - else if (Evidence >= 0.75) - return ScrollView::GREEN; - else if (Evidence >= 0.50) - return ScrollView::RED; - else - return ScrollView::BLUE; -} /* GetMatchColorFor */ - -/** - * This routine returns (in Fill) the specification of - * the next line to be filled from Filler. 
FillerDone() should - * always be called before GetNextFill() to ensure that we - * do not run past the end of the fill table. - * @param Filler filler to get next fill spec from - * @param Fill place to put spec for next fill - * @return none (results are returned in Fill) - * @note Globals: none - */ -void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { - FILL_SWITCH *Next; - - /* compute the fill assuming no switches will be encountered */ - Fill->AngleStart = Filler->AngleStart; - Fill->AngleEnd = Filler->AngleEnd; - Fill->X = Filler->X; - Fill->YStart = Filler->YStart >> 8; - Fill->YEnd = Filler->YEnd >> 8; - - /* update the fill info and the filler for ALL switches at this X value */ - Next = &(Filler->Switch[Filler->NextSwitch]); - while (Filler->X >= Next->X) { - Fill->X = Filler->X = Next->X; - if (Next->Type == StartSwitch) { - Fill->YStart = Next->Y; - Filler->StartDelta = Next->Delta; - Filler->YStart = Next->YInit; - } - else if (Next->Type == EndSwitch) { - Fill->YEnd = Next->Y; - Filler->EndDelta = Next->Delta; - Filler->YEnd = Next->YInit; - } - else { /* Type must be LastSwitch */ - break; - } - Filler->NextSwitch++; - Next = &(Filler->Switch[Filler->NextSwitch]); - } - - /* prepare the filler for the next call to this routine */ - Filler->X++; - Filler->YStart += Filler->StartDelta; - Filler->YEnd += Filler->EndDelta; - -} /* GetNextFill */ - -/** - * This routine computes a data structure (Filler) - * which can be used to fill in a rectangle surrounding - * the specified Proto. 
- * - * @param EndPad, SidePad, AnglePad padding to add to proto - * @param Proto proto to create a filler for - * @param Filler place to put table filler - * - * @return none (results are returned in Filler) - * @note Globals: none - */ -void InitTableFiller (float EndPad, float SidePad, - float AnglePad, PROTO Proto, TABLE_FILLER * Filler) -#define XS X_SHIFT -#define YS Y_SHIFT -#define AS ANGLE_SHIFT -#define NB NUM_CP_BUCKETS -{ - float Angle; - float X, Y, HalfLength; - float Cos, Sin; - float XAdjust, YAdjust; - FPOINT Start, Switch1, Switch2, End; - int S1 = 0; - int S2 = 1; - - Angle = Proto->Angle; - X = Proto->X; - Y = Proto->Y; - HalfLength = Proto->Length / 2.0; - - Filler->AngleStart = CircBucketFor(Angle - AnglePad, AS, NB); - Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB); - Filler->NextSwitch = 0; - - if (fabs (Angle - 0.0) < HV_TOLERANCE || fabs (Angle - 0.5) < HV_TOLERANCE) { - /* horizontal proto - handle as special case */ - Filler->X = Bucket8For(X - HalfLength - EndPad, XS, NB); - Filler->YStart = Bucket16For(Y - SidePad, YS, NB * 256); - Filler->YEnd = Bucket16For(Y + SidePad, YS, NB * 256); - Filler->StartDelta = 0; - Filler->EndDelta = 0; - Filler->Switch[0].Type = LastSwitch; - Filler->Switch[0].X = Bucket8For(X + HalfLength + EndPad, XS, NB); - } else if (fabs(Angle - 0.25) < HV_TOLERANCE || - fabs(Angle - 0.75) < HV_TOLERANCE) { - /* vertical proto - handle as special case */ - Filler->X = Bucket8For(X - SidePad, XS, NB); - Filler->YStart = Bucket16For(Y - HalfLength - EndPad, YS, NB * 256); - Filler->YEnd = Bucket16For(Y + HalfLength + EndPad, YS, NB * 256); - Filler->StartDelta = 0; - Filler->EndDelta = 0; - Filler->Switch[0].Type = LastSwitch; - Filler->Switch[0].X = Bucket8For(X + SidePad, XS, NB); - } else { - /* diagonal proto */ - - if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) { - /* rising diagonal proto */ - Angle *= 2.0 * M_PI; - Cos = fabs(cos(Angle)); - Sin = fabs(sin(Angle)); - - /* 
compute the positions of the corners of the acceptance region */ - Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; - Start.y = Y - (HalfLength + EndPad) * Sin + SidePad * Cos; - End.x = 2.0 * X - Start.x; - End.y = 2.0 * Y - Start.y; - Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin; - Switch1.y = Y - (HalfLength + EndPad) * Sin - SidePad * Cos; - Switch2.x = 2.0 * X - Switch1.x; - Switch2.y = 2.0 * Y - Switch1.y; - - if (Switch1.x > Switch2.x) { - S1 = 1; - S2 = 0; - } - - /* translate into bucket positions and deltas */ - Filler->X = Bucket8For(Start.x, XS, NB); - Filler->StartDelta = -(int16_t) ((Cos / Sin) * 256); - Filler->EndDelta = (int16_t) ((Sin / Cos) * 256); - - XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; - YAdjust = XAdjust * Cos / Sin; - Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256); - YAdjust = XAdjust * Sin / Cos; - Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256); - - Filler->Switch[S1].Type = StartSwitch; - Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB); - Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB); - XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); - YAdjust = XAdjust * Sin / Cos; - Filler->Switch[S1].YInit = Bucket16For(Switch1.y - YAdjust, YS, NB * 256); - Filler->Switch[S1].Delta = Filler->EndDelta; - - Filler->Switch[S2].Type = EndSwitch; - Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB); - Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB); - XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); - YAdjust = XAdjust * Cos / Sin; - Filler->Switch[S2].YInit = Bucket16For(Switch2.y + YAdjust, YS, NB * 256); - Filler->Switch[S2].Delta = Filler->StartDelta; - - Filler->Switch[2].Type = LastSwitch; - Filler->Switch[2].X = Bucket8For(End.x, XS, NB); - } else { - /* falling diagonal proto */ - Angle *= 2.0 * M_PI; - Cos = fabs(cos(Angle)); - Sin = fabs(sin(Angle)); - - /* compute the positions of the corners of the acceptance region */ - Start.x = 
X - (HalfLength + EndPad) * Cos - SidePad * Sin; - Start.y = Y + (HalfLength + EndPad) * Sin - SidePad * Cos; - End.x = 2.0 * X - Start.x; - End.y = 2.0 * Y - Start.y; - Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin; - Switch1.y = Y + (HalfLength + EndPad) * Sin + SidePad * Cos; - Switch2.x = 2.0 * X - Switch1.x; - Switch2.y = 2.0 * Y - Switch1.y; - - if (Switch1.x > Switch2.x) { - S1 = 1; - S2 = 0; - } - - /* translate into bucket positions and deltas */ - Filler->X = Bucket8For(Start.x, XS, NB); - Filler->StartDelta = static_cast(ClipToRange( - -IntCastRounded((Sin / Cos) * 256), INT16_MIN, INT16_MAX)); - Filler->EndDelta = static_cast(ClipToRange( - IntCastRounded((Cos / Sin) * 256), INT16_MIN, INT16_MAX)); - - XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; - YAdjust = XAdjust * Sin / Cos; - Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256); - YAdjust = XAdjust * Cos / Sin; - Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256); - - Filler->Switch[S1].Type = EndSwitch; - Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB); - Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB); - XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); - YAdjust = XAdjust * Sin / Cos; - Filler->Switch[S1].YInit = Bucket16For(Switch1.y + YAdjust, YS, NB * 256); - Filler->Switch[S1].Delta = Filler->StartDelta; - - Filler->Switch[S2].Type = StartSwitch; - Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB); - Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB); - XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); - YAdjust = XAdjust * Cos / Sin; - Filler->Switch[S2].YInit = Bucket16For(Switch2.y - YAdjust, YS, NB * 256); - Filler->Switch[S2].Delta = Filler->EndDelta; - - Filler->Switch[2].Type = LastSwitch; - Filler->Switch[2].X = Bucket8For(End.x, XS, NB); - } - } -} /* InitTableFiller */ - - -/*---------------------------------------------------------------------------*/ -#ifndef GRAPHICS_DISABLED -/** - * This routine 
renders the specified feature into ShapeList. - * @param window to add feature rendering to - * @param Feature feature to be rendered - * @param color color to use for feature rendering - * @return New shape list with rendering of Feature added. - * @note Globals: none - */ -void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, - ScrollView::Color color) { - float X, Y, Dx, Dy, Length; - - window->Pen(color); - assert(Feature != nullptr); - assert(color != 0); - - X = Feature->X; - Y = Feature->Y; - Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE; - // The -PI has no significant effect here, but the value of Theta is computed - // using BinaryAnglePlusPi in intfx.cpp. - Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI); - Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI); - - window->SetCursor(X, Y); - window->DrawTo(X + Dx, Y + Dy); -} /* RenderIntFeature */ - -/** - * This routine extracts the parameters of the specified - * proto from the class description and adds a rendering of - * the proto onto the ShapeList. - * - * @param window ScrollView instance - * @param Class class that proto is contained in - * @param ProtoId id of proto to be rendered - * @param color color to render proto in - * - * Globals: none - * - * @return New shape list with a rendering of one proto added. 
- */ -void RenderIntProto(ScrollView *window, - INT_CLASS Class, - PROTO_ID ProtoId, - ScrollView::Color color) { - PROTO_SET ProtoSet; - INT_PROTO Proto; - int ProtoSetIndex; - int ProtoWordIndex; - float Length; - int Xmin, Xmax, Ymin, Ymax; - float X, Y, Dx, Dy; - uint32_t ProtoMask; - int Bucket; - - assert(ProtoId >= 0); - assert(Class != nullptr); - assert(ProtoId < Class->NumProtos); - assert(color != 0); - window->Pen(color); - - ProtoSet = Class->ProtoSets[SetForProto(ProtoId)]; - ProtoSetIndex = IndexForProto(ProtoId); - Proto = &(ProtoSet->Protos[ProtoSetIndex]); - Length = (Class->ProtoLengths[ProtoId] * - GetPicoFeatureLength() * INT_CHAR_NORM_RANGE); - ProtoMask = PPrunerMaskFor(ProtoId); - ProtoWordIndex = PPrunerWordIndexFor(ProtoId); - - // find the x and y extent of the proto from the proto pruning table - Xmin = Ymin = NUM_PP_BUCKETS; - Xmax = Ymax = 0; - for (Bucket = 0; Bucket < NUM_PP_BUCKETS; Bucket++) { - if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_X][Bucket][ProtoWordIndex]) { - UpdateRange(Bucket, &Xmin, &Xmax); - } - - if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_Y][Bucket][ProtoWordIndex]) { - UpdateRange(Bucket, &Ymin, &Ymax); - } - } - X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE; - Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE; - // The -PI has no significant effect here, but the value of Theta is computed - // using BinaryAnglePlusPi in intfx.cpp. - Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI); - Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI); - - window->SetCursor(X - Dx, Y - Dy); - window->DrawTo(X + Dx, Y + Dy); -} /* RenderIntProto */ -#endif - -/** - * This routine truncates Param to lie within the range - * of Min-Max inclusive. If a truncation is performed, and - * Id is not null, an warning message is printed. 
- * - * @param Param parameter value to be truncated - * @param Min, Max parameter limits (inclusive) - * @param Id string id of parameter for error messages - * - * Globals: none - * - * @return Truncated parameter. - */ -int TruncateParam(float Param, int Min, int Max, char *Id) { - if (Param < Min) { - if (Id) - cprintf("Warning: Param %s truncated from %f to %d!\n", - Id, Param, Min); - Param = Min; - } else if (Param > Max) { - if (Id) - cprintf("Warning: Param %s truncated from %f to %d!\n", - Id, Param, Max); - Param = Max; - } - return static_cast(std::floor(Param)); -} /* TruncateParam */ - - -#ifndef GRAPHICS_DISABLED -/** - * Initializes the int matcher window if it is not already - * initialized. - */ -void InitIntMatchWindowIfReqd() { - if (IntMatchWindow == nullptr) { - IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200); - SVMenuNode* popup_menu = new SVMenuNode(); - - popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, - "x", "Class to debug"); - popup_menu->AddChild("Debug Static classes", IDA_STATIC, - "x", "Class to debug"); - popup_menu->AddChild("Debug Both", IDA_BOTH, - "x", "Class to debug"); - popup_menu->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, - "0", "Index to debug"); - popup_menu->BuildMenu(IntMatchWindow, false); - } -} - -/** - * Initializes the proto display window if it is not already - * initialized. - */ -void InitProtoDisplayWindowIfReqd() { - if (ProtoDisplayWindow == nullptr) { - ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", - 550, 200); - } -} - -/** - * Initializes the feature display window if it is not already - * initialized. - */ -void InitFeatureDisplayWindowIfReqd() { - if (FeatureDisplayWindow == nullptr) { - FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", - 50, 700); - } -} - -/// Creates a window of the appropriate size for displaying elements -/// in feature space. 
-ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) { - return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true); -} -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intproto.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intproto.h deleted file mode 100644 index 90e27111..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/intproto.h +++ /dev/null @@ -1,261 +0,0 @@ -/****************************************************************************** - ** Filename: intproto.h - ** Purpose: Definition of data structures for integer protos. - ** Author: Dan Johnson - ** History: Thu Feb 7 12:58:45 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- *****************************************************************************/ - -#ifndef INTPROTO_H -#define INTPROTO_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "genericvector.h" -#include "matchdefs.h" -#include "mfoutline.h" -#include "protos.h" -#include "scrollview.h" -#include "unicharset.h" - -class FCOORD; - -/* define order of params in pruners */ -#define PRUNER_X 0 -#define PRUNER_Y 1 -#define PRUNER_ANGLE 2 - -/* definition of coordinate system offsets for each table parameter */ -#define ANGLE_SHIFT (0.0) -#define X_SHIFT (0.5) -#define Y_SHIFT (0.5) - -#define MAX_PROTO_INDEX 24 -#define BITS_PER_WERD static_cast(8 * sizeof(uint32_t)) -/* Script detection: increase this number to 128 */ -#define MAX_NUM_CONFIGS 64 -#define MAX_NUM_PROTOS 512 -#define PROTOS_PER_PROTO_SET 64 -#define MAX_NUM_PROTO_SETS (MAX_NUM_PROTOS / PROTOS_PER_PROTO_SET) -#define NUM_PP_PARAMS 3 -#define NUM_PP_BUCKETS 64 -#define NUM_CP_BUCKETS 24 -#define CLASSES_PER_CP 32 -#define NUM_BITS_PER_CLASS 2 -#define CLASS_PRUNER_CLASS_MASK (~(~0u << NUM_BITS_PER_CLASS)) -#define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS) -#define PROTOS_PER_PP_WERD BITS_PER_WERD -#define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS) -#define MAX_NUM_CLASS_PRUNERS \ - ((MAX_NUM_CLASSES + CLASSES_PER_CP - 1) / CLASSES_PER_CP) -#define WERDS_PER_CP_VECTOR (BITS_PER_CP_VECTOR / BITS_PER_WERD) -#define WERDS_PER_PP_VECTOR \ - ((PROTOS_PER_PROTO_SET + BITS_PER_WERD - 1) / BITS_PER_WERD) -#define WERDS_PER_PP (NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR) -#define WERDS_PER_CP \ - (NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR) -#define WERDS_PER_CONFIG_VEC \ - ((MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / BITS_PER_WERD) - -/* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the - * 3 
axes of the quantized feature space. - * The position of the the bits recorded for each class in the - * 4th dimension is determined by using CPrunerWordIndexFor(c), - * where c is the corresponding class id. */ -struct CLASS_PRUNER_STRUCT { - uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS] - [WERDS_PER_CP_VECTOR]; -}; - -typedef struct { - int8_t A; - uint8_t B; - int8_t C; - uint8_t Angle; - uint32_t Configs[WERDS_PER_CONFIG_VEC]; -} - -INT_PROTO_STRUCT, - *INT_PROTO; - -typedef uint32_t PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS] - [WERDS_PER_PP_VECTOR]; - -typedef struct { - PROTO_PRUNER ProtoPruner; - INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]; -} - -PROTO_SET_STRUCT, - *PROTO_SET; - -typedef uint32_t CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]; - -typedef struct { - uint16_t NumProtos; - uint8_t NumProtoSets; - uint8_t NumConfigs; - PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]; - uint8_t* ProtoLengths; - uint16_t ConfigLengths[MAX_NUM_CONFIGS]; - int font_set_id; // FontSet id, see above -} - -INT_CLASS_STRUCT, - *INT_CLASS; - -typedef struct { - int NumClasses; - int NumClassPruners; - INT_CLASS Class[MAX_NUM_CLASSES]; - CLASS_PRUNER_STRUCT* ClassPruners[MAX_NUM_CLASS_PRUNERS]; -} - -INT_TEMPLATES_STRUCT, - *INT_TEMPLATES; - -/* definitions of integer features*/ -#define MAX_NUM_INT_FEATURES 512 -#define INT_CHAR_NORM_RANGE 256 - -struct INT_FEATURE_STRUCT { - INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) {} - // Builds a feature from an FCOORD for position with all the necessary - // clipping and rounding. - INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta); - // Builds a feature from ints with all the necessary clipping and casting. 
- INT_FEATURE_STRUCT(int x, int y, int theta); - - uint8_t X; - uint8_t Y; - uint8_t Theta; - int8_t CP_misses; - - void print() const { tprintf("(%d,%d):%d\n", X, Y, Theta); } -}; - -typedef INT_FEATURE_STRUCT* INT_FEATURE; - -typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; - -enum IntmatcherDebugAction { - IDA_ADAPTIVE, - IDA_STATIC, - IDA_SHAPE_INDEX, - IDA_BOTH -}; - -/**---------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------**/ - -#define MaxNumIntProtosIn(C) (C->NumProtoSets * PROTOS_PER_PROTO_SET) -#define SetForProto(P) (P / PROTOS_PER_PROTO_SET) -#define IndexForProto(P) (P % PROTOS_PER_PROTO_SET) -#define ProtoForProtoId(C, P) \ - (&((C->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)])) -#define PPrunerWordIndexFor(I) \ - (((I) % PROTOS_PER_PROTO_SET) / PROTOS_PER_PP_WERD) -#define PPrunerBitIndexFor(I) ((I) % PROTOS_PER_PP_WERD) -#define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor(I)) - -#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP) -#define LegalClassId(c) ((c) >= 0 && (c) <= MAX_CLASS_ID) -#define UnusedClassIdIn(T, c) ((T)->Class[c] == nullptr) -#define ClassForClassId(T, c) ((T)->Class[c]) -#define ClassPrunersFor(T) ((T)->ClassPruner) -#define CPrunerIdFor(c) ((c) / CLASSES_PER_CP) -#define CPrunerFor(T, c) ((T)->ClassPruners[CPrunerIdFor(c)]) -#define CPrunerWordIndexFor(c) (((c) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD) -#define CPrunerBitIndexFor(c) (((c) % CLASSES_PER_CP) % CLASSES_PER_CP_WERD) -#define CPrunerMaskFor(L, c) \ - (((L) + 1) << CPrunerBitIndexFor(c) * NUM_BITS_PER_CLASS) - -/* DEBUG macros*/ -#define PRINT_MATCH_SUMMARY 0x001 -#define DISPLAY_FEATURE_MATCHES 0x002 -#define DISPLAY_PROTO_MATCHES 0x004 -#define PRINT_FEATURE_MATCHES 0x008 -#define PRINT_PROTO_MATCHES 0x010 -#define CLIP_MATCH_EVIDENCE 0x020 - -#define MatchDebuggingOn(D) (D) -#define PrintMatchSummaryOn(D) 
((D)&PRINT_MATCH_SUMMARY) -#define DisplayFeatureMatchesOn(D) ((D)&DISPLAY_FEATURE_MATCHES) -#define DisplayProtoMatchesOn(D) ((D)&DISPLAY_PROTO_MATCHES) -#define PrintFeatureMatchesOn(D) ((D)&PRINT_FEATURE_MATCHES) -#define PrintProtoMatchesOn(D) ((D)&PRINT_PROTO_MATCHES) -#define ClipMatchEvidenceOn(D) ((D)&CLIP_MATCH_EVIDENCE) - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class); - -int AddIntConfig(INT_CLASS Class); - -int AddIntProto(INT_CLASS Class); - -void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, - INT_TEMPLATES Templates); - -void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, - bool debug); - -uint8_t Bucket8For(float param, float offset, int num_buckets); -uint16_t Bucket16For(float param, float offset, int num_buckets); - -uint8_t CircBucketFor(float param, float offset, int num_buckets); - -void UpdateMatchDisplay(); - -void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class); - -void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, float Evidence); - -void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence); - -INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs); - -INT_TEMPLATES NewIntTemplates(); - -void free_int_templates(INT_TEMPLATES templates); - -void ShowMatchDisplay(); - -namespace tesseract { - -// Clears the given window and draws the featurespace guides for the -// appropriate normalization method. -void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window); - -} // namespace tesseract. 
- -/*----------------------------------------------------------------------------*/ -#ifndef GRAPHICS_DISABLED -void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature, - ScrollView::Color color); - -void InitIntMatchWindowIfReqd(); - -void InitProtoDisplayWindowIfReqd(); - -void InitFeatureDisplayWindowIfReqd(); - -// Creates a window of the appropriate size for displaying elements -// in feature space. -ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos); -#endif // GRAPHICS_DISABLED - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/kdtree.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/kdtree.cpp deleted file mode 100644 index fbb3a571..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/kdtree.cpp +++ /dev/null @@ -1,540 +0,0 @@ -/****************************************************************************** - ** Filename: kdtree.cpp - ** Purpose: Routines for managing K-D search trees - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "kdtree.h" -#include "cutil.h" // for void_proc -#include "emalloc.h" - -#include -#include // for FLT_MAX -#include -#include - -#define Magnitude(X) ((X) < 0 ? -(X) : (X)) -#define NodeFound(N,K,D) (((N)->Key == (K)) && ((N)->Data == (D))) - -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -#define MINSEARCH -FLT_MAX -#define MAXSEARCH FLT_MAX - -// Helper function to find the next essential dimension in a cycle. -static int NextLevel(KDTREE *tree, int level) { - do { - ++level; - if (level >= tree->KeySize) - level = 0; - } while (tree->KeyDesc[level].NonEssential); - return level; -} - -//----------------------------------------------------------------------------- -/** Store the k smallest-keyed key-value pairs. */ -template -class MinK { - public: - MinK(Key max_key, int k); - ~MinK(); - - struct Element { - Element() {} - Element(const Key& k, const Value& v) : key(k), value(v) {} - - Key key; - Value value; - }; - - bool insert(Key k, Value v); - const Key& max_insertable_key(); - - int elements_count() { return elements_count_; } - const Element* elements() { return elements_; } - - private: - const Key max_key_; //< the maximum possible Key - Element *elements_; //< unsorted array of elements - int elements_count_; //< the number of results collected so far - int k_; //< the number of results we want from the search - int max_index_; //< the index of the result with the largest key -}; - -template -MinK::MinK(Key max_key, int k) : - max_key_(max_key), elements_count_(0), k_(k < 1 ? 
1 : k), max_index_(0) { - elements_ = new Element[k_]; -} - -template -MinK::~MinK() { - delete []elements_; -} - -template -const Key& MinK::max_insertable_key() { - if (elements_count_ < k_) - return max_key_; - return elements_[max_index_].key; -} - -template -bool MinK::insert(Key key, Value value) { - if (elements_count_ < k_) { - elements_[elements_count_++] = Element(key, value); - if (key > elements_[max_index_].key) - max_index_ = elements_count_ - 1; - return true; - } else if (key < elements_[max_index_].key) { - // evict the largest element. - elements_[max_index_] = Element(key, value); - // recompute max_index_ - for (int i = 0; i < elements_count_; i++) { - if (elements_[i].key > elements_[max_index_].key) - max_index_ = i; - } - return true; - } - return false; -} - - -//----------------------------------------------------------------------------- -/** Helper class for searching for the k closest points to query_point in tree. - */ -class KDTreeSearch { - public: - KDTreeSearch(KDTREE* tree, float *query_point, int k_closest); - ~KDTreeSearch(); - - /** Return the k nearest points' data. */ - void Search(int *result_count, float *distances, void **results); - - private: - void SearchRec(int Level, KDNODE *SubTree); - bool BoxIntersectsSearch(float *lower, float *upper); - - KDTREE *tree_; - float *query_point_; - float *sb_min_; //< search box minimum - float *sb_max_; //< search box maximum - MinK results_; -}; - -KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest) - : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) { - sb_min_ = new float[tree->KeySize]; - sb_max_ = new float[tree->KeySize]; -} - -KDTreeSearch::~KDTreeSearch() { - delete[] sb_min_; - delete[] sb_max_; -} - -/// Locate the k_closest points to query_point_, and return their distances and -/// data into the given buffers. 
-void KDTreeSearch::Search(int *result_count, - float *distances, - void **results) { - if (tree_->Root.Left == nullptr) { - *result_count = 0; - } else { - for (int i = 0; i < tree_->KeySize; i++) { - sb_min_[i] = tree_->KeyDesc[i].Min; - sb_max_[i] = tree_->KeyDesc[i].Max; - } - SearchRec(0, tree_->Root.Left); - int count = results_.elements_count(); - *result_count = count; - for (int j = 0; j < count; j++) { - // Pre-cast to float64 as key is a template type and we have no control - // over its actual type. - distances[j] = (float)sqrt((double)results_.elements()[j].key); - results[j] = results_.elements()[j].value; - } - } -} - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/// @return a new KDTREE based on the specified parameters. -/// @param KeySize # of dimensions in the K-D tree -/// @param KeyDesc array of params to describe key dimensions -KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { - KDTREE *KDTree = (KDTREE *) Emalloc( - sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC)); - for (int i = 0; i < KeySize; i++) { - KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; - KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; - if (KeyDesc[i].Circular) { - KDTree->KeyDesc[i].Min = KeyDesc[i].Min; - KDTree->KeyDesc[i].Max = KeyDesc[i].Max; - KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min; - KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2; - KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2; - } else { - KDTree->KeyDesc[i].Min = MINSEARCH; - KDTree->KeyDesc[i].Max = MAXSEARCH; - } - } - KDTree->KeySize = KeySize; - KDTree->Root.Left = nullptr; - KDTree->Root.Right = nullptr; - return KDTree; -} - - -/** - * This routine stores Data in the K-D tree specified by Tree - * using Key as an access key. 
- * - * @param Tree K-D tree in which data is to be stored - * @param Key ptr to key by which data can be retrieved - * @param Data ptr to data to be stored in the tree - */ -void KDStore(KDTREE *Tree, float *Key, void *Data) { - int Level; - KDNODE *Node; - KDNODE **PtrToNode; - - PtrToNode = &(Tree->Root.Left); - Node = *PtrToNode; - Level = NextLevel(Tree, -1); - while (Node != nullptr) { - if (Key[Level] < Node->BranchPoint) { - PtrToNode = &(Node->Left); - if (Key[Level] > Node->LeftBranch) - Node->LeftBranch = Key[Level]; - } - else { - PtrToNode = &(Node->Right); - if (Key[Level] < Node->RightBranch) - Node->RightBranch = Key[Level]; - } - Level = NextLevel(Tree, Level); - Node = *PtrToNode; - } - - *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level); -} /* KDStore */ - -/** - * This routine deletes a node from Tree. The node to be - * deleted is specified by the Key for the node and the Data - * contents of the node. These two pointers must be identical - * to the pointers that were used for the node when it was - * originally stored in the tree. A node will be deleted from - * the tree only if its key and data pointers are identical - * to Key and Data respectively. The tree is re-formed by removing - * the affected subtree and inserting all elements but the root. 
- * - * @param Tree K-D tree to delete node from - * @param Key key of node to be deleted - * @param Data data contents of node to be deleted - */ -void -KDDelete (KDTREE * Tree, float Key[], void *Data) { - int Level; - KDNODE *Current; - KDNODE *Father; - - /* initialize search at root of tree */ - Father = &(Tree->Root); - Current = Father->Left; - Level = NextLevel(Tree, -1); - - /* search tree for node to be deleted */ - while ((Current != nullptr) && (!NodeFound (Current, Key, Data))) { - Father = Current; - if (Key[Level] < Current->BranchPoint) - Current = Current->Left; - else - Current = Current->Right; - - Level = NextLevel(Tree, Level); - } - - if (Current != nullptr) { /* if node to be deleted was found */ - if (Current == Father->Left) { - Father->Left = nullptr; - Father->LeftBranch = Tree->KeyDesc[Level].Min; - } else { - Father->Right = nullptr; - Father->RightBranch = Tree->KeyDesc[Level].Max; - } - - InsertNodes(Tree, Current->Left); - InsertNodes(Tree, Current->Right); - FreeSubTree(Current); - } -} /* KDDelete */ - -/** - * This routine searches the K-D tree specified by Tree and - * finds the QuerySize nearest neighbors of Query. All neighbors - * must be within MaxDistance of Query. The data contents of - * the nearest neighbors - * are placed in NBuffer and their distances from Query are - * placed in DBuffer. 
- * @param Tree ptr to K-D tree to be searched - * @param Query ptr to query key (point in D-space) - * @param QuerySize number of nearest neighbors to be found - * @param MaxDistance all neighbors must be within this distance - * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors - * @param DBuffer ptr to QuerySize buffer to hold distances - * from nearest neighbor to query point - * @param NumberOfResults [out] Number of nearest neighbors actually found - */ -void KDNearestNeighborSearch( - KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, - int *NumberOfResults, void **NBuffer, float DBuffer[]) { - KDTreeSearch search(Tree, Query, QuerySize); - search.Search(NumberOfResults, DBuffer, NBuffer); -} - - -/*---------------------------------------------------------------------------*/ -/** Walk a given Tree with action. */ -void KDWalk(KDTREE *Tree, void_proc action, void *context) { - if (Tree->Root.Left != nullptr) - Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); -} - - -/*---------------------------------------------------------------------------*/ -/** - * This routine frees all memory which is allocated to the - * specified KD-tree. This includes the data structure for - * the kd-tree itself plus the data structures for each node - * in the tree. It does not include the Key and Data items - * which are pointed to by the nodes. This memory is left - * untouched. - * @param Tree tree data structure to be released - * @return none - */ -void FreeKDTree(KDTREE *Tree) { - FreeSubTree(Tree->Root.Left); - free(Tree); -} /* FreeKDTree */ - - -/*----------------------------------------------------------------------------- - Private Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine allocates memory for a new K-D tree node - * and places the specified Key and Data into it. 
The - * left and right subtree pointers for the node are - * initialized to empty subtrees. - * @param tree The tree to create the node for - * @param Key Access key for new node in KD tree - * @param Data ptr to data to be stored in new node - * @param Index index of Key to branch on - * @return pointer to new K-D tree node - */ -KDNODE *MakeKDNode(KDTREE *tree, float Key[], void *Data, int Index) { - KDNODE *NewNode; - - NewNode = (KDNODE *) Emalloc (sizeof (KDNODE)); - - NewNode->Key = Key; - NewNode->Data = Data; - NewNode->BranchPoint = Key[Index]; - NewNode->LeftBranch = tree->KeyDesc[Index].Min; - NewNode->RightBranch = tree->KeyDesc[Index].Max; - NewNode->Left = nullptr; - NewNode->Right = nullptr; - - return NewNode; -} /* MakeKDNode */ - - -/*---------------------------------------------------------------------------*/ -void FreeKDNode(KDNODE *Node) { free(Node); } - -/*---------------------------------------------------------------------------*/ -/** - * Recursively accumulate the k_closest points to query_point_ into results_. 
- * @param Level level in tree of sub-tree to be searched - * @param SubTree sub-tree to be searched - */ -void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { - if (level >= tree_->KeySize) - level = 0; - - if (!BoxIntersectsSearch(sb_min_, sb_max_)) - return; - - results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, query_point_, - sub_tree->Key), - sub_tree->Data); - - if (query_point_[level] < sub_tree->BranchPoint) { - if (sub_tree->Left != nullptr) { - float tmp = sb_max_[level]; - sb_max_[level] = sub_tree->LeftBranch; - SearchRec(NextLevel(tree_, level), sub_tree->Left); - sb_max_[level] = tmp; - } - if (sub_tree->Right != nullptr) { - float tmp = sb_min_[level]; - sb_min_[level] = sub_tree->RightBranch; - SearchRec(NextLevel(tree_, level), sub_tree->Right); - sb_min_[level] = tmp; - } - } else { - if (sub_tree->Right != nullptr) { - float tmp = sb_min_[level]; - sb_min_[level] = sub_tree->RightBranch; - SearchRec(NextLevel(tree_, level), sub_tree->Right); - sb_min_[level] = tmp; - } - if (sub_tree->Left != nullptr) { - float tmp = sb_max_[level]; - sb_max_[level] = sub_tree->LeftBranch; - SearchRec(NextLevel(tree_, level), sub_tree->Left); - sb_max_[level] = tmp; - } - } -} - - -/*---------------------------------------------------------------------------*/ -/** - *Returns the Euclidean distance squared between p1 and p2 for all essential - * dimensions. 
- * @param k keys are in k-space - * @param dim dimension descriptions (essential, circular, etc) - * @param p1,p2 two different points in K-D space - */ -float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) { - float total_distance = 0; - - for (; k > 0; k--, p1++, p2++, dim++) { - if (dim->NonEssential) - continue; - - float dimension_distance = *p1 - *p2; - - /* if this dimension is circular - check wraparound distance */ - if (dim->Circular) { - dimension_distance = Magnitude(dimension_distance); - float wrap_distance = dim->Max - dim->Min - dimension_distance; - dimension_distance = std::min(dimension_distance, wrap_distance); - } - - total_distance += dimension_distance * dimension_distance; - } - return total_distance; -} - -float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) { - return sqrt(DistanceSquared(k, dim, p1, p2)); -} - -/*---------------------------------------------------------------------------*/ -/// Return whether the query region (the smallest known circle about -/// query_point_ containing results->k_ points) intersects the box specified -/// between lower and upper. For circular dimensions, we also check the point -/// one wrap distance away from the query. -bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) { - float *query = query_point_; - // Compute the sum in higher precision. 
- double total_distance = 0.0; - double radius_squared = static_cast(results_.max_insertable_key()) * - results_.max_insertable_key(); - PARAM_DESC *dim = tree_->KeyDesc; - - for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) { - if (dim->NonEssential) - continue; - - float dimension_distance; - if (*query < *lower) - dimension_distance = *lower - *query; - else if (*query > *upper) - dimension_distance = *query - *upper; - else - dimension_distance = 0; - - /* if this dimension is circular - check wraparound distance */ - if (dim->Circular) { - float wrap_distance = FLT_MAX; - if (*query < *lower) - wrap_distance = *query + dim->Max - dim->Min - *upper; - else if (*query > *upper) - wrap_distance = *lower - (*query - (dim->Max - dim->Min)); - dimension_distance = std::min(dimension_distance, wrap_distance); - } - - total_distance += - static_cast(dimension_distance) * dimension_distance; - if (total_distance >= radius_squared) - return false; - } - return true; -} - - -/*---------------------------------------------------------------------------*/ -/** - * Walk a tree, calling action once on each node. - * - * Operation: - * This routine walks through the specified sub_tree and invokes action - * action at each node as follows: - * action(context, data, level) - * data the data contents of the node being visited, - * level is the level of the node in the tree with the root being level 0. - * @param tree root of the tree being walked. 
- * @param action action to be performed at every node - * @param context action's context - * @param sub_tree ptr to root of subtree to be walked - * @param level current level in the tree for this node - */ -void Walk(KDTREE *tree, void_proc action, void *context, - KDNODE *sub_tree, int32_t level) { - (*action)(context, sub_tree->Data, level); - if (sub_tree->Left != nullptr) - Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level)); - if (sub_tree->Right != nullptr) - Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level)); -} - -/** Given a subtree nodes, insert all of its elements into tree. */ -void InsertNodes(KDTREE *tree, KDNODE *nodes) { - if (nodes == nullptr) - return; - - KDStore(tree, nodes->Key, nodes->Data); - InsertNodes(tree, nodes->Left); - InsertNodes(tree, nodes->Right); -} - -/** Free all of the nodes of a sub tree. */ -void FreeSubTree(KDNODE *sub_tree) { - if (sub_tree != nullptr) { - FreeSubTree(sub_tree->Left); - FreeSubTree(sub_tree->Right); - free(sub_tree); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/kdtree.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/kdtree.h deleted file mode 100644 index d3bee713..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/kdtree.h +++ /dev/null @@ -1,94 +0,0 @@ -/****************************************************************************** - ** Filename: kdtree.h - ** Purpose: Definition of K-D tree access routines. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - *****************************************************************************/ -#ifndef KDTREE_H -#define KDTREE_H - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "cutil.h" // for void_proc -#include "host.h" -#include "ocrfeatures.h" - -/** -NOTE: All circular parameters of all keys must be in the range - -Min <= Param < Max - -where Min and Max are specified in the KeyDesc parameter passed to -MakeKDTree. All KD routines assume that this is true and will not operate -correctly if circular parameters outside the specified range are used. 
-*/ - -struct KDNODE { - float *Key; /**< search key */ - void *Data; /**< data that corresponds to key */ - float BranchPoint; /**< needed to make deletes work efficiently */ - float LeftBranch; /**< used to optimize search pruning */ - float RightBranch; /**< used to optimize search pruning */ - struct KDNODE *Left; /**< ptrs for KD tree structure */ - struct KDNODE *Right; -}; - -struct KDTREE { - int16_t KeySize; /* number of dimensions in the tree */ - KDNODE Root; /* Root.Left points to actual root node */ - PARAM_DESC KeyDesc[1]; /* description of each dimension */ -}; - -/*---------------------------------------------------------------------------- - Macros ------------------------------------------------------------------------------*/ -#define RootOf(T) ((T)->Root.Left->Data) - -/*----------------------------------------------------------------------------- - Public Function Prototypes ------------------------------------------------------------------------------*/ -KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]); - -void KDStore(KDTREE *Tree, float *Key, void *Data); - -void KDDelete(KDTREE * Tree, float Key[], void *Data); - -void KDNearestNeighborSearch( - KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, - int *NumberOfResults, void **NBuffer, float DBuffer[]); - -void KDWalk(KDTREE *Tree, void_proc Action, void *context); - -void FreeKDTree(KDTREE *Tree); - -/*----------------------------------------------------------------------------- - Private Function Prototypes ------------------------------------------------------------------------------*/ -KDNODE *MakeKDNode(KDTREE *tree, float Key[], void *Data, int Index); - -void FreeKDNode(KDNODE *Node); - -float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]); - -float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]); - -int QueryInSearch(KDTREE *tree); - -void Walk(KDTREE *tree, void_proc action, void *context, - KDNODE *SubTree, int32_t Level); 
- -void InsertNodes(KDTREE *tree, KDNODE *nodes); - -void FreeSubTree(KDNODE *SubTree); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mastertrainer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mastertrainer.cpp deleted file mode 100644 index 658333d0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mastertrainer.cpp +++ /dev/null @@ -1,985 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: mastertrainer.cpp -// Description: Trainer to build the MasterClassifier. -// Author: Ray Smith -// Created: Wed Nov 03 18:10:01 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "mastertrainer.h" -#include -#include -#include "allheaders.h" -#include "boxread.h" -#include "classify.h" -#include "errorcounter.h" -#include "featdefs.h" -#include "sampleiterator.h" -#include "shapeclassifier.h" -#include "shapetable.h" -#include "svmnode.h" - -#include "scanutils.h" - -namespace tesseract { - -// Constants controlling clustering. With a low kMinClusteredShapes and a high -// kMaxUnicharsPerCluster, then kFontMergeDistance is the only limiting factor. 
-// Min number of shapes in the output. -const int kMinClusteredShapes = 1; -// Max number of unichars in any individual cluster. -const int kMaxUnicharsPerCluster = 2000; -// Mean font distance below which to merge fonts and unichars. -const float kFontMergeDistance = 0.025; - -MasterTrainer::MasterTrainer(NormalizationMode norm_mode, - bool shape_analysis, - bool replicate_samples, - int debug_level) - : norm_mode_(norm_mode), samples_(fontinfo_table_), - junk_samples_(fontinfo_table_), verify_samples_(fontinfo_table_), - charsetsize_(0), - enable_shape_analysis_(shape_analysis), - enable_replication_(replicate_samples), - fragments_(nullptr), prev_unichar_id_(-1), debug_level_(debug_level) { -} - -MasterTrainer::~MasterTrainer() { - delete [] fragments_; - for (int p = 0; p < page_images_.size(); ++p) - pixDestroy(&page_images_[p]); -} - -// WARNING! Serialize/DeSerialize are only partial, providing -// enough data to get the samples back and display them. -// Writes to the given file. Returns false in case of error. -bool MasterTrainer::Serialize(FILE* fp) const { - uint32_t value = norm_mode_; - if (!tesseract::Serialize(fp, &value)) return false; - if (!unicharset_.save_to_file(fp)) return false; - if (!feature_space_.Serialize(fp)) return false; - if (!samples_.Serialize(fp)) return false; - if (!junk_samples_.Serialize(fp)) return false; - if (!verify_samples_.Serialize(fp)) return false; - if (!master_shapes_.Serialize(fp)) return false; - if (!flat_shapes_.Serialize(fp)) return false; - if (!fontinfo_table_.Serialize(fp)) return false; - if (!xheights_.Serialize(fp)) return false; - return true; -} - -// Load an initial unicharset, or set one up if the file cannot be read. 
-void MasterTrainer::LoadUnicharset(const char* filename) { - if (!unicharset_.load_from_file(filename)) { - tprintf("Failed to load unicharset from file %s\n" - "Building unicharset for training from scratch...\n", - filename); - unicharset_.clear(); - UNICHARSET initialized; - // Add special characters, as they were removed by the clear, but the - // default constructor puts them in. - unicharset_.AppendOtherUnicharset(initialized); - } - charsetsize_ = unicharset_.size(); - delete [] fragments_; - fragments_ = new int[charsetsize_]; - memset(fragments_, 0, sizeof(*fragments_) * charsetsize_); - samples_.LoadUnicharset(filename); - junk_samples_.LoadUnicharset(filename); - verify_samples_.LoadUnicharset(filename); -} - -// Reads the samples and their features from the given .tr format file, -// adding them to the trainer with the font_id from the content of the file. -// See mftraining.cpp for a description of the file format. -// If verification, then these are verification samples, not training. 
-void MasterTrainer::ReadTrainingSamples(const char* page_name, - const FEATURE_DEFS_STRUCT& feature_defs, - bool verification) { - char buffer[2048]; - const int int_feature_type = ShortNameToFeatureType(feature_defs, kIntFeatureType); - const int micro_feature_type = ShortNameToFeatureType(feature_defs, - kMicroFeatureType); - const int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType); - const int geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType); - - FILE* fp = fopen(page_name, "rb"); - if (fp == nullptr) { - tprintf("Failed to open tr file: %s\n", page_name); - return; - } - tr_filenames_.push_back(STRING(page_name)); - while (fgets(buffer, sizeof(buffer), fp) != nullptr) { - if (buffer[0] == '\n') - continue; - - char* space = strchr(buffer, ' '); - if (space == nullptr) { - tprintf("Bad format in tr file, reading fontname, unichar\n"); - continue; - } - *space++ = '\0'; - int font_id = GetFontInfoId(buffer); - if (font_id < 0) font_id = 0; - int page_number; - STRING unichar; - TBOX bounding_box; - if (!ParseBoxFileStr(space, &page_number, &unichar, &bounding_box)) { - tprintf("Bad format in tr file, reading box coords\n"); - continue; - } - CHAR_DESC char_desc = ReadCharDescription(feature_defs, fp); - TrainingSample* sample = new TrainingSample; - sample->set_font_id(font_id); - sample->set_page_num(page_number + page_images_.size()); - sample->set_bounding_box(bounding_box); - sample->ExtractCharDesc(int_feature_type, micro_feature_type, - cn_feature_type, geo_feature_type, char_desc); - AddSample(verification, unichar.string(), sample); - FreeCharDescription(char_desc); - } - charsetsize_ = unicharset_.size(); - fclose(fp); -} - -// Adds the given single sample to the trainer, setting the classid -// appropriately from the given unichar_str. 
-void MasterTrainer::AddSample(bool verification, const char* unichar, - TrainingSample* sample) { - if (verification) { - verify_samples_.AddSample(unichar, sample); - prev_unichar_id_ = -1; - } else if (unicharset_.contains_unichar(unichar)) { - if (prev_unichar_id_ >= 0) - fragments_[prev_unichar_id_] = -1; - prev_unichar_id_ = samples_.AddSample(unichar, sample); - if (flat_shapes_.FindShape(prev_unichar_id_, sample->font_id()) < 0) - flat_shapes_.AddShape(prev_unichar_id_, sample->font_id()); - } else { - const int junk_id = junk_samples_.AddSample(unichar, sample); - if (prev_unichar_id_ >= 0) { - CHAR_FRAGMENT* frag = CHAR_FRAGMENT::parse_from_string(unichar); - if (frag != nullptr && frag->is_natural()) { - if (fragments_[prev_unichar_id_] == 0) - fragments_[prev_unichar_id_] = junk_id; - else if (fragments_[prev_unichar_id_] != junk_id) - fragments_[prev_unichar_id_] = -1; - } - delete frag; - } - prev_unichar_id_ = -1; - } -} - -// Loads all pages from the given tif filename and append to page_images_. -// Must be called after ReadTrainingSamples, as the current number of images -// is used as an offset for page numbers in the samples. -void MasterTrainer::LoadPageImages(const char* filename) { - size_t offset = 0; - int page; - Pix* pix; - for (page = 0; ; page++) { - pix = 0; - //pix = pixReadFromMultipageTiff(filename, &offset); - if (!pix) break; - page_images_.push_back(pix); - if (!offset) break; - } - tprintf("Loaded %d page images from %s\n", page, filename); -} - -// Cleans up the samples after initial load from the tr files, and prior to -// saving the MasterTrainer: -// Remaps fragmented chars if running shape analysis. -// Sets up the samples appropriately for class/fontwise access. -// Deletes outlier samples. 
-void MasterTrainer::PostLoadCleanup() { - if (debug_level_ > 0) - tprintf("PostLoadCleanup...\n"); - if (enable_shape_analysis_) - ReplaceFragmentedSamples(); - SampleIterator sample_it; - sample_it.Init(nullptr, nullptr, true, &verify_samples_); - sample_it.NormalizeSamples(); - verify_samples_.OrganizeByFontAndClass(); - - samples_.IndexFeatures(feature_space_); - // TODO(rays) DeleteOutliers is currently turned off to prove NOP-ness - // against current training. - // samples_.DeleteOutliers(feature_space_, debug_level_ > 0); - samples_.OrganizeByFontAndClass(); - if (debug_level_ > 0) - tprintf("ComputeCanonicalSamples...\n"); - samples_.ComputeCanonicalSamples(feature_map_, debug_level_ > 0); -} - -// Gets the samples ready for training. Use after both -// ReadTrainingSamples+PostLoadCleanup or DeSerialize. -// Re-indexes the features and computes canonical and cloud features. -void MasterTrainer::PreTrainingSetup() { - if (debug_level_ > 0) - tprintf("PreTrainingSetup...\n"); - samples_.IndexFeatures(feature_space_); - samples_.ComputeCanonicalFeatures(); - if (debug_level_ > 0) - tprintf("ComputeCloudFeatures...\n"); - samples_.ComputeCloudFeatures(feature_space_.Size()); -} - -// Sets up the master_shapes_ table, which tells which fonts should stay -// together until they get to a leaf node classifier. 
-void MasterTrainer::SetupMasterShapes() { - tprintf("Building master shape table\n"); - const int num_fonts = samples_.NumFonts(); - - ShapeTable char_shapes_begin_fragment(samples_.unicharset()); - ShapeTable char_shapes_end_fragment(samples_.unicharset()); - ShapeTable char_shapes(samples_.unicharset()); - for (int c = 0; c < samples_.charsetsize(); ++c) { - ShapeTable shapes(samples_.unicharset()); - for (int f = 0; f < num_fonts; ++f) { - if (samples_.NumClassSamples(f, c, true) > 0) - shapes.AddShape(c, f); - } - ClusterShapes(kMinClusteredShapes, 1, kFontMergeDistance, &shapes); - - const CHAR_FRAGMENT *fragment = samples_.unicharset().get_fragment(c); - - if (fragment == nullptr) - char_shapes.AppendMasterShapes(shapes, nullptr); - else if (fragment->is_beginning()) - char_shapes_begin_fragment.AppendMasterShapes(shapes, nullptr); - else if (fragment->is_ending()) - char_shapes_end_fragment.AppendMasterShapes(shapes, nullptr); - else - char_shapes.AppendMasterShapes(shapes, nullptr); - } - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, - kFontMergeDistance, &char_shapes_begin_fragment); - char_shapes.AppendMasterShapes(char_shapes_begin_fragment, nullptr); - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, - kFontMergeDistance, &char_shapes_end_fragment); - char_shapes.AppendMasterShapes(char_shapes_end_fragment, nullptr); - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, - kFontMergeDistance, &char_shapes); - master_shapes_.AppendMasterShapes(char_shapes, nullptr); - tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().string()); -} - -// Adds the junk_samples_ to the main samples_ set. Junk samples are initially -// fragments and n-grams (all incorrectly segmented characters). 
-// Various training functions may result in incorrectly segmented characters -// being added to the unicharset of the main samples, perhaps because they -// form a "radical" decomposition of some (Indic) grapheme, or because they -// just look the same as a real character (like rn/m) -// This function moves all the junk samples, to the main samples_ set, but -// desirable junk, being any sample for which the unichar already exists in -// the samples_ unicharset gets the unichar-ids re-indexed to match, but -// anything else gets re-marked as unichar_id 0 (space character) to identify -// it as junk to the error counter. -void MasterTrainer::IncludeJunk() { - // Get ids of fragments in junk_samples_ that replace the dead chars. - const UNICHARSET& junk_set = junk_samples_.unicharset(); - const UNICHARSET& sample_set = samples_.unicharset(); - int num_junks = junk_samples_.num_samples(); - tprintf("Moving %d junk samples to master sample set.\n", num_junks); - for (int s = 0; s < num_junks; ++s) { - TrainingSample* sample = junk_samples_.mutable_sample(s); - int junk_id = sample->class_id(); - const char* junk_utf8 = junk_set.id_to_unichar(junk_id); - int sample_id = sample_set.unichar_to_id(junk_utf8); - if (sample_id == INVALID_UNICHAR_ID) - sample_id = 0; - sample->set_class_id(sample_id); - junk_samples_.extract_sample(s); - samples_.AddSample(sample_id, sample); - } - junk_samples_.DeleteDeadSamples(); - samples_.OrganizeByFontAndClass(); -} - -// Replicates the samples and perturbs them if the enable_replication_ flag -// is set. MUST be used after the last call to OrganizeByFontAndClass on -// the training samples, ie after IncludeJunk if it is going to be used, as -// OrganizeByFontAndClass will eat the replicated samples into the regular -// samples. 
-void MasterTrainer::ReplicateAndRandomizeSamplesIfRequired() { - if (enable_replication_) { - if (debug_level_ > 0) - tprintf("ReplicateAndRandomize...\n"); - verify_samples_.ReplicateAndRandomizeSamples(); - samples_.ReplicateAndRandomizeSamples(); - samples_.IndexFeatures(feature_space_); - } -} - -// Loads the basic font properties file into fontinfo_table_. -// Returns false on failure. -bool MasterTrainer::LoadFontInfo(const char* filename) { - FILE* fp = fopen(filename, "rb"); - if (fp == nullptr) { - fprintf(stderr, "Failed to load font_properties from %s\n", filename); - return false; - } - int italic, bold, fixed, serif, fraktur; - while (!feof(fp)) { - FontInfo fontinfo; - char* font_name = new char[1024]; - fontinfo.name = font_name; - fontinfo.properties = 0; - fontinfo.universal_id = 0; - if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, - &fixed, &serif, &fraktur) != 6) { - delete[] font_name; - continue; - } - fontinfo.properties = - (italic << 0) + - (bold << 1) + - (fixed << 2) + - (serif << 3) + - (fraktur << 4); - if (!fontinfo_table_.contains(fontinfo)) { - fontinfo_table_.push_back(fontinfo); - } else { - delete[] font_name; - } - } - fclose(fp); - return true; -} - -// Loads the xheight font properties file into xheights_. -// Returns false on failure. -bool MasterTrainer::LoadXHeights(const char* filename) { - tprintf("fontinfo table is of size %d\n", fontinfo_table_.size()); - xheights_.init_to_size(fontinfo_table_.size(), -1); - if (filename == nullptr) return true; - FILE *f = fopen(filename, "rb"); - if (f == nullptr) { - fprintf(stderr, "Failed to load font xheights from %s\n", filename); - return false; - } - tprintf("Reading x-heights from %s ...\n", filename); - FontInfo fontinfo; - fontinfo.properties = 0; // Not used to lookup in the table. 
- fontinfo.universal_id = 0; - char buffer[1024]; - int xht; - int total_xheight = 0; - int xheight_count = 0; - while (!feof(f)) { - if (tfscanf(f, "%1023s %d\n", buffer, &xht) != 2) - continue; - buffer[1023] = '\0'; - fontinfo.name = buffer; - if (!fontinfo_table_.contains(fontinfo)) continue; - int fontinfo_id = fontinfo_table_.get_index(fontinfo); - xheights_[fontinfo_id] = xht; - total_xheight += xht; - ++xheight_count; - } - if (xheight_count == 0) { - fprintf(stderr, "No valid xheights in %s!\n", filename); - fclose(f); - return false; - } - int mean_xheight = DivRounded(total_xheight, xheight_count); - for (int i = 0; i < fontinfo_table_.size(); ++i) { - if (xheights_[i] < 0) - xheights_[i] = mean_xheight; - } - fclose(f); - return true; -} // LoadXHeights - -// Reads spacing stats from filename and adds them to fontinfo_table. -bool MasterTrainer::AddSpacingInfo(const char *filename) { - FILE* fontinfo_file = fopen(filename, "rb"); - if (fontinfo_file == nullptr) - return true; // We silently ignore missing files! - // Find the fontinfo_id. - int fontinfo_id = GetBestMatchingFontInfoId(filename); - if (fontinfo_id < 0) { - tprintf("No font found matching fontinfo filename %s\n", filename); - fclose(fontinfo_file); - return false; - } - tprintf("Reading spacing from %s for font %d...\n", filename, fontinfo_id); - // TODO(rays) scale should probably be a double, but keep as an int for now - // to duplicate current behavior. 
- int scale = kBlnXHeight / xheights_[fontinfo_id]; - int num_unichars; - char uch[UNICHAR_LEN]; - char kerned_uch[UNICHAR_LEN]; - int x_gap, x_gap_before, x_gap_after, num_kerned; - ASSERT_HOST(tfscanf(fontinfo_file, "%d\n", &num_unichars) == 1); - FontInfo *fi = &fontinfo_table_.get(fontinfo_id); - fi->init_spacing(unicharset_.size()); - FontSpacingInfo *spacing = nullptr; - for (int l = 0; l < num_unichars; ++l) { - if (tfscanf(fontinfo_file, "%s %d %d %d", - uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) { - tprintf("Bad format of font spacing file %s\n", filename); - fclose(fontinfo_file); - return false; - } - bool valid = unicharset_.contains_unichar(uch); - if (valid) { - spacing = new FontSpacingInfo(); - spacing->x_gap_before = static_cast(x_gap_before * scale); - spacing->x_gap_after = static_cast(x_gap_after * scale); - } - for (int k = 0; k < num_kerned; ++k) { - if (tfscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) { - tprintf("Bad format of font spacing file %s\n", filename); - fclose(fontinfo_file); - delete spacing; - return false; - } - if (!valid || !unicharset_.contains_unichar(kerned_uch)) continue; - spacing->kerned_unichar_ids.push_back( - unicharset_.unichar_to_id(kerned_uch)); - spacing->kerned_x_gaps.push_back(static_cast(x_gap * scale)); - } - if (valid) fi->add_spacing(unicharset_.unichar_to_id(uch), spacing); - } - fclose(fontinfo_file); - return true; -} - -// Returns the font id corresponding to the given font name. -// Returns -1 if the font cannot be found. -int MasterTrainer::GetFontInfoId(const char* font_name) { - FontInfo fontinfo; - // We are only borrowing the string, so it is OK to const cast it. - fontinfo.name = const_cast(font_name); - fontinfo.properties = 0; // Not used to lookup in the table - fontinfo.universal_id = 0; - return fontinfo_table_.get_index(fontinfo); -} -// Returns the font_id of the closest matching font name to the given -// filename. 
It is assumed that a substring of the filename will match -// one of the fonts. If more than one is matched, the longest is returned. -int MasterTrainer::GetBestMatchingFontInfoId(const char* filename) { - int fontinfo_id = -1; - int best_len = 0; - for (int f = 0; f < fontinfo_table_.size(); ++f) { - if (strstr(filename, fontinfo_table_.get(f).name) != nullptr) { - int len = strlen(fontinfo_table_.get(f).name); - // Use the longest matching length in case a substring of a font matched. - if (len > best_len) { - best_len = len; - fontinfo_id = f; - } - } - } - return fontinfo_id; -} - -// Sets up a flat shapetable with one shape per class/font combination. -void MasterTrainer::SetupFlatShapeTable(ShapeTable* shape_table) { - // To exactly mimic the results of the previous implementation, the shapes - // must be clustered in order the fonts arrived, and reverse order of the - // characters within each font. - // Get a list of the fonts in the order they appeared. - GenericVector active_fonts; - int num_shapes = flat_shapes_.NumShapes(); - for (int s = 0; s < num_shapes; ++s) { - int font = flat_shapes_.GetShape(s)[0].font_ids[0]; - int f = 0; - for (f = 0; f < active_fonts.size(); ++f) { - if (active_fonts[f] == font) - break; - } - if (f == active_fonts.size()) - active_fonts.push_back(font); - } - // For each font in order, add all the shapes with that font in reverse order. - int num_fonts = active_fonts.size(); - for (int f = 0; f < num_fonts; ++f) { - for (int s = num_shapes - 1; s >= 0; --s) { - int font = flat_shapes_.GetShape(s)[0].font_ids[0]; - if (font == active_fonts[f]) { - shape_table->AddShape(flat_shapes_.GetShape(s)); - } - } - } -} - -// Sets up a Clusterer for mftraining on a single shape_id. -// Call FreeClusterer on the return value after use. 
-CLUSTERER* MasterTrainer::SetupForClustering( - const ShapeTable& shape_table, - const FEATURE_DEFS_STRUCT& feature_defs, - int shape_id, - int* num_samples) { - - int desc_index = ShortNameToFeatureType(feature_defs, kMicroFeatureType); - int num_params = feature_defs.FeatureDesc[desc_index]->NumParams; - ASSERT_HOST(num_params == MFCount); - CLUSTERER* clusterer = MakeClusterer( - num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc); - - // We want to iterate over the samples of just the one shape. - IndexMapBiDi shape_map; - shape_map.Init(shape_table.NumShapes(), false); - shape_map.SetMap(shape_id, true); - shape_map.Setup(); - // Reverse the order of the samples to match the previous behavior. - GenericVector sample_ptrs; - SampleIterator it; - it.Init(&shape_map, &shape_table, false, &samples_); - for (it.Begin(); !it.AtEnd(); it.Next()) { - sample_ptrs.push_back(&it.GetSample()); - } - int sample_id = 0; - for (int i = sample_ptrs.size() - 1; i >= 0; --i) { - const TrainingSample* sample = sample_ptrs[i]; - uint32_t num_features = sample->num_micro_features(); - for (uint32_t f = 0; f < num_features; ++f) - MakeSample(clusterer, sample->micro_features()[f], sample_id); - ++sample_id; - } - *num_samples = sample_id; - return clusterer; -} - -// Writes the given float_classes (produced by SetupForFloat2Int) as inttemp -// to the given inttemp_file, and the corresponding pffmtable. -// The unicharset is the original encoding of graphemes, and shape_set should -// match the size of the shape_table, and may possibly be totally fake. -void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset, - const UNICHARSET& shape_set, - const ShapeTable& shape_table, - CLASS_STRUCT* float_classes, - const char* inttemp_file, - const char* pffmtable_file) { - tesseract::Classify *classify = new tesseract::Classify(); - // Move the fontinfo table to classify. 
- fontinfo_table_.MoveTo(&classify->get_fontinfo_table()); - INT_TEMPLATES int_templates = classify->CreateIntTemplates(float_classes, - shape_set); - FILE* fp = fopen(inttemp_file, "wb"); - if (fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", inttemp_file); - } else { - classify->WriteIntTemplates(fp, int_templates, shape_set); - fclose(fp); - } - // Now write pffmtable. This is complicated by the fact that the adaptive - // classifier still wants one indexed by unichar-id, but the static - // classifier needs one indexed by its shape class id. - // We put the shapetable_cutoffs in a GenericVector, and compute the - // unicharset cutoffs along the way. - GenericVector shapetable_cutoffs; - GenericVector unichar_cutoffs; - for (int c = 0; c < unicharset.size(); ++c) - unichar_cutoffs.push_back(0); - /* then write out each class */ - for (int i = 0; i < int_templates->NumClasses; ++i) { - INT_CLASS Class = ClassForClassId(int_templates, i); - // Todo: Test with min instead of max - // int MaxLength = LengthForConfigId(Class, 0); - uint16_t max_length = 0; - for (int config_id = 0; config_id < Class->NumConfigs; config_id++) { - // Todo: Test with min instead of max - // if (LengthForConfigId (Class, config_id) < MaxLength) - uint16_t length = Class->ConfigLengths[config_id]; - if (length > max_length) - max_length = Class->ConfigLengths[config_id]; - int shape_id = float_classes[i].font_set.get(config_id); - const Shape& shape = shape_table.GetShape(shape_id); - for (int c = 0; c < shape.size(); ++c) { - int unichar_id = shape[c].unichar_id; - if (length > unichar_cutoffs[unichar_id]) - unichar_cutoffs[unichar_id] = length; - } - } - shapetable_cutoffs.push_back(max_length); - } - fp = fopen(pffmtable_file, "wb"); - if (fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", pffmtable_file); - } else { - shapetable_cutoffs.Serialize(fp); - for (int c = 0; c < unicharset.size(); ++c) { - const char *unichar = unicharset.id_to_unichar(c); - 
if (strcmp(unichar, " ") == 0) { - unichar = "NULL"; - } - fprintf(fp, "%s %d\n", unichar, unichar_cutoffs[c]); - } - fclose(fp); - } - free_int_templates(int_templates); - delete classify; -} - -// Generate debug output relating to the canonical distance between the -// two given UTF8 grapheme strings. -void MasterTrainer::DebugCanonical(const char* unichar_str1, - const char* unichar_str2) { - int class_id1 = unicharset_.unichar_to_id(unichar_str1); - int class_id2 = unicharset_.unichar_to_id(unichar_str2); - if (class_id2 == INVALID_UNICHAR_ID) - class_id2 = class_id1; - if (class_id1 == INVALID_UNICHAR_ID) { - tprintf("No unicharset entry found for %s\n", unichar_str1); - return; - } else { - tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", - class_id1, unichar_str1, class_id2, unichar_str2); - } - int num_fonts = samples_.NumFonts(); - const IntFeatureMap& feature_map = feature_map_; - // Iterate the fonts to get the similarity with other fonst of the same - // class. - tprintf(" "); - for (int f = 0; f < num_fonts; ++f) { - if (samples_.NumClassSamples(f, class_id2, false) == 0) - continue; - tprintf("%6d", f); - } - tprintf("\n"); - for (int f1 = 0; f1 < num_fonts; ++f1) { - // Map the features of the canonical_sample. - if (samples_.NumClassSamples(f1, class_id1, false) == 0) - continue; - tprintf("%4d ", f1); - for (int f2 = 0; f2 < num_fonts; ++f2) { - if (samples_.NumClassSamples(f2, class_id2, false) == 0) - continue; - float dist = samples_.ClusterDistance(f1, class_id1, f2, class_id2, - feature_map); - tprintf(" %5.3f", dist); - } - tprintf("\n"); - } - // Build a fake ShapeTable containing all the sample types. 
- ShapeTable shapes(unicharset_); - for (int f = 0; f < num_fonts; ++f) { - if (samples_.NumClassSamples(f, class_id1, true) > 0) - shapes.AddShape(class_id1, f); - if (class_id1 != class_id2 && - samples_.NumClassSamples(f, class_id2, true) > 0) - shapes.AddShape(class_id2, f); - } -} - -#ifndef GRAPHICS_DISABLED -// Debugging for cloud/canonical features. -// Displays a Features window containing: -// If unichar_str2 is in the unicharset, and canonical_font is non-negative, -// displays the canonical features of the char/font combination in red. -// If unichar_str1 is in the unicharset, and cloud_font is non-negative, -// displays the cloud feature of the char/font combination in green. -// The canonical features are drawn first to show which ones have no -// matches in the cloud features. -// Until the features window is destroyed, each click in the features window -// will display the samples that have that feature in a separate window. -void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font, - const char* unichar_str2, - int canonical_font) { - const IntFeatureMap& feature_map = feature_map_; - const IntFeatureSpace& feature_space = feature_map.feature_space(); - ScrollView* f_window = CreateFeatureSpaceWindow("Features", 100, 500); - ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? 
baseline : character, - f_window); - int class_id2 = samples_.unicharset().unichar_to_id(unichar_str2); - if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) { - const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font, - class_id2); - for (uint32_t f = 0; f < sample->num_features(); ++f) { - RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED); - } - } - int class_id1 = samples_.unicharset().unichar_to_id(unichar_str1); - if (class_id1 != INVALID_UNICHAR_ID && cloud_font >= 0) { - const BitVector& cloud = samples_.GetCloudFeatures(cloud_font, class_id1); - for (int f = 0; f < cloud.size(); ++f) { - if (cloud[f]) { - INT_FEATURE_STRUCT feature = - feature_map.InverseIndexFeature(f); - RenderIntFeature(f_window, &feature, ScrollView::GREEN); - } - } - } - f_window->Update(); - ScrollView* s_window = CreateFeatureSpaceWindow("Samples", 100, 500); - SVEventType ev_type; - do { - SVEvent* ev; - // Wait until a click or popup event. - ev = f_window->AwaitEvent(SVET_ANY); - ev_type = ev->type; - if (ev_type == SVET_CLICK) { - int feature_index = feature_space.XYToFeatureIndex(ev->x, ev->y); - if (feature_index >= 0) { - // Iterate samples and display those with the feature. - Shape shape; - shape.AddToShape(class_id1, cloud_font); - s_window->Clear(); - samples_.DisplaySamplesWithFeature(feature_index, shape, - feature_space, ScrollView::GREEN, - s_window); - s_window->Update(); - } - } - delete ev; - } while (ev_type != SVET_DESTROY); -} -#endif // GRAPHICS_DISABLED - -void MasterTrainer::TestClassifierVOld(bool replicate_samples, - ShapeClassifier* test_classifier, - ShapeClassifier* old_classifier) { - SampleIterator sample_it; - sample_it.Init(nullptr, nullptr, replicate_samples, &samples_); - ErrorCounter::DebugNewErrors(test_classifier, old_classifier, - CT_UNICHAR_TOPN_ERR, fontinfo_table_, - page_images_, &sample_it); -} - -// Tests the given test_classifier on the internal samples. -// See TestClassifier for details. 
-void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, - int report_level, - bool replicate_samples, - ShapeClassifier* test_classifier, - STRING* report_string) { - TestClassifier(error_mode, report_level, replicate_samples, &samples_, - test_classifier, report_string); -} - -// Tests the given test_classifier on the given samples. -// error_mode indicates what counts as an error. -// report_levels: -// 0 = no output. -// 1 = bottom-line error rate. -// 2 = bottom-line error rate + time. -// 3 = font-level error rate + time. -// 4 = list of all errors + short classifier debug output on 16 errors. -// 5 = list of all errors + short classifier debug output on 25 errors. -// If replicate_samples is true, then the test is run on an extended test -// sample including replicated and systematically perturbed samples. -// If report_string is non-nullptr, a summary of the results for each font -// is appended to the report_string. -double MasterTrainer::TestClassifier(CountTypes error_mode, - int report_level, - bool replicate_samples, - TrainingSampleSet* samples, - ShapeClassifier* test_classifier, - STRING* report_string) { - SampleIterator sample_it; - sample_it.Init(nullptr, nullptr, replicate_samples, samples); - if (report_level > 0) { - int num_samples = 0; - for (sample_it.Begin(); !sample_it.AtEnd(); sample_it.Next()) - ++num_samples; - tprintf("Iterator has charset size of %d/%d, %d shapes, %d samples\n", - sample_it.SparseCharsetSize(), sample_it.CompactCharsetSize(), - test_classifier->GetShapeTable()->NumShapes(), num_samples); - tprintf("Testing %sREPLICATED:\n", replicate_samples ? 
"" : "NON-"); - } - double unichar_error = 0.0; - ErrorCounter::ComputeErrorRate(test_classifier, report_level, - error_mode, fontinfo_table_, - page_images_, &sample_it, &unichar_error, - nullptr, report_string); - return unichar_error; -} - -// Returns the average (in some sense) distance between the two given -// shapes, which may contain multiple fonts and/or unichars. -float MasterTrainer::ShapeDistance(const ShapeTable& shapes, int s1, int s2) { - const IntFeatureMap& feature_map = feature_map_; - const Shape& shape1 = shapes.GetShape(s1); - const Shape& shape2 = shapes.GetShape(s2); - int num_chars1 = shape1.size(); - int num_chars2 = shape2.size(); - float dist_sum = 0.0f; - int dist_count = 0; - if (num_chars1 > 1 || num_chars2 > 1) { - // In the multi-char case try to optimize the calculation by computing - // distances between characters of matching font where possible. - for (int c1 = 0; c1 < num_chars1; ++c1) { - for (int c2 = 0; c2 < num_chars2; ++c2) { - dist_sum += samples_.UnicharDistance(shape1[c1], shape2[c2], - true, feature_map); - ++dist_count; - } - } - } else { - // In the single unichar case, there is little alternative, but to compute - // the squared-order distance between pairs of fonts. - dist_sum = samples_.UnicharDistance(shape1[0], shape2[0], - false, feature_map); - ++dist_count; - } - return dist_sum / dist_count; -} - -// Replaces samples that are always fragmented with the corresponding -// fragment samples. -void MasterTrainer::ReplaceFragmentedSamples() { - if (fragments_ == nullptr) return; - // Remove samples that are replaced by fragments. Each class that was - // always naturally fragmented should be replaced by its fragments. 
- int num_samples = samples_.num_samples(); - for (int s = 0; s < num_samples; ++s) { - TrainingSample* sample = samples_.mutable_sample(s); - if (fragments_[sample->class_id()] > 0) - samples_.KillSample(sample); - } - samples_.DeleteDeadSamples(); - - // Get ids of fragments in junk_samples_ that replace the dead chars. - const UNICHARSET& frag_set = junk_samples_.unicharset(); -#if 0 - // TODO(rays) The original idea was to replace only graphemes that were - // always naturally fragmented, but that left a lot of the Indic graphemes - // out. Determine whether we can go back to that idea now that spacing - // is fixed in the training images, or whether this code is obsolete. - bool* good_junk = new bool[frag_set.size()]; - memset(good_junk, 0, sizeof(*good_junk) * frag_set.size()); - for (int dead_ch = 1; dead_ch < unicharset_.size(); ++dead_ch) { - int frag_ch = fragments_[dead_ch]; - if (frag_ch <= 0) continue; - const char* frag_utf8 = frag_set.id_to_unichar(frag_ch); - CHAR_FRAGMENT* frag = CHAR_FRAGMENT::parse_from_string(frag_utf8); - // Mark the chars for all parts of the fragment as good in good_junk. - for (int part = 0; part < frag->get_total(); ++part) { - frag->set_pos(part); - int good_ch = frag_set.unichar_to_id(frag->to_string().string()); - if (good_ch != INVALID_UNICHAR_ID) - good_junk[good_ch] = true; // We want this one. - } - delete frag; - } -#endif - // For now just use all the junk that was from natural fragments. - // Get samples of fragments in junk_samples_ that replace the dead chars. 
- int num_junks = junk_samples_.num_samples(); - for (int s = 0; s < num_junks; ++s) { - TrainingSample* sample = junk_samples_.mutable_sample(s); - int junk_id = sample->class_id(); - const char* frag_utf8 = frag_set.id_to_unichar(junk_id); - CHAR_FRAGMENT* frag = CHAR_FRAGMENT::parse_from_string(frag_utf8); - if (frag != nullptr && frag->is_natural()) { - junk_samples_.extract_sample(s); - samples_.AddSample(frag_set.id_to_unichar(junk_id), sample); - } - delete frag; - } - junk_samples_.DeleteDeadSamples(); - junk_samples_.OrganizeByFontAndClass(); - samples_.OrganizeByFontAndClass(); - unicharset_.clear(); - unicharset_.AppendOtherUnicharset(samples_.unicharset()); - // delete [] good_junk; - // Fragments_ no longer needed? - delete [] fragments_; - fragments_ = nullptr; -} - -// Runs a hierarchical agglomerative clustering to merge shapes in the given -// shape_table, while satisfying the given constraints: -// * End with at least min_shapes left in shape_table, -// * No shape shall have more than max_shape_unichars in it, -// * Don't merge shapes where the distance between them exceeds max_dist. 
-const float kInfiniteDist = 999.0f; -void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, - float max_dist, ShapeTable* shapes) { - int num_shapes = shapes->NumShapes(); - int max_merges = num_shapes - min_shapes; - GenericVector* shape_dists = - new GenericVector[num_shapes]; - float min_dist = kInfiniteDist; - int min_s1 = 0; - int min_s2 = 0; - tprintf("Computing shape distances..."); - for (int s1 = 0; s1 < num_shapes; ++s1) { - for (int s2 = s1 + 1; s2 < num_shapes; ++s2) { - ShapeDist dist(s1, s2, ShapeDistance(*shapes, s1, s2)); - shape_dists[s1].push_back(dist); - if (dist.distance < min_dist) { - min_dist = dist.distance; - min_s1 = s1; - min_s2 = s2; - } - } - tprintf(" %d", s1); - } - tprintf("\n"); - int num_merged = 0; - while (num_merged < max_merges && min_dist < max_dist) { - tprintf("Distance = %f: ", min_dist); - int num_unichars = shapes->MergedUnicharCount(min_s1, min_s2); - shape_dists[min_s1][min_s2 - min_s1 - 1].distance = kInfiniteDist; - if (num_unichars > max_shape_unichars) { - tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n", - min_s1, min_s2, num_unichars, max_shape_unichars); - } else { - shapes->MergeShapes(min_s1, min_s2); - shape_dists[min_s2].clear(); - ++num_merged; - - for (int s = 0; s < min_s1; ++s) { - if (!shape_dists[s].empty()) { - shape_dists[s][min_s1 - s - 1].distance = - ShapeDistance(*shapes, s, min_s1); - shape_dists[s][min_s2 - s -1].distance = kInfiniteDist; - } - } - for (int s2 = min_s1 + 1; s2 < num_shapes; ++s2) { - if (shape_dists[min_s1][s2 - min_s1 - 1].distance < kInfiniteDist) - shape_dists[min_s1][s2 - min_s1 - 1].distance = - ShapeDistance(*shapes, min_s1, s2); - } - for (int s = min_s1 + 1; s < min_s2; ++s) { - if (!shape_dists[s].empty()) { - shape_dists[s][min_s2 - s - 1].distance = kInfiniteDist; - } - } - } - min_dist = kInfiniteDist; - for (int s1 = 0; s1 < num_shapes; ++s1) { - for (int i = 0; i < shape_dists[s1].size(); ++i) { - if 
(shape_dists[s1][i].distance < min_dist) { - min_dist = shape_dists[s1][i].distance; - min_s1 = s1; - min_s2 = s1 + 1 + i; - } - } - } - } - tprintf("Stopped with %d merged, min dist %f\n", num_merged, min_dist); - delete [] shape_dists; - if (debug_level_ > 1) { - for (int s1 = 0; s1 < num_shapes; ++s1) { - if (shapes->MasterDestinationIndex(s1) == s1) { - tprintf("Master shape:%s\n", shapes->DebugStr(s1).string()); - } - } - } -} - - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mastertrainer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mastertrainer.h deleted file mode 100644 index 9abf111e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mastertrainer.h +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: mastertrainer.h -// Description: Trainer to build the MasterClassifier. -// Author: Ray Smith -// Created: Wed Nov 03 18:07:01 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TRAINING_MASTERTRAINER_H_ -#define TESSERACT_TRAINING_MASTERTRAINER_H_ - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "classify.h" -#include "cluster.h" -#include "intfx.h" -#include "elst.h" -#include "errorcounter.h" -#include "featdefs.h" -#include "fontinfo.h" -#include "indexmapbidi.h" -#include "intfeaturespace.h" -#include "intfeaturemap.h" -#include "intmatcher.h" -#include "params.h" -#include "shapetable.h" -#include "trainingsample.h" -#include "trainingsampleset.h" -#include "unicharset.h" - -namespace tesseract { - -class ShapeClassifier; - -// Simple struct to hold the distance between two shapes during clustering. -struct ShapeDist { - ShapeDist() : shape1(0), shape2(0), distance(0.0f) {} - ShapeDist(int s1, int s2, float dist) - : shape1(s1), shape2(s2), distance(dist) {} - - // Sort operator to sort in ascending order of distance. - bool operator<(const ShapeDist& other) const { - return distance < other.distance; - } - - int shape1; - int shape2; - float distance; -}; - -// Class to encapsulate training processes that use the TrainingSampleSet. -// Initially supports shape clustering and mftrainining. -// Other important features of the MasterTrainer are conditioning the data -// by outlier elimination, replication with perturbation, and serialization. -class MasterTrainer { - public: - MasterTrainer(NormalizationMode norm_mode, bool shape_analysis, - bool replicate_samples, int debug_level); - ~MasterTrainer(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - - // Loads an initial unicharset, or sets one up if the file cannot be read. - void LoadUnicharset(const char* filename); - - // Sets the feature space definition. 
- void SetFeatureSpace(const IntFeatureSpace& fs) { - feature_space_ = fs; - feature_map_.Init(fs); - } - - // Reads the samples and their features from the given file, - // adding them to the trainer with the font_id from the content of the file. - // If verification, then these are verification samples, not training. - void ReadTrainingSamples(const char* page_name, - const FEATURE_DEFS_STRUCT& feature_defs, - bool verification); - - // Adds the given single sample to the trainer, setting the classid - // appropriately from the given unichar_str. - void AddSample(bool verification, const char* unichar_str, - TrainingSample* sample); - - // Loads all pages from the given tif filename and append to page_images_. - // Must be called after ReadTrainingSamples, as the current number of images - // is used as an offset for page numbers in the samples. - void LoadPageImages(const char* filename); - - // Cleans up the samples after initial load from the tr files, and prior to - // saving the MasterTrainer: - // Remaps fragmented chars if running shape analysis. - // Sets up the samples appropriately for class/fontwise access. - // Deletes outlier samples. - void PostLoadCleanup(); - - // Gets the samples ready for training. Use after both - // ReadTrainingSamples+PostLoadCleanup or DeSerialize. - // Re-indexes the features and computes canonical and cloud features. - void PreTrainingSetup(); - - // Sets up the master_shapes_ table, which tells which fonts should stay - // together until they get to a leaf node classifier. - void SetupMasterShapes(); - - // Adds the junk_samples_ to the main samples_ set. Junk samples are initially - // fragments and n-grams (all incorrectly segmented characters). 
- // Various training functions may result in incorrectly segmented characters - // being added to the unicharset of the main samples, perhaps because they - // form a "radical" decomposition of some (Indic) grapheme, or because they - // just look the same as a real character (like rn/m) - // This function moves all the junk samples, to the main samples_ set, but - // desirable junk, being any sample for which the unichar already exists in - // the samples_ unicharset gets the unichar-ids re-indexed to match, but - // anything else gets re-marked as unichar_id 0 (space character) to identify - // it as junk to the error counter. - void IncludeJunk(); - - // Replicates the samples and perturbs them if the enable_replication_ flag - // is set. MUST be used after the last call to OrganizeByFontAndClass on - // the training samples, ie after IncludeJunk if it is going to be used, as - // OrganizeByFontAndClass will eat the replicated samples into the regular - // samples. - void ReplicateAndRandomizeSamplesIfRequired(); - - // Loads the basic font properties file into fontinfo_table_. - // Returns false on failure. - bool LoadFontInfo(const char* filename); - - // Loads the xheight font properties file into xheights_. - // Returns false on failure. - bool LoadXHeights(const char* filename); - - // Reads spacing stats from filename and adds them to fontinfo_table. - // Returns false on failure. - bool AddSpacingInfo(const char *filename); - - // Returns the font id corresponding to the given font name. - // Returns -1 if the font cannot be found. - int GetFontInfoId(const char* font_name); - // Returns the font_id of the closest matching font name to the given - // filename. It is assumed that a substring of the filename will match - // one of the fonts. If more than one is matched, the longest is returned. 
- int GetBestMatchingFontInfoId(const char* filename); - - // Returns the filename of the tr file corresponding to the command-line - // argument with the given index. - const STRING& GetTRFileName(int index) const { - return tr_filenames_[index]; - } - - // Sets up a flat shapetable with one shape per class/font combination. - void SetupFlatShapeTable(ShapeTable* shape_table); - - // Sets up a Clusterer for mftraining on a single shape_id. - // Call FreeClusterer on the return value after use. - CLUSTERER* SetupForClustering(const ShapeTable& shape_table, - const FEATURE_DEFS_STRUCT& feature_defs, - int shape_id, int* num_samples); - - // Writes the given float_classes (produced by SetupForFloat2Int) as inttemp - // to the given inttemp_file, and the corresponding pffmtable. - // The unicharset is the original encoding of graphemes, and shape_set should - // match the size of the shape_table, and may possibly be totally fake. - void WriteInttempAndPFFMTable(const UNICHARSET& unicharset, - const UNICHARSET& shape_set, - const ShapeTable& shape_table, - CLASS_STRUCT* float_classes, - const char* inttemp_file, - const char* pffmtable_file); - - const UNICHARSET& unicharset() const { - return samples_.unicharset(); - } - TrainingSampleSet* GetSamples() { - return &samples_; - } - const ShapeTable& master_shapes() const { - return master_shapes_; - } - - // Generates debug output relating to the canonical distance between the - // two given UTF8 grapheme strings. - void DebugCanonical(const char* unichar_str1, const char* unichar_str2); - #ifndef GRAPHICS_DISABLED - // Debugging for cloud/canonical features. - // Displays a Features window containing: - // If unichar_str2 is in the unicharset, and canonical_font is non-negative, - // displays the canonical features of the char/font combination in red. - // If unichar_str1 is in the unicharset, and cloud_font is non-negative, - // displays the cloud feature of the char/font combination in green. 
- // The canonical features are drawn first to show which ones have no - // matches in the cloud features. - // Until the features window is destroyed, each click in the features window - // will display the samples that have that feature in a separate window. - void DisplaySamples(const char* unichar_str1, int cloud_font, - const char* unichar_str2, int canonical_font); - #endif // GRAPHICS_DISABLED - - void TestClassifierVOld(bool replicate_samples, - ShapeClassifier* test_classifier, - ShapeClassifier* old_classifier); - - // Tests the given test_classifier on the internal samples. - // See TestClassifier for details. - void TestClassifierOnSamples(CountTypes error_mode, - int report_level, - bool replicate_samples, - ShapeClassifier* test_classifier, - STRING* report_string); - // Tests the given test_classifier on the given samples - // error_mode indicates what counts as an error. - // report_levels: - // 0 = no output. - // 1 = bottom-line error rate. - // 2 = bottom-line error rate + time. - // 3 = font-level error rate + time. - // 4 = list of all errors + short classifier debug output on 16 errors. - // 5 = list of all errors + short classifier debug output on 25 errors. - // If replicate_samples is true, then the test is run on an extended test - // sample including replicated and systematically perturbed samples. - // If report_string is non-nullptr, a summary of the results for each font - // is appended to the report_string. - double TestClassifier(CountTypes error_mode, - int report_level, - bool replicate_samples, - TrainingSampleSet* samples, - ShapeClassifier* test_classifier, - STRING* report_string); - - // Returns the average (in some sense) distance between the two given - // shapes, which may contain multiple fonts and/or unichars. - // This function is public to facilitate testing. 
- float ShapeDistance(const ShapeTable& shapes, int s1, int s2); - - private: - // Replaces samples that are always fragmented with the corresponding - // fragment samples. - void ReplaceFragmentedSamples(); - - // Runs a hierarchical agglomerative clustering to merge shapes in the given - // shape_table, while satisfying the given constraints: - // * End with at least min_shapes left in shape_table, - // * No shape shall have more than max_shape_unichars in it, - // * Don't merge shapes where the distance between them exceeds max_dist. - void ClusterShapes(int min_shapes, int max_shape_unichars, - float max_dist, ShapeTable* shape_table); - - private: - NormalizationMode norm_mode_; - // Character set we are training for. - UNICHARSET unicharset_; - // Original feature space. Subspace mapping is contained in feature_map_. - IntFeatureSpace feature_space_; - TrainingSampleSet samples_; - TrainingSampleSet junk_samples_; - TrainingSampleSet verify_samples_; - // Master shape table defines what fonts stay together until the leaves. - ShapeTable master_shapes_; - // Flat shape table has each unichar/font id pair in a separate shape. - ShapeTable flat_shapes_; - // Font metrics gathered from multiple files. - FontInfoTable fontinfo_table_; - // Array of xheights indexed by font ids in fontinfo_table_; - GenericVector xheights_; - - // Non-serialized data initialized by other means or used temporarily - // during loading of training samples. - // Number of different class labels in unicharset_. - int charsetsize_; - // Flag to indicate that we are running shape analysis and need fragments - // fixing. - bool enable_shape_analysis_; - // Flag to indicate that sample replication is required. - bool enable_replication_; - // Array of classids of fragments that replace the correctly segmented chars. - int* fragments_; - // Classid of previous correctly segmented sample that was added. - int prev_unichar_id_; - // Debug output control. 
- int debug_level_; - // Feature map used to construct reduced feature spaces for compact - // classifiers. - IntFeatureMap feature_map_; - // Vector of Pix pointers used for classifiers that need the image. - // Indexed by page_num_ in the samples. - // These images are owned by the trainer and need to be pixDestroyed. - GenericVector page_images_; - // Vector of filenames of loaded tr files. - GenericVector tr_filenames_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_TRAINING_MASTERTRAINER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mf.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mf.cpp deleted file mode 100644 index 106b5e0a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mf.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/****************************************************************************** - ** Filename: mf.c - ** Purpose: Micro-feature interface to flexible feature extractor. - ** Author: Dan Johnson - ** History: Thu May 24 09:08:38 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "mf.h" - -#include "featdefs.h" -#include "mfdefs.h" -#include "mfx.h" - -#include - -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -/** - * Call the old micro-feature extractor and then copy - * the features into the new format. Then deallocate the - * old micro-features. - * @param Blob blob to extract micro-features from - * @param cn_denorm control parameter to feature extractor. - * @return Micro-features for Blob. - */ -FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { - int NumFeatures; - MICROFEATURES Features, OldFeatures; - FEATURE_SET FeatureSet; - FEATURE Feature; - MICROFEATURE OldFeature; - - OldFeatures = BlobMicroFeatures(Blob, cn_denorm); - if (OldFeatures == nullptr) - return nullptr; - NumFeatures = count (OldFeatures); - FeatureSet = NewFeatureSet (NumFeatures); - - Features = OldFeatures; - iterate(Features) { - OldFeature = (MICROFEATURE) first_node (Features); - Feature = NewFeature (&MicroFeatureDesc); - Feature->Params[MFDirection] = OldFeature[ORIENTATION]; - Feature->Params[MFXPosition] = OldFeature[XPOSITION]; - Feature->Params[MFYPosition] = OldFeature[YPOSITION]; - Feature->Params[MFLength] = OldFeature[MFLENGTH]; - - // Bulge features are deprecated and should not be used. Set to 0. 
- Feature->Params[MFBulge1] = 0.0f; - Feature->Params[MFBulge2] = 0.0f; - -#ifndef _WIN32 - // Assert that feature parameters are well defined. - int i; - for (i = 0; i < Feature->Type->NumParams; i++) { - ASSERT_HOST(!std::isnan(Feature->Params[i])); - } -#endif - - AddFeature(FeatureSet, Feature); - } - FreeMicroFeatures(OldFeatures); - return FeatureSet; -} /* ExtractMicros */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mf.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mf.h deleted file mode 100644 index 90115eeb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mf.h +++ /dev/null @@ -1,39 +0,0 @@ -/****************************************************************************** - ** Filename: mf.h - ** Purpose: Micro-feature interface to flexible feature extractor. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef MF_H -#define MF_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" -#include "blobs.h" - -typedef enum { - MFXPosition, MFYPosition, - MFLength, MFDirection, MFBulge1, MFBulge2, - MFCount // For array sizes. 
-} MF_PARAM_NAME; - -typedef float MicroFeature[MFCount]; -/*---------------------------------------------------------------------------- - Private Function Prototypes ------------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfdefs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfdefs.cpp deleted file mode 100644 index 8ca9be68..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfdefs.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/****************************************************************************** - ** Filename: mfdefs.cpp - ** Purpose: Basic routines for manipulating micro-features - ** Author: Dan Johnson - ** History: Mon Jan 22 08:48:58 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "mfdefs.h" -#include "emalloc.h" -#include - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -/** - * This routine allocates and returns a new micro-feature - * data structure. - * @return New MICROFEATURE - */ -MICROFEATURE NewMicroFeature() { - return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK))); -} /* NewMicroFeature */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine deallocates all of the memory consumed by - * a list of micro-features. - * @param MicroFeatures list of micro-features to be freed - * @return none - */ -void FreeMicroFeatures(MICROFEATURES MicroFeatures) { - destroy_nodes(MicroFeatures, Efree); -} /* FreeMicroFeatures */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfdefs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfdefs.h deleted file mode 100644 index 38b18b42..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfdefs.h +++ /dev/null @@ -1,56 +0,0 @@ -/****************************************************************************** - ** Filename: mfdefs.h - ** Purpose: Definition of micro-features - ** Author: Dan Johnson - ** History: Mon Jan 22 08:42:13 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -#ifndef MFDEFS_H -#define MFDEFS_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "oldlist.h" -#include "matchdefs.h" - -/* definition of a list of micro-features */ -typedef LIST MICROFEATURES; - -/* definition of structure of micro-features */ -#define MFSIZE 6 -typedef float MFBLOCK[MFSIZE]; -typedef float *MICROFEATURE; - -/* definitions of individual micro-feature parameters */ -#define XPOSITION 0 -#define YPOSITION 1 -#define MFLENGTH 2 -#define ORIENTATION 3 -#define FIRSTBULGE 4 -#define SECONDBULGE 5 - -/**---------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------**/ - -/* macros for accessing micro-feature lists */ -#define NextFeatureOf(L) ((MICROFEATURE)first_node(L)) - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -MICROFEATURE NewMicroFeature(); - -void FreeMicroFeatures(MICROFEATURES MicroFeatures); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfoutline.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfoutline.cpp deleted file mode 100644 index 1e7deb12..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfoutline.cpp +++ /dev/null @@ -1,463 +0,0 @@ -/****************************************************************************** - ** Filename: mfoutline.c - ** Purpose: Interface to outline struct used for extracting features - ** Author: Dan Johnson - ** History: Thu May 17 08:14:18 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "clusttool.h" //If remove you get cought in a loop somewhere -#include "emalloc.h" -#include "mfoutline.h" -#include "blobs.h" -#include "mfx.h" -#include "params.h" -#include "classify.h" - -#include -#include - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------*/ -/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). - */ -LIST ConvertBlob(TBLOB *blob) { - LIST outlines = NIL_LIST; - return (blob == nullptr) - ? 
NIL_LIST - : ConvertOutlines(blob->outlines, outlines, outer); -} - - -/*---------------------------------------------------------------------------*/ -/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */ -MFOUTLINE ConvertOutline(TESSLINE *outline) { - MFEDGEPT *NewPoint; - MFOUTLINE MFOutline = NIL_LIST; - EDGEPT *EdgePoint; - EDGEPT *StartPoint; - EDGEPT *NextPoint; - - if (outline == nullptr || outline->loop == nullptr) - return MFOutline; - - StartPoint = outline->loop; - EdgePoint = StartPoint; - do { - NextPoint = EdgePoint->next; - - /* filter out duplicate points */ - if (EdgePoint->pos.x != NextPoint->pos.x || - EdgePoint->pos.y != NextPoint->pos.y) { - NewPoint = NewEdgePoint(); - ClearMark(NewPoint); - NewPoint->Hidden = EdgePoint->IsHidden(); - NewPoint->Point.x = EdgePoint->pos.x; - NewPoint->Point.y = EdgePoint->pos.y; - MFOutline = push(MFOutline, NewPoint); - } - EdgePoint = NextPoint; - } while (EdgePoint != StartPoint); - - if (MFOutline != nullptr) - MakeOutlineCircular(MFOutline); - return MFOutline; -} - - -/*---------------------------------------------------------------------------*/ -/** - * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). - * - * @param outline first outline to be converted - * @param mf_outlines list to add converted outlines to - * @param outline_type are the outlines outer or holes? 
- */ -LIST ConvertOutlines(TESSLINE *outline, - LIST mf_outlines, - OUTLINETYPE outline_type) { - MFOUTLINE mf_outline; - - while (outline != nullptr) { - mf_outline = ConvertOutline(outline); - if (mf_outline != nullptr) - mf_outlines = push(mf_outlines, mf_outline); - outline = outline->next; - } - return mf_outlines; -} - -/*---------------------------------------------------------------------------*/ -/** - * This routine searches through the specified outline, computes - * a slope for each vector in the outline, and marks each - * vector as having one of the following directions: - * N, S, E, W, NE, NW, SE, SW - * This information is then stored in the outline and the - * outline is returned. - * @param Outline micro-feature outline to analyze - * @param MinSlope controls "snapping" of segments to horizontal - * @param MaxSlope controls "snapping" of segments to vertical - * @return none - */ -void FindDirectionChanges(MFOUTLINE Outline, - float MinSlope, - float MaxSlope) { - MFEDGEPT *Current; - MFEDGEPT *Last; - MFOUTLINE EdgePoint; - - if (DegenerateOutline (Outline)) - return; - - Last = PointAt (Outline); - Outline = NextPointAfter (Outline); - EdgePoint = Outline; - do { - Current = PointAt (EdgePoint); - ComputeDirection(Last, Current, MinSlope, MaxSlope); - - Last = Current; - EdgePoint = NextPointAfter (EdgePoint); - } - while (EdgePoint != Outline); - -} /* FindDirectionChanges */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine deallocates all of the memory consumed by - * a micro-feature outline. - * @param arg micro-feature outline to be freed - * @return none - */ -void FreeMFOutline(void *arg) { //MFOUTLINE Outline) - MFOUTLINE Start; - MFOUTLINE Outline = (MFOUTLINE) arg; - - /* break the circular outline so we can use std. 
techniques to deallocate */ - Start = list_rest (Outline); - set_rest(Outline, NIL_LIST); - while (Start != nullptr) { - free(first_node(Start)); - Start = pop (Start); - } - -} /* FreeMFOutline */ - - -/*---------------------------------------------------------------------------*/ -/** - * Release all memory consumed by the specified list - * of outlines. - * @param Outlines list of mf-outlines to be freed - * @return none - */ -void FreeOutlines(LIST Outlines) { - destroy_nodes(Outlines, FreeMFOutline); -} /* FreeOutlines */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine searches through the specified outline and finds - * the points at which the outline changes direction. These - * points are then marked as "extremities". This routine is - * used as an alternative to FindExtremities(). It forces the - * endpoints of the microfeatures to be at the direction - * changes rather than at the midpoint between direction - * changes. - * @param Outline micro-feature outline to analyze - * @return none - * @note Globals: none - */ -void MarkDirectionChanges(MFOUTLINE Outline) { - MFOUTLINE Current; - MFOUTLINE Last; - MFOUTLINE First; - - if (DegenerateOutline (Outline)) - return; - - First = NextDirectionChange (Outline); - Last = First; - do { - Current = NextDirectionChange (Last); - MarkPoint (PointAt (Current)); - Last = Current; - } - while (Last != First); - -} /* MarkDirectionChanges */ - - -/*---------------------------------------------------------------------------*/ -/** Return a new edge point for a micro-feature outline. */ -MFEDGEPT *NewEdgePoint() { - return reinterpret_cast(malloc(sizeof(MFEDGEPT))); -} - -/*---------------------------------------------------------------------------*/ -/** - * This routine returns the next point in the micro-feature - * outline that is an extremity. The search starts after - * EdgePoint. 
The routine assumes that the outline being - * searched is not a degenerate outline (i.e. it must have - * 2 or more edge points). - * @param EdgePoint start search from this point - * @return Next extremity in the outline after EdgePoint. - * @note Globals: none - */ -MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { - EdgePoint = NextPointAfter(EdgePoint); - while (!PointAt(EdgePoint)->ExtremityMark) - EdgePoint = NextPointAfter(EdgePoint); - - return (EdgePoint); - -} /* NextExtremity */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine normalizes the coordinates of the specified - * outline so that the outline is deskewed down to the - * baseline, translated so that x=0 is at XOrigin, and scaled - * so that the height of a character cell from descender to - * ascender is 1. Of this height, 0.25 is for the descender, - * 0.25 for the ascender, and 0.5 for the x-height. The - * y coordinate of the baseline is 0. - * @param Outline outline to be normalized - * @param XOrigin x-origin of text - * @return none - * @note Globals: none - */ -void NormalizeOutline(MFOUTLINE Outline, - float XOrigin) { - if (Outline == NIL_LIST) - return; - - MFOUTLINE EdgePoint = Outline; - do { - MFEDGEPT *Current = PointAt(EdgePoint); - Current->Point.y = MF_SCALE_FACTOR * - (Current->Point.y - kBlnBaselineOffset); - Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); - EdgePoint = NextPointAfter(EdgePoint); - } while (EdgePoint != Outline); -} /* NormalizeOutline */ - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine normalizes every outline in Outlines - * according to the currently selected normalization method. - * It also returns the scale factors that it used to do this - * scaling. 
The scale factors returned represent the x and - * y sizes in the normalized coordinate system that correspond - * to 1 pixel in the original coordinate system. - * - * Globals: - * - classify_norm_method method being used for normalization - * - classify_char_norm_range map radius of gyration to this value - * @param Outlines list of outlines to be normalized - * @param XScale x-direction scale factor used by routine - * @param YScale y-direction scale factor used by routine - * @return none (Outlines are changed and XScale and YScale are updated) - */ -void Classify::NormalizeOutlines(LIST Outlines, - float *XScale, - float *YScale) { - MFOUTLINE Outline; - - switch (classify_norm_method) { - case character: - ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); - break; - - case baseline: - iterate(Outlines) { - Outline = (MFOUTLINE) first_node(Outlines); - NormalizeOutline(Outline, 0.0); - } - *XScale = *YScale = MF_SCALE_FACTOR; - break; - } -} /* NormalizeOutlines */ -} // namespace tesseract - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -/** - * Change the direction of every vector in the specified - * outline segment to Direction. The segment to be changed - * starts at Start and ends at End. Note that the previous - * direction of End must also be changed to reflect the - * change in direction of the point before it. 
- * @param Start, End defines segment of outline to be modified - * @param Direction new direction to assign to segment - * @return none - * @note Globals: none - */ -void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { - MFOUTLINE Current; - - for (Current = Start; Current != End; Current = NextPointAfter (Current)) - PointAt (Current)->Direction = Direction; - - PointAt (End)->PreviousDirection = Direction; - -} /* ChangeDirection */ - -/** - * This routine normalizes each point in Outline by - * translating it to the specified center and scaling it - * anisotropically according to the given scale factors. - * @param Outline outline to be character normalized - * @param cn_denorm - * @return none - * @note Globals: none - */ -void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { - MFOUTLINE First, Current; - MFEDGEPT *CurrentPoint; - - if (Outline == NIL_LIST) - return; - - First = Outline; - Current = First; - do { - CurrentPoint = PointAt(Current); - FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); - cn_denorm.LocalNormTransform(pos, &pos); - CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR; - CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; - - Current = NextPointAfter(Current); - } - while (Current != First); - -} /* CharNormalizeOutline */ - -/** - * This routine computes the slope from Start to Finish and - * and then computes the approximate direction of the line - * segment from Start to Finish. The direction is quantized - * into 8 buckets: - * N, S, E, W, NE, NW, SE, SW - * Both the slope and the direction are then stored into - * the appropriate fields of the Start edge point. The - * direction is also stored into the PreviousDirection field - * of the Finish edge point. 
- * @param Start starting point to compute direction from - * @param Finish finishing point to compute direction to - * @param MinSlope slope below which lines are horizontal - * @param MaxSlope slope above which lines are vertical - * @return none - * @note Globals: none - */ -void ComputeDirection(MFEDGEPT *Start, - MFEDGEPT *Finish, - float MinSlope, - float MaxSlope) { - FVECTOR Delta; - - Delta.x = Finish->Point.x - Start->Point.x; - Delta.y = Finish->Point.y - Start->Point.y; - if (Delta.x == 0) { - if (Delta.y < 0) { - Start->Slope = -FLT_MAX; - Start->Direction = south; - } else { - Start->Slope = FLT_MAX; - Start->Direction = north; - } - } else { - Start->Slope = Delta.y / Delta.x; - if (Delta.x > 0) { - if (Delta.y > 0) { - if (Start->Slope > MinSlope) { - if (Start->Slope < MaxSlope) { - Start->Direction = northeast; - } else { - Start->Direction = north; - } - } else { - Start->Direction = east; - } - } - else if (Start->Slope < -MinSlope) { - if (Start->Slope > -MaxSlope) { - Start->Direction = southeast; - } else { - Start->Direction = south; - } - } else { - Start->Direction = east; - } - } else if (Delta.y > 0) { - if (Start->Slope < -MinSlope) { - if (Start->Slope > -MaxSlope) { - Start->Direction = northwest; - } else { - Start->Direction = north; - } - } else { - Start->Direction = west; - } - } else if (Start->Slope > MinSlope) { - if (Start->Slope < MaxSlope) { - Start->Direction = southwest; - } else { - Start->Direction = south; - } - } else { - Start->Direction = west; - } - } - Finish->PreviousDirection = Start->Direction; -} - -/** - * This routine returns the next point in the micro-feature - * outline that has a direction different than EdgePoint. The - * routine assumes that the outline being searched is not a - * degenerate outline (i.e. it must have 2 or more edge points). - * @param EdgePoint start search from this point - * @return Point of next direction change in micro-feature outline. 
- * @note Globals: none - */ -MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { - DIRECTION InitialDirection; - - InitialDirection = PointAt (EdgePoint)->Direction; - - MFOUTLINE next_pt = nullptr; - do { - EdgePoint = NextPointAfter(EdgePoint); - next_pt = NextPointAfter(EdgePoint); - } while (PointAt(EdgePoint)->Direction == InitialDirection && - !PointAt(EdgePoint)->Hidden && - next_pt != nullptr && !PointAt(next_pt)->Hidden); - - return (EdgePoint); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfoutline.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfoutline.h deleted file mode 100644 index a8b934ae..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfoutline.h +++ /dev/null @@ -1,125 +0,0 @@ -/****************************************************************************** - ** Filename: mfoutline.h - ** Purpose: Interface spec for fx outline structures - ** Author: Dan Johnson - ** History: Thu May 17 08:55:32 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#ifndef MFOUTLINE_H -#define MFOUTLINE_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "blobs.h" -#include "host.h" -#include "oldlist.h" -#include "fpoint.h" -#include "params.h" - -#define NORMAL_X_HEIGHT (0.5) -#define NORMAL_BASELINE (0.0) - -typedef LIST MFOUTLINE; - -typedef enum { - north, south, east, west, northeast, northwest, southeast, southwest -} DIRECTION; - -typedef struct { - FPOINT Point; - float Slope; - unsigned Padding:20; - BOOL8 Hidden:TRUE; - BOOL8 ExtremityMark:TRUE; - DIRECTION Direction:4; - DIRECTION PreviousDirection:4; -} MFEDGEPT; - -typedef enum { - outer, hole -} OUTLINETYPE; - -typedef enum { - baseline, character -} NORM_METHOD; - -/**---------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------**/ -#define AverageOf(A,B) (((A) + (B)) / 2) - -/* macro for computing the scale factor to use to normalize characters */ -#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / kBlnXHeight) - -/* macros for manipulating micro-feature outlines */ -#define DegenerateOutline(O) (((O) == NIL_LIST) || ((O) == list_rest(O))) -#define PointAt(O) ((MFEDGEPT *) first_node (O)) -#define NextPointAfter(E) (list_rest (E)) -#define MakeOutlineCircular(O) (set_rest (last (O), (O))) - -/* macros for manipulating micro-feature outline edge points */ -#define ClearMark(P) ((P)->ExtremityMark = FALSE) -#define MarkPoint(P) ((P)->ExtremityMark = TRUE) - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -void ComputeBlobCenter(TBLOB *Blob, TPOINT *BlobCenter); - -LIST ConvertBlob(TBLOB 
*Blob); - -MFOUTLINE ConvertOutline(TESSLINE *Outline); - -LIST ConvertOutlines(TESSLINE *Outline, - LIST ConvertedOutlines, - OUTLINETYPE OutlineType); - -void FilterEdgeNoise(MFOUTLINE Outline, float NoiseSegmentLength); - -void FindDirectionChanges(MFOUTLINE Outline, - float MinSlope, - float MaxSlope); - -void FreeMFOutline(void *agr); //MFOUTLINE Outline); - -void FreeOutlines(LIST Outlines); - -void MarkDirectionChanges(MFOUTLINE Outline); - -MFEDGEPT *NewEdgePoint(); - -MFOUTLINE NextExtremity(MFOUTLINE EdgePoint); - -void NormalizeOutline(MFOUTLINE Outline, - float XOrigin); - -/*---------------------------------------------------------------------------- - Private Function Prototypes ------------------------------------------------------------------------------*/ -void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction); - -// Normalizes the Outline in-place using cn_denorm's local transformation, -// then converts from the integer feature range [0,255] to the clusterer -// feature range of [-0.5, 0.5]. -void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm); - -void ComputeDirection(MFEDGEPT *Start, - MFEDGEPT *Finish, - float MinSlope, - float MaxSlope); - -MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfx.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfx.cpp deleted file mode 100644 index 7771ebe4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfx.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/****************************************************************************** - ** Filename: mfx.c - ** Purpose: Micro feature extraction routines - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - *****************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "mfx.h" -#include "mfdefs.h" -#include "mfoutline.h" -#include "clusttool.h" //NEEDED -#include "intfx.h" -#include "normalis.h" -#include "params.h" - -#include - -/*---------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ - -/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ -double_VAR(classify_min_slope, 0.414213562, - "Slope below which lines are called horizontal"); -double_VAR(classify_max_slope, 2.414213562, - "Slope above which lines are called vertical"); - -/*---------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------*/ -/* miscellaneous macros */ -#define NormalizeAngle(A) ((((A) < 0) ? 
((A) + 2 * M_PI) : (A)) / (2 * M_PI)) - -/*---------------------------------------------------------------------------- - Private Function Prototypes ------------------------------------------------------------------------------*/ -float ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End); - -MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, - MICROFEATURES MicroFeatures); - -MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ - -/** - * This routine extracts micro-features from the specified - * blob and returns a list of the micro-features. All - * micro-features are normalized according to the specified - * line statistics. - * @param Blob blob to extract micro-features from - * @param cn_denorm control parameter to feature extractor - * @return List of micro-features extracted from the blob. 
- */ -MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { - MICROFEATURES MicroFeatures = NIL_LIST; - LIST Outlines; - LIST RemainingOutlines; - MFOUTLINE Outline; - - if (Blob != nullptr) { - Outlines = ConvertBlob(Blob); - - RemainingOutlines = Outlines; - iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); - CharNormalizeOutline(Outline, cn_denorm); - } - - RemainingOutlines = Outlines; - iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node(RemainingOutlines); - FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); - MarkDirectionChanges(Outline); - MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures); - } - FreeOutlines(Outlines); - } - return MicroFeatures; -} /* BlobMicroFeatures */ - -/*--------------------------------------------------------------------------- - Private Code ----------------------------------------------------------------------------*/ - -/** - * This routine computes the orientation parameter of the - * specified micro-feature. The orientation is the angle of - * the vector from Start to End. It is normalized to a number - * between 0 and 1 where 0 corresponds to 0 degrees and 1 - * corresponds to 360 degrees. The actual range is [0,1), i.e. - * 1 is excluded from the range (since it is actual the - * same orientation as 0). This routine assumes that Start - * and End are not the same point. - * @param Start starting edge point of micro-feature - * @param End ending edge point of micro-feature - * @note Globals: none - * @return Orientation parameter for the specified micro-feature. 
- */ -float ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { - float Orientation = NormalizeAngle(AngleFrom(Start->Point, End->Point)); - - /* ensure that round-off errors do not put circular param out of range */ - if ((Orientation < 0) || (Orientation >= 1)) - Orientation = 0; - return (Orientation); -} /* ComputeOrientation */ - -/** - * Convert Outline to MicroFeatures - * @param Outline outline to extract micro-features from - * @param MicroFeatures list of micro-features to add to - * @return List of micro-features with new features added to front. - * @note Globals: none - */ -MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, - MICROFEATURES MicroFeatures) { - MFOUTLINE Current; - MFOUTLINE Last; - MFOUTLINE First; - MICROFEATURE NewFeature; - - if (DegenerateOutline (Outline)) - return (MicroFeatures); - - First = NextExtremity (Outline); - Last = First; - do { - Current = NextExtremity (Last); - if (!PointAt(Current)->Hidden) { - NewFeature = ExtractMicroFeature (Last, Current); - if (NewFeature != nullptr) - MicroFeatures = push (MicroFeatures, NewFeature); - } - Last = Current; - } - while (Last != First); - - return (MicroFeatures); -} /* ConvertToMicroFeatures */ - -/** - * This routine computes the feature parameters which describe - * the micro-feature that starts and Start and ends at End. - * A new micro-feature is allocated, filled with the feature - * parameters, and returned. The routine assumes that - * Start and End are not the same point. If they are the - * same point, nullptr is returned, a warning message is - * printed, and the current outline is dumped to stdout. - * @param Start starting point of micro-feature - * @param End ending point of micro-feature - * @return New micro-feature or nullptr if the feature was rejected. 
- * @note Globals: none - */ -MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { - MICROFEATURE NewFeature; - MFEDGEPT *P1, *P2; - - P1 = PointAt(Start); - P2 = PointAt(End); - - NewFeature = NewMicroFeature (); - NewFeature[XPOSITION] = AverageOf(P1->Point.x, P2->Point.x); - NewFeature[YPOSITION] = AverageOf(P1->Point.y, P2->Point.y); - NewFeature[MFLENGTH] = DistanceBetween(P1->Point, P2->Point); - NewFeature[ORIENTATION] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0); - NewFeature[FIRSTBULGE] = 0.0f; // deprecated - NewFeature[SECONDBULGE] = 0.0f; // deprecated - - return NewFeature; -} /* ExtractMicroFeature */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfx.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfx.h deleted file mode 100644 index 9778a547..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/mfx.h +++ /dev/null @@ -1,45 +0,0 @@ -/****************************************************************************** - ** Filename: mfx.h - ** Purpose: Definition of micro-feature extraction routines - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#ifndef MFX_H -#define MFX_H - -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "mfdefs.h" -#include "params.h" - -class DENORM; -struct TBLOB; - -/*---------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------**/ - -/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ -extern double_VAR_H(classify_min_slope, 0.414213562, - "Slope below which lines are called horizontal"); -extern double_VAR_H(classify_max_slope, 2.414213562, - "Slope above which lines are called vertical"); - -/*---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normfeat.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normfeat.cpp deleted file mode 100644 index 233f2f57..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normfeat.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/****************************************************************************** - ** Filename: normfeat.c - ** Purpose: Definition of char normalization features. - ** Author: Dan Johnson - ** History: 12/14/90, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "normfeat.h" - -#include "intfx.h" -#include "featdefs.h" -#include "mfoutline.h" - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ - -/** Return the length of the outline in baseline normalized form. */ -float ActualOutlineLength(FEATURE Feature) { - return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); -} - -/** - * Return the character normalization feature for a blob. - * - * The features returned are in a scale where the x-height has been - * normalized to live in the region y = [-0.25 .. 0.25]. Example ranges - * for English below are based on the Linux font collection on 2009-12-04: - * - * - Params[CharNormY] - * - The y coordinate of the grapheme's centroid. - * - English: [-0.27, 0.71] - * - * - Params[CharNormLength] - * - The length of the grapheme's outline (tiny segments discarded), - * divided by 10.0=LENGTH_COMPRESSION. - * - English: [0.16, 0.85] - * - * - Params[CharNormRx] - * - The radius of gyration about the x axis, as measured from CharNormY. - * - English: [0.011, 0.34] - * - * - Params[CharNormRy] - * - The radius of gyration about the y axis, as measured from - * the x center of the grapheme's bounding box. 
- * - English: [0.011, 0.31] - */ -FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { - FEATURE_SET feature_set = NewFeatureSet(1); - FEATURE feature = NewFeature(&CharNormDesc); - - feature->Params[CharNormY] = - MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); - feature->Params[CharNormLength] = - MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; - feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; - feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; - - AddFeature(feature_set, feature); - - return feature_set; -} /* ExtractCharNormFeatures */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normfeat.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normfeat.h deleted file mode 100644 index 5660b81e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normfeat.h +++ /dev/null @@ -1,40 +0,0 @@ -/****************************************************************************** - ** Filename: normfeat.h - ** Purpose: Definition of character normalization features. - ** Author: Dan Johnson - ** History: 12/14/90, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- *****************************************************************************/ - -#ifndef NORMFEAT_H -#define NORMFEAT_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" - -#define LENGTH_COMPRESSION (10.0) - -typedef enum { - CharNormY, CharNormLength, CharNormRx, CharNormRy -} NORM_PARAM_NAME; - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -float ActualOutlineLength(FEATURE Feature); - -FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normmatch.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normmatch.cpp deleted file mode 100644 index 311bf794..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normmatch.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/****************************************************************************** - ** Filename: normmatch.c - ** Purpose: Simple matcher based on character normalization features. - ** Author: Dan Johnson - ** History: Wed Dec 19 16:18:06 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "normmatch.h" - -#include -#include - -#include "classify.h" -#include "clusttool.h" -#include "emalloc.h" -#include "globals.h" -#include "helpers.h" -#include "normfeat.h" -#include "unicharset.h" -#include "params.h" - -struct NORM_PROTOS -{ - int NumParams; - PARAM_DESC *ParamDesc; - LIST* Protos; - int NumProtos; -}; - -/*---------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------*/ -double NormEvidenceOf(double NormAdj); - -void PrintNormMatch(FILE *File, - int NumParams, - PROTOTYPE *Proto, - FEATURE Feature); - -NORM_PROTOS *ReadNormProtos(FILE *File); - -/*---------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ - -/** control knobs used to control the normalization adjustment process */ -double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); -double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); -/** Weight of width variance against height and vertical position. 
*/ -const double kWidthErrorWeighting = 0.125; - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine compares Features against each character - * normalization proto for ClassId and returns the match - * rating of the best match. - * @param ClassId id of class to match against - * @param feature character normalization feature - * @param DebugMatch controls dump of debug info - * - * Globals: - * #NormProtos character normalization prototypes - * - * @return Best match rating for Feature against protos of ClassId. - */ -float Classify::ComputeNormMatch(CLASS_ID ClassId, - const FEATURE_STRUCT& feature, - bool DebugMatch) { - LIST Protos; - float BestMatch; - float Match; - float Delta; - PROTOTYPE *Proto; - int ProtoId; - - if (ClassId >= NormProtos->NumProtos) { - ClassId = NO_CLASS; - } - - /* handle requests for classification as noise */ - if (ClassId == NO_CLASS) { - /* kludge - clean up constants and make into control knobs later */ - Match = (feature.Params[CharNormLength] * - feature.Params[CharNormLength] * 500.0 + - feature.Params[CharNormRx] * - feature.Params[CharNormRx] * 8000.0 + - feature.Params[CharNormRy] * - feature.Params[CharNormRy] * 8000.0); - return (1.0 - NormEvidenceOf (Match)); - } - - BestMatch = FLT_MAX; - Protos = NormProtos->Protos[ClassId]; - - if (DebugMatch) { - tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId)); - } - - ProtoId = 0; - iterate(Protos) { - Proto = (PROTOTYPE *) first_node (Protos); - Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; - Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY]; - if (DebugMatch) { - tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", - Proto->Mean[CharNormY], Delta, - 
Proto->Weight.Elliptical[CharNormY], Match); - } - Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx]; - Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx]; - if (DebugMatch) { - tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", - Proto->Mean[CharNormRx], Delta, - Proto->Weight.Elliptical[CharNormRx], Match); - } - // Ry is width! See intfx.cpp. - Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy]; - if (DebugMatch) { - tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", - Proto->Mean[CharNormRy], Delta, - Proto->Weight.Elliptical[CharNormRy]); - } - Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy]; - Delta *= kWidthErrorWeighting; - Match += Delta; - if (DebugMatch) { - tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", - Match, Match / classify_norm_adj_midpoint, - NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match))); - } - - if (Match < BestMatch) - BestMatch = Match; - - ProtoId++; - } - return 1.0 - NormEvidenceOf(BestMatch); -} /* ComputeNormMatch */ - -void Classify::FreeNormProtos() { - if (NormProtos != nullptr) { - for (int i = 0; i < NormProtos->NumProtos; i++) - FreeProtoList(&NormProtos->Protos[i]); - Efree(NormProtos->Protos); - Efree(NormProtos->ParamDesc); - Efree(NormProtos); - NormProtos = nullptr; - } -} -} // namespace tesseract - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -/** - * @name NormEvidenceOf - * - * Return the new type of evidence number corresponding to this - * normalization adjustment. 
The equation that represents the transform is: - * 1 / (1 + (NormAdj / midpoint) ^ curl) - */ -double NormEvidenceOf(double NormAdj) { - NormAdj /= classify_norm_adj_midpoint; - - if (classify_norm_adj_curl == 3) - NormAdj = NormAdj * NormAdj * NormAdj; - else if (classify_norm_adj_curl == 2) - NormAdj = NormAdj * NormAdj; - else - NormAdj = pow (NormAdj, classify_norm_adj_curl); - return (1.0 / (1.0 + NormAdj)); -} - - -/*---------------------------------------------------------------------------*/ -/** - * This routine dumps out detailed normalization match info. - * @param File open text file to dump match debug info to - * @param NumParams # of parameters in proto and feature - * @param Proto[] array of prototype parameters - * @param Feature[] array of feature parameters - * Globals: none - * @return none - */ -void PrintNormMatch(FILE *File, - int NumParams, - PROTOTYPE *Proto, - FEATURE Feature) { - int i; - float ParamMatch; - float TotalMatch; - - for (i = 0, TotalMatch = 0.0; i < NumParams; i++) { - ParamMatch = (Feature->Params[i] - Mean(Proto, i)) / - StandardDeviation(Proto, i); - - fprintf (File, " %6.1f", ParamMatch); - - if (i == CharNormY || i == CharNormRx) - TotalMatch += ParamMatch * ParamMatch; - } - fprintf (File, " --> %6.1f (%4.2f)\n", - TotalMatch, NormEvidenceOf (TotalMatch)); - -} /* PrintNormMatch */ - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * This routine allocates a new data structure to hold - * a set of character normalization protos. It then fills in - * the data structure by reading from the specified File. - * @param fp open text file to read normalization protos from - * Globals: none - * @return Character normalization protos. 
- */ -NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { - NORM_PROTOS *NormProtos; - int i; - char unichar[2 * UNICHAR_LEN + 1]; - UNICHAR_ID unichar_id; - LIST Protos; - int NumProtos; - - /* allocate and initialization data structure */ - NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS)); - NormProtos->NumProtos = unicharset.size(); - NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST)); - for (i = 0; i < NormProtos->NumProtos; i++) - NormProtos->Protos[i] = NIL_LIST; - - /* read file header and save in data structure */ - NormProtos->NumParams = ReadSampleSize(fp); - NormProtos->ParamDesc = ReadParamDesc(fp, NormProtos->NumParams); - - /* read protos for each class into a separate list */ - const int kMaxLineSize = 100; - char line[kMaxLineSize]; - while (fp->FGets(line, kMaxLineSize) != nullptr) { - if (sscanf(line, "%s %d", unichar, &NumProtos) != 2) continue; - if (unicharset.contains_unichar(unichar)) { - unichar_id = unicharset.unichar_to_id(unichar); - Protos = NormProtos->Protos[unichar_id]; - for (i = 0; i < NumProtos; i++) - Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams)); - NormProtos->Protos[unichar_id] = Protos; - } else { - tprintf("Error: unichar %s in normproto file is not in unichar set.\n", - unichar); - for (i = 0; i < NumProtos; i++) - FreePrototype(ReadPrototype(fp, NormProtos->NumParams)); - } - } - return (NormProtos); -} /* ReadNormProtos */ -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normmatch.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normmatch.h deleted file mode 100644 index 89f625d8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/normmatch.h +++ /dev/null @@ -1,37 +0,0 @@ -/****************************************************************************** - ** Filename: normmatch.h - ** Purpose: Simple matcher based on character normalization features. 
- ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef NORMMATCH_H -#define NORMMATCH_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "matchdefs.h" -#include "ocrfeatures.h" -#include "params.h" - -/**---------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------**/ - -/* control knobs used to control the normalization adjustment process */ -extern double_VAR_H(classify_norm_adj_midpoint, 32.0, - "Norm adjust midpoint ..."); -extern double_VAR_H(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/ocrfeatures.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/ocrfeatures.cpp deleted file mode 100644 index 9579e606..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/ocrfeatures.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/****************************************************************************** - ** Filename: ocrfeatures.cpp - ** Purpose: Generic definition of a feature. 
- ** Author: Dan Johnson - ** History: Mon May 21 10:49:04 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "ocrfeatures.h" -#include "emalloc.h" -#include "callcpp.h" -#include "scanutils.h" - -#include -#include - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/** - * Add a feature to a feature set. If the feature set is - * already full, FALSE is returned to indicate that the - * feature could not be added to the set; otherwise, TRUE is - * returned. - * @param FeatureSet set of features to add Feature to - * @param Feature feature to be added to FeatureSet - * @return TRUE if feature added to set, FALSE if set is already full. - */ -bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { - if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) { - FreeFeature(Feature); - return false; - } - - FeatureSet->Features[FeatureSet->NumFeatures++] = Feature; - return true; -} /* AddFeature */ - -/** - * Release the memory consumed by the specified feature. 
- * @param Feature feature to be deallocated. - * @return none - */ -void FreeFeature(FEATURE Feature) { free(Feature); } /* FreeFeature */ - -/** - * Release the memory consumed by the specified feature - * set. This routine also frees the memory consumed by the - * features contained in the set. - * @param FeatureSet set of features to be freed - * @return none - */ -void FreeFeatureSet(FEATURE_SET FeatureSet) { - int i; - - if (FeatureSet) { - for (i = 0; i < FeatureSet->NumFeatures; i++) - FreeFeature(FeatureSet->Features[i]); - free(FeatureSet); - } -} /* FreeFeatureSet */ - -/** - * Allocate and return a new feature of the specified - * type. - * @param FeatureDesc description of feature to be created. - * @return New #FEATURE. - */ -FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { - FEATURE Feature; - - Feature = (FEATURE)malloc(sizeof(FEATURE_STRUCT) + - (FeatureDesc->NumParams - 1) * sizeof(float)); - Feature->Type = FeatureDesc; - return (Feature); - -} /* NewFeature */ - -/** - * Allocate and return a new feature set large enough to - * hold the specified number of features. - * @param NumFeatures maximum # of features to be put in feature set - * @return New #FEATURE_SET. - */ -FEATURE_SET NewFeatureSet(int NumFeatures) { - FEATURE_SET FeatureSet; - - FeatureSet = (FEATURE_SET) Emalloc (sizeof (FEATURE_SET_STRUCT) + - (NumFeatures - 1) * sizeof (FEATURE)); - FeatureSet->MaxNumFeatures = NumFeatures; - FeatureSet->NumFeatures = 0; - return (FeatureSet); - -} /* NewFeatureSet */ - -/** - * Create a new feature of the specified type and read in - * the value of its parameters from File. The extra penalty - * for the feature is also computed by calling the appropriate - * function for the specified feature type. The correct text - * representation for a feature is a list of N floats where - * N is the number of parameters in the feature. 
- * @param File open text file to read feature from - * @param FeatureDesc specifies type of feature to read from File - * @return New #FEATURE read from File. - */ -FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { - FEATURE Feature; - int i; - - Feature = NewFeature (FeatureDesc); - for (i = 0; i < Feature->Type->NumParams; i++) { - ASSERT_HOST(tfscanf(File, "%f", &(Feature->Params[i])) == 1); -#ifndef _WIN32 - assert (!std::isnan(Feature->Params[i])); -#endif - } - return Feature; -} - -/** - * Create a new feature set of the specified type and read in - * the features from File. The correct text representation - * for a feature set is an integer which specifies the number (N) - * of features in a set followed by a list of N feature - * descriptions. - * @param File open text file to read new feature set from - * @param FeatureDesc specifies type of feature to read from File - * @return New feature set read from File. - */ -FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { - int NumFeatures; - ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1); - ASSERT_HOST(NumFeatures >= 0); - - FEATURE_SET FeatureSet = NewFeatureSet(NumFeatures); - for (int i = 0; i < NumFeatures; i++) - AddFeature(FeatureSet, ReadFeature (File, FeatureDesc)); - - return FeatureSet; -} - -/** - * Appends a textual representation of Feature to str. - * This representation is simply a list of the N parameters - * of the feature, terminated with a newline. It is assumed - * that the ExtraPenalty field can be reconstructed from the - * parameters of the feature. It is also assumed that the - * feature type information is specified or assumed elsewhere. 
- * @param Feature feature to write out to str - * @param str string to write Feature to - * @return none - */ -void WriteFeature(FEATURE Feature, STRING* str) { - for (int i = 0; i < Feature->Type->NumParams; i++) { -#ifndef WIN32 - assert(!std::isnan(Feature->Params[i])); -#endif - str->add_str_double(" ", Feature->Params[i]); - } - *str += "\n"; -} /* WriteFeature */ - -/** - * Write a textual representation of FeatureSet to File. - * This representation is an integer specifying the number of - * features in the set, followed by a newline, followed by - * text representations for each feature in the set. - * @param FeatureSet feature set to write to File - * @param str string to write Feature to - * @return none - */ -void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { - if (FeatureSet) { - str->add_str_int("", FeatureSet->NumFeatures); - *str += "\n"; - for (int i = 0; i < FeatureSet->NumFeatures; i++) { - WriteFeature(FeatureSet->Features[i], str); - } - } -} /* WriteFeatureSet */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/ocrfeatures.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/ocrfeatures.h deleted file mode 100644 index 3ca9bfd9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/ocrfeatures.h +++ /dev/null @@ -1,124 +0,0 @@ -/****************************************************************************** - ** Filename: features.h - ** Purpose: Generic definition of a feature. - ** Author: Dan Johnson - ** History: Sun May 20 10:28:30 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef FEATURES_H -#define FEATURES_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "blobs.h" - -#include - -class DENORM; -struct INT_FX_RESULT_STRUCT; - -#undef Min -#undef Max -#define FEAT_NAME_SIZE 80 - -// A character is described by multiple sets of extracted features. Each -// set contains a number of features of a particular type, for example, a -// set of bays, or a set of closures, or a set of microfeatures. Each -// feature consists of a number of parameters. All features within a -// feature set contain the same number of parameters. All circular -// parameters are required to be the first parameters in the feature. 
- -struct PARAM_DESC { - int8_t Circular; // TRUE if dimension wraps around - int8_t NonEssential; // TRUE if dimension not used in searches - float Min; // low end of range for circular dimensions - float Max; // high end of range for circular dimensions - float Range; // Max - Min - float HalfRange; // (Max - Min)/2 - float MidRange; // (Max + Min)/2 -}; - -struct FEATURE_DESC_STRUCT { - uint16_t NumParams; // total # of params - const char* ShortName; // short name for feature - const PARAM_DESC* ParamDesc; // array - one per param -}; -using FEATURE_DESC = FEATURE_DESC_STRUCT*; - -struct FEATURE_STRUCT { - const FEATURE_DESC_STRUCT* Type; // points to description of feature type - float Params[1]; // variable size array - params for feature -}; -using FEATURE = FEATURE_STRUCT*; - -struct FEATURE_SET_STRUCT { - uint16_t NumFeatures; // number of features in set - uint16_t MaxNumFeatures; // maximum size of feature set - FEATURE Features[1]; // variable size array of features -}; -using FEATURE_SET = FEATURE_SET_STRUCT*; - -// A generic character description as a char pointer. In reality, it will be -// a pointer to some data structure. Paired feature extractors/matchers need -// to agree on the data structure to be used, however, the high level -// classifier does not need to know the details of this data structure. -using CHAR_FEATURES = char*; - -/*---------------------------------------------------------------------- - Macros for defining the parameters of a new features -----------------------------------------------------------------------*/ -#define StartParamDesc(Name) const PARAM_DESC Name[] = { -#define DefineParam(Circular, NonEssential, Min, Max) \ - {Circular, \ - NonEssential, \ - Min, \ - Max, \ - (Max) - (Min), \ - (((Max) - (Min)) / 2.0), \ - (((Max) + (Min)) / 2.0)}, - -#define EndParamDesc }; - -/*---------------------------------------------------------------------- -Macro for describing a new feature. 
The parameters of the macro -are as follows: - -DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName) -----------------------------------------------------------------------*/ -#define DefineFeature(Name, NL, NC, SN, PN) \ - const FEATURE_DESC_STRUCT Name = {((NL) + (NC)), SN, PN}; - -/*---------------------------------------------------------------------- - Generic routines that work for all feature types -----------------------------------------------------------------------*/ -bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); - -void FreeFeature(FEATURE Feature); - -void FreeFeatureSet(FEATURE_SET FeatureSet); - -FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc); - -FEATURE_SET NewFeatureSet(int NumFeatures); - -FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc); - -FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc); - -void WriteFeature(FEATURE Feature, STRING* str); - -void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/outfeat.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/outfeat.cpp deleted file mode 100644 index 119e7ba2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/outfeat.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/****************************************************************************** - ** Filename: outfeat.c - ** Purpose: Definition of outline-features. - ** Author: Dan Johnson - ** History: 11/13/90, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "outfeat.h" - -#include "classify.h" -#include "featdefs.h" -#include "mfoutline.h" -#include "ocrfeatures.h" - -#include - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * Convert each segment in the outline to a feature - * and return the features. - * @param Blob blob to extract pico-features from - * @return Outline-features for Blob. 
- * @note Globals: none - */ -FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { - LIST Outlines; - LIST RemainingOutlines; - MFOUTLINE Outline; - FEATURE_SET FeatureSet; - float XScale, YScale; - - FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES); - if (Blob == nullptr) - return (FeatureSet); - - Outlines = ConvertBlob (Blob); - - NormalizeOutlines(Outlines, &XScale, &YScale); - RemainingOutlines = Outlines; - iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); - ConvertToOutlineFeatures(Outline, FeatureSet); - } - if (classify_norm_method == baseline) - NormalizeOutlineX(FeatureSet); - FreeOutlines(Outlines); - return (FeatureSet); -} /* ExtractOutlineFeatures */ -} // namespace tesseract - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine computes the midpoint between Start and - * End to obtain the x,y position of the outline-feature. It - * also computes the direction from Start to End as the - * direction of the outline-feature and the distance from - * Start to End as the length of the outline-feature. - * This feature is then - * inserted into the next feature slot in FeatureSet. 
- * @param Start starting point of outline-feature - * @param End ending point of outline-feature - * @param FeatureSet set to add outline-feature to - * @return none (results are placed in FeatureSet) - * @note Globals: none - */ -void AddOutlineFeatureToSet(FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet) { - FEATURE Feature; - - Feature = NewFeature(&OutlineFeatDesc); - Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); - Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); - Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); - Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); - AddFeature(FeatureSet, Feature); - -} /* AddOutlineFeatureToSet */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine steps converts each section in the specified - * outline to a feature described by its x,y position, length - * and angle. - * @param Outline outline to extract outline-features from - * @param FeatureSet set of features to add outline-features to - * @return none (results are returned in FeatureSet) - * @note Globals: none - */ -void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { - MFOUTLINE Next; - MFOUTLINE First; - FPOINT FeatureStart; - FPOINT FeatureEnd; - - if (DegenerateOutline (Outline)) - return; - - First = Outline; - Next = First; - do { - FeatureStart = PointAt(Next)->Point; - Next = NextPointAfter(Next); - - /* note that an edge is hidden if the ending point of the edge is - marked as hidden. This situation happens because the order of - the outlines is reversed when they are converted from the old - format. In the old format, a hidden edge is marked by the - starting point for that edge. 
*/ - if (!PointAt(Next)->Hidden) { - FeatureEnd = PointAt(Next)->Point; - AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); - } - } - while (Next != First); -} /* ConvertToOutlineFeatures */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine computes the weighted average x position - * over all of the outline-features in FeatureSet and then - * renormalizes the outline-features to force this average - * to be the x origin (i.e. x=0). - * @param FeatureSet outline-features to be normalized - * @return none (FeatureSet is changed) - * @note Globals: none - */ -void NormalizeOutlineX(FEATURE_SET FeatureSet) { - int i; - FEATURE Feature; - float Length; - float TotalX = 0.0; - float TotalWeight = 0.0; - float Origin; - - if (FeatureSet->NumFeatures <= 0) - return; - - for (i = 0; i < FeatureSet->NumFeatures; i++) { - Feature = FeatureSet->Features[i]; - Length = Feature->Params[OutlineFeatLength]; - TotalX += Feature->Params[OutlineFeatX] * Length; - TotalWeight += Length; - } - Origin = TotalX / TotalWeight; - - for (i = 0; i < FeatureSet->NumFeatures; i++) { - Feature = FeatureSet->Features[i]; - Feature->Params[OutlineFeatX] -= Origin; - } -} /* NormalizeOutlineX */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/outfeat.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/outfeat.h deleted file mode 100644 index a3c88975..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/outfeat.h +++ /dev/null @@ -1,48 +0,0 @@ -/****************************************************************************** - ** Filename: outfeat.h - ** Purpose: Definition of outline features. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef OUTFEAT_H -#define OUTFEAT_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" -#include "fpoint.h" -#include "mfoutline.h" - -typedef enum { - OutlineFeatX, - OutlineFeatY, - OutlineFeatLength, - OutlineFeatDir -} OUTLINE_FEAT_PARAM_NAME; - -#define MAX_OUTLINE_FEATURES (100) - -/*--------------------------------------------------------------------------- - Privat Function Prototypes -----------------------------------------------------------------------------*/ -void AddOutlineFeatureToSet(FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet); - -void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet); - -void NormalizeOutlineX(FEATURE_SET FeatureSet); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/picofeat.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/picofeat.cpp deleted file mode 100644 index 53bb8f3b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/picofeat.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/****************************************************************************** - ** Filename: picofeat.c - ** Purpose: Definition of pico-features. - ** Author: Dan Johnson - ** History: 9/4/90, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "picofeat.h" - -#include "classify.h" -#include "featdefs.h" -#include "fpoint.h" -#include "mfoutline.h" -#include "ocrfeatures.h" -#include "params.h" -#include "trainingsample.h" - -#include -#include - -/*--------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ - -double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); - -/*--------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------*/ -void ConvertSegmentToPicoFeat(FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet); - -void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); - -void NormalizePicoX(FEATURE_SET FeatureSet); - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -namespace tesseract { -/** - * 
Operation: Dummy for now. - * - * Globals: - * - classify_norm_method normalization method currently specified - * @param Blob blob to extract pico-features from - * @return Pico-features for Blob. - */ -FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { - LIST Outlines; - LIST RemainingOutlines; - MFOUTLINE Outline; - FEATURE_SET FeatureSet; - float XScale, YScale; - - FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); - Outlines = ConvertBlob(Blob); - NormalizeOutlines(Outlines, &XScale, &YScale); - RemainingOutlines = Outlines; - iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); - ConvertToPicoFeatures2(Outline, FeatureSet); - } - if (classify_norm_method == baseline) - NormalizePicoX(FeatureSet); - FreeOutlines(Outlines); - return (FeatureSet); - -} /* ExtractPicoFeatures */ -} // namespace tesseract - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine converts an entire segment of an outline - * into a set of pico features which are added to - * FeatureSet. The length of the segment is rounded to the - * nearest whole number of pico-features. The pico-features - * are spaced evenly over the entire segment. 
- * Globals: - * - classify_pico_feature_length length of a single pico-feature - * @param Start starting point of pico-feature - * @param End ending point of pico-feature - * @param FeatureSet set to add pico-feature to - * @return none (results are placed in FeatureSet) - */ -void ConvertSegmentToPicoFeat(FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet) { - FEATURE Feature; - float Angle; - float Length; - int NumFeatures; - FPOINT Center; - FPOINT Delta; - int i; - - Angle = NormalizedAngleFrom (Start, End, 1.0); - Length = DistanceBetween (*Start, *End); - NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5); - if (NumFeatures < 1) - NumFeatures = 1; - - /* compute vector for one pico feature */ - Delta.x = XDelta (*Start, *End) / NumFeatures; - Delta.y = YDelta (*Start, *End) / NumFeatures; - - /* compute position of first pico feature */ - Center.x = Start->x + Delta.x / 2.0; - Center.y = Start->y + Delta.y / 2.0; - - /* compute each pico feature in segment and add to feature set */ - for (i = 0; i < NumFeatures; i++) { - Feature = NewFeature (&PicoFeatDesc); - Feature->Params[PicoFeatDir] = Angle; - Feature->Params[PicoFeatX] = Center.x; - Feature->Params[PicoFeatY] = Center.y; - AddFeature(FeatureSet, Feature); - - Center.x += Delta.x; - Center.y += Delta.y; - } -} /* ConvertSegmentToPicoFeat */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine steps through the specified outline and cuts it - * up into pieces of equal length. These pieces become the - * desired pico-features. Each segment in the outline - * is converted into an integral number of pico-features. 
- * - * Globals: - * - classify_pico_feature_length length of features to be extracted - * @param Outline outline to extract micro-features from - * @param FeatureSet set of features to add pico-features to - * @return none (results are returned in FeatureSet) - */ -void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { - MFOUTLINE Next; - MFOUTLINE First; - MFOUTLINE Current; - - if (DegenerateOutline(Outline)) - return; - - First = Outline; - Current = First; - Next = NextPointAfter(Current); - do { - /* note that an edge is hidden if the ending point of the edge is - marked as hidden. This situation happens because the order of - the outlines is reversed when they are converted from the old - format. In the old format, a hidden edge is marked by the - starting point for that edge. */ - if (!(PointAt(Next)->Hidden)) - ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), - &(PointAt(Next)->Point), FeatureSet); - - Current = Next; - Next = NextPointAfter(Current); - } - while (Current != First); - -} /* ConvertToPicoFeatures2 */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine computes the average x position over all - * of the pico-features in FeatureSet and then renormalizes - * the pico-features to force this average to be the x origin - * (i.e. x=0). 
- * @param FeatureSet pico-features to be normalized - * @return none (FeatureSet is changed) - * @note Globals: none - */ -void NormalizePicoX(FEATURE_SET FeatureSet) { - int i; - FEATURE Feature; - float Origin = 0.0; - - for (i = 0; i < FeatureSet->NumFeatures; i++) { - Feature = FeatureSet->Features[i]; - Origin += Feature->Params[PicoFeatX]; - } - Origin /= FeatureSet->NumFeatures; - - for (i = 0; i < FeatureSet->NumFeatures; i++) { - Feature = FeatureSet->Features[i]; - Feature->Params[PicoFeatX] -= Origin; - } -} /* NormalizePicoX */ - -namespace tesseract { -/*---------------------------------------------------------------------------*/ -/** - * @param blob blob to extract features from - * @param fx_info - * @return Integer character-normalized features for blob. - */ -FEATURE_SET Classify::ExtractIntCNFeatures( - const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { - INT_FX_RESULT_STRUCT local_fx_info(fx_info); - GenericVector bl_features; - tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( - blob, false, &local_fx_info, &bl_features); - if (sample == nullptr) return nullptr; - - uint32_t num_features = sample->num_features(); - const INT_FEATURE_STRUCT* features = sample->features(); - FEATURE_SET feature_set = NewFeatureSet(num_features); - for (uint32_t f = 0; f < num_features; ++f) { - FEATURE feature = NewFeature(&IntFeatDesc); - - feature->Params[IntX] = features[f].X; - feature->Params[IntY] = features[f].Y; - feature->Params[IntDir] = features[f].Theta; - AddFeature(feature_set, feature); - } - delete sample; - - return feature_set; -} /* ExtractIntCNFeatures */ - -/*---------------------------------------------------------------------------*/ -/** - * @param blob blob to extract features from - * @param fx_info - * @return Geometric (top/bottom/width) features for blob. 
- */ -FEATURE_SET Classify::ExtractIntGeoFeatures( - const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { - INT_FX_RESULT_STRUCT local_fx_info(fx_info); - GenericVector bl_features; - tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( - blob, false, &local_fx_info, &bl_features); - if (sample == nullptr) return nullptr; - - FEATURE_SET feature_set = NewFeatureSet(1); - FEATURE feature = NewFeature(&IntFeatDesc); - - feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); - feature->Params[GeoTop] = sample->geo_feature(GeoTop); - feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); - AddFeature(feature_set, feature); - delete sample; - - return feature_set; -} /* ExtractIntGeoFeatures */ - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/picofeat.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/picofeat.h deleted file mode 100644 index b3f0b90f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/picofeat.h +++ /dev/null @@ -1,64 +0,0 @@ -/****************************************************************************** - ** Filename: picofeat.h - ** Purpose: Definition of pico features. - ** Author: Dan Johnson - ** History: 9/4/90, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#ifndef PICOFEAT_H -#define PICOFEAT_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" -#include "params.h" - -// Enum for the order/type of params in IntFeatDesc. -enum IntParams { - IntX, // x-position (0-255). - IntY, // y-position (0-255). - IntDir // Direction (0-255, circular). -}; - -// Enum for the order/type of params in GeoFeatDesc. -enum GeoParams { - GeoBottom, // Bounding box bottom in baseline space (0-255). - GeoTop, // Bounding box top in baseline space (0-255). - GeoWidth, // Bounding box width in baseline space (0-255). - - GeoCount // Number of geo features. -}; - -typedef enum { PicoFeatY, PicoFeatDir, PicoFeatX } PICO_FEAT_PARAM_NAME; - -#define MAX_PICO_FEATURES (1000) - -/*--------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ - -extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -#define GetPicoFeatureLength() (PicoFeatureLength) - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern TESS_API float PicoFeatureLength; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/protos.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/protos.cpp deleted file mode 100644 index f316b02d..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/protos.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* -*-C-*- - ****************************************************************************** - * - * File: protos.cpp (Formerly protos.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Mon Mar 4 14:51:24 1991 (Dan Johnson) danj@hpgrlj - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *****************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "protos.h" -#include "emalloc.h" -#include "callcpp.h" -#include "tprintf.h" -#include "globals.h" -#include "classify.h" -#include "params.h" - -#include -#include - -#define PROTO_INCREMENT 32 -#define CONFIG_INCREMENT 16 - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -CLASS_STRUCT TrainingData[NUMBER_OF_CLASSES]; - -STRING_VAR(classify_training_file, "MicroFeatures", "Training file"); - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/** - * @name AddConfigToClass - * - * Add a new config to this class. Malloc new space and copy the - * old configs if necessary. Return the config id for the new config. - * - * @param Class The class to add to - */ -int AddConfigToClass(CLASS_TYPE Class) { - int NewNumConfigs; - int NewConfig; - int MaxNumProtos; - BIT_VECTOR Config; - - MaxNumProtos = Class->MaxNumProtos; - - if (Class->NumConfigs >= Class->MaxNumConfigs) { - /* add configs in CONFIG_INCREMENT chunks at a time */ - NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) / - CONFIG_INCREMENT) * CONFIG_INCREMENT); - - Class->Configurations = - (CONFIGS) Erealloc (Class->Configurations, - sizeof (BIT_VECTOR) * NewNumConfigs); - - Class->MaxNumConfigs = NewNumConfigs; - } - NewConfig = Class->NumConfigs++; - Config = NewBitVector (MaxNumProtos); - Class->Configurations[NewConfig] = Config; - zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos)); - - return (NewConfig); -} - - -/** - * @name AddProtoToClass - * - * Add a new proto to this class. 
Malloc new space and copy the - * old protos if necessary. Return the proto id for the new proto. - * - * @param Class The class to add to - */ -int AddProtoToClass(CLASS_TYPE Class) { - int i; - int Bit; - int NewNumProtos; - int NewProto; - BIT_VECTOR Config; - - if (Class->NumProtos >= Class->MaxNumProtos) { - /* add protos in PROTO_INCREMENT chunks at a time */ - NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) / - PROTO_INCREMENT) * PROTO_INCREMENT); - - Class->Prototypes = (PROTO) Erealloc (Class->Prototypes, - sizeof (PROTO_STRUCT) * - NewNumProtos); - - Class->MaxNumProtos = NewNumProtos; - - for (i = 0; i < Class->NumConfigs; i++) { - Config = Class->Configurations[i]; - Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos); - - for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++) - reset_bit(Config, Bit); - } - } - NewProto = Class->NumProtos++; - if (Class->NumProtos > MAX_NUM_PROTOS) { - tprintf("Ouch! number of protos = %d, vs max of %d!", - Class->NumProtos, MAX_NUM_PROTOS); - } - return (NewProto); -} - - -/** - * @name ClassConfigLength - * - * Return the length of all the protos in this class. - * - * @param Class The class to add to - * @param Config FIXME - */ -float ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) { - int16_t Pid; - float TotalLength = 0; - - for (Pid = 0; Pid < Class->NumProtos; Pid++) { - if (test_bit (Config, Pid)) { - - TotalLength += (ProtoIn (Class, Pid))->Length; - } - } - return (TotalLength); -} - - -/** - * @name ClassProtoLength - * - * Return the length of all the protos in this class. - * - * @param Class The class to use - */ -float ClassProtoLength(CLASS_TYPE Class) { - int16_t Pid; - float TotalLength = 0; - - for (Pid = 0; Pid < Class->NumProtos; Pid++) { - TotalLength += (ProtoIn (Class, Pid))->Length; - } - return (TotalLength); -} - - -/** - * @name CopyProto - * - * Copy the first proto into the second. 
- * - * @param Src Source - * @param Dest Destination - */ -void CopyProto(PROTO Src, PROTO Dest) { - Dest->X = Src->X; - Dest->Y = Src->Y; - Dest->Length = Src->Length; - Dest->Angle = Src->Angle; - Dest->A = Src->A; - Dest->B = Src->B; - Dest->C = Src->C; -} - - -/********************************************************************** - * FillABC - * - * Fill in Protos A, B, C fields based on the X, Y, Angle fields. - **********************************************************************/ -void FillABC(PROTO Proto) { - float Slope, Intercept, Normalizer; - - Slope = tan(Proto->Angle * 2.0 * M_PI); - Intercept = Proto->Y - Slope * Proto->X; - Normalizer = 1.0 / sqrt (Slope * Slope + 1.0); - Proto->A = Slope * Normalizer; - Proto->B = -Normalizer; - Proto->C = Intercept * Normalizer; -} - - -/********************************************************************** - * FreeClass - * - * Deallocate the memory consumed by the specified class. - **********************************************************************/ -void FreeClass(CLASS_TYPE Class) { - if (Class) { - FreeClassFields(Class); - delete Class; - } -} - - -/********************************************************************** - * FreeClassFields - * - * Deallocate the memory consumed by subfields of the specified class. - **********************************************************************/ -void FreeClassFields(CLASS_TYPE Class) { - int i; - - if (Class) { - if (Class->MaxNumProtos > 0) free(Class->Prototypes); - if (Class->MaxNumConfigs > 0) { - for (i = 0; i < Class->NumConfigs; i++) - FreeBitVector (Class->Configurations[i]); - free(Class->Configurations); - } - } -} - -/********************************************************************** - * NewClass - * - * Allocate a new class with enough memory to hold the specified number - * of prototypes and configurations. 
- **********************************************************************/ -CLASS_TYPE NewClass(int NumProtos, int NumConfigs) { - CLASS_TYPE Class; - - Class = new CLASS_STRUCT; - - if (NumProtos > 0) - Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT)); - - if (NumConfigs > 0) - Class->Configurations = (CONFIGS) Emalloc (NumConfigs * - sizeof (BIT_VECTOR)); - Class->MaxNumProtos = NumProtos; - Class->MaxNumConfigs = NumConfigs; - Class->NumProtos = 0; - Class->NumConfigs = 0; - return (Class); - -} - - -/********************************************************************** - * PrintProtos - * - * Print the list of prototypes in this class type. - **********************************************************************/ -void PrintProtos(CLASS_TYPE Class) { - int16_t Pid; - - for (Pid = 0; Pid < Class->NumProtos; Pid++) { - cprintf ("Proto %d:\t", Pid); - PrintProto (ProtoIn (Class, Pid)); - cprintf ("\t"); - PrintProtoLine (ProtoIn (Class, Pid)); - tprintf("\n"); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/protos.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/protos.h deleted file mode 100644 index 299f617a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/protos.h +++ /dev/null @@ -1,169 +0,0 @@ -/* -*-C-*- - ****************************************************************************** - * - * File: protos.h - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Fri Jul 12 10:06:55 1991 (Dan Johnson) danj@hpgrlj - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *****************************************************************************/ - -#ifndef PROTOS_H -#define PROTOS_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "bitvec.h" -#include "params.h" -#include "unichar.h" -#include "unicity_table.h" - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -using CONFIGS = BIT_VECTOR*; - -typedef struct { - float A; - float B; - float C; - float X; - float Y; - float Angle; - float Length; -} PROTO_STRUCT; -using PROTO = PROTO_STRUCT*; - -struct CLASS_STRUCT { - CLASS_STRUCT() - : NumProtos(0), - MaxNumProtos(0), - Prototypes(nullptr), - NumConfigs(0), - MaxNumConfigs(0), - Configurations(nullptr) {} - int16_t NumProtos; - int16_t MaxNumProtos; - PROTO Prototypes; - int16_t NumConfigs; - int16_t MaxNumConfigs; - CONFIGS Configurations; - UnicityTableEqEq font_set; -}; -using CLASS_TYPE = CLASS_STRUCT*; -using CLASSES = CLASS_STRUCT*; - -/*---------------------------------------------------------------------- - C o n s t a n t s -----------------------------------------------------------------------*/ -#define NUMBER_OF_CLASSES MAX_NUM_CLASSES -#define Y_OFFSET -40.0 -#define FEATURE_SCALE 100.0 - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ 
-extern CLASS_STRUCT TrainingData[]; - -extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/** - * AddProtoToConfig - * - * Set a single proto bit in the specified configuration. - */ - -#define AddProtoToConfig(Pid, Config) (SET_BIT(Config, Pid)) - -/** - * RemoveProtoFromConfig - * - * Clear a single proto bit in the specified configuration. - */ - -#define RemoveProtoFromConfig(Pid, Config) (reset_bit(Config, Pid)) - -/** - * ClassOfChar - * - * Return the class of a particular ASCII character value. - */ - -#define ClassOfChar(Char) \ - ((TrainingData[Char].NumProtos) ? (&TrainingData[Char]) : NO_CLASS) - -/** - * ProtoIn - * - * Choose the selected prototype in this class record. Return the - * pointer to it (type PROTO). - */ - -#define ProtoIn(Class, Pid) (&(Class)->Prototypes[Pid]) - -/** - * PrintProto - * - * Print out the contents of a prototype. The 'Proto' argument is of - * type 'PROTO'. - */ - -#define PrintProto(Proto) \ - (tprintf("X=%4.2f, Y=%4.2f, Length=%4.2f, Angle=%4.2f", Proto->X, Proto->Y, \ - Proto->Length, Proto->Angle)) - -/** - * PrintProtoLine - * - * Print out the contents of a prototype. The 'Proto' argument is of - * type 'PROTO'. 
- */ - -#define PrintProtoLine(Proto) \ - (cprintf("A=%4.2f, B=%4.2f, C=%4.2f", Proto->A, Proto->B, Proto->C)) - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -int AddConfigToClass(CLASS_TYPE Class); - -int AddProtoToClass(CLASS_TYPE Class); - -float ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config); - -float ClassProtoLength(CLASS_TYPE Class); - -void CopyProto(PROTO Src, PROTO Dest); - -void FillABC(PROTO Proto); - -void FreeClass(CLASS_TYPE Class); - -void FreeClassFields(CLASS_TYPE Class); - -void InitPrototypes(); - -CLASS_TYPE NewClass(int NumProtos, int NumConfigs); - -void PrintProtos(CLASS_TYPE Class); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/sampleiterator.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/sampleiterator.cpp deleted file mode 100644 index 9d52c4fb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/sampleiterator.cpp +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "sampleiterator.h" - -#include "indexmapbidi.h" -#include "shapetable.h" -#include "trainingsample.h" -#include "trainingsampleset.h" - -namespace tesseract { - -// ================== SampleIterator Implementation ================= - -SampleIterator::SampleIterator() - : charset_map_(nullptr), - shape_table_(nullptr), - sample_set_(nullptr), - randomize_(false), - owned_shape_table_(nullptr) { - num_shapes_ = 0; - Begin(); -} - -SampleIterator::~SampleIterator() { - Clear(); -} - -void SampleIterator::Clear() { - delete owned_shape_table_; - owned_shape_table_ = nullptr; -} - -// See class comment for arguments. -void SampleIterator::Init(const IndexMapBiDi* charset_map, - const ShapeTable* shape_table, - bool randomize, - TrainingSampleSet* sample_set) { - Clear(); - charset_map_ = charset_map; - shape_table_ = shape_table; - sample_set_ = sample_set; - randomize_ = randomize; - if (shape_table_ == nullptr && charset_map_ != nullptr) { - // The caller wishes to iterate by class. The easiest way to do this - // is to create a dummy shape_table_ that we will own. - int num_fonts = sample_set_->NumFonts(); - owned_shape_table_ = new ShapeTable(sample_set_->unicharset()); - int charsetsize = sample_set_->unicharset().size(); - for (int c = 0; c < charsetsize; ++c) { - // We always add a shape for each character to keep the index in sync - // with the unichar_id. - int shape_id = owned_shape_table_->AddShape(c, 0); - for (int f = 1; f < num_fonts; ++f) { - if (sample_set_->NumClassSamples(f, c, true) > 0) { - owned_shape_table_->AddToShape(shape_id, c, f); - } - } - } - shape_table_ = owned_shape_table_; - } - if (shape_table_ != nullptr) { - num_shapes_ = shape_table_->NumShapes(); - } else { - num_shapes_ = randomize ? 
sample_set_->num_samples() - : sample_set_->num_raw_samples(); - } - Begin(); -} - -// Iterator functions designed for use with a simple for loop: -// for (it.Begin(); !it.AtEnd(); it.Next()) { -// const TrainingSample& sample = it.GetSample(); -// } -void SampleIterator::Begin() { - shape_index_ = -1; - shape_char_index_ = 0; - num_shape_chars_ = 0; - shape_font_index_ = 0; - num_shape_fonts_ = 0; - sample_index_ = 0; - num_samples_ = 0; - // Find the first indexable sample. - Next(); -} - -bool SampleIterator::AtEnd() const { - return shape_index_ >= num_shapes_; -} - -const TrainingSample& SampleIterator::GetSample() const { - if (shape_table_ != nullptr) { - const UnicharAndFonts* shape_entry = GetShapeEntry(); - int char_id = shape_entry->unichar_id; - int font_id = shape_entry->font_ids[shape_font_index_]; - return *sample_set_->GetSample(font_id, char_id, sample_index_); - } else { - return *sample_set_->GetSample(shape_index_); - } -} - -TrainingSample* SampleIterator::MutableSample() const { - if (shape_table_ != nullptr) { - const UnicharAndFonts* shape_entry = GetShapeEntry(); - int char_id = shape_entry->unichar_id; - int font_id = shape_entry->font_ids[shape_font_index_]; - return sample_set_->MutableSample(font_id, char_id, sample_index_); - } else { - return sample_set_->mutable_sample(shape_index_); - } -} - -// Returns the total index (from the original set of samples) of the current -// sample. -int SampleIterator::GlobalSampleIndex() const { - if (shape_table_ != nullptr) { - const UnicharAndFonts* shape_entry = GetShapeEntry(); - int char_id = shape_entry->unichar_id; - int font_id = shape_entry->font_ids[shape_font_index_]; - return sample_set_->GlobalSampleIndex(font_id, char_id, sample_index_); - } else { - return shape_index_; - } -} - -// Returns the index of the current sample in compact charset space, so -// in a 2-class problem between x and y, the returned indices will all be -// 0 or 1, and have nothing to do with the unichar_ids. 
-// If the charset_map_ is nullptr, then this is equal to GetSparseClassID(). -int SampleIterator::GetCompactClassID() const { - return charset_map_ != nullptr ? charset_map_->SparseToCompact(shape_index_) - : GetSparseClassID(); -} -// Returns the index of the current sample in sparse charset space, so -// in a 2-class problem between x and y, the returned indices will all be -// x or y, where x and y may be unichar_ids (no shape_table_) or shape_ids -// with a shape_table_. -int SampleIterator::GetSparseClassID() const { - return shape_table_ != nullptr ? shape_index_ : GetSample().class_id(); -} - -// Moves on to the next indexable sample. If the end is reached, leaves -// the state such that AtEnd() is true. -void SampleIterator::Next() { - if (shape_table_ != nullptr) { - // Next sample in this class/font combination. - ++sample_index_; - if (sample_index_ < num_samples_) - return; - // Next font in this class in this shape. - sample_index_ = 0; - do { - ++shape_font_index_; - if (shape_font_index_ >= num_shape_fonts_) { - // Next unichar in this shape. - shape_font_index_ = 0; - ++shape_char_index_; - if (shape_char_index_ >= num_shape_chars_) { - // Find the next shape that is mapped in the charset_map_. - shape_char_index_ = 0; - do { - ++shape_index_; - } while (shape_index_ < num_shapes_ && - charset_map_ != nullptr && - charset_map_->SparseToCompact(shape_index_) < 0); - if (shape_index_ >= num_shapes_) - return; // The end. - num_shape_chars_ = shape_table_->GetShape(shape_index_).size(); - } - } - const UnicharAndFonts* shape_entry = GetShapeEntry(); - num_shape_fonts_ = shape_entry->font_ids.size(); - int char_id = shape_entry->unichar_id; - int font_id = shape_entry->font_ids[shape_font_index_]; - num_samples_ = sample_set_->NumClassSamples(font_id, char_id, randomize_); - } while (num_samples_ == 0); - } else { - // We are just iterating over the samples. - ++shape_index_; - } -} - -// Returns the size of the compact charset space. 
-int SampleIterator::CompactCharsetSize() const { - return charset_map_ != nullptr ? charset_map_->CompactSize() - : SparseCharsetSize(); -} - -// Returns the size of the sparse charset space. -int SampleIterator::SparseCharsetSize() const { - return charset_map_ != nullptr - ? charset_map_->SparseSize() - : (shape_table_ != nullptr ? shape_table_->NumShapes() - : sample_set_->charsetsize()); -} - -// Apply the supplied feature_space/feature_map transform to all samples -// accessed by this iterator. -void SampleIterator::MapSampleFeatures(const IntFeatureMap& feature_map) { - for (Begin(); !AtEnd(); Next()) { - TrainingSample* sample = MutableSample(); - sample->MapFeatures(feature_map); - } -} - -// Adjust the weights of all the samples to be uniform in the given charset. -// Returns the number of samples in the iterator. -int SampleIterator::UniformSamples() { - int num_good_samples = 0; - for (Begin(); !AtEnd(); Next()) { - TrainingSample* sample = MutableSample(); - sample->set_weight(1.0); - ++num_good_samples; - } - NormalizeSamples(); - return num_good_samples; -} - -// Normalize the weights of all the samples in the charset_map so they sum -// to 1. Returns the minimum assigned sample weight. -double SampleIterator::NormalizeSamples() { - double total_weight = 0.0; - int sample_count = 0; - for (Begin(); !AtEnd(); Next()) { - const TrainingSample& sample = GetSample(); - total_weight += sample.weight(); - ++sample_count; - } - // Normalize samples. - double min_assigned_sample_weight = 1.0; - if (total_weight > 0.0) { - for (Begin(); !AtEnd(); Next()) { - TrainingSample* sample = MutableSample(); - double weight = sample->weight() / total_weight; - if (weight < min_assigned_sample_weight) - min_assigned_sample_weight = weight; - sample->set_weight(weight); - } - } - return min_assigned_sample_weight; -} - -// Helper returns the current UnicharAndFont shape_entry. 
-const UnicharAndFonts* SampleIterator::GetShapeEntry() const { - const Shape& shape = shape_table_->GetShape(shape_index_); - return &shape[shape_char_index_]; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/sampleiterator.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/sampleiterator.h deleted file mode 100644 index 47bc9459..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/sampleiterator.h +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - - -#ifndef TESSERACT_CLASSIFY_SAMPLEITERATOR_H_ -#define TESSERACT_CLASSIFY_SAMPLEITERATOR_H_ - -namespace tesseract { - -class IndexMapBiDi; -class IntFeatureMap; -class ShapeTable; -class TrainingSample; -class TrainingSampleSet; -struct UnicharAndFonts; - -// Iterator class to encapsulate the complex iteration involved in getting -// all samples of all shapes needed for a classification problem. -// -// =====INPUTS TO Init FUNCTION===== -// The charset_map defines a subset of the sample_set classes (with a nullptr -// shape_table, or the shape_table classes if not nullptr.) -// -// The shape_table (if not nullptr) defines the mapping from shapes to -// font_id/class_id pairs. 
Each shape is a list of unichar_id and font lists. -// -// The sample_set holds the samples and provides indexed access to samples -// of font_id/class_id pairs. -// -// If randomize is true, the samples are perturbed slightly, but the -// perturbation is guaranteed to be the same for multiple identical -// iterations. -// -// =====DIFFERENT COMBINATIONS OF INPUTS===== -// nullptr shape_table: -// Without a shape_table, everything works in UNICHAR_IDs. -// -// nullptr shape_table, nullptr charset_map: -// Iterations simply run over the samples in the order the samples occur in the -// input files. -// GetCompactClassID and GetSparseClassID both return the sample UNICHAR_ID. -// -// nullptr shape_table, non-nullptr charset_map: -// When shape_table is nullptr, the charset_map indexes unichar_ids directly, -// and an iteration returns all samples of all chars in the charset_map, which -// is a subset of the full unicharset. -// The iteration will be in groups of the same unichar_id, in the order -// defined by the charset_map. -// GetCompactClassID returns the charset_map index of a sample, and -// GetSparseClassID returns the sample UNICHAR_ID. -// -// Non-nullptr shape_table: -// With a shape_table, samples are grouped according to the shape_table, so -// multiple UNICHAR_IDs and fonts may be grouped together, and everything -// works in shape_ids. -// -// Non-nullptr shape_table, nullptr charset_map. -// Iterations simply run over the samples in the order of shape_id. -// GetCompactClassID and GetSparseClassID both return the shape_id. -// (If you want the unichar_id or font_id, the sample still has them.) -// -// Non-nullptr shape_table, non-nullptr charset_map. -// When shape_table is not nullptr, the charset_map indexes and subsets shapes in -// the shape_table, and iterations will be in shape_table order, not -// charset_map order. -// GetCompactClassID returns the charset_map index of a shape, and -// GetSparseClassID returns the shape_id. 
-// -// =====What is SampleIterator good for?===== -// Inside a classifier training module, the SampleIterator has abstracted away -// all the different modes above. -// Use the following iteration to train your classifier: -// for (it.Begin(); !it.AtEnd(); it.Next()) { -// const TrainingSample& sample = it.GetSample(); -// int class_id = it.GetCompactClassID(); -// Your classifier may or may not be dealing with a shape_table, and may be -// dealing with some subset of the character/shape set. It doesn't need to -// know and shouldn't care. It is just learning shapes with compact class ids -// in the range [0, it.CompactCharsetSize()). -class SampleIterator { - public: - SampleIterator(); - ~SampleIterator(); - - void Clear(); - - // See class comment for arguments. - void Init(const IndexMapBiDi* charset_map, - const ShapeTable* shape_table, - bool randomize, - TrainingSampleSet* sample_set); - - // Iterator functions designed for use with a simple for loop: - // for (it.Begin(); !it.AtEnd(); it.Next()) { - // const TrainingSample& sample = it.GetSample(); - // int class_id = it.GetCompactClassID(); - // ... - // } - void Begin(); - bool AtEnd() const; - const TrainingSample& GetSample() const; - TrainingSample* MutableSample() const; - // Returns the total index (from the original set of samples) of the current - // sample. - int GlobalSampleIndex() const; - // Returns the index of the current sample in compact charset space, so - // in a 2-class problem between x and y, the returned indices will all be - // 0 or 1, and have nothing to do with the unichar_ids. - // If the charset_map_ is nullptr, then this is equal to GetSparseClassID(). - int GetCompactClassID() const; - // Returns the index of the current sample in sparse charset space, so - // in a 2-class problem between x and y, the returned indices will all be - // x or y, where x and y may be unichar_ids (no shape_table_) or shape_ids - // with a shape_table_. 
- int GetSparseClassID() const; - // Moves on to the next indexable sample. If the end is reached, leaves - // the state such that AtEnd() is true. - void Next(); - - // Returns the size of the compact charset space. - int CompactCharsetSize() const; - // Returns the size of the sparse charset space. - int SparseCharsetSize() const; - - const IndexMapBiDi& charset_map() const { - return *charset_map_; - } - const ShapeTable* shape_table() const { - return shape_table_; - } - // Sample set operations. - const TrainingSampleSet* sample_set() const { - return sample_set_; - } - - // A set of functions that do something to all the samples accessed by the - // iterator, as it is currently setup. - - // Apply the supplied feature_space/feature_map transform to all samples - // accessed by this iterator. - void MapSampleFeatures(const IntFeatureMap& feature_map); - - // Adjust the weights of all the samples to be uniform in the given charset. - // Returns the number of samples in the iterator. - int UniformSamples(); - - // Normalize the weights of all the samples defined by the iterator so they - // sum to 1. Returns the minimum assigned sample weight. - double NormalizeSamples(); - - private: - // Helper returns the current UnicharAndFont shape_entry. - const UnicharAndFonts* GetShapeEntry() const; - - // Map to subset the actual charset space. - const IndexMapBiDi* charset_map_; - // Shape table to recombine character classes into shapes - const ShapeTable* shape_table_; - // The samples to iterate over. - TrainingSampleSet* sample_set_; - // Flag to control randomizing the sample features. - bool randomize_; - // Shape table owned by this used to iterate character classes. - ShapeTable* owned_shape_table_; - - // Top-level iteration. Shape index in sparse charset_map space. - int shape_index_; - int num_shapes_; - // Index to the character class within a shape. - int shape_char_index_; - int num_shape_chars_; - // Index to the font within a shape/class pair. 
- int shape_font_index_; - int num_shape_fonts_; - // The lowest level iteration. sample_index_/num_samples_ counts samples - // in the current shape/class/font combination. - int sample_index_; - int num_samples_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_CLASSIFY_SAMPLEITERATOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapeclassifier.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapeclassifier.cpp deleted file mode 100644 index ac899dc5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapeclassifier.cpp +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: shapeclassifier.cpp -// Description: Base interface class for classifiers that return a -// shape index. -// Author: Ray Smith -// Created: Thu Dec 15 15:24:27 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "shapeclassifier.h" -#include "genericvector.h" -#include "scrollview.h" -#include "shapetable.h" -#include "svmnode.h" -#include "trainingsample.h" -#include "tprintf.h" - -namespace tesseract { - -// Classifies the given [training] sample, writing to results. 
-// See shapeclassifier.h for a full description. -// Default implementation calls the ShapeRating version. -int ShapeClassifier::UnicharClassifySample( - const TrainingSample& sample, Pix* page_pix, int debug, - UNICHAR_ID keep_this, GenericVector* results) { - results->truncate(0); - GenericVector shape_results; - int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, - &shape_results); - const ShapeTable* shapes = GetShapeTable(); - GenericVector unichar_map; - unichar_map.init_to_size(shapes->unicharset().size(), -1); - for (int r = 0; r < num_shape_results; ++r) { - shapes->AddShapeToResults(shape_results[r], &unichar_map, results); - } - return results->size(); -} - -// Classifies the given [training] sample, writing to results. -// See shapeclassifier.h for a full description. -// Default implementation aborts. -int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, int keep_this, - GenericVector* results) { - ASSERT_HOST("Must implement ClassifySample!" == nullptr); - return 0; -} - -// Returns the shape that contains unichar_id that has the best result. -// If result is not nullptr, it is set with the shape_id and rating. -// Does not need to be overridden if ClassifySample respects the keep_this -// rule. -int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample, - Pix* page_pix, UNICHAR_ID unichar_id, - ShapeRating* result) { - GenericVector results; - const ShapeTable* shapes = GetShapeTable(); - int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); - for (int r = 0; r < num_results; ++r) { - if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { - if (result != nullptr) - *result = results[r]; - return results[r].shape_id; - } - } - return -1; -} - -// Provides access to the UNICHARSET that this classifier works with. -// Only needs to be overridden if GetShapeTable() can return nullptr. 
-const UNICHARSET& ShapeClassifier::GetUnicharset() const { - return GetShapeTable()->unicharset(); -} - -// Visual debugger classifies the given sample, displays the results and -// solicits user input to display other classifications. Returns when -// the user has finished with debugging the sample. -// Probably doesn't need to be overridden if the subclass provides -// DisplayClassifyAs. -void ShapeClassifier::DebugDisplay(const TrainingSample& sample, - Pix* page_pix, - UNICHAR_ID unichar_id) { -#ifndef GRAPHICS_DISABLED - static ScrollView* terminator = nullptr; - if (terminator == nullptr) { - terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true); - } - ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0); - // Provide a right-click menu to choose the class. - SVMenuNode* popup_menu = new SVMenuNode(); - popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug"); - popup_menu->BuildMenu(debug_win, false); - // Display the features in green. - const INT_FEATURE_STRUCT* features = sample.features(); - uint32_t num_features = sample.num_features(); - for (uint32_t f = 0; f < num_features; ++f) { - RenderIntFeature(debug_win, &features[f], ScrollView::GREEN); - } - debug_win->Update(); - GenericVector results; - // Debug classification until the user quits. 
- const UNICHARSET& unicharset = GetUnicharset(); - SVEvent* ev; - SVEventType ev_type; - do { - PointerVector windows; - if (unichar_id >= 0) { - tprintf("Debugging class %d = %s\n", - unichar_id, unicharset.id_to_unichar(unichar_id)); - UnicharClassifySample(sample, page_pix, 1, unichar_id, &results); - DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows); - } else { - tprintf("Invalid unichar_id: %d\n", unichar_id); - UnicharClassifySample(sample, page_pix, 1, -1, &results); - } - if (unichar_id >= 0) { - tprintf("Debugged class %d = %s\n", - unichar_id, unicharset.id_to_unichar(unichar_id)); - } - tprintf("Right-click in ClassifierDebug window to choose debug class,"); - tprintf(" Left-click or close window to quit...\n"); - UNICHAR_ID old_unichar_id; - do { - old_unichar_id = unichar_id; - ev = debug_win->AwaitEvent(SVET_ANY); - ev_type = ev->type; - if (ev_type == SVET_POPUP) { - if (unicharset.contains_unichar(ev->parameter)) { - unichar_id = unicharset.unichar_to_id(ev->parameter); - } else { - tprintf("Char class '%s' not found in unicharset", ev->parameter); - } - } - delete ev; - } while (unichar_id == old_unichar_id && - ev_type != SVET_CLICK && ev_type != SVET_DESTROY); - } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY); - delete debug_win; -#endif // GRAPHICS_DISABLED -} - -// Displays classification as the given shape_id. Creates as many windows -// as it feels fit, using index as a guide for placement. Adds any created -// windows to the windows output and returns a new index that may be used -// by any subsequent classifiers. Caller waits for the user to view and -// then destroys the windows by clearing the vector. -int ShapeClassifier::DisplayClassifyAs( - const TrainingSample& sample, Pix* page_pix, - UNICHAR_ID unichar_id, int index, - PointerVector* windows) { - // Does nothing in the default implementation. - return index; -} - -// Prints debug information on the results. 
-void ShapeClassifier::UnicharPrintResults( - const char* context, const GenericVector& results) const { - tprintf("%s\n", context); - for (int i = 0; i < results.size(); ++i) { - tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id, - GetUnicharset().id_to_unichar(results[i].unichar_id)); - if (!results[i].fonts.empty()) { - tprintf(" Font Vector:"); - for (int f = 0; f < results[i].fonts.size(); ++f) { - tprintf(" %d", results[i].fonts[f].fontinfo_id); - } - } - tprintf("\n"); - } -} -void ShapeClassifier::PrintResults( - const char* context, const GenericVector& results) const { - tprintf("%s\n", context); - for (int i = 0; i < results.size(); ++i) { - tprintf("%g:", results[i].rating); - if (results[i].joined) - tprintf("[J]"); - if (results[i].broken) - tprintf("[B]"); - tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string()); - } -} - -// Removes any result that has all its unichars covered by a better choice, -// regardless of font. -void ShapeClassifier::FilterDuplicateUnichars( - GenericVector* results) const { - GenericVector filtered_results; - // Copy results to filtered results and knock out duplicate unichars. - const ShapeTable* shapes = GetShapeTable(); - for (int r = 0; r < results->size(); ++r) { - if (r > 0) { - const Shape& shape_r = shapes->GetShape((*results)[r].shape_id); - int c; - for (c = 0; c < shape_r.size(); ++c) { - int unichar_id = shape_r[c].unichar_id; - int s; - for (s = 0; s < r; ++s) { - const Shape& shape_s = shapes->GetShape((*results)[s].shape_id); - if (shape_s.ContainsUnichar(unichar_id)) - break; // We found unichar_id. - } - if (s == r) - break; // We didn't find unichar_id. - } - if (c == shape_r.size()) - continue; // We found all the unichar ids in previous answers. - } - filtered_results.push_back((*results)[r]); - } - *results = filtered_results; -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapeclassifier.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapeclassifier.h deleted file mode 100644 index 24852cac..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapeclassifier.h +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: shapeclassifier.h -// Description: Base interface class for classifiers that return a -// shape index. -// Author: Ray Smith -// Created: Tue Sep 13 11:26:32 PDT 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ -#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ - -#include "unichar.h" - -template class GenericVector; -struct Pix; -class ScrollView; -class UNICHARSET; - -namespace tesseract { - -template class PointerVector; -struct ShapeRating; -class ShapeTable; -class TrainingSample; -class TrainingSampleSet; -struct UnicharRating; - -// Interface base class for classifiers that produce ShapeRating results. -class ShapeClassifier { - public: - virtual ~ShapeClassifier() = default; - - // Classifies the given [training] sample, writing to results. 
- // If page_pix is not nullptr, the overriding function may call - // sample.GetSamplePix(padding, page_pix) to get an image of the sample - // padded (with real image data) by the given padding to extract features - // from the image of the character. Other members of TrainingSample: - // features(), micro_features(), cn_feature(), geo_feature() may be used - // to get the appropriate tesseract features. - // If debug is non-zero, then various degrees of classifier dependent debug - // information is provided. - // If keep_this (a UNICHAR_ID) is >= 0, then the results should always - // contain keep_this, and (if possible) anything of intermediate confidence. - // (Used for answering "Why didn't it get that right?" questions.) It must - // be a UNICHAR_ID as the callers have no clue how to choose the best shape - // that may contain a desired answer. - // The return value is the number of classes saved in results. - // NOTE that overriding functions MUST clear and sort the results by - // descending rating unless the classifier is working with a team of such - // classifiers. - // NOTE: Neither overload of ClassifySample is pure, but at least one must - // be overridden by a classifier in order for it to do anything. - virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, UNICHAR_ID keep_this, - GenericVector* results); - - protected: - virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, UNICHAR_ID keep_this, - GenericVector* results); - - public: - // Returns the shape that contains unichar_id that has the best result. - // If result is not nullptr, it is set with the shape_id and rating. - // Returns -1 if ClassifySample fails to provide any result containing - // unichar_id. BestShapeForUnichar does not need to be overridden if - // ClassifySample respects the keep_this rule. 
- virtual int BestShapeForUnichar(const TrainingSample& sample, Pix* page_pix, - UNICHAR_ID unichar_id, ShapeRating* result); - - // Provides access to the ShapeTable that this classifier works with. - virtual const ShapeTable* GetShapeTable() const = 0; - // Provides access to the UNICHARSET that this classifier works with. - // Must be overridden IFF GetShapeTable() returns nullptr. - virtual const UNICHARSET& GetUnicharset() const; - - // Visual debugger classifies the given sample, displays the results and - // solicits user input to display other classifications. Returns when - // the user has finished with debugging the sample. - // Probably doesn't need to be overridden if the subclass provides - // DisplayClassifyAs. - virtual void DebugDisplay(const TrainingSample& sample, Pix* page_pix, - UNICHAR_ID unichar_id); - - - // Displays classification as the given unichar_id. Creates as many windows - // as it feels fit, using index as a guide for placement. Adds any created - // windows to the windows output and returns a new index that may be used - // by any subsequent classifiers. Caller waits for the user to view and - // then destroys the windows by clearing the vector. - virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, - UNICHAR_ID unichar_id, int index, - PointerVector* windows); - - // Prints debug information on the results. context is some introductory/title - // message. - virtual void UnicharPrintResults( - const char* context, const GenericVector& results) const; - virtual void PrintResults(const char* context, - const GenericVector& results) const; - - protected: - // Removes any result that has all its unichars covered by a better choice, - // regardless of font. - void FilterDuplicateUnichars(GenericVector* results) const; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapetable.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapetable.cpp deleted file mode 100644 index 20a94232..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapetable.cpp +++ /dev/null @@ -1,726 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: shapetable.cpp -// Description: Class to map a classifier shape index to unicharset -// indices and font indices. -// Author: Ray Smith -// Created: Tue Nov 02 15:31:32 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "shapetable.h" - -#include "bitvector.h" -#include "fontinfo.h" -#include "intfeaturespace.h" -#include "strngs.h" -#include "unicharset.h" -#include "unicity_table.h" - -#include - -namespace tesseract { - -// Helper function to get the index of the first result with the required -// unichar_id. If the results are sorted by rating, this will also be the -// best result with the required unichar_id. 
-// Returns -1 if the unichar_id is not found -int ShapeRating::FirstResultWithUnichar( - const GenericVector& results, - const ShapeTable& shape_table, - UNICHAR_ID unichar_id) { - for (int r = 0; r < results.size(); ++r) { - const int shape_id = results[r].shape_id; - const Shape& shape = shape_table.GetShape(shape_id); - if (shape.ContainsUnichar(unichar_id)) { - return r; - } - } - return -1; -} - -// Helper function to get the index of the first result with the required -// unichar_id. If the results are sorted by rating, this will also be the -// best result with the required unichar_id. -// Returns -1 if the unichar_id is not found -int UnicharRating::FirstResultWithUnichar( - const GenericVector& results, - UNICHAR_ID unichar_id) { - for (int r = 0; r < results.size(); ++r) { - if (results[r].unichar_id == unichar_id) - return r; - } - return -1; -} - -// Writes to the given file. Returns false in case of error. -bool UnicharAndFonts::Serialize(FILE* fp) const { - return tesseract::Serialize(fp, &unichar_id) && font_ids.Serialize(fp); -} -// Reads from the given file. Returns false in case of error. - -bool UnicharAndFonts::DeSerialize(TFile* fp) { - return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp); -} - -// Sort function to sort a pair of UnicharAndFonts by unichar_id. -int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) { - const UnicharAndFonts* p1 = static_cast(v1); - const UnicharAndFonts* p2 = static_cast(v2); - return p1->unichar_id - p2->unichar_id; -} - -// Writes to the given file. Returns false in case of error. -bool Shape::Serialize(FILE* fp) const { - uint8_t sorted = unichars_sorted_; - return tesseract::Serialize(fp, &sorted) && unichars_.SerializeClasses(fp); -} -// Reads from the given file. Returns false in case of error. 
- -bool Shape::DeSerialize(TFile* fp) { - uint8_t sorted; - if (!fp->DeSerialize(&sorted)) return false; - unichars_sorted_ = sorted != 0; - return unichars_.DeSerializeClasses(fp); -} - -// Adds a font_id for the given unichar_id. If the unichar_id is not -// in the shape, it is added. -void Shape::AddToShape(int unichar_id, int font_id) { - for (int c = 0; c < unichars_.size(); ++c) { - if (unichars_[c].unichar_id == unichar_id) { - // Found the unichar in the shape table. - GenericVector& font_list = unichars_[c].font_ids; - for (int f = 0; f < font_list.size(); ++f) { - if (font_list[f] == font_id) - return; // Font is already there. - } - font_list.push_back(font_id); - return; - } - } - // Unichar_id is not in shape, so add it to shape. - unichars_.push_back(UnicharAndFonts(unichar_id, font_id)); - unichars_sorted_ = unichars_.size() <= 1; -} - -// Adds everything in other to this. -void Shape::AddShape(const Shape& other) { - for (int c = 0; c < other.unichars_.size(); ++c) { - for (int f = 0; f < other.unichars_[c].font_ids.size(); ++f) { - AddToShape(other.unichars_[c].unichar_id, - other.unichars_[c].font_ids[f]); - } - } - unichars_sorted_ = unichars_.size() <= 1; -} - -// Returns true if the shape contains the given unichar_id, font_id pair. -bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const { - for (int c = 0; c < unichars_.size(); ++c) { - if (unichars_[c].unichar_id == unichar_id) { - // Found the unichar, so look for the font. - GenericVector& font_list = unichars_[c].font_ids; - for (int f = 0; f < font_list.size(); ++f) { - if (font_list[f] == font_id) - return true; - } - return false; - } - } - return false; -} - -// Returns true if the shape contains the given unichar_id, ignoring font. 
-bool Shape::ContainsUnichar(int unichar_id) const { - for (int c = 0; c < unichars_.size(); ++c) { - if (unichars_[c].unichar_id == unichar_id) { - return true; - } - } - return false; -} - -// Returns true if the shape contains the given font, ignoring unichar_id. -bool Shape::ContainsFont(int font_id) const { - for (int c = 0; c < unichars_.size(); ++c) { - GenericVector& font_list = unichars_[c].font_ids; - for (int f = 0; f < font_list.size(); ++f) { - if (font_list[f] == font_id) - return true; - } - } - return false; -} -// Returns true if the shape contains the given font properties, ignoring -// unichar_id. -bool Shape::ContainsFontProperties(const FontInfoTable& font_table, - uint32_t properties) const { - for (int c = 0; c < unichars_.size(); ++c) { - GenericVector& font_list = unichars_[c].font_ids; - for (int f = 0; f < font_list.size(); ++f) { - if (font_table.get(font_list[f]).properties == properties) - return true; - } - } - return false; -} -// Returns true if the shape contains multiple different font properties, -// ignoring unichar_id. -bool Shape::ContainsMultipleFontProperties( - const FontInfoTable& font_table) const { - uint32_t properties = font_table.get(unichars_[0].font_ids[0]).properties; - for (int c = 0; c < unichars_.size(); ++c) { - GenericVector& font_list = unichars_[c].font_ids; - for (int f = 0; f < font_list.size(); ++f) { - if (font_table.get(font_list[f]).properties != properties) - return true; - } - } - return false; -} - -// Returns true if this shape is equal to other (ignoring order of unichars -// and fonts). -bool Shape::operator==(const Shape& other) const { - return IsSubsetOf(other) && other.IsSubsetOf(*this); -} - -// Returns true if this is a subset (including equal) of other. 
-bool Shape::IsSubsetOf(const Shape& other) const { - for (int c = 0; c < unichars_.size(); ++c) { - int unichar_id = unichars_[c].unichar_id; - const GenericVector& font_list = unichars_[c].font_ids; - for (int f = 0; f < font_list.size(); ++f) { - if (!other.ContainsUnicharAndFont(unichar_id, font_list[f])) - return false; - } - } - return true; -} - -// Returns true if the lists of unichar ids are the same in this and other, -// ignoring fonts. -// NOT const, as it will sort the unichars on demand. -bool Shape::IsEqualUnichars(Shape* other) { - if (unichars_.size() != other->unichars_.size()) return false; - if (!unichars_sorted_) SortUnichars(); - if (!other->unichars_sorted_) other->SortUnichars(); - for (int c = 0; c < unichars_.size(); ++c) { - if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) - return false; - } - return true; -} - -// Sorts the unichars_ vector by unichar. -void Shape::SortUnichars() { - unichars_.sort(UnicharAndFonts::SortByUnicharId); - unichars_sorted_ = true; -} - -ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) { -} -ShapeTable::ShapeTable(const UNICHARSET& unicharset) - : unicharset_(&unicharset), num_fonts_(0) { -} - -// Writes to the given file. Returns false in case of error. -bool ShapeTable::Serialize(FILE* fp) const { - return shape_table_.Serialize(fp); -} -// Reads from the given file. Returns false in case of error. - -bool ShapeTable::DeSerialize(TFile* fp) { - if (!shape_table_.DeSerialize(fp)) return false; - num_fonts_ = 0; - return true; -} - -// Returns the number of fonts used in this ShapeTable, computing it if -// necessary. 
-int ShapeTable::NumFonts() const { - if (num_fonts_ <= 0) { - for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { - const Shape& shape = *shape_table_[shape_id]; - for (int c = 0; c < shape.size(); ++c) { - for (int f = 0; f < shape[c].font_ids.size(); ++f) { - if (shape[c].font_ids[f] >= num_fonts_) - num_fonts_ = shape[c].font_ids[f] + 1; - } - } - } - } - return num_fonts_; -} - -// Re-indexes the class_ids in the shapetable according to the given map. -// Useful in conjunction with set_unicharset. -void ShapeTable::ReMapClassIds(const GenericVector& unicharset_map) { - for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { - Shape* shape = shape_table_[shape_id]; - for (int c = 0; c < shape->size(); ++c) { - shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]); - } - } -} - -// Returns a string listing the classes/fonts in a shape. -STRING ShapeTable::DebugStr(int shape_id) const { - if (shape_id < 0 || shape_id >= shape_table_.size()) - return STRING("INVALID_UNICHAR_ID"); - const Shape& shape = GetShape(shape_id); - STRING result; - result.add_str_int("Shape", shape_id); - if (shape.size() > 100) { - result.add_str_int(" Num unichars=", shape.size()); - return result; - } - for (int c = 0; c < shape.size(); ++c) { - result.add_str_int(" c_id=", shape[c].unichar_id); - result += "="; - result += unicharset_->id_to_unichar(shape[c].unichar_id); - if (shape.size() < 10) { - result.add_str_int(", ", shape[c].font_ids.size()); - result += " fonts ="; - int num_fonts = shape[c].font_ids.size(); - if (num_fonts > 10) { - result.add_str_int(" ", shape[c].font_ids[0]); - result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]); - } else { - for (int f = 0; f < num_fonts; ++f) { - result.add_str_int(" ", shape[c].font_ids[f]); - } - } - } - } - return result; -} - -// Returns a debug string summarizing the table. 
-STRING ShapeTable::SummaryStr() const { - int max_unichars = 0; - int num_multi_shapes = 0; - int num_master_shapes = 0; - for (int s = 0; s < shape_table_.size(); ++s) { - if (MasterDestinationIndex(s) != s) continue; - ++num_master_shapes; - int shape_size = GetShape(s).size(); - if (shape_size > 1) - ++num_multi_shapes; - if (shape_size > max_unichars) - max_unichars = shape_size; - } - STRING result; - result.add_str_int("Number of shapes = ", num_master_shapes); - result.add_str_int(" max unichars = ", max_unichars); - result.add_str_int(" number with multiple unichars = ", num_multi_shapes); - return result; -} - - -// Adds a new shape starting with the given unichar_id and font_id. -// Returns the assigned index. -int ShapeTable::AddShape(int unichar_id, int font_id) { - int index = shape_table_.size(); - Shape* shape = new Shape; - shape->AddToShape(unichar_id, font_id); - shape_table_.push_back(shape); - num_fonts_ = std::max(num_fonts_, font_id + 1); - return index; -} - -// Adds a copy of the given shape unless it is already present. -// Returns the assigned index or index of existing shape if already present. -int ShapeTable::AddShape(const Shape& other) { - int index; - for (index = 0; index < shape_table_.size() && - !(other == *shape_table_[index]); ++index) - continue; - if (index == shape_table_.size()) { - Shape* shape = new Shape(other); - shape_table_.push_back(shape); - } - num_fonts_ = 0; - return index; -} - -// Removes the shape given by the shape index. -void ShapeTable::DeleteShape(int shape_id) { - delete shape_table_[shape_id]; - shape_table_[shape_id] = nullptr; - shape_table_.remove(shape_id); -} - -// Adds a font_id to the given existing shape index for the given -// unichar_id. If the unichar_id is not in the shape, it is added. 
-void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) { - Shape& shape = *shape_table_[shape_id]; - shape.AddToShape(unichar_id, font_id); - num_fonts_ = std::max(num_fonts_, font_id + 1); -} - -// Adds the given shape to the existing shape with the given index. -void ShapeTable::AddShapeToShape(int shape_id, const Shape& other) { - Shape& shape = *shape_table_[shape_id]; - shape.AddShape(other); - num_fonts_ = 0; -} - -// Returns the id of the shape that contains the given unichar and font. -// If not found, returns -1. -// If font_id < 0, the font_id is ignored and the first shape that matches -// the unichar_id is returned. -int ShapeTable::FindShape(int unichar_id, int font_id) const { - for (int s = 0; s < shape_table_.size(); ++s) { - const Shape& shape = GetShape(s); - for (int c = 0; c < shape.size(); ++c) { - if (shape[c].unichar_id == unichar_id) { - if (font_id < 0) - return s; // We don't care about the font. - for (int f = 0; f < shape[c].font_ids.size(); ++f) { - if (shape[c].font_ids[f] == font_id) - return s; - } - } - } - } - return -1; -} - -// Returns the first unichar_id and font_id in the given shape. -void ShapeTable::GetFirstUnicharAndFont(int shape_id, - int* unichar_id, int* font_id) const { - const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0]; - *unichar_id = unichar_and_fonts.unichar_id; - *font_id = unichar_and_fonts.font_ids[0]; -} - -// Expands all the classes/fonts in the shape individually to build -// a ShapeTable. 
-int ShapeTable::BuildFromShape(const Shape& shape, - const ShapeTable& master_shapes) { - BitVector shape_map(master_shapes.NumShapes()); - for (int u_ind = 0; u_ind < shape.size(); ++u_ind) { - for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) { - int c = shape[u_ind].unichar_id; - int f = shape[u_ind].font_ids[f_ind]; - int master_id = master_shapes.FindShape(c, f); - if (master_id >= 0) { - shape_map.SetBit(master_id); - } else if (FindShape(c, f) < 0) { - AddShape(c, f); - } - } - } - int num_masters = 0; - for (int s = 0; s < master_shapes.NumShapes(); ++s) { - if (shape_map[s]) { - AddShape(master_shapes.GetShape(s)); - ++num_masters; - } - } - return num_masters; -} - -// Returns true if the shapes are already merged. -bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const { - return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2); -} - -// Returns true if any shape contains multiple unichars. -bool ShapeTable::AnyMultipleUnichars() const { - int num_shapes = NumShapes(); - for (int s1 = 0; s1 < num_shapes; ++s1) { - if (MasterDestinationIndex(s1) != s1) continue; - if (GetShape(s1).size() > 1) - return true; - } - return false; -} - -// Returns the maximum number of unichars over all shapes. -int ShapeTable::MaxNumUnichars() const { - int max_num_unichars = 0; - int num_shapes = NumShapes(); - for (int s = 0; s < num_shapes; ++s) { - if (GetShape(s).size() > max_num_unichars) - max_num_unichars = GetShape(s).size(); - } - return max_num_unichars; -} - - -// Merges shapes with a common unichar over the [start, end) interval. -// Assumes single unichar per shape. 
-void ShapeTable::ForceFontMerges(int start, int end) { - for (int s1 = start; s1 < end; ++s1) { - if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) { - int unichar_id = GetShape(s1)[0].unichar_id; - for (int s2 = s1 + 1; s2 < end; ++s2) { - if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 && - unichar_id == GetShape(s2)[0].unichar_id) { - MergeShapes(s1, s2); - } - } - } - } - ShapeTable compacted(*unicharset_); - compacted.AppendMasterShapes(*this, nullptr); - *this = compacted; -} - -// Returns the number of unichars in the master shape. -int ShapeTable::MasterUnicharCount(int shape_id) const { - int master_id = MasterDestinationIndex(shape_id); - return GetShape(master_id).size(); -} - -// Returns the sum of the font counts in the master shape. -int ShapeTable::MasterFontCount(int shape_id) const { - int master_id = MasterDestinationIndex(shape_id); - const Shape& shape = GetShape(master_id); - int font_count = 0; - for (int c = 0; c < shape.size(); ++c) { - font_count += shape[c].font_ids.size(); - } - return font_count; -} - -// Returns the number of unichars that would result from merging the shapes. -int ShapeTable::MergedUnicharCount(int shape_id1, int shape_id2) const { - // Do it the easy way for now. - int master_id1 = MasterDestinationIndex(shape_id1); - int master_id2 = MasterDestinationIndex(shape_id2); - Shape combined_shape(*shape_table_[master_id1]); - combined_shape.AddShape(*shape_table_[master_id2]); - return combined_shape.size(); -} - -// Merges two shape_ids, leaving shape_id2 marked as merged. -void ShapeTable::MergeShapes(int shape_id1, int shape_id2) { - int master_id1 = MasterDestinationIndex(shape_id1); - int master_id2 = MasterDestinationIndex(shape_id2); - // Point master_id2 (and all merged shapes) to master_id1. - shape_table_[master_id2]->set_destination_index(master_id1); - // Add all the shapes of master_id2 to master_id1. 
- shape_table_[master_id1]->AddShape(*shape_table_[master_id2]); -} - -// Swaps two shape_ids. -void ShapeTable::SwapShapes(int shape_id1, int shape_id2) { - Shape* tmp = shape_table_[shape_id1]; - shape_table_[shape_id1] = shape_table_[shape_id2]; - shape_table_[shape_id2] = tmp; -} - -// Returns the destination of this shape, (if merged), taking into account -// the fact that the destination may itself have been merged. -int ShapeTable::MasterDestinationIndex(int shape_id) const { - int dest_id = shape_table_[shape_id]->destination_index(); - if (dest_id == shape_id || dest_id < 0) - return shape_id; // Is master already. - int master_id = shape_table_[dest_id]->destination_index(); - if (master_id == dest_id || master_id < 0) - return dest_id; // Dest is the master and shape_id points to it. - master_id = MasterDestinationIndex(master_id); - return master_id; -} - -// Returns false if the unichars in neither shape is a subset of the other. -bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const { - const Shape& shape1 = GetShape(shape_id1); - const Shape& shape2 = GetShape(shape_id2); - int c1, c2; - for (c1 = 0; c1 < shape1.size(); ++c1) { - int unichar_id1 = shape1[c1].unichar_id; - if (!shape2.ContainsUnichar(unichar_id1)) - break; - } - for (c2 = 0; c2 < shape2.size(); ++c2) { - int unichar_id2 = shape2[c2].unichar_id; - if (!shape1.ContainsUnichar(unichar_id2)) - break; - } - return c1 == shape1.size() || c2 == shape2.size(); -} - -// Returns false if the unichars in neither shape is a subset of the other. 
-bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, - int shape_id) const { - const Shape& merge1 = GetShape(merge_id1); - const Shape& merge2 = GetShape(merge_id2); - const Shape& shape = GetShape(shape_id); - int cm1, cm2, cs; - for (cs = 0; cs < shape.size(); ++cs) { - int unichar_id = shape[cs].unichar_id; - if (!merge1.ContainsUnichar(unichar_id) && - !merge2.ContainsUnichar(unichar_id)) - break; // Shape is not a subset of the merge. - } - for (cm1 = 0; cm1 < merge1.size(); ++cm1) { - int unichar_id1 = merge1[cm1].unichar_id; - if (!shape.ContainsUnichar(unichar_id1)) - break; // Merge is not a subset of shape - } - for (cm2 = 0; cm2 < merge2.size(); ++cm2) { - int unichar_id2 = merge2[cm2].unichar_id; - if (!shape.ContainsUnichar(unichar_id2)) - break; // Merge is not a subset of shape - } - return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size()); -} - -// Returns true if the unichar sets are equal between the shapes. -bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const { - const Shape& shape1 = GetShape(shape_id1); - const Shape& shape2 = GetShape(shape_id2); - for (int c1 = 0; c1 < shape1.size(); ++c1) { - int unichar_id1 = shape1[c1].unichar_id; - if (!shape2.ContainsUnichar(unichar_id1)) - return false; - } - for (int c2 = 0; c2 < shape2.size(); ++c2) { - int unichar_id2 = shape2[c2].unichar_id; - if (!shape1.ContainsUnichar(unichar_id2)) - return false; - } - return true; -} - -// Returns true if the unichar sets are equal between the shapes. -bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, - int shape_id) const { - const Shape& merge1 = GetShape(merge_id1); - const Shape& merge2 = GetShape(merge_id2); - const Shape& shape = GetShape(shape_id); - for (int cs = 0; cs < shape.size(); ++cs) { - int unichar_id = shape[cs].unichar_id; - if (!merge1.ContainsUnichar(unichar_id) && - !merge2.ContainsUnichar(unichar_id)) - return false; // Shape has a unichar that appears in neither merge. 
- } - for (int cm1 = 0; cm1 < merge1.size(); ++cm1) { - int unichar_id1 = merge1[cm1].unichar_id; - if (!shape.ContainsUnichar(unichar_id1)) - return false; // Merge has a unichar that is not in shape. - } - for (int cm2 = 0; cm2 < merge2.size(); ++cm2) { - int unichar_id2 = merge2[cm2].unichar_id; - if (!shape.ContainsUnichar(unichar_id2)) - return false; // Merge has a unichar that is not in shape. - } - return true; -} - -// Returns true if there is a common unichar between the shapes. -bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const { - const Shape& shape1 = GetShape(shape_id1); - const Shape& shape2 = GetShape(shape_id2); - for (int c1 = 0; c1 < shape1.size(); ++c1) { - int unichar_id1 = shape1[c1].unichar_id; - if (shape2.ContainsUnichar(unichar_id1)) - return true; - } - return false; -} - -// Returns true if there is a common font id between the shapes. -bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const { - const Shape& shape1 = GetShape(shape_id1); - const Shape& shape2 = GetShape(shape_id2); - for (int c1 = 0; c1 < shape1.size(); ++c1) { - const GenericVector& font_list1 = shape1[c1].font_ids; - for (int f = 0; f < font_list1.size(); ++f) { - if (shape2.ContainsFont(font_list1[f])) - return true; - } - } - return false; -} - -// Appends the master shapes from other to this. -// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. -void ShapeTable::AppendMasterShapes(const ShapeTable& other, - GenericVector* shape_map) { - if (shape_map != nullptr) - shape_map->init_to_size(other.NumShapes(), -1); - for (int s = 0; s < other.shape_table_.size(); ++s) { - if (other.shape_table_[s]->destination_index() < 0) { - int index = AddShape(*other.shape_table_[s]); - if (shape_map != nullptr) - (*shape_map)[s] = index; - } - } -} - -// Returns the number of master shapes remaining after merging. 
-int ShapeTable::NumMasterShapes() const { - int num_shapes = 0; - for (int s = 0; s < shape_table_.size(); ++s) { - if (shape_table_[s]->destination_index() < 0) - ++num_shapes; - } - return num_shapes; -} - - -// Adds the unichars of the given shape_id to the vector of results. Any -// unichar_id that is already present just has the fonts added to the -// font set for that result without adding a new entry in the vector. -// NOTE: it is assumed that the results are given to this function in order -// of decreasing rating. -// The unichar_map vector indicates the index of the results entry containing -// each unichar, or -1 if the unichar is not yet included in results. -void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating, - GenericVector* unichar_map, - GenericVector* results)const { - if (shape_rating.joined) { - AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, - results); - } - if (shape_rating.broken) { - AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map, - results); - } - const Shape& shape = GetShape(shape_rating.shape_id); - for (int u = 0; u < shape.size(); ++u) { - int result_index = AddUnicharToResults(shape[u].unichar_id, - shape_rating.rating, - unichar_map, results); - for (int f = 0; f < shape[u].font_ids.size(); ++f) { - (*results)[result_index].fonts.push_back( - ScoredFont(shape[u].font_ids[f], - IntCastRounded(shape_rating.rating * INT16_MAX))); - } - } -} - -// Adds the given unichar_id to the results if needed, updating unichar_map -// and returning the index of unichar in results. 
-int ShapeTable::AddUnicharToResults( - int unichar_id, float rating, GenericVector* unichar_map, - GenericVector* results) const { - int result_index = unichar_map->get(unichar_id); - if (result_index < 0) { - UnicharRating result(unichar_id, rating); - result_index = results->push_back(result); - (*unichar_map)[unichar_id] = result_index; - } - return result_index; -} - - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapetable.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapetable.h deleted file mode 100644 index 00407a0f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/shapetable.h +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: shapetable.h -// Description: Class to map a classifier shape index to unicharset -// indices and font indices. -// Author: Ray Smith -// Created: Thu Oct 28 17:46:32 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CLASSIFY_SHAPETABLE_H_ -#define TESSERACT_CLASSIFY_SHAPETABLE_H_ - -#include "bitvector.h" -#include "fontinfo.h" -#include "genericheap.h" -#include "genericvector.h" -#include "intmatcher.h" - -class STRING; -class UNICHARSET; - -namespace tesseract { - -class ShapeTable; - -// Simple struct to hold a single classifier unichar selection, a corresponding -// rating, and a list of appropriate fonts. -struct UnicharRating { - UnicharRating() - : unichar_id(0), rating(0.0f), adapted(false), config(0), - feature_misses(0) {} - UnicharRating(int u, float r) - : unichar_id(u), rating(r), adapted(false), config(0), feature_misses(0) {} - - // Print debug info. - void Print() const { - tprintf("Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%d," - " %d fonts\n", unichar_id, rating, adapted, config, feature_misses, - fonts.size()); - } - - // Sort function to sort ratings appropriately by descending rating. - static int SortDescendingRating(const void* t1, const void* t2) { - const UnicharRating* a = static_cast(t1); - const UnicharRating* b = static_cast(t2); - if (a->rating > b->rating) { - return -1; - } else if (a->rating < b->rating) { - return 1; - } else { - return a->unichar_id - b->unichar_id; - } - } - // Helper function to get the index of the first result with the required - // unichar_id. If the results are sorted by rating, this will also be the - // best result with the required unichar_id. - // Returns -1 if the unichar_id is not found - static int FirstResultWithUnichar(const GenericVector& results, - UNICHAR_ID unichar_id); - - // Index into some UNICHARSET table indicates the class of the answer. - UNICHAR_ID unichar_id; - // Rating from classifier with 1.0 perfect and 0.0 impossible. - // Call it a probability if you must. - float rating; - // True if this result is from the adaptive classifier. 
- bool adapted; - // Index of best matching font configuration of result. - uint8_t config; - // Number of features that were total misses - were liked by no classes. - uint16_t feature_misses; - // Unsorted collection of fontinfo ids and scores. Note that a raw result - // from the IntegerMatch will contain config ids, that require transforming - // to fontinfo ids via fontsets and (possibly) shapetable. - GenericVector fonts; -}; - -// Classifier result from a low-level classification is an index into some -// ShapeTable and a rating. -struct ShapeRating { - ShapeRating() - : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f), - joined(false), broken(false) {} - ShapeRating(int s, float r) - : shape_id(s), rating(r), raw(1.0f), font(0.0f), - joined(false), broken(false) {} - - // Sort function to sort ratings appropriately by descending rating. - static int SortDescendingRating(const void* t1, const void* t2) { - const ShapeRating* a = static_cast(t1); - const ShapeRating* b = static_cast(t2); - if (a->rating > b->rating) { - return -1; - } else if (a->rating < b->rating) { - return 1; - } else { - return a->shape_id - b->shape_id; - } - } - // Helper function to get the index of the first result with the required - // unichar_id. If the results are sorted by rating, this will also be the - // best result with the required unichar_id. - // Returns -1 if the unichar_id is not found - static int FirstResultWithUnichar(const GenericVector& results, - const ShapeTable& shape_table, - UNICHAR_ID unichar_id); - - // Index into some shape table indicates the class of the answer. - int shape_id; - // Rating from classifier with 1.0 perfect and 0.0 impossible. - // Call it a probability if you must. - float rating; - // Subsidiary rating that a classifier may use internally. - float raw; - // Subsidiary rating that a classifier may use internally. - float font; - // Flag indicating that the input may be joined. 
- bool joined; - // Flag indicating that the input may be broken (a fragment). - bool broken; -}; - -// Simple struct to hold an entry for a heap-based priority queue of -// ShapeRating. -struct ShapeQueueEntry { - ShapeQueueEntry() : result(ShapeRating(0, 0.0f)), level(0) {} - ShapeQueueEntry(const ShapeRating& rating, int level0) - : result(rating), level(level0) {} - - // Sort by decreasing rating and decreasing level for equal rating. - bool operator<(const ShapeQueueEntry& other) const { - if (result.rating > other.result.rating) return true; - if (result.rating == other.result.rating) - return level > other.level; - return false; - } - - // Output from classifier. - ShapeRating result; - // Which level in the tree did this come from? - int level; -}; -using ShapeQueue = GenericHeap; - -// Simple struct to hold a set of fonts associated with a single unichar-id. -// A vector of UnicharAndFonts makes a shape. -struct UnicharAndFonts { - UnicharAndFonts() : unichar_id(0) { - } - UnicharAndFonts(int uni_id, int font_id) : unichar_id(uni_id) { - font_ids.push_back(font_id); - } - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp); - - // Sort function to sort a pair of UnicharAndFonts by unichar_id. - static int SortByUnicharId(const void* v1, const void* v2); - - GenericVector font_ids; - int32_t unichar_id; -}; - -// A Shape is a collection of unichar-ids and a list of fonts associated with -// each, organized as a vector of UnicharAndFonts. Conceptually a Shape is -// a classifiable unit, and represents a group of characters or parts of -// characters that have a similar or identical shape. Shapes/ShapeTables may -// be organized hierarchically from identical shapes at the leaves to vaguely -// similar shapes near the root. -class Shape { - public: - Shape() : destination_index_(-1) {} - - // Writes to the given file. 
Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp); - - int destination_index() const { - return destination_index_; - } - void set_destination_index(int index) { - destination_index_ = index; - } - int size() const { - return unichars_.size(); - } - // Returns a UnicharAndFonts entry for the given index, which must be - // in the range [0, size()). - const UnicharAndFonts& operator[](int index) const { - return unichars_[index]; - } - // Sets the unichar_id of the given index to the new unichar_id. - void SetUnicharId(int index, int unichar_id) { - unichars_[index].unichar_id = unichar_id; - } - // Adds a font_id for the given unichar_id. If the unichar_id is not - // in the shape, it is added. - void AddToShape(int unichar_id, int font_id); - // Adds everything in other to this. - void AddShape(const Shape& other); - // Returns true if the shape contains the given unichar_id, font_id pair. - bool ContainsUnicharAndFont(int unichar_id, int font_id) const; - // Returns true if the shape contains the given unichar_id, ignoring font. - bool ContainsUnichar(int unichar_id) const; - // Returns true if the shape contains the given font, ignoring unichar_id. - bool ContainsFont(int font_id) const; - // Returns true if the shape contains the given font properties, ignoring - // unichar_id. - bool ContainsFontProperties(const FontInfoTable& font_table, - uint32_t properties) const; - // Returns true if the shape contains multiple different font properties, - // ignoring unichar_id. - bool ContainsMultipleFontProperties(const FontInfoTable& font_table) const; - // Returns true if this shape is equal to other (ignoring order of unichars - // and fonts). - bool operator==(const Shape& other) const; - // Returns true if this is a subset (including equal) of other. 
- bool IsSubsetOf(const Shape& other) const; - // Returns true if the lists of unichar ids are the same in this and other, - // ignoring fonts. - // NOT const, as it will sort the unichars on demand. - bool IsEqualUnichars(Shape* other); - - private: - // Sorts the unichars_ vector by unichar. - void SortUnichars(); - - // Flag indicates that the unichars are sorted, allowing faster set - // operations with another shape. - bool unichars_sorted_; - // If this Shape is part of a ShapeTable the destiation_index_ is the index - // of some other shape in the ShapeTable with which this shape is merged. - int destination_index_; - // Array of unichars, each with a set of fonts. Each unichar has at most - // one entry in the vector. - GenericVector unichars_; -}; - -// ShapeTable is a class to encapsulate the triple indirection that is -// used here. -// ShapeTable is a vector of shapes. -// Each shape is a vector of UnicharAndFonts representing the set of unichars -// that the shape represents. -// Each UnicharAndFonts also lists the fonts of the unichar_id that were -// mapped to the shape during training. -class ShapeTable { - public: - ShapeTable(); - // The UNICHARSET reference supplied here, or in set_unicharset below must - // exist for the entire life of the ShapeTable. It is used only by DebugStr. - explicit ShapeTable(const UNICHARSET& unicharset); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp); - - // Accessors. - int NumShapes() const { - return shape_table_.size(); - } - const UNICHARSET& unicharset() const { - return *unicharset_; - } - // Returns the number of fonts used in this ShapeTable, computing it if - // necessary. - int NumFonts() const; - // Shapetable takes a pointer to the UNICHARSET, so it must persist for the - // entire life of the ShapeTable. 
- void set_unicharset(const UNICHARSET& unicharset) { - unicharset_ = &unicharset; - } - // Re-indexes the class_ids in the shapetable according to the given map. - // Useful in conjunction with set_unicharset. - void ReMapClassIds(const GenericVector& unicharset_map); - // Returns a string listing the classes/fonts in a shape. - STRING DebugStr(int shape_id) const; - // Returns a debug string summarizing the table. - STRING SummaryStr() const; - - // Adds a new shape starting with the given unichar_id and font_id. - // Returns the assigned index. - int AddShape(int unichar_id, int font_id); - // Adds a copy of the given shape unless it is already present. - // Returns the assigned index or index of existing shape if already present. - int AddShape(const Shape& other); - // Removes the shape given by the shape index. All indices above are changed! - void DeleteShape(int shape_id); - // Adds a font_id to the given existing shape index for the given - // unichar_id. If the unichar_id is not in the shape, it is added. - void AddToShape(int shape_id, int unichar_id, int font_id); - // Adds the given shape to the existing shape with the given index. - void AddShapeToShape(int shape_id, const Shape& other); - // Returns the id of the shape that contains the given unichar and font. - // If not found, returns -1. - // If font_id < 0, the font_id is ignored and the first shape that matches - // the unichar_id is returned. - int FindShape(int unichar_id, int font_id) const; - // Returns the first unichar_id and font_id in the given shape. - void GetFirstUnicharAndFont(int shape_id, - int* unichar_id, int* font_id) const; - - // Accessors for the Shape with the given shape_id. - const Shape& GetShape(int shape_id) const { - return *shape_table_[shape_id]; - } - Shape* MutableShape(int shape_id) { - return shape_table_[shape_id]; - } - - // Expands all the classes/fonts in the shape individually to build - // a ShapeTable. 
- int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes); - - // Returns true if the shapes are already merged. - bool AlreadyMerged(int shape_id1, int shape_id2) const; - // Returns true if any shape contains multiple unichars. - bool AnyMultipleUnichars() const; - // Returns the maximum number of unichars over all shapes. - int MaxNumUnichars() const; - // Merges shapes with a common unichar over the [start, end) interval. - // Assumes single unichar per shape. - void ForceFontMerges(int start, int end); - // Returns the number of unichars in the master shape. - int MasterUnicharCount(int shape_id) const; - // Returns the sum of the font counts in the master shape. - int MasterFontCount(int shape_id) const; - // Returns the number of unichars that would result from merging the shapes. - int MergedUnicharCount(int shape_id1, int shape_id2) const; - // Merges two shape_ids, leaving shape_id2 marked as merged. - void MergeShapes(int shape_id1, int shape_id2); - // Swaps two shape_ids. - void SwapShapes(int shape_id1, int shape_id2); - // Appends the master shapes from other to this. - // Used to create a clean ShapeTable from a merged one, or to create a - // copy of a ShapeTable. - // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. - void AppendMasterShapes(const ShapeTable& other, - GenericVector* shape_map); - // Returns the number of master shapes remaining after merging. - int NumMasterShapes() const; - // Returns the destination of this shape, (if merged), taking into account - // the fact that the destination may itself have been merged. - // For a non-merged shape, returns the input shape_id. - int MasterDestinationIndex(int shape_id) const; - - // Returns false if the unichars in neither shape is a subset of the other.. - bool SubsetUnichar(int shape_id1, int shape_id2) const; - // Returns false if the unichars in neither shape is a subset of the other.. 
- bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const; - // Returns true if the unichar sets are equal between the shapes. - bool EqualUnichars(int shape_id1, int shape_id2) const; - bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const; - // Returns true if there is a common unichar between the shapes. - bool CommonUnichars(int shape_id1, int shape_id2) const; - // Returns true if there is a common font id between the shapes. - bool CommonFont(int shape_id1, int shape_id2) const; - - // Adds the unichars of the given shape_id to the vector of results. Any - // unichar_id that is already present just has the fonts added to the - // font set for that result without adding a new entry in the vector. - // NOTE: it is assumed that the results are given to this function in order - // of decreasing rating. - // The unichar_map vector indicates the index of the results entry containing - // each unichar, or -1 if the unichar is not yet included in results. - void AddShapeToResults(const ShapeRating& shape_rating, - GenericVector* unichar_map, - GenericVector* results) const; - - private: - // Adds the given unichar_id to the results if needed, updating unichar_map - // and returning the index of unichar in results. - int AddUnicharToResults(int unichar_id, float rating, - GenericVector* unichar_map, - GenericVector* results) const; - - // Pointer to a provided unicharset used only by the Debugstr member. - const UNICHARSET* unicharset_; - // Vector of pointers to the Shapes in this ShapeTable. - PointerVector shape_table_; - - // Cached data calculated on demand. - mutable int num_fonts_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_CLASSIFY_SHAPETABLE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/tessclassifier.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/tessclassifier.cpp deleted file mode 100644 index df7866be..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/tessclassifier.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: tessclassifier.cpp -// Description: Tesseract implementation of a ShapeClassifier. -// Author: Ray Smith -// Created: Tue Nov 22 14:16:25 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "tessclassifier.h" - -#include "classify.h" -#include "trainingsample.h" - -namespace tesseract { - -// Classifies the given [training] sample, writing to results. -// See ShapeClassifier for a full description. 
-int TessClassifier::UnicharClassifySample( - const TrainingSample& sample, Pix* page_pix, int debug, - UNICHAR_ID keep_this, GenericVector* results) { - const int old_matcher_level = classify_->matcher_debug_level; - const int old_matcher_flags = classify_->matcher_debug_flags; - const int old_classify_level = classify_->classify_debug_level; - if (debug) { - // Explicitly set values of various control parameters to generate debug - // output if required, restoring the old values after classifying. - classify_->matcher_debug_level.set_value(2); - classify_->matcher_debug_flags.set_value(25); - classify_->classify_debug_level.set_value(3); - } - classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results); - if (debug) { - classify_->matcher_debug_level.set_value(old_matcher_level); - classify_->matcher_debug_flags.set_value(old_matcher_flags); - classify_->classify_debug_level.set_value(old_classify_level); - } - return results->size(); -} - -// Provides access to the ShapeTable that this classifier works with. -const ShapeTable* TessClassifier::GetShapeTable() const { - return classify_->shape_table(); -} -// Provides access to the UNICHARSET that this classifier works with. -// Only needs to be overridden if GetShapeTable() can return nullptr. -const UNICHARSET& TessClassifier::GetUnicharset() const { - return classify_->unicharset; -} - -// Displays classification as the given shape_id. Creates as many windows -// as it feels fit, using index as a guide for placement. Adds any created -// windows to the windows output and returns a new index that may be used -// by any subsequent classifiers. Caller waits for the user to view and -// then destroys the windows by clearing the vector. -int TessClassifier::DisplayClassifyAs( - const TrainingSample& sample, Pix* page_pix, int unichar_id, int index, - PointerVector* windows) { - int shape_id = unichar_id; - // TODO(rays) Fix this so it works with both flat and real shapetables. 
- // if (GetShapeTable() != nullptr) - // shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, nullptr); - if (shape_id < 0) return index; - if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) { - tprintf("No built-in templates for class/shape %d\n", shape_id); - return index; - } - classify_->ShowBestMatchFor(shape_id, sample.features(), - sample.num_features()); - return index; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/tessclassifier.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/tessclassifier.h deleted file mode 100644 index b72e90a1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/tessclassifier.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -/////////////////////////////////////////////////////////////////////// -// File: tessclassifier.h -// Description: Tesseract implementation of a ShapeClassifier. -// Author: Ray Smith -// Created: Tue Nov 22 14:10:45 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ -#define THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ - -#include "shapeclassifier.h" - -namespace tesseract { - -class Classify; -class TrainingSample; - -// Tesseract implementation of a ShapeClassifier. -// Due to limitations in the content of TrainingSample, this currently -// only works for the static classifier and only works if the ShapeTable -// in classify is not nullptr. -class TessClassifier : public ShapeClassifier { - public: - TessClassifier(bool pruner_only, tesseract::Classify* classify) - : pruner_only_(pruner_only), classify_(classify) {} - virtual ~TessClassifier() = default; - - // Classifies the given [training] sample, writing to results. - // See ShapeClassifier for a full description. - virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, UNICHAR_ID keep_this, - GenericVector* results); - // Provides access to the ShapeTable that this classifier works with. - virtual const ShapeTable* GetShapeTable() const; - // Provides access to the UNICHARSET that this classifier works with. - // Only needs to be overridden if GetShapeTable() can return nullptr. - virtual const UNICHARSET& GetUnicharset() const; - - // Displays classification as the given shape_id. Creates as many windows - // as it feels fit, using index as a guide for placement. Adds any created - // windows to the windows output and returns a new index that may be used - // by any subsequent classifiers. Caller waits for the user to view and - // then destroys the windows by clearing the vector. - virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, - int unichar_id, int index, - PointerVector* windows); - - private: - // Indicates that this classifier is to use just the ClassPruner, or the - // full classifier if false. 
- bool pruner_only_; - // Borrowed pointer to the actual Tesseract classifier. - tesseract::Classify* classify_; -}; - - -} // namespace tesseract - - - - - -#endif /* THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsample.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsample.cpp deleted file mode 100644 index 65fcb3ff..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsample.cpp +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "trainingsample.h" - -#include -#include "allheaders.h" -#include "helpers.h" -#include "intfeaturemap.h" -#include "normfeat.h" -#include "shapetable.h" - -namespace tesseract { - -ELISTIZE(TrainingSample) - -// Center of randomizing operations. -const int kRandomizingCenter = 128; - -// Randomizing factors. 
-const int TrainingSample::kYShiftValues[kSampleYShiftSize] = { - 6, 3, -3, -6, 0 -}; -const double TrainingSample::kScaleValues[kSampleScaleSize] = { - 1.0625, 0.9375, 1.0 -}; - -TrainingSample::~TrainingSample() { - delete [] features_; - delete [] micro_features_; -} - -// WARNING! Serialize/DeSerialize do not save/restore the "cache" data -// members, which is mostly the mapped features, and the weight. -// It is assumed these can all be reconstructed from what is saved. -// Writes to the given file. Returns false in case of error. -bool TrainingSample::Serialize(FILE* fp) const { - if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; - if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; - if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; - if (!bounding_box_.Serialize(fp)) return false; - if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; - if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) - return false; - if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) - return false; - if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) - return false; - if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, - fp) != num_micro_features_) - return false; - if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != - kNumCNParams) return false; - if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) - return false; - return true; -} - -// Creates from the given file. Returns nullptr in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) { - TrainingSample* sample = new TrainingSample; - if (sample->DeSerialize(swap, fp)) return sample; - delete sample; - return nullptr; -} - -// Reads from the given file. Returns false in case of error. 
-// If swap is true, assumes a big/little-endian swap is needed. -bool TrainingSample::DeSerialize(bool swap, FILE* fp) { - if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; - if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; - if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; - if (!bounding_box_.DeSerialize(swap, fp)) return false; - if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; - if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) - return false; - if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) - return false; - if (swap) { - ReverseN(&class_id_, sizeof(class_id_)); - ReverseN(&num_features_, sizeof(num_features_)); - ReverseN(&num_micro_features_, sizeof(num_micro_features_)); - ReverseN(&outline_length_, sizeof(outline_length_)); - } - // Arbitrarily limit the number of elements to protect against bad data. - if (num_features_ > UINT16_MAX) return false; - if (num_micro_features_ > UINT16_MAX) return false; - delete [] features_; - features_ = new INT_FEATURE_STRUCT[num_features_]; - if (fread(features_, sizeof(*features_), num_features_, fp) - != num_features_) - return false; - delete [] micro_features_; - micro_features_ = new MicroFeature[num_micro_features_]; - if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, - fp) != num_micro_features_) - return false; - if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != - kNumCNParams) return false; - if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) - return false; - return true; -} - -// Saves the given features into a TrainingSample. 
-TrainingSample* TrainingSample::CopyFromFeatures( - const INT_FX_RESULT_STRUCT& fx_info, - const TBOX& bounding_box, - const INT_FEATURE_STRUCT* features, - int num_features) { - TrainingSample* sample = new TrainingSample; - sample->num_features_ = num_features; - sample->features_ = new INT_FEATURE_STRUCT[num_features]; - sample->outline_length_ = fx_info.Length; - memcpy(sample->features_, features, num_features * sizeof(features[0])); - sample->geo_feature_[GeoBottom] = bounding_box.bottom(); - sample->geo_feature_[GeoTop] = bounding_box.top(); - sample->geo_feature_[GeoWidth] = bounding_box.width(); - - // Generate the cn_feature_ from the fx_info. - sample->cn_feature_[CharNormY] = - MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); - sample->cn_feature_[CharNormLength] = - MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; - sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; - sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; - - sample->features_are_indexed_ = false; - sample->features_are_mapped_ = false; - return sample; -} - -// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. -FEATURE_STRUCT* TrainingSample::GetCNFeature() const { - FEATURE feature = NewFeature(&CharNormDesc); - for (int i = 0; i < kNumCNParams; ++i) - feature->Params[i] = cn_feature_[i]; - return feature; -} - -// Constructs and returns a copy randomized by the method given by -// the randomizer index. If index is out of [0, kSampleRandomSize) then -// an exact copy is returned. -TrainingSample* TrainingSample::RandomizedCopy(int index) const { - TrainingSample* sample = Copy(); - if (index >= 0 && index < kSampleRandomSize) { - ++index; // Remove the first combination. 
- const int yshift = kYShiftValues[index / kSampleScaleSize]; - double scaling = kScaleValues[index % kSampleScaleSize]; - for (uint32_t i = 0; i < num_features_; ++i) { - double result = (features_[i].X - kRandomizingCenter) * scaling; - result += kRandomizingCenter; - sample->features_[i].X = ClipToRange(result + 0.5, 0, UINT8_MAX); - result = (features_[i].Y - kRandomizingCenter) * scaling; - result += kRandomizingCenter + yshift; - sample->features_[i].Y = ClipToRange(result + 0.5, 0, UINT8_MAX); - } - } - return sample; -} - -// Constructs and returns an exact copy. -TrainingSample* TrainingSample::Copy() const { - TrainingSample* sample = new TrainingSample; - sample->class_id_ = class_id_; - sample->font_id_ = font_id_; - sample->weight_ = weight_; - sample->sample_index_ = sample_index_; - sample->num_features_ = num_features_; - if (num_features_ > 0) { - sample->features_ = new INT_FEATURE_STRUCT[num_features_]; - memcpy(sample->features_, features_, num_features_ * sizeof(features_[0])); - } - sample->num_micro_features_ = num_micro_features_; - if (num_micro_features_ > 0) { - sample->micro_features_ = new MicroFeature[num_micro_features_]; - memcpy(sample->micro_features_, micro_features_, - num_micro_features_ * sizeof(micro_features_[0])); - } - memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams); - memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount); - return sample; -} - -// Extracts the needed information from the CHAR_DESC_STRUCT. -void TrainingSample::ExtractCharDesc(int int_feature_type, - int micro_type, - int cn_type, - int geo_type, - CHAR_DESC_STRUCT* char_desc) { - // Extract the INT features. 
- delete[] features_; - FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; - if (char_features == nullptr) { - tprintf("Error: no features to train on of type %s\n", - kIntFeatureType); - num_features_ = 0; - features_ = nullptr; - } else { - num_features_ = char_features->NumFeatures; - features_ = new INT_FEATURE_STRUCT[num_features_]; - for (uint32_t f = 0; f < num_features_; ++f) { - features_[f].X = - static_cast(char_features->Features[f]->Params[IntX]); - features_[f].Y = - static_cast(char_features->Features[f]->Params[IntY]); - features_[f].Theta = - static_cast(char_features->Features[f]->Params[IntDir]); - features_[f].CP_misses = 0; - } - } - // Extract the Micro features. - delete[] micro_features_; - char_features = char_desc->FeatureSets[micro_type]; - if (char_features == nullptr) { - tprintf("Error: no features to train on of type %s\n", - kMicroFeatureType); - num_micro_features_ = 0; - micro_features_ = nullptr; - } else { - num_micro_features_ = char_features->NumFeatures; - micro_features_ = new MicroFeature[num_micro_features_]; - for (uint32_t f = 0; f < num_micro_features_; ++f) { - for (int d = 0; d < MFCount; ++d) { - micro_features_[f][d] = char_features->Features[f]->Params[d]; - } - } - } - // Extract the CN feature. - char_features = char_desc->FeatureSets[cn_type]; - if (char_features == nullptr) { - tprintf("Error: no CN feature to train on.\n"); - } else { - ASSERT_HOST(char_features->NumFeatures == 1); - cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY]; - cn_feature_[CharNormLength] = - char_features->Features[0]->Params[CharNormLength]; - cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx]; - cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy]; - } - // Extract the Geo feature. 
- char_features = char_desc->FeatureSets[geo_type]; - if (char_features == nullptr) { - tprintf("Error: no Geo feature to train on.\n"); - } else { - ASSERT_HOST(char_features->NumFeatures == 1); - geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom]; - geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop]; - geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth]; - } - features_are_indexed_ = false; - features_are_mapped_ = false; -} - -// Sets the mapped_features_ from the features_ using the provided -// feature_space to the indexed versions of the features. -void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) { - GenericVector indexed_features; - feature_space.IndexAndSortFeatures(features_, num_features_, - &mapped_features_); - features_are_indexed_ = true; - features_are_mapped_ = false; -} - -// Sets the mapped_features_ from the features using the provided -// feature_map. -void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) { - GenericVector indexed_features; - feature_map.feature_space().IndexAndSortFeatures(features_, num_features_, - &indexed_features); - feature_map.MapIndexedFeatures(indexed_features, &mapped_features_); - features_are_indexed_ = false; - features_are_mapped_ = true; -} - -// Returns a pix representing the sample. (Int features only.) 
-Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { - Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); - for (uint32_t f = 0; f < num_features_; ++f) { - int start_x = features_[f].X; - int start_y = kIntFeatureExtent - features_[f].Y; - double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); - double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); - for (int i = 0; i <= 5; ++i) { - int x = static_cast(start_x + dx * i); - int y = static_cast(start_y + dy * i); - if (x >= 0 && x < 256 && y >= 0 && y < 256) - pixSetPixel(pix, x, y, 1); - } - } - if (unicharset != nullptr) - pixSetText(pix, unicharset->id_to_unichar(class_id_)); - return pix; -} - -// Displays the features in the given window with the given color. -void TrainingSample::DisplayFeatures(ScrollView::Color color, - ScrollView* window) const { - #ifndef GRAPHICS_DISABLED - for (uint32_t f = 0; f < num_features_; ++f) { - RenderIntFeature(window, &features_[f], color); - } - #endif // GRAPHICS_DISABLED -} - -// Returns a pix of the original sample image. The pix is padded all round -// by padding wherever possible. -// The returned Pix must be pixDestroyed after use. -// If the input page_pix is nullptr, nullptr is returned. 
-Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const { - if (page_pix == nullptr) - return nullptr; - int page_width = pixGetWidth(page_pix); - int page_height = pixGetHeight(page_pix); - TBOX padded_box = bounding_box(); - padded_box.pad(padding, padding); - // Clip the padded_box to the limits of the page - TBOX page_box(0, 0, page_width, page_height); - padded_box &= page_box; - Box* box = boxCreate(page_box.left(), page_height - page_box.top(), - page_box.width(), page_box.height()); - Pix* sample_pix = pixClipRectangle(page_pix, box, nullptr); - boxDestroy(&box); - return sample_pix; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsample.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsample.h deleted file mode 100644 index 0964e2be..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsample.h +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_ -#define TESSERACT_TRAINING_TRAININGSAMPLE_H_ - -#include "elst.h" -#include "featdefs.h" -#include "intfx.h" -#include "intmatcher.h" -#include "matrix.h" -#include "mf.h" -#include "picofeat.h" -#include "shapetable.h" -#include "unicharset.h" - -struct Pix; - -namespace tesseract { - -class IntFeatureMap; -class IntFeatureSpace; -class ShapeTable; - -// Number of elements of cn_feature_. -static const int kNumCNParams = 4; -// Number of ways to shift the features when randomizing. -static const int kSampleYShiftSize = 5; -// Number of ways to scale the features when randomizing. -static const int kSampleScaleSize = 3; -// Total number of different ways to manipulate the features when randomizing. -// The first and last combinations are removed to avoid an excessive -// top movement (first) and an identity transformation (last). -// WARNING: To avoid patterned duplication of samples, be sure to keep -// kSampleRandomSize prime! -// Eg with current values (kSampleYShiftSize = 5 and TkSampleScaleSize = 3) -// kSampleRandomSize is 13, which is prime. -static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; -// ASSERT_IS_PRIME(kSampleRandomSize) !! - -class TrainingSample : public ELIST_LINK { - public: - TrainingSample() - : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), - num_features_(0), num_micro_features_(0), outline_length_(0), - features_(nullptr), micro_features_(nullptr), weight_(1.0), - max_dist_(0.0), sample_index_(0), - features_are_indexed_(false), features_are_mapped_(false), - is_error_(false) { - } - ~TrainingSample(); - - // Saves the given features into a TrainingSample. The features are copied, - // so may be deleted afterwards. Delete the return value after use. 
- static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info, - const TBOX& bounding_box, - const INT_FEATURE_STRUCT* features, - int num_features); - // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. - FEATURE_STRUCT* GetCNFeature() const; - // Constructs and returns a copy "randomized" by the method given by - // the randomizer index. If index is out of [0, kSampleRandomSize) then - // an exact copy is returned. - TrainingSample* RandomizedCopy(int index) const; - // Constructs and returns an exact copy. - TrainingSample* Copy() const; - - // WARNING! Serialize/DeSerialize do not save/restore the "cache" data - // members, which is mostly the mapped features, and the weight. - // It is assumed these can all be reconstructed from what is saved. - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Creates from the given file. Returns nullptr in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - static TrainingSample* DeSerializeCreate(bool swap, FILE* fp); - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - // Extracts the needed information from the CHAR_DESC_STRUCT. - void ExtractCharDesc(int feature_type, int micro_type, - int cn_type, int geo_type, - CHAR_DESC_STRUCT* char_desc); - - // Sets the mapped_features_ from the features_ using the provided - // feature_space to the indexed versions of the features. - void IndexFeatures(const IntFeatureSpace& feature_space); - // Sets the mapped_features_ from the features_ using the provided - // feature_map. - void MapFeatures(const IntFeatureMap& feature_map); - - // Returns a pix representing the sample. (Int features only.) - Pix* RenderToPix(const UNICHARSET* unicharset) const; - // Displays the features in the given window with the given color. 
- void DisplayFeatures(ScrollView::Color color, ScrollView* window) const; - - // Returns a pix of the original sample image. The pix is padded all round - // by padding wherever possible. - // The returned Pix must be pixDestroyed after use. - // If the input page_pix is nullptr, nullptr is returned. - Pix* GetSamplePix(int padding, Pix* page_pix) const; - - // Accessors. - UNICHAR_ID class_id() const { - return class_id_; - } - void set_class_id(int id) { - class_id_ = id; - } - int font_id() const { - return font_id_; - } - void set_font_id(int id) { - font_id_ = id; - } - int page_num() const { - return page_num_; - } - void set_page_num(int page) { - page_num_ = page; - } - const TBOX& bounding_box() const { - return bounding_box_; - } - void set_bounding_box(const TBOX& box) { - bounding_box_ = box; - } - uint32_t num_features() const { - return num_features_; - } - const INT_FEATURE_STRUCT* features() const { - return features_; - } - uint32_t num_micro_features() const { - return num_micro_features_; - } - const MicroFeature* micro_features() const { - return micro_features_; - } - int outline_length() const { - return outline_length_; - } - float cn_feature(int index) const { - return cn_feature_[index]; - } - int geo_feature(int index) const { - return geo_feature_[index]; - } - double weight() const { - return weight_; - } - void set_weight(double value) { - weight_ = value; - } - double max_dist() const { - return max_dist_; - } - void set_max_dist(double value) { - max_dist_ = value; - } - int sample_index() const { - return sample_index_; - } - void set_sample_index(int value) { - sample_index_ = value; - } - bool features_are_mapped() const { - return features_are_mapped_; - } - const GenericVector& mapped_features() const { - ASSERT_HOST(features_are_mapped_); - return mapped_features_; - } - const GenericVector& indexed_features() const { - ASSERT_HOST(features_are_indexed_); - return mapped_features_; - } - bool is_error() const { - return 
is_error_; - } - void set_is_error(bool value) { - is_error_ = value; - } - - private: - // Unichar id that this sample represents. There obviously must be a - // reference UNICHARSET somewhere. Usually in TrainingSampleSet. - UNICHAR_ID class_id_; - // Font id in which this sample was printed. Refers to a fontinfo_table_ in - // MasterTrainer. - int font_id_; - // Number of page that the sample came from. - int page_num_; - // Bounding box of sample in original image. - TBOX bounding_box_; - // Number of INT_FEATURE_STRUCT in features_ array. - uint32_t num_features_; - // Number of MicroFeature in micro_features_ array. - uint32_t num_micro_features_; - // Total length of outline in the baseline normalized coordinate space. - // See comment in WERD_RES class definition for a discussion of coordinate - // spaces. - int outline_length_; - // Array of features. - INT_FEATURE_STRUCT* features_; - // Array of features. - MicroFeature* micro_features_; - // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. - float cn_feature_[kNumCNParams]; - // The one and only geometric feature. (Aims at replacing cn_feature_). - // Indexed by GeoParams enum in picofeat.h - int geo_feature_[GeoCount]; - - // Non-serialized cache data. - // Weight used for boosting training. - double weight_; - // Maximum distance to other samples of same class/font used in computing - // the canonical sample. - double max_dist_; - // Global index of this sample. - int sample_index_; - // Indexed/mapped features, as indicated by the bools below. - GenericVector mapped_features_; - bool features_are_indexed_; - bool features_are_mapped_; - // True if the last classification was an error by the current definition. - bool is_error_; - - // Randomizing factors. 
- static const int kYShiftValues[kSampleYShiftSize]; - static const double kScaleValues[kSampleScaleSize]; -}; - -ELISTIZEH(TrainingSample) - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsampleset.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsampleset.cpp deleted file mode 100644 index 620b52fa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsampleset.cpp +++ /dev/null @@ -1,763 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "trainingsampleset.h" -#include "allheaders.h" -#include "boxread.h" -#include "fontinfo.h" -#include "indexmapbidi.h" -#include "intfeaturedist.h" -#include "intfeaturemap.h" -#include "intfeaturespace.h" -#include "shapetable.h" -#include "trainingsample.h" -#include "unicity_table.h" - -#include - -namespace tesseract { - -const int kTestChar = -1; // 37; -// Max number of distances to compute the squared way -const int kSquareLimit = 25; -// Prime numbers for subsampling distances. 
-const int kPrime1 = 17; -const int kPrime2 = 13; - -TrainingSampleSet::FontClassInfo::FontClassInfo() - : num_raw_samples(0), canonical_sample(-1), canonical_dist(0.0f) { -} - -// Writes to the given file. Returns false in case of error. -bool TrainingSampleSet::FontClassInfo::Serialize(FILE* fp) const { - if (fwrite(&num_raw_samples, sizeof(num_raw_samples), 1, fp) != 1) - return false; - if (fwrite(&canonical_sample, sizeof(canonical_sample), 1, fp) != 1) - return false; - if (fwrite(&canonical_dist, sizeof(canonical_dist), 1, fp) != 1) return false; - if (!samples.Serialize(fp)) return false; - return true; -} -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool TrainingSampleSet::FontClassInfo::DeSerialize(bool swap, FILE* fp) { - if (fread(&num_raw_samples, sizeof(num_raw_samples), 1, fp) != 1) - return false; - if (fread(&canonical_sample, sizeof(canonical_sample), 1, fp) != 1) - return false; - if (fread(&canonical_dist, sizeof(canonical_dist), 1, fp) != 1) return false; - if (!samples.DeSerialize(swap, fp)) return false; - if (swap) { - ReverseN(&num_raw_samples, sizeof(num_raw_samples)); - ReverseN(&canonical_sample, sizeof(canonical_sample)); - ReverseN(&canonical_dist, sizeof(canonical_dist)); - } - return true; -} - -TrainingSampleSet::TrainingSampleSet(const FontInfoTable& font_table) - : num_raw_samples_(0), unicharset_size_(0), - font_class_array_(nullptr), fontinfo_table_(font_table) { -} - -TrainingSampleSet::~TrainingSampleSet() { - delete font_class_array_; -} - -// Writes to the given file. Returns false in case of error. 
-bool TrainingSampleSet::Serialize(FILE* fp) const { - if (!samples_.Serialize(fp)) return false; - if (!unicharset_.save_to_file(fp)) return false; - if (!font_id_map_.Serialize(fp)) return false; - int8_t not_null = font_class_array_ != nullptr; - if (fwrite(¬_null, sizeof(not_null), 1, fp) != 1) return false; - if (not_null) { - if (!font_class_array_->SerializeClasses(fp)) return false; - } - return true; -} - -// Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) { - if (!samples_.DeSerialize(swap, fp)) return false; - num_raw_samples_ = samples_.size(); - if (!unicharset_.load_from_file(fp)) return false; - if (!font_id_map_.DeSerialize(swap, fp)) return false; - delete font_class_array_; - font_class_array_ = nullptr; - int8_t not_null; - if (fread(¬_null, sizeof(not_null), 1, fp) != 1) return false; - if (not_null) { - FontClassInfo empty; - font_class_array_ = new GENERIC_2D_ARRAY(1, 1 , empty); - if (!font_class_array_->DeSerializeClasses(swap, fp)) return false; - } - unicharset_size_ = unicharset_.size(); - return true; -} - -// Load an initial unicharset, or set one up if the file cannot be read. -void TrainingSampleSet::LoadUnicharset(const char* filename) { - if (!unicharset_.load_from_file(filename)) { - tprintf("Failed to load unicharset from file %s\n" - "Building unicharset from scratch...\n", - filename); - unicharset_.clear(); - // Add special characters as they were removed by the clear. - UNICHARSET empty; - unicharset_.AppendOtherUnicharset(empty); - } - unicharset_size_ = unicharset_.size(); -} - -// Adds a character sample to this sample set. -// If the unichar is not already in the local unicharset, it is added. -// Returns the unichar_id of the added sample, from the local unicharset. 
-int TrainingSampleSet::AddSample(const char* unichar, TrainingSample* sample) { - if (!unicharset_.contains_unichar(unichar)) { - unicharset_.unichar_insert(unichar); - if (unicharset_.size() > MAX_NUM_CLASSES) { - tprintf("Error: Size of unicharset in TrainingSampleSet::AddSample is " - "greater than MAX_NUM_CLASSES\n"); - return -1; - } - } - UNICHAR_ID char_id = unicharset_.unichar_to_id(unichar); - AddSample(char_id, sample); - return char_id; -} - -// Adds a character sample to this sample set with the given unichar_id, -// which must correspond to the local unicharset (in this). -void TrainingSampleSet::AddSample(int unichar_id, TrainingSample* sample) { - sample->set_class_id(unichar_id); - samples_.push_back(sample); - num_raw_samples_ = samples_.size(); - unicharset_size_ = unicharset_.size(); -} - -// Returns the number of samples for the given font,class pair. -// If randomize is true, returns the number of samples accessible -// with randomizing on. (Increases the number of samples if small.) -// OrganizeByFontAndClass must have been already called. -int TrainingSampleSet::NumClassSamples(int font_id, int class_id, - bool randomize) const { - ASSERT_HOST(font_class_array_ != nullptr); - if (font_id < 0 || class_id < 0 || - font_id >= font_id_map_.SparseSize() || class_id >= unicharset_size_) { - // There are no samples because the font or class doesn't exist. - return 0; - } - int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) - return 0; // The font has no samples. - if (randomize) - return (*font_class_array_)(font_index, class_id).samples.size(); - else - return (*font_class_array_)(font_index, class_id).num_raw_samples; -} - -// Gets a sample by its index. -const TrainingSample* TrainingSampleSet::GetSample(int index) const { - return samples_[index]; -} - -// Gets a sample by its font, class, index. -// OrganizeByFontAndClass must have been already called. 
-const TrainingSample* TrainingSampleSet::GetSample(int font_id, int class_id, - int index) const { - ASSERT_HOST(font_class_array_ != nullptr); - int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) return nullptr; - int sample_index = (*font_class_array_)(font_index, class_id).samples[index]; - return samples_[sample_index]; -} - -// Get a sample by its font, class, index. Does not randomize. -// OrganizeByFontAndClass must have been already called. -TrainingSample* TrainingSampleSet::MutableSample(int font_id, int class_id, - int index) { - ASSERT_HOST(font_class_array_ != nullptr); - int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) return nullptr; - int sample_index = (*font_class_array_)(font_index, class_id).samples[index]; - return samples_[sample_index]; -} - -// Returns a string debug representation of the given sample: -// font, unichar_str, bounding box, page. -STRING TrainingSampleSet::SampleToString(const TrainingSample& sample) const { - STRING boxfile_str; - MakeBoxFileStr(unicharset_.id_to_unichar(sample.class_id()), - sample.bounding_box(), sample.page_num(), &boxfile_str); - return STRING(fontinfo_table_.get(sample.font_id()).name) + " " + boxfile_str; -} - -// Gets the combined set of features used by all the samples of the given -// font/class combination. -const BitVector& TrainingSampleSet::GetCloudFeatures( - int font_id, int class_id) const { - int font_index = font_id_map_.SparseToCompact(font_id); - ASSERT_HOST(font_index >= 0); - return (*font_class_array_)(font_index, class_id).cloud_features; -} -// Gets the indexed features of the canonical sample of the given -// font/class combination. 
-const GenericVector& TrainingSampleSet::GetCanonicalFeatures( - int font_id, int class_id) const { - int font_index = font_id_map_.SparseToCompact(font_id); - ASSERT_HOST(font_index >= 0); - return (*font_class_array_)(font_index, class_id).canonical_features; -} - -// Returns the distance between the given UniCharAndFonts pair. -// If matched_fonts, only matching fonts, are considered, unless that yields -// the empty set. -// OrganizeByFontAndClass must have been already called. -float TrainingSampleSet::UnicharDistance(const UnicharAndFonts& uf1, - const UnicharAndFonts& uf2, - bool matched_fonts, - const IntFeatureMap& feature_map) { - int num_fonts1 = uf1.font_ids.size(); - int c1 = uf1.unichar_id; - int num_fonts2 = uf2.font_ids.size(); - int c2 = uf2.unichar_id; - double dist_sum = 0.0; - int dist_count = 0; - const bool debug = false; - if (matched_fonts) { - // Compute distances only where fonts match. - for (int i = 0; i < num_fonts1; ++i) { - int f1 = uf1.font_ids[i]; - for (int j = 0; j < num_fonts2; ++j) { - int f2 = uf2.font_ids[j]; - if (f1 == f2) { - dist_sum += ClusterDistance(f1, c1, f2, c2, feature_map); - ++dist_count; - } - } - } - } else if (num_fonts1 * num_fonts2 <= kSquareLimit) { - // Small enough sets to compute all the distances. - for (int i = 0; i < num_fonts1; ++i) { - int f1 = uf1.font_ids[i]; - for (int j = 0; j < num_fonts2; ++j) { - int f2 = uf2.font_ids[j]; - dist_sum += ClusterDistance(f1, c1, f2, c2, feature_map); - if (debug) { - tprintf("Cluster dist %d %d %d %d = %g\n", - f1, c1, f2, c2, - ClusterDistance(f1, c1, f2, c2, feature_map)); - } - ++dist_count; - } - } - } else { - // Subsample distances, using the largest set once, and stepping through - // the smaller set so as to ensure that all the pairs are different. - int increment = kPrime1 != num_fonts2 ? 
kPrime1 : kPrime2; - int index = 0; - int num_samples = std::max(num_fonts1, num_fonts2); - for (int i = 0; i < num_samples; ++i, index += increment) { - int f1 = uf1.font_ids[i % num_fonts1]; - int f2 = uf2.font_ids[index % num_fonts2]; - if (debug) { - tprintf("Cluster dist %d %d %d %d = %g\n", - f1, c1, f2, c2, ClusterDistance(f1, c1, f2, c2, feature_map)); - } - dist_sum += ClusterDistance(f1, c1, f2, c2, feature_map); - ++dist_count; - } - } - if (dist_count == 0) { - if (matched_fonts) - return UnicharDistance(uf1, uf2, false, feature_map); - return 0.0f; - } - return dist_sum / dist_count; -} - -// Returns the distance between the given pair of font/class pairs. -// Finds in cache or computes and caches. -// OrganizeByFontAndClass must have been already called. -float TrainingSampleSet::ClusterDistance(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map) { - ASSERT_HOST(font_class_array_ != nullptr); - int font_index1 = font_id_map_.SparseToCompact(font_id1); - int font_index2 = font_id_map_.SparseToCompact(font_id2); - if (font_index1 < 0 || font_index2 < 0) - return 0.0f; - FontClassInfo& fc_info = (*font_class_array_)(font_index1, class_id1); - if (font_id1 == font_id2) { - // Special case cache for speed. - if (fc_info.unichar_distance_cache.size() == 0) - fc_info.unichar_distance_cache.init_to_size(unicharset_size_, -1.0f); - if (fc_info.unichar_distance_cache[class_id2] < 0) { - // Distance has to be calculated. - float result = ComputeClusterDistance(font_id1, class_id1, - font_id2, class_id2, - feature_map); - fc_info.unichar_distance_cache[class_id2] = result; - // Copy to the symmetric cache entry. 
- FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2); - if (fc_info2.unichar_distance_cache.size() == 0) - fc_info2.unichar_distance_cache.init_to_size(unicharset_size_, -1.0f); - fc_info2.unichar_distance_cache[class_id1] = result; - } - return fc_info.unichar_distance_cache[class_id2]; - } else if (class_id1 == class_id2) { - // Another special-case cache for equal class-id. - if (fc_info.font_distance_cache.size() == 0) - fc_info.font_distance_cache.init_to_size(font_id_map_.CompactSize(), - -1.0f); - if (fc_info.font_distance_cache[font_index2] < 0) { - // Distance has to be calculated. - float result = ComputeClusterDistance(font_id1, class_id1, - font_id2, class_id2, - feature_map); - fc_info.font_distance_cache[font_index2] = result; - // Copy to the symmetric cache entry. - FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2); - if (fc_info2.font_distance_cache.size() == 0) - fc_info2.font_distance_cache.init_to_size(font_id_map_.CompactSize(), - -1.0f); - fc_info2.font_distance_cache[font_index1] = result; - } - return fc_info.font_distance_cache[font_index2]; - } - // Both font and class are different. Linear search for class_id2/font_id2 - // in what is a hopefully short list of distances. - int cache_index = 0; - while (cache_index < fc_info.distance_cache.size() && - (fc_info.distance_cache[cache_index].unichar_id != class_id2 || - fc_info.distance_cache[cache_index].font_id != font_id2)) - ++cache_index; - if (cache_index == fc_info.distance_cache.size()) { - // Distance has to be calculated. - float result = ComputeClusterDistance(font_id1, class_id1, - font_id2, class_id2, - feature_map); - FontClassDistance fc_dist = { class_id2, font_id2, result }; - fc_info.distance_cache.push_back(fc_dist); - // Copy to the symmetric cache entry. We know it isn't there already, as - // we always copy to the symmetric entry. 
- FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2); - fc_dist.unichar_id = class_id1; - fc_dist.font_id = font_id1; - fc_info2.distance_cache.push_back(fc_dist); - } - return fc_info.distance_cache[cache_index].distance; -} - -// Computes the distance between the given pair of font/class pairs. -float TrainingSampleSet::ComputeClusterDistance( - int font_id1, int class_id1, int font_id2, int class_id2, - const IntFeatureMap& feature_map) const { - int dist = ReliablySeparable(font_id1, class_id1, font_id2, class_id2, - feature_map, false); - dist += ReliablySeparable(font_id2, class_id2, font_id1, class_id1, - feature_map, false); - int denominator = GetCanonicalFeatures(font_id1, class_id1).size(); - denominator += GetCanonicalFeatures(font_id2, class_id2).size(); - return static_cast(dist) / denominator; -} - -// Helper to add a feature and its near neighbors to the good_features. -// levels indicates how many times to compute the offset features of what is -// already there. This is done by iteration rather than recursion. -static void AddNearFeatures(const IntFeatureMap& feature_map, int f, int levels, - GenericVector* good_features) { - int prev_num_features = 0; - good_features->push_back(f); - int num_features = 1; - for (int level = 0; level < levels; ++level) { - for (int i = prev_num_features; i < num_features; ++i) { - int feature = (*good_features)[i]; - for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) { - if (dir == 0) continue; - int f1 = feature_map.OffsetFeature(feature, dir); - if (f1 >= 0) { - good_features->push_back(f1); - } - } - } - prev_num_features = num_features; - num_features = good_features->size(); - } -} - -// Returns the number of canonical features of font/class 2 for which -// neither the feature nor any of its near neighbors occurs in the cloud -// of font/class 1. 
Each such feature is a reliable separation between -// the classes, ASSUMING that the canonical sample is sufficiently -// representative that every sample has a feature near that particular -// feature. To check that this is so on the fly would be prohibitively -// expensive, but it might be possible to pre-qualify the canonical features -// to include only those for which this assumption is true. -// ComputeCanonicalFeatures and ComputeCloudFeatures must have been called -// first, or the results will be nonsense. -int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map, - bool thorough) const { - int result = 0; - const TrainingSample* sample2 = GetCanonicalSample(font_id2, class_id2); - if (sample2 == nullptr) - return 0; // There are no canonical features. - const GenericVector& canonical2 = GetCanonicalFeatures(font_id2, - class_id2); - const BitVector& cloud1 = GetCloudFeatures(font_id1, class_id1); - if (cloud1.size() == 0) - return canonical2.size(); // There are no cloud features. - - // Find a canonical2 feature that is not in cloud1. - for (int f = 0; f < canonical2.size(); ++f) { - const int feature = canonical2[f]; - if (cloud1[feature]) - continue; - // Gather the near neighbours of f. - GenericVector good_features; - AddNearFeatures(feature_map, feature, 1, &good_features); - // Check that none of the good_features are in the cloud. - int i; - for (i = 0; i < good_features.size(); ++i) { - int good_f = good_features[i]; - if (cloud1[good_f]) { - break; - } - } - if (i < good_features.size()) - continue; // Found one in the cloud. - ++result; - } - return result; -} - -// Returns the total index of the requested sample. -// OrganizeByFontAndClass must have been already called. 
-int TrainingSampleSet::GlobalSampleIndex(int font_id, int class_id, - int index) const { - ASSERT_HOST(font_class_array_ != nullptr); - int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) return -1; - return (*font_class_array_)(font_index, class_id).samples[index]; -} - -// Gets the canonical sample for the given font, class pair. -// ComputeCanonicalSamples must have been called first. -const TrainingSample* TrainingSampleSet::GetCanonicalSample( - int font_id, int class_id) const { - ASSERT_HOST(font_class_array_ != nullptr); - int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) return nullptr; - const int sample_index = (*font_class_array_)(font_index, - class_id).canonical_sample; - return sample_index >= 0 ? samples_[sample_index] : nullptr; -} - -// Gets the max distance for the given canonical sample. -// ComputeCanonicalSamples must have been called first. -float TrainingSampleSet::GetCanonicalDist(int font_id, int class_id) const { - ASSERT_HOST(font_class_array_ != nullptr); - int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) return 0.0f; - if ((*font_class_array_)(font_index, class_id).canonical_sample >= 0) - return (*font_class_array_)(font_index, class_id).canonical_dist; - else - return 0.0f; -} - -// Generates indexed features for all samples with the supplied feature_space. -void TrainingSampleSet::IndexFeatures(const IntFeatureSpace& feature_space) { - for (int s = 0; s < samples_.size(); ++s) - samples_[s]->IndexFeatures(feature_space); -} - -// Marks the given sample index for deletion. -// Deletion is actually completed by DeleteDeadSamples. -void TrainingSampleSet::KillSample(TrainingSample* sample) { - sample->set_sample_index(-1); -} - -// Deletes all samples with zero features marked by KillSample. 
-void TrainingSampleSet::DeleteDeadSamples() { - samples_.compact( - NewPermanentTessCallback(this, &TrainingSampleSet::DeleteableSample)); - num_raw_samples_ = samples_.size(); - // Samples must be re-organized now we have deleted a few. -} - -// Callback function returns true if the given sample is to be deleted, due -// to having a negative classid. -bool TrainingSampleSet::DeleteableSample(const TrainingSample* sample) { - return sample == nullptr || sample->class_id() < 0; -} - -// Construct an array to access the samples by font,class pair. -void TrainingSampleSet::OrganizeByFontAndClass() { - // Font indexes are sparse, so we used a map to compact them, so we can - // have an efficient 2-d array of fonts and character classes. - SetupFontIdMap(); - int compact_font_size = font_id_map_.CompactSize(); - // Get a 2-d array of generic vectors. - delete font_class_array_; - FontClassInfo empty; - font_class_array_ = new GENERIC_2D_ARRAY( - compact_font_size, unicharset_size_, empty); - for (int s = 0; s < samples_.size(); ++s) { - int font_id = samples_[s]->font_id(); - int class_id = samples_[s]->class_id(); - if (font_id < 0 || font_id >= font_id_map_.SparseSize()) { - tprintf("Font id = %d/%d, class id = %d/%d on sample %d\n", - font_id, font_id_map_.SparseSize(), class_id, unicharset_size_, - s); - } - ASSERT_HOST(font_id >= 0 && font_id < font_id_map_.SparseSize()); - ASSERT_HOST(class_id >= 0 && class_id < unicharset_size_); - int font_index = font_id_map_.SparseToCompact(font_id); - (*font_class_array_)(font_index, class_id).samples.push_back(s); - } - // Set the num_raw_samples member of the FontClassInfo, to set the boundary - // between the raw samples and the replicated ones. 
- for (int f = 0; f < compact_font_size; ++f) { - for (int c = 0; c < unicharset_size_; ++c) - (*font_class_array_)(f, c).num_raw_samples = - (*font_class_array_)(f, c).samples.size(); - } - // This is the global number of samples and also marks the boundary between - // real and replicated samples. - num_raw_samples_ = samples_.size(); -} - -// Constructs the font_id_map_ which maps real font_ids (sparse) to a compact -// index for the font_class_array_. -void TrainingSampleSet::SetupFontIdMap() { - // Number of samples for each font_id. - GenericVector font_counts; - for (int s = 0; s < samples_.size(); ++s) { - const int font_id = samples_[s]->font_id(); - while (font_id >= font_counts.size()) - font_counts.push_back(0); - ++font_counts[font_id]; - } - font_id_map_.Init(font_counts.size(), false); - for (int f = 0; f < font_counts.size(); ++f) { - font_id_map_.SetMap(f, font_counts[f] > 0); - } - font_id_map_.Setup(); -} - - -// Finds the sample for each font, class pair that has least maximum -// distance to all the other samples of the same font, class. -// OrganizeByFontAndClass must have been already called. -void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, - bool debug) { - ASSERT_HOST(font_class_array_ != nullptr); - IntFeatureDist f_table; - if (debug) tprintf("feature table size %d\n", map.sparse_size()); - f_table.Init(&map); - int worst_s1 = 0; - int worst_s2 = 0; - double global_worst_dist = 0.0; - // Compute distances independently for each font and char index. 
- int font_size = font_id_map_.CompactSize(); - for (int font_index = 0; font_index < font_size; ++font_index) { - int font_id = font_id_map_.CompactToSparse(font_index); - for (int c = 0; c < unicharset_size_; ++c) { - int samples_found = 0; - FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); - if (fcinfo.samples.size() == 0 || - (kTestChar >= 0 && c != kTestChar)) { - fcinfo.canonical_sample = -1; - fcinfo.canonical_dist = 0.0f; - if (debug) tprintf("Skipping class %d\n", c); - continue; - } - // The canonical sample will be the one with the min_max_dist, which - // is the sample with the lowest maximum distance to all other samples. - double min_max_dist = 2.0; - // We keep track of the farthest apart pair (max_s1, max_s2) which - // are max_max_dist apart, so we can see how bad the variability is. - double max_max_dist = 0.0; - int max_s1 = 0; - int max_s2 = 0; - fcinfo.canonical_sample = fcinfo.samples[0]; - fcinfo.canonical_dist = 0.0f; - for (int i = 0; i < fcinfo.samples.size(); ++i) { - int s1 = fcinfo.samples[i]; - const GenericVector& features1 = samples_[s1]->indexed_features(); - f_table.Set(features1, features1.size(), true); - double max_dist = 0.0; - // Run the full squared-order search for similar samples. It is still - // reasonably fast because f_table.FeatureDistance is fast, but we - // may have to reconsider if we start playing with too many samples - // of a single char/font. - for (int j = 0; j < fcinfo.samples.size(); ++j) { - int s2 = fcinfo.samples[j]; - if (samples_[s2]->class_id() != c || - samples_[s2]->font_id() != font_id || - s2 == s1) - continue; - GenericVector features2 = samples_[s2]->indexed_features(); - double dist = f_table.FeatureDistance(features2); - if (dist > max_dist) { - max_dist = dist; - if (dist > max_max_dist) { - max_s1 = s1; - max_s2 = s2; - } - } - } - // Using Set(..., false) is far faster than re initializing, due to - // the sparseness of the feature space. 
- f_table.Set(features1, features1.size(), false); - samples_[s1]->set_max_dist(max_dist); - ++samples_found; - if (max_dist < min_max_dist) { - fcinfo.canonical_sample = s1; - fcinfo.canonical_dist = max_dist; - } - UpdateRange(max_dist, &min_max_dist, &max_max_dist); - } - if (max_max_dist > global_worst_dist) { - // Keep a record of the worst pair over all characters/fonts too. - global_worst_dist = max_max_dist; - worst_s1 = max_s1; - worst_s2 = max_s2; - } - if (debug) { - tprintf("Found %d samples of class %d=%s, font %d, " - "dist range [%g, %g], worst pair= %s, %s\n", - samples_found, c, unicharset_.debug_str(c).string(), - font_index, min_max_dist, max_max_dist, - SampleToString(*samples_[max_s1]).string(), - SampleToString(*samples_[max_s2]).string()); - } - } - } - if (debug) { - tprintf("Global worst dist = %g, between sample %d and %d\n", - global_worst_dist, worst_s1, worst_s2); - } -} - -// Replicates the samples to a minimum frequency defined by -// 2 * kSampleRandomSize, or for larger counts duplicates all samples. -// After replication, the replicated samples are perturbed slightly, but -// in a predictable and repeatable way. -// Use after OrganizeByFontAndClass(). 
-void TrainingSampleSet::ReplicateAndRandomizeSamples() { - ASSERT_HOST(font_class_array_ != nullptr); - int font_size = font_id_map_.CompactSize(); - for (int font_index = 0; font_index < font_size; ++font_index) { - for (int c = 0; c < unicharset_size_; ++c) { - FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); - int sample_count = fcinfo.samples.size(); - int min_samples = 2 * std::max(kSampleRandomSize, sample_count); - if (sample_count > 0 && sample_count < min_samples) { - int base_count = sample_count; - for (int base_index = 0; sample_count < min_samples; ++sample_count) { - int src_index = fcinfo.samples[base_index++]; - if (base_index >= base_count) base_index = 0; - TrainingSample* sample = samples_[src_index]->RandomizedCopy( - sample_count % kSampleRandomSize); - int sample_index = samples_.size(); - sample->set_sample_index(sample_index); - samples_.push_back(sample); - fcinfo.samples.push_back(sample_index); - } - } - } - } -} - -// Caches the indexed features of the canonical samples. -// ComputeCanonicalSamples must have been already called. -// TODO(rays) see note on ReliablySeparable and try restricting the -// canonical features to those that truly represent all samples. -void TrainingSampleSet::ComputeCanonicalFeatures() { - ASSERT_HOST(font_class_array_ != nullptr); - const int font_size = font_id_map_.CompactSize(); - for (int font_index = 0; font_index < font_size; ++font_index) { - const int font_id = font_id_map_.CompactToSparse(font_index); - for (int c = 0; c < unicharset_size_; ++c) { - int num_samples = NumClassSamples(font_id, c, false); - if (num_samples == 0) - continue; - const TrainingSample* sample = GetCanonicalSample(font_id, c); - FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); - fcinfo.canonical_features = sample->indexed_features(); - } - } -} - -// Computes the combined set of features used by all the samples of each -// font/class combination. Use after ReplicateAndRandomizeSamples. 
-void TrainingSampleSet::ComputeCloudFeatures(int feature_space_size) { - ASSERT_HOST(font_class_array_ != nullptr); - int font_size = font_id_map_.CompactSize(); - for (int font_index = 0; font_index < font_size; ++font_index) { - int font_id = font_id_map_.CompactToSparse(font_index); - for (int c = 0; c < unicharset_size_; ++c) { - int num_samples = NumClassSamples(font_id, c, false); - if (num_samples == 0) - continue; - FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); - fcinfo.cloud_features.Init(feature_space_size); - for (int s = 0; s < num_samples; ++s) { - const TrainingSample* sample = GetSample(font_id, c, s); - const GenericVector& sample_features = sample->indexed_features(); - for (int i = 0; i < sample_features.size(); ++i) - fcinfo.cloud_features.SetBit(sample_features[i]); - } - } - } -} - -// Adds all fonts of the given class to the shape. -void TrainingSampleSet::AddAllFontsForClass(int class_id, Shape* shape) const { - for (int f = 0; f < font_id_map_.CompactSize(); ++f) { - const int font_id = font_id_map_.CompactToSparse(f); - shape->AddToShape(class_id, font_id); - } -} - -// Display the samples with the given indexed feature that also match -// the given shape. -void TrainingSampleSet::DisplaySamplesWithFeature(int f_index, - const Shape& shape, - const IntFeatureSpace& space, - ScrollView::Color color, - ScrollView* window) const { - for (int s = 0; s < num_raw_samples(); ++s) { - const TrainingSample* sample = GetSample(s); - if (shape.ContainsUnichar(sample->class_id())) { - GenericVector indexed_features; - space.IndexAndSortFeatures(sample->features(), sample->num_features(), - &indexed_features); - for (int f = 0; f < indexed_features.size(); ++f) { - if (indexed_features[f] == f_index) { - sample->DisplayFeatures(color, window); - } - } - } - } -} - - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsampleset.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsampleset.h deleted file mode 100644 index f5f0732a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/classify/trainingsampleset.h +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2010 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TRAINING_TRAININGSAMPLESET_H_ -#define TESSERACT_TRAINING_TRAININGSAMPLESET_H_ - -#include "bitvector.h" -#include "genericvector.h" -#include "indexmapbidi.h" -#include "matrix.h" -#include "shapetable.h" -#include "trainingsample.h" - -class UNICHARSET; - -namespace tesseract { - -struct FontInfo; -class FontInfoTable; -class IntFeatureMap; -class IntFeatureSpace; -class TrainingSample; -struct UnicharAndFonts; - -// Collection of TrainingSample used for training or testing a classifier. -// Provides several useful methods to operate on the collection as a whole, -// including outlier detection and deletion, providing access by font and -// class, finding the canonical sample, finding the "cloud" features (OR of -// all features in all samples), replication of samples, caching of distance -// metrics. 
-class TrainingSampleSet { - public: - explicit TrainingSampleSet(const FontInfoTable& fontinfo_table); - ~TrainingSampleSet(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - // Accessors - int num_samples() const { - return samples_.size(); - } - int num_raw_samples() const { - return num_raw_samples_; - } - int NumFonts() const { - return font_id_map_.SparseSize(); - } - const UNICHARSET& unicharset() const { - return unicharset_; - } - int charsetsize() const { - return unicharset_size_; - } - const FontInfoTable& fontinfo_table() const { - return fontinfo_table_; - } - - // Loads an initial unicharset, or sets one up if the file cannot be read. - void LoadUnicharset(const char* filename); - - // Adds a character sample to this sample set. - // If the unichar is not already in the local unicharset, it is added. - // Returns the unichar_id of the added sample, from the local unicharset. - int AddSample(const char* unichar, TrainingSample* sample); - // Adds a character sample to this sample set with the given unichar_id, - // which must correspond to the local unicharset (in this). - void AddSample(int unichar_id, TrainingSample* sample); - - // Returns the number of samples for the given font,class pair. - // If randomize is true, returns the number of samples accessible - // with randomizing on. (Increases the number of samples if small.) - // OrganizeByFontAndClass must have been already called. - int NumClassSamples(int font_id, int class_id, bool randomize) const; - - // Gets a sample by its index. - const TrainingSample* GetSample(int index) const; - - // Gets a sample by its font, class, index. - // OrganizeByFontAndClass must have been already called. 
- const TrainingSample* GetSample(int font_id, int class_id, int index) const; - - // Get a sample by its font, class, index. Does not randomize. - // OrganizeByFontAndClass must have been already called. - TrainingSample* MutableSample(int font_id, int class_id, int index); - - // Returns a string debug representation of the given sample: - // font, unichar_str, bounding box, page. - STRING SampleToString(const TrainingSample& sample) const; - - // Gets the combined set of features used by all the samples of the given - // font/class combination. - const BitVector& GetCloudFeatures(int font_id, int class_id) const; - // Gets the indexed features of the canonical sample of the given - // font/class combination. - const GenericVector& GetCanonicalFeatures(int font_id, - int class_id) const; - - // Returns the distance between the given UniCharAndFonts pair. - // If matched_fonts, only matching fonts, are considered, unless that yields - // the empty set. - // OrganizeByFontAndClass must have been already called. - float UnicharDistance(const UnicharAndFonts& uf1, const UnicharAndFonts& uf2, - bool matched_fonts, const IntFeatureMap& feature_map); - - // Returns the distance between the given pair of font/class pairs. - // Finds in cache or computes and caches. - // OrganizeByFontAndClass must have been already called. - float ClusterDistance(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map); - - // Computes the distance between the given pair of font/class pairs. - float ComputeClusterDistance(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map) const; - - // Returns the number of canonical features of font/class 2 for which - // neither the feature nor any of its near neighbors occurs in the cloud - // of font/class 1. 
Each such feature is a reliable separation between - // the classes, ASSUMING that the canonical sample is sufficiently - // representative that every sample has a feature near that particular - // feature. To check that this is so on the fly would be prohibitively - // expensive, but it might be possible to pre-qualify the canonical features - // to include only those for which this assumption is true. - // ComputeCanonicalFeatures and ComputeCloudFeatures must have been called - // first, or the results will be nonsense. - int ReliablySeparable(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map, - bool thorough) const; - - - // Returns the total index of the requested sample. - // OrganizeByFontAndClass must have been already called. - int GlobalSampleIndex(int font_id, int class_id, int index) const; - - // Gets the canonical sample for the given font, class pair. - // ComputeCanonicalSamples must have been called first. - const TrainingSample* GetCanonicalSample(int font_id, int class_id) const; - // Gets the max distance for the given canonical sample. - // ComputeCanonicalSamples must have been called first. - float GetCanonicalDist(int font_id, int class_id) const; - - // Returns a mutable pointer to the sample with the given index. - TrainingSample* mutable_sample(int index) { - return samples_[index]; - } - // Gets ownership of the sample with the given index, removing it from this. - TrainingSample* extract_sample(int index) { - TrainingSample* sample = samples_[index]; - samples_[index] = nullptr; - return sample; - } - - // Generates indexed features for all samples with the supplied feature_space. - void IndexFeatures(const IntFeatureSpace& feature_space); - - // Marks the given sample for deletion. - // Deletion is actually completed by DeleteDeadSamples. - void KillSample(TrainingSample* sample); - - // Deletes all samples with a negative sample index marked by KillSample. 
- // Must be called before OrganizeByFontAndClass, and OrganizeByFontAndClass - // must be called after as the samples have been renumbered. - void DeleteDeadSamples(); - - // Callback function returns true if the given sample is to be deleted, due - // to having a negative classid. - bool DeleteableSample(const TrainingSample* sample); - - // Construct an array to access the samples by font,class pair. - void OrganizeByFontAndClass(); - - // Constructs the font_id_map_ which maps real font_ids (sparse) to a compact - // index for the font_class_array_. - void SetupFontIdMap(); - - // Finds the sample for each font, class pair that has least maximum - // distance to all the other samples of the same font, class. - // OrganizeByFontAndClass must have been already called. - void ComputeCanonicalSamples(const IntFeatureMap& map, bool debug); - - // Replicates the samples to a minimum frequency defined by - // 2 * kSampleRandomSize, or for larger counts duplicates all samples. - // After replication, the replicated samples are perturbed slightly, but - // in a predictable and repeatable way. - // Use after OrganizeByFontAndClass(). - void ReplicateAndRandomizeSamples(); - - // Caches the indexed features of the canonical samples. - // ComputeCanonicalSamples must have been already called. - void ComputeCanonicalFeatures(); - // Computes the combined set of features used by all the samples of each - // font/class combination. Use after ReplicateAndRandomizeSamples. - void ComputeCloudFeatures(int feature_space_size); - - // Adds all fonts of the given class to the shape. - void AddAllFontsForClass(int class_id, Shape* shape) const; - - // Display the samples with the given indexed feature that also match - // the given shape. 
- void DisplaySamplesWithFeature(int f_index, const Shape& shape, - const IntFeatureSpace& feature_space, - ScrollView::Color color, - ScrollView* window) const; - - private: - // Struct to store a triplet of unichar, font, distance in the distance cache. - struct FontClassDistance { - int unichar_id; - int font_id; // Real font id. - float distance; - }; - // Simple struct to store information related to each font/class combination. - struct FontClassInfo { - FontClassInfo(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - // Number of raw samples. - int32_t num_raw_samples; - // Index of the canonical sample. - int32_t canonical_sample; - // Max distance of the canonical sample from any other. - float canonical_dist; - // Sample indices for the samples, including replicated. - GenericVector samples; - - // Non-serialized cache data. - // Indexed features of the canonical sample. - GenericVector canonical_features; - // The mapped features of all the samples. - BitVector cloud_features; - - // Caches for ClusterDistance. - // Caches for other fonts but matching this unichar. -1 indicates not set. - // Indexed by compact font index from font_id_map_. - GenericVector font_distance_cache; - // Caches for other unichars but matching this font. -1 indicates not set. - GenericVector unichar_distance_cache; - // Cache for the rest (non matching font and unichar.) - // A cache of distances computed by ReliablySeparable. - GenericVector distance_cache; - }; - - PointerVector samples_; - // Number of samples before replication/randomization. - int num_raw_samples_; - // Character set we are training for. - UNICHARSET unicharset_; - // Character set size to which the 2-d arrays below refer. 
- int unicharset_size_; - // Map to allow the font_class_array_ below to be compact. - // The sparse space is the real font_id, used in samples_ . - // The compact space is an index to font_class_array_ - IndexMapBiDi font_id_map_; - // A 2-d array of FontClassInfo holding information related to each - // (font_id, class_id) pair. - GENERIC_2D_ARRAY* font_class_array_; - - // Reference to the fontinfo_table_ in MasterTrainer. Provides names - // for font_ids in the samples. Not serialized! - const FontInfoTable& fontinfo_table_; -}; - -} // namespace tesseract. - - -#endif // TRAININGSAMPLESETSET_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/Makefile.am deleted file mode 100644 index 3b9d4ca6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/viewer - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - -noinst_HEADERS = \ - bitvec.h callcpp.h cutil.h cutil_class.h \ - emalloc.h globals.h \ - oldlist.h structures.h - -noinst_LTLIBRARIES = libtesseract_cutil.la - -libtesseract_cutil_la_SOURCES = \ - bitvec.cpp callcpp.cpp cutil_class.cpp \ - emalloc.cpp \ - oldlist.cpp structures.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/bitvec.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/bitvec.cpp deleted file mode 100644 index b84d6b79..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/bitvec.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/****************************************************************************** - ** Filename: bitvec.c - ** Purpose: Routines for manipulating bit vectors - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "bitvec.h" - -#include - -#include "emalloc.h" -#include "tprintf.h" - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine uses realloc to increase the size of - * the specified bit vector. - * - * Globals: - * - none - * - * @param Vector bit vector to be expanded - * @param NewNumBits new size of bit vector - * - * @return New expanded bit vector. - */ -BIT_VECTOR ExpandBitVector(BIT_VECTOR Vector, int NewNumBits) { - return ((BIT_VECTOR) Erealloc(Vector, - sizeof(Vector[0]) * WordsInVectorOfSize(NewNumBits))); -} /* ExpandBitVector */ - - -/*---------------------------------------------------------------------------*/ -void FreeBitVector(BIT_VECTOR BitVector) { -/** - * This routine frees a bit vector. It also decrements - * the global counter that keeps track of the number of - * bit vectors allocated. If BitVector is nullptr, then - * the count is printed to stderr. 
- * - * Globals: - * - BitVectorCount count of number of bit vectors allocated - * - * @param BitVector bit vector to be freed - * - */ - if (BitVector) { - Efree(BitVector); - } -} /* FreeBitVector */ - - -/*---------------------------------------------------------------------------*/ -/** - * Allocate and return a new bit vector large enough to - * hold the specified number of bits. - * - * Globals: - * - BitVectorCount number of bit vectors allocated - * - * @param NumBits number of bits in new bit vector - * - * @return New bit vector. - */ -BIT_VECTOR NewBitVector(int NumBits) { - return ((BIT_VECTOR) Emalloc(sizeof(uint32_t) * - WordsInVectorOfSize(NumBits))); -} /* NewBitVector */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/bitvec.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/bitvec.h deleted file mode 100644 index 50680066..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/bitvec.h +++ /dev/null @@ -1,75 +0,0 @@ -/****************************************************************************** - ** Filename: bitvec.h - ** Purpose: Routines for manipulating bit vectors - ** Author: Dan Johnson - ** History: Wed Mar 7 17:52:45 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef BITVEC_H -#define BITVEC_H - -#include "host.h" - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -// TODO(rays) Rename BITSINLONG to BITSINuint32_t, and use sizeof. -#define BITSINLONG 32 /**< no of bits in a long */ -using BIT_VECTOR = uint32_t *; - -/*----------------------------------------------------------------------------- - Public Function Prototypes ------------------------------------------------------------------------------*/ -#define zero_all_bits(array, length) \ - { \ - int index; /*temporary index*/ \ - \ - for (index = 0; index < length; index++) \ - array[index] = 0; /*zero all bits*/ \ - } - -#define set_all_bits(array, length) \ - { \ - int index; /*temporary index*/ \ - \ - for (index = 0; index < length; index++) \ - array[index] = ~0; /*set all bits*/ \ - } - -#define copy_all_bits(source, dest, length) \ - { \ - int index; /*temporary index*/ \ - \ - for (index = 0; index < length; index++) \ - dest[index] = source[index]; /*copy all bits*/ \ - } - -#define SET_BIT(array,bit) (array[bit/BITSINLONG]|=1<<(bit&(BITSINLONG-1))) - -#define reset_bit(array,bit) (array[bit/BITSINLONG]&=~(1<<(bit&(BITSINLONG-1)))) - -#define test_bit(array,bit) (array[bit/BITSINLONG] & (1<<(bit&(BITSINLONG-1)))) - -#define WordsInVectorOfSize(NumBits) \ -(((NumBits) + BITSINLONG - 1) / BITSINLONG) - -/*-------------------------------------------------------------------------- - Public Function Prototypes ---------------------------------------------------------------------------*/ -BIT_VECTOR ExpandBitVector(BIT_VECTOR Vector, int NewNumBits); - -void FreeBitVector(BIT_VECTOR BitVector); - -BIT_VECTOR NewBitVector(int NumBits); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/callcpp.cpp 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/callcpp.cpp deleted file mode 100644 index 7528f0fc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/callcpp.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/********************************************************************** - * File: callcpp.cpp - * Description: extern C interface calling C++ from C. - * Author: Ray Smith - * Created: Sun Feb 04 20:39:23 MST 1996 - * - * (C) Copyright 1996, Hewlett-Packard Co. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "callcpp.h" -#include // for va_end, va_list, va_start -#include // for vsprintf -#include // for unique_ptr -#include "scrollview.h" // for ScrollView, SVEvent, SVET_ANY, SVET_INPUT -#include "tprintf.h" // for tprintf - -void -cprintf ( //Trace printf -const char *format, ... 
//special message -) { - va_list args; //variable args - char msg[1000]; - - va_start(args, format); //variable list - vsprintf(msg, format, args); //Format into msg - va_end(args); - - tprintf ("%s", msg); -} - - -#ifndef GRAPHICS_DISABLED -ScrollView *c_create_window( /*create a window */ - const char *name, /*name/title of window */ - int16_t xpos, /*coords of window */ - int16_t ypos, /*coords of window */ - int16_t xsize, /*size of window */ - int16_t ysize, /*size of window */ - double xmin, /*scrolling limits */ - double xmax, /*to stop users */ - double ymin, /*getting lost in */ - double ymax /*empty space */ - ) { - return new ScrollView(name, xpos, ypos, xsize, ysize, xmax - xmin, ymax - ymin, true); -} - - -void c_line_color_index( /*set color */ - void *win, - C_COL index) { - // The colors are the same as the SV ones except that SV has COLOR:NONE --> offset of 1 - ScrollView* window = (ScrollView*) win; - window->Pen((ScrollView::Color) (index + 1)); -} - - -void c_move( /*move pen */ - void *win, - double x, - double y) { - ScrollView* window = (ScrollView*) win; - window->SetCursor((int) x, (int) y); -} - - -void c_draw( /*move pen */ - void *win, - double x, - double y) { - ScrollView* window = (ScrollView*) win; - window->DrawTo((int) x, (int) y); -} - - -void c_make_current( /*move pen */ - void *win) { - ScrollView* window = (ScrollView*) win; - window->Update(); -} - - -void c_clear_window( /*move pen */ - void *win) { - ScrollView* window = (ScrollView*) win; - window->Clear(); -} - - -char window_wait(ScrollView* win) { - // Wait till an input or click event (all others are thrown away) - char ret = '\0'; - SVEventType ev_type = SVET_ANY; - do { - std::unique_ptr ev(win->AwaitEvent(SVET_ANY)); - ev_type = ev->type; - if (ev_type == SVET_INPUT) - ret = ev->parameter[0]; - } while (ev_type != SVET_INPUT && ev_type != SVET_CLICK); - return ret; -} -#endif - -void reverse32(void *ptr) { - char tmp; - char *cptr = (char *) ptr; - - tmp = *cptr; - 
*cptr = *(cptr + 3); - *(cptr + 3) = tmp; - tmp = *(cptr + 1); - *(cptr + 1) = *(cptr + 2); - *(cptr + 2) = tmp; -} - - -void reverse16(void *ptr) { - char tmp; - char *cptr = (char *) ptr; - - tmp = *cptr; - *cptr = *(cptr + 1); - *(cptr + 1) = tmp; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/callcpp.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/callcpp.h deleted file mode 100644 index eacbea60..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/callcpp.h +++ /dev/null @@ -1,113 +0,0 @@ -/********************************************************************** - * File: callcpp.h - * Description: extern C interface calling C++ from C. - * Author: Ray Smith - * Created: Sun Feb 04 20:39:23 MST 1996 - * - * (C) Copyright 1996, Hewlett-Packard Co. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef CALLCPP_H -#define CALLCPP_H - -#include "host.h" -#include "params.h" -#include "unichar.h" - -class ScrollView; - -typedef enum { - Black, - White, - Red, - Yellow, - Green, - Cyan, - Blue, - Magenta, - Aquamarine, - Dark_SLATE_BLUE, - Light_BLUE, - Medium_BLUE, - Midnight_BLUE, - Navy_BLUE, - Sky_BLUE, - Slate_BLUE, - Steel_BLUE, - Coral, - Brown, - Sandy_BROWN, - Gold, - GoldENROD, - Dark_GREEN, - Dark_OLIVE_GREEN, - Forest_GREEN, - Lime_GREEN, - Pale_GREEN, - Yellow_GREEN, - Light_GREY, - Dark_SLATE_GREY, - Dim_GREY, - Grey, - Khaki, - Maroon, - Orange, - Orchid, - Pink, - Plum, - Indian_RED, - Orange_RED, - Violet_RED, - Salmon, - Tan, - Turqoise, - Dark_TURQUOISE, - Violet, - Wheat, - Green_YELLOW -} C_COL; /*starbase colours */ - -void cprintf ( //Trace printf -const char *format, ... //special message -); -ScrollView *c_create_window( /*create a window */ - const char *name, /*name/title of window */ - int16_t xpos, /*coords of window */ - int16_t ypos, /*coords of window */ - int16_t xsize, /*size of window */ - int16_t ysize, /*size of window */ - double xmin, /*scrolling limits */ - double xmax, /*to stop users */ - double ymin, /*getting lost in */ - double ymax /*empty space */ - ); -void c_line_color_index( /*set color */ - void *win, - C_COL index); -void c_move( /*move pen */ - void *win, - double x, - double y); -void c_draw( /*move pen */ - void *win, - double x, - double y); -void c_make_current( /*move pen */ - void *win); -void c_clear_window( /*move pen */ - void *win); -char window_wait(ScrollView* win); -void reverse32(void *ptr); -void reverse16(void *ptr); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil.h deleted file mode 100644 index 5bb352e4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil.h +++ 
/dev/null @@ -1,35 +0,0 @@ -/* -*-C-*- - ****************************************************************************** - * - * File: cutil.h - * Description: General utility functions - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Dec 5 15:40:26 1990 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ****************************************************************************** - */ - -#ifndef CUTILH -#define CUTILH - -typedef void (*void_proc)(...); - -typedef int (*int_compare)(void*, void*); -typedef void (*void_dest)(void*); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil_class.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil_class.cpp deleted file mode 100644 index 753821e6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil_class.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: cutil_class.cpp -// Description: cutil class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "cutil_class.h" - -namespace tesseract { - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -CUtil::~CUtil() = default; - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil_class.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil_class.h deleted file mode 100644 index 61d14020..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/cutil_class.h +++ /dev/null @@ -1,36 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: cutil_class.h -// Description: cutil class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_CUTIL_CUTIL_CLASS_H_ -#define TESSERACT_CUTIL_CUTIL_CLASS_H_ - -#include "ccutil.h" -#include "strngs.h" - -namespace tesseract { - -class CUtil : public CCUtil { - public: - CUtil() = default; - virtual ~CUtil(); - void read_variables(const char *filename, bool global_only); -}; - -} // namespace tesseract - -#endif // TESSERACT_CUTIL_CUTIL_CLASS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/emalloc.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/emalloc.cpp deleted file mode 100644 index 440649d3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/emalloc.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/************************************************************************** - * Filename: emalloc.cpp - * Purpose: Routines for trapping memory allocation errors. - * Author: Dan Johnson -** -** (c) Copyright Hewlett-Packard Company, 1988. -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -******************************************************************************/ - -#include "emalloc.h" -#include -#include "errcode.h" // for ASSERT_HOST - -/** - * This routine attempts to allocate the specified number of - * bytes. If the memory can be allocated, a pointer to the - * memory is returned. If the memory cannot be allocated, or - * if the allocation request is negative or zero, - * an error is trapped. 
- * @param Size number of bytes of memory to be allocated - * @return Pointer to allocated memory. - */ -void *Emalloc(int Size) { - ASSERT_HOST(Size > 0); - void* Buffer = malloc(Size); - ASSERT_HOST(Buffer != nullptr); - return Buffer; -} - -void *Erealloc(void *ptr, int size) { - ASSERT_HOST(size > 0 || (size == 0 && ptr != nullptr)); - void* Buffer = realloc(ptr, size); - ASSERT_HOST(Buffer != nullptr || size == 0); - return Buffer; -} - -void Efree(void *ptr) { - ASSERT_HOST(ptr != nullptr); - free(ptr); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/emalloc.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/emalloc.h deleted file mode 100644 index 23483bd8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/emalloc.h +++ /dev/null @@ -1,27 +0,0 @@ -/****************************************************************************** - ** Filename: emalloc.h - ** Purpose: Definition of memory allocation routines. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#ifndef EMALLOC_H -#define EMALLOC_H - -void *Emalloc(int Size); - -void *Erealloc(void *ptr, int size); - -void Efree(void *ptr); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/globals.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/globals.h deleted file mode 100644 index d4271a7f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/globals.h +++ /dev/null @@ -1,33 +0,0 @@ -/* -*-C-*- - ****************************************************************************** - * - * File: globals.h (Formerly globals.h) - * Description: Global Variables for Wise Owl - * Author: Mark Seaman, OCR Technology - * Created: Thu Dec 21 11:38:36 1989 - * Modified: Thu Jan 4 17:13:00 1990 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *****************************************************************************/ -#ifndef GLOBALS_H -#define GLOBALS_H - -#include "unicharset.h" -#include "strngs.h" - -#include - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/oldlist.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/oldlist.cpp deleted file mode 100644 index bcc8bc5e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/oldlist.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* -*-C-*- -############################################################################### -# -# File: oldlist.cpp -# Description: List processing procedures. -# Author: Mark Seaman, Software Productivity -# Created: Thu Jul 23 13:24:09 1987 -# Modified: Thu Dec 22 10:59:52 1988 (Mark Seaman) marks@hpgrlt -# Language: C -# Package: N/A -# Status: Reusable Software Component -# -# (c) Copyright 1987, Hewlett-Packard Company. -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -# -############################################################################### - - This file contains a set of general purpose list manipulation routines. - These routines can be used in a wide variety of ways to provide several - different popular data structures. A new list can be created by declaring - a variable of type 'LIST', and can be initialized with the value 'NIL_LIST'. - All of these routines check for the NIL_LIST condition before dereferencing - pointers. 
NOTE: There is a users' manual available in printed form from - Mark Seaman at (303) 350-4492 at Greeley Hard Copy. - - To implement a STACK use: - - push to add to the Stack l = push(l, (LIST)"jim"); - pop to remove items from the Stack l = pop(l); - first_node to access the head name = (char *)first_node(l); - - To implement a QUEUE use: - - push_last to add to the Queue l = push_last(l, (LIST)"x"); - pop remove items from the Queue l = pop(l); - first_node to access the head name = (char *)first_node (l); - - To implement LISP like functions use: - - first_node CAR x = (int)first_node(l); - rest CDR l = list_rest (l); - push CONS l = push(l, (LIST)this); - last LAST x = last(l); - concat APPEND l = concat(r, s); - count LENGTH x = count(l); - search MEMBER if (search(l, x, nullptr)) - - To implement SETS use: - - adjoin l = adjoin(l, x); - set_union l = set_union(r, s); - intersection l = intersection(r, s); - set_difference l = set_difference(r, s); - delete l = delete(s, x, nullptr); - search if (search(l, x, nullptr)) - - To Implement Associated LISTS use: - - lpush l = lpush(l, p); - assoc s = assoc(l, x); - adelete l = adelete(l, x); - - The following rules of closure exist for the functions provided. 
- a = first_node (push (a, b)) - b = list_rest (push (a, b)) - a = push (pop (a), a)) For all a <> NIL_LIST - a = reverse (reverse (a)) - -******************************************************************************/ -#include "oldlist.h" -#include -#include // for strcmp -#include "errcode.h" // for ASSERT_HOST -#include "structures.h" - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -#define add_on(l, x) l = push(l, first_node(x)) -#define next_one(l) l = list_rest(l) - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * c o u n t - * - * Recursively count the elements in a list. Return the count. - **********************************************************************/ -int count(LIST var_list) { - int temp = 0; - - iterate(var_list) temp += 1; - return (temp); -} - -/********************************************************************** - * d e l e t e d - * - * Delete all the elements out of the current list that match the key. - * This operation destroys the original list. The caller will supply a - * routine that will compare each node to the - * key, and return a non-zero value when they match. If the value - * nullptr is supplied for is_equal, the is_key routine will be used. 
- **********************************************************************/ -LIST delete_d(LIST list, void *key, int_compare is_equal) { - LIST result = NIL_LIST; - LIST last_one = NIL_LIST; - - if (is_equal == nullptr) is_equal = is_same; - - while (list != NIL_LIST) { - if (!(*is_equal)(first_node(list), key)) { - if (last_one == NIL_LIST) { - last_one = list; - list = list_rest(list); - result = last_one; - set_rest(last_one, NIL_LIST); - } else { - set_rest(last_one, list); - last_one = list; - list = list_rest(list); - set_rest(last_one, NIL_LIST); - } - } else { - list = pop(list); - } - } - return (result); -} - -LIST delete_d(LIST list, void *key, - TessResultCallback2 *is_equal) { - LIST result = NIL_LIST; - LIST last_one = NIL_LIST; - - while (list != NIL_LIST) { - if (!(*is_equal).Run(first_node(list), key)) { - if (last_one == NIL_LIST) { - last_one = list; - list = list_rest(list); - result = last_one; - set_rest(last_one, NIL_LIST); - } else { - set_rest(last_one, list); - last_one = list; - list = list_rest(list); - set_rest(last_one, NIL_LIST); - } - } else { - list = pop(list); - } - } - return (result); -} - -/********************************************************************** - * d e s t r o y - * - * Return the space taken by a list to the heap. - **********************************************************************/ -LIST destroy(LIST list) { - LIST next; - - while (list != NIL_LIST) { - next = list_rest(list); - free_cell(list); - list = next; - } - return (NIL_LIST); -} - -/********************************************************************** - * d e s t r o y n o d e s - * - * Return the space taken by the LISTs of a list to the heap. 
- **********************************************************************/ -void destroy_nodes(LIST list, void_dest destructor) { - ASSERT_HOST(destructor != nullptr); - - while (list != NIL_LIST) { - if (first_node(list) != nullptr) (*destructor)(first_node(list)); - list = pop(list); - } -} - -/********************************************************************** - * i n s e r t - * - * Create a list element and rearange the pointers so that the first - * element in the list is the second aurgment. - **********************************************************************/ -void insert(LIST list, void *node) { - LIST element; - - if (list != NIL_LIST) { - element = push(NIL_LIST, node); - set_rest(element, list_rest(list)); - set_rest(list, element); - node = first_node(list); - list->node = first_node(list_rest(list)); - list->next->node = (LIST)node; - } -} - -/********************************************************************** - * i s s a m e - * - * Compare the list node with the key value return TRUE (non-zero) - * if they are equivalent strings. (Return FALSE if not) - **********************************************************************/ -int is_same(void *item1, void *item2) { - return strcmp((char *)item1, (char *)item2) == 0 ? 1 : 0; -} - -/********************************************************************** - * j o i n - * - * Join the two lists together. This function is similar to concat - * except that concat creates a new list. This function returns the - * first list updated. - **********************************************************************/ -LIST join(LIST list1, LIST list2) { - if (list1 == NIL_LIST) return (list2); - set_rest(last(list1), list2); - return (list1); -} - -/********************************************************************** - * l a s t - * - * Return the last list item (this is list type). 
- **********************************************************************/ -LIST last(LIST var_list) { - while (list_rest(var_list) != NIL_LIST) var_list = list_rest(var_list); - return (var_list); -} - -/********************************************************************** - * n t h c e l l - * - * Return nth list cell in the list. - **********************************************************************/ -void *nth_cell(LIST var_list, int item_num) { - int x = 0; - iterate(var_list) { - if (x++ == item_num) return (var_list); - } - return (var_list); -} - -/********************************************************************** - * p o p - * - * Return the list with the first element removed. Destroy the space - * that it occupied in the list. - **********************************************************************/ -LIST pop(LIST list) { - LIST temp; - - temp = list_rest(list); - - if (list != NIL_LIST) { - free_cell(list); - } - return (temp); -} - -/********************************************************************** - * p u s h - * - * Create a list element. Push the second parameter (the node) onto - * the first parameter (the list). Return the new list to the caller. - **********************************************************************/ -LIST push(LIST list, void *element) { - LIST t; - - t = new_cell(); - t->node = (LIST)element; - set_rest(t, list); - return (t); -} - -/********************************************************************** - * p u s h l a s t - * - * Create a list element. Add the element onto the end of the list. - **********************************************************************/ -LIST push_last(LIST list, void *item) { - LIST t; - - if (list != NIL_LIST) { - t = last(list); - t->next = push(NIL_LIST, item); - return (list); - } else - return (push(NIL_LIST, item)); -} - -/********************************************************************** - * r e v e r s e - * - * Create a new list with the elements reversed. 
The old list is not - * destroyed. - **********************************************************************/ -LIST reverse(LIST list) { - LIST newlist = NIL_LIST; - - iterate(list) copy_first(list, newlist); - return (newlist); -} - -/********************************************************************** - * r e v e r s e d - * - * Create a new list with the elements reversed. The old list is - * destroyed. - **********************************************************************/ -LIST reverse_d(LIST list) { - LIST result = reverse(list); - destroy(list); - return (result); -} - -/********************************************************************** - * s a d j o i n - * - * Adjoin an element to an assorted list. The original list is - * modified. Returns the modified list. - **********************************************************************/ -LIST s_adjoin(LIST var_list, void *variable, int_compare compare) { - LIST l; - int result; - - if (compare == nullptr) compare = (int_compare)strcmp; - - l = var_list; - iterate(l) { - result = (*compare)(variable, first_node(l)); - if (result == 0) - return (var_list); - else if (result < 0) { - insert(l, variable); - return (var_list); - } - } - return (push_last(var_list, variable)); -} - -/********************************************************************** - * s e a r c h - * - * Search list, return NIL_LIST if not found. Return the list starting from - * the item if found. The compare routine "is_equal" is passed in as - * the third parameter to this routine. If the value nullptr is supplied - * for is_equal, the is_key routine will be used. 
- **********************************************************************/ -LIST search(LIST list, void *key, int_compare is_equal) { - if (is_equal == nullptr) is_equal = is_same; - - iterate(list) if ((*is_equal)(first_node(list), key)) return (list); - return (NIL_LIST); -} - -LIST search(LIST list, void *key, - TessResultCallback2 *is_equal) { - iterate(list) if ((*is_equal).Run(first_node(list), key)) return (list); - return (NIL_LIST); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/oldlist.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/oldlist.h deleted file mode 100644 index 13f18844..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/oldlist.h +++ /dev/null @@ -1,356 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: oldlist.h (Formerly list.h) - * Description: List processing procedures declarations. - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Dec 5 15:43:17 1990 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - * - * This file contains the interface for a set of general purpose list - * manipulation routines. 
For the implementation of these routines see - * the file "list.c". - * - ******************************************************************************** - * - * INDEX - * ======= - * - * BASICS: - * ------- - * first_node - Macro to return the first list node (not the cell). - * list_rest - Macro the return the second list cell - * pop - Destroy one list cell - * push - Create one list cell and set the node and next fields - * - * ITERATION: - * ----------------- - * iterate - Macro to create a for loop to visit each cell. - * iterate_list - Macro to visit each cell using a local variable. - * for_each - Applies a function to each node. - * - * LIST CELL COUNTS: - * ----------------- - * count - Returns the number of list cells in the list. - * second_node - Returns the second node. - * third - Returns the third node. - * fourth - Returns the fourth node. - * fifth - Returns the fifth node. - * last - Returns the last list cell. - * pair - Creates a list of two elements. - * - * COPYING: - * ----------------- - * copy_first - Pushes the first element from list 1 onto list 2. - * copy - Create a copy of a list. - * concat - Creates a new list that is a copy of both input lists. - * delete_n - Creates a new list without the chosen elements. - * reverse - Creates a backwards copy of the input list. - * sort - Use quick sort to construct a new list. - * transform - Creates a new list by transforming each of the nodes. - * - * TRANSFORMS: (Note: These functions all modify the input list.) - * ---------- - * join - Concatenates list 1 and list 2. - * delete_d - Removes the requested elements from the list. - * transform_d - Modifies the list by applying a function to each node. - * insert - Add a new element into this spot in a list. (not - *NIL_LIST) push_last - Add a new element onto the end of a list. - * reverse_d - Reverse a list and destroy the old one. 
- * - * ASSOCIATED LISTS: - * ----------------- - * adelete - Remove a particular entry from an associated list. - * assoc - Find an entry in an associated list that matches a key. - * match - Return the data element of an a-list entry. - * - * DISPLAY: - * ----------------- - * print_cell - Print a hex dump of a list cell. - * show - Displays a string and a list (using lprint). - * - * SETS: - * ----- - * adjoin - Add a new element to list if it does not exist already. - * intersection - Create a new list that is the set intersection. - * set_union - Create a new list that is the set intersection. - * set_difference - Create a new list that is the set difference. - * s_adjoin - Add an element to a sort list if it is not there. - * s_intersection - Set intersection on a sorted list. Modifies old list. - * s_union - Set intersection on a sorted list. Modifies old list. - * search - Return the pointer to the list cell whose node matches. - * - * COMPARISONS: - * ----------------- - * is_same - Compares each node to the key. - * is_not_same - Compares each node to the key. - * is_key - Compares first of each node to the key. - * is_not_key - Compares first of each node to the key. - * - * CELL OPERATIONS: - * ----------------- - * new_cell - Obtain a new list cell from the free list. Allocate. - * free_cell - Return a list cell to the free list. - * destroy - Return all list cells in a list. - * destroy_nodes - Apply a function to each list cell and destroy the list. - * set_node - Assign the node field in a list cell. - * set_rest - Assign the next field in a list cell. - * - ***********************************************************************/ - -#ifndef LIST_H -#define LIST_H - -#include "cutil.h" // for int_compare, void_dest, ... 
-#include "tesscallback.h" - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ - -#define NIL_LIST ((LIST)nullptr) - -struct list_rec -{ - struct list_rec *node; - struct list_rec *next; -}; -using LIST = list_rec *; - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/* Predefinitions */ -#define list_rest(l) ((l) ? (l)->next : NIL_LIST) -#define first_node(l) ((l) ? (l)->node : NIL_LIST) - -/********************************************************************** - * c o p y f i r s t - * - * Do the appropriate kind a push operation to copy the first node from - * one list to another. - * - **********************************************************************/ - -#define copy_first(l1,l2) \ -(l2=push(l2, first_node(l1))) - -/********************************************************************** - * i t e r a t e - * - * Visit each node in the list. Replace the old list with the list - * minus the head. Continue until the list is NIL_LIST. - **********************************************************************/ - -#define iterate(l) \ -for (; (l) != NIL_LIST; (l) = list_rest (l)) - -/********************************************************************** - * i t e r a t e l i s t - * - * Visit each node in the list (l). Use a local variable (x) to iterate - * through all of the list cells. This macro is identical to iterate - * except that it does not lose the original list. - **********************************************************************/ - -#define iterate_list(x,l) \ -for ((x)=(l); (x)!=0; (x)=list_rest(x)) - -/********************************************************************** - * j o i n o n - * - * Add another list onto the tail of this one. The list given as an input - * parameter is modified. 
- **********************************************************************/ - -#define JOIN_ON(list1,list2) \ -((list1) = join ((list1), (list2))) - -/********************************************************************** - * p o p o f f - * - * Add a cell onto the front of a list. The list given as an input - * parameter is modified. - **********************************************************************/ - -#define pop_off(list) \ -((list) = pop (list)) - -/********************************************************************** - * p u s h o n - * - * Add a cell onto the front of a list. The list given as an input - * parameter is modified. - **********************************************************************/ - -#define push_on(list,thing) \ -((list) = push (list, (LIST) (thing))) - -/********************************************************************** - * s e c o n d - * - * Return the contents of the second list element. - * - * #define second_node(l) first_node (list_rest (l)) - **********************************************************************/ - -#define second_node(l) \ -first_node (list_rest (l)) - -/********************************************************************** - * s e t r e s t - * - * Change the "next" field of a list element to point to a desired place. - * - * #define set_rest(l,node) l->next = node; - **********************************************************************/ - -#define set_rest(l,cell)\ -((l)->next = (cell)) - -/********************************************************************** - * t h i r d - * - * Return the contents of the third list element. 
- * - * #define third(l) first_node (list_rest (list_rest (l))) - **********************************************************************/ - -#define third(l) \ -first_node (list_rest (list_rest (l))) - -/*---------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------*/ -int count(LIST var_list); - -LIST delete_d(LIST list, void *key, int_compare is_equal); - -LIST delete_d(LIST list, void *key, - TessResultCallback2* is_equal); - -LIST destroy(LIST list); - -void destroy_nodes(LIST list, void_dest destructor); - -void insert(LIST list, void *node); - -int is_same(void *item1, void *item2); - -LIST join(LIST list1, LIST list2); - -LIST last(LIST var_list); - -void *nth_cell(LIST var_list, int item_num); - -LIST pop(LIST list); - -LIST push(LIST list, void *element); - -LIST push_last(LIST list, void *item); - -LIST reverse(LIST list); - -LIST reverse_d(LIST list); - -LIST s_adjoin(LIST var_list, void *variable, int_compare compare); - -LIST search(LIST list, void *key, int_compare is_equal); - -LIST search(LIST list, void *key, TessResultCallback2*); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif - -typedef void (*destructor) _ARGS((LIST l)); - -typedef LIST (*list_proc) _ARGS((LIST a)); - -int count -_ARGS((LIST var_list)); - -LIST delete_d -_ARGS((LIST list, - LIST key, - int_compare is_equal)); - -LIST destroy -_ARGS((LIST list)); - -LIST destroy_nodes -_ARGS((LIST list, - void_dest destructor)); - -void insert -_ARGS((LIST list, - LIST node)); - -int is_same_node -_ARGS((LIST s1, - LIST s2)); - -int is_same -_ARGS((LIST s1, - LIST s2)); - -LIST join -_ARGS((LIST list1, - LIST list2)); - -LIST last -_ARGS((LIST var_list)); - -LIST nth_cell -_ARGS((LIST var_list, - int item_num)); - -LIST pop -_ARGS((LIST list)); - -LIST push -_ARGS((LIST list, - LIST element)); - -LIST push_last -_ARGS((LIST 
list, - LIST item)); - -LIST reverse -_ARGS((LIST list)); - -LIST reverse_d -_ARGS((LIST list)); - -LIST s_adjoin -_ARGS((LIST var_list, - LIST variable, - int_compare compare)); - -LIST search -_ARGS((LIST list, - LIST key, - int_compare is_equal)); - -#undef _ARGS -*/ -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/structures.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/structures.cpp deleted file mode 100644 index 559f4b69..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/structures.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: structures.cpp (Formerly structures.c) - * Description: Allocate all the different types of structures. - * Author: Mark Seaman, OCR Technology - * Created: Wed May 30 10:27:26 1990 - * Modified: Mon Jul 15 10:39:18 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "structures.h" - -#include - - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -makestructure(new_cell, free_cell, list_rec) diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/structures.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/structures.h deleted file mode 100644 index c82804c5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/cutil/structures.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: structures.h - * Description: Allocate all the different types of structures. - * Author: Mark Seaman, OCR Technology - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#ifndef STRUCTURES_H -#define STRUCTURES_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "oldlist.h" - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/********************************************************************** - * makestructure - * - * Allocate a chunk of memory for a particular data type. This macro - * defines an allocation, deallocation, and status printing function - * for each new data type. - **********************************************************************/ - -#define makestructure(newfunc, old, type) \ - type* newfunc() { return new type; } \ - \ - void old(type* deadelement) { delete deadelement; } - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -extern LIST new_cell(); -extern void free_cell(LIST); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/Makefile.am deleted file mode 100644 index f3a95446..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/Makefile.am +++ /dev/null @@ -1,21 +0,0 @@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/cutil \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/viewer - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - -noinst_HEADERS = \ - dawg.h dawg_cache.h dict.h matchdefs.h \ - stopper.h trie.h - -noinst_LTLIBRARIES = libtesseract_dict.la - -libtesseract_dict_la_SOURCES = \ - context.cpp \ - dawg.cpp dawg_cache.cpp dict.cpp hyphen.cpp \ - 
permdawg.cpp stopper.cpp trie.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/context.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/context.cpp deleted file mode 100644 index dbdcaaf7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/context.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: context.cpp (Formerly context.c) - * Description: Context checking functions - * Author: Mark Seaman, OCR Technology - * Created: Thu Feb 15 11:18:24 1990 - * Modified: Tue Jul 9 17:38:16 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#include "dict.h" -#include "tprintf.h" -#include "unicharset.h" - -namespace tesseract { - -static const int kMinAbsoluteGarbageWordLength = 10; -static const float kMinAbsoluteGarbageAlphanumFrac = 0.5f; - -const int case_state_table[6][4] = { - {/* 0. Beginning of word */ - /* P U L D */ - /* -1. Error on case */ - 0, 1, 5, 4}, - {/* 1. After initial capital */ - 0, 3, 2, 4}, - {/* 2. After lower case */ - 0, -1, 2, -1}, - {/* 3. After upper case */ - 0, 3, -1, 4}, - {/* 4. After a digit */ - 0, -1, -1, 4}, - {/* 5. 
After initial lower case */ - 5, -1, 2, -1}, -}; - -int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const { - int state = 0; - int x; - for (x = 0; x < word.length(); ++x) { - UNICHAR_ID ch_id = word.unichar_id(x); - if (unicharset.get_isupper(ch_id)) - state = case_state_table[state][1]; - else if (unicharset.get_islower(ch_id)) - state = case_state_table[state][2]; - else if (unicharset.get_isdigit(ch_id)) - state = case_state_table[state][3]; - else - state = case_state_table[state][0]; - if (state == -1) return false; - } - return state != 5; // single lower is bad -} - -bool Dict::absolute_garbage(const WERD_CHOICE &word, - const UNICHARSET &unicharset) { - if (word.length() < kMinAbsoluteGarbageWordLength) return false; - int num_alphanum = 0; - for (int x = 0; x < word.length(); ++x) { - num_alphanum += (unicharset.get_isalpha(word.unichar_id(x)) || - unicharset.get_isdigit(word.unichar_id(x))); - } - return (static_cast(num_alphanum) / - static_cast(word.length()) < kMinAbsoluteGarbageAlphanumFrac); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg.cpp deleted file mode 100644 index 864e60c8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg.cpp +++ /dev/null @@ -1,424 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: dawg.cpp (Formerly dawg.c) - * Description: Use a Directed Acyclic Word Graph - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Jul 24 16:59:16 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "dawg.h" - -#include "dict.h" -#include "emalloc.h" -#include "helpers.h" -#include "strngs.h" -#include "tesscallback.h" -#include "tprintf.h" - -#include - -/*---------------------------------------------------------------------- - F u n c t i o n s f o r D a w g -----------------------------------------------------------------------*/ -namespace tesseract { - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -Dawg::~Dawg() = default; - -bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, - bool requires_complete) const { - if (word.length() == 0) return !requires_complete; - NODE_REF node = 0; - int end_index = word.length() - 1; - for (int i = 0; i < end_index; i++) { - EDGE_REF edge = edge_char_of(node, word.unichar_id(i), false); - if (edge == NO_EDGE) { - return false; - } - if ((node = next_node(edge)) == 0) { - // This only happens if all words following this edge terminate -- - // there are no larger words. See Trie::add_word_to_dawg() - return false; - } - } - // Now check the last character. 
- return edge_char_of(node, word.unichar_id(end_index), requires_complete) != - NO_EDGE; -} - -bool Dawg::word_in_dawg(const WERD_CHOICE &word) const { - return prefix_in_dawg(word, true); -} - -int Dawg::check_for_words(const char *filename, - const UNICHARSET &unicharset, - bool enable_wildcard) const { - if (filename == nullptr) return 0; - - FILE *word_file; - char string [CHARS_PER_LINE]; - int misses = 0; - UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard); - - word_file = fopen(filename, "r"); - if (word_file == nullptr) { - tprintf("Error: Could not open file %s\n", filename); - ASSERT_HOST(word_file); - } - - while (fgets (string, CHARS_PER_LINE, word_file) != nullptr) { - chomp_string(string); // remove newline - WERD_CHOICE word(string, unicharset); - if (word.length() > 0 && - !word.contains_unichar_id(INVALID_UNICHAR_ID)) { - if (!match_words(&word, 0, 0, - enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) { - tprintf("Missing word: %s\n", string); - ++misses; - } - } else { - tprintf("Failed to create a valid word from %s\n", string); - } - } - fclose (word_file); - // Make sure the user sees this with fprintf instead of tprintf. 
- if (debug_level_) tprintf("Number of lost words=%d\n", misses); - return misses; -} - -void Dawg::iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const { - WERD_CHOICE word(&unicharset); - iterate_words_rec(word, 0, cb); -} - -static void CallWithUTF8(TessCallback1 *cb, - const WERD_CHOICE *wc) { - STRING s; - wc->string_and_lengths(&s, nullptr); - cb->Run(s.string()); -} - -void Dawg::iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const { - std::unique_ptr> shim( - NewPermanentTessCallback(CallWithUTF8, cb)); - WERD_CHOICE word(&unicharset); - iterate_words_rec(word, 0, shim.get()); -} - -void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, - NODE_REF to_explore, - TessCallback1 *cb) const { - NodeChildVector children; - this->unichar_ids_of(to_explore, &children, false); - for (int i = 0; i < children.size(); i++) { - WERD_CHOICE next_word(word_so_far); - next_word.append_unichar_id(children[i].unichar_id, 1, 0.0, 0.0); - if (this->end_of_word(children[i].edge_ref)) { - cb->Run(&next_word); - } - NODE_REF next = next_node(children[i].edge_ref); - if (next != 0) { - iterate_words_rec(next_word, next, cb); - } - } -} - -bool Dawg::match_words(WERD_CHOICE *word, int32_t index, - NODE_REF node, UNICHAR_ID wildcard) const { - EDGE_REF edge; - int32_t word_end; - - if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) { - bool any_matched = false; - NodeChildVector vec; - this->unichar_ids_of(node, &vec, false); - for (int i = 0; i < vec.size(); ++i) { - word->set_unichar_id(vec[i].unichar_id, index); - if (match_words(word, index, node, wildcard)) - any_matched = true; - } - word->set_unichar_id(wildcard, index); - return any_matched; - } else { - word_end = index == word->length() - 1; - edge = edge_char_of(node, word->unichar_id(index), word_end); - if (edge != NO_EDGE) { // normal edge in DAWG - node = next_node(edge); - if (word_end) { - if (debug_level_ > 1) word->print("match_words() found: "); - 
return true; - } else if (node != 0) { - return match_words(word, index+1, node, wildcard); - } - } - } - return false; -} - -void Dawg::init(int unicharset_size) { - ASSERT_HOST(unicharset_size > 0); - unicharset_size_ = unicharset_size; - // Set bit masks. We will use the value unicharset_size_ as a null char, so - // the actual number of unichars is unicharset_size_ + 1. - flag_start_bit_ = ceil(log(unicharset_size_ + 1.0) / log(2.0)); - next_node_start_bit_ = flag_start_bit_ + NUM_FLAG_BITS; - letter_mask_ = ~(~0ull << flag_start_bit_); - next_node_mask_ = ~0ull << (flag_start_bit_ + NUM_FLAG_BITS); - flags_mask_ = ~(letter_mask_ | next_node_mask_); -} - - -/*---------------------------------------------------------------------- - F u n c t i o n s f o r S q u i s h e d D a w g -----------------------------------------------------------------------*/ - -SquishedDawg::~SquishedDawg() { delete[] edges_; } - -EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, - UNICHAR_ID unichar_id, - bool word_end) const { - EDGE_REF edge = node; - if (node == 0) { // binary search - EDGE_REF start = 0; - EDGE_REF end = num_forward_edges_in_node0 - 1; - int compare; - while (start <= end) { - edge = (start + end) >> 1; // (start + end) / 2 - compare = given_greater_than_edge_rec(NO_EDGE, word_end, - unichar_id, edges_[edge]); - if (compare == 0) { // given == vec[k] - return edge; - } else if (compare == 1) { // given > vec[k] - start = edge + 1; - } else { // given < vec[k] - end = edge - 1; - } - } - } else { // linear search - if (edge != NO_EDGE && edge_occupied(edge)) { - do { - if ((unichar_id_from_edge_rec(edges_[edge]) == unichar_id) && - (!word_end || end_of_word_from_edge_rec(edges_[edge]))) - return (edge); - } while (!last_edge(edge++)); - } - } - return (NO_EDGE); // not found -} - -int32_t SquishedDawg::num_forward_edges(NODE_REF node) const { - EDGE_REF edge = node; - int32_t num = 0; - - if (forward_edge (edge)) { - do { - num++; - } while (!last_edge(edge++)); - 
} - - return (num); -} - -void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const { - if (node == NO_EDGE) return; // nothing to print - - EDGE_REF edge = node; - const char *forward_string = "FORWARD"; - const char *backward_string = " "; - - const char *last_string = "LAST"; - const char *not_last_string = " "; - - const char *eow_string = "EOW"; - const char *not_eow_string = " "; - - const char *direction; - const char *is_last; - const char *eow; - - UNICHAR_ID unichar_id; - - if (edge_occupied(edge)) { - do { - direction = - forward_edge(edge) ? forward_string : backward_string; - is_last = last_edge(edge) ? last_string : not_last_string; - eow = end_of_word(edge) ? eow_string : not_eow_string; - - unichar_id = edge_letter(edge); - tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", - edge, next_node(edge), unichar_id, - direction, is_last, eow); - - if (edge - node > max_num_edges) return; - } while (!last_edge(edge++)); - - if (edge < num_edges_ && - edge_occupied(edge) && backward_edge(edge)) { - do { - direction = - forward_edge(edge) ? forward_string : backward_string; - is_last = last_edge(edge) ? last_string : not_last_string; - eow = end_of_word(edge) ? eow_string : not_eow_string; - - unichar_id = edge_letter(edge); - tprintf(REFFORMAT " : next = " REFFORMAT - ", unichar_id = %d, %s %s %s\n", - edge, next_node(edge), unichar_id, - direction, is_last, eow); - - if (edge - node > MAX_NODE_EDGES_DISPLAY) return; - } while (!last_edge(edge++)); - } - } - else { - tprintf(REFFORMAT " : no edges in this node\n", node); - } - tprintf("\n"); -} - -void SquishedDawg::print_edge(EDGE_REF edge) const { - if (edge == NO_EDGE) { - tprintf("NO_EDGE\n"); - } else { - tprintf(REFFORMAT " : next = " REFFORMAT - ", unichar_id = '%d', %s %s %s\n", edge, - next_node(edge), edge_letter(edge), - (forward_edge(edge) ? "FORWARD" : " "), - (last_edge(edge) ? "LAST" : " "), - (end_of_word(edge) ? 
"EOW" : "")); - } -} - -bool SquishedDawg::read_squished_dawg(TFile *file) { - if (debug_level_) tprintf("Reading squished dawg\n"); - - // Read the magic number and check that it matches kDawgMagicNumber, as - // auto-endian fixing should make sure it is always correct. - int16_t magic; - if (!file->DeSerialize(&magic)) return false; - if (magic != kDawgMagicNumber) { - tprintf("Bad magic number on dawg: %d vs %d\n", magic, kDawgMagicNumber); - return false; - } - - int32_t unicharset_size; - if (!file->DeSerialize(&unicharset_size)) return false; - if (!file->DeSerialize(&num_edges_)) return false; - ASSERT_HOST(num_edges_ > 0); // DAWG should not be empty - Dawg::init(unicharset_size); - - edges_ = new EDGE_RECORD[num_edges_]; - if (!file->DeSerialize(&edges_[0], num_edges_)) return false; - if (debug_level_ > 2) { - tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n", - type_, lang_.string(), perm_, unicharset_size_, num_edges_); - for (EDGE_REF edge = 0; edge < num_edges_; ++edge) print_edge(edge); - } - return true; -} - -std::unique_ptr SquishedDawg::build_node_map( - int32_t *num_nodes) const { - EDGE_REF edge; - std::unique_ptr node_map(new EDGE_REF[num_edges_]); - int32_t node_counter; - int32_t num_edges; - - for (edge = 0; edge < num_edges_; edge++) // init all slots - node_map[edge] = -1; - - node_counter = num_forward_edges(0); - - *num_nodes = 0; - for (edge = 0; edge < num_edges_; edge++) { // search all slots - - if (forward_edge(edge)) { - (*num_nodes)++; // count nodes links - node_map[edge] = (edge ? 
node_counter : 0); - num_edges = num_forward_edges(edge); - if (edge != 0) node_counter += num_edges; - edge += num_edges; - if (edge >= num_edges_) break; - if (backward_edge(edge)) while (!last_edge(edge++)); - edge--; - } - } - return node_map; -} - -bool SquishedDawg::write_squished_dawg(TFile *file) { - EDGE_REF edge; - int32_t num_edges; - int32_t node_count = 0; - EDGE_REF old_index; - EDGE_RECORD temp_record; - - if (debug_level_) tprintf("write_squished_dawg\n"); - - std::unique_ptr node_map(build_node_map(&node_count)); - - // Write the magic number to help detecting a change in endianness. - int16_t magic = kDawgMagicNumber; - if (!file->Serialize(&magic)) return false; - if (!file->Serialize(&unicharset_size_)) return false; - - // Count the number of edges in this Dawg. - num_edges = 0; - for (edge=0; edge < num_edges_; edge++) - if (forward_edge(edge)) - num_edges++; - - // Write edge count to file. - if (!file->Serialize(&num_edges)) return false; - - if (debug_level_) { - tprintf("%d nodes in DAWG\n", node_count); - tprintf("%d edges in DAWG\n", num_edges); - } - - for (edge = 0; edge < num_edges_; edge++) { - if (forward_edge(edge)) { // write forward edges - do { - old_index = next_node_from_edge_rec(edges_[edge]); - set_next_node(edge, node_map[old_index]); - temp_record = edges_[edge]; - if (!file->Serialize(&temp_record)) return false; - set_next_node(edge, old_index); - } while (!last_edge(edge++)); - - if (edge >= num_edges_) break; - if (backward_edge(edge)) // skip back links - while (!last_edge(edge++)); - - edge--; - } - } - return true; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg.h deleted file mode 100644 index 0d3c2f61..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg.h +++ /dev/null @@ -1,567 +0,0 @@ -/* -*-C-*- - 
******************************************************************************** - * - * File: dawg.h - * Description: Definition of a class that represents Directed Acyclic Word - * Graph (DAWG), functions to build and manipulate the DAWG. - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ - -#ifndef DICT_DAWG_H_ -#define DICT_DAWG_H_ - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include -#include "elst.h" -#include "params.h" -#include "ratngs.h" -#include "tesscallback.h" - -#ifndef __GNUC__ -#ifdef _WIN32 -#define NO_EDGE (int64_t) 0xffffffffffffffffi64 -#endif /*_WIN32*/ -#else -#define NO_EDGE (int64_t) 0xffffffffffffffffll -#endif /*__GNUC__*/ - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -class UNICHARSET; - -using EDGE_RECORD = uint64_t; -using EDGE_ARRAY = EDGE_RECORD *; -using EDGE_REF = int64_t; -using NODE_REF = int64_t; -using NODE_MAP = EDGE_REF *; - -namespace tesseract { - -struct NodeChild { - UNICHAR_ID unichar_id; - EDGE_REF edge_ref; - NodeChild(UNICHAR_ID id, EDGE_REF ref): unichar_id(id), edge_ref(ref) {} - NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {} -}; - -using NodeChildVector = GenericVector; -using SuccessorList = GenericVector; -using SuccessorListsVector = GenericVector; - -enum DawgType { - DAWG_TYPE_PUNCTUATION, - DAWG_TYPE_WORD, - DAWG_TYPE_NUMBER, - DAWG_TYPE_PATTERN, - - DAWG_TYPE_COUNT // number of enum entries -}; - -/*---------------------------------------------------------------------- - C o n s t a n t s -----------------------------------------------------------------------*/ - -#define FORWARD_EDGE (int32_t) 0 -#define BACKWARD_EDGE (int32_t) 1 -#define MAX_NODE_EDGES_DISPLAY (int64_t) 100 -#define MARKER_FLAG (int64_t) 1 -#define DIRECTION_FLAG (int64_t) 2 -#define WERD_END_FLAG (int64_t) 4 -#define LETTER_START_BIT 0 -#define NUM_FLAG_BITS 3 -#define REFFORMAT "%" PRId64 - -static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = { - { 0, 1, 1, 0 }, 
// for DAWG_TYPE_PUNCTUATION - { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD - { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER - { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN -}; - -static const char kWildcard[] = "*"; - - -/*---------------------------------------------------------------------- - C l a s s e s a n d S t r u c t s -----------------------------------------------------------------------*/ -// -/// Abstract class (an interface) that declares methods needed by the -/// various tesseract classes to operate on SquishedDawg and Trie objects. -/// -/// This class initializes all the edge masks (since their usage by -/// SquishedDawg and Trie is identical) and implements simple accessors -/// for each of the fields encoded in an EDGE_RECORD. -/// This class also implements word_in_dawg() and check_for_words() -/// (since they use only the public methods of SquishedDawg and Trie -/// classes that are inherited from the Dawg base class). -// -class Dawg { - public: - /// Magic number to determine endianness when reading the Dawg from file. - static const int16_t kDawgMagicNumber = 42; - /// A special unichar id that indicates that any appropriate pattern - /// (e.g.dicitonary word, 0-9 digit, etc) can be inserted instead - /// Used for expressing patterns in punctuation and number Dawgs. - static const UNICHAR_ID kPatternUnicharID = 0; - - inline DawgType type() const { return type_; } - inline const STRING &lang() const { return lang_; } - inline PermuterType permuter() const { return perm_; } - - virtual ~Dawg(); - - /// Returns true if the given word is in the Dawg. - bool word_in_dawg(const WERD_CHOICE &word) const; - - // Returns true if the given word prefix is not contraindicated by the dawg. - // If requires_complete is true, then the exact complete word must be present. - bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const; - - /// Checks the Dawg for the words that are listed in the requested file. 
- /// Returns the number of words in the given file missing from the Dawg. - int check_for_words(const char *filename, - const UNICHARSET &unicharset, - bool enable_wildcard) const; - - // For each word in the Dawg, call the given (permanent) callback with the - // text (UTF-8) version of the word. - void iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const; - - // For each word in the Dawg, call the given (permanent) callback with the - // text (UTF-8) version of the word. - void iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const; - - // Pure virtual function that should be implemented by the derived classes. - - /// Returns the edge that corresponds to the letter out of this node. - virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, - bool word_end) const = 0; - - /// Fills the given NodeChildVector with all the unichar ids (and the - /// corresponding EDGE_REFs) for which there is an edge out of this node. - virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, - bool word_end) const = 0; - - /// Returns the next node visited by following the edge - /// indicated by the given EDGE_REF. - virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0; - - /// Returns true if the edge indicated by the given EDGE_REF - /// marks the end of a word. - virtual bool end_of_word(EDGE_REF edge_ref) const = 0; - - /// Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. - virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0; - - /// Prints the contents of the node indicated by the given NODE_REF. - /// At most max_num_edges will be printed. - virtual void print_node(NODE_REF node, int max_num_edges) const = 0; - - /// Fills vec with unichar ids that represent the character classes - /// of the given unichar_id. 
- virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, - const UNICHARSET &unicharset, - GenericVector *vec) const { - (void)unichar_id; - (void)unicharset; - (void)vec; - } - - /// Returns the given EDGE_REF if the EDGE_RECORD that it points to has - /// a self loop and the given unichar_id matches the unichar_id stored in the - /// EDGE_RECORD, returns NO_EDGE otherwise. - virtual EDGE_REF pattern_loop_edge( - EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const { - (void)edge_ref; - (void)unichar_id; - (void)word_end; - return false; - } - - protected: - Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level) - : type_(type), - lang_(lang), - perm_(perm), - unicharset_size_(0), - debug_level_(debug_level) {} - - /// Returns the next node visited by following this edge. - inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const { - return ((edge_rec & next_node_mask_) >> next_node_start_bit_); - } - /// Returns the marker flag of this edge. - inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const { - return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0; - } - /// Returns the direction flag of this edge. - inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const { - return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? - BACKWARD_EDGE : FORWARD_EDGE; - } - /// Returns true if this edge marks the end of a word. - inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const { - return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0; - } - /// Returns UNICHAR_ID recorded in this edge. - inline UNICHAR_ID unichar_id_from_edge_rec( - const EDGE_RECORD &edge_rec) const { - return ((edge_rec & letter_mask_) >> LETTER_START_BIT); - } - /// Sets the next node link for this edge in the Dawg. 
- inline void set_next_node_in_edge_rec( - EDGE_RECORD *edge_rec, EDGE_REF value) { - *edge_rec &= (~next_node_mask_); - *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_); - } - /// Sets this edge record to be the last one in a sequence of edges. - inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) { - *edge_rec |= (MARKER_FLAG << flag_start_bit_); - } - /// Sequentially compares the given values of unichar ID, next node - /// and word end marker with the values in the given EDGE_RECORD. - /// Returns: 1 if at any step the given input value exceeds - /// that of edge_rec (and all the values already - /// checked are the same) - /// 0 if edge_rec_match() returns true - /// -1 otherwise - inline int given_greater_than_edge_rec(NODE_REF next_node, - bool word_end, - UNICHAR_ID unichar_id, - const EDGE_RECORD &edge_rec) const { - UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec); - NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec); - bool curr_word_end = end_of_word_from_edge_rec(edge_rec); - if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node, - curr_word_end, curr_unichar_id)) return 0; - if (unichar_id > curr_unichar_id) return 1; - if (unichar_id == curr_unichar_id) { - if (next_node > curr_next_node) return 1; - if (next_node == curr_next_node) { - if (word_end > curr_word_end) return 1; - } - } - return -1; - } - /// Returns true if all the values are equal (any value matches - /// next_node if next_node == NO_EDGE, any value matches word_end - /// if word_end is false). - inline bool edge_rec_match(NODE_REF next_node, - bool word_end, - UNICHAR_ID unichar_id, - NODE_REF other_next_node, - bool other_word_end, - UNICHAR_ID other_unichar_id) const { - return ((unichar_id == other_unichar_id) && - (next_node == NO_EDGE || next_node == other_next_node) && - (!word_end || (word_end == other_word_end))); - } - - /// Sets unicharset_size_. 
- /// Initializes the values of various masks from unicharset_size_. - void init(int unicharset_size); - - /// Matches all of the words that are represented by this string. - /// If wildcard is set to something other than INVALID_UNICHAR_ID, - /// the *'s in this string are interpreted as wildcards. - /// WERD_CHOICE param is not passed by const so that wildcard searches - /// can modify it and work without having to copy WERD_CHOICEs. - bool match_words(WERD_CHOICE *word, int32_t index, - NODE_REF node, UNICHAR_ID wildcard) const; - - // Recursively iterate over all words in a dawg (see public iterate_words). - void iterate_words_rec(const WERD_CHOICE &word_so_far, - NODE_REF to_explore, - TessCallback1 *cb) const; - - // Member Variables. - DawgType type_; - STRING lang_; - /// Permuter code that should be used if the word is found in this Dawg. - PermuterType perm_; - // Variables to construct various edge masks. Formerly: - // #define NEXT_EDGE_MASK (int64_t) 0xfffffff800000000i64 - // #define FLAGS_MASK (int64_t) 0x0000000700000000i64 - // #define LETTER_MASK (int64_t) 0x00000000ffffffffi64 - int unicharset_size_; - int flag_start_bit_; - int next_node_start_bit_; - uint64_t next_node_mask_; - uint64_t flags_mask_; - uint64_t letter_mask_; - // Level of debug statements to print to stdout. - int debug_level_; -}; - -// -// DawgPosition keeps track of where we are in the primary dawg we're searching -// as well as where we may be in the "punctuation dawg" which may provide -// surrounding context. -// -// Example: -// punctuation dawg -- space is the "pattern character" -// " " // no punctuation -// "' '" // leading and trailing apostrophes -// " '" // trailing apostrophe -// word dawg: -// "cat" -// "cab" -// "cat's" -// -// DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp) -// -// DawgPosition(-1, NO_EDGE, p, pe, false) -// We're in the punctuation dawg, no other dawg has been started. 
-// (1) If there's a pattern edge as a punc dawg child of us, -// for each punc-following dawg starting with ch, produce: -// Result: DawgPosition(k, w, p', false) -// (2) If there's a valid continuation in the punc dawg, produce: -// Result: DawgPosition(-k, NO_EDGE, p', false) -// -// DawgPosition(k, w, -1, NO_EDGE, false) -// We're in dawg k. Going back to punctuation dawg is not an option. -// Follow ch in dawg k. -// -// DawgPosition(k, w, p, pe, false) -// We're in dawg k. Continue in dawg k and/or go back to the punc dawg. -// If ending, check that the punctuation dawg is also ok to end here. -// -// DawgPosition(k, w, p, pe true) -// We're back in the punctuation dawg. Continuing there is the only option. -struct DawgPosition { - DawgPosition() - : dawg_index(-1), dawg_ref(NO_EDGE), punc_ref(NO_EDGE), - back_to_punc(false) {} - DawgPosition(int dawg_idx, EDGE_REF dawgref, - int punc_idx, EDGE_REF puncref, - bool backtopunc) - : dawg_index(dawg_idx), dawg_ref(dawgref), - punc_index(punc_idx), punc_ref(puncref), - back_to_punc(backtopunc) { - } - bool operator==(const DawgPosition &other) { - return dawg_index == other.dawg_index && - dawg_ref == other.dawg_ref && - punc_index == other.punc_index && - punc_ref == other.punc_ref && - back_to_punc == other.back_to_punc; - } - - int8_t dawg_index; - EDGE_REF dawg_ref; - int8_t punc_index; - EDGE_REF punc_ref; - // Have we returned to the punc dawg at the end of the word? - bool back_to_punc; -}; - -class DawgPositionVector : public GenericVector { - public: - /// Overload clear() in order to avoid allocating/deallocating memory - /// when clearing the vector and re-inserting entries into it later. - void clear() { size_used_ = 0; } - /// Adds an entry for the given dawg_index with the given node to the vec. - /// Returns false if the same entry already exists in the vector, - /// true otherwise. 
- inline bool add_unique(const DawgPosition &new_pos, - bool debug, - const char *debug_msg) { - for (int i = 0; i < size_used_; ++i) { - if (data_[i] == new_pos) return false; - } - push_back(new_pos); - if (debug) { - tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", - debug_msg, new_pos.dawg_index, new_pos.dawg_ref, - new_pos.punc_ref, new_pos.back_to_punc ? " returned" : ""); - } - return true; - } -}; - -// -/// Concrete class that can operate on a compacted (squished) Dawg (read, -/// search and write to file). This class is read-only in the sense that -/// new words can not be added to an instance of SquishedDawg. -/// The underlying representation of the nodes and edges in SquishedDawg -/// is stored as a contiguous EDGE_ARRAY (read from file or given as an -/// argument to the constructor). -// -class SquishedDawg : public Dawg { - public: - SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, - int debug_level) - : Dawg(type, lang, perm, debug_level) {} - SquishedDawg(const char *filename, DawgType type, const STRING &lang, - PermuterType perm, int debug_level) - : Dawg(type, lang, perm, debug_level) { - TFile file; - ASSERT_HOST(file.Open(filename, nullptr)); - ASSERT_HOST(read_squished_dawg(&file)); - num_forward_edges_in_node0 = num_forward_edges(0); - } - SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, - const STRING &lang, PermuterType perm, int unicharset_size, - int debug_level) - : Dawg(type, lang, perm, debug_level), - edges_(edges), - num_edges_(num_edges) { - init(unicharset_size); - num_forward_edges_in_node0 = num_forward_edges(0); - if (debug_level > 3) print_all("SquishedDawg:"); - } - virtual ~SquishedDawg(); - - // Loads using the given TFile. Returns false on failure. 
- bool Load(TFile *fp) { - if (!read_squished_dawg(fp)) return false; - num_forward_edges_in_node0 = num_forward_edges(0); - return true; - } - - int NumEdges() { return num_edges_; } - - /// Returns the edge that corresponds to the letter out of this node. - EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, - bool word_end) const; - - /// Fills the given NodeChildVector with all the unichar ids (and the - /// corresponding EDGE_REFs) for which there is an edge out of this node. - void unichar_ids_of(NODE_REF node, NodeChildVector *vec, - bool word_end) const { - EDGE_REF edge = node; - if (!edge_occupied(edge) || edge == NO_EDGE) return; - assert(forward_edge(edge)); // we don't expect any backward edges to - do { // be present when this function is called - if (!word_end || end_of_word_from_edge_rec(edges_[edge])) { - vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge)); - } - } while (!last_edge(edge++)); - } - - /// Returns the next node visited by following the edge - /// indicated by the given EDGE_REF. - NODE_REF next_node(EDGE_REF edge) const { - return next_node_from_edge_rec((edges_[edge])); - } - - /// Returns true if the edge indicated by the given EDGE_REF - /// marks the end of a word. - bool end_of_word(EDGE_REF edge_ref) const { - return end_of_word_from_edge_rec((edges_[edge_ref])); - } - - /// Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. - UNICHAR_ID edge_letter(EDGE_REF edge_ref) const { - return unichar_id_from_edge_rec((edges_[edge_ref])); - } - - /// Prints the contents of the node indicated by the given NODE_REF. - /// At most max_num_edges will be printed. - void print_node(NODE_REF node, int max_num_edges) const; - - /// Writes the squished/reduced Dawg to a file. - bool write_squished_dawg(TFile *file); - - /// Opens the file with the given filename and writes the - /// squished/reduced Dawg to the file. 
- bool write_squished_dawg(const char *filename) { - TFile file; - file.OpenWrite(nullptr); - if (!this->write_squished_dawg(&file)) { - tprintf("Error serializing %s\n", filename); - return false; - } - if (!file.CloseWrite(filename, nullptr)) { - tprintf("Error writing file %s\n", filename); - return false; - } - return true; - } - - private: - /// Sets the next node link for this edge. - inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) { - set_next_node_in_edge_rec(&(edges_[edge_ref]), value); - } - /// Sets the edge to be empty. - inline void set_empty_edge(EDGE_REF edge_ref) { - (edges_[edge_ref] = next_node_mask_); - } - /// Goes through all the edges and clears each one out. - inline void clear_all_edges() { - for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge); - } - /// Clears the last flag of this edge. - inline void clear_marker_flag(EDGE_REF edge_ref) { - (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_)); - } - /// Returns true if this edge is in the forward direction. - inline bool forward_edge(EDGE_REF edge_ref) const { - return (edge_occupied(edge_ref) && - (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref]))); - } - /// Returns true if this edge is in the backward direction. - inline bool backward_edge(EDGE_REF edge_ref) const { - return (edge_occupied(edge_ref) && - (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref]))); - } - /// Returns true if the edge spot in this location is occupied. - inline bool edge_occupied(EDGE_REF edge_ref) const { - return (edges_[edge_ref] != next_node_mask_); - } - /// Returns true if this edge is the last edge in a sequence. - inline bool last_edge(EDGE_REF edge_ref) const { - return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0; - } - - /// Counts and returns the number of forward edges in this node. - int32_t num_forward_edges(NODE_REF node) const; - - /// Reads SquishedDawg from a file. 
- bool read_squished_dawg(TFile *file); - - /// Prints the contents of an edge indicated by the given EDGE_REF. - void print_edge(EDGE_REF edge) const; - - /// Prints the contents of the SquishedDawg. - void print_all(const char* msg) { - tprintf("\n__________________________\n%s\n", msg); - for (int i = 0; i < num_edges_; ++i) print_edge(i); - tprintf("__________________________\n"); - } - /// Constructs a mapping from the memory node indices to disk node indices. - std::unique_ptr build_node_map(int32_t *num_nodes) const; - - // Member variables. - EDGE_ARRAY edges_; - int32_t num_edges_; - int num_forward_edges_in_node0; -}; - -} // namespace tesseract - -#endif // DICT_DAWG_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg_cache.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg_cache.cpp deleted file mode 100644 index 59b2118c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg_cache.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dawg_cache.cpp -// Description: A class that knows about loading and caching dawgs. -// Author: David Eger -// Created: Fri Jan 27 12:08:00 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "dawg_cache.h" - -#include "dawg.h" -#include "object_cache.h" -#include "strngs.h" -#include "tessdatamanager.h" - -namespace tesseract { - -struct DawgLoader { - DawgLoader(const STRING &lang, TessdataType tessdata_dawg_type, - int dawg_debug_level, TessdataManager *data_file) - : lang_(lang), - data_file_(data_file), - tessdata_dawg_type_(tessdata_dawg_type), - dawg_debug_level_(dawg_debug_level) {} - - Dawg *Load(); - - STRING lang_; - TessdataManager *data_file_; - TessdataType tessdata_dawg_type_; - int dawg_debug_level_; -}; - -Dawg *DawgCache::GetSquishedDawg(const STRING &lang, - TessdataType tessdata_dawg_type, - int debug_level, TessdataManager *data_file) { - STRING data_id = data_file->GetDataFileName(); - data_id += kTessdataFileSuffixes[tessdata_dawg_type]; - DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file); - return dawgs_.Get(data_id, NewTessCallback(&loader, &DawgLoader::Load)); -} - -Dawg *DawgLoader::Load() { - TFile fp; - if (!data_file_->GetComponent(tessdata_dawg_type_, &fp)) return nullptr; - DawgType dawg_type; - PermuterType perm_type; - switch (tessdata_dawg_type_) { - case TESSDATA_PUNC_DAWG: - case TESSDATA_LSTM_PUNC_DAWG: - dawg_type = DAWG_TYPE_PUNCTUATION; - perm_type = PUNC_PERM; - break; - case TESSDATA_SYSTEM_DAWG: - case TESSDATA_LSTM_SYSTEM_DAWG: - dawg_type = DAWG_TYPE_WORD; - perm_type = SYSTEM_DAWG_PERM; - break; - case TESSDATA_NUMBER_DAWG: - case TESSDATA_LSTM_NUMBER_DAWG: - dawg_type = DAWG_TYPE_NUMBER; - perm_type = NUMBER_PERM; - break; - case TESSDATA_BIGRAM_DAWG: - dawg_type = DAWG_TYPE_WORD; // doesn't actually matter - perm_type = COMPOUND_PERM; // doesn't actually matter - break; - case TESSDATA_UNAMBIG_DAWG: - dawg_type = DAWG_TYPE_WORD; - perm_type = SYSTEM_DAWG_PERM; - break; - case TESSDATA_FREQ_DAWG: - dawg_type = DAWG_TYPE_WORD; - perm_type = FREQ_DAWG_PERM; - break; - default: - return nullptr; - } - 
SquishedDawg *retval = - new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_); - if (retval->Load(&fp)) return retval; - delete retval; - return nullptr; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg_cache.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg_cache.h deleted file mode 100644 index 83233802..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dawg_cache.h +++ /dev/null @@ -1,53 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dawg_cache.h -// Description: A class that knows about loading and caching dawgs. -// Author: David Eger -// Created: Fri Jan 27 12:08:00 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_DICT_DAWG_CACHE_H_ -#define TESSERACT_DICT_DAWG_CACHE_H_ - -#include "dawg.h" -#include "object_cache.h" -#include "strngs.h" -#include "tessdatamanager.h" - -namespace tesseract { - -class DawgCache { - public: - Dawg *GetSquishedDawg(const STRING &lang, TessdataType tessdata_dawg_type, - int debug_level, TessdataManager *data_file); - - // If we manage the given dawg, decrement its count, - // and possibly delete it if the count reaches zero. - // If dawg is unknown to us, return false. 
- bool FreeDawg(Dawg *dawg) { - return dawgs_.Free(dawg); - } - - // Free up any currently unused dawgs. - void DeleteUnusedDawgs() { - dawgs_.DeleteUnusedObjects(); - } - - private: - ObjectCache dawgs_; -}; - -} // namespace tesseract - -#endif // TESSERACT_DICT_DAWG_CACHE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dict.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dict.cpp deleted file mode 100644 index 9ab542ef..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dict.cpp +++ /dev/null @@ -1,865 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dict.cpp -// Description: dict class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include - -#include "dict.h" -#include "unicodes.h" - -#include "tprintf.h" - -namespace tesseract { - -class Image; - -Dict::Dict(CCUtil *ccutil) - : letter_is_okay_(&tesseract::Dict::def_letter_is_okay), - probability_in_context_(&tesseract::Dict::def_probability_in_context), - params_model_classify_(nullptr), - ccutil_(ccutil), - wildcard_unichar_id_(INVALID_UNICHAR_ID), - apostrophe_unichar_id_(INVALID_UNICHAR_ID), - question_unichar_id_(INVALID_UNICHAR_ID), - slash_unichar_id_(INVALID_UNICHAR_ID), - hyphen_unichar_id_(INVALID_UNICHAR_ID), - STRING_MEMBER(user_words_file, "", "A filename of user-provided words.", - getCCUtil()->params()), - STRING_INIT_MEMBER(user_words_suffix, "", - "A suffix of user-provided words located in tessdata.", - getCCUtil()->params()), - STRING_MEMBER(user_patterns_file, "", - "A filename of user-provided patterns.", - getCCUtil()->params()), - STRING_INIT_MEMBER(user_patterns_suffix, "", - "A suffix of user-provided patterns located in " - "tessdata.", - getCCUtil()->params()), - BOOL_INIT_MEMBER(load_system_dawg, true, "Load system word dawg.", - getCCUtil()->params()), - BOOL_INIT_MEMBER(load_freq_dawg, true, "Load frequent word dawg.", - getCCUtil()->params()), - BOOL_INIT_MEMBER(load_unambig_dawg, true, "Load unambiguous word dawg.", - getCCUtil()->params()), - BOOL_INIT_MEMBER(load_punc_dawg, true, - "Load dawg with punctuation" - " patterns.", - getCCUtil()->params()), - BOOL_INIT_MEMBER(load_number_dawg, true, - "Load dawg with number" - " patterns.", - getCCUtil()->params()), - BOOL_INIT_MEMBER(load_bigram_dawg, true, - "Load dawg with special word " - "bigrams.", - getCCUtil()->params()), - double_MEMBER(xheight_penalty_subscripts, 0.125, - "Score penalty (0.1 = 10%) added if there are subscripts " - "or superscripts in a word, but it is otherwise OK.", - getCCUtil()->params()), - double_MEMBER(xheight_penalty_inconsistent, 0.25, - "Score 
penalty (0.1 = 10%) added if an xheight is " - "inconsistent.", - getCCUtil()->params()), - double_MEMBER(segment_penalty_dict_frequent_word, 1.0, - "Score multiplier for word matches which have good case and" - " are frequent in the given language (lower is better).", - getCCUtil()->params()), - double_MEMBER(segment_penalty_dict_case_ok, 1.1, - "Score multiplier for word matches that have good case " - "(lower is better).", - getCCUtil()->params()), - double_MEMBER(segment_penalty_dict_case_bad, 1.3125, - "Default score multiplier for word matches, which may have " - "case issues (lower is better).", - getCCUtil()->params()), - double_MEMBER(segment_penalty_dict_nonword, 1.25, - "Score multiplier for glyph fragment segmentations which " - "do not match a dictionary word (lower is better).", - getCCUtil()->params()), - double_MEMBER(segment_penalty_garbage, 1.50, - "Score multiplier for poorly cased strings that are not in" - " the dictionary and generally look like garbage (lower is" - " better).", - getCCUtil()->params()), - STRING_MEMBER(output_ambig_words_file, "", - "Output file for ambiguities found in the dictionary", - getCCUtil()->params()), - INT_MEMBER(dawg_debug_level, 0, - "Set to 1 for general debug info" - ", to 2 for more details, to 3 to see all the debug messages", - getCCUtil()->params()), - INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.", - getCCUtil()->params()), - INT_MEMBER(max_viterbi_list_size, 10, "Maximum size of viterbi list.", - getCCUtil()->params()), - BOOL_MEMBER(use_only_first_uft8_step, false, - "Use only the first UTF8 step of the given string" - " when computing log probabilities.", - getCCUtil()->params()), - double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", - getCCUtil()->params()), - double_MEMBER(stopper_nondict_certainty_base, -2.50, - "Certainty threshold for non-dict words", - getCCUtil()->params()), - double_MEMBER(stopper_phase2_certainty_rejection_offset, 1.0, - "Reject certainty 
offset", getCCUtil()->params()), - INT_MEMBER(stopper_smallword_size, 2, - "Size of dict word to be treated as non-dict word", - getCCUtil()->params()), - double_MEMBER(stopper_certainty_per_char, -0.50, - "Certainty to add" - " for each dict char above small word size.", - getCCUtil()->params()), - double_MEMBER(stopper_allowable_character_badness, 3.0, - "Max certaintly variation allowed in a word (in sigma)", - getCCUtil()->params()), - INT_MEMBER(stopper_debug_level, 0, "Stopper debug level", - getCCUtil()->params()), - BOOL_MEMBER(stopper_no_acceptable_choices, false, - "Make AcceptableChoice() always return false. Useful" - " when there is a need to explore all segmentations", - getCCUtil()->params()), - INT_MEMBER(tessedit_truncate_wordchoice_log, 10, - "Max words to keep in list", getCCUtil()->params()), - STRING_MEMBER(word_to_debug, "", - "Word for which stopper debug" - " information should be printed to stdout", - getCCUtil()->params()), - STRING_MEMBER(word_to_debug_lengths, "", - "Lengths of unichars in word_to_debug", - getCCUtil()->params()), - INT_MEMBER(fragments_debug, 0, "Debug character fragments", - getCCUtil()->params()), - BOOL_MEMBER(segment_nonalphabetic_script, false, - "Don't use any alphabetic-specific tricks." - " Set to true in the traineddata config file for" - " scripts that are cursive or inherently fixed-pitch", - getCCUtil()->params()), - BOOL_MEMBER(save_doc_words, 0, "Save Document Words", - getCCUtil()->params()), - double_MEMBER(doc_dict_pending_threshold, 0.0, - "Worst certainty for using pending dictionary", - getCCUtil()->params()), - double_MEMBER(doc_dict_certainty_threshold, -2.25, - "Worst certainty for words that can be inserted into the" - " document dictionary", - getCCUtil()->params()), - INT_MEMBER(max_permuter_attempts, 10000, - "Maximum number of different" - " character choices to consider during permutation." 
- " This limit is especially useful when user patterns" - " are specified, since overly generic patterns can result in" - " dawg search exploring an overly large number of options.", - getCCUtil()->params()) { - dang_ambigs_table_ = nullptr; - replace_ambigs_table_ = nullptr; - reject_offset_ = 0.0; - go_deeper_fxn_ = nullptr; - hyphen_word_ = nullptr; - last_word_on_line_ = false; - document_words_ = nullptr; - dawg_cache_ = nullptr; - dawg_cache_is_ours_ = false; - pending_words_ = nullptr; - bigram_dawg_ = nullptr; - freq_dawg_ = nullptr; - punc_dawg_ = nullptr; - unambig_dawg_ = nullptr; - wordseg_rating_adjust_factor_ = -1.0f; - output_ambig_words_file_ = nullptr; -} - -Dict::~Dict() { - End(); - delete hyphen_word_; - if (output_ambig_words_file_ != nullptr) fclose(output_ambig_words_file_); -} - -DawgCache *Dict::GlobalDawgCache() { - // This global cache (a singleton) will outlive every Tesseract instance - // (even those that someone else might declare as global statics). - static DawgCache cache; - return &cache; -} - -// Sets up ready for a Load or LoadLSTM. -void Dict::SetupForLoad(DawgCache *dawg_cache) { - if (dawgs_.length() != 0) this->End(); - - apostrophe_unichar_id_ = getUnicharset().unichar_to_id(kApostropheSymbol); - question_unichar_id_ = getUnicharset().unichar_to_id(kQuestionSymbol); - slash_unichar_id_ = getUnicharset().unichar_to_id(kSlashSymbol); - hyphen_unichar_id_ = getUnicharset().unichar_to_id(kHyphenSymbol); - - if (dawg_cache != nullptr) { - dawg_cache_ = dawg_cache; - dawg_cache_is_ours_ = false; - } else { - dawg_cache_ = new DawgCache(); - dawg_cache_is_ours_ = true; - } -} - -// Loads the dawgs needed by Tesseract. Call FinishLoad() after. -void Dict::Load(const STRING &lang, TessdataManager *data_file) { - // Load dawgs_. 
- if (load_punc_dawg) { - punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_PUNC_DAWG, - dawg_debug_level, data_file); - if (punc_dawg_) dawgs_ += punc_dawg_; - } - if (load_system_dawg) { - Dawg *system_dawg = dawg_cache_->GetSquishedDawg( - lang, TESSDATA_SYSTEM_DAWG, dawg_debug_level, data_file); - if (system_dawg) dawgs_ += system_dawg; - } - if (load_number_dawg) { - Dawg *number_dawg = dawg_cache_->GetSquishedDawg( - lang, TESSDATA_NUMBER_DAWG, dawg_debug_level, data_file); - if (number_dawg) dawgs_ += number_dawg; - } - if (load_bigram_dawg) { - bigram_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_BIGRAM_DAWG, - dawg_debug_level, data_file); - // The bigram_dawg_ is NOT used like the other dawgs! DO NOT add to the - // dawgs_!! - } - if (load_freq_dawg) { - freq_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_FREQ_DAWG, - dawg_debug_level, data_file); - if (freq_dawg_) dawgs_ += freq_dawg_; - } - if (load_unambig_dawg) { - unambig_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_UNAMBIG_DAWG, - dawg_debug_level, data_file); - if (unambig_dawg_) dawgs_ += unambig_dawg_; - } - - STRING name; - if (((STRING &)user_words_suffix).length() > 0 || - ((STRING &)user_words_file).length() > 0) { - Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM, - getUnicharset().size(), dawg_debug_level); - if (((STRING &)user_words_file).length() > 0) { - name = user_words_file; - } else { - name = getCCUtil()->language_data_path_prefix; - name += user_words_suffix; - } - if (!trie_ptr->read_and_add_word_list(name.string(), getUnicharset(), - Trie::RRP_REVERSE_IF_HAS_RTL)) { - tprintf("Error: failed to load %s\n", name.string()); - delete trie_ptr; - } else { - dawgs_ += trie_ptr; - } - } - - if (((STRING &)user_patterns_suffix).length() > 0 || - ((STRING &)user_patterns_file).length() > 0) { - Trie *trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM, - getUnicharset().size(), dawg_debug_level); - 
trie_ptr->initialize_patterns(&(getUnicharset())); - if (((STRING &)user_patterns_file).length() > 0) { - name = user_patterns_file; - } else { - name = getCCUtil()->language_data_path_prefix; - name += user_patterns_suffix; - } - if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) { - tprintf("Error: failed to load %s\n", name.string()); - delete trie_ptr; - } else { - dawgs_ += trie_ptr; - } - } - - document_words_ = new Trie(DAWG_TYPE_WORD, lang, DOC_DAWG_PERM, - getUnicharset().size(), dawg_debug_level); - dawgs_ += document_words_; - - // This dawg is temporary and should not be searched by letter_is_ok. - pending_words_ = new Trie(DAWG_TYPE_WORD, lang, NO_PERM, - getUnicharset().size(), dawg_debug_level); -} - -// Loads the dawgs needed by the LSTM model. Call FinishLoad() after. -void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) { - // Load dawgs_. - if (load_punc_dawg) { - punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_PUNC_DAWG, - dawg_debug_level, data_file); - if (punc_dawg_) dawgs_ += punc_dawg_; - } - if (load_system_dawg) { - Dawg *system_dawg = dawg_cache_->GetSquishedDawg( - lang, TESSDATA_LSTM_SYSTEM_DAWG, dawg_debug_level, data_file); - if (system_dawg) dawgs_ += system_dawg; - } - if (load_number_dawg) { - Dawg *number_dawg = dawg_cache_->GetSquishedDawg( - lang, TESSDATA_LSTM_NUMBER_DAWG, dawg_debug_level, data_file); - if (number_dawg) dawgs_ += number_dawg; - } -} - -// Completes the loading process after Load() and/or LoadLSTM(). -// Returns false if no dictionaries were loaded. -bool Dict::FinishLoad() { - if (dawgs_.empty()) return false; - // Construct a list of corresponding successors for each dawg. Each entry, i, - // in the successors_ vector is a vector of integers that represent the - // indices into the dawgs_ vector of the successors for dawg i. 
- successors_.reserve(dawgs_.length()); - for (int i = 0; i < dawgs_.length(); ++i) { - const Dawg *dawg = dawgs_[i]; - SuccessorList *lst = new SuccessorList(); - for (int j = 0; j < dawgs_.length(); ++j) { - const Dawg *other = dawgs_[j]; - if (dawg != nullptr && other != nullptr && - (dawg->lang() == other->lang()) && - kDawgSuccessors[dawg->type()][other->type()]) *lst += j; - } - successors_ += lst; - } - return true; -} - -void Dict::End() { - if (dawgs_.length() == 0) - return; // Not safe to call twice. - for (int i = 0; i < dawgs_.size(); i++) { - if (!dawg_cache_->FreeDawg(dawgs_[i])) { - delete dawgs_[i]; - } - } - dawg_cache_->FreeDawg(bigram_dawg_); - if (dawg_cache_is_ours_) { - delete dawg_cache_; - dawg_cache_ = nullptr; - } - successors_.delete_data_pointers(); - dawgs_.clear(); - successors_.clear(); - document_words_ = nullptr; - delete pending_words_; - pending_words_ = nullptr; -} - -// Returns true if in light of the current state unichar_id is allowed -// according to at least one of the dawgs in the dawgs_ vector. -// See more extensive comments in dict.h where this function is declared. -int Dict::def_letter_is_okay(void* void_dawg_args, - const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, - bool word_end) const { - DawgArgs *dawg_args = static_cast(void_dawg_args); - - ASSERT_HOST(unicharset.contains_unichar_id(unichar_id)); - - if (dawg_debug_level >= 3) { - tprintf("def_letter_is_okay: current unichar=%s word_end=%d" - " num active dawgs=%d\n", - getUnicharset().debug_str(unichar_id).string(), word_end, - dawg_args->active_dawgs->length()); - } - - // Do not accept words that contain kPatternUnicharID. - // (otherwise pattern dawgs would not function correctly). - // Do not accept words containing INVALID_UNICHAR_IDs. - if (unichar_id == Dawg::kPatternUnicharID || - unichar_id == INVALID_UNICHAR_ID) { - dawg_args->permuter = NO_PERM; - return NO_PERM; - } - - // Initialization. 
- PermuterType curr_perm = NO_PERM; - dawg_args->updated_dawgs->clear(); - dawg_args->valid_end = false; - - // Go over the active_dawgs vector and insert DawgPosition records - // with the updated ref (an edge with the corresponding unichar id) into - // dawg_args->updated_pos. - for (int a = 0; a < dawg_args->active_dawgs->length(); ++a) { - const DawgPosition &pos = (*dawg_args->active_dawgs)[a]; - const Dawg *punc_dawg = pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr; - const Dawg *dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr; - - if (!dawg && !punc_dawg) { - // shouldn't happen. - tprintf("Received DawgPosition with no dawg or punc_dawg. wth?\n"); - continue; - } - if (!dawg) { - // We're in the punctuation dawg. A core dawg has not been chosen. - NODE_REF punc_node = GetStartingNode(punc_dawg, pos.punc_ref); - EDGE_REF punc_transition_edge = punc_dawg->edge_char_of( - punc_node, Dawg::kPatternUnicharID, word_end); - if (punc_transition_edge != NO_EDGE) { - // Find all successors, and see which can transition. 
- const SuccessorList &slist = *(successors_[pos.punc_index]); - for (int s = 0; s < slist.length(); ++s) { - int sdawg_index = slist[s]; - const Dawg *sdawg = dawgs_[sdawg_index]; - UNICHAR_ID ch = char_for_dawg(unicharset, unichar_id, sdawg); - EDGE_REF dawg_edge = sdawg->edge_char_of(0, ch, word_end); - if (dawg_edge != NO_EDGE) { - if (dawg_debug_level >=3) { - tprintf("Letter found in dawg %d\n", sdawg_index); - } - dawg_args->updated_dawgs->add_unique( - DawgPosition(sdawg_index, dawg_edge, - pos.punc_index, punc_transition_edge, false), - dawg_debug_level > 0, - "Append transition from punc dawg to current dawgs: "); - if (sdawg->permuter() > curr_perm) curr_perm = sdawg->permuter(); - if (sdawg->end_of_word(dawg_edge) && - punc_dawg->end_of_word(punc_transition_edge)) - dawg_args->valid_end = true; - } - } - } - EDGE_REF punc_edge = punc_dawg->edge_char_of(punc_node, unichar_id, - word_end); - if (punc_edge != NO_EDGE) { - if (dawg_debug_level >=3) { - tprintf("Letter found in punctuation dawg\n"); - } - dawg_args->updated_dawgs->add_unique( - DawgPosition(-1, NO_EDGE, pos.punc_index, punc_edge, false), - dawg_debug_level > 0, - "Extend punctuation dawg: "); - if (PUNC_PERM > curr_perm) curr_perm = PUNC_PERM; - if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true; - } - continue; - } - - if (punc_dawg && dawg->end_of_word(pos.dawg_ref)) { - // We can end the main word here. - // If we can continue on the punc ref, add that possibility. - NODE_REF punc_node = GetStartingNode(punc_dawg, pos.punc_ref); - EDGE_REF punc_edge = punc_node == NO_EDGE ? 
NO_EDGE - : punc_dawg->edge_char_of(punc_node, unichar_id, word_end); - if (punc_edge != NO_EDGE) { - dawg_args->updated_dawgs->add_unique( - DawgPosition(pos.dawg_index, pos.dawg_ref, - pos.punc_index, punc_edge, true), - dawg_debug_level > 0, - "Return to punctuation dawg: "); - if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter(); - if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true; - } - } - - if (pos.back_to_punc) continue; - - // If we are dealing with the pattern dawg, look up all the - // possible edges, not only for the exact unichar_id, but also - // for all its character classes (alpha, digit, etc). - if (dawg->type() == DAWG_TYPE_PATTERN) { - ProcessPatternEdges(dawg, pos, unichar_id, word_end, dawg_args, - &curr_perm); - // There can't be any successors to dawg that is of type - // DAWG_TYPE_PATTERN, so we are done examining this DawgPosition. - continue; - } - - // Find the edge out of the node for the unichar_id. - NODE_REF node = GetStartingNode(dawg, pos.dawg_ref); - EDGE_REF edge = (node == NO_EDGE) ? 
NO_EDGE - : dawg->edge_char_of(node, char_for_dawg(unicharset, unichar_id, dawg), - word_end); - - if (dawg_debug_level >= 3) { - tprintf("Active dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n", - pos.dawg_index, node, edge); - } - - if (edge != NO_EDGE) { // the unichar was found in the current dawg - if (dawg_debug_level >=3) { - tprintf("Letter found in dawg %d\n", pos.dawg_index); - } - if (word_end && punc_dawg && !punc_dawg->end_of_word(pos.punc_ref)) { - if (dawg_debug_level >= 3) { - tprintf("Punctuation constraint not satisfied at end of word.\n"); - } - continue; - } - if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter(); - if (dawg->end_of_word(edge) && - (punc_dawg == nullptr || punc_dawg->end_of_word(pos.punc_ref))) - dawg_args->valid_end = true; - dawg_args->updated_dawgs->add_unique( - DawgPosition(pos.dawg_index, edge, pos.punc_index, pos.punc_ref, - false), - dawg_debug_level > 0, - "Append current dawg to updated active dawgs: "); - } - } // end for - // Update dawg_args->permuter if it used to be NO_PERM or became NO_PERM - // or if we found the current letter in a non-punctuation dawg. This - // allows preserving information on which dawg the "core" word came from. - // Keep the old value of dawg_args->permuter if it is COMPOUND_PERM. - if (dawg_args->permuter == NO_PERM || curr_perm == NO_PERM || - (curr_perm != PUNC_PERM && dawg_args->permuter != COMPOUND_PERM)) { - dawg_args->permuter = curr_perm; - } - if (dawg_debug_level >= 2) { - tprintf("Returning %d for permuter code for this character.\n", - dawg_args->permuter); - } - return dawg_args->permuter; -} - -void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, - UNICHAR_ID unichar_id, bool word_end, - DawgArgs *dawg_args, - PermuterType *curr_perm) const { - NODE_REF node = GetStartingNode(dawg, pos.dawg_ref); - // Try to find the edge corresponding to the exact unichar_id and to all the - // edges corresponding to the character class of unichar_id. 
- GenericVector unichar_id_patterns; - unichar_id_patterns.push_back(unichar_id); - dawg->unichar_id_to_patterns(unichar_id, getUnicharset(), - &unichar_id_patterns); - for (int i = 0; i < unichar_id_patterns.size(); ++i) { - // On the first iteration check all the outgoing edges. - // On the second iteration check all self-loops. - for (int k = 0; k < 2; ++k) { - EDGE_REF edge = (k == 0) - ? dawg->edge_char_of(node, unichar_id_patterns[i], word_end) - : dawg->pattern_loop_edge(pos.dawg_ref, unichar_id_patterns[i], word_end); - if (edge == NO_EDGE) continue; - if (dawg_debug_level >= 3) { - tprintf("Pattern dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n", - pos.dawg_index, node, edge); - tprintf("Letter found in pattern dawg %d\n", pos.dawg_index); - } - if (dawg->permuter() > *curr_perm) *curr_perm = dawg->permuter(); - if (dawg->end_of_word(edge)) dawg_args->valid_end = true; - dawg_args->updated_dawgs->add_unique( - DawgPosition(pos.dawg_index, edge, pos.punc_index, pos.punc_ref, - pos.back_to_punc), - dawg_debug_level > 0, - "Append current dawg to updated active dawgs: "); - } - } -} - -// Fill the given active_dawgs vector with dawgs that could contain the -// beginning of the word. If hyphenated() returns true, copy the entries -// from hyphen_active_dawgs_ instead. 
-void Dict::init_active_dawgs(DawgPositionVector *active_dawgs, - bool ambigs_mode) const { - int i; - if (hyphenated()) { - *active_dawgs = hyphen_active_dawgs_; - if (dawg_debug_level >= 3) { - for (i = 0; i < hyphen_active_dawgs_.size(); ++i) { - tprintf("Adding hyphen beginning dawg [%d, " REFFORMAT "]\n", - hyphen_active_dawgs_[i].dawg_index, - hyphen_active_dawgs_[i].dawg_ref); - } - } - } else { - default_dawgs(active_dawgs, ambigs_mode); - } -} - -void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec, - bool suppress_patterns) const { - bool punc_dawg_available = - (punc_dawg_ != nullptr) && - punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE; - - for (int i = 0; i < dawgs_.length(); i++) { - if (dawgs_[i] != nullptr && - !(suppress_patterns && (dawgs_[i])->type() == DAWG_TYPE_PATTERN)) { - int dawg_ty = dawgs_[i]->type(); - bool subsumed_by_punc = kDawgSuccessors[DAWG_TYPE_PUNCTUATION][dawg_ty]; - if (dawg_ty == DAWG_TYPE_PUNCTUATION) { - *dawg_pos_vec += DawgPosition(-1, NO_EDGE, i, NO_EDGE, false); - if (dawg_debug_level >= 3) { - tprintf("Adding beginning punc dawg [%d, " REFFORMAT "]\n", i, - NO_EDGE); - } - } else if (!punc_dawg_available || !subsumed_by_punc) { - *dawg_pos_vec += DawgPosition(i, NO_EDGE, -1, NO_EDGE, false); - if (dawg_debug_level >= 3) { - tprintf("Adding beginning dawg [%d, " REFFORMAT "]\n", i, NO_EDGE); - } - } - } - } -} - -void Dict::add_document_word(const WERD_CHOICE &best_choice) { - // Do not add hyphenated word parts to the document dawg. - // hyphen_word_ will be non-nullptr after the set_hyphen_word() is - // called when the first part of the hyphenated word is - // discovered and while the second part of the word is recognized. - // hyphen_word_ is cleared in cc_recg() before the next word on - // the line is recognized. 
- if (hyphen_word_) return; - - int stringlen = best_choice.length(); - - if (valid_word(best_choice) || stringlen < 2) - return; - - // Discard words that contain >= kDocDictMaxRepChars repeating unichars. - if (best_choice.length() >= kDocDictMaxRepChars) { - int num_rep_chars = 1; - UNICHAR_ID uch_id = best_choice.unichar_id(0); - for (int i = 1; i < best_choice.length(); ++i) { - if (best_choice.unichar_id(i) != uch_id) { - num_rep_chars = 1; - uch_id = best_choice.unichar_id(i); - } else { - ++num_rep_chars; - if (num_rep_chars == kDocDictMaxRepChars) return; - } - } - } - - if (best_choice.certainty() < doc_dict_certainty_threshold || - stringlen == 2) { - if (best_choice.certainty() < doc_dict_pending_threshold) - return; - - if (!pending_words_->word_in_dawg(best_choice)) { - if (stringlen > 2 || - (stringlen == 2 && - getUnicharset().get_isupper(best_choice.unichar_id(0)) && - getUnicharset().get_isupper(best_choice.unichar_id(1)))) { - pending_words_->add_word_to_dawg(best_choice); - } - return; - } - } - - if (save_doc_words) { - STRING filename(getCCUtil()->imagefile); - filename += ".doc"; - FILE *doc_word_file = fopen(filename.string(), "a"); - if (doc_word_file == nullptr) { - tprintf("Error: Could not open file %s\n", filename.string()); - ASSERT_HOST(doc_word_file); - } - fprintf(doc_word_file, "%s\n", - best_choice.debug_string().string()); - fclose(doc_word_file); - } - document_words_->add_word_to_dawg(best_choice); -} - -void Dict::adjust_word(WERD_CHOICE *word, - bool nonword, - XHeightConsistencyEnum xheight_consistency, - float additional_adjust, - bool modify_rating, - bool debug) { - bool is_han = (getUnicharset().han_sid() != getUnicharset().null_sid() && - word->GetTopScriptID() == getUnicharset().han_sid()); - bool case_is_ok = (is_han || case_ok(*word, getUnicharset())); - bool punc_is_ok = (is_han || !nonword || valid_punctuation(*word)); - - float adjust_factor = additional_adjust; - float new_rating = word->rating(); - new_rating += 
kRatingPad; - const char *xheight_triggered = ""; - if (word->length() > 1) { - // Calculate x-height and y-offset consistency penalties. - switch (xheight_consistency) { - case XH_INCONSISTENT: - adjust_factor += xheight_penalty_inconsistent; - xheight_triggered = ", xhtBAD"; - break; - case XH_SUBNORMAL: - adjust_factor += xheight_penalty_subscripts; - xheight_triggered = ", xhtSUB"; - break; - case XH_GOOD: - // leave the factor alone - all good! - break; - } - // TODO(eger): if nonword is true, but there is a "core" that is a dict - // word, negate nonword status. - } else { - if (debug) { - tprintf("Consistency could not be calculated.\n"); - } - } - if (debug) { - tprintf("%sWord: %s %4.2f%s", nonword ? "Non-" : "", - word->unichar_string().string(), word->rating(), - xheight_triggered); - } - - if (nonword) { // non-dictionary word - if (case_is_ok && punc_is_ok) { - adjust_factor += segment_penalty_dict_nonword; - new_rating *= adjust_factor; - if (debug) tprintf(", W"); - } else { - adjust_factor += segment_penalty_garbage; - new_rating *= adjust_factor; - if (debug) { - if (!case_is_ok) tprintf(", C"); - if (!punc_is_ok) tprintf(", P"); - } - } - } else { // dictionary word - if (case_is_ok) { - if (!is_han && freq_dawg_ != nullptr && freq_dawg_->word_in_dawg(*word)) { - word->set_permuter(FREQ_DAWG_PERM); - adjust_factor += segment_penalty_dict_frequent_word; - new_rating *= adjust_factor; - if (debug) tprintf(", F"); - } else { - adjust_factor += segment_penalty_dict_case_ok; - new_rating *= adjust_factor; - if (debug) tprintf(", "); - } - } else { - adjust_factor += segment_penalty_dict_case_bad; - new_rating *= adjust_factor; - if (debug) tprintf(", C"); - } - } - new_rating -= kRatingPad; - if (modify_rating) word->set_rating(new_rating); - if (debug) tprintf(" %4.2f --> %4.2f\n", adjust_factor, new_rating); - word->set_adjust_factor(adjust_factor); -} - -int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const { - const WERD_CHOICE 
*word_ptr = &word; - WERD_CHOICE temp_word(word.unicharset()); - if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) { - copy_hyphen_info(&temp_word); - temp_word += word; - word_ptr = &temp_word; - } - if (word_ptr->length() == 0) return NO_PERM; - // Allocate vectors for holding current and updated - // active_dawgs and initialize them. - DawgPositionVector *active_dawgs = new DawgPositionVector[2]; - init_active_dawgs(&(active_dawgs[0]), false); - DawgArgs dawg_args(&(active_dawgs[0]), &(active_dawgs[1]), NO_PERM); - int last_index = word_ptr->length() - 1; - // Call letter_is_okay for each letter in the word. - for (int i = hyphen_base_size(); i <= last_index; ++i) { - if (!((this->*letter_is_okay_)(&dawg_args, *word_ptr->unicharset(), - word_ptr->unichar_id(i), - i == last_index))) break; - // Swap active_dawgs, constraints with the corresponding updated vector. - if (dawg_args.updated_dawgs == &(active_dawgs[1])) { - dawg_args.updated_dawgs = &(active_dawgs[0]); - ++(dawg_args.active_dawgs); - } else { - ++(dawg_args.updated_dawgs); - dawg_args.active_dawgs = &(active_dawgs[0]); - } - } - delete[] active_dawgs; - return valid_word_permuter(dawg_args.permuter, numbers_ok) ? - dawg_args.permuter : NO_PERM; -} - -bool Dict::valid_bigram(const WERD_CHOICE &word1, - const WERD_CHOICE &word2) const { - if (bigram_dawg_ == nullptr) return false; - - // Extract the core word from the middle of each word with any digits - // replaced with question marks. - int w1start, w1end, w2start, w2end; - word1.punct_stripped(&w1start, &w1end); - word2.punct_stripped(&w2start, &w2end); - - // We don't want to penalize a single guillemet, hyphen, etc. - // But our bigram list doesn't have any information about punctuation. 
- if (w1start >= w1end) return word1.length() < 3; - if (w2start >= w2end) return word2.length() < 3; - - const UNICHARSET& uchset = getUnicharset(); - GenericVector bigram_string; - bigram_string.reserve(w1end + w2end + 1); - for (int i = w1start; i < w1end; i++) { - const GenericVector& normed_ids = - getUnicharset().normed_ids(word1.unichar_id(i)); - if (normed_ids.size() == 1 && uchset.get_isdigit(normed_ids[0])) - bigram_string.push_back(question_unichar_id_); - else - bigram_string += normed_ids; - } - bigram_string.push_back(UNICHAR_SPACE); - for (int i = w2start; i < w2end; i++) { - const GenericVector& normed_ids = - getUnicharset().normed_ids(word2.unichar_id(i)); - if (normed_ids.size() == 1 && uchset.get_isdigit(normed_ids[0])) - bigram_string.push_back(question_unichar_id_); - else - bigram_string += normed_ids; - } - WERD_CHOICE normalized_word(&uchset, bigram_string.size()); - for (int i = 0; i < bigram_string.size(); ++i) { - normalized_word.append_unichar_id_space_allocated(bigram_string[i], 1, - 0.0f, 0.0f); - } - return bigram_dawg_->word_in_dawg(normalized_word); -} - -bool Dict::valid_punctuation(const WERD_CHOICE &word) { - if (word.length() == 0) return NO_PERM; - int i; - WERD_CHOICE new_word(word.unicharset()); - int last_index = word.length() - 1; - int new_len = 0; - for (i = 0; i <= last_index; ++i) { - UNICHAR_ID unichar_id = (word.unichar_id(i)); - if (getUnicharset().get_ispunctuation(unichar_id)) { - new_word.append_unichar_id(unichar_id, 1, 0.0, 0.0); - } else if (!getUnicharset().get_isalpha(unichar_id) && - !getUnicharset().get_isdigit(unichar_id)) { - return false; // neither punc, nor alpha, nor digit - } else if ((new_len = new_word.length()) == 0 || - new_word.unichar_id(new_len-1) != Dawg::kPatternUnicharID) { - new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0); - } - } - for (i = 0; i < dawgs_.size(); ++i) { - if (dawgs_[i] != nullptr && - dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION && - 
dawgs_[i]->word_in_dawg(new_word)) return true; - } - return false; -} - -/// Returns true if the language is space-delimited (not CJ, or T). -bool Dict::IsSpaceDelimitedLang() const { - const UNICHARSET &u_set = getUnicharset(); - if (u_set.han_sid() > 0) return false; - if (u_set.katakana_sid() > 0) return false; - if (u_set.thai_sid() > 0) return false; - return true; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dict.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dict.h deleted file mode 100644 index 5dc0fad9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/dict.h +++ /dev/null @@ -1,649 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dict.h -// Description: dict class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_DICT_DICT_H_ -#define TESSERACT_DICT_DICT_H_ - -#include "ambigs.h" -#include "dawg.h" -#include "dawg_cache.h" -#include "host.h" -#include "ratngs.h" -#include "stopper.h" -#include "trie.h" -#include "unicharset.h" -#include "params_training_featdef.h" - -class MATRIX; -class WERD_RES; - -#define CHARS_PER_LINE 500 -#define MAX_WERD_LENGTH (int64_t) 128 -#define NO_RATING -1 - -/** Struct used to hold temporary information about fragments. 
*/ -struct CHAR_FRAGMENT_INFO { - UNICHAR_ID unichar_id; - const CHAR_FRAGMENT *fragment; - int num_fragments; - float rating; - float certainty; -}; - -namespace tesseract { - -using DawgVector = GenericVector; - -// -// Constants -// -static const int kRatingPad = 4; -static const char kDictWildcard[] = "\u2606"; // WHITE STAR -static const int kDictMaxWildcards = 2; // max wildcards for a word -// TODO(daria): If hyphens are different in different languages and can be -// inferred from training data we should load their values dynamically. -static const char kHyphenSymbol[] = "-"; -static const char kSlashSymbol[] = "/"; -static const char kQuestionSymbol[] = "?"; -static const char kApostropheSymbol[] = "'"; -static const float kSimCertaintyScale = -10.0; // similarity matcher scaling -static const float kSimCertaintyOffset = -10.0; // similarity matcher offset -static const float kSimilarityFloor = 100.0; // worst E*L product to stop on -static const int kDocDictMaxRepChars = 4; - -// Enum for describing whether the x-height for the word is consistent: -// 0 - everything is good. -// 1 - there are one or two secondary (but consistent) baselines -// [think subscript and superscript], or there is an oversized -// first character. -// 2 - the word is inconsistent. -enum XHeightConsistencyEnum {XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT}; - -struct DawgArgs { - DawgArgs(DawgPositionVector *d, DawgPositionVector *up, PermuterType p) - : active_dawgs(d), updated_dawgs(up), permuter(p), valid_end(false) {} - - DawgPositionVector *active_dawgs; - DawgPositionVector *updated_dawgs; - PermuterType permuter; - // True if the current position is a valid word end. 
- bool valid_end; -}; - -class Dict { - public: - Dict(CCUtil* image_ptr); - ~Dict(); - const CCUtil* getCCUtil() const { - return ccutil_; - } - CCUtil* getCCUtil() { - return ccutil_; - } - const UNICHARSET& getUnicharset() const { - return getCCUtil()->unicharset; - } - UNICHARSET& getUnicharset() { - return getCCUtil()->unicharset; - } - const UnicharAmbigs &getUnicharAmbigs() const { - return getCCUtil()->unichar_ambigs; - } - - // Returns true if unichar_id is a word compounding character like - or /. - inline bool compound_marker(UNICHAR_ID unichar_id) { - const GenericVector& normed_ids = - getUnicharset().normed_ids(unichar_id); - return normed_ids.size() == 1 && - (normed_ids[0] == hyphen_unichar_id_ || - normed_ids[0] == slash_unichar_id_); - } - // Returns true if unichar_id is an apostrophe-like character that may - // separate prefix/suffix words from a main body word. - inline bool is_apostrophe(UNICHAR_ID unichar_id) { - const GenericVector& normed_ids = - getUnicharset().normed_ids(unichar_id); - return normed_ids.size() == 1 && normed_ids[0] == apostrophe_unichar_id_; - } - - /* hyphen.cpp ************************************************************/ - - /// Returns true if we've recorded the beginning of a hyphenated word. - inline bool hyphenated() const { return - !last_word_on_line_ && hyphen_word_; - } - /// Size of the base word (the part on the line before) of a hyphenated word. - inline int hyphen_base_size() const { - return this->hyphenated() ? hyphen_word_->length() : 0; - } - /// If this word is hyphenated copy the base word (the part on - /// the line before) of a hyphenated word into the given word. - /// This function assumes that word is not nullptr. - inline void copy_hyphen_info(WERD_CHOICE *word) const { - if (this->hyphenated()) { - *word = *hyphen_word_; - if (hyphen_debug_level) word->print("copy_hyphen_info: "); - } - } - /// Check whether the word has a hyphen at the end. 
- inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) const { - if (!last_word_on_line_ || first_pos) - return false; - const GenericVector& normed_ids = - getUnicharset().normed_ids(unichar_id); - return normed_ids.size() == 1 && normed_ids[0] == hyphen_unichar_id_; - } - /// Same as above, but check the unichar at the end of the word. - inline bool has_hyphen_end(const WERD_CHOICE &word) const { - int word_index = word.length() - 1; - return has_hyphen_end(word.unichar_id(word_index), word_index == 0); - } - /// Unless the previous word was the last one on the line, and the current - /// one is not (thus it is the first one on the line), erase hyphen_word_, - /// clear hyphen_active_dawgs_, update last_word_on_line_. - void reset_hyphen_vars(bool last_word_on_line); - /// Update hyphen_word_, and copy the given DawgPositionVectors into - /// hyphen_active_dawgs_ . - void set_hyphen_word(const WERD_CHOICE &word, - const DawgPositionVector &active_dawgs); - - /* permdawg.cpp ************************************************************/ - // Note: Functions in permdawg.cpp are only used by NoDangerousAmbig(). - // When this function is refactored, permdawg.cpp can be removed. - - /// Copies word into best_choice if its rating is smaller - /// than that of best_choice. - inline void update_best_choice(const WERD_CHOICE &word, - WERD_CHOICE *best_choice) { - if (word.rating() < best_choice->rating()) { - *best_choice = word; - } - } - /// Fill the given active_dawgs vector with dawgs that could contain the - /// beginning of the word. If hyphenated() returns true, copy the entries - /// from hyphen_active_dawgs_ instead. 
- void init_active_dawgs(DawgPositionVector *active_dawgs, - bool ambigs_mode) const; - // Fill the given vector with the default collection of any-length dawgs - void default_dawgs(DawgPositionVector *anylength_dawgs, - bool suppress_patterns) const; - - - /// Recursively explore all the possible character combinations in - /// the given char_choices. Use go_deeper_dawg_fxn() to explore all the - /// dawgs in the dawgs_ vector in parallel and discard invalid words. - /// - /// Allocate and return a WERD_CHOICE with the best valid word found. - WERD_CHOICE *dawg_permute_and_select( - const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit); - /// If the choice being composed so far could be a dictionary word - /// and we have not reached the end of the word keep exploring the - /// char_choices further. - void go_deeper_dawg_fxn( - const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, float certainties[], - float *limit, WERD_CHOICE *best_choice, int *attempts_left, - void *void_more_args); - - /// Pointer to go_deeper function. - void (Dict::*go_deeper_fxn_)(const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, - float certainties[], float *limit, - WERD_CHOICE *best_choice, int *attempts_left, - void *void_more_args); - // - // Helper functions for dawg_permute_and_select(). 
- // - void permute_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args); - - void append_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - const BLOB_CHOICE &blob_choice, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args); - - bool fragment_state_okay(UNICHAR_ID curr_unichar_id, - float curr_rating, float curr_certainty, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - const char *debug, int word_ending, - CHAR_FRAGMENT_INFO *char_frag_info); - - /* stopper.cpp *************************************************************/ - bool NoDangerousAmbig(WERD_CHOICE *BestChoice, - DANGERR *fixpt, - bool fix_replaceable, - MATRIX* ratings); - // Replaces the corresponding wrong ngram in werd_choice with the correct - // one. The whole correct n-gram is inserted into the ratings matrix and - // the werd_choice: no more fragments!. Rating and certainty of new entries - // in matrix and werd_choice are the sum and mean of the wrong ngram - // respectively. - // E.g. for werd_choice mystring'' and ambiguity ''->": werd_choice becomes - // mystring", with a new entry in the ratings matrix for ". - void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, - UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, - MATRIX *ratings); - - /// Returns the length of the shortest alpha run in WordChoice. - int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const; - /// Returns true if the certainty of the BestChoice word is within a - /// reasonable range of the average certainties for the best choices for - /// each character in the segmentation. 
This test is used to catch words - /// in which one character is much worse than the other characters in the - /// word (i.e. false will be returned in that case). The algorithm computes - /// the mean and std deviation of the certainties in the word with the worst - /// certainty thrown out. - int UniformCertainties(const WERD_CHOICE& word); - /// Returns true if the given best_choice is good enough to stop. - bool AcceptableChoice(const WERD_CHOICE& best_choice, - XHeightConsistencyEnum xheight_consistency); - /// Returns false if the best choice for the current word is questionable - /// and should be tried again on the second pass or should be flagged to - /// the user. - bool AcceptableResult(WERD_RES *word) const; - void EndDangerousAmbigs(); - /// Prints the current choices for this word to stdout. - void DebugWordChoices(); - /// Sets up stopper variables in preparation for the first pass. - void SettupStopperPass1(); - /// Sets up stopper variables in preparation for the second pass. - void SettupStopperPass2(); - /* context.cpp *************************************************************/ - /// Check a string to see if it matches a set of lexical rules. - int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const; - /// Returns true if the word looks like an absolute garbage - /// (e.g. image mistakenly recognized as text). - bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset); - - /* dict.cpp ****************************************************************/ - - /// Initialize Dict class - load dawgs from [lang].traineddata and - /// user-specified wordlist and parttern list. - static DawgCache *GlobalDawgCache(); - // Sets up ready for a Load or LoadLSTM. - void SetupForLoad(DawgCache *dawg_cache); - // Loads the dawgs needed by Tesseract. Call FinishLoad() after. - void Load(const STRING &lang, TessdataManager *data_file); - // Loads the dawgs needed by the LSTM model. Call FinishLoad() after. 
- void LoadLSTM(const STRING &lang, TessdataManager *data_file); - // Completes the loading process after Load() and/or LoadLSTM(). - // Returns false if no dictionaries were loaded. - bool FinishLoad(); - void End(); - - // Resets the document dictionary analogous to ResetAdaptiveClassifier. - void ResetDocumentDictionary() { - if (pending_words_ != nullptr) - pending_words_->clear(); - if (document_words_ != nullptr) - document_words_->clear(); - } - - /** - * Returns the maximal permuter code (from ccstruct/ratngs.h) if in light - * of the current state the letter at word_index in the given word - * is allowed according to at least one of the dawgs in dawgs_, - * otherwise returns NO_PERM. - * - * The state is described by void_dawg_args, which are interpreted as - * DawgArgs and contain relevant active dawg positions. - * Each entry in the active_dawgs vector contains an index - * into the dawgs_ vector and an EDGE_REF that indicates the last edge - * followed in the dawg. It also may contain a position in the punctuation - * dawg which describes surrounding punctuation (see struct DawgPosition). - * - * Input: - * At word_index 0 dawg_args->active_dawgs should contain an entry for each - * dawg that may start at the beginning of a word, with punc_ref and edge_ref - * initialized to NO_EDGE. Since the punctuation dawg includes the empty - * pattern " " (meaning anything without surrounding punctuation), having a - * single entry for the punctuation dawg will cover all dawgs reachable - * therefrom -- that includes all number and word dawgs. The only dawg - * non-reachable from the punctuation_dawg is the pattern dawg. - * If hyphen state needs to be applied, initial dawg_args->active_dawgs can - * be copied from the saved hyphen state (maintained by Dict). - * For word_index > 0 the corresponding state (active_dawgs and punc position) - * can be obtained from dawg_args->updated_dawgs passed to - * def_letter_is_okay for word_index-1. 
- * Note: the function assumes that active_dawgs, and updated_dawgs - * member variables of dawg_args are not nullptr. - * - * Output: - * The function fills in dawg_args->updated_dawgs vector with the - * entries for dawgs that contain the word up to the letter at word_index. - * - */ - - // - int def_letter_is_okay(void* void_dawg_args, const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, bool word_end) const; - - int (Dict::*letter_is_okay_)(void* void_dawg_args, - const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, bool word_end) const; - /// Calls letter_is_okay_ member function. - int LetterIsOkay(void* void_dawg_args, const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, bool word_end) const { - return (this->*letter_is_okay_)(void_dawg_args, - unicharset, unichar_id, word_end); - } - - - /// Probability in context function used by the ngram permuter. - double (Dict::*probability_in_context_)(const char* lang, - const char* context, - int context_bytes, - const char* character, - int character_bytes); - /// Calls probability_in_context_ member function. - double ProbabilityInContext(const char* context, - int context_bytes, - const char* character, - int character_bytes) { - return (this->*probability_in_context_)( - getCCUtil()->lang.string(), - context, context_bytes, - character, character_bytes); - } - - /// Default (no-op) implementation of probability in context function. - double def_probability_in_context( - const char* lang, const char* context, int context_bytes, - const char* character, int character_bytes) { - (void)lang; - (void)context; - (void)context_bytes; - (void)character; - (void)character_bytes; - return 0.0; - } - double ngram_probability_in_context(const char* lang, - const char* context, - int context_bytes, - const char* character, - int character_bytes); - - // Interface with params model. 
- float (Dict::*params_model_classify_)(const char *lang, void *path); - float ParamsModelClassify(const char *lang, void *path); - // Call params_model_classify_ member function. - float CallParamsModelClassify(void *path) { - ASSERT_HOST(params_model_classify_ != nullptr); // ASSERT_HOST -> assert - return (this->*params_model_classify_)( - getCCUtil()->lang.string(), path); - } - - inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; } - inline UNICHAR_ID WildcardID() const { return wildcard_unichar_id_; } - /// Return the number of dawgs in the dawgs_ vector. - inline int NumDawgs() const { return dawgs_.size(); } - /// Return i-th dawg pointer recorded in the dawgs_ vector. - inline const Dawg *GetDawg(int index) const { return dawgs_[index]; } - /// Return the points to the punctuation dawg. - inline const Dawg *GetPuncDawg() const { return punc_dawg_; } - /// Return the points to the unambiguous words dawg. - inline const Dawg *GetUnambigDawg() const { return unambig_dawg_; } - /// Returns the appropriate next node given the EDGE_REF. - static inline NODE_REF GetStartingNode(const Dawg *dawg, EDGE_REF edge_ref) { - if (edge_ref == NO_EDGE) return 0; // beginning to explore the dawg - NODE_REF node = dawg->next_node(edge_ref); - if (node == 0) node = NO_EDGE; // end of word - return node; - } - - // Given a unichar from a string and a given dawg, return the unichar - // we should use to match in that dawg type. (for example, in the number - // dawg, all numbers are transformed to kPatternUnicharId). - UNICHAR_ID char_for_dawg(const UNICHARSET& unicharset, UNICHAR_ID ch, - const Dawg *dawg) const { - if (!dawg) return ch; - switch (dawg->type()) { - case DAWG_TYPE_NUMBER: - return unicharset.get_isdigit(ch) ? 
Dawg::kPatternUnicharID : ch; - default: - return ch; - } - } - - /// For each of the character classes of the given unichar_id (and the - /// unichar_id itself) finds the corresponding outgoing node or self-loop - /// in the given dawg and (after checking that it is valid) records it in - /// dawg_args->updated_ative_dawgs. Updates current_permuter if any valid - /// edges were found. - void ProcessPatternEdges(const Dawg *dawg, const DawgPosition &info, - UNICHAR_ID unichar_id, bool word_end, - DawgArgs *dawg_args, - PermuterType *current_permuter) const; - - /// Read/Write/Access special purpose dawgs which contain words - /// only of a certain length (used for phrase search for - /// non-space-delimited languages). - - /// Check all the DAWGs to see if this word is in any of them. - inline static bool valid_word_permuter(uint8_t perm, bool numbers_ok) { - return (perm == SYSTEM_DAWG_PERM || perm == FREQ_DAWG_PERM || - perm == DOC_DAWG_PERM || perm == USER_DAWG_PERM || - perm == USER_PATTERN_PERM || perm == COMPOUND_PERM || - (numbers_ok && perm == NUMBER_PERM)); - } - int valid_word(const WERD_CHOICE &word, bool numbers_ok) const; - int valid_word(const WERD_CHOICE &word) const { - return valid_word(word, false); // return NO_PERM for words with digits - } - int valid_word_or_number(const WERD_CHOICE &word) const { - return valid_word(word, true); // return NUMBER_PERM for valid numbers - } - /// This function is used by api/tesseract_cube_combiner.cpp - int valid_word(const char *string) const { - WERD_CHOICE word(string, getUnicharset()); - return valid_word(word); - } - // Do the two WERD_CHOICEs form a meaningful bigram? - bool valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) const; - /// Returns true if the word contains a valid punctuation pattern. - /// Note: Since the domains of punctuation symbols and symblos - /// used in numbers are not disjoint, a valid number might contain - /// an invalid punctuation pattern (e.g. .99). 
- bool valid_punctuation(const WERD_CHOICE &word); - /// Returns true if a good answer is found for the unknown blob rating. - int good_choice(const WERD_CHOICE &choice); - /// Adds a word found on this document to the document specific dictionary. - void add_document_word(const WERD_CHOICE &best_choice); - /// Adjusts the rating of the given word. - void adjust_word(WERD_CHOICE *word, - bool nonword, XHeightConsistencyEnum xheight_consistency, - float additional_adjust, - bool modify_rating, - bool debug); - /// Set wordseg_rating_adjust_factor_ to the given value. - inline void SetWordsegRatingAdjustFactor(float f) { - wordseg_rating_adjust_factor_ = f; - } - /// Returns true if the language is space-delimited (not CJ, or T). - bool IsSpaceDelimitedLang() const; - - private: - /** Private member variables. */ - CCUtil* ccutil_; - /** - * Table that stores ambiguities computed during training - * (loaded when NoDangerousAmbigs() is called for the first time). - * Each entry i in the table stores a set of amibiguities whose - * wrong ngram starts with unichar id i. - */ - UnicharAmbigs *dang_ambigs_table_; - /** Same as above, but for ambiguities with replace flag set. */ - UnicharAmbigs *replace_ambigs_table_; - /** Additional certainty padding allowed before a word is rejected. */ - float reject_offset_; - // Cached UNICHAR_IDs: - UNICHAR_ID wildcard_unichar_id_; // kDictWildcard. - UNICHAR_ID apostrophe_unichar_id_; // kApostropheSymbol. - UNICHAR_ID question_unichar_id_; // kQuestionSymbol. - UNICHAR_ID slash_unichar_id_; // kSlashSymbol. - UNICHAR_ID hyphen_unichar_id_; // kHyphenSymbol. - // Hyphen-related variables. - WERD_CHOICE *hyphen_word_; - DawgPositionVector hyphen_active_dawgs_; - bool last_word_on_line_; - // List of lists of "equivalent" UNICHAR_IDs for the purposes of dictionary - // matching. The first member of each list is taken as canonical. 
For - // example, the first list contains hyphens and dashes with the first symbol - // being the ASCII hyphen minus. - GenericVector > equivalent_symbols_; - // Dawg Cache reference - this is who we ask to allocate/deallocate dawgs. - DawgCache *dawg_cache_; - bool dawg_cache_is_ours_; // we should delete our own dawg_cache_ - // Dawgs. - DawgVector dawgs_; - SuccessorListsVector successors_; - Trie *pending_words_; - /// The following pointers are only cached for convenience. - /// The dawgs will be deleted when dawgs_ vector is destroyed. - // bigram_dawg_ points to a dawg of two-word bigrams which always supersede if - // any of them are present on the best choices list for a word pair. - // the bigrams are stored as space-separated words where: - // (1) leading and trailing punctuation has been removed from each word and - // (2) any digits have been replaced with '?' marks. - Dawg *bigram_dawg_; - // TODO(daria): need to support multiple languages in the future, - // so maybe will need to maintain a list of dawgs of each kind. - Dawg *freq_dawg_; - Dawg *unambig_dawg_; - Dawg *punc_dawg_; - Trie *document_words_; - /// Current segmentation cost adjust factor for word rating. - /// See comments in incorporate_segcost. - float wordseg_rating_adjust_factor_; - // File for recording ambiguities discovered during dictionary search. - FILE *output_ambig_words_file_; - - public: - /// Variable members. - /// These have to be declared and initialized after image_ptr_, which contains - /// the pointer to the params vector - the member of its base CCUtil class. 
- STRING_VAR_H(user_words_file, "", "A filename of user-provided words."); - STRING_VAR_H(user_words_suffix, "", - "A suffix of user-provided words located in tessdata."); - STRING_VAR_H(user_patterns_file, "", - "A filename of user-provided patterns."); - STRING_VAR_H(user_patterns_suffix, "", - "A suffix of user-provided patterns located in tessdata."); - BOOL_VAR_H(load_system_dawg, true, "Load system word dawg."); - BOOL_VAR_H(load_freq_dawg, true, "Load frequent word dawg."); - BOOL_VAR_H(load_unambig_dawg, true, "Load unambiguous word dawg."); - BOOL_VAR_H(load_punc_dawg, true, - "Load dawg with punctuation patterns."); - BOOL_VAR_H(load_number_dawg, true, "Load dawg with number patterns."); - BOOL_VAR_H(load_bigram_dawg, true, - "Load dawg with special word bigrams."); - double_VAR_H(xheight_penalty_subscripts, 0.125, - "Score penalty (0.1 = 10%) added if there are subscripts " - "or superscripts in a word, but it is otherwise OK."); - double_VAR_H(xheight_penalty_inconsistent, 0.25, - "Score penalty (0.1 = 10%) added if an xheight is " - "inconsistent."); - double_VAR_H(segment_penalty_dict_frequent_word, 1.0, - "Score multiplier for word matches which have good case and" - "are frequent in the given language (lower is better)."); - - double_VAR_H(segment_penalty_dict_case_ok, 1.1, - "Score multiplier for word matches that have good case " - "(lower is better)."); - - double_VAR_H(segment_penalty_dict_case_bad, 1.3125, - "Default score multiplier for word matches, which may have " - "case issues (lower is better)."); - - double_VAR_H(segment_penalty_dict_nonword, 1.25, - "Score multiplier for glyph fragment segmentations which " - "do not match a dictionary word (lower is better)."); - - double_VAR_H(segment_penalty_garbage, 1.50, - "Score multiplier for poorly cased strings that are not in" - " the dictionary and generally look like garbage (lower is" - " better)."); - STRING_VAR_H(output_ambig_words_file, "", - "Output file for ambiguities found in the 
dictionary"); - INT_VAR_H(dawg_debug_level, 0, "Set to 1 for general debug info" - ", to 2 for more details, to 3 to see all the debug messages"); - INT_VAR_H(hyphen_debug_level, 0, "Debug level for hyphenated words."); - INT_VAR_H(max_viterbi_list_size, 10, "Maximum size of viterbi list."); - BOOL_VAR_H(use_only_first_uft8_step, false, - "Use only the first UTF8 step of the given string" - " when computing log probabilities."); - double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); - double_VAR_H(stopper_nondict_certainty_base, -2.50, - "Certainty threshold for non-dict words"); - double_VAR_H(stopper_phase2_certainty_rejection_offset, 1.0, - "Reject certainty offset"); - INT_VAR_H(stopper_smallword_size, 2, - "Size of dict word to be treated as non-dict word"); - double_VAR_H(stopper_certainty_per_char, -0.50, - "Certainty to add for each dict char above small word size."); - double_VAR_H(stopper_allowable_character_badness, 3.0, - "Max certaintly variation allowed in a word (in sigma)"); - INT_VAR_H(stopper_debug_level, 0, "Stopper debug level"); - BOOL_VAR_H(stopper_no_acceptable_choices, false, - "Make AcceptableChoice() always return false. Useful" - " when there is a need to explore all segmentations"); - INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list"); - STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information" - " should be printed to stdout"); - STRING_VAR_H(word_to_debug_lengths, "", - "Lengths of unichars in word_to_debug"); - INT_VAR_H(fragments_debug, 0, "Debug character fragments"); - BOOL_VAR_H(segment_nonalphabetic_script, false, - "Don't use any alphabetic-specific tricks." 
- "Set to true in the traineddata config file for" - " scripts that are cursive or inherently fixed-pitch"); - BOOL_VAR_H(save_doc_words, 0, "Save Document Words"); - double_VAR_H(doc_dict_pending_threshold, 0.0, - "Worst certainty for using pending dictionary"); - double_VAR_H(doc_dict_certainty_threshold, -2.25, "Worst certainty" - " for words that can be inserted into the document dictionary"); - INT_VAR_H(max_permuter_attempts, 10000, "Maximum number of different" - " character choices to consider during permutation." - " This limit is especially useful when user patterns" - " are specified, since overly generic patterns can result in" - " dawg search exploring an overly large number of options."); -}; -} // namespace tesseract - -#endif // THIRD_PARTY_TESSERACT_DICT_DICT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/hyphen.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/hyphen.cpp deleted file mode 100644 index 9f0feec2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/hyphen.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * File: hyphen.cpp (Formerly hyphen.c) - * Description: Functions for maintaining information about hyphenated words. - * Author: Mark Seaman, OCR Technology - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#include "dict.h" - -namespace tesseract { - -// Unless the previous word was the last one on the line, and the current -// one is not (thus it is the first one on the line), erase hyphen_word_, -// clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_. -void Dict::reset_hyphen_vars(bool last_word_on_line) { - if (!(last_word_on_line_ == true && last_word_on_line == false)) { - if (hyphen_word_ != nullptr) { - delete hyphen_word_; - hyphen_word_ = nullptr; - hyphen_active_dawgs_.clear(); - } - } - if (hyphen_debug_level) { - tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n", - last_word_on_line_, last_word_on_line); - } - last_word_on_line_ = last_word_on_line; -} - -// Update hyphen_word_, and copy the given DawgPositionVectors into -// hyphen_active_dawgs_. -void Dict::set_hyphen_word(const WERD_CHOICE &word, - const DawgPositionVector &active_dawgs) { - if (hyphen_word_ == nullptr) { - hyphen_word_ = new WERD_CHOICE(word.unicharset()); - hyphen_word_->make_bad(); - } - if (hyphen_word_->rating() > word.rating()) { - *hyphen_word_ = word; - // Remove the last unichar id as it is a hyphen, and remove - // any unichar_string/lengths that are present. 
- hyphen_word_->remove_last_unichar_id(); - hyphen_active_dawgs_ = active_dawgs; - } - if (hyphen_debug_level) { - hyphen_word_->print("set_hyphen_word: "); - } -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/matchdefs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/matchdefs.h deleted file mode 100644 index fe4a9709..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/matchdefs.h +++ /dev/null @@ -1,114 +0,0 @@ -/****************************************************************************** - ** Filename: matchdefs.h - ** Purpose: Generic interface definitions for feature matchers. - ** Author: Dan Johnson - ** History: Fri Jan 19 09:21:25 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ - -#ifndef MATCHDEFS_H -#define MATCHDEFS_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "host.h" -#include -#include "unichar.h" - -/* define the maximum number of classes defined for any matcher - and the maximum class id for any matcher. 
This must be changed - if more different classes need to be classified */ -#define MAX_NUM_CLASSES INT16_MAX -#define MAX_CLASS_ID (MAX_NUM_CLASSES - 1) - -/** a CLASS_ID is the ascii character to be associated with a class */ -using CLASS_ID = UNICHAR_ID; -#define NO_CLASS (0) - -/** a PROTO_ID is the index of a prototype within it's class. Valid proto - id's are 0 to N-1 where N is the number of prototypes that make up the - class. */ -using PROTO_ID = int16_t; -#define NO_PROTO (-1) - -/** FEATURE_ID is the index of a feature within a character description - The feature id ranges from 0 to N-1 where N is the number - of features in a character description. */ -using FEATURE_ID = uint8_t; -#define NO_FEATURE 255 -#define NOISE_FEATURE 254 -#define MISSING_PROTO 254 -#define MAX_NUM_FEAT 40 -#define MAX_FEATURE_ID 250 - -/** a RATING is the match rating returned by a classifier. - Higher is better. */ -using RATING = float; - -/** a CERTAINTY is an indication of the degree of confidence of the - classifier. Higher is better. 0 means the match is as good as the - mean of the matches seen in training. -1 means the match was one - standard deviation worse than the training matches, etc. */ -using CERTAINTY = float; - -/** define a data structure to hold a single match result */ -typedef struct -{ - CLASS_ID Class; - RATING Rating; - CERTAINTY Certainty; -} - - -MATCH_RESULT; - -/** define a data structure for holding an array of match results */ -typedef MATCH_RESULT SORTED_CLASSES[MAX_CLASS_ID + 1]; - -/*---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------*/ -/** - all feature matchers that are to be used with the high level - classifier must support the following interface. The names will, of - course, be unique for each different matcher. 
Note also that - FEATURE_STRUCT is a data structure that is defined specifically for - each feature extractor/matcher pair. -*/ - -/* misc test functions for proto id's and feature id's */ -#define IsValidFeature(Fid) ((Fid) < MAX_FEATURE_ID) -#define IsValidProto(Pid) ((Pid) >= 0) - -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif - -/* matchdefs.c */ -int CompareMatchResults -_ARGS ((MATCH_RESULT * Result1, MATCH_RESULT * Result2)); - -void PrintMatchResult _ARGS ((FILE * File, MATCH_RESULT * MatchResult)); - -void PrintMatchResults -_ARGS ((FILE * File, int N, MATCH_RESULT MatchResults[])); - -#undef _ARGS - -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/permdawg.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/permdawg.cpp deleted file mode 100644 index 87456fcf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/permdawg.cpp +++ /dev/null @@ -1,397 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: permdawg.cpp (Formerly permdawg.c) - * Description: Scale word choices by a dictionary - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 9 15:43:18 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "dawg.h" -#include "globals.h" -#include "stopper.h" -#include "tprintf.h" -#include "params.h" - -#include -#include -#include "dict.h" - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -namespace tesseract { - -/** - * @name go_deeper_dawg_fxn - * - * If the choice being composed so far could be a dictionary word - * keep exploring choices. - */ -void Dict::go_deeper_dawg_fxn( - const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, - WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) { - DawgArgs *more_args = static_cast(void_more_args); - word_ending = (char_choice_index == char_choices.size()-1); - int word_index = word->length() - 1; - if (best_choice->rating() < *limit) return; - // Look up char in DAWG - - // If the current unichar is an ngram first try calling - // letter_is_okay() for each unigram it contains separately. 
- UNICHAR_ID orig_uch_id = word->unichar_id(word_index); - bool checked_unigrams = false; - if (getUnicharset().get_isngram(orig_uch_id)) { - if (dawg_debug_level) { - tprintf("checking unigrams in an ngram %s\n", - getUnicharset().debug_str(orig_uch_id).string()); - } - int num_unigrams = 0; - word->remove_last_unichar_id(); - GenericVector encoding; - const char *ngram_str = getUnicharset().id_to_unichar(orig_uch_id); - // Since the string came out of the unicharset, failure is impossible. - ASSERT_HOST(getUnicharset().encode_string(ngram_str, true, &encoding, nullptr, - nullptr)); - bool unigrams_ok = true; - // Construct DawgArgs that reflect the current state. - DawgPositionVector unigram_active_dawgs = *(more_args->active_dawgs); - DawgPositionVector unigram_updated_dawgs; - DawgArgs unigram_dawg_args(&unigram_active_dawgs, - &unigram_updated_dawgs, - more_args->permuter); - // Check unigrams in the ngram with letter_is_okay(). - for (int i = 0; unigrams_ok && i < encoding.size(); ++i) { - UNICHAR_ID uch_id = encoding[i]; - ASSERT_HOST(uch_id != INVALID_UNICHAR_ID); - ++num_unigrams; - word->append_unichar_id(uch_id, 1, 0.0, 0.0); - unigrams_ok = (this->*letter_is_okay_)( - &unigram_dawg_args, *word->unicharset(), - word->unichar_id(word_index+num_unigrams-1), - word_ending && i == encoding.size() - 1); - (*unigram_dawg_args.active_dawgs) = *(unigram_dawg_args.updated_dawgs); - if (dawg_debug_level) { - tprintf("unigram %s is %s\n", - getUnicharset().debug_str(uch_id).string(), - unigrams_ok ? "OK" : "not OK"); - } - } - // Restore the word and copy the updated dawg state if needed. 
- while (num_unigrams-- > 0) word->remove_last_unichar_id(); - word->append_unichar_id_space_allocated(orig_uch_id, 1, 0.0, 0.0); - if (unigrams_ok) { - checked_unigrams = true; - more_args->permuter = unigram_dawg_args.permuter; - *(more_args->updated_dawgs) = *(unigram_dawg_args.updated_dawgs); - } - } - - // Check which dawgs from the dawgs_ vector contain the word - // up to and including the current unichar. - if (checked_unigrams || (this->*letter_is_okay_)( - more_args, *word->unicharset(), word->unichar_id(word_index), - word_ending)) { - // Add a new word choice - if (word_ending) { - if (dawg_debug_level) { - tprintf("found word = %s\n", word->debug_string().string()); - } - if (strcmp(output_ambig_words_file.string(), "") != 0) { - if (output_ambig_words_file_ == nullptr) { - output_ambig_words_file_ = - fopen(output_ambig_words_file.string(), "wb+"); - if (output_ambig_words_file_ == nullptr) { - tprintf("Failed to open output_ambig_words_file %s\n", - output_ambig_words_file.string()); - exit(1); - } - STRING word_str; - word->string_and_lengths(&word_str, nullptr); - word_str += " "; - fprintf(output_ambig_words_file_, "%s", word_str.string()); - } - STRING word_str; - word->string_and_lengths(&word_str, nullptr); - word_str += " "; - fprintf(output_ambig_words_file_, "%s", word_str.string()); - } - WERD_CHOICE *adjusted_word = word; - adjusted_word->set_permuter(more_args->permuter); - update_best_choice(*adjusted_word, best_choice); - } else { // search the next letter - // Make updated_* point to the next entries in the DawgPositionVector - // arrays (that were originally created in dawg_permute_and_select) - ++(more_args->updated_dawgs); - // Make active_dawgs and constraints point to the updated ones. 
- ++(more_args->active_dawgs); - permute_choices(debug, char_choices, char_choice_index + 1, - prev_char_frag_info, word, certainties, limit, - best_choice, attempts_left, more_args); - // Restore previous state to explore another letter in this position. - --(more_args->updated_dawgs); - --(more_args->active_dawgs); - } - } else { - if (dawg_debug_level) { - tprintf("last unichar not OK at index %d in %s\n", - word_index, word->debug_string().string()); - } - } -} - - -/** - * dawg_permute_and_select - * - * Recursively explore all the possible character combinations in - * the given char_choices. Use go_deeper_dawg_fxn() to search all the - * dawgs in the dawgs_ vector in parallel and discard invalid words. - * - * Allocate and return a WERD_CHOICE with the best valid word found. - */ -WERD_CHOICE *Dict::dawg_permute_and_select( - const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit) { - WERD_CHOICE *best_choice = new WERD_CHOICE(&getUnicharset()); - best_choice->make_bad(); - best_choice->set_rating(rating_limit); - if (char_choices.length() == 0 || char_choices.length() > MAX_WERD_LENGTH) - return best_choice; - DawgPositionVector *active_dawgs = - new DawgPositionVector[char_choices.length() + 1]; - init_active_dawgs(&(active_dawgs[0]), true); - DawgArgs dawg_args(&(active_dawgs[0]), &(active_dawgs[1]), NO_PERM); - WERD_CHOICE word(&getUnicharset(), MAX_WERD_LENGTH); - - float certainties[MAX_WERD_LENGTH]; - this->go_deeper_fxn_ = &tesseract::Dict::go_deeper_dawg_fxn; - int attempts_left = max_permuter_attempts; - permute_choices((dawg_debug_level) ? "permute_dawg_debug" : nullptr, - char_choices, 0, nullptr, &word, certainties, &rating_limit, best_choice, - &attempts_left, &dawg_args); - delete[] active_dawgs; - return best_choice; -} - -/** - * permute_choices - * - * Call append_choices() for each BLOB_CHOICE in BLOB_CHOICE_LIST - * with the given char_choice_index in char_choices. 
- */ -void Dict::permute_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args) { - if (debug) { - tprintf("%s permute_choices: char_choice_index=%d" - " limit=%g rating=%g, certainty=%g word=%s\n", - debug, char_choice_index, *limit, word->rating(), - word->certainty(), word->debug_string().string()); - } - if (char_choice_index < char_choices.length()) { - BLOB_CHOICE_IT blob_choice_it; - blob_choice_it.set_to_list(char_choices.get(char_choice_index)); - for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); - blob_choice_it.forward()) { - (*attempts_left)--; - append_choices(debug, char_choices, *(blob_choice_it.data()), - char_choice_index, prev_char_frag_info, word, - certainties, limit, best_choice, attempts_left, more_args); - if (*attempts_left <= 0) { - if (debug) tprintf("permute_choices(): attempts_left is 0\n"); - break; - } - } - } -} - -/** - * append_choices - * - * Checks to see whether or not the next choice is worth appending to - * the word being generated. If so then keeps going deeper into the word. - * - * This function assumes that Dict::go_deeper_fxn_ is set. - */ -void Dict::append_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - const BLOB_CHOICE &blob_choice, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args) { - int word_ending = (char_choice_index == char_choices.length() - 1); - - // Deal with fragments. 
- CHAR_FRAGMENT_INFO char_frag_info; - if (!fragment_state_okay(blob_choice.unichar_id(), blob_choice.rating(), - blob_choice.certainty(), prev_char_frag_info, debug, - word_ending, &char_frag_info)) { - return; // blob_choice must be an invalid fragment - } - // Search the next letter if this character is a fragment. - if (char_frag_info.unichar_id == INVALID_UNICHAR_ID) { - permute_choices(debug, char_choices, char_choice_index + 1, - &char_frag_info, word, certainties, limit, - best_choice, attempts_left, more_args); - return; - } - - // Add the next unichar. - float old_rating = word->rating(); - float old_certainty = word->certainty(); - uint8_t old_permuter = word->permuter(); - certainties[word->length()] = char_frag_info.certainty; - word->append_unichar_id_space_allocated( - char_frag_info.unichar_id, char_frag_info.num_fragments, - char_frag_info.rating, char_frag_info.certainty); - - // Explore the next unichar. - (this->*go_deeper_fxn_)(debug, char_choices, char_choice_index, - &char_frag_info, word_ending, word, certainties, - limit, best_choice, attempts_left, more_args); - - // Remove the unichar we added to explore other choices in it's place. - word->remove_last_unichar_id(); - word->set_rating(old_rating); - word->set_certainty(old_certainty); - word->set_permuter(old_permuter); -} - -/** - * @name fragment_state - * - * Given the current char choice and information about previously seen - * fragments, determines whether adjacent character fragments are - * present and whether they can be concatenated. 
- * - * The given prev_char_frag_info contains: - * - fragment: if not nullptr contains information about immediately - * preceding fragmented character choice - * - num_fragments: number of fragments that have been used so far - * to construct a character - * - certainty: certainty of the current choice or minimum - * certainty of all fragments concatenated so far - * - rating: rating of the current choice or sum of fragment - * ratings concatenated so far - * - * The output char_frag_info is filled in as follows: - * - character: is set to be nullptr if the choice is a non-matching - * or non-ending fragment piece; is set to unichar of the given choice - * if it represents a regular character or a matching ending fragment - * - fragment,num_fragments,certainty,rating are set as described above - * - * @returns false if a non-matching fragment is discovered, true otherwise. - */ -bool Dict::fragment_state_okay(UNICHAR_ID curr_unichar_id, - float curr_rating, float curr_certainty, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - const char *debug, int word_ending, - CHAR_FRAGMENT_INFO *char_frag_info) { - const CHAR_FRAGMENT *this_fragment = - getUnicharset().get_fragment(curr_unichar_id); - const CHAR_FRAGMENT *prev_fragment = - prev_char_frag_info != nullptr ? prev_char_frag_info->fragment : nullptr; - - // Print debug info for fragments. 
- if (debug && (prev_fragment || this_fragment)) { - tprintf("%s check fragments: choice=%s word_ending=%d\n", debug, - getUnicharset().debug_str(curr_unichar_id).string(), - word_ending); - if (prev_fragment) { - tprintf("prev_fragment %s\n", prev_fragment->to_string().string()); - } - if (this_fragment) { - tprintf("this_fragment %s\n", this_fragment->to_string().string()); - } - } - - char_frag_info->unichar_id = curr_unichar_id; - char_frag_info->fragment = this_fragment; - char_frag_info->rating = curr_rating; - char_frag_info->certainty = curr_certainty; - char_frag_info->num_fragments = 1; - if (prev_fragment && !this_fragment) { - if (debug) tprintf("Skip choice with incomplete fragment\n"); - return false; - } - if (this_fragment) { - // We are dealing with a fragment. - char_frag_info->unichar_id = INVALID_UNICHAR_ID; - if (prev_fragment) { - if (!this_fragment->is_continuation_of(prev_fragment)) { - if (debug) tprintf("Non-matching fragment piece\n"); - return false; - } - if (this_fragment->is_ending()) { - char_frag_info->unichar_id = - getUnicharset().unichar_to_id(this_fragment->get_unichar()); - char_frag_info->fragment = nullptr; - if (debug) { - tprintf("Built character %s from fragments\n", - getUnicharset().debug_str( - char_frag_info->unichar_id).string()); - } - } else { - if (debug) tprintf("Record fragment continuation\n"); - char_frag_info->fragment = this_fragment; - } - // Update certainty and rating. 
- char_frag_info->rating = - prev_char_frag_info->rating + curr_rating; - char_frag_info->num_fragments = prev_char_frag_info->num_fragments + 1; - char_frag_info->certainty = - std::min(curr_certainty, prev_char_frag_info->certainty); - } else { - if (this_fragment->is_beginning()) { - if (debug) tprintf("Record fragment beginning\n"); - } else { - if (debug) { - tprintf("Non-starting fragment piece with no prev_fragment\n"); - } - return false; - } - } - } - if (word_ending && char_frag_info->fragment) { - if (debug) tprintf("Word can not end with a fragment\n"); - return false; - } - return true; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/stopper.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/stopper.cpp deleted file mode 100644 index b02e5a19..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/stopper.cpp +++ /dev/null @@ -1,511 +0,0 @@ -/****************************************************************************** - ** Filename: stopper.c - ** Purpose: Stopping criteria for word classifier. - ** Author: Dan Johnson - ** History: Mon Apr 29 14:56:49 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#include -#include -#include -#include - -#include "stopper.h" -#include "ambigs.h" -#include "ccutil.h" -#include "dict.h" -#include "helpers.h" -#include "matchdefs.h" -#include "pageres.h" -#include "params.h" -#include "ratngs.h" -#include "unichar.h" - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ - -namespace tesseract { - -bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice, - XHeightConsistencyEnum xheight_consistency) { - float CertaintyThreshold = stopper_nondict_certainty_base; - int WordSize; - - if (stopper_no_acceptable_choices) return false; - - if (best_choice.length() == 0) return false; - - bool no_dang_ambigs = !best_choice.dangerous_ambig_found(); - bool is_valid_word = valid_word_permuter(best_choice.permuter(), false); - bool is_case_ok = case_ok(best_choice, getUnicharset()); - - if (stopper_debug_level >= 1) { - const char *xht = "UNKNOWN"; - switch (xheight_consistency) { - case XH_GOOD: xht = "NORMAL"; break; - case XH_SUBNORMAL: xht = "SUBNORMAL"; break; - case XH_INCONSISTENT: xht = "INCONSISTENT"; break; - default: xht = "UNKNOWN"; - } - tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n", - best_choice.unichar_string().string(), - (is_valid_word ? 'y' : 'n'), - (is_case_ok ? 'y' : 'n'), - xht, - best_choice.min_x_height(), - best_choice.max_x_height()); - } - // Do not accept invalid words in PASS1. 
- if (reject_offset_ <= 0.0f && !is_valid_word) return false; - if (is_valid_word && is_case_ok) { - WordSize = LengthOfShortestAlphaRun(best_choice); - WordSize -= stopper_smallword_size; - if (WordSize < 0) - WordSize = 0; - CertaintyThreshold += WordSize * stopper_certainty_per_char; - } - - if (stopper_debug_level >= 1) - tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n", - best_choice.rating(), best_choice.certainty(), CertaintyThreshold); - - if (no_dang_ambigs && - best_choice.certainty() > CertaintyThreshold && - xheight_consistency < XH_INCONSISTENT && - UniformCertainties(best_choice)) { - return true; - } else { - if (stopper_debug_level >= 1) { - tprintf("AcceptableChoice() returned false" - " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n", - no_dang_ambigs, best_choice.certainty(), - CertaintyThreshold, - UniformCertainties(best_choice)); - } - return false; - } -} - -bool Dict::AcceptableResult(WERD_RES *word) const { - if (word->best_choice == nullptr) return false; - float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_; - int WordSize; - - if (stopper_debug_level >= 1) { - tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n", - word->best_choice->debug_string().string(), - (valid_word(*word->best_choice) ? 'y' : 'n'), - (case_ok(*word->best_choice, getUnicharset()) ? 'y' : 'n'), - word->best_choice->dangerous_ambig_found() ? 'n' : 'y', - word->best_choices.singleton() ? 
'n' : 'y'); - } - - if (word->best_choice->length() == 0 || !word->best_choices.singleton()) - return false; - if (valid_word(*word->best_choice) && - case_ok(*word->best_choice, getUnicharset())) { - WordSize = LengthOfShortestAlphaRun(*word->best_choice); - WordSize -= stopper_smallword_size; - if (WordSize < 0) - WordSize = 0; - CertaintyThreshold += WordSize * stopper_certainty_per_char; - } - - if (stopper_debug_level >= 1) - tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ", - word->best_choice->certainty(), CertaintyThreshold); - - if (word->best_choice->certainty() > CertaintyThreshold && - !stopper_no_acceptable_choices) { - if (stopper_debug_level >= 1) - tprintf("ACCEPTED\n"); - return true; - } else { - if (stopper_debug_level >= 1) - tprintf("REJECTED\n"); - return false; - } -} - -bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, - DANGERR *fixpt, - bool fix_replaceable, - MATRIX *ratings) { - if (stopper_debug_level > 2) { - tprintf("\nRunning NoDangerousAmbig() for %s\n", - best_choice->debug_string().string()); - } - - // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities - // for each unichar id in BestChoice. - BLOB_CHOICE_LIST_VECTOR ambig_blob_choices; - int i; - bool ambigs_found = false; - // For each position in best_choice: - // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i] - // -- initialize wrong_ngram with a single unichar_id at best_choice[i] - // -- look for ambiguities corresponding to wrong_ngram in the list while - // adding the following unichar_ids from best_choice to wrong_ngram - // - // Repeat the above procedure twice: first time look through - // ambigs to be replaced and replace all the ambiguities found; - // second time look through dangerous ambiguities and construct - // ambig_blob_choices with fake a blob choice for each ambiguity - // and pass them to dawg_permute_and_select() to search for - // ambiguous words in the dictionaries. 
- // - // Note that during the execution of the for loop (on the first pass) - // if replacements are made the length of best_choice might change. - for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) { - bool replace = (fix_replaceable && pass == 0); - const UnicharAmbigsVector &table = replace ? - getUnicharAmbigs().replace_ambigs() : getUnicharAmbigs().dang_ambigs(); - if (!replace) { - // Initialize ambig_blob_choices with lists containing a single - // unichar id for the corresponding position in best_choice. - // best_choice consisting from only the original letters will - // have a rating of 0.0. - for (i = 0; i < best_choice->length(); ++i) { - BLOB_CHOICE_LIST *lst = new BLOB_CHOICE_LIST(); - BLOB_CHOICE_IT lst_it(lst); - // TODO(rays/antonova) Put real xheights and y shifts here. - lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i), - 0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG)); - ambig_blob_choices.push_back(lst); - } - } - UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1]; - int wrong_ngram_index; - int next_index; - int blob_index = 0; - for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i), - ++i) { - UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i); - if (stopper_debug_level > 2) { - tprintf("Looking for %s ngrams starting with %s:\n", - replace ? 
"replaceable" : "ambiguous", - getUnicharset().debug_str(curr_unichar_id).string()); - } - int num_wrong_blobs = best_choice->state(i); - wrong_ngram_index = 0; - wrong_ngram[wrong_ngram_index] = curr_unichar_id; - if (curr_unichar_id == INVALID_UNICHAR_ID || - curr_unichar_id >= table.size() || - table[curr_unichar_id] == nullptr) { - continue; // there is no ambig spec for this unichar id - } - AmbigSpec_IT spec_it(table[curr_unichar_id]); - for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) { - const AmbigSpec *ambig_spec = spec_it.data(); - wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID; - int compare = UnicharIdArrayUtils::compare(wrong_ngram, - ambig_spec->wrong_ngram); - if (stopper_debug_level > 2) { - tprintf("candidate ngram: "); - UnicharIdArrayUtils::print(wrong_ngram, getUnicharset()); - tprintf("current ngram from spec: "); - UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, getUnicharset()); - tprintf("comparison result: %d\n", compare); - } - if (compare == 0) { - // Record the place where we found an ambiguity. 
- if (fixpt != nullptr) { - UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0]; - fixpt->push_back(DANGERR_INFO( - blob_index, blob_index + num_wrong_blobs, replace, - getUnicharset().get_isngram(ambig_spec->correct_ngram_id), - leftmost_id)); - if (stopper_debug_level > 1) { - tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index, - blob_index + num_wrong_blobs, false, - getUnicharset().get_isngram( - ambig_spec->correct_ngram_id), - getUnicharset().id_to_unichar(leftmost_id)); - } - } - - if (replace) { - if (stopper_debug_level > 2) { - tprintf("replace ambiguity with %s : ", - getUnicharset().id_to_unichar( - ambig_spec->correct_ngram_id)); - UnicharIdArrayUtils::print( - ambig_spec->correct_fragments, getUnicharset()); - } - ReplaceAmbig(i, ambig_spec->wrong_ngram_size, - ambig_spec->correct_ngram_id, - best_choice, ratings); - } else if (i > 0 || ambig_spec->type != CASE_AMBIG) { - // We found dang ambig - update ambig_blob_choices. - if (stopper_debug_level > 2) { - tprintf("found ambiguity: "); - UnicharIdArrayUtils::print( - ambig_spec->correct_fragments, getUnicharset()); - } - ambigs_found = true; - for (int tmp_index = 0; tmp_index <= wrong_ngram_index; - ++tmp_index) { - // Add a blob choice for the corresponding fragment of the - // ambiguity. These fake blob choices are initialized with - // negative ratings (which are not possible for real blob - // choices), so that dawg_permute_and_select() considers any - // word not consisting of only the original letters a better - // choice and stops searching for alternatives once such a - // choice is found. 
- BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]); - bc_it.add_to_end(new BLOB_CHOICE( - ambig_spec->correct_fragments[tmp_index], -1.0, 0.0, - -1, 0, 1, 0, BCC_AMBIG)); - } - } - spec_it.forward(); - } else if (compare == -1) { - if (wrong_ngram_index+1 < ambig_spec->wrong_ngram_size && - ((next_index = wrong_ngram_index+1+i) < best_choice->length())) { - // Add the next unichar id to wrong_ngram and keep looking for - // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST. - wrong_ngram[++wrong_ngram_index] = - best_choice->unichar_id(next_index); - num_wrong_blobs += best_choice->state(next_index); - } else { - break; // no more matching ambigs in this AMBIG_SPEC_LIST - } - } else { - spec_it.forward(); - } - } // end searching AmbigSpec_LIST - } // end searching best_choice - } // end searching replace and dangerous ambigs - - // If any ambiguities were found permute the constructed ambig_blob_choices - // to see if an alternative dictionary word can be found. - if (ambigs_found) { - if (stopper_debug_level > 2) { - tprintf("\nResulting ambig_blob_choices:\n"); - for (i = 0; i < ambig_blob_choices.length(); ++i) { - print_ratings_list("", ambig_blob_choices.get(i), getUnicharset()); - tprintf("\n"); - } - } - WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0); - ambigs_found = (alt_word->rating() < 0.0); - if (ambigs_found) { - if (stopper_debug_level >= 1) { - tprintf ("Stopper: Possible ambiguous word = %s\n", - alt_word->debug_string().string()); - } - if (fixpt != nullptr) { - // Note: Currently character choices combined from fragments can only - // be generated by NoDangrousAmbigs(). This code should be updated if - // the capability to produce classifications combined from character - // fragments is added to other functions. 
- int orig_i = 0; - for (i = 0; i < alt_word->length(); ++i) { - const UNICHARSET &uchset = getUnicharset(); - bool replacement_is_ngram = - uchset.get_isngram(alt_word->unichar_id(i)); - UNICHAR_ID leftmost_id = alt_word->unichar_id(i); - if (replacement_is_ngram) { - // we have to extract the leftmost unichar from the ngram. - const char *str = uchset.id_to_unichar(leftmost_id); - int step = uchset.step(str); - if (step) leftmost_id = uchset.unichar_to_id(str, step); - } - int end_i = orig_i + alt_word->state(i); - if (alt_word->state(i) > 1 || - (orig_i + 1 == end_i && replacement_is_ngram)) { - // Compute proper blob indices. - int blob_start = 0; - for (int j = 0; j < orig_i; ++j) - blob_start += best_choice->state(j); - int blob_end = blob_start; - for (int j = orig_i; j < end_i; ++j) - blob_end += best_choice->state(j); - fixpt->push_back(DANGERR_INFO(blob_start, blob_end, true, - replacement_is_ngram, leftmost_id)); - if (stopper_debug_level > 1) { - tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i, - true, replacement_is_ngram, - uchset.id_to_unichar(leftmost_id)); - } - } - orig_i += alt_word->state(i); - } - } - } - delete alt_word; - } - if (output_ambig_words_file_ != nullptr) { - fprintf(output_ambig_words_file_, "\n"); - } - - ambig_blob_choices.delete_data_pointers(); - return !ambigs_found; -} - -void Dict::EndDangerousAmbigs() {} - -void Dict::SettupStopperPass1() { - reject_offset_ = 0.0; -} - -void Dict::SettupStopperPass2() { - reject_offset_ = stopper_phase2_certainty_rejection_offset; -} - -void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, - UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, - MATRIX *ratings) { - int num_blobs_to_replace = 0; - int begin_blob_index = 0; - int i; - // Rating and certainty for the new BLOB_CHOICE are derived from the - // replaced choices. 
- float new_rating = 0.0f; - float new_certainty = 0.0f; - BLOB_CHOICE* old_choice = nullptr; - for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) { - if (i >= wrong_ngram_begin_index) { - int num_blobs = werd_choice->state(i); - int col = begin_blob_index + num_blobs_to_replace; - int row = col + num_blobs - 1; - BLOB_CHOICE_LIST* choices = ratings->get(col, row); - ASSERT_HOST(choices != nullptr); - old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices); - ASSERT_HOST(old_choice != nullptr); - new_rating += old_choice->rating(); - new_certainty += old_choice->certainty(); - num_blobs_to_replace += num_blobs; - } else { - begin_blob_index += werd_choice->state(i); - } - } - new_certainty /= wrong_ngram_size; - // If there is no entry in the ratings matrix, add it. - MATRIX_COORD coord(begin_blob_index, - begin_blob_index + num_blobs_to_replace - 1); - if (!coord.Valid(*ratings)) { - ratings->IncreaseBandSize(coord.row - coord.col + 1); - } - if (ratings->get(coord.col, coord.row) == nullptr) - ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST); - BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row); - BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices); - if (choice != nullptr) { - // Already there. Upgrade if new rating better. - if (new_rating < choice->rating()) - choice->set_rating(new_rating); - if (new_certainty < choice->certainty()) - choice->set_certainty(new_certainty); - // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState. - } else { - // Need a new choice with the correct_ngram_id. - choice = new BLOB_CHOICE(*old_choice); - choice->set_unichar_id(correct_ngram_id); - choice->set_rating(new_rating); - choice->set_certainty(new_certainty); - choice->set_classifier(BCC_AMBIG); - choice->set_matrix_cell(coord.col, coord.row); - BLOB_CHOICE_IT it (new_choices); - it.add_to_end(choice); - } - // Remove current unichar from werd_choice. 
On the last iteration - // set the correct replacement unichar instead of removing a unichar. - for (int replaced_count = 0; replaced_count < wrong_ngram_size; - ++replaced_count) { - if (replaced_count + 1 == wrong_ngram_size) { - werd_choice->set_blob_choice(wrong_ngram_begin_index, - num_blobs_to_replace, choice); - } else { - werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1); - } - } - if (stopper_debug_level >= 1) { - werd_choice->print("ReplaceAmbig() "); - tprintf("Modified blob_choices: "); - print_ratings_list("\n", new_choices, getUnicharset()); - } -} - -int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const { - int shortest = INT32_MAX; - int curr_len = 0; - for (int w = 0; w < WordChoice.length(); ++w) { - if (getUnicharset().get_isalpha(WordChoice.unichar_id(w))) { - curr_len++; - } else if (curr_len > 0) { - if (curr_len < shortest) shortest = curr_len; - curr_len = 0; - } - } - if (curr_len > 0 && curr_len < shortest) { - shortest = curr_len; - } else if (shortest == INT32_MAX) { - shortest = 0; - } - return shortest; -} - -int Dict::UniformCertainties(const WERD_CHOICE& word) { - float Certainty; - float WorstCertainty = FLT_MAX; - float CertaintyThreshold; - double TotalCertainty; - double TotalCertaintySquared; - double Variance; - float Mean, StdDev; - int word_length = word.length(); - - if (word_length < 3) - return true; - - TotalCertainty = TotalCertaintySquared = 0.0; - for (int i = 0; i < word_length; ++i) { - Certainty = word.certainty(i); - TotalCertainty += Certainty; - TotalCertaintySquared += static_cast(Certainty) * Certainty; - if (Certainty < WorstCertainty) - WorstCertainty = Certainty; - } - - // Subtract off worst certainty from statistics. 
- word_length--; - TotalCertainty -= WorstCertainty; - TotalCertaintySquared -= static_cast(WorstCertainty) * WorstCertainty; - - Mean = TotalCertainty / word_length; - Variance = ((word_length * TotalCertaintySquared - - TotalCertainty * TotalCertainty) / - (word_length * (word_length - 1))); - if (Variance < 0.0) - Variance = 0.0; - StdDev = sqrt(Variance); - - CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev; - if (CertaintyThreshold > stopper_nondict_certainty_base) - CertaintyThreshold = stopper_nondict_certainty_base; - - if (word.certainty() < CertaintyThreshold) { - if (stopper_debug_level >= 1) - tprintf("Stopper: Non-uniform certainty = %4.1f" - " (m=%4.1f, s=%4.1f, t=%4.1f)\n", - word.certainty(), Mean, StdDev, CertaintyThreshold); - return false; - } else { - return true; - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/stopper.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/stopper.h deleted file mode 100644 index 6a4c5cd5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/stopper.h +++ /dev/null @@ -1,50 +0,0 @@ -/****************************************************************************** - ** Filename: stopper.h - ** Purpose: Stopping criteria for word classifier. - ** Author: Dan Johnson - ** History: Wed May 1 09:42:57 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -#ifndef STOPPER_H -#define STOPPER_H - -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ - -#include "genericvector.h" -#include "params.h" -#include "ratngs.h" -#include "unichar.h" - -class WERD_CHOICE; - -using BLOB_WIDTH = uint8_t; - -struct DANGERR_INFO { - DANGERR_INFO() : - begin(-1), end(-1), dangerous(false), correct_is_ngram(false), - leftmost(INVALID_UNICHAR_ID) {} - DANGERR_INFO(int b, int e, bool d, bool n, UNICHAR_ID l) : - begin(b), end(e), dangerous(d), correct_is_ngram(n), leftmost(l) {} - int begin; - int end; - bool dangerous; - bool correct_is_ngram; - UNICHAR_ID leftmost; // in the replacement, what's the leftmost character? -}; - -using DANGERR = GenericVector; - - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/trie.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/trie.cpp deleted file mode 100644 index decce380..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/trie.cpp +++ /dev/null @@ -1,728 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: trie.cpp (Formerly trie.c) - * Description: Functions to build a trie data structure. - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Fri Jul 26 12:18:10 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "trie.h" - -#include "callcpp.h" -#include "dawg.h" -#include "dict.h" -#include "genericvector.h" -#include "helpers.h" -#include "kdpair.h" - -namespace tesseract { - -const char kDoNotReverse[] = "RRP_DO_NO_REVERSE"; -const char kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"; -const char kForceReverse[] = "RRP_FORCE_REVERSE"; - -const char * const RTLReversePolicyNames[] = { - kDoNotReverse, - kReverseIfHasRTL, - kForceReverse -}; - -const char Trie::kAlphaPatternUnicode[] = "\u2000"; -const char Trie::kDigitPatternUnicode[] = "\u2001"; -const char Trie::kAlphanumPatternUnicode[] = "\u2002"; -const char Trie::kPuncPatternUnicode[] = "\u2003"; -const char Trie::kLowerPatternUnicode[] = "\u2004"; -const char Trie::kUpperPatternUnicode[] = "\u2005"; - -const char *Trie::get_reverse_policy_name(RTLReversePolicy reverse_policy) { - return RTLReversePolicyNames[reverse_policy]; -} - -// Reset the Trie to empty. -void Trie::clear() { - nodes_.delete_data_pointers(); - nodes_.clear(); - root_back_freelist_.clear(); - num_edges_ = 0; - new_dawg_node(); // Need to allocate node 0. 
-} - -bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, - int direction, bool word_end, UNICHAR_ID unichar_id, - EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const { - if (debug_level_ == 3) { - tprintf("edge_char_of() given node_ref " REFFORMAT " next_node " REFFORMAT - " direction %d word_end %d unichar_id %d, exploring node:\n", - node_ref, next_node, direction, word_end, unichar_id); - if (node_ref != NO_EDGE) { - print_node(node_ref, nodes_[node_ref]->forward_edges.size()); - } - } - if (node_ref == NO_EDGE) return false; - assert(node_ref < nodes_.size()); - EDGE_VECTOR &vec = (direction == FORWARD_EDGE) ? - nodes_[node_ref]->forward_edges : nodes_[node_ref]->backward_edges; - int vec_size = vec.size(); - if (node_ref == 0 && direction == FORWARD_EDGE) { // binary search - EDGE_INDEX start = 0; - EDGE_INDEX end = vec_size - 1; - EDGE_INDEX k; - int compare; - while (start <= end) { - k = (start + end) >> 1; // (start + end) / 2 - compare = given_greater_than_edge_rec(next_node, word_end, - unichar_id, vec[k]); - if (compare == 0) { // given == vec[k] - *edge_ptr = &(vec[k]); - *edge_index = k; - return true; - } else if (compare == 1) { // given > vec[k] - start = k + 1; - } else { // given < vec[k] - end = k - 1; - } - } - } else { // linear search - for (int i = 0; i < vec_size; ++i) { - EDGE_RECORD &edge_rec = vec[i]; - if (edge_rec_match(next_node, word_end, unichar_id, - next_node_from_edge_rec(edge_rec), - end_of_word_from_edge_rec(edge_rec), - unichar_id_from_edge_rec(edge_rec))) { - *edge_ptr = &(edge_rec); - *edge_index = i; - return true; - } - } - } - return false; // not found -} - -bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, - int direction, bool word_end, - UNICHAR_ID unichar_id) { - EDGE_VECTOR *vec = (direction == FORWARD_EDGE) ? 
- &(nodes_[node1]->forward_edges) : &(nodes_[node1]->backward_edges); - int search_index; - if (node1 == 0 && direction == FORWARD_EDGE) { - search_index = 0; // find the index to make the add sorted - while (search_index < vec->size() && - given_greater_than_edge_rec(node2, word_end, unichar_id, - (*vec)[search_index]) == 1) { - search_index++; - } - } else { - search_index = vec->size(); // add is unsorted, so index does not matter - } - EDGE_RECORD edge_rec; - link_edge(&edge_rec, node2, marker_flag, direction, word_end, unichar_id); - if (node1 == 0 && direction == BACKWARD_EDGE && - !root_back_freelist_.empty()) { - EDGE_INDEX edge_index = root_back_freelist_.pop_back(); - (*vec)[edge_index] = edge_rec; - } else if (search_index < vec->size()) { - vec->insert(edge_rec, search_index); - } else { - vec->push_back(edge_rec); - } - if (debug_level_ > 1) { - tprintf("new edge in nodes_[" REFFORMAT "]: ", node1); - print_edge_rec(edge_rec); - tprintf("\n"); - } - num_edges_++; - return true; -} - -void Trie::add_word_ending(EDGE_RECORD *edge_ptr, - NODE_REF the_next_node, - bool marker_flag, - UNICHAR_ID unichar_id) { - EDGE_RECORD *back_edge_ptr; - EDGE_INDEX back_edge_index; - ASSERT_HOST(edge_char_of(the_next_node, NO_EDGE, BACKWARD_EDGE, false, - unichar_id, &back_edge_ptr, &back_edge_index)); - if (marker_flag) { - *back_edge_ptr |= (MARKER_FLAG << flag_start_bit_); - *edge_ptr |= (MARKER_FLAG << flag_start_bit_); - } - // Mark both directions as end of word. - *back_edge_ptr |= (WERD_END_FLAG << flag_start_bit_); - *edge_ptr |= (WERD_END_FLAG << flag_start_bit_); -} - -bool Trie::add_word_to_dawg(const WERD_CHOICE &word, - const GenericVector *repetitions) { - if (word.length() <= 0) return false; // can't add empty words - if (repetitions != nullptr) ASSERT_HOST(repetitions->size() == word.length()); - // Make sure the word does not contain invalid unchar ids. 
- for (int i = 0; i < word.length(); ++i) { - if (word.unichar_id(i) < 0 || - word.unichar_id(i) >= unicharset_size_) return false; - } - - EDGE_RECORD *edge_ptr; - NODE_REF last_node = 0; - NODE_REF the_next_node; - bool marker_flag = false; - EDGE_INDEX edge_index; - int i; - int32_t still_finding_chars = true; - int32_t word_end = false; - bool add_failed = false; - bool found; - - if (debug_level_ > 1) word.print("\nAdding word: "); - - UNICHAR_ID unichar_id; - for (i = 0; i < word.length() - 1; ++i) { - unichar_id = word.unichar_id(i); - marker_flag = (repetitions != nullptr) ? (*repetitions)[i] : false; - if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id); - if (still_finding_chars) { - found = edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, word_end, - unichar_id, &edge_ptr, &edge_index); - if (found && debug_level_ > 1) { - tprintf("exploring edge " REFFORMAT " in node " REFFORMAT "\n", - edge_index, last_node); - } - if (!found) { - still_finding_chars = false; - } else if (next_node_from_edge_rec(*edge_ptr) == 0) { - // We hit the end of an existing word, but the new word is longer. - // In this case we have to disconnect the existing word from the - // backwards root node, mark the current position as end-of-word - // and add new nodes for the increased length. Disconnecting the - // existing word from the backwards root node requires a linear - // search, so it is much faster to add the longest words first, - // to avoid having to come here. - word_end = true; - still_finding_chars = false; - remove_edge(last_node, 0, word_end, unichar_id); - } else { - // We have to add a new branch here for the new word. 
- if (marker_flag) set_marker_flag_in_edge_rec(edge_ptr); - last_node = next_node_from_edge_rec(*edge_ptr); - } - } - if (!still_finding_chars) { - the_next_node = new_dawg_node(); - if (debug_level_ > 1) - tprintf("adding node " REFFORMAT "\n", the_next_node); - if (the_next_node == 0) { - add_failed = true; - break; - } - if (!add_new_edge(last_node, the_next_node, - marker_flag, word_end, unichar_id)) { - add_failed = true; - break; - } - word_end = false; - last_node = the_next_node; - } - } - the_next_node = 0; - unichar_id = word.unichar_id(i); - marker_flag = (repetitions != nullptr) ? (*repetitions)[i] : false; - if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id); - if (still_finding_chars && - edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, false, - unichar_id, &edge_ptr, &edge_index)) { - // An extension of this word already exists in the trie, so we - // only have to add the ending flags in both directions. - add_word_ending(edge_ptr, next_node_from_edge_rec(*edge_ptr), - marker_flag, unichar_id); - } else { - // Add a link to node 0. All leaves connect to node 0 so the back links can - // be used in reduction to a dawg. This root backward node has one edge - // entry for every word, (except prefixes of longer words) so it is huge. - if (!add_failed && - !add_new_edge(last_node, the_next_node, marker_flag, true, unichar_id)) - add_failed = true; - } - if (add_failed) { - tprintf("Re-initializing document dictionary...\n"); - clear(); - return false; - } else { - return true; - } -} - -NODE_REF Trie::new_dawg_node() { - TRIE_NODE_RECORD *node = new TRIE_NODE_RECORD(); - nodes_.push_back(node); - return nodes_.length() - 1; -} - -// Sort function to sort words by decreasing order of length. 
-static int sort_strings_by_dec_length(const void* v1, const void* v2) { - const STRING *s1 = static_cast(v1); - const STRING *s2 = static_cast(v2); - return s2->length() - s1->length(); -} - -bool Trie::read_and_add_word_list(const char *filename, - const UNICHARSET &unicharset, - Trie::RTLReversePolicy reverse_policy) { - GenericVector word_list; - if (!read_word_list(filename, &word_list)) return false; - word_list.sort(sort_strings_by_dec_length); - return add_word_list(word_list, unicharset, reverse_policy); -} - -bool Trie::read_word_list(const char *filename, - GenericVector* words) { - FILE *word_file; - char line_str[CHARS_PER_LINE]; - int word_count = 0; - - word_file = fopen(filename, "rb"); - if (word_file == nullptr) return false; - - while (fgets(line_str, sizeof(line_str), word_file) != nullptr) { - chomp_string(line_str); // remove newline - STRING word_str(line_str); - ++word_count; - if (debug_level_ && word_count % 10000 == 0) - tprintf("Read %d words so far\n", word_count); - words->push_back(word_str); - } - if (debug_level_) - tprintf("Read %d words total.\n", word_count); - fclose(word_file); - return true; -} - -bool Trie::add_word_list(const GenericVector &words, - const UNICHARSET &unicharset, - Trie::RTLReversePolicy reverse_policy) { - for (int i = 0; i < words.size(); ++i) { - WERD_CHOICE word(words[i].string(), unicharset); - if (word.length() == 0 || word.contains_unichar_id(INVALID_UNICHAR_ID)) - continue; - if ((reverse_policy == RRP_REVERSE_IF_HAS_RTL && - word.has_rtl_unichar_id()) || - reverse_policy == RRP_FORCE_REVERSE) { - word.reverse_and_mirror_unichar_ids(); - } - if (!word_in_dawg(word)) { - add_word_to_dawg(word); - if (!word_in_dawg(word)) { - tprintf("Error: word '%s' not in DAWG after adding it\n", - words[i].string()); - return false; - } - } - } - return true; -} - -void Trie::initialize_patterns(UNICHARSET *unicharset) { - unicharset->unichar_insert(kAlphaPatternUnicode); - alpha_pattern_ = 
unicharset->unichar_to_id(kAlphaPatternUnicode); - unicharset->unichar_insert(kDigitPatternUnicode); - digit_pattern_ = unicharset->unichar_to_id(kDigitPatternUnicode); - unicharset->unichar_insert(kAlphanumPatternUnicode); - alphanum_pattern_ = unicharset->unichar_to_id(kAlphanumPatternUnicode); - unicharset->unichar_insert(kPuncPatternUnicode); - punc_pattern_ = unicharset->unichar_to_id(kPuncPatternUnicode); - unicharset->unichar_insert(kLowerPatternUnicode); - lower_pattern_ = unicharset->unichar_to_id(kLowerPatternUnicode); - unicharset->unichar_insert(kUpperPatternUnicode); - upper_pattern_ = unicharset->unichar_to_id(kUpperPatternUnicode); - initialized_patterns_ = true; - unicharset_size_ = unicharset->size(); -} - -void Trie::unichar_id_to_patterns(UNICHAR_ID unichar_id, - const UNICHARSET &unicharset, - GenericVector *vec) const { - bool is_alpha = unicharset.get_isalpha(unichar_id); - if (is_alpha) { - vec->push_back(alpha_pattern_); - vec->push_back(alphanum_pattern_); - if (unicharset.get_islower(unichar_id)) { - vec->push_back(lower_pattern_); - } else if (unicharset.get_isupper(unichar_id)) { - vec->push_back(upper_pattern_); - } - } - if (unicharset.get_isdigit(unichar_id)) { - vec->push_back(digit_pattern_); - if (!is_alpha) vec->push_back(alphanum_pattern_); - } - if (unicharset.get_ispunctuation(unichar_id)) { - vec->push_back(punc_pattern_); - } -} - -UNICHAR_ID Trie::character_class_to_pattern(char ch) { - if (ch == 'c') { - return alpha_pattern_; - } else if (ch == 'd') { - return digit_pattern_; - } else if (ch == 'n') { - return alphanum_pattern_; - } else if (ch == 'p') { - return punc_pattern_; - } else if (ch == 'a') { - return lower_pattern_; - } else if (ch == 'A') { - return upper_pattern_; - } else { - return INVALID_UNICHAR_ID; - } -} - -bool Trie::read_pattern_list(const char *filename, - const UNICHARSET &unicharset) { - if (!initialized_patterns_) { - tprintf("please call initialize_patterns() before read_pattern_list()\n"); - 
return false; - } - - FILE *pattern_file = fopen(filename, "rb"); - if (pattern_file == nullptr) { - tprintf("Error opening pattern file %s\n", filename); - return false; - } - - int pattern_count = 0; - char string[CHARS_PER_LINE]; - while (fgets(string, CHARS_PER_LINE, pattern_file) != nullptr) { - chomp_string(string); // remove newline - // Parse the pattern and construct a unichar id vector. - // Record the number of repetitions of each unichar in the parallel vector. - WERD_CHOICE word(&unicharset); - GenericVector repetitions_vec; - const char *str_ptr = string; - int step = unicharset.step(str_ptr); - bool failed = false; - while (step > 0) { - UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID; - if (step == 1 && *str_ptr == '\\') { - ++str_ptr; - if (*str_ptr == '\\') { // regular '\' unichar that was escaped - curr_unichar_id = unicharset.unichar_to_id(str_ptr, step); - } else { - if (word.length() < kSaneNumConcreteChars) { - tprintf("Please provide at least %d concrete characters at the" - " beginning of the pattern\n", kSaneNumConcreteChars); - failed = true; - break; - } - // Parse character class from expression. - curr_unichar_id = character_class_to_pattern(*str_ptr); - } - } else { - curr_unichar_id = unicharset.unichar_to_id(str_ptr, step); - } - if (curr_unichar_id == INVALID_UNICHAR_ID) { - failed = true; - break; // failed to parse this pattern - } - word.append_unichar_id(curr_unichar_id, 1, 0.0, 0.0); - repetitions_vec.push_back(false); - str_ptr += step; - step = unicharset.step(str_ptr); - // Check if there is a repetition pattern specified after this unichar. - if (step == 1 && *str_ptr == '\\' && *(str_ptr+1) == '*') { - repetitions_vec[repetitions_vec.size()-1] = true; - str_ptr += 2; - step = unicharset.step(str_ptr); - } - } - if (failed) { - tprintf("Invalid user pattern %s\n", string); - continue; - } - // Insert the pattern into the trie. 
- if (debug_level_ > 2) { - tprintf("Inserting expanded user pattern %s\n", - word.debug_string().string()); - } - if (!this->word_in_dawg(word)) { - this->add_word_to_dawg(word, &repetitions_vec); - if (!this->word_in_dawg(word)) { - tprintf("Error: failed to insert pattern '%s'\n", string); - } - } - ++pattern_count; - } - if (debug_level_) { - tprintf("Read %d valid patterns from %s\n", pattern_count, filename); - } - fclose(pattern_file); - return true; -} - -void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, - bool word_end, UNICHAR_ID unichar_id) { - EDGE_RECORD *edge_ptr = nullptr; - EDGE_INDEX edge_index = 0; - ASSERT_HOST(edge_char_of(node1, node2, direction, word_end, - unichar_id, &edge_ptr, &edge_index)); - if (debug_level_ > 1) { - tprintf("removed edge in nodes_[" REFFORMAT "]: ", node1); - print_edge_rec(*edge_ptr); - tprintf("\n"); - } - if (direction == FORWARD_EDGE) { - nodes_[node1]->forward_edges.remove(edge_index); - } else if (node1 == 0) { - KillEdge(&nodes_[node1]->backward_edges[edge_index]); - root_back_freelist_.push_back(edge_index); - } else { - nodes_[node1]->backward_edges.remove(edge_index); - } - --num_edges_; -} - -// Some optimizations employed in add_word_to_dawg and trie_to_dawg: -// 1 Avoid insertion sorting or bubble sorting the tail root node -// (back links on node 0, a list of all the leaves.). The node is -// huge, and sorting it with n^2 time is terrible. -// 2 Avoid using GenericVector::remove on the tail root node. -// (a) During add of words to the trie, zero-out the unichars and -// keep a freelist of spaces to re-use. -// (b) During reduction, just zero-out the unichars of deleted back -// links, skipping zero entries while searching. -// 3 Avoid linear search of the tail root node. This has to be done when -// a suffix is added to an existing word. Adding words by decreasing -// length avoids this problem entirely. 
Words can still be added in -// any order, but it is faster to add the longest first. -SquishedDawg *Trie::trie_to_dawg() { - root_back_freelist_.clear(); // Will be invalided by trie_to_dawg. - if (debug_level_ > 2) { - print_all("Before reduction:", MAX_NODE_EDGES_DISPLAY); - } - NODE_MARKER reduced_nodes = new bool[nodes_.size()]; - for (int i = 0; i < nodes_.size(); i++) reduced_nodes[i] = 0; - this->reduce_node_input(0, reduced_nodes); - delete[] reduced_nodes; - - if (debug_level_ > 2) { - print_all("After reduction:", MAX_NODE_EDGES_DISPLAY); - } - // Build a translation map from node indices in nodes_ vector to - // their target indices in EDGE_ARRAY. - NODE_REF *node_ref_map = new NODE_REF[nodes_.size() + 1]; - int i, j; - node_ref_map[0] = 0; - for (i = 0; i < nodes_.size(); ++i) { - node_ref_map[i+1] = node_ref_map[i] + nodes_[i]->forward_edges.size(); - } - int num_forward_edges = node_ref_map[i]; - - // Convert nodes_ vector into EDGE_ARRAY translating the next node references - // in edges using node_ref_map. Empty nodes and backward edges are dropped. 
- EDGE_ARRAY edge_array = new EDGE_RECORD[num_forward_edges]; - EDGE_ARRAY edge_array_ptr = edge_array; - for (i = 0; i < nodes_.size(); ++i) { - TRIE_NODE_RECORD *node_ptr = nodes_[i]; - int end = node_ptr->forward_edges.size(); - for (j = 0; j < end; ++j) { - EDGE_RECORD &edge_rec = node_ptr->forward_edges[j]; - NODE_REF node_ref = next_node_from_edge_rec(edge_rec); - ASSERT_HOST(node_ref < nodes_.size()); - UNICHAR_ID unichar_id = unichar_id_from_edge_rec(edge_rec); - link_edge(edge_array_ptr, node_ref_map[node_ref], false, FORWARD_EDGE, - end_of_word_from_edge_rec(edge_rec), unichar_id); - if (j == end - 1) set_marker_flag_in_edge_rec(edge_array_ptr); - ++edge_array_ptr; - } - } - delete[] node_ref_map; - - return new SquishedDawg(edge_array, num_forward_edges, type_, lang_, - perm_, unicharset_size_, debug_level_); -} - -bool Trie::eliminate_redundant_edges(NODE_REF node, - const EDGE_RECORD &edge1, - const EDGE_RECORD &edge2) { - if (debug_level_ > 1) { - tprintf("\nCollapsing node %" PRIi64 ":\n", node); - print_node(node, MAX_NODE_EDGES_DISPLAY); - tprintf("Candidate edges: "); - print_edge_rec(edge1); - tprintf(", "); - print_edge_rec(edge2); - tprintf("\n\n"); - } - NODE_REF next_node1 = next_node_from_edge_rec(edge1); - NODE_REF next_node2 = next_node_from_edge_rec(edge2); - TRIE_NODE_RECORD *next_node2_ptr = nodes_[next_node2]; - // Translate all edges going to/from next_node2 to go to/from next_node1. - EDGE_RECORD *edge_ptr = nullptr; - EDGE_INDEX edge_index; - int i; - // The backward link in node to next_node2 will be zeroed out by the caller. 
- // Copy all the backward links in next_node2 to node next_node1 - for (i = 0; i < next_node2_ptr->backward_edges.size(); ++i) { - const EDGE_RECORD &bkw_edge = next_node2_ptr->backward_edges[i]; - NODE_REF curr_next_node = next_node_from_edge_rec(bkw_edge); - UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(bkw_edge); - int curr_word_end = end_of_word_from_edge_rec(bkw_edge); - bool marker_flag = marker_flag_from_edge_rec(bkw_edge); - add_edge_linkage(next_node1, curr_next_node, marker_flag, BACKWARD_EDGE, - curr_word_end, curr_unichar_id); - // Relocate the corresponding forward edge in curr_next_node - ASSERT_HOST(edge_char_of(curr_next_node, next_node2, FORWARD_EDGE, - curr_word_end, curr_unichar_id, - &edge_ptr, &edge_index)); - set_next_node_in_edge_rec(edge_ptr, next_node1); - } - int next_node2_num_edges = (next_node2_ptr->forward_edges.size() + - next_node2_ptr->backward_edges.size()); - if (debug_level_ > 1) { - tprintf("removed %d edges from node " REFFORMAT "\n", - next_node2_num_edges, next_node2); - } - next_node2_ptr->forward_edges.clear(); - next_node2_ptr->backward_edges.clear(); - num_edges_ -= next_node2_num_edges; - return true; -} - -bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, - UNICHAR_ID unichar_id, - NODE_REF node, - EDGE_VECTOR* backward_edges, - NODE_MARKER reduced_nodes) { - if (debug_level_ > 1) - tprintf("reduce_lettered_edges(edge=" REFFORMAT ")\n", edge_index); - // Compare each of the edge pairs with the given unichar_id. - bool did_something = false; - for (int i = edge_index; i < backward_edges->size() - 1; ++i) { - // Find the first edge that can be eliminated. 
- UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID; - while (i < backward_edges->size()) { - if (!DeadEdge((*backward_edges)[i])) { - curr_unichar_id = unichar_id_from_edge_rec((*backward_edges)[i]); - if (curr_unichar_id != unichar_id) return did_something; - if (can_be_eliminated((*backward_edges)[i])) break; - } - ++i; - } - if (i == backward_edges->size()) break; - const EDGE_RECORD &edge_rec = (*backward_edges)[i]; - // Compare it to the rest of the edges with the given unichar_id. - for (int j = i + 1; j < backward_edges->size(); ++j) { - const EDGE_RECORD &next_edge_rec = (*backward_edges)[j]; - if (DeadEdge(next_edge_rec)) continue; - UNICHAR_ID next_id = unichar_id_from_edge_rec(next_edge_rec); - if (next_id != unichar_id) break; - if (end_of_word_from_edge_rec(next_edge_rec) == - end_of_word_from_edge_rec(edge_rec) && - can_be_eliminated(next_edge_rec) && - eliminate_redundant_edges(node, edge_rec, next_edge_rec)) { - reduced_nodes[next_node_from_edge_rec(edge_rec)] = 0; - did_something = true; - KillEdge(&(*backward_edges)[j]); - } - } - } - return did_something; -} - -void Trie::sort_edges(EDGE_VECTOR *edges) { - int num_edges = edges->size(); - if (num_edges <= 1) return; - GenericVector > sort_vec; - sort_vec.reserve(num_edges); - for (int i = 0; i < num_edges; ++i) { - sort_vec.push_back(KDPairInc( - unichar_id_from_edge_rec((*edges)[i]), (*edges)[i])); - } - sort_vec.sort(); - for (int i = 0; i < num_edges; ++i) - (*edges)[i] = sort_vec[i].data; -} - -void Trie::reduce_node_input(NODE_REF node, - NODE_MARKER reduced_nodes) { - EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges; - sort_edges(&backward_edges); - if (debug_level_ > 1) { - tprintf("reduce_node_input(node=" REFFORMAT ")\n", node); - print_node(node, MAX_NODE_EDGES_DISPLAY); - } - - EDGE_INDEX edge_index = 0; - while (edge_index < backward_edges.size()) { - if (DeadEdge(backward_edges[edge_index])) continue; - UNICHAR_ID unichar_id = - 
unichar_id_from_edge_rec(backward_edges[edge_index]); - while (reduce_lettered_edges(edge_index, unichar_id, node, - &backward_edges, reduced_nodes)); - while (++edge_index < backward_edges.size()) { - UNICHAR_ID id = unichar_id_from_edge_rec(backward_edges[edge_index]); - if (!DeadEdge(backward_edges[edge_index]) && id != unichar_id) break; - } - } - reduced_nodes[node] = true; // mark as reduced - - if (debug_level_ > 1) { - tprintf("Node " REFFORMAT " after reduction:\n", node); - print_node(node, MAX_NODE_EDGES_DISPLAY); - } - - for (int i = 0; i < backward_edges.size(); ++i) { - if (DeadEdge(backward_edges[i])) continue; - NODE_REF next_node = next_node_from_edge_rec(backward_edges[i]); - if (next_node != 0 && !reduced_nodes[next_node]) { - reduce_node_input(next_node, reduced_nodes); - } - } -} - -void Trie::print_node(NODE_REF node, int max_num_edges) const { - if (node == NO_EDGE) return; // nothing to print - TRIE_NODE_RECORD *node_ptr = nodes_[node]; - int num_fwd = node_ptr->forward_edges.size(); - int num_bkw = node_ptr->backward_edges.size(); - EDGE_VECTOR *vec; - for (int dir = 0; dir < 2; ++dir) { - if (dir == 0) { - vec = &(node_ptr->forward_edges); - tprintf(REFFORMAT " (%d %d): ", node, num_fwd, num_bkw); - } else { - vec = &(node_ptr->backward_edges); - tprintf("\t"); - } - int i; - for (i = 0; (dir == 0 ? i < num_fwd : i < num_bkw) && - i < max_num_edges; ++i) { - if (DeadEdge((*vec)[i])) continue; - print_edge_rec((*vec)[i]); - tprintf(" "); - } - if (dir == 0 ? 
i < num_fwd : i < num_bkw) tprintf("..."); - tprintf("\n"); - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/trie.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/trie.h deleted file mode 100644 index 23d35fa7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/dict/trie.h +++ /dev/null @@ -1,436 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: trie.h (Formerly trie.h) - * Description: Functions to build a trie data structure. - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Fri Jul 26 11:26:34 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -#ifndef TRIE_H -#define TRIE_H - -#include "dawg.h" -#include "genericvector.h" - -class UNICHARSET; - -// Note: if we consider either NODE_REF or EDGE_INDEX to ever exceed -// max int32, we will need to change GenericVector to use int64 for size -// and address indices. This does not seem to be needed immediately, -// since currently the largest number of edges limit used by tesseract -// (kMaxNumEdges in wordlist2dawg.cpp) is far less than max int32. 
-// There are also int casts below to satisfy the WIN32 compiler that would -// need to be changed. -// It might be cleanest to change the types of most of the Trie/Dawg related -// typedefs to int and restrict the casts to extracting these values from -// the 64 bit EDGE_RECORD. -using EDGE_INDEX = int64_t ; // index of an edge in a given node -using NODE_MARKER = bool *; -using EDGE_VECTOR = GenericVector ; - -struct TRIE_NODE_RECORD { - EDGE_VECTOR forward_edges; - EDGE_VECTOR backward_edges; -}; -using TRIE_NODES = GenericVector ; - -namespace tesseract { - -/** - * Concrete class for Trie data structure that allows to store a list of - * words (extends Dawg base class) as well as dynamically add new words. - * This class stores a vector of pointers to TRIE_NODE_RECORDs, each of - * which has a vector of forward and backward edges. - */ -class Trie : public Dawg { - public: - enum RTLReversePolicy { - RRP_DO_NO_REVERSE, - RRP_REVERSE_IF_HAS_RTL, - RRP_FORCE_REVERSE, - }; - - // Minimum number of concrete characters at the beginning of user patterns. - static const int kSaneNumConcreteChars = 0; - // Various unicode whitespace characters are used to denote unichar patterns, - // (character classifier would never produce these whitespace characters as a - // valid classification). - static const char kAlphaPatternUnicode[]; - static const char kDigitPatternUnicode[]; - static const char kAlphanumPatternUnicode[]; - static const char kPuncPatternUnicode[]; - static const char kLowerPatternUnicode[]; - static const char kUpperPatternUnicode[]; - - static const char *get_reverse_policy_name( - RTLReversePolicy reverse_policy); - - // max_num_edges argument allows limiting the amount of memory this - // Trie can consume (if a new word insert would cause the Trie to - // contain more edges than max_num_edges, all the edges are cleared - // so that new inserts can proceed). 
- Trie(DawgType type, const STRING &lang, PermuterType perm, - int unicharset_size, int debug_level) - : Dawg(type, lang, perm, debug_level) { - init(unicharset_size); - num_edges_ = 0; - deref_node_index_mask_ = ~letter_mask_; - new_dawg_node(); // need to allocate node 0 - initialized_patterns_ = false; - } - virtual ~Trie() { nodes_.delete_data_pointers(); } - - // Reset the Trie to empty. - void clear(); - - /** Returns the edge that corresponds to the letter out of this node. */ - EDGE_REF edge_char_of(NODE_REF node_ref, UNICHAR_ID unichar_id, - bool word_end) const { - EDGE_RECORD *edge_ptr; - EDGE_INDEX edge_index; - if (!edge_char_of(node_ref, NO_EDGE, FORWARD_EDGE, word_end, unichar_id, - &edge_ptr, &edge_index)) return NO_EDGE; - return make_edge_ref(node_ref, edge_index); - } - - /** - * Fills the given NodeChildVector with all the unichar ids (and the - * corresponding EDGE_REFs) for which there is an edge out of this node. - */ - void unichar_ids_of(NODE_REF node, NodeChildVector *vec, - bool word_end) const { - const EDGE_VECTOR &forward_edges = - nodes_[static_cast(node)]->forward_edges; - for (int i = 0; i < forward_edges.size(); ++i) { - if (!word_end || end_of_word_from_edge_rec(forward_edges[i])) { - vec->push_back(NodeChild(unichar_id_from_edge_rec(forward_edges[i]), - make_edge_ref(node, i))); - } - } - } - - /** - * Returns the next node visited by following the edge - * indicated by the given EDGE_REF. - */ - NODE_REF next_node(EDGE_REF edge_ref) const { - if (edge_ref == NO_EDGE || num_edges_ == 0) return NO_EDGE; - return next_node_from_edge_rec(*deref_edge_ref(edge_ref)); - } - - /** - * Returns true if the edge indicated by the given EDGE_REF - * marks the end of a word. - */ - bool end_of_word(EDGE_REF edge_ref) const { - if (edge_ref == NO_EDGE || num_edges_ == 0) return false; - return end_of_word_from_edge_rec(*deref_edge_ref(edge_ref)); - } - - /** Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. 
*/ - UNICHAR_ID edge_letter(EDGE_REF edge_ref) const { - if (edge_ref == NO_EDGE || num_edges_ == 0) return INVALID_UNICHAR_ID; - return unichar_id_from_edge_rec(*deref_edge_ref(edge_ref)); - } - // Sets the UNICHAR_ID in the given edge_rec to unicharset_size_, marking - // the edge dead. - void KillEdge(EDGE_RECORD* edge_rec) const { - *edge_rec &= ~letter_mask_; - *edge_rec |= (unicharset_size_ << LETTER_START_BIT); - } - bool DeadEdge(const EDGE_RECORD& edge_rec) const { - return unichar_id_from_edge_rec(edge_rec) == unicharset_size_; - } - - // Prints the contents of the node indicated by the given NODE_REF. - // At most max_num_edges will be printed. - void print_node(NODE_REF node, int max_num_edges) const; - - // Writes edges from nodes_ to an EDGE_ARRAY and creates a SquishedDawg. - // Eliminates redundant edges and returns the pointer to the SquishedDawg. - // Note: the caller is responsible for deallocating memory associated - // with the returned SquishedDawg pointer. - SquishedDawg *trie_to_dawg(); - - // Reads a list of words from the given file and adds into the Trie. - // Calls WERD_CHOICE::reverse_unichar_ids_if_rtl() according to the reverse - // policy and information in the unicharset. - // Returns false on error. - bool read_and_add_word_list(const char *filename, - const UNICHARSET &unicharset, - Trie::RTLReversePolicy reverse); - - // Reads a list of words from the given file. - // Returns false on error. - bool read_word_list(const char *filename, - GenericVector* words); - // Adds a list of words previously read using read_word_list to the trie - // using the given unicharset and reverse_policy to convert to unichar-ids. - // Returns false on error. - bool add_word_list(const GenericVector &words, - const UNICHARSET &unicharset, - Trie::RTLReversePolicy reverse_policy); - - // Inserts the list of patterns from the given file into the Trie. - // The pattern list file should contain one pattern per line in UTF-8 format. 
- // - // Each pattern can contain any non-whitespace characters, however only the - // patterns that contain characters from the unicharset of the corresponding - // language will be useful. - // The only meta character is '\'. To be used in a pattern as an ordinary - // string it should be escaped with '\' (e.g. string "C:\Documents" should - // be written in the patterns file as "C:\\Documents"). - // This function supports a very limited regular expression syntax. One can - // express a character, a certain character class and a number of times the - // entity should be repeated in the pattern. - // - // To denote a character class use one of: - // \c - unichar for which UNICHARSET::get_isalpha() is true (character) - // \d - unichar for which UNICHARSET::get_isdigit() is true - // \n - unichar for which UNICHARSET::get_isdigit() and - // UNICHARSET::isalpha() are true - // \p - unichar for which UNICHARSET::get_ispunct() is true - // \a - unichar for which UNICHARSET::get_islower() is true - // \A - unichar for which UNICHARSET::get_isupper() is true - // - // \* could be specified after each character or pattern to indicate that - // the character/pattern can be repeated any number of times before the next - // character/pattern occurs. - // - // Examples: - // 1-8\d\d-GOOG-411 will be expanded to strings: - // 1-800-GOOG-411, 1-801-GOOG-411, ... 1-899-GOOG-411. - // - // http://www.\n\*.com will be expanded to strings like: - // http://www.a.com http://www.a123.com ... http://www.ABCDefgHIJKLMNop.com - // - // Note: In choosing which patterns to include please be aware of the fact - // providing very generic patterns will make tesseract run slower. - // For example \n\* at the beginning of the pattern will make Tesseract - // consider all the combinations of proposed character choices for each - // of the segmentations, which will be unacceptably slow. 
- // Because of potential problems with speed that could be difficult to - // identify, each user pattern has to have at least kSaneNumConcreteChars - // concrete characters from the unicharset at the beginning. - bool read_pattern_list(const char *filename, const UNICHARSET &unicharset); - - // Initializes the values of *_pattern_ unichar ids. - // This function should be called before calling read_pattern_list(). - void initialize_patterns(UNICHARSET *unicharset); - - // Fills in the given unichar id vector with the unichar ids that represent - // the patterns of the character classes of the given unichar_id. - void unichar_id_to_patterns(UNICHAR_ID unichar_id, - const UNICHARSET &unicharset, - GenericVector *vec) const; - - // Returns the given EDGE_REF if the EDGE_RECORD that it points to has - // a self loop and the given unichar_id matches the unichar_id stored in the - // EDGE_RECORD, returns NO_EDGE otherwise. - virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, - UNICHAR_ID unichar_id, - bool word_end) const { - if (edge_ref == NO_EDGE) return NO_EDGE; - EDGE_RECORD *edge_rec = deref_edge_ref(edge_ref); - return (marker_flag_from_edge_rec(*edge_rec) && - unichar_id == unichar_id_from_edge_rec(*edge_rec) && - word_end == end_of_word_from_edge_rec(*edge_rec)) ? - edge_ref : NO_EDGE; - } - - // Adds a word to the Trie (creates the necessary nodes and edges). - // - // If repetitions vector is not nullptr, each entry in the vector indicates - // whether the unichar id with the corresponding index in the word is allowed - // to repeat an unlimited number of times. For each entry that is true, MARKER - // flag of the corresponding edge created for this unichar id is set to true). - // - // Return true if add succeeded, false otherwise (e.g. when a word contained - // an invalid unichar id or the trie was getting too large and was cleared). 
- bool add_word_to_dawg(const WERD_CHOICE &word, - const GenericVector *repetitions); - bool add_word_to_dawg(const WERD_CHOICE &word) { - return add_word_to_dawg(word, nullptr); - } - - protected: - // The structure of an EDGE_REF for Trie edges is as follows: - // [LETTER_START_BIT, flag_start_bit_): - // edge index in *_edges in a TRIE_NODE_RECORD - // [flag_start_bit, 30th bit]: node index in nodes (TRIE_NODES vector) - // - // With this arrangement there are enough bits to represent edge indices - // (each node can have at most unicharset_size_ forward edges and - // the position of flag_start_bit is set to be log2(unicharset_size_)). - // It is also possible to accommodate a maximum number of nodes that is at - // least as large as that of the SquishedDawg representation (in SquishedDawg - // each EDGE_RECORD has 32-(flag_start_bit+NUM_FLAG_BITS) bits to represent - // the next node index). - // - - // Returns the pointer to EDGE_RECORD after decoding the location - // of the edge from the information in the given EDGE_REF. - // This function assumes that EDGE_REF holds valid node/edge indices. - inline EDGE_RECORD *deref_edge_ref(EDGE_REF edge_ref) const { - int edge_index = static_cast( - (edge_ref & letter_mask_) >> LETTER_START_BIT); - int node_index = static_cast( - (edge_ref & deref_node_index_mask_) >> flag_start_bit_); - TRIE_NODE_RECORD *node_rec = nodes_[node_index]; - return &(node_rec->forward_edges[edge_index]); - } - /** Constructs EDGE_REF from the given node_index and edge_index. */ - inline EDGE_REF make_edge_ref(NODE_REF node_index, - EDGE_INDEX edge_index) const { - return ((node_index << flag_start_bit_) | - (edge_index << LETTER_START_BIT)); - } - /** Sets up this edge record to the requested values. 
*/ - inline void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats, - int direction, bool word_end, UNICHAR_ID unichar_id) { - EDGE_RECORD flags = 0; - if (repeats) flags |= MARKER_FLAG; - if (word_end) flags |= WERD_END_FLAG; - if (direction == BACKWARD_EDGE) flags |= DIRECTION_FLAG; - *edge = ((nxt << next_node_start_bit_) | - (static_cast(flags) << flag_start_bit_) | - (static_cast(unichar_id) << LETTER_START_BIT)); - } - /** Prints the given EDGE_RECORD. */ - inline void print_edge_rec(const EDGE_RECORD &edge_rec) const { - tprintf("|" REFFORMAT "|%s%s%s|%d|", next_node_from_edge_rec(edge_rec), - marker_flag_from_edge_rec(edge_rec) ? "R," : "", - (direction_from_edge_rec(edge_rec) == FORWARD_EDGE) ? "F" : "B", - end_of_word_from_edge_rec(edge_rec) ? ",E" : "", - unichar_id_from_edge_rec(edge_rec)); - } - // Returns true if the next node in recorded the given EDGE_RECORD - // has exactly one forward edge. - inline bool can_be_eliminated(const EDGE_RECORD &edge_rec) { - NODE_REF node_ref = next_node_from_edge_rec(edge_rec); - return (node_ref != NO_EDGE && - nodes_[static_cast(node_ref)]->forward_edges.size() == 1); - } - - // Prints the contents of the Trie. - // At most max_num_edges will be printed for each node. - void print_all(const char* msg, int max_num_edges) { - tprintf("\n__________________________\n%s\n", msg); - for (int i = 0; i < nodes_.size(); ++i) print_node(i, max_num_edges); - tprintf("__________________________\n"); - } - - // Finds the edge with the given direction, word_end and unichar_id - // in the node indicated by node_ref. Fills in the pointer to the - // EDGE_RECORD and the index of the edge with the the values - // corresponding to the edge found. Returns true if an edge was found. 
- bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, - int direction, bool word_end, UNICHAR_ID unichar_id, - EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const; - - // Adds an single edge linkage between node1 and node2 in the direction - // indicated by direction argument. - bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats, - int direction, bool word_end, - UNICHAR_ID unichar_id); - - // Adds forward edge linkage from node1 to node2 and the corresponding - // backward edge linkage in the other direction. - bool add_new_edge(NODE_REF node1, NODE_REF node2, - bool repeats, bool word_end, UNICHAR_ID unichar_id) { - return (add_edge_linkage(node1, node2, repeats, FORWARD_EDGE, - word_end, unichar_id) && - add_edge_linkage(node2, node1, repeats, BACKWARD_EDGE, - word_end, unichar_id)); - } - - // Sets the word ending flags in an already existing edge pair. - // Returns true on success. - void add_word_ending(EDGE_RECORD *edge, - NODE_REF the_next_node, - bool repeats, - UNICHAR_ID unichar_id); - - // Allocates space for a new node in the Trie. - NODE_REF new_dawg_node(); - - // Removes a single edge linkage to between node1 and node2 in the - // direction indicated by direction argument. - void remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, - bool word_end, UNICHAR_ID unichar_id); - - // Removes forward edge linkage from node1 to node2 and the corresponding - // backward edge linkage in the other direction. - void remove_edge(NODE_REF node1, NODE_REF node2, - bool word_end, UNICHAR_ID unichar_id) { - remove_edge_linkage(node1, node2, FORWARD_EDGE, word_end, unichar_id); - remove_edge_linkage(node2, node1, BACKWARD_EDGE, word_end, unichar_id); - } - - // Compares edge1 and edge2 in the given node to see if they point to two - // next nodes that could be collapsed. If they do, performs the reduction - // and returns true. 
- bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1, - const EDGE_RECORD &edge2); - - // Assuming that edge_index indicates the first edge in a group of edges - // in this node with a particular letter value, looks through these edges - // to see if any of them can be collapsed. If so does it. Returns to the - // caller when all edges with this letter have been reduced. - // Returns true if further reduction is possible with this same letter. - bool reduce_lettered_edges(EDGE_INDEX edge_index, - UNICHAR_ID unichar_id, - NODE_REF node, - EDGE_VECTOR* backward_edges, - NODE_MARKER reduced_nodes); - - /** - * Order num_edges of consecutive EDGE_RECORDS in the given EDGE_VECTOR in - * increasing order of unichar ids. This function is normally called - * for all edges in a single node, and since number of edges in each node - * is usually quite small, selection sort is used. - */ - void sort_edges(EDGE_VECTOR *edges); - - /** Eliminates any redundant edges from this node in the Trie. */ - void reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes); - - // Returns the pattern unichar id for the given character class code. - UNICHAR_ID character_class_to_pattern(char ch); - - // Member variables - TRIE_NODES nodes_; // vector of nodes in the Trie - uint64_t num_edges_; // sum of all edges (forward and backward) - uint64_t deref_direction_mask_; // mask for EDGE_REF to extract direction - uint64_t deref_node_index_mask_; // mask for EDGE_REF to extract node index - // Freelist of edges in the root backwards node that were previously zeroed. - GenericVector root_back_freelist_; - // Variables for translating character class codes denoted in user patterns - // file to the unichar ids used to represent them in a Trie. 
- bool initialized_patterns_; - UNICHAR_ID alpha_pattern_; - UNICHAR_ID digit_pattern_; - UNICHAR_ID alphanum_pattern_; - UNICHAR_ID punc_pattern_; - UNICHAR_ID lower_pattern_; - UNICHAR_ID upper_pattern_; -}; -} // namespace tesseract - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/Makefile.am deleted file mode 100644 index 5a677876..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/cutil \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/arch \ - -I$(top_srcdir)/src/viewer \ - -I$(top_srcdir)/src/classify \ - -I$(top_srcdir)/src/dict \ - -I$(top_srcdir)/src/lstm - -SUBDIRS = -AM_CXXFLAGS = $(OPENMP_CXXFLAGS) - -if !NO_TESSDATA_PREFIX -AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@ -endif - -if VISIBILITY -AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -AM_CPPFLAGS += -DTESS_EXPORTS -endif - -pkginclude_HEADERS = - -noinst_HEADERS = convolve.h ctc.h -noinst_HEADERS += fullyconnected.h functions.h input.h -noinst_HEADERS += lstm.h lstmrecognizer.h lstmtrainer.h maxpool.h -noinst_HEADERS += network.h networkbuilder.h networkio.h networkscratch.h -noinst_HEADERS += parallel.h plumbing.h recodebeam.h reconfig.h reversed.h -noinst_HEADERS += series.h static_shape.h stridemap.h -noinst_HEADERS += tfnetwork.h weightmatrix.h - -noinst_LTLIBRARIES = libtesseract_lstm.la - -libtesseract_lstm_la_SOURCES = \ - convolve.cpp ctc.cpp fullyconnected.cpp functions.cpp input.cpp \ - lstm.cpp lstmrecognizer.cpp lstmtrainer.cpp maxpool.cpp \ - networkbuilder.cpp network.cpp networkio.cpp \ - parallel.cpp plumbing.cpp recodebeam.cpp reconfig.cpp reversed.cpp \ - series.cpp stridemap.cpp tfnetwork.cpp weightmatrix.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/convolve.cpp 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/convolve.cpp deleted file mode 100644 index 32518017..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/convolve.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: convolve.cpp -// Description: Convolutional layer that stacks the inputs over its rectangle -// and pulls in random data to fill out-of-input inputs. -// Output is therefore same size as its input, but deeper. -// Author: Ray Smith -// Created: Tue Mar 18 16:56:06 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "convolve.h" - -#include "networkscratch.h" -#include "serialis.h" - -namespace tesseract { - -Convolve::Convolve(const STRING& name, int ni, int half_x, int half_y) - : Network(NT_CONVOLVE, name, ni, ni * (2*half_x + 1) * (2*half_y + 1)), - half_x_(half_x), half_y_(half_y) { -} - -// Writes to the given file. Returns false in case of error. -bool Convolve::Serialize(TFile* fp) const { - return Network::Serialize(fp) && - fp->Serialize(&half_x_) && - fp->Serialize(&half_y_); -} - -// Reads from the given file. Returns false in case of error. 
-bool Convolve::DeSerialize(TFile* fp) { - if (!fp->DeSerialize(&half_x_)) return false; - if (!fp->DeSerialize(&half_y_)) return false; - no_ = ni_ * (2*half_x_ + 1) * (2*half_y_ + 1); - return true; -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. -void Convolve::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - output->Resize(input, no_); - int y_scale = 2 * half_y_ + 1; - StrideMap::Index dest_index(output->stride_map()); - do { - // Stack x_scale groups of y_scale * ni_ inputs together. - int t = dest_index.t(); - int out_ix = 0; - for (int x = -half_x_; x <= half_x_; ++x, out_ix += y_scale * ni_) { - StrideMap::Index x_index(dest_index); - if (!x_index.AddOffset(x, FD_WIDTH)) { - // This x is outside the image. - output->Randomize(t, out_ix, y_scale * ni_, randomizer_); - } else { - int out_iy = out_ix; - for (int y = -half_y_; y <= half_y_; ++y, out_iy += ni_) { - StrideMap::Index y_index(x_index); - if (!y_index.AddOffset(y, FD_HEIGHT)) { - // This y is outside the image. - output->Randomize(t, out_iy, ni_, randomizer_); - } else { - output->CopyTimeStepGeneral(t, out_iy, ni_, input, y_index.t(), 0); - } - } - } - } - } while (dest_index.Increment()); - if (debug) DisplayForward(*output); -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool Convolve::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - back_deltas->Resize(fwd_deltas, ni_); - NetworkScratch::IO delta_sum; - delta_sum.ResizeFloat(fwd_deltas, ni_, scratch); - delta_sum->Zero(); - int y_scale = 2 * half_y_ + 1; - StrideMap::Index src_index(fwd_deltas.stride_map()); - do { - // Stack x_scale groups of y_scale * ni_ inputs together. 
- int t = src_index.t(); - int out_ix = 0; - for (int x = -half_x_; x <= half_x_; ++x, out_ix += y_scale * ni_) { - StrideMap::Index x_index(src_index); - if (x_index.AddOffset(x, FD_WIDTH)) { - int out_iy = out_ix; - for (int y = -half_y_; y <= half_y_; ++y, out_iy += ni_) { - StrideMap::Index y_index(x_index); - if (y_index.AddOffset(y, FD_HEIGHT)) { - fwd_deltas.AddTimeStepPart(t, out_iy, ni_, - delta_sum->f(y_index.t())); - } - } - } - } - } while (src_index.Increment()); - back_deltas->CopyAll(*delta_sum); - return true; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/convolve.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/convolve.h deleted file mode 100644 index fcf5ccf0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/convolve.h +++ /dev/null @@ -1,73 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: convolve.h -// Description: Convolutional layer that stacks the inputs over its rectangle -// and pulls in random data to fill out-of-input inputs. -// Output is therefore same size as its input, but deeper. -// Author: Ray Smith -// Created: Tue Mar 18 16:45:34 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_CONVOLVE_H_ -#define TESSERACT_LSTM_CONVOLVE_H_ - -#include "genericvector.h" -#include "matrix.h" -#include "network.h" - -namespace tesseract { - -// Makes each time-step deeper by stacking inputs over its rectangle. Does not -// affect the size of its input. Achieves this by bringing in random values in -// out-of-input areas. -class Convolve : public Network { - public: - // The area of convolution is 2*half_x + 1 by 2*half_y + 1, forcing it to - // always be odd, so the center is the current pixel. - Convolve(const STRING& name, int ni, int half_x, int half_y); - virtual ~Convolve() = default; - - STRING spec() const override { - STRING spec; - spec.add_str_int("C", half_x_ * 2 + 1); - spec.add_str_int(",", half_y_ * 2 + 1); - return spec; - } - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; - - protected: - // Serialized data. - int32_t half_x_; - int32_t half_y_; -}; - -} // namespace tesseract. 
- - -#endif // TESSERACT_LSTM_SUBSAMPLE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/ctc.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/ctc.cpp deleted file mode 100644 index 53489dfe..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/ctc.cpp +++ /dev/null @@ -1,414 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ctc.cpp -// Description: Slightly improved standard CTC to compute the targets. -// Author: Ray Smith -// Created: Wed Jul 13 15:50:06 PDT 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#include "ctc.h" - -#include -#include // for FLT_MAX -#include - -#include "genericvector.h" -#include "host.h" -#include "matrix.h" -#include "networkio.h" - -#include "network.h" -#include "scrollview.h" - -namespace tesseract { - -// Magic constants that keep CTC stable. -// Minimum probability limit for softmax input to ctc_loss. -const float CTC::kMinProb_ = 1e-12; -// Maximum absolute argument to exp(). -const double CTC::kMaxExpArg_ = 80.0; -// Minimum probability for total prob in time normalization. -const double CTC::kMinTotalTimeProb_ = 1e-8; -// Minimum probability for total prob in final normalization. -const double CTC::kMinTotalFinalProb_ = 1e-6; - -// Builds a target using CTC. 
Slightly improved as follows: -// Includes normalizations and clipping for stability. -// labels should be pre-padded with nulls everywhere. -// labels can be longer than the time sequence, but the total number of -// essential labels (non-null plus nulls between equal labels) must not exceed -// the number of timesteps in outputs. -// outputs is the output of the network, and should have already been -// normalized with NormalizeProbs. -// On return targets is filled with the computed targets. -// Returns false if there is insufficient time for the labels. -/* static */ -bool CTC::ComputeCTCTargets(const GenericVector& labels, int null_char, - const GENERIC_2D_ARRAY& outputs, - NetworkIO* targets) { - std::unique_ptr ctc(new CTC(labels, null_char, outputs)); - if (!ctc->ComputeLabelLimits()) { - return false; // Not enough time. - } - // Generate simple targets purely from the truth labels by spreading them - // evenly over time. - GENERIC_2D_ARRAY simple_targets; - ctc->ComputeSimpleTargets(&simple_targets); - // Add the simple targets as a starter bias to the network outputs. - float bias_fraction = ctc->CalculateBiasFraction(); - simple_targets *= bias_fraction; - ctc->outputs_ += simple_targets; - NormalizeProbs(&ctc->outputs_); - // Run regular CTC on the biased outputs. - // Run forward and backward - GENERIC_2D_ARRAY log_alphas, log_betas; - ctc->Forward(&log_alphas); - ctc->Backward(&log_betas); - // Normalize and come out of log space with a clipped softmax over time. 
- log_alphas += log_betas; - ctc->NormalizeSequence(&log_alphas); - ctc->LabelsToClasses(log_alphas, targets); - NormalizeProbs(targets); - return true; -} - -CTC::CTC(const GenericVector& labels, int null_char, - const GENERIC_2D_ARRAY& outputs) - : labels_(labels), outputs_(outputs), null_char_(null_char) { - num_timesteps_ = outputs.dim1(); - num_classes_ = outputs.dim2(); - num_labels_ = labels_.size(); -} - -// Computes vectors of min and max label index for each timestep, based on -// whether skippability of nulls makes it possible to complete a valid path. -bool CTC::ComputeLabelLimits() { - min_labels_.init_to_size(num_timesteps_, 0); - max_labels_.init_to_size(num_timesteps_, 0); - int min_u = num_labels_ - 1; - if (labels_[min_u] == null_char_) --min_u; - for (int t = num_timesteps_ - 1; t >= 0; --t) { - min_labels_[t] = min_u; - if (min_u > 0) { - --min_u; - if (labels_[min_u] == null_char_ && min_u > 0 && - labels_[min_u + 1] != labels_[min_u - 1]) { - --min_u; - } - } - } - int max_u = labels_[0] == null_char_; - for (int t = 0; t < num_timesteps_; ++t) { - max_labels_[t] = max_u; - if (max_labels_[t] < min_labels_[t]) return false; // Not enough room. - if (max_u + 1 < num_labels_) { - ++max_u; - if (labels_[max_u] == null_char_ && max_u + 1 < num_labels_ && - labels_[max_u + 1] != labels_[max_u - 1]) { - ++max_u; - } - } - } - return true; -} - -// Computes targets based purely on the labels by spreading the labels evenly -// over the available timesteps. -void CTC::ComputeSimpleTargets(GENERIC_2D_ARRAY* targets) const { - // Initialize all targets to zero. 
- targets->Resize(num_timesteps_, num_classes_, 0.0f); - GenericVector half_widths; - GenericVector means; - ComputeWidthsAndMeans(&half_widths, &means); - for (int l = 0; l < num_labels_; ++l) { - int label = labels_[l]; - float left_half_width = half_widths[l]; - float right_half_width = left_half_width; - int mean = means[l]; - if (label == null_char_) { - if (!NeededNull(l)) { - if ((l > 0 && mean == means[l - 1]) || - (l + 1 < num_labels_ && mean == means[l + 1])) { - continue; // Drop overlapping null. - } - } - // Make sure that no space is left unoccupied and that non-nulls always - // peak at 1 by stretching nulls to meet their neighbors. - if (l > 0) left_half_width = mean - means[l - 1]; - if (l + 1 < num_labels_) right_half_width = means[l + 1] - mean; - } - if (mean >= 0 && mean < num_timesteps_) targets->put(mean, label, 1.0f); - for (int offset = 1; offset < left_half_width && mean >= offset; ++offset) { - float prob = 1.0f - offset / left_half_width; - if (mean - offset < num_timesteps_ && - prob > targets->get(mean - offset, label)) { - targets->put(mean - offset, label, prob); - } - } - for (int offset = 1; - offset < right_half_width && mean + offset < num_timesteps_; - ++offset) { - float prob = 1.0f - offset / right_half_width; - if (mean + offset >= 0 && prob > targets->get(mean + offset, label)) { - targets->put(mean + offset, label, prob); - } - } - } -} - -// Computes mean positions and half widths of the simple targets by spreading -// the labels evenly over the available timesteps. -void CTC::ComputeWidthsAndMeans(GenericVector* half_widths, - GenericVector* means) const { - // Count the number of labels of each type, in regexp terms, counts plus - // (non-null or necessary null, which must occur at least once) and star - // (optional null). - int num_plus = 0, num_star = 0; - for (int i = 0; i < num_labels_; ++i) { - if (labels_[i] != null_char_ || NeededNull(i)) - ++num_plus; - else - ++num_star; - } - // Compute the size for each type. 
If there is enough space for everything - // to have size>=1, then all are equal, otherwise plus_size=1 and star gets - // whatever is left-over. - float plus_size = 1.0f, star_size = 0.0f; - float total_floating = num_plus + num_star; - if (total_floating <= num_timesteps_) { - plus_size = star_size = num_timesteps_ / total_floating; - } else if (num_star > 0) { - star_size = static_cast(num_timesteps_ - num_plus) / num_star; - } - // Set the width and compute the mean of each. - float mean_pos = 0.0f; - for (int i = 0; i < num_labels_; ++i) { - float half_width; - if (labels_[i] != null_char_ || NeededNull(i)) { - half_width = plus_size / 2.0f; - } else { - half_width = star_size / 2.0f; - } - mean_pos += half_width; - means->push_back(static_cast(mean_pos)); - mean_pos += half_width; - half_widths->push_back(half_width); - } -} - -// Helper returns the index of the highest probability label at timestep t. -static int BestLabel(const GENERIC_2D_ARRAY& outputs, int t) { - int result = 0; - int num_classes = outputs.dim2(); - const float* outputs_t = outputs[t]; - for (int c = 1; c < num_classes; ++c) { - if (outputs_t[c] > outputs_t[result]) result = c; - } - return result; -} - -// Calculates and returns a suitable fraction of the simple targets to add -// to the network outputs. -float CTC::CalculateBiasFraction() { - // Compute output labels via basic decoding. - GenericVector output_labels; - for (int t = 0; t < num_timesteps_; ++t) { - int label = BestLabel(outputs_, t); - while (t + 1 < num_timesteps_ && BestLabel(outputs_, t + 1) == label) ++t; - if (label != null_char_) output_labels.push_back(label); - } - // Simple bag of labels error calculation. 
- GenericVector truth_counts(num_classes_, 0); - GenericVector output_counts(num_classes_, 0); - for (int l = 0; l < num_labels_; ++l) { - ++truth_counts[labels_[l]]; - } - for (int l = 0; l < output_labels.size(); ++l) { - ++output_counts[output_labels[l]]; - } - // Count the number of true and false positive non-nulls and truth labels. - int true_pos = 0, false_pos = 0, total_labels = 0; - for (int c = 0; c < num_classes_; ++c) { - if (c == null_char_) continue; - int truth_count = truth_counts[c]; - int ocr_count = output_counts[c]; - if (truth_count > 0) { - total_labels += truth_count; - if (ocr_count > truth_count) { - true_pos += truth_count; - false_pos += ocr_count - truth_count; - } else { - true_pos += ocr_count; - } - } - // We don't need to count classes that don't exist in the truth as - // false positives, because they don't affect CTC at all. - } - if (total_labels == 0) return 0.0f; - return exp(std::max(true_pos - false_pos, 1) * log(kMinProb_) / total_labels); -} - -// Given ln(x) and ln(y), returns ln(x + y), using: -// ln(x + y) = ln(y) + ln(1 + exp(ln(y) - ln(x)), ensuring that ln(x) is the -// bigger number to maximize precision. -static double LogSumExp(double ln_x, double ln_y) { - if (ln_x >= ln_y) { - return ln_x + log1p(exp(ln_y - ln_x)); - } else { - return ln_y + log1p(exp(ln_x - ln_y)); - } -} - -// Runs the forward CTC pass, filling in log_probs. -void CTC::Forward(GENERIC_2D_ARRAY* log_probs) const { - log_probs->Resize(num_timesteps_, num_labels_, -FLT_MAX); - log_probs->put(0, 0, log(outputs_(0, labels_[0]))); - if (labels_[0] == null_char_) - log_probs->put(0, 1, log(outputs_(0, labels_[1]))); - for (int t = 1; t < num_timesteps_; ++t) { - const float* outputs_t = outputs_[t]; - for (int u = min_labels_[t]; u <= max_labels_[t]; ++u) { - // Continuing the same label. - double log_sum = log_probs->get(t - 1, u); - // Change from previous label. 
- if (u > 0) { - log_sum = LogSumExp(log_sum, log_probs->get(t - 1, u - 1)); - } - // Skip the null if allowed. - if (u >= 2 && labels_[u - 1] == null_char_ && - labels_[u] != labels_[u - 2]) { - log_sum = LogSumExp(log_sum, log_probs->get(t - 1, u - 2)); - } - // Add in the log prob of the current label. - double label_prob = outputs_t[labels_[u]]; - log_sum += log(label_prob); - log_probs->put(t, u, log_sum); - } - } -} - -// Runs the backward CTC pass, filling in log_probs. -void CTC::Backward(GENERIC_2D_ARRAY* log_probs) const { - log_probs->Resize(num_timesteps_, num_labels_, -FLT_MAX); - log_probs->put(num_timesteps_ - 1, num_labels_ - 1, 0.0); - if (labels_[num_labels_ - 1] == null_char_) - log_probs->put(num_timesteps_ - 1, num_labels_ - 2, 0.0); - for (int t = num_timesteps_ - 2; t >= 0; --t) { - const float* outputs_tp1 = outputs_[t + 1]; - for (int u = min_labels_[t]; u <= max_labels_[t]; ++u) { - // Continuing the same label. - double log_sum = log_probs->get(t + 1, u) + log(outputs_tp1[labels_[u]]); - // Change from previous label. - if (u + 1 < num_labels_) { - double prev_prob = outputs_tp1[labels_[u + 1]]; - log_sum = - LogSumExp(log_sum, log_probs->get(t + 1, u + 1) + log(prev_prob)); - } - // Skip the null if allowed. - if (u + 2 < num_labels_ && labels_[u + 1] == null_char_ && - labels_[u] != labels_[u + 2]) { - double skip_prob = outputs_tp1[labels_[u + 2]]; - log_sum = - LogSumExp(log_sum, log_probs->get(t + 1, u + 2) + log(skip_prob)); - } - log_probs->put(t, u, log_sum); - } - } -} - -// Normalizes and brings probs out of log space with a softmax over time. -void CTC::NormalizeSequence(GENERIC_2D_ARRAY* probs) const { - double max_logprob = probs->Max(); - for (int u = 0; u < num_labels_; ++u) { - double total = 0.0; - for (int t = 0; t < num_timesteps_; ++t) { - // Separate impossible path from unlikely probs. 
- double prob = probs->get(t, u); - if (prob > -FLT_MAX) - prob = ClippedExp(prob - max_logprob); - else - prob = 0.0; - total += prob; - probs->put(t, u, prob); - } - // Note that although this is a probability distribution over time and - // therefore should sum to 1, it is important to allow some labels to be - // all zero, (or at least tiny) as it is necessary to skip some blanks. - if (total < kMinTotalTimeProb_) total = kMinTotalTimeProb_; - for (int t = 0; t < num_timesteps_; ++t) - probs->put(t, u, probs->get(t, u) / total); - } -} - -// For each timestep computes the max prob for each class over all -// instances of the class in the labels_, and sets the targets to -// the max observed prob. -void CTC::LabelsToClasses(const GENERIC_2D_ARRAY& probs, - NetworkIO* targets) const { - // For each timestep compute the max prob for each class over all - // instances of the class in the labels_. - GenericVector class_probs; - for (int t = 0; t < num_timesteps_; ++t) { - float* targets_t = targets->f(t); - class_probs.init_to_size(num_classes_, 0.0); - for (int u = 0; u < num_labels_; ++u) { - double prob = probs(t, u); - // Note that although Graves specifies sum over all labels of the same - // class, we need to allow skipped blanks to go to zero, so they don't - // interfere with the non-blanks, so max is better than sum. - if (prob > class_probs[labels_[u]]) class_probs[labels_[u]] = prob; - // class_probs[labels_[u]] += prob; - } - int best_class = 0; - for (int c = 0; c < num_classes_; ++c) { - targets_t[c] = class_probs[c]; - if (class_probs[c] > class_probs[best_class]) best_class = c; - } - } -} - -// Normalizes the probabilities such that no target has a prob below min_prob, -// and, provided that the initial total is at least min_total_prob, then all -// probs will sum to 1, otherwise to sum/min_total_prob. The maximum output -// probability is thus 1 - (num_classes-1)*min_prob. 
-/* static */ -void CTC::NormalizeProbs(GENERIC_2D_ARRAY* probs) { - int num_timesteps = probs->dim1(); - int num_classes = probs->dim2(); - for (int t = 0; t < num_timesteps; ++t) { - float* probs_t = (*probs)[t]; - // Compute the total and clip that to prevent amplification of noise. - double total = 0.0; - for (int c = 0; c < num_classes; ++c) total += probs_t[c]; - if (total < kMinTotalFinalProb_) total = kMinTotalFinalProb_; - // Compute the increased total as a result of clipping. - double increment = 0.0; - for (int c = 0; c < num_classes; ++c) { - double prob = probs_t[c] / total; - if (prob < kMinProb_) increment += kMinProb_ - prob; - } - // Now normalize with clipping. Any additional clipping is negligible. - total += increment; - for (int c = 0; c < num_classes; ++c) { - float prob = probs_t[c] / total; - probs_t[c] = std::max(prob, kMinProb_); - } - } -} - -// Returns true if the label at index is a needed null. -bool CTC::NeededNull(int index) const { - return labels_[index] == null_char_ && index > 0 && index + 1 < num_labels_ && - labels_[index + 1] == labels_[index - 1]; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/ctc.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/ctc.h deleted file mode 100644 index 47fba674..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/ctc.h +++ /dev/null @@ -1,130 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ctc.h -// Description: Slightly improved standard CTC to compute the targets. -// Author: Ray Smith -// Created: Wed Jul 13 15:17:06 PDT 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_CTC_H_ -#define TESSERACT_LSTM_CTC_H_ - -#include "genericvector.h" -#include "network.h" -#include "networkio.h" -#include "scrollview.h" - -namespace tesseract { - -// Class to encapsulate CTC and simple target generation. -class CTC { - public: - // Normalizes the probabilities such that no target has a prob below min_prob, - // and, provided that the initial total is at least min_total_prob, then all - // probs will sum to 1, otherwise to sum/min_total_prob. The maximum output - // probability is thus 1 - (num_classes-1)*min_prob. - static void NormalizeProbs(NetworkIO* probs) { - NormalizeProbs(probs->mutable_float_array()); - } - - // Builds a target using CTC. Slightly improved as follows: - // Includes normalizations and clipping for stability. - // labels should be pre-padded with nulls wherever desired, but they don't - // have to be between all labels. Allows for multi-label codes with no - // nulls between. - // labels can be longer than the time sequence, but the total number of - // essential labels (non-null plus nulls between equal labels) must not exceed - // the number of timesteps in outputs. - // outputs is the output of the network, and should have already been - // normalized with NormalizeProbs. - // On return targets is filled with the computed targets. - // Returns false if there is insufficient time for the labels. 
- static bool ComputeCTCTargets(const GenericVector& truth_labels, - int null_char, - const GENERIC_2D_ARRAY& outputs, - NetworkIO* targets); - - private: - // Constructor is private as the instance only holds information specific to - // the current labels, outputs etc, and is built by the static function. - CTC(const GenericVector& labels, int null_char, - const GENERIC_2D_ARRAY& outputs); - - // Computes vectors of min and max label index for each timestep, based on - // whether skippability of nulls makes it possible to complete a valid path. - bool ComputeLabelLimits(); - // Computes targets based purely on the labels by spreading the labels evenly - // over the available timesteps. - void ComputeSimpleTargets(GENERIC_2D_ARRAY* targets) const; - // Computes mean positions and half widths of the simple targets by spreading - // the labels even over the available timesteps. - void ComputeWidthsAndMeans(GenericVector* half_widths, - GenericVector* means) const; - // Calculates and returns a suitable fraction of the simple targets to add - // to the network outputs. - float CalculateBiasFraction(); - // Runs the forward CTC pass, filling in log_probs. - void Forward(GENERIC_2D_ARRAY* log_probs) const; - // Runs the backward CTC pass, filling in log_probs. - void Backward(GENERIC_2D_ARRAY* log_probs) const; - // Normalizes and brings probs out of log space with a softmax over time. - void NormalizeSequence(GENERIC_2D_ARRAY* probs) const; - // For each timestep computes the max prob for each class over all - // instances of the class in the labels_, and sets the targets to - // the max observed prob. - void LabelsToClasses(const GENERIC_2D_ARRAY& probs, - NetworkIO* targets) const; - // Normalizes the probabilities such that no target has a prob below min_prob, - // and, provided that the initial total is at least min_total_prob, then all - // probs will sum to 1, otherwise to sum/min_total_prob. 
The maximum output - // probability is thus 1 - (num_classes-1)*min_prob. - static void NormalizeProbs(GENERIC_2D_ARRAY* probs); - // Returns true if the label at index is a needed null. - bool NeededNull(int index) const; - // Returns exp(clipped(x)), clipping x to a reasonable range to prevent over/ - // underflow. - static double ClippedExp(double x) { - if (x < -kMaxExpArg_) return exp(-kMaxExpArg_); - if (x > kMaxExpArg_) return exp(kMaxExpArg_); - return exp(x); - } - - // Minimum probability limit for softmax input to ctc_loss. - static const float kMinProb_; - // Maximum absolute argument to exp(). - static const double kMaxExpArg_; - // Minimum probability for total prob in time normalization. - static const double kMinTotalTimeProb_; - // Minimum probability for total prob in final normalization. - static const double kMinTotalFinalProb_; - - // The truth label indices that are to be matched to outputs_. - const GenericVector& labels_; - // The network outputs. - GENERIC_2D_ARRAY outputs_; - // The null or "blank" label. - int null_char_; - // Number of timesteps in outputs_. - int num_timesteps_; - // Number of classes in outputs_. - int num_classes_; - // Number of labels in labels_. - int num_labels_; - // Min and max valid label indices for each timestep. - GenericVector min_labels_; - GenericVector max_labels_; -}; - -} // namespace tesseract - -#endif // TESSERACT_LSTM_CTC_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/fullyconnected.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/fullyconnected.cpp deleted file mode 100644 index 0c4b6da1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/fullyconnected.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: fullyconnected.cpp -// Description: Simple feed-forward layer with various non-linearities. 
-// Author: Ray Smith -// Created: Wed Feb 26 14:49:15 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "fullyconnected.h" - -#ifdef _OPENMP -#include -#endif -#include -#include - -#include "functions.h" -#include "networkscratch.h" - -// Number of threads to use for parallel calculation of Forward and Backward. -#ifdef _OPENMP -const int kNumThreads = 4; -#else -const int kNumThreads = 1; -#endif - -namespace tesseract { - -FullyConnected::FullyConnected(const STRING& name, int ni, int no, - NetworkType type) - : Network(type, name, ni, no), external_source_(nullptr), int_mode_(false) { -} - -// Returns the shape output from the network given an input shape (which may -// be partially unknown ie zero). -StaticShape FullyConnected::OutputShape(const StaticShape& input_shape) const { - LossType loss_type = LT_NONE; - if (type_ == NT_SOFTMAX) - loss_type = LT_CTC; - else if (type_ == NT_SOFTMAX_NO_CTC) - loss_type = LT_SOFTMAX; - else if (type_ == NT_LOGISTIC) - loss_type = LT_LOGISTIC; - StaticShape result(input_shape); - result.set_depth(no_); - result.set_loss_type(loss_type); - return result; -} - -// Suspends/Enables training by setting the training_ flag. -void FullyConnected::SetEnableTraining(TrainingState state) { - if (state == TS_RE_ENABLE) { - // Enable only from temp disabled. 
- if (training_ == TS_TEMP_DISABLE) training_ = TS_ENABLED; - } else if (state == TS_TEMP_DISABLE) { - // Temp disable only from enabled. - if (training_ == TS_ENABLED) training_ = state; - } else { - if (state == TS_ENABLED && training_ != TS_ENABLED) - weights_.InitBackward(); - training_ = state; - } -} - -// Sets up the network for training. Initializes weights using weights of -// scale `range` picked according to the random number generator `randomizer`. -int FullyConnected::InitWeights(float range, TRand* randomizer) { - Network::SetRandomizer(randomizer); - num_weights_ = weights_.InitWeightsFloat(no_, ni_ + 1, TestFlag(NF_ADAM), - range, randomizer); - return num_weights_; -} - -// Recursively searches the network for softmaxes with old_no outputs, -// and remaps their outputs according to code_map. See network.h for details. - -int FullyConnected::RemapOutputs(int old_no, const std::vector& code_map) { - if (type_ == NT_SOFTMAX && no_ == old_no) { - num_weights_ = weights_.RemapOutputs(code_map); - no_ = code_map.size(); - } - return num_weights_; -} - -// Converts a float network to an int network. -void FullyConnected::ConvertToInt() { - weights_.ConvertToInt(); -} - -// Provides debug output on the weights. -void FullyConnected::DebugWeights() { - weights_.Debug2D(name_.string()); -} - -// Writes to the given file. Returns false in case of error. -bool FullyConnected::Serialize(TFile* fp) const { - if (!Network::Serialize(fp)) return false; - if (!weights_.Serialize(IsTraining(), fp)) return false; - return true; -} - -// Reads from the given file. Returns false in case of error. -bool FullyConnected::DeSerialize(TFile* fp) { - return weights_.DeSerialize(IsTraining(), fp); -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. 
-void FullyConnected::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - int width = input.Width(); - if (type_ == NT_SOFTMAX) - output->ResizeFloat(input, no_); - else - output->Resize(input, no_); - SetupForward(input, input_transpose); - GenericVector temp_lines; - temp_lines.init_to_size(kNumThreads, NetworkScratch::FloatVec()); - GenericVector curr_input; - curr_input.init_to_size(kNumThreads, NetworkScratch::FloatVec()); - for (int i = 0; i < kNumThreads; ++i) { - temp_lines[i].Init(no_, scratch); - curr_input[i].Init(ni_, scratch); - } -#ifdef _OPENMP -#pragma omp parallel for num_threads(kNumThreads) - for (int t = 0; t < width; ++t) { - // Thread-local pointer to temporary storage. - int thread_id = omp_get_thread_num(); -#else - for (int t = 0; t < width; ++t) { - // Thread-local pointer to temporary storage. - int thread_id = 0; -#endif - double* temp_line = temp_lines[thread_id]; - if (input.int_mode()) { - ForwardTimeStep(input.i(t), t, temp_line); - } else { - input.ReadTimeStep(t, curr_input[thread_id]); - ForwardTimeStep(curr_input[thread_id], t, temp_line); - } - output->WriteTimeStep(t, temp_line); - if (IsTraining() && type_ != NT_SOFTMAX) { - acts_.CopyTimeStepFrom(t, *output, t); - } - } - // Zero all the elements that are in the padding around images that allows - // multiple different-sized images to exist in a single array. - // acts_ is only used if this is not a softmax op. - if (IsTraining() && type_ != NT_SOFTMAX) { - acts_.ZeroInvalidElements(); - } - output->ZeroInvalidElements(); -#if DEBUG_DETAIL > 0 - tprintf("F Output:%s\n", name_.string()); - output->Print(10); -#endif - if (debug) DisplayForward(*output); -} - -// Components of Forward so FullyConnected can be reused inside LSTM. -void FullyConnected::SetupForward(const NetworkIO& input, - const TransposedArray* input_transpose) { - // Softmax output is always float, so save the input type. 
- int_mode_ = input.int_mode(); - if (IsTraining()) { - acts_.Resize(input, no_); - // Source_ is a transposed copy of input. It isn't needed if provided. - external_source_ = input_transpose; - if (external_source_ == nullptr) source_t_.ResizeNoInit(ni_, input.Width()); - } -} - -void FullyConnected::ForwardTimeStep(int t, double* output_line) { - if (type_ == NT_TANH) { - FuncInplace(no_, output_line); - } else if (type_ == NT_LOGISTIC) { - FuncInplace(no_, output_line); - } else if (type_ == NT_POSCLIP) { - FuncInplace(no_, output_line); - } else if (type_ == NT_SYMCLIP) { - FuncInplace(no_, output_line); - } else if (type_ == NT_RELU) { - FuncInplace(no_, output_line); - } else if (type_ == NT_SOFTMAX || type_ == NT_SOFTMAX_NO_CTC) { - SoftmaxInPlace(no_, output_line); - } else if (type_ != NT_LINEAR) { - ASSERT_HOST("Invalid fully-connected type!" == nullptr); - } -} - -void FullyConnected::ForwardTimeStep(const double* d_input, - int t, double* output_line) { - // input is copied to source_ line-by-line for cache coherency. - if (IsTraining() && external_source_ == nullptr) - source_t_.WriteStrided(t, d_input); - weights_.MatrixDotVector(d_input, output_line); - ForwardTimeStep(t, output_line); -} - -void FullyConnected::ForwardTimeStep(const int8_t* i_input, - int t, double* output_line) { - // input is copied to source_ line-by-line for cache coherency. - weights_.MatrixDotVector(i_input, output_line); - ForwardTimeStep(t, output_line); -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. 
-bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - if (debug) DisplayBackward(fwd_deltas); - back_deltas->Resize(fwd_deltas, ni_); - GenericVector errors; - errors.init_to_size(kNumThreads, NetworkScratch::FloatVec()); - for (int i = 0; i < kNumThreads; ++i) errors[i].Init(no_, scratch); - GenericVector temp_backprops; - if (needs_to_backprop_) { - temp_backprops.init_to_size(kNumThreads, NetworkScratch::FloatVec()); - for (int i = 0; i < kNumThreads; ++i) temp_backprops[i].Init(ni_, scratch); - } - int width = fwd_deltas.Width(); - NetworkScratch::GradientStore errors_t; - errors_t.Init(no_, width, scratch); -#ifdef _OPENMP -#pragma omp parallel for num_threads(kNumThreads) - for (int t = 0; t < width; ++t) { - int thread_id = omp_get_thread_num(); -#else - for (int t = 0; t < width; ++t) { - int thread_id = 0; -#endif - double* backprop = nullptr; - if (needs_to_backprop_) backprop = temp_backprops[thread_id]; - double* curr_errors = errors[thread_id]; - BackwardTimeStep(fwd_deltas, t, curr_errors, errors_t.get(), backprop); - if (backprop != nullptr) { - back_deltas->WriteTimeStep(t, backprop); - } - } - FinishBackward(*errors_t.get()); - if (needs_to_backprop_) { - back_deltas->ZeroInvalidElements(); -#if DEBUG_DETAIL > 0 - tprintf("F Backprop:%s\n", name_.string()); - back_deltas->Print(10); -#endif - return true; - } - return false; // No point going further back. 
-} - -void FullyConnected::BackwardTimeStep(const NetworkIO& fwd_deltas, int t, - double* curr_errors, - TransposedArray* errors_t, - double* backprop) { - if (type_ == NT_TANH) - acts_.FuncMultiply(fwd_deltas, t, curr_errors); - else if (type_ == NT_LOGISTIC) - acts_.FuncMultiply(fwd_deltas, t, curr_errors); - else if (type_ == NT_POSCLIP) - acts_.FuncMultiply(fwd_deltas, t, curr_errors); - else if (type_ == NT_SYMCLIP) - acts_.FuncMultiply(fwd_deltas, t, curr_errors); - else if (type_ == NT_RELU) - acts_.FuncMultiply(fwd_deltas, t, curr_errors); - else if (type_ == NT_SOFTMAX || type_ == NT_SOFTMAX_NO_CTC || - type_ == NT_LINEAR) - fwd_deltas.ReadTimeStep(t, curr_errors); // fwd_deltas are the errors. - else - ASSERT_HOST("Invalid fully-connected type!" == nullptr); - // Generate backprop only if needed by the lower layer. - if (backprop != nullptr) weights_.VectorDotMatrix(curr_errors, backprop); - errors_t->WriteStrided(t, curr_errors); -} - -void FullyConnected::FinishBackward(const TransposedArray& errors_t) { - if (external_source_ == nullptr) - weights_.SumOuterTransposed(errors_t, source_t_, true); - else - weights_.SumOuterTransposed(errors_t, *external_source_, true); -} - -// Updates the weights using the given learning rate, momentum and adam_beta. -// num_samples is used in the adam computation iff use_adam_ is true. -void FullyConnected::Update(float learning_rate, float momentum, - float adam_beta, int num_samples) { - weights_.Update(learning_rate, momentum, adam_beta, num_samples); -} - -// Sums the products of weight updates in *this and other, splitting into -// positive (same direction) in *same and negative (different direction) in -// *changed. -void FullyConnected::CountAlternators(const Network& other, double* same, - double* changed) const { - ASSERT_HOST(other.type() == type_); - const FullyConnected* fc = static_cast(&other); - weights_.CountAlternators(fc->weights_, same, changed); -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/fullyconnected.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/fullyconnected.h deleted file mode 100644 index 771830e4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/fullyconnected.h +++ /dev/null @@ -1,136 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: fullyconnected.h -// Description: Simple feed-forward layer with various non-linearities. -// Author: Ray Smith -// Created: Wed Feb 26 14:46:06 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_FULLYCONNECTED_H_ -#define TESSERACT_LSTM_FULLYCONNECTED_H_ - -#include "network.h" -#include "networkscratch.h" - -namespace tesseract { - -// C++ Implementation of the Softmax (output) class from lstm.py. -class FullyConnected : public Network { - public: - FullyConnected(const STRING& name, int ni, int no, NetworkType type); - virtual ~FullyConnected() = default; - - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). 
- StaticShape OutputShape(const StaticShape& input_shape) const override; - - STRING spec() const override { - STRING spec; - if (type_ == NT_TANH) - spec.add_str_int("Ft", no_); - else if (type_ == NT_LOGISTIC) - spec.add_str_int("Fs", no_); - else if (type_ == NT_RELU) - spec.add_str_int("Fr", no_); - else if (type_ == NT_LINEAR) - spec.add_str_int("Fl", no_); - else if (type_ == NT_POSCLIP) - spec.add_str_int("Fp", no_); - else if (type_ == NT_SYMCLIP) - spec.add_str_int("Fs", no_); - else if (type_ == NT_SOFTMAX) - spec.add_str_int("Fc", no_); - else - spec.add_str_int("Fm", no_); - return spec; - } - - // Changes the type to the given type. Used to commute a softmax to a - // non-output type for adding on other networks. - void ChangeType(NetworkType type) { - type_ = type; - } - - // Suspends/Enables training by setting the training_ flag. Serialize and - // DeSerialize only operate on the run-time data if state is false. - void SetEnableTraining(TrainingState state) override; - - // Sets up the network for training. Initializes weights using weights of - // scale `range` picked according to the random number generator `randomizer`. - int InitWeights(float range, TRand* randomizer) override; - // Recursively searches the network for softmaxes with old_no outputs, - // and remaps their outputs according to code_map. See network.h for details. - int RemapOutputs(int old_no, const std::vector& code_map) override; - - // Converts a float network to an int network. - void ConvertToInt() override; - - // Provides debug output on the weights. - void DebugWeights() override; - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. 
- void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, NetworkScratch* scratch, - NetworkIO* output) override; - // Components of Forward so FullyConnected can be reused inside LSTM. - void SetupForward(const NetworkIO& input, - const TransposedArray* input_transpose); - void ForwardTimeStep(int t, double* output_line); - void ForwardTimeStep(const double* d_input, int t, double* output_line); - void ForwardTimeStep(const int8_t* i_input, int t, double* output_line); - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, NetworkIO* back_deltas) override; - // Components of Backward so FullyConnected can be reused inside LSTM. - void BackwardTimeStep(const NetworkIO& fwd_deltas, int t, double* curr_errors, - TransposedArray* errors_t, double* backprop); - void FinishBackward(const TransposedArray& errors_t); - - // Updates the weights using the given learning rate, momentum and adam_beta. - // num_samples is used in the adam computation iff use_adam_ is true. - void Update(float learning_rate, float momentum, float adam_beta, - int num_samples) override; - // Sums the products of weight updates in *this and other, splitting into - // positive (same direction) in *same and negative (different direction) in - // *changed. - void CountAlternators(const Network& other, double* same, - double* changed) const override; - - protected: - // Weight arrays of size [no, ni + 1]. - WeightMatrix weights_; - // Transposed copy of input used during training of size [ni, width]. - TransposedArray source_t_; - // Pointer to transposed input stored elsewhere. If not null, this is used - // in preference to calculating the transpose and storing it in source_t_. - const TransposedArray* external_source_; - // Activations from forward pass of size [width, no]. 
- NetworkIO acts_; - // Memory of the integer mode input to forward as softmax always outputs - // float, so the information is otherwise lost. - bool int_mode_; -}; - -} // namespace tesseract. - - - -#endif // TESSERACT_LSTM_FULLYCONNECTED_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/functions.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/functions.cpp deleted file mode 100644 index 644530c3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/functions.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: functions.cpp -// Description: Static initialize-on-first-use non-linearity functions. -// Author: Ray Smith -// Created: Tue Jul 17 14:02:59 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "functions.h" - -namespace tesseract { - -double TanhTable[kTableSize]; -double LogisticTable[kTableSize]; - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/functions.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/functions.h deleted file mode 100644 index d633e6bf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/functions.h +++ /dev/null @@ -1,249 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: functions.h -// Description: Collection of function-objects used by the network layers. -// Author: Ray Smith -// Created: Fri Jun 20 10:45:37 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_FUNCTIONS_H_ -#define TESSERACT_LSTM_FUNCTIONS_H_ - -#include -#include "helpers.h" -#include "tprintf.h" - -// Setting this to 1 or more causes massive dumps of debug data: weights, -// updates, internal calculations etc, and reduces the number of test iterations -// to a small number, so outputs can be diffed. -#define DEBUG_DETAIL 0 -#if DEBUG_DETAIL > 0 -#undef _OPENMP // Disable open mp to get the outputs in sync. -#endif - -namespace tesseract { - -// Size of static tables. -const int kTableSize = 4096; -// Scale factor for float arg to int index. -const double kScaleFactor = 256.0; - -extern double TanhTable[]; -extern double LogisticTable[]; - -// Non-linearity (sigmoid) functions with cache tables and clipping. 
-inline double Tanh(double x) { - if (x < 0.0) return -Tanh(-x); - if (x >= (kTableSize - 1) / kScaleFactor) return 1.0; - x *= kScaleFactor; - int index = static_cast(floor(x)); - if (TanhTable[index] == 0.0 && index > 0) { - // Generate the entry. - TanhTable[index] = tanh(index / kScaleFactor); - } - if (index == kTableSize - 1) return TanhTable[kTableSize - 1]; - if (TanhTable[index + 1] == 0.0) { - // Generate the entry. - TanhTable[index + 1] = tanh((index + 1) / kScaleFactor); - } - double offset = x - index; - return TanhTable[index] * (1.0 - offset) + TanhTable[index + 1] * offset; -} - -inline double Logistic(double x) { - if (x < 0.0) return 1.0 - Logistic(-x); - if (x >= (kTableSize - 1) / kScaleFactor) return 1.0; - x *= kScaleFactor; - int index = static_cast(floor(x)); - if (LogisticTable[index] == 0.0) { - // Generate the entry. - LogisticTable[index] = 1.0 / (1.0 + exp(-index / kScaleFactor)); - } - if (index == kTableSize - 1) return LogisticTable[kTableSize - 1]; - if (LogisticTable[index + 1] == 0.0) { - // Generate the entry. - LogisticTable[index + 1] = 1.0 / (1.0 + exp(-(index + 1) / kScaleFactor)); - } - double offset = x - index; - return LogisticTable[index] * (1.0 - offset) + - LogisticTable[index + 1] * offset; -} - -// Non-linearity (sigmoid) functions and their derivatives. -struct FFunc { - inline double operator()(double x) const { return Logistic(x); } -}; -struct FPrime { - inline double operator()(double y) const { return y * (1.0 - y); } -}; -struct ClipFFunc { - inline double operator()(double x) const { - if (x <= 0.0) return 0.0; - if (x >= 1.0) return 1.0; - return x; - } -}; -struct ClipFPrime { - inline double operator()(double y) const { - return 0.0 < y && y < 1.0 ? 1.0 : 0.0; - } -}; -struct Relu { - inline double operator()(double x) const { - if (x <= 0.0) return 0.0; - return x; - } -}; -struct ReluPrime { - inline double operator()(double y) const { return 0.0 < y ? 
1.0 : 0.0; } -}; -struct GFunc { - inline double operator()(double x) const { return Tanh(x); } -}; -struct GPrime { - inline double operator()(double y) const { return 1.0 - y * y; } -}; -struct ClipGFunc { - inline double operator()(double x) const { - if (x <= -1.0) return -1.0; - if (x >= 1.0) return 1.0; - return x; - } -}; -struct ClipGPrime { - inline double operator()(double y) const { - return -1.0 < y && y < 1.0 ? 1.0 : 0.0; - } -}; -struct HFunc { - inline double operator()(double x) const { return Tanh(x); } -}; -struct HPrime { - inline double operator()(double y) const { - double u = Tanh(y); - return 1.0 - u * u; - } -}; -struct UnityFunc { - inline double operator()(double x) const { return 1.0; } -}; -struct IdentityFunc { - inline double operator()(double x) const { return x; } -}; - -// Applies Func in-place to inout, of size n. -template -inline void FuncInplace(int n, double* inout) { - Func f; - for (int i = 0; i < n; ++i) { - inout[i] = f(inout[i]); - } -} -// Applies Func to u and multiplies the result by v component-wise, -// putting the product in out, all of size n. -template -inline void FuncMultiply(const double* u, const double* v, int n, double* out) { - Func f; - for (int i = 0; i < n; ++i) { - out[i] = f(u[i]) * v[i]; - } -} -// Applies the Softmax function in-place to inout, of size n. -template -inline void SoftmaxInPlace(int n, T* inout) { - if (n <= 0) return; - // A limit on the negative range input to exp to guarantee non-zero output. 
- const T kMaxSoftmaxActivation = 86.0f; - - T max_output = inout[0]; - for (int i = 1; i < n; i++) { - T output = inout[i]; - if (output > max_output) max_output = output; - } - T prob_total = 0.0; - for (int i = 0; i < n; i++) { - T prob = inout[i] - max_output; - prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast(0))); - prob_total += prob; - inout[i] = prob; - } - if (prob_total > 0.0) { - for (int i = 0; i < n; i++) inout[i] /= prob_total; - } -} - -// Copies n values of the given src vector to dest. -inline void CopyVector(int n, const double* src, double* dest) { - memcpy(dest, src, n * sizeof(dest[0])); -} - -// Adds n values of the given src vector to dest. -inline void AccumulateVector(int n, const double* src, double* dest) { - for (int i = 0; i < n; ++i) dest[i] += src[i]; -} - -// Multiplies n values of inout in-place element-wise by the given src vector. -inline void MultiplyVectorsInPlace(int n, const double* src, double* inout) { - for (int i = 0; i < n; ++i) inout[i] *= src[i]; -} - -// Multiplies n values of u by v, element-wise, accumulating to out. -inline void MultiplyAccumulate(int n, const double* u, const double* v, - double* out) { - for (int i = 0; i < n; i++) { - out[i] += u[i] * v[i]; - } -} - -// Sums the given 5 n-vectors putting the result into sum. -inline void SumVectors(int n, const double* v1, const double* v2, - const double* v3, const double* v4, const double* v5, - double* sum) { - for (int i = 0; i < n; ++i) { - sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i]; - } -} - -// Sets the given n-vector vec to 0. -template -inline void ZeroVector(int n, T* vec) { - memset(vec, 0, n * sizeof(*vec)); -} - -// Clips the given vector vec, of size n to [lower, upper]. 
-template -inline void ClipVector(int n, T lower, T upper, T* vec) { - for (int i = 0; i < n; ++i) vec[i] = ClipToRange(vec[i], lower, upper); -} - -// Converts the given n-vector to a binary encoding of the maximum value, -// encoded as vector of nf binary values. -inline void CodeInBinary(int n, int nf, double* vec) { - if (nf <= 0 || n < nf) return; - int index = 0; - double best_score = vec[0]; - for (int i = 1; i < n; ++i) { - if (vec[i] > best_score) { - best_score = vec[i]; - index = i; - } - } - int mask = 1; - for (int i = 0; i < nf; ++i, mask *= 2) { - vec[i] = (index & mask) ? 1.0 : 0.0; - } -} - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_FUNCTIONS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/input.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/input.cpp deleted file mode 100644 index a104198c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/input.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: input.cpp -// Description: Input layer class for neural network implementations. -// Author: Ray Smith -// Created: Thu Mar 13 09:10:34 PDT 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#include "input.h" - -#include "allheaders.h" -#include "imagedata.h" -#include "pageres.h" -#include "scrollview.h" - -namespace tesseract { - -// Max height for variable height inputs before scaling anyway. -const int kMaxInputHeight = 48; - -Input::Input(const STRING& name, int ni, int no) - : Network(NT_INPUT, name, ni, no), cached_x_scale_(1) {} -Input::Input(const STRING& name, const StaticShape& shape) - : Network(NT_INPUT, name, shape.height(), shape.depth()), - shape_(shape), - cached_x_scale_(1) { - if (shape.height() == 1) ni_ = shape.depth(); -} - -// Writes to the given file. Returns false in case of error. -bool Input::Serialize(TFile* fp) const { - return Network::Serialize(fp) && shape_.Serialize(fp); -} - -// Reads from the given file. Returns false in case of error. -bool Input::DeSerialize(TFile* fp) { - return shape_.DeSerialize(fp); -} - -// Returns an integer reduction factor that the network applies to the -// time sequence. Assumes that any 2-d is already eliminated. Used for -// scaling bounding boxes of truth data. -int Input::XScaleFactor() const { - return 1; -} - -// Provides the (minimum) x scale factor to the network (of interest only to -// input units) so they can determine how to scale bounding boxes. -void Input::CacheXScaleFactor(int factor) { - cached_x_scale_ = factor; -} - -// Runs forward propagation of activations on the input line. -// See Network for a detailed discussion of the arguments. -void Input::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - *output = input; -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. 
-bool Input::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - tprintf("Input::Backward should not be called!!\n"); - return false; -} - -// Creates and returns a Pix of appropriate size for the network from the -// image_data. If non-null, *image_scale returns the image scale factor used. -// Returns nullptr on error. -/* static */ -Pix* Input::PrepareLSTMInputs(const ImageData& image_data, - const Network* network, int min_width, - TRand* randomizer, float* image_scale) { - // Note that NumInputs() is defined as input image height. - int target_height = network->NumInputs(); - int width, height; - Pix* pix = image_data.PreScale(target_height, kMaxInputHeight, image_scale, - &width, &height, nullptr); - if (pix == nullptr) { - tprintf("Bad pix from ImageData!\n"); - return nullptr; - } - if (width <= min_width || height < min_width) { - tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, - height, min_width); - pixDestroy(&pix); - return nullptr; - } - return pix; -} - -// Converts the given pix to a NetworkIO of height and depth appropriate to the -// given StaticShape: -// If depth == 3, convert to 24 bit color, otherwise normalized grey. -// Scale to target height, if the shape's height is > 1, or its depth if the -// height == 1. If height == 0 then no scaling. -// NOTE: It isn't safe for multiple threads to call this on the same pix. -/* static */ -void Input::PreparePixInput(const StaticShape& shape, const Pix* pix, - TRand* randomizer, NetworkIO* input) { - bool color = shape.depth() == 3; - Pix* var_pix = const_cast(pix); - int depth = pixGetDepth(var_pix); - Pix* normed_pix = nullptr; - // On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without - // colormap, so we just have to deal with depth conversion here. - if (color) { - // Force RGB. 
- if (depth == 32) - normed_pix = pixClone(var_pix); - else - normed_pix = pixConvertTo32(var_pix); - } else { - // Convert non-8-bit images to 8 bit. - if (depth == 8) - normed_pix = pixClone(var_pix); - else - normed_pix = pixConvertTo8(var_pix, false); - } - int height = pixGetHeight(normed_pix); - int target_height = shape.height(); - if (target_height == 1) target_height = shape.depth(); - if (target_height != 0 && target_height != height) { - // Get the scaled image. - float im_factor = static_cast(target_height) / height; - Pix* scaled_pix = pixScale(normed_pix, im_factor, im_factor); - pixDestroy(&normed_pix); - normed_pix = scaled_pix; - } - input->FromPix(shape, normed_pix, randomizer); - pixDestroy(&normed_pix); -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/input.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/input.h deleted file mode 100644 index cec22414..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/input.h +++ /dev/null @@ -1,104 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: input.h -// Description: Input layer class for neural network implementations. -// Author: Ray Smith -// Created: Thu Mar 13 08:56:26 PDT 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_INPUT_H_ -#define TESSERACT_LSTM_INPUT_H_ - -#include "network.h" - -class ScrollView; - -namespace tesseract { - -class Input : public Network { - public: - Input(const STRING& name, int ni, int no); - Input(const STRING& name, const StaticShape& shape); - virtual ~Input() = default; - - STRING spec() const override { - STRING spec; - spec.add_str_int("", shape_.batch()); - spec.add_str_int(",", shape_.height()); - spec.add_str_int(",", shape_.width()); - spec.add_str_int(",", shape_.depth()); - return spec; - } - - // Returns the required shape input to the network. - StaticShape InputShape() const override { return shape_; } - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - StaticShape OutputShape(const StaticShape& input_shape) const override { - return shape_; - } - // Writes to the given file. Returns false in case of error. - // Should be overridden by subclasses, but called by their Serialize. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Returns an integer reduction factor that the network applies to the - // time sequence. Assumes that any 2-d is already eliminated. Used for - // scaling bounding boxes of truth data. - // WARNING: if GlobalMinimax is used to vary the scale, this will return - // the last used scale factor. Call it before any forward, and it will return - // the minimum scale factor of the paths through the GlobalMinimax. - int XScaleFactor() const override; - - // Provides the (minimum) x scale factor to the network (of interest only to - // input units) so they can determine how to scale bounding boxes. - void CacheXScaleFactor(int factor) override; - - // Runs forward propagation of activations on the input line. 
- // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; - // Creates and returns a Pix of appropriate size for the network from the - // image_data. If non-null, *image_scale returns the image scale factor used. - // Returns nullptr on error. - /* static */ - static Pix* PrepareLSTMInputs(const ImageData& image_data, - const Network* network, int min_width, - TRand* randomizer, float* image_scale); - // Converts the given pix to a NetworkIO of height and depth appropriate to - // the given StaticShape: - // If depth == 3, convert to 24 bit color, otherwise normalized grey. - // Scale to target height, if the shape's height is > 1, or its depth if the - // height == 1. If height == 0 then no scaling. - // NOTE: It isn't safe for multiple threads to call this on the same pix. - static void PreparePixInput(const StaticShape& shape, const Pix* pix, - TRand* randomizer, NetworkIO* input); - - private: - // Input shape determines how images are dealt with. - StaticShape shape_; - // Cached total network x scale factor for scaling bounding boxes. - int cached_x_scale_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_LSTM_INPUT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstm.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstm.cpp deleted file mode 100644 index 904325f1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstm.cpp +++ /dev/null @@ -1,767 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstm.cpp -// Description: Long-term-short-term-memory Recurrent neural network. -// Author: Ray Smith -// Created: Wed May 01 17:43:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "lstm.h" - -#ifdef _OPENMP -#include -#endif -#include -#include - -#if !defined(__GNUC__) && defined(_MSC_VER) -#include // _BitScanReverse -#endif - -#include "fullyconnected.h" -#include "functions.h" -#include "networkscratch.h" -#include "tprintf.h" - -// Macros for openmp code if it is available, otherwise empty macros. -#ifdef _OPENMP -#define PARALLEL_IF_OPENMP(__num_threads) \ - PRAGMA(omp parallel if (__num_threads > 1) num_threads(__num_threads)) { \ - PRAGMA(omp sections nowait) { \ - PRAGMA(omp section) { -#define SECTION_IF_OPENMP \ - } \ - PRAGMA(omp section) \ - { - -#define END_PARALLEL_IF_OPENMP \ - } \ - } /* end of sections */ \ - } /* end of parallel section */ - -// Define the portable PRAGMA macro. 
-#ifdef _MSC_VER // Different _Pragma -#define PRAGMA(x) __pragma(x) -#else -#define PRAGMA(x) _Pragma(#x) -#endif // _MSC_VER - -#else // _OPENMP -#define PARALLEL_IF_OPENMP(__num_threads) -#define SECTION_IF_OPENMP -#define END_PARALLEL_IF_OPENMP -#endif // _OPENMP - - -namespace tesseract { - -// Max absolute value of state_. It is reasonably high to enable the state -// to count things. -const double kStateClip = 100.0; -// Max absolute value of gate_errors (the gradients). -const double kErrClip = 1.0f; - -// Calculate ceil(log2(n)). -static inline uint32_t ceil_log2(uint32_t n) -{ - // l2 = (unsigned)log2(n). -#if defined(__GNUC__) - // Use fast inline assembler code for gcc or clang. - uint32_t l2 = 31 - __builtin_clz(n); -#elif defined(_MSC_VER) - // Use fast intrinsic function for MS compiler. - unsigned long l2 = 0; - _BitScanReverse(&l2, n); -#else - if (n == 0) return UINT_MAX; - if (n == 1) return 0; - uint32_t val = n; - uint32_t l2 = 0; - while (val > 1) { - val >>= 1; - l2++; - } -#endif - // Round up if n is not a power of 2. - return (n == (1u << l2)) ? l2 : l2 + 1; -} - -LSTM::LSTM(const STRING& name, int ni, int ns, int no, bool two_dimensional, - NetworkType type) - : Network(type, name, ni, no), - na_(ni + ns), - ns_(ns), - nf_(0), - is_2d_(two_dimensional), - softmax_(nullptr), - input_width_(0) { - if (two_dimensional) na_ += ns_; - if (type_ == NT_LSTM || type_ == NT_LSTM_SUMMARY) { - nf_ = 0; - // networkbuilder ensures this is always true. - ASSERT_HOST(no == ns); - } else if (type_ == NT_LSTM_SOFTMAX || type_ == NT_LSTM_SOFTMAX_ENCODED) { - nf_ = type_ == NT_LSTM_SOFTMAX ? no_ : ceil_log2(no_); - softmax_ = new FullyConnected("LSTM Softmax", ns_, no_, NT_SOFTMAX); - } else { - tprintf("%d is invalid type of LSTM!\n", type); - ASSERT_HOST(false); - } - na_ += nf_; -} - -LSTM::~LSTM() { delete softmax_; } - -// Returns the shape output from the network given an input shape (which may -// be partially unknown ie zero). 
-StaticShape LSTM::OutputShape(const StaticShape& input_shape) const { - StaticShape result = input_shape; - result.set_depth(no_); - if (type_ == NT_LSTM_SUMMARY) result.set_width(1); - if (softmax_ != nullptr) return softmax_->OutputShape(result); - return result; -} - -// Suspends/Enables training by setting the training_ flag. Serialize and -// DeSerialize only operate on the run-time data if state is false. -void LSTM::SetEnableTraining(TrainingState state) { - if (state == TS_RE_ENABLE) { - // Enable only from temp disabled. - if (training_ == TS_TEMP_DISABLE) training_ = TS_ENABLED; - } else if (state == TS_TEMP_DISABLE) { - // Temp disable only from enabled. - if (training_ == TS_ENABLED) training_ = state; - } else { - if (state == TS_ENABLED && training_ != TS_ENABLED) { - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - gate_weights_[w].InitBackward(); - } - } - training_ = state; - } - if (softmax_ != nullptr) softmax_->SetEnableTraining(state); -} - -// Sets up the network for training. Initializes weights using weights of -// scale `range` picked according to the random number generator `randomizer`. -int LSTM::InitWeights(float range, TRand* randomizer) { - Network::SetRandomizer(randomizer); - num_weights_ = 0; - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - num_weights_ += gate_weights_[w].InitWeightsFloat( - ns_, na_ + 1, TestFlag(NF_ADAM), range, randomizer); - } - if (softmax_ != nullptr) { - num_weights_ += softmax_->InitWeights(range, randomizer); - } - return num_weights_; -} - -// Recursively searches the network for softmaxes with old_no outputs, -// and remaps their outputs according to code_map. See network.h for details. 
-int LSTM::RemapOutputs(int old_no, const std::vector& code_map) { - if (softmax_ != nullptr) { - num_weights_ -= softmax_->num_weights(); - num_weights_ += softmax_->RemapOutputs(old_no, code_map); - } - return num_weights_; -} - -// Converts a float network to an int network. -void LSTM::ConvertToInt() { - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - gate_weights_[w].ConvertToInt(); - } - if (softmax_ != nullptr) { - softmax_->ConvertToInt(); - } -} - -// Sets up the network for training using the given weight_range. -void LSTM::DebugWeights() { - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - STRING msg = name_; - msg.add_str_int(" Gate weights ", w); - gate_weights_[w].Debug2D(msg.string()); - } - if (softmax_ != nullptr) { - softmax_->DebugWeights(); - } -} - -// Writes to the given file. Returns false in case of error. -bool LSTM::Serialize(TFile* fp) const { - if (!Network::Serialize(fp)) return false; - if (!fp->Serialize(&na_)) return false; - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - if (!gate_weights_[w].Serialize(IsTraining(), fp)) return false; - } - if (softmax_ != nullptr && !softmax_->Serialize(fp)) return false; - return true; -} - -// Reads from the given file. Returns false in case of error. 
- -bool LSTM::DeSerialize(TFile* fp) { - if (!fp->DeSerialize(&na_)) return false; - if (type_ == NT_LSTM_SOFTMAX) { - nf_ = no_; - } else if (type_ == NT_LSTM_SOFTMAX_ENCODED) { - nf_ = ceil_log2(no_); - } else { - nf_ = 0; - } - is_2d_ = false; - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - if (!gate_weights_[w].DeSerialize(IsTraining(), fp)) return false; - if (w == CI) { - ns_ = gate_weights_[CI].NumOutputs(); - is_2d_ = na_ - nf_ == ni_ + 2 * ns_; - } - } - delete softmax_; - if (type_ == NT_LSTM_SOFTMAX || type_ == NT_LSTM_SOFTMAX_ENCODED) { - softmax_ = static_cast(Network::CreateFromFile(fp)); - if (softmax_ == nullptr) return false; - } else { - softmax_ = nullptr; - } - return true; -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. -void LSTM::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - input_map_ = input.stride_map(); - input_width_ = input.Width(); - if (softmax_ != nullptr) - output->ResizeFloat(input, no_); - else if (type_ == NT_LSTM_SUMMARY) - output->ResizeXTo1(input, no_); - else - output->Resize(input, no_); - ResizeForward(input); - // Temporary storage of forward computation for each gate. - NetworkScratch::FloatVec temp_lines[WT_COUNT]; - for (int i = 0; i < WT_COUNT; ++i) temp_lines[i].Init(ns_, scratch); - // Single timestep buffers for the current/recurrent output and state. - NetworkScratch::FloatVec curr_state, curr_output; - curr_state.Init(ns_, scratch); - ZeroVector(ns_, curr_state); - curr_output.Init(ns_, scratch); - ZeroVector(ns_, curr_output); - // Rotating buffers of width buf_width allow storage of the state and output - // for the other dimension, used only when working in true 2D mode. The width - // is enough to hold an entire strip of the major direction. - int buf_width = Is2D() ? 
input_map_.Size(FD_WIDTH) : 1; - GenericVector states, outputs; - if (Is2D()) { - states.init_to_size(buf_width, NetworkScratch::FloatVec()); - outputs.init_to_size(buf_width, NetworkScratch::FloatVec()); - for (int i = 0; i < buf_width; ++i) { - states[i].Init(ns_, scratch); - ZeroVector(ns_, states[i]); - outputs[i].Init(ns_, scratch); - ZeroVector(ns_, outputs[i]); - } - } - // Used only if a softmax LSTM. - NetworkScratch::FloatVec softmax_output; - NetworkScratch::IO int_output; - if (softmax_ != nullptr) { - softmax_output.Init(no_, scratch); - ZeroVector(no_, softmax_output); - int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_); - if (input.int_mode()) - int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch); - softmax_->SetupForward(input, nullptr); - } - NetworkScratch::FloatVec curr_input; - curr_input.Init(na_, scratch); - StrideMap::Index src_index(input_map_); - // Used only by NT_LSTM_SUMMARY. - StrideMap::Index dest_index(output->stride_map()); - do { - int t = src_index.t(); - // True if there is a valid old state for the 2nd dimension. - bool valid_2d = Is2D(); - if (valid_2d) { - StrideMap::Index dim_index(src_index); - if (!dim_index.AddOffset(-1, FD_HEIGHT)) valid_2d = false; - } - // Index of the 2-D revolving buffers (outputs, states). - int mod_t = Modulo(t, buf_width); // Current timestep. - // Setup the padded input in source. - source_.CopyTimeStepGeneral(t, 0, ni_, input, t, 0); - if (softmax_ != nullptr) { - source_.WriteTimeStepPart(t, ni_, nf_, softmax_output); - } - source_.WriteTimeStepPart(t, ni_ + nf_, ns_, curr_output); - if (Is2D()) - source_.WriteTimeStepPart(t, ni_ + nf_ + ns_, ns_, outputs[mod_t]); - if (!source_.int_mode()) source_.ReadTimeStep(t, curr_input); - // Matrix multiply the inputs with the source. 
- PARALLEL_IF_OPENMP(GFS) - // It looks inefficient to create the threads on each t iteration, but the - // alternative of putting the parallel outside the t loop, a single around - // the t-loop and then tasks in place of the sections is a *lot* slower. - // Cell inputs. - if (source_.int_mode()) - gate_weights_[CI].MatrixDotVector(source_.i(t), temp_lines[CI]); - else - gate_weights_[CI].MatrixDotVector(curr_input, temp_lines[CI]); - FuncInplace(ns_, temp_lines[CI]); - - SECTION_IF_OPENMP - // Input Gates. - if (source_.int_mode()) - gate_weights_[GI].MatrixDotVector(source_.i(t), temp_lines[GI]); - else - gate_weights_[GI].MatrixDotVector(curr_input, temp_lines[GI]); - FuncInplace(ns_, temp_lines[GI]); - - SECTION_IF_OPENMP - // 1-D forget gates. - if (source_.int_mode()) - gate_weights_[GF1].MatrixDotVector(source_.i(t), temp_lines[GF1]); - else - gate_weights_[GF1].MatrixDotVector(curr_input, temp_lines[GF1]); - FuncInplace(ns_, temp_lines[GF1]); - - // 2-D forget gates. - if (Is2D()) { - if (source_.int_mode()) - gate_weights_[GFS].MatrixDotVector(source_.i(t), temp_lines[GFS]); - else - gate_weights_[GFS].MatrixDotVector(curr_input, temp_lines[GFS]); - FuncInplace(ns_, temp_lines[GFS]); - } - - SECTION_IF_OPENMP - // Output gates. - if (source_.int_mode()) - gate_weights_[GO].MatrixDotVector(source_.i(t), temp_lines[GO]); - else - gate_weights_[GO].MatrixDotVector(curr_input, temp_lines[GO]); - FuncInplace(ns_, temp_lines[GO]); - END_PARALLEL_IF_OPENMP - - // Apply forget gate to state. - MultiplyVectorsInPlace(ns_, temp_lines[GF1], curr_state); - if (Is2D()) { - // Max-pool the forget gates (in 2-d) instead of blindly adding. 
- int8_t* which_fg_col = which_fg_[t]; - memset(which_fg_col, 1, ns_ * sizeof(which_fg_col[0])); - if (valid_2d) { - const double* stepped_state = states[mod_t]; - for (int i = 0; i < ns_; ++i) { - if (temp_lines[GF1][i] < temp_lines[GFS][i]) { - curr_state[i] = temp_lines[GFS][i] * stepped_state[i]; - which_fg_col[i] = 2; - } - } - } - } - MultiplyAccumulate(ns_, temp_lines[CI], temp_lines[GI], curr_state); - // Clip curr_state to a sane range. - ClipVector(ns_, -kStateClip, kStateClip, curr_state); - if (IsTraining()) { - // Save the gate node values. - node_values_[CI].WriteTimeStep(t, temp_lines[CI]); - node_values_[GI].WriteTimeStep(t, temp_lines[GI]); - node_values_[GF1].WriteTimeStep(t, temp_lines[GF1]); - node_values_[GO].WriteTimeStep(t, temp_lines[GO]); - if (Is2D()) node_values_[GFS].WriteTimeStep(t, temp_lines[GFS]); - } - FuncMultiply(curr_state, temp_lines[GO], ns_, curr_output); - if (IsTraining()) state_.WriteTimeStep(t, curr_state); - if (softmax_ != nullptr) { - if (input.int_mode()) { - int_output->WriteTimeStepPart(0, 0, ns_, curr_output); - softmax_->ForwardTimeStep(int_output->i(0), t, softmax_output); - } else { - softmax_->ForwardTimeStep(curr_output, t, softmax_output); - } - output->WriteTimeStep(t, softmax_output); - if (type_ == NT_LSTM_SOFTMAX_ENCODED) { - CodeInBinary(no_, nf_, softmax_output); - } - } else if (type_ == NT_LSTM_SUMMARY) { - // Output only at the end of a row. - if (src_index.IsLast(FD_WIDTH)) { - output->WriteTimeStep(dest_index.t(), curr_output); - dest_index.Increment(); - } - } else { - output->WriteTimeStep(t, curr_output); - } - // Save states for use by the 2nd dimension only if needed. - if (Is2D()) { - CopyVector(ns_, curr_state, states[mod_t]); - CopyVector(ns_, curr_output, outputs[mod_t]); - } - // Always zero the states at the end of every row, but only for the major - // direction. The 2-D state remains intact. 
- if (src_index.IsLast(FD_WIDTH)) { - ZeroVector(ns_, curr_state); - ZeroVector(ns_, curr_output); - } - } while (src_index.Increment()); -#if DEBUG_DETAIL > 0 - tprintf("Source:%s\n", name_.string()); - source_.Print(10); - tprintf("State:%s\n", name_.string()); - state_.Print(10); - tprintf("Output:%s\n", name_.string()); - output->Print(10); -#endif - if (debug) DisplayForward(*output); -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool LSTM::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - if (debug) DisplayBackward(fwd_deltas); - back_deltas->ResizeToMap(fwd_deltas.int_mode(), input_map_, ni_); - // ======Scratch space.====== - // Output errors from deltas with recurrence from sourceerr. - NetworkScratch::FloatVec outputerr; - outputerr.Init(ns_, scratch); - // Recurrent error in the state/source. - NetworkScratch::FloatVec curr_stateerr, curr_sourceerr; - curr_stateerr.Init(ns_, scratch); - curr_sourceerr.Init(na_, scratch); - ZeroVector(ns_, curr_stateerr); - ZeroVector(na_, curr_sourceerr); - // Errors in the gates. - NetworkScratch::FloatVec gate_errors[WT_COUNT]; - for (int g = 0; g < WT_COUNT; ++g) gate_errors[g].Init(ns_, scratch); - // Rotating buffers of width buf_width allow storage of the recurrent time- - // steps used only for true 2-D. Stores one full strip of the major direction. - int buf_width = Is2D() ? input_map_.Size(FD_WIDTH) : 1; - GenericVector stateerr, sourceerr; - if (Is2D()) { - stateerr.init_to_size(buf_width, NetworkScratch::FloatVec()); - sourceerr.init_to_size(buf_width, NetworkScratch::FloatVec()); - for (int t = 0; t < buf_width; ++t) { - stateerr[t].Init(ns_, scratch); - sourceerr[t].Init(na_, scratch); - ZeroVector(ns_, stateerr[t]); - ZeroVector(na_, sourceerr[t]); - } - } - // Parallel-generated sourceerr from each of the gates. 
- NetworkScratch::FloatVec sourceerr_temps[WT_COUNT]; - for (int w = 0; w < WT_COUNT; ++w) - sourceerr_temps[w].Init(na_, scratch); - int width = input_width_; - // Transposed gate errors stored over all timesteps for sum outer. - NetworkScratch::GradientStore gate_errors_t[WT_COUNT]; - for (int w = 0; w < WT_COUNT; ++w) { - gate_errors_t[w].Init(ns_, width, scratch); - } - // Used only if softmax_ != nullptr. - NetworkScratch::FloatVec softmax_errors; - NetworkScratch::GradientStore softmax_errors_t; - if (softmax_ != nullptr) { - softmax_errors.Init(no_, scratch); - softmax_errors_t.Init(no_, width, scratch); - } - double state_clip = Is2D() ? 9.0 : 4.0; -#if DEBUG_DETAIL > 1 - tprintf("fwd_deltas:%s\n", name_.string()); - fwd_deltas.Print(10); -#endif - StrideMap::Index dest_index(input_map_); - dest_index.InitToLast(); - // Used only by NT_LSTM_SUMMARY. - StrideMap::Index src_index(fwd_deltas.stride_map()); - src_index.InitToLast(); - do { - int t = dest_index.t(); - bool at_last_x = dest_index.IsLast(FD_WIDTH); - // up_pos is the 2-D back step, down_pos is the 2-D fwd step, and are only - // valid if >= 0, which is true if 2d and not on the top/bottom. - int up_pos = -1; - int down_pos = -1; - if (Is2D()) { - if (dest_index.index(FD_HEIGHT) > 0) { - StrideMap::Index up_index(dest_index); - if (up_index.AddOffset(-1, FD_HEIGHT)) up_pos = up_index.t(); - } - if (!dest_index.IsLast(FD_HEIGHT)) { - StrideMap::Index down_index(dest_index); - if (down_index.AddOffset(1, FD_HEIGHT)) down_pos = down_index.t(); - } - } - // Index of the 2-D revolving buffers (sourceerr, stateerr). - int mod_t = Modulo(t, buf_width); // Current timestep. - // Zero the state in the major direction only at the end of every row. - if (at_last_x) { - ZeroVector(na_, curr_sourceerr); - ZeroVector(ns_, curr_stateerr); - } - // Setup the outputerr. 
- if (type_ == NT_LSTM_SUMMARY) { - if (dest_index.IsLast(FD_WIDTH)) { - fwd_deltas.ReadTimeStep(src_index.t(), outputerr); - src_index.Decrement(); - } else { - ZeroVector(ns_, outputerr); - } - } else if (softmax_ == nullptr) { - fwd_deltas.ReadTimeStep(t, outputerr); - } else { - softmax_->BackwardTimeStep(fwd_deltas, t, softmax_errors, - softmax_errors_t.get(), outputerr); - } - if (!at_last_x) - AccumulateVector(ns_, curr_sourceerr + ni_ + nf_, outputerr); - if (down_pos >= 0) - AccumulateVector(ns_, sourceerr[mod_t] + ni_ + nf_ + ns_, outputerr); - // Apply the 1-d forget gates. - if (!at_last_x) { - const float* next_node_gf1 = node_values_[GF1].f(t + 1); - for (int i = 0; i < ns_; ++i) { - curr_stateerr[i] *= next_node_gf1[i]; - } - } - if (Is2D() && t + 1 < width) { - for (int i = 0; i < ns_; ++i) { - if (which_fg_[t + 1][i] != 1) curr_stateerr[i] = 0.0; - } - if (down_pos >= 0) { - const float* right_node_gfs = node_values_[GFS].f(down_pos); - const double* right_stateerr = stateerr[mod_t]; - for (int i = 0; i < ns_; ++i) { - if (which_fg_[down_pos][i] == 2) { - curr_stateerr[i] += right_stateerr[i] * right_node_gfs[i]; - } - } - } - } - state_.FuncMultiply3Add(node_values_[GO], t, outputerr, - curr_stateerr); - // Clip stateerr_ to a sane range. - ClipVector(ns_, -state_clip, state_clip, curr_stateerr); -#if DEBUG_DETAIL > 1 - if (t + 10 > width) { - tprintf("t=%d, stateerr=", t); - for (int i = 0; i < ns_; ++i) - tprintf(" %g,%g,%g", curr_stateerr[i], outputerr[i], - curr_sourceerr[ni_ + nf_ + i]); - tprintf("\n"); - } -#endif - // Matrix multiply to get the source errors. - PARALLEL_IF_OPENMP(GFS) - - // Cell inputs. - node_values_[CI].FuncMultiply3(t, node_values_[GI], t, - curr_stateerr, gate_errors[CI]); - ClipVector(ns_, -kErrClip, kErrClip, gate_errors[CI].get()); - gate_weights_[CI].VectorDotMatrix(gate_errors[CI], sourceerr_temps[CI]); - gate_errors_t[CI].get()->WriteStrided(t, gate_errors[CI]); - - SECTION_IF_OPENMP - // Input Gates. 
- node_values_[GI].FuncMultiply3(t, node_values_[CI], t, - curr_stateerr, gate_errors[GI]); - ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GI].get()); - gate_weights_[GI].VectorDotMatrix(gate_errors[GI], sourceerr_temps[GI]); - gate_errors_t[GI].get()->WriteStrided(t, gate_errors[GI]); - - SECTION_IF_OPENMP - // 1-D forget Gates. - if (t > 0) { - node_values_[GF1].FuncMultiply3(t, state_, t - 1, curr_stateerr, - gate_errors[GF1]); - ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GF1].get()); - gate_weights_[GF1].VectorDotMatrix(gate_errors[GF1], - sourceerr_temps[GF1]); - } else { - memset(gate_errors[GF1], 0, ns_ * sizeof(gate_errors[GF1][0])); - memset(sourceerr_temps[GF1], 0, na_ * sizeof(*sourceerr_temps[GF1])); - } - gate_errors_t[GF1].get()->WriteStrided(t, gate_errors[GF1]); - - // 2-D forget Gates. - if (up_pos >= 0) { - node_values_[GFS].FuncMultiply3(t, state_, up_pos, curr_stateerr, - gate_errors[GFS]); - ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GFS].get()); - gate_weights_[GFS].VectorDotMatrix(gate_errors[GFS], - sourceerr_temps[GFS]); - } else { - memset(gate_errors[GFS], 0, ns_ * sizeof(gate_errors[GFS][0])); - memset(sourceerr_temps[GFS], 0, na_ * sizeof(*sourceerr_temps[GFS])); - } - if (Is2D()) gate_errors_t[GFS].get()->WriteStrided(t, gate_errors[GFS]); - - SECTION_IF_OPENMP - // Output gates. - state_.Func2Multiply3(node_values_[GO], t, outputerr, - gate_errors[GO]); - ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GO].get()); - gate_weights_[GO].VectorDotMatrix(gate_errors[GO], sourceerr_temps[GO]); - gate_errors_t[GO].get()->WriteStrided(t, gate_errors[GO]); - END_PARALLEL_IF_OPENMP - - SumVectors(na_, sourceerr_temps[CI], sourceerr_temps[GI], - sourceerr_temps[GF1], sourceerr_temps[GO], sourceerr_temps[GFS], - curr_sourceerr); - back_deltas->WriteTimeStep(t, curr_sourceerr); - // Save states for use by the 2nd dimension only if needed. 
- if (Is2D()) { - CopyVector(ns_, curr_stateerr, stateerr[mod_t]); - CopyVector(na_, curr_sourceerr, sourceerr[mod_t]); - } - } while (dest_index.Decrement()); -#if DEBUG_DETAIL > 2 - for (int w = 0; w < WT_COUNT; ++w) { - tprintf("%s gate errors[%d]\n", name_.string(), w); - gate_errors_t[w].get()->PrintUnTransposed(10); - } -#endif - // Transposed source_ used to speed-up SumOuter. - NetworkScratch::GradientStore source_t, state_t; - source_t.Init(na_, width, scratch); - source_.Transpose(source_t.get()); - state_t.Init(ns_, width, scratch); - state_.Transpose(state_t.get()); -#ifdef _OPENMP -#pragma omp parallel for num_threads(GFS) if (!Is2D()) -#endif - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - gate_weights_[w].SumOuterTransposed(*gate_errors_t[w], *source_t, false); - } - if (softmax_ != nullptr) { - softmax_->FinishBackward(*softmax_errors_t); - } - return needs_to_backprop_; -} - -// Updates the weights using the given learning rate, momentum and adam_beta. -// num_samples is used in the adam computation iff use_adam_ is true. -void LSTM::Update(float learning_rate, float momentum, float adam_beta, - int num_samples) { -#if DEBUG_DETAIL > 3 - PrintW(); -#endif - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - gate_weights_[w].Update(learning_rate, momentum, adam_beta, num_samples); - } - if (softmax_ != nullptr) { - softmax_->Update(learning_rate, momentum, adam_beta, num_samples); - } -#if DEBUG_DETAIL > 3 - PrintDW(); -#endif -} - -// Sums the products of weight updates in *this and other, splitting into -// positive (same direction) in *same and negative (different direction) in -// *changed. 
-void LSTM::CountAlternators(const Network& other, double* same, - double* changed) const { - ASSERT_HOST(other.type() == type_); - const LSTM* lstm = static_cast(&other); - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - gate_weights_[w].CountAlternators(lstm->gate_weights_[w], same, changed); - } - if (softmax_ != nullptr) { - softmax_->CountAlternators(*lstm->softmax_, same, changed); - } -} - -// Prints the weights for debug purposes. -void LSTM::PrintW() { - tprintf("Weight state:%s\n", name_.string()); - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - tprintf("Gate %d, inputs\n", w); - for (int i = 0; i < ni_; ++i) { - tprintf("Row %d:", i); - for (int s = 0; s < ns_; ++s) - tprintf(" %g", gate_weights_[w].GetWeights(s)[i]); - tprintf("\n"); - } - tprintf("Gate %d, outputs\n", w); - for (int i = ni_; i < ni_ + ns_; ++i) { - tprintf("Row %d:", i - ni_); - for (int s = 0; s < ns_; ++s) - tprintf(" %g", gate_weights_[w].GetWeights(s)[i]); - tprintf("\n"); - } - tprintf("Gate %d, bias\n", w); - for (int s = 0; s < ns_; ++s) - tprintf(" %g", gate_weights_[w].GetWeights(s)[na_]); - tprintf("\n"); - } -} - -// Prints the weight deltas for debug purposes. -void LSTM::PrintDW() { - tprintf("Delta state:%s\n", name_.string()); - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - tprintf("Gate %d, inputs\n", w); - for (int i = 0; i < ni_; ++i) { - tprintf("Row %d:", i); - for (int s = 0; s < ns_; ++s) - tprintf(" %g", gate_weights_[w].GetDW(s, i)); - tprintf("\n"); - } - tprintf("Gate %d, outputs\n", w); - for (int i = ni_; i < ni_ + ns_; ++i) { - tprintf("Row %d:", i - ni_); - for (int s = 0; s < ns_; ++s) - tprintf(" %g", gate_weights_[w].GetDW(s, i)); - tprintf("\n"); - } - tprintf("Gate %d, bias\n", w); - for (int s = 0; s < ns_; ++s) - tprintf(" %g", gate_weights_[w].GetDW(s, na_)); - tprintf("\n"); - } -} - -// Resizes forward data to cope with an input image of the given width. 
-void LSTM::ResizeForward(const NetworkIO& input) { - int rounded_inputs = gate_weights_[CI].RoundInputs(na_); - source_.Resize(input, rounded_inputs); - which_fg_.ResizeNoInit(input.Width(), ns_); - if (IsTraining()) { - state_.ResizeFloat(input, ns_); - for (int w = 0; w < WT_COUNT; ++w) { - if (w == GFS && !Is2D()) continue; - node_values_[w].ResizeFloat(input, ns_); - } - } -} - - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstm.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstm.h deleted file mode 100644 index bf73affe..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstm.h +++ /dev/null @@ -1,162 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstm.h -// Description: Long-term-short-term-memory Recurrent neural network. -// Author: Ray Smith -// Created: Wed May 01 17:33:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_LSTM_H_ -#define TESSERACT_LSTM_LSTM_H_ - -#include "network.h" -#include "fullyconnected.h" - -namespace tesseract { - -// C++ Implementation of the LSTM class from lstm.py. -class LSTM : public Network { - public: - // Enum for the different weights in LSTM, to reduce some of the I/O and - // setup code to loops. 
The elements of the enum correspond to elements of an - // array of WeightMatrix or a corresponding array of NetworkIO. - enum WeightType { - CI, // Cell Inputs. - GI, // Gate at the input. - GF1, // Forget gate at the memory (1-d or looking back 1 timestep). - GO, // Gate at the output. - GFS, // Forget gate at the memory, looking back in the other dimension. - - WT_COUNT // Number of WeightTypes. - }; - - // Constructor for NT_LSTM (regular 1 or 2-d LSTM), NT_LSTM_SOFTMAX (LSTM with - // additional softmax layer included and fed back into the input at the next - // timestep), or NT_LSTM_SOFTMAX_ENCODED (as LSTM_SOFTMAX, but the feedback - // is binary encoded instead of categorical) only. - // 2-d and bidi softmax LSTMs are not rejected, but are impossible to build - // in the conventional way because the output feedback both forwards and - // backwards in time does become impossible. - LSTM(const STRING& name, int num_inputs, int num_states, int num_outputs, - bool two_dimensional, NetworkType type); - virtual ~LSTM(); - - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - StaticShape OutputShape(const StaticShape& input_shape) const override; - - STRING spec() const override { - STRING spec; - if (type_ == NT_LSTM) - spec.add_str_int("Lfx", ns_); - else if (type_ == NT_LSTM_SUMMARY) - spec.add_str_int("Lfxs", ns_); - else if (type_ == NT_LSTM_SOFTMAX) - spec.add_str_int("LS", ns_); - else if (type_ == NT_LSTM_SOFTMAX_ENCODED) - spec.add_str_int("LE", ns_); - if (softmax_ != nullptr) spec += softmax_->spec(); - return spec; - } - - // Suspends/Enables training by setting the training_ flag. Serialize and - // DeSerialize only operate on the run-time data if state is false. - void SetEnableTraining(TrainingState state) override; - - // Sets up the network for training. Initializes weights using weights of - // scale `range` picked according to the random number generator `randomizer`. 
- int InitWeights(float range, TRand* randomizer) override; - // Recursively searches the network for softmaxes with old_no outputs, - // and remaps their outputs according to code_map. See network.h for details. - int RemapOutputs(int old_no, const std::vector& code_map) override; - - // Converts a float network to an int network. - void ConvertToInt() override; - - // Provides debug output on the weights. - void DebugWeights() override; - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, NetworkScratch* scratch, - NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, NetworkIO* back_deltas) override; - // Updates the weights using the given learning rate, momentum and adam_beta. - // num_samples is used in the adam computation iff use_adam_ is true. - void Update(float learning_rate, float momentum, float adam_beta, - int num_samples) override; - // Sums the products of weight updates in *this and other, splitting into - // positive (same direction) in *same and negative (different direction) in - // *changed. - void CountAlternators(const Network& other, double* same, - double* changed) const override; - // Prints the weights for debug purposes. - void PrintW(); - // Prints the weight deltas for debug purposes. - void PrintDW(); - - // Returns true of this is a 2-d lstm. - bool Is2D() const { - return is_2d_; - } - - private: - // Resizes forward data to cope with an input image of the given width. 
- void ResizeForward(const NetworkIO& input); - - private: - // Size of padded input to weight matrices = ni_ + no_ for 1-D operation - // and ni_ + 2 * no_ for 2-D operation. Note that there is a phantom 1 input - // for the bias that makes the weight matrices of size [na + 1][no]. - int32_t na_; - // Number of internal states. Equal to no_ except for a softmax LSTM. - // ns_ is NOT serialized, but is calculated from gate_weights_. - int32_t ns_; - // Number of additional feedback states. The softmax types feed back - // additional output information on top of the ns_ internal states. - // In the case of a binary-coded (EMBEDDED) softmax, nf_ < no_. - int32_t nf_; - // Flag indicating 2-D operation. - bool is_2d_; - - // Gate weight arrays of size [na + 1, no]. - WeightMatrix gate_weights_[WT_COUNT]; - // Used only if this is a softmax LSTM. - FullyConnected* softmax_; - // Input padded with previous output of size [width, na]. - NetworkIO source_; - // Internal state used during forward operation, of size [width, ns]. - NetworkIO state_; - // State of the 2-d maxpool, generated during forward, used during backward. - GENERIC_2D_ARRAY which_fg_; - // Internal state saved from forward, but used only during backward. - NetworkIO node_values_[WT_COUNT]; - // Preserved input stride_map used for Backward when NT_LSTM_SQUASHED. - StrideMap input_map_; - int input_width_; -}; - -} // namespace tesseract. - - -#endif // TESSERACT_LSTM_LSTM_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmrecognizer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmrecognizer.cpp deleted file mode 100644 index acbc36f1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmrecognizer.cpp +++ /dev/null @@ -1,517 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmrecognizer.cpp -// Description: Top-level line recognizer class for LSTM-based networks. 
-// Author: Ray Smith -// Created: Thu May 02 10:59:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "lstmrecognizer.h" - -#include "allheaders.h" -#include "callcpp.h" -#include "dict.h" -#include "genericheap.h" -#include "helpers.h" -#include "imagedata.h" -#include "input.h" -#include "lstm.h" -#include "normalis.h" -#include "pageres.h" -#include "ratngs.h" -#include "recodebeam.h" -#include "scrollview.h" -#include "statistc.h" -#include "tprintf.h" - -namespace tesseract { - -// Default ratio between dict and non-dict words. -const double kDictRatio = 2.25; -// Default certainty offset to give the dictionary a chance. -const double kCertOffset = -0.085; - -LSTMRecognizer::LSTMRecognizer() - : network_(nullptr), - training_flags_(0), - training_iteration_(0), - sample_iteration_(0), - null_char_(UNICHAR_BROKEN), - learning_rate_(0.0f), - momentum_(0.0f), - adam_beta_(0.0f), - dict_(nullptr), - search_(nullptr), - debug_win_(nullptr) {} - -LSTMRecognizer::~LSTMRecognizer() { - delete network_; - delete dict_; - delete search_; -} - -// Loads a model from mgr, including the dictionary only if lang is not null. 
-bool LSTMRecognizer::Load(const char* lang, TessdataManager* mgr) { - TFile fp; - if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false; - if (!DeSerialize(mgr, &fp)) return false; - if (lang == nullptr) return true; - // Allow it to run without a dictionary. - LoadDictionary(lang, mgr); - return true; -} - -// Writes to the given file. Returns false in case of error. -bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const { - bool include_charsets = mgr == nullptr || - !mgr->IsComponentAvailable(TESSDATA_LSTM_RECODER) || - !mgr->IsComponentAvailable(TESSDATA_LSTM_UNICHARSET); - if (!network_->Serialize(fp)) return false; - if (include_charsets && !GetUnicharset().save_to_file(fp)) return false; - if (!network_str_.Serialize(fp)) return false; - if (!fp->Serialize(&training_flags_)) return false; - if (!fp->Serialize(&training_iteration_)) return false; - if (!fp->Serialize(&sample_iteration_)) return false; - if (!fp->Serialize(&null_char_)) return false; - if (!fp->Serialize(&adam_beta_)) return false; - if (!fp->Serialize(&learning_rate_)) return false; - if (!fp->Serialize(&momentum_)) return false; - if (include_charsets && IsRecoding() && !recoder_.Serialize(fp)) return false; - return true; -} - -// Reads from the given file. Returns false in case of error. 
-bool LSTMRecognizer::DeSerialize(const TessdataManager* mgr, TFile* fp) { - delete network_; - network_ = Network::CreateFromFile(fp); - if (network_ == nullptr) return false; - bool include_charsets = mgr == nullptr || - !mgr->IsComponentAvailable(TESSDATA_LSTM_RECODER) || - !mgr->IsComponentAvailable(TESSDATA_LSTM_UNICHARSET); - if (include_charsets && !ccutil_.unicharset.load_from_file(fp, false)) - return false; - if (!network_str_.DeSerialize(fp)) return false; - if (!fp->DeSerialize(&training_flags_)) return false; - if (!fp->DeSerialize(&training_iteration_)) return false; - if (!fp->DeSerialize(&sample_iteration_)) return false; - if (!fp->DeSerialize(&null_char_)) return false; - if (!fp->DeSerialize(&adam_beta_)) return false; - if (!fp->DeSerialize(&learning_rate_)) return false; - if (!fp->DeSerialize(&momentum_)) return false; - if (include_charsets && !LoadRecoder(fp)) return false; - if (!include_charsets && !LoadCharsets(mgr)) return false; - network_->SetRandomizer(&randomizer_); - network_->CacheXScaleFactor(network_->XScaleFactor()); - return true; -} - -// Loads the charsets from mgr. -bool LSTMRecognizer::LoadCharsets(const TessdataManager* mgr) { - TFile fp; - if (!mgr->GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false; - if (!ccutil_.unicharset.load_from_file(&fp, false)) return false; - if (!mgr->GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false; - if (!LoadRecoder(&fp)) return false; - return true; -} - -// Loads the Recoder. -bool LSTMRecognizer::LoadRecoder(TFile* fp) { - if (IsRecoding()) { - if (!recoder_.DeSerialize(fp)) return false; - RecodedCharID code; - recoder_.EncodeUnichar(UNICHAR_SPACE, &code); - if (code(0) != UNICHAR_SPACE) { - tprintf("Space was garbled in recoding!!\n"); - return false; - } - } else { - recoder_.SetupPassThrough(GetUnicharset()); - training_flags_ |= TF_COMPRESS_UNICHARSET; - } - return true; -} - -// Loads the dictionary if possible from the traineddata file. 
-// Prints a warning message, and returns false but otherwise fails silently -// and continues to work without it if loading fails. -// Note that dictionary load is independent from DeSerialize, but dependent -// on the unicharset matching. This enables training to deserialize a model -// from checkpoint or restore without having to go back and reload the -// dictionary. -bool LSTMRecognizer::LoadDictionary(const char* lang, TessdataManager* mgr) { - delete dict_; - dict_ = new Dict(&ccutil_); - dict_->SetupForLoad(Dict::GlobalDawgCache()); - dict_->LoadLSTM(lang, mgr); - if (dict_->FinishLoad()) return true; // Success. - tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n", - lang); - delete dict_; - dict_ = nullptr; - return false; -} - -// Recognizes the line image, contained within image_data, returning the -// ratings matrix and matching box_word for each WERD_RES in the output. -void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, - bool debug, double worst_dict_cert, - const TBOX& line_box, - PointerVector* words, - int lstm_choice_mode) { - NetworkIO outputs; - float scale_factor; - NetworkIO inputs; - if (!RecognizeLine(image_data, invert, debug, false, false, &scale_factor, - &inputs, &outputs)) - return; - if (search_ == nullptr) { - search_ = - new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_); - } - search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert, - &GetUnicharset(), lstm_choice_mode); - search_->ExtractBestPathAsWords(line_box, scale_factor, debug, - &GetUnicharset(), words, lstm_choice_mode); -} - -// Helper computes min and mean best results in the output. 
-void LSTMRecognizer::OutputStats(const NetworkIO& outputs, float* min_output, - float* mean_output, float* sd) { - const int kOutputScale = INT8_MAX; - STATS stats(0, kOutputScale + 1); - for (int t = 0; t < outputs.Width(); ++t) { - int best_label = outputs.BestLabel(t, nullptr); - if (best_label != null_char_) { - float best_output = outputs.f(t)[best_label]; - stats.add(static_cast(kOutputScale * best_output), 1); - } - } - // If the output is all nulls it could be that the photometric interpretation - // is wrong, so make it look bad, so the other way can win, even if not great. - if (stats.get_total() == 0) { - *min_output = 0.0f; - *mean_output = 0.0f; - *sd = 1.0f; - } else { - *min_output = static_cast(stats.min_bucket()) / kOutputScale; - *mean_output = stats.mean() / kOutputScale; - *sd = stats.sd() / kOutputScale; - } -} - -// Recognizes the image_data, returning the labels, -// scores, and corresponding pairs of start, end x-coords in coords. -bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, - bool debug, bool re_invert, bool upside_down, - float* scale_factor, NetworkIO* inputs, - NetworkIO* outputs) { - // Maximum width of image to train on. - const int kMaxImageWidth = 2560; - // This ensures consistent recognition results. - SetRandomSeed(); - int min_width = network_->XScaleFactor(); - Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width, - &randomizer_, scale_factor); - if (pix == nullptr) { - tprintf("Line cannot be recognized!!\n"); - return false; - } - if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) { - tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix), - pixGetHeight(pix)); - pixDestroy(&pix); - return false; - } - if (upside_down) pixRotate180(pix, pix); - // Reduction factor from image to coords. 
- *scale_factor = min_width / *scale_factor; - inputs->set_int_mode(IsIntMode()); - SetRandomSeed(); - Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, inputs); - network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs); - // Check for auto inversion. - float pos_min, pos_mean, pos_sd; - OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd); - if (invert && pos_min < 0.5) { - // Run again inverted and see if it is any better. - NetworkIO inv_inputs, inv_outputs; - inv_inputs.set_int_mode(IsIntMode()); - SetRandomSeed(); - pixInvert(pix, pix); - Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, - &inv_inputs); - network_->Forward(debug, inv_inputs, nullptr, &scratch_space_, &inv_outputs); - float inv_min, inv_mean, inv_sd; - OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd); - if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) { - // Inverted did better. Use inverted data. - if (debug) { - tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n", - pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd); - } - *outputs = inv_outputs; - *inputs = inv_inputs; - } else if (re_invert) { - // Inverting was not an improvement, so undo and run again, so the - // outputs match the best forward result. - SetRandomSeed(); - network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs); - } - } - pixDestroy(&pix); - if (debug) { - GenericVector labels, coords; - LabelsFromOutputs(*outputs, &labels, &coords); - DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_); - DebugActivationPath(*outputs, labels, coords); - } - return true; -} - -// Converts an array of labels to utf-8, whether or not the labels are -// augmented with character boundaries. 
-STRING LSTMRecognizer::DecodeLabels(const GenericVector& labels) { - STRING result; - int end = 1; - for (int start = 0; start < labels.size(); start = end) { - if (labels[start] == null_char_) { - end = start + 1; - } else { - result += DecodeLabel(labels, start, &end, nullptr); - } - } - return result; -} - -// Displays the forward results in a window with the characters and -// boundaries as determined by the labels and label_coords. -void LSTMRecognizer::DisplayForward(const NetworkIO& inputs, - const GenericVector& labels, - const GenericVector& label_coords, - const char* window_name, - ScrollView** window) { -#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics - Pix* input_pix = inputs.ToPix(); - Network::ClearWindow(false, window_name, pixGetWidth(input_pix), - pixGetHeight(input_pix), window); - int line_height = Network::DisplayImage(input_pix, *window); - DisplayLSTMOutput(labels, label_coords, line_height, *window); -#endif // GRAPHICS_DISABLED -} - -// Displays the labels and cuts at the corresponding xcoords. -// Size of labels should match xcoords. 
-void LSTMRecognizer::DisplayLSTMOutput(const GenericVector& labels, - const GenericVector& xcoords, - int height, ScrollView* window) { -#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics - int x_scale = network_->XScaleFactor(); - window->TextAttributes("Arial", height / 4, false, false, false); - int end = 1; - for (int start = 0; start < labels.size(); start = end) { - int xpos = xcoords[start] * x_scale; - if (labels[start] == null_char_) { - end = start + 1; - window->Pen(ScrollView::RED); - } else { - window->Pen(ScrollView::GREEN); - const char* str = DecodeLabel(labels, start, &end, nullptr); - if (*str == '\\') str = "\\\\"; - xpos = xcoords[(start + end) / 2] * x_scale; - window->Text(xpos, height, str); - } - window->Line(xpos, 0, xpos, height * 3 / 2); - } - window->Update(); -#endif // GRAPHICS_DISABLED -} - -// Prints debug output detailing the activation path that is implied by the -// label_coords. -void LSTMRecognizer::DebugActivationPath(const NetworkIO& outputs, - const GenericVector& labels, - const GenericVector& xcoords) { - if (xcoords[0] > 0) - DebugActivationRange(outputs, "", null_char_, 0, xcoords[0]); - int end = 1; - for (int start = 0; start < labels.size(); start = end) { - if (labels[start] == null_char_) { - end = start + 1; - DebugActivationRange(outputs, "", null_char_, xcoords[start], - xcoords[end]); - continue; - } else { - int decoded; - const char* label = DecodeLabel(labels, start, &end, &decoded); - DebugActivationRange(outputs, label, labels[start], xcoords[start], - xcoords[start + 1]); - for (int i = start + 1; i < end; ++i) { - DebugActivationRange(outputs, DecodeSingleLabel(labels[i]), labels[i], - xcoords[i], xcoords[i + 1]); - } - } - } -} - -// Prints debug output detailing activations and 2nd choice over a range -// of positions. 
-void LSTMRecognizer::DebugActivationRange(const NetworkIO& outputs, - const char* label, int best_choice, - int x_start, int x_end) { - tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end); - double max_score = 0.0; - double mean_score = 0.0; - const int width = x_end - x_start; - for (int x = x_start; x < x_end; ++x) { - const float* line = outputs.f(x); - const double score = line[best_choice] * 100.0; - if (score > max_score) max_score = score; - mean_score += score / width; - int best_c = 0; - double best_score = 0.0; - for (int c = 0; c < outputs.NumFeatures(); ++c) { - if (c != best_choice && line[c] > best_score) { - best_c = c; - best_score = line[c]; - } - } - tprintf(" %.3g(%s=%d=%.3g)", score, DecodeSingleLabel(best_c), best_c, - best_score * 100.0); - } - tprintf(", Mean=%g, max=%g\n", mean_score, max_score); -} - -// Helper returns true if the null_char is the winner at t, and it beats the -// null_threshold, or the next choice is space, in which case we will use the -// null anyway. -#if 0 // TODO: unused, remove if still unused after 2020. -static bool NullIsBest(const NetworkIO& output, float null_thr, - int null_char, int t) { - if (output.f(t)[null_char] >= null_thr) return true; - if (output.BestLabel(t, null_char, null_char, nullptr) != UNICHAR_SPACE) - return false; - return output.f(t)[null_char] > output.f(t)[UNICHAR_SPACE]; -} -#endif - -// Converts the network output to a sequence of labels. Outputs labels, scores -// and start xcoords of each char, and each null_char_, with an additional -// final xcoord for the end of the output. -// The conversion method is determined by internal state. 
-void LSTMRecognizer::LabelsFromOutputs(const NetworkIO& outputs, - GenericVector* labels, - GenericVector* xcoords) { - if (SimpleTextOutput()) { - LabelsViaSimpleText(outputs, labels, xcoords); - } else { - LabelsViaReEncode(outputs, labels, xcoords); - } -} - -// As LabelsViaCTC except that this function constructs the best path that -// contains only legal sequences of subcodes for CJK. -void LSTMRecognizer::LabelsViaReEncode(const NetworkIO& output, - GenericVector* labels, - GenericVector* xcoords) { - if (search_ == nullptr) { - search_ = - new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_); - } - search_->Decode(output, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr); - search_->ExtractBestPathAsLabels(labels, xcoords); -} - -// Converts the network output to a sequence of labels, with scores, using -// the simple character model (each position is a char, and the null_char_ is -// mainly intended for tail padding.) -void LSTMRecognizer::LabelsViaSimpleText(const NetworkIO& output, - GenericVector* labels, - GenericVector* xcoords) { - labels->truncate(0); - xcoords->truncate(0); - const int width = output.Width(); - for (int t = 0; t < width; ++t) { - float score = 0.0f; - const int label = output.BestLabel(t, &score); - if (label != null_char_) { - labels->push_back(label); - xcoords->push_back(t); - } - } - xcoords->push_back(width); -} - -// Returns a string corresponding to the label starting at start. Sets *end -// to the next start and if non-null, *decoded to the unichar id. -const char* LSTMRecognizer::DecodeLabel(const GenericVector& labels, - int start, int* end, int* decoded) { - *end = start + 1; - if (IsRecoding()) { - // Decode labels via recoder_. 
- RecodedCharID code; - if (labels[start] == null_char_) { - if (decoded != nullptr) { - code.Set(0, null_char_); - *decoded = recoder_.DecodeUnichar(code); - } - return ""; - } - int index = start; - while (index < labels.size() && - code.length() < RecodedCharID::kMaxCodeLen) { - code.Set(code.length(), labels[index++]); - while (index < labels.size() && labels[index] == null_char_) ++index; - int uni_id = recoder_.DecodeUnichar(code); - // If the next label isn't a valid first code, then we need to continue - // extending even if we have a valid uni_id from this prefix. - if (uni_id != INVALID_UNICHAR_ID && - (index == labels.size() || - code.length() == RecodedCharID::kMaxCodeLen || - recoder_.IsValidFirstCode(labels[index]))) { - *end = index; - if (decoded != nullptr) *decoded = uni_id; - if (uni_id == UNICHAR_SPACE) return " "; - return GetUnicharset().get_normed_unichar(uni_id); - } - } - return ""; - } else { - if (decoded != nullptr) *decoded = labels[start]; - if (labels[start] == null_char_) return ""; - if (labels[start] == UNICHAR_SPACE) return " "; - return GetUnicharset().get_normed_unichar(labels[start]); - } -} - -// Returns a string corresponding to a given single label id, falling back to -// a default of ".." for part of a multi-label unichar-id. -const char* LSTMRecognizer::DecodeSingleLabel(int label) { - if (label == null_char_) return ""; - if (IsRecoding()) { - // Decode label via recoder_. - RecodedCharID code; - code.Set(0, label); - label = recoder_.DecodeUnichar(code); - if (label == INVALID_UNICHAR_ID) return ".."; // Part of a bigger code. - } - if (label == UNICHAR_SPACE) return " "; - return GetUnicharset().get_normed_unichar(label); -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmrecognizer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmrecognizer.h deleted file mode 100644 index 75054b8e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmrecognizer.h +++ /dev/null @@ -1,312 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmrecognizer.h -// Description: Top-level line recognizer class for LSTM-based networks. -// Author: Ray Smith -// Created: Thu May 02 08:57:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_LSTMRECOGNIZER_H_ -#define TESSERACT_LSTM_LSTMRECOGNIZER_H_ - -#include "ccutil.h" -#include "helpers.h" -#include "imagedata.h" -#include "matrix.h" -#include "network.h" -#include "networkscratch.h" -#include "recodebeam.h" -#include "series.h" -#include "strngs.h" -#include "unicharcompress.h" - -class BLOB_CHOICE_IT; -struct Pix; -class ROW_RES; -class ScrollView; -class TBOX; -class WERD_RES; - -namespace tesseract { - -class Dict; -class ImageData; - -// Enum indicating training mode control flags. -enum TrainingFlags { - TF_INT_MODE = 1, - TF_COMPRESS_UNICHARSET = 64, -}; - -// Top-level line recognizer class for LSTM-based networks. -// Note that a sub-class, LSTMTrainer is used for training. 
-class LSTMRecognizer { - public: - LSTMRecognizer(); - ~LSTMRecognizer(); - - int NumOutputs() const { - return network_->NumOutputs(); - } - int training_iteration() const { - return training_iteration_; - } - int sample_iteration() const { - return sample_iteration_; - } - double learning_rate() const { - return learning_rate_; - } - LossType OutputLossType() const { - if (network_ == nullptr) return LT_NONE; - StaticShape shape; - shape = network_->OutputShape(shape); - return shape.loss_type(); - } - bool SimpleTextOutput() const { return OutputLossType() == LT_SOFTMAX; } - bool IsIntMode() const { return (training_flags_ & TF_INT_MODE) != 0; } - // True if recoder_ is active to re-encode text to a smaller space. - bool IsRecoding() const { - return (training_flags_ & TF_COMPRESS_UNICHARSET) != 0; - } - // Returns true if the network is a TensorFlow network. - bool IsTensorFlow() const { return network_->type() == NT_TENSORFLOW; } - // Returns a vector of layer ids that can be passed to other layer functions - // to access a specific layer. - GenericVector EnumerateLayers() const { - ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES); - Series* series = static_cast(network_); - GenericVector layers; - series->EnumerateLayers(nullptr, &layers); - return layers; - } - // Returns a specific layer from its id (from EnumerateLayers). - Network* GetLayer(const STRING& id) const { - ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES); - ASSERT_HOST(id.length() > 1 && id[0] == ':'); - Series* series = static_cast(network_); - return series->GetLayer(&id[1]); - } - // Returns the learning rate of the layer from its id. 
- float GetLayerLearningRate(const STRING& id) const { - ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES); - if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) { - ASSERT_HOST(id.length() > 1 && id[0] == ':'); - Series* series = static_cast(network_); - return series->LayerLearningRate(&id[1]); - } else { - return learning_rate_; - } - } - // Multiplies the all the learning rate(s) by the given factor. - void ScaleLearningRate(double factor) { - ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES); - learning_rate_ *= factor; - if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) { - GenericVector layers = EnumerateLayers(); - for (int i = 0; i < layers.size(); ++i) { - ScaleLayerLearningRate(layers[i], factor); - } - } - } - // Multiplies the learning rate of the layer with id, by the given factor. - void ScaleLayerLearningRate(const STRING& id, double factor) { - ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES); - ASSERT_HOST(id.length() > 1 && id[0] == ':'); - Series* series = static_cast(network_); - series->ScaleLayerLearningRate(&id[1], factor); - } - - // Converts the network to int if not already. - void ConvertToInt() { - if ((training_flags_ & TF_INT_MODE) == 0) { - network_->ConvertToInt(); - training_flags_ |= TF_INT_MODE; - } - } - - // Provides access to the UNICHARSET that this classifier works with. - const UNICHARSET& GetUnicharset() const { return ccutil_.unicharset; } - // Provides access to the UnicharCompress that this classifier works with. - const UnicharCompress& GetRecoder() const { return recoder_; } - // Provides access to the Dict that this classifier works with. - const Dict* GetDict() const { return dict_; } - // Sets the sample iteration to the given value. The sample_iteration_ - // determines the seed for the random number generator. The training - // iteration is incremented only by a successful training iteration. 
- void SetIteration(int iteration) { - sample_iteration_ = iteration; - } - // Accessors for textline image normalization. - int NumInputs() const { - return network_->NumInputs(); - } - int null_char() const { return null_char_; } - - // Loads a model from mgr, including the dictionary only if lang is not null. - bool Load(const char* lang, TessdataManager* mgr); - - // Writes to the given file. Returns false in case of error. - // If mgr contains a unicharset and recoder, then they are not encoded to fp. - bool Serialize(const TessdataManager* mgr, TFile* fp) const; - // Reads from the given file. Returns false in case of error. - // If mgr contains a unicharset and recoder, then they are taken from there, - // otherwise, they are part of the serialization in fp. - bool DeSerialize(const TessdataManager* mgr, TFile* fp); - // Loads the charsets from mgr. - bool LoadCharsets(const TessdataManager* mgr); - // Loads the Recoder. - bool LoadRecoder(TFile* fp); - // Loads the dictionary if possible from the traineddata file. - // Prints a warning message, and returns false but otherwise fails silently - // and continues to work without it if loading fails. - // Note that dictionary load is independent from DeSerialize, but dependent - // on the unicharset matching. This enables training to deserialize a model - // from checkpoint or restore without having to go back and reload the - // dictionary. - bool LoadDictionary(const char* lang, TessdataManager* mgr); - - // Recognizes the line image, contained within image_data, returning the - // recognized tesseract WERD_RES for the words. - // If invert, tries inverted as well if the normal interpretation doesn't - // produce a good enough result. The line_box is used for computing the - // box_word in the output words. worst_dict_cert is the worst certainty that - // will be used in a dictionary word. 
- void RecognizeLine(const ImageData& image_data, bool invert, bool debug, - double worst_dict_cert, const TBOX& line_box, - PointerVector* words, int lstm_choice_mode = 0); - - // Helper computes min and mean best results in the output. - void OutputStats(const NetworkIO& outputs, - float* min_output, float* mean_output, float* sd); - // Recognizes the image_data, returning the labels, - // scores, and corresponding pairs of start, end x-coords in coords. - // Returned in scale_factor is the reduction factor - // between the image and the output coords, for computing bounding boxes. - // If re_invert is true, the input is inverted back to its original - // photometric interpretation if inversion is attempted but fails to - // improve the results. This ensures that outputs contains the correct - // forward outputs for the best photometric interpretation. - // inputs is filled with the used inputs to the network. - bool RecognizeLine(const ImageData& image_data, bool invert, bool debug, - bool re_invert, bool upside_down, float* scale_factor, - NetworkIO* inputs, NetworkIO* outputs); - - // Converts an array of labels to utf-8, whether or not the labels are - // augmented with character boundaries. - STRING DecodeLabels(const GenericVector& labels); - - // Displays the forward results in a window with the characters and - // boundaries as determined by the labels and label_coords. - void DisplayForward(const NetworkIO& inputs, - const GenericVector& labels, - const GenericVector& label_coords, - const char* window_name, - ScrollView** window); - // Converts the network output to a sequence of labels. Outputs labels, scores - // and start xcoords of each char, and each null_char_, with an additional - // final xcoord for the end of the output. - // The conversion method is determined by internal state. 
- void LabelsFromOutputs(const NetworkIO& outputs, GenericVector* labels, - GenericVector* xcoords); - - protected: - // Sets the random seed from the sample_iteration_; - void SetRandomSeed() { - int64_t seed = static_cast(sample_iteration_) * 0x10000001; - randomizer_.set_seed(seed); - randomizer_.IntRand(); - } - - // Displays the labels and cuts at the corresponding xcoords. - // Size of labels should match xcoords. - void DisplayLSTMOutput(const GenericVector& labels, - const GenericVector& xcoords, - int height, ScrollView* window); - - // Prints debug output detailing the activation path that is implied by the - // xcoords. - void DebugActivationPath(const NetworkIO& outputs, - const GenericVector& labels, - const GenericVector& xcoords); - - // Prints debug output detailing activations and 2nd choice over a range - // of positions. - void DebugActivationRange(const NetworkIO& outputs, const char* label, - int best_choice, int x_start, int x_end); - - // As LabelsViaCTC except that this function constructs the best path that - // contains only legal sequences of subcodes for recoder_. - void LabelsViaReEncode(const NetworkIO& output, GenericVector* labels, - GenericVector* xcoords); - // Converts the network output to a sequence of labels, with scores, using - // the simple character model (each position is a char, and the null_char_ is - // mainly intended for tail padding.) - void LabelsViaSimpleText(const NetworkIO& output, - GenericVector* labels, - GenericVector* xcoords); - - // Returns a string corresponding to the label starting at start. Sets *end - // to the next start and if non-null, *decoded to the unichar id. - const char* DecodeLabel(const GenericVector& labels, int start, int* end, - int* decoded); - - // Returns a string corresponding to a given single label id, falling back to - // a default of ".." for part of a multi-label unichar-id. - const char* DecodeSingleLabel(int label); - - protected: - // The network hierarchy. 
- Network* network_; - // The unicharset. Only the unicharset element is serialized. - // Has to be a CCUtil, so Dict can point to it. - CCUtil ccutil_; - // For backward compatibility, recoder_ is serialized iff - // training_flags_ & TF_COMPRESS_UNICHARSET. - // Further encode/decode ccutil_.unicharset's ids to simplify the unicharset. - UnicharCompress recoder_; - - // ==Training parameters that are serialized to provide a record of them.== - STRING network_str_; - // Flags used to determine the training method of the network. - // See enum TrainingFlags above. - int32_t training_flags_; - // Number of actual backward training steps used. - int32_t training_iteration_; - // Index into training sample set. sample_iteration >= training_iteration_. - int32_t sample_iteration_; - // Index in softmax of null character. May take the value UNICHAR_BROKEN or - // ccutil_.unicharset.size(). - int32_t null_char_; - // Learning rate and momentum multipliers of deltas in backprop. - float learning_rate_; - float momentum_; - // Smoothing factor for 2nd moment of gradients. - float adam_beta_; - - // === NOT SERIALIZED. - TRand randomizer_; - NetworkScratch scratch_space_; - // Language model (optional) to use with the beam search. - Dict* dict_; - // Beam search held between uses to optimize memory allocation/use. - RecodeBeamSearch* search_; - - // == Debugging parameters.== - // Recognition debug display window. - ScrollView* debug_win_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_LSTM_LSTMRECOGNIZER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmtrainer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmtrainer.cpp deleted file mode 100644 index 0a7e47bd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmtrainer.cpp +++ /dev/null @@ -1,1353 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmtrainer.cpp -// Description: Top-level line trainer class for LSTM-based networks. -// Author: Ray Smith -// Created: Fir May 03 09:14:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "lstmtrainer.h" -#include - -#include "allheaders.h" -#include "boxread.h" -#include "ctc.h" -#include "imagedata.h" -#include "input.h" -#include "networkbuilder.h" -#include "ratngs.h" -#include "recodebeam.h" -#ifdef INCLUDE_TENSORFLOW -#include "tfnetwork.h" -#endif -#include "tprintf.h" - -#include "callcpp.h" - -namespace tesseract { - -// Min actual error rate increase to constitute divergence. -const double kMinDivergenceRate = 50.0; -// Min iterations since last best before acting on a stall. 
-const int kMinStallIterations = 10000; -// Fraction of current char error rate that sub_trainer_ has to be ahead -// before we declare the sub_trainer_ a success and switch to it. -const double kSubTrainerMarginFraction = 3.0 / 128; -// Factor to reduce learning rate on divergence. -const double kLearningRateDecay = sqrt(0.5); -// LR adjustment iterations. -const int kNumAdjustmentIterations = 100; -// How often to add data to the error_graph_. -const int kErrorGraphInterval = 1000; -// Number of training images to train between calls to MaintainCheckpoints. -const int kNumPagesPerBatch = 100; -// Min percent error rate to consider start-up phase over. -const int kMinStartedErrorRate = 75; -// Error rate at which to transition to stage 1. -const double kStageTransitionThreshold = 10.0; -// Confidence beyond which the truth is more likely wrong than the recognizer. -const double kHighConfidence = 0.9375; // 15/16. -// Fraction of weight sign-changing total to constitute a definite improvement. -const double kImprovementFraction = 15.0 / 16.0; -// Fraction of last written best to make it worth writing another. -const double kBestCheckpointFraction = 31.0 / 32.0; -// Scale factor for display of target activations of CTC. 
-const int kTargetXScale = 5; -const int kTargetYScale = 100; - -LSTMTrainer::LSTMTrainer() - : randomly_rotate_(false), - training_data_(0), - file_reader_(LoadDataFromFile), - file_writer_(SaveDataToFile), - checkpoint_reader_( - NewPermanentTessCallback(this, &LSTMTrainer::ReadTrainingDump)), - checkpoint_writer_( - NewPermanentTessCallback(this, &LSTMTrainer::SaveTrainingDump)), - sub_trainer_(nullptr) { - EmptyConstructor(); - debug_interval_ = 0; -} - -LSTMTrainer::LSTMTrainer(FileReader file_reader, FileWriter file_writer, - CheckPointReader checkpoint_reader, - CheckPointWriter checkpoint_writer, - const char* model_base, const char* checkpoint_name, - int debug_interval, int64_t max_memory) - : randomly_rotate_(false), - training_data_(max_memory), - file_reader_(file_reader), - file_writer_(file_writer), - checkpoint_reader_(checkpoint_reader), - checkpoint_writer_(checkpoint_writer), - sub_trainer_(nullptr), - mgr_(file_reader) { - EmptyConstructor(); - if (file_reader_ == nullptr) file_reader_ = LoadDataFromFile; - if (file_writer_ == nullptr) file_writer_ = SaveDataToFile; - if (checkpoint_reader_ == nullptr) { - checkpoint_reader_ = - NewPermanentTessCallback(this, &LSTMTrainer::ReadTrainingDump); - } - if (checkpoint_writer_ == nullptr) { - checkpoint_writer_ = - NewPermanentTessCallback(this, &LSTMTrainer::SaveTrainingDump); - } - debug_interval_ = debug_interval; - model_base_ = model_base; - checkpoint_name_ = checkpoint_name; -} - -LSTMTrainer::~LSTMTrainer() { - delete align_win_; - delete target_win_; - delete ctc_win_; - delete recon_win_; - delete checkpoint_reader_; - delete checkpoint_writer_; - delete sub_trainer_; -} - -// Tries to deserialize a trainer from the given file and silently returns -// false in case of failure. 
-bool LSTMTrainer::TryLoadingCheckpoint(const char* filename, - const char* old_traineddata) { - GenericVector data; - if (!(*file_reader_)(filename, &data)) return false; - tprintf("Loaded file %s, unpacking...\n", filename); - if (!checkpoint_reader_->Run(data, this)) return false; - StaticShape shape = network_->OutputShape(network_->InputShape()); - if (((old_traineddata == nullptr || *old_traineddata == '\0') && - network_->NumOutputs() == recoder_.code_range()) || - filename == old_traineddata) { - return true; // Normal checkpoint load complete. - } - tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(), - recoder_.code_range()); - if (old_traineddata == nullptr || *old_traineddata == '\0') { - tprintf("Must supply the old traineddata for code conversion!\n"); - return false; - } - TessdataManager old_mgr; - ASSERT_HOST(old_mgr.Init(old_traineddata)); - TFile fp; - if (!old_mgr.GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false; - UNICHARSET old_chset; - if (!old_chset.load_from_file(&fp, false)) return false; - if (!old_mgr.GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false; - UnicharCompress old_recoder; - if (!old_recoder.DeSerialize(&fp)) return false; - std::vector code_map = MapRecoder(old_chset, old_recoder); - // Set the null_char_ to the new value. - int old_null_char = null_char_; - SetNullChar(); - // Map the softmax(s) in the network. - network_->RemapOutputs(old_recoder.code_range(), code_map); - tprintf("Previous null char=%d mapped to %d\n", old_null_char, null_char_); - return true; -} - -// Initializes the trainer with a network_spec in the network description -// net_flags control network behavior according to the NetworkFlags enum. -// There isn't really much difference between them - only where the effects -// are implemented. -// For other args see NetworkBuilder::InitNetwork. -// Note: Be sure to call InitCharSet before InitNetwork! 
-bool LSTMTrainer::InitNetwork(const STRING& network_spec, int append_index, - int net_flags, float weight_range, - float learning_rate, float momentum, - float adam_beta) { - mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec.string()); - adam_beta_ = adam_beta; - learning_rate_ = learning_rate; - momentum_ = momentum; - SetNullChar(); - if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec, - append_index, net_flags, weight_range, - &randomizer_, &network_)) { - return false; - } - network_str_ += network_spec; - tprintf("Built network:%s from request %s\n", - network_->spec().string(), network_spec.string()); - tprintf( - "Training parameters:\n Debug interval = %d," - " weights = %g, learning rate = %g, momentum=%g\n", - debug_interval_, weight_range, learning_rate_, momentum_); - tprintf("null char=%d\n", null_char_); - return true; -} - -// Initializes a trainer from a serialized TFNetworkModel proto. -// Returns the global step of TensorFlow graph or 0 if failed. -int LSTMTrainer::InitTensorFlowNetwork(const std::string& tf_proto) { -#ifdef INCLUDE_TENSORFLOW - delete network_; - TFNetwork* tf_net = new TFNetwork("TensorFlow"); - training_iteration_ = tf_net->InitFromProtoStr(tf_proto); - if (training_iteration_ == 0) { - tprintf("InitFromProtoStr failed!!\n"); - return 0; - } - network_ = tf_net; - ASSERT_HOST(recoder_.code_range() == tf_net->num_classes()); - return training_iteration_; -#else - tprintf("TensorFlow not compiled in! -DINCLUDE_TENSORFLOW\n"); - return 0; -#endif -} - -// Resets all the iteration counters for fine tuning or traininng a head, -// where we want the error reporting to reset. 
-void LSTMTrainer::InitIterations() { - sample_iteration_ = 0; - training_iteration_ = 0; - learning_iteration_ = 0; - prev_sample_iteration_ = 0; - best_error_rate_ = 100.0; - best_iteration_ = 0; - worst_error_rate_ = 0.0; - worst_iteration_ = 0; - stall_iteration_ = kMinStallIterations; - improvement_steps_ = kMinStallIterations; - perfect_delay_ = 0; - last_perfect_training_iteration_ = 0; - for (int i = 0; i < ET_COUNT; ++i) { - best_error_rates_[i] = 100.0; - worst_error_rates_[i] = 0.0; - error_buffers_[i].init_to_size(kRollingBufferSize_, 0.0); - error_rates_[i] = 100.0; - } - error_rate_of_last_saved_best_ = kMinStartedErrorRate; -} - -// If the training sample is usable, grid searches for the optimal -// dict_ratio/cert_offset, and returns the results in a string of space- -// separated triplets of ratio,offset=worderr. -Trainability LSTMTrainer::GridSearchDictParams( - const ImageData* trainingdata, int iteration, double min_dict_ratio, - double dict_ratio_step, double max_dict_ratio, double min_cert_offset, - double cert_offset_step, double max_cert_offset, STRING* results) { - sample_iteration_ = iteration; - NetworkIO fwd_outputs, targets; - Trainability result = - PrepareForBackward(trainingdata, &fwd_outputs, &targets); - if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr) - return result; - - // Encode/decode the truth to get the normalization. - GenericVector truth_labels, ocr_labels, xcoords; - ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels)); - // NO-dict error. 
- RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr); - base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, - nullptr); - base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords); - STRING truth_text = DecodeLabels(truth_labels); - STRING ocr_text = DecodeLabels(ocr_labels); - double baseline_error = ComputeWordError(&truth_text, &ocr_text); - results->add_str_double("0,0=", baseline_error); - - RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_); - for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) { - for (double c = min_cert_offset; c < max_cert_offset; - c += cert_offset_step) { - search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, nullptr); - search.ExtractBestPathAsLabels(&ocr_labels, &xcoords); - truth_text = DecodeLabels(truth_labels); - ocr_text = DecodeLabels(ocr_labels); - // This is destructive on both strings. - double word_error = ComputeWordError(&truth_text, &ocr_text); - if ((r == min_dict_ratio && c == min_cert_offset) || - !std::isfinite(word_error)) { - STRING t = DecodeLabels(truth_labels); - STRING o = DecodeLabels(ocr_labels); - tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, - t.string(), o.string(), word_error, truth_labels[0]); - } - results->add_str_double(" ", r); - results->add_str_double(",", c); - results->add_str_double("=", word_error); - } - } - return result; -} - -// Provides output on the distribution of weight values. -void LSTMTrainer::DebugNetwork() { - network_->DebugWeights(); -} - -// Loads a set of lstmf files that were created using the lstm.train config to -// tesseract into memory ready for training. Returns false if nothing was -// loaded. 
-bool LSTMTrainer::LoadAllTrainingData(const GenericVector& filenames, - CachingStrategy cache_strategy, - bool randomly_rotate) { - randomly_rotate_ = randomly_rotate; - training_data_.Clear(); - return training_data_.LoadDocuments(filenames, cache_strategy, file_reader_); -} - -// Keeps track of best and locally worst char error_rate and launches tests -// using tester, when a new min or max is reached. -// Writes checkpoints at appropriate times and builds and returns a log message -// to indicate progress. Returns false if nothing interesting happened. -bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) { - PrepareLogMsg(log_msg); - double error_rate = CharError(); - int iteration = learning_iteration(); - if (iteration >= stall_iteration_ && - error_rate > best_error_rate_ * (1.0 + kSubTrainerMarginFraction) && - best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) { - // It hasn't got any better in a long while, and is a margin worse than the - // best, so go back to the best model and try a different learning rate. - StartSubtrainer(log_msg); - } - SubTrainerResult sub_trainer_result = STR_NONE; - if (sub_trainer_ != nullptr) { - sub_trainer_result = UpdateSubtrainer(log_msg); - if (sub_trainer_result == STR_REPLACED) { - // Reset the inputs, as we have overwritten *this. - error_rate = CharError(); - iteration = learning_iteration(); - PrepareLogMsg(log_msg); - } - } - bool result = true; // Something interesting happened. - GenericVector rec_model_data; - if (error_rate < best_error_rate_) { - SaveRecognitionDump(&rec_model_data); - log_msg->add_str_double(" New best char error = ", error_rate); - *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester); - // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it - // just overwrote *this. In either case, we have finished with it. 
- delete sub_trainer_; - sub_trainer_ = nullptr; - stall_iteration_ = learning_iteration() + kMinStallIterations; - if (TransitionTrainingStage(kStageTransitionThreshold)) { - log_msg->add_str_int(" Transitioned to stage ", CurrentTrainingStage()); - } - checkpoint_writer_->Run(NO_BEST_TRAINER, this, &best_trainer_); - if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) { - STRING best_model_name = DumpFilename(); - if (!(*file_writer_)(best_trainer_, best_model_name)) { - *log_msg += " failed to write best model:"; - } else { - *log_msg += " wrote best model:"; - error_rate_of_last_saved_best_ = best_error_rate_; - } - *log_msg += best_model_name; - } - } else if (error_rate > worst_error_rate_) { - SaveRecognitionDump(&rec_model_data); - log_msg->add_str_double(" New worst char error = ", error_rate); - *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester); - if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate && - best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) { - // Error rate has ballooned. Go back to the best model. - *log_msg += "\nDivergence! "; - // Copy best_trainer_ before reading it, as it will get overwritten. - GenericVector revert_data(best_trainer_); - if (checkpoint_reader_->Run(revert_data, this)) { - LogIterations("Reverted to", log_msg); - ReduceLearningRates(this, log_msg); - } else { - LogIterations("Failed to Revert at", log_msg); - } - // If it fails again, we will wait twice as long before reverting again. - stall_iteration_ = iteration + 2 * (iteration - learning_iteration()); - // Re-save the best trainer with the new learning rates and stall - // iteration. - checkpoint_writer_->Run(NO_BEST_TRAINER, this, &best_trainer_); - } - } else { - // Something interesting happened only if the sub_trainer_ was trained. 
- result = sub_trainer_result != STR_NONE; - } - if (checkpoint_writer_ != nullptr && file_writer_ != nullptr && - checkpoint_name_.length() > 0) { - // Write a current checkpoint. - GenericVector checkpoint; - if (!checkpoint_writer_->Run(FULL, this, &checkpoint) || - !(*file_writer_)(checkpoint, checkpoint_name_)) { - *log_msg += " failed to write checkpoint."; - } else { - *log_msg += " wrote checkpoint."; - } - } - *log_msg += "\n"; - return result; -} - -// Builds a string containing a progress message with current error rates. -void LSTMTrainer::PrepareLogMsg(STRING* log_msg) const { - LogIterations("At", log_msg); - log_msg->add_str_double(", Mean rms=", error_rates_[ET_RMS]); - log_msg->add_str_double("%, delta=", error_rates_[ET_DELTA]); - log_msg->add_str_double("%, char train=", error_rates_[ET_CHAR_ERROR]); - log_msg->add_str_double("%, word train=", error_rates_[ET_WORD_RECERR]); - log_msg->add_str_double("%, skip ratio=", error_rates_[ET_SKIP_RATIO]); - *log_msg += "%, "; -} - -// Appends iteration learning_iteration()/training_iteration()/ -// sample_iteration() to the log_msg. -void LSTMTrainer::LogIterations(const char* intro_str, STRING* log_msg) const { - *log_msg += intro_str; - log_msg->add_str_int(" iteration ", learning_iteration()); - log_msg->add_str_int("/", training_iteration()); - log_msg->add_str_int("/", sample_iteration()); -} - -// Returns true and increments the training_stage_ if the error rate has just -// passed through the given threshold for the first time. -bool LSTMTrainer::TransitionTrainingStage(float error_threshold) { - if (best_error_rate_ < error_threshold && - training_stage_ + 1 < num_training_stages_) { - ++training_stage_; - return true; - } - return false; -} - -// Writes to the given file. Returns false in case of error. 
-bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, - const TessdataManager* mgr, TFile* fp) const { - if (!LSTMRecognizer::Serialize(mgr, fp)) return false; - if (!fp->Serialize(&learning_iteration_)) return false; - if (!fp->Serialize(&prev_sample_iteration_)) return false; - if (!fp->Serialize(&perfect_delay_)) return false; - if (!fp->Serialize(&last_perfect_training_iteration_)) return false; - for (int i = 0; i < ET_COUNT; ++i) { - if (!error_buffers_[i].Serialize(fp)) return false; - } - if (!fp->Serialize(&error_rates_[0], countof(error_rates_))) return false; - if (!fp->Serialize(&training_stage_)) return false; - uint8_t amount = serialize_amount; - if (!fp->Serialize(&amount)) return false; - if (serialize_amount == LIGHT) return true; // We are done. - if (!fp->Serialize(&best_error_rate_)) return false; - if (!fp->Serialize(&best_error_rates_[0], countof(best_error_rates_))) return false; - if (!fp->Serialize(&best_iteration_)) return false; - if (!fp->Serialize(&worst_error_rate_)) return false; - if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false; - if (!fp->Serialize(&worst_iteration_)) return false; - if (!fp->Serialize(&stall_iteration_)) return false; - if (!best_model_data_.Serialize(fp)) return false; - if (!worst_model_data_.Serialize(fp)) return false; - if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp)) - return false; - GenericVector sub_data; - if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data)) - return false; - if (!sub_data.Serialize(fp)) return false; - if (!best_error_history_.Serialize(fp)) return false; - if (!best_error_iterations_.Serialize(fp)) return false; - return fp->Serialize(&improvement_steps_); -} - -// Reads from the given file. Returns false in case of error. -// NOTE: It is assumed that the trainer is never read cross-endian. 
-bool LSTMTrainer::DeSerialize(const TessdataManager* mgr, TFile* fp) { - if (!LSTMRecognizer::DeSerialize(mgr, fp)) return false; - if (!fp->DeSerialize(&learning_iteration_)) { - // Special case. If we successfully decoded the recognizer, but fail here - // then it means we were just given a recognizer, so issue a warning and - // allow it. - tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n"); - learning_iteration_ = 0; - network_->SetEnableTraining(TS_ENABLED); - return true; - } - if (!fp->DeSerialize(&prev_sample_iteration_)) return false; - if (!fp->DeSerialize(&perfect_delay_)) return false; - if (!fp->DeSerialize(&last_perfect_training_iteration_)) return false; - for (int i = 0; i < ET_COUNT; ++i) { - if (!error_buffers_[i].DeSerialize(fp)) return false; - } - if (!fp->DeSerialize(&error_rates_[0], countof(error_rates_))) return false; - if (!fp->DeSerialize(&training_stage_)) return false; - uint8_t amount; - if (!fp->DeSerialize(&amount)) return false; - if (amount == LIGHT) return true; // Don't read the rest. 
- if (!fp->DeSerialize(&best_error_rate_)) return false; - if (!fp->DeSerialize(&best_error_rates_[0], countof(best_error_rates_))) return false; - if (!fp->DeSerialize(&best_iteration_)) return false; - if (!fp->DeSerialize(&worst_error_rate_)) return false; - if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false; - if (!fp->DeSerialize(&worst_iteration_)) return false; - if (!fp->DeSerialize(&stall_iteration_)) return false; - if (!best_model_data_.DeSerialize(fp)) return false; - if (!worst_model_data_.DeSerialize(fp)) return false; - if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false; - GenericVector sub_data; - if (!sub_data.DeSerialize(fp)) return false; - delete sub_trainer_; - if (sub_data.empty()) { - sub_trainer_ = nullptr; - } else { - sub_trainer_ = new LSTMTrainer(); - if (!ReadTrainingDump(sub_data, sub_trainer_)) return false; - } - if (!best_error_history_.DeSerialize(fp)) return false; - if (!best_error_iterations_.DeSerialize(fp)) return false; - return fp->DeSerialize(&improvement_steps_); -} - -// De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the -// learning rates (by scaling reduction, or layer specific, according to -// NF_LAYER_SPECIFIC_LR). -void LSTMTrainer::StartSubtrainer(STRING* log_msg) { - delete sub_trainer_; - sub_trainer_ = new LSTMTrainer(); - if (!checkpoint_reader_->Run(best_trainer_, sub_trainer_)) { - *log_msg += " Failed to revert to previous best for trial!"; - delete sub_trainer_; - sub_trainer_ = nullptr; - } else { - log_msg->add_str_int(" Trial sub_trainer_ from iteration ", - sub_trainer_->training_iteration()); - // Reduce learning rate so it doesn't diverge this time. - sub_trainer_->ReduceLearningRates(this, log_msg); - // If it fails again, we will wait twice as long before reverting again. 
- int stall_offset = - learning_iteration() - sub_trainer_->learning_iteration(); - stall_iteration_ = learning_iteration() + 2 * stall_offset; - sub_trainer_->stall_iteration_ = stall_iteration_; - // Re-save the best trainer with the new learning rates and stall iteration. - checkpoint_writer_->Run(NO_BEST_TRAINER, sub_trainer_, &best_trainer_); - } -} - -// While the sub_trainer_ is behind the current training iteration and its -// training error is at least kSubTrainerMarginFraction better than the -// current training error, trains the sub_trainer_, and returns STR_UPDATED if -// it did anything. If it catches up, and has a better error rate than the -// current best, as well as a margin over the current error rate, then the -// trainer in *this is replaced with sub_trainer_, and STR_REPLACED is -// returned. STR_NONE is returned if the subtrainer wasn't good enough to -// receive any training iterations. -SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING* log_msg) { - double training_error = CharError(); - double sub_error = sub_trainer_->CharError(); - double sub_margin = (training_error - sub_error) / sub_error; - if (sub_margin >= kSubTrainerMarginFraction) { - log_msg->add_str_double(" sub_trainer=", sub_error); - log_msg->add_str_double(" margin=", 100.0 * sub_margin); - *log_msg += "\n"; - // Catch up to current iteration. 
- int end_iteration = training_iteration(); - while (sub_trainer_->training_iteration() < end_iteration && - sub_margin >= kSubTrainerMarginFraction) { - int target_iteration = - sub_trainer_->training_iteration() + kNumPagesPerBatch; - while (sub_trainer_->training_iteration() < target_iteration) { - sub_trainer_->TrainOnLine(this, false); - } - STRING batch_log = "Sub:"; - sub_trainer_->PrepareLogMsg(&batch_log); - batch_log += "\n"; - tprintf("UpdateSubtrainer:%s", batch_log.string()); - *log_msg += batch_log; - sub_error = sub_trainer_->CharError(); - sub_margin = (training_error - sub_error) / sub_error; - } - if (sub_error < best_error_rate_ && - sub_margin >= kSubTrainerMarginFraction) { - // The sub_trainer_ has won the race to a new best. Switch to it. - GenericVector updated_trainer; - SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer); - ReadTrainingDump(updated_trainer, this); - log_msg->add_str_int(" Sub trainer wins at iteration ", - training_iteration()); - *log_msg += "\n"; - return STR_REPLACED; - } - return STR_UPDATED; - } - return STR_NONE; -} - -// Reduces network learning rates, either for everything, or for layers -// independently, according to NF_LAYER_SPECIFIC_LR. -void LSTMTrainer::ReduceLearningRates(LSTMTrainer* samples_trainer, - STRING* log_msg) { - if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) { - int num_reduced = ReduceLayerLearningRates( - kLearningRateDecay, kNumAdjustmentIterations, samples_trainer); - log_msg->add_str_int("\nReduced learning rate on layers: ", num_reduced); - } else { - ScaleLearningRate(kLearningRateDecay); - log_msg->add_str_double("\nReduced learning rate to :", learning_rate_); - } - *log_msg += "\n"; -} - -// Considers reducing the learning rate independently for each layer down by -// factor(<1), or leaving it the same, by double-training the given number of -// samples and minimizing the amount of changing of sign of weight updates. 
-// Even if it looks like all weights should remain the same, an adjustment -// will be made to guarantee a different result when reverting to an old best. -// Returns the number of layer learning rates that were reduced. -int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples, - LSTMTrainer* samples_trainer) { - enum WhichWay { - LR_DOWN, // Learning rate will go down by factor. - LR_SAME, // Learning rate will stay the same. - LR_COUNT // Size of arrays. - }; - GenericVector layers = EnumerateLayers(); - int num_layers = layers.size(); - GenericVector num_weights; - num_weights.init_to_size(num_layers, 0); - GenericVector bad_sums[LR_COUNT]; - GenericVector ok_sums[LR_COUNT]; - for (int i = 0; i < LR_COUNT; ++i) { - bad_sums[i].init_to_size(num_layers, 0.0); - ok_sums[i].init_to_size(num_layers, 0.0); - } - double momentum_factor = 1.0 / (1.0 - momentum_); - GenericVector orig_trainer; - samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer); - for (int i = 0; i < num_layers; ++i) { - Network* layer = GetLayer(layers[i]); - num_weights[i] = layer->IsTraining() ? layer->num_weights() : 0; - } - int iteration = sample_iteration(); - for (int s = 0; s < num_samples; ++s) { - // Which way will we modify the learning rate? - for (int ww = 0; ww < LR_COUNT; ++ww) { - // Transfer momentum to learning rate and adjust by the ww factor. - float ww_factor = momentum_factor; - if (ww == LR_DOWN) ww_factor *= factor; - // Make a copy of *this, so we can mess about without damaging anything. - LSTMTrainer copy_trainer; - samples_trainer->ReadTrainingDump(orig_trainer, ©_trainer); - // Clear the updates, doing nothing else. - copy_trainer.network_->Update(0.0, 0.0, 0.0, 0); - // Adjust the learning rate in each layer. 
- for (int i = 0; i < num_layers; ++i) { - if (num_weights[i] == 0) continue; - copy_trainer.ScaleLayerLearningRate(layers[i], ww_factor); - } - copy_trainer.SetIteration(iteration); - // Train on the sample, but keep the update in updates_ instead of - // applying to the weights. - const ImageData* trainingdata = - copy_trainer.TrainOnLine(samples_trainer, true); - if (trainingdata == nullptr) continue; - // We'll now use this trainer again for each layer. - GenericVector updated_trainer; - samples_trainer->SaveTrainingDump(LIGHT, ©_trainer, &updated_trainer); - for (int i = 0; i < num_layers; ++i) { - if (num_weights[i] == 0) continue; - LSTMTrainer layer_trainer; - samples_trainer->ReadTrainingDump(updated_trainer, &layer_trainer); - Network* layer = layer_trainer.GetLayer(layers[i]); - // Update the weights in just the layer, using Adam if enabled. - layer->Update(0.0, momentum_, adam_beta_, - layer_trainer.training_iteration_ + 1); - // Zero the updates matrix again. - layer->Update(0.0, 0.0, 0.0, 0); - // Train again on the same sample, again holding back the updates. - layer_trainer.TrainOnLine(trainingdata, true); - // Count the sign changes in the updates in layer vs in copy_trainer. 
- float before_bad = bad_sums[ww][i]; - float before_ok = ok_sums[ww][i]; - layer->CountAlternators(*copy_trainer.GetLayer(layers[i]), - &ok_sums[ww][i], &bad_sums[ww][i]); - float bad_frac = - bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok; - if (bad_frac > 0.0f) - bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac; - } - } - ++iteration; - } - int num_lowered = 0; - for (int i = 0; i < num_layers; ++i) { - if (num_weights[i] == 0) continue; - Network* layer = GetLayer(layers[i]); - float lr = GetLayerLearningRate(layers[i]); - double total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i]; - double total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i]; - double frac_down = bad_sums[LR_DOWN][i] / total_down; - double frac_same = bad_sums[LR_SAME][i] / total_same; - tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().string(), - lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same); - if (frac_down < frac_same * kImprovementFraction) { - tprintf(" REDUCED\n"); - ScaleLayerLearningRate(layers[i], factor); - ++num_lowered; - } else { - tprintf(" SAME\n"); - } - } - if (num_lowered == 0) { - // Just lower everything to make sure. - for (int i = 0; i < num_layers; ++i) { - if (num_weights[i] > 0) { - ScaleLayerLearningRate(layers[i], factor); - ++num_lowered; - } - } - } - return num_lowered; -} - -// Converts the string to integer class labels, with appropriate null_char_s -// in between if not in SimpleTextOutput mode. Returns false on failure. 
-/* static */ -bool LSTMTrainer::EncodeString(const STRING& str, const UNICHARSET& unicharset, - const UnicharCompress* recoder, bool simple_text, - int null_char, GenericVector* labels) { - if (str.string() == nullptr || str.length() <= 0) { - tprintf("Empty truth string!\n"); - return false; - } - int err_index; - GenericVector internal_labels; - labels->truncate(0); - if (!simple_text) labels->push_back(null_char); - std::string cleaned = unicharset.CleanupString(str.string()); - if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr, - &err_index)) { - bool success = true; - for (int i = 0; i < internal_labels.size(); ++i) { - if (recoder != nullptr) { - // Re-encode labels via recoder. - RecodedCharID code; - int len = recoder->EncodeUnichar(internal_labels[i], &code); - if (len > 0) { - for (int j = 0; j < len; ++j) { - labels->push_back(code(j)); - if (!simple_text) labels->push_back(null_char); - } - } else { - success = false; - err_index = 0; - break; - } - } else { - labels->push_back(internal_labels[i]); - if (!simple_text) labels->push_back(null_char); - } - } - if (success) return true; - } - tprintf("Encoding of string failed! Failure bytes:"); - while (err_index < cleaned.size()) { - tprintf(" %x", cleaned[err_index++]); - } - tprintf("\n"); - return false; -} - -// Performs forward-backward on the given trainingdata. -// Returns a Trainability enum to indicate the suitability of the sample. -Trainability LSTMTrainer::TrainOnLine(const ImageData* trainingdata, - bool batch) { - NetworkIO fwd_outputs, targets; - Trainability trainable = - PrepareForBackward(trainingdata, &fwd_outputs, &targets); - ++sample_iteration_; - if (trainable == UNENCODABLE || trainable == NOT_BOXED) { - return trainable; // Sample was unusable. - } - bool debug = debug_interval_ > 0 && - training_iteration() % debug_interval_ == 0; - // Run backprop on the output. 
- NetworkIO bp_deltas; - if (network_->IsTraining() && - (trainable != PERFECT || - training_iteration() > - last_perfect_training_iteration_ + perfect_delay_)) { - network_->Backward(debug, targets, &scratch_space_, &bp_deltas); - network_->Update(learning_rate_, batch ? -1.0f : momentum_, adam_beta_, - training_iteration_ + 1); - } -#ifndef GRAPHICS_DISABLED - if (debug_interval_ == 1 && debug_win_ != nullptr) { - delete debug_win_->AwaitEvent(SVET_CLICK); - } -#endif // GRAPHICS_DISABLED - // Roll the memory of past means. - RollErrorBuffers(); - return trainable; -} - -// Prepares the ground truth, runs forward, and prepares the targets. -// Returns a Trainability enum to indicate the suitability of the sample. -Trainability LSTMTrainer::PrepareForBackward(const ImageData* trainingdata, - NetworkIO* fwd_outputs, - NetworkIO* targets) { - if (trainingdata == nullptr) { - tprintf("Null trainingdata.\n"); - return UNENCODABLE; - } - // Ensure repeatability of random elements even across checkpoints. - bool debug = debug_interval_ > 0 && - training_iteration() % debug_interval_ == 0; - GenericVector truth_labels; - if (!EncodeString(trainingdata->transcription(), &truth_labels)) { - tprintf("Can't encode transcription: '%s' in language '%s'\n", - trainingdata->transcription().string(), - trainingdata->language().string()); - return UNENCODABLE; - } - bool upside_down = false; - if (randomly_rotate_) { - // This ensures consistent training results. - SetRandomSeed(); - upside_down = randomizer_.SignedRand(1.0) > 0.0; - if (upside_down) { - // Modify the truth labels to match the rotation: - // Apart from space and null, increment the label. This is changes the - // script-id to the same script-id but upside-down. - // The labels need to be reversed in order, as the first is now the last. 
- for (int c = 0; c < truth_labels.size(); ++c) { - if (truth_labels[c] != UNICHAR_SPACE && truth_labels[c] != null_char_) - ++truth_labels[c]; - } - truth_labels.reverse(); - } - } - int w = 0; - while (w < truth_labels.size() && - (truth_labels[w] == UNICHAR_SPACE || truth_labels[w] == null_char_)) - ++w; - if (w == truth_labels.size()) { - tprintf("Blank transcription: %s\n", - trainingdata->transcription().string()); - return UNENCODABLE; - } - float image_scale; - NetworkIO inputs; - bool invert = trainingdata->boxes().empty(); - if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down, - &image_scale, &inputs, fwd_outputs)) { - tprintf("Image not trainable\n"); - return UNENCODABLE; - } - targets->Resize(*fwd_outputs, network_->NumOutputs()); - LossType loss_type = OutputLossType(); - if (loss_type == LT_SOFTMAX) { - if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) { - tprintf("Compute simple targets failed!\n"); - return UNENCODABLE; - } - } else if (loss_type == LT_CTC) { - if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) { - tprintf("Compute CTC targets failed!\n"); - return UNENCODABLE; - } - } else { - tprintf("Logistic outputs not implemented yet!\n"); - return UNENCODABLE; - } - GenericVector ocr_labels; - GenericVector xcoords; - LabelsFromOutputs(*fwd_outputs, &ocr_labels, &xcoords); - // CTC does not produce correct target labels to begin with. 
- if (loss_type != LT_CTC) { - LabelsFromOutputs(*targets, &truth_labels, &xcoords); - } - if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels, - *targets)) { - tprintf("Input width was %d\n", inputs.Width()); - return UNENCODABLE; - } - STRING ocr_text = DecodeLabels(ocr_labels); - STRING truth_text = DecodeLabels(truth_labels); - targets->SubtractAllFromFloat(*fwd_outputs); - if (debug_interval_ != 0) { - tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(), - ocr_text.string()); - } - double char_error = ComputeCharError(truth_labels, ocr_labels); - double word_error = ComputeWordError(&truth_text, &ocr_text); - double delta_error = ComputeErrorRates(*targets, char_error, word_error); - if (debug_interval_ != 0) { - tprintf("File %s page %d %s:\n", trainingdata->imagefilename().string(), - trainingdata->page_number(), delta_error == 0.0 ? "(Perfect)" : ""); - } - if (delta_error == 0.0) return PERFECT; - if (targets->AnySuspiciousTruth(kHighConfidence)) return HI_PRECISION_ERR; - return TRAINABLE; -} - -// Writes the trainer to memory, so that the current training state can be -// restored. *this must always be the master trainer that retains the only -// copy of the training data and language model. trainer is the model that is -// actually serialized. -bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, - const LSTMTrainer* trainer, - GenericVector* data) const { - TFile fp; - fp.OpenWrite(data); - return trainer->Serialize(serialize_amount, &mgr_, &fp); -} - -// Restores the model to *this. -bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr, - const char* data, int size) { - if (size == 0) { - tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n"); - return false; - } - TFile fp; - fp.Open(data, size); - return DeSerialize(mgr, &fp); -} - -// Writes the full recognition traineddata to the given filename. 
-bool LSTMTrainer::SaveTraineddata(const STRING& filename) { - GenericVector recognizer_data; - SaveRecognitionDump(&recognizer_data); - mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], - recognizer_data.size()); - return mgr_.SaveFile(filename, file_writer_); -} - -// Writes the recognizer to memory, so that it can be used for testing later. -void LSTMTrainer::SaveRecognitionDump(GenericVector* data) const { - TFile fp; - fp.OpenWrite(data); - network_->SetEnableTraining(TS_TEMP_DISABLE); - ASSERT_HOST(LSTMRecognizer::Serialize(&mgr_, &fp)); - network_->SetEnableTraining(TS_RE_ENABLE); -} - -// Returns a suitable filename for a training dump, based on the model_base_, -// the iteration and the error rates. -STRING LSTMTrainer::DumpFilename() const { - STRING filename; - filename.add_str_double(model_base_.string(), best_error_rate_); - filename.add_str_int("_", best_iteration_); - filename += ".checkpoint"; - return filename; -} - -// Fills the whole error buffer of the given type with the given value. -void LSTMTrainer::FillErrorBuffer(double new_error, ErrorTypes type) { - for (int i = 0; i < kRollingBufferSize_; ++i) - error_buffers_[type][i] = new_error; - error_rates_[type] = 100.0 * new_error; -} - -// Helper generates a map from each current recoder_ code (ie softmax index) -// to the corresponding old_recoder code, or -1 if there isn't one. -std::vector LSTMTrainer::MapRecoder( - const UNICHARSET& old_chset, const UnicharCompress& old_recoder) const { - int num_new_codes = recoder_.code_range(); - int num_new_unichars = GetUnicharset().size(); - std::vector code_map(num_new_codes, -1); - for (int c = 0; c < num_new_codes; ++c) { - int old_code = -1; - // Find all new unichar_ids that recode to something that includes c. - // The <= is to include the null char, which may be beyond the unicharset. 
- for (int uid = 0; uid <= num_new_unichars; ++uid) { - RecodedCharID codes; - int length = recoder_.EncodeUnichar(uid, &codes); - int code_index = 0; - while (code_index < length && codes(code_index) != c) ++code_index; - if (code_index == length) continue; - // The old unicharset must have the same unichar. - int old_uid = - uid < num_new_unichars - ? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid)) - : old_chset.size() - 1; - if (old_uid == INVALID_UNICHAR_ID) continue; - // The encoding of old_uid at the same code_index is the old code. - RecodedCharID old_codes; - if (code_index < old_recoder.EncodeUnichar(old_uid, &old_codes)) { - old_code = old_codes(code_index); - break; - } - } - code_map[c] = old_code; - } - return code_map; -} - -// Private version of InitCharSet above finishes the job after initializing -// the mgr_ data member. -void LSTMTrainer::InitCharSet() { - EmptyConstructor(); - training_flags_ = TF_COMPRESS_UNICHARSET; - // Initialize the unicharset and recoder. - if (!LoadCharsets(&mgr_)) { - ASSERT_HOST( - "Must provide a traineddata containing lstm_unicharset and" - " lstm_recoder!\n" != nullptr); - } - SetNullChar(); -} - -// Helper computes and sets the null_char_. -void LSTMTrainer::SetNullChar() { - null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN - : GetUnicharset().size(); - RecodedCharID code; - recoder_.EncodeUnichar(null_char_, &code); - null_char_ = code(0); -} - -// Factored sub-constructor sets up reasonable default values. -void LSTMTrainer::EmptyConstructor() { - align_win_ = nullptr; - target_win_ = nullptr; - ctc_win_ = nullptr; - recon_win_ = nullptr; - checkpoint_iteration_ = 0; - training_stage_ = 0; - num_training_stages_ = 2; - InitIterations(); -} - -// Outputs the string and periodically displays the given network inputs -// as an image in the given window, and the corresponding labels at the -// corresponding x_starts. -// Returns false if the truth string is empty. 
-bool LSTMTrainer::DebugLSTMTraining(const NetworkIO& inputs, - const ImageData& trainingdata, - const NetworkIO& fwd_outputs, - const GenericVector& truth_labels, - const NetworkIO& outputs) { - const STRING& truth_text = DecodeLabels(truth_labels); - if (truth_text.string() == nullptr || truth_text.length() <= 0) { - tprintf("Empty truth string at decode time!\n"); - return false; - } - if (debug_interval_ != 0) { - // Get class labels, xcoords and string. - GenericVector labels; - GenericVector xcoords; - LabelsFromOutputs(outputs, &labels, &xcoords); - STRING text = DecodeLabels(labels); - tprintf("Iteration %d: ALIGNED TRUTH : %s\n", - training_iteration(), text.string()); - if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) { - tprintf("TRAINING activation path for truth string %s\n", - truth_text.string()); - DebugActivationPath(outputs, labels, xcoords); - DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_); - if (OutputLossType() == LT_CTC) { - DisplayTargets(fwd_outputs, "CTC Outputs", &ctc_win_); - DisplayTargets(outputs, "CTC Targets", &target_win_); - } - } - } - return true; -} - -// Displays the network targets as line a line graph. -void LSTMTrainer::DisplayTargets(const NetworkIO& targets, - const char* window_name, ScrollView** window) { -#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics. 
- int width = targets.Width(); - int num_features = targets.NumFeatures(); - Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale, - window); - for (int c = 0; c < num_features; ++c) { - int color = c % (ScrollView::GREEN_YELLOW - 1) + 2; - (*window)->Pen(static_cast(color)); - int start_t = -1; - for (int t = 0; t < width; ++t) { - double target = targets.f(t)[c]; - target *= kTargetYScale; - if (target >= 1) { - if (start_t < 0) { - (*window)->SetCursor(t - 1, 0); - start_t = t; - } - (*window)->DrawTo(t, target); - } else if (start_t >= 0) { - (*window)->DrawTo(t, 0); - (*window)->DrawTo(start_t - 1, 0); - start_t = -1; - } - } - if (start_t >= 0) { - (*window)->DrawTo(width, 0); - (*window)->DrawTo(start_t - 1, 0); - } - } - (*window)->Update(); -#endif // GRAPHICS_DISABLED -} - -// Builds a no-compromises target where the first positions should be the -// truth labels and the rest is padded with the null_char_. -bool LSTMTrainer::ComputeTextTargets(const NetworkIO& outputs, - const GenericVector& truth_labels, - NetworkIO* targets) { - if (truth_labels.size() > targets->Width()) { - tprintf("Error: transcription %s too long to fit into target of width %d\n", - DecodeLabels(truth_labels).string(), targets->Width()); - return false; - } - for (int i = 0; i < truth_labels.size() && i < targets->Width(); ++i) { - targets->SetActivations(i, truth_labels[i], 1.0); - } - for (int i = truth_labels.size(); i < targets->Width(); ++i) { - targets->SetActivations(i, null_char_, 1.0); - } - return true; -} - -// Builds a target using standard CTC. truth_labels should be pre-padded with -// nulls wherever desired. They don't have to be between all labels. -// outputs is input-output, as it gets clipped to minimum probability. -bool LSTMTrainer::ComputeCTCTargets(const GenericVector& truth_labels, - NetworkIO* outputs, NetworkIO* targets) { - // Bottom-clip outputs to a minimum probability. 
- CTC::NormalizeProbs(outputs); - return CTC::ComputeCTCTargets(truth_labels, null_char_, - outputs->float_array(), targets); -} - -// Computes network errors, and stores the results in the rolling buffers, -// along with the supplied text_error. -// Returns the delta error of the current sample (not running average.) -double LSTMTrainer::ComputeErrorRates(const NetworkIO& deltas, - double char_error, double word_error) { - UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS); - // Delta error is the fraction of timesteps with >0.5 error in the top choice - // score. If zero, then the top choice characters are guaranteed correct, - // even when there is residue in the RMS error. - double delta_error = ComputeWinnerError(deltas); - UpdateErrorBuffer(delta_error, ET_DELTA); - UpdateErrorBuffer(word_error, ET_WORD_RECERR); - UpdateErrorBuffer(char_error, ET_CHAR_ERROR); - // Skip ratio measures the difference between sample_iteration_ and - // training_iteration_, which reflects the number of unusable samples, - // usually due to unencodable truth text, or the text not fitting in the - // space for the output. - double skip_count = sample_iteration_ - prev_sample_iteration_; - UpdateErrorBuffer(skip_count, ET_SKIP_RATIO); - return delta_error; -} - -// Computes the network activation RMS error rate. -double LSTMTrainer::ComputeRMSError(const NetworkIO& deltas) { - double total_error = 0.0; - int width = deltas.Width(); - int num_classes = deltas.NumFeatures(); - for (int t = 0; t < width; ++t) { - const float* class_errs = deltas.f(t); - for (int c = 0; c < num_classes; ++c) { - double error = class_errs[c]; - total_error += error * error; - } - } - return sqrt(total_error / (width * num_classes)); -} - -// Computes network activation winner error rate. (Number of values that are -// in error by >= 0.5 divided by number of time-steps.) More closely related -// to final character error than RMS, but still directly calculable from -// just the deltas. 
Because of the binary nature of the targets, zero winner -// error is a sufficient but not necessary condition for zero char error. -double LSTMTrainer::ComputeWinnerError(const NetworkIO& deltas) { - int num_errors = 0; - int width = deltas.Width(); - int num_classes = deltas.NumFeatures(); - for (int t = 0; t < width; ++t) { - const float* class_errs = deltas.f(t); - for (int c = 0; c < num_classes; ++c) { - float abs_delta = fabs(class_errs[c]); - // TODO(rays) Filtering cases where the delta is very large to cut out - // GT errors doesn't work. Find a better way or get better truth. - if (0.5 <= abs_delta) - ++num_errors; - } - } - return static_cast(num_errors) / width; -} - -// Computes a very simple bag of chars char error rate. -double LSTMTrainer::ComputeCharError(const GenericVector& truth_str, - const GenericVector& ocr_str) { - GenericVector label_counts; - label_counts.init_to_size(NumOutputs(), 0); - int truth_size = 0; - for (int i = 0; i < truth_str.size(); ++i) { - if (truth_str[i] != null_char_) { - ++label_counts[truth_str[i]]; - ++truth_size; - } - } - for (int i = 0; i < ocr_str.size(); ++i) { - if (ocr_str[i] != null_char_) { - --label_counts[ocr_str[i]]; - } - } - int char_errors = 0; - for (int i = 0; i < label_counts.size(); ++i) { - char_errors += abs(label_counts[i]); - } - if (truth_size == 0) { - return (char_errors == 0) ? 0.0 : 1.0; - } - return static_cast(char_errors) / truth_size; -} - -// Computes word recall error rate using a very simple bag of words algorithm. -// NOTE that this is destructive on both input strings. 
-double LSTMTrainer::ComputeWordError(STRING* truth_str, STRING* ocr_str) { - using StrMap = std::unordered_map>; - GenericVector truth_words, ocr_words; - truth_str->split(' ', &truth_words); - if (truth_words.empty()) return 0.0; - ocr_str->split(' ', &ocr_words); - StrMap word_counts; - for (int i = 0; i < truth_words.size(); ++i) { - std::string truth_word(truth_words[i].string()); - StrMap::iterator it = word_counts.find(truth_word); - if (it == word_counts.end()) - word_counts.insert(std::make_pair(truth_word, 1)); - else - ++it->second; - } - for (int i = 0; i < ocr_words.size(); ++i) { - std::string ocr_word(ocr_words[i].string()); - StrMap::iterator it = word_counts.find(ocr_word); - if (it == word_counts.end()) - word_counts.insert(std::make_pair(ocr_word, -1)); - else - --it->second; - } - int word_recall_errs = 0; - for (StrMap::const_iterator it = word_counts.begin(); it != word_counts.end(); - ++it) { - if (it->second > 0) word_recall_errs += it->second; - } - return static_cast(word_recall_errs) / truth_words.size(); -} - -// Updates the error buffer and corresponding mean of the given type with -// the new_error. -void LSTMTrainer::UpdateErrorBuffer(double new_error, ErrorTypes type) { - int index = training_iteration_ % kRollingBufferSize_; - error_buffers_[type][index] = new_error; - // Compute the mean error. - int mean_count = std::min(training_iteration_ + 1, error_buffers_[type].size()); - double buffer_sum = 0.0; - for (int i = 0; i < mean_count; ++i) buffer_sum += error_buffers_[type][i]; - double mean = buffer_sum / mean_count; - // Trim precision to 1/1000 of 1%. - error_rates_[type] = IntCastRounded(100000.0 * mean) / 1000.0; -} - -// Rolls error buffers and reports the current means. 
-void LSTMTrainer::RollErrorBuffers() { - prev_sample_iteration_ = sample_iteration_; - if (NewSingleError(ET_DELTA) > 0.0) - ++learning_iteration_; - else - last_perfect_training_iteration_ = training_iteration_; - ++training_iteration_; - if (debug_interval_ != 0) { - tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n", - error_rates_[ET_RMS], error_rates_[ET_DELTA], - error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR], - error_rates_[ET_SKIP_RATIO]); - } -} - -// Given that error_rate is either a new min or max, updates the best/worst -// error rates, and record of progress. -// Tester is an externally supplied callback function that tests on some -// data set with a given model and records the error rates in a graph. -STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, - const GenericVector& model_data, - TestCallback tester) { - if (error_rate > best_error_rate_ - && iteration < best_iteration_ + kErrorGraphInterval) { - // Too soon to record a new point. - if (tester != nullptr && !worst_model_data_.empty()) { - mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], - worst_model_data_.size()); - return tester->Run(worst_iteration_, nullptr, mgr_, CurrentTrainingStage()); - } else { - return ""; - } - } - STRING result; - // NOTE: there are 2 asymmetries here: - // 1. We are computing the global minimum, but the local maximum in between. - // 2. If the tester returns an empty string, indicating that it is busy, - // call it repeatedly on new local maxima to test the previous min, but - // not the other way around, as there is little point testing the maxima - // between very frequent minima. - if (error_rate < best_error_rate_) { - // This is a new (global) minimum. 
- if (tester != nullptr && !worst_model_data_.empty()) { - mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], - worst_model_data_.size()); - result = tester->Run(worst_iteration_, worst_error_rates_, mgr_, - CurrentTrainingStage()); - worst_model_data_.truncate(0); - best_model_data_ = model_data; - } - best_error_rate_ = error_rate; - memcpy(best_error_rates_, error_rates_, sizeof(error_rates_)); - best_iteration_ = iteration; - best_error_history_.push_back(error_rate); - best_error_iterations_.push_back(iteration); - // Compute 2% decay time. - double two_percent_more = error_rate + 2.0; - int i; - for (i = best_error_history_.size() - 1; - i >= 0 && best_error_history_[i] < two_percent_more; --i) { - } - int old_iteration = i >= 0 ? best_error_iterations_[i] : 0; - improvement_steps_ = iteration - old_iteration; - tprintf("2 Percent improvement time=%d, best error was %g @ %d\n", - improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0, - old_iteration); - } else if (error_rate > best_error_rate_) { - // This is a new (local) maximum. - if (tester != nullptr) { - if (!best_model_data_.empty()) { - mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0], - best_model_data_.size()); - result = tester->Run(best_iteration_, best_error_rates_, mgr_, - CurrentTrainingStage()); - } else if (!worst_model_data_.empty()) { - // Allow for multiple data points with "worst" error rate. - mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], - worst_model_data_.size()); - result = tester->Run(worst_iteration_, worst_error_rates_, mgr_, - CurrentTrainingStage()); - } - if (result.length() > 0) - best_model_data_.truncate(0); - worst_model_data_ = model_data; - } - } - worst_error_rate_ = error_rate; - memcpy(worst_error_rates_, error_rates_, sizeof(error_rates_)); - worst_iteration_ = iteration; - return result; -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmtrainer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmtrainer.h deleted file mode 100644 index 82a8c9b8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/lstmtrainer.h +++ /dev/null @@ -1,488 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmtrainer.h -// Description: Top-level line trainer class for LSTM-based networks. -// Author: Ray Smith -// Created: Fri May 03 09:07:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_LSTMTRAINER_H_ -#define TESSERACT_LSTM_LSTMTRAINER_H_ - -#include "imagedata.h" -#include "lstmrecognizer.h" -#include "rect.h" -#include "tesscallback.h" - -namespace tesseract { - -class LSTM; -class LSTMTrainer; -class Parallel; -class Reversed; -class Softmax; -class Series; - -// Enum for the types of errors that are counted. -enum ErrorTypes { - ET_RMS, // RMS activation error. - ET_DELTA, // Number of big errors in deltas. - ET_WORD_RECERR, // Output text string word recall error. - ET_CHAR_ERROR, // Output text string total char error. - ET_SKIP_RATIO, // Fraction of samples skipped. - ET_COUNT // For array sizing. -}; - -// Enum for the trainability_ flags. -enum Trainability { - TRAINABLE, // Non-zero delta error. 
- PERFECT, // Zero delta error. - UNENCODABLE, // Not trainable due to coding/alignment trouble. - HI_PRECISION_ERR, // Hi confidence disagreement. - NOT_BOXED, // Early in training and has no character boxes. -}; - -// Enum to define the amount of data to get serialized. -enum SerializeAmount { - LIGHT, // Minimal data for remote training. - NO_BEST_TRAINER, // Save an empty vector in place of best_trainer_. - FULL, // All data including best_trainer_. -}; - -// Enum to indicate how the sub_trainer_ training went. -enum SubTrainerResult { - STR_NONE, // Did nothing as not good enough. - STR_UPDATED, // Subtrainer was updated, but didn't replace *this. - STR_REPLACED // Subtrainer replaced *this. -}; - -class LSTMTrainer; -// Function to restore the trainer state from a given checkpoint. -// Returns false on failure. -typedef TessResultCallback2&, LSTMTrainer*>* - CheckPointReader; -// Function to save a checkpoint of the current trainer state. -// Returns false on failure. SerializeAmount determines the amount of the -// trainer to serialize, typically used for saving the best state. -typedef TessResultCallback3*>* CheckPointWriter; -// Function to compute and record error rates on some external test set(s). -// Args are: iteration, mean errors, model, training stage. -// Returns a STRING containing logging information about the tests. -typedef TessResultCallback4* TestCallback; - -// Trainer class for LSTM networks. Most of the effort is in creating the -// ideal target outputs from the transcription. A box file is used if it is -// available, otherwise estimates of the char widths from the unicharset are -// used to guide a DP search for the best fit to the transcription. -class LSTMTrainer : public LSTMRecognizer { - public: - LSTMTrainer(); - // Callbacks may be null, in which case defaults are used. 
- LSTMTrainer(FileReader file_reader, FileWriter file_writer, - CheckPointReader checkpoint_reader, - CheckPointWriter checkpoint_writer, - const char* model_base, const char* checkpoint_name, - int debug_interval, int64_t max_memory); - virtual ~LSTMTrainer(); - - // Tries to deserialize a trainer from the given file and silently returns - // false in case of failure. If old_traineddata is not null, then it is - // assumed that the character set is to be re-mapped from old_traineddata to - // the new, with consequent change in weight matrices etc. - bool TryLoadingCheckpoint(const char* filename, const char* old_traineddata); - - // Initializes the character set encode/decode mechanism directly from a - // previously setup traineddata containing dawgs, UNICHARSET and - // UnicharCompress. Note: Call before InitNetwork! - void InitCharSet(const std::string& traineddata_path) { - ASSERT_HOST(mgr_.Init(traineddata_path.c_str())); - InitCharSet(); - } - void InitCharSet(const TessdataManager& mgr) { - mgr_ = mgr; - InitCharSet(); - } - - // Initializes the trainer with a network_spec in the network description - // net_flags control network behavior according to the NetworkFlags enum. - // There isn't really much difference between them - only where the effects - // are implemented. - // For other args see NetworkBuilder::InitNetwork. - // Note: Be sure to call InitCharSet before InitNetwork! - bool InitNetwork(const STRING& network_spec, int append_index, int net_flags, - float weight_range, float learning_rate, float momentum, - float adam_beta); - // Initializes a trainer from a serialized TFNetworkModel proto. - // Returns the global step of TensorFlow graph or 0 if failed. - // Building a compatible TF graph: See tfnetwork.proto. - int InitTensorFlowNetwork(const std::string& tf_proto); - // Resets all the iteration counters for fine tuning or training a head, - // where we want the error reporting to reset. - void InitIterations(); - - // Accessors. 
- double ActivationError() const { - return error_rates_[ET_DELTA]; - } - double CharError() const { return error_rates_[ET_CHAR_ERROR]; } - const double* error_rates() const { - return error_rates_; - } - double best_error_rate() const { - return best_error_rate_; - } - int best_iteration() const { - return best_iteration_; - } - int learning_iteration() const { return learning_iteration_; } - int32_t improvement_steps() const { return improvement_steps_; } - void set_perfect_delay(int delay) { perfect_delay_ = delay; } - const GenericVector& best_trainer() const { return best_trainer_; } - // Returns the error that was just calculated by PrepareForBackward. - double NewSingleError(ErrorTypes type) const { - return error_buffers_[type][training_iteration() % kRollingBufferSize_]; - } - // Returns the error that was just calculated by TrainOnLine. Since - // TrainOnLine rolls the error buffers, this is one further back than - // NewSingleError. - double LastSingleError(ErrorTypes type) const { - return error_buffers_[type] - [(training_iteration() + kRollingBufferSize_ - 1) % - kRollingBufferSize_]; - } - const DocumentCache& training_data() const { - return training_data_; - } - DocumentCache* mutable_training_data() { return &training_data_; } - - // If the training sample is usable, grid searches for the optimal - // dict_ratio/cert_offset, and returns the results in a string of space- - // separated triplets of ratio,offset=worderr. - Trainability GridSearchDictParams( - const ImageData* trainingdata, int iteration, double min_dict_ratio, - double dict_ratio_step, double max_dict_ratio, double min_cert_offset, - double cert_offset_step, double max_cert_offset, STRING* results); - - // Provides output on the distribution of weight values. - void DebugNetwork(); - - // Loads a set of lstmf files that were created using the lstm.train config to - // tesseract into memory ready for training. Returns false if nothing was - // loaded. 
- bool LoadAllTrainingData(const GenericVector& filenames, - CachingStrategy cache_strategy, - bool randomly_rotate); - - // Keeps track of best and locally worst error rate, using internally computed - // values. See MaintainCheckpointsSpecific for more detail. - bool MaintainCheckpoints(TestCallback tester, STRING* log_msg); - // Keeps track of best and locally worst error_rate (whatever it is) and - // launches tests using rec_model, when a new min or max is reached. - // Writes checkpoints using train_model at appropriate times and builds and - // returns a log message to indicate progress. Returns false if nothing - // interesting happened. - bool MaintainCheckpointsSpecific(int iteration, - const GenericVector* train_model, - const GenericVector* rec_model, - TestCallback tester, STRING* log_msg); - // Builds a string containing a progress message with current error rates. - void PrepareLogMsg(STRING* log_msg) const; - // Appends iteration learning_iteration()/training_iteration()/ - // sample_iteration() to the log_msg. - void LogIterations(const char* intro_str, STRING* log_msg) const; - - // TODO(rays) Add curriculum learning. - // Returns true and increments the training_stage_ if the error rate has just - // passed through the given threshold for the first time. - bool TransitionTrainingStage(float error_threshold); - // Returns the current training stage. - int CurrentTrainingStage() const { return training_stage_; } - - // Writes to the given file. Returns false in case of error. - bool Serialize(SerializeAmount serialize_amount, - const TessdataManager* mgr, TFile* fp) const; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(const TessdataManager* mgr, TFile* fp); - - // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the - // learning rates (by scaling reduction, or layer specific, according to - // NF_LAYER_SPECIFIC_LR). 
- void StartSubtrainer(STRING* log_msg); - // While the sub_trainer_ is behind the current training iteration and its - // training error is at least kSubTrainerMarginFraction better than the - // current training error, trains the sub_trainer_, and returns STR_UPDATED if - // it did anything. If it catches up, and has a better error rate than the - // current best, as well as a margin over the current error rate, then the - // trainer in *this is replaced with sub_trainer_, and STR_REPLACED is - // returned. STR_NONE is returned if the subtrainer wasn't good enough to - // receive any training iterations. - SubTrainerResult UpdateSubtrainer(STRING* log_msg); - // Reduces network learning rates, either for everything, or for layers - // independently, according to NF_LAYER_SPECIFIC_LR. - void ReduceLearningRates(LSTMTrainer* samples_trainer, STRING* log_msg); - // Considers reducing the learning rate independently for each layer down by - // factor(<1), or leaving it the same, by double-training the given number of - // samples and minimizing the amount of changing of sign of weight updates. - // Even if it looks like all weights should remain the same, an adjustment - // will be made to guarantee a different result when reverting to an old best. - // Returns the number of layer learning rates that were reduced. - int ReduceLayerLearningRates(double factor, int num_samples, - LSTMTrainer* samples_trainer); - - // Converts the string to integer class labels, with appropriate null_char_s - // in between if not in SimpleTextOutput mode. Returns false on failure. - bool EncodeString(const STRING& str, GenericVector* labels) const { - return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : nullptr, - SimpleTextOutput(), null_char_, labels); - } - // Static version operates on supplied unicharset, encoder, simple_text. 
- static bool EncodeString(const STRING& str, const UNICHARSET& unicharset, - const UnicharCompress* recoder, bool simple_text, - int null_char, GenericVector* labels); - - // Performs forward-backward on the given trainingdata. - // Returns the sample that was used or nullptr if the next sample was deemed - // unusable. samples_trainer could be this or an alternative trainer that - // holds the training samples. - const ImageData* TrainOnLine(LSTMTrainer* samples_trainer, bool batch) { - int sample_index = sample_iteration(); - const ImageData* image = - samples_trainer->training_data_.GetPageBySerial(sample_index); - if (image != nullptr) { - Trainability trainable = TrainOnLine(image, batch); - if (trainable == UNENCODABLE || trainable == NOT_BOXED) { - return nullptr; // Sample was unusable. - } - } else { - ++sample_iteration_; - } - return image; - } - Trainability TrainOnLine(const ImageData* trainingdata, bool batch); - - // Prepares the ground truth, runs forward, and prepares the targets. - // Returns a Trainability enum to indicate the suitability of the sample. - Trainability PrepareForBackward(const ImageData* trainingdata, - NetworkIO* fwd_outputs, NetworkIO* targets); - - // Writes the trainer to memory, so that the current training state can be - // restored. *this must always be the master trainer that retains the only - // copy of the training data and language model. trainer is the model that is - // actually serialized. - bool SaveTrainingDump(SerializeAmount serialize_amount, - const LSTMTrainer* trainer, - GenericVector* data) const; - - // Reads previously saved trainer from memory. *this must always be the - // master trainer that retains the only copy of the training data and - // language model. trainer is the model that is restored. 
- bool ReadTrainingDump(const GenericVector& data, - LSTMTrainer* trainer) const { - if (data.empty()) return false; - return ReadSizedTrainingDump(&data[0], data.size(), trainer); - } - bool ReadSizedTrainingDump(const char* data, int size, - LSTMTrainer* trainer) const { - return trainer->ReadLocalTrainingDump(&mgr_, data, size); - } - // Restores the model to *this. - bool ReadLocalTrainingDump(const TessdataManager* mgr, const char* data, - int size); - - // Sets up the data for MaintainCheckpoints from a light ReadTrainingDump. - void SetupCheckpointInfo(); - - // Writes the full recognition traineddata to the given filename. - bool SaveTraineddata(const STRING& filename); - - // Writes the recognizer to memory, so that it can be used for testing later. - void SaveRecognitionDump(GenericVector* data) const; - - // Returns a suitable filename for a training dump, based on the model_base_, - // the iteration and the error rates. - STRING DumpFilename() const; - - // Fills the whole error buffer of the given type with the given value. - void FillErrorBuffer(double new_error, ErrorTypes type); - // Helper generates a map from each current recoder_ code (ie softmax index) - // to the corresponding old_recoder code, or -1 if there isn't one. - std::vector MapRecoder(const UNICHARSET& old_chset, - const UnicharCompress& old_recoder) const; - - protected: - // Private version of InitCharSet above finishes the job after initializing - // the mgr_ data member. - void InitCharSet(); - // Helper computes and sets the null_char_. - void SetNullChar(); - - // Factored sub-constructor sets up reasonable default values. - void EmptyConstructor(); - - // Outputs the string and periodically displays the given network inputs - // as an image in the given window, and the corresponding labels at the - // corresponding x_starts. - // Returns false if the truth string is empty. 
- bool DebugLSTMTraining(const NetworkIO& inputs, - const ImageData& trainingdata, - const NetworkIO& fwd_outputs, - const GenericVector& truth_labels, - const NetworkIO& outputs); - // Displays the network targets as line a line graph. - void DisplayTargets(const NetworkIO& targets, const char* window_name, - ScrollView** window); - - // Builds a no-compromises target where the first positions should be the - // truth labels and the rest is padded with the null_char_. - bool ComputeTextTargets(const NetworkIO& outputs, - const GenericVector& truth_labels, - NetworkIO* targets); - - // Builds a target using standard CTC. truth_labels should be pre-padded with - // nulls wherever desired. They don't have to be between all labels. - // outputs is input-output, as it gets clipped to minimum probability. - bool ComputeCTCTargets(const GenericVector& truth_labels, - NetworkIO* outputs, NetworkIO* targets); - - // Computes network errors, and stores the results in the rolling buffers, - // along with the supplied text_error. - // Returns the delta error of the current sample (not running average.) - double ComputeErrorRates(const NetworkIO& deltas, double char_error, - double word_error); - - // Computes the network activation RMS error rate. - double ComputeRMSError(const NetworkIO& deltas); - - // Computes network activation winner error rate. (Number of values that are - // in error by >= 0.5 divided by number of time-steps.) More closely related - // to final character error than RMS, but still directly calculable from - // just the deltas. Because of the binary nature of the targets, zero winner - // error is a sufficient but not necessary condition for zero char error. - double ComputeWinnerError(const NetworkIO& deltas); - - // Computes a very simple bag of chars char error rate. - double ComputeCharError(const GenericVector& truth_str, - const GenericVector& ocr_str); - // Computes a very simple bag of words word recall error rate. 
- // NOTE that this is destructive on both input strings. - double ComputeWordError(STRING* truth_str, STRING* ocr_str); - - // Updates the error buffer and corresponding mean of the given type with - // the new_error. - void UpdateErrorBuffer(double new_error, ErrorTypes type); - - // Rolls error buffers and reports the current means. - void RollErrorBuffers(); - - // Given that error_rate is either a new min or max, updates the best/worst - // error rates, and record of progress. - STRING UpdateErrorGraph(int iteration, double error_rate, - const GenericVector& model_data, - TestCallback tester); - - protected: - // Alignment display window. - ScrollView* align_win_; - // CTC target display window. - ScrollView* target_win_; - // CTC output display window. - ScrollView* ctc_win_; - // Reconstructed image window. - ScrollView* recon_win_; - // How often to display a debug image. - int debug_interval_; - // Iteration at which the last checkpoint was dumped. - int checkpoint_iteration_; - // Basename of files to save best models to. - STRING model_base_; - // Checkpoint filename. - STRING checkpoint_name_; - // Training data. - bool randomly_rotate_; - DocumentCache training_data_; - // Name to use when saving best_trainer_. - STRING best_model_name_; - // Number of available training stages. - int num_training_stages_; - // Checkpointing callbacks. - FileReader file_reader_; - FileWriter file_writer_; - // TODO(rays) These are pointers, and must be deleted. Switch to unique_ptr - // when we can commit to c++11. - CheckPointReader checkpoint_reader_; - CheckPointWriter checkpoint_writer_; - - // ===Serialized data to ensure that a restart produces the same results.=== - // These members are only serialized when serialize_amount != LIGHT. - // Best error rate so far. - double best_error_rate_; - // Snapshot of all error rates at best_iteration_. - double best_error_rates_[ET_COUNT]; - // Iteration of best_error_rate_. 
- int best_iteration_; - // Worst error rate since best_error_rate_. - double worst_error_rate_; - // Snapshot of all error rates at worst_iteration_. - double worst_error_rates_[ET_COUNT]; - // Iteration of worst_error_rate_. - int worst_iteration_; - // Iteration at which the process will be thought stalled. - int stall_iteration_; - // Saved recognition models for computing test error for graph points. - GenericVector best_model_data_; - GenericVector worst_model_data_; - // Saved trainer for reverting back to last known best. - GenericVector best_trainer_; - // A subsidiary trainer running with a different learning rate until either - // *this or sub_trainer_ hits a new best. - LSTMTrainer* sub_trainer_; - // Error rate at which last best model was dumped. - float error_rate_of_last_saved_best_; - // Current stage of training. - int training_stage_; - // History of best error rate against iteration. Used for computing the - // number of steps to each 2% improvement. - GenericVector best_error_history_; - GenericVector best_error_iterations_; - // Number of iterations since the best_error_rate_ was 2% more than it is now. - int32_t improvement_steps_; - // Number of iterations that yielded a non-zero delta error and thus provided - // significant learning. learning_iteration_ <= training_iteration_. - // learning_iteration_ is used to measure rate of learning progress. - int learning_iteration_; - // Saved value of sample_iteration_ before looking for the the next sample. - int prev_sample_iteration_; - // How often to include a PERFECT training sample in backprop. - // A PERFECT training sample is used if the current - // training_iteration_ > last_perfect_training_iteration_ + perfect_delay_, - // so with perfect_delay_ == 0, all samples are used, and with - // perfect_delay_ == 4, at most 1 in 5 samples will be perfect. - int perfect_delay_; - // Value of training_iteration_ at which the last PERFECT training sample - // was used in back prop. 
- int last_perfect_training_iteration_; - // Rolling buffers storing recent training errors are indexed by - // training_iteration % kRollingBufferSize_. - static const int kRollingBufferSize_ = 1000; - GenericVector error_buffers_[ET_COUNT]; - // Rounded mean percent trailing training errors in the buffers. - double error_rates_[ET_COUNT]; // RMS training error. - // Traineddata file with optional dawgs + UNICHARSET and recoder. - TessdataManager mgr_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_LSTMTRAINER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/maxpool.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/maxpool.cpp deleted file mode 100644 index 5ac26202..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/maxpool.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: maxpool.cpp -// Description: Standard Max-Pooling layer. -// Author: Ray Smith -// Created: Tue Mar 18 16:28:18 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "maxpool.h" -#include "tprintf.h" - -namespace tesseract { - -Maxpool::Maxpool(const STRING& name, int ni, int x_scale, int y_scale) - : Reconfig(name, ni, x_scale, y_scale) { - type_ = NT_MAXPOOL; - no_ = ni; -} - -// Reads from the given file. 
Returns false in case of error. -bool Maxpool::DeSerialize(TFile* fp) { - bool result = Reconfig::DeSerialize(fp); - no_ = ni_; - return result; -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. -void Maxpool::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - output->ResizeScaled(input, x_scale_, y_scale_, no_); - maxes_.ResizeNoInit(output->Width(), ni_); - back_map_ = input.stride_map(); - - StrideMap::Index dest_index(output->stride_map()); - do { - int out_t = dest_index.t(); - StrideMap::Index src_index(input.stride_map(), dest_index.index(FD_BATCH), - dest_index.index(FD_HEIGHT) * y_scale_, - dest_index.index(FD_WIDTH) * x_scale_); - // Find the max input out of x_scale_ groups of y_scale_ inputs. - // Do it independently for each input dimension. - int* max_line = maxes_[out_t]; - int in_t = src_index.t(); - output->CopyTimeStepFrom(out_t, input, in_t); - for (int i = 0; i < ni_; ++i) { - max_line[i] = in_t; - } - for (int x = 0; x < x_scale_; ++x) { - for (int y = 0; y < y_scale_; ++y) { - StrideMap::Index src_xy(src_index); - if (src_xy.AddOffset(x, FD_WIDTH) && src_xy.AddOffset(y, FD_HEIGHT)) { - output->MaxpoolTimeStep(out_t, input, src_xy.t(), max_line); - } - } - } - } while (dest_index.Increment()); -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool Maxpool::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - back_deltas->ResizeToMap(fwd_deltas.int_mode(), back_map_, ni_); - back_deltas->MaxpoolBackward(fwd_deltas, maxes_); - return true; -} - - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/maxpool.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/maxpool.h deleted file mode 100644 index 90ddc708..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/maxpool.h +++ /dev/null @@ -1,69 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: maxpool.h -// Description: Standard Max-Pooling layer. -// Author: Ray Smith -// Created: Tue Mar 18 16:28:18 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_MAXPOOL_H_ -#define TESSERACT_LSTM_MAXPOOL_H_ - -#include "reconfig.h" - -namespace tesseract { - -// Maxpooling reduction. Independently for each input, selects the location -// in the rectangle that contains the max value. -// Backprop propagates only to the position that was the max. -class Maxpool : public Reconfig { - public: - Maxpool(const STRING& name, int ni, int x_scale, int y_scale); - virtual ~Maxpool() = default; - - // Accessors. - STRING spec() const override { - STRING spec; - spec.add_str_int("Mp", y_scale_); - spec.add_str_int(",", x_scale_); - return spec; - } - - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Runs forward propagation of activations on the input line. 
- // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; - - private: - // Memory of which input was the max. - GENERIC_2D_ARRAY maxes_; -}; - - -} // namespace tesseract. - - - - - -#endif // TESSERACT_LSTM_MAXPOOL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/network.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/network.cpp deleted file mode 100644 index eaa9baff..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/network.cpp +++ /dev/null @@ -1,337 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: network.cpp -// Description: Base class for neural network implementations. -// Author: Ray Smith -// Created: Wed May 01 17:25:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "network.h" - -#include - -// This base class needs to know about all its sub-classes because of the -// factory deserializing method: CreateFromFile. -#include "allheaders.h" -#include "convolve.h" -#include "fullyconnected.h" -#include "input.h" -#include "lstm.h" -#include "maxpool.h" -#include "parallel.h" -#include "reconfig.h" -#include "reversed.h" -#include "scrollview.h" -#include "series.h" -#include "statistc.h" -#ifdef INCLUDE_TENSORFLOW -#include "tfnetwork.h" -#endif -#include "tprintf.h" - -namespace tesseract { - -// Min and max window sizes. -const int kMinWinSize = 500; -const int kMaxWinSize = 2000; -// Window frame sizes need adding on to make the content fit. -const int kXWinFrameSize = 30; -const int kYWinFrameSize = 80; - -// String names corresponding to the NetworkType enum. Keep in sync. -// Names used in Serialization to allow re-ordering/addition/deletion of -// layer types in NetworkType without invalidating existing network files. 
-char const* const Network::kTypeNames[NT_COUNT] = { - "Invalid", "Input", - "Convolve", "Maxpool", - "Parallel", "Replicated", - "ParBidiLSTM", "DepParUDLSTM", - "Par2dLSTM", "Series", - "Reconfig", "RTLReversed", - "TTBReversed", "XYTranspose", - "LSTM", "SummLSTM", - "Logistic", "LinLogistic", - "LinTanh", "Tanh", - "Relu", "Linear", - "Softmax", "SoftmaxNoCTC", - "LSTMSoftmax", "LSTMBinarySoftmax", - "TensorFlow", -}; - -Network::Network() - : type_(NT_NONE), - training_(TS_ENABLED), - needs_to_backprop_(true), - network_flags_(0), - ni_(0), - no_(0), - num_weights_(0), - forward_win_(nullptr), - backward_win_(nullptr), - randomizer_(nullptr) {} -Network::Network(NetworkType type, const STRING& name, int ni, int no) - : type_(type), - training_(TS_ENABLED), - needs_to_backprop_(true), - network_flags_(0), - ni_(ni), - no_(no), - num_weights_(0), - name_(name), - forward_win_(nullptr), - backward_win_(nullptr), - randomizer_(nullptr) {} - - -// Suspends/Enables/Permanently disables training by setting the training_ -// flag. Serialize and DeSerialize only operate on the run-time data if state -// is TS_DISABLED or TS_TEMP_DISABLE. Specifying TS_TEMP_DISABLE will -// temporarily disable layers in state TS_ENABLED, allowing a trainer to -// serialize as if it were a recognizer. -// TS_RE_ENABLE will re-enable layers that were previously in any disabled -// state. If in TS_TEMP_DISABLE then the flag is just changed, but if in -// TS_DISABLED, the deltas in the weight matrices are reinitialized so that a -// recognizer can be converted back to a trainer. -void Network::SetEnableTraining(TrainingState state) { - if (state == TS_RE_ENABLE) { - // Enable only from temp disabled. - if (training_ == TS_TEMP_DISABLE) training_ = TS_ENABLED; - } else if (state == TS_TEMP_DISABLE) { - // Temp disable only from enabled. - if (training_ == TS_ENABLED) training_ = state; - } else { - training_ = state; - } -} - -// Sets flags that control the action of the network. 
See NetworkFlags enum -// for bit values. -void Network::SetNetworkFlags(uint32_t flags) { - network_flags_ = flags; -} - -// Sets up the network for training. Initializes weights using weights of -// scale `range` picked according to the random number generator `randomizer`. -int Network::InitWeights(float range, TRand* randomizer) { - randomizer_ = randomizer; - return 0; -} - -// Provides a pointer to a TRand for any networks that care to use it. -// Note that randomizer is a borrowed pointer that should outlive the network -// and should not be deleted by any of the networks. -void Network::SetRandomizer(TRand* randomizer) { - randomizer_ = randomizer; -} - -// Sets needs_to_backprop_ to needs_backprop and returns true if -// needs_backprop || any weights in this network so the next layer forward -// can be told to produce backprop for this layer if needed. -bool Network::SetupNeedsBackprop(bool needs_backprop) { - needs_to_backprop_ = needs_backprop; - return needs_backprop || num_weights_ > 0; -} - -// Writes to the given file. Returns false in case of error. -bool Network::Serialize(TFile* fp) const { - int8_t data = NT_NONE; - if (!fp->Serialize(&data)) return false; - STRING type_name = kTypeNames[type_]; - if (!type_name.Serialize(fp)) return false; - data = training_; - if (!fp->Serialize(&data)) return false; - data = needs_to_backprop_; - if (!fp->Serialize(&data)) return false; - if (!fp->Serialize(&network_flags_)) return false; - if (!fp->Serialize(&ni_)) return false; - if (!fp->Serialize(&no_)) return false; - if (!fp->Serialize(&num_weights_)) return false; - if (!name_.Serialize(fp)) return false; - return true; -} - -// Reads from the given file. Returns false in case of error. -// Should be overridden by subclasses, but NOT called by their DeSerialize. 
-bool Network::DeSerialize(TFile* fp) { - int8_t data; - if (!fp->DeSerialize(&data)) return false; - if (data == NT_NONE) { - STRING type_name; - if (!type_name.DeSerialize(fp)) return false; - for (data = 0; data < NT_COUNT && type_name != kTypeNames[data]; ++data) { - } - if (data == NT_COUNT) { - tprintf("Invalid network layer type:%s\n", type_name.string()); - return false; - } - } - type_ = static_cast(data); - if (!fp->DeSerialize(&data)) return false; - training_ = data == TS_ENABLED ? TS_ENABLED : TS_DISABLED; - if (!fp->DeSerialize(&data)) return false; - needs_to_backprop_ = data != 0; - if (!fp->DeSerialize(&network_flags_)) return false; - if (!fp->DeSerialize(&ni_)) return false; - if (!fp->DeSerialize(&no_)) return false; - if (!fp->DeSerialize(&num_weights_)) return false; - if (!name_.DeSerialize(fp)) return false; - return true; -} - -// Reads from the given file. Returns nullptr in case of error. -// Determines the type of the serialized class and calls its DeSerialize -// on a new object of the appropriate type, which is returned. -Network* Network::CreateFromFile(TFile* fp) { - Network stub; - if (!stub.DeSerialize(fp)) return nullptr; - Network* network = nullptr; - switch (stub.type_) { - case NT_CONVOLVE: - network = new Convolve(stub.name_, stub.ni_, 0, 0); - break; - case NT_INPUT: - network = new Input(stub.name_, stub.ni_, stub.no_); - break; - case NT_LSTM: - case NT_LSTM_SOFTMAX: - case NT_LSTM_SOFTMAX_ENCODED: - case NT_LSTM_SUMMARY: - network = - new LSTM(stub.name_, stub.ni_, stub.no_, stub.no_, false, stub.type_); - break; - case NT_MAXPOOL: - network = new Maxpool(stub.name_, stub.ni_, 0, 0); - break; - // All variants of Parallel. - case NT_PARALLEL: - case NT_REPLICATED: - case NT_PAR_RL_LSTM: - case NT_PAR_UD_LSTM: - case NT_PAR_2D_LSTM: - network = new Parallel(stub.name_, stub.type_); - break; - case NT_RECONFIG: - network = new Reconfig(stub.name_, stub.ni_, 0, 0); - break; - // All variants of reversed. 
- case NT_XREVERSED: - case NT_YREVERSED: - case NT_XYTRANSPOSE: - network = new Reversed(stub.name_, stub.type_); - break; - case NT_SERIES: - network = new Series(stub.name_); - break; - case NT_TENSORFLOW: -#ifdef INCLUDE_TENSORFLOW - network = new TFNetwork(stub.name_); -#else - tprintf("TensorFlow not compiled in! -DINCLUDE_TENSORFLOW\n"); -#endif - break; - // All variants of FullyConnected. - case NT_SOFTMAX: - case NT_SOFTMAX_NO_CTC: - case NT_RELU: - case NT_TANH: - case NT_LINEAR: - case NT_LOGISTIC: - case NT_POSCLIP: - case NT_SYMCLIP: - network = new FullyConnected(stub.name_, stub.ni_, stub.no_, stub.type_); - break; - default: - break; - } - if (network) { - network->training_ = stub.training_; - network->needs_to_backprop_ = stub.needs_to_backprop_; - network->network_flags_ = stub.network_flags_; - network->num_weights_ = stub.num_weights_; - if (!network->DeSerialize(fp)) { - delete network; - network = nullptr; - } - } - return network; -} - -// Returns a random number in [-range, range]. -double Network::Random(double range) { - ASSERT_HOST(randomizer_ != nullptr); - return randomizer_->SignedRand(range); -} - -// === Debug image display methods. === -// Displays the image of the matrix to the forward window. -void Network::DisplayForward(const NetworkIO& matrix) { -#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics - Pix* image = matrix.ToPix(); - ClearWindow(false, name_.string(), pixGetWidth(image), - pixGetHeight(image), &forward_win_); - DisplayImage(image, forward_win_); - forward_win_->Update(); -#endif // GRAPHICS_DISABLED -} - -// Displays the image of the matrix to the backward window. 
-void Network::DisplayBackward(const NetworkIO& matrix) { -#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics - Pix* image = matrix.ToPix(); - STRING window_name = name_ + "-back"; - ClearWindow(false, window_name.string(), pixGetWidth(image), - pixGetHeight(image), &backward_win_); - DisplayImage(image, backward_win_); - backward_win_->Update(); -#endif // GRAPHICS_DISABLED -} - -#ifndef GRAPHICS_DISABLED -// Creates the window if needed, otherwise clears it. -void Network::ClearWindow(bool tess_coords, const char* window_name, - int width, int height, ScrollView** window) { - if (*window == nullptr) { - int min_size = std::min(width, height); - if (min_size < kMinWinSize) { - if (min_size < 1) min_size = 1; - width = width * kMinWinSize / min_size; - height = height * kMinWinSize / min_size; - } - width += kXWinFrameSize; - height += kYWinFrameSize; - if (width > kMaxWinSize) width = kMaxWinSize; - if (height > kMaxWinSize) height = kMaxWinSize; - *window = new ScrollView(window_name, 80, 100, width, height, width, height, - tess_coords); - tprintf("Created window %s of size %d, %d\n", window_name, width, height); - } else { - (*window)->Clear(); - } -} - -// Displays the pix in the given window. and returns the height of the pix. -// The pix is pixDestroyed. -int Network::DisplayImage(Pix* pix, ScrollView* window) { - int height = pixGetHeight(pix); - window->Image(pix, 0, 0); - pixDestroy(&pix); - return height; -} -#endif // GRAPHICS_DISABLED - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/network.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/network.h deleted file mode 100644 index ba528f11..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/network.h +++ /dev/null @@ -1,320 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: network.h -// Description: Base class for neural network implementations. 
-// Author: Ray Smith -// Created: Wed May 01 16:38:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_NETWORK_H_ -#define TESSERACT_LSTM_NETWORK_H_ - -#include -#include - -#include "genericvector.h" -#include "helpers.h" -#include "matrix.h" -#include "networkio.h" -#include "serialis.h" -#include "static_shape.h" -#include "tprintf.h" - -struct Pix; -class ScrollView; -class TBOX; - -namespace tesseract { - -class ImageData; -class NetworkScratch; - -// Enum to store the run-time type of a Network. Keep in sync with kTypeNames. -enum NetworkType { - NT_NONE, // The naked base class. - NT_INPUT, // Inputs from an image. - // Plumbing networks combine other networks or rearrange the inputs. - NT_CONVOLVE, // Duplicates inputs in a sliding window neighborhood. - NT_MAXPOOL, // Chooses the max result from a rectangle. - NT_PARALLEL, // Runs networks in parallel. - NT_REPLICATED, // Runs identical networks in parallel. - NT_PAR_RL_LSTM, // Runs LTR and RTL LSTMs in parallel. - NT_PAR_UD_LSTM, // Runs Up and Down LSTMs in parallel. - NT_PAR_2D_LSTM, // Runs 4 LSTMs in parallel. - NT_SERIES, // Executes a sequence of layers. - NT_RECONFIG, // Scales the time/y size but makes the output deeper. - NT_XREVERSED, // Reverses the x direction of the inputs/outputs. - NT_YREVERSED, // Reverses the y-direction of the inputs/outputs. 
- NT_XYTRANSPOSE, // Transposes x and y (for just a single op). - // Functional networks actually calculate stuff. - NT_LSTM, // Long-Short-Term-Memory block. - NT_LSTM_SUMMARY, // LSTM that only keeps its last output. - NT_LOGISTIC, // Fully connected logistic nonlinearity. - NT_POSCLIP, // Fully connected rect lin version of logistic. - NT_SYMCLIP, // Fully connected rect lin version of tanh. - NT_TANH, // Fully connected with tanh nonlinearity. - NT_RELU, // Fully connected with rectifier nonlinearity. - NT_LINEAR, // Fully connected with no nonlinearity. - NT_SOFTMAX, // Softmax uses exponential normalization, with CTC. - NT_SOFTMAX_NO_CTC, // Softmax uses exponential normalization, no CTC. - // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with - // the outputs fed back to the input of the LSTM at the next timestep. - // The ENCODED version binary encodes the softmax outputs, providing log2 of - // the number of outputs as additional inputs, and the other version just - // provides all the softmax outputs as additional inputs. - NT_LSTM_SOFTMAX, // 1-d LSTM with built-in fully connected softmax. - NT_LSTM_SOFTMAX_ENCODED, // 1-d LSTM with built-in binary encoded softmax. - // A TensorFlow graph encapsulated as a Tesseract network. - NT_TENSORFLOW, - - NT_COUNT // Array size. -}; - -// Enum of Network behavior flags. Can in theory be set for each individual -// network element. -enum NetworkFlags { - // Network forward/backprop behavior. - NF_LAYER_SPECIFIC_LR = 64, // Separate learning rate for each layer. - NF_ADAM = 128, // Weight-specific learning rate. -}; - -// State of training and desired state used in SetEnableTraining. -enum TrainingState { - // Valid states of training_. - TS_DISABLED, // Disabled permanently. - TS_ENABLED, // Enabled for backprop and to write a training dump. - // Re-enable from ANY disabled state. - TS_TEMP_DISABLE, // Temporarily disabled to write a recognition dump. - // Valid only for SetEnableTraining. 
- TS_RE_ENABLE, // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED. -}; - -// Base class for network types. Not quite an abstract base class, but almost. -// Most of the time no isolated Network exists, except prior to -// deserialization. -class Network { - public: - Network(); - Network(NetworkType type, const STRING& name, int ni, int no); - virtual ~Network() = default; - - // Accessors. - NetworkType type() const { - return type_; - } - bool IsTraining() const { return training_ == TS_ENABLED; } - bool needs_to_backprop() const { - return needs_to_backprop_; - } - int num_weights() const { return num_weights_; } - int NumInputs() const { - return ni_; - } - int NumOutputs() const { - return no_; - } - // Returns the required shape input to the network. - virtual StaticShape InputShape() const { - StaticShape result; - return result; - } - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - virtual StaticShape OutputShape(const StaticShape& input_shape) const { - StaticShape result(input_shape); - result.set_depth(no_); - return result; - } - const STRING& name() const { - return name_; - } - virtual STRING spec() const { - return "?"; - } - bool TestFlag(NetworkFlags flag) const { - return (network_flags_ & flag) != 0; - } - - // Initialization and administrative functions that are mostly provided - // by Plumbing. - // Returns true if the given type is derived from Plumbing, and thus contains - // multiple sub-networks that can have their own learning rate. - virtual bool IsPlumbingType() const { return false; } - - // Suspends/Enables/Permanently disables training by setting the training_ - // flag. Serialize and DeSerialize only operate on the run-time data if state - // is TS_DISABLED or TS_TEMP_DISABLE. Specifying TS_TEMP_DISABLE will - // temporarily disable layers in state TS_ENABLED, allowing a trainer to - // serialize as if it were a recognizer. 
- // TS_RE_ENABLE will re-enable layers that were previously in any disabled - // state. If in TS_TEMP_DISABLE then the flag is just changed, but if in - // TS_DISABLED, the deltas in the weight matrices are reinitialized so that a - // recognizer can be converted back to a trainer. - virtual void SetEnableTraining(TrainingState state); - - // Sets flags that control the action of the network. See NetworkFlags enum - // for bit values. - virtual void SetNetworkFlags(uint32_t flags); - - // Sets up the network for training. Initializes weights using weights of - // scale `range` picked according to the random number generator `randomizer`. - // Note that randomizer is a borrowed pointer that should outlive the network - // and should not be deleted by any of the networks. - // Returns the number of weights initialized. - virtual int InitWeights(float range, TRand* randomizer); - // Changes the number of outputs to the outside world to the size of the given - // code_map. Recursively searches the entire network for Softmax layers that - // have exactly old_no outputs, and operates only on those, leaving all others - // unchanged. This enables networks with multiple output layers to get all - // their softmaxes updated, but if an internal layer, uses one of those - // softmaxes for input, then the inputs will effectively be scrambled. - // TODO(rays) Fix this before any such network is implemented. - // The softmaxes are resized by copying the old weight matrix entries for each - // output from code_map[output] where non-negative, and uses the mean (over - // all outputs) of the existing weights for all outputs with negative code_map - // entries. Returns the new number of weights. - virtual int RemapOutputs(int old_no, const std::vector& code_map) { - return 0; - } - - // Converts a float network to an int network. - virtual void ConvertToInt() {} - - // Provides a pointer to a TRand for any networks that care to use it. 
- // Note that randomizer is a borrowed pointer that should outlive the network - // and should not be deleted by any of the networks. - virtual void SetRandomizer(TRand* randomizer); - - // Sets needs_to_backprop_ to needs_backprop and returns true if - // needs_backprop || any weights in this network so the next layer forward - // can be told to produce backprop for this layer if needed. - virtual bool SetupNeedsBackprop(bool needs_backprop); - - // Returns the most recent reduction factor that the network applied to the - // time sequence. Assumes that any 2-d is already eliminated. Used for - // scaling bounding boxes of truth data and calculating result bounding boxes. - // WARNING: if GlobalMinimax is used to vary the scale, this will return - // the last used scale factor. Call it before any forward, and it will return - // the minimum scale factor of the paths through the GlobalMinimax. - virtual int XScaleFactor() const { - return 1; - } - - // Provides the (minimum) x scale factor to the network (of interest only to - // input units) so they can determine how to scale bounding boxes. - virtual void CacheXScaleFactor(int factor) {} - - // Provides debug output on the weights. - virtual void DebugWeights() { - tprintf("Must override Network::DebugWeights for type %d\n", type_); - } - - // Writes to the given file. Returns false in case of error. - // Should be overridden by subclasses, but called by their Serialize. - virtual bool Serialize(TFile* fp) const; - // Reads from the given file. Returns false in case of error. - // Should be overridden by subclasses, but NOT called by their DeSerialize. - virtual bool DeSerialize(TFile* fp); - - // Updates the weights using the given learning rate, momentum and adam_beta. - // num_samples is used in the adam computation iff use_adam_ is true. 
- virtual void Update(float learning_rate, float momentum, float adam_beta, - int num_samples) {} - // Sums the products of weight updates in *this and other, splitting into - // positive (same direction) in *same and negative (different direction) in - // *changed. - virtual void CountAlternators(const Network& other, double* same, - double* changed) const {} - - // Reads from the given file. Returns nullptr in case of error. - // Determines the type of the serialized class and calls its DeSerialize - // on a new object of the appropriate type, which is returned. - static Network* CreateFromFile(TFile* fp); - - // Runs forward propagation of activations on the input line. - // Note that input and output are both 2-d arrays. - // The 1st index is the time element. In a 1-d network, it might be the pixel - // position on the textline. In a 2-d network, the linearization is defined - // by the stride_map. (See networkio.h). - // The 2nd index of input is the network inputs/outputs, and the dimension - // of the input must match NumInputs() of this network. - // The output array will be resized as needed so that its 1st dimension is - // always equal to the number of output values, and its second dimension is - // always NumOutputs(). Note that all this detail is encapsulated away inside - // NetworkIO, as are the internals of the scratch memory space used by the - // network. See networkscratch.h for that. - // If input_transpose is not nullptr, then it contains the transpose of input, - // and the caller guarantees that it will still be valid on the next call to - // backward. The callee is therefore at liberty to save the pointer and - // reference it on a call to backward. This is a bit ugly, but it makes it - // possible for a replicating parallel to calculate the input transpose once - // instead of all the replicated networks having to do it. 
- virtual void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - tprintf("Must override Network::Forward for type %d\n", type_); - } - - // Runs backward propagation of errors on fwdX_deltas. - // Note that fwd_deltas and back_deltas are both 2-d arrays as with Forward. - // Returns false if back_deltas was not set, due to there being no point in - // propagating further backwards. Thus most complete networks will always - // return false from Backward! - virtual bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - tprintf("Must override Network::Backward for type %d\n", type_); - return false; - } - - // === Debug image display methods. === - // Displays the image of the matrix to the forward window. - void DisplayForward(const NetworkIO& matrix); - // Displays the image of the matrix to the backward window. - void DisplayBackward(const NetworkIO& matrix); - - // Creates the window if needed, otherwise clears it. - static void ClearWindow(bool tess_coords, const char* window_name, - int width, int height, ScrollView** window); - - // Displays the pix in the given window. and returns the height of the pix. - // The pix is pixDestroyed. - static int DisplayImage(Pix* pix, ScrollView* window); - - protected: - // Returns a random number in [-range, range]. - double Random(double range); - - protected: - NetworkType type_; // Type of the derived network class. - TrainingState training_; // Are we currently training? - bool needs_to_backprop_; // This network needs to output back_deltas. - int32_t network_flags_; // Behavior control flags in NetworkFlags. - int32_t ni_; // Number of input values. - int32_t no_; // Number of output values. - int32_t num_weights_; // Number of weights in this and sub-network. - STRING name_; // A unique name for this layer. - - // NOT-serialized debug data. 
- ScrollView* forward_win_; // Recognition debug display window. - ScrollView* backward_win_; // Training debug display window. - TRand* randomizer_; // Random number generator. - - // Static serialized name/type_ mapping. Keep in sync with NetworkType. - static char const* const kTypeNames[NT_COUNT]; -}; - - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_NETWORK_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkbuilder.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkbuilder.cpp deleted file mode 100644 index fa3fce01..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkbuilder.cpp +++ /dev/null @@ -1,487 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: networkbuilder.cpp -// Description: Class to parse the network description language and -// build a corresponding network. -// Author: Ray Smith -// Created: Wed Jul 16 18:35:38 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#include "networkbuilder.h" -#include "convolve.h" -#include "fullyconnected.h" -#include "input.h" -#include "lstm.h" -#include "maxpool.h" -#include "network.h" -#include "parallel.h" -#include "reconfig.h" -#include "reversed.h" -#include "series.h" -#include "unicharset.h" - -namespace tesseract { - -// Builds a network with a network_spec in the network description -// language, to recognize a character set of num_outputs size. -// If append_index is non-negative, then *network must be non-null and the -// given network_spec will be appended to *network AFTER append_index, with -// the top of the input *network discarded. -// Note that network_spec is call by value to allow a non-const char* pointer -// into the string for BuildFromString. -// net_flags control network behavior according to the NetworkFlags enum. -// The resulting network is returned via **network. -// Returns false if something failed. -bool NetworkBuilder::InitNetwork(int num_outputs, STRING network_spec, - int append_index, int net_flags, - float weight_range, TRand* randomizer, - Network** network) { - NetworkBuilder builder(num_outputs); - Series* bottom_series = nullptr; - StaticShape input_shape; - if (append_index >= 0) { - // Split the current network after the given append_index. - ASSERT_HOST(*network != nullptr && (*network)->type() == NT_SERIES); - Series* series = static_cast(*network); - Series* top_series = nullptr; - series->SplitAt(append_index, &bottom_series, &top_series); - if (bottom_series == nullptr || top_series == nullptr) { - tprintf("Yikes! 
Splitting current network failed!!\n"); - return false; - } - input_shape = bottom_series->OutputShape(input_shape); - delete top_series; - } - char* str_ptr = &network_spec[0]; - *network = builder.BuildFromString(input_shape, &str_ptr); - if (*network == nullptr) return false; - (*network)->SetNetworkFlags(net_flags); - (*network)->InitWeights(weight_range, randomizer); - (*network)->SetupNeedsBackprop(false); - if (bottom_series != nullptr) { - bottom_series->AppendSeries(*network); - *network = bottom_series; - } - (*network)->CacheXScaleFactor((*network)->XScaleFactor()); - return true; -} - -// Helper skips whitespace. -static void SkipWhitespace(char** str) { - while (**str == ' ' || **str == '\t' || **str == '\n') ++*str; -} - -// Parses the given string and returns a network according to the network -// description language in networkbuilder.h -Network* NetworkBuilder::BuildFromString(const StaticShape& input_shape, - char** str) { - SkipWhitespace(str); - char code_ch = **str; - if (code_ch == '[') { - return ParseSeries(input_shape, nullptr, str); - } - if (input_shape.depth() == 0) { - // There must be an input at this point. - return ParseInput(str); - } - switch (code_ch) { - case '(': - return ParseParallel(input_shape, str); - case 'R': - return ParseR(input_shape, str); - case 'S': - return ParseS(input_shape, str); - case 'C': - return ParseC(input_shape, str); - case 'M': - return ParseM(input_shape, str); - case 'L': - return ParseLSTM(input_shape, str); - case 'F': - return ParseFullyConnected(input_shape, str); - case 'O': - return ParseOutput(input_shape, str); - default: - tprintf("Invalid network spec:%s\n", *str); - return nullptr; - } - return nullptr; -} - -// Parses an input specification and returns the result, which may include a -// series. -Network* NetworkBuilder::ParseInput(char** str) { - // There must be an input at this point. 
- int length = 0; - int batch, height, width, depth; - int num_converted = - sscanf(*str, "%d,%d,%d,%d%n", &batch, &height, &width, &depth, &length); - StaticShape shape; - shape.SetShape(batch, height, width, depth); - // num_converted may or may not include the length. - if (num_converted != 4 && num_converted != 5) { - tprintf("Must specify an input layer as the first layer, not %s!!\n", *str); - return nullptr; - } - *str += length; - Input* input = new Input("Input", shape); - // We want to allow [rest of net... or [rest of net... so we - // have to check explicitly for '[' here. - SkipWhitespace(str); - if (**str == '[') return ParseSeries(shape, input, str); - return input; -} - -// Parses a sequential series of networks, defined by [...]. -Network* NetworkBuilder::ParseSeries(const StaticShape& input_shape, - Input* input_layer, char** str) { - StaticShape shape = input_shape; - Series* series = new Series("Series"); - ++*str; - if (input_layer != nullptr) { - series->AddToStack(input_layer); - shape = input_layer->OutputShape(shape); - } - Network* network = nullptr; - while (**str != '\0' && **str != ']' && - (network = BuildFromString(shape, str)) != nullptr) { - shape = network->OutputShape(shape); - series->AddToStack(network); - } - if (**str != ']') { - tprintf("Missing ] at end of [Series]!\n"); - delete series; - return nullptr; - } - ++*str; - return series; -} - -// Parses a parallel set of networks, defined by (...). -Network* NetworkBuilder::ParseParallel(const StaticShape& input_shape, - char** str) { - Parallel* parallel = new Parallel("Parallel", NT_PARALLEL); - ++*str; - Network* network = nullptr; - while (**str != '\0' && **str != ')' && - (network = BuildFromString(input_shape, str)) != nullptr) { - parallel->AddToStack(network); - } - if (**str != ')') { - tprintf("Missing ) at end of (Parallel)!\n"); - delete parallel; - return nullptr; - } - ++*str; - return parallel; -} - -// Parses a network that begins with 'R'. 
-Network* NetworkBuilder::ParseR(const StaticShape& input_shape, char** str) { - char dir = (*str)[1]; - if (dir == 'x' || dir == 'y') { - STRING name = "Reverse"; - name += dir; - *str += 2; - Network* network = BuildFromString(input_shape, str); - if (network == nullptr) return nullptr; - Reversed* rev = - new Reversed(name, dir == 'y' ? NT_YREVERSED : NT_XREVERSED); - rev->SetNetwork(network); - return rev; - } - int replicas = strtol(*str + 1, str, 10); - if (replicas <= 0) { - tprintf("Invalid R spec!:%s\n", *str); - return nullptr; - } - Parallel* parallel = new Parallel("Replicated", NT_REPLICATED); - char* str_copy = *str; - for (int i = 0; i < replicas; ++i) { - str_copy = *str; - Network* network = BuildFromString(input_shape, &str_copy); - if (network == nullptr) { - tprintf("Invalid replicated network!\n"); - delete parallel; - return nullptr; - } - parallel->AddToStack(network); - } - *str = str_copy; - return parallel; -} - -// Parses a network that begins with 'S'. -Network* NetworkBuilder::ParseS(const StaticShape& input_shape, char** str) { - int y = strtol(*str + 1, str, 10); - if (**str == ',') { - int x = strtol(*str + 1, str, 10); - if (y <= 0 || x <= 0) { - tprintf("Invalid S spec!:%s\n", *str); - return nullptr; - } - return new Reconfig("Reconfig", input_shape.depth(), x, y); - } else if (**str == '(') { - // TODO(rays) Add Generic reshape. - tprintf("Generic reshape not yet implemented!!\n"); - return nullptr; - } - tprintf("Invalid S spec!:%s\n", *str); - return nullptr; -} - -// Helper returns the fully-connected type for the character code. -static NetworkType NonLinearity(char func) { - switch (func) { - case 's': - return NT_LOGISTIC; - case 't': - return NT_TANH; - case 'r': - return NT_RELU; - case 'l': - return NT_LINEAR; - case 'm': - return NT_SOFTMAX; - case 'p': - return NT_POSCLIP; - case 'n': - return NT_SYMCLIP; - default: - return NT_NONE; - } -} - -// Parses a network that begins with 'C'. 
-Network* NetworkBuilder::ParseC(const StaticShape& input_shape, char** str) { - NetworkType type = NonLinearity((*str)[1]); - if (type == NT_NONE) { - tprintf("Invalid nonlinearity on C-spec!: %s\n", *str); - return nullptr; - } - int y = 0, x = 0, d = 0; - if ((y = strtol(*str + 2, str, 10)) <= 0 || **str != ',' || - (x = strtol(*str + 1, str, 10)) <= 0 || **str != ',' || - (d = strtol(*str + 1, str, 10)) <= 0) { - tprintf("Invalid C spec!:%s\n", *str); - return nullptr; - } - if (x == 1 && y == 1) { - // No actual convolution. Just a FullyConnected on the current depth, to - // be slid over all batch,y,x. - return new FullyConnected("Conv1x1", input_shape.depth(), d, type); - } - Series* series = new Series("ConvSeries"); - Convolve* convolve = - new Convolve("Convolve", input_shape.depth(), x / 2, y / 2); - series->AddToStack(convolve); - StaticShape fc_input = convolve->OutputShape(input_shape); - series->AddToStack(new FullyConnected("ConvNL", fc_input.depth(), d, type)); - return series; -} - -// Parses a network that begins with 'M'. -Network* NetworkBuilder::ParseM(const StaticShape& input_shape, char** str) { - int y = 0, x = 0; - if ((*str)[1] != 'p' || (y = strtol(*str + 2, str, 10)) <= 0 || - **str != ',' || (x = strtol(*str + 1, str, 10)) <= 0) { - tprintf("Invalid Mp spec!:%s\n", *str); - return nullptr; - } - return new Maxpool("Maxpool", input_shape.depth(), x, y); -} - -// Parses an LSTM network, either individual, bi- or quad-directional. 
-Network* NetworkBuilder::ParseLSTM(const StaticShape& input_shape, char** str) { - bool two_d = false; - NetworkType type = NT_LSTM; - char* spec_start = *str; - int chars_consumed = 1; - int num_outputs = 0; - char key = (*str)[chars_consumed], dir = 'f', dim = 'x'; - if (key == 'S') { - type = NT_LSTM_SOFTMAX; - num_outputs = num_softmax_outputs_; - ++chars_consumed; - } else if (key == 'E') { - type = NT_LSTM_SOFTMAX_ENCODED; - num_outputs = num_softmax_outputs_; - ++chars_consumed; - } else if (key == '2' && (((*str)[2] == 'x' && (*str)[3] == 'y') || - ((*str)[2] == 'y' && (*str)[3] == 'x'))) { - chars_consumed = 4; - dim = (*str)[3]; - two_d = true; - } else if (key == 'f' || key == 'r' || key == 'b') { - dir = key; - dim = (*str)[2]; - if (dim != 'x' && dim != 'y') { - tprintf("Invalid dimension (x|y) in L Spec!:%s\n", *str); - return nullptr; - } - chars_consumed = 3; - if ((*str)[chars_consumed] == 's') { - ++chars_consumed; - type = NT_LSTM_SUMMARY; - } - } else { - tprintf("Invalid direction (f|r|b) in L Spec!:%s\n", *str); - return nullptr; - } - int num_states = strtol(*str + chars_consumed, str, 10); - if (num_states <= 0) { - tprintf("Invalid number of states in L Spec!:%s\n", *str); - return nullptr; - } - Network* lstm = nullptr; - if (two_d) { - lstm = BuildLSTMXYQuad(input_shape.depth(), num_states); - } else { - if (num_outputs == 0) num_outputs = num_states; - STRING name(spec_start, *str - spec_start); - lstm = new LSTM(name, input_shape.depth(), num_states, num_outputs, false, - type); - if (dir != 'f') { - Reversed* rev = new Reversed("RevLSTM", NT_XREVERSED); - rev->SetNetwork(lstm); - lstm = rev; - } - if (dir == 'b') { - name += "LTR"; - Parallel* parallel = new Parallel("BidiLSTM", NT_PAR_RL_LSTM); - parallel->AddToStack(new LSTM(name, input_shape.depth(), num_states, - num_outputs, false, type)); - parallel->AddToStack(lstm); - lstm = parallel; - } - } - if (dim == 'y') { - Reversed* rev = new Reversed("XYTransLSTM", NT_XYTRANSPOSE); - 
rev->SetNetwork(lstm); - lstm = rev; - } - return lstm; -} - -// Builds a set of 4 lstms with x and y reversal, running in true parallel. -Network* NetworkBuilder::BuildLSTMXYQuad(int num_inputs, int num_states) { - Parallel* parallel = new Parallel("2DLSTMQuad", NT_PAR_2D_LSTM); - parallel->AddToStack(new LSTM("L2DLTRDown", num_inputs, num_states, - num_states, true, NT_LSTM)); - Reversed* rev = new Reversed("L2DLTRXRev", NT_XREVERSED); - rev->SetNetwork(new LSTM("L2DRTLDown", num_inputs, num_states, num_states, - true, NT_LSTM)); - parallel->AddToStack(rev); - rev = new Reversed("L2DRTLYRev", NT_YREVERSED); - rev->SetNetwork( - new LSTM("L2DRTLUp", num_inputs, num_states, num_states, true, NT_LSTM)); - Reversed* rev2 = new Reversed("L2DXRevU", NT_XREVERSED); - rev2->SetNetwork(rev); - parallel->AddToStack(rev2); - rev = new Reversed("L2DXRevY", NT_YREVERSED); - rev->SetNetwork(new LSTM("L2DLTRDown", num_inputs, num_states, num_states, - true, NT_LSTM)); - parallel->AddToStack(rev); - return parallel; -} - -// Helper builds a truly (0-d) fully connected layer of the given type. -static Network* BuildFullyConnected(const StaticShape& input_shape, - NetworkType type, const STRING& name, - int depth) { - if (input_shape.height() == 0 || input_shape.width() == 0) { - tprintf("Fully connected requires positive height and width, had %d,%d\n", - input_shape.height(), input_shape.width()); - return nullptr; - } - int input_size = input_shape.height() * input_shape.width(); - int input_depth = input_size * input_shape.depth(); - Network* fc = new FullyConnected(name, input_depth, depth, type); - if (input_size > 1) { - Series* series = new Series("FCSeries"); - series->AddToStack(new Reconfig("FCReconfig", input_shape.depth(), - input_shape.width(), input_shape.height())); - series->AddToStack(fc); - fc = series; - } - return fc; -} - -// Parses a Fully connected network. 
-Network* NetworkBuilder::ParseFullyConnected(const StaticShape& input_shape, - char** str) { - char* spec_start = *str; - NetworkType type = NonLinearity((*str)[1]); - if (type == NT_NONE) { - tprintf("Invalid nonlinearity on F-spec!: %s\n", *str); - return nullptr; - } - int depth = strtol(*str + 2, str, 10); - if (depth <= 0) { - tprintf("Invalid F spec!:%s\n", *str); - return nullptr; - } - STRING name(spec_start, *str - spec_start); - return BuildFullyConnected(input_shape, type, name, depth); -} - -// Parses an Output spec. -Network* NetworkBuilder::ParseOutput(const StaticShape& input_shape, - char** str) { - char dims_ch = (*str)[1]; - if (dims_ch != '0' && dims_ch != '1' && dims_ch != '2') { - tprintf("Invalid dims (2|1|0) in output spec!:%s\n", *str); - return nullptr; - } - char type_ch = (*str)[2]; - if (type_ch != 'l' && type_ch != 's' && type_ch != 'c') { - tprintf("Invalid output type (l|s|c) in output spec!:%s\n", *str); - return nullptr; - } - int depth = strtol(*str + 3, str, 10); - if (depth != num_softmax_outputs_) { - tprintf("Warning: given outputs %d not equal to unicharset of %d.\n", depth, - num_softmax_outputs_); - depth = num_softmax_outputs_; - } - NetworkType type = NT_SOFTMAX; - if (type_ch == 'l') - type = NT_LOGISTIC; - else if (type_ch == 's') - type = NT_SOFTMAX_NO_CTC; - if (dims_ch == '0') { - // Same as standard fully connected. - return BuildFullyConnected(input_shape, type, "Output", depth); - } else if (dims_ch == '2') { - // We don't care if x and/or y are variable. - return new FullyConnected("Output2d", input_shape.depth(), depth, type); - } - // For 1-d y has to be fixed, and if not 1, moved to depth. 
- if (input_shape.height() == 0) { - tprintf("Fully connected requires fixed height!\n"); - return nullptr; - } - int input_size = input_shape.height(); - int input_depth = input_size * input_shape.depth(); - Network* fc = new FullyConnected("Output", input_depth, depth, type); - if (input_size > 1) { - Series* series = new Series("FCSeries"); - series->AddToStack(new Reconfig("FCReconfig", input_shape.depth(), 1, - input_shape.height())); - series->AddToStack(fc); - fc = series; - } - return fc; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkbuilder.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkbuilder.h deleted file mode 100644 index a405fc52..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkbuilder.h +++ /dev/null @@ -1,160 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: networkbuilder.h -// Description: Class to parse the network description language and -// build a corresponding network. -// Author: Ray Smith -// Created: Wed Jul 16 18:35:38 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_NETWORKBUILDER_H_ -#define TESSERACT_LSTM_NETWORKBUILDER_H_ - -#include "static_shape.h" -#include "stridemap.h" - -class STRING; -class UNICHARSET; - -namespace tesseract { - -class Input; -class Network; -class Parallel; -class TRand; - -class NetworkBuilder { - public: - explicit NetworkBuilder(int num_softmax_outputs) - : num_softmax_outputs_(num_softmax_outputs) {} - - // Builds a network with a network_spec in the network description - // language, to recognize a character set of num_outputs size. - // If append_index is non-negative, then *network must be non-null and the - // given network_spec will be appended to *network AFTER append_index, with - // the top of the input *network discarded. - // Note that network_spec is call by value to allow a non-const char* pointer - // into the string for BuildFromString. - // net_flags control network behavior according to the NetworkFlags enum. - // The resulting network is returned via **network. - // Returns false if something failed. - static bool InitNetwork(int num_outputs, STRING network_spec, - int append_index, int net_flags, float weight_range, - TRand* randomizer, Network** network); - - // Parses the given string and returns a network according to the following - // language: - // ============ Syntax of description below: ============ - // represents a number. - // represents any single network element, including (recursively) a - // [...] series or (...) parallel construct. - // (s|t|r|l|m) (regex notation) represents a single required letter. - // NOTE THAT THROUGHOUT, x and y are REVERSED from conventional mathematics, - // to use the same convention as Tensor Flow. 
The reason TF adopts this - // convention is to eliminate the need to transpose images on input, since - // adjacent memory locations in images increase x and then y, while adjacent - // memory locations in tensors in TF, and NetworkIO in tesseract increase the - // rightmost index first, then the next-left and so-on, like C arrays. - // ============ INPUTS ============ - // ,,, A batch of b images with height h, width w, and depth d. - // b, h and/or w may be zero, to indicate variable size. Some network layer - // (summarizing LSTM) must be used to make a variable h known. - // d may be 1 for greyscale, 3 for color. - // NOTE that throughout the constructed network, the inputs/outputs are all of - // the same [batch,height,width,depth] dimensions, even if a different size. - // ============ PLUMBING ============ - // [...] Execute ... networks in series (layers). - // (...) Execute ... networks in parallel, with their output depths added. - // R Execute d replicas of net in parallel, with their output depths - // added. - // Rx Execute with x-dimension reversal. - // Ry Execute with y-dimension reversal. - // S, Rescale 2-D input by shrink factor x,y, rearranging the data by - // increasing the depth of the input by factor xy. - // Mp, Maxpool the input, reducing the size by an (x,y) rectangle. - // ============ FUNCTIONAL UNITS ============ - // C(s|t|r|l|m),, Convolves using a (x,y) window, with no shrinkage, - // random infill, producing d outputs, then applies a non-linearity: - // s: Sigmoid, t: Tanh, r: Relu, l: Linear, m: Softmax. - // F(s|t|r|l|m) Truly fully-connected with s|t|r|l|m non-linearity and d - // outputs. Connects to every x,y,depth position of the input, reducing - // height, width to 1, producing a single vector as the output. - // Input height and width must be constant. - // For a sliding-window linear or non-linear map that connects just to the - // input depth, and leaves the input image size as-is, use a 1x1 convolution - // eg. 
Cr1,1,64 instead of Fr64. - // L(f|r|b)(x|y)[s] LSTM cell with n states/outputs. - // The LSTM must have one of: - // f runs the LSTM forward only. - // r runs the LSTM reversed only. - // b runs the LSTM bidirectionally. - // It will operate on either the x- or y-dimension, treating the other - // dimension independently (as if part of the batch). - // s (optional) summarizes the output in the requested dimension, - // outputting only the final step, collapsing the dimension to a - // single element. - // LS Forward-only LSTM cell in the x-direction, with built-in Softmax. - // LE Forward-only LSTM cell in the x-direction, with built-in softmax, - // with binary Encoding. - // L2xy Full 2-d LSTM operating in quad-directions (bidi in x and y) and - // all the output depths added. - // ============ OUTPUTS ============ - // The network description must finish with an output specification: - // O(2|1|0)(l|s|c) output layer with n classes - // 2 (heatmap) Output is a 2-d vector map of the input (possibly at - // different scale). - // 1 (sequence) Output is a 1-d sequence of vector values. - // 0 (category) Output is a 0-d single vector value. - // l uses a logistic non-linearity on the output, allowing multiple - // hot elements in any output vector value. - // s uses a softmax non-linearity, with one-hot output in each value. - // c uses a softmax with CTC. Can only be used with s (sequence). - // NOTE1: Only O1s and O1c are currently supported. - // NOTE2: n is totally ignored, and for compatibility purposes only. The - // output number of classes is obtained automatically from the - // unicharset. - Network* BuildFromString(const StaticShape& input_shape, char** str); - - private: - // Parses an input specification and returns the result, which may include a - // series. - Network* ParseInput(char** str); - // Parses a sequential series of networks, defined by [...]. 
- Network* ParseSeries(const StaticShape& input_shape, Input* input_layer, - char** str); - // Parses a parallel set of networks, defined by (...). - Network* ParseParallel(const StaticShape& input_shape, char** str); - // Parses a network that begins with 'R'. - Network* ParseR(const StaticShape& input_shape, char** str); - // Parses a network that begins with 'S'. - Network* ParseS(const StaticShape& input_shape, char** str); - // Parses a network that begins with 'C'. - Network* ParseC(const StaticShape& input_shape, char** str); - // Parses a network that begins with 'M'. - Network* ParseM(const StaticShape& input_shape, char** str); - // Parses an LSTM network, either individual, bi- or quad-directional. - Network* ParseLSTM(const StaticShape& input_shape, char** str); - // Builds a set of 4 lstms with t and y reversal, running in true parallel. - static Network* BuildLSTMXYQuad(int num_inputs, int num_states); - // Parses a Fully connected network. - Network* ParseFullyConnected(const StaticShape& input_shape, char** str); - // Parses an Output spec. - Network* ParseOutput(const StaticShape& input_shape, char** str); - - private: - int num_softmax_outputs_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_NETWORKBUILDER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkio.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkio.cpp deleted file mode 100644 index 88f3f409..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkio.cpp +++ /dev/null @@ -1,997 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: networkio.cpp -// Description: Network input/output data, allowing float/int implementations. -// Author: Ray Smith -// Created: Thu Jun 19 13:01:31 PST 2014 -// -// (C) Copyright 2014, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "networkio.h" -#include // for FLT_MAX - -#include "allheaders.h" -#include "functions.h" -#include "statistc.h" -#include "tprintf.h" - -namespace tesseract { - -// Minimum value to output for certainty. -const float kMinCertainty = -20.0f; -// Probability corresponding to kMinCertainty. -const float kMinProb = exp(kMinCertainty); - -// Holds the optimal integer multiplier for this machine. -// This is a leaked, lazily initialized singleton, and is used for computing -// padding to apply to i_ for SIMD use. -IntSimdMatrix* NetworkIO::multiplier_ = nullptr; - -// Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim. -void NetworkIO::Resize2d(bool int_mode, int width, int num_features) { - stride_map_ = StrideMap(); - int_mode_ = int_mode; - if (int_mode_) { - i_.ResizeNoInit(width, num_features, GetPadding(num_features)); - } else { - f_.ResizeNoInit(width, num_features); - } -} - -// Resizes to a specific stride_map. -void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map, - int num_features) { - // If this method crashes with this == nullptr, - // it most likely got here through an uninitialized scratch element, - // ie call NetworkScratch::IO::Resizexxx() not NetworkIO::Resizexxx()!! 
- stride_map_ = stride_map; - int_mode_ = int_mode; - if (int_mode_) { - i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features)); - } else { - f_.ResizeNoInit(stride_map.Width(), num_features); - } - ZeroInvalidElements(); -} - -// Shrinks image size by x_scale,y_scale, and use given number of features. -void NetworkIO::ResizeScaled(const NetworkIO& src, - int x_scale, int y_scale, int num_features) { - StrideMap stride_map = src.stride_map_; - stride_map.ScaleXY(x_scale, y_scale); - ResizeToMap(src.int_mode_, stride_map, num_features); -} - -// Resizes to just 1 x-coord, whatever the input. -void NetworkIO::ResizeXTo1(const NetworkIO& src, int num_features) { - StrideMap stride_map = src.stride_map_; - stride_map.ReduceWidthTo1(); - ResizeToMap(src.int_mode_, stride_map, num_features); -} - -// Initialize all the array to zero. -void NetworkIO::Zero() { - int width = Width(); - // Zero out the everything. Column-by-column in case it is aligned. - for (int t = 0; t < width; ++t) { - ZeroTimeStep(t); - } -} - -// Initializes to zero all elements of the array that do not correspond to -// valid image positions. (If a batch of different-sized images are packed -// together, then there will be padding pixels.) -void NetworkIO::ZeroInvalidElements() { - int num_features = NumFeatures(); - int full_width = stride_map_.Size(FD_WIDTH); - int full_height = stride_map_.Size(FD_HEIGHT); - StrideMap::Index b_index(stride_map_); - do { - int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1; - if (end_x < full_width) { - // The width is small, so fill for every valid y. 
- StrideMap::Index y_index(b_index); - int fill_size = num_features * (full_width - end_x); - do { - StrideMap::Index z_index(y_index); - z_index.AddOffset(end_x, FD_WIDTH); - if (int_mode_) { - ZeroVector(fill_size, i_[z_index.t()]); - } else { - ZeroVector(fill_size, f_[z_index.t()]); - } - } while (y_index.AddOffset(1, FD_HEIGHT)); - } - int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1; - if (end_y < full_height) { - // The height is small, so fill in the space in one go. - StrideMap::Index y_index(b_index); - y_index.AddOffset(end_y, FD_HEIGHT); - int fill_size = num_features * full_width * (full_height - end_y); - if (int_mode_) { - ZeroVector(fill_size, i_[y_index.t()]); - } else { - ZeroVector(fill_size, f_[y_index.t()]); - } - } - } while (b_index.AddOffset(1, FD_BATCH)); -} - -// Helper computes a black point and white point to contrast-enhance an image. -// The computation is based on the assumption that the image is of a single line -// of text, so a horizontal line through the middle of the image passes through -// at least some of it, so local minima and maxima are a good proxy for black -// and white pixel samples. -static void ComputeBlackWhite(Pix* pix, float* black, float* white) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - STATS mins(0, 256), maxes(0, 256); - if (width >= 3) { - int y = height / 2; - l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y; - int prev = GET_DATA_BYTE(line, 0); - int curr = GET_DATA_BYTE(line, 1); - for (int x = 1; x + 1 < width; ++x) { - int next = GET_DATA_BYTE(line, x + 1); - if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) { - // Local minimum. - mins.add(curr, 1); - } - if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) { - // Local maximum. 
- maxes.add(curr, 1); - } - prev = curr; - curr = next; - } - } - if (mins.get_total() == 0) mins.add(0, 1); - if (maxes.get_total() == 0) maxes.add(255, 1); - *black = mins.ile(0.25); - *white = maxes.ile(0.75); -} - -// Sets up the array from the given image, using the currently set int_mode_. -// If the image width doesn't match the shape, the image is truncated or padded -// with noise to match. -void NetworkIO::FromPix(const StaticShape& shape, const Pix* pix, - TRand* randomizer) { - std::vector pixes(1, pix); - FromPixes(shape, pixes, randomizer); -} - -// Sets up the array from the given set of images, using the currently set -// int_mode_. If the image width doesn't match the shape, the images are -// truncated or padded with noise to match. -void NetworkIO::FromPixes(const StaticShape& shape, - const std::vector& pixes, - TRand* randomizer) { - int target_height = shape.height(); - int target_width = shape.width(); - std::vector> h_w_pairs; - for (auto pix : pixes) { - Pix* var_pix = const_cast(pix); - int width = pixGetWidth(var_pix); - if (target_width != 0) width = target_width; - int height = pixGetHeight(var_pix); - if (target_height != 0) height = target_height; - h_w_pairs.emplace_back(height, width); - } - stride_map_.SetStride(h_w_pairs); - ResizeToMap(int_mode(), stride_map_, shape.depth()); - // Iterate over the images again to copy the data. 
- for (size_t b = 0; b < pixes.size(); ++b) { - Pix* pix = const_cast(pixes[b]); - float black = 0.0f, white = 255.0f; - if (shape.depth() != 3) ComputeBlackWhite(pix, &black, &white); - float contrast = (white - black) / 2.0f; - if (contrast <= 0.0f) contrast = 1.0f; - if (shape.height() == 1) { - Copy1DGreyImage(b, pix, black, contrast, randomizer); - } else { - Copy2DImage(b, pix, black, contrast, randomizer); - } - } -} - -// Copies the given pix to *this at the given batch index, stretching and -// clipping the pixel values so that [black, black + 2*contrast] maps to the -// dynamic range of *this, ie [-1,1] for a float and (-127,127) for int. -// This is a 2-d operation in the sense that the output depth is the number -// of input channels, the height is the height of the image, and the width -// is the width of the image, or truncated/padded with noise if the width -// is a fixed size. -void NetworkIO::Copy2DImage(int batch, Pix* pix, float black, float contrast, - TRand* randomizer) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - int wpl = pixGetWpl(pix); - StrideMap::Index index(stride_map_); - index.AddOffset(batch, FD_BATCH); - int t = index.t(); - int target_height = stride_map_.Size(FD_HEIGHT); - int target_width = stride_map_.Size(FD_WIDTH); - int num_features = NumFeatures(); - bool color = num_features == 3; - if (width > target_width) width = target_width; - uint32_t* line = pixGetData(pix); - for (int y = 0; y < target_height; ++y, line += wpl) { - int x = 0; - if (y < height) { - for (x = 0; x < width; ++x, ++t) { - if (color) { - int f = 0; - for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) { - int pixel = GET_DATA_BYTE(line + x, c); - SetPixel(t, f++, pixel, black, contrast); - } - } else { - int pixel = GET_DATA_BYTE(line, x); - SetPixel(t, 0, pixel, black, contrast); - } - } - } - for (; x < target_width; ++x) Randomize(t++, 0, num_features, randomizer); - } -} - -// Copies the given pix to *this at the given batch index, 
as Copy2DImage -// above, except that the output depth is the height of the input image, the -// output height is 1, and the output width as for Copy2DImage. -// The image is thus treated as a 1-d set of vertical pixel strips. -void NetworkIO::Copy1DGreyImage(int batch, Pix* pix, float black, - float contrast, TRand* randomizer) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - ASSERT_HOST(height == NumFeatures()); - int wpl = pixGetWpl(pix); - StrideMap::Index index(stride_map_); - index.AddOffset(batch, FD_BATCH); - int t = index.t(); - int target_width = stride_map_.Size(FD_WIDTH); - if (width > target_width) width = target_width; - int x; - for (x = 0; x < width; ++x, ++t) { - for (int y = 0; y < height; ++y) { - uint32_t* line = pixGetData(pix) + wpl * y; - int pixel = GET_DATA_BYTE(line, x); - SetPixel(t, y, pixel, black, contrast); - } - } - for (; x < target_width; ++x) Randomize(t++, 0, height, randomizer); -} - -// Helper stores the pixel value in i_ or f_ according to int_mode_. -// t: is the index from the StrideMap corresponding to the current -// [batch,y,x] position -// f: is the index into the depth/channel -// pixel: the value of the pixel from the image (in one channel) -// black: the pixel value to map to the lowest of the range of *this -// contrast: the range of pixel values to stretch to half the range of *this. -void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) { - float float_pixel = (pixel - black) / contrast - 1.0f; - if (int_mode_) { - i_[t][f] = ClipToRange(IntCastRounded((INT8_MAX + 1) * float_pixel), - -INT8_MAX, INT8_MAX); - } else { - f_[t][f] = float_pixel; - } -} - -// Converts the array to a Pix. Must be pixDestroyed after use. -Pix* NetworkIO::ToPix() const { - // Count the width of the image, and find the max multiplication factor. 
- int im_width = stride_map_.Size(FD_WIDTH); - int im_height = stride_map_.Size(FD_HEIGHT); - int num_features = NumFeatures(); - int feature_factor = 1; - if (num_features == 3) { - // Special hack for color. - num_features = 1; - feature_factor = 3; - } - Pix* pix = pixCreate(im_width, im_height * num_features, 32); - StrideMap::Index index(stride_map_); - do { - int im_x = index.index(FD_WIDTH); - int top_im_y = index.index(FD_HEIGHT); - int im_y = top_im_y; - int t = index.t(); - if (int_mode_) { - const int8_t* features = i_[t]; - for (int y = 0; y < num_features; ++y, im_y += im_height) { - int pixel = features[y * feature_factor]; - // 1 or 2 features use greyscale. - int red = ClipToRange(pixel + 128, 0, 255); - int green = red, blue = red; - if (feature_factor == 3) { - // With 3 features assume RGB color. - green = ClipToRange(features[y * feature_factor + 1] + 128, 0, 255); - blue = ClipToRange(features[y * feature_factor + 2] + 128, 0, 255); - } else if (num_features > 3) { - // More than 3 features use false yellow/blue color, assuming a signed - // input in the range [-1,1]. - red = abs(pixel) * 2; - if (pixel >= 0) { - green = red; - blue = 0; - } else { - blue = red; - green = red = 0; - } - } - pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) | - (green << L_GREEN_SHIFT) | - (blue << L_BLUE_SHIFT)); - } - } else { - const float* features = f_[t]; - for (int y = 0; y < num_features; ++y, im_y += im_height) { - float pixel = features[y * feature_factor]; - // 1 or 2 features use greyscale. - int red = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); - int green = red, blue = red; - if (feature_factor == 3) { - // With 3 features assume RGB color. 
- pixel = features[y * feature_factor + 1]; - green = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); - pixel = features[y * feature_factor + 2]; - blue = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); - } else if (num_features > 3) { - // More than 3 features use false yellow/blue color, assuming a signed - // input in the range [-1,1]. - red = ClipToRange(IntCastRounded(fabs(pixel) * 255), 0, 255); - if (pixel >= 0) { - green = red; - blue = 0; - } else { - blue = red; - green = red = 0; - } - } - pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) | - (green << L_GREEN_SHIFT) | - (blue << L_BLUE_SHIFT)); - } - } - } while (index.Increment()); - return pix; -} - -// Prints the first and last num timesteps of the array for each feature. -void NetworkIO::Print(int num) const { - int num_features = NumFeatures(); - for (int y = 0; y < num_features; ++y) { - for (int t = 0; t < Width(); ++t) { - if (num == 0 || t < num || t + num >= Width()) { - if (int_mode_) { - tprintf(" %g", static_cast(i_[t][y]) / INT8_MAX); - } else { - tprintf(" %g", f_[t][y]); - } - } - } - tprintf("\n"); - } -} - -// Copies a single time step from src. -void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t) { - ASSERT_HOST(int_mode_ == src.int_mode_); - if (int_mode_) { - memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0])); - } else { - memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0])); - } -} - -// Copies a part of single time step from src. -void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, - int num_features, const NetworkIO& src, - int src_t, int src_offset) { - ASSERT_HOST(int_mode_ == src.int_mode_); - if (int_mode_) { - memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset, - num_features * sizeof(i_[0][0])); - } else { - memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset, - num_features * sizeof(f_[0][0])); - } -} - -// Zeroes a single time step. 
-void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) { - if (int_mode_) { - ZeroVector(num_features, i_[t] + offset); - } else { - ZeroVector(num_features, f_[t] + offset); - } -} - -// Sets the given range to random values. -void NetworkIO::Randomize(int t, int offset, int num_features, - TRand* randomizer) { - if (int_mode_) { - int8_t* line = i_[t] + offset; - for (int i = 0; i < num_features; ++i) - line[i] = IntCastRounded(randomizer->SignedRand(INT8_MAX)); - } else { - // float mode. - float* line = f_[t] + offset; - for (int i = 0; i < num_features; ++i) - line[i] = randomizer->SignedRand(1.0); - } -} - -// Helper returns the label and score of the best choice over a range. -int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this, - int null_ch, float* rating, - float* certainty) const { - if (t_end <= t_start) return -1; - int max_char = -1; - float min_score = 0.0f; - for (int c = 0; c < NumFeatures(); ++c) { - if (c == not_this || c == null_ch) continue; - ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty); - if (max_char < 0 || *rating < min_score) { - min_score = *rating; - max_char = c; - } - } - ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty); - return max_char; -} - -// Helper returns the rating and certainty of the choice over a range in output. 
-void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch, - float* rating, float* certainty) const { - ASSERT_HOST(!int_mode_); - *rating = 0.0f; - *certainty = 0.0f; - if (t_end <= t_start || t_end <= 0) return; - float ratings[3] = {0.0f, 0.0f, 0.0f}; - float certs[3] = {0.0f, 0.0f, 0.0f}; - for (int t = t_start; t < t_end; ++t) { - const float* line = f_[t]; - float score = ProbToCertainty(line[choice]); - float zero = ProbToCertainty(line[null_ch]); - if (t == t_start) { - ratings[2] = FLT_MAX; - ratings[1] = -score; - certs[1] = score; - } else { - for (int i = 2; i >= 1; --i) { - if (ratings[i] > ratings[i - 1]) { - ratings[i] = ratings[i - 1]; - certs[i] = certs[i - 1]; - } - } - ratings[2] -= zero; - if (zero < certs[2]) certs[2] = zero; - ratings[1] -= score; - if (score < certs[1]) certs[1] = score; - } - ratings[0] -= zero; - if (zero < certs[0]) certs[0] = zero; - } - int best_i = ratings[2] < ratings[1] ? 2 : 1; - *rating = ratings[best_i] + t_end - t_start; - *certainty = certs[best_i]; -} - -// Returns the index (label) of the best value at the given timestep, -// excluding not_this and not_that, and if not null, sets the score to the -// log of the corresponding value. -int NetworkIO::BestLabel(int t, int not_this, int not_that, - float* score) const { - ASSERT_HOST(!int_mode_); - int best_index = -1; - float best_score = -FLT_MAX; - const float* line = f_[t]; - for (int i = 0; i < f_.dim2(); ++i) { - if (line[i] > best_score && i != not_this && i != not_that) { - best_score = line[i]; - best_index = i; - } - } - if (score != nullptr) *score = ProbToCertainty(best_score); - return best_index; -} - -// Returns the best start position out of [start, end) (into which all labels -// must fit) to obtain the highest cumulative score for the given labels. 
-int NetworkIO::PositionOfBestMatch(const GenericVector& labels, int start, - int end) const { - int length = labels.size(); - int last_start = end - length; - int best_start = -1; - double best_score = 0.0; - for (int s = start; s <= last_start; ++s) { - double score = ScoreOfLabels(labels, s); - if (score > best_score || best_start < 0) { - best_score = score; - best_start = s; - } - } - return best_start; -} - -// Returns the cumulative score of the given labels starting at start, and -// using one label per time-step. -double NetworkIO::ScoreOfLabels(const GenericVector& labels, - int start) const { - int length = labels.size(); - double score = 0.0; - for (int i = 0; i < length; ++i) { - score += f_(start + i, labels[i]); - } - return score; -} - -// Helper function sets all the outputs for a single timestep, such that -// label has value ok_score, and the other labels share 1 - ok_score. -void NetworkIO::SetActivations(int t, int label, float ok_score) { - ASSERT_HOST(!int_mode_); - int num_classes = NumFeatures(); - float bad_score = (1.0f - ok_score) / (num_classes - 1); - float* targets = f_[t]; - for (int i = 0; i < num_classes; ++i) - targets[i] = bad_score; - targets[label] = ok_score; -} - -// Modifies the values, only if needed, so that the given label is -// the winner at the given time step t. -void NetworkIO::EnsureBestLabel(int t, int label) { - ASSERT_HOST(!int_mode_); - if (BestLabel(t, nullptr) != label) { - // Output value needs enhancing. Third all the other elements and add the - // remainder to best_label. - int num_classes = NumFeatures(); - float* targets = f_[t]; - for (int c = 0; c < num_classes; ++c) { - if (c == label) { - targets[c] += (1.0 - targets[c]) * (2 / 3.0); - } else { - targets[c] /= 3.0; - } - } - } -} - -// Helper function converts prob to certainty taking the minimum into account. -/* static */ -float NetworkIO::ProbToCertainty(float prob) { - return prob > kMinProb ? 
log(prob) : kMinCertainty; -} - -// Returns true if there is any bad value that is suspiciously like a GT -// error. Assuming that *this is the difference(gradient) between target -// and forward output, returns true if there is a large negative value -// (correcting a very confident output) for which there is no corresponding -// positive value in an adjacent timestep for the same feature index. This -// allows the box-truthed samples to make fine adjustments to position while -// stopping other disagreements of confident output with ground truth. -bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const { - int num_features = NumFeatures(); - for (int t = 0; t < Width(); ++t) { - const float* features = f_[t]; - for (int y = 0; y < num_features; ++y) { - float grad = features[y]; - if (grad < -confidence_thr) { - // Correcting strong output. Check for movement. - if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) && - (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) { - return true; // No strong positive on either side. - } - } - } - } - return false; -} - -// Reads a single timestep to floats in the range [-1, 1]. -void NetworkIO::ReadTimeStep(int t, double* output) const { - if (int_mode_) { - const int8_t* line = i_[t]; - for (int i = 0; i < i_.dim2(); ++i) { - output[i] = static_cast(line[i]) / INT8_MAX; - } - } else { - const float* line = f_[t]; - for (int i = 0; i < f_.dim2(); ++i) { - output[i] = static_cast(line[i]); - } - } -} - -// Adds a single timestep to floats. -void NetworkIO::AddTimeStep(int t, double* inout) const { - int num_features = NumFeatures(); - if (int_mode_) { - const int8_t* line = i_[t]; - for (int i = 0; i < num_features; ++i) { - inout[i] += static_cast(line[i]) / INT8_MAX; - } - } else { - const float* line = f_[t]; - for (int i = 0; i < num_features; ++i) { - inout[i] += line[i]; - } - } -} - -// Adds part of a single timestep to floats. 
-void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, - float* inout) const { - if (int_mode_) { - const int8_t* line = i_[t] + offset; - for (int i = 0; i < num_features; ++i) { - inout[i] += static_cast(line[i]) / INT8_MAX; - } - } else { - const float* line = f_[t] + offset; - for (int i = 0; i < num_features; ++i) { - inout[i] += line[i]; - } - } -} - -// Writes a single timestep from floats in the range [-1, 1]. -void NetworkIO::WriteTimeStep(int t, const double* input) { - WriteTimeStepPart(t, 0, NumFeatures(), input); -} - -// Writes a single timestep from floats in the range [-1, 1] writing only -// num_features elements of input to (*this)[t], starting at offset. -void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, - const double* input) { - if (int_mode_) { - int8_t* line = i_[t] + offset; - for (int i = 0; i < num_features; ++i) { - line[i] = ClipToRange(IntCastRounded(input[i] * INT8_MAX), - -INT8_MAX, INT8_MAX); - } - } else { - float* line = f_[t] + offset; - for (int i = 0; i < num_features; ++i) { - line[i] = static_cast(input[i]); - } - } -} - -// Maxpools a single time step from src. -void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t, - int* max_line) { - ASSERT_HOST(int_mode_ == src.int_mode_); - if (int_mode_) { - int dim = i_.dim2(); - int8_t* dest_line = i_[dest_t]; - const int8_t* src_line = src.i_[src_t]; - for (int i = 0; i < dim; ++i) { - if (dest_line[i] < src_line[i]) { - dest_line[i] = src_line[i]; - max_line[i] = src_t; - } - } - } else { - int dim = f_.dim2(); - float* dest_line = f_[dest_t]; - const float* src_line = src.f_[src_t]; - for (int i = 0; i < dim; ++i) { - if (dest_line[i] < src_line[i]) { - dest_line[i] = src_line[i]; - max_line[i] = src_t; - } - } - } -} - -// Runs maxpool backward, using maxes to index timesteps in *this. 
-void NetworkIO::MaxpoolBackward(const NetworkIO& fwd, - const GENERIC_2D_ARRAY& maxes) { - ASSERT_HOST(!int_mode_); - Zero(); - StrideMap::Index index(fwd.stride_map_); - do { - int t = index.t(); - const int* max_line = maxes[t]; - const float* fwd_line = fwd.f_[t]; - int num_features = fwd.f_.dim2(); - for (int i = 0; i < num_features; ++i) { - f_[max_line[i]][i] = fwd_line[i]; - } - } while (index.Increment()); -} - -// Returns the min over time of the maxes over features of the outputs. -float NetworkIO::MinOfMaxes() const { - float min_max = 0.0f; - int width = Width(); - int num_features = NumFeatures(); - for (int t = 0; t < width; ++t) { - float max_value = -FLT_MAX; - if (int_mode_) { - const int8_t* column = i_[t]; - for (int i = 0; i < num_features; ++i) { - if (column[i] > max_value) max_value = column[i]; - } - } else { - const float* column = f_[t]; - for (int i = 0; i < num_features; ++i) { - if (column[i] > max_value) max_value = column[i]; - } - } - if (t == 0 || max_value < min_max) min_max = max_value; - } - return min_max; -} - -// Computes combined results for a combiner that chooses between an existing -// input and itself, with an additional output to indicate the choice. -void NetworkIO::CombineOutputs(const NetworkIO& base_output, - const NetworkIO& combiner_output) { - int no = base_output.NumFeatures(); - ASSERT_HOST(combiner_output.NumFeatures() == no + 1); - Resize(base_output, no); - int width = Width(); - if (int_mode_) { - // Number of outputs from base and final result. 
- for (int t = 0; t < width; ++t) { - int8_t* out_line = i_[t]; - const int8_t* base_line = base_output.i_[t]; - const int8_t* comb_line = combiner_output.i_[t]; - float base_weight = static_cast(comb_line[no]) / INT8_MAX; - float boost_weight = 1.0f - base_weight; - for (int i = 0; i < no; ++i) { - out_line[i] = IntCastRounded(base_line[i] * base_weight + - comb_line[i] * boost_weight); - } - } - } else { - for (int t = 0; t < width; ++t) { - float* out_line = f_[t]; - const float* base_line = base_output.f_[t]; - const float* comb_line = combiner_output.f_[t]; - float base_weight = comb_line[no]; - float boost_weight = 1.0f - base_weight; - for (int i = 0; i < no; ++i) { - out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight; - } - } - } -} - -// Computes deltas for a combiner that chooses between 2 sets of inputs. -void NetworkIO::ComputeCombinerDeltas(const NetworkIO& fwd_deltas, - const NetworkIO& base_output) { - ASSERT_HOST(!int_mode_); - // Compute the deltas for the combiner. - int width = Width(); - int no = NumFeatures() - 1; - ASSERT_HOST(fwd_deltas.NumFeatures() == no); - ASSERT_HOST(base_output.NumFeatures() == no); - // Number of outputs from base and final result. - for (int t = 0; t < width; ++t) { - const float* delta_line = fwd_deltas.f_[t]; - const float* base_line = base_output.f_[t]; - float* comb_line = f_[t]; - float base_weight = comb_line[no]; - float boost_weight = 1.0f - base_weight; - float max_base_delta = 0.0; - for (int i = 0; i < no; ++i) { - // What did the combiner actually produce? - float output = base_line[i] * base_weight + comb_line[i] * boost_weight; - // Reconstruct the target from the delta. - float comb_target = delta_line[i] + output; - comb_line[i] = comb_target - comb_line[i]; - float base_delta = fabs(comb_target - base_line[i]); - if (base_delta > max_base_delta) max_base_delta = base_delta; - } - if (max_base_delta >= 0.5) { - // The base network got it wrong. 
The combiner should output the right - // answer and 0 for the base network. - comb_line[no] = 0.0 - base_weight; - } else { - // The base network was right. The combiner should flag that. - for (int i = 0; i < no; ++i) { - // All other targets are 0. - if (comb_line[i] > 0.0) comb_line[i] -= 1.0; - } - comb_line[no] = 1.0 - base_weight; - } - } -} - -// Copies the array checking that the types match. -void NetworkIO::CopyAll(const NetworkIO& src) { - ASSERT_HOST(src.int_mode_ == int_mode_); - f_ = src.f_; -} - -// Checks that both are floats and adds the src array to *this. -void NetworkIO::AddAllToFloat(const NetworkIO& src) { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!src.int_mode_); - f_ += src.f_; -} - -// Subtracts the array from a float array. src must also be float. -void NetworkIO::SubtractAllFromFloat(const NetworkIO& src) { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!src.int_mode_); - f_ -= src.f_; -} - -// Copies src to *this, with maxabs normalization to match scale. -void NetworkIO::CopyWithNormalization(const NetworkIO& src, - const NetworkIO& scale) { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!src.int_mode_); - ASSERT_HOST(!scale.int_mode_); - float src_max = src.f_.MaxAbs(); - ASSERT_HOST(std::isfinite(src_max)); - float scale_max = scale.f_.MaxAbs(); - ASSERT_HOST(std::isfinite(scale_max)); - if (src_max > 0.0f) { - float factor = scale_max / src_max; - for (int t = 0; t < src.Width(); ++t) { - const float* src_ptr = src.f_[t]; - float* dest_ptr = f_[t]; - for (int i = 0; i < src.f_.dim2(); ++i) dest_ptr[i] = src_ptr[i] * factor; - } - } else { - f_.Clear(); - } -} - -// Copies src to *this with independent reversal of the y dimension. 
-void NetworkIO::CopyWithYReversal(const NetworkIO& src) { - int num_features = src.NumFeatures(); - Resize(src, num_features); - StrideMap::Index b_index(src.stride_map_); - do { - int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1; - StrideMap::Index fwd_index(b_index); - StrideMap::Index rev_index(b_index); - rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT); - do { - int fwd_t = fwd_index.t(); - int rev_t = rev_index.t(); - for (int x = 0; x < width; ++x) CopyTimeStepFrom(rev_t++, src, fwd_t++); - } while (fwd_index.AddOffset(1, FD_HEIGHT) && - rev_index.AddOffset(-1, FD_HEIGHT)); - } while (b_index.AddOffset(1, FD_BATCH)); -} - -// Copies src to *this with independent reversal of the x dimension. -void NetworkIO::CopyWithXReversal(const NetworkIO& src) { - int num_features = src.NumFeatures(); - Resize(src, num_features); - StrideMap::Index b_index(src.stride_map_); - do { - StrideMap::Index y_index(b_index); - do { - StrideMap::Index fwd_index(y_index); - StrideMap::Index rev_index(y_index); - rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH); - do { - CopyTimeStepFrom(rev_index.t(), src, fwd_index.t()); - } while (fwd_index.AddOffset(1, FD_WIDTH) && - rev_index.AddOffset(-1, FD_WIDTH)); - } while (y_index.AddOffset(1, FD_HEIGHT)); - } while (b_index.AddOffset(1, FD_BATCH)); -} - -// Copies src to *this with independent transpose of the x and y dimensions. 
-void NetworkIO::CopyWithXYTranspose(const NetworkIO& src) { - int num_features = src.NumFeatures(); - stride_map_ = src.stride_map_; - stride_map_.TransposeXY(); - ResizeToMap(src.int_mode(), stride_map_, num_features); - StrideMap::Index src_b_index(src.stride_map_); - StrideMap::Index dest_b_index(stride_map_); - do { - StrideMap::Index src_y_index(src_b_index); - StrideMap::Index dest_x_index(dest_b_index); - do { - StrideMap::Index src_x_index(src_y_index); - StrideMap::Index dest_y_index(dest_x_index); - do { - CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t()); - } while (src_x_index.AddOffset(1, FD_WIDTH) && - dest_y_index.AddOffset(1, FD_HEIGHT)); - } while (src_y_index.AddOffset(1, FD_HEIGHT) && - dest_x_index.AddOffset(1, FD_WIDTH)); - } while (src_b_index.AddOffset(1, FD_BATCH) && - dest_b_index.AddOffset(1, FD_BATCH)); -} - -// Copies src to *this, at the given feature_offset, returning the total -// feature offset after the copy. Multiple calls will stack outputs from -// multiple sources in feature space. -int NetworkIO::CopyPacking(const NetworkIO& src, int feature_offset) { - ASSERT_HOST(int_mode_ == src.int_mode_); - int width = src.Width(); - ASSERT_HOST(width <= Width()); - int num_features = src.NumFeatures(); - ASSERT_HOST(num_features + feature_offset <= NumFeatures()); - if (int_mode_) { - for (int t = 0; t < width; ++t) { - memcpy(i_[t] + feature_offset, src.i_[t], - num_features * sizeof(i_[t][0])); - } - for (int t = width; t < i_.dim1(); ++t) { - memset(i_[t], 0, num_features * sizeof(i_[t][0])); - } - } else { - for (int t = 0; t < width; ++t) { - memcpy(f_[t] + feature_offset, src.f_[t], - num_features * sizeof(f_[t][0])); - } - for (int t = width; t < f_.dim1(); ++t) { - memset(f_[t], 0, num_features * sizeof(f_[t][0])); - } - } - return num_features + feature_offset; -} - -// Opposite of CopyPacking, fills *this with a part of src, starting at -// feature_offset, and picking num_features. 
-void NetworkIO::CopyUnpacking(const NetworkIO& src, int feature_offset, - int num_features) { - Resize(src, num_features); - int width = src.Width(); - ASSERT_HOST(num_features + feature_offset <= src.NumFeatures()); - if (int_mode_) { - for (int t = 0; t < width; ++t) { - memcpy(i_[t], src.i_[t] + feature_offset, - num_features * sizeof(i_[t][0])); - } - } else { - for (int t = 0; t < width; ++t) { - memcpy(f_[t], src.f_[t] + feature_offset, - num_features * sizeof(f_[t][0])); - } - } -} - -// Transposes the float part of *this into dest. -void NetworkIO::Transpose(TransposedArray* dest) const { - int width = Width(); - dest->ResizeNoInit(NumFeatures(), width); - for (int t = 0; t < width; ++t) dest->WriteStrided(t, f_[t]); -} - -// Clips the content of a single time-step to +/-range. -void NetworkIO::ClipVector(int t, float range) { - ASSERT_HOST(!int_mode_); - float* v = f_[t]; - int dim = f_.dim2(); - for (int i = 0; i < dim; ++i) - v[i] = ClipToRange(v[i], -range, range); -} - -// Returns the padding required for the given number of features in order -// for the SIMD operations to be safe. -/* static */ -int NetworkIO::GetPadding(int num_features) { - if (multiplier_ == nullptr) - multiplier_ = IntSimdMatrix::GetFastestMultiplier(); - int pad = 0; - if (multiplier_ != nullptr) { - pad = multiplier_->RoundInputs(num_features) - num_features; - } - return pad; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkio.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkio.h deleted file mode 100644 index b850afdc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkio.h +++ /dev/null @@ -1,349 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: networkio.h -// Description: Network input/output data, allowing float/int implementations. 
-// Author: Ray Smith -// Created: Tue Jun 17 08:43:11 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_NETWORKIO_H_ -#define TESSERACT_LSTM_NETWORKIO_H_ - -#include -#include -#include - -#include "genericvector.h" -#include "helpers.h" -#include "static_shape.h" -#include "stridemap.h" -#include "weightmatrix.h" - -struct Pix; - -namespace tesseract { - -// Class to contain all the input/output of a network, allowing for fixed or -// variable-strided 2d to 1d mapping, and float or int8_t values. Provides -// enough calculating functions to hide the detail of the implementation. -class NetworkIO { - public: - NetworkIO() : int_mode_(false) {} - // Resizes the array (and stride), avoiding realloc if possible, to the given - // size from various size specs: - // Same stride size, but given number of features. - void Resize(const NetworkIO& src, int num_features) { - ResizeToMap(src.int_mode(), src.stride_map(), num_features); - } - // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim. - void Resize2d(bool int_mode, int width, int num_features); - // Resizes forcing a float representation with the stridemap of src and the - // given number of features. - void ResizeFloat(const NetworkIO& src, int num_features) { - ResizeToMap(false, src.stride_map(), num_features); - } - // Resizes to a specific stride_map. 
- void ResizeToMap(bool int_mode, const StrideMap& stride_map, - int num_features); - // Shrinks image size by x_scale,y_scale, and use given number of features. - void ResizeScaled(const NetworkIO& src, int x_scale, int y_scale, - int num_features); - // Resizes to just 1 x-coord, whatever the input. - void ResizeXTo1(const NetworkIO& src, int num_features); - // Initialize all the array to zero. - void Zero(); - // Initializes to zero all elements of the array that do not correspond to - // valid image positions. (If a batch of different-sized images are packed - // together, then there will be padding pixels.) - void ZeroInvalidElements(); - // Sets up the array from the given image, using the currently set int_mode_. - // If the image width doesn't match the shape, the image is truncated or - // padded with noise to match. - void FromPix(const StaticShape& shape, const Pix* pix, TRand* randomizer); - // Sets up the array from the given set of images, using the currently set - // int_mode_. If the image width doesn't match the shape, the images are - // truncated or padded with noise to match. - void FromPixes(const StaticShape& shape, const std::vector& pixes, - TRand* randomizer); - // Copies the given pix to *this at the given batch index, stretching and - // clipping the pixel values so that [black, black + 2*contrast] maps to the - // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int. - // This is a 2-d operation in the sense that the output depth is the number - // of input channels, the height is the height of the image, and the width - // is the width of the image, or truncated/padded with noise if the width - // is a fixed size. - void Copy2DImage(int batch, Pix* pix, float black, float contrast, - TRand* randomizer); - // Copies the given pix to *this at the given batch index, as Copy2DImage - // above, except that the output depth is the height of the input image, the - // output height is 1, and the output width as for Copy2DImage. 
- // The image is thus treated as a 1-d set of vertical pixel strips. - void Copy1DGreyImage(int batch, Pix* pix, float black, float contrast, - TRand* randomizer); - // Helper stores the pixel value in i_ or f_ according to int_mode_. - // t: is the index from the StrideMap corresponding to the current - // [batch,y,x] position - // f: is the index into the depth/channel - // pixel: the value of the pixel from the image (in one channel) - // black: the pixel value to map to the lowest of the range of *this - // contrast: the range of pixel values to stretch to half the range of *this. - void SetPixel(int t, int f, int pixel, float black, float contrast); - // Converts the array to a Pix. Must be pixDestroyed after use. - Pix* ToPix() const; - // Prints the first and last num timesteps of the array for each feature. - void Print(int num) const; - - // Returns the timestep width. - int Width() const { - return int_mode_ ? i_.dim1() : f_.dim1(); - } - // Returns the number of features. - int NumFeatures() const { - return int_mode_ ? i_.dim2() : f_.dim2(); - } - // Accessor to a timestep of the float matrix. - float* f(int t) { - ASSERT_HOST(!int_mode_); - return f_[t]; - } - const float* f(int t) const { - ASSERT_HOST(!int_mode_); - return f_[t]; - } - const int8_t* i(int t) const { - ASSERT_HOST(int_mode_); - return i_[t]; - } - bool int_mode() const { - return int_mode_; - } - void set_int_mode(bool is_quantized) { - int_mode_ = is_quantized; - } - const StrideMap& stride_map() const { - return stride_map_; - } - void set_stride_map(const StrideMap& map) { - stride_map_ = map; - } - const GENERIC_2D_ARRAY& float_array() const { return f_; } - GENERIC_2D_ARRAY* mutable_float_array() { return &f_; } - - // Copies a single time step from src. - void CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t); - // Copies a part of single time step from src. 
- void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, - const NetworkIO& src, int src_t, int src_offset); - // Zeroes a single time step. - void ZeroTimeStep(int t) { ZeroTimeStepGeneral(t, 0, NumFeatures()); } - void ZeroTimeStepGeneral(int t, int offset, int num_features); - // Sets the given range to random values. - void Randomize(int t, int offset, int num_features, TRand* randomizer); - - // Helper returns the label and score of the best choice over a range. - int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, - float* rating, float* certainty) const; - // Helper returns the rating and certainty of the choice over a range in t. - void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, - float* rating, float* certainty) const; - // Returns the index (label) of the best value at the given timestep, - // and if not null, sets the score to the log of the corresponding value. - int BestLabel(int t, float* score) const { - return BestLabel(t, -1, -1, score); - } - // Returns the index (label) of the best value at the given timestep, - // excluding not_this and not_that, and if not null, sets the score to the - // log of the corresponding value. - int BestLabel(int t, int not_this, int not_that, float* score) const; - // Returns the best start position out of range (into which both start and end - // must fit) to obtain the highest cumulative score for the given labels. - int PositionOfBestMatch(const GenericVector& labels, int start, - int end) const; - // Returns the cumulative score of the given labels starting at start, and - // using one label per time-step. - double ScoreOfLabels(const GenericVector& labels, int start) const; - // Helper function sets all the outputs for a single timestep, such that - // label has value ok_score, and the other labels share 1 - ok_score. - // Assumes float mode. 
- void SetActivations(int t, int label, float ok_score); - // Modifies the values, only if needed, so that the given label is - // the winner at the given time step t. - // Assumes float mode. - void EnsureBestLabel(int t, int label); - // Helper function converts prob to certainty taking the minimum into account. - static float ProbToCertainty(float prob); - // Returns true if there is any bad value that is suspiciously like a GT - // error. Assuming that *this is the difference(gradient) between target - // and forward output, returns true if there is a large negative value - // (correcting a very confident output) for which there is no corresponding - // positive value in an adjacent timestep for the same feature index. This - // allows the box-truthed samples to make fine adjustments to position while - // stopping other disagreements of confident output with ground truth. - bool AnySuspiciousTruth(float confidence_thr) const; - - // Reads a single timestep to floats in the range [-1, 1]. - void ReadTimeStep(int t, double* output) const; - // Adds a single timestep to floats. - void AddTimeStep(int t, double* inout) const; - // Adds part of a single timestep to floats. - void AddTimeStepPart(int t, int offset, int num_features, float* inout) const; - // Writes a single timestep from floats in the range [-1, 1]. - void WriteTimeStep(int t, const double* input); - // Writes a single timestep from floats in the range [-1, 1] writing only - // num_features elements of input to (*this)[t], starting at offset. - void WriteTimeStepPart(int t, int offset, int num_features, - const double* input); - // Maxpools a single time step from src. - void MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t, - int* max_line); - // Runs maxpool backward, using maxes to index timesteps in *this. - void MaxpoolBackward(const NetworkIO& fwd, - const GENERIC_2D_ARRAY& maxes); - // Returns the min over time of the maxes over features of the outputs. 
- float MinOfMaxes() const; - // Returns the min over time. - float Max() const { return int_mode_ ? i_.Max() : f_.Max(); } - // Computes combined results for a combiner that chooses between an existing - // input and itself, with an additional output to indicate the choice. - void CombineOutputs(const NetworkIO& base_output, - const NetworkIO& combiner_output); - // Computes deltas for a combiner that chooses between 2 sets of inputs. - void ComputeCombinerDeltas(const NetworkIO& fwd_deltas, - const NetworkIO& base_output); - - // Copies the array checking that the types match. - void CopyAll(const NetworkIO& src); - // Adds the array to a float array, with scaling to [-1, 1] if the src is int. - void AddAllToFloat(const NetworkIO& src); - // Subtracts the array from a float array. src must also be float. - void SubtractAllFromFloat(const NetworkIO& src); - - // Copies src to *this, with maxabs normalization to match scale. - void CopyWithNormalization(const NetworkIO& src, const NetworkIO& scale); - // Multiplies the float data by the given factor. - void ScaleFloatBy(float factor) { f_ *= factor; } - // Copies src to *this with independent reversal of the y dimension. - void CopyWithYReversal(const NetworkIO& src); - // Copies src to *this with independent reversal of the x dimension. - void CopyWithXReversal(const NetworkIO& src); - // Copies src to *this with independent transpose of the x and y dimensions. - void CopyWithXYTranspose(const NetworkIO& src); - // Copies src to *this, at the given feature_offset, returning the total - // feature offset after the copy. Multiple calls will stack outputs from - // multiple sources in feature space. - int CopyPacking(const NetworkIO& src, int feature_offset); - // Opposite of CopyPacking, fills *this with a part of src, starting at - // feature_offset, and picking num_features. Resizes *this to match. 
- void CopyUnpacking(const NetworkIO& src, int feature_offset, - int num_features); - // Transposes the float part of *this into dest. - void Transpose(TransposedArray* dest) const; - - // Clips the content of a single time-step to +/-range. - void ClipVector(int t, float range); - - // Applies Func to timestep t of *this (u) and multiplies the result by v - // component-wise, putting the product in *product. - // *this and v may be int or float, but must match. The outputs are double. - template - void FuncMultiply(const NetworkIO& v_io, int t, double* product) { - Func f; - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!v_io.int_mode_); - int dim = f_.dim2(); - if (int_mode_) { - const int8_t* u = i_[t]; - const int8_t* v = v_io.i_[t]; - for (int i = 0; i < dim; ++i) { - product[i] = f(u[i] / static_cast(INT8_MAX)) * v[i] / - static_cast(INT8_MAX); - } - } else { - const float* u = f_[t]; - const float* v = v_io.f_[t]; - for (int i = 0; i < dim; ++i) { - product[i] = f(u[i]) * v[i]; - } - } - } - // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w, - // component-wise, putting the product in *product. - // All NetworkIOs are assumed to be float. - template - void FuncMultiply3(int u_t, const NetworkIO& v_io, int v_t, const double* w, - double* product) const { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!v_io.int_mode_); - Func f; - const float* u = f_[u_t]; - const float* v = v_io.f_[v_t]; - int dim = f_.dim2(); - for (int i = 0; i < dim; ++i) { - product[i] = f(u[i]) * v[i] * w[i]; - } - } - // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w, - // component-wise, adding the product to *product. - // All NetworkIOs are assumed to be float. 
- template - void FuncMultiply3Add(const NetworkIO& v_io, int t, const double* w, - double* product) const { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!v_io.int_mode_); - Func f; - const float* u = f_[t]; - const float* v = v_io.f_[t]; - int dim = f_.dim2(); - for (int i = 0; i < dim; ++i) { - product[i] += f(u[i]) * v[i] * w[i]; - } - } - // Applies Func1 to *this (u), Func2 to v, and multiplies the result by w, - // component-wise, putting the product in product, all at timestep t, except - // w, which is a simple array. All NetworkIOs are assumed to be float. - template - void Func2Multiply3(const NetworkIO& v_io, int t, const double* w, - double* product) const { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(!v_io.int_mode_); - Func1 f; - Func2 g; - const float* u = f_[t]; - const float* v = v_io.f_[t]; - int dim = f_.dim2(); - for (int i = 0; i < dim; ++i) { - product[i] = f(u[i]) * g(v[i]) * w[i]; - } - } - - private: - // Returns the padding required for the given number of features in order - // for the SIMD operations to be safe. - static int GetPadding(int num_features); - - // Choice of float vs 8 bit int for data. - GENERIC_2D_ARRAY f_; - GENERIC_2D_ARRAY i_; - // Which of f_ and i_ are we actually using. - bool int_mode_; - // Stride for 2d input data. - StrideMap stride_map_; - // Holds the optimal integer multiplier for this machine. - // This is a leaked, lazily initialized singleton, and is used for computing - // padding to apply to i_ for SIMD use. - static IntSimdMatrix* multiplier_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_LSTM_NETWORKIO_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkscratch.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkscratch.h deleted file mode 100644 index c92a9fe2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/networkscratch.h +++ /dev/null @@ -1,257 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: networkscratch.h -// Description: Scratch space for Network layers that hides distinction -// between float/int implementations. -// Author: Ray Smith -// Created: Thu Jun 19 10:50:29 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_NETWORKSCRATCH_H_ -#define TESSERACT_LSTM_NETWORKSCRATCH_H_ - -#include "genericvector.h" -#include "matrix.h" -#include "networkio.h" -#include "svutil.h" -#include "tprintf.h" - -namespace tesseract { - -// Generic scratch space for network layers. Provides NetworkIO that can store -// a complete set (over time) of intermediates, and GenericVector -// scratch space that auto-frees after use. The aim here is to provide a set -// of temporary buffers to network layers that can be reused between layers -// and don't have to be reallocated on each call. 
-class NetworkScratch { - public: - NetworkScratch() : int_mode_(false) {} - ~NetworkScratch() = default; - - // Sets the network representation. If the representation is integer, then - // default (integer) NetworkIOs are separated from the always-float variety. - // This saves memory by having separate int-specific and float-specific - // stacks. If the network representation is float, then all NetworkIOs go - // to the float stack. - void set_int_mode(bool int_mode) { - int_mode_ = int_mode; - } - - // Class that acts like a NetworkIO (by having an implicit cast operator), - // yet actually holds a pointer to NetworkIOs in the source NetworkScratch, - // and knows how to unstack the borrowed pointers on destruction. - class IO { - public: - // The NetworkIO should be sized after construction. - IO(const NetworkIO& src, NetworkScratch* scratch) - : int_mode_(scratch->int_mode_ && src.int_mode()), - scratch_space_(scratch) { - network_io_ = int_mode_ ? scratch_space_->int_stack_.Borrow() - : scratch_space_->float_stack_.Borrow(); - } - // Default constructor for arrays. Use one of the Resize functions - // below to initialize and size. - IO() : int_mode_(false), network_io_(nullptr), scratch_space_(nullptr) {} - - ~IO() { - if (scratch_space_ == nullptr) { - ASSERT_HOST(network_io_ == nullptr); - } else if (int_mode_) { - scratch_space_->int_stack_.Return(network_io_); - } else { - scratch_space_->float_stack_.Return(network_io_); - } - } - // Resizes the array (and stride), avoiding realloc if possible, to the - // size from various size specs: - // Same time size, given number of features. - void Resize(const NetworkIO& src, int num_features, - NetworkScratch* scratch) { - if (scratch_space_ == nullptr) { - int_mode_ = scratch->int_mode_ && src.int_mode(); - scratch_space_ = scratch; - network_io_ = int_mode_ ? 
scratch_space_->int_stack_.Borrow() - : scratch_space_->float_stack_.Borrow(); - } - network_io_->Resize(src, num_features); - } - // Resizes to a specific size as a temp buffer. No batches, no y-dim. - void Resize2d(bool int_mode, int width, int num_features, - NetworkScratch* scratch) { - if (scratch_space_ == nullptr) { - int_mode_ = scratch->int_mode_ && int_mode; - scratch_space_ = scratch; - network_io_ = int_mode_ ? scratch_space_->int_stack_.Borrow() - : scratch_space_->float_stack_.Borrow(); - } - network_io_->Resize2d(int_mode, width, num_features); - } - // Resize forcing a float representation with the width of src and the given - // number of features. - void ResizeFloat(const NetworkIO& src, int num_features, - NetworkScratch* scratch) { - if (scratch_space_ == nullptr) { - int_mode_ = false; - scratch_space_ = scratch; - network_io_ = scratch_space_->float_stack_.Borrow(); - } - network_io_->ResizeFloat(src, num_features); - } - - // Returns a ref to a NetworkIO that enables *this to be treated as if - // it were just a NetworkIO*. - NetworkIO& operator*() { - return *network_io_; - } - NetworkIO* operator->() { - return network_io_; - } - operator NetworkIO*() { - return network_io_; - } - - private: - // True if this is from the always-float stack, otherwise the default stack. - bool int_mode_; - // The NetworkIO that we have borrowed from the scratch_space_. - NetworkIO* network_io_; - // The source scratch_space_. Borrowed pointer, used to free the - // NetworkIO. Don't delete! - NetworkScratch* scratch_space_; - }; // class IO. - - // Class that acts like a fixed array of float, yet actually uses space - // from a GenericVector in the source NetworkScratch, and knows how - // to unstack the borrowed vector on destruction. - class FloatVec { - public: - // The array will have size elements in it, uninitialized. 
- FloatVec(int size, NetworkScratch* scratch) - : vec_(nullptr), scratch_space_(scratch) { - Init(size, scratch); - } - // Default constructor is for arrays. Use Init to setup. - FloatVec() : vec_(nullptr), data_(nullptr), scratch_space_(nullptr) {} - ~FloatVec() { - if (scratch_space_ != nullptr) scratch_space_->vec_stack_.Return(vec_); - } - - void Init(int size, NetworkScratch* scratch) { - if (scratch_space_ != nullptr && vec_ != nullptr) - scratch_space_->vec_stack_.Return(vec_); - scratch_space_ = scratch; - vec_ = scratch_space_->vec_stack_.Borrow(); - vec_->resize_no_init(size); - data_ = &(*vec_)[0]; - } - - // Use the cast operator instead of operator[] so the FloatVec can be used - // as a double* argument to a function call. - operator double*() const { return data_; } - double* get() { return data_; } - - private: - // Vector borrowed from the scratch space. Use Return to free it. - GenericVector* vec_; - // Short-cut pointer to the underlying array. - double* data_; - // The source scratch_space_. Borrowed pointer, used to free the - // vector. Don't delete! - NetworkScratch* scratch_space_; - }; // class FloatVec - - // Class that acts like a 2-D array of double, yet actually uses space - // from the source NetworkScratch, and knows how to unstack the borrowed - // array on destruction. - class GradientStore { - public: - // Default constructor is for arrays. Use Init to setup. - GradientStore() : array_(nullptr), scratch_space_(nullptr) {} - ~GradientStore() { - if (scratch_space_ != nullptr) scratch_space_->array_stack_.Return(array_); - } - - void Init(int size1, int size2, NetworkScratch* scratch) { - if (scratch_space_ != nullptr && array_ != nullptr) - scratch_space_->array_stack_.Return(array_); - scratch_space_ = scratch; - array_ = scratch_space_->array_stack_.Borrow(); - array_->Resize(size1, size2, 0.0); - } - - // Accessors to get to the underlying TransposedArray. 
- TransposedArray* get() const { return array_; } - const TransposedArray& operator*() const { return *array_; } - - private: - // Array borrowed from the scratch space. Use Return to free it. - TransposedArray* array_; - // The source scratch_space_. Borrowed pointer, used to free the - // vector. Don't delete! - NetworkScratch* scratch_space_; - }; // class GradientStore - - // Class that does the work of holding a stack of objects, a stack pointer - // and a vector of in-use flags, so objects can be returned out of order. - // It is safe to attempt to Borrow/Return in multiple threads. - template class Stack { - public: - Stack() : stack_top_(0) { - } - - // Lends out the next free item, creating one if none available, sets - // the used flags and increments the stack top. - T* Borrow() { - SVAutoLock lock(&mutex_); - if (stack_top_ == stack_.size()) { - stack_.push_back(new T); - flags_.push_back(false); - } - flags_[stack_top_] = true; - return stack_[stack_top_++]; - } - // Takes back the given item, and marks it free. Item does not have to be - // the most recently lent out, but free slots don't get re-used until the - // blocking item is returned. The assumption is that there will only be - // small, temporary variations from true stack use. (Determined by the order - // of destructors within a local scope.) - void Return(T* item) { - SVAutoLock lock(&mutex_); - // Linear search will do. - int index = stack_top_ - 1; - while (index >= 0 && stack_[index] != item) --index; - if (index >= 0) flags_[index] = false; - while (stack_top_ > 0 && !flags_[stack_top_ - 1]) --stack_top_; - } - - private: - PointerVector stack_; - GenericVector flags_; - int stack_top_; - SVMutex mutex_; - }; // class Stack. - - private: - // If true, the network weights are int8_t, if false, float. - bool int_mode_; - // Stacks of NetworkIO and GenericVector. Once allocated, they are not - // deleted until the NetworkScratch is deleted. 
- Stack int_stack_; - Stack float_stack_; - Stack > vec_stack_; - Stack array_stack_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_NETWORKSCRATCH_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/parallel.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/parallel.cpp deleted file mode 100644 index b4f45aa5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/parallel.cpp +++ /dev/null @@ -1,176 +0,0 @@ -///////////////////////////////////////////////////////////////////////// -// File: parallel.cpp -// Description: Runs networks in parallel on the same input. -// Author: Ray Smith -// Created: Thu May 02 08:06:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "parallel.h" - -#ifdef _OPENMP -#include -#endif - -#include "functions.h" // For conditional undef of _OPENMP. -#include "networkscratch.h" - -namespace tesseract { - -// ni_ and no_ will be set by AddToStack. -Parallel::Parallel(const STRING& name, NetworkType type) : Plumbing(name) { - type_ = type; -} - -// Returns the shape output from the network given an input shape (which may -// be partially unknown ie zero). 
-StaticShape Parallel::OutputShape(const StaticShape& input_shape) const { - StaticShape result = stack_[0]->OutputShape(input_shape); - int stack_size = stack_.size(); - for (int i = 1; i < stack_size; ++i) { - StaticShape shape = stack_[i]->OutputShape(input_shape); - result.set_depth(result.depth() + shape.depth()); - } - return result; -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. -void Parallel::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - bool parallel_debug = false; - // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair, - // or a 2-d LSTM quad, do debug locally, and don't pass the flag on. - if (debug && type_ != NT_PARALLEL) { - parallel_debug = true; - debug = false; - } - int stack_size = stack_.size(); - if (type_ == NT_PAR_2D_LSTM) { - // Special case, run parallel in parallel. - GenericVector results; - results.init_to_size(stack_size, NetworkScratch::IO()); - for (int i = 0; i < stack_size; ++i) { - results[i].Resize(input, stack_[i]->NumOutputs(), scratch); - } -#ifdef _OPENMP -#pragma omp parallel for num_threads(stack_size) -#endif - for (int i = 0; i < stack_size; ++i) { - stack_[i]->Forward(debug, input, nullptr, scratch, results[i]); - } - // Now pack all the results (serially) into the output. - int out_offset = 0; - output->Resize(*results[0], NumOutputs()); - for (int i = 0; i < stack_size; ++i) { - out_offset = output->CopyPacking(*results[i], out_offset); - } - } else { - // Revolving intermediate result. - NetworkScratch::IO result(input, scratch); - // Source for divided replicated. - NetworkScratch::IO source_part; - TransposedArray* src_transpose = nullptr; - if (IsTraining() && type_ == NT_REPLICATED) { - // Make a transposed copy of the input. 
- input.Transpose(&transposed_input_); - src_transpose = &transposed_input_; - } - // Run each network, putting the outputs into result. - int out_offset = 0; - for (int i = 0; i < stack_size; ++i) { - stack_[i]->Forward(debug, input, src_transpose, scratch, result); - // All networks must have the same output width - if (i == 0) { - output->Resize(*result, NumOutputs()); - } else { - ASSERT_HOST(result->Width() == output->Width()); - } - out_offset = output->CopyPacking(*result, out_offset); - } - } - if (parallel_debug) { - DisplayForward(*output); - } -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool Parallel::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair, - // or a 2-d LSTM quad, do debug locally, and don't pass the flag on. - if (debug && type_ != NT_PARALLEL) { - DisplayBackward(fwd_deltas); - debug = false; - } - int stack_size = stack_.size(); - if (type_ == NT_PAR_2D_LSTM) { - // Special case, run parallel in parallel. - GenericVector in_deltas, out_deltas; - in_deltas.init_to_size(stack_size, NetworkScratch::IO()); - out_deltas.init_to_size(stack_size, NetworkScratch::IO()); - // Split the forward deltas for each stack element. - int feature_offset = 0; - for (int i = 0; i < stack_.size(); ++i) { - int num_features = stack_[i]->NumOutputs(); - in_deltas[i].Resize(fwd_deltas, num_features, scratch); - out_deltas[i].Resize(fwd_deltas, stack_[i]->NumInputs(), scratch); - in_deltas[i]->CopyUnpacking(fwd_deltas, feature_offset, num_features); - feature_offset += num_features; - } -#ifdef _OPENMP -#pragma omp parallel for num_threads(stack_size) -#endif - for (int i = 0; i < stack_size; ++i) { - stack_[i]->Backward(debug, *in_deltas[i], scratch, - i == 0 ? 
back_deltas : out_deltas[i]); - } - if (needs_to_backprop_) { - for (int i = 1; i < stack_size; ++i) { - back_deltas->AddAllToFloat(*out_deltas[i]); - } - } - } else { - // Revolving partial deltas. - NetworkScratch::IO in_deltas(fwd_deltas, scratch); - // The sum of deltas from different sources, which will eventually go into - // back_deltas. - NetworkScratch::IO out_deltas; - int feature_offset = 0; - for (int i = 0; i < stack_.size(); ++i) { - int num_features = stack_[i]->NumOutputs(); - in_deltas->CopyUnpacking(fwd_deltas, feature_offset, num_features); - feature_offset += num_features; - if (stack_[i]->Backward(debug, *in_deltas, scratch, back_deltas)) { - if (i == 0) { - out_deltas.ResizeFloat(*back_deltas, back_deltas->NumFeatures(), - scratch); - out_deltas->CopyAll(*back_deltas); - } else if (back_deltas->NumFeatures() == out_deltas->NumFeatures()) { - // Widths are allowed to be different going back, as we may have - // input nets, so only accumulate the deltas if the widths are the - // same. - out_deltas->AddAllToFloat(*back_deltas); - } - } - } - if (needs_to_backprop_) back_deltas->CopyAll(*out_deltas); - } - if (needs_to_backprop_) back_deltas->ScaleFloatBy(1.0f / stack_size); - return needs_to_backprop_; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/parallel.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/parallel.h deleted file mode 100644 index 671de96c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/parallel.h +++ /dev/null @@ -1,87 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: parallel.h -// Description: Runs networks in parallel on the same input. -// Author: Ray Smith -// Created: Thu May 02 08:02:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_PARALLEL_H_ -#define TESSERACT_LSTM_PARALLEL_H_ - -#include "plumbing.h" - -namespace tesseract { - -// Runs multiple networks in parallel, interlacing their outputs. -class Parallel : public Plumbing { - public: - // ni_ and no_ will be set by AddToStack. - Parallel(const STRING& name, NetworkType type); - virtual ~Parallel() = default; - - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - StaticShape OutputShape(const StaticShape& input_shape) const override; - - STRING spec() const override { - STRING spec; - if (type_ == NT_PAR_2D_LSTM) { - // We have 4 LSTMs operating in parallel here, so the size of each is - // the number of outputs/4. - spec.add_str_int("L2xy", no_ / 4); - } else if (type_ == NT_PAR_RL_LSTM) { - // We have 2 LSTMs operating in parallel here, so the size of each is - // the number of outputs/2. - if (stack_[0]->type() == NT_LSTM_SUMMARY) - spec.add_str_int("Lbxs", no_ / 2); - else - spec.add_str_int("Lbx", no_ / 2); - } else { - if (type_ == NT_REPLICATED) { - spec.add_str_int("R", stack_.size()); - spec += "("; - spec += stack_[0]->spec(); - } else { - spec = "("; - for (int i = 0; i < stack_.size(); ++i) spec += stack_[i]->spec(); - } - spec += ")"; - } - return spec; - } - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. 
- void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; - - private: - // If *this is a NT_REPLICATED, then it feeds a replicated network with - // identical inputs, and it would be extremely wasteful for them to each - // calculate and store the same transpose of the inputs, so Parallel does it - // and passes a pointer to the replicated network, allowing it to use the - // transpose on the next call to Backward. - TransposedArray transposed_input_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_PARALLEL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/plumbing.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/plumbing.cpp deleted file mode 100644 index 7fbf802f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/plumbing.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: plumbing.cpp -// Description: Base class for networks that organize other networks -// eg series or parallel. -// Author: Ray Smith -// Created: Mon May 12 08:17:34 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "plumbing.h" - -namespace tesseract { - -// ni_ and no_ will be set by AddToStack. -Plumbing::Plumbing(const STRING& name) - : Network(NT_PARALLEL, name, 0, 0) { -} - -// Suspends/Enables training by setting the training_ flag. Serialize and -// DeSerialize only operate on the run-time data if state is false. -void Plumbing::SetEnableTraining(TrainingState state) { - Network::SetEnableTraining(state); - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->SetEnableTraining(state); -} - -// Sets flags that control the action of the network. See NetworkFlags enum -// for bit values. -void Plumbing::SetNetworkFlags(uint32_t flags) { - Network::SetNetworkFlags(flags); - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->SetNetworkFlags(flags); -} - -// Sets up the network for training. Initializes weights using weights of -// scale `range` picked according to the random number generator `randomizer`. -// Note that randomizer is a borrowed pointer that should outlive the network -// and should not be deleted by any of the networks. -// Returns the number of weights initialized. -int Plumbing::InitWeights(float range, TRand* randomizer) { - num_weights_ = 0; - for (int i = 0; i < stack_.size(); ++i) - num_weights_ += stack_[i]->InitWeights(range, randomizer); - return num_weights_; -} - -// Recursively searches the network for softmaxes with old_no outputs, -// and remaps their outputs according to code_map. See network.h for details. -int Plumbing::RemapOutputs(int old_no, const std::vector& code_map) { - num_weights_ = 0; - for (int i = 0; i < stack_.size(); ++i) { - num_weights_ += stack_[i]->RemapOutputs(old_no, code_map); - } - return num_weights_; -} - -// Converts a float network to an int network. 
-void Plumbing::ConvertToInt() { - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->ConvertToInt(); -} - -// Provides a pointer to a TRand for any networks that care to use it. -// Note that randomizer is a borrowed pointer that should outlive the network -// and should not be deleted by any of the networks. -void Plumbing::SetRandomizer(TRand* randomizer) { - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->SetRandomizer(randomizer); -} - -// Adds the given network to the stack. -void Plumbing::AddToStack(Network* network) { - if (stack_.empty()) { - ni_ = network->NumInputs(); - no_ = network->NumOutputs(); - } else if (type_ == NT_SERIES) { - // ni is input of first, no output of last, others match output to input. - ASSERT_HOST(no_ == network->NumInputs()); - no_ = network->NumOutputs(); - } else { - // All parallel types. Output is sum of outputs, inputs all match. - ASSERT_HOST(ni_ == network->NumInputs()); - no_ += network->NumOutputs(); - } - stack_.push_back(network); -} - -// Sets needs_to_backprop_ to needs_backprop and calls on sub-network -// according to needs_backprop || any weights in this network. -bool Plumbing::SetupNeedsBackprop(bool needs_backprop) { - if (IsTraining()) { - needs_to_backprop_ = needs_backprop; - bool retval = needs_backprop; - for (int i = 0; i < stack_.size(); ++i) { - if (stack_[i]->SetupNeedsBackprop(needs_backprop)) retval = true; - } - return retval; - } - // Frozen networks don't do backprop. - needs_to_backprop_ = false; - return false; -} - -// Returns an integer reduction factor that the network applies to the -// time sequence. Assumes that any 2-d is already eliminated. Used for -// scaling bounding boxes of truth data. -// WARNING: if GlobalMinimax is used to vary the scale, this will return -// the last used scale factor. Call it before any forward, and it will return -// the minimum scale factor of the paths through the GlobalMinimax. 
-int Plumbing::XScaleFactor() const { - return stack_[0]->XScaleFactor(); -} - -// Provides the (minimum) x scale factor to the network (of interest only to -// input units) so they can determine how to scale bounding boxes. -void Plumbing::CacheXScaleFactor(int factor) { - for (int i = 0; i < stack_.size(); ++i) { - stack_[i]->CacheXScaleFactor(factor); - } -} - -// Provides debug output on the weights. -void Plumbing::DebugWeights() { - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->DebugWeights(); -} - -// Returns a set of strings representing the layer-ids of all layers below. -void Plumbing::EnumerateLayers(const STRING* prefix, - GenericVector* layers) const { - for (int i = 0; i < stack_.size(); ++i) { - STRING layer_name; - if (prefix) layer_name = *prefix; - layer_name.add_str_int(":", i); - if (stack_[i]->IsPlumbingType()) { - Plumbing* plumbing = static_cast(stack_[i]); - plumbing->EnumerateLayers(&layer_name, layers); - } else { - layers->push_back(layer_name); - } - } -} - -// Returns a pointer to the network layer corresponding to the given id. -Network* Plumbing::GetLayer(const char* id) const { - char* next_id; - int index = strtol(id, &next_id, 10); - if (index < 0 || index >= stack_.size()) return nullptr; - if (stack_[index]->IsPlumbingType()) { - Plumbing* plumbing = static_cast(stack_[index]); - ASSERT_HOST(*next_id == ':'); - return plumbing->GetLayer(next_id + 1); - } - return stack_[index]; -} - -// Returns a pointer to the learning rate for the given layer id. 
-float* Plumbing::LayerLearningRatePtr(const char* id) const { - char* next_id; - int index = strtol(id, &next_id, 10); - if (index < 0 || index >= stack_.size()) return nullptr; - if (stack_[index]->IsPlumbingType()) { - Plumbing* plumbing = static_cast(stack_[index]); - ASSERT_HOST(*next_id == ':'); - return plumbing->LayerLearningRatePtr(next_id + 1); - } - if (index >= learning_rates_.size()) return nullptr; - return &learning_rates_[index]; -} - -// Writes to the given file. Returns false in case of error. -bool Plumbing::Serialize(TFile* fp) const { - if (!Network::Serialize(fp)) return false; - uint32_t size = stack_.size(); - // Can't use PointerVector::Serialize here as we need a special DeSerialize. - if (!fp->Serialize(&size)) return false; - for (uint32_t i = 0; i < size; ++i) - if (!stack_[i]->Serialize(fp)) return false; - if ((network_flags_ & NF_LAYER_SPECIFIC_LR) && - !learning_rates_.Serialize(fp)) { - return false; - } - return true; -} - -// Reads from the given file. Returns false in case of error. -bool Plumbing::DeSerialize(TFile* fp) { - stack_.truncate(0); - no_ = 0; // We will be modifying this as we AddToStack. - uint32_t size; - if (!fp->DeSerialize(&size)) return false; - for (uint32_t i = 0; i < size; ++i) { - Network* network = CreateFromFile(fp); - if (network == nullptr) return false; - AddToStack(network); - } - if ((network_flags_ & NF_LAYER_SPECIFIC_LR) && - !learning_rates_.DeSerialize(fp)) { - return false; - } - return true; -} - -// Updates the weights using the given learning rate, momentum and adam_beta. -// num_samples is used in the adam computation iff use_adam_ is true. 
-void Plumbing::Update(float learning_rate, float momentum, float adam_beta, - int num_samples) { - for (int i = 0; i < stack_.size(); ++i) { - if (network_flags_ & NF_LAYER_SPECIFIC_LR) { - if (i < learning_rates_.size()) - learning_rate = learning_rates_[i]; - else - learning_rates_.push_back(learning_rate); - } - if (stack_[i]->IsTraining()) { - stack_[i]->Update(learning_rate, momentum, adam_beta, num_samples); - } - } -} - -// Sums the products of weight updates in *this and other, splitting into -// positive (same direction) in *same and negative (different direction) in -// *changed. -void Plumbing::CountAlternators(const Network& other, double* same, - double* changed) const { - ASSERT_HOST(other.type() == type_); - const Plumbing* plumbing = static_cast(&other); - ASSERT_HOST(plumbing->stack_.size() == stack_.size()); - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->CountAlternators(*plumbing->stack_[i], same, changed); -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/plumbing.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/plumbing.h deleted file mode 100644 index 7c55e2e1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/plumbing.h +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: plumbing.h -// Description: Base class for networks that organize other networks -// eg series or parallel. -// Author: Ray Smith -// Created: Mon May 12 08:11:36 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_PLUMBING_H_ -#define TESSERACT_LSTM_PLUMBING_H_ - -#include "genericvector.h" -#include "matrix.h" -#include "network.h" - -namespace tesseract { - -// Holds a collection of other networks and forwards calls to each of them. -class Plumbing : public Network { - public: - // ni_ and no_ will be set by AddToStack. - explicit Plumbing(const STRING& name); - virtual ~Plumbing() = default; - - // Returns the required shape input to the network. - StaticShape InputShape() const override { return stack_[0]->InputShape(); } - STRING spec() const override { - return "Sub-classes of Plumbing must implement spec()!"; - } - - // Returns true if the given type is derived from Plumbing, and thus contains - // multiple sub-networks that can have their own learning rate. - bool IsPlumbingType() const override { return true; } - - // Suspends/Enables training by setting the training_ flag. Serialize and - // DeSerialize only operate on the run-time data if state is false. - void SetEnableTraining(TrainingState state) override; - - // Sets flags that control the action of the network. See NetworkFlags enum - // for bit values. - void SetNetworkFlags(uint32_t flags) override; - - // Sets up the network for training. Initializes weights using weights of - // scale `range` picked according to the random number generator `randomizer`. - // Note that randomizer is a borrowed pointer that should outlive the network - // and should not be deleted by any of the networks. 
- // Returns the number of weights initialized. - int InitWeights(float range, TRand* randomizer) override; - // Recursively searches the network for softmaxes with old_no outputs, - // and remaps their outputs according to code_map. See network.h for details. - int RemapOutputs(int old_no, const std::vector& code_map) override; - - // Converts a float network to an int network. - void ConvertToInt() override; - - // Provides a pointer to a TRand for any networks that care to use it. - // Note that randomizer is a borrowed pointer that should outlive the network - // and should not be deleted by any of the networks. - void SetRandomizer(TRand* randomizer) override; - - // Adds the given network to the stack. - virtual void AddToStack(Network* network); - - // Sets needs_to_backprop_ to needs_backprop and returns true if - // needs_backprop || any weights in this network so the next layer forward - // can be told to produce backprop for this layer if needed. - bool SetupNeedsBackprop(bool needs_backprop) override; - - // Returns an integer reduction factor that the network applies to the - // time sequence. Assumes that any 2-d is already eliminated. Used for - // scaling bounding boxes of truth data. - // WARNING: if GlobalMinimax is used to vary the scale, this will return - // the last used scale factor. Call it before any forward, and it will return - // the minimum scale factor of the paths through the GlobalMinimax. - int XScaleFactor() const override; - - // Provides the (minimum) x scale factor to the network (of interest only to - // input units) so they can determine how to scale bounding boxes. - void CacheXScaleFactor(int factor) override; - - // Provides debug output on the weights. - void DebugWeights() override; - - // Returns the current stack. - const PointerVector& stack() const { - return stack_; - } - // Returns a set of strings representing the layer-ids of all layers below. 
- void EnumerateLayers(const STRING* prefix, - GenericVector* layers) const; - // Returns a pointer to the network layer corresponding to the given id. - Network* GetLayer(const char* id) const; - // Returns the learning rate for a specific layer of the stack. - float LayerLearningRate(const char* id) const { - const float* lr_ptr = LayerLearningRatePtr(id); - ASSERT_HOST(lr_ptr != nullptr); - return *lr_ptr; - } - // Scales the learning rate for a specific layer of the stack. - void ScaleLayerLearningRate(const char* id, double factor) { - float* lr_ptr = LayerLearningRatePtr(id); - ASSERT_HOST(lr_ptr != nullptr); - *lr_ptr *= factor; - } - // Returns a pointer to the learning rate for the given layer id. - float* LayerLearningRatePtr(const char* id) const; - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Updates the weights using the given learning rate, momentum and adam_beta. - // num_samples is used in the adam computation iff use_adam_ is true. - void Update(float learning_rate, float momentum, float adam_beta, - int num_samples) override; - // Sums the products of weight updates in *this and other, splitting into - // positive (same direction) in *same and negative (different direction) in - // *changed. - void CountAlternators(const Network& other, double* same, - double* changed) const override; - - protected: - // The networks. - PointerVector stack_; - // Layer-specific learning rate iff network_flags_ & NF_LAYER_SPECIFIC_LR. - // One element for each element of stack_. - GenericVector learning_rates_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_LSTM_PLUMBING_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/recodebeam.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/recodebeam.cpp deleted file mode 100644 index b4716ae3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/recodebeam.cpp +++ /dev/null @@ -1,1010 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: recodebeam.cpp -// Description: Beam search to decode from the re-encoded CJK as a sequence of -// smaller numbers in place of a single large code. -// Author: Ray Smith -// Created: Fri Mar 13 09:39:01 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "recodebeam.h" -#include "networkio.h" -#include "pageres.h" -#include "unicharcompress.h" -#include -#include -#include -#include - -#include - -namespace tesseract { - -// Clipping value for certainty inside Tesseract. Reflects the minimum value -// of certainty that will be returned by ExtractBestPathAsUnicharIds. -// Supposedly on a uniform scale that can be compared across languages and -// engines. -const float RecodeBeamSearch::kMinCertainty = -20.0f; - -// The beam width at each code position. 
-const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = { - 5, 10, 16, 16, 16, 16, 16, 16, 16, 16, -}; - -const char* kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"}; - -// Prints debug details of the node. -void RecodeNode::Print(int null_char, const UNICHARSET& unicharset, - int depth) const { - if (code == null_char) { - tprintf("null_char"); - } else { - tprintf("label=%d, uid=%d=%s", code, unichar_id, - unicharset.debug_str(unichar_id).string()); - } - tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%lx", score, certainty, - start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "", - end_of_word ? " End" : "", permuter, code_hash); - if (depth > 0 && prev != nullptr) { - tprintf(" prev:"); - prev->Print(null_char, unicharset, depth - 1); - } else { - tprintf("\n"); - } -} - -// Borrows the pointer, which is expected to survive until *this is deleted. -RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress& recoder, - int null_char, bool simple_text, Dict* dict) - : recoder_(recoder), - beam_size_(0), - top_code_(-1), - second_code_(-1), - dict_(dict), - space_delimited_(true), - is_simple_text_(simple_text), - null_char_(null_char) { - if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false; -} - -// Decodes the set of network outputs, storing the lattice internally. 
-void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio, - double cert_offset, double worst_dict_cert, - const UNICHARSET* charset, int lstm_choice_mode) { - beam_size_ = 0; - int width = output.Width(); - if (lstm_choice_mode) - timesteps.clear(); - for (int t = 0; t < width; ++t) { - ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]); - DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, - charset); - if (lstm_choice_mode) { - SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t); - } - } -} -void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY& output, - double dict_ratio, double cert_offset, - double worst_dict_cert, - const UNICHARSET* charset) { - beam_size_ = 0; - int width = output.dim1(); - for (int t = 0; t < width; ++t) { - ComputeTopN(output[t], output.dim2(), kBeamWidths[0]); - DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset); - } -} - -void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs, - int num_outputs, - const UNICHARSET* charset, - int xCoord) { - std::vector> choices; - int pos = 0; - for (int i = 0; i < num_outputs; ++i) { - if (outputs[i] >= 0.01f) { - const char* character; - if (i + 2 >= num_outputs) { - character = ""; - } else if (i > 0) { - character = charset->id_to_unichar_ext(i + 2); - } else { - character = charset->id_to_unichar_ext(i); - } - pos = 0; - //order the possible choices within one timestep - //beginning with the most likely - while (choices.size() > pos && choices[pos].second > outputs[i]) { - pos++; - } - choices.insert(choices.begin() + pos, - std::pair(character, outputs[i])); - } - } - timesteps.push_back(choices); -} - -// Returns the best path as labels/scores/xcoords similar to simple CTC. 
-void RecodeBeamSearch::ExtractBestPathAsLabels( - GenericVector* labels, GenericVector* xcoords) const { - labels->truncate(0); - xcoords->truncate(0); - GenericVector best_nodes; - ExtractBestPaths(&best_nodes, nullptr); - // Now just run CTC on the best nodes. - int t = 0; - int width = best_nodes.size(); - while (t < width) { - int label = best_nodes[t]->code; - if (label != null_char_) { - labels->push_back(label); - xcoords->push_back(t); - } - while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) { - } - } - xcoords->push_back(width); -} - -// Returns the best path as unichar-ids/certs/ratings/xcoords skipping -// duplicates, nulls and intermediate parts. -void RecodeBeamSearch::ExtractBestPathAsUnicharIds( - bool debug, const UNICHARSET* unicharset, GenericVector* unichar_ids, - GenericVector* certs, GenericVector* ratings, - GenericVector* xcoords) const { - GenericVector best_nodes; - ExtractBestPaths(&best_nodes, nullptr); - ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords); - if (debug) { - DebugPath(unicharset, best_nodes); - DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, - *xcoords); - } -} - -// Returns the best path as a set of WERD_RES. 
-void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, - float scale_factor, bool debug, - const UNICHARSET* unicharset, - PointerVector* words, - int lstm_choice_mode) { - words->truncate(0); - GenericVector unichar_ids; - GenericVector certs; - GenericVector ratings; - GenericVector xcoords; - GenericVector best_nodes; - GenericVector second_nodes; - std::deque> best_choices; - ExtractBestPaths(&best_nodes, &second_nodes); - if (debug) { - DebugPath(unicharset, best_nodes); - ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, - &xcoords); - tprintf("\nSecond choice path:\n"); - DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, - xcoords); - } - int current_char; - int timestepEnd = 0; - //if lstm choice mode is required in granularity level 2 it stores the x - //Coordinates of every chosen character to match the alternative choices to it - if (lstm_choice_mode == 2) { - ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, - &xcoords, &best_choices); - if (best_choices.size() > 0) { - current_char = best_choices.front().first; - timestepEnd = best_choices.front().second; - best_choices.pop_front(); - } - } else { - ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, - &xcoords); - } - int num_ids = unichar_ids.size(); - if (debug) { - DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, - xcoords); - } - // Convert labels to unichar-ids. - int word_end = 0; - float prev_space_cert = 0.0f; - for (int word_start = 0; word_start < num_ids; word_start = word_end) { - for (word_end = word_start + 1; word_end < num_ids; ++word_end) { - // A word is terminated when a space character or start_of_word flag is - // hit. We also want to force a separate word for every non - // space-delimited character when not in a dictionary context. 
- if (unichar_ids[word_end] == UNICHAR_SPACE) break; - int index = xcoords[word_end]; - if (best_nodes[index]->start_of_word) break; - if (best_nodes[index]->permuter == TOP_CHOICE_PERM && - (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) || - !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1]))) - break; - } - float space_cert = 0.0f; - if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) - space_cert = certs[word_end]; - bool leading_space = - word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE; - // Create a WERD_RES for the output word. - WERD_RES* word_res = InitializeWord( - leading_space, line_box, word_start, word_end, - std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor); - if (lstm_choice_mode == 1) { - for (size_t i = timestepEnd; i < xcoords[word_end]; i++) { - word_res->timesteps.push_back(timesteps[i]); - } - timestepEnd = xcoords[word_end]; - } else if (lstm_choice_mode == 2) { - float sum = 0; - std::vector> choice_pairs; - for (size_t i = timestepEnd; i < xcoords[word_end]; i++) { - for (std::pair choice : timesteps[i]) { - if (std::strcmp(choice.first, "") != 0) { - sum += choice.second; - choice_pairs.push_back(choice); - } - } - if ((best_choices.size() > 0 && i == best_choices.front().second - 1) - || i == xcoords[word_end]-1) { - std::map summed_propabilities; - for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) { - summed_propabilities[it->first] += it->second; - } - std::vector> accumulated_timestep; - accumulated_timestep.push_back(std::pair - (unicharset->id_to_unichar_ext - (current_char), 2.0)); - int pos; - for (auto it = summed_propabilities.begin(); - it != summed_propabilities.end(); ++it) { - if(sum == 0) break; - it->second/=sum; - pos = 0; - while (accumulated_timestep.size() > pos - && accumulated_timestep[pos].second > it->second) { - pos++; - } - accumulated_timestep.insert(accumulated_timestep.begin() + pos, - std::pair(it->first, - it->second)); - } - 
if (best_choices.size() > 0) { - current_char = best_choices.front().first; - best_choices.pop_front(); - } - choice_pairs.clear(); - word_res->timesteps.push_back(accumulated_timestep); - sum = 0; - } - } - timestepEnd = xcoords[word_end]; - } - for (int i = word_start; i < word_end; ++i) { - BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; - BLOB_CHOICE_IT bc_it(choices); - BLOB_CHOICE* choice = new BLOB_CHOICE( - unichar_ids[i], ratings[i], certs[i], -1, 1.0f, - static_cast(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER); - int col = i - word_start; - choice->set_matrix_cell(col, col); - bc_it.add_after_then_move(choice); - word_res->ratings->put(col, col, choices); - } - int index = xcoords[word_end - 1]; - word_res->FakeWordFromRatings(best_nodes[index]->permuter); - words->push_back(word_res); - prev_space_cert = space_cert; - if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) - ++word_end; - } -} - -// Generates debug output of the content of the beams after a Decode. -void RecodeBeamSearch::DebugBeams(const UNICHARSET& unicharset) const { - for (int p = 0; p < beam_size_; ++p) { - for (int d = 0; d < 2; ++d) { - for (int c = 0; c < NC_COUNT; ++c) { - NodeContinuation cont = static_cast(c); - int index = BeamIndex(d, cont, 0); - if (beam_[p]->beams_[index].empty()) continue; - // Print all the best scoring nodes for each unichar found. - tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", - kNodeContNames[c]); - DebugBeamPos(unicharset, beam_[p]->beams_[index]); - } - } - } -} - -// Generates debug output of the content of a single beam position. 
-void RecodeBeamSearch::DebugBeamPos(const UNICHARSET& unicharset, - const RecodeHeap& heap) const { - GenericVector unichar_bests; - unichar_bests.init_to_size(unicharset.size(), nullptr); - const RecodeNode* null_best = nullptr; - int heap_size = heap.size(); - for (int i = 0; i < heap_size; ++i) { - const RecodeNode* node = &heap.get(i).data; - if (node->unichar_id == INVALID_UNICHAR_ID) { - if (null_best == nullptr || null_best->score < node->score) null_best = node; - } else { - if (unichar_bests[node->unichar_id] == nullptr || - unichar_bests[node->unichar_id]->score < node->score) { - unichar_bests[node->unichar_id] = node; - } - } - } - for (int u = 0; u < unichar_bests.size(); ++u) { - if (unichar_bests[u] != nullptr) { - const RecodeNode& node = *unichar_bests[u]; - node.Print(null_char_, unicharset, 1); - } - } - if (null_best != nullptr) { - null_best->Print(null_char_, unicharset, 1); - } -} - -// Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping -// duplicates, nulls and intermediate parts. -/* static */ -void RecodeBeamSearch::ExtractPathAsUnicharIds( - const GenericVector& best_nodes, - GenericVector* unichar_ids, GenericVector* certs, - GenericVector* ratings, GenericVector* xcoords, - std::deque>* best_choices) { - unichar_ids->truncate(0); - certs->truncate(0); - ratings->truncate(0); - xcoords->truncate(0); - // Backtrack extracting only valid, non-duplicate unichar-ids. - int t = 0; - int width = best_nodes.size(); - while (t < width) { - double certainty = 0.0; - double rating = 0.0; - while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) { - double cert = best_nodes[t++]->certainty; - if (cert < certainty) certainty = cert; - rating -= cert; - } - if (t < width) { - int unichar_id = best_nodes[t]->unichar_id; - if (unichar_id == UNICHAR_SPACE && !certs->empty() && - best_nodes[t]->permuter != NO_PERM) { - // All the rating and certainty go on the previous character except - // for the space itself. 
- if (certainty < certs->back()) certs->back() = certainty; - ratings->back() += rating; - certainty = 0.0; - rating = 0.0; - } - unichar_ids->push_back(unichar_id); - xcoords->push_back(t); - if (best_choices != nullptr) { - best_choices->push_back(std::pair(unichar_id, t)); - } - do { - double cert = best_nodes[t++]->certainty; - // Special-case NO-PERM space to forget the certainty of the previous - // nulls. See long comment in ContinueContext. - if (cert < certainty || (unichar_id == UNICHAR_SPACE && - best_nodes[t - 1]->permuter == NO_PERM)) { - certainty = cert; - } - rating -= cert; - } while (t < width && best_nodes[t]->duplicate); - certs->push_back(certainty); - ratings->push_back(rating); - } else if (!certs->empty()) { - if (certainty < certs->back()) certs->back() = certainty; - ratings->back() += rating; - } - } - xcoords->push_back(width); -} - -// Sets up a word with the ratings matrix and fake blobs with boxes in the -// right places. -WERD_RES* RecodeBeamSearch::InitializeWord(bool leading_space, - const TBOX& line_box, int word_start, - int word_end, float space_certainty, - const UNICHARSET* unicharset, - const GenericVector& xcoords, - float scale_factor) { - // Make a fake blob for each non-zero label. - C_BLOB_LIST blobs; - C_BLOB_IT b_it(&blobs); - for (int i = word_start; i < word_end; ++i) { - int min_half_width = xcoords[i + 1] - xcoords[i]; - if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width) - min_half_width = xcoords[i] - xcoords[i - 1]; - if (min_half_width < 1) min_half_width = 1; - // Make a fake blob. - TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width, - line_box.height()); - box.scale(scale_factor); - box.move(ICOORD(line_box.left(), line_box.bottom())); - box.set_top(line_box.top()); - b_it.add_after_then_move(C_BLOB::FakeBlob(box)); - } - // Make a fake word from the blobs. - WERD* word = new WERD(&blobs, leading_space, nullptr); - // Make a WERD_RES from the word. 
- WERD_RES* word_res = new WERD_RES(word); - word_res->uch_set = unicharset; - word_res->combination = true; // Give it ownership of the word. - word_res->space_certainty = space_certainty; - word_res->ratings = new MATRIX(word_end - word_start, 1); - return word_res; -} - -// Fills top_n_flags_ with bools that are true iff the corresponding output -// is one of the top_n. -void RecodeBeamSearch::ComputeTopN(const float* outputs, int num_outputs, - int top_n) { - top_n_flags_.init_to_size(num_outputs, TN_ALSO_RAN); - top_code_ = -1; - second_code_ = -1; - top_heap_.clear(); - for (int i = 0; i < num_outputs; ++i) { - if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) { - TopPair entry(outputs[i], i); - top_heap_.Push(&entry); - if (top_heap_.size() > top_n) top_heap_.Pop(&entry); - } - } - while (!top_heap_.empty()) { - TopPair entry; - top_heap_.Pop(&entry); - if (top_heap_.size() > 1) { - top_n_flags_[entry.data] = TN_TOPN; - } else { - top_n_flags_[entry.data] = TN_TOP2; - if (top_heap_.empty()) - top_code_ = entry.data; - else - second_code_ = entry.data; - } - } - top_n_flags_[null_char_] = TN_TOP2; -} - -// Adds the computation for the current time-step to the beam. Call at each -// time-step in sequence from left to right. outputs is the activation vector -// for the current timestep. -void RecodeBeamSearch::DecodeStep(const float* outputs, int t, - double dict_ratio, double cert_offset, - double worst_dict_cert, - const UNICHARSET* charset, bool debug) { - if (t == beam_.size()) beam_.push_back(new RecodeBeam); - RecodeBeam* step = beam_[t]; - beam_size_ = t + 1; - step->Clear(); - if (t == 0) { - // The first step can only use singles and initials. 
- ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, - dict_ratio, cert_offset, worst_dict_cert, step); - if (dict_ != nullptr) { - ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, - TN_TOP2, dict_ratio, cert_offset, worst_dict_cert, step); - } - } else { - RecodeBeam* prev = beam_[t - 1]; - if (debug) { - int beam_index = BeamIndex(true, NC_ANYTHING, 0); - for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) { - GenericVector path; - ExtractPath(&prev->beams_[beam_index].get(i).data, &path); - tprintf("Step %d: Dawg beam %d:\n", t, i); - DebugPath(charset, path); - } - beam_index = BeamIndex(false, NC_ANYTHING, 0); - for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) { - GenericVector path; - ExtractPath(&prev->beams_[beam_index].get(i).data, &path); - tprintf("Step %d: Non-Dawg beam %d:\n", t, i); - DebugPath(charset, path); - } - } - int total_beam = 0; - // Work through the scores by group (top-2, top-n, the rest) while the beam - // is empty. This enables extending the context using only the top-n results - // first, which may have an empty intersection with the valid codes, so we - // fall back to the rest if the beam is empty. - for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) { - TopNState top_n = static_cast(tn); - for (int index = 0; index < kNumBeams; ++index) { - // Working backwards through the heaps doesn't guarantee that we see the - // best first, but it comes before a lot of the worst, so it is slightly - // more efficient than going forwards. - for (int i = prev->beams_[index].size() - 1; i >= 0; --i) { - ContinueContext(&prev->beams_[index].get(i).data, index, outputs, - top_n, dict_ratio, cert_offset, worst_dict_cert, - step); - } - } - for (int index = 0; index < kNumBeams; ++index) { - if (ContinuationFromBeamsIndex(index) == NC_ANYTHING) - total_beam += step->beams_[index].size(); - } - } - // Special case for the best initial dawg. 
Push it on the heap if good - // enough, but there is only one, so it doesn't blow up the beam. - for (int c = 0; c < NC_COUNT; ++c) { - if (step->best_initial_dawgs_[c].code >= 0) { - int index = BeamIndex(true, static_cast(c), 0); - RecodeHeap* dawg_heap = &step->beams_[index]; - PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], - dawg_heap); - } - } - } -} - -// Adds to the appropriate beams the legal (according to recoder) -// continuations of context prev, which is of the given length, using the -// given network outputs to provide scores to the choices. Uses only those -// choices for which top_n_flags[index] == top_n_flag. -void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index, - const float* outputs, - TopNState top_n_flag, double dict_ratio, - double cert_offset, - double worst_dict_cert, - RecodeBeam* step) { - RecodedCharID prefix; - RecodedCharID full_code; - const RecodeNode* previous = prev; - int length = LengthFromBeamsIndex(index); - bool use_dawgs = IsDawgFromBeamsIndex(index); - NodeContinuation prev_cont = ContinuationFromBeamsIndex(index); - for (int p = length - 1; p >= 0; --p, previous = previous->prev) { - while (previous != nullptr && - (previous->duplicate || previous->code == null_char_)) { - previous = previous->prev; - } - if (previous != nullptr) { - prefix.Set(p, previous->code); - full_code.Set(p, previous->code); - } - } - if (prev != nullptr && !is_simple_text_) { - if (top_n_flags_[prev->code] == top_n_flag) { - if (prev_cont != NC_NO_DUP) { - float cert = - NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset; - PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, - cert, worst_dict_cert, dict_ratio, use_dawgs, - NC_ANYTHING, prev, step); - } - if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && - prev->code != null_char_) { - float cert = NetworkIO::ProbToCertainty(outputs[prev->code] + - outputs[null_char_]) + - cert_offset; - PushDupOrNoDawgIfBetter(length, true, 
prev->code, prev->unichar_id, - cert, worst_dict_cert, dict_ratio, use_dawgs, - NC_NO_DUP, prev, step); - } - } - if (prev_cont == NC_ONLY_DUP) return; - if (prev->code != null_char_ && length > 0 && - top_n_flags_[null_char_] == top_n_flag) { - // Allow nulls within multi code sequences, as the nulls within are not - // explicitly included in the code sequence. - float cert = - NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset; - PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, - cert, worst_dict_cert, dict_ratio, use_dawgs, - NC_ANYTHING, prev, step); - } - } - const GenericVector* final_codes = recoder_.GetFinalCodes(prefix); - if (final_codes != nullptr) { - for (int i = 0; i < final_codes->size(); ++i) { - int code = (*final_codes)[i]; - if (top_n_flags_[code] != top_n_flag) continue; - if (prev != nullptr && prev->code == code && !is_simple_text_) continue; - float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset; - if (cert < kMinCertainty && code != null_char_) continue; - full_code.Set(length, code); - int unichar_id = recoder_.DecodeUnichar(full_code); - // Map the null char to INVALID. 
- if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID; - ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, - use_dawgs, NC_ANYTHING, prev, step); - if (top_n_flag == TN_TOP2 && code != null_char_) { - float prob = outputs[code] + outputs[null_char_]; - if (prev != nullptr && prev_cont == NC_ANYTHING && - prev->code != null_char_ && - ((prev->code == top_code_ && code == second_code_) || - (code == top_code_ && prev->code == second_code_))) { - prob += outputs[prev->code]; - } - float cert = NetworkIO::ProbToCertainty(prob) + cert_offset; - ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, - use_dawgs, NC_ONLY_DUP, prev, step); - } - } - } - const GenericVector* next_codes = recoder_.GetNextCodes(prefix); - if (next_codes != nullptr) { - for (int i = 0; i < next_codes->size(); ++i) { - int code = (*next_codes)[i]; - if (top_n_flags_[code] != top_n_flag) continue; - if (prev != nullptr && prev->code == code && !is_simple_text_) continue; - float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset; - PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, - worst_dict_cert, dict_ratio, use_dawgs, - NC_ANYTHING, prev, step); - if (top_n_flag == TN_TOP2 && code != null_char_) { - float prob = outputs[code] + outputs[null_char_]; - if (prev != nullptr && prev_cont == NC_ANYTHING && - prev->code != null_char_ && - ((prev->code == top_code_ && code == second_code_) || - (code == top_code_ && prev->code == second_code_))) { - prob += outputs[prev->code]; - } - float cert = NetworkIO::ProbToCertainty(prob) + cert_offset; - PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, - cert, worst_dict_cert, dict_ratio, use_dawgs, - NC_ONLY_DUP, prev, step); - } - } - } -} - -// Continues for a new unichar, using dawg or non-dawg as per flag. 
-void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, - float worst_dict_cert, float dict_ratio, - bool use_dawgs, NodeContinuation cont, - const RecodeNode* prev, - RecodeBeam* step) { - if (use_dawgs) { - if (cert > worst_dict_cert) { - ContinueDawg(code, unichar_id, cert, cont, prev, step); - } - } else { - RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)]; - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, - false, false, false, cert * dict_ratio, prev, nullptr, - nodawg_heap); - if (dict_ != nullptr && - ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) || - !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) { - // Any top choice position that can start a new word, ie a space or - // any non-space-delimited character, should also be considered - // by the dawg search, so push initial dawg to the dawg heap. - float dawg_cert = cert; - PermuterType permuter = TOP_CHOICE_PERM; - // Since we use the space either side of a dictionary word in the - // certainty of the word, (to properly handle weak spaces) and the - // space is coming from a non-dict word, we need special conditions - // to avoid degrading the certainty of the dict word that follows. - // With a space we don't multiply the certainty by dict_ratio, and we - // flag the space with NO_PERM to indicate that we should not use the - // predecessor nulls to generate the confidence for the space, as they - // have already been multiplied by dict_ratio, and we can't go back to - // insert more entries in any previous heaps. - if (unichar_id == UNICHAR_SPACE) - permuter = NO_PERM; - else - dawg_cert *= dict_ratio; - PushInitialDawgIfBetter(code, unichar_id, permuter, false, false, - dawg_cert, cont, prev, step); - } - } -} - -// Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev, -// appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id -// is a valid continuation of whatever is in prev. 
-void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, - NodeContinuation cont, - const RecodeNode* prev, RecodeBeam* step) { - RecodeHeap* dawg_heap = &step->beams_[BeamIndex(true, cont, 0)]; - RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)]; - if (unichar_id == INVALID_UNICHAR_ID) { - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, - false, false, cert, prev, nullptr, dawg_heap); - return; - } - // Avoid dictionary probe if score a total loss. - float score = cert; - if (prev != nullptr) score += prev->score; - if (dawg_heap->size() >= kBeamWidths[0] && - score <= dawg_heap->PeekTop().data.score && - nodawg_heap->size() >= kBeamWidths[0] && - score <= nodawg_heap->PeekTop().data.score) { - return; - } - const RecodeNode* uni_prev = prev; - // Prev may be a partial code, null_char, or duplicate, so scan back to the - // last valid unichar_id. - while (uni_prev != nullptr && - (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate)) - uni_prev = uni_prev->prev; - if (unichar_id == UNICHAR_SPACE) { - if (uni_prev != nullptr && uni_prev->end_of_word) { - // Space is good. Push initial state, to the dawg beam and a regular - // space to the top choice beam. - PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, - false, cert, cont, prev, step); - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, - false, false, false, false, cert, prev, nullptr, - nodawg_heap); - } - return; - } else if (uni_prev != nullptr && uni_prev->start_of_dawg && - uni_prev->unichar_id != UNICHAR_SPACE && - dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) && - dict_->getUnicharset().IsSpaceDelimited(unichar_id)) { - return; // Can't break words between space delimited chars. 
- } - DawgPositionVector initial_dawgs; - DawgPositionVector* updated_dawgs = new DawgPositionVector; - DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM); - bool word_start = false; - if (uni_prev == nullptr) { - // Starting from beginning of line. - dict_->default_dawgs(&initial_dawgs, false); - word_start = true; - } else if (uni_prev->dawgs != nullptr) { - // Continuing a previous dict word. - dawg_args.active_dawgs = uni_prev->dawgs; - word_start = uni_prev->start_of_dawg; - } else { - return; // Can't continue if not a dict word. - } - PermuterType permuter = static_cast( - dict_->def_letter_is_okay(&dawg_args, - dict_->getUnicharset(), unichar_id, false)); - if (permuter != NO_PERM) { - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, - word_start, dawg_args.valid_end, false, cert, prev, - dawg_args.updated_dawgs, dawg_heap); - if (dawg_args.valid_end && !space_delimited_) { - // We can start another word right away, so push initial state as well, - // to the dawg beam, and the regular character to the top choice beam, - // since non-dict words can start here too. - PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, - cert, cont, prev, step); - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, - word_start, true, false, cert, prev, nullptr, nodawg_heap); - } - } else { - delete updated_dawgs; - } -} - -// Adds a RecodeNode composed of the tuple (code, unichar_id, -// initial-dawg-state, prev, cert) to the given heap if/ there is room or if -// better than the current worst element if already full. 
-void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, - PermuterType permuter, - bool start, bool end, float cert, - NodeContinuation cont, - const RecodeNode* prev, - RecodeBeam* step) { - RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont]; - float score = cert; - if (prev != nullptr) score += prev->score; - if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) { - DawgPositionVector* initial_dawgs = new DawgPositionVector; - dict_->default_dawgs(initial_dawgs, false); - RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, - score, prev, initial_dawgs, - ComputeCodeHash(code, false, prev)); - *best_initial_dawg = node; - } -} - -// Adds a RecodeNode composed of the tuple (code, unichar_id, permuter, -// false, false, false, false, cert, prev, nullptr) to heap if there is room -// or if better than the current worst element if already full. -/* static */ -void RecodeBeamSearch::PushDupOrNoDawgIfBetter( - int length, bool dup, int code, int unichar_id, float cert, - float worst_dict_cert, float dict_ratio, bool use_dawgs, - NodeContinuation cont, const RecodeNode* prev, RecodeBeam* step) { - int index = BeamIndex(use_dawgs, cont, length); - if (use_dawgs) { - if (cert > worst_dict_cert) { - PushHeapIfBetter(kBeamWidths[length], code, unichar_id, - prev ? prev->permuter : NO_PERM, false, false, false, - dup, cert, prev, nullptr, &step->beams_[index]); - } - } else { - cert *= dict_ratio; - if (cert >= kMinCertainty || code == null_char_) { - PushHeapIfBetter(kBeamWidths[length], code, unichar_id, - prev ? prev->permuter : TOP_CHOICE_PERM, false, false, - false, dup, cert, prev, nullptr, &step->beams_[index]); - } - } -} - -// Adds a RecodeNode composed of the tuple (code, unichar_id, permuter, -// dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room -// or if better than the current worst element if already full. 
-void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id, - PermuterType permuter, bool dawg_start, - bool word_start, bool end, bool dup, - float cert, const RecodeNode* prev, - DawgPositionVector* d, - RecodeHeap* heap) { - float score = cert; - if (prev != nullptr) score += prev->score; - if (heap->size() < max_size || score > heap->PeekTop().data.score) { - uint64_t hash = ComputeCodeHash(code, dup, prev); - RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, - dup, cert, score, prev, d, hash); - if (UpdateHeapIfMatched(&node, heap)) return; - RecodePair entry(score, node); - heap->Push(&entry); - ASSERT_HOST(entry.data.dawgs == nullptr); - if (heap->size() > max_size) heap->Pop(&entry); - } else { - delete d; - } -} - -// Adds a RecodeNode to heap if there is room -// or if better than the current worst element if already full. -void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node, - RecodeHeap* heap) { - if (heap->size() < max_size || node->score > heap->PeekTop().data.score) { - if (UpdateHeapIfMatched(node, heap)) { - return; - } - RecodePair entry(node->score, *node); - heap->Push(&entry); - ASSERT_HOST(entry.data.dawgs == nullptr); - if (heap->size() > max_size) heap->Pop(&entry); - } -} - -// Searches the heap for a matching entry, and updates the score with -// reshuffle if needed. Returns true if there was a match. -bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node, - RecodeHeap* heap) { - // TODO(rays) consider hash map instead of linear search. - // It might not be faster because the hash map would have to be updated - // every time a heap reshuffle happens, and that would be a lot of overhead. 
- GenericVector* nodes = heap->heap(); - for (int i = 0; i < nodes->size(); ++i) { - RecodeNode& node = (*nodes)[i].data; - if (node.code == new_node->code && node.code_hash == new_node->code_hash && - node.permuter == new_node->permuter && - node.start_of_dawg == new_node->start_of_dawg) { - if (new_node->score > node.score) { - // The new one is better. Update the entire node in the heap and - // reshuffle. - node = *new_node; - (*nodes)[i].key = node.score; - heap->Reshuffle(&(*nodes)[i]); - } - return true; - } - } - return false; -} - -// Computes and returns the code-hash for the given code and prev. -uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, - const RecodeNode* prev) const { - uint64_t hash = prev == nullptr ? 0 : prev->code_hash; - if (!dup && code != null_char_) { - int num_classes = recoder_.code_range(); - uint64_t carry = (((hash >> 32) * num_classes) >> 32); - hash *= num_classes; - hash += carry; - hash += code; - } - return hash; -} - -// Backtracks to extract the best path through the lattice that was built -// during Decode. On return the best_nodes vector essentially contains the set -// of code, score pairs that make the optimal path with the constraint that -// the recoder can decode the code sequence back to a sequence of unichar-ids. -void RecodeBeamSearch::ExtractBestPaths( - GenericVector* best_nodes, - GenericVector* second_nodes) const { - // Scan both beams to extract the best and second best paths. 
- const RecodeNode* best_node = nullptr; - const RecodeNode* second_best_node = nullptr; - const RecodeBeam* last_beam = beam_[beam_size_ - 1]; - for (int c = 0; c < NC_COUNT; ++c) { - if (c == NC_ONLY_DUP) continue; - NodeContinuation cont = static_cast(c); - for (int is_dawg = 0; is_dawg < 2; ++is_dawg) { - int beam_index = BeamIndex(is_dawg, cont, 0); - int heap_size = last_beam->beams_[beam_index].size(); - for (int h = 0; h < heap_size; ++h) { - const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data; - if (is_dawg) { - // dawg_node may be a null_char, or duplicate, so scan back to the - // last valid unichar_id. - const RecodeNode* dawg_node = node; - while (dawg_node != nullptr && - (dawg_node->unichar_id == INVALID_UNICHAR_ID || - dawg_node->duplicate)) - dawg_node = dawg_node->prev; - if (dawg_node == nullptr || (!dawg_node->end_of_word && - dawg_node->unichar_id != UNICHAR_SPACE)) { - // Dawg node is not valid. - continue; - } - } - if (best_node == nullptr || node->score > best_node->score) { - second_best_node = best_node; - best_node = node; - } else if (second_best_node == nullptr || - node->score > second_best_node->score) { - second_best_node = node; - } - } - } - } - if (second_nodes != nullptr) ExtractPath(second_best_node, second_nodes); - ExtractPath(best_node, best_nodes); -} - -// Helper backtracks through the lattice from the given node, storing the -// path and reversing it. -void RecodeBeamSearch::ExtractPath( - const RecodeNode* node, GenericVector* path) const { - path->truncate(0); - while (node != nullptr) { - path->push_back(node); - node = node->prev; - } - path->reverse(); -} - -// Helper prints debug information on the given lattice path. 
-void RecodeBeamSearch::DebugPath( - const UNICHARSET* unicharset, - const GenericVector& path) const { - for (int c = 0; c < path.size(); ++c) { - const RecodeNode& node = *path[c]; - tprintf("%d ", c); - node.Print(null_char_, *unicharset, 1); - } -} - -// Helper prints debug information on the given unichar path. -void RecodeBeamSearch::DebugUnicharPath( - const UNICHARSET* unicharset, const GenericVector& path, - const GenericVector& unichar_ids, const GenericVector& certs, - const GenericVector& ratings, - const GenericVector& xcoords) const { - int num_ids = unichar_ids.size(); - double total_rating = 0.0; - for (int c = 0; c < num_ids; ++c) { - int coord = xcoords[c]; - tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c], - unicharset->debug_str(unichar_ids[c]).string(), ratings[c], - certs[c], path[coord]->start_of_word, path[coord]->end_of_word, - path[coord]->permuter); - total_rating += ratings[c]; - } - tprintf("Path total rating = %g\n", total_rating); -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/recodebeam.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/recodebeam.h deleted file mode 100644 index ef462102..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/recodebeam.h +++ /dev/null @@ -1,405 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: recodebeam.h -// Description: Beam search to decode from the re-encoded CJK as a sequence of -// smaller numbers in place of a single large code. -// Author: Ray Smith -// Created: Fri Mar 13 09:12:01 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_ -#define THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_ - -#include "dawg.h" -#include "dict.h" -#include "genericheap.h" -#include "kdpair.h" -#include "networkio.h" -#include "ratngs.h" -#include "unicharcompress.h" -#include -#include -#include - -namespace tesseract { - -// Enum describing what can follow the current node. -// Consider the following softmax outputs: -// Timestep 0 1 2 3 4 5 6 7 8 -// X-score 0.01 0.55 0.98 0.42 0.01 0.01 0.40 0.95 0.01 -// Y-score 0.00 0.01 0.01 0.01 0.01 0.97 0.59 0.04 0.01 -// Null-score 0.99 0.44 0.01 0.57 0.98 0.02 0.01 0.01 0.98 -// Then the correct CTC decoding (in which adjacent equal classes are folded, -// and then all nulls are dropped) is clearly XYX, but simple decoding (taking -// the max at each timestep) leads to: -// Null@0.99 X@0.55 X@0.98 Null@0.57 Null@0.98 Y@0.97 Y@0.59 X@0.95 Null@0.98, -// which folds to the correct XYX. The conversion to Tesseract rating and -// certainty uses the sum of the log probs (log of the product of probabilities) -// for the Rating and the minimum log prob for the certainty, but that yields a -// minimum certainty of log(0.55), which is poor for such an obvious case. -// CTC says that the probability of the result is the SUM of the products of the -// probabilities over ALL PATHS that decode to the same result, which includes: -// NXXNNYYXN, NNXNNYYN, NXXXNYYXN, NNXXNYXXN, and others including XXXXXYYXX. 
-// That is intractable, so some compromise between simple and ideal is needed. -// Observing that evenly split timesteps rarely happen next to each other, we -// allow scores at a transition between classes to be added for decoding thus: -// N@0.99 (N+X)@0.99 X@0.98 (N+X)@0.99 N@0.98 Y@0.97 (X+Y+N)@1.00 X@0.95 N@0.98. -// This works because NNX and NXX both decode to X, so in the middle we can use -// N+X. Note that the classes either side of a sum must stand alone, i.e. use a -// single score, to force all paths to pass through them and decode to the same -// result. Also in the special case of a transition from X to Y, with only one -// timestep between, it is possible to add X+Y+N, since XXY, XYY, and XNY all -// decode to XY. -// An important condition is that we cannot combine X and Null between two -// stand-alone Xs, since that can decode as XNX->XX or XXX->X, so the scores for -// X and Null have to go in separate paths. Combining scores in this way -// provides a much better minimum certainty of log(0.95). -// In the implementation of the beam search, we have to place the possibilities -// X, X+N and X+Y+N in the beam under appropriate conditions of the previous -// node, and constrain what can follow, to enforce the rules explained above. -// We therefore have 3 different types of node determined by what can follow: -enum NodeContinuation { - NC_ANYTHING, // This node used just its own score, so anything can follow. - NC_ONLY_DUP, // The current node combined another score with the score for - // itself, without a stand-alone duplicate before, so must be - // followed by a stand-alone duplicate. - NC_NO_DUP, // The current node combined another score with the score for - // itself, after a stand-alone, so can only be followed by - // something other than a duplicate of the current node. - NC_COUNT -}; - -// Enum describing the top-n status of a code. -enum TopNState { - TN_TOP2, // Winner or 2nd. - TN_TOPN, // Runner up in top-n, but not 1st or 2nd. 
- TN_ALSO_RAN, // Not in the top-n. - TN_COUNT -}; - -// Lattice element for Re-encode beam search. -struct RecodeNode { - RecodeNode() - : code(-1), - unichar_id(INVALID_UNICHAR_ID), - permuter(TOP_CHOICE_PERM), - start_of_dawg(false), - start_of_word(false), - end_of_word(false), - duplicate(false), - certainty(0.0f), - score(0.0f), - prev(nullptr), - dawgs(nullptr), - code_hash(0) {} - RecodeNode(int c, int uni_id, PermuterType perm, bool dawg_start, - bool word_start, bool end, bool dup, float cert, float s, - const RecodeNode* p, DawgPositionVector* d, uint64_t hash) - : code(c), - unichar_id(uni_id), - permuter(perm), - start_of_dawg(dawg_start), - start_of_word(word_start), - end_of_word(end), - duplicate(dup), - certainty(cert), - score(s), - prev(p), - dawgs(d), - code_hash(hash) {} - // NOTE: If we could use C++11, then this would be a move constructor. - // Instead we have copy constructor that does a move!! This is because we - // don't want to copy the whole DawgPositionVector each time, and true - // copying isn't necessary for this struct. It does get moved around a lot - // though inside the heap and during heap push, hence the move semantics. - RecodeNode(RecodeNode& src) : dawgs(nullptr) { - *this = src; - ASSERT_HOST(src.dawgs == nullptr); - } - RecodeNode& operator=(RecodeNode& src) { - delete dawgs; - memcpy(this, &src, sizeof(src)); - src.dawgs = nullptr; - return *this; - } - ~RecodeNode() { delete dawgs; } - // Prints details of the node. - void Print(int null_char, const UNICHARSET& unicharset, int depth) const; - - // The re-encoded code here = index to network output. - int code; - // The decoded unichar_id is only valid for the final code of a sequence. - int unichar_id; - // The type of permuter active at this point. Intervals between start_of_word - // and end_of_word make valid words of type given by permuter where - // end_of_word is true. These aren't necessarily delimited by spaces. 
- PermuterType permuter; - // True if this is the initial dawg state. May be attached to a space or, - // in a non-space-delimited lang, the end of the previous word. - bool start_of_dawg; - // True if this is the first node in a dictionary word. - bool start_of_word; - // True if this represents a valid candidate end of word position. Does not - // necessarily mark the end of a word, since a word can be extended beyond a - // candidate end by a continuation, eg 'the' continues to 'these'. - bool end_of_word; - // True if this->code is a duplicate of prev->code. Some training modes - // allow the network to output duplicate characters and crush them with CTC, - // but that would mess up the dictionary search, so we just smash them - // together on the fly using the duplicate flag. - bool duplicate; - // Certainty (log prob) of (just) this position. - float certainty; - // Total certainty of the path to this position. - float score; - // The previous node in this chain. Borrowed pointer. - const RecodeNode* prev; - // The currently active dawgs at this position. Owned pointer. - DawgPositionVector* dawgs; - // A hash of all codes in the prefix and this->code as well. Used for - // duplicate path removal. - uint64_t code_hash; -}; - -using RecodePair = KDPairInc; -using RecodeHeap = GenericHeap; - -// Class that holds the entire beam search for recognition of a text line. -class RecodeBeamSearch { - public: - // Borrows the pointer, which is expected to survive until *this is deleted. - RecodeBeamSearch(const UnicharCompress& recoder, int null_char, - bool simple_text, Dict* dict); - - // Decodes the set of network outputs, storing the lattice internally. - // If charset is not null, it enables detailed debugging of the beam search. 
- void Decode(const NetworkIO& output, double dict_ratio, double cert_offset, - double worst_dict_cert, const UNICHARSET* charset, - int lstm_choice_mode = 0); - void Decode(const GENERIC_2D_ARRAY& output, double dict_ratio, - double cert_offset, double worst_dict_cert, - const UNICHARSET* charset); - - // Returns the best path as labels/scores/xcoords similar to simple CTC. - void ExtractBestPathAsLabels(GenericVector* labels, - GenericVector* xcoords) const; - // Returns the best path as unichar-ids/certs/ratings/xcoords skipping - // duplicates, nulls and intermediate parts. - void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET* unicharset, - GenericVector* unichar_ids, - GenericVector* certs, - GenericVector* ratings, - GenericVector* xcoords) const; - - // Returns the best path as a set of WERD_RES. - void ExtractBestPathAsWords(const TBOX& line_box, float scale_factor, - bool debug, const UNICHARSET* unicharset, - PointerVector* words, - int lstm_choice_mode = 0); - - // Generates debug output of the content of the beams after a Decode. - void DebugBeams(const UNICHARSET& unicharset) const; - - // Stores the alternative characters of every timestep together with their - // probability. - std::vector< std::vector>> timesteps; - - // Clipping value for certainty inside Tesseract. Reflects the minimum value - // of certainty that will be returned by ExtractBestPathAsUnicharIds. - // Supposedly on a uniform scale that can be compared across languages and - // engines. - static const float kMinCertainty; - // Number of different code lengths for which we have a separate beam. - static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1; - // Total number of beams: dawg/nodawg * number of NodeContinuation * number - // of different lengths. - static const int kNumBeams = 2 * NC_COUNT * kNumLengths; - // Returns the relevant factor in the beams_ index. 
- static int LengthFromBeamsIndex(int index) { return index % kNumLengths; } - static NodeContinuation ContinuationFromBeamsIndex(int index) { - return static_cast((index / kNumLengths) % NC_COUNT); - } - static bool IsDawgFromBeamsIndex(int index) { - return index / (kNumLengths * NC_COUNT) > 0; - } - // Computes a beams_ index from the given factors. - static int BeamIndex(bool is_dawg, NodeContinuation cont, int length) { - return (is_dawg * NC_COUNT + cont) * kNumLengths + length; - } - - private: - // Struct for the Re-encode beam search. This struct holds the data for - // a single time-step position of the output. Use a PointerVector - // to hold all the timesteps and prevent reallocation of the individual heaps. - struct RecodeBeam { - // Resets to the initial state without deleting all the memory. - void Clear() { - for (int i = 0; i < kNumBeams; ++i) { - beams_[i].clear(); - } - RecodeNode empty; - for (int i = 0; i < NC_COUNT; ++i) { - best_initial_dawgs_[i] = empty; - } - } - - // A separate beam for each combination of code length, - // NodeContinuation, and dictionary flag. Separating out all these types - // allows the beam to be quite narrow, and yet still have a low chance of - // losing the best path. - // We have to keep all these beams separate, since the highest scoring paths - // come from the paths that are most likely to dead-end at any time, like - // dawg paths, NC_ONLY_DUP etc. - // Each heap is stored with the WORST result at the top, so we can quickly - // get the top-n values. - RecodeHeap beams_[kNumBeams]; - // While the language model is only a single word dictionary, we can use - // word starts as a choke point in the beam, and keep only a single dict - // start node at each step (for each NodeContinuation type), so we find the - // best one here and push it on the heap, if it qualifies, after processing - // all of the step. 
- RecodeNode best_initial_dawgs_[NC_COUNT]; - }; - using TopPair = KDPairInc; - - // Generates debug output of the content of a single beam position. - void DebugBeamPos(const UNICHARSET& unicharset, const RecodeHeap& heap) const; - - // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping - // duplicates, nulls and intermediate parts. - static void ExtractPathAsUnicharIds( - const GenericVector& best_nodes, - GenericVector* unichar_ids, GenericVector* certs, - GenericVector* ratings, GenericVector* xcoords, - std::deque>* best_choices = nullptr); - - // Sets up a word with the ratings matrix and fake blobs with boxes in the - // right places. - WERD_RES* InitializeWord(bool leading_space, const TBOX& line_box, - int word_start, int word_end, float space_certainty, - const UNICHARSET* unicharset, - const GenericVector& xcoords, - float scale_factor); - - // Fills top_n_flags_ with bools that are true iff the corresponding output - // is one of the top_n. - void ComputeTopN(const float* outputs, int num_outputs, int top_n); - - // Adds the computation for the current time-step to the beam. Call at each - // time-step in sequence from left to right. outputs is the activation vector - // for the current timestep. - void DecodeStep(const float* outputs, int t, double dict_ratio, - double cert_offset, double worst_dict_cert, - const UNICHARSET* charset, bool debug = false); - - //Saves the most certain choices for the current time-step - void SaveMostCertainChoices(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord); - - // Adds to the appropriate beams the legal (according to recoder) - // continuations of context prev, which is from the given index to beams_, - // using the given network outputs to provide scores to the choices. Uses only - // those choices for which top_n_flags[code] == top_n_flag. 
- void ContinueContext(const RecodeNode* prev, int index, const float* outputs, - TopNState top_n_flag, double dict_ratio, - double cert_offset, double worst_dict_cert, - RecodeBeam* step); - // Continues for a new unichar, using dawg or non-dawg as per flag. - void ContinueUnichar(int code, int unichar_id, float cert, - float worst_dict_cert, float dict_ratio, bool use_dawgs, - NodeContinuation cont, const RecodeNode* prev, - RecodeBeam* step); - // Adds a RecodeNode composed of the args to the correct heap in step if - // unichar_id is a valid dictionary continuation of whatever is in prev. - void ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont, - const RecodeNode* prev, RecodeBeam* step); - // Sets the correct best_initial_dawgs_ with a RecodeNode composed of the args - // if better than what is already there. - void PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter, - bool start, bool end, float cert, - NodeContinuation cont, const RecodeNode* prev, - RecodeBeam* step); - // Adds a RecodeNode composed of the args to the correct heap in step for - // partial unichar or duplicate if there is room or if better than the - // current worst element if already full. - void PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id, - float cert, float worst_dict_cert, - float dict_ratio, bool use_dawgs, - NodeContinuation cont, const RecodeNode* prev, - RecodeBeam* step); - // Adds a RecodeNode composed of the args to the correct heap in step if there - // is room or if better than the current worst element if already full. - void PushHeapIfBetter(int max_size, int code, int unichar_id, - PermuterType permuter, bool dawg_start, bool word_start, - bool end, bool dup, float cert, const RecodeNode* prev, - DawgPositionVector* d, RecodeHeap* heap); - // Adds a RecodeNode to heap if there is room - // or if better than the current worst element if already full. 
- void PushHeapIfBetter(int max_size, RecodeNode* node, RecodeHeap* heap); - // Searches the heap for an entry matching new_node, and updates the entry - // with reshuffle if needed. Returns true if there was a match. - bool UpdateHeapIfMatched(RecodeNode* new_node, RecodeHeap* heap); - // Computes and returns the code-hash for the given code and prev. - uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode* prev) const; - // Backtracks to extract the best path through the lattice that was built - // during Decode. On return the best_nodes vector essentially contains the set - // of code, score pairs that make the optimal path with the constraint that - // the recoder can decode the code sequence back to a sequence of unichar-ids. - void ExtractBestPaths(GenericVector* best_nodes, - GenericVector* second_nodes) const; - // Helper backtracks through the lattice from the given node, storing the - // path and reversing it. - void ExtractPath(const RecodeNode* node, - GenericVector* path) const; - // Helper prints debug information on the given lattice path. - void DebugPath(const UNICHARSET* unicharset, - const GenericVector& path) const; - // Helper prints debug information on the given unichar path. - void DebugUnicharPath(const UNICHARSET* unicharset, - const GenericVector& path, - const GenericVector& unichar_ids, - const GenericVector& certs, - const GenericVector& ratings, - const GenericVector& xcoords) const; - - static const int kBeamWidths[RecodedCharID::kMaxCodeLen + 1]; - - // The encoder/decoder that we will be using. - const UnicharCompress& recoder_; - // The beam for each timestep in the output. - PointerVector beam_; - // The number of timesteps valid in beam_; - int beam_size_; - // A flag to indicate which outputs are the top-n choices. Current timestep - // only. - GenericVector top_n_flags_; - // A record of the highest and second scoring codes. - int top_code_; - int second_code_; - // Heap used to compute the top_n_flags_. 
- GenericHeap top_heap_; - // Borrowed pointer to the dictionary to use in the search. - Dict* dict_; - // True if the language is space-delimited, which is true for most languages - // except chi*, jpn, tha. - bool space_delimited_; - // True if the input is simple text, ie adjacent equal chars are not to be - // eliminated. - bool is_simple_text_; - // The encoded (class label) of the null/reject character. - int null_char_; -}; - -} // namespace tesseract. - -#endif // THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reconfig.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reconfig.cpp deleted file mode 100644 index 2b36b6f8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reconfig.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: reconfig.cpp -// Description: Network layer that reconfigures the scaling vs feature -// depth. -// Author: Ray Smith -// Created: Wed Feb 26 15:42:25 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// -#include "reconfig.h" -#include "tprintf.h" - -namespace tesseract { - -Reconfig::Reconfig(const STRING& name, int ni, int x_scale, int y_scale) - : Network(NT_RECONFIG, name, ni, ni * x_scale * y_scale), - x_scale_(x_scale), y_scale_(y_scale) { -} - -// Returns the shape output from the network given an input shape (which may -// be partially unknown ie zero). -StaticShape Reconfig::OutputShape(const StaticShape& input_shape) const { - StaticShape result = input_shape; - result.set_height(result.height() / y_scale_); - result.set_width(result.width() / x_scale_); - if (type_ != NT_MAXPOOL) - result.set_depth(result.depth() * y_scale_ * x_scale_); - return result; -} - -// Returns an integer reduction factor that the network applies to the -// time sequence. Assumes that any 2-d is already eliminated. Used for -// scaling bounding boxes of truth data. -// WARNING: if GlobalMinimax is used to vary the scale, this will return -// the last used scale factor. Call it before any forward, and it will return -// the minimum scale factor of the paths through the GlobalMinimax. -int Reconfig::XScaleFactor() const { - return x_scale_; -} - -// Writes to the given file. Returns false in case of error. -bool Reconfig::Serialize(TFile* fp) const { - return Network::Serialize(fp) && - fp->Serialize(&x_scale_) && - fp->Serialize(&y_scale_); -} - -// Reads from the given file. Returns false in case of error. -bool Reconfig::DeSerialize(TFile* fp) { - if (!fp->DeSerialize(&x_scale_)) return false; - if (!fp->DeSerialize(&y_scale_)) return false; - no_ = ni_ * x_scale_ * y_scale_; - return true; -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. 
-void Reconfig::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - output->ResizeScaled(input, x_scale_, y_scale_, no_); - back_map_ = input.stride_map(); - StrideMap::Index dest_index(output->stride_map()); - do { - int out_t = dest_index.t(); - StrideMap::Index src_index(input.stride_map(), dest_index.index(FD_BATCH), - dest_index.index(FD_HEIGHT) * y_scale_, - dest_index.index(FD_WIDTH) * x_scale_); - // Stack x_scale_ groups of y_scale_ inputs together. - for (int x = 0; x < x_scale_; ++x) { - for (int y = 0; y < y_scale_; ++y) { - StrideMap::Index src_xy(src_index); - if (src_xy.AddOffset(x, FD_WIDTH) && src_xy.AddOffset(y, FD_HEIGHT)) { - output->CopyTimeStepGeneral(out_t, (x * y_scale_ + y) * ni_, ni_, - input, src_xy.t(), 0); - } - } - } - } while (dest_index.Increment()); -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool Reconfig::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - back_deltas->ResizeToMap(fwd_deltas.int_mode(), back_map_, ni_); - StrideMap::Index src_index(fwd_deltas.stride_map()); - do { - int in_t = src_index.t(); - StrideMap::Index dest_index(back_deltas->stride_map(), - src_index.index(FD_BATCH), - src_index.index(FD_HEIGHT) * y_scale_, - src_index.index(FD_WIDTH) * x_scale_); - // Unstack x_scale_ groups of y_scale_ inputs that are together. - for (int x = 0; x < x_scale_; ++x) { - for (int y = 0; y < y_scale_; ++y) { - StrideMap::Index dest_xy(dest_index); - if (dest_xy.AddOffset(x, FD_WIDTH) && dest_xy.AddOffset(y, FD_HEIGHT)) { - back_deltas->CopyTimeStepGeneral(dest_xy.t(), 0, ni_, fwd_deltas, - in_t, (x * y_scale_ + y) * ni_); - } - } - } - } while (src_index.Increment()); - return needs_to_backprop_; -} - - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reconfig.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reconfig.h deleted file mode 100644 index 6e26399d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reconfig.h +++ /dev/null @@ -1,85 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: reconfig.h -// Description: Network layer that reconfigures the scaling vs feature -// depth. -// Author: Ray Smith -// Created: Wed Feb 26 15:37:42 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_LSTM_RECONFIG_H_ -#define TESSERACT_LSTM_RECONFIG_H_ - - -#include "genericvector.h" -#include "matrix.h" -#include "network.h" - -namespace tesseract { - -// Reconfigures (Shrinks) the inputs by concatenating an x_scale by y_scale tile -// of inputs together, producing a single, deeper output per tile. -// Note that fractional parts are truncated for efficiency, so make sure the -// input stride is a multiple of the y_scale factor! -class Reconfig : public Network { - public: - Reconfig(const STRING& name, int ni, int x_scale, int y_scale); - virtual ~Reconfig() = default; - - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). 
- StaticShape OutputShape(const StaticShape& input_shape) const override; - - STRING spec() const override { - STRING spec; - spec.add_str_int("S", y_scale_); - spec.add_str_int(",", x_scale_); - return spec; - } - - // Returns an integer reduction factor that the network applies to the - // time sequence. Assumes that any 2-d is already eliminated. Used for - // scaling bounding boxes of truth data. - // WARNING: if GlobalMinimax is used to vary the scale, this will return - // the last used scale factor. Call it before any forward, and it will return - // the minimum scale factor of the paths through the GlobalMinimax. - int XScaleFactor() const override; - - // Writes to the given file. Returns false in case of error. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(TFile* fp) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; - - protected: - // Non-serialized data used to store parameters between forward and back. - StrideMap back_map_; - // Serialized data. - int32_t x_scale_; - int32_t y_scale_; -}; - -} // namespace tesseract. 
- - -#endif // TESSERACT_LSTM_SUBSAMPLE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reversed.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reversed.cpp deleted file mode 100644 index 7ef7006d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reversed.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: reversed.cpp -// Description: Runs a single network on time-reversed input, reversing output. -// Author: Ray Smith -// Created: Thu May 02 08:42:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "reversed.h" - -#include - -#include "networkscratch.h" - -namespace tesseract { - -Reversed::Reversed(const STRING& name, NetworkType type) : Plumbing(name) { - type_ = type; -} - -// Returns the shape output from the network given an input shape (which may -// be partially unknown ie zero). 
-StaticShape Reversed::OutputShape(const StaticShape& input_shape) const { - if (type_ == NT_XYTRANSPOSE) { - StaticShape x_shape(input_shape); - x_shape.set_width(input_shape.height()); - x_shape.set_height(input_shape.width()); - x_shape = stack_[0]->OutputShape(x_shape); - x_shape.SetShape(x_shape.batch(), x_shape.width(), x_shape.height(), - x_shape.depth()); - return x_shape; - } - return stack_[0]->OutputShape(input_shape); -} - -// Takes ownership of the given network to make it the reversed one. -void Reversed::SetNetwork(Network* network) { - stack_.clear(); - AddToStack(network); -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. -void Reversed::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - NetworkScratch::IO rev_input(input, scratch); - ReverseData(input, rev_input); - NetworkScratch::IO rev_output(input, scratch); - stack_[0]->Forward(debug, *rev_input, nullptr, scratch, rev_output); - ReverseData(*rev_output, output); -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool Reversed::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - NetworkScratch::IO rev_input(fwd_deltas, scratch); - ReverseData(fwd_deltas, rev_input); - NetworkScratch::IO rev_output(fwd_deltas, scratch); - if (stack_[0]->Backward(debug, *rev_input, scratch, rev_output)) { - ReverseData(*rev_output, back_deltas); - return true; - } - return false; -} - -// Copies src to *dest with the reversal according to type_. -void Reversed::ReverseData(const NetworkIO& src, NetworkIO* dest) const { - if (type_ == NT_XREVERSED) - dest->CopyWithXReversal(src); - else if (type_ == NT_YREVERSED) - dest->CopyWithYReversal(src); - else - dest->CopyWithXYTranspose(src); -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reversed.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reversed.h deleted file mode 100644 index ec91e7e7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/reversed.h +++ /dev/null @@ -1,89 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: reversed.h -// Description: Runs a single network on time-reversed input, reversing output. -// Author: Ray Smith -// Created: Thu May 02 08:38:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_REVERSED_H_ -#define TESSERACT_LSTM_REVERSED_H_ - -#include "matrix.h" -#include "plumbing.h" - -namespace tesseract { - -// C++ Implementation of the Reversed class from lstm.py. -class Reversed : public Plumbing { - public: - explicit Reversed(const STRING& name, NetworkType type); - virtual ~Reversed() = default; - - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - StaticShape OutputShape(const StaticShape& input_shape) const override; - - STRING spec() const override { - STRING spec(type_ == NT_XREVERSED ? "Rx" - : (type_ == NT_YREVERSED ? 
"Ry" : "Txy")); - // For most simple cases, we will output Rx or Ry where is - // the network in stack_[0], but in the special case that is an - // LSTM, we will just output the LSTM's spec modified to take the reversal - // into account. This is because when the user specified Lfy64, we actually - // generated TxyLfx64, and if the user specified Lrx64 we actually - // generated RxLfx64, and we want to display what the user asked for. - STRING net_spec = stack_[0]->spec(); - if (net_spec[0] == 'L') { - // Setup a from and to character according to the type of the reversal - // such that the LSTM spec gets modified to the spec that the user - // asked for - char from = 'f'; - char to = 'r'; - if (type_ == NT_XYTRANSPOSE) { - from = 'x'; - to = 'y'; - } - // Change the from char to the to char. - for (int i = 0; i < net_spec.length(); ++i) { - if (net_spec[i] == from) net_spec[i] = to; - } - return net_spec; - } - spec += net_spec; - return spec; - } - - // Takes ownership of the given network to make it the reversed one. - void SetNetwork(Network* network); - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; - - private: - // Copies src to *dest with the reversal according to type_. - void ReverseData(const NetworkIO& src, NetworkIO* dest) const; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_LSTM_REVERSED_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/series.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/series.cpp deleted file mode 100644 index 0c1599fb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/series.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: series.cpp -// Description: Runs networks in series on the same input. -// Author: Ray Smith -// Created: Thu May 02 08:26:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "series.h" - -#include "fullyconnected.h" -#include "networkscratch.h" -#include "scrollview.h" -#include "tprintf.h" - -namespace tesseract { - -// ni_ and no_ will be set by AddToStack. -Series::Series(const STRING& name) : Plumbing(name) { - type_ = NT_SERIES; -} - -// Returns the shape output from the network given an input shape (which may -// be partially unknown ie zero). -StaticShape Series::OutputShape(const StaticShape& input_shape) const { - StaticShape result(input_shape); - int stack_size = stack_.size(); - for (int i = 0; i < stack_size; ++i) { - result = stack_[i]->OutputShape(result); - } - return result; -} - -// Sets up the network for training. 
Initializes weights using weights of -// scale `range` picked according to the random number generator `randomizer`. -// Note that series has its own implementation just for debug purposes. -int Series::InitWeights(float range, TRand* randomizer) { - num_weights_ = 0; - tprintf("Num outputs,weights in Series:\n"); - for (int i = 0; i < stack_.size(); ++i) { - int weights = stack_[i]->InitWeights(range, randomizer); - tprintf(" %s:%d, %d\n", - stack_[i]->spec().string(), stack_[i]->NumOutputs(), weights); - num_weights_ += weights; - } - tprintf("Total weights = %d\n", num_weights_); - return num_weights_; -} - -// Recursively searches the network for softmaxes with old_no outputs, -// and remaps their outputs according to code_map. See network.h for details. -int Series::RemapOutputs(int old_no, const std::vector& code_map) { - num_weights_ = 0; - tprintf("Num (Extended) outputs,weights in Series:\n"); - for (int i = 0; i < stack_.size(); ++i) { - int weights = stack_[i]->RemapOutputs(old_no, code_map); - tprintf(" %s:%d, %d\n", stack_[i]->spec().string(), - stack_[i]->NumOutputs(), weights); - num_weights_ += weights; - } - tprintf("Total weights = %d\n", num_weights_); - no_ = stack_.back()->NumOutputs(); - return num_weights_; -} - -// Sets needs_to_backprop_ to needs_backprop and returns true if -// needs_backprop || any weights in this network so the next layer forward -// can be told to produce backprop for this layer if needed. -bool Series::SetupNeedsBackprop(bool needs_backprop) { - needs_to_backprop_ = needs_backprop; - for (int i = 0; i < stack_.size(); ++i) - needs_backprop = stack_[i]->SetupNeedsBackprop(needs_backprop); - return needs_backprop; -} - -// Returns an integer reduction factor that the network applies to the -// time sequence. Assumes that any 2-d is already eliminated. Used for -// scaling bounding boxes of truth data. -// WARNING: if GlobalMinimax is used to vary the scale, this will return -// the last used scale factor. 
Call it before any forward, and it will return -// the minimum scale factor of the paths through the GlobalMinimax. -int Series::XScaleFactor() const { - int factor = 1; - for (int i = 0; i < stack_.size(); ++i) - factor *= stack_[i]->XScaleFactor(); - return factor; -} - -// Provides the (minimum) x scale factor to the network (of interest only to -// input units) so they can determine how to scale bounding boxes. -void Series::CacheXScaleFactor(int factor) { - stack_[0]->CacheXScaleFactor(factor); -} - -// Runs forward propagation of activations on the input line. -// See NetworkCpp for a detailed discussion of the arguments. -void Series::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - int stack_size = stack_.size(); - ASSERT_HOST(stack_size > 1); - // Revolving intermediate buffers. - NetworkScratch::IO buffer1(input, scratch); - NetworkScratch::IO buffer2(input, scratch); - // Run each network in turn, giving the output of n as the input to n + 1, - // with the final network providing the real output. - stack_[0]->Forward(debug, input, input_transpose, scratch, buffer1); - for (int i = 1; i < stack_size; i += 2) { - stack_[i]->Forward(debug, *buffer1, nullptr, scratch, - i + 1 < stack_size ? buffer2 : output); - if (i + 1 == stack_size) return; - stack_[i + 1]->Forward(debug, *buffer2, nullptr, scratch, - i + 2 < stack_size ? buffer1 : output); - } -} - -// Runs backward propagation of errors on the deltas line. -// See NetworkCpp for a detailed discussion of the arguments. -bool Series::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { - if (!IsTraining()) return false; - int stack_size = stack_.size(); - ASSERT_HOST(stack_size > 1); - // Revolving intermediate buffers. 
- NetworkScratch::IO buffer1(fwd_deltas, scratch); - NetworkScratch::IO buffer2(fwd_deltas, scratch); - // Run each network in reverse order, giving the back_deltas output of n as - // the fwd_deltas input to n-1, with the 0 network providing the real output. - if (!stack_.back()->IsTraining() || - !stack_.back()->Backward(debug, fwd_deltas, scratch, buffer1)) - return false; - for (int i = stack_size - 2; i >= 0; i -= 2) { - if (!stack_[i]->IsTraining() || - !stack_[i]->Backward(debug, *buffer1, scratch, - i > 0 ? buffer2 : back_deltas)) - return false; - if (i == 0) return needs_to_backprop_; - if (!stack_[i - 1]->IsTraining() || - !stack_[i - 1]->Backward(debug, *buffer2, scratch, - i > 1 ? buffer1 : back_deltas)) - return false; - } - return needs_to_backprop_; -} - -// Splits the series after the given index, returning the two parts and -// deletes itself. The first part, up to network with index last_start, goes -// into start, and the rest goes into end. -void Series::SplitAt(int last_start, Series** start, Series** end) { - *start = nullptr; - *end = nullptr; - if (last_start < 0 || last_start >= stack_.size()) { - tprintf("Invalid split index %d must be in range [0,%d]!\n", - last_start, stack_.size() - 1); - return; - } - Series* master_series = new Series("MasterSeries"); - Series* boosted_series = new Series("BoostedSeries"); - for (int s = 0; s <= last_start; ++s) { - if (s + 1 == stack_.size() && stack_[s]->type() == NT_SOFTMAX) { - // Change the softmax to a tanh. - FullyConnected* fc = static_cast(stack_[s]); - fc->ChangeType(NT_TANH); - } - master_series->AddToStack(stack_[s]); - stack_[s] = nullptr; - } - for (int s = last_start + 1; s < stack_.size(); ++s) { - boosted_series->AddToStack(stack_[s]); - stack_[s] = nullptr; - } - *start = master_series; - *end = boosted_series; - delete this; -} - -// Appends the elements of the src series to this, removing from src and -// deleting it. 
-void Series::AppendSeries(Network* src) { - ASSERT_HOST(src->type() == NT_SERIES); - Series* src_series = static_cast(src); - for (int s = 0; s < src_series->stack_.size(); ++s) { - AddToStack(src_series->stack_[s]); - src_series->stack_[s] = nullptr; - } - delete src; -} - - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/series.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/series.h deleted file mode 100644 index 1b381c38..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/series.h +++ /dev/null @@ -1,93 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: series.h -// Description: Runs networks in series on the same input. -// Author: Ray Smith -// Created: Thu May 02 08:20:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_SERIES_H_ -#define TESSERACT_LSTM_SERIES_H_ - -#include "plumbing.h" - -namespace tesseract { - -// Runs two or more networks in series (layers) on the same input. -class Series : public Plumbing { - public: - // ni_ and no_ will be set by AddToStack. - explicit Series(const STRING& name); - virtual ~Series() = default; - - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). 
- StaticShape OutputShape(const StaticShape& input_shape) const override; - - STRING spec() const override { - STRING spec("["); - for (int i = 0; i < stack_.size(); ++i) - spec += stack_[i]->spec(); - spec += "]"; - return spec; - } - - // Sets up the network for training. Initializes weights using weights of - // scale `range` picked according to the random number generator `randomizer`. - // Returns the number of weights initialized. - int InitWeights(float range, TRand* randomizer) override; - // Recursively searches the network for softmaxes with old_no outputs, - // and remaps their outputs according to code_map. See network.h for details. - int RemapOutputs(int old_no, const std::vector& code_map) override; - - // Sets needs_to_backprop_ to needs_backprop and returns true if - // needs_backprop || any weights in this network so the next layer forward - // can be told to produce backprop for this layer if needed. - bool SetupNeedsBackprop(bool needs_backprop) override; - - // Returns an integer reduction factor that the network applies to the - // time sequence. Assumes that any 2-d is already eliminated. Used for - // scaling bounding boxes of truth data. - // WARNING: if GlobalMinimax is used to vary the scale, this will return - // the last used scale factor. Call it before any forward, and it will return - // the minimum scale factor of the paths through the GlobalMinimax. - int XScaleFactor() const override; - - // Provides the (minimum) x scale factor to the network (of interest only to - // input units) so they can determine how to scale bounding boxes. - void CacheXScaleFactor(int factor) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, NetworkScratch* scratch, - NetworkIO* output) override; - - // Runs backward propagation of errors on the deltas line. 
- // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, NetworkIO* back_deltas) override; - - // Splits the series after the given index, returning the two parts and - // deletes itself. The first part, up to network with index last_start, goes - // into start, and the rest goes into end. - void SplitAt(int last_start, Series** start, Series** end); - - // Appends the elements of the src series to this, removing from src and - // deleting it. - void AppendSeries(Network* src); -}; - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_SERIES_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/static_shape.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/static_shape.h deleted file mode 100644 index 9b28c008..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/static_shape.h +++ /dev/null @@ -1,104 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: static_shape.h -// Description: Defines the size of the 4-d tensor input/output from a network. -// Author: Ray Smith -// Created: Fri Oct 14 09:07:31 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_STATIC_SHAPE_H_ -#define TESSERACT_LSTM_STATIC_SHAPE_H_ - -#include "serialis.h" // for TFile -#include "tprintf.h" // for tprintf - -namespace tesseract { - -// Enum describing the loss function to apply during training and/or the -// decoding method to apply at runtime. -enum LossType { - LT_NONE, // Undefined. - LT_CTC, // Softmax with standard CTC for training/decoding. - LT_SOFTMAX, // Outputs sum to 1 in fixed positions. - LT_LOGISTIC, // Logistic outputs with independent values. -}; - -// Simple class to hold the tensor shape that is known at network build time -// and the LossType of the loss function. -class StaticShape { - public: - StaticShape() - : batch_(0), height_(0), width_(0), depth_(0), loss_type_(LT_NONE) {} - int batch() const { return batch_; } - void set_batch(int value) { batch_ = value; } - int height() const { return height_; } - void set_height(int value) { height_ = value; } - int width() const { return width_; } - void set_width(int value) { width_ = value; } - int depth() const { return depth_; } - void set_depth(int value) { depth_ = value; } - LossType loss_type() const { return loss_type_; } - void set_loss_type(LossType value) { loss_type_ = value; } - void SetShape(int batch, int height, int width, int depth) { - batch_ = batch; - height_ = height; - width_ = width; - depth_ = depth; - } - - void Print() const { - tprintf("Batch=%d, Height=%d, Width=%d, Depth=%d, loss=%d\n", batch_, - height_, width_, depth_, loss_type_); - } - - bool DeSerialize(TFile *fp) { - int32_t tmp = LT_NONE; - bool result = - fp->DeSerialize(&batch_) && - fp->DeSerialize(&height_) && - fp->DeSerialize(&width_) && - fp->DeSerialize(&depth_) && - fp->DeSerialize(&tmp); - loss_type_ = static_cast(tmp); - return result; - } - - bool Serialize(TFile *fp) const { - int32_t tmp = loss_type_; - return - fp->Serialize(&batch_) && - fp->Serialize(&height_) && - 
fp->Serialize(&width_) && - fp->Serialize(&depth_) && - fp->Serialize(&tmp); - } - - private: - // Size of the 4-D tensor input/output to a network. A value of zero is - // allowed for all except depth_ and means to be determined at runtime, and - // regarded as variable. - // Number of elements in a batch, or number of frames in a video stream. - int32_t batch_; - // Height of the image. - int32_t height_; - // Width of the image. - int32_t width_; - // Depth of the image. (Number of "nodes"). - int32_t depth_; - // How to train/interpret the output. - LossType loss_type_; -}; - -} // namespace tesseract - -#endif // TESSERACT_LSTM_STATIC_SHAPE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/stridemap.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/stridemap.cpp deleted file mode 100644 index 9ce93b49..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/stridemap.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: stridemap.cpp -// Description: Indexing into a 4-d tensor held in a 2-d Array. -// Author: Ray Smith -// Created: Fri Sep 20 15:30:31 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "stridemap.h" -#include // for assert - -namespace tesseract { - -// Returns true if *this is a valid index. 
-bool StrideMap::Index::IsValid() const { - // Cheap check first. - for (int d = 0; d < FD_DIMSIZE; ++d) { - if (indices_[d] < 0) return false; - } - for (int d = 0; d < FD_DIMSIZE; ++d) { - if (indices_[d] > MaxIndexOfDim(static_cast(d))) - return false; - } - return true; -} - -// Returns true if the index of the given dimension is the last. -bool StrideMap::Index::IsLast(FlexDimensions dimension) const { - return MaxIndexOfDim(dimension) == indices_[dimension]; -} - -// Given that the dimensions up to and including dim-1 are valid, returns the -// maximum index for dimension dim. -int StrideMap::Index::MaxIndexOfDim(FlexDimensions dim) const { - int max_index = stride_map_->shape_[dim] - 1; - if (dim == FD_BATCH) return max_index; - assert(0 <= indices_[FD_BATCH]); - const size_t batch = indices_[FD_BATCH]; - if (dim == FD_HEIGHT) { - if (batch >= stride_map_->heights_.size() || - stride_map_->heights_[batch] > max_index) - return max_index; - return stride_map_->heights_[batch] - 1; - } - if (batch >= stride_map_->widths_.size() || - stride_map_->widths_[batch] > max_index) - return max_index; - return stride_map_->widths_[batch] - 1; -} - -// Adds the given offset to the given dimension. Returns true if the result -// makes a valid index. -bool StrideMap::Index::AddOffset(int offset, FlexDimensions dimension) { - indices_[dimension] += offset; - SetTFromIndices(); - return IsValid(); -} - -// Increments the index in some encapsulated way that guarantees to remain -// valid until it returns false, meaning that the iteration is complete. -bool StrideMap::Index::Increment() { - for (int d = FD_DIMSIZE - 1; d >= 0; --d) { - if (!IsLast(static_cast(d))) { - t_ += stride_map_->t_increments_[d]; - ++indices_[d]; - return true; - } - t_ -= stride_map_->t_increments_[d] * indices_[d]; - indices_[d] = 0; - // Now carry to the next dimension. 
- } - return false; -} - -// Decrements the index in some encapsulated way that guarantees to remain -// valid until it returns false, meaning that the iteration (that started -// with InitToLast()) is complete. -bool StrideMap::Index::Decrement() { - for (int d = FD_DIMSIZE - 1; d >= 0; --d) { - if (indices_[d] > 0) { - --indices_[d]; - if (d == FD_BATCH) { - // The upper limits of the other dimensions may have changed as a result - // of a different batch index, so they have to be reset. - InitToLastOfBatch(indices_[FD_BATCH]); - } else { - t_ -= stride_map_->t_increments_[d]; - } - return true; - } - indices_[d] = MaxIndexOfDim(static_cast(d)); - t_ += stride_map_->t_increments_[d] * indices_[d]; - // Now borrow from the next dimension. - } - return false; -} - -// Initializes the indices to the last valid location in the given batch -// index. -void StrideMap::Index::InitToLastOfBatch(int batch) { - indices_[FD_BATCH] = batch; - for (int d = FD_BATCH + 1; d < FD_DIMSIZE; ++d) { - indices_[d] = MaxIndexOfDim(static_cast(d)); - } - SetTFromIndices(); -} - -// Computes and sets t_ from the current indices_. -void StrideMap::Index::SetTFromIndices() { - t_ = 0; - for (int d = 0; d < FD_DIMSIZE; ++d) { - t_ += stride_map_->t_increments_[d] * indices_[d]; - } -} - -// Sets up the stride for the given array of height, width pairs. -void StrideMap::SetStride(const std::vector>& h_w_pairs) { - int max_height = 0; - int max_width = 0; - for (const std::pair& hw : h_w_pairs) { - int height = hw.first; - int width = hw.second; - heights_.push_back(height); - widths_.push_back(width); - if (height > max_height) max_height = height; - if (width > max_width) max_width = width; - } - shape_[FD_BATCH] = heights_.size(); - shape_[FD_HEIGHT] = max_height; - shape_[FD_WIDTH] = max_width; - ComputeTIncrements(); -} - -// Scales width and height dimensions by the given factors. 
-void StrideMap::ScaleXY(int x_factor, int y_factor) { - for (int& height : heights_) height /= y_factor; - for (int& width : widths_) width /= x_factor; - shape_[FD_HEIGHT] /= y_factor; - shape_[FD_WIDTH] /= x_factor; - ComputeTIncrements(); -} - -// Reduces width to 1, across the batch, whatever the input size. -void StrideMap::ReduceWidthTo1() { - widths_.assign(widths_.size(), 1); - shape_[FD_WIDTH] = 1; - ComputeTIncrements(); -} - -// Transposes the width and height dimensions. -void StrideMap::TransposeXY() { - std::swap(shape_[FD_HEIGHT], shape_[FD_WIDTH]); - std::swap(heights_, widths_); - ComputeTIncrements(); -} - -// Computes t_increments_ from shape_. -void StrideMap::ComputeTIncrements() { - t_increments_[FD_DIMSIZE - 1] = 1; - for (int d = FD_DIMSIZE - 2; d >= 0; --d) { - t_increments_[d] = t_increments_[d + 1] * shape_[d + 1]; - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/stridemap.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/stridemap.h deleted file mode 100644 index 83c7799f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/stridemap.h +++ /dev/null @@ -1,137 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: stridemap.h -// Description: Indexing into a 4-d tensor held in a 2-d Array. -// Author: Ray Smith -// Created: Fri Sep 20 16:00:31 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_LSTM_STRIDEMAP_H_ -#define TESSERACT_LSTM_STRIDEMAP_H_ - -#include -#include -#include "tprintf.h" - -namespace tesseract { - -// Enum describing the dimensions of the 'Tensor' in a NetworkIO. -// A NetworkIO is analogous to a TF Tensor, except that the number of dimensions -// is fixed (4), and they always have the same meaning. The underlying -// representation is a 2-D array, for which the product batch*height*width -// is always dim1 and depth is always dim2. FlexDimensions is used only for -// batch, height, width with the StrideMap, and therefore represents the runtime -// shape. The build-time shape is defined by StaticShape. -enum FlexDimensions { - FD_BATCH, // Index of multiple images. - FD_HEIGHT, // y-coordinate in image. - FD_WIDTH, // x-coordinate in image. - FD_DIMSIZE, // Number of flexible non-depth dimensions. -}; - -// Encapsulation of information relating to the mapping from [batch][y][x] to -// the first index into the 2-d array underlying a NetworkIO. -class StrideMap { - public: - // Class holding the non-depth indices. - class Index { - public: - explicit Index(const StrideMap& stride_map) : stride_map_(&stride_map) { - InitToFirst(); - } - Index(const StrideMap& stride_map, int batch, int y, int x) - : stride_map_(&stride_map) { - indices_[FD_BATCH] = batch; - indices_[FD_HEIGHT] = y; - indices_[FD_WIDTH] = x; - SetTFromIndices(); - } - // Accesses the index to the underlying array. - int t() const { return t_; } - int index(FlexDimensions dimension) const { return indices_[dimension]; } - // Initializes the indices to the first valid location. - void InitToFirst() { - memset(indices_, 0, sizeof(indices_)); - t_ = 0; - } - // Initializes the indices to the last valid location. 
- void InitToLast() { InitToLastOfBatch(MaxIndexOfDim(FD_BATCH)); } - // Returns true if *this is a valid index. - bool IsValid() const; - // Returns true if the index of the given dimension is the last. - bool IsLast(FlexDimensions dimension) const; - // Given that the dimensions up to and including dim-1 are valid, returns - // the maximum index for dimension dim. - int MaxIndexOfDim(FlexDimensions dim) const; - // Adds the given offset to the given dimension. Returns true if the result - // makes a valid index. - bool AddOffset(int offset, FlexDimensions dimension); - // Increments the index in some encapsulated way that guarantees to remain - // valid until it returns false, meaning that the iteration is complete. - bool Increment(); - // Decrements the index in some encapsulated way that guarantees to remain - // valid until it returns false, meaning that the iteration (that started - // with InitToLast()) is complete. - bool Decrement(); - - private: - // Initializes the indices to the last valid location in the given batch - // index. - void InitToLastOfBatch(int batch); - // Computes and sets t_ from the current indices_. - void SetTFromIndices(); - - // Map into which *this is an index. - const StrideMap* stride_map_; - // Index to the first dimension of the underlying array. - int t_; - // Indices into the individual dimensions. - int indices_[FD_DIMSIZE]; - }; - - StrideMap() { - memset(shape_, 0, sizeof(shape_)); - memset(t_increments_, 0, sizeof(t_increments_)); - } - // Default copy constructor and operator= are OK to use here! - - // Sets up the stride for the given array of height, width pairs. - void SetStride(const std::vector< std::pair >& h_w_pairs); - // Scales width and height dimensions by the given factors. - void ScaleXY(int x_factor, int y_factor); - // Reduces width to 1, across the batch, whatever the input size. - void ReduceWidthTo1(); - // Transposes the width and height dimensions. 
- void TransposeXY(); - // Returns the size of the given dimension. - int Size(FlexDimensions dimension) const { return shape_[dimension]; } - // Returns the total width required. - int Width() const { return t_increments_[FD_BATCH] * shape_[FD_BATCH]; } - - private: - // Computes t_increments_ from shape_. - void ComputeTIncrements(); - - // The size of each non-depth dimension. - int shape_[FD_DIMSIZE]; - // Precomputed 't' increments for each dimension. This is the value of - // the given dimension in the packed 3-d array that the shape_ represents. - int t_increments_[FD_DIMSIZE]; - // Vector of size shape_[FD_BATCH] holds the height of each image in a batch. - std::vector heights_; - // Vector of size shape_[FD_BATCH] holds the width of each image in a batch. - std::vector widths_; -}; - -} // namespace tesseract - -#endif // TESSERACT_LSTM_STRIDEMAP_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.cpp deleted file mode 100644 index fe456b43..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tfnetwork.cpp -// Description: Encapsulation of an entire tensorflow graph as a -// Tesseract Network. -// Author: Ray Smith -// Created: Fri Feb 26 09:35:29 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// -#ifdef INCLUDE_TENSORFLOW - -#include "tfnetwork.h" - -#include "allheaders.h" -#include "input.h" -#include "networkscratch.h" - -using tensorflow::Status; -using tensorflow::Tensor; -using tensorflow::TensorShape; - -namespace tesseract { - -TFNetwork::TFNetwork(const STRING& name) : Network(NT_TENSORFLOW, name, 0, 0) {} - -int TFNetwork::InitFromProtoStr(const string& proto_str) { - if (!model_proto_.ParseFromString(proto_str)) return 0; - return InitFromProto(); -} - -// Writes to the given file. Returns false in case of error. -// Should be overridden by subclasses, but called by their Serialize. -bool TFNetwork::Serialize(TFile* fp) const { - if (!Network::Serialize(fp)) return false; - string proto_str; - model_proto_.SerializeToString(&proto_str); - GenericVector data; - data.resize_no_init(proto_str.size()); - memcpy(&data[0], proto_str.data(), proto_str.size()); - if (!data.Serialize(fp)) return false; - return true; -} - -// Reads from the given file. Returns false in case of error. -// Should be overridden by subclasses, but NOT called by their DeSerialize. -bool TFNetwork::DeSerialize(TFile* fp) { - GenericVector data; - if (!data.DeSerialize(fp)) return false; - if (!model_proto_.ParseFromArray(&data[0], data.size())) { - return false; - } - return InitFromProto(); -} - -// Runs forward propagation of activations on the input line. -// See Network for a detailed discussion of the arguments. -void TFNetwork::Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) { - std::vector> tf_inputs; - int depth = input_shape_.depth(); - ASSERT_HOST(depth == input.NumFeatures()); - // TODO(rays) Allow batching. For now batch_size = 1. 
- const StrideMap& stride_map = input.stride_map(); - // TF requires a tensor of shape float[batch, height, width, depth]. - TensorShape shape{1, stride_map.Size(FD_HEIGHT), stride_map.Size(FD_WIDTH), - depth}; - Tensor input_tensor(tensorflow::DT_FLOAT, shape); - // The flat() member gives a 1d array, with a data() member to get the data. - auto eigen_tensor = input_tensor.flat(); - memcpy(eigen_tensor.data(), input.f(0), - input.Width() * depth * sizeof(input.f(0)[0])); - // Add the tensor to the vector of inputs. - tf_inputs.emplace_back(model_proto_.image_input(), input_tensor); - - // Provide tensors giving the width and/or height of the image if they are - // required. Some tf ops require a separate tensor with knowledge of the - // size of the input as they cannot obtain it from the input tensor. This is - // usually true in the case of ops that process a batch of variable-sized - // objects. - if (!model_proto_.image_widths().empty()) { - TensorShape size_shape{1}; - Tensor width_tensor(tensorflow::DT_INT64, size_shape); - auto eigen_wtensor = width_tensor.flat(); - *eigen_wtensor.data() = stride_map.Size(FD_WIDTH); - tf_inputs.emplace_back(model_proto_.image_widths(), width_tensor); - } - if (!model_proto_.image_heights().empty()) { - TensorShape size_shape{1}; - Tensor height_tensor(tensorflow::DT_INT64, size_shape); - auto eigen_htensor = height_tensor.flat(); - *eigen_htensor.data() = stride_map.Size(FD_HEIGHT); - tf_inputs.emplace_back(model_proto_.image_heights(), height_tensor); - } - std::vector target_layers = {model_proto_.output_layer()}; - std::vector outputs; - Status s = session_->Run(tf_inputs, target_layers, {}, &outputs); - if (!s.ok()) tprintf("session->Run failed:%s\n", s.error_message().c_str()); - ASSERT_HOST(s.ok()); - ASSERT_HOST(outputs.size() == 1); - const Tensor& output_tensor = outputs[0]; - // Check the dimensions of the output. 
- ASSERT_HOST(output_tensor.shape().dims() == 3); - int output_batch = output_tensor.shape().dim_size(0); - int output_steps = output_tensor.shape().dim_size(1); - int output_depth = output_tensor.shape().dim_size(2); - ASSERT_HOST(output_batch == 1); - ASSERT_HOST(output_depth == output_shape_.depth()); - output->Resize2d(false, output_steps, output_depth); - auto eigen_output = output_tensor.flat(); - memcpy(output->f(0), eigen_output.data(), - output_steps * output_depth * sizeof(output->f(0)[0])); -} - -int TFNetwork::InitFromProto() { - spec_ = model_proto_.spec(); - input_shape_.SetShape( - model_proto_.batch_size(), std::max(0, model_proto_.y_size()), - std::max(0, model_proto_.x_size()), model_proto_.depth()); - output_shape_.SetShape(model_proto_.batch_size(), 1, 0, - model_proto_.num_classes()); - output_shape_.set_loss_type(model_proto_.using_ctc() ? LT_CTC : LT_SOFTMAX); - ni_ = input_shape_.height(); - no_ = output_shape_.depth(); - // Initialize the session_ with the graph. Since we can't get the graph - // back from the session_, we have to keep the proto as well - tensorflow::SessionOptions options; - session_.reset(NewSession(options)); - Status s = session_->Create(model_proto_.graph()); - if (s.ok()) return model_proto_.global_step(); - tprintf("Session_->Create returned '%s'\n", s.error_message().c_str()); - return 0; -} - -} // namespace tesseract - -#endif // ifdef INCLUDE_TENSORFLOW diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.h deleted file mode 100644 index 1bccd233..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.h +++ /dev/null @@ -1,90 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tfnetwork.h -// Description: Encapsulation of an entire tensorflow graph as a -// Tesseract Network. 
-// Author: Ray Smith -// Created: Fri Feb 26 09:35:29 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_TFNETWORK_H_ -#define TESSERACT_LSTM_TFNETWORK_H_ - -#ifdef INCLUDE_TENSORFLOW - -#include -#include - -#include "network.h" -#include "static_shape.h" -#include "tfnetwork.proto.h" -#include "third_party/tensorflow/core/framework/graph.pb.h" -#include "third_party/tensorflow/core/public/session.h" - -namespace tesseract { - -class TFNetwork : public Network { - public: - explicit TFNetwork(const STRING& name); - virtual ~TFNetwork() = default; - - // Returns the required shape input to the network. - StaticShape InputShape() const override { return input_shape_; } - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - StaticShape OutputShape(const StaticShape& input_shape) const override { - return output_shape_; - } - - STRING spec() const override { return spec_.c_str(); } - - // Deserializes *this from a serialized TFNetwork proto. Returns 0 if failed, - // otherwise the global step of the serialized graph. - int InitFromProtoStr(const string& proto_str); - // The number of classes in this network should be equal to those in the - // recoder_ in LSTMRecognizer. - int num_classes() const { return output_shape_.depth(); } - - // Writes to the given file. 
Returns false in case of error. - // Should be overridden by subclasses, but called by their Serialize. - bool Serialize(TFile* fp) const override; - // Reads from the given file. Returns false in case of error. - // Should be overridden by subclasses, but NOT called by their DeSerialize. - bool DeSerialize(TFile* fp) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; - - private: - int InitFromProto(); - - // The original network definition for reference. - string spec_; - // Input tensor parameters. - StaticShape input_shape_; - // Output tensor parameters. - StaticShape output_shape_; - // The tensor flow graph is contained in here. - std::unique_ptr session_; - // The serialized graph is also contained in here. - TFNetworkModel model_proto_; -}; - -} // namespace tesseract. - -#endif // ifdef INCLUDE_TENSORFLOW - -#endif // TESSERACT_TENSORFLOW_TFNETWORK_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.proto b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.proto deleted file mode 100644 index 0942fd27..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/tfnetwork.proto +++ /dev/null @@ -1,61 +0,0 @@ -syntax = "proto3"; - -package tesseract; - -// TODO(rays) How to make this usable both in Google and open source? -import "third_party/tensorflow/core/framework/graph.proto"; - -// This proto is the interface between a python TF graph builder/trainer and -// the C++ world. The writer of this proto must provide fields as documented -// by the comments below. -// The graph must have a placeholder for NetworkIO, Widths and Heights. 
The -// following python code creates the appropriate placeholders: -// -// input_layer = tf.placeholder(tf.float32, -// shape=[batch_size, xsize, ysize, depth_dim], -// name='NetworkIO') -// widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths') -// heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights') -// # Flip x and y to the TF convention. -// input_layer = tf.transpose(input_layer, [0, 2, 1, 3]) -// -// The widths and heights will be set to indicate the post-scaling size of the -// input image(s). -// For now batch_size is ignored and set to 1. -// The graph should return a 2-dimensional float32 tensor called 'softmax' of -// shape [sequence_length, num_classes], where sequence_length is allowed to -// be variable, given by the tensor itself. -// TODO(rays) determine whether it is worth providing for batch_size >1 and if -// so, how. -message TFNetworkModel { - // The TF graph definition. Required. - tensorflow.GraphDef graph = 1; - // The training index. Required to be > 0. - int64 global_step = 2; - // The original network definition for reference. Optional - string spec = 3; - // Input tensor parameters. - // Values per pixel. Required to be 1 or 3. Inputs assumed to be float32. - int32 depth = 4; - // Image size. Required. Zero implies flexible sizes, fixed if non-zero. - // If x_size > 0, images will be cropped/padded to the given size, after - // any scaling required by the y_size. - // If y_size > 0, images will be scaled isotropically to the given height. - int32 x_size = 5; - int32 y_size = 6; - // Number of images in a batch. Optional. - int32 batch_size = 8; - // Output tensor parameters. - // Number of output classes. Required to match the depth of the softmax. - int32 num_classes = 9; - // True if this network needs CTC-like decoding, dropping duplicated labels. - // The decoder always drops the null character. - bool using_ctc = 10; - // Name of input image tensor. 
- string image_input = 11; - // Name of image height and width tensors. - string image_widths = 12; - string image_heights = 13; - // Name of output (softmax) tensor. - string output_layer = 14; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/weightmatrix.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/weightmatrix.cpp deleted file mode 100644 index ccc77c1e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/weightmatrix.cpp +++ /dev/null @@ -1,426 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: weightmatrix.cpp -// Description: Hides distinction between float/int implementations. -// Author: Ray Smith -// Created: Tue Jun 17 11:46:20 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "weightmatrix.h" - -#include "dotproductavx.h" -#include "dotproductsse.h" -#include "intsimdmatrix.h" -#include "simddetect.h" -#include "statistc.h" -#include "tprintf.h" - -namespace tesseract { - -#if defined(ANDROID) -static inline double log2(double n) { - return log(n) / log(2.0); -} -#endif // ANDROID - -// Number of iterations after which the correction effectively becomes unity. -const int kAdamCorrectionIterations = 200000; -// Epsilon in Adam to prevent division by zero. 
-const double kAdamEpsilon = 1e-8; - -// Copies the whole input transposed, converted to double, into *this. -void TransposedArray::Transpose(const GENERIC_2D_ARRAY& input) { - int width = input.dim1(); - int num_features = input.dim2(); - ResizeNoInit(num_features, width); - for (int t = 0; t < width; ++t) WriteStrided(t, input[t]); -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -TransposedArray::~TransposedArray() = default; - -// Sets up the network for training. Initializes weights using weights of -// scale `range` picked according to the random number generator `randomizer`. -int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam, - float weight_range, TRand* randomizer) { - int_mode_ = false; - wf_.Resize(no, ni, 0.0); - if (randomizer != nullptr) { - for (int i = 0; i < no; ++i) { - for (int j = 0; j < ni; ++j) { - wf_[i][j] = randomizer->SignedRand(weight_range); - } - } - } - use_adam_ = use_adam; - InitBackward(); - return ni * no; -} - -// Changes the number of outputs to the size of the given code_map, copying -// the old weight matrix entries for each output from code_map[output] where -// non-negative, and uses the mean (over all outputs) of the existing weights -// for all outputs with negative code_map entries. Returns the new number of -// weights. -int WeightMatrix::RemapOutputs(const std::vector& code_map) { - GENERIC_2D_ARRAY old_wf(wf_); - int old_no = wf_.dim1(); - int new_no = code_map.size(); - int ni = wf_.dim2(); - std::vector means(ni, 0.0); - for (int c = 0; c < old_no; ++c) { - const double* weights = wf_[c]; - for (int i = 0; i < ni; ++i) means[i] += weights[i]; - } - for (double& mean : means) mean /= old_no; - wf_.ResizeNoInit(new_no, ni); - InitBackward(); - for (int dest = 0; dest < new_no; ++dest) { - int src = code_map[dest]; - const double* src_data = src >= 0 ? 
old_wf[src] : means.data(); - memcpy(wf_[dest], src_data, ni * sizeof(*src_data)); - } - return ni * new_no; -} - -// Converts a float network to an int network. Each set of input weights that -// corresponds to a single output weight is converted independently: -// Compute the max absolute value of the weight set. -// Scale so the max absolute value becomes INT8_MAX. -// Round to integer. -// Store a multiplicative scale factor (as a double) that will reproduce -// the original value, subject to rounding errors. -void WeightMatrix::ConvertToInt() { - wi_.ResizeNoInit(wf_.dim1(), wf_.dim2()); - scales_.init_to_size(wi_.dim1(), 0.0); - int dim2 = wi_.dim2(); - for (int t = 0; t < wi_.dim1(); ++t) { - double* f_line = wf_[t]; - int8_t* i_line = wi_[t]; - double max_abs = 0.0; - for (int f = 0; f < dim2; ++f) { - double abs_val = fabs(f_line[f]); - if (abs_val > max_abs) max_abs = abs_val; - } - double scale = max_abs / INT8_MAX; - scales_[t] = scale; - if (scale == 0.0) scale = 1.0; - for (int f = 0; f < dim2; ++f) { - i_line[f] = IntCastRounded(f_line[f] / scale); - } - } - wf_.Resize(1, 1, 0.0); - int_mode_ = true; - multiplier_.reset(IntSimdMatrix::GetFastestMultiplier()); - if (multiplier_ != nullptr) multiplier_->Init(wi_); -} - -// Allocates any needed memory for running Backward, and zeroes the deltas, -// thus eliminating any existing momentum. -void WeightMatrix::InitBackward() { - int no = int_mode_ ? wi_.dim1() : wf_.dim1(); - int ni = int_mode_ ? wi_.dim2() : wf_.dim2(); - dw_.Resize(no, ni, 0.0); - updates_.Resize(no, ni, 0.0); - wf_t_.Transpose(wf_); - if (use_adam_) dw_sq_sum_.Resize(no, ni, 0.0); -} - -// Flag on mode to indicate that this weightmatrix uses int8_t. -const int kInt8Flag = 1; -// Flag on mode to indicate that this weightmatrix uses adam. -const int kAdamFlag = 4; -// Flag on mode to indicate that this weightmatrix uses double. Set -// independently of kInt8Flag as even in int mode the scales can -// be float or double. 
-const int kDoubleFlag = 128; - -// Writes to the given file. Returns false in case of error. -bool WeightMatrix::Serialize(bool training, TFile* fp) const { - // For backward compatibility, add kDoubleFlag to mode to indicate the doubles - // format, without errs, so we can detect and read old format weight matrices. - uint8_t mode = - (int_mode_ ? kInt8Flag : 0) | (use_adam_ ? kAdamFlag : 0) | kDoubleFlag; - if (!fp->Serialize(&mode)) return false; - if (int_mode_) { - if (!wi_.Serialize(fp)) return false; - if (!scales_.Serialize(fp)) return false; - } else { - if (!wf_.Serialize(fp)) return false; - if (training && !updates_.Serialize(fp)) return false; - if (training && use_adam_ && !dw_sq_sum_.Serialize(fp)) return false; - } - return true; -} - -// Reads from the given file. Returns false in case of error. - -bool WeightMatrix::DeSerialize(bool training, TFile* fp) { - uint8_t mode; - if (!fp->DeSerialize(&mode)) return false; - int_mode_ = (mode & kInt8Flag) != 0; - use_adam_ = (mode & kAdamFlag) != 0; - if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp); - if (int_mode_) { - if (!wi_.DeSerialize(fp)) return false; - if (!scales_.DeSerialize(fp)) return false; - multiplier_.reset(IntSimdMatrix::GetFastestMultiplier()); - if (multiplier_ != nullptr) multiplier_->Init(wi_); - } else { - if (!wf_.DeSerialize(fp)) return false; - if (training) { - InitBackward(); - if (!updates_.DeSerialize(fp)) return false; - if (use_adam_ && !dw_sq_sum_.DeSerialize(fp)) return false; - } - } - return true; -} - -// As DeSerialize, but reads an old (float) format WeightMatrix for -// backward compatibility. 
-bool WeightMatrix::DeSerializeOld(bool training, TFile* fp) { - GENERIC_2D_ARRAY float_array; - if (int_mode_) { - if (!wi_.DeSerialize(fp)) return false; - GenericVector old_scales; - if (!old_scales.DeSerialize(fp)) return false; - scales_.resize_no_init(old_scales.size()); - for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i]; - } else { - if (!float_array.DeSerialize(fp)) return false; - FloatToDouble(float_array, &wf_); - } - if (training) { - InitBackward(); - if (!float_array.DeSerialize(fp)) return false; - FloatToDouble(float_array, &updates_); - // Errs was only used in int training, which is now dead. - if (!float_array.DeSerialize(fp)) return false; - } - return true; -} - -// Computes matrix.vector v = Wu. -// u is of size W.dim2() - 1 and the output v is of size W.dim1(). -// u is imagined to have an extra element at the end with value 1, to -// implement the bias, but it doesn't actually have it. -// Asserts that the call matches what we have. -void WeightMatrix::MatrixDotVector(const double* u, double* v) const { - ASSERT_HOST(!int_mode_); - MatrixDotVectorInternal(wf_, true, false, u, v); -} - -void WeightMatrix::MatrixDotVector(const int8_t* u, double* v) const { - ASSERT_HOST(int_mode_); - ASSERT_HOST(multiplier_ != nullptr); - multiplier_->MatrixDotVector(wi_, scales_, u, v); -} - -// MatrixDotVector for peep weights, MultiplyAccumulate adds the -// component-wise products of *this[0] and v to inout. -void WeightMatrix::MultiplyAccumulate(const double* v, double* inout) { - ASSERT_HOST(!int_mode_); - ASSERT_HOST(wf_.dim1() == 1); - int n = wf_.dim2(); - const double* u = wf_[0]; - for (int i = 0; i < n; ++i) { - inout[i] += u[i] * v[i]; - } -} - -// Computes vector.matrix v = uW. -// u is of size W.dim1() and the output v is of size W.dim2() - 1. -// The last result is discarded, as v is assumed to have an imaginary -// last value of 1, as with MatrixDotVector. 
-void WeightMatrix::VectorDotMatrix(const double* u, double* v) const { - ASSERT_HOST(!int_mode_); - MatrixDotVectorInternal(wf_t_, false, true, u, v); -} - -// Fills dw_[i][j] with the dot product u[i][] . v[j][], using elements from -// u and v. In terms of the neural network, u is the gradients and v is the -// inputs. -// Note that (matching MatrixDotVector) v[last][] is missing, presumed 1.0. -// Runs parallel if requested. Note that u and v must be transposed. -void WeightMatrix::SumOuterTransposed(const TransposedArray& u, - const TransposedArray& v, - bool in_parallel) { - ASSERT_HOST(!int_mode_); - int num_outputs = dw_.dim1(); - ASSERT_HOST(u.dim1() == num_outputs); - ASSERT_HOST(u.dim2() == v.dim2()); - int num_inputs = dw_.dim2() - 1; - int num_samples = u.dim2(); - // v is missing the last element in dim1. - ASSERT_HOST(v.dim1() == num_inputs); -#ifdef _OPENMP -#pragma omp parallel for num_threads(4) if (in_parallel) -#endif - for (int i = 0; i < num_outputs; ++i) { - double* dwi = dw_[i]; - const double* ui = u[i]; - for (int j = 0; j < num_inputs; ++j) { - dwi[j] = DotProduct(ui, v[j], num_samples); - } - // The last element of v is missing, presumed 1.0f. - double total = 0.0; - for (int k = 0; k < num_samples; ++k) total += ui[k]; - dwi[num_inputs] = total; - } -} - -// Updates the weights using the given learning rate and momentum. -// num_samples is the quotient to be used in the adam computation iff -// use_adam_ is true. 
-void WeightMatrix::Update(double learning_rate, double momentum, - double adam_beta, int num_samples) { - ASSERT_HOST(!int_mode_); - if (use_adam_ && num_samples > 0 && num_samples < kAdamCorrectionIterations) { - learning_rate *= sqrt(1.0 - pow(adam_beta, num_samples)); - learning_rate /= 1.0 - pow(momentum, num_samples); - } - if (use_adam_ && num_samples > 0 && momentum > 0.0) { - dw_sq_sum_.SumSquares(dw_, adam_beta); - dw_ *= learning_rate * (1.0 - momentum); - updates_ *= momentum; - updates_ += dw_; - wf_.AdamUpdate(updates_, dw_sq_sum_, learning_rate * kAdamEpsilon); - } else { - dw_ *= learning_rate; - updates_ += dw_; - if (momentum > 0.0) wf_ += updates_; - if (momentum >= 0.0) updates_ *= momentum; - } - wf_t_.Transpose(wf_); -} - -// Adds the dw_ in other to the dw_ is *this. -void WeightMatrix::AddDeltas(const WeightMatrix& other) { - ASSERT_HOST(dw_.dim1() == other.dw_.dim1()); - ASSERT_HOST(dw_.dim2() == other.dw_.dim2()); - dw_ += other.dw_; -} - -// Sums the products of weight updates in *this and other, splitting into -// positive (same direction) in *same and negative (different direction) in -// *changed. -void WeightMatrix::CountAlternators(const WeightMatrix& other, double* same, - double* changed) const { - int num_outputs = updates_.dim1(); - int num_inputs = updates_.dim2(); - ASSERT_HOST(num_outputs == other.updates_.dim1()); - ASSERT_HOST(num_inputs == other.updates_.dim2()); - for (int i = 0; i < num_outputs; ++i) { - const double* this_i = updates_[i]; - const double* other_i = other.updates_[i]; - for (int j = 0; j < num_inputs; ++j) { - double product = this_i[j] * other_i[j]; - if (product < 0.0) - *changed -= product; - else - *same += product; - } - } -} - -// Helper computes an integer histogram bucket for a weight and adds it -// to the histogram. 
-const int kHistogramBuckets = 16; -static void HistogramWeight(double weight, STATS* histogram) { - int bucket = kHistogramBuckets - 1; - if (weight != 0.0) { - double logval = -log2(fabs(weight)); - bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1); - } - histogram->add(bucket, 1); -} - -void WeightMatrix::Debug2D(const char* msg) { - STATS histogram(0, kHistogramBuckets); - if (int_mode_) { - for (int i = 0; i < wi_.dim1(); ++i) { - for (int j = 0; j < wi_.dim2(); ++j) { - HistogramWeight(wi_[i][j] * scales_[i], &histogram); - } - } - } else { - for (int i = 0; i < wf_.dim1(); ++i) { - for (int j = 0; j < wf_.dim2(); ++j) { - HistogramWeight(wf_[i][j], &histogram); - } - } - } - tprintf("%s\n", msg); - histogram.print(); -} - -// Computes and returns the dot product of the two n-vectors u and v. -/* static */ -double WeightMatrix::DotProduct(const double* u, const double* v, int n) { - // Note: because the order of addition is different among the 3 DotProduct - // functions, the results can (and do) vary slightly (although they agree - // to within about 4e-15). This produces different results when running - // training, despite all random inputs being precisely equal. - // To get consistent results, use just one of these DotProduct functions. - // On a test multi-layer network, serial is 57% slower than sse, and avx - // is about 8% faster than sse. This suggests that the time is memory - // bandwidth constrained and could benefit from holding the reused vector - // in AVX registers. - if (SIMDDetect::IsAVXAvailable()) return DotProductAVX(u, v, n); - if (SIMDDetect::IsSSEAvailable()) return DotProductSSE(u, v, n); - double total = 0.0; - for (int k = 0; k < n; ++k) total += u[k] * v[k]; - return total; -} - -// Utility function converts an array of float to the corresponding array -// of double. 
-/* static */ -void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY& wf, - GENERIC_2D_ARRAY* wd) { - int dim1 = wf.dim1(); - int dim2 = wf.dim2(); - wd->ResizeNoInit(dim1, dim2); - for (int i = 0; i < dim1; ++i) { - const float* wfi = wf[i]; - double* wdi = (*wd)[i]; - for (int j = 0; j < dim2; ++j) wdi[j] = static_cast(wfi[j]); - } -} - -// Computes matrix.vector v = Wu. -// u is of size W.dim2() - add_bias_fwd and the output v is of size -// W.dim1() - skip_bias_back. -// If add_bias_fwd, u is imagined to have an extra element at the end with value -// 1, to implement the bias, weight. -// If skip_bias_back, we are actullay performing the backwards product on a -// transposed matrix, so we need to drop the v output corresponding to the last -// element in dim1. -void WeightMatrix::MatrixDotVectorInternal(const GENERIC_2D_ARRAY& w, - bool add_bias_fwd, - bool skip_bias_back, const double* u, - double* v) { - int num_results = w.dim1() - skip_bias_back; - int extent = w.dim2() - add_bias_fwd; - for (int i = 0; i < num_results; ++i) { - const double* wi = w[i]; - double total = DotProduct(wi, u, extent); - if (add_bias_fwd) total += wi[extent]; // The bias value. - v[i] = total; - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/weightmatrix.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/weightmatrix.h deleted file mode 100644 index 5f4b306f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/lstm/weightmatrix.h +++ /dev/null @@ -1,199 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: weightmatrix.h -// Description: Hides distinction between float/int implementations. -// Author: Ray Smith -// Created: Tue Jun 17 09:05:39 PST 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_WEIGHTMATRIX_H_ -#define TESSERACT_LSTM_WEIGHTMATRIX_H_ - -#include -#include "genericvector.h" -#include "intsimdmatrix.h" -#include "matrix.h" -#include "tprintf.h" - -namespace tesseract { - -// Convenience instantiation of GENERIC_2D_ARRAY with additional -// operations to write a strided vector, so the transposed form of the input -// is memory-contiguous. -class TransposedArray : public GENERIC_2D_ARRAY { - public: - // Copies the whole input transposed, converted to double, into *this. - void Transpose(const GENERIC_2D_ARRAY& input); - // Writes a vector of data representing a timestep (gradients or sources). - // The data is assumed to be of size1 in size (the strided dimension). - virtual ~TransposedArray(); - void WriteStrided(int t, const float* data) { - int size1 = dim1(); - for (int i = 0; i < size1; ++i) put(i, t, data[i]); - } - void WriteStrided(int t, const double* data) { - int size1 = dim1(); - for (int i = 0; i < size1; ++i) put(i, t, data[i]); - } - // Prints the first and last num elements of the un-transposed array. - void PrintUnTransposed(int num) { - int num_features = dim1(); - int width = dim2(); - for (int y = 0; y < num_features; ++y) { - for (int t = 0; t < width; ++t) { - if (num == 0 || t < num || t + num >= width) { - tprintf(" %g", (*this)(y, t)); - } - } - tprintf("\n"); - } - } -}; // class TransposedArray - -// Generic weight matrix for network layers. Can store the matrix as either -// an array of floats or int8_t. 
Provides functions to compute the forward and -// backward steps with the matrix and updates to the weights. -class WeightMatrix { - public: - WeightMatrix() : int_mode_(false), use_adam_(false) {} - // Sets up the network for training. Initializes weights using weights of - // scale `range` picked according to the random number generator `randomizer`. - // Note the order is outputs, inputs, as this is the order of indices to - // the matrix, so the adjacent elements are multiplied by the input during - // a forward operation. - int InitWeightsFloat(int no, int ni, bool use_adam, float weight_range, - TRand* randomizer); - // Changes the number of outputs to the size of the given code_map, copying - // the old weight matrix entries for each output from code_map[output] where - // non-negative, and uses the mean (over all outputs) of the existing weights - // for all outputs with negative code_map entries. Returns the new number of - // weights. - int RemapOutputs(const std::vector& code_map); - - // Converts a float network to an int network. Each set of input weights that - // corresponds to a single output weight is converted independently: - // Compute the max absolute value of the weight set. - // Scale so the max absolute value becomes INT8_MAX. - // Round to integer. - // Store a multiplicative scale factor (as a float) that will reproduce - // the original value, subject to rounding errors. - void ConvertToInt(); - // Returns the size rounded up to an internal factor used by the SIMD - // implementation for its input. - int RoundInputs(int size) const { - if (multiplier_ == nullptr) return size; - return multiplier_->RoundInputs(size); - } - - // Accessors. - bool is_int_mode() const { - return int_mode_; - } - int NumOutputs() const { return int_mode_ ? wi_.dim1() : wf_.dim1(); } - // Provides one set of weights. Only used by peep weight maxpool. - const double* GetWeights(int index) const { return wf_[index]; } - // Provides access to the deltas (dw_). 
- double GetDW(int i, int j) const { return dw_(i, j); } - - // Allocates any needed memory for running Backward, and zeroes the deltas, - // thus eliminating any existing momentum. - void InitBackward(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(bool training, TFile* fp) const; - // Reads from the given file. Returns false in case of error. - bool DeSerialize(bool training, TFile* fp); - // As DeSerialize, but reads an old (float) format WeightMatrix for - // backward compatibility. - bool DeSerializeOld(bool training, TFile* fp); - - // Computes matrix.vector v = Wu. - // u is of size W.dim2() - 1 and the output v is of size W.dim1(). - // u is imagined to have an extra element at the end with value 1, to - // implement the bias, but it doesn't actually have it. - // Asserts that the call matches what we have. - void MatrixDotVector(const double* u, double* v) const; - void MatrixDotVector(const int8_t* u, double* v) const; - // MatrixDotVector for peep weights, MultiplyAccumulate adds the - // component-wise products of *this[0] and v to inout. - void MultiplyAccumulate(const double* v, double* inout); - // Computes vector.matrix v = uW. - // u is of size W.dim1() and the output v is of size W.dim2() - 1. - // The last result is discarded, as v is assumed to have an imaginary - // last value of 1, as with MatrixDotVector. - void VectorDotMatrix(const double* u, double* v) const; - // Fills dw_[i][j] with the dot product u[i][] . v[j][], using elements - // from u and v, starting with u[i][offset] and v[j][offset]. - // Note that (matching MatrixDotVector) v[last][] is missing, presumed 1.0. - // Runs parallel if requested. Note that inputs must be transposed. - void SumOuterTransposed(const TransposedArray& u, const TransposedArray& v, - bool parallel); - // Updates the weights using the given learning rate, momentum and adam_beta. - // num_samples is used in the Adam correction factor. 
- void Update(double learning_rate, double momentum, double adam_beta, - int num_samples); - // Adds the dw_ in other to the dw_ is *this. - void AddDeltas(const WeightMatrix& other); - // Sums the products of weight updates in *this and other, splitting into - // positive (same direction) in *same and negative (different direction) in - // *changed. - void CountAlternators(const WeightMatrix& other, double* same, - double* changed) const; - - void Debug2D(const char* msg); - - // Computes and returns the dot product of the two n-vectors u and v. - static double DotProduct(const double* u, const double* v, int n); - // Utility function converts an array of float to the corresponding array - // of double. - static void FloatToDouble(const GENERIC_2D_ARRAY& wf, - GENERIC_2D_ARRAY* wd); - - private: - // Computes matrix.vector v = Wu. - // u is of size starts.back()+extents.back() and the output v is of size - // starts.size(). - // The weight matrix w, is of size starts.size()xMAX(extents)+add_bias_fwd. - // If add_bias_fwd, an extra element at the end of w[i] is the bias weight - // and is added to v[i]. - static void MatrixDotVectorInternal(const GENERIC_2D_ARRAY& w, - bool add_bias_fwd, bool skip_bias_back, - const double* u, double* v); - - private: - // Choice between float and 8 bit int implementations. - GENERIC_2D_ARRAY wf_; - GENERIC_2D_ARRAY wi_; - // Transposed copy of wf_, used only for Backward, and set with each Update. - TransposedArray wf_t_; - // Which of wf_ and wi_ are we actually using. - bool int_mode_; - // True if we are running adam in this weight matrix. - bool use_adam_; - // If we are using wi_, then scales_ is a factor to restore the row product - // with a vector to the correct range. - GenericVector scales_; - // Weight deltas. dw_ is the new delta, and updates_ the momentum-decaying - // amount to be added to wf_/wi_. - GENERIC_2D_ARRAY dw_; - GENERIC_2D_ARRAY updates_; - // Iff use_adam_, the sum of squares of dw_. 
The number of samples is - // given to Update(). Serialized iff use_adam_. - GENERIC_2D_ARRAY dw_sq_sum_; - // Holds the optimal integer multiplier for this machine. - std::unique_ptr multiplier_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_LSTM_WEIGHTMATRIX_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/Makefile.am deleted file mode 100644 index fb3ebf51..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -AM_CPPFLAGS += $(OPENCL_CFLAGS) \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/ccmain - -noinst_HEADERS = \ - openclwrapper.h oclkernels.h opencl_device_selection.h - -noinst_LTLIBRARIES = libtesseract_opencl.la - -libtesseract_opencl_la_SOURCES = \ - openclwrapper.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/oclkernels.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/oclkernels.h deleted file mode 100644 index 76fe3de0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/oclkernels.h +++ /dev/null @@ -1,1073 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_OPENCL_OCLKERNELS_H_ -#define TESSERACT_OPENCL_OCLKERNELS_H_ - -#ifndef USE_EXTERNAL_KERNEL -#define KERNEL(...) 
#__VA_ARGS__ "\n" -// Double precision is a default of spreadsheets -// cl_khr_fp64: Khronos extension -// cl_amd_fp64: AMD extension -// use build option outside to define fp_t -///////////////////////////////////////////// -const char *kernel_src = KERNEL( -\n#ifdef KHR_DP_EXTENSION\n -\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n -\n#elif AMD_DP_EXTENSION\n -\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n -\n#else\n -\n#endif\n -__kernel void composeRGBPixel(__global uint *tiffdata, int w, int h,int wpl, __global uint *output) -{ - int i = get_global_id(1); - int j = get_global_id(0); - int tiffword,rval,gval,bval; - - //Ignore the excess - if ((i >= h) || (j >= w)) - return; - - tiffword = tiffdata[i * w + j]; - rval = ((tiffword) & 0xff); - gval = (((tiffword) >> 8) & 0xff); - bval = (((tiffword) >> 16) & 0xff); - output[i*wpl+j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2))); -} -) - -KERNEL( -\n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword, - const int wpl, const int h) -{ - const unsigned int row = get_global_id(1); - const unsigned int col = get_global_id(0); - const unsigned int pos = row * wpl + col; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - *(dword + pos) &= ~(*(sword + pos)); -}\n -) - -KERNEL( -\n__kernel void pixSubtract(__global int *dword, __global int *sword, - const int wpl, const int h, __global int *outword) -{ - const unsigned int row = get_global_id(1); - const unsigned int col = get_global_id(0); - const unsigned int pos = row * wpl + col; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - *(outword + pos) = *(dword + pos) & ~(*(sword + pos)); -}\n -) - -KERNEL( -\n__kernel void morphoDilateHor_5x5(__global int *sword,__global int *dword, - const int wpl, const int h) -{ - const unsigned int pos = get_global_id(0); - unsigned int prevword, nextword, currword,tempword; - unsigned int 
destword; - const int col = pos % wpl; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if(col==0) - prevword=0; - else - prevword = *(sword + pos - 1); - - if(col==(wpl - 1)) - nextword=0; - else - nextword = *(sword + pos + 1); - - //Loop unrolled - - //1 bit to left and 1 bit to right - //Get the max value on LHS of every pixel - tempword = (prevword << (31)) | ((currword >> 1)); - destword |= tempword; - //Get max value on RHS of every pixel - tempword = (currword << 1) | (nextword >> (31)); - destword |= tempword; - - //2 bit to left and 2 bit to right - //Get the max value on LHS of every pixel - tempword = (prevword << (30)) | ((currword >> 2)); - destword |= tempword; - //Get max value on RHS of every pixel - tempword = (currword << 2) | (nextword >> (30)); - destword |= tempword; - - - *(dword + pos) = destword; - -}\n -) - -KERNEL( -\n__kernel void morphoDilateVer_5x5(__global int *sword,__global int *dword, - const int wpl, const int h) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword; - unsigned int destword; - int i; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - //2 words above - i = (row - 2) < 0 ? row : (row - 2); - tempword = *(sword + i*wpl + col); - destword |= tempword; - - //1 word above - i = (row - 1) < 0 ? row : (row - 1); - tempword = *(sword + i*wpl + col); - destword |= tempword; - - //1 word below - i = (row >= (h - 1)) ? row : (row + 1); - tempword = *(sword + i*wpl + col); - destword |= tempword; - - //2 words below - i = (row >= (h - 2)) ? 
row : (row + 2); - tempword = *(sword + i*wpl + col); - destword |= tempword; - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoDilateHor(__global int *sword,__global int *dword,const int xp, const int xn, const int wpl, const int h) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int parbitsxp, parbitsxn, nwords; - unsigned int destword, tempword, lastword, currword; - unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword; - int i, j, siter, eiter; - - //Ignore the execss - if (pos >= (wpl*h) || (xn < 1 && xp < 1)) - return; - - currword = *(sword + pos); - destword = currword; - - parbitsxp = xp & 31; - parbitsxn = xn & 31; - nwords = xp >> 5; - - if (parbitsxp > 0) - nwords += 1; - else - parbitsxp = 31; - - siter = (col - nwords); - eiter = (col + nwords); - - //Get prev word - if (col==0) - firstword = 0x0; - else - firstword = *(sword + pos - 1); - - //Get next word - if (col == (wpl - 1)) - secondword = 0x0; - else - secondword = *(sword + pos + 1); - - //Last partial bits on either side - for (i = 1; i <= parbitsxp; i++) - { - //Get the max value on LHS of every pixel - tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 
0x0 : (firstword << (32-i)) | ((currword >> i)); - - destword |= tempword; - - //Get max value on RHS of every pixel - tempword = (currword << i) | (secondword >> (32 - i)); - destword |= tempword; - } - - //Return if halfwidth <= 1 word - if (nwords == 1) - { - if (xn == 32) - { - destword |= firstword; - } - if (xp == 32) - { - destword |= secondword; - } - - *(dword + pos) = destword; - return; - } - - if (siter < 0) - firstword = 0x0; - else - firstword = *(sword + row*wpl + siter); - - if (eiter >= wpl) - lastword = 0x0; - else - lastword = *(sword + row*wpl + eiter); - - for (i = 1; i < nwords; i++) - { - //Gets LHS words - if ((siter + i) < 0) - secondword = 0x0; - else - secondword = *(sword + row*wpl + siter + i); - - lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn; - - firstword = secondword; - - if ((siter + i + 1) < 0) - secondword = 0x0; - else - secondword = *(sword + row*wpl + siter + i + 1); - - lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn; - - //Gets RHS words - if ((eiter - i) >= wpl) - firstword = 0x0; - else - firstword = *(sword + row*wpl + eiter - i); - - rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp); - - lastword = firstword; - if ((eiter - i - 1) >= wpl) - firstword = 0x0; - else - firstword = *(sword + row*wpl + eiter - i - 1); - - rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp); - - for (j = 1; j < 32; j++) - { - //OR LHS full words - tempword = (lprevword << j) | (lnextword >> (32 - j)); - destword |= tempword; - - //OR RHS full words - tempword = (rprevword << j) | (rnextword >> (32 - j)); - destword |= tempword; - } - - destword |= lprevword; - destword |= lnextword; - destword |= rprevword; - destword |= rnextword; - - lastword = firstword; - firstword = secondword; - } - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoDilateHor_32word(__global int *sword,__global int *dword, - const int halfwidth, - const int wpl, const int h, - 
const char isEven) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int prevword, nextword, currword,tempword; - unsigned int destword; - int i; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if(col==0) - prevword=0; - else - prevword = *(sword + pos - 1); - - if(col==(wpl - 1)) - nextword=0; - else - nextword = *(sword + pos + 1); - - for (i = 1; i <= halfwidth; i++) - { - //Get the max value on LHS of every pixel - if (i == halfwidth && isEven) - { - tempword = 0x0; - } - else - { - tempword = (prevword << (32-i)) | ((currword >> i)); - } - - destword |= tempword; - - //Get max value on RHS of every pixel - tempword = (currword << i) | (nextword >> (32 - i)); - - destword |= tempword; - } - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoDilateVer(__global int *sword,__global int *dword, - const int yp, - const int wpl, const int h, - const int yn) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword; - unsigned int destword; - int i, siter, eiter; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - //Set start position and end position considering the boundary conditions - siter = (row - yn) < 0 ? 0 : (row - yn); - eiter = (row >= (h - yp)) ? 
(h - 1) : (row + yp); - - for (i = siter; i <= eiter; i++) - { - tempword = *(sword + i*wpl + col); - - destword |= tempword; - } - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoErodeHor_5x5(__global int *sword,__global int *dword, - const int wpl, const int h) -{ - const unsigned int pos = get_global_id(0); - unsigned int prevword, nextword, currword,tempword; - unsigned int destword; - const int col = pos % wpl; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if(col==0) - prevword=0xffffffff; - else - prevword = *(sword + pos - 1); - - if(col==(wpl - 1)) - nextword=0xffffffff; - else - nextword = *(sword + pos + 1); - - //Loop unrolled - - //1 bit to left and 1 bit to right - //Get the min value on LHS of every pixel - tempword = (prevword << (31)) | ((currword >> 1)); - destword &= tempword; - //Get min value on RHS of every pixel - tempword = (currword << 1) | (nextword >> (31)); - destword &= tempword; - - //2 bit to left and 2 bit to right - //Get the min value on LHS of every pixel - tempword = (prevword << (30)) | ((currword >> 2)); - destword &= tempword; - //Get min value on RHS of every pixel - tempword = (currword << 2) | (nextword >> (30)); - destword &= tempword; - - - *(dword + pos) = destword; - -}\n -) - -KERNEL( -\n__kernel void morphoErodeVer_5x5(__global int *sword,__global int *dword, - const int wpl, const int h, - const int fwmask, const int lwmask) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword; - unsigned int destword; - int i; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - if (row < 2 || row >= (h - 2)) - { - destword = 0x0; - } - else - { - //2 words above - //i = (row - 2) < 0 ? 
row : (row - 2); - i = (row - 2); - tempword = *(sword + i*wpl + col); - destword &= tempword; - - //1 word above - //i = (row - 1) < 0 ? row : (row - 1); - i = (row - 1); - tempword = *(sword + i*wpl + col); - destword &= tempword; - - //1 word below - //i = (row >= (h - 1)) ? row : (row + 1); - i = (row + 1); - tempword = *(sword + i*wpl + col); - destword &= tempword; - - //2 words below - //i = (row >= (h - 2)) ? row : (row + 2); - i = (row + 2); - tempword = *(sword + i*wpl + col); - destword &= tempword; - - if (col == 0) - { - destword &= fwmask; - } - if (col == (wpl - 1)) - { - destword &= lwmask; - } - } - - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl, - const int h, const char isAsymmetric, const int rwmask, const int lwmask) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int parbitsxp, parbitsxn, nwords; - unsigned int destword, tempword, lastword, currword; - unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword; - int i, j, siter, eiter; - - //Ignore the execss - if (pos >= (wpl*h) || (xn < 1 && xp < 1)) - return; - - currword = *(sword + pos); - destword = currword; - - parbitsxp = xp & 31; - parbitsxn = xn & 31; - nwords = xp >> 5; - - if (parbitsxp > 0) - nwords += 1; - else - parbitsxp = 31; - - siter = (col - nwords); - eiter = (col + nwords); - - //Get prev word - if (col==0) - firstword = 0xffffffff; - else - firstword = *(sword + pos - 1); - - //Get next word - if (col == (wpl - 1)) - secondword = 0xffffffff; - else - secondword = *(sword + pos + 1); - - //Last partial bits on either side - for (i = 1; i <= parbitsxp; i++) - { - //Get the max value on LHS of every pixel - tempword = (firstword << (32-i)) | ((currword >> i)); - destword &= tempword; - - //Get max value on RHS of every pixel - tempword = ((i == parbitsxp) 
&& (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i)); - - //tempword = (currword << i) | (secondword >> (32 - i)); - destword &= tempword; - } - - //Return if halfwidth <= 1 word - if (nwords == 1) - { - if (xp == 32) - { - destword &= firstword; - } - if (xn == 32) - { - destword &= secondword; - } - - //Clear boundary pixels - if (isAsymmetric) - { - if (col == 0) - destword &= rwmask; - if (col == (wpl - 1)) - destword &= lwmask; - } - - *(dword + pos) = destword; - return; - } - - if (siter < 0) - firstword = 0xffffffff; - else - firstword = *(sword + row*wpl + siter); - - if (eiter >= wpl) - lastword = 0xffffffff; - else - lastword = *(sword + row*wpl + eiter); - - - for (i = 1; i < nwords; i++) - { - //Gets LHS words - if ((siter + i) < 0) - secondword = 0xffffffff; - else - secondword = *(sword + row*wpl + siter + i); - - lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp); - - firstword = secondword; - - if ((siter + i + 1) < 0) - secondword = 0xffffffff; - else - secondword = *(sword + row*wpl + siter + i + 1); - - lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp); - - //Gets RHS words - if ((eiter - i) >= wpl) - firstword = 0xffffffff; - else - firstword = *(sword + row*wpl + eiter - i); - - rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn); - - lastword = firstword; - if ((eiter - i - 1) >= wpl) - firstword = 0xffffffff; - else - firstword = *(sword + row*wpl + eiter - i - 1); - - rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn); - - for (j = 0; j < 32; j++) - { - //OR LHS full words - tempword = (lprevword << j) | (lnextword >> (32 - j)); - destword &= tempword; - - //OR RHS full words - tempword = (rprevword << j) | (rnextword >> (32 - j)); - destword &= tempword; - } - - destword &= lprevword; - destword &= lnextword; - destword &= rprevword; - destword &= rnextword; - - lastword = firstword; - firstword = secondword; - } - - if (isAsymmetric) - { 
- //Clear boundary pixels - if (col < (nwords - 1)) - destword = 0x0; - else if (col == (nwords - 1)) - destword &= rwmask; - else if (col > (wpl - nwords)) - destword = 0x0; - else if (col == (wpl - nwords)) - destword &= lwmask; - } - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoErodeHor_32word(__global int *sword,__global int *dword, - const int halfwidth, const int wpl, - const int h, const char clearBoundPixH, - const int rwmask, const int lwmask, - const char isEven) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int prevword, nextword, currword,tempword, destword; - int i; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if(col==0) - prevword=0xffffffff; - else - prevword = *(sword + pos - 1); - - if(col==(wpl - 1)) - nextword=0xffffffff; - else - nextword = *(sword + pos + 1); - - for (i = 1; i <= halfwidth; i++) - { - //Get the min value on LHS of every pixel - tempword = (prevword << (32-i)) | ((currword >> i)); - - destword &= tempword; - - //Get min value on RHS of every pixel - if (i == halfwidth && isEven) - { - tempword = 0xffffffff; - } - else - { - tempword = (currword << i) | (nextword >> (32 - i)); - } - - destword &= tempword; - } - - if (clearBoundPixH) - { - if (col == 0) - { - destword &= rwmask; - } - else if (col == (wpl - 1)) - { - destword &= lwmask; - } - } - - *(dword + pos) = destword; -}\n -) - -KERNEL( -\n__kernel void morphoErodeVer(__global int *sword,__global int *dword, - const int yp, - const int wpl, const int h, - const char clearBoundPixV, const int yn) -{ - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword, destword; - int i, siter, eiter; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - 
- //Set start position and end position considering the boundary conditions - siter = (row - yp) < 0 ? 0 : (row - yp); - eiter = (row >= (h - yn)) ? (h - 1) : (row + yn); - - for (i = siter; i <= eiter; i++) - { - tempword = *(sword + i*wpl + col); - - destword &= tempword; - } - - //Clear boundary pixels - if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) - { - destword = 0x0; - } - - *(dword + pos) = destword; -}\n -) - -// HistogramRect Kernel: Accumulate -// assumes 4 channels, i.e., bytes_per_pixel = 4 -// assumes number of pixels is multiple of 8 -// data is laid out as -// ch0 ch1 ... -// bin0 bin1 bin2... bin0... -// rpt0,1,2...256 rpt0,1,2... -KERNEL( -\n#define HIST_REDUNDANCY 256\n -\n#define GROUP_SIZE 256\n -\n#define HIST_SIZE 256\n -\n#define NUM_CHANNELS 4\n -\n#define HR_UNROLL_SIZE 8 \n -\n#define HR_UNROLL_TYPE uchar8 \n - -__attribute__((reqd_work_group_size(256, 1, 1))) -__kernel -void kernel_HistogramRectAllChannels( - __global const uchar8 *data, - uint numPixels, - __global uint *histBuffer) { - - // declare variables - uchar8 pixels; - int threadOffset = get_global_id(0)%HIST_REDUNDANCY; - - // for each pixel/channel, accumulate in global memory - for (uint pc = get_global_id(0); pc < numPixels*NUM_CHANNELS/HR_UNROLL_SIZE; pc += get_global_size(0)) { - pixels = data[pc]; - // channel bin thread - atomic_inc(&histBuffer[0*HIST_SIZE*HIST_REDUNDANCY + pixels.s0*HIST_REDUNDANCY + threadOffset]); // ch0 - atomic_inc(&histBuffer[0*HIST_SIZE*HIST_REDUNDANCY + pixels.s4*HIST_REDUNDANCY + threadOffset]); // ch0 - atomic_inc(&histBuffer[1*HIST_SIZE*HIST_REDUNDANCY + pixels.s1*HIST_REDUNDANCY + threadOffset]); // ch1 - atomic_inc(&histBuffer[1*HIST_SIZE*HIST_REDUNDANCY + pixels.s5*HIST_REDUNDANCY + threadOffset]); // ch1 - atomic_inc(&histBuffer[2*HIST_SIZE*HIST_REDUNDANCY + pixels.s2*HIST_REDUNDANCY + threadOffset]); // ch2 - atomic_inc(&histBuffer[2*HIST_SIZE*HIST_REDUNDANCY + pixels.s6*HIST_REDUNDANCY + threadOffset]); // ch2 - 
atomic_inc(&histBuffer[3*HIST_SIZE*HIST_REDUNDANCY + pixels.s3*HIST_REDUNDANCY + threadOffset]); // ch3 - atomic_inc(&histBuffer[3*HIST_SIZE*HIST_REDUNDANCY + pixels.s7*HIST_REDUNDANCY + threadOffset]); // ch3 - } -} -) - -KERNEL( -// NUM_CHANNELS = 1 -__attribute__((reqd_work_group_size(256, 1, 1))) -__kernel -void kernel_HistogramRectOneChannel( - __global const uchar8 *data, - uint numPixels, - __global uint *histBuffer) { - - // declare variables - uchar8 pixels; - int threadOffset = get_global_id(0)%HIST_REDUNDANCY; - - // for each pixel/channel, accumulate in global memory - for (uint pc = get_global_id(0); pc < numPixels/HR_UNROLL_SIZE; pc += get_global_size(0)) { - pixels = data[pc]; - // bin thread - atomic_inc(&histBuffer[pixels.s0*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s1*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s2*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s3*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s4*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s5*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s6*HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s7*HIST_REDUNDANCY + threadOffset]); - } -} -) - -// HistogramRect Kernel: Reduction -// only supports 4 channels -// each work group handles a single channel of a single histogram bin -KERNEL( -__attribute__((reqd_work_group_size(256, 1, 1))) -__kernel -void kernel_HistogramRectAllChannelsReduction( - int n, // unused pixel redundancy - __global uint *histBuffer, - __global int* histResult) { - - // declare variables - int channel = get_group_id(0)/HIST_SIZE; - int bin = get_group_id(0)%HIST_SIZE; - int value = 0; - - // accumulate in register - for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) { - value += histBuffer[ channel*HIST_SIZE*HIST_REDUNDANCY+bin*HIST_REDUNDANCY+i]; - } - - // reduction in local memory - __local int 
localHist[GROUP_SIZE]; - localHist[get_local_id(0)] = value; - barrier(CLK_LOCAL_MEM_FENCE); - for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) { - if (get_local_id(0) < stride) { - value = localHist[ get_local_id(0)+stride]; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (get_local_id(0) < stride) { - localHist[ get_local_id(0)] += value; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - // write reduction to final result - if (get_local_id(0) == 0) { - histResult[get_group_id(0)] = localHist[0]; - } -} // kernel_HistogramRectAllChannels -) - - -KERNEL( -// NUM_CHANNELS = 1 -__attribute__((reqd_work_group_size(256, 1, 1))) -__kernel -void kernel_HistogramRectOneChannelReduction( - int n, // unused pixel redundancy - __global uint *histBuffer, - __global int* histResult) { - - // declare variables - // int channel = get_group_id(0)/HIST_SIZE; - int bin = get_group_id(0)%HIST_SIZE; - int value = 0; - - // accumulate in register - for (int i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) { - value += histBuffer[ bin*HIST_REDUNDANCY+i]; - } - - // reduction in local memory - __local int localHist[GROUP_SIZE]; - localHist[get_local_id(0)] = value; - barrier(CLK_LOCAL_MEM_FENCE); - for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) { - if (get_local_id(0) < stride) { - value = localHist[ get_local_id(0)+stride]; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (get_local_id(0) < stride) { - localHist[ get_local_id(0)] += value; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - // write reduction to final result - if (get_local_id(0) == 0) { - histResult[get_group_id(0)] = localHist[0]; - } -} // kernel_HistogramRectOneChannelReduction -) - -// ThresholdRectToPix Kernel -// only supports 4 channels -// imageData is input image (24-bits/pixel) -// pix is output image (1-bit/pixel) -KERNEL( -\n#define CHAR_VEC_WIDTH 4 \n -\n#define PIXELS_PER_WORD 32 \n -\n#define PIXELS_PER_BURST 8 \n -\n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n - typedef union { - uchar 
s[PIXELS_PER_BURST*NUM_CHANNELS]; - uchar4 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH]; - } charVec; - -__attribute__((reqd_work_group_size(256, 1, 1))) -__kernel -void kernel_ThresholdRectToPix( - __global const uchar4 *imageData, - int height, - int width, - int wpl, // words per line - __global int *thresholds, - __global int *hi_values, - __global int *pix) { - - // declare variables - int pThresholds[NUM_CHANNELS]; - int pHi_Values[NUM_CHANNELS]; - for (int i = 0; i < NUM_CHANNELS; i++) { - pThresholds[i] = thresholds[i]; - pHi_Values[i] = hi_values[i]; - } - - // for each word (32 pixels) in output image - for (uint w = get_global_id(0); w < wpl*height; w += get_global_size(0)) { - unsigned int word = 0; // all bits start at zero - // for each burst in word - for (int b = 0; b < BURSTS_PER_WORD; b++) { - // load burst - charVec pixels; - int offset = (w / wpl) * width; - offset += (w % wpl) * PIXELS_PER_WORD; - offset += b * PIXELS_PER_BURST; - - for (int i = 0; i < PIXELS_PER_BURST; ++i) - pixels.v[i] = imageData[offset + i]; - - // for each pixel in burst - for (int p = 0; p < PIXELS_PER_BURST; p++) { - for (int c = 0; c < NUM_CHANNELS; c++) { - unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c]; - if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) { - const uint kTopBit = 0x80000000; - word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31)); - } - } - } - } - pix[w] = word; - } -} - -\n#define CHAR_VEC_WIDTH 8 \n -\n#define PIXELS_PER_WORD 32 \n -\n#define PIXELS_PER_BURST 8 \n -\n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n - typedef union { - uchar s[PIXELS_PER_BURST*1]; - uchar8 v[(PIXELS_PER_BURST*1)/CHAR_VEC_WIDTH]; - } charVec1; - -__attribute__((reqd_work_group_size(256, 1, 1))) -__kernel -void kernel_ThresholdRectToPix_OneChan( - __global const uchar8 *imageData, - int height, - int width, - int wpl, // words per line of output image - __global int *thresholds, - __global int *hi_values, - __global int 
*pix) { - - // declare variables - int pThresholds[1]; - int pHi_Values[1]; - for (int i = 0; i < 1; i++) { - pThresholds[i] = thresholds[i]; - pHi_Values[i] = hi_values[i]; - } - - // for each word (32 pixels) in output image - for (uint w = get_global_id(0); w < wpl*height; w += get_global_size(0)) { - unsigned int word = 0; // all bits start at zero - - // for each burst in word - for (int b = 0; b < BURSTS_PER_WORD; b++) { - - // load burst - charVec1 pixels; - // for each char8 in burst - pixels.v[0] = imageData[ - w*BURSTS_PER_WORD - + b - + 0 ]; - - // for each pixel in burst - for (int p = 0; p < PIXELS_PER_BURST; p++) { - - //int littleEndianIdx = p ^ 3; - //int bigEndianIdx = p; - int idx = -\n#ifdef __ENDIAN_LITTLE__\n - p ^ 3; -\n#else\n - p; -\n#endif\n - unsigned char pixChan = pixels.s[idx]; - if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) { - const uint kTopBit = 0x80000000; - word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31)); - } - } - } - pix[w] = word; - } -} -) - - ; // close char* - -#endif // USE_EXTERNAL_KERNEL -#endif // TESSERACT_OPENCL_OCLKERNELS_H_ -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/opencl_device_selection.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/opencl_device_selection.h deleted file mode 100644 index 0e538e90..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/opencl_device_selection.h +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef DEVICE_SELECTION_H -#define DEVICE_SELECTION_H - -#ifdef USE_OPENCL - -#ifdef _MSC_VER -#define _CRT_SECURE_NO_WARNINGS -#endif - -#include -#include -#include - -#ifdef __APPLE__ -#include -#else -#include -#endif - -struct TessDeviceScore; - -// device type -enum ds_device_type { - DS_DEVICE_NATIVE_CPU = 0, - DS_DEVICE_OPENCL_DEVICE -}; - -struct ds_device { - ds_device_type type; - cl_device_id oclDeviceID; - char* oclDeviceName; - char* oclDriverVersion; - // a pointer to the score data, the content/format is application defined. - TessDeviceScore* score; -}; - -#endif // USE_OPENCL -#endif // DEVICE_SELECTION_H diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/openclwrapper.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/openclwrapper.cpp deleted file mode 100644 index cf2be4bf..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/openclwrapper.cpp +++ /dev/null @@ -1,2637 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#ifdef _WIN32 -#include -#else -#include -#include -#endif -#include - -#include "oclkernels.h" -#include "openclwrapper.h" - -// for micro-benchmark -#include "otsuthr.h" -#include "thresholder.h" - -#if ON_APPLE -#include -#endif - -#ifdef USE_OPENCL - -#include -#include - -#include "errcode.h" // for ASSERT_HOST -#include "opencl_device_selection.h" - -GPUEnv OpenclDevice::gpuEnv; - -bool OpenclDevice::deviceIsSelected = false; -ds_device OpenclDevice::selectedDevice; - -int OpenclDevice::isInited = 0; - -static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC; - -static const l_uint32 lmask32[] = { - 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000, - 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, - 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000, - 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, - 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc, - 0xfffffffe, 0xffffffff}; - -static const l_uint32 rmask32[] = { - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, - 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff, - 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, - 0x7fffffff, 0xffffffff}; - -static cl_mem pixsCLBuffer, pixdCLBuffer, - pixdCLIntermediate; // Morph operations buffers -static cl_mem pixThBuffer; // output from thresholdtopix calculation -static cl_int clStatus; -static KernelEnv rEnv; - -#define DS_TAG_VERSION "" -#define DS_TAG_VERSION_END "" -#define DS_TAG_DEVICE "" -#define DS_TAG_DEVICE_END "" -#define DS_TAG_SCORE "" -#define DS_TAG_SCORE_END "" -#define DS_TAG_DEVICE_TYPE "" -#define DS_TAG_DEVICE_TYPE_END "" -#define DS_TAG_DEVICE_NAME "" -#define DS_TAG_DEVICE_NAME_END "" -#define DS_TAG_DEVICE_DRIVER_VERSION "" 
-#define DS_TAG_DEVICE_DRIVER_VERSION_END "" - -#define DS_DEVICE_NATIVE_CPU_STRING "native_cpu" - -#define DS_DEVICE_NAME_LENGTH 256 - -enum ds_evaluation_type { DS_EVALUATE_ALL, DS_EVALUATE_NEW_ONLY }; - -struct ds_profile { - std::vector devices; - unsigned int numDevices; - const char* version; -}; - -enum ds_status { - DS_SUCCESS = 0, - DS_INVALID_PROFILE = 1000, - DS_MEMORY_ERROR, - DS_INVALID_PERF_EVALUATOR_TYPE, - DS_INVALID_PERF_EVALUATOR, - DS_PERF_EVALUATOR_ERROR, - DS_FILE_ERROR, - DS_UNKNOWN_DEVICE_TYPE, - DS_PROFILE_FILE_ERROR, - DS_SCORE_SERIALIZER_ERROR, - DS_SCORE_DESERIALIZER_ERROR -}; - -// Pointer to a function that calculates the score of a device (ex: -// device->score) update the data size of score. The encoding and the format -// of the score data is implementation defined. The function should return -// DS_SUCCESS if there's no error to be reported. -typedef ds_status (*ds_perf_evaluator)(ds_device* device, void* data); - -// deallocate memory used by score -typedef ds_status (*ds_score_release)(TessDeviceScore* score); - -static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) { - ds_status status = DS_SUCCESS; - if (profile != nullptr) { - if (sr != nullptr) { - unsigned int i; - for (i = 0; i < profile->numDevices; i++) { - free(profile->devices[i].oclDeviceName); - free(profile->devices[i].oclDriverVersion); - status = sr(profile->devices[i].score); - if (status != DS_SUCCESS) break; - } - } - delete profile; - } - return status; -} - -static ds_status initDSProfile(ds_profile** p, const char* version) { - int numDevices; - cl_uint numPlatforms; - std::vector platforms; - std::vector devices; - ds_status status = DS_SUCCESS; - unsigned int next; - unsigned int i; - - if (p == nullptr) return DS_INVALID_PROFILE; - - ds_profile* profile = new ds_profile; - - memset(profile, 0, sizeof(ds_profile)); - - clGetPlatformIDs(0, nullptr, &numPlatforms); - - if (numPlatforms > 0) { - platforms.reserve(numPlatforms); - 
clGetPlatformIDs(numPlatforms, &platforms[0], nullptr); - } - - numDevices = 0; - for (i = 0; i < numPlatforms; i++) { - cl_uint num; - clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &num); - numDevices += num; - } - - if (numDevices > 0) { - devices.reserve(numDevices); - } - - profile->numDevices = - numDevices + 1; // +1 to numDevices to include the native CPU - profile->devices.reserve(profile->numDevices); - memset(&profile->devices[0], 0, profile->numDevices * sizeof(ds_device)); - - next = 0; - for (i = 0; i < numPlatforms; i++) { - cl_uint num; - unsigned j; - clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, &devices[0], &num); - for (j = 0; j < num; j++, next++) { - char buffer[DS_DEVICE_NAME_LENGTH]; - size_t length; - - profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE; - profile->devices[next].oclDeviceID = devices[j]; - - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME, - DS_DEVICE_NAME_LENGTH, &buffer, nullptr); - length = strlen(buffer); - profile->devices[next].oclDeviceName = (char*)malloc(length + 1); - memcpy(profile->devices[next].oclDeviceName, buffer, length + 1); - - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION, - DS_DEVICE_NAME_LENGTH, &buffer, nullptr); - length = strlen(buffer); - profile->devices[next].oclDriverVersion = (char*)malloc(length + 1); - memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1); - } - } - profile->devices[next].type = DS_DEVICE_NATIVE_CPU; - profile->version = version; - - *p = profile; - return status; -} - -static ds_status profileDevices(ds_profile* profile, - const ds_evaluation_type type, - ds_perf_evaluator evaluator, - void* evaluatorData, unsigned int* numUpdates) { - ds_status status = DS_SUCCESS; - unsigned int i; - unsigned int updates = 0; - - if (profile == nullptr) { - return DS_INVALID_PROFILE; - } - if (evaluator == nullptr) { - return DS_INVALID_PERF_EVALUATOR; - } - - for (i = 0; i < profile->numDevices; i++) 
{ - ds_status evaluatorStatus; - - switch (type) { - case DS_EVALUATE_NEW_ONLY: - if (profile->devices[i].score != nullptr) break; - // else fall through - case DS_EVALUATE_ALL: - evaluatorStatus = evaluator(&profile->devices[i], evaluatorData); - if (evaluatorStatus != DS_SUCCESS) { - status = evaluatorStatus; - return status; - } - updates++; - break; - default: - return DS_INVALID_PERF_EVALUATOR_TYPE; - break; - }; - } - if (numUpdates) *numUpdates = updates; - return status; -} - -static const char* findString(const char* contentStart, const char* contentEnd, - const char* string) { - size_t stringLength; - const char* currentPosition; - const char* found = nullptr; - stringLength = strlen(string); - currentPosition = contentStart; - for (currentPosition = contentStart; currentPosition < contentEnd; - currentPosition++) { - if (*currentPosition == string[0]) { - if (currentPosition + stringLength < contentEnd) { - if (strncmp(currentPosition, string, stringLength) == 0) { - found = currentPosition; - break; - } - } - } - } - return found; -} - -static ds_status readProFile(const char* fileName, char** content, - size_t* contentSize) { - *contentSize = 0; - *content = nullptr; - ds_status status = DS_SUCCESS; - FILE* input = fopen(fileName, "rb"); - if (input == nullptr) { - status = DS_FILE_ERROR; - } else { - fseek(input, 0L, SEEK_END); - long pos = ftell(input); - rewind(input); - if (pos > 0) { - size_t size = pos; - char *binary = new char[size]; - if (fread(binary, sizeof(char), size, input) != size) { - status = DS_FILE_ERROR; - delete[] binary; - } else { - *contentSize = size; - *content = binary; - } - } - fclose(input); - } - return status; -} - -typedef ds_status (*ds_score_deserializer)(ds_device* device, - const unsigned char* serializedScore, - unsigned int serializedScoreSize); - -static ds_status readProfileFromFile(ds_profile* profile, - ds_score_deserializer deserializer, - const char* file) { - ds_status status = DS_SUCCESS; - char* 
contentStart; - size_t contentSize; - - if (profile == nullptr) return DS_INVALID_PROFILE; - - status = readProFile(file, &contentStart, &contentSize); - if (status == DS_SUCCESS) { - const char* currentPosition; - const char* dataStart; - const char* dataEnd; - - const char* contentEnd = contentStart + contentSize; - currentPosition = contentStart; - - // parse the version string - dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION); - if (dataStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - dataStart += strlen(DS_TAG_VERSION); - - dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END); - if (dataEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - size_t versionStringLength = strlen(profile->version); - if (versionStringLength + dataStart != dataEnd || - strncmp(profile->version, dataStart, versionStringLength) != 0) { - // version mismatch - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - currentPosition = dataEnd + strlen(DS_TAG_VERSION_END); - - // parse the device information - while (1) { - unsigned int i; - - const char* deviceTypeStart; - const char* deviceTypeEnd; - ds_device_type deviceType; - - const char* deviceNameStart; - const char* deviceNameEnd; - - const char* deviceScoreStart; - const char* deviceScoreEnd; - - const char* deviceDriverStart; - const char* deviceDriverEnd; - - dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE); - if (dataStart == nullptr) { - // nothing useful remain, quit... 
- break; - } - dataStart += strlen(DS_TAG_DEVICE); - dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END); - if (dataEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - // parse the device type - deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE); - if (deviceTypeStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE); - deviceTypeEnd = - findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END); - if (deviceTypeEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - memcpy(&deviceType, deviceTypeStart, sizeof(ds_device_type)); - - // parse the device name - if (deviceType == DS_DEVICE_OPENCL_DEVICE) { - deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME); - if (deviceNameStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceNameStart += strlen(DS_TAG_DEVICE_NAME); - deviceNameEnd = - findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END); - if (deviceNameEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - deviceDriverStart = - findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION); - if (deviceDriverStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION); - deviceDriverEnd = findString(deviceDriverStart, contentEnd, - DS_TAG_DEVICE_DRIVER_VERSION_END); - if (deviceDriverEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - // check if this device is on the system - for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) { - size_t actualDeviceNameLength; - size_t driverVersionLength; - - actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName); - driverVersionLength = strlen(profile->devices[i].oclDriverVersion); - if (deviceNameStart + actualDeviceNameLength == deviceNameEnd && 
- deviceDriverStart + driverVersionLength == deviceDriverEnd && - strncmp(profile->devices[i].oclDeviceName, deviceNameStart, - actualDeviceNameLength) == 0 && - strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart, - driverVersionLength) == 0) { - deviceScoreStart = - findString(dataStart, contentEnd, DS_TAG_SCORE); - deviceScoreStart += strlen(DS_TAG_SCORE); - deviceScoreEnd = - findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); - status = deserializer(&profile->devices[i], - (const unsigned char*)deviceScoreStart, - deviceScoreEnd - deviceScoreStart); - if (status != DS_SUCCESS) { - goto cleanup; - } - } - } - } - } else if (deviceType == DS_DEVICE_NATIVE_CPU) { - for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) { - deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); - if (deviceScoreStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceScoreStart += strlen(DS_TAG_SCORE); - deviceScoreEnd = - findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); - status = deserializer(&profile->devices[i], - (const unsigned char*)deviceScoreStart, - deviceScoreEnd - deviceScoreStart); - if (status != DS_SUCCESS) { - goto cleanup; - } - } - } - } - - // skip over the current one to find the next device - currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END); - } - } -cleanup: - delete[] contentStart; - return status; -} - -typedef ds_status (*ds_score_serializer)(ds_device* device, - void** serializedScore, - unsigned int* serializedScoreSize); -static ds_status writeProfileToFile(ds_profile* profile, - ds_score_serializer serializer, - const char* file) { - ds_status status = DS_SUCCESS; - - if (profile == nullptr) return DS_INVALID_PROFILE; - - FILE* profileFile = fopen(file, "wb"); - if (profileFile == nullptr) { - status = DS_FILE_ERROR; - } else { - unsigned int i; - - // write version string - fwrite(DS_TAG_VERSION, sizeof(char), strlen(DS_TAG_VERSION), 
profileFile); - fwrite(profile->version, sizeof(char), strlen(profile->version), - profileFile); - fwrite(DS_TAG_VERSION_END, sizeof(char), strlen(DS_TAG_VERSION_END), - profileFile); - fwrite("\n", sizeof(char), 1, profileFile); - - for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) { - void* serializedScore; - unsigned int serializedScoreSize; - - fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile); - - fwrite(DS_TAG_DEVICE_TYPE, sizeof(char), strlen(DS_TAG_DEVICE_TYPE), - profileFile); - fwrite(&profile->devices[i].type, sizeof(ds_device_type), 1, profileFile); - fwrite(DS_TAG_DEVICE_TYPE_END, sizeof(char), - strlen(DS_TAG_DEVICE_TYPE_END), profileFile); - - switch (profile->devices[i].type) { - case DS_DEVICE_NATIVE_CPU: { - // There's no need to emit a device name for the native CPU device. - /* - fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), - profileFile); - fwrite(DS_DEVICE_NATIVE_CPU_STRING,sizeof(char), - strlen(DS_DEVICE_NATIVE_CPU_STRING), profileFile); - fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), - strlen(DS_TAG_DEVICE_NAME_END), profileFile); - */ - } break; - case DS_DEVICE_OPENCL_DEVICE: { - fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), - profileFile); - fwrite(profile->devices[i].oclDeviceName, sizeof(char), - strlen(profile->devices[i].oclDeviceName), profileFile); - fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), - strlen(DS_TAG_DEVICE_NAME_END), profileFile); - - fwrite(DS_TAG_DEVICE_DRIVER_VERSION, sizeof(char), - strlen(DS_TAG_DEVICE_DRIVER_VERSION), profileFile); - fwrite(profile->devices[i].oclDriverVersion, sizeof(char), - strlen(profile->devices[i].oclDriverVersion), profileFile); - fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END, sizeof(char), - strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile); - } break; - default: - status = DS_UNKNOWN_DEVICE_TYPE; - continue; - }; - - fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile); - status = 
serializer(&profile->devices[i], &serializedScore, - &serializedScoreSize); - if (status == DS_SUCCESS && serializedScore != nullptr && - serializedScoreSize > 0) { - fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile); - free(serializedScore); - } - fwrite(DS_TAG_SCORE_END, sizeof(char), strlen(DS_TAG_SCORE_END), - profileFile); - fwrite(DS_TAG_DEVICE_END, sizeof(char), strlen(DS_TAG_DEVICE_END), - profileFile); - fwrite("\n", sizeof(char), 1, profileFile); - } - fclose(profileFile); - } - return status; -} - -// substitute invalid characters in device name with _ -static void legalizeFileName(char* fileName) { - // tprintf("fileName: %s\n", fileName); - const char* invalidChars = - "/\?:*\"><| "; // space is valid but can cause headaches - // for each invalid char - for (unsigned i = 0; i < strlen(invalidChars); i++) { - char invalidStr[4]; - invalidStr[0] = invalidChars[i]; - invalidStr[1] = '\0'; - // tprintf("eliminating %s\n", invalidStr); - // char *pos = strstr(fileName, invalidStr); - // initial ./ is valid for present directory - // if (*pos == '.') pos++; - // if (*pos == '/') pos++; - for (char* pos = strstr(fileName, invalidStr); pos != nullptr; - pos = strstr(pos + 1, invalidStr)) { - // tprintf("\tfound: %s, ", pos); - pos[0] = '_'; - // tprintf("fileName: %s\n", fileName); - } - } -} - -static void populateGPUEnvFromDevice(GPUEnv* gpuInfo, cl_device_id device) { - // tprintf("[DS] populateGPUEnvFromDevice\n"); - size_t size; - gpuInfo->mnIsUserCreated = 1; - // device - gpuInfo->mpDevID = device; - gpuInfo->mpArryDevsID = new cl_device_id[1]; - gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID; - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE, - sizeof(cl_device_type), &gpuInfo->mDevType, &size); - CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(TYPE)"); - // platform - clStatus = - clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size); - CHECK_OPENCL(clStatus, 
"populateGPUEnv::getDeviceInfo(PLATFORM)"); - // context - cl_context_properties props[3]; - props[0] = CL_CONTEXT_PLATFORM; - props[1] = (cl_context_properties)gpuInfo->mpPlatformID; - props[2] = 0; - gpuInfo->mpContext = - clCreateContext(props, 1, &gpuInfo->mpDevID, nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "populateGPUEnv::createContext"); - // queue - cl_command_queue_properties queueProperties = 0; - gpuInfo->mpCmdQueue = clCreateCommandQueue( - gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus); - CHECK_OPENCL(clStatus, "populateGPUEnv::createCommandQueue"); -} - -int OpenclDevice::LoadOpencl() { -#ifdef WIN32 - HINSTANCE HOpenclDll = nullptr; - void* OpenclDll = nullptr; - // fprintf(stderr, " LoadOpenclDllxx... \n"); - OpenclDll = static_cast(HOpenclDll); - OpenclDll = LoadLibrary("openCL.dll"); - if (!static_cast(OpenclDll)) { - fprintf(stderr, "[OD] Load opencl.dll failed!\n"); - FreeLibrary(static_cast(OpenclDll)); - return 0; - } - fprintf(stderr, "[OD] Load opencl.dll successful!\n"); -#endif - return 1; -} -int OpenclDevice::SetKernelEnv(KernelEnv* envInfo) { - envInfo->mpkContext = gpuEnv.mpContext; - envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; - envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; - - return 1; -} - -static cl_mem allocateZeroCopyBuffer(const KernelEnv& rEnv, - l_uint32* hostbuffer, size_t nElements, - cl_mem_flags flags, cl_int* pStatus) { - cl_mem membuffer = - clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(flags), - nElements * sizeof(l_uint32), hostbuffer, pStatus); - - return membuffer; -} - -static Pix* mapOutputCLBuffer(const KernelEnv& rEnv, cl_mem clbuffer, Pix* pixd, - Pix* pixs, int elements, cl_mem_flags flags, - bool memcopy = false, bool sync = true) { - if (!pixd) { - if (memcopy) { - if ((pixd = pixCreateTemplate(pixs)) == nullptr) - tprintf("pixd not made\n"); - } else { - if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), - pixGetDepth(pixs))) == nullptr) - tprintf("pixd not 
made\n"); - } - } - l_uint32* pValues = (l_uint32*)clEnqueueMapBuffer( - rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, - elements * sizeof(l_uint32), 0, nullptr, nullptr, nullptr); - - if (memcopy) { - memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32)); - } else { - pixSetData(pixd, pValues); - } - - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, nullptr, - nullptr); - - if (sync) { - clFinish(rEnv.mpkCmdQueue); - } - - return pixd; -} - -void OpenclDevice::releaseMorphCLBuffers() { - if (pixdCLIntermediate != nullptr) clReleaseMemObject(pixdCLIntermediate); - if (pixsCLBuffer != nullptr) clReleaseMemObject(pixsCLBuffer); - if (pixdCLBuffer != nullptr) clReleaseMemObject(pixdCLBuffer); - if (pixThBuffer != nullptr) clReleaseMemObject(pixThBuffer); - pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr; -} - -int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs) { - SetKernelEnv(&rEnv); - - if (pixThBuffer != nullptr) { - pixsCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h, - CL_MEM_ALLOC_HOST_PTR, &clStatus); - - // Get the output from ThresholdToPix operation - clStatus = - clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0, - sizeof(l_uint32) * wpl * h, 0, nullptr, nullptr); - } else { - // Get data from the source image - l_uint32* srcdata = - reinterpret_cast(malloc(wpl * h * sizeof(l_uint32))); - memcpy(srcdata, pixGetData(pixs), wpl * h * sizeof(l_uint32)); - - pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl * h, - CL_MEM_USE_HOST_PTR, &clStatus); - } - - pixdCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h, - CL_MEM_ALLOC_HOST_PTR, &clStatus); - - pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h, - CL_MEM_ALLOC_HOST_PTR, &clStatus); - - return (int)clStatus; -} - -int OpenclDevice::InitEnv() { -// PERF_COUNT_START("OD::InitEnv") -// tprintf("[OD] OpenclDevice::InitEnv()\n"); -#ifdef SAL_WIN32 - while (1) { - if (1 == 
LoadOpencl()) break; - } - PERF_COUNT_SUB("LoadOpencl") -#endif - // sets up environment, compiles programs - - InitOpenclRunEnv_DeviceSelection(0); - // PERF_COUNT_SUB("called InitOpenclRunEnv_DS") - // PERF_COUNT_END - return 1; -} - -int OpenclDevice::ReleaseOpenclRunEnv() { - ReleaseOpenclEnv(&gpuEnv); -#ifdef SAL_WIN32 - FreeOpenclDll(); -#endif - return 1; -} - -inline int OpenclDevice::AddKernelConfig(int kCount, const char* kName) { - ASSERT_HOST(kCount > 0); - ASSERT_HOST(strlen(kName) < sizeof(gpuEnv.mArrykernelNames[kCount - 1])); - strcpy(gpuEnv.mArrykernelNames[kCount - 1], kName); - gpuEnv.mnKernelCount++; - return 0; -} - -int OpenclDevice::RegistOpenclKernel() { - if (!gpuEnv.mnIsUserCreated) memset(&gpuEnv, 0, sizeof(gpuEnv)); - - gpuEnv.mnFileCount = 0; // argc; - gpuEnv.mnKernelCount = 0UL; - - AddKernelConfig(1, "oclAverageSub1"); - return 0; -} - -int OpenclDevice::InitOpenclRunEnv_DeviceSelection(int argc) { - // PERF_COUNT_START("InitOpenclRunEnv_DS") - if (!isInited) { - // after programs compiled, selects best device - ds_device bestDevice_DS = getDeviceSelection(); - // PERF_COUNT_SUB("called getDeviceSelection()") - cl_device_id bestDevice = bestDevice_DS.oclDeviceID; - // overwrite global static GPUEnv with new device - if (selectedDeviceIsOpenCL()) { - // tprintf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice() - // for selected device\n"); - populateGPUEnvFromDevice(&gpuEnv, bestDevice); - gpuEnv.mnFileCount = 0; // argc; - gpuEnv.mnKernelCount = 0UL; - // PERF_COUNT_SUB("populate gpuEnv") - CompileKernelFile(&gpuEnv, ""); - // PERF_COUNT_SUB("CompileKernelFile") - } else { - // tprintf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice() - // b/c native cpu selected\n"); - } - isInited = 1; - } - // PERF_COUNT_END - return 0; -} - -OpenclDevice::OpenclDevice() { - // InitEnv(); -} - -OpenclDevice::~OpenclDevice() { - // ReleaseOpenclRunEnv(); -} - -int OpenclDevice::ReleaseOpenclEnv(GPUEnv* gpuInfo) { - int i = 
0; - int clStatus = 0; - - if (!isInited) { - return 1; - } - - for (i = 0; i < gpuEnv.mnFileCount; i++) { - if (gpuEnv.mpArryPrograms[i]) { - clStatus = clReleaseProgram(gpuEnv.mpArryPrograms[i]); - CHECK_OPENCL(clStatus, "clReleaseProgram"); - gpuEnv.mpArryPrograms[i] = nullptr; - } - } - if (gpuEnv.mpCmdQueue) { - clReleaseCommandQueue(gpuEnv.mpCmdQueue); - gpuEnv.mpCmdQueue = nullptr; - } - if (gpuEnv.mpContext) { - clReleaseContext(gpuEnv.mpContext); - gpuEnv.mpContext = nullptr; - } - isInited = 0; - gpuInfo->mnIsUserCreated = 0; - delete[] gpuInfo->mpArryDevsID; - return 1; -} -int OpenclDevice::BinaryGenerated(const char* clFileName, FILE** fhandle) { - unsigned int i = 0; - cl_int clStatus; - int status = 0; - FILE* fd = nullptr; - char fileName[256] = {0}, cl_name[128] = {0}; - char deviceName[1024]; - clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, nullptr); - CHECK_OPENCL(clStatus, "clGetDeviceInfo"); - const char* str = strstr(clFileName, ".cl"); - memcpy(cl_name, clFileName, str - clFileName); - cl_name[str - clFileName] = '\0'; - sprintf(fileName, "%s-%s.bin", cl_name, deviceName); - legalizeFileName(fileName); - fd = fopen(fileName, "rb"); - status = (fd != nullptr) ? 
1 : 0; - if (fd != nullptr) { - *fhandle = fd; - } - return status; -} -int OpenclDevice::CachedOfKernerPrg(const GPUEnv* gpuEnvCached, - const char* clFileName) { - int i; - for (i = 0; i < gpuEnvCached->mnFileCount; i++) { - if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) { - if (gpuEnvCached->mpArryPrograms[i] != nullptr) { - return 1; - } - } - } - - return 0; -} -int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary, - size_t numBytes) { - FILE* output = nullptr; - output = fopen(fileName, "wb"); - if (output == nullptr) { - return 0; - } - - fwrite(birary, sizeof(char), numBytes, output); - fclose(output); - - return 1; -} - -int OpenclDevice::GeneratBinFromKernelSource(cl_program program, - const char* clFileName) { - unsigned int i = 0; - cl_int clStatus; - cl_uint numDevices; - - clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), &numDevices, nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - std::vector mpArryDevsID(numDevices); - - /* grab the handles to all of the devices in the program. */ - clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, - &mpArryDevsID[0], - nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - /* figure out the sizes of each of the binaries. */ - std::vector binarySizes(numDevices); - - clStatus = - clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, &binarySizes[0], nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - /* copy over all of the generated binaries. 
*/ - std::vector binaries(numDevices); - - for (i = 0; i < numDevices; i++) { - if (binarySizes[i] != 0) { - binaries[i] = new char[binarySizes[i]]; - } else { - binaries[i] = nullptr; - } - } - - clStatus = - clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char*) * numDevices, - &binaries[0], nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - /* dump out each binary into its own separate file. */ - for (i = 0; i < numDevices; i++) { - char fileName[256] = {0}, cl_name[128] = {0}; - - if (binarySizes[i] != 0) { - char deviceName[1024]; - clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, nullptr); - CHECK_OPENCL(clStatus, "clGetDeviceInfo"); - - const char* str = strstr(clFileName, ".cl"); - memcpy(cl_name, clFileName, str - clFileName); - cl_name[str - clFileName] = '\0'; - sprintf(fileName, "%s-%s.bin", cl_name, deviceName); - legalizeFileName(fileName); - if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) { - tprintf("[OD] write binary[%s] failed\n", fileName); - return 0; - } // else - tprintf("[OD] write binary[%s] successfully\n", fileName); - } - } - - // Release all resources and memory - for (i = 0; i < numDevices; i++) { - delete[] binaries[i]; - } - - return 1; -} - -int OpenclDevice::CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption) { - // PERF_COUNT_START("CompileKernelFile") - cl_int clStatus = 0; - const char* source; - size_t source_size[1]; - int binary_status, binaryExisted, idx; - cl_uint numDevices; - FILE *fd, *fd1; - const char* filename = "kernel.cl"; - // fprintf(stderr, "[OD] CompileKernelFile ... 
\n"); - if (CachedOfKernerPrg(gpuInfo, filename) == 1) { - return 1; - } - - idx = gpuInfo->mnFileCount; - - source = kernel_src; - - source_size[0] = strlen(source); - binaryExisted = 0; - binaryExisted = BinaryGenerated( - filename, &fd); // don't check for binary during microbenchmark - // PERF_COUNT_SUB("BinaryGenerated") - if (binaryExisted == 1) { - clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), &numDevices, nullptr); - CHECK_OPENCL(clStatus, "clGetContextInfo"); - - std::vector mpArryDevsID(numDevices); - // PERF_COUNT_SUB("get numDevices") - bool b_error = fseek(fd, 0, SEEK_END) < 0; - long pos = ftell(fd); - b_error |= (pos <= 0); - size_t length = pos; - b_error |= fseek(fd, 0, SEEK_SET) < 0; - if (b_error) { - fclose(fd); - return 0; - } - - std::vector binary(length + 2); - - memset(&binary[0], 0, length + 2); - b_error |= fread(&binary[0], 1, length, fd) != length; - - fclose(fd); - // PERF_COUNT_SUB("read file") - fd = nullptr; - // grab the handles to all of the devices in the context. 
- clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - &mpArryDevsID[0], nullptr); - CHECK_OPENCL(clStatus, "clGetContextInfo"); - // PERF_COUNT_SUB("get devices") - // fprintf(stderr, "[OD] Create kernel from binary\n"); - const uint8_t* c_binary = &binary[0]; - gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( - gpuInfo->mpContext, numDevices, &mpArryDevsID[0], &length, &c_binary, - &binary_status, &clStatus); - CHECK_OPENCL(clStatus, "clCreateProgramWithBinary"); - // PERF_COUNT_SUB("clCreateProgramWithBinary") - // PERF_COUNT_SUB("binaryExisted") - } else { - // create a CL program using the kernel source - // fprintf(stderr, "[OD] Create kernel from source\n"); - gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( - gpuInfo->mpContext, 1, &source, source_size, &clStatus); - CHECK_OPENCL(clStatus, "clCreateProgramWithSource"); - // PERF_COUNT_SUB("!binaryExisted") - } - - if (gpuInfo->mpArryPrograms[idx] == (cl_program) nullptr) { - return 0; - } - - // char options[512]; - // create a cl program executable for all the devices specified - // tprintf("[OD] BuildProgram.\n"); - PERF_COUNT_START("OD::CompileKernel::clBuildProgram") - if (!gpuInfo->mnIsUserCreated) { - clStatus = - clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, - buildOption, nullptr, nullptr); - // PERF_COUNT_SUB("clBuildProgram notUserCreated") - } else { - clStatus = - clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), - buildOption, nullptr, nullptr); - // PERF_COUNT_SUB("clBuildProgram isUserCreated") - } - PERF_COUNT_END - if (clStatus != CL_SUCCESS) { - tprintf("BuildProgram error!\n"); - size_t length; - if (!gpuInfo->mnIsUserCreated) { - clStatus = clGetProgramBuildInfo( - gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); - } else { - clStatus = - clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, - 
CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); - } - if (clStatus != CL_SUCCESS) { - tprintf("opencl create build log fail\n"); - return 0; - } - std::vector buildLog(length); - if (!gpuInfo->mnIsUserCreated) { - clStatus = clGetProgramBuildInfo( - gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, length, &buildLog[0], &length); - } else { - clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], - gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, - length, &buildLog[0], &length); - } - if (clStatus != CL_SUCCESS) { - tprintf("opencl program build info fail\n"); - return 0; - } - - fd1 = fopen("kernel-build.log", "w+"); - if (fd1 != nullptr) { - fwrite(&buildLog[0], sizeof(char), length, fd1); - fclose(fd1); - } - - // PERF_COUNT_SUB("build error log") - return 0; - } - - strcpy(gpuInfo->mArryKnelSrcFile[idx], filename); - // PERF_COUNT_SUB("strcpy") - if (binaryExisted == 0) { - GeneratBinFromKernelSource(gpuInfo->mpArryPrograms[idx], filename); - PERF_COUNT_SUB("GenerateBinFromKernelSource") - } - - gpuInfo->mnFileCount += 1; - // PERF_COUNT_END - return 1; -} - -l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w, - l_int32 h, l_int32 wpl, - l_uint32* line) { - PERF_COUNT_START("pixReadFromTiffKernel") - cl_int clStatus; - KernelEnv rEnv; - size_t globalThreads[2]; - size_t localThreads[2]; - int gsize; - cl_mem valuesCl; - cl_mem outputCl; - - // global and local work dimensions for Horizontal pass - gsize = (w + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - SetKernelEnv(&rEnv); - - l_uint32* pResult = (l_uint32*)malloc(w * h * sizeof(l_uint32)); - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "composeRGBPixel", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel"); - - // Allocate input and output OCL 
buffers - valuesCl = allocateZeroCopyBuffer( - rEnv, tiffdata, w * h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); - outputCl = allocateZeroCopyBuffer( - rEnv, pResult, w * h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); - - // Kernel arguments - clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - - // Kernel enqueue - PERF_COUNT_SUB("before") - clStatus = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel"); - - /* map results back from gpu */ - void* ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, - CL_MAP_READ, 0, w * h * sizeof(l_uint32), 0, - nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, nullptr, nullptr); - - // Sync - clFinish(rEnv.mpkCmdQueue); - PERF_COUNT_SUB("kernel & map") - PERF_COUNT_END - return pResult; -} - -// Morphology Dilate operation for 5x5 structuring element. 
Invokes the relevant -// OpenCL kernels -static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) { - size_t globalThreads[2]; - cl_mem pixtemp; - cl_int status; - int gsize; - size_t localThreads[2]; - - // Horizontal pass - gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX; - globalThreads[0] = gsize; - globalThreads[1] = GROUPSIZE_HMORY; - localThreads[0] = GROUPSIZE_HMORX; - localThreads[1] = GROUPSIZE_HMORY; - - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); - - // Swap source and dest buffers - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - // Vertical - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoDilateVer_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); - - return status; -} - -// Morphology 
Erode operation for 5x5 structuring element. Invokes the relevant -// OpenCL kernels -static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) { - size_t globalThreads[2]; - cl_mem pixtemp; - cl_int status; - int gsize; - l_uint32 fwmask, lwmask; - size_t localThreads[2]; - - lwmask = lmask32[31 - 2]; - fwmask = rmask32[31 - 2]; - - // Horizontal pass - gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX; - globalThreads[0] = gsize; - globalThreads[1] = GROUPSIZE_HMORY; - localThreads[0] = GROUPSIZE_HMORX; - localThreads[1] = GROUPSIZE_HMORY; - - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); - - // Swap source and dest buffers - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - // Vertical - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoErodeVer_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 4, 
sizeof(fwmask), &fwmask); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask); - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); - - return status; -} - -// Morphology Dilate operation. Invokes the relevant OpenCL kernels -static cl_int pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, - l_int32 h) { - l_int32 xp, yp, xn, yn; - SEL* sel; - size_t globalThreads[2]; - cl_mem pixtemp; - cl_int status = 0; - int gsize; - size_t localThreads[2]; - char isEven; - - OpenclDevice::SetKernelEnv(&rEnv); - - if (hsize == 5 && vsize == 5) { - // Specific case for 5x5 - status = pixDilateCL_55(wpl, h); - return status; - } - - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - selDestroy(&sel); - // global and local work dimensions for Horizontal pass - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - if (xp > 31 || xn > 31) { - // Generic case. 
- rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateHor"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } else if (xp > 0 || xn > 0) { - // Specific Horizontal pass kernel for half width < 32 - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word"); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } - - if (yp > 0 || yn > 0) { - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoDilateVer", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateVer"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = 
clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - } - - return status; -} - -// Morphology Erode operation. Invokes the relevant OpenCL kernels -static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, - l_uint32 h) { - l_int32 xp, yp, xn, yn; - SEL* sel; - size_t globalThreads[2]; - size_t localThreads[2]; - cl_mem pixtemp; - cl_int status = 0; - int gsize; - char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC); - l_uint32 rwmask, lwmask; - char isEven; - - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - selDestroy(&sel); - OpenclDevice::SetKernelEnv(&rEnv); - - if (hsize == 5 && vsize == 5 && isAsymmetric) { - // Specific kernel for 5x5 - status = pixErodeCL_55(wpl, h); - return status; - } - - lwmask = lmask32[31 - (xn & 31)]; - rwmask = rmask32[31 - (xp & 31)]; - - // global and local work dimensions for Horizontal pass - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - // Horizontal Pass - if (xp > 31 || xn > 31) { - // Generic case. 
- rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor", &status); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); - status = - clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), &rwmask); - status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), &lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } else if (xp > 0 || xn > 0) { - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_32word", &status); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = - clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), &rwmask); - status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), &lwmask); - status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), &isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - 
} - } - - // Vertical Pass - if (yp > 0 || yn > 0) { - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status); - CHECK_OPENCL(status, "clCreateKernel morphoErodeVer"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = - clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), &yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - } - - return status; -} - -// Morphology Open operation. Invokes the relevant OpenCL kernels -static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { - cl_int status; - cl_mem pixtemp; - - // Erode followed by Dilate - status = pixErodeCL(hsize, vsize, wpl, h); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - status = pixDilateCL(hsize, vsize, wpl, h); - - return status; -} - -// Morphology Close operation. 
Invokes the relevant OpenCL kernels -static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { - cl_int status; - cl_mem pixtemp; - - // Dilate followed by Erode - status = pixDilateCL(hsize, vsize, wpl, h); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - status = pixErodeCL(hsize, vsize, wpl, h); - - return status; -} - -// output = buffer1 & ~(buffer2) -static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, - cl_mem buffer2, cl_mem outBuffer = nullptr) { - cl_int status; - size_t globalThreads[2]; - int gsize; - size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y}; - - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - - if (outBuffer != nullptr) { - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "pixSubtract", &status); - CHECK_OPENCL(status, "clCreateKernel pixSubtract"); - } else { - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "pixSubtract_inplace", &status); - CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace"); - } - - // Enqueue a kernel run call. - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &buffer1); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &buffer2); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - if (outBuffer != nullptr) { - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outBuffer); - } - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); - - return status; -} - -// OpenCL implementation of Get Lines from pix function -// Note: Assumes the source and dest opencl buffer are initialized. 
No check -// done -void OpenclDevice::pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline, - Pix** pix_hline, Pix** pixClosed, - bool getpixClosed, l_int32 close_hsize, - l_int32 close_vsize, l_int32 open_hsize, - l_int32 open_vsize, l_int32 line_hsize, - l_int32 line_vsize) { - l_uint32 wpl, h; - cl_mem pixtemp; - - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - - // First step : Close Morph operation: Dilate followed by Erode - clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); - - // Copy the Close output to CPU buffer - if (getpixClosed) { - *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, - wpl * h, CL_MAP_READ, true, false); - } - - // Store the output of close operation in an intermediate buffer - // this will be later used for pixsubtract - clStatus = - clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, - 0, sizeof(int) * wpl * h, 0, nullptr, nullptr); - - // Second step: Open Operation - Erode followed by Dilate - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); - - // Third step: Subtract : (Close - Open) - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; - - clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); - - // Store the output of Hollow operation in an intermediate buffer - // this will be later used - clStatus = - clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, - 0, sizeof(int) * wpl * h, 0, nullptr, nullptr); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - // Fourth step: Get vertical line - // pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - clStatus = pixOpenCL(1, line_vsize, wpl, h); - - // Copy the vertical line output to CPU buffer - *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h, - CL_MAP_READ, true, 
false); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; - - // Fifth step: Get horizontal line - // pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - clStatus = pixOpenCL(line_hsize, 1, wpl, h); - - // Copy the horizontal line output to CPU buffer - *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h, - CL_MAP_READ, true, true); - - return; -} - -/************************************************************************* - * HistogramRect - * Otsu Thresholding Operations - * histogramAllChannels is laid out as all channel 0, then all channel 1... - * only supports 1 or 4 channels (bytes_per_pixel) - ************************************************************************/ -int OpenclDevice::HistogramRectOCL(void* imageData, - int bytes_per_pixel, int bytes_per_line, - int left, // always 0 - int top, // always 0 - int width, int height, int kHistogramSize, - int* histogramAllChannels) { - PERF_COUNT_START("HistogramRectOCL") - cl_int clStatus; - int retVal = 0; - KernelEnv histKern; - SetKernelEnv(&histKern); - KernelEnv histRedKern; - SetKernelEnv(&histRedKern); - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be - // coherent which we don't need. 
- // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = clCreateBuffer( - histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs; - clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(numCUs), &numCUs, nullptr); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size * numWorkGroups; - size_t local_work_size[] = {static_cast(block_size)}; - size_t global_work_size[] = {static_cast(numThreads)}; - size_t red_global_work_size[] = { - static_cast(block_size * kHistogramSize * bytes_per_pixel)}; - - /* map histogramAllChannels as write only */ - - cl_mem histogramBuffer = clCreateBuffer( - histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - kHistogramSize * bytes_per_pixel * sizeof(int), histogramAllChannels, - &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer"); - - /* intermediate histogram buffer */ - int histRed = 256; - int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed; - - cl_mem tmpHistogramBuffer = - clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE, - tmpHistogramBins * sizeof(cl_uint), nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer"); - - /* atomic sync buffer */ - int* zeroBuffer = new int[1]; - zeroBuffer[0] = 0; - cl_mem atomicSyncBuffer = clCreateBuffer( - histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - sizeof(cl_int), zeroBuffer, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer"); - delete[] zeroBuffer; - // Create kernel objects based on bytes_per_pixel - if (bytes_per_pixel == 1) { - histKern.mpkKernel = 
clCreateKernel( - histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannel"); - - histRedKern.mpkKernel = - clCreateKernel(histRedKern.mpkProgram, - "kernel_HistogramRectOneChannelReduction", &clStatus); - CHECK_OPENCL(clStatus, - "clCreateKernel kernel_HistogramRectOneChannelReduction"); - } else { - histKern.mpkKernel = clCreateKernel( - histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectAllChannels"); - - histRedKern.mpkKernel = - clCreateKernel(histRedKern.mpkProgram, - "kernel_HistogramRectAllChannelsReduction", &clStatus); - CHECK_OPENCL(clStatus, - "clCreateKernel kernel_HistogramRectAllChannelsReduction"); - } - - void* ptr; - - // Initialize tmpHistogramBuffer buffer - ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, - CL_MAP_WRITE, 0, tmpHistogramBins * sizeof(cl_uint), - 0, nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer tmpHistogramBuffer"); - - memset(ptr, 0, tmpHistogramBins * sizeof(cl_uint)); - clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, - nullptr, nullptr); - - /* set kernel 1 arguments */ - clStatus = - clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); - cl_uint numPixels = width * height; - clStatus = clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels); - CHECK_OPENCL(clStatus, "clSetKernelArg numPixels"); - clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem), - &tmpHistogramBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer"); - - /* set kernel 2 arguments */ - int n = numThreads / bytes_per_pixel; - clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n); - CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, 
sizeof(cl_mem), - &tmpHistogramBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer"); - clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem), - &histogramBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg histogramBuffer"); - - /* launch histogram */ - PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1, - nullptr, global_work_size, local_work_size, - 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, - "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels"); - clFinish(histKern.mpkCmdQueue); - if (clStatus != 0) { - retVal = -1; - } - /* launch histogram */ - clStatus = clEnqueueNDRangeKernel( - histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, nullptr, - red_global_work_size, local_work_size, 0, nullptr, nullptr); - CHECK_OPENCL( - clStatus, - "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction"); - clFinish(histRedKern.mpkCmdQueue); - if (clStatus != 0) { - retVal = -1; - } - PERF_COUNT_SUB("redKernel") - - /* map results back from gpu */ - ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, - CL_MAP_READ, 0, - kHistogramSize * bytes_per_pixel * sizeof(int), 0, - nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); - if (clStatus != 0) { - retVal = -1; - } - clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, - nullptr, nullptr); - - clReleaseMemObject(histogramBuffer); - clReleaseMemObject(imageBuffer); - PERF_COUNT_SUB("after") - PERF_COUNT_END - return retVal; -} - -/************************************************************************* - * Threshold the rectangle, taking everything except the image buffer pointer - * from the class, using thresholds/hi_values to the output IMAGE. 
- * only supports 1 or 4 channels - ************************************************************************/ -int OpenclDevice::ThresholdRectToPixOCL(unsigned char* imageData, - int bytes_per_pixel, int bytes_per_line, - int* thresholds, int* hi_values, - Pix** pix, int height, int width, - int top, int left) { - PERF_COUNT_START("ThresholdRectToPixOCL") - int retVal = 0; - /* create pix result buffer */ - *pix = pixCreate(width, height, 1); - uint32_t* pixData = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - int pixSize = wpl * height * sizeof(uint32_t); // number of pixels - - cl_int clStatus; - KernelEnv rEnv; - SetKernelEnv(&rEnv); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs = 6; - clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(numCUs), &numCUs, nullptr); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size * numWorkGroups; - size_t local_work_size[] = {(size_t)block_size}; - size_t global_work_size[] = {(size_t)numThreads}; - - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be - // coherent which we don't need. 
- // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = clCreateBuffer( - rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - /* map pix as write only */ - pixThBuffer = - clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - pixSize, pixData, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer pix"); - - /* map thresholds and hi_values */ - cl_mem thresholdsBuffer = - clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), thresholds, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer"); - cl_mem hiValuesBuffer = - clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), hi_values, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer"); - - /* compile kernel */ - if (bytes_per_pixel == 4) { - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix"); - } else { - rEnv.mpkKernel = clCreateKernel( - rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan"); - } - - /* set kernel arguments */ - clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), &height); - CHECK_OPENCL(clStatus, "clSetKernelArg height"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), &width); - CHECK_OPENCL(clStatus, "clSetKernelArg width"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), &wpl); - CHECK_OPENCL(clStatus, "clSetKernelArg wpl"); - clStatus = - clSetKernelArg(rEnv.mpkKernel, 4, 
sizeof(cl_mem), &thresholdsBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), &hiValuesBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer"); - - /* launch kernel & wait */ - PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1, - nullptr, global_work_size, local_work_size, - 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix"); - clFinish(rEnv.mpkCmdQueue); - PERF_COUNT_SUB("kernel") - if (clStatus != 0) { - tprintf("Setting return value to -1\n"); - retVal = -1; - } - /* map results back from gpu */ - void* ptr = - clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, - pixSize, 0, nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, nullptr, - nullptr); - - clReleaseMemObject(imageBuffer); - clReleaseMemObject(thresholdsBuffer); - clReleaseMemObject(hiValuesBuffer); - - PERF_COUNT_SUB("after") - PERF_COUNT_END - return retVal; -} - -/****************************************************************************** - * Data Types for Device Selection - *****************************************************************************/ - -struct TessScoreEvaluationInputData { - int height; - int width; - int numChannels; - unsigned char* imageData; - Pix* pix; -}; - -static void populateTessScoreEvaluationInputData( - TessScoreEvaluationInputData* input) { - srand(1); - // 8.5x11 inches @ 300dpi rounded to clean multiples - int height = 3328; // %256 - int width = 2560; // %512 - int numChannels = 4; - input->height = height; - input->width = width; - input->numChannels = numChannels; - unsigned char(*imageData4)[4] = (unsigned 
char(*)[4])malloc( - height * width * numChannels * - sizeof(unsigned char)); // new unsigned char[4][height*width]; - input->imageData = (unsigned char*)&imageData4[0]; - - // zero out image - unsigned char pixelWhite[4] = {0, 0, 0, 255}; - unsigned char pixelBlack[4] = {255, 255, 255, 255}; - for (int p = 0; p < height * width; p++) { - // unsigned char tmp[4] = imageData4[0]; - imageData4[p][0] = pixelWhite[0]; - imageData4[p][1] = pixelWhite[1]; - imageData4[p][2] = pixelWhite[2]; - imageData4[p][3] = pixelWhite[3]; - } - // random lines to be eliminated - int maxLineWidth = 64; // pixels wide - int numLines = 10; - // vertical lines - for (int i = 0; i < numLines; i++) { - int lineWidth = rand() % maxLineWidth; - int vertLinePos = lineWidth + rand() % (width - 2 * lineWidth); - // tprintf("[PI] VerticalLine @ %i (w=%i)\n", vertLinePos, lineWidth); - for (int row = vertLinePos - lineWidth / 2; - row < vertLinePos + lineWidth / 2; row++) { - for (int col = 0; col < height; col++) { - // imageData4[row*width+col] = pixelBlack; - imageData4[row * width + col][0] = pixelBlack[0]; - imageData4[row * width + col][1] = pixelBlack[1]; - imageData4[row * width + col][2] = pixelBlack[2]; - imageData4[row * width + col][3] = pixelBlack[3]; - } - } - } - // horizontal lines - for (int i = 0; i < numLines; i++) { - int lineWidth = rand() % maxLineWidth; - int horLinePos = lineWidth + rand() % (height - 2 * lineWidth); - // tprintf("[PI] HorizontalLine @ %i (w=%i)\n", horLinePos, lineWidth); - for (int row = 0; row < width; row++) { - for (int col = horLinePos - lineWidth / 2; - col < horLinePos + lineWidth / 2; - col++) { // for (int row = vertLinePos-lineWidth/2; row < - // vertLinePos+lineWidth/2; row++) { - // tprintf("[PI] HoizLine pix @ (%3i, %3i)\n", row, col); - // imageData4[row*width+col] = pixelBlack; - imageData4[row * width + col][0] = pixelBlack[0]; - imageData4[row * width + col][1] = pixelBlack[1]; - imageData4[row * width + col][2] = pixelBlack[2]; - 
imageData4[row * width + col][3] = pixelBlack[3]; - } - } - } - // spots (noise, squares) - float fractionBlack = 0.1; // how much of the image should be blackened - int numSpots = - (height * width) * fractionBlack / (maxLineWidth * maxLineWidth / 2 / 2); - for (int i = 0; i < numSpots; i++) { - int lineWidth = rand() % maxLineWidth; - int col = lineWidth + rand() % (width - 2 * lineWidth); - int row = lineWidth + rand() % (height - 2 * lineWidth); - // tprintf("[PI] Spot[%i/%i] @ (%3i, %3i)\n", i, numSpots, row, col ); - for (int r = row - lineWidth / 2; r < row + lineWidth / 2; r++) { - for (int c = col - lineWidth / 2; c < col + lineWidth / 2; c++) { - // tprintf("[PI] \tSpot[%i/%i] @ (%3i, %3i)\n", i, numSpots, r, c ); - // imageData4[row*width+col] = pixelBlack; - imageData4[r * width + c][0] = pixelBlack[0]; - imageData4[r * width + c][1] = pixelBlack[1]; - imageData4[r * width + c][2] = pixelBlack[2]; - imageData4[r * width + c][3] = pixelBlack[3]; - } - } - } - - input->pix = pixCreate(input->width, input->height, 1); -} - -struct TessDeviceScore { - float time; // small time means faster device - bool clError; // were there any opencl errors - bool valid; // was the correct response generated -}; - -/****************************************************************************** - * Micro Benchmarks for Device Selection - *****************************************************************************/ - -static double composeRGBPixelMicroBench(GPUEnv* env, - TessScoreEvaluationInputData input, - ds_device_type type) { - double time = 0; -#if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -#else - timespec time_funct_start, time_funct_end; -#endif - // input data - l_uint32* tiffdata = - (l_uint32*)input.imageData; // same size and random data; data doesn't - // change workload - - // function 
call - if (type == DS_DEVICE_OPENCL_DEVICE) { -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - - OpenclDevice::gpuEnv = *env; - int wpl = pixGetWpl(input.pix); - OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, - wpl, nullptr); -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - - } else { -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - Pix* pix = pixCreate(input.width, input.height, 32); - l_uint32* pixData = pixGetData(pix); - int i, j; - int idx = 0; - for (i = 0; i < input.height; i++) { - for (j = 0; j < input.width; j++) { - l_uint32 tiffword = tiffdata[i * input.width + j]; - l_int32 rval = ((tiffword)&0xff); - l_int32 gval = (((tiffword) >> 8) & 0xff); - l_int32 bval = (((tiffword) >> 16) & 0xff); - l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8); - pixData[idx] = value; - idx++; - } - } -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - 
pixDestroy(&pix); - } - - return time; -} - -static double histogramRectMicroBench(GPUEnv* env, - TessScoreEvaluationInputData input, - ds_device_type type) { - double time; -#if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -#else - timespec time_funct_start, time_funct_end; -#endif - - const int left = 0; - const int top = 0; - int kHistogramSize = 256; - int bytes_per_line = input.width * input.numChannels; - int* histogramAllChannels = new int[kHistogramSize * input.numChannels]; - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - - OpenclDevice::gpuEnv = *env; - int retVal = OpenclDevice::HistogramRectOCL( - input.imageData, input.numChannels, bytes_per_line, left, top, - input.width, input.height, kHistogramSize, histogramAllChannels); - -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - if (retVal == 0) { - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; - } else { - time = FLT_MAX; - } -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - } else { - int* histogram = new int[kHistogramSize]; -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - for (int ch = 0; ch < input.numChannels; ++ch) { - tesseract::HistogramRect(input.pix, input.numChannels, left, top, - 
input.width, input.height, histogram); - } -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - delete[] histogram; - } - - // cleanup - delete[] histogramAllChannels; - return time; -} - -// Reproducing the ThresholdRectToPix native version -static void ThresholdRectToPix_Native(const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - const int* thresholds, - const int* hi_values, Pix** pix) { - int top = 0; - int left = 0; - int width = pixGetWidth(*pix); - int height = pixGetHeight(*pix); - - *pix = pixCreate(width, height, 1); - uint32_t* pixdata = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - const unsigned char* srcdata = - imagedata + top * bytes_per_line + left * bytes_per_pixel; - for (int y = 0; y < height; ++y) { - const uint8_t* linedata = srcdata; - uint32_t* pixline = pixdata + y * wpl; - for (int x = 0; x < width; ++x, linedata += bytes_per_pixel) { - bool white_result = true; - for (int ch = 0; ch < bytes_per_pixel; ++ch) { - if (hi_values[ch] >= 0 && - (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { - white_result = false; - break; - } - } - if (white_result) - CLEAR_DATA_BIT(pixline, x); - else - SET_DATA_BIT(pixline, x); - } - srcdata += bytes_per_line; - } -} - -static double thresholdRectToPixMicroBench(GPUEnv* env, - TessScoreEvaluationInputData input, - ds_device_type type) { - double time; -#if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long 
start, stop; -#else - timespec time_funct_start, time_funct_end; -#endif - - // input data - unsigned char pixelHi = (unsigned char)255; - int thresholds[4] = {pixelHi, pixelHi, pixelHi, pixelHi}; - - // Pix* pix = pixCreate(width, height, 1); - int top = 0; - int left = 0; - int bytes_per_line = input.width * input.numChannels; - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - - OpenclDevice::gpuEnv = *env; - int hi_values[4]; - int retVal = OpenclDevice::ThresholdRectToPixOCL( - input.imageData, input.numChannels, bytes_per_line, thresholds, - hi_values, &input.pix, input.height, input.width, top, left); - -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - if (retVal == 0) { - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; - } else { - time = FLT_MAX; - } - -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - } else { - tesseract::ImageThresholder thresholder; - thresholder.SetImage(input.pix); -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - int hi_values[4] = {}; - ThresholdRectToPix_Native(input.imageData, input.numChannels, - bytes_per_line, thresholds, hi_values, - &input.pix); - -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / 
info.denom) / 1.0E9; -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - } - - return time; -} - -static double getLineMasksMorphMicroBench(GPUEnv* env, - TessScoreEvaluationInputData input, - ds_device_type type) { - double time = 0; -#if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -#else - timespec time_funct_start, time_funct_end; -#endif - - // input data - int resolution = 300; - int wpl = pixGetWpl(input.pix); - int kThinLineFraction = 20; // tess constant - int kMinLineLengthFraction = 4; // tess constant - int max_line_width = resolution / kThinLineFraction; - int min_line_length = resolution / kMinLineLengthFraction; - int closing_brick = max_line_width / 3; - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - OpenclDevice::gpuEnv = *env; - OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix); - Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr; - OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, - &pix_closed, true, closing_brick, closing_brick, - max_line_width, max_line_width, min_line_length, - min_line_length); - - OpenclDevice::releaseMorphCLBuffers(); - -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = 
(time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - } else { -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -#endif - - // native serial code - Pix* src_pix = input.pix; - Pix* pix_closed = - pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - Pix* pix_solid = - pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); - Pix* pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - pixDestroy(&pix_solid); - Pix* pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - Pix* pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - pixDestroy(&pix_hline); - pixDestroy(&pix_vline); - pixDestroy(&pix_hollow); - -#if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / - (double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -#else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -#endif - } - - return time; -} - -/****************************************************************************** - * Device Selection - *****************************************************************************/ - -#include - -// encode score object as byte string -static ds_status serializeScore(ds_device* device, void** serializedScore, - unsigned int* serializedScoreSize) { - *serializedScoreSize = sizeof(TessDeviceScore); - *serializedScore = new unsigned char[*serializedScoreSize]; - memcpy(*serializedScore, device->score, *serializedScoreSize); - return DS_SUCCESS; -} - -// parses byte string and stores in score object -static ds_status 
deserializeScore(ds_device* device, - const unsigned char* serializedScore, - unsigned int serializedScoreSize) { - // check that serializedScoreSize == sizeof(TessDeviceScore); - device->score = new TessDeviceScore; - memcpy(device->score, serializedScore, serializedScoreSize); - return DS_SUCCESS; -} - -static ds_status releaseScore(TessDeviceScore* score) { - delete score; - return DS_SUCCESS; -} - -// evaluate devices -static ds_status evaluateScoreForDevice(ds_device* device, void* inputData) { - // overwrite statuc gpuEnv w/ current device - // so native opencl calls can be used; they use static gpuEnv - tprintf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, - device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - GPUEnv* env = nullptr; - if (device->type == DS_DEVICE_OPENCL_DEVICE) { - env = &OpenclDevice::gpuEnv; - memset(env, 0, sizeof(*env)); - // tprintf("[DS] populating tmp GPUEnv from device\n"); - populateGPUEnvFromDevice(env, device->oclDeviceID); - env->mnFileCount = 0; // argc; - env->mnKernelCount = 0UL; - // tprintf("[DS] compiling kernels for tmp GPUEnv\n"); - OpenclDevice::CompileKernelFile(env, ""); - } - - TessScoreEvaluationInputData* input = - static_cast(inputData); - - // pixReadTiff - double composeRGBPixelTime = - composeRGBPixelMicroBench(env, *input, device->type); - - // HistogramRect - double histogramRectTime = histogramRectMicroBench(env, *input, device->type); - - // ThresholdRectToPix - double thresholdRectToPixTime = - thresholdRectToPixMicroBench(env, *input, device->type); - - // getLineMasks - double getLineMasksMorphTime = - getLineMasksMorphMicroBench(env, *input, device->type); - - // weigh times (% of cpu time) - // these weights should be the % execution time that the native cpu code took - float composeRGBPixelWeight = 1.2f; - float histogramRectWeight = 2.4f; - float thresholdRectToPixWeight = 4.5f; - float getLineMasksMorphWeight = 5.0f; - - float weightedTime = composeRGBPixelWeight 
* composeRGBPixelTime + - histogramRectWeight * histogramRectTime + - thresholdRectToPixWeight * thresholdRectToPixTime + - getLineMasksMorphWeight * getLineMasksMorphTime; - device->score = new TessDeviceScore; - device->score->time = weightedTime; - - tprintf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, - device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - tprintf("[DS]%25s: %f (w=%.1f)\n", "composeRGBPixel", composeRGBPixelTime, - composeRGBPixelWeight); - tprintf("[DS]%25s: %f (w=%.1f)\n", "HistogramRect", histogramRectTime, - histogramRectWeight); - tprintf("[DS]%25s: %f (w=%.1f)\n", "ThresholdRectToPix", - thresholdRectToPixTime, thresholdRectToPixWeight); - tprintf("[DS]%25s: %f (w=%.1f)\n", "getLineMasksMorph", getLineMasksMorphTime, - getLineMasksMorphWeight); - tprintf("[DS]%25s: %f\n", "Score", device->score->time); - return DS_SUCCESS; -} - -// initial call to select device -ds_device OpenclDevice::getDeviceSelection() { - if (!deviceIsSelected) { - PERF_COUNT_START("getDeviceSelection") - // check if opencl is available at runtime - if (1 == LoadOpencl()) { - // opencl is available - // PERF_COUNT_SUB("LoadOpencl") - // setup devices - ds_status status; - ds_profile* profile; - status = initDSProfile(&profile, "v0.1"); - PERF_COUNT_SUB("initDSProfile") - // try reading scores from file - const char* fileName = "tesseract_opencl_profile_devices.dat"; - status = readProfileFromFile(profile, deserializeScore, fileName); - if (status != DS_SUCCESS) { - // need to run evaluation - tprintf("[DS] Profile file not available (%s); performing profiling.\n", - fileName); - - // create input data - TessScoreEvaluationInputData input; - populateTessScoreEvaluationInputData(&input); - // PERF_COUNT_SUB("populateTessScoreEvaluationInputData") - // perform evaluations - unsigned int numUpdates; - status = profileDevices(profile, DS_EVALUATE_ALL, - evaluateScoreForDevice, &input, &numUpdates); - PERF_COUNT_SUB("profileDevices") - // 
write scores to file - if (status == DS_SUCCESS) { - status = writeProfileToFile(profile, serializeScore, fileName); - PERF_COUNT_SUB("writeProfileToFile") - if (status == DS_SUCCESS) { - tprintf("[DS] Scores written to file (%s).\n", fileName); - } else { - tprintf( - "[DS] Error saving scores to file (%s); scores not written to " - "file.\n", - fileName); - } - } else { - tprintf( - "[DS] Unable to evaluate performance; scores not written to " - "file.\n"); - } - } else { - PERF_COUNT_SUB("readProfileFromFile") - tprintf("[DS] Profile read from file (%s).\n", fileName); - } - - // we now have device scores either from file or evaluation - // select fastest using custom Tesseract selection algorithm - float bestTime = FLT_MAX; // begin search with worst possible time - int bestDeviceIdx = -1; - for (unsigned d = 0; d < profile->numDevices; d++) { - ds_device device = profile->devices[d]; - if (device.score == nullptr) continue; - TessDeviceScore score = *device.score; - - float time = score.time; - tprintf("[DS] Device[%u] %i:%s score is %f\n", d + 1, device.type, - device.oclDeviceName, time); - if (time < bestTime) { - bestTime = time; - bestDeviceIdx = d; - } - } - if (bestDeviceIdx >= 0) { - tprintf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, - profile->devices[bestDeviceIdx].oclDeviceName, - profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE - ? "OpenCL" - : "Native"); - } - // cleanup - // TODO: call destructor for profile object? 
- - bool overridden = false; - char* overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); - if (overrideDeviceStr != nullptr) { - int overrideDeviceIdx = atoi(overrideDeviceStr); - if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) { - tprintf( - "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, " - "%i)\n", - overrideDeviceStr, overrideDeviceIdx); - bestDeviceIdx = overrideDeviceIdx - 1; - overridden = true; - } else { - tprintf( - "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are " - "valid devices).\n", - overrideDeviceStr, profile->numDevices); - } - } - - if (overridden) { - tprintf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, - profile->devices[bestDeviceIdx].oclDeviceName, - profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE - ? "OpenCL" - : "Native"); - } - selectedDevice = profile->devices[bestDeviceIdx]; - // cleanup - releaseDSProfile(profile, releaseScore); - } else { - // opencl isn't available at runtime, select native cpu device - tprintf("[DS] OpenCL runtime not available.\n"); - selectedDevice.type = DS_DEVICE_NATIVE_CPU; - selectedDevice.oclDeviceName = "(null)"; - selectedDevice.score = nullptr; - selectedDevice.oclDeviceID = nullptr; - selectedDevice.oclDriverVersion = nullptr; - } - deviceIsSelected = true; - PERF_COUNT_SUB("select from Profile") - PERF_COUNT_END - } - // PERF_COUNT_END - return selectedDevice; -} - -bool OpenclDevice::selectedDeviceIsOpenCL() { - ds_device device = getDeviceSelection(); - return (device.type == DS_DEVICE_OPENCL_DEVICE); -} - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/openclwrapper.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/openclwrapper.h deleted file mode 100644 index 1bd9aa9a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/opencl/openclwrapper.h +++ /dev/null @@ -1,295 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); 
-// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_ -#define TESSERACT_OPENCL_OPENCLWRAPPER_H_ - -#include -#include "allheaders.h" -#include "pix.h" -#include "tprintf.h" - -// including CL/cl.h doesn't occur until USE_OPENCL defined below - -// platform preprocessor commands -#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ - defined(__CYGWIN__) || defined(__MINGW32__) -#define ON_WINDOWS 1 -#define ON_LINUX 0 -#define ON_APPLE 0 -#define ON_OTHER 0 -#define IF_WINDOWS(X) X -#define IF_LINUX(X) -#define IF_APPLE(X) -#define IF_OTHER(X) -#define NOT_WINDOWS(X) -#elif defined(__linux__) -#define ON_WINDOWS 0 -#define ON_LINUX 1 -#define ON_APPLE 0 -#define ON_OTHER 0 -#define IF_WINDOWS(X) -#define IF_LINUX(X) X -#define IF_APPLE(X) -#define IF_OTHER(X) -#define NOT_WINDOWS(X) X -#elif defined(__APPLE__) -#define ON_WINDOWS 0 -#define ON_LINUX 0 -#define ON_APPLE 1 -#define ON_OTHER 0 -#define IF_WINDOWS(X) -#define IF_LINUX(X) -#define IF_APPLE(X) X -#define IF_OTHER(X) -#define NOT_WINDOWS(X) X -#else -#define ON_WINDOWS 0 -#define ON_LINUX 0 -#define ON_APPLE 0 -#define ON_OTHER 1 -#define IF_WINDOWS(X) -#define IF_LINUX(X) -#define IF_APPLE(X) -#define IF_OTHER(X) X -#define NOT_WINDOWS(X) X -#endif - -#if ON_LINUX -#include -#endif - -/************************************************************************************ - * enable/disable reporting of performance - * PERF_REPORT_LEVEL - * 0 - no reporting - * 1 - no reporting - * 2 - report total function call time for 
functions we're tracking - * 3 - optionally report breakdown of function calls (kernel launch, kernel - *time, data copies) - ************************************************************************************/ -#define PERF_COUNT_VERBOSE 1 -#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n" - -#if ON_WINDOWS - -#if PERF_COUNT_VERBOSE >= 2 -#define PERF_COUNT_START(FUNCT_NAME) \ - char* funct_name = FUNCT_NAME; \ - double elapsed_time_sec; \ - LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, \ - time_sub_end; \ - QueryPerformanceFrequency(&freq); \ - QueryPerformanceCounter(&time_funct_start); \ - time_sub_start = time_funct_start; \ - time_sub_end = time_funct_start; - -#define PERF_COUNT_END \ - QueryPerformanceCounter(&time_funct_end); \ - elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \ - (double)(freq.QuadPart); \ - tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); -#else -#define PERF_COUNT_START(FUNCT_NAME) -#define PERF_COUNT_END -#endif - -#if PERF_COUNT_VERBOSE >= 3 -#define PERF_COUNT_SUB(SUB) \ - QueryPerformanceCounter(&time_sub_end); \ - elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \ - (double)(freq.QuadPart); \ - tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ - time_sub_start = time_sub_end; -#else -#define PERF_COUNT_SUB(SUB) -#endif - -// not on windows -#else - -#if PERF_COUNT_VERBOSE >= 2 -#define PERF_COUNT_START(FUNCT_NAME) \ - char* funct_name = FUNCT_NAME; \ - double elapsed_time_sec; \ - timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); \ - time_sub_start = time_funct_start; \ - time_sub_end = time_funct_start; - -#define PERF_COUNT_END \ - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ - elapsed_time_sec = \ - (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \ - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \ 
- tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); -#else -#define PERF_COUNT_START(FUNCT_NAME) -#define PERF_COUNT_END -#endif - -#if PERF_COUNT_VERBOSE >= 3 -#define PERF_COUNT_SUB(SUB) \ - clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \ - elapsed_time_sec = \ - (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \ - (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \ - tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ - time_sub_start = time_sub_end; -#else -#define PERF_COUNT_SUB(SUB) -#endif - -#endif -/************************************************************************** - * enable/disable use of OpenCL - **************************************************************************/ - -#ifdef USE_OPENCL -#include "opencl_device_selection.h" - -#ifndef strcasecmp -#define strcasecmp strcmp -#endif - -#define MAX_KERNEL_STRING_LEN 64 -#define MAX_CLFILE_NUM 50 -#define MAX_CLKERNEL_NUM 200 -#define MAX_KERNEL_NAME_LEN 64 -#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E -#define GROUPSIZE_X 16 -#define GROUPSIZE_Y 16 -#define GROUPSIZE_HMORX 256 -#define GROUPSIZE_HMORY 1 - -struct KernelEnv { - cl_context mpkContext; - cl_command_queue mpkCmdQueue; - cl_program mpkProgram; - cl_kernel mpkKernel; - char mckKernelName[150]; -}; - -struct OpenCLEnv { - cl_platform_id mpOclPlatformID; - cl_context mpOclContext; - cl_device_id mpOclDevsID; - cl_command_queue mpOclCmdQueue; -}; -typedef int (*cl_kernel_function)(void** userdata, KernelEnv* kenv); - -#define CHECK_OPENCL(status, name) \ - if (status != CL_SUCCESS) { \ - tprintf("OpenCL error code is %d at when %s .\n", status, name); \ - } - -struct GPUEnv { - // share vb in all modules in hb library - cl_platform_id mpPlatformID; - cl_device_type mDevType; - cl_context mpContext; - cl_device_id* mpArryDevsID; - cl_device_id mpDevID; - cl_command_queue mpCmdQueue; - cl_kernel mpArryKernels[MAX_CLFILE_NUM]; - cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program 
object maps one - // kernel source file - char mArryKnelSrcFile[MAX_CLFILE_NUM] - [256], // the max len of kernel file name is 256 - mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; - cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; - int mnKernelCount, mnFileCount, // only one kernel file - mnIsUserCreated; // 1: created , 0:no create and needed to create by - // opencl wrapper - int mnKhrFp64Flag; - int mnAmdFp64Flag; -}; - -class OpenclDevice { - public: - static GPUEnv gpuEnv; - static int isInited; - OpenclDevice(); - ~OpenclDevice(); - static int InitEnv(); // load dll, call InitOpenclRunEnv(0) - static int InitOpenclRunEnv( - int argc); // RegistOpenclKernel, double flags, compile kernels - static int InitOpenclRunEnv_DeviceSelection( - int argc); // RegistOpenclKernel, double flags, compile kernels - static int RegistOpenclKernel(); - static int ReleaseOpenclRunEnv(); - static int ReleaseOpenclEnv(GPUEnv* gpuInfo); - static int CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption); - static int CachedOfKernerPrg(const GPUEnv* gpuEnvCached, - const char* clFileName); - static int GeneratBinFromKernelSource(cl_program program, - const char* clFileName); - static int WriteBinaryToFile(const char* fileName, const char* birary, - size_t numBytes); - static int BinaryGenerated(const char* clFileName, FILE** fhandle); - // static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const - // char *buildOption ); - static l_uint32* pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w, - l_int32 h, l_int32 wpl, - l_uint32* line); - static int composeRGBPixelCl(int* tiffdata, int* line, int h, int w); - - /* OpenCL implementations of Morphological operations*/ - - // Initialization of OCL buffers used in Morph operations - static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs); - static void releaseMorphCLBuffers(); - - static void pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline, - Pix** pix_hline, Pix** pixClosed, 
bool getpixClosed, - l_int32 close_hsize, l_int32 close_vsize, - l_int32 open_hsize, l_int32 open_vsize, - l_int32 line_hsize, l_int32 line_vsize); - - // int InitOpenclAttr( OpenCLEnv * env ); - // int ReleaseKernel( KernelEnv * env ); - static int SetKernelEnv(KernelEnv* envInfo); - // int CreateKernel( char * kernelname, KernelEnv * env ); - // int RunKernel( const char *kernelName, void **userdata ); - // int ConvertToString( const char *filename, char **source ); - // int CheckKernelName( KernelEnv *envInfo, const char *kernelName ); - // int RegisterKernelWrapper( const char *kernelName, cl_kernel_function - // function ); int RunKernelWrapper( cl_kernel_function function, const char * - // kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char - // *kernelName, KernelEnv *env, cl_kernel_function *function ); - - static int LoadOpencl(); -#ifdef WIN32 - // static int OpenclInite(); - static void FreeOpenclDll(); -#endif - - inline static int AddKernelConfig(int kCount, const char* kName); - - /* for binarization */ - static int HistogramRectOCL(void* imagedata, int bytes_per_pixel, - int bytes_per_line, int left, int top, int width, - int height, int kHistogramSize, - int* histogramAllChannels); - - static int ThresholdRectToPixOCL(unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int* thresholds, int* hi_values, Pix** pix, - int rect_height, int rect_width, - int rect_top, int rect_left); - - static ds_device getDeviceSelection(); - static ds_device selectedDevice; - static bool deviceIsSelected; - static bool selectedDeviceIsOpenCL(); -}; - -#endif // USE_OPENCL -#endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/Makefile.am deleted file mode 100644 index 56f7bd30..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/Makefile.am +++ /dev/null @@ -1,47 +0,0 
@@ -AM_CPPFLAGS += \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/viewer \ - -I$(top_srcdir)/src/ccmain \ - -I$(top_srcdir)/src/wordrec \ - -I$(top_srcdir)/src/api \ - -I$(top_srcdir)/src/cutil \ - -I$(top_srcdir)/src/classify \ - -I$(top_srcdir)/src/dict \ - -I$(top_srcdir)/src/opencl - -AM_CPPFLAGS += $(OPENCL_CPPFLAGS) - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - - -noinst_HEADERS = \ - alignedblob.h baselinedetect.h bbgrid.h blkocc.h blobgrid.h \ - ccnontextdetect.h cjkpitch.h colfind.h colpartition.h colpartitionset.h \ - colpartitiongrid.h \ - devanagari_processing.h drawedg.h drawtord.h edgblob.h edgloop.h \ - equationdetectbase.h \ - fpchop.h gap_map.h imagefind.h linefind.h makerow.h oldbasel.h \ - pithsync.h pitsync1.h scanedg.h sortflts.h strokewidth.h \ - tabfind.h tablefind.h tabvector.h \ - tablerecog.h textlineprojection.h textord.h \ - topitch.h tordmain.h tovars.h \ - underlin.h wordseg.h workingpartset.h - -noinst_LTLIBRARIES = libtesseract_textord.la - -libtesseract_textord_la_SOURCES = \ - alignedblob.cpp baselinedetect.cpp bbgrid.cpp blkocc.cpp blobgrid.cpp \ - ccnontextdetect.cpp cjkpitch.cpp colfind.cpp colpartition.cpp colpartitionset.cpp \ - colpartitiongrid.cpp devanagari_processing.cpp \ - drawedg.cpp drawtord.cpp edgblob.cpp edgloop.cpp \ - equationdetectbase.cpp \ - fpchop.cpp gap_map.cpp imagefind.cpp linefind.cpp makerow.cpp oldbasel.cpp \ - pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \ - tabfind.cpp tablefind.cpp tabvector.cpp \ - tablerecog.cpp textlineprojection.cpp textord.cpp \ - topitch.cpp tordmain.cpp tospace.cpp tovars.cpp \ - underlin.cpp wordseg.cpp workingpartset.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/alignedblob.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/alignedblob.cpp deleted file mode 100644 index 9a088dc3..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/alignedblob.cpp +++ /dev/null @@ -1,534 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: alignedblob.cpp -// Description: Subclass of BBGrid to find vertically aligned blobs. -// Author: Ray Smith -// Created: Fri Mar 21 15:03:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "alignedblob.h" - -#include - -INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); -INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); -INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle"); -INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle"); -INT_VAR(textord_testregion_right, INT32_MAX, "Right edge of debug rectangle"); -INT_VAR(textord_testregion_bottom, INT32_MAX, "Bottom edge of debug rectangle"); -BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); - -namespace tesseract { - -// Fraction of resolution used as alignment tolerance for aligned tabs. -const double kAlignedFraction = 0.03125; -// Fraction of resolution used as alignment tolerance for ragged tabs. -const double kRaggedFraction = 2.5; -// Fraction of height used as a minimum gutter gap for aligned blobs. 
-const double kAlignedGapFraction = 0.75; -// Fraction of height used as a minimum gutter gap for ragged tabs. -const double kRaggedGapFraction = 1.0; -// Constant number of pixels used as alignment tolerance for line finding. -const int kVLineAlignment = 3; -// Constant number of pixels used as gutter gap tolerance for line finding. -const int kVLineGutter = 1; -// Constant number of pixels used as the search size for line finding. -const int kVLineSearchSize = 150; -// Min number of points to accept for a ragged tab stop. -const int kMinRaggedTabs = 5; -// Min number of points to accept for an aligned tab stop. -const int kMinAlignedTabs = 4; -// Constant number of pixels minimum height of a vertical line. -const int kVLineMinLength = 500; -// Minimum gradient for a vertical tab vector. Used to prune away junk -// tab vectors with what would be a ridiculously large skew angle. -// Value corresponds to tan(90 - max allowed skew angle) -const double kMinTabGradient = 4.0; -// Tolerance to skew on top of current estimate of skew. Divide x or y length -// by kMaxSkewFactor to get the y or x skew distance. -// If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. -const int kMaxSkewFactor = 15; - -// Constructor to set the parameters for finding aligned and ragged tabs. -// Vertical_x and vertical_y are the current estimates of the true vertical -// direction (up) in the image. Height is the height of the starter blob. -// v_gap_multiple is the multiple of height that will be used as a limit -// on vertical gap before giving up and calling the line ended. -// resolution is the original image resolution, and align0 indicates the -// type of tab stop to be found. 
-AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, - int height, int v_gap_multiple, - int min_gutter_width, - int resolution, TabAlignment align0) - : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), - ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), - alignment(align0), - confirmed_type(TT_CONFIRMED), - min_length(0) { - // Set the tolerances according to the type of line sought. - // For tab search, these are based on the image resolution for most, or - // the height of the starting blob for the maximum vertical gap. - max_v_gap = height * v_gap_multiple; - if (ragged) { - // In the case of a ragged edge, we are much more generous with the - // inside alignment fraction, but also require a much bigger gutter. - gutter_fraction = kRaggedGapFraction; - if (alignment == TA_RIGHT_RAGGED) { - l_align_tolerance = static_cast(resolution * kRaggedFraction + 0.5); - r_align_tolerance = static_cast(resolution * kAlignedFraction + 0.5); - } else { - l_align_tolerance = static_cast(resolution * kAlignedFraction + 0.5); - r_align_tolerance = static_cast(resolution * kRaggedFraction + 0.5); - } - min_points = kMinRaggedTabs; - } else { - gutter_fraction = kAlignedGapFraction; - l_align_tolerance = static_cast(resolution * kAlignedFraction + 0.5); - r_align_tolerance = static_cast(resolution * kAlignedFraction + 0.5); - min_points = kMinAlignedTabs; - } - min_gutter = static_cast(height * gutter_fraction + 0.5); - if (min_gutter < min_gutter_width) - min_gutter = min_gutter_width; - // Fit the vertical vector into an ICOORD, which is 16 bit. - set_vertical(vertical_x, vertical_y); -} - -// Constructor to set the parameters for finding vertical lines. -// Vertical_x and vertical_y are the current estimates of the true vertical -// direction (up) in the image. Width is the width of the starter blob. 
-AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, - int width) - : gutter_fraction(0.0), - right_tab(false), - ragged(false), - alignment(TA_SEPARATOR), - confirmed_type(TT_VLINE), - max_v_gap(kVLineSearchSize), - min_gutter(kVLineGutter), - min_points(1), - min_length(kVLineMinLength) { - // Compute threshold for left and right alignment. - l_align_tolerance = std::max(kVLineAlignment, width); - r_align_tolerance = std::max(kVLineAlignment, width); - - // Fit the vertical vector into an ICOORD, which is 16 bit. - set_vertical(vertical_x, vertical_y); -} - -// Fit the vertical vector into an ICOORD, which is 16 bit. -void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { - int factor = 1; - if (vertical_y > INT16_MAX) - factor = vertical_y / INT16_MAX + 1; - vertical.set_x(vertical_x / factor); - vertical.set_y(vertical_y / factor); -} - - -AlignedBlob::AlignedBlob(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BlobGrid(gridsize, bleft, tright) { -} - -// Return true if the given coordinates are within the test rectangle -// and the debug level is at least the given detail level. -bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { - if (textord_debug_tabfind < detail_level) - return false; - return x >= textord_testregion_left && x <= textord_testregion_right && - y <= textord_testregion_top && y >= textord_testregion_bottom; -} - -// Display the tab codes of the BLOBNBOXes in this grid. -ScrollView* AlignedBlob::DisplayTabs(const char* window_name, - ScrollView* tab_win) { -#ifndef GRAPHICS_DISABLED - if (tab_win == nullptr) - tab_win = MakeWindow(0, 50, window_name); - // For every tab in the grid, display it. 
- GridSearch gsearch(this); - gsearch.StartFullSearch(); - BLOBNBOX* bbox; - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - const TBOX& box = bbox->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - TabType tabtype = bbox->left_tab_type(); - if (tabtype != TT_NONE) { - if (tabtype == TT_MAYBE_ALIGNED) - tab_win->Pen(ScrollView::BLUE); - else if (tabtype == TT_MAYBE_RAGGED) - tab_win->Pen(ScrollView::YELLOW); - else if (tabtype == TT_CONFIRMED) - tab_win->Pen(ScrollView::GREEN); - else - tab_win->Pen(ScrollView::GREY); - tab_win->Line(left_x, top_y, left_x, bottom_y); - } - tabtype = bbox->right_tab_type(); - if (tabtype != TT_NONE) { - if (tabtype == TT_MAYBE_ALIGNED) - tab_win->Pen(ScrollView::MAGENTA); - else if (tabtype == TT_MAYBE_RAGGED) - tab_win->Pen(ScrollView::ORANGE); - else if (tabtype == TT_CONFIRMED) - tab_win->Pen(ScrollView::RED); - else - tab_win->Pen(ScrollView::GREY); - tab_win->Line(right_x, top_y, right_x, bottom_y); - } - } - tab_win->Update(); -#endif - return tab_win; -} - -// Helper returns true if the total number of line_crossings of all the blobs -// in the list is at least 2. -static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) { - BLOBNBOX_C_IT it(blobs); - int total_crossings = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - total_crossings += it.data()->line_crossings(); - } - return total_crossings >= 2; -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -AlignedBlob::~AlignedBlob() = default; - -// Finds a vector corresponding to a set of vertically aligned blob edges -// running through the given box. The type of vector returned and the -// search parameters are determined by the AlignedBlobParams. -// vertical_x and y are updated with an estimate of the real -// vertical direction. (skew finding.) 
-// Returns nullptr if no decent vector can be found. -TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, - BLOBNBOX* bbox, - int* vertical_x, - int* vertical_y) { - int ext_start_y, ext_end_y; - BLOBNBOX_CLIST good_points; - // Search up and then down from the starting bbox. - TBOX box = bbox->bounding_box(); - bool debug = WithinTestRegion(2, box.left(), box.bottom()); - int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); - pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); - BLOBNBOX_C_IT it(&good_points); - it.move_to_last(); - box = it.data()->bounding_box(); - int end_y = box.top(); - int end_x = align_params.right_tab ? box.right() : box.left(); - it.move_to_first(); - box = it.data()->bounding_box(); - int start_x = align_params.right_tab ? box.right() : box.left(); - int start_y = box.bottom(); - // Acceptable tab vectors must have a minimum number of points, - // have a minimum acceptable length, and have a minimum gradient. - // The gradient corresponds to the skew angle. - // Ragged tabs don't need to satisfy the gradient condition, as they - // will always end up parallel to the vertical direction. - bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); - if ((pt_count >= align_params.min_points && - end_y - start_y >= align_params.min_length && - (align_params.ragged || - end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || - at_least_2_crossings) { - int confirmed_points = 0; - // Count existing confirmed points to see if vector is acceptable. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - bbox = it.data(); - if (align_params.right_tab) { - if (bbox->right_tab_type() == align_params.confirmed_type) - ++confirmed_points; - } else { - if (bbox->left_tab_type() == align_params.confirmed_type) - ++confirmed_points; - } - } - // Ragged vectors are not allowed to use too many already used points. 
- if (!align_params.ragged || - confirmed_points + confirmed_points < pt_count) { - const TBOX& box = bbox->bounding_box(); - if (debug) { - tprintf("Confirming tab vector of %d pts starting at %d,%d\n", - pt_count, box.left(), box.bottom()); - } - // Flag all the aligned neighbours as confirmed . - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - bbox = it.data(); - if (align_params.right_tab) { - bbox->set_right_tab_type(align_params.confirmed_type); - } else { - bbox->set_left_tab_type(align_params.confirmed_type); - } - if (debug) { - bbox->bounding_box().print(); - } - } - // Now make the vector and return it. - TabVector* result = TabVector::FitVector(align_params.alignment, - align_params.vertical, - ext_start_y, ext_end_y, - &good_points, - vertical_x, vertical_y); - result->set_intersects_other_lines(at_least_2_crossings); - if (debug) { - tprintf("Box was %d, %d\n", box.left(), box.bottom()); - result->Print("After fitting"); - } - return result; - } else if (debug) { - tprintf("Ragged tab used too many used points: %d out of %d\n", - confirmed_points, pt_count); - } - } else if (debug) { - tprintf("Tab vector failed basic tests: pt count %d vs min %d, " - "length %d vs min %d, min grad %g\n", - pt_count, align_params.min_points, end_y - start_y, - align_params.min_length, abs(end_x - start_x) * kMinTabGradient); - } - return nullptr; -} - -// Find a set of blobs that are aligned in the given vertical -// direction with the given blob. Returns a list of aligned -// blobs and the number in the list. -// For other parameters see FindAlignedBlob below. 
-int AlignedBlob::AlignTabs(const AlignedBlobParams& params, - bool top_to_bottom, BLOBNBOX* bbox, - BLOBNBOX_CLIST* good_points, int* end_y) { - int ptcount = 0; - BLOBNBOX_C_IT it(good_points); - - TBOX box = bbox->bounding_box(); - bool debug = WithinTestRegion(2, box.left(), box.bottom()); - if (debug) { - tprintf("Starting alignment run at blob:"); - box.print(); - } - int x_start = params.right_tab ? box.right() : box.left(); - while (bbox != nullptr) { - // Add the blob to the list if the appropriate side is a tab candidate, - // or if we are working on a ragged tab. - TabType type = params.right_tab ? bbox->right_tab_type() - : bbox->left_tab_type(); - if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && - (it.empty() || it.data() != bbox)) { - if (top_to_bottom) - it.add_before_then_move(bbox); - else - it.add_after_then_move(bbox); - ++ptcount; - } - // Find the next blob that is aligned with the current one. - // FindAlignedBlob guarantees that forward progress will be made in the - // top_to_bottom direction, and therefore eventually it will return nullptr, - // making this while (bbox != nullptr) loop safe. - bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); - if (bbox != nullptr) { - box = bbox->bounding_box(); - if (!params.ragged) - x_start = params.right_tab ? box.right() : box.left(); - } - } - if (debug) { - tprintf("Alignment run ended with %d pts at blob:", ptcount); - box.print(); - } - return ptcount; -} - -// Search vertically for a blob that is aligned with the input bbox. -// The search parameters are determined by AlignedBlobParams. -// top_to_bottom tells whether to search down or up. -// The return value is nullptr if nothing was found in the search box -// or if a blob was found in the gutter. On a nullptr return, end_y -// is set to the edge of the search box or the leading edge of the -// gutter blob if one was found. 
-BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, - bool top_to_bottom, BLOBNBOX* bbox, - int x_start, int* end_y) { - TBOX box = bbox->bounding_box(); - // If there are separator lines, get the column edges. - int left_column_edge = bbox->left_rule(); - int right_column_edge = bbox->right_rule(); - // start_y is used to guarantee that forward progress is made and the - // search does not go into an infinite loop. New blobs must extend the - // line beyond start_y. - int start_y = top_to_bottom ? box.bottom() : box.top(); - if (WithinTestRegion(2, x_start, start_y)) { - tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", - box.left(), box.top(), box.right(), box.bottom(), - left_column_edge, right_column_edge); - } - // Compute skew tolerance. - int skew_tolerance = p.max_v_gap / kMaxSkewFactor; - // Calculate xmin and xmax of the search box so that it contains - // all possibly relevant boxes up to p.max_v_gap above or below accoording - // to top_to_bottom. - // Start with a notion of vertical with the current estimate. - int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y(); - if (top_to_bottom) { - x2 = x_start - x2; - *end_y = start_y - p.max_v_gap; - } else { - x2 = x_start + x2; - *end_y = start_y + p.max_v_gap; - } - // Expand the box by an additional skew tolerance - int xmin = std::min(x_start, x2) - skew_tolerance; - int xmax = std::max(x_start, x2) + skew_tolerance; - // Now add direction-specific tolerances. - if (p.right_tab) { - xmax += p.min_gutter; - xmin -= p.l_align_tolerance; - } else { - xmax += p.r_align_tolerance; - xmin -= p.min_gutter; - } - // Setup a vertical search for an aligned blob. - GridSearch vsearch(this); - if (WithinTestRegion(2, x_start, start_y)) - tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", - p.ragged ? "Ragged" : "Aligned", p.right_tab ? 
"Right" : "Left", - xmin, xmax, start_y, p.max_v_gap, p.min_gutter); - vsearch.StartVerticalSearch(xmin, xmax, start_y); - // result stores the best real return value. - BLOBNBOX* result = nullptr; - // The backup_result is not a tab candidate and can be used if no - // real tab candidate result is found. - BLOBNBOX* backup_result = nullptr; - // neighbour is the blob that is currently being investigated. - BLOBNBOX* neighbour = nullptr; - while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { - if (neighbour == bbox) - continue; - TBOX nbox = neighbour->bounding_box(); - int n_y = (nbox.top() + nbox.bottom()) / 2; - if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || - (top_to_bottom && n_y < start_y - p.max_v_gap)) { - if (WithinTestRegion(2, x_start, start_y)) - tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", - nbox.left(), nbox.bottom(), nbox.right(), nbox.top()); - break; // Gone far enough. - } - // It is CRITICAL to ensure that forward progress is made, (strictly - // in/decreasing n_y) or the caller could loop infinitely, while - // waiting for a sequence of blobs in a line to end. - // NextVerticalSearch alone does not guarantee this, as there may be - // more than one blob in a grid cell. See comment in AlignTabs. - if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box)) - continue; // Only look in the required direction. - if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize()) - return result; // This result is clear. - if (backup_result != nullptr && p.ragged && result == nullptr && - backup_result->bounding_box().y_gap(nbox) > gridsize()) - return backup_result; // This result is clear. - - // If the neighbouring blob is the wrong side of a separator line, then it - // "doesn't exist" as far as we are concerned. 
- int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); - if (x_at_n_y < neighbour->left_crossing_rule() || - x_at_n_y > neighbour->right_crossing_rule()) - continue; // Separator line in the way. - int n_left = nbox.left(); - int n_right = nbox.right(); - int n_x = p.right_tab ? n_right : n_left; - if (WithinTestRegion(2, x_start, start_y)) - tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", - nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), - n_x, n_y, x_at_n_y); - if (p.right_tab && - n_left < x_at_n_y + p.min_gutter && - n_right > x_at_n_y + p.r_align_tolerance && - (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { - // In the gutter so end of line. - if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) - bbox->set_right_tab_type(TT_DELETED); - *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); - if (WithinTestRegion(2, x_start, start_y)) - tprintf("gutter\n"); - return nullptr; - } - if (!p.right_tab && - n_left < x_at_n_y - p.l_align_tolerance && - n_right > x_at_n_y - p.min_gutter && - (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { - // In the gutter so end of line. - if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) - bbox->set_left_tab_type(TT_DELETED); - *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); - if (WithinTestRegion(2, x_start, start_y)) - tprintf("gutter\n"); - return nullptr; - } - if ((p.right_tab && neighbour->leader_on_right()) || - (!p.right_tab && neighbour->leader_on_left())) - continue; // Neighbours of leaders are not allowed to be used. - if (n_x <= x_at_n_y + p.r_align_tolerance && - n_x >= x_at_n_y - p.l_align_tolerance) { - // Aligned so keep it. If it is a marked tab save it as result, - // otherwise keep it as backup_result to return in case of later failure. 
- if (WithinTestRegion(2, x_start, start_y)) - tprintf("aligned, seeking%d, l=%d, r=%d\n", - p.right_tab, neighbour->left_tab_type(), - neighbour->right_tab_type()); - TabType n_type = p.right_tab ? neighbour->right_tab_type() - : neighbour->left_tab_type(); - if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { - if (result == nullptr) { - result = neighbour; - } else { - // Keep the closest neighbour by Euclidean distance. - // This prevents it from picking a tab blob in another column. - const TBOX& old_box = result->bounding_box(); - int x_diff = p.right_tab ? old_box.right() : old_box.left(); - x_diff -= x_at_n_y; - int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y; - int old_dist = x_diff * x_diff + y_diff * y_diff; - x_diff = n_x - x_at_n_y; - y_diff = n_y - start_y; - int new_dist = x_diff * x_diff + y_diff * y_diff; - if (new_dist < old_dist) - result = neighbour; - } - } else if (backup_result == nullptr) { - if (WithinTestRegion(2, x_start, start_y)) - tprintf("Backup\n"); - backup_result = neighbour; - } else { - TBOX backup_box = backup_result->bounding_box(); - if ((p.right_tab && backup_box.right() < nbox.right()) || - (!p.right_tab && backup_box.left() > nbox.left())) { - if (WithinTestRegion(2, x_start, start_y)) - tprintf("Better backup\n"); - backup_result = neighbour; - } - } - } - } - return result != nullptr ? result : backup_result; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/alignedblob.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/alignedblob.h deleted file mode 100644 index febd0761..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/alignedblob.h +++ /dev/null @@ -1,126 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: alignedblob.h -// Description: A class to find vertically aligned blobs in a BBGrid, -// and a struct to hold control parameters. 
-// Author: Ray Smith -// Created: Fri Mar 21 15:03:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_ -#define TESSERACT_TEXTORD_ALIGNEDBLOB_H_ - -#include "bbgrid.h" -#include "blobbox.h" -#include "strngs.h" -#include "tabvector.h" - -extern INT_VAR_H(textord_debug_bugs, 0, - "Turn on output related to bugs in tab finding"); -extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding"); -extern BOOL_VAR_H(textord_debug_printable, false, - "Make debug windows printable"); - -namespace tesseract { - -// Simple structure to hold the search parameters for AlignedBlob. -// The members are mostly derived from constants, which are -// conditioned on the alignment parameter. -// For finding vertical lines, a different set of constants are -// used, conditioned on the different constructor. -struct AlignedBlobParams { - // Constructor to set the parameters for finding aligned and ragged tabs. - // Vertical_x and vertical_y are the current estimates of the true vertical - // direction (up) in the image. Height is the height of the starter blob. - // v_gap_multiple is the multiple of height that will be used as a limit - // on vertical gap before giving up and calling the line ended. - // resolution is the original image resolution, and align0 indicates the - // type of tab stop to be found. 
- AlignedBlobParams(int vertical_x, int vertical_y, int height, - int v_gap_multiple, int min_gutter_width, int resolution, - TabAlignment alignment0); - // Constructor to set the parameters for finding vertical lines. - // Vertical_x and vertical_y are the current estimates of the true vertical - // direction (up) in the image. Width is the width of the starter blob. - AlignedBlobParams(int vertical_x, int vertical_y, int width); - - // Fit the vertical vector into an ICOORD, which is 16 bit. - void set_vertical(int vertical_x, int vertical_y); - - double gutter_fraction; // Multiple of height used for min_gutter. - bool right_tab; // We are looking at right edges. - bool ragged; // We are looking for a ragged (vs aligned) edge. - TabAlignment alignment; // The type we are trying to produce. - TabType confirmed_type; // Type to flag blobs if accepted. - int max_v_gap; // Max vertical gap to be tolerated. - int min_gutter; // Minimum gutter between columns. - // Tolerances allowed on horizontal alignment of aligned edges. - int l_align_tolerance; // Left edges. - int r_align_tolerance; // Right edges. - // Conditions for accepting a line. - int min_points; // Minimum number of points to be OK. - int min_length; // Min length of completed line. - - ICOORD vertical; // Current estimate of logical vertical. -}; - -// The AlignedBlob class contains code to find vertically aligned blobs. -// This is factored out into a separate class, so it can be used by both -// vertical line finding (LineFind) and tabstop finding (TabFind). -class AlignedBlob : public BlobGrid { - public: - AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~AlignedBlob(); - - // Return true if the given coordinates are within the test rectangle - // and the debug level is at least the given detail level. - static bool WithinTestRegion(int detail_level, int x, int y); - - // Display the tab codes of the BLOBNBOXes in this grid. 
- ScrollView* DisplayTabs(const char* window_name, ScrollView* tab_win); - - // Finds a vector corresponding to a set of vertically aligned blob edges - // running through the given box. The type of vector returned and the - // search parameters are determined by the AlignedBlobParams. - // vertical_x and y are updated with an estimate of the real - // vertical direction. (skew finding.) - // Returns nullptr if no decent vector can be found. - TabVector* FindVerticalAlignment(AlignedBlobParams align_params, - BLOBNBOX* bbox, - int* vertical_x, int* vertical_y); - - private: - // Find a set of blobs that are aligned in the given vertical - // direction with the given blob. Returns a list of aligned - // blobs and the number in the list. - // For other parameters see FindAlignedBlob below. - int AlignTabs(const AlignedBlobParams& params, - bool top_to_bottom, BLOBNBOX* bbox, - BLOBNBOX_CLIST* good_points, int* end_y); - - // Search vertically for a blob that is aligned with the input bbox. - // The search parameters are determined by AlignedBlobParams. - // top_to_bottom tells whether to search down or up. - // The return value is nullptr if nothing was found in the search box - // or if a blob was found in the gutter. On a nullptr return, end_y - // is set to the edge of the search box or the leading edge of the - // gutter blob if one was found. - BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p, - bool top_to_bottom, BLOBNBOX* bbox, - int x_start, int* end_y); -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/baselinedetect.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/baselinedetect.cpp deleted file mode 100644 index 92536ae8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/baselinedetect.cpp +++ /dev/null @@ -1,863 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: baselinedetect.cpp -// Description: Initial Baseline Determination. -// Copyright 2012 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Created: Mon Apr 30 10:15:31 PDT 2012 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef _MSC_VER -#define _USE_MATH_DEFINES -#endif // _MSC_VER - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "baselinedetect.h" - -#include -#include // for FLT_MAX -#include -#include "allheaders.h" -#include "blobbox.h" -#include "detlinefit.h" -#include "drawtord.h" -#include "helpers.h" -#include "linlsq.h" -#include "makerow.h" -#include "textord.h" -#include "tprintf.h" -#include "underlin.h" - -// Number of displacement modes kept in displacement_modes_; -const int kMaxDisplacementsModes = 3; -// Number of points to skip when retrying initial fit. 
-const int kNumSkipPoints = 3; -// Max angle deviation (in radians) allowed to keep the independent baseline. -const double kMaxSkewDeviation = 1.0 / 64; -// Fraction of line spacing estimate for quantization of blob displacements. -const double kOffsetQuantizationFactor = 3.0 / 64; -// Fraction of line spacing estimate for computing blob fit error. -const double kFitHalfrangeFactor = 6.0 / 64; -// Max fraction of line spacing allowed before a baseline counts as badly fitting. -const double kMaxBaselineError = 3.0 / 64; -// Multiple of linespacing that sets max_blob_size in TO_BLOCK. -// Copied from textord_excess_blobsize. -const double kMaxBlobSizeMultiple = 1.3; -// Min fraction of linespacing gaps that should be close to the model before -// we will force the linespacing model on all the lines. -const double kMinFittingLinespacings = 0.25; -// A y-coordinate within a textline that is to be debugged. -//#define kDebugYCoord 1525 - -namespace tesseract { - -BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row) - : blobs_(to_row->blob_list()), - baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f), - baseline_error_(0.0), good_baseline_(false) { - ComputeBoundingBox(); - // Compute a scale factor for rounding to ints. - disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing; - fit_halfrange_ = kFitHalfrangeFactor * line_spacing; - max_baseline_error_ = kMaxBaselineError * line_spacing; -} - -// Sets the TO_ROW with the output straight line. -void BaselineRow::SetupOldLineParameters(TO_ROW* row) const { - // TODO(rays) get rid of this when m and c are no longer used. - double gradient = tan(BaselineAngle()); - // para_c is the actual intercept of the baseline on the y-axis. - float para_c = StraightYAtX(0.0); - row->set_line(gradient, para_c, baseline_error_); - row->set_parallel_line(gradient, para_c, baseline_error_); -} - -// Outputs diagnostic information. 
-void BaselineRow::Print() const { - tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", - baseline_pt1_.x(), baseline_pt1_.y(), - baseline_pt2_.x(), baseline_pt2_.y(), - BaselineAngle(), StraightYAtX(0.0)); - tprintf("Quant factor=%g, error=%g, good=%d, box:", - disp_quant_factor_, baseline_error_, good_baseline_); - bounding_box_.print(); -} - -// Returns the skew angle (in radians) of the current baseline in [-pi,pi]. -double BaselineRow::BaselineAngle() const { - FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_); - double angle = baseline_dir.angle(); - // Baseline directions are only unique in a range of pi so constrain to - // [-pi/2, pi/2]. - return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5; -} - -// Computes and returns the linespacing at the middle of the overlap -// between this and other. -double BaselineRow::SpaceBetween(const BaselineRow& other) const { - // Find the x-centre of overlap of the lines. - float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) + - std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f; - // Find the vertical centre between them. - float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f; - // Find the perpendicular distance of (x,y) from each line. - FCOORD pt(x, y); - return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt); -} - -// Computes and returns the displacement of the center of the line -// perpendicular to the given direction. -double BaselineRow::PerpDisp(const FCOORD& direction) const { - float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f; - FCOORD middle_pos(middle_x, StraightYAtX(middle_x)); - return direction * middle_pos / direction.length(); -} - -// Computes the y coordinate at the given x using the straight baseline -// defined by baseline_pt1_ and baseline_pt2__. 
-double BaselineRow::StraightYAtX(double x) const { - double denominator = baseline_pt2_.x() - baseline_pt1_.x(); - if (denominator == 0.0) - return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; - return baseline_pt1_.y() + - (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / - denominator; -} - -// Fits a straight baseline to the points. Returns true if it had enough -// points to be reasonably sure of the fitted baseline. -// If use_box_bottoms is false, baselines positions are formed by -// considering the outlines of the blobs. -bool BaselineRow::FitBaseline(bool use_box_bottoms) { - // Deterministic fitting is used wherever possible. - fitter_.Clear(); - // Linear least squares is a backup if the DetLineFit produces a bad line. - LLSQ llsq; - BLOBNBOX_IT blob_it(blobs_); - - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (!use_box_bottoms) blob->EstimateBaselinePosition(); - const TBOX& box = blob->bounding_box(); - int x_middle = (box.left() + box.right()) / 2; -#ifdef kDebugYCoord - if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { - tprintf("Box bottom = %d, baseline pos=%d for box at:", - box.bottom(), blob->baseline_position()); - box.print(); - } -#endif - fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2); - llsq.add(x_middle, blob->baseline_position()); - } - // Fit the line. - ICOORD pt1, pt2; - baseline_error_ = fitter_.Fit(&pt1, &pt2); - baseline_pt1_ = pt1; - baseline_pt2_ = pt2; - if (baseline_error_ > max_baseline_error_ && - fitter_.SufficientPointsForIndependentFit()) { - // The fit was bad but there were plenty of points, so try skipping - // the first and last few, and use the new line if it dramatically improves - // the error of fit. 
- double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2); - if (error < baseline_error_ / 2.0) { - baseline_error_ = error; - baseline_pt1_ = pt1; - baseline_pt2_ = pt2; - } - } - int debug = 0; -#ifdef kDebugYCoord - Print(); - debug = bounding_box_.bottom() < kDebugYCoord && - bounding_box_.top() > kDebugYCoord - ? 3 : 2; -#endif - // Now we obtained a direction from that fit, see if we can improve the - // fit using the same direction and some other start point. - FCOORD direction(pt2 - pt1); - double target_offset = direction * pt1; - good_baseline_ = false; - FitConstrainedIfBetter(debug, direction, 0.0, target_offset); - // Wild lines can be produced because DetLineFit allows vertical lines, but - // vertical text has been rotated so angles over pi/4 should be disallowed. - // Near vertical lines can still be produced by vertically aligned components - // on very short lines. - double angle = BaselineAngle(); - if (fabs(angle) > M_PI * 0.25) { - // Use the llsq fit as a backup. - baseline_pt1_ = llsq.mean_point(); - baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m()); - // TODO(rays) get rid of this when m and c are no longer used. - double m = llsq.m(); - double c = llsq.c(m); - baseline_error_ = llsq.rms(m, c); - good_baseline_ = false; - } - return good_baseline_; -} - -// Modifies an existing result of FitBaseline to be parallel to the given -// direction vector if that produces a better result. -void BaselineRow::AdjustBaselineToParallel(int debug, - const FCOORD& direction) { - SetupBlobDisplacements(direction); - if (displacement_modes_.empty()) - return; -#ifdef kDebugYCoord - if (bounding_box_.bottom() < kDebugYCoord && - bounding_box_.top() > kDebugYCoord && debug < 3) - debug = 3; -#endif - FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]); -} - -// Modifies the baseline to snap to the textline grid if the existing -// result is not good enough. 
-double BaselineRow::AdjustBaselineToGrid(int debug, - const FCOORD& direction, - double line_spacing, - double line_offset) { - if (blobs_->empty()) { - if (debug > 1) { - tprintf("Row empty at:"); - bounding_box_.print(); - } - return line_offset; - } - // Find the displacement_modes_ entry nearest to the grid. - double best_error = 0.0; - int best_index = -1; - for (int i = 0; i < displacement_modes_.size(); ++i) { - double blob_y = displacement_modes_[i]; - double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, - line_offset); - if (debug > 1) { - tprintf("Mode at %g has error %g from model \n", blob_y, error); - } - if (best_index < 0 || error < best_error) { - best_error = error; - best_index = i; - } - } - // We will move the baseline only if the chosen mode is close enough to the - // model. - double model_margin = max_baseline_error_ - best_error; - if (best_index >= 0 && model_margin > 0.0) { - // But if the current baseline is already close to the mode there is no - // point, and only the potential to damage accuracy by changing its angle. - double perp_disp = PerpDisp(direction); - double shift = displacement_modes_[best_index] - perp_disp; - if (fabs(shift) > max_baseline_error_) { - if (debug > 1) { - tprintf("Attempting linespacing model fit with mode %g to row at:", - displacement_modes_[best_index]); - bounding_box_.print(); - } - FitConstrainedIfBetter(debug, direction, model_margin, - displacement_modes_[best_index]); - } else if (debug > 1) { - tprintf("Linespacing model only moves current line by %g for row at:", - shift); - bounding_box_.print(); - } - } else if (debug > 1) { - tprintf("Linespacing model not close enough to any mode for row at:"); - bounding_box_.print(); - } - return fmod(PerpDisp(direction), line_spacing); -} - -// Sets up displacement_modes_ with the top few modes of the perpendicular -// distance of each blob from the given direction vector, after rounding. 
-void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { - // Set of perpendicular displacements of the blob bottoms from the required - // baseline direction. - GenericVector perp_blob_dists; - displacement_modes_.truncate(0); - // Gather the skew-corrected position of every blob. - double min_dist = FLT_MAX; - double max_dist = -FLT_MAX; - BLOBNBOX_IT blob_it(blobs_); -#ifdef kDebugYCoord - bool debug = false; -#endif - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - const TBOX& box = blob->bounding_box(); -#ifdef kDebugYCoord - if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true; -#endif - FCOORD blob_pos((box.left() + box.right()) / 2.0f, - blob->baseline_position()); - double offset = direction * blob_pos; - perp_blob_dists.push_back(offset); -#ifdef kDebugYCoord - if (debug) { - tprintf("Displacement %g for blob at:", offset); - box.print(); - } -#endif - UpdateRange(offset, &min_dist, &max_dist); - } - // Set up a histogram using disp_quant_factor_ as the bucket size. - STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), - IntCastRounded(max_dist / disp_quant_factor_) + 1); - for (int i = 0; i < perp_blob_dists.size(); ++i) { - dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1); - } - GenericVector > scaled_modes; - dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes); -#ifdef kDebugYCoord - if (debug) { - for (int i = 0; i < scaled_modes.size(); ++i) { - tprintf("Top mode = %g * %d\n", - scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data); - } - } -#endif - for (int i = 0; i < scaled_modes.size(); ++i) - displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key); -} - -// Fits a line in the given direction to blobs that are close to the given -// target_offset perpendicular displacement from the direction. 
The fit -// error is allowed to be cheat_allowance worse than the existing fit, and -// will still be used. -// If cheat_allowance > 0, the new fit will be good and replace the current -// fit if it has better fit (with cheat) OR its error is below -// max_baseline_error_ and the old fit is marked bad. -// Otherwise the new fit will only replace the old if it is really better, -// or the old fit is marked bad and the new fit has sufficient points, as -// well as being within the max_baseline_error_. -void BaselineRow::FitConstrainedIfBetter(int debug, - const FCOORD& direction, - double cheat_allowance, - double target_offset) { - double halfrange = fit_halfrange_ * direction.length(); - double min_dist = target_offset - halfrange; - double max_dist = target_offset + halfrange; - ICOORD line_pt; - double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, - debug > 2, &line_pt); - // Allow cheat_allowance off the new error - new_error -= cheat_allowance; - double old_angle = BaselineAngle(); - double new_angle = direction.angle(); - if (debug > 1) { - tprintf("Constrained error = %g, original = %g", - new_error, baseline_error_); - tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", - old_angle, new_angle, - new_angle - old_angle, kMaxSkewDeviation); - } - bool new_good_baseline = new_error <= max_baseline_error_ && - (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); - // The new will replace the old if any are true: - // 1. the new error is better - // 2. the old is NOT good, but the new is - // 3. there is a wild angular difference between them (assuming that the new - // is a better guess at the angle.) 
- if (new_error <= baseline_error_ || - (!good_baseline_ && new_good_baseline) || - fabs(new_angle - old_angle) > kMaxSkewDeviation) { - baseline_error_ = new_error; - baseline_pt1_ = line_pt; - baseline_pt2_ = baseline_pt1_ + direction; - good_baseline_ = new_good_baseline; - if (debug > 1) { - tprintf("Replacing with constrained baseline, good = %d\n", - good_baseline_); - } - } else if (debug > 1) { - tprintf("Keeping old baseline\n"); - } -} - -// Returns the perpendicular distance of the point from the straight -// baseline. -double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const { - FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_); - FCOORD offset_vector(pt - baseline_pt1_); - double distance = baseline_vector * offset_vector; - return sqrt(distance * distance / baseline_vector.sqlength()); -} - -// Computes the bounding box of the row. -void BaselineRow::ComputeBoundingBox() { - BLOBNBOX_IT it(blobs_); - TBOX box; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - box += it.data()->bounding_box(); - } - bounding_box_ = box; -} - - -BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block) - : block_(block), debug_level_(debug_level), non_text_block_(non_text), - good_skew_angle_(false), skew_angle_(0.0), - line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) { - TO_ROW_IT row_it(block_->get_rows()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - // Sort the blobs on the rows. - row_it.data()->blob_list()->sort(blob_x_order); - rows_.push_back(new BaselineRow(block->line_spacing, row_it.data())); - } -} - -// Computes and returns the absolute error of the given perp_disp from the -// given linespacing model. -double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, - double line_offset) { - // Round to the nearest multiple of line_spacing + line offset. 
- int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing); - double model_y = line_spacing * multiple + line_offset; - return fabs(perp_disp - model_y); -} - -// Fits straight line baselines and computes the skew angle from the -// median angle. Returns true if a good angle is found. -// If use_box_bottoms is false, baseline positions are formed by -// considering the outlines of the blobs. -bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { - if (non_text_block_) return false; - GenericVector angles; - for (int r = 0; r < rows_.size(); ++r) { - BaselineRow* row = rows_[r]; - if (row->FitBaseline(use_box_bottoms)) { - double angle = row->BaselineAngle(); - angles.push_back(angle); - } - if (debug_level_ > 1) - row->Print(); - } - - if (!angles.empty()) { - skew_angle_ = MedianOfCircularValues(M_PI, &angles); - good_skew_angle_ = true; - } else { - skew_angle_ = 0.0f; - good_skew_angle_ = false; - } - if (debug_level_ > 0) { - tprintf("Initial block skew angle = %g, good = %d\n", - skew_angle_, good_skew_angle_); - } - return good_skew_angle_; -} - -// Refits the baseline to a constrained angle, using the stored block -// skew if good enough, otherwise the supplied default skew. -void BaselineBlock::ParallelizeBaselines(double default_block_skew) { - if (non_text_block_) return; - if (!good_skew_angle_) skew_angle_ = default_block_skew; - if (debug_level_ > 0) - tprintf("Adjusting block to skew angle %g\n", skew_angle_); - FCOORD direction(cos(skew_angle_), sin(skew_angle_)); - for (int r = 0; r < rows_.size(); ++r) { - BaselineRow* row = rows_[r]; - row->AdjustBaselineToParallel(debug_level_, direction); - if (debug_level_ > 1) - row->Print(); - } - if (rows_.size() < 3 || !ComputeLineSpacing()) - return; - // Enforce the line spacing model on all lines that don't yet have a good - // baseline. - // Start by finding the row that is best fitted to the model. 
- int best_row = 0; - double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), - line_spacing_, line_offset_); - for (int r = 1; r < rows_.size(); ++r) { - double error = SpacingModelError(rows_[r]->PerpDisp(direction), - line_spacing_, line_offset_); - if (error < best_error) { - best_error = error; - best_row = r; - } - } - // Starting at the best fitting row, work outwards, syncing the offset. - double offset = line_offset_; - for (int r = best_row + 1; r < rows_.size(); ++r) { - offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, - line_spacing_, offset); - } - offset = line_offset_; - for (int r = best_row - 1; r >= 0; --r) { - offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, - line_spacing_, offset); - } -} - -// Sets the parameters in TO_BLOCK that are needed by subsequent processes. -void BaselineBlock::SetupBlockParameters() const { - if (line_spacing_ > 0.0) { - // Where was block_line_spacing set before? - float min_spacing = std::min(block_->line_spacing, static_cast(line_spacing_)); - if (min_spacing < block_->line_size) - block_->line_size = min_spacing; - block_->line_spacing = line_spacing_; - block_->baseline_offset = line_offset_; - block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple; - } - // Setup the parameters on all the rows. - TO_ROW_IT row_it(block_->get_rows()); - for (int r = 0; r < rows_.size(); ++r, row_it.forward()) { - BaselineRow* row = rows_[r]; - TO_ROW* to_row = row_it.data(); - row->SetupOldLineParameters(to_row); - } -} - -// Processing that is required before fitting baseline splines, but requires -// linear baselines in order to be successful: -// Removes noise if required -// Separates out underlines -// Pre-associates blob fragments. -// TODO(rays/joeliu) This entire section of code is inherited from the past -// and could be improved/eliminated. -// page_tr is used to size a debug window. 
-void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) { - if (non_text_block_) return; - if (remove_noise) { - vigorous_noise_removal(block_); - } - FCOORD rotation(1.0f, 0.0f); - double gradient = tan(skew_angle_); - separate_underlines(block_, gradient, rotation, true); - pre_associate_blobs(page_tr, block_, rotation, true); -} - -// Fits splines to the textlines, or creates fake QSPLINES from the straight -// baselines that are already on the TO_ROWs. -// As a side-effect, computes the xheights of the rows and the block. -// Although x-height estimation is conceptually separate, it is part of -// detecting perspective distortion and therefore baseline fitting. -void BaselineBlock::FitBaselineSplines(bool enable_splines, - bool show_final_rows, - Textord* textord) { - double gradient = tan(skew_angle_); - FCOORD rotation(1.0f, 0.0f); - - if (enable_splines) { - textord->make_spline_rows(block_, gradient, show_final_rows); - } else { - // Make a fake spline from the existing line. - TBOX block_box= block_->block->pdblk.bounding_box(); - TO_ROW_IT row_it = block_->get_rows(); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - TO_ROW* row = row_it.data(); - int32_t xstarts[2] = { block_box.left(), block_box.right() }; - double coeffs[3] = { 0.0, row->line_m(), row->line_c() }; - row->baseline = QSPLINE(1, xstarts, coeffs); - textord->compute_row_xheight(row, block_->block->classify_rotation(), - row->line_m(), block_->line_size); - } - } - textord->compute_block_xheight(block_, gradient); - block_->block->set_xheight(block_->xheight); - if (textord_restore_underlines) // fix underlines - restore_underlined_blobs(block_); -} - -// Draws the (straight) baselines and final blobs colored according to -// what was discarded as noise and what is associated with each row. 
-void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) { -#ifndef GRAPHICS_DISABLED - if (non_text_block_) return; - double gradient = tan(skew_angle_); - FCOORD rotation(1.0f, 0.0f); - int left_edge = block_->block->pdblk.bounding_box().left(); - ScrollView* win = create_to_win(page_tr); - ScrollView::Color colour = ScrollView::RED; - TO_ROW_IT row_it = block_->get_rows(); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation); - colour = static_cast(colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; - } - plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE); - // Show discarded blobs. - plot_blob_list(win, &block_->underlines, - ScrollView::YELLOW, ScrollView::CORAL); - if (block_->blobs.length() > 0) - tprintf("%d blobs discarded as noise\n", block_->blobs.length()); - draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation); -#endif -} - -void BaselineBlock::DrawPixSpline(Pix* pix_in) { - if (non_text_block_) return; - TO_ROW_IT row_it = block_->get_rows(); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row_it.data()->baseline.plot(pix_in); - } -} - -// Top-level line-spacing calculation. Computes an estimate of the line- -// spacing, using the current baselines in the TO_ROWS of the block, and -// then refines it by fitting a regression line to the baseline positions -// as a function of their integer index. -// Returns true if it seems that the model is a reasonable fit to the -// observations. -bool BaselineBlock::ComputeLineSpacing() { - FCOORD direction(cos(skew_angle_), sin(skew_angle_)); - GenericVector row_positions; - ComputeBaselinePositions(direction, &row_positions); - if (row_positions.size() < 2) return false; - EstimateLineSpacing(); - RefineLineSpacing(row_positions); - // Verify that the model is reasonable. 
- double max_baseline_error = kMaxBaselineError * line_spacing_; - int non_trivial_gaps = 0; - int fitting_gaps = 0; - for (int i = 1; i < row_positions.size(); ++i) { - double row_gap = fabs(row_positions[i - 1] - row_positions[i]); - if (row_gap > max_baseline_error) { - ++non_trivial_gaps; - if (fabs(row_gap - line_spacing_) <= max_baseline_error) - ++fitting_gaps; - } - } - if (debug_level_ > 0) { - tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n", - line_spacing_, row_positions.size(), fitting_gaps, - non_trivial_gaps); - } - return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings; -} - -// Computes the deskewed vertical position of each baseline in the block and -// stores them in the given vector. -// This is calculated as the perpendicular distance of the middle of each -// baseline (in case it has a different skew angle) from the line passing -// through the origin parallel to the block baseline angle. -// NOTE that "distance" above is a signed quantity so we can tell which side -// of the block baseline a line sits, hence the function and argument name -// positions not distances. -void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction, - GenericVector* positions) { - positions->clear(); - for (int r = 0; r < rows_.size(); ++r) { - BaselineRow* row = rows_[r]; - const TBOX& row_box = row->bounding_box(); - float x_middle = (row_box.left() + row_box.right()) / 2.0f; - FCOORD row_pos(x_middle, static_cast(row->StraightYAtX(x_middle))); - float offset = direction * row_pos; - positions->push_back(offset); - } -} - -// Computes an estimate of the line spacing of the block from the median -// of the spacings between adjacent overlapping textlines. -void BaselineBlock::EstimateLineSpacing() { - GenericVector spacings; - for (int r = 0; r < rows_.size(); ++r) { - BaselineRow* row = rows_[r]; - // Exclude silly lines. 
- if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue; - // Find the first row after row that overlaps it significantly. - const TBOX& row_box = row->bounding_box(); - int r2; - for (r2 = r + 1; r2 < rows_.size() && - !row_box.major_x_overlap(rows_[r2]->bounding_box()); - ++r2); - if (r2 < rows_.size()) { - BaselineRow* row2 = rows_[r2]; - // Exclude silly lines. - if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue; - float spacing = row->SpaceBetween(*row2); - spacings.push_back(spacing); - } - } - // If we have at least one value, use it, otherwise leave the previous - // value unchanged. - if (!spacings.empty()) { - line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)]; - if (debug_level_ > 1) - tprintf("Estimate of linespacing = %g\n", line_spacing_); - } -} - -// Refines the line spacing of the block by fitting a regression -// line to the deskewed y-position of each baseline as a function of its -// estimated line index, allowing for a small error in the initial linespacing -// and choosing the best available model. -void BaselineBlock::RefineLineSpacing(const GenericVector& positions) { - double spacings[3], offsets[3], errors[3]; - int index_range; - errors[0] = FitLineSpacingModel(positions, line_spacing_, - &spacings[0], &offsets[0], &index_range); - if (index_range > 1) { - double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range); - // Try the hypotheses that there might be index_range +/- 1 line spaces. 
- errors[1] = FitLineSpacingModel(positions, spacing_plus, - &spacings[1], &offsets[1], nullptr); - double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range); - errors[2] = FitLineSpacingModel(positions, spacing_minus, - &spacings[2], &offsets[2], nullptr); - for (int i = 1; i <= 2; ++i) { - if (errors[i] < errors[0]) { - spacings[0] = spacings[i]; - offsets[0] = offsets[i]; - errors[0] = errors[i]; - } - } - } - if (spacings[0] > 0.0) { - line_spacing_ = spacings[0]; - line_offset_ = offsets[0]; - model_error_ = errors[0]; - if (debug_level_ > 0) { - tprintf("Final linespacing model = %g + offset %g, error %g\n", - line_spacing_, line_offset_, model_error_); - } - } -} - -// Given an initial estimate of line spacing (m_in) and the positions of each -// baseline, computes the line spacing of the block more accurately in m_out, -// and the corresponding intercept in c_out, and the number of spacings seen -// in index_delta. Returns the error of fit to the line spacing model. -// Uses a simple linear regression, but optimized the offset using the median. -double BaselineBlock::FitLineSpacingModel( - const GenericVector& positions, double m_in, - double* m_out, double* c_out, int* index_delta) { - if (m_in == 0.0f || positions.size() < 2) { - *m_out = m_in; - *c_out = 0.0; - if (index_delta != nullptr) *index_delta = 0; - return 0.0; - } - GenericVector offsets; - // Get the offset (remainder) linespacing for each line and choose the median. - for (int i = 0; i < positions.size(); ++i) - offsets.push_back(fmod(positions[i], m_in)); - // Get the median offset. - double median_offset = MedianOfCircularValues(m_in, &offsets); - // Now fit a line to quantized line number and offset. 
- LLSQ llsq; - int min_index = INT32_MAX; - int max_index = -INT32_MAX; - for (int i = 0; i < positions.size(); ++i) { - double y_pos = positions[i]; - int row_index = IntCastRounded((y_pos - median_offset) / m_in); - UpdateRange(row_index, &min_index, &max_index); - llsq.add(row_index, y_pos); - } - // Get the refined line spacing. - *m_out = llsq.m(); - // Use the median offset rather than the mean. - offsets.truncate(0); - for (int i = 0; i < positions.size(); ++i) - offsets.push_back(fmod(positions[i], *m_out)); - // Get the median offset. - if (debug_level_ > 2) { - for (int i = 0; i < offsets.size(); ++i) - tprintf("%d: %g\n", i, offsets[i]); - } - *c_out = MedianOfCircularValues(*m_out, &offsets); - if (debug_level_ > 1) { - tprintf("Median offset = %g, compared to mean of %g.\n", - *c_out, llsq.c(*m_out)); - } - // Index_delta is the number of hypothesized line gaps present. - if (index_delta != nullptr) - *index_delta = max_index - min_index; - // Use the regression model's intercept to compute the error, as it may be - // a full line-spacing in disagreement with the median. - double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); - if (debug_level_ > 1) { - tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", - m_in, median_offset, *m_out, *c_out, rms_error); - } - return rms_error; -} - -BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew, - TO_BLOCK_LIST* blocks) - : page_skew_(page_skew), debug_level_(debug_level) { - TO_BLOCK_IT it(blocks); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TO_BLOCK* to_block = it.data(); - BLOCK* block = to_block->block; - POLY_BLOCK* pb = block->pdblk.poly_block(); - // A note about non-text blocks. - // On output, non-text blocks are supposed to contain a single empty word - // in each incoming text line. These mark out the polygonal bounds of the - // block. 
Ideally no baselines should be required, but currently - // make_words crashes if a baseline and xheight are not provided, so we - // include non-text blocks here, but flag them for special treatment. - bool non_text = pb != nullptr && !pb->IsText(); - blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block)); - } -} - -// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers -// block-wise and page-wise data to smooth small blocks/rows, and applies -// smoothing based on block/page-level skew and block-level linespacing. -void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) { - GenericVector block_skew_angles; - for (int i = 0; i < blocks_.size(); ++i) { - BaselineBlock* bl_block = blocks_[i]; - if (debug_level_ > 0) - tprintf("Fitting initial baselines...\n"); - if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) { - block_skew_angles.push_back(bl_block->skew_angle()); - } - } - // Compute a page-wide default skew for blocks with too little information. - double default_block_skew = page_skew_.angle(); - if (!block_skew_angles.empty()) { - default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles); - } - if (debug_level_ > 0) { - tprintf("Page skew angle = %g\n", default_block_skew); - } - // Set bad lines in each block to the default block skew and then force fit - // a linespacing model where it makes sense to do so. - for (int i = 0; i < blocks_.size(); ++i) { - BaselineBlock* bl_block = blocks_[i]; - bl_block->ParallelizeBaselines(default_block_skew); - bl_block->SetupBlockParameters(); // This replaced compute_row_stats. - } -} - -// Computes the baseline splines for each TO_ROW in each TO_BLOCK and -// other associated side-effects, including pre-associating blobs, computing -// x-heights and displaying debug information. -// NOTE that ComputeStraightBaselines must have been called first as this -// sets up data in the TO_ROWs upon which this function depends. 
-void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, - bool enable_splines, - bool remove_noise, - bool show_final_rows, - Textord* textord) { - for (int i = 0; i < blocks_.size(); ++i) { - BaselineBlock* bl_block = blocks_[i]; - if (enable_splines) - bl_block->PrepareForSplineFitting(page_tr, remove_noise); - bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord); - if (show_final_rows) { - bl_block->DrawFinalRows(page_tr); - } - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/baselinedetect.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/baselinedetect.h deleted file mode 100644 index 325922e1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/baselinedetect.h +++ /dev/null @@ -1,277 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: baselinedetect.h -// Description: Initial Baseline Determination. -// Copyright 2012 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Created: Mon Apr 30 10:03:19 PDT 2012 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ -#define TESSERACT_TEXTORD_BASELINEDETECT_H_ - -#include "detlinefit.h" -#include "genericvector.h" -#include "points.h" -#include "rect.h" -#include "strngs.h" - -class BLOBNBOX_LIST; -class TO_BLOCK; -class TO_BLOCK_LIST; -class TO_ROW; -struct Pix; - -namespace tesseract { - -class Textord; - -// Class to compute and hold baseline data for a TO_ROW. -class BaselineRow { - public: - BaselineRow(double line_size, TO_ROW* to_row); - - const TBOX& bounding_box() const { - return bounding_box_; - } - // Sets the TO_ROW with the output straight line. - void SetupOldLineParameters(TO_ROW* row) const; - - // Outputs diagnostic information. - void Print() const; - - // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. - double BaselineAngle() const; - // Computes and returns the linespacing at the middle of the overlap - // between this and other. - double SpaceBetween(const BaselineRow& other) const; - // Computes and returns the displacement of the center of the line - // perpendicular to the given direction. - double PerpDisp(const FCOORD& direction) const; - // Computes the y coordinate at the given x using the straight baseline - // defined by baseline1_ and baseline2_. - double StraightYAtX(double x) const; - - // Fits a straight baseline to the points. Returns true if it had enough - // points to be reasonably sure of the fitted baseline. - // If use_box_bottoms is false, baselines positions are formed by - // considering the outlines of the blobs. - bool FitBaseline(bool use_box_bottoms); - // Modifies an existing result of FitBaseline to be parallel to the given - // vector if that produces a better result. - void AdjustBaselineToParallel(int debug, const FCOORD& direction); - // Modifies the baseline to snap to the textline grid if the existing - // result is not good enough. 
- double AdjustBaselineToGrid(int debug, const FCOORD& direction, - double line_spacing, double line_offset); - - private: - // Sets up displacement_modes_ with the top few modes of the perpendicular - // distance of each blob from the given direction vector, after rounding. - void SetupBlobDisplacements(const FCOORD& direction); - - // Fits a line in the given direction to blobs that are close to the given - // target_offset perpendicular displacement from the direction. The fit - // error is allowed to be cheat_allowance worse than the existing fit, and - // will still be used. - // If cheat_allowance > 0, the new fit will be good and replace the current - // fit if it has better fit (with cheat) OR its error is below - // max_baseline_error_ and the old fit is marked bad. - // Otherwise the new fit will only replace the old if it is really better, - // or the old fit is marked bad and the new fit has sufficient points, as - // well as being within the max_baseline_error_. - void FitConstrainedIfBetter(int debug, const FCOORD& direction, - double cheat_allowance, - double target_offset); - // Returns the perpendicular distance of the point from the straight - // baseline. - double PerpDistanceFromBaseline(const FCOORD& pt) const; - // Computes the bounding box of the row. - void ComputeBoundingBox(); - - // The blobs of the row to which this BaselineRow adds extra information - // during baseline fitting. Note that blobs_ could easily come from either - // a TO_ROW or a ColPartition. - BLOBNBOX_LIST* blobs_; - // Bounding box of all the blobs. - TBOX bounding_box_; - // Fitter used to fit lines to the blobs. - DetLineFit fitter_; - // 2 points on the straight baseline. - FCOORD baseline_pt1_; - FCOORD baseline_pt2_; - // Set of modes of displacements. They indicate preferable baseline positions. - GenericVector displacement_modes_; - // Quantization factor used for displacement_modes_. 
- double disp_quant_factor_; - // Half the acceptance range of blob displacements for computing the - // error during a constrained fit. - double fit_halfrange_; - // Max baseline error before a line is regarded as fitting badly. - double max_baseline_error_; - // The error of fit of the baseline. - double baseline_error_; - // True if this row seems to have a good baseline. - bool good_baseline_; -}; - -// Class to compute and hold baseline data for a TO_BLOCK. -class BaselineBlock { - public: - BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block); - - TO_BLOCK* block() const { - return block_; - } - double skew_angle() const { - return skew_angle_; - } - - // Computes and returns the absolute error of the given perp_disp from the - // given linespacing model. - static double SpacingModelError(double perp_disp, double line_spacing, - double line_offset); - - // Fits straight line baselines and computes the skew angle from the - // median angle. Returns true if a good angle is found. - // If use_box_bottoms is false, baseline positions are formed by - // considering the outlines of the blobs. - bool FitBaselinesAndFindSkew(bool use_box_bottoms); - - // Refits the baseline to a constrained angle, using the stored block - // skew if good enough, otherwise the supplied default skew. - void ParallelizeBaselines(double default_block_skew); - - // Sets the parameters in TO_BLOCK that are needed by subsequent processes. - void SetupBlockParameters() const; - - // Processing that is required before fitting baseline splines, but requires - // linear baselines in order to be successful: - // Removes noise if required - // Separates out underlines - // Pre-associates blob fragments. - // TODO(rays/joeliu) This entire section of code is inherited from the past - // and could be improved/eliminated. - // page_tr is used to size a debug window. 
- void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); - - // Fits splines to the textlines, or creates fake QSPLINES from the straight - // baselines that are already on the TO_ROWs. - // As a side-effect, computes the xheights of the rows and the block. - // Although x-height estimation is conceptually separate, it is part of - // detecting perspective distortion and therefore baseline fitting. - void FitBaselineSplines(bool enable_splines, bool show_final_rows, - Textord* textord); - - // Draws the (straight) baselines and final blobs colored according to - // what was discarded as noise and what is associated with each row. - void DrawFinalRows(const ICOORD& page_tr); - - // Render the generated spline baselines for this block on pix_in. - void DrawPixSpline(Pix* pix_in); - - private: - // Top-level line-spacing calculation. Computes an estimate of the line- - // spacing, using the current baselines in the TO_ROWS of the block, and - // then refines it by fitting a regression line to the baseline positions - // as a function of their integer index. - // Returns true if it seems that the model is a reasonable fit to the - // observations. - bool ComputeLineSpacing(); - - // Computes the deskewed vertical position of each baseline in the block and - // stores them in the given vector. - void ComputeBaselinePositions(const FCOORD& direction, - GenericVector* positions); - - // Computes an estimate of the line spacing of the block from the median - // of the spacings between adjacent overlapping textlines. - void EstimateLineSpacing(); - - // Refines the line spacing of the block by fitting a regression - // line to the deskewed y-position of each baseline as a function of its - // estimated line index, allowing for a small error in the initial linespacing - // and choosing the best available model. 
- void RefineLineSpacing(const GenericVector& positions); - - // Given an initial estimate of line spacing (m_in) and the positions of each - // baseline, computes the line spacing of the block more accurately in m_out, - // and the corresponding intercept in c_out, and the number of spacings seen - // in index_delta. Returns the error of fit to the line spacing model. - double FitLineSpacingModel(const GenericVector& positions, - double m_in, double* m_out, double* c_out, - int* index_delta); - - - // The block to which this class adds extra information used during baseline - // calculation. - TO_BLOCK* block_; - // The rows in the block that we will be working with. - PointerVector rows_; - // Amount of debugging output to provide. - int debug_level_; - // True if the block is non-text (graphic). - bool non_text_block_; - // True if the block has at least one good enough baseline to compute the - // skew angle and therefore skew_angle_ is valid. - bool good_skew_angle_; - // Angle of skew in radians using the conventional anticlockwise from x-axis. - double skew_angle_; - // Current best estimate line spacing in pixels perpendicular to skew_angle_. - double line_spacing_; - // Offset for baseline positions, in pixels. Each baseline is at - // line_spacing_ * n + line_offset_ for integer n, which represents - // [textline] line number in a line numbering system that has line 0 on or - // at least near the x-axis. Not equal to the actual line number of a line - // within a block as most blocks are not near the x-axis. - double line_offset_; - // The error of the line spacing model. 
- double model_error_; -}; - -class BaselineDetect { - public: - BaselineDetect(int debug_level, const FCOORD& page_skew, - TO_BLOCK_LIST* blocks); - - ~BaselineDetect() = default; - - // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers - // block-wise and page-wise data to smooth small blocks/rows, and applies - // smoothing based on block/page-level skew and block-level linespacing. - void ComputeStraightBaselines(bool use_box_bottoms); - - // Computes the baseline splines for each TO_ROW in each TO_BLOCK and - // other associated side-effects, including pre-associating blobs, computing - // x-heights and displaying debug information. - // NOTE that ComputeStraightBaselines must have been called first as this - // sets up data in the TO_ROWs upon which this function depends. - void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, - bool enable_splines, - bool remove_noise, - bool show_final_rows, - Textord* textord); - - private: - // Average (median) skew of the blocks on the page among those that have - // a good angle of their own. - FCOORD page_skew_; - // Amount of debug output to produce. - int debug_level_; - // The blocks that we are working with. - PointerVector blocks_; -}; - -} // namespace tesseract - -#endif // TESSERACT_TEXTORD_BASELINEDETECT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/bbgrid.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/bbgrid.cpp deleted file mode 100644 index 1e05ba9f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/bbgrid.cpp +++ /dev/null @@ -1,286 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: bbgrid.cpp -// Description: Class to hold BLOBNBOXs in a grid for fast access -// to neighbours. -// Author: Ray Smith -// Created: Wed Jun 06 17:22:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "bbgrid.h" -#include "helpers.h" -#include "ocrblock.h" - -namespace tesseract { - -/////////////////////////////////////////////////////////////////////// -// BBGrid IMPLEMENTATION. -/////////////////////////////////////////////////////////////////////// -GridBase::GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright) { - Init(gridsize, bleft, tright); -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -GridBase::~GridBase() = default; - -// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, -// and bleft, tright are the bounding box of everything to go in it. -void GridBase::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { - gridsize_ = gridsize; - bleft_ = bleft; - tright_ = tright; - if (gridsize_ == 0) - gridsize_ = 1; - gridwidth_ = (tright.x() - bleft.x() + gridsize_ - 1) / gridsize_; - gridheight_ = (tright.y() - bleft.y() + gridsize_ - 1) / gridsize_; - gridbuckets_ = gridwidth_ * gridheight_; -} - -// Compute the given grid coordinates from image coords. 
-void GridBase::GridCoords(int x, int y, int* grid_x, int* grid_y) const { - *grid_x = (x - bleft_.x()) / gridsize_; - *grid_y = (y - bleft_.y()) / gridsize_; - ClipGridCoords(grid_x, grid_y); -} - -// Clip the given grid coordinates to fit within the grid. -void GridBase::ClipGridCoords(int* x, int* y) const { - *x = ClipToRange(*x, 0, gridwidth_ - 1); - *y = ClipToRange(*y, 0, gridheight_ - 1); -} - -IntGrid::IntGrid() { - grid_ = nullptr; -} - -IntGrid::IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) - : grid_(nullptr) { - Init(gridsize, bleft, tright); -} - -IntGrid::~IntGrid() { - delete [] grid_; -} - -// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, -// and bleft, tright are the bounding box of everything to go in it. -void IntGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { - GridBase::Init(gridsize, bleft, tright); - delete [] grid_; - grid_ = new int[gridbuckets_]; - Clear(); -} - -// Clear all the ints in the grid to zero. -void IntGrid::Clear() { - for (int i = 0; i < gridbuckets_; ++i) { - grid_[i] = 0; - } -} - -// Rotate the grid by rotation, keeping cell contents. -// rotation must be a multiple of 90 degrees. -// NOTE: due to partial cells, cell coverage in the rotated grid will be -// inexact. This is why there is no Rotate for the generic BBGrid. -// TODO(rays) investigate fixing this inaccuracy by moving the origin after -// rotation. -void IntGrid::Rotate(const FCOORD& rotation) { - ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f); - ICOORD old_bleft(bleft()); - ICOORD old_tright(tright()); - int old_width = gridwidth(); - int old_height = gridheight(); - TBOX box(bleft(), tright()); - box.rotate(rotation); - int* old_grid = grid_; - grid_ = nullptr; - Init(gridsize(), box.botleft(), box.topright()); - // Iterate over the old grid, copying data to the rotated position in the new. 
- int oldi = 0; - FCOORD x_step(rotation); - x_step *= gridsize(); - for (int oldy = 0; oldy < old_height; ++oldy) { - FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy); - line_pos.rotate(rotation); - for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { - int grid_x, grid_y; - GridCoords(static_cast(line_pos.x() + 0.5), - static_cast(line_pos.y() + 0.5), - &grid_x, &grid_y); - grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; - } - } - delete [] old_grid; -} - -// Returns a new IntGrid containing values equal to the sum of all the -// neighbouring cells. The returned grid must be deleted after use. -// For ease of implementation, edge cells are double counted, to make them -// have the same range as the non-edge cells. -IntGrid* IntGrid::NeighbourhoodSum() const { - IntGrid* sumgrid = new IntGrid(gridsize(), bleft(), tright()); - for (int y = 0; y < gridheight(); ++y) { - for (int x = 0; x < gridwidth(); ++x) { - int cell_count = 0; - for (int yoffset = -1; yoffset <= 1; ++yoffset) { - for (int xoffset = -1; xoffset <= 1; ++xoffset) { - int grid_x = x + xoffset; - int grid_y = y + yoffset; - ClipGridCoords(&grid_x, &grid_y); - cell_count += GridCellValue(grid_x, grid_y); - } - } - if (GridCellValue(x, y) > 1) - sumgrid->SetGridCell(x, y, cell_count); - } - } - return sumgrid; -} - -// Returns true if more than half the area of the rect is covered by grid -// cells that are over the threshold. 
-bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const { - int min_x, min_y, max_x, max_y; - GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); - GridCoords(rect.right(), rect.top(), &max_x, &max_y); - int total_area = 0; - for (int y = min_y; y <= max_y; ++y) { - for (int x = min_x; x <= max_x; ++x) { - int value = GridCellValue(x, y); - if (value > threshold) { - TBOX cell_box(x * gridsize_, y * gridsize_, - (x + 1) * gridsize_, (y + 1) * gridsize_); - cell_box &= rect; // This is in-place box intersection. - total_area += cell_box.area(); - } - } - } - return total_area * 2 > rect.area(); -} - -// Returns true if any cell value in the given rectangle is zero. -bool IntGrid::AnyZeroInRect(const TBOX& rect) const { - int min_x, min_y, max_x, max_y; - GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); - GridCoords(rect.right(), rect.top(), &max_x, &max_y); - for (int y = min_y; y <= max_y; ++y) { - for (int x = min_x; x <= max_x; ++x) { - if (GridCellValue(x, y) == 0) - return true; - } - } - return false; -} - -// Returns a full-resolution binary pix in which each cell over the given -// threshold is filled as a black square. pixDestroy after use. -// Edge cells, which have a zero 4-neighbour, are not marked. -Pix* IntGrid::ThresholdToPix(int threshold) const { - Pix* pix = pixCreate(tright().x() - bleft().x(), - tright().y() - bleft().y(), 1); - int cellsize = gridsize(); - for (int y = 0; y < gridheight(); ++y) { - for (int x = 0; x < gridwidth(); ++x) { - if (GridCellValue(x, y) > threshold && - GridCellValue(x - 1, y) > 0 && GridCellValue(x + 1, y) > 0 && - GridCellValue(x, y - 1) > 0 && GridCellValue(x, y + 1) > 0) { - pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize), - cellsize, cellsize, PIX_SET, nullptr, 0, 0); - } - } - } - return pix; -} - -// Make a Pix of the correct scaled size for the TraceOutline functions. 
-static Pix* GridReducedPix(const TBOX& box, int gridsize, - ICOORD bleft, int* left, int* bottom) { - // Compute grid bounds of the outline and pad all round by 1. - int grid_left = (box.left() - bleft.x()) / gridsize - 1; - int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1; - int grid_right = (box.right() - bleft.x()) / gridsize + 1; - int grid_top = (box.top() - bleft.y()) / gridsize + 1; - *left = grid_left; - *bottom = grid_bottom; - return pixCreate(grid_right - grid_left + 1, - grid_top - grid_bottom + 1, - 1); -} - -// Helper function to return a scaled Pix with one pixel per grid cell, -// set (black) where the given outline enters the corresponding grid cell, -// and clear where the outline does not touch the grid cell. -// Also returns the grid coords of the bottom-left of the Pix, in *left -// and *bottom, which corresponds to (0, 0) on the Pix. -// Note that the Pix is used upside-down, with (0, 0) being the bottom-left. -Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, - ICOORD bleft, int* left, int* bottom) { - const TBOX& box = outline->bounding_box(); - Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); - int wpl = pixGetWpl(pix); - l_uint32* data = pixGetData(pix); - int length = outline->pathlength(); - ICOORD pos = outline->start_pos(); - for (int i = 0; i < length; ++i) { - int grid_x = (pos.x() - bleft.x()) / gridsize - *left; - int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; - SET_DATA_BIT(data + grid_y * wpl, grid_x); - pos += outline->step(i); - } - return pix; -} -#if 0 // Example code shows how to use TraceOutlineOnReducedPix. - C_OUTLINE_IT ol_it(blob->cblob()->out_list()); - int grid_left, grid_bottom; - Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_, - &grid_left, &grid_bottom); - grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob); - pixDestroy(&pix); -#endif - -// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. 
-Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, - ICOORD bleft, int* left, int* bottom) { - const TBOX& box = block->pdblk.bounding_box(); - Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); - int wpl = pixGetWpl(pix); - l_uint32* data = pixGetData(pix); - ICOORDELT_IT it(block->pdblk.poly_block()->points()); - for (it.mark_cycle_pt(); !it.cycled_list();) { - ICOORD pos = *it.data(); - it.forward(); - ICOORD next_pos = *it.data(); - ICOORD line_vector = next_pos - pos; - int major, minor; - ICOORD major_step, minor_step; - line_vector.setup_render(&major_step, &minor_step, &major, &minor); - int accumulator = major / 2; - while (pos != next_pos) { - int grid_x = (pos.x() - bleft.x()) / gridsize - *left; - int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; - SET_DATA_BIT(data + grid_y * wpl, grid_x); - pos += major_step; - accumulator += minor; - if (accumulator >= major) { - accumulator -= major; - pos += minor_step; - } - } - } - return pix; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/bbgrid.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/bbgrid.h deleted file mode 100644 index 1fd9e85a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/bbgrid.h +++ /dev/null @@ -1,960 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: bbgrid.h -// Description: Class to hold BLOBNBOXs in a grid for fast access -// to neighbours. -// Author: Ray Smith -// Created: Wed Jun 06 17:22:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_BBGRID_H_ -#define TESSERACT_TEXTORD_BBGRID_H_ - -#include - -#include "clst.h" -#include "coutln.h" -#include "rect.h" -#include "scrollview.h" - -#include "allheaders.h" - -class BLOCK; - -namespace tesseract { - -// Helper function to return a scaled Pix with one pixel per grid cell, -// set (black) where the given outline enters the corresponding grid cell, -// and clear where the outline does not touch the grid cell. -// Also returns the grid coords of the bottom-left of the Pix, in *left -// and *bottom, which corresponds to (0, 0) on the Pix. -// Note that the Pix is used upside-down, with (0, 0) being the bottom-left. -Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, - ICOORD bleft, int* left, int* bottom); -// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. -Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, - ICOORD bleft, int* left, int* bottom); - -template class GridSearch; - -// The GridBase class is the base class for BBGrid and IntGrid. -// It holds the geometry and scale of the grid. -class GridBase { - public: - GridBase() = default; - GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~GridBase(); - - // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, - // and bleft, tright are the bounding box of everything to go in it. - void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); - - // Simple accessors. 
- int gridsize() const { - return gridsize_; - } - int gridwidth() const { - return gridwidth_; - } - int gridheight() const { - return gridheight_; - } - const ICOORD& bleft() const { - return bleft_; - } - const ICOORD& tright() const { - return tright_; - } - // Compute the given grid coordinates from image coords. - void GridCoords(int x, int y, int* grid_x, int* grid_y) const; - - // Clip the given grid coordinates to fit within the grid. - void ClipGridCoords(int* x, int* y) const; - - protected: - // TODO(rays) Make these private and migrate to the accessors in subclasses. - int gridsize_; // Pixel size of each grid cell. - int gridwidth_; // Size of the grid in cells. - int gridheight_; - int gridbuckets_; // Total cells in grid. - ICOORD bleft_; // Pixel coords of bottom-left of grid. - ICOORD tright_; // Pixel coords of top-right of grid. - - private: -}; - -// The IntGrid maintains a single int for each cell in a grid. -class IntGrid : public GridBase { - public: - IntGrid(); - IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~IntGrid(); - - // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, - // and bleft, tright are the bounding box of everything to go in it. - void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); - - // Clear all the ints in the grid to zero. - void Clear(); - - // Rotate the grid by rotation, keeping cell contents. - // rotation must be a multiple of 90 degrees. - // NOTE: due to partial cells, cell coverage in the rotated grid will be - // inexact. This is why there is no Rotate for the generic BBGrid. - void Rotate(const FCOORD& rotation); - - // Returns a new IntGrid containing values equal to the sum of all the - // neighbouring cells. The returned grid must be deleted after use. 
- IntGrid* NeighbourhoodSum() const; - - int GridCellValue(int grid_x, int grid_y) const { - ClipGridCoords(&grid_x, &grid_y); - return grid_[grid_y * gridwidth_ + grid_x]; - } - void SetGridCell(int grid_x, int grid_y, int value) { - ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth()); - ASSERT_HOST(grid_y >= 0 && grid_y < gridheight()); - grid_[grid_y * gridwidth_ + grid_x] = value; - } - // Returns true if more than half the area of the rect is covered by grid - // cells that are over the threshold. - bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const; - - // Returns true if any cell value in the given rectangle is zero. - bool AnyZeroInRect(const TBOX& rect) const; - - // Returns a full-resolution binary pix in which each cell over the given - // threshold is filled as a black square. pixDestroy after use. - Pix* ThresholdToPix(int threshold) const; - - private: - int* grid_; // 2-d array of ints. -}; - -// The BBGrid class holds C_LISTs of template classes BBC (bounding box class) -// in a grid for fast neighbour access. -// The BBC class must have a member const TBOX& bounding_box() const. -// The BBC class must have been CLISTIZEH'ed elsewhere to make the -// list class BBC_CLIST and the iterator BBC_C_IT. -// Use of C_LISTs enables BBCs to exist in multiple cells simultaneously. -// As a consequence, ownership of BBCs is assumed to be elsewhere and -// persistent for at least the life of the BBGrid, or at least until Clear is -// called which removes all references to inserted objects without actually -// deleting them. -// Most uses derive a class from a specific instantiation of BBGrid, -// thereby making most of the ugly template notation go away. -// The friend class GridSearch, with the same template arguments, is -// used to search a grid efficiently in one of several search patterns. 
-template class BBGrid - : public GridBase { - friend class GridSearch; - public: - BBGrid(); - BBGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~BBGrid(); - - // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, - // and bleft, tright are the bounding box of everything to go in it. - void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); - - // Empty all the lists but leave the grid itself intact. - void Clear(); - // Deallocate the data in the lists but otherwise leave the lists and the grid - // intact. - void ClearGridData(void (*free_method)(BBC*)); - - // Insert a bbox into the appropriate place in the grid. - // If h_spread, then all cells covered horizontally by the box are - // used, otherwise, just the bottom-left. Similarly for v_spread. - // WARNING: InsertBBox may invalidate an active GridSearch. Call - // RepositionIterator() on any GridSearches that are active on this grid. - void InsertBBox(bool h_spread, bool v_spread, BBC* bbox); - - // Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in - // which each pixel corresponds to a grid cell, insert a bbox into every - // place in the grid where the corresponding pixel is 1. The Pix is handled - // upside-down to match the Tesseract coordinate system. (As created by - // TraceOutlineOnReducedPix or TraceBlockOnReducedPix.) - // (0, 0) in the pix corresponds to (left, bottom) in the - // grid (in grid coords), and the pix works up the grid from there. - // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call - // RepositionIterator() on any GridSearches that are active on this grid. - void InsertPixPtBBox(int left, int bottom, Pix* pix, BBC* bbox); - - // Remove the bbox from the grid. - // WARNING: Any GridSearch operating on this grid could be invalidated! - // If a GridSearch is operating, call GridSearch::RemoveBBox() instead. 
- void RemoveBBox(BBC* bbox); - - // Returns true if the given rectangle has no overlapping elements. - bool RectangleEmpty(const TBOX& rect); - - // Returns an IntGrid showing the number of elements in each cell. - // Returned IntGrid must be deleted after use. - IntGrid* CountCellElements(); - - // Make a window of an appropriate size to display things in the grid. - ScrollView* MakeWindow(int x, int y, const char* window_name); - - // Display the bounding boxes of the BLOBNBOXes in this grid. - // Use of this function requires an additional member of the BBC class: - // ScrollView::Color BBC::BoxColor() const. - void DisplayBoxes(ScrollView* window); - - // ASSERT_HOST that every cell contains no more than one copy of each entry. - void AssertNoDuplicates(); - - // Handle a click event in a display window. - virtual void HandleClick(int x, int y); - - protected: - BBC_CLIST* grid_; // 2-d array of CLISTS of BBC elements. - - private: -}; - -// Hash functor for generic pointers. -template struct PtrHash { - size_t operator()(const T* ptr) const { - return reinterpret_cast(ptr) / sizeof(T); - } -}; - - -// The GridSearch class enables neighbourhood searching on a BBGrid. -template class GridSearch { - public: - GridSearch(BBGrid* grid) - : grid_(grid), unique_mode_(false), - previous_return_(nullptr), next_return_(nullptr) { - } - - // Get the grid x, y coords of the most recently returned BBC. - int GridX() const { - return x_; - } - int GridY() const { - return y_; - } - - // Sets the search mode to return a box only once. - // Efficiency warning: Implementation currently uses a squared-order - // search in the number of returned elements. Use only where a small - // number of elements are spread over a wide area, eg ColPartitions. - void SetUniqueMode(bool mode) { - unique_mode_ = mode; - } - // TODO(rays) Replace calls to ReturnedSeedElement with SetUniqueMode. - // It only works if the search includes the bottom-left corner. 
- // Apart from full search, all other searches return a box several - // times if the box is inserted with h_spread or v_spread. - // This method will return true for only one occurrence of each box - // that was inserted with both h_spread and v_spread as true. - // It will usually return false for boxes that were not inserted with - // both h_spread=true and v_spread=true - bool ReturnedSeedElement() const { - TBOX box = previous_return_->bounding_box(); - int x_center = (box.left()+box.right())/2; - int y_center = (box.top()+box.bottom())/2; - int grid_x, grid_y; - grid_->GridCoords(x_center, y_center, &grid_x, &grid_y); - return (x_ == grid_x) && (y_ == grid_y); - } - - // Various searching iterations... Note that these iterations - // all share data members, so you can't run more than one iteration - // in parallel in a single GridSearch instance, but multiple instances - // can search the same BBGrid in parallel. - // Note that all the searches can return blobs that may not exactly - // match the search conditions, since they return everything in the - // covered grid cells. It is up to the caller to check for - // appropriateness. - // TODO(rays) NextRectSearch only returns valid elements. Make the other - // searches test before return also and remove the tests from code - // that uses GridSearch. - - // Start a new full search. Will iterate all stored blobs, from the top. - // If the blobs have been inserted using InsertBBox, (not InsertPixPtBBox) - // then the full search guarantees to return each blob in the grid once. - // Other searches may return a blob more than once if they have been - // inserted using h_spread or v_spread. - void StartFullSearch(); - // Return the next bbox in the search or nullptr if done. - BBC* NextFullSearch(); - - // Start a new radius search. Will search in a spiral up to a - // given maximum radius in grid cells from the given center in pixels. 
- void StartRadSearch(int x, int y, int max_radius); - // Return the next bbox in the radius search or nullptr if the - // maximum radius has been reached. - BBC* NextRadSearch(); - - // Start a new left or right-looking search. Will search to the side - // for a box that vertically overlaps the given vertical line segment. - // CAVEAT: This search returns all blobs from the cells to the side - // of the start, and somewhat below, since there is no guarantee - // that there may not be a taller object in a lower cell. The - // blobs returned will include all those that vertically overlap and - // are no more than twice as high, but may also include some that do - // not overlap and some that are more than twice as high. - void StartSideSearch(int x, int ymin, int ymax); - // Return the next bbox in the side search or nullptr if the - // edge has been reached. Searches left to right or right to left - // according to the flag. - BBC* NextSideSearch(bool right_to_left); - - // Start a vertical-looking search. Will search up or down - // for a box that horizontally overlaps the given line segment. - void StartVerticalSearch(int xmin, int xmax, int y); - // Return the next bbox in the vertical search or nullptr if the - // edge has been reached. Searches top to bottom or bottom to top - // according to the flag. - BBC* NextVerticalSearch(bool top_to_bottom); - - // Start a rectangular search. Will search for a box that overlaps the - // given rectangle. - void StartRectSearch(const TBOX& rect); - // Return the next bbox in the rectangular search or nullptr if complete. - BBC* NextRectSearch(); - - // Remove the last returned BBC. Will not invalidate this. May invalidate - // any other concurrent GridSearch on the same grid. If any others are - // in use, call RepositionIterator on those, to continue without harm. - void RemoveBBox(); - void RepositionIterator(); - - private: - // Factored out helper to start a search. 
- void CommonStart(int x, int y); - // Factored out helper to complete a next search. - BBC* CommonNext(); - // Factored out final return when search is exhausted. - BBC* CommonEnd(); - // Factored out function to set the iterator to the current x_, y_ - // grid coords and mark the cycle pt. - void SetIterator(); - - private: - // The grid we are searching. - BBGrid* grid_; - // For executing a search. The different search algorithms use these in - // different ways, but most use x_origin_ and y_origin_ as the start position. - int x_origin_; - int y_origin_; - int max_radius_; - int radius_; - int rad_index_; - int rad_dir_; - TBOX rect_; - int x_; // The current location in grid coords, of the current search. - int y_; - bool unique_mode_; - BBC* previous_return_; // Previous return from Next*. - BBC* next_return_; // Current value of it_.data() used for repositioning. - // An iterator over the list at (x_, y_) in the grid_. - BBC_C_IT it_; - // Set of unique returned elements used when unique_mode_ is true. - std::unordered_set > returns_; -}; - -// Sort function to sort a BBC by bounding_box().left(). -template -int SortByBoxLeft(const void* void1, const void* void2) { - // The void*s are actually doubly indirected, so get rid of one level. - const BBC* p1 = *static_cast(void1); - const BBC* p2 = *static_cast(void2); - int result = p1->bounding_box().left() - p2->bounding_box().left(); - if (result != 0) - return result; - result = p1->bounding_box().right() - p2->bounding_box().right(); - if (result != 0) - return result; - result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); - if (result != 0) - return result; - return p1->bounding_box().top() - p2->bounding_box().top(); -} - -// Sort function to sort a BBC by bounding_box().right() in right-to-left order. -template -int SortRightToLeft(const void* void1, const void* void2) { - // The void*s are actually doubly indirected, so get rid of one level. 
- const BBC* p1 = *static_cast(void1); - const BBC* p2 = *static_cast(void2); - int result = p2->bounding_box().right() - p1->bounding_box().right(); - if (result != 0) - return result; - result = p2->bounding_box().left() - p1->bounding_box().left(); - if (result != 0) - return result; - result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); - if (result != 0) - return result; - return p1->bounding_box().top() - p2->bounding_box().top(); -} - -// Sort function to sort a BBC by bounding_box().bottom(). -template -int SortByBoxBottom(const void* void1, const void* void2) { - // The void*s are actually doubly indirected, so get rid of one level. - const BBC* p1 = *static_cast(void1); - const BBC* p2 = *static_cast(void2); - int result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); - if (result != 0) - return result; - result = p1->bounding_box().top() - p2->bounding_box().top(); - if (result != 0) - return result; - result = p1->bounding_box().left() - p2->bounding_box().left(); - if (result != 0) - return result; - return p1->bounding_box().right() - p2->bounding_box().right(); -} - -/////////////////////////////////////////////////////////////////////// -// BBGrid IMPLEMENTATION. -/////////////////////////////////////////////////////////////////////// -template -BBGrid::BBGrid() : grid_(nullptr) { -} - -template -BBGrid::BBGrid( - int gridsize, const ICOORD& bleft, const ICOORD& tright) - : grid_(nullptr) { - Init(gridsize, bleft, tright); -} - -template -BBGrid::~BBGrid() { - delete [] grid_; -} - -// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, -// and bleft, tright are the bounding box of everything to go in it. -template -void BBGrid::Init(int gridsize, - const ICOORD& bleft, - const ICOORD& tright) { - GridBase::Init(gridsize, bleft, tright); - delete [] grid_; - grid_ = new BBC_CLIST[gridbuckets_]; -} - -// Clear all lists, but leave the array of lists present. 
-template -void BBGrid::Clear() { - for (int i = 0; i < gridbuckets_; ++i) { - grid_[i].shallow_clear(); - } -} - -// Deallocate the data in the lists but otherwise leave the lists and the grid -// intact. -template -void BBGrid::ClearGridData( - void (*free_method)(BBC*)) { - if (grid_ == nullptr) return; - GridSearch search(this); - search.StartFullSearch(); - BBC* bb; - BBC_CLIST bb_list; - BBC_C_IT it(&bb_list); - while ((bb = search.NextFullSearch()) != nullptr) { - it.add_after_then_move(bb); - } - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - free_method(it.data()); - } -} - -// Insert a bbox into the appropriate place in the grid. -// If h_spread, then all cells covered horizontally by the box are -// used, otherwise, just the bottom-left. Similarly for v_spread. -// WARNING: InsertBBox may invalidate an active GridSearch. Call -// RepositionIterator() on any GridSearches that are active on this grid. -template -void BBGrid::InsertBBox(bool h_spread, bool v_spread, - BBC* bbox) { - TBOX box = bbox->bounding_box(); - int start_x, start_y, end_x, end_y; - GridCoords(box.left(), box.bottom(), &start_x, &start_y); - GridCoords(box.right(), box.top(), &end_x, &end_y); - if (!h_spread) - end_x = start_x; - if (!v_spread) - end_y = start_y; - int grid_index = start_y * gridwidth_; - for (int y = start_y; y <= end_y; ++y, grid_index += gridwidth_) { - for (int x = start_x; x <= end_x; ++x) { - grid_[grid_index + x].add_sorted(SortByBoxLeft, true, bbox); - } - } -} - -// Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in -// which each pixel corresponds to a grid cell, insert a bbox into every -// place in the grid where the corresponding pixel is 1. The Pix is handled -// upside-down to match the Tesseract coordinate system. (As created by -// TraceOutlineOnReducedPix or TraceBlockOnReducedPix.) -// (0, 0) in the pix corresponds to (left, bottom) in the -// grid (in grid coords), and the pix works up the grid from there. 
-// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call -// RepositionIterator() on any GridSearches that are active on this grid. -template -void BBGrid::InsertPixPtBBox(int left, int bottom, - Pix* pix, BBC* bbox) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - for (int y = 0; y < height; ++y) { - l_uint32* data = pixGetData(pix) + y * pixGetWpl(pix); - for (int x = 0; x < width; ++x) { - if (GET_DATA_BIT(data, x)) { - grid_[(bottom + y) * gridwidth_ + x + left]. - add_sorted(SortByBoxLeft, true, bbox); - } - } - } -} - -// Remove the bbox from the grid. -// WARNING: Any GridSearch operating on this grid could be invalidated! -// If a GridSearch is operating, call GridSearch::RemoveBBox() instead. -template -void BBGrid::RemoveBBox(BBC* bbox) { - TBOX box = bbox->bounding_box(); - int start_x, start_y, end_x, end_y; - GridCoords(box.left(), box.bottom(), &start_x, &start_y); - GridCoords(box.right(), box.top(), &end_x, &end_y); - int grid_index = start_y * gridwidth_; - for (int y = start_y; y <= end_y; ++y, grid_index += gridwidth_) { - for (int x = start_x; x <= end_x; ++x) { - BBC_C_IT it(&grid_[grid_index + x]); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (it.data() == bbox) - it.extract(); - } - } - } -} - -// Returns true if the given rectangle has no overlapping elements. -template -bool BBGrid::RectangleEmpty(const TBOX& rect) { - GridSearch rsearch(this); - rsearch.StartRectSearch(rect); - return rsearch.NextRectSearch() == nullptr; -} - -// Returns an IntGrid showing the number of elements in each cell. -// Returned IntGrid must be deleted after use. 
-template -IntGrid* BBGrid::CountCellElements() { - IntGrid* intgrid = new IntGrid(gridsize(), bleft(), tright()); - for (int y = 0; y < gridheight(); ++y) { - for (int x = 0; x < gridwidth(); ++x) { - int cell_count = grid_[y * gridwidth() + x].length(); - intgrid->SetGridCell(x, y, cell_count); - } - } - return intgrid; -} - - -template class TabEventHandler : public SVEventHandler { - public: - explicit TabEventHandler(G* grid) : grid_(grid) { - } - void Notify(const SVEvent* sv_event) { - if (sv_event->type == SVET_CLICK) { - grid_->HandleClick(sv_event->x, sv_event->y); - } - } - private: - G* grid_; -}; - -// Make a window of an appropriate size to display things in the grid. -// Position the window at the given x,y. -template -ScrollView* BBGrid::MakeWindow( - int x, int y, const char* window_name) { - ScrollView* tab_win = nullptr; -#ifndef GRAPHICS_DISABLED - tab_win = new ScrollView(window_name, x, y, - tright_.x() - bleft_.x(), - tright_.y() - bleft_.y(), - tright_.x() - bleft_.x(), - tright_.y() - bleft_.y(), - true); - TabEventHandler >* handler = - new TabEventHandler >(this); - tab_win->AddEventHandler(handler); - tab_win->Pen(ScrollView::GREY); - tab_win->Rectangle(0, 0, tright_.x() - bleft_.x(), tright_.y() - bleft_.y()); -#endif - return tab_win; -} - -// Create a window at (x,y) and display the bounding boxes of the -// BLOBNBOXes in this grid. -// Use of this function requires an additional member of the BBC class: -// ScrollView::Color BBC::BoxColor() const. -template -void BBGrid::DisplayBoxes(ScrollView* tab_win) { -#ifndef GRAPHICS_DISABLED - tab_win->Pen(ScrollView::BLUE); - tab_win->Brush(ScrollView::NONE); - - // For every bbox in the grid, display it. 
- GridSearch gsearch(this); - gsearch.StartFullSearch(); - BBC* bbox; - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - const TBOX& box = bbox->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - ScrollView::Color box_color = bbox->BoxColor(); - tab_win->Pen(box_color); - tab_win->Rectangle(left_x, bottom_y, right_x, top_y); - } - tab_win->Update(); -#endif -} - -// ASSERT_HOST that every cell contains no more than one copy of each entry. -template -void BBGrid::AssertNoDuplicates() { - // Process all grid cells. - for (int i = gridwidth_ * gridheight_ - 1; i >= 0; --i) { - // Iterate over all elements excent the last. - for (BBC_C_IT it(&grid_[i]); !it.at_last(); it.forward()) { - BBC* ptr = it.data(); - BBC_C_IT it2(it); - // None of the rest of the elements in the list should equal ptr. - for (it2.forward(); !it2.at_first(); it2.forward()) { - ASSERT_HOST(it2.data() != ptr); - } - } - } -} - -// Handle a click event in a display window. -template -void BBGrid::HandleClick(int x, int y) { - tprintf("Click at (%d, %d)\n", x, y); -} - -/////////////////////////////////////////////////////////////////////// -// GridSearch IMPLEMENTATION. -/////////////////////////////////////////////////////////////////////// - -// Start a new full search. Will iterate all stored blobs. -template -void GridSearch::StartFullSearch() { - // Full search uses x_ and y_ as the current grid - // cell being searched. - CommonStart(grid_->bleft_.x(), grid_->tright_.y()); -} - -// Return the next bbox in the search or nullptr if done. -// The other searches will return a box that overlaps the grid cell -// thereby duplicating boxes, but NextFullSearch only returns each box once. 
-template -BBC* GridSearch::NextFullSearch() { - int x; - int y; - do { - while (it_.cycled_list()) { - ++x_; - if (x_ >= grid_->gridwidth_) { - --y_; - if (y_ < 0) - return CommonEnd(); - x_ = 0; - } - SetIterator(); - } - CommonNext(); - TBOX box = previous_return_->bounding_box(); - grid_->GridCoords(box.left(), box.bottom(), &x, &y); - } while (x != x_ || y != y_); - return previous_return_; -} - -// Start a new radius search. -template -void GridSearch::StartRadSearch(int x, int y, - int max_radius) { - // Rad search uses x_origin_ and y_origin_ as the center of the circle. - // The radius_ is the radius of the (diamond-shaped) circle and - // rad_index/rad_dir_ combine to determine the position around it. - max_radius_ = max_radius; - radius_ = 0; - rad_index_ = 0; - rad_dir_ = 3; - CommonStart(x, y); -} - -// Return the next bbox in the radius search or nullptr if the -// maximum radius has been reached. -template -BBC* GridSearch::NextRadSearch() { - do { - while (it_.cycled_list()) { - ++rad_index_; - if (rad_index_ >= radius_) { - ++rad_dir_; - rad_index_ = 0; - if (rad_dir_ >= 4) { - ++radius_; - if (radius_ > max_radius_) - return CommonEnd(); - rad_dir_ = 0; - } - } - ICOORD offset = C_OUTLINE::chain_step(rad_dir_); - offset *= radius_ - rad_index_; - offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_; - x_ = x_origin_ + offset.x(); - y_ = y_origin_ + offset.y(); - if (x_ >= 0 && x_ < grid_->gridwidth_ && - y_ >= 0 && y_ < grid_->gridheight_) - SetIterator(); - } - CommonNext(); - } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); - if (unique_mode_) - returns_.insert(previous_return_); - return previous_return_; -} - -// Start a new left or right-looking search. Will search to the side -// for a box that vertically overlaps the given vertical line segment. 
-template -void GridSearch::StartSideSearch(int x, - int ymin, int ymax) { - // Right search records the x in x_origin_, the ymax in y_origin_ - // and the size of the vertical strip to search in radius_. - // To guarantee finding overlapping objects of up to twice the - // given size, double the height. - radius_ = ((ymax - ymin) * 2 + grid_->gridsize_ - 1) / grid_->gridsize_; - rad_index_ = 0; - CommonStart(x, ymax); -} - -// Return the next bbox in the side search or nullptr if the -// edge has been reached. Searches left to right or right to left -// according to the flag. -template -BBC* GridSearch::NextSideSearch(bool right_to_left) { - do { - while (it_.cycled_list()) { - ++rad_index_; - if (rad_index_ > radius_) { - if (right_to_left) - --x_; - else - ++x_; - rad_index_ = 0; - if (x_ < 0 || x_ >= grid_->gridwidth_) - return CommonEnd(); - } - y_ = y_origin_ - rad_index_; - if (y_ >= 0 && y_ < grid_->gridheight_) - SetIterator(); - } - CommonNext(); - } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); - if (unique_mode_) - returns_.insert(previous_return_); - return previous_return_; -} - -// Start a vertical-looking search. Will search up or down -// for a box that horizontally overlaps the given line segment. -template -void GridSearch::StartVerticalSearch(int xmin, - int xmax, - int y) { - // Right search records the xmin in x_origin_, the y in y_origin_ - // and the size of the horizontal strip to search in radius_. - radius_ = (xmax - xmin + grid_->gridsize_ - 1) / grid_->gridsize_; - rad_index_ = 0; - CommonStart(xmin, y); -} - -// Return the next bbox in the vertical search or nullptr if the -// edge has been reached. Searches top to bottom or bottom to top -// according to the flag. 
-template -BBC* GridSearch::NextVerticalSearch( - bool top_to_bottom) { - do { - while (it_.cycled_list()) { - ++rad_index_; - if (rad_index_ > radius_) { - if (top_to_bottom) - --y_; - else - ++y_; - rad_index_ = 0; - if (y_ < 0 || y_ >= grid_->gridheight_) - return CommonEnd(); - } - x_ = x_origin_ + rad_index_; - if (x_ >= 0 && x_ < grid_->gridwidth_) - SetIterator(); - } - CommonNext(); - } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); - if (unique_mode_) - returns_.insert(previous_return_); - return previous_return_; -} - -// Start a rectangular search. Will search for a box that overlaps the -// given rectangle. -template -void GridSearch::StartRectSearch(const TBOX& rect) { - // Rect search records the xmin in x_origin_, the ymin in y_origin_ - // and the xmax in max_radius_. - // The search proceeds left to right, top to bottom. - rect_ = rect; - CommonStart(rect.left(), rect.top()); - grid_->GridCoords(rect.right(), rect.bottom(), // - rect.height(), - &max_radius_, &y_origin_); -} - -// Return the next bbox in the rectangular search or nullptr if complete. -template -BBC* GridSearch::NextRectSearch() { - do { - while (it_.cycled_list()) { - ++x_; - if (x_ > max_radius_) { - --y_; - x_ = x_origin_; - if (y_ < y_origin_) - return CommonEnd(); - } - SetIterator(); - } - CommonNext(); - } while (!rect_.overlap(previous_return_->bounding_box()) || - (unique_mode_ && returns_.find(previous_return_) != returns_.end())); - if (unique_mode_) - returns_.insert(previous_return_); - return previous_return_; -} - -// Remove the last returned BBC. Will not invalidate this. May invalidate -// any other concurrent GridSearch on the same grid. If any others are -// in use, call RepositionIterator on those, to continue without harm. 
-template -void GridSearch::RemoveBBox() { - if (previous_return_ != nullptr) { - // Remove all instances of previous_return_ from the list, so the iterator - // remains valid after removal from the rest of the grid cells. - // if previous_return_ is not on the list, then it has been removed already. - BBC* prev_data = nullptr; - BBC* new_previous_return = nullptr; - it_.move_to_first(); - for (it_.mark_cycle_pt(); !it_.cycled_list();) { - if (it_.data() == previous_return_) { - new_previous_return = prev_data; - it_.extract(); - it_.forward(); - next_return_ = it_.cycled_list() ? nullptr : it_.data(); - } else { - prev_data = it_.data(); - it_.forward(); - } - } - grid_->RemoveBBox(previous_return_); - previous_return_ = new_previous_return; - RepositionIterator(); - } -} - -template -void GridSearch::RepositionIterator() { - // Something was deleted, so we have little choice but to clear the - // returns list. - returns_.clear(); - // Reset the iterator back to one past the previous return. - // If the previous_return_ is no longer in the list, then - // next_return_ serves as a backup. - it_.move_to_first(); - // Special case, the first element was removed and reposition - // iterator was called. In this case, the data is fine, but the - // cycle point is not. Detect it and return. - if (!it_.empty() && it_.data() == next_return_) { - it_.mark_cycle_pt(); - return; - } - for (it_.mark_cycle_pt(); !it_.cycled_list(); it_.forward()) { - if (it_.data() == previous_return_ || - it_.data_relative(1) == next_return_) { - CommonNext(); - return; - } - } - // We ran off the end of the list. Move to a new cell next time. - previous_return_ = nullptr; - next_return_ = nullptr; -} - -// Factored out helper to start a search. -template -void GridSearch::CommonStart(int x, int y) { - grid_->GridCoords(x, y, &x_origin_, &y_origin_); - x_ = x_origin_; - y_ = y_origin_; - SetIterator(); - previous_return_ = nullptr; - next_return_ = it_.empty() ? 
nullptr : it_.data(); - returns_.clear(); -} - -// Factored out helper to complete a next search. -template -BBC* GridSearch::CommonNext() { - previous_return_ = it_.data(); - it_.forward(); - next_return_ = it_.cycled_list() ? nullptr : it_.data(); - return previous_return_; -} - -// Factored out final return when search is exhausted. -template -BBC* GridSearch::CommonEnd() { - previous_return_ = nullptr; - next_return_ = nullptr; - return nullptr; -} - -// Factored out function to set the iterator to the current x_, y_ -// grid coords and mark the cycle pt. -template -void GridSearch::SetIterator() { - it_= &(grid_->grid_[y_ * grid_->gridwidth_ + x_]); - it_.mark_cycle_pt(); -} - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_BBGRID_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blkocc.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blkocc.cpp deleted file mode 100644 index 2a9af198..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blkocc.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/***************************************************************************** - * - * File: blkocc.cpp (Formerly blockocc.c) - * Description: Block Occupancy routines - * Author: Chris Newton - * Created: Fri Nov 8 - * Modified: - * Language: C++ - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1991, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************/ - -/* ----------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------- -*/ - -#include -#include -#include "errcode.h" -#include "drawtord.h" -#include "blkocc.h" -#include "helpers.h" - -double_VAR(textord_underline_threshold, 0.5, "Fraction of width occupied"); - -// Forward declarations of static functions -static void horizontal_cblob_projection(C_BLOB *blob, // blob to project - STATS *stats); // output -static void horizontal_coutline_projection(C_OUTLINE *outline, - STATS *stats); // output - -/** - * test_underline - * - * Check to see if the blob is an underline. - * Return TRUE if it is. - */ - -bool test_underline( //look for underlines - bool testing_on, //< drawing blob - C_BLOB* blob, //< blob to test - int16_t baseline, //< coords of baseline - int16_t xheight //< height of line -) { - int16_t occ; - int16_t blob_width; //width of blob - TBOX blob_box; //bounding box - int32_t desc_occ; - int32_t x_occ; - int32_t asc_occ; - STATS projection; - - blob_box = blob->bounding_box (); - blob_width = blob->bounding_box ().width (); - projection.set_range (blob_box.bottom (), blob_box.top () + 1); - if (testing_on) { - // blob->plot(to_win,GOLDENROD,GOLDENROD); - // line_color_index(to_win,GOLDENROD); - // move2d(to_win,blob_box.left(),baseline); - // draw2d(to_win,blob_box.right(),baseline); - // move2d(to_win,blob_box.left(),baseline+xheight); - // draw2d(to_win,blob_box.right(),baseline+xheight); - tprintf - ("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:", - blob->bounding_box ().left (), blob->bounding_box ().bottom (), - blob->bounding_box ().right (), blob->bounding_box ().top (), - baseline); - } - horizontal_cblob_projection(blob, &projection); - 
desc_occ = 0; - for (occ = blob_box.bottom (); occ < baseline; occ++) - if (occ <= blob_box.top () && projection.pile_count (occ) > desc_occ) - //max in region - desc_occ = projection.pile_count (occ); - x_occ = 0; - for (occ = baseline; occ <= baseline + xheight; occ++) - if (occ >= blob_box.bottom () && occ <= blob_box.top () - && projection.pile_count (occ) > x_occ) - //max in region - x_occ = projection.pile_count (occ); - asc_occ = 0; - for (occ = baseline + xheight + 1; occ <= blob_box.top (); occ++) - if (occ >= blob_box.bottom () && projection.pile_count (occ) > asc_occ) - asc_occ = projection.pile_count (occ); - if (testing_on) { - tprintf ("%d %d %d\n", desc_occ, x_occ, asc_occ); - } - if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) { - tprintf ("Bottom=%d, top=%d, base=%d, x=%d\n", - blob_box.bottom (), blob_box.top (), baseline, xheight); - projection.print(); - } - if (desc_occ > x_occ + x_occ - && desc_occ > blob_width * textord_underline_threshold) - return true; //real underline - return asc_occ > x_occ + x_occ && - asc_occ > blob_width * textord_underline_threshold; //overline - //neither -} - - -/** - * horizontal_cblob_projection - * - * Compute the horizontal projection of a cblob from its outlines - * and add to the given STATS. - */ - -static void horizontal_cblob_projection( //project outlines - C_BLOB *blob, //< blob to project - STATS *stats //< output - ) { - //outlines of blob - C_OUTLINE_IT out_it = blob->out_list (); - - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - horizontal_coutline_projection (out_it.data (), stats); - } -} - - -/** - * horizontal_coutline_projection - * - * Compute the horizontal projection of a outline from its outlines - * and add to the given STATS. 
- */ - -static void horizontal_coutline_projection( //project outlines - C_OUTLINE *outline, //< outline to project - STATS *stats //< output - ) { - ICOORD pos; //current point - ICOORD step; //edge step - int32_t length; //of outline - int16_t stepindex; //current step - C_OUTLINE_IT out_it = outline->child (); - - pos = outline->start_pos (); - length = outline->pathlength (); - for (stepindex = 0; stepindex < length; stepindex++) { - step = outline->step (stepindex); - if (step.y () > 0) { - stats->add (pos.y (), pos.x ()); - } - else if (step.y () < 0) { - stats->add (pos.y () - 1, -pos.x ()); - } - pos += step; - } - - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - horizontal_coutline_projection (out_it.data (), stats); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blkocc.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blkocc.h deleted file mode 100644 index bb0db400..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blkocc.h +++ /dev/null @@ -1,252 +0,0 @@ -/****************************************************************************** - * - * File: blkocc.h (Formerly blockocc.h) - * Description: Block Occupancy routines - * Author: Chris Newton - * Created: Fri Nov 8 - * Modified: - * Language: C++ - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1991, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************/ - -#ifndef BLKOCC_H -#define BLKOCC_H - -#include "params.h" -#include "elst.h" - -/*************************************************************************** -CLASS REGION_OCC - - The class REGION_OCC defines a section of outline which exists entirely - within a single region. The only data held is the min and max x limits of - the outline within the region. - - REGION_OCCs are held on lists, one list for each region. The lists are - built in sorted order of min x. Overlapping REGION_OCCs are not permitted on - a single list. An overlapping region to be added causes the existing region - to be extended. This extension may result in the following REGION_OCC on the - list overlapping the amended one. In this case the amended REGION_OCC is - further extended to include the range of the following one, so that the - following one can be deleted. - -****************************************************************************/ - -class REGION_OCC:public ELIST_LINK -{ - public: - float min_x; //Lowest x in region - float max_x; //Highest x in region - int16_t region_type; //Type of crossing - - REGION_OCC() = default; // constructor used - // only in COPIER etc - REGION_OCC( //constructor - float min, - float max, - int16_t region) { - min_x = min; - max_x = max; - region_type = region; - } -}; - -ELISTIZEH (REGION_OCC) -#define RANGE_IN_BAND(band_max, band_min, range_max, range_min) \ -(((range_min) >= (band_min)) && ((range_max) < (band_max))) -/************************************************************************ -Adapted from the following procedure so that it can be used in the bands -class in an include file... - -BOOL8 range_in_band[ - range within band? 
-int16_t band_max, -int16_t band_min, -int16_t range_max, -int16_t range_min] -{ - if ((range_min >= band_min) && (range_max < band_max)) - return TRUE; - else - return FALSE; -} -***********************************************************************/ -#define RANGE_OVERLAPS_BAND(band_max, band_min, range_max, range_min) \ -(((range_max) >= (band_min)) && ((range_min) < (band_max))) -/************************************************************************ -Adapted from the following procedure so that it can be used in the bands -class in an include file... - -BOOL8 range_overlaps_band[ - range crosses band? -int16_t band_max, -int16_t band_min, -int16_t range_max, -int16_t range_min] -{ - if ((range_max >= band_min) && (range_min < band_max)) - return TRUE; - else - return FALSE; -} -***********************************************************************/ -/********************************************************************** - Bands - ----- - - BAND 4 --------------------------------- - BAND 3 --------------------------------- - - BAND 2 - --------------------------------- - - BAND 1 - -Band 0 is the dot band - -Each band has an error margin above and below. An outline is not considered to -have significantly changed bands until it has moved out of the error margin. 
-*************************************************************************/ -class BAND -{ - public: - int16_t max_max; //upper max - int16_t max; //nominal max - int16_t min_max; //lower max - int16_t max_min; //upper min - int16_t min; //nominal min - int16_t min_min; //lower min - - BAND() = default; // constructor - - void set( // initialise a band - int16_t new_max_max, // upper max - int16_t new_max, // new nominal max - int16_t new_min_max, // new lower max - int16_t new_max_min, // new upper min - int16_t new_min, // new nominal min - int16_t new_min_min) { // new lower min - max_max = new_max_max; - max = new_max; - min_max = new_min_max; - max_min = new_max_min; - min = new_min; - min_min = new_min_min; - } - - bool in_minimal( //in minimal limits? - float y) { //y value - return (y >= max_min) && (y < min_max); - } - - bool in_nominal( //in nominal limits? - float y) { //y value - return (y >= min) && (y < max); - } - - bool in_maximal( //in maximal limits? - float y) { //y value - return (y >= min_min) && (y < max_max); - } - - //overlaps min limits? - bool range_overlaps_minimal(float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_OVERLAPS_BAND (min_max, max_min, y1, y2); - else - return RANGE_OVERLAPS_BAND (min_max, max_min, y2, y1); - } - - //overlaps nom limits? - bool range_overlaps_nominal(float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_OVERLAPS_BAND (max, min, y1, y2); - else - return RANGE_OVERLAPS_BAND (max, min, y2, y1); - } - - //overlaps max limits? - bool range_overlaps_maximal(float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_OVERLAPS_BAND (max_max, min_min, y1, y2); - else - return RANGE_OVERLAPS_BAND (max_max, min_min, y2, y1); - } - - bool range_in_minimal( //within min limits? 
- float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_IN_BAND (min_max, max_min, y1, y2); - else - return RANGE_IN_BAND (min_max, max_min, y2, y1); - } - - bool range_in_nominal( //within nom limits? - float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_IN_BAND (max, min, y1, y2); - else - return RANGE_IN_BAND (max, min, y2, y1); - } - - bool range_in_maximal( //within max limits? - float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_IN_BAND (max_max, min_min, y1, y2); - else - return RANGE_IN_BAND (max_max, min_min, y2, y1); - } -}; - -/* Standard positions */ - -#define MAX_NUM_BANDS 5 -#define UNDEFINED_BAND 99 -#define NO_LOWER_LIMIT -9999 -#define NO_UPPER_LIMIT 9999 - -#define DOT_BAND 0 - -/* Special occupancy code emitted for the 0 region at the end of a word */ - -#define END_OF_WERD_CODE 255 - -extern BOOL_VAR_H (blockocc_show_result, FALSE, "Show intermediate results"); -extern INT_VAR_H (blockocc_desc_height, 0, -"Descender height after normalisation"); -extern INT_VAR_H (blockocc_asc_height, 255, -"Ascender height after normalisation"); -extern INT_VAR_H (blockocc_band_count, 4, "Number of bands used"); -extern double_VAR_H (textord_underline_threshold, 0.9, -"Fraction of width occupied"); - -bool test_underline( //look for underlines - bool testing_on, //drawing blob - C_BLOB* blob, //blob to test - int16_t baseline, //coords of baseline - int16_t xheight //height of line -); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blobgrid.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blobgrid.cpp deleted file mode 100644 index 53a1d7b3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blobgrid.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: blobgrid.cpp -// Description: BBGrid of 
BLOBNBOX with useful BLOBNBOX-specific methods. -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Created: Sat Jun 11 10:30:01 PST 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "blobgrid.h" - -namespace tesseract { - -BlobGrid::BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) - : BBGrid(gridsize, bleft, tright) { -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -BlobGrid::~BlobGrid() = default; - -// Inserts all the blobs from the given list, with x and y spreading, -// without removing from the source list, so ownership remains with the -// source list. -void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) { - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (!blob->joined_to_prev()) - InsertBBox(true, true, blob); - } -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blobgrid.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blobgrid.h deleted file mode 100644 index a8a94414..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/blobgrid.h +++ /dev/null @@ -1,46 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: blobgrid.h -// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods. -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Created: Sat Jun 11 10:26:01 PST 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - - -#ifndef TESSERACT_TEXTORD_BLOBGRID_H_ -#define TESSERACT_TEXTORD_BLOBGRID_H_ - -#include "bbgrid.h" -#include "blobbox.h" - -CLISTIZEH(BLOBNBOX) - -namespace tesseract { - -using BlobGridSearch = GridSearch; - -class BlobGrid : public BBGrid { - public: - BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~BlobGrid(); - - // Inserts all the blobs from the given list, with x and y spreading, - // without removing from the source list, so ownership remains with the - // source list. - void InsertBlobList(BLOBNBOX_LIST* blobs); -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_TEXTORD_BLOBGRID_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/ccnontextdetect.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/ccnontextdetect.cpp deleted file mode 100644 index 734c75fd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/ccnontextdetect.cpp +++ /dev/null @@ -1,324 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ccnontextdetect.cpp -// Description: Connected-Component-based photo (non-text) detection. -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Created: Sat Jun 11 10:12:01 PST 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "ccnontextdetect.h" -#include "imagefind.h" -#include "strokewidth.h" - -namespace tesseract { - -// Max number of neighbour small objects per squared gridsize before a grid -// cell becomes image. -const double kMaxSmallNeighboursPerPix = 1.0 / 32; -// Max number of small blobs a large blob may overlap before it is rejected -// and determined to be image. -const int kMaxLargeOverlapsWithSmall = 3; -// Max number of small blobs a medium blob may overlap before it is rejected -// and determined to be image. 
Larger than for large blobs as medium blobs -// may be complex Chinese characters. Very large Chinese characters are going -// to overlap more medium blobs than small. -const int kMaxMediumOverlapsWithSmall = 12; -// Max number of normal blobs a large blob may overlap before it is rejected -// and determined to be image. This is set higher to allow for drop caps, which -// may overlap a lot of good text blobs. -const int kMaxLargeOverlapsWithMedium = 12; -// Multiplier of original noise_count used to test for the case of spreading -// noise beyond where it should really be. -const int kOriginalNoiseMultiple = 8; -// Pixel padding for noise blobs when rendering on the image -// mask to encourage them to join together. Make it too big and images -// will fatten out too much and have to be clipped to text. -const int kNoisePadding = 4; -// Fraction of max_noise_count_ to be added to the noise count if there is -// photo mask in the background. -const double kPhotoOffsetFraction = 0.375; -// Min ratio of perimeter^2/16area for a "good" blob in estimating noise -// density. Good blobs are supposed to be highly likely real text. -// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor -// of 16. Digital circles are weird and have a minimum ratio of pi/64, not -// the 1/(4pi) that you would expect. -const double kMinGoodTextPARatio = 1.5; - -CCNonTextDetect::CCNonTextDetect(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BlobGrid(gridsize, bleft, tright), - max_noise_count_(static_cast(kMaxSmallNeighboursPerPix * - gridsize * gridsize)), - noise_density_(nullptr) { - // TODO(rays) break max_noise_count_ out into an area-proportional - // value, as now plus an additive constant for the number of text blobs - // in the 3x3 neighbourhood - maybe 9. 
-} - -CCNonTextDetect::~CCNonTextDetect() { - delete noise_density_; -} - -// Creates and returns a Pix with the same resolution as the original -// in which 1 (black) pixels represent likely non text (photo, line drawing) -// areas of the page, deleting from the blob_block the blobs that were -// determined to be non-text. -// The photo_map is used to bias the decision towards non-text, rather than -// supplying definite decision. -// The blob_block is the usual result of connected component analysis, -// holding the detected blobs. -// The returned Pix should be PixDestroyed after use. -Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, - TO_BLOCK* blob_block) { - // Insert the smallest blobs into the grid. - InsertBlobList(&blob_block->small_blobs); - InsertBlobList(&blob_block->noise_blobs); - // Add the medium blobs that don't have a good strokewidth neighbour. - // Those that do go into good_grid as an antidote to spreading beyond the - // real reaches of a noise region. - BlobGrid good_grid(gridsize(), bleft(), tright()); - BLOBNBOX_IT blob_it(&blob_block->blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; - perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); - if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) - InsertBBox(true, true, blob); - else - good_grid.InsertBBox(true, true, blob); - } - noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); - good_grid.Clear(); // Not needed any more. 
- Pix* pix = noise_density_->ThresholdToPix(max_noise_count_); - if (debug) { - pixWrite("junknoisemask.png", pix, IFF_PNG); - } - ScrollView* win = nullptr; - #ifndef GRAPHICS_DISABLED - if (debug) { - win = MakeWindow(0, 400, "Photo Mask Blobs"); - } - #endif // GRAPHICS_DISABLED - // Large and medium blobs are not text if they overlap with "a lot" of small - // blobs. - MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, - kMaxLargeOverlapsWithSmall, - win, ScrollView::DARK_GREEN, pix); - MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, - win, ScrollView::WHITE, pix); - // Clear the grid of small blobs and insert the medium blobs. - Clear(); - InsertBlobList(&blob_block->blobs); - MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, - kMaxLargeOverlapsWithMedium, - win, ScrollView::DARK_GREEN, pix); - // Clear again before we start deleting the blobs in the grid. - Clear(); - MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, - win, ScrollView::CORAL, pix); - MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, - win, ScrollView::GOLDENROD, pix); - MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, - win, ScrollView::WHITE, pix); - if (debug) { - #ifndef GRAPHICS_DISABLED - win->Update(); - #endif // GRAPHICS_DISABLED - pixWrite("junkccphotomask.png", pix, IFF_PNG); - #ifndef GRAPHICS_DISABLED - delete win->AwaitEvent(SVET_DESTROY); - delete win; - #endif // GRAPHICS_DISABLED - } - return pix; -} - -// Computes and returns the noise_density IntGrid, at the same gridsize as -// this by summing the number of small elements in a 3x3 neighbourhood of -// each grid cell. good_grid is filled with blobs that are considered most -// likely good text, and this is filled with small and medium blobs that are -// more likely non-text. -// The photo_map is used to bias the decision towards non-text, rather than -// supplying definite decision. 
-IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, - BlobGrid* good_grid) { - IntGrid* noise_counts = CountCellElements(); - IntGrid* noise_density = noise_counts->NeighbourhoodSum(); - IntGrid* good_counts = good_grid->CountCellElements(); - // Now increase noise density in photo areas, to bias the decision and - // minimize hallucinated text on image, but trim the noise_density where - // there are good blobs and the original count is low in non-photo areas, - // indicating that most of the result came from neighbouring cells. - int height = pixGetHeight(photo_map); - int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); - for (int y = 0; y < gridheight(); ++y) { - for (int x = 0; x < gridwidth(); ++x) { - int noise = noise_density->GridCellValue(x, y); - if (max_noise_count_ < noise + photo_offset && - noise <= max_noise_count_) { - // Test for photo. - int left = x * gridsize(); - int right = left + gridsize(); - int bottom = height - y * gridsize(); - int top = bottom - gridsize(); - if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, - &bottom)) { - noise_density->SetGridCell(x, y, noise + photo_offset); - } - } - if (debug && noise > max_noise_count_ && - good_counts->GridCellValue(x, y) > 0) { - tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", - x * gridsize(), y * gridsize(), - noise_density->GridCellValue(x, y), - good_counts->GridCellValue(x, y), - noise_counts->GridCellValue(x, y), max_noise_count_); - } - if (noise > max_noise_count_ && - good_counts->GridCellValue(x, y) > 0 && - noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= - max_noise_count_) { - noise_density->SetGridCell(x, y, 0); - } - } - } - delete noise_counts; - delete good_counts; - return noise_density; -} - -// Helper to expand a box in one of the 4 directions by the given pad, -// provided it does not expand into any cell with a zero noise density. 
-// If that is not possible, try expanding all round by a small constant. -static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density, - int pad) { - TBOX expanded_box(box); - expanded_box.set_right(box.right() + pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; - expanded_box = box; - expanded_box.set_left(box.left() - pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; - expanded_box = box; - expanded_box.set_top(box.top() + pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; - expanded_box = box; - expanded_box.set_bottom(box.bottom() + pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; - expanded_box = box; - expanded_box.pad(kNoisePadding, kNoisePadding); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; - return box; -} - -// Tests each blob in the list to see if it is certain non-text using 2 -// conditions: -// 1. blob overlaps a cell with high value in noise_density_ (previously set -// by ComputeNoiseDensity). -// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This -// condition is disabled with max_blob_overlaps == -1. -// If it does, the blob is declared non-text, and is used to mark up the -// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their -// neighbours reset, as they may now point to deleted data. -// WARNING: The blobs list blobs may be in the *this grid, but they are -// not removed. If any deleted blobs might be in *this, then this must be -// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. -// If the win is not nullptr, deleted blobs are drawn on it in red, and kept -// blobs are drawn on it in ok_color. 
-void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, - int max_blob_overlaps, - ScrollView* win, - ScrollView::Color ok_color, - Pix* nontext_mask) { - int imageheight = tright().y() - bleft().x(); - BLOBNBOX_IT blob_it(blobs); - BLOBNBOX_LIST dead_blobs; - BLOBNBOX_IT dead_it(&dead_blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - TBOX box = blob->bounding_box(); - if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && - (max_blob_overlaps < 0 || - !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { - blob->ClearNeighbours(); - #ifndef GRAPHICS_DISABLED - if (win != nullptr) - blob->plot(win, ok_color, ok_color); - #endif // GRAPHICS_DISABLED - } else { - if (noise_density_->AnyZeroInRect(box)) { - // There is a danger that the bounding box may overlap real text, so - // we need to render the outline. - Pix* blob_pix = blob->cblob()->render_outline(); - pixRasterop(nontext_mask, box.left(), imageheight - box.top(), - box.width(), box.height(), PIX_SRC | PIX_DST, - blob_pix, 0, 0); - pixDestroy(&blob_pix); - } else { - if (box.area() < gridsize() * gridsize()) { - // It is a really bad idea to make lots of small components in the - // photo mask, so try to join it to a bigger area by expanding the - // box in a way that does not touch any zero noise density cell. - box = AttemptBoxExpansion(box, *noise_density_, gridsize()); - } - // All overlapped cells are non-zero, so just mark the rectangle. - pixRasterop(nontext_mask, box.left(), imageheight - box.top(), - box.width(), box.height(), PIX_SET, nullptr, 0, 0); - } - #ifndef GRAPHICS_DISABLED - if (win != nullptr) - blob->plot(win, ScrollView::RED, ScrollView::RED); - #endif // GRAPHICS_DISABLED - // It is safe to delete the cblob now, as it isn't used by the grid - // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the - // dead_blobs list. 
- // TODO(rays) delete the delete when the BLOBNBOX destructor deletes - // the cblob. - delete blob->cblob(); - dead_it.add_to_end(blob_it.extract()); - } - } -} - -// Returns true if the given blob overlaps more than max_overlaps blobs -// in the current grid. -bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { - // Search the grid to see what intersects it. - // Setup a Rectangle search for overlapping this blob. - BlobGridSearch rsearch(this); - const TBOX& box = blob->bounding_box(); - rsearch.StartRectSearch(box); - rsearch.SetUniqueMode(true); - BLOBNBOX* neighbour; - int overlap_count = 0; - while (overlap_count <= max_overlaps && - (neighbour = rsearch.NextRectSearch()) != nullptr) { - if (box.major_overlap(neighbour->bounding_box())) { - ++overlap_count; - if (overlap_count > max_overlaps) - return true; - } - } - return false; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/ccnontextdetect.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/ccnontextdetect.h deleted file mode 100644 index 6f536ef2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/ccnontextdetect.h +++ /dev/null @@ -1,87 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ccnontextdetect.h -// Description: Connected-Component-based non-text detection. -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Created: Sat Jun 11 09:52:01 PST 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_ -#define TESSERACT_TEXTORD_CCPHOTODETECT_H_ - -#include "blobgrid.h" -#include "scrollview.h" - -namespace tesseract { - -// The CCNonTextDetect class contains grid-based operations on blobs to create -// a full-resolution image mask analogous yet complementary to -// pixGenHalftoneMask as it is better at line-drawings, graphs and charts. -class CCNonTextDetect : public BlobGrid { - public: - CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~CCNonTextDetect(); - - // Creates and returns a Pix with the same resolution as the original - // in which 1 (black) pixels represent likely non text (photo, line drawing) - // areas of the page, deleting from the blob_block the blobs that were - // determined to be non-text. - // The photo_map (binary image mask) is used to bias the decision towards - // non-text, rather than supplying a definite decision. - // The blob_block is the usual result of connected component analysis, - // holding the detected blobs. - // The returned Pix should be PixDestroyed after use. - Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block); - - private: - // Computes and returns the noise_density IntGrid, at the same gridsize as - // this by summing the number of small elements in a 3x3 neighbourhood of - // each grid cell. 
good_grid is filled with blobs that are considered most - // likely good text, and this is filled with small and medium blobs that are - // more likely non-text. - // The photo_map is used to bias the decision towards non-text, rather than - // supplying definite decision. - IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid); - - // Tests each blob in the list to see if it is certain non-text using 2 - // conditions: - // 1. blob overlaps a cell with high value in noise_density_ (previously set - // by ComputeNoiseDensity). - // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This - // condition is disabled with max_blob_overlaps == -1. - // If it does, the blob is declared non-text, and is used to mark up the - // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their - // neighbours reset, as they may now point to deleted data. - // WARNING: The blobs list blobs may be in the *this grid, but they are - // not removed. If any deleted blobs might be in *this, then this must be - // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. - // If the win is not nullptr, deleted blobs are drawn on it in red, and kept - void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, - int max_blob_overlaps, - ScrollView* win, ScrollView::Color ok_color, - Pix* nontext_mask); - // Returns true if the given blob overlaps more than max_overlaps blobs - // in the current grid. - bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps); - - // Max entry in noise_density_ before the cell is declared noisy. - int max_noise_count_; - // Completed noise density map, which we keep around to use for secondary - // noise detection. - IntGrid* noise_density_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_TEXTORD_CCPHOTODETECT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/cjkpitch.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/cjkpitch.cpp deleted file mode 100644 index 48de76ef..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/cjkpitch.cpp +++ /dev/null @@ -1,1096 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: cjkpitch.cpp -// Description: Code to determine fixed pitchness and the pitch if fixed, -// for CJK text. -// Author: takenaka@google.com (Hiroshi Takenaka) -// Created: Mon Jun 27 12:48:35 JST 2011 -// -// Copyright 2011 Google Inc. All Rights Reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "cjkpitch.h" -#include "genericvector.h" -#include "topitch.h" -#include "tovars.h" - -#include -#include // for std::vector - -BOOL_VAR(textord_space_size_is_variable, FALSE, - "If true, word delimiter spaces are assumed to have " - "variable width, even though characters have fixed pitch."); - -namespace { - -// Allow +/-10% error for character pitch / body size. -static const float kFPTolerance = 0.1; - -// Minimum ratio of "good" character pitch for a row to be considered -// to be fixed-pitch. -static const float kFixedPitchThreshold = 0.35; - -// rank statistics for a small collection of float values. 
-class SimpleStats { - public: - SimpleStats(): finalized_(false), values_() { } - ~SimpleStats() { } - - void Clear() { - values_.clear(); - finalized_ = false; - } - - void Add(float value) { - values_.push_back(value); - finalized_ = false; - } - - void Finish() { - values_.sort(float_compare); - finalized_ = true; - } - - float ile(double frac) { - if (!finalized_) Finish(); - if (values_.empty()) return 0.0; - if (frac >= 1.0) return values_.back(); - if (frac <= 0.0 || values_.size() == 1) return values_[0]; - int index = static_cast((values_.size() - 1) * frac); - float reminder = (values_.size() - 1) * frac - index; - - return values_[index] * (1.0 - reminder) + - values_[index + 1] * reminder; - } - - float median() { - return ile(0.5); - } - - float maximum() { - if (!finalized_) Finish(); - if (values_.empty()) return 0.0; - return values_.back(); - } - - float minimum() { - if (!finalized_) Finish(); - if (values_.empty()) return 0.0; - return values_[0]; - } - - int size() const { - return values_.size(); - } - - private: - static int float_compare(const void* a, const void* b) { - const float* f_a = static_cast(a); - const float* f_b = static_cast(b); - return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0); - } - - bool finalized_; - GenericVector values_; -}; - -// statistics for a small collection of float pairs (x, y). -// EstimateYFor(x, r) returns the estimated y at x, based on -// existing samples between x*(1-r) ~ x*(1+r). 
-class LocalCorrelation { - public: - struct float_pair { - float x, y; - int vote; - }; - - LocalCorrelation(): finalized_(false) { } - ~LocalCorrelation() { } - - void Finish() { - values_.sort(float_pair_compare); - finalized_ = true; - } - - void Clear() { - finalized_ = false; - } - - void Add(float x, float y, int v) { - struct float_pair value; - value.x = x; - value.y = y; - value.vote = v; - values_.push_back(value); - finalized_ = false; - } - - float EstimateYFor(float x, float r) { - ASSERT_HOST(finalized_); - int start = 0, end = values_.size(); - // Because the number of samples (used_) is assumed to be small, - // just use linear search to find values within the range. - while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++; - while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--; - - // Fall back to the global average if there are no data within r - // of x. - if (start >= end) { - start = 0; - end = values_.size(); - } - - // Compute weighted average of the values. - float rc = 0; - int vote = 0; - for (int i = start; i < end; i++) { - rc += values_[i].vote * x * values_[i].y / values_[i].x; - vote += values_[i].vote; - } - - return rc / vote; - } - - private: - static int float_pair_compare(const void* a, const void* b) { - const float_pair* f_a = static_cast(a); - const float_pair* f_b = static_cast(b); - return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0); - } - - bool finalized_; - GenericVector values_; -}; - -// Class to represent a character on a fixed pitch row. A FPChar may -// consist of multiple blobs (BLOBNBOX's). -class FPChar { - public: - enum Alignment { - ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD - }; - - FPChar(): box_(), real_body_(), - from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0), - final_(false), alignment_(ALIGN_UNKNOWN), - merge_to_prev_(false), delete_flag_(false) { - } - - // Initialize from blob. 
- void Init(BLOBNBOX *blob) { - box_ = blob->bounding_box(); - real_body_ = box_; - from_ = to_ = blob; - num_blobs_ = 1; - } - - // Merge this character with "next". The "next" character should - // consist of succeeding blobs on the same row. - void Merge(const FPChar &next) { - int gap = real_body_.x_gap(next.real_body_); - if (gap > max_gap_) max_gap_ = gap; - - box_ += next.box_; - real_body_ += next.real_body_; - to_ = next.to_; - num_blobs_ += next.num_blobs_; - } - - // Accessors. - const TBOX &box() const { return box_; } - void set_box(const TBOX &box) { - box_ = box; - } - const TBOX &real_body() const { return real_body_; } - - bool is_final() const { return final_; } - void set_final(bool flag) { - final_ = flag; - } - - const Alignment& alignment() const { - return alignment_; - } - void set_alignment(Alignment alignment) { - alignment_ = alignment; - } - - bool merge_to_prev() const { - return merge_to_prev_; - } - void set_merge_to_prev(bool flag) { - merge_to_prev_ = flag; - } - - bool delete_flag() const { - return delete_flag_; - } - void set_delete_flag(bool flag) { - delete_flag_ = flag; - } - - int max_gap() const { - return max_gap_; - } - - int num_blobs() const { - return num_blobs_; - } - - private: - TBOX box_; // Rectangle region considered to be occupied by this - // character. It could be bigger than the bounding box. - TBOX real_body_; // Real bounding box of this character. - BLOBNBOX *from_; // The first blob of this character. - BLOBNBOX *to_; // The last blob of this character. - int num_blobs_; // Number of blobs that belong to this character. - int max_gap_; // Maximum x gap between the blobs. - - bool final_; // True if alignment/fragmentation decision for this - // character is finalized. - - Alignment alignment_; // Alignment status. - bool merge_to_prev_; // True if this is a fragmented blob that - // needs to be merged to the previous - // character. 
- - int delete_flag_; // True if this character is merged to another - // one and needs to be deleted. -}; - -// Class to represent a fixed pitch row, as a linear collection of -// FPChar's. -class FPRow { - public: - FPRow() : pitch_(0.0f), estimated_pitch_(0.0f), - all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), - heights_(), characters_(), real_row_(nullptr) { - } - - ~FPRow() { } - - // Initialize from TD_ROW. - void Init(TO_ROW *row); - - // Estimate character pitch of this row, based on current alignment - // status of underlying FPChar's. The argument pass1 can be set to - // true if the function is called after Pass1Analyze(), to eliminate - // some redundant computation. - void EstimatePitch(bool pass1); - - // Check each character if it has good character pitches between its - // predecessor and its successor and set its alignment status. If - // we already calculated the estimated pitch for this row, the value - // is used. If we didn't, a character is considered to be good, if - // the pitches between its predecessor and its successor are almost - // equal. - void Pass1Analyze(); - - // Find characters that fit nicely into one imaginary body next to a - // character which is already finalized. Then mark them as character - // fragments. - bool Pass2Analyze(); - - // Merge FPChars marked as character fragments into one. - void MergeFragments(); - - // Finalize characters that are already large enough and cannot be - // merged with others any more. - void FinalizeLargeChars(); - - // Output pitch estimation results to attributes of TD_ROW. 
- void OutputEstimations(); - - void DebugOutputResult(int row_index); - - int good_pitches() { - return good_pitches_.size(); - } - - int good_gaps() { - return good_gaps_.size(); - } - - float pitch() { - return pitch_; - } - - float estimated_pitch() { - return estimated_pitch_; - } - - void set_estimated_pitch(float v) { - estimated_pitch_ = v; - } - - float height() { - return height_; - } - - float height_pitch_ratio() { - if (good_pitches_.size() < 2) return -1.0; - return height_ / good_pitches_.median(); - } - - float gap() { - return gap_; - } - - size_t num_chars() { - return characters_.size(); - } - FPChar *character(int i) { - return &characters_[i]; - } - - const TBOX &box(int i) { - return characters_[i].box(); - } - - const TBOX &real_body(int i) { - return characters_[i].real_body(); - } - - bool is_box_modified(int i) { - return !(characters_[i].box() == characters_[i].real_body()); - } - - float center_x(int i) { - return (characters_[i].box().left() + characters_[i].box().right()) / 2.0; - } - - bool is_final(int i) { - return characters_[i].is_final(); - } - - void finalize(int i) { - characters_[i].set_final(true); - } - - bool is_good(int i) { - return characters_[i].alignment() == FPChar::ALIGN_GOOD; - } - - bool is_bad(int i) { - return characters_[i].alignment() == FPChar::ALIGN_BAD; - } - - bool is_unknown(int i) { - return characters_[i].alignment() == FPChar::ALIGN_UNKNOWN; - } - - void mark_good(int i) { - characters_[i].set_alignment(FPChar::ALIGN_GOOD); - } - - void mark_bad(int i) { - characters_[i].set_alignment(FPChar::ALIGN_BAD); - } - - void clear_alignment(int i) { - characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN); - } - - private: - static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) { - if (std::min(box1.width(), box2.width()) == 0) return 0.0; - return -box1.x_gap(box2) / (float)std::min(box1.width(), box2.width()); - } - - static bool mostly_overlap(const TBOX& box1, const TBOX& box2) { - return 
x_overlap_fraction(box1, box2) > 0.9; - } - - static bool significant_overlap(const TBOX& box1, const TBOX& box2) { - if (std::min(box1.width(), box2.width()) == 0) return false; - int overlap = -box1.x_gap(box2); - return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1; - } - - static float box_pitch(const TBOX& ref, const TBOX& box) { - return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0; - } - - // Check if two neighboring characters satisfy the fixed pitch model. - static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) { - // Character box shouldn't exceed pitch. - if (box1.width() >= pitch * (1.0 + kFPTolerance) || - box2.width() >= pitch * (1.0 + kFPTolerance) || - box1.height() >= pitch * (1.0 + kFPTolerance) || - box2.height() >= pitch * (1.0 + kFPTolerance)) return false; - - const float real_pitch = box_pitch(box1, box2); - if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true; - - if (textord_space_size_is_variable) { - // Hangul characters usually have fixed pitch, but words are - // delimited by space which can be narrower than characters. - if (real_pitch > pitch && real_pitch < pitch * 2.0 && - real_pitch - box1.x_gap(box2) < pitch) { - return true; - } - } - return false; - } - - static bool is_interesting_blob(const BLOBNBOX *blob) { - return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER; - } - - // Cleanup chars that are already merged to others. - void DeleteChars() { - int index = 0; - for (int i = 0; i < characters_.size(); ++i) { - if (!characters_[i].delete_flag()) { - if (index != i) characters_[index] = characters_[i]; - index++; - } - } - characters_.truncate(index); - } - - float pitch_; // Character pitch. - float estimated_pitch_; // equal to pitch_ if pitch_ is considered - // to be good enough. - float height_; // Character height. - float gap_; // Minimum gap between characters. - - // Pitches between any two successive characters. 
- SimpleStats all_pitches_; - // Gaps between any two successive characters. - SimpleStats all_gaps_; - // Pitches between any two successive characters that are consistent - // with the fixed pitch model. - SimpleStats good_pitches_; - // Gaps between any two successive characters that are consistent - // with the fixed pitch model. - SimpleStats good_gaps_; - - SimpleStats heights_; - - GenericVector characters_; - TO_ROW *real_row_; // Underlying TD_ROW for this row. -}; - -void FPRow::Init(TO_ROW *row) { - ASSERT_HOST(row != nullptr); - ASSERT_HOST(row->xheight > 0); - real_row_ = row; - real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision. - - BLOBNBOX_IT blob_it = row->blob_list(); - // Initialize characters_ and compute the initial estimation of - // character height. - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - if (is_interesting_blob(blob_it.data())) { - FPChar fp_char; - fp_char.Init(blob_it.data()); - // Merge unconditionally if two blobs overlap. - if (!characters_.empty() && - significant_overlap(fp_char.box(), characters_.back().box())) { - characters_.back().Merge(fp_char); - } else { - characters_.push_back(fp_char); - } - TBOX bound = blob_it.data()->bounding_box(); - if (bound.height() * 3.0 > bound.width()) { - heights_.Add(bound.height()); - } - } - } - heights_.Finish(); - height_ = heights_.ile(0.875); -} - -void FPRow::OutputEstimations() { - if (good_pitches_.size() == 0) { - pitch_ = 0.0f; - real_row_->pitch_decision = PITCH_CORR_PROP; - return; - } - - pitch_ = good_pitches_.median(); - real_row_->fixed_pitch = pitch_; - // good_gaps_.ile(0.125) can be large if most characters on the row - // are skinny. Use pitch_ - height_ instead if it's smaller, but - // positive. 
- real_row_->kern_size = real_row_->pr_nonsp = - std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f)); - real_row_->body_size = pitch_ - real_row_->kern_size; - - if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) { - // If more than half of the characters of a line don't fit to the - // fixed pitch model, consider the line to be proportional. 50% - // seems to be a good threshold in practice as well. - // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in - // real_row_ as a partial estimation result and try to use them in the - // normalization process. - real_row_->pitch_decision = PITCH_CORR_PROP; - return; - } else if (good_pitches_.size() > all_pitches_.size() * 0.75) { - real_row_->pitch_decision = PITCH_DEF_FIXED; - } else { - real_row_->pitch_decision = PITCH_CORR_FIXED; - } - - real_row_->space_size = real_row_->pr_space = pitch_; - // Set min_space to 50% of character pitch so that we can break CJK - // text at a half-width space after punctuation. - real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5; - - // Don't consider a quarter space as a real space, because it's used - // for line justification in traditional Japanese books. - real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(), - (double)good_gaps_.ile(0.875)); - - int space_threshold = - std::min((real_row_->max_nonspace + real_row_->min_space) / 2, - static_cast(real_row_->xheight)); - - // Make max_nonspace larger than any intra-character gap so that - // make_prop_words() won't break a row at the middle of a character. - for (size_t i = 0; i < num_chars(); ++i) { - if (characters_[i].max_gap() > real_row_->max_nonspace) { - real_row_->max_nonspace = characters_[i].max_gap(); - } - } - real_row_->space_threshold = - std::min((real_row_->max_nonspace + real_row_->min_space) / 2, - static_cast(real_row_->xheight)); - real_row_->used_dm_model = false; - - // Setup char_cells. 
- ICOORDELT_IT cell_it = &real_row_->char_cells; - ICOORDELT *cell = new ICOORDELT(real_body(0).left(), 0); - cell_it.add_after_then_move(cell); - - int right = real_body(0).right(); - for (size_t i = 1; i < num_chars(); ++i) { - // Put a word break if gap between two characters is bigger than - // space_threshold. Don't break if none of two characters - // couldn't be "finalized", because maybe they need to be merged - // to one character. - if ((is_final(i - 1) || is_final(i)) && - real_body(i - 1).x_gap(real_body(i)) > space_threshold) { - cell = new ICOORDELT(right + 1, 0); - cell_it.add_after_then_move(cell); - while (right + pitch_ < box(i).left()) { - right += pitch_; - cell = new ICOORDELT(right + 1, 0); - cell_it.add_after_then_move(cell); - } - right = box(i).left(); - } - cell = new ICOORDELT((right + real_body(i).left()) / 2, 0); - cell_it.add_after_then_move(cell); - right = real_body(i).right(); - } - - cell = new ICOORDELT(right + 1, 0); - cell_it.add_after_then_move(cell); - - // TODO(takenaka): add code to store alignment/fragmentation - // information to blobs so that it can be reused later, e.g. in - // recognition phase. -} - -void FPRow::EstimatePitch(bool pass1) { - good_pitches_.Clear(); - all_pitches_.Clear(); - good_gaps_.Clear(); - all_gaps_.Clear(); - heights_.Clear(); - if (num_chars() == 0) return; - - int32_t cx0, cx1; - bool prev_was_good = is_good(0); - cx0 = center_x(0); - - heights_.Add(box(0).height()); - for (size_t i = 1; i < num_chars(); i++) { - cx1 = center_x(i); - int32_t pitch = cx1 - cx0; - int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i))); - - heights_.Add(box(i).height()); - // Ignore if the pitch is too close. But don't ignore wide pitch - // may be the result of large tracking. 
- if (pitch > height_ * 0.5) { - all_pitches_.Add(pitch); - all_gaps_.Add(gap); - if (is_good(i)) { - // In pass1 (after Pass1Analyze()), all characters marked as - // "good" have a good consistent pitch with their previous - // characters. However, it's not true in pass2 and a good - // character may have a good pitch only between its successor. - // So we collect only pitch values between two good - // characters. and within tolerance in pass2. - if (pass1 || (prev_was_good && - fabs(estimated_pitch_ - pitch) < - kFPTolerance * estimated_pitch_)) { - good_pitches_.Add(pitch); - if (!is_box_modified(i - 1) && !is_box_modified(i)) { - good_gaps_.Add(gap); - } - } - prev_was_good = true; - } else { - prev_was_good = false; - } - } - cx0 = cx1; - } - - good_pitches_.Finish(); - all_pitches_.Finish(); - good_gaps_.Finish(); - all_gaps_.Finish(); - heights_.Finish(); - - height_ = heights_.ile(0.875); - if (all_pitches_.size() == 0) { - pitch_ = 0.0f; - gap_ = 0.0f; - } else if (good_pitches_.size() < 2) { - // We don't have enough data to estimate the pitch of this row yet. - // Use median of all pitches as the initial guess. 
- pitch_ = all_pitches_.median(); - ASSERT_HOST(pitch_ > 0.0f); - gap_ = all_gaps_.ile(0.125); - } else { - pitch_ = good_pitches_.median(); - ASSERT_HOST(pitch_ > 0.0f); - gap_ = good_gaps_.ile(0.125); - } -} - -void FPRow::DebugOutputResult(int row_index) { - if (num_chars() > 0) { - tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " - "space_size=%f, space_threshold=%d, xheight=%f\n", - row_index, (int)(real_row_->pitch_decision), - real_row_->fixed_pitch, real_row_->max_nonspace, - real_row_->space_size, real_row_->space_threshold, - real_row_->xheight); - - for (unsigned i = 0; i < num_chars(); i++) { - tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", - i, is_final(i), is_good(i), character(i)->num_blobs()); - box(i).print(); - } - } -} - -void FPRow::Pass1Analyze() { - if (num_chars() < 2) return; - - if (estimated_pitch_ > 0.0f) { - for (size_t i = 2; i < num_chars(); i++) { - if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) && - is_good_pitch(estimated_pitch_, box(i - 1), box(i))) { - mark_good(i - 1); - } - } - } else { - for (size_t i = 2; i < num_chars(); i++) { - if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) { - mark_good(i - 1); - } - } - } - character(0)->set_alignment(character(1)->alignment()); - character(num_chars() - 1)->set_alignment( - character(num_chars() - 2)->alignment()); -} - -bool FPRow::Pass2Analyze() { - bool changed = false; - if (num_chars() <= 1 || estimated_pitch_ == 0.0f) { - return false; - } - for (size_t i = 0; i < num_chars(); i++) { - if (is_final(i)) continue; - - FPChar::Alignment alignment = character(i)->alignment(); - bool intersecting = false; - bool not_intersecting = false; - - if (i < num_chars() - 1 && is_final(i + 1)) { - // Next character is already finalized. Estimate the imaginary - // body including this character based on the character. Skip - // whitespace if necessary. 
- bool skipped_whitespaces = false; - float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; - while (c1 > box(i).right()) { - skipped_whitespaces = true; - c1 -= estimated_pitch_; - } - TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top()); - - // Collect all characters that mostly fit in the region. - // Also, their union height shouldn't be too big. - int j = i; - TBOX merged; - while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) && - merged.bounding_union(box(j)).height() < - estimated_pitch_ * (1 + kFPTolerance)) { - merged += box(j); - j--; - } - - if (j >= 0 && significant_overlap(ibody, box(j))) { - // character(j) lies on the character boundary and doesn't fit - // well into the imaginary body. - if (!is_final(j)) intersecting = true; - } else { - not_intersecting = true; - if (i - j > 0) { - // Merge character(j+1) ... character(i) because they fit - // into the body nicely. - if (i - j == 1) { - // Only one char in the imaginary body. - if (!skipped_whitespaces) mark_good(i); - // set ibody as bounding box of this character to get - // better pitch analysis result for halfwidth glyphs - // followed by a halfwidth space. - if (box(i).width() <= estimated_pitch_ * 0.5) { - ibody += box(i); - character(i)->set_box(ibody); - } - character(i)->set_merge_to_prev(false); - finalize(i); - } else { - for (int k = i; k > j + 1; k--) { - character(k)->set_merge_to_prev(true); - } - } - } - } - } - if (i > 0 && is_final(i - 1)) { - // Now we repeat everything from the opposite side. Previous - // character is already finalized. Estimate the imaginary body - // including this character based on the character. 
- bool skipped_whitespaces = false; - float c1 = center_x(i - 1) + 1.5 * estimated_pitch_; - while (c1 < box(i).left()) { - skipped_whitespaces = true; - c1 += estimated_pitch_; - } - TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top()); - - size_t j = i; - TBOX merged; - while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) && - merged.bounding_union(box(j)).height() < - estimated_pitch_ * (1 + kFPTolerance)) { - merged += box(j); - j++; - } - - if (j < num_chars() && significant_overlap(ibody, box(j))) { - if (!is_final(j)) intersecting = true; - } else { - not_intersecting = true; - if (j - i > 0) { - if (j - i == 1) { - if (!skipped_whitespaces) mark_good(i); - if (box(i).width() <= estimated_pitch_ * 0.5) { - ibody += box(i); - character(i)->set_box(ibody); - } - character(i)->set_merge_to_prev(false); - finalize(i); - } else { - for (size_t k = i + 1; k < j; k++) { - character(k)->set_merge_to_prev(true); - } - } - } - } - } - - // This character doesn't fit well into the estimated imaginary - // bodies. Mark it as bad. - if (intersecting && !not_intersecting) mark_bad(i); - if (character(i)->alignment() != alignment || - character(i)->merge_to_prev()) { - changed = true; - } - } - - return changed; -} - -void FPRow::MergeFragments() { - int last_char = 0; - - for (size_t j = 0; j < num_chars(); ++j) { - if (character(j)->merge_to_prev()) { - character(last_char)->Merge(*character(j)); - character(j)->set_delete_flag(true); - clear_alignment(last_char); - character(j-1)->set_merge_to_prev(false); - } else { - last_char = j; - } - } - DeleteChars(); -} - -void FPRow::FinalizeLargeChars() { - float row_pitch = estimated_pitch(); - for (size_t i = 0; i < num_chars(); i++) { - if (is_final(i)) continue; - - // Finalize if both neighbors are finalized. We have no other choice. 
- if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) { - finalize(i); - continue; - } - - float cx = center_x(i); - TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1); - if (i > 0) { - // The preceding character significantly intersects with the - // imaginary body of this character. Let Pass2Analyze() handle - // this case. - if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue; - if (!is_final(i - 1)) { - TBOX merged = box(i); - merged += box(i - 1); - if (merged.width() < row_pitch) continue; - // This character cannot be finalized yet because it can be - // merged with the previous one. Again, let Pass2Analyze() - // handle this case. - } - } - if (i < num_chars() - 1) { - if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue; - if (!is_final(i + 1)) { - TBOX merged = box(i); - merged += box(i + 1); - if (merged.width() < row_pitch) continue; - } - } - finalize(i); - } - - // Update alignment decision. We only consider finalized characters - // in pass2. E.g. if a finalized character C has another finalized - // character L on its left and a not-finalized character R on its - // right, we mark C as good if the pitch between C and L is good, - // regardless of the pitch between C and R. 
- for (size_t i = 0; i < num_chars(); i++) { - if (!is_final(i)) continue; - bool good_pitch = false; - bool bad_pitch = false; - if (i > 0 && is_final(i - 1)) { - if (is_good_pitch(row_pitch, box(i - 1), box(i))) { - good_pitch = true; - } else { - bad_pitch = true; - } - } - if (i < num_chars() - 1 && is_final(i + 1)) { - if (is_good_pitch(row_pitch, box(i), box(i + 1))) { - good_pitch = true; - } else { - bad_pitch = true; - } - } - if (good_pitch && !bad_pitch) mark_good(i); - else if (!good_pitch && bad_pitch) mark_bad(i); - } -} - -class FPAnalyzer { - public: - FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks); - ~FPAnalyzer() { } - - void Pass1Analyze() { - for (size_t i = 0; i < rows_.size(); i++) rows_[i].Pass1Analyze(); - } - - // Estimate character pitch for each row. The argument pass1 can be - // set to true if the function is called after Pass1Analyze(), to - // eliminate some redundant computation. - void EstimatePitch(bool pass1); - - bool maybe_fixed_pitch() { - if (rows_.empty() || - rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false; - return true; - } - - void MergeFragments() { - for (size_t i = 0; i < rows_.size(); i++) rows_[i].MergeFragments(); - } - - void FinalizeLargeChars() { - for (size_t i = 0; i < rows_.size(); i++) rows_[i].FinalizeLargeChars(); - } - - bool Pass2Analyze() { - bool changed = false; - for (size_t i = 0; i < rows_.size(); i++) { - if (rows_[i].Pass2Analyze()) { - changed = true; - } - } - return changed; - } - - void OutputEstimations() { - for (size_t i = 0; i < rows_.size(); i++) rows_[i].OutputEstimations(); - // Don't we need page-level estimation of gaps/spaces? - } - - void DebugOutputResult() { - tprintf("FPAnalyzer: final result\n"); - for (size_t i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i); - } - - size_t num_rows() { - return rows_.size(); - } - - // Returns the upper limit for pass2 loop iteration. 
- unsigned max_iteration() { - // We're fixing at least one character per iteration. So basically - // we shouldn't require more than max_chars_per_row_ iterations. - return max_chars_per_row_ + 100; - } - - private: - ICOORD page_tr_; - std::vector rows_; - unsigned num_tall_rows_; - unsigned num_bad_rows_; - // TODO: num_empty_rows_ is incremented, but never used overwise. - unsigned num_empty_rows_; - unsigned max_chars_per_row_; -}; - -FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) -: page_tr_(page_tr), - num_tall_rows_(0), - num_bad_rows_(0), - num_empty_rows_(0), - max_chars_per_row_(0) -{ - TO_BLOCK_IT block_it(port_blocks); - - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - TO_BLOCK *block = block_it.data(); - if (!block->get_rows()->empty()) { - ASSERT_HOST(block->xheight > 0); - find_repeated_chars(block, FALSE); - } - } - - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - TO_ROW_IT row_it = block_it.data()->get_rows(); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - FPRow row; - row.Init(row_it.data()); - rows_.push_back(row); - size_t num_chars = rows_.back().num_chars(); - if (num_chars <= 1) num_empty_rows_++; - if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars; - } - } -} - -void FPAnalyzer::EstimatePitch(bool pass1) { - LocalCorrelation pitch_height_stats; - - num_tall_rows_ = 0; - num_bad_rows_ = 0; - pitch_height_stats.Clear(); - for (size_t i = 0; i < rows_.size(); i++) { - rows_[i].EstimatePitch(pass1); - if (rows_[i].good_pitches()) { - pitch_height_stats.Add(rows_[i].height() + rows_[i].gap(), - rows_[i].pitch(), rows_[i].good_pitches()); - if (rows_[i].height_pitch_ratio() > 1.1) num_tall_rows_++; - } else { - num_bad_rows_++; - } - } - - pitch_height_stats.Finish(); - for (size_t i = 0; i < rows_.size(); i++) { - if (rows_[i].good_pitches() >= 5) { - // We have enough evidences. 
Just use the pitch estimation - // from this row. - rows_[i].set_estimated_pitch(rows_[i].pitch()); - } else if (rows_[i].num_chars() > 1) { - float estimated_pitch = - pitch_height_stats.EstimateYFor(rows_[i].height() + rows_[i].gap(), - 0.1); - // CJK characters are more likely to be fragmented than poorly - // chopped. So trust the page-level estimation of character - // pitch only if it's larger than row-level estimation or - // row-level estimation is too large (2x bigger than row height). - if (estimated_pitch > rows_[i].pitch() || - rows_[i].pitch() > rows_[i].height() * 2.0) { - rows_[i].set_estimated_pitch(estimated_pitch); - } else { - rows_[i].set_estimated_pitch(rows_[i].pitch()); - } - } - } -} - -} // namespace - -void compute_fixed_pitch_cjk(ICOORD page_tr, - TO_BLOCK_LIST *port_blocks) { - FPAnalyzer analyzer(page_tr, port_blocks); - if (analyzer.num_rows() == 0) return; - - analyzer.Pass1Analyze(); - analyzer.EstimatePitch(true); - - // Perform pass1 analysis again with the initial estimation of row - // pitches, for better estimation. - analyzer.Pass1Analyze(); - analyzer.EstimatePitch(true); - - // Early exit if the page doesn't seem to contain fixed pitch rows. 
- if (!analyzer.maybe_fixed_pitch()) { - if (textord_debug_pitch_test) { - tprintf("Page doesn't seem to contain fixed pitch rows\n"); - } - return; - } - - unsigned iteration = 0; - do { - analyzer.MergeFragments(); - analyzer.FinalizeLargeChars(); - analyzer.EstimatePitch(false); - iteration++; - } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration()); - - if (textord_debug_pitch_test) { - tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", - iteration, analyzer.max_iteration()); - } - - analyzer.OutputEstimations(); - if (textord_debug_pitch_test) analyzer.DebugOutputResult(); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/cjkpitch.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/cjkpitch.h deleted file mode 100644 index ea4fe2b4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/cjkpitch.h +++ /dev/null @@ -1,71 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: cjkpitch.h -// Description: Code to determine fixed pitchness and the pitch if fixed, -// for CJK text. -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: takenaka@google.com (Hiroshi Takenaka) -// Created: Mon Jun 27 12:48:35 JST 2011 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// -#ifndef CJKPITCH_H_ -#define CJKPITCH_H_ - -#include "blobbox.h" - -// Function to test "fixed-pitchness" of the input text and estimating -// character pitch parameters for it, based on CJK fixed-pitch layout -// model. -// -// This function assumes that a fixed-pitch CJK text has following -// characteristics: -// -// - Most glyphs are designed to fit within the same sized square -// (imaginary body). Also they are aligned to the center of their -// imaginary bodies. -// - The imaginary body is always a regular rectangle. -// - There may be some extra space between character bodies -// (tracking). -// - There may be some extra space after punctuations. -// - The text is *not* space-delimited. Thus spaces are rare. -// - Character may consists of multiple unconnected blobs. -// -// And the function works in two passes. On pass 1, it looks for such -// "good" blobs that has the pitch same pitch on the both side and -// looks like a complete CJK character. Then estimates the character -// pitch for every row, based on those good blobs. If we couldn't find -// enough good blobs for a row, then the pitch is estimated from other -// rows with similar character height instead. -// -// Pass 2 is an iterative process to fit the blobs into fixed-pitch -// character cells. Once we have estimated the character pitch, blobs -// that are almost as large as the pitch can be considered to be -// complete characters. And once we know that some characters are -// complete characters, we can estimate the region occupied by its -// neighbors. And so on. -// -// We repeat the process until all ambiguities are resolved. Then make -// the final decision about fixed-pitchness of each row and compute -// pitch and spacing parameters. -// -// (If a row is considered to be proportional, pitch_decision for the -// row is set to PITCH_CORR_PROP and the later phase -// (i.e. 
Textord::to_spacing()) should determine its spacing -// parameters) -// -// This function doesn't provide all information required by -// fixed_pitch_words() and the rows need to be processed with -// make_prop_words() even if they are fixed pitched. -void compute_fixed_pitch_cjk(ICOORD page_tr, // top right - TO_BLOCK_LIST *port_blocks); // input list - -#endif // CJKPITCH_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colfind.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colfind.cpp deleted file mode 100644 index 5b17bd88..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colfind.cpp +++ /dev/null @@ -1,1626 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colfind.cpp -// Description: Class to hold BLOBNBOXs in a grid for fast access -// to neighbours. -// Author: Ray Smith -// Created: Wed Jun 06 17:22:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "colfind.h" - -#include "ccnontextdetect.h" -#include "colpartition.h" -#include "colpartitionset.h" -#include "equationdetectbase.h" -#include "linefind.h" -#include "normalis.h" -#include "strokewidth.h" -#include "blobbox.h" -#include "scrollview.h" -#include "tablefind.h" -#include "params.h" -#include "workingpartset.h" - -#include - -namespace tesseract { - -// When assigning columns, the max number of misfit grid rows/ColPartitionSets -// that can be ignored. -const int kMaxIncompatibleColumnCount = 2; -// Max fraction of mean_column_gap_ for the gap between two partitions within a -// column to allow them to merge. -const double kHorizontalGapMergeFraction = 0.5; -// Minimum gutter width as a fraction of gridsize -const double kMinGutterWidthGrid = 0.5; -// Max multiple of a partition's median size as a distance threshold for -// adding noise blobs. -const double kMaxDistToPartSizeRatio = 1.5; - -BOOL_VAR(textord_tabfind_show_initial_partitions, - false, "Show partition bounds"); -BOOL_VAR(textord_tabfind_show_reject_blobs, - false, "Show blobs rejected as noise"); -INT_VAR(textord_tabfind_show_partitions, 0, - "Show partition bounds, waiting if >1"); -BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds"); -BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds"); -BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); - -ScrollView* ColumnFinder::blocks_win_ = nullptr; - -// Gridsize is an estimate of the text size in the image. A suitable value -// is in TO_BLOCK::line_size after find_components has been used to make -// the blobs. -// bleft and tright are the bounds of the image (or rectangle) being processed. -// vlines is a (possibly empty) list of TabVector and vertical_x and y are -// the sum logical vertical vector produced by LineFinder::FindVerticalLines. 
-ColumnFinder::ColumnFinder(int gridsize, - const ICOORD& bleft, const ICOORD& tright, - int resolution, bool cjk_script, - double aligned_gap_fraction, - TabVector_LIST* vlines, TabVector_LIST* hlines, - int vertical_x, int vertical_y) - : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, - resolution), - cjk_script_(cjk_script), - min_gutter_width_(static_cast(kMinGutterWidthGrid * gridsize)), - mean_column_gap_(tright.x() - bleft.x()), - tabfind_aligned_gap_fraction_(aligned_gap_fraction), - deskew_(0.0f, 0.0f), - reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), - text_rotation_(0.0f, 0.0f), - best_columns_(nullptr), stroke_width_(nullptr), - part_grid_(gridsize, bleft, tright), nontext_map_(nullptr), - projection_(resolution), - denorm_(nullptr), input_blobs_win_(nullptr), equation_detect_(nullptr) { - TabVector_IT h_it(&horizontal_lines_); - h_it.add_list_after(hlines); -} - -ColumnFinder::~ColumnFinder() { - column_sets_.delete_data_pointers(); - delete [] best_columns_; - delete stroke_width_; - delete input_blobs_win_; - pixDestroy(&nontext_map_); - while (denorm_ != nullptr) { - DENORM* dead_denorm = denorm_; - denorm_ = const_cast(denorm_->predecessor()); - delete dead_denorm; - } - - // The ColPartitions are destroyed automatically, but any boxes in - // the noise_parts_ list are owned and need to be deleted explicitly. - ColPartition_IT part_it(&noise_parts_); - for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { - ColPartition* part = part_it.data(); - part->DeleteBoxes(); - } - // Likewise any boxes in the good_parts_ list need to be deleted. - // These are just the image parts. Text parts have already given their - // boxes on to the TO_BLOCK, and have empty lists. 
- part_it.set_to_list(&good_parts_); - for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { - ColPartition* part = part_it.data(); - part->DeleteBoxes(); - } - // Also, any blobs on the image_bblobs_ list need to have their cblobs - // deleted. This only happens if there has been an early return from - // FindColumns, as in a normal return, the blobs go into the grid and - // end up in noise_parts_, good_parts_ or the output blocks. - BLOBNBOX_IT bb_it(&image_bblobs_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - BLOBNBOX* bblob = bb_it.data(); - delete bblob->cblob(); - } -} - -// Performs initial processing on the blobs in the input_block: -// Setup the part_grid, stroke_width_, nontext_map. -// Obvious noise blobs are filtered out and used to mark the nontext_map_. -// Initial stroke-width analysis is used to get local text alignment -// direction, so the textline projection_ map can be setup. -// On return, IsVerticallyAlignedText may be called (now optionally) to -// determine the gross textline alignment of the page. -void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, - Pix* photo_mask_pix, - TO_BLOCK* input_block) { - part_grid_.Init(gridsize(), bleft(), tright()); - delete stroke_width_; - stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); - min_gutter_width_ = static_cast(kMinGutterWidthGrid * gridsize()); - input_block->ReSetAndReFilterBlobs(); - #ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_blocks) { - input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); - input_block->plot_graded_blobs(input_blobs_win_); - } - #endif // GRAPHICS_DISABLED - SetBlockRuleEdges(input_block); - pixDestroy(&nontext_map_); - // Run a preliminary strokewidth neighbour detection on the medium blobs. - stroke_width_->SetNeighboursOnMediumBlobs(input_block); - CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); - // Remove obvious noise and make the initial non-text map. 
- nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind, - photo_mask_pix, input_block); - stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, - input_block); - // Clear the strokewidth grid ready for rotation or leader finding. - stroke_width_->Clear(); -} - -// Tests for vertical alignment of text (returning true if so), and generates -// a list of blobs of moderate aspect ratio, in the most frequent writing -// direction (in osd_blobs) for orientation and script detection to test -// the character orientation. -// block is the single block for the whole page or rectangle to be OCRed. -// Note that the vertical alignment may be due to text whose writing direction -// is vertical, like say Japanese, or due to text whose writing direction is -// horizontal but whose text appears vertically aligned because the image is -// not the right way up. -bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, - TO_BLOCK* block, - BLOBNBOX_CLIST* osd_blobs) { - return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, - block, osd_blobs); -} - -// Rotates the blobs and the TabVectors so that the gross writing direction -// (text lines) are horizontal and lines are read down the page. -// Applied rotation stored in rotation_. -// A second rotation is calculated for application during recognition to -// make the rotated blobs upright for recognition. -// Subsequent rotation stored in text_rotation_. -// -// Arguments: -// vertical_text_lines true if the text lines are vertical. -// recognition_rotation [0..3] is the number of anti-clockwise 90 degree -// rotations from osd required for the text to be upright and readable. 
-void ColumnFinder::CorrectOrientation(TO_BLOCK* block, - bool vertical_text_lines, - int recognition_rotation) { - const FCOORD anticlockwise90(0.0f, 1.0f); - const FCOORD clockwise90(0.0f, -1.0f); - const FCOORD rotation180(-1.0f, 0.0f); - const FCOORD norotation(1.0f, 0.0f); - - text_rotation_ = norotation; - // Rotate the page to make the text upright, as implied by - // recognition_rotation. - rotation_ = norotation; - if (recognition_rotation == 1) { - rotation_ = anticlockwise90; - } else if (recognition_rotation == 2) { - rotation_ = rotation180; - } else if (recognition_rotation == 3) { - rotation_ = clockwise90; - } - // We infer text writing direction to be vertical if there are several - // vertical text lines detected, and horizontal if not. But if the page - // orientation was determined to be 90 or 270 degrees, the true writing - // direction is the opposite of what we inferred. - if (recognition_rotation & 1) { - vertical_text_lines = !vertical_text_lines; - } - // If we still believe the writing direction is vertical, we use the - // convention of rotating the page ccw 90 degrees to make the text lines - // horizontal, and mark the blobs for rotation cw 90 degrees for - // classification so that the text order is correct after recognition. - if (vertical_text_lines) { - rotation_.rotate(anticlockwise90); - text_rotation_.rotate(clockwise90); - } - // Set rerotate_ to the inverse of rotation_. - rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); - if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { - // Rotate all the blobs and tab vectors. - RotateBlobList(rotation_, &block->large_blobs); - RotateBlobList(rotation_, &block->blobs); - RotateBlobList(rotation_, &block->small_blobs); - RotateBlobList(rotation_, &block->noise_blobs); - TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, - &min_gutter_width_); - part_grid_.Init(gridsize(), bleft(), tright()); - // Reset all blobs to initial state and filter by size. 
- // Since they have rotated, the list they belong on could have changed. - block->ReSetAndReFilterBlobs(); - SetBlockRuleEdges(block); - stroke_width_->CorrectForRotation(rerotate_, &part_grid_); - } - if (textord_debug_tabfind) { - tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", - vertical_text_lines, recognition_rotation, - rotation_.x(), rotation_.y(), - text_rotation_.x(), text_rotation_.y()); - } - // Setup the denormalization. - ASSERT_HOST(denorm_ == nullptr); - denorm_ = new DENORM; - denorm_->SetupNormalization(nullptr, &rotation_, nullptr, - 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); -} - -// Finds blocks of text, image, rule line, table etc, returning them in the -// blocks and to_blocks -// (Each TO_BLOCK points to the basic BLOCK and adds more information.) -// Image blocks are generated by a combination of photo_mask_pix (which may -// NOT be nullptr) and the rejected text found during preliminary textline -// finding. -// The input_block is the result of a call to find_components, and contains -// the blobs found in the image or rectangle to be OCRed. These blobs will be -// removed and placed in the output blocks, while unused ones will be deleted. -// If single_column is true, the input is treated as single column, but -// it is still divided into blocks of equal line spacing/text size. -// scaled_color is scaled down by scaled_factor from the input color image, -// and may be nullptr if the input was not color. -// grey_pix is optional, but if present must match the photo_mask_pix in size, -// and must be a *real* grey image instead of binary_pix * 255. -// thresholds_pix is expected to be present iff grey_pix is present and -// can be an integer factor reduction of the grey_pix. It represents the -// thresholds that were used to create the binary_pix from the grey_pix. 
-// If diacritic_blobs is non-null, then diacritics/noise blobs, that would -// confuse layout analysis by causing textline overlap, are placed there, -// with the expectation that they will be reassigned to words later and -// noise/diacriticness determined via classification. -// Returns -1 if the user hits the 'd' key in the blocks window while running -// in debug mode, which requests a retry with more debug info. -int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, - int scaled_factor, TO_BLOCK* input_block, - Pix* photo_mask_pix, Pix* thresholds_pix, - Pix* grey_pix, DebugPixa* pixa_debug, - BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs, - TO_BLOCK_LIST* to_blocks) { - pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); - stroke_width_->FindLeaderPartitions(input_block, &part_grid_); - stroke_width_->RemoveLineResidue(&big_parts_); - FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_, - input_block); - SetBlockRuleEdges(input_block); - stroke_width_->GradeBlobsIntoPartitions( - pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_, - &projection_, diacritic_blobs, &part_grid_, &big_parts_); - if (!PSM_SPARSE(pageseg_mode)) { - ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, - input_block, this, pixa_debug, &part_grid_, - &big_parts_); - ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, - photo_mask_pix); - ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, - input_block, this, pixa_debug, &part_grid_, - &big_parts_); - } - part_grid_.ReTypeBlobs(&image_bblobs_); - TidyBlobs(input_block); - Reset(); - // TODO(rays) need to properly handle big_parts_. - ColPartition_IT p_it(&big_parts_); - for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) - p_it.data()->DisownBoxesNoAssert(); - big_parts_.clear(); - delete stroke_width_; - stroke_width_ = nullptr; - // Compute the edge offsets whether or not there is a grey_pix. 
It is done - // here as the c_blobs haven't been touched by rotation or anything yet, - // so no denorm is required, yet the text has been separated from image, so - // no time is wasted running it on image blobs. - input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); - - // A note about handling right-to-left scripts (Hebrew/Arabic): - // The columns must be reversed and come out in right-to-left instead of - // the normal left-to-right order. Because the left-to-right ordering - // is implicit in many data structures, it is simpler to fool the algorithms - // into thinking they are dealing with left-to-right text. - // To do this, we reflect the needed data in the y-axis and then reflect - // the blocks back after they have been created. This is a temporary - // arrangement that is confined to this function only, so the reflection - // is completely invisible in the output blocks. - // The only objects reflected are: - // The vertical separator lines that have already been found; - // The bounding boxes of all BLOBNBOXES on all lists on the input_block - // plus the image_bblobs. The outlines are not touched, since they are - // not looked at. - bool input_is_rtl = input_block->block->right_to_left(); - if (input_is_rtl) { - // Reflect the vertical separator lines (member of TabFind). - ReflectInYAxis(); - // Reflect the blob boxes. - ReflectForRtl(input_block, &image_bblobs_); - part_grid_.ReflectInYAxis(); - } - - if (!PSM_SPARSE(pageseg_mode)) { - if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { - // No tab stops needed. Just the grid that FindTabVectors makes. - DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); - } else { - SetBlockRuleEdges(input_block); - // Find the tab stops, estimate skew, and deskew the tabs, blobs and - // part_grid_. - FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, - min_gutter_width_, tabfind_aligned_gap_fraction_, - &part_grid_, &deskew_, &reskew_); - // Add the deskew to the denorm_. 
- DENORM* new_denorm = new DENORM; - new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, - 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); - denorm_ = new_denorm; - } - SetBlockRuleEdges(input_block); - part_grid_.SetTabStops(this); - - // Make the column_sets_. - if (!MakeColumns(false)) { - tprintf("Empty page!!\n"); - part_grid_.DeleteParts(); - return 0; // This is an empty page. - } - - // Refill the grid using rectangular spreading, and get the benefit - // of the completed tab vectors marking the rule edges of each blob. - Clear(); - #ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_reject_blobs) { - ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs"); - input_block->plot_graded_blobs(rej_win); - } - #endif // GRAPHICS_DISABLED - InsertBlobsToGrid(false, false, &image_bblobs_, this); - InsertBlobsToGrid(true, true, &input_block->blobs, this); - - part_grid_.GridFindMargins(best_columns_); - // Split and merge the partitions by looking at local neighbours. - GridSplitPartitions(); - // Resolve unknown partitions by adding to an existing partition, fixing - // the type, or declaring them noise. - part_grid_.GridFindMargins(best_columns_); - GridMergePartitions(); - // Insert any unused noise blobs that are close enough to an appropriate - // partition. - InsertRemainingNoise(input_block); - // Add horizontal line separators as partitions. - GridInsertHLinePartitions(); - GridInsertVLinePartitions(); - // Recompute margins based on a local neighbourhood search. 
- part_grid_.GridFindMargins(best_columns_); - SetPartitionTypes(); - } - if (textord_tabfind_show_initial_partitions) { - ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions"); - part_grid_.DisplayBoxes(part_win); - DisplayTabVectors(part_win); - } - - if (!PSM_SPARSE(pageseg_mode)) { - if (equation_detect_) { - equation_detect_->FindEquationParts(&part_grid_, best_columns_); - } - if (textord_tabfind_find_tables) { - TableFinder table_finder; - table_finder.Init(gridsize(), bleft(), tright()); - table_finder.set_resolution(resolution_); - table_finder.set_left_to_right_language( - !input_block->block->right_to_left()); - // Copy cleaned partitions from part_grid_ to clean_part_grid_ and - // insert dot-like noise into period_grid_ - table_finder.InsertCleanPartitions(&part_grid_, input_block); - // Get Table Regions - table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); - } - GridRemoveUnderlinePartitions(); - part_grid_.DeleteUnknownParts(input_block); - - // Build the partitions into chains that belong in the same block and - // refine into one-to-one links, then smooth the types within each chain. - part_grid_.FindPartitionPartners(); - part_grid_.FindFigureCaptions(); - part_grid_.RefinePartitionPartners(true); - SmoothPartnerRuns(); - - #ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_partitions) { - ScrollView* window = MakeWindow(400, 300, "Partitions"); - if (window != nullptr) { - part_grid_.DisplayBoxes(window); - if (!textord_debug_printable) - DisplayTabVectors(window); - if (window != nullptr && textord_tabfind_show_partitions > 1) { - delete window->AwaitEvent(SVET_DESTROY); - } - } - } - #endif // GRAPHICS_DISABLED - part_grid_.AssertNoDuplicates(); - } - // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, - // and ownership of the BLOBNBOXes moves to the ColPartitions. - // (They were previously owned by the block or the image_bblobs list.) 
- ReleaseBlobsAndCleanupUnused(input_block); - // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and - // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves - // from the ColPartitions to the output TO_BLOCK. In non-text, the - // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. - if (PSM_SPARSE(pageseg_mode)) - part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks); - else - TransformToBlocks(blocks, to_blocks); - if (textord_debug_tabfind) { - tprintf("Found %d blocks, %d to_blocks\n", - blocks->length(), to_blocks->length()); - } - - DisplayBlocks(blocks); - RotateAndReskewBlocks(input_is_rtl, to_blocks); - int result = 0; - #ifndef GRAPHICS_DISABLED - if (blocks_win_ != nullptr) { - bool waiting = false; - do { - waiting = false; - SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY); - if (event->type == SVET_INPUT && event->parameter != nullptr) { - if (*event->parameter == 'd') - result = -1; - else - blocks->clear(); - } else if (event->type == SVET_DESTROY) { - blocks_win_ = nullptr; - } else { - waiting = true; - } - delete event; - } while (waiting); - } - #endif // GRAPHICS_DISABLED - return result; -} - -// Get the rotation required to deskew, and its inverse rotation. -void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) { - *reskew = reskew_; - *deskew = reskew_; - deskew->set_y(-deskew->y()); -} - -void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) { - equation_detect_ = detect; -} - -//////////////// PRIVATE CODE ///////////////////////// - -// Displays the blob and block bounding boxes in a window called Blocks. 
-void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { -#ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_blocks) { - if (blocks_win_ == nullptr) - blocks_win_ = MakeWindow(700, 300, "Blocks"); - else - blocks_win_->Clear(); - DisplayBoxes(blocks_win_); - BLOCK_IT block_it(blocks); - int serial = 1; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - BLOCK* block = block_it.data(); - block->pdblk.plot(blocks_win_, serial++, - textord_debug_printable ? ScrollView::BLUE - : ScrollView::GREEN); - } - blocks_win_->Update(); - } -#endif -} - -// Displays the column edges at each grid y coordinate defined by -// best_columns_. -void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { -#ifndef GRAPHICS_DISABLED - ScrollView* col_win = MakeWindow(50, 300, "Columns"); - DisplayBoxes(col_win); - col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); - for (int i = 0; i < gridheight_; ++i) { - ColPartitionSet* columns = best_columns_[i]; - if (columns != nullptr) - columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); - } -#endif -} - -// Sets up column_sets_ (the determined column layout at each horizontal -// slice). Returns false if the page is empty. -bool ColumnFinder::MakeColumns(bool single_column) { - // The part_sets_ are a temporary structure used during column creation, - // and is a vector of ColPartitionSets, representing ColPartitions found - // at horizontal slices through the page. - PartSetVector part_sets; - if (!single_column) { - if (!part_grid_.MakeColPartSets(&part_sets)) - return false; // Empty page. - ASSERT_HOST(part_grid_.gridheight() == gridheight_); - // Try using only the good parts first. 
- bool good_only = true; - do { - for (int i = 0; i < gridheight_; ++i) { - ColPartitionSet* line_set = part_sets.get(i); - if (line_set != nullptr && line_set->LegalColumnCandidate()) { - ColPartitionSet* column_candidate = line_set->Copy(good_only); - if (column_candidate != nullptr) - column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); - } - } - good_only = !good_only; - } while (column_sets_.empty() && !good_only); - if (textord_debug_tabfind) - PrintColumnCandidates("Column candidates"); - // Improve the column candidates against themselves. - ImproveColumnCandidates(&column_sets_, &column_sets_); - if (textord_debug_tabfind) - PrintColumnCandidates("Improved columns"); - // Improve the column candidates using the part_sets_. - ImproveColumnCandidates(&part_sets, &column_sets_); - } - ColPartitionSet* single_column_set = - part_grid_.MakeSingleColumnSet(WidthCB()); - if (single_column_set != nullptr) { - // Always add the single column set as a backup even if not in - // single column mode. - single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); - } - if (textord_debug_tabfind) - PrintColumnCandidates("Final Columns"); - bool has_columns = !column_sets_.empty(); - if (has_columns) { - // Divide the page into sections of uniform column layout. - bool any_multi_column = AssignColumns(part_sets); - if (textord_tabfind_show_columns) { - DisplayColumnBounds(&part_sets); - } - ComputeMeanColumnGap(any_multi_column); - } - for (int i = 0; i < part_sets.size(); ++i) { - ColPartitionSet* line_set = part_sets.get(i); - if (line_set != nullptr) { - line_set->RelinquishParts(); - delete line_set; - } - } - return has_columns; -} - -// Attempt to improve the column_candidates by expanding the columns -// and adding new partitions from the partition sets in src_sets. -// Src_sets may be equal to column_candidates, in which case it will -// use them as a source to improve themselves. 
-void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, - PartSetVector* column_sets) { - PartSetVector temp_cols; - temp_cols.move(column_sets); - if (src_sets == column_sets) - src_sets = &temp_cols; - int set_size = temp_cols.size(); - // Try using only the good parts first. - bool good_only = true; - do { - for (int i = 0; i < set_size; ++i) { - ColPartitionSet* column_candidate = temp_cols.get(i); - ASSERT_HOST(column_candidate != nullptr); - ColPartitionSet* improved = column_candidate->Copy(good_only); - if (improved != nullptr) { - improved->ImproveColumnCandidate(WidthCB(), src_sets); - improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); - } - } - good_only = !good_only; - } while (column_sets->empty() && !good_only); - if (column_sets->empty()) - column_sets->move(&temp_cols); - else - temp_cols.delete_data_pointers(); -} - -// Prints debug information on the column candidates. -void ColumnFinder::PrintColumnCandidates(const char* title) { - int set_size = column_sets_.size(); - tprintf("Found %d %s:\n", set_size, title); - if (textord_debug_tabfind >= 3) { - for (int i = 0; i < set_size; ++i) { - ColPartitionSet* column_set = column_sets_.get(i); - column_set->Print(); - } - } -} - -// Finds the optimal set of columns that cover the entire image with as -// few changes in column partition as possible. -// NOTE: this could be thought of as an optimization problem, but a simple -// greedy algorithm is used instead. The algorithm repeatedly finds the modal -// compatible column in an unassigned region and uses that with the extra -// tweak of extending the modal region over small breaks in compatibility. -// Where modal regions overlap, the boundary is chosen so as to minimize -// the cost in terms of ColPartitions not fitting an approved column. -// Returns true if any part of the page is multi-column. 
-bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { - int set_count = part_sets.size(); - ASSERT_HOST(set_count == gridheight()); - // Allocate and init the best_columns_. - best_columns_ = new ColPartitionSet*[set_count]; - for (int y = 0; y < set_count; ++y) - best_columns_[y] = nullptr; - int column_count = column_sets_.size(); - // column_set_costs[part_sets_ index][column_sets_ index] is - // < INT32_MAX if the partition set is compatible with the column set, - // in which case its value is the cost for that set used in deciding - // which competing set to assign. - // any_columns_possible[part_sets_ index] is true if any of - // possible_column_sets[part_sets_ index][*] is < INT32_MAX. - // assigned_costs[part_sets_ index] is set to the column_set_costs - // of the assigned column_sets_ index or INT32_MAX if none is set. - // On return the best_columns_ member is set. - bool* any_columns_possible = new bool[set_count]; - int* assigned_costs = new int[set_count]; - int** column_set_costs = new int*[set_count]; - // Set possible column_sets to indicate whether each set is compatible - // with each column. 
- for (int part_i = 0; part_i < set_count; ++part_i) { - ColPartitionSet* line_set = part_sets.get(part_i); - bool debug = line_set != nullptr && - WithinTestRegion(2, line_set->bounding_box().left(), - line_set->bounding_box().bottom()); - column_set_costs[part_i] = new int[column_count]; - any_columns_possible[part_i] = false; - assigned_costs[part_i] = INT32_MAX; - for (int col_i = 0; col_i < column_count; ++col_i) { - if (line_set != nullptr && - column_sets_.get(col_i)->CompatibleColumns(debug, line_set, - WidthCB())) { - column_set_costs[part_i][col_i] = - column_sets_.get(col_i)->UnmatchedWidth(line_set); - any_columns_possible[part_i] = true; - } else { - column_set_costs[part_i][col_i] = INT32_MAX; - if (debug) - tprintf("Set id %d did not match at y=%d, lineset =%p\n", - col_i, part_i, line_set); - } - } - } - bool any_multi_column = false; - // Assign a column set to each vertical grid position. - // While there is an unassigned range, find its mode. - int start, end; - while (BiggestUnassignedRange(set_count, any_columns_possible, - &start, &end)) { - if (textord_debug_tabfind >= 2) - tprintf("Biggest unassigned range = %d- %d\n", start, end); - // Find the modal column_set_id in the range. - int column_set_id = RangeModalColumnSet(column_set_costs, - assigned_costs, start, end); - if (textord_debug_tabfind >= 2) { - tprintf("Range modal column id = %d\n", column_set_id); - column_sets_.get(column_set_id)->Print(); - } - // Now find the longest run of the column_set_id in the range. - ShrinkRangeToLongestRun(column_set_costs, assigned_costs, - any_columns_possible, - column_set_id, &start, &end); - if (textord_debug_tabfind >= 2) - tprintf("Shrunk range = %d- %d\n", start, end); - // Extend the start and end past the longest run, while there are - // only small gaps in compatibility that can be overcome by larger - // regions of compatibility beyond. 
- ExtendRangePastSmallGaps(column_set_costs, assigned_costs, - any_columns_possible, - column_set_id, -1, -1, &start); - --end; - ExtendRangePastSmallGaps(column_set_costs, assigned_costs, - any_columns_possible, - column_set_id, 1, set_count, &end); - ++end; - if (textord_debug_tabfind) - tprintf("Column id %d applies to range = %d - %d\n", - column_set_id, start, end); - // Assign the column to the range, which now may overlap with other ranges. - AssignColumnToRange(column_set_id, start, end, column_set_costs, - assigned_costs); - if (column_sets_.get(column_set_id)->GoodColumnCount() > 1) - any_multi_column = true; - } - // If anything remains unassigned, the whole lot is unassigned, so - // arbitrarily assign id 0. - if (best_columns_[0] == nullptr) { - AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); - } - // Free memory. - for (int i = 0; i < set_count; ++i) { - delete [] column_set_costs[i]; - } - delete [] assigned_costs; - delete [] any_columns_possible; - delete [] column_set_costs; - return any_multi_column; -} - -// Finds the biggest range in part_sets_ that has no assigned column, but -// column assignment is possible. -bool ColumnFinder::BiggestUnassignedRange(int set_count, - const bool* any_columns_possible, - int* best_start, int* best_end) { - int best_range_size = 0; - *best_start = set_count; - *best_end = set_count; - int end = set_count; - for (int start = 0; start < gridheight_; start = end) { - // Find the first unassigned index in start. - while (start < set_count) { - if (best_columns_[start] == nullptr && any_columns_possible[start]) - break; - ++start; - } - // Find the first past the end and count the good ones in between. - int range_size = 1; // Number of non-null, but unassigned line sets. 
- end = start + 1; - while (end < set_count) { - if (best_columns_[end] != nullptr) - break; - if (any_columns_possible[end]) - ++range_size; - ++end; - } - if (start < set_count && range_size > best_range_size) { - best_range_size = range_size; - *best_start = start; - *best_end = end; - } - } - return *best_start < *best_end; -} - -// Finds the modal compatible column_set_ index within the given range. -int ColumnFinder::RangeModalColumnSet(int** column_set_costs, - const int* assigned_costs, - int start, int end) { - int column_count = column_sets_.size(); - STATS column_stats(0, column_count); - for (int part_i = start; part_i < end; ++part_i) { - for (int col_j = 0; col_j < column_count; ++col_j) { - if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) - column_stats.add(col_j, 1); - } - } - ASSERT_HOST(column_stats.get_total() > 0); - return column_stats.mode(); -} - -// Given that there are many column_set_id compatible columns in the range, -// shrinks the range to the longest contiguous run of compatibility, allowing -// gaps where no columns are possible, but not where competing columns are -// possible. -void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, - const int* assigned_costs, - const bool* any_columns_possible, - int column_set_id, - int* best_start, int* best_end) { - // orig_start and orig_end are the maximum range we will look at. - int orig_start = *best_start; - int orig_end = *best_end; - int best_range_size = 0; - *best_start = orig_end; - *best_end = orig_end; - int end = orig_end; - for (int start = orig_start; start < orig_end; start = end) { - // Find the first possible - while (start < orig_end) { - if (column_set_costs[start][column_set_id] < assigned_costs[start] || - !any_columns_possible[start]) - break; - ++start; - } - // Find the first past the end. 
- end = start + 1; - while (end < orig_end) { - if (column_set_costs[end][column_set_id] >= assigned_costs[start] && - any_columns_possible[end]) - break; - ++end; - } - if (start < orig_end && end - start > best_range_size) { - best_range_size = end - start; - *best_start = start; - *best_end = end; - } - } -} - -// Moves start in the direction of step, up to, but not including end while -// the only incompatible regions are no more than kMaxIncompatibleColumnCount -// in size, and the compatible regions beyond are bigger. -void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, - const int* assigned_costs, - const bool* any_columns_possible, - int column_set_id, - int step, int end, int* start) { - if (textord_debug_tabfind > 2) - tprintf("Starting expansion at %d, step=%d, limit=%d\n", - *start, step, end); - if (*start == end) - return; // Cannot be expanded. - - int barrier_size = 0; - int good_size = 0; - do { - // Find the size of the incompatible barrier. - barrier_size = 0; - int i; - for (i = *start + step; i != end; i += step) { - if (column_set_costs[i][column_set_id] < assigned_costs[i]) - break; // We are back on. - // Locations where none are possible don't count. - if (any_columns_possible[i]) - ++barrier_size; - } - if (textord_debug_tabfind > 2) - tprintf("At %d, Barrier size=%d\n", i, barrier_size); - if (barrier_size > kMaxIncompatibleColumnCount) - return; // Barrier too big. - if (i == end) { - // We can't go any further, but the barrier was small, so go to the end. - *start = i - step; - return; - } - // Now find the size of the good region on the other side. - good_size = 1; - for (i += step; i != end; i += step) { - if (column_set_costs[i][column_set_id] < assigned_costs[i]) - ++good_size; - else if (any_columns_possible[i]) - break; - } - if (textord_debug_tabfind > 2) - tprintf("At %d, good size = %d\n", i, good_size); - // If we had enough good ones we can extend the start and keep looking. 
- if (good_size >= barrier_size) - *start = i - step; - } while (good_size >= barrier_size); -} - -// Assigns the given column_set_id to the given range. -void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, - int** column_set_costs, - int* assigned_costs) { - ColPartitionSet* column_set = column_sets_.get(column_set_id); - for (int i = start; i < end; ++i) { - assigned_costs[i] = column_set_costs[i][column_set_id]; - best_columns_[i] = column_set; - } -} - -// Computes the mean_column_gap_. -void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { - int total_gap = 0; - int total_width = 0; - int gap_samples = 0; - int width_samples = 0; - for (int i = 0; i < gridheight_; ++i) { - ASSERT_HOST(best_columns_[i] != nullptr); - best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, - &width_samples, - &total_gap, - &gap_samples); - } - mean_column_gap_ = any_multi_column && gap_samples > 0 - ? total_gap / gap_samples : width_samples > 0 - ? total_width / width_samples : 0; -} - -//////// Functions that manipulate ColPartitions in the part_grid_ ///// -//////// to split, merge, find margins, and find types. ////////////// - -// Helper to delete all the deletable blobs on the list. Owned blobs are -// extracted from the list, but not deleted, leaving them owned by the owner(). -static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) { - for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.extract(); - if (blob->owner() == nullptr) { - delete blob->cblob(); - delete blob; - } - } -} - -// Hoovers up all un-owned blobs and deletes them. -// The rest get released from the block so the ColPartitions can pass -// ownership to the output blocks. 
-void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) { - ReleaseAllBlobsAndDeleteUnused(&block->blobs); - ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); - ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); - ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); - ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); -} - -// Splits partitions that cross columns where they have nothing in the gap. -void ColumnFinder::GridSplitPartitions() { - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* dont_repeat = nullptr; - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) - continue; // Only applies to text partitions. - ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; - int first_col = -1; - int last_col = -1; - // Find which columns the partition spans. - part->ColumnRange(resolution_, column_set, &first_col, &last_col); - if (first_col > 0) - --first_col; - // Convert output column indices to physical column indices. - first_col /= 2; - last_col /= 2; - // We will only consider cases where a partition spans two columns, - // since a heading that spans more columns than that is most likely - // genuine. - if (last_col != first_col + 1) - continue; - // Set up a rectangle search x-bounded by the column gap and y by the part. 
- int y = part->MidY(); - TBOX margin_box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), - margin_box.bottom()); - if (debug) { - tprintf("Considering partition for GridSplit:"); - part->Print(); - } - ColPartition* column = column_set->GetColumnByIndex(first_col); - if (column == nullptr) - continue; - margin_box.set_left(column->RightAtY(y) + 2); - column = column_set->GetColumnByIndex(last_col); - if (column == nullptr) - continue; - margin_box.set_right(column->LeftAtY(y) - 2); - // TODO(rays) Decide whether to keep rectangular filling or not in the - // main grid and therefore whether we need a fancier search here. - // Now run the rect search on the main blob grid. - GridSearch rectsearch(this); - if (debug) { - tprintf("Searching box (%d,%d)->(%d,%d)\n", - margin_box.left(), margin_box.bottom(), - margin_box.right(), margin_box.top()); - part->Print(); - } - rectsearch.StartRectSearch(margin_box); - BLOBNBOX* bbox; - while ((bbox = rectsearch.NextRectSearch()) != nullptr) { - if (bbox->bounding_box().overlap(margin_box)) - break; - } - if (bbox == nullptr) { - // There seems to be nothing in the hole, so split the partition. 
- gsearch.RemoveBBox(); - int x_middle = (margin_box.left() + margin_box.right()) / 2; - if (debug) { - tprintf("Splitting part at %d:", x_middle); - part->Print(); - } - ColPartition* split_part = part->SplitAt(x_middle); - if (split_part != nullptr) { - if (debug) { - tprintf("Split result:"); - part->Print(); - split_part->Print(); - } - part_grid_.InsertBBox(true, true, split_part); - } else { - // Split had no effect - if (debug) - tprintf("Split had no effect\n"); - dont_repeat = part; - } - part_grid_.InsertBBox(true, true, part); - gsearch.RepositionIterator(); - } else if (debug) { - tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", - bbox->bounding_box().left(), bbox->bounding_box().bottom(), - bbox->bounding_box().right(), bbox->bounding_box().top()); - } - } -} - -// Merges partitions where there is vertical overlap, within a single column, -// and the horizontal gap is small enough. -void ColumnFinder::GridMergePartitions() { - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->IsUnMergeableType()) - continue; - // Set up a rectangle search x-bounded by the column and y by the part. - ColPartitionSet* columns = best_columns_[gsearch.GridY()]; - TBOX box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); - if (debug) { - tprintf("Considering part for merge at:"); - part->Print(); - } - int y = part->MidY(); - ColPartition* left_column = columns->ColumnContaining(box.left(), y); - ColPartition* right_column = columns->ColumnContaining(box.right(), y); - if (left_column == nullptr || right_column != left_column) { - if (debug) - tprintf("In different columns\n"); - continue; - } - box.set_left(left_column->LeftAtY(y)); - box.set_right(right_column->RightAtY(y)); - // Now run the rect search. 
- bool modified_box = false; - GridSearch - rsearch(&part_grid_); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(box); - ColPartition* neighbour; - - while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (neighbour == part || neighbour->IsUnMergeableType()) - continue; - const TBOX& neighbour_box = neighbour->bounding_box(); - if (debug) { - tprintf("Considering merge with neighbour at:"); - neighbour->Print(); - } - if (neighbour_box.right() < box.left() || - neighbour_box.left() > box.right()) - continue; // Not within the same column. - if (part->VSignificantCoreOverlap(*neighbour) && - part->TypesMatch(*neighbour)) { - // There is vertical overlap and the gross types match, but only - // merge if the horizontal gap is small enough, as one of the - // partitions may be a figure caption within a column. - // If there is only one column, then the mean_column_gap_ is large - // enough to allow almost any merge, by being the mean column width. - const TBOX& part_box = part->bounding_box(); - // Don't merge if there is something else in the way. Use the margin - // to decide, and check both to allow a bit of overlap. - if (neighbour_box.left() > part->right_margin() && - part_box.right() < neighbour->left_margin()) - continue; // Neighbour is too far to the right. - if (neighbour_box.right() < part->left_margin() && - part_box.left() > neighbour->right_margin()) - continue; // Neighbour is too far to the left. - int h_gap = std::max(part_box.left(), neighbour_box.left()) - - std::min(part_box.right(), neighbour_box.right()); - if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || - part_box.width() < mean_column_gap_ || - neighbour_box.width() < mean_column_gap_) { - if (debug) { - tprintf("Running grid-based merge between:\n"); - part->Print(); - neighbour->Print(); - } - rsearch.RemoveBBox(); - if (!modified_box) { - // We are going to modify part, so remove it and re-insert it after. 
- gsearch.RemoveBBox(); - rsearch.RepositionIterator(); - modified_box = true; - } - part->Absorb(neighbour, WidthCB()); - } else if (debug) { - tprintf("Neighbour failed hgap test\n"); - } - } else if (debug) { - tprintf("Neighbour failed overlap or typesmatch test\n"); - } - } - if (modified_box) { - // We modified the box of part, so re-insert it into the grid. - // This does no harm in the current cell, as it already exists there, - // but it needs to exist in all the cells covered by its bounding box, - // or it will never be found by a full search. - // Because the box has changed, it has to be removed first, otherwise - // add_sorted may fail to keep a single copy of the pointer. - part_grid_.InsertBBox(true, true, part); - gsearch.RepositionIterator(); - } - } -} - -// Inserts remaining noise blobs into the most applicable partition if any. -// If there is no applicable partition, then the blobs are deleted. -void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { - BLOBNBOX_IT blob_it(&block->noise_blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->owner() != nullptr) continue; - TBOX search_box(blob->bounding_box()); - bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); - search_box.pad(gridsize(), gridsize()); - // Setup a rectangle search to find the best partition to merge with. 
- ColPartitionGridSearch rsearch(&part_grid_); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(search_box); - ColPartition* part; - ColPartition* best_part = nullptr; - int best_distance = 0; - while ((part = rsearch.NextRectSearch()) != nullptr) { - if (part->IsUnMergeableType()) - continue; - int distance = projection_.DistanceOfBoxFromPartition( - blob->bounding_box(), *part, denorm_, debug); - if (best_part == nullptr || distance < best_distance) { - best_part = part; - best_distance = distance; - } - } - if (best_part != nullptr && - best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) { - // Close enough to merge. - if (debug) { - tprintf("Adding noise blob with distance %d, thr=%g:box:", - best_distance, - kMaxDistToPartSizeRatio * best_part->median_height()); - blob->bounding_box().print(); - tprintf("To partition:"); - best_part->Print(); - } - part_grid_.RemoveBBox(best_part); - best_part->AddBox(blob); - part_grid_.InsertBBox(true, true, best_part); - blob->set_owner(best_part); - blob->set_flow(best_part->flow()); - blob->set_region_type(best_part->blob_type()); - } else { - // Mark the blob for deletion. - blob->set_region_type(BRT_NOISE); - } - } - // Delete the marked blobs, clearing neighbour references. - block->DeleteUnownedNoise(); -} - -// Helper makes a box from a horizontal line. -static TBOX BoxFromHLine(const TabVector* hline) { - int top = std::max(hline->startpt().y(), hline->endpt().y()); - int bottom = std::min(hline->startpt().y(), hline->endpt().y()); - top += hline->mean_width(); - if (top == bottom) { - if (bottom > 0) - --bottom; - else - ++top; - } - return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); -} - -// Remove partitions that come from horizontal lines that look like -// underlines, but are not part of a table. 
-void ColumnFinder::GridRemoveUnderlinePartitions() { - TabVector_IT hline_it(&horizontal_lines_); - for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { - TabVector* hline = hline_it.data(); - if (hline->intersects_other_lines()) - continue; - TBOX line_box = BoxFromHLine(hline); - TBOX search_box = line_box; - search_box.pad(0, line_box.height()); - ColPartitionGridSearch part_search(&part_grid_); - part_search.SetUniqueMode(true); - part_search.StartRectSearch(search_box); - ColPartition* covered; - bool touched_table = false; - bool touched_text = false; - ColPartition* line_part = nullptr; - while ((covered = part_search.NextRectSearch()) != nullptr) { - if (covered->type() == PT_TABLE) { - touched_table = true; - break; - } else if (covered->IsTextType()) { - // TODO(rays) Add a list of underline sections to ColPartition. - int text_bottom = covered->median_bottom(); - if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) - touched_text = true; - } else if (covered->blob_type() == BRT_HLINE && - line_box.contains(covered->bounding_box())) { - line_part = covered; - } - } - if (line_part != nullptr && !touched_table && touched_text) { - part_grid_.RemoveBBox(line_part); - delete line_part; - } - } -} - -// Add horizontal line separators as partitions. 
-void ColumnFinder::GridInsertHLinePartitions() { - TabVector_IT hline_it(&horizontal_lines_); - for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { - TabVector* hline = hline_it.data(); - TBOX line_box = BoxFromHLine(hline); - ColPartition* part = ColPartition::MakeLinePartition( - BRT_HLINE, vertical_skew_, - line_box.left(), line_box.bottom(), line_box.right(), line_box.top()); - part->set_type(PT_HORZ_LINE); - bool any_image = false; - ColPartitionGridSearch part_search(&part_grid_); - part_search.SetUniqueMode(true); - part_search.StartRectSearch(line_box); - ColPartition* covered; - while ((covered = part_search.NextRectSearch()) != nullptr) { - if (covered->IsImageType()) { - any_image = true; - break; - } - } - if (!any_image) - part_grid_.InsertBBox(true, true, part); - else - delete part; - } -} - -// Add horizontal line separators as partitions. -void ColumnFinder::GridInsertVLinePartitions() { - TabVector_IT vline_it(dead_vectors()); - for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { - TabVector* vline = vline_it.data(); - if (!vline->IsSeparator()) - continue; - int left = std::min(vline->startpt().x(), vline->endpt().x()); - int right = std::max(vline->startpt().x(), vline->endpt().x()); - right += vline->mean_width(); - if (left == right) { - if (left > 0) - --left; - else - ++right; - } - ColPartition* part = ColPartition::MakeLinePartition( - BRT_VLINE, vertical_skew_, - left, vline->startpt().y(), right, vline->endpt().y()); - part->set_type(PT_VERT_LINE); - bool any_image = false; - ColPartitionGridSearch part_search(&part_grid_); - part_search.SetUniqueMode(true); - part_search.StartRectSearch(part->bounding_box()); - ColPartition* covered; - while ((covered = part_search.NextRectSearch()) != nullptr) { - if (covered->IsImageType()) { - any_image = true; - break; - } - } - if (!any_image) - part_grid_.InsertBBox(true, true, part); - else - delete part; - } -} - -// For every ColPartition in 
the grid, sets its type based on position -// in the columns. -void ColumnFinder::SetPartitionTypes() { - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); - } -} - -// Only images remain with multiple types in a run of partners. -// Sets the type of all in the group to the maximum of the group. -void ColumnFinder::SmoothPartnerRuns() { - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - ColPartition* partner = part->SingletonPartner(true); - if (partner != nullptr) { - if (partner->SingletonPartner(false) != part) { - tprintf("Ooops! Partition:(%d partners)", - part->upper_partners()->length()); - part->Print(); - tprintf("has singleton partner:(%d partners", - partner->lower_partners()->length()); - partner->Print(); - tprintf("but its singleton partner is:"); - if (partner->SingletonPartner(false) == nullptr) - tprintf("NULL\n"); - else - partner->SingletonPartner(false)->Print(); - } - ASSERT_HOST(partner->SingletonPartner(false) == part); - } else if (part->SingletonPartner(false) != nullptr) { - ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; - int column_count = column_set->ColumnCount(); - part->SmoothPartnerRun(column_count * 2 + 1); - } - } -} - -// Helper functions for TransformToBlocks. -// Add the part to the temp list in the correct order. -void ColumnFinder::AddToTempPartList(ColPartition* part, - ColPartition_CLIST* temp_list) { - int mid_y = part->MidY(); - ColPartition_C_IT it(temp_list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* test_part = it.data(); - if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) - continue; // Noise stays in sequence. 
- if (test_part == part->SingletonPartner(false)) - break; // Insert before its lower partner. - int neighbour_bottom = test_part->median_bottom(); - int neighbour_top = test_part->median_top(); - int neighbour_y = (neighbour_bottom + neighbour_top) / 2; - if (neighbour_y < mid_y) - break; // part is above test_part so insert it. - if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) - continue; // Incompatibles stay in order - } - if (it.cycled_list()) { - it.add_to_end(part); - } else { - it.add_before_stay_put(part); - } -} - -// Add everything from the temp list to the work_set assuming correct order. -void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list, - WorkingPartSet_LIST* work_set) { - ColPartition_C_IT it(temp_list); - while (!it.empty()) { - it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, - &good_parts_, work_set); - it.forward(); - } -} - -// Transform the grid of partitions to the output blocks. -void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, - TO_BLOCK_LIST* to_blocks) { - WorkingPartSet_LIST work_set; - ColPartitionSet* column_set = nullptr; - ColPartition_IT noise_it(&noise_parts_); - // The temp_part_list holds a list of parts at the same grid y coord - // so they can be added in the correct order. This prevents thin objects - // like horizontal lines going before the text lines above them. - ColPartition_CLIST temp_part_list; - // Iterate the ColPartitions in the grid. It starts at the top - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - int prev_grid_y = -1; - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - int grid_y = gsearch.GridY(); - if (grid_y != prev_grid_y) { - EmptyTempPartList(&temp_part_list, &work_set); - prev_grid_y = grid_y; - } - if (best_columns_[grid_y] != column_set) { - column_set = best_columns_[grid_y]; - // Every line should have a non-null best column. 
- ASSERT_HOST(column_set != nullptr); - column_set->ChangeWorkColumns(bleft_, tright_, resolution_, - &good_parts_, &work_set); - if (textord_debug_tabfind) - tprintf("Changed column groups at grid index %d, y=%d\n", - gsearch.GridY(), gsearch.GridY() * gridsize()); - } - if (part->type() == PT_NOISE) { - noise_it.add_to_end(part); - } else { - AddToTempPartList(part, &temp_part_list); - } - } - EmptyTempPartList(&temp_part_list, &work_set); - // Now finish all working sets and transfer ColPartitionSets to block_sets. - WorkingPartSet_IT work_it(&work_set); - while (!work_it.empty()) { - WorkingPartSet* working_set = work_it.extract(); - working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, - &good_parts_, blocks, to_blocks); - delete working_set; - work_it.forward(); - } -} - -// Helper reflects a list of blobs in the y-axis. -// Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. -static void ReflectBlobList(BLOBNBOX_LIST* bblobs) { - BLOBNBOX_IT it(bblobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->reflect_box_in_y_axis(); - } -} - -// Reflect the blob boxes (but not the outlines) in the y-axis so that -// the blocks get created in the correct RTL order. Reflects the blobs -// in the input_block and the bblobs list. -// The reflection is undone in RotateAndReskewBlocks by -// reflecting the blocks themselves, and then recomputing the blob bounding -// boxes. -void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { - ReflectBlobList(bblobs); - ReflectBlobList(&input_block->blobs); - ReflectBlobList(&input_block->small_blobs); - ReflectBlobList(&input_block->noise_blobs); - ReflectBlobList(&input_block->large_blobs); - // Update the denorm with the reflection. 
- DENORM* new_denorm = new DENORM; - new_denorm->SetupNormalization(nullptr, nullptr, denorm_, - 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); - denorm_ = new_denorm; -} - -// Helper fixes up blobs and cblobs to match the desired rotation, -// exploding multi-outline blobs back to single blobs and accumulating -// the bounding box widths and heights. -static void RotateAndExplodeBlobList(const FCOORD& blob_rotation, - BLOBNBOX_LIST* bblobs, - STATS* widths, - STATS* heights) { - BLOBNBOX_IT it(bblobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - C_BLOB* cblob = blob->cblob(); - C_OUTLINE_LIST* outlines = cblob->out_list(); - C_OUTLINE_IT ol_it(outlines); - if (!outlines->singleton()) { - // This blob has multiple outlines from CJK repair. - // Explode the blob back into individual outlines. - for (;!ol_it.empty(); ol_it.forward()) { - C_OUTLINE* outline = ol_it.extract(); - BLOBNBOX* new_blob = BLOBNBOX::RealBlob(outline); - // This blob will be revisited later since we add_after_stay_put here. - // This means it will get rotated and have its width/height added to - // the stats below. - it.add_after_stay_put(new_blob); - } - it.extract(); - delete cblob; - delete blob; - } else { - if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { - cblob->rotate(blob_rotation); - } - blob->compute_bounding_box(); - widths->add(blob->bounding_box().width(), 1); - heights->add(blob->bounding_box().height(), 1); - } - } -} - -// Undo the deskew that was done in FindTabVectors, as recognition is done -// without correcting blobs or blob outlines for skew. -// Reskew the completed blocks to put them back to the original rotated coords -// that were created by CorrectOrientation. -// If the input_is_rtl, then reflect the blocks in the y-axis to undo the -// reflection that was done before FindTabVectors. 
-// Blocks that were identified as vertical text (relative to the rotated -// coordinates) are further rotated so the text lines are horizontal. -// blob polygonal outlines are rotated to match the position of the blocks -// that they are in, and their bounding boxes are recalculated to be accurate. -// Record appropriate inverse transformations and required -// classifier transformation in the blocks. -void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, - TO_BLOCK_LIST* blocks) { - if (input_is_rtl) { - // The skew is backwards because of the reflection. - FCOORD tmp = deskew_; - deskew_ = reskew_; - reskew_ = tmp; - } - TO_BLOCK_IT it(blocks); - int block_index = 1; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TO_BLOCK* to_block = it.data(); - BLOCK* block = to_block->block; - // Blocks are created on the deskewed blob outlines in TransformToBlocks() - // so we need to reskew them back to page coordinates. - if (input_is_rtl) { - block->reflect_polygon_in_y_axis(); - } - block->rotate(reskew_); - // Copy the right_to_left flag to the created block. - block->set_right_to_left(input_is_rtl); - // Save the skew angle in the block for baseline computations. - block->set_skew(reskew_); - block->pdblk.set_index(block_index++); - FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); - // Rotate all the blobs if needed and recompute the bounding boxes. - // Compute the block median blob width and height as we go. 
- STATS widths(0, block->pdblk.bounding_box().width()); - STATS heights(0, block->pdblk.bounding_box().height()); - RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, - &widths, &heights); - TO_ROW_IT row_it(to_block->get_rows()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - TO_ROW* row = row_it.data(); - RotateAndExplodeBlobList(blob_rotation, row->blob_list(), - &widths, &heights); - } - block->set_median_size(static_cast(widths.median() + 0.5), - static_cast(heights.median() + 0.5)); - if (textord_debug_tabfind >= 2) - tprintf("Block median size = (%d, %d)\n", - block->median_size().x(), block->median_size().y()); - } -} - -// Computes the rotations for the block (to make textlines horizontal) and -// for the blobs (for classification) and sets the appropriate members -// of the given block. -// Returns the rotation that needs to be applied to the blobs to make -// them sit in the rotated block. -FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { - // The text_rotation_ tells us the gross page text rotation that needs - // to be applied for classification - // TODO(rays) find block-level classify rotation by orientation detection. - // In the mean time, assume that "up" for text printed in the minority - // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. - // Accomplish this by zero-ing out the text rotation. This covers the - // common cases of image credits in documents written in Latin scripts - // and page headings for predominantly vertically written CJK books. - FCOORD classify_rotation(text_rotation_); - FCOORD block_rotation(1.0f, 0.0f); - if (block->pdblk.poly_block()->isA() == PT_VERTICAL_TEXT) { - // Vertical text needs to be 90 degrees rotated relative to the rest. - // If the rest has a 90 degree rotation already, use the inverse, making - // the vertical text the original way up. Otherwise use 90 degrees - // clockwise. 
- if (rerotate_.x() == 0.0f) - block_rotation = rerotate_; - else - block_rotation = FCOORD(0.0f, -1.0f); - block->rotate(block_rotation); - classify_rotation = FCOORD(1.0f, 0.0f); - } - block_rotation.rotate(rotation_); - // block_rotation is now what we have done to the blocks. Now do the same - // thing to the blobs, but save the inverse rotation in the block, as that - // is what we need to DENORM back to the image coordinates. - FCOORD blob_rotation(block_rotation); - block_rotation.set_y(-block_rotation.y()); - block->set_re_rotation(block_rotation); - block->set_classify_rotation(classify_rotation); - if (textord_debug_tabfind) { - tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", - block->pdblk.index(), block->pdblk.poly_block()->isA(), - block->re_rotation().x(), block->re_rotation().y(), - classify_rotation.x(), classify_rotation.y()); - block->pdblk.bounding_box().print(); - } - return blob_rotation; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colfind.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colfind.h deleted file mode 100644 index 9c537104..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colfind.h +++ /dev/null @@ -1,369 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colfind.h -// Description: Class to find columns in the grid of BLOBNBOXes. -// Author: Ray Smith -// Created: Thu Feb 21 14:04:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_COLFIND_H_ -#define TESSERACT_TEXTORD_COLFIND_H_ - -#include "colpartitiongrid.h" -#include "colpartitionset.h" -#include "debugpixa.h" -#include "imagefind.h" -#include "ocrblock.h" -#include "tabfind.h" -#include "textlineprojection.h" - -class BLOCK_LIST; -struct Boxa; -struct Pixa; -class DENORM; -class ScrollView; -class STATS; -class TO_BLOCK; - -namespace tesseract { - -extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection"); - -class ColPartitionSet; -class ColPartitionSet_LIST; -class ColSegment_LIST; -class ColumnGroup_LIST; -class LineSpacing; -class StrokeWidth; -class TempColumn_LIST; -class EquationDetectBase; - -// The ColumnFinder class finds columns in the grid. -class ColumnFinder : public TabFind { - public: - // Gridsize is an estimate of the text size in the image. A suitable value - // is in TO_BLOCK::line_size after find_components has been used to make - // the blobs. - // bleft and tright are the bounds of the image (rectangle) being processed. - // vlines is a (possibly empty) list of TabVector and vertical_x and y are - // the sum logical vertical vector produced by LineFinder::FindVerticalLines. - // If cjk_script is true, then broken CJK characters are fixed during - // layout analysis to assist in detecting horizontal vs vertically written - // textlines. 
- ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, - int resolution, bool cjk_script, double aligned_gap_fraction, - TabVector_LIST* vlines, TabVector_LIST* hlines, - int vertical_x, int vertical_y); - virtual ~ColumnFinder(); - - // Accessors for testing - const DENORM* denorm() const { - return denorm_; - } - const TextlineProjection* projection() const { - return &projection_; - } - void set_cjk_script(bool is_cjk) { - cjk_script_ = is_cjk; - } - - // ====================================================================== - // The main function of ColumnFinder is broken into pieces to facilitate - // optional insertion of orientation and script detection in an efficient - // way. The calling sequence IS MANDATORY however, whether or not - // OSD is being used: - // 1. Construction. - // 2. SetupAndFilterNoise. - // 3. IsVerticallyAlignedText. - // 4. CorrectOrientation. - // 5. FindBlocks. - // 6. Destruction. Use of a single column finder for multiple images does not - // make sense. - // Throughout these steps, the ColPartitions are owned by part_grid_, which - // means that that it must be kept correct. Exception: big_parts_ owns its - // own ColPartitions. - // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except - // for a phase in FindBlocks before TransformToBlocks, when they become - // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX - // indicates more of a betrothal for the majority of layout analysis, ie - // which ColPartition will take ownership when the blobs are release from - // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that - // are part of the image regions, as they are not on any TO_BLOCK list. - // TODO(rays) break up column finder further into smaller classes, as - // there is a lot more to it than column finding now. 
- // ====================================================================== - - // Performs initial processing on the blobs in the input_block: - // Setup the part_grid, stroke_width_, nontext_map_. - // Obvious noise blobs are filtered out and used to mark the nontext_map_. - // Initial stroke-width analysis is used to get local text alignment - // direction, so the textline projection_ map can be setup. - // On return, IsVerticallyAlignedText may be called (now optionally) to - // determine the gross textline alignment of the page. - void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix, - TO_BLOCK* input_block); - - // Tests for vertical alignment of text (returning true if so), and generates - // a list of blobs (in osd_blobs) for orientation and script detection. - // block is the single block for the whole page or rectangle to be OCRed. - // Note that the vertical alignment may be due to text whose writing direction - // is vertical, like say Japanese, or due to text whose writing direction is - // horizontal but whose text appears vertically aligned because the image is - // not the right way up. - // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. - bool IsVerticallyAlignedText(double find_vertical_text_ratio, - TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs); - - // Rotates the blobs and the TabVectors so that the gross writing direction - // (text lines) are horizontal and lines are read down the page. - // Applied rotation stored in rotation_. - // A second rotation is calculated for application during recognition to - // make the rotated blobs upright for recognition. - // Subsequent rotation stored in text_rotation_. - // - // Arguments: - // vertical_text_lines is true if the text lines are vertical. - // recognition_rotation [0..3] is the number of anti-clockwise 90 degree - // rotations from osd required for the text to be upright and readable. 
- void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines, - int recognition_rotation); - - // Finds blocks of text, image, rule line, table etc, returning them in the - // blocks and to_blocks - // (Each TO_BLOCK points to the basic BLOCK and adds more information.) - // Image blocks are generated by a combination of photo_mask_pix (which may - // NOT be nullptr) and the rejected text found during preliminary textline - // finding. - // The input_block is the result of a call to find_components, and contains - // the blobs found in the image or rectangle to be OCRed. These blobs will be - // removed and placed in the output blocks, while unused ones will be deleted. - // If single_column is true, the input is treated as single column, but - // it is still divided into blocks of equal line spacing/text size. - // scaled_color is scaled down by scaled_factor from the input color image, - // and may be nullptr if the input was not color. - // grey_pix is optional, but if present must match the photo_mask_pix in size, - // and must be a *real* grey image instead of binary_pix * 255. - // thresholds_pix is expected to be present iff grey_pix is present and - // can be an integer factor reduction of the grey_pix. It represents the - // thresholds that were used to create the binary_pix from the grey_pix. - // Small blobs that confuse the segmentation into lines are placed into - // diacritic_blobs, with the intention that they be put into the most - // appropriate word after the rest of layout analysis. - // Returns -1 if the user hits the 'd' key in the blocks window while running - // in debug mode, which requests a retry with more debug info. 
- int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor, - TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix, - Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks, - BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks); - - // Get the rotation required to deskew, and its inverse rotation. - void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew); - - // Set the equation detection pointer. - void SetEquationDetect(EquationDetectBase* detect); - - private: - // Displays the blob and block bounding boxes in a window called Blocks. - void DisplayBlocks(BLOCK_LIST* blocks); - // Displays the column edges at each grid y coordinate defined by - // best_columns_. - void DisplayColumnBounds(PartSetVector* sets); - - ////// Functions involved in determining the columns used on the page. ///// - - // Sets up column_sets_ (the determined column layout at each horizontal - // slice). Returns false if the page is empty. - bool MakeColumns(bool single_column); - // Attempt to improve the column_candidates by expanding the columns - // and adding new partitions from the partition sets in src_sets. - // Src_sets may be equal to column_candidates, in which case it will - // use them as a source to improve themselves. - void ImproveColumnCandidates(PartSetVector* src_sets, - PartSetVector* column_sets); - // Prints debug information on the column candidates. - void PrintColumnCandidates(const char* title); - // Finds the optimal set of columns that cover the entire image with as - // few changes in column partition as possible. - // Returns true if any part of the page is multi-column. - bool AssignColumns(const PartSetVector& part_sets); - // Finds the biggest range in part_sets_ that has no assigned column, but - // column assignment is possible. - bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible, - int* start, int* end); - // Finds the modal compatible column_set_ index within the given range. 
- int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs, - int start, int end); - // Given that there are many column_set_id compatible columns in the range, - // shrinks the range to the longest contiguous run of compatibility, allowing - // gaps where no columns are possible, but not where competing columns are - // possible. - void ShrinkRangeToLongestRun(int** column_set_costs, - const int* assigned_costs, - const bool* any_columns_possible, - int column_set_id, - int* best_start, int* best_end); - // Moves start in the direction of step, up to, but not including end while - // the only incompatible regions are no more than kMaxIncompatibleColumnCount - // in size, and the compatible regions beyond are bigger. - void ExtendRangePastSmallGaps(int** column_set_costs, - const int* assigned_costs, - const bool* any_columns_possible, - int column_set_id, - int step, int end, int* start); - // Assigns the given column_set_id to the part_sets_ in the given range. - void AssignColumnToRange(int column_set_id, int start, int end, - int** column_set_costs, int* assigned_costs); - - // Computes the mean_column_gap_. - void ComputeMeanColumnGap(bool any_multi_column); - - //////// Functions that manipulate ColPartitions in the part_grid_ ///// - //////// to split, merge, find margins, and find types. ////////////// - - // Hoovers up all un-owned blobs and deletes them. - // The rest get released from the block so the ColPartitions can pass - // ownership to the output blocks. - void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block); - // Splits partitions that cross columns where they have nothing in the gap. - void GridSplitPartitions(); - // Merges partitions where there is vertical overlap, within a single column, - // and the horizontal gap is small enough. - void GridMergePartitions(); - // Inserts remaining noise blobs into the most applicable partition if any. - // If there is no applicable partition, then the blobs are deleted. 
- void InsertRemainingNoise(TO_BLOCK* block); - // Remove partitions that come from horizontal lines that look like - // underlines, but are not part of a table. - void GridRemoveUnderlinePartitions(); - // Add horizontal line separators as partitions. - void GridInsertHLinePartitions(); - // Add vertical line separators as partitions. - void GridInsertVLinePartitions(); - // For every ColPartition in the grid, sets its type based on position - // in the columns. - void SetPartitionTypes(); - // Only images remain with multiple types in a run of partners. - // Sets the type of all in the group to the maximum of the group. - void SmoothPartnerRuns(); - - //////// Functions that make the final output blocks /////// - - // Helper functions for TransformToBlocks. - // Add the part to the temp list in the correct order. - void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list); - // Add everything from the temp list to the work_set assuming correct order. - void EmptyTempPartList(ColPartition_CLIST* temp_list, - WorkingPartSet_LIST* work_set); - - // Transform the grid of partitions to the output blocks. - void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); - - // Reflect the blob boxes (but not the outlines) in the y-axis so that - // the blocks get created in the correct RTL order. Rotates the blobs - // in the input_block and the bblobs list. - // The reflection is undone in RotateAndReskewBlocks by - // reflecting the blocks themselves, and then recomputing the blob bounding - // boxes. - void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs); - - // Undo the deskew that was done in FindTabVectors, as recognition is done - // without correcting blobs or blob outlines for skew. - // Reskew the completed blocks to put them back to the original rotated coords - // that were created by CorrectOrientation. 
- // If the input_is_rtl, then reflect the blocks in the y-axis to undo the - // reflection that was done before FindTabVectors. - // Blocks that were identified as vertical text (relative to the rotated - // coordinates) are further rotated so the text lines are horizontal. - // blob polygonal outlines are rotated to match the position of the blocks - // that they are in, and their bounding boxes are recalculated to be accurate. - // Record appropriate inverse transformations and required - // classifier transformation in the blocks. - void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks); - - // Computes the rotations for the block (to make textlines horizontal) and - // for the blobs (for classification) and sets the appropriate members - // of the given block. - // Returns the rotation that needs to be applied to the blobs to make - // them sit in the rotated block. - FCOORD ComputeBlockAndClassifyRotation(BLOCK* block); - - // If true then the page language is cjk, so it is safe to perform - // FixBrokenCJK. - bool cjk_script_; - // The minimum gutter width to apply for finding columns. - // Modified when vertical text is detected to prevent detection of - // vertical text lines as columns. - int min_gutter_width_; - // The mean gap between columns over the page. - int mean_column_gap_; - // Config param saved at construction time. Modifies min_gutter_width_ with - // vertical text to prevent detection of vertical text as columns. - double tabfind_aligned_gap_fraction_; - // The rotation vector needed to convert original coords to deskewed. - FCOORD deskew_; - // The rotation vector needed to convert deskewed back to original coords. - FCOORD reskew_; - // The rotation vector used to rotate vertically oriented pages. - FCOORD rotation_; - // The rotation vector needed to convert the rotated back to original coords. - FCOORD rerotate_; - // The additional rotation vector needed to rotate text for recognition. 
- FCOORD text_rotation_; - // The column_sets_ contain the ordered candidate ColPartitionSets that - // define the possible divisions of the page into columns. - PartSetVector column_sets_; - // A simple array of pointers to the best assigned column division at - // each grid y coordinate. - ColPartitionSet** best_columns_; - // The grid used for creating initial partitions with strokewidth. - StrokeWidth* stroke_width_; - // The grid used to hold ColPartitions after the columns have been determined. - ColPartitionGrid part_grid_; - // List of ColPartitions that are no longer needed after they have been - // turned into regions, but are kept around because they are referenced - // by the part_grid_. - ColPartition_LIST good_parts_; - // List of ColPartitions that are big and might be dropcap or vertically - // joined. - ColPartition_LIST big_parts_; - // List of ColPartitions that have been declared noise. - ColPartition_LIST noise_parts_; - // The fake blobs that are made from the images. - BLOBNBOX_LIST image_bblobs_; - // Horizontal line separators. - TabVector_LIST horizontal_lines_; - // Image map of photo/noise areas on the page. - Pix* nontext_map_; - // Textline projection map. - TextlineProjection projection_; - // Sequence of DENORMS that indicate how to get back to the original image - // coordinate space. The destructor must delete all the DENORMs in the chain. - DENORM* denorm_; - - // Various debug windows that automatically go away on completion. - ScrollView* input_blobs_win_; - - // The equation region detector pointer. Note: This pointer is passed in by - // member function SetEquationDetect, and releasing it is NOT owned by this - // class. - EquationDetectBase* equation_detect_; - - // Allow a subsequent instance to reuse the blocks window. - // Not thread-safe, but multiple threads shouldn't be using windows anyway. - static ScrollView* blocks_win_; -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_TEXTORD_COLFIND_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartition.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartition.cpp deleted file mode 100644 index 0167d59f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartition.cpp +++ /dev/null @@ -1,2589 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colpartition.cpp -// Description: Class to hold partitions of the page that correspond -// roughly to text lines. -// Author: Ray Smith -// Created: Thu Aug 14 10:54:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "colpartition.h" -#include "colpartitiongrid.h" -#include "colpartitionset.h" -#include "detlinefit.h" -#include "dppoint.h" -#include "imagefind.h" -#include "workingpartset.h" - -#include - -namespace tesseract { - -ELIST2IZE(ColPartition) -CLISTIZE(ColPartition) - -//////////////// ColPartition Implementation //////////////// - -// Maximum change in spacing (in inches) to ignore. -const double kMaxSpacingDrift = 1.0 / 72; // 1/72 is one point. -// Maximum fraction of line height used as an additional allowance -// for top spacing. 
-const double kMaxTopSpacingFraction = 0.25; -// What multiple of the largest line height should be used as an upper bound -// for whether lines are in the same text block? -const double kMaxSameBlockLineSpacing = 3; -// Maximum ratio of sizes for lines to be considered the same size. -const double kMaxSizeRatio = 1.5; -// Fraction of max of leader width and gap for max IQR of gaps. -const double kMaxLeaderGapFractionOfMax = 0.25; -// Fraction of min of leader width and gap for max IQR of gaps. -const double kMaxLeaderGapFractionOfMin = 0.5; -// Minimum number of blobs to be considered a leader. -const int kMinLeaderCount = 5; -// Minimum score for a STRONG_CHAIN textline. -const int kMinStrongTextValue = 6; -// Minimum score for a CHAIN textline. -const int kMinChainTextValue = 3; -// Minimum number of blobs for strong horizontal text lines. -const int kHorzStrongTextlineCount = 8; -// Minimum height (in image pixels) for strong horizontal text lines. -const int kHorzStrongTextlineHeight = 10; -// Minimum aspect ratio for strong horizontal text lines. -const int kHorzStrongTextlineAspect = 5; -// Maximum upper quartile error allowed on a baseline fit as a fraction -// of height. -const double kMaxBaselineError = 0.4375; -// Min coverage for a good baseline between vectors -const double kMinBaselineCoverage = 0.5; -// Max RMS color noise to compare colors. -const int kMaxRMSColorNoise = 128; -// Maximum distance to allow a partition color to be to use that partition -// in smoothing neighbouring types. This is a squared distance. -const int kMaxColorDistance = 900; - -// blob_type is the blob_region_type_ of the blobs in this partition. -// Vertical is the direction of logical vertical on the possibly skewed image. 
-ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical) - : left_margin_(-INT32_MAX), right_margin_(INT32_MAX), - median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0), - median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0), - blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0), - good_width_(false), good_column_(false), - left_key_tab_(false), right_key_tab_(false), - left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical), - working_set_(nullptr), last_add_was_vertical_(false), block_owned_(false), - desperately_merged_(false), - first_column_(-1), last_column_(-1), column_set_(nullptr), - side_step_(0), top_spacing_(0), bottom_spacing_(0), - type_before_table_(PT_UNKNOWN), inside_table_column_(false), - nearest_neighbor_above_(nullptr), nearest_neighbor_below_(nullptr), - space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0), - owns_blobs_(true) { - memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); -} - -// Constructs a fake ColPartition with a single fake BLOBNBOX, all made -// from a single TBOX. -// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and -// the ColPartition owns the BLOBNBOX!!! -// Call DeleteBoxes before deleting the ColPartition. 
-ColPartition* ColPartition::FakePartition(const TBOX& box, - PolyBlockType block_type, - BlobRegionType blob_type, - BlobTextFlowType flow) { - ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1)); - part->set_type(block_type); - part->set_flow(flow); - part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box))); - part->set_left_margin(box.left()); - part->set_right_margin(box.right()); - part->SetBlobTypes(); - part->ComputeLimits(); - part->ClaimBoxes(); - return part; -} - -// Constructs and returns a ColPartition with the given real BLOBNBOX, -// and sets it up to be a "big" partition (single-blob partition bigger -// than the surrounding text that may be a dropcap, two or more vertically -// touching characters, or some graphic element. -// If the given list is not nullptr, the partition is also added to the list. -ColPartition* ColPartition::MakeBigPartition(BLOBNBOX* box, - ColPartition_LIST* big_part_list) { - box->set_owner(nullptr); - ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); - single->set_flow(BTFT_NONE); - single->AddBox(box); - single->ComputeLimits(); - single->ClaimBoxes(); - single->SetBlobTypes(); - single->set_block_owned(true); - if (big_part_list != nullptr) { - ColPartition_IT part_it(big_part_list); - part_it.add_to_end(single); - } - return single; -} - -ColPartition::~ColPartition() { - // Remove this as a partner of all partners, as we don't want them - // referring to a deleted object. - ColPartition_C_IT it(&upper_partners_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->RemovePartner(false, this); - } - it.set_to_list(&lower_partners_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->RemovePartner(true, this); - } -} - -// Constructs a fake ColPartition with no BLOBNBOXes to represent a -// horizontal or vertical line, given a type and a bounding box. 
-ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, - const ICOORD& vertical, - int left, int bottom, - int right, int top) { - ColPartition* part = new ColPartition(blob_type, vertical); - part->bounding_box_ = TBOX(left, bottom, right, top); - part->median_bottom_ = bottom; - part->median_top_ = top; - part->median_height_ = top - bottom; - part->median_left_ = left; - part->median_right_ = right; - part->median_width_ = right - left; - part->left_key_ = part->BoxLeftKey(); - part->right_key_ = part->BoxRightKey(); - return part; -} - - -// Adds the given box to the partition, updating the partition bounds. -// The list of boxes in the partition is updated, ensuring that no box is -// recorded twice, and the boxes are kept in increasing left position. -void ColPartition::AddBox(BLOBNBOX* bbox) { - TBOX box = bbox->bounding_box(); - // Update the partition limits. - if (boxes_.length() == 0) { - bounding_box_ = box; - } else { - bounding_box_ += box; - } - - if (IsVerticalType()) { - if (!last_add_was_vertical_) { - boxes_.sort(SortByBoxBottom); - last_add_was_vertical_ = true; - } - boxes_.add_sorted(SortByBoxBottom, true, bbox); - } else { - if (last_add_was_vertical_) { - boxes_.sort(SortByBoxLeft); - last_add_was_vertical_ = false; - } - boxes_.add_sorted(SortByBoxLeft, true, bbox); - } - if (!left_key_tab_) - left_key_ = BoxLeftKey(); - if (!right_key_tab_) - right_key_ = BoxRightKey(); - if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) - tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", - box.left(), box.bottom(), box.right(), box.top(), - bounding_box_.left(), bounding_box_.right()); -} - -// Removes the given box from the partition, updating the bounds. 
-void ColPartition::RemoveBox(BLOBNBOX* box) { - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - if (box == bb_it.data()) { - bb_it.extract(); - ComputeLimits(); - return; - } - } -} - -// Returns the tallest box in the partition, as measured perpendicular to the -// presumed flow of text. -BLOBNBOX* ColPartition::BiggestBox() { - BLOBNBOX* biggest = nullptr; - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - BLOBNBOX* bbox = bb_it.data(); - if (IsVerticalType()) { - if (biggest == nullptr || - bbox->bounding_box().width() > biggest->bounding_box().width()) - biggest = bbox; - } else { - if (biggest == nullptr || - bbox->bounding_box().height() > biggest->bounding_box().height()) - biggest = bbox; - } - } - return biggest; -} - -// Returns the bounding box excluding the given box. -TBOX ColPartition::BoundsWithoutBox(BLOBNBOX* box) { - TBOX result; - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - if (box != bb_it.data()) { - result += bb_it.data()->bounding_box(); - } - } - return result; -} - -// Claims the boxes in the boxes_list by marking them with a this owner -// pointer. If a box is already owned, then it must be owned by this. -void ColPartition::ClaimBoxes() { - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - BLOBNBOX* bblob = bb_it.data(); - ColPartition* other = bblob->owner(); - if (other == nullptr) { - // Normal case: ownership is available. - bblob->set_owner(this); - } else { - ASSERT_HOST(other == this); - } - } -} - -// nullptr the owner of the blobs in this partition, so they can be deleted -// independently of the ColPartition. 
-void ColPartition::DisownBoxes() { - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - BLOBNBOX* bblob = bb_it.data(); - ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr); - bblob->set_owner(nullptr); - } -} - -// nullptr the owner of the blobs in this partition that are owned by this -// partition, so they can be deleted independently of the ColPartition. -// Any blobs that are not owned by this partition get to keep their owner -// without an assert failure. -void ColPartition::DisownBoxesNoAssert() { - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - BLOBNBOX* bblob = bb_it.data(); - if (bblob->owner() == this) - bblob->set_owner(nullptr); - } -} - -// Nulls the owner of the blobs in this partition that are owned by this -// partition and not leader blobs, removing them from the boxes_ list, thus -// turning this partition back to a leader partition if it contains a leader, -// or otherwise leaving it empty. Returns true if any boxes remain. -bool ColPartition::ReleaseNonLeaderBoxes() { - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - BLOBNBOX* bblob = bb_it.data(); - if (bblob->flow() != BTFT_LEADER) { - if (bblob->owner() == this) bblob->set_owner(nullptr); - bb_it.extract(); - } - } - if (bb_it.empty()) return false; - flow_ = BTFT_LEADER; - ComputeLimits(); - return true; -} - -// Delete the boxes that this partition owns. -void ColPartition::DeleteBoxes() { - // Although the boxes_ list is a C_LIST, in some cases it owns the - // BLOBNBOXes, as the ColPartition takes ownership from the grid, - // and the BLOBNBOXes own the underlying C_BLOBs. 
- for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) { - BLOBNBOX* bblob = bb_it.extract(); - delete bblob->cblob(); - delete bblob; - } -} - -// Reflects the partition in the y-axis, assuming that its blobs have -// already been done. Corrects only a limited part of the members, since -// this function is assumed to be used shortly after initial creation, which -// is before a lot of the members are used. -void ColPartition::ReflectInYAxis() { - BLOBNBOX_CLIST reversed_boxes; - BLOBNBOX_C_IT reversed_it(&reversed_boxes); - // Reverse the order of the boxes_. - BLOBNBOX_C_IT bb_it(&boxes_); - for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { - reversed_it.add_before_then_move(bb_it.extract()); - } - bb_it.add_list_after(&reversed_boxes); - ASSERT_HOST(!left_key_tab_ && !right_key_tab_); - int tmp = left_margin_; - left_margin_ = -right_margin_; - right_margin_ = -tmp; - ComputeLimits(); -} - -// Returns true if this is a legal partition - meaning that the conditions -// left_margin <= bounding_box left -// left_key <= bounding box left key -// bounding box left <= bounding box right -// and likewise for right margin and key -// are all met. -bool ColPartition::IsLegal() { - if (bounding_box_.left() > bounding_box_.right()) { - if (textord_debug_bugs) { - tprintf("Bounding box invalid\n"); - Print(); - } - return false; // Bounding box invalid. - } - if (left_margin_ > bounding_box_.left() || - right_margin_ < bounding_box_.right()) { - if (textord_debug_bugs) { - tprintf("Margins invalid\n"); - Print(); - } - return false; // Margins invalid. - } - if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) { - if (textord_debug_bugs) { - tprintf("Key inside box: %d v %d or %d v %d\n", - left_key_, BoxLeftKey(), right_key_, BoxRightKey()); - Print(); - } - return false; // Keys inside the box. - } - return true; -} - -// Returns true if the left and right edges are approximately equal. 
-bool ColPartition::MatchingColumns(const ColPartition& other) const { - int y = (MidY() + other.MidY()) / 2; - if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, - LeftAtY(y) / kColumnWidthFactor, 1)) - return false; - if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, - RightAtY(y) / kColumnWidthFactor, 1)) - return false; - return true; -} - -// Returns true if the colors match for two text partitions. -bool ColPartition::MatchingTextColor(const ColPartition& other) const { - if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise && - other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise) - return false; // Too noisy. - - // Colors must match for other to count. - double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, - other.color2_, - color1_); - double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, - other.color2_, - color2_); - double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, - other.color1_); - double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, - other.color2_); -// All 4 distances must be small enough. - return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance && - d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance; -} - -// Returns true if the sizes match for two text partitions, -// taking orientation into account. See also SizesSimilar. -bool ColPartition::MatchingSizes(const ColPartition& other) const { - if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) - return !TabFind::DifferentSizes(median_width_, other.median_width_); - else - return !TabFind::DifferentSizes(median_height_, other.median_height_); -} - -// Returns true if there is no tabstop violation in merging this and other. 
-bool ColPartition::ConfirmNoTabViolation(const ColPartition& other) const { - if (bounding_box_.right() < other.bounding_box_.left() && - bounding_box_.right() < other.LeftBlobRule()) - return false; - if (other.bounding_box_.right() < bounding_box_.left() && - other.bounding_box_.right() < LeftBlobRule()) - return false; - if (bounding_box_.left() > other.bounding_box_.right() && - bounding_box_.left() > other.RightBlobRule()) - return false; - if (other.bounding_box_.left() > bounding_box_.right() && - other.bounding_box_.left() > RightBlobRule()) - return false; - return true; -} - -// Returns true if other has a similar stroke width to this. -bool ColPartition::MatchingStrokeWidth(const ColPartition& other, - double fractional_tolerance, - double constant_tolerance) const { - int match_count = 0; - int nonmatch_count = 0; - BLOBNBOX_C_IT box_it(const_cast(&boxes_)); - BLOBNBOX_C_IT other_it(const_cast(&other.boxes_)); - box_it.mark_cycle_pt(); - other_it.mark_cycle_pt(); - while (!box_it.cycled_list() && !other_it.cycled_list()) { - if (box_it.data()->MatchingStrokeWidth(*other_it.data(), - fractional_tolerance, - constant_tolerance)) - ++match_count; - else - ++nonmatch_count; - box_it.forward(); - other_it.forward(); - } - return match_count > nonmatch_count; -} - -// Returns true if base is an acceptable diacritic base char merge -// with this as the diacritic. -// Returns true if: -// (1) this is a ColPartition containing only diacritics, and -// (2) the base characters indicated on the diacritics all believably lie -// within the text line of the candidate ColPartition. 
-bool ColPartition::OKDiacriticMerge(const ColPartition& candidate, - bool debug) const { - BLOBNBOX_C_IT it(const_cast(&boxes_)); - int min_top = INT32_MAX; - int max_bottom = -INT32_MAX; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - if (!blob->IsDiacritic()) { - if (debug) { - tprintf("Blob is not a diacritic:"); - blob->bounding_box().print(); - } - return false; // All blobs must have diacritic bases. - } - if (blob->base_char_top() < min_top) - min_top = blob->base_char_top(); - if (blob->base_char_bottom() > max_bottom) - max_bottom = blob->base_char_bottom(); - } - // If the intersection of all vertical ranges of all base characters - // overlaps the median range of this, then it is OK. - bool result = min_top > candidate.median_bottom_ && - max_bottom < candidate.median_top_; - if (debug) { - if (result) - tprintf("OKDiacritic!\n"); - else - tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", - max_bottom, min_top, median_bottom_, median_top_); - } - return result; -} - -// Sets the sort key using either the tab vector, or the bounding box if -// the tab vector is nullptr. If the tab_vector lies inside the bounding_box, -// use the edge of the box as a key any way. -void ColPartition::SetLeftTab(const TabVector* tab_vector) { - if (tab_vector != nullptr) { - left_key_ = tab_vector->sort_key(); - left_key_tab_ = left_key_ <= BoxLeftKey(); - } else { - left_key_tab_ = false; - } - if (!left_key_tab_) - left_key_ = BoxLeftKey(); -} - -// As SetLeftTab, but with the right. -void ColPartition::SetRightTab(const TabVector* tab_vector) { - if (tab_vector != nullptr) { - right_key_ = tab_vector->sort_key(); - right_key_tab_ = right_key_ >= BoxRightKey(); - } else { - right_key_tab_ = false; - } - if (!right_key_tab_) - right_key_ = BoxRightKey(); -} - -// Copies the left/right tab from the src partition, but if take_box is -// true, copies the box instead and uses that as a key. 
-void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) { - left_key_tab_ = take_box ? false : src.left_key_tab_; - if (left_key_tab_) { - left_key_ = src.left_key_; - } else { - bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY())); - left_key_ = BoxLeftKey(); - } - if (left_margin_ > bounding_box_.left()) - left_margin_ = src.left_margin_; -} - -// As CopyLeftTab, but with the right. -void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) { - right_key_tab_ = take_box ? false : src.right_key_tab_; - if (right_key_tab_) { - right_key_ = src.right_key_; - } else { - bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY())); - right_key_ = BoxRightKey(); - } - if (right_margin_ < bounding_box_.right()) - right_margin_ = src.right_margin_; -} - -// Returns the left rule line x coord of the leftmost blob. -int ColPartition::LeftBlobRule() const { - BLOBNBOX_C_IT it(const_cast(&boxes_)); - return it.data()->left_rule(); -} -// Returns the right rule line x coord of the rightmost blob. 
-int ColPartition::RightBlobRule() const { - BLOBNBOX_C_IT it(const_cast(&boxes_)); - it.move_to_last(); - return it.data()->right_rule(); -} - -float ColPartition::SpecialBlobsDensity(const BlobSpecialTextType type) const { - ASSERT_HOST(type < BSTT_COUNT); - return special_blobs_densities_[type]; -} - -int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) { - ASSERT_HOST(type < BSTT_COUNT); - BLOBNBOX_C_IT blob_it(&boxes_); - int count = 0; - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - BlobSpecialTextType blob_type = blob->special_text_type(); - if (blob_type == type) { - count++; - } - } - - return count; -} - -void ColPartition::SetSpecialBlobsDensity( - const BlobSpecialTextType type, const float density) { - ASSERT_HOST(type < BSTT_COUNT); - special_blobs_densities_[type] = density; -} - -void ColPartition::ComputeSpecialBlobsDensity() { - memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); - if (boxes_.empty()) { - return; - } - - BLOBNBOX_C_IT blob_it(&boxes_); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - BlobSpecialTextType type = blob->special_text_type(); - special_blobs_densities_[type]++; - } - - for (int type = 0; type < BSTT_COUNT; ++type) { - special_blobs_densities_[type] /= boxes_.length(); - } -} - -// Add a partner above if upper, otherwise below. -// Add them uniquely and keep the list sorted by box left. -// Partnerships are added symmetrically to partner and this. 
-void ColPartition::AddPartner(bool upper, ColPartition* partner) { - if (upper) { - partner->lower_partners_.add_sorted(SortByBoxLeft, - true, this); - upper_partners_.add_sorted(SortByBoxLeft, true, partner); - } else { - partner->upper_partners_.add_sorted(SortByBoxLeft, - true, this); - lower_partners_.add_sorted(SortByBoxLeft, true, partner); - } -} - -// Removes the partner from this, but does not remove this from partner. -// This asymmetric removal is so as not to mess up the iterator that is -// working on partner's partner list. -void ColPartition::RemovePartner(bool upper, ColPartition* partner) { - ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (it.data() == partner) { - it.extract(); - break; - } - } -} - -// Returns the partner if the given partner is a singleton, otherwise nullptr. -ColPartition* ColPartition::SingletonPartner(bool upper) { - ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; - if (!partners->singleton()) - return nullptr; - ColPartition_C_IT it(partners); - return it.data(); -} - -// Merge with the other partition and delete it. -void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { - // The result has to either own all of the blobs or none of them. - // Verify the flag is consistent. - ASSERT_HOST(owns_blobs() == other->owns_blobs()); - // TODO(nbeato): check owns_blobs better. Right now owns_blobs - // should always be true when this is called. So there is no issues. - if (TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom()) || - TabFind::WithinTestRegion(2, other->bounding_box_.left(), - other->bounding_box_.bottom())) { - tprintf("Merging:"); - Print(); - other->Print(); - } - - // Update the special_blobs_densities_. 
- memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); - for (int type = 0; type < BSTT_COUNT; ++type) { - unsigned w1 = boxes_.length(); - unsigned w2 = other->boxes_.length(); - float new_val = special_blobs_densities_[type] * w1 + - other->special_blobs_densities_[type] * w2; - if (!w1 || !w2) { - ASSERT_HOST((w1 + w2) > 0); - special_blobs_densities_[type] = new_val / (w1 + w2); - } - } - - // Merge the two sorted lists. - BLOBNBOX_C_IT it(&boxes_); - BLOBNBOX_C_IT it2(&other->boxes_); - for (; !it2.empty(); it2.forward()) { - BLOBNBOX* bbox2 = it2.extract(); - ColPartition* prev_owner = bbox2->owner(); - if (prev_owner != other && prev_owner != nullptr) { - // A blob on other's list is owned by someone else; let them have it. - continue; - } - ASSERT_HOST(prev_owner == other || prev_owner == nullptr); - if (prev_owner == other) - bbox2->set_owner(this); - it.add_to_end(bbox2); - } - left_margin_ = std::min(left_margin_, other->left_margin_); - right_margin_ = std::max(right_margin_, other->right_margin_); - if (other->left_key_ < left_key_) { - left_key_ = other->left_key_; - left_key_tab_ = other->left_key_tab_; - } - if (other->right_key_ > right_key_) { - right_key_ = other->right_key_; - right_key_tab_ = other->right_key_tab_; - } - // Combine the flow and blob_type in a sensible way. - // Dominant flows stay. - if (!DominatesInMerge(flow_, other->flow_)) { - flow_ = other->flow_; - blob_type_ = other->blob_type_; - } - SetBlobTypes(); - if (IsVerticalType()) { - boxes_.sort(SortByBoxBottom); - last_add_was_vertical_ = true; - } else { - boxes_.sort(SortByBoxLeft); - last_add_was_vertical_ = false; - } - ComputeLimits(); - // Fix partner lists. other is going away, so remove it as a - // partner of all its partners and add this in its place. - for (int upper = 0; upper < 2; ++upper) { - ColPartition_CLIST partners; - ColPartition_C_IT part_it(&partners); - part_it.add_list_after(upper ? 
&other->upper_partners_ - : &other->lower_partners_); - for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - ColPartition* partner = part_it.extract(); - partner->RemovePartner(!upper, other); - partner->RemovePartner(!upper, this); - partner->AddPartner(!upper, this); - } - } - delete other; - if (cb != nullptr) { - SetColumnGoodness(cb); - } -} - -// Merge1 and merge2 are candidates to be merged, yet their combined box -// overlaps this. Is that allowed? -// Returns true if the overlap between this and the merged pair of -// merge candidates is sufficiently trivial to be allowed. -// The merged box can graze the edge of this by the ok_box_overlap -// if that exceeds the margin to the median top and bottom. -// ok_box_overlap should be set by the caller appropriate to the sizes of -// the text involved, and is usually a fraction of the median size of merge1 -// and/or merge2, or this. -// TODO(rays) Determine whether vertical text needs to be considered. -bool ColPartition::OKMergeOverlap(const ColPartition& merge1, - const ColPartition& merge2, - int ok_box_overlap, bool debug) { - // Vertical partitions are not allowed to be involved. - if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) { - if (debug) - tprintf("Vertical partition\n"); - return false; - } - // The merging partitions must strongly overlap each other. - if (!merge1.VSignificantCoreOverlap(merge2)) { - if (debug) - tprintf("Voverlap %d (%d)\n", - merge1.VCoreOverlap(merge2), - merge1.VSignificantCoreOverlap(merge2)); - return false; - } - // The merged box must not overlap the median bounds of this. 
- TBOX merged_box(merge1.bounding_box()); - merged_box += merge2.bounding_box(); - if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ && - merged_box.bottom() < bounding_box_.top() - ok_box_overlap && - merged_box.top() > bounding_box_.bottom() + ok_box_overlap) { - if (debug) - tprintf("Excessive box overlap\n"); - return false; - } - // Looks OK! - return true; -} - -// Find the blob at which to split this to minimize the overlap with the -// given box. Returns the first blob to go in the second partition. -BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) { - if (boxes_.empty() || boxes_.singleton()) - return nullptr; - BLOBNBOX_C_IT it(&boxes_); - TBOX left_box(it.data()->bounding_box()); - for (it.forward(); !it.at_first(); it.forward()) { - BLOBNBOX* bbox = it.data(); - left_box += bbox->bounding_box(); - if (left_box.overlap(box)) - return bbox; - } - return nullptr; -} - -// Split this partition keeping the first half in this and returning -// the second half. -// Splits by putting the split_blob and the blobs that follow -// in the second half, and the rest in the first half. -ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) { - ColPartition* split_part = ShallowCopy(); - split_part->set_owns_blobs(owns_blobs()); - BLOBNBOX_C_IT it(&boxes_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - ColPartition* prev_owner = bbox->owner(); - ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr); - if (bbox == split_blob || !split_part->boxes_.empty()) { - split_part->AddBox(it.extract()); - if (owns_blobs() && prev_owner != nullptr) - bbox->set_owner(split_part); - } - } - ASSERT_HOST(!it.empty()); - if (split_part->IsEmpty()) { - // Split part ended up with nothing. Possible if split_blob is not - // in the list of blobs. 
- delete split_part; - return nullptr; - } - right_key_tab_ = false; - split_part->left_key_tab_ = false; - ComputeLimits(); - // TODO(nbeato) Merge Ray's CL like this: - // if (owns_blobs()) - // SetBlobTextlineGoodness(); - split_part->ComputeLimits(); - // TODO(nbeato) Merge Ray's CL like this: - // if (split_part->owns_blobs()) - // split_part->SetBlobTextlineGoodness(); - return split_part; -} - -// Split this partition at the given x coordinate, returning the right -// half and keeping the left half in this. -ColPartition* ColPartition::SplitAt(int split_x) { - if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right()) - return nullptr; // There will be no change. - ColPartition* split_part = ShallowCopy(); - split_part->set_owns_blobs(owns_blobs()); - BLOBNBOX_C_IT it(&boxes_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - ColPartition* prev_owner = bbox->owner(); - ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr); - const TBOX& box = bbox->bounding_box(); - if (box.left() >= split_x) { - split_part->AddBox(it.extract()); - if (owns_blobs() && prev_owner != nullptr) - bbox->set_owner(split_part); - } - } - if (it.empty()) { - // Possible if split-x passes through the first blob. - it.add_list_after(&split_part->boxes_); - } - ASSERT_HOST(!it.empty()); - if (split_part->IsEmpty()) { - // Split part ended up with nothing. Possible if split_x passes - // through the last blob. - delete split_part; - return nullptr; - } - right_key_tab_ = false; - split_part->left_key_tab_ = false; - right_margin_ = split_x; - split_part->left_margin_ = split_x; - ComputeLimits(); - split_part->ComputeLimits(); - return split_part; -} - -// Recalculates all the coordinate limits of the partition. 
-void ColPartition::ComputeLimits() { - bounding_box_ = TBOX(); // Clear it - BLOBNBOX_C_IT it(&boxes_); - BLOBNBOX* bbox = nullptr; - int non_leader_count = 0; - if (it.empty()) { - bounding_box_.set_left(left_margin_); - bounding_box_.set_right(right_margin_); - bounding_box_.set_bottom(0); - bounding_box_.set_top(0); - } else { - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - bbox = it.data(); - bounding_box_ += bbox->bounding_box(); - if (bbox->flow() != BTFT_LEADER) - ++non_leader_count; - } - } - if (!left_key_tab_) - left_key_ = BoxLeftKey(); - if (left_key_ > BoxLeftKey() && textord_debug_bugs) { - // TODO(rays) investigate the causes of these error messages, to find - // out if they are genuinely harmful, or just indicative of junk input. - tprintf("Computed left-illegal partition\n"); - Print(); - } - if (!right_key_tab_) - right_key_ = BoxRightKey(); - if (right_key_ < BoxRightKey() && textord_debug_bugs) { - tprintf("Computed right-illegal partition\n"); - Print(); - } - if (it.empty()) - return; - if (IsImageType() || blob_type() == BRT_RECTIMAGE || - blob_type() == BRT_POLYIMAGE) { - median_top_ = bounding_box_.top(); - median_bottom_ = bounding_box_.bottom(); - median_height_ = bounding_box_.height(); - median_left_ = bounding_box_.left(); - median_right_ = bounding_box_.right(); - median_width_ = bounding_box_.width(); - } else { - STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); - STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1); - STATS height_stats(0, bounding_box_.height() + 1); - STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1); - STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1); - STATS width_stats(0, bounding_box_.width() + 1); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - bbox = it.data(); - if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) { - const TBOX& box = bbox->bounding_box(); - int area = box.area(); - 
top_stats.add(box.top(), area); - bottom_stats.add(box.bottom(), area); - height_stats.add(box.height(), area); - left_stats.add(box.left(), area); - right_stats.add(box.right(), area); - width_stats.add(box.width(), area); - } - } - median_top_ = static_cast(top_stats.median() + 0.5); - median_bottom_ = static_cast(bottom_stats.median() + 0.5); - median_height_ = static_cast(height_stats.median() + 0.5); - median_left_ = static_cast(left_stats.median() + 0.5); - median_right_ = static_cast(right_stats.median() + 0.5); - median_width_ = static_cast(width_stats.median() + 0.5); - } - - if (right_margin_ < bounding_box_.right() && textord_debug_bugs) { - tprintf("Made partition with bad right coords"); - Print(); - } - if (left_margin_ > bounding_box_.left() && textord_debug_bugs) { - tprintf("Made partition with bad left coords"); - Print(); - } - // Fix partner lists. The bounding box has changed and partners are stored - // in bounding box order, so remove and reinsert this as a partner - // of all its partners. - for (int upper = 0; upper < 2; ++upper) { - ColPartition_CLIST partners; - ColPartition_C_IT part_it(&partners); - part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_); - for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - ColPartition* partner = part_it.extract(); - partner->RemovePartner(!upper, this); - partner->AddPartner(!upper, this); - } - } - if (TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom())) { - tprintf("Recomputed box for partition %p\n", this); - Print(); - } -} - -// Returns the number of boxes that overlap the given box. 
-int ColPartition::CountOverlappingBoxes(const TBOX& box) { - BLOBNBOX_C_IT it(&boxes_); - int overlap_count = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - if (box.overlap(bbox->bounding_box())) - ++overlap_count; - } - return overlap_count; -} - -// Computes and sets the type_ and first_column_, last_column_ and column_set_. -// resolution refers to the ppi resolution of the image. -void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) { - int first_spanned_col = -1; - ColumnSpanningType span_type = - columns->SpanningType(resolution, - bounding_box_.left(), bounding_box_.right(), - std::min(bounding_box_.height(), bounding_box_.width()), - MidY(), left_margin_, right_margin_, - &first_column_, &last_column_, - &first_spanned_col); - column_set_ = columns; - if (first_column_ < last_column_ && span_type == CST_PULLOUT && - !IsLineType()) { - // Unequal columns may indicate that the pullout spans one of the columns - // it lies in, so force it to be allocated to just that column. - if (first_spanned_col >= 0) { - first_column_ = first_spanned_col; - last_column_ = first_spanned_col; - } else { - if ((first_column_ & 1) == 0) - last_column_ = first_column_; - else if ((last_column_ & 1) == 0) - first_column_ = last_column_; - else - first_column_ = last_column_ = (first_column_ + last_column_) / 2; - } - } - type_ = PartitionType(span_type); -} - -// Returns the PartitionType from the current BlobRegionType and a column -// flow spanning type ColumnSpanningType, generated by -// ColPartitionSet::SpanningType, that indicates how the partition sits -// in the columns. 
-PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { - if (flow == CST_NOISE) { - if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && - blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) - return PT_NOISE; - flow = CST_FLOWING; - } - - switch (blob_type_) { - case BRT_NOISE: - return PT_NOISE; - case BRT_HLINE: - return PT_HORZ_LINE; - case BRT_VLINE: - return PT_VERT_LINE; - case BRT_RECTIMAGE: - case BRT_POLYIMAGE: - switch (flow) { - case CST_FLOWING: - return PT_FLOWING_IMAGE; - case CST_HEADING: - return PT_HEADING_IMAGE; - case CST_PULLOUT: - return PT_PULLOUT_IMAGE; - default: - ASSERT_HOST(!"Undefined flow type for image!"); - } - break; - case BRT_VERT_TEXT: - return PT_VERTICAL_TEXT; - case BRT_TEXT: - case BRT_UNKNOWN: - default: - switch (flow) { - case CST_FLOWING: - return PT_FLOWING_TEXT; - case CST_HEADING: - return PT_HEADING_TEXT; - case CST_PULLOUT: - return PT_PULLOUT_TEXT; - default: - ASSERT_HOST(!"Undefined flow type for text!"); - } - } - ASSERT_HOST(!"Should never get here!"); - return PT_NOISE; -} - -// Returns the first and last column touched by this partition. -// resolution refers to the ppi resolution of the image. -void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns, - int* first_col, int* last_col) { - int first_spanned_col = -1; - ColumnSpanningType span_type = - columns->SpanningType(resolution, - bounding_box_.left(), bounding_box_.right(), - std::min(bounding_box_.height(), bounding_box_.width()), - MidY(), left_margin_, right_margin_, - first_col, last_col, - &first_spanned_col); - type_ = PartitionType(span_type); -} - -// Sets the internal flags good_width_ and good_column_. 
-void ColPartition::SetColumnGoodness(WidthCallback* cb) { - int y = MidY(); - int width = RightAtY(y) - LeftAtY(y); - good_width_ = cb->Run(width); - good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_; -} - -// Determines whether the blobs in this partition mostly represent -// a leader (fixed pitch sequence) and sets the member blobs accordingly. -// Note that height is assumed to have been tested elsewhere, and that this -// function will find most fixed-pitch text as leader without a height filter. -// Leader detection is limited to sequences of identical width objects, -// such as .... or ----, so patterns, such as .-.-.-.-. will not be found. -bool ColPartition::MarkAsLeaderIfMonospaced() { - bool result = false; - // Gather statistics on the gaps between blobs and the widths of the blobs. - int part_width = bounding_box_.width(); - STATS gap_stats(0, part_width); - STATS width_stats(0, part_width); - BLOBNBOX_C_IT it(&boxes_); - BLOBNBOX* prev_blob = it.data(); - prev_blob->set_flow(BTFT_NEIGHBOURS); - width_stats.add(prev_blob->bounding_box().width(), 1); - int blob_count = 1; - for (it.forward(); !it.at_first(); it.forward()) { - BLOBNBOX* blob = it.data(); - int left = blob->bounding_box().left(); - int right = blob->bounding_box().right(); - gap_stats.add(left - prev_blob->bounding_box().right(), 1); - width_stats.add(right - left, 1); - blob->set_flow(BTFT_NEIGHBOURS); - prev_blob = blob; - ++blob_count; - } - double median_gap = gap_stats.median(); - double median_width = width_stats.median(); - double max_width = std::max(median_gap, median_width); - double min_width = std::min(median_gap, median_width); - double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f); - if (textord_debug_tabfind >= 4) { - tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", - gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax, - min_width * kMaxLeaderGapFractionOfMin); - } - if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax && - 
gap_iqr < min_width * kMaxLeaderGapFractionOfMin && - blob_count >= kMinLeaderCount) { - // This is stable enough to be called a leader, so check the widths. - // Since leader dashes can join, run a dp cutting algorithm and go - // on the cost. - int offset = static_cast(ceil(gap_iqr * 2)); - int min_step = static_cast(median_gap + median_width + 0.5); - int max_step = min_step + offset; - min_step -= offset; - // Pad the buffer with min_step/2 on each end. - int part_left = bounding_box_.left() - min_step / 2; - part_width += min_step; - DPPoint* projection = new DPPoint[part_width]; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - int left = blob->bounding_box().left(); - int right = blob->bounding_box().right(); - int height = blob->bounding_box().height(); - for (int x = left; x < right; ++x) { - projection[left - part_left].AddLocalCost(height); - } - } - DPPoint* best_end = DPPoint::Solve(min_step, max_step, false, - &DPPoint::CostWithVariance, - part_width, projection); - if (best_end != nullptr && best_end->total_cost() < blob_count) { - // Good enough. Call it a leader. - result = true; - bool modified_blob_list = false; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - TBOX box = blob->bounding_box(); - // If the first or last blob is spaced too much, don't mark it. 
- if (it.at_first()) { - int gap = it.data_relative(1)->bounding_box().left() - - blob->bounding_box().right(); - if (blob->bounding_box().width() + gap > max_step) { - it.extract(); - modified_blob_list = true; - continue; - } - } - if (it.at_last()) { - int gap = blob->bounding_box().left() - - it.data_relative(-1)->bounding_box().right(); - if (blob->bounding_box().width() + gap > max_step) { - it.extract(); - modified_blob_list = true; - break; - } - } - blob->set_region_type(BRT_TEXT); - blob->set_flow(BTFT_LEADER); - } - if (modified_blob_list) ComputeLimits(); - blob_type_ = BRT_TEXT; - flow_ = BTFT_LEADER; - } else if (textord_debug_tabfind) { - if (best_end == nullptr) { - tprintf("No path\n"); - } else { - tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), - blob_count); - } - } - delete [] projection; - } - return result; -} - -// Given the result of TextlineProjection::EvaluateColPartition, (positive for -// horizontal text, negative for vertical text, and near zero for non-text), -// sets the blob_type_ and flow_ for this partition to indicate whether it -// is strongly or weakly vertical or horizontal text, or non-text. -// The function assumes that the blob neighbours are valid (from -// StrokeWidth::SetNeighbours) and that those neighbours have their -// region_type() set. -void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { - int blob_count = 0; // Total # blobs. - int good_blob_score_ = 0; // Total # good strokewidth neighbours. - int noisy_count = 0; // Total # neighbours marked as noise. 
- int hline_count = 0; - int vline_count = 0; - BLOBNBOX_C_IT it(&boxes_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - ++blob_count; - noisy_count += blob->NoisyNeighbours(); - good_blob_score_ += blob->GoodTextBlob(); - if (blob->region_type() == BRT_HLINE) ++hline_count; - if (blob->region_type() == BRT_VLINE) ++vline_count; - } - flow_ = BTFT_NEIGHBOURS; - blob_type_ = BRT_UNKNOWN; - if (hline_count > vline_count) { - flow_ = BTFT_NONE; - blob_type_ = BRT_HLINE; - } else if (vline_count > hline_count) { - flow_ = BTFT_NONE; - blob_type_ = BRT_VLINE; - } else if (value < -1 || 1 < value) { - int long_side; - int short_side; - if (value > 0) { - long_side = bounding_box_.width(); - short_side = bounding_box_.height(); - blob_type_ = BRT_TEXT; - } else { - long_side = bounding_box_.height(); - short_side = bounding_box_.width(); - blob_type_ = BRT_VERT_TEXT; - } - // We will combine the old metrics using aspect ratio and blob counts - // with the input value by allowing a strong indication to flip the - // STRONG_CHAIN/CHAIN flow values. - int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0; - if (short_side > kHorzStrongTextlineHeight) ++strong_score; - if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score; - if (abs(value) >= kMinStrongTextValue) - flow_ = BTFT_STRONG_CHAIN; - else if (abs(value) >= kMinChainTextValue) - flow_ = BTFT_CHAIN; - else - flow_ = BTFT_NEIGHBOURS; - // Upgrade chain to strong chain if the other indicators are good - if (flow_ == BTFT_CHAIN && strong_score == 3) - flow_ = BTFT_STRONG_CHAIN; - // Downgrade strong vertical text to chain if the indicators are bad. - if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) - flow_ = BTFT_CHAIN; - } - if (flow_ == BTFT_NEIGHBOURS) { - // Check for noisy neighbours. 
- if (noisy_count >= blob_count) { - flow_ = BTFT_NONTEXT; - blob_type_= BRT_NOISE; - } - } - if (TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom())) { - tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", - blob_count, noisy_count, good_blob_score_); - tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", - value, flow_, blob_type_); - Print(); - } - SetBlobTypes(); -} - -// Sets all blobs with the partition blob type and flow, but never overwrite -// leader blobs, as we need to be able to identify them later. -void ColPartition::SetBlobTypes() { - if (!owns_blobs()) - return; - BLOBNBOX_C_IT it(&boxes_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - if (blob->flow() != BTFT_LEADER) - blob->set_flow(flow_); - blob->set_region_type(blob_type_); - ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this); - } -} - -// Returns true if a decent baseline can be fitted through the blobs. -// Works for both horizontal and vertical text. -bool ColPartition::HasGoodBaseline() { - // Approximation of the baseline. - DetLineFit linepoints; - // Calculation of the mean height on this line segment. Note that these - // variable names apply to the context of a horizontal line, and work - // analogously, rather than literally in the case of a vertical line. - int total_height = 0; - int coverage = 0; - int height_count = 0; - int width = 0; - BLOBNBOX_C_IT it(&boxes_); - TBOX box(it.data()->bounding_box()); - // Accumulate points representing the baseline at the middle of each blob, - // but add an additional point for each end of the line. This makes it - // harder to fit a severe skew angle, as it is most likely not right. - if (IsVerticalType()) { - // For a vertical line, use the right side as the baseline. - ICOORD first_pt(box.right(), box.bottom()); - // Use the bottom-right of the first (bottom) box, the top-right of the - // last, and the middle-right of all others. 
- linepoints.Add(first_pt); - for (it.forward(); !it.at_last(); it.forward()) { - BLOBNBOX* blob = it.data(); - box = blob->bounding_box(); - ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2); - linepoints.Add(box_pt); - total_height += box.width(); - coverage += box.height(); - ++height_count; - } - box = it.data()->bounding_box(); - ICOORD last_pt(box.right(), box.top()); - linepoints.Add(last_pt); - width = last_pt.y() - first_pt.y(); - - } else { - // Horizontal lines use the bottom as the baseline. - TBOX box(it.data()->bounding_box()); - // Use the bottom-left of the first box, the the bottom-right of the last, - // and the middle of all others. - ICOORD first_pt(box.left(), box.bottom()); - linepoints.Add(first_pt); - for (it.forward(); !it.at_last(); it.forward()) { - BLOBNBOX* blob = it.data(); - box = blob->bounding_box(); - ICOORD box_pt((box.left() + box.right()) / 2, box.bottom()); - linepoints.Add(box_pt); - total_height += box.height(); - coverage += box.width(); - ++height_count; - } - box = it.data()->bounding_box(); - ICOORD last_pt(box.right(), box.bottom()); - linepoints.Add(last_pt); - width = last_pt.x() - first_pt.x(); - } - // Maximum median error allowed to be a good text line. - if (height_count == 0) - return false; - double max_error = kMaxBaselineError * total_height / height_count; - ICOORD start_pt, end_pt; - double error = linepoints.Fit(&start_pt, &end_pt); - return error < max_error && coverage >= kMinBaselineCoverage * width; -} - -// Adds this ColPartition to a matching WorkingPartSet if one can be found, -// otherwise starts a new one in the appropriate column, ending the previous. -void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, - int resolution, - ColPartition_LIST* used_parts, - WorkingPartSet_LIST* working_sets) { - if (block_owned_) - return; // Done it already. - block_owned_ = true; - WorkingPartSet_IT it(working_sets); - // If there is an upper partner use its working_set_ directly. 
- ColPartition* partner = SingletonPartner(true); - if (partner != nullptr && partner->working_set_ != nullptr) { - working_set_ = partner->working_set_; - working_set_->AddPartition(this); - return; - } - if (partner != nullptr && textord_debug_bugs) { - tprintf("Partition with partner has no working set!:"); - Print(); - partner->Print(); - } - // Search for the column that the left edge fits in. - WorkingPartSet* work_set = nullptr; - it.move_to_first(); - int col_index = 0; - for (it.mark_cycle_pt(); !it.cycled_list() && - col_index != first_column_; - it.forward(), ++col_index); - if (textord_debug_tabfind >= 2) { - tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between"); - Print(); - } - if (it.cycled_list() && textord_debug_bugs) { - tprintf("Target column=%d, only had %d\n", first_column_, col_index); - } - ASSERT_HOST(!it.cycled_list()); - work_set = it.data(); - // If last_column_ != first_column, then we need to scoop up all blocks - // between here and the last_column_ and put back in work_set. - if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) { - // Find the column that the right edge falls in. - BLOCK_LIST completed_blocks; - TO_BLOCK_LIST to_blocks; - for (; !it.cycled_list() && col_index <= last_column_; - it.forward(), ++col_index) { - WorkingPartSet* end_set = it.data(); - end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, - &completed_blocks, &to_blocks); - } - work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); - } - working_set_ = work_set; - work_set->AddPartition(this); -} - -// From the given block_parts list, builds one or more BLOCKs and -// corresponding TO_BLOCKs, such that the line spacing is uniform in each. -// Created blocks are appended to the end of completed_blocks and to_blocks. -// The used partitions are put onto used_parts, as they may still be referred -// to in the partition grid. 
bleft, tright and resolution are the bounds -// and resolution of the original image. -void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, - int resolution, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts, - BLOCK_LIST* completed_blocks, - TO_BLOCK_LIST* to_blocks) { - int page_height = tright.y() - bleft.y(); - // Compute the initial spacing stats. - ColPartition_IT it(block_parts); - int part_count = 0; - int max_line_height = 0; - - // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type - // because their line spacing with their neighbors maybe smaller and their - // height may be slightly larger. - - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - ASSERT_HOST(!part->boxes()->empty()); - STATS side_steps(0, part->bounding_box().height()); - if (part->bounding_box().height() > max_line_height) - max_line_height = part->bounding_box().height(); - BLOBNBOX_C_IT blob_it(part->boxes()); - int prev_bottom = blob_it.data()->bounding_box().bottom(); - for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - int bottom = blob->bounding_box().bottom(); - int step = bottom - prev_bottom; - if (step < 0) - step = -step; - side_steps.add(step, 1); - prev_bottom = bottom; - } - part->set_side_step(static_cast(side_steps.median() + 0.5)); - if (!it.at_last()) { - ColPartition* next_part = it.data_relative(1); - part->set_bottom_spacing(part->median_bottom() - - next_part->median_bottom()); - part->set_top_spacing(part->median_top() - next_part->median_top()); - } else { - part->set_bottom_spacing(page_height); - part->set_top_spacing(page_height); - } - if (textord_debug_tabfind) { - part->Print(); - tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", - side_steps.median(), part->top_spacing(), part->bottom_spacing()); - } - ++part_count; - } - if (part_count == 0) - return; - - 
SmoothSpacings(resolution, page_height, block_parts); - - // Move the partitions into individual block lists and make the blocks. - BLOCK_IT block_it(completed_blocks); - TO_BLOCK_IT to_block_it(to_blocks); - ColPartition_LIST spacing_parts; - ColPartition_IT sp_block_it(&spacing_parts); - int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing; - for (it.mark_cycle_pt(); !it.empty();) { - ColPartition* part = it.extract(); - sp_block_it.add_to_end(part); - it.forward(); - if (it.empty() || part->bottom_spacing() > same_block_threshold || - !part->SpacingsEqual(*it.data(), resolution)) { - // There is a spacing boundary. Check to see if it.data() belongs - // better in the current block or the next one. - if (!it.empty() && part->bottom_spacing() <= same_block_threshold) { - ColPartition* next_part = it.data(); - // If there is a size match one-way, then the middle line goes with - // its matched size, otherwise it goes with the smallest spacing. - ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1); - if (textord_debug_tabfind) { - tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d," - " sizes %d %d %d\n", - part->top_spacing(), part->bottom_spacing(), - next_part->top_spacing(), next_part->bottom_spacing(), - part->median_height(), next_part->median_height(), - third_part != nullptr ? third_part->median_height() : 0); - } - // We can only consider adding the next line to the block if the sizes - // match and the lines are close enough for their size. - if (part->SizesSimilar(*next_part) && - next_part->median_height() * kMaxSameBlockLineSpacing > - part->bottom_spacing() && - part->median_height() * kMaxSameBlockLineSpacing > - part->top_spacing()) { - // Even now, we can only add it as long as the third line doesn't - // match in the same way and have a smaller bottom spacing. 
- if (third_part == nullptr || - !next_part->SizesSimilar(*third_part) || - third_part->median_height() * kMaxSameBlockLineSpacing <= - next_part->bottom_spacing() || - next_part->median_height() * kMaxSameBlockLineSpacing <= - next_part->top_spacing() || - next_part->bottom_spacing() > part->bottom_spacing()) { - // Add to the current block. - sp_block_it.add_to_end(it.extract()); - it.forward(); - if (textord_debug_tabfind) { - tprintf("Added line to current block.\n"); - } - } - } - } - TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts); - if (to_block != nullptr) { - to_block_it.add_to_end(to_block); - block_it.add_to_end(to_block->block); - } - sp_block_it.set_to_list(&spacing_parts); - } else { - if (textord_debug_tabfind && !it.empty()) { - ColPartition* next_part = it.data(); - tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", - part->top_spacing(), part->bottom_spacing(), - next_part->top_spacing(), next_part->bottom_spacing(), - part->median_height(), next_part->median_height()); - } - } - } -} - -// Helper function to clip the input pos to the given bleft, tright bounds. -static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) { - if (pos->x() < bleft.x()) - pos->set_x(bleft.x()); - if (pos->x() > tright.x()) - pos->set_x(tright.x()); - if (pos->y() < bleft.y()) - pos->set_y(bleft.y()); - if (pos->y() > tright.y()) - pos->set_y(tright.y()); -} - -// Helper moves the blobs from the given list of block_parts into the block -// itself. Sets up the block for (old) textline formation correctly for -// vertical and horizontal text. The partitions are moved to used_parts -// afterwards, as they cannot be deleted yet. -static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, - BLOCK* block, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts) { - // Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it. 
- // Move all the parts to a done list as they are no longer needed, except - // that have have to continue to exist until the part grid is deleted. - // Compute the median blob size as we go, as the block needs to know. - TBOX block_box(block->pdblk.bounding_box()); - STATS sizes(0, std::max(block_box.width(), block_box.height())); - bool text_type = block->pdblk.poly_block()->IsText(); - ColPartition_IT it(block_parts); - TO_BLOCK* to_block = new TO_BLOCK(block); - BLOBNBOX_IT blob_it(&to_block->blobs); - ColPartition_IT used_it(used_parts); - for (it.move_to_first(); !it.empty(); it.forward()) { - ColPartition* part = it.extract(); - // Transfer blobs from all regions to the output blocks. - // Blobs for non-text regions will be used to define the polygonal - // bounds of the region. - for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty(); - bb_it.forward()) { - BLOBNBOX* bblob = bb_it.extract(); - if (bblob->owner() != part) { - tprintf("Ownership incorrect for blob:"); - bblob->bounding_box().print(); - tprintf("Part="); - part->Print(); - if (bblob->owner() == nullptr) { - tprintf("Not owned\n"); - } else { - tprintf("Owner part:"); - bblob->owner()->Print(); - } - } - ASSERT_HOST(bblob->owner() == part); - // Assert failure here is caused by arbitrarily changing the partition - // type without also changing the blob type, such as in - // InsertSmallBlobsAsUnknowns. 
- ASSERT_HOST(!text_type || bblob->region_type() >= BRT_UNKNOWN); - C_OUTLINE_LIST* outlines = bblob->cblob()->out_list(); - C_OUTLINE_IT ol_it(outlines); - ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0); - if (vertical_text) - sizes.add(bblob->bounding_box().width(), 1); - else - sizes.add(bblob->bounding_box().height(), 1); - blob_it.add_after_then_move(bblob); - } - used_it.add_to_end(part); - } - if (text_type && blob_it.empty()) { - delete block; - delete to_block; - return nullptr; - } - to_block->line_size = sizes.median(); - if (vertical_text) { - int block_width = block->pdblk.bounding_box().width(); - if (block_width < line_spacing) - line_spacing = block_width; - to_block->line_spacing = static_cast(line_spacing); - to_block->max_blob_size = static_cast(block_width + 1); - } else { - int block_height = block->pdblk.bounding_box().height(); - if (block_height < line_spacing) - line_spacing = block_height; - to_block->line_spacing = static_cast(line_spacing); - to_block->max_blob_size = static_cast(block_height + 1); - } - return to_block; -} - -// Constructs a block from the given list of partitions. -// Arguments are as LineSpacingBlocks above. -TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts) { - if (block_parts->empty()) - return nullptr; // Nothing to do. - // If the block_parts are not in reading order, then it will make an invalid - // block polygon and bounding_box, so sort by bounding box now just to make - // sure. 
- block_parts->sort(&ColPartition::SortByBBox); - ColPartition_IT it(block_parts); - ColPartition* part = it.data(); - PolyBlockType type = part->type(); - if (type == PT_VERTICAL_TEXT) - return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts); - // LineSpacingBlocks has handed us a collection of evenly spaced lines and - // put the average spacing in each partition, so we can just take the - // linespacing from the first partition. - int line_spacing = part->bottom_spacing(); - if (line_spacing < part->median_height()) - line_spacing = part->bounding_box().height(); - ICOORDELT_LIST vertices; - ICOORDELT_IT vert_it(&vertices); - ICOORD start, end; - int min_x = INT32_MAX; - int max_x = -INT32_MAX; - int min_y = INT32_MAX; - int max_y = -INT32_MAX; - int iteration = 0; - do { - if (iteration == 0) - ColPartition::LeftEdgeRun(&it, &start, &end); - else - ColPartition::RightEdgeRun(&it, &start, &end); - ClipCoord(bleft, tright, &start); - ClipCoord(bleft, tright, &end); - vert_it.add_after_then_move(new ICOORDELT(start)); - vert_it.add_after_then_move(new ICOORDELT(end)); - UpdateRange(start.x(), &min_x, &max_x); - UpdateRange(end.x(), &min_x, &max_x); - UpdateRange(start.y(), &min_y, &max_y); - UpdateRange(end.y(), &min_y, &max_y); - if ((iteration == 0 && it.at_first()) || - (iteration == 1 && it.at_last())) { - ++iteration; - it.move_to_last(); - } - } while (iteration < 2); - if (textord_debug_tabfind) - tprintf("Making block at (%d,%d)->(%d,%d)\n", - min_x, min_y, max_x, max_y); - BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y); - block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type)); - return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts); -} - -// Constructs a block from the given list of vertical text partitions. -// Currently only creates rectangular blocks. 
-TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft, - const ICOORD& tright, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts) { - if (block_parts->empty()) - return nullptr; // Nothing to do. - ColPartition_IT it(block_parts); - ColPartition* part = it.data(); - TBOX block_box = part->bounding_box(); - int line_spacing = block_box.width(); - PolyBlockType type = it.data()->type(); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - block_box += it.data()->bounding_box(); - } - if (textord_debug_tabfind) { - tprintf("Making block at:"); - block_box.print(); - } - BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), - block_box.right(), block_box.top()); - block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type)); - return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts); -} - -// Makes a TO_ROW matching this and moves all the blobs to it, transferring -// ownership to to returned TO_ROW. -TO_ROW* ColPartition::MakeToRow() { - BLOBNBOX_C_IT blob_it(&boxes_); - TO_ROW* row = nullptr; - int line_size = IsVerticalType() ? median_width_ : median_height_; - // Add all the blobs to a single TO_ROW. - for (; !blob_it.empty(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.extract(); -// blob->compute_bounding_box(); - int top = blob->bounding_box().top(); - int bottom = blob->bounding_box().bottom(); - if (row == nullptr) { - row = new TO_ROW(blob, static_cast(top), - static_cast(bottom), - static_cast(line_size)); - } else { - row->add_blob(blob, static_cast(top), - static_cast(bottom), - static_cast(line_size)); - } - } - return row; -} - -// Returns a copy of everything except the list of boxes. The resulting -// ColPartition is only suitable for keeping in a column candidate list. 
-ColPartition* ColPartition::ShallowCopy() const { - ColPartition* part = new ColPartition(blob_type_, vertical_); - part->left_margin_ = left_margin_; - part->right_margin_ = right_margin_; - part->bounding_box_ = bounding_box_; - memcpy(part->special_blobs_densities_, special_blobs_densities_, - sizeof(special_blobs_densities_)); - part->median_bottom_ = median_bottom_; - part->median_top_ = median_top_; - part->median_height_ = median_height_; - part->median_left_ = median_left_; - part->median_right_ = median_right_; - part->median_width_ = median_width_; - part->good_width_ = good_width_; - part->good_column_ = good_column_; - part->left_key_tab_ = left_key_tab_; - part->right_key_tab_ = right_key_tab_; - part->type_ = type_; - part->flow_ = flow_; - part->left_key_ = left_key_; - part->right_key_ = right_key_; - part->first_column_ = first_column_; - part->last_column_ = last_column_; - part->owns_blobs_ = false; - return part; -} - -ColPartition* ColPartition::CopyButDontOwnBlobs() { - ColPartition* copy = ShallowCopy(); - copy->set_owns_blobs(false); - BLOBNBOX_C_IT inserter(copy->boxes()); - BLOBNBOX_C_IT traverser(boxes()); - for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) - inserter.add_after_then_move(traverser.data()); - return copy; -} - -#ifndef GRAPHICS_DISABLED -// Provides a color for BBGrid to draw the rectangle. -// Must be kept in sync with PolyBlockType. -ScrollView::Color ColPartition::BoxColor() const { - if (type_ == PT_UNKNOWN) - return BLOBNBOX::TextlineColor(blob_type_, flow_); - return POLY_BLOCK::ColorForPolyBlockType(type_); -} -#endif // GRAPHICS_DISABLED - -// Keep in sync with BlobRegionType. -static char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT"; - -// Prints debug information on this. 
-void ColPartition::Print() const { - int y = MidY(); - tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" - " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" - " ts=%d bs=%d ls=%d rs=%d\n", - boxes_.empty() ? 'E' : ' ', - left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y), - bounding_box_.left(), median_left_, - bounding_box_.bottom(), median_bottom_, - bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', - right_margin_, median_right_, bounding_box_.top(), median_top_, - good_width_, good_column_, type_, - kBlobTypes[blob_type_], flow_, - first_column_, last_column_, boxes_.length(), - space_above_, space_below_, space_to_left_, space_to_right_); -} - -// Prints debug information on the colors. -void ColPartition::PrintColors() { - tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", - color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE], - color1_[L_ALPHA_CHANNEL], - color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]); -} - -// Sets the types of all partitions in the run to be the max of the types. -void ColPartition::SmoothPartnerRun(int working_set_count) { - STATS left_stats(0, working_set_count); - STATS right_stats(0, working_set_count); - PolyBlockType max_type = type_; - ColPartition* partner; - for (partner = SingletonPartner(false); partner != nullptr; - partner = partner->SingletonPartner(false)) { - if (partner->type_ > max_type) - max_type = partner->type_; - if (column_set_ == partner->column_set_) { - left_stats.add(partner->first_column_, 1); - right_stats.add(partner->last_column_, 1); - } - } - type_ = max_type; - // TODO(rays) Either establish that it isn't necessary to set the columns, - // or find a way to do it that does not cause an assert failure in - // AddToWorkingSet. 
-#if 0 - first_column_ = left_stats.mode(); - last_column_ = right_stats.mode(); - if (last_column_ < first_column_) - last_column_ = first_column_; -#endif - - for (partner = SingletonPartner(false); partner != nullptr; - partner = partner->SingletonPartner(false)) { - partner->type_ = max_type; -#if 0 // See TODO above - if (column_set_ == partner->column_set_) { - partner->first_column_ = first_column_; - partner->last_column_ = last_column_; - } -#endif - } -} - -// ======= Scenario common to all Refine*Partners* functions ======= -// ColPartitions are aiming to represent textlines, or horizontal slices -// of images, and we are trying to form bi-directional (upper/lower) chains -// of UNIQUE partner ColPartitions that can be made into blocks. -// The ColPartitions have previously been typed (see SetPartitionType) -// according to a combination of the content type and -// how they lie on the columns. We want to chain text into -// groups of a single type, but image ColPartitions may have been typed -// differently in different parts of the image, due to being non-rectangular. -// -// We previously ran a search for upper and lower partners, but there may -// be more than one, and they may be of mixed types, so now we wish to -// refine the partners down to at most one. -// A heading may have multiple partners: -// =============================== -// ======== ========== ========= -// ======== ========== ========= -// but it should be a different type. -// A regular flowing text line may have multiple partners: -// ================== =================== -// ======= ================= =========== -// This could be the start of a pull-out, or it might all be in a single -// column and might be caused by tightly spaced text, bold words, bullets, -// funny punctuation etc, all of which can cause textlines to be split into -// multiple ColPartitions. 
Pullouts and figure captions should now be different -// types so we can more aggressively merge groups of partners that all sit -// in a single column. -// -// Cleans up the partners of the given type so that there is at most -// one partner. This makes block creation simpler. -// If get_desperate is true, goes to more desperate merge methods -// to merge flowing text before breaking partnerships. -void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, - ColPartitionGrid* grid) { - if (TypesSimilar(type_, type)) { - RefinePartnersInternal(true, get_desperate, grid); - RefinePartnersInternal(false, get_desperate, grid); - } else if (type == PT_COUNT) { - // This is the final pass. Make sure only the correctly typed - // partners surivive, however many there are. - RefinePartnersByType(true, &upper_partners_); - RefinePartnersByType(false, &lower_partners_); - // It is possible for a merge to have given a partition multiple - // partners again, so the last resort is to use overlap which is - // guaranteed to leave at most one partner left. - if (!upper_partners_.empty() && !upper_partners_.singleton()) - RefinePartnersByOverlap(true, &upper_partners_); - if (!lower_partners_.empty() && !lower_partners_.singleton()) - RefinePartnersByOverlap(false, &lower_partners_); - } -} - -////////////////// PRIVATE CODE ///////////////////////////// - -// Cleans up the partners above if upper is true, else below. -// If get_desperate is true, goes to more desperate merge methods -// to merge flowing text before breaking partnerships. -void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, - ColPartitionGrid* grid) { - ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; - if (!partners->empty() && !partners->singleton()) { - RefinePartnersByType(upper, partners); - if (!partners->empty() && !partners->singleton()) { - // Check for transitive partnerships and break the cycle. 
- RefinePartnerShortcuts(upper, partners); - if (!partners->empty() && !partners->singleton()) { - // Types didn't fix it. Flowing text keeps the one with the longest - // sequence of singleton matching partners. All others max overlap. - if (TypesSimilar(type_, PT_FLOWING_TEXT) && get_desperate) { - RefineTextPartnersByMerge(upper, false, partners, grid); - if (!partners->empty() && !partners->singleton()) - RefineTextPartnersByMerge(upper, true, partners, grid); - } - // The last resort is to use overlap. - if (!partners->empty() && !partners->singleton()) - RefinePartnersByOverlap(upper, partners); - } - } - } -} - -// Cleans up the partners above if upper is true, else below. -// Restricts the partners to only desirable types. For text and BRT_HLINE this -// means the same type_ , and for image types it means any image type. -void ColPartition::RefinePartnersByType(bool upper, - ColPartition_CLIST* partners) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom()); - if (debug) { - tprintf("Refining %d %s partners by type for:\n", - partners->length(), upper ? "Upper" : "Lower"); - Print(); - } - ColPartition_C_IT it(partners); - // Purify text by type. - if (!IsImageType() && !IsLineType() && type() != PT_TABLE) { - // Keep only partners matching type_. - // Exception: PT_VERTICAL_TEXT is allowed to stay with the other - // text types if it is the only partner. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* partner = it.data(); - if (!TypesSimilar(type_, partner->type_)) { - if (debug) { - tprintf("Removing partner:"); - partner->Print(); - } - partner->RemovePartner(!upper, this); - it.extract(); - } else if (debug) { - tprintf("Keeping partner:"); - partner->Print(); - } - } - } else { - // Only polyimages are allowed to have partners of any kind! 
- for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* partner = it.data(); - if (partner->blob_type() != BRT_POLYIMAGE || - blob_type() != BRT_POLYIMAGE) { - if (debug) { - tprintf("Removing partner:"); - partner->Print(); - } - partner->RemovePartner(!upper, this); - it.extract(); - } else if (debug) { - tprintf("Keeping partner:"); - partner->Print(); - } - } - } -} - -// Cleans up the partners above if upper is true, else below. -// Remove transitive partnerships: this<->a, and a<->b and this<->b. -// Gets rid of this<->b, leaving a clean chain. -// Also if we have this<->a and a<->this, then gets rid of this<->a, as -// this has multiple partners. -void ColPartition::RefinePartnerShortcuts(bool upper, - ColPartition_CLIST* partners) { - bool done_any = false; - do { - done_any = false; - ColPartition_C_IT it(partners); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* a = it.data(); - // Check for a match between all of a's partners (it1/b1) and all - // of this's partners (it2/b2). - ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_); - for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { - ColPartition* b1 = it1.data(); - if (b1 == this) { - done_any = true; - it.extract(); - a->RemovePartner(!upper, this); - break; - } - ColPartition_C_IT it2(partners); - for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { - ColPartition* b2 = it2.data(); - if (b1 == b2) { - // Jackpot! b2 should not be a partner of this. - it2.extract(); - b2->RemovePartner(!upper, this); - done_any = true; - // That potentially invalidated all the iterators, so break out - // and start again. - break; - } - } - if (done_any) - break; - } - if (done_any) - break; - } - } while (done_any && !partners->empty() && !partners->singleton()); -} - -// Cleans up the partners above if upper is true, else below. -// If multiple text partners can be merged, (with each other, NOT with this), -// then do so. 
-// If desperate is true, then an increase in overlap with the merge is -// allowed. If the overlap increases, then the desperately_merged_ flag -// is set, indicating that the textlines probably need to be regenerated -// by aggressive line fitting/splitting, as there are probably vertically -// joined blobs that cross textlines. -void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, - ColPartition_CLIST* partners, - ColPartitionGrid* grid) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom()); - if (debug) { - tprintf("Refining %d %s partners by merge for:\n", - partners->length(), upper ? "Upper" : "Lower"); - Print(); - } - while (!partners->empty() && !partners->singleton()) { - // Absorb will mess up the iterators, so we have to merge one partition - // at a time and rebuild the iterators each time. - ColPartition_C_IT it(partners); - ColPartition* part = it.data(); - // Gather a list of merge candidates, from the list of partners, that - // are all in the same single column. See general scenario comment above. - ColPartition_CLIST candidates; - ColPartition_C_IT cand_it(&candidates); - for (it.forward(); !it.at_first(); it.forward()) { - ColPartition* candidate = it.data(); - if (part->first_column_ == candidate->last_column_ && - part->last_column_ == candidate->first_column_) - cand_it.add_after_then_move(it.data()); - } - int overlap_increase; - ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug, - nullptr, &overlap_increase); - if (candidate != nullptr && (overlap_increase <= 0 || desperate)) { - if (debug) { - tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", - part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate), - overlap_increase); - } - // Remove before merge and re-insert to keep the integrity of the grid. 
- grid->RemoveBBox(candidate); - grid->RemoveBBox(part); - part->Absorb(candidate, nullptr); - // We modified the box of part, so re-insert it into the grid. - grid->InsertBBox(true, true, part); - if (overlap_increase > 0) - part->desperately_merged_ = true; - } else { - break; // Can't merge. - } - } -} - -// Cleans up the partners above if upper is true, else below. -// Keep the partner with the biggest overlap. -void ColPartition::RefinePartnersByOverlap(bool upper, - ColPartition_CLIST* partners) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom()); - if (debug) { - tprintf("Refining %d %s partners by overlap for:\n", - partners->length(), upper ? "Upper" : "Lower"); - Print(); - } - ColPartition_C_IT it(partners); - ColPartition* best_partner = it.data(); - // Find the partner with the best overlap. - int best_overlap = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* partner = it.data(); - int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right()) - - std::max(bounding_box_.left(), partner->bounding_box_.left()); - if (overlap > best_overlap) { - best_overlap = overlap; - best_partner = partner; - } - } - // Keep only the best partner. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* partner = it.data(); - if (partner != best_partner) { - if (debug) { - tprintf("Removing partner:"); - partner->Print(); - } - partner->RemovePartner(!upper, this); - it.extract(); - } - } -} - -// Return true if bbox belongs better in this than other. -bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox, - const ColPartition& other) { - const TBOX& box = bbox->bounding_box(); - // Margins take priority. 
- int left = box.left(); - int right = box.right(); - if (left < left_margin_ || right > right_margin_) - return false; - if (left < other.left_margin_ || right > other.right_margin_) - return true; - int top = box.top(); - int bottom = box.bottom(); - int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_); - int other_overlap = std::min(top, other.median_top_) - - std::max(bottom, other.median_bottom_); - int this_miss = median_top_ - median_bottom_ - this_overlap; - int other_miss = other.median_top_ - other.median_bottom_ - other_overlap; - if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) { - tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", - box.left(), box.bottom(), box.right(), box.top(), - this_overlap, other_overlap, this_miss, other_miss, - median_top_, other.median_top_); - } - if (this_miss < other_miss) - return true; - if (this_miss > other_miss) - return false; - if (this_overlap > other_overlap) - return true; - if (this_overlap < other_overlap) - return false; - return median_top_ >= other.median_top_; -} - -// Returns the median line-spacing between the current position and the end -// of the list. -// The iterator is passed by value so the iteration does not modify the -// caller's iterator. -static int MedianSpacing(int page_height, ColPartition_IT it) { - STATS stats(0, page_height); - while (!it.cycled_list()) { - ColPartition* part = it.data(); - it.forward(); - stats.add(part->bottom_spacing(), 1); - stats.add(part->top_spacing(), 1); - } - return static_cast(stats.median() + 0.5); -} - -// Returns true if this column partition is in the same column as -// part. This function will only work after the SetPartitionType function -// has been called on both column partitions. This is useful for -// doing a SideSearch when you want things in the same page column. 
-// -// Currently called by the table detection code to identify if potential table -// partitions exist in the same column. -bool ColPartition::IsInSameColumnAs(const ColPartition& part) const { - // Overlap does not occur when last < part.first or first > part.last. - // In other words, one is completely to the side of the other. - // This is just DeMorgan's law applied to that so the function returns true. - return (last_column_ >= part.first_column_) && - (first_column_ <= part.last_column_); -} - -// Smoothes the spacings in the list into groups of equal linespacing. -// resolution is the resolution of the original image, used as a basis -// for thresholds in change of spacing. page_height is in pixels. -void ColPartition::SmoothSpacings(int resolution, int page_height, - ColPartition_LIST* parts) { - // The task would be trivial if we didn't have to allow for blips - - // occasional offsets in spacing caused by anomalous text, such as all - // caps, groups of descenders, joined words, Arabic etc. - // The neighbourhood stores a consecutive group of partitions so that - // blips can be detected correctly, yet conservatively enough to not - // mistake genuine spacing changes for blips. See example below. - ColPartition* neighbourhood[PN_COUNT]; - ColPartition_IT it(parts); - it.mark_cycle_pt(); - // Although we know nothing about the spacings is this list, the median is - // used as an approximation to allow blips. - // If parts of this block aren't spaced to the median, then we can't - // accept blips in those parts, but we'll recalculate it each time we - // split the block, so the median becomes more likely to match all the text. 
- int median_space = MedianSpacing(page_height, it); - ColPartition_IT start_it(it); - ColPartition_IT end_it(it); - for (int i = 0; i < PN_COUNT; ++i) { - if (i < PN_UPPER || it.cycled_list()) { - neighbourhood[i] = nullptr; - } else { - if (i == PN_LOWER) - end_it = it; - neighbourhood[i] = it.data(); - it.forward(); - } - } - while (neighbourhood[PN_UPPER] != nullptr) { - // Test for end of a group. Normally SpacingsEqual is true within a group, - // but in the case of a blip, it will be false. Here is an example: - // Line enum Spacing below (spacing between tops of lines) - // 1 ABOVE2 20 - // 2 ABOVE1 20 - // 3 UPPER 15 - // 4 LOWER 25 - // 5 BELOW1 20 - // 6 BELOW2 20 - // Line 4 is all in caps (regular caps), so the spacing between line 3 - // and line 4 (looking at the tops) is smaller than normal, and the - // spacing between line 4 and line 5 is larger than normal, but the - // two of them add to twice the normal spacing. - // The following if has to accept unequal spacings 3 times to pass the - // blip (20/15, 15/25 and 25/20) - // When the blip is in the middle, OKSpacingBlip tests that one of - // ABOVE1 and BELOW1 matches the median. - // The first time, everything is shifted down 1, so we present - // OKSpacingBlip with neighbourhood+1 and check that PN_UPPER is median. - // The last time, everything is shifted up 1, so we present OKSpacingBlip - // with neighbourhood-1 and check that PN_LOWER matches the median. - if (neighbourhood[PN_LOWER] == nullptr || - (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], - resolution) && - !OKSpacingBlip(resolution, median_space, neighbourhood) && - (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) || - !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) && - (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) || - !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) { - // The group has ended. PN_UPPER is the last member. 
- // Compute the mean spacing over the group. - ColPartition_IT sum_it(start_it); - ColPartition* last_part = neighbourhood[PN_UPPER]; - double total_bottom = 0.0; - double total_top = 0.0; - int total_count = 0; - ColPartition* upper = sum_it.data(); - // We do not process last_part, as its spacing is different. - while (upper != last_part) { - total_bottom += upper->bottom_spacing(); - total_top += upper->top_spacing(); - ++total_count; - sum_it.forward(); - upper = sum_it.data(); - } - if (total_count > 0) { - // There were at least 2 lines, so set them all to the mean. - int top_spacing = static_cast(total_top / total_count + 0.5); - int bottom_spacing = static_cast(total_bottom / total_count + 0.5); - if (textord_debug_tabfind) { - tprintf("Spacing run ended. Cause:"); - if (neighbourhood[PN_LOWER] == nullptr) { - tprintf("No more lines\n"); - } else { - tprintf("Spacing change. Spacings:\n"); - for (int i = 0; i < PN_COUNT; ++i) { - if (neighbourhood[i] == nullptr) { - tprintf("NULL"); - if (i > 0 && neighbourhood[i - 1] != nullptr) { - if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) { - tprintf(" Lower partner:"); - neighbourhood[i - 1]->SingletonPartner(false)->Print(); - } else { - tprintf(" nullptr lower partner:\n"); - } - } else { - tprintf("\n"); - } - } else { - tprintf("Top = %d, bottom = %d\n", - neighbourhood[i]->top_spacing(), - neighbourhood[i]->bottom_spacing()); - } - } - } - tprintf("Mean spacing = %d/%d\n", top_spacing, bottom_spacing); - } - sum_it = start_it; - upper = sum_it.data(); - while (upper != last_part) { - upper->set_top_spacing(top_spacing); - upper->set_bottom_spacing(bottom_spacing); - if (textord_debug_tabfind) { - tprintf("Setting mean on:"); - upper->Print(); - } - sum_it.forward(); - upper = sum_it.data(); - } - } - // PN_LOWER starts the next group and end_it is the next start_it. - start_it = end_it; - // Recalculate the median spacing to maximize the chances of detecting - // spacing blips. 
- median_space = MedianSpacing(page_height, end_it); - } - // Shuffle pointers. - for (int j = 1; j < PN_COUNT; ++j) { - neighbourhood[j - 1] = neighbourhood[j]; - } - if (it.cycled_list()) { - neighbourhood[PN_COUNT - 1] = nullptr; - } else { - neighbourhood[PN_COUNT - 1] = it.data(); - it.forward(); - } - end_it.forward(); - } -} - -// Returns true if the parts array of pointers to partitions matches the -// condition for a spacing blip. See SmoothSpacings for what this means -// and how it is used. -bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, - ColPartition** parts) { - if (parts[PN_UPPER] == nullptr || parts[PN_LOWER] == nullptr) - return false; - // The blip is OK if upper and lower sum to an OK value and at least - // one of above1 and below1 is equal to the median. - return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], - median_spacing, resolution) && - ((parts[PN_ABOVE1] != nullptr && - parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) || - (parts[PN_BELOW1] != nullptr && - parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution))); -} - -// Returns true if both the top and bottom spacings of this match the given -// spacing to within suitable margins dictated by the image resolution. -bool ColPartition::SpacingEqual(int spacing, int resolution) const { - int bottom_error = BottomSpacingMargin(resolution); - int top_error = TopSpacingMargin(resolution); - return NearlyEqual(bottom_spacing_, spacing, bottom_error) && - NearlyEqual(top_spacing_, spacing, top_error); -} - -// Returns true if both the top and bottom spacings of this and other -// match to within suitable margins dictated by the image resolution. 
-bool ColPartition::SpacingsEqual(const ColPartition& other, - int resolution) const { - int bottom_error = std::max(BottomSpacingMargin(resolution), - other.BottomSpacingMargin(resolution)); - int top_error = std::max(TopSpacingMargin(resolution), - other.TopSpacingMargin(resolution)); - return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) && - (NearlyEqual(top_spacing_, other.top_spacing_, top_error) || - NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, - bottom_error)); -} - -// Returns true if the sum spacing of this and other match the given -// spacing (or twice the given spacing) to within a suitable margin dictated -// by the image resolution. -bool ColPartition::SummedSpacingOK(const ColPartition& other, - int spacing, int resolution) const { - int bottom_error = std::max(BottomSpacingMargin(resolution), - other.BottomSpacingMargin(resolution)); - int top_error = std::max(TopSpacingMargin(resolution), - other.TopSpacingMargin(resolution)); - int bottom_total = bottom_spacing_ + other.bottom_spacing_; - int top_total = top_spacing_ + other.top_spacing_; - return (NearlyEqual(spacing, bottom_total, bottom_error) && - NearlyEqual(spacing, top_total, top_error)) || - (NearlyEqual(spacing * 2, bottom_total, bottom_error) && - NearlyEqual(spacing * 2, top_total, top_error)); -} - -// Returns a suitable spacing margin that can be applied to bottoms of -// text lines, based on the resolution and the stored side_step_. -int ColPartition::BottomSpacingMargin(int resolution) const { - return static_cast(kMaxSpacingDrift * resolution + 0.5) + side_step_; -} - -// Returns a suitable spacing margin that can be applied to tops of -// text lines, based on the resolution and the stored side_step_. 
-int ColPartition::TopSpacingMargin(int resolution) const { - return static_cast(kMaxTopSpacingFraction * median_height_ + 0.5) + - BottomSpacingMargin(resolution); -} - -// Returns true if the median text sizes of this and other agree to within -// a reasonable multiplicative factor. -bool ColPartition::SizesSimilar(const ColPartition& other) const { - return median_height_ <= other.median_height_ * kMaxSizeRatio && - other.median_height_ <= median_height_ * kMaxSizeRatio; -} - -// Helper updates margin_left and margin_right, being the bounds of the left -// margin of part of a block. Returns false and does not update the bounds if -// this partition has a disjoint margin with the established margin. -static bool UpdateLeftMargin(const ColPartition& part, - int* margin_left, int* margin_right) { - const TBOX& part_box = part.bounding_box(); - int top = part_box.top(); - int bottom = part_box.bottom(); - int tl_key = part.SortKey(part.left_margin(), top); - int tr_key = part.SortKey(part_box.left(), top); - int bl_key = part.SortKey(part.left_margin(), bottom); - int br_key = part.SortKey(part_box.left(), bottom); - int left_key = std::max(tl_key, bl_key); - int right_key = std::min(tr_key, br_key); - if (left_key <= *margin_right && right_key >= *margin_left) { - // This part is good - let's keep it. - *margin_right = std::min(*margin_right, right_key); - *margin_left = std::max(*margin_left, left_key); - return true; - } - return false; -} - -// Computes and returns in start, end a line segment formed from a -// forwards-iterated group of left edges of partitions that satisfy the -// condition that the intersection of the left margins is non-empty, ie the -// rightmost left margin is to the left of the leftmost left bounding box edge. -// On return the iterator is set to the start of the next run. 
-void ColPartition::LeftEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end) { - ColPartition* part = part_it->data(); - ColPartition* start_part = part; - int start_y = part->bounding_box_.top(); - if (!part_it->at_first()) { - int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom(); - if (prev_bottom < start_y) - start_y = prev_bottom; - else if (prev_bottom > start_y) - start_y = (start_y + prev_bottom) / 2; - } - int end_y = part->bounding_box_.bottom(); - int margin_right = INT32_MAX; - int margin_left = -INT32_MAX; - UpdateLeftMargin(*part, &margin_left, &margin_right); - do { - part_it->forward(); - part = part_it->data(); - } while (!part_it->at_first() && - UpdateLeftMargin(*part, &margin_left, &margin_right)); - // The run ended. If we were pushed inwards, compute the next run and - // extend it backwards into the run we just calculated to find the end of - // this run that provides a tight box. - int next_margin_right = INT32_MAX; - int next_margin_left = -INT32_MAX; - UpdateLeftMargin(*part, &next_margin_left, &next_margin_right); - if (next_margin_left > margin_right) { - ColPartition_IT next_it(*part_it); - do { - next_it.forward(); - part = next_it.data(); - } while (!next_it.at_first() && - UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); - // Now extend the next run backwards into the original run to get the - // tightest fit. - do { - part_it->backward(); - part = part_it->data(); - } while (part != start_part && - UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); - part_it->forward(); - } - // Now calculate the end_y. 
- part = part_it->data_relative(-1); - end_y = part->bounding_box_.bottom(); - if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y) - end_y = (end_y + part_it->data()->bounding_box_.top()) / 2; - start->set_y(start_y); - start->set_x(part->XAtY(margin_right, start_y)); - end->set_y(end_y); - end->set_x(part->XAtY(margin_right, end_y)); - if (textord_debug_tabfind && !part_it->at_first()) - tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", - start_y, end_y, part->XAtY(margin_left, end_y), - end->x(), part->left_margin_, part->bounding_box_.left()); -} - -// Helper updates margin_left and margin_right, being the bounds of the right -// margin of part of a block. Returns false and does not update the bounds if -// this partition has a disjoint margin with the established margin. -static bool UpdateRightMargin(const ColPartition& part, - int* margin_left, int* margin_right) { - const TBOX& part_box = part.bounding_box(); - int top = part_box.top(); - int bottom = part_box.bottom(); - int tl_key = part.SortKey(part_box.right(), top); - int tr_key = part.SortKey(part.right_margin(), top); - int bl_key = part.SortKey(part_box.right(), bottom); - int br_key = part.SortKey(part.right_margin(), bottom); - int left_key = std::max(tl_key, bl_key); - int right_key = std::min(tr_key, br_key); - if (left_key <= *margin_right && right_key >= *margin_left) { - // This part is good - let's keep it. - *margin_right = std::min(*margin_right, right_key); - *margin_left = std::max(*margin_left, left_key); - return true; - } - return false; -} - -// Computes and returns in start, end a line segment formed from a -// backwards-iterated group of right edges of partitions that satisfy the -// condition that the intersection of the right margins is non-empty, ie the -// leftmost right margin is to the right of the rightmost right bounding box -// edge. -// On return the iterator is set to the start of the next run. 
-void ColPartition::RightEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end) { - ColPartition* part = part_it->data(); - ColPartition* start_part = part; - int start_y = part->bounding_box_.bottom(); - if (!part_it->at_last()) { - int next_y = part_it->data_relative(1)->bounding_box_.top(); - if (next_y > start_y) - start_y = next_y; - else if (next_y < start_y) - start_y = (start_y + next_y) / 2; - } - int end_y = part->bounding_box_.top(); - int margin_right = INT32_MAX; - int margin_left = -INT32_MAX; - UpdateRightMargin(*part, &margin_left, &margin_right); - do { - part_it->backward(); - part = part_it->data(); - } while (!part_it->at_last() && - UpdateRightMargin(*part, &margin_left, &margin_right)); - // The run ended. If we were pushed inwards, compute the next run and - // extend it backwards to find the end of this run for a tight box. - int next_margin_right = INT32_MAX; - int next_margin_left = -INT32_MAX; - UpdateRightMargin(*part, &next_margin_left, &next_margin_right); - if (next_margin_right < margin_left) { - ColPartition_IT next_it(*part_it); - do { - next_it.backward(); - part = next_it.data(); - } while (!next_it.at_last() && - UpdateRightMargin(*part, &next_margin_left, - &next_margin_right)); - // Now extend the next run forwards into the original run to get the - // tightest fit. - do { - part_it->forward(); - part = part_it->data(); - } while (part != start_part && - UpdateRightMargin(*part, &next_margin_left, - &next_margin_right)); - part_it->backward(); - } - // Now calculate the end_y. 
- part = part_it->data_relative(1); - end_y = part->bounding_box().top(); - if (!part_it->at_last() && - part_it->data()->bounding_box_.bottom() > end_y) - end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2; - start->set_y(start_y); - start->set_x(part->XAtY(margin_left, start_y)); - end->set_y(end_y); - end->set_x(part->XAtY(margin_left, end_y)); - if (textord_debug_tabfind && !part_it->at_last()) - tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", - start_y, end_y, end->x(), part->XAtY(margin_right, end_y), - part->bounding_box_.right(), part->right_margin_); -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartition.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartition.h deleted file mode 100644 index 702b6ea9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartition.h +++ /dev/null @@ -1,940 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colpartition.h -// Description: Class to hold partitions of the page that correspond -// roughly to text lines. -// Author: Ray Smith -// Created: Thu Aug 14 10:50:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_COLPARTITION_H_ -#define TESSERACT_TEXTORD_COLPARTITION_H_ - -#include "bbgrid.h" -#include "blobbox.h" // For BlobRegionType. -#include "ocrblock.h" -#include "rect.h" // For TBOX. -#include "scrollview.h" -#include "tabfind.h" // For WidthCallback. -#include "tabvector.h" // For BLOBNBOX_CLIST. - -#include - -namespace tesseract { - -// Number of colors in the color1, color2 arrays. -const int kRGBRMSColors = 4; - -class ColPartition; -class ColPartitionSet; -class ColPartitionGrid; -class WorkingPartSet; -class WorkingPartSet_LIST; - -// An enum to indicate how a partition sits on the columns. -// The order of flowing/heading/pullout must be kept consistent with -// PolyBlockType. -enum ColumnSpanningType { - CST_NOISE, // Strictly between columns. - CST_FLOWING, // Strictly within a single column. - CST_HEADING, // Spans multiple columns. - CST_PULLOUT, // Touches multiple columns, but doesn't span them. - CST_COUNT // Number of entries. -}; - -ELIST2IZEH(ColPartition) -CLISTIZEH(ColPartition) - -/** - * ColPartition is a partition of a horizontal slice of the page. - * It starts out as a collection of blobs at a particular y-coord in the grid, - * but ends up (after merging and uniquing) as an approximate text line. - * ColPartitions are also used to hold a partitioning of the page into - * columns, each representing one column. Although a ColPartition applies - * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions - * emerges, which represents the columns over a wide y-coordinate range. - */ -class ColPartition : public ELIST2_LINK { - public: - // This empty constructor is here only so that the class can be ELISTIZED. - // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier - // and eliminate CLASSNAME##_copier. 
- ColPartition() = default; - - /** - * @param blob_type is the blob_region_type_ of the blobs in this partition. - * @param vertical is the direction of logical vertical on the possibly skewed image. - */ - ColPartition(BlobRegionType blob_type, const ICOORD& vertical); - /** - * Constructs a fake ColPartition with no BLOBNBOXes to represent a - * horizontal or vertical line, given a type and a bounding box. - */ - static ColPartition* MakeLinePartition(BlobRegionType blob_type, - const ICOORD& vertical, - int left, int bottom, - int right, int top); - - // Constructs and returns a fake ColPartition with a single fake BLOBNBOX, - // all made from a single TBOX. - // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and - // the ColPartition owns the BLOBNBOX!!! - // Call DeleteBoxes before deleting the ColPartition. - static ColPartition* FakePartition(const TBOX& box, - PolyBlockType block_type, - BlobRegionType blob_type, - BlobTextFlowType flow); - - // Constructs and returns a ColPartition with the given real BLOBNBOX, - // and sets it up to be a "big" partition (single-blob partition bigger - // than the surrounding text that may be a dropcap, two or more vertically - // touching characters, or some graphic element. - // If the given list is not nullptr, the partition is also added to the list. - static ColPartition* MakeBigPartition(BLOBNBOX* box, - ColPartition_LIST* big_part_list); - - ~ColPartition(); - - // Simple accessors. 
- const TBOX& bounding_box() const { - return bounding_box_; - } - int left_margin() const { - return left_margin_; - } - void set_left_margin(int margin) { - left_margin_ = margin; - } - int right_margin() const { - return right_margin_; - } - void set_right_margin(int margin) { - right_margin_ = margin; - } - int median_top() const { - return median_top_; - } - int median_bottom() const { - return median_bottom_; - } - int median_left() const { - return median_left_; - } - int median_right() const { - return median_right_; - } - int median_height() const { - return median_height_; - } - void set_median_height(int height) { - median_height_ = height; - } - int median_width() const { - return median_width_; - } - void set_median_width(int width) { - median_width_ = width; - } - BlobRegionType blob_type() const { - return blob_type_; - } - void set_blob_type(BlobRegionType t) { - blob_type_ = t; - } - BlobTextFlowType flow() const { - return flow_; - } - void set_flow(BlobTextFlowType f) { - flow_ = f; - } - int good_blob_score() const { - return good_blob_score_; - } - bool good_width() const { - return good_width_; - } - bool good_column() const { - return good_column_; - } - bool left_key_tab() const { - return left_key_tab_; - } - int left_key() const { - return left_key_; - } - bool right_key_tab() const { - return right_key_tab_; - } - int right_key() const { - return right_key_; - } - PolyBlockType type() const { - return type_; - } - void set_type(PolyBlockType t) { - type_ = t; - } - BLOBNBOX_CLIST* boxes() { - return &boxes_; - } - int boxes_count() const { - return boxes_.length(); - } - void set_vertical(const ICOORD& v) { - vertical_ = v; - } - ColPartition_CLIST* upper_partners() { - return &upper_partners_; - } - ColPartition_CLIST* lower_partners() { - return &lower_partners_; - } - void set_working_set(WorkingPartSet* working_set) { - working_set_ = working_set; - } - bool block_owned() const { - return block_owned_; - } - void set_block_owned(bool 
owned) { - block_owned_ = owned; - } - bool desperately_merged() const { - return desperately_merged_; - } - ColPartitionSet* column_set() const { - return column_set_; - } - void set_side_step(int step) { - side_step_ = step; - } - int bottom_spacing() const { - return bottom_spacing_; - } - void set_bottom_spacing(int spacing) { - bottom_spacing_ = spacing; - } - int top_spacing() const { - return top_spacing_; - } - void set_top_spacing(int spacing) { - top_spacing_ = spacing; - } - - void set_table_type() { - if (type_ != PT_TABLE) { - type_before_table_ = type_; - type_ = PT_TABLE; - } - } - void clear_table_type() { - if (type_ == PT_TABLE) - type_ = type_before_table_; - } - bool inside_table_column() { - return inside_table_column_; - } - void set_inside_table_column(bool val) { - inside_table_column_ = val; - } - ColPartition* nearest_neighbor_above() const { - return nearest_neighbor_above_; - } - void set_nearest_neighbor_above(ColPartition* part) { - nearest_neighbor_above_ = part; - } - ColPartition* nearest_neighbor_below() const { - return nearest_neighbor_below_; - } - void set_nearest_neighbor_below(ColPartition* part) { - nearest_neighbor_below_ = part; - } - int space_above() const { - return space_above_; - } - void set_space_above(int space) { - space_above_ = space; - } - int space_below() const { - return space_below_; - } - void set_space_below(int space) { - space_below_ = space; - } - int space_to_left() const { - return space_to_left_; - } - void set_space_to_left(int space) { - space_to_left_ = space; - } - int space_to_right() const { - return space_to_right_; - } - void set_space_to_right(int space) { - space_to_right_ = space; - } - uint8_t* color1() { - return color1_; - } - uint8_t* color2() { - return color2_; - } - bool owns_blobs() const { - return owns_blobs_; - } - void set_owns_blobs(bool owns_blobs) { - // Do NOT change ownership flag when there are blobs in the list. 
- // Immediately set the ownership flag when creating copies. - ASSERT_HOST(boxes_.empty()); - owns_blobs_ = owns_blobs; - } - - // Inline quasi-accessors that require some computation. - - // Returns the middle y-coord of the bounding box. - int MidY() const { - return (bounding_box_.top() + bounding_box_.bottom()) / 2; - } - // Returns the middle y-coord of the median top and bottom. - int MedianY() const { - return (median_top_ + median_bottom_) / 2; - } - // Returns the middle x-coord of the bounding box. - int MidX() const { - return (bounding_box_.left() + bounding_box_.right()) / 2; - } - // Returns the sort key at any given x,y. - int SortKey(int x, int y) const { - return TabVector::SortKey(vertical_, x, y); - } - // Returns the x corresponding to the sortkey, y pair. - int XAtY(int sort_key, int y) const { - return TabVector::XAtY(vertical_, sort_key, y); - } - // Returns the x difference between the two sort keys. - int KeyWidth(int left_key, int right_key) const { - return (right_key - left_key) / vertical_.y(); - } - // Returns the column width between the left and right keys. - int ColumnWidth() const { - return KeyWidth(left_key_, right_key_); - } - // Returns the sort key of the box left edge. - int BoxLeftKey() const { - return SortKey(bounding_box_.left(), MidY()); - } - // Returns the sort key of the box right edge. - int BoxRightKey() const { - return SortKey(bounding_box_.right(), MidY()); - } - // Returns the left edge at the given y, using the sort key. - int LeftAtY(int y) const { - return XAtY(left_key_, y); - } - // Returns the right edge at the given y, using the sort key. - int RightAtY(int y) const { - return XAtY(right_key_, y); - } - // Returns true if the right edge of this is to the left of the right - // edge of other. - bool IsLeftOf(const ColPartition& other) const { - return bounding_box_.right() < other.bounding_box_.right(); - } - // Returns true if the partition contains the given x coordinate at the y. 
- bool ColumnContains(int x, int y) const { - return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1; - } - // Returns true if there are no blobs in the list. - bool IsEmpty() const { - return boxes_.empty(); - } - // Returns true if there is a single blob in the list. - bool IsSingleton() const { - return boxes_.singleton(); - } - // Returns true if this and other overlap horizontally by bounding box. - bool HOverlaps(const ColPartition& other) const { - return bounding_box_.x_overlap(other.bounding_box_); - } - // Returns true if this and other's bounding boxes overlap vertically. - // TODO(rays) Make HOverlaps and VOverlaps truly symmetric. - bool VOverlaps(const ColPartition& other) const { - return bounding_box_.y_gap(other.bounding_box_) < 0; - } - // Returns the vertical overlap (by median) of this and other. - // WARNING! Only makes sense on horizontal partitions! - int VCoreOverlap(const ColPartition& other) const { - if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) { - return 0; - } - return std::min(median_top_, other.median_top_) - - std::max(median_bottom_, other.median_bottom_); - } - // Returns the horizontal overlap (by median) of this and other. - // WARNING! Only makes sense on vertical partitions! - int HCoreOverlap(const ColPartition& other) const { - return std::min(median_right_, other.median_right_) - - std::max(median_left_, other.median_left_); - } - // Returns true if this and other overlap significantly vertically. - // WARNING! Only makes sense on horizontal partitions! 
- bool VSignificantCoreOverlap(const ColPartition& other) const { - if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) { - return false; - } - int overlap = VCoreOverlap(other); - int height = std::min(median_top_ - median_bottom_, - other.median_top_ - other.median_bottom_); - return overlap * 3 > height; - } - // Returns true if this and other can be combined without putting a - // horizontal step in either left or right edge of the resulting block. - bool WithinSameMargins(const ColPartition& other) const { - return left_margin_ <= other.bounding_box_.left() && - bounding_box_.left() >= other.left_margin_ && - bounding_box_.right() <= other.right_margin_ && - right_margin_ >= other.bounding_box_.right(); - } - // Returns true if the region types (aligned_text_) match. - // Lines never match anything, as they should never be merged or chained. - bool TypesMatch(const ColPartition& other) const { - return TypesMatch(blob_type_, other.blob_type_); - } - static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) { - return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) && - !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2); - } - - // Returns true if the types are similar to each other. 
- static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) { - return (type1 == type2 || - (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) || - (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION)); - } - - // Returns true if partitions is of horizontal line type - bool IsLineType() const { - return PTIsLineType(type_); - } - // Returns true if partitions is of image type - bool IsImageType() const { - return PTIsImageType(type_); - } - // Returns true if partitions is of text type - bool IsTextType() const { - return PTIsTextType(type_); - } - // Returns true if partitions is of pullout(inter-column) type - bool IsPulloutType() const { - return PTIsPulloutType(type_); - } - // Returns true if the partition is of an exclusively vertical type. - bool IsVerticalType() const { - return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE; - } - // Returns true if the partition is of a definite horizontal type. - bool IsHorizontalType() const { - return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE; - } - // Returns true is the partition is of a type that cannot be merged. - bool IsUnMergeableType() const { - return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE; - } - // Returns true if this partition is a vertical line - // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. - bool IsVerticalLine() const { - return IsVerticalType() && IsLineType(); - } - // Returns true if this partition is a horizontal line - // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. - bool IsHorizontalLine() const { - return IsHorizontalType() && IsLineType(); - } - - // Adds the given box to the partition, updating the partition bounds. - // The list of boxes in the partition is updated, ensuring that no box is - // recorded twice, and the boxes are kept in increasing left position. - void AddBox(BLOBNBOX* box); - - // Removes the given box from the partition, updating the bounds. 
- void RemoveBox(BLOBNBOX* box); - - // Returns the tallest box in the partition, as measured perpendicular to the - // presumed flow of text. - BLOBNBOX* BiggestBox(); - - // Returns the bounding box excluding the given box. - TBOX BoundsWithoutBox(BLOBNBOX* box); - - // Claims the boxes in the boxes_list by marking them with a this owner - // pointer. - void ClaimBoxes(); - - // nullptr the owner of the blobs in this partition, so they can be deleted - // independently of the ColPartition. - void DisownBoxes(); - // nullptr the owner of the blobs in this partition that are owned by this - // partition, so they can be deleted independently of the ColPartition. - // Any blobs that are not owned by this partition get to keep their owner - // without an assert failure. - void DisownBoxesNoAssert(); - // Nulls the owner of the blobs in this partition that are owned by this - // partition and not leader blobs, removing them from the boxes_ list, thus - // turning this partition back to a leader partition if it contains a leader, - // or otherwise leaving it empty. Returns true if any boxes remain. - bool ReleaseNonLeaderBoxes(); - - // Delete the boxes that this partition owns. - void DeleteBoxes(); - - // Reflects the partition in the y-axis, assuming that its blobs have - // already been done. Corrects only a limited part of the members, since - // this function is assumed to be used shortly after initial creation, which - // is before a lot of the members are used. - void ReflectInYAxis(); - - // Returns true if this is a legal partition - meaning that the conditions - // left_margin <= bounding_box left - // left_key <= bounding box left key - // bounding box left <= bounding box right - // and likewise for right margin and key - // are all met. - bool IsLegal(); - - // Returns true if the left and right edges are approximately equal. - bool MatchingColumns(const ColPartition& other) const; - - // Returns true if the colors match for two text partitions. 
- bool MatchingTextColor(const ColPartition& other) const; - - // Returns true if the sizes match for two text partitions, - // taking orientation into account - bool MatchingSizes(const ColPartition& other) const; - - // Returns true if there is no tabstop violation in merging this and other. - bool ConfirmNoTabViolation(const ColPartition& other) const; - - // Returns true if other has a similar stroke width to this. - bool MatchingStrokeWidth(const ColPartition& other, - double fractional_tolerance, - double constant_tolerance) const; - // Returns true if candidate is an acceptable diacritic base char merge - // with this as the diacritic. - bool OKDiacriticMerge(const ColPartition& candidate, bool debug) const; - - // Sets the sort key using either the tab vector, or the bounding box if - // the tab vector is nullptr. If the tab_vector lies inside the bounding_box, - // use the edge of the box as a key any way. - void SetLeftTab(const TabVector* tab_vector); - void SetRightTab(const TabVector* tab_vector); - - // Copies the left/right tab from the src partition, but if take_box is - // true, copies the box instead and uses that as a key. - void CopyLeftTab(const ColPartition& src, bool take_box); - void CopyRightTab(const ColPartition& src, bool take_box); - - // Returns the left rule line x coord of the leftmost blob. - int LeftBlobRule() const; - // Returns the right rule line x coord of the rightmost blob. - int RightBlobRule() const; - - // Returns the density value for a particular BlobSpecialTextType. - float SpecialBlobsDensity(const BlobSpecialTextType type) const; - // Returns the number of blobs for a particular BlobSpecialTextType. - int SpecialBlobsCount(const BlobSpecialTextType type); - // Set the density value for a particular BlobSpecialTextType, should ONLY be - // used for debugging or testing. In production code, use - // ComputeSpecialBlobsDensity instead. 
- void SetSpecialBlobsDensity( - const BlobSpecialTextType type, const float density); - // Compute the SpecialTextType density of blobs, where we assume - // that the SpecialTextType in the boxes_ has been set. - void ComputeSpecialBlobsDensity(); - - // Add a partner above if upper, otherwise below. - // Add them uniquely and keep the list sorted by box left. - // Partnerships are added symmetrically to partner and this. - void AddPartner(bool upper, ColPartition* partner); - // Removes the partner from this, but does not remove this from partner. - // This asymmetric removal is so as not to mess up the iterator that is - // working on partner's partner list. - void RemovePartner(bool upper, ColPartition* partner); - // Returns the partner if the given partner is a singleton, otherwise nullptr. - ColPartition* SingletonPartner(bool upper); - - // Merge with the other partition and delete it. - void Absorb(ColPartition* other, WidthCallback* cb); - - // Returns true if the overlap between this and the merged pair of - // merge candidates is sufficiently trivial to be allowed. - // The merged box can graze the edge of this by the ok_box_overlap - // if that exceeds the margin to the median top and bottom. - bool OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2, - int ok_box_overlap, bool debug); - - // Find the blob at which to split this to minimize the overlap with the - // given box. Returns the first blob to go in the second partition. - BLOBNBOX* OverlapSplitBlob(const TBOX& box); - - // Split this partition keeping the first half in this and returning - // the second half. - // Splits by putting the split_blob and the blobs that follow - // in the second half, and the rest in the first half. - ColPartition* SplitAtBlob(BLOBNBOX* split_blob); - - // Splits this partition at the given x coordinate, returning the right - // half and keeping the left half in this. 
- ColPartition* SplitAt(int split_x); - - // Recalculates all the coordinate limits of the partition. - void ComputeLimits(); - - // Returns the number of boxes that overlap the given box. - int CountOverlappingBoxes(const TBOX& box); - - // Computes and sets the type_, first_column_, last_column_ and column_set_. - // resolution refers to the ppi resolution of the image. - void SetPartitionType(int resolution, ColPartitionSet* columns); - - // Returns the PartitionType from the current BlobRegionType and a column - // flow spanning type ColumnSpanningType, generated by - // ColPartitionSet::SpanningType, that indicates how the partition sits - // in the columns. - PolyBlockType PartitionType(ColumnSpanningType flow) const; - - // Returns the first and last column touched by this partition. - // resolution refers to the ppi resolution of the image. - void ColumnRange(int resolution, ColPartitionSet* columns, - int* first_col, int* last_col); - - // Sets the internal flags good_width_ and good_column_. - void SetColumnGoodness(WidthCallback* cb); - - // Determines whether the blobs in this partition mostly represent - // a leader (fixed pitch sequence) and sets the member blobs accordingly. - // Note that height is assumed to have been tested elsewhere, and that this - // function will find most fixed-pitch text as leader without a height filter. - // Leader detection is limited to sequences of identical width objects, - // such as .... or ----, so patterns, such as .-.-.-.-. will not be found. - bool MarkAsLeaderIfMonospaced(); - // Given the result of TextlineProjection::EvaluateColPartition, (positive for - // horizontal text, negative for vertical text, and near zero for non-text), - // sets the blob_type_ and flow_ for this partition to indicate whether it - // is strongly or weakly vertical or horizontal text, or non-text. 
- void SetRegionAndFlowTypesFromProjectionValue(int value); - - // Sets all blobs with the partition blob type and flow, but never overwrite - // leader blobs, as we need to be able to identify them later. - void SetBlobTypes(); - - // Returns true if a decent baseline can be fitted through the blobs. - // Works for both horizontal and vertical text. - bool HasGoodBaseline(); - - // Adds this ColPartition to a matching WorkingPartSet if one can be found, - // otherwise starts a new one in the appropriate column, ending the previous. - void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, - int resolution, ColPartition_LIST* used_parts, - WorkingPartSet_LIST* working_set); - - // From the given block_parts list, builds one or more BLOCKs and - // corresponding TO_BLOCKs, such that the line spacing is uniform in each. - // Created blocks are appended to the end of completed_blocks and to_blocks. - // The used partitions are put onto used_parts, as they may still be referred - // to in the partition grid. bleft, tright and resolution are the bounds - // and resolution of the original image. - static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, - int resolution, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts, - BLOCK_LIST* completed_blocks, - TO_BLOCK_LIST* to_blocks); - // Constructs a block from the given list of partitions. - // Arguments are as LineSpacingBlocks above. - static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts); - - // Constructs a block from the given list of vertical text partitions. - // Currently only creates rectangular blocks. - static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft, - const ICOORD& tright, - ColPartition_LIST* block_parts, - ColPartition_LIST* used_parts); - - // Makes a TO_ROW matching this and moves all the blobs to it, transferring - // ownership to to returned TO_ROW. 
- TO_ROW* MakeToRow(); - - - // Returns a copy of everything except the list of boxes. The resulting - // ColPartition is only suitable for keeping in a column candidate list. - ColPartition* ShallowCopy() const; - // Returns a copy of everything with a shallow copy of the blobs. - // The blobs are still owned by their original parent, so they are - // treated as read-only. - ColPartition* CopyButDontOwnBlobs(); - - #ifndef GRAPHICS_DISABLED - // Provides a color for BBGrid to draw the rectangle. - ScrollView::Color BoxColor() const; - #endif // GRAPHICS_DISABLED - - // Prints debug information on this. - void Print() const; - // Prints debug information on the colors. - void PrintColors(); - - // Sets the types of all partitions in the run to be the max of the types. - void SmoothPartnerRun(int working_set_count); - - // Cleans up the partners of the given type so that there is at most - // one partner. This makes block creation simpler. - // If get_desperate is true, goes to more desperate merge methods - // to merge flowing text before breaking partnerships. - void RefinePartners(PolyBlockType type, bool get_desperate, - ColPartitionGrid* grid); - - // Returns true if this column partition is in the same column as - // part. This function will only work after the SetPartitionType function - // has been called on both column partitions. This is useful for - // doing a SideSearch when you want things in the same page column. - bool IsInSameColumnAs(const ColPartition& part) const; - - // Sort function to sort by bounding box. 
- static int SortByBBox(const void* p1, const void* p2) { - const ColPartition* part1 = *static_cast(p1); - const ColPartition* part2 = *static_cast(p2); - int mid_y1 = part1->bounding_box_.y_middle(); - int mid_y2 = part2->bounding_box_.y_middle(); - if ((part2->bounding_box_.bottom() <= mid_y1 && - mid_y1 <= part2->bounding_box_.top()) || - (part1->bounding_box_.bottom() <= mid_y2 && - mid_y2 <= part1->bounding_box_.top())) { - // Sort by increasing x. - return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle(); - } - // Sort by decreasing y. - return mid_y2 - mid_y1; - } - - // Sets the column bounds. Primarily used in testing. - void set_first_column(int column) { - first_column_ = column; - } - void set_last_column(int column) { - last_column_ = column; - } - - private: - // enum to refer to the entries in a neighbourhood of lines. - // Used by SmoothSpacings to test for blips with OKSpacingBlip. - enum SpacingNeighbourhood { - PN_ABOVE2, - PN_ABOVE1, - PN_UPPER, - PN_LOWER, - PN_BELOW1, - PN_BELOW2, - PN_COUNT - }; - - // Cleans up the partners above if upper is true, else below. - // If get_desperate is true, goes to more desperate merge methods - // to merge flowing text before breaking partnerships. - void RefinePartnersInternal(bool upper, bool get_desperate, - ColPartitionGrid* grid); - // Restricts the partners to only desirable types. For text and BRT_HLINE this - // means the same type_ , and for image types it means any image type. - void RefinePartnersByType(bool upper, ColPartition_CLIST* partners); - // Remove transitive partnerships: this<->a, and a<->b and this<->b. - // Gets rid of this<->b, leaving a clean chain. - // Also if we have this<->a and a<->this, then gets rid of this<->a, as - // this has multiple partners. - void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners); - // If multiple text partners can be merged, then do so. 
- // If desperate is true, then an increase in overlap with the merge is - // allowed. If the overlap increases, then the desperately_merged_ flag - // is set, indicating that the textlines probably need to be regenerated - // by aggressive line fitting/splitting, as there are probably vertically - // joined blobs that cross textlines. - void RefineTextPartnersByMerge(bool upper, bool desperate, - ColPartition_CLIST* partners, - ColPartitionGrid* grid); - // Keep the partner with the biggest overlap. - void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners); - - // Return true if bbox belongs better in this than other. - bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other); - - // Smoothes the spacings in the list into groups of equal linespacing. - // resolution is the resolution of the original image, used as a basis - // for thresholds in change of spacing. page_height is in pixels. - static void SmoothSpacings(int resolution, int page_height, - ColPartition_LIST* parts); - - // Returns true if the parts array of pointers to partitions matches the - // condition for a spacing blip. See SmoothSpacings for what this means - // and how it is used. - static bool OKSpacingBlip(int resolution, int median_spacing, - ColPartition** parts); - - // Returns true if both the top and bottom spacings of this match the given - // spacing to within suitable margins dictated by the image resolution. - bool SpacingEqual(int spacing, int resolution) const; - - // Returns true if both the top and bottom spacings of this and other - // match to within suitable margins dictated by the image resolution. - bool SpacingsEqual(const ColPartition& other, int resolution) const; - - // Returns true if the sum spacing of this and other match the given - // spacing (or twice the given spacing) to within a suitable margin dictated - // by the image resolution. 
- bool SummedSpacingOK(const ColPartition& other, - int spacing, int resolution) const; - - // Returns a suitable spacing margin that can be applied to bottoms of - // text lines, based on the resolution and the stored side_step_. - int BottomSpacingMargin(int resolution) const; - - // Returns a suitable spacing margin that can be applied to tops of - // text lines, based on the resolution and the stored side_step_. - int TopSpacingMargin(int resolution) const; - - // Returns true if the median text sizes of this and other agree to within - // a reasonable multiplicative factor. - bool SizesSimilar(const ColPartition& other) const; - - // Computes and returns in start, end a line segment formed from a - // forwards-iterated group of left edges of partitions that satisfy the - // condition that the rightmost left margin is to the left of the - // leftmost left bounding box edge. - // TODO(rays) Not good enough. Needs improving to tightly wrap text in both - // directions, and to loosely wrap images. - static void LeftEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end); - // Computes and returns in start, end a line segment formed from a - // backwards-iterated group of right edges of partitions that satisfy the - // condition that the leftmost right margin is to the right of the - // rightmost right bounding box edge. - // TODO(rays) Not good enough. Needs improving to tightly wrap text in both - // directions, and to loosely wrap images. - static void RightEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end); - - // The margins are determined by the position of the nearest vertically - // overlapping neighbour to the side. They indicate the maximum extent - // that the block/column may be extended without touching something else. - // Leftmost coordinate that the region may occupy over the y limits. - int left_margin_; - // Rightmost coordinate that the region may occupy over the y limits. 
- int right_margin_; - // Bounding box of all blobs in the partition. - TBOX bounding_box_; - // Median top and bottom of blobs in this partition. - int median_bottom_; - int median_top_; - // Median height of blobs in this partition. - int median_height_; - // Median left and right of blobs in this partition. - int median_left_; - int median_right_; - // Median width of blobs in this partition. - int median_width_; - // blob_region_type_ for the blobs in this partition. - BlobRegionType blob_type_; - BlobTextFlowType flow_; // Quality of text flow. - // Total of GoodTextBlob results for all blobs in the partition. - int good_blob_score_; - // True if this partition has a common width. - bool good_width_; - // True if this is a good column candidate. - bool good_column_; - // True if the left_key_ is from a tab vector. - bool left_key_tab_; - // True if the right_key_ is from a tab vector. - bool right_key_tab_; - // Left and right sort keys for the edges of the partition. - // If the respective *_key_tab_ is true then this key came from a tab vector. - // If not, then the class promises to keep the key equal to the sort key - // for the respective edge of the bounding box at the MidY, so that - // LeftAtY and RightAtY always returns an x coordinate on the line parallel - // to vertical_ through the bounding box edge at MidY. - int left_key_; - int right_key_; - // Type of this partition after looking at its relation to the columns. - PolyBlockType type_; - // All boxes in the partition stored in increasing left edge coordinate. - BLOBNBOX_CLIST boxes_; - // The global vertical skew direction. - ICOORD vertical_; - // The partitions above that matched this. - ColPartition_CLIST upper_partners_; - // The partitions below that matched this. - ColPartition_CLIST lower_partners_; - // The WorkingPartSet it lives in while blocks are being made. - WorkingPartSet* working_set_; - // Flag is true when AddBox is sorting vertically, false otherwise. 
- bool last_add_was_vertical_; - // True when the partition's ownership has been taken from the grid and - // placed in a working set, or, after that, in the good_parts_ list. - bool block_owned_; - // Flag to indicate that this partition was subjected to a desperate merge, - // and therefore the textlines need rebuilding. - bool desperately_merged_; - // The first and last column that this partition applies to. - // Flowing partitions (see type_) will have an equal first and last value - // of the form 2n + 1, where n is the zero-based index into the partitions - // in column_set_. (See ColPartitionSet::GetColumnByIndex). - // Heading partitions will have unequal values of the same form. - // Pullout partitions will have equal values, but may have even values, - // indicating placement between columns. - int first_column_; - int last_column_; - // Column_set_ is the column layout applicable to this ColPartition. - ColPartitionSet* column_set_; - // Linespacing data. - int side_step_; // Median y-shift to next blob on same line. - int top_spacing_; // Line spacing from median_top_. - int bottom_spacing_; // Line spacing from median_bottom_. - - // Type of this partition before considering it as a table cell. This is - // used to revert the type if a partition is first marked as a table cell but - // later filtering steps decide it does not belong to a table - PolyBlockType type_before_table_; - bool inside_table_column_; // Check whether the current partition has been - // assigned to a table column - // Nearest neighbor above with major x-overlap - ColPartition* nearest_neighbor_above_; - // Nearest neighbor below with major x-overlap - ColPartition* nearest_neighbor_below_; - int space_above_; // Distance from nearest_neighbor_above - int space_below_; // Distance from nearest_neighbor_below - int space_to_left_; // Distance from the left edge of the column - int space_to_right_; // Distance from the right edge of the column - // Color foreground/background data. 
- uint8_t color1_[kRGBRMSColors]; - uint8_t color2_[kRGBRMSColors]; - bool owns_blobs_; // Does the partition own its blobs? - // The density of special blobs. - float special_blobs_densities_[BSTT_COUNT]; -}; - -// Typedef it now in case it becomes a class later. -using ColPartitionGridSearch = GridSearch ; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_COLPARTITION_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitiongrid.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitiongrid.cpp deleted file mode 100644 index ed644b20..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitiongrid.cpp +++ /dev/null @@ -1,1750 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colpartitiongrid.cpp -// Description: Class collecting code that acts on a BBGrid of ColPartitions. -// Author: Ray Smith -// Created: Mon Oct 05 08:42:01 PDT 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "colpartitiongrid.h" -#include "colpartitionset.h" -#include "imagefind.h" - -#include - -namespace tesseract { - -BOOL_VAR(textord_tabfind_show_color_fit, false, "Show stroke widths"); - -// Max pad factor used to search the neighbourhood of a partition to smooth -// partition types. -const int kMaxPadFactor = 6; -// Max multiple of size (min(height, width)) for the distance of the nearest -// neighbour for the change of type to be used. -const int kMaxNeighbourDistFactor = 4; -// Maximum number of lines in a credible figure caption. -const int kMaxCaptionLines = 7; -// Min ratio between biggest and smallest gap to bound a caption. -const double kMinCaptionGapRatio = 2.0; -// Min ratio between biggest gap and mean line height to bound a caption. -const double kMinCaptionGapHeightRatio = 0.5; -// Min fraction of ColPartition height to be overlapping for margin purposes. -const double kMarginOverlapFraction = 0.25; -// Size ratio required to consider an unmerged overlapping partition to be big. -const double kBigPartSizeRatio = 1.75; -// Fraction of gridsize to allow arbitrary overlap between partitions. -const double kTinyEnoughTextlineOverlapFraction = 0.25; -// Max vertical distance of neighbouring ColPartition as a multiple of -// partition height for it to be a partner. -// TODO(rays) fix the problem that causes a larger number to not work well. -// The value needs to be larger as sparse text blocks in a page that gets -// marked as single column will not find adjacent lines as partners, and -// will merge horizontally distant, but aligned lines. See rep.4B3 p5. -// The value needs to be small because double-spaced legal docs written -// in a single column, but justified courier have widely spaced lines -// that need to get merged before they partner-up with the lines above -// and below. See legal.3B5 p13/17. 
Neither of these should depend on -// the value of kMaxPartitionSpacing to be successful, and ColPartition -// merging needs attention to fix this problem. -const double kMaxPartitionSpacing = 1.75; -// Margin by which text has to beat image or vice-versa to make a firm -// decision in GridSmoothNeighbour. -const int kSmoothDecisionMargin = 4; - -ColPartitionGrid::ColPartitionGrid(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BBGrid(gridsize, - bleft, tright) { -} - -// Handles a click event in a display window. -void ColPartitionGrid::HandleClick(int x, int y) { - BBGrid::HandleClick(x, y); - // Run a radial search for partitions that overlap. - ColPartitionGridSearch radsearch(this); - radsearch.SetUniqueMode(true); - radsearch.StartRadSearch(x, y, 1); - ColPartition* neighbour; - FCOORD click(x, y); - while ((neighbour = radsearch.NextRadSearch()) != nullptr) { - const TBOX& nbox = neighbour->bounding_box(); - if (nbox.contains(click)) { - tprintf("Block box:"); - neighbour->bounding_box().print(); - neighbour->Print(); - } - } -} - -// Merges ColPartitions in the grid that look like they belong in the same -// textline. -// For all partitions in the grid, calls the box_cb permanent callback -// to compute the search box, searches the box, and if a candidate is found, -// calls the confirm_cb to check any more rules. If the confirm_cb returns -// true, then the partitions are merged. -// Both callbacks are deleted before returning. -void ColPartitionGrid::Merges( - TessResultCallback2* box_cb, - TessResultCallback2* confirm_cb) { - // Iterate the ColPartitions in the grid. 
- ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (MergePart(box_cb, confirm_cb, part)) - gsearch.RepositionIterator(); - } - delete box_cb; - delete confirm_cb; -} - -// For the given partition, calls the box_cb permanent callback -// to compute the search box, searches the box, and if a candidate is found, -// calls the confirm_cb to check any more rules. If the confirm_cb returns -// true, then the partitions are merged. -// Returns true if the partition is consumed by one or more merges. -bool ColPartitionGrid::MergePart( - TessResultCallback2* box_cb, - TessResultCallback2* confirm_cb, - ColPartition* part) { - if (part->IsUnMergeableType()) - return false; - bool any_done = false; - // Repeatedly merge part while we find a best merge candidate that works. - bool merge_done = false; - do { - merge_done = false; - TBOX box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); - if (debug) { - tprintf("Merge candidate:"); - box.print(); - } - // Set up a rectangle search bounded by the part. - if (!box_cb->Run(part, &box)) - continue; - // Create a list of merge candidates. - ColPartition_CLIST merge_candidates; - FindMergeCandidates(part, box, debug, &merge_candidates); - // Find the best merge candidate based on minimal overlap increase. - int overlap_increase; - ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug, - confirm_cb, - &overlap_increase); - if (neighbour != nullptr && overlap_increase <= 0) { - if (debug) { - tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", - part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour), - overlap_increase); - } - // Looks like a good candidate so merge it. - RemoveBBox(neighbour); - // We will modify the box of part, so remove it from the grid, merge - // it and then re-insert it into the grid. 
- RemoveBBox(part); - part->Absorb(neighbour, nullptr); - InsertBBox(true, true, part); - merge_done = true; - any_done = true; - } else if (neighbour != nullptr) { - if (debug) { - tprintf("Overlapped when merged with increase %d: ", overlap_increase); - neighbour->bounding_box().print(); - } - } else if (debug) { - tprintf("No candidate neighbour returned\n"); - } - } while (merge_done); - return any_done; -} - -// Returns true if the given part and merge candidate might believably -// be part of a single text line according to the default rules. -// In general we only want to merge partitions that look like they -// are on the same text line, ie their median limits overlap, but we have -// to make exceptions for diacritics and stray punctuation. -static bool OKMergeCandidate(const ColPartition* part, - const ColPartition* candidate, - bool debug) { - const TBOX& part_box = part->bounding_box(); - if (candidate == part) - return false; // Ignore itself. - if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) - return false; // Don't mix inappropriate types. - - const TBOX& c_box = candidate->bounding_box(); - if (debug) { - tprintf("Examining merge candidate:"); - c_box.print(); - } - // Candidates must be within a reasonable distance. - if (candidate->IsVerticalType() || part->IsVerticalType()) { - int h_dist = -part->HCoreOverlap(*candidate); - if (h_dist >= std::max(part_box.width(), c_box.width()) / 2) { - if (debug) - tprintf("Too far away: h_dist = %d\n", h_dist); - return false; - } - } else { - // Coarse filter by vertical distance between partitions. - int v_dist = -part->VCoreOverlap(*candidate); - if (v_dist >= std::max(part_box.height(), c_box.height()) / 2) { - if (debug) - tprintf("Too far away: v_dist = %d\n", v_dist); - return false; - } - // Candidates must either overlap in median y, - // or part or candidate must be an acceptable diacritic. 
- if (!part->VSignificantCoreOverlap(*candidate) && - !part->OKDiacriticMerge(*candidate, debug) && - !candidate->OKDiacriticMerge(*part, debug)) { - if (debug) - tprintf("Candidate fails overlap and diacritic tests!\n"); - return false; - } - } - return true; -} - -// Helper function to compute the increase in overlap of the parts list of -// Colpartitions with the combination of merge1 and merge2, compared to -// the overlap with them uncombined. -// An overlap is not counted if passes the OKMergeOverlap test with ok_overlap -// as the pixel overlap limit. merge1 and merge2 must both be non-nullptr. -static int IncreaseInOverlap(const ColPartition* merge1, - const ColPartition* merge2, - int ok_overlap, - ColPartition_CLIST* parts) { - ASSERT_HOST(merge1 != nullptr && merge2 != nullptr); - int total_area = 0; - ColPartition_C_IT it(parts); - TBOX merged_box(merge1->bounding_box()); - merged_box += merge2->bounding_box(); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - if (part == merge1 || part == merge2) - continue; - TBOX part_box = part->bounding_box(); - // Compute the overlap of the merged box with part. - int overlap_area = part_box.intersection(merged_box).area(); - if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, - ok_overlap, false)) { - total_area += overlap_area; - // Subtract the overlap of merge1 and merge2 individually. - overlap_area = part_box.intersection(merge1->bounding_box()).area(); - if (overlap_area > 0) - total_area -= overlap_area; - TBOX intersection_box = part_box.intersection(merge2->bounding_box()); - overlap_area = intersection_box.area(); - if (overlap_area > 0) { - total_area -= overlap_area; - // Add back the 3-way area. - intersection_box &= merge1->bounding_box(); // In-place intersection. 
- overlap_area = intersection_box.area(); - if (overlap_area > 0) - total_area += overlap_area; - } - } - } - return total_area; -} - -// Helper function to test that each partition in candidates is either a -// good diacritic merge with part or an OK merge candidate with all others -// in the candidates list. -// ASCII Art Scenario: -// We sometimes get text such as "join-this" where the - is actually a long -// dash culled from a standard set of extra characters that don't match the -// font of the text. This makes its strokewidth not match and forms a broken -// set of 3 partitions for "join", "-" and "this" and the dash may slightly -// overlap BOTH words. -// ------- ------- -// | ==== | -// ------- ------- -// The standard merge rule: "you can merge 2 partitions as long as there is -// no increase in overlap elsewhere" fails miserably here. Merge any pair -// of partitions and the combined box overlaps more with the third than -// before. To allow the merge, we need to consider whether it is safe to -// merge everything, without merging separate text lines. For that we need -// everything to be an OKMergeCandidate (which is supposed to prevent -// separate text lines merging), but this is hard for diacritics to satisfy, -// so an alternative to being OKMergeCandidate with everything is to be an -// OKDiacriticMerge with part as the base character. 
-static bool TestCompatibleCandidates(const ColPartition& part, bool debug, - ColPartition_CLIST* candidates) { - ColPartition_C_IT it(candidates); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* candidate = it.data(); - if (!candidate->OKDiacriticMerge(part, false)) { - ColPartition_C_IT it2(it); - for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { - ColPartition* candidate2 = it2.data(); - if (candidate2 != candidate && - !OKMergeCandidate(candidate, candidate2, false)) { - if (debug) { - tprintf("NC overlap failed:Candidate:"); - candidate2->bounding_box().print(); - tprintf("fails to be a good merge with:"); - candidate->bounding_box().print(); - } - return false; - } - } - } - } - return true; -} - -// Computes and returns the total overlap of all partitions in the grid. -// If overlap_grid is non-null, it is filled with a grid that holds empty -// partitions representing the union of all overlapped partitions. -int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid** overlap_grid) { - int total_overlap = 0; - // Iterate the ColPartitions in the grid. 
- ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - ColPartition_CLIST neighbors; - const TBOX& part_box = part->bounding_box(); - FindOverlappingPartitions(part_box, part, &neighbors); - ColPartition_C_IT n_it(&neighbors); - bool any_part_overlap = false; - for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { - const TBOX& n_box = n_it.data()->bounding_box(); - int overlap = n_box.intersection(part_box).area(); - if (overlap > 0 && overlap_grid != nullptr) { - if (*overlap_grid == nullptr) { - *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright()); - } - (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy()); - if (!any_part_overlap) { - (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy()); - } - } - any_part_overlap = true; - total_overlap += overlap; - } - } - return total_overlap; -} - -// Finds all the ColPartitions in the grid that overlap with the given -// box and returns them SortByBoxLeft(ed) and uniqued in the given list. -// Any partition equal to not_this (may be nullptr) is excluded. -void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box, - const ColPartition* not_this, - ColPartition_CLIST* parts) { - ColPartitionGridSearch rsearch(this); - rsearch.StartRectSearch(box); - ColPartition* part; - while ((part = rsearch.NextRectSearch()) != nullptr) { - if (part != not_this) - parts->add_sorted(SortByBoxLeft, true, part); - } -} - -// Finds and returns the best candidate ColPartition to merge with part, -// selected from the candidates list, based on the minimum increase in -// pairwise overlap among all the partitions overlapped by the combined box. -// If overlap_increase is not nullptr then it returns the increase in overlap -// that would result from the merge. 
-// confirm_cb is a permanent callback that (if non-null) will be used to -// confirm the validity of a proposed merge candidate before selecting it. -// -// ======HOW MERGING WORKS====== -// The problem: -// We want to merge all the parts of a textline together, but avoid merging -// separate textlines. Diacritics, i dots, punctuation, and broken characters -// are examples of small bits that need merging with the main textline. -// Drop-caps and descenders in one line that touch ascenders in the one below -// are examples of cases where we don't want to merge. -// -// The solution: -// Merges that increase overlap among other partitions are generally bad. -// Those that don't increase overlap (much) and minimize the total area -// seem to be good. -// -// Ascii art example: -// The text: -// groggy descenders -// minimum ascenders -// The boxes: The === represents a small box near or overlapping the lower box. -// ----------------- -// | | -// ----------------- -// -===------------- -// | | -// ----------------- -// In considering what to do with the small === box, we find the 2 larger -// boxes as neighbours and possible merge candidates, but merging with the -// upper box increases overlap with the lower box, whereas merging with the -// lower box does not increase overlap. -// If the small === box didn't overlap either to start with, total area -// would be minimized by merging with the nearer (lower) box. -// -// This is a simple example. In reality, we have to allow some increase -// in overlap, or tightly spaced text would end up in bits. 
-ColPartition* ColPartitionGrid::BestMergeCandidate( - const ColPartition* part, ColPartition_CLIST* candidates, bool debug, - TessResultCallback2* confirm_cb, - int* overlap_increase) { - if (overlap_increase != nullptr) - *overlap_increase = 0; - if (candidates->empty()) - return nullptr; - int ok_overlap = - static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); - // The best neighbour to merge with is the one that causes least - // total pairwise overlap among all the neighbours. - // If more than one offers the same total overlap, choose the one - // with the least total area. - const TBOX& part_box = part->bounding_box(); - ColPartition_C_IT it(candidates); - ColPartition* best_candidate = nullptr; - // Find the total combined box of all candidates and the original. - TBOX full_box(part_box); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* candidate = it.data(); - full_box += candidate->bounding_box(); - } - // Keep valid neighbours in a list. - ColPartition_CLIST neighbours; - // Now run a rect search of the merged box for overlapping neighbours, as - // we need anything that might be overlapped by the merged box. - FindOverlappingPartitions(full_box, part, &neighbours); - if (debug) { - tprintf("Finding best merge candidate from %d, %d neighbours for box:", - candidates->length(), neighbours.length()); - part_box.print(); - } - // If the best increase in overlap is positive, then we also check the - // worst non-candidate overlap. This catches the case of multiple good - // candidates that overlap each other when merged. If the worst - // non-candidate overlap is better than the best overlap, then return - // the worst non-candidate overlap instead. 
- ColPartition_CLIST non_candidate_neighbours; - non_candidate_neighbours.set_subtract(SortByBoxLeft, true, - &neighbours, candidates); - int worst_nc_increase = 0; - int best_increase = INT32_MAX; - int best_area = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* candidate = it.data(); - if (confirm_cb != nullptr && !confirm_cb->Run(part, candidate)) { - if (debug) { - tprintf("Candidate not confirmed:"); - candidate->bounding_box().print(); - } - continue; - } - int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours); - const TBOX& cand_box = candidate->bounding_box(); - if (best_candidate == nullptr || increase < best_increase) { - best_candidate = candidate; - best_increase = increase; - best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); - if (debug) { - tprintf("New best merge candidate has increase %d, area %d, over box:", - increase, best_area); - full_box.print(); - candidate->Print(); - } - } else if (increase == best_increase) { - int area = cand_box.bounding_union(part_box).area() - cand_box.area(); - if (area < best_area) { - best_area = area; - best_candidate = candidate; - } - } - increase = IncreaseInOverlap(part, candidate, ok_overlap, - &non_candidate_neighbours); - if (increase > worst_nc_increase) - worst_nc_increase = increase; - } - if (best_increase > 0) { - // If the worst non-candidate increase is less than the best increase - // including the candidates, then all the candidates can merge together - // and the increase in outside overlap would be less, so use that result, - // but only if each candidate is either a good diacritic merge with part, - // or an ok merge candidate with all the others. - // See TestCompatibleCandidates for more explanation and a picture. 
- if (worst_nc_increase < best_increase && - TestCompatibleCandidates(*part, debug, candidates)) { - best_increase = worst_nc_increase; - } - } - if (overlap_increase != nullptr) - *overlap_increase = best_increase; - return best_candidate; -} - -// Helper to remove the given box from the given partition, put it in its -// own partition, and add to the partition list. -static void RemoveBadBox(BLOBNBOX* box, ColPartition* part, - ColPartition_LIST* part_list) { - part->RemoveBox(box); - ColPartition::MakeBigPartition(box, part_list); -} - - -// Split partitions where it reduces overlap between their bounding boxes. -// ColPartitions are after all supposed to be a partitioning of the blobs -// AND of the space on the page! -// Blobs that cause overlaps get removed, put in individual partitions -// and added to the big_parts list. They are most likely characters on -// 2 textlines that touch, or something big like a dropcap. -void ColPartitionGrid::SplitOverlappingPartitions( - ColPartition_LIST* big_parts) { - int ok_overlap = - static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - // Set up a rectangle search bounded by the part. - const TBOX& box = part->bounding_box(); - ColPartitionGridSearch rsearch(this); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(box); - int unresolved_overlaps = 0; - - ColPartition* neighbour; - while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (neighbour == part) - continue; - const TBOX& neighbour_box = neighbour->bounding_box(); - if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) && - part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) - continue; // The overlap is OK both ways. 
- - // If removal of the biggest box from either partition eliminates the - // overlap, and it is much bigger than the box left behind, then - // it is either a drop-cap, an inter-line join, or some junk that - // we don't want anyway, so put it in the big_parts list. - if (!part->IsSingleton()) { - BLOBNBOX* excluded = part->BiggestBox(); - TBOX shrunken = part->BoundsWithoutBox(excluded); - if (!shrunken.overlap(neighbour_box) && - excluded->bounding_box().height() > - kBigPartSizeRatio * shrunken.height()) { - // Removing the biggest box fixes the overlap, so do it! - gsearch.RemoveBBox(); - RemoveBadBox(excluded, part, big_parts); - InsertBBox(true, true, part); - gsearch.RepositionIterator(); - break; - } - } else if (box.contains(neighbour_box)) { - ++unresolved_overlaps; - continue; // No amount of splitting will fix it. - } - if (!neighbour->IsSingleton()) { - BLOBNBOX* excluded = neighbour->BiggestBox(); - TBOX shrunken = neighbour->BoundsWithoutBox(excluded); - if (!shrunken.overlap(box) && - excluded->bounding_box().height() > - kBigPartSizeRatio * shrunken.height()) { - // Removing the biggest box fixes the overlap, so do it! - rsearch.RemoveBBox(); - RemoveBadBox(excluded, neighbour, big_parts); - InsertBBox(true, true, neighbour); - gsearch.RepositionIterator(); - break; - } - } - int part_overlap_count = part->CountOverlappingBoxes(neighbour_box); - int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box); - ColPartition* right_part = nullptr; - if (neighbour_overlap_count <= part_overlap_count || - part->IsSingleton()) { - // Try to split the neighbour to reduce overlap. - BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box); - if (split_blob != nullptr) { - rsearch.RemoveBBox(); - right_part = neighbour->SplitAtBlob(split_blob); - InsertBBox(true, true, neighbour); - ASSERT_HOST(right_part != nullptr); - } - } else { - // Try to split part to reduce overlap. 
- BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box); - if (split_blob != nullptr) { - gsearch.RemoveBBox(); - right_part = part->SplitAtBlob(split_blob); - InsertBBox(true, true, part); - ASSERT_HOST(right_part != nullptr); - } - } - if (right_part != nullptr) { - InsertBBox(true, true, right_part); - gsearch.RepositionIterator(); - rsearch.RepositionIterator(); - break; - } - } - if (unresolved_overlaps > 2 && part->IsSingleton()) { - // This part is no good so just add to big_parts. - RemoveBBox(part); - ColPartition_IT big_it(big_parts); - part->set_block_owned(true); - big_it.add_to_end(part); - gsearch.RepositionIterator(); - } - } -} - -// Filters partitions of source_type by looking at local neighbours. -// Where a majority of neighbours have a text type, the partitions are -// changed to text, where the neighbours have image type, they are changed -// to image, and partitions that have no definite neighbourhood type are -// left unchanged. -// im_box and rerotation are used to map blob coordinates onto the -// nontext_map, which is used to prevent the spread of text neighbourhoods -// into images. -// Returns true if anything was changed. -bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, - Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rotation) { - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - bool any_changed = false; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type())) - continue; - const TBOX& box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); - if (SmoothRegionType(nontext_map, im_box, rotation, debug, part)) - any_changed = true; - } - return any_changed; -} - -// Reflects the grid and its colpartitions in the y-axis, assuming that -// all blob boxes have already been done. 
-void ColPartitionGrid::ReflectInYAxis() { - ColPartition_LIST parts; - ColPartition_IT part_it(&parts); - // Iterate the ColPartitions in the grid to extract them. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part_it.add_after_then_move(part); - } - ICOORD bot_left(-tright().x(), bleft().y()); - ICOORD top_right(-bleft().x(), tright().y()); - // Reinitializing the grid with reflected coords also clears all the - // pointers, so parts will now own the ColPartitions. (Briefly). - Init(gridsize(), bot_left, top_right); - for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - part = part_it.extract(); - part->ReflectInYAxis(); - InsertBBox(true, true, part); - } -} - -// Transforms the grid of partitions to the output blocks, putting each -// partition into a separate block. We don't really care about the order, -// as we just want to get as much text as possible without trying to organize -// it into proper blocks or columns. -// TODO(rays) some kind of sort function would be useful and probably better -// than the default here, which is to sort by order of the grid search. -void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, - TO_BLOCK_LIST* to_blocks) { - TO_BLOCK_IT to_block_it(to_blocks); - BLOCK_IT block_it(blocks); - // All partitions will be put on this list and deleted on return. - ColPartition_LIST parts; - ColPartition_IT part_it(&parts); - // Iterate the ColPartitions in the grid to extract them. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part_it.add_after_then_move(part); - // The partition has to be at least vaguely like text. 
- BlobRegionType blob_type = part->blob_type(); - if (BLOBNBOX::IsTextType(blob_type) || - (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { - PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT - : PT_FLOWING_TEXT; - // Get metrics from the row that will be used for the block. - TBOX box = part->bounding_box(); - int median_width = part->median_width(); - int median_height = part->median_height(); - // Turn the partition into a TO_ROW. - TO_ROW* row = part->MakeToRow(); - if (row == nullptr) { - // This partition is dead. - part->DeleteBoxes(); - continue; - } - BLOCK* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), - box.right(), box.top()); - block->pdblk.set_poly_block(new POLY_BLOCK(box, type)); - TO_BLOCK* to_block = new TO_BLOCK(block); - TO_ROW_IT row_it(to_block->get_rows()); - row_it.add_after_then_move(row); - // We haven't differentially rotated vertical and horizontal text at - // this point, so use width or height as appropriate. - if (blob_type == BRT_VERT_TEXT) { - to_block->line_size = static_cast(median_width); - to_block->line_spacing = static_cast(box.width()); - to_block->max_blob_size = static_cast(box.width() + 1); - } else { - to_block->line_size = static_cast(median_height); - to_block->line_spacing = static_cast(box.height()); - to_block->max_blob_size = static_cast(box.height() + 1); - } - if (to_block->line_size == 0) to_block->line_size = 1; - block_it.add_to_end(block); - to_block_it.add_to_end(to_block); - } else { - // This partition is dead. - part->DeleteBoxes(); - } - } - Clear(); - // Now it is safe to delete the ColPartitions as parts goes out of scope. -} - -// Rotates the grid and its colpartitions by the given angle, assuming that -// all blob boxes have already been done. -void ColPartitionGrid::Deskew(const FCOORD& deskew) { - ColPartition_LIST parts; - ColPartition_IT part_it(&parts); - // Iterate the ColPartitions in the grid to extract them. 
- ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part_it.add_after_then_move(part); - } - // Rebuild the grid to the new size. - TBOX grid_box(bleft_, tright_); - grid_box.rotate_large(deskew); - Init(gridsize(), grid_box.botleft(), grid_box.topright()); - // Reinitializing the grid with rotated coords also clears all the - // pointers, so parts will now own the ColPartitions. (Briefly). - for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - part = part_it.extract(); - part->ComputeLimits(); - InsertBBox(true, true, part); - } -} - -// Sets the left and right tabs of the partitions in the grid. -void ColPartitionGrid::SetTabStops(TabFind* tabgrid) { - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - const TBOX& part_box = part->bounding_box(); - TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false); - // If the overlapping line is not a left tab, try for non-overlapping. - if (left_line != nullptr && !left_line->IsLeftTab()) - left_line = tabgrid->LeftTabForBox(part_box, false, false); - if (left_line != nullptr && left_line->IsLeftTab()) - part->SetLeftTab(left_line); - TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false); - if (right_line != nullptr && !right_line->IsRightTab()) - right_line = tabgrid->RightTabForBox(part_box, false, false); - if (right_line != nullptr && right_line->IsRightTab()) - part->SetRightTab(right_line); - part->SetColumnGoodness(tabgrid->WidthCB()); - } -} - -// Makes the ColPartSets and puts them in the PartSetVector ready -// for finding column bounds. Returns false if no partitions were found. 
-bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) { - ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()]; - part_sets->reserve(gridheight()); - // Iterate the ColPartitions in the grid to get parts onto lists for the - // y bottom of each. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - bool any_parts_found = false; - while ((part = gsearch.NextFullSearch()) != nullptr) { - BlobRegionType blob_type = part->blob_type(); - if (blob_type != BRT_NOISE && - (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { - int grid_x, grid_y; - const TBOX& part_box = part->bounding_box(); - GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); - ColPartition_IT part_it(&part_lists[grid_y]); - part_it.add_to_end(part); - any_parts_found = true; - } - } - if (any_parts_found) { - for (int grid_y = 0; grid_y < gridheight(); ++grid_y) { - ColPartitionSet* line_set = nullptr; - if (!part_lists[grid_y].empty()) { - line_set = new ColPartitionSet(&part_lists[grid_y]); - } - part_sets->push_back(line_set); - } - } - delete [] part_lists; - return any_parts_found; -} - -// Makes a single ColPartitionSet consisting of a single ColPartition that -// represents the total horizontal extent of the significant content on the -// page. Used for the single column setting in place of automatic detection. -// Returns nullptr if the page is empty of significant content. -ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback* cb) { - ColPartition* single_column_part = nullptr; - // Iterate the ColPartitions in the grid to get parts onto lists for the - // y bottom of each. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - BlobRegionType blob_type = part->blob_type(); - if (blob_type != BRT_NOISE && - (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { - // Consider for single column. 
- BlobTextFlowType flow = part->flow(); - if ((blob_type == BRT_TEXT && - (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || - flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || - blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { - if (single_column_part == nullptr) { - single_column_part = part->ShallowCopy(); - single_column_part->set_blob_type(BRT_TEXT); - // Copy the tabs from itself to properly setup the margins. - single_column_part->CopyLeftTab(*single_column_part, false); - single_column_part->CopyRightTab(*single_column_part, false); - } else { - if (part->left_key() < single_column_part->left_key()) - single_column_part->CopyLeftTab(*part, false); - if (part->right_key() > single_column_part->right_key()) - single_column_part->CopyRightTab(*part, false); - } - } - } - } - if (single_column_part != nullptr) { - // Make a ColPartitionSet out of the single_column_part as a candidate - // for the single column case. - single_column_part->SetColumnGoodness(cb); - return new ColPartitionSet(single_column_part); - } - return nullptr; -} - -// Mark the BLOBNBOXes in each partition as being owned by that partition. -void ColPartitionGrid::ClaimBoxes() { - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part->ClaimBoxes(); - } -} - -// Retypes all the blobs referenced by the partitions in the grid. -// Image blobs are found and returned in the im_blobs list, as they are not -// owned by the block. -void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) { - BLOBNBOX_IT im_blob_it(im_blobs); - ColPartition_LIST dead_parts; - ColPartition_IT dead_part_it(&dead_parts); - // Iterate the ColPartitions in the grid. 
- ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - BlobRegionType blob_type = part->blob_type(); - BlobTextFlowType flow = part->flow(); - bool any_blobs_moved = false; - if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) { - BLOBNBOX_C_IT blob_it(part->boxes()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - im_blob_it.add_after_then_move(blob); - } - } else if (blob_type != BRT_NOISE) { - // Make sure the blobs are marked with the correct type and flow. - BLOBNBOX_C_IT blob_it(part->boxes()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->region_type() == BRT_NOISE) { - // TODO(rays) Deprecated. Change this section to an assert to verify - // and then delete. - ASSERT_HOST(blob->cblob()->area() != 0); - blob->set_owner(nullptr); - blob_it.extract(); - any_blobs_moved = true; - } else { - blob->set_region_type(blob_type); - if (blob->flow() != BTFT_LEADER) - blob->set_flow(flow); - } - } - } - if (blob_type == BRT_NOISE || part->boxes()->empty()) { - BLOBNBOX_C_IT blob_it(part->boxes()); - part->DisownBoxes(); - dead_part_it.add_to_end(part); - gsearch.RemoveBBox(); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->cblob()->area() == 0) { - // Any blob with zero area is a fake image blob and should be deleted. - delete blob->cblob(); - delete blob; - } - } - } else if (any_blobs_moved) { - gsearch.RemoveBBox(); - part->ComputeLimits(); - InsertBBox(true, true, part); - gsearch.RepositionIterator(); - } - } -} - -// The boxes within the partitions have changed (by deskew) so recompute -// the bounds of all the partitions and reinsert them into the grid. 
-void ColPartitionGrid::RecomputeBounds(int gridsize, - const ICOORD& bleft, - const ICOORD& tright, - const ICOORD& vertical) { - ColPartition_LIST saved_parts; - ColPartition_IT part_it(&saved_parts); - // Iterate the ColPartitions in the grid to get parts onto a list. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part_it.add_to_end(part); - } - // Reinitialize grid to the new size. - Init(gridsize, bleft, tright); - // Recompute the bounds of the parts and put them back in the new grid. - for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - part = part_it.extract(); - part->set_vertical(vertical); - part->ComputeLimits(); - InsertBBox(true, true, part); - } -} - -// Improves the margins of the ColPartitions in the grid by calling -// FindPartitionMargins on each. -// best_columns, which may be nullptr, is an array of pointers indicating the -// column set at each y-coordinate in the grid. -// best_columns is usually the best_columns_ member of ColumnFinder. -void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) { - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - // Set up a rectangle search x-bounded by the column and y by the part. - ColPartitionSet* columns = best_columns != nullptr - ? best_columns[gsearch.GridY()] - : nullptr; - FindPartitionMargins(columns, part); - const TBOX& box = part->bounding_box(); - if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { - tprintf("Computed margins for part:"); - part->Print(); - } - } -} - -// Improves the margins of the ColPartitions in the list by calling -// FindPartitionMargins on each. -// best_columns, which may be nullptr, is an array of pointers indicating the -// column set at each y-coordinate in the grid. 
-// best_columns is usually the best_columns_ member of ColumnFinder. -void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns, - ColPartition_LIST* parts) { - ColPartition_IT part_it(parts); - for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { - ColPartition* part = part_it.data(); - ColPartitionSet* columns = nullptr; - if (best_columns != nullptr) { - const TBOX& part_box = part->bounding_box(); - // Get the columns from the y grid coord. - int grid_x, grid_y; - GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); - columns = best_columns[grid_y]; - } - FindPartitionMargins(columns, part); - } -} - -// Deletes all the partitions in the grid after disowning all the blobs. -void ColPartitionGrid::DeleteParts() { - ColPartition_LIST dead_parts; - ColPartition_IT dead_it(&dead_parts); - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part->DisownBoxes(); - dead_it.add_to_end(part); // Parts will be deleted on return. - } - Clear(); -} - -// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and -// all the blobs in them. -void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) { - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->blob_type() == BRT_UNKNOWN) { - gsearch.RemoveBBox(); - // Once marked, the blobs will be swept up by DeleteUnownedNoise. - part->set_flow(BTFT_NONTEXT); - part->set_blob_type(BRT_NOISE); - part->SetBlobTypes(); - part->DisownBoxes(); - delete part; - } - } - block->DeleteUnownedNoise(); -} - -// Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER. 
-void ColPartitionGrid::DeleteNonLeaderParts() { - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->flow() != BTFT_LEADER) { - gsearch.RemoveBBox(); - if (part->ReleaseNonLeaderBoxes()) { - InsertBBox(true, true, part); - gsearch.RepositionIterator(); - } else { - delete part; - } - } - } -} - -// Finds and marks text partitions that represent figure captions. -void ColPartitionGrid::FindFigureCaptions() { - // For each image region find its best candidate text caption region, - // if any and mark it as such. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->IsImageType()) { - const TBOX& part_box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), - part_box.bottom()); - ColPartition* best_caption = nullptr; - int best_dist = 0; // Distance to best_caption. - int best_upper = 0; // Direction of best_caption. - // Handle both lower and upper directions. - for (int upper = 0; upper < 2; ++upper) { - ColPartition_C_IT partner_it(upper ? part->upper_partners() - : part->lower_partners()); - // If there are no image partners, then this direction is ok. - for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); - partner_it.forward()) { - ColPartition* partner = partner_it.data(); - if (partner->IsImageType()) { - break; - } - } - if (!partner_it.cycled_list()) continue; - // Find the nearest totally overlapping text partner. 
- for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); - partner_it.forward()) { - ColPartition* partner = partner_it.data(); - if (!partner->IsTextType() || partner->type() == PT_TABLE) continue; - const TBOX& partner_box = partner->bounding_box(); - if (debug) { - tprintf("Finding figure captions for image part:"); - part_box.print(); - tprintf("Considering partner:"); - partner_box.print(); - } - if (partner_box.left() >= part_box.left() && - partner_box.right() <= part_box.right()) { - int dist = partner_box.y_gap(part_box); - if (best_caption == nullptr || dist < best_dist) { - best_dist = dist; - best_caption = partner; - best_upper = upper; - } - } - } - } - if (best_caption != nullptr) { - if (debug) { - tprintf("Best caption candidate:"); - best_caption->bounding_box().print(); - } - // We have a candidate caption. Qualify it as being separable from - // any body text. We are looking for either a small number of lines - // or a big gap that indicates a separation from the body text. - int line_count = 0; - int biggest_gap = 0; - int smallest_gap = INT16_MAX; - int total_height = 0; - int mean_height = 0; - ColPartition* end_partner = nullptr; - ColPartition* next_partner = nullptr; - for (ColPartition* partner = best_caption; partner != nullptr && - line_count <= kMaxCaptionLines; - partner = next_partner) { - if (!partner->IsTextType()) { - end_partner = partner; - break; - } - ++line_count; - total_height += partner->bounding_box().height(); - next_partner = partner->SingletonPartner(best_upper); - if (next_partner != nullptr) { - int gap = partner->bounding_box().y_gap( - next_partner->bounding_box()); - if (gap > biggest_gap) { - biggest_gap = gap; - end_partner = next_partner; - mean_height = total_height / line_count; - } else if (gap < smallest_gap) { - smallest_gap = gap; - } - // If the gap looks big compared to the text size and the smallest - // gap seen so far, then we can stop. 
- if (biggest_gap > mean_height * kMinCaptionGapHeightRatio && - biggest_gap > smallest_gap * kMinCaptionGapRatio) - break; - } - } - if (debug) { - tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", - line_count, biggest_gap, smallest_gap, mean_height); - if (end_partner != nullptr) { - tprintf("End partner:"); - end_partner->bounding_box().print(); - } - } - if (next_partner == nullptr && line_count <= kMaxCaptionLines) - end_partner = nullptr; // No gap, but line count is small. - if (line_count <= kMaxCaptionLines) { - // This is a qualified caption. Mark the text as caption. - for (ColPartition* partner = best_caption; partner != nullptr && - partner != end_partner; - partner = next_partner) { - partner->set_type(PT_CAPTION_TEXT); - partner->SetBlobTypes(); - if (debug) { - tprintf("Set caption type for partition:"); - partner->bounding_box().print(); - } - next_partner = partner->SingletonPartner(best_upper); - } - } - } - } - } -} - -//////// Functions that manipulate ColPartitions in the part_grid_ ///// -//////// to find chains of partner partitions of the same type. /////// - -// For every ColPartition in the grid, finds its upper and lower neighbours. -void ColPartitionGrid::FindPartitionPartners() { - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->IsVerticalType()) { - FindVPartitionPartners(true, part); - FindVPartitionPartners(false, part); - } else { - FindPartitionPartners(true, part); - FindPartitionPartners(false, part); - } - } -} - -// Finds the best partner in the given direction for the given partition. -// Stores the result with AddPartner. -void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) { - if (part->type() == PT_NOISE) - return; // Noise is not allowed to partner anything. 
- const TBOX& box = part->bounding_box(); - int top = part->median_top(); - int bottom = part->median_bottom(); - int height = top - bottom; - int mid_y = (bottom + top) / 2; - ColPartitionGridSearch vsearch(this); - // Search down for neighbour below - vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY()); - ColPartition* neighbour; - ColPartition* best_neighbour = nullptr; - int best_dist = INT32_MAX; - while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) { - if (neighbour == part || neighbour->type() == PT_NOISE) - continue; // Noise is not allowed to partner anything. - int neighbour_bottom = neighbour->median_bottom(); - int neighbour_top = neighbour->median_top(); - int neighbour_y = (neighbour_bottom + neighbour_top) / 2; - if (upper != (neighbour_y > mid_y)) - continue; - if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) - continue; - if (!part->TypesMatch(*neighbour)) { - if (best_neighbour == nullptr) - best_neighbour = neighbour; - continue; - } - int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; - if (dist <= kMaxPartitionSpacing * height) { - if (dist < best_dist) { - best_dist = dist; - best_neighbour = neighbour; - } - } else { - break; - } - } - if (best_neighbour != nullptr) - part->AddPartner(upper, best_neighbour); -} - -// Finds the best partner in the given direction for the given partition. -// Stores the result with AddPartner. -void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, - ColPartition* part) { - if (part->type() == PT_NOISE) - return; // Noise is not allowed to partner anything. - const TBOX& box = part->bounding_box(); - int left = part->median_left(); - int right = part->median_right(); - int width = right >= left ? 
right - left : -1; - int mid_x = (left + right) / 2; - ColPartitionGridSearch hsearch(this); - // Search left for neighbour to_the_left - hsearch.StartSideSearch(mid_x, box.bottom(), box.top()); - ColPartition* neighbour; - ColPartition* best_neighbour = nullptr; - int best_dist = INT32_MAX; - while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) { - if (neighbour == part || neighbour->type() == PT_NOISE) - continue; // Noise is not allowed to partner anything. - int neighbour_left = neighbour->median_left(); - int neighbour_right = neighbour->median_right(); - int neighbour_x = (neighbour_left + neighbour_right) / 2; - if (to_the_left != (neighbour_x < mid_x)) - continue; - if (!part->VOverlaps(*neighbour)) - continue; - if (!part->TypesMatch(*neighbour)) - continue; // Only match to other vertical text. - int dist = to_the_left ? left - neighbour_right : neighbour_left - right; - if (dist <= kMaxPartitionSpacing * width) { - if (dist < best_dist || best_neighbour == nullptr) { - best_dist = dist; - best_neighbour = neighbour; - } - } else { - break; - } - } - // For vertical partitions, the upper partner is to the left, and lower is - // to the right. - if (best_neighbour != nullptr) - part->AddPartner(to_the_left, best_neighbour); -} - -// For every ColPartition with multiple partners in the grid, reduces the -// number of partners to 0 or 1. If get_desperate is true, goes to more -// desperate merge methods to merge flowing text before breaking partnerships. -void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { - ColPartitionGridSearch gsearch(this); - // Refine in type order so that chasing multiple partners can be done - // before eliminating type mis-matching partners. - for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) { - // Iterate the ColPartitions in the grid. 
- gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - part->RefinePartners(static_cast(type), - get_desperate, this); - // Iterator may have been messed up by a merge. - gsearch.RepositionIterator(); - } - } -} - - -// ========================== PRIVATE CODE ======================== - -// Finds and returns a list of candidate ColPartitions to merge with part. -// The candidates must overlap search_box, and when merged must not -// overlap any other partitions that are not overlapped by each individually. -void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, - const TBOX& search_box, bool debug, - ColPartition_CLIST* candidates) { - int ok_overlap = - static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); - const TBOX& part_box = part->bounding_box(); - // Now run the rect search. - ColPartitionGridSearch rsearch(this); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(search_box); - ColPartition* candidate; - while ((candidate = rsearch.NextRectSearch()) != nullptr) { - if (!OKMergeCandidate(part, candidate, debug)) - continue; - const TBOX& c_box = candidate->bounding_box(); - // Candidate seems to be a potential merge with part. If one contains - // the other, then the merge is a no-brainer. Otherwise, search the - // combined box to see if anything else is inappropriately overlapped. - if (!part_box.contains(c_box) && !c_box.contains(part_box)) { - // Search the combined rectangle to see if anything new is overlapped. - // This is a preliminary test designed to quickly weed-out poor - // merge candidates that would create a big list of overlapped objects - // for the squared-order overlap analysis. Eg. 
vertical and horizontal - // line-like objects that overlap real text when merged: - // || ========================== - // || - // || r e a l t e x t - // || - // || - TBOX merged_box(part_box); - merged_box += c_box; - ColPartitionGridSearch msearch(this); - msearch.SetUniqueMode(true); - msearch.StartRectSearch(merged_box); - ColPartition* neighbour; - while ((neighbour = msearch.NextRectSearch()) != nullptr) { - if (neighbour == part || neighbour == candidate) - continue; // Ignore itself. - if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false)) - continue; // This kind of merge overlap is OK. - TBOX n_box = neighbour->bounding_box(); - // The overlap is OK if: - // * the n_box already overlapped the part or the candidate OR - // * the n_box is a suitable merge with either part or candidate - if (!n_box.overlap(part_box) && !n_box.overlap(c_box) && - !OKMergeCandidate(part, neighbour, false) && - !OKMergeCandidate(candidate, neighbour, false)) - break; - } - if (neighbour != nullptr) { - if (debug) { - tprintf("Combined box overlaps another that is not OK despite" - " allowance of %d:", ok_overlap); - neighbour->bounding_box().print(); - tprintf("Reason:"); - OKMergeCandidate(part, neighbour, true); - tprintf("...and:"); - OKMergeCandidate(candidate, neighbour, true); - tprintf("Overlap:"); - neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true); - } - continue; - } - } - if (debug) { - tprintf("Adding candidate:"); - candidate->bounding_box().print(); - } - // Unique elements as they arrive. - candidates->add_sorted(SortByBoxLeft, true, candidate); - } -} - -// Smoothes the region type/flow type of the given part by looking at local -// neighbours and the given image mask. Searches a padded rectangle with the -// padding truncated on one size of the part's box in turn for each side, -// using the result (if any) that has the least distance to all neighbours -// that contribute to the decision. 
This biases in favor of rectangular -// regions without completely enforcing them. -// If a good decision cannot be reached, the part is left unchanged. -// im_box and rerotation are used to map blob coordinates onto the -// nontext_map, which is used to prevent the spread of text neighbourhoods -// into images. -// Returns true if the partition was changed. -bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, - ColPartition* part) { - const TBOX& part_box = part->bounding_box(); - if (debug) { - tprintf("Smooothing part at:"); - part_box.print(); - } - BlobRegionType best_type = BRT_UNKNOWN; - int best_dist = INT32_MAX; - int max_dist = std::min(part_box.width(), part_box.height()); - max_dist = std::max(max_dist * kMaxNeighbourDistFactor, gridsize() * 2); - // Search with the pad truncated on each side of the box in turn. - bool any_image = false; - bool all_image = true; - for (int d = 0; d < BND_COUNT; ++d) { - int dist; - BlobNeighbourDir dir = static_cast(d); - BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box, - rerotation, debug, *part, - &dist); - if (debug) { - tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist); - } - if (type != BRT_UNKNOWN && dist < best_dist) { - best_dist = dist; - best_type = type; - } - if (type == BRT_POLYIMAGE) - any_image = true; - else - all_image = false; - } - if (best_dist > max_dist) - return false; // Too far away to set the type with it. - if (part->flow() == BTFT_STRONG_CHAIN && !all_image) { - return false; // We are not modifying it. 
- } - BlobRegionType new_type = part->blob_type(); - BlobTextFlowType new_flow = part->flow(); - if (best_type == BRT_TEXT && !any_image) { - new_flow = BTFT_STRONG_CHAIN; - new_type = BRT_TEXT; - } else if (best_type == BRT_VERT_TEXT && !any_image) { - new_flow = BTFT_STRONG_CHAIN; - new_type = BRT_VERT_TEXT; - } else if (best_type == BRT_POLYIMAGE) { - new_flow = BTFT_NONTEXT; - new_type = BRT_UNKNOWN; - } - if (new_type != part->blob_type() || new_flow != part->flow()) { - part->set_flow(new_flow); - part->set_blob_type(new_type); - part->SetBlobTypes(); - if (debug) { - tprintf("Modified part:"); - part->Print(); - } - return true; - } else { - return false; - } -} - -// Sets up a search box based on the part_box, padded in all directions -// except direction. Also setup dist_scaling to weight x,y distances according -// to the given direction. -static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, - const TBOX& part_box, - int min_padding, - TBOX* search_box, - ICOORD* dist_scaling) { - *search_box = part_box; - // Generate a pad value based on the min dimension of part_box, but at least - // min_padding and then scaled by kMaxPadFactor. - int padding = std::min(part_box.height(), part_box.width()); - padding = std::max(padding, min_padding); - padding *= kMaxPadFactor; - search_box->pad(padding, padding); - // Truncate the box in the appropriate direction and make the distance - // metric slightly biased in the truncated direction. 
- switch (direction) { - case BND_LEFT: - search_box->set_left(part_box.left()); - *dist_scaling = ICOORD(2, 1); - break; - case BND_BELOW: - search_box->set_bottom(part_box.bottom()); - *dist_scaling = ICOORD(1, 2); - break; - case BND_RIGHT: - search_box->set_right(part_box.right()); - *dist_scaling = ICOORD(2, 1); - break; - case BND_ABOVE: - search_box->set_top(part_box.top()); - *dist_scaling = ICOORD(1, 2); - break; - default: - ASSERT_HOST(false); - } -} - -// Local enum used by SmoothInOneDirection and AccumulatePartDistances -// for the different types of partition neighbour. -enum NeighbourPartitionType { - NPT_HTEXT, // Definite horizontal text. - NPT_VTEXT, // Definite vertical text. - NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but - // image for image and VTEXT. - NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but - // image for image and HTEXT. - NPT_IMAGE, // Defininte non-text. - NPT_COUNT // Number of array elements. -}; - -// Executes the search for SmoothRegionType in a single direction. -// Creates a bounding box that is padded in all directions except direction, -// and searches it for other partitions. Finds the nearest collection of -// partitions that makes a decisive result (if any) and returns the type -// and the distance of the collection. If there are any pixels in the -// nontext_map, then the decision is biased towards image. -BlobRegionType ColPartitionGrid::SmoothInOneDirection( - BlobNeighbourDir direction, Pix* nontext_map, - const TBOX& im_box, const FCOORD& rerotation, - bool debug, const ColPartition& part, int* best_distance) { - // Set up a rectangle search bounded by the part. 
- const TBOX& part_box = part.bounding_box(); - TBOX search_box; - ICOORD dist_scaling; - ComputeSearchBoxAndScaling(direction, part_box, gridsize(), - &search_box, &dist_scaling); - bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box, - rerotation, - nontext_map) > 0; - GenericVector dists[NPT_COUNT]; - AccumulatePartDistances(part, dist_scaling, search_box, - nontext_map, im_box, rerotation, debug, dists); - // By iteratively including the next smallest distance across the vectors, - // (as in a merge sort) we can use the vector indices as counts of each type - // and find the nearest set of objects that give us a definite decision. - int counts[NPT_COUNT]; - memset(counts, 0, sizeof(counts[0]) * NPT_COUNT); - // If there is image in the search box, tip the balance in image's favor. - int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0; - BlobRegionType text_dir = part.blob_type(); - BlobTextFlowType flow_type = part.flow(); - int min_dist = 0; - do { - // Find the minimum new entry across the vectors - min_dist = INT32_MAX; - for (int i = 0; i < NPT_COUNT; ++i) { - if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist) - min_dist = dists[i][counts[i]]; - } - // Step all the indices/counts forward to include min_dist. - for (int i = 0; i < NPT_COUNT; ++i) { - while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist) - ++counts[i]; - } - *best_distance = min_dist; - if (debug) { - tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n", - counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], - counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], - counts[NPT_IMAGE], image_bias, min_dist); - } - // See if we have a decision yet. 
- int image_count = counts[NPT_IMAGE]; - int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - - (image_count + counts[NPT_WEAK_VTEXT]); - int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - - (image_count + counts[NPT_WEAK_HTEXT]); - if (image_count > 0 && - image_bias - htext_score >= kSmoothDecisionMargin && - image_bias - vtext_score >= kSmoothDecisionMargin) { - *best_distance = dists[NPT_IMAGE][0]; - if (!dists[NPT_WEAK_VTEXT].empty() && - *best_distance > dists[NPT_WEAK_VTEXT][0]) - *best_distance = dists[NPT_WEAK_VTEXT][0]; - if (!dists[NPT_WEAK_HTEXT].empty() && - *best_distance > dists[NPT_WEAK_HTEXT][0]) - *best_distance = dists[NPT_WEAK_HTEXT][0]; - return BRT_POLYIMAGE; - } - if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && - counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) { - *best_distance = dists[NPT_HTEXT][0]; - return BRT_TEXT; - } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && - counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { - *best_distance = dists[NPT_VTEXT][0]; - return BRT_VERT_TEXT; - } - } while (min_dist < INT32_MAX); - return BRT_UNKNOWN; -} - -// Counts the partitions in the given search_box by appending the gap -// distance (scaled by dist_scaling) of the part from the base_part to the -// vector of the appropriate type for the partition. Prior to return, the -// vectors in the dists array are sorted in increasing order. -// The nontext_map (+im_box, rerotation) is used to make text invisible if -// there is non-text in between. -// dists must be an array of GenericVectors of size NPT_COUNT. 
-void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, - const ICOORD& dist_scaling, - const TBOX& search_box, - Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, - GenericVector* dists) { - const TBOX& part_box = base_part.bounding_box(); - ColPartitionGridSearch rsearch(this); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(search_box); - ColPartition* neighbour; - // Search for compatible neighbours with a similar strokewidth, but not - // on the other side of a tab vector. - while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (neighbour->IsUnMergeableType() || - !base_part.ConfirmNoTabViolation(*neighbour) || - neighbour == &base_part) - continue; - TBOX nbox = neighbour->bounding_box(); - BlobRegionType n_type = neighbour->blob_type(); - if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && - !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, - nontext_map)) - continue; // Text not visible the other side of image. - if (BLOBNBOX::IsLineType(n_type)) - continue; // Don't use horizontal lines as neighbours. - int x_gap = std::max(part_box.x_gap(nbox), 0); - int y_gap = std::max(part_box.y_gap(nbox), 0); - int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y(); - if (debug) { - tprintf("Part has x-gap=%d, y=%d, dist=%d at:", - x_gap, y_gap, n_dist); - nbox.print(); - } - // Truncate the number of boxes, so text doesn't get too much advantage. - int n_boxes = std::min(neighbour->boxes_count(), kSmoothDecisionMargin); - BlobTextFlowType n_flow = neighbour->flow(); - GenericVector* count_vector = nullptr; - if (n_flow == BTFT_STRONG_CHAIN) { - if (n_type == BRT_TEXT) - count_vector = &dists[NPT_HTEXT]; - else - count_vector = &dists[NPT_VTEXT]; - if (debug) { - tprintf("%s %d\n", n_type == BRT_TEXT ? 
"Htext" : "Vtext", n_boxes); - } - } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && - (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) { - // Medium text counts as weak, and all else counts as image. - if (n_type == BRT_TEXT) - count_vector = &dists[NPT_WEAK_HTEXT]; - else - count_vector = &dists[NPT_WEAK_VTEXT]; - if (debug) tprintf("Weak %d\n", n_boxes); - } else { - count_vector = &dists[NPT_IMAGE]; - if (debug) tprintf("Image %d\n", n_boxes); - } - if (count_vector != nullptr) { - for (int i = 0; i < n_boxes; ++i) - count_vector->push_back(n_dist); - } - if (debug) { - neighbour->Print(); - } - } - for (int i = 0; i < NPT_COUNT; ++i) - dists[i].sort(); -} - -// Improves the margins of the part ColPartition by searching for -// neighbours that vertically overlap significantly. -// columns may be nullptr, and indicates the assigned column structure this -// is applicable to part. -void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns, - ColPartition* part) { - // Set up a rectangle search x-bounded by the column and y by the part. - TBOX box = part->bounding_box(); - int y = part->MidY(); - // Initial left margin is based on the column, if there is one. - int left_margin = bleft().x(); - int right_margin = tright().x(); - if (columns != nullptr) { - ColPartition* column = columns->ColumnContaining(box.left(), y); - if (column != nullptr) - left_margin = column->LeftAtY(y); - column = columns->ColumnContaining(box.right(), y); - if (column != nullptr) - right_margin = column->RightAtY(y); - } - left_margin -= kColumnWidthFactor; - right_margin += kColumnWidthFactor; - // Search for ColPartitions that reduce the margin. - left_margin = FindMargin(box.left() + box.height(), true, left_margin, - box.bottom(), box.top(), part); - part->set_left_margin(left_margin); - // Search for ColPartitions that reduce the margin. 
- right_margin = FindMargin(box.right() - box.height(), false, right_margin, - box.bottom(), box.top(), part); - part->set_right_margin(right_margin); -} - -// Starting at x, and going in the specified direction, up to x_limit, finds -// the margin for the given y range by searching sideways, -// and ignoring not_this. -int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, - int y_bottom, int y_top, - const ColPartition* not_this) { - int height = y_top - y_bottom; - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch side_search(this); - side_search.SetUniqueMode(true); - side_search.StartSideSearch(x, y_bottom, y_top); - ColPartition* part; - while ((part = side_search.NextSideSearch(right_to_left)) != nullptr) { - // Ignore itself. - if (part == not_this) // || part->IsLineType()) - continue; - // Must overlap by enough, based on the min of the heights, so - // large partitions can't smash through small ones. - TBOX box = part->bounding_box(); - int min_overlap = std::min(height, static_cast(box.height())); - min_overlap = static_cast(min_overlap * kMarginOverlapFraction + 0.5); - int y_overlap = std::min(y_top, static_cast(box.top())) - std::max(y_bottom, static_cast(box.bottom())); - if (y_overlap < min_overlap) - continue; - // Must be going the right way. - int x_edge = right_to_left ? box.right() : box.left(); - if ((x_edge < x) != right_to_left) - continue; - // If we have gone past x_limit, then x_limit will do. - if ((x_edge < x_limit) == right_to_left) - break; - // It reduces x limit, so save the new one. - x_limit = x_edge; - } - return x_limit; -} - - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitiongrid.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitiongrid.h deleted file mode 100644 index 5b70702d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitiongrid.h +++ /dev/null @@ -1,253 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colpartitiongrid.h -// Description: Class collecting code that acts on a BBGrid of ColPartitions. -// Author: Ray Smith -// Created: Mon Oct 05 08:42:01 PDT 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H_ -#define TESSERACT_TEXTORD_COLPARTITIONGRID_H_ - -#include "bbgrid.h" -#include "colpartition.h" -#include "colpartitionset.h" - -namespace tesseract { - -class TabFind; - -// ColPartitionGrid is a BBGrid of ColPartition. -// It collects functions that work on the grid. -class ColPartitionGrid : public BBGrid { - public: - ColPartitionGrid() = default; - ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); - - virtual ~ColPartitionGrid() = default; - - // Handles a click event in a display window. - void HandleClick(int x, int y); - - // Merges ColPartitions in the grid that look like they belong in the same - // textline. 
- // For all partitions in the grid, calls the box_cb permanent callback - // to compute the search box, searches the box, and if a candidate is found, - // calls the confirm_cb to check any more rules. If the confirm_cb returns - // true, then the partitions are merged. - // Both callbacks are deleted before returning. - void Merges(TessResultCallback2* box_cb, - TessResultCallback2* confirm_cb); - - // For the given partition, calls the box_cb permanent callback - // to compute the search box, searches the box, and if a candidate is found, - // calls the confirm_cb to check any more rules. If the confirm_cb returns - // true, then the partitions are merged. - // Returns true if the partition is consumed by one or more merges. - bool MergePart(TessResultCallback2* box_cb, - TessResultCallback2* confirm_cb, - ColPartition* part); - - // Computes and returns the total overlap of all partitions in the grid. - // If overlap_grid is non-null, it is filled with a grid that holds empty - // partitions representing the union of all overlapped partitions. - int ComputeTotalOverlap(ColPartitionGrid** overlap_grid); - - // Finds all the ColPartitions in the grid that overlap with the given - // box and returns them SortByBoxLeft(ed) and uniqued in the given list. - // Any partition equal to not_this (may be nullptr) is excluded. - void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this, - ColPartition_CLIST* parts); - - // Finds and returns the best candidate ColPartition to merge with part, - // selected from the candidates list, based on the minimum increase in - // pairwise overlap among all the partitions overlapped by the combined box. - // If overlap_increase is not nullptr then it returns the increase in overlap - // that would result from the merge. - // See colpartitiongrid.cpp for a diagram. 
- ColPartition* BestMergeCandidate( - const ColPartition* part, ColPartition_CLIST* candidates, bool debug, - TessResultCallback2* confirm_cb, - int* overlap_increase); - - // Split partitions where it reduces overlap between their bounding boxes. - // ColPartitions are after all supposed to be a partitioning of the blobs - // AND of the space on the page! - // Blobs that cause overlaps get removed, put in individual partitions - // and added to the big_parts list. They are most likely characters on - // 2 textlines that touch, or something big like a dropcap. - void SplitOverlappingPartitions(ColPartition_LIST* big_parts); - - // Filters partitions of source_type by looking at local neighbours. - // Where a majority of neighbours have a text type, the partitions are - // changed to text, where the neighbours have image type, they are changed - // to image, and partitions that have no definite neighbourhood type are - // left unchanged. - // im_box and rerotation are used to map blob coordinates onto the - // nontext_map, which is used to prevent the spread of text neighbourhoods - // into images. - // Returns true if anything was changed. - bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map, - const TBOX& im_box, const FCOORD& rerotation); - - // Reflects the grid and its colpartitions in the y-axis, assuming that - // all blob boxes have already been done. - void ReflectInYAxis(); - - // Rotates the grid and its colpartitions by the given angle, assuming that - // all blob boxes have already been done. - void Deskew(const FCOORD& deskew); - - // Transforms the grid of partitions to the output blocks, putting each - // partition into a separate block. We don't really care about the order, - // as we just want to get as much text as possible without trying to organize - // it into proper blocks or columns. 
- void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); - - // Sets the left and right tabs of the partitions in the grid. - void SetTabStops(TabFind* tabgrid); - - // Makes the ColPartSets and puts them in the PartSetVector ready - // for finding column bounds. Returns false if no partitions were found. - // Each ColPartition in the grid is placed in a single ColPartSet based - // on the bottom-left of its bounding box. - bool MakeColPartSets(PartSetVector* part_sets); - - // Makes a single ColPartitionSet consisting of a single ColPartition that - // represents the total horizontal extent of the significant content on the - // page. Used for the single column setting in place of automatic detection. - // Returns nullptr if the page is empty of significant content. - ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb); - - // Mark the BLOBNBOXes in each partition as being owned by that partition. - void ClaimBoxes(); - - // Retypes all the blobs referenced by the partitions in the grid. - // Image blobs are sliced on the grid boundaries to give the tab finder - // a better handle on the edges of the images, and the actual blobs are - // returned in the im_blobs list, as they are not owned by the block. - void ReTypeBlobs(BLOBNBOX_LIST* im_blobs); - - // The boxes within the partitions have changed (by deskew) so recompute - // the bounds of all the partitions and reinsert them into the grid. - void RecomputeBounds(int gridsize, const ICOORD& bleft, - const ICOORD& tright, const ICOORD& vertical); - - // Improves the margins of the ColPartitions in the grid by calling - // FindPartitionMargins on each. - void GridFindMargins(ColPartitionSet** best_columns); - - // Improves the margins of the ColPartitions in the list by calling - // FindPartitionMargins on each. - void ListFindMargins(ColPartitionSet** best_columns, - ColPartition_LIST* parts); - - // Deletes all the partitions in the grid after disowning all the blobs. 
- void DeleteParts(); - - // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and - // all the blobs in them. - void DeleteUnknownParts(TO_BLOCK* block); - - // Deletes all the partitions in the grid that are NOT of flow type - // BTFT_LEADER. - void DeleteNonLeaderParts(); - - // Finds and marks text partitions that represent figure captions. - void FindFigureCaptions(); - - //////// Functions that manipulate ColPartitions in the grid /////// - //////// to find chains of partner partitions of the same type. /////// - // For every ColPartition in the grid, finds its upper and lower neighbours. - void FindPartitionPartners(); - // Finds the best partner in the given direction for the given partition. - // Stores the result with AddPartner. - void FindPartitionPartners(bool upper, ColPartition* part); - // Finds the best partner in the given direction for the given partition. - // Stores the result with AddPartner. - void FindVPartitionPartners(bool to_the_left, ColPartition* part); - // For every ColPartition with multiple partners in the grid, reduces the - // number of partners to 0 or 1. If get_desperate is true, goes to more - // desperate merge methods to merge flowing text before breaking partnerships. - void RefinePartitionPartners(bool get_desperate); - - private: - // Finds and returns a list of candidate ColPartitions to merge with part. - // The candidates must overlap search_box, and when merged must not - // overlap any other partitions that are not overlapped by each individually. - void FindMergeCandidates(const ColPartition* part, const TBOX& search_box, - bool debug, ColPartition_CLIST* candidates); - - // Smoothes the region type/flow type of the given part by looking at local - // neighbours and the given image mask. 
Searches a padded rectangle with the - // padding truncated on one size of the part's box in turn for each side, - // using the result (if any) that has the least distance to all neighbours - // that contribute to the decision. This biases in favor of rectangular - // regions without completely enforcing them. - // If a good decision cannot be reached, the part is left unchanged. - // im_box and rerotation are used to map blob coordinates onto the - // nontext_map, which is used to prevent the spread of text neighbourhoods - // into images. - // Returns true if the partition was changed. - bool SmoothRegionType(Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, - ColPartition* part); - // Executes the search for SmoothRegionType in a single direction. - // Creates a bounding box that is padded in all directions except direction, - // and searches it for other partitions. Finds the nearest collection of - // partitions that makes a decisive result (if any) and returns the type - // and the distance of the collection. If there are any pixels in the - // nontext_map, then the decision is biased towards image. - BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, - Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, - const ColPartition& part, - int* best_distance); - // Counts the partitions in the given search_box by appending the gap - // distance (scaled by dist_scaling) of the part from the base_part to the - // vector of the appropriate type for the partition. Prior to return, the - // vectors in the dists array are sorted in increasing order. - // dists must be an array of GenericVectors of size NPT_COUNT. 
- void AccumulatePartDistances(const ColPartition& base_part, - const ICOORD& dist_scaling, - const TBOX& search_box, - Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, - GenericVector* dists); - - // Improves the margins of the ColPartition by searching for - // neighbours that vertically overlap significantly. - void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); - - // Starting at x, and going in the specified direction, up to x_limit, finds - // the margin for the given y range by searching sideways, - // and ignoring not_this. - int FindMargin(int x, bool right_to_left, int x_limit, - int y_bottom, int y_top, const ColPartition* not_this); -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitionset.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitionset.cpp deleted file mode 100644 index 8e13d445..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitionset.cpp +++ /dev/null @@ -1,666 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colpartitionset.cpp -// Description: Class to hold a list of ColPartitions of the page that -// correspond roughly to columns. -// Author: Ray Smith -// Created: Thu Aug 14 10:54:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "colpartitionset.h" -#include "workingpartset.h" -#include "tablefind.h" - -namespace tesseract { - -// Minimum width of a column to be interesting as a multiple of resolution. -const double kMinColumnWidth = 2.0 / 3; - -ELISTIZE(ColPartitionSet) - -ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) { - ColPartition_IT it(&parts_); - it.add_list_after(partitions); - ComputeCoverage(); -} - -ColPartitionSet::ColPartitionSet(ColPartition* part) { - ColPartition_IT it(&parts_); - it.add_after_then_move(part); - ComputeCoverage(); -} - -// Returns the number of columns of good width. -int ColPartitionSet::GoodColumnCount() const { - int num_good_cols = 0; - // This is a read-only iteration of the list. - ColPartition_IT it(const_cast(&parts_)); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (it.data()->good_width()) ++num_good_cols; - } - return num_good_cols; -} - -// Return an element of the parts_ list from its index. -ColPartition* ColPartitionSet::GetColumnByIndex(int index) { - ColPartition_IT it(&parts_); - it.mark_cycle_pt(); - for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()); - if (it.cycled_list()) - return nullptr; - return it.data(); -} - -// Return the ColPartition that contains the given coords, if any, else nullptr. -ColPartition* ColPartitionSet::ColumnContaining(int x, int y) { - ColPartition_IT it(&parts_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - if (part->ColumnContains(x, y)) - return part; - } - return nullptr; -} - -// Extract all the parts from the list, relinquishing ownership. 
-void ColPartitionSet::RelinquishParts() { - ColPartition_IT it(&parts_); - while (!it.empty()) { - it.extract(); - it.forward(); - } -} - -// Attempt to improve this by adding partitions or expanding partitions. -void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb, - PartSetVector* src_sets) { - int set_size = src_sets->size(); - // Iterate over the provided column sets, as each one may have something - // to improve this. - for (int i = 0; i < set_size; ++i) { - ColPartitionSet* column_set = src_sets->get(i); - if (column_set == nullptr) - continue; - // Iterate over the parts in this and column_set, adding bigger or - // new parts in column_set to this. - ColPartition_IT part_it(&parts_); - ASSERT_HOST(!part_it.empty()); - int prev_right = INT32_MIN; - part_it.mark_cycle_pt(); - ColPartition_IT col_it(&column_set->parts_); - for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { - ColPartition* col_part = col_it.data(); - if (col_part->blob_type() < BRT_UNKNOWN) - continue; // Ignore image partitions. - int col_left = col_part->left_key(); - int col_right = col_part->right_key(); - // Sync-up part_it (in this) so it matches the col_part in column_set. - ColPartition* part = part_it.data(); - while (!part_it.at_last() && part->right_key() < col_left) { - prev_right = part->right_key(); - part_it.forward(); - part = part_it.data(); - } - int part_left = part->left_key(); - int part_right = part->right_key(); - if (part_right < col_left || col_right < part_left) { - // There is no overlap so this is a new partition. - AddPartition(col_part->ShallowCopy(), &part_it); - continue; - } - // Check the edges of col_part to see if they can improve part. - bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right)); - if (col_left < part_left && col_left > prev_right) { - // The left edge of the column is better and it doesn't overlap, - // so we can potentially expand it. 
- int col_box_left = col_part->BoxLeftKey(); - bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right)); - bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right)); - if (tab_width_ok || (!part_width_ok)) { - // The tab is leaving the good column metric at least as good as - // it was before, so use the tab. - part->CopyLeftTab(*col_part, false); - part->SetColumnGoodness(cb); - } else if (col_box_left < part_left && - (box_width_ok || !part_width_ok)) { - // The box is leaving the good column metric at least as good as - // it was before, so use the box. - part->CopyLeftTab(*col_part, true); - part->SetColumnGoodness(cb); - } - part_left = part->left_key(); - } - if (col_right > part_right && - (part_it.at_last() || - part_it.data_relative(1)->left_key() > col_right)) { - // The right edge is better, so we can possibly expand it. - int col_box_right = col_part->BoxRightKey(); - bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right)); - bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right)); - if (tab_width_ok || (!part_width_ok)) { - // The tab is leaving the good column metric at least as good as - // it was before, so use the tab. - part->CopyRightTab(*col_part, false); - part->SetColumnGoodness(cb); - } else if (col_box_right > part_right && - (box_width_ok || !part_width_ok)) { - // The box is leaving the good column metric at least as good as - // it was before, so use the box. - part->CopyRightTab(*col_part, true); - part->SetColumnGoodness(cb); - } - } - } - } - ComputeCoverage(); -} - -// If this set is good enough to represent a new partitioning into columns, -// add it to the vector of sets, otherwise delete it. 
-void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets, - WidthCallback* cb) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom()); - if (debug) { - tprintf("Considering new column candidate:\n"); - Print(); - } - if (!LegalColumnCandidate()) { - if (debug) { - tprintf("Not a legal column candidate:\n"); - Print(); - } - delete this; - return; - } - for (int i = 0; i < column_sets->size(); ++i) { - ColPartitionSet* columns = column_sets->get(i); - // In ordering the column set candidates, good_coverage_ is king, - // followed by good_column_count_ and then bad_coverage_. - bool better = good_coverage_ > columns->good_coverage_; - if (good_coverage_ == columns->good_coverage_) { - better = good_column_count_ > columns->good_column_count_; - if (good_column_count_ == columns->good_column_count_) { - better = bad_coverage_ > columns->bad_coverage_; - } - } - if (better) { - // The new one is better so add it. - if (debug) - tprintf("Good one\n"); - column_sets->insert(this, i); - return; - } - if (columns->CompatibleColumns(false, this, cb)) { - if (debug) - tprintf("Duplicate\n"); - delete this; - return; // It is not unique. - } - } - if (debug) - tprintf("Added to end\n"); - column_sets->push_back(this); -} - -// Return true if the partitions in other are all compatible with the columns -// in this. 
-bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, - WidthCallback* cb) { - if (debug) { - tprintf("CompatibleColumns testing compatibility\n"); - Print(); - other->Print(); - } - if (other->parts_.empty()) { - if (debug) - tprintf("CompatibleColumns true due to empty other\n"); - return true; - } - ColPartition_IT it(&other->parts_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - if (part->blob_type() < BRT_UNKNOWN) { - if (debug) { - tprintf("CompatibleColumns ignoring image partition\n"); - part->Print(); - } - continue; // Image partitions are irrelevant to column compatibility. - } - int y = part->MidY(); - int left = part->bounding_box().left(); - int right = part->bounding_box().right(); - ColPartition* left_col = ColumnContaining(left, y); - ColPartition* right_col = ColumnContaining(right, y); - if (right_col == nullptr || left_col == nullptr) { - if (debug) { - tprintf("CompatibleColumns false due to partition edge outside\n"); - part->Print(); - } - return false; // A partition edge lies outside of all columns - } - if (right_col != left_col && cb->Run(right - left)) { - if (debug) { - tprintf("CompatibleColumns false due to good width in multiple cols\n"); - part->Print(); - } - return false; // Partition with a good width must be in a single column. - } - - ColPartition_IT it2= it; - while (!it2.at_last()) { - it2.forward(); - ColPartition* next_part = it2.data(); - if (!BLOBNBOX::IsTextType(next_part->blob_type())) - continue; // Non-text partitions are irrelevant. - int next_left = next_part->bounding_box().left(); - if (next_left == right) { - break; // They share the same edge, so one must be a pull-out. - } - // Search to see if right and next_left fall within a single column. - ColPartition* next_left_col = ColumnContaining(next_left, y); - if (right_col == next_left_col) { - // There is a column break in this column. 
- // This can be due to a figure caption within a column, a pull-out - // block, or a simple broken textline that remains to be merged: - // all allowed, or a change in column layout: not allowed. - // If both partitions are of good width, then it is likely - // a change in column layout, otherwise probably an allowed situation. - if (part->good_width() && next_part->good_width()) { - if (debug) { - int next_right = next_part->bounding_box().right(); - tprintf("CompatibleColumns false due to 2 parts of good width\n"); - tprintf("part1 %d-%d, part2 %d-%d\n", - left, right, next_left, next_right); - right_col->Print(); - } - return false; - } - } - break; - } - } - if (debug) - tprintf("CompatibleColumns true!\n"); - return true; -} - -// Returns the total width of all blobs in the part_set that do not lie -// within an approved column. Used as a cost measure for using this -// column set over another that might be compatible. -int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { - int total_width = 0; - ColPartition_IT it(&part_set->parts_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - if (!BLOBNBOX::IsTextType(part->blob_type())) { - continue; // Non-text partitions are irrelevant to column compatibility. - } - int y = part->MidY(); - BLOBNBOX_C_IT box_it(part->boxes()); - for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { - const TBOX& box = it.data()->bounding_box(); - // Assume that the whole blob is outside any column iff its x-middle - // is outside. - int x = (box.left() + box.right()) / 2; - ColPartition* col = ColumnContaining(x, y); - if (col == nullptr) - total_width += box.width(); - } - } - return total_width; -} - -// Return true if this ColPartitionSet makes a legal column candidate by -// having legal individual partitions and non-overlapping adjacent pairs. 
-bool ColPartitionSet::LegalColumnCandidate() { - ColPartition_IT it(&parts_); - if (it.empty()) - return false; - bool any_text_parts = false; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - if (BLOBNBOX::IsTextType(part->blob_type())) { - if (!part->IsLegal()) - return false; // Individual partition is illegal. - any_text_parts = true; - } - if (!it.at_last()) { - ColPartition* next_part = it.data_relative(1); - if (next_part->left_key() < part->right_key()) { - return false; - } - } - } - return any_text_parts; -} - -// Return a copy of this. If good_only will only copy the Good ColPartitions. -ColPartitionSet* ColPartitionSet::Copy(bool good_only) { - ColPartition_LIST copy_parts; - ColPartition_IT src_it(&parts_); - ColPartition_IT dest_it(©_parts); - for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - ColPartition* part = src_it.data(); - if (BLOBNBOX::IsTextType(part->blob_type()) && - (!good_only || part->good_width() || part->good_column())) - dest_it.add_after_then_move(part->ShallowCopy()); - } - if (dest_it.empty()) - return nullptr; - return new ColPartitionSet(©_parts); -} - -// Return the bounding boxes of columns at the given y-range -void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, - ColSegment_LIST *segments) { - ColPartition_IT it(&parts_); - ColSegment_IT col_it(segments); - col_it.move_to_last(); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - ICOORD bot_left(part->LeftAtY(y_top), y_bottom); - ICOORD top_right(part->RightAtY(y_bottom), y_top); - ColSegment *col_seg = new ColSegment(); - col_seg->InsertBox(TBOX(bot_left, top_right)); - col_it.add_after_then_move(col_seg); - } -} - -// Display the edges of the columns at the given y coords. 
-void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, - ScrollView* win) { -#ifndef GRAPHICS_DISABLED - ColPartition_IT it(&parts_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom); - win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom); - } -#endif // GRAPHICS_DISABLED -} - -// Return the ColumnSpanningType that best explains the columns overlapped -// by the given coords(left,right,y), with the given margins. -// Also return the first and last column index touched by the coords and -// the leftmost spanned column. -// Column indices are 2n + 1 for real columns (0 based) and even values -// represent the gaps in between columns, with 0 being left of the leftmost. -// resolution refers to the ppi resolution of the image. -ColumnSpanningType ColPartitionSet::SpanningType(int resolution, - int left, int right, - int height, int y, - int left_margin, - int right_margin, - int* first_col, - int* last_col, - int* first_spanned_col) { - *first_col = -1; - *last_col = -1; - *first_spanned_col = -1; - int margin_columns = 0; - ColPartition_IT it(&parts_); - int col_index = 1; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) { - ColPartition* part = it.data(); - if (part->ColumnContains(left, y) || - (it.at_first() && part->ColumnContains(left + height, y))) { - // In the default case, first_col is set, but columns_spanned remains - // zero, so first_col will get reset in the first column genuinely - // spanned, but we can tell the difference from a noise partition - // that touches no column. - *first_col = col_index; - if (part->ColumnContains(right, y) || - (it.at_last() && part->ColumnContains(right - height, y))) { - // Both within a single column. - *last_col = col_index; - return CST_FLOWING; - } - if (left_margin <= part->LeftAtY(y)) { - // It completely spans this column. 
- *first_spanned_col = col_index; - margin_columns = 1; - } - } else if (part->ColumnContains(right, y) || - (it.at_last() && part->ColumnContains(right - height, y))) { - if (*first_col < 0) { - // It started in-between. - *first_col = col_index - 1; - } - if (right_margin >= part->RightAtY(y)) { - // It completely spans this column. - if (margin_columns == 0) - *first_spanned_col = col_index; - ++margin_columns; - } - *last_col = col_index; - break; - } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) { - // Neither left nor right are contained within, so it spans this - // column. - if (*first_col < 0) { - // It started in between the previous column and the current column. - *first_col = col_index - 1; - } - if (margin_columns == 0) - *first_spanned_col = col_index; - *last_col = col_index; - } else if (right < part->LeftAtY(y)) { - // We have gone past the end. - *last_col = col_index - 1; - if (*first_col < 0) { - // It must lie completely between columns =>noise. - *first_col = col_index - 1; - } - break; - } - } - if (*first_col < 0) - *first_col = col_index - 1; // The last in-between. - if (*last_col < 0) - *last_col = col_index - 1; // The last in-between. - ASSERT_HOST(*first_col >= 0 && *last_col >= 0); - ASSERT_HOST(*first_col <= *last_col); - if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) { - // Neither end was in a column, and it didn't span any, so it lies - // entirely between columns, therefore noise. - return CST_NOISE; - } else if (margin_columns <= 1) { - // An exception for headings that stick outside of single-column text. - if (margin_columns == 1 && parts_.singleton()) { - return CST_HEADING; - } - // It is a pullout, as left and right were not in the same column, but - // it doesn't go to the edge of its start and end. - return CST_PULLOUT; - } - // Its margins went to the edges of first and last columns => heading. - return CST_HEADING; -} - -// The column_set has changed. 
Close down all in-progress WorkingPartSets in -// columns that do not match and start new ones for the new columns in this. -// As ColPartitions are turned into BLOCKs, the used ones are put in -// used_parts, as they still need to be referenced in the grid. -void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, - const ICOORD& tright, - int resolution, - ColPartition_LIST* used_parts, - WorkingPartSet_LIST* working_set_list) { - // Move the input list to a temporary location so we can delete its elements - // as we add them to the output working_set. - WorkingPartSet_LIST work_src; - WorkingPartSet_IT src_it(&work_src); - src_it.add_list_after(working_set_list); - src_it.move_to_first(); - WorkingPartSet_IT dest_it(working_set_list); - // Completed blocks and to_blocks are accumulated and given to the first new - // one whenever we keep a column, or at the end. - BLOCK_LIST completed_blocks; - TO_BLOCK_LIST to_blocks; - WorkingPartSet* first_new_set = nullptr; - WorkingPartSet* working_set = nullptr; - ColPartition_IT col_it(&parts_); - for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { - ColPartition* column = col_it.data(); - // Any existing column to the left of column is completed. - while (!src_it.empty() && - ((working_set = src_it.data())->column() == nullptr || - working_set->column()->right_key() <= column->left_key())) { - src_it.extract(); - working_set->ExtractCompletedBlocks(bleft, tright, resolution, - used_parts, &completed_blocks, - &to_blocks); - delete working_set; - src_it.forward(); - } - // Make a new between-column WorkingSet for before the current column. - working_set = new WorkingPartSet(nullptr); - dest_it.add_after_then_move(working_set); - if (first_new_set == nullptr) - first_new_set = working_set; - // A matching column gets to stay, and first_new_set gets all the - // completed_sets. - working_set = src_it.empty() ? 
nullptr : src_it.data(); - if (working_set != nullptr && - working_set->column()->MatchingColumns(*column)) { - working_set->set_column(column); - dest_it.add_after_then_move(src_it.extract()); - src_it.forward(); - first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); - first_new_set = nullptr; - } else { - // Just make a new working set for the current column. - working_set = new WorkingPartSet(column); - dest_it.add_after_then_move(working_set); - } - } - // Complete any remaining src working sets. - while (!src_it.empty()) { - working_set = src_it.extract(); - working_set->ExtractCompletedBlocks(bleft, tright, resolution, - used_parts, &completed_blocks, - &to_blocks); - delete working_set; - src_it.forward(); - } - // Make a new between-column WorkingSet for after the last column. - working_set = new WorkingPartSet(nullptr); - dest_it.add_after_then_move(working_set); - if (first_new_set == nullptr) - first_new_set = working_set; - // The first_new_set now gets any accumulated completed_parts/blocks. - first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); -} - -// Accumulate the widths and gaps into the given variables. -void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width, - int* width_samples, - int* total_gap, - int* gap_samples) { - ColPartition_IT it(&parts_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - *total_width += part->ColumnWidth(); - ++*width_samples; - if (!it.at_last()) { - ColPartition* next_part = it.data_relative(1); - int part_left = part->right_key(); - int part_right = next_part->left_key(); - int gap = part->KeyWidth(part_left, part_right); - *total_gap += gap; - ++*gap_samples; - } - } -} - -// Provide debug output for this ColPartitionSet and all the ColPartitions. 
-void ColPartitionSet::Print() { - ColPartition_IT it(&parts_); - tprintf("Partition set of %d parts, %d good, coverage=%d+%d" - " (%d,%d)->(%d,%d)\n", - it.length(), good_column_count_, good_coverage_, bad_coverage_, - bounding_box_.left(), bounding_box_.bottom(), - bounding_box_.right(), bounding_box_.top()); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - part->Print(); - } -} - -// PRIVATE CODE. - -// Add the given partition to the list in the appropriate place. -void ColPartitionSet::AddPartition(ColPartition* new_part, - ColPartition_IT* it) { - AddPartitionCoverageAndBox(*new_part); - int new_right = new_part->right_key(); - if (it->data()->left_key() >= new_right) - it->add_before_stay_put(new_part); - else - it->add_after_stay_put(new_part); -} - -// Compute the coverage and good column count. Coverage is the amount of the -// width of the page (in pixels) that is covered by ColPartitions, which are -// used to provide candidate column layouts. -// Coverage is split into good and bad. Good coverage is provided by -// ColPartitions of a frequent width (according to the callback function -// provided by TabFinder::WidthCB, which accesses stored statistics on the -// widths of ColParititions) and bad coverage is provided by all other -// ColPartitions, even if they have tab vectors at both sides. Thus: -// |-----------------------------------------------------------------| -// | Double width heading | -// |-----------------------------------------------------------------| -// |-------------------------------| |-------------------------------| -// | Common width ColParition | | Common width ColPartition | -// |-------------------------------| |-------------------------------| -// the layout with two common-width columns has better coverage than the -// double width heading, because the coverage is "good," even though less in -// total coverage than the heading, because the heading coverage is "bad." 
-void ColPartitionSet::ComputeCoverage() { - // Count the number of good columns and sum their width. - ColPartition_IT it(&parts_); - good_column_count_ = 0; - good_coverage_ = 0; - bad_coverage_ = 0; - bounding_box_ = TBOX(); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* part = it.data(); - AddPartitionCoverageAndBox(*part); - } -} - -// Adds the coverage, column count and box for a single partition, -// without adding it to the list. (Helper factored from ComputeCoverage.) -void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) { - bounding_box_ += part.bounding_box(); - int coverage = part.ColumnWidth(); - if (part.good_width()) { - good_coverage_ += coverage; - good_column_count_ += 2; - } else { - if (part.blob_type() < BRT_UNKNOWN) - coverage /= 2; - if (part.good_column()) - ++good_column_count_; - bad_coverage_ += coverage; - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitionset.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitionset.h deleted file mode 100644 index 51b430ee..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/colpartitionset.h +++ /dev/null @@ -1,172 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: colpartitionset.h -// Description: Class to hold a list of ColPartitions of the page that -// correspond roughly to columns. -// Author: Ray Smith -// Created: Thu Aug 14 10:50:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_ -#define TESSERACT_TEXTORD_COLPARTITIONSET_H_ - -#include "colpartition.h" // For ColPartition_LIST. -#include "genericvector.h" // For GenericVector. -#include "rect.h" // For TBOX. -#include "tabvector.h" // For BLOBNBOX_CLIST. - -namespace tesseract { - -class WorkingPartSet_LIST; -class ColSegment_LIST; -class ColPartitionSet; -using PartSetVector = GenericVector; - -// ColPartitionSet is a class that holds a list of ColPartitions. -// Its main use is in holding a candidate partitioning of the width of the -// image into columns, where each member ColPartition is a single column. -// ColPartitionSets are used in building the column layout of a page. -class ColPartitionSet : public ELIST_LINK { - public: - ColPartitionSet() = default; - explicit ColPartitionSet(ColPartition_LIST* partitions); - explicit ColPartitionSet(ColPartition* partition); - - ~ColPartitionSet() = default; - - // Simple accessors. - const TBOX& bounding_box() const { - return bounding_box_; - } - bool Empty() const { - return parts_.empty(); - } - int ColumnCount() const { - return parts_.length(); - } - - // Returns the number of columns of good width. - int GoodColumnCount() const; - - // Return an element of the parts_ list from its index. - ColPartition* GetColumnByIndex(int index); - - // Return the ColPartition that contains the given coords, if any, else nullptr. 
- ColPartition* ColumnContaining(int x, int y); - - // Return the bounding boxes of columns at the given y-range - void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments); - - // Extract all the parts from the list, relinquishing ownership. - void RelinquishParts(); - - // Attempt to improve this by adding partitions or expanding partitions. - void ImproveColumnCandidate(WidthCallback* cb, PartSetVector* src_sets); - - // If this set is good enough to represent a new partitioning into columns, - // add it to the vector of sets, otherwise delete it. - void AddToColumnSetsIfUnique(PartSetVector* column_sets, WidthCallback* cb); - - // Return true if the partitions in other are all compatible with the columns - // in this. - bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback* cb); - - // Returns the total width of all blobs in the part_set that do not lie - // within an approved column. Used as a cost measure for using this - // column set over another that might be compatible. - int UnmatchedWidth(ColPartitionSet* part_set); - - // Return true if this ColPartitionSet makes a legal column candidate by - // having legal individual partitions and non-overlapping adjacent pairs. - bool LegalColumnCandidate(); - - // Return a copy of this. If good_only will only copy the Good ColPartitions. - ColPartitionSet* Copy(bool good_only); - - // Display the edges of the columns at the given y coords. - void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win); - - // Return the ColumnSpanningType that best explains the columns overlapped - // by the given coords(left,right,y), with the given margins. - // Also return the first and last column index touched by the coords and - // the leftmost spanned column. - // Column indices are 2n + 1 for real columns (0 based) and even values - // represent the gaps in between columns, with 0 being left of the leftmost. - // resolution refers to the ppi resolution of the image. 
It may be 0 if only - // the first_col and last_col are required. - ColumnSpanningType SpanningType(int resolution, - int left, int right, int height, int y, - int left_margin, int right_margin, - int* first_col, int* last_col, - int* first_spanned_col); - - // The column_set has changed. Close down all in-progress WorkingPartSets in - // columns that do not match and start new ones for the new columns in this. - // As ColPartitions are turned into BLOCKs, the used ones are put in - // used_parts, as they still need to be referenced in the grid. - void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright, - int resolution, ColPartition_LIST* used_parts, - WorkingPartSet_LIST* working_set); - - // Accumulate the widths and gaps into the given variables. - void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples, - int* total_gap, int* gap_samples); - - // Provide debug output for this ColPartitionSet and all the ColPartitions. - void Print(); - - private: - // Add the given partition to the list in the appropriate place. - void AddPartition(ColPartition* new_part, ColPartition_IT* it); - - // Compute the coverage and good column count. Coverage is the amount of the - // width of the page (in pixels) that is covered by ColPartitions, which are - // used to provide candidate column layouts. - // Coverage is split into good and bad. Good coverage is provided by - // ColPartitions of a frequent width (according to the callback function - // provided by TabFinder::WidthCB, which accesses stored statistics on the - // widths of ColParititions) and bad coverage is provided by all other - // ColPartitions, even if they have tab vectors at both sides. 
Thus: - // |-----------------------------------------------------------------| - // | Double width heading | - // |-----------------------------------------------------------------| - // |-------------------------------| |-------------------------------| - // | Common width ColParition | | Common width ColPartition | - // |-------------------------------| |-------------------------------| - // the layout with two common-width columns has better coverage than the - // double width heading, because the coverage is "good," even though less in - // total coverage than the heading, because the heading coverage is "bad." - void ComputeCoverage(); - - // Adds the coverage, column count and box for a single partition, - // without adding it to the list. (Helper factored from ComputeCoverage.) - void AddPartitionCoverageAndBox(const ColPartition& part); - - // The partitions in this column candidate. - ColPartition_LIST parts_; - // The number of partitions that have a frequent column width. - int good_column_count_; - // Total width of all the good ColPartitions. - int good_coverage_; - // Total width of all the bad ColPartitions. - int bad_coverage_; - // Bounding box of all partitions in the set. - TBOX bounding_box_; -}; - -ELISTIZEH(ColPartitionSet) - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_COLPARTITION_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/devanagari_processing.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/devanagari_processing.cpp deleted file mode 100644 index cc0b17d3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/devanagari_processing.cpp +++ /dev/null @@ -1,500 +0,0 @@ -/********************************************************************** - * File: devanagari_processing.cpp - * Description: Methods to process images containing devanagari symbols, - * prior to classification. 
- * Author: Shobhit Saxena - * Created: Mon Nov 17 20:26:01 IST 2008 - * - * (C) Copyright 2008, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "allheaders.h" -#include "debugpixa.h" -#include "devanagari_processing.h" -#include "statistc.h" -#include "tordmain.h" - -// Flags controlling the debugging information for shiro-rekha splitting -// strategies. 
-INT_VAR(devanagari_split_debuglevel, 0, - "Debug level for split shiro-rekha process."); - -BOOL_VAR(devanagari_split_debugimage, 0, - "Whether to create a debug image for split shiro-rekha process."); - -namespace tesseract { - -ShiroRekhaSplitter::ShiroRekhaSplitter() { - orig_pix_ = nullptr; - segmentation_block_list_ = nullptr; - splitted_image_ = nullptr; - global_xheight_ = kUnspecifiedXheight; - perform_close_ = false; - debug_image_ = nullptr; - pageseg_split_strategy_ = NO_SPLIT; - ocr_split_strategy_ = NO_SPLIT; -} - -ShiroRekhaSplitter::~ShiroRekhaSplitter() { - Clear(); -} - -void ShiroRekhaSplitter::Clear() { - pixDestroy(&orig_pix_); - pixDestroy(&splitted_image_); - pageseg_split_strategy_ = NO_SPLIT; - ocr_split_strategy_ = NO_SPLIT; - pixDestroy(&debug_image_); - segmentation_block_list_ = nullptr; - global_xheight_ = kUnspecifiedXheight; - perform_close_ = false; -} - -// On setting the input image, a clone of it is owned by this class. -void ShiroRekhaSplitter::set_orig_pix(Pix* pix) { - if (orig_pix_) { - pixDestroy(&orig_pix_); - } - orig_pix_ = pixClone(pix); -} - -// Top-level method to perform splitting based on current settings. -// Returns true if a split was actually performed. -// split_for_pageseg should be true if the splitting is being done prior to -// page segmentation. This mode uses the flag -// pageseg_devanagari_split_strategy to determine the splitting strategy. -bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) { - SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : - ocr_split_strategy_; - if (split_strategy == NO_SPLIT) { - return false; // Nothing to do. - } - ASSERT_HOST(split_strategy == MINIMAL_SPLIT || - split_strategy == MAXIMAL_SPLIT); - ASSERT_HOST(orig_pix_); - if (devanagari_split_debuglevel > 0) { - tprintf("Splitting shiro-rekha ...\n"); - tprintf("Split strategy = %s\n", - split_strategy == MINIMAL_SPLIT ? 
"Minimal" : "Maximal"); - tprintf("Initial pageseg available = %s\n", - segmentation_block_list_ ? "yes" : "no"); - } - // Create a copy of original image to store the splitting output. - pixDestroy(&splitted_image_); - splitted_image_ = pixCopy(nullptr, orig_pix_); - - // Initialize debug image if required. - if (devanagari_split_debugimage) { - pixDestroy(&debug_image_); - debug_image_ = pixConvertTo32(orig_pix_); - } - - // Determine all connected components in the input image. A close operation - // may be required prior to this, depending on the current settings. - Pix* pix_for_ccs = pixClone(orig_pix_); - if (perform_close_ && global_xheight_ != kUnspecifiedXheight && - !segmentation_block_list_) { - if (devanagari_split_debuglevel > 0) { - tprintf("Performing a global close operation..\n"); - } - // A global measure is available for xheight, but no local information - // exists. - pixDestroy(&pix_for_ccs); - pix_for_ccs = pixCopy(nullptr, orig_pix_); - PerformClose(pix_for_ccs, global_xheight_); - } - Pixa* ccs; - Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8); - boxaDestroy(&tmp_boxa); - pixDestroy(&pix_for_ccs); - - // Iterate over all connected components. Get their bounding boxes and clip - // out the image regions corresponding to these boxes from the original image. - // Conditionally run splitting on each of them. - Boxa* regions_to_clear = boxaCreate(0); - int num_ccs = 0; - if (ccs != nullptr) num_ccs = pixaGetCount(ccs); - for (int i = 0; i < num_ccs; ++i) { - Box* box = ccs->boxa->box[i]; - Pix* word_pix = pixClipRectangle(orig_pix_, box, nullptr); - ASSERT_HOST(word_pix); - int xheight = GetXheightForCC(box); - if (xheight == kUnspecifiedXheight && segmentation_block_list_ && - devanagari_split_debugimage) { - pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0); - } - // If some xheight measure is available, attempt to pre-eliminate small - // blobs from the shiro-rekha process. 
This is primarily to save the CCs - // corresponding to punctuation marks/small dots etc which are part of - // larger graphemes. - if (xheight == kUnspecifiedXheight || - (box->w > xheight / 3 && box->h > xheight / 2)) { - SplitWordShiroRekha(split_strategy, word_pix, xheight, - box->x, box->y, regions_to_clear); - } else if (devanagari_split_debuglevel > 0) { - tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", - box->x, box->y, box->w, box->h); - } - pixDestroy(&word_pix); - } - // Actually clear the boxes now. - for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) { - Box* box = boxaGetBox(regions_to_clear, i, L_CLONE); - pixClearInRect(splitted_image_, box); - boxDestroy(&box); - } - boxaDestroy(®ions_to_clear); - pixaDestroy(&ccs); - if (devanagari_split_debugimage && pixa_debug != nullptr) { - pixa_debug->AddPix(debug_image_, - split_for_pageseg ? "pageseg_split" : "ocr_split"); - } - return true; -} - -// Method to perform a close operation on the input image. The xheight -// estimate decides the size of sel used. -void ShiroRekhaSplitter::PerformClose(Pix* pix, int xheight_estimate) { - pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3); -} - -// This method resolves the cc bbox to a particular row and returns the row's -// xheight. -int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) { - if (!segmentation_block_list_) { - return global_xheight_; - } - // Compute the box coordinates in Tesseract's coordinate system. - TBOX bbox(cc_bbox->x, - pixGetHeight(orig_pix_) - cc_bbox->y - cc_bbox->h - 1, - cc_bbox->x + cc_bbox->w, - pixGetHeight(orig_pix_) - cc_bbox->y - 1); - // Iterate over all blocks. - BLOCK_IT block_it(segmentation_block_list_); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - BLOCK* block = block_it.data(); - // Iterate over all rows in the block. 
- ROW_IT row_it(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ROW* row = row_it.data(); - if (!row->bounding_box().major_overlap(bbox)) { - continue; - } - // Row could be skewed, warped, etc. Use the position of the box to - // determine the baseline position of the row for that x-coordinate. - // Create a square TBOX whose baseline's mid-point lies at this point - // and side is row's xheight. Take the overlap of this box with the input - // box and check if it is a 'major overlap'. If so, this box lies in this - // row. In that case, return the xheight for this row. - float box_middle = 0.5 * (bbox.left() + bbox.right()); - int baseline = static_cast(row->base_line(box_middle) + 0.5); - TBOX test_box(box_middle - row->x_height() / 2, - baseline, - box_middle + row->x_height() / 2, - static_cast(baseline + row->x_height())); - // Compute overlap. If it is is a major overlap, this is the right row. - if (bbox.major_overlap(test_box)) { - return row->x_height(); - } - } - } - // No row found for this bbox. - return kUnspecifiedXheight; -} - -// Returns a list of regions (boxes) which should be cleared in the original -// image so as to perform shiro-rekha splitting. Pix is assumed to carry one -// (or less) word only. Xheight measure could be the global estimate, the row -// estimate, or unspecified. If unspecified, over splitting may occur, since a -// conservative estimate of stroke width along with an associated multiplier -// is used in its place. It is advisable to have a specified xheight when -// splitting for classification/training. -// A vertical projection histogram of all the on-pixels in the input pix is -// computed. The maxima of this histogram is regarded as an approximate location -// of the shiro-rekha. By descending on the maxima's peak on both sides, -// stroke width of shiro-rekha is estimated. 
-// A horizontal projection histogram is computed for a sub-image of the input -// image, which extends from just below the shiro-rekha down to a certain -// leeway. The leeway depends on the input xheight, if provided, else a -// conservative multiplier on approximate stroke width is used (which may lead -// to over-splitting). -void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, - Pix* pix, - int xheight, - int word_left, - int word_top, - Boxa* regions_to_clear) { - if (split_strategy == NO_SPLIT) { - return; - } - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - // Statistically determine the yextents of the shiro-rekha. - int shirorekha_top, shirorekha_bottom, shirorekha_ylevel; - GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, - &shirorekha_ylevel); - // Since the shiro rekha is also a stroke, its width is equal to the stroke - // width. - int stroke_width = shirorekha_bottom - shirorekha_top + 1; - - // Some safeguards to protect CCs we do not want to be split. - // These are particularly useful when the word wasn't eliminated earlier - // because xheight information was unavailable. - if (shirorekha_ylevel > height / 2) { - // Shirorekha shouldn't be in the bottom half of the word. - if (devanagari_split_debuglevel > 0) { - tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", - word_left, word_top); - } - return; - } - if (stroke_width > height / 3) { - // Even the boldest of fonts shouldn't do this. - if (devanagari_split_debuglevel > 0) { - tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", - word_left, word_top); - } - return; - } - - // Clear the ascender and descender regions of the word. - // Obtain a vertical projection histogram for the resulting image. 
- Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, - width, 5 * stroke_width / 3); - Pix* word_in_xheight = pixCopy(nullptr, pix); - pixClearInRect(word_in_xheight, box_to_clear); - // Also clear any pixels which are below shirorekha_bottom + some leeway. - // The leeway is set to xheight if the information is available, else it is a - // multiplier applied to the stroke width. - int leeway_to_keep = stroke_width * 3; - if (xheight != kUnspecifiedXheight) { - // This is because the xheight-region typically includes the shiro-rekha - // inside it, i.e., the top of the xheight range corresponds to the top of - // shiro-rekha. - leeway_to_keep = xheight - stroke_width; - } - box_to_clear->y = shirorekha_bottom + leeway_to_keep; - box_to_clear->h = height - box_to_clear->y; - pixClearInRect(word_in_xheight, box_to_clear); - boxDestroy(&box_to_clear); - - PixelHistogram vert_hist; - vert_hist.ConstructVerticalCountHist(word_in_xheight); - pixDestroy(&word_in_xheight); - - // If the number of black pixel in any column of the image is less than a - // fraction of the stroke width, treat it as noise / a stray mark. Perform - // these changes inside the vert_hist data itself, as that is used later on as - // a bit vector for the final split decision at every column. - for (int i = 0; i < width; ++i) { - if (vert_hist.hist()[i] <= stroke_width / 4) - vert_hist.hist()[i] = 0; - else - vert_hist.hist()[i] = 1; - } - // In order to split the line at any point, we make sure that the width of the - // gap is at least half the stroke width. - int i = 0; - int cur_component_width = 0; - while (i < width) { - if (!vert_hist.hist()[i]) { - int j = 0; - while (i + j < width && !vert_hist.hist()[i+j]) - ++j; - if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) { - // Perform a shiro-rekha split. The intervening region lies from i to - // i+j-1. 
- // A minimal single-pixel split makes the estimation of intra- and - // inter-word spacing easier during page layout analysis, - // whereas a maximal split may be needed for OCR, depending on - // how the engine was trained. - bool minimal_split = (split_strategy == MINIMAL_SPLIT); - int split_width = minimal_split ? 1 : j; - int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i; - if (!minimal_split || (i != 0 && i + j != width)) { - Box* box_to_clear = - boxCreate(word_left + split_left, - word_top + shirorekha_top - stroke_width / 3, - split_width, - 5 * stroke_width / 3); - if (box_to_clear) { - boxaAddBox(regions_to_clear, box_to_clear, L_CLONE); - // Mark this in the debug image if needed. - if (devanagari_split_debugimage) { - pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128); - } - boxDestroy(&box_to_clear); - cur_component_width = 0; - } - } - } - i += j; - } else { - ++i; - ++cur_component_width; - } - } -} - -// Refreshes the words in the segmentation block list by using blobs in the -// input block list. -// The segmentation block list must be set. -void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs( - C_BLOB_LIST* new_blobs) { - // The segmentation block list must have been specified. - ASSERT_HOST(segmentation_block_list_); - if (devanagari_split_debuglevel > 0) { - tprintf("Before refreshing blobs:\n"); - PrintSegmentationStats(segmentation_block_list_); - tprintf("New Blobs found: %d\n", new_blobs->length()); - } - - C_BLOB_LIST not_found_blobs; - RefreshWordBlobsFromNewBlobs(segmentation_block_list_, - new_blobs, - ((devanagari_split_debugimage && debug_image_) ? - ¬_found_blobs : nullptr)); - - if (devanagari_split_debuglevel > 0) { - tprintf("After refreshing blobs:\n"); - PrintSegmentationStats(segmentation_block_list_); - } - if (devanagari_split_debugimage && debug_image_) { - // Plot out the original blobs for which no match was found in the new - // all_blobs list. 
- C_BLOB_IT not_found_it(¬_found_blobs); - for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); - not_found_it.forward()) { - C_BLOB* not_found = not_found_it.data(); - TBOX not_found_box = not_found->bounding_box(); - Box* box_to_plot = GetBoxForTBOX(not_found_box); - pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255); - boxDestroy(&box_to_plot); - } - - // Plot out the blobs unused from all blobs. - C_BLOB_IT all_blobs_it(new_blobs); - for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); - all_blobs_it.forward()) { - C_BLOB* a_blob = all_blobs_it.data(); - Box* box_to_plot = GetBoxForTBOX(a_blob->bounding_box()); - pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0); - boxDestroy(&box_to_plot); - } - } -} - -// Returns a new box object for the corresponding TBOX, based on the original -// image's coordinate system. -Box* ShiroRekhaSplitter::GetBoxForTBOX(const TBOX& tbox) const { - return boxCreate(tbox.left(), pixGetHeight(orig_pix_) - tbox.top() - 1, - tbox.width(), tbox.height()); -} - -// This method returns the computed mode-height of blobs in the pix. -// It also prunes very small blobs from calculation. -int ShiroRekhaSplitter::GetModeHeight(Pix* pix) { - Boxa* boxa = pixConnComp(pix, nullptr, 8); - STATS heights(0, pixGetHeight(pix)); - heights.clear(); - for (int i = 0; i < boxaGetCount(boxa); ++i) { - Box* box = boxaGetBox(boxa, i, L_CLONE); - if (box->h >= 3 || box->w >= 3) { - heights.add(box->h, 1); - } - boxDestroy(&box); - } - boxaDestroy(&boxa); - return heights.mode(); -} - -// This method returns y-extents of the shiro-rekha computed from the input -// word image. -void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix* word_pix, - int* shirorekha_top, - int* shirorekha_bottom, - int* shirorekha_ylevel) { - // Compute a histogram from projecting the word on a vertical line. - PixelHistogram hist_horiz; - hist_horiz.ConstructHorizontalCountHist(word_pix); - // Get the ylevel where the top-line exists. 
This is basically the global - // maxima in the horizontal histogram. - int topline_onpixel_count = 0; - int topline_ylevel = hist_horiz.GetHistogramMaximum(&topline_onpixel_count); - - // Get the upper and lower extents of the shiro rekha. - int thresh = (topline_onpixel_count * 70) / 100; - int ulimit = topline_ylevel; - int llimit = topline_ylevel; - while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) - --ulimit; - while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) - ++llimit; - - if (shirorekha_top) *shirorekha_top = ulimit; - if (shirorekha_bottom) *shirorekha_bottom = llimit; - if (shirorekha_ylevel) *shirorekha_ylevel = topline_ylevel; -} - -// This method returns the global-maxima for the histogram. The frequency of -// the global maxima is returned in count, if specified. -int PixelHistogram::GetHistogramMaximum(int* count) const { - int best_value = 0; - for (int i = 0; i < length_; ++i) { - if (hist_[i] > hist_[best_value]) { - best_value = i; - } - } - if (count) { - *count = hist_[best_value]; - } - return best_value; -} - -// Methods to construct histograms from images. -void PixelHistogram::ConstructVerticalCountHist(Pix* pix) { - Clear(); - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - hist_ = new int[width]; - length_ = width; - int wpl = pixGetWpl(pix); - l_uint32 *data = pixGetData(pix); - for (int i = 0; i < width; ++i) - hist_[i] = 0; - for (int i = 0; i < height; ++i) { - l_uint32 *line = data + i * wpl; - for (int j = 0; j < width; ++j) - if (GET_DATA_BIT(line, j)) - ++(hist_[j]); - } -} - -void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) { - Clear(); - Numa* counts = pixCountPixelsByRow(pix, nullptr); - length_ = numaGetCount(counts); - hist_ = new int[length_]; - for (int i = 0; i < length_; ++i) { - l_int32 val = 0; - numaGetIValue(counts, i, &val); - hist_[i] = val; - } - numaDestroy(&counts); -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/devanagari_processing.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/devanagari_processing.h deleted file mode 100644 index cca0bd62..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/devanagari_processing.h +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// Author: shobhitsaxena@google.com (Shobhit Saxena) -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ -#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ - -#include "allheaders.h" -#include "debugpixa.h" -#include "ocrblock.h" -#include "params.h" - -struct Pix; -struct Box; -struct Boxa; - -extern -INT_VAR_H(devanagari_split_debuglevel, 0, - "Debug level for split shiro-rekha process."); - -extern -BOOL_VAR_H(devanagari_split_debugimage, 0, - "Whether to create a debug image for split shiro-rekha process."); - -class TBOX; - -namespace tesseract { - -class PixelHistogram { - public: - PixelHistogram() { - hist_ = nullptr; - length_ = 0; - } - - ~PixelHistogram() { - Clear(); - } - - void Clear() { - delete[] hist_; - length_ = 0; - } - - int* hist() const { return hist_; } - - int length() const { - return length_; - } - - // Methods to construct histograms from images. These clear any existing data. 
- void ConstructVerticalCountHist(Pix* pix); - void ConstructHorizontalCountHist(Pix* pix); - - // This method returns the global-maxima for the histogram. The frequency of - // the global maxima is returned in count, if specified. - int GetHistogramMaximum(int* count) const; - - private: - int* hist_; - int length_; -}; - -class ShiroRekhaSplitter { - public: - enum SplitStrategy { - NO_SPLIT = 0, // No splitting is performed for the phase. - MINIMAL_SPLIT, // Blobs are split minimally. - MAXIMAL_SPLIT // Blobs are split maximally. - }; - - ShiroRekhaSplitter(); - virtual ~ShiroRekhaSplitter(); - - // Top-level method to perform splitting based on current settings. - // Returns true if a split was actually performed. - // If split_for_pageseg is true, the pageseg_split_strategy_ is used for - // splitting. If false, the ocr_split_strategy_ is used. - bool Split(bool split_for_pageseg, DebugPixa* pixa_debug); - - // Clears the memory held by this object. - void Clear(); - - // Refreshes the words in the segmentation block list by using blobs in the - // input blob list. - // The segmentation block list must be set. - void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs); - - // Returns true if the split strategies for pageseg and ocr are different. - bool HasDifferentSplitStrategies() const { - return pageseg_split_strategy_ != ocr_split_strategy_; - } - - // This only keeps a copy of the block list pointer. At split call, the list - // object should still be alive. This block list is used as a golden - // segmentation when performing splitting. - void set_segmentation_block_list(BLOCK_LIST* block_list) { - segmentation_block_list_ = block_list; - } - - static const int kUnspecifiedXheight = -1; - - void set_global_xheight(int xheight) { - global_xheight_ = xheight; - } - - void set_perform_close(bool perform) { - perform_close_ = perform; - } - - // Returns the image obtained from shiro-rekha splitting. The returned object - // is owned by this class. 
Callers may want to clone the returned pix to keep - // it alive beyond the life of ShiroRekhaSplitter object. - Pix* splitted_image() { - return splitted_image_; - } - - // On setting the input image, a clone of it is owned by this class. - void set_orig_pix(Pix* pix); - - // Returns the input image provided to the object. This object is owned by - // this class. Callers may want to clone the returned pix to work with it. - Pix* orig_pix() { - return orig_pix_; - } - - SplitStrategy ocr_split_strategy() const { - return ocr_split_strategy_; - } - - void set_ocr_split_strategy(SplitStrategy strategy) { - ocr_split_strategy_ = strategy; - } - - SplitStrategy pageseg_split_strategy() const { - return pageseg_split_strategy_; - } - - void set_pageseg_split_strategy(SplitStrategy strategy) { - pageseg_split_strategy_ = strategy; - } - - BLOCK_LIST* segmentation_block_list() { - return segmentation_block_list_; - } - - // This method returns the computed mode-height of blobs in the pix. - // It also prunes very small blobs from calculation. Could be used to provide - // a global xheight estimate for images which have the same point-size text. - static int GetModeHeight(Pix* pix); - - private: - // Method to perform a close operation on the input image. The xheight - // estimate decides the size of sel used. - static void PerformClose(Pix* pix, int xheight_estimate); - - // This method resolves the cc bbox to a particular row and returns the row's - // xheight. This uses block_list_ if available, else just returns the - // global_xheight_ estimate currently set in the object. - int GetXheightForCC(Box* cc_bbox); - - // Returns a list of regions (boxes) which should be cleared in the original - // image so as to perform shiro-rekha splitting. Pix is assumed to carry one - // (or less) word only. Xheight measure could be the global estimate, the row - // estimate, or unspecified. 
If unspecified, over splitting may occur, since a - // conservative estimate of stroke width along with an associated multiplier - // is used in its place. It is advisable to have a specified xheight when - // splitting for classification/training. - void SplitWordShiroRekha(SplitStrategy split_strategy, - Pix* pix, - int xheight, - int word_left, - int word_top, - Boxa* regions_to_clear); - - // Returns a new box object for the corresponding TBOX, based on the original - // image's coordinate system. - Box* GetBoxForTBOX(const TBOX& tbox) const; - - // This method returns y-extents of the shiro-rekha computed from the input - // word image. - static void GetShiroRekhaYExtents(Pix* word_pix, - int* shirorekha_top, - int* shirorekha_bottom, - int* shirorekha_ylevel); - - Pix* orig_pix_; // Just a clone of the input image passed. - Pix* splitted_image_; // Image produced after the last splitting round. The - // object is owned by this class. - SplitStrategy pageseg_split_strategy_; - SplitStrategy ocr_split_strategy_; - Pix* debug_image_; - // This block list is used as a golden segmentation when performing splitting. - BLOCK_LIST* segmentation_block_list_; - int global_xheight_; - bool perform_close_; // Whether a morphological close operation should be - // performed before CCs are run through splitting. -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawedg.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawedg.cpp deleted file mode 100644 index f677969d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawedg.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/********************************************************************** - * File: drawedg.cpp (Formerly drawedge.c) - * Description: Collection of functions to draw things to do with edge - * detection. 
- * Author: Ray Smith - * Created: Thu Jun 06 13:29:20 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "drawedg.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifndef GRAPHICS_DISABLED - -/** title of window */ -#define IMAGE_WIN_NAME "Edges" -#define IMAGE_XPOS 250 -/** default position */ -#define IMAGE_YPOS 0 - -/** - * @name create_edges_window - * - * Create the edges window. - * @param page_tr size of image - */ - -ScrollView* create_edges_window(ICOORD page_tr) { - ScrollView* image_win; //image window - - //create the window - image_win = new ScrollView (IMAGE_WIN_NAME, IMAGE_XPOS, IMAGE_YPOS, 0, 0, page_tr.x (), page_tr.y ()); - return image_win; //window -} - - -/** - * @name draw_raw_edge - * - * Draw the raw steps to the given window in the given colour. 
- * @param fd window to draw in - * @param start start of loop - * @param colour colour to draw in - */ - -void draw_raw_edge(ScrollView* fd, - CRACKEDGE *start, - ScrollView::Color colour) { - CRACKEDGE *edgept; //current point - - fd->Pen(colour); - edgept = start; - fd->SetCursor(edgept->pos.x (), edgept->pos.y ()); - do { - do - edgept = edgept->next; - //merge straight lines - while (edgept != start && edgept->prev->stepx == edgept->stepx && edgept->prev->stepy == edgept->stepy); - - //draw lines - fd->DrawTo(edgept->pos.x (), edgept->pos.y ()); - } - while (edgept != start); -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawedg.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawedg.h deleted file mode 100644 index ef5ed5e2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawedg.h +++ /dev/null @@ -1,37 +0,0 @@ -/********************************************************************** - * File: drawedg.h (Formerly drawedge.h) - * Description: Collection of functions to draw things to do with edge - *detection. - * Author: Ray Smith - * Created: Thu Jun 06 13:29:20 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef DRAWEDG_H -#define DRAWEDG_H -#ifndef GRAPHICS_DISABLED - -#include "scrollview.h" -#include "crakedge.h" - -ScrollView* create_edges_window( //make window - ICOORD page_tr //size of image - ); -void draw_raw_edge( //draw the cracks - ScrollView* fd, //window to draw in - CRACKEDGE *start, //start of loop - ScrollView::Color colour //colour to draw in - ); -#endif -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawtord.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawtord.cpp deleted file mode 100644 index 8cf62f43..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawtord.cpp +++ /dev/null @@ -1,419 +0,0 @@ -/********************************************************************** - * File: drawtord.cpp (Formerly drawto.c) - * Description: Draw things to do with textord. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "pithsync.h" -#include "topitch.h" -#include "drawtord.h" - -#define TO_WIN_XPOS 0 //default window pos -#define TO_WIN_YPOS 0 -#define TO_WIN_NAME "Textord" - //title of window - -#define EXTERN - -EXTERN BOOL_VAR (textord_show_fixed_cuts, FALSE, -"Draw fixed pitch cell boundaries"); - -EXTERN ScrollView* to_win = nullptr; - -/********************************************************************** - * create_to_win - * - * Create the to window used to show the fit. - **********************************************************************/ -#ifndef GRAPHICS_DISABLED - -ScrollView* create_to_win(ICOORD page_tr) { - if (to_win != nullptr) return to_win; - to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, - page_tr.x() + 1, page_tr.y() + 1, - page_tr.x(), page_tr.y(), true); - return to_win; -} - - -void close_to_win() { - // to_win is leaked, but this enables the user to view the contents. - if (to_win != nullptr) { - to_win->Update(); - } -} - - -/********************************************************************** - * plot_box_list - * - * Draw a list of blobs. - **********************************************************************/ - -void plot_box_list( //make gradients win - ScrollView* win, //window to draw in - BLOBNBOX_LIST *list, //blob list - ScrollView::Color body_colour //colour to draw - ) { - BLOBNBOX_IT it = list; //iterator - - win->Pen(body_colour); - win->Brush(ScrollView::NONE); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->bounding_box ().plot (win); - } -} - - -/********************************************************************** - * plot_to_row - * - * Draw the blobs of a row in a given colour and draw the line fit. 
- **********************************************************************/ - -void plot_to_row( //draw a row - TO_ROW *row, //row to draw - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ) { - FCOORD plot_pt; //point to plot - //blobs - BLOBNBOX_IT it = row->blob_list (); - float left, right; //end of row - - if (it.empty ()) { - tprintf ("No blobs in row at %g\n", row->parallel_c ()); - return; - } - left = it.data ()->bounding_box ().left (); - it.move_to_last (); - right = it.data ()->bounding_box ().right (); - plot_blob_list (to_win, row->blob_list (), colour, ScrollView::BROWN); - to_win->Pen(colour); - plot_pt = FCOORD (left, row->line_m () * left + row->line_c ()); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (right, row->line_m () * right + row->line_c ()); - plot_pt.rotate (rotation); - to_win->DrawTo(plot_pt.x (), plot_pt.y ()); -} - - -/********************************************************************** - * plot_parallel_row - * - * Draw the blobs of a row in a given colour and draw the line fit. 
- **********************************************************************/ - -void plot_parallel_row( //draw a row - TO_ROW *row, //row to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ) { - FCOORD plot_pt; //point to plot - //blobs - BLOBNBOX_IT it = row->blob_list (); - float fleft = (float) left; //floating version - float right; //end of row - - // left=it.data()->bounding_box().left(); - it.move_to_last (); - right = it.data ()->bounding_box ().right (); - plot_blob_list (to_win, row->blob_list (), colour, ScrollView::BROWN); - to_win->Pen(colour); - plot_pt = FCOORD (fleft, gradient * left + row->max_y ()); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (fleft, gradient * left + row->min_y ()); - plot_pt.rotate (rotation); - to_win->DrawTo(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (fleft, gradient * left + row->parallel_c ()); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (right, gradient * right + row->parallel_c ()); - plot_pt.rotate (rotation); - to_win->DrawTo(plot_pt.x (), plot_pt.y ()); -} - - -/********************************************************************** - * draw_occupation - * - * Draw the row occupation with points above the threshold in white - * and points below the threshold in black. 
- **********************************************************************/ - -void -draw_occupation ( //draw projection -int32_t xleft, //edge of block -int32_t ybottom, //bottom of block -int32_t min_y, //coordinate limits -int32_t max_y, int32_t occupation[], //projection counts -int32_t thresholds[] //for drop out -) { - int32_t line_index; //pixel coord - ScrollView::Color colour; //of histogram - float fleft = (float) xleft; //float version - - colour = ScrollView::WHITE; - to_win->Pen(colour); - to_win->SetCursor(fleft, (float) ybottom); - for (line_index = min_y; line_index <= max_y; line_index++) { - if (occupation[line_index - min_y] < thresholds[line_index - min_y]) { - if (colour != ScrollView::BLUE) { - colour = ScrollView::BLUE; - to_win->Pen(colour); - } - } - else { - if (colour != ScrollView::WHITE) { - colour = ScrollView::WHITE; - to_win->Pen(colour); - } - } - to_win->DrawTo(fleft + occupation[line_index - min_y] / 10.0, (float) line_index); - } - colour=ScrollView::STEEL_BLUE; - to_win->Pen(colour); - to_win->SetCursor(fleft, (float) ybottom); - for (line_index = min_y; line_index <= max_y; line_index++) { - to_win->DrawTo(fleft + thresholds[line_index - min_y] / 10.0, (float) line_index); - } -} - - -/********************************************************************** - * draw_meanlines - * - * Draw the meanlines of the given block in the given colour. 
- **********************************************************************/ - -void draw_meanlines( //draw a block - TO_BLOCK *block, //block to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ) { - FCOORD plot_pt; //point to plot - //rows - TO_ROW_IT row_it = block->get_rows (); - TO_ROW *row; //current row - BLOBNBOX_IT blob_it; //blobs - float right; //end of row - to_win->Pen(colour); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_it.set_to_list (row->blob_list ()); - blob_it.move_to_last (); - right = blob_it.data ()->bounding_box ().right (); - plot_pt = - FCOORD ((float) left, - gradient * left + row->parallel_c () + row->xheight); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = - FCOORD ((float) right, - gradient * right + row->parallel_c () + row->xheight); - plot_pt.rotate (rotation); - to_win->DrawTo (plot_pt.x (), plot_pt.y ()); - } -} - - -/********************************************************************** - * plot_word_decisions - * - * Plot a row with words in different colours and fuzzy spaces - * highlighted. 
- **********************************************************************/ - -void plot_word_decisions( //draw words - ScrollView* win, //window tro draw in - int16_t pitch, //of block - TO_ROW *row //row to draw - ) { - ScrollView::Color colour = ScrollView::MAGENTA; //current colour - ScrollView::Color rect_colour; //fuzzy colour - int32_t prev_x; //end of prev blob - int16_t blob_count; //blobs in word - BLOBNBOX *blob; //current blob - TBOX blob_box; //bounding box - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT start_it = blob_it;//word start - - rect_colour = ScrollView::BLACK; - prev_x = -INT16_MAX; - blob_count = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - if (!blob->joined_to_prev () - && blob_box.left () - prev_x > row->max_nonspace) { - if ((blob_box.left () - prev_x >= row->min_space - || blob_box.left () - prev_x > row->space_threshold) - && blob_count > 0) { - if (pitch > 0 && textord_show_fixed_cuts) - plot_fp_cells (win, colour, &start_it, pitch, blob_count, - &row->projection, row->projection_left, - row->projection_right, - row->xheight * textord_projection_scale); - blob_count = 0; - start_it = blob_it; - } - if (colour == ScrollView::MAGENTA) - colour = ScrollView::RED; - else - colour = (ScrollView::Color) (colour + 1); - if (blob_box.left () - prev_x < row->min_space) { - if (blob_box.left () - prev_x > row->space_threshold) - rect_colour = ScrollView::GOLDENROD; - else - rect_colour = ScrollView::CORAL; - //fill_color_index(win, rect_colour); - win->Brush(rect_colour); - win->Rectangle (prev_x, blob_box.bottom (), - blob_box.left (), blob_box.top ()); - } - } - if (!blob->joined_to_prev()) - prev_x = blob_box.right(); - if (blob->cblob () != nullptr) - blob->cblob ()->plot (win, colour, colour); - if (!blob->joined_to_prev() && blob->cblob() != nullptr) - blob_count++; - } - if (pitch > 0 && textord_show_fixed_cuts && 
blob_count > 0) - plot_fp_cells (win, colour, &start_it, pitch, blob_count, - &row->projection, row->projection_left, - row->projection_right, - row->xheight * textord_projection_scale); -} - - -/********************************************************************** - * plot_fp_cells - * - * Make a list of fixed pitch cuts and draw them. - **********************************************************************/ - -void plot_fp_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - BLOBNBOX_IT *blob_it, //blobs - int16_t pitch, //of block - int16_t blob_count, //no of real blobs - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale) { - int16_t occupation; //occupied cells - TBOX word_box; //bounding box - FPSEGPT_LIST seg_list; //list of cuts - FPSEGPT_IT seg_it; - FPSEGPT *segpt; //current point - - if (pitsync_linear_version) - check_pitch_sync2 (blob_it, blob_count, pitch, 2, projection, - projection_left, projection_right, - projection_scale, occupation, &seg_list, 0, 0); - else - check_pitch_sync (blob_it, blob_count, pitch, 2, projection, &seg_list); - word_box = blob_it->data ()->bounding_box (); - for (; blob_count > 0; blob_count--) - word_box += box_next (blob_it); - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpt = seg_it.data (); - if (segpt->faked) { - colour = ScrollView::WHITE; - win->Pen(colour); } - else { - win->Pen(colour); } - win->Line(segpt->position (), word_box.bottom (),segpt->position (), word_box.top ()); - } -} - - -/********************************************************************** - * plot_fp_cells2 - * - * Make a list of fixed pitch cuts and draw them. 
- **********************************************************************/ - -void plot_fp_cells2( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - FPSEGPT_LIST *seg_list //segments to plot - ) { - TBOX word_box; //bounding box - FPSEGPT_IT seg_it = seg_list; - //blobs in row - BLOBNBOX_IT blob_it = row->blob_list (); - FPSEGPT *segpt; //current point - - word_box = blob_it.data ()->bounding_box (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();) - word_box += box_next (&blob_it); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpt = seg_it.data (); - if (segpt->faked) { - colour = ScrollView::WHITE; - win->Pen(colour); } - else { - win->Pen(colour); } - win->Line(segpt->position (), word_box.bottom (),segpt->position (), word_box.top ()); - } -} - - -/********************************************************************** - * plot_row_cells - * - * Make a list of fixed pitch cuts and draw them. 
- **********************************************************************/ - -void plot_row_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - float xshift, //amount of shift - ICOORDELT_LIST *cells //cells to draw - ) { - TBOX word_box; //bounding box - ICOORDELT_IT cell_it = cells; - //blobs in row - BLOBNBOX_IT blob_it = row->blob_list (); - ICOORDELT *cell; //current cell - - word_box = blob_it.data ()->bounding_box (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();) - word_box += box_next (&blob_it); - win->Pen(colour); - for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); cell_it.forward ()) { - cell = cell_it.data (); - win->Line(cell->x () + xshift, word_box.bottom (), cell->x () + xshift, word_box.top ()); - } -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawtord.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawtord.h deleted file mode 100644 index 5296e5f3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/drawtord.h +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************** - * File: drawtord.h (Formerly drawto.h) - * Description: Draw things to do with textord. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef DRAWTORD_H -#define DRAWTORD_H - -#include "params.h" -#include "scrollview.h" -#include "pitsync1.h" -#include "blobbox.h" - -#define NO_SMD "none" - -extern BOOL_VAR_H (textord_show_fixed_cuts, FALSE, -"Draw fixed pitch cell boundaries"); -extern STRING_VAR_H (to_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); -extern STRING_VAR_H (to_smdfile, NO_SMD, "Name of SMD file"); -extern ScrollView* to_win; -extern FILE *to_debug; -// Creates a static display window for textord, and returns a pointer to it. -ScrollView* create_to_win(ICOORD page_tr); -void close_to_win(); // Destroy the textord window. -void create_todebug_win(); //make gradients win -void plot_box_list( //make gradients win - ScrollView* win, //window to draw in - BLOBNBOX_LIST *list, //blob list - ScrollView::Color body_colour //colour to draw - ); -void plot_to_row( //draw a row - TO_ROW *row, //row to draw - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ); -void plot_parallel_row( //draw a row - TO_ROW *row, //row to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ); -void draw_occupation ( //draw projection -int32_t xleft, //edge of block -int32_t ybottom, //bottom of block -int32_t min_y, //coordinate limits -int32_t max_y, int32_t occupation[], //projection counts -int32_t thresholds[] //for drop out -); -void draw_meanlines( //draw a block - TO_BLOCK *block, //block to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ); -void plot_word_decisions( //draw words - ScrollView* win, //window tro draw in - int16_t pitch, //of block - TO_ROW *row //row to draw - ); -void plot_fp_cells( //draw words - ScrollView* win, //window tro draw in - 
ScrollView::Color colour, //colour of lines - BLOBNBOX_IT *blob_it, //blobs - int16_t pitch, //of block - int16_t blob_count, //no of real blobs - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale); -void plot_fp_cells2( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - FPSEGPT_LIST *seg_list //segments to plot - ); -void plot_row_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - float xshift, //amount of shift - ICOORDELT_LIST *cells //cells to draw - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgblob.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgblob.cpp deleted file mode 100644 index 920100a1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgblob.cpp +++ /dev/null @@ -1,462 +0,0 @@ -/********************************************************************** - * File: edgblob.cpp (Formerly edgeloop.c) - * Description: Functions to clean up an outline before approximation. - * Author: Ray Smith - * Created: Tue Mar 26 16:56:25 GMT 1991 - * - *(C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0(the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "scanedg.h" -#include "drawedg.h" -#include "edgloop.h" -#include "edgblob.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define EXTERN - -// Control parameters used in outline_complexity(), which rejects an outline -// if any one of the 3 conditions is satisfied: -// - number of children exceeds edges_max_children_per_outline -// - number of nested layers exceeds edges_max_children_layers -// - joint complexity exceeds edges_children_count_limit(as in child_count()) -EXTERN BOOL_VAR(edges_use_new_outline_complexity, FALSE, - "Use the new outline complexity module"); -EXTERN INT_VAR(edges_max_children_per_outline, 10, - "Max number of children inside a character outline"); -EXTERN INT_VAR(edges_max_children_layers, 5, - "Max layers of nested children inside a character outline"); -EXTERN BOOL_VAR(edges_debug, FALSE, - "turn on debugging for this module"); - - -EXTERN INT_VAR(edges_children_per_grandchild, 10, - "Importance ratio for chucking outlines"); -EXTERN INT_VAR(edges_children_count_limit, 45, - "Max holes allowed in blob"); -EXTERN BOOL_VAR(edges_children_fix, FALSE, - "Remove boxy parents of char-like children"); -EXTERN INT_VAR(edges_min_nonhole, 12, - "Min pixels for potential char in box"); -EXTERN INT_VAR(edges_patharea_ratio, 40, - "Max lensq/area for acceptable child outline"); -EXTERN double_VAR(edges_childarea, 0.5, - "Min area fraction of child outline"); -EXTERN double_VAR(edges_boxarea, 0.875, - "Min area fraction of grandchild for box"); - -/** - * @name OL_BUCKETS::OL_BUCKETS - * - * Construct an array of buckets for associating outlines into blobs. 
- */ - -OL_BUCKETS::OL_BUCKETS( -ICOORD bleft, // corners -ICOORD tright): bl(bleft), tr(tright) { - bxdim =(tright.x() - bleft.x()) / BUCKETSIZE + 1; - bydim =(tright.y() - bleft.y()) / BUCKETSIZE + 1; - // make array - buckets.reset(new C_OUTLINE_LIST[bxdim * bydim]); - index = 0; -} - - -/** - * @name OL_BUCKETS::operator( - * - * Return a pointer to a list of C_OUTLINEs corresponding to the - * given pixel coordinates. - */ - -C_OUTLINE_LIST * -OL_BUCKETS::operator()( // array access -int16_t x, // image coords -int16_t y) { - return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE]; -} - - -/** - * @name OL_BUCKETS::outline_complexity - * - * This is the new version of count_child. - * - * The goal of this function is to determine if an outline and its - * interiors could be part of a character blob. This is done by - * computing a "complexity" index for the outline, which is the return - * value of this function, and checking it against a threshold. - * The max_count is used for short-circuiting the recursion and forcing - * a rejection that guarantees to fail the threshold test. - * The complexity F for outline X with N children X[i] is - * F(X) = N + sum_i F(X[i]) * edges_children_per_grandchild - * so each layer of nesting increases complexity exponentially. - * An outline can be rejected as a text blob candidate if its complexity - * is too high, has too many children(likely a container), or has too - * many layers of nested inner loops. This has the side-effect of - * flattening out boxed or reversed video text regions. 
- */ - -int32_t OL_BUCKETS::outline_complexity( - C_OUTLINE *outline, // parent outline - int32_t max_count, // max output - int16_t depth // recurion depth - ) { - int16_t xmin, xmax; // coord limits - int16_t ymin, ymax; - int16_t xindex, yindex; // current bucket - C_OUTLINE *child; // current child - int32_t child_count; // no of children - int32_t grandchild_count; // no of grandchildren - C_OUTLINE_IT child_it; // search iterator - - TBOX olbox = outline->bounding_box(); - xmin =(olbox.left() - bl.x()) / BUCKETSIZE; - xmax =(olbox.right() - bl.x()) / BUCKETSIZE; - ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; - ymax =(olbox.top() - bl.y()) / BUCKETSIZE; - child_count = 0; - grandchild_count = 0; - if (++depth > edges_max_children_layers) // nested loops are too deep - return max_count + depth; - - for (yindex = ymin; yindex <= ymax; yindex++) { - for (xindex = xmin; xindex <= xmax; xindex++) { - child_it.set_to_list(&buckets[yindex * bxdim + xindex]); - if (child_it.empty()) - continue; - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); - child_it.forward()) { - child = child_it.data(); - if (child == outline || !(*child < *outline)) - continue; - child_count++; - - if (child_count > edges_max_children_per_outline) { // too fragmented - if (edges_debug) - tprintf("Discard outline on child_count=%d > " - "max_children_per_outline=%d\n", - child_count, - static_cast(edges_max_children_per_outline)); - return max_count + child_count; - } - - // Compute the "complexity" of each child recursively - int32_t remaining_count = max_count - child_count - grandchild_count; - if (remaining_count > 0) - grandchild_count += edges_children_per_grandchild * - outline_complexity(child, remaining_count, depth); - if (child_count + grandchild_count > max_count) { // too complex - if (edges_debug) - tprintf("Disgard outline on child_count=%d + grandchild_count=%d " - "> max_count=%d\n", - child_count, grandchild_count, max_count); - return child_count + grandchild_count; 
- } - } - } - } - return child_count + grandchild_count; -} - - -/** - * @name OL_BUCKETS::count_children - * - * Find number of descendants of this outline. - */ -// TODO(rays) Merge with outline_complexity. -int32_t OL_BUCKETS::count_children( // recursive count - C_OUTLINE *outline, // parent outline - int32_t max_count // max output - ) { - bool parent_box; // could it be boxy - int16_t xmin, xmax; // coord limits - int16_t ymin, ymax; - int16_t xindex, yindex; // current bucket - C_OUTLINE *child; // current child - int32_t child_count; // no of children - int32_t grandchild_count; // no of grandchildren - int32_t parent_area; // potential box - float max_parent_area; // potential box - int32_t child_area; // current child - int32_t child_length; // current child - TBOX olbox; - C_OUTLINE_IT child_it; // search iterator - - olbox = outline->bounding_box(); - xmin =(olbox.left() - bl.x()) / BUCKETSIZE; - xmax =(olbox.right() - bl.x()) / BUCKETSIZE; - ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; - ymax =(olbox.top() - bl.y()) / BUCKETSIZE; - child_count = 0; - grandchild_count = 0; - parent_area = 0; - max_parent_area = 0; - parent_box = true; - for (yindex = ymin; yindex <= ymax; yindex++) { - for (xindex = xmin; xindex <= xmax; xindex++) { - child_it.set_to_list(&buckets[yindex * bxdim + xindex]); - if (child_it.empty()) - continue; - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); - child_it.forward()) { - child = child_it.data(); - if (child != outline && *child < *outline) { - child_count++; - if (child_count <= max_count) { - int max_grand =(max_count - child_count) / - edges_children_per_grandchild; - if (max_grand > 0) - grandchild_count += count_children(child, max_grand) * - edges_children_per_grandchild; - else - grandchild_count += count_children(child, 1); - } - if (child_count + grandchild_count > max_count) { - if (edges_debug) - tprintf("Discarding parent with child count=%d, gc=%d\n", - child_count,grandchild_count); - return 
child_count + grandchild_count; - } - if (parent_area == 0) { - parent_area = outline->outer_area(); - if (parent_area < 0) - parent_area = -parent_area; - max_parent_area = outline->bounding_box().area() * edges_boxarea; - if (parent_area < max_parent_area) - parent_box = false; - } - if (parent_box && - (!edges_children_fix || - child->bounding_box().height() > edges_min_nonhole)) { - child_area = child->outer_area(); - if (child_area < 0) - child_area = -child_area; - if (edges_children_fix) { - if (parent_area - child_area < max_parent_area) { - parent_box = false; - continue; - } - if (grandchild_count > 0) { - if (edges_debug) - tprintf("Discarding parent of area %d, child area=%d, max%g " - "with gc=%d\n", - parent_area, child_area, max_parent_area, - grandchild_count); - return max_count + 1; - } - child_length = child->pathlength(); - if (child_length * child_length > - child_area * edges_patharea_ratio) { - if (edges_debug) - tprintf("Discarding parent of area %d, child area=%d, max%g " - "with child length=%d\n", - parent_area, child_area, max_parent_area, - child_length); - return max_count + 1; - } - } - if (child_area < child->bounding_box().area() * edges_childarea) { - if (edges_debug) - tprintf("Discarding parent of area %d, child area=%d, max%g " - "with child rect=%d\n", - parent_area, child_area, max_parent_area, - child->bounding_box().area()); - return max_count + 1; - } - } - } - } - } - } - return child_count + grandchild_count; -} - - - - -/** - * @name OL_BUCKETS::extract_children - * - * Find number of descendants of this outline. 
- */ - -void OL_BUCKETS::extract_children( // recursive count - C_OUTLINE *outline, // parent outline - C_OUTLINE_IT *it // destination iterator - ) { - int16_t xmin, xmax; // coord limits - int16_t ymin, ymax; - int16_t xindex, yindex; // current bucket - TBOX olbox; - C_OUTLINE_IT child_it; // search iterator - - olbox = outline->bounding_box(); - xmin =(olbox.left() - bl.x()) / BUCKETSIZE; - xmax =(olbox.right() - bl.x()) / BUCKETSIZE; - ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; - ymax =(olbox.top() - bl.y()) / BUCKETSIZE; - for (yindex = ymin; yindex <= ymax; yindex++) { - for (xindex = xmin; xindex <= xmax; xindex++) { - child_it.set_to_list(&buckets[yindex * bxdim + xindex]); - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); - child_it.forward()) { - if (*child_it.data() < *outline) { - it->add_after_then_move(child_it.extract()); - } - } - } - } -} - - -/** - * @name extract_edges - * - * Run the edge detector over the block and return a list of blobs. - */ - -void extract_edges(Pix* pix, // thresholded image - BLOCK *block) { // block to scan - C_OUTLINE_LIST outlines; // outlines in block - C_OUTLINE_IT out_it = &outlines; - - block_edges(pix, &(block->pdblk), &out_it); - ICOORD bleft; // block box - ICOORD tright; - block->pdblk.bounding_box(bleft, tright); - // make blobs - outlines_to_blobs(block, bleft, tright, &outlines); -} - - -/** - * @name outlines_to_blobs - * - * Gather together outlines into blobs using the usual bucket sort. - */ - -void outlines_to_blobs( // find blobs - BLOCK *block, // block to scan - ICOORD bleft, - ICOORD tright, - C_OUTLINE_LIST *outlines) { - // make buckets - OL_BUCKETS buckets(bleft, tright); - - fill_buckets(outlines, &buckets); - empty_buckets(block, &buckets); -} - - -/** - * @name fill_buckets - * - * Run the edge detector over the block and return a list of blobs. 
- */ - -void fill_buckets( // find blobs - C_OUTLINE_LIST *outlines, // outlines in block - OL_BUCKETS *buckets // output buckets - ) { - TBOX ol_box; // outline box - C_OUTLINE_IT out_it = outlines; // iterator - C_OUTLINE_IT bucket_it; // iterator in bucket - C_OUTLINE *outline; // current outline - - for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { - outline = out_it.extract(); // take off list - // get box - ol_box = outline->bounding_box(); - bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom())); - bucket_it.add_to_end(outline); - } -} - - -/** - * @name empty_buckets - * - * Run the edge detector over the block and return a list of blobs. - */ - -void empty_buckets( // find blobs - BLOCK *block, // block to scan - OL_BUCKETS *buckets // output buckets - ) { - bool good_blob; // healthy blob - C_OUTLINE_LIST outlines; // outlines in block - // iterator - C_OUTLINE_IT out_it = &outlines; - C_OUTLINE_IT bucket_it = buckets->start_scan(); - C_OUTLINE_IT parent_it; // parent outline - C_BLOB_IT good_blobs = block->blob_list(); - C_BLOB_IT junk_blobs = block->reject_blobs(); - - while (!bucket_it.empty()) { - out_it.set_to_list(&outlines); - do { - parent_it = bucket_it; // find outermost - do { - bucket_it.forward(); - } while (!bucket_it.at_first() && - !(*parent_it.data() < *bucket_it.data())); - } while (!bucket_it.at_first()); - - // move to new list - out_it.add_after_then_move(parent_it.extract()); - good_blob = capture_children(buckets, &junk_blobs, &out_it); - C_BLOB::ConstructBlobsFromOutlines(good_blob, &outlines, &good_blobs, - &junk_blobs); - - bucket_it.set_to_list(buckets->scan_next()); - } -} - - -/** - * @name capture_children - * - * Find all neighbouring outlines that are children of this outline - * and either move them to the output list or declare this outline - * illegal and return FALSE. 
- */ - -bool capture_children( // find children - OL_BUCKETS* buckets, // bucket sort clanss - C_BLOB_IT* reject_it, // dead grandchildren - C_OUTLINE_IT* blob_it // output outlines -) { - C_OUTLINE *outline; // master outline - int32_t child_count; // no of children - - outline = blob_it->data(); - if (edges_use_new_outline_complexity) - child_count = buckets->outline_complexity(outline, - edges_children_count_limit, - 0); - else - child_count = buckets->count_children(outline, - edges_children_count_limit); - if (child_count > edges_children_count_limit) - return false; - - if (child_count > 0) - buckets->extract_children(outline, blob_it); - return true; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgblob.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgblob.h deleted file mode 100644 index e59bea2f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgblob.h +++ /dev/null @@ -1,95 +0,0 @@ -/********************************************************************** - * File: edgblob.h (Formerly edgeloop.h) - * Description: Functions to clean up an outline before approximation. - * Author: Ray Smith - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef EDGBLOB_H -#define EDGBLOB_H - -#include "scrollview.h" -#include "params.h" -#include "ocrblock.h" -#include "coutln.h" -#include "crakedge.h" - -#include - -#define BUCKETSIZE 16 - -class OL_BUCKETS -{ - public: - OL_BUCKETS( //constructor - ICOORD bleft, //corners - ICOORD tright); - - ~OL_BUCKETS () = default; - - C_OUTLINE_LIST *operator () (//array access - int16_t x, //image coords - int16_t y); - //first non-empty bucket - C_OUTLINE_LIST *start_scan() { - for (index = 0; buckets[index].empty () && index < bxdim * bydim - 1; - index++); - return &buckets[index]; - } - //next non-empty bucket - C_OUTLINE_LIST *scan_next() { - for (; buckets[index].empty () && index < bxdim * bydim - 1; index++); - return &buckets[index]; - } - int32_t count_children( //recursive sum - C_OUTLINE *outline, //parent outline - int32_t max_count); // max output - int32_t outline_complexity( // new version of count_children - C_OUTLINE *outline, // parent outline - int32_t max_count, // max output - int16_t depth); // level of recursion - void extract_children( //single level get - C_OUTLINE *outline, //parent outline - C_OUTLINE_IT *it); //destination iterator - - private: - std::unique_ptr buckets; //array of buckets - int16_t bxdim; //size of array - int16_t bydim; - ICOORD bl; //corners - ICOORD tr; - int32_t index; //for extraction scan -}; - -void extract_edges(Pix* pix, // thresholded image - BLOCK* block); // block to scan -void outlines_to_blobs( //find blobs - BLOCK *block, //block to scan - ICOORD bleft, //block box //outlines in block - ICOORD tright, - C_OUTLINE_LIST *outlines); -void fill_buckets( //find blobs - C_OUTLINE_LIST *outlines, //outlines in block - OL_BUCKETS *buckets //output buckets - ); -void empty_buckets( //find blobs - BLOCK *block, //block to scan - OL_BUCKETS *buckets //output buckets - ); -bool capture_children( //find children - OL_BUCKETS* buckets, //bucket 
sort clanss - C_BLOB_IT* reject_it, //dead grandchildren - C_OUTLINE_IT* blob_it //output outlines -); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgloop.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgloop.cpp deleted file mode 100644 index 2c42e174..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgloop.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/********************************************************************** - * File: edgloop.cpp (Formerly edgeloop.c) - * Description: Functions to clean up an outline before approximation. - * Author: Ray Smith - * Created: Tue Mar 26 16:56:25 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "scanedg.h" -#include "drawedg.h" -#include "edgloop.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define MINEDGELENGTH 8 // min decent length - -/********************************************************************** - * complete_edge - * - * Complete the edge by cleaning it up. 
- **********************************************************************/ - -void complete_edge(CRACKEDGE *start, //start of loop - C_OUTLINE_IT* outline_it) { - ScrollView::Color colour; //colour to draw in - int16_t looplength; //steps in loop - ICOORD botleft; //bounding box - ICOORD topright; - C_OUTLINE *outline; //new outline - - //check length etc. - colour = check_path_legal (start); - - if (colour == ScrollView::RED || colour == ScrollView::BLUE) { - looplength = loop_bounding_box (start, botleft, topright); - outline = new C_OUTLINE (start, botleft, topright, looplength); - //add to list - outline_it->add_after_then_move (outline); - } -} - - -/********************************************************************** - * check_path_legal - * - * Check that the outline is legal for length and for chaincode sum. - * The return value is RED for a normal black-inside outline, - * BLUE for a white-inside outline, MAGENTA if it is too short, - * YELLOW if it is too long, and GREEN if it is illegal. - * These colours are used to draw the raw outline. 
- **********************************************************************/ - -ScrollView::Color check_path_legal( //certify outline - CRACKEDGE *start //start of loop - ) { - int lastchain; //last chain code - int chaindiff; //chain code diff - int32_t length; //length of loop - int32_t chainsum; //sum of chain diffs - CRACKEDGE *edgept; //current point - const ERRCODE ED_ILLEGAL_SUM = "Illegal sum of chain codes"; - - length = 0; - chainsum = 0; //sum of chain codes - edgept = start; - lastchain = edgept->prev->stepdir; //previous chain code - do { - length++; - if (edgept->stepdir != lastchain) { - //chain code difference - chaindiff = edgept->stepdir - lastchain; - if (chaindiff > 2) - chaindiff -= 4; - else if (chaindiff < -2) - chaindiff += 4; - chainsum += chaindiff; //sum differences - lastchain = edgept->stepdir; - } - edgept = edgept->next; - } - while (edgept != start && length < C_OUTLINE::kMaxOutlineLength); - - if ((chainsum != 4 && chainsum != -4) - || edgept != start || length < MINEDGELENGTH) { - if (edgept != start) { - return ScrollView::YELLOW; - } else if (length < MINEDGELENGTH) { - return ScrollView::MAGENTA; - } else { - ED_ILLEGAL_SUM.error ("check_path_legal", TESSLOG, "chainsum=%d", - chainsum); - return ScrollView::GREEN; - } - } - //colour on inside - return chainsum < 0 ? ScrollView::BLUE : ScrollView::RED; -} - -/********************************************************************** - * loop_bounding_box - * - * Find the bounding box of the edge loop. 
- **********************************************************************/ - -int16_t loop_bounding_box( //get bounding box - CRACKEDGE *&start, //edge loop - ICOORD &botleft, //bounding box - ICOORD &topright) { - int16_t length; //length of loop - int16_t leftmost; //on top row - CRACKEDGE *edgept; //current point - CRACKEDGE *realstart; //topleft start - - edgept = start; - realstart = start; - botleft = topright = ICOORD (edgept->pos.x (), edgept->pos.y ()); - leftmost = edgept->pos.x (); - length = 0; //coutn length - do { - edgept = edgept->next; - if (edgept->pos.x () < botleft.x ()) - //get bounding box - botleft.set_x (edgept->pos.x ()); - else if (edgept->pos.x () > topright.x ()) - topright.set_x (edgept->pos.x ()); - if (edgept->pos.y () < botleft.y ()) - //get bounding box - botleft.set_y (edgept->pos.y ()); - else if (edgept->pos.y () > topright.y ()) { - realstart = edgept; - leftmost = edgept->pos.x (); - topright.set_y (edgept->pos.y ()); - } - else if (edgept->pos.y () == topright.y () - && edgept->pos.x () < leftmost) { - //leftmost on line - leftmost = edgept->pos.x (); - realstart = edgept; - } - length++; //count elements - } - while (edgept != start); - start = realstart; //shift it to topleft - return length; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgloop.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgloop.h deleted file mode 100644 index c3686cbc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/edgloop.h +++ /dev/null @@ -1,49 +0,0 @@ -/********************************************************************** - * File: edgloop.h (Formerly edgeloop.h) - * Description: Functions to clean up an outline before approximation. - * Author: Ray Smith - * Created: Tue Mar 26 16:56:25 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef EDGLOOP_H -#define EDGLOOP_H - -#include "scrollview.h" -#include "params.h" -#include "pdblock.h" -#include "coutln.h" -#include "crakedge.h" - -#define BUCKETSIZE 16 - - -extern INT_VAR_H (edges_children_per_grandchild, 10, -"Importance ratio for chucking outlines"); -extern INT_VAR_H (edges_children_count_limit, 45, -"Max holes allowed in blob"); -extern double_VAR_H (edges_childarea, 0.5, -"Max area fraction of child outline"); -extern double_VAR_H (edges_boxarea, 0.8, -"Min area fraction of grandchild for box"); -void complete_edge(CRACKEDGE *start, //start of loop - C_OUTLINE_IT* outline_it); -ScrollView::Color check_path_legal( //certify outline - CRACKEDGE *start //start of loop - ); -int16_t loop_bounding_box( //get bounding box - CRACKEDGE *&start, //edge loop - ICOORD &botleft, //bounding box - ICOORD &topright); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/equationdetectbase.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/equationdetectbase.cpp deleted file mode 100644 index 1d40ed8e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/equationdetectbase.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: equationdetectbase.cpp -// Description: The base class 
equation detection class. -// Author: Zongyi (Joe) Liu (joeliu@google.com) -// Created: Fri Aug 31 11:13:01 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "allheaders.h" -#include "blobbox.h" -#include "equationdetectbase.h" - -namespace tesseract { - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -EquationDetectBase::~EquationDetectBase() = default; - -void EquationDetectBase::RenderSpecialText(Pix* pix, - BLOBNBOX* blob) { - ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr); - const TBOX& tbox = blob->bounding_box(); - int height = pixGetHeight(pix); - const int box_width = 5; - - // Coordinate translation: tesseract use left bottom as the original, while - // leptonica uses left top as the original. - Box *box = boxCreate(tbox.left(), height - tbox.top(), - tbox.width(), tbox.height()); - switch (blob->special_text_type()) { - case BSTT_MATH: // Red box. - pixRenderBoxArb(pix, box, box_width, 255, 0, 0); - break; - case BSTT_DIGIT: // cyan box. - pixRenderBoxArb(pix, box, box_width, 0, 255, 255); - break; - case BSTT_ITALIC: // Green box. - pixRenderBoxArb(pix, box, box_width, 0, 255, 0); - break; - case BSTT_UNCLEAR: // blue box. 
- pixRenderBoxArb(pix, box, box_width, 0, 255, 0); - break; - case BSTT_NONE: - default: - // yellow box. - pixRenderBoxArb(pix, box, box_width, 255, 255, 0); - break; - } - boxDestroy(&box); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/equationdetectbase.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/equationdetectbase.h deleted file mode 100644 index d0e8c12e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/equationdetectbase.h +++ /dev/null @@ -1,59 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: equationdetectbase.h -// Description: The base class equation detection class. -// Author: Zongyi (Joe) Liu (joeliu@google.com) -// Created: Fri Aug 31 11:13:01 PST 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ -#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ - -class BLOBNBOX_LIST; -class TO_BLOCK; -struct Pix; - -namespace tesseract { - -class ColPartitionGrid; -class ColPartitionSet; - -class EquationDetectBase { - public: - EquationDetectBase() = default; - virtual ~EquationDetectBase(); - - // Iterate over the blobs inside to_block, and set the blobs that we want to - // process to BSTT_NONE. (By default, they should be BSTT_SKIP). 
The function - // returns 0 upon success. - virtual int LabelSpecialText(TO_BLOCK* to_block) = 0; - - // Interface to find possible equation partition grid from part_grid. This - // should be called after IdentifySpecialText function. - virtual int FindEquationParts(ColPartitionGrid* part_grid, - ColPartitionSet** best_columns) = 0; - - // Debug function: Render a bounding box on pix based on the value of its - // special_text_type, specifically: - // BSTT_MATH: red box - // BSTT_DIGIT: cyan box - // BSTT_ITALIC: green box - // BSTT_UNCLEAR: blue box - // All others: yellow box - static void RenderSpecialText(Pix* pix, BLOBNBOX* blob); -}; - -} // namespace tesseract - -#endif // TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/fpchop.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/fpchop.cpp deleted file mode 100644 index fe0d3c6d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/fpchop.cpp +++ /dev/null @@ -1,816 +0,0 @@ -/********************************************************************** - * File: fpchop.cpp (Formerly fp_chop.c) - * Description: Code to chop fixed pitch text into character cells. - * Author: Ray Smith - * Created: Thu Sep 16 11:14:15 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "blobbox.h" -#include "statistc.h" -#include "drawtord.h" -#include "tovars.h" -#include "topitch.h" -#include "fpchop.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define EXTERN - -EXTERN INT_VAR (textord_fp_chop_error, 2, -"Max allowed bending of chop cells"); -EXTERN double_VAR (textord_fp_chop_snap, 0.5, -"Max distance of chop pt from vertex"); - -ELISTIZE(C_OUTLINE_FRAG) -//#undef ASSERT_HOST -//#define ASSERT_HOST(x) if (!(x)) AfxMessageBox(#x); -/********************************************************************** - * fixed_pitch_words - * - * Make a ROW from a fixed pitch TO_ROW. - **********************************************************************/ -ROW *fixed_pitch_words( //find lines - TO_ROW *row, //row to do - FCOORD rotation //for drawing - ) { - bool bol; //start of line - uint8_t blanks; //in front of word - uint8_t new_blanks; //blanks in empty cell - int16_t chop_coord; //chop boundary - int16_t prev_chop_coord; //start of cell - int16_t rep_left; //left edge of rep word - ROW *real_row; //output row - C_OUTLINE_LIST left_coutlines; - C_OUTLINE_LIST right_coutlines; - C_BLOB_LIST cblobs; - C_BLOB_IT cblob_it = &cblobs; - WERD_LIST words; - WERD_IT word_it = &words; //new words - //repeated blobs - WERD_IT rep_it = &row->rep_words; - WERD *word; //new word - int32_t xstarts[2]; //row ends - int32_t prev_x; //end of prev blob - //iterator - BLOBNBOX_IT box_it = row->blob_list (); - //boundaries - ICOORDELT_IT cell_it = &row->char_cells; - -#ifndef GRAPHICS_DISABLED - if (textord_show_page_cuts && to_win != nullptr) { - plot_row_cells (to_win, ScrollView::RED, row, 0, &row->char_cells); - } -#endif - - prev_x = -INT16_MAX; - bol = true; - blanks = 0; - if (rep_it.empty ()) - rep_left = INT16_MAX; - else - rep_left = rep_it.data ()->bounding_box ().left (); - if 
(box_it.empty ()) - return nullptr; //empty row - xstarts[0] = box_it.data ()->bounding_box ().left (); - if (rep_left < xstarts[0]) { - xstarts[0] = rep_left; - } - if (cell_it.empty () || row->char_cells.singleton ()) { - tprintf ("Row without enough char cells!\n"); - tprintf ("Leftmost blob is at (%d,%d)\n", - box_it.data ()->bounding_box ().left (), - box_it.data ()->bounding_box ().bottom ()); - return nullptr; - } - ASSERT_HOST (!cell_it.empty () && !row->char_cells.singleton ()); - prev_chop_coord = cell_it.data ()->x (); - word = nullptr; - while (rep_left < cell_it.data ()->x ()) { - word = add_repeated_word (&rep_it, rep_left, prev_chop_coord, - blanks, row->fixed_pitch, &word_it); - } - cell_it.mark_cycle_pt (); - if (prev_chop_coord >= cell_it.data ()->x ()) - cell_it.forward (); - for (; !cell_it.cycled_list (); cell_it.forward ()) { - chop_coord = cell_it.data ()->x (); - while (!box_it.empty () - && box_it.data ()->bounding_box ().left () <= chop_coord) { - if (box_it.data ()->bounding_box ().right () > prev_x) - prev_x = box_it.data ()->bounding_box ().right (); - split_to_blob (box_it.extract (), chop_coord, - textord_fp_chop_error + 0.5f, - &left_coutlines, - &right_coutlines); - box_it.forward (); - while (!box_it.empty() && box_it.data()->cblob() == nullptr) { - delete box_it.extract(); - box_it.forward(); - } - } - if (!right_coutlines.empty() && left_coutlines.empty()) - split_to_blob (nullptr, chop_coord, - textord_fp_chop_error + 0.5f, - &left_coutlines, - &right_coutlines); - if (!left_coutlines.empty()) { - cblob_it.add_after_then_move(new C_BLOB(&left_coutlines)); - } else { - if (rep_left < chop_coord) { - if (rep_left > prev_chop_coord) - new_blanks = (uint8_t) floor ((rep_left - prev_chop_coord) - / row->fixed_pitch + 0.5); - else - new_blanks = 0; - } - else { - if (chop_coord > prev_chop_coord) - new_blanks = (uint8_t) floor ((chop_coord - prev_chop_coord) - / row->fixed_pitch + 0.5); - else - new_blanks = 0; - } - if 
(!cblob_it.empty()) { - if (blanks < 1 && word != nullptr && !word->flag (W_REP_CHAR)) - blanks = 1; - word = new WERD (&cblobs, blanks, nullptr); - cblob_it.set_to_list (&cblobs); - word->set_flag (W_DONT_CHOP, TRUE); - word_it.add_after_then_move (word); - if (bol) { - word->set_flag (W_BOL, TRUE); - bol = false; - } - blanks = new_blanks; - } - else - blanks += new_blanks; - while (rep_left < chop_coord) { - word = add_repeated_word (&rep_it, rep_left, prev_chop_coord, - blanks, row->fixed_pitch, &word_it); - } - } - if (prev_chop_coord < chop_coord) - prev_chop_coord = chop_coord; - } - if (!cblob_it.empty()) { - word = new WERD(&cblobs, blanks, nullptr); - word->set_flag (W_DONT_CHOP, TRUE); - word_it.add_after_then_move (word); - if (bol) - word->set_flag (W_BOL, TRUE); - } - ASSERT_HOST (word != nullptr); - while (!rep_it.empty ()) { - add_repeated_word (&rep_it, rep_left, prev_chop_coord, - blanks, row->fixed_pitch, &word_it); - } - //at end of line - word_it.data ()->set_flag (W_EOL, TRUE); - if (prev_chop_coord > prev_x) - prev_x = prev_chop_coord; - xstarts[1] = prev_x + 1; - real_row = new ROW (row, (int16_t) row->kern_size, (int16_t) row->space_size); - word_it.set_to_list (real_row->word_list ()); - //put words in row - word_it.add_list_after (&words); - real_row->recalc_bounding_box (); - return real_row; -} - - -/********************************************************************** - * add_repeated_word - * - * Add repeated word into the row at the given point. 
- **********************************************************************/ - -WERD *add_repeated_word( //move repeated word - WERD_IT *rep_it, //repeated words - int16_t &rep_left, //left edge of word - int16_t &prev_chop_coord, //previous word end - uint8_t &blanks, //no of blanks - float pitch, //char cell size - WERD_IT *word_it //list of words - ) { - WERD *word; //word to move - int16_t new_blanks; //extra blanks - - if (rep_left > prev_chop_coord) { - new_blanks = (uint8_t) floor ((rep_left - prev_chop_coord) / pitch + 0.5); - blanks += new_blanks; - } - word = rep_it->extract (); - prev_chop_coord = word->bounding_box ().right (); - word_it->add_after_then_move (word); - word->set_blanks (blanks); - rep_it->forward (); - if (rep_it->empty ()) - rep_left = INT16_MAX; - else - rep_left = rep_it->data ()->bounding_box ().left (); - blanks = 0; - return word; -} - - -/********************************************************************** - * split_to_blob - * - * Split a BLOBNBOX across a vertical chop line and put the pieces - * into a left outline list and a right outline list. - **********************************************************************/ - -void split_to_blob( //split the blob - BLOBNBOX *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_coutlines, //for cblobs - C_OUTLINE_LIST *right_coutlines) { - C_BLOB *real_cblob; //cblob to chop - - if (blob != nullptr) { - real_cblob = blob->cblob(); - } else { - real_cblob = nullptr; - } - if (!right_coutlines->empty() || real_cblob != nullptr) - fixed_chop_cblob(real_cblob, - chop_coord, - pitch_error, - left_coutlines, - right_coutlines); - - delete blob; -} - -/********************************************************************** - * fixed_chop_cblob - * - * Chop the given cblob (if any) and the existing right outlines to - * produce a list of outlines left of the chop point and more to the right. 
- **********************************************************************/ - -void fixed_chop_cblob( //split the blob - C_BLOB *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_outlines, //left half of chop - C_OUTLINE_LIST *right_outlines //right half of chop - ) { - C_OUTLINE *old_right; //already there - C_OUTLINE_LIST new_outlines; //new right ones - //output iterator - C_OUTLINE_IT left_it = left_outlines; - //in/out iterator - C_OUTLINE_IT right_it = right_outlines; - C_OUTLINE_IT new_it = &new_outlines; - C_OUTLINE_IT blob_it; //outlines in blob - - if (!right_it.empty ()) { - while (!right_it.empty ()) { - old_right = right_it.extract (); - right_it.forward (); - fixed_split_coutline(old_right, - chop_coord, - pitch_error, - &left_it, - &new_it); - } - right_it.add_list_before (&new_outlines); - } - if (blob != nullptr) { - blob_it.set_to_list (blob->out_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) - fixed_split_coutline (blob_it.extract (), chop_coord, pitch_error, - &left_it, &right_it); - delete blob; - } -} - - -/********************************************************************** - * fixed_split_outline - * - * Chop the given outline (if necessary) placing the fragments which - * fall either side of the chop line into the appropriate list. 
- **********************************************************************/ - -void fixed_split_coutline( //chop the outline - C_OUTLINE *srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_IT *left_it, //left half of chop - C_OUTLINE_IT *right_it //right half of chop - ) { - C_OUTLINE *child; //child outline - TBOX srcbox; //box of outline - C_OUTLINE_LIST left_ch; //left children - C_OUTLINE_LIST right_ch; //right children - C_OUTLINE_FRAG_LIST left_frags;//chopped fragments - C_OUTLINE_FRAG_LIST right_frags;; - C_OUTLINE_IT left_ch_it = &left_ch; - //for whole children - C_OUTLINE_IT right_ch_it = &right_ch; - //for holes - C_OUTLINE_IT child_it = srcline->child (); - - srcbox = srcline->bounding_box(); - if (srcbox.left() + srcbox.right() <= chop_coord * 2 - && srcbox.right() < chop_coord + pitch_error) { - // Whole outline is in the left side or not far over the chop_coord, - // so put the whole thing on the left. - left_it->add_after_then_move(srcline); - } else if (srcbox.left() + srcbox.right() > chop_coord * 2 - && srcbox.left () > chop_coord - pitch_error) { - // Whole outline is in the right side or not far over the chop_coord, - // so put the whole thing on the right. - right_it->add_before_stay_put(srcline); - } else { - // Needs real chopping. - if (fixed_chop_coutline(srcline, chop_coord, pitch_error, - &left_frags, &right_frags)) { - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); - child_it.forward()) { - child = child_it.extract(); - srcbox = child->bounding_box(); - if (srcbox.right() < chop_coord) { - // Whole child is on the left. - left_ch_it.add_after_then_move(child); - } else if (srcbox.left() > chop_coord) { - // Whole child is on the right. - right_ch_it.add_after_then_move (child); - } else { - // No pitch_error is allowed when chopping children to prevent - // impossible outlines from being created. 
- if (fixed_chop_coutline(child, chop_coord, 0.0f, - &left_frags, &right_frags)) { - delete child; - } else { - if (srcbox.left() + srcbox.right() <= chop_coord * 2) - left_ch_it.add_after_then_move(child); - else - right_ch_it.add_after_then_move(child); - } - } - } - close_chopped_cfragments(&left_frags, &left_ch, pitch_error, left_it); - close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it); - ASSERT_HOST(left_ch.empty() && right_ch.empty()); - // No children left. - delete srcline; // Smashed up. - } else { - // Chop failed. Just use middle coord. - if (srcbox.left() + srcbox.right() <= chop_coord * 2) - left_it->add_after_then_move(srcline); // Stick whole in left. - else - right_it->add_before_stay_put(srcline); - } - } -} - - -/********************************************************************** - * fixed_chop_coutline - * - * Chop the given coutline (if necessary) placing the fragments which - * fall either side of the chop line into the appropriate list. - * If the coutline lies too heavily to one side to chop, FALSE is returned. 
- **********************************************************************/ - -bool fixed_chop_coutline( //chop the outline - C_OUTLINE* srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_FRAG_LIST* left_frags, //left half of chop - C_OUTLINE_FRAG_LIST* right_frags //right half of chop -) { - bool first_frag; //fragment - int16_t left_edge; //of outline - int16_t startindex; //in first fragment - int32_t length; //of outline - int16_t stepindex; //into outline - int16_t head_index; //start of fragment - ICOORD head_pos; //start of fragment - int16_t tail_index; //end of fragment - ICOORD tail_pos; //end of fragment - ICOORD pos; //current point - int16_t first_index = 0; //first tail - ICOORD first_pos; //first tail - - length = srcline->pathlength (); - pos = srcline->start_pos (); - left_edge = pos.x (); - tail_index = 0; - tail_pos = pos; - for (stepindex = 0; stepindex < length; stepindex++) { - if (pos.x () < left_edge) { - left_edge = pos.x (); - tail_index = stepindex; - tail_pos = pos; - } - pos += srcline->step (stepindex); - } - if (left_edge >= chop_coord - pitch_error) - return false; //not worth it - - startindex = tail_index; - first_frag = true; - head_index = tail_index; - head_pos = tail_pos; - do { - do { - tail_pos += srcline->step (tail_index); - tail_index++; - if (tail_index == length) - tail_index = 0; - } - while (tail_pos.x () != chop_coord && tail_index != startindex); - if (tail_index == startindex) { - if (first_frag) - return false; //doesn't cross line - else - break; - } - ASSERT_HOST (head_index != tail_index); - if (!first_frag) { - save_chop_cfragment(head_index, - head_pos, - tail_index, - tail_pos, - srcline, - left_frags); - } - else { - first_index = tail_index; - first_pos = tail_pos; - first_frag = false; - } - while (srcline->step (tail_index).x () == 0) { - tail_pos += srcline->step (tail_index); - tail_index++; - if (tail_index == length) - tail_index = 0; - } 
- head_index = tail_index; - head_pos = tail_pos; - while (srcline->step (tail_index).x () > 0) { - do { - tail_pos += srcline->step (tail_index); - tail_index++; - if (tail_index == length) - tail_index = 0; - } - while (tail_pos.x () != chop_coord); - ASSERT_HOST (head_index != tail_index); - save_chop_cfragment(head_index, - head_pos, - tail_index, - tail_pos, - srcline, - right_frags); - while (srcline->step (tail_index).x () == 0) { - tail_pos += srcline->step (tail_index); - tail_index++; - if (tail_index == length) - tail_index = 0; - } - head_index = tail_index; - head_pos = tail_pos; - } - } - while (tail_index != startindex); - save_chop_cfragment(head_index, - head_pos, - first_index, - first_pos, - srcline, - left_frags); - return true; //did some chopping -} - -/********************************************************************** - * save_chop_cfragment - * - * Store the given fragment in the given fragment list. - **********************************************************************/ - -void save_chop_cfragment( //chop the outline - int16_t head_index, //head of fragment - ICOORD head_pos, //head of fragment - int16_t tail_index, //tail of fragment - ICOORD tail_pos, //tail of fragment - C_OUTLINE *srcline, //source of edgesteps - C_OUTLINE_FRAG_LIST *frags //fragment list - ) { - int16_t jump; //gap across end - int16_t stepcount; //total steps - C_OUTLINE_FRAG *head; //head of fragment - C_OUTLINE_FRAG *tail; //tail of fragment - int16_t tail_y; //ycoord of tail - - ASSERT_HOST (tail_pos.x () == head_pos.x ()); - ASSERT_HOST (tail_index != head_index); - stepcount = tail_index - head_index; - if (stepcount < 0) - stepcount += srcline->pathlength (); - jump = tail_pos.y () - head_pos.y (); - if (jump < 0) - jump = -jump; - if (jump == stepcount) - return; //its a nop - tail_y = tail_pos.y (); - head = new C_OUTLINE_FRAG (head_pos, tail_pos, srcline, - head_index, tail_index); - tail = new C_OUTLINE_FRAG (head, tail_y); - head->other_end = tail; - 
add_frag_to_list(head, frags); - add_frag_to_list(tail, frags); -} - - -/********************************************************************** - * C_OUTLINE_FRAG::C_OUTLINE_FRAG - * - * Constructors for C_OUTLINE_FRAG. - **********************************************************************/ - -C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment - ICOORD start_pt, //start coord - ICOORD end_pt, //end coord - C_OUTLINE *outline, //source of steps - int16_t start_index, - int16_t end_index) { - start = start_pt; - end = end_pt; - ycoord = start_pt.y (); - stepcount = end_index - start_index; - if (stepcount < 0) - stepcount += outline->pathlength (); - ASSERT_HOST (stepcount > 0); - steps = new DIR128[stepcount]; - if (end_index > start_index) { - for (int i = start_index; i < end_index; ++i) - steps[i - start_index] = outline->step_dir(i); - } - else { - int len = outline->pathlength(); - int i = start_index; - for (; i < len; ++i) - steps[i - start_index] = outline->step_dir(i); - if (end_index > 0) - for (; i < end_index + len; ++i) - steps[i - start_index] = outline->step_dir(i - len); - } - other_end = nullptr; - delete close(); -} - - -C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment - C_OUTLINE_FRAG *head, //other end - int16_t tail_y) { - ycoord = tail_y; - other_end = head; - start = head->start; - end = head->end; - steps = nullptr; - stepcount = 0; -} - - -/********************************************************************** - * add_frag_to_list - * - * Insert the fragment in the list at the appropriate place to keep - * them in ascending ycoord order. 
- **********************************************************************/ - -void add_frag_to_list( //ordered add - C_OUTLINE_FRAG *frag, //fragment to add - C_OUTLINE_FRAG_LIST *frags //fragment list - ) { - //output list - C_OUTLINE_FRAG_IT frag_it = frags; - - if (!frags->empty ()) { - for (frag_it.mark_cycle_pt (); !frag_it.cycled_list (); - frag_it.forward ()) { - if (frag_it.data ()->ycoord > frag->ycoord - || (frag_it.data ()->ycoord == frag->ycoord - && frag->other_end->ycoord < frag->ycoord)) { - frag_it.add_before_then_move (frag); - return; - } - } - } - frag_it.add_to_end (frag); -} - - -/********************************************************************** - * close_chopped_cfragments - * - * Clear the given list of fragments joining them up into outlines. - * Each outline made soaks up any of the child outlines which it encloses. - **********************************************************************/ - -void close_chopped_cfragments( //chop the outline - C_OUTLINE_FRAG_LIST *frags, //list to clear - C_OUTLINE_LIST *children, //potential children - float pitch_error, //allowed shrinkage - C_OUTLINE_IT *dest_it //output list - ) { - //iterator - C_OUTLINE_FRAG_IT frag_it = frags; - C_OUTLINE_FRAG *bottom_frag; //bottom of cut - C_OUTLINE_FRAG *top_frag; //top of cut - C_OUTLINE *outline; //new outline - C_OUTLINE *child; //current child - C_OUTLINE_IT child_it = children; - C_OUTLINE_IT olchild_it; //children of outline - - while (!frag_it.empty()) { - frag_it.move_to_first(); - // get bottom one - bottom_frag = frag_it.extract(); - frag_it.forward(); - top_frag = frag_it.data(); // look at next - if ((bottom_frag->steps == nullptr && top_frag->steps == nullptr) - || (bottom_frag->steps != nullptr && top_frag->steps != nullptr)) { - if (frag_it.data_relative(1)->ycoord == top_frag->ycoord) - frag_it.forward(); - } - top_frag = frag_it.extract(); - if (top_frag->other_end != bottom_frag) { - outline = join_chopped_fragments(bottom_frag, top_frag); - 
ASSERT_HOST(outline == nullptr); - } else { - outline = join_chopped_fragments(bottom_frag, top_frag); - if (outline != nullptr) { - olchild_it.set_to_list(outline->child()); - for (child_it.mark_cycle_pt(); !child_it.cycled_list(); - child_it.forward()) { - child = child_it.data(); - if (*child < *outline) - olchild_it.add_to_end(child_it.extract()); - } - if (outline->bounding_box().width() > pitch_error) - dest_it->add_after_then_move(outline); - else - delete outline; // Make it disappear. - } - } - } - while (!child_it.empty ()) { - dest_it->add_after_then_move (child_it.extract ()); - child_it.forward (); - } -} - - -/********************************************************************** - * join_chopped_fragments - * - * Join the two lists of POLYPTs such that neither OUTLINE_FRAG - * operand keeps responsibility for the fragment. - **********************************************************************/ - -C_OUTLINE *join_chopped_fragments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ) { - C_OUTLINE *outline; //closed loop - - if (bottom->other_end == top) { - if (bottom->steps == nullptr) - outline = top->close (); //turn to outline - else - outline = bottom->close (); - delete top; - delete bottom; - return outline; - } - if (bottom->steps == nullptr) { - ASSERT_HOST (top->steps != nullptr); - join_segments (bottom->other_end, top); - } - else { - ASSERT_HOST (top->steps == nullptr); - join_segments (top->other_end, bottom); - } - top->other_end->other_end = bottom->other_end; - bottom->other_end->other_end = top->other_end; - delete bottom; - delete top; - return nullptr; -} - -/********************************************************************** - * join_segments - * - * Join the two edgestep fragments such that the second comes after - * the first and the gap between them is closed. 
- **********************************************************************/ - -void join_segments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ) { - DIR128 *steps; //new steps - int32_t stepcount; //no of steps - int16_t fake_count; //fake steps - DIR128 fake_step; //step entry - - ASSERT_HOST (bottom->end.x () == top->start.x ()); - fake_count = top->start.y () - bottom->end.y (); - if (fake_count < 0) { - fake_count = -fake_count; - fake_step = 32; - } - else - fake_step = 96; - - stepcount = bottom->stepcount + fake_count + top->stepcount; - steps = new DIR128[stepcount]; - memmove (steps, bottom->steps, bottom->stepcount); - memset (steps + bottom->stepcount, fake_step.get_dir(), fake_count); - memmove (steps + bottom->stepcount + fake_count, top->steps, - top->stepcount); - delete [] bottom->steps; - bottom->steps = steps; - bottom->stepcount = stepcount; - bottom->end = top->end; - bottom->other_end->end = top->end; -} - - -/********************************************************************** - * C_OUTLINE_FRAG::close - * - * Join the ends of this fragment and turn it into an outline. 
- **********************************************************************/ - -C_OUTLINE *C_OUTLINE_FRAG::close() { //join pieces - DIR128 *new_steps; //new steps - int32_t new_stepcount; //no of steps - int16_t fake_count; //fake steps - DIR128 fake_step; //step entry - - ASSERT_HOST (start.x () == end.x ()); - fake_count = start.y () - end.y (); - if (fake_count < 0) { - fake_count = -fake_count; - fake_step = 32; - } - else - fake_step = 96; - - new_stepcount = stepcount + fake_count; - if (new_stepcount > C_OUTLINE::kMaxOutlineLength) - return nullptr; // Can't join them - new_steps = new DIR128[new_stepcount]; - memmove(new_steps, steps, stepcount); - memset (new_steps + stepcount, fake_step.get_dir(), fake_count); - C_OUTLINE* result = new C_OUTLINE (start, new_steps, new_stepcount); - delete [] new_steps; - return result; -} - - -/********************************************************************** - * C_OUTLINE_FRAG::operator= - * - * Copy this fragment. - **********************************************************************/ - - //join pieces -C_OUTLINE_FRAG & C_OUTLINE_FRAG::operator= ( -const C_OUTLINE_FRAG & src //fragment to copy -) { - delete [] steps; - - stepcount = src.stepcount; - steps = new DIR128[stepcount]; - memmove (steps, src.steps, stepcount); - start = src.start; - end = src.end; - ycoord = src.ycoord; - return *this; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/fpchop.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/fpchop.h deleted file mode 100644 index 4cda239d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/fpchop.h +++ /dev/null @@ -1,132 +0,0 @@ -/********************************************************************** - * File: fpchop.h (Formerly fp_chop.h) - * Description: Code to chop fixed pitch text into character cells. - * Author: Ray Smith - * - * (C) Copyright 1993, Hewlett-Packard Ltd. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef FPCHOP_H -#define FPCHOP_H - -#include "params.h" -#include "blobbox.h" - -class C_OUTLINE_FRAG:public ELIST_LINK -{ - public: - C_OUTLINE_FRAG() { //empty constructor - steps = nullptr; - stepcount = 0; - } - ~C_OUTLINE_FRAG () { - delete [] steps; - } - //start coord - C_OUTLINE_FRAG(ICOORD start_pt, - ICOORD end_pt, //end coord - C_OUTLINE *outline, //source of steps - int16_t start_index, - int16_t end_index); - //other end - C_OUTLINE_FRAG(C_OUTLINE_FRAG *head, int16_t tail_y); - C_OUTLINE *close(); //copy to outline - C_OUTLINE_FRAG & operator= ( //assign - const C_OUTLINE_FRAG & src); - - ICOORD start; //start coord - ICOORD end; //end coord - DIR128 *steps; //step array - int32_t stepcount; //no of steps - C_OUTLINE_FRAG *other_end; //head if a tail - int16_t ycoord; //coord of cut pt - - private: - // Copy constructor (currently unused, therefore private). 
- C_OUTLINE_FRAG(const C_OUTLINE_FRAG& other); -}; - -ELISTIZEH(C_OUTLINE_FRAG) - -extern -INT_VAR_H (textord_fp_chop_error, 2, -"Max allowed bending of chop cells"); -extern -double_VAR_H (textord_fp_chop_snap, 0.5, -"Max distance of chop pt from vertex"); -ROW *fixed_pitch_words( //find lines - TO_ROW *row, //row to do - FCOORD rotation //for drawing - ); -WERD *add_repeated_word( //move repeated word - WERD_IT *rep_it, //repeated words - int16_t &rep_left, //left edge of word - int16_t &prev_chop_coord, //previous word end - uint8_t &blanks, //no of blanks - float pitch, //char cell size - WERD_IT *word_it //list of words - ); -void split_to_blob( //split the blob - BLOBNBOX *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_coutlines, //for cblobs - C_OUTLINE_LIST *right_coutlines); -void fixed_chop_cblob( //split the blob - C_BLOB *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_outlines, //left half of chop - C_OUTLINE_LIST *right_outlines //right half of chop - ); -void fixed_split_coutline( //chop the outline - C_OUTLINE *srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_IT *left_it, //left half of chop - C_OUTLINE_IT *right_it //right half of chop - ); -bool fixed_chop_coutline( //chop the outline - C_OUTLINE* srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_FRAG_LIST* left_frags, //left half of chop - C_OUTLINE_FRAG_LIST* right_frags //right half of chop -); -void save_chop_cfragment( //chop the outline - int16_t head_index, //head of fragment - ICOORD head_pos, //head of fragment - int16_t tail_index, //tail of fragment - ICOORD tail_pos, //tail of fragment - C_OUTLINE *srcline, //source of edgesteps - C_OUTLINE_FRAG_LIST *frags //fragment list - ); -void add_frag_to_list( 
//ordered add - C_OUTLINE_FRAG *frag, //fragment to add - C_OUTLINE_FRAG_LIST *frags //fragment list - ); -void close_chopped_cfragments( //chop the outline - C_OUTLINE_FRAG_LIST *frags, //list to clear - C_OUTLINE_LIST *children, //potential children - float pitch_error, //allowed shrinkage - C_OUTLINE_IT *dest_it //output list - ); -C_OUTLINE *join_chopped_fragments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ); -void join_segments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/gap_map.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/gap_map.cpp deleted file mode 100644 index 2a7e7e64..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/gap_map.cpp +++ /dev/null @@ -1,184 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "statistc.h" -#include "gap_map.h" - -BOOL_VAR(gapmap_debug, FALSE, "Say which blocks have tables"); -BOOL_VAR(gapmap_use_ends, FALSE, "Use large space at start and end of rows"); -BOOL_VAR(gapmap_no_isolated_quanta, FALSE, -"Ensure gaps not less than 2quanta wide"); -double_VAR(gapmap_big_gaps, 1.75, "xht multiplier"); - -/************************************************************************* - * A block gap map is a quantised histogram of whitespace regions in the - * block. 
It is a vertical projection of wide gaps WITHIN lines - * - * The map is held as an array of counts of rows which have a wide gap - * covering that region of the row. Each bucket in the map represents a width - * of about half an xheight - (The median of the xhts in the rows is used.) - * - * The block is considered RECTANGULAR - delimited by the left and right - * extremes of the rows in the block. However, ONLY wide gaps WITHIN a row are - * counted. - * - *************************************************************************/ - -GAPMAP::GAPMAP( //Constructor - TO_BLOCK *block //block - ) { - TO_ROW *row; //current row - BLOBNBOX_IT blob_it; //iterator - TBOX blob_box; - TBOX prev_blob_box; - int16_t gap_width; - int16_t start_of_row; - int16_t end_of_row; - STATS xht_stats (0, 128); - int16_t min_quantum; - int16_t max_quantum; - int16_t i; - - /* - Find left and right extremes and bucket size - */ - map = nullptr; - min_left = INT16_MAX; - max_right = -INT16_MAX; - total_rows = 0; - any_tabs = false; - - // row iterator - TO_ROW_IT row_it(block->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty ()) { - total_rows++; - xht_stats.add ((int16_t) floor (row->xheight + 0.5), 1); - blob_it.set_to_list (row->blob_list ()); - start_of_row = blob_it.data ()->bounding_box ().left (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); - if (min_left > start_of_row) - min_left = start_of_row; - if (max_right < end_of_row) - max_right = end_of_row; - } - } - if ((total_rows < 3) || (min_left >= max_right)) { - bucket_size = 0; - map_max = 0; - total_rows = 0; - min_left = max_right = 0; - return; - } - bucket_size = (int16_t) floor (xht_stats.median () + 0.5) / 2; - map_max = (max_right - min_left) / bucket_size; - map = new int16_t[map_max + 1]; - for (i = 0; i <= map_max; i++) - map[i] = 0; - - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); 
row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty ()) { - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - blob_box = box_next (&blob_it); - prev_blob_box = blob_box; - if (gapmap_use_ends) { - /* Leading space */ - gap_width = blob_box.left () - min_left; - if ((gap_width > gapmap_big_gaps * row->xheight) - && gap_width > 2) { - max_quantum = (blob_box.left () - min_left) / bucket_size; - if (max_quantum > map_max) max_quantum = map_max; - for (i = 0; i <= max_quantum; i++) - map[i]++; - } - } - while (!blob_it.cycled_list ()) { - blob_box = box_next (&blob_it); - gap_width = blob_box.left () - prev_blob_box.right (); - if ((gap_width > gapmap_big_gaps * row->xheight) - && gap_width > 2) { - min_quantum = - (prev_blob_box.right () - min_left) / bucket_size; - max_quantum = (blob_box.left () - min_left) / bucket_size; - if (max_quantum > map_max) max_quantum = map_max; - for (i = min_quantum; i <= max_quantum; i++) - map[i]++; - } - prev_blob_box = blob_box; - } - if (gapmap_use_ends) { - /* Trailing space */ - gap_width = max_right - prev_blob_box.right (); - if ((gap_width > gapmap_big_gaps * row->xheight) - && gap_width > 2) { - min_quantum = - (prev_blob_box.right () - min_left) / bucket_size; - if (min_quantum < 0) min_quantum = 0; - for (i = min_quantum; i <= map_max; i++) - map[i]++; - } - } - } - } - for (i = 0; i <= map_max; i++) { - if (map[i] > total_rows / 2) { - if (gapmap_no_isolated_quanta && - (((i == 0) && - (map[i + 1] <= total_rows / 2)) || - ((i == map_max) && - (map[i - 1] <= total_rows / 2)) || - ((i > 0) && - (i < map_max) && - (map[i - 1] <= total_rows / 2) && - (map[i + 1] <= total_rows / 2)))) { - map[i] = 0; //prevent isolated quantum - } - else - any_tabs = true; - } - } - if (gapmap_debug && any_tabs) - tprintf ("Table found\n"); -} - - -/************************************************************************* - * GAPMAP::table_gap() - * Is there a bucket in the specified range where 
more than half the rows in the - * block have a wide gap? - *************************************************************************/ - -bool GAPMAP::table_gap( //Is gap a table? - int16_t left, //From here - int16_t right //To here -) { - int16_t min_quantum; - int16_t max_quantum; - int16_t i; - bool tab_found = false; - - if (!any_tabs) - return false; - - min_quantum = (left - min_left) / bucket_size; - max_quantum = (right - min_left) / bucket_size; - // Clip to the bounds of the array. In some circumstances (big blob followed - // by small blob) max_quantum can exceed the map_max bounds, but we clip - // here instead, as it provides better long-term safety. - if (min_quantum < 0) min_quantum = 0; - if (max_quantum > map_max) max_quantum = map_max; - for (i = min_quantum; (!tab_found && (i <= max_quantum)); i++) - if (map[i] > total_rows / 2) - tab_found = true; - return tab_found; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/gap_map.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/gap_map.h deleted file mode 100644 index cc94690d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/gap_map.h +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef GAP_MAP_H -#define GAP_MAP_H - -#include "blobbox.h" - -class GAPMAP -{ - public: - GAPMAP( //constructor - TO_BLOCK *block); - - ~GAPMAP () { //destructor - delete[] map; - } - - bool table_gap( //Is gap a table? - int16_t left, //From here - int16_t right); //To here - - private: - int16_t total_rows; //in block - int16_t min_left; //Left extreme - int16_t max_right; //Right extreme - int16_t bucket_size; // half an x ht - int16_t *map; //empty counts - int16_t map_max; //map[0..max_map] defined - bool any_tabs; -}; - -/*-----------------------------*/ - -extern BOOL_VAR_H (gapmap_debug, FALSE, "Say which blocks have tables"); -extern BOOL_VAR_H (gapmap_use_ends, FALSE, -"Use large space at start and end of rows"); -extern BOOL_VAR_H (gapmap_no_isolated_quanta, FALSE, -"Ensure gaps not less than 2quanta wide"); -extern double_VAR_H (gapmap_big_gaps, 1.75, "xht multiplier"); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/imagefind.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/imagefind.cpp deleted file mode 100644 index 6c8d9e6d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/imagefind.cpp +++ /dev/null @@ -1,1365 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: imagefind.cpp -// Description: Function to find image and drawing regions in an image -// and create a corresponding list of empty blobs. -// Author: Ray Smith -// Created: Thu Mar 20 09:49:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "imagefind.h" -#include "colpartitiongrid.h" -#include "linlsq.h" -#include "statistc.h" -#include "params.h" - -#include "allheaders.h" - -#include - -INT_VAR(textord_tabfind_show_images, false, "Show image blobs"); - -namespace tesseract { - -// Fraction of width or height of on pixels that can be discarded from a -// roughly rectangular image. -const double kMinRectangularFraction = 0.125; -// Fraction of width or height to consider image completely used. -const double kMaxRectangularFraction = 0.75; -// Fraction of width or height to allow transition from kMinRectangularFraction -// to kMaxRectangularFraction, equivalent to a dy/dx skew. -const double kMaxRectangularGradient = 0.1; // About 6 degrees. -// Minimum image size to be worth looking for images on. -const int kMinImageFindSize = 100; -// Scale factor for the rms color fit error. -const double kRMSFitScaling = 8.0; -// Min color difference to call it two colors. -const int kMinColorDifference = 16; -// Pixel padding for noise blobs and partitions when rendering on the image -// mask to encourage them to join together. Make it too big and images -// will fatten out too much and have to be clipped to text. -const int kNoisePadding = 4; - -// Finds image regions within the BINARY source pix (page image) and returns -// the image regions as a mask image. -// The returned pix may be nullptr, meaning no images found. 
-// If not nullptr, it must be PixDestroyed by the caller. -// If textord_tabfind_show_images, debug images are appended to pixa_debug. -Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) { - // Not worth looking at small images. - if (pixGetWidth(pix) < kMinImageFindSize || - pixGetHeight(pix) < kMinImageFindSize) - return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); - - // Reduce by factor 2. - Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixr, "CascadeReduced"); - - // Get the halftone mask directly from Leptonica. - // - // Leptonica will print an error message and return nullptr if we call - // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we - // want to bypass that. - if (pixGetWidth(pixr) < kMinImageFindSize || - pixGetHeight(pixr) < kMinImageFindSize) { - pixDestroy(&pixr); - return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); - } - // Get the halftone mask. - l_int32 ht_found = 0; - Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) - ? pixaCreate(0) - : nullptr; - Pix* pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb); - if (pixadb) { - Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixdb, "HalftoneMask"); - pixDestroy(&pixdb); - pixaDestroy(&pixadb); - } - pixDestroy(&pixr); - if (!ht_found && pixht2 != nullptr) - pixDestroy(&pixht2); - if (pixht2 == nullptr) - return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); - - // Expand back up again. 
- Pix *pixht = pixExpandReplicate(pixht2, 2); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixht, "HalftoneReplicated"); - pixDestroy(&pixht2); - - // Fill to capture pixels near the mask edges that were missed - Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); - pixOr(pixht, pixht, pixt); - pixDestroy(&pixt); - - // Eliminate lines and bars that may be joined to images. - Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); - pixDilateBrick(pixfinemask, pixfinemask, 5, 5); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixfinemask, "FineMask"); - Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); - Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); - pixDestroy(&pixreduced); - pixDilateBrick(pixreduced2, pixreduced2, 5, 5); - Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8); - pixDestroy(&pixreduced2); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixcoarsemask, "CoarseMask"); - // Combine the coarse and fine image masks. - pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); - pixDestroy(&pixfinemask); - // Dilate a bit to make sure we get everything. - pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); - Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16); - pixDestroy(&pixcoarsemask); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixmask, "MaskDilated"); - // And the image mask with the line and bar remover. - pixAnd(pixht, pixht, pixmask); - pixDestroy(&pixmask); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pixht, "FinalMask"); - // Make the result image the same size as the input. 
- Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); - pixOr(result, result, pixht); - pixDestroy(&pixht); - return result; -} - -// Generates a Boxa, Pixa pair from the input binary (image mask) pix, -// analgous to pixConnComp, except that connected components which are nearly -// rectangular are replaced with solid rectangles. -// The returned boxa, pixa may be nullptr, meaning no images found. -// If not nullptr, they must be destroyed by the caller. -// Resolution of pix should match the source image (Tesseract::pix_binary_) -// so the output coordinate systems match. -void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, - Boxa** boxa, Pixa** pixa) { - *boxa = nullptr; - *pixa = nullptr; - - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(pix, "Conncompimage"); - // Find the individual image regions in the mask image. - *boxa = pixConnComp(pix, pixa, 8); - // Rectangularize the individual images. If a sharp edge in vertical and/or - // horizontal occupancy can be found, it indicates a probably rectangular - // image with unwanted bits merged on, so clip to the approximate rectangle. - int npixes = 0; - if (*boxa != nullptr && *pixa != nullptr) npixes = pixaGetCount(*pixa); - for (int i = 0; i < npixes; ++i) { - int x_start, x_end, y_start, y_end; - Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE); - if (textord_tabfind_show_images && pixa_debug != nullptr) - pixa_debug->AddPix(img_pix, "A component"); - if (pixNearlyRectangular(img_pix, kMinRectangularFraction, - kMaxRectangularFraction, - kMaxRectangularGradient, - &x_start, &y_start, &x_end, &y_end)) { - Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1); - pixSetAll(simple_pix); - pixDestroy(&img_pix); - // pixaReplacePix takes ownership of the simple_pix. - pixaReplacePix(*pixa, i, simple_pix, nullptr); - img_pix = pixaGetPix(*pixa, i, L_CLONE); - // Fix the box to match the new pix. 
- l_int32 x, y, width, height; - boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); - Box* simple_box = boxCreate(x + x_start, y + y_start, - x_end - x_start, y_end - y_start); - boxaReplaceBox(*boxa, i, simple_box); - } - pixDestroy(&img_pix); - } -} - -// Scans horizontally on x=[x_start,x_end), starting with y=*y_start, -// stepping y+=y_step, until y=y_end. *ystart is input/output. -// If the number of black pixels in a row, pix_count fits this pattern: -// 0 or more rows with pix_count < min_count then -// <= mid_width rows with min_count <= pix_count <= max_count then -// a row with pix_count > max_count then -// true is returned, and *y_start = the first y with pix_count >= min_count. -static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end, - int min_count, int mid_width, int max_count, - int y_end, int y_step, int* y_start) { - int mid_rows = 0; - for (int y = *y_start; y != y_end; y += y_step) { - // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a subset. - int pix_count = 0; - uint32_t* line = data + wpl * y; - for (int x = x_start; x < x_end; ++x) { - if (GET_DATA_BIT(line, x)) - ++pix_count; - } - if (mid_rows == 0 && pix_count < min_count) - continue; // In the min phase. - if (mid_rows == 0) - *y_start = y; // Save the y_start where we came out of the min phase. - if (pix_count > max_count) - return true; // Found the pattern. - ++mid_rows; - if (mid_rows > mid_width) - break; // Middle too big. - } - return false; // Never found max_count. -} - -// Scans vertically on y=[y_start,y_end), starting with x=*x_start, -// stepping x+=x_step, until x=x_end. *x_start is input/output. -// If the number of black pixels in a column, pix_count fits this pattern: -// 0 or more cols with pix_count < min_count then -// <= mid_width cols with min_count <= pix_count <= max_count then -// a column with pix_count > max_count then -// true is returned, and *x_start = the first x with pix_count >= min_count. 
-static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end, - int min_count, int mid_width, int max_count, - int x_end, int x_step, int* x_start) { - int mid_cols = 0; - for (int x = *x_start; x != x_end; x += x_step) { - int pix_count = 0; - uint32_t* line = data + y_start * wpl; - for (int y = y_start; y < y_end; ++y, line += wpl) { - if (GET_DATA_BIT(line, x)) - ++pix_count; - } - if (mid_cols == 0 && pix_count < min_count) - continue; // In the min phase. - if (mid_cols == 0) - *x_start = x; // Save the place where we came out of the min phase. - if (pix_count > max_count) - return true; // found the pattern. - ++mid_cols; - if (mid_cols > mid_width) - break; // Middle too big. - } - return false; // Never found max_count. -} - -// Returns true if there is a rectangle in the source pix, such that all -// pixel rows and column slices outside of it have less than -// min_fraction of the pixels black, and within max_skew_gradient fraction -// of the pixels on the inside, there are at least max_fraction of the -// pixels black. In other words, the inside of the rectangle looks roughly -// rectangular, and the outside of it looks like extra bits. -// On return, the rectangle is defined by x_start, y_start, x_end and y_end. -// Note: the algorithm is iterative, allowing it to slice off pixels from -// one edge, allowing it to then slice off more pixels from another edge. -bool ImageFind::pixNearlyRectangular(Pix* pix, - double min_fraction, double max_fraction, - double max_skew_gradient, - int* x_start, int* y_start, - int* x_end, int* y_end) { - ASSERT_HOST(pix != nullptr); - *x_start = 0; - *x_end = pixGetWidth(pix); - *y_start = 0; - *y_end = pixGetHeight(pix); - - uint32_t* data = pixGetData(pix); - int wpl = pixGetWpl(pix); - bool any_cut = false; - bool left_done = false; - bool right_done = false; - bool top_done = false; - bool bottom_done = false; - do { - any_cut = false; - // Find the top/bottom edges. 
- int width = *x_end - *x_start; - int min_count = static_cast(width * min_fraction); - int max_count = static_cast(width * max_fraction); - int edge_width = static_cast(width * max_skew_gradient); - if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, - max_count, *y_end, 1, y_start) && !top_done) { - top_done = true; - any_cut = true; - } - --(*y_end); - if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, - max_count, *y_start, -1, y_end) && !bottom_done) { - bottom_done = true; - any_cut = true; - } - ++(*y_end); - - // Find the left/right edges. - int height = *y_end - *y_start; - min_count = static_cast(height * min_fraction); - max_count = static_cast(height * max_fraction); - edge_width = static_cast(height * max_skew_gradient); - if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, - max_count, *x_end, 1, x_start) && !left_done) { - left_done = true; - any_cut = true; - } - --(*x_end); - if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, - max_count, *x_start, -1, x_end) && !right_done) { - right_done = true; - any_cut = true; - } - ++(*x_end); - } while (any_cut); - - // All edges must satisfy the condition of sharp gradient in pixel density - // in order for the full rectangle to be present. - return left_done && right_done && top_done && bottom_done; -} - -// Given an input pix, and a bounding rectangle, the sides of the rectangle -// are shrunk inwards until they bound any black pixels found within the -// original rectangle. Returns false if the rectangle contains no black -// pixels at all. 
-bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start, - int* x_end, int* y_end) { - Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, - *y_end - *y_start); - Box* output_box = nullptr; - pixClipBoxToForeground(pix, input_box, nullptr, &output_box); - bool result = output_box != nullptr; - if (result) { - l_int32 x, y, width, height; - boxGetGeometry(output_box, &x, &y, &width, &height); - *x_start = x; - *y_start = y; - *x_end = x + width; - *y_end = y + height; - boxDestroy(&output_box); - } - boxDestroy(&input_box); - return result; -} - -// Given a point in 3-D (RGB) space, returns the squared Euclidean distance -// of the point from the given line, defined by a pair of points in the 3-D -// (RGB) space, line1 and line2. -double ImageFind::ColorDistanceFromLine(const uint8_t* line1, - const uint8_t* line2, - const uint8_t* point) { - int line_vector[kRGBRMSColors]; - int point_vector[kRGBRMSColors]; - for (int i = 0; i < kRGBRMSColors; ++i) { - line_vector[i] = static_cast(line2[i]) - static_cast(line1[i]); - point_vector[i] = static_cast(point[i]) - static_cast(line1[i]); - } - line_vector[L_ALPHA_CHANNEL] = 0; - // Now the cross product in 3d. - int cross[kRGBRMSColors]; - cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] - - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN]; - cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] - - line_vector[COLOR_RED] * point_vector[COLOR_BLUE]; - cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] - - line_vector[COLOR_GREEN] * point_vector[COLOR_RED]; - cross[L_ALPHA_CHANNEL] = 0; - // Now the sums of the squares. - double cross_sq = 0.0; - double line_sq = 0.0; - for (int j = 0; j < kRGBRMSColors; ++j) { - cross_sq += static_cast(cross[j]) * cross[j]; - line_sq += static_cast(line_vector[j]) * line_vector[j]; - } - if (line_sq == 0.0) { - return 0.0; - } - return cross_sq / line_sq; // This is the squared distance. 
-} - - -// Returns the leptonica combined code for the given RGB triplet. -uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) { - l_uint32 result; - composeRGBPixel(r, g, b, &result); - return result; -} - -// Returns the input value clipped to a uint8_t. -uint8_t ImageFind::ClipToByte(double pixel) { - if (pixel < 0.0) - return 0; - else if (pixel >= 255.0) - return 255; - return static_cast(pixel); -} - -// Computes the light and dark extremes of color in the given rectangle of -// the given pix, which is factor smaller than the coordinate system in rect. -// The light and dark points are taken to be the upper and lower 8th-ile of -// the most deviant of R, G and B. The value of the other 2 channels are -// computed by linear fit against the most deviant. -// The colors of the two points are returned in color1 and color2, with the -// alpha channel set to a scaled mean rms of the fits. -// If color_map1 is not null then it and color_map2 get rect pasted in them -// with the two calculated colors, and rms map gets a pasted rect of the rms. -// color_map1, color_map2 and rms_map are assumed to be the same scale as pix. -void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, - Pix* color_map1, Pix* color_map2, - Pix* rms_map, - uint8_t* color1, uint8_t* color2) { - ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32); - // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more - // background. 
- int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - int left_pad = std::max(rect.left() - 2 * factor, 0) / factor; - int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor; - top_pad = std::min(height, top_pad); - int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor; - right_pad = std::min(width, right_pad); - int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor; - int width_pad = right_pad - left_pad; - int height_pad = top_pad - bottom_pad; - if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4) - return; - // Now crop the pix to the rectangle. - Box* scaled_box = boxCreate(left_pad, height - top_pad, - width_pad, height_pad); - Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr); - - // Compute stats over the whole image. - STATS red_stats(0, 256); - STATS green_stats(0, 256); - STATS blue_stats(0, 256); - uint32_t* data = pixGetData(scaled); - ASSERT_HOST(pixGetWpl(scaled) == width_pad); - for (int y = 0; y < height_pad; ++y) { - for (int x = 0; x < width_pad; ++x, ++data) { - int r = GET_DATA_BYTE(data, COLOR_RED); - int g = GET_DATA_BYTE(data, COLOR_GREEN); - int b = GET_DATA_BYTE(data, COLOR_BLUE); - red_stats.add(r, 1); - green_stats.add(g, 1); - blue_stats.add(b, 1); - } - } - // Find the RGB component with the greatest 8th-ile-range. - // 8th-iles are used instead of quartiles to get closer to the true - // foreground color, which is going to be faint at best because of the - // pre-scaling of the input image. 
- int best_l8 = static_cast(red_stats.ile(0.125f)); - int best_u8 = static_cast(ceil(red_stats.ile(0.875f))); - int best_i8r = best_u8 - best_l8; - int x_color = COLOR_RED; - int y1_color = COLOR_GREEN; - int y2_color = COLOR_BLUE; - int l8 = static_cast(green_stats.ile(0.125f)); - int u8 = static_cast(ceil(green_stats.ile(0.875f))); - if (u8 - l8 > best_i8r) { - best_i8r = u8 - l8; - best_l8 = l8; - best_u8 = u8; - x_color = COLOR_GREEN; - y1_color = COLOR_RED; - } - l8 = static_cast(blue_stats.ile(0.125f)); - u8 = static_cast(ceil(blue_stats.ile(0.875f))); - if (u8 - l8 > best_i8r) { - best_i8r = u8 - l8; - best_l8 = l8; - best_u8 = u8; - x_color = COLOR_BLUE; - y1_color = COLOR_GREEN; - y2_color = COLOR_RED; - } - if (best_i8r >= kMinColorDifference) { - LLSQ line1; - LLSQ line2; - uint32_t* data = pixGetData(scaled); - for (int im_y = 0; im_y < height_pad; ++im_y) { - for (int im_x = 0; im_x < width_pad; ++im_x, ++data) { - int x = GET_DATA_BYTE(data, x_color); - int y1 = GET_DATA_BYTE(data, y1_color); - int y2 = GET_DATA_BYTE(data, y2_color); - line1.add(x, y1); - line2.add(x, y2); - } - } - double m1 = line1.m(); - double c1 = line1.c(m1); - double m2 = line2.m(); - double c2 = line2.c(m2); - double rms = line1.rms(m1, c1) + line2.rms(m2, c2); - rms *= kRMSFitScaling; - // Save the results. - color1[x_color] = ClipToByte(best_l8); - color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5); - color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5); - color1[L_ALPHA_CHANNEL] = ClipToByte(rms); - color2[x_color] = ClipToByte(best_u8); - color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5); - color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5); - color2[L_ALPHA_CHANNEL] = ClipToByte(rms); - } else { - // There is only one color. 
- color1[COLOR_RED] = ClipToByte(red_stats.median()); - color1[COLOR_GREEN] = ClipToByte(green_stats.median()); - color1[COLOR_BLUE] = ClipToByte(blue_stats.median()); - color1[L_ALPHA_CHANNEL] = 0; - memcpy(color2, color1, 4); - } - if (color_map1 != nullptr) { - pixSetInRectArbitrary(color_map1, scaled_box, - ComposeRGB(color1[COLOR_RED], - color1[COLOR_GREEN], - color1[COLOR_BLUE])); - pixSetInRectArbitrary(color_map2, scaled_box, - ComposeRGB(color2[COLOR_RED], - color2[COLOR_GREEN], - color2[COLOR_BLUE])); - pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]); - } - pixDestroy(&scaled); - boxDestroy(&scaled_box); -} - -// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================ -// The following functions are responsible for cutting a polygonal image from -// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts -// with DivideImageIntoParts as the master. -// Problem statement: -// We start with a single connected component from the image mask: we get -// a Pix of the component, and its location on the page (im_box). -// The objective of cutting a polygonal image from its rectangle is to avoid -// interfering text, but not text that completely overlaps the image. -// ------------------------------ ------------------------------ -// | Single input partition | | 1 Cut up output partitions | -// | | ------------------------------ -// Av|oid | Avoid | | -// | | |________________________| -// Int|erfering | Interfering | | -// | | _____|__________________| -// T|ext | Text | | -// | Text-on-image | | Text-on-image | -// ------------------------------ -------------------------- -// DivideImageIntoParts does this by building a ColPartition_LIST (not in the -// grid) with each ColPartition representing one of the rectangles needed, -// starting with a single rectangle for the whole image component, and cutting -// bits out of it with CutChunkFromParts as needed to avoid text. 
The output -// ColPartitions are supposed to be ordered from top to bottom. - -// The problem is complicated by the fact that we have rotated the coordinate -// system to make text lines horizontal, so if we need to look at the component -// image, we have to rotate the coordinates. Throughout the functions in this -// section im_box is the rectangle representing the image component in the -// rotated page coordinates (where we are building our output ColPartitions), -// rotation is the rotation that we used to get there, and rerotation is the -// rotation required to get back to original page image coordinates. -// To get to coordinates in the component image, pix, we rotate the im_box, -// the point we want to locate, and subtract the rotated point from the top-left -// of the rotated im_box. -// im_box is therefore essential to calculating coordinates within the pix. - -// Returns true if there are no black pixels in between the boxes. -// The im_box must represent the bounding box of the pix in tesseract -// coordinates, which may be negative, due to rotations to make the textlines -// horizontal. The boxes are rotated by rotation, which should undo such -// rotations, before mapping them onto the pix. -bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2, - const TBOX& im_box, const FCOORD& rotation, - Pix* pix) { - TBOX search_box(box1); - search_box += box2; - if (box1.x_gap(box2) >= box1.y_gap(box2)) { - if (box1.x_gap(box2) <= 0) - return true; - search_box.set_left(std::min(box1.right(), box2.right())); - search_box.set_right(std::max(box1.left(), box2.left())); - } else { - if (box1.y_gap(box2) <= 0) - return true; - search_box.set_top(std::max(box1.bottom(), box2.bottom())); - search_box.set_bottom(std::min(box1.top(), box2.top())); - } - return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0; -} - -// Returns the number of pixels in box in the pix. -// rotation, pix and im_box are defined in the large comment above. 
-int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX& im_box, - const FCOORD& rotation, Pix* pix) { - // Intersect it with the image box. - box &= im_box; // This is in-place box intersection. - if (box.null_box()) - return 0; - box.rotate(rotation); - TBOX rotated_im_box(im_box); - rotated_im_box.rotate(rotation); - Pix* rect_pix = pixCreate(box.width(), box.height(), 1); - pixRasterop(rect_pix, 0, 0, box.width(), box.height(), - PIX_SRC, pix, box.left() - rotated_im_box.left(), - rotated_im_box.top() - box.top()); - l_int32 result; - pixCountPixels(rect_pix, &result, nullptr); - pixDestroy(&rect_pix); - return result; -} - -// The box given by slice contains some black pixels, but not necessarily -// over the whole box. Shrink the x bounds of slice, but not the y bounds -// until there is at least one black pixel in the outermost columns. -// rotation, rerotation, pix and im_box are defined in the large comment above. -static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation, - const TBOX& im_box, Pix* pix, TBOX* slice) { - TBOX rotated_box(*slice); - rotated_box.rotate(rerotation); - TBOX rotated_im_box(im_box); - rotated_im_box.rotate(rerotation); - int left = rotated_box.left() - rotated_im_box.left(); - int right = rotated_box.right() - rotated_im_box.left(); - int top = rotated_im_box.top() - rotated_box.top(); - int bottom = rotated_im_box.top() - rotated_box.bottom(); - ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom); - top = rotated_im_box.top() - top; - bottom = rotated_im_box.top() - bottom; - left += rotated_im_box.left(); - right += rotated_im_box.left(); - rotated_box.set_to_given_coords(left, bottom, right, top); - rotated_box.rotate(rotation); - slice->set_left(rotated_box.left()); - slice->set_right(rotated_box.right()); -} - -// The meat of cutting a polygonal image around text. 
-// This function covers the general case of cutting a box out of a box -// as shown: -// Input Output -// ------------------------------ ------------------------------ -// | Single input partition | | 1 Cut up output partitions | -// | | ------------------------------ -// | ---------- | --------- ---------- -// | | box | | | 2 | box | 3 | -// | | | | | | is cut | | -// | ---------- | --------- out ---------- -// | | ------------------------------ -// | | | 4 | -// ------------------------------ ------------------------------ -// In the context that this function is used, at most 3 of the above output -// boxes will be created, as the overlapping box is never contained by the -// input. -// The above cutting operation is executed for each element of part_list that -// is overlapped by the input box. Each modified ColPartition is replaced -// in place in the list by the output of the cutting operation in the order -// shown above, so iff no holes are ever created, the output will be in -// top-to-bottom order, but in extreme cases, hole creation is possible. -// In such cases, the output order may cause strange block polygons. -// rotation, rerotation, pix and im_box are defined in the large comment above. -static void CutChunkFromParts(const TBOX& box, const TBOX& im_box, - const FCOORD& rotation, const FCOORD& rerotation, - Pix* pix, ColPartition_LIST* part_list) { - ASSERT_HOST(!part_list->empty()); - ColPartition_IT part_it(part_list); - do { - ColPartition* part = part_it.data(); - TBOX part_box = part->bounding_box(); - if (part_box.overlap(box)) { - // This part must be cut and replaced with the remains. There are - // up to 4 pieces to be made. Start with the first one and use - // add_before_stay_put. For each piece if it has no black pixels - // left, just don't make the box. - // Above box. 
- if (box.top() < part_box.top()) { - TBOX slice(part_box); - slice.set_bottom(box.top()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { - AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); - } - } - // Left of box. - if (box.left() > part_box.left()) { - TBOX slice(part_box); - slice.set_right(box.left()); - if (box.top() < part_box.top()) - slice.set_top(box.top()); - if (box.bottom() > part_box.bottom()) - slice.set_bottom(box.bottom()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { - AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); - } - } - // Right of box. - if (box.right() < part_box.right()) { - TBOX slice(part_box); - slice.set_left(box.right()); - if (box.top() < part_box.top()) - slice.set_top(box.top()); - if (box.bottom() > part_box.bottom()) - slice.set_bottom(box.bottom()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { - AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); - } - } - // Below box. 
- if (box.bottom() > part_box.bottom()) { - TBOX slice(part_box); - slice.set_top(box.bottom()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { - AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); - } - } - part->DeleteBoxes(); - delete part_it.extract(); - } - part_it.forward(); - } while (!part_it.at_first()); -} - -// Starts with the bounding box of the image component and cuts it up -// so that it doesn't intersect text where possible. -// Strong fully contained horizontal text is marked as text on image, -// and does not cause a division of the image. -// For more detail see the large comment above on cutting polygonal images -// from a rectangle. -// rotation, rerotation, pix and im_box are defined in the large comment above. -static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation, - const FCOORD& rerotation, Pix* pix, - ColPartitionGridSearch* rectsearch, - ColPartition_LIST* part_list) { - // Add the full im_box partition to the list to begin with. - ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN, - BRT_RECTIMAGE, - BTFT_NONTEXT); - ColPartition_IT part_it(part_list); - part_it.add_after_then_move(pix_part); - - rectsearch->StartRectSearch(im_box); - ColPartition* part; - while ((part = rectsearch->NextRectSearch()) != nullptr) { - TBOX part_box = part->bounding_box(); - if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) { - // This image is completely covered by an existing text partition. - for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - ColPartition* pix_part = part_it.extract(); - pix_part->DeleteBoxes(); - delete pix_part; - } - } else if (part->flow() == BTFT_STRONG_CHAIN) { - // Text intersects the box. - TBOX overlap_box = part_box.intersection(im_box); - // Intersect it with the image box. 
- int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box, - rerotation, pix); - if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) { - // Eat a piece out of the image. - // Pad it so that pieces eaten out look decent. - int padding = part->blob_type() == BRT_VERT_TEXT - ? part_box.width() : part_box.height(); - part_box.set_top(part_box.top() + padding / 2); - part_box.set_bottom(part_box.bottom() - padding / 2); - CutChunkFromParts(part_box, im_box, rotation, rerotation, - pix, part_list); - } else { - // Strong overlap with the black area, so call it text on image. - part->set_flow(BTFT_TEXT_ON_IMAGE); - } - } - if (part_list->empty()) { - break; - } - } -} - -// Search for the rightmost text that overlaps vertically and is to the left -// of the given box, but within the given left limit. -static int ExpandImageLeft(const TBOX& box, int left_limit, - ColPartitionGrid* part_grid) { - ColPartitionGridSearch search(part_grid); - ColPartition* part; - // Search right to left for any text that overlaps. - search.StartSideSearch(box.left(), box.bottom(), box.top()); - while ((part = search.NextSideSearch(true)) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.y_gap(box) < 0) { - if (part_box.right() > left_limit && part_box.right() < box.left()) - left_limit = part_box.right(); - break; - } - } - } - if (part != nullptr) { - // Search for the nearest text up to the one we already found. 
- TBOX search_box(left_limit, box.bottom(), box.left(), box.top()); - search.StartRectSearch(search_box); - while ((part = search.NextRectSearch()) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.y_gap(box) < 0) { - if (part_box.right() > left_limit && part_box.right() < box.left()) { - left_limit = part_box.right(); - } - } - } - } - } - return left_limit; -} - -// Search for the leftmost text that overlaps vertically and is to the right -// of the given box, but within the given right limit. -static int ExpandImageRight(const TBOX& box, int right_limit, - ColPartitionGrid* part_grid) { - ColPartitionGridSearch search(part_grid); - ColPartition* part; - // Search left to right for any text that overlaps. - search.StartSideSearch(box.right(), box.bottom(), box.top()); - while ((part = search.NextSideSearch(false)) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.y_gap(box) < 0) { - if (part_box.left() < right_limit && part_box.left() > box.right()) - right_limit = part_box.left(); - break; - } - } - } - if (part != nullptr) { - // Search for the nearest text up to the one we already found. - TBOX search_box(box.left(), box.bottom(), right_limit, box.top()); - search.StartRectSearch(search_box); - while ((part = search.NextRectSearch()) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.y_gap(box) < 0) { - if (part_box.left() < right_limit && part_box.left() > box.right()) - right_limit = part_box.left(); - } - } - } - } - return right_limit; -} - -// Search for the topmost text that overlaps horizontally and is below -// the given box, but within the given bottom limit. 
-static int ExpandImageBottom(const TBOX& box, int bottom_limit, - ColPartitionGrid* part_grid) { - ColPartitionGridSearch search(part_grid); - ColPartition* part; - // Search right to left for any text that overlaps. - search.StartVerticalSearch(box.left(), box.right(), box.bottom()); - while ((part = search.NextVerticalSearch(true)) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.x_gap(box) < 0) { - if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) - bottom_limit = part_box.top(); - break; - } - } - } - if (part != nullptr) { - // Search for the nearest text up to the one we already found. - TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom()); - search.StartRectSearch(search_box); - while ((part = search.NextRectSearch()) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.x_gap(box) < 0) { - if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) - bottom_limit = part_box.top(); - } - } - } - } - return bottom_limit; -} - -// Search for the bottommost text that overlaps horizontally and is above -// the given box, but within the given top limit. -static int ExpandImageTop(const TBOX& box, int top_limit, - ColPartitionGrid* part_grid) { - ColPartitionGridSearch search(part_grid); - ColPartition* part; - // Search right to left for any text that overlaps. 
- search.StartVerticalSearch(box.left(), box.right(), box.top()); - while ((part = search.NextVerticalSearch(false)) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.x_gap(box) < 0) { - if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) - top_limit = part_box.bottom(); - break; - } - } - } - if (part != nullptr) { - // Search for the nearest text up to the one we already found. - TBOX search_box(box.left(), box.top(), box.right(), top_limit); - search.StartRectSearch(search_box); - while ((part = search.NextRectSearch()) != nullptr) { - if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { - const TBOX& part_box(part->bounding_box()); - if (part_box.x_gap(box) < 0) { - if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) - top_limit = part_box.bottom(); - } - } - } - } - return top_limit; -} - -// Expands the image box in the given direction until it hits text, -// limiting the expansion to the given limit box, returning the result -// in the expanded box, and -// returning the increase in area resulting from the expansion. -static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box, - const TBOX& limit_box, - ColPartitionGrid* part_grid, TBOX* expanded_box) { - *expanded_box = im_box; - switch (dir) { - case BND_LEFT: - expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(), - part_grid)); - break; - case BND_RIGHT: - expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(), - part_grid)); - break; - case BND_ABOVE: - expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid)); - break; - case BND_BELOW: - expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(), - part_grid)); - break; - default: - return 0; - } - return expanded_box->area() - im_box.area(); -} - -// Expands the image partition into any non-text until it touches text. 
-// The expansion proceeds in the order of increasing increase in area -// as a heuristic to find the best rectangle by expanding in the most -// constrained direction first. -static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) { - bool dunnit[BND_COUNT]; - memset(dunnit, 0, sizeof(dunnit)); - TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(), - part_grid->tright().x(), part_grid->tright().y()); - TBOX text_box(*im_box); - for (int iteration = 0; iteration < BND_COUNT; ++iteration) { - // Find the direction with least area increase. - int best_delta = -1; - BlobNeighbourDir best_dir = BND_LEFT; - TBOX expanded_boxes[BND_COUNT]; - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - if (!dunnit[bnd]) { - TBOX expanded_box; - int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid, - &expanded_boxes[bnd]); - if (best_delta < 0 || area_delta < best_delta) { - best_delta = area_delta; - best_dir = bnd; - } - } - } - // Run the best and remember the direction. - dunnit[best_dir] = true; - text_box = expanded_boxes[best_dir]; - } - *im_box = text_box; -} - -// Helper deletes the given partition but first marks up all the blobs as -// noise, so they get deleted later, and disowns them. -// If the initial type of the partition is image, then it actually deletes -// the blobs, as the partition owns them in that case. -static void DeletePartition(ColPartition* part) { - BlobRegionType type = part->blob_type(); - if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { - // The partition owns the boxes of these types, so just delete them. - part->DeleteBoxes(); // From a previous iteration. - } else { - // Once marked, the blobs will be swept up by TidyBlobs. - part->set_flow(BTFT_NONTEXT); - part->set_blob_type(BRT_NOISE); - part->SetBlobTypes(); - part->DisownBoxes(); // Created before FindImagePartitions. 
- } - delete part; -} - -// The meat of joining fragmented images and consuming ColPartitions of -// uncertain type. -// *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be -// expanded to consume overlapping and nearby ColPartitions of uncertain type -// and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond -// max_image_box. *part_ptr is NOT in the part_grid. -// rectsearch is already constructed on the part_grid, and is used for -// searching for overlapping and nearby ColPartitions. -// ExpandImageIntoParts is called iteratively until it returns false. Each -// time it absorbs the nearest non-contained candidate, and everything that -// is fully contained within part_ptr's bounding box. -// TODO(rays) what if it just eats everything inside max_image_box in one go? -static bool ExpandImageIntoParts(const TBOX& max_image_box, - ColPartitionGridSearch* rectsearch, - ColPartitionGrid* part_grid, - ColPartition** part_ptr) { - ColPartition* image_part = *part_ptr; - TBOX im_part_box = image_part->bounding_box(); - if (textord_tabfind_show_images > 1) { - tprintf("Searching for merge with image part:"); - im_part_box.print(); - tprintf("Text box="); - max_image_box.print(); - } - rectsearch->StartRectSearch(max_image_box); - ColPartition* part; - ColPartition* best_part = nullptr; - int best_dist = 0; - while ((part = rectsearch->NextRectSearch()) != nullptr) { - if (textord_tabfind_show_images > 1) { - tprintf("Considering merge with part:"); - part->Print(); - if (im_part_box.contains(part->bounding_box())) - tprintf("Fully contained\n"); - else if (!max_image_box.contains(part->bounding_box())) - tprintf("Not within text box\n"); - else if (part->flow() == BTFT_STRONG_CHAIN) - tprintf("Too strong text\n"); - else - tprintf("Real candidate\n"); - } - if (part->flow() == BTFT_STRONG_CHAIN || - part->flow() == BTFT_TEXT_ON_IMAGE || - part->blob_type() == BRT_POLYIMAGE) - continue; - TBOX box = part->bounding_box(); - if 
(max_image_box.contains(box) && part->blob_type() != BRT_NOISE) { - if (im_part_box.contains(box)) { - // Eat it completely. - rectsearch->RemoveBBox(); - DeletePartition(part); - continue; - } - int x_dist = std::max(0, box.x_gap(im_part_box)); - int y_dist = std::max(0, box.y_gap(im_part_box)); - int dist = x_dist * x_dist + y_dist * y_dist; - if (dist > box.area() || dist > im_part_box.area()) - continue; // Not close enough. - if (best_part == nullptr || dist < best_dist) { - // We keep the nearest qualifier, which is not necessarily the nearest. - best_part = part; - best_dist = dist; - } - } - } - if (best_part != nullptr) { - // It needs expanding. We can do it without touching text. - TBOX box = best_part->bounding_box(); - if (textord_tabfind_show_images > 1) { - tprintf("Merging image part:"); - im_part_box.print(); - tprintf("with part:"); - box.print(); - } - im_part_box += box; - *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN, - BRT_RECTIMAGE, - BTFT_NONTEXT); - DeletePartition(image_part); - part_grid->RemoveBBox(best_part); - DeletePartition(best_part); - rectsearch->RepositionIterator(); - return true; - } - return false; -} - -// Helper function to compute the overlap area between the box and the -// given list of partitions. -static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) { - int intersect_area = 0; - ColPartition_IT part_it(part_list); - // Iterate the parts and subtract intersecting area. - for (part_it.mark_cycle_pt(); !part_it.cycled_list(); - part_it.forward()) { - ColPartition* image_part = part_it.data(); - TBOX intersect = box.intersection(image_part->bounding_box()); - intersect_area += intersect.area(); - } - return intersect_area; -} - -// part_list is a set of ColPartitions representing a polygonal image, and -// im_box is the union of the bounding boxes of all the parts in part_list. -// Tests whether part is to be consumed by the polygonal image. 
-// Returns true if part is weak text and more than half of its area is -// intersected by parts from the part_list, and it is contained within im_box. -static bool TestWeakIntersectedPart(const TBOX& im_box, - ColPartition_LIST* part_list, - ColPartition* part) { - if (part->flow() < BTFT_STRONG_CHAIN) { - // A weak partition intersects the box. - const TBOX& part_box = part->bounding_box(); - if (im_box.contains(part_box)) { - int area = part_box.area(); - int intersect_area = IntersectArea(part_box, part_list); - if (area < 2 * intersect_area) { - return true; - } - } - } - return false; -} - -// A rectangular or polygonal image has been completed, in part_list, bounding -// box in im_box. We want to eliminate weak text or other uncertain partitions -// (basically anything that is not BRT_STRONG_CHAIN or better) from both the -// part_grid and the big_parts list that are contained within im_box and -// overlapped enough by the possibly polygonal image. -static void EliminateWeakParts(const TBOX& im_box, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts, - ColPartition_LIST* part_list) { - ColPartitionGridSearch rectsearch(part_grid); - ColPartition* part; - rectsearch.StartRectSearch(im_box); - while ((part = rectsearch.NextRectSearch()) != nullptr) { - if (TestWeakIntersectedPart(im_box, part_list, part)) { - BlobRegionType type = part->blob_type(); - if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) { - rectsearch.RemoveBBox(); - DeletePartition(part); - } else { - // The part is mostly covered, so mark it. Non-image partitions are - // kept hanging around to mark the image for pass2 - part->set_flow(BTFT_NONTEXT); - part->set_blob_type(BRT_NOISE); - part->SetBlobTypes(); - } - } - } - ColPartition_IT big_it(big_parts); - for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) { - part = big_it.data(); - if (TestWeakIntersectedPart(im_box, part_list, part)) { - // Once marked, the blobs will be swept up by TidyBlobs. 
- DeletePartition(big_it.extract()); - } - } -} - -// Helper scans for good text partitions overlapping the given box. -// If there are no good text partitions overlapping an expanded box, then -// the box is expanded, otherwise, the original box is returned. -// If good text overlaps the box, true is returned. -static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) { - ColPartitionGridSearch rectsearch(part_grid); - TBOX padded_box(*box); - padded_box.pad(kNoisePadding, kNoisePadding); - rectsearch.StartRectSearch(padded_box); - ColPartition* part; - bool any_text_in_padded_rect = false; - while ((part = rectsearch.NextRectSearch()) != nullptr) { - if (part->flow() == BTFT_CHAIN || - part->flow() == BTFT_STRONG_CHAIN) { - // Text intersects the box. - any_text_in_padded_rect = true; - const TBOX& part_box = part->bounding_box(); - if (box->overlap(part_box)) { - return true; - } - } - } - if (!any_text_in_padded_rect) - *box = padded_box; - return false; -} - -// Renders the boxes of image parts from the supplied list onto the image_pix, -// except where they interfere with existing strong text in the part_grid, -// and then deletes them. -// Box coordinates are rotated by rerotate to match the image. -static void MarkAndDeleteImageParts(const FCOORD& rerotate, - ColPartitionGrid* part_grid, - ColPartition_LIST* image_parts, - Pix* image_pix) { - if (image_pix == nullptr) - return; - int imageheight = pixGetHeight(image_pix); - ColPartition_IT part_it(image_parts); - for (; !part_it.empty(); part_it.forward()) { - ColPartition* part = part_it.extract(); - TBOX part_box = part->bounding_box(); - BlobRegionType type = part->blob_type(); - if (!ScanForOverlappingText(part_grid, &part_box) || - type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { - // Mark the box on the image. - // All coords need to be rotated to match the image. 
- part_box.rotate(rerotate); - int left = part_box.left(); - int top = part_box.top(); - pixRasterop(image_pix, left, imageheight - top, - part_box.width(), part_box.height(), PIX_SET, nullptr, 0, 0); - } - DeletePartition(part); - } -} - -// Locates all the image partitions in the part_grid, that were found by a -// previous call to FindImagePartitions, marks them in the image_mask, -// removes them from the grid, and deletes them. This makes it possible to -// call FindImagePartitions again to produce less broken-up and less -// overlapping image partitions. -// rerotation specifies how to rotate the partition coords to match -// the image_mask, since this function is used after orientation correction. -void ImageFind::TransferImagePartsToImageMask(const FCOORD& rerotation, - ColPartitionGrid* part_grid, - Pix* image_mask) { - // Extract the noise parts from the grid and put them on a temporary list. - ColPartition_LIST parts_list; - ColPartition_IT part_it(&parts_list); - ColPartitionGridSearch gsearch(part_grid); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - BlobRegionType type = part->blob_type(); - if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { - part_it.add_after_then_move(part); - gsearch.RemoveBBox(); - } - } - // Render listed noise partitions to the image mask. - MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask); -} - -// Removes and deletes all image partitions that are too small to be worth -// keeping. We have to do this as a separate phase after creating the image -// partitions as the small images are needed to join the larger ones together. 
-static void DeleteSmallImages(ColPartitionGrid* part_grid) { - if (part_grid != nullptr) return; - ColPartitionGridSearch gsearch(part_grid); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - // Only delete rectangular images, since if it became a poly image, it - // is more evidence that it is somehow important. - if (part->blob_type() == BRT_RECTIMAGE) { - const TBOX& part_box = part->bounding_box(); - if (part_box.width() < kMinImageFindSize || - part_box.height() < kMinImageFindSize) { - // It is too small to keep. Just make it disappear. - gsearch.RemoveBBox(); - DeletePartition(part); - } - } - } -} - -// Runs a CC analysis on the image_pix mask image, and creates -// image partitions from them, cutting out strong text, and merging with -// nearby image regions such that they don't interfere with text. -// Rotation and rerotation specify how to rotate image coords to match -// the blob and partition coords and back again. -// The input/output part_grid owns all the created partitions, and -// the partitions own all the fake blobs that belong in the partitions. -// Since the other blobs in the other partitions will be owned by the block, -// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this -// situation and collect the image blobs. -void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation, - const FCOORD& rerotation, TO_BLOCK* block, - TabFind* tab_grid, DebugPixa* pixa_debug, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts) { - int imageheight = pixGetHeight(image_pix); - Boxa* boxa; - Pixa* pixa; - ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa); - // Iterate the connected components in the image regions mask. 
- int nboxes = 0; - if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa); - for (int i = 0; i < nboxes; ++i) { - l_int32 x, y, width, height; - boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height); - Pix* pix = pixaGetPix(pixa, i, L_CLONE); - TBOX im_box(x, imageheight -y - height, x + width, imageheight - y); - im_box.rotate(rotation); // Now matches all partitions and blobs. - ColPartitionGridSearch rectsearch(part_grid); - rectsearch.SetUniqueMode(true); - ColPartition_LIST part_list; - DivideImageIntoParts(im_box, rotation, rerotation, pix, - &rectsearch, &part_list); - if (textord_tabfind_show_images && pixa_debug != nullptr) { - pixa_debug->AddPix(pix, "ImageComponent"); - tprintf("Component has %d parts\n", part_list.length()); - } - pixDestroy(&pix); - if (!part_list.empty()) { - ColPartition_IT part_it(&part_list); - if (part_list.singleton()) { - // We didn't have to chop it into a polygon to fit around text, so - // try expanding it to merge fragmented image parts, as long as it - // doesn't touch strong text. - ColPartition* part = part_it.extract(); - TBOX text_box(im_box); - MaximalImageBoundingBox(part_grid, &text_box); - while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)); - part_it.set_to_list(&part_list); - part_it.add_after_then_move(part); - im_box = part->bounding_box(); - } - EliminateWeakParts(im_box, part_grid, big_parts, &part_list); - // Iterate the part_list and put the parts into the grid. 
- for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { - ColPartition* image_part = part_it.extract(); - im_box = image_part->bounding_box(); - part_grid->InsertBBox(true, true, image_part); - if (!part_it.at_last()) { - ColPartition* neighbour = part_it.data_relative(1); - image_part->AddPartner(false, neighbour); - neighbour->AddPartner(true, image_part); - } - } - } - } - boxaDestroy(&boxa); - pixaDestroy(&pixa); - DeleteSmallImages(part_grid); - if (textord_tabfind_show_images) { - ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images"); - part_grid->DisplayBoxes(images_win_); - } -} - - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/imagefind.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/imagefind.h deleted file mode 100644 index 36825d5e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/imagefind.h +++ /dev/null @@ -1,159 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: imagefind.h -// Description: Class to find image and drawing regions in an image -// and create a corresponding list of empty blobs. -// Author: Ray Smith -// Created: Fri Aug 01 10:50:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_IMAGEFIND_H_ -#define TESSERACT_TEXTORD_IMAGEFIND_H_ - -#include "debugpixa.h" -#include "host.h" - -struct Boxa; -struct Pix; -struct Pixa; -class TBOX; -class FCOORD; -class TO_BLOCK; -class BLOBNBOX_LIST; - -namespace tesseract { - -class ColPartitionGrid; -class ColPartition_LIST; -class TabFind; - -// The ImageFind class is a simple static function wrapper class that -// exposes the FindImages function and some useful helper functions. -class ImageFind { - public: - // Finds image regions within the BINARY source pix (page image) and returns - // the image regions as a mask image. - // The returned pix may be nullptr, meaning no images found. - // If not nullptr, it must be PixDestroyed by the caller. - // If textord_tabfind_show_images, debug images are appended to pixa_debug. - static Pix* FindImages(Pix* pix, DebugPixa* pixa_debug); - - // Generates a Boxa, Pixa pair from the input binary (image mask) pix, - // analgous to pixConnComp, except that connected components which are nearly - // rectangular are replaced with solid rectangles. - // The returned boxa, pixa may be nullptr, meaning no images found. - // If not nullptr, they must be destroyed by the caller. - // Resolution of pix should match the source image (Tesseract::pix_binary_) - // so the output coordinate systems match. - static void ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, - Boxa** boxa, Pixa** pixa); - - // Returns true if there is a rectangle in the source pix, such that all - // pixel rows and column slices outside of it have less than - // min_fraction of the pixels black, and within max_skew_gradient fraction - // of the pixels on the inside, there are at least max_fraction of the - // pixels black. In other words, the inside of the rectangle looks roughly - // rectangular, and the outside of it looks like extra bits. 
- // On return, the rectangle is defined by x_start, y_start, x_end and y_end. - // Note: the algorithm is iterative, allowing it to slice off pixels from - // one edge, allowing it to then slice off more pixels from another edge. - static bool pixNearlyRectangular(Pix* pix, - double min_fraction, double max_fraction, - double max_skew_gradient, - int* x_start, int* y_start, - int* x_end, int* y_end); - - // Given an input pix, and a bounding rectangle, the sides of the rectangle - // are shrunk inwards until they bound any black pixels found within the - // original rectangle. Returns false if the rectangle contains no black - // pixels at all. - static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start, - int* x_end, int* y_end); - - // Given a point in 3-D (RGB) space, returns the squared Euclidean distance - // of the point from the given line, defined by a pair of points in the 3-D - // (RGB) space, line1 and line2. - static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2, - const uint8_t* point); - - // Returns the leptonica combined code for the given RGB triplet. - static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b); - - // Returns the input value clipped to a uint8_t. - static uint8_t ClipToByte(double pixel); - - // Computes the light and dark extremes of color in the given rectangle of - // the given pix, which is factor smaller than the coordinate system in rect. - // The light and dark points are taken to be the upper and lower 8th-ile of - // the most deviant of R, G and B. The value of the other 2 channels are - // computed by linear fit against the most deviant. - // The colors of the two point are returned in color1 and color2, with the - // alpha channel set to a scaled mean rms of the fits. - // If color_map1 is not null then it and color_map2 get rect pasted in them - // with the two calculated colors, and rms map gets a pasted rect of the rms. 
- // color_map1, color_map2 and rms_map are assumed to be the same scale as pix. - static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, - Pix* color_map1, Pix* color_map2, - Pix* rms_map, - uint8_t* color1, uint8_t* color2); - - // Returns true if there are no black pixels in between the boxes. - // The im_box must represent the bounding box of the pix in tesseract - // coordinates, which may be negative, due to rotations to make the textlines - // horizontal. The boxes are rotated by rotation, which should undo such - // rotations, before mapping them onto the pix. - static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2, - const TBOX& im_box, const FCOORD& rotation, - Pix* pix); - - // Returns the number of pixels in box in the pix. - // The im_box must represent the bounding box of the pix in tesseract - // coordinates, which may be negative, due to rotations to make the textlines - // horizontal. The boxes are rotated by rotation, which should undo such - // rotations, before mapping them onto the pix. - static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box, - const FCOORD& rotation, Pix* pix); - - - // Locates all the image partitions in the part_grid, that were found by a - // previous call to FindImagePartitions, marks them in the image_mask, - // removes them from the grid, and deletes them. This makes it possible to - // call FindImagePartitions again to produce less broken-up and less - // overlapping image partitions. - // rerotation specifies how to rotate the partition coords to match - // the image_mask, since this function is used after orientation correction. - static void TransferImagePartsToImageMask(const FCOORD& rerotation, - ColPartitionGrid* part_grid, - Pix* image_mask); - - // Runs a CC analysis on the image_pix mask image, and creates - // image partitions from them, cutting out strong text, and merging with - // nearby image regions such that they don't interfere with text. 
- // Rotation and rerotation specify how to rotate image coords to match - // the blob and partition coords and back again. - // The input/output part_grid owns all the created partitions, and - // the partitions own all the fake blobs that belong in the partitions. - // Since the other blobs in the other partitions will be owned by the block, - // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this - // situation and collect the image blobs. - static void FindImagePartitions(Pix* image_pix, const FCOORD& rotation, - const FCOORD& rerotation, TO_BLOCK* block, - TabFind* tab_grid, DebugPixa* pixa_debug, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts); -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_LINEFIND_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/linefind.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/linefind.cpp deleted file mode 100644 index e5679102..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/linefind.cpp +++ /dev/null @@ -1,771 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: linefind.cpp -// Description: Class to find vertical lines in an image and create -// a corresponding list of empty blobs. -// Author: Ray Smith -// Created: Thu Mar 20 09:49:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "linefind.h" -#include "alignedblob.h" -#include "tabvector.h" -#include "blobbox.h" -#include "edgblob.h" -#include "openclwrapper.h" - -#include "allheaders.h" - -#include - -namespace tesseract { - -/// Denominator of resolution makes max pixel width to allow thin lines. -const int kThinLineFraction = 20; -/// Denominator of resolution makes min pixels to demand line lengths to be. -const int kMinLineLengthFraction = 4; -/// Spacing of cracks across the page to break up tall vertical lines. -const int kCrackSpacing = 100; -/// Grid size used by line finder. Not very critical. -const int kLineFindGridSize = 50; -// Min width of a line in pixels to be considered thick. -const int kMinThickLineWidth = 12; -// Max size of line residue. (The pixels that fail the long thin opening, and -// therefore don't make it to the candidate line mask, but are nevertheless -// part of the line.) -const int kMaxLineResidue = 6; -// Min length in inches of a line segment that exceeds kMinThickLineWidth in -// thickness. (Such lines shouldn't break by simple image degradation.) -const double kThickLengthMultiple = 0.75; -// Max fraction of line box area that can be occupied by non-line pixels. -const double kMaxNonLineDensity = 0.25; -// Max height of a music stave in inches. -const double kMaxStaveHeight = 1.0; -// Minimum fraction of pixels in a music rectangle connected to the staves. -const double kMinMusicPixelFraction = 0.75; - -// Erases the unused blobs from the line_pix image, taking into account -// whether this was a horizontal or vertical line set. 
-static void RemoveUnusedLineSegments(bool horizontal_lines, - BLOBNBOX_LIST* line_bblobs, - Pix* line_pix) { - int height = pixGetHeight(line_pix); - BLOBNBOX_IT bbox_it(line_bblobs); - for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { - BLOBNBOX* blob = bbox_it.data(); - if (blob->left_tab_type() != TT_VLINE) { - const TBOX& box = blob->bounding_box(); - Box* pixbox = nullptr; - if (horizontal_lines) { - // Horizontal lines are in tess format and also have x and y flipped - // (to use FindVerticalAlignment) so we have to flip x and y and then - // convert to Leptonica by height - flipped x (ie the right edge). - // See GetLineBoxes for more explanation. - pixbox = boxCreate(box.bottom(), height - box.right(), - box.height(), box.width()); - } else { - // For vertical lines, just flip upside-down to convert to Leptonica. - // The y position of the box in Leptonica terms is the distance from - // the top of the image to the top of the box. - pixbox = boxCreate(box.left(), height - box.top(), - box.width(), box.height()); - } - pixClearInRect(line_pix, pixbox); - boxDestroy(&pixbox); - } - } -} - -// Helper subtracts the line_pix image from the src_pix, and removes residue -// as well by removing components that touch the line, but are not in the -// non_line_pix mask. It is assumed that the non_line_pix mask has already -// been prepared to required accuracy. -static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix, - int resolution, Pix* src_pix) { - // First remove the lines themselves. - pixSubtract(src_pix, src_pix, line_pix); - // Subtract the non-lines from the image to get the residue. - Pix* residue_pix = pixSubtract(nullptr, src_pix, non_line_pix); - // Dilate the lines so they touch the residue. - Pix* fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3); - // Seed fill the fat lines to get all the residue. 
- pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8); - // Subtract the residue from the original image. - pixSubtract(src_pix, src_pix, fat_line_pix); - pixDestroy(&fat_line_pix); - pixDestroy(&residue_pix); -} - -// Returns the maximum strokewidth in the given binary image by doubling -// the maximum of the distance function. -static int MaxStrokeWidth(Pix* pix) { - Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); - int width = pixGetWidth(dist_pix); - int height = pixGetHeight(dist_pix); - int wpl = pixGetWpl(dist_pix); - l_uint32* data = pixGetData(dist_pix); - // Find the maximum value in the distance image. - int max_dist = 0; - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - int pixel = GET_DATA_BYTE(data, x); - if (pixel > max_dist) - max_dist = pixel; - } - data += wpl; - } - pixDestroy(&dist_pix); - return max_dist * 2; -} - -// Returns the number of components in the intersection_pix touched by line_box. -static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) { - if (intersection_pix == nullptr) return 0; - Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr); - Boxa* boxa = pixConnComp(rect_pix, nullptr, 8); - pixDestroy(&rect_pix); - if (boxa == nullptr) return false; - int result = boxaGetCount(boxa); - boxaDestroy(&boxa); - return result; -} - -// Returns the number of black pixels found in the box made by adding the line -// width to both sides of the line bounding box. (Increasing the smallest -// dimension of the bounding box.) -static int CountPixelsAdjacentToLine(int line_width, Box* line_box, - Pix* nonline_pix) { - l_int32 x, y, box_width, box_height; - boxGetGeometry(line_box, &x, &y, &box_width, &box_height); - if (box_width > box_height) { - // horizontal line. - int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width); - y = std::max(0, y - line_width); - box_height = bottom - y; - } else { - // Vertical line. 
- int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width); - x = std::max(0, x - line_width); - box_width = right - x; - } - Box* box = boxCreate(x, y, box_width, box_height); - Pix* rect_pix = pixClipRectangle(nonline_pix, box, nullptr); - boxDestroy(&box); - l_int32 result; - pixCountPixels(rect_pix, &result, nullptr); - pixDestroy(&rect_pix); - return result; -} - -// Helper erases false-positive line segments from the input/output line_pix. -// 1. Since thick lines shouldn't really break up, we can eliminate some false -// positives by marking segments that are at least kMinThickLineWidth -// thickness, yet have a length less than min_thick_length. -// 2. Lines that don't have at least 2 intersections with other lines and have -// a lot of neighbouring non-lines are probably not lines (perhaps arabic -// or Hindi words, or underlines.) -// Bad line components are erased from line_pix. -// Returns the number of remaining connected components. -static int FilterFalsePositives(int resolution, Pix* nonline_pix, - Pix* intersection_pix, Pix* line_pix) { - int min_thick_length = static_cast(resolution * kThickLengthMultiple); - Pixa* pixa = nullptr; - Boxa* boxa = pixConnComp(line_pix, &pixa, 8); - // Iterate over the boxes to remove false positives. - int nboxes = boxaGetCount(boxa); - int remaining_boxes = nboxes; - for (int i = 0; i < nboxes; ++i) { - Box* box = boxaGetBox(boxa, i, L_CLONE); - l_int32 x, y, box_width, box_height; - boxGetGeometry(box, &x, &y, &box_width, &box_height); - Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE); - int max_width = MaxStrokeWidth(comp_pix); - pixDestroy(&comp_pix); - bool bad_line = false; - // If the length is too short to stand-alone as a line, and the box width - // is thick enough, and the stroke width is thick enough it is bad. 
- if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && - box_width < min_thick_length && box_height < min_thick_length && - max_width > kMinThickLineWidth) { - // Too thick for the length. - bad_line = true; - } - if (!bad_line && - (intersection_pix == nullptr || - NumTouchingIntersections(box, intersection_pix) < 2)) { - // Test non-line density near the line. - int nonline_count = CountPixelsAdjacentToLine(max_width, box, - nonline_pix); - if (nonline_count > box_height * box_width * kMaxNonLineDensity) - bad_line = true; - } - if (bad_line) { - // Not a good line. - pixClearInRect(line_pix, box); - --remaining_boxes; - } - boxDestroy(&box); - } - pixaDestroy(&pixa); - boxaDestroy(&boxa); - return remaining_boxes; -} - -// Finds vertical and horizontal line objects in the given pix. -// Uses the given resolution to determine size thresholds instead of any -// that may be present in the pix. -// The output vertical_x and vertical_y contain a sum of the output vectors, -// thereby giving the mean vertical direction. -// If pix_music_mask != nullptr, and music is detected, a mask of the staves -// and anything that is connected (bars, notes etc.) will be returned in -// pix_music_mask, the mask subtracted from pix, and the lines will not -// appear in v_lines or h_lines. -// The output vectors are owned by the list and Frozen (cannot refit) by -// having no boxes, as there is no need to refit or merge separator lines. -// The detected lines are removed from the pix. 
-void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix, - int* vertical_x, int* vertical_y, - Pix** pix_music_mask, - TabVector_LIST* v_lines, - TabVector_LIST* h_lines) { - PERF_COUNT_START("FindAndRemoveLines") - if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) { - tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); - return; - } - Pix* pix_vline = nullptr; - Pix* pix_non_vline = nullptr; - Pix* pix_hline = nullptr; - Pix* pix_non_hline = nullptr; - Pix* pix_intersections = nullptr; - Pixa* pixa_display = debug ? pixaCreate(0) : nullptr; - GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, - &pix_non_hline, &pix_intersections, pix_music_mask, - pixa_display); - // Find lines, convert to TabVector_LIST and remove those that are used. - FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y, - &pix_vline, pix_non_vline, pix, v_lines); - if (pix_hline != nullptr) { - // Recompute intersections and re-filter false positive h-lines. - if (pix_vline != nullptr) - pixAnd(pix_intersections, pix_vline, pix_hline); - else - pixDestroy(&pix_intersections); - if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, - pix_hline)) { - pixDestroy(&pix_hline); - } - } - FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, - &pix_hline, pix_non_hline, pix, h_lines); - if (pixa_display != nullptr && pix_vline != nullptr) - pixaAddPix(pixa_display, pix_vline, L_CLONE); - if (pixa_display != nullptr && pix_hline != nullptr) - pixaAddPix(pixa_display, pix_hline, L_CLONE); - if (pix_vline != nullptr && pix_hline != nullptr) { - // Remove joins (intersections) where lines cross, and the residue. - // Recalculate the intersections, since some lines have been deleted. - pixAnd(pix_intersections, pix_vline, pix_hline); - // Fatten up the intersections and seed-fill to get the intersection - // residue. 
- Pix* pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5); - pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); - // Now remove the intersection residue. - pixSubtract(pix, pix, pix_join_residue); - pixDestroy(&pix_join_residue); - } - // Remove any detected music. - if (pix_music_mask != nullptr && *pix_music_mask != nullptr) { - if (pixa_display != nullptr) - pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); - pixSubtract(pix, pix, *pix_music_mask); - } - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix, L_CLONE); - - pixDestroy(&pix_vline); - pixDestroy(&pix_non_vline); - pixDestroy(&pix_hline); - pixDestroy(&pix_non_hline); - pixDestroy(&pix_intersections); - if (pixa_display != nullptr) { - pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", - "vhlinefinding.pdf"); - pixaDestroy(&pixa_display); - } - PERF_COUNT_END -} - -// Converts the Boxa array to a list of C_BLOB, getting rid of severely -// overlapping outlines and those that are children of a bigger one. -// The output is a list of C_BLOBs that are owned by the list. -// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty -// bounding boxes. The Boxa is consumed and destroyed. -void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, - Boxa** boxes, C_BLOB_LIST* blobs) { - C_OUTLINE_LIST outlines; - C_OUTLINE_IT ol_it = &outlines; - // Iterate the boxes to convert to outlines. - int nboxes = boxaGetCount(*boxes); - for (int i = 0; i < nboxes; ++i) { - l_int32 x, y, width, height; - boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height); - // Make a C_OUTLINE from the leptonica box. This is a bit of a hack, - // as there is no outline, just a bounding box, but with some very - // small changes to coutln.cpp, it works nicely. 
- ICOORD top_left(x, y); - ICOORD bot_right(x + width, y + height); - CRACKEDGE startpt; - startpt.pos = top_left; - C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0); - ol_it.add_after_then_move(outline); - } - // Use outlines_to_blobs to convert the outlines to blobs and find - // overlapping and contained objects. The output list of blobs in the block - // has all the bad ones filtered out and deleted. - BLOCK block; - ICOORD page_tl(0, 0); - ICOORD page_br(image_width, image_height); - outlines_to_blobs(&block, page_tl, page_br, &outlines); - // Transfer the created blobs to the output list. - C_BLOB_IT blob_it(blobs); - blob_it.add_list_after(block.blob_list()); - // The boxes aren't needed any more. - boxaDestroy(boxes); -} - -// Finds vertical line objects in pix_vline and removes the from src_pix. -// Uses the given resolution to determine size thresholds instead of any -// that may be present in the pix. -// The output vertical_x and vertical_y contain a sum of the output vectors, -// thereby giving the mean vertical direction. -// The output vectors are owned by the list and Frozen (cannot refit) by -// having no boxes, as there is no need to refit or merge separator lines. -// If no good lines are found, pix_vline is destroyed. -// None of the input pointers may be nullptr, and if *pix_vline is nullptr then -// the function does nothing. 
-void LineFinder::FindAndRemoveVLines(int resolution, - Pix* pix_intersections, - int* vertical_x, int* vertical_y, - Pix** pix_vline, Pix* pix_non_vline, - Pix* src_pix, TabVector_LIST* vectors) { - if (pix_vline == nullptr || *pix_vline == nullptr) return; - C_BLOB_LIST line_cblobs; - BLOBNBOX_LIST line_bblobs; - GetLineBoxes(false, *pix_vline, pix_intersections, - &line_cblobs, &line_bblobs); - int width = pixGetWidth(src_pix); - int height = pixGetHeight(src_pix); - ICOORD bleft(0, 0); - ICOORD tright(width, height); - FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); - if (!vectors->empty()) { - RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline); - SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix); - ICOORD vertical; - vertical.set_with_shrink(*vertical_x, *vertical_y); - TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); - } else { - pixDestroy(pix_vline); - } -} - -// Finds horizontal line objects in pix_hline and removes them from src_pix. -// Uses the given resolution to determine size thresholds instead of any -// that may be present in the pix. -// The output vertical_x and vertical_y contain a sum of the output vectors, -// thereby giving the mean vertical direction. -// The output vectors are owned by the list and Frozen (cannot refit) by -// having no boxes, as there is no need to refit or merge separator lines. -// If no good lines are found, pix_hline is destroyed. -// None of the input pointers may be nullptr, and if *pix_hline is nullptr then -// the function does nothing. 
-void LineFinder::FindAndRemoveHLines(int resolution, - Pix* pix_intersections, - int vertical_x, int vertical_y, - Pix** pix_hline, Pix* pix_non_hline, - Pix* src_pix, TabVector_LIST* vectors) { - if (pix_hline == nullptr || *pix_hline == nullptr) return; - C_BLOB_LIST line_cblobs; - BLOBNBOX_LIST line_bblobs; - GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs); - int width = pixGetWidth(src_pix); - int height = pixGetHeight(src_pix); - ICOORD bleft(0, 0); - ICOORD tright(height, width); - FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, - vectors); - if (!vectors->empty()) { - RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline); - SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix); - ICOORD vertical; - vertical.set_with_shrink(vertical_x, vertical_y); - TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); - // Iterate the vectors to flip them. x and y were flipped for horizontal - // lines, so FindLineVectors can work just with the vertical case. - // See GetLineBoxes for more on the flip. - TabVector_IT h_it(vectors); - for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { - h_it.data()->XYFlip(); - } - } else { - pixDestroy(pix_hline); - } -} - -// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright -// are the bounds of the image on which the input line_bblobs were found. -// The input line_bblobs list is const really. -// The output vertical_x and vertical_y are the total of all the vectors. -// The output list of TabVector makes no reference to the input BLOBNBOXes. -void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, - BLOBNBOX_LIST* line_bblobs, - int* vertical_x, int* vertical_y, - TabVector_LIST* vectors) { - BLOBNBOX_IT bbox_it(line_bblobs); - int b_count = 0; - // Put all the blobs into the grid to find the lines, and move the blobs - // to the output lists. 
- AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); - for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { - BLOBNBOX* bblob = bbox_it.data(); - bblob->set_left_tab_type(TT_MAYBE_ALIGNED); - bblob->set_left_rule(bleft.x()); - bblob->set_right_rule(tright.x()); - bblob->set_left_crossing_rule(bleft.x()); - bblob->set_right_crossing_rule(tright.x()); - blob_grid.InsertBBox(false, true, bblob); - ++b_count; - } - if (b_count == 0) - return; - - // Search the entire grid, looking for vertical line vectors. - BlobGridSearch lsearch(&blob_grid); - BLOBNBOX* bbox; - TabVector_IT vector_it(vectors); - *vertical_x = 0; - *vertical_y = 1; - lsearch.StartFullSearch(); - while ((bbox = lsearch.NextFullSearch()) != nullptr) { - if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) { - const TBOX& box = bbox->bounding_box(); - if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) - tprintf("Finding line vector starting at bbox (%d,%d)\n", - box.left(), box.bottom()); - AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); - TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, - vertical_x, - vertical_y); - if (vector != nullptr) { - vector->Freeze(); - vector_it.add_to_end(vector); - } - } - } -} - -// Returns a Pix music mask if music is detected. -// Any vertical line that has at least 5 intersections in sufficient density -// is taken to be a bar. Bars are used as a seed and the entire touching -// component is added to the output music mask and subtracted from the lines. -// Returns nullptr and does minimal work if no music is found. -static Pix* FilterMusic(int resolution, Pix* pix_closed, - Pix* pix_vline, Pix* pix_hline, - l_int32* v_empty, l_int32* h_empty) { - int max_stave_height = static_cast(resolution * kMaxStaveHeight); - Pix* intersection_pix = pixAnd(nullptr, pix_vline, pix_hline); - Boxa* boxa = pixConnComp(pix_vline, nullptr, 8); - // Iterate over the boxes to find music bars. 
- int nboxes = boxaGetCount(boxa); - Pix* music_mask = nullptr; - for (int i = 0; i < nboxes; ++i) { - Box* box = boxaGetBox(boxa, i, L_CLONE); - l_int32 x, y, box_width, box_height; - boxGetGeometry(box, &x, &y, &box_width, &box_height); - int joins = NumTouchingIntersections(box, intersection_pix); - // Test for the join density being at least 5 per max_stave_height, - // ie (joins-1)/box_height >= (5-1)/max_stave_height. - if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) { - // This is a music bar. Add to the mask. - if (music_mask == nullptr) - music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), - 1); - pixSetInRect(music_mask, box); - } - boxDestroy(&box); - } - boxaDestroy(&boxa); - pixDestroy(&intersection_pix); - if (music_mask != nullptr) { - // The mask currently contains just the bars. Use the mask as a seed - // and the pix_closed as the mask for a seedfill to get all the - // intersecting staves. - pixSeedfillBinary(music_mask, music_mask, pix_closed, 8); - // Filter out false positives. CCs in the music_mask should be the vast - // majority of the pixels in their bounding boxes, as we expect just a - // tiny amount of text, a few phrase marks, and crescendo etc left. - Boxa* boxa = pixConnComp(music_mask, nullptr, 8); - // Iterate over the boxes to find music components. - int nboxes = boxaGetCount(boxa); - for (int i = 0; i < nboxes; ++i) { - Box* box = boxaGetBox(boxa, i, L_CLONE); - Pix* rect_pix = pixClipRectangle(music_mask, box, nullptr); - l_int32 music_pixels; - pixCountPixels(rect_pix, &music_pixels, nullptr); - pixDestroy(&rect_pix); - rect_pix = pixClipRectangle(pix_closed, box, nullptr); - l_int32 all_pixels; - pixCountPixels(rect_pix, &all_pixels, nullptr); - pixDestroy(&rect_pix); - if (music_pixels < kMinMusicPixelFraction * all_pixels) { - // False positive. Delete from the music mask. 
- pixClearInRect(music_mask, box); - } - boxDestroy(&box); - } - l_int32 no_remaining_music; - boxaDestroy(&boxa); - pixZero(music_mask, &no_remaining_music); - if (no_remaining_music) { - pixDestroy(&music_mask); - } else { - pixSubtract(pix_vline, pix_vline, music_mask); - pixSubtract(pix_hline, pix_hline, music_mask); - // We may have deleted all the lines - pixZero(pix_vline, v_empty); - pixZero(pix_hline, h_empty); - } - } - return music_mask; -} - -// Most of the heavy lifting of line finding. Given src_pix and its separate -// resolution, returns image masks: -// pix_vline candidate vertical lines. -// pix_non_vline pixels that didn't look like vertical lines. -// pix_hline candidate horizontal lines. -// pix_non_hline pixels that didn't look like horizontal lines. -// pix_intersections pixels where vertical and horizontal lines meet. -// pix_music_mask candidate music staves. -// This function promises to initialize all the output (2nd level) pointers, -// but any of the returns that are empty will be nullptr on output. -// None of the input (1st level) pointers may be nullptr except pix_music_mask, -// which will disable music detection, and pixa_display. 
-void LineFinder::GetLineMasks(int resolution, Pix* src_pix, - Pix** pix_vline, Pix** pix_non_vline, - Pix** pix_hline, Pix** pix_non_hline, - Pix** pix_intersections, Pix** pix_music_mask, - Pixa* pixa_display) { - Pix* pix_closed = nullptr; - Pix* pix_hollow = nullptr; - - int max_line_width = resolution / kThinLineFraction; - int min_line_length = resolution / kMinLineLengthFraction; - if (pixa_display != nullptr) { - tprintf("Image resolution = %d, max line width = %d, min length=%d\n", - resolution, max_line_width, min_line_length); - } - int closing_brick = max_line_width / 3; - - PERF_COUNT_START("GetLineMasksMorph") -// only use opencl if compiled w/ OpenCL and selected device is opencl -#ifdef USE_OPENCL - if (OpenclDevice::selectedDeviceIsOpenCL()) { - // OpenCL pixGetLines Operation - int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), - pixGetHeight(src_pix), - src_pix); - bool getpixclosed = pix_music_mask != nullptr; - OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, - &pix_closed, getpixclosed, closing_brick, - closing_brick, max_line_width, max_line_width, - min_line_length, min_line_length); - } else { -#endif - // Close up small holes, making it less likely that false alarms are found - // in thickened text (as it will become more solid) and also smoothing over - // some line breaks and nicks in the edges of the lines. - pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix_closed, L_CLONE); - // Open up with a big box to detect solid areas, which can then be subtracted. - // This is very generous and will leave in even quite wide lines. 
- Pix* pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, - max_line_width); - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix_solid, L_CLONE); - pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - - pixDestroy(&pix_solid); - - // Now open up in both directions independently to find lines of at least - // 1 inch/kMinLineLengthFraction in length. - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix_hollow, L_CLONE); - *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - - pixDestroy(&pix_hollow); -#ifdef USE_OPENCL - } -#endif - PERF_COUNT_END - - // Lines are sufficiently rare, that it is worth checking for a zero image. - l_int32 v_empty = 0; - l_int32 h_empty = 0; - pixZero(*pix_vline, &v_empty); - pixZero(*pix_hline, &h_empty); - if (pix_music_mask != nullptr) { - if (!v_empty && !h_empty) { - *pix_music_mask = FilterMusic(resolution, pix_closed, - *pix_vline, *pix_hline, - &v_empty, &h_empty); - } else { - *pix_music_mask = nullptr; - } - } - pixDestroy(&pix_closed); - Pix* pix_nonlines = nullptr; - *pix_intersections = nullptr; - Pix* extra_non_hlines = nullptr; - if (!v_empty) { - // Subtract both line candidates from the source to get definite non-lines. - pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline); - if (!h_empty) { - pixSubtract(pix_nonlines, pix_nonlines, *pix_hline); - // Intersections are a useful indicator for likelihood of being a line. - *pix_intersections = pixAnd(nullptr, *pix_vline, *pix_hline); - // Candidate vlines are not hlines (apart from the intersections) - // and vice versa. - extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections); - } - *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1); - pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8); - if (!h_empty) { - // Candidate hlines are not vlines. 
- pixOr(*pix_non_vline, *pix_non_vline, *pix_hline); - pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections); - } - if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, - *pix_vline)) - pixDestroy(pix_vline); // No candidates left. - } else { - // No vertical lines. - pixDestroy(pix_vline); - *pix_non_vline = nullptr; - if (!h_empty) { - pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline); - } - } - if (h_empty) { - pixDestroy(pix_hline); - *pix_non_hline = nullptr; - if (v_empty) { - return; - } - } else { - *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue); - pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8); - if (extra_non_hlines != nullptr) { - pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines); - pixDestroy(&extra_non_hlines); - } - if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, - *pix_hline)) - pixDestroy(pix_hline); // No candidates left. - } - if (pixa_display != nullptr) { - if (*pix_vline != nullptr) pixaAddPix(pixa_display, *pix_vline, L_CLONE); - if (*pix_hline != nullptr) pixaAddPix(pixa_display, *pix_hline, L_CLONE); - if (pix_nonlines != nullptr) pixaAddPix(pixa_display, pix_nonlines, L_CLONE); - if (*pix_non_vline != nullptr) - pixaAddPix(pixa_display, *pix_non_vline, L_CLONE); - if (*pix_non_hline != nullptr) - pixaAddPix(pixa_display, *pix_non_hline, L_CLONE); - if (*pix_intersections != nullptr) - pixaAddPix(pixa_display, *pix_intersections, L_CLONE); - if (pix_music_mask != nullptr && *pix_music_mask != nullptr) - pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); - } - pixDestroy(&pix_nonlines); -} - -// Returns a list of boxes corresponding to the candidate line segments. Sets -// the line_crossings member of the boxes so we can later determine the number -// of intersections touched by a full line. 
-void LineFinder::GetLineBoxes(bool horizontal_lines, - Pix* pix_lines, Pix* pix_intersections, - C_BLOB_LIST* line_cblobs, - BLOBNBOX_LIST* line_bblobs) { - // Put a single pixel crack in every line at an arbitrary spacing, - // so they break up and the bounding boxes can be used to get the - // direction accurately enough without needing outlines. - int wpl = pixGetWpl(pix_lines); - int width = pixGetWidth(pix_lines); - int height = pixGetHeight(pix_lines); - l_uint32* data = pixGetData(pix_lines); - if (horizontal_lines) { - for (int y = 0; y < height; ++y, data += wpl) { - for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { - CLEAR_DATA_BIT(data, x); - } - } - } else { - for (int y = kCrackSpacing; y < height; y += kCrackSpacing) { - memset(data + wpl * y, 0, wpl * sizeof(*data)); - } - } - // Get the individual connected components - Boxa* boxa = pixConnComp(pix_lines, nullptr, 8); - ConvertBoxaToBlobs(width, height, &boxa, line_cblobs); - // Make the BLOBNBOXes from the C_BLOBs. - C_BLOB_IT blob_it(line_cblobs); - BLOBNBOX_IT bbox_it(line_bblobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - C_BLOB* cblob = blob_it.data(); - BLOBNBOX* bblob = new BLOBNBOX(cblob); - bbox_it.add_to_end(bblob); - // Determine whether the line segment touches two intersections. - const TBOX& bbox = bblob->bounding_box(); - Box* box = boxCreate(bbox.left(), bbox.bottom(), - bbox.width(), bbox.height()); - bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections)); - boxDestroy(&box); - // Transform the bounding box prior to finding lines. To save writing - // two line finders, flip x and y for horizontal lines and re-use the - // tab-stop detection code. For vertical lines we still have to flip the - // y-coordinates to switch from leptonica coords to tesseract coords. 
- if (horizontal_lines) { - // Note that we have Leptonica coords stored in a Tesseract box, so that - // bbox.bottom(), being the MIN y coord, is actually the top, so to get - // back to Leptonica coords in RemoveUnusedLineSegments, we have to - // use height - box.right() as the top, which looks very odd. - TBOX new_box(height - bbox.top(), bbox.left(), - height - bbox.bottom(), bbox.right()); - bblob->set_bounding_box(new_box); - } else { - TBOX new_box(bbox.left(), height - bbox.top(), - bbox.right(), height - bbox.bottom()); - bblob->set_bounding_box(new_box); - } - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/linefind.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/linefind.h deleted file mode 100644 index 6b0b212a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/linefind.h +++ /dev/null @@ -1,149 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: linefind.h -// Description: Class to find vertical lines in an image and create -// a corresponding list of empty blobs. -// Author: Ray Smith -// Created: Thu Mar 20 09:49:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_LINEFIND_H_ -#define TESSERACT_TEXTORD_LINEFIND_H_ - -struct Boxa; -struct Pix; -struct Pixa; -class C_BLOB_LIST; -class BLOBNBOX_LIST; -class ICOORD; - -namespace tesseract { - -class TabVector_LIST; - -/** - * The LineFinder class is a simple static function wrapper class that mainly - * exposes the FindVerticalLines function. - */ -class LineFinder { - public: - /** - * Finds vertical and horizontal line objects in the given pix and removes - * them. - * - * Uses the given resolution to determine size thresholds instead of any - * that may be present in the pix. - * - * The output vertical_x and vertical_y contain a sum of the output vectors, - * thereby giving the mean vertical direction. - * - * If pix_music_mask != nullptr, and music is detected, a mask of the staves - * and anything that is connected (bars, notes etc.) will be returned in - * pix_music_mask, the mask subtracted from pix, and the lines will not - * appear in v_lines or h_lines. - * - * The output vectors are owned by the list and Frozen (cannot refit) by - * having no boxes, as there is no need to refit or merge separator lines. - * - * The detected lines are removed from the pix. - */ - static void FindAndRemoveLines(int resolution, bool debug, Pix* pix, - int* vertical_x, int* vertical_y, - Pix** pix_music_mask, - TabVector_LIST* v_lines, - TabVector_LIST* h_lines); - - /** - * Converts the Boxa array to a list of C_BLOB, getting rid of severely - * overlapping outlines and those that are children of a bigger one. - * - * The output is a list of C_BLOBs that are owned by the list. - * - * The C_OUTLINEs in the C_BLOBs contain no outline data - just empty - * bounding boxes. The Boxa is consumed and destroyed. 
- */ - static void ConvertBoxaToBlobs(int image_width, int image_height, - Boxa** boxes, C_BLOB_LIST* blobs); - - private: - // Finds vertical line objects in pix_vline and removes them from src_pix. - // Uses the given resolution to determine size thresholds instead of any - // that may be present in the pix. - // The output vertical_x and vertical_y contain a sum of the output vectors, - // thereby giving the mean vertical direction. - // The output vectors are owned by the list and Frozen (cannot refit) by - // having no boxes, as there is no need to refit or merge separator lines. - // If no good lines are found, pix_vline is destroyed. - static void FindAndRemoveVLines(int resolution, - Pix* pix_intersections, - int* vertical_x, int* vertical_y, - Pix** pix_vline, Pix* pix_non_vline, - Pix* src_pix, TabVector_LIST* vectors); - - - // Finds horizontal line objects in pix_vline and removes them from src_pix. - // Uses the given resolution to determine size thresholds instead of any - // that may be present in the pix. - // The output vertical_x and vertical_y contain a sum of the output vectors, - // thereby giving the mean vertical direction. - // The output vectors are owned by the list and Frozen (cannot refit) by - // having no boxes, as there is no need to refit or merge separator lines. - // If no good lines are found, pix_hline is destroyed. - static void FindAndRemoveHLines(int resolution, - Pix* pix_intersections, - int vertical_x, int vertical_y, - Pix** pix_hline, Pix* pix_non_hline, - Pix* src_pix, TabVector_LIST* vectors); - - // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright - // are the bounds of the image on which the input line_bblobs were found. - // The input line_bblobs list is const really. - // The output vertical_x and vertical_y are the total of all the vectors. - // The output list of TabVector makes no reference to the input BLOBNBOXes. 
- static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright, - BLOBNBOX_LIST* line_bblobs, - int* vertical_x, int* vertical_y, - TabVector_LIST* vectors); - - // Most of the heavy lifting of line finding. Given src_pix and its separate - // resolution, returns image masks: - // Returns image masks: - // pix_vline candidate vertical lines. - // pix_non_vline pixels that didn't look like vertical lines. - // pix_hline candidate horizontal lines. - // pix_non_hline pixels that didn't look like horizontal lines. - // pix_intersections pixels where vertical and horizontal lines meet. - // pix_music_mask candidate music staves. - // This function promises to initialize all the output (2nd level) pointers, - // but any of the returns that are empty will be nullptr on output. - // None of the input (1st level) pointers may be nullptr except pix_music_mask, - // which will disable music detection, and pixa_display, which is for debug. - static void GetLineMasks(int resolution, Pix* src_pix, - Pix** pix_vline, Pix** pix_non_vline, - Pix** pix_hline, Pix** pix_non_hline, - Pix** pix_intersections, Pix** pix_music_mask, - Pixa* pixa_display); - - // Returns a list of boxes corresponding to the candidate line segments. Sets - // the line_crossings member of the boxes so we can later determine the number - // of intersections touched by a full line. - static void GetLineBoxes(bool horizontal_lines, - Pix* pix_lines, Pix* pix_intersections, - C_BLOB_LIST* line_cblobs, - BLOBNBOX_LIST* line_bblobs); -}; - -} // namespace tesseract. 
- -#endif // TESSERACT_TEXTORD_LINEFIND_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/makerow.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/makerow.cpp deleted file mode 100644 index 7fb5c043..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/makerow.cpp +++ /dev/null @@ -1,2677 +0,0 @@ -/********************************************************************** - * File: makerow.cpp (Formerly makerows.c) - * Description: Code to arrange blobs into rows of text. - * Author: Ray Smith - * Created: Mon Sep 21 14:34:48 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include // for std::vector -#include "blobbox.h" -#include "ccstruct.h" -#include "detlinefit.h" -#include "statistc.h" -#include "drawtord.h" -#include "blkocc.h" -#include "sortflts.h" -#include "oldbasel.h" -#include "textord.h" -#include "tordmain.h" -#include "underlin.h" -#include "makerow.h" -#include "tprintf.h" -#include "tovars.h" - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include - -BOOL_VAR(textord_heavy_nr, FALSE, "Vigorously remove noise"); -BOOL_VAR(textord_show_initial_rows, FALSE, "Display row accumulation"); -BOOL_VAR(textord_show_parallel_rows, FALSE, "Display page correlated rows"); -BOOL_VAR(textord_show_expanded_rows, FALSE, "Display rows after expanding"); -BOOL_VAR(textord_show_final_rows, FALSE, "Display rows after final fitting"); -BOOL_VAR(textord_show_final_blobs, FALSE, "Display blob bounds after pre-ass"); -BOOL_VAR(textord_test_landscape, FALSE, "Tests refer to land/port"); -BOOL_VAR(textord_parallel_baselines, TRUE, "Force parallel baselines"); -BOOL_VAR(textord_straight_baselines, FALSE, "Force straight baselines"); -BOOL_VAR(textord_old_baselines, TRUE, "Use old baseline algorithm"); -BOOL_VAR(textord_old_xheight, FALSE, "Use old xheight algorithm"); -BOOL_VAR(textord_fix_xheight_bug, TRUE, "Use spline baseline"); -BOOL_VAR(textord_fix_makerow_bug, TRUE, "Prevent multiple baselines"); -BOOL_VAR(textord_debug_xheights, FALSE, "Test xheight algorithms"); -BOOL_VAR(textord_biased_skewcalc, TRUE, "Bias skew estimates with line length"); -BOOL_VAR(textord_interpolating_skew, TRUE, "Interpolate across gaps"); -INT_VAR(textord_skewsmooth_offset, 4, "For smooth factor"); -INT_VAR(textord_skewsmooth_offset2, 1, "For smooth factor"); -INT_VAR(textord_test_x, -INT32_MAX, "coord of test pt"); -INT_VAR(textord_test_y, -INT32_MAX, "coord of test pt"); -INT_VAR(textord_min_blobs_in_row, 4, "Min blobs before gradient counted"); -INT_VAR(textord_spline_minblobs, 8, "Min blobs in each spline segment"); -INT_VAR(textord_spline_medianwin, 6, "Size of window for spline segmentation"); -INT_VAR(textord_max_blob_overlaps, 4, - "Max number of blobs a big blob can overlap"); -INT_VAR(textord_min_xheight, 10, "Min credible pixel xheight"); -double_VAR(textord_spline_shift_fraction, 0.02, - "Fraction of line spacing for quad"); -double_VAR(textord_spline_outlier_fraction, 0.1, 
- "Fraction of line spacing for outlier"); -double_VAR(textord_skew_ile, 0.5, "Ile of gradients for page skew"); -double_VAR(textord_skew_lag, 0.02, "Lag for skew on row accumulation"); -double_VAR(textord_linespace_iqrlimit, 0.2, "Max iqr/median for linespace"); -double_VAR(textord_width_limit, 8, "Max width of blobs to make rows"); -double_VAR(textord_chop_width, 1.5, "Max width before chopping"); -double_VAR(textord_expansion_factor, 1.0, - "Factor to expand rows by in expand_rows"); -double_VAR(textord_overlap_x, 0.375, "Fraction of linespace for good overlap"); -double_VAR(textord_minxh, 0.25, "fraction of linesize for min xheight"); -double_VAR(textord_min_linesize, 1.25, "* blob height for initial linesize"); -double_VAR(textord_excess_blobsize, 1.3, - "New row made if blob makes row this big"); -double_VAR(textord_occupancy_threshold, 0.4, "Fraction of neighbourhood"); -double_VAR(textord_underline_width, 2.0, "Multiple of line_size for underline"); -double_VAR(textord_min_blob_height_fraction, 0.75, - "Min blob height/top to include blob top into xheight stats"); -double_VAR(textord_xheight_mode_fraction, 0.4, - "Min pile height to make xheight"); -double_VAR(textord_ascheight_mode_fraction, 0.08, - "Min pile height to make ascheight"); -double_VAR(textord_descheight_mode_fraction, 0.08, - "Min pile height to make descheight"); -double_VAR(textord_ascx_ratio_min, 1.25, "Min cap/xheight"); -double_VAR(textord_ascx_ratio_max, 1.8, "Max cap/xheight"); -double_VAR(textord_descx_ratio_min, 0.25, "Min desc/xheight"); -double_VAR(textord_descx_ratio_max, 0.6, "Max desc/xheight"); -double_VAR(textord_xheight_error_margin, 0.1, "Accepted variation"); -INT_VAR(textord_lms_line_trials, 12, "Number of linew fits to do"); -BOOL_VAR(textord_new_initial_xheight, TRUE, "Use test xheight mechanism"); -BOOL_VAR(textord_debug_blob, FALSE, "Print test blob information"); - -#define MAX_HEIGHT_MODES 12 - -const int kMinLeaderCount = 5; - -// Factored-out helper to build a 
single row from a list of blobs. -// Returns the mean blob size. -static float MakeRowFromBlobs(float line_size, - BLOBNBOX_IT* blob_it, TO_ROW_IT* row_it) { - blob_it->sort(blob_x_order); - blob_it->move_to_first(); - TO_ROW* row = nullptr; - float total_size = 0.0f; - int blob_count = 0; - // Add all the blobs to a single TO_ROW. - for (; !blob_it->empty(); blob_it->forward()) { - BLOBNBOX* blob = blob_it->extract(); - int top = blob->bounding_box().top(); - int bottom = blob->bounding_box().bottom(); - if (row == nullptr) { - row = new TO_ROW(blob, top, bottom, line_size); - row_it->add_before_then_move(row); - } else { - row->add_blob(blob, top, bottom, line_size); - } - total_size += top - bottom; - ++blob_count; - } - return blob_count > 0 ? total_size / blob_count : total_size; -} - -// Helper to make a row using the children of a single blob. -// Returns the mean size of the blobs created. -static float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, - TO_ROW_IT* row_it) { - // The blobs made from the children will go in the small_blobs list. - BLOBNBOX_IT bb_it(&block->small_blobs); - C_OUTLINE_IT ol_it(blob->out_list()); - // Get the children. - ol_it.set_to_list(ol_it.data()->child()); - if (ol_it.empty()) - return 0.0f; - for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { - // Deep copy the child outline and use that to make a blob. - blob = new C_BLOB(C_OUTLINE::deep_copy(ol_it.data())); - // Correct direction as needed. - blob->CheckInverseFlagAndDirection(); - BLOBNBOX* bbox = new BLOBNBOX(blob); - bb_it.add_after_then_move(bbox); - } - // Now we can make a row from the blobs. - return MakeRowFromBlobs(block->line_size, &bb_it, row_it); -} - -/** - * @name make_single_row - * - * Arrange the blobs into a single row... well actually, if there is - * only a single blob, it makes 2 rows, in case the top-level blob - * is a container of the real blobs to recognize. 
- */ -float make_single_row(ICOORD page_tr, bool allow_sub_blobs, - TO_BLOCK* block, TO_BLOCK_LIST* blocks) { - BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows(); - - // Include all the small blobs and large blobs. - blob_it.add_list_after(&block->small_blobs); - blob_it.add_list_after(&block->noise_blobs); - blob_it.add_list_after(&block->large_blobs); - if (block->blobs.singleton() && allow_sub_blobs) { - blob_it.move_to_first(); - float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it); - if (size > block->line_size) - block->line_size = size; - } else if (block->blobs.empty()) { - // Make a fake blob. - C_BLOB* blob = C_BLOB::FakeBlob(block->block->pdblk.bounding_box()); - // The blobnbox owns the blob. - BLOBNBOX* bblob = new BLOBNBOX(blob); - blob_it.add_after_then_move(bblob); - } - MakeRowFromBlobs(block->line_size, &blob_it, &row_it); - // Fit an LMS line to the rows. - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) - fit_lms_line(row_it.data()); - float gradient; - float fit_error; - // Compute the skew based on the fitted line. - compute_page_skew(blocks, gradient, fit_error); - return gradient; -} - -/** - * @name make_rows - * - * Arrange the blobs into rows. 
- */ -float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { - float port_m; // global skew - float port_err; // global noise - TO_BLOCK_IT block_it; // iterator - - block_it.set_to_list(port_blocks); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) - make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), - !textord_test_landscape); - // compute globally - compute_page_skew(port_blocks, port_m, port_err); - block_it.set_to_list(port_blocks); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f), - block_it.data()->block->pdblk.bounding_box().left(), - !textord_test_landscape); - } - return port_m; // global skew -} - -/** - * @name make_initial_textrows - * - * Arrange the good blobs into rows of text. - */ -void make_initial_textrows( //find lines - ICOORD page_tr, - TO_BLOCK* block, //block to do - FCOORD rotation, //for drawing - bool testing_on //correct orientation -) { - TO_ROW_IT row_it = block->get_rows (); - -#ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of row - - if (textord_show_initial_rows && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); - } -#endif - //guess skew - assign_blobs_to_rows (block, nullptr, 0, TRUE, TRUE, textord_show_initial_rows && testing_on); - row_it.move_to_first (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - fit_lms_line (row_it.data ()); -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_rows && testing_on) { - colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - plot_to_row (row_it.data (), colour, rotation); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; - } - } -#endif -} - - -/** - * @name fit_lms_line - * - * Fit an LMS line to a row. 
- */ -void fit_lms_line(TO_ROW *row) { - float m, c; // fitted line - tesseract::DetLineFit lms; - BLOBNBOX_IT blob_it = row->blob_list(); - - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - const TBOX& box = blob_it.data()->bounding_box(); - lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom())); - } - double error = lms.Fit(&m, &c); - row->set_line(m, c, error); -} - - -/** - * @name compute_page_skew - * - * Compute the skew over a full page by averaging the gradients over - * all the lines. Get the error of the same row. - */ -void compute_page_skew( //get average gradient - TO_BLOCK_LIST *blocks, //list of blocks - float &page_m, //average gradient - float &page_err //average error - ) { - int32_t row_count; //total rows - int32_t blob_count; //total_blobs - int32_t row_err; //integer error - int32_t row_index; //of total - TO_ROW *row; //current row - TO_BLOCK_IT block_it = blocks; //iterator - - row_count = 0; - blob_count = 0; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) - continue; // Pretend non-text blocks don't exist. - row_count += block_it.data ()->get_rows ()->length (); - //count up rows - TO_ROW_IT row_it(block_it.data()->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - blob_count += row_it.data ()->blob_list ()->length (); - } - if (row_count == 0) { - page_m = 0.0f; - page_err = 0.0f; - return; - } - // of rows - std::vector gradients(blob_count); - // of rows - std::vector errors(blob_count); - - row_index = 0; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) - continue; // Pretend non-text blocks don't exist. 
- TO_ROW_IT row_it(block_it.data ()->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_count = row->blob_list ()->length (); - row_err = (int32_t) ceil (row->line_error ()); - if (row_err <= 0) - row_err = 1; - if (textord_biased_skewcalc) { - blob_count /= row_err; - for (blob_count /= row_err; blob_count > 0; blob_count--) { - gradients[row_index] = row->line_m (); - errors[row_index] = row->line_error (); - row_index++; - } - } - else if (blob_count >= textord_min_blobs_in_row) { - //get gradient - gradients[row_index] = row->line_m (); - errors[row_index] = row->line_error (); - row_index++; - } - } - } - if (row_index == 0) { - //desperate - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) - continue; // Pretend non-text blocks don't exist. - TO_ROW_IT row_it(block_it.data()->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row = row_it.data (); - gradients[row_index] = row->line_m (); - errors[row_index] = row->line_error (); - row_index++; - } - } - } - row_count = row_index; - row_index = choose_nth_item ((int32_t) (row_count * textord_skew_ile), - &gradients[0], row_count); - page_m = gradients[row_index]; - row_index = choose_nth_item ((int32_t) (row_count * textord_skew_ile), - &errors[0], row_count); - page_err = errors[row_index]; -} - -const double kNoiseSize = 0.5; // Fraction of xheight. -const int kMinSize = 8; // Min pixels to be xheight. - -/** - * Return true if the dot looks like it is part of the i. - * Doesn't work for any other diacritical. - */ -static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { - const TBOX& ibox = i->bounding_box(); - const TBOX& dotbox = dot->bounding_box(); - - // Must overlap horizontally by enough and be high enough. 
- int overlap = std::min(dotbox.right(), ibox.right()) - - std::max(dotbox.left(), ibox.left()); - if (ibox.height() <= 2 * dotbox.height() || - (overlap * 2 < ibox.width() && overlap < dotbox.width())) - return false; - - // If the i is tall and thin then it is good. - if (ibox.height() > ibox.width() * 2) - return true; // The i or ! must be tall and thin. - - // It might still be tall and thin, but it might be joined to something. - // So search the outline for a piece of large height close to the edges - // of the dot. - const double kHeightFraction = 0.6; - double target_height = std::min(dotbox.bottom(), ibox.top()); - target_height -= row->line_m()*dotbox.left() + row->line_c(); - target_height *= kHeightFraction; - int left_min = dotbox.left() - dotbox.width(); - int middle = (dotbox.left() + dotbox.right())/2; - int right_max = dotbox.right() + dotbox.width(); - int left_miny = 0; - int left_maxy = 0; - int right_miny = 0; - int right_maxy = 0; - bool found_left = false; - bool found_right = false; - bool in_left = false; - bool in_right = false; - C_BLOB* blob = i->cblob(); - C_OUTLINE_IT o_it = blob->out_list(); - for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) { - C_OUTLINE* outline = o_it.data(); - int length = outline->pathlength(); - ICOORD pos = outline->start_pos(); - for (int step = 0; step < length; pos += outline->step(step++)) { - int x = pos.x(); - int y = pos.y(); - if (x >= left_min && x < middle && !found_left) { - // We are in the left part so find min and max y. - if (in_left) { - if (y > left_maxy) left_maxy = y; - if (y < left_miny) left_miny = y; - } else { - left_maxy = left_miny = y; - in_left = true; - } - } else if (in_left) { - // We just left the left so look for size. - if (left_maxy - left_miny > target_height) { - if (found_right) - return true; - found_left = true; - } - in_left = false; - } - if (x <= right_max && x > middle && !found_right) { - // We are in the right part so find min and max y. 
- if (in_right) { - if (y > right_maxy) right_maxy = y; - if (y < right_miny) right_miny = y; - } else { - right_maxy = right_miny = y; - in_right = true; - } - } else if (in_right) { - // We just left the right so look for size. - if (right_maxy - right_miny > target_height) { - if (found_left) - return true; - found_right = true; - } - in_right = false; - } - } - } - return false; -} - -void vigorous_noise_removal(TO_BLOCK* block) { - TO_ROW_IT row_it = block->get_rows (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - TO_ROW* row = row_it.data(); - BLOBNBOX_IT b_it = row->blob_list(); - // Estimate the xheight on the row. - int max_height = 0; - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOBNBOX* blob = b_it.data(); - if (blob->bounding_box().height() > max_height) - max_height = blob->bounding_box().height(); - } - STATS hstats(0, max_height + 1); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOBNBOX* blob = b_it.data(); - int height = blob->bounding_box().height(); - if (height >= kMinSize) - hstats.add(blob->bounding_box().height(), 1); - } - float xheight = hstats.median(); - // Delete small objects. - BLOBNBOX* prev = nullptr; - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOBNBOX* blob = b_it.data(); - const TBOX& box = blob->bounding_box(); - if (box.height() < kNoiseSize * xheight) { - // Small so delete unless it looks like an i dot. - if (prev != nullptr) { - if (dot_of_i(blob, prev, row)) - continue; // Looks OK. - } - if (!b_it.at_last()) { - BLOBNBOX* next = b_it.data_relative(1); - if (dot_of_i(blob, next, row)) - continue; // Looks OK. - } - // It might be noise so get rid of it. - delete blob->cblob(); - delete b_it.extract(); - } else { - prev = blob; - } - } - } -} - -/** - * cleanup_rows_making - * - * Remove overlapping rows and fit all the blobs to what's left. 
- */ -void cleanup_rows_making( //find lines - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation -) { - //iterators - BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows (); - -#ifndef GRAPHICS_DISABLED - if (textord_show_parallel_rows && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); - } -#endif - //get row coords - fit_parallel_rows(block, - gradient, - rotation, - block_edge, - textord_show_parallel_rows && testing_on); - delete_non_dropout_rows(block, - gradient, - rotation, - block_edge, - textord_show_parallel_rows && testing_on); - expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on); - blob_it.set_to_list (&block->blobs); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - blob_it.add_list_after (row_it.data ()->blob_list ()); - //give blobs back - assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE); - //now new rows must be genuine - blob_it.set_to_list (&block->blobs); - blob_it.add_list_after (&block->large_blobs); - assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE); - //safe to use big ones now - blob_it.set_to_list (&block->blobs); - //throw all blobs in - blob_it.add_list_after (&block->noise_blobs); - blob_it.add_list_after (&block->small_blobs); - assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE); -} - -/** - * delete_non_dropout_rows - * - * Compute the linespacing and offset. 
- */ -void delete_non_dropout_rows( //find lines - TO_BLOCK* block, //block to do - float gradient, //global skew - FCOORD rotation, //deskew vector - int32_t block_edge, //left edge - bool testing_on //correct orientation -) { - TBOX block_box; //deskewed block - int32_t max_y; //in block - int32_t min_y; - int32_t line_index; //of scan line - int32_t line_count; //no of scan lines - int32_t distance; //to drop-out - int32_t xleft; //of block - int32_t ybottom; //of block - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - BLOBNBOX_IT blob_it = &block->blobs; - - if (row_it.length () == 0) - return; //empty block - block_box = deskew_block_coords (block, gradient); - xleft = block->block->pdblk.bounding_box ().left (); - ybottom = block->block->pdblk.bounding_box ().bottom (); - min_y = block_box.bottom () - 1; - max_y = block_box.top () + 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - line_index = (int32_t) floor (row_it.data ()->intercept ()); - if (line_index <= min_y) - min_y = line_index - 1; - if (line_index >= max_y) - max_y = line_index + 1; - } - line_count = max_y - min_y + 1; - if (line_count <= 0) - return; //empty block - // change in occupation - std::vector deltas(line_count); - // of pixel coords - std::vector occupation(line_count); - - compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]); - compute_occupation_threshold ((int32_t) - ceil (block->line_spacing * - (tesseract::CCStruct::kDescenderFraction + - tesseract::CCStruct::kAscenderFraction)), - (int32_t) ceil (block->line_spacing * - (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction)), - max_y - min_y + 1, &occupation[0], &deltas[0]); -#ifndef GRAPHICS_DISABLED - if (testing_on) { - draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]); - } -#endif - compute_dropout_distances(&occupation[0], &deltas[0], line_count); - for (row_it.mark_cycle_pt (); 
!row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - line_index = (int32_t) floor (row->intercept ()); - distance = deltas[line_index - min_y]; - if (find_best_dropout_row (row, distance, block->line_spacing / 2, - line_index, &row_it, testing_on)) { -#ifndef GRAPHICS_DISABLED - if (testing_on) - plot_parallel_row(row, gradient, block_edge, - ScrollView::WHITE, rotation); -#endif - blob_it.add_list_after (row_it.data ()->blob_list ()); - delete row_it.extract (); //too far away - } - } - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - blob_it.add_list_after (row_it.data ()->blob_list ()); - } -} - - -/** - * @name find_best_dropout_row - * - * Delete this row if it has a neighbour with better dropout characteristics. - * TRUE is returned if the row should be deleted. - */ -bool find_best_dropout_row( //find neighbours - TO_ROW* row, //row to test - int32_t distance, //dropout dist - float dist_limit, //threshold distance - int32_t line_index, //index of row - TO_ROW_IT* row_it, //current position - bool testing_on //correct orientation -) { - int32_t next_index; // of neighbouring row - int32_t row_offset; //from current row - int32_t abs_dist; //absolute distance - int8_t row_inc; //increment to row_index - TO_ROW *next_row; //nextious row - - if (testing_on) - tprintf ("Row at %g(%g), dropout dist=%d,", - row->intercept (), row->parallel_c (), distance); - if (distance < 0) { - row_inc = 1; - abs_dist = -distance; - } - else { - row_inc = -1; - abs_dist = distance; - } - if (abs_dist > dist_limit) { - if (testing_on) { - tprintf (" too far - deleting\n"); - } - return true; - } - if ((distance < 0 && !row_it->at_last ()) - || (distance >= 0 && !row_it->at_first ())) { - row_offset = row_inc; - do { - next_row = row_it->data_relative (row_offset); - next_index = (int32_t) floor (next_row->intercept ()); - if ((distance < 0 - && next_index < line_index - && next_index > line_index + distance + distance) - || (distance 
>= 0 - && next_index > line_index - && next_index < line_index + distance + distance)) { - if (testing_on) { - tprintf (" nearer neighbour (%d) at %g\n", - line_index + distance - next_index, - next_row->intercept ()); - } - return true; //other is nearer - } - else if (next_index == line_index - || next_index == line_index + distance + distance) { - if (row->believability () <= next_row->believability ()) { - if (testing_on) { - tprintf (" equal but more believable at %g (%g/%g)\n", - next_row->intercept (), - row->believability (), - next_row->believability ()); - } - return true; //other is more believable - } - } - row_offset += row_inc; - } - while ((next_index == line_index - || next_index == line_index + distance + distance) - && row_offset < row_it->length ()); - if (testing_on) - tprintf (" keeping\n"); - } - return false; -} - - -/** - * @name deskew_block_coords - * - * Compute the bounding box of all the blobs in the block - * if they were deskewed without actually doing it. - */ -TBOX deskew_block_coords( //block box - TO_BLOCK *block, //block to do - float gradient //global skew - ) { - TBOX result; //block bounds - TBOX blob_box; //of block - FCOORD rotation; //deskew vector - float length; //of gradient vector - TO_ROW_IT row_it = block->get_rows (); - TO_ROW *row; //current row - BLOBNBOX *blob; //current blob - BLOBNBOX_IT blob_it; //iterator - - length = sqrt (gradient * gradient + 1); - rotation = FCOORD (1 / length, -gradient / length); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_it.set_to_list (row->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_box.rotate (rotation);//de-skew it - result += blob_box; - } - } - return result; -} - - -/** - * @name compute_line_occupation - * - * Compute the pixel projection back on the y axis given the global - * skew. 
Also compute the 1st derivative. - */ -void compute_line_occupation( //project blobs - TO_BLOCK *block, //block to do - float gradient, //global skew - int32_t min_y, //min coord in block - int32_t max_y, //in block - int32_t *occupation, //output projection - int32_t *deltas //derivative - ) { - int32_t line_count; //maxy-miny+1 - int32_t line_index; //of scan line - int index; //array index for daft compilers - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - BLOBNBOX *blob; //current blob - BLOBNBOX_IT blob_it; //iterator - float length; //of skew vector - TBOX blob_box; //bounding box - FCOORD rotation; //inverse of skew - - line_count = max_y - min_y + 1; - length = sqrt (gradient * gradient + 1); - rotation = FCOORD (1 / length, -gradient / length); - for (line_index = 0; line_index < line_count; line_index++) - deltas[line_index] = 0; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_it.set_to_list (row->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_box.rotate (rotation);//de-skew it - int32_t width = blob_box.right() - blob_box.left(); - index = blob_box.bottom() - min_y; - ASSERT_HOST(index >= 0 && index < line_count); - // count transitions - deltas[index] += width; - index = blob_box.top() - min_y; - ASSERT_HOST(index >= 0 && index < line_count); - deltas[index] -= width; - } - } - occupation[0] = deltas[0]; - for (line_index = 1; line_index < line_count; line_index++) - occupation[line_index] = occupation[line_index - 1] + deltas[line_index]; -} - - -/** - * compute_occupation_threshold - * - * Compute thresholds for textline or not for the occupation array. 
- */ -void compute_occupation_threshold( //project blobs - int32_t low_window, //below result point - int32_t high_window, //above result point - int32_t line_count, //array sizes - int32_t *occupation, //input projection - int32_t *thresholds //output thresholds - ) { - int32_t line_index; //of thresholds line - int32_t low_index; //in occupation - int32_t high_index; //in occupation - int32_t sum; //current average - int32_t divisor; //to get thresholds - int32_t min_index; //of min occ - int32_t min_occ; //min in locality - int32_t test_index; //for finding min - - divisor = - (int32_t) ceil ((low_window + high_window) / textord_occupancy_threshold); - if (low_window + high_window < line_count) { - for (sum = 0, high_index = 0; high_index < low_window; high_index++) - sum += occupation[high_index]; - for (low_index = 0; low_index < high_window; low_index++, high_index++) - sum += occupation[high_index]; - min_occ = occupation[0]; - min_index = 0; - for (test_index = 1; test_index < high_index; test_index++) { - if (occupation[test_index] <= min_occ) { - min_occ = occupation[test_index]; - min_index = test_index; //find min in region - } - } - for (line_index = 0; line_index < low_window; line_index++) - thresholds[line_index] = (sum - min_occ) / divisor + min_occ; - //same out to end - for (low_index = 0; high_index < line_count; low_index++, high_index++) { - sum -= occupation[low_index]; - sum += occupation[high_index]; - if (occupation[high_index] <= min_occ) { - //find min in region - min_occ = occupation[high_index]; - min_index = high_index; - } - //lost min from region - if (min_index <= low_index) { - min_occ = occupation[low_index + 1]; - min_index = low_index + 1; - for (test_index = low_index + 2; test_index <= high_index; - test_index++) { - if (occupation[test_index] <= min_occ) { - min_occ = occupation[test_index]; - //find min in region - min_index = test_index; - } - } - } - thresholds[line_index++] = (sum - min_occ) / divisor + min_occ; - } - } 
- else { - min_occ = occupation[0]; - min_index = 0; - for (sum = 0, low_index = 0; low_index < line_count; low_index++) { - if (occupation[low_index] < min_occ) { - min_occ = occupation[low_index]; - min_index = low_index; - } - sum += occupation[low_index]; - } - line_index = 0; - } - for (; line_index < line_count; line_index++) - thresholds[line_index] = (sum - min_occ) / divisor + min_occ; - //same out to end -} - - -/** - * @name compute_dropout_distances - * - * Compute the distance from each coordinate to the nearest dropout. - */ -void compute_dropout_distances( //project blobs - int32_t *occupation, //input projection - int32_t *thresholds, //output thresholds - int32_t line_count //array sizes - ) { - int32_t line_index; //of thresholds line - int32_t distance; //from prev dropout - int32_t next_dist; //to next dropout - int32_t back_index; //for back filling - int32_t prev_threshold; //before overwrite - - distance = -line_count; - line_index = 0; - do { - do { - distance--; - prev_threshold = thresholds[line_index]; - //distance from prev - thresholds[line_index] = distance; - line_index++; - } - while (line_index < line_count - && (occupation[line_index] < thresholds[line_index] - || occupation[line_index - 1] >= prev_threshold)); - if (line_index < line_count) { - back_index = line_index - 1; - next_dist = 1; - while (next_dist < -distance && back_index >= 0) { - thresholds[back_index] = next_dist; - back_index--; - next_dist++; - distance++; - } - distance = 1; - } - } - while (line_index < line_count); -} - - -/** - * @name expand_rows - * - * Expand each row to the least of its allowed size and touching its - * neighbours. If the expansion would entirely swallow a neighbouring row - * then do so. 
- */ -void expand_rows( //find lines - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation -) { - bool swallowed_row; //eaten a neighbour - float y_max, y_min; //new row limits - float y_bottom, y_top; //allowed limits - TO_ROW *test_row; //next row - TO_ROW *row; //current row - //iterators - BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows (); - -#ifndef GRAPHICS_DISABLED - if (textord_show_expanded_rows && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); - } -#endif - - adjust_row_limits(block); //shift min,max. - if (textord_new_initial_xheight) { - if (block->get_rows ()->length () == 0) - return; - compute_row_stats(block, textord_show_expanded_rows && testing_on); - } - assign_blobs_to_rows (block, &gradient, 4, true, false, false); - //get real membership - if (block->get_rows ()->length () == 0) - return; - fit_parallel_rows(block, - gradient, - rotation, - block_edge, - textord_show_expanded_rows && testing_on); - if (!textord_new_initial_xheight) - compute_row_stats(block, textord_show_expanded_rows && testing_on); - row_it.move_to_last (); - do { - row = row_it.data (); - y_max = row->max_y (); //get current limits - y_min = row->min_y (); - y_bottom = row->intercept () - block->line_size * textord_expansion_factor * - tesseract::CCStruct::kDescenderFraction; - y_top = row->intercept () + block->line_size * textord_expansion_factor * - (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction); - if (y_min > y_bottom) { //expansion allowed - if (textord_show_expanded_rows && testing_on) - tprintf("Expanding bottom of row at %f from %f to %f\n", - row->intercept(), y_min, y_bottom); - //expandable - swallowed_row = true; - while (swallowed_row && !row_it.at_last ()) { - swallowed_row = false; - //get next one - test_row = 
row_it.data_relative (1); - //overlaps space - if (test_row->max_y () > y_bottom) { - if (test_row->min_y () > y_bottom) { - if (textord_show_expanded_rows && testing_on) - tprintf("Eating row below at %f\n", test_row->intercept()); - row_it.forward (); -#ifndef GRAPHICS_DISABLED - if (textord_show_expanded_rows && testing_on) - plot_parallel_row(test_row, - gradient, - block_edge, - ScrollView::WHITE, - rotation); -#endif - blob_it.set_to_list (row->blob_list ()); - blob_it.add_list_after (test_row->blob_list ()); - //swallow complete row - delete row_it.extract (); - row_it.backward (); - swallowed_row = true; - } - else if (test_row->max_y () < y_min) { - //shorter limit - y_bottom = test_row->max_y (); - if (textord_show_expanded_rows && testing_on) - tprintf("Truncating limit to %f due to touching row at %f\n", - y_bottom, test_row->intercept()); - } - else { - y_bottom = y_min; //can't expand it - if (textord_show_expanded_rows && testing_on) - tprintf("Not expanding limit beyond %f due to touching row at %f\n", - y_bottom, test_row->intercept()); - } - } - } - y_min = y_bottom; //expand it - } - if (y_max < y_top) { //expansion allowed - if (textord_show_expanded_rows && testing_on) - tprintf("Expanding top of row at %f from %f to %f\n", - row->intercept(), y_max, y_top); - swallowed_row = true; - while (swallowed_row && !row_it.at_first ()) { - swallowed_row = false; - //get one above - test_row = row_it.data_relative (-1); - if (test_row->min_y () < y_top) { - if (test_row->max_y () < y_top) { - if (textord_show_expanded_rows && testing_on) - tprintf("Eating row above at %f\n", test_row->intercept()); - row_it.backward (); - blob_it.set_to_list (row->blob_list ()); -#ifndef GRAPHICS_DISABLED - if (textord_show_expanded_rows && testing_on) - plot_parallel_row(test_row, - gradient, - block_edge, - ScrollView::WHITE, - rotation); -#endif - blob_it.add_list_after (test_row->blob_list ()); - //swallow complete row - delete row_it.extract (); - row_it.forward 
(); - swallowed_row = true; - } - else if (test_row->min_y () < y_max) { - //shorter limit - y_top = test_row->min_y (); - if (textord_show_expanded_rows && testing_on) - tprintf("Truncating limit to %f due to touching row at %f\n", - y_top, test_row->intercept()); - } - else { - y_top = y_max; //can't expand it - if (textord_show_expanded_rows && testing_on) - tprintf("Not expanding limit beyond %f due to touching row at %f\n", - y_top, test_row->intercept()); - } - } - } - y_max = y_top; - } - //new limits - row->set_limits (y_min, y_max); - row_it.backward (); - } - while (!row_it.at_last ()); -} - - -/** - * adjust_row_limits - * - * Change the limits of rows to suit the default fractions. - */ -void adjust_row_limits( //tidy limits - TO_BLOCK *block //block to do - ) { - TO_ROW *row; //current row - float size; //size of row - float ymax; //top of row - float ymin; //bottom of row - TO_ROW_IT row_it = block->get_rows (); - - if (textord_show_expanded_rows) - tprintf("Adjusting row limits for block(%d,%d)\n", - block->block->pdblk.bounding_box().left(), - block->block->pdblk.bounding_box().top()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - size = row->max_y () - row->min_y (); - if (textord_show_expanded_rows) - tprintf("Row at %f has min %f, max %f, size %f\n", - row->intercept(), row->min_y(), row->max_y(), size); - size /= tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction + - tesseract::CCStruct::kDescenderFraction; - ymax = size * (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction); - ymin = -size * tesseract::CCStruct::kDescenderFraction; - row->set_limits (row->intercept () + ymin, row->intercept () + ymax); - row->merged = false; - } -} - - -/** - * @name compute_row_stats - * - * Compute the linespacing and offset. 
- */ -void compute_row_stats( //find lines - TO_BLOCK* block, //block to do - bool testing_on //correct orientation -) { - int32_t row_index; //of median - TO_ROW *row; //current row - TO_ROW *prev_row; //previous row - float iqr; //inter quartile range - TO_ROW_IT row_it = block->get_rows (); - //number of rows - int16_t rowcount = row_it.length (); - // for choose nth - std::vector rows(rowcount); - rowcount = 0; - prev_row = nullptr; - row_it.move_to_last (); //start at bottom - do { - row = row_it.data (); - if (prev_row != nullptr) { - rows[rowcount++] = prev_row; - prev_row->spacing = row->intercept () - prev_row->intercept (); - if (testing_on) - tprintf ("Row at %g yields spacing of %g\n", - row->intercept (), prev_row->spacing); - } - prev_row = row; - row_it.backward (); - } - while (!row_it.at_last ()); - block->key_row = prev_row; - block->baseline_offset = - fmod (prev_row->parallel_c (), block->line_spacing); - if (testing_on) - tprintf ("Blob based spacing=(%g,%g), offset=%g", - block->line_size, block->line_spacing, block->baseline_offset); - if (rowcount > 0) { - row_index = choose_nth_item(rowcount * 3 / 4, &rows[0], rowcount, - sizeof (TO_ROW *), row_spacing_order); - iqr = rows[row_index]->spacing; - row_index = choose_nth_item(rowcount / 4, &rows[0], rowcount, - sizeof (TO_ROW *), row_spacing_order); - iqr -= rows[row_index]->spacing; - row_index = choose_nth_item(rowcount / 2, &rows[0], rowcount, - sizeof (TO_ROW *), row_spacing_order); - block->key_row = rows[row_index]; - if (testing_on) - tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr); - if (rowcount > 2 - && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) { - if (!textord_new_initial_xheight) { - if (rows[row_index]->spacing < block->line_spacing - && rows[row_index]->spacing > block->line_size) - //within range - block->line_size = rows[row_index]->spacing; - //spacing=size - else if (rows[row_index]->spacing > block->line_spacing) - block->line_size = 
block->line_spacing; - //too big so use max - } - else { - if (rows[row_index]->spacing < block->line_spacing) - block->line_size = rows[row_index]->spacing; - else - block->line_size = block->line_spacing; - //too big so use max - } - if (block->line_size < textord_min_xheight) - block->line_size = (float) textord_min_xheight; - block->line_spacing = rows[row_index]->spacing; - block->max_blob_size = - block->line_spacing * textord_excess_blobsize; - } - block->baseline_offset = fmod (rows[row_index]->intercept (), - block->line_spacing); - } - if (testing_on) - tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n", - block->line_size, block->line_spacing, block->baseline_offset); -} - - -/** - * @name compute_block_xheight - * - * Compute the xheight of the individual rows, then correlate them - * and interpret ascenderless lines, correcting xheights. - * - * First we compute our best guess of the x-height of each row independently - * with compute_row_xheight(), which looks for a pair of commonly occurring - * heights that could be x-height and ascender height. This function also - * attempts to find descenders of lowercase letters (i.e. not the small - * descenders that could appear in upper case letters as Q,J). 
- * - * After this computation each row falls into one of the following categories: - * ROW_ASCENDERS_FOUND: we found xheight and ascender modes, so this must be - * a regular row; we'll use its xheight to compute - * xheight and ascrise estimates for the block - * ROW_DESCENDERS_FOUND: no ascenders, so we do not have a high confidence in - * the xheight of this row (don't use it for estimating - * block xheight), but this row can't contain all caps - * ROW_UNKNOWN: a row with no ascenders/descenders, could be all lowercase - * (or mostly lowercase for fonts with very few ascenders), - * all upper case or small caps - * ROW_INVALID: no meaningful xheight could be found for this row - * - * We then run correct_row_xheight() and use the computed xheight and ascrise - * averages to correct xheight values of the rows in ROW_DESCENDERS_FOUND, - * ROW_UNKNOWN and ROW_INVALID categories. - * - */ -namespace tesseract { -void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { - TO_ROW *row; // current row - float asc_frac_xheight = CCStruct::kAscenderFraction / - CCStruct::kXHeightFraction; - float desc_frac_xheight = CCStruct::kDescenderFraction / - CCStruct::kXHeightFraction; - int32_t min_height, max_height; // limits on xheight - TO_ROW_IT row_it = block->get_rows(); - if (row_it.empty()) return; // no rows - - // Compute the best guess of xheight of each row individually. - // Use xheight and ascrise values of the rows where ascenders were found. 
- get_min_max_xheight(block->line_size, &min_height, &max_height); - STATS row_asc_xheights(min_height, max_height + 1); - STATS row_asc_ascrise(static_cast(min_height * asc_frac_xheight), - static_cast(max_height * asc_frac_xheight) + 1); - int min_desc_height = static_cast(min_height * desc_frac_xheight); - int max_desc_height = static_cast(max_height * desc_frac_xheight); - STATS row_asc_descdrop(min_desc_height, max_desc_height + 1); - STATS row_desc_xheights(min_height, max_height + 1); - STATS row_desc_descdrop(min_desc_height, max_desc_height + 1); - STATS row_cap_xheights(min_height, max_height + 1); - STATS row_cap_floating_xheights(min_height, max_height + 1); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - // Compute the xheight of this row if it has not been computed before. - if (row->xheight <= 0.0) { - compute_row_xheight(row, block->block->classify_rotation(), - gradient, block->line_size); - } - ROW_CATEGORY row_category = get_row_category(row); - if (row_category == ROW_ASCENDERS_FOUND) { - row_asc_xheights.add(static_cast(row->xheight), - row->xheight_evidence); - row_asc_ascrise.add(static_cast(row->ascrise), - row->xheight_evidence); - row_asc_descdrop.add(static_cast(-row->descdrop), - row->xheight_evidence); - } else if (row_category == ROW_DESCENDERS_FOUND) { - row_desc_xheights.add(static_cast(row->xheight), - row->xheight_evidence); - row_desc_descdrop.add(static_cast(-row->descdrop), - row->xheight_evidence); - } else if (row_category == ROW_UNKNOWN) { - fill_heights(row, gradient, min_height, max_height, - &row_cap_xheights, &row_cap_floating_xheights); - } - } - - float xheight = 0.0; - float ascrise = 0.0; - float descdrop = 0.0; - // Compute our best guess of xheight of this block. - if (row_asc_xheights.get_total() > 0) { - // Determine xheight from rows where ascenders were found. 
- xheight = row_asc_xheights.median(); - ascrise = row_asc_ascrise.median(); - descdrop = -row_asc_descdrop.median(); - } else if (row_desc_xheights.get_total() > 0) { - // Determine xheight from rows where descenders were found. - xheight = row_desc_xheights.median(); - descdrop = -row_desc_descdrop.median(); - } else if (row_cap_xheights.get_total() > 0) { - // All the rows in the block were (a/de)scenderless. - // Try to search for two modes in row_cap_heights that could - // be the xheight and the capheight (e.g. some of the rows - // were lowercase, but did not have enough (a/de)scenders. - // If such two modes can not be found, this block is most - // likely all caps (or all small caps, in which case the code - // still works as intended). - compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights, - textord_single_height_mode && - block->block->classify_rotation().y() == 0.0, - min_height, max_height, &(xheight), &(ascrise)); - if (ascrise == 0) { // assume only caps in the whole block - xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio; - } - } else { // default block sizes - xheight = block->line_size * CCStruct::kXHeightFraction; - } - // Correct xheight, ascrise and descdrop if necessary. - bool corrected_xheight = false; - if (xheight < textord_min_xheight) { - xheight = static_cast(textord_min_xheight); - corrected_xheight = true; - } - if (corrected_xheight || ascrise <= 0.0) { - ascrise = xheight * asc_frac_xheight; - } - if (corrected_xheight || descdrop >= 0.0) { - descdrop = -(xheight * desc_frac_xheight); - } - block->xheight = xheight; - - if (textord_debug_xheights) { - tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n", - xheight, ascrise, descdrop); - } - // Correct xheight, ascrise, descdrop of rows based on block averages. 
- for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - correct_row_xheight(row_it.data(), xheight, ascrise, descdrop); - } -} - -/** - * @name compute_row_xheight - * - * Estimate the xheight of this row. - * Compute the ascender rise and descender drop at the same time. - * Set xheigh_evidence to the number of blobs with the chosen xheight - * that appear in this row. - */ -void Textord::compute_row_xheight(TO_ROW *row, // row to do - const FCOORD& rotation, - float gradient, // global skew - int block_line_size) { - // Find blobs representing repeated characters in rows and mark them. - // This information is used for computing row xheight and at a later - // stage when words are formed by make_words. - if (!row->rep_chars_marked()) { - mark_repeated_chars(row); - } - - int min_height, max_height; - get_min_max_xheight(block_line_size, &min_height, &max_height); - STATS heights(min_height, max_height + 1); - STATS floating_heights(min_height, max_height + 1); - fill_heights(row, gradient, min_height, max_height, - &heights, &floating_heights); - row->ascrise = 0.0f; - row->xheight = 0.0f; - row->xheight_evidence = - compute_xheight_from_modes(&heights, &floating_heights, - textord_single_height_mode && - rotation.y() == 0.0, - min_height, max_height, - &(row->xheight), &(row->ascrise)); - row->descdrop = 0.0f; - if (row->xheight > 0.0) { - row->descdrop = static_cast( - compute_row_descdrop(row, gradient, row->xheight_evidence, &heights)); - } -} - -} // namespace tesseract. - -/** - * @name fill_heights - * - * Fill the given heights with heights of the blobs that are legal - * candidates for estimating xheight. 
- */ -void fill_heights(TO_ROW *row, float gradient, int min_height, - int max_height, STATS *heights, STATS *floating_heights) { - float xcentre; // centre of blob - float top; // top y coord of blob - float height; // height of blob - BLOBNBOX *blob; // current blob - int repeated_set; - BLOBNBOX_IT blob_it = row->blob_list(); - if (blob_it.empty()) return; // no blobs in this row - bool has_rep_chars = - row->rep_chars_marked() && row->num_repeated_sets() > 0; - do { - blob = blob_it.data(); - if (!blob->joined_to_prev()) { - xcentre = (blob->bounding_box().left() + - blob->bounding_box().right()) / 2.0f; - top = blob->bounding_box().top(); - height = blob->bounding_box().height(); - if (textord_fix_xheight_bug) - top -= row->baseline.y(xcentre); - else - top -= gradient * xcentre + row->parallel_c(); - if (top >= min_height && top <= max_height) { - heights->add(static_cast(floor(top + 0.5)), 1); - if (height / top < textord_min_blob_height_fraction) { - floating_heights->add(static_cast(floor(top + 0.5)), 1); - } - } - } - // Skip repeated chars, since they are likely to skew the height stats. - if (has_rep_chars && blob->repeated_set() != 0) { - repeated_set = blob->repeated_set(); - blob_it.forward(); - while (!blob_it.at_first() && - blob_it.data()->repeated_set() == repeated_set) { - blob_it.forward(); - if (textord_debug_xheights) - tprintf("Skipping repeated char when computing xheight\n"); - } - } else { - blob_it.forward(); - } - } while (!blob_it.at_first()); -} - -/** - * @name compute_xheight_from_modes - * - * Given a STATS object heights, looks for two most frequently occurring - * heights that look like xheight and xheight + ascrise. If found, sets - * the values of *xheight and *ascrise accordingly, otherwise sets xheight - * to any most frequently occurring height and sets *ascrise to 0. - * Returns the number of times xheight occurred in heights. 
- * For each mode that is considered for being an xheight the count of - * floating blobs (stored in floating_heights) is subtracted from the - * total count of the blobs of this height. This is done because blobs - * that sit far above the baseline could represent valid ascenders, but - * it is highly unlikely that such a character's height will be an xheight - * (e.g. -, ', =, ^, `, ", ', etc) - * If cap_only, then force finding of only the top mode. - */ -int compute_xheight_from_modes( - STATS *heights, STATS *floating_heights, bool cap_only, int min_height, - int max_height, float *xheight, float *ascrise) { - int blob_index = heights->mode(); // find mode - int blob_count = heights->pile_count(blob_index); // get count of mode - if (textord_debug_xheights) { - tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", - min_height, max_height, blob_index, blob_count, - heights->get_total()); - heights->print(); - floating_heights->print(); - } - if (blob_count == 0) return 0; - int modes[MAX_HEIGHT_MODES]; // biggest piles - bool in_best_pile = FALSE; - int prev_size = -INT32_MAX; - int best_count = 0; - int mode_count = compute_height_modes(heights, min_height, max_height, - modes, MAX_HEIGHT_MODES); - if (cap_only && mode_count > 1) - mode_count = 1; - int x; - if (textord_debug_xheights) { - tprintf("found %d modes: ", mode_count); - for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]); - tprintf("\n"); - } - - for (x = 0; x < mode_count - 1; x++) { - if (modes[x] != prev_size + 1) - in_best_pile = FALSE; // had empty height - int modes_x_count = heights->pile_count(modes[x]) - - floating_heights->pile_count(modes[x]); - if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) && - (in_best_pile || modes_x_count > best_count)) { - for (int asc = x + 1; asc < mode_count; asc++) { - float ratio = - static_cast(modes[asc]) / static_cast(modes[x]); - if (textord_ascx_ratio_min < ratio && - ratio < textord_ascx_ratio_max && - 
(heights->pile_count(modes[asc]) >= - blob_count * textord_ascheight_mode_fraction)) { - if (modes_x_count > best_count) { - in_best_pile = true; - best_count = modes_x_count; - } - if (textord_debug_xheights) { - tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", - modes[x], modes[asc]-modes[x], modes_x_count, ratio); - } - prev_size = modes[x]; - *xheight = static_cast(modes[x]); - *ascrise = static_cast(modes[asc] - modes[x]); - } - } - } - } - if (*xheight == 0) { // single mode - // Remove counts of the "floating" blobs (the one whose height is too - // small in relation to it's top end of the bounding box) from heights - // before computing the single-mode xheight. - // Restore the counts in heights after the mode is found, since - // floating blobs might be useful for determining potential ascenders - // in compute_row_descdrop(). - if (floating_heights->get_total() > 0) { - for (x = min_height; x < max_height; ++x) { - heights->add(x, -(floating_heights->pile_count(x))); - } - blob_index = heights->mode(); // find the modified mode - for (x = min_height; x < max_height; ++x) { - heights->add(x, floating_heights->pile_count(x)); - } - } - *xheight = static_cast(blob_index); - *ascrise = 0.0f; - best_count = heights->pile_count(blob_index); - if (textord_debug_xheights) - tprintf("Single mode xheight set to %g\n", *xheight); - } else if (textord_debug_xheights) { - tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise); - } - return best_count; -} - -/** - * @name compute_row_descdrop - * - * Estimates the descdrop of this row. This function looks for - * "significant" descenders of lowercase letters (those that could - * not just be the small descenders of upper case letters like Q,J). - * The function also takes into account how many potential ascenders - * this row might contain. 
If the number of potential ascenders along - * with descenders is close to the expected fraction of the total - * number of blobs in the row, the function returns the descender - * height, returns 0 otherwise. - */ -int32_t compute_row_descdrop(TO_ROW *row, float gradient, - int xheight_blob_count, STATS *asc_heights) { - // Count how many potential ascenders are in this row. - int i_min = asc_heights->min_bucket(); - if ((i_min / row->xheight) < textord_ascx_ratio_min) { - i_min = static_cast( - floor(row->xheight * textord_ascx_ratio_min + 0.5)); - } - int i_max = asc_heights->max_bucket(); - if ((i_max / row->xheight) > textord_ascx_ratio_max) { - i_max = static_cast(floor(row->xheight * textord_ascx_ratio_max)); - } - int num_potential_asc = 0; - for (int i = i_min; i <= i_max; ++i) { - num_potential_asc += asc_heights->pile_count(i); - } - int32_t min_height = - static_cast(floor(row->xheight * textord_descx_ratio_min + 0.5)); - int32_t max_height = - static_cast(floor(row->xheight * textord_descx_ratio_max)); - float xcentre; // centre of blob - float height; // height of blob - BLOBNBOX_IT blob_it = row->blob_list(); - BLOBNBOX *blob; // current blob - STATS heights (min_height, max_height + 1); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - blob = blob_it.data(); - if (!blob->joined_to_prev()) { - xcentre = (blob->bounding_box().left() + - blob->bounding_box().right()) / 2.0f; - height = (gradient * xcentre + row->parallel_c() - - blob->bounding_box().bottom()); - if (height >= min_height && height <= max_height) - heights.add(static_cast(floor(height + 0.5)), 1); - } - } - int blob_index = heights.mode(); // find mode - int blob_count = heights.pile_count(blob_index); // get count of mode - float total_fraction = - (textord_descheight_mode_fraction + textord_ascheight_mode_fraction); - if (static_cast(blob_count + num_potential_asc) < - xheight_blob_count * total_fraction) { - blob_count = 0; - } - int descdrop = blob_count 
> 0 ? -blob_index : 0; - if (textord_debug_xheights) { - tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n", - descdrop, num_potential_asc, blob_count); - heights.print(); - } - return descdrop; -} - - -/** - * @name compute_height_modes - * - * Find the top maxmodes values in the input array and put their - * indices in the output in the order in which they occurred. - */ -int32_t compute_height_modes(STATS *heights, // stats to search - int32_t min_height, // bottom of range - int32_t max_height, // top of range - int32_t *modes, // output array - int32_t maxmodes) { // size of modes - int32_t pile_count; // no in source pile - int32_t src_count; // no of source entries - int32_t src_index; // current entry - int32_t least_count; // height of smalllest - int32_t least_index; // index of least - int32_t dest_count; // index in modes - - src_count = max_height + 1 - min_height; - dest_count = 0; - least_count = INT32_MAX; - least_index = -1; - for (src_index = 0; src_index < src_count; src_index++) { - pile_count = heights->pile_count(min_height + src_index); - if (pile_count > 0) { - if (dest_count < maxmodes) { - if (pile_count < least_count) { - // find smallest in array - least_count = pile_count; - least_index = dest_count; - } - modes[dest_count++] = min_height + src_index; - } else if (pile_count >= least_count) { - while (least_index < maxmodes - 1) { - modes[least_index] = modes[least_index + 1]; - // shuffle up - least_index++; - } - // new one on end - modes[maxmodes - 1] = min_height + src_index; - if (pile_count == least_count) { - // new smallest - least_index = maxmodes - 1; - } else { - least_count = heights->pile_count(modes[0]); - least_index = 0; - for (dest_count = 1; dest_count < maxmodes; dest_count++) { - pile_count = heights->pile_count(modes[dest_count]); - if (pile_count < least_count) { - // find smallest - least_count = pile_count; - least_index = dest_count; - } - } - } - } - } - } - return dest_count; -} - - -/** - * 
@name correct_row_xheight - * - * Adjust the xheight etc of this row if not within reasonable limits - * of the average for the block. - */ -void correct_row_xheight(TO_ROW *row, float xheight, - float ascrise, float descdrop) { - ROW_CATEGORY row_category = get_row_category(row); - if (textord_debug_xheights) { - tprintf("correcting row xheight: row->xheight %.4f" - ", row->acrise %.4f row->descdrop %.4f\n", - row->xheight, row->ascrise, row->descdrop); - } - bool normal_xheight = - within_error_margin(row->xheight, xheight, textord_xheight_error_margin); - bool cap_xheight = - within_error_margin(row->xheight, xheight + ascrise, - textord_xheight_error_margin); - // Use the average xheight/ascrise for the following cases: - // -- the xheight of the row could not be determined at all - // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3") - // and its xheight is close to either cap height or average xheight - // -- the row does not have ascenders or descenders, but its xheight - // is close to the average block xheight (e.g. row with "www.mmm.com") - if (row_category == ROW_ASCENDERS_FOUND) { - if (row->descdrop >= 0.0) { - row->descdrop = row->xheight * (descdrop / xheight); - } - } else if (row_category == ROW_INVALID || - (row_category == ROW_DESCENDERS_FOUND && - (normal_xheight || cap_xheight)) || - (row_category == ROW_UNKNOWN && normal_xheight)) { - if (textord_debug_xheights) tprintf("using average xheight\n"); - row->xheight = xheight; - row->ascrise = ascrise; - row->descdrop = descdrop; - } else if (row_category == ROW_DESCENDERS_FOUND) { - // Assume this is a row with mostly lowercase letters and it's xheight - // is computed correctly (unfortunately there is no way to distinguish - // this from the case when descenders are found, but the most common - // height is capheight). 
- if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n"); - row->ascrise = row->xheight * (ascrise / xheight); - } else if (row_category == ROW_UNKNOWN) { - // Otherwise assume this row is an all-caps or small-caps row - // and adjust xheight and ascrise of the row. - - row->all_caps = true; - if (cap_xheight) { // regular all caps - if (textord_debug_xheights) tprintf("all caps\n"); - row->xheight = xheight; - row->ascrise = ascrise; - row->descdrop = descdrop; - } else { // small caps or caps with an odd xheight - if (textord_debug_xheights) { - if (row->xheight < xheight + ascrise && row->xheight > xheight) { - tprintf("small caps\n"); - } else { - tprintf("all caps with irregular xheight\n"); - } - } - row->ascrise = row->xheight * (ascrise / (xheight + ascrise)); - row->xheight -= row->ascrise; - row->descdrop = row->xheight * (descdrop / xheight); - } - } - if (textord_debug_xheights) { - tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop" - " = %.4f\n", row->xheight, row->ascrise, row->descdrop); - } -} - -static int CountOverlaps(const TBOX& box, int min_height, - BLOBNBOX_LIST* blobs) { - int overlaps = 0; - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - const TBOX &blob_box = blob->bounding_box(); - if (blob_box.height() >= min_height && box.major_overlap(blob_box)) { - ++overlaps; - } - } - return overlaps; -} - -/** - * @name separate_underlines - * - * Test wide objects for being potential underlines. If they are then - * put them in a separate list in the block. 
- */ -void separate_underlines(TO_BLOCK* block, // block to do - float gradient, // skew angle - FCOORD rotation, // inverse landscape - bool testing_on) { // correct orientation - BLOBNBOX *blob; // current blob - C_BLOB *rotated_blob; // rotated blob - TO_ROW *row; // current row - float length; // of g_vec - TBOX blob_box; - FCOORD blob_rotation; // inverse of rotation - FCOORD g_vec; // skew rotation - BLOBNBOX_IT blob_it; // iterator - // iterator - BLOBNBOX_IT under_it = &block->underlines; - BLOBNBOX_IT large_it = &block->large_blobs; - TO_ROW_IT row_it = block->get_rows(); - int min_blob_height = static_cast(textord_min_blob_height_fraction * - block->line_size + 0.5); - - // length of vector - length = sqrt(1 + gradient * gradient); - g_vec = FCOORD(1 / length, -gradient / length); - blob_rotation = FCOORD(rotation.x(), -rotation.y()); - blob_rotation.rotate(g_vec); // undoing everything - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - // get blobs - blob_it.set_to_list(row->blob_list()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); - blob_it.forward()) { - blob = blob_it.data(); - blob_box = blob->bounding_box(); - if (blob_box.width() > block->line_size * textord_underline_width) { - ASSERT_HOST(blob->cblob() != nullptr); - rotated_blob = crotate_cblob (blob->cblob(), - blob_rotation); - if (test_underline( - testing_on && textord_show_final_rows, - rotated_blob, static_cast(row->intercept()), - static_cast( - block->line_size * - (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction / 2.0f)))) { - under_it.add_after_then_move(blob_it.extract()); - if (testing_on && textord_show_final_rows) { - tprintf("Underlined blob at:"); - rotated_blob->bounding_box().print(); - tprintf("Was:"); - blob_box.print(); - } - } else if (CountOverlaps(blob->bounding_box(), min_blob_height, - row->blob_list()) > - textord_max_blob_overlaps) { - 
large_it.add_after_then_move(blob_it.extract()); - if (testing_on && textord_show_final_rows) { - tprintf("Large blob overlaps %d blobs at:", - CountOverlaps(blob_box, min_blob_height, - row->blob_list())); - blob_box.print(); - } - } - delete rotated_blob; - } - } - } -} - - -/** - * @name pre_associate_blobs - * - * Associate overlapping blobs and fake chop wide blobs. - */ -void pre_associate_blobs( //make rough chars - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - FCOORD rotation, //inverse landscape - bool testing_on //correct orientation -) { -#ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of boxes -#endif - BLOBNBOX *blob; //current blob - BLOBNBOX *nextblob; //next in list - TBOX blob_box; - FCOORD blob_rotation; //inverse of rotation - BLOBNBOX_IT blob_it; //iterator - BLOBNBOX_IT start_it; //iterator - TO_ROW_IT row_it = block->get_rows (); - -#ifndef GRAPHICS_DISABLED - colour = ScrollView::RED; -#endif - - blob_rotation = FCOORD (rotation.x (), -rotation.y ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - //get blobs - blob_it.set_to_list (row_it.data ()->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - start_it = blob_it; //save start point - // if (testing_on && textord_show_final_blobs) - // { - // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n", - // blob_box.left(),blob_box.bottom(), - // blob_box.right(),blob_box.top(), - // (void*)blob,blob_it.length()); - // } - bool overlap; - do { - overlap = false; - if (!blob_it.at_last ()) { - nextblob = blob_it.data_relative(1); - overlap = blob_box.major_x_overlap(nextblob->bounding_box()); - if (overlap) { - blob->merge(nextblob); // merge new blob - blob_box = blob->bounding_box(); // get bigger box - blob_it.forward(); - } - } - } - while (overlap); - blob->chop (&start_it, &blob_it, - blob_rotation, - block->line_size * 
tesseract::CCStruct::kXHeightFraction * - textord_chop_width); - //attempt chop - } -#ifndef GRAPHICS_DISABLED - if (testing_on && textord_show_final_blobs) { - if (to_win == nullptr) - create_to_win(page_tr); - to_win->Pen(colour); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_box.rotate (rotation); - if (!blob->joined_to_prev ()) { - to_win->Rectangle (blob_box.left (), blob_box.bottom (), - blob_box.right (), blob_box.top ()); - } - } - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; - } -#endif - } -} - - -/** - * @name fit_parallel_rows - * - * Re-fit the rows in the block to the given gradient. - */ -void fit_parallel_rows( //find lines - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation -) { -#ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of row -#endif - TO_ROW_IT row_it = block->get_rows (); - - row_it.move_to_first (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - if (row_it.data ()->blob_list ()->empty ()) - delete row_it.extract (); //nothing in it - else - fit_parallel_lms (gradient, row_it.data ()); - } -#ifndef GRAPHICS_DISABLED - if (testing_on) { - colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - plot_parallel_row (row_it.data (), gradient, - block_edge, colour, rotation); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; - } - } -#endif - row_it.sort (row_y_order); //may have gone out of order -} - - -/** - * @name fit_parallel_lms - * - * Fit an LMS line to a row. - * Make the fit parallel to the given gradient and set the - * row accordingly. 
- */ -void fit_parallel_lms(float gradient, TO_ROW *row) { - float c; // fitted line - int blobcount; // no of blobs - tesseract::DetLineFit lms; - BLOBNBOX_IT blob_it = row->blob_list(); - - blobcount = 0; - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - if (!blob_it.data()->joined_to_prev()) { - const TBOX& box = blob_it.data()->bounding_box(); - lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom())); - blobcount++; - } - } - double error = lms.ConstrainedFit(gradient, &c); - row->set_parallel_line(gradient, c, error); - if (textord_straight_baselines && blobcount > textord_lms_line_trials) { - error = lms.Fit(&gradient, &c); - } - //set the other too - row->set_line(gradient, c, error); -} - - -/** - * @name make_spline_rows - * - * Re-fit the rows in the block to the given gradient. - */ -namespace tesseract { -void Textord::make_spline_rows(TO_BLOCK* block, // block to do - float gradient, // gradient to fit - bool testing_on) { -#ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of row -#endif - TO_ROW_IT row_it = block->get_rows (); - - row_it.move_to_first (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - if (row_it.data ()->blob_list ()->empty ()) - delete row_it.extract (); //nothing in it - else - make_baseline_spline (row_it.data (), block); - } - if (textord_old_baselines) { -#ifndef GRAPHICS_DISABLED - if (testing_on) { - colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row_it.data ()->baseline.plot (to_win, colour); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; - } - } -#endif - make_old_baselines(block, testing_on, gradient); - } -#ifndef GRAPHICS_DISABLED - if (testing_on) { - colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row_it.data ()->baseline.plot (to_win, colour); - colour = 
(ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; - } - } -#endif -} - -} // namespace tesseract. - - -/** - * @name make_baseline_spline - * - * Fit an LMS line to a row. - * Make the fit parallel to the given gradient and set the - * row accordingly. - */ -void make_baseline_spline(TO_ROW *row, //row to fit - TO_BLOCK *block) { - double *coeffs; // quadratic coeffs - int32_t segments; // no of segments - - // spline boundaries - int32_t *xstarts = new int32_t[row->blob_list()->length() + 1]; - if (segment_baseline(row, block, segments, xstarts) - && !textord_straight_baselines && !textord_parallel_baselines) { - coeffs = linear_spline_baseline(row, block, segments, xstarts); - } else { - xstarts[1] = xstarts[segments]; - segments = 1; - coeffs = new double[3]; - coeffs[0] = 0; - coeffs[1] = row->line_m (); - coeffs[2] = row->line_c (); - } - row->baseline = QSPLINE (segments, xstarts, coeffs); - delete[] coeffs; - delete[] xstarts; -} - - -/** - * @name segment_baseline - * - * Divide the baseline up into segments which require a different - * quadratic fitted to them. - * Return TRUE if enough blobs were far enough away to need a quadratic. 
- */ -bool -segment_baseline( //split baseline - TO_ROW* row, //row to fit - TO_BLOCK* block, //block it came from - int32_t& segments, //no fo segments - int32_t* xstarts //coords of segments -) { - bool needs_curve; //needs curved line - int blobcount; //no of blobs - int blobindex; //current blob - int last_state; //above, on , below - int state; //of current blob - float yshift; //from baseline - TBOX box; //blob box - TBOX new_box; //new_it box - float middle; //xcentre of blob - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT new_it = blob_it; //front end - SORTED_FLOATS yshifts; //shifts from baseline - - needs_curve = false; - box = box_next_pre_chopped (&blob_it); - xstarts[0] = box.left (); - segments = 1; - blobcount = row->blob_list ()->length (); - if (textord_oldbl_debug) - tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n", - blobcount, box.left (), box.bottom ()); - if (blobcount <= textord_spline_medianwin - || blobcount < textord_spline_minblobs) { - blob_it.move_to_last (); - box = blob_it.data ()->bounding_box (); - xstarts[1] = box.right (); - return false; - } - last_state = 0; - new_it.mark_cycle_pt (); - for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) { - new_box = box_next_pre_chopped (&new_it); - middle = (new_box.left () + new_box.right ()) / 2.0; - yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); - //record shift - yshifts.add (yshift, blobindex); - if (new_it.cycled_list ()) { - xstarts[1] = new_box.right (); - return false; - } - } - for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++) - box = box_next_pre_chopped (&blob_it); - do { - new_box = box_next_pre_chopped (&new_it); - //get middle one - yshift = yshifts[textord_spline_medianwin / 2]; - if (yshift > textord_spline_shift_fraction * block->line_size) - state = 1; - else if (-yshift > textord_spline_shift_fraction * block->line_size) - state = -1; - else - state = 0; - if (state != 0) - 
needs_curve = true; - // tprintf("State=%d, prev=%d, shift=%g\n", - // state,last_state,yshift); - if (state != last_state && blobcount > textord_spline_minblobs) { - xstarts[segments++] = box.left (); - blobcount = 0; - } - last_state = state; - yshifts.remove (blobindex - textord_spline_medianwin); - box = box_next_pre_chopped (&blob_it); - middle = (new_box.left () + new_box.right ()) / 2.0; - yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); - yshifts.add (yshift, blobindex); - blobindex++; - blobcount++; - } - while (!new_it.cycled_list ()); - if (blobcount > textord_spline_minblobs || segments == 1) { - xstarts[segments] = new_box.right (); - } - else { - xstarts[--segments] = new_box.right (); - } - if (textord_oldbl_debug) - tprintf ("Made %d segments on row at (%d,%d)\n", - segments, box.right (), box.bottom ()); - return needs_curve; -} - - -/** - * @name linear_spline_baseline - * - * Divide the baseline up into segments which require a different - * quadratic fitted to them. - * @return TRUE if enough blobs were far enough away to need a quadratic. 
- */ -double * -linear_spline_baseline ( //split baseline -TO_ROW * row, //row to fit -TO_BLOCK * block, //block it came from -int32_t & segments, //no fo segments -int32_t xstarts[] //coords of segments -) { - int blobcount; //no of blobs - int blobindex; //current blob - int index1, index2; //blob numbers - int blobs_per_segment; //blobs in each - TBOX box; //blob box - TBOX new_box; //new_it box - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT new_it = blob_it; //front end - float b, c; //fitted curve - tesseract::DetLineFit lms; - int32_t segment; //current segment - - box = box_next_pre_chopped (&blob_it); - xstarts[0] = box.left (); - blobcount = 1; - while (!blob_it.at_first ()) { - blobcount++; - box = box_next_pre_chopped (&blob_it); - } - segments = blobcount / textord_spline_medianwin; - if (segments < 1) - segments = 1; - blobs_per_segment = blobcount / segments; - // quadratic coeffs - double *coeffs = new double[segments * 3]; - if (textord_oldbl_debug) - tprintf - ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n", - blobcount, box.left (), box.bottom (), segments, blobs_per_segment); - segment = 1; - for (index2 = 0; index2 < blobs_per_segment / 2; index2++) - box_next_pre_chopped(&new_it); - index1 = 0; - blobindex = index2; - do { - blobindex += blobs_per_segment; - lms.Clear(); - while (index1 < blobindex || (segment == segments && index1 < blobcount)) { - box = box_next_pre_chopped (&blob_it); - int middle = (box.left() + box.right()) / 2; - lms.Add(ICOORD(middle, box.bottom())); - index1++; - if (index1 == blobindex - blobs_per_segment / 2 - || index1 == blobcount - 1) { - xstarts[segment] = box.left (); - } - } - lms.Fit(&b, &c); - coeffs[segment * 3 - 3] = 0; - coeffs[segment * 3 - 2] = b; - coeffs[segment * 3 - 1] = c; - segment++; - if (segment > segments) - break; - - blobindex += blobs_per_segment; - lms.Clear(); - while (index2 < blobindex || (segment == segments && index2 < blobcount)) { 
- new_box = box_next_pre_chopped (&new_it); - int middle = (new_box.left() + new_box.right()) / 2; - lms.Add(ICOORD (middle, new_box.bottom())); - index2++; - if (index2 == blobindex - blobs_per_segment / 2 - || index2 == blobcount - 1) { - xstarts[segment] = new_box.left (); - } - } - lms.Fit(&b, &c); - coeffs[segment * 3 - 3] = 0; - coeffs[segment * 3 - 2] = b; - coeffs[segment * 3 - 1] = c; - segment++; - } - while (segment <= segments); - return coeffs; -} - - -/** - * @name assign_blobs_to_rows - * - * Make enough rows to allocate all the given blobs to one. - * If a block skew is given, use that, else attempt to track it. - */ -void assign_blobs_to_rows( //find lines - TO_BLOCK* block, //block to do - float* gradient, //block skew - int pass, //identification - bool reject_misses, //chuck big ones out - bool make_new_rows, //add rows for unmatched - bool drawing_skew //draw smoothed skew -) { - OVERLAP_STATE overlap_result; //what to do with it - float ycoord; //current y - float top, bottom; //of blob - float g_length = 1.0f; //from gradient - int16_t row_count; //no of rows - int16_t left_x; //left edge - int16_t last_x; //previous edge - float block_skew; //y delta - float smooth_factor; //for new coords - float near_dist; //dist to nearest row - ICOORD testpt; //testing only - BLOBNBOX *blob; //current blob - TO_ROW *row; //current row - TO_ROW *dest_row = nullptr; //row to put blob in - //iterators - BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows (); - - ycoord = - (block->block->pdblk.bounding_box ().bottom () + - block->block->pdblk.bounding_box ().top ()) / 2.0f; - if (gradient != nullptr) - g_length = sqrt (1 + *gradient * *gradient); -#ifndef GRAPHICS_DISABLED - if (drawing_skew) - to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord); -#endif - testpt = ICOORD (textord_test_x, textord_test_y); - blob_it.sort (blob_x_order); - smooth_factor = 1.0; - block_skew = 0.0f; - row_count = row_it.length (); 
//might have rows - if (!blob_it.empty ()) { - left_x = blob_it.data ()->bounding_box ().left (); - } - else { - left_x = block->block->pdblk.bounding_box ().left (); - } - last_x = left_x; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (gradient != nullptr) { - block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom () - + *gradient / g_length * blob->bounding_box ().left (); - } - else if (blob->bounding_box ().left () - last_x > block->line_size / 2 - && last_x - left_x > block->line_size * 2 - && textord_interpolating_skew) { - // tprintf("Interpolating skew from %g",block_skew); - block_skew *= (float) (blob->bounding_box ().left () - left_x) - / (last_x - left_x); - // tprintf("to %g\n",block_skew); - } - last_x = blob->bounding_box ().left (); - top = blob->bounding_box ().top () - block_skew; - bottom = blob->bounding_box ().bottom () - block_skew; -#ifndef GRAPHICS_DISABLED - if (drawing_skew) - to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew); -#endif - if (!row_it.empty ()) { - for (row_it.move_to_first (); - !row_it.at_last () && row_it.data ()->min_y () > top; - row_it.forward ()); - row = row_it.data (); - if (row->min_y () <= top && row->max_y () >= bottom) { - //any overlap - dest_row = row; - overlap_result = most_overlapping_row (&row_it, dest_row, - top, bottom, - block->line_size, - blob->bounding_box (). 
- contains (testpt)); - if (overlap_result == NEW_ROW && !reject_misses) - overlap_result = ASSIGN; - } - else { - overlap_result = NEW_ROW; - if (!make_new_rows) { - near_dist = row_it.data_relative (-1)->min_y () - top; - //below bottom - if (bottom < row->min_y ()) { - if (row->min_y () - bottom <= - (block->line_spacing - - block->line_size) * tesseract::CCStruct::kDescenderFraction) { - //done it - overlap_result = ASSIGN; - dest_row = row; - } - } - else if (near_dist > 0 - && near_dist < bottom - row->max_y ()) { - row_it.backward (); - dest_row = row_it.data (); - if (dest_row->min_y () - bottom <= - (block->line_spacing - - block->line_size) * tesseract::CCStruct::kDescenderFraction) { - //done it - overlap_result = ASSIGN; - } - } - else { - if (top - row->max_y () <= - (block->line_spacing - - block->line_size) * (textord_overlap_x + - tesseract::CCStruct::kAscenderFraction)) { - //done it - overlap_result = ASSIGN; - dest_row = row; - } - } - } - } - if (overlap_result == ASSIGN) - dest_row->add_blob (blob_it.extract (), top, bottom, - block->line_size); - if (overlap_result == NEW_ROW) { - if (make_new_rows && top - bottom < block->max_blob_size) { - dest_row = - new TO_ROW (blob_it.extract (), top, bottom, - block->line_size); - row_count++; - if (bottom > row_it.data ()->min_y ()) - row_it.add_before_then_move (dest_row); - //insert in right place - else - row_it.add_after_then_move (dest_row); - smooth_factor = - 1.0 / (row_count * textord_skew_lag + - textord_skewsmooth_offset); - } - else - overlap_result = REJECT; - } - } - else if (make_new_rows && top - bottom < block->max_blob_size) { - overlap_result = NEW_ROW; - dest_row = - new TO_ROW(blob_it.extract(), top, bottom, block->line_size); - row_count++; - row_it.add_after_then_move(dest_row); - smooth_factor = 1.0 / (row_count * textord_skew_lag + - textord_skewsmooth_offset2); - } - else - overlap_result = REJECT; - if (blob->bounding_box ().contains(testpt) && textord_debug_blob) { - if 
(overlap_result != REJECT) { - tprintf("Test blob assigned to row at (%g,%g) on pass %d\n", - dest_row->min_y(), dest_row->max_y(), pass); - } - else { - tprintf("Test blob assigned to no row on pass %d\n", pass); - } - } - if (overlap_result != REJECT) { - while (!row_it.at_first() && - row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) { - row = row_it.extract(); - row_it.backward(); - row_it.add_before_then_move(row); - } - while (!row_it.at_last() && - row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) { - row = row_it.extract(); - row_it.forward(); - // Keep rows in order. - row_it.add_after_then_move(row); - } - BLOBNBOX_IT added_blob_it(dest_row->blob_list()); - added_blob_it.move_to_last(); - TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box(); - if (dest_row->blob_list()->singleton() || - !prev_box.major_x_overlap(blob->bounding_box())) { - block_skew = (1 - smooth_factor) * block_skew - + smooth_factor * (blob->bounding_box().bottom() - - dest_row->initial_min_y()); - } - } - } - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - if (row_it.data()->blob_list()->empty()) - delete row_it.extract(); // Discard empty rows. - } -} - - -/** - * @name most_overlapping_row - * - * Return the row which most overlaps the blob. 
- */ -OVERLAP_STATE most_overlapping_row( //find best row - TO_ROW_IT* row_it, //iterator - TO_ROW*& best_row, //output row - float top, //top of blob - float bottom, //bottom of blob - float rowsize, //max row size - bool testing_blob //test stuff -) { - OVERLAP_STATE result; //result of tests - float overlap; //of blob & row - float bestover; //nearest row - float merge_top, merge_bottom; //size of merged row - ICOORD testpt; //testing only - TO_ROW *row; //current row - TO_ROW *test_row; //for multiple overlaps - BLOBNBOX_IT blob_it; //for merging rows - - result = ASSIGN; - row = row_it->data (); - bestover = top - bottom; - if (top > row->max_y ()) - bestover -= top - row->max_y (); - if (bottom < row->min_y ()) - //compute overlap - bestover -= row->min_y () - bottom; - if (testing_blob && textord_debug_blob) { - tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n", - bottom, top, row->min_y(), row->max_y(), rowsize, bestover); - } - test_row = row; - do { - if (!row_it->at_last ()) { - row_it->forward (); - test_row = row_it->data (); - if (test_row->min_y () <= top && test_row->max_y () >= bottom) { - merge_top = - test_row->max_y () > - row->max_y ()? test_row->max_y () : row->max_y (); - merge_bottom = - test_row->min_y () < - row->min_y ()? 
test_row->min_y () : row->min_y (); - if (merge_top - merge_bottom <= rowsize) { - if (testing_blob && textord_debug_blob) { - tprintf ("Merging rows at (%g,%g), (%g,%g)\n", - row->min_y (), row->max_y (), - test_row->min_y (), test_row->max_y ()); - } - test_row->set_limits (merge_bottom, merge_top); - blob_it.set_to_list (test_row->blob_list ()); - blob_it.add_list_after (row->blob_list ()); - blob_it.sort (blob_x_order); - row_it->backward (); - delete row_it->extract (); - row_it->forward (); - bestover = -1.0f; //force replacement - } - overlap = top - bottom; - if (top > test_row->max_y ()) - overlap -= top - test_row->max_y (); - if (bottom < test_row->min_y ()) - overlap -= test_row->min_y () - bottom; - if (bestover >= rowsize - 1 && overlap >= rowsize - 1) { - result = REJECT; - } - if (overlap > bestover) { - bestover = overlap; //find biggest overlap - row = test_row; - } - if (testing_blob && textord_debug_blob) { - tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n", - bottom, top, test_row->min_y(), test_row->max_y(), - rowsize, overlap, bestover); - } - } - } - } - while (!row_it->at_last () - && test_row->min_y () <= top && test_row->max_y () >= bottom); - while (row_it->data () != row) - row_it->backward (); //make it point to row - //doesn't overlap much - if (top - bottom - bestover > rowsize * textord_overlap_x && - (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x) - && result == ASSIGN) - result = NEW_ROW; //doesn't overlap enough - best_row = row; - return result; -} - - -/** - * @name blob_x_order - * - * Sort function to sort blobs in x from page left. 
- */ -int blob_x_order( //sort function - const void *item1, //items to compare - const void *item2) { - //converted ptr - const BLOBNBOX *blob1 = *reinterpret_cast(item1); - //converted ptr - const BLOBNBOX *blob2 = *reinterpret_cast(item2); - - if (blob1->bounding_box ().left () < blob2->bounding_box ().left ()) - return -1; - else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ()) - return 1; - else - return 0; -} - - -/** - * @name row_y_order - * - * Sort function to sort rows in y from page top. - */ -int row_y_order( //sort function - const void *item1, //items to compare - const void *item2) { - //converted ptr - const TO_ROW *row1 = *reinterpret_cast(item1); - //converted ptr - const TO_ROW *row2 = *reinterpret_cast(item2); - - if (row1->parallel_c () > row2->parallel_c ()) - return -1; - else if (row1->parallel_c () < row2->parallel_c ()) - return 1; - else - return 0; -} - - -/** - * @name row_spacing_order - * - * Qsort style function to compare 2 TO_ROWS based on their spacing value. - */ -int row_spacing_order( //sort function - const void *item1, //items to compare - const void *item2) { - //converted ptr - const TO_ROW *row1 = *reinterpret_cast(item1); - //converted ptr - const TO_ROW *row2 = *reinterpret_cast(item2); - - if (row1->spacing < row2->spacing) - return -1; - else if (row1->spacing > row2->spacing) - return 1; - else - return 0; -} - -/** - * @name mark_repeated_chars - * - * Mark blobs marked with BTFT_LEADER in repeated sets using the - * repeated_set member of BLOBNBOX. - */ -void mark_repeated_chars(TO_ROW *row) { - BLOBNBOX_IT box_it(row->blob_list()); // Iterator. 
- int num_repeated_sets = 0; - if (!box_it.empty()) { - do { - BLOBNBOX* bblob = box_it.data(); - int repeat_length = 1; - if (bblob->flow() == BTFT_LEADER && - !bblob->joined_to_prev() && bblob->cblob() != nullptr) { - BLOBNBOX_IT test_it(box_it); - for (test_it.forward(); !test_it.at_first();) { - bblob = test_it.data(); - if (bblob->flow() != BTFT_LEADER) - break; - test_it.forward(); - bblob = test_it.data(); - if (bblob->joined_to_prev() || bblob->cblob() == nullptr) { - repeat_length = 0; - break; - } - ++repeat_length; - } - } - if (repeat_length >= kMinLeaderCount) { - num_repeated_sets++; - for (; repeat_length > 0; box_it.forward(), --repeat_length) { - bblob = box_it.data(); - bblob->set_repeated_set(num_repeated_sets); - } - } else { - bblob->set_repeated_set(0); - box_it.forward(); - } - } while (!box_it.at_first()); // until all done - } - row->set_num_repeated_sets(num_repeated_sets); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/makerow.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/makerow.h deleted file mode 100644 index ff010a69..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/makerow.h +++ /dev/null @@ -1,286 +0,0 @@ -/********************************************************************** - * File: makerow.h (Formerly makerows.h) - * Description: Code to arrange blobs into rows of text. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef MAKEROW_H -#define MAKEROW_H - -#include "params.h" -#include "ocrblock.h" -#include "blobs.h" -#include "blobbox.h" -#include "statistc.h" - -enum OVERLAP_STATE -{ - ASSIGN, //assign it to row - REJECT, //reject it - dual overlap - NEW_ROW -}; - -enum ROW_CATEGORY { - ROW_ASCENDERS_FOUND, - ROW_DESCENDERS_FOUND, - ROW_UNKNOWN, - ROW_INVALID, -}; - -extern BOOL_VAR_H(textord_heavy_nr, FALSE, "Vigorously remove noise"); -extern BOOL_VAR_H (textord_show_initial_rows, FALSE, -"Display row accumulation"); -extern BOOL_VAR_H (textord_show_parallel_rows, FALSE, -"Display page correlated rows"); -extern BOOL_VAR_H (textord_show_expanded_rows, FALSE, -"Display rows after expanding"); -extern BOOL_VAR_H (textord_show_final_rows, FALSE, -"Display rows after final fitting"); -extern BOOL_VAR_H (textord_show_final_blobs, FALSE, -"Display blob bounds after pre-ass"); -extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port"); -extern BOOL_VAR_H (textord_parallel_baselines, TRUE, -"Force parallel baselines"); -extern BOOL_VAR_H (textord_straight_baselines, FALSE, -"Force straight baselines"); -extern BOOL_VAR_H (textord_quadratic_baselines, FALSE, -"Use quadratic splines"); -extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm"); -extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm"); -extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline"); -extern BOOL_VAR_H (textord_fix_makerow_bug, TRUE, -"Prevent multiple baselines"); -extern BOOL_VAR_H (textord_cblob_blockocc, TRUE, -"Use new projection for underlines"); -extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms"); -extern INT_VAR_H (textord_test_x, -INT32_MAX, "coord of test pt"); -extern INT_VAR_H (textord_test_y, -INT32_MAX, 
"coord of test pt"); -extern INT_VAR_H (textord_min_blobs_in_row, 4, -"Min blobs before gradient counted"); -extern INT_VAR_H (textord_spline_minblobs, 8, -"Min blobs in each spline segment"); -extern INT_VAR_H (textord_spline_medianwin, 6, -"Size of window for spline segmentation"); -extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight"); -extern double_VAR_H (textord_spline_shift_fraction, 0.02, -"Fraction of line spacing for quad"); -extern double_VAR_H (textord_spline_outlier_fraction, 0.1, -"Fraction of line spacing for outlier"); -extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew"); -extern double_VAR_H (textord_skew_lag, 0.75, -"Lag for skew on row accumulation"); -extern double_VAR_H (textord_linespace_iqrlimit, 0.2, -"Max iqr/median for linespace"); -extern double_VAR_H (textord_width_limit, 8, -"Max width of blobs to make rows"); -extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping"); -extern double_VAR_H (textord_minxh, 0.25, -"fraction of linesize for min xheight"); -extern double_VAR_H (textord_min_linesize, 1.25, -"* blob height for initial linesize"); -extern double_VAR_H (textord_excess_blobsize, 1.3, -"New row made if blob makes row this big"); -extern double_VAR_H (textord_occupancy_threshold, 0.4, -"Fraction of neighbourhood"); -extern double_VAR_H (textord_underline_width, 2.0, -"Multiple of line_size for underline"); -extern double_VAR_H(textord_min_blob_height_fraction, 0.75, -"Min blob height/top to include blob top into xheight stats"); -extern double_VAR_H (textord_xheight_mode_fraction, 0.4, -"Min pile height to make xheight"); -extern double_VAR_H (textord_ascheight_mode_fraction, 0.15, -"Min pile height to make ascheight"); -extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight"); -extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight"); -extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight"); -extern double_VAR_H 
(textord_descx_ratio_max, 0.6, "Max desc/xheight"); -extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation"); -extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do"); -extern BOOL_VAR_H (textord_new_initial_xheight, TRUE, -"Use test xheight mechanism"); -extern BOOL_VAR_H(textord_debug_blob, FALSE, "Print test blob information"); - -inline void get_min_max_xheight(int block_linesize, - int *min_height, int *max_height) { - *min_height = static_cast(floor(block_linesize * textord_minxh)); - if (*min_height < textord_min_xheight) *min_height = textord_min_xheight; - *max_height = static_cast(ceil(block_linesize * 3.0)); -} - -inline ROW_CATEGORY get_row_category(const TO_ROW *row) { - if (row->xheight <= 0) return ROW_INVALID; - return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND : - (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN; -} - -inline bool within_error_margin(float test, float num, float margin) { - return (test >= num * (1 - margin) && test <= num * (1 + margin)); -} - -void fill_heights(TO_ROW *row, float gradient, int min_height, - int max_height, STATS *heights, STATS *floating_heights); - -float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block, - TO_BLOCK_LIST* blocks); -float make_rows(ICOORD page_tr, // top right - TO_BLOCK_LIST *port_blocks); -void make_initial_textrows(ICOORD page_tr, - TO_BLOCK* block, // block to do - FCOORD rotation, // for drawing - bool testing_on); // correct orientation -void fit_lms_line(TO_ROW *row); -void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks - float &page_m, // average gradient - float &page_err); // average error -void vigorous_noise_removal(TO_BLOCK* block); -void cleanup_rows_making(ICOORD page_tr, // top right - TO_BLOCK* block, // block to do - float gradient, // gradient to fit - FCOORD rotation, // for drawing - int32_t block_edge, // edge of block - bool testing_on); // correct orientation -void delete_non_dropout_rows( 
//find lines - TO_BLOCK* block, //block to do - float gradient, //global skew - FCOORD rotation, //deskew vector - int32_t block_edge, //left edge - bool testing_on //correct orientation -); -bool find_best_dropout_row( //find neighbours - TO_ROW* row, //row to test - int32_t distance, //dropout dist - float dist_limit, //threshold distance - int32_t line_index, //index of row - TO_ROW_IT* row_it, //current position - bool testing_on //correct orientation -); -TBOX deskew_block_coords( //block box - TO_BLOCK *block, //block to do - float gradient //global skew - ); -void compute_line_occupation( //project blobs - TO_BLOCK *block, //block to do - float gradient, //global skew - int32_t min_y, //min coord in block - int32_t max_y, //in block - int32_t *occupation, //output projection - int32_t *deltas //derivative - ); -void compute_occupation_threshold( //project blobs - int32_t low_window, //below result point - int32_t high_window, //above result point - int32_t line_count, //array sizes - int32_t *occupation, //input projection - int32_t *thresholds //output thresholds - ); -void compute_dropout_distances( //project blobs - int32_t *occupation, //input projection - int32_t *thresholds, //output thresholds - int32_t line_count //array sizes - ); -void expand_rows( //find lines - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation -); -void adjust_row_limits( //tidy limits - TO_BLOCK *block //block to do - ); -void compute_row_stats( //find lines - TO_BLOCK* block, //block to do - bool testing_on //correct orientation -); -float median_block_xheight( //find lines - TO_BLOCK *block, //block to do - float gradient //global skew - ); - -int compute_xheight_from_modes( - STATS *heights, STATS *floating_heights, bool cap_only, int min_height, - int max_height, float *xheight, float *ascrise); - -int32_t 
compute_row_descdrop(TO_ROW *row, // row to do - float gradient, // global skew - int xheight_blob_count, - STATS *heights); -int32_t compute_height_modes(STATS *heights, // stats to search - int32_t min_height, // bottom of range - int32_t max_height, // top of range - int32_t *modes, // output array - int32_t maxmodes); // size of modes -void correct_row_xheight(TO_ROW *row, // row to fix - float xheight, // average values - float ascrise, - float descdrop); -void separate_underlines(TO_BLOCK* block, // block to do - float gradient, // skew angle - FCOORD rotation, // inverse landscape - bool testing_on); // correct orientation -void pre_associate_blobs(ICOORD page_tr, // top right - TO_BLOCK* block, // block to do - FCOORD rotation, // inverse landscape - bool testing_on); // correct orientation -void fit_parallel_rows(TO_BLOCK* block, // block to do - float gradient, // gradient to fit - FCOORD rotation, // for drawing - int32_t block_edge, // edge of block - bool testing_on); // correct orientation -void fit_parallel_lms(float gradient, // forced gradient - TO_ROW *row); // row to fit -void make_baseline_spline(TO_ROW *row, // row to fit - TO_BLOCK *block); // block it came from -bool segment_baseline( //split baseline - TO_ROW* row, //row to fit - TO_BLOCK* block, //block it came from - int32_t& segments, //no fo segments - int32_t* xstarts //coords of segments -); -double *linear_spline_baseline ( //split baseline -TO_ROW * row, //row to fit -TO_BLOCK * block, //block it came from -int32_t & segments, //no fo segments -int32_t xstarts[] //coords of segments -); -void assign_blobs_to_rows( //find lines - TO_BLOCK* block, //block to do - float* gradient, //block skew - int pass, //identification - bool reject_misses, //chuck big ones out - bool make_new_rows, //add rows for unmatched - bool drawing_skew //draw smoothed skew -); - //find best row -OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it, //iterator - TO_ROW*& best_row, //output row - float top, 
//top of blob - float bottom, //bottom of blob - float rowsize, //max row size - bool testing_blob //test stuff - ); -int blob_x_order( //sort function - const void *item1, //items to compare - const void *item2); -int row_y_order( //sort function - const void *item1, //items to compare - const void *item2); -int row_spacing_order( //sort function - const void *item1, //items to compare - const void *item2); - -void mark_repeated_chars(TO_ROW *row); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/oldbasel.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/oldbasel.cpp deleted file mode 100644 index 6e5a5443..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/oldbasel.cpp +++ /dev/null @@ -1,1703 +0,0 @@ -/********************************************************************** - * File: oldbasel.cpp (Formerly oldbl.c) - * Description: A re-implementation of the old baseline algorithm. - * Author: Ray Smith - * Created: Wed Oct 6 09:41:48 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include // for std::vector -#include "ccstruct.h" -#include "statistc.h" -#include "quadlsq.h" -#include "detlinefit.h" -#include "makerow.h" -#include "drawtord.h" -#include "oldbasel.h" -#include "textord.h" -#include "tprintf.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include - -#define EXTERN - -EXTERN BOOL_VAR (textord_really_old_xheight, FALSE, -"Use original wiseowl xheight"); -EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation"); -EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation"); -EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism"); -EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines"); -EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions"); -EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights"); -EXTERN BOOL_VAR (oldbl_xhfix, FALSE, -"Fix bug in modes threshold for xheights"); -EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus"); -EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc"); -EXTERN INT_VAR (oldbl_holed_losscount, 10, -"Max lost before fallback line used"); -EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); -EXTERN double_VAR (textord_oldbl_jumplimit, 0.15, -"X fraction for new partition"); - -#define TURNLIMIT 1 /*min size for turning point */ -#define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */ -#define DESCENDER_FRACTION 0.5 /*descender/x-height */ -#define MIN_ASC_FRACTION 0.20 /*min size of ascenders */ -#define MIN_DESC_FRACTION 0.25 /*min size of descenders */ -#define MINASCRISE 2.0 /*min ascender/desc step */ -#define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */ -#define MAXHEIGHT 300 /*max blob height */ -#define MAXOVERLAP 0.1 
/*max 10% missed overlap */ -#define MAXBADRUN 2 /*max non best for failed */ -#define HEIGHTBUCKETS 200 /* Num of buckets */ -#define DELTAHEIGHT 5.0 /* Small amount of diff */ -#define GOODHEIGHT 5 -#define MAXLOOPS 10 -#define MODENUM 10 -#define MAXPARTS 6 -#define SPLINESIZE 23 - -#define ABS(x) ((x)<0 ? (-(x)) : (x)) - -namespace tesseract { - -/********************************************************************** - * make_old_baselines - * - * Top level function to make baselines the old way. - **********************************************************************/ - -void Textord::make_old_baselines(TO_BLOCK* block, // block to do - bool testing_on, // correct orientation - float gradient) { - QSPLINE *prev_baseline; // baseline of previous row - TO_ROW *row; // current row - TO_ROW_IT row_it = block->get_rows(); - BLOBNBOX_IT blob_it; - - prev_baseline = nullptr; // nothing yet - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - find_textlines(block, row, 2, nullptr); - if (row->xheight <= 0 && prev_baseline != nullptr) - find_textlines(block, row, 2, prev_baseline); - if (row->xheight > 0) { // was a good one - prev_baseline = &row->baseline; - } else { - prev_baseline = nullptr; - blob_it.set_to_list(row->blob_list()); - if (textord_debug_baselines) - tprintf("Row baseline generation failed on row at (%d,%d)\n", - blob_it.data()->bounding_box().left(), - blob_it.data()->bounding_box().bottom()); - } - } - correlate_lines(block, gradient); - block->block->set_xheight(block->xheight); -} - - -/********************************************************************** - * correlate_lines - * - * Correlate the x-heights and ascender heights of a block to fill-in - * the ascender height and descender height for rows without one. - * Also fix baselines of rows without a decent fit. 
- **********************************************************************/ - -void Textord::correlate_lines(TO_BLOCK *block, float gradient) { - int rowcount; /*no of rows to do */ - int rowindex; /*no of row */ - // iterator - TO_ROW_IT row_it = block->get_rows (); - - rowcount = row_it.length (); - if (rowcount == 0) { - //default value - block->xheight = block->line_size; - return; /*none to do */ - } - // array of ptrs - std::vector rows(rowcount); - rowindex = 0; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - //make array - rows[rowindex++] = row_it.data (); - - /*try to fix bad lines */ - correlate_neighbours(block, &rows[0], rowcount); - - if (textord_really_old_xheight || textord_old_xheight) { - block->xheight = (float) correlate_with_stats(&rows[0], rowcount, block); - if (block->xheight <= 0) - block->xheight = block->line_size * tesseract::CCStruct::kXHeightFraction; - if (block->xheight < textord_min_xheight) - block->xheight = (float) textord_min_xheight; - } else { - compute_block_xheight(block, gradient); - } -} - - -/********************************************************************** - * correlate_neighbours - * - * Try to fix rows that had a bad spline fit by using neighbours. - **********************************************************************/ - -void Textord::correlate_neighbours(TO_BLOCK *block, // block rows are in. - TO_ROW **rows, // rows of block. - int rowcount) { // no of rows to do. 
- TO_ROW *row; /*current row */ - int rowindex; /*no of row */ - int otherrow; /*second row */ - int upperrow; /*row above to use */ - int lowerrow; /*row below to use */ - float biggest; - - for (rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*current row */ - if (row->xheight < 0) { - /*quadratic failed */ - for (otherrow = rowindex - 2; - otherrow >= 0 - && (rows[otherrow]->xheight < 0.0 - || !row->baseline.overlap (&rows[otherrow]->baseline, - MAXOVERLAP)); otherrow--); - upperrow = otherrow; /*decent row above */ - for (otherrow = rowindex + 1; - otherrow < rowcount - && (rows[otherrow]->xheight < 0.0 - || !row->baseline.overlap (&rows[otherrow]->baseline, - MAXOVERLAP)); otherrow++); - lowerrow = otherrow; /*decent row below */ - if (upperrow >= 0) - find_textlines(block, row, 2, &rows[upperrow]->baseline); - if (row->xheight < 0 && lowerrow < rowcount) - find_textlines(block, row, 2, &rows[lowerrow]->baseline); - if (row->xheight < 0) { - if (upperrow >= 0) - find_textlines(block, row, 1, &rows[upperrow]->baseline); - else if (lowerrow < rowcount) - find_textlines(block, row, 1, &rows[lowerrow]->baseline); - } - } - } - - for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*current row */ - if (row->xheight < 0) /*linear failed */ - /*make do */ - row->xheight = -row->xheight; - biggest = std::max(biggest, row->xheight); - } -} - - -/********************************************************************** - * correlate_with_stats - * - * correlate the x-heights and ascender heights of a block to fill-in - * the ascender height and descender height for rows without one. - **********************************************************************/ - -int Textord::correlate_with_stats(TO_ROW **rows, // rows of block. - int rowcount, // no of rows to do. 
- TO_BLOCK* block) { - TO_ROW *row; /*current row */ - int rowindex; /*no of row */ - float lineheight; /*mean x-height */ - float ascheight; /*average ascenders */ - float minascheight; /*min allowed ascheight */ - int xcount; /*no of samples for xheight */ - float fullheight; /*mean top height */ - int fullcount; /*no of samples */ - float descheight; /*mean descender drop */ - float mindescheight; /*min allowed descheight */ - int desccount; /*no of samples */ - - /*no samples */ - xcount = fullcount = desccount = 0; - lineheight = ascheight = fullheight = descheight = 0.0; - for (rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*current row */ - if (row->ascrise > 0.0) { /*got ascenders? */ - lineheight += row->xheight;/*average x-heights */ - ascheight += row->ascrise; /*average ascenders */ - xcount++; - } - else { - fullheight += row->xheight;/*assume full height */ - fullcount++; - } - if (row->descdrop < 0.0) { /*got descenders? */ - /*average descenders */ - descheight += row->descdrop; - desccount++; - } - } - - if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) { - lineheight /= xcount; /*average x-height */ - /*average caps height */ - fullheight = lineheight + ascheight / xcount; - /*must be decent size */ - if (fullheight < lineheight * (1 + MIN_ASC_FRACTION)) - fullheight = lineheight * (1 + MIN_ASC_FRACTION); - } - else { - fullheight /= fullcount; /*average max height */ - /*guess x-height */ - lineheight = fullheight * X_HEIGHT_FRACTION; - } - if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) - descheight /= desccount; /*average descenders */ - else - /*guess descenders */ - descheight = -lineheight * DESCENDER_FRACTION; - - if (lineheight > 0.0f) - block->block->set_cell_over_xheight((fullheight - descheight) / lineheight); - - minascheight = lineheight * MIN_ASC_FRACTION; - mindescheight = -lineheight * MIN_DESC_FRACTION; - for (rowindex = 0; rowindex < rowcount; rowindex++) { - row = 
rows[rowindex]; /*do each row */ - row->all_caps = false; - if (row->ascrise / row->xheight < MIN_ASC_FRACTION) { - /*no ascenders */ - if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) - && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { - row->ascrise = fullheight - lineheight; - /*set to average */ - row->xheight = lineheight; - - } - else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) - && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { - row->ascrise = row->xheight - lineheight; - /*set to average */ - row->xheight = lineheight; - row->all_caps = true; - } - else { - row->ascrise = (fullheight - lineheight) * row->xheight - / fullheight; - /*scale it */ - row->xheight -= row->ascrise; - row->all_caps = true; - } - if (row->ascrise < minascheight) - row->ascrise = - row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); - } - if (row->descdrop > mindescheight) { - if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) - && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) - /*set to average */ - row->descdrop = descheight; - else - row->descdrop = -row->xheight * DESCENDER_FRACTION; - } - } - return (int) lineheight; //block xheight -} - - -/********************************************************************** - * find_textlines - * - * Compute the baseline for the given row. 
- **********************************************************************/ - -void Textord::find_textlines(TO_BLOCK *block, // block row is in - TO_ROW *row, // row to do - int degree, // required approximation - QSPLINE *spline) { // starting spline - int partcount; /*no of partitions of */ - bool holed_line = false; //lost too many blobs - int bestpart; /*biggest partition */ - int partsizes[MAXPARTS]; /*no in each partition */ - int lineheight; /*guessed x-height */ - float jumplimit; /*allowed delta change */ - int blobcount; /*no of blobs on line */ - int pointcount; /*no of coords */ - int xstarts[SPLINESIZE + 1]; //segment boundaries - int segments; //no of segments - - //no of blobs in row - blobcount = row->blob_list ()->length (); - // partition no of each blob - std::vector partids(blobcount); - // useful sample points - std::vector xcoords(blobcount); - // useful sample points - std::vector ycoords(blobcount); - // edges of blob rectangles - std::vector blobcoords(blobcount); - // diffs from 1st approx - std::vector ydiffs(blobcount); - - lineheight = get_blob_coords(row, (int)block->line_size, &blobcoords[0], - holed_line, blobcount); - /*limit for line change */ - jumplimit = lineheight * textord_oldbl_jumplimit; - if (jumplimit < MINASCRISE) - jumplimit = MINASCRISE; - - if (textord_oldbl_debug) { - tprintf - ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", - block->line_size, lineheight, jumplimit); - } - if (holed_line) - make_holed_baseline(&blobcoords[0], blobcount, spline, &row->baseline, - row->line_m ()); - else - make_first_baseline(&blobcoords[0], blobcount, - &xcoords[0], &ycoords[0], spline, &row->baseline, jumplimit); -#ifndef GRAPHICS_DISABLED - if (textord_show_final_rows) - row->baseline.plot (to_win, ScrollView::GOLDENROD); -#endif - if (blobcount > 1) { - bestpart = partition_line(&blobcoords[0], blobcount, - &partcount, &partids[0], partsizes, - &row->baseline, jumplimit, &ydiffs[0]); - pointcount = 
partition_coords(&blobcoords[0], blobcount, - &partids[0], bestpart, &xcoords[0], &ycoords[0]); - segments = segment_spline(&blobcoords[0], blobcount, - &xcoords[0], &ycoords[0], degree, pointcount, xstarts); - if (!holed_line) { - do { - row->baseline = QSPLINE(xstarts, segments, - &xcoords[0], &ycoords[0], pointcount, degree); - } - while (textord_oldbl_split_splines - && split_stepped_spline (&row->baseline, jumplimit / 2, - &xcoords[0], xstarts, segments)); - } - find_lesser_parts(row, &blobcoords[0], blobcount, - &partids[0], partsizes, partcount, bestpart); - - } - else { - row->xheight = -1.0f; /*failed */ - row->descdrop = 0.0f; - row->ascrise = 0.0f; - } - row->baseline.extrapolate (row->line_m (), - block->block->pdblk.bounding_box ().left (), - block->block->pdblk.bounding_box ().right ()); - - if (textord_really_old_xheight) { - old_first_xheight (row, &blobcoords[0], lineheight, - blobcount, &row->baseline, jumplimit); - } else if (textord_old_xheight) { - make_first_xheight (row, &blobcoords[0], lineheight, (int)block->line_size, - blobcount, &row->baseline, jumplimit); - } else { - compute_row_xheight(row, block->block->classify_rotation(), - row->line_m(), block->line_size); - } -} - -} // namespace tesseract. - - -/********************************************************************** - * get_blob_coords - * - * Fill the blobcoords array with the coordinates of the blobs - * in the row. The return value is the first guess at the line height. 
- **********************************************************************/ - -int get_blob_coords( //get boxes - TO_ROW* row, //row to use - int32_t lineheight, //block level - TBOX* blobcoords, //output boxes - bool& holed_line, //lost a lot of blobs - int& outcount //no of real blobs -) { - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - int blobindex; /*no along text line */ - int losscount; //lost blobs - int maxlosscount; //greatest lost blobs - /*height stat collection */ - STATS heightstat (0, MAXHEIGHT); - - if (blob_it.empty ()) - return 0; //none - maxlosscount = 0; - losscount = 0; - blob_it.mark_cycle_pt (); - blobindex = 0; - do { - blobcoords[blobindex] = box_next_pre_chopped (&blob_it); - if (blobcoords[blobindex].height () > lineheight * 0.25) - heightstat.add (blobcoords[blobindex].height (), 1); - if (blobindex == 0 - || blobcoords[blobindex].height () > lineheight * 0.25 - || blob_it.cycled_list ()) { - blobindex++; /*no of merged blobs */ - losscount = 0; - } - else { - if (blobcoords[blobindex].height () - < blobcoords[blobindex].width () * oldbl_dot_error_size - && blobcoords[blobindex].width () - < blobcoords[blobindex].height () * oldbl_dot_error_size) { - //counts as dot - blobindex++; - losscount = 0; - } - else { - losscount++; //lost it - if (losscount > maxlosscount) - //remember max - maxlosscount = losscount; - } - } - } - while (!blob_it.cycled_list ()); - - holed_line = maxlosscount > oldbl_holed_losscount; - outcount = blobindex; /*total blobs */ - - if (heightstat.get_total () > 1) - /*guess x-height */ - return (int) heightstat.ile (0.25); - else - return blobcoords[0].height (); -} - - -/********************************************************************** - * make_first_baseline - * - * Make the first estimate at a baseline, either by shifting - * a supplied previous spline, or by doing a piecewise linear - * approximation using all the blobs. 
- **********************************************************************/ - -void -make_first_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -int xcoords[], /*coords for spline */ -int ycoords[], /*approximator */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float jumplimit /*guess half descenders */ -) { - int leftedge; /*left edge of line */ - int rightedge; /*right edge of line */ - int blobindex; /*current blob */ - int segment; /*current segment */ - float prevy, thisy, nexty; /*3 y coords */ - float y1, y2, y3; /*3 smooth blobs */ - float maxmax, minmin; /*absolute limits */ - int x2 = 0; /*right edge of old y3 */ - int ycount; /*no of ycoords in use */ - float yturns[SPLINESIZE]; /*y coords of turn pts */ - int xturns[SPLINESIZE]; /*xcoords of turn pts */ - int xstarts[SPLINESIZE + 1]; - int segments; //no of segments - ICOORD shift; //shift of spline - - prevy = 0; - /*left edge of row */ - leftedge = blobcoords[0].left (); - /*right edge of line */ - rightedge = blobcoords[blobcount - 1].right (); - if (spline == nullptr /*no given spline */ - || spline->segments < 3 /*or trivial */ - /*or too non-overlap */ - || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) - || spline->xcoords[spline->segments - 1] < rightedge - - MAXOVERLAP * (rightedge - leftedge)) { - if (textord_oldbl_paradef) - return; //use default - xstarts[0] = blobcoords[0].left () - 1; - for (blobindex = 0; blobindex < blobcount; blobindex++) { - xcoords[blobindex] = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) / 2; - ycoords[blobindex] = blobcoords[blobindex].bottom (); - } - xstarts[1] = blobcoords[blobcount - 1].right () + 1; - segments = 1; /*no of segments */ - - /*linear */ - *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); - - if (blobcount >= 3) { - y1 = y2 = y3 = 0.0f; - ycount = 0; - segment = 0; /*no of segments 
*/ - maxmax = minmin = 0.0f; - thisy = ycoords[0] - baseline->y (xcoords[0]); - nexty = ycoords[1] - baseline->y (xcoords[1]); - for (blobindex = 2; blobindex < blobcount; blobindex++) { - prevy = thisy; /*shift ycoords */ - thisy = nexty; - nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]); - /*middle of smooth y */ - if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) { - y1 = y2; /*shift window */ - y2 = y3; - y3 = thisy; /*middle point */ - ycount++; - /*local max */ - if (ycount >= 3 && ((y1 < y2 && y2 >= y3) - /*local min */ - || (y1 > y2 && y2 <= y3))) { - if (segment < SPLINESIZE - 2) { - /*turning pt */ - xturns[segment] = x2; - yturns[segment] = y2; - segment++; /*no of spline segs */ - } - } - if (ycount == 1) { - maxmax = minmin = y3;/*initialise limits */ - } - else { - if (y3 > maxmax) - maxmax = y3; /*biggest max */ - if (y3 < minmin) - minmin = y3; /*smallest min */ - } - /*possible turning pt */ - x2 = blobcoords[blobindex - 1].right (); - } - } - - jumplimit *= 1.2; - /*must be wavy */ - if (maxmax - minmin > jumplimit) { - ycount = segment; /*no of segments */ - for (blobindex = 0, segment = 1; blobindex < ycount; - blobindex++) { - if (yturns[blobindex] > minmin + jumplimit - || yturns[blobindex] < maxmax - jumplimit) { - /*significant peak */ - if (segment == 1 - || yturns[blobindex] > prevy + jumplimit - || yturns[blobindex] < prevy - jumplimit) { - /*different to previous */ - xstarts[segment] = xturns[blobindex]; - segment++; - prevy = yturns[blobindex]; - } - /*bigger max */ - else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy) - /*smaller min */ - || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) { - xstarts[segment - 1] = xturns[blobindex]; - /*improved previous */ - prevy = yturns[blobindex]; - } - } - } - xstarts[segment] = blobcoords[blobcount - 1].right () + 1; - segments = segment; /*no of segments */ - /*linear */ - *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, 
blobcount, 1); - } - } - } - else { - *baseline = *spline; /*copy it */ - shift = ICOORD (0, (int16_t) (blobcoords[0].bottom () - - spline->y (blobcoords[0].right ()))); - baseline->move (shift); - } -} - - -/********************************************************************** - * make_holed_baseline - * - * Make the first estimate at a baseline, either by shifting - * a supplied previous spline, or by doing a piecewise linear - * approximation using all the blobs. - **********************************************************************/ - -void -make_holed_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float gradient //of line -) { - int leftedge; /*left edge of line */ - int rightedge; /*right edge of line */ - int blobindex; /*current blob */ - float x; //centre of row - ICOORD shift; //shift of spline - - tesseract::DetLineFit lms; // straight baseline - int32_t xstarts[2]; //straight line - double coeffs[3]; - float c; //line parameter - - /*left edge of row */ - leftedge = blobcoords[0].left (); - /*right edge of line */ - rightedge = blobcoords[blobcount - 1].right(); - for (blobindex = 0; blobindex < blobcount; blobindex++) { - lms.Add(ICOORD((blobcoords[blobindex].left() + - blobcoords[blobindex].right()) / 2, - blobcoords[blobindex].bottom())); - } - lms.ConstrainedFit(gradient, &c); - xstarts[0] = leftedge; - xstarts[1] = rightedge; - coeffs[0] = 0; - coeffs[1] = gradient; - coeffs[2] = c; - *baseline = QSPLINE (1, xstarts, coeffs); - if (spline != nullptr /*no given spline */ - && spline->segments >= 3 /*or trivial */ - /*or too non-overlap */ - && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) - && spline->xcoords[spline->segments - 1] >= rightedge - - MAXOVERLAP * (rightedge - leftedge)) { - *baseline = *spline; /*copy it */ - x = (leftedge + rightedge) / 2.0; - shift = ICOORD 
(0, (int16_t) (gradient * x + c - spline->y (x))); - baseline->move (shift); - } -} - - -/********************************************************************** - * partition_line - * - * Partition a row of blobs into different groups of continuous - * y position. jumplimit specifies the max allowable limit on a jump - * before a new partition is started. - * The return value is the biggest partition - **********************************************************************/ - -int -partition_line ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -int *numparts, /*number of partitions */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -QSPLINE * spline, /*curve to fit to */ -float jumplimit, /*allowed delta change */ -float ydiffs[] /*diff from spline */ -) { - int blobindex; /*no along text line */ - int bestpart; /*best new partition */ - int biggestpart; /*part with most members */ - float diff; /*difference from line */ - int startx; /*index of start blob */ - float partdiffs[MAXPARTS]; /*step between parts */ - - for (bestpart = 0; bestpart < MAXPARTS; bestpart++) - partsizes[bestpart] = 0; /*zero them all */ - - startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs); - *numparts = 1; /*1 partition */ - bestpart = -1; /*first point */ - float drift = 0.0f; - float last_delta = 0.0f; - for (blobindex = startx; blobindex < blobcount; blobindex++) { - /*do each blob in row */ - diff = ydiffs[blobindex]; /*diff from line */ - if (textord_oldbl_debug) { - tprintf ("%d(%d,%d), ", blobindex, - blobcoords[blobindex].left (), - blobcoords[blobindex].bottom ()); - } - bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, - &drift, &last_delta, numparts); - /*record partition */ - partids[blobindex] = bestpart; - partsizes[bestpart]++; /*another in it */ - } - - bestpart = -1; /*first point */ - drift = 0.0f; - last_delta = 0.0f; - partsizes[0]--; /*doing 1st pt 
again */ - /*do each blob in row */ - for (blobindex = startx; blobindex >= 0; blobindex--) { - diff = ydiffs[blobindex]; /*diff from line */ - if (textord_oldbl_debug) { - tprintf ("%d(%d,%d), ", blobindex, - blobcoords[blobindex].left (), - blobcoords[blobindex].bottom ()); - } - bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, - &drift, &last_delta, numparts); - /*record partition */ - partids[blobindex] = bestpart; - partsizes[bestpart]++; /*another in it */ - } - - for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) - if (partsizes[bestpart] >= partsizes[biggestpart]) - biggestpart = bestpart; /*new biggest */ - if (textord_oldbl_merge_parts) - merge_oldbl_parts(blobcoords, - blobcount, - partids, - partsizes, - biggestpart, - jumplimit); - return biggestpart; /*biggest partition */ -} - - -/********************************************************************** - * merge_oldbl_parts - * - * For any adjacent group of blobs in a different part, put them in the - * main part if they fit closely to neighbours in the main part. 
- **********************************************************************/ - -void -merge_oldbl_parts ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -int biggestpart, //major partition -float jumplimit /*allowed delta change */ -) { - bool found_one; //found a bestpart blob - bool close_one; //found was close enough - int blobindex; /*no along text line */ - int prevpart; //previous iteration - int runlength; //no in this part - float diff; /*difference from line */ - int startx; /*index of start blob */ - int test_blob; //another index - FCOORD coord; //blob coordinate - float m, c; //fitted line - QLSQ stats; //line stuff - - prevpart = biggestpart; - runlength = 0; - startx = 0; - for (blobindex = 0; blobindex < blobcount; blobindex++) { - if (partids[blobindex] != prevpart) { - // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n", - // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(), - // prevpart,partids[blobindex],runlength); - if (prevpart != biggestpart && runlength > MAXBADRUN) { - stats.clear (); - for (test_blob = startx; test_blob < blobindex; test_blob++) { - coord = FCOORD ((blobcoords[test_blob].left () - + blobcoords[test_blob].right ()) / 2.0, - blobcoords[test_blob].bottom ()); - stats.add (coord.x (), coord.y ()); - } - stats.fit (1); - m = stats.get_b (); - c = stats.get_c (); - if (textord_oldbl_debug) - tprintf ("Fitted line y=%g x + %g\n", m, c); - found_one = false; - close_one = false; - for (test_blob = 1; !found_one - && (startx - test_blob >= 0 - || blobindex + test_blob <= blobcount); test_blob++) { - if (startx - test_blob >= 0 - && partids[startx - test_blob] == biggestpart) { - found_one = true; - coord = FCOORD ((blobcoords[startx - test_blob].left () - + blobcoords[startx - - test_blob].right ()) / - 2.0, - blobcoords[startx - - test_blob].bottom ()); - diff = m 
* coord.x () + c - coord.y (); - if (textord_oldbl_debug) - tprintf - ("Diff of common blob to suspect part=%g at (%g,%g)\n", - diff, coord.x (), coord.y ()); - if (diff < jumplimit && -diff < jumplimit) - close_one = true; - } - if (blobindex + test_blob <= blobcount - && partids[blobindex + test_blob - 1] == biggestpart) { - found_one = true; - coord = - FCOORD ((blobcoords[blobindex + test_blob - 1]. - left () + blobcoords[blobindex + test_blob - - 1].right ()) / 2.0, - blobcoords[blobindex + test_blob - - 1].bottom ()); - diff = m * coord.x () + c - coord.y (); - if (textord_oldbl_debug) - tprintf - ("Diff of common blob to suspect part=%g at (%g,%g)\n", - diff, coord.x (), coord.y ()); - if (diff < jumplimit && -diff < jumplimit) - close_one = true; - } - } - if (close_one) { - if (textord_oldbl_debug) - tprintf - ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n", - runlength, biggestpart, prevpart, - blobcoords[startx].left (), - blobcoords[startx].bottom ()); - //switch sides - partsizes[prevpart] -= runlength; - for (test_blob = startx; test_blob < blobindex; test_blob++) - partids[test_blob] = biggestpart; - } - } - prevpart = partids[blobindex]; - runlength = 1; - startx = blobindex; - } - else - runlength++; - } -} - - -/********************************************************************** - * get_ydiffs - * - * Get the differences between the blobs and the spline, - * putting them in ydiffs. 
The return value is the index - * of the blob in the middle of the "best behaved" region - **********************************************************************/ - -int -get_ydiffs ( //evaluate differences -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -QSPLINE * spline, /*approximating spline */ -float ydiffs[] /*output */ -) { - int blobindex; /*current blob */ - int xcentre; /*xcoord */ - int lastx; /*last xcentre */ - float diffsum; /*sum of diffs */ - float diff; /*current difference */ - float drift; /*sum of spline steps */ - float bestsum; /*smallest diffsum */ - int bestindex; /*index of bestsum */ - - diffsum = 0.0f; - bestindex = 0; - bestsum = (float) INT32_MAX; - drift = 0.0f; - lastx = blobcoords[0].left (); - /*do each blob in row */ - for (blobindex = 0; blobindex < blobcount; blobindex++) { - /*centre of blob */ - xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; - //step functions in spline - drift += spline->step (lastx, xcentre); - lastx = xcentre; - diff = blobcoords[blobindex].bottom (); - diff -= spline->y (xcentre); - diff += drift; - ydiffs[blobindex] = diff; /*store difference */ - if (blobindex > 2) - /*remove old one */ - diffsum -= ABS (ydiffs[blobindex - 3]); - diffsum += ABS (diff); /*add new one */ - if (blobindex >= 2 && diffsum < bestsum) { - bestsum = diffsum; /*find min sum */ - bestindex = blobindex - 1; /*middle of set */ - } - } - return bestindex; -} - - -/********************************************************************** - * choose_partition - * - * Choose a partition for the point and return the index. 
- **********************************************************************/ - -int -choose_partition ( //select partition -float diff, /*diff from spline */ -float partdiffs[], /*diff on all parts */ -int lastpart, /*last assigned partition */ -float jumplimit, /*new part threshold */ -float* drift, -float* lastdelta, -int *partcount /*no of partitions */ -) { - int partition; /*partition no */ - int bestpart; /*best new partition */ - float bestdelta; /*best gap from a part */ - float delta; /*diff from part */ - - if (lastpart < 0) { - partdiffs[0] = diff; - lastpart = 0; /*first point */ - *drift = 0.0f; - *lastdelta = 0.0f; - } - /*adjusted diff from part */ - delta = diff - partdiffs[lastpart] - *drift; - if (textord_oldbl_debug) { - tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift); - } - if (ABS (delta) > jumplimit / 2) { - /*delta on part 0 */ - bestdelta = diff - partdiffs[0] - *drift; - bestpart = 0; /*0 best so far */ - for (partition = 1; partition < *partcount; partition++) { - delta = diff - partdiffs[partition] - *drift; - if (ABS (delta) < ABS (bestdelta)) { - bestdelta = delta; - bestpart = partition; /*part with nearest jump */ - } - } - delta = bestdelta; - /*too far away */ - if (ABS (bestdelta) > jumplimit - && *partcount < MAXPARTS) { /*and spare part left */ - bestpart = (*partcount)++; /*best was new one */ - /*start new one */ - partdiffs[bestpart] = diff - *drift; - delta = 0.0f; - } - } - else { - bestpart = lastpart; /*best was last one */ - } - - if (bestpart == lastpart - && (ABS (delta - *lastdelta) < jumplimit / 2 - || ABS (delta) < jumplimit / 2)) - /*smooth the drift */ - *drift = (3 * *drift + delta) / 3; - *lastdelta = delta; - - if (textord_oldbl_debug) { - tprintf ("P=%d\n", bestpart); - } - - return bestpart; -} - -/********************************************************************** - * partition_coords - * - * Get the x,y coordinates of all points in the bestpart and put them - * in xcoords,ycoords. 
Return the number of points found. - **********************************************************************/ - -int -partition_coords ( //find relevant coords -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs in row */ -char partids[], /*partition no of each blob */ -int bestpart, /*best new partition */ -int xcoords[], /*points to work on */ -int ycoords[] /*points to work on */ -) { - int blobindex; /*no along text line */ - int pointcount; /*no of points */ - - pointcount = 0; - for (blobindex = 0; blobindex < blobcount; blobindex++) { - if (partids[blobindex] == bestpart) { - /*centre of blob */ - xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; - ycoords[pointcount++] = blobcoords[blobindex].bottom (); - } - } - return pointcount; /*no of points found */ -} - - -/********************************************************************** - * segment_spline - * - * Segment the row at midpoints between maxima and minima of the x,y pairs. - * The xstarts of the segments are returned and the number found. 
- **********************************************************************/ - -int -segment_spline ( //make xstarts -TBOX blobcoords[], //boundign boxes -int blobcount, /*no of blobs in row */ -int xcoords[], /*points to work on */ -int ycoords[], /*points to work on */ -int degree, int pointcount, /*no of points */ -int xstarts[] //result -) { - int ptindex; /*no along text line */ - int segment; /*partition no */ - int lastmin, lastmax; /*possible turn points */ - int turnpoints[SPLINESIZE]; /*good turning points */ - int turncount; /*no of turning points */ - int max_x; //max specified coord - - xstarts[0] = xcoords[0] - 1; //leftmost defined pt - max_x = xcoords[pointcount - 1] + 1; - if (degree < 2) - pointcount = 0; - turncount = 0; /*no turning points yet */ - if (pointcount > 3) { - ptindex = 1; - lastmax = lastmin = 0; /*start with first one */ - while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) { - /*minimum */ - if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) { - if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) { - if (turncount == 0 || turnpoints[turncount - 1] != lastmax) - /*new max point */ - turnpoints[turncount++] = lastmax; - lastmin = ptindex; /*latest minimum */ - } - else if (ycoords[ptindex] < ycoords[lastmin]) { - lastmin = ptindex; /*lower minimum */ - } - } - - /*maximum */ - if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) { - if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) { - if (turncount == 0 || turnpoints[turncount - 1] != lastmin) - /*new min point */ - turnpoints[turncount++] = lastmin; - lastmax = ptindex; /*latest maximum */ - } - else if (ycoords[ptindex] > ycoords[lastmax]) { - lastmax = ptindex; /*higher maximum */ - } - } - ptindex++; - } - /*possible global min */ - if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT - && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) { - if (turncount < SPLINESIZE - 1) - /*2 more 
turns */ - turnpoints[turncount++] = lastmax; - if (turncount < SPLINESIZE - 1) - turnpoints[turncount++] = ptindex; - } - else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT - /*possible global max */ - && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) { - if (turncount < SPLINESIZE - 1) - /*2 more turns */ - turnpoints[turncount++] = lastmin; - if (turncount < SPLINESIZE - 1) - turnpoints[turncount++] = ptindex; - } - else if (turncount > 0 && turnpoints[turncount - 1] == lastmin - && turncount < SPLINESIZE - 1) { - if (ycoords[ptindex] > ycoords[lastmax]) - turnpoints[turncount++] = ptindex; - else - turnpoints[turncount++] = lastmax; - } - else if (turncount > 0 && turnpoints[turncount - 1] == lastmax - && turncount < SPLINESIZE - 1) { - if (ycoords[ptindex] < ycoords[lastmin]) - turnpoints[turncount++] = ptindex; - else - turnpoints[turncount++] = lastmin; - } - } - - if (textord_oldbl_debug && turncount > 0) - tprintf ("First turn is %d at (%d,%d)\n", - turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]); - for (segment = 1; segment < turncount; segment++) { - /*centre y coord */ - lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2; - - /* fix alg so that it works with both rising and falling sections */ - if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) - /*find rising y centre */ - for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++); - else - /*find falling y centre */ - for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++); - - /*centre x */ - xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] - + xcoords[turnpoints[segment - 1]] - + xcoords[turnpoints[segment]] + 2) / 4; - /*halfway between turns */ - if (textord_oldbl_debug) - tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", - segment, turnpoints[segment], - 
xcoords[turnpoints[segment]], ycoords[turnpoints[segment]], - ptindex - 1, xcoords[ptindex - 1], xstarts[segment]); - } - - xstarts[segment] = max_x; - return segment; /*no of splines */ -} - - -/********************************************************************** - * split_stepped_spline - * - * Re-segment the spline in cases where there is a big step function. - * Return TRUE if any were done. - **********************************************************************/ - -bool -split_stepped_spline( //make xstarts - QSPLINE* baseline, //current shot - float jumplimit, //max step function - int* xcoords, /*points to work on */ - int* xstarts, //result - int& segments //no of segments -) { - bool doneany; //return value - int segment; /*partition no */ - int startindex, centreindex, endindex; - float leftcoord, rightcoord; - int leftindex, rightindex; - float step; //spline step - - doneany = false; - startindex = 0; - for (segment = 1; segment < segments - 1; segment++) { - step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0, - (xstarts[segment] + xstarts[segment + 1]) / 2.0); - if (step < 0) - step = -step; - if (step > jumplimit) { - while (xcoords[startindex] < xstarts[segment - 1]) - startindex++; - centreindex = startindex; - while (xcoords[centreindex] < xstarts[segment]) - centreindex++; - endindex = centreindex; - while (xcoords[endindex] < xstarts[segment + 1]) - endindex++; - if (segments >= SPLINESIZE) { - if (textord_debug_baselines) - tprintf ("Too many segments to resegment spline!!\n"); - } - else if (endindex - startindex >= textord_spline_medianwin * 3) { - while (centreindex - startindex < - textord_spline_medianwin * 3 / 2) - centreindex++; - while (endindex - centreindex < - textord_spline_medianwin * 3 / 2) - centreindex--; - leftindex = (startindex + startindex + centreindex) / 3; - rightindex = (centreindex + endindex + endindex) / 3; - leftcoord = - (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0; - rightcoord = - 
(xcoords[centreindex] + xcoords[endindex] * 2) / 3.0; - while (xcoords[leftindex] > leftcoord - && leftindex - startindex > textord_spline_medianwin) - leftindex--; - while (xcoords[leftindex] < leftcoord - && centreindex - leftindex > - textord_spline_medianwin / 2) - leftindex++; - if (xcoords[leftindex] - leftcoord > - leftcoord - xcoords[leftindex - 1]) - leftindex--; - while (xcoords[rightindex] > rightcoord - && rightindex - centreindex > - textord_spline_medianwin / 2) - rightindex--; - while (xcoords[rightindex] < rightcoord - && endindex - rightindex > textord_spline_medianwin) - rightindex++; - if (xcoords[rightindex] - rightcoord > - rightcoord - xcoords[rightindex - 1]) - rightindex--; - if (textord_debug_baselines) - tprintf ("Splitting spline at %d with step %g at (%d,%d)\n", - xstarts[segment], - baseline-> - step ((xstarts[segment - 1] + - xstarts[segment]) / 2.0, - (xstarts[segment] + - xstarts[segment + 1]) / 2.0), - (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, - (xcoords[rightindex - 1] + xcoords[rightindex]) / 2); - insert_spline_point (xstarts, segment, - (xcoords[leftindex - 1] + - xcoords[leftindex]) / 2, - (xcoords[rightindex - 1] + - xcoords[rightindex]) / 2, segments); - doneany = true; - } - else if (textord_debug_baselines) { - tprintf - ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", - startindex, centreindex, endindex, - (int32_t) textord_spline_medianwin); - } - } - // else tprintf("Spline step at %d is %g\n", - // xstarts[segment], - // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0, - // (xstarts[segment]+xstarts[segment+1])/2.0)); - } - return doneany; -} - - -/********************************************************************** - * insert_spline_point - * - * Insert a new spline point and shuffle up the others. 
- **********************************************************************/ - -void -insert_spline_point ( //get descenders -int xstarts[], //starts to shuffle -int segment, //insertion pt -int coord1, //coords to add -int coord2, int &segments //total segments -) { - int index; //for shuffling - - for (index = segments; index > segment; index--) - xstarts[index + 1] = xstarts[index]; - segments++; - xstarts[segment] = coord1; - xstarts[segment + 1] = coord2; -} - - -/********************************************************************** - * find_lesser_parts - * - * Average the step from the spline for the other partitions - * and find the commonest partition which has a descender. - **********************************************************************/ - -void -find_lesser_parts ( //get descenders -TO_ROW * row, //row to process -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -char partids[], /*partition of each blob */ -int partsizes[], /*size of each part */ -int partcount, /*no of partitions */ -int bestpart /*biggest partition */ -) { - int blobindex; /*index of blob */ - int partition; /*current partition */ - int xcentre; /*centre of blob */ - int poscount; /*count of best up step */ - int negcount; /*count of best down step */ - float partsteps[MAXPARTS]; /*average step to part */ - float bestneg; /*best down step */ - int runlength; /*length of bad run */ - int biggestrun; /*biggest bad run */ - - biggestrun = 0; - for (partition = 0; partition < partcount; partition++) - partsteps[partition] = 0.0; /*zero accumulators */ - for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { - xcentre = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) >> 1; - /*in other parts */ - int part_id = - static_cast(static_cast(partids[blobindex])); - if (part_id != bestpart) { - runlength++; /*run of non bests */ - if (runlength > biggestrun) - biggestrun = runlength; - partsteps[part_id] += 
blobcoords[blobindex].bottom() - - row->baseline.y(xcentre); - } - else - runlength = 0; - } - if (biggestrun > MAXBADRUN) - row->xheight = -1.0f; /*failed */ - else - row->xheight = 1.0f; /*success */ - poscount = negcount = 0; - bestneg = 0.0; /*no step yet */ - for (partition = 0; partition < partcount; partition++) { - if (partition != bestpart) { - // by jetsoft divide by zero possible - if (partsizes[partition] == 0) - partsteps[partition] = 0; - else - partsteps[partition] /= partsizes[partition]; - // - - if (partsteps[partition] >= MINASCRISE - && partsizes[partition] > poscount) { - poscount = partsizes[partition]; - } - if (partsteps[partition] <= -MINASCRISE - && partsizes[partition] > negcount) { - /*ascender rise */ - bestneg = partsteps[partition]; - /*2nd most popular */ - negcount = partsizes[partition]; - } - } - } - /*average x-height */ - partsteps[bestpart] /= blobcount; - row->descdrop = bestneg; -} - - -/********************************************************************** - * old_first_xheight - * - * Makes an x-height spline by copying the baseline and shifting it. - * It estimates the x-height across the line to use as the shift. - * It also finds the ascender height if it can. 
- **********************************************************************/ - -void -old_first_xheight ( //the wiseowl way -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int initialheight, //initial guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ -) { - int blobindex; /*current blob */ - /*height statistics */ - STATS heightstat (0, MAXHEIGHT); - int height; /*height of blob */ - int xcentre; /*centre of blob */ - int lineheight; /*approx xheight */ - float ascenders; /*ascender sum */ - int asccount; /*no of ascenders */ - float xsum; /*xheight sum */ - int xcount; /*xheight count */ - float diff; /*height difference */ - - if (blobcount > 1) { - for (blobindex = 0; blobindex < blobcount; blobindex++) { - xcentre = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) / 2; - /*height of blob */ - height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5); - if (height > initialheight * oldbl_xhfract - && height > textord_min_xheight) - heightstat.add (height, 1); - } - if (heightstat.get_total () > 3) { - lineheight = (int) heightstat.ile (0.25); - if (lineheight <= 0) - lineheight = (int) heightstat.ile (0.5); - } - else - lineheight = initialheight; - } - else { - lineheight = (int) (blobcoords[0].top () - - baseline->y ((blobcoords[0].left () - + blobcoords[0].right ()) / 2) + - 0.5); - } - - xsum = 0.0f; - xcount = 0; - for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; - blobindex++) { - xcentre = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) / 2; - diff = blobcoords[blobindex].top () - baseline->y (xcentre); - /*is it ascender */ - if (diff > lineheight + jumplimit) { - ascenders += diff; - asccount++; /*count ascenders */ - } - else if (diff > lineheight - jumplimit) { - xsum += diff; /*mean xheight */ - xcount++; - } - } - if (xcount > 0) - xsum /= xcount; /*average xheight */ 
- else - xsum = (float) lineheight; /*guess it */ - row->xheight *= xsum; - if (asccount > 0) - row->ascrise = ascenders / asccount - xsum; - else - row->ascrise = 0.0f; /*had none */ - if (row->xheight == 0) - row->xheight = -1.0f; -} - - -/********************************************************************** - * make_first_xheight - * - * Makes an x-height spline by copying the baseline and shifting it. - * It estimates the x-height across the line to use as the shift. - * It also finds the ascender height if it can. - **********************************************************************/ - -void -make_first_xheight ( //find xheight -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int lineheight, //initial guess -int init_lineheight, //block level guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ -) { - STATS heightstat (0, HEIGHTBUCKETS); - int lefts[HEIGHTBUCKETS]; - int rights[HEIGHTBUCKETS]; - int modelist[MODENUM]; - int blobindex; - int mode_count; //blobs to count in thr - int sign_bit; - int mode_threshold; - const int kBaselineTouch = 2; // This really should change with resolution. - const int kGoodStrength = 8; // Strength of baseline-touching heights. - const float kMinHeight = 0.25; // Min fraction of lineheight to use. - - sign_bit = row->xheight > 0 ? 1 : -1; - - memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0])); - memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0])); - mode_count = 0; - for (blobindex = 0; blobindex < blobcount; blobindex++) { - int xcenter = (blobcoords[blobindex].left () + - blobcoords[blobindex].right ()) / 2; - float base = baseline->y(xcenter); - float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); - int strength = textord_ocropus_mode && - bottomdiff <= kBaselineTouch ? 
kGoodStrength : 1; - int height = static_cast(blobcoords[blobindex].top () - base + 0.5); - if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) { - if (height > lineheight * oldbl_xhfract - && height > textord_min_xheight) { - heightstat.add (height, strength); - if (height < HEIGHTBUCKETS) { - if (xcenter > rights[height]) - rights[height] = xcenter; - if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) - lefts[height] = xcenter; - } - } - mode_count += strength; - } - } - - mode_threshold = (int) (blobcount * 0.1); - if (oldbl_dot_error_size > 1 || oldbl_xhfix) - mode_threshold = (int) (mode_count * 0.1); - - if (textord_oldbl_debug) { - tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", - blobcount, mode_count, mode_threshold); - } - find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); - if (textord_oldbl_debug) { - for (blobindex = 0; blobindex < MODENUM; blobindex++) - tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); - tprintf ("\n"); - } - pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold); - - if (textord_oldbl_debug) - tprintf ("Output xheight=%g\n", row->xheight); - if (row->xheight < 0 && textord_oldbl_debug) - tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); - - if (sign_bit < 0) - row->xheight = -row->xheight; -} - -/********************************************************************** - * find_top_modes - * - * Fill the input array with the indices of the top ten modes of the - * input distribution. - **********************************************************************/ - -const int kMinModeFactorOcropus = 32; -const int kMinModeFactor = 12; - -void -find_top_modes ( //get modes -STATS * stats, //stats to hack -int statnum, //no of piles -int modelist[], int modenum //no of modes to get -) { - int mode_count; - int last_i = 0; - int last_max = INT32_MAX; - int i; - int mode; - int total_max = 0; - int mode_factor = textord_ocropus_mode ? 
- kMinModeFactorOcropus : kMinModeFactor; - - for (mode_count = 0; mode_count < modenum; mode_count++) { - mode = 0; - for (i = 0; i < statnum; i++) { - if (stats->pile_count (i) > stats->pile_count (mode)) { - if ((stats->pile_count (i) < last_max) || - ((stats->pile_count (i) == last_max) && (i > last_i))) { - mode = i; - } - } - } - last_i = mode; - last_max = stats->pile_count (last_i); - total_max += last_max; - if (last_max <= total_max / mode_factor) - mode = 0; - modelist[mode_count] = mode; - } -} - - -/********************************************************************** - * pick_x_height - * - * Choose based on the height modes the best x height value. - **********************************************************************/ - -void pick_x_height(TO_ROW * row, //row to do - int modelist[], - int lefts[], int rights[], - STATS * heightstat, - int mode_threshold) { - int x; - int y; - int z; - float ratio; - int found_one_bigger = FALSE; - int best_x_height = 0; - int best_asc = 0; - int num_in_best; - - for (x = 0; x < MODENUM; x++) { - for (y = 0; y < MODENUM; y++) { - /* Check for two modes */ - if (modelist[x] && modelist[y] && - heightstat->pile_count (modelist[x]) > mode_threshold && - (!textord_ocropus_mode || - std::min(rights[modelist[x]], rights[modelist[y]]) > - std::max(lefts[modelist[x]], lefts[modelist[y]]))) { - ratio = (float) modelist[y] / (float) modelist[x]; - if (1.2 < ratio && ratio < 1.8) { - /* Two modes found */ - best_x_height = modelist[x]; - num_in_best = heightstat->pile_count (modelist[x]); - - /* Try to get one higher */ - do { - found_one_bigger = FALSE; - for (z = 0; z < MODENUM; z++) { - if (modelist[z] == best_x_height + 1 && - (!textord_ocropus_mode || - std::min(rights[modelist[x]], rights[modelist[y]]) > - std::max(lefts[modelist[x]], lefts[modelist[y]]))) { - ratio = (float) modelist[y] / (float) modelist[z]; - if ((1.2 < ratio && ratio < 1.8) && - /* Should be half of best */ - heightstat->pile_count (modelist[z]) > 
- num_in_best * 0.5) { - best_x_height++; - found_one_bigger = TRUE; - break; - } - } - } - } - while (found_one_bigger); - - /* try to get a higher ascender */ - - best_asc = modelist[y]; - num_in_best = heightstat->pile_count (modelist[y]); - - /* Try to get one higher */ - do { - found_one_bigger = FALSE; - for (z = 0; z < MODENUM; z++) { - if (modelist[z] > best_asc && - (!textord_ocropus_mode || - std::min(rights[modelist[x]], rights[modelist[y]]) > - std::max(lefts[modelist[x]], lefts[modelist[y]]))) { - ratio = (float) modelist[z] / (float) best_x_height; - if ((1.2 < ratio && ratio < 1.8) && - /* Should be half of best */ - heightstat->pile_count (modelist[z]) > - num_in_best * 0.5) { - best_asc = modelist[z]; - found_one_bigger = TRUE; - break; - } - } - } - } - while (found_one_bigger); - - row->xheight = (float) best_x_height; - row->ascrise = (float) best_asc - best_x_height; - return; - } - } - } - } - - best_x_height = modelist[0]; /* Single Mode found */ - num_in_best = heightstat->pile_count (best_x_height); - do { - /* Try to get one higher */ - found_one_bigger = FALSE; - for (z = 1; z < MODENUM; z++) { - /* Should be half of best */ - if ((modelist[z] == best_x_height + 1) && - (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) { - best_x_height++; - found_one_bigger = TRUE; - break; - } - } - } - while (found_one_bigger); - - row->ascrise = 0.0f; - row->xheight = (float) best_x_height; - if (row->xheight == 0) - row->xheight = -1.0f; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/oldbasel.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/oldbasel.h deleted file mode 100644 index e27afa79..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/oldbasel.h +++ /dev/null @@ -1,174 +0,0 @@ -/********************************************************************** - * File: oldbasel.h (Formerly oldbl.h) - * Description: A re-implementation of the old baseline algorithm. 
- * Author: Ray Smith - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef OLDBASEL_H -#define OLDBASEL_H - -#include "params.h" -#include "blobbox.h" - -extern BOOL_VAR_H (textord_really_old_xheight, FALSE, -"Use original wiseowl xheight"); -extern BOOL_VAR_H (textord_oldbl_debug, FALSE, -"Debug old baseline generation"); -extern BOOL_VAR_H (textord_debug_baselines, FALSE, -"Debug baseline generation"); -extern BOOL_VAR_H (textord_oldbl_paradef, TRUE, "Use para default mechanism"); -extern BOOL_VAR_H (textord_oldbl_split_splines, TRUE, -"Split stepped splines"); -extern BOOL_VAR_H (textord_oldbl_merge_parts, TRUE, -"Merge suspect partitions"); -extern BOOL_VAR_H (oldbl_xhfix, FALSE, -"Fix bug in modes threshold for xheights"); -extern INT_VAR_H (oldbl_holed_losscount, 10, -"Max lost before fallback line used"); -extern double_VAR_H (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); -extern double_VAR_H (textord_oldbl_jumplimit, 0.15, -"X fraction for new partition"); -int get_blob_coords( //get boxes - TO_ROW* row, //row to use - int32_t lineheight, //block level - TBOX* blobcoords, //output boxes - bool& holed_line, //lost a lot of blobs - int& outcount //no of real blobs -); -void make_first_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -int 
xcoords[], /*coords for spline */ -int ycoords[], /*approximator */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float jumplimit /*guess half descenders */ -); -void make_holed_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float gradient //of line -); -int partition_line ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -int *numparts, /*number of partitions */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -QSPLINE * spline, /*curve to fit to */ -float jumplimit, /*allowed delta change */ -float ydiffs[] /*diff from spline */ -); -void merge_oldbl_parts ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -int biggestpart, //major partition -float jumplimit /*allowed delta change */ -); -int get_ydiffs ( //evaluate differences -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -QSPLINE * spline, /*approximating spline */ -float ydiffs[] /*output */ -); -int choose_partition ( //select partition -float diff, /*diff from spline */ -float partdiffs[], /*diff on all parts */ -int lastpart, /*last assigned partition */ -float jumplimit, /*new part threshold */ -float* drift, -float* last_delta, -int *partcount /*no of partitions */ -); -int partition_coords ( //find relevant coords -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs in row */ -char partids[], /*partition no of each blob */ -int bestpart, /*best new partition */ -int xcoords[], /*points to work on */ -int ycoords[] /*points to work on */ -); -int segment_spline ( //make xstarts -TBOX blobcoords[], //boundign boxes -int blobcount, /*no of blobs in row */ -int 
xcoords[], /*points to work on */ -int ycoords[], /*points to work on */ -int degree, int pointcount, /*no of points */ -int xstarts[] //result -); -bool split_stepped_spline( //make xstarts - QSPLINE* baseline, //current shot - float jumplimit, //max step function - int* xcoords, /*points to work on */ - int* xstarts, //result - int& segments //no of segments -); -void insert_spline_point ( //get descenders -int xstarts[], //starts to shuffle -int segment, //insertion pt -int coord1, //coords to add -int coord2, int &segments //total segments -); -void find_lesser_parts ( //get descenders -TO_ROW * row, //row to process -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -char partids[], /*partition of each blob */ -int partsizes[], /*size of each part */ -int partcount, /*no of partitions */ -int bestpart /*biggest partition */ -); - -void old_first_xheight ( //the wiseowl way -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int initialheight, //initial guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ -); - -void make_first_xheight ( //find xheight -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int lineheight, //initial guess -int init_lineheight, //block level guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ -); - -int *make_height_array ( //get array of heights -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline /*established */ -); - -void find_top_modes ( //get modes -STATS * stats, //stats to hack -int statnum, //no of piles -int modelist[], int modenum //no of modes to get -); - -void pick_x_height(TO_ROW * row, //row to do -int modelist[], -int lefts[], int rights[], -STATS * heightstat, -int mode_threshold); -#endif diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pithsync.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pithsync.cpp deleted file mode 100644 index 8381af04..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pithsync.cpp +++ /dev/null @@ -1,690 +0,0 @@ -/********************************************************************** - * File: pithsync.cpp (Formerly pitsync2.c) - * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include // for FLT_MAX -#include // for std::vector -#include "makerow.h" -#include "pitsync1.h" -#include "topitch.h" -#include "pithsync.h" -#include "tprintf.h" - -#define PROJECTION_MARGIN 10 //arbitrary - -/********************************************************************** - * FPCUTPT::setup - * - * Constructor to make a new FPCUTPT. 
- **********************************************************************/ - -void FPCUTPT::setup( //constructor - FPCUTPT *cutpts, //predecessors - int16_t array_origin, //start coord - STATS *projection, //vertical occupation - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t x, //position - int16_t offset //dist to gap - ) { - //half of pitch - int16_t half_pitch = pitch / 2 - 1; - uint32_t lead_flag; //new flag - int32_t ind; //current position - - if (half_pitch > 31) - half_pitch = 31; - else if (half_pitch < 0) - half_pitch = 0; - lead_flag = 1 << half_pitch; - - pred = nullptr; - mean_sum = 0; - sq_sum = offset * offset; - cost = sq_sum; - faked = FALSE; - terminal = false; - fake_count = 0; - xpos = x; - region_index = 0; - mid_cuts = 0; - if (x == array_origin) { - back_balance = 0; - fwd_balance = 0; - for (ind = 0; ind <= half_pitch; ind++) { - fwd_balance >>= 1; - if (projection->pile_count (ind) > zero_count) - fwd_balance |= lead_flag; - } - } - else { - back_balance = cutpts[x - 1 - array_origin].back_balance << 1; - back_balance &= lead_flag + lead_flag - 1; - if (projection->pile_count (x) > zero_count) - back_balance |= 1; - fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; - if (projection->pile_count (x + half_pitch) > zero_count) - fwd_balance |= lead_flag; - } -} - - -/********************************************************************** - * FPCUTPT::assign - * - * Constructor to make a new FPCUTPT. - **********************************************************************/ - -void FPCUTPT::assign( //constructor - FPCUTPT* cutpts, //predecessors - int16_t array_origin, //start coord - int16_t x, //position - bool faking, //faking this one - bool mid_cut, //cheap cut. 
- int16_t offset, //dist to gap - STATS* projection, //vertical occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error //allowed tolerance -) { - int index; //test index - int balance_index; //for balance factor - int16_t balance_count; //ding factor - int16_t r_index; //test cut number - FPCUTPT *segpt; //segment point - int32_t dist; //from prev segment - double sq_dist; //squared distance - double mean; //mean pitch - double total; //total dists - double factor; //cost function - //half of pitch - int16_t half_pitch = pitch / 2 - 1; - uint32_t lead_flag; //new flag - - if (half_pitch > 31) - half_pitch = 31; - else if (half_pitch < 0) - half_pitch = 0; - lead_flag = 1 << half_pitch; - - back_balance = cutpts[x - 1 - array_origin].back_balance << 1; - back_balance &= lead_flag + lead_flag - 1; - if (projection->pile_count (x) > zero_count) - back_balance |= 1; - fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; - if (projection->pile_count (x + half_pitch) > zero_count) - fwd_balance |= lead_flag; - - xpos = x; - cost = FLT_MAX; - pred = nullptr; - faked = faking; - terminal = false; - region_index = 0; - fake_count = INT16_MAX; - for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error; - index++) { - if (index >= array_origin) { - segpt = &cutpts[index - array_origin]; - dist = x - segpt->xpos; - if (!segpt->terminal && segpt->fake_count < INT16_MAX) { - balance_count = 0; - if (textord_balance_factor > 0) { - if (textord_fast_pitch_test) { - lead_flag = back_balance ^ segpt->fwd_balance; - balance_count = 0; - while (lead_flag != 0) { - balance_count++; - lead_flag &= lead_flag - 1; - } - } - else { - for (balance_index = 0; - index + balance_index < x - balance_index; - balance_index++) - balance_count += - (projection->pile_count (index + balance_index) <= - zero_count) ^ (projection->pile_count (x - - balance_index) - <= zero_count); - } - 
balance_count = - (int16_t) (balance_count * textord_balance_factor / - projection_scale); - } - r_index = segpt->region_index + 1; - total = segpt->mean_sum + dist; - balance_count += offset; - sq_dist = - dist * dist + segpt->sq_sum + balance_count * balance_count; - mean = total / r_index; - factor = mean - pitch; - factor *= factor; - factor += sq_dist / (r_index) - mean * mean; - if (factor < cost && segpt->fake_count + faked <= fake_count) { - cost = factor; //find least cost - pred = segpt; //save path - mean_sum = total; - sq_sum = sq_dist; - fake_count = segpt->fake_count + faked; - mid_cuts = segpt->mid_cuts + mid_cut; - region_index = r_index; - } - } - } - } -} - - -/********************************************************************** - * FPCUTPT::assign_cheap - * - * Constructor to make a new FPCUTPT on the cheap. - **********************************************************************/ - -void FPCUTPT::assign_cheap( //constructor - FPCUTPT *cutpts, //predecessors - int16_t array_origin, //start coord - int16_t x, //position - BOOL8 faking, //faking this one - BOOL8 mid_cut, //cheap cut. 
- int16_t offset, //dist to gap - STATS *projection, //vertical occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error //allowed tolerance - ) { - int index; //test index - int16_t balance_count; //ding factor - int16_t r_index; //test cut number - FPCUTPT *segpt; //segment point - int32_t dist; //from prev segment - double sq_dist; //squared distance - double mean; //mean pitch - double total; //total dists - double factor; //cost function - //half of pitch - int16_t half_pitch = pitch / 2 - 1; - uint32_t lead_flag; //new flag - - if (half_pitch > 31) - half_pitch = 31; - else if (half_pitch < 0) - half_pitch = 0; - lead_flag = 1 << half_pitch; - - back_balance = cutpts[x - 1 - array_origin].back_balance << 1; - back_balance &= lead_flag + lead_flag - 1; - if (projection->pile_count (x) > zero_count) - back_balance |= 1; - fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; - if (projection->pile_count (x + half_pitch) > zero_count) - fwd_balance |= lead_flag; - - xpos = x; - cost = FLT_MAX; - pred = nullptr; - faked = faking; - terminal = false; - region_index = 0; - fake_count = INT16_MAX; - index = x - pitch; - if (index >= array_origin) { - segpt = &cutpts[index - array_origin]; - dist = x - segpt->xpos; - if (!segpt->terminal && segpt->fake_count < INT16_MAX) { - balance_count = 0; - if (textord_balance_factor > 0) { - lead_flag = back_balance ^ segpt->fwd_balance; - balance_count = 0; - while (lead_flag != 0) { - balance_count++; - lead_flag &= lead_flag - 1; - } - balance_count = (int16_t) (balance_count * textord_balance_factor - / projection_scale); - } - r_index = segpt->region_index + 1; - total = segpt->mean_sum + dist; - balance_count += offset; - sq_dist = - dist * dist + segpt->sq_sum + balance_count * balance_count; - mean = total / r_index; - factor = mean - pitch; - factor *= factor; - factor += sq_dist / (r_index) - mean * mean; - cost = factor; //find 
least cost - pred = segpt; //save path - mean_sum = total; - sq_sum = sq_dist; - fake_count = segpt->fake_count + faked; - mid_cuts = segpt->mid_cuts + mid_cut; - region_index = r_index; - } - } -} - - -/********************************************************************** - * check_pitch_sync - * - * Construct the lattice of possible segmentation points and choose the - * optimal path. Return the optimal path only. - * The return value is a measure of goodness of the sync. - **********************************************************************/ - -double check_pitch_sync2( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale, - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ) { - bool faking; //illegal cut pt - bool mid_cut; //cheap cut pt. 
- int16_t x; //current coord - int16_t blob_index; //blob number - int16_t left_edge; //of word - int16_t right_edge; //of word - int16_t array_origin; //x coord of array - int16_t offset; //dist to legal area - int16_t zero_count; //projection zero - int16_t best_left_x = 0; //for equals - int16_t best_right_x = 0; //right edge - TBOX this_box; //bounding box - TBOX next_box; //box of next blob - FPSEGPT *segpt; //segment point - double best_cost; //best path - double mean_sum; //computes result - FPCUTPT *best_end; //end of best path - int16_t best_fake; //best fake level - int16_t best_count; //no of cuts - BLOBNBOX_IT this_it; //copy iterator - FPSEGPT_IT seg_it = seg_list; //output iterator - - // tprintf("Computing sync on word of %d blobs with pitch %d\n", - // blob_count, pitch); - // if (blob_count==8 && pitch==27) - // projection->print(stdout,TRUE); - zero_count = 0; - if (pitch < 3) - pitch = 3; //nothing ludicrous - if ((pitch - 3) / 2 < pitch_error) - pitch_error = (pitch - 3) / 2; - this_it = *blob_it; - this_box = box_next (&this_it);//get box - // left_edge=this_box.left(); //left of word - // right_edge=this_box.right(); - // for (blob_index=1;blob_indexright_edge) - // right_edge=this_box.right(); - // } - for (left_edge = projection_left; projection->pile_count (left_edge) == 0 - && left_edge < projection_right; left_edge++); - for (right_edge = projection_right; projection->pile_count (right_edge) == 0 - && right_edge > left_edge; right_edge--); - ASSERT_HOST (right_edge >= left_edge); - if (pitsync_linear_version >= 4) - return check_pitch_sync3 (projection_left, projection_right, zero_count, - pitch, pitch_error, projection, - projection_scale, occupation_count, seg_list, - start, end); - array_origin = left_edge - pitch; - // array of points - std::vector cutpts(right_edge - left_edge + pitch * 2 + 1); - for (x = array_origin; x < left_edge; x++) - //free cuts - cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, - 
zero_count, pitch, x, 0); - for (offset = 0; offset <= pitch_error; offset++, x++) - //not quite free - cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, - zero_count, pitch, x, offset); - - this_it = *blob_it; - best_cost = FLT_MAX; - best_end = nullptr; - this_box = box_next (&this_it);//first box - next_box = box_next (&this_it);//second box - blob_index = 1; - while (x < right_edge - pitch_error) { - if (x > this_box.right () + pitch_error && blob_index < blob_count) { - this_box = next_box; - next_box = box_next (&this_it); - blob_index++; - } - faking = false; - mid_cut = false; - if (x <= this_box.left ()) - offset = 0; - else if (x <= this_box.left () + pitch_error) - offset = x - this_box.left (); - else if (x >= this_box.right ()) - offset = 0; - else if (x >= next_box.left () && blob_index < blob_count) { - offset = x - next_box.left (); - if (this_box.right () - x < offset) - offset = this_box.right () - x; - } - else if (x >= this_box.right () - pitch_error) - offset = this_box.right () - x; - else if (x - this_box.left () > pitch * pitsync_joined_edge - && this_box.right () - x > pitch * pitsync_joined_edge) { - mid_cut = true; - offset = 0; - } - else { - faking = true; - offset = projection->pile_count (x); - } - cutpts[x - array_origin].assign (&cutpts[0], array_origin, x, - faking, mid_cut, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); - x++; - } - - best_fake = INT16_MAX; - best_cost = INT32_MAX; - best_count = INT16_MAX; - while (x < right_edge + pitch) { - offset = x < right_edge ? 
right_edge - x : 0; - cutpts[x - array_origin].assign (&cutpts[0], array_origin, x, - false, false, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); - cutpts[x - array_origin].terminal = true; - if (cutpts[x - array_origin].index () + - cutpts[x - array_origin].fake_count <= best_count + best_fake) { - if (cutpts[x - array_origin].fake_count < best_fake - || (cutpts[x - array_origin].fake_count == best_fake - && cutpts[x - array_origin].cost_function () < best_cost)) { - best_fake = cutpts[x - array_origin].fake_count; - best_cost = cutpts[x - array_origin].cost_function (); - best_left_x = x; - best_right_x = x; - best_count = cutpts[x - array_origin].index (); - } - else if (cutpts[x - array_origin].fake_count == best_fake - && x == best_right_x + 1 - && cutpts[x - array_origin].cost_function () == best_cost) { - //exactly equal - best_right_x = x; - } - } - x++; - } - ASSERT_HOST (best_fake < INT16_MAX); - - best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; - if (this_box.right () == textord_test_x - && this_box.top () == textord_test_y) { - for (x = left_edge - pitch; x < right_edge + pitch; x++) { - tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", - x, cutpts[x - array_origin].cost_function (), - cutpts[x - array_origin].sum (), - cutpts[x - array_origin].squares (), - cutpts[x - array_origin].previous ()->position ()); - } - } - occupation_count = -1; - do { - for (x = best_end->position () - pitch + pitch_error; - x < best_end->position () - pitch_error - && projection->pile_count (x) == 0; x++); - if (x < best_end->position () - pitch_error) - occupation_count++; - //copy it - segpt = new FPSEGPT (best_end); - seg_it.add_before_then_move (segpt); - best_end = best_end->previous (); - } - while (best_end != nullptr); - seg_it.move_to_last (); - mean_sum = seg_it.data ()->sum (); - mean_sum = mean_sum * mean_sum / best_count; - if (seg_it.data ()->squares () - mean_sum < 0) - tprintf ("Impossible sqsum=%g, mean=%g, 
total=%d\n", - seg_it.data ()->squares (), seg_it.data ()->sum (), best_count); - // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n", - // blob_count,pitch,seg_it.data()->squares()-mean_sum, - // occupation_count); - return seg_it.data ()->squares () - mean_sum; -} - - -/********************************************************************** - * check_pitch_sync - * - * Construct the lattice of possible segmentation points and choose the - * optimal path. Return the optimal path only. - * The return value is a measure of goodness of the sync. - **********************************************************************/ - -double check_pitch_sync3( //find segmentation - int16_t projection_left, //edges //to be considered 0 - int16_t projection_right, - int16_t zero_count, - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - float projection_scale, //scale factor - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ) { - bool faking; //illegal cut pt - bool mid_cut; //cheap cut pt. 
- int16_t left_edge; //of word - int16_t right_edge; //of word - int16_t x; //current coord - int16_t array_origin; //x coord of array - int16_t offset; //dist to legal area - int16_t projection_offset; //from scaled projection - int16_t prev_zero; //previous zero dist - int16_t next_zero; //next zero dist - int16_t zero_offset; //scan window - int16_t best_left_x = 0; //for equals - int16_t best_right_x = 0; //right edge - FPSEGPT *segpt; //segment point - int minindex; //next input position - int test_index; //index to mins - double best_cost; //best path - double mean_sum; //computes result - FPCUTPT *best_end; //end of best path - int16_t best_fake; //best fake level - int16_t best_count; //no of cuts - FPSEGPT_IT seg_it = seg_list; //output iterator - - end = (end - start) % pitch; - if (pitch < 3) - pitch = 3; //nothing ludicrous - if ((pitch - 3) / 2 < pitch_error) - pitch_error = (pitch - 3) / 2; - //min dist of zero - zero_offset = (int16_t) (pitch * pitsync_joined_edge); - for (left_edge = projection_left; projection->pile_count (left_edge) == 0 - && left_edge < projection_right; left_edge++); - for (right_edge = projection_right; projection->pile_count (right_edge) == 0 - && right_edge > left_edge; right_edge--); - array_origin = left_edge - pitch; - // array of points - std::vector cutpts(right_edge - left_edge + pitch * 2 + 1); - // local min results - std::vector mins(pitch_error * 2 + 1); - for (x = array_origin; x < left_edge; x++) - //free cuts - cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, - zero_count, pitch, x, 0); - prev_zero = left_edge - 1; - for (offset = 0; offset <= pitch_error; offset++, x++) - //not quite free - cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, - zero_count, pitch, x, offset); - - best_cost = FLT_MAX; - best_end = nullptr; - for (offset = -pitch_error, minindex = 0; offset < pitch_error; - offset++, minindex++) - mins[minindex] = projection->local_min (x + offset); - next_zero = 
x + zero_offset + 1; - for (offset = next_zero - 1; offset >= x; offset--) { - if (projection->pile_count (offset) <= zero_count) { - next_zero = offset; - break; - } - } - while (x < right_edge - pitch_error) { - mins[minindex] = projection->local_min (x + pitch_error); - minindex++; - if (minindex > pitch_error * 2) - minindex = 0; - faking = false; - mid_cut = false; - offset = 0; - if (projection->pile_count (x) <= zero_count) { - prev_zero = x; - } - else { - for (offset = 1; offset <= pitch_error; offset++) - if (projection->pile_count (x + offset) <= zero_count - || projection->pile_count (x - offset) <= zero_count) - break; - } - if (offset > pitch_error) { - if (x - prev_zero > zero_offset && next_zero - x > zero_offset) { - for (offset = 0; offset <= pitch_error; offset++) { - test_index = minindex + pitch_error + offset; - if (test_index > pitch_error * 2) - test_index -= pitch_error * 2 + 1; - if (mins[test_index]) - break; - test_index = minindex + pitch_error - offset; - if (test_index > pitch_error * 2) - test_index -= pitch_error * 2 + 1; - if (mins[test_index]) - break; - } - } - if (offset > pitch_error) { - offset = projection->pile_count (x); - faking = true; - } - else { - projection_offset = - (int16_t) (projection->pile_count (x) / projection_scale); - if (projection_offset > offset) - offset = projection_offset; - mid_cut = true; - } - } - if ((start == 0 && end == 0) - || !textord_fast_pitch_test - || (x - projection_left - start) % pitch <= end) - cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, - faking, mid_cut, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); - else - cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, - faking, mid_cut, offset, - projection, projection_scale, - zero_count, pitch, - pitch_error); - x++; - if (next_zero < x || next_zero == x + zero_offset) - next_zero = x + zero_offset + 1; - if (projection->pile_count (x + zero_offset) <= zero_count) - next_zero = x + 
zero_offset; - } - - best_fake = INT16_MAX; - best_cost = INT32_MAX; - best_count = INT16_MAX; - while (x < right_edge + pitch) { - offset = x < right_edge ? right_edge - x : 0; - cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, - false, false, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); - cutpts[x - array_origin].terminal = true; - if (cutpts[x - array_origin].index () + - cutpts[x - array_origin].fake_count <= best_count + best_fake) { - if (cutpts[x - array_origin].fake_count < best_fake - || (cutpts[x - array_origin].fake_count == best_fake - && cutpts[x - array_origin].cost_function () < best_cost)) { - best_fake = cutpts[x - array_origin].fake_count; - best_cost = cutpts[x - array_origin].cost_function (); - best_left_x = x; - best_right_x = x; - best_count = cutpts[x - array_origin].index (); - } - else if (cutpts[x - array_origin].fake_count == best_fake - && x == best_right_x + 1 - && cutpts[x - array_origin].cost_function () == best_cost) { - //exactly equal - best_right_x = x; - } - } - x++; - } - ASSERT_HOST (best_fake < INT16_MAX); - - best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; - // for (x=left_edge-pitch;xposition()); - // } - occupation_count = -1; - do { - for (x = best_end->position () - pitch + pitch_error; - x < best_end->position () - pitch_error - && projection->pile_count (x) == 0; x++); - if (x < best_end->position () - pitch_error) - occupation_count++; - //copy it - segpt = new FPSEGPT (best_end); - seg_it.add_before_then_move (segpt); - best_end = best_end->previous (); - } - while (best_end != nullptr); - seg_it.move_to_last (); - mean_sum = seg_it.data ()->sum (); - mean_sum = mean_sum * mean_sum / best_count; - if (seg_it.data ()->squares () - mean_sum < 0) - tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", - seg_it.data ()->squares (), seg_it.data ()->sum (), best_count); - return seg_it.data ()->squares () - mean_sum; -} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pithsync.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pithsync.h deleted file mode 100644 index c03838b6..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pithsync.h +++ /dev/null @@ -1,132 +0,0 @@ -/********************************************************************** - * File: pithsync.h (Formerly pitsync2.h) - * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef PITHSYNC_H -#define PITHSYNC_H - -#include "blobbox.h" -#include "params.h" -#include "statistc.h" - -class FPSEGPT_LIST; - -class FPCUTPT -{ - public: - FPCUTPT() = default; - void setup ( //start of cut - FPCUTPT cutpts[], //predecessors - int16_t array_origin, //start coord - STATS * projection, //occupation - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t x, //position - int16_t offset); //dist to gap - - void assign( //evaluate cut - FPCUTPT cutpts[], //predecessors - int16_t array_origin, //start coord - int16_t x, //position - bool faking, //faking this one - bool mid_cut, //doing free cut - int16_t offset, //extra cost dist - STATS* projection, //occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error); //allowed tolerance - - void assign_cheap ( //evaluate cut - FPCUTPT cutpts[], //predecessors - int16_t array_origin, //start coord - int16_t x, //position - BOOL8 faking, //faking this one - BOOL8 mid_cut, //doing free cut - int16_t offset, //extra cost dist - STATS * projection, //occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error); //allowed tolerance - - int32_t position() { // access func - return xpos; - } - double cost_function() { - return cost; - } - double squares() { - return sq_sum; - } - double sum() { - return mean_sum; - } - FPCUTPT *previous() { - return pred; - } - int16_t cheap_cuts() const { //no of mi cuts - return mid_cuts; - } - int16_t index() const { - return region_index; - } - - bool faked; //faked split point - bool terminal; //successful end - int16_t fake_count; //total fakes to here - - private: - int16_t region_index; //cut serial number - int16_t mid_cuts; //no of cheap cuts - int32_t xpos; //location - uint32_t back_balance; //proj 
backwards - uint32_t fwd_balance; //proj forwards - FPCUTPT *pred; //optimal previous - double mean_sum; //mean so far - double sq_sum; //summed distsances - double cost; //cost function -}; -double check_pitch_sync2( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale, - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ); -double check_pitch_sync3( //find segmentation - int16_t projection_left, //edges //to be considered 0 - int16_t projection_right, - int16_t zero_count, - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - float projection_scale, //scale factor - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pitsync1.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pitsync1.cpp deleted file mode 100644 index 870f1d75..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pitsync1.cpp +++ /dev/null @@ -1,418 +0,0 @@ -/********************************************************************** - * File: pitsync1.cpp (Formerly pitsync.c) - * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include // for FLT_MAX -#include -#include "pitsync1.h" - -ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST) - -INT_VAR(pitsync_linear_version, 6, "Use new fast algorithm"); -double_VAR(pitsync_joined_edge, 0.75, "Dist inside big blob for chopping"); -double_VAR(pitsync_offset_freecut_fraction, 0.25, - "Fraction of cut for free cuts"); -INT_VAR(pitsync_fake_depth, 1, "Max advance fake generation"); - -/********************************************************************** - * FPSEGPT::FPSEGPT - * - * Constructor to make a new FPSEGPT. - * The existing FPCUTPT is duplicated. - **********************************************************************/ - -FPSEGPT::FPSEGPT( //constructor - FPCUTPT *cutpt //create from new form - ) { - pred = nullptr; - mean_sum = cutpt->sum (); - sq_sum = cutpt->squares (); - cost = cutpt->cost_function (); - faked = cutpt->faked; - terminal = cutpt->terminal; - fake_count = cutpt->fake_count; - xpos = cutpt->position (); - mid_cuts = cutpt->cheap_cuts (); -} - - -/********************************************************************** - * FPSEGPT::FPSEGPT - * - * Constructor to make a new FPSEGPT. 
- **********************************************************************/ - -FPSEGPT::FPSEGPT ( //constructor -int16_t x //position -):xpos (x) { - pred = nullptr; - mean_sum = 0; - sq_sum = 0; - cost = 0; - faked = FALSE; - terminal = FALSE; - fake_count = 0; - mid_cuts = 0; -} - - -/********************************************************************** - * FPSEGPT::FPSEGPT - * - * Constructor to make a new FPSEGPT. - **********************************************************************/ - -FPSEGPT::FPSEGPT ( //constructor -int16_t x, //position -BOOL8 faking, //faking this one -int16_t offset, //dist to gap -int16_t region_index, //segment number -int16_t pitch, //proposed pitch -int16_t pitch_error, //allowed tolerance -FPSEGPT_LIST * prev_list //previous segment -) -: fake_count(0), - xpos(x), - mean_sum(0.0), - sq_sum(0.0) -{ - int16_t best_fake; //on previous - FPSEGPT *segpt; //segment point - int32_t dist; //from prev segment - double sq_dist; //squared distance - double mean; //mean pitch - double total; //total dists - double factor; //cost function - FPSEGPT_IT pred_it = prev_list;//for previuos segment - - cost = FLT_MAX; - pred = nullptr; - faked = faking; - terminal = FALSE; - best_fake = INT16_MAX; - mid_cuts = 0; - for (pred_it.mark_cycle_pt (); !pred_it.cycled_list (); pred_it.forward ()) { - segpt = pred_it.data (); - if (segpt->fake_count < best_fake) - best_fake = segpt->fake_count; - dist = x - segpt->xpos; - if (dist >= pitch - pitch_error && dist <= pitch + pitch_error - && !segpt->terminal) { - total = segpt->mean_sum + dist; - sq_dist = dist * dist + segpt->sq_sum + offset * offset; - //sum of squarees - mean = total / region_index; - factor = mean - pitch; - factor *= factor; - factor += sq_dist / (region_index) - mean * mean; - if (factor < cost) { - cost = factor; //find least cost - pred = segpt; //save path - mean_sum = total; - sq_sum = sq_dist; - fake_count = segpt->fake_count + faked; - } - } - } - if (fake_count > best_fake + 1) - 
pred = nullptr; //fail it -} - -/********************************************************************** - * check_pitch_sync - * - * Construct the lattice of possible segmentation points and choose the - * optimal path. Return the optimal path only. - * The return value is a measure of goodness of the sync. - **********************************************************************/ - -double check_pitch_sync( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - FPSEGPT_LIST *seg_list //output list - ) { - int16_t x; //current coord - int16_t min_index; //blob number - int16_t max_index; //blob number - int16_t left_edge; //of word - int16_t right_edge; //of word - int16_t right_max; //max allowed x - int16_t min_x; //in this region - int16_t max_x; - int16_t region_index; - int16_t best_region_index = 0; //for best result - int16_t offset; //dist to legal area - int16_t left_best_x; //edge of good region - int16_t right_best_x; //right edge - TBOX min_box; //bounding box - TBOX max_box; //bounding box - TBOX next_box; //box of next blob - FPSEGPT *segpt; //segment point - FPSEGPT_LIST *segpts; //points in a segment - double best_cost; //best path - double mean_sum; //computes result - FPSEGPT *best_end; //end of best path - BLOBNBOX_IT min_it; //copy iterator - BLOBNBOX_IT max_it; //copy iterator - FPSEGPT_IT segpt_it; //iterator - //output segments - FPSEGPT_IT outseg_it = seg_list; - FPSEGPT_LIST_CLIST lattice; //list of lists - //region iterator - FPSEGPT_LIST_C_IT lattice_it = &lattice; - - // tprintf("Computing sync on word of %d blobs with pitch %d\n", - // blob_count, pitch); - // if (blob_count==8 && pitch==27) - // projection->print(stdout,TRUE); - if (pitch < 3) - pitch = 3; //nothing ludicrous - if ((pitch - 3) / 2 < pitch_error) - pitch_error = (pitch - 3) / 2; - min_it = *blob_it; - min_box = box_next (&min_it); 
//get box - // if (blob_count==8 && pitch==27) - // tprintf("1st box at (%d,%d)->(%d,%d)\n", - // min_box.left(),min_box.bottom(), - // min_box.right(),min_box.top()); - //left of word - left_edge = min_box.left () + pitch_error; - for (min_index = 1; min_index < blob_count; min_index++) { - min_box = box_next (&min_it); - // if (blob_count==8 && pitch==27) - // tprintf("Box at (%d,%d)->(%d,%d)\n", - // min_box.left(),min_box.bottom(), - // min_box.right(),min_box.top()); - } - right_edge = min_box.right (); //end of word - max_x = left_edge; - //min permissible - min_x = max_x - pitch + pitch_error * 2 + 1; - right_max = right_edge + pitch - pitch_error - 1; - segpts = new FPSEGPT_LIST; //list of points - segpt_it.set_to_list (segpts); - for (x = min_x; x <= max_x; x++) { - segpt = new FPSEGPT (x); //make a new one - //put in list - segpt_it.add_after_then_move (segpt); - } - //first segment - lattice_it.add_before_then_move (segpts); - min_index = 0; - region_index = 1; - best_cost = FLT_MAX; - best_end = nullptr; - min_it = *blob_it; - min_box = box_next (&min_it); //first box - do { - left_best_x = -1; - right_best_x = -1; - segpts = new FPSEGPT_LIST; //list of points - segpt_it.set_to_list (segpts); - min_x += pitch - pitch_error;//next limits - max_x += pitch + pitch_error; - while (min_box.right () < min_x && min_index < blob_count) { - min_index++; - min_box = box_next (&min_it); - } - max_it = min_it; - max_index = min_index; - max_box = min_box; - next_box = box_next (&max_it); - for (x = min_x; x <= max_x && x <= right_max; x++) { - while (x < right_edge && max_index < blob_count - && x > max_box.right ()) { - max_index++; - max_box = next_box; - next_box = box_next (&max_it); - } - if (x <= max_box.left () + pitch_error - || x >= max_box.right () - pitch_error || x >= right_edge - || (max_index < blob_count - 1 && x >= next_box.left ()) - || (x - max_box.left () > pitch * pitsync_joined_edge - && max_box.right () - x > pitch * pitsync_joined_edge)) { - 
// || projection->local_min(x)) - if (x - max_box.left () > 0 - && x - max_box.left () <= pitch_error) - //dist to real break - offset = x - max_box.left (); - else if (max_box.right () - x > 0 - && max_box.right () - x <= pitch_error - && (max_index >= blob_count - 1 - || x < next_box.left ())) - offset = max_box.right () - x; - else - offset = 0; - // offset=pitsync_offset_freecut_fraction*projection->pile_count(x); - segpt = new FPSEGPT (x, FALSE, offset, region_index, - pitch, pitch_error, lattice_it.data ()); - } - else { - offset = projection->pile_count (x); - segpt = new FPSEGPT (x, TRUE, offset, region_index, - pitch, pitch_error, lattice_it.data ()); - } - if (segpt->previous () != nullptr) { - segpt_it.add_after_then_move (segpt); - if (x >= right_edge - pitch_error) { - segpt->terminal = TRUE;//no more wanted - if (segpt->cost_function () < best_cost) { - best_cost = segpt->cost_function (); - //find least - best_end = segpt; - best_region_index = region_index; - left_best_x = x; - right_best_x = x; - } - else if (segpt->cost_function () == best_cost - && right_best_x == x - 1) - right_best_x = x; - } - } - else { - delete segpt; //no good - } - } - if (segpts->empty ()) { - if (best_end != nullptr) - break; //already found one - make_illegal_segment (lattice_it.data (), min_box, min_it, - region_index, pitch, pitch_error, segpts); - } - else { - if (right_best_x > left_best_x + 1) { - left_best_x = (left_best_x + right_best_x + 1) / 2; - for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () - && segpt_it.data ()->position () != left_best_x; - segpt_it.forward ()); - if (segpt_it.data ()->position () == left_best_x) - //middle of region - best_end = segpt_it.data (); - } - } - //new segment - lattice_it.add_before_then_move (segpts); - region_index++; - } - while (min_x < right_edge); - ASSERT_HOST (best_end != nullptr);//must always find some - - for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list (); - lattice_it.forward ()) { - segpts = 
lattice_it.data (); - segpt_it.set_to_list (segpts); - // if (blob_count==8 && pitch==27) - // { - // for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward()) - // { - // segpt=segpt_it.data(); - // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n", - // segpt->position(),segpt,segpt->cost_function(), - // segpt->sum(),segpt->squares(),segpt->previous()); - // } - // tprintf("\n"); - // } - for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () - && segpt_it.data () != best_end; segpt_it.forward ()); - if (segpt_it.data () == best_end) { - //save good one - segpt = segpt_it.extract (); - outseg_it.add_before_then_move (segpt); - best_end = segpt->previous (); - } - } - ASSERT_HOST (best_end == nullptr); - ASSERT_HOST (!outseg_it.empty ()); - outseg_it.move_to_last (); - mean_sum = outseg_it.data ()->sum (); - mean_sum = mean_sum * mean_sum / best_region_index; - if (outseg_it.data ()->squares () - mean_sum < 0) - tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", - outseg_it.data ()->squares (), outseg_it.data ()->sum (), - best_region_index); - lattice.deep_clear (); //shift the lot - return outseg_it.data ()->squares () - mean_sum; -} - - -/********************************************************************** - * make_illegal_segment - * - * Make a fake set of chop points due to having no legal places. 
- **********************************************************************/ - -void make_illegal_segment( //find segmentation - FPSEGPT_LIST *prev_list, //previous segments - TBOX blob_box, //bounding box - BLOBNBOX_IT blob_it, //iterator - int16_t region_index, //number of segment - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - FPSEGPT_LIST *seg_list //output list - ) { - int16_t x; //current coord - int16_t min_x = 0; //in this region - int16_t max_x = 0; - int16_t offset; //dist to edge - FPSEGPT *segpt; //segment point - FPSEGPT *prevpt; //previous point - float best_cost; //best path - FPSEGPT_IT segpt_it = seg_list;//iterator - //previous points - FPSEGPT_IT prevpt_it = prev_list; - - best_cost = FLT_MAX; - for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list (); - prevpt_it.forward ()) { - prevpt = prevpt_it.data (); - if (prevpt->cost_function () < best_cost) { - //find least - best_cost = prevpt->cost_function (); - min_x = prevpt->position (); - max_x = min_x; //limits on coords - } - else if (prevpt->cost_function () == best_cost) { - max_x = prevpt->position (); - } - } - min_x += pitch - pitch_error; - max_x += pitch + pitch_error; - for (x = min_x; x <= max_x; x++) { - while (x > blob_box.right ()) { - blob_box = box_next (&blob_it); - } - offset = x - blob_box.left (); - if (blob_box.right () - x < offset) - offset = blob_box.right () - x; - segpt = new FPSEGPT (x, FALSE, offset, - region_index, pitch, pitch_error, prev_list); - if (segpt->previous () != nullptr) { - ASSERT_HOST (offset >= 0); - fprintf (stderr, "made fake at %d\n", x); - //make one up - segpt_it.add_after_then_move (segpt); - segpt->faked = TRUE; - segpt->fake_count++; - } - else - delete segpt; - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pitsync1.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pitsync1.h deleted file mode 100644 index a2b381db..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/pitsync1.h +++ /dev/null @@ -1,121 +0,0 @@ -/********************************************************************** - * File: pitsync1.h (Formerly pitsync.h) - * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef PITSYNC1_H -#define PITSYNC1_H - -#include "elst.h" -#include "clst.h" -#include "blobbox.h" -#include "params.h" -#include "statistc.h" -#include "pithsync.h" - -class FPSEGPT_LIST; - -class FPSEGPT:public ELIST_LINK -{ - public: - FPSEGPT() = default; - FPSEGPT( //constructor - int16_t x); //position - FPSEGPT( //constructor - int16_t x, //position - BOOL8 faking, //faking this one - int16_t offset, //extra cost dist - int16_t region_index, //segment number - int16_t pitch, //proposed pitch - int16_t pitch_error, //allowed tolerance - FPSEGPT_LIST *prev_list); //previous segment - FPSEGPT(FPCUTPT *cutpt); //build from new type - - int32_t position() { // access func - return xpos; - } - double cost_function() { - return cost; - } - double squares() { - return sq_sum; - } - double sum() { - return mean_sum; - } - FPSEGPT *previous() { - return pred; - } - int16_t cheap_cuts() const { //no of cheap cuts - return 
mid_cuts; - } - - //faked split point - BOOL8 faked; - BOOL8 terminal; //successful end - int16_t fake_count; //total fakes to here - - private: - int16_t mid_cuts; //no of cheap cuts - int32_t xpos; //location - FPSEGPT *pred; //optimal previous - double mean_sum; //mean so far - double sq_sum; //summed distsances - double cost; //cost function -}; - -ELISTIZEH (FPSEGPT) CLISTIZEH (FPSEGPT_LIST) -extern -INT_VAR_H (pitsync_linear_version, 0, "Use new fast algorithm"); -extern -double_VAR_H (pitsync_joined_edge, 0.75, -"Dist inside big blob for chopping"); -extern -double_VAR_H (pitsync_offset_freecut_fraction, 0.25, -"Fraction of cut for free cuts"); -extern -INT_VAR_H (pitsync_fake_depth, 1, "Max advance fake generation"); -double check_pitch_sync( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - FPSEGPT_LIST *seg_list //output list - ); -void make_illegal_segment( //find segmentation - FPSEGPT_LIST *prev_list, //previous segments - TBOX blob_box, //bounding box - BLOBNBOX_IT blob_it, //iterator - int16_t region_index, //number of segment - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - FPSEGPT_LIST *seg_list //output list - ); -int16_t vertical_torow_projection( //project whole row - TO_ROW *row, //row to do - STATS *projection //output - ); -void vertical_cblob_projection( //project outlines - C_BLOB *blob, //blob to project - STATS *stats //output - ); -void vertical_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/scanedg.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/scanedg.cpp deleted file mode 100644 index 2e7e4ab8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/scanedg.cpp +++ /dev/null @@ 
-1,370 +0,0 @@ -/********************************************************************** - * File: scanedg.cpp (Formerly scanedge.c) - * Description: Raster scanning crack based edge extractor. - * Author: Ray Smith - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "scanedg.h" - -#include // std::unique_ptr - -#include "allheaders.h" -#include "edgloop.h" - -#define WHITE_PIX 1 /*thresholded colours */ -#define BLACK_PIX 0 -// Flips between WHITE_PIX and BLACK_PIX. -#define FLIP_COLOUR(pix) (1-(pix)) - -/********************************************************************** - * block_edges - * - * Extract edges from a PDBLK. 
- **********************************************************************/ - -void block_edges(Pix *t_pix, // thresholded image - PDBLK *block, // block in image - C_OUTLINE_IT* outline_it) { - ICOORD bleft; // bounding box - ICOORD tright; - BLOCK_LINE_IT line_it = block; // line iterator - - int width = pixGetWidth(t_pix); - int height = pixGetHeight(t_pix); - int wpl = pixGetWpl(t_pix); - // lines in progress - std::unique_ptr ptrline(new CRACKEDGE*[width + 1]); - CRACKEDGE *free_cracks = nullptr; - - block->bounding_box(bleft, tright); // block box - ASSERT_HOST(tright.x() <= width); - ASSERT_HOST(tright.y() <= height); - int block_width = tright.x() - bleft.x(); - for (int x = block_width; x >= 0; x--) - ptrline[x] = nullptr; // no lines in progress - - std::unique_ptr bwline(new uint8_t[width]); - - uint8_t margin = WHITE_PIX; - - for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) { - if (y >= bleft.y() && y < tright.y()) { - // Get the binary pixels from the image. - l_uint32* line = pixGetData(t_pix) + wpl * (height - 1 - y); - for (int x = 0; x < block_width; ++x) { - bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1; - } - make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y); - } else { - memset(bwline.get(), margin, block_width * sizeof(bwline[0])); - } - line_edges(bleft.x(), y, block_width, - margin, bwline.get(), ptrline.get(), &free_cracks, outline_it); - } - - free_crackedges(free_cracks); // really free them -} - - -/********************************************************************** - * make_margins - * - * Get an image line and set to margin non-text pixels. 
- **********************************************************************/ - -void make_margins( //get a line - PDBLK *block, //block in image - BLOCK_LINE_IT *line_it, //for old style - uint8_t *pixels, //pixels to strip - uint8_t margin, //white-out pixel - int16_t left, //block edges - int16_t right, - int16_t y //line coord - ) { - ICOORDELT_IT seg_it; - int32_t start; //of segment - int16_t xext; //of segment - int xindex; //index to pixel - - if (block->poly_block () != nullptr) { - std::unique_ptr lines(new PB_LINE_IT (block->poly_block ())); - const std::unique_ptr segments( - lines->get_line(y)); - if (!segments->empty ()) { - seg_it.set_to_list(segments.get()); - seg_it.mark_cycle_pt (); - start = seg_it.data ()->x (); - xext = seg_it.data ()->y (); - for (xindex = left; xindex < right; xindex++) { - if (xindex >= start && !seg_it.cycled_list ()) { - xindex = start + xext - 1; - seg_it.forward (); - start = seg_it.data ()->x (); - xext = seg_it.data ()->y (); - } - else - pixels[xindex - left] = margin; - } - } - else { - for (xindex = left; xindex < right; xindex++) - pixels[xindex - left] = margin; - } - } - else { - start = line_it->get_line (y, xext); - for (xindex = left; xindex < start; xindex++) - pixels[xindex - left] = margin; - for (xindex = start + xext; xindex < right; xindex++) - pixels[xindex - left] = margin; - } -} - -/********************************************************************** - * line_edges - * - * Scan a line for edges and update the edges in progress. - * When edges close into loops, send them for approximation. 
- **********************************************************************/ - -void line_edges(int16_t x, // coord of line start - int16_t y, // coord of line - int16_t xext, // width of line - uint8_t uppercolour, // start of prev line - uint8_t * bwpos, // thresholded line - CRACKEDGE ** prevline, // edges in progress - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it) { - CrackPos pos = {free_cracks, x, y }; - int xmax; // max x coord - int colour; // of current pixel - int prevcolour; // of previous pixel - CRACKEDGE *current; // current h edge - CRACKEDGE *newcurrent; // new h edge - - xmax = x + xext; // max allowable coord - prevcolour = uppercolour; // forced plain margin - current = nullptr; // nothing yet - - // do each pixel - for (; pos.x < xmax; pos.x++, prevline++) { - colour = *bwpos++; // current pixel - if (*prevline != nullptr) { - // changed above - // change colour - uppercolour = FLIP_COLOUR(uppercolour); - if (colour == prevcolour) { - if (colour == uppercolour) { - // finish a line - join_edges(current, *prevline, free_cracks, outline_it); - current = nullptr; // no edge now - } else { - // new horiz edge - current = h_edge(uppercolour - colour, *prevline, &pos); - } - *prevline = nullptr; // no change this time - } else { - if (colour == uppercolour) - *prevline = v_edge(colour - prevcolour, *prevline, &pos); - // 8 vs 4 connection - else if (colour == WHITE_PIX) { - join_edges(current, *prevline, free_cracks, outline_it); - current = h_edge(uppercolour - colour, nullptr, &pos); - *prevline = v_edge(colour - prevcolour, current, &pos); - } else { - newcurrent = h_edge(uppercolour - colour, *prevline, &pos); - *prevline = v_edge(colour - prevcolour, current, &pos); - current = newcurrent; // right going h edge - } - prevcolour = colour; // remember new colour - } - } else { - if (colour != prevcolour) { - *prevline = current = v_edge(colour - prevcolour, current, &pos); - prevcolour = colour; - } - if (colour != uppercolour) - current = 
h_edge(uppercolour - colour, current, &pos); - else - current = nullptr; // no edge now - } - } - if (current != nullptr) { - // out of block - if (*prevline != nullptr) { // got one to join to? - join_edges(current, *prevline, free_cracks, outline_it); - *prevline = nullptr; // tidy now - } else { - // fake vertical - *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos); - } - } else if (*prevline != nullptr) { - //continue fake - *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos); - } -} - - -/********************************************************************** - * h_edge - * - * Create a new horizontal CRACKEDGE and join it to the given edge. - **********************************************************************/ - -CRACKEDGE *h_edge(int sign, // sign of edge - CRACKEDGE* join, // edge to join to - CrackPos* pos) { - CRACKEDGE *newpt; // return value - - if (*pos->free_cracks != nullptr) { - newpt = *pos->free_cracks; - *pos->free_cracks = newpt->next; // get one fast - } else { - newpt = new CRACKEDGE; - } - newpt->pos.set_y(pos->y + 1); // coords of pt - newpt->stepy = 0; // edge is horizontal - - if (sign > 0) { - newpt->pos.set_x(pos->x + 1); // start location - newpt->stepx = -1; - newpt->stepdir = 0; - } else { - newpt->pos.set_x(pos->x); // start location - newpt->stepx = 1; - newpt->stepdir = 2; - } - - if (join == nullptr) { - newpt->next = newpt; // ptrs to other ends - newpt->prev = newpt; - } else { - if (newpt->pos.x() + newpt->stepx == join->pos.x() - && newpt->pos.y() == join->pos.y()) { - newpt->prev = join->prev; // update other ends - newpt->prev->next = newpt; - newpt->next = join; // join up - join->prev = newpt; - } else { - newpt->next = join->next; // update other ends - newpt->next->prev = newpt; - newpt->prev = join; // join up - join->next = newpt; - } - } - return newpt; -} - - -/********************************************************************** - * v_edge - * - * Create a new vertical 
CRACKEDGE and join it to the given edge. - **********************************************************************/ - -CRACKEDGE *v_edge(int sign, // sign of edge - CRACKEDGE* join, - CrackPos* pos) { - CRACKEDGE *newpt; // return value - - if (*pos->free_cracks != nullptr) { - newpt = *pos->free_cracks; - *pos->free_cracks = newpt->next; // get one fast - } else { - newpt = new CRACKEDGE; - } - newpt->pos.set_x(pos->x); // coords of pt - newpt->stepx = 0; // edge is vertical - - if (sign > 0) { - newpt->pos.set_y(pos->y); // start location - newpt->stepy = 1; - newpt->stepdir = 3; - } else { - newpt->pos.set_y(pos->y + 1); // start location - newpt->stepy = -1; - newpt->stepdir = 1; - } - - if (join == nullptr) { - newpt->next = newpt; //ptrs to other ends - newpt->prev = newpt; - } else { - if (newpt->pos.x() == join->pos.x() - && newpt->pos.y() + newpt->stepy == join->pos.y()) { - newpt->prev = join->prev; // update other ends - newpt->prev->next = newpt; - newpt->next = join; // join up - join->prev = newpt; - } else { - newpt->next = join->next; // update other ends - newpt->next->prev = newpt; - newpt->prev = join; // join up - join->next = newpt; - } - } - return newpt; -} - - -/********************************************************************** - * join_edges - * - * Join 2 edges together. Send the outline for approximation when a - * closed loop is formed. 
- **********************************************************************/ - -void join_edges(CRACKEDGE *edge1, // edges to join - CRACKEDGE *edge2, // no specific order - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it) { - if (edge1->pos.x() + edge1->stepx != edge2->pos.x() - || edge1->pos.y() + edge1->stepy != edge2->pos.y()) { - CRACKEDGE *tempedge = edge1; - edge1 = edge2; // swap around - edge2 = tempedge; - } - - if (edge1->next == edge2) { - // already closed - complete_edge(edge1, outline_it); - // attach freelist to end - edge1->prev->next = *free_cracks; - *free_cracks = edge1; // and free list - } else { - // update opposite ends - edge2->prev->next = edge1->next; - edge1->next->prev = edge2->prev; - edge1->next = edge2; // make joins - edge2->prev = edge1; - } -} - - -/********************************************************************** - * free_crackedges - * - * Really free the CRACKEDGEs by giving them back to delete. - **********************************************************************/ - -void free_crackedges(CRACKEDGE *start) { - CRACKEDGE *current; // current edge to free - CRACKEDGE *next; // next one to free - - for (current = start; current != nullptr; current = next) { - next = current->next; - delete current; // delete them all - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/scanedg.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/scanedg.h deleted file mode 100644 index 0a8367c2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/scanedg.h +++ /dev/null @@ -1,67 +0,0 @@ -/********************************************************************** - * File: scanedg.h (Formerly scanedge.h) - * Description: Raster scanning crack based edge extractor. - * Author: Ray Smith - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef SCANEDG_H -#define SCANEDG_H - -#include "params.h" -#include "scrollview.h" -#include "pdblock.h" -#include "crakedge.h" - -class C_OUTLINE_IT; - -struct CrackPos { - CRACKEDGE** free_cracks; // Freelist for fast allocation. - int x; // Position of new edge. - int y; -}; - -struct Pix; - -void block_edges(Pix *t_image, // thresholded image - PDBLK *block, // block in image - C_OUTLINE_IT* outline_it); -void make_margins(PDBLK *block, // block in image - BLOCK_LINE_IT *line_it, // for old style - uint8_t *pixels, // pixels to strip - uint8_t margin, // white-out pixel - int16_t left, // block edges - int16_t right, - int16_t y); // line coord ); -void line_edges(int16_t x, // coord of line start - int16_t y, // coord of line - int16_t xext, // width of line - uint8_t uppercolour, // start of prev line - uint8_t * bwpos, // thresholded line - CRACKEDGE ** prevline, // edges in progress - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it); -CRACKEDGE *h_edge(int sign, // sign of edge - CRACKEDGE * join, // edge to join to - CrackPos* pos); -CRACKEDGE *v_edge(int sign, // sign of edge - CRACKEDGE * join, // edge to join to - CrackPos* pos); -void join_edges(CRACKEDGE *edge1, // edges to join - CRACKEDGE *edge2, // no specific order - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it); -void free_crackedges(CRACKEDGE *start); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/sortflts.cpp 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/sortflts.cpp deleted file mode 100644 index 72604d76..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/sortflts.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/********************************************************************** - * File: sortflts.cpp (Formerly sfloats.c) - * Description: Code to maintain a sorted list of floats. - * Author: Ray Smith - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "sortflts.h" - -ELISTIZE (SORTED_FLOAT) -/** - * @name SORTED_FLOATS::add - * - * Add a new entry to the sorted list of floats. - */ -void SORTED_FLOATS::add( //add new entry - float value, - int32_t key) { - SORTED_FLOAT *new_float = new SORTED_FLOAT (value, key); - - if (list.empty ()) - it.add_after_stay_put (new_float); - else { - it.move_to_first (); - while (!it.at_last () && it.data ()->entry < value) - it.forward (); - if (it.data ()->entry < value) - it.add_after_stay_put (new_float); - else - it.add_before_stay_put (new_float); - } -} - - -/** - * @name SORTED_FLOATS::remove - * - * Remove an entry from the sorted list of floats. 
- */ - -void SORTED_FLOATS::remove( //remove the entry - int32_t key) { - if (!list.empty ()) { - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - if (it.data ()->address == key) { - delete it.extract (); - return; - } - } - } -} - - -/** - * @name SORTED_FLOATS::operator[] - * - * Return the floating point value of the given index into the list. - */ - -float -SORTED_FLOATS::operator[] ( //get an entry -int32_t index //to list -) { - it.move_to_first (); - return it.data_relative (index)->entry; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/sortflts.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/sortflts.h deleted file mode 100644 index 882ab746..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/sortflts.h +++ /dev/null @@ -1,71 +0,0 @@ -/********************************************************************** - * File: sortflts.h (Formerly sfloats.h) - * Description: Code to maintain a sorted list of floats. - * Author: Ray Smith - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef SORTFLTS_H -#define SORTFLTS_H - -#include "elst.h" - -class SORTED_FLOAT:public ELIST_LINK -{ - friend class SORTED_FLOATS; - - public: - SORTED_FLOAT() = default; - SORTED_FLOAT( //create one - float value, //value of entry - int32_t key) { //reference - entry = value; - address = key; - } - private: - float entry; //value of float - int32_t address; //key -}; - -ELISTIZEH (SORTED_FLOAT) -class SORTED_FLOATS -{ - public: - /** empty constructor */ - SORTED_FLOATS() { - it.set_to_list (&list); - } - /** - * add sample - * @param value sample float - * @param key retrieval key - */ - void add(float value, - int32_t key); - /** - * delete sample - * @param key key to delete - */ - void remove(int32_t key); - /** - * index to list - * @param index item to get - */ - float operator[] (int32_t index); - - private: - SORTED_FLOAT_LIST list; //list of floats - SORTED_FLOAT_IT it; //iterator built-in -}; -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/strokewidth.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/strokewidth.cpp deleted file mode 100644 index 84399bc8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/strokewidth.cpp +++ /dev/null @@ -1,2015 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: strokewidth.cpp -// Description: Subclass of BBGrid to find uniformity of strokewidth. -// Author: Ray Smith -// Created: Mon Mar 31 16:17:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "strokewidth.h" - -#include -#include - -#include "blobbox.h" -#include "colpartition.h" -#include "colpartitiongrid.h" -#include "imagefind.h" -#include "linlsq.h" -#include "statistc.h" -#include "tabfind.h" -#include "textlineprojection.h" -#include "tordmain.h" // For SetBlobStrokeWidth. - -namespace tesseract { - -INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths"); -BOOL_VAR(textord_tabfind_only_strokewidths, false, "Only run stroke widths"); - -/** Allowed proportional change in stroke width to be the same font. */ -const double kStrokeWidthFractionTolerance = 0.125; -/** - * Allowed constant change in stroke width to be the same font. - * Really 1.5 pixels. - */ -const double kStrokeWidthTolerance = 1.5; -// Same but for CJK we are a bit more generous. -const double kStrokeWidthFractionCJK = 0.25; -const double kStrokeWidthCJK = 2.0; -// Radius in grid cells of search for broken CJK. Doesn't need to be very -// large as the grid size should be about the size of a character anyway. -const int kCJKRadius = 2; -// Max distance fraction of size to join close but broken CJK characters. -const double kCJKBrokenDistanceFraction = 0.25; -// Max number of components in a broken CJK character. -const int kCJKMaxComponents = 8; -// Max aspect ratio of CJK broken characters when put back together. 
-const double kCJKAspectRatio = 1.25; -// Max increase in aspect ratio of CJK broken characters when merged. -const double kCJKAspectRatioIncrease = 1.0625; -// Max multiple of the grid size that will be used in computing median CJKsize. -const int kMaxCJKSizeRatio = 5; -// Min fraction of blobs broken CJK to iterate and run it again. -const double kBrokenCJKIterationFraction = 0.125; -// Multiple of gridsize as x-padding for a search box for diacritic base -// characters. -const double kDiacriticXPadRatio = 7.0; -// Multiple of gridsize as y-padding for a search box for diacritic base -// characters. -const double kDiacriticYPadRatio = 1.75; -// Min multiple of diacritic height that a neighbour must be to be a -// convincing base character. -const double kMinDiacriticSizeRatio = 1.0625; -// Max multiple of a textline's median height as a threshold for the sum of -// a diacritic's farthest x and y distances (gap + size). -const double kMaxDiacriticDistanceRatio = 1.25; -// Max x-gap between a diacritic and its base char as a fraction of the height -// of the base char (allowing other blobs to fill the gap.) -const double kMaxDiacriticGapToBaseCharHeight = 1.0; -// Ratio between longest side of a line and longest side of a character. -// (neighbor_min > blob_min * kLineTrapShortest && -// neighbor_max < blob_max / kLineTrapLongest) -// => neighbor is a grapheme and blob is a line. -const int kLineTrapLongest = 4; -// Ratio between shortest side of a line and shortest side of a character. -const int kLineTrapShortest = 2; -// Max aspect ratio of the total box before CountNeighbourGaps -// decides immediately based on the aspect ratio. -const int kMostlyOneDirRatio = 3; -// Aspect ratio for a blob to be considered as line residue. -const double kLineResidueAspectRatio = 8.0; -// Padding ratio for line residue search box. -const int kLineResiduePadRatio = 3; -// Min multiple of neighbour size for a line residue to be genuine. 
-const double kLineResidueSizeRatio = 1.75; -// Aspect ratio filter for OSD. -const float kSizeRatioToReject = 2.0; -// Expansion factor for search box for good neighbours. -const double kNeighbourSearchFactor = 2.5; -// Factor of increase of overlap when adding diacritics to make an image noisy. -const double kNoiseOverlapGrowthFactor = 4.0; -// Fraction of the image size to add overlap when adding diacritics for an -// image to qualify as noisy. -const double kNoiseOverlapAreaFactor = 1.0 / 512; - -StrokeWidth::StrokeWidth(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BlobGrid(gridsize, bleft, tright), nontext_map_(nullptr), projection_(nullptr), - denorm_(nullptr), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) { - leaders_win_ = nullptr; - widths_win_ = nullptr; - initial_widths_win_ = nullptr; - chains_win_ = nullptr; - diacritics_win_ = nullptr; - textlines_win_ = nullptr; - smoothed_win_ = nullptr; -} - -StrokeWidth::~StrokeWidth() { - if (widths_win_ != nullptr) { - #ifndef GRAPHICS_DISABLED - delete widths_win_->AwaitEvent(SVET_DESTROY); - #endif // GRAPHICS_DISABLED - if (textord_tabfind_only_strokewidths) - exit(0); - delete widths_win_; - } - delete leaders_win_; - delete initial_widths_win_; - delete chains_win_; - delete textlines_win_; - delete smoothed_win_; - delete diacritics_win_; -} - -// Sets the neighbours member of the medium-sized blobs in the block. -// Searches on 4 sides of each blob for similar-sized, similar-strokewidth -// blobs and sets pointers to the good neighbours. -void StrokeWidth::SetNeighboursOnMediumBlobs(TO_BLOCK* block) { - // Run a preliminary strokewidth neighbour detection on the medium blobs. 
- InsertBlobList(&block->blobs); - BLOBNBOX_IT blob_it(&block->blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - SetNeighbours(false, false, blob_it.data()); - } - Clear(); -} - -// Sets the neighbour/textline writing direction members of the medium -// and large blobs with optional repair of broken CJK characters first. -// Repair of broken CJK is needed here because broken CJK characters -// can fool the textline direction detection algorithm. -void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, - bool cjk_merge, - TO_BLOCK* input_block) { - // Setup the grid with the remaining (non-noise) blobs. - InsertBlobs(input_block); - // Repair broken CJK characters if needed. - while (cjk_merge && FixBrokenCJK(input_block)); - // Grade blobs by inspection of neighbours. - FindTextlineFlowDirection(pageseg_mode, false); - // Clear the grid ready for rotation or leader finding. - Clear(); -} - -// Helper to collect and count horizontal and vertical blobs from a list. -static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs, - int* num_vertical_blobs, - int* num_horizontal_blobs, - BLOBNBOX_CLIST* vertical_blobs, - BLOBNBOX_CLIST* horizontal_blobs, - BLOBNBOX_CLIST* nondescript_blobs) { - BLOBNBOX_C_IT v_it(vertical_blobs); - BLOBNBOX_C_IT h_it(horizontal_blobs); - BLOBNBOX_C_IT n_it(nondescript_blobs); - BLOBNBOX_IT blob_it(input_blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - const TBOX& box = blob->bounding_box(); - float y_x = static_cast(box.height()) / box.width(); - float x_y = 1.0f / y_x; - // Select a >= 1.0 ratio - float ratio = x_y > y_x ? x_y : y_x; - // If the aspect ratio is small and we want them for osd, save the blob. 
- bool ok_blob = ratio <= kSizeRatioToReject; - if (blob->UniquelyVertical()) { - ++*num_vertical_blobs; - if (ok_blob) v_it.add_after_then_move(blob); - } else if (blob->UniquelyHorizontal()) { - ++*num_horizontal_blobs; - if (ok_blob) h_it.add_after_then_move(blob); - } else if (ok_blob) { - n_it.add_after_then_move(blob); - } - } -} - - -// Types all the blobs as vertical or horizontal text or unknown and -// returns true if the majority are vertical. -// If the blobs are rotated, it is necessary to call CorrectForRotation -// after rotating everything, otherwise the work done here will be enough. -// If osd_blobs is not null, a list of blobs from the dominant textline -// direction are returned for use in orientation and script detection. -bool StrokeWidth::TestVerticalTextDirection(double find_vertical_text_ratio, - TO_BLOCK* block, - BLOBNBOX_CLIST* osd_blobs) { - int vertical_boxes = 0; - int horizontal_boxes = 0; - // Count vertical normal and large blobs. - BLOBNBOX_CLIST vertical_blobs; - BLOBNBOX_CLIST horizontal_blobs; - BLOBNBOX_CLIST nondescript_blobs; - CollectHorizVertBlobs(&block->blobs, &vertical_boxes, &horizontal_boxes, - &vertical_blobs, &horizontal_blobs, &nondescript_blobs); - CollectHorizVertBlobs(&block->large_blobs, &vertical_boxes, &horizontal_boxes, - &vertical_blobs, &horizontal_blobs, &nondescript_blobs); - if (textord_debug_tabfind) - tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", - horizontal_boxes, vertical_boxes, - horizontal_blobs.length(), vertical_blobs.length(), - nondescript_blobs.length()); - if (osd_blobs != nullptr && vertical_boxes == 0 && horizontal_boxes == 0) { - // Only nondescript blobs available, so return those. 
- BLOBNBOX_C_IT osd_it(osd_blobs); - osd_it.add_list_after(&nondescript_blobs); - return false; - } - int min_vert_boxes = static_cast((vertical_boxes + horizontal_boxes) * - find_vertical_text_ratio); - if (vertical_boxes >= min_vert_boxes) { - if (osd_blobs != nullptr) { - BLOBNBOX_C_IT osd_it(osd_blobs); - osd_it.add_list_after(&vertical_blobs); - } - return true; - } else { - if (osd_blobs != nullptr) { - BLOBNBOX_C_IT osd_it(osd_blobs); - osd_it.add_list_after(&horizontal_blobs); - } - return false; - } -} - -// Corrects the data structures for the given rotation. -void StrokeWidth::CorrectForRotation(const FCOORD& rotation, - ColPartitionGrid* part_grid) { - Init(part_grid->gridsize(), part_grid->bleft(), part_grid->tright()); - grid_box_ = TBOX(bleft(), tright()); - rerotation_.set_x(rotation.x()); - rerotation_.set_y(-rotation.y()); -} - -// Finds leader partitions and inserts them into the given part_grid. -void StrokeWidth::FindLeaderPartitions(TO_BLOCK* block, - ColPartitionGrid* part_grid) { - Clear(); - // Find and isolate leaders in the noise list. - ColPartition_LIST leader_parts; - FindLeadersAndMarkNoise(block, &leader_parts); - // Setup the strokewidth grid with the block's remaining (non-noise) blobs. - InsertBlobList(&block->blobs); - // Mark blobs that have leader neighbours. - for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) { - ColPartition* part = it.extract(); - part->ClaimBoxes(); - MarkLeaderNeighbours(part, LR_LEFT); - MarkLeaderNeighbours(part, LR_RIGHT); - part_grid->InsertBBox(true, true, part); - } -} - -// Finds and marks noise those blobs that look like bits of vertical lines -// that would otherwise screw up layout analysis. 
-void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) { - BlobGridSearch gsearch(this); - BLOBNBOX* bbox; - // For every vertical line-like bbox in the grid, search its neighbours - // to find the tallest, and if the original box is taller by sufficient - // margin, then call it line residue and delete it. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - TBOX box = bbox->bounding_box(); - if (box.height() < box.width() * kLineResidueAspectRatio) - continue; - // Set up a rectangle search around the blob to find the size of its - // neighbours. - int padding = box.height() * kLineResiduePadRatio; - TBOX search_box = box; - search_box.pad(padding, padding); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), - box.bottom()); - // Find the largest object in the search box not equal to bbox. - BlobGridSearch rsearch(this); - int max_height = 0; - BLOBNBOX* n; - rsearch.StartRectSearch(search_box); - while ((n = rsearch.NextRectSearch()) != nullptr) { - if (n == bbox) continue; - TBOX nbox = n->bounding_box(); - if (nbox.height() > max_height) { - max_height = nbox.height(); - } - } - if (debug) { - tprintf("Max neighbour size=%d for candidate line box at:", max_height); - box.print(); - } - if (max_height * kLineResidueSizeRatio < box.height()) { - #ifndef GRAPHICS_DISABLED - if (leaders_win_ != nullptr) { - // We are debugging, so display deleted in pink blobs in the same - // window that we use to display leader detection. - leaders_win_->Pen(ScrollView::PINK); - leaders_win_->Rectangle(box.left(), box.bottom(), - box.right(), box.top()); - } - #endif // GRAPHICS_DISABLED - ColPartition::MakeBigPartition(bbox, big_part_list); - } - } -} - -// Types all the blobs as vertical text or horizontal text or unknown and -// puts them into initial ColPartitions in the supplied part_grid. 
-// rerotation determines how to get back to the image coordinates from the -// blob coordinates (since they may have been rotated for vertical text). -// block is the single block for the whole page or rectangle to be OCRed. -// nontext_pix (full-size), is a binary mask used to prevent merges across -// photo/text boundaries. It is not kept beyond this function. -// denorm provides a mapping back to the image from the current blob -// coordinate space. -// projection provides a measure of textline density over the image and -// provides functions to assist with diacritic detection. It should be a -// pointer to a new TextlineProjection, and will be setup here. -// part_grid is the output grid of textline partitions. -// Large blobs that cause overlap are put in separate partitions and added -// to the big_parts list. -void StrokeWidth::GradeBlobsIntoPartitions( - PageSegMode pageseg_mode, const FCOORD& rerotation, TO_BLOCK* block, - Pix* nontext_pix, const DENORM* denorm, bool cjk_script, - TextlineProjection* projection, BLOBNBOX_LIST* diacritic_blobs, - ColPartitionGrid* part_grid, ColPartition_LIST* big_parts) { - nontext_map_ = nontext_pix; - projection_ = projection; - denorm_ = denorm; - // Clear and re Insert to take advantage of the tab stops in the blobs. - Clear(); - // Setup the strokewidth grid with the remaining non-noise, non-leader blobs. - InsertBlobs(block); - - // Run FixBrokenCJK() again if the page is CJK. 
- if (cjk_script) { - FixBrokenCJK(block); - } - FindTextlineFlowDirection(pageseg_mode, false); - projection_->ConstructProjection(block, rerotation, nontext_map_); - if (textord_tabfind_show_strokewidths) { - ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs"); - projection_->PlotGradedBlobs(&block->blobs, line_blobs_win); - projection_->PlotGradedBlobs(&block->small_blobs, line_blobs_win); - } - projection_->MoveNonTextlineBlobs(&block->blobs, &block->noise_blobs); - projection_->MoveNonTextlineBlobs(&block->small_blobs, &block->noise_blobs); - // Clear and re Insert to take advantage of the removed diacritics. - Clear(); - InsertBlobs(block); - FCOORD skew; - FindTextlineFlowDirection(pageseg_mode, true); - PartitionFindResult r = - FindInitialPartitions(pageseg_mode, rerotation, true, block, - diacritic_blobs, part_grid, big_parts, &skew); - if (r == PFR_NOISE) { - tprintf("Detected %d diacritics\n", diacritic_blobs->length()); - // Noise was found, and removed. - Clear(); - InsertBlobs(block); - FindTextlineFlowDirection(pageseg_mode, true); - r = FindInitialPartitions(pageseg_mode, rerotation, false, block, - diacritic_blobs, part_grid, big_parts, &skew); - } - nontext_map_ = nullptr; - projection_ = nullptr; - denorm_ = nullptr; -} - -static void PrintBoxWidths(BLOBNBOX* neighbour) { - const TBOX& nbox = neighbour->bounding_box(); - tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", - nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), - neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), - 2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter()); -} - -/** Handles a click event in a display window. */ -void StrokeWidth::HandleClick(int x, int y) { - BBGrid::HandleClick(x, y); - // Run a radial search for blobs that overlap. 
- BlobGridSearch radsearch(this); - radsearch.StartRadSearch(x, y, 1); - BLOBNBOX* neighbour; - FCOORD click(static_cast(x), static_cast(y)); - while ((neighbour = radsearch.NextRadSearch()) != nullptr) { - TBOX nbox = neighbour->bounding_box(); - if (nbox.contains(click) && neighbour->cblob() != nullptr) { - PrintBoxWidths(neighbour); - if (neighbour->neighbour(BND_LEFT) != nullptr) - PrintBoxWidths(neighbour->neighbour(BND_LEFT)); - if (neighbour->neighbour(BND_RIGHT) != nullptr) - PrintBoxWidths(neighbour->neighbour(BND_RIGHT)); - if (neighbour->neighbour(BND_ABOVE) != nullptr) - PrintBoxWidths(neighbour->neighbour(BND_ABOVE)); - if (neighbour->neighbour(BND_BELOW) != nullptr) - PrintBoxWidths(neighbour->neighbour(BND_BELOW)); - int gaps[BND_COUNT]; - neighbour->NeighbourGaps(gaps); - tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" - "Good= %d %d %d %d\n", - gaps[BND_LEFT], gaps[BND_RIGHT], - gaps[BND_ABOVE], gaps[BND_BELOW], - neighbour->horz_possible(), - neighbour->vert_possible(), - neighbour->good_stroke_neighbour(BND_LEFT), - neighbour->good_stroke_neighbour(BND_RIGHT), - neighbour->good_stroke_neighbour(BND_ABOVE), - neighbour->good_stroke_neighbour(BND_BELOW)); - break; - } - } -} - -// Detects and marks leader dots/dashes. -// Leaders are horizontal chains of small or noise blobs that look -// monospace according to ColPartition::MarkAsLeaderIfMonospaced(). -// Detected leaders become the only occupants of the block->small_blobs list. -// Non-leader small blobs get moved to the blobs list. -// Non-leader noise blobs remain singletons in the noise list. -// All small and noise blobs in high density regions are marked BTFT_NONTEXT. -// block is the single block for the whole page or rectangle to be OCRed. -// leader_parts is the output. 
-void StrokeWidth::FindLeadersAndMarkNoise(TO_BLOCK* block, - ColPartition_LIST* leader_parts) { - InsertBlobList(&block->small_blobs); - InsertBlobList(&block->noise_blobs); - BlobGridSearch gsearch(this); - BLOBNBOX* bbox; - // For every bbox in the grid, set its neighbours. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - SetNeighbours(true, false, bbox); - } - ColPartition_IT part_it(leader_parts); - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - if (bbox->flow() == BTFT_NONE) { - if (bbox->neighbour(BND_RIGHT) == nullptr && - bbox->neighbour(BND_LEFT) == nullptr) - continue; - // Put all the linked blobs into a ColPartition. - ColPartition* part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); - BLOBNBOX* blob; - for (blob = bbox; blob != nullptr && blob->flow() == BTFT_NONE; - blob = blob->neighbour(BND_RIGHT)) - part->AddBox(blob); - for (blob = bbox->neighbour(BND_LEFT); blob != nullptr && - blob->flow() == BTFT_NONE; - blob = blob->neighbour(BND_LEFT)) - part->AddBox(blob); - if (part->MarkAsLeaderIfMonospaced()) - part_it.add_after_then_move(part); - else - delete part; - } - } - if (textord_tabfind_show_strokewidths) { - leaders_win_ = DisplayGoodBlobs("LeaderNeighbours", 0, 0); - } - // Move any non-leaders from the small to the blobs list, as they are - // most likely dashes or broken characters. - BLOBNBOX_IT blob_it(&block->blobs); - BLOBNBOX_IT small_it(&block->small_blobs); - for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { - BLOBNBOX* blob = small_it.data(); - if (blob->flow() != BTFT_LEADER) { - if (blob->flow() == BTFT_NEIGHBOURS) - blob->set_flow(BTFT_NONE); - blob->ClearNeighbours(); - blob_it.add_to_end(small_it.extract()); - } - } - // Move leaders from the noise list to the small list, leaving the small - // list exclusively leaders, so they don't get processed further, - // and the remaining small blobs all in the noise list. 
- BLOBNBOX_IT noise_it(&block->noise_blobs); - for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) { - BLOBNBOX* blob = noise_it.data(); - if (blob->flow() == BTFT_LEADER || blob->joined_to_prev()) { - small_it.add_to_end(noise_it.extract()); - } else if (blob->flow() == BTFT_NEIGHBOURS) { - blob->set_flow(BTFT_NONE); - blob->ClearNeighbours(); - } - } - // Clear the grid as we don't want the small stuff hanging around in it. - Clear(); -} - -/** Inserts the block blobs (normal and large) into this grid. - * Blobs remain owned by the block. */ -void StrokeWidth::InsertBlobs(TO_BLOCK* block) { - InsertBlobList(&block->blobs); - InsertBlobList(&block->large_blobs); -} - -// Checks the left or right side of the given leader partition and sets the -// (opposite) leader_on_right or leader_on_left flags for blobs -// that are next to the given side of the given leader partition. -void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part, - LeftOrRight side) { - const TBOX& part_box = part->bounding_box(); - BlobGridSearch blobsearch(this); - // Search to the side of the leader for the nearest neighbour. - BLOBNBOX* best_blob = nullptr; - int best_gap = 0; - blobsearch.StartSideSearch(side == LR_LEFT ? part_box.left() - : part_box.right(), - part_box.bottom(), part_box.top()); - BLOBNBOX* blob; - while ((blob = blobsearch.NextSideSearch(side == LR_LEFT)) != nullptr) { - const TBOX& blob_box = blob->bounding_box(); - if (!blob_box.y_overlap(part_box)) - continue; - int x_gap = blob_box.x_gap(part_box); - if (x_gap > 2 * gridsize()) { - break; - } else if (best_blob == nullptr || x_gap < best_gap) { - best_blob = blob; - best_gap = x_gap; - } - } - if (best_blob != nullptr) { - if (side == LR_LEFT) - best_blob->set_leader_on_right(true); - else - best_blob->set_leader_on_left(true); - #ifndef GRAPHICS_DISABLED - if (leaders_win_ != nullptr) { - leaders_win_->Pen(side == LR_LEFT ? 
ScrollView::RED : ScrollView::GREEN); - const TBOX& blob_box = best_blob->bounding_box(); - leaders_win_->Rectangle(blob_box.left(), blob_box.bottom(), - blob_box.right(), blob_box.top()); - } - #endif // GRAPHICS_DISABLED - } -} - -// Helper to compute the UQ of the square-ish CJK characters. -static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST* blobs) { - STATS sizes(0, gridsize * kMaxCJKSizeRatio); - BLOBNBOX_IT it(blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - int width = blob->bounding_box().width(); - int height = blob->bounding_box().height(); - if (width <= height * kCJKAspectRatio && height < width * kCJKAspectRatio) - sizes.add(height, 1); - } - return static_cast(sizes.ile(0.75f) + 0.5); -} - -// Fix broken CJK characters, using the fake joined blobs mechanism. -// Blobs are really merged, ie the master takes all the outlines and the -// others are deleted. -// Returns true if sufficient blobs are merged that it may be worth running -// again, due to a better estimate of character size. -bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) { - BLOBNBOX_LIST* blobs = &block->blobs; - int median_height = UpperQuartileCJKSize(gridsize(), blobs); - int max_dist = static_cast(median_height * kCJKBrokenDistanceFraction); - int max_height = static_cast(median_height * kCJKAspectRatio); - int num_fixed = 0; - BLOBNBOX_IT blob_it(blobs); - - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->cblob() == nullptr || blob->cblob()->out_list()->empty()) - continue; - TBOX bbox = blob->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(), - bbox.bottom()); - if (debug) { - tprintf("Checking for Broken CJK (max size=%d):", max_height); - bbox.print(); - } - // Generate a list of blobs that overlap or are near enough to merge. 
- BLOBNBOX_CLIST overlapped_blobs; - AccumulateOverlaps(blob, debug, max_height, max_dist, - &bbox, &overlapped_blobs); - if (!overlapped_blobs.empty()) { - // There are overlapping blobs, so qualify them as being satisfactory - // before removing them from the grid and replacing them with the union. - // The final box must be roughly square. - if (bbox.width() > bbox.height() * kCJKAspectRatio || - bbox.height() > bbox.width() * kCJKAspectRatio) { - if (debug) { - tprintf("Bad final aspectratio:"); - bbox.print(); - } - continue; - } - // There can't be too many blobs to merge. - if (overlapped_blobs.length() >= kCJKMaxComponents) { - if (debug) - tprintf("Too many neighbours: %d\n", overlapped_blobs.length()); - continue; - } - // The strokewidths must match amongst the join candidates. - BLOBNBOX_C_IT n_it(&overlapped_blobs); - for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { - BLOBNBOX* neighbour = nullptr; - neighbour = n_it.data(); - if (!blob->MatchingStrokeWidth(*neighbour, kStrokeWidthFractionCJK, - kStrokeWidthCJK)) - break; - } - if (!n_it.cycled_list()) { - if (debug) { - tprintf("Bad stroke widths:"); - PrintBoxWidths(blob); - } - continue; // Not good enough. - } - - // Merge all the candidates into blob. - // We must remove blob from the grid and reinsert it after merging - // to maintain the integrity of the grid. - RemoveBBox(blob); - // Everything else will be calculated later. - for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { - BLOBNBOX* neighbour = n_it.data(); - RemoveBBox(neighbour); - // Mark empty blob for deletion. - neighbour->set_region_type(BRT_NOISE); - blob->really_merge(neighbour); - if (rerotation_.x() != 1.0f || rerotation_.y() != 0.0f) { - blob->rotate_box(rerotation_); - } - } - InsertBBox(true, true, blob); - ++num_fixed; - if (debug) { - tprintf("Done! Final box:"); - bbox.print(); - } - } - } - // Count remaining blobs. 
- int num_remaining = 0; - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->cblob() != nullptr && !blob->cblob()->out_list()->empty()) { - ++num_remaining; - } - } - // Permanently delete all the marked blobs after first removing all - // references in the neighbour members. - block->DeleteUnownedNoise(); - return num_fixed > num_remaining * kBrokenCJKIterationFraction; -} - -// Helper function to determine whether it is reasonable to merge the -// bbox and the nbox for repairing broken CJK. -// The distance apart must not exceed max_dist, the combined size must -// not exceed max_size, and the aspect ratio must either improve or at -// least not get worse by much. -static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox, - bool debug, int max_size, int max_dist, - int* x_gap, int* y_gap) { - *x_gap = bbox.x_gap(nbox); - *y_gap = bbox.y_gap(nbox); - TBOX merged(nbox); - merged += bbox; - if (debug) { - tprintf("gaps = %d, %d, merged_box:", *x_gap, *y_gap); - merged.print(); - } - if (*x_gap <= max_dist && *y_gap <= max_dist && - merged.width() <= max_size && merged.height() <= max_size) { - // Close enough to call overlapping. Check aspect ratios. - double old_ratio = static_cast(bbox.width()) / bbox.height(); - if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio; - double new_ratio = static_cast(merged.width()) / merged.height(); - if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio; - if (new_ratio <= old_ratio * kCJKAspectRatioIncrease) - return true; - } - return false; -} - -// Collect blobs that overlap or are within max_dist of the input bbox. -// Return them in the list of blobs and expand the bbox to be the union -// of all the boxes. not_this is excluded from the search, as are blobs -// that cause the merged box to exceed max_size in either dimension. 
-void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, - int max_size, int max_dist, - TBOX* bbox, BLOBNBOX_CLIST* blobs) { - // While searching, nearests holds the nearest failed blob in each - // direction. When we have a nearest in each of the 4 directions, then - // the search is over, and at this point the final bbox must not overlap - // any of the nearests. - BLOBNBOX* nearests[BND_COUNT]; - for (int i = 0; i < BND_COUNT; ++i) { - nearests[i] = nullptr; - } - int x = (bbox->left() + bbox->right()) / 2; - int y = (bbox->bottom() + bbox->top()) / 2; - // Run a radial search for blobs that overlap or are sufficiently close. - BlobGridSearch radsearch(this); - radsearch.StartRadSearch(x, y, kCJKRadius); - BLOBNBOX* neighbour; - while ((neighbour = radsearch.NextRadSearch()) != nullptr) { - if (neighbour == not_this) continue; - TBOX nbox = neighbour->bounding_box(); - int x_gap, y_gap; - if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, - &x_gap, &y_gap)) { - // Close enough to call overlapping. Merge boxes. - *bbox += nbox; - blobs->add_sorted(SortByBoxLeft, true, neighbour); - if (debug) { - tprintf("Added:"); - nbox.print(); - } - // Since we merged, search the nearests, as some might now me mergeable. - for (int dir = 0; dir < BND_COUNT; ++dir) { - if (nearests[dir] == nullptr) continue; - nbox = nearests[dir]->bounding_box(); - if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, - max_dist, &x_gap, &y_gap)) { - // Close enough to call overlapping. Merge boxes. - *bbox += nbox; - blobs->add_sorted(SortByBoxLeft, true, nearests[dir]); - if (debug) { - tprintf("Added:"); - nbox.print(); - } - nearests[dir] = nullptr; - dir = -1; // Restart the search. - } - } - } else if (x_gap < 0 && x_gap <= y_gap) { - // A vertical neighbour. Record the nearest. - BlobNeighbourDir dir = nbox.top() > bbox->top() ? 
BND_ABOVE : BND_BELOW; - if (nearests[dir] == nullptr || - y_gap < bbox->y_gap(nearests[dir]->bounding_box())) { - nearests[dir] = neighbour; - } - } else if (y_gap < 0 && y_gap <= x_gap) { - // A horizontal neighbour. Record the nearest. - BlobNeighbourDir dir = nbox.left() > bbox->left() ? BND_RIGHT : BND_LEFT; - if (nearests[dir] == nullptr || - x_gap < bbox->x_gap(nearests[dir]->bounding_box())) { - nearests[dir] = neighbour; - } - } - // If all nearests are non-null, then we have finished. - if (nearests[BND_LEFT] && nearests[BND_RIGHT] && - nearests[BND_ABOVE] && nearests[BND_BELOW]) - break; - } - // Final overlap with a nearest is not allowed. - for (int dir = 0; dir < BND_COUNT; ++dir) { - if (nearests[dir] == nullptr) continue; - const TBOX& nbox = nearests[dir]->bounding_box(); - if (debug) { - tprintf("Testing for overlap with:"); - nbox.print(); - } - if (bbox->overlap(nbox)) { - blobs->shallow_clear(); - if (debug) - tprintf("Final box overlaps nearest\n"); - return; - } - } -} - -// For each blob in this grid, Finds the textline direction to be horizontal -// or vertical according to distance to neighbours and 1st and 2nd order -// neighbours. Non-text tends to end up without a definite direction. -// Result is setting of the neighbours and vert_possible/horz_possible -// flags in the BLOBNBOXes currently in this grid. -// This function is called more than once if page orientation is uncertain, -// so display_if_debugging is true on the final call to display the results. -void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode, - bool display_if_debugging) { - BlobGridSearch gsearch(this); - BLOBNBOX* bbox; - // For every bbox in the grid, set its neighbours. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - SetNeighbours(false, display_if_debugging, bbox); - } - // Where vertical or horizontal wins by a big margin, clarify it. 
- gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - SimplifyObviousNeighbours(bbox); - } - // Now try to make the blobs only vertical or horizontal using neighbours. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - if (FindingVerticalOnly(pageseg_mode)) { - bbox->set_vert_possible(true); - bbox->set_horz_possible(false); - } else if (FindingHorizontalOnly(pageseg_mode)) { - bbox->set_vert_possible(false); - bbox->set_horz_possible(true); - } else { - SetNeighbourFlows(bbox); - } - } - if ((textord_tabfind_show_strokewidths && display_if_debugging) || - textord_tabfind_show_strokewidths > 1) { - initial_widths_win_ = DisplayGoodBlobs("InitialStrokewidths", 400, 0); - } - // Improve flow direction with neighbours. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - SmoothNeighbourTypes(pageseg_mode, false, bbox); - } - // Now allow reset of firm values to fix renegades. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - SmoothNeighbourTypes(pageseg_mode, true, bbox); - } - // Repeat. - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - SmoothNeighbourTypes(pageseg_mode, true, bbox); - } - if ((textord_tabfind_show_strokewidths && display_if_debugging) || - textord_tabfind_show_strokewidths > 1) { - widths_win_ = DisplayGoodBlobs("ImprovedStrokewidths", 800, 0); - } -} - -// Sets the neighbours and good_stroke_neighbours members of the blob by -// searching close on all 4 sides. -// When finding leader dots/dashes, there is a slightly different rule for -// what makes a good neighbour. 
-void StrokeWidth::SetNeighbours(bool leaders, bool activate_line_trap, - BLOBNBOX* blob) { - int line_trap_count = 0; - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - line_trap_count += FindGoodNeighbour(bnd, leaders, blob); - } - if (line_trap_count > 0 && activate_line_trap) { - // It looks like a line so isolate it by clearing its neighbours. - blob->ClearNeighbours(); - const TBOX& box = blob->bounding_box(); - blob->set_region_type(box.width() > box.height() ? BRT_HLINE : BRT_VLINE); - } -} - - -// Sets the good_stroke_neighbours member of the blob if it has a -// GoodNeighbour on the given side. -// Also sets the neighbour in the blob, whether or not a good one is found. -// Returns the number of blobs in the nearby search area that would lead us to -// believe that this blob is a line separator. -// Leaders get extra special lenient treatment. -int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, - BLOBNBOX* blob) { - // Search for neighbours that overlap vertically. - TBOX blob_box = blob->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, blob_box.left(), - blob_box.bottom()); - if (debug) { - tprintf("FGN in dir %d for blob:", dir); - blob_box.print(); - } - int top = blob_box.top(); - int bottom = blob_box.bottom(); - int left = blob_box.left(); - int right = blob_box.right(); - int width = right - left; - int height = top - bottom; - - // A trap to detect lines tests for the min dimension of neighbours - // being larger than a multiple of the min dimension of the line - // and the larger dimension being smaller than a fraction of the max - // dimension of the line. - int line_trap_max = std::max(width, height) / kLineTrapLongest; - int line_trap_min = std::min(width, height) * kLineTrapShortest; - int line_trap_count = 0; - - int min_good_overlap = (dir == BND_LEFT || dir == BND_RIGHT) - ? height / 2 : width / 2; - int min_decent_overlap = (dir == BND_LEFT || dir == BND_RIGHT) - ? 
height / 3 : width / 3; - if (leaders) - min_good_overlap = min_decent_overlap = 1; - - int search_pad = static_cast( - sqrt(static_cast(width * height)) * kNeighbourSearchFactor); - if (gridsize() > search_pad) - search_pad = gridsize(); - TBOX search_box = blob_box; - // Pad the search in the appropriate direction. - switch (dir) { - case BND_LEFT: - search_box.set_left(search_box.left() - search_pad); - break; - case BND_RIGHT: - search_box.set_right(search_box.right() + search_pad); - break; - case BND_BELOW: - search_box.set_bottom(search_box.bottom() - search_pad); - break; - case BND_ABOVE: - search_box.set_top(search_box.top() + search_pad); - break; - case BND_COUNT: - return 0; - } - - BlobGridSearch rectsearch(this); - rectsearch.StartRectSearch(search_box); - BLOBNBOX* best_neighbour = nullptr; - double best_goodness = 0.0; - bool best_is_good = false; - BLOBNBOX* neighbour; - while ((neighbour = rectsearch.NextRectSearch()) != nullptr) { - TBOX nbox = neighbour->bounding_box(); - if (neighbour == blob) - continue; - int mid_x = (nbox.left() + nbox.right()) / 2; - if (mid_x < blob->left_rule() || mid_x > blob->right_rule()) - continue; // In a different column. - if (debug) { - tprintf("Neighbour at:"); - nbox.print(); - } - - // Last-minute line detector. There is a small upper limit to the line - // width accepted by the morphological line detector. - int n_width = nbox.width(); - int n_height = nbox.height(); - if (std::min(n_width, n_height) > line_trap_min && - std::max(n_width, n_height) < line_trap_max) - ++line_trap_count; - // Heavily joined text, such as Arabic may have very different sizes when - // looking at the maxes, but the heights may be almost identical, so check - // for a difference in height if looking sideways or width vertically. 
- if (TabFind::VeryDifferentSizes(std::max(n_width, n_height), - std::max(width, height)) && - (((dir == BND_LEFT || dir ==BND_RIGHT) && - TabFind::DifferentSizes(n_height, height)) || - ((dir == BND_BELOW || dir ==BND_ABOVE) && - TabFind::DifferentSizes(n_width, width)))) { - if (debug) tprintf("Bad size\n"); - continue; // Could be a different font size or non-text. - } - // Amount of vertical overlap between the blobs. - int overlap; - // If the overlap is along the short side of the neighbour, and it - // is fully overlapped, then perp_overlap holds the length of the long - // side of the neighbour. A measure to include hyphens and dashes as - // legitimate neighbours. - int perp_overlap; - int gap; - if (dir == BND_LEFT || dir == BND_RIGHT) { - overlap = std::min(static_cast(nbox.top()), top) - std::max(static_cast(nbox.bottom()), bottom); - if (overlap == nbox.height() && nbox.width() > nbox.height()) - perp_overlap = nbox.width(); - else - perp_overlap = overlap; - gap = dir == BND_LEFT ? left - nbox.left() : nbox.right() - right; - if (gap <= 0) { - if (debug) tprintf("On wrong side\n"); - continue; // On the wrong side. - } - gap -= n_width; - } else { - overlap = std::min(static_cast(nbox.right()), right) - std::max(static_cast(nbox.left()), left); - if (overlap == nbox.width() && nbox.height() > nbox.width()) - perp_overlap = nbox.height(); - else - perp_overlap = overlap; - gap = dir == BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top; - if (gap <= 0) { - if (debug) tprintf("On wrong side\n"); - continue; // On the wrong side. - } - gap -= n_height; - } - if (-gap > overlap) { - if (debug) tprintf("Overlaps wrong way\n"); - continue; // Overlaps the wrong way. - } - if (perp_overlap < min_decent_overlap) { - if (debug) tprintf("Doesn't overlap enough\n"); - continue; // Doesn't overlap enough. 
- } - bool bad_sizes = TabFind::DifferentSizes(height, n_height) && - TabFind::DifferentSizes(width, n_width); - bool is_good = overlap >= min_good_overlap && !bad_sizes && - blob->MatchingStrokeWidth(*neighbour, - kStrokeWidthFractionTolerance, - kStrokeWidthTolerance); - // Best is a fuzzy combination of gap, overlap and is good. - // Basically if you make one thing twice as good without making - // anything else twice as bad, then it is better. - if (gap < 1) gap = 1; - double goodness = (1.0 + is_good) * overlap / gap; - if (debug) { - tprintf("goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n", - goodness, best_goodness, is_good, overlap, gap); - } - if (goodness > best_goodness) { - best_neighbour = neighbour; - best_goodness = goodness; - best_is_good = is_good; - } - } - blob->set_neighbour(dir, best_neighbour, best_is_good); - return line_trap_count; -} - -// Helper to get a list of 1st-order neighbours. -static void ListNeighbours(const BLOBNBOX* blob, - BLOBNBOX_CLIST* neighbours) { - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - BLOBNBOX* neighbour = blob->neighbour(bnd); - if (neighbour != nullptr) { - neighbours->add_sorted(SortByBoxLeft, true, neighbour); - } - } -} - -// Helper to get a list of 1st and 2nd order neighbours. -static void List2ndNeighbours(const BLOBNBOX* blob, - BLOBNBOX_CLIST* neighbours) { - ListNeighbours(blob, neighbours); - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - BLOBNBOX* neighbour = blob->neighbour(bnd); - if (neighbour != nullptr) { - ListNeighbours(neighbour, neighbours); - } - } -} - -// Helper to get a list of 1st, 2nd and 3rd order neighbours. 
-static void List3rdNeighbours(const BLOBNBOX* blob, - BLOBNBOX_CLIST* neighbours) { - List2ndNeighbours(blob, neighbours); - for (int dir = 0; dir < BND_COUNT; ++dir) { - BlobNeighbourDir bnd = static_cast(dir); - BLOBNBOX* neighbour = blob->neighbour(bnd); - if (neighbour != nullptr) { - List2ndNeighbours(neighbour, neighbours); - } - } -} - -// Helper to count the evidence for verticalness or horizontalness -// in a list of neighbours. -static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, - int* pure_h_count, int* pure_v_count) { - if (neighbours->length() <= kMostlyOneDirRatio) - return; - BLOBNBOX_C_IT it(neighbours); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - int h_min, h_max, v_min, v_max; - blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); - if (debug) - tprintf("Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max); - if (h_max < v_min || - blob->leader_on_left() || blob->leader_on_right()) { - // Horizontal gaps are clear winners. Count a pure horizontal. - ++*pure_h_count; - if (debug) tprintf("Horz at:"); - } else if (v_max < h_min) { - // Vertical gaps are clear winners. Clear a pure vertical. - ++*pure_v_count; - if (debug) tprintf("Vert at:"); - } else { - if (debug) tprintf("Neither at:"); - } - if (debug) - blob->bounding_box().print(); - } -} - -// Makes the blob to be only horizontal or vertical where evidence -// is clear based on gaps of 2nd order neighbours, or definite individual -// blobs. 
-void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) { - if (blob->DefiniteIndividualFlow()) - return; - bool debug = AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), - blob->bounding_box().bottom()); - if (debug) { - tprintf("SetNeighbourFlows (current flow=%d, type=%d) on:", - blob->flow(), blob->region_type()); - blob->bounding_box().print(); - } - BLOBNBOX_CLIST neighbours; - List3rdNeighbours(blob, &neighbours); - // The number of pure horizontal and vertical neighbours. - int pure_h_count = 0; - int pure_v_count = 0; - CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count); - if (debug) { - HandleClick(blob->bounding_box().left() + 1, - blob->bounding_box().bottom() + 1); - tprintf("SetFlows: h_count=%d, v_count=%d\n", - pure_h_count, pure_v_count); - } - if (!neighbours.empty()) { - blob->set_vert_possible(true); - blob->set_horz_possible(true); - if (pure_h_count > 2 * pure_v_count) { - // Horizontal gaps are clear winners. Clear vertical neighbours. - blob->set_vert_possible(false); - } else if (pure_v_count > 2 * pure_h_count) { - // Vertical gaps are clear winners. Clear horizontal neighbours. - blob->set_horz_possible(false); - } - } else { - // Lonely blob. Can't tell its flow direction. - blob->set_vert_possible(false); - blob->set_horz_possible(false); - } -} - - -// Helper to count the number of horizontal and vertical blobs in a list. -static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours, - int* pure_h_count, int* pure_v_count) { - BLOBNBOX_C_IT it(neighbours); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - if (blob->UniquelyHorizontal()) - ++*pure_h_count; - if (blob->UniquelyVertical()) - ++*pure_v_count; - } -} - -// Nullify the neighbours in the wrong directions where the direction -// is clear-cut based on a distance margin. Good for isolating vertical -// text from neighbouring horizontal text. 
-void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) { - // Case 1: We have text that is likely several characters, blurry and joined - // together. - if ((blob->bounding_box().width() > 3 * blob->area_stroke_width() && - blob->bounding_box().height() > 3 * blob->area_stroke_width())) { - // The blob is complex (not stick-like). - if (blob->bounding_box().width() > 4 * blob->bounding_box().height()) { - // Horizontal conjoined text. - blob->set_neighbour(BND_ABOVE, nullptr, false); - blob->set_neighbour(BND_BELOW, nullptr, false); - return; - } - if (blob->bounding_box().height() > 4 * blob->bounding_box().width()) { - // Vertical conjoined text. - blob->set_neighbour(BND_LEFT, nullptr, false); - blob->set_neighbour(BND_RIGHT, nullptr, false); - return; - } - } - - // Case 2: This blob is likely a single character. - int margin = gridsize() / 2; - int h_min, h_max, v_min, v_max; - blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); - if ((h_max + margin < v_min && h_max < margin / 2) || - blob->leader_on_left() || blob->leader_on_right()) { - // Horizontal gaps are clear winners. Clear vertical neighbours. - blob->set_neighbour(BND_ABOVE, nullptr, false); - blob->set_neighbour(BND_BELOW, nullptr, false); - } else if (v_max + margin < h_min && v_max < margin / 2) { - // Vertical gaps are clear winners. Clear horizontal neighbours. - blob->set_neighbour(BND_LEFT, nullptr, false); - blob->set_neighbour(BND_RIGHT, nullptr, false); - } -} - -// Smoothes the vertical/horizontal type of the blob based on the -// 2nd-order neighbours. If reset_all is true, then all blobs are -// changed. Otherwise, only ambiguous blobs are processed. -void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all, - BLOBNBOX* blob) { - if ((blob->vert_possible() && blob->horz_possible()) || reset_all) { - // There are both horizontal and vertical so try to fix it. 
- BLOBNBOX_CLIST neighbours; - List2ndNeighbours(blob, &neighbours); - // The number of pure horizontal and vertical neighbours. - int pure_h_count = 0; - int pure_v_count = 0; - CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count); - if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), - blob->bounding_box().bottom())) { - HandleClick(blob->bounding_box().left() + 1, - blob->bounding_box().bottom() + 1); - tprintf("pure_h=%d, pure_v=%d\n", - pure_h_count, pure_v_count); - } - if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) { - // Horizontal gaps are clear winners. Clear vertical neighbours. - blob->set_vert_possible(false); - blob->set_horz_possible(true); - } else if (pure_v_count > pure_h_count && - !FindingHorizontalOnly(pageseg_mode)) { - // Vertical gaps are clear winners. Clear horizontal neighbours. - blob->set_horz_possible(false); - blob->set_vert_possible(true); - } - } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), - blob->bounding_box().bottom())) { - HandleClick(blob->bounding_box().left() + 1, - blob->bounding_box().bottom() + 1); - tprintf("Clean on pass 3!\n"); - } -} - -// Partition creation. Accumulates vertical and horizontal text chains, -// puts the remaining blobs in as unknowns, and then merges/splits to -// minimize overlap and smoothes the types with neighbours and the color -// image if provided. rerotation is used to rotate the coordinate space -// back to the nontext_map_ image. -// If find_problems is true, detects possible noise pollution by the amount -// of partition overlap that is created by the diacritics. If excessive, the -// noise is separated out into diacritic blobs, and PFR_NOISE is returned. -// [TODO(rays): if the partition overlap is caused by heavy skew, deskews -// the components, saves the skew_angle and returns PFR_SKEW.] 
If the return -// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be -// called again after cleaning up the partly done work. -PartitionFindResult StrokeWidth::FindInitialPartitions( - PageSegMode pageseg_mode, const FCOORD& rerotation, bool find_problems, - TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs, - ColPartitionGrid* part_grid, ColPartition_LIST* big_parts, - FCOORD* skew_angle) { - if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid); - if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid); - if (textord_tabfind_show_strokewidths) { - chains_win_ = MakeWindow(0, 400, "Initial text chains"); - part_grid->DisplayBoxes(chains_win_); - projection_->DisplayProjection(); - } - if (find_problems) { - // TODO(rays) Do something to find skew, set skew_angle and return if there - // is some. - } - part_grid->SplitOverlappingPartitions(big_parts); - EasyMerges(part_grid); - RemoveLargeUnusedBlobs(block, part_grid, big_parts); - TBOX grid_box(bleft(), tright()); - while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box, - rerotation)); - while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_, - grid_box, rerotation)); - int pre_overlap = part_grid->ComputeTotalOverlap(nullptr); - TestDiacritics(part_grid, block); - MergeDiacritics(block, part_grid); - if (find_problems && diacritic_blobs != nullptr && - DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid, - diacritic_blobs)) { - return PFR_NOISE; - } - if (textord_tabfind_show_strokewidths) { - textlines_win_ = MakeWindow(400, 400, "GoodTextline blobs"); - part_grid->DisplayBoxes(textlines_win_); - diacritics_win_ = DisplayDiacritics("Diacritics", 0, 0, block); - } - PartitionRemainingBlobs(pageseg_mode, part_grid); - part_grid->SplitOverlappingPartitions(big_parts); - EasyMerges(part_grid); - while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box, - rerotation)); - while 
(part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_, - grid_box, rerotation)); - // Now eliminate strong stuff in a sea of the opposite. - while (part_grid->GridSmoothNeighbours(BTFT_STRONG_CHAIN, nontext_map_, - grid_box, rerotation)); - if (textord_tabfind_show_strokewidths) { - smoothed_win_ = MakeWindow(800, 400, "Smoothed blobs"); - part_grid->DisplayBoxes(smoothed_win_); - } - return PFR_OK; -} - -// Detects noise by a significant increase in partition overlap from -// pre_overlap to now, and removes noise from the union of all the overlapping -// partitions, placing the blobs in diacritic_blobs. Returns true if any noise -// was found and removed. -bool StrokeWidth::DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box, - TO_BLOCK* block, - ColPartitionGrid* part_grid, - BLOBNBOX_LIST* diacritic_blobs) { - ColPartitionGrid* noise_grid = nullptr; - int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid); - if (pre_overlap == 0) pre_overlap = 1; - BLOBNBOX_IT diacritic_it(diacritic_blobs); - if (noise_grid != nullptr) { - if (post_overlap > pre_overlap * kNoiseOverlapGrowthFactor && - post_overlap > grid_box.area() * kNoiseOverlapAreaFactor) { - // This is noisy enough to fix. - if (textord_tabfind_show_strokewidths) { - ScrollView* noise_win = MakeWindow(1000, 500, "Noise Areas"); - noise_grid->DisplayBoxes(noise_win); - } - part_grid->DeleteNonLeaderParts(); - BLOBNBOX_IT blob_it(&block->noise_blobs); - ColPartitionGridSearch rsearch(noise_grid); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - blob->ClearNeighbours(); - if (!blob->IsDiacritic() || blob->owner() != nullptr) - continue; // Not a noise candidate. 
- TBOX blob_box(blob->bounding_box()); - TBOX search_box(blob->bounding_box()); - search_box.pad(gridsize(), gridsize()); - rsearch.StartRectSearch(search_box); - ColPartition* part = rsearch.NextRectSearch(); - if (part != nullptr) { - // Consider blob as possible noise. - blob->set_owns_cblob(true); - blob->compute_bounding_box(); - diacritic_it.add_after_then_move(blob_it.extract()); - } - } - noise_grid->DeleteParts(); - delete noise_grid; - return true; - } - noise_grid->DeleteParts(); - delete noise_grid; - } - return false; -} - -// Helper verifies that blob's neighbour in direction dir is good to add to a -// vertical text chain by returning the neighbour if it is not null, not owned, -// and not uniquely horizontal, as well as its neighbour in the opposite -// direction is blob. -static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob, - BlobNeighbourDir dir) { - BLOBNBOX* next_blob = blob->neighbour(dir); - if (next_blob == nullptr || next_blob->owner() != nullptr || - next_blob->UniquelyHorizontal()) - return nullptr; - if (next_blob->neighbour(DirOtherWay(dir)) == blob) - return next_blob; - return nullptr; -} - -// Finds vertical chains of text-like blobs and puts them in ColPartitions. -void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) { - // A PageSegMode that forces vertical textlines with the current rotation. - PageSegMode pageseg_mode = - rerotation_.y() == 0.0f ? PSM_SINGLE_BLOCK_VERT_TEXT : PSM_SINGLE_COLUMN; - BlobGridSearch gsearch(this); - BLOBNBOX* bbox; - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - // Only process boxes that have no horizontal hope and have not yet - // been included in a chain. - BLOBNBOX* blob; - if (bbox->owner() == nullptr && bbox->UniquelyVertical() && - (blob = MutualUnusedVNeighbour(bbox, BND_ABOVE)) != nullptr) { - // Put all the linked blobs into a ColPartition. 
- ColPartition* part = new ColPartition(BRT_VERT_TEXT, ICOORD(0, 1)); - part->AddBox(bbox); - while (blob != nullptr) { - part->AddBox(blob); - blob = MutualUnusedVNeighbour(blob, BND_ABOVE); - } - blob = MutualUnusedVNeighbour(bbox, BND_BELOW); - while (blob != nullptr) { - part->AddBox(blob); - blob = MutualUnusedVNeighbour(blob, BND_BELOW); - } - CompletePartition(pageseg_mode, part, part_grid); - } - } -} - -// Helper verifies that blob's neighbour in direction dir is good to add to a -// horizontal text chain by returning the neighbour if it is not null, not -// owned, and not uniquely vertical, as well as its neighbour in the opposite -// direction is blob. -static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob, - BlobNeighbourDir dir) { - BLOBNBOX* next_blob = blob->neighbour(dir); - if (next_blob == nullptr || next_blob->owner() != nullptr || - next_blob->UniquelyVertical()) - return nullptr; - if (next_blob->neighbour(DirOtherWay(dir)) == blob) - return next_blob; - return nullptr; -} - -// Finds horizontal chains of text-like blobs and puts them in ColPartitions. -void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) { - // A PageSegMode that forces horizontal textlines with the current rotation. - PageSegMode pageseg_mode = - rerotation_.y() == 0.0f ? PSM_SINGLE_COLUMN : PSM_SINGLE_BLOCK_VERT_TEXT; - BlobGridSearch gsearch(this); - BLOBNBOX* bbox; - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - BLOBNBOX* blob; - if (bbox->owner() == nullptr && bbox->UniquelyHorizontal() && - (blob = MutualUnusedHNeighbour(bbox, BND_RIGHT)) != nullptr) { - // Put all the linked blobs into a ColPartition. 
- ColPartition* part = new ColPartition(BRT_TEXT, ICOORD(0, 1)); - part->AddBox(bbox); - while (blob != nullptr) { - part->AddBox(blob); - blob = MutualUnusedHNeighbour(blob, BND_RIGHT); - } - blob = MutualUnusedHNeighbour(bbox, BND_LEFT); - while (blob != nullptr) { - part->AddBox(blob); - blob = MutualUnusedVNeighbour(blob, BND_LEFT); - } - CompletePartition(pageseg_mode, part, part_grid); - } - } -} - -// Finds diacritics and saves their base character in the blob. -// The objective is to move all diacritics to the noise_blobs list, so -// they don't mess up early textline finding/merging, or force splits -// on textlines that overlap a bit. Blobs that become diacritics must be -// either part of no ColPartition (nullptr owner) or in a small partition in -// which ALL the blobs are diacritics, in which case the partition is -// exploded (deleted) back to its blobs. -void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block) { - BlobGrid small_grid(gridsize(), bleft(), tright()); - small_grid.InsertBlobList(&block->noise_blobs); - small_grid.InsertBlobList(&block->blobs); - int medium_diacritics = 0; - int small_diacritics = 0; - BLOBNBOX_IT small_it(&block->noise_blobs); - for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { - BLOBNBOX* blob = small_it.data(); - if (blob->owner() == nullptr && !blob->IsDiacritic() && - DiacriticBlob(&small_grid, blob)) { - ++small_diacritics; - } - } - BLOBNBOX_IT blob_it(&block->blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - if (blob->IsDiacritic()) { - small_it.add_to_end(blob_it.extract()); - continue; // Already a diacritic. 
- } - ColPartition* part = blob->owner(); - if (part == nullptr && DiacriticBlob(&small_grid, blob)) { - ++medium_diacritics; - RemoveBBox(blob); - small_it.add_to_end(blob_it.extract()); - } else if (part != nullptr && !part->block_owned() && - part->boxes_count() < 3) { - // We allow blobs in small partitions to become diacritics if ALL the - // blobs in the partition qualify as we can then cleanly delete the - // partition, turn all the blobs in it to diacritics and they can be - // merged into the base character partition more easily than merging - // the partitions. - BLOBNBOX_C_IT box_it(part->boxes()); - for (box_it.mark_cycle_pt(); !box_it.cycled_list() && - DiacriticBlob(&small_grid, box_it.data()); - box_it.forward()); - if (box_it.cycled_list()) { - // They are all good. - while (!box_it.empty()) { - // Liberate the blob from its partition so it can be treated - // as a diacritic and merged explicitly with the base part. - // The blob is really owned by the block. The partition "owner" - // is nulled to allow the blob to get merged with its base character - // partition. - BLOBNBOX* box = box_it.extract(); - box->set_owner(nullptr); - box_it.forward(); - ++medium_diacritics; - // We remove the blob from the grid so it isn't found by subsequent - // searches where we might not want to include diacritics. - RemoveBBox(box); - } - // We only move the one blob to the small list here, but the others - // all get moved by the test at the top of the loop. 
- small_it.add_to_end(blob_it.extract()); - part_grid->RemoveBBox(part); - delete part; - } - } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), - blob->bounding_box().bottom())) { - tprintf("Blob not available to be a diacritic at:"); - blob->bounding_box().print(); - } - } - if (textord_tabfind_show_strokewidths) { - tprintf("Found %d small diacritics, %d medium\n", - small_diacritics, medium_diacritics); - } -} - -// Searches this grid for an appropriately close and sized neighbour of the -// given [small] blob. If such a blob is found, the diacritic base is saved -// in the blob and true is returned. -// The small_grid is a secondary grid that contains the small/noise objects -// that are not in this grid, but may be useful for determining a connection -// between blob and its potential base character. (See DiacriticXGapFilled.) -bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { - if (BLOBNBOX::UnMergeableType(blob->region_type()) || - blob->region_type() == BRT_VERT_TEXT) - return false; - TBOX small_box(blob->bounding_box()); - bool debug = AlignedBlob::WithinTestRegion(2, small_box.left(), - small_box.bottom()); - if (debug) { - tprintf("Testing blob for diacriticness at:"); - small_box.print(); - } - int x = (small_box.left() + small_box.right()) / 2; - int y = (small_box.bottom() + small_box.top()) / 2; - int grid_x, grid_y; - GridCoords(x, y, &grid_x, &grid_y); - int height = small_box.height(); - // Setup a rectangle search to find its nearest base-character neighbour. - // We keep 2 different best candidates: - // best_x_overlap is a category of base characters that have an overlap in x - // (like a acute) in which we look for the least y-gap, computed using the - // projection to favor base characters in the same textline. 
- // best_y_overlap is a category of base characters that have no x overlap, - // (nominally a y-overlap is preferrecd but not essential) in which we - // look for the least weighted sum of x-gap and y-gap, with x-gap getting - // a lower weight to catch quotes at the end of a textline. - // NOTE that x-gap and y-gap are measured from the nearest side of the base - // character to the FARTHEST side of the diacritic to allow small diacritics - // to be a reasonable distance away, but not big diacritics. - BLOBNBOX* best_x_overlap = nullptr; - BLOBNBOX* best_y_overlap = nullptr; - int best_total_dist = 0; - int best_y_gap = 0; - TBOX best_xbox; - // TODO(rays) the search box could be setup using the projection as a guide. - TBOX search_box(small_box); - int x_pad = IntCastRounded(gridsize() * kDiacriticXPadRatio); - int y_pad = IntCastRounded(gridsize() * kDiacriticYPadRatio); - search_box.pad(x_pad, y_pad); - BlobGridSearch rsearch(this); - rsearch.SetUniqueMode(true); - int min_height = height * kMinDiacriticSizeRatio; - rsearch.StartRectSearch(search_box); - BLOBNBOX* neighbour; - while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (BLOBNBOX::UnMergeableType(neighbour->region_type()) || - neighbour == blob || neighbour->owner() == blob->owner()) - continue; - TBOX nbox = neighbour->bounding_box(); - if (neighbour->owner() == nullptr || neighbour->owner()->IsVerticalType() || - (neighbour->flow() != BTFT_CHAIN && - neighbour->flow() != BTFT_STRONG_CHAIN)) { - if (debug) { - tprintf("Neighbour not strong enough:"); - nbox.print(); - } - continue; // Diacritics must be attached to strong text. - } - if (nbox.height() < min_height) { - if (debug) { - tprintf("Neighbour not big enough:"); - nbox.print(); - } - continue; // Too small to be the base character. 
- } - int x_gap = small_box.x_gap(nbox); - int y_gap = small_box.y_gap(nbox); - int total_distance = projection_->DistanceOfBoxFromBox(small_box, nbox, - true, denorm_, - debug); - if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n", - x_gap, y_gap, total_distance); - if (total_distance > - neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) { - if (debug) { - tprintf("Neighbour with median size %d too far away:", - neighbour->owner()->median_height()); - neighbour->bounding_box().print(); - } - continue; // Diacritics must not be too distant. - } - if (x_gap <= 0) { - if (debug) { - tprintf("Computing reduced box for :"); - nbox.print(); - } - int left = small_box.left() - small_box.width(); - int right = small_box.right() + small_box.width(); - nbox = neighbour->BoundsWithinLimits(left, right); - y_gap = small_box.y_gap(nbox); - if (best_x_overlap == nullptr || y_gap < best_y_gap) { - best_x_overlap = neighbour; - best_xbox = nbox; - best_y_gap = y_gap; - if (debug) { - tprintf("New best:"); - nbox.print(); - } - } else if (debug) { - tprintf("Shrunken box doesn't win:"); - nbox.print(); - } - } else if (blob->ConfirmNoTabViolation(*neighbour)) { - if (best_y_overlap == nullptr || total_distance < best_total_dist) { - if (debug) { - tprintf("New best y overlap:"); - nbox.print(); - } - best_y_overlap = neighbour; - best_total_dist = total_distance; - } else if (debug) { - tprintf("New y overlap box doesn't win:"); - nbox.print(); - } - } else if (debug) { - tprintf("Neighbour wrong side of a tab:"); - nbox.print(); - } - } - if (best_x_overlap != nullptr && - (best_y_overlap == nullptr || - best_xbox.major_y_overlap(best_y_overlap->bounding_box()))) { - blob->set_diacritic_box(best_xbox); - blob->set_base_char_blob(best_x_overlap); - if (debug) { - tprintf("DiacriticBlob OK! 
(x-overlap:"); - small_box.print(); - best_xbox.print(); - } - return true; - } - if (best_y_overlap != nullptr && - DiacriticXGapFilled(small_grid, small_box, - best_y_overlap->bounding_box()) && - NoNoiseInBetween(small_box, best_y_overlap->bounding_box())) { - blob->set_diacritic_box(best_y_overlap->bounding_box()); - blob->set_base_char_blob(best_y_overlap); - if (debug) { - tprintf("DiacriticBlob OK! (y-overlap:"); - small_box.print(); - best_y_overlap->bounding_box().print(); - } - return true; - } - if (debug) { - tprintf("DiacriticBlob fails:"); - small_box.print(); - tprintf("Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap); - if (best_y_overlap != nullptr) { - tprintf("XGapFilled=%d, NoiseBetween=%d\n", - DiacriticXGapFilled(small_grid, small_box, - best_y_overlap->bounding_box()), - NoNoiseInBetween(small_box, best_y_overlap->bounding_box())); - } - } - return false; -} - -// Returns true if there is no gap between the base char and the diacritic -// bigger than a fraction of the height of the base char: -// Eg: line end.....' -// The quote is a long way from the end of the line, yet it needs to be a -// diacritic. To determine that the quote is not part of an image, or -// a different text block, we check for other marks in the gap between -// the base char and the diacritic. -// '<--Diacritic -// |---------| -// | |<-toobig-gap-> -// | Base | -// |---------| x<-----Dot occupying gap -// The grid is const really. -bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid, - const TBOX& diacritic_box, - const TBOX& base_box) { - // Since most gaps are small, use an iterative algorithm to search the gap. - int max_gap = IntCastRounded(base_box.height() * - kMaxDiacriticGapToBaseCharHeight); - TBOX occupied_box(base_box); - int diacritic_gap; - while ((diacritic_gap = diacritic_box.x_gap(occupied_box)) > max_gap) { - TBOX search_box(occupied_box); - if (diacritic_box.left() > search_box.right()) { - // We are looking right. 
- search_box.set_left(search_box.right()); - search_box.set_right(search_box.left() + max_gap); - } else { - // We are looking left. - search_box.set_right(search_box.left()); - search_box.set_left(search_box.left() - max_gap); - } - BlobGridSearch rsearch(grid); - rsearch.StartRectSearch(search_box); - BLOBNBOX* neighbour; - while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - const TBOX& nbox = neighbour->bounding_box(); - if (nbox.x_gap(diacritic_box) < diacritic_gap) { - if (nbox.left() < occupied_box.left()) - occupied_box.set_left(nbox.left()); - if (nbox.right() > occupied_box.right()) - occupied_box.set_right(nbox.right()); - break; - } - } - if (neighbour == nullptr) - return false; // Found a big gap. - } - return true; // The gap was filled. -} - -// Merges diacritics with the ColPartition of the base character blob. -void StrokeWidth::MergeDiacritics(TO_BLOCK* block, - ColPartitionGrid* part_grid) { - BLOBNBOX_IT small_it(&block->noise_blobs); - for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { - BLOBNBOX* blob = small_it.data(); - if (blob->base_char_blob() != nullptr) { - ColPartition* part = blob->base_char_blob()->owner(); - // The base character must be owned by a partition and that partition - // must not be on the big_parts list (not block owned). - if (part != nullptr && !part->block_owned() && blob->owner() == nullptr && - blob->IsDiacritic()) { - // The partition has to be removed from the grid and reinserted - // because its bounding box may change. - part_grid->RemoveBBox(part); - part->AddBox(blob); - blob->set_region_type(part->blob_type()); - blob->set_flow(part->flow()); - blob->set_owner(part); - part_grid->InsertBBox(true, true, part); - } - // Set all base chars to nullptr before any blobs get deleted. 
- blob->set_base_char_blob(nullptr); - } - } -} - -// Any blobs on the large_blobs list of block that are still unowned by a -// ColPartition, are probably drop-cap or vertically touching so the blobs -// are removed to the big_parts list and treated separately. -void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK* block, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts) { - BLOBNBOX_IT large_it(&block->large_blobs); - for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) { - BLOBNBOX* blob = large_it.data(); - ColPartition* big_part = blob->owner(); - if (big_part == nullptr) { - // Large blobs should have gone into partitions by now if they are - // genuine characters, so move any unowned ones out to the big parts - // list. This will include drop caps and vertically touching characters. - ColPartition::MakeBigPartition(blob, big_parts); - } - } -} - -// All remaining unused blobs are put in individual ColPartitions. -void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode, - ColPartitionGrid* part_grid) { - BlobGridSearch gsearch(this); - BLOBNBOX* bbox; - int prev_grid_x = -1; - int prev_grid_y = -1; - BLOBNBOX_CLIST cell_list; - BLOBNBOX_C_IT cell_it(&cell_list); - bool cell_all_noise = true; - gsearch.StartFullSearch(); - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - int grid_x = gsearch.GridX(); - int grid_y = gsearch.GridY(); - if (grid_x != prev_grid_x || grid_y != prev_grid_y) { - // New cell. Process old cell. 
- MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, - &cell_list); - cell_it.set_to_list(&cell_list); - prev_grid_x = grid_x; - prev_grid_y = grid_y; - cell_all_noise = true; - } - if (bbox->owner() == nullptr) { - cell_it.add_to_end(bbox); - if (bbox->flow() != BTFT_NONTEXT) - cell_all_noise = false; - } else { - cell_all_noise = false; - } - } - MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, - &cell_list); -} - -// If combine, put all blobs in the cell_list into a single partition, otherwise -// put each one into its own partition. -void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode, - bool combine, - ColPartitionGrid* part_grid, - BLOBNBOX_CLIST* cell_list) { - if (cell_list->empty()) - return; - BLOBNBOX_C_IT cell_it(cell_list); - if (combine) { - BLOBNBOX* bbox = cell_it.extract(); - ColPartition* part = new ColPartition(bbox->region_type(), ICOORD(0, 1)); - part->AddBox(bbox); - part->set_flow(bbox->flow()); - for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) { - part->AddBox(cell_it.extract()); - } - CompletePartition(pageseg_mode, part, part_grid); - } else { - for (; !cell_it.empty(); cell_it.forward()) { - BLOBNBOX* bbox = cell_it.extract(); - ColPartition* part = new ColPartition(bbox->region_type(), ICOORD(0, 1)); - part->set_flow(bbox->flow()); - part->AddBox(bbox); - CompletePartition(pageseg_mode, part, part_grid); - } - } -} - -// Helper function to finish setting up a ColPartition and insert into -// part_grid. -void StrokeWidth::CompletePartition(PageSegMode pageseg_mode, - ColPartition* part, - ColPartitionGrid* part_grid) { - part->ComputeLimits(); - TBOX box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), - box.bottom()); - int value = projection_->EvaluateColPartition(*part, denorm_, debug); - // Override value if pageseg_mode disagrees. - if (value > 0 && FindingVerticalOnly(pageseg_mode)) { - value = part->boxes_count() == 1 ? 
0 : -2; - } else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) { - value = part->boxes_count() == 1 ? 0 : 2; - } - part->SetRegionAndFlowTypesFromProjectionValue(value); - part->ClaimBoxes(); - part_grid->InsertBBox(true, true, part); -} - -// Merge partitions where the merge appears harmless. -// As this -void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) { - part_grid->Merges( - NewPermanentTessCallback(this, &StrokeWidth::OrientationSearchBox), - NewPermanentTessCallback(this, &StrokeWidth::ConfirmEasyMerge)); -} - -// Compute a search box based on the orientation of the partition. -// Returns true if a suitable box can be calculated. -// Callback for EasyMerges. -bool StrokeWidth::OrientationSearchBox(ColPartition* part, TBOX* box) { - if (part->IsVerticalType()) { - box->set_top(box->top() + box->width()); - box->set_bottom(box->bottom() - box->width()); - } else { - box->set_left(box->left() - box->height()); - box->set_right(box->right() + box->height()); - } - return true; -} - -// Merge confirmation callback for EasyMerges. -bool StrokeWidth::ConfirmEasyMerge(const ColPartition* p1, - const ColPartition* p2) { - ASSERT_HOST(p1 != nullptr && p2 != nullptr); - ASSERT_HOST(!p1->IsEmpty() && !p2->IsEmpty()); - if ((p1->flow() == BTFT_NONTEXT && p2->flow() >= BTFT_CHAIN) || - (p1->flow() >= BTFT_CHAIN && p2->flow() == BTFT_NONTEXT)) - return false; // Don't merge confirmed image with text. - if ((p1->IsVerticalType() || p2->IsVerticalType()) && - p1->HCoreOverlap(*p2) <= 0 && - ((!p1->IsSingleton() && - !p2->IsSingleton()) || - !p1->bounding_box().major_overlap(p2->bounding_box()))) - return false; // Overlap must be in the text line. 
- if ((p1->IsHorizontalType() || p2->IsHorizontalType()) && - p1->VCoreOverlap(*p2) <= 0 && - ((!p1->IsSingleton() && - !p2->IsSingleton()) || - (!p1->bounding_box().major_overlap(p2->bounding_box()) && - !p1->OKDiacriticMerge(*p2, false) && - !p2->OKDiacriticMerge(*p1, false)))) - return false; // Overlap must be in the text line. - if (!p1->ConfirmNoTabViolation(*p2)) - return false; - if (p1->flow() <= BTFT_NONTEXT && p2->flow() <= BTFT_NONTEXT) - return true; - return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box()); -} - -// Returns true if there is no significant noise in between the boxes. -bool StrokeWidth::NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const { - return ImageFind::BlankImageInBetween(box1, box2, grid_box_, rerotation_, - nontext_map_); -} - -/** Displays the blobs colored according to the number of good neighbours - * and the vertical/horizontal flow. - */ -ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name, - int x, int y) { - ScrollView* window = nullptr; -#ifndef GRAPHICS_DISABLED - window = MakeWindow(x, y, window_name); - // For every blob in the grid, display it. - window->Brush(ScrollView::NONE); - - // For every bbox in the grid, display it. 
- BlobGridSearch gsearch(this); - gsearch.StartFullSearch(); - BLOBNBOX* bbox; - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - const TBOX& box = bbox->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - int goodness = bbox->GoodTextBlob(); - BlobRegionType blob_type = bbox->region_type(); - if (bbox->UniquelyVertical()) - blob_type = BRT_VERT_TEXT; - if (bbox->UniquelyHorizontal()) - blob_type = BRT_TEXT; - BlobTextFlowType flow = bbox->flow(); - if (flow == BTFT_NONE) { - if (goodness == 0) - flow = BTFT_NEIGHBOURS; - else if (goodness == 1) - flow = BTFT_CHAIN; - else - flow = BTFT_STRONG_CHAIN; - } - window->Pen(BLOBNBOX::TextlineColor(blob_type, flow)); - window->Rectangle(left_x, bottom_y, right_x, top_y); - } - window->Update(); -#endif - return window; -} - -static void DrawDiacriticJoiner(const BLOBNBOX* blob, ScrollView* window) { -#ifndef GRAPHICS_DISABLED - const TBOX& blob_box(blob->bounding_box()); - int top = std::max(static_cast(blob_box.top()), blob->base_char_top()); - int bottom = std::min(static_cast(blob_box.bottom()), blob->base_char_bottom()); - int x = (blob_box.left() + blob_box.right()) / 2; - window->Line(x, top, x, bottom); -#endif // GRAPHICS_DISABLED -} - -// Displays blobs colored according to whether or not they are diacritics. -ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name, - int x, int y, TO_BLOCK* block) { - ScrollView* window = nullptr; -#ifndef GRAPHICS_DISABLED - window = MakeWindow(x, y, window_name); - // For every blob in the grid, display it. 
- window->Brush(ScrollView::NONE); - - BLOBNBOX_IT it(&block->blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - if (blob->IsDiacritic()) { - window->Pen(ScrollView::GREEN); - DrawDiacriticJoiner(blob, window); - } else { - window->Pen(blob->BoxColor()); - } - const TBOX& box = blob->bounding_box(); - window->Rectangle(box.left(), box. bottom(), box.right(), box.top()); - } - it.set_to_list(&block->noise_blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - if (blob->IsDiacritic()) { - window->Pen(ScrollView::GREEN); - DrawDiacriticJoiner(blob, window); - } else { - window->Pen(ScrollView::WHITE); - } - const TBOX& box = blob->bounding_box(); - window->Rectangle(box.left(), box. bottom(), box.right(), box.top()); - } - window->Update(); -#endif - return window; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/strokewidth.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/strokewidth.h deleted file mode 100644 index 497ffaeb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/strokewidth.h +++ /dev/null @@ -1,355 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: strokewidth.h -// Description: Subclass of BBGrid to find uniformity of strokewidth. -// Author: Ray Smith -// Created: Mon Mar 31 16:17:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_ -#define TESSERACT_TEXTORD_STROKEWIDTH_H_ - -#include "blobbox.h" // BlobNeighourDir. -#include "blobgrid.h" // Base class. -#include "colpartitiongrid.h" -#include "textlineprojection.h" - -class DENORM; -class ScrollView; -class TO_BLOCK; - -namespace tesseract { - -class ColPartition_LIST; -class TabFind; -class TextlineProjection; - -// Misc enums to clarify bool arguments for direction-controlling args. -enum LeftOrRight { - LR_LEFT, - LR_RIGHT -}; - -// Return value from FindInitialPartitions indicates detection of severe -// skew or noise. -enum PartitionFindResult { - PFR_OK, // Everything is OK. - PFR_SKEW, // Skew was detected and rotated. - PFR_NOISE // Noise was detected and removed. -}; - -/** - * The StrokeWidth class holds all the normal and large blobs. - * It is used to find good large blobs and move them to the normal blobs - * by virtue of having a reasonable strokewidth compatible neighbour. - */ -class StrokeWidth : public BlobGrid { - public: - StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright); - virtual ~StrokeWidth(); - - // Sets the neighbours member of the medium-sized blobs in the block. - // Searches on 4 sides of each blob for similar-sized, similar-strokewidth - // blobs and sets pointers to the good neighbours. - void SetNeighboursOnMediumBlobs(TO_BLOCK* block); - - // Sets the neighbour/textline writing direction members of the medium - // and large blobs with optional repair of broken CJK characters first. - // Repair of broken CJK is needed here because broken CJK characters - // can fool the textline direction detection algorithm. 
- void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, - bool cjk_merge, - TO_BLOCK* input_block); - - // To save computation, the process of generating partitions is broken - // into the following 4 steps: - // TestVerticalTextDirection - // CorrectForRotation (used only if a rotation is to be applied) - // FindLeaderPartitions - // GradeBlobsIntoPartitions. - // These functions are all required, in sequence, except for - // CorrectForRotation, which is not needed if no rotation is applied. - - // Types all the blobs as vertical or horizontal text or unknown and - // returns true if the majority are vertical. - // If the blobs are rotated, it is necessary to call CorrectForRotation - // after rotating everything, otherwise the work done here will be enough. - // If osd_blobs is not null, a list of blobs from the dominant textline - // direction are returned for use in orientation and script detection. - // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. - bool TestVerticalTextDirection(double find_vertical_text_ratio, - TO_BLOCK* block, - BLOBNBOX_CLIST* osd_blobs); - - // Corrects the data structures for the given rotation. - void CorrectForRotation(const FCOORD& rerotation, - ColPartitionGrid* part_grid); - - // Finds leader partitions and inserts them into the give grid. - void FindLeaderPartitions(TO_BLOCK* block, - ColPartitionGrid* part_grid); - - // Finds and marks noise those blobs that look like bits of vertical lines - // that would otherwise screw up layout analysis. - void RemoveLineResidue(ColPartition_LIST* big_part_list); - - // Types all the blobs as vertical text or horizontal text or unknown and - // puts them into initial ColPartitions in the supplied part_grid. - // rerotation determines how to get back to the image coordinates from the - // blob coordinates (since they may have been rotated for vertical text). - // block is the single block for the whole page or rectangle to be OCRed. 
- // nontext_pix (full-size), is a binary mask used to prevent merges across - // photo/text boundaries. It is not kept beyond this function. - // denorm provides a mapping back to the image from the current blob - // coordinate space. - // projection provides a measure of textline density over the image and - // provides functions to assist with diacritic detection. It should be a - // pointer to a new TextlineProjection, and will be setup here. - // part_grid is the output grid of textline partitions. - // Large blobs that cause overlap are put in separate partitions and added - // to the big_parts list. - void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, - const FCOORD& rerotation, TO_BLOCK* block, - Pix* nontext_pix, const DENORM* denorm, - bool cjk_script, TextlineProjection* projection, - BLOBNBOX_LIST* diacritic_blobs, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts); - - // Handles a click event in a display window. - virtual void HandleClick(int x, int y); - - private: - // Computes the noise_density_ by summing the number of elements in a - // neighbourhood of each grid cell. - void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid); - - // Detects and marks leader dots/dashes. - // Leaders are horizontal chains of small or noise blobs that look - // monospace according to ColPartition::MarkAsLeaderIfMonospaced(). - // Detected leaders become the only occupants of the block->small_blobs list. - // Non-leader small blobs get moved to the blobs list. - // Non-leader noise blobs remain singletons in the noise list. - // All small and noise blobs in high density regions are marked BTFT_NONTEXT. - // block is the single block for the whole page or rectangle to be OCRed. - // leader_parts is the output. - void FindLeadersAndMarkNoise(TO_BLOCK* block, - ColPartition_LIST* leader_parts); - - /** Inserts the block blobs (normal and large) into this grid. - * Blobs remain owned by the block. 
*/ - void InsertBlobs(TO_BLOCK* block); - - // Fix broken CJK characters, using the fake joined blobs mechanism. - // Blobs are really merged, ie the master takes all the outlines and the - // others are deleted. - // Returns true if sufficient blobs are merged that it may be worth running - // again, due to a better estimate of character size. - bool FixBrokenCJK(TO_BLOCK* block); - - // Collect blobs that overlap or are within max_dist of the input bbox. - // Return them in the list of blobs and expand the bbox to be the union - // of all the boxes. not_this is excluded from the search, as are blobs - // that cause the merged box to exceed max_size in either dimension. - void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, - int max_size, int max_dist, - TBOX* bbox, BLOBNBOX_CLIST* blobs); - - // For each blob in this grid, Finds the textline direction to be horizontal - // or vertical according to distance to neighbours and 1st and 2nd order - // neighbours. Non-text tends to end up without a definite direction. - // Result is setting of the neighbours and vert_possible/horz_possible - // flags in the BLOBNBOXes currently in this grid. - // This function is called more than once if page orientation is uncertain, - // so display_if_debugging is true on the final call to display the results. - void FindTextlineFlowDirection(PageSegMode pageseg_mode, - bool display_if_debugging); - - // Sets the neighbours and good_stroke_neighbours members of the blob by - // searching close on all 4 sides. - // When finding leader dots/dashes, there is a slightly different rule for - // what makes a good neighbour. - // If activate_line_trap, then line-like objects are found and isolated. - void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob); - - // Sets the good_stroke_neighbours member of the blob if it has a - // GoodNeighbour on the given side. - // Also sets the neighbour in the blob, whether or not a good one is found. 
- // Return value is the number of neighbours in the line trap size range. - // Leaders get extra special lenient treatment. - int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob); - - // Makes the blob to be only horizontal or vertical where evidence - // is clear based on gaps of 2nd order neighbours. - void SetNeighbourFlows(BLOBNBOX* blob); - - // Nullify the neighbours in the wrong directions where the direction - // is clear-cut based on a distance margin. Good for isolating vertical - // text from neighbouring horizontal text. - void SimplifyObviousNeighbours(BLOBNBOX* blob); - - // Smoothes the vertical/horizontal type of the blob based on the - // 2nd-order neighbours. If reset_all is true, then all blobs are - // changed. Otherwise, only ambiguous blobs are processed. - void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, - BLOBNBOX* blob); - - // Checks the left or right side of the given leader partition and sets the - // (opposite) leader_on_right or leader_on_left flags for blobs - // that are next to the given side of the given leader partition. - void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side); - - // Partition creation. Accumulates vertical and horizontal text chains, - // puts the remaining blobs in as unknowns, and then merges/splits to - // minimize overlap and smoothes the types with neighbours and the color - // image if provided. rerotation is used to rotate the coordinate space - // back to the nontext_map_ image. - // If find_problems is true, detects possible noise pollution by the amount - // of partition overlap that is created by the diacritics. If excessive, the - // noise is separated out into diacritic blobs, and PFR_NOISE is returned. - // [TODO(rays): if the partition overlap is caused by heavy skew, deskews - // the components, saves the skew_angle and returns PFR_SKEW.] 
If the return - // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be - // called again after cleaning up the partly done work. - PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, - const FCOORD& rerotation, - bool find_problems, TO_BLOCK* block, - BLOBNBOX_LIST* diacritic_blobs, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts, - FCOORD* skew_angle); - // Detects noise by a significant increase in partition overlap from - // pre_overlap to now, and removes noise from the union of all the overlapping - // partitions, placing the blobs in diacritic_blobs. Returns true if any noise - // was found and removed. - bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box, - TO_BLOCK* block, ColPartitionGrid* part_grid, - BLOBNBOX_LIST* diacritic_blobs); - // Finds vertical chains of text-like blobs and puts them in ColPartitions. - void FindVerticalTextChains(ColPartitionGrid* part_grid); - // Finds horizontal chains of text-like blobs and puts them in ColPartitions. - void FindHorizontalTextChains(ColPartitionGrid* part_grid); - // Finds diacritics and saves their base character in the blob. - void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block); - // Searches this grid for an appropriately close and sized neighbour of the - // given [small] blob. If such a blob is found, the diacritic base is saved - // in the blob and true is returned. - // The small_grid is a secondary grid that contains the small/noise objects - // that are not in this grid, but may be useful for determining a connection - // between blob and its potential base character. (See DiacriticXGapFilled.) - bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob); - // Returns true if there is no gap between the base char and the diacritic - // bigger than a fraction of the height of the base char: - // Eg: line end.....' - // The quote is a long way from the end of the line, yet it needs to be a - // diacritic. 
To determine that the quote is not part of an image, or - // a different text block, we check for other marks in the gap between - // the base char and the diacritic. - // '<--Diacritic - // |---------| - // | |<-toobig-gap-> - // | Base | - // |---------| x<-----Dot occupying gap - // The grid is const really. - bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box, - const TBOX& base_box); - // Merges diacritics with the ColPartition of the base character blob. - void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid); - // Any blobs on the large_blobs list of block that are still unowned by a - // ColPartition, are probably drop-cap or vertically touching so the blobs - // are removed to the big_parts list and treated separately. - void RemoveLargeUnusedBlobs(TO_BLOCK* block, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts); - - // All remaining unused blobs are put in individual ColPartitions. - void PartitionRemainingBlobs(PageSegMode pageseg_mode, - ColPartitionGrid* part_grid); - - // If combine, put all blobs in the cell_list into a single partition, - // otherwise put each one into its own partition. - void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine, - ColPartitionGrid* part_grid, - BLOBNBOX_CLIST* cell_list); - - // Helper function to finish setting up a ColPartition and insert into - // part_grid. - void CompletePartition(PageSegMode pageseg_mode, ColPartition* part, - ColPartitionGrid* part_grid); - - // Helper returns true if we are looking only for vertical textlines, - // taking into account any rotation that has been done. 
- bool FindingVerticalOnly(PageSegMode pageseg_mode) const { - if (rerotation_.y() == 0.0f) { - return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; - } - return !PSM_ORIENTATION_ENABLED(pageseg_mode) && - pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; - } - // Helper returns true if we are looking only for horizontal textlines, - // taking into account any rotation that has been done. - bool FindingHorizontalOnly(PageSegMode pageseg_mode) const { - if (rerotation_.y() == 0.0f) { - return !PSM_ORIENTATION_ENABLED(pageseg_mode) && - pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; - } - return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; - } - - // Merge partitions where the merge appears harmless. - void EasyMerges(ColPartitionGrid* part_grid); - - // Compute a search box based on the orientation of the partition. - // Returns true if a suitable box can be calculated. - // Callback for EasyMerges. - bool OrientationSearchBox(ColPartition* part, TBOX* box); - - // Merge confirmation callback for EasyMerges. - bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2); - - // Returns true if there is no significant noise in between the boxes. - bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const; - - // Displays the blobs colored according to the number of good neighbours - // and the vertical/horizontal flow. - ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y); - - // Displays blobs colored according to whether or not they are diacritics. - ScrollView* DisplayDiacritics(const char* window_name, - int x, int y, TO_BLOCK* block); - - private: - // Image map of photo/noise areas on the page. Borrowed pointer (not owned.) - Pix* nontext_map_; - // Textline projection map. Borrowed pointer. - TextlineProjection* projection_; - // DENORM used by projection_ to get back to image coords. Borrowed pointer. - const DENORM* denorm_; - // Bounding box of the grid. - TBOX grid_box_; - // Rerotation to get back to the original image. 
- FCOORD rerotation_; - // Windows for debug display. - ScrollView* leaders_win_; - ScrollView* initial_widths_win_; - ScrollView* widths_win_; - ScrollView* chains_win_; - ScrollView* diacritics_win_; - ScrollView* textlines_win_; - ScrollView* smoothed_win_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_STROKEWIDTH_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabfind.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabfind.cpp deleted file mode 100644 index 8cc5d1f4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabfind.cpp +++ /dev/null @@ -1,1431 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tabfind.cpp -// Description: Subclass of BBGrid to find vertically aligned blobs. -// Author: Ray Smith -// Created: Fri Mar 21 15:03:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tabfind.h" -#include "alignedblob.h" -#include "blobbox.h" -#include "colpartitiongrid.h" -#include "detlinefit.h" -#include "linefind.h" - -#include - -namespace tesseract { - -// Multiple of box size to search for initial gaps. -const int kTabRadiusFactor = 5; -// Min and Max multiple of height to search vertically when extrapolating. 
-const int kMinVerticalSearch = 3; -const int kMaxVerticalSearch = 12; -const int kMaxRaggedSearch = 25; -// Minimum number of lines in a column width to make it interesting. -const int kMinLinesInColumn = 10; -// Minimum width of a column to be interesting. -const int kMinColumnWidth = 200; -// Minimum fraction of total column lines for a column to be interesting. -const double kMinFractionalLinesInColumn = 0.125; -// Fraction of height used as alignment tolerance for aligned tabs. -const double kAlignedFraction = 0.03125; -// Maximum gutter width (in absolute inch) that we care about -const double kMaxGutterWidthAbsolute = 2.00; -// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs. -const int kRaggedGutterMultiple = 5; -// Min aspect ratio of tall objects to be considered a separator line. -// (These will be ignored in searching the gutter for obstructions.) -const double kLineFragmentAspectRatio = 10.0; -// Min number of points to accept after evaluation. -const int kMinEvaluatedTabs = 3; -// Up to 30 degrees is allowed for rotations of diacritic blobs. -// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp -// so that the assert there never fails. -const double kCosMaxSkewAngle = 0.866025; - -BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates"); -BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors"); - -TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, - TabVector_LIST* vlines, int vertical_x, int vertical_y, - int resolution) - : AlignedBlob(gridsize, bleft, tright), - resolution_(resolution), - image_origin_(0, tright.y() - 1), - v_it_(&vectors_) { - width_cb_ = nullptr; - v_it_.add_list_after(vlines); - SetVerticalSkewAndParallelize(vertical_x, vertical_y); - width_cb_ = NewPermanentTessCallback(this, &TabFind::CommonWidth); -} - -TabFind::~TabFind() { - delete width_cb_; -} - -///////////////// PUBLIC functions (mostly used by TabVector). 
////////////// - -// Insert a list of blobs into the given grid (not necessarily this). -// If take_ownership is true, then the blobs are removed from the source list. -// See InsertBlob for the other arguments. -// It would seem to make more sense to swap this and grid, but this way -// around allows grid to not be derived from TabFind, eg a ColPartitionGrid, -// while the grid that provides the tab stops(this) has to be derived from -// TabFind. -void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, - BLOBNBOX_LIST* blobs, - BBGrid* grid) { - BLOBNBOX_IT blob_it(blobs); - int b_count = 0; - int reject_count = 0; - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); -// if (InsertBlob(true, true, blob, grid)) { - if (InsertBlob(h_spread, v_spread, blob, grid)) { - ++b_count; - } else { - ++reject_count; - } - } - if (textord_debug_tabfind) { - tprintf("Inserted %d blobs into grid, %d rejected.\n", - b_count, reject_count); - } -} - -// Insert a single blob into the given grid (not necessarily this). -// If h_spread, then all cells covered horizontally by the box are -// used, otherwise, just the bottom-left. Similarly for v_spread. -// A side effect is that the left and right rule edges of the blob are -// set according to the tab vectors in this (not grid). -bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob, - BBGrid* grid) { - TBOX box = blob->bounding_box(); - blob->set_left_rule(LeftEdgeForBox(box, false, false)); - blob->set_right_rule(RightEdgeForBox(box, false, false)); - blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false)); - blob->set_right_crossing_rule(RightEdgeForBox(box, true, false)); - if (blob->joined_to_prev()) - return false; - grid->InsertBBox(h_spread, v_spread, blob); - return true; -} - -// Calls SetBlobRuleEdges for all the blobs in the given block. 
-void TabFind::SetBlockRuleEdges(TO_BLOCK* block) { - SetBlobRuleEdges(&block->blobs); - SetBlobRuleEdges(&block->small_blobs); - SetBlobRuleEdges(&block->noise_blobs); - SetBlobRuleEdges(&block->large_blobs); -} - -// Sets the left and right rule and crossing_rules for the blobs in the given -// list by fiding the next outermost tabvectors for each blob. -void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) { - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - TBOX box = blob->bounding_box(); - blob->set_left_rule(LeftEdgeForBox(box, false, false)); - blob->set_right_rule(RightEdgeForBox(box, false, false)); - blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false)); - blob->set_right_crossing_rule(RightEdgeForBox(box, true, false)); - } -} - -// Returns the gutter width of the given TabVector between the given y limits. -// Also returns x-shift to be added to the vector to clear any intersecting -// blobs. The shift is deducted from the returned gutter. -// If ignore_unmergeables is true, then blobs of UnMergeableType are -// ignored as if they don't exist. (Used for text on image.) -// max_gutter_width is used as the maximum width worth searching for in case -// there is nothing near the TabVector. -int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v, - bool ignore_unmergeables, int max_gutter_width, - int* required_shift) { - bool right_to_left = v.IsLeftTab(); - int bottom_x = v.XAtY(bottom_y); - int top_x = v.XAtY(top_y); - int start_x = right_to_left ? 
std::max(top_x, bottom_x) : std::min(top_x, bottom_x); - BlobGridSearch sidesearch(this); - sidesearch.StartSideSearch(start_x, bottom_y, top_y); - int min_gap = max_gutter_width; - *required_shift = 0; - BLOBNBOX* blob = nullptr; - while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) { - const TBOX& box = blob->bounding_box(); - if (box.bottom() >= top_y || box.top() <= bottom_y) - continue; // Doesn't overlap enough. - if (box.height() >= gridsize() * 2 && - box.height() > box.width() * kLineFragmentAspectRatio) { - // Skip likely separator line residue. - continue; - } - if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type())) - continue; // Skip non-text if required. - int mid_y = (box.bottom() + box.top()) / 2; - // We use the x at the mid-y so that the required_shift guarantees - // to clear all the blobs on the tab-stop. If we use the min/max - // of x at top/bottom of the blob, then exactness would be required, - // which is not a good thing. - int tab_x = v.XAtY(mid_y); - int gap; - if (right_to_left) { - gap = tab_x - box.right(); - if (gap < 0 && box.left() - tab_x < *required_shift) - *required_shift = box.left() - tab_x; - } else { - gap = box.left() - tab_x; - if (gap < 0 && box.right() - tab_x > *required_shift) - *required_shift = box.right() - tab_x; - } - if (gap > 0 && gap < min_gap) - min_gap = gap; - } - // Result may be negative, in which case, this is a really bad tabstop. - return min_gap - abs(*required_shift); -} - -// Find the gutter width and distance to inner neighbour for the given blob. -void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, - int max_gutter, bool left, - BLOBNBOX* bbox, int* gutter_width, - int* neighbour_gap) { - const TBOX& box = bbox->bounding_box(); - // The gutter and internal sides of the box. - int gutter_x = left ? box.left() : box.right(); - int internal_x = left ? 
box.right() : box.left(); - // On ragged edges, the gutter side of the box is away from the tabstop. - int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x; - *gutter_width = max_gutter; - // If the box is away from the tabstop, we need to increase - // the allowed gutter width. - if (tab_gap > 0) - *gutter_width += tab_gap; - bool debug = WithinTestRegion(2, box.left(), box.bottom()); - if (debug) - tprintf("Looking in gutter\n"); - // Find the nearest blob on the outside of the column. - BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left, - bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, - *gutter_width, box.top(), box.bottom()); - if (gutter_bbox != nullptr) { - const TBOX& gutter_box = gutter_bbox->bounding_box(); - *gutter_width = left ? tab_x - gutter_box.right() - : gutter_box.left() - tab_x; - } - if (*gutter_width >= max_gutter) { - // If there is no box because a tab was in the way, get the tab coord. - TBOX gutter_box(box); - if (left) { - gutter_box.set_left(tab_x - max_gutter - 1); - gutter_box.set_right(tab_x - max_gutter); - int tab_gutter = RightEdgeForBox(gutter_box, true, false); - if (tab_gutter < tab_x - 1) - *gutter_width = tab_x - tab_gutter; - } else { - gutter_box.set_left(tab_x + max_gutter); - gutter_box.set_right(tab_x + max_gutter + 1); - int tab_gutter = LeftEdgeForBox(gutter_box, true, false); - if (tab_gutter > tab_x + 1) - *gutter_width = tab_gutter - tab_x; - } - } - if (*gutter_width > max_gutter) - *gutter_width = max_gutter; - // Now look for a neighbour on the inside. - if (debug) - tprintf("Looking for neighbour\n"); - BLOBNBOX* neighbour = AdjacentBlob(bbox, !left, - bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, - *gutter_width, box.top(), box.bottom()); - int neighbour_edge = left ? 
RightEdgeForBox(box, true, false) - : LeftEdgeForBox(box, true, false); - if (neighbour != nullptr) { - const TBOX& n_box = neighbour->bounding_box(); - if (debug) { - tprintf("Found neighbour:"); - n_box.print(); - } - if (left && n_box.left() < neighbour_edge) - neighbour_edge = n_box.left(); - else if (!left && n_box.right() > neighbour_edge) - neighbour_edge = n_box.right(); - } - *neighbour_gap = left ? neighbour_edge - internal_x - : internal_x - neighbour_edge; -} - -// Return the x-coord that corresponds to the right edge for the given -// box. If there is a rule line to the right that vertically overlaps it, -// then return the x-coord of the rule line, otherwise return the right -// edge of the page. For details see RightTabForBox below. -int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) { - TabVector* v = RightTabForBox(box, crossing, extended); - return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2); -} -// As RightEdgeForBox, but finds the left Edge instead. -int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) { - TabVector* v = LeftTabForBox(box, crossing, extended); - return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2); -} - -// This comment documents how this function works. -// For its purpose and arguments, see the comment in tabfind.h. -// TabVectors are stored sorted by perpendicular distance of middle from -// the global mean vertical vector. Since the individual vectors can have -// differing directions, their XAtY for a given y is not necessarily in the -// right order. Therefore the search has to be run with a margin. -// The middle of a vector that passes through (x,y) cannot be higher than -// halfway from y to the top, or lower than halfway from y to the bottom -// of the coordinate range; therefore, the search margin is the range of -// sort keys between these halfway points. 
Any vector with a sort key greater -// than the upper margin must be to the right of x at y, and likewise any -// vector with a sort key less than the lower margin must pass to the left -// of x at y. -TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing, - bool extended) { - if (v_it_.empty()) - return nullptr; - int top_y = box.top(); - int bottom_y = box.bottom(); - int mid_y = (top_y + bottom_y) / 2; - int right = crossing ? (box.left() + box.right()) / 2 : box.right(); - int min_key, max_key; - SetupTabSearch(right, mid_y, &min_key, &max_key); - // Position the iterator at the first TabVector with sort_key >= min_key. - while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key) - v_it_.backward(); - while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key) - v_it_.forward(); - // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right. - TabVector* best_v = nullptr; - int best_x = -1; - int key_limit = -1; - do { - TabVector* v = v_it_.data(); - int x = v->XAtY(mid_y); - if (x >= right && - (v->VOverlap(top_y, bottom_y) > 0 || - (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) { - if (best_v == nullptr || x < best_x) { - best_v = v; - best_x = x; - // We can guarantee that no better vector can be found if the - // sort key exceeds that of the best by max_key - min_key. - key_limit = v->sort_key() + max_key - min_key; - } - } - // Break when the search is done to avoid wrapping the iterator and - // thereby potentially slowing the next search. - if (v_it_.at_last() || - (best_v != nullptr && v->sort_key() > key_limit)) - break; // Prevent restarting list for next call. - v_it_.forward(); - } while (!v_it_.at_first()); - return best_v; -} - -// As RightTabForBox, but finds the left TabVector instead. 
-TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing, - bool extended) { - if (v_it_.empty()) - return nullptr; - int top_y = box.top(); - int bottom_y = box.bottom(); - int mid_y = (top_y + bottom_y) / 2; - int left = crossing ? (box.left() + box.right()) / 2 : box.left(); - int min_key, max_key; - SetupTabSearch(left, mid_y, &min_key, &max_key); - // Position the iterator at the last TabVector with sort_key <= max_key. - while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key) - v_it_.forward(); - while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) { - v_it_.backward(); - } - // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left. - TabVector* best_v = nullptr; - int best_x = -1; - int key_limit = -1; - do { - TabVector* v = v_it_.data(); - int x = v->XAtY(mid_y); - if (x <= left && - (v->VOverlap(top_y, bottom_y) > 0 || - (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) { - if (best_v == nullptr || x > best_x) { - best_v = v; - best_x = x; - // We can guarantee that no better vector can be found if the - // sort key is less than that of the best by max_key - min_key. - key_limit = v->sort_key() - (max_key - min_key); - } - } - // Break when the search is done to avoid wrapping the iterator and - // thereby potentially slowing the next search. - if (v_it_.at_first() || - (best_v != nullptr && v->sort_key() < key_limit)) - break; // Prevent restarting list for next call. - v_it_.backward(); - } while (!v_it_.at_last()); - return best_v; -} - -// Return true if the given width is close to one of the common -// widths in column_widths_. -bool TabFind::CommonWidth(int width) { - width /= kColumnWidthFactor; - ICOORDELT_IT it(&column_widths_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ICOORDELT* w = it.data(); - if (w->x() - 1 <= width && width <= w->y() + 1) - return true; - } - return false; -} - -// Return true if the sizes are more than a -// factor of 2 different. 
-bool TabFind::DifferentSizes(int size1, int size2) { - return size1 > size2 * 2 || size2 > size1 * 2; -} - -// Return true if the sizes are more than a -// factor of 5 different. -bool TabFind::VeryDifferentSizes(int size1, int size2) { - return size1 > size2 * 5 || size2 > size1 * 5; -} - -///////////////// PROTECTED functions (used by ColumnFinder). ////////////// - -// Top-level function to find TabVectors in an input page block. -// Returns false if the detected skew angle is impossible. -// Applies the detected skew angle to deskew the tabs, blobs and part_grid. -bool TabFind::FindTabVectors(TabVector_LIST* hlines, - BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - int min_gutter_width, - double tabfind_aligned_gap_fraction, - ColPartitionGrid* part_grid, - FCOORD* deskew, FCOORD* reskew) { - ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width, - tabfind_aligned_gap_fraction, - block); - ComputeColumnWidths(tab_win, part_grid); - TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); - SortVectors(); - CleanupTabs(); - if (!Deskew(hlines, image_blobs, block, deskew, reskew)) - return false; // Skew angle is too large. - part_grid->Deskew(*deskew); - ApplyTabConstraints(); - #ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_finaltabs) { - tab_win = MakeWindow(640, 50, "FinalTabs"); - DisplayBoxes(tab_win); - DisplayTabs("FinalTabs", tab_win); - tab_win = DisplayTabVectors(tab_win); - } - #endif // GRAPHICS_DISABLED - return true; -} - -// Top-level function to not find TabVectors in an input page block, -// but setup for single column mode. 
-void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - FCOORD* deskew, FCOORD* reskew) { - InsertBlobsToGrid(false, false, image_blobs, this); - InsertBlobsToGrid(true, false, &block->blobs, this); - deskew->set_x(1.0f); - deskew->set_y(0.0f); - reskew->set_x(1.0f); - reskew->set_y(0.0f); -} - -// Cleans up the lists of blobs in the block ready for use by TabFind. -// Large blobs that look like text are moved to the main blobs list. -// Main blobs that are superseded by the image blobs are deleted. -void TabFind::TidyBlobs(TO_BLOCK* block) { - BLOBNBOX_IT large_it = &block->large_blobs; - BLOBNBOX_IT blob_it = &block->blobs; - int b_count = 0; - for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) { - BLOBNBOX* large_blob = large_it.data(); - if (large_blob->owner() != nullptr) { - blob_it.add_to_end(large_it.extract()); - ++b_count; - } - } - if (textord_debug_tabfind) { - tprintf("Moved %d large blobs to normal list\n", - b_count); - #ifndef GRAPHICS_DISABLED - ScrollView* rej_win = MakeWindow(500, 300, "Image blobs"); - block->plot_graded_blobs(rej_win); - block->plot_noise_blobs(rej_win); - rej_win->Update(); - #endif // GRAPHICS_DISABLED - } - block->DeleteUnownedNoise(); -} - -// Helper function to setup search limits for *TabForBox. -void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) { - int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2); - int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2); - *min_key = std::min(key1, key2); - *max_key = std::max(key1, key2); -} - -ScrollView* TabFind::DisplayTabVectors(ScrollView* tab_win) { -#ifndef GRAPHICS_DISABLED - // For every vector, display it. - TabVector_IT it(&vectors_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* vector = it.data(); - vector->Display(tab_win); - } - tab_win->Update(); -#endif - return tab_win; -} - -// PRIVATE CODE. 
-// -// First part of FindTabVectors, which may be used twice if the text -// is mostly of vertical alignment. -ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, - int min_gutter_width, - double tabfind_aligned_gap_fraction, - TO_BLOCK* block) { - if (textord_tabfind_show_initialtabs) { - ScrollView* line_win = MakeWindow(0, 0, "VerticalLines"); - line_win = DisplayTabVectors(line_win); - } - // Prepare the grid. - if (image_blobs != nullptr) - InsertBlobsToGrid(true, false, image_blobs, this); - InsertBlobsToGrid(true, false, &block->blobs, this); - ScrollView* initial_win = FindTabBoxes(min_gutter_width, - tabfind_aligned_gap_fraction); - FindAllTabVectors(min_gutter_width); - - TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); - SortVectors(); - EvaluateTabs(); - if (textord_tabfind_show_initialtabs && initial_win != nullptr) - initial_win = DisplayTabVectors(initial_win); - MarkVerticalText(); - return initial_win; -} - -// Helper displays all the boxes in the given vector on the given window. -static void DisplayBoxVector(const GenericVector& boxes, - ScrollView* win) { - #ifndef GRAPHICS_DISABLED - for (int i = 0; i < boxes.size(); ++i) { - TBOX box = boxes[i]->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - ScrollView::Color box_color = boxes[i]->BoxColor(); - win->Pen(box_color); - win->Rectangle(left_x, bottom_y, right_x, top_y); - } - win->Update(); - #endif // GRAPHICS_DISABLED -} - -// For each box in the grid, decide whether it is a candidate tab-stop, -// and if so add it to the left/right tab boxes. -ScrollView* TabFind::FindTabBoxes(int min_gutter_width, - double tabfind_aligned_gap_fraction) { - left_tab_boxes_.clear(); - right_tab_boxes_.clear(); - // For every bbox in the grid, determine whether it uses a tab on an edge. 
- GridSearch gsearch(this); - gsearch.StartFullSearch(); - BLOBNBOX* bbox; - while ((bbox = gsearch.NextFullSearch()) != nullptr) { - if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) { - // If it is any kind of tab, insert it into the vectors. - if (bbox->left_tab_type() != TT_NONE) - left_tab_boxes_.push_back(bbox); - if (bbox->right_tab_type() != TT_NONE) - right_tab_boxes_.push_back(bbox); - } - } - // Sort left tabs by left and right by right to see the outermost one first - // on a ragged tab. - left_tab_boxes_.sort(SortByBoxLeft); - right_tab_boxes_.sort(SortRightToLeft); - ScrollView* tab_win = nullptr; - #ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_initialtabs) { - tab_win = MakeWindow(0, 100, "InitialTabs"); - tab_win->Pen(ScrollView::BLUE); - tab_win->Brush(ScrollView::NONE); - // Display the left and right tab boxes. - DisplayBoxVector(left_tab_boxes_, tab_win); - DisplayBoxVector(right_tab_boxes_, tab_win); - tab_win = DisplayTabs("Tabs", tab_win); - } - #endif // GRAPHICS_DISABLED - return tab_win; -} - -bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, - double tabfind_aligned_gap_fraction) { - GridSearch radsearch(this); - TBOX box = bbox->bounding_box(); - // If there are separator lines, get the column edges. - int left_column_edge = bbox->left_rule(); - int right_column_edge = bbox->right_rule(); - // The edges of the bounding box of the blob being processed. - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - int height = box.height(); - bool debug = WithinTestRegion(3, left_x, top_y); - if (debug) { - tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", - left_x, top_y, right_x, bottom_y, - left_column_edge, right_column_edge); - } - // Compute a search radius based on a multiple of the height. 
- int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_; - radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius); - // In Vertical Page mode, once we have an estimate of the vertical line - // spacing, the minimum amount of gutter space before a possible tab is - // increased under the assumption that column partition is always larger - // than line spacing. - int min_spacing = - static_cast(height * tabfind_aligned_gap_fraction); - if (min_gutter_width > min_spacing) - min_spacing = min_gutter_width; - int min_ragged_gutter = kRaggedGutterMultiple * gridsize(); - if (min_gutter_width > min_ragged_gutter) - min_ragged_gutter = min_gutter_width; - int target_right = left_x - min_spacing; - int target_left = right_x + min_spacing; - // We will be evaluating whether the left edge could be a left tab, and - // whether the right edge could be a right tab. - // A box can be a tab if its bool is_(left/right)_tab remains true, meaning - // that no blobs have been found in the gutter during the radial search. - // A box can also be a tab if there are objects in the gutter only above - // or only below, and there are aligned objects on the opposite side, but - // not too many unaligned objects. The maybe_(left/right)_tab_up counts - // aligned objects above and negatively counts unaligned objects above, - // and is set to -INT32_MAX if a gutter object is found above. - // The other 3 maybe ints work similarly for the other sides. - // These conditions are very strict, to minimize false positives, and really - // only aligned tabs and outermost ragged tab blobs will qualify, so we - // also have maybe_ragged_left/right with less stringent rules. - // A blob that is maybe_ragged_left/right will be further qualified later, - // using the min_ragged_gutter. 
- bool is_left_tab = true; - bool is_right_tab = true; - bool maybe_ragged_left = true; - bool maybe_ragged_right = true; - int maybe_left_tab_up = 0; - int maybe_right_tab_up = 0; - int maybe_left_tab_down = 0; - int maybe_right_tab_down = 0; - if (bbox->leader_on_left()) { - is_left_tab = false; - maybe_ragged_left = false; - maybe_left_tab_up = -INT32_MAX; - maybe_left_tab_down = -INT32_MAX; - } - if (bbox->leader_on_right()) { - is_right_tab = false; - maybe_ragged_right = false; - maybe_right_tab_up = -INT32_MAX; - maybe_right_tab_down = -INT32_MAX; - } - int alignment_tolerance = static_cast(resolution_ * kAlignedFraction); - BLOBNBOX* neighbour = nullptr; - while ((neighbour = radsearch.NextRadSearch()) != nullptr) { - if (neighbour == bbox) - continue; - TBOX nbox = neighbour->bounding_box(); - int n_left = nbox.left(); - int n_right = nbox.right(); - if (debug) - tprintf("Neighbour at (%d,%d)->(%d,%d)\n", - n_left, nbox.bottom(), n_right, nbox.top()); - // If the neighbouring blob is the wrong side of a separator line, then it - // "doesn't exist" as far as we are concerned. - if (n_right > right_column_edge || n_left < left_column_edge || - left_x < neighbour->left_rule() || right_x > neighbour->right_rule()) - continue; // Separator line in the way. 
- int n_mid_x = (n_left + n_right) / 2; - int n_mid_y = (nbox.top() + nbox.bottom()) / 2; - if (n_mid_x <= left_x && n_right >= target_right) { - if (debug) - tprintf("Not a left tab\n"); - is_left_tab = false; - if (n_mid_y < top_y) - maybe_left_tab_down = -INT32_MAX; - if (n_mid_y > bottom_y) - maybe_left_tab_up = -INT32_MAX; - } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) { - if (debug) - tprintf("Maybe a left tab\n"); - if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) - ++maybe_left_tab_up; - if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) - ++maybe_left_tab_down; - } else if (n_left < left_x && n_right >= left_x) { - // Overlaps but not aligned so negative points on a maybe. - if (debug) - tprintf("Maybe Not a left tab\n"); - if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) - --maybe_left_tab_up; - if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) - --maybe_left_tab_down; - } - if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) { - maybe_ragged_left = false; - if (debug) - tprintf("Not a ragged left\n"); - } - if (n_mid_x >= right_x && n_left <= target_left) { - if (debug) - tprintf("Not a right tab\n"); - is_right_tab = false; - if (n_mid_y < top_y) - maybe_right_tab_down = -INT32_MAX; - if (n_mid_y > bottom_y) - maybe_right_tab_up = -INT32_MAX; - } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) { - if (debug) - tprintf("Maybe a right tab\n"); - if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) - ++maybe_right_tab_up; - if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) - ++maybe_right_tab_down; - } else if (n_right > right_x && n_left <= right_x) { - // Overlaps but not aligned so negative points on a maybe. 
- if (debug) - tprintf("Maybe Not a right tab\n"); - if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) - --maybe_right_tab_up; - if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) - --maybe_right_tab_down; - } - if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) { - maybe_ragged_right = false; - if (debug) - tprintf("Not a ragged right\n"); - } - if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX && - maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX) - break; - } - if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) { - bbox->set_left_tab_type(TT_MAYBE_ALIGNED); - } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) { - bbox->set_left_tab_type(TT_MAYBE_RAGGED); - } else { - bbox->set_left_tab_type(TT_NONE); - } - if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) { - bbox->set_right_tab_type(TT_MAYBE_ALIGNED); - } else if (maybe_ragged_right && - ConfirmRaggedRight(bbox, min_ragged_gutter)) { - bbox->set_right_tab_type(TT_MAYBE_RAGGED); - } else { - bbox->set_right_tab_type(TT_NONE); - } - if (debug) { - tprintf("Left result = %s, Right result=%s\n", - bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" : - (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"), - bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" : - (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None")); - } - return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE; -} - -// Returns true if there is nothing in the rectangle of width min_gutter to -// the left of bbox. 
-bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) { - TBOX search_box(bbox->bounding_box()); - search_box.set_right(search_box.left()); - search_box.set_left(search_box.left() - min_gutter); - return NothingYOverlapsInBox(search_box, bbox->bounding_box()); -} - -// Returns true if there is nothing in the rectangle of width min_gutter to -// the right of bbox. -bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) { - TBOX search_box(bbox->bounding_box()); - search_box.set_left(search_box.right()); - search_box.set_right(search_box.right() + min_gutter); - return NothingYOverlapsInBox(search_box, bbox->bounding_box()); -} - -// Returns true if there is nothing in the given search_box that vertically -// overlaps target_box other than target_box itself. -bool TabFind::NothingYOverlapsInBox(const TBOX& search_box, - const TBOX& target_box) { - BlobGridSearch rsearch(this); - rsearch.StartRectSearch(search_box); - BLOBNBOX* blob; - while ((blob = rsearch.NextRectSearch()) != nullptr) { - const TBOX& box = blob->bounding_box(); - if (box.y_overlap(target_box) && !(box == target_box)) - return false; - } - return true; -} - -void TabFind::FindAllTabVectors(int min_gutter_width) { - // A list of vectors that will be created in estimating the skew. - TabVector_LIST dummy_vectors; - // An estimate of the vertical direction, revised as more lines are added. - int vertical_x = 0; - int vertical_y = 1; - // Find an estimate of the vertical direction by finding some tab vectors. - // Slowly up the search size until we get some vectors. 
- for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch; - search_size += kMinVerticalSearch) { - int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED, - min_gutter_width, - &dummy_vectors, - &vertical_x, &vertical_y); - vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED, - min_gutter_width, - &dummy_vectors, - &vertical_x, &vertical_y); - if (vector_count > 0) - break; - } - // Get rid of the test vectors and reset the types of the tabs. - dummy_vectors.clear(); - for (int i = 0; i < left_tab_boxes_.size(); ++i) { - BLOBNBOX* bbox = left_tab_boxes_[i]; - if (bbox->left_tab_type() == TT_CONFIRMED) - bbox->set_left_tab_type(TT_MAYBE_ALIGNED); - } - for (int i = 0; i < right_tab_boxes_.size(); ++i) { - BLOBNBOX* bbox = right_tab_boxes_[i]; - if (bbox->right_tab_type() == TT_CONFIRMED) - bbox->set_right_tab_type(TT_MAYBE_ALIGNED); - } - if (textord_debug_tabfind) { - tprintf("Beginning real tab search with vertical = %d,%d...\n", - vertical_x, vertical_y); - } - // Now do the real thing ,but keep the vectors in the dummy_vectors list - // until they are all done, so we don't get the tab vectors confused with - // the rule line vectors. - FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width, - &dummy_vectors, &vertical_x, &vertical_y); - FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width, - &dummy_vectors, &vertical_x, &vertical_y); - FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width, - &dummy_vectors, &vertical_x, &vertical_y); - FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width, - &dummy_vectors, &vertical_x, &vertical_y); - // Now add the vectors to the vectors_ list. - TabVector_IT v_it(&vectors_); - v_it.add_list_after(&dummy_vectors); - // Now use the summed (mean) vertical vector as the direction for everything. - SetVerticalSkewAndParallelize(vertical_x, vertical_y); -} - -// Helper for FindAllTabVectors finds the vectors of a particular type. 
-int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment, - int min_gutter_width, TabVector_LIST* vectors, - int* vertical_x, int* vertical_y) { - TabVector_IT vector_it(vectors); - int vector_count = 0; - // Search the right or left tab boxes, looking for tab vectors. - bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED; - const GenericVector& boxes = right ? right_tab_boxes_ - : left_tab_boxes_; - for (int i = 0; i < boxes.size(); ++i) { - BLOBNBOX* bbox = boxes[i]; - if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) || - (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) { - TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width, - alignment, - bbox, vertical_x, vertical_y); - if (vector != nullptr) { - ++vector_count; - vector_it.add_to_end(vector); - } - } - } - return vector_count; -} - -// Finds a vector corresponding to a tabstop running through the -// given box of the given alignment type. -// search_size_multiple is a multiple of height used to control -// the size of the search. -// vertical_x and y are updated with an estimate of the real -// vertical direction. (skew finding.) -// Returns nullptr if no decent tabstop can be found. -TabVector* TabFind::FindTabVector(int search_size_multiple, - int min_gutter_width, - TabAlignment alignment, - BLOBNBOX* bbox, - int* vertical_x, int* vertical_y) { - int height = std::max(static_cast(bbox->bounding_box().height()), gridsize()); - AlignedBlobParams align_params(*vertical_x, *vertical_y, - height, - search_size_multiple, min_gutter_width, - resolution_, alignment); - // FindVerticalAlignment is in the parent (AlignedBlob) class. - return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y); -} - -// Set the vertical_skew_ member from the given vector and refit -// all vectors parallel to the skew vector. 
-void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) { - // Fit the vertical vector into an ICOORD, which is 16 bit. - vertical_skew_.set_with_shrink(vertical_x, vertical_y); - if (textord_debug_tabfind) - tprintf("Vertical skew vector=(%d,%d)\n", - vertical_skew_.x(), vertical_skew_.y()); - v_it_.set_to_list(&vectors_); - for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) { - TabVector* v = v_it_.data(); - v->Fit(vertical_skew_, true); - } - // Now sort the vectors as their direction has potentially changed. - SortVectors(); -} - -// Sort all the current vectors using the given vertical direction vector. -void TabFind::SortVectors() { - vectors_.sort(TabVector::SortVectorsByKey); - v_it_.set_to_list(&vectors_); -} - -// Evaluate all the current tab vectors. -void TabFind::EvaluateTabs() { - TabVector_IT rule_it(&vectors_); - for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) { - TabVector* tab = rule_it.data(); - if (!tab->IsSeparator()) { - tab->Evaluate(vertical_skew_, this); - if (tab->BoxCount() < kMinEvaluatedTabs) { - if (textord_debug_tabfind > 2) - tab->Print("Too few boxes"); - delete rule_it.extract(); - v_it_.set_to_list(&vectors_); - } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) { - tab->Print("Evaluated tab"); - } - } - } -} - -// Trace textlines from one side to the other of each tab vector, saving -// the most frequent column widths found in a list so that a given width -// can be tested for being a common width with a simple callback function. 
-void TabFind::ComputeColumnWidths(ScrollView* tab_win, - ColPartitionGrid* part_grid) { - #ifndef GRAPHICS_DISABLED - if (tab_win != nullptr) - tab_win->Pen(ScrollView::WHITE); - #endif // GRAPHICS_DISABLED - // Accumulate column sections into a STATS - int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor; - STATS col_widths(0, col_widths_size + 1); - ApplyPartitionsToColumnWidths(part_grid, &col_widths); - #ifndef GRAPHICS_DISABLED - if (tab_win != nullptr) { - tab_win->Update(); - } - #endif // GRAPHICS_DISABLED - if (textord_debug_tabfind > 1) - col_widths.print(); - // Now make a list of column widths. - MakeColumnWidths(col_widths_size, &col_widths); - // Turn the column width into a range. - ApplyPartitionsToColumnWidths(part_grid, nullptr); -} - -// Finds column width and: -// if col_widths is not null (pass1): -// pair-up tab vectors with existing ColPartitions and accumulate widths. -// else (pass2): -// find the largest real partition width for each recorded column width, -// to be used as the minimum acceptable width. -void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid, - STATS* col_widths) { - // For every ColPartition in the part_grid, add partners to the tabvectors - // and accumulate the column widths. 
- ColPartitionGridSearch gsearch(part_grid); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - BLOBNBOX_C_IT blob_it(part->boxes()); - if (blob_it.empty()) - continue; - BLOBNBOX* left_blob = blob_it.data(); - blob_it.move_to_last(); - BLOBNBOX* right_blob = blob_it.data(); - TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(), - true, false); - if (left_vector == nullptr || left_vector->IsRightTab()) - continue; - TabVector* right_vector = RightTabForBox(right_blob->bounding_box(), - true, false); - if (right_vector == nullptr || right_vector->IsLeftTab()) - continue; - - int line_left = left_vector->XAtY(left_blob->bounding_box().bottom()); - int line_right = right_vector->XAtY(right_blob->bounding_box().bottom()); - // Add to STATS of measurements if the width is significant. - int width = line_right - line_left; - if (col_widths != nullptr) { - AddPartnerVector(left_blob, right_blob, left_vector, right_vector); - if (width >= kMinColumnWidth) - col_widths->add(width / kColumnWidthFactor, 1); - } else { - width /= kColumnWidthFactor; - ICOORDELT_IT it(&column_widths_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ICOORDELT* w = it.data(); - if (NearlyEqual(width, w->y(), 1)) { - int true_width = part->bounding_box().width() / kColumnWidthFactor; - if (true_width <= w->y() && true_width > w->x()) - w->set_x(true_width); - break; - } - } - } - } -} - -// Helper makes the list of common column widths in column_widths_ from the -// input col_widths. Destroys the content of col_widths by repeatedly -// finding the mode and erasing the peak. 
-void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) { - ICOORDELT_IT w_it(&column_widths_); - int total_col_count = col_widths->get_total(); - while (col_widths->get_total() > 0) { - int width = col_widths->mode(); - int col_count = col_widths->pile_count(width); - col_widths->add(width, -col_count); - // Get the entire peak. - for (int left = width - 1; left > 0 && - col_widths->pile_count(left) > 0; - --left) { - int new_count = col_widths->pile_count(left); - col_count += new_count; - col_widths->add(left, -new_count); - } - for (int right = width + 1; right < col_widths_size && - col_widths->pile_count(right) > 0; - ++right) { - int new_count = col_widths->pile_count(right); - col_count += new_count; - col_widths->add(right, -new_count); - } - if (col_count > kMinLinesInColumn && - col_count > kMinFractionalLinesInColumn * total_col_count) { - ICOORDELT* w = new ICOORDELT(0, width); - w_it.add_after_then_move(w); - if (textord_debug_tabfind) - tprintf("Column of width %d has %d = %.2f%% lines\n", - width * kColumnWidthFactor, col_count, - 100.0 * col_count / total_col_count); - } - } -} - -// Mark blobs as being in a vertical text line where that is the case. -// Returns true if the majority of the image is vertical text lines. 
-void TabFind::MarkVerticalText() { - if (textord_debug_tabfind) - tprintf("Checking for vertical lines\n"); - BlobGridSearch gsearch(this); - gsearch.StartFullSearch(); - BLOBNBOX* blob = nullptr; - while ((blob = gsearch.NextFullSearch()) != nullptr) { - if (blob->region_type() < BRT_UNKNOWN) - continue; - if (blob->UniquelyVertical()) { - blob->set_region_type(BRT_VERT_TEXT); - } - } -} - -int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) { - TabVector_IT it(lines); - int prev_right = -1; - int max_gap = static_cast(kMaxGutterWidthAbsolute * resolution_); - STATS gaps(0, max_gap); - STATS heights(0, max_gap); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - TabVector* partner = v->GetSinglePartner(); - if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue; - heights.add(partner->startpt().x() - v->startpt().x(), 1); - if (prev_right > 0 && v->startpt().x() > prev_right) { - gaps.add(v->startpt().x() - prev_right, 1); - } - prev_right = partner->startpt().x(); - } - if (textord_debug_tabfind) - tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n", - gaps.get_total(), gaps.median(), heights.median()); - if (gaps.get_total() < kMinLinesInColumn) return 0; - return static_cast(gaps.median()); -} - -// Find the next adjacent (looking to the left or right) blob on this text -// line, with the constraint that it must vertically significantly overlap -// the [top_y, bottom_y] range. -// If ignore_images is true, then blobs with aligned_text() < 0 are treated -// as if they do not exist. 
-BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox, - bool look_left, bool ignore_images, - double min_overlap_fraction, - int gap_limit, int top_y, int bottom_y) { - GridSearch sidesearch(this); - const TBOX& box = bbox->bounding_box(); - int left = box.left(); - int right = box.right(); - int mid_x = (left + right) / 2; - sidesearch.StartSideSearch(mid_x, bottom_y, top_y); - int best_gap = 0; - bool debug = WithinTestRegion(3, left, bottom_y); - BLOBNBOX* result = nullptr; - BLOBNBOX* neighbour = nullptr; - while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) { - if (debug) { - tprintf("Adjacent blob: considering box:"); - neighbour->bounding_box().print(); - } - if (neighbour == bbox || - (ignore_images && neighbour->region_type() < BRT_UNKNOWN)) - continue; - const TBOX& nbox = neighbour->bounding_box(); - int n_top_y = nbox.top(); - int n_bottom_y = nbox.bottom(); - int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y); - int height = top_y - bottom_y; - int n_height = n_top_y - n_bottom_y; - if (v_overlap > min_overlap_fraction * std::min(height, n_height) && - (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) { - int n_left = nbox.left(); - int n_right = nbox.right(); - int h_gap = std::max(n_left, left) - std::min(n_right, right); - int n_mid_x = (n_left + n_right) / 2; - if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) { - if (h_gap > gap_limit) { - // Hit a big gap before next tab so don't return anything. - if (debug) - tprintf("Giving up due to big gap = %d vs %d\n", - h_gap, gap_limit); - return result; - } - if (h_gap > 0 && (look_left ? neighbour->right_tab_type() - : neighbour->left_tab_type()) >= TT_CONFIRMED) { - // Hit a tab facing the wrong way. Stop in case we are crossing - // the column boundary. - if (debug) - tprintf("Collision with like tab of type %d at %d,%d\n", - look_left ? 
neighbour->right_tab_type() - : neighbour->left_tab_type(), - n_left, nbox.bottom()); - return result; - } - // This is a good fit to the line. Continue with this - // neighbour as the bbox if the best gap. - if (result == nullptr || h_gap < best_gap) { - if (debug) - tprintf("Good result\n"); - result = neighbour; - best_gap = h_gap; - } else { - // The new one is worse, so we probably already have the best result. - return result; - } - } else if (debug) { - tprintf("Wrong way\n"); - } - } else if (debug) { - tprintf("Insufficient overlap\n"); - } - } - if (WithinTestRegion(3, left, box.top())) - tprintf("Giving up due to end of search\n"); - return result; // Hit the edge and found nothing. -} - -// Add a bi-directional partner relationship between the left -// and the right. If one (or both) of the vectors is a separator, -// extend a nearby extendable vector or create a new one of the -// correct type, using the given left or right blob as a guide. -void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob, - TabVector* left, TabVector* right) { - const TBOX& left_box = left_blob->bounding_box(); - const TBOX& right_box = right_blob->bounding_box(); - if (left->IsSeparator()) { - // Try to find a nearby left edge to extend. - TabVector* v = LeftTabForBox(left_box, true, true); - if (v != nullptr && v != left && v->IsLeftTab() && - v->XAtY(left_box.top()) > left->XAtY(left_box.top())) { - left = v; // Found a good replacement. - left->ExtendToBox(left_blob); - } else { - // Fake a vector. - left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob); - vectors_.add_sorted(TabVector::SortVectorsByKey, left); - v_it_.move_to_first(); - } - } - if (right->IsSeparator()) { - // Try to find a nearby left edge to extend. 
- if (WithinTestRegion(3, right_box.right(), right_box.bottom())) { - tprintf("Box edge (%d,%d-%d)", - right_box.right(), right_box.bottom(), right_box.top()); - right->Print(" looking for improvement for"); - } - TabVector* v = RightTabForBox(right_box, true, true); - if (v != nullptr && v != right && v->IsRightTab() && - v->XAtY(right_box.top()) < right->XAtY(right_box.top())) { - right = v; // Found a good replacement. - right->ExtendToBox(right_blob); - if (WithinTestRegion(3, right_box.right(), right_box.bottom())) { - right->Print("Extended vector"); - } - } else { - // Fake a vector. - right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_, - right_blob); - vectors_.add_sorted(TabVector::SortVectorsByKey, right); - v_it_.move_to_first(); - if (WithinTestRegion(3, right_box.right(), right_box.bottom())) { - right->Print("Created new vector"); - } - } - } - left->AddPartner(right); - right->AddPartner(left); -} - -// Remove separators and unused tabs from the main vectors_ list -// to the dead_vectors_ list. -void TabFind::CleanupTabs() { - // TODO(rays) Before getting rid of separators and unused vectors, it - // would be useful to try moving ragged vectors outwards to see if this - // allows useful extension. Could be combined with checking ends of partners. - TabVector_IT it(&vectors_); - TabVector_IT dead_it(&dead_vectors_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - if (v->IsSeparator() || v->Partnerless()) { - dead_it.add_after_then_move(it.extract()); - v_it_.set_to_list(&vectors_); - } else { - v->FitAndEvaluateIfNeeded(vertical_skew_, this); - } - } -} - -// Apply the given rotation to the given list of blobs. -void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) { - BLOBNBOX_IT it(blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->rotate_box(rotation); - } -} - -// Recreate the grid with deskewed BLOBNBOXes. 
-// Returns false if the detected skew angle is impossible. -bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) { - ComputeDeskewVectors(deskew, reskew); - if (deskew->x() < kCosMaxSkewAngle) - return false; - RotateBlobList(*deskew, image_blobs); - RotateBlobList(*deskew, &block->blobs); - RotateBlobList(*deskew, &block->small_blobs); - RotateBlobList(*deskew, &block->noise_blobs); - - // Rotate the horizontal vectors. The vertical vectors don't need - // rotating as they can just be refitted. - TabVector_IT h_it(hlines); - for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { - TabVector* h = h_it.data(); - h->Rotate(*deskew); - } - TabVector_IT d_it(&dead_vectors_); - for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) { - TabVector* d = d_it.data(); - d->Rotate(*deskew); - } - SetVerticalSkewAndParallelize(0, 1); - // Rebuild the grid to the new size. - TBOX grid_box(bleft_, tright_); - grid_box.rotate_large(*deskew); - Init(gridsize(), grid_box.botleft(), grid_box.topright()); - InsertBlobsToGrid(false, false, image_blobs, this); - InsertBlobsToGrid(true, false, &block->blobs, this); - return true; -} - -// Flip the vertical and horizontal lines and rotate the grid ready -// for working on the rotated image. -// This also makes parameter adjustments for FindInitialTabVectors(). -void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, - TabVector_LIST* horizontal_lines, - int* min_gutter_width) { - // Rotate the horizontal and vertical vectors and swap them over. - // Only the separators are kept and rotated; other tabs are used - // to estimate the gutter width then thrown away. 
- TabVector_LIST ex_verticals; - TabVector_IT ex_v_it(&ex_verticals); - TabVector_LIST vlines; - TabVector_IT v_it(&vlines); - while (!v_it_.empty()) { - TabVector* v = v_it_.extract(); - if (v->IsSeparator()) { - v->Rotate(rotate); - ex_v_it.add_after_then_move(v); - } else { - v_it.add_after_then_move(v); - } - v_it_.forward(); - } - - // Adjust the min gutter width for better tabbox selection - // in 2nd call to FindInitialTabVectors(). - int median_gutter = FindMedianGutterWidth(&vlines); - if (median_gutter > *min_gutter_width) - *min_gutter_width = median_gutter; - - TabVector_IT h_it(horizontal_lines); - for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { - TabVector* h = h_it.data(); - h->Rotate(rotate); - } - v_it_.add_list_after(horizontal_lines); - v_it_.move_to_first(); - h_it.set_to_list(horizontal_lines); - h_it.add_list_after(&ex_verticals); - - // Rebuild the grid to the new size. - TBOX grid_box(bleft(), tright()); - grid_box.rotate_large(rotate); - Init(gridsize(), grid_box.botleft(), grid_box.topright()); -} - -// Clear the grid and get rid of the tab vectors, but not separators, -// ready to start again. -void TabFind::Reset() { - v_it_.move_to_first(); - for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) { - if (!v_it_.data()->IsSeparator()) - delete v_it_.extract(); - } - Clear(); -} - -// Reflect the separator tab vectors and the grids in the y-axis. -// Can only be called after Reset! -void TabFind::ReflectInYAxis() { - TabVector_LIST temp_list; - TabVector_IT temp_it(&temp_list); - v_it_.move_to_first(); - // The TabVector list only contains vertical lines, but they need to be - // reflected and the list needs to be reversed, so they are still in - // sort_key order. 
- while (!v_it_.empty()) { - TabVector* v = v_it_.extract(); - v_it_.forward(); - v->ReflectInYAxis(); - temp_it.add_before_then_move(v); - } - v_it_.add_list_after(&temp_list); - v_it_.move_to_first(); - // Reset this grid with reflected bounding boxes. - TBOX grid_box(bleft(), tright()); - int tmp = grid_box.left(); - grid_box.set_left(-grid_box.right()); - grid_box.set_right(-tmp); - Init(gridsize(), grid_box.botleft(), grid_box.topright()); -} - -// Compute the rotation required to deskew, and its inverse rotation. -void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) { - double length = vertical_skew_ % vertical_skew_; - length = sqrt(length); - deskew->set_x(static_cast(vertical_skew_.y() / length)); - deskew->set_y(static_cast(vertical_skew_.x() / length)); - reskew->set_x(deskew->x()); - reskew->set_y(-deskew->y()); -} - -// Compute and apply constraints to the end positions of TabVectors so -// that where possible partners end at the same y coordinate. -void TabFind::ApplyTabConstraints() { - TabVector_IT it(&vectors_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - v->SetupConstraints(); - } - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - // With the first and last partner, we want a common bottom and top, - // respectively, and for each change of partner, we want a common - // top of first with bottom of next. - v->SetupPartnerConstraints(); - } - // TODO(rays) The back-to-back pairs should really be done like the - // front-to-front pairs, but there is no convenient way of producing the - // list of partners like there is with the front-to-front. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - if (!v->IsRightTab()) - continue; - // For each back-to-back pair of vectors, try for common top and bottom. 
- TabVector_IT partner_it(it); - for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) { - TabVector* partner = partner_it.data(); - if (!partner->IsLeftTab() || !v->VOverlap(*partner)) - continue; - v->SetupPartnerConstraints(partner); - } - } - // Now actually apply the constraints to get common start/end points. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - if (!v->IsSeparator()) - v->ApplyConstraints(); - } - // TODO(rays) Where constraint application fails, it would be good to try - // checking the ends to see if they really should be moved. -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabfind.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabfind.h deleted file mode 100644 index 6e4d96ea..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabfind.h +++ /dev/null @@ -1,385 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tabfind.h -// Description: Subclass of BBGrid to find tabstops. -// Author: Ray Smith -// Created: Fri Mar 21 15:03:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_TABFIND_H_ -#define TESSERACT_TEXTORD_TABFIND_H_ - -#include "alignedblob.h" -#include "tesscallback.h" -#include "tabvector.h" -#include "linefind.h" - -class BLOBNBOX; -class BLOBNBOX_LIST; -class TO_BLOCK; -class ScrollView; -struct Pix; - -namespace tesseract { - -using WidthCallback = TessResultCallback1; - -struct AlignedBlobParams; -class ColPartitionGrid; - -/** Pixel resolution of column width estimates. */ -const int kColumnWidthFactor = 20; - -/** - * The TabFind class contains code to find tab-stops and maintain the - * vectors_ list of tab vectors. - * Also provides an interface to find neighbouring blobs - * in the grid of BLOBNBOXes that is used by multiple subclasses. - * Searching is a complex operation because of the need to enforce - * rule/separator lines, and tabstop boundaries, (when available), so - * as the holder of the list of TabVectors this class provides the functions. - */ -class TabFind : public AlignedBlob { - public: - TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, - TabVector_LIST* vlines, int vertical_x, int vertical_y, - int resolution); - virtual ~TabFind(); - - /** - * Insert a list of blobs into the given grid (not necessarily this). - * See InsertBlob for the other arguments. - * It would seem to make more sense to swap this and grid, but this way - * around allows grid to not be derived from TabFind, eg a ColPartitionGrid, - * while the grid that provides the tab stops(this) has to be derived from - * TabFind. - */ - void InsertBlobsToGrid(bool h_spread, bool v_spread, - BLOBNBOX_LIST* blobs, - BBGrid* grid); - - /** - * Insert a single blob into the given grid (not necessarily this). - * If h_spread, then all cells covered horizontally by the box are - * used, otherwise, just the bottom-left. Similarly for v_spread. 
- * A side effect is that the left and right rule edges of the blob are - * set according to the tab vectors in this (not grid). - */ - bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob, - BBGrid* grid); - // Calls SetBlobRuleEdges for all the blobs in the given block. - void SetBlockRuleEdges(TO_BLOCK* block); - // Sets the left and right rule and crossing_rules for the blobs in the given - // list by finding the next outermost tabvectors for each blob. - void SetBlobRuleEdges(BLOBNBOX_LIST* blobs); - - // Returns the gutter width of the given TabVector between the given y limits. - // Also returns x-shift to be added to the vector to clear any intersecting - // blobs. The shift is deducted from the returned gutter. - // If ignore_unmergeables is true, then blobs of UnMergeableType are - // ignored as if they don't exist. (Used for text on image.) - // max_gutter_width is used as the maximum width worth searching for in case - // there is nothing near the TabVector. - int GutterWidth(int bottom_y, int top_y, const TabVector& v, - bool ignore_unmergeables, int max_gutter_width, - int* required_shift); - /** - * Find the gutter width and distance to inner neighbour for the given blob. - */ - void GutterWidthAndNeighbourGap(int tab_x, int mean_height, - int max_gutter, bool left, - BLOBNBOX* bbox, int* gutter_width, - int* neighbour_gap); - - /** - * Return the x-coord that corresponds to the right edge for the given - * box. If there is a rule line to the right that vertically overlaps it, - * then return the x-coord of the rule line, otherwise return the right - * edge of the page. For details see RightTabForBox below. - */ - int RightEdgeForBox(const TBOX& box, bool crossing, bool extended); - /** - * As RightEdgeForBox, but finds the left Edge instead. - */ - int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended); - - /** - * Return the TabVector that corresponds to the right edge for the given - * box. 
If there is a TabVector to the right that vertically overlaps it, - * then return it, otherwise return nullptr. Note that Right and Left refer - * to the position of the TabVector, not its type, ie RightTabForBox - * returns the nearest TabVector to the right of the box, regardless of - * its type. - * If a TabVector crosses right through the box (as opposed to grazing one - * edge or missing entirely), then crossing false will ignore such a line. - * Crossing true will return the line for BOTH left and right edges. - * If extended is true, then TabVectors are considered to extend to their - * extended_start/end_y, otherwise, just the startpt_ and endpt_. - * These functions make use of an internal iterator to the vectors_ list - * for speed when used repeatedly on neighbouring boxes. The caveat is - * that the iterator must be updated whenever the list is modified. - */ - TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended); - /** - * As RightTabForBox, but finds the left TabVector instead. - */ - TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended); - - /** - * Return true if the given width is close to one of the common - * widths in column_widths_. - */ - bool CommonWidth(int width); - /** - * Return true if the sizes are more than a - * factor of 2 different. - */ - static bool DifferentSizes(int size1, int size2); - /** - * Return true if the sizes are more than a - * factor of 5 different. - */ - static bool VeryDifferentSizes(int size1, int size2); - - /** - * Return a callback for testing CommonWidth. - */ - WidthCallback* WidthCB() { - return width_cb_; - } - - /** - * Return the coords at which to draw the image backdrop. 
- */ - const ICOORD& image_origin() const { - return image_origin_; - } - - protected: - /** - // Accessors - */ - TabVector_LIST* vectors() { - return &vectors_; - } - TabVector_LIST* dead_vectors() { - return &dead_vectors_; - } - - /** - * Top-level function to find TabVectors in an input page block. - * Returns false if the detected skew angle is impossible. - * Applies the detected skew angle to deskew the tabs, blobs and part_grid. - * tabfind_aligned_gap_fraction should be the value of parameter - * textord_tabfind_aligned_gap_fraction - */ - bool FindTabVectors(TabVector_LIST* hlines, - BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - int min_gutter_width, double tabfind_aligned_gap_fraction, - ColPartitionGrid* part_grid, - FCOORD* deskew, FCOORD* reskew); - - // Top-level function to not find TabVectors in an input page block, - // but setup for single column mode. - void DontFindTabVectors(BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); - - // Cleans up the lists of blobs in the block ready for use by TabFind. - // Large blobs that look like text are moved to the main blobs list. - // Main blobs that are superseded by the image blobs are deleted. - void TidyBlobs(TO_BLOCK* block); - - // Helper function to setup search limits for *TabForBox. - void SetupTabSearch(int x, int y, int* min_key, int* max_key); - - /** - * Display the tab vectors found in this grid. - */ - ScrollView* DisplayTabVectors(ScrollView* tab_win); - - // First part of FindTabVectors, which may be used twice if the text - // is mostly of vertical alignment. If find_vertical_text flag is - // true, this finds vertical textlines in possibly rotated blob space. - // In other words, when the page has mostly vertical lines and is rotated, - // setting this to true will find horizontal lines on the page. 
- // tabfind_aligned_gap_fraction should be the value of parameter - // textord_tabfind_aligned_gap_fraction - ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, - int min_gutter_width, - double tabfind_aligned_gap_fraction, - TO_BLOCK* block); - - // Apply the given rotation to the given list of blobs. - static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs); - - // Flip the vertical and horizontal lines and rotate the grid ready - // for working on the rotated image. - // The min_gutter_width will be adjusted to the median gutter width between - // vertical tabs to set a better threshold for tabboxes in the 2nd pass. - void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, - TabVector_LIST* horizontal_lines, - int* min_gutter_width); - - // Clear the grid and get rid of the tab vectors, but not separators, - // ready to start again. - void Reset(); - - // Reflect the separator tab vectors and the grids in the y-axis. - // Can only be called after Reset! - void ReflectInYAxis(); - - private: - // For each box in the grid, decide whether it is a candidate tab-stop, - // and if so add it to the left and right tab boxes. - // tabfind_aligned_gap_fraction should be the value of parameter - // textord_tabfind_aligned_gap_fraction - ScrollView* FindTabBoxes(int min_gutter_width, - double tabfind_aligned_gap_fraction); - - // Return true if this box looks like a candidate tab stop, and set - // the appropriate tab type(s) to TT_UNCONFIRMED. - // tabfind_aligned_gap_fraction should be the value of parameter - // textord_tabfind_aligned_gap_fraction - bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, - double tabfind_aligned_gap_fraction); - - // Returns true if there is nothing in the rectangle of width min_gutter to - // the left of bbox. - bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter); - // Returns true if there is nothing in the rectangle of width min_gutter to - // the right of bbox. 
- bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter); - // Returns true if there is nothing in the given search_box that vertically - // overlaps target_box other than target_box itself. - bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box); - - // Fills the list of TabVector with the tabstops found in the grid, - // and estimates the logical vertical direction. - void FindAllTabVectors(int min_gutter_width); - // Helper for FindAllTabVectors finds the vectors of a particular type. - int FindTabVectors(int search_size_multiple, - TabAlignment alignment, - int min_gutter_width, - TabVector_LIST* vectors, - int* vertical_x, int* vertical_y); - // Finds a vector corresponding to a tabstop running through the - // given box of the given alignment type. - // search_size_multiple is a multiple of height used to control - // the size of the search. - // vertical_x and y are updated with an estimate of the real - // vertical direction. (skew finding.) - // Returns nullptr if no decent tabstop can be found. - TabVector* FindTabVector(int search_size_multiple, int min_gutter_width, - TabAlignment alignment, - BLOBNBOX* bbox, - int* vertical_x, int* vertical_y); - - // Set the vertical_skew_ member from the given vector and refit - // all vectors parallel to the skew vector. - void SetVerticalSkewAndParallelize(int vertical_x, int vertical_y); - - // Sort all the current vectors using the vertical_skew_ vector. - void SortVectors(); - - // Evaluate all the current tab vectors. - void EvaluateTabs(); - - // Trace textlines from one side to the other of each tab vector, saving - // the most frequent column widths found in a list so that a given width - // can be tested for being a common width with a simple callback function. - void ComputeColumnWidths(ScrollView* tab_win, - ColPartitionGrid* part_grid); - - // Finds column width and: - // if col_widths is not null (pass1): - // pair-up tab vectors with existing ColPartitions and accumulate widths. 
- // else (pass2): - // find the largest real partition width for each recorded column width, - // to be used as the minimum acceptable width. - void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid, - STATS* col_widths); - - // Helper makes the list of common column widths in column_widths_ from the - // input col_widths. Destroys the content of col_widths by repeatedly - // finding the mode and erasing the peak. - void MakeColumnWidths(int col_widths_size, STATS* col_widths); - - // Mark blobs as being in a vertical text line where that is the case. - void MarkVerticalText(); - - // Returns the median gutter width between pairs of matching tab vectors - // assuming they are sorted left-to-right. If there are too few data - // points (< kMinLinesInColumn), then 0 is returned. - int FindMedianGutterWidth(TabVector_LIST* tab_vectors); - - // Find the next adjacent (to left or right) blob on this text line, - // with the constraint that it must vertically significantly overlap - // the [top_y, bottom_y] range. - // If ignore_images is true, then blobs with aligned_text() < 0 are treated - // as if they do not exist. - BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox, - bool look_left, bool ignore_images, - double min_overlap_fraction, - int gap_limit, int top_y, int bottom_y); - - // Add a bi-directional partner relationship between the left - // and the right. If one (or both) of the vectors is a separator, - // extend a nearby extendable vector or create a new one of the - // correct type, using the given left or right blob as a guide. - void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob, - TabVector* left, TabVector* right); - - /** - * Remove separators and unused tabs from the main vectors_ list - * to the dead_vectors_ list. - */ - void CleanupTabs(); - - /** - * Deskew the tab vectors and blobs, computing the rotation and resetting - * the storked vertical_skew_. The deskew inverse is returned in reskew. 
- * Returns false if the detected skew angle is impossible. - */ - bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); - - // Compute the rotation required to deskew, and its inverse rotation. - void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew); - - /** - * Compute and apply constraints to the end positions of TabVectors so - * that where possible partners end at the same y coordinate. - */ - void ApplyTabConstraints(); - - protected: - ICOORD vertical_skew_; //< Estimate of true vertical in this image. - int resolution_; //< Of source image in pixels per inch. - private: - ICOORD image_origin_; //< Top-left of image in deskewed coords - TabVector_LIST vectors_; //< List of rule line and tabstops. - TabVector_IT v_it_; //< Iterator for searching vectors_. - TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors. - // List of commonly occurring width ranges with x=min and y=max. - ICOORDELT_LIST column_widths_; //< List of commonly occurring width ranges. - /** Callback to test an int for being a common width. */ - WidthCallback* width_cb_; - // Sets of bounding boxes that are candidate tab stops. - GenericVector left_tab_boxes_; - GenericVector right_tab_boxes_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_TABFIND_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablefind.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablefind.cpp deleted file mode 100644 index f7ee3d27..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablefind.cpp +++ /dev/null @@ -1,2097 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tablefind.cpp -// Description: Helper classes to find tables from ColPartitions. -// Author: Faisal Shafait (faisal.shafait@dfki.de) -// Created: Tue Jan 06 11:13:01 PST 2009 -// -// (C) Copyright 2009, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tablefind.h" -#include -#include - -#include "allheaders.h" - -#include "colpartitionset.h" -#include "tablerecog.h" - -namespace tesseract { - -// These numbers are used to calculate the global median stats. -// They just set an upper bound on the stats objects. -// Maximum vertical spacing between neighbor partitions. -const int kMaxVerticalSpacing = 500; -// Maximum width of a blob in a partition. -const int kMaxBlobWidth = 500; - -// Minimum whitespace size to split a partition (measured as a multiple -// of a partition's median width). -const double kSplitPartitionSize = 2.0; -// To insert text, the partition must satisfy these size constraints -// in AllowTextPartition(). The idea is to filter noise partitions -// determined by the size compared to the global medians. -// TODO(nbeato): Need to find good numbers again. -const double kAllowTextHeight = 0.5; -const double kAllowTextWidth = 0.6; -const double kAllowTextArea = 0.8; -// The same thing applies to blobs (to filter noise). -// TODO(nbeato): These numbers are a shot in the dark... -// height and width are 0.5 * gridsize() in colfind.cpp -// area is a rough guess for the size of a period. 
-const double kAllowBlobHeight = 0.3; -const double kAllowBlobWidth = 0.4; -const double kAllowBlobArea = 0.05; - -// Minimum number of components in a text partition. A partition having fewer -// components than that is more likely a data partition and is a candidate -// table cell. -const int kMinBoxesInTextPartition = 10; - -// Maximum number of components that a data partition can have -const int kMaxBoxesInDataPartition = 20; - -// Maximum allowed gap in a text partitions as a multiple of its median size. -const double kMaxGapInTextPartition = 4.0; - -// Minimum value that the maximum gap in a text partition should have as a -// factor of its median size. -const double kMinMaxGapInTextPartition = 0.5; - -// The amount of overlap that is "normal" for adjacent blobs in a text -// partition. This is used to calculate gap between overlapping blobs. -const double kMaxBlobOverlapFactor = 4.0; - -// Maximum x-height a table partition can have as a multiple of global -// median x-height -const double kMaxTableCellXheight = 2.0; - -// Maximum line spacing between a table column header and column contents -// for merging the two (as a multiple of the partition's median_height). -const int kMaxColumnHeaderDistance = 4; - -// Minimum ratio of num_table_partitions to num_text_partitions in a column -// block to be called it a table column -const double kTableColumnThreshold = 3.0; - -// Search for horizontal ruling lines within the vertical margin as a -// multiple of grid size -const int kRulingVerticalMargin = 3; - -// Minimum overlap that a colpartition must have with a table region -// to become part of that table -const double kMinOverlapWithTable = 0.6; - -// Maximum side space (distance from column boundary) that a typical -// text-line in flowing text should have as a multiple of its x-height -// (Median size). 
-const int kSideSpaceMargin = 10; - -// Fraction of the peak of x-projection of a table region to set the -// threshold for the x-projection histogram -const double kSmallTableProjectionThreshold = 0.35; -const double kLargeTableProjectionThreshold = 0.45; -// Minimum number of rows required to look for more rows in the projection. -const int kLargeTableRowCount = 6; - -// Minimum number of rows in a table -const int kMinRowsInTable = 3; - -// The amount of padding (multiplied by global_median_xheight_ during use) -// that is vertically added to the search adjacent leader search during -// ColPartition marking. -const int kAdjacentLeaderSearchPadding = 2; - -// Used when filtering false positives. When finding the last line -// of a paragraph (typically left-aligned), the previous line should have -// its center to the right of the last line by this scaled amount. -const double kParagraphEndingPreviousLineRatio = 1.3; - -// The maximum amount of whitespace allowed left of a paragraph ending. -// Do not filter a ColPartition with more than this space left of it. -const double kMaxParagraphEndingLeftSpaceMultiple = 3.0; - -// Used when filtering false positives. The last line of a paragraph -// should be preceded by a line that is predominantly text. This is the -// ratio of text to whitespace (to the right of the text) that is required -// for the previous line to be a text. -const double kMinParagraphEndingTextToWhitespaceRatio = 3.0; - -// When counting table columns, this is the required gap between two columns -// (it is multiplied by global_median_xheight_). -const double kMaxXProjectionGapFactor = 2.0; - -// Used for similarity in partitions using stroke width. Values copied -// from ColFind.cpp in Ray's CL. 
-const double kStrokeWidthFractionalTolerance = 0.25; -const double kStrokeWidthConstantTolerance = 2.0; - -BOOL_VAR(textord_show_tables, false, "Show table regions"); -BOOL_VAR(textord_tablefind_show_mark, false, - "Debug table marking steps in detail"); -BOOL_VAR(textord_tablefind_show_stats, false, - "Show page stats used in table finding"); -BOOL_VAR(textord_tablefind_recognize_tables, false, - "Enables the table recognizer for table layout and filtering."); - -ELISTIZE(ColSegment) -CLISTIZE(ColSegment) - -// Templated helper function used to create destructor callbacks for the -// BBGrid::ClearGridData() method. -template void DeleteObject(T *object) { - delete object; -} - -TableFinder::TableFinder() - : resolution_(0), - global_median_xheight_(0), - global_median_blob_width_(0), - global_median_ledding_(0), - left_to_right_language_(true) { -} - -TableFinder::~TableFinder() { - // ColPartitions and ColSegments created by this class for storage in grids - // need to be deleted explicitly. 
- clean_part_grid_.ClearGridData(&DeleteObject); - leader_and_ruling_grid_.ClearGridData(&DeleteObject); - fragmented_text_grid_.ClearGridData(&DeleteObject); - col_seg_grid_.ClearGridData(&DeleteObject); - table_grid_.ClearGridData(&DeleteObject); -} - -void TableFinder::set_left_to_right_language(bool order) { - left_to_right_language_ = order; -} - -void TableFinder::Init(int grid_size, const ICOORD& bottom_left, - const ICOORD& top_right) { - // Initialize clean partitions list and grid - clean_part_grid_.Init(grid_size, bottom_left, top_right); - leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right); - fragmented_text_grid_.Init(grid_size, bottom_left, top_right); - col_seg_grid_.Init(grid_size, bottom_left, top_right); - table_grid_.Init(grid_size, bottom_left, top_right); -} - -// Copy cleaned partitions from part_grid_ to clean_part_grid_ and -// insert leaders and rulers into the leader_and_ruling_grid_ -void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid, - TO_BLOCK* block) { - // Calculate stats. This lets us filter partitions in AllowTextPartition() - // and filter blobs in AllowBlob(). - SetGlobalSpacings(grid); - - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(grid); - gsearch.SetUniqueMode(true); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - // Reject partitions with nothing useful inside of them. - if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0) - continue; - ColPartition* clean_part = part->ShallowCopy(); - ColPartition* leader_part = nullptr; - if (part->IsLineType()) { - InsertRulingPartition(clean_part); - continue; - } - // Insert all non-text partitions to clean_parts - if (!part->IsTextType()) { - InsertImagePartition(clean_part); - continue; - } - // Insert text colpartitions after removing noisy components from them - // The leaders are split into a separate grid. 
- BLOBNBOX_CLIST* part_boxes = part->boxes(); - BLOBNBOX_C_IT pit(part_boxes); - for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { - BLOBNBOX *pblob = pit.data(); - // Bad blobs... happens in UNLV set. - // news.3G1, page 17 (around x=6) - if (!AllowBlob(*pblob)) - continue; - if (pblob->flow() == BTFT_LEADER) { - if (leader_part == nullptr) { - leader_part = part->ShallowCopy(); - leader_part->set_flow(BTFT_LEADER); - } - leader_part->AddBox(pblob); - } else if (pblob->region_type() != BRT_NOISE) { - clean_part->AddBox(pblob); - } - } - clean_part->ComputeLimits(); - ColPartition* fragmented = clean_part->CopyButDontOwnBlobs(); - InsertTextPartition(clean_part); - SplitAndInsertFragmentedTextPartition(fragmented); - if (leader_part != nullptr) { - // TODO(nbeato): Note that ComputeLimits does not update the column - // information. So the leader may appear to span more columns than it - // really does later on when IsInSameColumnAs gets called to test - // for adjacent leaders. - leader_part->ComputeLimits(); - InsertLeaderPartition(leader_part); - } - } - - // Make the partition partners better for upper and lower neighbors. 
- clean_part_grid_.FindPartitionPartners(); - clean_part_grid_.RefinePartitionPartners(false); -} - -// High level function to perform table detection -void TableFinder::LocateTables(ColPartitionGrid* grid, - ColPartitionSet** all_columns, - WidthCallback* width_cb, - const FCOORD& reskew) { - // initialize spacing, neighbors, and columns - InitializePartitions(all_columns); - -#ifndef GRAPHICS_DISABLED - if (textord_show_tables) { - ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, - ScrollView::AQUAMARINE); - DisplayColPartitionConnections(table_win, &clean_part_grid_, - ScrollView::ORANGE); - - table_win = MakeWindow(100, 300, "Fragmented Text"); - DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE); - } -#endif // GRAPHICS_DISABLED - - // mark, filter, and smooth candidate table partitions - MarkTablePartitions(); - - // Make single-column blocks from good_columns_ partitions. col_segments are - // moved to a grid later which takes the ownership - ColSegment_LIST column_blocks; - GetColumnBlocks(all_columns, &column_blocks); - // Set the ratio of candidate table partitions in each column - SetColumnsType(&column_blocks); - - // Move column segments to col_seg_grid_ - MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_); - - // Detect split in column layout that might have occurred due to the - // presence of a table. In such a case, merge the corresponding columns. - GridMergeColumnBlocks(); - - // Group horizontally overlapping table partitions into table columns. - // table_columns created here get deleted at the end of this method. - ColSegment_LIST table_columns; - GetTableColumns(&table_columns); - - // Within each column, mark the range table regions occupy based on the - // table columns detected. 
table_regions are moved to a grid later which - // takes the ownership - ColSegment_LIST table_regions; - GetTableRegions(&table_columns, &table_regions); - -#ifndef GRAPHICS_DISABLED - if (textord_tablefind_show_mark) { - ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions"); - DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE); - DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW); - } -#endif // GRAPHICS_DISABLED - - // Merge table regions across columns for tables spanning multiple - // columns - MoveColSegmentsToGrid(&table_regions, &table_grid_); - GridMergeTableRegions(); - - // Adjust table boundaries by including nearby horizontal lines and left - // out column headers - AdjustTableBoundaries(); - GridMergeTableRegions(); - - if (textord_tablefind_recognize_tables) { - // Remove false alarms consiting of a single column - DeleteSingleColumnTables(); - -#ifndef GRAPHICS_DISABLED - if (textord_show_tables) { - ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI); - table_grid_.DisplayBoxes(table_win); - } -#endif // GRAPHICS_DISABLED - - // Find table grid structure and reject tables that are malformed. - RecognizeTables(); - GridMergeTableRegions(); - RecognizeTables(); - -#ifndef GRAPHICS_DISABLED - if (textord_show_tables) { - ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables"); - DisplayColPartitions(table_win, &clean_part_grid_, - ScrollView::BLUE, ScrollView::BLUE); - table_grid_.DisplayBoxes(table_win); - } -#endif // GRAPHICS_DISABLED - } else { - // Remove false alarms consiting of a single column - // TODO(nbeato): verify this is a NOP after structured table rejection. - // Right now it isn't. If the recognize function is doing what it is - // supposed to do, this function is obsolete. 
- DeleteSingleColumnTables(); - -#ifndef GRAPHICS_DISABLED - if (textord_show_tables) { - ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables"); - DisplayColPartitions(table_win, &clean_part_grid_, - ScrollView::BLUE, ScrollView::BLUE); - table_grid_.DisplayBoxes(table_win); - } -#endif // GRAPHICS_DISABLED - } - - // Merge all colpartitions in table regions to make them a single - // colpartition and revert types of isolated table cells not - // assigned to any table to their original types. - MakeTableBlocks(grid, all_columns, width_cb); -} -// All grids have the same dimensions. The clean_part_grid_ sizes are set from -// the part_grid_ that is passed to InsertCleanPartitions, which was the same as -// the grid that is the base of ColumnFinder. Just return the clean_part_grid_ -// dimensions instead of duplicated memory. -int TableFinder::gridsize() const { - return clean_part_grid_.gridsize(); -} -int TableFinder::gridwidth() const { - return clean_part_grid_.gridwidth(); -} -int TableFinder::gridheight() const { - return clean_part_grid_.gridheight(); -} -const ICOORD& TableFinder::bleft() const { - return clean_part_grid_.bleft(); -} -const ICOORD& TableFinder::tright() const { - return clean_part_grid_.tright(); -} - -void TableFinder::InsertTextPartition(ColPartition* part) { - ASSERT_HOST(part != nullptr); - if (AllowTextPartition(*part)) { - clean_part_grid_.InsertBBox(true, true, part); - } else { - delete part; - } -} -void TableFinder::InsertFragmentedTextPartition(ColPartition* part) { - ASSERT_HOST(part != nullptr); - if (AllowTextPartition(*part)) { - fragmented_text_grid_.InsertBBox(true, true, part); - } else { - delete part; - } -} -void TableFinder::InsertLeaderPartition(ColPartition* part) { - ASSERT_HOST(part != nullptr); - if (!part->IsEmpty() && part->bounding_box().area() > 0) { - leader_and_ruling_grid_.InsertBBox(true, true, part); - } else { - delete part; - } -} -void TableFinder::InsertRulingPartition(ColPartition* part) { - 
leader_and_ruling_grid_.InsertBBox(true, true, part); -} -void TableFinder::InsertImagePartition(ColPartition* part) { - // NOTE: If images are placed into a different grid in the future, - // the function SetPartitionSpacings needs to be updated. It should - // be the only thing that cares about image partitions. - clean_part_grid_.InsertBBox(true, true, part); -} - -// Splits a partition into its "words". The splits happen -// at locations with wide inter-blob spacing. This is useful -// because it allows the table recognize to "cut through" the -// text lines on the page. The assumption is that a table -// will have several lines with similar overlapping whitespace -// whereas text will not have this type of property. -// Note: The code Assumes that blobs are sorted by the left side x! -// This will not work (as well) if the blobs are sorted by center/right. -void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition* part) { - ASSERT_HOST(part != nullptr); - // Bye bye empty partitions! - if (part->boxes()->empty()) { - delete part; - return; - } - - // The AllowBlob function prevents this. - ASSERT_HOST(part->median_width() > 0); - const double kThreshold = part->median_width() * kSplitPartitionSize; - - ColPartition* right_part = part; - bool found_split = true; - while (found_split) { - found_split = false; - BLOBNBOX_C_IT box_it(right_part->boxes()); - // Blobs are sorted left side first. If blobs overlap, - // the previous blob may have a "more right" right side. - // Account for this by always keeping the largest "right" - // so far. - int previous_right = INT32_MIN; - - // Look for the next split in the partition. - for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { - const TBOX& box = box_it.data()->bounding_box(); - if (previous_right != INT32_MIN && - box.left() - previous_right > kThreshold) { - // We have a split position. Split the partition in two pieces. 
- // Insert the left piece in the grid and keep processing the right. - int mid_x = (box.left() + previous_right) / 2; - ColPartition* left_part = right_part; - right_part = left_part->SplitAt(mid_x); - - InsertFragmentedTextPartition(left_part); - found_split = true; - break; - } - - // The right side of the previous blobs. - previous_right = std::max(previous_right, static_cast(box.right())); - } - } - // When a split is not found, the right part is minimized - // as much as possible, so process it. - InsertFragmentedTextPartition(right_part); -} - -// Some simple criteria to filter out now. We want to make sure the -// average blob size in the partition is consistent with the -// global page stats. -// The area metric will almost always pass for multi-blob partitions. -// It is useful when filtering out noise caused by an isolated blob. -bool TableFinder::AllowTextPartition(const ColPartition& part) const { - const double kHeightRequired = global_median_xheight_ * kAllowTextHeight; - const double kWidthRequired = global_median_blob_width_ * kAllowTextWidth; - const int median_area = global_median_xheight_ * global_median_blob_width_; - const double kAreaPerBlobRequired = median_area * kAllowTextArea; - // Keep comparisons strictly greater to disallow 0! - return part.median_height() > kHeightRequired && - part.median_width() > kWidthRequired && - part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count(); -} - -// Same as above, applied to blobs. Keep in mind that -// leaders, commas, and periods are important in tables. 
-bool TableFinder::AllowBlob(const BLOBNBOX& blob) const { - const TBOX& box = blob.bounding_box(); - const double kHeightRequired = global_median_xheight_ * kAllowBlobHeight; - const double kWidthRequired = global_median_blob_width_ * kAllowBlobWidth; - const int median_area = global_median_xheight_ * global_median_blob_width_; - const double kAreaRequired = median_area * kAllowBlobArea; - // Keep comparisons strictly greater to disallow 0! - return box.height() > kHeightRequired && - box.width() > kWidthRequired && - box.area() > kAreaRequired; -} - -// TODO(nbeato): The grid that makes the window doesn't seem to matter. -// The only downside is that window messages will be caught by -// clean_part_grid_ instead of a useful object. This is a temporary solution -// for the debug windows created by the TableFinder. -ScrollView* TableFinder::MakeWindow(int x, int y, const char* window_name) { - return clean_part_grid_.MakeWindow(x, y, window_name); -} - -// Make single-column blocks from good_columns_ partitions. -void TableFinder::GetColumnBlocks(ColPartitionSet** all_columns, - ColSegment_LIST* column_blocks) { - for (int i = 0; i < gridheight(); ++i) { - ColPartitionSet* columns = all_columns[i]; - if (columns != nullptr) { - ColSegment_LIST new_blocks; - // Get boxes from the current vertical position on the grid - columns->GetColumnBoxes(i * gridsize(), (i+1) * gridsize(), &new_blocks); - // Merge the new_blocks boxes into column_blocks if they are well-aligned - GroupColumnBlocks(&new_blocks, column_blocks); - } - } -} - -// Merge column segments into the current list if they are well aligned. 
-void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, - ColSegment_LIST* column_blocks) { - ColSegment_IT src_it(new_blocks); - ColSegment_IT dest_it(column_blocks); - // iterate through the source list - for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - ColSegment* src_seg = src_it.data(); - const TBOX& src_box = src_seg->bounding_box(); - bool match_found = false; - // iterate through the destination list to find a matching column block - for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) { - ColSegment* dest_seg = dest_it.data(); - TBOX dest_box = dest_seg->bounding_box(); - if (ConsecutiveBoxes(src_box, dest_box)) { - // If matching block is found, insert the current block into it - // and delete the source block. - dest_seg->InsertBox(src_box); - match_found = true; - delete src_it.extract(); - break; - } - } - // If no match is found, just append the source block to column_blocks - if (!match_found) { - dest_it.add_after_then_move(src_it.extract()); - } - } -} - -// are the two boxes immediate neighbors along the vertical direction -bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { - int x_margin = 20; - int y_margin = 5; - return (abs(b1.left() - b2.left()) < x_margin) && - (abs(b1.right() - b2.right()) < x_margin) && - (abs(b1.top()-b2.bottom()) < y_margin || - abs(b2.top()-b1.bottom()) < y_margin); -} - -// Set up info for clean_part_grid_ partitions to be valid during detection -// code. -void TableFinder::InitializePartitions(ColPartitionSet** all_columns) { - FindNeighbors(); - SetPartitionSpacings(&clean_part_grid_, all_columns); - SetGlobalSpacings(&clean_part_grid_); -} - -// Set left, right and top, bottom spacings of each colpartition. -void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid, - ColPartitionSet** all_columns) { - // Iterate the ColPartitions in the grid. 
- ColPartitionGridSearch gsearch(grid); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - ColPartitionSet* columns = all_columns[gsearch.GridY()]; - TBOX box = part->bounding_box(); - int y = part->MidY(); - ColPartition* left_column = columns->ColumnContaining(box.left(), y); - ColPartition* right_column = columns->ColumnContaining(box.right(), y); - // set distance from left column as space to the left - if (left_column) { - int left_space = std::max(0, box.left() - left_column->LeftAtY(y)); - part->set_space_to_left(left_space); - } - // set distance from right column as space to the right - if (right_column) { - int right_space = std::max(0, right_column->RightAtY(y) - box.right()); - part->set_space_to_right(right_space); - } - - // Look for images that may be closer. - // NOTE: used to be part_grid_, might cause issues now - ColPartitionGridSearch hsearch(grid); - hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); - ColPartition* neighbor = nullptr; - while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) { - if (neighbor->type() == PT_PULLOUT_IMAGE || - neighbor->type() == PT_FLOWING_IMAGE || - neighbor->type() == PT_HEADING_IMAGE) { - int right = neighbor->bounding_box().right(); - if (right < box.left()) { - int space = std::min(box.left() - right, part->space_to_left()); - part->set_space_to_left(space); - } - } - } - hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); - neighbor = nullptr; - while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) { - if (neighbor->type() == PT_PULLOUT_IMAGE || - neighbor->type() == PT_FLOWING_IMAGE || - neighbor->type() == PT_HEADING_IMAGE) { - int left = neighbor->bounding_box().left(); - if (left > box.right()) { - int space = std::min(left - box.right(), part->space_to_right()); - part->set_space_to_right(space); - } - } - } - - ColPartition* upper_part = part->SingletonPartner(true); - if (upper_part) { - int space 
= std::max(0, static_cast(upper_part->bounding_box().bottom() - - part->bounding_box().bottom())); - part->set_space_above(space); - } else { - // TODO(nbeato): What constitutes a good value? - // 0 is the default value when not set, explicitly noting it needs to - // be something else. - part->set_space_above(INT32_MAX); - } - - ColPartition* lower_part = part->SingletonPartner(false); - if (lower_part) { - int space = std::max(0, static_cast(part->bounding_box().bottom() - - lower_part->bounding_box().bottom())); - part->set_space_below(space); - } else { - // TODO(nbeato): What constitutes a good value? - // 0 is the default value when not set, explicitly noting it needs to - // be something else. - part->set_space_below(INT32_MAX); - } - } -} - -// Set spacing and closest neighbors above and below a given colpartition. -void TableFinder::SetVerticalSpacing(ColPartition* part) { - TBOX box = part->bounding_box(); - int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast(tright().y())); - int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast(bleft().y())); - box.set_top(top_range); - box.set_bottom(bottom_range); - - TBOX part_box = part->bounding_box(); - // Start a rect search - GridSearch - rectsearch(&clean_part_grid_); - rectsearch.StartRectSearch(box); - ColPartition* neighbor; - int min_space_above = kMaxVerticalSpacing; - int min_space_below = kMaxVerticalSpacing; - ColPartition* above_neighbor = nullptr; - ColPartition* below_neighbor = nullptr; - while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { - if (neighbor == part) - continue; - TBOX neighbor_box = neighbor->bounding_box(); - if (neighbor_box.major_x_overlap(part_box)) { - int gap = abs(part->median_bottom() - neighbor->median_bottom()); - // If neighbor is below current partition - if (neighbor_box.top() < part_box.bottom() && - gap < min_space_below) { - min_space_below = gap; - below_neighbor = neighbor; - } // If neighbor is above current partition 
- else if (part_box.top() < neighbor_box.bottom() && - gap < min_space_above) { - min_space_above = gap; - above_neighbor = neighbor; - } - } - } - part->set_space_above(min_space_above); - part->set_space_below(min_space_below); - part->set_nearest_neighbor_above(above_neighbor); - part->set_nearest_neighbor_below(below_neighbor); -} - -// Set global spacing and x-height estimates -void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) { - STATS xheight_stats(0, kMaxVerticalSpacing + 1); - STATS width_stats(0, kMaxBlobWidth + 1); - STATS ledding_stats(0, kMaxVerticalSpacing + 1); - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(grid); - gsearch.SetUniqueMode(true); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - // TODO(nbeato): HACK HACK HACK! medians are equal to partition length. - // ComputeLimits needs to get called somewhere outside of TableFinder - // to make sure the partitions are properly initialized. - // When this is called, SmoothPartitionPartners dies in an assert after - // table find runs. Alternative solution. - // part->ComputeLimits(); - if (part->IsTextType()) { - // xheight_stats.add(part->median_height(), part->boxes_count()); - // width_stats.add(part->median_width(), part->boxes_count()); - - // This loop can be removed when above issues are fixed. - // Replace it with the 2 lines commented out above. 
- BLOBNBOX_C_IT it(part->boxes()); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - xheight_stats.add(it.data()->bounding_box().height(), 1); - width_stats.add(it.data()->bounding_box().width(), 1); - } - - ledding_stats.add(part->space_above(), 1); - ledding_stats.add(part->space_below(), 1); - } - } - // Set estimates based on median of statistics obtained - set_global_median_xheight(static_cast(xheight_stats.median() + 0.5)); - set_global_median_blob_width(static_cast(width_stats.median() + 0.5)); - set_global_median_ledding(static_cast(ledding_stats.median() + 0.5)); - #ifndef GRAPHICS_DISABLED - if (textord_tablefind_show_stats) { - const char* kWindowName = "X-height (R), X-width (G), and ledding (B)"; - ScrollView* stats_win = MakeWindow(500, 10, kWindowName); - xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED); - width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN); - ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE); - } - #endif // GRAPHICS_DISABLED -} - -void TableFinder::set_global_median_xheight(int xheight) { - global_median_xheight_ = xheight; -} -void TableFinder::set_global_median_blob_width(int width) { - global_median_blob_width_ = width; -} -void TableFinder::set_global_median_ledding(int ledding) { - global_median_ledding_ = ledding; -} - -void TableFinder::FindNeighbors() { - ColPartitionGridSearch gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - // TODO(nbeato): Rename this function, meaning is different now. - // IT is finding nearest neighbors its own way - //SetVerticalSpacing(part); - - ColPartition* upper = part->SingletonPartner(true); - if (upper) - part->set_nearest_neighbor_above(upper); - - ColPartition* lower = part->SingletonPartner(false); - if (lower) - part->set_nearest_neighbor_below(lower); - } -} - -// High level interface. 
Input is an unmarked ColPartitionGrid -// (namely, clean_part_grid_). Partitions are identified using local -// information and filter/smoothed. The function exit should contain -// a good sampling of the table partitions. -void TableFinder::MarkTablePartitions() { - MarkPartitionsUsingLocalInformation(); - if (textord_tablefind_show_mark) { - ScrollView* table_win = MakeWindow(300, 300, "Initial Table Partitions"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, - ScrollView::AQUAMARINE); - } - FilterFalseAlarms(); - if (textord_tablefind_show_mark) { - ScrollView* table_win = MakeWindow(600, 300, "Filtered Table Partitions"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, - ScrollView::AQUAMARINE); - } - SmoothTablePartitionRuns(); - if (textord_tablefind_show_mark) { - ScrollView* table_win = MakeWindow(900, 300, "Smoothed Table Partitions"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, - ScrollView::AQUAMARINE); - } - FilterFalseAlarms(); - if (textord_tablefind_show_mark || textord_show_tables) { - ScrollView* table_win = MakeWindow(900, 300, "Final Table Partitions"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, - ScrollView::AQUAMARINE); - } -} - -// These types of partitions are marked as table partitions: -// 1- Partitions that have at lease one large gap between words -// 2- Partitions that consist of only one word (no significant gap -// between components) -// 3- Partitions that vertically overlap with other partitions within the -// same column. -// 4- Partitions with leaders before/after them. -void TableFinder::MarkPartitionsUsingLocalInformation() { - // Iterate the ColPartitions in the grid. 
- GridSearch - gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (!part->IsTextType()) // Only consider text partitions - continue; - // Only consider partitions in dominant font size or smaller - if (part->median_height() > kMaxTableCellXheight * global_median_xheight_) - continue; - // Mark partitions with a large gap, or no significant gap as - // table partitions. - // Comments: It produces several false alarms at: - // - last line of a paragraph (fixed) - // - single word section headings - // - page headers and footers - // - numbered equations - // - line drawing regions - // TODO(faisal): detect and fix above-mentioned cases - if (HasWideOrNoInterWordGap(part) || - HasLeaderAdjacent(*part)) { - part->set_table_type(); - } - } -} - -// Check if the partition has at least one large gap between words or no -// significant gap at all -bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { - // Should only get text partitions. - ASSERT_HOST(part->IsTextType()); - // Blob access - BLOBNBOX_CLIST* part_boxes = part->boxes(); - BLOBNBOX_C_IT it(part_boxes); - // Check if this is a relatively small partition (such as a single word) - if (part->bounding_box().width() < - kMinBoxesInTextPartition * part->median_height() && - part_boxes->length() < kMinBoxesInTextPartition) - return true; - - // Variables used to compute inter-blob spacing. - int current_x0 = -1; - int current_x1 = -1; - int previous_x1 = -1; - // Stores the maximum gap detected. - int largest_partition_gap_found = -1; - // Text partition gap limits. If this is text (and not a table), - // there should be at least one gap larger than min_gap and no gap - // larger than max_gap. 
- const double max_gap = kMaxGapInTextPartition * part->median_height(); - const double min_gap = kMinMaxGapInTextPartition * part->median_height(); - - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - current_x0 = blob->bounding_box().left(); - current_x1 = blob->bounding_box().right(); - if (previous_x1 != -1) { - int gap = current_x0 - previous_x1; - - // TODO(nbeato): Boxes may overlap? Huh? - // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors - // on the top right of the page are filtered out with this line. - // Note 2: Iterating over blobs in a partition, so we are looking for - // spacing between the words. - if (gap < 0) { - // More likely case, the blobs slightly overlap. This can happen - // with diacritics (accents) or broken alphabet symbols (characters). - // Merge boxes together by taking max of right sides. - if (-gap < part->median_height() * kMaxBlobOverlapFactor) { - previous_x1 = std::max(previous_x1, current_x1); - continue; - } - // Extreme case, blobs overlap significantly in the same partition... - // This should not happen often (if at all), but it does. - // TODO(nbeato): investigate cases when this happens. - else { - // The behavior before was to completely ignore this case. - } - } - - // If a large enough gap is found, mark it as a table cell (return true) - if (gap > max_gap) - return true; - if (gap > largest_partition_gap_found) - largest_partition_gap_found = gap; - } - previous_x1 = current_x1; - } - // Since no large gap was found, return false if the partition is too - // long to be a data cell - if (part->bounding_box().width() > - kMaxBoxesInDataPartition * part->median_height() || - part_boxes->length() > kMaxBoxesInDataPartition) - return false; - - // A partition may be a single blob. In this case, it's an isolated symbol - // or non-text (such as a ruling or image). - // Detect these as table partitions? Shouldn't this be case by case? 
- // The behavior before was to ignore this, making max_partition_gap < 0 - // and implicitly return true. Just making it explicit. - if (largest_partition_gap_found == -1) - return true; - - // return true if the maximum gap found is smaller than the minimum allowed - // max_gap in a text partition. This indicates that there is no significant - // space in the partition, hence it is likely a single word. - return largest_partition_gap_found < min_gap; -} - -// A criteria for possible tables is that a table may have leaders -// between data cells. An aggressive solution to find such tables is to -// explicitly mark partitions that have adjacent leaders. -// Note that this includes overlapping leaders. However, it does not -// include leaders in different columns on the page. -// Possible false-positive will include lists, such as a table of contents. -// As these arise, the aggressive nature of this search may need to be -// trimmed down. -bool TableFinder::HasLeaderAdjacent(const ColPartition& part) { - if (part.flow() == BTFT_LEADER) - return true; - // Search range is left and right bounded by an offset of the - // median xheight. This offset is to allow some tolerance to the - // the leaders on the page in the event that the alignment is still - // a bit off. - const TBOX& box = part.bounding_box(); - const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_; - const int top = box.top() + search_size; - const int bottom = box.bottom() - search_size; - ColPartitionGridSearch hsearch(&leader_and_ruling_grid_); - for (int direction = 0; direction < 2; ++direction) { - bool right_to_left = (direction == 0); - int x = right_to_left ? box.right() : box.left(); - hsearch.StartSideSearch(x, bottom, top); - ColPartition* leader = nullptr; - while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) { - // The leader could be a horizontal ruling in the grid. - // Make sure it is actually a leader. 
- if (leader->flow() != BTFT_LEADER) - continue; - // This should not happen, they are in different grids. - ASSERT_HOST(&part != leader); - // Make sure the leader shares a page column with the partition, - // otherwise we are spreading across columns. - if (!part.IsInSameColumnAs(*leader)) - break; - // There should be a significant vertical overlap - if (!leader->VSignificantCoreOverlap(part)) - continue; - // Leader passed all tests, so it is adjacent. - return true; - } - } - // No leaders are adjacent to the given partition. - return false; -} - -// Filter individual text partitions marked as table partitions -// consisting of paragraph endings, small section headings, and -// headers and footers. -void TableFinder::FilterFalseAlarms() { - FilterParagraphEndings(); - FilterHeaderAndFooter(); - // TODO(nbeato): Fully justified text as non-table? -} - -void TableFinder::FilterParagraphEndings() { - // Detect last line of paragraph - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() != PT_TABLE) - continue; // Consider only table partitions - - // Paragraph ending should have flowing text above it. - ColPartition* upper_part = part->nearest_neighbor_above(); - if (!upper_part) - continue; - if (upper_part->type() != PT_FLOWING_TEXT) - continue; - if (upper_part->bounding_box().width() < - 2 * part->bounding_box().width()) - continue; - // Check if its the last line of a paragraph. - // In most cases, a paragraph ending should be left-aligned to text line - // above it. Sometimes, it could be a 2 line paragraph, in which case - // the line above it is indented. - // To account for that, check if the partition center is to - // the left of the one above it. 
- int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2; - int upper_mid = (upper_part->bounding_box().left() + - upper_part->bounding_box().right()) / 2; - int current_spacing = 0; // spacing of the current line to margin - int upper_spacing = 0; // spacing of the previous line to the margin - if (left_to_right_language_) { - // Left to right languages, use mid - left to figure out the distance - // the middle is from the left margin. - int left = std::min(part->bounding_box().left(), - upper_part->bounding_box().left()); - current_spacing = mid - left; - upper_spacing = upper_mid - left; - } else { - // Right to left languages, use right - mid to figure out the distance - // the middle is from the right margin. - int right = std::max(part->bounding_box().right(), - upper_part->bounding_box().right()); - current_spacing = right - mid; - upper_spacing = right - upper_mid; - } - if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing) - continue; - - // Paragraphs should have similar fonts. - if (!part->MatchingSizes(*upper_part) || - !part->MatchingStrokeWidth(*upper_part, kStrokeWidthFractionalTolerance, - kStrokeWidthConstantTolerance)) { - continue; - } - - // The last line of a paragraph should be left aligned. - // TODO(nbeato): This would be untrue if the text was right aligned. - // How often is that? - if (part->space_to_left() > - kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) - continue; - // The line above it should be right aligned (assuming justified format). - // Since we can't assume justified text, we compare whitespace to text. - // The above line should have majority spanning text (or the current - // line could have fit on the previous line). So compare - // whitespace to text. 
- if (upper_part->bounding_box().width() < - kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right()) - continue; - - // Ledding above the line should be less than ledding below - if (part->space_above() >= part->space_below() || - part->space_above() > 2 * global_median_ledding_) - continue; - - // If all checks failed, it is probably text. - part->clear_table_type(); - } -} - -void TableFinder::FilterHeaderAndFooter() { - // Consider top-most text colpartition as header and bottom most as footer - ColPartition* header = nullptr; - ColPartition* footer = nullptr; - int max_top = INT32_MIN; - int min_bottom = INT32_MAX; - ColPartitionGridSearch gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (!part->IsTextType()) - continue; // Consider only text partitions - int top = part->bounding_box().top(); - int bottom = part->bounding_box().bottom(); - if (top > max_top) { - max_top = top; - header = part; - } - if (bottom < min_bottom) { - min_bottom = bottom; - footer = part; - } - } - if (header) - header->clear_table_type(); - if (footer) - footer->clear_table_type(); -} - -// Mark all ColPartitions as table cells that have a table cell above -// and below them -// TODO(faisal): This is too aggressive at the moment. The method needs to -// consider spacing and alignment as well. Detection of false alarm table cells -// should also be done as part of it. -void TableFinder::SmoothTablePartitionRuns() { - // Iterate the ColPartitions in the grid. 
- ColPartitionGridSearch gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN) - continue; // Consider only text partitions - ColPartition* upper_part = part->nearest_neighbor_above(); - ColPartition* lower_part = part->nearest_neighbor_below(); - if (!upper_part || !lower_part) - continue; - if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE) - part->set_table_type(); - } - - // Pass 2, do the opposite. If both the upper and lower neighbors - // exist and are not tables, this probably shouldn't be a table. - gsearch.StartFullSearch(); - part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() != PT_TABLE) - continue; // Consider only text partitions - ColPartition* upper_part = part->nearest_neighbor_above(); - ColPartition* lower_part = part->nearest_neighbor_below(); - - // table can't be by itself - if ((upper_part && upper_part->type() != PT_TABLE) && - (lower_part && lower_part->type() != PT_TABLE)) { - part->clear_table_type(); - } - } -} - -// Set the type of a column segment based on the ratio of table to text cells -void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) { - ColSegment_IT it(column_blocks); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColSegment* seg = it.data(); - TBOX box = seg->bounding_box(); - int num_table_cells = 0; - int num_text_cells = 0; - GridSearch - rsearch(&clean_part_grid_); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(box); - ColPartition* part = nullptr; - while ((part = rsearch.NextRectSearch()) != nullptr) { - if (part->type() == PT_TABLE) { - num_table_cells++; - } else if (part->type() == PT_FLOWING_TEXT) { - num_text_cells++; - } - } - // If a column block has no text or table partition in it, it is not needed - // for table detection. 
- if (!num_table_cells && !num_text_cells) { - delete it.extract(); - } else { - seg->set_num_table_cells(num_table_cells); - seg->set_num_text_cells(num_text_cells); - // set column type based on the ratio of table to text cells - seg->set_type(); - } - } -} - -// Move column blocks to grid -void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, - ColSegmentGrid *col_seg_grid) { - ColSegment_IT it(segments); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColSegment* seg = it.extract(); - col_seg_grid->InsertBBox(true, true, seg); - } -} - -// Merge column blocks if a split is detected due to the presence of a -// table. A text block is considered split if it has multiple -// neighboring blocks above/below it, and at least one of the -// neighboring blocks is of table type (has a high density of table -// partitions). In this case neighboring blocks in the direction -// (above/below) of the table block are merged with the text block. - -// Comment: This method does not handle split due to a full page table -// since table columns in this case do not have a text column on which -// split decision can be based. -void TableFinder::GridMergeColumnBlocks() { - int margin = gridsize(); - - // Iterate the Column Blocks in the grid. - GridSearch - gsearch(&col_seg_grid_); - gsearch.StartFullSearch(); - ColSegment* seg; - while ((seg = gsearch.NextFullSearch()) != nullptr) { - if (seg->type() != COL_TEXT) - continue; // only consider text blocks for split detection - bool neighbor_found = false; - bool modified = false; // Modified at least once - // keep expanding current box as long as neighboring table columns - // are found above or below it. 
- do { - TBOX box = seg->bounding_box(); - // slightly expand the search region vertically - int top_range = std::min(box.top() + margin, static_cast(tright().y())); - int bottom_range = std::max(box.bottom() - margin, static_cast(bleft().y())); - box.set_top(top_range); - box.set_bottom(bottom_range); - neighbor_found = false; - GridSearch - rectsearch(&col_seg_grid_); - rectsearch.StartRectSearch(box); - ColSegment* neighbor = nullptr; - while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { - if (neighbor == seg) - continue; - const TBOX& neighbor_box = neighbor->bounding_box(); - // If the neighbor box significantly overlaps with the current - // box (due to the expansion of the current box in the - // previous iteration of this loop), remove the neighbor box - // and expand the current box to include it. - if (neighbor_box.overlap_fraction(box) >= 0.9) { - seg->InsertBox(neighbor_box); - modified = true; - rectsearch.RemoveBBox(); - gsearch.RepositionIterator(); - delete neighbor; - continue; - } - // Only expand if the neighbor box is of table type - if (neighbor->type() != COL_TABLE) - continue; - // Insert the neighbor box into the current column block - if (neighbor_box.major_x_overlap(box) && - !box.contains(neighbor_box)) { - seg->InsertBox(neighbor_box); - neighbor_found = true; - modified = true; - rectsearch.RemoveBBox(); - gsearch.RepositionIterator(); - delete neighbor; - } - } - } while (neighbor_found); - if (modified) { - // Because the box has changed, it has to be removed first. - gsearch.RemoveBBox(); - col_seg_grid_.InsertBBox(true, true, seg); - gsearch.RepositionIterator(); - } - } -} - -// Group horizontally overlapping table partitions into table columns. -// TODO(faisal): This is too aggressive at the moment. The method should -// consider more attributes to group table partitions together. Some common -// errors are: -// 1- page number is merged with a table column above it even -// if there is a large vertical gap between them. 
-// 2- column headers go on to catch one of the columns arbitrarily -// 3- an isolated noise blob near page top or bottom merges with the table -// column below/above it -// 4- cells from two vertically adjacent tables merge together to make a -// single column resulting in merging of the two tables -void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { - ColSegment_IT it(table_columns); - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->inside_table_column() || part->type() != PT_TABLE) - continue; // prevent a partition to be assigned to multiple columns - const TBOX& box = part->bounding_box(); - ColSegment* col = new ColSegment(); - col->InsertBox(box); - part->set_inside_table_column(true); - // Start a search below the current cell to find bottom neighbours - // Note: a full search will always process things above it first, so - // this should be starting at the highest cell and working its way down. 
- GridSearch - vsearch(&clean_part_grid_); - vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom()); - ColPartition* neighbor = nullptr; - bool found_neighbours = false; - while ((neighbor = vsearch.NextVerticalSearch(true)) != nullptr) { - // only consider neighbors not assigned to any column yet - if (neighbor->inside_table_column()) - continue; - // Horizontal lines should not break the flow - if (neighbor->IsHorizontalLine()) - continue; - // presence of a non-table neighbor marks the end of current - // table column - if (neighbor->type() != PT_TABLE) - break; - // add the neighbor partition to the table column - const TBOX& neighbor_box = neighbor->bounding_box(); - col->InsertBox(neighbor_box); - neighbor->set_inside_table_column(true); - found_neighbours = true; - } - if (found_neighbours) { - it.add_after_then_move(col); - } else { - part->set_inside_table_column(false); - delete col; - } - } -} - -// Mark regions in a column that are x-bounded by the column boundaries and -// y-bounded by the table columns' projection on the y-axis as table regions -void TableFinder::GetTableRegions(ColSegment_LIST* table_columns, - ColSegment_LIST* table_regions) { - ColSegment_IT cit(table_columns); - ColSegment_IT rit(table_regions); - // Iterate through column blocks - GridSearch - gsearch(&col_seg_grid_); - gsearch.StartFullSearch(); - ColSegment* part; - int page_height = tright().y() - bleft().y(); - ASSERT_HOST(page_height > 0); - // create a bool array to hold projection on y-axis - bool* table_region = new bool[page_height]; - while ((part = gsearch.NextFullSearch()) != nullptr) { - const TBOX& part_box = part->bounding_box(); - // reset the projection array - for (int i = 0; i < page_height; i++) { - table_region[i] = false; - } - // iterate through all table columns to find regions in the current - // page column block - cit.move_to_first(); - for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) { - TBOX col_box = 
cit.data()->bounding_box(); - // find intersection region of table column and page column - TBOX intersection_box = col_box.intersection(part_box); - // project table column on the y-axis - for (int i = intersection_box.bottom(); i < intersection_box.top(); i++) { - table_region[i - bleft().y()] = true; - } - } - // set x-limits of table regions to page column width - TBOX current_table_box; - current_table_box.set_left(part_box.left()); - current_table_box.set_right(part_box.right()); - // go through the y-axis projection to find runs of table - // regions. Each run makes one table region. - for (int i = 1; i < page_height; i++) { - // detect start of a table region - if (!table_region[i - 1] && table_region[i]) { - current_table_box.set_bottom(i + bleft().y()); - } - // TODO(nbeato): Is it guaranteed that the last row is not a table region? - // detect end of a table region - if (table_region[i - 1] && !table_region[i]) { - current_table_box.set_top(i + bleft().y()); - if (!current_table_box.null_box()) { - ColSegment* seg = new ColSegment(); - seg->InsertBox(current_table_box); - rit.add_after_then_move(seg); - } - } - } - } - delete[] table_region; -} - -// Merge table regions corresponding to tables spanning multiple columns if -// there is a colpartition (horizontal ruling line or normal text) that -// touches both regions. -// TODO(faisal): A rare error occurs if there are two horizontally adjacent -// tables with aligned ruling lines. In this case, line finder returns a -// single line and hence the tables get merged together -void TableFinder::GridMergeTableRegions() { - // Iterate the table regions in the grid. 
- GridSearch - gsearch(&table_grid_); - gsearch.StartFullSearch(); - ColSegment* seg = nullptr; - while ((seg = gsearch.NextFullSearch()) != nullptr) { - bool neighbor_found = false; - bool modified = false; // Modified at least once - do { - // Start a rectangle search x-bounded by the image and y by the table - const TBOX& box = seg->bounding_box(); - TBOX search_region(box); - search_region.set_left(bleft().x()); - search_region.set_right(tright().x()); - neighbor_found = false; - GridSearch - rectsearch(&table_grid_); - rectsearch.StartRectSearch(search_region); - ColSegment* neighbor = nullptr; - while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { - if (neighbor == seg) - continue; - const TBOX& neighbor_box = neighbor->bounding_box(); - // Check if a neighbor box has a large overlap with the table - // region. This may happen as a result of merging two table - // regions in the previous iteration. - if (neighbor_box.overlap_fraction(box) >= 0.9) { - seg->InsertBox(neighbor_box); - rectsearch.RemoveBBox(); - gsearch.RepositionIterator(); - delete neighbor; - modified = true; - continue; - } - // Check if two table regions belong together based on a common - // horizontal ruling line - if (BelongToOneTable(box, neighbor_box)) { - seg->InsertBox(neighbor_box); - neighbor_found = true; - modified = true; - rectsearch.RemoveBBox(); - gsearch.RepositionIterator(); - delete neighbor; - } - } - } while (neighbor_found); - if (modified) { - // Because the box has changed, it has to be removed first. - gsearch.RemoveBBox(); - table_grid_.InsertBBox(true, true, seg); - gsearch.RepositionIterator(); - } - } -} - -// Decide if two table regions belong to one table based on a common -// horizontal ruling line or another colpartition -bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { - // Check the obvious case. 
Most likely not true because overlapping boxes - // should already be merged, but seems like a good thing to do in case things - // change. - if (box1.overlap(box2)) - return true; - // Check for ColPartitions spanning both table regions - TBOX bbox = box1.bounding_union(box2); - // Start a rect search on bbox - GridSearch - rectsearch(&clean_part_grid_); - rectsearch.StartRectSearch(bbox); - ColPartition* part = nullptr; - while ((part = rectsearch.NextRectSearch()) != nullptr) { - const TBOX& part_box = part->bounding_box(); - // return true if a colpartition spanning both table regions is found - if (part_box.overlap(box1) && part_box.overlap(box2) && - !part->IsImageType()) - return true; - } - return false; -} - -// Adjust table boundaries by: -// - building a tight bounding box around all ColPartitions contained in it. -// - expanding table boundaries to include all colpartitions that overlap the -// table by more than half of their area -// - expanding table boundaries to include nearby horizontal rule lines -// - expanding table vertically to include left out column headers -// TODO(faisal): Expansion of table boundaries is quite aggressive. It usually -// makes following errors: -// 1- horizontal lines consisting of underlines are included in the table if -// they are close enough -// 2- horizontal lines originating from noise tend to get merged with a table -// near the top of the page -// 3- the criteria for including horizontal lines is very generous. 
Many times -// horizontal lines separating headers and footers get merged with a -// single-column table in a multi-column page thereby including text -// from the neighboring column inside the table -// 4- the criteria for including left out column headers also tends to -// occasionally include text-lines above the tables, typically from -// table caption -void TableFinder::AdjustTableBoundaries() { - // Iterate the table regions in the grid - ColSegment_CLIST adjusted_tables; - ColSegment_C_IT it(&adjusted_tables); - ColSegmentGridSearch gsearch(&table_grid_); - gsearch.StartFullSearch(); - ColSegment* table = nullptr; - while ((table = gsearch.NextFullSearch()) != nullptr) { - const TBOX& table_box = table->bounding_box(); - TBOX grown_box = table_box; - GrowTableBox(table_box, &grown_box); - // To prevent a table from expanding again, do not insert the - // modified box back to the grid. Instead move it to a list and - // and remove it from the grid. The list is moved later back to the grid. - if (!grown_box.null_box()) { - ColSegment* col = new ColSegment(); - col->InsertBox(grown_box); - it.add_after_then_move(col); - } - gsearch.RemoveBBox(); - delete table; - } - // clear table grid to move final tables in it - // TODO(nbeato): table_grid_ should already be empty. The above loop - // removed everything. Maybe just assert it is empty? - table_grid_.Clear(); - it.move_to_first(); - // move back final tables to table_grid_ - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColSegment* seg = it.extract(); - table_grid_.InsertBBox(true, true, seg); - } -} - -void TableFinder::GrowTableBox(const TBOX& table_box, TBOX* result_box) { - // TODO(nbeato): The growing code is a bit excessive right now. - // By removing these lines, the partitions considered need - // to have some overlap or be special cases. These lines could - // be added again once a check is put in place to make sure that - // growing tables don't stomp on a lot of non-table partitions. 
- - // search for horizontal ruling lines within the vertical margin - // int vertical_margin = kRulingVerticalMargin * gridsize(); - TBOX search_box = table_box; - // int top = MIN(search_box.top() + vertical_margin, tright().y()); - // int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y()); - // search_box.set_top(top); - // search_box.set_bottom(bottom); - - GrowTableToIncludePartials(table_box, search_box, result_box); - GrowTableToIncludeLines(table_box, search_box, result_box); - IncludeLeftOutColumnHeaders(result_box); -} - -// Grow a table by increasing the size of the box to include -// partitions with significant overlap with the table. -void TableFinder::GrowTableToIncludePartials(const TBOX& table_box, - const TBOX& search_range, - TBOX* result_box) { - // Rulings are in a different grid, so search 2 grids for rulings, text, - // and table partitions that are not entirely within the new box. - for (int i = 0; i < 2; ++i) { - ColPartitionGrid* grid = (i == 0) ? &fragmented_text_grid_ : - &leader_and_ruling_grid_; - ColPartitionGridSearch rectsearch(grid); - rectsearch.StartRectSearch(search_range); - ColPartition* part = nullptr; - while ((part = rectsearch.NextRectSearch()) != nullptr) { - // Only include text and table types. - if (part->IsImageType()) - continue; - const TBOX& part_box = part->bounding_box(); - // Include partition in the table if more than half of it - // is covered by the table - if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { - *result_box = result_box->bounding_union(part_box); - continue; - } - } - } -} - -// Grow a table by expanding to the extents of significantly -// overlapping lines. 
-void TableFinder::GrowTableToIncludeLines(const TBOX& table_box, - const TBOX& search_range, - TBOX* result_box) { - ColPartitionGridSearch rsearch(&leader_and_ruling_grid_); - rsearch.SetUniqueMode(true); - rsearch.StartRectSearch(search_range); - ColPartition* part = nullptr; - while ((part = rsearch.NextRectSearch()) != nullptr) { - // TODO(nbeato) This should also do vertical, but column - // boundaries are breaking things. This function needs to be - // updated to allow vertical lines as well. - if (!part->IsLineType()) - continue; - // Avoid the following function call if the result of the - // function is irrelevant. - const TBOX& part_box = part->bounding_box(); - if (result_box->contains(part_box)) - continue; - // Include a partially overlapping horizontal line only if the - // extra ColPartitions that will be included due to expansion - // have large side spacing w.r.t. columns containing them. - if (HLineBelongsToTable(*part, table_box)) - *result_box = result_box->bounding_union(part_box); - // TODO(nbeato): Vertical - } -} - -// Checks whether the horizontal line belong to the table by looking at the -// side spacing of extra ColParitions that will be included in the table -// due to expansion -bool TableFinder::HLineBelongsToTable(const ColPartition& part, - const TBOX& table_box) { - if (!part.IsHorizontalLine()) - return false; - const TBOX& part_box = part.bounding_box(); - if (!part_box.major_x_overlap(table_box)) - return false; - // Do not consider top-most horizontal line since it usually - // originates from noise. - // TODO(nbeato): I had to comment this out because the ruling grid doesn't - // have neighbors solved. - // if (!part.nearest_neighbor_above()) - // return false; - const TBOX bbox = part_box.bounding_union(table_box); - // In the "unioned table" box (the table extents expanded by the line), - // keep track of how many partitions have significant padding to the left - // and right. 
If more than half of the partitions covered by the new table - // have significant spacing, the line belongs to the table and the table - // grows to include all of the partitions. - int num_extra_partitions = 0; - int extra_space_to_right = 0; - int extra_space_to_left = 0; - // Rulings are in a different grid, so search 2 grids for rulings, text, - // and table partitions that are introduced by the new box. - for (int i = 0; i < 2; ++i) { - ColPartitionGrid* grid = (i == 0) ? &clean_part_grid_ : - &leader_and_ruling_grid_; - // Start a rect search on bbox - ColPartitionGridSearch rectsearch(grid); - rectsearch.SetUniqueMode(true); - rectsearch.StartRectSearch(bbox); - ColPartition* extra_part = nullptr; - while ((extra_part = rectsearch.NextRectSearch()) != nullptr) { - // ColPartition already in table - const TBOX& extra_part_box = extra_part->bounding_box(); - if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable) - continue; - // Non-text ColPartitions do not contribute - if (extra_part->IsImageType()) - continue; - // Consider this partition. - num_extra_partitions++; - // presence of a table cell is a strong hint, so just increment the scores - // without looking at the spacing. 
- if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) { - extra_space_to_right++; - extra_space_to_left++; - continue; - } - int space_threshold = kSideSpaceMargin * part.median_height(); - if (extra_part->space_to_right() > space_threshold) - extra_space_to_right++; - if (extra_part->space_to_left() > space_threshold) - extra_space_to_left++; - } - } - // tprintf("%d %d %d\n", - // num_extra_partitions,extra_space_to_right,extra_space_to_left); - return (extra_space_to_right > num_extra_partitions / 2) || - (extra_space_to_left > num_extra_partitions / 2); -} - -// Look for isolated column headers above the given table box and -// include them in the table -void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) { - // Start a search above the current table to look for column headers - ColPartitionGridSearch vsearch(&clean_part_grid_); - vsearch.StartVerticalSearch(table_box->left(), table_box->right(), - table_box->top()); - ColPartition* neighbor = nullptr; - ColPartition* previous_neighbor = nullptr; - while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) { - // Max distance to find a table heading. 
- const int max_distance = kMaxColumnHeaderDistance * - neighbor->median_height(); - int table_top = table_box->top(); - const TBOX& box = neighbor->bounding_box(); - // Do not continue if the next box is way above - if (box.bottom() - table_top > max_distance) - break; - // Unconditionally include partitions of type TABLE or LINE - // TODO(faisal): add some reasonable conditions here - if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) { - table_box->set_top(box.top()); - previous_neighbor = nullptr; - continue; - } - // If there are two text partitions, one above the other, without a table - // cell on their left or right side, consider them a barrier and quit - if (previous_neighbor == nullptr) { - previous_neighbor = neighbor; - } else { - const TBOX& previous_box = previous_neighbor->bounding_box(); - if (!box.major_y_overlap(previous_box)) - break; - } - } -} - -// Remove false alarms consiting of a single column based on their -// projection on the x-axis. Projection of a real table on the x-axis -// should have at least one zero-valley larger than the global median -// x-height of the page. 
-void TableFinder::DeleteSingleColumnTables() { - int page_width = tright().x() - bleft().x(); - ASSERT_HOST(page_width > 0); - // create an integer array to hold projection on x-axis - int* table_xprojection = new int[page_width]; - // Iterate through all tables in the table grid - GridSearch - table_search(&table_grid_); - table_search.StartFullSearch(); - ColSegment* table; - while ((table = table_search.NextFullSearch()) != nullptr) { - TBOX table_box = table->bounding_box(); - // reset the projection array - for (int i = 0; i < page_width; i++) { - table_xprojection[i] = 0; - } - // Start a rect search on table_box - GridSearch - rectsearch(&clean_part_grid_); - rectsearch.SetUniqueMode(true); - rectsearch.StartRectSearch(table_box); - ColPartition* part; - while ((part = rectsearch.NextRectSearch()) != nullptr) { - if (!part->IsTextType()) - continue; // Do not consider non-text partitions - if (part->flow() == BTFT_LEADER) - continue; // Assume leaders are in tables - TBOX part_box = part->bounding_box(); - // Do not consider partitions partially covered by the table - if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable) - continue; - BLOBNBOX_CLIST* part_boxes = part->boxes(); - BLOBNBOX_C_IT pit(part_boxes); - - // Make sure overlapping blobs don't artificially inflate the number - // of rows in the table. This happens frequently with things such as - // decimals and split characters. Do this by assuming the column - // partition is sorted mostly left to right and just clip - // bounding boxes by the previous box's extent. 
- int next_position_to_write = 0; - - for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { - BLOBNBOX *pblob = pit.data(); - // ignore blob height for the purpose of projection since we - // are only interested in finding valleys - int xstart = pblob->bounding_box().left(); - int xend = pblob->bounding_box().right(); - - xstart = std::max(xstart, next_position_to_write); - for (int i = xstart; i < xend; i++) - table_xprojection[i - bleft().x()]++; - next_position_to_write = xend; - } - } - // Find largest valley between two reasonable peaks in the table - if (!GapInXProjection(table_xprojection, page_width)) { - table_search.RemoveBBox(); - delete table; - } - } - delete[] table_xprojection; -} - -// Return true if at least one gap larger than the global x-height -// exists in the horizontal projection -bool TableFinder::GapInXProjection(int* xprojection, int length) { - // Find peak value of the histogram - int peak_value = 0; - for (int i = 0; i < length; i++) { - if (xprojection[i] > peak_value) { - peak_value = xprojection[i]; - } - } - // Peak value represents the maximum number of horizontally - // overlapping colpartitions, so this can be considered as the - // number of rows in the table - if (peak_value < kMinRowsInTable) - return false; - double projection_threshold = kSmallTableProjectionThreshold * peak_value; - if (peak_value >= kLargeTableRowCount) - projection_threshold = kLargeTableProjectionThreshold * peak_value; - // Threshold the histogram - for (int i = 0; i < length; i++) { - xprojection[i] = (xprojection[i] >= projection_threshold) ? 
1 : 0; - } - // Find the largest run of zeros between two ones - int largest_gap = 0; - int run_start = -1; - for (int i = 1; i < length; i++) { - // detect start of a run of zeros - if (xprojection[i - 1] && !xprojection[i]) { - run_start = i; - } - // detect end of a run of zeros and update the value of largest gap - if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) { - int gap = i - run_start; - if (gap > largest_gap) - largest_gap = gap; - run_start = -1; - } - } - return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_; -} - -// Given the location of a table "guess", try to overlay a cellular -// grid in the location, adjusting the boundaries. -// TODO(nbeato): Falsely introduces: -// -headers/footers (not any worse, too much overlap destroys cells) -// -page numbers (not worse, included because maximize margins) -// -equations (nicely fit into a celluar grid, but more sparsely) -// -figures (random text box, also sparse) -// -small left-aligned text areas with overlapping positioned whitespace -// (rejected before) -// Overall, this just needs some more work. -void TableFinder::RecognizeTables() { - ScrollView* table_win = nullptr; - if (textord_show_tables) { - table_win = MakeWindow(0, 0, "Table Structure"); - DisplayColPartitions(table_win, &fragmented_text_grid_, - ScrollView::BLUE, ScrollView::LIGHT_BLUE); - // table_grid_.DisplayBoxes(table_win); - } - - - TableRecognizer recognizer; - recognizer.Init(); - recognizer.set_line_grid(&leader_and_ruling_grid_); - recognizer.set_text_grid(&fragmented_text_grid_); - recognizer.set_max_text_height(global_median_xheight_ * 2.0); - recognizer.set_min_height(1.5 * gridheight()); - // Loop over all of the tables and try to fit them. - // Store the good tables here. 
- ColSegment_CLIST good_tables; - ColSegment_C_IT good_it(&good_tables); - - ColSegmentGridSearch gsearch(&table_grid_); - gsearch.StartFullSearch(); - ColSegment* found_table = nullptr; - while ((found_table = gsearch.NextFullSearch()) != nullptr) { - gsearch.RemoveBBox(); - - // The goal is to make the tables persistent in a list. - // When that happens, this will move into the search loop. - const TBOX& found_box = found_table->bounding_box(); - StructuredTable* table_structure = recognizer.RecognizeTable(found_box); - - // Process a table. Good tables are inserted into the grid again later on - // We can't change boxes in the grid while it is running a search. - if (table_structure != nullptr) { - if (textord_show_tables) { - table_structure->Display(table_win, ScrollView::LIME_GREEN); - } - found_table->set_bounding_box(table_structure->bounding_box()); - delete table_structure; - good_it.add_after_then_move(found_table); - } else { - delete found_table; - } - } - // TODO(nbeato): MERGE!! There is awesome info now available for merging. - - // At this point, the grid is empty. We can safely insert the good tables - // back into grid. - for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward()) - table_grid_.InsertBBox(true, true, good_it.extract()); -} - -// Displays the column segments in some window. 
-void TableFinder::DisplayColSegments(ScrollView* win, - ColSegment_LIST *segments, - ScrollView::Color color) { -#ifndef GRAPHICS_DISABLED - win->Pen(color); - win->Brush(ScrollView::NONE); - ColSegment_IT it(segments); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColSegment* col = it.data(); - const TBOX& box = col->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - win->Rectangle(left_x, bottom_y, right_x, top_y); - } - win->UpdateWindow(); -#endif -} - -void TableFinder::DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, - ScrollView::Color color) { -#ifndef GRAPHICS_DISABLED - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(grid); - gsearch.StartFullSearch(); - ColSegment* seg = nullptr; - while ((seg = gsearch.NextFullSearch()) != nullptr) { - const TBOX& box = seg->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - win->Brush(ScrollView::NONE); - win->Pen(color); - win->Rectangle(left_x, bottom_y, right_x, top_y); - } - win->UpdateWindow(); -#endif -} - -// Displays the colpartitions using a new coloring on an existing window. -// Note: This method is only for debug purpose during development and -// would not be part of checked in code -void TableFinder::DisplayColPartitions(ScrollView* win, - ColPartitionGrid* grid, - ScrollView::Color default_color, - ScrollView::Color table_color) { -#ifndef GRAPHICS_DISABLED - ScrollView::Color color = default_color; - // Iterate the ColPartitions in the grid. 
- GridSearch - gsearch(grid); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - color = default_color; - if (part->type() == PT_TABLE) - color = table_color; - - const TBOX& box = part->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - win->Brush(ScrollView::NONE); - win->Pen(color); - win->Rectangle(left_x, bottom_y, right_x, top_y); - } - win->UpdateWindow(); -#endif -} -void TableFinder::DisplayColPartitions(ScrollView* win, - ColPartitionGrid* grid, - ScrollView::Color default_color) { - DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW); -} - -void TableFinder::DisplayColPartitionConnections( - ScrollView* win, - ColPartitionGrid* grid, - ScrollView::Color color) { -#ifndef GRAPHICS_DISABLED - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(grid); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - while ((part = gsearch.NextFullSearch()) != nullptr) { - const TBOX& box = part->bounding_box(); - int left_x = box.left(); - int right_x = box.right(); - int top_y = box.top(); - int bottom_y = box.bottom(); - - ColPartition* upper_part = part->nearest_neighbor_above(); - if (upper_part) { - const TBOX& upper_box = upper_part->bounding_box(); - int mid_x = (left_x + right_x) / 2; - int mid_y = (top_y + bottom_y) / 2; - int other_x = (upper_box.left() + upper_box.right()) / 2; - int other_y = (upper_box.top() + upper_box.bottom()) / 2; - win->Brush(ScrollView::NONE); - win->Pen(color); - win->Line(mid_x, mid_y, other_x, other_y); - } - ColPartition* lower_part = part->nearest_neighbor_below(); - if (lower_part) { - const TBOX& lower_box = lower_part->bounding_box(); - int mid_x = (left_x + right_x) / 2; - int mid_y = (top_y + bottom_y) / 2; - int other_x = (lower_box.left() + lower_box.right()) / 2; - int other_y = (lower_box.top() + lower_box.bottom()) / 2; - 
win->Brush(ScrollView::NONE); - win->Pen(color); - win->Line(mid_x, mid_y, other_x, other_y); - } - } - win->UpdateWindow(); -#endif -} - -// Merge all colpartitions in table regions to make them a single -// colpartition and revert types of isolated table cells not -// assigned to any table to their original types. -void TableFinder::MakeTableBlocks(ColPartitionGrid* grid, - ColPartitionSet** all_columns, - WidthCallback* width_cb) { - // Since we have table blocks already, remove table tags from all - // colpartitions - GridSearch - gsearch(grid); - gsearch.StartFullSearch(); - ColPartition* part = nullptr; - - while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() == PT_TABLE) { - part->clear_table_type(); - } - } - // Now make a single colpartition out of each table block and remove - // all colpartitions contained within a table - GridSearch - table_search(&table_grid_); - table_search.StartFullSearch(); - ColSegment* table; - while ((table = table_search.NextFullSearch()) != nullptr) { - const TBOX& table_box = table->bounding_box(); - // Start a rect search on table_box - GridSearch - rectsearch(grid); - rectsearch.StartRectSearch(table_box); - ColPartition* part; - ColPartition* table_partition = nullptr; - while ((part = rectsearch.NextRectSearch()) != nullptr) { - // Do not consider image partitions - if (!part->IsTextType()) - continue; - TBOX part_box = part->bounding_box(); - // Include partition in the table if more than half of it - // is covered by the table - if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { - rectsearch.RemoveBBox(); - if (table_partition) { - table_partition->Absorb(part, width_cb); - } else { - table_partition = part; - } - } - } - // Insert table colpartition back to part_grid_ - if (table_partition) { - // To match the columns used when transforming to blocks, the new table - // partition must have its first and last column set at the grid y that - // corresponds to its bottom. 
- const TBOX& table_box = table_partition->bounding_box(); - int grid_x, grid_y; - grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y); - table_partition->SetPartitionType(resolution_, all_columns[grid_y]); - table_partition->set_table_type(); - table_partition->set_blob_type(BRT_TEXT); - table_partition->set_flow(BTFT_CHAIN); - table_partition->SetBlobTypes(); - grid->InsertBBox(true, true, table_partition); - } - } -} - -//////// ColSegment code -//////// -ColSegment::ColSegment() - : ELIST_LINK(), - num_table_cells_(0), - num_text_cells_(0), - type_(COL_UNKNOWN) { -} - -// Provides a color for BBGrid to draw the rectangle. -ScrollView::Color ColSegment::BoxColor() const { - const ScrollView::Color kBoxColors[PT_COUNT] = { - ScrollView::YELLOW, - ScrollView::BLUE, - ScrollView::YELLOW, - ScrollView::MAGENTA, - }; - return kBoxColors[type_]; -} - -// Insert a box into this column segment -void ColSegment::InsertBox(const TBOX& other) { - bounding_box_ = bounding_box_.bounding_union(other); -} - -// Set column segment type based on the ratio of text and table partitions -// in it. -void ColSegment::set_type() { - if (num_table_cells_ > kTableColumnThreshold * num_text_cells_) - type_ = COL_TABLE; - else if (num_text_cells_ > num_table_cells_) - type_ = COL_TEXT; - else - type_ = COL_MIXED; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablefind.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablefind.h deleted file mode 100644 index 20e746a2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablefind.h +++ /dev/null @@ -1,430 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tablefind.h -// Description: Helper classes to find tables from ColPartitions. -// Author: Faisal Shafait (faisal.shafait@dfki.de) -// Created: Tue Jan 06 11:13:01 PST 2009 -// -// (C) Copyright 2009, Google Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_TABLEFIND_H_ -#define TESSERACT_TEXTORD_TABLEFIND_H_ - -#include "colpartitiongrid.h" -#include "elst.h" -#include "rect.h" - -namespace tesseract { - -// Possible types for a column segment. -enum ColSegType { - COL_UNKNOWN, - COL_TEXT, - COL_TABLE, - COL_MIXED, - COL_COUNT -}; - -class ColPartitionSet; - -// ColSegment holds rectangular blocks that represent segmentation of a page -// into regions containing single column text/table. 
-class ColSegment; -ELISTIZEH(ColSegment) -CLISTIZEH(ColSegment) - -class ColSegment : public ELIST_LINK { - public: - ColSegment(); - ~ColSegment() = default; - - // Simple accessors and mutators - const TBOX& bounding_box() const { - return bounding_box_; - } - - void set_top(int y) { - bounding_box_.set_top(y); - } - - void set_bottom(int y) { - bounding_box_.set_bottom(y); - } - - void set_left(int x) { - bounding_box_.set_left(x); - } - - void set_right(int x) { - bounding_box_.set_right(x); - } - - void set_bounding_box(const TBOX& other) { - bounding_box_ = other; - } - - int get_num_table_cells() const { - return num_table_cells_; - } - - // set the number of table colpartitions covered by the bounding_box_ - void set_num_table_cells(int n) { - num_table_cells_ = n; - } - - int get_num_text_cells() const { - return num_text_cells_; - } - - // set the number of text colpartitions covered by the bounding_box_ - void set_num_text_cells(int n) { - num_text_cells_ = n; - } - - ColSegType type() const { - return type_; - } - - // set the type of the block based on the ratio of table to text - // colpartitions covered by it. - void set_type(); - - // Provides a color for BBGrid to draw the rectangle. - ScrollView::Color BoxColor() const; - - // Insert a rectangle into bounding_box_ - void InsertBox(const TBOX& other); - - private: - TBOX bounding_box_; // bounding box - int num_table_cells_; - int num_text_cells_; - ColSegType type_; -}; - -// Typedef BBGrid of ColSegments -using ColSegmentGrid = BBGrid; -using ColSegmentGridSearch = GridSearch; - -// TableFinder is a utility class to find a set of tables given a set of -// ColPartitions and Columns. The TableFinder will mark candidate ColPartitions -// based on research in "Table Detection in Heterogeneous Documents". 
-// Usage flow is as follows: -// TableFinder finder; -// finder.InsertCleanPartitions(/* grid info */) -// finder.LocateTables(/* ColPartitions and Columns */); -// finder.Update TODO(nbeato) -class TableFinder { - public: - // Constructor is simple initializations - TableFinder(); - ~TableFinder(); - - // Set the resolution of the connected components in ppi. - void set_resolution(int resolution) { - resolution_ = resolution; - } - // Change the reading order. Initially it is left to right. - void set_left_to_right_language(bool order); - - // Initialize - void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right); - - // Copy cleaned partitions from ColumnFinder's part_grid_ to this - // clean_part_grid_ and insert dot-like noise into period_grid_. - // It resizes the grids in this object to the dimensions of grid. - void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block); - - // High level function to perform table detection - // Finds tables and updates the grid object with new partitions for the - // tables. The columns and width callbacks are used to merge tables. - // The reskew argument is only used to write the tables to the out.png - // if that feature is enabled. - void LocateTables(ColPartitionGrid* grid, - ColPartitionSet** columns, - WidthCallback* width_cb, - const FCOORD& reskew); - - protected: - // Access for the grid dimensions. - // The results will not be correct until InsertCleanPartitions - // has been called. The values are taken from the grid passed as an argument - // to that function. - int gridsize() const; - int gridwidth() const; - int gridheight() const; - const ICOORD& bleft() const; - const ICOORD& tright() const; - - // Makes a window for debugging, see BBGrid - ScrollView* MakeWindow(int x, int y, const char* window_name); - - //////// Functions to insert objects from the grid into the table finder. - //////// In all cases, ownership is transferred to the table finder. 
- // Inserts text into the table finder. - void InsertTextPartition(ColPartition* part); - void InsertFragmentedTextPartition(ColPartition* part); - void InsertLeaderPartition(ColPartition* part); - void InsertRulingPartition(ColPartition* part); - void InsertImagePartition(ColPartition* part); - void SplitAndInsertFragmentedTextPartition(ColPartition* part); - bool AllowTextPartition(const ColPartition& part) const; - bool AllowBlob(const BLOBNBOX& blob) const; - - //////// Functions that manipulate ColPartitions in the part_grid_ ///// - //////// to find tables. - //////// - - // Utility function to move segments to col_seg_grid - // Note: Move includes ownership, - // so segments will be be owned by col_seg_grid - void MoveColSegmentsToGrid(ColSegment_LIST* segments, - ColSegmentGrid* col_seg_grid); - - //////// Set up code to run during table detection to correctly - //////// initialize variables on column partitions that are used later. - //////// - - // Initialize the grid and partitions - void InitializePartitions(ColPartitionSet** all_columns); - - // Set left, right and top, bottom spacings of each colpartition. - // Left/right spacings are w.r.t the column boundaries - // Top/bottom spacings are w.r.t. previous and next colpartitions - static void SetPartitionSpacings(ColPartitionGrid* grid, - ColPartitionSet** all_columns); - - // Set spacing and closest neighbors above and below a given colpartition. - void SetVerticalSpacing(ColPartition* part); - - // Set global spacing estimates. This function is dependent on the - // partition spacings. So make sure SetPartitionSpacings is called - // on the same grid before this. - void SetGlobalSpacings(ColPartitionGrid* grid); - // Access to the global median xheight. The xheight is the height - // of a lowercase 'x' character on the page. This can be viewed as the - // average height of a lowercase letter in a textline. 
As a result - // it is used to make assumptions about spacing between words and - // table cells. - void set_global_median_xheight(int xheight); - // Access to the global median blob width. The width is useful - // when deciding if a partition is noise. - void set_global_median_blob_width(int width); - // Access to the global median ledding. The ledding is the distance between - // two adjacent text lines. This value can be used to get a rough estimate - // for the amount of space between two lines of text. As a result, it - // is used to calculate appropriate spacing between adjacent rows of text. - void set_global_median_ledding(int ledding); - - // Updates the nearest neighbors for each ColPartition in clean_part_grid_. - // The neighbors are most likely SingletonPartner calls after the neighbors - // are assigned. This is hear until it is decided to remove the - // nearest_neighbor code in ColPartition - void FindNeighbors(); - - //////// Functions to mark candidate column partitions as tables. - //////// Tables are marked as described in - //////// Table Detection in Heterogeneous Documents (2010, Shafait & Smith) - //////// - - // High level function to mark partitions as table rows/cells. - // When this function is done, the column partitions in clean_part_grid_ - // should mostly be marked as tables. 
- void MarkTablePartitions(); - // Marks partitions given a local view of a single partition - void MarkPartitionsUsingLocalInformation(); - /////// Heuristics for local marking - // Check if the partition has at least one large gap between words or no - // significant gap at all - // TODO(nbeato): Make const, prevented because blobnbox array access - bool HasWideOrNoInterWordGap(ColPartition* part) const; - // Checks if a partition is adjacent to leaders on the page - bool HasLeaderAdjacent(const ColPartition& part); - // Filter individual text partitions marked as table partitions - // consisting of paragraph endings, small section headings, and - // headers and footers. - void FilterFalseAlarms(); - void FilterParagraphEndings(); - void FilterHeaderAndFooter(); - // Mark all ColPartitions as table cells that have a table cell above - // and below them - void SmoothTablePartitionRuns(); - - //////// Functions to create bounding boxes (ColSegment) objects for - //////// the columns on the page. The columns are not necessarily - //////// vertical lines, meaning if tab stops strongly suggests that - //////// a column changes horizontal position, as in the case below, - //////// The ColSegment objects will respect that after processing. - //////// - //////// _____________ - //////// Ex. | | | - //////// |_____|______| 5 boxes: 2 on this line - //////// | | | | 3 on this line - //////// |___|____|___| - //////// - - // Get Column segments from best_columns_ - void GetColumnBlocks(ColPartitionSet** columns, - ColSegment_LIST *col_segments); - - // Group Column segments into consecutive single column regions. 
- void GroupColumnBlocks(ColSegment_LIST *current_segments, - ColSegment_LIST *col_segments); - - // Check if two boxes are consecutive within the same column - bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); - - // Set the ratio of candidate table partitions in each column - void SetColumnsType(ColSegment_LIST* col_segments); - - // Merge Column Blocks that were split due to the presence of a table - void GridMergeColumnBlocks(); - - //////// Functions to turn marked ColPartitions into candidate tables - //////// using a modified T-Recs++ algorithm described in - //////// Applying The T-Recs Table Recognition System - //////// To The Business Letter Domain (2001, Kieninger & Dengel) - //////// - - // Merge partititons cells into table columns - // Differs from paper by just looking at marked table partitions - // instead of similarity metric. - // Modified section 4.1 of paper. - void GetTableColumns(ColSegment_LIST *table_columns); - - // Finds regions within a column that potentially contain a table. - // Ie, the table columns from GetTableColumns are turned into boxes - // that span the entire page column (using ColumnBlocks found in - // earlier functions) in the x direction and the min/max extent of - // overlapping table columns in the y direction. - // Section 4.2 of paper. - void GetTableRegions(ColSegment_LIST *table_columns, - ColSegment_LIST *table_regions); - - - //////// Functions to "patch up" found tables - //////// - - // Merge table regions corresponding to tables spanning multiple columns - void GridMergeTableRegions(); - bool BelongToOneTable(const TBOX &box1, const TBOX &box2); - - // Adjust table boundaries by building a tight bounding box around all - // ColPartitions contained in it. - void AdjustTableBoundaries(); - - // Grows a table to include partitions that are partially covered - // by the table. This includes lines and text. It does not include - // noise or images. - // On entry, result_box is the minimum size of the result. 
The results of the - // function will union the actual result with result_box. - void GrowTableBox(const TBOX& table_box, TBOX* result_box); - // Grow a table by increasing the size of the box to include - // partitions with significant overlap with the table. - void GrowTableToIncludePartials(const TBOX& table_box, - const TBOX& search_range, - TBOX* result_box); - // Grow a table by expanding to the extents of significantly - // overlapping lines. - void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range, - TBOX* result_box); - // Checks whether the horizontal line belong to the table by looking at the - // side spacing of extra ColParitions that will be included in the table - // due to expansion - bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box); - - // Look for isolated column headers above the given table box and - // include them in the table - void IncludeLeftOutColumnHeaders(TBOX* table_box); - - // Remove false alarms consiting of a single column - void DeleteSingleColumnTables(); - - // Return true if at least one gap larger than the global x-height - // exists in the horizontal projection - bool GapInXProjection(int* xprojection, int length); - - //////// Recognize the tables. - //////// - // This function will run the table recognizer and try to find better - // bounding boxes. The structures of the tables never leave this function - // right now. It just tries to prune and merge tables based on info it - // has available. - void RecognizeTables(); - - //////// Debugging functions. Render different structures to GUI - //////// for visual debugging / intuition. - //////// - - // Displays Colpartitions marked as table row. Overlays them on top of - // part_grid_. - void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols, - ScrollView::Color color); - - // Displays the colpartitions using a new coloring on an existing window. 
- // Note: This method is only for debug purpose during development and - // would not be part of checked in code - void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, - ScrollView::Color text_color, - ScrollView::Color table_color); - void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, - ScrollView::Color default_color); - void DisplayColPartitionConnections(ScrollView* win, - ColPartitionGrid* grid, - ScrollView::Color default_color); - void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, - ScrollView::Color color); - - // Merge all colpartitions in table regions to make them a single - // colpartition and revert types of isolated table cells not - // assigned to any table to their original types. - void MakeTableBlocks(ColPartitionGrid* grid, - ColPartitionSet** columns, - WidthCallback* width_cb); - - ///////////////////////////////////////////////// - // Useful objects used during table find process. - ///////////////////////////////////////////////// - // Resolution of the connected components in ppi. - int resolution_; - // Estimate of median x-height over the page - int global_median_xheight_; - // Estimate of the median blob width on the page - int global_median_blob_width_; - // Estimate of median leading on the page - int global_median_ledding_; - // Grid to hold cleaned colpartitions after removing all - // colpartitions that consist of only noise blobs, and removing - // noise blobs from remaining colpartitions. - ColPartitionGrid clean_part_grid_; - // Grid contains the leaders and ruling lines. - ColPartitionGrid leader_and_ruling_grid_; - // Grid contains the broken down column partitions. It can be thought - // of as a "word" grid. However, it usually doesn't break apart text lines. - // It does break apart table data (most of the time). 
- ColPartitionGrid fragmented_text_grid_; - // Grid of page column blocks - ColSegmentGrid col_seg_grid_; - // Grid of detected tables - ColSegmentGrid table_grid_; - // The reading order of text. Defaults to true, for languages such as English. - bool left_to_right_language_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_TABLEFIND_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablerecog.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablerecog.cpp deleted file mode 100644 index 5de16c4c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablerecog.cpp +++ /dev/null @@ -1,1065 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tablerecog.cpp -// Description: Helper class to help structure table areas. Given an bounding -// box from TableFinder, the TableRecognizer should give a -// StructuredTable (maybe a list in the future) of "good" tables -// in that area. -// Author: Nicholas Beato -// Created: Friday, Aug. 20, 2010 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tablerecog.h" - -#include - -namespace tesseract { - -// The amount of space required between the ColPartitions in 2 columns -// of a non-lined table as a multiple of the median width. -const double kHorizontalSpacing = 0.30; -// The amount of space required between the ColPartitions in 2 rows -// of a non-lined table as multiples of the median height. -const double kVerticalSpacing = -0.2; -// The number of cells that the grid lines may intersect. -// See FindCellSplitLocations for explanation. -const int kCellSplitRowThreshold = 0; -const int kCellSplitColumnThreshold = 0; -// For "lined tables", the number of required lines. Currently a guess. -const int kLinedTableMinVerticalLines = 3; -const int kLinedTableMinHorizontalLines = 3; -// Number of columns required, as a fraction of the most columns found. -// None of these are tweaked at all. -const double kRequiredColumns = 0.7; -// The tolerance for comparing margins of potential tables. -const double kMarginFactor = 1.1; -// The first and last row should be consistent cell height. -// This factor is the first and last row cell height max. -const double kMaxRowSize = 2.5; -// Number of filled columns required to form a strong table row. -// For small tables, this is an absolute number. 
-const double kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }; -const int kGoodRowNumberOfColumnsSmallSize = - sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1; -// For large tables, it is a relative number -const double kGoodRowNumberOfColumnsLarge = 0.7; -// The amount of area that must be covered in a cell by ColPartitions to -// be considered "filled" -const double kMinFilledArea = 0.35; - -//////// -//////// StructuredTable Class -//////// - -StructuredTable::StructuredTable() - : text_grid_(nullptr), - line_grid_(nullptr), - is_lined_(false), - space_above_(0), - space_below_(0), - space_left_(0), - space_right_(0), - median_cell_height_(0), - median_cell_width_(0), - max_text_height_(INT32_MAX) { -} - -void StructuredTable::Init() { -} - -void StructuredTable::set_text_grid(ColPartitionGrid* text_grid) { - text_grid_ = text_grid; -} -void StructuredTable::set_line_grid(ColPartitionGrid* line_grid) { - line_grid_ = line_grid; -} -void StructuredTable::set_max_text_height(int height) { - max_text_height_ = height; -} -bool StructuredTable::is_lined() const { - return is_lined_; -} -int StructuredTable::row_count() const { - return cell_y_.length() == 0 ? 0 : cell_y_.length() - 1; -} -int StructuredTable::column_count() const { - return cell_x_.length() == 0 ? 
0 : cell_x_.length() - 1; -} -int StructuredTable::cell_count() const { - return row_count() * column_count(); -} -void StructuredTable::set_bounding_box(const TBOX& box) { - bounding_box_ = box; -} -const TBOX& StructuredTable::bounding_box() const { - return bounding_box_; -} -int StructuredTable::median_cell_height() { - return median_cell_height_; -} -int StructuredTable::median_cell_width() { - return median_cell_width_; -} -int StructuredTable::row_height(int row) const { - ASSERT_HOST(0 <= row && row < row_count()); - return cell_y_[row + 1] - cell_y_[row]; -} -int StructuredTable::column_width(int column) const { - ASSERT_HOST(0 <= column && column < column_count()); - return cell_x_[column + 1] - cell_x_[column]; -} -int StructuredTable::space_above() const { - return space_above_; -} -int StructuredTable::space_below() const { - return space_below_; -} - -// At this point, we know that the lines are contained -// by the box (by FindLinesBoundingBox). -// So try to find the cell structure and make sure it works out. -// The assumption is that all lines span the table. If this -// assumption fails, the VerifyLinedTable method will -// abort the lined table. The TableRecognizer will fall -// back on FindWhitespacedStructure. -bool StructuredTable::FindLinedStructure() { - ClearStructure(); - - // Search for all of the lines in the current box. - // Update the cellular structure with the exact lines. - ColPartitionGridSearch box_search(line_grid_); - box_search.SetUniqueMode(true); - box_search.StartRectSearch(bounding_box_); - ColPartition* line = nullptr; - - while ((line = box_search.NextRectSearch()) != nullptr) { - if (line->IsHorizontalLine()) - cell_y_.push_back(line->MidY()); - if (line->IsVerticalLine()) - cell_x_.push_back(line->MidX()); - } - - // HasSignificantLines should guarantee cells. - // Because that code is a different class, just gracefully - // return false. This could be an assert. 
- if (cell_x_.length() < 3 || cell_y_.length() < 3) - return false; - - cell_x_.sort(); - cell_y_.sort(); - - // Remove duplicates that may have occurred due to split lines. - cell_x_.compact_sorted(); - cell_y_.compact_sorted(); - - // The border should be the extents of line boxes, not middle. - cell_x_[0] = bounding_box_.left(); - cell_x_[cell_x_.length() - 1] = bounding_box_.right(); - cell_y_[0] = bounding_box_.bottom(); - cell_y_[cell_y_.length() - 1] = bounding_box_.top(); - - // Remove duplicates that may have occurred due to moving the borders. - cell_x_.compact_sorted(); - cell_y_.compact_sorted(); - - CalculateMargins(); - CalculateStats(); - is_lined_ = VerifyLinedTableCells(); - return is_lined_; -} - -// Finds the cellular structure given a particular box. -bool StructuredTable::FindWhitespacedStructure() { - ClearStructure(); - FindWhitespacedColumns(); - FindWhitespacedRows(); - - if (!VerifyWhitespacedTable()) { - return false; - } else { - bounding_box_.set_left(cell_x_[0]); - bounding_box_.set_right(cell_x_[cell_x_.length() - 1]); - bounding_box_.set_bottom(cell_y_[0]); - bounding_box_.set_top(cell_y_[cell_y_.length() - 1]); - AbsorbNearbyLines(); - CalculateMargins(); - CalculateStats(); - return true; - } -} - -// Tests if a partition fits inside the table structure. -// Partitions must fully span a grid line in order to intersect it. -// This means that a partition does not intersect a line -// that it "just" touches. This is mainly because the assumption -// throughout the code is that "0" distance is a very very small space. 
-bool StructuredTable::DoesPartitionFit(const ColPartition& part) const { - const TBOX& box = part.bounding_box(); - for (int i = 0; i < cell_x_.length(); ++i) - if (box.left() < cell_x_[i] && cell_x_[i] < box.right()) - return false; - for (int i = 0; i < cell_y_.length(); ++i) - if (box.bottom() < cell_y_[i] && cell_y_[i] < box.top()) - return false; - return true; -} - -// Checks if a sub-table has multiple data cells filled. -int StructuredTable::CountFilledCells() { - return CountFilledCells(0, row_count() - 1, 0, column_count() - 1); -} -int StructuredTable::CountFilledCellsInRow(int row) { - return CountFilledCells(row, row, 0, column_count() - 1); -} -int StructuredTable::CountFilledCellsInColumn(int column) { - return CountFilledCells(0, row_count() - 1, column, column); -} -int StructuredTable::CountFilledCells(int row_start, int row_end, - int column_start, int column_end) { - ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count()); - ASSERT_HOST(0 <= column_start && column_start <= column_end && - column_end < column_count()); - int cell_count = 0; - TBOX cell_box; - for (int row = row_start; row <= row_end; ++row) { - cell_box.set_bottom(cell_y_[row]); - cell_box.set_top(cell_y_[row + 1]); - for (int col = column_start; col <= column_end; ++col) { - cell_box.set_left(cell_x_[col]); - cell_box.set_right(cell_x_[col + 1]); - if (CountPartitions(cell_box) > 0) - ++cell_count; - } - } - return cell_count; -} - -// Makes sure that at least one cell in a row has substantial area filled. -// This can filter out large whitespace caused by growing tables too far -// and page numbers. -bool StructuredTable::VerifyRowFilled(int row) { - for (int i = 0; i < column_count(); ++i) { - double area_filled = CalculateCellFilledPercentage(row, i); - if (area_filled >= kMinFilledArea) - return true; - } - return false; -} - -// Finds the filled area in a cell. -// Assume ColPartitions do not overlap for simplicity (even though they do). 
-double StructuredTable::CalculateCellFilledPercentage(int row, int column) { - ASSERT_HOST(0 <= row && row <= row_count()); - ASSERT_HOST(0 <= column && column <= column_count()); - const TBOX kCellBox(cell_x_[column], cell_y_[row], - cell_x_[column + 1], cell_y_[row + 1]); - ASSERT_HOST(!kCellBox.null_box()); - - ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartRectSearch(kCellBox); - double area_covered = 0; - ColPartition* text = nullptr; - while ((text = gsearch.NextRectSearch()) != nullptr) { - if (text->IsTextType()) - area_covered += text->bounding_box().intersection(kCellBox).area(); - } - const int32_t current_area = kCellBox.area(); - if (current_area == 0) { - return 1.0; - } - return std::min(1.0, area_covered / current_area); -} - -void StructuredTable::Display(ScrollView* window, ScrollView::Color color) { -#ifndef GRAPHICS_DISABLED - window->Brush(ScrollView::NONE); - window->Pen(color); - window->Rectangle(bounding_box_.left(), bounding_box_.bottom(), - bounding_box_.right(), bounding_box_.top()); - for (int i = 0; i < cell_x_.length(); i++) { - window->Line(cell_x_[i], bounding_box_.bottom(), - cell_x_[i], bounding_box_.top()); - } - for (int i = 0; i < cell_y_.length(); i++) { - window->Line(bounding_box_.left(), cell_y_[i], - bounding_box_.right(), cell_y_[i]); - } - window->UpdateWindow(); -#endif -} - -// Clear structure information. -void StructuredTable::ClearStructure() { - cell_x_.clear(); - cell_y_.clear(); - is_lined_ = false; - space_above_ = 0; - space_below_ = 0; - space_left_ = 0; - space_right_ = 0; - median_cell_height_ = 0; - median_cell_width_ = 0; -} - -// When a table has lines, the lines should not intersect any partitions. -// The following function makes sure the previous assumption is met. -bool StructuredTable::VerifyLinedTableCells() { - // Function only called when lines exist. 
- ASSERT_HOST(cell_y_.length() >= 2 && cell_x_.length() >= 2); - for (int i = 0; i < cell_y_.length(); ++i) { - if (CountHorizontalIntersections(cell_y_[i]) > 0) - return false; - } - for (int i = 0; i < cell_x_.length(); ++i) { - if (CountVerticalIntersections(cell_x_[i]) > 0) - return false; - } - return true; -} - -// TODO(nbeato): Could be much better than this. -// Examples: -// - Caclulate the percentage of filled cells. -// - Calculate the average number of ColPartitions per cell. -// - Calculate the number of cells per row with partitions. -// - Check if ColPartitions in adjacent cells are similar. -// - Check that all columns are at least a certain width. -// - etc. -bool StructuredTable::VerifyWhitespacedTable() { - // criteria for a table, must be at least 2x3 or 3x2 - return row_count() >= 2 && column_count() >= 2 && cell_count() >= 6; -} - -// Finds vertical splits in the ColPartitions of text_grid_ by considering -// all possible "good" guesses. A good guess is just the left/right sides of -// the partitions, since these locations will uniquely define where the -// extremal values where the splits can occur. The split happens -// in the middle of the two nearest partitions. -void StructuredTable::FindWhitespacedColumns() { - // Set of the extents of all partitions on the page. - GenericVectorEqEq left_sides; - GenericVectorEqEq right_sides; - - // Look at each text partition. We want to find the partitions - // that have extremal left/right sides. These will give us a basis - // for the table columns. 
- ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartRectSearch(bounding_box_); - ColPartition* text = nullptr; - while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; - - ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right()); - int spacing = static_cast(text->median_width() * - kHorizontalSpacing / 2.0 + 0.5); - left_sides.push_back(text->bounding_box().left() - spacing); - right_sides.push_back(text->bounding_box().right() + spacing); - } - // It causes disaster below, so avoid it! - if (left_sides.length() == 0 || right_sides.length() == 0) - return; - - // Since data may be inserted in grid order, we sort the left/right sides. - left_sides.sort(); - right_sides.sort(); - - // At this point, in the "merged list", we expect to have a left side, - // followed by either more left sides or a right side. The last number - // should be a right side. We find places where the splits occur by looking - // for "valleys". If we want to force gap sizes or allow overlap, change - // the spacing above. If you want to let lines "slice" partitions as long - // as it is infrequent, change the following function. - FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, - &cell_x_); -} - -// Finds horizontal splits in the ColPartitions of text_grid_ by considering -// all possible "good" guesses. A good guess is just the bottom/top sides of -// the partitions, since these locations will uniquely define where the -// extremal values where the splits can occur. The split happens -// in the middle of the two nearest partitions. -void StructuredTable::FindWhitespacedRows() { - // Set of the extents of all partitions on the page. - GenericVectorEqEq bottom_sides; - GenericVectorEqEq top_sides; - // We will be "shrinking" partitions, so keep the min/max around to - // make sure the bottom/top lines do not intersect text. 
- int min_bottom = INT32_MAX; - int max_top = INT32_MIN; - - // Look at each text partition. We want to find the partitions - // that have extremal bottom/top sides. These will give us a basis - // for the table rows. Because the textlines can be skewed and close due - // to warping, the height of the partitions is toned down a little bit. - ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartRectSearch(bounding_box_); - ColPartition* text = nullptr; - while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; - - ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top()); - min_bottom = std::min(min_bottom, static_cast(text->bounding_box().bottom())); - max_top = std::max(max_top, static_cast(text->bounding_box().top())); - - // Ignore "tall" text partitions, as these are usually false positive - // vertical text or multiple lines pulled together. - if (text->bounding_box().height() > max_text_height_) - continue; - - int spacing = static_cast(text->bounding_box().height() * - kVerticalSpacing / 2.0 + 0.5); - int bottom = text->bounding_box().bottom() - spacing; - int top = text->bounding_box().top() + spacing; - // For horizontal text, the factor can be negative. This should - // probably cause a warning or failure. I haven't actually checked if - // it happens. - if (bottom >= top) - continue; - - bottom_sides.push_back(bottom); - top_sides.push_back(top); - } - // It causes disaster below, so avoid it! - if (bottom_sides.length() == 0 || top_sides.length() == 0) - return; - - // Since data may be inserted in grid order, we sort the bottom/top sides. - bottom_sides.sort(); - top_sides.sort(); - - // At this point, in the "merged list", we expect to have a bottom side, - // followed by either more bottom sides or a top side. The last number - // should be a top side. We find places where the splits occur by looking - // for "valleys". 
If we want to force gap sizes or allow overlap, change - // the spacing above. If you want to let lines "slice" partitions as long - // as it is infrequent, change the following function. - FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold, - &cell_y_); - - // Recover the min/max correctly since it was shifted. - cell_y_[0] = min_bottom; - cell_y_[cell_y_.length() - 1] = max_top; -} - -void StructuredTable::CalculateMargins() { - space_above_ = INT32_MAX; - space_below_ = INT32_MAX; - space_right_ = INT32_MAX; - space_left_ = INT32_MAX; - UpdateMargins(text_grid_); - UpdateMargins(line_grid_); -} -// Finds the nearest partition in grid to the table -// boundaries and updates the margin. -void StructuredTable::UpdateMargins(ColPartitionGrid* grid) { - int below = FindVerticalMargin(grid, bounding_box_.bottom(), true); - space_below_ = std::min(space_below_, below); - int above = FindVerticalMargin(grid, bounding_box_.top(), false); - space_above_ = std::min(space_above_, above); - int left = FindHorizontalMargin(grid, bounding_box_.left(), true); - space_left_ = std::min(space_left_, left); - int right = FindHorizontalMargin(grid, bounding_box_.right(), false); - space_right_ = std::min(space_right_, right); -} -int StructuredTable::FindVerticalMargin(ColPartitionGrid* grid, int border, - bool decrease) const { - ColPartitionGridSearch gsearch(grid); - gsearch.SetUniqueMode(true); - gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), - border); - ColPartition* part = nullptr; - while ((part = gsearch.NextVerticalSearch(decrease)) != nullptr) { - if (!part->IsTextType() && !part->IsHorizontalLine()) - continue; - int distance = decrease ? 
border - part->bounding_box().top() - : part->bounding_box().bottom() - border; - if (distance >= 0) - return distance; - } - return INT32_MAX; -} -int StructuredTable::FindHorizontalMargin(ColPartitionGrid* grid, int border, - bool decrease) const { - ColPartitionGridSearch gsearch(grid); - gsearch.SetUniqueMode(true); - gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top()); - ColPartition* part = nullptr; - while ((part = gsearch.NextSideSearch(decrease)) != nullptr) { - if (!part->IsTextType() && !part->IsVerticalLine()) - continue; - int distance = decrease ? border - part->bounding_box().right() - : part->bounding_box().left() - border; - if (distance >= 0) - return distance; - } - return INT32_MAX; -} - -void StructuredTable::CalculateStats() { - const int kMaxCellHeight = 1000; - const int kMaxCellWidth = 1000; - STATS height_stats(0, kMaxCellHeight + 1); - STATS width_stats(0, kMaxCellWidth + 1); - - for (int i = 0; i < row_count(); ++i) - height_stats.add(row_height(i), column_count()); - for (int i = 0; i < column_count(); ++i) - width_stats.add(column_width(i), row_count()); - - median_cell_height_ = static_cast(height_stats.median() + 0.5); - median_cell_width_ = static_cast(width_stats.median() + 0.5); -} - -// Looks for grid lines near the current bounding box and -// grows the bounding box to include them if no intersections -// will occur as a result. This is necessary because the margins -// are calculated relative to the closest line/text. If the -// line isn't absorbed, the margin will be the distance to the line. -void StructuredTable::AbsorbNearbyLines() { - ColPartitionGridSearch gsearch(line_grid_); - gsearch.SetUniqueMode(true); - - // Is the closest line above good? Loop multiple times for tables with - // multi-line (sometimes 2) borders. Limit the number of lines by - // making sure they stay within a table cell or so. 
- ColPartition* line = nullptr; - gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), - bounding_box_.top()); - while ((line = gsearch.NextVerticalSearch(false)) != nullptr) { - if (!line->IsHorizontalLine()) - break; - TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1, - bounding_box_.right(), line->MidY()); - if (text_search.height() > median_cell_height_ * 2) - break; - if (CountPartitions(text_search) > 0) - break; - bounding_box_.set_top(line->MidY()); - } - // As above, is the closest line below good? - line = nullptr; - gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), - bounding_box_.bottom()); - while ((line = gsearch.NextVerticalSearch(true)) != nullptr) { - if (!line->IsHorizontalLine()) - break; - TBOX text_search(bounding_box_.left(), line->MidY(), - bounding_box_.right(), bounding_box_.bottom() - 1); - if (text_search.height() > median_cell_height_ * 2) - break; - if (CountPartitions(text_search) > 0) - break; - bounding_box_.set_bottom(line->MidY()); - } - // TODO(nbeato): vertical lines -} - - -// This function will find all "0 valleys" (of any length) given two -// arrays. The arrays are the mins and maxes of partitions (either -// left and right or bottom and top). Since the min/max lists are generated -// with pairs of increasing integers, we can make some assumptions in -// the function about ordering of the overall list, which are shown in the -// asserts. -// The algorithm works as follows: -// While there are numbers to process, take the smallest number. -// If it is from the min_list, increment the "hill" counter. -// Otherwise, decrement the "hill" counter. -// In the process of doing this, keep track of "crossing" the -// desired height. -// The first/last items are extremal values of the list and known. -// NOTE: This function assumes the lists are sorted! 
-void StructuredTable::FindCellSplitLocations(const GenericVector& min_list, - const GenericVector& max_list, - int max_merged, - GenericVector* locations) { - locations->clear(); - ASSERT_HOST(min_list.length() == max_list.length()); - if (min_list.length() == 0) - return; - ASSERT_HOST(min_list.get(0) < max_list.get(0)); - ASSERT_HOST(min_list.get(min_list.length() - 1) < - max_list.get(max_list.length() - 1)); - - locations->push_back(min_list.get(0)); - int min_index = 0; - int max_index = 0; - int stacked_partitions = 0; - int last_cross_position = INT32_MAX; - // max_index will expire after min_index. - // However, we can't "increase" the hill size if min_index expired. - // So finish processing when min_index expires. - while (min_index < min_list.length()) { - // Increase the hill count. - if (min_list[min_index] < max_list[max_index]) { - ++stacked_partitions; - if (last_cross_position != INT32_MAX && - stacked_partitions > max_merged) { - int mid = (last_cross_position + min_list[min_index]) / 2; - locations->push_back(mid); - last_cross_position = INT32_MAX; - } - ++min_index; - } else { - // Decrease the hill count. - --stacked_partitions; - if (last_cross_position == INT32_MAX && - stacked_partitions <= max_merged) { - last_cross_position = max_list[max_index]; - } - ++max_index; - } - } - locations->push_back(max_list.get(max_list.length() - 1)); -} - -// Counts the number of partitions in the table -// box that intersection the given x value. -int StructuredTable::CountVerticalIntersections(int x) { - int count = 0; - // Make a small box to keep the search time down. 
- const int kGridSize = text_grid_->gridsize(); - TBOX vertical_box = bounding_box_; - vertical_box.set_left(x - kGridSize); - vertical_box.set_right(x + kGridSize); - - ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartRectSearch(vertical_box); - ColPartition* text = nullptr; - while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; - const TBOX& box = text->bounding_box(); - if (box.left() < x && x < box.right()) - ++count; - } - return count; -} - -// Counts the number of partitions in the table -// box that intersection the given y value. -int StructuredTable::CountHorizontalIntersections(int y) { - int count = 0; - // Make a small box to keep the search time down. - const int kGridSize = text_grid_->gridsize(); - TBOX horizontal_box = bounding_box_; - horizontal_box.set_bottom(y - kGridSize); - horizontal_box.set_top(y + kGridSize); - - ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartRectSearch(horizontal_box); - ColPartition* text = nullptr; - while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; - - const TBOX& box = text->bounding_box(); - if (box.bottom() < y && y < box.top()) - ++count; - } - return count; -} - -// Counts how many text partitions are in this box. -// This is used to count partitons in cells, as that can indicate -// how "strong" a potential table row/column (or even full table) actually is. 
-int StructuredTable::CountPartitions(const TBOX& box) { - ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartRectSearch(box); - int count = 0; - ColPartition* text = nullptr; - while ((text = gsearch.NextRectSearch()) != nullptr) { - if (text->IsTextType()) - ++count; - } - return count; -} - -//////// -//////// TableRecognizer Class -//////// - -TableRecognizer::TableRecognizer() - : text_grid_(nullptr), - line_grid_(nullptr), - min_height_(0), - min_width_(0), - max_text_height_(INT32_MAX) { -} - -TableRecognizer::~TableRecognizer() { -} - -void TableRecognizer::Init() { -} - -void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) { - text_grid_ = text_grid; -} -void TableRecognizer::set_line_grid(ColPartitionGrid* line_grid) { - line_grid_ = line_grid; -} -void TableRecognizer::set_min_height(int height) { - min_height_ = height; -} -void TableRecognizer::set_min_width(int width) { - min_width_ = width; -} -void TableRecognizer::set_max_text_height(int height) { - max_text_height_ = height; -} - -StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) { - StructuredTable* table = new StructuredTable(); - table->Init(); - table->set_text_grid(text_grid_); - table->set_line_grid(line_grid_); - table->set_max_text_height(max_text_height_); - - // Try to solve this simple case, a table with *both* - // vertical and horizontal lines. - if (RecognizeLinedTable(guess, table)) - return table; - - // Fallback to whitespace if that failed. - // TODO(nbeato): Break this apart to take advantage of horizontal - // lines or vertical lines when present. - if (RecognizeWhitespacedTable(guess, table)) - return table; - - // No table found... 
- delete table; - return nullptr; -} - -bool TableRecognizer::RecognizeLinedTable(const TBOX& guess_box, - StructuredTable* table) { - if (!HasSignificantLines(guess_box)) - return false; - TBOX line_bound = guess_box; - if (!FindLinesBoundingBox(&line_bound)) - return false; - table->set_bounding_box(line_bound); - return table->FindLinedStructure(); -} - -// Quick implementation. Just count the number of lines in the box. -// A better implementation would counter intersections and look for connected -// components. It could even go as far as finding similar length lines. -// To account for these possible issues, the VerifyLinedTableCells function -// will reject lined tables that cause intersections with text on the page. -// TODO(nbeato): look for "better" lines -bool TableRecognizer::HasSignificantLines(const TBOX& guess) { - ColPartitionGridSearch box_search(line_grid_); - box_search.SetUniqueMode(true); - box_search.StartRectSearch(guess); - ColPartition* line = nullptr; - int vertical_count = 0; - int horizontal_count = 0; - - while ((line = box_search.NextRectSearch()) != nullptr) { - if (line->IsHorizontalLine()) - ++horizontal_count; - if (line->IsVerticalLine()) - ++vertical_count; - } - - return vertical_count >= kLinedTableMinVerticalLines && - horizontal_count >= kLinedTableMinHorizontalLines; -} - -// Given a bounding box with a bunch of horizontal / vertical lines, -// we just find the extents of all of these lines iteratively. -// The box will be at least as large as guess. This -// could possibly be a bad assumption. -// It is guaranteed to halt in at least O(n * gridarea) where n -// is the number of lines. -// The assumption is that growing the box iteratively will add lines -// several times, but eventually we'll find the extents. -// -// For tables, the approach is a bit aggressive, a single line (which could be -// noise or a column ruling) can destroy the table inside. -// -// TODO(nbeato): This is a quick first implementation. 
-// A better implementation would actually look for consistency -// in extents of the lines and find the extents using lines -// that clearly describe the table. This would allow the -// lines to "vote" for height/width. An approach like -// this would solve issues with page layout rulings. -// I haven't looked for these issues yet, so I can't even -// say they happen confidently. -bool TableRecognizer::FindLinesBoundingBox(TBOX* bounding_box) { - // The first iteration will tell us if there are lines - // present and shrink the box to a minimal iterative size. - if (!FindLinesBoundingBoxIteration(bounding_box)) - return false; - - // Keep growing until the area of the table stabilizes. - // The box can only get bigger, increasing area. - bool changed = true; - while (changed) { - changed = false; - int old_area = bounding_box->area(); - bool check = FindLinesBoundingBoxIteration(bounding_box); - // At this point, the function will return true. - ASSERT_HOST(check); - ASSERT_HOST(bounding_box->area() >= old_area); - changed = (bounding_box->area() > old_area); - } - - return true; -} - -bool TableRecognizer::FindLinesBoundingBoxIteration(TBOX* bounding_box) { - // Search for all of the lines in the current box, keeping track of extents. - ColPartitionGridSearch box_search(line_grid_); - box_search.SetUniqueMode(true); - box_search.StartRectSearch(*bounding_box); - ColPartition* line = nullptr; - bool first_line = true; - - while ((line = box_search.NextRectSearch()) != nullptr) { - if (line->IsLineType()) { - if (first_line) { - // The first iteration can shrink the box. - *bounding_box = line->bounding_box(); - first_line = false; - } else { - *bounding_box += line->bounding_box(); - } - } - } - return !first_line; -} - -// The goal of this function is to move the table boundaries around and find -// a table that maximizes the whitespace around the table while maximizing -// the cellular structure. 
As a result, it gets confused by headers, footers, -// and merged columns (text that crosses columns). There is a tolerance -// that allows a few partitions to count towards potential cell merges. -// It's the max_merged parameter to FindPartitionLocations. -// It can work, but it needs some false positive remove on boundaries. -// For now, the grid structure must not intersect any partitions. -// Also, small tolerance is added to the horizontal lines for tightly packed -// tables. The tolerance is added by adjusting the bounding boxes of the -// partitions (in FindHorizontalPartitions). The current implementation -// only adjusts the vertical extents of the table. -// -// Also note. This was hacked at a lot. It could probably use some -// more hacking at to find a good set of border conditions and then a -// nice clean up. -bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box, - StructuredTable* table) { - TBOX best_box = guess_box; // Best borders known. - int best_below = 0; // Margin size above best table. - int best_above = 0; // Margin size below best table. - TBOX adjusted = guess_box; // The search box. - - // We assume that the guess box is somewhat accurate, so we don't allow - // the adjusted border to pass half of the guessed area. This prevents - // "negative" tables from forming. - const int kMidGuessY = (guess_box.bottom() + guess_box.top()) / 2; - // Keeps track of the most columns in an accepted table. The resulting table - // may be less than the max, but we don't want to stray too far. - int best_cols = 0; - // Make sure we find a good border. - bool found_good_border = false; - - // Find the bottom of the table by trying a few different locations. For - // each location, the top, left, and right are fixed. We start the search - // in a smaller table to favor best_cols getting a good estimate sooner. 
- int last_bottom = INT32_MAX; - int bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), - kMidGuessY - min_height_ / 2, true); - int top = NextHorizontalSplit(guess_box.left(), guess_box.right(), - kMidGuessY + min_height_ / 2, false); - adjusted.set_top(top); - - // Headers/footers can be spaced far from everything. - // Make sure that the space below is greater than the space above - // the lowest row. - int previous_below = 0; - const int kMaxChances = 10; - int chances = kMaxChances; - while (bottom != last_bottom) { - adjusted.set_bottom(bottom); - - if (adjusted.height() >= min_height_) { - // Try to fit the grid on the current box. We give it a chance - // if the number of columns didn't significantly drop. - table->set_bounding_box(adjusted); - if (table->FindWhitespacedStructure() && - table->column_count() >= best_cols * kRequiredColumns) { - if (false && IsWeakTableRow(table, 0)) { - // Currently buggy, but was looking promising so disabled. - --chances; - } else { - // We favor 2 things, - // 1- Adding rows that have partitioned data. - // 2- Better margins (to find header/footer). - // For better tables, we just look for multiple cells in the - // bottom row with data in them. - // For margins, the space below the last row should - // be better than a table with the last row removed. 
- chances = kMaxChances; - double max_row_height = kMaxRowSize * table->median_cell_height(); - if ((table->space_below() * kMarginFactor >= best_below && - table->space_below() >= previous_below) || - (table->CountFilledCellsInRow(0) > 1 && - table->row_height(0) < max_row_height)) { - best_box.set_bottom(bottom); - best_below = table->space_below(); - best_cols = std::max(table->column_count(), best_cols); - found_good_border = true; - } - } - previous_below = table->space_below(); - } else { - --chances; - } - } - if (chances <= 0) - break; - - last_bottom = bottom; - bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), - last_bottom, true); - } - if (!found_good_border) - return false; - - // TODO(nbeato) comments: follow modified code above... put it in a function! - found_good_border = false; - int last_top = INT32_MIN; - top = NextHorizontalSplit(guess_box.left(), guess_box.right(), - kMidGuessY + min_height_ / 2, false); - int previous_above = 0; - chances = kMaxChances; - - adjusted.set_bottom(best_box.bottom()); - while (last_top != top) { - adjusted.set_top(top); - if (adjusted.height() >= min_height_) { - table->set_bounding_box(adjusted); - if (table->FindWhitespacedStructure() && - table->column_count() >= best_cols * kRequiredColumns) { - int last_row = table->row_count() - 1; - if (false && IsWeakTableRow(table, last_row)) { - // Currently buggy, but was looking promising so disabled. 
- --chances; - } else { - chances = kMaxChances; - double max_row_height = kMaxRowSize * table->median_cell_height(); - if ((table->space_above() * kMarginFactor >= best_above && - table->space_above() >= previous_above) || - (table->CountFilledCellsInRow(last_row) > 1 && - table->row_height(last_row) < max_row_height)) { - best_box.set_top(top); - best_above = table->space_above(); - best_cols = std::max(table->column_count(), best_cols); - found_good_border = true; - } - } - previous_above = table->space_above(); - } else { - --chances; - } - } - if (chances <= 0) - break; - - last_top = top; - top = NextHorizontalSplit(guess_box.left(), guess_box.right(), - last_top, false); - } - - if (!found_good_border) - return false; - - // If we get here, this shouldn't happen. It can be an assert, but - // I haven't tested it enough to make it crash things. - if (best_box.null_box()) - return false; - - // Given the best locations, fit the box to those locations. - table->set_bounding_box(best_box); - return table->FindWhitespacedStructure(); -} - -// Finds the closest value to y that can safely cause a horizontal -// split in the partitions. -// This function has been buggy and not as reliable as I would've -// liked. I suggest finding all of the splits using the -// FindPartitionLocations once and then just keeping the results -// of that function cached somewhere. 
-int TableRecognizer::NextHorizontalSplit(int left, int right, int y, - bool top_to_bottom) { - ColPartitionGridSearch gsearch(text_grid_); - gsearch.SetUniqueMode(true); - gsearch.StartVerticalSearch(left, right, y); - ColPartition* text = nullptr; - int last_y = y; - while ((text = gsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { - if (!text->IsTextType() || !text->IsHorizontalType()) - continue; - if (text->bounding_box().height() > max_text_height_) - continue; - - const TBOX& text_box = text->bounding_box(); - if (top_to_bottom && (last_y >= y || last_y <= text_box.top())) { - last_y = std::min(last_y, static_cast(text_box.bottom())); - continue; - } - if (!top_to_bottom && (last_y <= y || last_y >= text_box.bottom())) { - last_y = std::max(last_y, static_cast(text_box.top())); - continue; - } - - return last_y; - } - // If none is found, we at least want to preserve the min/max, - // which defines the overlap of y with the last partition in the grid. - return last_y; -} - -// Code is buggy right now. It is disabled in the calling function. -// It seems like sometimes the row that is passed in is not correct -// sometimes (like a phantom row is introduced). There's something going -// on in the cell_y_ data member before this is called... not certain. 
-bool TableRecognizer::IsWeakTableRow(StructuredTable* table, int row) { - if (!table->VerifyRowFilled(row)) - return false; - - double threshold = 0.0; - if (table->column_count() > kGoodRowNumberOfColumnsSmallSize) - threshold = table->column_count() * kGoodRowNumberOfColumnsLarge; - else - threshold = kGoodRowNumberOfColumnsSmall[table->column_count()]; - - return table->CountFilledCellsInRow(row) < threshold; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablerecog.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablerecog.h deleted file mode 100644 index c1019df7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tablerecog.h +++ /dev/null @@ -1,378 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tablerecog.h -// Description: Functions to detect structure of tables. -// Author: Nicholas Beato -// Created: Aug 17, 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TABLERECOG_H_ -#define TABLERECOG_H_ - -#include "colpartitiongrid.h" -#include "genericvector.h" - -namespace tesseract { - -// There are 2 classes in this file. They have 2 different purposes. -// - StructuredTable contains the methods to find the structure given -// a specific bounding box and grow that structure. 
-// - TableRecognizer contains the methods to adjust the possible positions -// of a table without worrying about structure. -// -// To use these classes, the assumption is that the TableFinder will -// have a guess of the location of a table (or possibly over/undersegmented -// tables). The TableRecognizer is responsible for finding the table boundaries -// at a high level. The StructuredTable class is responsible for determining -// the structure of the table and trying to maximize its bounds while retaining -// the structure. -// (The latter part is not implemented yet, but that was the goal). -// -// While on the boundary discussion, keep in mind that this is a first pass. -// There should eventually be some things like internal structure checks, -// and, more importantly, surrounding text flow checks. -// - -// Usage: -// The StructuredTable class contains methods to query a potential table. -// It has functions to find structure, count rows, find ColPartitions that -// intersect gridlines, etc. It is not meant to blindly find a table. It -// is meant to start with a known table location and enhance it. -// Usage: -// ColPartitionGrid text_grid, line_grid; // init -// TBOX table_box; // known location of table location -// -// StructuredTable table; -// table.Init(); // construction code -// table.set_text_grid(/* text */); // These 2 grids can be the same! -// table.set_line_grid(/* lines */); -// table.set_min_text_height(10); // Filter vertical and tall text. -// // IMPORTANT! The table needs to be told where it is! -// table.set_bounding_box(table_box); // Set initial table location. -// if (table.FindWhitespacedStructure()) { -// // process table -// table.column_count(); // number of columns -// table.row_count(); // number of rows -// table.cells_count(); // number of cells -// table.bounding_box(); // updated bounding box -// // etc. 
-// } -// -class StructuredTable { - public: - StructuredTable(); - ~StructuredTable() = default; - - // Initialization code. Must be called after the constructor. - void Init(); - - // Sets the grids used by the table. These can be changed between - // calls to Recognize. They are treated as read-only data. - void set_text_grid(ColPartitionGrid* text); - void set_line_grid(ColPartitionGrid* lines); - // Filters text partitions that are ridiculously tall to prevent - // merging rows. - void set_max_text_height(int height); - - // Basic accessors. Some are treated as attributes despite having indirect - // representation. - bool is_lined() const; - int row_count() const; - int column_count() const; - int cell_count() const; - void set_bounding_box(const TBOX& box); - const TBOX& bounding_box() const; - int median_cell_height(); - int median_cell_width(); - int row_height(int row) const; - int column_width(int column) const; - int space_above() const; - int space_below() const; - - // Given enough horizontal and vertical lines in a region, create this table - // based on the structure given by the lines. Return true if it worked out. - // Code assumes the lines exist. It is the caller's responsibility to check - // for lines and find an appropriate bounding box. - bool FindLinedStructure(); - - // The main subroutine for finding generic table structure. The function - // finds the grid structure in the given box. Returns true if a good grid - // exists, implying that "this" table is valid. - bool FindWhitespacedStructure(); - - //////// - //////// Functions to query table info. - //////// - - // Returns true if inserting part into the table does not cause any - // cell merges. - bool DoesPartitionFit(const ColPartition& part) const; - // Checks if a sub-table has multiple data cells filled. 
- int CountFilledCells(); - int CountFilledCellsInRow(int row); - int CountFilledCellsInColumn(int column); - int CountFilledCells(int row_start, int row_end, - int column_start, int column_end); - - // Makes sure that at least one cell in a row has substantial area filled. - // This can filter out large whitespace caused by growing tables too far - // and page numbers. - // (currently bugged for some reason). - bool VerifyRowFilled(int row); - // Finds the filled area in a cell. - double CalculateCellFilledPercentage(int row, int column); - - // Debug display, draws the table in the given color. If the table is not - // valid, the table and "best" grid lines are still drawn in the given color. - void Display(ScrollView* window, ScrollView::Color color); - - protected: - // Clear the structure information. - void ClearStructure(); - - //////// - //////// Lined tables - //////// - - // Verifies the lines do not intersect partitions. This happens when - // the lines are in column boundaries and extend the full page. As a result, - // the grid lines go through column text. The condition is detectable. - bool VerifyLinedTableCells(); - - //////// - //////// Tables with whitespace - //////// - - // This is the function to change if you want to filter resulting tables - // better. Right now it just checks for a minimum cell count and such. - // You could add things like maximum number of ColPartitions per cell or - // similar. - bool VerifyWhitespacedTable(); - // Find the columns of a table using whitespace. - void FindWhitespacedColumns(); - // Find the rows of a table using whitespace. - void FindWhitespacedRows(); - - //////// - //////// Functions to provide information about the table. - //////// - - // Calculates the whitespace around the table using the table boundary and - // the supplied grids (set_text_grid and set_line_grid). - void CalculateMargins(); - // Update the table margins with the supplied grid. 
This is - // only called by calculate margins to use multiple grid sources. - void UpdateMargins(ColPartitionGrid* grid); - int FindVerticalMargin(ColPartitionGrid* grid, int start_x, - bool decrease) const; - int FindHorizontalMargin(ColPartitionGrid* grid, int start_y, - bool decrease) const; - // Calculates stats on the table, namely the median cell height and width. - void CalculateStats(); - - //////// - //////// Functions to try to "fix" some table errors. - //////// - - // Given a whitespaced table, this looks for bordering lines that might - // be page layout boxes around the table. It is necessary to get the margins - // correct on the table. If the lines are not joined, the margins will be - // the distance to the line, which is not right. - void AbsorbNearbyLines(); - - // Nice utility function for finding partition gaps. You feed it a sorted - // list of all of the mins/maxes of the partitions in the table, and it gives - // you the gaps (middle). This works for both vertical and horizontal - // gaps. - // - // If you want to allow slight overlap in the division and the partitions, - // just scale down the partitions before inserting them in the list. - // Likewise, you can force at least some space between partitions. - // This trick is how the horizontal partitions are done (since the page - // skew could make it hard to find splits in the text). - // - // As a result, "0 distance" between closest partitions causes a gap. - // This is not a programmatic assumption. It is intentional and simplifies - // things. - // - // "max_merged" indicates both the minimum number of stacked partitions - // to cause a cell (add 1 to it), and the maximum number of partitions that - // a grid line can intersect. For example, if max_merged is 0, then lines - // are inserted wherever space exists between partitions. If it is 2, - // lines may intersect 2 partitions at most, but you also need at least - // 2 partitions to generate a line. 
- static void FindCellSplitLocations(const GenericVector& min_list, - const GenericVector& max_list, - int max_merged, - GenericVector* locations); - - //////// - //////// Utility function for table queries - //////// - - // Counts the number of ColPartitions that intersect vertical cell - // division at this x value. Used by VerifyLinedTable. - int CountVerticalIntersections(int x); - int CountHorizontalIntersections(int y); - - // Counts how many text partitions are in this box. - int CountPartitions(const TBOX& box); - - //////// - //////// Data members. - //////// - - // Input data, used as read only data to make decisions. - ColPartitionGrid* text_grid_; // Text ColPartitions - ColPartitionGrid* line_grid_; // Line ColPartitions - // Table structure. - // bounding box is a convenient external representation. - // cell_x_ and cell_y_ indicate the grid lines. - TBOX bounding_box_; // Bounding box - GenericVectorEqEq cell_x_; // Locations of vertical divisions (sorted) - GenericVectorEqEq cell_y_; // Locations of horizontal divisions (sorted) - bool is_lined_; // Is the table backed up by a line structure - // Table margins, set via CalculateMargins - int space_above_; - int space_below_; - int space_left_; - int space_right_; - int median_cell_height_; - int median_cell_width_; - // Filters, used to prevent awkward partitions from destroying structure. - int max_text_height_; -}; - -class TableRecognizer { - public: - TableRecognizer(); - ~TableRecognizer(); - - // Initialization code. Must be called after the constructor. - void Init(); - - //////// - //////// Pre-recognize methods to initial table constraints. - //////// - - // Sets the grids used by the table. These can be changed between - // calls to Recognize. They are treated as read-only data. - void set_text_grid(ColPartitionGrid* text); - void set_line_grid(ColPartitionGrid* lines); - // Sets some additional constraints on the table. 
- void set_min_height(int height); - void set_min_width(int width); - // Filters text partitions that are ridiculously tall to prevent - // merging rows. Note that "filters" refers to allowing horizontal - // cells to slice through them on the premise that they were - // merged text rows during previous layout. - void set_max_text_height(int height); - - // Given a guess location, the RecognizeTable function will try to find a - // structured grid in the area. On success, it will return a new - // StructuredTable (and assumes you will delete it). Otherwise, - // nullptr is returned. - // - // Keep in mind, this may "overgrow" or "undergrow" the size of guess. - // Ideally, there is a either a one-to-one correspondence between - // the guess and table or no table at all. This is not the best of - // assumptions right now, but was made to try to keep things simple in - // the first pass. - // - // If a line structure is available on the page in the given region, - // the table will use the linear structure as it is. - // Otherwise, it will try to maximize the whitespace around it while keeping - // a grid structure. This is somewhat working. - // - // Since the combination of adjustments can get high, effort was - // originally made to keep the number of adjustments linear in the number - // of partitions. The underlying structure finding code used to be - // much more complex. I don't know how necessary this constraint is anymore. - // The evaluation of a possible table is kept within O(nlogn) in the size of - // the table (where size is the number of partitions in the table). - // As a result, the algorithm is capable of O(n^2 log n). Depending - // on the grid search size, it may be higher. - // - // Last note: it is possible to just try all partition boundaries at a high - // level O(n^4) and do a verification scheme (at least O(nlogn)). If there - // area 200 partitions on a page, this could be too costly. 
Effort could go - // into pruning the search, but I opted for something quicker. I'm confident - // that the independent adjustments can get similar results and keep the - // complextiy down. However, the other approach could work without using - // TableFinder at all if it is fast enough. It comes down to properly - // deciding what is a table. The code currently relies on TableFinder's - // guess to the location of a table for that. - StructuredTable* RecognizeTable(const TBOX& guess_box); - - protected: - //////// - //////// Lined tables - //////// - - // Returns true if the given box has a lined table within it. The - // table argument will be updated with the table if the table exists. - bool RecognizeLinedTable(const TBOX& guess_box, StructuredTable* table); - // Returns true if the given box has a large number of horizontal and - // vertical lines present. If so, we assume the extent of these lines - // uniquely defines a table and find that table via SolveLinedTable. - bool HasSignificantLines(const TBOX& guess); - - // Given enough horizontal and vertical lines in a region, find a bounding - // box that encloses all of them (as well as newly introduced lines). - // The bounding box is the smallest box that encloses the lines in guess - // without having any lines sticking out of it. - // bounding_box is an in/out parameter. - // On input, it in the extents of the box to search. - // On output, it is the resulting bounding box. - bool FindLinesBoundingBox(TBOX* bounding_box); - // Iteration in above search. - // bounding_box is an in/out parameter. - // On input, it in the extents of the box to search. - // On output, it is the resulting bounding box. - bool FindLinesBoundingBoxIteration(TBOX* bounding_box); - - //////// - //////// Generic "whitespaced" tables - //////// - - // Returns true if the given box has a whitespaced table within it. The - // table argument will be updated if the table exists. 
Also note - // that this method will fail if the guess_box center is not - // mostly within the table. - bool RecognizeWhitespacedTable(const TBOX& guess_box, StructuredTable* table); - - // Finds the location of a horizontal split relative to y. - // This function is mostly unused now. If the SolveWhitespacedTable - // changes much, it can be removed. Note, it isn't really as reliable - // as I thought. I went with alternatives for most of the other uses. - int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom); - - // Indicates that a table row is weak. This means that it has - // many missing data cells or very large cell heights compared. - // to the rest of the table. - static bool IsWeakTableRow(StructuredTable* table, int row); - - // Input data, used as read only data to make decisions. - ColPartitionGrid* text_grid_; // Text ColPartitions - ColPartitionGrid* line_grid_; // Line ColPartitions - // Table constraints, a "good" table must satisfy these. - int min_height_; - int min_width_; - // Filters, used to prevent awkward partitions from destroying structure. - int max_text_height_; // Horizontal lines may intersect taller text. -}; - -} // namespace tesseract - -#endif /* TABLERECOG_H_ */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabvector.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabvector.cpp deleted file mode 100644 index 6bd54778..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabvector.cpp +++ /dev/null @@ -1,983 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tabvector.cpp -// Description: Class to hold a near-vertical vector representing a tab-stop. -// Author: Ray Smith -// Created: Thu Apr 10 16:28:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tabvector.h" -#include "blobbox.h" -#include "colfind.h" -#include "colpartitionset.h" -#include "detlinefit.h" -#include "statistc.h" - -#include - -namespace tesseract { - -// Multiple of height used as a gutter for evaluation search. -const int kGutterMultiple = 4; -// Multiple of neighbour gap that we expect the gutter gap to be at minimum. -const int kGutterToNeighbourRatio = 3; -// Pixel distance for tab vectors to be considered the same. -const int kSimilarVectorDist = 10; -// Pixel distance for ragged tab vectors to be considered the same if there -// is nothing in the overlap box -const int kSimilarRaggedDist = 50; -// Max multiple of height to allow filling in between blobs when evaluating. -const int kMaxFillinMultiple = 11; -// Min fraction of mean gutter size to allow a gutter on a good tab blob. -const double kMinGutterFraction = 0.5; -// Multiple of 1/n lines as a minimum gutter in evaluation. -const double kLineCountReciprocal = 4.0; -// Constant add-on for minimum gutter for aligned tabs. -const double kMinAlignedGutter = 0.25; -// Constant add-on for minimum gutter for ragged tabs. 
-const double kMinRaggedGutter = 1.5; - -double_VAR(textord_tabvector_vertical_gap_fraction, 0.5, - "max fraction of mean blob width allowed for vertical gaps in vertical text"); - -double_VAR(textord_tabvector_vertical_box_ratio, 0.5, - "Fraction of box matches required to declare a line vertical"); - -ELISTIZE(TabConstraint) - -// Create a constraint for the top or bottom of this TabVector. -void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) { - TabConstraint* constraint = new TabConstraint(vector, is_top); - TabConstraint_LIST* constraints = new TabConstraint_LIST; - TabConstraint_IT it(constraints); - it.add_to_end(constraint); - if (is_top) - vector->set_top_constraints(constraints); - else - vector->set_bottom_constraints(constraints); -} - -// Test to see if the constraints are compatible enough to merge. -bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1, - TabConstraint_LIST* list2) { - if (list1 == list2) - return false; - int y_min = -INT32_MAX; - int y_max = INT32_MAX; - if (textord_debug_tabfind > 3) - tprintf("Testing constraint compatibility\n"); - GetConstraints(list1, &y_min, &y_max); - GetConstraints(list2, &y_min, &y_max); - if (textord_debug_tabfind > 3) - tprintf("Resulting range = [%d,%d]\n", y_min, y_max); - return y_max >= y_min; -} - -// Merge the lists of constraints and update the TabVector pointers. -// The second list is deleted. -void TabConstraint::MergeConstraints(TabConstraint_LIST* list1, - TabConstraint_LIST* list2) { - if (list1 == list2) - return; - TabConstraint_IT it(list2); - if (textord_debug_tabfind > 3) - tprintf("Merging constraints\n"); - // The vectors of all constraints on list2 are now going to be on list1. 
- for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabConstraint* constraint = it.data(); - if (textord_debug_tabfind> 3) - constraint->vector_->Print("Merge"); - if (constraint->is_top_) - constraint->vector_->set_top_constraints(list1); - else - constraint->vector_->set_bottom_constraints(list1); - } - it = list1; - it.add_list_before(list2); - delete list2; -} - -// Set all the tops and bottoms as appropriate to a mean of the -// constrained range. Delete all the constraints and list. -void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) { - int y_min = -INT32_MAX; - int y_max = INT32_MAX; - GetConstraints(constraints, &y_min, &y_max); - int y = (y_min + y_max) / 2; - TabConstraint_IT it(constraints); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabConstraint* constraint = it.data(); - TabVector* v = constraint->vector_; - if (constraint->is_top_) { - v->SetYEnd(y); - v->set_top_constraints(nullptr); - } else { - v->SetYStart(y); - v->set_bottom_constraints(nullptr); - } - } - delete constraints; -} - -TabConstraint::TabConstraint(TabVector* vector, bool is_top) - : vector_(vector), is_top_(is_top) { - if (is_top) { - y_min_ = vector->endpt().y(); - y_max_ = vector->extended_ymax(); - } else { - y_max_ = vector->startpt().y(); - y_min_ = vector->extended_ymin(); - } -} - -// Get the max of the mins and the min of the maxes. -void TabConstraint::GetConstraints(TabConstraint_LIST* constraints, - int* y_min, int* y_max) { - TabConstraint_IT it(constraints); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabConstraint* constraint = it.data(); - if (textord_debug_tabfind > 3) { - tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_); - constraint->vector_->Print(" for"); - } - *y_min = std::max(*y_min, constraint->y_min_); - *y_max = std::min(*y_max, constraint->y_max_); - } -} - -ELIST2IZE(TabVector) -CLISTIZE(TabVector) - -// The constructor is private. 
See the bottom of the file... - - -// Public factory to build a TabVector from a list of boxes. -// The TabVector will be of the given alignment type. -// The input vertical vector is used in fitting, and the output -// vertical_x, vertical_y have the resulting line vector added to them -// if the alignment is not ragged. -// The extended_start_y and extended_end_y are the maximum possible -// extension to the line segment that can be used to align with others. -// The input CLIST of BLOBNBOX good_points is consumed and taken over. -TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical, - int extended_start_y, int extended_end_y, - BLOBNBOX_CLIST* good_points, - int* vertical_x, int* vertical_y) { - TabVector* vector = new TabVector(extended_start_y, extended_end_y, - alignment, good_points); - if (!vector->Fit(vertical, false)) { - delete vector; - return nullptr; - } - if (!vector->IsRagged()) { - vertical = vector->endpt_ - vector->startpt_; - int weight = vector->BoxCount(); - *vertical_x += vertical.x() * weight; - *vertical_y += vertical.y() * weight; - } - return vector; -} - -// Build a ragged TabVector by copying another's direction, shifting it -// to match the given blob, and making its initial extent the height -// of the blob, but its extended bounds from the bounds of the original. 
-TabVector::TabVector(const TabVector& src, TabAlignment alignment, - const ICOORD& vertical_skew, BLOBNBOX* blob) - : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_), - sort_key_(0), percent_score_(0), mean_width_(0), - needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false), - alignment_(alignment), - top_constraints_(nullptr), bottom_constraints_(nullptr) { - BLOBNBOX_C_IT it(&boxes_); - it.add_to_end(blob); - TBOX box = blob->bounding_box(); - if (IsLeftTab()) { - startpt_ = box.botleft(); - endpt_ = box.topleft(); - } else { - startpt_ = box.botright(); - endpt_ = box.topright(); - } - sort_key_ = SortKey(vertical_skew, - (startpt_.x() + endpt_.x()) / 2, - (startpt_.y() + endpt_.y()) / 2); - if (textord_debug_tabfind > 3) - Print("Constructed a new tab vector:"); -} - -// Copies basic attributes of a tab vector for simple operations. -// Copies things such startpt, endpt, range. -// Does not copy things such as partners, boxes, or constraints. -// This is useful if you only need vector information for processing, such -// as in the table detection code. -TabVector* TabVector::ShallowCopy() const { - TabVector* copy = new TabVector(); - copy->startpt_ = startpt_; - copy->endpt_ = endpt_; - copy->alignment_ = alignment_; - copy->extended_ymax_ = extended_ymax_; - copy->extended_ymin_ = extended_ymin_; - copy->intersects_other_lines_ = intersects_other_lines_; - return copy; -} - -// Extend this vector to include the supplied blob if it doesn't -// already have it. -void TabVector::ExtendToBox(BLOBNBOX* new_blob) { - TBOX new_box = new_blob->bounding_box(); - BLOBNBOX_C_IT it(&boxes_); - if (!it.empty()) { - BLOBNBOX* blob = it.data(); - TBOX box = blob->bounding_box(); - while (!it.at_last() && box.top() <= new_box.top()) { - if (blob == new_blob) - return; // We have it already. 
- it.forward(); - blob = it.data(); - box = blob->bounding_box(); - } - if (box.top() >= new_box.top()) { - it.add_before_stay_put(new_blob); - needs_refit_ = true; - return; - } - } - needs_refit_ = true; - it.add_after_stay_put(new_blob); -} - -// Set the ycoord of the start and move the xcoord to match. -void TabVector::SetYStart(int start_y) { - startpt_.set_x(XAtY(start_y)); - startpt_.set_y(start_y); -} -// Set the ycoord of the end and move the xcoord to match. -void TabVector::SetYEnd(int end_y) { - endpt_.set_x(XAtY(end_y)); - endpt_.set_y(end_y); -} - -// Rotate the ends by the given vector. Auto flip start and end if needed. -void TabVector::Rotate(const FCOORD& rotation) { - startpt_.rotate(rotation); - endpt_.rotate(rotation); - int dx = endpt_.x() - startpt_.x(); - int dy = endpt_.y() - startpt_.y(); - if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) { - // Need to flip start/end. - ICOORD tmp = startpt_; - startpt_ = endpt_; - endpt_ = tmp; - } -} - -// Setup the initial constraints, being the limits of -// the vector and the extended ends. -void TabVector::SetupConstraints() { - TabConstraint::CreateConstraint(this, false); - TabConstraint::CreateConstraint(this, true); -} - -// Setup the constraints between the partners of this TabVector. -void TabVector::SetupPartnerConstraints() { - // With the first and last partner, we want a common bottom and top, - // respectively, and for each change of partner, we want a common - // top of first with bottom of next. - TabVector_C_IT it(&partners_); - TabVector* prev_partner = nullptr; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* partner = it.data(); - if (partner->top_constraints_ == nullptr || - partner->bottom_constraints_ == nullptr) { - partner->Print("Impossible: has no constraints"); - Print("This vector has it as a partner"); - continue; - } - if (prev_partner == nullptr) { - // This is the first partner, so common bottom. 
- if (TabConstraint::CompatibleConstraints(bottom_constraints_, - partner->bottom_constraints_)) - TabConstraint::MergeConstraints(bottom_constraints_, - partner->bottom_constraints_); - } else { - // We need prev top to be common with partner bottom. - if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_, - partner->bottom_constraints_)) - TabConstraint::MergeConstraints(prev_partner->top_constraints_, - partner->bottom_constraints_); - } - prev_partner = partner; - if (it.at_last()) { - // This is the last partner, so common top. - if (TabConstraint::CompatibleConstraints(top_constraints_, - partner->top_constraints_)) - TabConstraint::MergeConstraints(top_constraints_, - partner->top_constraints_); - } - } -} - -// Setup the constraints between this and its partner. -void TabVector::SetupPartnerConstraints(TabVector* partner) { - if (TabConstraint::CompatibleConstraints(bottom_constraints_, - partner->bottom_constraints_)) - TabConstraint::MergeConstraints(bottom_constraints_, - partner->bottom_constraints_); - if (TabConstraint::CompatibleConstraints(top_constraints_, - partner->top_constraints_)) - TabConstraint::MergeConstraints(top_constraints_, - partner->top_constraints_); -} - -// Use the constraints to modify the top and bottom. -void TabVector::ApplyConstraints() { - if (top_constraints_ != nullptr) - TabConstraint::ApplyConstraints(top_constraints_); - if (bottom_constraints_ != nullptr) - TabConstraint::ApplyConstraints(bottom_constraints_); -} - -// Merge close tab vectors of the same side that overlap. 
-void TabVector::MergeSimilarTabVectors(const ICOORD& vertical, - TabVector_LIST* vectors, - BlobGrid* grid) { - TabVector_IT it1(vectors); - for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { - TabVector* v1 = it1.data(); - TabVector_IT it2(it1); - for (it2.forward(); !it2.at_first(); it2.forward()) { - TabVector* v2 = it2.data(); - if (v2->SimilarTo(vertical, *v1, grid)) { - // Merge into the forward one, in case the combined vector now - // overlaps one in between. - if (textord_debug_tabfind) { - v2->Print("Merging"); - v1->Print("by deleting"); - } - v2->MergeWith(vertical, it1.extract()); - if (textord_debug_tabfind) { - v2->Print("Producing"); - } - ICOORD merged_vector = v2->endpt(); - merged_vector -= v2->startpt(); - if (textord_debug_tabfind && abs(merged_vector.x()) > 100) { - v2->Print("Garbage result of merge?"); - } - break; - } - } - } -} - -// Return true if this vector is the same side, overlaps, and close -// enough to the other to be merged. -bool TabVector::SimilarTo(const ICOORD& vertical, - const TabVector& other, BlobGrid* grid) const { - if ((IsRightTab() && other.IsRightTab()) || - (IsLeftTab() && other.IsLeftTab())) { - // If they don't overlap, at least in extensions, then there is no chance. - if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) - return false; - // A fast approximation to the scale factor of the sort_key_. - int v_scale = abs(vertical.y()); - if (v_scale == 0) - v_scale = 1; - // If they are close enough, then OK. - if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ && - sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) - return true; - // Ragged tabs get a bigger threshold. - if (!IsRagged() || !other.IsRagged() || - sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ || - sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) - return false; - if (grid == nullptr) { - // There is nothing else to test! 
- return true; - } - // If there is nothing in the rectangle between the vector that is going to - // move, and the place it is moving to, then they can be merged. - // Setup a vertical search for any blob. - const TabVector* mover = (IsRightTab() && - sort_key_ < other.sort_key_) ? this : &other; - int top_y = mover->endpt_.y(); - int bottom_y = mover->startpt_.y(); - int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y)); - int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y)); - int shift = abs(sort_key_ - other.sort_key_) / v_scale; - if (IsRightTab()) { - right += shift; - } else { - left -= shift; - } - - GridSearch vsearch(grid); - vsearch.StartVerticalSearch(left, right, top_y); - BLOBNBOX* blob; - while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) { - const TBOX& box = blob->bounding_box(); - if (box.top() > bottom_y) - return true; // Nothing found. - if (box.bottom() < top_y) - continue; // Doesn't overlap. - int left_at_box = XAtY(box.bottom()); - int right_at_box = left_at_box; - if (IsRightTab()) - right_at_box += shift; - else - left_at_box -= shift; - if (std::min(right_at_box, static_cast(box.right())) > std::max(left_at_box, static_cast(box.left()))) - return false; - } - return true; // Nothing found. - } - return false; -} - -// Eat the other TabVector into this and delete it. -void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) { - extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_); - extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_); - if (other->IsRagged()) { - alignment_ = other->alignment_; - } - // Merge sort the two lists of boxes. 
- BLOBNBOX_C_IT it1(&boxes_); - BLOBNBOX_C_IT it2(&other->boxes_); - while (!it2.empty()) { - BLOBNBOX* bbox2 = it2.extract(); - it2.forward(); - TBOX box2 = bbox2->bounding_box(); - BLOBNBOX* bbox1 = it1.data(); - TBOX box1 = bbox1->bounding_box(); - while (box1.bottom() < box2.bottom() && !it1.at_last()) { - it1.forward(); - bbox1 = it1.data(); - box1 = bbox1->bounding_box(); - } - if (box1.bottom() < box2.bottom()) { - it1.add_to_end(bbox2); - } else if (bbox1 != bbox2) { - it1.add_before_stay_put(bbox2); - } - } - Fit(vertical, true); - other->Delete(this); -} - -// Add a new element to the list of partner TabVectors. -// Partners must be added in order of increasing y coordinate of the text line -// that makes them partners. -// Groups of identical partners are merged into one. -void TabVector::AddPartner(TabVector* partner) { - if (IsSeparator() || partner->IsSeparator()) - return; - TabVector_C_IT it(&partners_); - if (!it.empty()) { - it.move_to_last(); - if (it.data() == partner) - return; - } - it.add_after_then_move(partner); -} - -// Return true if other is a partner of this. -bool TabVector::IsAPartner(const TabVector* other) { - TabVector_C_IT it(&partners_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (it.data() == other) - return true; - } - return false; -} - -// These names must be synced with the TabAlignment enum in tabvector.h. -const char* kAlignmentNames[] = { - "Left Aligned", - "Left Ragged", - "Center", - "Right Aligned", - "Right Ragged", - "Separator" -}; - -// Print basic information about this tab vector. -void TabVector::Print(const char* prefix) { - tprintf( - "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," - " partners=%d\n", - prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), - endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_, - boxes_.length(), partners_.length()); -} - -// Print basic information about this tab vector and every box in it. 
-void TabVector::Debug(const char* prefix) { - Print(prefix); - BLOBNBOX_C_IT it(&boxes_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - const TBOX& box = bbox->bounding_box(); - tprintf("Box at (%d,%d)->(%d,%d)\n", - box.left(), box.bottom(), box.right(), box.top()); - } -} - -// Draw this tabvector in place in the given window. -void TabVector::Display(ScrollView* tab_win) { -#ifndef GRAPHICS_DISABLED - if (textord_debug_printable) - tab_win->Pen(ScrollView::BLUE); - else if (alignment_ == TA_LEFT_ALIGNED) - tab_win->Pen(ScrollView::LIME_GREEN); - else if (alignment_ == TA_LEFT_RAGGED) - tab_win->Pen(ScrollView::DARK_GREEN); - else if (alignment_ == TA_RIGHT_ALIGNED) - tab_win->Pen(ScrollView::PINK); - else if (alignment_ == TA_RIGHT_RAGGED) - tab_win->Pen(ScrollView::CORAL); - else - tab_win->Pen(ScrollView::WHITE); - tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y()); - tab_win->Pen(ScrollView::GREY); - tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_); - tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y()); - char score_buf[64]; - snprintf(score_buf, sizeof(score_buf), "%d", percent_score_); - tab_win->TextAttributes("Times", 50, false, false, false); - tab_win->Text(startpt_.x(), startpt_.y(), score_buf); -#endif -} - -// Refit the line and/or re-evaluate the vector if the dirty flags are set. -void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical, - TabFind* finder) { - if (needs_refit_) - Fit(vertical, true); - if (needs_evaluation_) - Evaluate(vertical, finder); -} - -// Evaluate the vector in terms of coverage of its length by good-looking -// box edges. A good looking box is one where its nearest neighbour on the -// inside is nearer than half the distance its nearest neighbour on the -// outside of the putative column. Bad boxes are removed from the line. 
-// A second pass then further filters boxes by requiring that the gutter -// width be a minimum fraction of the mean gutter along the line. -void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { - bool debug = false; - needs_evaluation_ = false; - int length = endpt_.y() - startpt_.y(); - if (length == 0 || boxes_.empty()) { - percent_score_ = 0; - Print("Zero length in evaluate"); - return; - } - // Compute the mean box height. - BLOBNBOX_C_IT it(&boxes_); - int mean_height = 0; - int height_count = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - const TBOX& box = bbox->bounding_box(); - int height = box.height(); - mean_height += height; - ++height_count; - } - if (height_count > 0) mean_height /= height_count; - int max_gutter = kGutterMultiple * mean_height; - if (IsRagged()) { - // Ragged edges face a tougher test in that the gap must always be within - // the height of the blob. - max_gutter = kGutterToNeighbourRatio * mean_height; - } - - STATS gutters(0, max_gutter + 1); - // Evaluate the boxes for their goodness, calculating the coverage as we go. - // Remove boxes that are not good and shorten the list to the first and - // last good boxes. - int num_deleted_boxes = 0; - bool text_on_image = false; - int good_length = 0; - const TBOX* prev_good_box = nullptr; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - const TBOX& box = bbox->bounding_box(); - int mid_y = (box.top() + box.bottom()) / 2; - if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) { - if (!debug) { - tprintf("After already deleting %d boxes, ", num_deleted_boxes); - Print("Starting evaluation"); - } - debug = true; - } - // A good box is one where the nearest neighbour on the inside is closer - // than half the distance to the nearest neighbour on the outside - // (of the putative column). 
- bool left = IsLeftTab(); - int tab_x = XAtY(mid_y); - int gutter_width; - int neighbour_gap; - finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, - bbox, &gutter_width, &neighbour_gap); - if (debug) { - tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", - box.left(), box.bottom(), box.right(), box.top(), - gutter_width, neighbour_gap); - } - // Now we can make the test. - if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) { - // A good box contributes its height to the good_length. - good_length += box.top() - box.bottom(); - gutters.add(gutter_width, 1); - // Two good boxes together contribute the gap between them - // to the good_length as well, as long as the gap is not - // too big. - if (prev_good_box != nullptr) { - int vertical_gap = box.bottom() - prev_good_box->top(); - double size1 = sqrt(static_cast(prev_good_box->area())); - double size2 = sqrt(static_cast(box.area())); - if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2)) - good_length += vertical_gap; - if (debug) { - tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", - vertical_gap, kMaxFillinMultiple * std::min(size1, size2), - good_length); - } - } else { - // Adjust the start to the first good box. - SetYStart(box.bottom()); - } - prev_good_box = &box; - if (bbox->flow() == BTFT_TEXT_ON_IMAGE) - text_on_image = true; - } else { - // Get rid of boxes that are not good. - if (debug) { - tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", - box.left(), box.bottom(), box.right(), box.top(), - gutter_width, neighbour_gap); - } - it.extract(); - ++num_deleted_boxes; - } - } - if (debug) { - Print("Evaluating:"); - } - // If there are any good boxes, do it again, except this time get rid of - // boxes that have a gutter that is a small fraction of the mean gutter. - // This filters out ends that run into a coincidental gap in the text. 
- int search_top = endpt_.y(); - int search_bottom = startpt_.y(); - int median_gutter = IntCastRounded(gutters.median()); - if (gutters.get_total() > 0) { - prev_good_box = nullptr; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - const TBOX& box = bbox->bounding_box(); - int mid_y = (box.top() + box.bottom()) / 2; - // A good box is one where the gutter width is at least some constant - // fraction of the mean gutter width. - bool left = IsLeftTab(); - int tab_x = XAtY(mid_y); - int max_gutter = kGutterMultiple * mean_height; - if (IsRagged()) { - // Ragged edges face a tougher test in that the gap must always be - // within the height of the blob. - max_gutter = kGutterToNeighbourRatio * mean_height; - } - int gutter_width; - int neighbour_gap; - finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, - bbox, &gutter_width, &neighbour_gap); - // Now we can make the test. - if (gutter_width >= median_gutter * kMinGutterFraction) { - if (prev_good_box == nullptr) { - // Adjust the start to the first good box. - SetYStart(box.bottom()); - search_bottom = box.top(); - } - prev_good_box = &box; - search_top = box.bottom(); - } else { - // Get rid of boxes that are not good. - if (debug) { - tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", - box.left(), box.bottom(), box.right(), box.top(), - gutter_width, median_gutter); - } - it.extract(); - ++num_deleted_boxes; - } - } - } - // If there has been a good box, adjust the end. - if (prev_good_box != nullptr) { - SetYEnd(prev_good_box->top()); - // Compute the percentage of the vector that is occupied by good boxes. - int length = endpt_.y() - startpt_.y(); - percent_score_ = 100 * good_length / length; - if (num_deleted_boxes > 0) { - needs_refit_ = true; - FitAndEvaluateIfNeeded(vertical, finder); - if (boxes_.empty()) - return; - } - // Test the gutter over the whole vector, instead of just at the boxes. 
- int required_shift; - if (search_bottom > search_top) { - search_bottom = startpt_.y(); - search_top = endpt_.y(); - } - double min_gutter_width = kLineCountReciprocal / boxes_.length(); - min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter; - min_gutter_width *= mean_height; - int max_gutter_width = IntCastRounded(min_gutter_width) + 1; - if (median_gutter > max_gutter_width) - max_gutter_width = median_gutter; - int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, - text_on_image, max_gutter_width, - &required_shift); - if (gutter_width < min_gutter_width) { - if (debug) { - tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", - gutter_width, min_gutter_width); - } - boxes_.shallow_clear(); - percent_score_ = 0; - } else if (debug) { - tprintf("Final gutter %d, vs limit of %g, required shift = %d\n", - gutter_width, min_gutter_width, required_shift); - } - } else { - // There are no good boxes left, so score is 0. - percent_score_ = 0; - } - - if (debug) { - Print("Evaluation complete:"); - } -} - -// (Re)Fit a line to the stored points. Returns false if the line -// is degenerate. Althougth the TabVector code mostly doesn't care about the -// direction of lines, XAtY would give silly results for a horizontal line. -// The class is mostly aimed at use for vertical lines representing -// horizontal tab stops. -bool TabVector::Fit(ICOORD vertical, bool force_parallel) { - needs_refit_ = false; - if (boxes_.empty()) { - // Don't refit something with no boxes, as that only happens - // in Evaluate, and we don't want to end up with a zero vector. - if (!force_parallel) - return false; - // If we are forcing parallel, then we just need to set the sort_key_. - ICOORD midpt = startpt_; - midpt += endpt_; - midpt /= 2; - sort_key_ = SortKey(vertical, midpt.x(), midpt.y()); - return startpt_.y() != endpt_.y(); - } - if (!force_parallel && !IsRagged()) { - // Use a fitted line as the vertical. 
- DetLineFit linepoints; - BLOBNBOX_C_IT it(&boxes_); - // Fit a line to all the boxes in the list. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - const TBOX& box = bbox->bounding_box(); - int x1 = IsRightTab() ? box.right() : box.left(); - ICOORD boxpt(x1, box.bottom()); - linepoints.Add(boxpt); - if (it.at_last()) { - ICOORD top_pt(x1, box.top()); - linepoints.Add(top_pt); - } - } - linepoints.Fit(&startpt_, &endpt_); - if (startpt_.y() != endpt_.y()) { - vertical = endpt_; - vertical -= startpt_; - } - } - int start_y = startpt_.y(); - int end_y = endpt_.y(); - sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX; - BLOBNBOX_C_IT it(&boxes_); - // Choose a line parallel to the vertical such that all boxes are on the - // correct side of it. - mean_width_ = 0; - int width_count = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* bbox = it.data(); - const TBOX& box = bbox->bounding_box(); - mean_width_ += box.width(); - ++width_count; - int x1 = IsRightTab() ? box.right() : box.left(); - // Test both the bottom and the top, as one will be more extreme, depending - // on the direction of skew. - int bottom_y = box.bottom(); - int top_y = box.top(); - int key = SortKey(vertical, x1, bottom_y); - if (IsLeftTab() == (key < sort_key_)) { - sort_key_ = key; - startpt_ = ICOORD(x1, bottom_y); - } - key = SortKey(vertical, x1, top_y); - if (IsLeftTab() == (key < sort_key_)) { - sort_key_ = key; - startpt_ = ICOORD(x1, top_y); - } - if (it.at_first()) - start_y = bottom_y; - if (it.at_last()) - end_y = top_y; - } - if (width_count > 0) { - mean_width_ = (mean_width_ + width_count - 1) / width_count; - } - endpt_ = startpt_ + vertical; - needs_evaluation_ = true; - if (start_y != end_y) { - // Set the ends of the vector to fully include the first and last blobs. 
- startpt_.set_x(XAtY(vertical, sort_key_, start_y)); - startpt_.set_y(start_y); - endpt_.set_x(XAtY(vertical, sort_key_, end_y)); - endpt_.set_y(end_y); - return true; - } - return false; -} - -// Returns the singleton partner if there is one, or nullptr otherwise. -TabVector* TabVector::GetSinglePartner() { - if (!partners_.singleton()) - return nullptr; - TabVector_C_IT partner_it(&partners_); - TabVector* partner = partner_it.data(); - return partner; -} - -// Return the partner of this TabVector if the vector qualifies as -// being a vertical text line, otherwise nullptr. -TabVector* TabVector::VerticalTextlinePartner() { - if (!partners_.singleton()) - return nullptr; - TabVector_C_IT partner_it(&partners_); - TabVector* partner = partner_it.data(); - BLOBNBOX_C_IT box_it1(&boxes_); - BLOBNBOX_C_IT box_it2(&partner->boxes_); - // Count how many boxes are also in the other list. - // At the same time, gather the mean width and median vertical gap. - if (textord_debug_tabfind > 1) { - Print("Testing for vertical text"); - partner->Print(" partner"); - } - int num_matched = 0; - int num_unmatched = 0; - int total_widths = 0; - int width = startpt().x() - partner->startpt().x(); - if (width < 0) - width = -width; - STATS gaps(0, width * 2); - BLOBNBOX* prev_bbox = nullptr; - box_it2.mark_cycle_pt(); - for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { - BLOBNBOX* bbox = box_it1.data(); - TBOX box = bbox->bounding_box(); - if (prev_bbox != nullptr) { - gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); - } - while (!box_it2.cycled_list() && box_it2.data() != bbox && - box_it2.data()->bounding_box().bottom() < box.bottom()) { - box_it2.forward(); - } - if (!box_it2.cycled_list() && box_it2.data() == bbox && - bbox->region_type() >= BRT_UNKNOWN && - (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN)) - ++num_matched; - else - ++num_unmatched; - total_widths += box.width(); - prev_bbox = bbox; - } - if (num_unmatched + 
num_matched == 0) return nullptr; - double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); - double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; - int min_box_match = static_cast((num_matched + num_unmatched) * - textord_tabvector_vertical_box_ratio); - bool is_vertical = (gaps.get_total() > 0 && - num_matched >= min_box_match && - gaps.median() <= max_gap); - if (textord_debug_tabfind > 1) { - tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " - "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", - gaps.get_total(), num_matched, num_unmatched, min_box_match, - gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); - } - return (is_vertical) ? partner : nullptr; -} - -// The constructor is private. -TabVector::TabVector(int extended_ymin, int extended_ymax, - TabAlignment alignment, BLOBNBOX_CLIST* boxes) - : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax), - sort_key_(0), percent_score_(0), mean_width_(0), - needs_refit_(true), needs_evaluation_(true), alignment_(alignment), - top_constraints_(nullptr), bottom_constraints_(nullptr) { - BLOBNBOX_C_IT it(&boxes_); - it.add_list_after(boxes); -} - -// Delete this, but first, repoint all the partners to point to -// replacement. If replacement is nullptr, then partner relationships -// are removed. -void TabVector::Delete(TabVector* replacement) { - TabVector_C_IT it(&partners_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* partner = it.data(); - TabVector_C_IT p_it(&partner->partners_); - // If partner already has replacement in its list, then make - // replacement null, and just remove this TabVector when we find it. 
- TabVector* partner_replacement = replacement; - for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { - TabVector* p_partner = p_it.data(); - if (p_partner == partner_replacement) { - partner_replacement = nullptr; - break; - } - } - // Remove all references to this, and replace with replacement if not nullptr. - for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { - TabVector* p_partner = p_it.data(); - if (p_partner == this) { - p_it.extract(); - if (partner_replacement != nullptr) - p_it.add_before_stay_put(partner_replacement); - } - } - if (partner_replacement != nullptr) { - partner_replacement->AddPartner(partner); - } - } - delete this; -} - - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabvector.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabvector.h deleted file mode 100644 index 981412ab..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tabvector.h +++ /dev/null @@ -1,430 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tabvector.h -// Description: Class to hold a near-vertical vector representing a tab-stop. -// Author: Ray Smith -// Created: Thu Apr 10 16:25:01 PST 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_TABVECTOR_H_ -#define TESSERACT_TEXTORD_TABVECTOR_H_ - -#include "blobgrid.h" -#include "clst.h" -#include "elst.h" -#include "elst2.h" -#include "rect.h" -#include "bbgrid.h" - -#include - -class BLOBNBOX; -class ScrollView; - -namespace tesseract { - - -extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5, - "Max fraction of mean blob width allowed for vertical gaps in vertical text"); -extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5, - "Fraction of box matches required to declare a line vertical"); - -// The alignment type that a tab vector represents. -// Keep this enum synced with kAlignmentNames in tabvector.cpp. -enum TabAlignment { - TA_LEFT_ALIGNED, - TA_LEFT_RAGGED, - TA_CENTER_JUSTIFIED, - TA_RIGHT_ALIGNED, - TA_RIGHT_RAGGED, - TA_SEPARATOR, - TA_COUNT -}; - -// Forward declarations. The classes use their own list types, so we -// need to make the list types first. -class TabFind; -class TabVector; -class TabConstraint; - -ELIST2IZEH(TabVector) -CLISTIZEH(TabVector) -ELISTIZEH(TabConstraint) - -// TabConstraint is a totally self-contained class to maintain -// a list of [min,max] constraints, each referring to a TabVector. -// The constraints are manipulated through static methods that act -// on a list of constraints. The list itself is cooperatively owned -// by the TabVectors of the constraints on the list and managed -// by implicit reference counting via the elements of the list. -class TabConstraint : public ELIST_LINK { - public: - // This empty constructor is here only so that the class can be ELISTIZED. - // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier - // and eliminate CLASSNAME##_copier. - TabConstraint() = default; - - // Create a constraint for the top or bottom of this TabVector. 
- static void CreateConstraint(TabVector* vector, bool is_top); - - // Test to see if the constraints are compatible enough to merge. - static bool CompatibleConstraints(TabConstraint_LIST* list1, - TabConstraint_LIST* list2); - - // Merge the lists of constraints and update the TabVector pointers. - // The second list is deleted. - static void MergeConstraints(TabConstraint_LIST* list1, - TabConstraint_LIST* list2); - - // Set all the tops and bottoms as appropriate to a mean of the - // constrained range. Delete all the constraints and list. - static void ApplyConstraints(TabConstraint_LIST* constraints); - - private: - TabConstraint(TabVector* vector, bool is_top); - - // Get the max of the mins and the min of the maxes. - static void GetConstraints(TabConstraint_LIST* constraints, - int* y_min, int* y_max); - - // The TabVector this constraint applies to. - TabVector* vector_; - // If true then we refer to the top of the vector_. - bool is_top_; - // The allowed range of this vector_. - int y_min_; - int y_max_; -}; - -// Class to hold information about a single vector -// that represents a tab stop or a rule line. -class TabVector : public ELIST2_LINK { - public: - // TODO(rays) fix this in elst.h line 1076, where it should use the - // copy constructor instead of operator=. - TabVector() = default; - ~TabVector() = default; - - // Public factory to build a TabVector from a list of boxes. - // The TabVector will be of the given alignment type. - // The input vertical vector is used in fitting, and the output - // vertical_x, vertical_y have the resulting line vector added to them - // if the alignment is not ragged. - // The extended_start_y and extended_end_y are the maximum possible - // extension to the line segment that can be used to align with others. - // The input CLIST of BLOBNBOX good_points is consumed and taken over. 
- static TabVector* FitVector(TabAlignment alignment, ICOORD vertical, - int extended_start_y, int extended_end_y, - BLOBNBOX_CLIST* good_points, - int* vertical_x, int* vertical_y); - - // Build a ragged TabVector by copying another's direction, shifting it - // to match the given blob, and making its initial extent the height - // of the blob, but its extended bounds from the bounds of the original. - TabVector(const TabVector& src, TabAlignment alignment, - const ICOORD& vertical_skew, BLOBNBOX* blob); - - // Copies basic attributes of a tab vector for simple operations. - // Copies things such startpt, endpt, range, width. - // Does not copy things such as partners, boxes, or constraints. - // This is useful if you only need vector information for processing, such - // as in the table detection code. - TabVector* ShallowCopy() const; - - // Simple accessors. - const ICOORD& startpt() const { - return startpt_; - } - const ICOORD& endpt() const { - return endpt_; - } - int extended_ymax() const { - return extended_ymax_; - } - int extended_ymin() const { - return extended_ymin_; - } - int sort_key() const { - return sort_key_; - } - int mean_width() const { - return mean_width_; - } - void set_top_constraints(TabConstraint_LIST* constraints) { - top_constraints_ = constraints; - } - void set_bottom_constraints(TabConstraint_LIST* constraints) { - bottom_constraints_ = constraints; - } - TabVector_CLIST* partners() { - return &partners_; - } - void set_startpt(const ICOORD& start) { - startpt_ = start; - } - void set_endpt(const ICOORD& end) { - endpt_ = end; - } - bool intersects_other_lines() const { - return intersects_other_lines_; - } - void set_intersects_other_lines(bool value) { - intersects_other_lines_ = value; - } - - // Inline quasi-accessors that require some computation. - - // Compute the x coordinate at the given y coordinate. 
- int XAtY(int y) const { - int height = endpt_.y() - startpt_.y(); - if (height != 0) - return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + - startpt_.x(); - else - return startpt_.x(); - } - - // Compute the vertical overlap with the other TabVector. - int VOverlap(const TabVector& other) const { - return std::min(other.endpt_.y(), endpt_.y()) - - std::max(other.startpt_.y(), startpt_.y()); - } - // Compute the vertical overlap with the given y bounds. - int VOverlap(int top_y, int bottom_y) const { - return std::min(top_y, static_cast(endpt_.y())) - std::max(bottom_y, static_cast(startpt_.y())); - } - // Compute the extended vertical overlap with the given y bounds. - int ExtendedOverlap(int top_y, int bottom_y) const { - return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_); - } - - // Return true if this is a left tab stop, either aligned, or ragged. - bool IsLeftTab() const { - return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED; - } - // Return true if this is a right tab stop, either aligned, or ragged. - bool IsRightTab() const { - return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED; - } - // Return true if this is a separator. - bool IsSeparator() const { - return alignment_ == TA_SEPARATOR; - } - // Return true if this is a center aligned tab stop. - bool IsCenterTab() const { - return alignment_ == TA_CENTER_JUSTIFIED; - } - // Return true if this is a ragged tab top, either left or right. - bool IsRagged() const { - return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED; - } - - // Return true if this vector is to the left of the other in terms - // of sort_key_. - bool IsLeftOf(const TabVector& other) const { - return sort_key_ < other.sort_key_; - } - - // Return true if the vector has no partners. - bool Partnerless() { - return partners_.empty(); - } - - // Return the number of tab boxes in this vector. 
- int BoxCount() { - return boxes_.length(); - } - - // Lock the vector from refits by clearing the boxes_ list. - void Freeze() { - boxes_.shallow_clear(); - } - - // Flip x and y on the ends so a vector can be created from flipped input. - void XYFlip() { - int x = startpt_.y(); - startpt_.set_y(startpt_.x()); - startpt_.set_x(x); - x = endpt_.y(); - endpt_.set_y(endpt_.x()); - endpt_.set_x(x); - } - - // Reflect the tab vector in the y-axis. - void ReflectInYAxis() { - startpt_.set_x(-startpt_.x()); - endpt_.set_x(-endpt_.x()); - sort_key_ = -sort_key_; - if (alignment_ == TA_LEFT_ALIGNED) - alignment_ = TA_RIGHT_ALIGNED; - else if (alignment_ == TA_RIGHT_ALIGNED) - alignment_ = TA_LEFT_ALIGNED; - if (alignment_ == TA_LEFT_RAGGED) - alignment_ = TA_RIGHT_RAGGED; - else if (alignment_ == TA_RIGHT_RAGGED) - alignment_ = TA_LEFT_RAGGED; - } - - // Separate function to compute the sort key for a given coordinate pair. - static int SortKey(const ICOORD& vertical, int x, int y) { - ICOORD pt(x, y); - return pt * vertical; - } - - // Return the x at the given y for the given sort key. - static int XAtY(const ICOORD& vertical, int sort_key, int y) { - if (vertical.y() != 0) - return (vertical.x() * y + sort_key) / vertical.y(); - else - return sort_key; - } - - // Sort function for E2LIST::sort to sort by sort_key_. - static int SortVectorsByKey(const void* v1, const void* v2) { - const TabVector* tv1 = *static_cast(v1); - const TabVector* tv2 = *static_cast(v2); - return tv1->sort_key_ - tv2->sort_key_; - } - - // More complex members. - - // Extend this vector to include the supplied blob if it doesn't - // already have it. - void ExtendToBox(BLOBNBOX* blob); - - // Set the ycoord of the start and move the xcoord to match. - void SetYStart(int start_y); - // Set the ycoord of the end and move the xcoord to match. - void SetYEnd(int end_y); - - // Rotate the ends by the given vector. 
- void Rotate(const FCOORD& rotation); - - // Setup the initial constraints, being the limits of - // the vector and the extended ends. - void SetupConstraints(); - - // Setup the constraints between the partners of this TabVector. - void SetupPartnerConstraints(); - - // Setup the constraints between this and its partner. - void SetupPartnerConstraints(TabVector* partner); - - // Use the constraints to modify the top and bottom. - void ApplyConstraints(); - - // Merge close tab vectors of the same side that overlap. - static void MergeSimilarTabVectors(const ICOORD& vertical, - TabVector_LIST* vectors, BlobGrid* grid); - - // Return true if this vector is the same side, overlaps, and close - // enough to the other to be merged. - bool SimilarTo(const ICOORD& vertical, - const TabVector& other, BlobGrid* grid) const; - - // Eat the other TabVector into this and delete it. - void MergeWith(const ICOORD& vertical, TabVector* other); - - // Add a new element to the list of partner TabVectors. - // Partners must be added in order of increasing y coordinate of the text line - // that makes them partners. - // Groups of identical partners are merged into one. - void AddPartner(TabVector* partner); - - // Return true if other is a partner of this. - bool IsAPartner(const TabVector* other); - - // Print basic information about this tab vector. - void Print(const char* prefix); - - // Print basic information about this tab vector and every box in it. - void Debug(const char* prefix); - - // Draw this tabvector in place in the given window. - void Display(ScrollView* tab_win); - - // Refit the line and/or re-evaluate the vector if the dirty flags are set. - void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder); - - // Evaluate the vector in terms of coverage of its length by good-looking - // box edges. 
A good looking box is one where its nearest neighbour on the - // inside is nearer than half the distance its nearest neighbour on the - // outside of the putative column. Bad boxes are removed from the line. - // A second pass then further filters boxes by requiring that the gutter - // width be a minimum fraction of the mean gutter along the line. - void Evaluate(const ICOORD& vertical, TabFind* finder); - - // (Re)Fit a line to the stored points. Returns false if the line - // is degenerate. Althougth the TabVector code mostly doesn't care about the - // direction of lines, XAtY would give silly results for a horizontal line. - // The class is mostly aimed at use for vertical lines representing - // horizontal tab stops. - bool Fit(ICOORD vertical, bool force_parallel); - - // Return the partner of this TabVector if the vector qualifies as - // being a vertical text line, otherwise nullptr. - TabVector* VerticalTextlinePartner(); - - // Return the matching tabvector if there is exactly one partner, or - // nullptr otherwise. This can be used after matching is done, eg. by - // VerticalTextlinePartner(), without checking if the line is vertical. - TabVector* GetSinglePartner(); - - private: - // Constructor is private as the static factory is the external way - // to build a TabVector. - TabVector(int extended_ymin, int extended_ymax, - TabAlignment alignment, BLOBNBOX_CLIST* boxes); - - // Delete this, but first, repoint all the partners to point to - // replacement. If replacement is nullptr, then partner relationships - // are removed. - void Delete(TabVector* replacement); - - private: - // The bottom of the tab line. - ICOORD startpt_; - // The top of the tab line. - ICOORD endpt_; - // The lowest y that the vector might extend to. - int extended_ymin_; - // The highest y that the vector might extend to. - int extended_ymax_; - // Perpendicular distance of vector from a given vertical for sorting. - int sort_key_; - // Result of Evaluate 0-100. 
Coverage of line with good boxes. - int percent_score_; - // The mean width of the blobs. Meaningful only for separator lines. - int mean_width_; - // True if the boxes_ list has been modified, so a refit is needed. - bool needs_refit_; - // True if a fit has been done, so re-evaluation is needed. - bool needs_evaluation_; - // True if a separator line intersects at least 2 other lines. - bool intersects_other_lines_; - // The type of this TabVector. - TabAlignment alignment_; - // The list of boxes whose edges are aligned at this TabVector. - BLOBNBOX_CLIST boxes_; - // List of TabVectors that have a connection with this via a text line. - TabVector_CLIST partners_; - // Constraints used to resolve the exact location of the top and bottom - // of the tab line. - TabConstraint_LIST* top_constraints_; - TabConstraint_LIST* bottom_constraints_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_TABVECTOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textlineprojection.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textlineprojection.cpp deleted file mode 100644 index 5d48ff02..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textlineprojection.cpp +++ /dev/null @@ -1,775 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "textlineprojection.h" -#include "allheaders.h" -#include "bbgrid.h" // Base class. -#include "blobbox.h" // BlobNeighourDir. -#include "blobs.h" -#include "colpartition.h" -#include "normalis.h" - -#include - -// Padding factor to use on definitely oriented blobs -const int kOrientedPadFactor = 8; -// Padding factor to use on not definitely oriented blobs. -const int kDefaultPadFactor = 2; -// Penalty factor for going away from the line center. -const int kWrongWayPenalty = 4; -// Ratio between parallel gap and perpendicular gap used to measure total -// distance of a box from a target box in curved textline space. -// parallel-gap is treated more favorably by this factor to allow catching -// quotes and elipsis at the end of textlines. -const int kParaPerpDistRatio = 4; -// Multiple of scale_factor_ that the inter-line gap must be before we start -// padding the increment box perpendicular to the text line. -const int kMinLineSpacingFactor = 4; -// Maximum tab-stop overrun for horizontal padding, in projection pixels. -const int kMaxTabStopOverrun = 6; - -namespace tesseract { - -TextlineProjection::TextlineProjection(int resolution) - : x_origin_(0), y_origin_(0), pix_(nullptr) { - // The projection map should be about 100 ppi, whatever the input. - scale_factor_ = IntCastRounded(resolution / 100.0); - if (scale_factor_ < 1) scale_factor_ = 1; -} -TextlineProjection::~TextlineProjection() { - pixDestroy(&pix_); -} - -// Build the projection profile given the input_block containing lists of -// blobs, a rotation to convert to image coords, -// and a full-resolution nontext_map, marking out areas to avoid. -// During construction, we have the following assumptions: -// The rotation is a multiple of 90 degrees, ie no deskew yet. -// The blobs have had their left and right rules set to also limit -// the range of projection. 
-void TextlineProjection::ConstructProjection(TO_BLOCK* input_block, - const FCOORD& rotation, - Pix* nontext_map) { - pixDestroy(&pix_); - TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map)); - x_origin_ = 0; - y_origin_ = image_box.height(); - int width = (image_box.width() + scale_factor_ - 1) / scale_factor_; - int height = (image_box.height() + scale_factor_ - 1) / scale_factor_; - - pix_ = pixCreate(width, height, 8); - ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map); - ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map); - Pix* final_pix = pixBlockconv(pix_, 1, 1); -// Pix* final_pix = pixBlockconv(pix_, 2, 2); - pixDestroy(&pix_); - pix_ = final_pix; -} - -// Display the blobs in the window colored according to textline quality. -void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs, - ScrollView* win) { -#ifndef GRAPHICS_DISABLED - BLOBNBOX_IT it(blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - const TBOX& box = blob->bounding_box(); - bool bad_box = BoxOutOfHTextline(box, nullptr, false); - if (blob->UniquelyVertical()) - win->Pen(ScrollView::YELLOW); - else - win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE); - win->Rectangle(box.left(), box.bottom(), box.right(), box.top()); - } - win->Update(); -#endif // GRAPHICS_DISABLED -} - -// Moves blobs that look like they don't sit well on a textline from the -// input blobs list to the output small_blobs list. -// This gets them away from initial textline finding to stop diacritics -// from forming incorrect textlines. (Introduced mainly to fix Thai.) 
-void TextlineProjection::MoveNonTextlineBlobs( - BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const { - BLOBNBOX_IT it(blobs); - BLOBNBOX_IT small_it(small_blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - const TBOX& box = blob->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), - box.bottom()); - if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) { - blob->ClearNeighbours(); - small_it.add_to_end(it.extract()); - } - } -} - -// Create a window and display the projection in it. -void TextlineProjection::DisplayProjection() const { -#ifndef GRAPHICS_DISABLED - int width = pixGetWidth(pix_); - int height = pixGetHeight(pix_); - Pix* pixc = pixCreate(width, height, 32); - int src_wpl = pixGetWpl(pix_); - int col_wpl = pixGetWpl(pixc); - uint32_t* src_data = pixGetData(pix_); - uint32_t* col_data = pixGetData(pixc); - for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) { - for (int x = 0; x < width; ++x) { - int pixel = GET_DATA_BYTE(src_data, x); - l_uint32 result; - if (pixel <= 17) - composeRGBPixel(0, 0, pixel * 15, &result); - else if (pixel <= 145) - composeRGBPixel(0, (pixel - 17) * 2, 255, &result); - else - composeRGBPixel((pixel - 145) * 2, 255, 255, &result); - col_data[x] = result; - } - } - ScrollView* win = new ScrollView("Projection", 0, 0, - width, height, width, height); - win->Image(pixc, 0, 0); - win->Update(); - pixDestroy(&pixc); -#endif // GRAPHICS_DISABLED -} - -// Compute the distance of the box from the partition using curved projection -// space. As DistanceOfBoxFromBox, except that the direction is taken from -// the ColPartition and the median bounds of the ColPartition are used as -// the to_box. 
-int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box, - const ColPartition& part, - const DENORM* denorm, - bool debug) const { - // Compute a partition box that uses the median top/bottom of the blobs - // within and median left/right for vertical. - TBOX part_box = part.bounding_box(); - if (part.IsHorizontalType()) { - part_box.set_top(part.median_top()); - part_box.set_bottom(part.median_bottom()); - } else { - part_box.set_left(part.median_left()); - part_box.set_right(part.median_right()); - } - // Now use DistanceOfBoxFromBox to make the actual calculation. - return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), - denorm, debug); -} - -// Compute the distance from the from_box to the to_box using curved -// projection space. Separation that involves a decrease in projection -// density (moving from the from_box to the to_box) is weighted more heavily -// than constant density, and an increase is weighted less. -// If horizontal_textline is true, then curved space is used vertically, -// as for a diacritic on the edge of a textline. -// The projection uses original image coords, so denorm is used to get -// back to the image coords from box/part space. -// How the calculation works: Think of a diacritic near a textline. -// Distance is measured from the far side of the from_box to the near side of -// the to_box. Shown is the horizontal textline case. -// |------^-----| -// | from | box | -// |------|-----| -// perpendicular | -// <------v-------->|--------------------| -// parallel | to box | -// |--------------------| -// Perpendicular distance uses "curved space" See VerticalDistance below. -// Parallel distance is linear. -// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio. 
-int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, - const TBOX& to_box, - bool horizontal_textline, - const DENORM* denorm, - bool debug) const { - // The parallel_gap is the horizontal gap between a horizontal textline and - // the box. Analogous for vertical. - int parallel_gap = 0; - // start_pt is the box end of the line to be modified for curved space. - TPOINT start_pt; - // end_pt is the partition end of the line to be modified for curved space. - TPOINT end_pt; - if (horizontal_textline) { - parallel_gap = from_box.x_gap(to_box) + from_box.width(); - start_pt.x = (from_box.left() + from_box.right()) / 2; - end_pt.x = start_pt.x; - if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) { - start_pt.y = from_box.top(); - end_pt.y = std::min(to_box.top(), start_pt.y); - } else { - start_pt.y = from_box.bottom(); - end_pt.y = std::max(to_box.bottom(), start_pt.y); - } - } else { - parallel_gap = from_box.y_gap(to_box) + from_box.height(); - if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) { - start_pt.x = from_box.right(); - end_pt.x = std::min(to_box.right(), start_pt.x); - } else { - start_pt.x = from_box.left(); - end_pt.x = std::max(to_box.left(), start_pt.x); - } - start_pt.y = (from_box.bottom() + from_box.top()) / 2; - end_pt.y = start_pt.y; - } - // The perpendicular gap is the max vertical distance gap out of: - // top of from_box to to_box top and bottom of from_box to to_box bottom. - // This value is then modified for curved projection space. - // Analogous for vertical. - int perpendicular_gap = 0; - // If start_pt == end_pt, then the from_box lies entirely within the to_box - // (in the perpendicular direction), so we don't need to calculate the - // perpendicular_gap. - if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { - if (denorm != nullptr) { - // Denormalize the start and end. 
- denorm->DenormTransform(nullptr, start_pt, &start_pt); - denorm->DenormTransform(nullptr, end_pt, &end_pt); - } - if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { - perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, - end_pt.y); - } else { - perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, - start_pt.y); - } - } - // The parallel_gap weighs less than the perpendicular_gap. - return perpendicular_gap + parallel_gap / kParaPerpDistRatio; -} - -// Compute the distance between (x, y1) and (x, y2) using the rule that -// a decrease in textline density is weighted more heavily than an increase. -// The coordinates are in source image space, ie processed by any denorm -// already, but not yet scaled by scale_factor_. -// Going from the outside of a textline to the inside should measure much -// less distance than going from the inside of a textline to the outside. -// How it works: -// An increase is cheap (getting closer to a textline). -// Constant costs unity. -// A decrease is expensive (getting further from a textline). -// Pixels in projection map Counted distance -// 2 -// 3 1/x -// 3 1 -// 2 x -// 5 1/x -// 7 1/x -// Total: 1 + x + 3/x where x = kWrongWayPenalty. -int TextlineProjection::VerticalDistance(bool debug, int x, - int y1, int y2) const { - x = ImageXToProjectionX(x); - y1 = ImageYToProjectionY(y1); - y2 = ImageYToProjectionY(y2); - if (y1 == y2) return 0; - int wpl = pixGetWpl(pix_); - int step = y1 < y2 ? 
1 : -1; - uint32_t* data = pixGetData(pix_) + y1 * wpl; - wpl *= step; - int prev_pixel = GET_DATA_BYTE(data, x); - int distance = 0; - int right_way_steps = 0; - for (int y = y1; y != y2; y += step) { - data += wpl; - int pixel = GET_DATA_BYTE(data, x); - if (debug) - tprintf("At (%d,%d), pix = %d, prev=%d\n", - x, y + step, pixel, prev_pixel); - if (pixel < prev_pixel) - distance += kWrongWayPenalty; - else if (pixel > prev_pixel) - ++right_way_steps; - else - ++distance; - prev_pixel = pixel; - } - return distance * scale_factor_ + - right_way_steps * scale_factor_ / kWrongWayPenalty; -} - -// Compute the distance between (x1, y) and (x2, y) using the rule that -// a decrease in textline density is weighted more heavily than an increase. -int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2, - int y) const { - x1 = ImageXToProjectionX(x1); - x2 = ImageXToProjectionX(x2); - y = ImageYToProjectionY(y); - if (x1 == x2) return 0; - int wpl = pixGetWpl(pix_); - int step = x1 < x2 ? 1 : -1; - uint32_t* data = pixGetData(pix_) + y * wpl; - int prev_pixel = GET_DATA_BYTE(data, x1); - int distance = 0; - int right_way_steps = 0; - for (int x = x1; x != x2; x += step) { - int pixel = GET_DATA_BYTE(data, x + step); - if (debug) - tprintf("At (%d,%d), pix = %d, prev=%d\n", - x + step, y, pixel, prev_pixel); - if (pixel < prev_pixel) - distance += kWrongWayPenalty; - else if (pixel > prev_pixel) - ++right_way_steps; - else - ++distance; - prev_pixel = pixel; - } - return distance * scale_factor_ + - right_way_steps * scale_factor_ / kWrongWayPenalty; -} - -// Returns true if the blob appears to be outside of a textline. -// Such blobs are potentially diacritics (even if large in Thai) and should -// be kept away from initial textline finding. 
-bool TextlineProjection::BoxOutOfHTextline(const TBOX& box, - const DENORM* denorm, - bool debug) const { - int grad1 = 0; - int grad2 = 0; - EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr); - int worst_result = std::min(grad1, grad2); - int total_result = grad1 + grad2; - if (total_result >= 6) return false; // Strongly in textline. - // Medium strength: if either gradient is negative, it is likely outside - // the body of the textline. - if (worst_result < 0) - return true; - return false; -} - -// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, -// but uses the median top/bottom for horizontal and median left/right for -// vertical instead of the bounding box edges. -// Evaluates for both horizontal and vertical and returns the best result, -// with a positive value for horizontal and a negative value for vertical. -int TextlineProjection::EvaluateColPartition(const ColPartition& part, - const DENORM* denorm, - bool debug) const { - if (part.IsSingleton()) - return EvaluateBox(part.bounding_box(), denorm, debug); - // Test vertical orientation. - TBOX box = part.bounding_box(); - // Use the partition median for left/right. - box.set_left(part.median_left()); - box.set_right(part.median_right()); - int vresult = EvaluateBox(box, denorm, debug); - - // Test horizontal orientation. - box = part.bounding_box(); - // Use the partition median for top/bottom. - box.set_top(part.median_top()); - box.set_bottom(part.median_bottom()); - int hresult = EvaluateBox(box, denorm, debug); - if (debug) { - tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult); - part.bounding_box().print(); - part.Print(); - } - return hresult >= -vresult ? 
hresult : vresult; -} - -// Computes the mean projection gradients over the horizontal and vertical -// edges of the box: -// -h-h-h-h-h-h -// |------------| mean=htop -v|+v--------+v|-v -// |+h+h+h+h+h+h| -v|+v +v|-v -// | | -v|+v +v|-v -// | box | -v|+v box +v|-v -// | | -v|+v +v|-v -// |+h+h+h+h+h+h| -v|+v +v|-v -// |------------| mean=hbot -v|+v--------+v|-v -// -h-h-h-h-h-h -// mean=vleft mean=vright -// -// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number -// for a horizontal textline, a negative number for a vertical textline, -// and near zero for undecided. Undecided is most likely non-text. -// All the gradients are truncated to remain non-negative, since negative -// horizontal gradients don't give any indication of being vertical and -// vice versa. -// Additional complexity: The coordinates have to be transformed to original -// image coordinates with denorm (if not null), scaled to match the projection -// pix, and THEN step out 2 pixels each way from the edge to compute the -// gradient, and tries 3 positions, each measuring the gradient over a -// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by -// several layers of helpers below. -int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm, - bool debug) const { - return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr); -} - -// Internal version of EvaluateBox returns the unclipped gradients as well -// as the result of EvaluateBox. -// hgrad1 and hgrad2 are the gradients for the horizontal textline. 
-int TextlineProjection::EvaluateBoxInternal(const TBOX& box, - const DENORM* denorm, bool debug, - int* hgrad1, int* hgrad2, - int* vgrad1, int* vgrad2) const { - int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), - box.top(), true); - int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), - box.bottom(), false); - int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), - box.top(), true); - int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), - box.bottom(), box.top(), - false); - int top_clipped = std::max(top_gradient, 0); - int bottom_clipped = std::max(bottom_gradient, 0); - int left_clipped = std::max(left_gradient, 0); - int right_clipped = std::max(right_gradient, 0); - if (debug) { - tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", - top_gradient, bottom_gradient, left_gradient, right_gradient); - box.print(); - } - int result = std::max(top_clipped, bottom_clipped) - - std::max(left_clipped, right_clipped); - if (hgrad1 != nullptr && hgrad2 != nullptr) { - *hgrad1 = top_gradient; - *hgrad2 = bottom_gradient; - } - if (vgrad1 != nullptr && vgrad2 != nullptr) { - *vgrad1 = left_gradient; - *vgrad2 = right_gradient; - } - return result; -} - -// Helper returns the mean gradient value for the horizontal row at the given -// y, (in the external coordinates) by subtracting the mean of the transformed -// row 2 pixels above from the mean of the transformed row 2 pixels below. -// This gives a positive value for a good top edge and negative for bottom. -// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. 
-int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm, - int16_t min_x, int16_t max_x, int16_t y, - bool best_is_max) const { - TPOINT start_pt(min_x, y); - TPOINT end_pt(max_x, y); - int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt); - int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt); - int best_gradient = lower - upper; - upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt); - lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt); - int gradient = lower - upper; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; - upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt); - lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt); - gradient = lower - upper; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; - return best_gradient; -} - -// Helper returns the mean gradient value for the vertical column at the -// given x, (in the external coordinates) by subtracting the mean of the -// transformed column 2 pixels left from the mean of the transformed column -// 2 pixels to the right. -// This gives a positive value for a good left edge and negative for right. -// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. 
-int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x, - int16_t min_y, int16_t max_y, - bool best_is_max) const { - TPOINT start_pt(x, min_y); - TPOINT end_pt(x, max_y); - int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt); - int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt); - int best_gradient = right - left; - left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt); - right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt); - int gradient = right - left; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; - left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt); - right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt); - gradient = right - left; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; - return best_gradient; -} - -// Helper returns the mean pixel value over the line between the start_pt and -// end_pt (inclusive), but shifted perpendicular to the line in the projection -// image by offset pixels. For simplicity, it is assumed that the vector is -// either nearly horizontal or nearly vertical. It works on skewed textlines! -// The end points are in external coordinates, and will be denormalized with -// the denorm if not nullptr before further conversion to pix coordinates. -// After all the conversions, the offset is added to the direction -// perpendicular to the line direction. The offset is thus in projection image -// coordinates, which allows the caller to get a guaranteed displacement -// between pixels used to calculate gradients. 
-int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm, - int offset, - TPOINT start_pt, - TPOINT end_pt) const { - TransformToPixCoords(denorm, &start_pt); - TransformToPixCoords(denorm, &end_pt); - TruncateToImageBounds(&start_pt); - TruncateToImageBounds(&end_pt); - int wpl = pixGetWpl(pix_); - uint32_t* data = pixGetData(pix_); - int total = 0; - int count = 0; - int x_delta = end_pt.x - start_pt.x; - int y_delta = end_pt.y - start_pt.y; - if (abs(x_delta) >= abs(y_delta)) { - if (x_delta == 0) - return 0; - // Horizontal line. Add the offset vertically. - int x_step = x_delta > 0 ? 1 : -1; - // Correct offset for rotation, keeping it anti-clockwise of the delta. - offset *= x_step; - start_pt.y += offset; - end_pt.y += offset; - TruncateToImageBounds(&start_pt); - TruncateToImageBounds(&end_pt); - x_delta = end_pt.x - start_pt.x; - y_delta = end_pt.y - start_pt.y; - count = x_delta * x_step + 1; - for (int x = start_pt.x; x != end_pt.x; x += x_step) { - int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta); - total += GET_DATA_BYTE(data + wpl * y, x); - } - } else { - // Vertical line. Add the offset horizontally. - int y_step = y_delta > 0 ? 1 : -1; - // Correct offset for rotation, keeping it anti-clockwise of the delta. - // Pix holds the image with y=0 at the top, so the offset is negated. - offset *= -y_step; - start_pt.x += offset; - end_pt.x += offset; - TruncateToImageBounds(&start_pt); - TruncateToImageBounds(&end_pt); - x_delta = end_pt.x - start_pt.x; - y_delta = end_pt.y - start_pt.y; - count = y_delta * y_step + 1; - for (int y = start_pt.y; y != end_pt.y; y += y_step) { - int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta); - total += GET_DATA_BYTE(data + wpl * y, x); - } - } - return DivRounded(total, count); -} - -// Given an input pix, and a box, the sides of the box are shrunk inwards until -// they bound any black pixels found within the original box. 
-// The function converts between tesseract coords and the pix coords assuming -// that this pix is full resolution equal in size to the original image. -// Returns an empty box if there are no black pixels in the source box. -static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) { - int im_height = pixGetHeight(pix); - Box* input_box = boxCreate(box.left(), im_height - box.top(), - box.width(), box.height()); - Box* output_box = nullptr; - pixClipBoxToForeground(pix, input_box, nullptr, &output_box); - TBOX result_box; - if (output_box != nullptr) { - l_int32 x, y, width, height; - boxGetGeometry(output_box, &x, &y, &width, &height); - result_box.set_left(x); - result_box.set_right(x + width); - result_box.set_top(im_height - y); - result_box.set_bottom(result_box.top() - height); - boxDestroy(&output_box); - } - boxDestroy(&input_box); - return result_box; -} - -// Splits the given box in half at x_middle or y_middle according to split_on_x -// and checks for nontext_map pixels in each half. Reduces the bbox so that it -// still includes the middle point, but does not touch any fg pixels in -// nontext_map. An empty box may be returned if there is no such box. 
-static void TruncateBoxToMissNonText(int x_middle, int y_middle, - bool split_on_x, Pix* nontext_map, - TBOX* bbox) { - TBOX box1(*bbox); - TBOX box2(*bbox); - TBOX im_box; - if (split_on_x) { - box1.set_right(x_middle); - im_box = BoundsWithinBox(nontext_map, box1); - if (!im_box.null_box()) box1.set_left(im_box.right()); - box2.set_left(x_middle); - im_box = BoundsWithinBox(nontext_map, box2); - if (!im_box.null_box()) box2.set_right(im_box.left()); - } else { - box1.set_bottom(y_middle); - im_box = BoundsWithinBox(nontext_map, box1); - if (!im_box.null_box()) box1.set_top(im_box.bottom()); - box2.set_top(y_middle); - im_box = BoundsWithinBox(nontext_map, box2); - if (!im_box.null_box()) box2.set_bottom(im_box.top()); - } - box1 += box2; - *bbox = box1; -} - - -// Helper function to add 1 to a rectangle in source image coords to the -// internal projection pix_. -void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { - int scaled_left = ImageXToProjectionX(box.left()); - int scaled_top = ImageYToProjectionY(box.top()); - int scaled_right = ImageXToProjectionX(box.right()); - int scaled_bottom = ImageYToProjectionY(box.bottom()); - int wpl = pixGetWpl(pix_); - uint32_t* data = pixGetData(pix_) + scaled_top * wpl; - for (int y = scaled_top; y <= scaled_bottom; ++y) { - for (int x = scaled_left; x <= scaled_right; ++x) { - int pixel = GET_DATA_BYTE(data, x); - if (pixel < 255) - SET_DATA_BYTE(data, x, pixel + 1); - } - data += wpl; - } -} - -// Inserts a list of blobs into the projection. -// Rotation is a multiple of 90 degrees to get from blob coords to -// nontext_map coords, nontext_map_box is the bounds of the nontext_map. -// Blobs are spread horizontally or vertically according to their internal -// flags, but the spreading is truncated by set pixels in the nontext_map -// and also by the horizontal rule line limits on the blobs. 
-void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, - const FCOORD& rotation, - const TBOX& nontext_map_box, - Pix* nontext_map) { - BLOBNBOX_IT blob_it(blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.data(); - TBOX bbox = blob->bounding_box(); - ICOORD middle((bbox.left() + bbox.right()) / 2, - (bbox.bottom() + bbox.top()) / 2); - bool spreading_horizontally = PadBlobBox(blob, &bbox); - // Rotate to match the nontext_map. - bbox.rotate(rotation); - middle.rotate(rotation); - if (rotation.x() == 0.0f) - spreading_horizontally = !spreading_horizontally; - // Clip to the image before applying the increments. - bbox &= nontext_map_box; // This is in-place box intersection. - // Check for image pixels before spreading. - TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, - nontext_map, &bbox); - if (bbox.area() > 0) { - IncrementRectangle8Bit(bbox); - } - } -} - -// Pads the bounding box of the given blob according to whether it is on -// a horizontal or vertical text line, taking into account tab-stops near -// the blob. Returns true if padding was in the horizontal direction. -bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) { - // Determine which direction to spread. - // If text is well spaced out, it can be useful to pad perpendicular to - // the textline direction, so as to ensure diacritics get absorbed - // correctly, but if the text is tightly spaced, this will destroy the - // blank space between textlines in the projection map, and that would - // be very bad. 
- int pad_limit = scale_factor_ * kMinLineSpacingFactor; - int xpad = 0; - int ypad = 0; - bool padding_horizontally = false; - if (blob->UniquelyHorizontal()) { - xpad = bbox->height() * kOrientedPadFactor; - padding_horizontally = true; - // If the text appears to be very well spaced, pad the other direction by a - // single pixel in the projection profile space to help join diacritics to - // the textline. - if ((blob->neighbour(BND_ABOVE) == nullptr || - bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) && - (blob->neighbour(BND_BELOW) == nullptr || - bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) { - ypad = scale_factor_; - } - } else if (blob->UniquelyVertical()) { - ypad = bbox->width() * kOrientedPadFactor; - if ((blob->neighbour(BND_LEFT) == nullptr || - bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) && - (blob->neighbour(BND_RIGHT) == nullptr || - bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) { - xpad = scale_factor_; - } - } else { - if ((blob->neighbour(BND_ABOVE) != nullptr && - blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) || - (blob->neighbour(BND_BELOW) != nullptr && - blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) { - ypad = bbox->width() * kDefaultPadFactor; - } - if ((blob->neighbour(BND_RIGHT) != nullptr && - blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) || - (blob->neighbour(BND_LEFT) != nullptr && - blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) { - xpad = bbox->height() * kDefaultPadFactor; - padding_horizontally = true; - } - } - bbox->pad(xpad, ypad); - pad_limit = scale_factor_ * kMaxTabStopOverrun; - // Now shrink horizontally to avoid stepping more than pad_limit over a - // tab-stop. 
- if (bbox->left() < blob->left_rule() - pad_limit) { - bbox->set_left(blob->left_rule() - pad_limit); - } - if (bbox->right() > blob->right_rule() + pad_limit) { - bbox->set_right(blob->right_rule() + pad_limit); - } - return padding_horizontally; -} - -// Helper denormalizes the TPOINT with the denorm if not nullptr, then -// converts to pix_ coordinates. -void TextlineProjection::TransformToPixCoords(const DENORM* denorm, - TPOINT* pt) const { - if (denorm != nullptr) { - // Denormalize the point. - denorm->DenormTransform(nullptr, *pt, pt); - } - pt->x = ImageXToProjectionX(pt->x); - pt->y = ImageYToProjectionY(pt->y); -} - -#ifdef _MSC_VER -#pragma optimize("g", off) -#endif // _MSC_VER -// Helper truncates the TPOINT to be within the pix_. -void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const { - pt->x = ClipToRange(pt->x, 0, pixGetWidth(pix_) - 1); - pt->y = ClipToRange(pt->y, 0, pixGetHeight(pix_) - 1); -} -#ifdef _MSC_VER -#pragma optimize("", on) -#endif // _MSC_VER - -// Transform tesseract image coordinates to coordinates used in the projection. -int TextlineProjection::ImageXToProjectionX(int x) const { - x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1); - return x; -} -int TextlineProjection::ImageYToProjectionY(int y) const { - y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1); - return y; -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textlineprojection.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textlineprojection.h deleted file mode 100644 index c91569b8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textlineprojection.h +++ /dev/null @@ -1,206 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. 
-// Author: rays@google.com (Ray Smith) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ -#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ - -#include "blobgrid.h" // For BlobGrid - -class DENORM; -struct Pix; -struct TPOINT; - -namespace tesseract { - -class ColPartition; - -// Simple class to encapsulate the computation of an image representing -// local textline density, and function(s) to make use of it. -// The underlying principle is that if you smear connected components -// horizontally (vertically for components on a vertically written textline) -// and count the number of smeared components in an image, then the resulting -// image shows the density of the textlines at each image position. -class TextlineProjection { - public: - // The down-scaling factor is computed to obtain a projection resolution - // of about 100 dpi, whatever the input. - explicit TextlineProjection(int resolution); - ~TextlineProjection(); - - // Build the projection profile given the input_block containing lists of - // blobs, a rotation to convert to image coords, - // and a full-resolution nontext_map, marking out areas to avoid. - // During construction, we have the following assumptions: - // The rotation is a multiple of 90 degrees, ie no deskew yet. - // The blobs have had their left and right rules set to also limit - // the range of projection. 
- void ConstructProjection(TO_BLOCK* input_block, - const FCOORD& rotation, Pix* nontext_map); - - // Display the blobs in the window colored according to textline quality. - void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win); - - // Moves blobs that look like they don't sit well on a textline from the - // input blobs list to the output small_blobs list. - // This gets them away from initial textline finding to stop diacritics - // from forming incorrect textlines. (Introduced mainly to fix Thai.) - void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs, - BLOBNBOX_LIST* small_blobs) const; - - // Create a window and display the projection in it. - void DisplayProjection() const; - - // Compute the distance of the box from the partition using curved projection - // space. As DistanceOfBoxFromBox, except that the direction is taken from - // the ColPartition and the median bounds of the ColPartition are used as - // the to_box. - int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part, - const DENORM* denorm, bool debug) const; - - // Compute the distance from the from_box to the to_box using curved - // projection space. Separation that involves a decrease in projection - // density (moving from the from_box to the to_box) is weighted more heavily - // than constant density, and an increase is weighted less. - // If horizontal_textline is true, then curved space is used vertically, - // as for a diacritic on the edge of a textline. - // The projection uses original image coords, so denorm is used to get - // back to the image coords from box/part space. - int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, - bool horizontal_textline, - const DENORM* denorm, bool debug) const; - - // Compute the distance between (x, y1) and (x, y2) using the rule that - // a decrease in textline density is weighted more heavily than an increase. 
- // The coordinates are in source image space, ie processed by any denorm - // already, but not yet scaled by scale_factor_. - // Going from the outside of a textline to the inside should measure much - // less distance than going from the inside of a textline to the outside. - int VerticalDistance(bool debug, int x, int y1, int y2) const; - - // Compute the distance between (x1, y) and (x2, y) using the rule that - // a decrease in textline density is weighted more heavily than an increase. - int HorizontalDistance(bool debug, int x1, int x2, int y) const; - - // Returns true if the blob appears to be outside of a horizontal textline. - // Such blobs are potentially diacritics (even if large in Thai) and should - // be kept away from initial textline finding. - bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm, - bool debug) const; - - // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, - // but uses the median top/bottom for horizontal and median left/right for - // vertical instead of the bounding box edges. - // Evaluates for both horizontal and vertical and returns the best result, - // with a positive value for horizontal and a negative value for vertical. - int EvaluateColPartition(const ColPartition& part, const DENORM* denorm, - bool debug) const; - - // Computes the mean projection gradients over the horizontal and vertical - // edges of the box: - // -h-h-h-h-h-h - // |------------| mean=htop -v|+v--------+v|-v - // |+h+h+h+h+h+h| -v|+v +v|-v - // | | -v|+v +v|-v - // | box | -v|+v box +v|-v - // | | -v|+v +v|-v - // |+h+h+h+h+h+h| -v|+v +v|-v - // |------------| mean=hbot -v|+v--------+v|-v - // -h-h-h-h-h-h - // mean=vleft mean=vright - // - // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number - // for a horizontal textline, a negative number for a vertical textline, - // and near zero for undecided. Undecided is most likely non-text. 
- int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const; - - private: - // Internal version of EvaluateBox returns the unclipped gradients as well - // as the result of EvaluateBox. - // hgrad1 and hgrad2 are the gradients for the horizontal textline. - int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, - int* hgrad1, int* hgrad2, - int* vgrad1, int* vgrad2) const; - - // Helper returns the mean gradient value for the horizontal row at the given - // y, (in the external coordinates) by subtracting the mean of the transformed - // row 2 pixels above from the mean of the transformed row 2 pixels below. - // This gives a positive value for a good top edge and negative for bottom. - // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. - int BestMeanGradientInRow(const DENORM* denorm, int16_t min_x, int16_t max_x, - int16_t y, bool best_is_max) const; - - // Helper returns the mean gradient value for the vertical column at the - // given x, (in the external coordinates) by subtracting the mean of the - // transformed column 2 pixels left from the mean of the transformed column - // 2 pixels to the right. - // This gives a positive value for a good left edge and negative for right. - // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. - int BestMeanGradientInColumn(const DENORM* denorm, int16_t x, int16_t min_y, - int16_t max_y, bool best_is_max) const; - - // Helper returns the mean pixel value over the line between the start_pt and - // end_pt (inclusive), but shifted perpendicular to the line in the projection - // image by offset pixels. For simplicity, it is assumed that the vector is - // either nearly horizontal or nearly vertical. It works on skewed textlines! - // The end points are in external coordinates, and will be denormalized with - // the denorm if not nullptr before further conversion to pix coordinates. 
- // After all the conversions, the offset is added to the direction - // perpendicular to the line direction. The offset is thus in projection image - // coordinates, which allows the caller to get a guaranteed displacement - // between pixels used to calculate gradients. - int MeanPixelsInLineSegment(const DENORM* denorm, int offset, - TPOINT start_pt, TPOINT end_pt) const; - - // Helper function to add 1 to a rectangle in source image coords to the - // internal projection pix_. - void IncrementRectangle8Bit(const TBOX& box); - // Inserts a list of blobs into the projection. - // Rotation is a multiple of 90 degrees to get from blob coords to - // nontext_map coords, image_box is the bounds of the nontext_map. - // Blobs are spread horizontally or vertically according to their internal - // flags, but the spreading is truncated by set pixels in the nontext_map - // and also by the horizontal rule line limits on the blobs. - void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation, - const TBOX& image_box, Pix* nontext_map); - // Pads the bounding box of the given blob according to whether it is on - // a horizontal or vertical text line, taking into account tab-stops near - // the blob. Returns true if padding was in the horizontal direction. - bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox); - - // Helper denormalizes the TPOINT with the denorm if not nullptr, then - // converts to pix_ coordinates. - void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const; - - // Helper truncates the TPOINT to be within the pix_. - void TruncateToImageBounds(TPOINT* pt) const; - - // Transform tesseract coordinates to coordinates used in the pix. - int ImageXToProjectionX(int x) const; - int ImageYToProjectionY(int y) const; - - // The down-sampling scale factor used in building the image. - int scale_factor_; - // The blob coordinates of the top-left (origin of the pix_) in tesseract - // coordinates. 
Used to transform the bottom-up tesseract coordinates to - // the top-down coordinates of the pix. - int x_origin_; - int y_origin_; - // The image of horizontally smeared blob boxes summed to provide a - // textline density map. As with a horizontal projection, the map has - // dips in the gaps between textlines. - Pix* pix_; -}; - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textord.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textord.cpp deleted file mode 100644 index c8ffe98c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textord.cpp +++ /dev/null @@ -1,353 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: textord.cpp -// Description: The top-level text line and word finding functionality. -// Author: Ray Smith -// Created: Fri Mar 13 14:43:01 PDT 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "baselinedetect.h" -#include "drawtord.h" -#include "textord.h" -#include "makerow.h" -#include "pageres.h" -#include "tordmain.h" -#include "wordseg.h" - -namespace tesseract { - -Textord::Textord(CCStruct* ccstruct) - : ccstruct_(ccstruct), - use_cjk_fp_model_(false), - // makerow.cpp /////////////////////////////////////////// - BOOL_MEMBER(textord_single_height_mode, false, - "Script has no xheight, so use a single mode", - ccstruct_->params()), - // tospace.cpp /////////////////////////////////////////// - BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", - ccstruct_->params()), - BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false, - "Constrain relative values of inter and intra-word gaps for " - "old_to_method.", - ccstruct_->params()), - BOOL_MEMBER(tosp_only_use_prop_rows, true, - "Block stats to use fixed pitch rows?", ccstruct_->params()), - BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, - "Force word breaks on punct to break long lines in non-space " - "delimited langs", - ccstruct_->params()), - BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", - ccstruct_->params()), - BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", - ccstruct_->params()), - BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", - ccstruct_->params()), - BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", - ccstruct_->params()), - BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", - ccstruct_->params()), - BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", - ccstruct_->params()), - BOOL_MEMBER(tosp_recovery_isolated_row_stats, true, - "Use row alone when inadequate cert spaces", - ccstruct_->params()), - BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", - ccstruct_->params()), - BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", - 
ccstruct_->params()), - BOOL_MEMBER(tosp_fuzzy_limit_all, true, - "Don't restrict kn->sp fuzzy limit to tables", - ccstruct_->params()), - BOOL_MEMBER(tosp_stats_use_xht_gaps, true, - "Use within xht gap for wd breaks", ccstruct_->params()), - BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", - ccstruct_->params()), - BOOL_MEMBER(tosp_only_use_xht_gaps, false, - "Only use within xht gap for wd breaks", ccstruct_->params()), - BOOL_MEMBER(tosp_rule_9_test_punct, false, - "Don't chng kn to space next to punct", ccstruct_->params()), - BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", - ccstruct_->params()), - BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", - ccstruct_->params()), - BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", - ccstruct_->params()), - INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()), - INT_MEMBER(tosp_enough_space_samples_for_median, 3, - "or should we use mean", ccstruct_->params()), - INT_MEMBER(tosp_redo_kern_limit, 10, - "No.samples reqd to reestimate for row", ccstruct_->params()), - INT_MEMBER(tosp_few_samples, 40, - "No.gaps reqd with 1 large gap to treat as a table", - ccstruct_->params()), - INT_MEMBER(tosp_short_row, 20, - "No.gaps reqd with few cert spaces to use certs", - ccstruct_->params()), - INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", - ccstruct_->params()), - double_MEMBER(tosp_old_sp_kn_th_factor, 2.0, - "Factor for defining space threshold in terms of space and " - "kern sizes", - ccstruct_->params()), - double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", - ccstruct_->params()), - double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", - ccstruct_->params()), - double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", - ccstruct_->params()), - double_MEMBER(tosp_narrow_aspect_ratio, 0.48, - "narrow if w/h less than this", ccstruct_->params()), - 
double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", - ccstruct_->params()), - double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", - ccstruct_->params()), - double_MEMBER(tosp_fuzzy_space_factor, 0.6, - "Fract of xheight for fuzz sp", ccstruct_->params()), - double_MEMBER(tosp_fuzzy_space_factor1, 0.5, - "Fract of xheight for fuzz sp", ccstruct_->params()), - double_MEMBER(tosp_fuzzy_space_factor2, 0.72, - "Fract of xheight for fuzz sp", ccstruct_->params()), - double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", - ccstruct_->params()), - double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", - ccstruct_->params()), - double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", - ccstruct_->params()), - double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", - ccstruct_->params()), - double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", - ccstruct_->params()), - double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", - ccstruct_->params()), - double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", - ccstruct_->params()), - double_MEMBER(tosp_enough_small_gaps, 0.65, - "Fract of kerns reqd for isolated row stats", - ccstruct_->params()), - double_MEMBER(tosp_table_kn_sp_ratio, 2.25, - "Min difference of kn & sp in table", ccstruct_->params()), - double_MEMBER(tosp_table_xht_sp_ratio, 0.33, - "Expect spaces bigger than this", ccstruct_->params()), - double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, - "Fuzzy if less than this", ccstruct_->params()), - double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", - ccstruct_->params()), - double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", - ccstruct_->params()), - double_MEMBER(tosp_min_sane_kn_sp, 1.5, - "Don't trust spaces less than this time kn", - ccstruct_->params()), - double_MEMBER(tosp_init_guess_kn_mult, 2.2, - "Thresh guess - mult kn by this", ccstruct_->params()), - 
double_MEMBER(tosp_init_guess_xht_mult, 0.28, - "Thresh guess - mult xht by this", ccstruct_->params()), - double_MEMBER(tosp_max_sane_kn_thresh, 5.0, - "Multiplier on kn to limit thresh", ccstruct_->params()), - double_MEMBER(tosp_flip_caution, 0.0, - "Don't autoflip kn to sp when large separation", - ccstruct_->params()), - double_MEMBER(tosp_large_kerning, 0.19, - "Limit use of xht gap with large kns", ccstruct_->params()), - double_MEMBER(tosp_dont_fool_with_small_kerns, -1, - "Limit use of xht gap with odd small kns", - ccstruct_->params()), - double_MEMBER(tosp_near_lh_edge, 0, - "Don't reduce box if the top left is non blank", - ccstruct_->params()), - double_MEMBER(tosp_silly_kn_sp_gap, 0.2, - "Don't let sp minus kn get too small", ccstruct_->params()), - double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, - "How wide fuzzies need context", ccstruct_->params()), - // tordmain.cpp /////////////////////////////////////////// - BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", - ccstruct_->params()), - BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", - ccstruct_->params()), - BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", - ccstruct_->params()), - INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", - ccstruct_->params()), - INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", - ccstruct_->params()), - double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs", - ccstruct_->params()), - double_MEMBER(textord_noise_area_ratio, 0.7, - "Fraction of bounding box for noise", ccstruct_->params()), - double_MEMBER(textord_blob_size_smallile, 20, - "Percentile for small blobs", ccstruct_->params()), - double_MEMBER(textord_initialx_ile, 0.75, - "Ile of sizes for xheight guess", ccstruct_->params()), - double_MEMBER(textord_initialasc_ile, 0.90, - "Ile of sizes for xheight guess", ccstruct_->params()), - INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", - 
ccstruct_->params()), - double_MEMBER(textord_noise_sizelimit, 0.5, - "Fraction of x for big t count", ccstruct_->params()), - INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", - ccstruct_->params()), - double_MEMBER(textord_noise_normratio, 2.0, - "Dot to norm ratio for deletion", ccstruct_->params()), - BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", - ccstruct_->params()), - BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", - ccstruct_->params()), - double_MEMBER(textord_noise_syfract, 0.2, - "xh fract height error for norm blobs", - ccstruct_->params()), - double_MEMBER(textord_noise_sxfract, 0.4, - "xh fract width error for norm blobs", ccstruct_->params()), - double_MEMBER(textord_noise_hfract, 1.0 / 64, - "Height fraction to discard outlines as speckle noise", - ccstruct_->params()), - INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", - ccstruct_->params()), - double_MEMBER(textord_noise_rowratio, 6.0, - "Dot to norm ratio for deletion", ccstruct_->params()), - BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", - ccstruct_->params()), - double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", - ccstruct_->params()), - double_MEMBER(textord_blshift_xfraction, 9.99, - "Min size of baseline shift", ccstruct_->params()) {} - -// Make the textlines and words inside each block. -void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, - int width, int height, Pix* binary_pix, - Pix* thresholds_pix, Pix* grey_pix, - bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs, - BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { - page_tr_.set_x(width); - page_tr_.set_y(height); - if (to_blocks->empty()) { - // AutoPageSeg was not used, so we need to find_components first. 
- find_components(binary_pix, blocks, to_blocks); - TO_BLOCK_IT it(to_blocks); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TO_BLOCK* to_block = it.data(); - // Compute the edge offsets whether or not there is a grey_pix. - // We have by-passed auto page seg, so we have to run it here. - // By page segmentation mode there is no non-text to avoid running on. - to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); - } - } else if (!PSM_SPARSE(pageseg_mode)) { - // AutoPageSeg does not need to find_components as it did that already. - // Filter_blobs sets up the TO_BLOCKs the same as find_components does. - filter_blobs(page_tr_, to_blocks, true); - } - - ASSERT_HOST(!to_blocks->empty()); - if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { - const FCOORD anticlockwise90(0.0f, 1.0f); - const FCOORD clockwise90(0.0f, -1.0f); - TO_BLOCK_IT it(to_blocks); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TO_BLOCK* to_block = it.data(); - BLOCK* block = to_block->block; - // Create a fake poly_block in block from its bounding box. - block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), - PT_VERTICAL_TEXT)); - // Rotate the to_block along with its contained block and blobnbox lists. - to_block->rotate(anticlockwise90); - // Set the block's rotation values to obey the convention followed in - // layout analysis for vertical text. - block->set_re_rotation(clockwise90); - block->set_classify_rotation(clockwise90); - } - } - - TO_BLOCK_IT to_block_it(to_blocks); - TO_BLOCK* to_block = to_block_it.data(); - // Make the rows in the block. - float gradient; - // Do it the old fashioned way. - if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { - gradient = make_rows(page_tr_, to_blocks); - } else if (!PSM_SPARSE(pageseg_mode)) { - // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. 
- gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, - to_block, to_blocks); - } else { - gradient = 0.0f; - } - BaselineDetect baseline_detector(textord_baseline_debug, - reskew, to_blocks); - baseline_detector.ComputeStraightBaselines(use_box_bottoms); - baseline_detector.ComputeBaselineSplinesAndXheights( - page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, - textord_show_final_rows, this); - // Now make the words in the lines. - if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { - // SINGLE_LINE uses the old word maker on the single line. - make_words(this, page_tr_, gradient, blocks, to_blocks); - } else { - // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a - // single word, and in SINGLE_CHAR mode, all the outlines - // go in a single blob. - TO_BLOCK* to_block = to_block_it.data(); - make_single_word(pageseg_mode == PSM_SINGLE_CHAR, - to_block->get_rows(), to_block->block->row_list()); - } - // Remove empties. - cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks); - TransferDiacriticsToBlockGroups(diacritic_blobs, blocks); - // Compute the margins for each row in the block, to be used later for - // paragraph detection. - BLOCK_IT b_it(blocks); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - b_it.data()->compute_row_margins(); - } -#ifndef GRAPHICS_DISABLED - close_to_win(); -#endif -} - -// If we were supposed to return only a single textline, and there is more -// than one, clean up and leave only the best. -void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, - PAGE_RES* page_res) { - if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) - return; // No cleanup required. - PAGE_RES_IT it(page_res); - // Find the best row, being the greatest mean word conf. 
- float row_total_conf = 0.0f; - int row_word_count = 0; - ROW_RES* best_row = nullptr; - float best_conf = 0.0f; - for (it.restart_page(); it.word() != nullptr; it.forward()) { - WERD_RES* word = it.word(); - row_total_conf += word->best_choice->certainty(); - ++row_word_count; - if (it.next_row() != it.row()) { - row_total_conf /= row_word_count; - if (best_row == nullptr || best_conf < row_total_conf) { - best_row = it.row(); - best_conf = row_total_conf; - } - row_total_conf = 0.0f; - row_word_count = 0; - } - } - // Now eliminate any word not in the best row. - for (it.restart_page(); it.word() != nullptr; it.forward()) { - if (it.row() != best_row) - it.DeleteCurrentWord(); - } -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textord.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textord.h deleted file mode 100644 index d798609d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/textord.h +++ /dev/null @@ -1,403 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: textord.h -// Description: The Textord class definition gathers text line and word -// finding functionality. -// Author: Ray Smith -// Created: Fri Mar 13 14:29:01 PDT 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_TEXTORD_H_ -#define TESSERACT_TEXTORD_TEXTORD_H_ - -#include "ccstruct.h" -#include "bbgrid.h" -#include "blobbox.h" -#include "gap_map.h" -#include "publictypes.h" // For PageSegMode. - -class FCOORD; -class BLOCK_LIST; -class PAGE_RES; -class TO_BLOCK; -class TO_BLOCK_LIST; -class ScrollView; - -namespace tesseract { - -// A simple class that can be used by BBGrid to hold a word and an expanded -// bounding box that makes it easy to find words to put diacritics. -class WordWithBox { - public: - WordWithBox() : word_(nullptr) {} - explicit WordWithBox(WERD *word) - : word_(word), bounding_box_(word->bounding_box()) { - int height = bounding_box_.height(); - bounding_box_.pad(height, height); - } - - const TBOX &bounding_box() const { return bounding_box_; } - // Returns the bounding box of only the good blobs. - TBOX true_bounding_box() const { return word_->true_bounding_box(); } - C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); } - const WERD *word() const { return word_; } - - private: - // Borrowed pointer to a real word somewhere that must outlive this class. - WERD *word_; - // Cached expanded bounding box of the word, padded all round by its height. - TBOX bounding_box_; -}; - -// Make it usable by BBGrid. -CLISTIZEH(WordWithBox) -using WordGrid = BBGrid; -using WordSearch = GridSearch; - -class Textord { - public: - explicit Textord(CCStruct* ccstruct); - ~Textord() = default; - - // Make the textlines and words inside each block. - // binary_pix is mandatory and is the binarized input after line removal. - // grey_pix is optional, but if present must match the binary_pix in size, - // and must be a *real* grey image instead of binary_pix * 255. - // thresholds_pix is expected to be present iff grey_pix is present and - // can be an integer factor reduction of the grey_pix. 
It represents the - // thresholds that were used to create the binary_pix from the grey_pix. - // diacritic_blobs contain small confusing components that should be added - // to the appropriate word(s) in case they are really diacritics. - void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, - int height, Pix *binary_pix, Pix *thresholds_pix, - Pix *grey_pix, bool use_box_bottoms, - BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks); - - // If we were supposed to return only a single textline, and there is more - // than one, clean up and leave only the best. - void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res); - - bool use_cjk_fp_model() const { - return use_cjk_fp_model_; - } - void set_use_cjk_fp_model(bool flag) { - use_cjk_fp_model_ = flag; - } - - // tospace.cpp /////////////////////////////////////////// - void to_spacing( - ICOORD page_tr, //topright of page - TO_BLOCK_LIST *blocks //blocks on page - ); - ROW *make_prop_words(TO_ROW *row, // row to make - FCOORD rotation // for drawing - ); - ROW *make_blob_words(TO_ROW *row, // row to make - FCOORD rotation // for drawing - ); - // tordmain.cpp /////////////////////////////////////////// - void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); - void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, bool testing_on); - - private: - // For underlying memory management and other utilities. - CCStruct* ccstruct_; - - // The size of the input image. - ICOORD page_tr_; - - bool use_cjk_fp_model_; - - // makerow.cpp /////////////////////////////////////////// - // Make the textlines inside each block. - void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, - int width, int height, TO_BLOCK_LIST* to_blocks); - // Make the textlines inside a single block. 
- void MakeBlockRows(int min_spacing, int max_spacing, - const FCOORD& skew, TO_BLOCK* block, - ScrollView* win); - - public: - void compute_block_xheight(TO_BLOCK *block, float gradient); - void compute_row_xheight(TO_ROW *row, // row to do - const FCOORD& rotation, - float gradient, // global skew - int block_line_size); - void make_spline_rows(TO_BLOCK* block, // block to do - float gradient, // gradient to fit - bool testing_on); - private: - //// oldbasel.cpp //////////////////////////////////////// - void make_old_baselines(TO_BLOCK* block, // block to do - bool testing_on, // correct orientation - float gradient); - void correlate_lines(TO_BLOCK *block, float gradient); - void correlate_neighbours(TO_BLOCK *block, // block rows are in. - TO_ROW **rows, // rows of block. - int rowcount); // no of rows to do. - int correlate_with_stats(TO_ROW **rows, // rows of block. - int rowcount, // no of rows to do. - TO_BLOCK* block); - void find_textlines(TO_BLOCK *block, // block row is in - TO_ROW *row, // row to do - int degree, // required approximation - QSPLINE *spline); // starting spline - // tospace.cpp /////////////////////////////////////////// - //DEBUG USE ONLY - void block_spacing_stats(TO_BLOCK* block, - GAPMAP* gapmap, - bool& old_text_ord_proportional, - //resulting estimate - int16_t& block_space_gap_width, - //resulting estimate - int16_t& block_non_space_gap_width - ); - void row_spacing_stats(TO_ROW *row, - GAPMAP *gapmap, - int16_t block_idx, - int16_t row_idx, - //estimate for block - int16_t block_space_gap_width, - //estimate for block - int16_t block_non_space_gap_width - ); - void old_to_method(TO_ROW *row, - STATS *all_gap_stats, - STATS *space_gap_stats, - STATS *small_gap_stats, - int16_t block_space_gap_width, - //estimate for block - int16_t block_non_space_gap_width - ); - bool isolated_row_stats(TO_ROW* row, - GAPMAP* gapmap, - STATS* all_gap_stats, - bool suspected_table, - int16_t block_idx, - int16_t row_idx); - int16_t 
stats_count_under(STATS *stats, int16_t threshold); - void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); - bool make_a_word_break(TO_ROW* row, // row being made - TBOX blob_box, // for next_blob // how many blanks? - int16_t prev_gap, - TBOX prev_blob_box, - int16_t real_current_gap, - int16_t within_xht_current_gap, - TBOX next_blob_box, - int16_t next_gap, - uint8_t& blanks, - bool& fuzzy_sp, - bool& fuzzy_non, - bool& prev_gap_was_a_space, - bool& break_at_next_gap); - bool narrow_blob(TO_ROW* row, TBOX blob_box); - bool wide_blob(TO_ROW* row, TBOX blob_box); - bool suspected_punct_blob(TO_ROW* row, TBOX box); - void peek_at_next_gap(TO_ROW *row, - BLOBNBOX_IT box_it, - TBOX &next_blob_box, - int16_t &next_gap, - int16_t &next_within_xht_gap); - void mark_gap(TBOX blob, //blob following gap - int16_t rule, // heuristic id - int16_t prev_gap, - int16_t prev_blob_width, - int16_t current_gap, - int16_t next_blob_width, - int16_t next_gap); - float find_mean_blob_spacing(WERD *word); - bool ignore_big_gap(TO_ROW* row, - int32_t row_length, - GAPMAP* gapmap, - int16_t left, - int16_t right); - //get bounding box - TBOX reduced_box_next(TO_ROW *row, //current row - BLOBNBOX_IT *it //iterator to blobds - ); - TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht); - // tordmain.cpp /////////////////////////////////////////// - float filter_noise_blobs(BLOBNBOX_LIST *src_list, - BLOBNBOX_LIST *noise_list, - BLOBNBOX_LIST *small_list, - BLOBNBOX_LIST *large_list); - // Fixes the block so it obeys all the rules: - // Must have at least one ROW. - // Must have at least one WERD. - // WERDs contain a fake blob. - void cleanup_nontext_block(BLOCK* block); - void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks); - bool clean_noise_from_row(ROW* row); - void clean_noise_from_words(ROW *row); - // Remove outlines that are a tiny fraction in either width or height - // of the word height. 
- void clean_small_noise_from_words(ROW *row); - // Groups blocks by rotation, then, for each group, makes a WordGrid and calls - // TransferDiacriticsToWords to copy the diacritic blobs to the most - // appropriate words in the group of blocks. Source blobs are not touched. - void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, - BLOCK_LIST* blocks); - // Places a copy of blobs that are near a word (after applying rotation to the - // blob) in the most appropriate word, unless there is doubt, in which case a - // blob can end up in two words. Source blobs are not touched. - void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, - const FCOORD &rotation, WordGrid *word_grid); - - public: - // makerow.cpp /////////////////////////////////////////// - BOOL_VAR_H(textord_single_height_mode, false, - "Script has no xheight, so use a single mode for horizontal text"); - // tospace.cpp /////////////////////////////////////////// - BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?"); - BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false, - "Constrain relative values of inter and intra-word gaps for " - "old_to_method."); - BOOL_VAR_H(tosp_only_use_prop_rows, true, - "Block stats to use fixed pitch rows?"); - BOOL_VAR_H(tosp_force_wordbreak_on_punct, false, - "Force word breaks on punct to break long lines in non-space " - "delimited langs"); - BOOL_VAR_H(tosp_use_pre_chopping, false, - "Space stats use prechopping?"); - BOOL_VAR_H(tosp_old_to_bug_fix, false, - "Fix suspected bug in old code"); - BOOL_VAR_H(tosp_block_use_cert_spaces, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_row_use_cert_spaces, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_row_use_cert_spaces1, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_recovery_isolated_row_stats, true, - "Use row alone when inadequate cert spaces"); - 
BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess"); - BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?"); - BOOL_VAR_H(tosp_fuzzy_limit_all, true, - "Don't restrict kn->sp fuzzy limit to tables"); - BOOL_VAR_H(tosp_stats_use_xht_gaps, true, - "Use within xht gap for wd breaks"); - BOOL_VAR_H(tosp_use_xht_gaps, true, - "Use within xht gap for wd breaks"); - BOOL_VAR_H(tosp_only_use_xht_gaps, false, - "Only use within xht gap for wd breaks"); - BOOL_VAR_H(tosp_rule_9_test_punct, false, - "Don't chng kn to space next to punct"); - BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip"); - BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip"); - BOOL_VAR_H(tosp_improve_thresh, false, - "Enable improvement heuristic"); - INT_VAR_H(tosp_debug_level, 0, "Debug data"); - INT_VAR_H(tosp_enough_space_samples_for_median, 3, - "or should we use mean"); - INT_VAR_H(tosp_redo_kern_limit, 10, - "No.samples reqd to reestimate for row"); - INT_VAR_H(tosp_few_samples, 40, - "No.gaps reqd with 1 large gap to treat as a table"); - INT_VAR_H(tosp_short_row, 20, - "No.gaps reqd with few cert spaces to use certs"); - INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly"); - double_VAR_H(tosp_old_sp_kn_th_factor, 2.0, - "Factor for defining space threshold in terms of space and " - "kern sizes"); - double_VAR_H(tosp_threshold_bias1, 0, - "how far between kern and space?"); - double_VAR_H(tosp_threshold_bias2, 0, - "how far between kern and space?"); - double_VAR_H(tosp_narrow_fraction, 0.3, - "Fract of xheight for narrow"); - double_VAR_H(tosp_narrow_aspect_ratio, 0.48, - "narrow if w/h less than this"); - double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); - double_VAR_H(tosp_wide_aspect_ratio, 0.0, - "wide if w/h less than this"); - double_VAR_H(tosp_fuzzy_space_factor, 0.6, - "Fract of xheight for fuzz sp"); - double_VAR_H(tosp_fuzzy_space_factor1, 0.5, - "Fract of xheight for fuzz sp"); - 
double_VAR_H(tosp_fuzzy_space_factor2, 0.72, - "Fract of xheight for fuzz sp"); - double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); - double_VAR_H(tosp_kern_gap_factor1, 2.0, - "gap ratio to flip kern->sp"); - double_VAR_H(tosp_kern_gap_factor2, 1.3, - "gap ratio to flip kern->sp"); - double_VAR_H(tosp_kern_gap_factor3, 2.5, - "gap ratio to flip kern->sp"); - double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); - double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); - double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); - double_VAR_H(tosp_enough_small_gaps, 0.65, - "Fract of kerns reqd for isolated row stats"); - double_VAR_H(tosp_table_kn_sp_ratio, 2.25, - "Min difference of kn & sp in table"); - double_VAR_H(tosp_table_xht_sp_ratio, 0.33, - "Expect spaces bigger than this"); - double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, - "Fuzzy if less than this"); - double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); - double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); - double_VAR_H(tosp_min_sane_kn_sp, 1.5, - "Don't trust spaces less than this time kn"); - double_VAR_H(tosp_init_guess_kn_mult, 2.2, - "Thresh guess - mult kn by this"); - double_VAR_H(tosp_init_guess_xht_mult, 0.28, - "Thresh guess - mult xht by this"); - double_VAR_H(tosp_max_sane_kn_thresh, 5.0, - "Multiplier on kn to limit thresh"); - double_VAR_H(tosp_flip_caution, 0.0, - "Don't autoflip kn to sp when large separation"); - double_VAR_H(tosp_large_kerning, 0.19, - "Limit use of xht gap with large kns"); - double_VAR_H(tosp_dont_fool_with_small_kerns, -1, - "Limit use of xht gap with odd small kns"); - double_VAR_H(tosp_near_lh_edge, 0, - "Don't reduce box if the top left is non blank"); - double_VAR_H(tosp_silly_kn_sp_gap, 0.2, - "Don't let sp minus kn get too small"); - double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75, - "How wide fuzzies need context"); - // tordmain.cpp /////////////////////////////////////////// - 
BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs"); - BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); - BOOL_VAR_H(textord_show_boxes, false, "Display boxes"); - INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise"); - INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level"); - double_VAR_H(textord_blob_size_bigile, 95, "Percentile for large blobs"); - double_VAR_H(textord_noise_area_ratio, 0.7, - "Fraction of bounding box for noise"); - double_VAR_H(textord_blob_size_smallile, 20, "Percentile for small blobs"); - double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess"); - double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess"); - INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima"); - double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count"); - INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob"); - double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion"); - BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words"); - BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows"); - double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs"); - double_VAR_H(textord_noise_sxfract, 0.4, - "xh fract width error for norm blobs"); - double_VAR_H(textord_noise_hfract, 1.0/64, - "Height fraction to discard outlines as speckle noise"); - INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row"); - double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); - BOOL_VAR_H(textord_noise_debug, FALSE, "Debug row garbage detector"); - double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift"); - double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift"); -}; -} // namespace tesseract. 
- -#endif // TESSERACT_TEXTORD_TEXTORD_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/topitch.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/topitch.cpp deleted file mode 100644 index 6cd0bcf0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/topitch.cpp +++ /dev/null @@ -1,1845 +0,0 @@ -/********************************************************************** - * File: topitch.cpp (Formerly to_pitch.c) - * Description: Code to determine fixed pitchness and the pitch if fixed. - * Author: Ray Smith - * Created: Tue Aug 24 16:57:29 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "blobbox.h" -#include "statistc.h" -#include "drawtord.h" -#include "makerow.h" -#include "pitsync1.h" -#include "pithsync.h" -#include "tovars.h" -#include "wordseg.h" -#include "topitch.h" -#include "helpers.h" - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include - -#define EXTERN - -EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text"); -EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE, -"Debug on fixed pitch test"); -EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE, -"Turn off dp fixed pitch algorithm"); -EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE, -"Do even faster pitch algorithm"); -EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE, -"Write full metric stuff"); -EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts"); -EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts"); -EXTERN BOOL_VAR (textord_pitch_cheat, FALSE, -"Use correct answer for fixed/prop"); -EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE, -"Attempt whole doc/block fixed pitch"); -EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts"); -EXTERN double_VAR (textord_balance_factor, 1.0, -"Ding rate for unbalanced char cells"); - -#define FIXED_WIDTH_MULTIPLE 5 -#define BLOCK_STATS_CLUSTERS 10 -#define MAX_ALLOWED_PITCH 100 //max pixel pitch. - -// qsort function to sort 2 floats. -static int sort_floats(const void *arg1, const void *arg2) { - float diff = *reinterpret_cast(arg1) - - *reinterpret_cast(arg2); - if (diff > 0) { - return 1; - } else if (diff < 0) { - return -1; - } else { - return 0; - } -} - -/********************************************************************** - * compute_fixed_pitch - * - * Decide whether each row is fixed pitch individually. - * Correlate definite and uncertain results to obtain an individual - * result for each row in the TO_ROW class. 
- **********************************************************************/ - -void compute_fixed_pitch(ICOORD page_tr, // top right - TO_BLOCK_LIST* port_blocks, // input list - float gradient, // page skew - FCOORD rotation, // for drawing - bool testing_on) { // correct orientation - TO_BLOCK_IT block_it; //iterator - TO_BLOCK *block; //current block; - TO_ROW *row; //current row - int block_index; //block number - int row_index; //row number - -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_words && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); - } -#endif - - block_it.set_to_list (port_blocks); - block_index = 1; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - compute_block_pitch(block, rotation, block_index, testing_on); - block_index++; - } - - if (!try_doc_fixed (page_tr, port_blocks, gradient)) { - block_index = 1; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - if (!try_block_fixed (block, block_index)) - try_rows_fixed(block, block_index, testing_on); - block_index++; - } - } - - block_index = 1; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - block = block_it.data (); - POLY_BLOCK* pb = block->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) continue; // Non-text doesn't exist! 
- // row iterator - TO_ROW_IT row_it(block->get_rows()); - row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - fix_row_pitch(row, block, port_blocks, row_index, block_index); - row_index++; - } - block_index++; - } -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_words && testing_on) { - ScrollView::Update(); - } -#endif -} - - -/********************************************************************** - * fix_row_pitch - * - * Get a pitch_decision for this row by voting among similar rows in the - * block, then similar rows over all the page, or any other rows at all. - **********************************************************************/ - -void fix_row_pitch(TO_ROW *bad_row, // row to fix - TO_BLOCK *bad_block, // block of bad_row - TO_BLOCK_LIST *blocks, // blocks to scan - int32_t row_target, // number of row - int32_t block_target) { // number of block - int16_t mid_cuts; - int block_votes; //votes in block - int like_votes; //votes over page - int other_votes; //votes of unlike blocks - int block_index; //number of block - int row_index; //number of row - int maxwidth; //max pitch - TO_BLOCK_IT block_it = blocks; //block iterator - TO_BLOCK *block; //current block - TO_ROW *row; //current row - float sp_sd; //space deviation - STATS block_stats; //pitches in block - STATS like_stats; //pitches in page - - block_votes = like_votes = other_votes = 0; - maxwidth = (int32_t) ceil (bad_row->xheight * textord_words_maxspace); - if (bad_row->pitch_decision != PITCH_DEF_FIXED - && bad_row->pitch_decision != PITCH_DEF_PROP) { - block_stats.set_range (0, maxwidth); - like_stats.set_range (0, maxwidth); - block_index = 1; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - block = block_it.data(); - POLY_BLOCK* pb = block->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist! 
- row_index = 1; - TO_ROW_IT row_it(block->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row = row_it.data (); - if ((bad_row->all_caps - && row->xheight + row->ascrise - < - (bad_row->xheight + bad_row->ascrise) * (1 + - textord_pitch_rowsimilarity) - && row->xheight + row->ascrise > - (bad_row->xheight + bad_row->ascrise) * (1 - - textord_pitch_rowsimilarity)) - || (!bad_row->all_caps - && row->xheight < - bad_row->xheight * (1 + textord_pitch_rowsimilarity) - && row->xheight > - bad_row->xheight * (1 - textord_pitch_rowsimilarity))) { - if (block_index == block_target) { - if (row->pitch_decision == PITCH_DEF_FIXED) { - block_votes += textord_words_veto_power; - block_stats.add ((int32_t) row->fixed_pitch, - textord_words_veto_power); - } - else if (row->pitch_decision == PITCH_MAYBE_FIXED - || row->pitch_decision == PITCH_CORR_FIXED) { - block_votes++; - block_stats.add ((int32_t) row->fixed_pitch, 1); - } - else if (row->pitch_decision == PITCH_DEF_PROP) - block_votes -= textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_PROP - || row->pitch_decision == PITCH_CORR_PROP) - block_votes--; - } - else { - if (row->pitch_decision == PITCH_DEF_FIXED) { - like_votes += textord_words_veto_power; - like_stats.add ((int32_t) row->fixed_pitch, - textord_words_veto_power); - } - else if (row->pitch_decision == PITCH_MAYBE_FIXED - || row->pitch_decision == PITCH_CORR_FIXED) { - like_votes++; - like_stats.add ((int32_t) row->fixed_pitch, 1); - } - else if (row->pitch_decision == PITCH_DEF_PROP) - like_votes -= textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_PROP - || row->pitch_decision == PITCH_CORR_PROP) - like_votes--; - } - } - else { - if (row->pitch_decision == PITCH_DEF_FIXED) - other_votes += textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_FIXED - || row->pitch_decision == PITCH_CORR_FIXED) - other_votes++; - else if (row->pitch_decision == 
PITCH_DEF_PROP) - other_votes -= textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_PROP - || row->pitch_decision == PITCH_CORR_PROP) - other_votes--; - } - row_index++; - } - block_index++; - } - if (block_votes > textord_words_veto_power) { - bad_row->fixed_pitch = block_stats.ile (0.5); - bad_row->pitch_decision = PITCH_CORR_FIXED; - } - else if (block_votes <= textord_words_veto_power && like_votes > 0) { - bad_row->fixed_pitch = like_stats.ile (0.5); - bad_row->pitch_decision = PITCH_CORR_FIXED; - } - else { - bad_row->pitch_decision = PITCH_CORR_PROP; - if (block_votes == 0 && like_votes == 0 && other_votes > 0 - && (textord_debug_pitch_test || textord_debug_pitch_metric)) - tprintf - ("Warning:row %d of block %d set prop with no like rows against trend\n", - row_target, block_target); - } - } - if (textord_debug_pitch_metric) { - tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", - block_votes, like_votes, other_votes); - tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise); - } - if (bad_row->pitch_decision == PITCH_CORR_FIXED) { - if (bad_row->fixed_pitch < textord_min_xheight) { - if (block_votes > 0) - bad_row->fixed_pitch = block_stats.ile (0.5); - else if (block_votes == 0 && like_votes > 0) - bad_row->fixed_pitch = like_stats.ile (0.5); - else { - tprintf - ("Warning:guessing pitch as xheight on row %d, block %d\n", - row_target, block_target); - bad_row->fixed_pitch = bad_row->xheight; - } - } - if (bad_row->fixed_pitch < textord_min_xheight) - bad_row->fixed_pitch = (float) textord_min_xheight; - bad_row->kern_size = bad_row->fixed_pitch / 4; - bad_row->min_space = (int32_t) (bad_row->fixed_pitch * 0.6); - bad_row->max_nonspace = (int32_t) (bad_row->fixed_pitch * 0.4); - bad_row->space_threshold = - (bad_row->min_space + bad_row->max_nonspace) / 2; - bad_row->space_size = bad_row->fixed_pitch; - if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) { - tune_row_pitch (bad_row, &bad_row->projection, - 
bad_row->projection_left, bad_row->projection_right, - (bad_row->fixed_pitch + - bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch, - sp_sd, mid_cuts, &bad_row->char_cells, FALSE); - } - } - else if (bad_row->pitch_decision == PITCH_CORR_PROP - || bad_row->pitch_decision == PITCH_DEF_PROP) { - bad_row->fixed_pitch = 0.0f; - bad_row->char_cells.clear (); - } -} - - -/********************************************************************** - * compute_block_pitch - * - * Decide whether each block is fixed pitch individually. - **********************************************************************/ - -void compute_block_pitch(TO_BLOCK* block, // input list - FCOORD rotation, // for drawing - int32_t block_index, // block number - bool testing_on) { // correct orientation - TBOX block_box; //bounding box - - block_box = block->block->pdblk.bounding_box (); - if (testing_on && textord_debug_pitch_test) { - tprintf ("Block %d at (%d,%d)->(%d,%d)\n", - block_index, - block_box.left (), block_box.bottom (), - block_box.right (), block_box.top ()); - } - block->min_space = (int32_t) floor (block->xheight - * textord_words_default_minspace); - block->max_nonspace = (int32_t) ceil (block->xheight - * textord_words_default_nonspace); - block->fixed_pitch = 0.0f; - block->space_size = (float) block->min_space; - block->kern_size = (float) block->max_nonspace; - block->pr_nonsp = block->xheight * words_default_prop_nonspace; - block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop; - if (!block->get_rows ()->empty ()) { - ASSERT_HOST (block->xheight > 0); - find_repeated_chars(block, textord_show_initial_words && testing_on); -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_words && testing_on) - //overlap_picture_ops(TRUE); - ScrollView::Update(); -#endif - compute_rows_pitch(block, - block_index, - textord_debug_pitch_test && testing_on); - } -} - - -/********************************************************************** - * compute_rows_pitch - * - * Decide 
whether each row is fixed pitch individually. - **********************************************************************/ - -bool compute_rows_pitch( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation -) { - int32_t maxwidth; //of spaces - TO_ROW *row; //current row - int32_t row_index; //row number. - float lower, upper; //cluster thresholds - TO_ROW_IT row_it = block->get_rows (); - - row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - ASSERT_HOST (row->xheight > 0); - row->compute_vertical_projection (); - maxwidth = (int32_t) ceil (row->xheight * textord_words_maxspace); - if (row_pitch_stats (row, maxwidth, testing_on) - && find_row_pitch (row, maxwidth, - textord_dotmatrix_gap + 1, block, block_index, - row_index, testing_on)) { - if (row->fixed_pitch == 0) { - lower = row->pr_nonsp; - upper = row->pr_space; - row->space_size = upper; - row->kern_size = lower; - } - } - else { - row->fixed_pitch = 0.0f; //insufficient data - row->pitch_decision = PITCH_DUNNO; - } - row_index++; - } - return false; -} - - -/********************************************************************** - * try_doc_fixed - * - * Attempt to call the entire document fixed pitch. - **********************************************************************/ - -bool try_doc_fixed( //determine pitch - ICOORD page_tr, //top right - TO_BLOCK_LIST* port_blocks, //input list - float gradient //page skew -) { - int16_t master_x; //uniform shifts - int16_t pitch; //median pitch. 
- int x; //profile coord - int prop_blocks; //correct counts - int fixed_blocks; - int total_row_count; //total in page - //iterator - TO_BLOCK_IT block_it = port_blocks; - TO_BLOCK *block; //current block; - TO_ROW *row; //current row - int16_t projection_left; //edges - int16_t projection_right; - int16_t row_left; //edges of row - int16_t row_right; - ICOORDELT_LIST *master_cells; //cells for page - float master_y; //uniform shifts - float shift_factor; //page skew correction - float row_shift; //shift for row - float final_pitch; //output pitch - float row_y; //baseline - STATS projection; //entire page - STATS pitches (0, MAX_ALLOWED_PITCH); - //for median - float sp_sd; //space sd - int16_t mid_cuts; //no of cheap cuts - float pitch_sd; //sync rating - - if (block_it.empty () - // || block_it.data()==block_it.data_relative(1) - || !textord_blockndoc_fixed) - return false; - shift_factor = gradient / (gradient * gradient + 1); - // row iterator - TO_ROW_IT row_it(block_it.data ()->get_rows()); - master_x = row_it.data ()->projection_left; - master_y = row_it.data ()->baseline.y (master_x); - projection_left = INT16_MAX; - projection_right = -INT16_MAX; - prop_blocks = 0; - fixed_blocks = 0; - total_row_count = 0; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - total_row_count++; - if (row->fixed_pitch > 0) - pitches.add ((int32_t) (row->fixed_pitch), 1); - //find median - row_y = row->baseline.y (master_x); - row_left = - (int16_t) (row->projection_left - - shift_factor * (master_y - row_y)); - row_right = - (int16_t) (row->projection_right - - shift_factor * (master_y - row_y)); - if (row_left < projection_left) - projection_left = row_left; - if (row_right > projection_right) - projection_right = row_right; - } - } - if (pitches.get_total () == 0) - 
return false; - projection.set_range (projection_left, projection_right); - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row_y = row->baseline.y (master_x); - row_left = - (int16_t) (row->projection_left - - shift_factor * (master_y - row_y)); - for (x = row->projection_left; x < row->projection_right; - x++, row_left++) { - projection.add (row_left, row->projection.pile_count (x)); - } - } - } - - row_it.set_to_list (block_it.data ()->get_rows ()); - row = row_it.data (); -#ifndef GRAPHICS_DISABLED - if (textord_show_page_cuts && to_win != nullptr) - projection.plot (to_win, projection_left, - row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); -#endif - final_pitch = pitches.ile (0.5); - pitch = (int16_t) final_pitch; - pitch_sd = - tune_row_pitch (row, &projection, projection_left, projection_right, - pitch * 0.75, final_pitch, sp_sd, mid_cuts, - &row->char_cells, FALSE); - - if (textord_debug_pitch_metric) - tprintf - ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n", - prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, - pitch_sd / total_row_count, pitch_sd / pitch, - pitch_sd / total_row_count / pitch); - -#ifndef GRAPHICS_DISABLED - if (textord_show_page_cuts && to_win != nullptr) { - master_cells = &row->char_cells; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row = row_it.data (); - row_y = row->baseline.y (master_x); - row_shift = shift_factor * (master_y - row_y); - plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells); - } - } - } -#endif - row->char_cells.clear (); 
- return false; -} - - -/********************************************************************** - * try_block_fixed - * - * Try to call the entire block fixed. - **********************************************************************/ - -bool try_block_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index //block number -) { - return false; -} - - -/********************************************************************** - * try_rows_fixed - * - * Decide whether each row is fixed pitch individually. - **********************************************************************/ - -bool try_rows_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation -) { - TO_ROW *row; //current row - int32_t row_index; //row number. - int32_t def_fixed = 0; //counters - int32_t def_prop = 0; - int32_t maybe_fixed = 0; - int32_t maybe_prop = 0; - int32_t dunno = 0; - int32_t corr_fixed = 0; - int32_t corr_prop = 0; - float lower, upper; //cluster thresholds - TO_ROW_IT row_it = block->get_rows (); - - row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - ASSERT_HOST (row->xheight > 0); - if (row->fixed_pitch > 0 && - fixed_pitch_row(row, block->block, block_index)) { - if (row->fixed_pitch == 0) { - lower = row->pr_nonsp; - upper = row->pr_space; - row->space_size = upper; - row->kern_size = lower; - } - } - row_index++; - } - count_block_votes(block, - def_fixed, - def_prop, - maybe_fixed, - maybe_prop, - corr_fixed, - corr_prop, - dunno); - if (testing_on - && (textord_debug_pitch_test - || textord_blocksall_prop || textord_blocksall_fixed)) { - tprintf ("Initially:"); - print_block_counts(block, block_index); - } - if (def_fixed > def_prop * textord_words_veto_power) - block->pitch_decision = PITCH_DEF_FIXED; - else if (def_prop > def_fixed * textord_words_veto_power) - block->pitch_decision = PITCH_DEF_PROP; - else if 
(def_fixed > 0 || def_prop > 0) - block->pitch_decision = PITCH_DUNNO; - else if (maybe_fixed > maybe_prop * textord_words_veto_power) - block->pitch_decision = PITCH_MAYBE_FIXED; - else if (maybe_prop > maybe_fixed * textord_words_veto_power) - block->pitch_decision = PITCH_MAYBE_PROP; - else - block->pitch_decision = PITCH_DUNNO; - return false; -} - - -/********************************************************************** - * print_block_counts - * - * Count up how many rows have what decision and print the results. - **********************************************************************/ - -void print_block_counts( //find line stats - TO_BLOCK *block, //block to do - int32_t block_index //block number - ) { - int32_t def_fixed = 0; //counters - int32_t def_prop = 0; - int32_t maybe_fixed = 0; - int32_t maybe_prop = 0; - int32_t dunno = 0; - int32_t corr_fixed = 0; - int32_t corr_prop = 0; - - count_block_votes(block, - def_fixed, - def_prop, - maybe_fixed, - maybe_prop, - corr_fixed, - corr_prop, - dunno); - tprintf ("Block %d has (%d,%d,%d)", - block_index, def_fixed, maybe_fixed, corr_fixed); - if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed)) - tprintf (" (Wrongly)"); - tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop); - if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop)) - tprintf (" (Wrongly)"); - tprintf (" prop, %d dunno\n", dunno); -} - - -/********************************************************************** - * count_block_votes - * - * Count the number of rows in the block with each kind of pitch_decision. 
- **********************************************************************/ - -void count_block_votes( //find line stats - TO_BLOCK *block, //block to do - int32_t &def_fixed, //add to counts - int32_t &def_prop, - int32_t &maybe_fixed, - int32_t &maybe_prop, - int32_t &corr_fixed, - int32_t &corr_prop, - int32_t &dunno) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - switch (row->pitch_decision) { - case PITCH_DUNNO: - dunno++; - break; - case PITCH_DEF_PROP: - def_prop++; - break; - case PITCH_MAYBE_PROP: - maybe_prop++; - break; - case PITCH_DEF_FIXED: - def_fixed++; - break; - case PITCH_MAYBE_FIXED: - maybe_fixed++; - break; - case PITCH_CORR_PROP: - corr_prop++; - break; - case PITCH_CORR_FIXED: - corr_fixed++; - break; - } - } -} - - -/********************************************************************** - * row_pitch_stats - * - * Decide whether each row is fixed pitch individually. 
- **********************************************************************/ - -bool row_pitch_stats( //find line stats - TO_ROW* row, //current row - int32_t maxwidth, //of spaces - bool testing_on //correct orientation -) { - BLOBNBOX *blob; //current blob - int gap_index; //current gap - int32_t prev_x; //end of prev blob - int32_t cluster_count; //no of clusters - int32_t prev_count; //of clusters - int32_t smooth_factor; //for smoothing stats - TBOX blob_box; //bounding box - float lower, upper; //cluster thresholds - //gap sizes - float gaps[BLOCK_STATS_CLUSTERS]; - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - STATS gap_stats (0, maxwidth); - STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; - //clusters - - smooth_factor = - (int32_t) (row->xheight * textord_wordstats_smooth_factor + 1.5); - if (!blob_it.empty ()) { - prev_x = blob_it.data ()->bounding_box ().right (); - blob_it.forward (); - while (!blob_it.at_first ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if (blob_box.left () - prev_x < maxwidth) - gap_stats.add (blob_box.left () - prev_x, 1); - prev_x = blob_box.right (); - } - blob_it.forward (); - } - } - if (gap_stats.get_total () == 0) { - return false; - } - cluster_count = 0; - lower = row->xheight * words_initial_lower; - upper = row->xheight * words_initial_upper; - gap_stats.smooth (smooth_factor); - do { - prev_count = cluster_count; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, - BLOCK_STATS_CLUSTERS, cluster_stats); - } - while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); - if (cluster_count < 1) { - return false; - } - for (gap_index = 0; gap_index < cluster_count; gap_index++) - gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); - //get medians - if (testing_on) { - tprintf ("cluster_count=%d:", cluster_count); - for (gap_index = 0; gap_index < cluster_count; gap_index++) - tprintf (" %g(%d)", gaps[gap_index], 
- cluster_stats[gap_index + 1].get_total ()); - tprintf ("\n"); - } - qsort (gaps, cluster_count, sizeof (float), sort_floats); - - //Try to find proportional non-space and space for row. - lower = row->xheight * words_default_prop_nonspace; - upper = row->xheight * textord_words_min_minspace; - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] < lower; gap_index++); - if (gap_index == 0) { - if (testing_on) - tprintf ("No clusters below nonspace threshold!!\n"); - if (cluster_count > 1) { - row->pr_nonsp = gaps[0]; - row->pr_space = gaps[1]; - } - else { - row->pr_nonsp = lower; - row->pr_space = gaps[0]; - } - } - else { - row->pr_nonsp = gaps[gap_index - 1]; - while (gap_index < cluster_count && gaps[gap_index] < upper) - gap_index++; - if (gap_index == cluster_count) { - if (testing_on) - tprintf ("No clusters above nonspace threshold!!\n"); - row->pr_space = lower * textord_spacesize_ratioprop; - } - else - row->pr_space = gaps[gap_index]; - } - - //Now try to find the fixed pitch space and non-space. - upper = row->xheight * words_default_fixed_space; - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] < upper; gap_index++); - if (gap_index == 0) { - if (testing_on) - tprintf ("No clusters below space threshold!!\n"); - row->fp_nonsp = upper; - row->fp_space = gaps[0]; - } - else { - row->fp_nonsp = gaps[gap_index - 1]; - if (gap_index == cluster_count) { - if (testing_on) - tprintf ("No clusters above space threshold!!\n"); - row->fp_space = row->xheight; - } - else - row->fp_space = gaps[gap_index]; - } - if (testing_on) { - tprintf - ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n", - row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space); - } - return true; //computed some stats -} - - -/********************************************************************** - * find_row_pitch - * - * Check to see if this row could be fixed pitch using the given spacings. 
- * Blobs with gaps smaller than the lower threshold are assumed to be one. - * The larger threshold is the word gap threshold. - **********************************************************************/ - -bool find_row_pitch( //find lines - TO_ROW* row, //row to do - int32_t maxwidth, //max permitted space - int32_t dm_gap, //ignorable gaps - TO_BLOCK* block, //block of row - int32_t block_index, //block_number - int32_t row_index, //number of row - bool testing_on //correct orientation -) { - bool used_dm_model; //looks like dot matrix - float min_space; //estimate threshold - float non_space; //gap size - float gap_iqr; //interquartile range - float pitch_iqr; - float dm_gap_iqr; //interquartile range - float dm_pitch_iqr; - float dm_pitch; //pitch with dm on - float pitch; //revised estimate - float initial_pitch; //guess at pitch - STATS gap_stats (0, maxwidth); - //centre-centre - STATS pitch_stats (0, maxwidth); - - row->fixed_pitch = 0.0f; - initial_pitch = row->fp_space; - if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) - initial_pitch = row->xheight;//keep pitch decent - non_space = row->fp_nonsp; - if (non_space > initial_pitch) - non_space = initial_pitch; - min_space = (initial_pitch + non_space) / 2; - - if (!count_pitch_stats (row, &gap_stats, &pitch_stats, - initial_pitch, min_space, TRUE, FALSE, dm_gap)) { - dm_gap_iqr = 0.0001; - dm_pitch_iqr = maxwidth * 2.0f; - dm_pitch = initial_pitch; - } - else { - dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); - dm_pitch = pitch_stats.ile (0.5); - } - gap_stats.clear (); - pitch_stats.clear (); - if (!count_pitch_stats (row, &gap_stats, &pitch_stats, - initial_pitch, min_space, TRUE, FALSE, 0)) { - gap_iqr = 0.0001; - pitch_iqr = maxwidth * 3.0f; - } - else { - gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); - if (testing_on) - tprintf - ("First 
fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", - initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); - initial_pitch = pitch_stats.ile (0.5); - if (min_space > initial_pitch - && count_pitch_stats (row, &gap_stats, &pitch_stats, - initial_pitch, initial_pitch, TRUE, FALSE, 0)) { - min_space = initial_pitch; - gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); - if (testing_on) - tprintf - ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", - initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); - initial_pitch = pitch_stats.ile (0.5); - } - } - if (textord_debug_pitch_metric) - tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", - block_index, row_index, 'X', - pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr, - pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' : - (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M')); - if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) { - row->pitch_decision = PITCH_DUNNO; - if (textord_debug_pitch_metric) - tprintf ("\n"); - return false; //insufficient data - } - if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) { - if (testing_on) - tprintf - ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", - pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); - gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); - pitch = pitch_stats.ile (0.5); - used_dm_model = false; - } - else { - if (testing_on) - tprintf - ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", - pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); - gap_iqr = dm_gap_iqr; - pitch_iqr = dm_pitch_iqr; - pitch = dm_pitch; - used_dm_model = true; - } - if (textord_debug_pitch_metric) { - tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", - pitch_iqr, gap_iqr, pitch); - tprintf 
("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", - pitch_iqr / gap_iqr, pitch_iqr / block->xheight, - pitch_iqr < gap_iqr * textord_fpiqr_ratio - && pitch_iqr < block->xheight * textord_max_pitch_iqr - && pitch < block->xheight * textord_words_default_maxspace - ? 'F' : 'P'); - } - if (pitch_iqr < gap_iqr * textord_fpiqr_ratio - && pitch_iqr < block->xheight * textord_max_pitch_iqr - && pitch < block->xheight * textord_words_default_maxspace) - row->pitch_decision = PITCH_MAYBE_FIXED; - else - row->pitch_decision = PITCH_MAYBE_PROP; - row->fixed_pitch = pitch; - row->kern_size = gap_stats.ile (0.5); - row->min_space = (int32_t) (row->fixed_pitch + non_space) / 2; - if (row->min_space > row->fixed_pitch) - row->min_space = (int32_t) row->fixed_pitch; - row->max_nonspace = row->min_space; - row->space_size = row->fixed_pitch; - row->space_threshold = (row->max_nonspace + row->min_space) / 2; - row->used_dm_model = used_dm_model; - return true; -} - - -/********************************************************************** - * fixed_pitch_row - * - * Check to see if this row could be fixed pitch using the given spacings. - * Blobs with gaps smaller than the lower threshold are assumed to be one. - * The larger threshold is the word gap threshold. - **********************************************************************/ - -bool fixed_pitch_row(TO_ROW* row, // row to do - BLOCK* block, - int32_t block_index // block_number -) { - const char *res_string; // pitch result - int16_t mid_cuts; // no of cheap cuts - float non_space; // gap size - float pitch_sd; // error on pitch - float sp_sd = 0.0f; // space sd - - non_space = row->fp_nonsp; - if (non_space > row->fixed_pitch) - non_space = row->fixed_pitch; - POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr; - if (textord_all_prop || (pb != nullptr && !pb->IsText())) { - // Set the decision to definitely proportional. 
- pitch_sd = textord_words_def_prop * row->fixed_pitch; - row->pitch_decision = PITCH_DEF_PROP; - } else { - pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left, - row->projection_right, - (row->fixed_pitch + non_space * 3) / 4, - row->fixed_pitch, sp_sd, mid_cuts, - &row->char_cells, - block_index == textord_debug_block); - if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch - && ((pitsync_linear_version & 3) < 3 - || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model - || sp_sd > 20 - || (pitch_sd == 0 && sp_sd > 10))))) { - if (pitch_sd < textord_words_def_fixed * row->fixed_pitch - && !row->all_caps - && ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) - row->pitch_decision = PITCH_DEF_FIXED; - else - row->pitch_decision = PITCH_MAYBE_FIXED; - } - else if ((pitsync_linear_version & 3) < 3 - || sp_sd > 20 - || mid_cuts > 0 - || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) { - if (pitch_sd < textord_words_def_prop * row->fixed_pitch) - row->pitch_decision = PITCH_MAYBE_PROP; - else - row->pitch_decision = PITCH_DEF_PROP; - } - else - row->pitch_decision = PITCH_DUNNO; - } - - if (textord_debug_pitch_metric) { - res_string = "??"; - switch (row->pitch_decision) { - case PITCH_DEF_PROP: - res_string = "DP"; - break; - case PITCH_MAYBE_PROP: - res_string = "MP"; - break; - case PITCH_DEF_FIXED: - res_string = "DF"; - break; - case PITCH_MAYBE_FIXED: - res_string = "MF"; - break; - default: - res_string = "??"; - } - tprintf (":sd/p=%g:occ=%g:init_res=%s\n", - pitch_sd / row->fixed_pitch, sp_sd, res_string); - } - return true; -} - - -/********************************************************************** - * count_pitch_stats - * - * Count up the gap and pitch stats on the block to see if it is fixed pitch. - * Blobs with gaps smaller than the lower threshold are assumed to be one. - * The larger threshold is the word gap threshold. - * The return value indicates whether there were any decent values to use. 
- **********************************************************************/ - -bool count_pitch_stats( //find lines - TO_ROW* row, //row to do - STATS* gap_stats, //blob gaps - STATS* pitch_stats, //centre-centre stats - float initial_pitch, //guess at pitch - float min_space, //estimate space size - bool ignore_outsize, //discard big objects - bool split_outsize, //split big objects - int32_t dm_gap //ignorable gaps -) { - bool prev_valid; //not word broken - BLOBNBOX *blob; //current blob - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - int32_t prev_right; //end of prev blob - int32_t prev_centre; //centre of previous blob - int32_t x_centre; //centre of this blob - int32_t blob_width; //width of blob - int32_t width_units; //no of widths in blob - float width; //blob width - TBOX blob_box; //bounding box - TBOX joined_box; //of super blob - - gap_stats->clear (); - pitch_stats->clear (); - if (blob_it.empty ()) - return false; - prev_valid = false; - prev_centre = 0; - prev_right = 0; // stop compiler warning - joined_box = blob_it.data ()->bounding_box (); - do { - blob_it.forward (); - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if ((blob_box.left () - joined_box.right () < dm_gap - && !blob_it.at_first ()) - || blob->cblob() == nullptr) - joined_box += blob_box; //merge blobs - else { - blob_width = joined_box.width (); - if (split_outsize) { - width_units = - (int32_t) floor ((float) blob_width / initial_pitch + 0.5); - if (width_units < 1) - width_units = 1; - width_units--; - } - else if (ignore_outsize) { - width = (float) blob_width / initial_pitch; - width_units = width < 1 + words_default_fixed_limit - && width > 1 - words_default_fixed_limit ? 
0 : -1; - } - else - width_units = 0; //everything in - x_centre = (int32_t) (joined_box.left () - + (blob_width - - width_units * initial_pitch) / 2); - if (prev_valid && width_units >= 0) { - // if (width_units>0) - // { - // tprintf("wu=%d, width=%d, xc=%d, adding %d\n", - // width_units,blob_width,x_centre,x_centre-prev_centre); - // } - gap_stats->add (joined_box.left () - prev_right, 1); - pitch_stats->add (x_centre - prev_centre, 1); - } - prev_centre = (int32_t) (x_centre + width_units * initial_pitch); - prev_right = joined_box.right (); - prev_valid = blob_box.left () - joined_box.right () < min_space; - prev_valid = prev_valid && width_units >= 0; - joined_box = blob_box; - } - } - } - while (!blob_it.at_first ()); - return gap_stats->get_total () >= 3; -} - - -/********************************************************************** - * tune_row_pitch - * - * Use a dp algorithm to fit the character cells and return the sd of - * the cell size over the row. - **********************************************************************/ - -float tune_row_pitch( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words -) { - int pitch_delta; //offset pitch - int16_t mid_cuts; //cheap cuts - float pitch_sd; //current sd - float best_sd; //best result - float best_pitch; //pitch for best result - float initial_sd; //starting error - float sp_sd; //space sd - ICOORDELT_LIST test_cells; //row cells - ICOORDELT_IT best_it; //start of best list - - if (textord_fast_pitch_test) - return tune_row_pitch2 (row, projection, projection_left, - projection_right, space_size, initial_pitch, - best_sp_sd, - //space sd - 
best_mid_cuts, best_cells, testing_on); - if (textord_disable_pitch_test) { - best_sp_sd = initial_pitch; - return initial_pitch; - } - initial_sd = - compute_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - initial_pitch, - best_sp_sd, - best_mid_cuts, - best_cells, - testing_on); - best_sd = initial_sd; - best_pitch = initial_pitch; - if (testing_on) - tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd); - for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { - pitch_sd = - compute_pitch_sd (row, projection, projection_left, projection_right, - space_size, initial_pitch + pitch_delta, sp_sd, - mid_cuts, &test_cells, testing_on); - if (testing_on) - tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, - pitch_sd); - if (pitch_sd < best_sd) { - best_sd = pitch_sd; - best_mid_cuts = mid_cuts; - best_sp_sd = sp_sd; - best_pitch = initial_pitch + pitch_delta; - best_cells->clear (); - best_it.set_to_list (best_cells); - best_it.add_list_after (&test_cells); - } - else - test_cells.clear (); - if (pitch_sd > initial_sd) - break; //getting worse - } - for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { - pitch_sd = - compute_pitch_sd (row, projection, projection_left, projection_right, - space_size, initial_pitch - pitch_delta, sp_sd, - mid_cuts, &test_cells, testing_on); - if (testing_on) - tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, - pitch_sd); - if (pitch_sd < best_sd) { - best_sd = pitch_sd; - best_mid_cuts = mid_cuts; - best_sp_sd = sp_sd; - best_pitch = initial_pitch - pitch_delta; - best_cells->clear (); - best_it.set_to_list (best_cells); - best_it.add_list_after (&test_cells); - } - else - test_cells.clear (); - if (pitch_sd > initial_sd) - break; - } - initial_pitch = best_pitch; - - if (textord_debug_pitch_metric) - print_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - best_pitch); - - 
return best_sd; -} - - -/********************************************************************** - * tune_row_pitch - * - * Use a dp algorithm to fit the character cells and return the sd of - * the cell size over the row. - **********************************************************************/ - -float tune_row_pitch2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words -) { - int pitch_delta; //offset pitch - int16_t pixel; //pixel coord - int16_t best_pixel; //pixel coord - int16_t best_delta; //best pitch - int16_t best_pitch; //best pitch - int16_t start; //of good range - int16_t end; //of good range - int32_t best_count; //lowest sum - float best_sd; //best result - - best_sp_sd = initial_pitch; - - best_pitch = static_cast(initial_pitch); - if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) { - return initial_pitch; - } - std::unique_ptr sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection - - for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; - pitch_delta++) - sum_proj[textord_pitch_range + pitch_delta].set_range (0, - best_pitch + - pitch_delta + 1); - for (pixel = projection_left; pixel <= projection_right; pixel++) { - for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; - pitch_delta++) { - sum_proj[textord_pitch_range + pitch_delta].add( - (pixel - projection_left) % (best_pitch + pitch_delta), - projection->pile_count(pixel)); - } - } - best_count = sum_proj[textord_pitch_range].pile_count (0); - best_delta = 0; - best_pixel = 0; - for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; 
- pitch_delta++) { - for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) { - if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel) - < best_count) { - best_count = - sum_proj[textord_pitch_range + - pitch_delta].pile_count (pixel); - best_delta = pitch_delta; - best_pixel = pixel; - } - } - } - if (testing_on) - tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", - initial_pitch, best_delta, best_count); - best_pitch += best_delta; - initial_pitch = best_pitch; - best_count++; - best_count += best_count; - for (start = best_pixel - 2; start > best_pixel - best_pitch - && sum_proj[textord_pitch_range + - best_delta].pile_count (start % best_pitch) <= best_count; - start--); - for (end = best_pixel + 2; - end < best_pixel + best_pitch - && sum_proj[textord_pitch_range + - best_delta].pile_count (end % best_pitch) <= best_count; - end++); - - best_sd = - compute_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - initial_pitch, - best_sp_sd, - best_mid_cuts, - best_cells, - testing_on, - start, - end); - if (testing_on) - tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, - best_sd); - - if (textord_debug_pitch_metric) - print_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - initial_pitch); - - return best_sd; -} - - -/********************************************************************** - * compute_pitch_sd - * - * Use a dp algorithm to fit the character cells and return the sd of - * the cell size over the row. 
- **********************************************************************/ - -float compute_pitch_sd( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float space_size, //size of blank - float initial_pitch, //guess at pitch - float& sp_sd, //space sd - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start, //start of good range - int16_t end //end of good range -) { - int16_t occupation; //no of cells in word. - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT start_it; //start of word - BLOBNBOX_IT plot_it; //for plotting - int16_t blob_count; //no of blobs - TBOX blob_box; //bounding box - TBOX prev_box; //of super blob - int32_t prev_right; //of word sync - int scale_factor; //on scores for big words - int32_t sp_count; //spaces - FPSEGPT_LIST seg_list; //char cells - FPSEGPT_IT seg_it; //iterator - int16_t segpos; //position of segment - int16_t cellpos; //previous cell boundary - //iterator - ICOORDELT_IT cell_it = row_cells; - ICOORDELT *cell; //new cell - double sqsum; //sum of squares - double spsum; //of spaces - double sp_var; //space error - double word_sync; //result for word - int32_t total_count; //total blobs - - if ((pitsync_linear_version & 3) > 1) { - word_sync = compute_pitch_sd2 (row, projection, projection_left, - projection_right, initial_pitch, - occupation, mid_cuts, row_cells, - testing_on, start, end); - sp_sd = occupation; - return word_sync; - } - mid_cuts = 0; - cellpos = 0; - total_count = 0; - sqsum = 0; - sp_count = 0; - spsum = 0; - prev_right = -1; - if (blob_it.empty ()) - return space_size * 10; -#ifndef GRAPHICS_DISABLED - if (testing_on && to_win != nullptr) { - blob_box = blob_it.data ()->bounding_box (); - projection->plot (to_win, projection_left, - row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); - } -#endif - 
start_it = blob_it; - blob_count = 0; - blob_box = box_next (&blob_it);//first blob - blob_it.mark_cycle_pt (); - do { - for (; blob_count > 0; blob_count--) - box_next(&start_it); - do { - prev_box = blob_box; - blob_count++; - blob_box = box_next (&blob_it); - } - while (!blob_it.cycled_list () - && blob_box.left () - prev_box.right () < space_size); - plot_it = start_it; - if (pitsync_linear_version & 3) - word_sync = - check_pitch_sync2 (&start_it, blob_count, (int16_t) initial_pitch, 2, - projection, projection_left, projection_right, - row->xheight * textord_projection_scale, - occupation, &seg_list, start, end); - else - word_sync = - check_pitch_sync (&start_it, blob_count, (int16_t) initial_pitch, 2, - projection, &seg_list); - if (testing_on) { - tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ", - prev_box.right (), prev_box.top (), - seg_list.length () - 1, word_sync); - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); - seg_it.forward ()) { - if (seg_it.data ()->faked) - tprintf ("(F)"); - tprintf ("%d, ", seg_it.data ()->position ()); - // tprintf("C=%g, s=%g, sq=%g\n", - // seg_it.data()->cost_function(), - // seg_it.data()->sum(), - // seg_it.data()->squares()); - } - tprintf ("\n"); - } -#ifndef GRAPHICS_DISABLED - if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) - plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list); -#endif - seg_it.set_to_list (&seg_list); - if (prev_right >= 0) { - sp_var = seg_it.data ()->position () - prev_right; - sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; - sp_var *= sp_var; - spsum += sp_var; - sp_count++; - } - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpos = seg_it.data ()->position (); - if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) { - //big gap - while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) { - cell = new ICOORDELT (cellpos + (int16_t) 
initial_pitch, 0); - cell_it.add_after_then_move (cell); - cellpos += (int16_t) initial_pitch; - } - //make new one - cell = new ICOORDELT (segpos, 0); - cell_it.add_after_then_move (cell); - cellpos = segpos; - } - else if (segpos > cellpos - initial_pitch / 2) { - cell = cell_it.data (); - //average positions - cell->set_x ((cellpos + segpos) / 2); - cellpos = cell->x (); - } - } - seg_it.move_to_last (); - prev_right = seg_it.data ()->position (); - if (textord_pitch_scalebigwords) { - scale_factor = (seg_list.length () - 2) / 2; - if (scale_factor < 1) - scale_factor = 1; - } - else - scale_factor = 1; - sqsum += word_sync * scale_factor; - total_count += (seg_list.length () - 1) * scale_factor; - seg_list.clear (); - } - while (!blob_it.cycled_list ()); - sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; - return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; -} - - -/********************************************************************** - * compute_pitch_sd2 - * - * Use a dp algorithm to fit the character cells and return the sd of - * the cell size over the row. 
- **********************************************************************/ - -float compute_pitch_sd2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float initial_pitch, //guess at pitch - int16_t& occupation, //no of occupied cells - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start, //start of good range - int16_t end //end of good range -) { - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT plot_it; - int16_t blob_count; //no of blobs - TBOX blob_box; //bounding box - FPSEGPT_LIST seg_list; //char cells - FPSEGPT_IT seg_it; //iterator - int16_t segpos; //position of segment - //iterator - ICOORDELT_IT cell_it = row_cells; - ICOORDELT *cell; //new cell - double word_sync; //result for word - - mid_cuts = 0; - if (blob_it.empty ()) { - occupation = 0; - return initial_pitch * 10; - } -#ifndef GRAPHICS_DISABLED - if (testing_on && to_win != nullptr) { - projection->plot (to_win, projection_left, - row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); - } -#endif - blob_count = 0; - blob_it.mark_cycle_pt (); - do { - //first blob - blob_box = box_next (&blob_it); - blob_count++; - } - while (!blob_it.cycled_list ()); - plot_it = blob_it; - word_sync = check_pitch_sync2 (&blob_it, blob_count, (int16_t) initial_pitch, - 2, projection, projection_left, - projection_right, - row->xheight * textord_projection_scale, - occupation, &seg_list, start, end); - if (testing_on) { - tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ", - blob_box.right (), blob_box.top (), - seg_list.length () - 1, word_sync); - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - if (seg_it.data ()->faked) - tprintf ("(F)"); - tprintf ("%d, ", seg_it.data ()->position ()); - // tprintf("C=%g, s=%g, sq=%g\n", - // 
seg_it.data()->cost_function(), - // seg_it.data()->sum(), - // seg_it.data()->squares()); - } - tprintf ("\n"); - } -#ifndef GRAPHICS_DISABLED - if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) - plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list); -#endif - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpos = seg_it.data ()->position (); - //make new one - cell = new ICOORDELT (segpos, 0); - cell_it.add_after_then_move (cell); - if (seg_it.at_last ()) - mid_cuts = seg_it.data ()->cheap_cuts (); - } - seg_list.clear (); - return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10; -} - - -/********************************************************************** - * print_pitch_sd - * - * Use a dp algorithm to fit the character cells and return the sd of - * the cell size over the row. - **********************************************************************/ - -void print_pitch_sd( //find fp cells - TO_ROW *row, //row to do - STATS *projection, //vertical projection - int16_t projection_left, //edges //size of blank - int16_t projection_right, - float space_size, - float initial_pitch //guess at pitch - ) { - const char *res2; //pitch result - int16_t occupation; //used cells - float sp_sd; //space sd - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT start_it; //start of word - BLOBNBOX_IT row_start; //start of row - int16_t blob_count; //no of blobs - int16_t total_blob_count; //total blobs in line - TBOX blob_box; //bounding box - TBOX prev_box; //of super blob - int32_t prev_right; //of word sync - int scale_factor; //on scores for big words - int32_t sp_count; //spaces - FPSEGPT_LIST seg_list; //char cells - FPSEGPT_IT seg_it; //iterator - double sqsum; //sum of squares - double spsum; //of spaces - double sp_var; //space error - double word_sync; //result for word - double total_count; //total cuts - - if (blob_it.empty ()) - return; - 
row_start = blob_it; - total_blob_count = 0; - - total_count = 0; - sqsum = 0; - sp_count = 0; - spsum = 0; - prev_right = -1; - blob_it = row_start; - start_it = blob_it; - blob_count = 0; - blob_box = box_next (&blob_it);//first blob - blob_it.mark_cycle_pt (); - do { - for (; blob_count > 0; blob_count--) - box_next(&start_it); - do { - prev_box = blob_box; - blob_count++; - blob_box = box_next (&blob_it); - } - while (!blob_it.cycled_list () - && blob_box.left () - prev_box.right () < space_size); - word_sync = - check_pitch_sync2 (&start_it, blob_count, (int16_t) initial_pitch, 2, - projection, projection_left, projection_right, - row->xheight * textord_projection_scale, - occupation, &seg_list, 0, 0); - total_blob_count += blob_count; - seg_it.set_to_list (&seg_list); - if (prev_right >= 0) { - sp_var = seg_it.data ()->position () - prev_right; - sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; - sp_var *= sp_var; - spsum += sp_var; - sp_count++; - } - seg_it.move_to_last (); - prev_right = seg_it.data ()->position (); - if (textord_pitch_scalebigwords) { - scale_factor = (seg_list.length () - 2) / 2; - if (scale_factor < 1) - scale_factor = 1; - } - else - scale_factor = 1; - sqsum += word_sync * scale_factor; - total_count += (seg_list.length () - 1) * scale_factor; - seg_list.clear (); - } - while (!blob_it.cycled_list ()); - sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; - word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; - tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", - word_sync, word_sync / initial_pitch, sp_sd, - word_sync < textord_words_pitchsd_threshold * initial_pitch - ? 
'F' : 'P'); - - start_it = row_start; - blob_it = row_start; - word_sync = - check_pitch_sync2 (&blob_it, total_blob_count, (int16_t) initial_pitch, 2, - projection, projection_left, projection_right, - row->xheight * textord_projection_scale, occupation, - &seg_list, 0, 0); - if (occupation > 1) - word_sync /= occupation; - word_sync = sqrt (word_sync); - -#ifndef GRAPHICS_DISABLED - if (textord_show_row_cuts && to_win != nullptr) - plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list); -#endif - seg_list.clear (); - if (word_sync < textord_words_pitchsd_threshold * initial_pitch) { - if (word_sync < textord_words_def_fixed * initial_pitch - && !row->all_caps) - res2 = "DF"; - else - res2 = "MF"; - } - else - res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP"; - tprintf - ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n", - word_sync, word_sync / initial_pitch, - word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', - occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps); -} - -/********************************************************************** - * find_repeated_chars - * - * Extract marked leader blobs and put them - * into words in advance of fixed pitch checking and word generation. - **********************************************************************/ -void find_repeated_chars(TO_BLOCK* block, // Block to search. - bool testing_on) { // Debug mode. - POLY_BLOCK* pb = block->block->pdblk.poly_block(); - if (pb != nullptr && !pb->IsText()) - return; // Don't find repeated chars in non-text blocks. 
- - TO_ROW *row; - BLOBNBOX_IT box_it; - BLOBNBOX_IT search_it; // forward search - WERD *word; // new word - TBOX word_box; // for plotting - int blobcount, repeated_set; - - TO_ROW_IT row_it = block->get_rows(); - if (row_it.empty()) return; // empty block - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - box_it.set_to_list(row->blob_list()); - if (box_it.empty()) continue; // no blobs in this row - if (!row->rep_chars_marked()) { - mark_repeated_chars(row); - } - if (row->num_repeated_sets() == 0) continue; // nothing to do for this row - // new words - WERD_IT word_it(&row->rep_words); - do { - if (box_it.data()->repeated_set() != 0 && - !box_it.data()->joined_to_prev()) { - blobcount = 1; - repeated_set = box_it.data()->repeated_set(); - search_it = box_it; - search_it.forward(); - while (!search_it.at_first() && - search_it.data()->repeated_set() == repeated_set) { - blobcount++; - search_it.forward(); - } - // After the call to make_real_word() all the blobs from this - // repeated set will be removed from the blob list. box_it will be - // set to point to the blob after the end of the extracted sequence. - word = make_real_word(&box_it, blobcount, box_it.at_first(), 1); - if (!box_it.empty() && box_it.data()->joined_to_prev()) { - tprintf("Bad box joined to prev at"); - box_it.data()->bounding_box().print(); - tprintf("After repeated word:"); - word->bounding_box().print(); - } - ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev()); - word->set_flag(W_REP_CHAR, true); - word->set_flag(W_DONT_CHOP, true); - word_it.add_after_then_move(word); - } else { - box_it.forward(); - } - } while (!box_it.at_first()); - } -} - - -/********************************************************************** - * plot_fp_word - * - * Plot a block of words as if fixed pitch. 
- **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void plot_fp_word( //draw block of words - TO_BLOCK *block, //block to draw - float pitch, //pitch to draw with - float nonspace //for space threshold - ) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row->min_space = (int32_t) ((pitch + nonspace) / 2); - row->max_nonspace = row->min_space; - row->space_threshold = row->min_space; - plot_word_decisions (to_win, (int16_t) pitch, row); - } -} -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/topitch.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/topitch.h deleted file mode 100644 index 0c5c3970..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/topitch.h +++ /dev/null @@ -1,187 +0,0 @@ -/********************************************************************** - * File: topitch.h (Formerly to_pitch.h) - * Description: Code to determine fixed pitchness and the pitch if fixed. - * Author: Ray Smith - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TOPITCH_H -#define TOPITCH_H - -#include "blobbox.h" - -namespace tesseract { -class Tesseract; -} -extern BOOL_VAR_H (textord_debug_pitch_test, FALSE, -"Debug on fixed pitch test"); -extern BOOL_VAR_H (textord_debug_pitch_metric, FALSE, -"Write full metric stuff"); -extern BOOL_VAR_H (textord_show_row_cuts, FALSE, "Draw row-level cuts"); -extern BOOL_VAR_H (textord_show_page_cuts, FALSE, "Draw page-level cuts"); -extern BOOL_VAR_H (textord_pitch_cheat, FALSE, -"Use correct answer for fixed/prop"); -extern BOOL_VAR_H (textord_blockndoc_fixed, TRUE, -"Attempt whole doc/block fixed pitch"); -extern BOOL_VAR_H (textord_fast_pitch_test, FALSE, -"Do even faster pitch algorithm"); -extern double_VAR_H (textord_projection_scale, 0.125, -"Ding rate for mid-cuts"); -extern double_VAR_H (textord_balance_factor, 2.0, -"Ding rate for unbalanced char cells"); - -void compute_fixed_pitch(ICOORD page_tr, // top right - TO_BLOCK_LIST* port_blocks, // input list - float gradient, // page skew - FCOORD rotation, // for drawing - bool testing_on); // correct orientation -void fix_row_pitch( //get some value - TO_ROW *bad_row, //row to fix - TO_BLOCK *bad_block, //block of bad_row - TO_BLOCK_LIST *blocks, //blocks to scan - int32_t row_target, //number of row - int32_t block_target //number of block - ); -void compute_block_pitch(TO_BLOCK* block, // input list - FCOORD rotation, // for drawing - int32_t block_index, // block number - bool testing_on); // correct orientation -bool compute_rows_pitch( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation -); -bool try_doc_fixed( //determine pitch - ICOORD page_tr, //top right - TO_BLOCK_LIST* port_blocks, //input list - float gradient //page skew -); -bool try_block_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index //block number -); -bool 
try_rows_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation -); -void print_block_counts( //find line stats - TO_BLOCK *block, //block to do - int32_t block_index //block number - ); -void count_block_votes( //find line stats - TO_BLOCK *block, //block to do - int32_t &def_fixed, //add to counts - int32_t &def_prop, - int32_t &maybe_fixed, - int32_t &maybe_prop, - int32_t &corr_fixed, - int32_t &corr_prop, - int32_t &dunno); -bool row_pitch_stats( //find line stats - TO_ROW* row, //current row - int32_t maxwidth, //of spaces - bool testing_on //correct orientation -); -bool find_row_pitch( //find lines - TO_ROW* row, //row to do - int32_t maxwidth, //max permitted space - int32_t dm_gap, //ignorable gaps - TO_BLOCK* block, //block of row - int32_t block_index, //block_number - int32_t row_index, //number of row - bool testing_on //correct orientation -); -bool fixed_pitch_row( //find lines - TO_ROW* row, //row to do - BLOCK* block, - int32_t block_index //block_number -); -bool count_pitch_stats( //find lines - TO_ROW* row, //row to do - STATS* gap_stats, //blob gaps - STATS* pitch_stats, //centre-centre stats - float initial_pitch, //guess at pitch - float min_space, //estimate space size - bool ignore_outsize, //discard big objects - bool split_outsize, //split big objects - int32_t dm_gap //ignorable gaps -); -float tune_row_pitch( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words -); -float tune_row_pitch2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, 
//edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words -); -float compute_pitch_sd( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float space_size, //size of blank - float initial_pitch, //guess at pitch - float& sp_sd, //space sd - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start = 0, //start of good range - int16_t end = 0 //end of good range -); -float compute_pitch_sd2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float initial_pitch, //guess at pitch - int16_t& occupation, //no of occupied cells - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start = 0, //start of good range - int16_t end = 0 //end of good range -); -void print_pitch_sd( //find fp cells - TO_ROW *row, //row to do - STATS *projection, //vertical projection - int16_t projection_left, //edges //size of blank - int16_t projection_right, - float space_size, - float initial_pitch //guess at pitch - ); -void find_repeated_chars(TO_BLOCK* block, // Block to search. - bool testing_on); // Debug mode. 
-void plot_fp_word( //draw block of words - TO_BLOCK *block, //block to draw - float pitch, //pitch to draw with - float nonspace //for space threshold - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tordmain.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tordmain.cpp deleted file mode 100644 index 820bb4bb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tordmain.cpp +++ /dev/null @@ -1,994 +0,0 @@ -/********************************************************************** - * File: tordmain.cpp (Formerly textordp.c) - * Description: C++ top level textord code. - * Author: Ray Smith - * Created: Tue Jul 28 17:12:33 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "tordmain.h" -#include // for FLT_MAX -#include // for ceil, floor, M_PI -#include // for INT16_MAX, uint32_t, int32_t, int16_t -#include "allheaders.h" // for pixDestroy, pixGetHeight, boxCreate -#include "arrayaccess.h" // for GET_DATA_BYTE -#include "blobbox.h" // for BLOBNBOX_IT, BLOBNBOX, TO_BLOCK, TO_B... 
-#include "ccstruct.h" // for CCStruct, CCStruct::kXHeightFraction -#include "clst.h" // for CLISTIZE -#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST, C_OUTLINE -#include "drawtord.h" // for plot_box_list, to_win, create_to_win -#include "edgblob.h" // for extract_edges -#include "errcode.h" // for set_global_loc_code, ASSERT_HOST, LOC... -#include "genericvector.h" // for PointerVector, GenericVector -#include "makerow.h" // for textord_test_x, textord_test_y, texto... -#include "morph.h" // for L_BOUNDARY_BG -#include "ocrblock.h" // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only) -#include "ocrrow.h" // for ROW, ROW_IT, ROW_LIST, tweak_row_base... -#include "params.h" // for DoubleParam, BoolParam, IntParam -#include "pdblock.h" // for PDBLK -#include "points.h" // for FCOORD, ICOORD -#include "polyblk.h" // for POLY_BLOCK -#include "quadratc.h" // for QUAD_COEFFS -#include "quspline.h" // for QSPLINE, tweak_row_baseline -#include "rect.h" // for TBOX -#include "scrollview.h" // for ScrollView, ScrollView::WHITE -#include "statistc.h" // for STATS -#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST -#include "textord.h" // for Textord, WordWithBox, WordGrid, WordS... -#include "tprintf.h" // for tprintf -#include "werd.h" // for WERD_IT, WERD, WERD_LIST, W_DONT_CHOP - -struct Box; - -#define MAX_NEAREST_DIST 600 //for block skew stats - -namespace tesseract { - -CLISTIZE(WordWithBox) - -/********************************************************************** - * SetBlobStrokeWidth - * - * Set the horizontal and vertical stroke widths in the blob. - **********************************************************************/ -void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { - // Cut the blob rectangle into a Pix. 
- int pix_height = pixGetHeight(pix); - const TBOX& box = blob->bounding_box(); - int width = box.width(); - int height = box.height(); - Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(), - width, height); - Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr); - boxDestroy(&blob_pix_box); - Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); - pixDestroy(&pix_blob); - // Compute the stroke widths. - uint32_t* data = pixGetData(dist_pix); - int wpl = pixGetWpl(dist_pix); - // Horizontal width of stroke. - STATS h_stats(0, width + 1); - for (int y = 0; y < height; ++y) { - uint32_t* pixels = data + y*wpl; - int prev_pixel = 0; - int pixel = GET_DATA_BYTE(pixels, 0); - for (int x = 1; x < width; ++x) { - int next_pixel = GET_DATA_BYTE(pixels, x); - // We are looking for a pixel that is equal to its vertical neighbours, - // yet greater than its left neighbour. - if (prev_pixel < pixel && - (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) && - (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) { - if (pixel > next_pixel) { - // Single local max, so an odd width. - h_stats.add(pixel * 2 - 1, 1); - } else if (pixel == next_pixel && x + 1 < width && - pixel > GET_DATA_BYTE(pixels, x + 1)) { - // Double local max, so an even width. - h_stats.add(pixel * 2, 1); - } - } - prev_pixel = pixel; - pixel = next_pixel; - } - } - // Vertical width of stroke. - STATS v_stats(0, height + 1); - for (int x = 0; x < width; ++x) { - int prev_pixel = 0; - int pixel = GET_DATA_BYTE(data, x); - for (int y = 1; y < height; ++y) { - uint32_t* pixels = data + y*wpl; - int next_pixel = GET_DATA_BYTE(pixels, x); - // We are looking for a pixel that is equal to its horizontal neighbours, - // yet greater than its upper neighbour. 
- if (prev_pixel < pixel && - (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) && - (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) { - if (pixel > next_pixel) { - // Single local max, so an odd width. - v_stats.add(pixel * 2 - 1, 1); - } else if (pixel == next_pixel && y + 1 < height && - pixel > GET_DATA_BYTE(pixels + wpl, x)) { - // Double local max, so an even width. - v_stats.add(pixel * 2, 1); - } - } - prev_pixel = pixel; - pixel = next_pixel; - } - } - pixDestroy(&dist_pix); - // Store the horizontal and vertical width in the blob, keeping both - // widths if there is enough information, otherwse only the one with - // the most samples. - // If there are insufficient samples, store zero, rather than using - // 2*area/perimeter, as the numbers that gives do not match the numbers - // from the distance method. - if (h_stats.get_total() >= (width + height) / 4) { - blob->set_horz_stroke_width(h_stats.ile(0.5f)); - if (v_stats.get_total() >= (width + height) / 4) - blob->set_vert_stroke_width(v_stats.ile(0.5f)); - else - blob->set_vert_stroke_width(0.0f); - } else { - if (v_stats.get_total() >= (width + height) / 4 || - v_stats.get_total() > h_stats.get_total()) { - blob->set_horz_stroke_width(0.0f); - blob->set_vert_stroke_width(v_stats.ile(0.5f)); - } else { - blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f) - : 0.0f); - blob->set_vert_stroke_width(0.0f); - } - } -} - -/********************************************************************** - * assign_blobs_to_blocks2 - * - * Make a list of TO_BLOCKs for portrait and landscape orientation. 
- **********************************************************************/ - -void assign_blobs_to_blocks2(Pix* pix, - BLOCK_LIST *blocks, // blocks to process - TO_BLOCK_LIST *port_blocks) { // output list - BLOCK *block; // current block - BLOBNBOX *newblob; // created blob - C_BLOB *blob; // current blob - BLOCK_IT block_it = blocks; - C_BLOB_IT blob_it; // iterator - BLOBNBOX_IT port_box_it; // iterator - // destination iterator - TO_BLOCK_IT port_block_it = port_blocks; - TO_BLOCK *port_block; // created block - - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - block = block_it.data(); - port_block = new TO_BLOCK(block); - - // Convert the good outlines to block->blob_list - port_box_it.set_to_list(&port_block->blobs); - blob_it.set_to_list(block->blob_list()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - blob = blob_it.extract(); - newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX. - SetBlobStrokeWidth(pix, newblob); - port_box_it.add_after_then_move(newblob); - } - - // Put the rejected outlines in block->noise_blobs, which allows them to - // be reconsidered and sorted back into rows and recover outlines mistakenly - // rejected. - port_box_it.set_to_list(&port_block->noise_blobs); - blob_it.set_to_list(block->reject_blobs()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - blob = blob_it.extract(); - newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX. - SetBlobStrokeWidth(pix, newblob); - port_box_it.add_after_then_move(newblob); - } - - port_block_it.add_after_then_move(port_block); - } -} - -/********************************************************************** - * find_components - * - * Find the C_OUTLINEs of the connected components in each block, put them - * in C_BLOBs, and filter them by size, putting the different size - * grades on different lists in the matching TO_BLOCK in to_blocks. 
- **********************************************************************/ - -void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - if (width > INT16_MAX || height > INT16_MAX) { - tprintf("Input image too large! (%d, %d)\n", width, height); - return; // Can't handle it. - } - - set_global_loc_code(LOC_EDGE_PROG); - - BLOCK_IT block_it(blocks); // iterator - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - BLOCK* block = block_it.data(); - if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) { - extract_edges(pix, block); - } - } - - assign_blobs_to_blocks2(pix, blocks, to_blocks); - ICOORD page_tr(width, height); - filter_blobs(page_tr, to_blocks, !textord_test_landscape); -} - -/********************************************************************** - * filter_blobs - * - * Sort the blobs into sizes in all the blocks for later work. 
- **********************************************************************/ - -void Textord::filter_blobs(ICOORD page_tr, // top right - TO_BLOCK_LIST* blocks, // output list - bool testing_on) { // for plotting - TO_BLOCK_IT block_it = blocks; // destination iterator - TO_BLOCK *block; // created block - - #ifndef GRAPHICS_DISABLED - if (to_win != nullptr) - to_win->Clear(); - #endif // GRAPHICS_DISABLED - - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - block = block_it.data(); - block->line_size = filter_noise_blobs(&block->blobs, - &block->noise_blobs, - &block->small_blobs, - &block->large_blobs); - if (block->line_size == 0) block->line_size = 1; - block->line_spacing = block->line_size * - (tesseract::CCStruct::kDescenderFraction + - tesseract::CCStruct::kXHeightFraction + - 2 * tesseract::CCStruct::kAscenderFraction) / - tesseract::CCStruct::kXHeightFraction; - block->line_size *= textord_min_linesize; - block->max_blob_size = block->line_size * textord_excess_blobsize; - - #ifndef GRAPHICS_DISABLED - if (textord_show_blobs && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); - block->plot_graded_blobs(to_win); - } - if (textord_show_boxes && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); - plot_box_list(to_win, &block->noise_blobs, ScrollView::WHITE); - plot_box_list(to_win, &block->small_blobs, ScrollView::WHITE); - plot_box_list(to_win, &block->large_blobs, ScrollView::WHITE); - plot_box_list(to_win, &block->blobs, ScrollView::WHITE); - } - #endif // GRAPHICS_DISABLED - } -} - -/********************************************************************** - * filter_noise_blobs - * - * Move small blobs to a separate list. 
- **********************************************************************/ - -float Textord::filter_noise_blobs( - BLOBNBOX_LIST *src_list, // original list - BLOBNBOX_LIST *noise_list, // noise list - BLOBNBOX_LIST *small_list, // small blobs - BLOBNBOX_LIST *large_list) { // large blobs - int16_t height; //height of blob - int16_t width; //of blob - BLOBNBOX *blob; //current blob - float initial_x; //first guess - BLOBNBOX_IT src_it = src_list; //iterators - BLOBNBOX_IT noise_it = noise_list; - BLOBNBOX_IT small_it = small_list; - BLOBNBOX_IT large_it = large_list; - STATS size_stats (0, MAX_NEAREST_DIST); - //blob heights - float min_y; //size limits - float max_y; - float max_x; - float max_height; //of good blobs - - for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - blob = src_it.data(); - if (blob->bounding_box().height() < textord_max_noise_size) - noise_it.add_after_then_move(src_it.extract()); - else if (blob->enclosed_area() >= blob->bounding_box().height() - * blob->bounding_box().width() * textord_noise_area_ratio) - small_it.add_after_then_move(src_it.extract()); - } - for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - size_stats.add(src_it.data()->bounding_box().height(), 1); - } - initial_x = size_stats.ile(textord_initialx_ile); - max_y = ceil(initial_x * - (tesseract::CCStruct::kDescenderFraction + - tesseract::CCStruct::kXHeightFraction + - 2 * tesseract::CCStruct::kAscenderFraction) / - tesseract::CCStruct::kXHeightFraction); - min_y = floor (initial_x / 2); - max_x = ceil (initial_x * textord_width_limit); - small_it.move_to_first (); - for (small_it.mark_cycle_pt (); !small_it.cycled_list (); - small_it.forward ()) { - height = small_it.data()->bounding_box().height(); - if (height > max_y) - large_it.add_after_then_move(small_it.extract ()); - else if (height >= min_y) - src_it.add_after_then_move(small_it.extract ()); - } - size_stats.clear (); - for (src_it.mark_cycle_pt (); !src_it.cycled_list 
(); src_it.forward ()) { - height = src_it.data ()->bounding_box ().height (); - width = src_it.data ()->bounding_box ().width (); - if (height < min_y) - small_it.add_after_then_move (src_it.extract ()); - else if (height > max_y || width > max_x) - large_it.add_after_then_move (src_it.extract ()); - else - size_stats.add (height, 1); - } - max_height = size_stats.ile (textord_initialasc_ile); - // tprintf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,", - // max_y,min_y,initial_x,max_height); - max_height *= tesseract::CCStruct::kXHeightCapRatio; - if (max_height > initial_x) - initial_x = max_height; - // tprintf(" ret=%g\n",initial_x); - return initial_x; -} - -// Fixes the block so it obeys all the rules: -// Must have at least one ROW. -// Must have at least one WERD. -// WERDs contain a fake blob. -void Textord::cleanup_nontext_block(BLOCK* block) { - // Non-text blocks must contain at least one row. - ROW_IT row_it(block->row_list()); - if (row_it.empty()) { - const TBOX& box = block->pdblk.bounding_box(); - float height = box.height(); - int32_t xstarts[2] = {box.left(), box.right()}; - double coeffs[3] = {0.0, 0.0, static_cast(box.bottom())}; - ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f, - height / 4.0f, 0, 1); - row_it.add_after_then_move(row); - } - // Each row must contain at least one word. - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ROW* row = row_it.data(); - WERD_IT w_it(row->word_list()); - if (w_it.empty()) { - // Make a fake blob to put in the word. - TBOX box = block->row_list()->singleton() ? block->pdblk.bounding_box() - : row->bounding_box(); - C_BLOB* blob = C_BLOB::FakeBlob(box); - C_BLOB_LIST blobs; - C_BLOB_IT blob_it(&blobs); - blob_it.add_after_then_move(blob); - WERD* word = new WERD(&blobs, 0, nullptr); - w_it.add_after_then_move(word); - } - // Each word must contain a fake blob. 
- for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - // Just assert that this is true, as it would be useful to find - // out why it isn't. - ASSERT_HOST(!word->cblob_list()->empty()); - } - row->recalc_bounding_box(); - } -} - -/********************************************************************** - * cleanup_blocks - * - * Delete empty blocks, rows from the page. - **********************************************************************/ - -void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) { - BLOCK_IT block_it = blocks; //iterator - ROW_IT row_it; //row iterator - - int num_rows = 0; - int num_rows_all = 0; - int num_blocks = 0; - int num_blocks_all = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - BLOCK* block = block_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { - cleanup_nontext_block(block); - continue; - } - num_rows = 0; - num_rows_all = 0; - if (clean_noise) { - row_it.set_to_list(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ROW* row = row_it.data(); - ++num_rows_all; - clean_small_noise_from_words(row); - if ((textord_noise_rejrows && !row->word_list()->empty() && - clean_noise_from_row(row)) || - row->word_list()->empty()) { - delete row_it.extract(); // lose empty row. - } else { - if (textord_noise_rejwords) - clean_noise_from_words(row_it.data()); - if (textord_blshift_maxshift >= 0) - tweak_row_baseline(row, textord_blshift_maxshift, - textord_blshift_xfraction); - ++num_rows; - } - } - } - if (block->row_list()->empty()) { - delete block_it.extract(); // Lose empty text blocks. 
- } else { - ++num_blocks; - } - ++num_blocks_all; - if (textord_noise_debug) - tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all); - } - if (textord_noise_debug) - tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all); -} - - -/********************************************************************** - * clean_noise_from_row - * - * Move blobs of words from rows of garbage into the reject blobs list. - **********************************************************************/ - -bool Textord::clean_noise_from_row( //remove empties - ROW* row //row to clean -) { - bool testing_on; - TBOX blob_box; //bounding box - C_BLOB *blob; //current blob - C_OUTLINE *outline; //current outline - WERD *word; //current word - int32_t blob_size; //biggest size - int32_t trans_count = 0; //no of transitions - int32_t trans_threshold; //noise tolerance - int32_t dot_count; //small objects - int32_t norm_count; //normal objects - int32_t super_norm_count; //real char-like - //words of row - WERD_IT word_it = row->word_list (); - C_BLOB_IT blob_it; //blob iterator - C_OUTLINE_IT out_it; //outline iterator - - testing_on = textord_test_y > row->base_line (textord_test_x) - && textord_show_blobs - && textord_test_y < row->base_line (textord_test_x) + row->x_height (); - dot_count = 0; - norm_count = 0; - super_norm_count = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - //blobs in word - blob_it.set_to_list (word->cblob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - if (!word->flag (W_DONT_CHOP)) { - //get outlines - out_it.set_to_list (blob->out_list ()); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); - out_it.forward ()) { - outline = out_it.data (); - blob_box = outline->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box. 
- height(); - if (blob_size < textord_noise_sizelimit * row->x_height ()) - dot_count++; //count smal outlines - if (!outline->child ()->empty () - && blob_box.height () < - (1 + textord_noise_syfract) * row->x_height () - && blob_box.height () > - (1 - textord_noise_syfract) * row->x_height () - && blob_box.width () < - (1 + textord_noise_sxfract) * row->x_height () - && blob_box.width () > - (1 - textord_noise_sxfract) * row->x_height ()) - super_norm_count++; //count smal outlines - } - } - else - super_norm_count++; - blob_box = blob->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box.height (); - if (blob_size >= textord_noise_sizelimit * row->x_height () - && blob_size < row->x_height () * 2) { - trans_threshold = blob_size / textord_noise_sizefraction; - trans_count = blob->count_transitions (trans_threshold); - if (trans_count < textord_noise_translimit) - norm_count++; - } - else if (blob_box.height () > row->x_height () * 2 - && (!word_it.at_first () || !blob_it.at_first ())) - dot_count += 2; - if (testing_on) { - tprintf - ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n", - blob_box.left (), blob_box.bottom (), blob_box.right (), - blob_box.top (), blob->out_list ()->length (), trans_count, - blob_box.bottom () - row->base_line (blob_box.left ())); - } - } - } - if (textord_noise_debug) { - tprintf ("Row ending at (%d,%g):", - blob_box.right (), row->base_line (blob_box.right ())); - tprintf (" R=%g, dc=%d, nc=%d, %s\n", - norm_count > 0 ? (float) dot_count / norm_count : 9999, - dot_count, norm_count, - dot_count > norm_count * textord_noise_normratio - && dot_count > 2 ? 
"REJECTED" : "ACCEPTED"); - } - return super_norm_count < textord_noise_sncount - && dot_count > norm_count * textord_noise_rowratio && dot_count > 2; -} - -/********************************************************************** - * clean_noise_from_words - * - * Move blobs of words from rows of garbage into the reject blobs list. - **********************************************************************/ - -void Textord::clean_noise_from_words( //remove empties - ROW *row //row to clean - ) { - TBOX blob_box; //bounding box - C_BLOB *blob; //current blob - C_OUTLINE *outline; //current outline - WERD *word; //current word - int32_t blob_size; //biggest size - int32_t trans_count; //no of transitions - int32_t trans_threshold; //noise tolerance - int32_t dot_count; //small objects - int32_t norm_count; //normal objects - int32_t dud_words; //number discarded - int32_t ok_words; //number remaining - int32_t word_index; //current word - //words of row - WERD_IT word_it = row->word_list (); - C_BLOB_IT blob_it; //blob iterator - C_OUTLINE_IT out_it; //outline iterator - - ok_words = word_it.length (); - if (ok_words == 0 || textord_no_rejects) - return; - // was it chucked - std::vector word_dud(ok_words); - dud_words = 0; - ok_words = 0; - word_index = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - dot_count = 0; - norm_count = 0; - //blobs in word - blob_it.set_to_list (word->cblob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - if (!word->flag (W_DONT_CHOP)) { - //get outlines - out_it.set_to_list (blob->out_list ()); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); - out_it.forward ()) { - outline = out_it.data (); - blob_box = outline->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box. 
- height(); - if (blob_size < textord_noise_sizelimit * row->x_height ()) - dot_count++; //count smal outlines - if (!outline->child ()->empty () - && blob_box.height () < - (1 + textord_noise_syfract) * row->x_height () - && blob_box.height () > - (1 - textord_noise_syfract) * row->x_height () - && blob_box.width () < - (1 + textord_noise_sxfract) * row->x_height () - && blob_box.width () > - (1 - textord_noise_sxfract) * row->x_height ()) - norm_count++; //count smal outlines - } - } - else - norm_count++; - blob_box = blob->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box.height (); - if (blob_size >= textord_noise_sizelimit * row->x_height () - && blob_size < row->x_height () * 2) { - trans_threshold = blob_size / textord_noise_sizefraction; - trans_count = blob->count_transitions (trans_threshold); - if (trans_count < textord_noise_translimit) - norm_count++; - } - else if (blob_box.height () > row->x_height () * 2 - && (!word_it.at_first () || !blob_it.at_first ())) - dot_count += 2; - } - if (dot_count > 2 && !word->flag(W_REP_CHAR)) { - if (dot_count > norm_count * textord_noise_normratio * 2) - word_dud[word_index] = 2; - else if (dot_count > norm_count * textord_noise_normratio) - word_dud[word_index] = 1; - else - word_dud[word_index] = 0; - } else { - word_dud[word_index] = 0; - } - if (word_dud[word_index] == 2) - dud_words++; - else - ok_words++; - word_index++; - } - - word_index = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - if (word_dud[word_index] == 2 - || (word_dud[word_index] == 1 && dud_words > ok_words)) { - word = word_it.data(); // Current word. - // Previously we threw away the entire word. - // Now just aggressively throw all small blobs into the reject list, where - // the classifier can decide whether they are actually needed. 
- word->CleanNoise(textord_noise_sizelimit * row->x_height()); - } - word_index++; - } -} - -// Remove outlines that are a tiny fraction in either width or height -// of the word height. -void Textord::clean_small_noise_from_words(ROW *row) { - WERD_IT word_it(row->word_list()); - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - WERD* word = word_it.data(); - int min_size = static_cast( - textord_noise_hfract * word->bounding_box().height() + 0.5); - C_BLOB_IT blob_it(word->cblob_list()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - C_OUTLINE_IT out_it(blob->out_list()); - for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { - C_OUTLINE* outline = out_it.data(); - outline->RemoveSmallRecursive(min_size, &out_it); - } - if (blob->out_list()->empty()) { - delete blob_it.extract(); - } - } - if (word->cblob_list()->empty()) { - if (!word_it.at_last()) { - // The next word is no longer a fuzzy non space if it was before, - // since the word before is about to be deleted. - WERD* next_word = word_it.data_relative(1); - if (next_word->flag(W_FUZZY_NON)) { - next_word->set_flag(W_FUZZY_NON, false); - } - } - delete word_it.extract(); - } - } -} - -// Local struct to hold a group of blocks. -struct BlockGroup { - BlockGroup() : rotation(1.0f, 0.0f), angle(0.0f), min_xheight(1.0f) {} - explicit BlockGroup(BLOCK* block) - : bounding_box(block->pdblk.bounding_box()), - rotation(block->re_rotation()), - angle(block->re_rotation().angle()), - min_xheight(block->x_height()) { - blocks.push_back(block); - } - // Union of block bounding boxes. - TBOX bounding_box; - // Common rotation of the blocks. - FCOORD rotation; - // Angle of rotation. - float angle; - // Min xheight of the blocks. - float min_xheight; - // Collection of borrowed pointers to the blocks in the group. 
- GenericVector blocks; -}; - -// Groups blocks by rotation, then, for each group, makes a WordGrid and calls -// TransferDiacriticsToWords to copy the diacritic blobs to the most -// appropriate words in the group of blocks. Source blobs are not touched. -void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, - BLOCK_LIST* blocks) { - // Angle difference larger than this is too much to consider equal. - // They should only be in multiples of M_PI/2 anyway. - const double kMaxAngleDiff = 0.01; // About 0.6 degrees. - PointerVector groups; - BLOCK_IT bk_it(blocks); - for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) { - BLOCK* block = bk_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { - continue; - } - // Linear search of the groups to find a matching rotation. - float block_angle = block->re_rotation().angle(); - int best_g = 0; - float best_angle_diff = FLT_MAX; - for (int g = 0; g < groups.size(); ++g) { - double angle_diff = fabs(block_angle - groups[g]->angle); - if (angle_diff > M_PI) angle_diff = fabs(angle_diff - 2.0 * M_PI); - if (angle_diff < best_angle_diff) { - best_angle_diff = angle_diff; - best_g = g; - } - } - if (best_angle_diff > kMaxAngleDiff) { - groups.push_back(new BlockGroup(block)); - } else { - groups[best_g]->blocks.push_back(block); - groups[best_g]->bounding_box += block->pdblk.bounding_box(); - float x_height = block->x_height(); - if (x_height < groups[best_g]->min_xheight) - groups[best_g]->min_xheight = x_height; - } - } - // Now process each group of blocks. 
- PointerVector word_ptrs; - for (int g = 0; g < groups.size(); ++g) { - const BlockGroup* group = groups[g]; - if (group->bounding_box.null_box()) continue; - WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(), - group->bounding_box.topright()); - for (int b = 0; b < group->blocks.size(); ++b) { - ROW_IT row_it(group->blocks[b]->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - ROW* row = row_it.data(); - // Put the words of the row into the grid. - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - WordWithBox* box_word = new WordWithBox(word); - word_grid.InsertBBox(true, true, box_word); - // Save the pointer where it will be auto-deleted. - word_ptrs.push_back(box_word); - } - } - } - FCOORD rotation = group->rotation; - // Make it a forward rotation that will transform blob coords to block. - rotation.set_y(-rotation.y()); - TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid); - } -} - -// Places a copy of blobs that are near a word (after applying rotation to the -// blob) in the most appropriate word, unless there is doubt, in which case a -// blob can end up in two words. Source blobs are not touched. -void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs, - const FCOORD& rotation, - WordGrid* word_grid) { - WordSearch ws(word_grid); - BLOBNBOX_IT b_it(diacritic_blobs); - // Apply rotation to each blob before finding the nearest words. The rotation - // allows us to only consider above/below placement and not left/right on - // vertical text, because all text is horizontal here. - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOBNBOX* blobnbox = b_it.data(); - TBOX blob_box = blobnbox->bounding_box(); - blob_box.rotate(rotation); - ws.StartRectSearch(blob_box); - // Above/below refer to word position relative to diacritic. 
Since some - // scripts eg Kannada/Telugu habitually put diacritics below words, and - // others eg Thai/Vietnamese/Latin put most diacritics above words, try - // for both if there isn't much in it. - WordWithBox* best_above_word = nullptr; - WordWithBox* best_below_word = nullptr; - int best_above_distance = 0; - int best_below_distance = 0; - for (WordWithBox* word = ws.NextRectSearch(); word != nullptr; - word = ws.NextRectSearch()) { - if (word->word()->flag(W_REP_CHAR)) continue; - TBOX word_box = word->true_bounding_box(); - int x_distance = blob_box.x_gap(word_box); - int y_distance = blob_box.y_gap(word_box); - if (x_distance > 0) { - // Arbitrarily divide x-distance by 2 if there is a major y overlap, - // and the word is to the left of the diacritic. If the - // diacritic is a dropped broken character between two words, this will - // help send all the pieces to a single word, instead of splitting them - // over the 2 words. - if (word_box.major_y_overlap(blob_box) && - blob_box.left() > word_box.right()) { - x_distance /= 2; - } - y_distance += x_distance; - } - if (word_box.y_middle() > blob_box.y_middle() && - (best_above_word == nullptr || y_distance < best_above_distance)) { - best_above_word = word; - best_above_distance = y_distance; - } - if (word_box.y_middle() <= blob_box.y_middle() && - (best_below_word == nullptr || y_distance < best_below_distance)) { - best_below_word = word; - best_below_distance = y_distance; - } - } - bool above_good = - best_above_word != nullptr && - (best_below_word == nullptr || - best_above_distance < best_below_distance + blob_box.height()); - bool below_good = - best_below_word != nullptr && best_below_word != best_above_word && - (best_above_word == nullptr || - best_below_distance < best_above_distance + blob_box.height()); - if (below_good) { - C_BLOB* copied_blob = C_BLOB::deep_copy(blobnbox->cblob()); - copied_blob->rotate(rotation); - // Put the blob into the word's reject blobs list. 
- C_BLOB_IT blob_it(best_below_word->RejBlobs()); - blob_it.add_to_end(copied_blob); - } - if (above_good) { - C_BLOB* copied_blob = C_BLOB::deep_copy(blobnbox->cblob()); - copied_blob->rotate(rotation); - // Put the blob into the word's reject blobs list. - C_BLOB_IT blob_it(best_above_word->RejBlobs()); - blob_it.add_to_end(copied_blob); - } - } -} - -} // tesseract - -/********************************************************************** - * tweak_row_baseline - * - * Shift baseline to fit the blobs more accurately where they are - * close enough. - **********************************************************************/ - -void tweak_row_baseline(ROW *row, - double blshift_maxshift, - double blshift_xfraction) { - TBOX blob_box; //bounding box - C_BLOB *blob; //current blob - WERD *word; //current word - int32_t blob_count; //no of blobs - int32_t src_index; //source segment - int32_t dest_index; //destination segment - float ydiff; //baseline error - float x_centre; //centre of blob - //words of row - WERD_IT word_it = row->word_list (); - C_BLOB_IT blob_it; //blob iterator - - blob_count = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - //get total blobs - blob_count += word->cblob_list ()->length (); - } - if (blob_count == 0) - return; - // spline segments - std::vector xstarts(blob_count + row->baseline.segments + 1); - // spline coeffs - std::vector coeffs((blob_count + row->baseline.segments) * 3); - - src_index = 0; - dest_index = 0; - xstarts[0] = row->baseline.xcoords[0]; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - //blobs in word - blob_it.set_to_list (word->cblob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - x_centre = (blob_box.left () + blob_box.right ()) / 2.0; - ydiff = 
blob_box.bottom () - row->base_line (x_centre); - if (ydiff < 0) - ydiff = -ydiff / row->x_height (); - else - ydiff = ydiff / row->x_height (); - if (ydiff < blshift_maxshift - && blob_box.height () / row->x_height () > blshift_xfraction) { - if (xstarts[dest_index] >= x_centre) - xstarts[dest_index] = blob_box.left (); - coeffs[dest_index * 3] = 0; - coeffs[dest_index * 3 + 1] = 0; - coeffs[dest_index * 3 + 2] = blob_box.bottom (); - //shift it - dest_index++; - xstarts[dest_index] = blob_box.right () + 1; - } - else { - if (xstarts[dest_index] <= x_centre) { - while (row->baseline.xcoords[src_index + 1] <= x_centre - && src_index < row->baseline.segments - 1) { - if (row->baseline.xcoords[src_index + 1] > - xstarts[dest_index]) { - coeffs[dest_index * 3] = - row->baseline.quadratics[src_index].a; - coeffs[dest_index * 3 + 1] = - row->baseline.quadratics[src_index].b; - coeffs[dest_index * 3 + 2] = - row->baseline.quadratics[src_index].c; - dest_index++; - xstarts[dest_index] = - row->baseline.xcoords[src_index + 1]; - } - src_index++; - } - coeffs[dest_index * 3] = - row->baseline.quadratics[src_index].a; - coeffs[dest_index * 3 + 1] = - row->baseline.quadratics[src_index].b; - coeffs[dest_index * 3 + 2] = - row->baseline.quadratics[src_index].c; - dest_index++; - xstarts[dest_index] = row->baseline.xcoords[src_index + 1]; - } - } - } - } - while (src_index < row->baseline.segments - && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) - src_index++; - while (src_index < row->baseline.segments) { - coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a; - coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b; - coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c; - dest_index++; - src_index++; - xstarts[dest_index] = row->baseline.xcoords[src_index]; - } - //turn to spline - row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]); -} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tordmain.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tordmain.h deleted file mode 100644 index a0ed6264..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tordmain.h +++ /dev/null @@ -1,41 +0,0 @@ -/********************************************************************** - * File: tordmain.h (Formerly textordp.h) - * Description: C++ top level textord code. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TORDMAIN_H -#define TORDMAIN_H - -#include -#include "params.h" -#include "ocrblock.h" -#include "blobs.h" -#include "blobbox.h" - -struct Pix; -namespace tesseract { -class Tesseract; - -void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob); -void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks, - TO_BLOCK_LIST *port_blocks); -} // namespace tesseract - -void tweak_row_baseline(ROW *row, - double blshift_maxshift, - double blshift_xfraction); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tospace.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tospace.cpp deleted file mode 100644 index 009f20c8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tospace.cpp +++ /dev/null @@ -1,1895 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/********************************************************************** - * tospace.cpp - * - * Compute fuzzy word spacing thresholds for each row. - * I.e. set : max_nonspace - * space_threshold - * min_space - * kern_size - * space_size - * for each row. - * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE - * - * Note: functions in this file were originally not members of any - * class or enclosed by any namespace. Now they are all static members - * of the Textord class. 
- * - **********************************************************************/ - -#include "drawtord.h" -#include "statistc.h" -#include "textord.h" -#include "tovars.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include -#include - -#define MAXSPACING 128 /*max expected spacing in pix */ - -namespace tesseract { -void Textord::to_spacing( - ICOORD page_tr, //topright of page - TO_BLOCK_LIST *blocks //blocks on page - ) { - TO_BLOCK_IT block_it; //iterator - TO_BLOCK *block; //current block; - TO_ROW *row; //current row - int block_index; //block number - int row_index; //row number - //estimated width of real spaces for whole block - int16_t block_space_gap_width; - //estimated width of non space gaps for whole block - int16_t block_non_space_gap_width; - bool old_text_ord_proportional;//old fixed/prop result - - block_it.set_to_list (blocks); - block_index = 1; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - std::unique_ptr gapmap(new GAPMAP (block)); //map of big vert gaps in blk - block_spacing_stats(block, - gapmap.get(), - old_text_ord_proportional, - block_space_gap_width, - block_non_space_gap_width); - // Make sure relative values of block-level space and non-space gap - // widths are reasonable. The ratio of 1:3 is also used in - // block_spacing_stats, to corrrect the block_space_gap_width - // Useful for arabic and hindi, when the non-space gap width is - // often over-estimated and should not be trusted. A similar ratio - // is found in block_spacing_stats. 
- if (tosp_old_to_method && tosp_old_to_constrain_sp_kn && - (float) block_space_gap_width / block_non_space_gap_width < 3.0) { - block_non_space_gap_width = (int16_t) floor (block_space_gap_width / 3.0); - } - // row iterator - TO_ROW_IT row_it(block->get_rows()); - row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if ((row->pitch_decision == PITCH_DEF_PROP) || - (row->pitch_decision == PITCH_CORR_PROP)) { - if ((tosp_debug_level > 0) && !old_text_ord_proportional) - tprintf ("Block %d Row %d: Now Proportional\n", - block_index, row_index); - row_spacing_stats(row, - gapmap.get(), - block_index, - row_index, - block_space_gap_width, - block_non_space_gap_width); - } - else { - if ((tosp_debug_level > 0) && old_text_ord_proportional) - tprintf - ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n", - block_index, row_index, row->pitch_decision, - row->fixed_pitch); - } -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_words) - plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row); -#endif - row_index++; - } - block_index++; - } -} - - -/************************************************************************* - * block_spacing_stats() - *************************************************************************/ - -void Textord::block_spacing_stats( - TO_BLOCK* block, - GAPMAP* gapmap, - bool& old_text_ord_proportional, - int16_t& block_space_gap_width, // resulting estimate - int16_t& block_non_space_gap_width // resulting estimate -) { - TO_ROW *row; // current row - BLOBNBOX_IT blob_it; // iterator - - STATS centre_to_centre_stats (0, MAXSPACING); - // DEBUG USE ONLY - STATS all_gap_stats (0, MAXSPACING); - STATS space_gap_stats (0, MAXSPACING); - int16_t minwidth = MAXSPACING; // narrowest blob - TBOX blob_box; - TBOX prev_blob_box; - int16_t centre_to_centre; - int16_t gap_width; - float real_space_threshold; - float iqr_centre_to_centre; // DEBUG USE ONLY - float 
iqr_all_gap_stats; // DEBUG USE ONLY - int32_t end_of_row; - int32_t row_length; - - // row iterator - TO_ROW_IT row_it(block->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty () && - (!tosp_only_use_prop_rows || - (row->pitch_decision == PITCH_DEF_PROP) || - (row->pitch_decision == PITCH_CORR_PROP))) { - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - row_length = end_of_row - blob_box.left (); - if (blob_box.width () < minwidth) - minwidth = blob_box.width (); - prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - if (blob_box.width () < minwidth) - minwidth = blob_box.width (); - int16_t left = prev_blob_box.right(); - int16_t right = blob_box.left(); - gap_width = right - left; - if (!ignore_big_gap(row, row_length, gapmap, left, right)) { - all_gap_stats.add (gap_width, 1); - - centre_to_centre = (right + blob_box.right () - - (prev_blob_box.left () + left)) / 2; - //DEBUG - centre_to_centre_stats.add (centre_to_centre, 1); - // DEBUG - } - prev_blob_box = blob_box; - } - } - } - - //Inadequate samples - if (all_gap_stats.get_total () <= 1) { - block_non_space_gap_width = minwidth; - block_space_gap_width = -1; //No est. space width - //DEBUG - old_text_ord_proportional = true; - } - else { - /* For debug only ..... 
*/ - iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) - - centre_to_centre_stats.ile (0.25); - iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25); - old_text_ord_proportional = - iqr_centre_to_centre * 2 > iqr_all_gap_stats; - /* .......For debug only */ - - /* - The median of the gaps is used as an estimate of the NON-SPACE gap width. - This RELIES on the assumption that there are more gaps WITHIN words than - BETWEEN words in a block - - Now try to estimate the width of a real space for all real spaces in the - block. Do this by using a crude threshold to ignore "narrow" gaps, then - find the median of the "wide" gaps and use this. - */ - block_non_space_gap_width = (int16_t) floor (all_gap_stats.median ()); - // median gap - - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty () && - (!tosp_only_use_prop_rows || - (row->pitch_decision == PITCH_DEF_PROP) || - (row->pitch_decision == PITCH_CORR_PROP))) { - real_space_threshold = - std::max (tosp_init_guess_kn_mult * block_non_space_gap_width, - tosp_init_guess_xht_mult * row->xheight); - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = - blob_it.data_relative (-1)->bounding_box ().right (); - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - row_length = blob_box.left () - end_of_row; - prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - int16_t left = prev_blob_box.right(); - int16_t right = blob_box.left(); - gap_width = right - left; - if ((gap_width > real_space_threshold) 
&& - !ignore_big_gap(row, row_length, gapmap, left, right)) { - /* - If tosp_use_cert_spaces is enabled, the estimate of the space gap is - restricted to obvious spaces - those wider than half the xht or those - with wide blobs on both sides - i.e not things that are suspect 1's or - punctuation that is sometimes widely spaced. - */ - if (!tosp_block_use_cert_spaces || - (gap_width > - tosp_fuzzy_space_factor2 * row->xheight) - || - ((gap_width > - tosp_fuzzy_space_factor1 * row->xheight) - && (!tosp_narrow_blobs_not_cert - || (!narrow_blob (row, prev_blob_box) - && !narrow_blob (row, blob_box)))) - || (wide_blob (row, prev_blob_box) - && wide_blob (row, blob_box))) - space_gap_stats.add (gap_width, 1); - } - prev_blob_box = blob_box; - } - } - } - //Inadequate samples - if (space_gap_stats.get_total () <= 2) - block_space_gap_width = -1;//No est. space width - else - block_space_gap_width = - std::max(static_cast(floor(space_gap_stats.median())), - static_cast(3 * block_non_space_gap_width)); - } -} - - -/************************************************************************* - * row_spacing_stats() - * Set values for min_space, max_non_space based on row stats only - * If failure - return 0 values. 
- *************************************************************************/ -void Textord::row_spacing_stats( - TO_ROW *row, - GAPMAP *gapmap, - int16_t block_idx, - int16_t row_idx, - int16_t block_space_gap_width, //estimate for block - int16_t block_non_space_gap_width //estimate for block - ) { - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS all_gap_stats (0, MAXSPACING); - STATS cert_space_gap_stats (0, MAXSPACING); - STATS all_space_gap_stats (0, MAXSPACING); - STATS small_gap_stats (0, MAXSPACING); - TBOX blob_box; - TBOX prev_blob_box; - int16_t gap_width; - int16_t real_space_threshold = 0; - int16_t max = 0; - int16_t index; - int16_t large_gap_count = 0; - bool suspected_table; - int32_t max_max_nonspace; //upper bound - bool good_block_space_estimate = block_space_gap_width > 0; - int32_t end_of_row; - int32_t row_length = 0; - float sane_space; - int32_t sane_threshold; - - /* Collect first pass stats for row */ - - if (!good_block_space_estimate) - block_space_gap_width = int16_t (floor (row->xheight / 2)); - if (!row->blob_list ()->empty ()) { - if (tosp_threshold_bias1 > 0) - real_space_threshold = - block_non_space_gap_width + - int16_t (floor (0.5 + - tosp_threshold_bias1 * (block_space_gap_width - - block_non_space_gap_width))); - else - real_space_threshold = //Old TO method - (block_space_gap_width + block_non_space_gap_width) / 2; - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - row_length = end_of_row - blob_box.left (); - prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); 
- else - blob_box = box_next (&blob_it); - int16_t left = prev_blob_box.right(); - int16_t right = blob_box.left(); - gap_width = right - left; - if (ignore_big_gap(row, row_length, gapmap, left, right)) { - large_gap_count++; - } else { - if (gap_width >= real_space_threshold) { - if (!tosp_row_use_cert_spaces || - (gap_width > tosp_fuzzy_space_factor2 * row->xheight) || - ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) - && (!tosp_narrow_blobs_not_cert - || (!narrow_blob (row, prev_blob_box) - && !narrow_blob (row, blob_box)))) - || (wide_blob (row, prev_blob_box) - && wide_blob (row, blob_box))) - cert_space_gap_stats.add (gap_width, 1); - all_space_gap_stats.add (gap_width, 1); - } - else - small_gap_stats.add (gap_width, 1); - all_gap_stats.add (gap_width, 1); - } - prev_blob_box = blob_box; - } - } - suspected_table = (large_gap_count > 1) || - ((large_gap_count > 0) && - (all_gap_stats.get_total () <= tosp_few_samples)); - - /* Now determine row kern size, space size and threshold */ - - if ((cert_space_gap_stats.get_total () >= - tosp_enough_space_samples_for_median) || - ((suspected_table || - all_gap_stats.get_total () <= tosp_short_row) && - cert_space_gap_stats.get_total () > 0)) { - old_to_method(row, - &all_gap_stats, - &cert_space_gap_stats, - &small_gap_stats, - block_space_gap_width, - block_non_space_gap_width); - } else { - if (!tosp_recovery_isolated_row_stats || - !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table, - block_idx, row_idx)) { - if (tosp_row_use_cert_spaces && (tosp_debug_level > 5)) - tprintf ("B:%d R:%d -- Inadequate certain spaces.\n", - block_idx, row_idx); - if (tosp_row_use_cert_spaces1 && good_block_space_estimate) { - //Use block default - row->space_size = block_space_gap_width; - if (all_gap_stats.get_total () > tosp_redo_kern_limit) - row->kern_size = all_gap_stats.median (); - else - row->kern_size = block_non_space_gap_width; - row->space_threshold = - int32_t (floor ((row->space_size + 
row->kern_size) / - tosp_old_sp_kn_th_factor)); - } - else - old_to_method(row, - &all_gap_stats, - &all_space_gap_stats, - &small_gap_stats, - block_space_gap_width, - block_non_space_gap_width); - } - } - - if (tosp_improve_thresh && !suspected_table) - improve_row_threshold(row, &all_gap_stats); - - /* Now lets try to be careful not to do anything silly with tables when we - are ignoring big gaps*/ - if (tosp_sanity_method == 0) { - if (suspected_table && - (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) { - if (tosp_debug_level > 5) - tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx, - row_idx, row->kern_size, row->space_threshold, row->space_size); - row->space_threshold = - (int32_t) (tosp_table_kn_sp_ratio * row->kern_size); - row->space_size = std::max(row->space_threshold + 1.0f, row->xheight); - } - } - else if (tosp_sanity_method == 1) { - sane_space = row->space_size; - /* NEVER let space size get too close to kern size */ - if ((row->space_size < tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) - || ((row->space_size - row->kern_size) < - (tosp_silly_kn_sp_gap * row->xheight))) { - if (good_block_space_estimate && - (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size)) - sane_space = block_space_gap_width; - else - sane_space = - std::max(static_cast(tosp_min_sane_kn_sp) * std::max(row->kern_size, 2.5f), - row->xheight / 2.0f); - if (tosp_debug_level > 5) - tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n", - block_idx, row_idx, row->kern_size, row->space_threshold, - row->space_size, sane_space); - row->space_size = sane_space; - row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / - tosp_old_sp_kn_th_factor)); - } - /* NEVER let threshold get VERY far away from kern */ - sane_threshold = int32_t (floor (tosp_max_sane_kn_thresh * - std::max(row->kern_size, 2.5f))); - if (row->space_threshold > sane_threshold) { - if (tosp_debug_level > 5) - tprintf("B:%d 
R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n", - block_idx, row_idx, row->kern_size, row->space_threshold, - row->space_size, sane_threshold); - row->space_threshold = sane_threshold; - if (row->space_size <= sane_threshold) - row->space_size = row->space_threshold + 1.0f; - } - /* Beware of tables - there may be NO spaces */ - if (suspected_table) { - sane_space = std::max(tosp_table_kn_sp_ratio * row->kern_size, - tosp_table_xht_sp_ratio * row->xheight); - sane_threshold = int32_t (floor ((sane_space + row->kern_size) / 2)); - - if ((row->space_size < sane_space) || - (row->space_threshold < sane_threshold)) { - if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n", - block_idx, row_idx, - row->kern_size, - row->space_threshold, row->space_size); - //the minimum sane value - row->space_threshold = (int32_t) sane_space; - row->space_size = std::max(row->space_threshold + 1.0f, row->xheight); - } - } - } - - /* Now lets try to put some error limits on the threshold */ - - if (tosp_old_to_method) { - /* Old textord made a space if gap >= threshold */ - //NO FUZZY SPACES YET - row->max_nonspace = row->space_threshold; - //NO FUZZY SPACES YET - row->min_space = row->space_threshold + 1; - } - else { - /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */ - row->min_space = - std::min(int32_t (ceil (tosp_fuzzy_space_factor * row->xheight)), - int32_t (row->space_size)); - if (row->min_space <= row->space_threshold) - // Don't be silly - row->min_space = row->space_threshold + 1; - /* - Lets try to guess the max certain kern gap by looking at the cluster of - kerns for the row. The row is proportional so the kerns should cluster - tightly at the bottom of the distribution. We also expect most gaps to be - kerns. Find the maximum of the kern piles between 0 and twice the kern - estimate. Piles before the first one with less than 1/10 the maximum - number of samples can be taken as certain kerns. 
- - Of course, there are some cases where the kern peak and space peaks merge, - so we will put an UPPER limit on the max certain kern gap of some fraction - below the threshold. - */ - - max_max_nonspace = int32_t ((row->space_threshold + row->kern_size) / 2); - - //default - row->max_nonspace = max_max_nonspace; - for (index = 0; index <= max_max_nonspace; index++) { - if (all_gap_stats.pile_count (index) > max) - max = all_gap_stats.pile_count (index); - if ((index > row->kern_size) && - (all_gap_stats.pile_count (index) < 0.1 * max)) { - row->max_nonspace = index; - break; - } - } - } - - /* Yet another algorithm - simpler this time - just choose a fraction of the - threshold to space range */ - - if ((tosp_fuzzy_sp_fraction > 0) && - (row->space_size > row->space_threshold)) - row->min_space = std::max(row->min_space, - (int32_t) ceil (row->space_threshold + - tosp_fuzzy_sp_fraction * - (row->space_size - - row->space_threshold))); - - /* Ensure that ANY space less than some multiplier times the kern size is - fuzzy. In tables there is a risk of erroneously setting a small space size - when there are no real spaces. Sometimes tables have text squashed into - columns so that the kn->sp ratio is small anyway - this means that we can't - use this to force a wider separation - hence we rely on context to join any - dubious breaks. 
*/ - - if ((tosp_table_fuzzy_kn_sp_ratio > 0) && - (suspected_table || tosp_fuzzy_limit_all)) - row->min_space = std::max(row->min_space, - (int32_t) ceil (tosp_table_fuzzy_kn_sp_ratio * - row->kern_size)); - - if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) { - row->max_nonspace = (int32_t) floor (0.5 + row->kern_size + - tosp_fuzzy_kn_fraction * - (row->space_threshold - - row->kern_size)); - } - if (row->max_nonspace > row->space_threshold) { - // Don't be silly - row->max_nonspace = row->space_threshold; - } - - if (tosp_debug_level > 5) - tprintf - ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n", - block_idx, row_idx, row_length, block_non_space_gap_width, - block_space_gap_width, real_space_threshold, row->kern_size, - row->max_nonspace, row->space_threshold, row->min_space, - row->space_size); - if (tosp_debug_level > 10) - tprintf("row->kern_size = %3.2f, row->space_size = %3.2f, " - "row->space_threshold = %d\n", - row->kern_size, row->space_size, row->space_threshold); -} - -void Textord::old_to_method( - TO_ROW *row, - STATS *all_gap_stats, - STATS *space_gap_stats, - STATS *small_gap_stats, - int16_t block_space_gap_width, //estimate for block - int16_t block_non_space_gap_width //estimate for block - ) { - /* First, estimate row space size */ - /* Old to condition was > 2 */ - if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) { - //Adequate samples - /* Set space size to median of spaces BUT limits it if it seems wildly out */ - row->space_size = space_gap_stats->median (); - if (row->space_size > block_space_gap_width * 1.5) { - if (tosp_old_to_bug_fix) - row->space_size = block_space_gap_width * 1.5; - else - //BUG??? 
should be *1.5 - row->space_size = block_space_gap_width; - } - if (row->space_size < (block_non_space_gap_width * 2) + 1) - row->space_size = (block_non_space_gap_width * 2) + 1; - } - //Only 1 or 2 samples - else if (space_gap_stats->get_total () >= 1) { - //hence mean not median - row->space_size = space_gap_stats->mean (); - if (row->space_size > block_space_gap_width * 1.5) { - if (tosp_old_to_bug_fix) - row->space_size = block_space_gap_width * 1.5; - else - //BUG??? should be *1.5 - row->space_size = block_space_gap_width; - } - if (row->space_size < (block_non_space_gap_width * 3) + 1) - row->space_size = (block_non_space_gap_width * 3) + 1; - } - else { - //Use block default - row->space_size = block_space_gap_width; - } - - /* Next, estimate row kern size */ - if ((tosp_only_small_gaps_for_kern) && - (small_gap_stats->get_total () > tosp_redo_kern_limit)) - row->kern_size = small_gap_stats->median (); - else if (all_gap_stats->get_total () > tosp_redo_kern_limit) - row->kern_size = all_gap_stats->median (); - else //old TO -SAME FOR ALL ROWS - row->kern_size = block_non_space_gap_width; - - /* Finally, estimate row space threshold */ - if (tosp_threshold_bias2 > 0) { - row->space_threshold = - int32_t (floor (0.5 + row->kern_size + - tosp_threshold_bias2 * (row->space_size - - row->kern_size))); - } else { - /* - NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold - and holds this in a float. The use is with a >= test - NEW textord uses an integer threshold and a > test - It comes to the same thing. - (Though there is a difference in that old textor has integer space_size - and kern_size.) 
- */ - row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / 2)); - } - - // Apply the same logic and ratios as in row_spacing_stats to - // restrict relative values of the row's space_size, kern_size, and - // space_threshold - if (tosp_old_to_constrain_sp_kn && tosp_sanity_method == 1 && - ((row->space_size < - tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) || - ((row->space_size - row->kern_size) < - tosp_silly_kn_sp_gap * row->xheight))) { - if (row->kern_size > 2.5) - row->kern_size = row->space_size / tosp_min_sane_kn_sp; - row->space_threshold = int32_t (floor ((row->space_size + row->kern_size) / - tosp_old_sp_kn_th_factor)); - } -} - - -/************************************************************************* - * isolated_row_stats() - * Set values for min_space, max_non_space based on row stats only - *************************************************************************/ -bool Textord::isolated_row_stats(TO_ROW* row, - GAPMAP* gapmap, - STATS* all_gap_stats, - bool suspected_table, - int16_t block_idx, - int16_t row_idx) { - float kern_estimate; - float crude_threshold_estimate; - int16_t small_gaps_count; - int16_t total; - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS cert_space_gap_stats (0, MAXSPACING); - STATS all_space_gap_stats (0, MAXSPACING); - STATS small_gap_stats (0, MAXSPACING); - TBOX blob_box; - TBOX prev_blob_box; - int16_t gap_width; - int32_t end_of_row; - int32_t row_length; - - kern_estimate = all_gap_stats->median (); - crude_threshold_estimate = std::max(tosp_init_guess_kn_mult * kern_estimate, - tosp_init_guess_xht_mult * row->xheight); - small_gaps_count = stats_count_under (all_gap_stats, - (int16_t) - ceil (crude_threshold_estimate)); - total = all_gap_stats->get_total (); - - if ((total <= tosp_redo_kern_limit) || - ((small_gaps_count / (float) total) < tosp_enough_small_gaps) || - (total - small_gaps_count < 1)) { - if (tosp_debug_level > 5) - tprintf("B:%d R:%d -- Can't do 
isolated row stats.\n", block_idx, - row_idx); - return false; - } - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - row_length = end_of_row - blob_box.left (); - prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { - if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); - else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); - else - blob_box = box_next (&blob_it); - int16_t left = prev_blob_box.right(); - int16_t right = blob_box.left(); - gap_width = right - left; - if (!ignore_big_gap(row, row_length, gapmap, left, right) && - (gap_width > crude_threshold_estimate)) { - if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) || - ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && - (!tosp_narrow_blobs_not_cert || - (!narrow_blob (row, prev_blob_box) && - !narrow_blob (row, blob_box)))) || - (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box))) - cert_space_gap_stats.add (gap_width, 1); - all_space_gap_stats.add (gap_width, 1); - } - if (gap_width < crude_threshold_estimate) - small_gap_stats.add (gap_width, 1); - - prev_blob_box = blob_box; - } - if (cert_space_gap_stats.get_total () >= - tosp_enough_space_samples_for_median) - //median - row->space_size = cert_space_gap_stats.median (); - else if (suspected_table && (cert_space_gap_stats.get_total () > 0)) - //to avoid spaced - row->space_size = cert_space_gap_stats.mean (); - // 1's in tables - else if (all_space_gap_stats.get_total () >= - tosp_enough_space_samples_for_median) - //median - row->space_size = all_space_gap_stats.median (); - else - row->space_size = all_space_gap_stats.mean (); - - if (tosp_only_small_gaps_for_kern) - row->kern_size = 
small_gap_stats.median (); - else - row->kern_size = all_gap_stats->median (); - row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / 2)); - /* Sanity check */ - if ((row->kern_size >= row->space_threshold) || - (row->space_threshold >= row->space_size) || - (row->space_threshold <= 0)) { - if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n", - block_idx, row_idx, - row->kern_size, row->space_threshold, row->space_size); - row->kern_size = 0.0f; - row->space_threshold = 0; - row->space_size = 0.0f; - return false; - } - - if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n", - block_idx, row_idx, - row->kern_size, row->space_threshold, row->space_size); - return true; -} - -int16_t Textord::stats_count_under(STATS *stats, int16_t threshold) { - int16_t index; - int16_t total = 0; - - for (index = 0; index < threshold; index++) - total += stats->pile_count (index); - return total; -} - - -/************************************************************************* - * improve_row_threshold() - * Try to recognise a "normal line" - - * > 25 gaps - * && space > 3 * kn && space > 10 - * (I.e. reasonably large space and kn:sp ratio) - * && > 3/4 # gaps < kn + (sp - kn)/3 - * (I.e. most gaps are well away from space estimate) - * && a gap of max(3, (sp - kn) / 3) empty histogram positions is found - * somewhere in the histogram between kn and sp - * THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies - * NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!! - * try moving the default threshold to within this band but leave the - * fuzzy limit calculation as at present. 
- *************************************************************************/ -void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { - float sp = row->space_size; - float kn = row->kern_size; - int16_t reqd_zero_width = 0; - int16_t zero_width = 0; - int16_t zero_start = 0; - int16_t index = 0; - - if (tosp_debug_level > 10) - tprintf ("Improve row threshold 0"); - if ((all_gap_stats->get_total () <= 25) || - (sp <= 10) || - (sp <= 3 * kn) || - (stats_count_under (all_gap_stats, - (int16_t) ceil (kn + (sp - kn) / 3 + 0.5)) < - (0.75 * all_gap_stats->get_total ()))) - return; - if (tosp_debug_level > 10) - tprintf (" 1"); - /* - Look for the first region of all 0's in the histogram which is wider than - max(3, (sp - kn) / 3) and starts between kn and sp. If found, and current - threshold is not within it, move the threshold so that is is just inside it. - */ - reqd_zero_width = (int16_t) floor ((sp - kn) / 3 + 0.5); - if (reqd_zero_width < 3) - reqd_zero_width = 3; - - for (index = int16_t (ceil (kn)); index < int16_t (floor (sp)); index++) { - if (all_gap_stats->pile_count (index) == 0) { - if (zero_width == 0) - zero_start = index; - zero_width++; - } - else { - if (zero_width >= reqd_zero_width) - break; - else { - zero_width = 0; - } - } - } - index--; - if (tosp_debug_level > 10) - tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n", - reqd_zero_width, zero_width, zero_start, row->space_threshold); - if ((zero_width < reqd_zero_width) || - ((row->space_threshold >= zero_start) && - (row->space_threshold <= index))) - return; - if (tosp_debug_level > 10) - tprintf (" 2"); - if (row->space_threshold < zero_start) { - if (tosp_debug_level > 5) - tprintf - ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", - kn, sp, zero_start, index, row->space_threshold, zero_start); - row->space_threshold = zero_start; - } - if (row->space_threshold > index) { - if (tosp_debug_level > 5) - tprintf - ("Improve row kn:%5.2f 
sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", - kn, sp, zero_start, index, row->space_threshold, index); - row->space_threshold = index; - } -} - - -/********************************************************************** - * make_prop_words - * - * Convert a TO_BLOCK to a BLOCK. - **********************************************************************/ -ROW *Textord::make_prop_words( - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ) { - bool bol; // start of line - /* prev_ values are for start of word being built. non prev_ values are for - the gap between the word being built and the next one. */ - bool prev_fuzzy_sp; // probably space - bool prev_fuzzy_non; // probably not - uint8_t prev_blanks; // in front of word - bool fuzzy_sp = false; // probably space - bool fuzzy_non = false; // probably not - uint8_t blanks = 0; // in front of word - bool prev_gap_was_a_space = false; - bool break_at_next_gap = false; - ROW *real_row; // output row - C_OUTLINE_IT cout_it; - C_BLOB_LIST cblobs; - C_BLOB_IT cblob_it = &cblobs; - WERD_LIST words; - WERD *word; // new word - int32_t next_rep_char_word_right = INT32_MAX; - float repetition_spacing; // gap between repetitions - int32_t xstarts[2]; // row ends - int32_t prev_x; // end of prev blob - BLOBNBOX *bblob; // current blob - TBOX blob_box; // bounding box - BLOBNBOX_IT box_it; // iterator - TBOX prev_blob_box; - TBOX next_blob_box; - int16_t prev_gap = INT16_MAX; - int16_t current_gap = INT16_MAX; - int16_t next_gap = INT16_MAX; - int16_t prev_within_xht_gap = INT16_MAX; - int16_t current_within_xht_gap = INT16_MAX; - int16_t next_within_xht_gap = INT16_MAX; - int16_t word_count = 0; - - // repeated char words - WERD_IT rep_char_it(&(row->rep_words)); - if (!rep_char_it.empty ()) { - next_rep_char_word_right = - rep_char_it.data ()->bounding_box ().right (); - } - - prev_x = -INT16_MAX; - cblob_it.set_to_list (&cblobs); - box_it.set_to_list (row->blob_list ()); - // new words - WERD_IT word_it(&words); - 
bol = true; - prev_blanks = 0; - prev_fuzzy_sp = false; - prev_fuzzy_non = false; - if (!box_it.empty ()) { - xstarts[0] = box_it.data ()->bounding_box ().left (); - if (xstarts[0] > next_rep_char_word_right) { - /* We need to insert a repeated char word at the start of the row */ - word = rep_char_it.extract (); - word_it.add_after_then_move (word); - /* Set spaces before repeated char word */ - word->set_flag (W_BOL, true); - bol = false; - word->set_blanks (0); - //NO uncertainty - word->set_flag (W_FUZZY_SP, false); - word->set_flag (W_FUZZY_NON, false); - xstarts[0] = word->bounding_box ().left (); - /* Set spaces after repeated char word (and leave current word set) */ - repetition_spacing = find_mean_blob_spacing (word); - current_gap = box_it.data ()->bounding_box ().left () - - next_rep_char_word_right; - current_within_xht_gap = current_gap; - if (current_gap > tosp_rep_space * repetition_spacing) { - prev_blanks = (uint8_t) floor (current_gap / row->space_size); - if (prev_blanks < 1) - prev_blanks = 1; - } - else - prev_blanks = 0; - if (tosp_debug_level > 5) - tprintf ("Repch wd at BOL(%d, %d). 
rep spacing %5.2f; Rgap:%d ", - box_it.data ()->bounding_box ().left (), - box_it.data ()->bounding_box ().bottom (), - repetition_spacing, current_gap); - prev_fuzzy_sp = false; - prev_fuzzy_non = false; - if (rep_char_it.empty ()) { - next_rep_char_word_right = INT32_MAX; - } - else { - rep_char_it.forward (); - next_rep_char_word_right = - rep_char_it.data ()->bounding_box ().right (); - } - } - - peek_at_next_gap(row, - box_it, - next_blob_box, - next_gap, - next_within_xht_gap); - do { - bblob = box_it.data (); - blob_box = bblob->bounding_box (); - if (bblob->joined_to_prev ()) { - if (bblob->cblob () != nullptr) { - cout_it.set_to_list (cblob_it.data ()->out_list ()); - cout_it.move_to_last (); - cout_it.add_list_after (bblob->cblob ()->out_list ()); - delete bblob->cblob (); - } - } else { - if (bblob->cblob() != nullptr) - cblob_it.add_after_then_move (bblob->cblob ()); - prev_x = blob_box.right (); - } - box_it.forward (); //next one - bblob = box_it.data (); - blob_box = bblob->bounding_box (); - - if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) { - /* Real Blob - not multiple outlines or pre-chopped */ - prev_gap = current_gap; - prev_within_xht_gap = current_within_xht_gap; - prev_blob_box = next_blob_box; - current_gap = next_gap; - current_within_xht_gap = next_within_xht_gap; - peek_at_next_gap(row, - box_it, - next_blob_box, - next_gap, - next_within_xht_gap); - - int16_t prev_gap_arg = prev_gap; - int16_t next_gap_arg = next_gap; - if (tosp_only_use_xht_gaps) { - prev_gap_arg = prev_within_xht_gap; - next_gap_arg = next_within_xht_gap; - } - // Decide if a word-break should be inserted - if (blob_box.left () > next_rep_char_word_right || - make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box, - current_gap, current_within_xht_gap, - next_blob_box, next_gap_arg, - blanks, fuzzy_sp, fuzzy_non, - prev_gap_was_a_space, - break_at_next_gap) || - box_it.at_first()) { - /* Form a new word out of the blobs collected */ - word = new WERD 
(&cblobs, prev_blanks, nullptr); - word_count++; - word_it.add_after_then_move (word); - if (bol) { - word->set_flag (W_BOL, true); - bol = false; - } - if (prev_fuzzy_sp) - //probably space - word->set_flag (W_FUZZY_SP, true); - else if (prev_fuzzy_non) - word->set_flag (W_FUZZY_NON, true); - //probably not - - if (blob_box.left () > next_rep_char_word_right) { - /* We need to insert a repeated char word */ - word = rep_char_it.extract (); - word_it.add_after_then_move (word); - - /* Set spaces before repeated char word */ - repetition_spacing = find_mean_blob_spacing (word); - current_gap = word->bounding_box ().left () - prev_x; - current_within_xht_gap = current_gap; - if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = - (uint8_t) floor (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - } - else - blanks = 0; - if (tosp_debug_level > 5) - tprintf - ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);", - word->bounding_box ().left (), - word->bounding_box ().bottom (), - repetition_spacing, current_gap, blanks); - word->set_blanks (blanks); - //NO uncertainty - word->set_flag (W_FUZZY_SP, false); - word->set_flag (W_FUZZY_NON, false); - - /* Set spaces after repeated char word (and leave current word set) */ - current_gap = - blob_box.left () - next_rep_char_word_right; - if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = (uint8_t) (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - } - else - blanks = 0; - if (tosp_debug_level > 5) - tprintf (" Rgap:%d (%d blanks)\n", - current_gap, blanks); - fuzzy_sp = FALSE; - fuzzy_non = FALSE; - - if (rep_char_it.empty ()) { - next_rep_char_word_right = INT32_MAX; - } - else { - rep_char_it.forward (); - next_rep_char_word_right = - rep_char_it.data ()->bounding_box ().right (); - } - } - - if (box_it.at_first () && rep_char_it.empty ()) { - //at end of line - word->set_flag (W_EOL, true); - xstarts[1] = prev_x; - } - else { - prev_blanks = blanks; - 
prev_fuzzy_sp = fuzzy_sp; - prev_fuzzy_non = fuzzy_non; - } - } - } - } - while (!box_it.at_first ()); //until back at start - - /* Insert any further repeated char words */ - while (!rep_char_it.empty ()) { - word = rep_char_it.extract (); - word_it.add_after_then_move (word); - - /* Set spaces before repeated char word */ - repetition_spacing = find_mean_blob_spacing (word); - current_gap = word->bounding_box ().left () - prev_x; - if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = (uint8_t) floor (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - } - else - blanks = 0; - if (tosp_debug_level > 5) - tprintf( - "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n", - word->bounding_box().left(), word->bounding_box().bottom(), - repetition_spacing, current_gap, blanks); - word->set_blanks (blanks); - //NO uncertainty - word->set_flag (W_FUZZY_SP, false); - word->set_flag (W_FUZZY_NON, false); - prev_x = word->bounding_box ().right (); - if (rep_char_it.empty ()) { - //at end of line - word->set_flag (W_EOL, true); - xstarts[1] = prev_x; - } - else { - rep_char_it.forward (); - } - } - real_row = new ROW (row, - (int16_t) row->kern_size, (int16_t) row->space_size); - word_it.set_to_list (real_row->word_list ()); - //put words in row - word_it.add_list_after (&words); - real_row->recalc_bounding_box (); - - if (tosp_debug_level > 4) { - tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n", - word_count, - real_row->bounding_box ().left (), - real_row->bounding_box ().bottom (), - real_row->bounding_box ().right (), - real_row->bounding_box ().top ()); - } - return real_row; - } - return nullptr; -} - -/********************************************************************** - * make_blob_words - * - * Converts words into blobs so that each blob is a single character. - * Used for chopper test. 
- **********************************************************************/ -ROW *Textord::make_blob_words( - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ) { - bool bol; // start of line - ROW *real_row; // output row - C_OUTLINE_IT cout_it; - C_BLOB_LIST cblobs; - C_BLOB_IT cblob_it = &cblobs; - WERD_LIST words; - WERD *word; // new word - BLOBNBOX *bblob; // current blob - TBOX blob_box; // bounding box - BLOBNBOX_IT box_it; // iterator - int16_t word_count = 0; - - cblob_it.set_to_list(&cblobs); - box_it.set_to_list(row->blob_list()); - // new words - WERD_IT word_it(&words); - bol = TRUE; - if (!box_it.empty()) { - - do { - bblob = box_it.data(); - blob_box = bblob->bounding_box(); - if (bblob->joined_to_prev()) { - if (bblob->cblob() != nullptr) { - cout_it.set_to_list(cblob_it.data()->out_list()); - cout_it.move_to_last(); - cout_it.add_list_after(bblob->cblob()->out_list()); - delete bblob->cblob(); - } - } else { - if (bblob->cblob() != nullptr) - cblob_it.add_after_then_move(bblob->cblob()); - } - box_it.forward(); // next one - bblob = box_it.data(); - blob_box = bblob->bounding_box(); - - if (!bblob->joined_to_prev() && !cblobs.empty()) { - word = new WERD(&cblobs, 1, nullptr); - word_count++; - word_it.add_after_then_move(word); - if (bol) { - word->set_flag(W_BOL, TRUE); - bol = FALSE; - } - if (box_it.at_first()) { // at end of line - word->set_flag(W_EOL, TRUE); - } - } - } - while (!box_it.at_first()); // until back at start - /* Setup the row with created words. 
*/ - real_row = new ROW(row, (int16_t) row->kern_size, (int16_t) row->space_size); - word_it.set_to_list(real_row->word_list()); - //put words in row - word_it.add_list_after(&words); - real_row->recalc_bounding_box(); - if (tosp_debug_level > 4) { - tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n", - word_count, - real_row->bounding_box().left(), - real_row->bounding_box().bottom(), - real_row->bounding_box().right(), - real_row->bounding_box().top()); - } - return real_row; - } - return nullptr; -} - -bool Textord::make_a_word_break( - TO_ROW* row, // row being made - TBOX blob_box, // for next_blob // how many blanks? - int16_t prev_gap, - TBOX prev_blob_box, - int16_t real_current_gap, - int16_t within_xht_current_gap, - TBOX next_blob_box, - int16_t next_gap, - uint8_t& blanks, - bool& fuzzy_sp, - bool& fuzzy_non, - bool& prev_gap_was_a_space, - bool& break_at_next_gap) { - bool space; - int16_t current_gap; - float fuzzy_sp_to_kn_limit; - - if (break_at_next_gap) { - break_at_next_gap = false; - return true; - } - /* Inhibit using the reduced gap if - The kerning is large - chars are not kerned and reducing "f"s can cause - erroneous blanks - OR The real gap is less than 0 - OR The real gap is less than the kerning estimate - */ - if ((row->kern_size > tosp_large_kerning * row->xheight) || - ((tosp_dont_fool_with_small_kerns >= 0) && - (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size))) - //Ignore the difference - within_xht_current_gap = real_current_gap; - - if (tosp_use_xht_gaps && tosp_only_use_xht_gaps) - current_gap = within_xht_current_gap; - else - current_gap = real_current_gap; - - if (tosp_old_to_method) { - //Boring old method - space = current_gap > row->max_nonspace; - if (space && (current_gap < INT16_MAX)) { - if (current_gap < row->min_space) { - if (current_gap > row->space_threshold) { - blanks = 1; - fuzzy_sp = true; - fuzzy_non = false; - } - else { - blanks = 0; - fuzzy_sp = false; - fuzzy_non = true; - } - } - 
else { - blanks = (uint8_t) (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - fuzzy_sp = false; - fuzzy_non = false; - } - } - return space; - } - else { - /* New exciting heuristic method */ - if (prev_blob_box.null_box ()) // Beginning of row - prev_gap_was_a_space = true; - - //Default as old TO - space = current_gap > row->space_threshold; - - /* Set defaults for the word break in case we find one. Currently there are - no fuzzy spaces. Depending on the reliability of the different heuristics - we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY - be used if the function returns TRUE - ie the word is to be broken. - */ - int num_blanks = current_gap; - if (row->space_size > 1.0f) - num_blanks = IntCastRounded(current_gap / row->space_size); - blanks = static_cast(ClipToRange(num_blanks, 1, UINT8_MAX)); - fuzzy_sp = false; - fuzzy_non = false; - /* - If xht measure causes gap to flip one of the 3 thresholds act accordingly - - despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to - context. 
- */ - if (tosp_use_xht_gaps && - (real_current_gap <= row->max_nonspace) && - (within_xht_current_gap > row->max_nonspace)) { - space = true; - fuzzy_non = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 20, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if (tosp_use_xht_gaps && - (real_current_gap <= row->space_threshold) && - (within_xht_current_gap > row->space_threshold)) { - space = true; - if (tosp_flip_fuzz_kn_to_sp) - fuzzy_sp = true; - else - fuzzy_non = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 21, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if (tosp_use_xht_gaps && - (real_current_gap < row->min_space) && - (within_xht_current_gap >= row->min_space)) { - space = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 22, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if (tosp_force_wordbreak_on_punct && - !suspected_punct_blob(row, prev_blob_box) && - suspected_punct_blob(row, blob_box)) { - break_at_next_gap = true; - } - /* Now continue with normal heuristics */ - else if ((current_gap < row->min_space) && - (current_gap > row->space_threshold)) { - /* Heuristics to turn dubious spaces to kerns */ - if (tosp_pass_wide_fuzz_sp_to_context > 0) - fuzzy_sp_to_kn_limit = row->kern_size + - tosp_pass_wide_fuzz_sp_to_context * - (row->space_size - row->kern_size); - else - fuzzy_sp_to_kn_limit = 99999.0f; - - /* If current gap is significantly smaller than the previous space the other - side of a narrow blob then this gap is a kern. 
*/ - if ((prev_blob_box.width () > 0) && - narrow_blob (row, prev_blob_box) && - prev_gap_was_a_space && - (current_gap <= tosp_gap_factor * prev_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { - if (tosp_flip_fuzz_sp_to_kn) - fuzzy_non = true; - else - fuzzy_sp = true; - } - else - space = false; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 1, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - /* If current gap not much bigger than the previous kern the other side of a - narrow blob then this gap is a kern as well */ - else if ((prev_blob_box.width () > 0) && - narrow_blob (row, prev_blob_box) && - !prev_gap_was_a_space && - (current_gap * tosp_gap_factor <= prev_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { - if (tosp_flip_fuzz_sp_to_kn) - fuzzy_non = true; - else - fuzzy_sp = true; - } - else - space = false; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 2, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if ((next_blob_box.width () > 0) && - narrow_blob (row, next_blob_box) && - (next_gap > row->space_threshold) && - (current_gap <= tosp_gap_factor * next_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { - if (tosp_flip_fuzz_sp_to_kn) - fuzzy_non = true; - else - fuzzy_sp = true; - } - else - space = false; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 3, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if ((next_blob_box.width () > 0) && - narrow_blob (row, next_blob_box) && - (next_gap <= row->space_threshold) && - (current_gap * tosp_gap_factor <= next_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { - if (tosp_flip_fuzz_sp_to_kn) - fuzzy_non = true; - else - fuzzy_sp = true; - } - else - space = false; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 4, - 
prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if ((((next_blob_box.width () > 0) && - narrow_blob (row, next_blob_box)) || - ((prev_blob_box.width () > 0) && - narrow_blob (row, prev_blob_box)))) { - fuzzy_sp = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 6, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - } - else if ((current_gap > row->max_nonspace) && - (current_gap <= row->space_threshold)) { - - /* Heuristics to turn dubious kerns to spaces */ - /* TRIED THIS BUT IT MADE THINGS WORSE - if (prev_gap == INT16_MAX) - prev_gap = 0; // start of row - if (next_gap == INT16_MAX) - next_gap = 0; // end of row - */ - if ((prev_blob_box.width () > 0) && - (next_blob_box.width () > 0) && - (current_gap >= - tosp_kern_gap_factor1 * std::max(prev_gap, next_gap)) && - wide_blob (row, prev_blob_box) && - wide_blob (row, next_blob_box)) { - - space = true; - /* - tosp_flip_caution is an attempt to stop the default changing in cases - where there is a large difference between the kern and space estimates. - See problem in 'chiefs' where "have" gets split in the quotation. - */ - if ((tosp_flip_fuzz_kn_to_sp) && - ((tosp_flip_caution <= 0) || - (tosp_flip_caution * row->kern_size > row->space_size))) - fuzzy_sp = true; - else - fuzzy_non = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 7, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } else if (prev_blob_box.width() > 0 && - next_blob_box.width() > 0 && - current_gap > 5 && // Rule 9 handles small gap, big ratio. 
- current_gap >= - tosp_kern_gap_factor2 * std::max(prev_gap, next_gap) && - !(narrow_blob(row, prev_blob_box) || - suspected_punct_blob(row, prev_blob_box)) && - !(narrow_blob(row, next_blob_box) || - suspected_punct_blob(row, next_blob_box))) { - space = true; - fuzzy_non = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 8, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - else if ((tosp_kern_gap_factor3 > 0) && - (prev_blob_box.width () > 0) && - (next_blob_box.width () > 0) && - (current_gap >= tosp_kern_gap_factor3 * std::max(prev_gap, next_gap)) && - (!tosp_rule_9_test_punct || - (!suspected_punct_blob (row, prev_blob_box) && - !suspected_punct_blob (row, next_blob_box)))) { - space = true; - fuzzy_non = true; -#ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 9, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); -#endif - } - } - if (tosp_debug_level > 10) - tprintf("word break = %d current_gap = %d, prev_gap = %d, " - "next_gap = %d\n", space ? 
1 : 0, current_gap, - prev_gap, next_gap); - prev_gap_was_a_space = space && !(fuzzy_non); - return space; - } -} - -bool Textord::narrow_blob(TO_ROW* row, TBOX blob_box) { - bool result; - result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) || - (((float) blob_box.width () / blob_box.height ()) <= - tosp_narrow_aspect_ratio)); - return result; -} - -bool Textord::wide_blob(TO_ROW* row, TBOX blob_box) { - bool result; - if (tosp_wide_fraction > 0) { - if (tosp_wide_aspect_ratio > 0) - result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) && - (((float) blob_box.width () / blob_box.height ()) > - tosp_wide_aspect_ratio)); - else - result = (blob_box.width () >= tosp_wide_fraction * row->xheight); - } - else - result = !narrow_blob (row, blob_box); - return result; -} - -bool Textord::suspected_punct_blob(TO_ROW* row, TBOX box) { - bool result; - float baseline; - float blob_x_centre; - /* Find baseline of centre of blob */ - blob_x_centre = (box.right () + box.left ()) / 2.0; - baseline = row->baseline.y (blob_x_centre); - - result = (box.height () <= 0.66 * row->xheight) || - (box.top () < baseline + row->xheight / 2.0) || - (box.bottom () > baseline + row->xheight / 2.0); - return result; -} - - -void Textord::peek_at_next_gap(TO_ROW *row, - BLOBNBOX_IT box_it, - TBOX &next_blob_box, - int16_t &next_gap, - int16_t &next_within_xht_gap) { - TBOX next_reduced_blob_box; - TBOX bit_beyond; - BLOBNBOX_IT reduced_box_it = box_it; - - next_blob_box = box_next (&box_it); - next_reduced_blob_box = reduced_box_next (row, &reduced_box_it); - if (box_it.at_first ()) { - next_gap = INT16_MAX; - next_within_xht_gap = INT16_MAX; - } - else { - bit_beyond = box_it.data ()->bounding_box (); - next_gap = bit_beyond.left () - next_blob_box.right (); - bit_beyond = reduced_box_next (row, &reduced_box_it); - next_within_xht_gap = - bit_beyond.left () - next_reduced_blob_box.right (); - } -} - - -#ifndef GRAPHICS_DISABLED -void Textord::mark_gap( - TBOX 
blob, // blob following gap - int16_t rule, // heuristic id - int16_t prev_gap, - int16_t prev_blob_width, - int16_t current_gap, - int16_t next_blob_width, - int16_t next_gap) { - ScrollView::Color col; //of ellipse marking flipped gap - - switch (rule) { - case 1: - col = ScrollView::RED; - break; - case 2: - col = ScrollView::CYAN; - break; - case 3: - col = ScrollView::GREEN; - break; - case 4: - col = ScrollView::BLACK; - break; - case 5: - col = ScrollView::MAGENTA; - break; - case 6: - col = ScrollView::BLUE; - break; - - case 7: - col = ScrollView::WHITE; - break; - case 8: - col = ScrollView::YELLOW; - break; - case 9: - col = ScrollView::BLACK; - break; - - case 20: - col = ScrollView::CYAN; - break; - case 21: - col = ScrollView::GREEN; - break; - case 22: - col = ScrollView::MAGENTA; - break; - default: - col = ScrollView::BLACK; - } - if (textord_show_initial_words) { - to_win->Pen(col); - /* if (rule < 20) - //interior_style(to_win, INT_SOLID, FALSE); - else - //interior_style(to_win, INT_HOLLOW, TRUE);*/ - //x radius - to_win->Ellipse (current_gap / 2.0f, - blob.height () / 2.0f, //y radius - //x centre - blob.left () - current_gap / 2.0f, - //y centre - blob.bottom () + blob.height () / 2.0f); - } - if (tosp_debug_level > 5) - tprintf(" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n", - blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap, - prev_blob_width, current_gap, next_blob_width, next_gap); -} -#endif - -float Textord::find_mean_blob_spacing(WERD *word) { - C_BLOB_IT cblob_it; - TBOX blob_box; - int32_t gap_sum = 0; - int16_t gap_count = 0; - int16_t prev_right; - - cblob_it.set_to_list (word->cblob_list ()); - if (!cblob_it.empty ()) { - cblob_it.mark_cycle_pt (); - prev_right = cblob_it.data ()->bounding_box ().right (); - //first blob - cblob_it.forward (); - for (; !cblob_it.cycled_list (); cblob_it.forward ()) { - blob_box = cblob_it.data ()->bounding_box (); - gap_sum += blob_box.left () - prev_right; - gap_count++; - prev_right = 
blob_box.right (); - } - } - if (gap_count > 0) - return (gap_sum / (float) gap_count); - else - return 0.0f; -} - - -bool Textord::ignore_big_gap(TO_ROW* row, - int32_t row_length, - GAPMAP* gapmap, - int16_t left, - int16_t right) { - int16_t gap = right - left + 1; - - if (tosp_ignore_big_gaps > 999) return FALSE; // Don't ignore - if (tosp_ignore_big_gaps > 0) - return (gap > tosp_ignore_big_gaps * row->xheight); - if (gap > tosp_ignore_very_big_gaps * row->xheight) - return true; - if (tosp_ignore_big_gaps == 0) { - if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight)) - return true; - if ((gap > 1.75 * row->xheight) && - ((row_length > 35 * row->xheight) || - gapmap->table_gap (left, right))) - return true; - } - else { - /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */ - if ((gap > gapmap_big_gaps * row->xheight) && - gapmap->table_gap (left, right)) - return true; - } - return false; -} - -/********************************************************************** - * reduced_box_next - * - * Compute the bounding box of this blob with merging of x overlaps - * but no pre-chopping. - * Then move the iterator on to the start of the next blob. - * DON'T reduce the box for small things - eg punctuation. 
- **********************************************************************/ -TBOX Textord::reduced_box_next( - TO_ROW *row, // current row - BLOBNBOX_IT *it // iterator to blobds - ) { - BLOBNBOX *blob; //current blob - BLOBNBOX *head_blob; //place to store box - TBOX full_box; //full blob boundg box - TBOX reduced_box; //box of significant part - int16_t left_above_xht; //ABOVE xht left limit - int16_t new_left_above_xht; //ABOVE xht left limit - - blob = it->data (); - if (blob->red_box_set ()) { - reduced_box = blob->reduced_box (); - do { - it->forward(); - blob = it->data(); - } - while (blob->cblob() == nullptr || blob->joined_to_prev()); - return reduced_box; - } - head_blob = blob; - full_box = blob->bounding_box (); - reduced_box = reduced_box_for_blob (blob, row, &left_above_xht); - do { - it->forward (); - blob = it->data (); - if (blob->cblob() == nullptr) - //was pre-chopped - full_box += blob->bounding_box (); - else if (blob->joined_to_prev ()) { - reduced_box += - reduced_box_for_blob(blob, row, &new_left_above_xht); - left_above_xht = std::min(left_above_xht, new_left_above_xht); - } - } - //until next real blob - while (blob->cblob() == nullptr || blob->joined_to_prev()); - - if ((reduced_box.width () > 0) && - ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ()) - < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) { -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_words) - reduced_box.plot (to_win, ScrollView::YELLOW, ScrollView::YELLOW); -#endif - } - else - reduced_box = full_box; - head_blob->set_reduced_box (reduced_box); - return reduced_box; -} - - -/************************************************************************* - * reduced_box_for_blob() - * Find box for blob which is the same height and y position as the whole blob, - * but whose left limit is the left most position of the blob ABOVE the - * baseline and whose right limit is the right most position of the blob BELOW - * the xheight. 
- * - * - * !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on - * "home". Perhaps we need something which say if the width ABOVE the - * xht alone includes the whole of the reduced width, then use the full - * blob box - Might still fail on italic F - * - * Alternatively we could be a little less severe and only reduce the - * left and right edges by half the difference between the full box and - * the reduced box. - * - * NOTE that we need to rotate all the coordinates as - * find_blob_limits finds the y min and max within a specified x band - *************************************************************************/ -TBOX Textord::reduced_box_for_blob( - BLOBNBOX *blob, - TO_ROW *row, - int16_t *left_above_xht) { - float baseline; - float blob_x_centre; - float left_limit; - float right_limit; - float junk; - TBOX blob_box; - - /* Find baseline of centre of blob */ - - blob_box = blob->bounding_box (); - blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0; - baseline = row->baseline.y (blob_x_centre); - - /* - Find LH limit of blob ABOVE the xht. This is so that we can detect certain - caps ht chars which should NOT have their box reduced: T, Y, V, W etc - */ - left_limit = (float) INT32_MAX; - junk = (float) -INT32_MAX; - find_cblob_hlimits(blob->cblob(), (baseline + 1.1 * row->xheight), - static_cast(INT16_MAX), left_limit, junk); - if (left_limit > junk) - *left_above_xht = INT16_MAX; //No area above xht - else - *left_above_xht = (int16_t) floor (left_limit); - /* - Find reduced LH limit of blob - the left extent of the region ABOVE the - baseline. - */ - left_limit = (float) INT32_MAX; - junk = (float) -INT32_MAX; - find_cblob_hlimits(blob->cblob(), baseline, static_cast(INT16_MAX), - left_limit, junk); - - if (left_limit > junk) - return TBOX (); //no area within xht so return empty box - /* - Find reduced RH limit of blob - the right extent of the region BELOW the xht. 
- */ - junk = (float) INT32_MAX; - right_limit = (float) -INT32_MAX; - find_cblob_hlimits(blob->cblob(), static_cast(-INT16_MAX), - (baseline + row->xheight), junk, right_limit); - if (junk > right_limit) - return TBOX (); //no area within xht so return empty box - - return TBOX (ICOORD ((int16_t) floor (left_limit), blob_box.bottom ()), - ICOORD ((int16_t) ceil (right_limit), blob_box.top ())); -} -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tovars.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tovars.cpp deleted file mode 100644 index 6b1b8332..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tovars.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************** - * File: tovars.cpp (Formerly to_vars.c) - * Description: Variables used by textord. - * Author: Ray Smith - * Created: Tue Aug 24 16:55:02 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "tovars.h" -#include "params.h" - -#define EXTERN - -EXTERN BOOL_VAR (textord_show_initial_words, FALSE, "Display separate words"); -EXTERN BOOL_VAR (textord_show_new_words, FALSE, "Display separate words"); -EXTERN BOOL_VAR (textord_show_fixed_words, FALSE, -"Display forced fixed pitch words"); -EXTERN BOOL_VAR (textord_blocksall_fixed, FALSE, "Moan about prop blocks"); -EXTERN BOOL_VAR (textord_blocksall_prop, FALSE, -"Moan about fixed pitch blocks"); -EXTERN BOOL_VAR (textord_blocksall_testing, FALSE, "Dump stats when moaning"); -EXTERN BOOL_VAR (textord_test_mode, FALSE, "Do current test"); -EXTERN INT_VAR (textord_dotmatrix_gap, 3, -"Max pixel gap for broken pixed pitch"); -EXTERN INT_VAR (textord_debug_block, 0, "Block to do debug on"); -EXTERN INT_VAR (textord_pitch_range, 2, "Max range test on pitch"); -EXTERN double_VAR (textord_wordstats_smooth_factor, 0.05, -"Smoothing gap stats"); -EXTERN double_VAR (textord_width_smooth_factor, 0.10, -"Smoothing width stats"); -EXTERN double_VAR (textord_words_width_ile, 0.4, -"Ile of blob widths for space est"); -EXTERN double_VAR (textord_words_maxspace, 4.0, "Multiple of xheight"); -EXTERN double_VAR (textord_words_default_maxspace, 3.5, -"Max believable third space"); -EXTERN double_VAR (textord_words_default_minspace, 0.6, -"Fraction of xheight"); -EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight"); -EXTERN double_VAR (textord_words_default_nonspace, 0.2, -"Fraction of xheight"); -EXTERN double_VAR(textord_words_initial_lower, 0.25, - "Max initial cluster size"); -EXTERN double_VAR (textord_words_initial_upper, 0.15, -"Min initial cluster spacing"); -EXTERN double_VAR (textord_words_minlarge, 0.75, -"Fraction of valid gaps needed"); -EXTERN double_VAR (textord_words_pitchsd_threshold, 0.040, -"Pitch sync threshold"); -EXTERN double_VAR (textord_words_def_fixed, 0.016, -"Threshold for definite fixed"); 
-EXTERN double_VAR (textord_words_def_prop, 0.090, -"Threshold for definite prop"); -EXTERN INT_VAR (textord_words_veto_power, 5, -"Rows required to outvote a veto"); -EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08, -"Fraction of xheight for sameness"); -EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE, -"Scale scores on big words"); -EXTERN double_VAR(words_initial_lower, 0.5, "Max initial cluster size"); -EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing"); -EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight"); -EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight"); -EXTERN double_VAR (words_default_fixed_limit, 0.6, "Allowed size variance"); -EXTERN double_VAR (textord_words_definite_spread, 0.30, -"Non-fuzzy spacing region"); -EXTERN double_VAR (textord_spacesize_ratiofp, 2.8, -"Min ratio space/nonspace"); -EXTERN double_VAR (textord_spacesize_ratioprop, 2.0, -"Min ratio space/nonspace"); -EXTERN double_VAR (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); -EXTERN double_VAR (textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch"); -EXTERN double_VAR (textord_fp_min_width, 0.5, "Min width of decent blobs"); diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tovars.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tovars.h deleted file mode 100644 index 46315bb9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/tovars.h +++ /dev/null @@ -1,89 +0,0 @@ -/********************************************************************** - * File: tovars.h (Formerly to_vars.h) - * Description: Variables used by textord. - * Author: Ray Smith - * Created: Tue Aug 24 16:55:02 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TOVARS_H -#define TOVARS_H - -#include "params.h" - -extern BOOL_VAR_H (textord_show_initial_words, FALSE, -"Display separate words"); -extern BOOL_VAR_H (textord_show_new_words, FALSE, "Display separate words"); -extern BOOL_VAR_H (textord_show_fixed_words, FALSE, -"Display forced fixed pitch words"); -extern BOOL_VAR_H (textord_blocksall_fixed, FALSE, "Moan about prop blocks"); -extern BOOL_VAR_H (textord_blocksall_prop, FALSE, -"Moan about fixed pitch blocks"); -extern BOOL_VAR_H (textord_blocksall_testing, FALSE, -"Dump stats when moaning"); -extern BOOL_VAR_H (textord_test_mode, FALSE, "Do current test"); -extern INT_VAR_H (textord_dotmatrix_gap, 3, -"Max pixel gap for broken pixed pitch"); -extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on"); -extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch"); -extern double_VAR_H (textord_wordstats_smooth_factor, 0.05, -"Smoothing gap stats"); -extern double_VAR_H (textord_width_smooth_factor, 0.10, -"Smoothing width stats"); -extern double_VAR_H (textord_words_width_ile, 0.4, -"Ile of blob widths for space est"); -extern double_VAR_H (textord_words_maxspace, 4.0, "Multiple of xheight"); -extern double_VAR_H (textord_words_default_maxspace, 3.5, -"Max believable third space"); -extern double_VAR_H (textord_words_default_minspace, 0.6, -"Fraction of xheight"); -extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight"); -extern double_VAR_H (textord_words_default_nonspace, 
0.2, -"Fraction of xheight"); -extern double_VAR_H(textord_words_initial_lower, 0.25, - "Max initial cluster size"); -extern double_VAR_H (textord_words_initial_upper, 0.15, -"Min initial cluster spacing"); -extern double_VAR_H (textord_words_minlarge, 0.75, -"Fraction of valid gaps needed"); -extern double_VAR_H (textord_words_pitchsd_threshold, 0.025, -"Pitch sync threshold"); -extern double_VAR_H (textord_words_def_fixed, 0.01, -"Threshold for definite fixed"); -extern double_VAR_H (textord_words_def_prop, 0.06, -"Threshold for definite prop"); -extern INT_VAR_H (textord_words_veto_power, 5, -"Rows required to outvote a veto"); -extern double_VAR_H (textord_pitch_rowsimilarity, 0.08, -"Fraction of xheight for sameness"); -extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE, -"Scale scores on big words"); -extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size"); -extern double_VAR_H (words_initial_upper, 0.15, -"Min initial cluster spacing"); -extern double_VAR_H (words_default_prop_nonspace, 0.25, -"Fraction of xheight"); -extern double_VAR_H (words_default_fixed_space, 0.75, "Fraction of xheight"); -extern double_VAR_H (words_default_fixed_limit, 0.6, "Allowed size variance"); -extern double_VAR_H (textord_words_definite_spread, 0.30, -"Non-fuzzy spacing region"); -extern double_VAR_H (textord_spacesize_ratiofp, 2.8, -"Min ratio space/nonspace"); -extern double_VAR_H (textord_spacesize_ratioprop, 2.0, -"Min ratio space/nonspace"); -extern double_VAR_H (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); -extern double_VAR_H (textord_max_pitch_iqr, 0.20, -"Xh fraction noise in pitch"); -extern double_VAR_H (textord_fp_min_width, 0.5, "Min width of decent blobs"); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/underlin.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/underlin.cpp deleted file mode 100644 index af262032..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/underlin.cpp +++ /dev/null @@ -1,278 +0,0 @@ -/********************************************************************** - * File: underlin.cpp (Formerly undrline.c) - * Description: Code to chop blobs apart from underlines. - * Author: Ray Smith - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "underlin.h" - -#define PROJECTION_MARGIN 10 //arbitrary -#define EXTERN - -EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore"); -EXTERN BOOL_VAR (textord_restore_underlines, TRUE, -"Chop underlines & put back"); - -/********************************************************************** - * restore_underlined_blobs - * - * Find underlined blobs and put them back in the row. 
- **********************************************************************/ - -void restore_underlined_blobs( //get chop points - TO_BLOCK *block //block to do - ) { - int16_t chop_coord; //chop boundary - TBOX blob_box; //of underline - BLOBNBOX *u_line; //underline bit - TO_ROW *row; //best row for blob - ICOORDELT_LIST chop_cells; //blobs to cut out - //real underlines - BLOBNBOX_LIST residual_underlines; - C_OUTLINE_LIST left_coutlines; - C_OUTLINE_LIST right_coutlines; - ICOORDELT_IT cell_it = &chop_cells; - //under lines - BLOBNBOX_IT under_it = &block->underlines; - BLOBNBOX_IT ru_it = &residual_underlines; - - if (block->get_rows()->empty()) - return; // Don't crash if there are no rows. - for (under_it.mark_cycle_pt (); !under_it.cycled_list (); - under_it.forward ()) { - u_line = under_it.extract (); - blob_box = u_line->bounding_box (); - row = most_overlapping_row (block->get_rows (), u_line); - if (row == nullptr) - return; // Don't crash if there is no row. - find_underlined_blobs (u_line, &row->baseline, row->xheight, - row->xheight * textord_underline_offset, - &chop_cells); - cell_it.set_to_list (&chop_cells); - for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); - cell_it.forward ()) { - chop_coord = cell_it.data ()->x (); - if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) { - split_to_blob (u_line, chop_coord, - textord_fp_chop_error + 0.5, - &left_coutlines, - &right_coutlines); - if (!left_coutlines.empty()) { - ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); - } - chop_coord = cell_it.data ()->y (); - split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, - &left_coutlines, &right_coutlines); - if (!left_coutlines.empty()) { - row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines))); - } - u_line = nullptr; //no more blobs to add - } - delete cell_it.extract(); - } - if (!right_coutlines.empty ()) { - split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, - &left_coutlines, 
&right_coutlines); - if (!left_coutlines.empty()) - ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); - } - if (u_line != nullptr) { - delete u_line->cblob(); - delete u_line; - } - } - if (!ru_it.empty()) { - ru_it.move_to_first(); - for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) { - under_it.add_after_then_move(ru_it.extract()); - } - } -} - - -/********************************************************************** - * most_overlapping_row - * - * Return the row which most overlaps the blob. - **********************************************************************/ - -TO_ROW *most_overlapping_row( //find best row - TO_ROW_LIST *rows, //list of rows - BLOBNBOX *blob //blob to place - ) { - int16_t x = (blob->bounding_box ().left () - + blob->bounding_box ().right ()) / 2; - TO_ROW_IT row_it = rows; //row iterator - TO_ROW *row; //current row - TO_ROW *best_row; //output row - float overlap; //of blob & row - float bestover; //best overlap - - best_row = nullptr; - bestover = (float) -INT32_MAX; - if (row_it.empty ()) - return nullptr; - row = row_it.data (); - row_it.mark_cycle_pt (); - while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top () - && !row_it.cycled_list ()) { - best_row = row; - bestover = - blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop; - row_it.forward (); - row = row_it.data (); - } - while (row->baseline.y (x) + row->xheight + row->ascrise - >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) { - overlap = row->baseline.y (x) + row->xheight + row->ascrise; - if (blob->bounding_box ().top () < overlap) - overlap = blob->bounding_box ().top (); - if (blob->bounding_box ().bottom () > - row->baseline.y (x) + row->descdrop) - overlap -= blob->bounding_box ().bottom (); - else - overlap -= row->baseline.y (x) + row->descdrop; - if (overlap > bestover) { - bestover = overlap; - best_row = row; - } - row_it.forward (); - row = row_it.data (); - } - if (bestover < 0 - 
&& row->baseline.y (x) + row->xheight + row->ascrise - - blob->bounding_box ().bottom () > bestover) - best_row = row; - return best_row; -} - - -/********************************************************************** - * find_underlined_blobs - * - * Find the start and end coords of blobs in the underline. - **********************************************************************/ - -void find_underlined_blobs( //get chop points - BLOBNBOX *u_line, //underlined unit - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - ICOORDELT_LIST *chop_cells //places to chop - ) { - int16_t x, y; //sides of blob - ICOORD blob_chop; //sides of blob - TBOX blob_box = u_line->bounding_box (); - //cell iterator - ICOORDELT_IT cell_it = chop_cells; - STATS upper_proj (blob_box.left (), blob_box.right () + 1); - STATS middle_proj (blob_box.left (), blob_box.right () + 1); - STATS lower_proj (blob_box.left (), blob_box.right () + 1); - C_OUTLINE_IT out_it; //outlines of blob - - ASSERT_HOST (u_line->cblob () != nullptr); - - out_it.set_to_list (u_line->cblob ()->out_list ()); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_cunderline_projection (out_it.data (), - baseline, xheight, baseline_offset, - &lower_proj, &middle_proj, &upper_proj); - } - - for (x = blob_box.left (); x < blob_box.right (); x++) { - if (middle_proj.pile_count (x) > 0) { - for (y = x + 1; - y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); - blob_chop = ICOORD (x, y); - cell_it.add_after_then_move (new ICOORDELT (blob_chop)); - x = y; - } - } -} - - -/********************************************************************** - * vertical_cunderline_projection - * - * Compute the vertical projection of a outline from its outlines - * and add to the given STATS. 
- **********************************************************************/ - -void vertical_cunderline_projection( //project outlines - C_OUTLINE *outline, //outline to project - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - STATS *lower_proj, //below baseline - STATS *middle_proj, //centre region - STATS *upper_proj //top region - ) { - ICOORD pos; //current point - ICOORD step; //edge step - int16_t lower_y, upper_y; //region limits - int32_t length; //of outline - int16_t stepindex; //current step - C_OUTLINE_IT out_it = outline->child (); - - pos = outline->start_pos (); - length = outline->pathlength (); - for (stepindex = 0; stepindex < length; stepindex++) { - step = outline->step (stepindex); - if (step.x () > 0) { - lower_y = - (int16_t) floor (baseline->y (pos.x ()) + baseline_offset + 0.5); - upper_y = - (int16_t) floor (baseline->y (pos.x ()) + baseline_offset + - xheight + 0.5); - if (pos.y () >= lower_y) { - lower_proj->add (pos.x (), -lower_y); - if (pos.y () >= upper_y) { - middle_proj->add (pos.x (), lower_y - upper_y); - upper_proj->add (pos.x (), upper_y - pos.y ()); - } - else - middle_proj->add (pos.x (), lower_y - pos.y ()); - } - else - lower_proj->add (pos.x (), -pos.y ()); - } - else if (step.x () < 0) { - lower_y = - (int16_t) floor (baseline->y (pos.x () - 1) + baseline_offset + - 0.5); - upper_y = - (int16_t) floor (baseline->y (pos.x () - 1) + baseline_offset + - xheight + 0.5); - if (pos.y () >= lower_y) { - lower_proj->add (pos.x () - 1, lower_y); - if (pos.y () >= upper_y) { - middle_proj->add (pos.x () - 1, upper_y - lower_y); - upper_proj->add (pos.x () - 1, pos.y () - upper_y); - } - else - middle_proj->add (pos.x () - 1, pos.y () - lower_y); - } - else - lower_proj->add (pos.x () - 1, pos.y ()); - } - pos += step; - } - - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_cunderline_projection (out_it.data (), - baseline, 
xheight, baseline_offset, - lower_proj, middle_proj, upper_proj); - } -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/underlin.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/underlin.h deleted file mode 100644 index eb0051ac..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/underlin.h +++ /dev/null @@ -1,51 +0,0 @@ -/********************************************************************** - * File: underlin.h (Formerly undrline.h) - * Description: Code to chop blobs apart from underlines. - * Author: Ray Smith - * - * (C) Copyright 1994, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef UNDERLIN_H -#define UNDERLIN_H - -#include "fpchop.h" - -extern double_VAR_H (textord_underline_offset, 0.1, -"Fraction of x to ignore"); -extern BOOL_VAR_H (textord_restore_underlines, FALSE, -"Chop underlines & put back"); -void restore_underlined_blobs( //get chop points - TO_BLOCK *block //block to do - ); -TO_ROW *most_overlapping_row( //find best row - TO_ROW_LIST *rows, //list of rows - BLOBNBOX *blob //blob to place - ); -void find_underlined_blobs( //get chop points - BLOBNBOX *u_line, //underlined unit - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - ICOORDELT_LIST *chop_cells //places to chop - ); -void vertical_cunderline_projection( //project outlines - C_OUTLINE *outline, //outline to project - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - STATS *lower_proj, //below baseline - STATS *middle_proj, //centre region - STATS *upper_proj //top region - ); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/wordseg.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/wordseg.cpp deleted file mode 100644 index b245baab..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/wordseg.cpp +++ /dev/null @@ -1,624 +0,0 @@ -/********************************************************************** - * File: wordseg.cpp (Formerly wspace.c) - * Description: Code to segment the blobs into words. - * Author: Ray Smith - * Created: Fri Oct 16 11:32:28 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "blobbox.h" -#include "statistc.h" -#include "drawtord.h" -#include "makerow.h" -#include "pitsync1.h" -#include "tovars.h" -#include "topitch.h" -#include "cjkpitch.h" -#include "textord.h" -#include "fpchop.h" -#include "wordseg.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define EXTERN - -EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping"); -EXTERN BOOL_VAR(textord_force_make_prop_words, FALSE, - "Force proportional word segmentation on all rows"); -EXTERN BOOL_VAR(textord_chopper_test, FALSE, - "Chopper is being tested."); - -#define FIXED_WIDTH_MULTIPLE 5 -#define BLOCK_STATS_CLUSTERS 10 - - -/** - * @name make_single_word - * - * For each row, arrange the blobs into one word. There is no fixed - * pitch detection. - */ - -void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { - TO_ROW_IT to_row_it(rows); - ROW_IT row_it(real_rows); - for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); - to_row_it.forward()) { - TO_ROW* row = to_row_it.data(); - // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready - // to create the word. 
- C_BLOB_LIST cblobs; - C_BLOB_IT cblob_it(&cblobs); - BLOBNBOX_IT box_it(row->blob_list()); - for (;!box_it.empty(); box_it.forward()) { - BLOBNBOX* bblob= box_it.extract(); - if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { - if (bblob->cblob() != nullptr) { - C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); - cout_it.move_to_last(); - cout_it.add_list_after(bblob->cblob()->out_list()); - delete bblob->cblob(); - } - } else { - if (bblob->cblob() != nullptr) - cblob_it.add_after_then_move(bblob->cblob()); - } - delete bblob; - } - // Convert the TO_ROW to a ROW. - ROW* real_row = new ROW(row, static_cast(row->kern_size), - static_cast(row->space_size)); - WERD_IT word_it(real_row->word_list()); - WERD* word = new WERD(&cblobs, 0, nullptr); - word->set_flag(W_BOL, TRUE); - word->set_flag(W_EOL, TRUE); - word->set_flag(W_DONT_CHOP, one_blob); - word_it.add_after_then_move(word); - row_it.add_after_then_move(real_row); - } -} - -/** - * make_words - * - * Arrange the blobs into words. - */ -void make_words(tesseract::Textord *textord, - ICOORD page_tr, // top right - float gradient, // page skew - BLOCK_LIST *blocks, // block list - TO_BLOCK_LIST *port_blocks) { // output list - TO_BLOCK_IT block_it; // iterator - TO_BLOCK *block; // current block - - if (textord->use_cjk_fp_model()) { - compute_fixed_pitch_cjk(page_tr, port_blocks); - } else { - compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f), - !(BOOL8) textord_test_landscape); - } - textord->to_spacing(page_tr, port_blocks); - block_it.set_to_list(port_blocks); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - block = block_it.data(); - make_real_words(textord, block, FCOORD(1.0f, 0.0f)); - } -} - - -/** - * @name set_row_spaces - * - * Set the min_space and max_nonspace members of the row so that - * the blobs can be arranged into words. 
- */ - -void set_row_spaces( //find space sizes - TO_BLOCK* block, //block to do - FCOORD rotation, //for drawing - bool testing_on //correct orientation -) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - - if (row_it.empty ()) - return; //empty block - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->fixed_pitch == 0) { - row->min_space = - (int32_t) ceil (row->pr_space - - (row->pr_space - - row->pr_nonsp) * textord_words_definite_spread); - row->max_nonspace = - (int32_t) floor (row->pr_nonsp + - (row->pr_space - - row->pr_nonsp) * textord_words_definite_spread); - if (testing_on && textord_show_initial_words) { - tprintf ("Assigning defaults %d non, %d space to row at %g\n", - row->max_nonspace, row->min_space, row->intercept ()); - } - row->space_threshold = (row->max_nonspace + row->min_space) / 2; - row->space_size = row->pr_space; - row->kern_size = row->pr_nonsp; - } -#ifndef GRAPHICS_DISABLED - if (textord_show_initial_words && testing_on) { - plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row); - } -#endif - } -} - - -/** - * @name row_words - * - * Compute the max nonspace and min space for the row. 
- */ - -int32_t row_words( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug -) { - bool testing_row; //contains testpt - bool prev_valid; //if decent size - int32_t prev_x; //end of prev blob - int32_t cluster_count; //no of clusters - int32_t gap_index; //which cluster - int32_t smooth_factor; //for smoothing stats - BLOBNBOX *blob; //current blob - float lower, upper; //clustering parameters - float gaps[3]; //gap clusers - ICOORD testpt; - TBOX blob_box; //bounding box - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS gap_stats (0, maxwidth); - STATS cluster_stats[4]; //clusters - - testpt = ICOORD (textord_test_x, textord_test_y); - smooth_factor = - (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5); - // if (testing_on) - // tprintf("Row smooth factor=%d\n",smooth_factor); - prev_valid = false; - prev_x = -INT32_MAX; - testing_row = false; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - if (blob_box.contains (testpt)) - testing_row = true; - gap_stats.add (blob_box.width (), 1); - } - gap_stats.clear (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if (prev_valid && blob_box.left () - prev_x < maxwidth) { - gap_stats.add (blob_box.left () - prev_x, 1); - } - prev_valid = TRUE; - prev_x = blob_box.right (); - } - } - if (gap_stats.get_total () == 0) { - row->min_space = 0; //no evidence - row->max_nonspace = 0; - return 0; - } - gap_stats.smooth (smooth_factor); - lower = row->xheight * textord_words_initial_lower; - upper = row->xheight * textord_words_initial_upper; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, 3, - 
cluster_stats); - while (cluster_count < 2 && ceil (lower) < floor (upper)) { - //shrink gap - upper = (upper * 3 + lower) / 4; - lower = (lower * 3 + upper) / 4; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, 3, - cluster_stats); - } - if (cluster_count < 2) { - row->min_space = 0; //no evidence - row->max_nonspace = 0; - return 0; - } - for (gap_index = 0; gap_index < cluster_count; gap_index++) - gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); - //get medians - if (cluster_count > 2) { - if (testing_on && textord_show_initial_words) { - tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n", - row->intercept (), - cluster_stats[1].ile (0.5), - cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5)); - } - lower = gaps[0]; - if (gaps[1] > lower) { - upper = gaps[1]; //prefer most frequent - if (upper < block->xheight * textord_words_min_minspace - && gaps[2] > gaps[1]) { - upper = gaps[2]; - } - } - else if (gaps[2] > lower - && gaps[2] >= block->xheight * textord_words_min_minspace) - upper = gaps[2]; - else if (lower >= block->xheight * textord_words_min_minspace) { - upper = lower; //not nice - lower = gaps[1]; - if (testing_on && textord_show_initial_words) { - tprintf ("Had to switch most common from lower to upper!!\n"); - gap_stats.print(); - } - } - else { - row->min_space = 0; //no evidence - row->max_nonspace = 0; - return 0; - } - } - else { - if (gaps[1] < gaps[0]) { - if (testing_on && textord_show_initial_words) { - tprintf ("Had to switch most common from lower to upper!!\n"); - gap_stats.print(); - } - lower = gaps[1]; - upper = gaps[0]; - } - else { - upper = gaps[1]; - lower = gaps[0]; - } - } - if (upper < block->xheight * textord_words_min_minspace) { - row->min_space = 0; //no evidence - row->max_nonspace = 0; - return 0; - } - if (upper * 3 < block->min_space * 2 + block->max_nonspace - || lower * 3 > block->min_space * 2 + block->max_nonspace) { - if (testing_on && textord_show_initial_words) { - 
tprintf ("Disagreement between block and row at %g!!\n", - row->intercept ()); - tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper); - gap_stats.print(); - } - } - row->min_space = - (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread); - row->max_nonspace = - (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread); - row->space_threshold = (row->max_nonspace + row->min_space) / 2; - row->space_size = upper; - row->kern_size = lower; - if (testing_on && textord_show_initial_words) { - if (testing_row) { - tprintf ("GAP STATS\n"); - gap_stats.print(); - tprintf ("SPACE stats\n"); - cluster_stats[2].print_summary(); - tprintf ("NONSPACE stats\n"); - cluster_stats[1].print_summary(); - } - tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", - row->intercept (), row->min_space, upper, - row->max_nonspace, lower); - } - return cluster_stats[2].get_total (); -} - - -/** - * @name row_words2 - * - * Compute the max nonspace and min space for the row. 
- */ - -int32_t row_words2( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug -) { - bool prev_valid; //if decent size - bool this_valid; //current blob big enough - int32_t prev_x; //end of prev blob - int32_t min_width; //min interesting width - int32_t valid_count; //good gaps - int32_t total_count; //total gaps - int32_t cluster_count; //no of clusters - int32_t prev_count; //previous cluster_count - int32_t gap_index; //which cluster - int32_t smooth_factor; //for smoothing stats - BLOBNBOX *blob; //current blob - float lower, upper; //clustering parameters - ICOORD testpt; - TBOX blob_box; //bounding box - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS gap_stats (0, maxwidth); - //gap sizes - float gaps[BLOCK_STATS_CLUSTERS]; - STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; - //clusters - - testpt = ICOORD (textord_test_x, textord_test_y); - smooth_factor = - (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5); - // if (testing_on) - // tprintf("Row smooth factor=%d\n",smooth_factor); - prev_valid = false; - prev_x = -INT16_MAX; - const bool testing_row = false; - //min blob size - min_width = (int32_t) block->pr_space; - total_count = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - this_valid = blob_box.width () >= min_width; - if (this_valid && prev_valid - && blob_box.left () - prev_x < maxwidth) { - gap_stats.add (blob_box.left () - prev_x, 1); - } - total_count++; //count possibles - prev_x = blob_box.right (); - prev_valid = this_valid; - } - } - valid_count = gap_stats.get_total (); - if (valid_count < total_count * textord_words_minlarge) { - gap_stats.clear (); - prev_x = -INT16_MAX; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list 
(); - blob_it.forward ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if (blob_box.left () - prev_x < maxwidth) { - gap_stats.add (blob_box.left () - prev_x, 1); - } - prev_x = blob_box.right (); - } - } - } - if (gap_stats.get_total () == 0) { - row->min_space = 0; //no evidence - row->max_nonspace = 0; - return 0; - } - - cluster_count = 0; - lower = block->xheight * words_initial_lower; - upper = block->xheight * words_initial_upper; - gap_stats.smooth (smooth_factor); - do { - prev_count = cluster_count; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, - BLOCK_STATS_CLUSTERS, cluster_stats); - } - while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); - if (cluster_count < 1) { - row->min_space = 0; - row->max_nonspace = 0; - return 0; - } - for (gap_index = 0; gap_index < cluster_count; gap_index++) - gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); - //get medians - if (testing_on) { - tprintf ("cluster_count=%d:", cluster_count); - for (gap_index = 0; gap_index < cluster_count; gap_index++) - tprintf (" %g(%d)", gaps[gap_index], - cluster_stats[gap_index + 1].get_total ()); - tprintf ("\n"); - } - - //Try to find proportional non-space and space for row. 
- for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] > block->max_nonspace; gap_index++); - if (gap_index < cluster_count) - lower = gaps[gap_index]; //most frequent below - else { - if (testing_on) - tprintf ("No cluster below block threshold!, using default=%g\n", - block->pr_nonsp); - lower = block->pr_nonsp; - } - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] <= block->max_nonspace; gap_index++); - if (gap_index < cluster_count) - upper = gaps[gap_index]; //most frequent above - else { - if (testing_on) - tprintf ("No cluster above block threshold!, using default=%g\n", - block->pr_space); - upper = block->pr_space; - } - row->min_space = - (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread); - row->max_nonspace = - (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread); - row->space_threshold = (row->max_nonspace + row->min_space) / 2; - row->space_size = upper; - row->kern_size = lower; - if (testing_on) { - if (testing_row) { - tprintf ("GAP STATS\n"); - gap_stats.print(); - tprintf ("SPACE stats\n"); - cluster_stats[2].print_summary(); - tprintf ("NONSPACE stats\n"); - cluster_stats[1].print_summary(); - } - tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", - row->intercept (), row->min_space, upper, - row->max_nonspace, lower); - } - return 1; -} - - -/** - * @name make_real_words - * - * Convert a TO_BLOCK to a BLOCK. 
- */ - -void make_real_words( - tesseract::Textord *textord, - TO_BLOCK *block, //block to do - FCOORD rotation //for drawing - ) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - ROW *real_row = nullptr; //output row - ROW_IT real_row_it = block->block->row_list (); - - if (row_it.empty ()) - return; //empty block - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->blob_list ()->empty () && !row->rep_words.empty ()) { - real_row = make_rep_words (row, block); - } else if (!row->blob_list()->empty()) { - // In a fixed pitch document, some lines may be detected as fixed pitch - // while others don't, and will go through different path. - // For non-space delimited language like CJK, fixed pitch chop always - // leave the entire line as one word. We can force consistent chopping - // with force_make_prop_words flag. - POLY_BLOCK* pb = block->block->pdblk.poly_block(); - if (textord_chopper_test) { - real_row = textord->make_blob_words (row, rotation); - } else if (textord_force_make_prop_words || - (pb != nullptr && !pb->IsText()) || - row->pitch_decision == PITCH_DEF_PROP || - row->pitch_decision == PITCH_CORR_PROP) { - real_row = textord->make_prop_words (row, rotation); - } else if (row->pitch_decision == PITCH_DEF_FIXED || - row->pitch_decision == PITCH_CORR_FIXED) { - real_row = fixed_pitch_words (row, rotation); - } else { - ASSERT_HOST(FALSE); - } - } - if (real_row != nullptr) { - //put row in block - real_row_it.add_after_then_move (real_row); - } - } - block->block->set_stats (block->fixed_pitch == 0, (int16_t) block->kern_size, - (int16_t) block->space_size, - (int16_t) block->fixed_pitch); - block->block->check_pitch (); -} - - -/** - * @name make_rep_words - * - * Fabricate a real row from only the repeated blob words. - * Get the xheight from the block as it may be more meaningful. 
- */ - -ROW *make_rep_words( //make a row - TO_ROW *row, //row to convert - TO_BLOCK *block //block it lives in - ) { - ROW *real_row; //output row - TBOX word_box; //bounding box - //iterator - WERD_IT word_it = &row->rep_words; - - if (word_it.empty ()) - return nullptr; - word_box = word_it.data ()->bounding_box (); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) - word_box += word_it.data ()->bounding_box (); - row->xheight = block->xheight; - real_row = new ROW(row, - (int16_t) block->kern_size, (int16_t) block->space_size); - word_it.set_to_list (real_row->word_list ()); - //put words in row - word_it.add_list_after (&row->rep_words); - real_row->recalc_bounding_box (); - return real_row; -} - - -/** - * @name make_real_word - * - * Construct a WERD from a given number of adjacent entries in a - * list of BLOBNBOXs. - */ - -WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator - int32_t blobcount, //no of blobs to use - bool bol, //start of line - uint8_t blanks //no of blanks - ) { - C_OUTLINE_IT cout_it; - C_BLOB_LIST cblobs; - C_BLOB_IT cblob_it = &cblobs; - WERD *word; // new word - BLOBNBOX *bblob; // current blob - int32_t blobindex; // in row - - for (blobindex = 0; blobindex < blobcount; blobindex++) { - bblob = box_it->extract(); - if (bblob->joined_to_prev()) { - if (bblob->cblob() != nullptr) { - cout_it.set_to_list(cblob_it.data()->out_list()); - cout_it.move_to_last(); - cout_it.add_list_after(bblob->cblob()->out_list()); - delete bblob->cblob(); - } - } - else { - if (bblob->cblob() != nullptr) - cblob_it.add_after_then_move(bblob->cblob()); - } - delete bblob; - box_it->forward(); // next one - } - - if (blanks < 1) - blanks = 1; - - word = new WERD(&cblobs, blanks, nullptr); - - if (bol) - word->set_flag(W_BOL, true); - if (box_it->at_first()) - word->set_flag(W_EOL, true); // at end of line - - return word; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/wordseg.h 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/wordseg.h deleted file mode 100644 index 4877f90c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/wordseg.h +++ /dev/null @@ -1,76 +0,0 @@ -/********************************************************************** - * File: wordseg.h (Formerly wspace.h) - * Description: Code to segment the blobs into words. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef WORDSEG_H -#define WORDSEG_H - -#include "params.h" -#include "blobbox.h" -#include "textord.h" - -namespace tesseract { -class Tesseract; -} - -extern BOOL_VAR_H (textord_fp_chopping, TRUE, "Do fixed pitch chopping"); -extern BOOL_VAR_H(textord_force_make_prop_words, FALSE, - "Force proportional word segmentation on all rows"); -extern BOOL_VAR_H (textord_chopper_test, FALSE, - "Chopper is being tested."); - -void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows); -void make_words(tesseract::Textord *textord, - ICOORD page_tr, // top right - float gradient, // page skew - BLOCK_LIST *blocks, // block list - TO_BLOCK_LIST *port_blocks); // output list -void set_row_spaces( //find space sizes - TO_BLOCK* block, //block to do - FCOORD rotation, //for drawing - bool testing_on //correct orientation -); -int32_t row_words( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug -); -int32_t row_words2( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug -); -void make_real_words( - tesseract::Textord *textord, - TO_BLOCK *block, //block to do - FCOORD rotation //for drawing - ); -ROW *make_rep_words( //make a row - TO_ROW *row, //row to convert - TO_BLOCK *block //block it lives in - ); -WERD *make_real_word( //make a WERD - BLOBNBOX_IT* box_it, //iterator - int32_t blobcount, //no of blobs to use - bool bol, //start of line - uint8_t blanks //no of blanks -); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/workingpartset.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/workingpartset.cpp deleted file mode 
100644 index 97ce70ae..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/workingpartset.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: workingpartset.cpp -// Description: Class to hold a working set of partitions of the page -// during construction of text/image regions. -// Author: Ray Smith -// Created: Tue Ocr 28 17:21:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "workingpartset.h" -#include "colpartition.h" - -namespace tesseract { - -ELISTIZE(WorkingPartSet) - -// Add the partition to this WorkingPartSet. Unrelated partitions are -// stored in the order in which they are received, but if the partition -// has a SingletonPartner, make sure that it stays with its partner. -void WorkingPartSet::AddPartition(ColPartition* part) { - ColPartition* partner = part->SingletonPartner(true); - if (partner != nullptr) { - ASSERT_HOST(partner->SingletonPartner(false) == part); - } - if (latest_part_ == nullptr || partner == nullptr) { - // This partition goes at the end of the list - part_it_.move_to_last(); - } else if (latest_part_->SingletonPartner(false) != part) { - // Reposition the iterator to the correct partner, or at the end. 
- for (part_it_.move_to_first(); !part_it_.at_last() && - part_it_.data() != partner; - part_it_.forward()); - } - part_it_.add_after_then_move(part); - latest_part_ = part; -} - -// Make blocks out of any partitions in this WorkingPartSet, and append -// them to the end of the blocks list. bleft, tright and resolution give -// the bounds and resolution of the source image, so that blocks can be -// made to fit in the bounds. -// All ColPartitions go in the used_parts list, as they need to be kept -// around, but are no longer needed. -void WorkingPartSet::ExtractCompletedBlocks(const ICOORD& bleft, - const ICOORD& tright, - int resolution, - ColPartition_LIST* used_parts, - BLOCK_LIST* blocks, - TO_BLOCK_LIST* to_blocks) { - MakeBlocks(bleft, tright, resolution, used_parts); - BLOCK_IT block_it(blocks); - block_it.move_to_last(); - block_it.add_list_after(&completed_blocks_); - TO_BLOCK_IT to_block_it(to_blocks); - to_block_it.move_to_last(); - to_block_it.add_list_after(&to_blocks_); -} - -// Insert the given blocks at the front of the completed_blocks_ list so -// they can be kept in the correct reading order. -void WorkingPartSet::InsertCompletedBlocks(BLOCK_LIST* blocks, - TO_BLOCK_LIST* to_blocks) { - BLOCK_IT block_it(&completed_blocks_); - block_it.add_list_before(blocks); - TO_BLOCK_IT to_block_it(&to_blocks_); - to_block_it.add_list_before(to_blocks); -} - -// Make a block using lines parallel to the given vector that fit between -// the min and max coordinates specified by the ColPartitions. -// Construct a block from the given list of partitions. -void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright, - int resolution, ColPartition_LIST* used_parts) { - part_it_.move_to_first(); - while (!part_it_.empty()) { - // Gather a list of ColPartitions in block_parts that will be split - // by linespacing into smaller blocks. 
- ColPartition_LIST block_parts; - ColPartition_IT block_it(&block_parts); - ColPartition* next_part = nullptr; - bool text_block = false; - do { - ColPartition* part = part_it_.extract(); - if (part->blob_type() == BRT_UNKNOWN || - (part->IsTextType() && part->type() != PT_TABLE)) - text_block = true; - part->set_working_set(nullptr); - part_it_.forward(); - block_it.add_after_then_move(part); - next_part = part->SingletonPartner(false); - if (part_it_.empty() || next_part != part_it_.data()) { - // Sequences of partitions can get split by titles. - next_part = nullptr; - } - // Merge adjacent blocks that are of the same type and let the - // linespacing determine the real boundaries. - if (next_part == nullptr && !part_it_.empty()) { - ColPartition* next_block_part = part_it_.data(); - const TBOX& part_box = part->bounding_box(); - const TBOX& next_box = next_block_part->bounding_box(); - - // In addition to the same type, the next box must not be above the - // current box, nor (if image) too far below. - PolyBlockType type = part->type(), next_type = next_block_part->type(); - if (ColPartition::TypesSimilar(type, next_type) && - !part->IsLineType() && !next_block_part->IsLineType() && - next_box.bottom() <= part_box.top() && - (text_block || part_box.bottom() <= next_box.top())) - next_part = next_block_part; - } - } while (!part_it_.empty() && next_part != nullptr); - if (!text_block) { - TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright, - &block_parts, used_parts); - if (to_block != nullptr) { - TO_BLOCK_IT to_block_it(&to_blocks_); - to_block_it.add_to_end(to_block); - BLOCK_IT block_it(&completed_blocks_); - block_it.add_to_end(to_block->block); - } - } else { - // Further sub-divide text blocks where linespacing changes. 
- ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts, - used_parts, - &completed_blocks_, &to_blocks_); - } - } - part_it_.set_to_list(&part_set_); - latest_part_ = nullptr; - ASSERT_HOST(completed_blocks_.length() == to_blocks_.length()); -} - -} // namespace tesseract. diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/workingpartset.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/workingpartset.h deleted file mode 100644 index 6fb342aa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/textord/workingpartset.h +++ /dev/null @@ -1,88 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: workingpartset.h -// Description: Class to hold a working set of partitions of the page -// during construction of text/image regions. -// Author: Ray Smith -// Created: Tue Ocr 28 17:21:01 PDT 2008 -// -// (C) Copyright 2008, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TEXTORD_WORKINGPARSET_H_ -#define TESSERACT_TEXTORD_WORKINGPARSET_H_ - -#include "blobbox.h" // For TO_BLOCK_LIST and BLOCK_LIST. -#include "colpartition.h" // For ColPartition_LIST. 
- -namespace tesseract { - -// WorkingPartSet holds a working set of ColPartitions during transformation -// from the grid-based storage to regions in logical reading order, and is -// therefore only used during construction of the regions. -class WorkingPartSet : public ELIST_LINK { - public: - explicit WorkingPartSet(ColPartition* column) - : column_(column), latest_part_(nullptr), part_it_(&part_set_) { - } - - // Simple accessors. - ColPartition* column() const { - return column_; - } - void set_column(ColPartition* col) { - column_ = col; - } - - // Add the partition to this WorkingPartSet. Partitions are generally - // stored in the order in which they are received, but if the partition - // has a SingletonPartner, make sure that it stays with its partner. - void AddPartition(ColPartition* part); - - // Make blocks out of any partitions in this WorkingPartSet, and append - // them to the end of the blocks list. bleft, tright and resolution give - // the bounds and resolution of the source image, so that blocks can be - // made to fit in the bounds. - // All ColPartitions go in the used_parts list, as they need to be kept - // around, but are no longer needed. - void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright, - int resolution, ColPartition_LIST* used_parts, - BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); - - // Insert the given blocks at the front of the completed_blocks_ list so - // they can be kept in the correct reading order. - void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); - - private: - // Convert the part_set_ into blocks, starting a new block at a break - // in partnerships, or a change in linespacing (for text). - void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution, - ColPartition_LIST* used_parts); - - // The column that this working set applies to. Used by the caller. - ColPartition* column_; - // The most recently added partition. 
- ColPartition* latest_part_; - // All the partitions in the block that is currently under construction. - ColPartition_LIST part_set_; - // Iteratorn on part_set_ pointing to the most recent addition. - ColPartition_IT part_it_; - // The blocks that have been made so far and belong before the current block. - BLOCK_LIST completed_blocks_; - TO_BLOCK_LIST to_blocks_; -}; - -ELISTIZEH(WorkingPartSet) - -} // namespace tesseract. - -#endif // TESSERACT_TEXTORD_WORKINGPARSET_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/CMakeLists.txt b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/CMakeLists.txt deleted file mode 100644 index af1d5208..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/CMakeLists.txt +++ /dev/null @@ -1,312 +0,0 @@ -# -# tesseract -# - -if (NOT CPPAN_BUILD AND NOT (WIN32 OR CYGWIN)) - return() -endif() - -if (CPPAN_BUILD) - set(ICU_FOUND 1) -endif() - -# experimental -if (MSVC AND NOT CPPAN_BUILD AND NOT USE_SYSTEM_ICU) - include(CheckTypeSize) - check_type_size("void *" SIZEOF_VOID_P) - - if (SIZEOF_VOID_P EQUAL 8) - set(X64 1) - set(ARCH_DIR_NAME 64) - elseif (SIZEOF_VOID_P EQUAL 4) - set(X86 1) - set(ARCH_DIR_NAME 32) - else() - message(FATAL_ERROR "Cannot determine target architecture") - endif() - - set(icu_dir "${CMAKE_CURRENT_BINARY_DIR}/icu") - set(icu_archive "${icu_dir}/icu${ARCH_DIR_NAME}.zip") - - if (X86) - set(icu_hash 45167a240b60e36b59a87eda23490ce4) - else() - set(icu_hash 480c72491576c048de1218c3c5519399) - endif() - - message(STATUS "Downloading latest ICU binaries") - - file(DOWNLOAD - "http://download.icu-project.org/files/icu4c/56.1/icu4c-56_1-Win${ARCH_DIR_NAME}-msvc10.zip" - "${icu_archive}" - SHOW_PROGRESS - INACTIVITY_TIMEOUT 60 # seconds - EXPECTED_HASH MD5=${icu_hash} - ) - execute_process(COMMAND ${CMAKE_COMMAND} -E tar xz "${icu_archive}" - WORKING_DIRECTORY "${icu_dir}" - RESULT_VARIABLE __result - ) - if(NOT __result EQUAL 0) - 
message(FATAL_ERROR "error ${__result}") - endif() - - set(ICU_ROOT ${icu_dir}/icu) -endif() -# experimental - -if (NOT CPPAN_BUILD) - find_package(ICU COMPONENTS uc i18n) -endif() - -######################################## -# LIBRARY tessopt -######################################## - -add_library (tessopt tessopt.cpp tessopt.h) -project_group (tessopt "Training Tools") - - -######################################## -# LIBRARY common_training -######################################## - -set(common_training_src - commandlineflags.cpp - commandlineflags.h - commontraining.cpp - commontraining.h -) -add_library (common_training ${common_training_src}) -target_link_libraries (common_training libtesseract tessopt) -project_group (common_training "Training Tools") - - -######################################## -# EXECUTABLE ambiguous_words -######################################## - -add_executable (ambiguous_words ambiguous_words.cpp) -target_link_libraries (ambiguous_words libtesseract) -project_group (ambiguous_words "Training Tools") -install (TARGETS ambiguous_words RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE classifier_tester -######################################## - -add_executable (classifier_tester classifier_tester.cpp) -target_link_libraries (classifier_tester common_training) -project_group (classifier_tester "Training Tools") -install (TARGETS classifier_tester RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE combine_tessdata -######################################## - -add_executable (combine_tessdata combine_tessdata.cpp) -target_link_libraries (combine_tessdata libtesseract) -project_group (combine_tessdata "Training Tools") -install (TARGETS combine_tessdata RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# 
EXECUTABLE cntraining -######################################## - -add_executable (cntraining cntraining.cpp) -target_link_libraries (cntraining common_training) -project_group (cntraining "Training Tools") -install (TARGETS cntraining RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE dawg2wordlist -######################################## - -add_executable (dawg2wordlist dawg2wordlist.cpp) -target_link_libraries (dawg2wordlist libtesseract) -project_group (dawg2wordlist "Training Tools") -install (TARGETS dawg2wordlist RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE mftraining -######################################## - -add_executable (mftraining mftraining.cpp mergenf.cpp mergenf.h) -target_link_libraries (mftraining common_training) -project_group (mftraining "Training Tools") -install (TARGETS mftraining RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE shapeclustering -######################################## - -add_executable (shapeclustering shapeclustering.cpp) -target_link_libraries (shapeclustering common_training) -project_group (shapeclustering "Training Tools") -install (TARGETS shapeclustering RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE wordlist2dawg -######################################## - -add_executable (wordlist2dawg wordlist2dawg.cpp) -target_link_libraries (wordlist2dawg libtesseract) -project_group (wordlist2dawg "Training Tools") -install (TARGETS wordlist2dawg RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -if (ICU_FOUND) - -if (NOT CPPAN_BUILD) -include_directories(${ICU_INCLUDE_DIRS}) -endif() - -######################################## -# LIBRARY unicharset_training 
-######################################## - -set(unicharset_training_src - icuerrorcode.cpp - icuerrorcode.h - lang_model_helpers.cpp - lang_model_helpers.h - lstmtester.cpp - lstmtester.h - normstrngs.cpp - normstrngs.h - unicharset_training_utils.cpp - unicharset_training_utils.h - - validate_grapheme.h validate_indic.h validate_khmer.h - validate_javanese.h validate_myanmar.h validator.h - validate_grapheme.cpp validate_indic.cpp validate_khmer.cpp - validate_javanese.cpp validate_myanmar.cpp validator.cpp - -) -add_library (unicharset_training ${unicharset_training_src}) -if (NOT CPPAN_BUILD) -target_link_libraries (unicharset_training common_training ${ICU_LIBRARIES}) -else() -target_link_libraries (unicharset_training common_training pvt.cppan.demo.unicode.icu.i18n) -endif() -project_group (unicharset_training "Training Tools") - - -######################################## -# EXECUTABLE combine_lang_model -######################################## - -add_executable (combine_lang_model combine_lang_model.cpp) -target_link_libraries (combine_lang_model unicharset_training) -project_group (combine_lang_model "Training Tools") -install (TARGETS combine_lang_model RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE lstmeval -######################################## - -add_executable (lstmeval lstmeval.cpp) -target_link_libraries (lstmeval unicharset_training) -project_group (lstmeval "Training Tools") -install (TARGETS lstmeval RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE lstmtraining -######################################## - -add_executable (lstmtraining lstmtraining.cpp) -target_link_libraries (lstmtraining unicharset_training) -project_group (lstmtraining "Training Tools") -install (TARGETS lstmtraining RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - 
-######################################## -# EXECUTABLE set_unicharset_properties -######################################## - -add_executable (set_unicharset_properties set_unicharset_properties.cpp) -target_link_libraries (set_unicharset_properties unicharset_training) -project_group (set_unicharset_properties "Training Tools") -install (TARGETS set_unicharset_properties RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE unicharset_extractor -######################################## - -add_executable (unicharset_extractor unicharset_extractor.cpp) -target_link_libraries (unicharset_extractor libtesseract tessopt unicharset_training) -project_group (unicharset_extractor "Training Tools") -install (TARGETS unicharset_extractor RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - - -######################################## -# EXECUTABLE text2image -######################################## - -if (NOT CPPAN_BUILD) -find_package(PkgConfig) -endif() - -if (PKG_CONFIG_FOUND OR CPPAN_BUILD) - -if (PKG_CONFIG_FOUND) -pkg_check_modules(Pango REQUIRED pango) -pkg_check_modules(Cairo REQUIRED cairo) -pkg_check_modules(PangoFt2 REQUIRED pangoft2) -pkg_check_modules(PangoCairo REQUIRED pangocairo) -pkg_check_modules(FontConfig REQUIRED fontconfig) -endif() - -set(text2image_src - text2image.cpp - boxchar.cpp - boxchar.h - degradeimage.cpp - degradeimage.h - ligature_table.cpp - ligature_table.h - normstrngs.cpp - normstrngs.h - pango_font_info.cpp - pango_font_info.h - stringrenderer.cpp - stringrenderer.h - tlog.cpp - tlog.h - util.h - icuerrorcode.h -) - -add_executable (text2image ${text2image_src}) -target_link_libraries (text2image unicharset_training) -if (PKG_CONFIG_FOUND) -target_include_directories (text2image BEFORE PRIVATE ${Cairo_INCLUDE_DIRS} ${Pango_INCLUDE_DIRS}) -target_compile_definitions (text2image PRIVATE -DPANGO_ENABLE_ENGINE) -target_link_libraries 
(text2image - ${Pango_LIBRARIES} - ${Cairo_LIBRARIES} - ${PangoCairo_LIBRARIES} - ${PangoFt2_LIBRARIES} - ${FontConfig_LIBRARIES} -) -endif() -if (CPPAN_BUILD) -target_link_libraries (text2image pvt.cppan.demo.gnome.pango.pangocairo) -endif() -project_group (text2image "Training Tools") -install (TARGETS text2image RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - -endif() -endif(ICU_FOUND) - -############################################################################### diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/Makefile.am deleted file mode 100644 index d86b8d41..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/Makefile.am +++ /dev/null @@ -1,282 +0,0 @@ -AM_CPPFLAGS += \ - -DPANGO_ENABLE_ENGINE \ - -I$(top_builddir)/src/api \ - -I$(top_srcdir)/src/api \ - -I$(top_srcdir)/src/ccmain \ - -I$(top_srcdir)/src/ccutil \ - -I$(top_srcdir)/src/ccstruct \ - -I$(top_srcdir)/src/lstm \ - -I$(top_srcdir)/src/arch \ - -I$(top_srcdir)/src/viewer \ - -I$(top_srcdir)/src/textord \ - -I$(top_srcdir)/src/dict \ - -I$(top_srcdir)/src/classify \ - -I$(top_srcdir)/src/wordrec \ - -I$(top_srcdir)/src/cutil - -bin_SCRIPTS = language-specific.sh tesstrain.sh -scripts_DATA = tesstrain_utils.sh -scriptsdir = $(bindir) - -if DISABLED_LEGACY_ENGINE -AM_CPPFLAGS += -DDISABLED_LEGACY_ENGINE -endif - -# TODO: training programs can not be linked to shared library created -# with -fvisibility -if VISIBILITY -AM_LDFLAGS += -all-static -endif - -noinst_HEADERS = \ - boxchar.h \ - commandlineflags.h \ - commontraining.h \ - degradeimage.h \ - icuerrorcode.h \ - lang_model_helpers.h \ - ligature_table.h \ - lstmtester.h \ - normstrngs.h \ - pango_font_info.h \ - stringrenderer.h \ - tessopt.h \ - tlog.h \ - unicharset_training_utils.h \ - util.h \ - validate_grapheme.h \ - validate_indic.h \ - validate_javanese.h \ - validate_khmer.h 
\ - validate_myanmar.h \ - validator.h - -if !DISABLED_LEGACY_ENGINE - noinst_HEADERS += mergenf.h -endif - - -noinst_LTLIBRARIES = libtesseract_training.la libtesseract_tessopt.la - -libtesseract_training_la_LIBADD = \ - ../cutil/libtesseract_cutil.la -# ../api/libtesseract.la - -libtesseract_training_la_SOURCES = \ - boxchar.cpp \ - commandlineflags.cpp \ - commontraining.cpp \ - degradeimage.cpp \ - icuerrorcode.cpp \ - lang_model_helpers.cpp \ - ligature_table.cpp \ - lstmtester.cpp \ - normstrngs.cpp \ - pango_font_info.cpp \ - stringrenderer.cpp \ - tlog.cpp \ - unicharset_training_utils.cpp \ - validate_grapheme.cpp \ - validate_indic.cpp \ - validate_javanese.cpp \ - validate_khmer.cpp \ - validate_myanmar.cpp \ - validator.cpp - -libtesseract_tessopt_la_SOURCES = \ - tessopt.cpp - -bin_PROGRAMS = \ - combine_lang_model \ - combine_tessdata \ - dawg2wordlist \ - lstmeval \ - lstmtraining \ - merge_unicharsets \ - set_unicharset_properties \ - text2image \ - unicharset_extractor \ - wordlist2dawg - -if !DISABLED_LEGACY_ENGINE -bin_PROGRAMS += \ - ambiguous_words \ - classifier_tester \ - cntraining \ - mftraining \ - shapeclustering -endif - -if !DISABLED_LEGACY_ENGINE -ambiguous_words_SOURCES = ambiguous_words.cpp -ambiguous_words_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la -ambiguous_words_LDADD += \ - ../api/libtesseract.la - -classifier_tester_SOURCES = classifier_tester.cpp -#classifier_tester_LDFLAGS = -static -classifier_tester_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la -classifier_tester_LDADD += \ - ../api/libtesseract.la - -cntraining_SOURCES = cntraining.cpp -#cntraining_LDFLAGS = -static -cntraining_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la -cntraining_LDADD += \ - ../api/libtesseract.la - -mftraining_SOURCES = mftraining.cpp mergenf.cpp -#mftraining_LDFLAGS = -static -mftraining_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_UC_LIBS) -mftraining_LDADD 
+= \ - ../api/libtesseract.la - -shapeclustering_SOURCES = shapeclustering.cpp -#shapeclustering_LDFLAGS = -static -shapeclustering_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la -shapeclustering_LDADD += \ - ../api/libtesseract.la -endif - -combine_lang_model_SOURCES = combine_lang_model.cpp -#combine_lang_model_LDFLAGS = -static -combine_lang_model_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_I18N_LIBS) $(ICU_UC_LIBS) -combine_lang_model_LDADD += \ - ../api/libtesseract.la - -combine_tessdata_SOURCES = combine_tessdata.cpp -#combine_tessdata_LDFLAGS = -static -combine_tessdata_LDADD = \ - ../api/libtesseract.la - -dawg2wordlist_SOURCES = dawg2wordlist.cpp -#dawg2wordlist_LDFLAGS = -static -dawg2wordlist_LDADD = \ - libtesseract_tessopt.la -dawg2wordlist_LDADD += \ - ../api/libtesseract.la - -lstmeval_SOURCES = lstmeval.cpp -#lstmeval_LDFLAGS = -static -lstmeval_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_UC_LIBS) -lstmeval_LDADD += \ - ../api/libtesseract.la - -lstmtraining_SOURCES = lstmtraining.cpp -#lstmtraining_LDFLAGS = -static -lstmtraining_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_I18N_LIBS) $(ICU_UC_LIBS) -lstmtraining_LDADD += \ - ../api/libtesseract.la - -merge_unicharsets_SOURCES = merge_unicharsets.cpp -#merge_unicharsets_LDFLAGS = -static -merge_unicharsets_LDADD = \ - libtesseract_tessopt.la -merge_unicharsets_LDADD += \ - ../api/libtesseract.la - -set_unicharset_properties_SOURCES = set_unicharset_properties.cpp -set_unicharset_properties_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_I18N_LIBS) $(ICU_UC_LIBS) -set_unicharset_properties_LDADD += \ - ../api/libtesseract.la - -text2image_SOURCES = text2image.cpp -#text2image_LDFLAGS = -static -text2image_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_I18N_LIBS) $(ICU_UC_LIBS) -text2image_LDADD += \ - ../api/libtesseract.la 
-text2image_LDADD += $(ICU_UC_LIBS) -lpango-1.0 -lpangocairo-1.0 -text2image_LDADD += -lgobject-2.0 -lglib-2.0 -lcairo -lpangoft2-1.0 -lfontconfig - -unicharset_extractor_SOURCES = unicharset_extractor.cpp -#unicharset_extractor_LDFLAGS = -static -unicharset_extractor_LDADD = \ - libtesseract_training.la \ - libtesseract_tessopt.la \ - $(ICU_I18N_LIBS) $(ICU_UC_LIBS) -unicharset_extractor_LDADD += \ - ../api/libtesseract.la - -wordlist2dawg_SOURCES = wordlist2dawg.cpp -#wordlist2dawg_LDFLAGS = -static -wordlist2dawg_LDADD = \ - libtesseract_tessopt.la -wordlist2dawg_LDADD += \ - ../api/libtesseract.la - -if T_WIN -if !DISABLED_LEGACY_ENGINE -ambiguous_words_LDADD += -lws2_32 -classifier_tester_LDADD += -lws2_32 -cntraining_LDADD += -lws2_32 -mftraining_LDADD += -lws2_32 -shapeclustering_LDADD += -lws2_32 -endif - -combine_tessdata_LDADD += -lws2_32 -dawg2wordlist_LDADD += -lws2_32 -merge_unicharsets_LDADD += -lws2_32 -set_unicharset_properties_LDADD += -lws2_32 -unicharset_extractor_LDADD += -lws2_32 -text2image_LDADD += -lws2_32 -wordlist2dawg_LDADD += -lws2_32 -endif - -if !DISABLED_LEGACY_ENGINE -ambiguous_words_LDFLAGS = $(OPENCL_LDFLAGS) -classifier_tester_LDFLAGS = $(OPENCL_LDFLAGS) -cntraining_LDFLAGS = $(OPENCL_LDFLAGS) -mftraining_LDFLAGS = $(OPENCL_LDFLAGS) -shapeclustering_LDFLAGS = $(OPENCL_LDFLAGS) -endif - -combine_tessdata_LDFLAGS = $(OPENCL_LDFLAGS) -dawg2wordlist_LDFLAGS = $(OPENCL_LDFLAGS) -merge_unicharsets_LDFLAGS = $(OPENCL_LDFLAGS) -set_unicharset_properties_LDFLAGS = $(OPENCL_LDFLAGS) -text2image_LDFLAGS = $(OPENCL_LDFLAGS) -unicharset_extractor_LDFLAGS = $(OPENCL_LDFLAGS) -wordlist2dawg_LDFLAGS = $(OPENCL_LDFLAGS) - -if !DISABLED_LEGACY_ENGINE -ambiguous_words_LDADD += $(LEPTONICA_LIBS) -classifier_tester_LDADD += $(LEPTONICA_LIBS) -cntraining_LDADD += $(LEPTONICA_LIBS) -mftraining_LDADD += $(LEPTONICA_LIBS) -shapeclustering_LDADD += $(LEPTONICA_LIBS) -endif - -combine_tessdata_LDADD += $(LEPTONICA_LIBS) -dawg2wordlist_LDADD += 
$(LEPTONICA_LIBS) -lstmeval_LDADD += $(LEPTONICA_LIBS) -lstmtraining_LDADD += $(LEPTONICA_LIBS) -set_unicharset_properties_LDADD += $(LEPTONICA_LIBS) -text2image_LDADD += $(LEPTONICA_LIBS) -unicharset_extractor_LDADD += $(LEPTONICA_LIBS) -wordlist2dawg_LDADD += $(LEPTONICA_LIBS) diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ambiguous_words.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ambiguous_words.cpp deleted file mode 100644 index 9cf79246..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ambiguous_words.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: ambiguous_words.cpp -// Description: A program that takes a text file with a list of words as -// input (one per line) and outputs a file with the words -// that were found in the dictionary followed by the words -// that are ambiguous to them. -// Author: Rika Antonova -// Created: Fri Oct 21 11:26:43 PDT 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// - -#include "baseapi.h" -#include "commontraining.h" // CheckSharedLibraryVersion -#include "helpers.h" -#include "strngs.h" -#include "dict.h" -#include "tesseractclass.h" - -int main(int argc, char** argv) { - tesseract::CheckSharedLibraryVersion(); - - // Parse input arguments. 
- if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { - printf("%s\n", tesseract::TessBaseAPI::Version()); - return 0; - } else if (argc != 4 && (argc != 6 || strcmp(argv[1], "-l") != 0)) { - printf("Usage: %s -v | --version | %s [-l lang] tessdata_dir wordlist_file" - " output_ambiguous_wordlist_file\n", argv[0], argv[0]); - return 1; - } - int argv_offset = 0; - STRING lang; - if (argc == 6) { - lang = argv[2]; - argv_offset = 2; - } else { - lang = "eng"; - } - const char *tessdata_dir = argv[++argv_offset]; - const char *input_file_str = argv[++argv_offset]; - const char *output_file_str = argv[++argv_offset]; - - // Initialize Tesseract. - tesseract::TessBaseAPI api; - GenericVector vars_vec; - GenericVector vars_values; - vars_vec.push_back("output_ambig_words_file"); - vars_values.push_back(output_file_str); - api.Init(tessdata_dir, lang.string(), tesseract::OEM_TESSERACT_ONLY, nullptr, - 0, &vars_vec, &vars_values, false); - tesseract::Dict &dict = api.tesseract()->getDict(); - FILE *input_file = fopen(input_file_str, "rb"); - if (input_file == nullptr) { - tprintf("Failed to open input wordlist file %s\n", input_file_str); - exit(1); - } - char str[CHARS_PER_LINE]; - - // Read word list and call Dict::NoDangerousAmbig() for each word - // to record ambiguities in the output file. - while (fgets(str, CHARS_PER_LINE, input_file) != nullptr) { - chomp_string(str); // remove newline - WERD_CHOICE word(str, dict.getUnicharset()); - dict.NoDangerousAmbig(&word, nullptr, false, nullptr); - } - // Clean up. 
- fclose(input_file); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/boxchar.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/boxchar.cpp deleted file mode 100644 index 2120cbea..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/boxchar.cpp +++ /dev/null @@ -1,346 +0,0 @@ -/********************************************************************** - * File: boxchar.cpp - * Description: Simple class to associate a Tesseract classification unit with - * its bounding box so that the boxes can be rotated as the image - * is rotated for degradation. Also includes routines to output - * the character-tagged boxes to a boxfile. - * Author: Ray Smith - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include "boxchar.h" - -#include -#include -#include - -#include "fileio.h" -#include "genericvector.h" -#include "normstrngs.h" -#include "tprintf.h" -#include "unicharset.h" -#include "unicode/uchar.h" // from libicu - -// Absolute Ratio of dx:dy or dy:dx to be a newline. 
-const int kMinNewlineRatio = 5; - -namespace tesseract { - -BoxChar::BoxChar(const char* utf8_str, int len) - : ch_(utf8_str, len), box_(nullptr), page_(0), rtl_index_(-1) {} - -BoxChar::~BoxChar() { boxDestroy(&box_); } - -void BoxChar::AddBox(int x, int y, int width, int height) { - box_ = boxCreate(x, y, width, height); -} - -// Increments *num_rtl and *num_ltr according to the directionality of -// characters in the box. -void BoxChar::GetDirection(int* num_rtl, int* num_ltr) const { - // Convert the unichar to UTF32 representation - std::vector uni_vector = UNICHAR::UTF8ToUTF32(ch_.c_str()); - if (uni_vector.empty()) { - tprintf("Illegal utf8 in boxchar string:%s = ", ch_.c_str()); - for (int c = 0; c < ch_.size(); ++c) { - tprintf(" 0x%x", ch_[c]); - } - tprintf("\n"); - return; - } - for (char32 ch : uni_vector) { - UCharDirection dir = u_charDirection(ch); - if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC || - dir == U_ARABIC_NUMBER || dir == U_RIGHT_TO_LEFT_ISOLATE) { - ++*num_rtl; - } else if (dir != U_DIR_NON_SPACING_MARK && dir != U_BOUNDARY_NEUTRAL) { - ++*num_ltr; - } - } -} - -// Reverses the order of unicodes within the box. If Pango generates a -// ligature, these will get reversed on output, so reverse now. -void BoxChar::ReverseUnicodesInBox() { - std::vector unicodes = UNICHAR::UTF8ToUTF32(ch_.c_str()); - std::reverse(unicodes.begin(), unicodes.end()); - ch_ = UNICHAR::UTF32ToUTF8(unicodes); -} - -/* static */ -void BoxChar::TranslateBoxes(int xshift, int yshift, - std::vector* boxes) { - for (size_t i = 0; i < boxes->size(); ++i) { - BOX* box = (*boxes)[i]->box_; - if (box != nullptr) { - box->x += xshift; - box->y += yshift; - } - } -} - -// Prepares for writing the boxes to a file by inserting newlines, spaces, -// and re-ordering so the boxes are strictly left-to-right. 
-/* static */ -void BoxChar::PrepareToWrite(std::vector* boxes) { - bool rtl_rules = ContainsMostlyRTL(*boxes); - bool vertical_rules = MostlyVertical(*boxes); - InsertNewlines(rtl_rules, vertical_rules, boxes); - InsertSpaces(rtl_rules, vertical_rules, boxes); - for (unsigned int i = 0; i < boxes->size(); ++i) { - if ((*boxes)[i]->box_ == nullptr) tprintf("Null box at index %u\n", i); - } - if (rtl_rules) { - ReorderRTLText(boxes); - } -} - -// Inserts newline (tab) characters into the vector at newline positions. -/* static */ -void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules, - std::vector* boxes) { - int prev_i = -1; - int max_shift = 0; - for (size_t i = 0; i < boxes->size(); ++i) { - Box* box = (*boxes)[i]->box_; - if (box == nullptr) { - if (prev_i < 0 || prev_i + 1 < i || i + 1 == boxes->size()) { - // Erase null boxes at the start of a line and after another null box. - do { - delete (*boxes)[i]; - boxes->erase(boxes->begin() + i); - if (i == 0) break; - } while (i-- == boxes->size() && (*boxes)[i]->box_ == nullptr); - } - continue; - } - if (prev_i >= 0) { - Box* prev_box = (*boxes)[prev_i]->box_; - int shift = box->x - prev_box->x; - if (vertical_rules) { - shift = box->y - prev_box->y; - } else if (rtl_rules) { - shift = -shift; - } - if (-shift > max_shift) { - // This is a newline. Since nothing cares about the size of the box, - // except the out-of-bounds checker, minimize the chance of creating - // a box outside the image by making the width and height 1. - int width = 1; - int height = 1; - int x = prev_box->x + prev_box->w; - int y = prev_box->y; - if (vertical_rules) { - x = prev_box->x; - y = prev_box->y + prev_box->h; - } else if (rtl_rules) { - x = prev_box->x - width; - if (x < 0) { - tprintf("prev x = %d, width=%d\n", prev_box->x, width); - x = 0; - } - } - if (prev_i + 1 == i) { - // New character needed. 
- BoxChar* new_box = new BoxChar("\t", 1); - new_box->AddBox(x, y, width, height); - new_box->page_ = (*boxes)[i]->page_; - boxes->insert(boxes->begin() + i, new_box); - ++i; - } else { - (*boxes)[i - 1]->AddBox(x, y, width, height); - (*boxes)[i - 1]->ch_ = "\t"; - } - max_shift = 0; - } else if (shift > max_shift) { - max_shift = shift; - } - } - prev_i = i; - } -} - -// Converts nullptr boxes to space characters, with appropriate bounding boxes. -/* static */ -void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules, - std::vector* boxes) { - // After InsertNewlines, any remaining null boxes are not newlines, and are - // singletons, so add a box to each remaining null box. - for (size_t i = 1; i + 1 < boxes->size(); ++i) { - Box* box = (*boxes)[i]->box_; - if (box == nullptr) { - Box* prev = (*boxes)[i - 1]->box_; - Box* next = (*boxes)[i + 1]->box_; - ASSERT_HOST(prev != nullptr && next != nullptr); - int top = std::min(prev->y, next->y); - int bottom = std::max(prev->y + prev->h, next->y + next->h); - int left = prev->x + prev->w; - int right = next->x; - if (vertical_rules) { - top = prev->y + prev->h; - bottom = next->y; - left = std::min(prev->x, next->x); - right = std::max(prev->x + prev->w, next->x + next->w); - } else if (rtl_rules) { - // With RTL we have to account for BiDi. - // Right becomes the min left of all prior boxes back to the first - // space or newline. - right = prev->x; - left = next->x + next->w; - for (int j = i - 2; - j >= 0 && (*boxes)[j]->ch_ != " " && (*boxes)[j]->ch_ != "\t"; - --j) { - prev = (*boxes)[j]->box_; - ASSERT_HOST(prev != nullptr); - if (prev->x < right) { - right = prev->x; - } - } - // Left becomes the max right of all next boxes forward to the first - // space or newline. 
- for (size_t j = i + 2; - j < boxes->size() && (*boxes)[j]->box_ != nullptr && - (*boxes)[j]->ch_ != "\t"; - ++j) { - next = (*boxes)[j]->box_; - if (next->x + next->w > left) { - left = next->x + next->w; - } - } - } - // Italic and stylized characters can produce negative spaces, which - // Leptonica doesn't like, so clip to a positive size. - if (right <= left) right = left + 1; - if (bottom <= top) bottom = top + 1; - (*boxes)[i]->AddBox(left, top, right - left, bottom - top); - (*boxes)[i]->ch_ = " "; - } - } -} - -// Reorders text in a right-to-left script in left-to-right order. -/* static */ -void BoxChar::ReorderRTLText(std::vector* boxes) { - // Ideally we need the inverse of the algorithm used by ResultIterator. - // For now, let's try a sort that reverses original positions for RTL - // characters, otherwise by x-position. This should be much closer to - // correct than just sorting by x-position. - int num_boxes = boxes->size(); - for (int i = 0; i < num_boxes; ++i) { - int num_rtl = 0, num_ltr = 0; - (*boxes)[i]->GetDirection(&num_rtl, &num_ltr); - if (num_rtl > num_ltr) { - (*boxes)[i]->set_rtl_index(i); - (*boxes)[i]->ReverseUnicodesInBox(); - } - } - BoxCharPtrSort sorter; - size_t end = 0; - for (size_t start = 0; start < boxes->size(); start = end + 1) { - end = start + 1; - while (end < boxes->size() && (*boxes)[end]->ch_ != "\t") ++end; - std::sort(boxes->begin() + start, boxes->begin() + end, sorter); - } -} - -// Returns true if the vector contains mostly RTL characters. -/* static */ -bool BoxChar::ContainsMostlyRTL(const std::vector& boxes) { - int num_rtl = 0, num_ltr = 0; - for (int i = 0; i < boxes.size(); ++i) { - boxes[i]->GetDirection(&num_rtl, &num_ltr); - } - return num_rtl > num_ltr; -} - -// Returns true if the text is mostly laid out vertically. 
-/* static */ -bool BoxChar::MostlyVertical(const std::vector& boxes) { - int64_t total_dx = 0, total_dy = 0; - for (size_t i = 1; i < boxes.size(); ++i) { - if (boxes[i - 1]->box_ != nullptr && boxes[i]->box_ != nullptr && - boxes[i - 1]->page_ == boxes[i]->page_) { - int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x; - int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y; - if (abs(dx) > abs(dy) * kMinNewlineRatio || - abs(dy) > abs(dx) * kMinNewlineRatio) { - total_dx += dx * dx; - total_dy += dy * dy; - } - } - } - return total_dy > total_dx; -} - -// Returns the total length of all the strings in the boxes. -/* static */ -int BoxChar::TotalByteLength(const std::vector& boxes) { - int total_length = 0; - for (size_t i = 0; i < boxes.size(); ++i) - total_length += boxes[i]->ch_.size(); - return total_length; -} - -// Rotate the boxes in [start_box, end_box) by the given rotation. -// The rotation is in radians clockwise about the given center. -/* static */ -void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter, - int start_box, int end_box, - std::vector* boxes) { - Boxa* orig = boxaCreate(0); - for (int i = start_box; i < end_box; ++i) { - BOX* box = (*boxes)[i]->box_; - if (box) boxaAddBox(orig, box, L_CLONE); - } - Boxa* rotated = boxaRotate(orig, xcenter, ycenter, rotation); - boxaDestroy(&orig); - for (int i = start_box, box_ind = 0; i < end_box; ++i) { - if ((*boxes)[i]->box_) { - boxDestroy(&((*boxes)[i]->box_)); - (*boxes)[i]->box_ = boxaGetBox(rotated, box_ind++, L_CLONE); - } - } - boxaDestroy(&rotated); -} - -const int kMaxLineLength = 1024; -/* static */ -void BoxChar::WriteTesseractBoxFile(const std::string& filename, int height, - const std::vector& boxes) { - std::string output = GetTesseractBoxStr(height, boxes); - File::WriteStringToFileOrDie(output, filename); -} - -/* static */ -std::string BoxChar::GetTesseractBoxStr(int height, - const std::vector& boxes) { - std::string output; - char buffer[kMaxLineLength]; - for (size_t i = 
0; i < boxes.size(); ++i) { - const Box* box = boxes[i]->box_; - if (box == nullptr) { - tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n"); - return ""; - } - int nbytes = - snprintf(buffer, kMaxLineLength, "%s %d %d %d %d %d\n", - boxes[i]->ch_.c_str(), box->x, height - box->y - box->h, - box->x + box->w, height - box->y, boxes[i]->page_); - output.append(buffer, nbytes); - } - return output; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/boxchar.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/boxchar.h deleted file mode 100644 index 6d9b59a8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/boxchar.h +++ /dev/null @@ -1,132 +0,0 @@ -/********************************************************************** - * File: boxchar.h - * Description: Simple class to associate a Tesseract classification unit with - * its bounding box so that the boxes can be rotated as the image - * is rotated for degradation. Also includes routines to output - * the character-tagged boxes to a boxfile. - * Author: Ray Smith - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_BOXCHAR_H_ -#define TESSERACT_TRAINING_BOXCHAR_H_ - -#include -#include - -#include "allheaders.h" // from Leptonica -#include "platform.h" - -struct Box; - -namespace tesseract { - -class BoxChar { - public: - BoxChar(const char* utf8_str, int len); - - ~BoxChar(); - - // Accessors. - const std::string& ch() const { return ch_; } - const Box* box() const { return box_; } - const int& page() const { return page_; } - void set_rtl_index(int index) { rtl_index_ = index; } - const int& rtl_index() const { return rtl_index_; } - - // Set the box_ member. - void AddBox(int x, int y, int width, int height); - - void set_page(int page) { page_ = page; } - - std::string* mutable_ch() { return &ch_; } - Box* mutable_box() { return box_; } - - // Sort function for sorting by left edge of box. Note that this will not - // work properly until after InsertNewlines and InsertSpaces. - bool operator<(const BoxChar& other) const { - if (box_ == nullptr) return true; - if (other.box_ == nullptr) return false; - return box_->x < other.box_->x; - } - // Increments *num_rtl and *num_ltr according to the directionality of - // characters in the box. - void GetDirection(int* num_rtl, int* num_ltr) const; - // Reverses the order of unicodes within the box. If Pango generates a - // ligature, these will get reversed on output, so reverse now. - void ReverseUnicodesInBox(); - - static void TranslateBoxes(int xshift, int yshift, - std::vector* boxes); - - // Prepares for writing the boxes to a file by inserting newlines, spaces, - // and re-ordering so the boxes are strictly left-to-right. - static void PrepareToWrite(std::vector* boxes); - // Inserts newline (tab) characters into the vector at newline positions. 
- static void InsertNewlines(bool rtl_rules, bool vertical_rules, - std::vector* boxes); - // Converts nullptr boxes to space characters, with appropriate bounding - // boxes. - static void InsertSpaces(bool rtl_rules, bool vertical_rules, - std::vector* boxes); - // Reorders text in a right-to-left script in left-to-right order. - static void ReorderRTLText(std::vector* boxes); - // Returns true if the vector contains mostly RTL characters. - static bool ContainsMostlyRTL(const std::vector& boxes); - // Returns true if the text is mostly laid out vertically. - static bool MostlyVertical(const std::vector& boxes); - - // Returns the total length of all the strings in the boxes. - static int TotalByteLength(const std::vector& boxes); - - // Rotate the vector of boxes between start and end by the given rotation. - // The rotation is in radians clockwise about the given center. - static void RotateBoxes(float rotation, - int xcenter, - int ycenter, - int start_box, - int end_box, - std::vector* boxes); - - // Create a tesseract box file from the vector of boxes. The image height - // is needed to convert to tesseract coordinates. - static void WriteTesseractBoxFile(const std::string& name, int height, - const std::vector& boxes); - // Gets the tesseract box file as a string from the vector of boxes. - // The image height is needed to convert to tesseract coordinates. - static std::string GetTesseractBoxStr(int height, - const std::vector& boxes); - - private: - std::string ch_; - Box* box_; - int page_; - // If the box is an RTL character, contains the original position in the - // array of boxes (before reversal), otherwise -1. - int rtl_index_; -}; - -// Sort predicate to sort a vector of BoxChar*. 
-struct BoxCharPtrSort { - bool operator()(const BoxChar* box1, const BoxChar* box2) const { - if (box1->rtl_index() >= 0 && box2->rtl_index() >= 0) - return box2->rtl_index() < box1->rtl_index(); - return *box1 < *box2; - } -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_BOXCHAR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/classifier_tester.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/classifier_tester.cpp deleted file mode 100644 index c495f123..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/classifier_tester.cpp +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Filename: classifier_tester.cpp -// Purpose: Tests a character classifier on data as formatted for training, -// but doesn't have to be the same as the training data. 
-// Author: Ray Smith - -#include -#include -#ifdef GOOGLE_TESSERACT -#include "base/commandlineflags.h" -#endif // GOOGLE_TESSERACT -#include "baseapi.h" -#include "commontraining.h" -#include "mastertrainer.h" -#include "params.h" -#include "strngs.h" -#include "tessclassifier.h" -#include "tesseractclass.h" - -STRING_PARAM_FLAG(classifier, "", "Classifier to test"); -STRING_PARAM_FLAG(lang, "eng", "Language to test"); -STRING_PARAM_FLAG(tessdata_dir, "", "Directory of traineddata files"); -DECLARE_INT_PARAM_FLAG(debug_level); - -enum ClassifierName { - CN_PRUNER, - CN_FULL, - CN_COUNT -}; - -static const char* names[] = {"pruner", "full"}; - -static tesseract::ShapeClassifier* InitializeClassifier( - const char* classifer_name, const UNICHARSET& unicharset, - int argc, char **argv, - tesseract::TessBaseAPI** api) { - // Decode the classifier string. - ClassifierName classifier = CN_COUNT; - for (int c = 0; c < CN_COUNT; ++c) { - if (strcmp(classifer_name, names[c]) == 0) { - classifier = static_cast(c); - break; - } - } - if (classifier == CN_COUNT) { - fprintf(stderr, "Invalid classifier name:%s\n", FLAGS_classifier.c_str()); - return nullptr; - } - - // We need to initialize tesseract to test. 
- *api = new tesseract::TessBaseAPI; - tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY; - tesseract::Tesseract* tesseract = nullptr; - tesseract::Classify* classify = nullptr; - if ( - classifier == CN_PRUNER || classifier == CN_FULL) { - if ((*api)->Init(FLAGS_tessdata_dir.c_str(), FLAGS_lang.c_str(), - engine_mode) < 0) { - fprintf(stderr, "Tesseract initialization failed!\n"); - return nullptr; - } - tesseract = const_cast((*api)->tesseract()); - classify = static_cast(tesseract); - if (classify->shape_table() == nullptr) { - fprintf(stderr, "Tesseract must contain a ShapeTable!\n"); - return nullptr; - } - } - tesseract::ShapeClassifier* shape_classifier = nullptr; - - if (classifier == CN_PRUNER) { - shape_classifier = new tesseract::TessClassifier(true, classify); - } else if (classifier == CN_FULL) { - shape_classifier = new tesseract::TessClassifier(false, classify); - } - tprintf("Testing classifier %s:\n", classifer_name); - return shape_classifier; -} - -// This program has complex setup requirements, so here is some help: -// Two different modes, tr files and serialized mastertrainer. -// From tr files: -// classifier_tester -U unicharset -F font_properties -X xheights -// -classifier x -lang lang [-output_trainer trainer] *.tr -// From a serialized trainer: -// classifier_tester -input_trainer trainer [-lang lang] -classifier x -// -// In the first case, the unicharset must be the unicharset from within -// the classifier under test, and the font_properties and xheights files must -// match the files used during training. -// In the second case, the trainer file must have been prepared from -// some previous run of shapeclustering, mftraining, or classifier_tester -// using the same conditions as above, ie matching unicharset/font_properties. -// -// Available values of classifier (x above) are: -// pruner : Tesseract class pruner only. -// full : Tesseract full classifier. -// with an input trainer.) 
-int main(int argc, char **argv) { - tesseract::CheckSharedLibraryVersion(); - ParseArguments(&argc, &argv); - STRING file_prefix; - tesseract::MasterTrainer* trainer = - tesseract::LoadTrainingData(argc, argv, false, nullptr, &file_prefix); - tesseract::TessBaseAPI* api; - // Decode the classifier string. - tesseract::ShapeClassifier* shape_classifier = InitializeClassifier( - FLAGS_classifier.c_str(), trainer->unicharset(), argc, argv, &api); - if (shape_classifier == nullptr) { - fprintf(stderr, "Classifier init failed!:%s\n", FLAGS_classifier.c_str()); - return 1; - } - - // We want to test junk as well if it is available. - // trainer->IncludeJunk(); - // We want to test with replicated samples too. - trainer->ReplicateAndRandomizeSamplesIfRequired(); - - trainer->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR, - std::max(3, static_cast(FLAGS_debug_level)), false, - shape_classifier, nullptr); - delete shape_classifier; - delete api; - delete trainer; - - return 0; -} /* main */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/cntraining.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/cntraining.cpp deleted file mode 100644 index 372f2f71..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/cntraining.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/****************************************************************************** -** Filename: cntraining.cpp -** Purpose: Generates a normproto and pffmtable. -** Author: Dan Johnson -** Revisment: Christy Russon -** History: Fri Aug 18 08:53:50 1989, DSJ, Created. -** 5/25/90, DSJ, Adapted to multiple feature types. -** Tuesday, May 17, 1998 Changes made to make feature specific and -** simplify structures. First step in simplifying training process. -** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. -******************************************************************************/ - -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "oldlist.h" -#include "emalloc.h" -#include "featdefs.h" -#include "tessopt.h" -#include "ocrfeatures.h" -#include "clusttool.h" -#include "cluster.h" -#include -#include -#include -#include "unichar.h" -#include "commontraining.h" - -#define PROGRAM_FEATURE_TYPE "cn" - -DECLARE_STRING_PARAM_FLAG(D); - -/*---------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------*/ - -static void WriteNormProtos(const char *Directory, LIST LabeledProtoList, - const FEATURE_DESC_STRUCT *feature_desc); - -static void WriteProtos(FILE* File, uint16_t N, LIST ProtoList, - bool WriteSigProtos, bool WriteInsigProtos); - -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -/* global variable to hold configuration parameters to control clustering */ -//-M 0.025 -B 0.05 -I 0.8 -C 1e-3 -CLUSTERCONFIG CNConfig = -{ - elliptical, 0.025, 0.05, 0.8, 1e-3, 0 -}; - -/*---------------------------------------------------------------------------- - Public Code 
-----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** -* This program reads in a text file consisting of feature -* samples from a training page in the following format: -* @verbatim - FontName CharName NumberOfFeatureTypes(N) - FeatureTypeName1 NumberOfFeatures(M) - Feature1 - ... - FeatureM - FeatureTypeName2 NumberOfFeatures(M) - Feature1 - ... - FeatureM - ... - FeatureTypeNameN NumberOfFeatures(M) - Feature1 - ... - FeatureM - FontName CharName ... -@endverbatim -* It then appends these samples into a separate file for each -* character. The name of the file is -* -* DirectoryName/FontName/CharName.FeatureTypeName -* -* The DirectoryName can be specified via a command -* line argument. If not specified, it defaults to the -* current directory. The format of the resulting files is: -* @verbatim - NumberOfFeatures(M) - Feature1 - ... - FeatureM - NumberOfFeatures(M) - ... -@endverbatim -* The output files each have a header which describes the -* type of feature which the file contains. This header is -* in the format required by the clusterer. A command line -* argument can also be used to specify that only the first -* N samples of each class should be used. -* @param argc number of command line arguments -* @param argv array of command line arguments -* @return none -* @note Globals: none -*/ -int main(int argc, char *argv[]) { - tesseract::CheckSharedLibraryVersion(); - - // Set the global Config parameters before parsing the command line. 
- Config = CNConfig; - - const char *PageName; - LIST CharList = NIL_LIST; - CLUSTERER *Clusterer = nullptr; - LIST ProtoList = NIL_LIST; - LIST NormProtoList = NIL_LIST; - LIST pCharList; - LABELEDLIST CharSample; - FEATURE_DEFS_STRUCT FeatureDefs; - InitFeatureDefs(&FeatureDefs); - - ParseArguments(&argc, &argv); - int num_fonts = 0; - while ((PageName = GetNextFilename(argc, argv)) != nullptr) { - printf("Reading %s ...\n", PageName); - FILE *TrainingPage = fopen(PageName, "rb"); - ASSERT_HOST(TrainingPage); - if (TrainingPage) { - ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE, 100, nullptr, - TrainingPage, &CharList); - fclose(TrainingPage); - ++num_fonts; - } - } - printf("Clustering ...\n"); - // To allow an individual font to form a separate cluster, - // reduce the min samples: - // Config.MinSamples = 0.5 / num_fonts; - pCharList = CharList; - // The norm protos will count the source protos, so we keep them here in - // freeable_protos, so they can be freed later. - GenericVector freeable_protos; - iterate(pCharList) { - //Cluster - CharSample = (LABELEDLIST)first_node(pCharList); - Clusterer = - SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); - if (Clusterer == nullptr) { // To avoid a SIGSEGV - fprintf(stderr, "Error: nullptr clusterer!\n"); - return 1; - } - float SavedMinSamples = Config.MinSamples; - // To disable the tendency to produce a single cluster for all fonts, - // make MagicSamples an impossible to achieve number: - // Config.MagicSamples = CharSample->SampleCount * 10; - Config.MagicSamples = CharSample->SampleCount; - while (Config.MinSamples > 0.001) { - ProtoList = ClusterSamples(Clusterer, &Config); - if (NumberOfProtos(ProtoList, true, false) > 0) { - break; - } else { - Config.MinSamples *= 0.95; - printf("0 significant protos for %s." 
- " Retrying clustering with MinSamples = %f%%\n", - CharSample->Label, Config.MinSamples); - } - } - Config.MinSamples = SavedMinSamples; - AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); - freeable_protos.push_back(ProtoList); - FreeClusterer(Clusterer); - } - FreeTrainingSamples(CharList); - int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE); - WriteNormProtos(FLAGS_D.c_str(), NormProtoList, - FeatureDefs.FeatureDesc[desc_index]); - FreeNormProtoList(NormProtoList); - for (int i = 0; i < freeable_protos.size(); ++i) { - FreeProtoList(&freeable_protos[i]); - } - printf ("\n"); - return 0; -} // main - -/*---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------*/ - -/*----------------------------------------------------------------------------*/ -/** -* This routine writes the specified samples into files which -* are organized according to the font name and character name -* of the samples. -* @param Directory directory to place sample files into -* @param LabeledProtoList List of labeled protos -* @param feature_desc Description of the features -* @return none -*/ -static void WriteNormProtos(const char *Directory, LIST LabeledProtoList, - const FEATURE_DESC_STRUCT *feature_desc) { - FILE *File; - STRING Filename; - LABELEDLIST LabeledProto; - int N; - - Filename = ""; - if (Directory != nullptr && Directory[0] != '\0') { - Filename += Directory; - Filename += "/"; - } - Filename += "normproto"; - printf ("\nWriting %s ...", Filename.string()); - File = fopen(Filename.string(), "wb"); - ASSERT_HOST(File); - fprintf(File, "%0d\n", feature_desc->NumParams); - WriteParamDesc(File, feature_desc->NumParams, feature_desc->ParamDesc); - iterate(LabeledProtoList) - { - LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); - N = NumberOfProtos(LabeledProto->List, true, false); - if (N < 1) { - printf ("\nError! 
Not enough protos for %s: %d protos" - " (%d significant protos" - ", %d insignificant protos)\n", - LabeledProto->Label, N, - NumberOfProtos(LabeledProto->List, true, false), - NumberOfProtos(LabeledProto->List, false, true)); - exit(1); - } - fprintf(File, "\n%s %d\n", LabeledProto->Label, N); - WriteProtos(File, feature_desc->NumParams, LabeledProto->List, true, false); - } - fclose (File); - -} // WriteNormProtos - -/*-------------------------------------------------------------------------*/ - -static void WriteProtos(FILE* File, uint16_t N, LIST ProtoList, - bool WriteSigProtos, bool WriteInsigProtos) -{ - PROTOTYPE *Proto; - - // write prototypes - iterate(ProtoList) - { - Proto = (PROTOTYPE*)first_node(ProtoList); - if ((Proto->Significant && WriteSigProtos) || - (! Proto->Significant && WriteInsigProtos)) - WritePrototype(File, N, Proto); - } -} // WriteProtos diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/combine_lang_model.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/combine_lang_model.cpp deleted file mode 100644 index 2969b5b3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/combine_lang_model.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2017 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Purpose: Program to generate a traineddata file that can be used to train an -// LSTM-based neural network model from a unicharset and an optional -// set of wordlists. Eliminates the need to run -// set_unicharset_properties, wordlist2dawg, some non-existent binary -// to generate the recoder, and finally combine_tessdata. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion -#include "lang_model_helpers.h" -#include "tprintf.h" -#include "unicharset_training_utils.h" - -STRING_PARAM_FLAG(input_unicharset, "", - "Filename with unicharset to complete and use in encoding"); -STRING_PARAM_FLAG(script_dir, "", - "Directory name for input script unicharsets"); -STRING_PARAM_FLAG(words, "", - "File listing words to use for the system dictionary"); -STRING_PARAM_FLAG(puncs, "", "File listing punctuation patterns"); -STRING_PARAM_FLAG(numbers, "", "File listing number patterns"); -STRING_PARAM_FLAG(output_dir, "", "Root directory for output files"); -STRING_PARAM_FLAG(version_str, "", "Version string to add to traineddata file"); -STRING_PARAM_FLAG(lang, "", "Name of language being processed"); -BOOL_PARAM_FLAG(lang_is_rtl, false, - "True if lang being processed is written right-to-left"); -BOOL_PARAM_FLAG(pass_through_recoder, false, - "If true, the recoder is a simple pass-through of the" - " unicharset. Otherwise, potentially a compression of it"); - -int main(int argc, char** argv) { - // Sets properties on the input unicharset file, and writes: - // rootdir/lang/lang.charset_size=ddd.txt - // rootdir/lang/lang.traineddata - // rootdir/lang/lang.unicharset - // If the 3 word lists are provided, the dawgs are also added - // to the traineddata file. - // The output unicharset and charset_size files are just for - // human readability. 
- tesseract::CheckSharedLibraryVersion(); - tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true); - - GenericVector words, puncs, numbers; - // If these reads fail, we get a warning message and an empty list of words. - tesseract::ReadFile(FLAGS_words.c_str(), nullptr).split('\n', &words); - tesseract::ReadFile(FLAGS_puncs.c_str(), nullptr).split('\n', &puncs); - tesseract::ReadFile(FLAGS_numbers.c_str(), nullptr).split('\n', &numbers); - // Load the input unicharset - UNICHARSET unicharset; - if (!unicharset.load_from_file(FLAGS_input_unicharset.c_str(), false)) { - tprintf("Failed to load unicharset from %s\n", - FLAGS_input_unicharset.c_str()); - return 1; - } - tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(), - FLAGS_input_unicharset.c_str()); - - // Set unichar properties - tprintf("Setting unichar properties\n"); - tesseract::SetupBasicProperties(/*report_errors*/ true, - /*decompose (NFD)*/ false, &unicharset); - tprintf("Setting script properties\n"); - tesseract::SetScriptProperties(FLAGS_script_dir.c_str(), &unicharset); - // Combine everything into a traineddata file. - return tesseract::CombineLangModel( - unicharset, FLAGS_script_dir.c_str(), FLAGS_version_str.c_str(), - FLAGS_output_dir.c_str(), FLAGS_lang.c_str(), FLAGS_pass_through_recoder, - words, puncs, numbers, FLAGS_lang_is_rtl, /*reader*/ nullptr, - /*writer*/ nullptr); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/combine_tessdata.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/combine_tessdata.cpp deleted file mode 100644 index ff84bacc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/combine_tessdata.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: combine_tessdata.cpp -// Description: Creates a unified traineddata file from several -// data files produced by the training process. 
-// Author: Daria Antonova -// Created: Wed Jun 03 11:26:43 PST 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include -#include "commontraining.h" // CheckSharedLibraryVersion -#include "lstmrecognizer.h" -#include "tessdatamanager.h" - -// Main program to combine/extract/overwrite tessdata components -// in [lang].traineddata files. -// -// To combine all the individual tessdata components (unicharset, DAWGs, -// classifier templates, ambiguities, language configs) located at, say, -// /home/$USER/temp/eng.* run: -// -// combine_tessdata /home/$USER/temp/eng. -// -// The result will be a combined tessdata file /home/$USER/temp/eng.traineddata -// -// Specify option -e if you would like to extract individual components -// from a combined traineddata file. For example, to extract language config -// file and the unicharset from tessdata/eng.traineddata run: -// -// combine_tessdata -e tessdata/eng.traineddata -// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset -// -// The desired config file and unicharset will be written to -// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset -// -// Specify option -o to overwrite individual components of the given -// [lang].traineddata file. 
For example, to overwrite language config -// and unichar ambiguities files in tessdata/eng.traineddata use: -// -// combine_tessdata -o tessdata/eng.traineddata -// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs -// -// As a result, tessdata/eng.traineddata will contain the new language config -// and unichar ambigs, plus all the original DAWGs, classifier teamples, etc. -// -// Note: the file names of the files to extract to and to overwrite from should -// have the appropriate file suffixes (extensions) indicating their tessdata -// component type (.unicharset for the unicharset, .unicharambigs for unichar -// ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h. -// -// Specify option -u to unpack all the components to the specified path: -// -// combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng. -// -// This will create /home/$USER/temp/eng.* files with individual tessdata -// components from tessdata/eng.traineddata. -// -int main(int argc, char **argv) { - tesseract::CheckSharedLibraryVersion(); - - int i; - tesseract::TessdataManager tm; - if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { - printf("%s\n", tesseract::TessBaseAPI::Version()); - return EXIT_SUCCESS; - } else if (argc == 2) { - printf("Combining tessdata files\n"); - STRING lang = argv[1]; - char* last = &argv[1][strlen(argv[1])-1]; - if (*last != '.') - lang += '.'; - STRING output_file = lang; - output_file += kTrainedDataSuffix; - if (!tm.CombineDataFiles(lang.string(), output_file.string())) { - printf("Error combining tessdata files into %s\n", - output_file.string()); - } else { - printf("Output %s created successfully.\n", output_file.string()); - } - } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || - strcmp(argv[1], "-u") == 0)) { - // Initialize TessdataManager with the data in the given traineddata file. 
- if (!tm.Init(argv[2])) { - tprintf("Failed to read %s\n", argv[2]); - return EXIT_FAILURE; - } - printf("Extracting tessdata components from %s\n", argv[2]); - if (strcmp(argv[1], "-e") == 0) { - for (i = 3; i < argc; ++i) { - errno = 0; - if (tm.ExtractToFile(argv[i])) { - printf("Wrote %s\n", argv[i]); - } else if (errno == 0) { - printf("Not extracting %s, since this component" - " is not present\n", argv[i]); - return EXIT_FAILURE; - } else { - printf("Error, could not extract %s: %s\n", - argv[i], strerror(errno)); - return EXIT_FAILURE; - } - } - } else { // extract all the components - for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) { - STRING filename = argv[3]; - char* last = &argv[3][strlen(argv[3])-1]; - if (*last != '.') - filename += '.'; - filename += tesseract::kTessdataFileSuffixes[i]; - errno = 0; - if (tm.ExtractToFile(filename.string())) { - printf("Wrote %s\n", filename.string()); - } else if (errno != 0) { - printf("Error, could not extract %s: %s\n", - filename.string(), strerror(errno)); - return EXIT_FAILURE; - } - } - } - } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) { - // Rename the current traineddata file to a temporary name. - const char *new_traineddata_filename = argv[2]; - STRING traineddata_filename = new_traineddata_filename; - traineddata_filename += ".__tmp__"; - if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) { - tprintf("Failed to create a temporary file %s\n", - traineddata_filename.string()); - return EXIT_FAILURE; - } - - // Initialize TessdataManager with the data in the given traineddata file. - tm.Init(traineddata_filename.string()); - - // Write the updated traineddata file. 
- tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3); - } else if (argc == 3 && strcmp(argv[1], "-c") == 0) { - if (!tm.Init(argv[2])) { - tprintf("Failed to read %s\n", argv[2]); - return EXIT_FAILURE; - } - tesseract::TFile fp; - if (!tm.GetComponent(tesseract::TESSDATA_LSTM, &fp)) { - tprintf("No LSTM Component found in %s!\n", argv[2]); - return EXIT_FAILURE; - } - tesseract::LSTMRecognizer recognizer; - if (!recognizer.DeSerialize(&tm, &fp)) { - tprintf("Failed to deserialize LSTM in %s!\n", argv[2]); - return EXIT_FAILURE; - } - recognizer.ConvertToInt(); - GenericVector lstm_data; - fp.OpenWrite(&lstm_data); - ASSERT_HOST(recognizer.Serialize(&tm, &fp)); - tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], - lstm_data.size()); - if (!tm.SaveFile(argv[2], nullptr)) { - tprintf("Failed to write modified traineddata:%s!\n", argv[2]); - return EXIT_FAILURE; - } - } else if (argc == 3 && strcmp(argv[1], "-d") == 0) { - // Initialize TessdataManager with the data in the given traineddata file. - tm.Init(argv[2]); - } else { - printf("Usage for combining tessdata components:\n" - " %s language_data_path_prefix\n" - " (e.g. %s tessdata/eng.)\n\n", argv[0], argv[0]); - printf("Usage for extracting tessdata components:\n" - " %s -e traineddata_file [output_component_file...]\n" - " (e.g. %s -e eng.traineddata eng.unicharset)\n\n", - argv[0], argv[0]); - printf("Usage for overwriting tessdata components:\n" - " %s -o traineddata_file [input_component_file...]\n" - " (e.g. %s -o eng.traineddata eng.unicharset)\n\n", - argv[0], argv[0]); - printf("Usage for unpacking all tessdata components:\n" - " %s -u traineddata_file output_path_prefix\n" - " (e.g. 
%s -u eng.traineddata tmp/eng.)\n", argv[0], argv[0]); - printf( - "Usage for listing directory of components:\n" - " %s -d traineddata_file\n", - argv[0]); - printf( - "Usage for compacting LSTM component to int:\n" - " %s -c traineddata_file\n", - argv[0]); - return 1; - } - tm.Directory(); - return EXIT_SUCCESS; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commandlineflags.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commandlineflags.cpp deleted file mode 100644 index fa17b4a5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commandlineflags.cpp +++ /dev/null @@ -1,340 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "baseapi.h" // TessBaseAPI::Version -#include "commandlineflags.h" -#include "errcode.h" - -#ifndef GOOGLE_TESSERACT - -namespace tesseract { -static bool IntFlagExists(const char* flag_name, int32_t* value) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - IntParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->int_params, empty); - if (p == nullptr) return false; - *value = (int32_t)(*p); - return true; -} - -static bool DoubleFlagExists(const char* flag_name, double* value) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - DoubleParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->double_params, empty); - if (p == nullptr) return false; - *value = static_cast(*p); - return true; -} - -static bool BoolFlagExists(const char* flag_name, bool* value) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - BoolParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->bool_params, empty); - if (p == nullptr) return false; - *value = (BOOL8)(*p); - return true; -} - -static bool StringFlagExists(const char* flag_name, const char** value) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - StringParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->string_params, empty); - *value = (p != nullptr) ? 
p->string() : nullptr; - return p != nullptr; -} - -static void SetIntFlagValue(const char* flag_name, const int32_t new_val) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - IntParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->int_params, empty); - ASSERT_HOST(p != nullptr); - p->set_value(new_val); -} - -static void SetDoubleFlagValue(const char* flag_name, const double new_val) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - DoubleParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->double_params, empty); - ASSERT_HOST(p != nullptr); - p->set_value(new_val); -} - -static void SetBoolFlagValue(const char* flag_name, const bool new_val) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - BoolParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->bool_params, empty); - ASSERT_HOST(p != nullptr); - p->set_value(new_val); -} - -static void SetStringFlagValue(const char* flag_name, const char* new_val) { - STRING full_flag_name("FLAGS_"); - full_flag_name += flag_name; - GenericVector empty; - StringParam *p = ParamUtils::FindParam( - full_flag_name.string(), GlobalParams()->string_params, empty); - ASSERT_HOST(p != nullptr); - p->set_value(STRING(new_val)); -} - -static bool SafeAtoi(const char* str, int* val) { - char* endptr = nullptr; - *val = strtol(str, &endptr, 10); - return endptr != nullptr && *endptr == '\0'; -} - -static bool SafeAtod(const char* str, double* val) { - char* endptr = nullptr; - *val = strtod(str, &endptr); - return endptr != nullptr && *endptr == '\0'; -} - -static void PrintCommandLineFlags() { - const char* kFlagNamePrefix = "FLAGS_"; - const int kFlagNamePrefixLen = strlen(kFlagNamePrefix); - for (int i = 0; i < GlobalParams()->int_params.size(); ++i) { - if (!strncmp(GlobalParams()->int_params[i]->name_str(), - kFlagNamePrefix, 
kFlagNamePrefixLen)) { - printf(" --%s %s (type:int default:%d)\n", - GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->int_params[i]->info_str(), - int32_t(*(GlobalParams()->int_params[i]))); - } - } - for (int i = 0; i < GlobalParams()->double_params.size(); ++i) { - if (!strncmp(GlobalParams()->double_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { - printf(" --%s %s (type:double default:%g)\n", - GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->double_params[i]->info_str(), - static_cast(*(GlobalParams()->double_params[i]))); - } - } - for (int i = 0; i < GlobalParams()->bool_params.size(); ++i) { - if (!strncmp(GlobalParams()->bool_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { - printf(" --%s %s (type:bool default:%s)\n", - GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->bool_params[i]->info_str(), - (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false")); - } - } - for (int i = 0; i < GlobalParams()->string_params.size(); ++i) { - if (!strncmp(GlobalParams()->string_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { - printf(" --%s %s (type:string default:%s)\n", - GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->string_params[i]->info_str(), - GlobalParams()->string_params[i]->string()); - } - } -} - -void ParseCommandLineFlags(const char* usage, - int* argc, char*** argv, - const bool remove_flags) { - if (*argc == 1) { - printf("USAGE: %s\n", usage); - PrintCommandLineFlags(); - exit(0); - } - - if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) { - printf("%s\n", TessBaseAPI::Version()); - exit(0); - } - - unsigned int i = 1; - for (i = 1; i < *argc; ++i) { - const char* current_arg = (*argv)[i]; - // If argument does not start with a hyphen then break. 
- if (current_arg[0] != '-') { - break; - } - // Position current_arg after startings hyphens. We treat a sequence of - // one or two consecutive hyphens identically. - ++current_arg; - if (current_arg[0] == '-') { - ++current_arg; - } - // If this is asking for usage, print the help message and abort. - if (!strcmp(current_arg, "help")) { - printf("Usage:\n %s [OPTION ...]\n\n", usage); - PrintCommandLineFlags(); - exit(0); - } - // Find the starting position of the value if it was specified in this - // string. - const char* equals_position = strchr(current_arg, '='); - const char* rhs = nullptr; - if (equals_position != nullptr) { - rhs = equals_position + 1; - } - // Extract the flag name. - STRING lhs; - if (equals_position == nullptr) { - lhs = current_arg; - } else { - lhs.assign(current_arg, equals_position - current_arg); - } - if (!lhs.length()) { - tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); - exit(1); - } - - // Find the flag name in the list of global flags. - // int32_t flag - int32_t int_val; - if (IntFlagExists(lhs.string(), &int_val)) { - if (rhs != nullptr) { - if (!strlen(rhs)) { - // Bad input of the format --int_flag= - tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); - exit(1); - } - if (!SafeAtoi(rhs, &int_val)) { - tprintf("ERROR: Could not parse int from %s in flag %s\n", - rhs, (*argv)[i]); - exit(1); - } - } else { - // We need to parse the next argument - if (i + 1 >= *argc) { - tprintf("ERROR: Could not find value argument for flag %s\n", - lhs.string()); - exit(1); - } else { - ++i; - if (!SafeAtoi((*argv)[i], &int_val)) { - tprintf("ERROR: Could not parse int32_t from %s\n", (*argv)[i]); - exit(1); - } - } - } - SetIntFlagValue(lhs.string(), int_val); - continue; - } - - // double flag - double double_val; - if (DoubleFlagExists(lhs.string(), &double_val)) { - if (rhs != nullptr) { - if (!strlen(rhs)) { - // Bad input of the format --double_flag= - tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); - exit(1); - } - if 
(!SafeAtod(rhs, &double_val)) { - tprintf("ERROR: Could not parse double from %s in flag %s\n", - rhs, (*argv)[i]); - exit(1); - } - } else { - // We need to parse the next argument - if (i + 1 >= *argc) { - tprintf("ERROR: Could not find value argument for flag %s\n", - lhs.string()); - exit(1); - } else { - ++i; - if (!SafeAtod((*argv)[i], &double_val)) { - tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]); - exit(1); - } - } - } - SetDoubleFlagValue(lhs.string(), double_val); - continue; - } - - // Bool flag. Allow input forms --flag (equivalent to --flag=true), - // --flag=false, --flag=true, --flag=0 and --flag=1 - bool bool_val; - if (BoolFlagExists(lhs.string(), &bool_val)) { - if (rhs == nullptr) { - // --flag form - bool_val = true; - } else { - if (!strlen(rhs)) { - // Bad input of the format --bool_flag= - tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); - exit(1); - } - if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) { - bool_val = false; - } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) { - bool_val = true; - } else { - tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]); - exit(1); - } - } - SetBoolFlagValue(lhs.string(), bool_val); - continue; - } - - // string flag - const char* string_val; - if (StringFlagExists(lhs.string(), &string_val)) { - if (rhs != nullptr) { - string_val = rhs; - } else { - // Pick the next argument - if (i + 1 >= *argc) { - tprintf("ERROR: Could not find string value for flag %s\n", - lhs.string()); - exit(1); - } else { - string_val = (*argv)[++i]; - } - } - SetStringFlagValue(lhs.string(), string_val); - continue; - } - - // Flag was not found. Exit with an error message. 
- tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]); - exit(1); - } // for each argv - if (remove_flags) { - (*argv)[i - 1] = (*argv)[0]; - (*argv) += (i - 1); - (*argc) -= (i - 1); - } -} -} // namespace tesseract - -#else - -#include "base/init_google.h" - -namespace tesseract { -void ParseCommandLineFlags(const char* usage, - int* argc, char*** argv, - const bool remove_flags) { - InitGoogle(usage, argc, argv, remove_flags); -} -} // namespace tesseract - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commandlineflags.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commandlineflags.h deleted file mode 100644 index 7e563b2d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commandlineflags.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************** - * File: commandlineflags.h - * Description: Header file for commandline flag parsing. - * Author: Ranjith Unnikrishnan - * Created: July 2013 - * - * (C) Copyright 2013, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ -#ifndef TESSERACT_TRAINING_COMMANDLINEFLAGS_H_ -#define TESSERACT_TRAINING_COMMANDLINEFLAGS_H_ - -#ifndef GOOGLE_TESSERACT - -#include -#include "tprintf.h" -#include "params.h" - -#define INT_PARAM_FLAG(name, val, comment) \ - INT_VAR(FLAGS_##name, val, comment) -#define DECLARE_INT_PARAM_FLAG(name) \ - extern INT_VAR_H(FLAGS_##name, 0, "") -#define DOUBLE_PARAM_FLAG(name, val, comment) \ - double_VAR(FLAGS_##name, val, comment) -#define DECLARE_DOUBLE_PARAM_FLAG(name) \ - extern double_VAR_H(FLAGS_##name, "", "") -#define BOOL_PARAM_FLAG(name, val, comment) \ - BOOL_VAR(FLAGS_##name, val, comment) -#define DECLARE_BOOL_PARAM_FLAG(name) \ - extern BOOL_VAR_H(FLAGS_##name, 0, "") -#define STRING_PARAM_FLAG(name, val, comment) \ - STRING_VAR(FLAGS_##name, val, comment) -#define DECLARE_STRING_PARAM_FLAG(name) \ - extern STRING_VAR_H(FLAGS_##name, "", "") - -#else - -#include "base/commandlineflags.h" -#define INT_PARAM_FLAG(name, val, comment) \ - DEFINE_int32(name, val, comment) -#define DECLARE_INT_PARAM_FLAG(name) \ - DECLARE_int32(name) -#define DOUBLE_PARAM_FLAG(name, val, comment) \ - DEFINE_double(name, val, comment) -#define DECLARE_DOUBLE_PARAM_FLAG(name) \ - DECLARE_double(name) -#define BOOL_PARAM_FLAG(name, val, comment) \ - DEFINE_bool(name, val, comment) -#define DECLARE_BOOL_PARAM_FLAG(name) \ - DECLARE_bool(name) -#define STRING_PARAM_FLAG(name, val, comment) \ - DEFINE_string(name, val, comment) -#define DECLARE_STRING_PARAM_FLAG(name) \ - DECLARE_string(name) - -#endif - -namespace tesseract { - -// Parse commandline flags and values. Prints the usage string and exits on -// input of --help or --version. -// -// If remove_flags is true, the argv pointer is advanced so that (*argv)[1] -// points to the first non-flag argument, (*argv)[0] points to the same string -// as before, and argc is decremented to reflect the new shorter length of argv. -// eg. 
If the input *argv is -// { "program", "--foo=4", "--bar=true", "file1", "file2" } with *argc = 5, the -// output *argv is { "program", "file1", "file2" } with *argc = 3 -void ParseCommandLineFlags(const char* usage, int* argc, - char*** argv, const bool remove_flags); - -} - -#endif // TESSERACT_TRAINING_COMMANDLINEFLAGS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commontraining.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commontraining.cpp deleted file mode 100644 index e7a2673e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commontraining.cpp +++ /dev/null @@ -1,867 +0,0 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// Author: scharron@google.com (Samuel Charron) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "commontraining.h" - -#ifdef DISABLED_LEGACY_ENGINE - -#include -#include - -#include "params.h" -#include "tessopt.h" -#include "tprintf.h" - - -INT_PARAM_FLAG(debug_level, 0, "Level of Trainer debugging"); -INT_PARAM_FLAG(load_images, 0, "Load images with tr files"); -STRING_PARAM_FLAG(configfile, "", "File to load more configs from"); -STRING_PARAM_FLAG(D, "", "Directory to write output files to"); -STRING_PARAM_FLAG(F, "font_properties", "File listing font properties"); -STRING_PARAM_FLAG(X, "", "File listing font xheights"); -STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); -STRING_PARAM_FLAG(O, "", "File to write unicharset to"); -STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); -STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); - - -/** - * This routine parses the command line arguments that were - * passed to the program and uses them to set relevant - * training-related global parameters. - * - * Globals: - * - Config current clustering parameters - * @param argc number of command line arguments to parse - * @param argv command line arguments - * @return none - * @note Exceptions: Illegal options terminate the program. 
- */ -void ParseArguments(int* argc, char ***argv) { - STRING usage; - if (*argc) { - usage += (*argv)[0]; - usage += " -v | --version | "; - usage += (*argv)[0]; - } - usage += " [.tr files ...]"; - tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true); -} - -#else - -#include -#include - -#include "allheaders.h" -#include "ccutil.h" -#include "classify.h" -#include "cluster.h" -#include "clusttool.h" -#include "emalloc.h" -#include "featdefs.h" -#include "fontinfo.h" -#include "globals.h" -#include "intfeaturespace.h" -#include "mastertrainer.h" -#include "mf.h" -#include "oldlist.h" -#include "params.h" -#include "shapetable.h" -#include "tessdatamanager.h" -#include "tessopt.h" -#include "tprintf.h" -#include "unicity_table.h" - -using tesseract::CCUtil; -using tesseract::IntFeatureSpace; -using tesseract::ParamUtils; -using tesseract::ShapeTable; - -// Global Variables. - -// global variable to hold configuration parameters to control clustering -// -M 0.625 -B 0.05 -I 1.0 -C 1e-6. 
-CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }; -FEATURE_DEFS_STRUCT feature_defs; -CCUtil ccutil; - -INT_PARAM_FLAG(debug_level, 0, "Level of Trainer debugging"); -INT_PARAM_FLAG(load_images, 0, "Load images with tr files"); -STRING_PARAM_FLAG(configfile, "", "File to load more configs from"); -STRING_PARAM_FLAG(D, "", "Directory to write output files to"); -STRING_PARAM_FLAG(F, "font_properties", "File listing font properties"); -STRING_PARAM_FLAG(X, "", "File listing font xheights"); -STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); -STRING_PARAM_FLAG(O, "", "File to write unicharset to"); -STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); -STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); -DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples, - "Min number of samples per proto as % of total"); -DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal, - "Max percentage of samples in a cluster which have more" - " than 1 feature in that cluster"); -DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence, - "Desired independence between dimensions"); -DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence, - "Desired confidence in prototypes created"); - -/** - * This routine parses the command line arguments that were - * passed to the program and uses them to set relevant - * training-related global parameters. 
- * - * Globals: - * - Config current clustering parameters - * @param argc number of command line arguments to parse - * @param argv command line arguments - * @return none - */ -void ParseArguments(int* argc, char ***argv) { - STRING usage; - if (*argc) { - usage += (*argv)[0]; - usage += " -v | --version | "; - usage += (*argv)[0]; - } - usage += " [.tr files ...]"; - tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true); - // Record the index of the first non-flag argument to 1, since we set - // remove_flags to true when parsing the flags. - tessoptind = 1; - // Set some global values based on the flags. - Config.MinSamples = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_min_samples_fraction))); - Config.MaxIllegal = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_max_illegal))); - Config.Independence = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_independence))); - Config.Confidence = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_confidence))); - // Set additional parameters from config file if specified. - if (!FLAGS_configfile.empty()) { - tesseract::ParamUtils::ReadParamsFile( - FLAGS_configfile.c_str(), - tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, - ccutil.params()); - } -} - -namespace tesseract { -// Helper loads shape table from the given file. 
-ShapeTable* LoadShapeTable(const STRING& file_prefix) { - ShapeTable* shape_table = nullptr; - STRING shape_table_file = file_prefix; - shape_table_file += kShapeTableFileSuffix; - TFile shape_fp; - if (shape_fp.Open(shape_table_file.string(), nullptr)) { - shape_table = new ShapeTable; - if (!shape_table->DeSerialize(&shape_fp)) { - delete shape_table; - shape_table = nullptr; - tprintf("Error: Failed to read shape table %s\n", - shape_table_file.string()); - } else { - int num_shapes = shape_table->NumShapes(); - tprintf("Read shape table %s of %d shapes\n", - shape_table_file.string(), num_shapes); - } - } else { - tprintf("Warning: No shape table file present: %s\n", - shape_table_file.string()); - } - return shape_table; -} - -// Helper to write the shape_table. -void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) { - STRING shape_table_file = file_prefix; - shape_table_file += kShapeTableFileSuffix; - FILE* fp = fopen(shape_table_file.string(), "wb"); - if (fp != nullptr) { - if (!shape_table.Serialize(fp)) { - fprintf(stderr, "Error writing shape table: %s\n", - shape_table_file.string()); - } - fclose(fp); - } else { - fprintf(stderr, "Error creating shape table: %s\n", - shape_table_file.string()); - } -} - -/** - * Creates a MasterTrainer and loads the training data into it: - * Initializes feature_defs and IntegerFX. - * Loads the shape_table if shape_table != nullptr. - * Loads initial unicharset from -U command-line option. - * If FLAGS_T is set, loads the majority of data from there, else: - * - Loads font info from -F option. - * - Loads xheights from -X option. - * - Loads samples from .tr files in remaining command-line args. - * - Deletes outliers and computes canonical samples. - * - If FLAGS_output_trainer is set, saves the trainer for future use. - * TODO: Who uses that? There is currently no code which reads it. - * Computes canonical and cloud features. 
- * If shape_table is not nullptr, but failed to load, make a fake flat one, - * as shape clustering was not run. - */ -MasterTrainer* LoadTrainingData(int argc, const char* const * argv, - bool replication, - ShapeTable** shape_table, - STRING* file_prefix) { - InitFeatureDefs(&feature_defs); - InitIntegerFX(); - *file_prefix = ""; - if (!FLAGS_D.empty()) { - *file_prefix += FLAGS_D.c_str(); - *file_prefix += "/"; - } - // If we are shape clustering (nullptr shape_table) or we successfully load - // a shape_table written by a previous shape clustering, then - // shape_analysis will be true, meaning that the MasterTrainer will replace - // some members of the unicharset with their fragments. - bool shape_analysis = false; - if (shape_table != nullptr) { - *shape_table = LoadShapeTable(*file_prefix); - if (*shape_table != nullptr) shape_analysis = true; - } else { - shape_analysis = true; - } - MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC, - shape_analysis, - replication, - FLAGS_debug_level); - IntFeatureSpace fs; - fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets); - trainer->LoadUnicharset(FLAGS_U.c_str()); - // Get basic font information from font_properties. - if (!FLAGS_F.empty()) { - if (!trainer->LoadFontInfo(FLAGS_F.c_str())) { - delete trainer; - return nullptr; - } - } - if (!FLAGS_X.empty()) { - if (!trainer->LoadXHeights(FLAGS_X.c_str())) { - delete trainer; - return nullptr; - } - } - trainer->SetFeatureSpace(fs); - const char* page_name; - // Load training data from .tr files on the command line. - while ((page_name = GetNextFilename(argc, argv)) != nullptr) { - tprintf("Reading %s ...\n", page_name); - trainer->ReadTrainingSamples(page_name, feature_defs, false); - - // If there is a file with [lang].[fontname].exp[num].fontinfo present, - // read font spacing information in to fontinfo_table. 
- int pagename_len = strlen(page_name); - char* fontinfo_file_name = new char[pagename_len + 7]; - strncpy(fontinfo_file_name, page_name, pagename_len - 2); // remove "tr" - strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo"); // +"fontinfo" - trainer->AddSpacingInfo(fontinfo_file_name); - delete[] fontinfo_file_name; - - // Load the images into memory if required by the classifier. - if (FLAGS_load_images) { - STRING image_name = page_name; - // Chop off the tr and replace with tif. Extension must be tif! - image_name.truncate_at(image_name.length() - 2); - image_name += "tif"; - trainer->LoadPageImages(image_name.string()); - } - } - trainer->PostLoadCleanup(); - // Write the master trainer if required. - if (!FLAGS_output_trainer.empty()) { - FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb"); - if (fp == nullptr) { - tprintf("Can't create saved trainer data!\n"); - } else { - trainer->Serialize(fp); - fclose(fp); - } - } - trainer->PreTrainingSetup(); - if (!FLAGS_O.empty() && - !trainer->unicharset().save_to_file(FLAGS_O.c_str())) { - fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str()); - delete trainer; - return nullptr; - } - if (shape_table != nullptr) { - // If we previously failed to load a shapetable, then shape clustering - // wasn't run so make a flat one now. - if (*shape_table == nullptr) { - *shape_table = new ShapeTable; - trainer->SetupFlatShapeTable(*shape_table); - tprintf("Flat shape table summary: %s\n", - (*shape_table)->SummaryStr().string()); - } - (*shape_table)->set_unicharset(trainer->unicharset()); - } - return trainer; -} - -} // namespace tesseract. - -/*---------------------------------------------------------------------------*/ -/** - * This routine returns the next command line argument. If - * there are no remaining command line arguments, it returns - * nullptr. This routine should only be called after all option - * arguments have been parsed and removed with ParseArguments. 
- * - * Globals: - * - tessoptind defined by tessopt sys call - * @return Next command line argument or nullptr. - */ -const char *GetNextFilename(int argc, const char* const * argv) { - if (tessoptind < argc) - return argv[tessoptind++]; - else - return nullptr; -} /* GetNextFilename */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine searches through a list of labeled lists to find - * a list with the specified label. If a matching labeled list - * cannot be found, nullptr is returned. - * @param List list to search - * @param Label label to search for - * @return Labeled list with the specified label or nullptr. - * @note Globals: none - */ -LABELEDLIST FindList(LIST List, char* Label) { - LABELEDLIST LabeledList; - - iterate (List) - { - LabeledList = (LABELEDLIST) first_node (List); - if (strcmp (LabeledList->Label, Label) == 0) - return (LabeledList); - } - return (nullptr); - -} /* FindList */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine allocates a new, empty labeled list and gives - * it the specified label. - * @param Label label for new list - * @return New, empty labeled list. - * @note Globals: none - */ -LABELEDLIST NewLabeledList(const char* Label) { - LABELEDLIST LabeledList; - - LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE)); - LabeledList->Label = (char*)Emalloc (strlen (Label)+1); - strcpy (LabeledList->Label, Label); - LabeledList->List = NIL_LIST; - LabeledList->SampleCount = 0; - LabeledList->font_sample_count = 0; - return (LabeledList); - -} /* NewLabeledList */ - -/*---------------------------------------------------------------------------*/ -// TODO(rays) This is now used only by cntraining. Convert cntraining to use -// the new method or get rid of it entirely. 
-/** - * This routine reads training samples from a file and - * places them into a data structure which organizes the - * samples by FontName and CharName. It then returns this - * data structure. - * @param file open text file to read samples from - * @param feature_definitions - * @param feature_name - * @param max_samples - * @param unicharset - * @param training_samples - * @return none - * @note Globals: none - */ -void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_definitions, - const char *feature_name, int max_samples, - UNICHARSET* unicharset, - FILE* file, LIST* training_samples) { - char buffer[2048]; - char unichar[UNICHAR_LEN + 1]; - LABELEDLIST char_sample; - FEATURE_SET feature_samples; - CHAR_DESC char_desc; - uint32_t feature_type = - ShortNameToFeatureType(feature_definitions, feature_name); - - // Zero out the font_sample_count for all the classes. - LIST it = *training_samples; - iterate(it) { - char_sample = reinterpret_cast(first_node(it)); - char_sample->font_sample_count = 0; - } - - while (fgets(buffer, 2048, file) != nullptr) { - if (buffer[0] == '\n') - continue; - - sscanf(buffer, "%*s %s", unichar); - if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) { - unicharset->unichar_insert(unichar); - if (unicharset->size() > MAX_NUM_CLASSES) { - tprintf("Error: Size of unicharset in training is " - "greater than MAX_NUM_CLASSES\n"); - exit(1); - } - } - char_sample = FindList(*training_samples, unichar); - if (char_sample == nullptr) { - char_sample = NewLabeledList(unichar); - *training_samples = push(*training_samples, char_sample); - } - char_desc = ReadCharDescription(feature_definitions, file); - feature_samples = char_desc->FeatureSets[feature_type]; - if (char_sample->font_sample_count < max_samples || max_samples <= 0) { - char_sample->List = push(char_sample->List, feature_samples); - char_sample->SampleCount++; - char_sample->font_sample_count++; - } else { - FreeFeatureSet(feature_samples); - } - for 
(size_t i = 0; i < char_desc->NumFeatureSets; i++) { - if (feature_type != i) - FreeFeatureSet(char_desc->FeatureSets[i]); - } - free(char_desc); - } -} // ReadTrainingSamples - - -/*---------------------------------------------------------------------------*/ -/** - * This routine deallocates all of the space allocated to - * the specified list of training samples. - * @param CharList list of all fonts in document - * @return none - * @note Globals: none - */ -void FreeTrainingSamples(LIST CharList) { - LABELEDLIST char_sample; - FEATURE_SET FeatureSet; - LIST FeatureList; - - LIST nodes = CharList; - iterate(CharList) { /* iterate through all of the fonts */ - char_sample = (LABELEDLIST) first_node(CharList); - FeatureList = char_sample->List; - iterate(FeatureList) { /* iterate through all of the classes */ - FeatureSet = (FEATURE_SET) first_node(FeatureList); - FreeFeatureSet(FeatureSet); - } - FreeLabeledList(char_sample); - } - destroy(nodes); -} /* FreeTrainingSamples */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine deallocates all of the memory consumed by - * a labeled list. It does not free any memory which may be - * consumed by the items in the list. - * @param LabeledList labeled list to be freed - * @note Globals: none - * @return none - */ -void FreeLabeledList(LABELEDLIST LabeledList) { - destroy(LabeledList->List); - free(LabeledList->Label); - free(LabeledList); -} /* FreeLabeledList */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine reads samples from a LABELEDLIST and enters - * those samples into a clusterer data structure. This - * data structure is then returned to the caller. - * @param char_sample: LABELEDLIST that holds all the feature information for a - * @param FeatureDefs - * @param program_feature_type - * given character. - * @return Pointer to new clusterer data structure. 
- * @note Globals: None - */ -CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, - LABELEDLIST char_sample, - const char* program_feature_type) { - uint16_t N; - int i, j; - float* Sample = nullptr; - CLUSTERER *Clusterer; - int32_t CharID; - LIST FeatureList = nullptr; - FEATURE_SET FeatureSet = nullptr; - - int32_t desc_index = - ShortNameToFeatureType(FeatureDefs, program_feature_type); - N = FeatureDefs.FeatureDesc[desc_index]->NumParams; - Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc); - - FeatureList = char_sample->List; - CharID = 0; - iterate(FeatureList) { - FeatureSet = (FEATURE_SET) first_node(FeatureList); - for (i = 0; i < FeatureSet->MaxNumFeatures; i++) { - if (Sample == nullptr) Sample = (float*)Emalloc(N * sizeof(float)); - for (j = 0; j < N; j++) - Sample[j] = FeatureSet->Features[i]->Params[j]; - MakeSample (Clusterer, Sample, CharID); - } - CharID++; - } - free(Sample); - return Clusterer; - -} /* SetUpForClustering */ - -/*------------------------------------------------------------------------*/ -void MergeInsignificantProtos(LIST ProtoList, const char* label, - CLUSTERER* Clusterer, - CLUSTERCONFIG* clusterconfig) { - PROTOTYPE* Prototype; - bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0; - - LIST pProtoList = ProtoList; - iterate(pProtoList) { - Prototype = (PROTOTYPE *) first_node (pProtoList); - if (Prototype->Significant || Prototype->Merged) - continue; - float best_dist = 0.125; - PROTOTYPE* best_match = nullptr; - // Find the nearest alive prototype. 
- LIST list_it = ProtoList; - iterate(list_it) { - PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it); - if (test_p != Prototype && !test_p->Merged) { - float dist = ComputeDistance(Clusterer->SampleSize, - Clusterer->ParamDesc, - Prototype->Mean, test_p->Mean); - if (dist < best_dist) { - best_match = test_p; - best_dist = dist; - } - } - } - if (best_match != nullptr && !best_match->Significant) { - if (debug) - tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n", - best_match->NumSamples, Prototype->NumSamples, - best_match->Mean[0], best_match->Mean[1], - Prototype->Mean[0], Prototype->Mean[1]); - best_match->NumSamples = MergeClusters(Clusterer->SampleSize, - Clusterer->ParamDesc, - best_match->NumSamples, - Prototype->NumSamples, - best_match->Mean, - best_match->Mean, Prototype->Mean); - Prototype->NumSamples = 0; - Prototype->Merged = 1; - } else if (best_match != nullptr) { - if (debug) - tprintf("Red proto at %g,%g matched a green one at %g,%g\n", - Prototype->Mean[0], Prototype->Mean[1], - best_match->Mean[0], best_match->Mean[1]); - Prototype->Merged = 1; - } - } - // Mark significant those that now have enough samples. 
- int min_samples = - static_cast(clusterconfig->MinSamples * Clusterer->NumChar); - pProtoList = ProtoList; - iterate(pProtoList) { - Prototype = (PROTOTYPE *) first_node (pProtoList); - // Process insignificant protos that do not match a green one - if (!Prototype->Significant && Prototype->NumSamples >= min_samples && - !Prototype->Merged) { - if (debug) - tprintf("Red proto at %g,%g becoming green\n", - Prototype->Mean[0], Prototype->Mean[1]); - Prototype->Significant = true; - } - } -} /* MergeInsignificantProtos */ - -/*-----------------------------------------------------------------------------*/ -void CleanUpUnusedData( - LIST ProtoList) -{ - PROTOTYPE* Prototype; - - iterate(ProtoList) - { - Prototype = (PROTOTYPE *) first_node (ProtoList); - free(Prototype->Variance.Elliptical); - Prototype->Variance.Elliptical = nullptr; - free(Prototype->Magnitude.Elliptical); - Prototype->Magnitude.Elliptical = nullptr; - free(Prototype->Weight.Elliptical); - Prototype->Weight.Elliptical = nullptr; - } -} - -/*------------------------------------------------------------------------*/ -LIST RemoveInsignificantProtos( - LIST ProtoList, - bool KeepSigProtos, - bool KeepInsigProtos, - int N) - -{ - LIST NewProtoList = NIL_LIST; - LIST pProtoList; - PROTOTYPE* Proto; - PROTOTYPE* NewProto; - int i; - - pProtoList = ProtoList; - iterate(pProtoList) - { - Proto = (PROTOTYPE *) first_node (pProtoList); - if ((Proto->Significant && KeepSigProtos) || - (!Proto->Significant && KeepInsigProtos)) - { - NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); - - NewProto->Mean = (float *)Emalloc(N * sizeof(float)); - NewProto->Significant = Proto->Significant; - NewProto->Style = Proto->Style; - NewProto->NumSamples = Proto->NumSamples; - NewProto->Cluster = nullptr; - NewProto->Distrib = nullptr; - - for (i=0; i < N; i++) - NewProto->Mean[i] = Proto->Mean[i]; - if (Proto->Variance.Elliptical != nullptr) { - NewProto->Variance.Elliptical = (float *)Emalloc(N * sizeof(float)); - for 
(i=0; i < N; i++) - NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i]; - } - else - NewProto->Variance.Elliptical = nullptr; - //--------------------------------------------- - if (Proto->Magnitude.Elliptical != nullptr) { - NewProto->Magnitude.Elliptical = (float *)Emalloc(N * sizeof(float)); - for (i=0; i < N; i++) - NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i]; - } - else - NewProto->Magnitude.Elliptical = nullptr; - //------------------------------------------------ - if (Proto->Weight.Elliptical != nullptr) { - NewProto->Weight.Elliptical = (float *)Emalloc(N * sizeof(float)); - for (i=0; i < N; i++) - NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i]; - } - else - NewProto->Weight.Elliptical = nullptr; - - NewProto->TotalMagnitude = Proto->TotalMagnitude; - NewProto->LogMagnitude = Proto->LogMagnitude; - NewProtoList = push_last(NewProtoList, NewProto); - } - } - FreeProtoList(&ProtoList); - return (NewProtoList); -} /* RemoveInsignificantProtos */ - -/*----------------------------------------------------------------------------*/ -MERGE_CLASS FindClass(LIST List, const char* Label) { - MERGE_CLASS MergeClass; - - iterate (List) - { - MergeClass = (MERGE_CLASS) first_node (List); - if (strcmp (MergeClass->Label, Label) == 0) - return (MergeClass); - } - return (nullptr); - -} /* FindClass */ - -/*---------------------------------------------------------------------------*/ -MERGE_CLASS NewLabeledClass(const char* Label) { - MERGE_CLASS MergeClass; - - MergeClass = new MERGE_CLASS_NODE; - MergeClass->Label = (char*)Emalloc (strlen (Label)+1); - strcpy (MergeClass->Label, Label); - MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS); - return (MergeClass); - -} /* NewLabeledClass */ - -/*-----------------------------------------------------------------------------*/ -/** - * This routine deallocates all of the space allocated to - * the specified list of training samples. 
- * @param ClassList list of all fonts in document - * @return none - * @note Globals: none - */ -void FreeLabeledClassList(LIST ClassList) { - MERGE_CLASS MergeClass; - - LIST nodes = ClassList; - iterate(ClassList) /* iterate through all of the fonts */ - { - MergeClass = (MERGE_CLASS) first_node (ClassList); - free (MergeClass->Label); - FreeClass(MergeClass->Class); - delete MergeClass; - } - destroy(nodes); - -} /* FreeLabeledClassList */ - -/* SetUpForFloat2Int */ -CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, - LIST LabeledClassList) { - MERGE_CLASS MergeClass; - CLASS_TYPE Class; - int NumProtos; - int NumConfigs; - int NumWords; - int i, j; - float Values[3]; - PROTO NewProto; - PROTO OldProto; - BIT_VECTOR NewConfig; - BIT_VECTOR OldConfig; - - // printf("Float2Int ...\n"); - - CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()]; - iterate(LabeledClassList) - { - UnicityTableEqEq font_set; - MergeClass = (MERGE_CLASS) first_node (LabeledClassList); - Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)]; - NumProtos = MergeClass->Class->NumProtos; - NumConfigs = MergeClass->Class->NumConfigs; - font_set.move(&MergeClass->Class->font_set); - Class->NumProtos = NumProtos; - Class->MaxNumProtos = NumProtos; - Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos); - for(i=0; i < NumProtos; i++) - { - NewProto = ProtoIn(Class, i); - OldProto = ProtoIn(MergeClass->Class, i); - Values[0] = OldProto->X; - Values[1] = OldProto->Y; - Values[2] = OldProto->Angle; - Normalize(Values); - NewProto->X = OldProto->X; - NewProto->Y = OldProto->Y; - NewProto->Length = OldProto->Length; - NewProto->Angle = OldProto->Angle; - NewProto->A = Values[0]; - NewProto->B = Values[1]; - NewProto->C = Values[2]; - } - - Class->NumConfigs = NumConfigs; - Class->MaxNumConfigs = NumConfigs; - Class->font_set.move(&font_set); - Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs); - NumWords = 
WordsInVectorOfSize(NumProtos); - for(i=0; i < NumConfigs; i++) - { - NewConfig = NewBitVector(NumProtos); - OldConfig = MergeClass->Class->Configurations[i]; - for(j=0; j < NumWords; j++) - NewConfig[j] = OldConfig[j]; - Class->Configurations[i] = NewConfig; - } - } - return float_classes; -} // SetUpForFloat2Int - -/*--------------------------------------------------------------------------*/ -void Normalize ( - float *Values) -{ - float Slope; - float Intercept; - float Normalizer; - - Slope = tan(Values [2] * 2 * M_PI); - Intercept = Values [1] - Slope * Values [0]; - Normalizer = 1 / sqrt (Slope * Slope + 1.0); - - Values [0] = Slope * Normalizer; - Values [1] = - Normalizer; - Values [2] = Intercept * Normalizer; -} // Normalize - -/*-------------------------------------------------------------------------*/ -void FreeNormProtoList(LIST CharList) - -{ - LABELEDLIST char_sample; - - LIST nodes = CharList; - iterate(CharList) /* iterate through all of the fonts */ - { - char_sample = (LABELEDLIST) first_node (CharList); - FreeLabeledList (char_sample); - } - destroy(nodes); - -} // FreeNormProtoList - -/*---------------------------------------------------------------------------*/ -void AddToNormProtosList( - LIST* NormProtoList, - LIST ProtoList, - char* CharName) -{ - PROTOTYPE* Proto; - LABELEDLIST LabeledProtoList; - - LabeledProtoList = NewLabeledList(CharName); - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node (ProtoList); - LabeledProtoList->List = push(LabeledProtoList->List, Proto); - } - *NormProtoList = push(*NormProtoList, LabeledProtoList); -} - -/*---------------------------------------------------------------------------*/ -int NumberOfProtos(LIST ProtoList, bool CountSigProtos, - bool CountInsigProtos) { - int N = 0; - iterate(ProtoList) - { - PROTOTYPE* Proto = (PROTOTYPE*)first_node(ProtoList); - if ((Proto->Significant && CountSigProtos) || - (!Proto->Significant && CountInsigProtos)) - N++; - } - return(N); -} - -#endif // def 
DISABLED_LEGACY_ENGINE diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commontraining.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commontraining.h deleted file mode 100644 index 480026da..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/commontraining.h +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// Author: scharron@google.com (Samuel Charron) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_TRAINING_COMMONTRAINING_H_ -#define TESSERACT_TRAINING_COMMONTRAINING_H_ - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#include "baseapi.h" -#endif - -#ifdef DISABLED_LEGACY_ENGINE - -#include "tprintf.h" -#include "commandlineflags.h" - - -void ParseArguments(int* argc, char*** argv); - - -namespace tesseract { - -// Check whether the shared tesseract library is the right one. -// This function must be inline because otherwise it would be part of -// the shared library, so it could not compare the versions. 
-static inline void CheckSharedLibraryVersion() -{ -#ifdef HAVE_CONFIG_H - if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) { - tprintf("ERROR: shared library version mismatch (was %s, expected %s\n" - "Did you use a wrong shared tesseract library?\n", - TessBaseAPI::Version(), TESSERACT_VERSION_STR); - exit(1); - } -#endif -} - -} // namespace tesseract - - -#else - -#include "cluster.h" -#include "commandlineflags.h" -#include "featdefs.h" -#include "intproto.h" -#include "oldlist.h" - -namespace tesseract { -class Classify; -class MasterTrainer; -class ShapeTable; -} - -////////////////////////////////////////////////////////////////////////////// -// Globals /////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////// - -extern FEATURE_DEFS_STRUCT feature_defs; - -// Must be defined in the file that "implements" commonTraining facilities. -extern CLUSTERCONFIG Config; - -////////////////////////////////////////////////////////////////////////////// -// Structs /////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////// -typedef struct -{ - char *Label; - int SampleCount; - int font_sample_count; - LIST List; -} -LABELEDLISTNODE, *LABELEDLIST; - -typedef struct -{ - char* Label; - int NumMerged[MAX_NUM_PROTOS]; - CLASS_TYPE Class; -}MERGE_CLASS_NODE; -using MERGE_CLASS = MERGE_CLASS_NODE*; - - -////////////////////////////////////////////////////////////////////////////// -// Functions ///////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////// -void ParseArguments(int* argc, char*** argv); - -namespace tesseract { - -// Check whether the shared tesseract library is the right one. 
-// This function must be inline because otherwise it would be part of -// the shared library, so it could not compare the versions. -static inline void CheckSharedLibraryVersion() -{ -#ifdef HAVE_CONFIG_H - if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) { - tprintf("ERROR: shared library version mismatch (was %s, expected %s\n" - "Did you use a wrong shared tesseract library?\n", - TessBaseAPI::Version(), TESSERACT_VERSION_STR); - exit(1); - } -#endif -} - -// Helper loads shape table from the given file. -ShapeTable* LoadShapeTable(const STRING& file_prefix); -// Helper to write the shape_table. -void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table); - -// Creates a MasterTraininer and loads the training data into it: -// Initializes feature_defs and IntegerFX. -// Loads the shape_table if shape_table != nullptr. -// Loads initial unicharset from -U command-line option. -// If FLAGS_input_trainer is set, loads the majority of data from there, else: -// Loads font info from -F option. -// Loads xheights from -X option. -// Loads samples from .tr files in remaining command-line args. -// Deletes outliers and computes canonical samples. -// If FLAGS_output_trainer is set, saves the trainer for future use. -// Computes canonical and cloud features. -// If shape_table is not nullptr, but failed to load, make a fake flat one, -// as shape clustering was not run. -MasterTrainer* LoadTrainingData(int argc, const char* const * argv, - bool replication, - ShapeTable** shape_table, - STRING* file_prefix); -} // namespace tesseract. 
- -const char *GetNextFilename(int argc, const char* const * argv); - -LABELEDLIST FindList( - LIST List, - char *Label); - -LABELEDLIST NewLabeledList( - const char *Label); - -void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, - const char *feature_name, int max_samples, - UNICHARSET* unicharset, - FILE* file, LIST* training_samples); - -void WriteTrainingSamples( - const FEATURE_DEFS_STRUCT &FeatureDefs, - char *Directory, - LIST CharList, - const char *program_feature_type); - -void FreeTrainingSamples( - LIST CharList); - -void FreeLabeledList( - LABELEDLIST LabeledList); - -void FreeLabeledClassList( - LIST ClassListList); - -CLUSTERER *SetUpForClustering( - const FEATURE_DEFS_STRUCT &FeatureDefs, - LABELEDLIST CharSample, - const char *program_feature_type); - -LIST RemoveInsignificantProtos( - LIST ProtoList, - bool KeepSigProtos, - bool KeepInsigProtos, - int N); - -void CleanUpUnusedData( - LIST ProtoList); - -void MergeInsignificantProtos( - LIST ProtoList, - const char *label, - CLUSTERER *Clusterer, - CLUSTERCONFIG *Config); - -MERGE_CLASS FindClass( - LIST List, - const char *Label); - -MERGE_CLASS NewLabeledClass( - const char *Label); - -void FreeTrainingSamples( - LIST CharList); - -CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, - LIST LabeledClassList); - -void Normalize( - float *Values); - -void FreeNormProtoList( - LIST CharList); - -void AddToNormProtosList( - LIST* NormProtoList, - LIST ProtoList, - char *CharName); - -int NumberOfProtos( - LIST ProtoList, - bool CountSigProtos, - bool CountInsigProtos); - - -void allocNormProtos(); - -#endif // def DISABLED_LEGACY_ENGINE - -#endif // TESSERACT_TRAINING_COMMONTRAINING_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/dawg2wordlist.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/dawg2wordlist.cpp deleted file mode 100644 index def4d784..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/dawg2wordlist.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: dawg2wordlist.cpp -// Description: Program to create a word list from a DAWG and unicharset. -// Author: David Eger -// Created: Thu 22 Dec 2011 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "commontraining.h" // CheckSharedLibraryVersion -#include "dawg.h" -#include "host.h" -#include "serialis.h" -#include "tesscallback.h" -#include "trie.h" -#include "unicharset.h" - -static tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset, - const char *filename) { - const int kDictDebugLevel = 1; - tesseract::TFile dawg_file; - if (!dawg_file.Open(filename, nullptr)) { - tprintf("Could not open %s for reading.\n", filename); - return nullptr; - } - tprintf("Loading word list from %s\n", filename); - tesseract::SquishedDawg *retval = new tesseract::SquishedDawg( - tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM, kDictDebugLevel); - if (!retval->Load(&dawg_file)) { - tprintf("Could not read %s\n", filename); - delete retval; - return nullptr; - } - tprintf("Word list loaded.\n"); - return retval; -} - -class WordOutputter { - public: - WordOutputter(FILE *file) : file_(file) {} - void output_word(const char *word) { fprintf(file_, "%s\n", 
word); } - private: - FILE *file_; -}; - -// returns 0 if successful. -static int WriteDawgAsWordlist(const UNICHARSET &unicharset, - const tesseract::Dawg *dawg, - const char *outfile_name) { - FILE *out = fopen(outfile_name, "wb"); - if (out == nullptr) { - tprintf("Could not open %s for writing.\n", outfile_name); - return 1; - } - WordOutputter outputter(out); - TessCallback1 *print_word_cb = - NewPermanentTessCallback(&outputter, &WordOutputter::output_word); - dawg->iterate_words(unicharset, print_word_cb); - delete print_word_cb; - return fclose(out); -} - -int main(int argc, char *argv[]) { - tesseract::CheckSharedLibraryVersion(); - - if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { - printf("%s\n", tesseract::TessBaseAPI::Version()); - return 0; - } else if (argc != 4) { - tprintf("Print all the words in a given dawg.\n"); - tprintf("Usage: %s -v | --version | %s \n", - argv[0], argv[0]); - return 1; - } - const char *unicharset_file = argv[1]; - const char *dawg_file = argv[2]; - const char *wordlist_file = argv[3]; - UNICHARSET unicharset; - if (!unicharset.load_from_file(unicharset_file)) { - tprintf("Error loading unicharset from %s.\n", unicharset_file); - return 1; - } - tesseract::Dawg *dict = LoadSquishedDawg(unicharset, dawg_file); - if (dict == nullptr) { - tprintf("Error loading dictionary from %s.\n", dawg_file); - return 1; - } - int retval = WriteDawgAsWordlist(unicharset, dict, wordlist_file); - delete dict; - return retval; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/degradeimage.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/degradeimage.cpp deleted file mode 100644 index 3f449cc7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/degradeimage.cpp +++ /dev/null @@ -1,310 +0,0 @@ -/********************************************************************** - * File: degradeimage.cpp - * Description: Function to degrade an image 
(usually of text) as if it - * has been printed and then scanned. - * Authors: Ray Smith - * Created: Tue Nov 19 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include "degradeimage.h" - -#include -#include "allheaders.h" // from leptonica -#include "genericvector.h" -#include "helpers.h" // For TRand. -#include "rect.h" - -namespace tesseract { - -// A randomized perspective distortion can be applied to synthetic input. -// The perspective distortion comes from leptonica, which uses 2 sets of 4 -// corners to determine the distortion. There are random values for each of -// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead -// defined in terms of a single shear value. This reduces the degrees of -// freedom enough to make the distortion more realistic than it would otherwise -// be if all 8 coordinates could move independently. -// One additional factor is used for the color of the pixels that don't exist -// in the source image. -// Name for each of the randomizing factors. -enum FactorNames { - FN_INCOLOR, - FN_Y0, - FN_Y1, - FN_Y2, - FN_Y3, - FN_X0, - FN_X1, - FN_SHEAR, - // x2 = x1 - shear - // x3 = x0 + shear - FN_NUM_FACTORS -}; - -// Rotation is +/- kRotationRange radians. -const float kRotationRange = 0.02f; -// Number of grey levels to shift by for each exposure step. 
-const int kExposureFactor = 16; -// Salt and pepper noise is +/- kSaltnPepper. -const int kSaltnPepper = 5; -// Min sum of width + height on which to operate the ramp. -const int kMinRampSize = 1000; - -// Degrade the pix as if by a print/copy/scan cycle with exposure > 0 -// corresponding to darkening on the copier and <0 lighter and 0 not copied. -// Exposures in [-2,2] are most useful, with -3 and 3 being extreme. -// If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the -// pix is rotated by *rotation else it is randomly rotated and *rotation is -// modified. -// -// HOW IT WORKS: -// Most of the process is really dictated by the fact that the minimum -// available convolution is 3X3, which is too big really to simulate a -// good quality print/scan process. (2X2 would be better.) -// 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the -// images generally biased to being too light, so most of the work is to make -// them darker. 3 levels of thickening/darkening are achieved with 2 dilations, -// (using a greyscale erosion) one heavy (by being before convolution) and one -// light (after convolution). -// With no dilation, after covolution, the images are so light that a heavy -// constant offset is required to make the 0 image look reasonable. A simple -// constant offset multiple of exposure to undo this value is enough to achieve -// all the required lightening. This gives the advantage that exposure level 1 -// with a single dilation gives a good impression of the broken-yet-too-dark -// problem that is often seen in scans. -// A small random rotation gives some varying greyscale values on the edges, -// and some random salt and pepper noise on top helps to realistically jaggy-up -// the edges. -// Finally a greyscale ramp provides a continuum of effects between exposure -// levels. 
-Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer, - float* rotation) { - Pix* pix = pixConvertTo8(input, false); - pixDestroy(&input); - input = pix; - int width = pixGetWidth(input); - int height = pixGetHeight(input); - if (exposure >= 2) { - // An erosion simulates the spreading darkening of a dark copy. - // This is backwards to binary morphology, - // see http://www.leptonica.com/grayscale-morphology.html - pix = input; - input = pixErodeGray(pix, 3, 3); - pixDestroy(&pix); - } - // A convolution is essential to any mode as no scanner produces an - // image as sharp as the electronic image. - pix = pixBlockconv(input, 1, 1); - pixDestroy(&input); - // A small random rotation helps to make the edges jaggy in a realistic way. - if (rotation != nullptr) { - float radians_clockwise = 0.0f; - if (*rotation) { - radians_clockwise = *rotation; - } else if (randomizer != nullptr) { - radians_clockwise = randomizer->SignedRand(kRotationRange); - } - - input = pixRotate(pix, radians_clockwise, - L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, - 0, 0); - // Rotate the boxes to match. - *rotation = radians_clockwise; - pixDestroy(&pix); - } else { - input = pix; - } - - if (exposure >= 3 || exposure == 1) { - // Erosion after the convolution is not as heavy as before, so it is - // good for level 1 and in addition as a level 3. - // This is backwards to binary morphology, - // see http://www.leptonica.com/grayscale-morphology.html - pix = input; - input = pixErodeGray(pix, 3, 3); - pixDestroy(&pix); - } - // The convolution really needed to be 2x2 to be realistic enough, but - // we only have 3x3, so we have to bias the image darker or lose thin - // strokes. - int erosion_offset = 0; - // For light and 0 exposure, there is no dilation, so compensate for the - // convolution with a big darkening bias which is undone for lighter - // exposures. 
- if (exposure <= 0) - erosion_offset = -3 * kExposureFactor; - // Add in a general offset of the greyscales for the exposure level so - // a threshold of 128 gives a reasonable binary result. - erosion_offset -= exposure * kExposureFactor; - // Add a gradual fade over the page and a small amount of salt and pepper - // noise to simulate noise in the sensor/paper fibres and varying - // illumination. - l_uint32* data = pixGetData(input); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - int pixel = GET_DATA_BYTE(data, x); - if (randomizer != nullptr) - pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper; - if (height + width > kMinRampSize) - pixel -= (2*x + y) * 32 / (height + width); - pixel += erosion_offset; - if (pixel < 0) - pixel = 0; - if (pixel > 255) - pixel = 255; - SET_DATA_BYTE(data, x, pixel); - } - data += input->wpl; - } - return input; -} - -// Creates and returns a Pix distorted by various means according to the bool -// flags. If boxes is not nullptr, the boxes are resized/positioned according to -// any spatial distortion and also by the integer reduction factor box_scale -// so they will match what the network will output. -// Returns nullptr on error. The returned Pix must be pixDestroyed. -Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert, - bool white_noise, bool smooth_noise, bool blur, - int box_reduction, TRand* randomizer, - GenericVector* boxes) { - Pix* distorted = pixCopy(nullptr, const_cast(pix)); - // Things to do to synthetic training data. - if (invert && randomizer->SignedRand(1.0) < 0) - pixInvert(distorted, distorted); - if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) { - // TODO(rays) Cook noise in a more thread-safe manner than rand(). - // Attempt to make the sequences reproducible. 
- srand(randomizer->IntRand()); - Pix* pixn = pixAddGaussianNoise(distorted, 8.0); - pixDestroy(&distorted); - if (smooth_noise) { - distorted = pixBlockconv(pixn, 1, 1); - pixDestroy(&pixn); - } else { - distorted = pixn; - } - } - if (blur && randomizer->SignedRand(1.0) > 0.0) { - Pix* blurred = pixBlockconv(distorted, 1, 1); - pixDestroy(&distorted); - distorted = blurred; - } - if (perspective) - GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes); - if (boxes != nullptr) { - for (int b = 0; b < boxes->size(); ++b) { - (*boxes)[b].scale(1.0f / box_reduction); - if ((*boxes)[b].width() <= 0) - (*boxes)[b].set_right((*boxes)[b].left() + 1); - } - } - return distorted; -} - -// Distorts anything that has a non-null pointer with the same pseudo-random -// perspective distortion. Width and height only need to be set if there -// is no pix. If there is a pix, then they will be taken from there. -void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer, - Pix** pix, GenericVector* boxes) { - if (pix != nullptr && *pix != nullptr) { - width = pixGetWidth(*pix); - height = pixGetHeight(*pix); - } - float* im_coeffs = nullptr; - float* box_coeffs = nullptr; - l_int32 incolor = - ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs); - if (pix != nullptr && *pix != nullptr) { - // Transform the image. - Pix* transformed = pixProjective(*pix, im_coeffs, incolor); - if (transformed == nullptr) { - tprintf("Projective transformation failed!!\n"); - return; - } - pixDestroy(pix); - *pix = transformed; - } - if (boxes != nullptr) { - // Transform the boxes. 
- for (int b = 0; b < boxes->size(); ++b) { - int x1, y1, x2, y2; - const TBOX& box = (*boxes)[b]; - projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, - &y1); - projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), - &x2, &y2); - TBOX new_box1(x1, height - y2, x2, height - y1); - projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), - &x1, &y1); - projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, - &y2); - TBOX new_box2(x1, height - y1, x2, height - y2); - (*boxes)[b] = new_box1.bounding_union(new_box2); - } - } - free(im_coeffs); - free(box_coeffs); -} - -// Computes the coefficients of a randomized projective transformation. -// The image transform requires backward transformation coefficient, and the -// box transform the forward coefficients. -// Returns the incolor arg to pixProjective. -int ProjectiveCoeffs(int width, int height, TRand* randomizer, - float** im_coeffs, float** box_coeffs) { - // Setup "from" points. - Pta* src_pts = ptaCreate(4); - ptaAddPt(src_pts, 0.0f, 0.0f); - ptaAddPt(src_pts, width, 0.0f); - ptaAddPt(src_pts, width, height); - ptaAddPt(src_pts, 0.0f, height); - // Extract factors from pseudo-random sequence. - float factors[FN_NUM_FACTORS]; - float shear = 0.0f; // Shear is signed. - for (int i = 0; i < FN_NUM_FACTORS; ++i) { - // Everything is squared to make wild values rarer. - if (i == FN_SHEAR) { - // Shear is signed. - shear = randomizer->SignedRand(0.5 / 3.0); - shear = shear >= 0.0 ? shear * shear : -shear * shear; - // Keep the sheared points within the original rectangle. - if (shear < -factors[FN_X0]) shear = -factors[FN_X0]; - if (shear > factors[FN_X1]) shear = factors[FN_X1]; - factors[i] = shear; - } else if (i != FN_INCOLOR) { - factors[i] = fabs(randomizer->SignedRand(1.0)); - if (i <= FN_Y3) - factors[i] *= 5.0 / 8.0; - else - factors[i] *= 0.5; - factors[i] *= factors[i]; - } - } - // Setup "to" points. 
- Pta* dest_pts = ptaCreate(4); - ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height); - ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height); - ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, - (1 - factors[FN_Y2]) * height); - ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, - (1 - factors[FN_Y3]) * height); - getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs); - getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs); - ptaDestroy(&src_pts); - ptaDestroy(&dest_pts); - return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/degradeimage.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/degradeimage.h deleted file mode 100644 index 85e35f0a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/degradeimage.h +++ /dev/null @@ -1,61 +0,0 @@ -/********************************************************************** - * File: degradeimage.h - * Description: Function to degrade an image (usually of text) as if it - * has been printed and then scanned. - * Authors: Ray Smith - * Created: Tue Nov 19 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - **********************************************************************/ -#ifndef TESSERACT_TRAINING_DEGRADEIMAGE_H_ -#define TESSERACT_TRAINING_DEGRADEIMAGE_H_ - -#include "allheaders.h" -#include "genericvector.h" -#include "helpers.h" // For TRand. -#include "rect.h" - -namespace tesseract { - -// Degrade the pix as if by a print/copy/scan cycle with exposure > 0 -// corresponding to darkening on the copier and <0 lighter and 0 not copied. -// If rotation is not nullptr, the clockwise rotation in radians is saved there. -// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.) -// The input image is destroyed and a different image returned. -struct Pix* DegradeImage(struct Pix* input, int exposure, TRand* randomizer, - float* rotation); - -// Creates and returns a Pix distorted by various means according to the bool -// flags. If boxes is not nullptr, the boxes are resized/positioned according to -// any spatial distortion and also by the integer reduction factor box_scale -// so they will match what the network will output. -// Returns nullptr on error. The returned Pix must be pixDestroyed. -Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert, - bool white_noise, bool smooth_noise, bool blur, - int box_reduction, TRand* randomizer, - GenericVector* boxes); -// Distorts anything that has a non-null pointer with the same pseudo-random -// perspective distortion. Width and height only need to be set if there -// is no pix. If there is a pix, then they will be taken from there. -void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer, - Pix** pix, GenericVector* boxes); -// Computes the coefficients of a randomized projective transformation. -// The image transform requires backward transformation coefficient, and the -// box transform the forward coefficients. -// Returns the incolor arg to pixProjective. 
-int ProjectiveCoeffs(int width, int height, TRand* randomizer, - float** im_coeffs, float** box_coeffs); - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_DEGRADEIMAGE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/icuerrorcode.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/icuerrorcode.cpp deleted file mode 100644 index 525ec2ad..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/icuerrorcode.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "icuerrorcode.h" - -namespace tesseract { - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -IcuErrorCode::~IcuErrorCode() { - if (isFailure()) { - handleFailure(); - } -} - -} // namespace tesseract. 
diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/icuerrorcode.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/icuerrorcode.h deleted file mode 100644 index dbe28fbc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/icuerrorcode.h +++ /dev/null @@ -1,63 +0,0 @@ -/********************************************************************** - * File: icuerrorcode.h - * Description: Wrapper class for UErrorCode, with conversion operators for - * direct use in ICU C and C++ APIs. - * Author: Fredrik Roubert - * Created: Thu July 4 2013 - * - * Features: - * - The constructor initializes the internal UErrorCode to U_ZERO_ERROR, - * removing one common source of errors. - * - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking - * UErrorCode& (reference), via conversion operators. - * - Automatic checking for success when it goes out of scope. On failure, - * the destructor will log an error message and exit. - * - * Most of ICU will handle errors gracefully and provide sensible fallbacks. - * Using IcuErrorCode, it is therefore possible to write very compact code - * that does sensible things on failure and provides logging for debugging. - * - * Example: - * IcuErrorCode icuerrorcode; - * return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL; - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - **********************************************************************/ -#ifndef TESSERACT_CCUTIL_ICUERRORCODE_H_ -#define TESSERACT_CCUTIL_ICUERRORCODE_H_ - -#include // for exit -#include "tprintf.h" -#include "unicode/errorcode.h" // From libicu - -namespace tesseract { - -class IcuErrorCode : public icu::ErrorCode { - public: - IcuErrorCode() {} - virtual ~IcuErrorCode(); - - protected: - virtual void handleFailure() const { - tprintf("ICU ERROR: %s\n", errorName()); - exit(errorCode); - } - - private: - // Disallow implicit copying of object. - IcuErrorCode(const IcuErrorCode&); - void operator=(const IcuErrorCode&); -}; - -} // namespace tesseract -#endif // TESSERACT_CCUTIL_ICUERRORCODE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lang_model_helpers.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lang_model_helpers.cpp deleted file mode 100644 index e42613ab..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lang_model_helpers.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2017 Google Inc. All Rights Reserved. -// Author: rays@google.com (Ray Smith) -// Purpose: Collection of convenience functions to simplify creation of the -// unicharset, recoder, and dawgs for an LSTM model. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#include "lang_model_helpers.h" - -#if defined(_WIN32) -#include -#endif -#include -#include -#include -#include "dawg.h" -#include "fileio.h" -#include "tessdatamanager.h" -#include "trie.h" -#include "unicharcompress.h" - -namespace tesseract { - -// Helper makes a filename (//) and writes data -// to the file, using writer if not null, otherwise, a default writer. -// Default writer will overwrite any existing file, but a supplied writer -// can do its own thing. If lang is empty, returns true but does nothing. -// NOTE that suffix should contain any required . for the filename. -bool WriteFile(const std::string& output_dir, const std::string& lang, - const std::string& suffix, const GenericVector& data, - FileWriter writer) { - if (lang.empty()) return true; - std::string dirname = output_dir + "/" + lang; - // Attempt to make the directory, but ignore errors, as it may not be a - // standard filesystem, and the writer will complain if not successful. -#if defined(_WIN32) - _mkdir(dirname.c_str()); -#else - mkdir(dirname.c_str(), S_IRWXU | S_IRWXG); -#endif - std::string filename = dirname + "/" + lang + suffix; - if (writer == nullptr) - return SaveDataToFile(data, filename.c_str()); - else - return (*writer)(data, filename.c_str()); -} - -// Helper reads a file with optional reader and returns a STRING. -// On failure emits a warning message and returns and empty STRING. -STRING ReadFile(const std::string& filename, FileReader reader) { - if (filename.empty()) return STRING(); - GenericVector data; - bool read_result; - if (reader == nullptr) - read_result = LoadDataFromFile(filename.c_str(), &data); - else - read_result = (*reader)(filename.c_str(), &data); - if (read_result) return STRING(&data[0], data.size()); - tprintf("Failed to read data from: %s\n", filename.c_str()); - return STRING(); -} - -// Helper writes the unicharset to file and to the traineddata. 
-bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir, - const std::string& lang, FileWriter writer, - TessdataManager* traineddata) { - GenericVector unicharset_data; - TFile fp; - fp.OpenWrite(&unicharset_data); - if (!unicharset.save_to_file(&fp)) return false; - traineddata->OverwriteEntry(TESSDATA_LSTM_UNICHARSET, &unicharset_data[0], - unicharset_data.size()); - return WriteFile(output_dir, lang, ".unicharset", unicharset_data, writer); -} - -// Helper creates the recoder and writes it to the traineddata, and a human- -// readable form to file. -bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through, - const std::string& output_dir, const std::string& lang, - FileWriter writer, STRING* radical_table_data, - TessdataManager* traineddata) { - UnicharCompress recoder; - // Where the unicharset is carefully setup already to contain a good - // compact encoding, use a pass-through recoder that does nothing. - // For scripts that have a large number of unicodes (Han, Hangul) we want - // to use the recoder to compress the symbol space by re-encoding each - // unicode as multiple codes from a smaller 'alphabet' that are related to the - // shapes in the character. Hangul Jamo is a perfect example of this. - // See the Hangul Syllables section, sub-section "Equivalence" in: - // http://www.unicode.org/versions/Unicode10.0.0/ch18.pdf - if (pass_through) { - recoder.SetupPassThrough(unicharset); - } else { - int null_char = - unicharset.has_special_codes() ? 
UNICHAR_BROKEN : unicharset.size(); - tprintf("Null char=%d\n", null_char); - if (!recoder.ComputeEncoding(unicharset, null_char, radical_table_data)) { - tprintf("Creation of encoded unicharset failed!!\n"); - return false; - } - } - TFile fp; - GenericVector recoder_data; - fp.OpenWrite(&recoder_data); - if (!recoder.Serialize(&fp)) return false; - traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0], - recoder_data.size()); - STRING encoding = recoder.GetEncodingAsString(unicharset); - recoder_data.init_to_size(encoding.length(), 0); - memcpy(&recoder_data[0], &encoding[0], encoding.length()); - STRING suffix; - suffix.add_str_int(".charset_size=", recoder.code_range()); - suffix += ".txt"; - return WriteFile(output_dir, lang, suffix.string(), recoder_data, writer); -} - -// Helper builds a dawg from the given words, using the unicharset as coding, -// and reverse_policy for LTR/RTL, and overwrites file_type in the traineddata. -static bool WriteDawg(const GenericVector& words, - const UNICHARSET& unicharset, - Trie::RTLReversePolicy reverse_policy, - TessdataType file_type, TessdataManager* traineddata) { - // The first 3 arguments are not used in this case. - Trie trie(DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, unicharset.size(), 0); - trie.add_word_list(words, unicharset, reverse_policy); - tprintf("Reducing Trie to SquishedDawg\n"); - std::unique_ptr dawg(trie.trie_to_dawg()); - if (dawg == nullptr || dawg->NumEdges() == 0) return false; - TFile fp; - GenericVector dawg_data; - fp.OpenWrite(&dawg_data); - if (!dawg->write_squished_dawg(&fp)) return false; - traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size()); - return true; -} - -// Builds and writes the dawgs, given a set of words, punctuation -// patterns, number patterns, to the traineddata. Encoding uses the given -// unicharset, and the punc dawgs is reversed if lang_is_rtl. 
-static bool WriteDawgs(const GenericVector& words, - const GenericVector& puncs, - const GenericVector& numbers, bool lang_is_rtl, - const UNICHARSET& unicharset, - TessdataManager* traineddata) { - if (puncs.empty()) { - tprintf("Must have non-empty puncs list to use language models!!\n"); - return false; - } - // For each of the dawg types, make the dawg, and write to traineddata. - // Dawgs are reversed as follows: - // Words: According to the word content. - // Puncs: According to lang_is_rtl. - // Numbers: Never. - // System dawg (main wordlist). - if (!words.empty() && - !WriteDawg(words, unicharset, Trie::RRP_REVERSE_IF_HAS_RTL, - TESSDATA_LSTM_SYSTEM_DAWG, traineddata)) { - return false; - } - // punc/punc-dawg. - Trie::RTLReversePolicy reverse_policy = - lang_is_rtl ? Trie::RRP_FORCE_REVERSE : Trie::RRP_DO_NO_REVERSE; - if (!WriteDawg(puncs, unicharset, reverse_policy, TESSDATA_LSTM_PUNC_DAWG, - traineddata)) { - return false; - } - // numbers/number-dawg. - if (!numbers.empty() && - !WriteDawg(numbers, unicharset, Trie::RRP_DO_NO_REVERSE, - TESSDATA_LSTM_NUMBER_DAWG, traineddata)) { - return false; - } - return true; -} - -// The main function for combine_lang_model.cpp. -// Returns EXIT_SUCCESS or EXIT_FAILURE for error. -int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir, - const std::string& version_str, const std::string& output_dir, - const std::string& lang, bool pass_through_recoder, - const GenericVector& words, - const GenericVector& puncs, - const GenericVector& numbers, bool lang_is_rtl, - FileReader reader, FileWriter writer) { - // Build the traineddata file. - TessdataManager traineddata; - if (!version_str.empty()) { - traineddata.SetVersionString(traineddata.VersionString() + ":" + - version_str); - } - // Unicharset and recoder. 
- if (!WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) { - tprintf("Error writing unicharset!!\n"); - return EXIT_FAILURE; - } else { - tprintf("Config file is optional, continuing...\n"); - } - // If there is a config file, read it and add to traineddata. - std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config"; - STRING config_file = ReadFile(config_filename, reader); - if (config_file.length() > 0) { - traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0], - config_file.length()); - } - std::string radical_filename = script_dir + "/radical-stroke.txt"; - STRING radical_data = ReadFile(radical_filename, reader); - if (radical_data.length() == 0) { - tprintf("Error reading radical code table %s\n", radical_filename.c_str()); - return EXIT_FAILURE; - } - if (!WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer, - &radical_data, &traineddata)) { - tprintf("Error writing recoder!!\n"); - } - if (!words.empty() || !puncs.empty() || !numbers.empty()) { - if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset, - &traineddata)) { - tprintf("Error during conversion of wordlists to DAWGs!!\n"); - return EXIT_FAILURE; - } - } - - // Traineddata file. - GenericVector traineddata_data; - traineddata.Serialize(&traineddata_data); - if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) { - tprintf("Error writing output traineddata file!!\n"); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lang_model_helpers.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lang_model_helpers.h deleted file mode 100644 index 58e73c45..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lang_model_helpers.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2017 Google Inc. All Rights Reserved. 
-// Author: rays@google.com (Ray Smith) -// Purpose: Collection of convenience functions to simplify creation of the -// unicharset, recoder, and dawgs for an LSTM model. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#ifndef TESSERACT_TRAINING_LANG_MODEL_HELPERS_H_ -#define TESSERACT_TRAINING_LANG_MODEL_HELPERS_H_ - -#include -#include "genericvector.h" -#include "serialis.h" -#include "strngs.h" -#include "tessdatamanager.h" -#include "unicharset.h" - -namespace tesseract { - -// Helper makes a filename (//) and writes data -// to the file, using writer if not null, otherwise, a default writer. -// Default writer will overwrite any existing file, but a supplied writer -// can do its own thing. If lang is empty, returns true but does nothing. -// NOTE that suffix should contain any required . for the filename. -bool WriteFile(const std::string& output_dir, const std::string& lang, - const std::string& suffix, const GenericVector& data, - FileWriter writer); -// Helper reads a file with optional reader and returns a STRING. -// On failure emits a warning message and returns and empty STRING. -STRING ReadFile(const std::string& filename, FileReader reader); - -// Helper writes the unicharset to file and to the traineddata. 
-bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir, - const std::string& lang, FileWriter writer, - TessdataManager* traineddata); -// Helper creates the recoder from the unicharset and writes it to the -// traineddata, with a human-readable form to file at: -// //.charset_size= for some num being the size -// of the re-encoded character set. The charset_size file is written using -// writer if not null, or using a default file writer otherwise, overwriting -// any existing content. -// If pass_through is true, then the recoder will be a no-op, passing the -// unicharset codes through unchanged. Otherwise, the recoder will "compress" -// the unicharset by encoding Hangul in Jamos, decomposing multi-unicode -// symbols into sequences of unicodes, and encoding Han using the data in the -// radical_table_data, which must be the content of the file: -// langdata/radical-stroke.txt. -bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through, - const std::string& output_dir, const std::string& lang, - FileWriter writer, STRING* radical_table_data, - TessdataManager* traineddata); - -// The main function for combine_lang_model.cpp. -// Returns EXIT_SUCCESS or EXIT_FAILURE for error. -// unicharset: can be a hand-created file with incomplete fields. Its basic -// and script properties will be set before it is used. -// script_dir: should point to the langdata (github repo) directory. -// version_str: arbitrary version label. -// Output files will be written to //.* -// If pass_through_recoder is true, the unicharset will be used unchanged as -// labels in the classifier, otherwise, the unicharset will be "compressed" to -// make the recognition task simpler and faster. -// The words/puncs/numbers lists may be all empty. If any are non-empty then -// puncs must be non-empty. -// lang_is_rtl indicates that the language is generally written from right -// to left (eg Arabic/Hebrew). 
-int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir, - const std::string& version_str, const std::string& output_dir, - const std::string& lang, bool pass_through_recoder, - const GenericVector& words, - const GenericVector& puncs, - const GenericVector& numbers, bool lang_is_rtl, - FileReader reader, FileWriter writer); - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_LANG_MODEL_HELPERS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/language-specific.sh b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/language-specific.sh deleted file mode 100644 index 741fdd81..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/language-specific.sh +++ /dev/null @@ -1,1196 +0,0 @@ -#!/bin/bash -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Set some language specific variables. Works in conjunction with -# tesstrain.sh -# - -#============================================================================= -# Language specific info -#============================================================================= - -# Array of all valid language codes. 
-VALID_LANGUAGE_CODES="afr amh ara asm aze aze_cyrl bel ben bih bod bos bul cat - ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo - ell eng enm epo est eus fas fil fin fra frk frm gle glg - grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old - jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat - lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori - pan pol por pus ron rus san sin slk slv snd spa spa_old - sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur - uig ukr urd uzb uzb_cyrl vie yid gle_uncial" - -# Codes for which we have webtext but no fonts: -UNUSABLE_LANGUAGE_CODES="" - -FRAKTUR_FONTS=( - "CaslonishFraxx Medium" \ - "Cloister Black, Light" \ - "Proclamate Light" \ - "UnifrakturMaguntia" \ - "Walbaum-Fraktur" \ -) - -# List of fonts to train on -LATIN_FONTS=( - "Arial Bold" \ - "Arial Bold Italic" \ - "Arial Italic" \ - "Arial" \ - "Courier New Bold" \ - "Courier New Bold Italic" \ - "Courier New Italic" \ - "Courier New" \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "Times New Roman," \ - "Georgia Bold" \ - "Georgia Italic" \ - "Georgia" \ - "Georgia Bold Italic" \ - "Trebuchet MS Bold" \ - "Trebuchet MS Bold Italic" \ - "Trebuchet MS Italic" \ - "Trebuchet MS" \ - "Verdana Bold" \ - "Verdana Italic" \ - "Verdana" \ - "Verdana Bold Italic" \ - "URW Bookman L Bold" \ - "URW Bookman L Italic" \ - "URW Bookman L Bold Italic" \ - "Century Schoolbook L Bold" \ - "Century Schoolbook L Italic" \ - "Century Schoolbook L Bold Italic" \ - "Century Schoolbook L Medium" \ - "DejaVu Sans Ultra-Light" \ -) - -# List of fonts for printed/neo-Latin ('lat' language code, different from Latin script) -NEOLATIN_FONTS=( - "GFS Bodoni" \ - "GFS Bodoni Bold" \ - "GFS Bodoni Italic" \ - "GFS Bodoni Bold Italic" \ - "GFS Didot" \ - "GFS Didot Bold" \ - "GFS Didot Italic" \ - "GFS Didot Bold Italic" \ - "Cardo" \ - "Cardo Bold" \ - "Cardo Italic" \ - "Wyld" \ - "Wyld Italic" \ - "EB Garamond" 
\ - "EB Garamond Italic" \ - "Junicode" \ - "Junicode Bold" \ - "Junicode Italic" \ - "Junicode Bold Italic" \ - "IM FELL DW Pica PRO" \ - "IM FELL English PRO" \ - "IM FELL Double Pica PRO" \ - "IM FELL French Canon PRO" \ - "IM FELL Great Primer PRO" \ - "IM FELL DW Pica PRO Italic" \ - "IM FELL English PRO Italic" \ - "IM FELL Double Pica PRO Italic" \ - "IM FELL French Canon PRO Italic" \ - "IM FELL Great Primer PRO Italic" \ -) - -IRISH_UNCIAL_FONTS=( - "Bunchlo Arsa Dubh GC" \ - "Bunchlo Arsa GC" \ - "Bunchlo Arsa GC Bold" \ - "Bunchlo Dubh GC" \ - "Bunchlo GC" \ - "Bunchlo GC Bold" \ - "Bunchlo Nua GC Bold" \ - "Bunchló na Nod GC" \ - "Gadelica" \ - "Glanchlo Dubh GC" \ - "Glanchlo GC" \ - "Glanchlo GC Bold" \ - "Seanchló Dubh GC" \ - "Seanchló GC" \ - "Seanchló GC Bold" \ - "Seanchló na Nod GC" \ - "Seanchló Ársa Dubh GC" \ - "Seanchló Ársa GC" \ - "Seanchló Ársa GC Bold" \ - "Tromchlo Beag GC" \ - "Tromchlo Mor GC" \ - "Urchlo GC" \ - "Urchlo GC Bold" \ -) - -EARLY_LATIN_FONTS=( - "${FRAKTUR_FONTS[@]}" \ - "${LATIN_FONTS[@]}" \ - # The Wyld font family renders early modern ligatures encoded in the private - # unicode area. - "Wyld" \ - "Wyld Italic" \ - # Fonts that render the Yogh symbol (U+021C, U+021D) found in Old English. 
- "GentiumAlt" \ -) - -VIETNAMESE_FONTS=( \ - "Arial Unicode MS Bold" \ - "Arial Bold Italic" \ - "Arial Italic" \ - "Arial Unicode MS" \ - "FreeMono Bold" \ - "Courier New Bold Italic" \ - "FreeMono Italic" \ - "FreeMono" \ - "GentiumAlt Italic" \ - "GentiumAlt" \ - "Palatino Linotype Bold" \ - "Palatino Linotype Bold Italic" \ - "Palatino Linotype Italic" \ - "Palatino Linotype" \ - "Really No 2 LT W2G Light" \ - "Really No 2 LT W2G Light Italic" \ - "Really No 2 LT W2G Medium" \ - "Really No 2 LT W2G Medium Italic" \ - "Really No 2 LT W2G Semi-Bold" \ - "Really No 2 LT W2G Semi-Bold Italic" \ - "Really No 2 LT W2G Ultra-Bold" \ - "Really No 2 LT W2G Ultra-Bold Italic" \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "Times New Roman," \ - "Verdana Bold" \ - "Verdana Italic" \ - "Verdana" \ - "Verdana Bold Italic" \ - "VL Gothic" \ - "VL PGothic" \ - ) - -DEVANAGARI_FONTS=( \ - "FreeSans" \ - "Chandas" \ - "Kalimati" \ - "Uttara" \ - "Lucida Sans" \ - "gargi Medium" \ - "Lohit Devanagari" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "Noto Sans Devanagari Bold" \ - "Noto Sans Devanagari" \ - "Samyak Devanagari Medium" \ - "Sarai" \ - "Saral LT Bold" \ - "Saral LT Light" \ - "Nakula" \ - "Sahadeva" \ - "Samanata" \ - "Santipur OT Medium" \ - ) - -KANNADA_FONTS=( \ - "Kedage Bold" \ - "Kedage Italic" \ - "Kedage" \ - "Kedage Bold Italic" \ - "Mallige Bold" \ - "Mallige Italic" \ - "Mallige" \ - "Mallige Bold Italic" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "cheluvi Medium" \ - "Noto Sans Kannada Bold" \ - "Noto Sans Kannada" \ - "Lohit Kannada" \ - "Tunga" \ - "Tunga Bold" \ - ) - -TELUGU_FONTS=( \ - "Pothana2000" \ - "Vemana2000" \ - "Lohit Telugu" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "Dhurjati" \ - "Gautami Bold" \ - "Gidugu" \ - "Gurajada" \ - "Lakki Reddy" \ - "Mallanna" \ - "Mandali" \ - "NATS" \ - "NTR" \ - "Noto Sans Telugu Bold" \ - "Noto Sans Telugu" \ - 
"Peddana" \ - "Ponnala" \ - "Ramabhadra" \ - "Ravi Prakash" \ - "Sree Krushnadevaraya" \ - "Suranna" \ - "Suravaram" \ - "Tenali Ramakrishna" \ - "Gautami" \ - ) - -TAMIL_FONTS=( \ - "TAMu_Kadambri" \ - "TAMu_Kalyani" \ - "TAMu_Maduram" \ - "TSCu_Paranar" \ - "TSCu_Times" \ - "TSCu_Paranar Bold" \ - "FreeSans" \ - "FreeSerif" \ - "Lohit Tamil" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "Droid Sans Tamil Bold" \ - "Droid Sans Tamil" \ - "Karla Tamil Inclined Bold Italic" \ - "Karla Tamil Inclined Italic" \ - "Karla Tamil Upright Bold" \ - "Karla Tamil Upright" \ - "Noto Sans Tamil Bold" \ - "Noto Sans Tamil" \ - "Noto Sans Tamil UI Bold" \ - "Noto Sans Tamil UI" \ - "TSCu_Comic Normal" \ - "Lohit Tamil Classical" \ - ) - -THAI_FONTS=( \ - "FreeSerif" \ - "FreeSerif Italic" \ - "Garuda" \ - "Norasi" \ - "Lucida Sans Typewriter" \ - "Lucida Sans" \ - "Garuda Oblique" \ - "Norasi Oblique" \ - "Norasi Italic" \ - "Garuda Bold" \ - "Norasi Bold" \ - "Lucida Sans Typewriter Bold" \ - "Lucida Sans Semi-Bold" \ - "Garuda Bold Oblique" \ - "Norasi Bold Italic" \ - "Norasi Bold Oblique" \ - "AnuParp LT Thai" \ - "Arial Unicode MS Bold" \ - "Arial Unicode MS" \ - "Ascender Uni" \ - "Loma" \ - "Noto Serif Thai Bold" \ - "Noto Serif Thai" \ - "Purisa Light" \ - "Sirichana LT Bold" \ - "Sirichana LT" \ - "Sukothai LT Bold" \ - "Sukothai LT" \ - "UtSaHaGumm LT Thai" \ - "Tahoma" \ - ) - -KOREAN_FONTS=( \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Baekmuk Batang Patched" \ - "Baekmuk Batang" \ - "Baekmuk Dotum" \ - "Baekmuk Gulim" \ - "Baekmuk Headline" \ - ) - -CHI_SIM_FONTS=( \ - "AR PL UKai CN" \ - "AR PL UMing Patched Light" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "WenQuanYi Zen Hei Medium" \ - ) - -CHI_TRA_FONTS=( \ - "AR PL UKai TW" \ - "AR PL UMing TW MBE Light" \ - "AR PL UKai Patched" \ - "AR PL UMing Patched Light" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "WenQuanYi Zen Hei Medium" \ - ) - -JPN_FONTS=( \ - "TakaoExGothic" \ 
- "TakaoExMincho" \ - "TakaoGothic" \ - "TakaoMincho" \ - "TakaoPGothic" \ - "TakaoPMincho" \ - "VL Gothic" \ - "VL PGothic" \ - "Noto Sans Japanese Bold" \ - "Noto Sans Japanese Light" \ - ) - -RUSSIAN_FONTS=( \ - "Arial Bold" \ - "Arial Bold Italic" \ - "Arial Italic" \ - "Arial" \ - "Courier New Bold" \ - "Courier New Bold Italic" \ - "Courier New Italic" \ - "Courier New" \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "Times New Roman," \ - "Georgia Bold" \ - "Georgia Italic" \ - "Georgia" \ - "Georgia Bold Italic" \ - "Trebuchet MS Bold" \ - "Trebuchet MS Bold Italic" \ - "Trebuchet MS Italic" \ - "Trebuchet MS" \ - "Verdana Bold" \ - "Verdana Italic" \ - "Verdana" \ - "Verdana Bold Italic" \ - "DejaVu Serif" \ - "DejaVu Serif Oblique" \ - "DejaVu Serif Bold" \ - "DejaVu Serif Bold Oblique" \ - "Lucida Bright" \ - "FreeSerif Bold" \ - "FreeSerif Bold Italic" \ - "DejaVu Sans Ultra-Light" \ - ) - -GREEK_FONTS=( \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "DejaVu Sans Mono" \ - "DejaVu Sans Mono Oblique" \ - "DejaVu Sans Mono Bold" \ - "DejaVu Sans Mono Bold Oblique" \ - "DejaVu Serif" \ - "DejaVu Serif Semi-Condensed" \ - "DejaVu Serif Oblique" \ - "DejaVu Serif Bold" \ - "DejaVu Serif Bold Oblique" \ - "DejaVu Serif Bold Semi-Condensed" \ - "FreeSerif Bold" \ - "FreeSerif Bold Italic" \ - "FreeSerif Italic" \ - "FreeSerif" \ - "GentiumAlt" \ - "GentiumAlt Italic" \ - "Linux Biolinum O Bold" \ - "Linux Biolinum O" \ - "Linux Libertine O Bold" \ - "Linux Libertine O" \ - "Linux Libertine O Bold Italic" \ - "Linux Libertine O Italic" \ - "Palatino Linotype Bold" \ - "Palatino Linotype Bold Italic" \ - "Palatino Linotype Italic" \ - "Palatino Linotype" \ - "UmePlus P Gothic" \ - "VL PGothic" \ - ) - -ANCIENT_GREEK_FONTS=( \ - "GFS Artemisia" \ - "GFS Artemisia Bold" \ - "GFS Artemisia Bold Italic" \ - "GFS Artemisia Italic" \ - "GFS Bodoni" \ - "GFS Bodoni Bold" \ - "GFS Bodoni Bold Italic" \ - "GFS 
Bodoni Italic" \ - "GFS Didot" \ - "GFS Didot Bold" \ - "GFS Didot Bold Italic" \ - "GFS Didot Italic" \ - "GFS DidotClassic" \ - "GFS Neohellenic" \ - "GFS Neohellenic Bold" \ - "GFS Neohellenic Bold Italic" \ - "GFS Neohellenic Italic" \ - "GFS Philostratos" \ - "GFS Porson" \ - "GFS Pyrsos" \ - "GFS Solomos" \ - ) - -ARABIC_FONTS=( \ - "Arabic Transparent Bold" \ - "Arabic Transparent" \ - "Arab" \ - "Arial Unicode MS Bold" \ - "Arial Unicode MS" \ - "ASVCodar LT Bold" \ - "ASVCodar LT Light" \ - "Badiya LT Bold" \ - "Badiya LT" \ - "Badr LT Bold" \ - "Badr LT" \ - "Dimnah" \ - "Frutiger LT Arabic Bold" \ - "Frutiger LT Arabic" \ - "Furat" \ - "Hassan LT Bold" \ - "Hassan LT Light" \ - "Jalal LT Bold" \ - "Jalal LT Light" \ - "Midan Bold" \ - "Midan" \ - "Mitra LT Bold" \ - "Mitra LT Light" \ - "Palatino LT Arabic" \ - "Palatino Sans Arabic Bold" \ - "Palatino Sans Arabic" \ - "Simplified Arabic Bold" \ - "Simplified Arabic" \ - "Times New Roman, Bold" \ - "Times New Roman," \ - "Traditional Arabic Bold" \ - "Traditional Arabic" \ - ) - -HEBREW_FONTS=( \ - "Arial Bold" \ - "Arial Bold Italic" \ - "Arial Italic" \ - "Arial" \ - "Courier New Bold" \ - "Courier New Bold Italic" \ - "Courier New Italic" \ - "Courier New" \ - "Ergo Hebrew Semi-Bold" \ - "Ergo Hebrew Semi-Bold Italic" \ - "Ergo Hebrew" \ - "Ergo Hebrew Italic" \ - "Really No 2 LT W2G Light" \ - "Really No 2 LT W2G Light Italic" \ - "Really No 2 LT W2G Medium" \ - "Really No 2 LT W2G Medium Italic" \ - "Really No 2 LT W2G Semi-Bold" \ - "Really No 2 LT W2G Semi-Bold Italic" \ - "Really No 2 LT W2G Ultra-Bold" \ - "Really No 2 LT W2G Ultra-Bold Italic" \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "Times New Roman," \ - "Lucida Sans" \ - "Tahoma" \ - ) - -BENGALI_FONTS=( \ - "Bangla Medium" \ - "Lohit Bengali" \ - "Mukti Narrow" \ - "Mukti Narrow Bold" \ - "Jamrul Medium Semi-Expanded" \ - "Likhan Medium" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ 
- "FreeSans" \ - "FreeSans Oblique" \ - "FreeSerif" \ - "FreeSerif Italic" \ - "Noto Sans Bengali Bold" \ - "Noto Sans Bengali" \ - "Ani" \ - "Lohit Assamese" \ - "Lohit Bengali" \ - "Mitra Mono" \ - ) - -KYRGYZ_FONTS=( \ - "Arial" \ - "Arial Bold" \ - "Arial Italic" \ - "Arial Bold Italic" \ - "Courier New" \ - "Courier New Bold" \ - "Courier New Italic" \ - "Courier New Bold Italic" \ - "Times New Roman," \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "DejaVu Serif" \ - "DejaVu Serif Oblique" \ - "DejaVu Serif Bold" \ - "DejaVu Serif Bold Oblique" \ - "Lucida Bright" \ - "FreeSerif Bold" \ - "FreeSerif Bold Italic" \ - ) - -PERSIAN_FONTS=( \ - "Amiri Bold Italic" \ - "Amiri Bold" \ - "Amiri Italic" \ - "Amiri" \ - "Andale Sans Arabic Farsi" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Lateef" \ - "Lucida Bright" \ - "Lucida Sans Oblique" \ - "Lucida Sans Semi-Bold" \ - "Lucida Sans" \ - "Lucida Sans Typewriter Bold" \ - "Lucida Sans Typewriter Oblique" \ - "Lucida Sans Typewriter" \ - "Scheherazade" \ - "Tahoma" \ - "Times New Roman," \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "Yakout Linotype Bold" \ - "Yakout Linotype" \ - ) - -AMHARIC_FONTS=( \ - "Abyssinica SIL" - "Droid Sans Ethiopic Bold" \ - "Droid Sans Ethiopic" \ - "FreeSerif" \ - "Noto Sans Ethiopic Bold" \ - "Noto Sans Ethiopic" \ - ) - -ARMENIAN_FONTS=( \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "FreeMono" \ - "FreeMono Italic" \ - "FreeSans" \ - "FreeSans Bold" \ - "FreeSans Oblique" \ - ) - -BURMESE_FONTS=( \ - "Myanmar Sans Pro" \ - "Noto Sans Myanmar Bold" \ - "Noto Sans Myanmar" \ - "Padauk Bold" \ - "Padauk" \ - "TharLon" \ - ) - -JAVANESE_FONTS=( \ - "Prada" \ - ) - -NORTH_AMERICAN_ABORIGINAL_FONTS=( \ - "Aboriginal Sans" \ - "Aboriginal Sans Bold Italic" \ - "Aboriginal Sans Italic" \ - "Aboriginal Sans Bold" \ - "Aboriginal Serif Bold" \ - 
"Aboriginal Serif Bold Italic" \ - "Aboriginal Serif Italic" \ - "Aboriginal Serif" \ - ) - -GEORGIAN_FONTS=( \ - "Arial Unicode MS Bold" \ - "Arial Unicode MS" \ - "BPG Algeti GPL\&GNU" \ - "BPG Chveulebrivi GPL\&GNU" \ - "BPG Courier GPL\&GNU" \ - "BPG Courier S GPL\&GNU" \ - "BPG DejaVu Sans 2011 GNU-GPL" \ - "BPG Elite GPL\&GNU" \ - "BPG Excelsior GPL\&GNU" \ - "BPG Glaho GPL\&GNU" \ - "BPG Gorda GPL\&GNU" \ - "BPG Ingiri GPL\&GNU" \ - "BPG Mrgvlovani Caps GNU\&GPL" \ - "BPG Mrgvlovani GPL\&GNU" \ - "BPG Nateli Caps GPL\&GNU Light" \ - "BPG Nateli Condenced GPL\&GNU Light" \ - "BPG Nateli GPL\&GNU Light" \ - "BPG Nino Medium Cond GPL\&GNU" \ - "BPG Nino Medium GPL\&GNU Medium" \ - "BPG Sans GPL\&GNU" \ - "BPG Sans Medium GPL\&GNU" \ - "BPG Sans Modern GPL\&GNU" \ - "BPG Sans Regular GPL\&GNU" \ - "BPG Serif GPL\&GNU" \ - "BPG Serif Modern GPL\&GNU" \ - "FreeMono" \ - "FreeMono Bold Italic" \ - "FreeSans" \ - "FreeSerif" \ - "FreeSerif Bold" \ - "FreeSerif Bold Italic" \ - "FreeSerif Italic" \ - ) - -OLD_GEORGIAN_FONTS=( \ - "Arial Unicode MS Bold" \ - "Arial Unicode MS" \ - "BPG Algeti GPL\&GNU" \ - "BPG Courier S GPL\&GNU" \ - "BPG DejaVu Sans 2011 GNU-GPL" \ - "BPG Elite GPL\&GNU" \ - "BPG Excelsior GPL\&GNU" \ - "BPG Glaho GPL\&GNU" \ - "BPG Ingiri GPL\&GNU" \ - "BPG Mrgvlovani Caps GNU\&GPL" \ - "BPG Mrgvlovani GPL\&GNU" \ - "BPG Nateli Caps GPL\&GNU Light" \ - "BPG Nateli Condenced GPL\&GNU Light" \ - "BPG Nateli GPL\&GNU Light" \ - "BPG Nino Medium Cond GPL\&GNU" \ - "BPG Nino Medium GPL\&GNU Medium" \ - "BPG Sans GPL\&GNU" \ - "BPG Sans Medium GPL\&GNU" \ - "BPG Sans Modern GPL\&GNU" \ - "BPG Sans Regular GPL\&GNU" \ - "BPG Serif GPL\&GNU" \ - "BPG Serif Modern GPL\&GNU" \ - "FreeSans" \ - "FreeSerif" \ - "FreeSerif Bold" \ - "FreeSerif Bold Italic" \ - "FreeSerif Italic" \ - ) - -KHMER_FONTS=( \ - "Khmer OS" \ - "Khmer OS System" \ - "Khmer OS Battambang" \ - "Khmer OS Bokor" \ - "Khmer OS Content" \ - "Khmer OS Fasthand" \ - "Khmer OS Freehand" \ - 
"Khmer OS Metal Chrieng" \ - "Khmer OS Muol Light" \ - "Khmer OS Muol Pali" \ - "Khmer OS Muol" \ - "Khmer OS Siemreap" \ - "Noto Sans Bold" \ - "Noto Sans" \ - "Noto Serif Khmer Bold" \ - "Noto Serif Khmer Light" \ - ) - -KURDISH_FONTS=( \ - "Amiri Bold Italic" \ - "Amiri Bold" \ - "Amiri Italic" \ - "Amiri" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Lateef" \ - "Lucida Bright" \ - "Lucida Sans Oblique" \ - "Lucida Sans Semi-Bold" \ - "Lucida Sans" \ - "Lucida Sans Typewriter Bold" \ - "Lucida Sans Typewriter Oblique" \ - "Lucida Sans Typewriter" \ - "Scheherazade" \ - "Tahoma" \ - "Times New Roman," \ - "Times New Roman, Bold" \ - "Times New Roman, Bold Italic" \ - "Times New Roman, Italic" \ - "Unikurd Web" \ - "Yakout Linotype Bold" \ - "Yakout Linotype" \ - ) - -LAOTHIAN_FONTS=( \ - "Phetsarath OT" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "Dhyana Bold" \ - "Dhyana" \ - "Lao Muang Don" \ - "Lao Muang Khong" \ - "Lao Sans Pro" \ - "Noto Sans Lao Bold" \ - "Noto Sans Lao" \ - "Noto Sans Lao UI Bold" \ - "Noto Sans Lao UI" \ - "Noto Serif Lao Bold" \ - "Noto Serif Lao" \ - "Phetsarath Bold" \ - "Phetsarath" \ - "Souliyo Unicode" \ -) - -GUJARATI_FONTS=( \ - "Lohit Gujarati" \ - "Rekha Medium" \ - "Samyak Gujarati Medium" \ - "aakar Medium" \ - "padmaa Bold" \ - "padmaa Medium" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "FreeSans" \ - "Noto Sans Gujarati Bold" \ - "Noto Sans Gujarati" \ - "Shruti" \ - "Shruti Bold" \ - ) - -MALAYALAM_FONTS=( \ - "AnjaliOldLipi" \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "Dyuthi" \ - "FreeSerif" \ - "Kalyani" \ - "Kartika" \ - "Kartika Bold" \ - "Lohit Malayalam" \ - "Meera" \ - "Noto Sans Malayalam Bold" \ - "Noto Sans Malayalam" \ - "Rachana" \ - "Rachana_w01" \ - "RaghuMalayalam" \ - "suruma" \ - ) - -ORIYA_FONTS=( \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "ori1Uni Medium" \ - "Samyak Oriya 
Medium" \ - "Lohit Oriya" \ - ) - -PUNJABI_FONTS=( \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "Saab" \ - "Lohit Punjabi" \ - "Noto Sans Gurmukhi" \ - "Noto Sans Gurmukhi Bold" \ - "FreeSans" \ - "FreeSans Bold" \ - "FreeSerif" \ - ) - -SINHALA_FONTS=( \ - "Noto Sans Sinhala Bold" \ - "Noto Sans Sinhala" \ - "OCRUnicode" \ - "Yagpo" \ - "LKLUG" \ - "FreeSerif" \ - ) - -SYRIAC_FONTS=( \ - "East Syriac Adiabene" \ - "East Syriac Ctesiphon" \ - "Estrangelo Antioch" \ - "Estrangelo Edessa" \ - "Estrangelo Midyat" \ - "Estrangelo Nisibin" \ - "Estrangelo Quenneshrin" \ - "Estrangelo Talada" \ - "Estrangelo TurAbdin" \ - "Serto Batnan Bold" \ - "Serto Batnan" \ - "Serto Jerusalem Bold" \ - "Serto Jerusalem Italic" \ - "Serto Jerusalem" \ - "Serto Kharput" \ - "Serto Malankara" \ - "Serto Mardin Bold" \ - "Serto Mardin" \ - "Serto Urhoy Bold" \ - "Serto Urhoy" \ - "FreeSans" \ - ) - -THAANA_FONTS=( \ - "FreeSerif" \ - ) - -TIBETAN_FONTS=( \ - "Arial Unicode MS" \ - "Arial Unicode MS Bold" \ - "Ascender Uni" \ - "DDC Uchen" \ - "Jomolhari" \ - "Kailasa" \ - "Kokonor" \ - "Tibetan Machine Uni" \ - "TibetanTsugRing" \ - "Yagpo" \ - ) - -# The following fonts will be rendered vertically in phase I. -VERTICAL_FONTS=( \ - "TakaoExGothic" \ # for jpn - "TakaoExMincho" \ # for jpn - "AR PL UKai Patched" \ # for chi_tra - "AR PL UMing Patched Light" \ # for chi_tra - "Baekmuk Batang Patched" \ # for kor - ) - -FLAGS_webtext_prefix=${FLAGS_webtext_prefix:-} - -# Set language-specific values for several global variables, including -# ${TEXT_CORPUS} -# holds the text corpus file for the language, used in phase F -# ${FONTS[@]} -# holds a sequence of applicable fonts for the language, used in -# phase F & I. only set if not already set, i.e. 
from command line -# ${TRAINING_DATA_ARGUMENTS} -# non-default arguments to the training_data program used in phase T -# ${FILTER_ARGUMENTS} - -# character-code-specific filtering to distinguish between scripts -# (eg. CJK) used by filter_borbidden_characters in phase F -# ${WORDLIST2DAWG_ARGUMENTS} -# specify fixed length dawg generation for non-space-delimited lang -# TODO(dsl): We can refactor these into functions that assign FONTS, -# TEXT_CORPUS, etc. separately. -set_lang_specific_parameters() { - local lang=$1 - # The default text location is now given directly from the language code. - TEXT_CORPUS="${FLAGS_webtext_prefix}/${lang}.corpus.txt" - FILTER_ARGUMENTS="" - WORDLIST2DAWG_ARGUMENTS="" - # These dawg factors represent the fraction of the corpus not covered by the - # dawg, and seem like reasonable defaults, but the optimal value is likely - # to be highly corpus-dependent, as well as somewhat language-dependent. - # Number dawg factor is the fraction of all numeric strings that are not - # covered, which is why it is higher relative to the others. - PUNC_DAWG_FACTOR= - NUMBER_DAWG_FACTOR=0.125 - WORD_DAWG_FACTOR=0.05 - BIGRAM_DAWG_FACTOR=0.015 - TRAINING_DATA_ARGUMENTS="" - FRAGMENTS_DISABLED="y" - RUN_SHAPE_CLUSTERING=0 - AMBIGS_FILTER_DENOMINATOR="100000" - LEADING="32" - MEAN_COUNT="40" # Default for latin script. - # Language to mix with the language for maximum accuracy. Defaults to eng. - # If no language is good, set to the base language. - MIX_LANG="eng" - - case ${lang} in - # Latin languages. - enm ) TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported - test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; - frm ) TEXT_CORPUS="${FLAGS_webtext_prefix}/fra.corpus.txt" - # Make long-s substitutions for Middle French text - FILTER_ARGUMENTS="--make_early_language_variant=fra" - TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. 
- test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; - frk ) TEXT_CORPUS="${FLAGS_webtext_prefix}/deu.corpus.txt" - test -z "$FONTS" && FONTS=( "${FRAKTUR_FONTS[@]}" );; - ita_old ) - TEXT_CORPUS="${FLAGS_webtext_prefix}/ita.corpus.txt" - # Make long-s substitutions for Early Italian text - FILTER_ARGUMENTS="--make_early_language_variant=ita" - TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. - test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; - lat ) - test -z "$EXPOSURES" && EXPOSURES="-3 -2 -1 0 1 2 3" - test -z "$FONTS" && FONTS=( "${NEOLATIN_FONTS[@]}" ) ;; - spa_old ) - TEXT_CORPUS="${FLAGS_webtext_prefix}/spa.corpus.txt" - # Make long-s substitutions for Early Spanish text - FILTER_ARGUMENTS="--make_early_language_variant=spa" - TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. - test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; - srp_latn ) - TEXT_CORPUS=${FLAGS_webtext_prefix}/srp.corpus.txt ;; - vie ) TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - test -z "$FONTS" && FONTS=( "${VIETNAMESE_FONTS[@]}" ) ;; - # Highly inflective languages get a bigger dawg size. - # TODO(rays) Add more here! - hun ) WORD_DAWG_SIZE=1000000 ;; - pol ) WORD_DAWG_SIZE=1000000 ;; - - # Latin with default treatment. 
- afr ) ;; - aze ) ;; - bos ) ;; - cat ) ;; - ceb ) ;; - ces ) PUNC_DAWG_FACTOR=0.004 ;; - cym ) ;; - dan ) ;; - deu ) WORD_DAWG_FACTOR=0.125 ;; - eng ) WORD_DAWG_FACTOR=0.03 ;; - epo ) ;; - est ) ;; - eus ) ;; - fil ) ;; - fin ) ;; - fra ) WORD_DAWG_FACTOR=0.08 ;; - gle ) ;; - gle_uncial ) test -z "$FONTS" && FONTS=( "${IRISH_UNCIAL_FONTS[@]}" );; - glg ) ;; - hat ) ;; - hrv ) ;; - iast ) ;; - ind ) ;; - isl ) ;; - ita ) ;; - jav ) ;; - lav ) ;; - lit ) ;; - mlt ) ;; - msa ) ;; - nld ) WORD_DAWG_FACTOR=0.02 ;; - nor ) ;; - por ) ;; - ron ) ;; - slk ) ;; - slv ) ;; - spa ) ;; - sqi ) ;; - swa ) ;; - swe ) ;; - tgl ) ;; - tur ) ;; - uzb ) ;; - zlm ) ;; - - # Special code for performing language-id that is trained on - # EFIGS+Latin+Vietnamese text with regular + fraktur fonts. - lat_lid ) - TEXT_CORPUS=${FLAGS_webtext_prefix}/lat_lid.corpus.txt - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - GENERATE_WORD_BIGRAMS=0 - # Strip unrenderable words as not all fonts will render the extended - # latin symbols found in Vietnamese text. - WORD_DAWG_SIZE=1000000 - test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; - - # Cyrillic script-based languages. It is bad to mix Latin with Cyrillic. - rus ) test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) - MIX_LANG="rus" - NUMBER_DAWG_FACTOR=0.05 - WORD_DAWG_SIZE=1000000 ;; - aze_cyrl | bel | bul | kaz | mkd | srp | tgk | ukr | uzb_cyrl ) - MIX_LANG="${lang}" - test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) ;; - - # Special code for performing Cyrillic language-id that is trained on - # Russian, Serbian, Ukrainian, Belarusian, Macedonian, Tajik and Mongolian - # text with the list of Russian fonts. 
- cyr_lid ) - TEXT_CORPUS=${FLAGS_webtext_prefix}/cyr_lid.corpus.txt - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - GENERATE_WORD_BIGRAMS=0 - WORD_DAWG_SIZE=1000000 - test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" );; - - # South Asian scripts mostly have a lot of different graphemes, so trim - # down the MEAN_COUNT so as not to get a huge amount of text. - asm | ben ) - MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - test -z "$FONTS" && FONTS=( "${BENGALI_FONTS[@]}" ) ;; - bih | hin | mar | nep | san ) - MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - test -z "$FONTS" && FONTS=( "${DEVANAGARI_FONTS[@]}" ) ;; - bod ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - test -z "$FONTS" && FONTS=( "${TIBETAN_FONTS[@]}" ) ;; - dzo ) - WORD_DAWG_FACTOR=0.01 - test -z "$FONTS" && FONTS=( "${TIBETAN_FONTS[@]}" ) ;; - guj ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - test -z "$FONTS" && FONTS=( "${GUJARATI_FONTS[@]}" ) ;; - kan ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" - TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - test -z "$FONTS" && FONTS=( "${KANNADA_FONTS[@]}" ) ;; - mal ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" - TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - test -z "$FONTS" && FONTS=( "${MALAYALAM_FONTS[@]}" ) ;; - ori ) - WORD_DAWG_FACTOR=0.01 - test -z "$FONTS" && FONTS=( "${ORIYA_FONTS[@]}" ) ;; - pan ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.01 - test -z "$FONTS" && FONTS=( "${PUNJABI_FONTS[@]}" ) ;; - sin ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.01 - test -z "$FONTS" && FONTS=( "${SINHALA_FONTS[@]}" ) ;; - tam ) MEAN_COUNT="30" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" - TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - test -z "$FONTS" && FONTS=( "${TAMIL_FONTS[@]}" ) ;; - tel ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" - TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - test -z "$FONTS" && 
FONTS=( "${TELUGU_FONTS[@]}" ) ;; - - # SouthEast Asian scripts. - jav_java ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - test -z "$FONTS" && FONTS=( "${JAVANESE_FONTS[@]}" ) ;; - khm ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - test -z "$FONTS" && FONTS=( "${KHMER_FONTS[@]}" ) ;; - lao ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - test -z "$FONTS" && FONTS=( "${LAOTHIAN_FONTS[@]}" ) ;; - mya ) MEAN_COUNT="12" - WORD_DAWG_FACTOR=0.15 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - test -z "$FONTS" && FONTS=( "${BURMESE_FONTS[@]}" ) ;; - tha ) MEAN_COUNT="30" - WORD_DAWG_FACTOR=0.01 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - FILTER_ARGUMENTS="--segmenter_lang=tha" - TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" - AMBIGS_FILTER_DENOMINATOR="1000" - LEADING=48 - test -z "$FONTS" && FONTS=( "${THAI_FONTS[@]}" ) ;; - - # CJK - chi_sim ) - MEAN_COUNT="15" - PUNC_DAWG_FACTOR=0.015 - WORD_DAWG_FACTOR=0.015 - GENERATE_WORD_BIGRAMS=0 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" - FILTER_ARGUMENTS="--charset_filter=chi_sim --segmenter_lang=chi_sim" - test -z "$FONTS" && FONTS=( "${CHI_SIM_FONTS[@]}" ) ;; - chi_tra ) - MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.015 - GENERATE_WORD_BIGRAMS=0 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" - FILTER_ARGUMENTS="--charset_filter=chi_tra --segmenter_lang=chi_tra" - test -z "$FONTS" && FONTS=( "${CHI_TRA_FONTS[@]}" ) ;; - jpn ) MEAN_COUNT="15" - WORD_DAWG_FACTOR=0.015 - GENERATE_WORD_BIGRAMS=0 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" - FILTER_ARGUMENTS="--charset_filter=jpn --segmenter_lang=jpn" - test 
-z "$FONTS" && FONTS=( "${JPN_FONTS[@]}" ) ;; - kor ) MEAN_COUNT="20" - WORD_DAWG_FACTOR=0.015 - NUMBER_DAWG_FACTOR=0.05 - TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - TRAINING_DATA_ARGUMENTS+=" --desired_bigrams=" - GENERATE_WORD_BIGRAMS=0 - FILTER_ARGUMENTS="--charset_filter=kor --segmenter_lang=kor" - test -z "$FONTS" && FONTS=( "${KOREAN_FONTS[@]}" ) ;; - - # Middle-Eastern scripts. - ara ) test -z "$FONTS" && FONTS=( "${ARABIC_FONTS[@]}" ) ;; - div ) test -z "$FONTS" && FONTS=( "${THAANA_FONTS[@]}" ) ;; - fas | pus | snd | uig | urd ) - test -z "$FONTS" && FONTS=( "${PERSIAN_FONTS[@]}" ) ;; - heb | yid ) - NUMBER_DAWG_FACTOR=0.05 - WORD_DAWG_FACTOR=0.08 - test -z "$FONTS" && FONTS=( "${HEBREW_FONTS[@]}" ) ;; - syr ) test -z "$FONTS" && FONTS=( "${SYRIAC_FONTS[@]}" ) ;; - - # Other scripts. - amh | tir) - test -z "$FONTS" && FONTS=( "${AMHARIC_FONTS[@]}" ) ;; - chr ) test -z "$FONTS" && FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" \ - "Noto Sans Cherokee" \ - ) ;; - ell ) - NUMBER_DAWG_FACTOR=0.05 - WORD_DAWG_FACTOR=0.08 - test -z "$FONTS" && FONTS=( "${GREEK_FONTS[@]}" ) ;; - grc ) - test -z "$EXPOSURES" && EXPOSURES="-3 -2 -1 0 1 2 3" - test -z "$FONTS" && FONTS=( "${ANCIENT_GREEK_FONTS[@]}" ) ;; - hye ) test -z "$FONTS" && FONTS=( "${ARMENIAN_FONTS[@]}" ) ;; - iku ) test -z "$FONTS" && FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" ) ;; - kat) test -z "$FONTS" && FONTS=( "${GEORGIAN_FONTS[@]}" ) ;; - kat_old) - TEXT_CORPUS="${FLAGS_webtext_prefix}/kat.corpus.txt" - test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;; - kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" ) - TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;; - kur ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;; - - *) err_exit "Error: ${lang} is not a valid language code" - esac - if [[ ${FLAGS_mean_count:-} -gt 0 ]]; then - TRAINING_DATA_ARGUMENTS+=" --mean_count=${FLAGS_mean_count}" - elif [[ ! 
-z ${MEAN_COUNT:-} ]]; then - TRAINING_DATA_ARGUMENTS+=" --mean_count=${MEAN_COUNT}" - fi - # Default to Latin fonts if none have been set - test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" ) - - # Default to 0 exposure if it hasn't been set - test -z "${EXPOSURES:-}" && EXPOSURES=0 - # Set right-to-left and normalization mode. - case "${LANG_CODE}" in - ara | div| fas | pus | snd | syr | uig | urd | kur_ara | heb | yid ) - LANG_IS_RTL="1" - NORM_MODE="2" ;; - asm | ben | bih | hin | mar | nep | guj | kan | mal | tam | tel | pan | \ - dzo | sin | san | bod | ori | khm | mya | tha | lao | jav | jav_java) - LANG_IS_RTL="0" - NORM_MODE="2" ;; - * ) - LANG_IS_RTL="0" - NORM_MODE="1" ;; - esac -} - -#============================================================================= -# END of Language specific info -#============================================================================= diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ligature_table.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ligature_table.cpp deleted file mode 100644 index 38b18dce..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ligature_table.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/********************************************************************** - * File: ligature_table.cpp - * Description: Class for adding and removing optional latin ligatures, - * conditional on codepoint support by a specified font - * (if specified). - * Author: Ranjith Unnikrishnan - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include "ligature_table.h" - -#include - -#include "pango_font_info.h" -#include "tlog.h" -#include "unichar.h" -#include "unicharset.h" -#include "unicode/errorcode.h" // from libicu -#include "unicode/normlzr.h" // from libicu -#include "unicode/unistr.h" // from libicu -#include "unicode/utypes.h" // from libicu - -namespace tesseract { - -static std::string EncodeAsUTF8(const char32 ch32) { - UNICHAR uni_ch(ch32); - return std::string(uni_ch.utf8(), uni_ch.utf8_len()); -} - -// Range of optional latin ligature characters in Unicode to build ligatures -// from. Note that this range does not contain the custom ligatures that we -// encode in the private use area. -const int kMinLigature = 0xfb00; -const int kMaxLigature = 0xfb17; // Don't put the wide Hebrew letters in. - -/* static */ -std::unique_ptr LigatureTable::instance_; - -/* static */ -LigatureTable* LigatureTable::Get() { - if (instance_ == nullptr) { - instance_.reset(new LigatureTable()); - instance_->Init(); - } - return instance_.get(); -} - -LigatureTable::LigatureTable() : min_lig_length_(0), max_lig_length_(0), - min_norm_length_(0), max_norm_length_(0) {} - -void LigatureTable::Init() { - if (norm_to_lig_table_.empty()) { - for (char32 lig = kMinLigature; lig <= kMaxLigature; ++lig) { - // For each char in the range, convert to utf8, nfkc normalize, and if - // the strings are different put the both mappings in the hash_maps. 
- std::string lig8 = EncodeAsUTF8(lig); - icu::UnicodeString unicode_lig8(static_cast(lig)); - icu::UnicodeString normed8_result; - icu::ErrorCode status; - icu::Normalizer::normalize(unicode_lig8, UNORM_NFKC, 0, normed8_result, - status); - std::string normed8; - normed8_result.toUTF8String(normed8); - // The icu::Normalizer maps the "LONG S T" ligature to "st". Correct that - // here manually so that AddLigatures() will work as desired. - if (lig8 == "\uFB05") - normed8 = "ſt"; - int lig_length = lig8.length(); - int norm_length = normed8.size(); - if (normed8 != lig8 && lig_length > 1 && norm_length > 1) { - norm_to_lig_table_[normed8] = lig8; - lig_to_norm_table_[lig8] = normed8; - if (min_lig_length_ == 0 || lig_length < min_lig_length_) - min_lig_length_ = lig_length; - if (lig_length > max_lig_length_) - max_lig_length_ = lig_length; - if (min_norm_length_ == 0 || norm_length < min_norm_length_) - min_norm_length_ = norm_length; - if (norm_length > max_norm_length_) - max_norm_length_ = norm_length; - } - } - // Add custom extra ligatures. 
- for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) { - norm_to_lig_table_[UNICHARSET::kCustomLigatures[i][0]] = - UNICHARSET::kCustomLigatures[i][1]; - int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]); - if (min_norm_length_ == 0 || norm_length < min_norm_length_) - min_norm_length_ = norm_length; - if (norm_length > max_norm_length_) - max_norm_length_ = norm_length; - - lig_to_norm_table_[UNICHARSET::kCustomLigatures[i][1]] = - UNICHARSET::kCustomLigatures[i][0]; - } - } -} - -std::string LigatureTable::RemoveLigatures(const std::string& str) const { - std::string result; - UNICHAR::const_iterator it_begin = UNICHAR::begin(str.c_str(), str.length()); - UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length()); - char tmp[5]; - int len; - for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) { - len = it.get_utf8(tmp); - tmp[len] = '\0'; - LigHash::const_iterator lig_it = lig_to_norm_table_.find(tmp); - if (lig_it != lig_to_norm_table_.end()) { - result += lig_it->second; - } else { - result += tmp; - } - } - return result; -} - -std::string LigatureTable::RemoveCustomLigatures(const std::string& str) const { - std::string result; - UNICHAR::const_iterator it_begin = UNICHAR::begin(str.c_str(), str.length()); - UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length()); - char tmp[5]; - int len; - int norm_ind; - for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) { - len = it.get_utf8(tmp); - tmp[len] = '\0'; - norm_ind = -1; - for (int i = 0; - UNICHARSET::kCustomLigatures[i][0] != nullptr && norm_ind < 0; ++i) { - if (!strcmp(tmp, UNICHARSET::kCustomLigatures[i][1])) { - norm_ind = i; - } - } - if (norm_ind >= 0) { - result += UNICHARSET::kCustomLigatures[norm_ind][0]; - } else { - result += tmp; - } - } - return result; -} - -std::string LigatureTable::AddLigatures(const std::string& str, - const PangoFontInfo* font) const { - std::string result; - int len = str.size(); - int 
step = 0; - int i = 0; - for (i = 0; i < len - min_norm_length_ + 1; i += step) { - step = 0; - for (int liglen = max_norm_length_; liglen >= min_norm_length_; --liglen) { - if (i + liglen <= len) { - std::string lig_cand = str.substr(i, liglen); - LigHash::const_iterator it = norm_to_lig_table_.find(lig_cand); - if (it != norm_to_lig_table_.end()) { - tlog(3, "Considering %s -> %s\n", lig_cand.c_str(), - it->second.c_str()); - if (font) { - // Test for renderability. - if (!font->CanRenderString(it->second.data(), it->second.length())) - continue; // Not renderable - } - // Found a match so convert it. - step = liglen; - result += it->second; - tlog(2, "Substituted %s -> %s\n", lig_cand.c_str(), - it->second.c_str()); - break; - } - } - } - if (step == 0) { - result += str[i]; - step = 1; - } - } - result += str.substr(i, len - i); - return result; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ligature_table.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ligature_table.h deleted file mode 100644 index 725432c0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/ligature_table.h +++ /dev/null @@ -1,80 +0,0 @@ -/********************************************************************** - * File: ligature_table.h - * Description: Class for adding and removing optional latin ligatures, - * conditional on codepoint support by a specified font - * (if specified). - * Author: Ranjith Unnikrishnan - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TRAININGDATA_LIGATURE_TABLE_H_ -#define TRAININGDATA_LIGATURE_TABLE_H_ - -#include -#include -#include - -#include "util.h" - -namespace tesseract { - -class PangoFontInfo; // defined in pango_font_info.h - -// Map to substitute strings for ligatures. -using LigHash = std::unordered_map; - -class LigatureTable { - public: - // Get a static instance of this class. - static LigatureTable* Get(); - - // Convert the utf8 string so that ligaturizable sequences, such as "fi" get - // replaced by the (utf8 code for) appropriate ligature characters. Only do so - // if the corresponding ligature character is renderable in the current font. - std::string AddLigatures(const std::string& str, const PangoFontInfo* font) const; - // Remove all ligatures. - std::string RemoveLigatures(const std::string& str) const; - // Remove only custom ligatures (eg. "ct") encoded in the private-use-area. - std::string RemoveCustomLigatures(const std::string& str) const; - - const LigHash& norm_to_lig_table() const { - return norm_to_lig_table_; - } - const LigHash& lig_to_norm_table() const { - return lig_to_norm_table_; - } - - protected: - LigatureTable(); - // Initialize the hash tables mapping between ligature strings and the - // corresponding ligature characters. 
- void Init(); - - static std::unique_ptr instance_; - LigHash norm_to_lig_table_; - LigHash lig_to_norm_table_; - int min_lig_length_; - int max_lig_length_; - int min_norm_length_; - int max_norm_length_; - - private: - LigatureTable(const LigatureTable&); - void operator=(const LigatureTable&); -}; - -} // namespace tesseract - -#endif // OCR_TRAININGDATA_TYPESETTING_LIGATURE_TABLE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmeval.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmeval.cpp deleted file mode 100644 index 3492a14c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmeval.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmeval.cpp -// Description: Evaluation program for LSTM-based networks. -// Author: Ray Smith -// Created: Wed Nov 23 12:20:06 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifdef GOOGLE_TESSERACT -#include "base/commandlineflags.h" -#endif -#include "commontraining.h" -#include "genericvector.h" -#include "lstmtester.h" -#include "strngs.h" -#include "tprintf.h" - -STRING_PARAM_FLAG(model, "", "Name of model file (training or recognition)"); -STRING_PARAM_FLAG(traineddata, "", - "If model is a training checkpoint, then traineddata must " - "be the traineddata file that was given to the trainer"); -STRING_PARAM_FLAG(eval_listfile, "", - "File listing sample files in lstmf training format."); -INT_PARAM_FLAG(max_image_MB, 2000, "Max memory to use for images."); -INT_PARAM_FLAG(verbosity, 1, - "Amount of diagnosting information to output (0-2)."); - -int main(int argc, char **argv) { - tesseract::CheckSharedLibraryVersion(); - ParseArguments(&argc, &argv); - if (FLAGS_model.empty()) { - tprintf("Must provide a --model!\n"); - return 1; - } - if (FLAGS_eval_listfile.empty()) { - tprintf("Must provide a --eval_listfile!\n"); - return 1; - } - tesseract::TessdataManager mgr; - if (!mgr.Init(FLAGS_model.c_str())) { - if (FLAGS_traineddata.empty()) { - tprintf("Must supply --traineddata to eval a training checkpoint!\n"); - return 1; - } - tprintf("%s is not a recognition model, trying training checkpoint...\n", - FLAGS_model.c_str()); - if (!mgr.Init(FLAGS_traineddata.c_str())) { - tprintf("Failed to load language model from %s!\n", - FLAGS_traineddata.c_str()); - return 1; - } - GenericVector model_data; - if (!tesseract::LoadDataFromFile(FLAGS_model.c_str(), &model_data)) { - tprintf("Failed to load model from: %s\n", FLAGS_model.c_str()); - return 1; - } - mgr.OverwriteEntry(tesseract::TESSDATA_LSTM, &model_data[0], - model_data.size()); - } - tesseract::LSTMTester tester(static_cast(FLAGS_max_image_MB) * - 1048576); - if (!tester.LoadAllEvalData(FLAGS_eval_listfile.c_str())) { - tprintf("Failed to load eval data from: %s\n", FLAGS_eval_listfile.c_str()); - 
return 1; - } - double errs = 0.0; - STRING result = - tester.RunEvalSync(0, &errs, mgr, - /*training_stage (irrelevant)*/ 0, FLAGS_verbosity); - tprintf("%s\n", result.string()); - return 0; -} /* main */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtester.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtester.cpp deleted file mode 100644 index c20a2d9e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtester.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmtester.cpp -// Description: Top-level line evaluation class for LSTM-based networks. -// Author: Ray Smith -// Created: Wed Nov 23 11:18:06 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#include "lstmtester.h" -#include "genericvector.h" - -namespace tesseract { - -LSTMTester::LSTMTester(int64_t max_memory) - : test_data_(max_memory), total_pages_(0), async_running_(false) {} - -// Loads a set of lstmf files that were created using the lstm.train config to -// tesseract into memory ready for testing. Returns false if nothing was -// loaded. The arg is a filename of a file that lists the filenames. 
-bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) { - GenericVector filenames; - if (!LoadFileLinesToStrings(filenames_file, &filenames)) { - tprintf("Failed to load list of eval filenames from %s\n", - filenames_file.string()); - return false; - } - return LoadAllEvalData(filenames); -} - -// Loads a set of lstmf files that were created using the lstm.train config to -// tesseract into memory ready for testing. Returns false if nothing was -// loaded. -bool LSTMTester::LoadAllEvalData(const GenericVector& filenames) { - test_data_.Clear(); - bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr); - total_pages_ = test_data_.TotalPages(); - return result; -} - -// Runs an evaluation asynchronously on the stored data and returns a string -// describing the results of the previous test. -STRING LSTMTester::RunEvalAsync(int iteration, const double* training_errors, - const TessdataManager& model_mgr, - int training_stage) { - STRING result; - if (total_pages_ == 0) { - result.add_str_int("No test data at iteration", iteration); - return result; - } - if (!LockIfNotRunning()) { - result.add_str_int("Previous test incomplete, skipping test at iteration", - iteration); - return result; - } - // Save the args. - STRING prev_result = test_result_; - test_result_ = ""; - if (training_errors != nullptr) { - test_iteration_ = iteration; - test_training_errors_ = training_errors; - test_model_mgr_ = model_mgr; - test_training_stage_ = training_stage; - SVSync::StartThread(&LSTMTester::ThreadFunc, this); - } else { - UnlockRunning(); - } - return prev_result; -} - -// Runs an evaluation synchronously on the stored data and returns a string -// describing the results. 
-STRING LSTMTester::RunEvalSync(int iteration, const double* training_errors, - const TessdataManager& model_mgr, - int training_stage, int verbosity) { - LSTMTrainer trainer; - trainer.InitCharSet(model_mgr); - TFile fp; - if (!model_mgr.GetComponent(TESSDATA_LSTM, &fp) || - !trainer.DeSerialize(&model_mgr, &fp)) { - return "Deserialize failed"; - } - int eval_iteration = 0; - double char_error = 0.0; - double word_error = 0.0; - int error_count = 0; - while (error_count < total_pages_) { - const ImageData* trainingdata = test_data_.GetPageBySerial(eval_iteration); - trainer.SetIteration(++eval_iteration); - NetworkIO fwd_outputs, targets; - Trainability result = - trainer.PrepareForBackward(trainingdata, &fwd_outputs, &targets); - if (result != UNENCODABLE) { - char_error += trainer.NewSingleError(tesseract::ET_CHAR_ERROR); - word_error += trainer.NewSingleError(tesseract::ET_WORD_RECERR); - ++error_count; - if (verbosity > 1 || (verbosity > 0 && result != PERFECT)) { - tprintf("Truth:%s\n", trainingdata->transcription().string()); - GenericVector ocr_labels; - GenericVector xcoords; - trainer.LabelsFromOutputs(fwd_outputs, &ocr_labels, &xcoords); - STRING ocr_text = trainer.DecodeLabels(ocr_labels); - tprintf("OCR :%s\n", ocr_text.string()); - } - } - } - char_error *= 100.0 / total_pages_; - word_error *= 100.0 / total_pages_; - STRING result; - result.add_str_int("At iteration ", iteration); - result.add_str_int(", stage ", training_stage); - result.add_str_double(", Eval Char error rate=", char_error); - result.add_str_double(", Word error rate=", word_error); - return result; -} - -// Static helper thread function for RunEvalAsync, with a specific signature -// required by SVSync::StartThread. Actually a member function pretending to -// be static, its arg is a this pointer that it will cast back to LSTMTester* -// to call RunEvalSync using the stored args that RunEvalAsync saves in *this. 
-// LockIfNotRunning must have returned true before calling ThreadFunc, and -// it will call UnlockRunning to release the lock after RunEvalSync completes. -/* static */ -void* LSTMTester::ThreadFunc(void* lstmtester_void) { - LSTMTester* lstmtester = static_cast(lstmtester_void); - lstmtester->test_result_ = lstmtester->RunEvalSync( - lstmtester->test_iteration_, lstmtester->test_training_errors_, - lstmtester->test_model_mgr_, lstmtester->test_training_stage_, - /*verbosity*/ 0); - lstmtester->UnlockRunning(); - return lstmtester_void; -} - -// Returns true if there is currently nothing running, and takes the lock -// if there is nothing running. -bool LSTMTester::LockIfNotRunning() { - SVAutoLock lock(&running_mutex_); - if (async_running_) return false; - async_running_ = true; - return true; -} - -// Releases the running lock. -void LSTMTester::UnlockRunning() { - SVAutoLock lock(&running_mutex_); - async_running_ = false; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtester.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtester.h deleted file mode 100644 index 56c04d72..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtester.h +++ /dev/null @@ -1,95 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmtester.h -// Description: Top-level line evaluation class for LSTM-based networks. -// Author: Ray Smith -// Created: Wed Nov 23 11:05:06 PST 2016 -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TRAINING_LSTMTESTER_H_ -#define TESSERACT_TRAINING_LSTMTESTER_H_ - -#include "genericvector.h" -#include "lstmtrainer.h" -#include "strngs.h" -#include "svutil.h" - -namespace tesseract { - -class LSTMTester { - public: - LSTMTester(int64_t max_memory); - - // Loads a set of lstmf files that were created using the lstm.train config to - // tesseract into memory ready for testing. Returns false if nothing was - // loaded. The arg is a filename of a file that lists the filenames, with one - // name per line. Conveniently, tesstrain.sh generates such a file, along - // with the files themselves. - bool LoadAllEvalData(const STRING& filenames_file); - // Loads a set of lstmf files that were created using the lstm.train config to - // tesseract into memory ready for testing. Returns false if nothing was - // loaded. - bool LoadAllEvalData(const GenericVector& filenames); - - // Runs an evaluation asynchronously on the stored eval data and returns a - // string describing the results of the previous test. Args match TestCallback - // declared in lstmtrainer.h: - // iteration: Current learning iteration number. - // training_errors: If not null, is an array of size ET_COUNT, indexed by - // the ErrorTypes enum and indicates the current errors measured by the - // trainer, and this is a serious request to run an evaluation. If null, - // then the caller is just polling for the results of the previous eval. 
- // model_data: is the model to evaluate, which should be a serialized - // LSTMTrainer. - // training_stage: an arbitrary number on the progress of training. - STRING RunEvalAsync(int iteration, const double* training_errors, - const TessdataManager& model_mgr, int training_stage); - // Runs an evaluation synchronously on the stored eval data and returns a - // string describing the results. Args as RunEvalAsync, except verbosity, - // which outputs errors, if 1, or all results if 2. - STRING RunEvalSync(int iteration, const double* training_errors, - const TessdataManager& model_mgr, int training_stage, - int verbosity); - - private: - // Static helper thread function for RunEvalAsync, with a specific signature - // required by SVSync::StartThread. Actually a member function pretending to - // be static, its arg is a this pointer that it will cast back to LSTMTester* - // to call RunEvalSync using the stored args that RunEvalAsync saves in *this. - // LockIfNotRunning must have returned true before calling ThreadFunc, and - // it will call UnlockRunning to release the lock after RunEvalSync completes. - static void* ThreadFunc(void* lstmtester_void); - // Returns true if there is currently nothing running, and takes the lock - // if there is nothing running. - bool LockIfNotRunning(); - // Releases the running lock. - void UnlockRunning(); - - // The data to test with. - DocumentCache test_data_; - int total_pages_; - // Flag that indicates an asynchronous test is currently running. - // Protected by running_mutex_. - bool async_running_; - SVMutex running_mutex_; - // Stored copies of the args for use while running asynchronously. 
- int test_iteration_; - const double* test_training_errors_; - TessdataManager test_model_mgr_; - int test_training_stage_; - STRING test_result_; -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_LSTMTESTER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtraining.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtraining.cpp deleted file mode 100644 index d3eb7747..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/lstmtraining.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lstmtraining.cpp -// Description: Training program for LSTM-based networks. -// Author: Ray Smith -// Created: Fri May 03 11:05:06 PST 2013 -// -// (C) Copyright 2013, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// - -#ifdef GOOGLE_TESSERACT -#include "base/commandlineflags.h" -#endif -#include -#include "commontraining.h" -#include "lstmtester.h" -#include "lstmtrainer.h" -#include "params.h" -#include "strngs.h" -#include "tprintf.h" -#include "unicharset_training_utils.h" - -INT_PARAM_FLAG(debug_interval, 0, "How often to display the alignment."); -STRING_PARAM_FLAG(net_spec, "", "Network specification"); -INT_PARAM_FLAG(net_mode, 192, "Controls network behavior."); -INT_PARAM_FLAG(perfect_sample_delay, 0, - "How many imperfect samples between perfect ones."); -DOUBLE_PARAM_FLAG(target_error_rate, 0.01, "Final error rate in percent."); -DOUBLE_PARAM_FLAG(weight_range, 0.1, "Range of initial random weights."); -DOUBLE_PARAM_FLAG(learning_rate, 10.0e-4, "Weight factor for new deltas."); -DOUBLE_PARAM_FLAG(momentum, 0.5, "Decay factor for repeating deltas."); -DOUBLE_PARAM_FLAG(adam_beta, 0.999, "Decay factor for repeating deltas."); -INT_PARAM_FLAG(max_image_MB, 6000, "Max memory to use for images."); -STRING_PARAM_FLAG(continue_from, "", "Existing model to extend"); -STRING_PARAM_FLAG(model_output, "lstmtrain", "Basename for output models"); -STRING_PARAM_FLAG(train_listfile, "", - "File listing training files in lstmf training format."); -STRING_PARAM_FLAG(eval_listfile, "", - "File listing eval files in lstmf training format."); -BOOL_PARAM_FLAG(stop_training, false, - "Just convert the training model to a runtime model."); -BOOL_PARAM_FLAG(convert_to_int, false, - "Convert the recognition model to an integer model."); -BOOL_PARAM_FLAG(sequential_training, false, - "Use the training files sequentially instead of round-robin."); -INT_PARAM_FLAG(append_index, -1, "Index in continue_from Network at which to" - " attach the new network defined by net_spec"); -BOOL_PARAM_FLAG(debug_network, false, - "Get info on distribution of weight values"); -INT_PARAM_FLAG(max_iterations, 0, "If set, exit after this 
many iterations"); -STRING_PARAM_FLAG(traineddata, "", - "Combined Dawgs/Unicharset/Recoder for language model"); -STRING_PARAM_FLAG(old_traineddata, "", - "When changing the character set, this specifies the old" - " character set that is to be replaced"); -BOOL_PARAM_FLAG(randomly_rotate, false, - "Train OSD and randomly turn training samples upside-down"); - -// Number of training images to train between calls to MaintainCheckpoints. -const int kNumPagesPerBatch = 100; - -// Apart from command-line flags, input is a collection of lstmf files, that -// were previously created using tesseract with the lstm.train config file. -// The program iterates over the inputs, feeding the data to the network, -// until the error rate reaches a specified target or max_iterations is reached. -int main(int argc, char **argv) { - tesseract::CheckSharedLibraryVersion(); - ParseArguments(&argc, &argv); - if (FLAGS_model_output.empty()) { - tprintf("Must provide a --model_output!\n"); - return EXIT_FAILURE; - } - if (FLAGS_traineddata.empty()) { - tprintf("Must provide a --traineddata see training wiki\n"); - return EXIT_FAILURE; - } - - // Check write permissions. - STRING test_file = FLAGS_model_output.c_str(); - test_file += "_wtest"; - FILE* f = fopen(test_file.c_str(), "wb"); - if (f != nullptr) { - fclose(f); - if (remove(test_file.c_str()) != 0) { - tprintf("Error, failed to remove %s: %s\n", - test_file.c_str(), strerror(errno)); - return EXIT_FAILURE; - } - } else { - tprintf("Error, model output cannot be written: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - - // Setup the trainer. 
- STRING checkpoint_file = FLAGS_model_output.c_str(); - checkpoint_file += "_checkpoint"; - STRING checkpoint_bak = checkpoint_file + ".bak"; - tesseract::LSTMTrainer trainer( - nullptr, nullptr, nullptr, nullptr, FLAGS_model_output.c_str(), - checkpoint_file.c_str(), FLAGS_debug_interval, - static_cast(FLAGS_max_image_MB) * 1048576); - trainer.InitCharSet(FLAGS_traineddata.c_str()); - - // Reading something from an existing model doesn't require many flags, - // so do it now and exit. - if (FLAGS_stop_training || FLAGS_debug_network) { - if (!trainer.TryLoadingCheckpoint(FLAGS_continue_from.c_str(), nullptr)) { - tprintf("Failed to read continue from: %s\n", - FLAGS_continue_from.c_str()); - return EXIT_FAILURE; - } - if (FLAGS_debug_network) { - trainer.DebugNetwork(); - } else { - if (FLAGS_convert_to_int) trainer.ConvertToInt(); - if (!trainer.SaveTraineddata(FLAGS_model_output.c_str())) { - tprintf("Failed to write recognition model : %s\n", - FLAGS_model_output.c_str()); - } - } - return EXIT_SUCCESS; - } - - // Get the list of files to process. - if (FLAGS_train_listfile.empty()) { - tprintf("Must supply a list of training filenames! --train_listfile\n"); - return EXIT_FAILURE; - } - GenericVector filenames; - if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(), - &filenames)) { - tprintf("Failed to load list of training filenames from %s\n", - FLAGS_train_listfile.c_str()); - return EXIT_FAILURE; - } - - // Checkpoints always take priority if they are available. - if (trainer.TryLoadingCheckpoint(checkpoint_file.string(), nullptr) || - trainer.TryLoadingCheckpoint(checkpoint_bak.string(), nullptr)) { - tprintf("Successfully restored trainer from %s\n", - checkpoint_file.string()); - } else { - if (!FLAGS_continue_from.empty()) { - // Load a past model file to improve upon. - if (!trainer.TryLoadingCheckpoint(FLAGS_continue_from.c_str(), - FLAGS_append_index >= 0 - ? 
FLAGS_continue_from.c_str() - : FLAGS_old_traineddata.c_str())) { - tprintf("Failed to continue from: %s\n", FLAGS_continue_from.c_str()); - return EXIT_FAILURE; - } - tprintf("Continuing from %s\n", FLAGS_continue_from.c_str()); - trainer.InitIterations(); - } - if (FLAGS_continue_from.empty() || FLAGS_append_index >= 0) { - if (FLAGS_append_index >= 0) { - tprintf("Appending a new network to an old one!!"); - if (FLAGS_continue_from.empty()) { - tprintf("Must set --continue_from for appending!\n"); - return EXIT_FAILURE; - } - } - // We are initializing from scratch. - if (!trainer.InitNetwork(FLAGS_net_spec.c_str(), FLAGS_append_index, - FLAGS_net_mode, FLAGS_weight_range, - FLAGS_learning_rate, FLAGS_momentum, - FLAGS_adam_beta)) { - tprintf("Failed to create network from spec: %s\n", - FLAGS_net_spec.c_str()); - return EXIT_FAILURE; - } - trainer.set_perfect_delay(FLAGS_perfect_sample_delay); - } - } - if (!trainer.LoadAllTrainingData(filenames, - FLAGS_sequential_training - ? tesseract::CS_SEQUENTIAL - : tesseract::CS_ROUND_ROBIN, - FLAGS_randomly_rotate)) { - tprintf("Load of images failed!!\n"); - return EXIT_FAILURE; - } - - tesseract::LSTMTester tester(static_cast(FLAGS_max_image_MB) * - 1048576); - tesseract::TestCallback tester_callback = nullptr; - if (!FLAGS_eval_listfile.empty()) { - if (!tester.LoadAllEvalData(FLAGS_eval_listfile.c_str())) { - tprintf("Failed to load eval data from: %s\n", - FLAGS_eval_listfile.c_str()); - return EXIT_FAILURE; - } - tester_callback = - NewPermanentTessCallback(&tester, &tesseract::LSTMTester::RunEvalAsync); - } - do { - // Train a few. 
- int iteration = trainer.training_iteration(); - for (int target_iteration = iteration + kNumPagesPerBatch; - iteration < target_iteration && - (iteration < FLAGS_max_iterations || FLAGS_max_iterations == 0); - iteration = trainer.training_iteration()) { - trainer.TrainOnLine(&trainer, false); - } - STRING log_str; - trainer.MaintainCheckpoints(tester_callback, &log_str); - tprintf("%s\n", log_str.string()); - } while (trainer.best_error_rate() > FLAGS_target_error_rate && - (trainer.training_iteration() < FLAGS_max_iterations || - FLAGS_max_iterations == 0)); - delete tester_callback; - tprintf("Finished! Error rate = %g\n", trainer.best_error_rate()); - return EXIT_SUCCESS; -} /* main */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/merge_unicharsets.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/merge_unicharsets.cpp deleted file mode 100644 index f5facbda..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/merge_unicharsets.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: merge_unicharsets.cpp -// Description: Simple tool to merge two or more unicharsets. -// Author: Ray Smith -// Created: Wed Sep 30 16:09:01 PDT 2015 -// -// (C) Copyright 2015, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "commontraining.h" // CheckSharedLibraryVersion -#include "unicharset.h" - -int main(int argc, char** argv) { - tesseract::CheckSharedLibraryVersion(); - - if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { - printf("%s\n", tesseract::TessBaseAPI::Version()); - return 0; - } else if (argc < 4) { - // Print usage - printf("Usage: %s -v | --version |\n" - " %s unicharset-in-1 ... unicharset-in-n unicharset-out\n", - argv[0], argv[0]); - return 1; - } - - UNICHARSET input_unicharset, result_unicharset; - for (int arg = 1; arg < argc - 1; ++arg) { - // Load the input unicharset - if (input_unicharset.load_from_file(argv[arg])) { - printf("Loaded unicharset of size %d from file %s\n", - input_unicharset.size(), argv[arg]); - result_unicharset.AppendOtherUnicharset(input_unicharset); - } else { - printf("Failed to load unicharset from file %s!!\n", argv[arg]); - exit(1); - } - } - - // Save the combined unicharset. - if (result_unicharset.save_to_file(argv[argc - 1])) { - printf("Wrote unicharset file %s.\n", argv[argc - 1]); - } else { - printf("Cannot save unicharset file %s.\n", argv[argc - 1]); - exit(1); - } - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mergenf.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mergenf.cpp deleted file mode 100644 index 029f029c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mergenf.cpp +++ /dev/null @@ -1,328 +0,0 @@ -/****************************************************************************** -** Filename: MergeNF.c -** Purpose: Program for merging similar nano-feature protos -** Author: Dan Johnson -** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. -******************************************************************************/ -#include "mergenf.h" -#include "host.h" -#include "clusttool.h" -#include "cluster.h" -#include "oldlist.h" -#include "protos.h" -#include "ocrfeatures.h" -#include "featdefs.h" -#include "intproto.h" -#include "params.h" - -#include -#include -#include -#include - -/*-------------------once in subfeat---------------------------------*/ -double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ..."); - -double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ..."); - -double_VAR(training_similarity_curl, 2.0, "Similarity Curl ..."); - -/*-----------------------------once in fasttrain----------------------------------*/ -double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ..."); - -double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ..."); - -double_VAR(training_angle_pad, 45.0, "Angle pad ..."); - -/** - * Compare protos p1 and p2 and return an estimate of the - * worst evidence rating that will result for any part of p1 - * that is compared to p2. In other words, if p1 were broken - * into pico-features and each pico-feature was matched to p2, - * what is the worst evidence rating that will be achieved for - * any pico-feature. - * - * @param p1, p2 protos to be compared - * - * Globals: none - * - * @return Worst possible result when matching p1 to p2. 
- */ -float CompareProtos(PROTO p1, PROTO p2) { - FEATURE Feature; - float WorstEvidence = WORST_EVIDENCE; - float Evidence; - float Angle, Length; - - /* if p1 and p2 are not close in length, don't let them match */ - Length = fabs (p1->Length - p2->Length); - if (Length > MAX_LENGTH_MISMATCH) - return (0.0); - - /* create a dummy pico-feature to be used for comparisons */ - Feature = NewFeature (&PicoFeatDesc); - Feature->Params[PicoFeatDir] = p1->Angle; - - /* convert angle to radians */ - Angle = p1->Angle * 2.0 * M_PI; - - /* find distance from center of p1 to 1/2 picofeat from end */ - Length = p1->Length / 2.0 - GetPicoFeatureLength () / 2.0; - if (Length < 0) Length = 0; - - /* set the dummy pico-feature at one end of p1 and match it to p2 */ - Feature->Params[PicoFeatX] = p1->X + cos (Angle) * Length; - Feature->Params[PicoFeatY] = p1->Y + sin (Angle) * Length; - if (DummyFastMatch (Feature, p2)) { - Evidence = SubfeatureEvidence (Feature, p2); - if (Evidence < WorstEvidence) - WorstEvidence = Evidence; - } else { - FreeFeature(Feature); - return 0.0; - } - - /* set the dummy pico-feature at the other end of p1 and match it to p2 */ - Feature->Params[PicoFeatX] = p1->X - cos (Angle) * Length; - Feature->Params[PicoFeatY] = p1->Y - sin (Angle) * Length; - if (DummyFastMatch (Feature, p2)) { - Evidence = SubfeatureEvidence (Feature, p2); - if (Evidence < WorstEvidence) - WorstEvidence = Evidence; - } else { - FreeFeature(Feature); - return 0.0; - } - - FreeFeature (Feature); - return (WorstEvidence); - -} /* CompareProtos */ - -/** - * This routine computes a proto which is the weighted - * average of protos p1 and p2. The new proto is returned - * in MergedProto. 
- * - * @param p1, p2 protos to be merged - * @param w1, w2 weight of each proto - * @param MergedProto place to put resulting merged proto - * - * Globals: none - * - * @return none (results are returned in MergedProto) - */ -void ComputeMergedProto (PROTO p1, - PROTO p2, - float w1, - float w2, - PROTO MergedProto) { - float TotalWeight; - - TotalWeight = w1 + w2; - w1 /= TotalWeight; - w2 /= TotalWeight; - - MergedProto->X = p1->X * w1 + p2->X * w2; - MergedProto->Y = p1->Y * w1 + p2->Y * w2; - MergedProto->Length = p1->Length * w1 + p2->Length * w2; - MergedProto->Angle = p1->Angle * w1 + p2->Angle * w2; - FillABC(MergedProto); -} /* ComputeMergedProto */ - -/** - * This routine searches through all of the prototypes in - * Class and returns the id of the proto which would provide - * the best approximation of Prototype. If no close - * approximation can be found, NO_PROTO is returned. - * - * @param Class class to search for matching old proto in - * @param NumMerged # of protos merged into each proto of Class - * @param Prototype new proto to find match for - * - * Globals: none - * - * @return Id of closest proto in Class or NO_PROTO. 
- */ -int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], - PROTOTYPE *Prototype) { - PROTO_STRUCT NewProto; - PROTO_STRUCT MergedProto; - int Pid; - PROTO Proto; - int BestProto; - float BestMatch; - float Match, OldMatch, NewMatch; - - MakeNewFromOld (&NewProto, Prototype); - - BestProto = NO_PROTO; - BestMatch = WORST_MATCH_ALLOWED; - for (Pid = 0; Pid < Class->NumProtos; Pid++) { - Proto = ProtoIn(Class, Pid); - ComputeMergedProto(Proto, &NewProto, - (float) NumMerged[Pid], 1.0, &MergedProto); - OldMatch = CompareProtos(Proto, &MergedProto); - NewMatch = CompareProtos(&NewProto, &MergedProto); - Match = std::min(OldMatch, NewMatch); - if (Match > BestMatch) { - BestProto = Pid; - BestMatch = Match; - } - } - return BestProto; -} /* FindClosestExistingProto */ - -/** - * This fills in the fields of the New proto based on the - * fields of the Old proto. - * - * @param New new proto to be filled in - * @param Old old proto to be converted - * - * Globals: none - */ -void MakeNewFromOld(PROTO New, PROTOTYPE *Old) { - New->X = CenterX(Old->Mean); - New->Y = CenterY(Old->Mean); - New->Length = LengthOf(Old->Mean); - New->Angle = OrientationOf(Old->Mean); - FillABC(New); -} /* MakeNewFromOld */ - -/*-------------------once in subfeat---------------------------------*/ - -/** - * @name SubfeatureEvidence - * - * Compare a feature to a prototype. Print the result. - */ -float SubfeatureEvidence(FEATURE Feature, PROTO Proto) { - float Distance; - float Dangle; - - Dangle = Proto->Angle - Feature->Params[PicoFeatDir]; - if (Dangle < -0.5) Dangle += 1.0; - if (Dangle > 0.5) Dangle -= 1.0; - Dangle *= training_angle_match_scale; - - Distance = Proto->A * Feature->Params[PicoFeatX] + - Proto->B * Feature->Params[PicoFeatY] + - Proto->C; - - return (EvidenceOf (Distance * Distance + Dangle * Dangle)); -} - -/** - * @name EvidenceOf - * - * Return the new type of evidence number corresponding to this - * distance value. 
This number is no longer based on the chi squared - * approximation. The equation that represents the transform is: - * 1 / (1 + (sim / midpoint) ^ curl) - */ -double EvidenceOf (double Similarity) { - - Similarity /= training_similarity_midpoint; - - if (training_similarity_curl == 3) - Similarity = Similarity * Similarity * Similarity; - else if (training_similarity_curl == 2) - Similarity = Similarity * Similarity; - else - Similarity = pow (Similarity, training_similarity_curl); - - return (1.0 / (1.0 + Similarity)); -} - -/** - * This routine returns true if Feature would be matched - * by a fast match table built from Proto. - * - * @param Feature feature to be "fast matched" to proto - * @param Proto proto being "fast matched" against - * - * Globals: - * - training_tangent_bbox_pad bounding box pad tangent to proto - * - training_orthogonal_bbox_pad bounding box pad orthogonal to proto - * - * @return true if feature could match Proto. - */ -bool DummyFastMatch(FEATURE Feature, PROTO Proto) -{ - FRECT BoundingBox; - float MaxAngleError; - float AngleError; - - MaxAngleError = training_angle_pad / 360.0; - AngleError = fabs (Proto->Angle - Feature->Params[PicoFeatDir]); - if (AngleError > 0.5) - AngleError = 1.0 - AngleError; - - if (AngleError > MaxAngleError) - return false; - - ComputePaddedBoundingBox (Proto, - training_tangent_bbox_pad * GetPicoFeatureLength (), - training_orthogonal_bbox_pad * GetPicoFeatureLength (), - &BoundingBox); - - return PointInside(&BoundingBox, Feature->Params[PicoFeatX], - Feature->Params[PicoFeatY]); -} /* DummyFastMatch */ - -/** - * This routine computes a bounding box that encloses the - * specified proto along with some padding. The - * amount of padding is specified as separate distances - * in the tangential and orthogonal directions. 
- * - * @param Proto proto to compute bounding box for - * @param TangentPad amount of pad to add in direction of segment - * @param OrthogonalPad amount of pad to add orthogonal to segment - * @param[out] BoundingBox place to put results - * - * Globals: none - * - * @return none (results are returned in BoundingBox) - */ -void ComputePaddedBoundingBox (PROTO Proto, float TangentPad, - float OrthogonalPad, FRECT *BoundingBox) { - float Length = Proto->Length / 2.0 + TangentPad; - float Angle = Proto->Angle * 2.0 * M_PI; - float CosOfAngle = fabs(cos(Angle)); - float SinOfAngle = fabs(sin(Angle)); - - float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad); - BoundingBox->MinX = Proto->X - Pad; - BoundingBox->MaxX = Proto->X + Pad; - - Pad = std::max(SinOfAngle * Length, CosOfAngle * OrthogonalPad); - BoundingBox->MinY = Proto->Y - Pad; - BoundingBox->MaxY = Proto->Y + Pad; - -} /* ComputePaddedBoundingBox */ - -/** - * Return true if point (X,Y) is inside of Rectangle. - * - * Globals: none - * - * @return true if point (X,Y) is inside of Rectangle. - */ -bool PointInside(FRECT *Rectangle, float X, float Y) { - return (X >= Rectangle->MinX) && - (X <= Rectangle->MaxX) && - (Y >= Rectangle->MinY) && - (Y <= Rectangle->MaxY); -} /* PointInside */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mergenf.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mergenf.h deleted file mode 100644 index 84e0f2d7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mergenf.h +++ /dev/null @@ -1,79 +0,0 @@ -/****************************************************************************** - ** Filename: MergeNF.c - ** Purpose: Program for merging similar nano-feature protos - ** Author: Dan Johnson - ** History: Wed Nov 21 09:55:23 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - *****************************************************************************/ - -#ifndef TESSERACT_TRAINING_MERGENF_H_ -#define TESSERACT_TRAINING_MERGENF_H_ - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "callcpp.h" -#include "cluster.h" -#include "ocrfeatures.h" -#include "picofeat.h" -#include "protos.h" - -#define WORST_MATCH_ALLOWED (0.9) -#define WORST_EVIDENCE (1.0) -#define MAX_LENGTH_MISMATCH (2.0 * GetPicoFeatureLength()) - -#define PROTO_SUFFIX ".mf.p" -#define CONFIG_SUFFIX ".cl" -#define NO_PROTO (-1) -#define XPOSITION 0 -#define YPOSITION 1 -#define MFLENGTH 2 -#define ORIENTATION 3 - -typedef struct { - float MinX, MaxX, MinY, MaxY; -} FRECT; - -/**---------------------------------------------------------------------------- - Public Macros -----------------------------------------------------------------------------**/ -#define CenterX(M) ((M)[XPOSITION]) -#define CenterY(M) ((M)[YPOSITION]) -#define LengthOf(M) ((M)[MFLENGTH]) -#define OrientationOf(M) ((M)[ORIENTATION]) - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -float CompareProtos(PROTO p1, PROTO p2); - -void ComputeMergedProto(PROTO 
p1, PROTO p2, float w1, float w2, - PROTO MergedProto); - -int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], - PROTOTYPE* Prototype); - -void MakeNewFromOld(PROTO New, PROTOTYPE* Old); - -float SubfeatureEvidence(FEATURE Feature, PROTO Proto); - -double EvidenceOf(double Similarity); - -bool DummyFastMatch(FEATURE Feature, PROTO Proto); - -void ComputePaddedBoundingBox(PROTO Proto, float TangentPad, - float OrthogonalPad, FRECT* BoundingBox); - -bool PointInside(FRECT* Rectangle, float X, float Y); - -#endif // TESSERACT_TRAINING_MERGENF_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mftraining.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mftraining.cpp deleted file mode 100644 index 56b51373..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/mftraining.cpp +++ /dev/null @@ -1,295 +0,0 @@ -/****************************************************************************** - ** Filename: mftraining.c - ** Purpose: Separates training pages into files for each character. - ** Strips from files only the features and there parameters of - ** the feature type mf. - ** Author: Dan Johnson - ** Revisment: Christy Russon - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
-******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------*/ -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include -#include -#define _USE_MATH_DEFINES -#include - -#include "classify.h" -#include "cluster.h" -#include "clusttool.h" -#include "commontraining.h" -#include "emalloc.h" -#include "featdefs.h" -#include "fontinfo.h" -#include "genericvector.h" -#include "indexmapbidi.h" -#include "intproto.h" -#include "mastertrainer.h" -#include "mergenf.h" -#include "mf.h" -#include "ocrfeatures.h" -#include "oldlist.h" -#include "protos.h" -#include "shapetable.h" -#include "tessopt.h" -#include "tprintf.h" -#include "unicity_table.h" - -using tesseract::IndexMapBiDi; -using tesseract::MasterTrainer; -using tesseract::Shape; -using tesseract::ShapeTable; - -#define PROGRAM_FEATURE_TYPE "mf" - -// Max length of a fake shape label. 
-const int kMaxShapeLabelLength = 10; - -DECLARE_STRING_PARAM_FLAG(test_ch); - -/*---------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -#ifndef GRAPHICS_DISABLED -static void DisplayProtoList(const char* ch, LIST protolist) { - void* window = c_create_window("Char samples", 50, 200, - 520, 520, -130.0, 130.0, -130.0, 130.0); - LIST proto = protolist; - iterate(proto) { - PROTOTYPE* prototype = reinterpret_cast(first_node(proto)); - if (prototype->Significant) - c_line_color_index(window, Green); - else if (prototype->NumSamples == 0) - c_line_color_index(window, Blue); - else if (prototype->Merged) - c_line_color_index(window, Magenta); - else - c_line_color_index(window, Red); - float x = CenterX(prototype->Mean); - float y = CenterY(prototype->Mean); - double angle = OrientationOf(prototype->Mean) * 2 * M_PI; - float dx = static_cast(LengthOf(prototype->Mean) * cos(angle) / 2); - float dy = static_cast(LengthOf(prototype->Mean) * sin(angle) / 2); - c_move(window, (x - dx) * 256, (y - dy) * 256); - c_draw(window, (x + dx) * 256, (y + dy) * 256); - if (prototype->Significant) - tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n", - x, y, dx, dy, prototype->NumSamples); - else if (prototype->NumSamples > 0 && !prototype->Merged) - tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n", - x, y, dx, dy, prototype->NumSamples); - } - c_make_current(window); -} -#endif // GRAPHICS_DISABLED - -// Helper to run clustering on a single config. -// Mostly copied from the old mftraining, but with renamed variables. 
-static LIST ClusterOneConfig(int shape_id, const char* class_label, - LIST mf_classes, - const ShapeTable& shape_table, - MasterTrainer* trainer) { - int num_samples; - CLUSTERER *clusterer = trainer->SetupForClustering(shape_table, - feature_defs, - shape_id, - &num_samples); - Config.MagicSamples = num_samples; - LIST proto_list = ClusterSamples(clusterer, &Config); - CleanUpUnusedData(proto_list); - - // Merge protos where reasonable to make more of them significant by - // representing almost all samples of the class/font. - MergeInsignificantProtos(proto_list, class_label, clusterer, &Config); - #ifndef GRAPHICS_DISABLED - if (strcmp(FLAGS_test_ch.c_str(), class_label) == 0) - DisplayProtoList(FLAGS_test_ch.c_str(), proto_list); - #endif // GRAPHICS_DISABLED - // Delete the protos that will not be used in the inttemp output file. - proto_list = RemoveInsignificantProtos(proto_list, true, - false, - clusterer->SampleSize); - FreeClusterer(clusterer); - MERGE_CLASS merge_class = FindClass(mf_classes, class_label); - if (merge_class == nullptr) { - merge_class = NewLabeledClass(class_label); - mf_classes = push(mf_classes, merge_class); - } - int config_id = AddConfigToClass(merge_class->Class); - merge_class->Class->font_set.push_back(shape_id); - LIST proto_it = proto_list; - iterate(proto_it) { - PROTOTYPE* prototype = reinterpret_cast(first_node(proto_it)); - // See if proto can be approximated by existing proto. - int p_id = FindClosestExistingProto(merge_class->Class, - merge_class->NumMerged, prototype); - if (p_id == NO_PROTO) { - // Need to make a new proto, as it doesn't match anything. - p_id = AddProtoToClass(merge_class->Class); - MakeNewFromOld(ProtoIn(merge_class->Class, p_id), prototype); - merge_class->NumMerged[p_id] = 1; - } else { - PROTO_STRUCT dummy_proto; - MakeNewFromOld(&dummy_proto, prototype); - // Merge with the similar proto. 
- ComputeMergedProto(ProtoIn(merge_class->Class, p_id), &dummy_proto, - static_cast(merge_class->NumMerged[p_id]), - 1.0, - ProtoIn(merge_class->Class, p_id)); - merge_class->NumMerged[p_id]++; - } - AddProtoToConfig(p_id, merge_class->Class->Configurations[config_id]); - } - FreeProtoList(&proto_list); - return mf_classes; -} - -// Helper to setup the config map. -// Setup an index mapping from the shapes in the shape table to the classes -// that will be trained. In keeping with the original design, each shape -// with the same list of unichars becomes a different class and the configs -// represent the different combinations of fonts. -static void SetupConfigMap(ShapeTable* shape_table, IndexMapBiDi* config_map) { - int num_configs = shape_table->NumShapes(); - config_map->Init(num_configs, true); - config_map->Setup(); - for (int c1 = 0; c1 < num_configs; ++c1) { - // Only process ids that are not already merged. - if (config_map->SparseToCompact(c1) == c1) { - Shape* shape1 = shape_table->MutableShape(c1); - // Find all the subsequent shapes that are equal. - for (int c2 = c1 + 1; c2 < num_configs; ++c2) { - if (shape_table->MutableShape(c2)->IsEqualUnichars(shape1)) { - config_map->Merge(c1, c2); - } - } - } - } - config_map->CompleteMerges(); -} - -/** - * This program reads in a text file consisting of feature - * samples from a training page in the following format: - * @verbatim - FontName UTF8-char-str xmin ymin xmax ymax page-number - NumberOfFeatureTypes(N) - FeatureTypeName1 NumberOfFeatures(M) - Feature1 - ... - FeatureM - FeatureTypeName2 NumberOfFeatures(M) - Feature1 - ... - FeatureM - ... - FeatureTypeNameN NumberOfFeatures(M) - Feature1 - ... - FeatureM - FontName CharName ... - @endverbatim - * The result of this program is a binary inttemp file used by - * the OCR engine. 
- * @param argc number of command line arguments - * @param argv array of command line arguments - * @return 0 if no error occurred - */ -int main (int argc, char **argv) { - tesseract::CheckSharedLibraryVersion(); - - ParseArguments(&argc, &argv); - - ShapeTable* shape_table = nullptr; - STRING file_prefix; - // Load the training data. - MasterTrainer* trainer = tesseract::LoadTrainingData(argc, argv, - false, - &shape_table, - &file_prefix); - if (trainer == nullptr) return 1; // Failed. - - // Setup an index mapping from the shapes in the shape table to the classes - // that will be trained. In keeping with the original design, each shape - // with the same list of unichars becomes a different class and the configs - // represent the different combinations of fonts. - IndexMapBiDi config_map; - SetupConfigMap(shape_table, &config_map); - - WriteShapeTable(file_prefix, *shape_table); - // If the shape_table is flat, then either we didn't run shape clustering, or - // it did nothing, so we just output the trainer's unicharset. - // Otherwise shape_set will hold a fake unicharset with an entry for each - // shape in the shape table, and we will output that instead. - UNICHARSET shape_set; - const UNICHARSET* unicharset = &trainer->unicharset(); - // If we ran shapeclustering (and it worked) then at least one shape will - // have multiple unichars, so we have to build a fake unicharset. - if (shape_table->AnyMultipleUnichars()) { - unicharset = &shape_set; - // Now build a fake unicharset for the compact shape space to keep the - // output modules happy that we are doing things correctly. - int num_shapes = config_map.CompactSize(); - for (int s = 0; s < num_shapes; ++s) { - char shape_label[kMaxShapeLabelLength + 1]; - snprintf(shape_label, kMaxShapeLabelLength, "sh%04d", s); - shape_set.unichar_insert(shape_label); - } - } - - // Now train each config separately. 
- int num_configs = shape_table->NumShapes(); - LIST mf_classes = NIL_LIST; - for (int s = 0; s < num_configs; ++s) { - int unichar_id, font_id; - if (unicharset == &shape_set) { - // Using fake unichar_ids from the config_map/shape_set. - unichar_id = config_map.SparseToCompact(s); - } else { - // Get the real unichar_id from the shape table/unicharset. - shape_table->GetFirstUnicharAndFont(s, &unichar_id, &font_id); - } - const char* class_label = unicharset->id_to_unichar(unichar_id); - mf_classes = ClusterOneConfig(s, class_label, mf_classes, *shape_table, - trainer); - } - STRING inttemp_file = file_prefix; - inttemp_file += "inttemp"; - STRING pffmtable_file = file_prefix; - pffmtable_file += "pffmtable"; - CLASS_STRUCT* float_classes = SetUpForFloat2Int(*unicharset, mf_classes); - // Now write the inttemp and pffmtable. - trainer->WriteInttempAndPFFMTable(trainer->unicharset(), *unicharset, - *shape_table, float_classes, - inttemp_file.string(), - pffmtable_file.string()); - for (int c = 0; c < unicharset->size(); ++c) { - FreeClassFields(&float_classes[c]); - } - delete [] float_classes; - FreeLabeledClassList(mf_classes); - delete trainer; - delete shape_table; - printf("Done!\n"); - if (!FLAGS_test_ch.empty()) { - // If we are displaying debug window(s), wait for the user to look at them. - printf("Hit return to exit...\n"); - while (getchar() != '\n'); - } - return 0; -} /* main */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/normstrngs.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/normstrngs.cpp deleted file mode 100644 index 759284b2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/normstrngs.cpp +++ /dev/null @@ -1,304 +0,0 @@ -/********************************************************************** - * File: normstrngs.cpp - * Description: Utilities to normalize and manipulate UTF-32 and - * UTF-8 strings. 
- * Author: Ranjith Unnikrishnan - * Created: Thu July 4 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include "normstrngs.h" - -#include -#include -#include - -#include "errcode.h" -#include "icuerrorcode.h" -#include "unichar.h" -#include "unicode/normalizer2.h" // From libicu -#include "unicode/translit.h" // From libicu -#include "unicode/uchar.h" // From libicu -#include "unicode/unorm2.h" // From libicu -#include "unicode/uscript.h" // From libicu - -namespace tesseract { - -static bool is_hyphen_punc(const char32 ch) { - static const int kNumHyphenPuncUnicodes = 13; - static const char32 kHyphenPuncUnicodes[kNumHyphenPuncUnicodes] = { - '-', 0x2010, 0x2011, 0x2012, - 0x2013, 0x2014, 0x2015, // hyphen..horizontal bar - 0x207b, // superscript minus - 0x208b, // subscript minus - 0x2212, // minus sign - 0xfe58, // small em dash - 0xfe63, // small hyphen-minus - 0xff0d, // fullwidth hyphen-minus - }; - for (int i = 0; i < kNumHyphenPuncUnicodes; ++i) { - if (kHyphenPuncUnicodes[i] == ch) return true; - } - return false; -} - -static bool is_single_quote(const char32 ch) { - static const int kNumSingleQuoteUnicodes = 8; - static const char32 kSingleQuoteUnicodes[kNumSingleQuoteUnicodes] = { - '\'', '`', - 0x2018, // left single quotation mark (English, others) - 0x2019, // right single quotation mark (Danish, Finnish, Swedish, Norw.) 
- // We may have to introduce a comma set with 0x201a - 0x201B, // single high-reveresed-9 quotation mark (PropList.txt) - 0x2032, // prime - 0x300C, // left corner bracket (East Asian languages) - 0xFF07, // fullwidth apostrophe - }; - for (int i = 0; i < kNumSingleQuoteUnicodes; ++i) { - if (kSingleQuoteUnicodes[i] == ch) return true; - } - return false; -} - -static bool is_double_quote(const char32 ch) { - static const int kNumDoubleQuoteUnicodes = 8; - static const char32 kDoubleQuoteUnicodes[kNumDoubleQuoteUnicodes] = { - '"', - 0x201C, // left double quotation mark (English, others) - 0x201D, // right double quotation mark (Danish, Finnish, Swedish, Norw.) - 0x201F, // double high-reversed-9 quotation mark (PropList.txt) - 0x2033, // double prime - 0x301D, // reversed double prime quotation mark (East Asian langs, - // horiz.) - 0x301E, // close double prime (East Asian languages written horizontally) - 0xFF02, // fullwidth quotation mark - }; - for (int i = 0; i < kNumDoubleQuoteUnicodes; ++i) { - if (kDoubleQuoteUnicodes[i] == ch) return true; - } - return false; -} - -// Helper runs a standard unicode normalization, optional OCR normalization, -// and leaves the result as char32 for subsequent processing. -static void NormalizeUTF8ToUTF32(UnicodeNormMode u_mode, OCRNorm ocr_normalize, - const char* str8, - std::vector* normed32) { - // Convert to ICU string for unicode normalization. - icu::UnicodeString uch_str(str8, "UTF-8"); - IcuErrorCode error_code; - // Convert the enum to the new weird icu representation. - const char* norm_type = - u_mode == UnicodeNormMode::kNFKD || u_mode == UnicodeNormMode::kNFKC - ? "nfkc" - : "nfc"; - UNormalization2Mode compose = - u_mode == UnicodeNormMode::kNFC || u_mode == UnicodeNormMode::kNFKC - ? UNORM2_COMPOSE - : UNORM2_DECOMPOSE; - // Pointer to singleton does not require deletion. 
- const icu::Normalizer2* normalizer = - icu::Normalizer2::getInstance(nullptr, norm_type, compose, error_code); - error_code.assertSuccess(); - error_code.reset(); - icu::UnicodeString norm_str = normalizer->normalize(uch_str, error_code); - error_code.assertSuccess(); - // Convert to char32 for output. OCR normalization if required. - normed32->reserve(norm_str.length()); // An approximation. - for (int offset = 0; offset < norm_str.length(); - offset = norm_str.moveIndex32(offset, 1)) { - char32 ch = norm_str.char32At(offset); - // Skip all ZWS, RTL and LTR marks. - if (Validator::IsZeroWidthMark(ch)) continue; - if (ocr_normalize == OCRNorm::kNormalize) ch = OCRNormalize(ch); - normed32->push_back(ch); - } -} - -// Helper removes joiners from strings that contain no letters. -static void StripJoiners(std::vector* str32) { - for (char32 ch : *str32) { - if (u_isalpha(ch)) return; - } - int len = 0; - for (char32 ch : *str32) { - if (ch != Validator::kZeroWidthJoiner && - ch != Validator::kZeroWidthNonJoiner) { - (*str32)[len++] = ch; - } - } - str32->resize(len); -} - -// Normalizes a UTF8 string according to the given modes. Returns true on -// success. If false is returned, some failure or invalidity was present, and -// the result string is produced on a "best effort" basis. 
-bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, - GraphemeNorm grapheme_normalize, const char* str8, - std::string* normalized) { - std::vector normed32; - NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32); - if (grapheme_normalize == GraphemeNorm::kNormalize) { - StripJoiners(&normed32); - std::vector> graphemes; - bool success = Validator::ValidateCleanAndSegment( - GraphemeNormMode::kSingleString, false, normed32, &graphemes); - if (graphemes.empty() || graphemes[0].empty()) { - success = false; - } else if (normalized != nullptr) { - *normalized = UNICHAR::UTF32ToUTF8(graphemes[0]); - } - return success; - } - if (normalized != nullptr) *normalized = UNICHAR::UTF32ToUTF8(normed32); - return true; -} - -// Normalizes a UTF8 string according to the given modes and splits into -// graphemes according to g_mode. Returns true on success. If false is returned, -// some failure or invalidity was present, and the result string is produced on -// a "best effort" basis. -bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, - GraphemeNormMode g_mode, bool report_errors, - const char* str8, - std::vector* graphemes) { - std::vector normed32; - NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32); - StripJoiners(&normed32); - std::vector> graphemes32; - bool success = Validator::ValidateCleanAndSegment(g_mode, report_errors, - normed32, &graphemes32); - if (g_mode != GraphemeNormMode::kSingleString && success) { - // If we modified the string to clean it up, the segmentation may not be - // correct, so check for changes and do it again. 
- std::vector cleaned32; - for (const auto& g : graphemes32) { - cleaned32.insert(cleaned32.end(), g.begin(), g.end()); - } - if (cleaned32 != normed32) { - graphemes32.clear(); - success = Validator::ValidateCleanAndSegment(g_mode, report_errors, - cleaned32, &graphemes32); - } - } - graphemes->clear(); - graphemes->reserve(graphemes32.size()); - for (const auto& grapheme : graphemes32) { - graphemes->push_back(UNICHAR::UTF32ToUTF8(grapheme)); - } - return success; -} - -// Apply just the OCR-specific normalizations and return the normalized char. -char32 OCRNormalize(char32 ch) { - if (is_hyphen_punc(ch)) - return '-'; - else if (is_single_quote(ch)) - return '\''; - else if (is_double_quote(ch)) - return '"'; - return ch; -} - -bool IsOCREquivalent(char32 ch1, char32 ch2) { - return OCRNormalize(ch1) == OCRNormalize(ch2); -} - -bool IsValidCodepoint(const char32 ch) { - // In the range [0, 0xD800) or [0xE000, 0x10FFFF] - return (static_cast(ch) < 0xD800) || (ch >= 0xE000 && ch <= 0x10FFFF); -} - -bool IsWhitespace(const char32 ch) { - ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n", - ch); - return u_isUWhiteSpace(static_cast(ch)); -} - -bool IsUTF8Whitespace(const char* text) { - return SpanUTF8Whitespace(text) == strlen(text); -} - -unsigned int SpanUTF8Whitespace(const char* text) { - int n_white = 0; - for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text)); - it != UNICHAR::end(text, strlen(text)); ++it) { - if (!IsWhitespace(*it)) break; - n_white += it.utf8_len(); - } - return n_white; -} - -unsigned int SpanUTF8NotWhitespace(const char* text) { - int n_notwhite = 0; - for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text)); - it != UNICHAR::end(text, strlen(text)); ++it) { - if (IsWhitespace(*it)) break; - n_notwhite += it.utf8_len(); - } - return n_notwhite; -} - -bool IsInterchangeValid(const char32 ch) { - return IsValidCodepoint(ch) && - !(ch >= 0xFDD0 && ch <= 0xFDEF) && // Noncharacters. 
- !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) && - !(ch >= 0x2FFFE && ch <= 0x2FFFF) && - !(ch >= 0x3FFFE && ch <= 0x3FFFF) && - !(ch >= 0x4FFFE && ch <= 0x4FFFF) && - !(ch >= 0x5FFFE && ch <= 0x5FFFF) && - !(ch >= 0x6FFFE && ch <= 0x6FFFF) && - !(ch >= 0x7FFFE && ch <= 0x7FFFF) && - !(ch >= 0x8FFFE && ch <= 0x8FFFF) && - !(ch >= 0x9FFFE && ch <= 0x9FFFF) && - !(ch >= 0xAFFFE && ch <= 0xAFFFF) && - !(ch >= 0xBFFFE && ch <= 0xBFFFF) && - !(ch >= 0xCFFFE && ch <= 0xCFFFF) && - !(ch >= 0xDFFFE && ch <= 0xDFFFF) && - !(ch >= 0xEFFFE && ch <= 0xEFFFF) && - !(ch >= 0xFFFFE && ch <= 0xFFFFF) && - !(ch >= 0x10FFFE && ch <= 0x10FFFF) && - (!u_isISOControl(static_cast(ch)) || ch == '\n' || - ch == '\f' || ch == '\t' || ch == '\r'); -} - -bool IsInterchangeValid7BitAscii(const char32 ch) { - return IsValidCodepoint(ch) && ch <= 128 && - (!u_isISOControl(static_cast(ch)) || ch == '\n' || - ch == '\f' || ch == '\t' || ch == '\r'); -} - -char32 FullwidthToHalfwidth(const char32 ch) { - // Return unchanged if not in the fullwidth-halfwidth Unicode block. - if (ch < 0xFF00 || ch > 0xFFEF || !IsValidCodepoint(ch)) { - if (ch != 0x3000) return ch; - } - // Special case for fullwidth left and right "white parentheses". - if (ch == 0xFF5F) return 0x2985; - if (ch == 0xFF60) return 0x2986; - // Construct a full-to-half width transliterator. 
- IcuErrorCode error_code; - icu::UnicodeString uch_str(static_cast(ch)); - const icu::Transliterator* fulltohalf = icu::Transliterator::createInstance( - "Fullwidth-Halfwidth", UTRANS_FORWARD, error_code); - error_code.assertSuccess(); - error_code.reset(); - - fulltohalf->transliterate(uch_str); - delete fulltohalf; - ASSERT_HOST(uch_str.length() != 0); - return uch_str[0]; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/normstrngs.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/normstrngs.h deleted file mode 100644 index b7caefc9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/normstrngs.h +++ /dev/null @@ -1,103 +0,0 @@ -/********************************************************************** - * File: normstrngs.h - * Description: Utilities to normalize and manipulate UTF-32 and - * UTF-8 strings. - * Author: Ranjith Unnikrishnan - * Created: Thu July 4 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_CCUTIL_NORMSTRNGS_H_ -#define TESSERACT_CCUTIL_NORMSTRNGS_H_ - -#include -#include - -#include "validator.h" - -namespace tesseract { - -// The standard unicode normalizations. 
-enum class UnicodeNormMode { - kNFD, - kNFC, - kNFKD, - kNFKC, -}; - -// To normalize away differences in punctuation that are ambiguous, like -// curly quotes and different widths of dash. -enum class OCRNorm { - kNone, - kNormalize, -}; - -// To validate and normalize away some subtle differences that can occur in -// Indic scripts, eg ensuring that an explicit virama is always followed by -// a zero-width non-joiner. -enum class GraphemeNorm { - kNone, - kNormalize, -}; - -// Normalizes a UTF8 string according to the given modes. Returns true on -// success. If false is returned, some failure or invalidity was present, and -// the result string is produced on a "best effort" basis. -bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, - GraphemeNorm grapheme_normalize, const char* str8, - std::string* normalized); -// Normalizes a UTF8 string according to the given modes and splits into -// graphemes according to g_mode. Returns true on success. If false is returned, -// some failure or invalidity was present, and the result string is produced on -// a "best effort" basis. -bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, - GraphemeNormMode g_mode, bool report_errors, - const char* str8, - std::vector* graphemes); - -// Applies just the OCR-specific normalizations and return the normalized char. -char32 OCRNormalize(char32 ch); - -// Returns true if the OCRNormalized ch1 and ch2 are the same. -bool IsOCREquivalent(char32 ch1, char32 ch2); - -// Returns true if the value lies in the range of valid unicodes. -bool IsValidCodepoint(const char32 ch); - -// Returns true a code point has the White_Space Unicode property. -bool IsWhitespace(const char32 ch); -// Returns true if every char in the given (null-terminated) string has the -// White_Space Unicode property. -bool IsUTF8Whitespace(const char* text); - -// Returns the length of bytes of the prefix of 'text' that have the White_Space -// unicode property. 
-unsigned int SpanUTF8Whitespace(const char* text); - -// Returns the length of bytes of the prefix of 'text' that DO NOT have the -// White_Space unicode property. -unsigned int SpanUTF8NotWhitespace(const char* text); - -// Returns true if the char is interchange valid i.e. no C0 or C1 control codes -// (other than CR LF HT FF) and no non-characters. -bool IsInterchangeValid(const char32 ch); -// Same as above but restricted to 7-bit ASCII. -bool IsInterchangeValid7BitAscii(const char32 ch); - -// Convert a full-width UTF-8 string to half-width. -char32 FullwidthToHalfwidth(const char32 ch); - -} // namespace tesseract - -#endif // TESSERACT_CCUTIL_NORMSTRNGS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/.cmake/api/v1/query/client-MicrosoftVS/query.json b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/.cmake/api/v1/query/client-MicrosoftVS/query.json deleted file mode 100644 index 308f68dd..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/.cmake/api/v1/query/client-MicrosoftVS/query.json +++ /dev/null @@ -1 +0,0 @@ -{"requests":[{"kind":"cache","version":2},{"kind":"cmakeFiles","version":1},{"kind":"codemodel","version":2}]} \ No newline at end of file diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeCache.txt b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeCache.txt deleted file mode 100644 index c73e17b7..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeCache.txt +++ /dev/null @@ -1,429 +0,0 @@ -# This is the CMakeCache file. 
-# For build in directory: e:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug -# It was generated by CMake: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/CMake/bin/cmake.exe -# You can edit this file to change values found and used by cmake. -# If you do not want to change any of the values, simply exit the editor. -# If you do want to change a value, simply edit, save, and exit the editor. -# The syntax for the file is as follows: -# KEY:TYPE=VALUE -# KEY is the name of a variable in the cache. -# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. -# VALUE is the current value for the KEY. - -######################## -# EXTERNAL cache entries -######################## - -//For backwards compatibility, what version of CMake commands and -// syntax should this version of CMake try to support. -CMAKE_BACKWARDS_COMPATIBILITY:STRING=2.4 - -//Choose the type of build, options are: None Debug Release RelWithDebInfo -// MinSizeRel ... -CMAKE_BUILD_TYPE:STRING=Debug - -//No help, variable specified on the command line. -CMAKE_CXX_COMPILER:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/HostX64/x64/cl.exe - -//Flags used by the CXX compiler during all build types. -CMAKE_CXX_FLAGS:STRING=/DWIN32 /D_WINDOWS /W3 /GR /EHsc - -//Flags used by the CXX compiler during DEBUG builds. -CMAKE_CXX_FLAGS_DEBUG:STRING=/MDd /Zi /Ob0 /Od /RTC1 - -//Flags used by the CXX compiler during MINSIZEREL builds. -CMAKE_CXX_FLAGS_MINSIZEREL:STRING=/MD /O1 /Ob1 /DNDEBUG - -//Flags used by the CXX compiler during RELEASE builds. -CMAKE_CXX_FLAGS_RELEASE:STRING=/MD /O2 /Ob2 /DNDEBUG - -//Flags used by the CXX compiler during RELWITHDEBINFO builds. -CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=/MD /Zi /O2 /Ob1 /DNDEBUG - -//Libraries linked by default with all C++ applications. 
-CMAKE_CXX_STANDARD_LIBRARIES:STRING=kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib - -//No help, variable specified on the command line. -CMAKE_C_COMPILER:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/HostX64/x64/cl.exe - -//Flags used by the C compiler during all build types. -CMAKE_C_FLAGS:STRING=/DWIN32 /D_WINDOWS /W3 - -//Flags used by the C compiler during DEBUG builds. -CMAKE_C_FLAGS_DEBUG:STRING=/MDd /Zi /Ob0 /Od /RTC1 - -//Flags used by the C compiler during MINSIZEREL builds. -CMAKE_C_FLAGS_MINSIZEREL:STRING=/MD /O1 /Ob1 /DNDEBUG - -//Flags used by the C compiler during RELEASE builds. -CMAKE_C_FLAGS_RELEASE:STRING=/MD /O2 /Ob2 /DNDEBUG - -//Flags used by the C compiler during RELWITHDEBINFO builds. -CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=/MD /Zi /O2 /Ob1 /DNDEBUG - -//Libraries linked by default with all C applications. -CMAKE_C_STANDARD_LIBRARIES:STRING=kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib - -//Flags used by the linker during all build types. -CMAKE_EXE_LINKER_FLAGS:STRING=/machine:x64 - -//Flags used by the linker during DEBUG builds. -CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=/debug /INCREMENTAL - -//Flags used by the linker during MINSIZEREL builds. -CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=/INCREMENTAL:NO - -//Flags used by the linker during RELEASE builds. -CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=/INCREMENTAL:NO - -//Flags used by the linker during RELWITHDEBINFO builds. -CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=/debug /INCREMENTAL - -//Enable/Disable output of compile commands during generation. -CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=OFF - -//No help, variable specified on the command line. -CMAKE_INSTALL_PREFIX:PATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/install/x64-Debug - -//Path to a program. 
-CMAKE_LINKER:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/Hostx64/x64/link.exe - -//make program -CMAKE_MAKE_PROGRAM:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe - -//Flags used by the linker during the creation of modules during -// all build types. -CMAKE_MODULE_LINKER_FLAGS:STRING=/machine:x64 - -//Flags used by the linker during the creation of modules during -// DEBUG builds. -CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=/debug /INCREMENTAL - -//Flags used by the linker during the creation of modules during -// MINSIZEREL builds. -CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=/INCREMENTAL:NO - -//Flags used by the linker during the creation of modules during -// RELEASE builds. -CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=/INCREMENTAL:NO - -//Flags used by the linker during the creation of modules during -// RELWITHDEBINFO builds. -CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=/debug /INCREMENTAL - -//Path to a program. -CMAKE_MT:FILEPATH=C:/Program Files (x86)/Windows Kits/10/bin/10.0.18362.0/x64/mt.exe - -//Value Computed by CMake -CMAKE_PROJECT_DESCRIPTION:STATIC= - -//Value Computed by CMake -CMAKE_PROJECT_HOMEPAGE_URL:STATIC= - -//Value Computed by CMake -CMAKE_PROJECT_NAME:STATIC=Project - -//RC compiler -CMAKE_RC_COMPILER:FILEPATH=C:/Program Files (x86)/Windows Kits/10/bin/10.0.18362.0/x64/rc.exe - -//Flags for Windows Resource Compiler during all build types. -CMAKE_RC_FLAGS:STRING=-DWIN32 - -//Flags for Windows Resource Compiler during DEBUG builds. -CMAKE_RC_FLAGS_DEBUG:STRING=-D_DEBUG - -//Flags for Windows Resource Compiler during MINSIZEREL builds. -CMAKE_RC_FLAGS_MINSIZEREL:STRING= - -//Flags for Windows Resource Compiler during RELEASE builds. -CMAKE_RC_FLAGS_RELEASE:STRING= - -//Flags for Windows Resource Compiler during RELWITHDEBINFO builds. 
-CMAKE_RC_FLAGS_RELWITHDEBINFO:STRING= - -//Flags used by the linker during the creation of shared libraries -// during all build types. -CMAKE_SHARED_LINKER_FLAGS:STRING=/machine:x64 - -//Flags used by the linker during the creation of shared libraries -// during DEBUG builds. -CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=/debug /INCREMENTAL - -//Flags used by the linker during the creation of shared libraries -// during MINSIZEREL builds. -CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=/INCREMENTAL:NO - -//Flags used by the linker during the creation of shared libraries -// during RELEASE builds. -CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=/INCREMENTAL:NO - -//Flags used by the linker during the creation of shared libraries -// during RELWITHDEBINFO builds. -CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=/debug /INCREMENTAL - -//If set, runtime paths are not added when installing shared libraries, -// but are added when building. -CMAKE_SKIP_INSTALL_RPATH:BOOL=NO - -//If set, runtime paths are not added when using shared libraries. -CMAKE_SKIP_RPATH:BOOL=NO - -//Flags used by the linker during the creation of static libraries -// during all build types. -CMAKE_STATIC_LINKER_FLAGS:STRING=/machine:x64 - -//Flags used by the linker during the creation of static libraries -// during DEBUG builds. -CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING= - -//Flags used by the linker during the creation of static libraries -// during MINSIZEREL builds. -CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING= - -//Flags used by the linker during the creation of static libraries -// during RELEASE builds. -CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING= - -//Flags used by the linker during the creation of static libraries -// during RELWITHDEBINFO builds. -CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING= - -//If this value is on, makefiles will be generated without the -// .SILENT directive, and all commands will be echoed to the console -// during the make. This is useful for debugging only. 
With Visual -// Studio IDE projects all commands are done without /nologo. -CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE - -//Single output directory for building all executables. -EXECUTABLE_OUTPUT_PATH:PATH= - -//ICU derb executable -ICU_DERB_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/derb.exe - -//ICU genbrk executable -ICU_GENBRK_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genbrk.exe - -//ICU genccode executable -ICU_GENCCODE_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genccode.exe - -//ICU gencfu executable -ICU_GENCFU_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencfu.exe - -//ICU gencmn executable -ICU_GENCMN_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencmn.exe - -//ICU gencnval executable -ICU_GENCNVAL_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencnval.exe - -//ICU gendict executable -ICU_GENDICT_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gendict.exe - -//ICU gennorm2 executable -ICU_GENNORM2_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gennorm2.exe - -//ICU genrb executable -ICU_GENRB_EXECUTABLE:FILEPATH=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genrb.exe - -//ICU gensprep executable -ICU_GENSPREP_EXECUTABLE:FILEPATH=E:/Work/My Git 
Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gensprep.exe - -//ICU i18n library (debug) -ICU_I18N_LIBRARY_DEBUG:FILEPATH=ICU_I18N_LIBRARY_DEBUG-NOTFOUND - -//ICU i18n library (release) -ICU_I18N_LIBRARY_RELEASE:FILEPATH=C:/Program Files (x86)/Windows Kits/10/Lib/10.0.18362.0/um/x64/icuin.Lib - -//ICU icu-config executable -ICU_ICU-CONFIG_EXECUTABLE:FILEPATH=ICU_ICU-CONFIG_EXECUTABLE-NOTFOUND - -//ICU icuinfo executable -ICU_ICUINFO_EXECUTABLE:FILEPATH=ICU_ICUINFO_EXECUTABLE-NOTFOUND - -//ICU icupkg executable -ICU_ICUPKG_EXECUTABLE:FILEPATH=ICU_ICUPKG_EXECUTABLE-NOTFOUND - -//ICU include directory -ICU_INCLUDE_DIR:PATH=ICU_INCLUDE_DIR-NOTFOUND - -//ICU makeconv executable -ICU_MAKECONV_EXECUTABLE:FILEPATH=ICU_MAKECONV_EXECUTABLE-NOTFOUND - -//ICU Makefile.inc data file -ICU_MAKEFILE_INC:FILEPATH=ICU_MAKEFILE_INC-NOTFOUND - -//ICU pkgdata executable -ICU_PKGDATA_EXECUTABLE:FILEPATH=ICU_PKGDATA_EXECUTABLE-NOTFOUND - -//ICU pkgdata.inc data file -ICU_PKGDATA_INC:FILEPATH=ICU_PKGDATA_INC-NOTFOUND - -//ICU uconv executable -ICU_UCONV_EXECUTABLE:FILEPATH=ICU_UCONV_EXECUTABLE-NOTFOUND - -//ICU uc library (debug) -ICU_UC_LIBRARY_DEBUG:FILEPATH=ICU_UC_LIBRARY_DEBUG-NOTFOUND - -//ICU uc library (release) -ICU_UC_LIBRARY_RELEASE:FILEPATH=C:/Program Files (x86)/Windows Kits/10/Lib/10.0.18362.0/um/x64/icuuc.lib - -//Single output directory for building all libraries. 
-LIBRARY_OUTPUT_PATH:PATH= - -//Value Computed by CMake -Project_BINARY_DIR:STATIC=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug - -//Value Computed by CMake -Project_SOURCE_DIR:STATIC=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training - - -######################## -# INTERNAL cache entries -######################## - -//This is the directory where this CMakeCache.txt was created -CMAKE_CACHEFILE_DIR:INTERNAL=e:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug -//Major version of cmake used to create the current loaded cache -CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 -//Minor version of cmake used to create the current loaded cache -CMAKE_CACHE_MINOR_VERSION:INTERNAL=15 -//Patch version of cmake used to create the current loaded cache -CMAKE_CACHE_PATCH_VERSION:INTERNAL=19101501 -//Path to CMake executable. -CMAKE_COMMAND:INTERNAL=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/CMake/bin/cmake.exe -//Path to cpack program executable. -CMAKE_CPACK_COMMAND:INTERNAL=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/CMake/bin/cpack.exe -//Path to ctest program executable. 
-CMAKE_CTEST_COMMAND:INTERNAL=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/CMake/bin/ctest.exe -//ADVANCED property for variable: CMAKE_CXX_COMPILER -CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_CXX_FLAGS -CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG -CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL -CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE -CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO -CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_CXX_STANDARD_LIBRARIES -CMAKE_CXX_STANDARD_LIBRARIES-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_COMPILER -CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_FLAGS -CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG -CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL -CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE -CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO -CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_C_STANDARD_LIBRARIES -CMAKE_C_STANDARD_LIBRARIES-ADVANCED:INTERNAL=1 -//Executable file format -CMAKE_EXECUTABLE_FORMAT:INTERNAL=Unknown -//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS -CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG -CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL -CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 
-//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE -CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS -CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1 -//Name of external makefile project generator. -CMAKE_EXTRA_GENERATOR:INTERNAL= -//Name of generator. -CMAKE_GENERATOR:INTERNAL=Ninja -//Generator instance identifier. -CMAKE_GENERATOR_INSTANCE:INTERNAL= -//Name of generator platform. -CMAKE_GENERATOR_PLATFORM:INTERNAL= -//Name of generator toolset. -CMAKE_GENERATOR_TOOLSET:INTERNAL= -//Source directory with the top level CMakeLists.txt file for this -// project -CMAKE_HOME_DIRECTORY:INTERNAL=E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training -//ADVANCED property for variable: CMAKE_LINKER -CMAKE_LINKER-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS -CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG -CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL -CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE -CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_MT -CMAKE_MT-ADVANCED:INTERNAL=1 -//number of local generators -CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1 -//Platform information initialized -CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_RC_COMPILER -CMAKE_RC_COMPILER-ADVANCED:INTERNAL=1 -CMAKE_RC_COMPILER_WORKS:INTERNAL=1 -//ADVANCED property for variable: CMAKE_RC_FLAGS 
-CMAKE_RC_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_RC_FLAGS_DEBUG -CMAKE_RC_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_RC_FLAGS_MINSIZEREL -CMAKE_RC_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_RC_FLAGS_RELEASE -CMAKE_RC_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_RC_FLAGS_RELWITHDEBINFO -CMAKE_RC_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//Path to CMake installation. -CMAKE_ROOT:INTERNAL=C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/CMake/share/cmake-3.15 -//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS -CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG -CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL -CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE -CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH -CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_SKIP_RPATH -CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS -CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG -CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL -CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE -CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: 
CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE -CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 -//Result of TRY_COMPILE -HAVE_SIZEOF_VOID_P:INTERNAL=TRUE -//Have include stddef.h -HAVE_STDDEF_H:INTERNAL=1 -//Have include stdint.h -HAVE_STDINT_H:INTERNAL=1 -//Have include sys/types.h -HAVE_SYS_TYPES_H:INTERNAL=1 -//ADVANCED property for variable: ICU_I18N_LIBRARY_DEBUG -ICU_I18N_LIBRARY_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: ICU_I18N_LIBRARY_RELEASE -ICU_I18N_LIBRARY_RELEASE-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: ICU_UC_LIBRARY_DEBUG -ICU_UC_LIBRARY_DEBUG-ADVANCED:INTERNAL=1 -//ADVANCED property for variable: ICU_UC_LIBRARY_RELEASE -ICU_UC_LIBRARY_RELEASE-ADVANCED:INTERNAL=1 -//CHECK_TYPE_SIZE: sizeof(void *) -SIZEOF_VOID_P:INTERNAL=8 - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeCCompiler.cmake b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeCCompiler.cmake deleted file mode 100644 index 70a0d85d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeCCompiler.cmake +++ /dev/null @@ -1,76 +0,0 @@ -set(CMAKE_C_COMPILER "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/HostX64/x64/cl.exe") -set(CMAKE_C_COMPILER_ARG1 "") -set(CMAKE_C_COMPILER_ID "MSVC") -set(CMAKE_C_COMPILER_VERSION "19.24.28316.0") -set(CMAKE_C_COMPILER_VERSION_INTERNAL "") -set(CMAKE_C_COMPILER_WRAPPER "") -set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "90") -set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_std_99;c_std_11;c_function_prototypes;c_variadic_macros") -set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") -set(CMAKE_C99_COMPILE_FEATURES 
"c_std_99;c_variadic_macros") -set(CMAKE_C11_COMPILE_FEATURES "c_std_11") - -set(CMAKE_C_PLATFORM_ID "Windows") -set(CMAKE_C_SIMULATE_ID "") -set(CMAKE_C_COMPILER_FRONTEND_VARIANT "") -set(CMAKE_C_SIMULATE_VERSION "") -set(CMAKE_C_COMPILER_ARCHITECTURE_ID x64) -set(MSVC_C_ARCHITECTURE_ID x64) - -set(CMAKE_AR "") -set(CMAKE_C_COMPILER_AR "") -set(CMAKE_RANLIB "") -set(CMAKE_C_COMPILER_RANLIB "") -set(CMAKE_LINKER "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/Hostx64/x64/link.exe") -set(CMAKE_MT "C:/Program Files (x86)/Windows Kits/10/bin/10.0.18362.0/x64/mt.exe") -set(CMAKE_COMPILER_IS_GNUCC ) -set(CMAKE_C_COMPILER_LOADED 1) -set(CMAKE_C_COMPILER_WORKS TRUE) -set(CMAKE_C_ABI_COMPILED TRUE) -set(CMAKE_COMPILER_IS_MINGW ) -set(CMAKE_COMPILER_IS_CYGWIN ) -if(CMAKE_COMPILER_IS_CYGWIN) - set(CYGWIN 1) - set(UNIX 1) -endif() - -set(CMAKE_C_COMPILER_ENV_VAR "CC") - -if(CMAKE_COMPILER_IS_MINGW) - set(MINGW 1) -endif() -set(CMAKE_C_COMPILER_ID_RUN 1) -set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) -set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) -set(CMAKE_C_LINKER_PREFERENCE 10) - -# Save compiler ABI information. 
-set(CMAKE_C_SIZEOF_DATA_PTR "8") -set(CMAKE_C_COMPILER_ABI "") -set(CMAKE_C_LIBRARY_ARCHITECTURE "") - -if(CMAKE_C_SIZEOF_DATA_PTR) - set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") -endif() - -if(CMAKE_C_COMPILER_ABI) - set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") -endif() - -if(CMAKE_C_LIBRARY_ARCHITECTURE) - set(CMAKE_LIBRARY_ARCHITECTURE "") -endif() - -set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "注意: 包含文件: ") -if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) - set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") -endif() - - - - - -set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "") -set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "") -set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "") -set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeCXXCompiler.cmake b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeCXXCompiler.cmake deleted file mode 100644 index 73602f67..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeCXXCompiler.cmake +++ /dev/null @@ -1,79 +0,0 @@ -set(CMAKE_CXX_COMPILER "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/HostX64/x64/cl.exe") -set(CMAKE_CXX_COMPILER_ARG1 "") -set(CMAKE_CXX_COMPILER_ID "MSVC") -set(CMAKE_CXX_COMPILER_VERSION "19.24.28316.0") -set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") -set(CMAKE_CXX_COMPILER_WRAPPER "") -set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "14") -set(CMAKE_CXX_COMPILE_FEATURES 
"cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20") -set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") -set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") -set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") -set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") -set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") - -set(CMAKE_CXX_PLATFORM_ID "Windows") -set(CMAKE_CXX_SIMULATE_ID "") -set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "") -set(CMAKE_CXX_SIMULATE_VERSION "") -set(CMAKE_CXX_COMPILER_ARCHITECTURE_ID x64) -set(MSVC_CXX_ARCHITECTURE_ID x64) - -set(CMAKE_AR "") -set(CMAKE_CXX_COMPILER_AR "") -set(CMAKE_RANLIB "") -set(CMAKE_CXX_COMPILER_RANLIB "") -set(CMAKE_LINKER "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/Hostx64/x64/link.exe") -set(CMAKE_MT "C:/Program Files (x86)/Windows Kits/10/bin/10.0.18362.0/x64/mt.exe") -set(CMAKE_COMPILER_IS_GNUCXX ) 
-set(CMAKE_CXX_COMPILER_LOADED 1) -set(CMAKE_CXX_COMPILER_WORKS TRUE) -set(CMAKE_CXX_ABI_COMPILED TRUE) -set(CMAKE_COMPILER_IS_MINGW ) -set(CMAKE_COMPILER_IS_CYGWIN ) -if(CMAKE_COMPILER_IS_CYGWIN) - set(CYGWIN 1) - set(UNIX 1) -endif() - -set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") - -if(CMAKE_COMPILER_IS_MINGW) - set(MINGW 1) -endif() -set(CMAKE_CXX_COMPILER_ID_RUN 1) -set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) -set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;mm;CPP) -set(CMAKE_CXX_LINKER_PREFERENCE 30) -set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) - -# Save compiler ABI information. -set(CMAKE_CXX_SIZEOF_DATA_PTR "8") -set(CMAKE_CXX_COMPILER_ABI "") -set(CMAKE_CXX_LIBRARY_ARCHITECTURE "") - -if(CMAKE_CXX_SIZEOF_DATA_PTR) - set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") -endif() - -if(CMAKE_CXX_COMPILER_ABI) - set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") -endif() - -if(CMAKE_CXX_LIBRARY_ARCHITECTURE) - set(CMAKE_LIBRARY_ARCHITECTURE "") -endif() - -set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "注意: 包含文件: ") -if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) - set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") -endif() - - - - - -set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "") -set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") -set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "") -set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeDetermineCompilerABI_C.bin b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeDetermineCompilerABI_C.bin deleted file mode 100644 index 47d7fa12..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeDetermineCompilerABI_C.bin and /dev/null differ diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeDetermineCompilerABI_CXX.bin b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeDetermineCompilerABI_CXX.bin deleted file mode 100644 index 4d2b0e8e..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeDetermineCompilerABI_CXX.bin and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeRCCompiler.cmake b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeRCCompiler.cmake deleted file mode 100644 index 97eaaf6d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeRCCompiler.cmake +++ /dev/null @@ -1,6 +0,0 @@ -set(CMAKE_RC_COMPILER "C:/Program Files (x86)/Windows Kits/10/bin/10.0.18362.0/x64/rc.exe") -set(CMAKE_RC_COMPILER_ARG1 "") -set(CMAKE_RC_COMPILER_LOADED 1) -set(CMAKE_RC_SOURCE_FILE_EXTENSIONS rc;RC) -set(CMAKE_RC_OUTPUT_EXTENSION .res) -set(CMAKE_RC_COMPILER_ENV_VAR "RC") diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeSystem.cmake b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeSystem.cmake deleted file mode 100644 index 8a1d2422..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CMakeSystem.cmake +++ /dev/null @@ -1,15 +0,0 @@ -set(CMAKE_HOST_SYSTEM "Windows-10.0.18362") -set(CMAKE_HOST_SYSTEM_NAME "Windows") -set(CMAKE_HOST_SYSTEM_VERSION "10.0.18362") -set(CMAKE_HOST_SYSTEM_PROCESSOR 
"AMD64") - - - -set(CMAKE_SYSTEM "Windows-10.0.18362") -set(CMAKE_SYSTEM_NAME "Windows") -set(CMAKE_SYSTEM_VERSION "10.0.18362") -set(CMAKE_SYSTEM_PROCESSOR "AMD64") - -set(CMAKE_CROSSCOMPILING "FALSE") - -set(CMAKE_SYSTEM_LOADED 1) diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.c b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.c deleted file mode 100644 index 917e8b98..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.c +++ /dev/null @@ -1,665 +0,0 @@ -#ifdef __cplusplus -# error "A C++ compiler has been selected for C." -#endif - -#if defined(__18CXX) -# define ID_VOID_MAIN -#endif -#if defined(__CLASSIC_C__) -/* cv-qualifiers did not exist in K&R C */ -# define const -# define volatile -#endif - - -/* Version number components: V=Version, R=Revision, P=Patch - Version date components: YYYY=Year, MM=Month, DD=Day */ - -#if defined(__INTEL_COMPILER) || defined(__ICC) -# define COMPILER_ID "Intel" -# if defined(_MSC_VER) -# define SIMULATE_ID "MSVC" -# endif -# if defined(__GNUC__) -# define SIMULATE_ID "GNU" -# endif - /* __INTEL_COMPILER = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) -# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) -# if defined(__INTEL_COMPILER_UPDATE) -# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) -# else -# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) -# endif -# if defined(__INTEL_COMPILER_BUILD_DATE) - /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ -# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) -# endif -# if defined(_MSC_VER) - /* _MSC_VER = VVRR */ -# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) -# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 
100) -# endif -# if defined(__GNUC__) -# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) -# elif defined(__GNUG__) -# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) -# endif -# if defined(__GNUC_MINOR__) -# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) -# endif -# if defined(__GNUC_PATCHLEVEL__) -# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) -# endif - -#elif defined(__PATHCC__) -# define COMPILER_ID "PathScale" -# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) -# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) -# if defined(__PATHCC_PATCHLEVEL__) -# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) -# endif - -#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) -# define COMPILER_ID "Embarcadero" -# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) -# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) -# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) - -#elif defined(__BORLANDC__) -# define COMPILER_ID "Borland" - /* __BORLANDC__ = 0xVRR */ -# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) -# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) - -#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 -# define COMPILER_ID "Watcom" - /* __WATCOMC__ = VVRR */ -# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) -# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) -# if (__WATCOMC__ % 10) > 0 -# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) -# endif - -#elif defined(__WATCOMC__) -# define COMPILER_ID "OpenWatcom" - /* __WATCOMC__ = VVRP + 1100 */ -# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) -# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) -# if (__WATCOMC__ % 10) > 0 -# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) -# endif - -#elif defined(__SUNPRO_C) -# define COMPILER_ID "SunPro" -# if __SUNPRO_C >= 0x5100 - /* __SUNPRO_C = 0xVRRP */ -# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12) -# define COMPILER_VERSION_MINOR 
HEX(__SUNPRO_C>>4 & 0xFF) -# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) -# else - /* __SUNPRO_CC = 0xVRP */ -# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8) -# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF) -# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) -# endif - -#elif defined(__HP_cc) -# define COMPILER_ID "HP" - /* __HP_cc = VVRRPP */ -# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000) -# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100) -# define COMPILER_VERSION_PATCH DEC(__HP_cc % 100) - -#elif defined(__DECC) -# define COMPILER_ID "Compaq" - /* __DECC_VER = VVRRTPPPP */ -# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000) -# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000 % 100) -# define COMPILER_VERSION_PATCH DEC(__DECC_VER % 10000) - -#elif defined(__IBMC__) && defined(__COMPILER_VER__) -# define COMPILER_ID "zOS" - /* __IBMC__ = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) -# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) -# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) - -#elif defined(__ibmxl__) && defined(__clang__) -# define COMPILER_ID "XLClang" -# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) -# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) -# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) -# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) - - -#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800 -# define COMPILER_ID "XL" - /* __IBMC__ = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) -# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) -# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) - -#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800 -# define COMPILER_ID "VisualAge" - /* __IBMC__ = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) -# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) -# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) - -#elif defined(__PGI) -# define COMPILER_ID 
"PGI" -# define COMPILER_VERSION_MAJOR DEC(__PGIC__) -# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) -# if defined(__PGIC_PATCHLEVEL__) -# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) -# endif - -#elif defined(_CRAYC) -# define COMPILER_ID "Cray" -# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) -# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) - -#elif defined(__TI_COMPILER_VERSION__) -# define COMPILER_ID "TI" - /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ -# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) -# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) -# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) - -#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version) -# define COMPILER_ID "Fujitsu" - -#elif defined(__ghs__) -# define COMPILER_ID "GHS" -/* __GHS_VERSION_NUMBER = VVVVRP */ -# ifdef __GHS_VERSION_NUMBER -# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) -# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) -# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) -# endif - -#elif defined(__TINYC__) -# define COMPILER_ID "TinyCC" - -#elif defined(__BCC__) -# define COMPILER_ID "Bruce" - -#elif defined(__SCO_VERSION__) -# define COMPILER_ID "SCO" - -#elif defined(__ARMCC_VERSION) && !defined(__clang__) -# define COMPILER_ID "ARMCC" -#if __ARMCC_VERSION >= 1000000 - /* __ARMCC_VERSION = VRRPPPP */ - # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) - # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) - # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) -#else - /* __ARMCC_VERSION = VRPPPP */ - # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) - # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) - # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) -#endif - - -#elif defined(__clang__) && defined(__apple_build_version__) -# define COMPILER_ID "AppleClang" -# 
if defined(_MSC_VER) -# define SIMULATE_ID "MSVC" -# endif -# define COMPILER_VERSION_MAJOR DEC(__clang_major__) -# define COMPILER_VERSION_MINOR DEC(__clang_minor__) -# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) -# if defined(_MSC_VER) - /* _MSC_VER = VVRR */ -# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) -# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) -# endif -# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) - -#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) -# define COMPILER_ID "ARMClang" - # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) - # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) - # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) -# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) - -#elif defined(__clang__) -# define COMPILER_ID "Clang" -# if defined(_MSC_VER) -# define SIMULATE_ID "MSVC" -# endif -# define COMPILER_VERSION_MAJOR DEC(__clang_major__) -# define COMPILER_VERSION_MINOR DEC(__clang_minor__) -# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) -# if defined(_MSC_VER) - /* _MSC_VER = VVRR */ -# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) -# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) -# endif - -#elif defined(__GNUC__) -# define COMPILER_ID "GNU" -# define COMPILER_VERSION_MAJOR DEC(__GNUC__) -# if defined(__GNUC_MINOR__) -# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) -# endif -# if defined(__GNUC_PATCHLEVEL__) -# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) -# endif - -#elif defined(_MSC_VER) -# define COMPILER_ID "MSVC" - /* _MSC_VER = VVRR */ -# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) -# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) -# if defined(_MSC_FULL_VER) -# if _MSC_VER >= 1400 - /* _MSC_FULL_VER = VVRRPPPPP */ -# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) -# else - /* _MSC_FULL_VER = VVRRPPPP */ -# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) 
-# endif -# endif -# if defined(_MSC_BUILD) -# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) -# endif - -#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) -# define COMPILER_ID "ADSP" -#if defined(__VISUALDSPVERSION__) - /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ -# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) -# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) -# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) -#endif - -#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) -# define COMPILER_ID "IAR" -# if defined(__VER__) && defined(__ICCARM__) -# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) -# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) -# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) -# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) -# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__)) -# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) -# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) -# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) -# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) -# endif - -#elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC) -# define COMPILER_ID "SDCC" -# if defined(__SDCC_VERSION_MAJOR) -# define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR) -# define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR) -# define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH) -# else - /* SDCC = VRP */ -# define COMPILER_VERSION_MAJOR DEC(SDCC/100) -# define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10) -# define COMPILER_VERSION_PATCH DEC(SDCC % 10) -# endif - - -/* These compilers are either not known or too old to define an - identification macro. Try to identify the platform and guess that - it is the native compiler. 
*/ -#elif defined(__hpux) || defined(__hpua) -# define COMPILER_ID "HP" - -#else /* unknown compiler */ -# define COMPILER_ID "" -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. */ -char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; -#ifdef SIMULATE_ID -char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; -#endif - -#ifdef __QNXNTO__ -char const* qnxnto = "INFO" ":" "qnxnto[]"; -#endif - -#if defined(__CRAYXE) || defined(__CRAYXC) -char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; -#endif - -#define STRINGIFY_HELPER(X) #X -#define STRINGIFY(X) STRINGIFY_HELPER(X) - -/* Identify known platforms by name. */ -#if defined(__linux) || defined(__linux__) || defined(linux) -# define PLATFORM_ID "Linux" - -#elif defined(__CYGWIN__) -# define PLATFORM_ID "Cygwin" - -#elif defined(__MINGW32__) -# define PLATFORM_ID "MinGW" - -#elif defined(__APPLE__) -# define PLATFORM_ID "Darwin" - -#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) -# define PLATFORM_ID "Windows" - -#elif defined(__FreeBSD__) || defined(__FreeBSD) -# define PLATFORM_ID "FreeBSD" - -#elif defined(__NetBSD__) || defined(__NetBSD) -# define PLATFORM_ID "NetBSD" - -#elif defined(__OpenBSD__) || defined(__OPENBSD) -# define PLATFORM_ID "OpenBSD" - -#elif defined(__sun) || defined(sun) -# define PLATFORM_ID "SunOS" - -#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) -# define PLATFORM_ID "AIX" - -#elif defined(__hpux) || defined(__hpux__) -# define PLATFORM_ID "HP-UX" - -#elif defined(__HAIKU__) -# define PLATFORM_ID "Haiku" - -#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) -# define PLATFORM_ID "BeOS" - -#elif defined(__QNX__) || defined(__QNXNTO__) -# define PLATFORM_ID "QNX" 
- -#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) -# define PLATFORM_ID "Tru64" - -#elif defined(__riscos) || defined(__riscos__) -# define PLATFORM_ID "RISCos" - -#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) -# define PLATFORM_ID "SINIX" - -#elif defined(__UNIX_SV__) -# define PLATFORM_ID "UNIX_SV" - -#elif defined(__bsdos__) -# define PLATFORM_ID "BSDOS" - -#elif defined(_MPRAS) || defined(MPRAS) -# define PLATFORM_ID "MP-RAS" - -#elif defined(__osf) || defined(__osf__) -# define PLATFORM_ID "OSF1" - -#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) -# define PLATFORM_ID "SCO_SV" - -#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) -# define PLATFORM_ID "ULTRIX" - -#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) -# define PLATFORM_ID "Xenix" - -#elif defined(__WATCOMC__) -# if defined(__LINUX__) -# define PLATFORM_ID "Linux" - -# elif defined(__DOS__) -# define PLATFORM_ID "DOS" - -# elif defined(__OS2__) -# define PLATFORM_ID "OS2" - -# elif defined(__WINDOWS__) -# define PLATFORM_ID "Windows3x" - -# else /* unknown platform */ -# define PLATFORM_ID -# endif - -#elif defined(__INTEGRITY) -# if defined(INT_178B) -# define PLATFORM_ID "Integrity178" - -# else /* regular Integrity */ -# define PLATFORM_ID "Integrity" -# endif - -#else /* unknown platform */ -# define PLATFORM_ID - -#endif - -/* For windows compilers MSVC and Intel we can determine - the architecture of the compiler being used. 
This is because - the compilers do not have flags that can change the architecture, - but rather depend on which compiler is being used -*/ -#if defined(_WIN32) && defined(_MSC_VER) -# if defined(_M_IA64) -# define ARCHITECTURE_ID "IA64" - -# elif defined(_M_X64) || defined(_M_AMD64) -# define ARCHITECTURE_ID "x64" - -# elif defined(_M_IX86) -# define ARCHITECTURE_ID "X86" - -# elif defined(_M_ARM64) -# define ARCHITECTURE_ID "ARM64" - -# elif defined(_M_ARM) -# if _M_ARM == 4 -# define ARCHITECTURE_ID "ARMV4I" -# elif _M_ARM == 5 -# define ARCHITECTURE_ID "ARMV5I" -# else -# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) -# endif - -# elif defined(_M_MIPS) -# define ARCHITECTURE_ID "MIPS" - -# elif defined(_M_SH) -# define ARCHITECTURE_ID "SHx" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif - -#elif defined(__WATCOMC__) -# if defined(_M_I86) -# define ARCHITECTURE_ID "I86" - -# elif defined(_M_IX86) -# define ARCHITECTURE_ID "X86" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif - -#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) -# if defined(__ICCARM__) -# define ARCHITECTURE_ID "ARM" - -# elif defined(__ICCRX__) -# define ARCHITECTURE_ID "RX" - -# elif defined(__ICCRH850__) -# define ARCHITECTURE_ID "RH850" - -# elif defined(__ICCRL78__) -# define ARCHITECTURE_ID "RL78" - -# elif defined(__ICCRISCV__) -# define ARCHITECTURE_ID "RISCV" - -# elif defined(__ICCAVR__) -# define ARCHITECTURE_ID "AVR" - -# elif defined(__ICC430__) -# define ARCHITECTURE_ID "MSP430" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif - -#elif defined(__ghs__) -# if defined(__PPC64__) -# define ARCHITECTURE_ID "PPC64" - -# elif defined(__ppc__) -# define ARCHITECTURE_ID "PPC" - -# elif defined(__ARM__) -# define ARCHITECTURE_ID "ARM" - -# elif defined(__x86_64__) -# define ARCHITECTURE_ID "x64" - -# elif defined(__i386__) -# define ARCHITECTURE_ID "X86" - -# else /* unknown architecture */ -# 
define ARCHITECTURE_ID "" -# endif -#else -# define ARCHITECTURE_ID -#endif - -/* Convert integer to decimal digit literals. */ -#define DEC(n) \ - ('0' + (((n) / 10000000)%10)), \ - ('0' + (((n) / 1000000)%10)), \ - ('0' + (((n) / 100000)%10)), \ - ('0' + (((n) / 10000)%10)), \ - ('0' + (((n) / 1000)%10)), \ - ('0' + (((n) / 100)%10)), \ - ('0' + (((n) / 10)%10)), \ - ('0' + ((n) % 10)) - -/* Convert integer to hex digit literals. */ -#define HEX(n) \ - ('0' + ((n)>>28 & 0xF)), \ - ('0' + ((n)>>24 & 0xF)), \ - ('0' + ((n)>>20 & 0xF)), \ - ('0' + ((n)>>16 & 0xF)), \ - ('0' + ((n)>>12 & 0xF)), \ - ('0' + ((n)>>8 & 0xF)), \ - ('0' + ((n)>>4 & 0xF)), \ - ('0' + ((n) & 0xF)) - -/* Construct a string literal encoding the version number components. */ -#ifdef COMPILER_VERSION_MAJOR -char const info_version[] = { - 'I', 'N', 'F', 'O', ':', - 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', - COMPILER_VERSION_MAJOR, -# ifdef COMPILER_VERSION_MINOR - '.', COMPILER_VERSION_MINOR, -# ifdef COMPILER_VERSION_PATCH - '.', COMPILER_VERSION_PATCH, -# ifdef COMPILER_VERSION_TWEAK - '.', COMPILER_VERSION_TWEAK, -# endif -# endif -# endif - ']','\0'}; -#endif - -/* Construct a string literal encoding the internal version number. */ -#ifdef COMPILER_VERSION_INTERNAL -char const info_version_internal[] = { - 'I', 'N', 'F', 'O', ':', - 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', - 'i','n','t','e','r','n','a','l','[', - COMPILER_VERSION_INTERNAL,']','\0'}; -#endif - -/* Construct a string literal encoding the version number components. 
*/ -#ifdef SIMULATE_VERSION_MAJOR -char const info_simulate_version[] = { - 'I', 'N', 'F', 'O', ':', - 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', - SIMULATE_VERSION_MAJOR, -# ifdef SIMULATE_VERSION_MINOR - '.', SIMULATE_VERSION_MINOR, -# ifdef SIMULATE_VERSION_PATCH - '.', SIMULATE_VERSION_PATCH, -# ifdef SIMULATE_VERSION_TWEAK - '.', SIMULATE_VERSION_TWEAK, -# endif -# endif -# endif - ']','\0'}; -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. */ -char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; -char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; - - - - -#if !defined(__STDC__) -# if (defined(_MSC_VER) && !defined(__clang__)) \ - || (defined(__ibmxl__) || defined(__IBMC__)) -# define C_DIALECT "90" -# else -# define C_DIALECT -# endif -#elif __STDC_VERSION__ >= 201000L -# define C_DIALECT "11" -#elif __STDC_VERSION__ >= 199901L -# define C_DIALECT "99" -#else -# define C_DIALECT "90" -#endif -const char* info_language_dialect_default = - "INFO" ":" "dialect_default[" C_DIALECT "]"; - -/*--------------------------------------------------------------------------*/ - -#ifdef ID_VOID_MAIN -void main() {} -#else -# if defined(__CLASSIC_C__) -int main(argc, argv) int argc; char *argv[]; -# else -int main(int argc, char* argv[]) -# endif -{ - int require = 0; - require += info_compiler[argc]; - require += info_platform[argc]; - require += info_arch[argc]; -#ifdef COMPILER_VERSION_MAJOR - require += info_version[argc]; -#endif -#ifdef COMPILER_VERSION_INTERNAL - require += info_version_internal[argc]; -#endif -#ifdef SIMULATE_ID - require += info_simulate[argc]; -#endif -#ifdef SIMULATE_VERSION_MAJOR - require += info_simulate_version[argc]; -#endif -#if defined(__CRAYXE) || defined(__CRAYXC) - 
require += info_cray[argc]; -#endif - require += info_language_dialect_default[argc]; - (void)argv; - return require; -} -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.exe deleted file mode 100644 index 4219e727..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.obj b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.obj deleted file mode 100644 index 0c8a8ffd..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.obj and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.cpp deleted file mode 100644 index 4761ea2b..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.cpp +++ /dev/null @@ -1,644 +0,0 @@ -/* This source file must have a .cpp extension so that all C++ compilers - recognize the extension without flags. Borland does not know .cxx for - example. 
*/ -#ifndef __cplusplus -# error "A C compiler has been selected for C++." -#endif - - -/* Version number components: V=Version, R=Revision, P=Patch - Version date components: YYYY=Year, MM=Month, DD=Day */ - -#if defined(__COMO__) -# define COMPILER_ID "Comeau" - /* __COMO_VERSION__ = VRR */ -# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100) -# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100) - -#elif defined(__INTEL_COMPILER) || defined(__ICC) -# define COMPILER_ID "Intel" -# if defined(_MSC_VER) -# define SIMULATE_ID "MSVC" -# endif -# if defined(__GNUC__) -# define SIMULATE_ID "GNU" -# endif - /* __INTEL_COMPILER = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) -# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) -# if defined(__INTEL_COMPILER_UPDATE) -# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) -# else -# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) -# endif -# if defined(__INTEL_COMPILER_BUILD_DATE) - /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ -# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) -# endif -# if defined(_MSC_VER) - /* _MSC_VER = VVRR */ -# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) -# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) -# endif -# if defined(__GNUC__) -# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) -# elif defined(__GNUG__) -# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) -# endif -# if defined(__GNUC_MINOR__) -# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) -# endif -# if defined(__GNUC_PATCHLEVEL__) -# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) -# endif - -#elif defined(__PATHCC__) -# define COMPILER_ID "PathScale" -# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) -# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) -# if defined(__PATHCC_PATCHLEVEL__) -# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) -# endif - -#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) -# define COMPILER_ID 
"Embarcadero" -# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) -# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) -# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) - -#elif defined(__BORLANDC__) -# define COMPILER_ID "Borland" - /* __BORLANDC__ = 0xVRR */ -# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) -# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) - -#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 -# define COMPILER_ID "Watcom" - /* __WATCOMC__ = VVRR */ -# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) -# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) -# if (__WATCOMC__ % 10) > 0 -# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) -# endif - -#elif defined(__WATCOMC__) -# define COMPILER_ID "OpenWatcom" - /* __WATCOMC__ = VVRP + 1100 */ -# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) -# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) -# if (__WATCOMC__ % 10) > 0 -# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) -# endif - -#elif defined(__SUNPRO_CC) -# define COMPILER_ID "SunPro" -# if __SUNPRO_CC >= 0x5100 - /* __SUNPRO_CC = 0xVRRP */ -# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12) -# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF) -# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) -# else - /* __SUNPRO_CC = 0xVRP */ -# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8) -# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF) -# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) -# endif - -#elif defined(__HP_aCC) -# define COMPILER_ID "HP" - /* __HP_aCC = VVRRPP */ -# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000) -# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100) -# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100) - -#elif defined(__DECCXX) -# define COMPILER_ID "Compaq" - /* __DECCXX_VER = VVRRTPPPP */ -# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000) -# define 
COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100) -# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000) - -#elif defined(__IBMCPP__) && defined(__COMPILER_VER__) -# define COMPILER_ID "zOS" - /* __IBMCPP__ = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) -# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) -# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) - -#elif defined(__ibmxl__) && defined(__clang__) -# define COMPILER_ID "XLClang" -# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) -# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) -# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) -# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) - - -#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800 -# define COMPILER_ID "XL" - /* __IBMCPP__ = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) -# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) -# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) - -#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800 -# define COMPILER_ID "VisualAge" - /* __IBMCPP__ = VRP */ -# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) -# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) -# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) - -#elif defined(__PGI) -# define COMPILER_ID "PGI" -# define COMPILER_VERSION_MAJOR DEC(__PGIC__) -# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) -# if defined(__PGIC_PATCHLEVEL__) -# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) -# endif - -#elif defined(_CRAYC) -# define COMPILER_ID "Cray" -# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) -# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) - -#elif defined(__TI_COMPILER_VERSION__) -# define COMPILER_ID "TI" - /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ -# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) -# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) -# define 
COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) - -#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version) -# define COMPILER_ID "Fujitsu" - -#elif defined(__ghs__) -# define COMPILER_ID "GHS" -/* __GHS_VERSION_NUMBER = VVVVRP */ -# ifdef __GHS_VERSION_NUMBER -# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) -# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) -# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) -# endif - -#elif defined(__SCO_VERSION__) -# define COMPILER_ID "SCO" - -#elif defined(__ARMCC_VERSION) && !defined(__clang__) -# define COMPILER_ID "ARMCC" -#if __ARMCC_VERSION >= 1000000 - /* __ARMCC_VERSION = VRRPPPP */ - # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) - # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) - # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) -#else - /* __ARMCC_VERSION = VRPPPP */ - # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) - # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) - # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) -#endif - - -#elif defined(__clang__) && defined(__apple_build_version__) -# define COMPILER_ID "AppleClang" -# if defined(_MSC_VER) -# define SIMULATE_ID "MSVC" -# endif -# define COMPILER_VERSION_MAJOR DEC(__clang_major__) -# define COMPILER_VERSION_MINOR DEC(__clang_minor__) -# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) -# if defined(_MSC_VER) - /* _MSC_VER = VVRR */ -# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) -# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) -# endif -# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) - -#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) -# define COMPILER_ID "ARMClang" - # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) - # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) - # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 
10000) -# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) - -#elif defined(__clang__) -# define COMPILER_ID "Clang" -# if defined(_MSC_VER) -# define SIMULATE_ID "MSVC" -# endif -# define COMPILER_VERSION_MAJOR DEC(__clang_major__) -# define COMPILER_VERSION_MINOR DEC(__clang_minor__) -# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) -# if defined(_MSC_VER) - /* _MSC_VER = VVRR */ -# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) -# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) -# endif - -#elif defined(__GNUC__) || defined(__GNUG__) -# define COMPILER_ID "GNU" -# if defined(__GNUC__) -# define COMPILER_VERSION_MAJOR DEC(__GNUC__) -# else -# define COMPILER_VERSION_MAJOR DEC(__GNUG__) -# endif -# if defined(__GNUC_MINOR__) -# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) -# endif -# if defined(__GNUC_PATCHLEVEL__) -# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) -# endif - -#elif defined(_MSC_VER) -# define COMPILER_ID "MSVC" - /* _MSC_VER = VVRR */ -# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) -# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) -# if defined(_MSC_FULL_VER) -# if _MSC_VER >= 1400 - /* _MSC_FULL_VER = VVRRPPPPP */ -# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) -# else - /* _MSC_FULL_VER = VVRRPPPP */ -# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) -# endif -# endif -# if defined(_MSC_BUILD) -# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) -# endif - -#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) -# define COMPILER_ID "ADSP" -#if defined(__VISUALDSPVERSION__) - /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ -# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) -# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) -# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) -#endif - -#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) -# define COMPILER_ID "IAR" -# if 
defined(__VER__) && defined(__ICCARM__) -# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) -# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) -# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) -# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) -# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__)) -# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) -# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) -# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) -# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) -# endif - - -/* These compilers are either not known or too old to define an - identification macro. Try to identify the platform and guess that - it is the native compiler. */ -#elif defined(__hpux) || defined(__hpua) -# define COMPILER_ID "HP" - -#else /* unknown compiler */ -# define COMPILER_ID "" -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. */ -char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; -#ifdef SIMULATE_ID -char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; -#endif - -#ifdef __QNXNTO__ -char const* qnxnto = "INFO" ":" "qnxnto[]"; -#endif - -#if defined(__CRAYXE) || defined(__CRAYXC) -char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; -#endif - -#define STRINGIFY_HELPER(X) #X -#define STRINGIFY(X) STRINGIFY_HELPER(X) - -/* Identify known platforms by name. 
*/ -#if defined(__linux) || defined(__linux__) || defined(linux) -# define PLATFORM_ID "Linux" - -#elif defined(__CYGWIN__) -# define PLATFORM_ID "Cygwin" - -#elif defined(__MINGW32__) -# define PLATFORM_ID "MinGW" - -#elif defined(__APPLE__) -# define PLATFORM_ID "Darwin" - -#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) -# define PLATFORM_ID "Windows" - -#elif defined(__FreeBSD__) || defined(__FreeBSD) -# define PLATFORM_ID "FreeBSD" - -#elif defined(__NetBSD__) || defined(__NetBSD) -# define PLATFORM_ID "NetBSD" - -#elif defined(__OpenBSD__) || defined(__OPENBSD) -# define PLATFORM_ID "OpenBSD" - -#elif defined(__sun) || defined(sun) -# define PLATFORM_ID "SunOS" - -#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) -# define PLATFORM_ID "AIX" - -#elif defined(__hpux) || defined(__hpux__) -# define PLATFORM_ID "HP-UX" - -#elif defined(__HAIKU__) -# define PLATFORM_ID "Haiku" - -#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) -# define PLATFORM_ID "BeOS" - -#elif defined(__QNX__) || defined(__QNXNTO__) -# define PLATFORM_ID "QNX" - -#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) -# define PLATFORM_ID "Tru64" - -#elif defined(__riscos) || defined(__riscos__) -# define PLATFORM_ID "RISCos" - -#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) -# define PLATFORM_ID "SINIX" - -#elif defined(__UNIX_SV__) -# define PLATFORM_ID "UNIX_SV" - -#elif defined(__bsdos__) -# define PLATFORM_ID "BSDOS" - -#elif defined(_MPRAS) || defined(MPRAS) -# define PLATFORM_ID "MP-RAS" - -#elif defined(__osf) || defined(__osf__) -# define PLATFORM_ID "OSF1" - -#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) -# define PLATFORM_ID "SCO_SV" - -#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) -# define PLATFORM_ID "ULTRIX" - -#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) -# define PLATFORM_ID "Xenix" - -#elif defined(__WATCOMC__) -# if 
defined(__LINUX__) -# define PLATFORM_ID "Linux" - -# elif defined(__DOS__) -# define PLATFORM_ID "DOS" - -# elif defined(__OS2__) -# define PLATFORM_ID "OS2" - -# elif defined(__WINDOWS__) -# define PLATFORM_ID "Windows3x" - -# else /* unknown platform */ -# define PLATFORM_ID -# endif - -#elif defined(__INTEGRITY) -# if defined(INT_178B) -# define PLATFORM_ID "Integrity178" - -# else /* regular Integrity */ -# define PLATFORM_ID "Integrity" -# endif - -#else /* unknown platform */ -# define PLATFORM_ID - -#endif - -/* For windows compilers MSVC and Intel we can determine - the architecture of the compiler being used. This is because - the compilers do not have flags that can change the architecture, - but rather depend on which compiler is being used -*/ -#if defined(_WIN32) && defined(_MSC_VER) -# if defined(_M_IA64) -# define ARCHITECTURE_ID "IA64" - -# elif defined(_M_X64) || defined(_M_AMD64) -# define ARCHITECTURE_ID "x64" - -# elif defined(_M_IX86) -# define ARCHITECTURE_ID "X86" - -# elif defined(_M_ARM64) -# define ARCHITECTURE_ID "ARM64" - -# elif defined(_M_ARM) -# if _M_ARM == 4 -# define ARCHITECTURE_ID "ARMV4I" -# elif _M_ARM == 5 -# define ARCHITECTURE_ID "ARMV5I" -# else -# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) -# endif - -# elif defined(_M_MIPS) -# define ARCHITECTURE_ID "MIPS" - -# elif defined(_M_SH) -# define ARCHITECTURE_ID "SHx" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif - -#elif defined(__WATCOMC__) -# if defined(_M_I86) -# define ARCHITECTURE_ID "I86" - -# elif defined(_M_IX86) -# define ARCHITECTURE_ID "X86" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif - -#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) -# if defined(__ICCARM__) -# define ARCHITECTURE_ID "ARM" - -# elif defined(__ICCRX__) -# define ARCHITECTURE_ID "RX" - -# elif defined(__ICCRH850__) -# define ARCHITECTURE_ID "RH850" - -# elif defined(__ICCRL78__) -# define ARCHITECTURE_ID "RL78" - -# 
elif defined(__ICCRISCV__) -# define ARCHITECTURE_ID "RISCV" - -# elif defined(__ICCAVR__) -# define ARCHITECTURE_ID "AVR" - -# elif defined(__ICC430__) -# define ARCHITECTURE_ID "MSP430" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif - -#elif defined(__ghs__) -# if defined(__PPC64__) -# define ARCHITECTURE_ID "PPC64" - -# elif defined(__ppc__) -# define ARCHITECTURE_ID "PPC" - -# elif defined(__ARM__) -# define ARCHITECTURE_ID "ARM" - -# elif defined(__x86_64__) -# define ARCHITECTURE_ID "x64" - -# elif defined(__i386__) -# define ARCHITECTURE_ID "X86" - -# else /* unknown architecture */ -# define ARCHITECTURE_ID "" -# endif -#else -# define ARCHITECTURE_ID -#endif - -/* Convert integer to decimal digit literals. */ -#define DEC(n) \ - ('0' + (((n) / 10000000)%10)), \ - ('0' + (((n) / 1000000)%10)), \ - ('0' + (((n) / 100000)%10)), \ - ('0' + (((n) / 10000)%10)), \ - ('0' + (((n) / 1000)%10)), \ - ('0' + (((n) / 100)%10)), \ - ('0' + (((n) / 10)%10)), \ - ('0' + ((n) % 10)) - -/* Convert integer to hex digit literals. */ -#define HEX(n) \ - ('0' + ((n)>>28 & 0xF)), \ - ('0' + ((n)>>24 & 0xF)), \ - ('0' + ((n)>>20 & 0xF)), \ - ('0' + ((n)>>16 & 0xF)), \ - ('0' + ((n)>>12 & 0xF)), \ - ('0' + ((n)>>8 & 0xF)), \ - ('0' + ((n)>>4 & 0xF)), \ - ('0' + ((n) & 0xF)) - -/* Construct a string literal encoding the version number components. */ -#ifdef COMPILER_VERSION_MAJOR -char const info_version[] = { - 'I', 'N', 'F', 'O', ':', - 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', - COMPILER_VERSION_MAJOR, -# ifdef COMPILER_VERSION_MINOR - '.', COMPILER_VERSION_MINOR, -# ifdef COMPILER_VERSION_PATCH - '.', COMPILER_VERSION_PATCH, -# ifdef COMPILER_VERSION_TWEAK - '.', COMPILER_VERSION_TWEAK, -# endif -# endif -# endif - ']','\0'}; -#endif - -/* Construct a string literal encoding the internal version number. 
*/ -#ifdef COMPILER_VERSION_INTERNAL -char const info_version_internal[] = { - 'I', 'N', 'F', 'O', ':', - 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', - 'i','n','t','e','r','n','a','l','[', - COMPILER_VERSION_INTERNAL,']','\0'}; -#endif - -/* Construct a string literal encoding the version number components. */ -#ifdef SIMULATE_VERSION_MAJOR -char const info_simulate_version[] = { - 'I', 'N', 'F', 'O', ':', - 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', - SIMULATE_VERSION_MAJOR, -# ifdef SIMULATE_VERSION_MINOR - '.', SIMULATE_VERSION_MINOR, -# ifdef SIMULATE_VERSION_PATCH - '.', SIMULATE_VERSION_PATCH, -# ifdef SIMULATE_VERSION_TWEAK - '.', SIMULATE_VERSION_TWEAK, -# endif -# endif -# endif - ']','\0'}; -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. 
*/ -char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; -char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; - - - - -#if defined(_MSC_VER) && defined(_MSVC_LANG) -#define CXX_STD _MSVC_LANG -#else -#define CXX_STD __cplusplus -#endif - -const char* info_language_dialect_default = "INFO" ":" "dialect_default[" -#if CXX_STD > 201703L - "20" -#elif CXX_STD >= 201703L - "17" -#elif CXX_STD >= 201402L - "14" -#elif CXX_STD >= 201103L - "11" -#else - "98" -#endif -"]"; - -/*--------------------------------------------------------------------------*/ - -int main(int argc, char* argv[]) -{ - int require = 0; - require += info_compiler[argc]; - require += info_platform[argc]; -#ifdef COMPILER_VERSION_MAJOR - require += info_version[argc]; -#endif -#ifdef COMPILER_VERSION_INTERNAL - require += info_version_internal[argc]; -#endif -#ifdef SIMULATE_ID - require += info_simulate[argc]; -#endif -#ifdef SIMULATE_VERSION_MAJOR - require += info_simulate_version[argc]; -#endif -#if defined(__CRAYXE) || defined(__CRAYXC) - require += info_cray[argc]; -#endif - require += info_language_dialect_default[argc]; - (void)argv; - return require; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.exe deleted file mode 100644 index 56d6c480..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.obj 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.obj deleted file mode 100644 index 839d0c36..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.obj and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeOutput.log b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeOutput.log deleted file mode 100644 index 9681d0cc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeOutput.log +++ /dev/null @@ -1,113 +0,0 @@ -The system is: Windows - 10.0.18362 - AMD64 -Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. -Compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/HostX64/x64/cl.exe -Build flags: -Id flags: - -The output was: -0 -用于 x64 的 Microsoft (R) C/C++ 优化编译器 19.24.28316 版 -版权所有(C) Microsoft Corporation。保留所有权利。 - -CMakeCCompilerId.c -Microsoft (R) Incremental Linker Version 14.24.28316.0 -Copyright (C) Microsoft Corporation. All rights reserved. - -/out:CMakeCCompilerId.exe -CMakeCCompilerId.obj - - -Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "CMakeCCompilerId.exe" - -Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "CMakeCCompilerId.obj" - -The C compiler identification is MSVC, found in "E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdC/CMakeCCompilerId.exe" - -Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. 
-Compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/HostX64/x64/cl.exe -Build flags: -Id flags: - -The output was: -0 -用于 x64 的 Microsoft (R) C/C++ 优化编译器 19.24.28316 版 -版权所有(C) Microsoft Corporation。保留所有权利。 - -CMakeCXXCompilerId.cpp -Microsoft (R) Incremental Linker Version 14.24.28316.0 -Copyright (C) Microsoft Corporation. All rights reserved. - -/out:CMakeCXXCompilerId.exe -CMakeCXXCompilerId.obj - - -Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "CMakeCXXCompilerId.exe" - -Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "CMakeCXXCompilerId.obj" - -The CXX compiler identification is MSVC, found in "E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/3.15.19101501-MSVC_2/CompilerIdCXX/CMakeCXXCompilerId.exe" - -Determining if the C compiler works passed with the following output: -Change Dir: E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_ec8c4 && [1/2] Building C object CMakeFiles\cmTC_ec8c4.dir\testCCompiler.c.obj -[2/2] Linking C executable cmTC_ec8c4.exe - - - -Detecting C compiler ABI info compiled with the following output: -Change Dir: E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_6f63c && [1/2] Building C object CMakeFiles\cmTC_6f63c.dir\CMakeCCompilerABI.c.obj -[2/2] Linking C executable cmTC_6f63c.exe - - - -Determining if the CXX compiler works passed with the following output: -Change Dir: E:/Work/My Git 
Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_cc7ad && [1/2] Building CXX object CMakeFiles\cmTC_cc7ad.dir\testCXXCompiler.cxx.obj -[2/2] Linking CXX executable cmTC_cc7ad.exe - - - -Detecting CXX compiler ABI info compiled with the following output: -Change Dir: E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_2d542 && [1/2] Building CXX object CMakeFiles\cmTC_2d542.dir\CMakeCXXCompilerABI.cpp.obj -[2/2] Linking CXX executable cmTC_2d542.exe - - - -Determining if the include file sys/types.h exists passed with the following output: -Change Dir: E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_8669f && [1/2] Building C object CMakeFiles\cmTC_8669f.dir\CheckIncludeFile.c.obj -[2/2] Linking C executable cmTC_8669f.exe - - - -Determining if the include file stdint.h exists passed with the following output: -Change Dir: E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_1c4f1 && [1/2] Building C object CMakeFiles\cmTC_1c4f1.dir\CheckIncludeFile.c.obj -[2/2] Linking C executable cmTC_1c4f1.exe - - - -Determining if the include file stddef.h exists passed with the following output: -Change Dir: E:/Work/My Git 
Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_4d735 && [1/2] Building C object CMakeFiles\cmTC_4d735.dir\CheckIncludeFile.c.obj -[2/2] Linking C executable cmTC_4d735.exe - - - -Determining size of void * passed with the following output: -Change Dir: E:/Work/My Git Code/nick908/hg_gpdf/3rdparty/tesseract/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CMakeTmp - -Run Build Command(s):C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe cmTC_8c91f && [1/2] Building C object CMakeFiles\cmTC_8c91f.dir\SIZEOF_VOID_P.c.obj -[2/2] Linking C executable cmTC_8c91f.exe - - - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CheckTypeSize/SIZEOF_VOID_P.bin b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CheckTypeSize/SIZEOF_VOID_P.bin deleted file mode 100644 index 804dba51..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CheckTypeSize/SIZEOF_VOID_P.bin and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CheckTypeSize/SIZEOF_VOID_P.c b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CheckTypeSize/SIZEOF_VOID_P.c deleted file mode 100644 index 8ab55fe8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/CheckTypeSize/SIZEOF_VOID_P.c +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include -#include - - -#undef KEY -#if defined(__i386) -# define KEY '_','_','i','3','8','6' -#elif defined(__x86_64) -# define KEY '_','_','x','8','6','_','6','4' 
-#elif defined(__ppc__) -# define KEY '_','_','p','p','c','_','_' -#elif defined(__ppc64__) -# define KEY '_','_','p','p','c','6','4','_','_' -#elif defined(__aarch64__) -# define KEY '_','_','a','a','r','c','h','6','4','_','_' -#elif defined(__ARM_ARCH_7A__) -# define KEY '_','_','A','R','M','_','A','R','C','H','_','7','A','_','_' -#elif defined(__ARM_ARCH_7S__) -# define KEY '_','_','A','R','M','_','A','R','C','H','_','7','S','_','_' -#endif - -#define SIZE (sizeof(void *)) -static char info_size[] = {'I', 'N', 'F', 'O', ':', 's','i','z','e','[', - ('0' + ((SIZE / 10000)%10)), - ('0' + ((SIZE / 1000)%10)), - ('0' + ((SIZE / 100)%10)), - ('0' + ((SIZE / 10)%10)), - ('0' + (SIZE % 10)), - ']', -#ifdef KEY - ' ','k','e','y','[', KEY, ']', -#endif - '\0'}; - -#ifdef __CLASSIC_C__ -int main(argc, argv) int argc; char *argv[]; -#else -int main(int argc, char *argv[]) -#endif -{ - int require = 0; - require += info_size[argc]; - (void)argv; - return require; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/foo.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/foo.h deleted file mode 100644 index 8b137891..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/foo.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/main.c b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/main.c deleted file mode 100644 index cd3cbc1f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/main.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "foo.h" -int main(){} diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/main.obj b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/main.obj deleted file mode 100644 index edabce38..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/ShowIncludes/main.obj and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/cmake.check_cache b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/cmake.check_cache deleted file mode 100644 index 3dccd731..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/CMakeFiles/cmake.check_cache +++ /dev/null @@ -1 +0,0 @@ -# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/VSInheritEnvironments.txt b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/VSInheritEnvironments.txt deleted file mode 100644 index f8cc9d8a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/VSInheritEnvironments.txt +++ /dev/null @@ -1 +0,0 @@ -msvc_x64_x64 \ No newline at end of file diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/derb.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/derb.exe deleted file mode 100644 index 8476b30d..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/derb.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genbrk.exe 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genbrk.exe deleted file mode 100644 index 018d9e27..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genbrk.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genccode.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genccode.exe deleted file mode 100644 index 8eea8a7e..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genccode.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencfu.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencfu.exe deleted file mode 100644 index d0befcaf..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencfu.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencmn.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencmn.exe deleted file mode 100644 index 1cf9dfd0..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencmn.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencnval.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencnval.exe deleted file mode 100644 index 37f9cc6e..00000000 Binary files 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gencnval.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gendict.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gendict.exe deleted file mode 100644 index 344efde1..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gendict.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gennorm2.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gennorm2.exe deleted file mode 100644 index 1a1f7242..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gennorm2.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genrb.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genrb.exe deleted file mode 100644 index da4f18de..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/genrb.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gensprep.exe b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gensprep.exe deleted file mode 100644 index e045d4ac..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/gensprep.exe and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/icudt56.dll 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/icudt56.dll deleted file mode 100644 index 993515b6..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/bin64/icudt56.dll and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/license.html b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/license.html deleted file mode 100644 index 87823226..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu/license.html +++ /dev/null @@ -1,384 +0,0 @@ - - - - -ICU License - ICU 1.8.1 and later - - - -

ICU License - ICU 1.8.1 and later

- -

COPYRIGHT AND PERMISSION NOTICE

- -

-Copyright (c) 1995-2015 International Business Machines Corporation and others -

-

-All rights reserved. -

-

-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, and/or sell -copies of the Software, and to permit persons -to whom the Software is furnished to do so, provided that the above -copyright notice(s) and this permission notice appear in all copies -of the Software and that both the above copyright notice(s) and this -permission notice appear in supporting documentation. -

-

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, -OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER -RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. -

-

-Except as contained in this notice, the name of a copyright holder shall not be -used in advertising or otherwise to promote the sale, use or other dealings in -this Software without prior written authorization of the copyright holder. -

- -
-

-All trademarks and registered trademarks mentioned herein are the property of their respective owners. -

- -
- -

Third-Party Software Licenses

-This section contains third-party software notices and/or additional terms for licensed -third-party software components included within ICU libraries. - -

1. Unicode Data Files and Software

- -
COPYRIGHT AND PERMISSION NOTICE
-
-Copyright © 1991-2015 Unicode, Inc. All rights reserved.
-Distributed under the Terms of Use in 
-http://www.unicode.org/copyright.html.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of the Unicode data files and any associated documentation
-(the "Data Files") or Unicode software and any associated documentation
-(the "Software") to deal in the Data Files or Software
-without restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, and/or sell copies of
-the Data Files or Software, and to permit persons to whom the Data Files
-or Software are furnished to do so, provided that
-(a) this copyright and permission notice appear with all copies 
-of the Data Files or Software,
-(b) this copyright and permission notice appear in associated 
-documentation, and
-(c) there is clear notice in each modified Data File or in the Software
-as well as in the documentation associated with the Data File(s) or
-Software that the data or software has been modified.
-
-THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
-ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
-NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
-DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
-DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-PERFORMANCE OF THE DATA FILES OR SOFTWARE.
-
-Except as contained in this notice, the name of a copyright holder
-shall not be used in advertising or otherwise to promote the sale,
-use or other dealings in these Data Files or Software without prior
-written authorization of the copyright holder.
- -

2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)

-
- #    The Google Chrome software developed by Google is licensed under the BSD license. Other software included in this distribution is provided under other licenses, as set forth below.
- #	
- #	The BSD License
- #	http://opensource.org/licenses/bsd-license.php 
- #	Copyright (C) 2006-2008, Google Inc.
- #	
- #	All rights reserved.
- #	
- #	Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
- #	
- #	Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- #	Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- #	Neither the name of  Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
- #	 
- #	
- #	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #	
- #	                                             
- #	The word list in cjdict.txt are generated by combining three word lists listed
- #	below with further processing for compound word breaking. The frequency is generated
- #	with an iterative training against Google web corpora. 
- #	
- #	* Libtabe (Chinese)
- #	  - https://sourceforge.net/project/?group_id=1519
- #	  - Its license terms and conditions are shown below.
- #	
- #	* IPADIC (Japanese)
- #	  - http://chasen.aist-nara.ac.jp/chasen/distribution.html
- #	  - Its license terms and conditions are shown below.
- #	
- #	---------COPYING.libtabe ---- BEGIN--------------------
- #	
- #	/*
- #	 * Copyrighy (c) 1999 TaBE Project.
- #	 * Copyright (c) 1999 Pai-Hsiang Hsiao.
- #	 * All rights reserved.
- #	 *
- #	 * Redistribution and use in source and binary forms, with or without
- #	 * modification, are permitted provided that the following conditions
- #	 * are met:
- #	 *
- #	 * . Redistributions of source code must retain the above copyright
- #	 *   notice, this list of conditions and the following disclaimer.
- #	 * . Redistributions in binary form must reproduce the above copyright
- #	 *   notice, this list of conditions and the following disclaimer in
- #	 *   the documentation and/or other materials provided with the
- #	 *   distribution.
- #	 * . Neither the name of the TaBE Project nor the names of its
- #	 *   contributors may be used to endorse or promote products derived
- #	 *   from this software without specific prior written permission.
- #	 *
- #	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- #	 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- #	 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- #	 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- #	 * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- #	 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- #	 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- #	 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- #	 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- #	 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- #	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- #	 * OF THE POSSIBILITY OF SUCH DAMAGE.
- #	 */
- #	
- #	/*
- #	 * Copyright (c) 1999 Computer Systems and Communication Lab,
- #	 *                    Institute of Information Science, Academia Sinica.
- #	 * All rights reserved.
- #	 *
- #	 * Redistribution and use in source and binary forms, with or without
- #	 * modification, are permitted provided that the following conditions
- #	 * are met:
- #	 *
- #	 * . Redistributions of source code must retain the above copyright
- #	 *   notice, this list of conditions and the following disclaimer.
- #	 * . Redistributions in binary form must reproduce the above copyright
- #	 *   notice, this list of conditions and the following disclaimer in
- #	 *   the documentation and/or other materials provided with the
- #	 *   distribution.
- #	 * . Neither the name of the Computer Systems and Communication Lab
- #	 *   nor the names of its contributors may be used to endorse or
- #	 *   promote products derived from this software without specific
- #	 *   prior written permission.
- #	 *
- #	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- #	 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- #	 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- #	 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- #	 * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- #	 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- #	 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- #	 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- #	 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- #	 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- #	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- #	 * OF THE POSSIBILITY OF SUCH DAMAGE.
- #	 */
- #	
- #	Copyright 1996 Chih-Hao Tsai @ Beckman Institute, University of Illinois
- #	c-tsai4@uiuc.edu  http://casper.beckman.uiuc.edu/~c-tsai4
- #	
- #	---------------COPYING.libtabe-----END------------------------------------
- #	
- #	
- #	---------------COPYING.ipadic-----BEGIN------------------------------------
- #	
- #	Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
- #	and Technology.  All Rights Reserved.
- #	
- #	Use, reproduction, and distribution of this software is permitted.
- #	Any copy of this software, whether in its original form or modified,
- #	must include both the above copyright notice and the following
- #	paragraphs.
- #	
- #	Nara Institute of Science and Technology (NAIST),
- #	the copyright holders, disclaims all warranties with regard to this
- #	software, including all implied warranties of merchantability and
- #	fitness, in no event shall NAIST be liable for
- #	any special, indirect or consequential damages or any damages
- #	whatsoever resulting from loss of use, data or profits, whether in an
- #	action of contract, negligence or other tortuous action, arising out
- #	of or in connection with the use or performance of this software.
- #	
- #	A large portion of the dictionary entries
- #	originate from ICOT Free Software.  The following conditions for ICOT
- #	Free Software applies to the current dictionary as well.
- #	
- #	Each User may also freely distribute the Program, whether in its
- #	original form or modified, to any third party or parties, PROVIDED
- #	that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
- #	on, or be attached to, the Program, which is distributed substantially
- #	in the same form as set out herein and that such intended
- #	distribution, if actually made, will neither violate or otherwise
- #	contravene any of the laws and regulations of the countries having
- #	jurisdiction over the User or the intended distribution itself.
- #	
- #	NO WARRANTY
- #	
- #	The program was produced on an experimental basis in the course of the
- #	research and development conducted during the project and is provided
- #	to users as so produced on an experimental basis.  Accordingly, the
- #	program is provided without any warranty whatsoever, whether express,
- #	implied, statutory or otherwise.  The term "warranty" used herein
- #	includes, but is not limited to, any warranty of the quality,
- #	performance, merchantability and fitness for a particular purpose of
- #	the program and the nonexistence of any infringement or violation of
- #	any right of any third party.
- #	
- #	Each user of the program will agree and understand, and be deemed to
- #	have agreed and understood, that there is no warranty whatsoever for
- #	the program and, accordingly, the entire risk arising from or
- #	otherwise connected with the program is assumed by the user.
- #	
- #	Therefore, neither ICOT, the copyright holder, or any other
- #	organization that participated in or was otherwise related to the
- #	development of the program and their respective officials, directors,
- #	officers and other employees shall be held liable for any and all
- #	damages, including, without limitation, general, special, incidental
- #	and consequential damages, arising out of or otherwise in connection
- #	with the use or inability to use the program or any product, material
- #	or result produced or otherwise obtained by using the program,
- #	regardless of whether they have been advised of, or otherwise had
- #	knowledge of, the possibility of such damages at any time during the
- #	project or thereafter.  Each user will be deemed to have agreed to the
- #	foregoing by his or her commencement of use of the program.  The term
- #	"use" as used herein includes, but is not limited to, the use,
- #	modification, copying and distribution of the program and the
- #	production of secondary products from the program.
- #	
- #	In the case where the program, whether in its original form or
- #	modified, was distributed or delivered to or received by a user from
- #	any person, organization or entity other than ICOT, unless it makes or
- #	grants independently of ICOT any specific warranty to the user in
- #	writing, such person, organization or entity, will also be exempted
- #	from and not be held liable to the user for any such damages as noted
- #	above as far as the program is concerned.
- #	
- #	---------------COPYING.ipadic-----END------------------------------------
-
- -

3. Lao Word Break Dictionary Data (laodict.txt)

-
- #	Copyright (c) 2013 International Business Machines Corporation
- #	and others. All Rights Reserved.
- #
- #	Project:    http://code.google.com/p/lao-dictionary/
- #	Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
- #	License:    http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
- #	            (copied below)
- #
- #	This file is derived from the above dictionary, with slight modifications.
- #	--------------------------------------------------------------------------------
- #	Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
- #	All rights reserved.
- #
- #	Redistribution and use in source and binary forms, with or without modification,
- #	are permitted provided that the following conditions are met:
- #
- #		Redistributions of source code must retain the above copyright notice, this
- #		list of conditions and the following disclaimer. Redistributions in binary
- #		form must reproduce the above copyright notice, this list of conditions and
- #		the following disclaimer in the documentation and/or other materials
- #		provided with the distribution.
- #
- #	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- #	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- #	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- #	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
- #	ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- #	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- #	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- #	ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- #	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- #	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #	--------------------------------------------------------------------------------
-
- -

4. Burmese Word Break Dictionary Data (burmesedict.txt)

-
- #	Copyright (c) 2014 International Business Machines Corporation
- #	and others. All Rights Reserved.
- #
- #	This list is part of a project hosted at:
- #	  github.com/kanyawtech/myanmar-karen-word-lists
- #
- #	--------------------------------------------------------------------------------
- #	Copyright (c) 2013, LeRoy Benjamin Sharon
- #	All rights reserved.
- #
- #	Redistribution and use in source and binary forms, with or without modification,
- #	are permitted provided that the following conditions are met:
- #
- #	  Redistributions of source code must retain the above copyright notice, this
- #	  list of conditions and the following disclaimer.
- #
- #	  Redistributions in binary form must reproduce the above copyright notice, this
- #	  list of conditions and the following disclaimer in the documentation and/or
- #	  other materials provided with the distribution.
- #
- #	  Neither the name Myanmar Karen Word Lists, nor the names of its
- #	  contributors may be used to endorse or promote products derived from
- #	  this software without specific prior written permission.
- #
- #	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- #	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- #	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- #	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
- #	ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- #	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- #	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- #	ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- #	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- #	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #	--------------------------------------------------------------------------------
-
- -

5. Time Zone Database

-

ICU uses the public domain data and code derived from -Time Zone Database for its time zone support. The ownership of the TZ database is explained -in BCP 175: Procedure for Maintaining the Time Zone -Database section 7.

- -

-7.  Database Ownership
-
-   The TZ database itself is not an IETF Contribution or an IETF
-   document.  Rather it is a pre-existing and regularly updated work
-   that is in the public domain, and is intended to remain in the public
-   domain.  Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do not apply
-   to the TZ Database or contributions that individuals make to it.
-   Should any claims be made and substantiated against the TZ Database,
-   the organization that is providing the IANA Considerations defined in
-   this RFC, under the memorandum of understanding with the IETF,
-   currently ICANN, may act in accordance with all competent court
-   orders.  No ownership claims will be made by ICANN or the IETF Trust
-   on the database or the code.  Any person making a contribution to the
-   database or code waives all rights to future claims in that
-   contribution or in the TZ Database.
-
-
- - - - diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu64.zip b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu64.zip deleted file mode 100644 index 55e9451f..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/out/build/x64-Debug/icu/icu64.zip and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/pango_font_info.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/pango_font_info.cpp deleted file mode 100644 index ddc0c6ee..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/pango_font_info.cpp +++ /dev/null @@ -1,799 +0,0 @@ -/********************************************************************** - * File: pango_font_info.cpp - * Description: Font-related objects and helper functions - * Author: Ranjith Unnikrishnan - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#if (defined __MINGW32__) || (defined __CYGWIN__) -// workaround for stdlib.h and putenv -#undef __STRICT_ANSI__ -#endif - -#include -#include -#include -#ifndef _MSC_VER -#include -#endif -#include - -#include "pango_font_info.h" -#include "commandlineflags.h" -#include "fileio.h" -#include "normstrngs.h" -#include "tlog.h" -#include "unichar.h" -#include "util.h" -#include "pango/pango.h" -#include "pango/pangocairo.h" -#include "pango/pangofc-font.h" - -STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp", - "Overrides fontconfig default temporary dir"); - -#ifdef GOOGLE_TESSERACT -#include "ocr/trainingdata/typesetting/legacy_fonts.h" -BOOL_PARAM_FLAG(use_only_legacy_fonts, false, - "Overrides --fonts_dir and sets the known universe of fonts to" - "the list in legacy_fonts.h"); - -STRING_PARAM_FLAG(fonts_dir, "/auto/ocr-data/tesstraining/fonts", - "Overrides system default font location"); -#else -using std::pair; -STRING_PARAM_FLAG(fonts_dir, "", - "If empty it use system default. Otherwise it overrides" - " system default font location"); -#endif - -namespace tesseract { - -// Default assumed output resolution. Required only for providing font metrics -// in pixels. 
-const int kDefaultResolution = 300; - -std::string PangoFontInfo::fonts_dir_; -std::string PangoFontInfo::cache_dir_; - -PangoFontInfo::PangoFontInfo() - : desc_(nullptr), resolution_(kDefaultResolution) { - Clear(); -} - -PangoFontInfo::PangoFontInfo(const std::string& desc) - : desc_(nullptr), resolution_(kDefaultResolution) { - if (!ParseFontDescriptionName(desc)) { - tprintf("ERROR: Could not parse %s\n", desc.c_str()); - Clear(); - } -} - -void PangoFontInfo::Clear() { - font_size_ = 0; - family_name_.clear(); - font_type_ = UNKNOWN; - if (desc_) { - pango_font_description_free(desc_); - desc_ = nullptr; - } -} - -PangoFontInfo::~PangoFontInfo() { pango_font_description_free(desc_); } - -std::string PangoFontInfo::DescriptionName() const { - if (!desc_) return ""; - char* desc_str = pango_font_description_to_string(desc_); - std::string desc_name(desc_str); - g_free(desc_str); - return desc_name; -} - -// If not already initialized, initializes FontConfig by setting its -// environment variable and creating a fonts.conf file that points to the -// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir. -/* static */ -void PangoFontInfo::SoftInitFontConfig() { - if (fonts_dir_.empty()) { - HardInitFontConfig(FLAGS_fonts_dir.c_str(), - FLAGS_fontconfig_tmpdir.c_str()); - } -} - -// Re-initializes font config, whether or not already initialized. -// If already initialized, any existing cache is deleted, just to be sure. 
-/* static */ -void PangoFontInfo::HardInitFontConfig(const std::string& fonts_dir, - const std::string& cache_dir) { - if (!cache_dir_.empty()) { - File::DeleteMatchingFiles( - File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str()); - } - const int MAX_FONTCONF_FILESIZE = 1024; - char fonts_conf_template[MAX_FONTCONF_FILESIZE]; - cache_dir_ = cache_dir; - fonts_dir_ = fonts_dir; - snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE, - "\n" - "\n" - "\n" - "%s\n" - "%s\n" - "\n" - "", - fonts_dir.c_str(), cache_dir_.c_str()); - std::string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf"); - File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file); -#ifdef _WIN32 - std::string env("FONTCONFIG_PATH="); - env.append(cache_dir_.c_str()); - _putenv(env.c_str()); - _putenv("LANG=en_US.utf8"); -#else - setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true); - // Fix the locale so that the reported font names are consistent. - setenv("LANG", "en_US.utf8", true); -#endif // _WIN32 - - if (FcInitReinitialize() != FcTrue) { - tprintf("FcInitiReinitialize failed!!\n"); - } - FontUtils::ReInit(); - // Clear Pango's font cache too. 
- pango_cairo_font_map_set_default(nullptr); -} - -static void ListFontFamilies(PangoFontFamily*** families, - int* n_families) { - PangoFontInfo::SoftInitFontConfig(); - PangoFontMap* font_map = pango_cairo_font_map_get_default(); - DISABLE_HEAP_LEAK_CHECK; - pango_font_map_list_families(font_map, families, n_families); -} - -bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) { - Clear(); - const char* family = pango_font_description_get_family(desc); - if (!family) { - char* desc_str = pango_font_description_to_string(desc); - tprintf("WARNING: Could not parse family name from description: '%s'\n", - desc_str); - g_free(desc_str); - return false; - } - family_name_ = std::string(family); - desc_ = pango_font_description_copy(desc); - - // Set font size in points - font_size_ = pango_font_description_get_size(desc); - if (!pango_font_description_get_size_is_absolute(desc)) { - font_size_ /= PANGO_SCALE; - } - - return true; -} - -bool PangoFontInfo::ParseFontDescriptionName(const std::string& name) { - PangoFontDescription *desc = pango_font_description_from_string(name.c_str()); - bool success = ParseFontDescription(desc); - pango_font_description_free(desc); - return success; -} - -// Returns the PangoFont structure corresponding to the closest available font -// in the font map. Note that if the font is wholly missing, this could -// correspond to a completely different font family and face. 
-PangoFont* PangoFontInfo::ToPangoFont() const { - SoftInitFontConfig(); - PangoFontMap* font_map = pango_cairo_font_map_get_default(); - PangoContext* context = pango_context_new(); - pango_cairo_context_set_resolution(context, resolution_); - pango_context_set_font_map(context, font_map); - PangoFont* font = nullptr; - { - DISABLE_HEAP_LEAK_CHECK; - font = pango_font_map_load_font(font_map, context, desc_); - } - g_object_unref(context); - return font; -} - -bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const { - PangoFont* font = ToPangoFont(); - PangoCoverage* coverage = pango_font_get_coverage(font, nullptr); - for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length); - it != UNICHAR::end(utf8_text, byte_length); - ++it) { - if (IsWhitespace(*it) || pango_is_zero_width(*it)) - continue; - if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) { - char tmp[5]; - int len = it.get_utf8(tmp); - tmp[len] = '\0'; - tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it); - return false; - } - } - pango_coverage_unref(coverage); - g_object_unref(font); - return true; -} - -// This variant of strncpy permits src and dest to overlap. It will copy the -// first byte first. -static char* my_strnmove(char* dest, const char* src, size_t n) { - char* ret = dest; - - // Copy characters until n reaches zero or the src byte is a nul. - do { - *dest = *src; - --n; - ++dest; - ++src; - } while (n && src[0]); - - // If we reached a nul byte and there are more 'n' left, zero them out. - while (n) { - *dest = '\0'; - --n; - ++dest; - } - return ret; -} - -int PangoFontInfo::DropUncoveredChars(std::string* utf8_text) const { - PangoFont* font = ToPangoFont(); - PangoCoverage* coverage = pango_font_get_coverage(font, nullptr); - int num_dropped_chars = 0; - // Maintain two iterators that point into the string. 
For space efficiency, we - // will repeatedly copy one covered UTF8 character from one to the other, and - // at the end resize the string to the right length. - char* out = const_cast(utf8_text->c_str()); - const UNICHAR::const_iterator it_begin = - UNICHAR::begin(utf8_text->c_str(), utf8_text->length()); - const UNICHAR::const_iterator it_end = - UNICHAR::end(utf8_text->c_str(), utf8_text->length()); - for (UNICHAR::const_iterator it = it_begin; it != it_end;) { - // Skip bad utf-8. - if (!it.is_legal()) { - ++it; // One suitable error message will still be issued. - continue; - } - int unicode = *it; - int utf8_len = it.utf8_len(); - const char* utf8_char = it.utf8_data(); - // Move it forward before the data gets modified. - ++it; - if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) && - pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) { - if (TLOG_IS_ON(2)) { - UNICHAR unichar(unicode); - char* str = unichar.utf8_str(); - tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode); - delete[] str; - } - ++num_dropped_chars; - continue; - } - my_strnmove(out, utf8_char, utf8_len); - out += utf8_len; - } - pango_coverage_unref(coverage); - g_object_unref(font); - utf8_text->resize(out - utf8_text->c_str()); - return num_dropped_chars; -} - -bool PangoFontInfo::GetSpacingProperties(const std::string& utf8_char, - int* x_bearing, int* x_advance) const { - // Convert to equivalent PangoFont structure - PangoFont* font = ToPangoFont(); - // Find the glyph index in the font for the supplied utf8 character. - int total_advance = 0; - int min_bearing = 0; - // Handle multi-unicode strings by reporting the left-most position of the - // x-bearing, and right-most position of the x-advance if the string were to - // be rendered. 
- const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(), - utf8_char.length()); - const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(), - utf8_char.length()); - for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) { - PangoGlyph glyph_index = pango_fc_font_get_glyph( - reinterpret_cast(font), *it); - if (!glyph_index) { - // Glyph for given unicode character doesn't exist in font. - g_object_unref(font); - return false; - } - // Find the ink glyph extents for the glyph - PangoRectangle ink_rect, logical_rect; - pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect); - pango_extents_to_pixels(&ink_rect, nullptr); - pango_extents_to_pixels(&logical_rect, nullptr); - - int bearing = total_advance + PANGO_LBEARING(ink_rect); - if (it == it_begin || bearing < min_bearing) { - min_bearing = bearing; - } - total_advance += PANGO_RBEARING(logical_rect); - } - *x_bearing = min_bearing; - *x_advance = total_advance; - g_object_unref(font); - return true; -} - -bool PangoFontInfo::CanRenderString(const char* utf8_word, int len) const { - std::vector graphemes; - return CanRenderString(utf8_word, len, &graphemes); -} - -bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, - std::vector* graphemes) const { - if (graphemes) graphemes->clear(); - // We check for font coverage of the text first, as otherwise Pango could - // (undesirably) fall back to another font that does have the required - // coverage. - if (!CoversUTF8Text(utf8_word, len)) { - return false; - } - // U+25CC dotted circle character that often (but not always) gets rendered - // when there is an illegal grapheme sequence. - const char32 kDottedCircleGlyph = 9676; - bool bad_glyph = false; - PangoFontMap* font_map = pango_cairo_font_map_get_default(); - PangoContext* context = pango_context_new(); - pango_context_set_font_map(context, font_map); - PangoLayout* layout; - { - // Pango is not releasing the cached layout. 
- DISABLE_HEAP_LEAK_CHECK; - layout = pango_layout_new(context); - } - if (desc_) { - pango_layout_set_font_description(layout, desc_); - } else { - PangoFontDescription *desc = pango_font_description_from_string( - DescriptionName().c_str()); - pango_layout_set_font_description(layout, desc); - pango_font_description_free(desc); - } - pango_layout_set_text(layout, utf8_word, len); - PangoLayoutIter* run_iter = nullptr; - { // Fontconfig caches some information here that is not freed before exit. - DISABLE_HEAP_LEAK_CHECK; - run_iter = pango_layout_get_iter(layout); - } - do { - PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter); - if (!run) { - tlog(2, "Found end of line nullptr run marker\n"); - continue; - } - PangoGlyph dotted_circle_glyph; - PangoFont* font = run->item->analysis.font; - -#ifdef _WIN32 // Fixme! Leaks memory and breaks unittests. - PangoGlyphString* glyphs = pango_glyph_string_new(); - char s[] = "\xc2\xa7"; - pango_shape(s, sizeof(s), &(run->item->analysis), glyphs); - dotted_circle_glyph = glyphs->glyphs[0].glyph; -#else - dotted_circle_glyph = pango_fc_font_get_glyph( - reinterpret_cast(font), kDottedCircleGlyph); -#endif - - if (TLOG_IS_ON(2)) { - PangoFontDescription* desc = pango_font_describe(font); - char* desc_str = pango_font_description_to_string(desc); - tlog(2, "Desc of font in run: %s\n", desc_str); - g_free(desc_str); - pango_font_description_free(desc); - } - - PangoGlyphItemIter cluster_iter; - gboolean have_cluster; - for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, - run, utf8_word); - have_cluster && !bad_glyph; - have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) { - const int start_byte_index = cluster_iter.start_index; - const int end_byte_index = cluster_iter.end_index; - int start_glyph_index = cluster_iter.start_glyph; - int end_glyph_index = cluster_iter.end_glyph; - std::string cluster_text = std::string(utf8_word + start_byte_index, - end_byte_index - 
start_byte_index); - if (graphemes) graphemes->push_back(cluster_text); - if (IsUTF8Whitespace(cluster_text.c_str())) { - tlog(2, "Skipping whitespace\n"); - continue; - } - if (TLOG_IS_ON(2)) { - printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ", - start_byte_index, end_byte_index, - start_glyph_index, end_glyph_index); - } - for (int i = start_glyph_index, - step = (end_glyph_index > start_glyph_index) ? 1 : -1; - !bad_glyph && i != end_glyph_index; i+= step) { - const bool unknown_glyph = - (cluster_iter.glyph_item->glyphs->glyphs[i].glyph & - PANGO_GLYPH_UNKNOWN_FLAG); - const bool illegal_glyph = - (cluster_iter.glyph_item->glyphs->glyphs[i].glyph == - dotted_circle_glyph); - bad_glyph = unknown_glyph || illegal_glyph; - if (TLOG_IS_ON(2)) { - printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph, - bad_glyph ? 1 : 0); - } - } - if (TLOG_IS_ON(2)) { - printf(" '%s'\n", cluster_text.c_str()); - } - if (bad_glyph) - tlog(1, "Found illegal glyph!\n"); - } - } while (!bad_glyph && pango_layout_iter_next_run(run_iter)); - - pango_layout_iter_free(run_iter); - g_object_unref(context); - g_object_unref(layout); - if (bad_glyph && graphemes) graphemes->clear(); - return !bad_glyph; -} - - -// ------------------------ FontUtils ------------------------------------ -std::vector FontUtils::available_fonts_; // cache list - -// Returns whether the specified font description is available in the fonts -// directory. -// -// The generated list of font families and faces includes "synthesized" font -// faces that are not truly loadable. Pango versions >=1.18 have a -// pango_font_face_is_synthesized method that can be used to prune the list. -// Until then, we are restricted to using a hack where we try to load the font -// from the font_map, and then check what we loaded to see if it has the -// description we expected. If it is not, then the font is deemed unavailable. 
-/* static */ -bool FontUtils::IsAvailableFont(const char* input_query_desc, - std::string* best_match) { - std::string query_desc(input_query_desc); - PangoFontDescription *desc = pango_font_description_from_string( - query_desc.c_str()); - PangoFont* selected_font = nullptr; - { - PangoFontInfo::SoftInitFontConfig(); - PangoFontMap* font_map = pango_cairo_font_map_get_default(); - PangoContext* context = pango_context_new(); - pango_context_set_font_map(context, font_map); - { - DISABLE_HEAP_LEAK_CHECK; - selected_font = pango_font_map_load_font(font_map, context, desc); - } - g_object_unref(context); - } - if (selected_font == nullptr) { - pango_font_description_free(desc); - return false; - } - PangoFontDescription* selected_desc = pango_font_describe(selected_font); - - bool equal = pango_font_description_equal(desc, selected_desc); - tlog(3, "query weight = %d \t selected weight =%d\n", - pango_font_description_get_weight(desc), - pango_font_description_get_weight(selected_desc)); - - char* selected_desc_str = pango_font_description_to_string(selected_desc); - tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(), - selected_desc_str); - if (!equal && best_match != nullptr) { - *best_match = selected_desc_str; - // Clip the ending ' 0' if there is one. It seems that, if there is no - // point size on the end of the fontname, then Pango always appends ' 0'. 
- int len = best_match->size(); - if (len > 2 && best_match->at(len - 1) == '0' && - best_match->at(len - 2) == ' ') { - *best_match = best_match->substr(0, len - 2); - } - } - g_free(selected_desc_str); - pango_font_description_free(selected_desc); - g_object_unref(selected_font); - pango_font_description_free(desc); - return equal; -} - -static bool ShouldIgnoreFontFamilyName(const char* query) { - static const char* kIgnoredFamilyNames[] = {"Sans", "Serif", "Monospace", - nullptr}; - const char** list = kIgnoredFamilyNames; - for (; *list != nullptr; ++list) { - if (!strcmp(*list, query)) - return true; - } - return false; -} - -// Outputs description names of available fonts. -/* static */ -const std::vector& FontUtils::ListAvailableFonts() { - if (!available_fonts_.empty()) { - return available_fonts_; - } -#ifdef GOOGLE_TESSERACT - if (FLAGS_use_only_legacy_fonts) { - // Restrict view to list of fonts in legacy_fonts.h - tprintf("Using list of legacy fonts only\n"); - const int kNumFontLists = 4; - for (int i = 0; i < kNumFontLists; ++i) { - for (int j = 0; kFontlists[i][j] != nullptr; ++j) { - available_fonts_.push_back(kFontlists[i][j]); - } - } - return available_fonts_; - } -#endif - - PangoFontFamily** families = nullptr; - int n_families = 0; - ListFontFamilies(&families, &n_families); - for (int i = 0; i < n_families; ++i) { - const char* family_name = pango_font_family_get_name(families[i]); - tlog(2, "Listing family %s\n", family_name); - if (ShouldIgnoreFontFamilyName(family_name)) { - continue; - } - - int n_faces; - PangoFontFace** faces = nullptr; - pango_font_family_list_faces(families[i], &faces, &n_faces); - for (int j = 0; j < n_faces; ++j) { - PangoFontDescription* desc = pango_font_face_describe(faces[j]); - char* desc_str = pango_font_description_to_string(desc); - if (IsAvailableFont(desc_str)) { - available_fonts_.push_back(desc_str); - } - pango_font_description_free(desc); - g_free(desc_str); - } - g_free(faces); - } - 
g_free(families); - std::sort(available_fonts_.begin(), available_fonts_.end()); - return available_fonts_; -} - - -static void CharCoverageMapToBitmap(PangoCoverage* coverage, - std::vector* unichar_bitmap) { - const int kMinUnicodeValue = 33; - const int kMaxUnicodeValue = 0x10FFFF; - unichar_bitmap->resize(kMaxUnicodeValue + 1, false); - // Mark off characters that the font can render. - for (int i = kMinUnicodeValue; i <= kMaxUnicodeValue; ++i) { - if (IsInterchangeValid(i)) { - (*unichar_bitmap)[i] - = (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT); - } - } -} - -/* static */ -void FontUtils::GetAllRenderableCharacters(std::vector* unichar_bitmap) { - const std::vector& all_fonts = ListAvailableFonts(); - return GetAllRenderableCharacters(all_fonts, unichar_bitmap); -} - -/* static */ -void FontUtils::GetAllRenderableCharacters(const std::string& font_name, - std::vector* unichar_bitmap) { - PangoFontInfo font_info(font_name); - PangoFont* font = font_info.ToPangoFont(); - PangoCoverage* coverage = pango_font_get_coverage(font, nullptr); - CharCoverageMapToBitmap(coverage, unichar_bitmap); - pango_coverage_unref(coverage); - g_object_unref(font); -} - -/* static */ -void FontUtils::GetAllRenderableCharacters(const std::vector& fonts, - std::vector* unichar_bitmap) { - // Form the union of coverage maps from the fonts - PangoCoverage* all_coverage = pango_coverage_new(); - tlog(1, "Processing %u fonts\n", static_cast(fonts.size())); - for (unsigned i = 0; i < fonts.size(); ++i) { - PangoFontInfo font_info(fonts[i]); - PangoFont* font = font_info.ToPangoFont(); - PangoCoverage* coverage = pango_font_get_coverage(font, nullptr); - // Mark off characters that any font can render. 
- pango_coverage_max(all_coverage, coverage); - pango_coverage_unref(coverage); - g_object_unref(font); - } - CharCoverageMapToBitmap(all_coverage, unichar_bitmap); - pango_coverage_unref(all_coverage); -} - - -// Utilities written to be backward compatible with StringRender - -/* static */ -int FontUtils::FontScore(const std::unordered_map& ch_map, - const std::string& fontname, int* raw_score, - std::vector* ch_flags) { - PangoFontInfo font_info; - if (!font_info.ParseFontDescriptionName(fontname)) { - tprintf("ERROR: Could not parse %s\n", fontname.c_str()); - } - PangoFont* font = font_info.ToPangoFont(); - PangoCoverage* coverage = pango_font_get_coverage(font, nullptr); - - if (ch_flags) { - ch_flags->clear(); - ch_flags->reserve(ch_map.size()); - } - *raw_score = 0; - int ok_chars = 0; - for (std::unordered_map::const_iterator it = ch_map.begin(); - it != ch_map.end(); ++it) { - bool covered = (IsWhitespace(it->first) || - (pango_coverage_get(coverage, it->first) - == PANGO_COVERAGE_EXACT)); - if (covered) { - ++(*raw_score); - ok_chars += it->second; - } - if (ch_flags) { - ch_flags->push_back(covered); - } - } - pango_coverage_unref(coverage); - g_object_unref(font); - return ok_chars; -} - - -/* static */ -std::string FontUtils::BestFonts( - const std::unordered_map& ch_map, - std::vector > >* fonts) { - const double kMinOKFraction = 0.99; - // Weighted fraction of characters that must be renderable in a font to make - // it OK even if the raw count is not good. 
- const double kMinWeightedFraction = 0.99995; - - fonts->clear(); - std::vector > font_flags; - std::vector font_scores; - std::vector raw_scores; - int most_ok_chars = 0; - int best_raw_score = 0; - const std::vector& font_names = FontUtils::ListAvailableFonts(); - for (unsigned i = 0; i < font_names.size(); ++i) { - std::vector ch_flags; - int raw_score = 0; - int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags); - most_ok_chars = std::max(ok_chars, most_ok_chars); - best_raw_score = std::max(raw_score, best_raw_score); - - font_flags.push_back(ch_flags); - font_scores.push_back(ok_chars); - raw_scores.push_back(raw_score); - } - - // Now select the fonts with a score above a threshold fraction - // of both the raw and weighted best scores. To prevent bogus fonts being - // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of - // BOTH weighted and raw scores. - // In low character-count scripts, the issue is more getting enough fonts, - // when only 1 or 2 might have all those rare dingbats etc in them, so we - // allow a font with a very high weighted (coverage) score - // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor. 
- int least_good_enough = static_cast(most_ok_chars * kMinOKFraction); - int least_raw_enough = static_cast(best_raw_score * kMinOKFraction); - int override_enough = static_cast(most_ok_chars * kMinWeightedFraction); - - std::string font_list; - for (unsigned i = 0; i < font_names.size(); ++i) { - int score = font_scores[i]; - int raw_score = raw_scores[i]; - if ((score >= least_good_enough && raw_score >= least_raw_enough) || - score >= override_enough) { - fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i])); - tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", - font_names[i].c_str(), - 100.0 * score / most_ok_chars, - raw_score, 100.0 * raw_score / best_raw_score); - font_list += font_names[i]; - font_list += "\n"; - } else if (score >= least_good_enough || raw_score >= least_raw_enough) { - tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n", - font_names[i].c_str(), - 100.0 * score / most_ok_chars, - raw_score, 100.0 * raw_score / best_raw_score); - } - } - return font_list; -} - -/* static */ -bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len, - std::string* font_name, std::vector* graphemes) { - return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name, - graphemes); -} - -/* static */ -bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len, - const std::vector& all_fonts, - std::string* font_name, std::vector* graphemes) { - if (font_name) font_name->clear(); - if (graphemes) graphemes->clear(); - for (unsigned i = 0; i < all_fonts.size(); ++i) { - PangoFontInfo font; - std::vector found_graphemes; - ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]), - "Could not parse font desc name %s\n", - all_fonts[i].c_str()); - if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) { - if (graphemes) graphemes->swap(found_graphemes); - if (font_name) *font_name = all_fonts[i]; - return true; - } - } - return false; -} - -// PangoFontInfo is reinitialized, so clear the static 
list of fonts. -/* static */ -void FontUtils::ReInit() { available_fonts_.clear(); } - -// Print info about used font backend -/* static */ -void FontUtils::PangoFontTypeInfo() { - PangoFontMap* font_map = pango_cairo_font_map_get_default(); - if (pango_cairo_font_map_get_font_type(reinterpret_cast( - font_map)) == CAIRO_FONT_TYPE_TOY) { - printf("Using CAIRO_FONT_TYPE_TOY.\n"); - } else if (pango_cairo_font_map_get_font_type( - reinterpret_cast(font_map)) == - CAIRO_FONT_TYPE_FT) { - printf("Using CAIRO_FONT_TYPE_FT.\n"); - } else if (pango_cairo_font_map_get_font_type( - reinterpret_cast(font_map)) == - CAIRO_FONT_TYPE_WIN32) { - printf("Using CAIRO_FONT_TYPE_WIN32.\n"); - } else if (pango_cairo_font_map_get_font_type( - reinterpret_cast(font_map)) == - CAIRO_FONT_TYPE_QUARTZ) { - printf("Using CAIRO_FONT_TYPE_QUARTZ.\n"); - } else if (pango_cairo_font_map_get_font_type( - reinterpret_cast(font_map)) == - CAIRO_FONT_TYPE_USER) { - printf("Using CAIRO_FONT_TYPE_USER.\n"); - } else if (!font_map) { - printf("Can not create pango cairo font map!\n"); - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/pango_font_info.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/pango_font_info.h deleted file mode 100644 index a7a7e0dc..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/pango_font_info.h +++ /dev/null @@ -1,217 +0,0 @@ -/********************************************************************** - * File: pango_font_info.h - * Description: Font-related objects and helper functions - * Author: Ranjith Unnikrishnan - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_ -#define TESSERACT_TRAINING_PANGO_FONT_INFO_H_ - -#include -#include -#include -#include - -#include "commandlineflags.h" -#include "host.h" -#include "pango/pango-font.h" -#include "pango/pango.h" -#include "pango/pangocairo.h" -#include "util.h" - -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); - -using char32 = signed int; - -namespace tesseract { - -// Data holder class for a font, intended to avoid having to work with Pango or -// FontConfig-specific objects directly. -class PangoFontInfo { - public: - enum FontTypeEnum { - UNKNOWN, - SERIF, - SANS_SERIF, - DECORATIVE, - }; - PangoFontInfo(); - ~PangoFontInfo(); - // Initialize from parsing a font description name, defined as a string of the - // format: - // "FamilyName [FaceName] [PointSize]" - // where a missing FaceName implies the default regular face. - // eg. "Arial Italic 12", "Verdana" - // - // FaceName is a combination of: - // [StyleName] [Variant] [Weight] [Stretch] - // with (all optional) Pango-defined values of: - // StyleName: Oblique, Italic - // Variant : Small-Caps - // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy - // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed, - // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded. 
- explicit PangoFontInfo(const std::string& name); - bool ParseFontDescriptionName(const std::string& name); - - // Returns true if the font have codepoint coverage for the specified text. - bool CoversUTF8Text(const char* utf8_text, int byte_length) const; - // Modifies string to remove unicode points that are not covered by the - // font. Returns the number of characters dropped. - int DropUncoveredChars(std::string* utf8_text) const; - - // Returns true if the entire string can be rendered by the font with full - // character coverage and no unknown glyph or dotted-circle glyph - // substitutions on encountering a badly formed unicode sequence. - // If true, returns individual graphemes. Any whitespace characters in the - // original string are also included in the list. - bool CanRenderString(const char* utf8_word, int len, - std::vector* graphemes) const; - bool CanRenderString(const char* utf8_word, int len) const; - - // Retrieves the x_bearing and x_advance for the given utf8 character in the - // font. Returns false if the glyph for the character could not be found in - // the font. - // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html - bool GetSpacingProperties(const std::string& utf8_char, - int* x_bearing, int* x_advance) const; - - // If not already initialized, initializes FontConfig by setting its - // environment variable and creating a fonts.conf file that points to the - // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir. - static void SoftInitFontConfig(); - // Re-initializes font config, whether or not already initialized. - // If already initialized, any existing cache is deleted, just to be sure. - static void HardInitFontConfig(const std::string& fonts_dir, - const std::string& cache_dir); - - // Accessors - std::string DescriptionName() const; - // Font Family name eg. "Arial" - const std::string& family_name() const { return family_name_; } - // Size in points (1/72"), rounded to the nearest integer. 
- int font_size() const { return font_size_; } - FontTypeEnum font_type() const { return font_type_; } - - int resolution() const { return resolution_; } - void set_resolution(const int resolution) { - resolution_ = resolution; - } - - private: - friend class FontUtils; - void Clear(); - bool ParseFontDescription(const PangoFontDescription* desc); - // Returns the PangoFont structure corresponding to the closest available font - // in the font map. - PangoFont* ToPangoFont() const; - - // Font properties set automatically from parsing the font description name. - std::string family_name_; - int font_size_; - FontTypeEnum font_type_; - // The Pango description that was used to initialize the instance. - PangoFontDescription* desc_; - // Default output resolution to assume for GetSpacingProperties() and any - // other methods that returns pixel values. - int resolution_; - // Fontconfig operates through an environment variable, so it intrinsically - // cannot be thread-friendly, but you can serialize multiple independent - // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir). - // These hold the last initialized values set by HardInitFontConfig or - // the first call to SoftInitFontConfig. - // Directory to be scanned for font files. - static std::string fonts_dir_; - // Directory to store the cache of font information. (Can be the same as - // fonts_dir_) - static std::string cache_dir_; - - private: - PangoFontInfo(const PangoFontInfo&); - void operator=(const PangoFontInfo&); -}; - -// Static utility methods for querying font availability and font-selection -// based on codepoint coverage. 
-class FontUtils { - public: - // Returns true if the font of the given description name is available in the - // target directory specified by --fonts_dir - static bool IsAvailableFont(const char* font_desc) { - return IsAvailableFont(font_desc, nullptr); - } - // Returns true if the font of the given description name is available in the - // target directory specified by --fonts_dir. If false is returned, and - // best_match is not nullptr, the closest matching font is returned there. - static bool IsAvailableFont(const char* font_desc, std::string* best_match); - // Outputs description names of available fonts. - static const std::vector& ListAvailableFonts(); - - // Picks font among available fonts that covers and can render the given word, - // and returns the font description name and the decomposition of the word to - // graphemes. Returns false if no suitable font was found. - static bool SelectFont(const char* utf8_word, const int utf8_len, - std::string* font_name, std::vector* graphemes); - - // Picks font among all_fonts that covers and can render the given word, - // and returns the font description name and the decomposition of the word to - // graphemes. Returns false if no suitable font was found. - static bool SelectFont(const char* utf8_word, const int utf8_len, - const std::vector& all_fonts, - std::string* font_name, std::vector* graphemes); - - // Returns a bitmask where the value of true at index 'n' implies that unicode - // value 'n' is renderable by at least one available font. - static void GetAllRenderableCharacters(std::vector* unichar_bitmap); - // Variant of the above function that inspects only the provided font names. - static void GetAllRenderableCharacters(const std::vector& font_names, - std::vector* unichar_bitmap); - static void GetAllRenderableCharacters(const std::string& font_name, - std::vector* unichar_bitmap); - - // NOTE: The following utilities were written to be backward compatible with - // StringRender. 
- - // BestFonts returns a font name and a bit vector of the characters it - // can render for the fonts that score within some fraction of the best - // font on the characters in the given hash map. - // In the flags vector, each flag is set according to whether the - // corresponding character (in order of iterating ch_map) can be rendered. - // The return string is a list of the acceptable fonts that were used. - static std::string BestFonts( - const std::unordered_map& ch_map, - std::vector > >* font_flag); - - // FontScore returns the weighted renderability score of the given - // hash map character table in the given font. The unweighted score - // is also returned in raw_score. - // The values in the bool vector ch_flags correspond to whether the - // corresponding character (in order of iterating ch_map) can be rendered. - static int FontScore(const std::unordered_map& ch_map, - const std::string& fontname, int* raw_score, - std::vector* ch_flags); - - // PangoFontInfo is reinitialized, so clear the static list of fonts. - static void ReInit(); - static void PangoFontTypeInfo(); - - private: - static std::vector available_fonts_; // cache list -}; -} // namespace tesseract - -#endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/set_unicharset_properties.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/set_unicharset_properties.cpp deleted file mode 100644 index bdd65989..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/set_unicharset_properties.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This program reads a unicharset file, puts the result in a UNICHARSET -// object, fills it with properties about the unichars it contains and writes -// the result back to a file. - -#include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion -#include "tprintf.h" -#include "unicharset_training_utils.h" - -// The directory that is searched for universal script unicharsets. -STRING_PARAM_FLAG(script_dir, "", - "Directory name for input script unicharsets/xheights"); - -// Flags from commontraining.cpp -DECLARE_STRING_PARAM_FLAG(U); -DECLARE_STRING_PARAM_FLAG(O); -DECLARE_STRING_PARAM_FLAG(X); - -int main(int argc, char** argv) { - tesseract::CheckSharedLibraryVersion(); - tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true); - - // Check validity of input flags. - if (FLAGS_U.empty() || FLAGS_O.empty()) { - tprintf("Specify both input and output unicharsets!\n"); - exit(1); - } - if (FLAGS_script_dir.empty()) { - tprintf("Must specify a script_dir!\n"); - exit(1); - } - - tesseract::SetPropertiesForInputFile(FLAGS_script_dir.c_str(), - FLAGS_U.c_str(), FLAGS_O.c_str(), - FLAGS_X.c_str()); - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/shapeclustering.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/shapeclustering.cpp deleted file mode 100644 index 8567a3da..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/shapeclustering.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. 
-// Author: rays@google.com (Ray Smith) - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Filename: shapeclustering.cpp -// Purpose: Generates a master shape table to merge similarly-shaped -// training data of whole, partial or multiple characters. -// Author: Ray Smith - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifdef GOOGLE_TESSERACT -#include "base/commandlineflags.h" -#endif -#include "commontraining.h" -#include "mastertrainer.h" -#include "params.h" -#include "strngs.h" - -INT_PARAM_FLAG(display_cloud_font, -1, - "Display cloud of this font, canonical_class1"); -INT_PARAM_FLAG(display_canonical_font, -1, - "Display canonical sample of this font, canonical_class2"); -STRING_PARAM_FLAG(canonical_class1, "", "Class to show ambigs for"); -STRING_PARAM_FLAG(canonical_class2, "", "Class to show ambigs for"); - -// Loads training data, if requested displays debug information, otherwise -// creates the master shape table by shape clustering and writes it to a file. -// If FLAGS_display_cloud_font is set, then the cloud features of -// FLAGS_canonical_class1/FLAGS_display_cloud_font are shown in green ON TOP -// OF the red canonical features of FLAGS_canonical_class2/ -// FLAGS_display_canonical_font, so as to show which canonical features are -// NOT in the cloud. -// Otherwise, if FLAGS_canonical_class1 is set, prints a table of font-wise -// cluster distances between FLAGS_canonical_class1 and FLAGS_canonical_class2. 
-int main(int argc, char **argv) { - tesseract::CheckSharedLibraryVersion(); - - ParseArguments(&argc, &argv); - - STRING file_prefix; - tesseract::MasterTrainer* trainer = - tesseract::LoadTrainingData(argc, argv, false, nullptr, &file_prefix); - - if (!trainer) - return 1; - - if (FLAGS_display_cloud_font >= 0) { -#ifndef GRAPHICS_DISABLED - trainer->DisplaySamples(FLAGS_canonical_class1.c_str(), - FLAGS_display_cloud_font, - FLAGS_canonical_class2.c_str(), - FLAGS_display_canonical_font); -#endif // GRAPHICS_DISABLED - return 0; - } else if (!FLAGS_canonical_class1.empty()) { - trainer->DebugCanonical(FLAGS_canonical_class1.c_str(), - FLAGS_canonical_class2.c_str()); - return 0; - } - trainer->SetupMasterShapes(); - WriteShapeTable(file_prefix, trainer->master_shapes()); - delete trainer; - - return 0; -} /* main */ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/stringrenderer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/stringrenderer.cpp deleted file mode 100644 index 24b162d5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/stringrenderer.cpp +++ /dev/null @@ -1,907 +0,0 @@ -/********************************************************************** - * File: stringrenderer.cpp - * Description: Class for rendering UTF-8 text to an image, and retrieving - * bounding boxes around each grapheme cluster. - * Author: Ranjith Unnikrishnan - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include "stringrenderer.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "allheaders.h" // from leptonica -#include "boxchar.h" -#include "ligature_table.h" -#include "normstrngs.h" -#include "pango/pango-font.h" -#include "pango/pango-glyph-item.h" -#include "tlog.h" -#include "unichar.h" -#include "unicode/uchar.h" // from libicu -#include "util.h" - -namespace tesseract { - -static const int kDefaultOutputResolution = 300; - -// Word joiner (U+2060) inserted after letters in ngram mode, as per -// recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at -// hyphens and other non-alpha characters. -static const char* kWordJoinerUTF8 = "\u2060"; - -static bool IsCombiner(int ch) { - const int char_type = u_charType(ch); - return ((char_type == U_NON_SPACING_MARK) || - (char_type == U_ENCLOSING_MARK) || - (char_type == U_COMBINING_SPACING_MARK)); -} - -static std::string EncodeAsUTF8(const char32 ch32) { - UNICHAR uni_ch(ch32); - return std::string(uni_ch.utf8(), uni_ch.utf8_len()); -} - -// Returns true with probability 'prob'. 
-static bool RandBool(const double prob, TRand* rand) { - if (prob == 1.0) return true; - if (prob == 0.0) return false; - return rand->UnsignedRand(1.0) < prob; -} - -/* static */ -static Pix* CairoARGB32ToPixFormat(cairo_surface_t *surface) { - if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) { - printf("Unexpected surface format %d\n", - cairo_image_surface_get_format(surface)); - return nullptr; - } - const int width = cairo_image_surface_get_width(surface); - const int height = cairo_image_surface_get_height(surface); - Pix* pix = pixCreate(width, height, 32); - int byte_stride = cairo_image_surface_get_stride(surface); - - for (int i = 0; i < height; ++i) { - memcpy(reinterpret_cast(pix->data + i * pix->wpl) + 1, - cairo_image_surface_get_data(surface) + i * byte_stride, - byte_stride - ((i == height - 1) ? 1 : 0)); - } - return pix; -} - -StringRenderer::StringRenderer(const std::string& font_desc, int page_width, - int page_height) - : font_(font_desc), - page_width_(page_width), - page_height_(page_height), - h_margin_(50), - v_margin_(50), - pen_color_{0.0, 0.0, 0.0}, - char_spacing_(0), - leading_(0), - vertical_text_(false), - gravity_hint_strong_(false), - render_fullwidth_latin_(false), - underline_start_prob_(0), - underline_continuation_prob_(0), - underline_style_(PANGO_UNDERLINE_SINGLE), - features_(nullptr), - drop_uncovered_chars_(true), - strip_unrenderable_words_(false), - add_ligatures_(false), - output_word_boxes_(false), - surface_(nullptr), - cr_(nullptr), - layout_(nullptr), - start_box_(0), - page_(0), - box_padding_(0), - page_boxes_(nullptr), - total_chars_(0), - font_index_(0), - last_offset_(0) { - set_resolution(kDefaultOutputResolution); - set_font(font_desc); -} - -bool StringRenderer::set_font(const std::string& desc) { - bool success = font_.ParseFontDescriptionName(desc); - font_.set_resolution(resolution_); - return success; -} - -void StringRenderer::set_resolution(const int resolution) { - resolution_ = 
resolution; - font_.set_resolution(resolution); -} - -void StringRenderer::set_underline_start_prob(const double frac) { - underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0); -} - -void StringRenderer::set_underline_continuation_prob(const double frac) { - underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0); -} - -StringRenderer::~StringRenderer() { - free(features_); - ClearBoxes(); - FreePangoCairo(); -} - -void StringRenderer::InitPangoCairo() { - FreePangoCairo(); - surface_ = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, page_width_, - page_height_); - cr_ = cairo_create(surface_); - { - DISABLE_HEAP_LEAK_CHECK; - layout_ = pango_cairo_create_layout(cr_); - } - - if (vertical_text_) { - PangoContext* context = pango_layout_get_context(layout_); - pango_context_set_base_gravity(context, PANGO_GRAVITY_EAST); - if (gravity_hint_strong_) { - pango_context_set_gravity_hint(context, PANGO_GRAVITY_HINT_STRONG); - } - pango_layout_context_changed(layout_); - } - - SetLayoutProperties(); -} - -void StringRenderer::SetLayoutProperties() { - std::string font_desc = font_.DescriptionName(); - // Specify the font via a description name - PangoFontDescription *desc = - pango_font_description_from_string(font_desc.c_str()); - // Assign the font description to the layout - pango_layout_set_font_description(layout_, desc); - pango_font_description_free(desc); // free the description - pango_cairo_context_set_resolution(pango_layout_get_context(layout_), - resolution_); - - int max_width = page_width_ - 2 * h_margin_; - int max_height = page_height_ - 2 * v_margin_; - tlog(3, "max_width = %d, max_height = %d\n", max_width, max_height); - if (vertical_text_) { - using std::swap; - swap(max_width, max_height); - } - pango_layout_set_width(layout_, max_width * PANGO_SCALE); - // Ultra-wide Thai strings need to wrap at char level. 
- pango_layout_set_wrap(layout_, PANGO_WRAP_WORD_CHAR); - - // Adjust character spacing - PangoAttrList* attr_list = pango_attr_list_new(); - if (char_spacing_) { - PangoAttribute* spacing_attr = - pango_attr_letter_spacing_new(char_spacing_ * PANGO_SCALE); - spacing_attr->start_index = 0; - spacing_attr->end_index = static_cast(-1); - pango_attr_list_change(attr_list, spacing_attr); - } -#if (PANGO_VERSION_MAJOR == 1 && PANGO_VERSION_MINOR >= 38) - if (add_ligatures_) { - set_features("liga, clig, dlig, hlig"); - PangoAttribute* feature_attr = pango_attr_font_features_new(features_); - pango_attr_list_change(attr_list, feature_attr); - } -#endif - pango_layout_set_attributes(layout_, attr_list); - pango_attr_list_unref(attr_list); - // Adjust line spacing - if (leading_) { - pango_layout_set_spacing(layout_, leading_ * PANGO_SCALE); - } -} - -void StringRenderer::FreePangoCairo() { - if (layout_) { - g_object_unref(layout_); - layout_ = nullptr; - } - if (cr_) { - cairo_destroy(cr_); - cr_ = nullptr; - } - if (surface_) { - cairo_surface_destroy(surface_); - surface_ = nullptr; - } -} - -void StringRenderer::SetWordUnderlineAttributes(const std::string& page_text) { - if (underline_start_prob_ == 0) return; - PangoAttrList* attr_list = pango_layout_get_attributes(layout_); - - const char* text = page_text.c_str(); - size_t offset = 0; - TRand rand; - bool started_underline = false; - PangoAttribute* und_attr = nullptr; - - while (offset < page_text.length()) { - offset += SpanUTF8Whitespace(text + offset); - if (offset == page_text.length()) break; - - int word_start = offset; - int word_len = SpanUTF8NotWhitespace(text + offset); - offset += word_len; - if (started_underline) { - // Should we continue the underline to the next word? - if (RandBool(underline_continuation_prob_, &rand)) { - // Continue the current underline to this word. 
- und_attr->end_index = word_start + word_len; - } else { - // Otherwise end the current underline attribute at the end of the - // previous word. - pango_attr_list_insert(attr_list, und_attr); - started_underline = false; - und_attr = nullptr; - } - } - if (!started_underline && RandBool(underline_start_prob_, &rand)) { - // Start a new underline attribute - und_attr = pango_attr_underline_new(underline_style_); - und_attr->start_index = word_start; - und_attr->end_index = word_start + word_len; - started_underline = true; - } - } - // Finish the current underline attribute at the end of the page. - if (started_underline) { - und_attr->end_index = page_text.length(); - pango_attr_list_insert(attr_list, und_attr); - } -} - -// Returns offset in utf8 bytes to first page. -int StringRenderer::FindFirstPageBreakOffset(const char* text, - int text_length) { - if (!text_length) return 0; - const int max_height = (page_height_ - 2 * v_margin_); - const int max_width = (page_width_ - 2 * h_margin_); - const int max_layout_height = vertical_text_ ? max_width : max_height; - - UNICHAR::const_iterator it = UNICHAR::begin(text, text_length); - const UNICHAR::const_iterator it_end = UNICHAR::end(text, text_length); - const int kMaxUnicodeBufLength = 15000; - for (int i = 0; i < kMaxUnicodeBufLength && it != it_end; ++it, ++i); - int buf_length = it.utf8_data() - text; - tlog(1, "len = %d buf_len = %d\n", text_length, buf_length); - pango_layout_set_text(layout_, text, buf_length); - - PangoLayoutIter* line_iter = nullptr; - { // Fontconfig caches some info here that is not freed before exit. 
- DISABLE_HEAP_LEAK_CHECK; - line_iter = pango_layout_get_iter(layout_); - } - bool first_page = true; - int page_top = 0; - int offset = buf_length; - do { - // Get bounding box of the current line - PangoRectangle line_ink_rect; - pango_layout_iter_get_line_extents(line_iter, &line_ink_rect, nullptr); - pango_extents_to_pixels(&line_ink_rect, nullptr); - PangoLayoutLine* line = pango_layout_iter_get_line_readonly(line_iter); - if (first_page) { - page_top = line_ink_rect.y; - first_page = false; - } - int line_bottom = line_ink_rect.y + line_ink_rect.height; - if (line_bottom - page_top > max_layout_height) { - offset = line->start_index; - tlog(1, "Found offset = %d\n", offset); - break; - } - } while (pango_layout_iter_next_line(line_iter)); - pango_layout_iter_free(line_iter); - return offset; -} - -const std::vector& StringRenderer::GetBoxes() const { - return boxchars_; -} - -Boxa* StringRenderer::GetPageBoxes() const { - return page_boxes_; -} - -void StringRenderer::RotatePageBoxes(float rotation) { - BoxChar::RotateBoxes(rotation, page_width_ / 2, page_height_ / 2, - start_box_, boxchars_.size(), &boxchars_); -} - - -void StringRenderer::ClearBoxes() { - for (size_t i = 0; i < boxchars_.size(); ++i) delete boxchars_[i]; - boxchars_.clear(); - boxaDestroy(&page_boxes_); -} - -std::string StringRenderer::GetBoxesStr() { - BoxChar::PrepareToWrite(&boxchars_); - return BoxChar::GetTesseractBoxStr(page_height_, boxchars_); -} - -void StringRenderer::WriteAllBoxes(const std::string& filename) { - BoxChar::PrepareToWrite(&boxchars_); - BoxChar::WriteTesseractBoxFile(filename, page_height_, boxchars_); -} - -// Returns cluster strings in logical order. 
-bool StringRenderer::GetClusterStrings(std::vector* cluster_text) { - std::map start_byte_to_text; - PangoLayoutIter* run_iter = pango_layout_get_iter(layout_); - const char* full_text = pango_layout_get_text(layout_); - do { - PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter); - if (!run) { - // End of line nullptr run marker - tlog(2, "Found end of line marker\n"); - continue; - } - PangoGlyphItemIter cluster_iter; - gboolean have_cluster; - for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, - run, full_text); - have_cluster; - have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) { - const int start_byte_index = cluster_iter.start_index; - const int end_byte_index = cluster_iter.end_index; - std::string text = std::string(full_text + start_byte_index, - end_byte_index - start_byte_index); - if (IsUTF8Whitespace(text.c_str())) { - tlog(2, "Found whitespace\n"); - text = " "; - } - tlog(2, "start_byte=%d end_byte=%d : '%s'\n", start_byte_index, - end_byte_index, text.c_str()); - if (add_ligatures_) { - // Make sure the output box files have ligatured text in case the font - // decided to use an unmapped glyph. - text = LigatureTable::Get()->AddLigatures(text, nullptr); - } - start_byte_to_text[start_byte_index] = text; - } - } while (pango_layout_iter_next_run(run_iter)); - pango_layout_iter_free(run_iter); - - cluster_text->clear(); - for (std::map::const_iterator it = start_byte_to_text.begin(); - it != start_byte_to_text.end(); ++it) { - cluster_text->push_back(it->second); - } - return !cluster_text->empty(); -} - -// Merges an array of BoxChars into words based on the identification of -// BoxChars containing the space character as inter-word separators. -// -// Sometime two adjacent characters in the sequence may be detected as lying on -// different lines based on their spatial positions. 
This may be the result of a -// newline character at end of the last word on a line in the source text, or of -// a discretionary line-break created by Pango at intra-word locations like -// hyphens. When this is detected the word is split at that location into -// multiple BoxChars. Otherwise, each resulting BoxChar will contain a word and -// its bounding box. -static void MergeBoxCharsToWords(std::vector* boxchars) { - std::vector result; - bool started_word = false; - for (size_t i = 0; i < boxchars->size(); ++i) { - if (boxchars->at(i)->ch() == " " || boxchars->at(i)->box() == nullptr) { - result.push_back(boxchars->at(i)); - boxchars->at(i) = nullptr; - started_word = false; - continue; - } - - if (!started_word) { - // Begin new word - started_word = true; - result.push_back(boxchars->at(i)); - boxchars->at(i) = nullptr; - } else { - BoxChar* last_boxchar = result.back(); - // Compute bounding box union - const Box* box = boxchars->at(i)->box(); - Box* last_box = last_boxchar->mutable_box(); - int left = std::min(last_box->x, box->x); - int right = std::max(last_box->x + last_box->w, box->x + box->w); - int top = std::min(last_box->y, box->y); - int bottom = std::max(last_box->y + last_box->h, box->y + box->h); - // Conclude that the word was broken to span multiple lines based on the - // size of the merged bounding box in relation to those of the individual - // characters seen so far. - if (right - left > last_box->w + 5 * box->w) { - tlog(1, "Found line break after '%s'", last_boxchar->ch().c_str()); - // Insert a fake interword space and start a new word with the current - // boxchar. 
- result.push_back(new BoxChar(" ", 1)); - result.push_back(boxchars->at(i)); - boxchars->at(i) = nullptr; - continue; - } - // Append to last word - last_boxchar->mutable_ch()->append(boxchars->at(i)->ch()); - last_box->x = left; - last_box->w = right - left; - last_box->y = top; - last_box->h = bottom - top; - delete boxchars->at(i); - boxchars->at(i) = nullptr; - } - } - boxchars->swap(result); -} - - -void StringRenderer::ComputeClusterBoxes() { - const char* text = pango_layout_get_text(layout_); - PangoLayoutIter* cluster_iter = pango_layout_get_iter(layout_); - - // Do a first pass to store cluster start indexes. - std::vector cluster_start_indices; - do { - cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter)); - tlog(3, "Added %d\n", cluster_start_indices.back()); - } while (pango_layout_iter_next_cluster(cluster_iter)); - pango_layout_iter_free(cluster_iter); - cluster_start_indices.push_back(strlen(text)); - tlog(3, "Added last index %d\n", cluster_start_indices.back()); - // Sort the indices and create a map from start to end indices. - std::sort(cluster_start_indices.begin(), cluster_start_indices.end()); - std::map cluster_start_to_end_index; - for (size_t i = 0; i + 1 < cluster_start_indices.size(); ++i) { - cluster_start_to_end_index[cluster_start_indices[i]] - = cluster_start_indices[i + 1]; - } - - // Iterate again to compute cluster boxes and their text with the obtained - // cluster extent information. 
- cluster_iter = pango_layout_get_iter(layout_); - // Store BoxChars* sorted by their byte start positions - std::map start_byte_to_box; - do { - PangoRectangle cluster_rect; - pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect, nullptr); - pango_extents_to_pixels(&cluster_rect, nullptr); - const int start_byte_index = pango_layout_iter_get_index(cluster_iter); - const int end_byte_index = cluster_start_to_end_index[start_byte_index]; - std::string cluster_text = std::string(text + start_byte_index, - end_byte_index - start_byte_index); - if (!cluster_text.empty() && cluster_text[0] == '\n') { - tlog(2, "Skipping newlines at start of text.\n"); - continue; - } - if (!cluster_rect.width || !cluster_rect.height || - IsUTF8Whitespace(cluster_text.c_str())) { - tlog(2, "Skipping whitespace with boxdim (%d,%d) '%s'\n", - cluster_rect.width, cluster_rect.height, cluster_text.c_str()); - BoxChar* boxchar = new BoxChar(" ", 1); - boxchar->set_page(page_); - start_byte_to_box[start_byte_index] = boxchar; - continue; - } - // Prepare a boxchar for addition at this byte position. - tlog(2, "[%d %d], %d, %d : start_byte=%d end_byte=%d : '%s'\n", - cluster_rect.x, cluster_rect.y, - cluster_rect.width, cluster_rect.height, - start_byte_index, end_byte_index, - cluster_text.c_str()); - ASSERT_HOST_MSG(cluster_rect.width, - "cluster_text:%s start_byte_index:%d\n", - cluster_text.c_str(), start_byte_index); - ASSERT_HOST_MSG(cluster_rect.height, - "cluster_text:%s start_byte_index:%d\n", - cluster_text.c_str(), start_byte_index); - if (box_padding_) { - cluster_rect.x = std::max(0, cluster_rect.x - box_padding_); - cluster_rect.width += 2 * box_padding_; - cluster_rect.y = std::max(0, cluster_rect.y - box_padding_); - cluster_rect.height += 2 * box_padding_; - } - if (add_ligatures_) { - // Make sure the output box files have ligatured text in case the font - // decided to use an unmapped glyph. 
- cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, nullptr); - } - BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size()); - boxchar->set_page(page_); - boxchar->AddBox(cluster_rect.x, cluster_rect.y, - cluster_rect.width, cluster_rect.height); - start_byte_to_box[start_byte_index] = boxchar; - } while (pango_layout_iter_next_cluster(cluster_iter)); - pango_layout_iter_free(cluster_iter); - - // There is a subtle bug in the cluster text reported by the PangoLayoutIter - // on ligatured characters (eg. The word "Lam-Aliph" in arabic). To work - // around this, we use text reported using the PangoGlyphIter which is - // accurate. - // TODO(ranjith): Revisit whether this is still needed in newer versions of - // pango. - std::vector cluster_text; - if (GetClusterStrings(&cluster_text)) { - ASSERT_HOST(cluster_text.size() == start_byte_to_box.size()); - int ind = 0; - for (std::map::iterator it = start_byte_to_box.begin(); - it != start_byte_to_box.end(); ++it, ++ind) { - it->second->mutable_ch()->swap(cluster_text[ind]); - } - } - - // Append to the boxchars list in byte order. - std::vector page_boxchars; - page_boxchars.reserve(start_byte_to_box.size()); - std::string last_ch; - for (std::map::const_iterator it = start_byte_to_box.begin(); - it != start_byte_to_box.end(); ++it) { - if (it->second->ch() == kWordJoinerUTF8) { - // Skip zero-width joiner characters (ZWJs) here. - delete it->second; - } else { - page_boxchars.push_back(it->second); - } - } - CorrectBoxPositionsToLayout(&page_boxchars); - - if (render_fullwidth_latin_) { - for (std::map::iterator it = start_byte_to_box.begin(); - it != start_byte_to_box.end(); ++it) { - // Convert fullwidth Latin characters to their halfwidth forms. - std::string half(ConvertFullwidthLatinToBasicLatin(it->second->ch())); - it->second->mutable_ch()->swap(half); - } - } - - // Merge the character boxes into word boxes if we are rendering n-grams. 
- if (output_word_boxes_) { - MergeBoxCharsToWords(&page_boxchars); - } - - boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end()); - - // Compute the page bounding box - Box* page_box = nullptr; - Boxa* all_boxes = nullptr; - for (size_t i = 0; i < page_boxchars.size(); ++i) { - if (page_boxchars[i]->box() == nullptr) continue; - if (all_boxes == nullptr) all_boxes = boxaCreate(0); - boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE); - } - if (all_boxes != nullptr) { - boxaGetExtent(all_boxes, nullptr, nullptr, &page_box); - boxaDestroy(&all_boxes); - if (page_boxes_ == nullptr) page_boxes_ = boxaCreate(0); - boxaAddBox(page_boxes_, page_box, L_INSERT); - } -} - - -void StringRenderer::CorrectBoxPositionsToLayout( - std::vector* boxchars) { - if (vertical_text_) { - const double rotation = - pango_gravity_to_rotation( - pango_context_get_base_gravity(pango_layout_get_context(layout_))); - BoxChar::TranslateBoxes(page_width_ - h_margin_, v_margin_, boxchars); - BoxChar::RotateBoxes(rotation, page_width_ - h_margin_, v_margin_, - 0, boxchars->size(), boxchars); - } else { - BoxChar::TranslateBoxes(h_margin_, v_margin_, boxchars); - } -} - -int StringRenderer::StripUnrenderableWords(std::string* utf8_text) const { - std::string output_text; - const char* text = utf8_text->c_str(); - size_t offset = 0; - int num_dropped = 0; - while (offset < utf8_text->length()) { - int space_len = SpanUTF8Whitespace(text + offset); - output_text.append(text + offset, space_len); - offset += space_len; - if (offset == utf8_text->length()) break; - - int word_len = SpanUTF8NotWhitespace(text + offset); - if (font_.CanRenderString(text + offset, word_len)) { - output_text.append(text + offset, word_len); - } else { - ++num_dropped; - } - offset += word_len; - } - utf8_text->swap(output_text); - - if (num_dropped > 0) { - tprintf("Stripped %d unrenderable words\n", num_dropped); - } - return num_dropped; -} - -int 
StringRenderer::RenderToGrayscaleImage(const char* text, int text_length, - Pix** pix) { - Pix* orig_pix = nullptr; - int offset = RenderToImage(text, text_length, &orig_pix); - if (orig_pix) { - *pix = pixConvertTo8(orig_pix, false); - pixDestroy(&orig_pix); - } - return offset; -} - -int StringRenderer::RenderToBinaryImage(const char* text, int text_length, - int threshold, Pix** pix) { - Pix* orig_pix = nullptr; - int offset = RenderToImage(text, text_length, &orig_pix); - if (orig_pix) { - Pix* gray_pix = pixConvertTo8(orig_pix, false); - pixDestroy(&orig_pix); - *pix = pixThresholdToBinary(gray_pix, threshold); - pixDestroy(&gray_pix); - } else { - *pix = orig_pix; - } - return offset; -} - -// Add word joiner (WJ) characters between adjacent non-space characters except -// immediately before a combiner. -/* static */ -std::string StringRenderer::InsertWordJoiners(const std::string& text) { - std::string out_str; - const UNICHAR::const_iterator it_end = UNICHAR::end(text.c_str(), - text.length()); - for (UNICHAR::const_iterator it = UNICHAR::begin(text.c_str(), text.length()); - it != it_end; ++it) { - // Add the symbol to the output string. - out_str.append(it.utf8_data(), it.utf8_len()); - // Check the next symbol. - UNICHAR::const_iterator next_it = it; - ++next_it; - bool next_char_is_boundary = (next_it == it_end || *next_it == ' '); - bool next_char_is_combiner = (next_it == it_end) ? - false : IsCombiner(*next_it); - if (*it != ' ' && *it != '\n' && !next_char_is_boundary && - !next_char_is_combiner) { - out_str += kWordJoinerUTF8; - } - } - return out_str; -} - -// Convert halfwidth Basic Latin characters to their fullwidth forms. 
-std::string StringRenderer::ConvertBasicLatinToFullwidthLatin(const std::string& str) { - std::string full_str; - const UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), - str.length()); - for (UNICHAR::const_iterator it = UNICHAR::begin(str.c_str(), str.length()); - it != it_end; ++it) { - // Convert printable and non-space 7-bit ASCII characters to - // their fullwidth forms. - if (IsInterchangeValid7BitAscii(*it) && isprint(*it) && !isspace(*it)) { - // Convert by adding 0xFEE0 to the codepoint of 7-bit ASCII. - char32 full_char = *it + 0xFEE0; - full_str.append(EncodeAsUTF8(full_char)); - } else { - full_str.append(it.utf8_data(), it.utf8_len()); - } - } - return full_str; -} - -// Convert fullwidth Latin characters to their halfwidth forms. -std::string StringRenderer::ConvertFullwidthLatinToBasicLatin(const std::string& str) { - std::string half_str; - UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length()); - for (UNICHAR::const_iterator it = UNICHAR::begin(str.c_str(), str.length()); - it != it_end; ++it) { - char32 half_char = FullwidthToHalfwidth(*it); - // Convert fullwidth Latin characters to their halfwidth forms - // only if halfwidth forms are printable and non-space 7-bit ASCII. - if (IsInterchangeValid7BitAscii(half_char) && - isprint(half_char) && !isspace(half_char)) { - half_str.append(EncodeAsUTF8(half_char)); - } else { - half_str.append(it.utf8_data(), it.utf8_len()); - } - } - return half_str; -} - -// Returns offset to end of text substring rendered in this method. 
-int StringRenderer::RenderToImage(const char* text, int text_length, - Pix** pix) { - if (pix && *pix) pixDestroy(pix); - InitPangoCairo(); - - const int page_offset = FindFirstPageBreakOffset(text, text_length); - if (!page_offset) { - return 0; - } - start_box_ = boxchars_.size(); - - if (!vertical_text_) { - // Translate by the specified margin - cairo_translate(cr_, h_margin_, v_margin_); - } else { - // Vertical text rendering is achieved by a two-step process of first - // performing regular horizontal layout with character orientation set to - // EAST, and then translating and rotating the layout before rendering onto - // the desired image surface. The settings required for the former step are - // done within InitPangoCairo(). - // - // Translate to the top-right margin of page - cairo_translate(cr_, page_width_ - h_margin_, v_margin_); - // Rotate the layout - double rotation = - pango_gravity_to_rotation( - pango_context_get_base_gravity(pango_layout_get_context(layout_))); - tlog(2, "Rotating by %f radians\n", rotation); - cairo_rotate(cr_, rotation); - pango_cairo_update_layout(cr_, layout_); - } - std::string page_text(text, page_offset); - if (render_fullwidth_latin_) { - // Convert Basic Latin to their fullwidth forms. - page_text = ConvertBasicLatinToFullwidthLatin(page_text); - } - if (strip_unrenderable_words_) { - StripUnrenderableWords(&page_text); - } - if (drop_uncovered_chars_ && - !font_.CoversUTF8Text(page_text.c_str(), page_text.length())) { - int num_dropped = font_.DropUncoveredChars(&page_text); - if (num_dropped) { - tprintf("WARNING: Dropped %d uncovered characters\n", num_dropped); - } - } - if (add_ligatures_) { - // Add ligatures wherever possible, including custom ligatures. 
- page_text = LigatureTable::Get()->AddLigatures(page_text, &font_); - } - if (underline_start_prob_ > 0) { - SetWordUnderlineAttributes(page_text); - } - - pango_layout_set_text(layout_, page_text.c_str(), page_text.length()); - - if (pix) { - // Set a white background for the target image surface. - cairo_set_source_rgb(cr_, 1.0, 1.0, 1.0); // sets drawing colour to white - // Fill the surface with the active colour (if you don't do this, you will - // be given a surface with a transparent background to draw on) - cairo_paint(cr_); - // Set the ink color to black - cairo_set_source_rgb(cr_, pen_color_[0], pen_color_[1], pen_color_[2]); - // If the target surface or transformation properties of the cairo instance - // have changed, update the pango layout to reflect this - pango_cairo_update_layout(cr_, layout_); - { - DISABLE_HEAP_LEAK_CHECK; // for Fontconfig - // Draw the pango layout onto the cairo surface - pango_cairo_show_layout(cr_, layout_); - } - *pix = CairoARGB32ToPixFormat(surface_); - } - ComputeClusterBoxes(); - FreePangoCairo(); - // Update internal state variables. - ++page_; - return page_offset; -} - -// Render a string to an image, returning it as an 8 bit pix. Behaves as -// RenderString, except that it ignores the font set at construction and works -// through all the fonts, returning 0 until they are exhausted, at which point -// it returns the value it should have returned all along, but no pix this time. -// Fonts that don't contain a given proportion of the characters in the string -// get skipped. -// Fonts that work each get rendered and the font name gets added -// to the image. -// NOTE that no boxes are produced by this function. -// -// Example usage: To render a null terminated char-array "txt" -// -// int offset = 0; -// do { -// Pix *pix; -// offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset, -// strlen(txt + offset), nullptr, -// &pix); -// ... 
-// } while (offset < strlen(text)); -// -int StringRenderer::RenderAllFontsToImage(double min_coverage, - const char* text, int text_length, - std::string* font_used, Pix** image) { - *image = nullptr; - // Select a suitable font to render the title with. - const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%"; - std::string title_font; - if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate), - &title_font, nullptr)) { - tprintf("WARNING: Could not find a font to render image title with!\n"); - title_font = "Arial"; - } - title_font += " 8"; - tlog(1, "Selected title font: %s\n", title_font.c_str()); - if (font_used) font_used->clear(); - - std::string orig_font = font_.DescriptionName(); - if (char_map_.empty()) { - total_chars_ = 0; - // Fill the hash table and use that for computing which fonts to use. - for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_length); - it != UNICHAR::end(text, text_length); ++it) { - ++total_chars_; - ++char_map_[*it]; - } - tprintf("Total chars = %d\n", total_chars_); - } - const std::vector& all_fonts = FontUtils::ListAvailableFonts(); - - for (size_t i = font_index_; i < all_fonts.size(); ++i) { - ++font_index_; - int raw_score = 0; - int ok_chars = - FontUtils::FontScore(char_map_, all_fonts[i], &raw_score, nullptr); - if (ok_chars > 0 && ok_chars >= total_chars_ * min_coverage) { - set_font(all_fonts[i]); - int offset = RenderToBinaryImage(text, text_length, 128, image); - ClearBoxes(); // Get rid of them as they are garbage. - const int kMaxTitleLength = 1024; - char title[kMaxTitleLength]; - snprintf(title, kMaxTitleLength, kTitleTemplate, - all_fonts[i].c_str(), ok_chars, - 100.0 * ok_chars / total_chars_, raw_score, - 100.0 * raw_score / char_map_.size()); - tprintf("%s\n", title); - // This is a good font! Store the offset to return once we've tried all - // the fonts. 
- if (offset) { - last_offset_ = offset; - if (font_used) *font_used = all_fonts[i]; - } - // Add the font to the image. - set_font(title_font); - v_margin_ /= 8; - Pix* title_image = nullptr; - RenderToBinaryImage(title, strlen(title), 128, &title_image); - pixOr(*image, *image, title_image); - pixDestroy(&title_image); - - v_margin_ *= 8; - set_font(orig_font); - // We return the real offset only after cycling through the list of fonts. - return 0; - } else { - tprintf("Font %s failed with %d hits = %.2f%%\n", - all_fonts[i].c_str(), ok_chars, 100.0 * ok_chars / total_chars_); - } - } - font_index_ = 0; - char_map_.clear(); - return last_offset_ == 0 ? -1 : last_offset_; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/stringrenderer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/stringrenderer.h deleted file mode 100644 index 718ecfd2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/stringrenderer.h +++ /dev/null @@ -1,227 +0,0 @@ -/********************************************************************** - * File: stringrenderer.h - * Description: Class for rendering UTF-8 text to an image, and retrieving - * bounding boxes around each grapheme cluster. - * - * Instances are created using a font description string - * (eg. "Arial Italic 12"; see pango_font_info.h for the format) - * and the page dimensions. Other renderer properties such as - * spacing, ligaturization, as well a preprocessing behavior such - * as removal of unrenderable words and a special n-gram mode may - * be set using respective set_* methods. - * - * Author: Ranjith Unnikrishnan - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_STRINGRENDERER_H_ -#define TESSERACT_TRAINING_STRINGRENDERER_H_ - -#include -#include -#include - -#include "host.h" -#include "pango_font_info.h" -#include "pango/pango-layout.h" -#include "pango/pangocairo.h" - -struct Boxa; -struct Pix; - -#ifdef _MSC_VER -# define strdup(s) _strdup(s) -#endif - -namespace tesseract { - -class BoxChar; - -class StringRenderer { - public: - StringRenderer(const std::string& font_desc, int page_width, int page_height); - ~StringRenderer(); - - // Renders the text with the chosen font and returns the byte offset up to - // which the text could be rendered so as to fit the specified page - // dimensions. - int RenderToImage(const char* text, int text_length, Pix** pix); - int RenderToGrayscaleImage(const char* text, int text_length, Pix** pix); - int RenderToBinaryImage(const char* text, int text_length, int threshold, - Pix** pix); - // Renders a line of text with all available fonts that were able to render - // at least min_coverage fraction of the input text. Use 1.0 to require that - // a font be able to render all the text. - int RenderAllFontsToImage(double min_coverage, const char* text, - int text_length, std::string* font_used, Pix** pix); - - bool set_font(const std::string& desc); - // Char spacing is in PIXELS!!!!. 
- void set_char_spacing(int char_spacing) { char_spacing_ = char_spacing; } - void set_leading(int leading) { - leading_ = leading; - } - void set_resolution(const int resolution); - void set_vertical_text(bool vertical_text) { - vertical_text_ = vertical_text; - } - void set_gravity_hint_strong(bool gravity_hint_strong) { - gravity_hint_strong_ = gravity_hint_strong; - } - void set_render_fullwidth_latin(bool render_fullwidth_latin) { - render_fullwidth_latin_ = render_fullwidth_latin; - } - // Sets the probability (value in [0, 1]) of starting to render a word with an - // underline. This implementation consider words to be space-delimited - // sequences of characters. - void set_underline_start_prob(const double frac); - // Set the probability (value in [0, 1]) of continuing a started underline to - // the next word. - void set_underline_continuation_prob(const double frac); - void set_underline_style(const PangoUnderline style) { - underline_style_ = style; - } - void set_features(const char* features) { - free(features_); - features_ = strdup(features); - } - void set_page(int page) { - page_ = page; - } - void set_box_padding(int val) { - box_padding_ = val; - } - void set_drop_uncovered_chars(bool val) { - drop_uncovered_chars_ = val; - } - void set_strip_unrenderable_words(bool val) { - strip_unrenderable_words_ = val; - } - void set_output_word_boxes(bool val) { - output_word_boxes_ = val; - } - // Before rendering the string, replace latin characters with their optional - // ligatured forms (such as "fi", "ffi" etc.) if the font_ covers those - // unicodes. - void set_add_ligatures(bool add_ligatures) { - add_ligatures_ = add_ligatures; - } - // Set the rgb value of the text ink. 
Values range in [0, 1.0] - void set_pen_color(double r, double g, double b) { - pen_color_[0] = r; - pen_color_[1] = g; - pen_color_[2] = b; - } - void set_h_margin(const int h_margin) { - h_margin_ = h_margin; - } - void set_v_margin(const int v_margin) { - v_margin_ = v_margin; - } - const PangoFontInfo& font() const { - return font_; - } - int h_margin() const { return h_margin_; } - int v_margin() const { return v_margin_; } - - // Get the boxchars of all clusters rendered thus far (or since the last call - // to ClearBoxes()). - const std::vector& GetBoxes() const; - // Get the rendered page bounding boxes of all pages created thus far (or - // since last call to ClearBoxes()). - Boxa* GetPageBoxes() const; - - // Rotate the boxes on the most recent page by the given rotation. - void RotatePageBoxes(float rotation); - // Delete all boxes. - void ClearBoxes(); - // Returns the boxes in a boxfile string. - std::string GetBoxesStr(); - // Writes the boxes to a boxfile. - void WriteAllBoxes(const std::string& filename); - // Removes space-delimited words from the string that are not renderable by - // the current font and returns the count of such words. - int StripUnrenderableWords(std::string* utf8_text) const; - - // Insert a Word Joiner symbol (U+2060) between adjacent characters, excluding - // spaces and combining types, in each word before rendering to ensure words - // are not broken across lines. The output boxchars will not contain the - // joiner. - static std::string InsertWordJoiners(const std::string& text); - - // Helper functions to convert fullwidth Latin and halfwidth Basic Latin. - static std::string ConvertBasicLatinToFullwidthLatin(const std::string& text); - static std::string ConvertFullwidthLatinToBasicLatin(const std::string& text); - - protected: - // Init and free local renderer objects. - void InitPangoCairo(); - void FreePangoCairo(); - // Set rendering properties. 
- void SetLayoutProperties(); - void SetWordUnderlineAttributes(const std::string& page_text); - // Compute bounding boxes around grapheme clusters. - void ComputeClusterBoxes(); - void CorrectBoxPositionsToLayout(std::vector* boxchars); - bool GetClusterStrings(std::vector* cluster_text); - int FindFirstPageBreakOffset(const char* text, int text_length); - - PangoFontInfo font_; - // Page properties - int page_width_, page_height_, h_margin_, v_margin_; - // Text rendering properties - double pen_color_[3]; - int char_spacing_; - int leading_, resolution_; - bool vertical_text_; - bool gravity_hint_strong_; - bool render_fullwidth_latin_; - double underline_start_prob_; - double underline_continuation_prob_; - PangoUnderline underline_style_; - char* features_; - // Text filtering options - bool drop_uncovered_chars_; - bool strip_unrenderable_words_; - bool add_ligatures_; - bool output_word_boxes_; - // Pango and cairo specific objects - cairo_surface_t* surface_; - cairo_t* cr_; - PangoLayout* layout_; - // Internal state of current page number, updated on successive calls to - // RenderToImage() - int start_box_; - int page_; - // Boxes and associated text for all pages rendered with RenderToImage() since - // the last call to ClearBoxes(). - std::vector boxchars_; - int box_padding_; - // Bounding boxes for pages since the last call to ClearBoxes(). - Boxa* page_boxes_; - - // Objects cached for subsequent calls to RenderAllFontsToImage() - std::unordered_map char_map_; // Time-saving char histogram. - int total_chars_; // Number in the string to be rendered. - unsigned int font_index_; // Index of next font to use in font list. 
- int last_offset_; // Offset returned from last successful rendering - - private: - StringRenderer(const StringRenderer&); - void operator=(const StringRenderer&); -}; -} // namespace tesseract - -#endif // THIRD_PARTY_TESSERACT_TRAINING_STRINGRENDERER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tessopt.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tessopt.cpp deleted file mode 100644 index 15611c4c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tessopt.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/********************************************************************** - * File: tessopt.cpp - * Description: Re-implementation of the unix code. - * Author: Ray Smith - * Created: Tue Nov 28 05:52:50 MST 1995 - * - * (C) Copyright 1995, Hewlett-Packard Co. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include -#include -#include "tessopt.h" - -int tessoptind; -char *tessoptarg; - -/********************************************************************** - * tessopt - * - * parse command line args. 
- **********************************************************************/ - -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars -) { - const char *arg; //arg char - - if (tessoptind == 0) - tessoptind = 1; - if (tessoptind < argc && argv[tessoptind][0] == '-') { - arg = strchr (arglist, argv[tessoptind][1]); - if (arg == nullptr || *arg == ':') - return '?'; //dud option - tessoptind++; - tessoptarg = argv[tessoptind]; - if (arg[1] == ':') { - if (argv[tessoptind - 1][2] != '\0') - //immediately after - tessoptarg = argv[tessoptind - 1] + 2; - else - tessoptind++; - } - return *arg; - } - else - return EOF; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tessopt.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tessopt.h deleted file mode 100644 index 925f8f6d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tessopt.h +++ /dev/null @@ -1,33 +0,0 @@ -/********************************************************************** - * File: tessopt.h - * Description: Re-implementation of the unix code. - * Author: Ray Smith - * - * (C) Copyright 1995, Hewlett-Packard Co. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_TESSOPT_H_ -#define TESSERACT_TRAINING_TESSOPT_H_ - -#include "host.h" - -extern int tessoptind; -extern char *tessoptarg; - -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars -); - -#endif // TESSERACT_TRAINING_TESSOPT_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tesstrain.sh b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tesstrain.sh deleted file mode 100644 index 6aa04626..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tesstrain.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# (C) Copyright 2014, Google Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# This script provides an easy way to execute various phases of training -# Tesseract. For a detailed description of the phases, see -# https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract -# - -display_usage() { -echo -e "USAGE: tesstrain.sh - --fontlist FONTS # A list of fontnames to train on. - --fonts_dir FONTS_PATH # Path to font files. - --lang LANG_CODE # ISO 639 code. - --langdata_dir DATADIR # Path to tesseract/training/langdata directory. - --output_dir OUTPUTDIR # Location of output traineddata file. - --save_box_tiff # Save box/tiff pairs along with lstmf files. - --overwrite # Safe to overwrite files in output_dir. 
- --linedata_only # Only generate training data for lstmtraining. - --run_shape_clustering # Run shape clustering (use for Indic langs). - --exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1"). - - OPTIONAL flags for input data. If unspecified we will look for them in - the langdata_dir directory. - --training_text TEXTFILE # Text to render and use for training. - --wordlist WORDFILE # Word list for the language ordered by - # decreasing frequency. - - OPTIONAL flag to specify location of existing traineddata files, required - during feature extraction. If unspecified will use TESSDATA_PREFIX defined in - the current environment. - --tessdata_dir TESSDATADIR # Path to tesseract/tessdata directory. - - NOTE: - The font names specified in --fontlist need to be recognizable by Pango using - fontconfig. An easy way to list the canonical names of all fonts available on - your system is to run text2image with --list_available_fonts and the - appropriate --fonts_dir path." -} - -source "$(dirname $0)/tesstrain_utils.sh" -if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then - display_usage - exit 0 -fi -if [ $# == 0 ]; then - display_usage - exit 1 -fi - -ARGV=("$@") -parse_flags - -mkdir -p ${TRAINING_DIR} -tlog "\n=== Starting training for language '${LANG_CODE}'" - -source "$(dirname $0)/language-specific.sh" -set_lang_specific_parameters ${LANG_CODE} - -initialize_fontconfig - -phase_I_generate_image 8 -phase_UP_generate_unicharset -if ((LINEDATA)); then - phase_E_extract_features " --psm 6 lstm.train " 8 "lstmf" - make__lstmdata - tlog "\nCreated starter traineddata for language '${LANG_CODE}'\n" - tlog "\nRun lstmtraining to do the LSTM training for language '${LANG_CODE}'\n" -else - phase_D_generate_dawg - phase_E_extract_features "box.train" 8 "tr" - phase_C_cluster_prototypes "${TRAINING_DIR}/${LANG_CODE}.normproto" - if [[ "${ENABLE_SHAPE_CLUSTERING}" == "y" ]]; then - phase_S_cluster_shapes - fi - phase_M_cluster_microfeatures - 
phase_B_generate_ambiguities - make__traineddata - tlog "\nCompleted training for language '${LANG_CODE}'\n" -fi diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tesstrain_utils.sh b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tesstrain_utils.sh deleted file mode 100644 index e9818aab..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tesstrain_utils.sh +++ /dev/null @@ -1,609 +0,0 @@ -#!/bin/bash -# (C) Copyright 2014, Google Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# This script defines functions that are used by tesstrain.sh -# For a detailed description of the phases, see -# https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract -# -# USAGE: source tesstrain_utils.sh - -if [ -n "$BASH_VERSION" ];then - set -u # comment in case of "unbound variable" error or fix the code - set -eo pipefail; -else - echo "Warning: you aren't running script in bash - expect problems..." 
- fi - -UNAME=$(uname -s | tr 'A-Z' 'a-z') -LANG_CODE="ENG" -TIMESTAMP=`date +%Y-%m-%d` - -case $UNAME in - darwin | *freebsd | dragonfly | cygwin*) - MKTEMP_DT=$(mktemp -d -t) - ;; - * ) - MKTEMP_DT=$(mktemp -d --tmpdir) - ;; -esac -FONT_CONFIG_CACHE=(${MKTEMP_DT} font_tmp.XXXXXXXXXX) - -if [[ ($UNAME == *darwin*) ]]; then - FONTS_DIR="/Library/Fonts/" -else - FONTS_DIR="/usr/share/fonts/" -fi - -MAX_PAGES=0 -SAVE_BOX_TIFF=0 -OUTPUT_DIR="/tmp/tesstrain/tessdata" -OVERWRITE=0 -LINEDATA=0 -RUN_SHAPE_CLUSTERING=0 -EXTRACT_FONT_PROPERTIES=1 -WORKSPACE_DIR=$(mktemp -d) - -# set TESSDATA_PREFIX as empty, if not defined in environment to avoid an unbound variable -TESSDATA_PREFIX=${TESSDATA_PREFIX:-} - -# Logging helper functions. -tlog() { - echo -e $* 2>&1 1>&2 | tee -a ${LOG_FILE} -} - -err_exit() { - echo -e "ERROR: "$* 2>&1 1>&2 | tee -a ${LOG_FILE} - exit 1 -} - -# Helper function to run a command and append its output to a log. Aborts early -# if the program file is not found. -# Usage: run_command CMD ARG1 ARG2... -run_command() { - local cmd=$(which $1) - if [[ -z ${cmd} ]]; then - for d in api training; do - cmd=$(which $d/$1) - if [[ ! -z ${cmd} ]]; then - break - fi - done - if [[ -z ${cmd} ]]; then - err_exit "$1 not found" - fi - fi - shift - tlog "[$(date)] ${cmd} $@" - "${cmd}" "$@" 2>&1 1>&2 | tee -a ${LOG_FILE} - # check completion status - if [[ $? -gt 0 ]]; then - err_exit "Program $(basename ${cmd}) failed. Abort." - fi -} - -# Check if all the given files exist, or exit otherwise. -# Used to check required input files and produced output files in each phase. -# Usage: check_file_readable FILE1 FILE2... -check_file_readable() { - for file in $@; do - if [[ ! -r ${file} ]]; then - err_exit "${file} does not exist or is not readable" - fi - done -} - -# Sets the named variable to given value. Aborts if the value is missing or -# if it looks like a flag. 
-# Usage: parse_value VAR_NAME VALUE -parse_value() { - local val="$2" - if [[ -z $val ]]; then - err_exit "Missing value for variable $1" - exit - fi - if [[ ${val:0:2} == "--" ]]; then - err_exit "Invalid value $val passed for variable $1" - exit - fi - eval $1=\"$val\" -} - -# Does simple command-line parsing and initialization. -parse_flags() { - local i=0 - while test $i -lt ${#ARGV[@]}; do - local j=$((i+1)) - case ${ARGV[$i]} in - --) - break;; - --fontlist) - fn=0 - FONTS="" - while test $j -lt ${#ARGV[@]}; do - test -z "${ARGV[$j]}" && break - test $(echo ${ARGV[$j]} | cut -c -2) = "--" && break - FONTS[$fn]="${ARGV[$j]}" - fn=$((fn+1)) - j=$((j+1)) - done - i=$((j-1)) ;; - --exposures) - exp="" - while test $j -lt ${#ARGV[@]}; do - test -z "${ARGV[$j]}" && break - test $(echo ${ARGV[$j]} | cut -c -2) = "--" && break - exp="$exp ${ARGV[$j]}" - j=$((j+1)) - done - parse_value "EXPOSURES" "$exp" - i=$((j-1)) ;; - --fonts_dir) - parse_value "FONTS_DIR" ${ARGV[$j]} - i=$j ;; - --lang) - parse_value "LANG_CODE" ${ARGV[$j]} - i=$j ;; - --langdata_dir) - parse_value "LANGDATA_ROOT" ${ARGV[$j]} - i=$j ;; - --maxpages) - parse_value "MAX_PAGES" ${ARGV[$j]} - i=$j ;; - --output_dir) - parse_value "OUTPUT_DIR" ${ARGV[$j]} - i=$j ;; - --overwrite) - OVERWRITE=1 ;; - --save_box_tiff) - SAVE_BOX_TIFF=1 ;; - --linedata_only) - LINEDATA=1 ;; - --extract_font_properties) - EXTRACT_FONT_PROPERTIES=1 ;; - --noextract_font_properties) - EXTRACT_FONT_PROPERTIES=0 ;; - --tessdata_dir) - parse_value "TESSDATA_DIR" ${ARGV[$j]} - i=$j ;; - --training_text) - parse_value "TRAINING_TEXT" "${ARGV[$j]}" - i=$j ;; - --wordlist) - parse_value "WORDLIST_FILE" ${ARGV[$j]} - i=$j ;; - --workspace_dir) - rmdir "$FONT_CONFIG_CACHE" - rmdir "$WORKSPACE_DIR" - parse_value "WORKSPACE_DIR" ${ARGV[$j]} - FONT_CONFIG_CACHE=$WORKSPACE_DIR/fc-cache - mkdir -p $FONT_CONFIG_CACHE - i=$j ;; - *) - err_exit "Unrecognized argument ${ARGV[$i]}" ;; - esac - i=$((i+1)) - done - if [[ -z ${LANG_CODE} ]]; 
then - err_exit "Need to specify a language --lang" - fi - if [[ -z ${LANGDATA_ROOT} ]]; then - err_exit "Need to specify path to language files --langdata_dir" - fi - if [[ -z ${TESSDATA_DIR} ]]; then - if [[ -z ${TESSDATA_PREFIX} ]]; then - err_exit "Need to specify a --tessdata_dir or have a "\ - "TESSDATA_PREFIX variable defined in your environment" - else - TESSDATA_DIR="${TESSDATA_PREFIX}" - fi - fi - - # Location where intermediate files will be created. - TIMESTAMP=`date +%Y-%m-%d` - TMP_DIR=(${MKTEMP_DT} ${LANG_CODE}-${TIMESTAMP}.XXX ) - TRAINING_DIR=${TMP_DIR} - # Location of log file for the whole run. - LOG_FILE=${TRAINING_DIR}/tesstrain.log - - # Take training text and wordlist from the langdata directory if not - # specified in the command-line. - TRAINING_TEXT=${TRAINING_TEXT:-${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text} - WORDLIST_FILE=${TRAINING_TEXT:-${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist} - - WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams - NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers - PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc - BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs - UNIGRAM_FREQS_FILE=${TRAINING_TEXT}.unigram_freqs - TRAIN_NGRAMS_FILE=${TRAINING_TEXT}.train_ngrams - GENERATE_DAWGS=1 -} - -# Function initializes font config with a unique font cache dir. -initialize_fontconfig() { - export FONT_CONFIG_CACHE - local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt - echo "Text" >${sample_path} - run_command text2image --fonts_dir=${FONTS_DIR} \ - --font="${FONTS[0]}" --outputbase=${sample_path} --text=${sample_path} \ - --fontconfig_tmpdir=${FONT_CONFIG_CACHE} -} - -# Helper function for phaseI_generate_image. Generates the image for a single -# language/font combination in a way that can be run in parallel. 
-generate_font_image() { - local font="$1" - tlog "Rendering using ${font}" - local fontname=$(echo ${font} | tr ' ' '_' | sed 's/,//g') - local outbase=${TRAINING_DIR}/${LANG_CODE}.${fontname}.exp${EXPOSURE} - - local common_args="--fontconfig_tmpdir=${FONT_CONFIG_CACHE}" - common_args+=" --fonts_dir=${FONTS_DIR} --strip_unrenderable_words" - common_args+=" --leading=${LEADING}" - common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}" - common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}" - - # add --writing_mode=vertical-upright to common_args if the font is - # specified to be rendered vertically. - for vfont in "${VERTICAL_FONTS[@]}"; do - if [[ "${font}" == "${vfont}" ]]; then - common_args+=" --writing_mode=vertical-upright " - break - fi - done - - run_command text2image ${common_args} --font="${font}" \ - --text=${TRAINING_TEXT} ${TEXT2IMAGE_EXTRA_ARGS:-} - check_file_readable ${outbase}.box ${outbase}.tif - - if ((EXTRACT_FONT_PROPERTIES)) && - [[ -r ${TRAIN_NGRAMS_FILE} ]]; then - tlog "Extracting font properties of ${font}" - run_command text2image ${common_args} --font="${font}" \ - --ligatures=false --text=${TRAIN_NGRAMS_FILE} \ - --only_extract_font_properties --ptsize=32 - check_file_readable ${outbase}.fontinfo - fi -} - -# Phase I : Generate (I)mages from training text for each font. -phase_I_generate_image() { - local par_factor=$1 - if [[ -z ${par_factor} || ${par_factor} -le 0 ]]; then - par_factor=1 - fi - tlog "\n=== Phase I: Generating training images ===" - if [[ -z ${TRAINING_TEXT} ]] || [[ ! -r ${TRAINING_TEXT} ]]; then - err_exit "Could not find training text file ${TRAINING_TEXT}" - fi - CHAR_SPACING="0.0" - - for EXPOSURE in $EXPOSURES; do - if ((EXTRACT_FONT_PROPERTIES)) && [[ -r ${BIGRAM_FREQS_FILE} ]]; then - # Parse .bigram_freqs file and compose a .train_ngrams file with text - # for tesseract to recognize during training. 
Take only the ngrams whose - # combined weight accounts for 95% of all the bigrams in the language. - NGRAM_FRAC=$(cat ${BIGRAM_FREQS_FILE} \ - | awk '{s=s+$2}; END {print (s/100)*p}' p=99) - cat ${BIGRAM_FREQS_FILE} | sort -rnk2 \ - | awk '{s=s+$2; if (s <= x) {printf "%s ", $1; } }' \ - x=${NGRAM_FRAC} > ${TRAIN_NGRAMS_FILE} - check_file_readable ${TRAIN_NGRAMS_FILE} - fi - - local counter=0 - for font in "${FONTS[@]}"; do - sleep 1 - generate_font_image "${font}" & - let counter=counter+1 - let rem=counter%par_factor - if [[ "${rem}" -eq 0 ]]; then - wait - fi - done - wait - # Check that each process was successful. - for font in "${FONTS[@]}"; do - local fontname=$(echo ${font} | tr ' ' '_' | sed 's/,//g') - local outbase=${TRAINING_DIR}/${LANG_CODE}.${fontname}.exp${EXPOSURE} - check_file_readable ${outbase}.box ${outbase}.tif - done - done -} - -# Phase UP : Generate (U)nicharset and (P)roperties file. -phase_UP_generate_unicharset() { - tlog "\n=== Phase UP: Generating unicharset and unichar properties files ===" - - local box_files=$(ls ${TRAINING_DIR}/*.box) - UNICHARSET_FILE="${TRAINING_DIR}/${LANG_CODE}.unicharset" - run_command unicharset_extractor --output_unicharset "${UNICHARSET_FILE}" \ - --norm_mode "${NORM_MODE}" ${box_files} - check_file_readable ${UNICHARSET_FILE} - - XHEIGHTS_FILE="${TRAINING_DIR}/${LANG_CODE}.xheights" - run_command set_unicharset_properties \ - -U ${UNICHARSET_FILE} -O ${UNICHARSET_FILE} -X ${XHEIGHTS_FILE} \ - --script_dir=${LANGDATA_ROOT} - check_file_readable ${XHEIGHTS_FILE} -} - -# Phase D : Generate (D)awg files from unicharset file and wordlist files -phase_D_generate_dawg() { - tlog "\n=== Phase D: Generating Dawg files ===" - - # Skip if requested - if [[ ${GENERATE_DAWGS} -eq 0 ]]; then - tlog "Skipping ${phase_name}" - return - fi - - # Output files - WORD_DAWG=${TRAINING_DIR}/${LANG_CODE}.word-dawg - FREQ_DAWG=${TRAINING_DIR}/${LANG_CODE}.freq-dawg - PUNC_DAWG=${TRAINING_DIR}/${LANG_CODE}.punc-dawg - 
NUMBER_DAWG=${TRAINING_DIR}/${LANG_CODE}.number-dawg - BIGRAM_DAWG=${TRAINING_DIR}/${LANG_CODE}.bigram-dawg - - # Word DAWG - local freq_wordlist_file=${TRAINING_DIR}/${LANG_CODE}.wordlist.clean.freq - if [[ -s ${WORDLIST_FILE} ]]; then - tlog "Generating word Dawg" - check_file_readable ${UNICHARSET_FILE} - run_command wordlist2dawg -r 1 ${WORDLIST_FILE} ${WORD_DAWG} \ - ${UNICHARSET_FILE} - check_file_readable ${WORD_DAWG} - - FREQ_DAWG_SIZE=100 - head -n ${FREQ_DAWG_SIZE} ${WORDLIST_FILE} > ${freq_wordlist_file} - fi - - # Freq-word DAWG - if [[ -s ${freq_wordlist_file} ]]; then - check_file_readable ${UNICHARSET_FILE} - tlog "Generating frequent-word Dawg" - run_command wordlist2dawg -r 1 ${freq_wordlist_file} \ - ${FREQ_DAWG} ${UNICHARSET_FILE} - check_file_readable ${FREQ_DAWG} - fi - - # Punctuation DAWG - # -r arguments to wordlist2dawg denote RTL reverse policy - # (see Trie::RTLReversePolicy enum in third_party/tesseract/dict/trie.h). - # We specify 0/RRP_DO_NO_REVERSE when generating number DAWG, - # 1/RRP_REVERSE_IF_HAS_RTL for freq and word DAWGS, - # 2/RRP_FORCE_REVERSE for the punctuation DAWG. - local punc_reverse_policy=0; - if [[ "${LANG_IS_RTL}" == "1" ]]; then - punc_reverse_policy=2 - fi - if [[ ! 
-s ${PUNC_FILE} ]]; then - PUNC_FILE="${LANGDATA_ROOT}/common.punc" - fi - check_file_readable ${PUNC_FILE} - run_command wordlist2dawg -r ${punc_reverse_policy} \ - ${PUNC_FILE} ${PUNC_DAWG} ${UNICHARSET_FILE} - check_file_readable ${PUNC_DAWG} - - # Numbers DAWG - if [[ -s ${NUMBERS_FILE} ]]; then - run_command wordlist2dawg -r 0 \ - ${NUMBERS_FILE} ${NUMBER_DAWG} ${UNICHARSET_FILE} - check_file_readable ${NUMBER_DAWG} - fi - - # Bigram dawg - if [[ -s ${WORD_BIGRAMS_FILE} ]]; then - run_command wordlist2dawg -r 1 \ - ${WORD_BIGRAMS_FILE} ${BIGRAM_DAWG} ${UNICHARSET_FILE} - check_file_readable ${BIGRAM_DAWG} - fi -} - -# Phase E : (E)xtract .tr feature files from .tif/.box files -phase_E_extract_features() { - local box_config=$1 - local par_factor=$2 - local ext=$3 - if [[ -z ${par_factor} || ${par_factor} -le 0 ]]; then - par_factor=1 - fi - tlog "\n=== Phase E: Generating ${ext} files ===" - - local img_files="" - for exposure in ${EXPOSURES}; do - img_files=${img_files}' '$(ls ${TRAINING_DIR}/*.exp${exposure}.tif) - done - - # Use any available language-specific configs. - local config="" - if [[ -r ${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.config ]]; then - config=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.config - fi - - OLD_TESSDATA_PREFIX=${TESSDATA_PREFIX} - export TESSDATA_PREFIX=${TESSDATA_DIR} - tlog "Using TESSDATA_PREFIX=${TESSDATA_PREFIX}" - local counter=0 - for img_file in ${img_files}; do - run_command tesseract ${img_file} ${img_file%.*} \ - ${box_config} ${config} & - let counter=counter+1 - let rem=counter%par_factor - if [[ "${rem}" -eq 0 ]]; then - wait - fi - done - wait - export TESSDATA_PREFIX=${OLD_TESSDATA_PREFIX} - # Check that all the output files were produced. 
- for img_file in ${img_files}; do - check_file_readable "${img_file%.*}.${ext}" - done -} - -# Phase C : (C)luster feature prototypes in .tr into normproto file (cnTraining) -# phaseC_cluster_prototypes ${TRAINING_DIR}/${LANG_CODE}.normproto -phase_C_cluster_prototypes() { - tlog "\n=== Phase C: Clustering feature prototypes (cnTraining) ===" - local out_normproto=$1 - - run_command cntraining -D "${TRAINING_DIR}/" \ - $(ls ${TRAINING_DIR}/*.tr) - - check_file_readable ${TRAINING_DIR}/normproto - mv ${TRAINING_DIR}/normproto ${out_normproto} -} - -# Phase S : (S)hape clustering -phase_S_cluster_shapes() { - if ((! RUN_SHAPE_CLUSTERING)); then - tlog "\n=== Shape Clustering disabled ===" - return - fi - check_file_readable ${LANGDATA_ROOT}/font_properties - local font_props="-F ${LANGDATA_ROOT}/font_properties" - if [[ -r ${TRAINING_DIR}/${LANG_CODE}.xheights ]] &&\ - [[ -s ${TRAINING_DIR}/${LANG_CODE}.xheights ]]; then - font_props=${font_props}" -X ${TRAINING_DIR}/${LANG_CODE}.xheights" - fi - - run_command shapeclustering \ - -D "${TRAINING_DIR}/" \ - -U ${TRAINING_DIR}/${LANG_CODE}.unicharset \ - -O ${TRAINING_DIR}/${LANG_CODE}.mfunicharset \ - ${font_props} \ - $(ls ${TRAINING_DIR}/*.tr) - check_file_readable ${TRAINING_DIR}/shapetable \ - ${TRAINING_DIR}/${LANG_CODE}.mfunicharset -} - -# Phase M : Clustering microfeatures (mfTraining) -phase_M_cluster_microfeatures() { - tlog "\n=== Phase M : Clustering microfeatures (mfTraining) ===" - - check_file_readable ${LANGDATA_ROOT}/font_properties - font_props="-F ${LANGDATA_ROOT}/font_properties" - if [[ -r ${TRAINING_DIR}/${LANG_CODE}.xheights ]] && \ - [[ -s ${TRAINING_DIR}/${LANG_CODE}.xheights ]]; then - font_props=${font_props}" -X ${TRAINING_DIR}/${LANG_CODE}.xheights" - fi - - run_command mftraining \ - -D "${TRAINING_DIR}/" \ - -U ${TRAINING_DIR}/${LANG_CODE}.unicharset \ - -O ${TRAINING_DIR}/${LANG_CODE}.mfunicharset \ - ${font_props} \ - $(ls ${TRAINING_DIR}/*.tr) - check_file_readable 
${TRAINING_DIR}/inttemp ${TRAINING_DIR}/shapetable \ - ${TRAINING_DIR}/pffmtable ${TRAINING_DIR}/${LANG_CODE}.mfunicharset - mv ${TRAINING_DIR}/inttemp ${TRAINING_DIR}/${LANG_CODE}.inttemp - mv ${TRAINING_DIR}/shapetable ${TRAINING_DIR}/${LANG_CODE}.shapetable - mv ${TRAINING_DIR}/pffmtable ${TRAINING_DIR}/${LANG_CODE}.pffmtable - mv ${TRAINING_DIR}/${LANG_CODE}.mfunicharset ${TRAINING_DIR}/${LANG_CODE}.unicharset -} - -phase_B_generate_ambiguities() { - tlog "\n=== Phase B : ambiguities training ===" - - # Check for manually created ambiguities data. - if [[ -r ${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.unicharambigs ]]; then - tlog "Found file ${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.unicharambigs" - cp ${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.unicharambigs \ - ${TRAINING_DIR}/${LANG_CODE}.unicharambigs - # Make it writable, as it may be read-only in the client. - chmod u+w ${TRAINING_DIR}/${LANG_CODE}.unicharambigs - return - else - tlog "No unicharambigs file found!" - fi - - # TODO: Add support for generating ambiguities automatically. -} - -make__lstmdata() { - tlog "\n=== Constructing LSTM training data ===" - local lang_prefix="${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}" - if [[ ! -d "${OUTPUT_DIR}" ]]; then - tlog "Creating new directory ${OUTPUT_DIR}" - mkdir -p "${OUTPUT_DIR}" - fi - local lang_is_rtl="" - if [[ "${LANG_IS_RTL}" == "1" ]]; then - lang_is_rtl="--lang_is_rtl" - fi - local pass_through="" - if [[ "${NORM_MODE}" -ge "2" ]]; then - pass_through="--pass_through_recoder" - fi - - # Build the starter traineddata from the inputs. 
- run_command combine_lang_model \ - --input_unicharset "${TRAINING_DIR}/${LANG_CODE}.unicharset" \ - --script_dir "${LANGDATA_ROOT}" \ - --words "${lang_prefix}.wordlist" \ - --numbers "${lang_prefix}.numbers" \ - --puncs "${lang_prefix}.punc" \ - --output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \ - "${pass_through}" "${lang_is_rtl}" - - if ((SAVE_BOX_TIFF)); then - tlog "\n=== Saving box/tiff pairs for training data ===" - for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do - tlog "Moving ${f} to ${OUTPUT_DIR}" - mv "${f}" "${OUTPUT_DIR}" - done - for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do - tlog "Moving ${f} to ${OUTPUT_DIR}" - mv "${f}" "${OUTPUT_DIR}" - done - fi - tlog "\n=== Moving lstmf files for training data ===" - for f in "${TRAINING_DIR}/${LANG_CODE}".*.lstmf; do - tlog "Moving ${f} to ${OUTPUT_DIR}" - mv "${f}" "${OUTPUT_DIR}" - done - local lstm_list="${OUTPUT_DIR}/${LANG_CODE}.training_files.txt" - ls -1 "${OUTPUT_DIR}/${LANG_CODE}".*.lstmf > "${lstm_list}" -} - -make__traineddata() { - tlog "\n=== Making final traineddata file ===" - local lang_prefix=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE} - - # Combine available files for this language from the langdata dir. - if [[ -r ${lang_prefix}.config ]]; then - tlog "Copying ${lang_prefix}.config to ${TRAINING_DIR}" - cp ${lang_prefix}.config ${TRAINING_DIR} - chmod u+w ${TRAINING_DIR}/${LANG_CODE}.config - fi - if [[ -r ${lang_prefix}.params-model ]]; then - tlog "Copying ${lang_prefix}.params-model to ${TRAINING_DIR}" - cp ${lang_prefix}.params-model ${TRAINING_DIR} - chmod u+w ${TRAINING_DIR}/${LANG_CODE}.params-model - fi - - # Compose the traineddata file. - run_command combine_tessdata ${TRAINING_DIR}/${LANG_CODE}. - - # Copy it to the output dir, overwriting only if allowed by the cmdline flag. - if [[ ! 
-d ${OUTPUT_DIR} ]]; then - tlog "Creating new directory ${OUTPUT_DIR}" - mkdir -p ${OUTPUT_DIR} - fi - local destfile=${OUTPUT_DIR}/${LANG_CODE}.traineddata; - if [[ -f ${destfile} ]] && ((! OVERWRITE)); then - err_exit "File ${destfile} exists and no --overwrite specified"; - fi - tlog "Moving ${TRAINING_DIR}/${LANG_CODE}.traineddata to ${OUTPUT_DIR}" - cp -f ${TRAINING_DIR}/${LANG_CODE}.traineddata ${destfile} -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/text2image.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/text2image.cpp deleted file mode 100644 index f1f308d3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/text2image.cpp +++ /dev/null @@ -1,708 +0,0 @@ -/********************************************************************** - * File: text2image.cpp - * Description: Program to generate OCR training pages. Given a text file it - * outputs an image with a given font and degradation. - * - * Note that since the results depend on the fonts available on - * your system, running the code on a different machine, or - * different OS, or even at a different time on the same machine, - * may produce different fonts even if --font is given explicitly. - * To see names of available fonts, use --list_available_fonts with - * the appropriate --fonts_dir path. - * Specifying --use_only_legacy_fonts will restrict the available - * fonts to those listed in legacy_fonts.h - * - * Authors: Ranjith Unnikrishnan, Ray Smith - * Created: Tue Nov 19 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "allheaders.h" // from leptonica -#include "boxchar.h" -#include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion -#include "degradeimage.h" -#include "errcode.h" -#include "fileio.h" -#include "helpers.h" -#include "normstrngs.h" -#include "stringrenderer.h" -#include "tlog.h" -#include "unicharset.h" -#include "util.h" -#ifdef _MSC_VER -# define putenv(s) _putenv(s) -#endif - -// A number with which to initialize the random number generator. -const int kRandomSeed = 0x18273645; - -// The text input file. -STRING_PARAM_FLAG(text, "", "File name of text input to process"); - -// The text output file. -STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file"); - -// Degrade the rendered image to mimic scanner quality. -BOOL_PARAM_FLAG(degrade_image, true, - "Degrade rendered image with speckle noise, dilation/erosion " - "and rotation"); - -// Rotate the rendered image to have more realistic glyph borders -BOOL_PARAM_FLAG(rotate_image, true, "Rotate the image in a random way."); - -// Degradation to apply to the image. -INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier"); - -// Output image resolution. -INT_PARAM_FLAG(resolution, 300, "Pixels per inch"); - -// Width of output image (in pixels). -INT_PARAM_FLAG(xsize, 3600, "Width of output image"); - -// Max height of output image (in pixels). 
-INT_PARAM_FLAG(ysize, 4800, "Height of output image"); - -// Max number of pages to produce. -INT_PARAM_FLAG(max_pages, 0, "Maximum number of pages to output (0=unlimited)"); - -// Margin around text (in pixels). -INT_PARAM_FLAG(margin, 100, "Margin round edges of image"); - -// Size of text (in points). -INT_PARAM_FLAG(ptsize, 12, "Size of printed text"); - -// Inter-character space (in ems). -DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems"); - -// Sets the probability (value in [0, 1]) of starting to render a word with an -// underline. Words are assumed to be space-delimited. -DOUBLE_PARAM_FLAG(underline_start_prob, 0, - "Fraction of words to underline (value in [0,1])"); -// Set the probability (value in [0, 1]) of continuing a started underline to -// the next word. -DOUBLE_PARAM_FLAG(underline_continuation_prob, 0, - "Fraction of words to underline (value in [0,1])"); - -// Inter-line space (in pixels). -INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)"); - -// Layout and glyph orientation on rendering. -STRING_PARAM_FLAG(writing_mode, "horizontal", - "Specify one of the following writing" - " modes.\n" - "'horizontal' : Render regular horizontal text. (default)\n" - "'vertical' : Render vertical text. Glyph orientation is" - " selected by Pango.\n" - "'vertical-upright' : Render vertical text. Glyph " - " orientation is set to be upright."); - -INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes"); - -BOOL_PARAM_FLAG(strip_unrenderable_words, true, - "Remove unrenderable words from source text"); - -// Font name. -STRING_PARAM_FLAG(font, "Arial", "Font description name to use"); - -BOOL_PARAM_FLAG(ligatures, false, - "Rebuild and render ligatures"); - -BOOL_PARAM_FLAG(find_fonts, false, - "Search for all fonts that can render the text"); -BOOL_PARAM_FLAG(render_per_font, true, - "If find_fonts==true, render each font to its own image. 
" - "Image filenames are of the form output_name.font_name.tif"); -DOUBLE_PARAM_FLAG(min_coverage, 1.0, - "If find_fonts==true, the minimum coverage the font has of " - "the characters in the text file to include it, between " - "0 and 1."); - -BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit."); - -BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the" - " input file into one bounding box. The ngrams in the input" - " file will be randomly permuted before rendering (so that" - " there is sufficient variety of characters on each line)."); - -BOOL_PARAM_FLAG(output_word_boxes, false, - "Output word bounding boxes instead of character boxes. " - "This is used for Cube training, and implied by " - "--render_ngrams."); - -STRING_PARAM_FLAG(unicharset_file, "", - "File with characters in the unicharset. If --render_ngrams" - " is true and --unicharset_file is specified, ngrams with" - " characters that are not in unicharset will be omitted"); - -BOOL_PARAM_FLAG(bidirectional_rotation, false, - "Rotate the generated characters both ways."); - -BOOL_PARAM_FLAG(only_extract_font_properties, false, - "Assumes that the input file contains a list of ngrams. 
Renders" - " each ngram, extracts spacing properties and records them in" - " output_base/[font_name].fontinfo file."); - -// Use these flags to output zero-padded, square individual character images -BOOL_PARAM_FLAG(output_individual_glyph_images, false, - "If true also outputs individual character images"); -INT_PARAM_FLAG(glyph_resized_size, 0, - "Each glyph is square with this side length in pixels"); -INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0, - "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"); - -namespace tesseract { - -struct SpacingProperties { - SpacingProperties() : x_gap_before(0), x_gap_after(0) {} - SpacingProperties(int b, int a) : x_gap_before(b), x_gap_after(a) {} - // These values are obtained from FT_Glyph_Metrics struct - // used by the FreeType font engine. - int x_gap_before; // horizontal x bearing - int x_gap_after; // horizontal advance - x_gap_before - width - std::map kerned_x_gaps; -}; - -static bool IsWhitespaceBox(const BoxChar* boxchar) { - return (boxchar->box() == nullptr || - SpanUTF8Whitespace(boxchar->ch().c_str())); -} - -static std::string StringReplace(const std::string& in, - const std::string& oldsub, const std::string& newsub) { - std::string out; - size_t start_pos = 0, pos; - while ((pos = in.find(oldsub, start_pos)) != std::string::npos) { - out.append(in.data() + start_pos, pos - start_pos); - out.append(newsub.data(), newsub.length()); - start_pos = pos + oldsub.length(); - } - out.append(in.data() + start_pos, in.length() - start_pos); - return out; -} - -// Assumes that each word (whitespace-separated entity) in text is a bigram. -// Renders the bigrams and calls FontInfo::GetSpacingProperties() to -// obtain spacing information. Produces the output .fontinfo file with a line -// per unichar of the form: -// unichar space_before space_after kerned1 kerned_space1 kerned2 ... 
-// Fox example, if unichar "A" has spacing of 0 pixels before and -1 pixels -// after, is kerned with "V" resulting in spacing of "AV" to be -7 and kerned -// with "T", such that "AT" has spacing of -5, the entry/line for unichar "A" -// in .fontinfo file will be: -// A 0 -1 T -5 V -7 -static void ExtractFontProperties(const std::string &utf8_text, - StringRenderer *render, - const std::string &output_base) { - std::map spacing_map; - std::map::iterator spacing_map_it0; - std::map::iterator spacing_map_it1; - int x_bearing, x_advance; - int len = utf8_text.length(); - int offset = 0; - const char* text = utf8_text.c_str(); - while (offset < len) { - offset += - render->RenderToImage(text + offset, strlen(text + offset), nullptr); - const std::vector &boxes = render->GetBoxes(); - - // If the page break split a bigram, correct the offset so we try the bigram - // on the next iteration. - if (boxes.size() > 2 && !IsWhitespaceBox(boxes[boxes.size() - 1]) && - IsWhitespaceBox(boxes[boxes.size() - 2])) { - if (boxes.size() > 3) { - tprintf("WARNING: Adjusting to bad page break after '%s%s'\n", - boxes[boxes.size() - 4]->ch().c_str(), - boxes[boxes.size() - 3]->ch().c_str()); - } - offset -= boxes[boxes.size() - 1]->ch().size(); - } - - for (size_t b = 0; b < boxes.size(); b += 2) { - while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b; - if (b + 1 >= boxes.size()) break; - const std::string &ch0 = boxes[b]->ch(); - // We encountered a ligature. This happens in at least two scenarios: - // One is when the rendered bigram forms a grapheme cluster (eg. the - // second character in the bigram is a combining vowel), in which case we - // correctly output only one bounding box. - // A second far less frequent case is when caused some fonts like 'DejaVu - // Sans Ultra-Light' force Pango to render a ligatured character even if - // the input consists of the separated characters. 
NOTE(ranjith): As per - // behdad@ this is not currently controllable at the level of the Pango - // API. - // The most frequent of all is a single character "word" made by the CJK - // segmenter. - // Safeguard against these cases here by just skipping the bigram. - if (IsWhitespaceBox(boxes[b+1])) { - continue; - } - int xgap = (boxes[b+1]->box()->x - - (boxes[b]->box()->x + boxes[b]->box()->w)); - spacing_map_it0 = spacing_map.find(ch0); - int ok_count = 0; - if (spacing_map_it0 == spacing_map.end() && - render->font().GetSpacingProperties(ch0, &x_bearing, &x_advance)) { - spacing_map[ch0] = SpacingProperties( - x_bearing, x_advance - x_bearing - boxes[b]->box()->w); - spacing_map_it0 = spacing_map.find(ch0); - ++ok_count; - } - const std::string &ch1 = boxes[b+1]->ch(); - tlog(3, "%s%s\n", ch0.c_str(), ch1.c_str()); - spacing_map_it1 = spacing_map.find(ch1); - if (spacing_map_it1 == spacing_map.end() && - render->font().GetSpacingProperties(ch1, &x_bearing, &x_advance)) { - spacing_map[ch1] = SpacingProperties( - x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w); - spacing_map_it1 = spacing_map.find(ch1); - ++ok_count; - } - if (ok_count == 2 && xgap != (spacing_map_it0->second.x_gap_after + - spacing_map_it1->second.x_gap_before)) { - spacing_map_it0->second.kerned_x_gaps[ch1] = xgap; - } - } - render->ClearBoxes(); - } - std::string output_string; - const int kBufSize = 1024; - char buf[kBufSize]; - snprintf(buf, kBufSize, "%d\n", static_cast(spacing_map.size())); - output_string.append(buf); - std::map::const_iterator spacing_map_it; - for (spacing_map_it = spacing_map.begin(); - spacing_map_it != spacing_map.end(); ++spacing_map_it) { - snprintf(buf, kBufSize, - "%s %d %d %d", spacing_map_it->first.c_str(), - spacing_map_it->second.x_gap_before, - spacing_map_it->second.x_gap_after, - static_cast(spacing_map_it->second.kerned_x_gaps.size())); - output_string.append(buf); - std::map::const_iterator kern_it; - for (kern_it = 
spacing_map_it->second.kerned_x_gaps.begin(); - kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) { - snprintf(buf, kBufSize, - " %s %d", kern_it->first.c_str(), kern_it->second); - output_string.append(buf); - } - output_string.append("\n"); - } - File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo"); -} - -static bool MakeIndividualGlyphs(Pix* pix, const std::vector& vbox, - const int input_tiff_page) { - // If checks fail, return false without exiting text2image - if (!pix) { - tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is nullptr\n"); - return false; - } else if (FLAGS_glyph_resized_size <= 0) { - tprintf("ERROR: --glyph_resized_size must be positive\n"); - return false; - } else if (FLAGS_glyph_num_border_pixels_to_pad < 0) { - tprintf("ERROR: --glyph_num_border_pixels_to_pad must be 0 or positive\n"); - return false; - } - - const int n_boxes = vbox.size(); - int n_boxes_saved = 0; - int current_tiff_page = 0; - int y_previous = 0; - static int glyph_count = 0; - for (int i = 0; i < n_boxes; i++) { - // Get one bounding box - Box* b = vbox[i]->mutable_box(); - if (!b) continue; - const int x = b->x; - const int y = b->y; - const int w = b->w; - const int h = b->h; - // Check present tiff page (for multipage tiff) - if (y < y_previous-pixGetHeight(pix)/10) { - tprintf("ERROR: Wrap-around encountered, at i=%d\n", i); - current_tiff_page++; - } - if (current_tiff_page < input_tiff_page) continue; - else if (current_tiff_page > input_tiff_page) break; - // Check box validity - if (x < 0 || y < 0 || - (x+w-1) >= pixGetWidth(pix) || - (y+h-1) >= pixGetHeight(pix)) { - tprintf("ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d" - " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h); - continue; - } else if (w < FLAGS_glyph_num_border_pixels_to_pad && - h < FLAGS_glyph_num_border_pixels_to_pad) { - tprintf("ERROR: Input image too small to be a character, at i=%d\n", i); - continue; - } - // Crop the boxed character - Pix* 
pix_glyph = pixClipRectangle(pix, b, nullptr); - if (!pix_glyph) { - tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i); - continue; - } - // Resize to square - Pix* pix_glyph_sq = pixScaleToSize(pix_glyph, - FLAGS_glyph_resized_size, - FLAGS_glyph_resized_size); - if (!pix_glyph_sq) { - tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i); - continue; - } - // Zero-pad - Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, - FLAGS_glyph_num_border_pixels_to_pad, - 0); - if (!pix_glyph_sq_pad) { - tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n", - i); - continue; - } - // Write out - Pix* pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false); - char filename[1024]; - snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(), - glyph_count++); - if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) { - tprintf("ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s," - " at i=%d\n", filename, i); - continue; - } - - pixDestroy(&pix_glyph); - pixDestroy(&pix_glyph_sq); - pixDestroy(&pix_glyph_sq_pad); - pixDestroy(&pix_glyph_sq_pad_8); - n_boxes_saved++; - y_previous = y; - } - if (n_boxes_saved == 0) { - return false; - } else { - tprintf("Total number of characters saved = %d\n", n_boxes_saved); - return true; - } -} -} // namespace tesseract - -using tesseract::DegradeImage; -using tesseract::ExtractFontProperties; -using tesseract::File; -using tesseract::FontUtils; -using tesseract::SpanUTF8NotWhitespace; -using tesseract::SpanUTF8Whitespace; -using tesseract::StringRenderer; - -static int Main() { - if (FLAGS_list_available_fonts) { - const std::vector& all_fonts = FontUtils::ListAvailableFonts(); - for (unsigned int i = 0; i < all_fonts.size(); ++i) { - // Remove trailing comma: pango-font-description-to-string adds a comma - // to some fonts. 
- // See https://github.com/tesseract-ocr/tesseract/issues/408 - std::string font_name(all_fonts[i].c_str()); - if (font_name.back() == ',') - font_name.pop_back(); - printf("%3u: %s\n", i, font_name.c_str()); - ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()), - "Font %s is unrecognized.\n", all_fonts[i].c_str()); - } - return EXIT_SUCCESS; - } - - // Check validity of input flags. - if (FLAGS_text.empty()) { - tprintf("'--text' option is missing!\n"); - exit(1); - } - if (FLAGS_outputbase.empty()) { - tprintf("'--outputbase' option is missing!\n"); - exit(1); - } - if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) { - tprintf("Use '--unicharset_file' only if '--render_ngrams' is set.\n"); - exit(1); - } - - std::string font_name = FLAGS_font.c_str(); - if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(font_name.c_str())) { - font_name += ','; - std::string pango_name; - if (!FontUtils::IsAvailableFont(font_name.c_str(), &pango_name)) { - tprintf("Could not find font named '%s'.\n", FLAGS_font.c_str()); - if (!pango_name.empty()) { - tprintf("Pango suggested font '%s'.\n", pango_name.c_str()); - } - tprintf("Please correct --font arg.\n"); - exit(1); - } - } - - if (FLAGS_render_ngrams) - FLAGS_output_word_boxes = true; - - char font_desc_name[1024]; - snprintf(font_desc_name, 1024, "%s %d", font_name.c_str(), - static_cast(FLAGS_ptsize)); - - StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize); - render.set_add_ligatures(FLAGS_ligatures); - render.set_leading(FLAGS_leading); - render.set_resolution(FLAGS_resolution); - render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize); - render.set_h_margin(FLAGS_margin); - render.set_v_margin(FLAGS_margin); - render.set_output_word_boxes(FLAGS_output_word_boxes); - render.set_box_padding(FLAGS_box_padding); - render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words); - render.set_underline_start_prob(FLAGS_underline_start_prob); - 
render.set_underline_continuation_prob(FLAGS_underline_continuation_prob); - - // Set text rendering orientation and their forms. - if (FLAGS_writing_mode == "horizontal") { - // Render regular horizontal text (default). - render.set_vertical_text(false); - render.set_gravity_hint_strong(false); - render.set_render_fullwidth_latin(false); - } else if (FLAGS_writing_mode == "vertical") { - // Render vertical text. Glyph orientation is selected by Pango. - render.set_vertical_text(true); - render.set_gravity_hint_strong(false); - render.set_render_fullwidth_latin(false); - } else if (FLAGS_writing_mode == "vertical-upright") { - // Render vertical text. Glyph orientation is set to be upright. - // Also Basic Latin characters are converted to their fullwidth forms - // on rendering, since fullwidth Latin characters are well designed to fit - // vertical text lines, while .box files store halfwidth Basic Latin - // unichars. - render.set_vertical_text(true); - render.set_gravity_hint_strong(true); - render.set_render_fullwidth_latin(true); - } else { - tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str()); - exit(1); - } - - std::string src_utf8; - // This c_str is NOT redundant! - if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) { - tprintf("Failed to read file: %s\n", FLAGS_text.c_str()); - exit(1); - } - - // Remove the unicode mark if present. - if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) { - src_utf8.erase(0, 3); - } - tlog(1, "Render string of size %d\n", src_utf8.length()); - - if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) { - // Try to preserve behavior of old text2image by expanding inter-word - // spaces by a factor of 4. - const std::string kSeparator = FLAGS_render_ngrams ? " " : " "; - // Also restrict the number of characters per line to try and avoid - // line-breaking in the middle of words like "-A", "R$" etc. which are - // otherwise allowed by the standard unicode line-breaking rules. 
- const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100; - std::string rand_utf8; - UNICHARSET unicharset; - if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() && - !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) { - tprintf("Failed to load unicharset from file %s\n", - FLAGS_unicharset_file.c_str()); - exit(1); - } - - // If we are rendering ngrams that will be OCRed later, shuffle them so that - // tesseract does not have difficulties finding correct baseline, word - // spaces, etc. - const char *str8 = src_utf8.c_str(); - int len = src_utf8.length(); - int step; - std::vector > offsets; - int offset = SpanUTF8Whitespace(str8); - while (offset < len) { - step = SpanUTF8NotWhitespace(str8 + offset); - offsets.push_back(std::make_pair(offset, step)); - offset += step; - offset += SpanUTF8Whitespace(str8 + offset); - } - if (FLAGS_render_ngrams) - std::random_shuffle(offsets.begin(), offsets.end()); - - for (size_t i = 0, line = 1; i < offsets.size(); ++i) { - const char *curr_pos = str8 + offsets[i].first; - int ngram_len = offsets[i].second; - // Skip words that contain characters not in found in unicharset. 
- std::string cleaned = UNICHARSET::CleanupString(curr_pos, ngram_len); - if (!FLAGS_unicharset_file.empty() && - !unicharset.encodable_string(cleaned.c_str(), nullptr)) { - continue; - } - rand_utf8.append(curr_pos, ngram_len); - if (rand_utf8.length() > line * kCharsPerLine) { - rand_utf8.append(" \n"); - ++line; - if (line & 0x1) rand_utf8.append(kSeparator); - } else { - rand_utf8.append(kSeparator); - } - } - tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length()); - src_utf8.swap(rand_utf8); - } - if (FLAGS_only_extract_font_properties) { - tprintf("Extracting font properties only\n"); - ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str()); - tprintf("Done!\n"); - return 0; - } - - int im = 0; - std::vector page_rotation; - const char* to_render_utf8 = src_utf8.c_str(); - - tesseract::TRand randomizer; - randomizer.set_seed(kRandomSeed); - std::vector font_names; - // We use a two pass mechanism to rotate images in both direction. - // The first pass(0) will rotate the images in random directions and - // the second pass(1) will mirror those rotations. - int num_pass = FLAGS_bidirectional_rotation ? 2 : 1; - for (int pass = 0; pass < num_pass; ++pass) { - int page_num = 0; - std::string font_used; - for (size_t offset = 0; - offset < strlen(to_render_utf8) && - (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages); - ++im, ++page_num) { - tlog(1, "Starting page %d\n", im); - Pix* pix = nullptr; - if (FLAGS_find_fonts) { - offset += render.RenderAllFontsToImage(FLAGS_min_coverage, - to_render_utf8 + offset, - strlen(to_render_utf8 + offset), - &font_used, &pix); - } else { - offset += render.RenderToImage(to_render_utf8 + offset, - strlen(to_render_utf8 + offset), &pix); - } - if (pix != nullptr) { - float rotation = 0; - if (pass == 1) { - // Pass 2, do mirror rotation. - rotation = -1 * page_rotation[page_num]; - } - if (FLAGS_degrade_image) { - pix = DegradeImage(pix, FLAGS_exposure, &randomizer, - FLAGS_rotate_image ? 
&rotation : nullptr); - } - render.RotatePageBoxes(rotation); - - if (pass == 0) { - // Pass 1, rotate randomly and store the rotation.. - page_rotation.push_back(rotation); - } - - Pix* gray_pix = pixConvertTo8(pix, false); - pixDestroy(&pix); - Pix* binary = pixThresholdToBinary(gray_pix, 128); - pixDestroy(&gray_pix); - char tiff_name[1024]; - if (FLAGS_find_fonts) { - if (FLAGS_render_per_font) { - std::string fontname_for_file = tesseract::StringReplace( - font_used, " ", "_"); - snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(), - fontname_for_file.c_str()); - pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w"); - tprintf("Rendered page %d to file %s\n", im, tiff_name); - } else { - font_names.push_back(font_used); - } - } else { - snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str()); - pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a"); - tprintf("Rendered page %d to file %s\n", im, tiff_name); - } - // Make individual glyphs - if (FLAGS_output_individual_glyph_images) { - if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) { - tprintf("ERROR: Individual glyphs not saved\n"); - } - } - pixDestroy(&binary); - } - if (FLAGS_find_fonts && offset != 0) { - // We just want a list of names, or some sample images so we don't need - // to render more than the first page of the text. 
- break; - } - } - } - if (!FLAGS_find_fonts) { - std::string box_name = FLAGS_outputbase.c_str(); - box_name += ".box"; - render.WriteAllBoxes(box_name); - } else if (!FLAGS_render_per_font && !font_names.empty()) { - std::string filename = FLAGS_outputbase.c_str(); - filename += ".fontlist.txt"; - FILE* fp = fopen(filename.c_str(), "wb"); - if (fp == nullptr) { - tprintf("Failed to create output font list %s\n", filename.c_str()); - } else { - for (size_t i = 0; i < font_names.size(); ++i) { - fprintf(fp, "%s\n", font_names[i].c_str()); - } - fclose(fp); - } - } - - return 0; -} - -int main(int argc, char** argv) { - // Respect enviroment variable. could be: - // fc (fontconfig), win32, and coretext - // If not set force fontconfig for Mac OS. - // See https://github.com/tesseract-ocr/tesseract/issues/736 - char* backend; - backend = getenv("PANGOCAIRO_BACKEND"); - if (backend == NULL) { - putenv("PANGOCAIRO_BACKEND=fc"); - } else { - printf("Using '%s' as pango cairo backend based on enviroment " - "variable.\n", backend); - } - tesseract::CheckSharedLibraryVersion(); - if (argc > 1) { - if ((strcmp(argv[1], "-v") == 0) || - (strcmp(argv[1], "--version") == 0)) { - FontUtils::PangoFontTypeInfo(); - } - } - tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true); - return Main(); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tlog.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tlog.cpp deleted file mode 100644 index e493bd09..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tlog.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/********************************************************************** - * File: tlog.cpp - * Description: Variant of printf with logging level controllable by a - * commandline flag. - * Author: Ranjith Unnikrishnan - * Created: Wed Nov 20 2013 - * - * (C) Copyright 2013, Google Inc. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "tlog.h" - -INT_PARAM_FLAG(tlog_level, 0, "Minimum logging level for tlog() output"); diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tlog.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tlog.h deleted file mode 100644 index 29db457c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/tlog.h +++ /dev/null @@ -1,41 +0,0 @@ -/********************************************************************** - * File: tlog.h - * Description: Variant of printf with logging level controllable by a - * commandline flag. - * Author: Ranjith Unnikrishnan - * Created: Wed Nov 20 2013 - * - * (C) Copyright 2013, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ -#ifndef TESSERACT_TRAINING_TLOG_H_ -#define TESSERACT_TRAINING_TLOG_H_ - -#include "commandlineflags.h" -#include "errcode.h" -#include "tprintf.h" - -DECLARE_INT_PARAM_FLAG(tlog_level); - -// Variant guarded by the numeric logging level parameter FLAGS_tlog_level -// (default 0). Code using ParseCommandLineFlags() can control its value using -// the --tlog_level commandline argument. Otherwise it must be specified in a -// config file like other params. -#define tlog(level, ...) { \ - if (FLAGS_tlog_level >= level) { \ - tprintf(__VA_ARGS__); \ - } \ -} - -#define TLOG_IS_ON(level) (FLAGS_tlog_level >= level) - -#endif // TESSERACT_TRAINING_TLOG_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_extractor.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_extractor.cpp deleted file mode 100644 index 64b93717..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_extractor.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharset_extractor.cpp -// Description: Unicode character/ligature set extractor. -// Author: Thomas Kielbus -// Created: Wed Jun 28 17:05:01 PDT 2006 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -// Given a list of box files or text files on the command line, this program -// normalizes the text according to command-line options and generates -// a unicharset. - -#include -#include "boxread.h" -#include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion -#include "genericvector.h" -#include "lang_model_helpers.h" -#include "normstrngs.h" -#include "strngs.h" -#include "unicharset.h" -#include "unicharset_training_utils.h" - -STRING_PARAM_FLAG(output_unicharset, "unicharset", "Output file path"); -INT_PARAM_FLAG(norm_mode, 1, - "Normalization mode: 1=Combine graphemes, " - "2=Split graphemes, 3=Pure unicode"); - -namespace tesseract { - -// Helper normalizes and segments the given strings according to norm_mode, and -// adds the segmented parts to unicharset. -static void AddStringsToUnicharset(const GenericVector& strings, - int norm_mode, UNICHARSET* unicharset) { - for (int i = 0; i < strings.size(); ++i) { - std::vector normalized; - if (NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone, - static_cast(norm_mode), - /*report_errors*/ true, - strings[i].string(), &normalized)) { - for (const std::string& normed : normalized) { - - // normed is a UTF-8 encoded string - if (normed.empty() || IsUTF8Whitespace(normed.c_str())) continue; - unicharset->unichar_insert(normed.c_str()); - } - } else { - tprintf("Normalization failed for string '%s'\n", strings[i].c_str()); - } - } -} - -static int Main(int argc, char** argv) { - UNICHARSET unicharset; - // Load input files - for (int arg = 1; arg < argc; ++arg) { - STRING file_data = tesseract::ReadFile(argv[arg], /*reader*/ nullptr); - if (file_data.length() == 0) continue; - GenericVector texts; - if (ReadMemBoxes(-1, /*skip_blanks*/ true, &file_data[0], - /*continue_on_failure*/ false, /*boxes*/ nullptr, - &texts, /*box_texts*/ nullptr, /*pages*/ nullptr)) { - tprintf("Extracting 
unicharset from box file %s\n", argv[arg]); - } else { - tprintf("Extracting unicharset from plain text file %s\n", argv[arg]); - texts.truncate(0); - file_data.split('\n', &texts); - } - AddStringsToUnicharset(texts, FLAGS_norm_mode, &unicharset); - } - SetupBasicProperties(/*report_errors*/ true, /*decompose*/ false, - &unicharset); - // Write unicharset file. - if (unicharset.save_to_file(FLAGS_output_unicharset.c_str())) { - tprintf("Wrote unicharset file %s\n", FLAGS_output_unicharset.c_str()); - } else { - tprintf("Cannot save unicharset file %s\n", - FLAGS_output_unicharset.c_str()); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - -} // namespace tesseract - -int main(int argc, char** argv) { - tesseract::CheckSharedLibraryVersion(); - if (argc > 1) { - tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true); - } - if (argc < 2) { - tprintf( - "Usage: %s [--output_unicharset filename] [--norm_mode mode]" - " box_or_text_file [...]\n", - argv[0]); - tprintf("Where mode means:\n"); - tprintf(" 1=combine graphemes (use for Latin and other simple scripts)\n"); - tprintf(" 2=split graphemes (use for Indic/Khmer/Myanmar)\n"); - tprintf(" 3=pure unicode (use for Arabic/Hebrew/Thai/Tibetan)\n"); - tprintf("Reads box or plain text files to extract the unicharset.\n"); - return EXIT_FAILURE; - } - return tesseract::Main(argc, argv); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_training_utils.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_training_utils.cpp deleted file mode 100644 index 418c8052..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_training_utils.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharset_training_utils.cpp -// Description: Training utilities for UNICHARSET. 
-// Author: Ray Smith -// Created: Fri Oct 17 17:09:01 PDT 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "unicharset_training_utils.h" - -#include -#include -#include -#include - -#include "fileio.h" -#include "icuerrorcode.h" -#include "normstrngs.h" -#include "statistc.h" -#include "unichar.h" -#include "unicharset.h" -#include "unicode/uchar.h" // from libicu -#include "unicode/uscript.h" // from libicu - -namespace tesseract { - -// Helper sets the character attribute properties and sets up the script table. -// Does not set tops and bottoms. -void SetupBasicProperties(bool report_errors, bool decompose, - UNICHARSET* unicharset) { - for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) { - // Convert any custom ligatures. - const char* unichar_str = unicharset->id_to_unichar(unichar_id); - for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) { - if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) { - unichar_str = UNICHARSET::kCustomLigatures[i][0]; - break; - } - } - - // Convert the unichar to UTF32 representation - std::vector uni_vector = UNICHAR::UTF8ToUTF32(unichar_str); - - // Assume that if the property is true for any character in the string, - // then it holds for the whole "character". 
- bool unichar_isalpha = false; - bool unichar_islower = false; - bool unichar_isupper = false; - bool unichar_isdigit = false; - bool unichar_ispunct = false; - - for (char32 u_ch : uni_vector) { - if (u_isalpha(u_ch)) unichar_isalpha = true; - if (u_islower(u_ch)) unichar_islower = true; - if (u_isupper(u_ch)) unichar_isupper = true; - if (u_isdigit(u_ch)) unichar_isdigit = true; - if (u_ispunct(u_ch)) unichar_ispunct = true; - } - - unicharset->set_isalpha(unichar_id, unichar_isalpha); - unicharset->set_islower(unichar_id, unichar_islower); - unicharset->set_isupper(unichar_id, unichar_isupper); - unicharset->set_isdigit(unichar_id, unichar_isdigit); - unicharset->set_ispunctuation(unichar_id, unichar_ispunct); - - tesseract::IcuErrorCode err; - unicharset->set_script(unichar_id, uscript_getName( - uscript_getScript(uni_vector[0], err))); - - const int num_code_points = uni_vector.size(); - // Obtain the lower/upper case if needed and record it in the properties. - unicharset->set_other_case(unichar_id, unichar_id); - if (unichar_islower || unichar_isupper) { - std::vector other_case(num_code_points, 0); - for (int i = 0; i < num_code_points; ++i) { - // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used. - // However since they deal with UChars (so need a conversion function - // from char32 or UTF8string) and require a meaningful locale string, - // for now u_tolower()/u_toupper() are used. - other_case[i] = unichar_islower ? 
u_toupper(uni_vector[i]) : - u_tolower(uni_vector[i]); - } - std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case); - UNICHAR_ID other_case_id = - unicharset->unichar_to_id(other_case_uch.c_str()); - if (other_case_id != INVALID_UNICHAR_ID) { - unicharset->set_other_case(unichar_id, other_case_id); - } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) { - tprintf("Other case %s of %s is not in unicharset\n", - other_case_uch.c_str(), unichar_str); - } - } - - // Set RTL property and obtain mirror unichar ID from ICU. - std::vector mirrors(num_code_points, 0); - for (int i = 0; i < num_code_points; ++i) { - mirrors[i] = u_charMirror(uni_vector[i]); - if (i == 0) { // set directionality to that of the 1st code point - unicharset->set_direction(unichar_id, - static_cast( - u_charDirection(uni_vector[i]))); - } - } - std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors); - UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str()); - if (mirror_uch_id != INVALID_UNICHAR_ID) { - unicharset->set_mirror(unichar_id, mirror_uch_id); - } else if (report_errors) { - tprintf("Mirror %s of %s is not in unicharset\n", - mirror_uch.c_str(), unichar_str); - } - - // Record normalized version of this unichar. - std::string normed_str; - if (unichar_id != 0 && - tesseract::NormalizeUTF8String( - decompose ? tesseract::UnicodeNormMode::kNFKD - : tesseract::UnicodeNormMode::kNFKC, - tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone, - unichar_str, &normed_str) && - !normed_str.empty()) { - unicharset->set_normed(unichar_id, normed_str.c_str()); - } else { - unicharset->set_normed(unichar_id, unichar_str); - } - ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size()); - } - unicharset->post_load_setup(); -} - -// Helper sets the properties from universal script unicharsets, if found. 
-void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset) { - for (int s = 0; s < unicharset->get_script_table_size(); ++s) { - // Load the unicharset for the script if available. - std::string filename = script_dir + "/" + - unicharset->get_script_from_script_id(s) + ".unicharset"; - UNICHARSET script_set; - if (script_set.load_from_file(filename.c_str())) { - unicharset->SetPropertiesFromOther(script_set); - } else if (s != unicharset->common_sid() && s != unicharset->null_sid()) { - tprintf("Failed to load script unicharset from:%s\n", filename.c_str()); - } - } - for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset->size(); ++c) { - if (unicharset->PropertiesIncomplete(c)) { - tprintf("Warning: properties incomplete for index %d = %s\n", c, - unicharset->id_to_unichar(c)); - } - } -} - -// Helper gets the combined x-heights string. -std::string GetXheightString(const std::string& script_dir, - const UNICHARSET& unicharset) { - std::string xheights_str; - for (int s = 0; s < unicharset.get_script_table_size(); ++s) { - // Load the xheights for the script if available. - std::string filename = script_dir + "/" + - unicharset.get_script_from_script_id(s) + ".xheights"; - std::string script_heights; - if (File::ReadFileToString(filename, &script_heights)) - xheights_str += script_heights; - } - return xheights_str; -} - -// Helper to set the properties for an input unicharset file, writes to the -// output file. If an appropriate script unicharset can be found in the -// script_dir directory, then the tops and bottoms are expanded using the -// script unicharset. -// If non-empty, xheight data for the fonts are written to the xheights_file. 
-void SetPropertiesForInputFile(const std::string& script_dir, - const std::string& input_unicharset_file, - const std::string& output_unicharset_file, - const std::string& output_xheights_file) { - UNICHARSET unicharset; - - // Load the input unicharset - unicharset.load_from_file(input_unicharset_file.c_str()); - tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(), - input_unicharset_file.c_str()); - - // Set unichar properties - tprintf("Setting unichar properties\n"); - SetupBasicProperties(true, false, &unicharset); - tprintf("Setting script properties\n"); - SetScriptProperties(script_dir, &unicharset); - if (!output_xheights_file.empty()) { - std::string xheights_str = GetXheightString(script_dir, unicharset); - File::WriteStringToFileOrDie(xheights_str, output_xheights_file); - } - - // Write the output unicharset - tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str()); - unicharset.save_to_file(output_unicharset_file.c_str()); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_training_utils.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_training_utils.h deleted file mode 100644 index 410eeb39..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/unicharset_training_utils.h +++ /dev/null @@ -1,58 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: unicharset_training_utils.h -// Description: Training utilities for UNICHARSET. -// Author: Ray Smith -// Created: Fri Oct 17 17:14:01 PDT 2014 -// -// (C) Copyright 2014, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_ -#define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_ - -#include - -#include "platform.h" - -class STATS; -class UNICHARSET; - -namespace tesseract { - -// Helper sets the character attribute properties and sets up the script table. -// Does not set tops and bottoms. -void SetupBasicProperties(bool report_errors, bool decompose, - UNICHARSET* unicharset); -// Default behavior is to compose, until it is proven that decomposed benefits -// at least one language. -inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { - SetupBasicProperties(report_errors, false, unicharset); -} -// Helper sets the properties from universal script unicharsets, if found. -void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset); -// Helper gets the combined x-heights string. -std::string GetXheightString(const std::string& script_dir, const UNICHARSET& unicharset); - -// Helper to set the properties for an input unicharset file, writes to the -// output file. If an appropriate script unicharset can be found in the -// script_dir directory, then the tops and bottoms are expanded using the -// script unicharset. -// If non-empty, xheight data for the fonts are written to the xheights_file. 
-void SetPropertiesForInputFile(const std::string& script_dir, - const std::string& input_unicharset_file, - const std::string& output_unicharset_file, - const std::string& output_xheights_file); - -} // namespace tesseract. - -#endif // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/util.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/util.h deleted file mode 100644 index 3e9957b1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/util.h +++ /dev/null @@ -1,65 +0,0 @@ -/********************************************************************** - * File: util.h - * Description: Misc STL string utility functions. - * Author: Samuel Charron - * Created: Mon Nov 18 2013 - * - * (C) Copyright 2013, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_UTIL_H_ -#define TESSERACT_TRAINING_UTIL_H_ - -#include -#include -#include -#include - -#include "platform.h" - -// StringHash is the hashing functor needed by the stl hash map. 
-#ifndef COMPILER_MSVC -struct StringHash { - size_t operator()(const std::string& s) const { - size_t hash_code = 0; - const char* str = s.c_str(); - for (int ch = 0; str[ch] != 0; ++ch) { - hash_code += str[ch] << (ch % 24); - } - return hash_code; - } -}; -#else // COMPILER_MSVC -struct StringHash : public stdext::hash_compare { - size_t operator()(const std::string& s) const { - size_t hash_code = 0; - const char* str = s.c_str(); - for (int ch = 0; str[ch] != 0; ++ch) { - hash_code += str[ch] << (ch % 24); - } - return hash_code; - } - bool operator()(const std::string& s1, const std::string& s2) const { - return s1 == s2; - } -}; -#endif // !COMPILER_MSVC - -#ifdef GOOGLE_TESSERACT -#include "base/heap-checker.h" -#define DISABLE_HEAP_LEAK_CHECK HeapLeakChecker::Disabler disabler -#else -#define DISABLE_HEAP_LEAK_CHECK {} -#endif - -#endif // TESSERACT_TRAINING_UTIL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_grapheme.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_grapheme.cpp deleted file mode 100644 index b6965de8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_grapheme.cpp +++ /dev/null @@ -1,176 +0,0 @@ -#include "validate_grapheme.h" -#include "tprintf.h" -#include "unicode/uchar.h" // From libicu - -namespace tesseract { - -bool ValidateGrapheme::ConsumeGraphemeIfValid() { - int num_codes = codes_.size(); - char32 prev_prev_ch = ' '; - char32 prev_ch = ' '; - CharClass prev_cc = CharClass::kWhitespace; - int num_codes_in_grapheme = 0; - while (codes_used_ < num_codes) { - CharClass cc = codes_[codes_used_].first; - char32 ch = codes_[codes_used_].second; - const bool is_combiner = - cc == CharClass::kCombiner || cc == CharClass::kVirama; - // Reject easily detected badly formed sequences. 
- if (prev_cc == CharClass::kWhitespace && is_combiner) { - if (report_errors_) tprintf("Word started with a combiner:0x%x\n", ch); - return false; - } - if (prev_cc == CharClass::kVirama && cc == CharClass::kVirama) { - if (report_errors_) - tprintf("Two grapheme links in a row:0x%x 0x%x\n", prev_ch, ch); - return false; - } - if (prev_cc != CharClass::kWhitespace && cc != CharClass::kWhitespace && - IsBadlyFormed(prev_ch, ch)) { - return false; - } - bool prev_is_fwd_combiner = - prev_ch == kZeroWidthJoiner || prev_cc == CharClass::kVirama || - (prev_ch == kZeroWidthNonJoiner && - (cc == CharClass::kVirama || prev_prev_ch == kZeroWidthJoiner)); - if (num_codes_in_grapheme > 0 && !is_combiner && !prev_is_fwd_combiner) - break; - CodeOnlyToOutput(); - ++num_codes_in_grapheme; - prev_prev_ch = prev_ch; - prev_ch = ch; - prev_cc = cc; - } - if (num_codes_in_grapheme > 0) MultiCodePart(num_codes_in_grapheme); - return true; -} - -Validator::CharClass ValidateGrapheme::UnicodeToCharClass(char32 ch) const { - if (IsVedicAccent(ch)) return CharClass::kVedicMark; - // The ZeroWidth[Non]Joiner characters are mapped to kCombiner as they - // always combine with the previous character. - if (u_hasBinaryProperty(ch, UCHAR_GRAPHEME_LINK)) return CharClass::kVirama; - if (u_isUWhiteSpace(ch)) return CharClass::kWhitespace; - // Workaround for Javanese Aksara's Taling, do not label it as a combiner - if (ch == 0xa9ba) return CharClass::kConsonant; - int char_type = u_charType(ch); - if (char_type == U_NON_SPACING_MARK || char_type == U_ENCLOSING_MARK || - char_type == U_COMBINING_SPACING_MARK || ch == kZeroWidthNonJoiner || - ch == kZeroWidthJoiner) - return CharClass::kCombiner; - return CharClass::kOther; -} - -// Helper returns true if the sequence prev_ch,ch is invalid. -bool ValidateGrapheme::IsBadlyFormed(char32 prev_ch, char32 ch) { - // Reject badly formed Indic vowels. 
- if (IsBadlyFormedIndicVowel(prev_ch, ch)) { - if (report_errors_) - tprintf("Badly formed Indic vowel sequence:0x%x 0x%x\n", prev_ch, ch); - return true; - } - if (IsBadlyFormedThai(prev_ch, ch)) { - if (report_errors_) tprintf("Badly formed Thai:0x%x 0x%x\n", prev_ch, ch); - return true; - } - return false; -} - -// Helper returns true if the sequence prev_ch,ch is an invalid Indic vowel. -// Some vowels in Indic scripts may be analytically decomposed into atomic pairs -// of components that are themselves valid unicode symbols. (See Table 12-1 in -// http://www.unicode.org/versions/Unicode9.0.0/ch12.pdf -// for examples in Devanagari). The Unicode standard discourages specifying -// vowels this way, but they are sometimes encountered in text, probably because -// some editors still permit it. Renderers however dislike such pairs, and so -// this function may be used to detect their occurrence for removal. -// TODO(rays) This function only covers a subset of Indic languages and doesn't -// include all rules. Add rules as appropriate to support other languages or -// find a way to generalize these existing rules that makes use of the -// regularity of the mapping from ISCII to Unicode. -/* static */ -bool ValidateGrapheme::IsBadlyFormedIndicVowel(char32 prev_ch, char32 ch) { - return ((prev_ch == 0x905 && (ch == 0x946 || ch == 0x93E)) || - (prev_ch == 0x909 && ch == 0x941) || - (prev_ch == 0x90F && (ch >= 0x945 && ch <= 0x947)) || - (prev_ch == 0x905 && (ch >= 0x949 && ch <= 0x94C)) || - (prev_ch == 0x906 && (ch >= 0x949 && ch <= 0x94C)) || - // Illegal combinations of two dependent Devanagari vowels. - (prev_ch == 0x93E && (ch >= 0x945 && ch <= 0x948)) || - // Dependent Devanagari vowels following a virama. 
- (prev_ch == 0x94D && (ch >= 0x93E && ch <= 0x94C)) || - // Bengali vowels (Table 9-5, pg 313) - (prev_ch == 0x985 && ch == 0x9BE) || - // Telugu vowels (Table 9-19, pg 331) - (prev_ch == 0xC12 && (ch == 0xC55 || ch == 0xC4C)) || - // Kannada vowels (Table 9-20, pg 332) - (prev_ch == 0xC92 && ch == 0xCCC)); -} - -// Helper returns true if ch is a Thai consonant. -static bool IsThaiConsonant(char32 ch) { return 0xe01 <= ch && ch <= 0xe2e; } - -// Helper returns true is ch is a before-consonant vowel. -static bool IsThaiBeforeConsonantVowel(char32 ch) { - return 0xe40 <= ch && ch <= 0xe44; -} - -// Helper returns true if ch is a Thai tone mark. -static bool IsThaiToneMark(char32 ch) { return 0xe48 <= ch && ch <= 0xe4b; } - -// Helper returns true if ch is a Thai vowel that may be followed by a tone -// mark. -static bool IsThaiTonableVowel(char32 ch) { - return (0xe34 <= ch && ch <= 0xe39) || ch == 0xe31; -} - -// Helper returns true if the sequence prev_ch,ch is invalid Thai. -// These rules come from a native Thai speaker, and are not covered by the -// Thai section in the unicode book: -// http://www.unicode.org/versions/Unicode9.0.0/ch16.pdf -// Comments below added by Ray interpreting the code ranges. -/* static */ -bool ValidateGrapheme::IsBadlyFormedThai(char32 prev_ch, char32 ch) { - // Tone marks must follow consonants or specific vowels. - if (IsThaiToneMark(ch) && - !(IsThaiConsonant(prev_ch) || IsThaiTonableVowel(prev_ch))) { - return true; - } - // Tonable vowels must follow consonants. - if ((IsThaiTonableVowel(ch) || ch == 0xe47) && !IsThaiConsonant(prev_ch)) { - return true; - } - // Thanthakhat must follow consonant or specific vowels. - if (ch == 0xe4c && - !(IsThaiConsonant(prev_ch) || prev_ch == 0xe38 || prev_ch == 0xe34)) { - return true; - } - // Nikkhahit must follow a consonant ?or certain markers?. 
- // TODO(rays) confirm this, but there were so many in the ground truth of the - // validation set that it seems reasonable to assume it is valid. - if (ch == 0xe4d && - !(IsThaiConsonant(prev_ch) || prev_ch == 0xe48 || prev_ch == 0xe49)) { - return true; - } - // The vowels e30, e32, e33 can be used more liberally. - if ((ch == 0xe30 || ch == 0xe32 || ch == 0xe33) && - !(IsThaiConsonant(prev_ch) || IsThaiToneMark(prev_ch)) && - !(prev_ch == 0xe32 && ch == 0xe30) && - !(prev_ch == 0xe4d && ch == 0xe32)) { - return true; - } - // Some vowels come before consonants, and therefore cannot follow things - // that cannot end a syllable. - if (IsThaiBeforeConsonantVowel(ch) && - (IsThaiBeforeConsonantVowel(prev_ch) || prev_ch == 0xe31 || - prev_ch == 0xe37)) { - return true; - } - // Don't allow the standalone vowel U+0e24 to be followed by other vowels. - if ((0xe30 <= ch && ch <= 0xe4D) && prev_ch == 0xe24) { - return true; - } - return false; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_grapheme.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_grapheme.h deleted file mode 100644 index 138ad570..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_grapheme.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_ -#define TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_ - -#include "validator.h" - -namespace tesseract { - -// Subclass of Validator that validates and segments generic unicode into -// grapheme clusters, including Latin with diacritics. -class ValidateGrapheme : public Validator { - public: - ValidateGrapheme(ViramaScript script, bool report_errors) - : Validator(script, report_errors) {} - ~ValidateGrapheme() {} - - protected: - // Consumes the next Grapheme in codes_[codes_used_++...] and copies it to - // parts_ and output_. 
Returns true if a valid Grapheme was consumed, - // otherwise does not increment codes_used_. - bool ConsumeGraphemeIfValid() override; - // Returns the CharClass corresponding to the given Unicode ch. - CharClass UnicodeToCharClass(char32 ch) const override; - - private: - // Helper returns true if the sequence prev_ch,ch is invalid. - bool IsBadlyFormed(char32 prev_ch, char32 ch); - // Helper returns true if the sequence prev_ch,ch is an invalid Indic vowel. - static bool IsBadlyFormedIndicVowel(char32 prev_ch, char32 ch); - // Helper returns true if the sequence prev_ch,ch is invalid Thai. - static bool IsBadlyFormedThai(char32 prev_ch, char32 ch); -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_indic.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_indic.cpp deleted file mode 100644 index 4cc1816f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_indic.cpp +++ /dev/null @@ -1,275 +0,0 @@ -#include "validate_indic.h" -#include "errcode.h" -#include "tprintf.h" - -namespace tesseract { - -// Returns whether codes matches the pattern for an Indic Grapheme. -// The ISCII standard http://varamozhi.sourceforge.net/iscii91.pdf -// has a BNF for valid syllables (Graphemes) which is modified slightly -// for Unicode. Notably U+200C and U+200D are used before/after the -// virama/virama to express explicit or soft viramas. -// Also the unicode v.9 Malayalam entry states that CZHC can be used in several -// Indic languages to request traditional ligatures, and CzHC is Malayalam- -// specific for requesting open conjuncts. 
-// -// + vowel Grapheme: V[D](v)* -// + consonant Grapheme: (C[N](H|HZ|Hz|ZH)?)*C[N](H|Hz)?[M[P]][D](v)* -bool ValidateIndic::ConsumeGraphemeIfValid() { - switch (codes_[codes_used_].first) { - case CharClass::kConsonant: - return ConsumeConsonantHeadIfValid() && ConsumeConsonantTailIfValid(); - case CharClass::kVowel: - case CharClass::kVedicMark: - return ConsumeVowelIfValid(); - case CharClass::kZeroWidthJoiner: - case CharClass::kZeroWidthNonJoiner: - // Apart from within an aksara, joiners are silently dropped. - if (report_errors_) - tprintf("Dropping isolated joiner: 0x%x\n", codes_[codes_used_].second); - ++codes_used_; - return true; - case CharClass::kOther: - UseMultiCode(1); - return true; - default: - if (report_errors_) { - tprintf("Invalid start of grapheme sequence:%c=0x%x\n", - codes_[codes_used_].first, codes_[codes_used_].second); - } - return false; - } -} - -Validator::CharClass ValidateIndic::UnicodeToCharClass(char32 ch) const { - if (IsVedicAccent(ch)) return CharClass::kVedicMark; - if (ch == kZeroWidthNonJoiner) return CharClass::kZeroWidthNonJoiner; - if (ch == kZeroWidthJoiner) return CharClass::kZeroWidthJoiner; - // Offset from the start of the relevant unicode code block aka code page. - int base = static_cast(script_); - int off = ch - base; - // Anything in another code block is other. - if (off < 0 || off >= kIndicCodePageSize) return CharClass::kOther; - // Exception for Tamil. The aytham character is considered a letter. - if (script_ == ViramaScript::kTamil && off == 0x03) return CharClass::kVowel; - if (off < 0x4) return CharClass::kVowelModifier; - if (script_ == ViramaScript::kSinhala) { - // Sinhala is an exception. 
- if (off <= 0x19) return CharClass::kVowel; - if (off <= 0x49) return CharClass::kConsonant; - if (off == 0x4a) return CharClass::kVirama; - if (off <= 0x5f) return CharClass::kMatra; - } else { - if (off <= 0x14 || off == 0x50) return CharClass::kVowel; - if (off <= 0x3b || (0x58 <= off && off <= 0x5f)) - return CharClass::kConsonant; - // Sinhala doesn't have Nukta or Avagraha. - if (off == 0x3c) return CharClass::kNukta; - if (off == 0x3d) return CharClass::kVowel; // avagraha - if (off <= 0x4c || (0x51 <= off && off <= 0x54)) return CharClass::kMatra; - if (0x55 <= off && off <= 0x57) return CharClass::kMatraPiece; - if (off == 0x4d) return CharClass::kVirama; - } - if (off == 0x60 || off == 0x61) return CharClass::kVowel; - if (off == 0x62 || off == 0x63) return CharClass::kMatra; - // Danda and digits up to 6f are OK as other. - // 70-7f are script-specific. - if (script_ == ViramaScript::kBengali && (off == 0x70 || off == 0x71)) - return CharClass::kConsonant; - if (script_ == ViramaScript::kGurmukhi && (off == 0x72 || off == 0x73)) - return CharClass::kConsonant; - if (script_ == ViramaScript::kSinhala && off == 0x70) - return CharClass::kConsonant; - if (script_ == ViramaScript::kDevanagari && off == 0x70) - return CharClass::kOther; - if (0x70 <= off && off <= 0x73) return CharClass::kVowelModifier; - // Non Indic, Digits, Measures, danda, etc. - return CharClass::kOther; -} - -// Helper consumes/copies a virama and any associated post-virama joiners. -// A linking virama (with either type of pre-virama joiner, post-virama ZWJ, or -// no joiner at all) must be followed by a consonant. -// A non-linking (explicit) virama is indicated by a ZWNJ after it, or a non -// consonant, space, or character from a different script. We clean up the -// representation to make it consistent by adding a ZWNJ if missing from a -// non-linking virama. Returns false with an invalid sequence. 
-bool ValidateIndic::ConsumeViramaIfValid(IndicPair joiner, bool post_matra) { - int num_codes = codes_.size(); - if (joiner.first == CharClass::kOther) { - CodeOnlyToOutput(); - if (codes_used_ < num_codes && - codes_[codes_used_].second == kZeroWidthJoiner) { - // Post-matra viramas must be explicit, so no joiners allowed here. - if (post_matra) { - if (report_errors_) tprintf("ZWJ after a post-matra virama!!\n"); - return false; - } - if (codes_used_ + 1 < num_codes && - codes_[codes_used_ - 2].second != kRayana && - (codes_[codes_used_ + 1].second == kZeroWidthNonJoiner || - codes_[codes_used_ + 1].second == kYayana || - codes_[codes_used_ + 1].second == kRayana)) { - // This combination will be picked up later. - ASSERT_HOST(!CodeOnlyToOutput()); - } else { - // Half-form with optional Nukta. - int len = output_.size() + 1 - output_used_; - if (UseMultiCode(len)) return true; - } - if (codes_used_ < num_codes && - codes_[codes_used_].second == kZeroWidthNonJoiner) { - if (output_used_ == output_.size() || - output_[output_used_] != kRayana) { - if (report_errors_) { - tprintf("Virama ZWJ ZWNJ in non-Sinhala: base=0x%x!\n", - static_cast(script_)); - } - return false; - } - // Special Sinhala case of Stand-alone Repaya. ['RA' H Z z] - if (UseMultiCode(4)) return true; - } - } else if (codes_used_ == num_codes || - codes_[codes_used_].first != CharClass::kConsonant || - post_matra) { - if (codes_used_ == num_codes || - codes_[codes_used_].second != kZeroWidthNonJoiner) { - // It is valid to have an unterminated virama at the end of a word, but - // for consistency, we will always add ZWNJ if not present. - output_.push_back(kZeroWidthNonJoiner); - } else { - CodeOnlyToOutput(); - } - // Explicit virama [H z] - MultiCodePart(2); - } - } else { - // Pre-virama joiner [{Z|z} H] requests specific conjunct. 
- if (UseMultiCode(2)) { - if (report_errors_) - tprintf("Invalid pre-virama joiner with no 2nd consonant!!\n"); - return false; - } - if (codes_[codes_used_].second == kZeroWidthJoiner || - codes_[codes_used_].second == kZeroWidthNonJoiner) { - if (report_errors_) { - tprintf("JHJ!!: 0x%x 0x%x 0x%x\n", joiner.second, output_.back(), - codes_[codes_used_].second); - } - return false; - } - } - // It is good so far as it goes. - return true; -} - -// Helper consumes/copies a series of consonants separated by viramas while -// valid, but not any vowel or other modifiers. -bool ValidateIndic::ConsumeConsonantHeadIfValid() { - const int num_codes = codes_.size(); - // Consonant aksara - do { - CodeOnlyToOutput(); - // Special Sinhala case of [H Z Yayana/Rayana]. - int index = output_.size() - 3; - if (output_used_ <= index && - (output_.back() == kYayana || output_.back() == kRayana) && - IsVirama(output_[index]) && output_[index + 1] == kZeroWidthJoiner) { - MultiCodePart(3); - } - bool have_nukta = false; - if (codes_used_ < num_codes && - codes_[codes_used_].first == CharClass::kNukta) { - have_nukta = true; - CodeOnlyToOutput(); - } - // Test for subscript conjunct. - index = output_.size() - 2 - have_nukta; - if (output_used_ <= index && IsSubscriptScript() && - IsVirama(output_[index])) { - // Output previous virama, consonant + optional nukta. 
- MultiCodePart(2 + have_nukta); - } - IndicPair joiner(CharClass::kOther, 0); - if (codes_used_ < num_codes && - (codes_[codes_used_].second == kZeroWidthJoiner || - (codes_[codes_used_].second == kZeroWidthNonJoiner && - script_ == ViramaScript::kMalayalam))) { - joiner = codes_[codes_used_]; - if (++codes_used_ == num_codes) { - if (report_errors_) { - tprintf("Skipping ending joiner: 0x%x 0x%x\n", output_.back(), - joiner.second); - } - return true; - } - if (codes_[codes_used_].first == CharClass::kVirama) { - output_.push_back(joiner.second); - } else { - if (report_errors_) { - tprintf("Skipping unnecessary joiner: 0x%x 0x%x 0x%x\n", - output_.back(), joiner.second, codes_[codes_used_].second); - } - joiner = std::make_pair(CharClass::kOther, 0); - } - } - if (codes_used_ < num_codes && - codes_[codes_used_].first == CharClass::kVirama) { - if (!ConsumeViramaIfValid(joiner, false)) return false; - } else { - break; // No virama, so the run of consonants is over. - } - } while (codes_used_ < num_codes && - codes_[codes_used_].first == CharClass::kConsonant); - if (output_used_ < output_.size()) MultiCodePart(1); - return true; -} - -// Helper consumes/copies a tail part of a consonant, comprising optional -// matra/piece, vowel modifier, vedic mark, terminating virama. -bool ValidateIndic::ConsumeConsonantTailIfValid() { - if (codes_used_ == codes_.size()) return true; - // No virama: Finish the grapheme. - // Are multiple matras allowed? - if (codes_[codes_used_].first == CharClass::kMatra) { - if (UseMultiCode(1)) return true; - if (codes_[codes_used_].first == CharClass::kMatraPiece) { - if (UseMultiCode(1)) return true; - } - } - while (codes_[codes_used_].first == CharClass::kVowelModifier) { - if (UseMultiCode(1)) return true; - // Only Malayalam allows only repeated 0xd02. 
- if (script_ != ViramaScript::kMalayalam || output_.back() != 0xd02) break; - } - while (codes_[codes_used_].first == CharClass::kVedicMark) { - if (UseMultiCode(1)) return true; - } - if (codes_[codes_used_].first == CharClass::kVirama) { - if (!ConsumeViramaIfValid(IndicPair(CharClass::kOther, 0), true)) { - return false; - } - } - // What we have consumed so far is a valid consonant cluster. - if (output_used_ < output_.size()) MultiCodePart(1); - - return true; -} - -// Helper consumes/copies a vowel and optional modifiers. -bool ValidateIndic::ConsumeVowelIfValid() { - if (UseMultiCode(1)) return true; - while (codes_[codes_used_].first == CharClass::kVowelModifier) { - if (UseMultiCode(1)) return true; - // Only Malayalam allows repeated modifiers? - if (script_ != ViramaScript::kMalayalam) break; - } - while (codes_[codes_used_].first == CharClass::kVedicMark) { - if (UseMultiCode(1)) return true; - } - // What we have consumed so far is a valid vowel cluster. - return true; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_indic.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_indic.h deleted file mode 100644 index 62dbcb23..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_indic.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef TESSERACT_TRAINING_VALIDATE_INDIC_H_ -#define TESSERACT_TRAINING_VALIDATE_INDIC_H_ - -#include "validator.h" - -namespace tesseract { - -// Subclass of Validator that validates and segments Indic scripts in the -// unicode range 0x900-0xdff (Devanagari-Sinhala). -class ValidateIndic : public Validator { - public: - ValidateIndic(ViramaScript script, bool report_errors) - : Validator(script, report_errors) {} - ~ValidateIndic() {} - - protected: - // Returns whether codes matches the pattern for an Indic Grapheme. - // Consumes the next Grapheme in codes_[codes_used_++...] 
and copies it to - // parts_ and output_. Returns true if a valid Grapheme was consumed, - // otherwise does not increment codes_used_. - bool ConsumeGraphemeIfValid() override; - // Returns the CharClass corresponding to the given Unicode ch. - Validator::CharClass UnicodeToCharClass(char32 ch) const override; - - private: - // Helper consumes/copies a virama and any associated post-virama joiners. - bool ConsumeViramaIfValid(IndicPair joiner, bool post_matra); - // Helper consumes/copies a series of consonants separated by viramas while - // valid, but not any vowel or other modifiers. - bool ConsumeConsonantHeadIfValid(); - // Helper consumes/copies a tail part of a consonant, comprising optional - // matra/piece, vowel modifier, vedic mark, terminating virama. - bool ConsumeConsonantTailIfValid(); - // Helper consumes/copies a vowel and optional modifiers. - bool ConsumeVowelIfValid(); - - // Some special unicodes used only for Indic processing. - static const char32 kYayana = 0xdba; // Sinhala Ya - static const char32 kRayana = 0xdbb; // Sinhala Ra -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_VALIDATE_INDIC_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_javanese.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_javanese.cpp deleted file mode 100644 index 435c3628..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_javanese.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/********************************************************************** - * File: validate_javanese.cpp - * Description: Text validator for Javanese Script - aksara jawa. - * Author: Shree Devi Kumar - * Created: August 03, 2018 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#include "validate_javanese.h" -#include "errcode.h" -#include "tprintf.h" - -namespace tesseract { - -// Returns whether codes matches the pattern for a Javanese Grapheme. -// Taken from unicode standard: -// http://www.unicode.org/charts/PDF/UA980.pdf -// http://www.unicode.org/versions/Unicode11.0.0/ch17.pdf -// The Consonant class here includes independent vowels. -// The order of components in an orthographic syllable as expressed in BNF is: -// {C F} C {{R}Y} {V{A}} {Z} -// Translated to the codes used by the CharClass enum: -// [(V|C[N])(H)] (V|C[N]) [[N]N] [M[D]] [v] -// Also see https://r12a.github.io/scripts/javanese/ for detailed notes. -// Validation rules copied from validate_indic.cpp and modified for Javanese. -// Indic - for reference -// + vowel Grapheme: V[D](v)* -// + consonant Grapheme: (C[N](H|HZ|Hz|ZH)?)*C[N](H|Hz)?[M[P]][D](v)* - -bool ValidateJavanese::ConsumeGraphemeIfValid() { - switch (codes_[codes_used_].first) { - case CharClass::kConsonant: - return ConsumeConsonantHeadIfValid() && ConsumeConsonantTailIfValid(); - case CharClass::kVowel: - case CharClass::kVedicMark: - return ConsumeVowelIfValid(); - case CharClass::kZeroWidthJoiner: - case CharClass::kZeroWidthNonJoiner: - // Apart from within an aksara, joiners are silently dropped. 
- if (report_errors_) - tprintf("Dropping isolated joiner: 0x%x\n", codes_[codes_used_].second); - ++codes_used_; - return true; - case CharClass::kOther: - UseMultiCode(1); - return true; - default: - if (report_errors_) { - tprintf("Invalid start of grapheme sequence:%c=0x%x\n", - codes_[codes_used_].first, codes_[codes_used_].second); - } - return false; - } -} - -// Helper consumes/copies a virama and any associated post-virama joiners. -// A linking virama (with either type of pre-virama joiner, post-virama ZWJ, or -// no joiner at all) must be followed by a consonant. -// A non-linking (explicit) virama is indicated by a ZWNJ after it, or a non -// consonant, space, or character from a different script. We clean up the -// representation to make it consistent by adding a ZWNJ if missing from a -// non-linking virama. Returns false with an invalid sequence. -bool ValidateJavanese::ConsumeViramaIfValid(IndicPair joiner, bool post_matra) { - int num_codes = codes_.size(); - if (joiner.first == CharClass::kOther) { - CodeOnlyToOutput(); - if (codes_used_ < num_codes && - codes_[codes_used_].second == kZeroWidthJoiner) { - // Post-matra viramas must be explicit, so no joiners allowed here. - if (post_matra) { - if (report_errors_) tprintf("ZWJ after a post-matra virama!!\n"); - return false; - } - if (codes_used_ + 1 < num_codes && - codes_[codes_used_ - 2].second != kCakra && - (codes_[codes_used_ + 1].second == kZeroWidthNonJoiner || - codes_[codes_used_ + 1].second == kPengkal || - codes_[codes_used_ + 1].second == kCakra)) { - // This combination will be picked up later. - ASSERT_HOST(!CodeOnlyToOutput()); - } else { - // Half-form with optional Nukta. 
- int len = output_.size() + 1 - output_used_; - if (UseMultiCode(len)) return true; - } - if (codes_used_ < num_codes && - codes_[codes_used_].second == kZeroWidthNonJoiner) { - if (output_used_ == output_.size() || - output_[output_used_] != kCakra) { - if (report_errors_) { - tprintf("Virama ZWJ ZWNJ in non-Sinhala: base=0x%x!\n", - static_cast(script_)); - } - return false; - } - // Special Sinhala case of Stand-alone Repaya. ['RA' H Z z] - if (UseMultiCode(4)) return true; - } - } else if (codes_used_ == num_codes || - codes_[codes_used_].first != CharClass::kConsonant || - post_matra) { - if (codes_used_ == num_codes || - codes_[codes_used_].second != kZeroWidthNonJoiner) { - // It is valid to have an unterminated virama at the end of a word, but - // for consistency, we will always add ZWNJ if not present. - CodeOnlyToOutput(); - } else { - CodeOnlyToOutput(); - } - // Explicit virama [H z] - MultiCodePart(2); - } - } else { - // Pre-virama joiner [{Z|z} H] requests specific conjunct. - if (UseMultiCode(2)) { - if (report_errors_) - tprintf("Invalid pre-virama joiner with no 2nd consonant!!\n"); - return false; - } - if (codes_[codes_used_].second == kZeroWidthJoiner || - codes_[codes_used_].second == kZeroWidthNonJoiner) { - if (report_errors_) { - tprintf("JHJ!!: 0x%x 0x%x 0x%x\n", joiner.second, output_.back(), - codes_[codes_used_].second); - } - return false; - } - } - // It is good so far as it goes. - return true; -} - -// Helper consumes/copies a series of consonants separated by viramas while -// valid, but not any vowel or other modifiers. -bool ValidateJavanese::ConsumeConsonantHeadIfValid() { - const int num_codes = codes_.size(); - // Consonant aksara - do { - CodeOnlyToOutput(); - // Special Sinhala case of [H Z Yayana/Rayana]. 
- int index = output_.size() - 3; - if (output_used_ <= index && - (output_.back() == kPengkal || output_.back() == kCakra) && - IsVirama(output_[index]) && output_[index + 1] == kZeroWidthJoiner) { - MultiCodePart(3); - } - bool have_nukta = false; - if (codes_used_ < num_codes && - codes_[codes_used_].first == CharClass::kNukta) { - have_nukta = true; - CodeOnlyToOutput(); - } - // Test for subscript conjunct. - index = output_.size() - 2 - have_nukta; - if (output_used_ <= index && IsSubscriptScript() && - IsVirama(output_[index])) { - // Output previous virama, consonant + optional nukta. - MultiCodePart(2 + have_nukta); - } - IndicPair joiner(CharClass::kOther, 0); - if (codes_used_ < num_codes && - (codes_[codes_used_].second == kZeroWidthJoiner || - (codes_[codes_used_].second == kZeroWidthNonJoiner && - script_ == ViramaScript::kMalayalam))) { - joiner = codes_[codes_used_]; - if (++codes_used_ == num_codes) { - if (report_errors_) { - tprintf("Skipping ending joiner: 0x%x 0x%x\n", output_.back(), - joiner.second); - } - return true; - } - if (codes_[codes_used_].first == CharClass::kVirama) { - output_.push_back(joiner.second); - } else { - if (report_errors_) { - tprintf("Skipping unnecessary joiner: 0x%x 0x%x 0x%x\n", - output_.back(), joiner.second, codes_[codes_used_].second); - } - joiner = std::make_pair(CharClass::kOther, 0); - } - } - if (codes_used_ < num_codes && - codes_[codes_used_].first == CharClass::kVirama) { - if (!ConsumeViramaIfValid(joiner, false)) return false; - } else { - break; // No virama, so the run of consonants is over. - } - } while (codes_used_ < num_codes && - codes_[codes_used_].first == CharClass::kConsonant); - if (output_used_ < output_.size()) MultiCodePart(1); - return true; -} - -// Helper consumes/copies a tail part of a consonant, comprising optional -// matra/piece, vowel modifier, vedic mark, terminating virama. 
-bool ValidateJavanese::ConsumeConsonantTailIfValid() { - if (codes_used_ == codes_.size()) return true; - // No virama: Finish the grapheme. - // Are multiple matras allowed? - if (codes_[codes_used_].first == CharClass::kMatra) { - if (UseMultiCode(1)) return true; - if (codes_[codes_used_].first == CharClass::kMatraPiece) { - if (UseMultiCode(1)) return true; - } - } - // Tarung also used for long versions of u and o vowels and vocalic r - // Taling + Tarung is valid eg. ꦏ + ◌ꦺ + ◌ꦴ - while (codes_[codes_used_].first == CharClass::kMatraPiece) { - if (UseMultiCode(1)) return true; - } - while (codes_[codes_used_].first == CharClass::kVowelModifier) { - if (UseMultiCode(1)) return true; - // Only Malayalam allows only repeated 0xd02. - if (script_ != ViramaScript::kMalayalam || output_.back() != 0xd02) break; - } - while (codes_[codes_used_].first == CharClass::kVedicMark) { - if (UseMultiCode(1)) return true; - } - if (codes_[codes_used_].first == CharClass::kVirama) { - if (!ConsumeViramaIfValid(IndicPair(CharClass::kOther, 0), true)) { - return false; - } - } - // What we have consumed so far is a valid consonant cluster. - if (output_used_ < output_.size()) MultiCodePart(1); - - return true; -} - -// Helper consumes/copies a vowel and optional modifiers. -bool ValidateJavanese::ConsumeVowelIfValid() { - if (UseMultiCode(1)) return true; - while (codes_[codes_used_].first == CharClass::kVowelModifier) { - if (UseMultiCode(1)) return true; - // Only Malayalam allows repeated modifiers? - if (script_ != ViramaScript::kMalayalam) break; - } - while (codes_[codes_used_].first == CharClass::kVedicMark) { - if (UseMultiCode(1)) return true; - } - // What we have consumed so far is a valid vowel cluster. 
- return true; -} - - -Validator::CharClass ValidateJavanese::UnicodeToCharClass(char32 ch) const { - if (ch == kZeroWidthNonJoiner) return CharClass::kZeroWidthNonJoiner; - if (ch == kZeroWidthJoiner) return CharClass::kZeroWidthJoiner; - // Offset from the start of the relevant unicode code block aka code page. - int off = ch - static_cast(script_); - // Anything in another code block is other. - if (off < 0 || off >= kIndicCodePageSize) return CharClass::kOther; - if (off < 0x4) return CharClass::kVowelModifier; - if (off <= 0x32) return CharClass::kConsonant; // includes independent vowels - if (off == 0x33) return CharClass::kNukta; // A9B3 CECAK TELU - if (off == 0x34) return CharClass::kMatraPiece; // A9B4 TARUNG two part vowels - if (off <= 0x39) return CharClass::kMatra; - if (off <= 0x3a) return CharClass::kConsonant; // A9BA TALING - pre base vowel - if (off <= 0x3d) return CharClass::kMatra; - if (off <= 0x3f) return CharClass::kNukta; // A9BE-A9BF PENGKAL-CAKRA medial consonants - if (off == 0x40) return CharClass::kVirama; // A9C0 PANGKON - return CharClass::kOther; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_javanese.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_javanese.h deleted file mode 100644 index f759b287..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_javanese.h +++ /dev/null @@ -1,63 +0,0 @@ -/********************************************************************** - * File: validate_javanese.h - * Description: Text validator for Javanese Script - aksara jawa. - * Author: Shree Devi Kumar - * Created: August 03, 2018 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_VALIDATE_JAVANESE_H_ -#define TESSERACT_TRAINING_VALIDATE_JAVANESE_H_ - -#include "validator.h" - - -namespace tesseract { - -// Subclass of Validator that validates and segments Javanese scripts - -class ValidateJavanese : public Validator { - public: - ValidateJavanese(ViramaScript script, bool report_errors) - : Validator(script, report_errors) {} - ~ValidateJavanese() {} - - protected: - // Returns whether codes matches the pattern for an Javanese Grapheme. - // Consumes the next Grapheme in codes_[codes_used_++...] and copies it to - // parts_ and output_. Returns true if a valid Grapheme was consumed, - // otherwise does not increment codes_used_. - bool ConsumeGraphemeIfValid() override; - // Returns the CharClass corresponding to the given Unicode ch. - Validator::CharClass UnicodeToCharClass(char32 ch) const override; - - private: - // Helper consumes/copies a virama and any associated post-virama joiners. - bool ConsumeViramaIfValid(IndicPair joiner, bool post_matra); - // Helper consumes/copies a series of consonants separated by viramas while - // valid, but not any vowel or other modifiers. - bool ConsumeConsonantHeadIfValid(); - // Helper consumes/copies a tail part of a consonant, comprising optional - // matra/piece, vowel modifier, vedic mark, terminating virama. - bool ConsumeConsonantTailIfValid(); - // Helper consumes/copies a vowel and optional modifiers. 
- bool ConsumeVowelIfValid(); - - // Some special unicodes used only for Javanese processing. - static const char32 kPengkal = 0xa9be; // Javanese Ya - static const char32 kCakra = 0xa9bf; // Javanese Ra -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_VALIDATE_JAVANESE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_khmer.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_khmer.cpp deleted file mode 100644 index 45c8f061..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_khmer.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "validate_khmer.h" -#include "errcode.h" -#include "tprintf.h" - -namespace tesseract { - -// Returns whether codes matches the pattern for a Khmer Grapheme. -// Taken from unicode standard: -// http://www.unicode.org/versions/Unicode9.0.0/ch16.pdf. -// where it gives: B {R | C} {S {R}}* {{Z} V} {O} {S}, using different notation -// to the ISCII standard http://varamozhi.sourceforge.net/iscii91.pdf. -// Translated to the codes used by the CharClass enum: -// C {R | N} {HC {R}}* {{Z|z} M{P}} {D} {HC} -// Where R is a new symbol (Robat) and N is repurposed as a consonant shifter. -// Also the Consonant class here includes independent vowels, as they are -// treated the same anyway. -// In the split grapheme mode, the only characters that get grouped are the -// HC and the {Z|z}M The unicode chapter on Khmer only mentions the joiners in -// the BNF syntax, so who knows what they do. 
-bool ValidateKhmer::ConsumeGraphemeIfValid() { - int num_codes = codes_.size(); - if (codes_used_ == num_codes) return false; - if (codes_[codes_used_].first == CharClass::kOther) { - UseMultiCode(1); - return true; - } - if (codes_[codes_used_].first != CharClass::kConsonant) { - if (report_errors_) { - tprintf("Invalid start of Khmer syllable:0x%x\n", - codes_[codes_used_].second); - } - return false; - } - if (UseMultiCode(1)) return true; - if (codes_[codes_used_].first == CharClass::kRobat || - codes_[codes_used_].first == CharClass::kNukta) { - if (UseMultiCode(1)) return true; - } - while (codes_used_ + 1 < num_codes && - codes_[codes_used_].first == CharClass::kVirama && - codes_[codes_used_ + 1].first == CharClass::kConsonant) { - ASSERT_HOST(!CodeOnlyToOutput()); - if (UseMultiCode(2)) return true; - if (codes_[codes_used_].first == CharClass::kRobat) { - if (UseMultiCode(1)) return true; - } - } - int num_matra_parts = 0; - if (codes_[codes_used_].second == kZeroWidthJoiner || - codes_[codes_used_].second == kZeroWidthNonJoiner) { - if (CodeOnlyToOutput()) { - if (report_errors_) { - tprintf("Unterminated joiner: 0x%x\n", output_.back()); - } - return false; - } - ++num_matra_parts; - } - // Not quite as shown by the BNF, the matra piece is allowed as a matra on its - // own or as an addition to other matras. 
- if (codes_[codes_used_].first == CharClass::kMatra || - codes_[codes_used_].first == CharClass::kMatraPiece) { - ++num_matra_parts; - if (UseMultiCode(num_matra_parts)) return true; - } else if (num_matra_parts) { - if (report_errors_) { - tprintf("Joiner with non-dependent vowel after it!:0x%x 0x%x\n", - output_.back(), codes_[codes_used_].second); - } - return false; - } - if (codes_[codes_used_].first == CharClass::kMatraPiece && - codes_[codes_used_ - 1].first != CharClass::kMatraPiece) { - if (UseMultiCode(1)) return true; - } - if (codes_[codes_used_].first == CharClass::kVowelModifier) { - if (UseMultiCode(1)) return true; - } - if (codes_used_ + 1 < num_codes && - codes_[codes_used_].first == CharClass::kVirama && - codes_[codes_used_ + 1].first == CharClass::kConsonant) { - ASSERT_HOST(!CodeOnlyToOutput()); - if (UseMultiCode(2)) return true; - } - return true; -} - -Validator::CharClass ValidateKhmer::UnicodeToCharClass(char32 ch) const { - if (IsVedicAccent(ch)) return CharClass::kVedicMark; - if (ch == kZeroWidthNonJoiner) return CharClass::kZeroWidthNonJoiner; - if (ch == kZeroWidthJoiner) return CharClass::kZeroWidthJoiner; - // Offset from the start of the relevant unicode code block aka code page. - int off = ch - static_cast(script_); - // Anything in another code block is other. 
- if (off < 0 || off >= kIndicCodePageSize) return CharClass::kOther; - if (off <= 0x33) return CharClass::kConsonant; - if (off <= 0x45) return CharClass::kMatra; - if (off == 0x46) return CharClass::kMatraPiece; - if (off == 0x4c) return CharClass::kRobat; - if (off == 0x49 || off == 0x4a) return CharClass::kNukta; - if (off <= 0x51) return CharClass::kVowelModifier; - if (off == 0x52) return CharClass::kVirama; - return CharClass::kOther; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_khmer.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_khmer.h deleted file mode 100644 index a2fe75c9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_khmer.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TESSERACT_TRAINING_VALIDATE_KHMER_H_ -#define TESSERACT_TRAINING_VALIDATE_KHMER_H_ - -#include "validator.h" - -namespace tesseract { - -// Subclass of Validator that validates and segments Khmer. -class ValidateKhmer : public Validator { - public: - ValidateKhmer(ViramaScript script, bool report_errors) - : Validator(script, report_errors) {} - ~ValidateKhmer() {} - - protected: - // Returns whether codes matches the pattern for an Khmer Grapheme. - // Consumes the next Grapheme in codes_[codes_used_++...] and copies it to - // parts_ and output_. Returns true if a valid Grapheme was consumed, - // otherwise does not increment codes_used_. - bool ConsumeGraphemeIfValid() override; - // Returns the CharClass corresponding to the given Unicode ch. 
- CharClass UnicodeToCharClass(char32 ch) const override; -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_VALIDATE_KHMER_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_myanmar.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_myanmar.cpp deleted file mode 100644 index 44934690..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_myanmar.cpp +++ /dev/null @@ -1,160 +0,0 @@ -#include "validate_myanmar.h" -#include "errcode.h" -#include "icuerrorcode.h" -#include "tprintf.h" -#include "unicode/uchar.h" // From libicu -#include "unicode/uscript.h" // From libicu - -namespace tesseract { - -// Returns whether codes matches the pattern for a Myanmar Grapheme. -// Taken directly from the unicode table 16-3. -// See http://www.unicode.org/versions/Unicode9.0.0/ch16.pdf -bool ValidateMyanmar::ConsumeGraphemeIfValid() { - int num_codes = codes_.size(); - if (codes_used_ == num_codes) return true; - // Other. - if (IsMyanmarOther(codes_[codes_used_].second)) { - UseMultiCode(1); - return true; - } - // Kinzi. - if (codes_used_ + 2 < num_codes && codes_[codes_used_].second == 0x1004 && - codes_[codes_used_ + 1].second == kMyanmarAsat && - codes_[codes_used_ + 2].second == kMyanmarVirama) { - ASSERT_HOST(!CodeOnlyToOutput()); - ASSERT_HOST(!CodeOnlyToOutput()); - if (UseMultiCode(3)) return true; - } - // Base consonant/vowel. NOTE that since everything in Myanmar appears to be - // optional, except the base, this is the only place where invalid input can - // be detected and false returned. - if (IsMyanmarLetter(codes_[codes_used_].second)) { - if (UseMultiCode(1)) return true; - } else { - if (report_errors_) { - tprintf("Invalid start of Myanmar syllable:0x%x\n", - codes_[codes_used_].second); - } - return false; // One of these is required. 
- } - if (ConsumeSubscriptIfPresent()) return true; - ConsumeOptionalSignsIfPresent(); - // What we have consumed so far is a valid syllable. - return true; -} - -// TODO(rays) Doesn't use intermediate coding like the other scripts, as there -// is little correspondence between the content of table 16-3 and the char -// classes of the Indic languages. (Experts may disagree and improve!) -// In unicode table 16-3 there is basically a long list of optional characters, -// which can be coded quite easily. -// Unfortunately, table 16-3 doesn't include even half the Myanmar unicodes!! -// The table also allows sequences that still result in dotted circles!! -// So with a lot of guesswork the rest have been added in a reasonable place. -Validator::CharClass ValidateMyanmar::UnicodeToCharClass(char32 ch) const { - if (IsMyanmarLetter(ch)) return CharClass::kConsonant; - return CharClass::kOther; -} - -// Helper consumes/copies a virama and any subscript consonant. -// Returns true if the end of input is reached. -bool ValidateMyanmar::ConsumeSubscriptIfPresent() { - // Subscript consonant. It appears there can be only one. - int num_codes = codes_.size(); - if (codes_used_ + 1 < num_codes && - codes_[codes_used_].second == kMyanmarVirama) { - if (IsMyanmarLetter(codes_[codes_used_ + 1].second)) { - ASSERT_HOST(!CodeOnlyToOutput()); - if (UseMultiCode(2)) return true; - } - } - return false; -} - -// Helper consumes/copies a series of optional signs. -// Returns true if the end of input is reached. -bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() { - // The following characters are allowed, all optional, and in sequence. - // An exception is kMyanmarMedialYa, which can include kMyanmarAsat. 
- const std::vector kMedials({kMyanmarAsat, kMyanmarMedialYa, 0x103c, - 0x103d, 0x103e, 0x105e, 0x105f, 0x1060, - 0x1081, 0x1031}); - for (char32 ch : kMedials) { - if (codes_[codes_used_].second == ch) { - if (UseMultiCode(1)) return true; - if (ch == kMyanmarMedialYa && - codes_[codes_used_].second == kMyanmarAsat) { - if (UseMultiCode(1)) return true; - } - } - } - // Vowel sign i, ii, ai. - char32 ch = codes_[codes_used_].second; - if (ch == 0x102d || ch == 0x102e || ch == 0x1032) { - if (UseMultiCode(1)) return true; - } - // Vowel sign u, uu, and extensions. - ch = codes_[codes_used_].second; - if (ch == 0x102f || ch == 0x1030 || (0x1056 <= ch && ch <= 0x1059) || - ch == 0x1062 || ch == 0x1067 || ch == 0x1068 || - (0x1071 <= ch && ch <= 0x1074) || (0x1083 <= ch && ch <= 0x1086) || - ch == 0x109c || ch == 0x109d) { - if (UseMultiCode(1)) return true; - } - // Tall aa, aa with optional asat. - if (codes_[codes_used_].second == 0x102b || - codes_[codes_used_].second == 0x102c) { - if (UseMultiCode(1)) return true; - if (codes_[codes_used_].second == kMyanmarAsat) { - if (UseMultiCode(1)) return true; - } - } - // The following characters are allowed, all optional, and in sequence. - const std::vector kSigns({0x1036, 0x1037}); - for (char32 ch : kSigns) { - if (codes_[codes_used_].second == ch) { - if (UseMultiCode(1)) return true; - } - } - // Tone mark extensions. - ch = codes_[codes_used_].second; - if (ch == 0x1038 || ch == kMyanmarAsat || ch == 0x1063 || ch == 0x1064 || - (0x1069 <= ch && ch <= 0x106d) || (0x1087 <= ch && ch <= 0x108d) || - ch == 0x108f || ch == 0x109a || ch == 0x109b || - (0xaa7b <= ch && ch <= 0xaa7d)) { - if (UseMultiCode(1)) return true; - } - return false; -} - -// Returns true if the unicode is a Myanmar "letter" including consonants -// and independent vowels. Although table 16-3 distinguishes between some -// base consonants and vowels, the extensions make no such distinction, so we -// put them all into a single bucket. 
-/* static */ -bool ValidateMyanmar::IsMyanmarLetter(char32 ch) { - return (0x1000 <= ch && ch <= 0x102a) || ch == 0x103f || - (0x1050 <= ch && ch <= 0x1055) || (0x105a <= ch && ch <= 0x105d) || - ch == 0x1061 || ch == 0x1065 || ch == 0x1066 || - (0x106e <= ch && ch <= 0x1070) || (0x1075 <= ch && ch <= 0x1080) || - ch == 0x108e || (0xa9e0 <= ch && ch <= 0xa9ef) || - (0xa9fa <= ch && ch <= 0xa9ff) || (0xaa60 <= ch && ch <= 0xaa73) || - ch == 0xaa7a || ch == 0xaa7e || ch == 0xaa7f; -} - -// Returns true if ch is a Myanmar digit or other symbol that does not take -// part in being a syllable. -/* static */ -bool ValidateMyanmar::IsMyanmarOther(char32 ch) { - IcuErrorCode err; - UScriptCode script_code = uscript_getScript(ch, err); - if (script_code != USCRIPT_MYANMAR && ch != Validator::kZeroWidthJoiner && - ch != Validator::kZeroWidthNonJoiner) - return true; - return (0x1040 <= ch && ch <= 0x1049) || (0x1090 <= ch && ch <= 0x1099) || - (0x109c <= ch && ch <= 0x109d) || (0xa9f0 <= ch && ch <= 0xa9f9) || - (0xaa74 <= ch && ch <= 0xaa79); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_myanmar.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_myanmar.h deleted file mode 100644 index d2ada745..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validate_myanmar.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef TESSERACT_TRAINING_VALIDATE_MYANMAR_H_ -#define TESSERACT_TRAINING_VALIDATE_MYANMAR_H_ - -#include "validator.h" - -namespace tesseract { - -// Subclass of Validator that validates and segments Myanmar. -class ValidateMyanmar : public Validator { - public: - ValidateMyanmar(ViramaScript script, bool report_errors) - : Validator(script, report_errors) {} - ~ValidateMyanmar() {} - - protected: - // Returns whether codes matches the pattern for a Myanmar Grapheme. - // Consumes the next Grapheme in codes_[codes_used_++...] 
and copies it to - // parts_ and output_. Returns true if a valid Grapheme was consumed, - // otherwise does not increment codes_used_. - bool ConsumeGraphemeIfValid() override; - // Returns the CharClass corresponding to the given Unicode ch. - Validator::CharClass UnicodeToCharClass(char32 ch) const override; - - private: - // Helper consumes/copies a virama and any subscript consonant. - // Returns true if the end of input is reached. - bool ConsumeSubscriptIfPresent(); - // Helper consumes/copies a series of optional signs. - // Returns true if the end of input is reached. - bool ConsumeOptionalSignsIfPresent(); - // Returns true if the unicode is a Myanmar "letter" including consonants - // and independent vowels. Although table 16-3 distinguishes between some - // base consonants and vowels, the extensions make no such distinction, so we - // put them all into a single bucket. - static bool IsMyanmarLetter(char32 ch); - // Returns true if ch is a Myanmar digit or other symbol that does not take - // part in being a syllable. - static bool IsMyanmarOther(char32 ch); - - // Some special unicodes used only for Myanmar processing. 
- static const char32 kMyanmarAsat = 0x103a; - static const char32 kMyanmarMedialYa = 0x103b; -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_VALIDATE_MYANMAR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validator.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validator.cpp deleted file mode 100644 index 0dc70a32..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validator.cpp +++ /dev/null @@ -1,222 +0,0 @@ -#include "validator.h" - -#include -#include -#include -#include - -#include "icuerrorcode.h" -#include "unicode/uchar.h" // From libicu -#include "unicode/uscript.h" // From libicu -#include "validate_grapheme.h" -#include "validate_indic.h" -#include "validate_javanese.h" -#include "validate_khmer.h" -#include "validate_myanmar.h" - -namespace tesseract { - -// Some specific but universally useful unicodes. -const char32 Validator::kZeroWidthSpace = 0x200B; -const char32 Validator::kZeroWidthNonJoiner = 0x200C; -const char32 Validator::kZeroWidthJoiner = 0x200D; -const char32 Validator::kLeftToRightMark = 0x200E; -const char32 Validator::kRightToLeftMark = 0x200F; -const char32 Validator::kInvalid = 0xfffd; - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -Validator::~Validator() = default; - -// Validates and cleans the src vector of unicodes to the *dest, according to -// g_mode. In the case of kSingleString, a single vector containing the whole -// result is added to *dest. With kCombined, multiple vectors are added to -// *dest with one grapheme in each. With kGlyphSplit, multiple vectors are -// added to *dest with a smaller unit representing a glyph in each. -// In case of validation error, returns false and as much as possible of the -// input, without discarding invalid text. 
-/* static */ -bool Validator::ValidateCleanAndSegment( - GraphemeNormMode g_mode, bool report_errors, const std::vector& src, - std::vector>* dest) { - ValidateGrapheme g_validator(ViramaScript::kNonVirama, report_errors); - std::vector> graphemes; - ViramaScript script = MostFrequentViramaScript(src); - bool success = true; - if (script == ViramaScript::kNonVirama) { - // The grapheme segmenter's maximum segmentation is the grapheme unit, so - // up the mode by 1 to get the desired effect. - if (g_mode == GraphemeNormMode::kCombined) - g_mode = GraphemeNormMode::kGlyphSplit; - else if (g_mode == GraphemeNormMode::kGlyphSplit) - g_mode = GraphemeNormMode::kIndividualUnicodes; - // Just do grapheme segmentation. - success = g_validator.ValidateCleanAndSegmentInternal(g_mode, src, dest); - } else { - success = g_validator.ValidateCleanAndSegmentInternal( - GraphemeNormMode::kGlyphSplit, src, &graphemes); - std::unique_ptr validator( - ScriptValidator(script, report_errors)); - for (const auto& grapheme : graphemes) { - if (!validator->ValidateCleanAndSegmentInternal(g_mode, grapheme, dest)) { - success = false; - } - } - } - return success; -} - -// Factory method that understands how to map script to the right subclass. -std::unique_ptr Validator::ScriptValidator(ViramaScript script, - bool report_errors) { - switch (script) { - case ViramaScript::kNonVirama: - return std::unique_ptr( - new ValidateGrapheme(script, report_errors)); - case ViramaScript::kJavanese: - return std::unique_ptr( - new ValidateJavanese(script, report_errors)); - case ViramaScript::kMyanmar: - return std::unique_ptr( - new ValidateMyanmar(script, report_errors)); - case ViramaScript::kKhmer: - return std::unique_ptr( - new ValidateKhmer(script, report_errors)); - default: - return std::unique_ptr( - new ValidateIndic(script, report_errors)); - } -} - -// Internal version of the public static ValidateCleanAndSegment. 
-// Validates and cleans the src vector of unicodes to the *dest, according to -// its type and the given g_mode. -// In case of validation error, returns false and returns as much as possible -// of the input, without discarding invalid text. -bool Validator::ValidateCleanAndSegmentInternal( - GraphemeNormMode g_mode, const std::vector& src, - std::vector>* dest) { - Clear(); - ComputeClassCodes(src); - bool success = true; - for (codes_used_ = 0; codes_used_ < codes_.size();) { - if (!ConsumeGraphemeIfValid()) { - success = false; - ++codes_used_; - } - } - MoveResultsToDest(g_mode, dest); - return success; -} - -// Moves the results from parts_ or output_ to dest according to g_mode. -void Validator::MoveResultsToDest(GraphemeNormMode g_mode, - std::vector>* dest) { - if (g_mode == GraphemeNormMode::kIndividualUnicodes) { - // Append each element of the combined output_ that we made as a new vector - // in dest. - dest->reserve(dest->size() + output_.size()); - for (char32 ch : output_) dest->push_back({ch}); - } else if (g_mode == GraphemeNormMode::kGlyphSplit) { - // Append all the parts_ that we made onto dest. - std::move(parts_.begin(), parts_.end(), std::back_inserter(*dest)); - } else if (g_mode == GraphemeNormMode::kCombined || dest->empty()) { - // Append the combined output_ that we made onto dest as one new vector. - dest->push_back(std::vector()); - output_.swap(dest->back()); - } else { // kNone. - // Append the combined output_ that we made onto the last existing element - // of dest. - dest->back().insert(dest->back().end(), output_.begin(), output_.end()); - } -} - -static bool CmpPairSecond(const std::pair& p1, - const std::pair& p2) { - return p1.second < p2.second; -} - -// Computes and returns the ViramaScript corresponding to the most frequent -// virama-using script in the input, or kNonVirama if none are present. 
-/* static */ -ViramaScript Validator::MostFrequentViramaScript( - const std::vector& utf32) { - std::unordered_map histogram; - for (char32 ch : utf32) { - // Determine the codepage base. For the Indic scripts, Khmer and Javanese, it is - // sufficient to divide by kIndicCodePageSize but Myanmar is all over the - // unicode code space, so use its script id. - int base = ch / kIndicCodePageSize; - IcuErrorCode err; - UScriptCode script_code = uscript_getScript(ch, err); - if ((kMinIndicUnicode <= ch && ch <= kMaxJavaneseUnicode && - script_code != USCRIPT_COMMON) || - script_code == USCRIPT_MYANMAR) { - if (script_code == USCRIPT_MYANMAR) - base = static_cast(ViramaScript::kMyanmar) / kIndicCodePageSize; - ++histogram[base]; - } - } - if (!histogram.empty()) { - int base = - std::max_element(histogram.begin(), histogram.end(), CmpPairSecond) - ->first; - char32 codebase = static_cast(base * kIndicCodePageSize); - // Check for validity. - if (codebase == static_cast(ViramaScript::kMyanmar) || - codebase == static_cast(ViramaScript::kJavanese) || - codebase == static_cast(ViramaScript::kKhmer) || - (static_cast(ViramaScript::kDevanagari) <= codebase && - codebase <= static_cast(ViramaScript::kSinhala))) { - return static_cast(codebase); - } - } - return ViramaScript::kNonVirama; -} - -// Returns true if the given UTF-32 unicode is a "virama" character. -/* static */ -bool Validator::IsVirama(char32 unicode) { - return (kMinIndicUnicode <= unicode && unicode <= kMaxSinhalaUnicode && - (unicode & 0x7f) == 0x4d) || - unicode == kSinhalaVirama || - unicode == kJavaneseVirama || - unicode == kMyanmarVirama || - unicode == kKhmerVirama; -} - -// Returns true if the given UTF-32 unicode is a vedic accent. 
-/* static */ -bool Validator::IsVedicAccent(char32 unicode) { - return (0x1cd0 <= unicode && unicode < 0x1d00) || - (0xa8e0 <= unicode && unicode <= 0xa8f7) || - (0x951 <= unicode && unicode <= 0x954); -} - -// Returns true if the script is one that uses subscripts for conjuncts. -bool Validator::IsSubscriptScript() const { - return script_ == ViramaScript::kTelugu || - script_ == ViramaScript::kKannada || - script_ == ViramaScript::kJavanese || - script_ == ViramaScript::kMyanmar || - script_ == ViramaScript::kKhmer; -} - -void Validator::ComputeClassCodes(const std::vector& text) { - codes_.reserve(text.size()); - for (char32 c : text) { - codes_.push_back(std::make_pair(UnicodeToCharClass(c), c)); - } -} - -// Resets to the initial state. -void Validator::Clear() { - codes_.clear(); - parts_.clear(); - output_.clear(); - codes_used_ = 0; - output_used_ = 0; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validator.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validator.h deleted file mode 100644 index 3b422d91..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/validator.h +++ /dev/null @@ -1,247 +0,0 @@ -/********************************************************************** - * File: validator.h - * Description: Base class for various text validators. Intended mainly for - * scripts that use a virama character. - * Author: Ray Smith - * Created: Tue May 23 2017 - * - * (C) Copyright 2017, Google Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - **********************************************************************/ - -#ifndef TESSERACT_TRAINING_VALIDATOR_H_ -#define TESSERACT_TRAINING_VALIDATOR_H_ - -#include -#include -#include "unichar.h" - -namespace tesseract { - -// Different kinds of grapheme normalization - not just for Indic! -// A grapheme is a syllable unit in Indic and can be several unicodes. -// In other scripts, a grapheme is a base character and accent/diacritic -// combination, as not all accented characters have a single composed form. -enum class GraphemeNormMode { - // Validation result is a single string, even if input is multi-word. - kSingleString, - // Standard unicode graphemes are validated and output as grapheme units. - kCombined, - // Graphemes are validated and sub-divided. For virama-using scripts, units - // that correspond to repeatable glyphs are generated. (Mostly single unicodes - // but viramas and joiners are paired with the most sensible neighbor.) - // For non-virama scripts, this means that base/accent pairs are separated, - // ie the output is individual unicodes. - kGlyphSplit, - // The output is always single unicodes, regardless of the script. - kIndividualUnicodes, -}; - -// An enum representing the scripts that use a virama character. It is -// guaranteed that the value of any element, (except kNonVirama) can be cast -// to a unicode (char32) value that represents the start of the unicode range -// of the corresponding script. 
-enum class ViramaScript : char32 { - kNonVirama = 0, - kDevanagari = 0x900, - kBengali = 0x980, - kGurmukhi = 0xa00, - kGujarati = 0xa80, - kOriya = 0xb00, - kTamil = 0xb80, - kTelugu = 0xc00, - kKannada = 0xc80, - kMalayalam = 0xd00, - kSinhala = 0xd80, - kMyanmar = 0x1000, - kKhmer = 0x1780, - kJavanese = 0xa980, -}; - -// Base class offers a validation API and protected methods to allow subclasses -// to easily build the validated/segmented output. -class Validator { - public: - // Validates and cleans the src vector of unicodes to the *dest, according to - // g_mode. In the case of kSingleString, a single vector containing the whole - // result is added to *dest. With kCombined, multiple vectors are added to - // *dest with one grapheme in each. With kGlyphSplit, multiple vectors are - // added to *dest with a smaller unit representing a glyph in each. - // In case of validation error, returns false and as much as possible of the - // input, without discarding invalid text. - static bool ValidateCleanAndSegment(GraphemeNormMode g_mode, - bool report_errors, - const std::vector& src, - std::vector>* dest); - - // Returns true if the unicode ch is a non-printing zero-width mark of no - // significance to OCR training or evaluation. - static bool IsZeroWidthMark(char32 ch) { - return ch == kZeroWidthSpace || ch == kLeftToRightMark || - ch == kRightToLeftMark || ch == kInvalid; - } - virtual ~Validator(); - - // Some specific but universally useful unicodes. - static const char32 kZeroWidthSpace; - static const char32 kZeroWidthNonJoiner; - static const char32 kZeroWidthJoiner; - static const char32 kLeftToRightMark; - static const char32 kRightToLeftMark; - static const char32 kInvalid; - - protected: - // These are more or less the character class identifiers in the ISCII - // standard, section 8. They have been augmented with the Unicode meta - // characters Zero Width Joiner and Zero Width Non Joiner, and the - // Unicode Vedic Marks. 
- // The best sources of information on Unicode and Indic scripts are: - // http://varamozhi.sourceforge.net/iscii91.pdf - // http://www.unicode.org/versions/Unicode9.0.0/ch12.pdf - // http://unicode.org/faq/indic.html - // http://www.microsoft.com/typography/otfntdev/teluguot/shaping.aspx - enum class CharClass { - // NOTE: The values of the enum members are meaningless and arbitrary, ie - // they are not used for sorting, or any other risky application. - // The reason they are what they are is they are a single character - // abbreviation that can be used in a regexp/BNF definition of a grammar, - // IN A COMMENT, and still not relied upon in the code. - kConsonant = 'C', - kVowel = 'V', - kVirama = 'H', // (aka Halant) - kMatra = 'M', // (aka Dependent Vowel) - kMatraPiece = 'P', // unicode provides pieces of Matras. - kVowelModifier = 'D', // (candrabindu, anusvara, visarga, other marks) - kZeroWidthNonJoiner = 'z', // Unicode Zero Width Non-Joiner U+200C - kZeroWidthJoiner = 'Z', // Unicode Zero Width Joiner U+200D - kVedicMark = 'v', // Modifiers can come modify any indic syllable. - kNukta = 'N', // Occurs only immediately after consonants. - kRobat = 'R', // Khmer only. - kOther = 'O', // (digits, measures, non-Indic, etc) - // Additional classes used only by ValidateGrapheme. - kWhitespace = ' ', - kCombiner = 'c', // Combiners other than virama. - }; - using IndicPair = std::pair; - - Validator(ViramaScript script, bool report_errors) - : script_(script), - codes_used_(0), - output_used_(0), - report_errors_(report_errors) {} - - // Factory method that understands how to map script to the right subclass. - static std::unique_ptr ScriptValidator(ViramaScript script, - bool report_errors); - - // Internal version of the public static ValidateCleanAndSegment. - // Validates and cleans the src vector of unicodes to the *dest, according to - // its type and the given g_mode. 
- // In case of validation error, returns false and returns as much as possible - // of the input, without discarding invalid text. - bool ValidateCleanAndSegmentInternal(GraphemeNormMode g_mode, - const std::vector& src, - std::vector>* dest); - // Moves the results from parts_ or output_ to dest according to g_mode. - void MoveResultsToDest(GraphemeNormMode g_mode, - std::vector>* dest); - - // Computes and returns the ViramaScript corresponding to the most frequent - // virama-using script in the input, or kNonVirama if none are present. - static ViramaScript MostFrequentViramaScript( - const std::vector& utf32); - // Returns true if the given UTF-32 unicode is a "virama" character. - static bool IsVirama(char32 unicode); - // Returns true if the given UTF-32 unicode is a vedic accent. - static bool IsVedicAccent(char32 unicode); - // Returns true if the script is one that uses subscripts for conjuncts. - bool IsSubscriptScript() const; - - // Helper function appends the next element of codes_ only to output_, - // without touching parts_ - // Returns true at the end of codes_. - bool CodeOnlyToOutput() { - output_.push_back(codes_[codes_used_].second); - return ++codes_used_ == codes_.size(); - } - - // Helper function adds a length-element vector to parts_ from the last length - // elements of output_. If there are more than length unused elements in - // output_, adds unicodes as single-element vectors to parts_ to catch - // output_used_ up to output->size() - length before adding the length-element - // vector. 
- void MultiCodePart(int length) { - while (output_used_ + length < output_.size()) { - parts_.emplace_back( - std::initializer_list{output_[output_used_++]}); - } - parts_.emplace_back(std::initializer_list{output_[output_used_]}); - while (++output_used_ < output_.size()) { - parts_.back().push_back(output_[output_used_]); - } - } - - // Helper function appends the next element of codes_ to output_, and then - // calls MultiCodePart to add the appropriate components to parts_. - // Returns true at the end of codes_. - bool UseMultiCode(int length) { - output_.push_back(codes_[codes_used_].second); - MultiCodePart(length); - return ++codes_used_ == codes_.size(); - } - - // Consumes the next Grapheme in codes_[codes_used_++...] and copies it to - // parts_ and output_. Returns true if a valid Grapheme was consumed, - // otherwise does not increment codes_used_. - virtual bool ConsumeGraphemeIfValid() = 0; - // Sets codes_ to the class codes for the given unicode text. - void ComputeClassCodes(const std::vector& text); - // Returns the CharClass corresponding to the given Unicode ch. - virtual CharClass UnicodeToCharClass(char32 ch) const = 0; - // Resets to the initial state. - void Clear(); - - // Number of unicodes in each Indic codepage. - static const int kIndicCodePageSize = 128; - // Lowest unicode value of any Indic script. (Devanagari). - static const char32 kMinIndicUnicode = 0x900; - // Highest unicode value of any consistent (ISCII-based) Indic script. - static const char32 kMaxSinhalaUnicode = 0xdff; - // Highest unicode value of any virama-using script. (Khmer). - static const char32 kMaxViramaScriptUnicode = 0x17ff; - // Some special unicodes. - static const char32 kSinhalaVirama = 0xdca; - static const char32 kMyanmarVirama = 0x1039; - static const char32 kKhmerVirama = 0x17d2; - // Javanese Script - aksarajawa - static const char32 kJavaneseVirama = 0xa9c0; - static const char32 kMaxJavaneseUnicode = 0xa9df; - - // Script we are operating on. 
- ViramaScript script_; - // Input unicodes with assigned CharClass is the data to be validated. - std::vector codes_; - // Glyph-like components of the input. - std::vector> parts_; - // Copied validated unicodes from codes_ that are OK to output. - std::vector output_; - // The number of elements of codes_ that have been processed so far. - int codes_used_; - // The number of elements of output_ that have already been added to parts_. - int output_used_; - // Log error messages for reasons why text is invalid. - bool report_errors_; -}; - -} // namespace tesseract - -#endif // TESSERACT_TRAINING_VALIDATOR_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/wordlist2dawg.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/wordlist2dawg.cpp deleted file mode 100644 index dd6b7306..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/training/wordlist2dawg.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: wordlist2dawg.cpp -// Description: Program to generate a DAWG from a word list file -// Author: Thomas Kielbus -// Created: Thu May 10 18:11:42 PDT 2007 -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -// Given a file that contains a list of words (one word per line) this program -// generates the corresponding squished DAWG file. - -#include "classify.h" -#include "commontraining.h" // CheckSharedLibraryVersion -#include "dawg.h" -#include "dict.h" -#include "emalloc.h" -#include "helpers.h" -#include "serialis.h" -#include "trie.h" -#include "unicharset.h" - -int main(int argc, char** argv) { - tesseract::CheckSharedLibraryVersion(); - - if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { - printf("%s\n", tesseract::TessBaseAPI::Version()); - return 0; - } else if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0) || - (argc == 6 && strcmp(argv[1], "-r") == 0))) { - printf("Usage: %s -v | --version |\n" - " %s [-t | -r [reverse policy] ] word_list_file" - " dawg_file unicharset_file\n", argv[0], argv[0]); - return 1; - } - tesseract::Classify *classify = new tesseract::Classify(); - int argv_index = 0; - if (argc == 5) ++argv_index; - tesseract::Trie::RTLReversePolicy reverse_policy = - tesseract::Trie::RRP_DO_NO_REVERSE; - if (argc == 6) { - ++argv_index; - int tmp_int; - sscanf(argv[++argv_index], "%d", &tmp_int); - reverse_policy = static_cast(tmp_int); - tprintf("Set reverse_policy to %s\n", - tesseract::Trie::get_reverse_policy_name(reverse_policy)); - } - if (argc == 7) argv_index += 3; - const char* wordlist_filename = argv[++argv_index]; - const char* dawg_filename = argv[++argv_index]; - const char* unicharset_file = argv[++argv_index]; - tprintf("Loading unicharset from '%s'\n", unicharset_file); - if (!classify->getDict().getUnicharset().load_from_file(unicharset_file)) { - tprintf("Failed to load unicharset from '%s'\n", unicharset_file); - delete classify; - return 1; - } - const UNICHARSET &unicharset = classify->getDict().getUnicharset(); - if (argc == 4 || argc == 6) { - tesseract::Trie trie( - // the first 3 arguments are not used in 
this case - tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, - unicharset.size(), classify->getDict().dawg_debug_level); - tprintf("Reading word list from '%s'\n", wordlist_filename); - if (!trie.read_and_add_word_list(wordlist_filename, unicharset, - reverse_policy)) { - tprintf("Failed to add word list from '%s'\n", wordlist_filename); - exit(1); - } - tprintf("Reducing Trie to SquishedDawg\n"); - tesseract::SquishedDawg *dawg = trie.trie_to_dawg(); - if (dawg != nullptr && dawg->NumEdges() > 0) { - tprintf("Writing squished DAWG to '%s'\n", dawg_filename); - dawg->write_squished_dawg(dawg_filename); - } else { - tprintf("Dawg is empty, skip producing the output file\n"); - } - delete dawg; - } else if (argc == 5) { - tprintf("Loading dawg DAWG from '%s'\n", dawg_filename); - tesseract::SquishedDawg words( - dawg_filename, - // these 3 arguments are not used in this case - tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, - classify->getDict().dawg_debug_level); - tprintf("Checking word list from '%s'\n", wordlist_filename); - words.check_for_words(wordlist_filename, unicharset, true); - } else { // should never get here - tprintf("Invalid command-line options\n"); - exit(1); - } - delete classify; - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/Makefile.am b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/Makefile.am deleted file mode 100644 index 39bc5225..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -AM_CPPFLAGS += -I$(top_srcdir)/src/ccutil - -if VISIBILITY -AM_CPPFLAGS += -DTESS_EXPORTS \ - -fvisibility=hidden -fvisibility-inlines-hidden -endif - -noinst_HEADERS = \ - scrollview.h svmnode.h svutil.h - -noinst_LTLIBRARIES = libtesseract_viewer.la - -libtesseract_viewer_la_SOURCES = \ - scrollview.cpp svmnode.cpp svutil.cpp svpaint.cpp diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/scrollview.cpp 
b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/scrollview.cpp deleted file mode 100644 index 5fdf88a3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/scrollview.cpp +++ /dev/null @@ -1,835 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: scrollview.cpp -// Description: ScrollView -// Author: Joern Wanke -// Created: Thu Nov 29 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// - -#define NOMINMAX - -#include -#include -#include -#include -#include -#include -#include -#include - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "scrollview.h" - -const int kSvPort = 8461; -const int kMaxMsgSize = 4096; -const int kMaxIntPairSize = 45; // Holds %d,%d, for up to 64 bit. - -#include "svutil.h" - -#include "allheaders.h" - -struct SVPolyLineBuffer { - bool empty; // Independent indicator to allow SendMsg to call SendPolygon. - std::vector xcoords; - std::vector ycoords; -}; - -// A map between the window IDs and their corresponding pointers. -static std::map svmap; -static SVMutex* svmap_mu; -// A map of all semaphores waiting for a specific event on a specific window. 
-static std::map, - std::pair > waiting_for_events; -static SVMutex* waiting_for_events_mu; - -SVEvent* SVEvent::copy() { - SVEvent* any = new SVEvent; - any->command_id = command_id; - any->counter = counter; - any->parameter = new char[strlen(parameter) + 1]; - strcpy(any->parameter, parameter); - any->type = type; - any->x = x; - any->y = y; - any->x_size = x_size; - any->y_size = y_size; - any->window = window; - return any; -} - -// Destructor. -// It is defined here, so the compiler can create a single vtable -// instead of weak vtables in every compilation unit. -SVEventHandler::~SVEventHandler() = default; - -#ifndef GRAPHICS_DISABLED -/// This is the main loop which handles the ScrollView-logic from the server -/// to the client. It basically loops through messages, parses them to events -/// and distributes it to the waiting handlers. -/// It is run from a different thread and synchronizes via SVSync. -void* ScrollView::MessageReceiver(void* a) { - int counter_event_id = 0; // ongoing counter - char* message = nullptr; - // Wait until a new message appears in the input stream_. - do { - message = ScrollView::GetStream()->Receive(); - } while (message == nullptr); - -// This is the main loop which iterates until the server is dead (strlen = -1). -// It basically parses for 3 different messagetypes and then distributes the -// events accordingly. - while (1) { - // The new event we create. - SVEvent* cur = new SVEvent; - // The ID of the corresponding window. - int window_id; - - int ev_type; - - int n; - // Fill the new SVEvent properly. 
- sscanf(message, "%d,%d,%d,%d,%d,%d,%d,%n", &window_id, &ev_type, &cur->x, - &cur->y, &cur->x_size, &cur->y_size, &cur->command_id, &n); - char* p = (message + n); - - svmap_mu->Lock(); - cur->window = svmap[window_id]; - - if (cur->window != nullptr) { - cur->parameter = new char[strlen(p) + 1]; - strcpy(cur->parameter, p); - if (strlen(p) > 0) { // remove the last \n - cur->parameter[strlen(p)] = '\0'; - } - cur->type = static_cast(ev_type); - // Correct selection coordinates so x,y is the min pt and size is +ve. - if (cur->x_size > 0) - cur->x -= cur->x_size; - else - cur->x_size = -cur->x_size; - if (cur->y_size > 0) - cur->y -= cur->y_size; - else - cur->y_size = -cur->y_size; - // Returned y will be the bottom-left if y is reversed. - if (cur->window->y_axis_is_reversed_) - cur->y = cur->window->TranslateYCoordinate(cur->y + cur->y_size); - cur->counter = counter_event_id; - // Increase by 2 since we will also create an SVET_ANY event from cur, - // which will have a counter_id of cur + 1 (and thus gets processed - // after cur). - counter_event_id += 2; - - // In case of an SVET_EXIT event, quit the whole application. - if (ev_type == SVET_EXIT) { ScrollView::Exit(); } - - // Place two copies of it in the table for the window. - cur->window->SetEvent(cur); - - // Check if any of the threads currently waiting want it. 
- std::pair awaiting_list(cur->window, - cur->type); - std::pair awaiting_list_any(cur->window, - SVET_ANY); - std::pair awaiting_list_any_window((ScrollView*)nullptr, - SVET_ANY); - waiting_for_events_mu->Lock(); - if (waiting_for_events.count(awaiting_list) > 0) { - waiting_for_events[awaiting_list].second = cur; - waiting_for_events[awaiting_list].first->Signal(); - } else if (waiting_for_events.count(awaiting_list_any) > 0) { - waiting_for_events[awaiting_list_any].second = cur; - waiting_for_events[awaiting_list_any].first->Signal(); - } else if (waiting_for_events.count(awaiting_list_any_window) > 0) { - waiting_for_events[awaiting_list_any_window].second = cur; - waiting_for_events[awaiting_list_any_window].first->Signal(); - } else { - // No one wanted it, so delete it. - delete cur; - } - waiting_for_events_mu->Unlock(); - // Signal the corresponding semaphore twice (for both copies). - ScrollView* sv = svmap[window_id]; - if (sv != nullptr) { - sv->Signal(); - sv->Signal(); - } - } else { - delete cur; // Applied to no window. - } - svmap_mu->Unlock(); - - // Wait until a new message appears in the input stream_. - do { - message = ScrollView::GetStream()->Receive(); - } while (message == nullptr); - } - return nullptr; -} - -// Table to implement the color index values in the old system. -static const uint8_t table_colors[ScrollView::GREEN_YELLOW+1][4]= { - {0, 0, 0, 0}, // NONE (transparent) - {0, 0, 0, 255}, // BLACK. - {255, 255, 255, 255}, // WHITE. - {255, 0, 0, 255}, // RED. - {255, 255, 0, 255}, // YELLOW. - {0, 255, 0, 255}, // GREEN. - {0, 255, 255, 255}, // CYAN. - {0, 0, 255, 255}, // BLUE. - {255, 0, 255, 255}, // MAGENTA. - {0, 128, 255, 255}, // AQUAMARINE. - {0, 0, 64, 255}, // DARK_SLATE_BLUE. - {128, 128, 255, 255}, // LIGHT_BLUE. - {64, 64, 255, 255}, // MEDIUM_BLUE. - {0, 0, 32, 255}, // MIDNIGHT_BLUE. - {0, 0, 128, 255}, // NAVY_BLUE. - {192, 192, 255, 255}, // SKY_BLUE. - {64, 64, 128, 255}, // SLATE_BLUE. 
- {32, 32, 64, 255}, // STEEL_BLUE. - {255, 128, 128, 255}, // CORAL. - {128, 64, 0, 255}, // BROWN. - {128, 128, 0, 255}, // SANDY_BROWN. - {192, 192, 0, 255}, // GOLD. - {192, 192, 128, 255}, // GOLDENROD. - {0, 64, 0, 255}, // DARK_GREEN. - {32, 64, 0, 255}, // DARK_OLIVE_GREEN. - {64, 128, 0, 255}, // FOREST_GREEN. - {128, 255, 0, 255}, // LIME_GREEN. - {192, 255, 192, 255}, // PALE_GREEN. - {192, 255, 0, 255}, // YELLOW_GREEN. - {192, 192, 192, 255}, // LIGHT_GREY. - {64, 64, 128, 255}, // DARK_SLATE_GREY. - {64, 64, 64, 255}, // DIM_GREY. - {128, 128, 128, 255}, // GREY. - {64, 192, 0, 255}, // KHAKI. - {255, 0, 192, 255}, // MAROON. - {255, 128, 0, 255}, // ORANGE. - {255, 128, 64, 255}, // ORCHID. - {255, 192, 192, 255}, // PINK. - {128, 0, 128, 255}, // PLUM. - {255, 0, 64, 255}, // INDIAN_RED. - {255, 64, 0, 255}, // ORANGE_RED. - {255, 0, 192, 255}, // VIOLET_RED. - {255, 192, 128, 255}, // SALMON. - {128, 128, 0, 255}, // TAN. - {0, 255, 255, 255}, // TURQUOISE. - {0, 128, 128, 255}, // DARK_TURQUOISE. - {192, 0, 255, 255}, // VIOLET. - {128, 128, 0, 255}, // WHEAT. - {128, 255, 0, 255} // GREEN_YELLOW -}; - - -/******************************************************************************* -* Scrollview implementation. -*******************************************************************************/ - -SVNetwork* ScrollView::stream_ = nullptr; -int ScrollView::nr_created_windows_ = 0; -int ScrollView::image_index_ = 0; - -/// Calls Initialize with all arguments given. -ScrollView::ScrollView(const char* name, int x_pos, int y_pos, int x_size, - int y_size, int x_canvas_size, int y_canvas_size, - bool y_axis_reversed, const char* server_name) { - Initialize(name, x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size, - y_axis_reversed, server_name);} - -/// Calls Initialize with default argument for server_name_. 
-ScrollView::ScrollView(const char* name, int x_pos, int y_pos, int x_size, - int y_size, int x_canvas_size, int y_canvas_size, - bool y_axis_reversed) { - Initialize(name, x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size, - y_axis_reversed, "localhost"); -} - -/// Calls Initialize with default argument for server_name_ & y_axis_reversed. -ScrollView::ScrollView(const char* name, int x_pos, int y_pos, int x_size, - int y_size, int x_canvas_size, int y_canvas_size) { - Initialize(name, x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size, - false, "localhost"); -} - -/// Sets up a ScrollView window, depending on the constructor variables. -void ScrollView::Initialize(const char* name, int x_pos, int y_pos, int x_size, - int y_size, int x_canvas_size, int y_canvas_size, - bool y_axis_reversed, const char* server_name) { - // If this is the first ScrollView Window which gets created, there is no - // network connection yet and we have to set it up in a different thread. - if (stream_ == nullptr) { - nr_created_windows_ = 0; - stream_ = new SVNetwork(server_name, kSvPort); - waiting_for_events_mu = new SVMutex(); - svmap_mu = new SVMutex(); - SendRawMessage( - "svmain = luajava.bindClass('com.google.scrollview.ScrollView')\n"); - SVSync::StartThread(MessageReceiver, nullptr); - } - - // Set up the variables on the clientside. - nr_created_windows_++; - event_handler_ = nullptr; - event_handler_ended_ = false; - y_axis_is_reversed_ = y_axis_reversed; - y_size_ = y_canvas_size; - window_name_ = name; - window_id_ = nr_created_windows_; - // Set up polygon buffering. - points_ = new SVPolyLineBuffer; - points_->empty = true; - - svmap_mu->Lock(); - svmap[window_id_] = this; - svmap_mu->Unlock(); - - for (int i = 0; i < SVET_COUNT; i++) { - event_table_[i] = nullptr; - } - - mutex_ = new SVMutex(); - semaphore_ = new SVSemaphore(); - - // Set up an actual Window on the client side. 
- char message[kMaxMsgSize]; - snprintf(message, sizeof(message), - "w%u = luajava.newInstance('com.google.scrollview.ui" - ".SVWindow','%s',%u,%u,%u,%u,%u,%u,%u)\n", - window_id_, window_name_, window_id_, - x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size); - SendRawMessage(message); - - SVSync::StartThread(StartEventHandler, this); -} - -/// Sits and waits for events on this window. -void* ScrollView::StartEventHandler(void* a) { - ScrollView* sv = static_cast(a); - SVEvent* new_event; - - do { - stream_->Flush(); - sv->semaphore_->Wait(); - new_event = nullptr; - int serial = -1; - int k = -1; - sv->mutex_->Lock(); - // Check every table entry if he is is valid and not already processed. - - for (int i = 0; i < SVET_COUNT; i++) { - if (sv->event_table_[i] != nullptr && - (serial < 0 || sv->event_table_[i]->counter < serial)) { - new_event = sv->event_table_[i]; - serial = sv->event_table_[i]->counter; - k = i; - } - } - // If we didn't find anything we had an old alarm and just sleep again. - if (new_event != nullptr) { - sv->event_table_[k] = nullptr; - sv->mutex_->Unlock(); - if (sv->event_handler_ != nullptr) { sv->event_handler_->Notify(new_event); } - if (new_event->type == SVET_DESTROY) { - // Signal the destructor that it is safe to terminate. - sv->event_handler_ended_ = true; - sv = nullptr; - } - delete new_event; // Delete the pointer after it has been processed. - } else { sv->mutex_->Unlock(); } - // The thread should run as long as its associated window is alive. - } while (sv != nullptr); - return nullptr; -} -#endif // GRAPHICS_DISABLED - -ScrollView::~ScrollView() { - #ifndef GRAPHICS_DISABLED - svmap_mu->Lock(); - if (svmap[window_id_] != nullptr) { - svmap_mu->Unlock(); - // So the event handling thread can quit. 
- SendMsg("destroy()"); - - SVEvent* sve = AwaitEvent(SVET_DESTROY); - delete sve; - svmap_mu->Lock(); - svmap[window_id_] = nullptr; - svmap_mu->Unlock(); - // The event handler thread for this window *must* receive the - // destroy event and set its pointer to this to nullptr before we allow - // the destructor to exit. - while (!event_handler_ended_) - Update(); - } else { - svmap_mu->Unlock(); - } - delete mutex_; - delete semaphore_; - delete points_; - for (int i = 0; i < SVET_COUNT; i++) { - delete event_table_[i]; - } - #endif // GRAPHICS_DISABLED -} - -#ifndef GRAPHICS_DISABLED -/// Send a message to the server, attaching the window id. -void ScrollView::SendMsg(const char* format, ...) { - if (!points_->empty) - SendPolygon(); - va_list args; - char message[kMaxMsgSize]; - - va_start(args, format); // variable list - vsnprintf(message, kMaxMsgSize, format, args); - va_end(args); - - char form[kMaxMsgSize]; - snprintf(form, kMaxMsgSize, "w%u:%s\n", window_id_, message); - - stream_->Send(form); -} - -/// Send a message to the server without a -/// window id. Used for global events like exit(). -void ScrollView::SendRawMessage(const char* msg) { - stream_->Send(msg); -} - -/// Add an Event Listener to this ScrollView Window -void ScrollView::AddEventHandler(SVEventHandler* listener) { - event_handler_ = listener; -} - -void ScrollView::Signal() { - semaphore_->Signal(); -} - -void ScrollView::SetEvent(SVEvent* svevent) { -// Copy event - SVEvent* any = svevent->copy(); - SVEvent* specific = svevent->copy(); - any->counter = specific->counter + 1; - -// Place both events into the queue. - mutex_->Lock(); - // Delete the old objects.. - delete event_table_[specific->type]; - delete event_table_[SVET_ANY]; - // ...and put the new ones in the table. - event_table_[specific->type] = specific; - event_table_[SVET_ANY] = any; - mutex_->Unlock(); -} - - -/// Block until an event of the given type is received. 
-/// Note: The calling function is responsible for deleting the returned -/// SVEvent afterwards! -SVEvent* ScrollView::AwaitEvent(SVEventType type) { - // Initialize the waiting semaphore. - SVSemaphore* sem = new SVSemaphore(); - std::pair ea(this, type); - waiting_for_events_mu->Lock(); - waiting_for_events[ea] = std::pair (sem, (SVEvent*)nullptr); - waiting_for_events_mu->Unlock(); - // Wait on it, but first flush. - stream_->Flush(); - sem->Wait(); - // Process the event we got woken up for (its in waiting_for_events pair). - waiting_for_events_mu->Lock(); - SVEvent* ret = waiting_for_events[ea].second; - waiting_for_events.erase(ea); - delete sem; - waiting_for_events_mu->Unlock(); - return ret; -} - -// Block until any event on any window is received. -// No event is returned here! -SVEvent* ScrollView::AwaitEventAnyWindow() { - // Initialize the waiting semaphore. - SVSemaphore* sem = new SVSemaphore(); - std::pair ea((ScrollView*)nullptr, SVET_ANY); - waiting_for_events_mu->Lock(); - waiting_for_events[ea] = std::pair (sem, (SVEvent*)nullptr); - waiting_for_events_mu->Unlock(); - // Wait on it. - stream_->Flush(); - sem->Wait(); - // Process the event we got woken up for (its in waiting_for_events pair). - waiting_for_events_mu->Lock(); - SVEvent* ret = waiting_for_events[ea].second; - waiting_for_events.erase(ea); - waiting_for_events_mu->Unlock(); - return ret; -} - -// Send the current buffered polygon (if any) and clear it. -void ScrollView::SendPolygon() { - if (!points_->empty) { - points_->empty = true; // Allows us to use SendMsg. - int length = points_->xcoords.size(); - // length == 1 corresponds to 2 SetCursors in a row and only the - // last setCursor has any effect. - if (length == 2) { - // An isolated line! - SendMsg("drawLine(%d,%d,%d,%d)", - points_->xcoords[0], points_->ycoords[0], - points_->xcoords[1], points_->ycoords[1]); - } else if (length > 2) { - // A polyline. 
- SendMsg("createPolyline(%d)", length); - char coordpair[kMaxIntPairSize]; - std::string decimal_coords; - for (int i = 0; i < length; ++i) { - snprintf(coordpair, kMaxIntPairSize, "%d,%d,", - points_->xcoords[i], points_->ycoords[i]); - decimal_coords += coordpair; - } - decimal_coords += '\n'; - SendRawMessage(decimal_coords.c_str()); - SendMsg("drawPolyline()"); - } - points_->xcoords.clear(); - points_->ycoords.clear(); - } -} - - -/******************************************************************************* -* LUA "API" functions. -*******************************************************************************/ - -// Sets the position from which to draw to (x,y). -void ScrollView::SetCursor(int x, int y) { - SendPolygon(); - DrawTo(x, y); -} - -// Draws from the current position to (x,y) and sets the new position to it. -void ScrollView::DrawTo(int x, int y) { - points_->xcoords.push_back(x); - points_->ycoords.push_back(TranslateYCoordinate(y)); - points_->empty = false; -} - -// Draw a line using the current pen color. -void ScrollView::Line(int x1, int y1, int x2, int y2) { - if (!points_->xcoords.empty() && x1 == points_->xcoords.back() && - TranslateYCoordinate(y1) == points_->ycoords.back()) { - // We are already at x1, y1, so just draw to x2, y2. - DrawTo(x2, y2); - } else if (!points_->xcoords.empty() && x2 == points_->xcoords.back() && - TranslateYCoordinate(y2) == points_->ycoords.back()) { - // We are already at x2, y2, so just draw to x1, y1. - DrawTo(x1, y1); - } else { - // This is a new line. - SetCursor(x1, y1); - DrawTo(x2, y2); - } -} - -// Set the visibility of the window. -void ScrollView::SetVisible(bool visible) { - if (visible) { SendMsg("setVisible(true)"); - } else { SendMsg("setVisible(false)"); } -} - -// Set the alwaysOnTop flag. -void ScrollView::AlwaysOnTop(bool b) { - if (b) { SendMsg("setAlwaysOnTop(true)"); - } else { SendMsg("setAlwaysOnTop(false)"); } -} - -// Adds a message entry to the message box. 
-void ScrollView::AddMessage(const char* format, ...) { - va_list args; - char message[kMaxMsgSize]; - char form[kMaxMsgSize]; - - va_start(args, format); // variable list - vsnprintf(message, kMaxMsgSize, format, args); - va_end(args); - - snprintf(form, kMaxMsgSize, "w%u:%s", window_id_, message); - - char* esc = AddEscapeChars(form); - SendMsg("addMessage(\"%s\")", esc); - delete[] esc; -} - -// Set a messagebox. -void ScrollView::AddMessageBox() { - SendMsg("addMessageBox()"); -} - -// Exit the client completely (and notify the server of it). -void ScrollView::Exit() { - SendRawMessage("svmain:exit()"); - exit(0); -} - -// Clear the canvas. -void ScrollView::Clear() { - SendMsg("clear()"); -} - -// Set the stroke width. -void ScrollView::Stroke(float width) { - SendMsg("setStrokeWidth(%f)", width); -} - -// Draw a rectangle using the current pen color. -// The rectangle is filled with the current brush color. -void ScrollView::Rectangle(int x1, int y1, int x2, int y2) { - if (x1 == x2 && y1 == y2) - return; // Scrollviewer locks up. - SendMsg("drawRectangle(%d,%d,%d,%d)", - x1, TranslateYCoordinate(y1), x2, TranslateYCoordinate(y2)); -} - -// Draw an ellipse using the current pen color. -// The ellipse is filled with the current brush color. -void ScrollView::Ellipse(int x1, int y1, int width, int height) { - SendMsg("drawEllipse(%d,%d,%u,%u)", - x1, TranslateYCoordinate(y1), width, height); -} - -// Set the pen color to the given RGB values. -void ScrollView::Pen(int red, int green, int blue) { - SendMsg("pen(%d,%d,%d)", red, green, blue); -} - -// Set the pen color to the given RGB values. -void ScrollView::Pen(int red, int green, int blue, int alpha) { - SendMsg("pen(%d,%d,%d,%d)", red, green, blue, alpha); -} - -// Set the brush color to the given RGB values. -void ScrollView::Brush(int red, int green, int blue) { - SendMsg("brush(%d,%d,%d)", red, green, blue); -} - -// Set the brush color to the given RGB values. 
-void ScrollView::Brush(int red, int green, int blue, int alpha) { - SendMsg("brush(%d,%d,%d,%d)", red, green, blue, alpha); -} - -// Set the attributes for future Text(..) calls. -void ScrollView::TextAttributes(const char* font, int pixel_size, - bool bold, bool italic, bool underlined) { - const char* b; - const char* i; - const char* u; - - if (bold) { b = "true"; - } else { b = "false"; } - if (italic) { i = "true"; - } else { i = "false"; } - if (underlined) { u = "true"; - } else { u = "false"; } - SendMsg("textAttributes('%s',%u,%s,%s,%s)", font, pixel_size, - b, i, u); -} - -// Draw text at the given coordinates. -void ScrollView::Text(int x, int y, const char* mystring) { - SendMsg("drawText(%d,%d,'%s')", x, TranslateYCoordinate(y), mystring); -} - -// Open and draw an image given a name at (x,y). -void ScrollView::Image(const char* image, int x_pos, int y_pos) { - SendMsg("openImage('%s')", image); - SendMsg("drawImage('%s',%d,%d)", - image, x_pos, TranslateYCoordinate(y_pos)); -} - -// Add new checkboxmenuentry to menubar. -void ScrollView::MenuItem(const char* parent, const char* name, - int cmdEvent, bool flag) { - if (parent == nullptr) { parent = ""; } - if (flag) { SendMsg("addMenuBarItem('%s','%s',%d,true)", - parent, name, cmdEvent); - } else { SendMsg("addMenuBarItem('%s','%s',%d,false)", - parent, name, cmdEvent); } -} - -// Add new menuentry to menubar. -void ScrollView::MenuItem(const char* parent, const char* name, int cmdEvent) { - if (parent == nullptr) { parent = ""; } - SendMsg("addMenuBarItem('%s','%s',%d)", parent, name, cmdEvent); -} - -// Add new submenu to menubar. -void ScrollView::MenuItem(const char* parent, const char* name) { - if (parent == nullptr) { parent = ""; } - SendMsg("addMenuBarItem('%s','%s')", parent, name); -} - -// Add new submenu to popupmenu. 
-void ScrollView::PopupItem(const char* parent, const char* name) { - if (parent == nullptr) { parent = ""; } - SendMsg("addPopupMenuItem('%s','%s')", parent, name); -} - -// Add new submenuentry to popupmenu. -void ScrollView::PopupItem(const char* parent, const char* name, - int cmdEvent, const char* value, const char* desc) { - if (parent == nullptr) { parent = ""; } - char* esc = AddEscapeChars(value); - char* esc2 = AddEscapeChars(desc); - SendMsg("addPopupMenuItem('%s','%s',%d,'%s','%s')", parent, name, - cmdEvent, esc, esc2); - delete[] esc; - delete[] esc2; -} - -// Send an update message for a single window. -void ScrollView::UpdateWindow() { - SendMsg("update()"); -} - -// Note: this is an update to all windows -void ScrollView::Update() { - svmap_mu->Lock(); - for (std::map::iterator iter = svmap.begin(); - iter != svmap.end(); ++iter) { - if (iter->second != nullptr) - iter->second->UpdateWindow(); - } - svmap_mu->Unlock(); -} - -// Set the pen color, using an enum value (e.g. ScrollView::ORANGE) -void ScrollView::Pen(Color color) { - Pen(table_colors[color][0], table_colors[color][1], - table_colors[color][2], table_colors[color][3]); -} - -// Set the brush color, using an enum value (e.g. 
ScrollView::ORANGE) -void ScrollView::Brush(Color color) { - Brush(table_colors[color][0], - table_colors[color][1], - table_colors[color][2], - table_colors[color][3]); -} - -// Shows a modal Input Dialog which can return any kind of String -char* ScrollView::ShowInputDialog(const char* msg) { - SendMsg("showInputDialog(\"%s\")", msg); - SVEvent* ev; - // wait till an input event (all others are thrown away) - ev = AwaitEvent(SVET_INPUT); - char* p = new char[strlen(ev->parameter) + 1]; - strcpy(p, ev->parameter); - delete ev; - return p; -} - -// Shows a modal Yes/No Dialog which will return 'y' or 'n' -int ScrollView::ShowYesNoDialog(const char* msg) { - SendMsg("showYesNoDialog(\"%s\")", msg); - SVEvent* ev; - // Wait till an input event (all others are thrown away) - ev = AwaitEvent(SVET_INPUT); - int a = ev->parameter[0]; - delete ev; - return a; -} - -// Zoom the window to the rectangle given upper left corner and -// lower right corner. -void ScrollView::ZoomToRectangle(int x1, int y1, int x2, int y2) { - y1 = TranslateYCoordinate(y1); - y2 = TranslateYCoordinate(y2); - SendMsg("zoomRectangle(%d,%d,%d,%d)", - std::min(x1, x2), std::min(y1, y2), std::max(x1, x2), std::max(y1, y2)); -} - -// Send an image of type Pix. -void ScrollView::Image(struct Pix* image, int x_pos, int y_pos) { - l_uint8* data; - size_t size; - pixWriteMem(&data, &size, image, IFF_PNG); - int base64_len = (size + 2) / 3 * 4; - y_pos = TranslateYCoordinate(y_pos); - SendMsg("readImage(%d,%d,%d)", x_pos, y_pos, base64_len); - // Base64 encode the data. 
- const char kBase64Table[64] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', - 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/', - }; - char* base64 = new char[base64_len + 1]; - memset(base64, '=', base64_len); - base64[base64_len] = '\0'; - int remainder = 0; - int bits_left = 0; - int code_len = 0; - for (size_t i = 0; i < size; ++i) { - int code = (data[i] >> (bits_left + 2)) | remainder; - base64[code_len++] = kBase64Table[code & 63]; - bits_left += 2; - remainder = data[i] << (6 - bits_left); - if (bits_left == 6) { - base64[code_len++] = kBase64Table[remainder & 63]; - bits_left = 0; - remainder = 0; - } - } - if (bits_left > 0) - base64[code_len++] = kBase64Table[remainder & 63]; - SendRawMessage(base64); - delete [] base64; - lept_free(data); -} - -// Escapes the ' character with a \, so it can be processed by LUA. -// Note: The caller will have to make sure he deletes the newly allocated item. -char* ScrollView::AddEscapeChars(const char* input) { - const char* nextptr = strchr(input, '\''); - const char* lastptr = input; - char* message = new char[kMaxMsgSize]; - int pos = 0; - while (nextptr != nullptr) { - strncpy(message+pos, lastptr, nextptr-lastptr); - pos += nextptr - lastptr; - message[pos] = '\\'; - pos += 1; - lastptr = nextptr; - nextptr = strchr(nextptr+1, '\''); - } - strcpy(message+pos, lastptr); - return message; -} - -// Inverse the Y axis if the coordinates are actually inversed. 
-int ScrollView::TranslateYCoordinate(int y) { - if (!y_axis_is_reversed_) { return y; - } else { return y_size_ - y; } -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/scrollview.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/scrollview.h deleted file mode 100644 index d0d675c9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/scrollview.h +++ /dev/null @@ -1,418 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: scrollview.h -// Description: ScrollView -// Author: Joern Wanke -// Created: Thu Nov 29 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// ScrollView is designed as an UI which can be run remotely. This is the -// client code for it, the server part is written in java. The client consists -// mainly of 2 parts: -// The "core" ScrollView which sets up the remote connection, -// takes care of event handling etc. -// The other part of ScrollView consists of predefined API calls through LUA, -// which can basically be used to get a zoomable canvas in which it is possible -// to draw lines, text etc. 
-// Technically, thanks to LUA, its even possible to bypass the here defined LUA -// API calls at all and generate a java user interface from scratch (or -// basically generate any kind of java program, possibly even dangerous ones). - -#ifndef TESSERACT_VIEWER_SCROLLVIEW_H_ -#define TESSERACT_VIEWER_SCROLLVIEW_H_ -// TODO(rays) Move ScrollView into the tesseract namespace. -#ifndef OCR_SCROLLVIEW_H__ - -#include - -class ScrollView; -class SVNetwork; -class SVMutex; -class SVSemaphore; -struct SVPolyLineBuffer; - -enum SVEventType { - SVET_DESTROY, // Window has been destroyed by user. - SVET_EXIT, // User has destroyed the last window by clicking on the 'X'. - SVET_CLICK, // Left button pressed. - SVET_SELECTION, // Left button selection. - SVET_INPUT, // There is some input (single key or a whole string). - SVET_MOUSE, // The mouse has moved with a button pressed. - SVET_MOTION, // The mouse has moved with no button pressed. - SVET_HOVER, // The mouse has stayed still for a second. - SVET_POPUP, // A command selected through a popup menu. - SVET_MENU, // A command selected through the menubar. - SVET_ANY, // Any of the above. - - SVET_COUNT // Array sizing. -}; - -struct SVEvent { - ~SVEvent() { delete [] parameter; } - SVEvent* copy(); - SVEventType type; // What kind of event. - ScrollView* window; // Window event relates to. - int x; // Coords of click or selection. - int y; - int x_size; // Size of selection. - int y_size; - int command_id; // The ID of the possibly associated event (e.g. MENU) - char* parameter; // Any string that might have been passed as argument. - int counter; // Used to detect which kind of event to process next. 
- - SVEvent() { - window = nullptr; - parameter = nullptr; - } - - SVEvent(const SVEvent&); - SVEvent& operator=(const SVEvent&); -}; - -// The SVEventHandler class is used for Event handling: If you register your -// class as SVEventHandler to a ScrollView Window, the SVEventHandler will be -// called whenever an appropriate event occurs. -class SVEventHandler { - public: - virtual ~SVEventHandler(); - -// Gets called by the SV Window. Does nothing on default, overwrite this -// to implement the desired behaviour - virtual void Notify(const SVEvent* sve) { (void)sve; } -}; - -// The ScrollView class provides the expernal API to the scrollviewer process. -// The scrollviewer process manages windows and displays images, graphics and -// text while allowing the user to zoom and scroll the windows arbitrarily. -// Each ScrollView class instance represents one window, and stuff is drawn in -// the window through method calls on the class. The constructor is used to -// create the class instance (and the window). - -class ScrollView { - public: -// Color enum for pens and brushes. - enum Color { - NONE, - BLACK, - WHITE, - RED, - YELLOW, - GREEN, - CYAN, - BLUE, - MAGENTA, - AQUAMARINE, - DARK_SLATE_BLUE, - LIGHT_BLUE, - MEDIUM_BLUE, - MIDNIGHT_BLUE, - NAVY_BLUE, - SKY_BLUE, - SLATE_BLUE, - STEEL_BLUE, - CORAL, - BROWN, - SANDY_BROWN, - GOLD, - GOLDENROD, - DARK_GREEN, - DARK_OLIVE_GREEN, - FOREST_GREEN, - LIME_GREEN, - PALE_GREEN, - YELLOW_GREEN, - LIGHT_GREY, - DARK_SLATE_GREY, - DIM_GREY, - GREY, - KHAKI, - MAROON, - ORANGE, - ORCHID, - PINK, - PLUM, - INDIAN_RED, - ORANGE_RED, - VIOLET_RED, - SALMON, - TAN, - TURQUOISE, - DARK_TURQUOISE, - VIOLET, - WHEAT, - GREEN_YELLOW // Make sure this one is last. -}; - - ~ScrollView(); - -#ifndef GRAPHICS_DISABLED - -// Create a window. The pixel size of the window may be 0,0, in which case -// a default size is selected based on the size of your canvas. -// The canvas may not be 0,0 in size! 
- ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, - int x_canvas_size, int y_canvas_size); -// With a flag whether the x axis is reversed. - ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, - int x_canvas_size, int y_canvas_size, bool y_axis_reversed); -// Connect to a server other than localhost. - ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, - int x_canvas_size, int y_canvas_size, bool y_axis_reversed, - const char* server_name); -/******************************************************************************* -* Event handling -* To register as listener, the class has to derive from the SVEventHandler -* class, which consists of a notifyMe(SVEvent*) function that should be -* overwritten to process the event the way you want. -*******************************************************************************/ - -// Add an Event Listener to this ScrollView Window. - void AddEventHandler(SVEventHandler* listener); - -// Block until an event of the given type is received. - SVEvent* AwaitEvent(SVEventType type); - -// Block until any event on any window is received. - SVEvent* AwaitEventAnyWindow(); - -/******************************************************************************* -* Getters and Setters -*******************************************************************************/ - -// Returns the title of the window. - const char* GetName() { return window_name_; } - -// Returns the unique ID of the window. - int GetId() { return window_id_; } - -/******************************************************************************* -* API functions for LUA calls -* the implementations for these can be found in svapi.cc -* (keep in mind that the window is actually created through the ScrollView -* constructor, so this is not listed here) -*******************************************************************************/ - -// Draw a Pix on (x,y). 
- void Image(struct Pix* image, int x_pos, int y_pos); - -// Flush buffers and update display. - static void Update(); - -// Exit the program. - static void Exit(); - -// Update the contents of a specific window. - void UpdateWindow(); - -// Erase all content from the window, but do not destroy it. - void Clear(); - -// Set pen color with an enum. - void Pen(Color color); - -// Set pen color to RGB (0-255). - void Pen(int red, int green, int blue); - -// Set pen color to RGBA (0-255). - void Pen(int red, int green, int blue, int alpha); - -// Set brush color with an enum. - void Brush(Color color); - -// Set brush color to RGB (0-255). - void Brush(int red, int green, int blue); - -// Set brush color to RGBA (0-255). - void Brush(int red, int green, int blue, int alpha); - -// Set attributes for future text, like font name (e.g. -// "Times New Roman"), font size etc.. -// Note: The underlined flag is currently not supported - void TextAttributes(const char* font, int pixel_size, - bool bold, bool italic, bool underlined); - -// Draw line from (x1,y1) to (x2,y2) with the current pencolor. - void Line(int x1, int y1, int x2, int y2); - -// Set the stroke width of the pen. - void Stroke(float width); - -// Draw a rectangle given upper left corner and lower right corner. -// The current pencolor is used as outline, the brushcolor to fill the shape. - void Rectangle(int x1, int y1, int x2, int y2); - -// Draw an ellipse centered on (x,y). -// The current pencolor is used as outline, the brushcolor to fill the shape. - void Ellipse(int x, int y, int width, int height); - -// Draw text with the current pencolor - void Text(int x, int y, const char* mystring); - -// Draw an image from a local filename. This should be faster than createImage. -// WARNING: This only works on a local machine. This also only works image -// types supported by java (like bmp,jpeg,gif,png) since the image is opened by -// the server. 
- void Image(const char* image, int x_pos, int y_pos); - -// Set the current position to draw from (x,y). In conjunction with... - void SetCursor(int x, int y); - -// ...this function, which draws a line from the current to (x,y) and then -// sets the new position to the new (x,y), this can be used to easily draw -// polygons using vertices - void DrawTo(int x, int y); - -// Set the SVWindow visible/invisible. - void SetVisible(bool visible); - -// Set the SVWindow always on top or not always on top. - void AlwaysOnTop(bool b); - -// Shows a modal dialog with "msg" as question and returns 'y' or 'n'. - int ShowYesNoDialog(const char* msg); - -// Shows a modal dialog with "msg" as question and returns a char* string. -// Constraint: As return, only words (e.g. no whitespaces etc.) are allowed. - char* ShowInputDialog(const char* msg); - -// Adds a messagebox to the SVWindow. This way, it can show the messages... - void AddMessageBox(); - -// ...which can be added by this command. -// This is intended as an "debug" output window. - void AddMessage(const char* format, ...); - -// Zoom the window to the rectangle given upper left corner and -// lower right corner. - void ZoomToRectangle(int x1, int y1, int x2, int y2); - -// Custom messages (manipulating java code directly) can be send through this. -// Send a message to the server and attach the Id of the corresponding window. -// Note: This should only be called if you are know what you are doing, since -// you are fiddling with the Java objects on the server directly. Calling -// this just for fun will likely break your application! -// It is public so you can actually take use of the LUA functionalities, but -// be careful! - void SendMsg(const char* msg, ...); - -// Custom messages (manipulating java code directly) can be send through this. -// Send a message to the server without adding the -// window id. Used for global events like Exit(). 
-// Note: This should only be called if you are know what you are doing, since -// you are fiddling with the Java objects on the server directly. Calling -// this just for fun will likely break your application! -// It is public so you can actually take use of the LUA functionalities, but -// be careful! - static void SendRawMessage(const char* msg); - -/******************************************************************************* -* Add new menu entries to parent. If parent is "", the entry gets added to the -* main menubar (toplevel). -*******************************************************************************/ -// This adds a new submenu to the menubar. - void MenuItem(const char* parent, const char* name); - -// This adds a new (normal) menu entry with an associated eventID, which should -// be unique among menubar eventIDs. - void MenuItem(const char* parent, const char* name, int cmdEvent); - - // This adds a new checkbox entry, which might initially be flagged. - void MenuItem(const char* parent, const char* name, - int cmdEvent, bool flagged); - -// This adds a new popup submenu to the popup menu. If parent is "", the entry -// gets added at "toplevel" popupmenu. - void PopupItem(const char* parent, const char* name); - -// This adds a new popup entry with the associated eventID, which should be -// unique among popup eventIDs. -// If value and desc are given, on a click the server will ask you to modify -// the value and return the new value. - void PopupItem(const char* parent, const char* name, - int cmdEvent, const char* value, const char* desc); - -// Returns the correct Y coordinate for a window, depending on whether it might -// have to be flipped (by ySize). - int TranslateYCoordinate(int y); - - private: -// Transfers a binary Image. - void TransferBinaryImage(struct Pix* image); -// Transfers a gray scale Image. - void TransferGrayImage(struct Pix* image); -// Transfers a 32-Bit Image. 
- void Transfer32bppImage(struct Pix* image); - -// Sets up ScrollView, depending on the variables from the constructor. - void Initialize(const char* name, int x_pos, int y_pos, int x_size, - int y_size, int x_canvas_size, int y_canvas_size, - bool y_axis_reversed, const char* server_name); - -// Send the current buffered polygon (if any) and clear it. - void SendPolygon(); - -// Start the message receiving thread. - static void* MessageReceiver(void* a); - -// Place an event into the event_table (synchronized). - void SetEvent(SVEvent* svevent); - -// Wake up the semaphore. - void Signal(); - -// Returns the unique, shared network stream. - static SVNetwork* GetStream() { return stream_; } - -// Starts a new event handler. Called whenever a new window is created. - static void* StartEventHandler(void* sv); - -// Escapes the ' character with a \, so it can be processed by LUA. - char* AddEscapeChars(const char* input); - - // The event handler for this window. - SVEventHandler* event_handler_; - // The name of the window. - const char* window_name_; - // The id of the window. - int window_id_; - // The points of the currently under-construction polyline. - SVPolyLineBuffer* points_; - // Whether the axis is reversed. - bool y_axis_is_reversed_; - // Set to true only after the event handler has terminated. - bool event_handler_ended_; - // If the y axis is reversed, flip all y values by ySize. - int y_size_; - // # of created windows (used to assign an id to each ScrollView* for svmap). - static int nr_created_windows_; - // Serial number of sent images to ensure that the viewer knows they - // are distinct. - static int image_index_; - - // The stream through which the c++ client is connected to the server. - static SVNetwork* stream_; - - // Table of all the currently queued events. - SVEvent* event_table_[SVET_COUNT]; - - // Mutex to access the event_table_ in a synchronized fashion. - SVMutex* mutex_; - - // Semaphore to the thread belonging to this window. 
- SVSemaphore* semaphore_; -#endif // GRAPHICS_DISABLED -}; - -#endif // OCR_SCROLLVIEW_H__ -#endif // TESSERACT_VIEWER_SCROLLVIEW_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svmnode.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svmnode.cpp deleted file mode 100644 index a8599f0e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svmnode.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: svmnode.cpp -// description_: ScrollView Menu Node -// Author: Joern Wanke -// Created: Thu Nov 29 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// A SVMenuNode is an entity which contains the mapping from a menu entry on -// the server side to the corresponding associated commands on the client. -// It is designed to be a tree structure with a root node, which can then be -// used to generate the appropriate messages to the server to display the -// menu structure there. -// A SVMenuNode can both be used in the context_ of popup menus as well as -// menu bars. - -#include -#include - -#include "svmnode.h" - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifndef GRAPHICS_DISABLED - -#include "scrollview.h" - -// Create the empty root menu node. with just a caption. All other nodes should -// be added to this or one of the submenus. -SVMenuNode::SVMenuNode() { - cmd_event_ = -1; - child_ = nullptr; - next_ = nullptr; - parent_ = nullptr; - toggle_value_ = false; - is_check_box_entry_ = false; -} - -SVMenuNode::~SVMenuNode() { -} - -// Create a new sub menu node with just a caption. This is used to create -// nodes which act as parent nodes to other nodes (e.g. submenus). -SVMenuNode* SVMenuNode::AddChild(const char* txt) { - SVMenuNode* s = new SVMenuNode(-1, txt, false, false, nullptr, nullptr); - this->AddChild(s); - return s; -} - -// Create a "normal" menu node which is associated with a command event. -void SVMenuNode::AddChild(const char* txt, int command_event) { - this->AddChild(new SVMenuNode(command_event, txt, false, false, nullptr, nullptr)); -} - -// Create a menu node with an associated value (which might be changed -// through the gui). -void SVMenuNode::AddChild(const char* txt, int command_event, - const char* val) { - this->AddChild(new SVMenuNode(command_event, txt, false, false, val, nullptr)); -} - -// Create a menu node with an associated value and description_. -void SVMenuNode::AddChild(const char* txt, int command_event, const char* val, - const char* desc) { - this->AddChild(new SVMenuNode(command_event, txt, false, false, val, desc)); -} - -// Create a flag menu node. -void SVMenuNode::AddChild(const char* txt, int command_event, int tv) { - this->AddChild(new SVMenuNode(command_event, txt, tv, true, nullptr, nullptr)); -} - -// Convenience function called from the different constructors to initialize -// the different values of the menu node. 
-SVMenuNode::SVMenuNode(int command_event, const char* txt, - int tv, bool check_box_entry, const char* val, - const char* desc) - : text_(txt), value_(val), description_(desc) { - cmd_event_ = command_event; - - child_ = nullptr; - next_ = nullptr; - parent_ = nullptr; - toggle_value_ = tv != 0; - is_check_box_entry_ = check_box_entry; -} - -// Add a child node to this menu node. -void SVMenuNode::AddChild(SVMenuNode* svmn) { - svmn->parent_ = this; - // No children yet. - if (child_ == nullptr) { - child_ = svmn; - } else { - SVMenuNode* cur = child_; - while (cur->next_ != nullptr) { cur = cur->next_; } - cur->next_ = svmn; - } -} - -// Build a menu structure for the server and send the necessary messages. -// Should be called on the root node. If menu_bar is true, a menu_bar menu -// is built (e.g. on top of the window), if it is false a popup menu is -// built which gets shown by right clicking on the window. -// Deletes itself afterwards. -void SVMenuNode::BuildMenu(ScrollView* sv, bool menu_bar) { - if ((parent_ != nullptr) && (menu_bar)) { - if (is_check_box_entry_) { - sv->MenuItem(parent_->text_.string(), text_.string(), cmd_event_, - toggle_value_); - } else { - sv->MenuItem(parent_->text_.string(), text_.string(), cmd_event_); } - } else if ((parent_ != nullptr) && (!menu_bar)) { - if (description_.length() > 0) { - sv->PopupItem(parent_->text_.string(), text_.string(), cmd_event_, - value_.string(), description_.string()); - } else { - sv->PopupItem(parent_->text_.string(), text_.string()); - } - } - if (child_ != nullptr) { - child_->BuildMenu(sv, menu_bar); delete child_; - } - if (next_ != nullptr) { - next_->BuildMenu(sv, menu_bar); delete next_; - } -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svmnode.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svmnode.h deleted file mode 100644 index 326a88a1..00000000 --- 
a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svmnode.h +++ /dev/null @@ -1,96 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: svmnode.h -// description_: ScrollView Menu Node -// Author: Joern Wanke -// Created: Thu Nov 29 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// A SVMenuNode is an entity which contains the mapping from a menu entry on -// the server side to the corresponding associated commands on the client. -// It is designed to be a tree structure with a root node, which can then be -// used to generate the appropriate messages to the server to display the -// menu structure there. -// A SVMenuNode can both be used in the context_ of popup menus as well as -// menu bars. - -#ifndef TESSERACT_VIEWER_SVMNODE_H_ -#define TESSERACT_VIEWER_SVMNODE_H_ - -#include "strngs.h" - -class ScrollView; - -class SVMenuNode { - public: - // Creating the (empty) root menu node. - SVMenuNode(); - - // Destructor for every node. - ~SVMenuNode(); - - // Create a new sub menu node with just a caption. This is used to create - // nodes which act as parent nodes to other nodes (e.g. submenus). - SVMenuNode* AddChild(const char* txt); - - // Create a "normal" menu node which is associated with a command event. - void AddChild(const char* txt, int command_event); - - // Create a flag menu node. 
- void AddChild(const char* txt, int command_event, int tv); - - // Create a menu node with an associated value (which might be changed - // through the gui). - void AddChild(const char* txt, int command_event, const char* val); - - // Create a menu node with an associated value and description_. - void AddChild(const char* txt, int command_event, - const char* val, const char* desc); - - // Build a menu structure for the server and send the necessary messages. - // Should be called on the root node. If menu_bar is true, a menu_bar menu - // is built (e.g. on top of the window), if it is false a popup menu is - // built which gets shown by right clicking on the window. - void BuildMenu(ScrollView *sv, bool menu_bar = true); - - private: - // Constructor holding the actual node data. - SVMenuNode(int command_event, const char* txt, int tv, - bool check_box_entry, const char* val, const char* desc); - - // Adds a new menu node to the current node. - void AddChild(SVMenuNode* svmn); - - // The parent node of this node. - SVMenuNode* parent_; - // The first child of this node. - SVMenuNode* child_; - // The next "sibling" of this node (e.g. same parent). - SVMenuNode* next_; - // Whether this menu node actually is a flag. - bool is_check_box_entry_; - - // The command event associated with a specific menu node. Should be unique. - int cmd_event_; - // The caption associated with a specific menu node. - STRING text_; - // The value of the flag (if this menu node is a flag). - bool toggle_value_; - // The value of the menu node. (optional) - STRING value_; - // A description_ of the value. 
(optional) - STRING description_; -}; - -#endif // TESSERACT_VIEWER_SVMNODE_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svpaint.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svpaint.cpp deleted file mode 100644 index d8f3a53e..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svpaint.cpp +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2007 Google Inc. All Rights Reserved. -// -// Author: Joern Wanke -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Simple drawing program to illustrate ScrollView capabilities. -// -// Functionality: -// - The menubar is used to select from different sample styles of input. -// - With the RMB it is possible to change the RGB values in different -// popup menus. -// - A LMB click either draws point-to-point, point or text. -// - A LMB dragging either draws a line, a rectangle or ellipse. - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifndef GRAPHICS_DISABLED -#include "scrollview.h" -#include "svmnode.h" -#include -#include - -// The current color values we use, initially white (== ScrollView::WHITE). -static int rgb[3] = { 255, 255, 255 }; - -class SVPaint : public SVEventHandler { - public: - explicit SVPaint(const char* server_name); -// This is the main event handling function that we need to overwrite, defined -// in SVEventHandler. 
- void Notify(const SVEvent* sv_event); - private: -// The Handler take care of the SVET_POPUP, SVET_MENU, SVET_CLICK and -// SVET_SELECTION events. - void PopupHandler(const SVEvent* sv_event); - void MenuBarHandler(const SVEvent* sv_event); - void ClickHandler(const SVEvent* sv_event); - void SelectionHandler(const SVEvent* sv_event); - -// Convenience functions to build little menus. - SVMenuNode* BuildPopupMenu(); - SVMenuNode* BuildMenuBar(); - -// Our window. - ScrollView* window_; - -// The mode we are in when an SVET_CLICK or an SVET_SELECTION event occurs. - int click_mode_; - int drag_mode_; - -// In the point-to-point drawing mode, we need to set a start-point the first -// time we call it (e.g. call SetCursor). - bool has_start_point_; -}; - -// Build a sample popup menu. -SVMenuNode* SVPaint::BuildPopupMenu() { - SVMenuNode* root = new SVMenuNode(); // Empty root node - // Initial color is white, so we all values to 255. - root->AddChild("R", // Shown caption. - 1, // assoc. command_id. - "255", // initial value. - "Red Color Value?"); // Shown description. - root->AddChild("G", 2, "255", "Green Color Value?"); - root->AddChild("B", 3, "255", "Blue Color Value?"); - return root; -} - -// Build a sample menu bar. -SVMenuNode* SVPaint::BuildMenuBar() { - SVMenuNode* root = new SVMenuNode(); // Empty root node - - // Create some submenus and add them to the root. - SVMenuNode* click = root->AddChild("Clicking"); - SVMenuNode* drag = root->AddChild("Dragging"); - - // Put some nodes into the submenus. - click->AddChild("Point to Point Drawing", // Caption. - 1); // command_id. - click->AddChild("Point Drawing", 2); - click->AddChild("Text Drawing", 3); - drag->AddChild("Line Drawing", 4); - drag->AddChild("Rectangle Drawing", 5); - drag->AddChild("Ellipse Drawing", 6); - return root; -} - -// Takes care of the SVET_POPUP events. -// In our case, SVET_POPUP is used to set RGB values. 
-void SVPaint::PopupHandler(const SVEvent* sv_event) { - // Since we only have the RGB values as popup items, - // we take a shortcut to not bloat up code: - rgb[sv_event->command_id - 1] = atoi(sv_event->parameter); - window_->Pen(rgb[0], rgb[1], rgb[2]); -} - -// Takes care of the SVET_MENU events. -// In our case, we change either the click_mode_ (commands 1-3) -// or the drag_mode_ (commands 4-6). -void SVPaint::MenuBarHandler(const SVEvent* sv_event) { - if ((sv_event->command_id > 0) && (sv_event->command_id < 4)) { - click_mode_ = sv_event->command_id; - has_start_point_ = false; - } else { drag_mode_ = sv_event->command_id; } -} - -// Takes care of the SVET_CLICK events. -// Depending on the click_mode_ we are in, either do Point-to-Point drawing, -// point drawing, or draw text. -void SVPaint::ClickHandler(const SVEvent* sv_event) { - switch (click_mode_) { - case 1: //Point to Point - if (has_start_point_) { window_->DrawTo(sv_event->x, sv_event->y); - } else { - has_start_point_ = true; - window_->SetCursor(sv_event->x, sv_event->y); - } - break; - case 2: //Point Drawing..simulated by drawing a 1 pixel line. - window_->Line(sv_event->x, sv_event->y, sv_event->x, sv_event->y); - break; - case 3: //Text - // We show a modal input dialog on our window, then draw the input and - // finally delete the input pointer. - char* p = window_->ShowInputDialog("Text:"); - window_->Text(sv_event->x, sv_event->y, p); - delete [] p; - break; - } -} - -// Takes care of the SVET_SELECTION events. -// Depending on the drag_mode_ we are in, either draw a line, a rectangle or -// an ellipse. 
-void SVPaint::SelectionHandler(const SVEvent* sv_event) { - switch (drag_mode_) { - //FIXME inversed x_size, y_size - case 4: //Line - window_->Line(sv_event->x, sv_event->y, - sv_event->x - sv_event->x_size, - sv_event->y - sv_event->y_size); - break; - case 5: //Rectangle - window_->Rectangle(sv_event->x, sv_event->y, - sv_event->x - sv_event->x_size, - sv_event->y - sv_event->y_size); - break; - case 6: //Ellipse - window_->Ellipse(sv_event->x - sv_event->x_size, - sv_event->y - sv_event->y_size, - sv_event->x_size, sv_event->y_size); - break; - } -} - -// The event handling function from ScrollView which we have to overwrite. -// We handle CLICK, SELECTION, MENU and POPUP and throw away all other events. -void SVPaint::Notify(const SVEvent* sv_event) { - if (sv_event->type == SVET_CLICK) { ClickHandler(sv_event); } - else if (sv_event->type == SVET_SELECTION) { SelectionHandler(sv_event); } - else if (sv_event->type == SVET_MENU) { MenuBarHandler(sv_event); } - else if (sv_event->type == SVET_POPUP) { PopupHandler(sv_event); } - //throw other events away -} - -// Builds a new window, initializes the variables and event handler and builds -// the menu. -SVPaint::SVPaint(const char *server_name) { - window_ = new ScrollView("ScrollView Paint Example", // window caption - 0, 0, // x,y window position - 500, 500, // window size - 500, 500, // canvas size - false, // whether the Y axis is inversed. - // this is included due to legacy - // reasons for tesseract and enables - // us to have (0,0) as the LOWER left - // of the coordinate system. - server_name); // the server address. - - // Set the start modes to point-to-point and line drawing. - click_mode_ = 1; - drag_mode_ = 4; - has_start_point_ = false; - - // Bild our menus and add them to the window. The flag illustrates whether - // this is a menu bar. 
- SVMenuNode* popup_menu = BuildPopupMenu(); - popup_menu->BuildMenu(window_,false); - - SVMenuNode* bar_menu = BuildMenuBar(); - bar_menu->BuildMenu(window_,true); - - // Set the initial color values to White (could also be done by - // passing (rgb[0], rgb[1], rgb[2]). - window_->Pen(ScrollView::WHITE); - window_->Brush(ScrollView::WHITE); - - // Adds the event handler to the window. This actually ensures that Notify - // gets called when events occur. - window_->AddEventHandler(this); - - // Set the window visible (calling this is important to actually render - // everything. Without this call, the window would also be drawn, but the - // menu bars would be missing. - window_->SetVisible(true); - - // Rest this thread until its window is destroyed. - // Note that a special eventhandling thread was created when constructing - // the window. Due to this, the application will not deadlock here. - window_->AwaitEvent(SVET_DESTROY); - // We now have 3 Threads running: - // (1) The MessageReceiver thread which fetches messages and distributes them - // (2) The EventHandler thread which handles all events for window_ - // (3) The main thread which waits on window_ for a DESTROY event (blocked) -} - -// If a parameter is given, we try to connect to the given server. -// This enables us to test the remote capabilities of ScrollView. 
-int main(int argc, char** argv) { - const char* server_name; - if (argc > 1) { server_name = argv[1]; } else { server_name = "localhost"; } - SVPaint svp(server_name); -} -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svutil.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svutil.cpp deleted file mode 100644 index 1e80e6ad..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svutil.cpp +++ /dev/null @@ -1,460 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: svutil.cpp -// Description: ScrollView Utilities -// Author: Joern Wanke -// Created: Thu Nov 29 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// SVUtil contains the SVSync and SVNetwork classes, which are used for -// thread/process creation & synchronization and network connection. - -#include -#ifdef _WIN32 -#include -struct addrinfo { - struct sockaddr* ai_addr; - int ai_addrlen; - int ai_family; - int ai_socktype; - int ai_protocol; -}; -#else -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef __linux__ -#include -#endif -#include -#endif - -#include -#include -#include -#include -#include -#include - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "svutil.h" - -SVMutex::SVMutex() { -#ifdef _WIN32 - mutex_ = CreateMutex(0, FALSE, 0); -#else - pthread_mutex_init(&mutex_, nullptr); -#endif -} - -void SVMutex::Lock() { -#ifdef _WIN32 - WaitForSingleObject(mutex_, INFINITE); -#else - pthread_mutex_lock(&mutex_); -#endif -} - -void SVMutex::Unlock() { -#ifdef _WIN32 - ReleaseMutex(mutex_); -#else - pthread_mutex_unlock(&mutex_); -#endif -} - -// Create new thread. -void SVSync::StartThread(void* (*func)(void*), void* arg) { -#ifdef _WIN32 - LPTHREAD_START_ROUTINE f = (LPTHREAD_START_ROUTINE)func; - DWORD threadid; - HANDLE newthread = CreateThread(nullptr, // default security attributes - 0, // use default stack size - f, // thread function - arg, // argument to thread function - 0, // use default creation flags - &threadid); // returns the thread identifier -#else - pthread_t helper; - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - pthread_create(&helper, &attr, func, arg); -#endif -} - -#ifndef GRAPHICS_DISABLED - -const int kMaxMsgSize = 4096; - -// Signals a thread to exit. -void SVSync::ExitThread() { -#ifdef _WIN32 - // ExitThread(0); -#else - pthread_exit(nullptr); -#endif -} - -// Starts a new process. 
-void SVSync::StartProcess(const char* executable, const char* args) { - std::string proc; - proc.append(executable); - proc.append(" "); - proc.append(args); - std::cout << "Starting " << proc << std::endl; -#ifdef _WIN32 - STARTUPINFO start_info; - PROCESS_INFORMATION proc_info; - GetStartupInfo(&start_info); - if (!CreateProcess(nullptr, const_cast(proc.c_str()), nullptr, nullptr, FALSE, - CREATE_NO_WINDOW | DETACHED_PROCESS, nullptr, nullptr, - &start_info, &proc_info)) - return; -#else - int pid = fork(); - if (pid != 0) { // The father process returns - } else { -#ifdef __linux__ - // Make sure the java process terminates on exit, since its - // broken socket detection seems to be useless. - prctl(PR_SET_PDEATHSIG, 2, 0, 0, 0); -#endif - char* mutable_args = strdup(args); - int argc = 1; - for (int i = 0; mutable_args[i]; ++i) { - if (mutable_args[i] == ' ') { - ++argc; - } - } - std::unique_ptr argv(new char*[argc + 2]); - argv[0] = strdup(executable); - argv[1] = mutable_args; - argc = 2; - bool inquote = false; - for (int i = 0; mutable_args[i]; ++i) { - if (!inquote && mutable_args[i] == ' ') { - mutable_args[i] = '\0'; - argv[argc++] = mutable_args + i + 1; - } else if (mutable_args[i] == '"') { - inquote = !inquote; - mutable_args[i] = ' '; - } - } - argv[argc] = nullptr; - execvp(executable, argv.get()); - free(argv[0]); - free(argv[1]); - } -#endif -} - -SVSemaphore::SVSemaphore() { -#ifdef _WIN32 - semaphore_ = CreateSemaphore(0, 0, 10, 0); -#elif defined(__APPLE__) - char name[50]; - snprintf(name, sizeof(name), "%ld", random()); - sem_unlink(name); - semaphore_ = sem_open(name, O_CREAT , S_IWUSR, 0); - if (semaphore_ == SEM_FAILED) { - perror("sem_open"); - } -#else - sem_init(&semaphore_, 0, 0); -#endif -} - -void SVSemaphore::Signal() { -#ifdef _WIN32 - ReleaseSemaphore(semaphore_, 1, nullptr); -#elif defined(__APPLE__) - sem_post(semaphore_); -#else - sem_post(&semaphore_); -#endif -} - -void SVSemaphore::Wait() { -#ifdef _WIN32 - 
WaitForSingleObject(semaphore_, INFINITE); -#elif defined(__APPLE__) - sem_wait(semaphore_); -#else - sem_wait(&semaphore_); -#endif -} - -// Place a message in the message buffer (and flush it). -void SVNetwork::Send(const char* msg) { - mutex_send_.Lock(); - msg_buffer_out_.append(msg); - mutex_send_.Unlock(); -} - -// Send the whole buffer. -void SVNetwork::Flush() { - mutex_send_.Lock(); - while (!msg_buffer_out_.empty()) { - int i = send(stream_, msg_buffer_out_.c_str(), msg_buffer_out_.length(), 0); - msg_buffer_out_.erase(0, i); - } - mutex_send_.Unlock(); -} - -// Receive a message from the server. -// This will always return one line of char* (denoted by \n). -char* SVNetwork::Receive() { - char* result = nullptr; -#if defined(_WIN32) || defined(__CYGWIN__) - if (has_content) { result = strtok (nullptr, "\n"); } -#else - if (buffer_ptr_ != nullptr) { result = strtok_r(nullptr, "\n", &buffer_ptr_); } -#endif - - // This means there is something left in the buffer and we return it. - if (result != nullptr) { return result; - // Otherwise, we read from the stream_. - } else { - buffer_ptr_ = nullptr; - has_content = false; - - // The timeout length is not really important since we are looping anyway - // until a new message is delivered. - struct timeval tv; - tv.tv_sec = 10; - tv.tv_usec = 0; - - // Set the flags to return when the stream_ is ready to be read. - fd_set readfds; - FD_ZERO(&readfds); - FD_SET(stream_, &readfds); - - int i = select(stream_+1, &readfds, nullptr, nullptr, &tv); - - // The stream_ died. - if (i == 0) { return nullptr; } - - // Read the message buffer. - i = recv(stream_, msg_buffer_in_, kMaxMsgSize, 0); - - // Server quit (0) or error (-1). - if (i <= 0) { return nullptr; } - msg_buffer_in_[i] = '\0'; - has_content = true; -#ifdef _WIN32 - return strtok(msg_buffer_in_, "\n"); -#else - // Setup a new string tokenizer. - return strtok_r(msg_buffer_in_, "\n", &buffer_ptr_); -#endif - } -} - -// Close the connection to the server. 
-void SVNetwork::Close() { -#ifdef _WIN32 - closesocket(stream_); -#else - close(stream_); -#endif - // Mark stream_ as invalid. - stream_ = -1; -} - - -// The program to invoke to start ScrollView -static const char* ScrollViewProg() { -#ifdef _WIN32 - const char* prog = "java -Xms512m -Xmx1024m"; -#else - const char* prog = "sh"; -#endif - return prog; -} - - -// The arguments to the program to invoke to start ScrollView -static std::string ScrollViewCommand(std::string scrollview_path) { - // The following ugly ifdef is to enable the output of the java runtime - // to be sent down a black hole on non-windows to ignore all the - // exceptions in piccolo. Ideally piccolo would be debugged to make - // this unnecessary. - // Also the path has to be separated by ; on windows and : otherwise. -#ifdef _WIN32 - const char cmd_template[] = "-Djava.library.path=%s -jar %s/ScrollView.jar"; - -#else - const char cmd_template[] = - "-c \"trap 'kill %%1' 0 1 2 ; java " - "-Xms1024m -Xmx2048m -jar %s/ScrollView.jar" - " & wait\""; -#endif - size_t cmdlen = sizeof(cmd_template) + 2 * scrollview_path.size() + 1; - std::vector cmd(cmdlen); - const char* sv_path = scrollview_path.c_str(); -#ifdef _WIN32 - snprintf(&cmd[0], cmdlen, cmd_template, sv_path, sv_path); -#else - snprintf(&cmd[0], cmdlen, cmd_template, sv_path); -#endif - std::string command(&cmd[0]); - return command; -} - - -// Platform-independent freeaddrinfo() -static void FreeAddrInfo(struct addrinfo* addr_info) { - #if defined(__linux__) - freeaddrinfo(addr_info); - #else - delete addr_info->ai_addr; - delete addr_info; - #endif -} - - -// Non-linux version of getaddrinfo() -#if !defined(__linux__) -static int GetAddrInfoNonLinux(const char* hostname, int port, - struct addrinfo** addr_info) { -// Get the host data depending on the OS. 
- struct sockaddr_in* address; - *addr_info = new struct addrinfo; - memset(*addr_info, 0, sizeof(struct addrinfo)); - address = new struct sockaddr_in; - memset(address, 0, sizeof(struct sockaddr_in)); - - (*addr_info)->ai_addr = (struct sockaddr*) address; - (*addr_info)->ai_addrlen = sizeof(struct sockaddr); - (*addr_info)->ai_family = AF_INET; - (*addr_info)->ai_socktype = SOCK_STREAM; - - struct hostent *name; -#ifdef _WIN32 - WSADATA wsaData; - WSAStartup(MAKEWORD(1, 1), &wsaData); - name = gethostbyname(hostname); -#else - name = gethostbyname(hostname); -#endif - - if (name == nullptr) { - FreeAddrInfo(*addr_info); - *addr_info = nullptr; - return -1; - } - - // Fill in the appropriate variables to be able to connect to the server. - address->sin_family = name->h_addrtype; - memcpy(&address->sin_addr.s_addr, name->h_addr_list[0], name->h_length); - address->sin_port = htons(port); - return 0; -} -#endif - - -// Platform independent version of getaddrinfo() -// Given a hostname:port, produce an addrinfo struct -static int GetAddrInfo(const char* hostname, int port, - struct addrinfo** address) { -#if defined(__linux__) - char port_str[40]; - snprintf(port_str, 40, "%d", port); - return getaddrinfo(hostname, port_str, nullptr, address); -#else - return GetAddrInfoNonLinux(hostname, port, address); -#endif -} - - -// Set up a connection to a ScrollView on hostname:port. 
-SVNetwork::SVNetwork(const char* hostname, int port) { - msg_buffer_in_ = new char[kMaxMsgSize + 1]; - msg_buffer_in_[0] = '\0'; - - has_content = false; - buffer_ptr_ = nullptr; - - struct addrinfo *addr_info = nullptr; - - if (GetAddrInfo(hostname, port, &addr_info) != 0) { - std::cerr << "Error resolving name for ScrollView host " - << std::string(hostname) << ":" << port << std::endl; - } - - stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, - addr_info->ai_protocol); - - if (stream_ < 0) { - std::cerr << "Failed to open socket" << std::endl; - } else if (connect(stream_, addr_info->ai_addr, addr_info->ai_addrlen) < 0) { - // If server is not there, we will start a new server as local child process. - const char* scrollview_path = getenv("SCROLLVIEW_PATH"); - if (scrollview_path == nullptr) { -#ifdef SCROLLVIEW_PATH -#define _STR(a) #a -#define _XSTR(a) _STR(a) - scrollview_path = _XSTR(SCROLLVIEW_PATH); -#undef _XSTR -#undef _STR -#else - scrollview_path = "."; -#endif - } - const char *prog = ScrollViewProg(); - std::string command = ScrollViewCommand(scrollview_path); - SVSync::StartProcess(prog, command.c_str()); - - // Wait for server to show up. - // Note: There is no exception handling in case the server never turns up. 
- - Close(); - for (;;) { - stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, - addr_info->ai_protocol); - if (stream_ >= 0) { - if (connect(stream_, addr_info->ai_addr, addr_info->ai_addrlen) == 0) { - break; - } - - Close(); - - std::cout << "ScrollView: Waiting for server...\n"; -#ifdef _WIN32 - Sleep(1000); -#else - sleep(1); -#endif - } - } - } - FreeAddrInfo(addr_info); -} - -SVNetwork::~SVNetwork() { - Close(); - delete[] msg_buffer_in_; -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svutil.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svutil.h deleted file mode 100644 index 9484a5ae..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/viewer/svutil.h +++ /dev/null @@ -1,146 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: svutil.h -// Description: ScrollView Utilities -// Author: Joern Wanke -// Created: Thu Nov 29 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// -// SVUtil contains the SVSync, SVSemaphore, SVMutex and SVNetwork -// classes, which are used for thread/process creation & synchronization -// and network connection. 
- -#ifndef TESSERACT_VIEWER_SVUTIL_H_ -#define TESSERACT_VIEWER_SVUTIL_H_ - -#ifdef _WIN32 -#include -#include "platform.h" -#else -#include -#include -#endif - -#include - -#ifndef MAX -#define MAX(a, b) ((a > b) ? a : b) -#endif - -#ifndef MIN -#define MIN(a, b) ((a < b) ? a : b) -#endif - -/// The SVSync class provides functionality for Thread & Process Creation -class SVSync { - public: - /// Create new thread. - static void StartThread(void *(*func)(void*), void* arg); - /// Signals a thread to exit. - static void ExitThread(); - /// Starts a new process. - static void StartProcess(const char* executable, const char* args); -}; - -/// A semaphore class which encapsulates the main signalling -/// and wait abilities of semaphores for windows and unix. -class SVSemaphore { - public: - /// Sets up a semaphore. - SVSemaphore(); - /// Signal a semaphore. - void Signal(); - /// Wait on a semaphore. - void Wait(); - private: -#ifdef _WIN32 - HANDLE semaphore_; -#elif defined(__APPLE__) - sem_t *semaphore_; -#else - sem_t semaphore_; -#endif -}; - -/// A mutex which encapsulates the main locking and unlocking -/// abilites of mutexes for windows and unix. -class SVMutex { - public: - /// Sets up a new mutex. - SVMutex(); - /// Locks on a mutex. - void Lock(); - /// Unlocks on a mutex. - void Unlock(); - private: -#ifdef _WIN32 - HANDLE mutex_; -#else - pthread_mutex_t mutex_; -#endif -}; - -// Auto-unlocking object that locks a mutex on construction and unlocks it -// on destruction. -class SVAutoLock { - public: - explicit SVAutoLock(SVMutex* mutex) : mutex_(mutex) { mutex->Lock(); } - ~SVAutoLock() { mutex_->Unlock(); } - - private: - SVMutex* mutex_; -}; - -/// The SVNetwork class takes care of the remote connection for ScrollView -/// This means setting up and maintaining a remote connection, sending and -/// receiving messages and closing the connection. -/// It is designed to work on both Linux and Windows. 
-class SVNetwork { - public: - /// Set up a connection to hostname on port. - SVNetwork(const char* hostname, int port); - - /// Destructor. - ~SVNetwork(); - - /// Put a message in the messagebuffer to the server and try to send it. - void Send(const char* msg); - - /// Receive a message from the server. - /// This will always return one line of char* (denoted by \n). - char* Receive(); - - /// Close the connection to the server. - void Close(); - - /// Flush the buffer. - void Flush(); - - private: - /// The mutex for access to Send() and Flush(). - SVMutex mutex_send_; - /// The actual stream_ to the server. - int stream_; - /// Stores the last received message-chunk from the server. - char* msg_buffer_in_; - - /// Stores the messages which are supposed to go out. - std::string msg_buffer_out_; - - bool has_content; // Win32 (strtok) - /// Where we are at in our msg_buffer_in_ - char* buffer_ptr_; // Unix (strtok_r) -}; - -#endif // TESSERACT_VIEWER_SVUTIL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/port/gettimeofday.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/port/gettimeofday.cpp deleted file mode 100644 index 664ea730..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/port/gettimeofday.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: gettimeofday.cpp -// Description: Implementation of gettimeofday based on leptonica -// Author: tomp2010, zdenop -// Created: Tue Feb 21 21:38:00 CET 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include -#include "gettimeofday.h" - -int gettimeofday(struct timeval *tp, struct timezone *tzp) { - l_int32 sec, usec; - if (tp == nullptr) - return -1; - - l_getCurrentTime(&sec, &usec); - tp->tv_sec = sec; - tp->tv_usec = usec; - return 0; -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/port/gettimeofday.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/port/gettimeofday.h deleted file mode 100644 index 9b44b3c3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/port/gettimeofday.h +++ /dev/null @@ -1,36 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: gettimeofday.h -// Description: Header file for gettimeofday.cpp -// Author: tomp2010, zdenop -// Created: Tue Feb 21 21:38:00 CET 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef VS2008_PORT_GETTIMEOFDAY_H_ -#define VS2008_PORT_GETTIMEOFDAY_H_ - -#if WINAPI_FAMILY_APP -struct timeval { - long tv_sec; /* seconds */ - long tv_usec; /* and microseconds */ -}; -#elif defined (_WIN32) -#include // timeval is defined in here. -#endif - -typedef struct timezone tz; - -int gettimeofday(struct timeval * tp, struct timezone * tzp); - -#endif // VS2008_PORT_GETTIMEOFDAY_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/libtesseract.rc.in b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/libtesseract.rc.in deleted file mode 100644 index 85809cd9..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/libtesseract.rc.in +++ /dev/null @@ -1,100 +0,0 @@ -// Microsoft Visual C++ generated resource script. -// -#include "resource.h" - -#define APSTUDIO_READONLY_SYMBOLS -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 2 resource. -// -#include "windows.h" - -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -// English (U.S.) 
resources - -#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) -#ifdef _WIN32 -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US -#pragma code_page(1252) -#endif //_WIN32 - -#ifdef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// TEXTINCLUDE -// - -1 TEXTINCLUDE -BEGIN - "resource.h\0" -END - -2 TEXTINCLUDE -BEGIN - "#include ""afxres.h""\r\n" - "\0" -END - -3 TEXTINCLUDE -BEGIN - "\r\n" - "\0" -END - -#endif // APSTUDIO_INVOKED - - -///////////////////////////////////////////////////////////////////////////// -// -// Version -// - -VS_VERSION_INFO VERSIONINFO - FILEVERSION @GENERIC_MAJOR_VERSION@,@GENERIC_MINOR_VERSION@,@GENERIC_MICRO_VERSION@,0 - PRODUCTVERSION @GENERIC_MAJOR_VERSION@,@GENERIC_MINOR_VERSION@,@GENERIC_MICRO_VERSION@,0 - FILEFLAGSMASK 0x17L -#ifdef _DEBUG - FILEFLAGS 0x1L -#else - FILEFLAGS 0x0L -#endif - FILEOS 0x4L - FILETYPE 0x7L - FILESUBTYPE 0x0L -BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904b0" - BEGIN - VALUE "FileDescription", "Tesseract OCR library" - VALUE "FileVersion", "@GENERIC_MAJOR_VERSION@,@GENERIC_MINOR_VERSION@,@GENERIC_MICRO_VERSION@,0" - VALUE "InternalName", "libtesseract" - VALUE "LegalCopyright", "Copyright (C) 2018 Google, Inc. Licensed under the Apache License, Version 2.0" - VALUE "OriginalFilename", "tesseract@GENERIC_MAJOR_VERSION@@GENERIC_MINOR_VERSION@.dll" - VALUE "ProductName", "Tesseract OCR Library" - VALUE "ProductVersion", "@PACKAGE_VERSION@" - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 0x409, 1200 - END -END - -#endif // English (U.S.) resources -///////////////////////////////////////////////////////////////////////////// - - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. 
-// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/resource.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/resource.h deleted file mode 100644 index 393a9e00..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/resource.h +++ /dev/null @@ -1,23 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by tesseract.rc -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 101 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/tesseract.rc.in b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/tesseract.rc.in deleted file mode 100644 index b4d309a2..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/vs2010/tesseract/tesseract.rc.in +++ /dev/null @@ -1,100 +0,0 @@ -// Microsoft Visual C++ generated resource script. 
-// -#include "resource.h" - -#define APSTUDIO_READONLY_SYMBOLS -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 2 resource. -// -#include "windows.h" - -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -// English (U.S.) resources - -#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) -#ifdef _WIN32 -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US -#pragma code_page(1252) -#endif //_WIN32 - -#ifdef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// TEXTINCLUDE -// - -1 TEXTINCLUDE -BEGIN - "resource.h\0" -END - -2 TEXTINCLUDE -BEGIN - "#include ""afxres.h""\r\n" - "\0" -END - -3 TEXTINCLUDE -BEGIN - "\r\n" - "\0" -END - -#endif // APSTUDIO_INVOKED - - -///////////////////////////////////////////////////////////////////////////// -// -// Version -// - -VS_VERSION_INFO VERSIONINFO - FILEVERSION @GENERIC_MAJOR_VERSION@,@GENERIC_MINOR_VERSION@,@GENERIC_MICRO_VERSION@,0 - PRODUCTVERSION @GENERIC_MAJOR_VERSION@,@GENERIC_MINOR_VERSION@,@GENERIC_MICRO_VERSION@,0 - FILEFLAGSMASK 0x17L -#ifdef _DEBUG - FILEFLAGS 0x1L -#else - FILEFLAGS 0x0L -#endif - FILEOS 0x4L - FILETYPE 0x1L - FILESUBTYPE 0x0L -BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904b0" - BEGIN - VALUE "FileDescription", "Tesseract command-line OCR engine" - VALUE "FileVersion", "@GENERIC_MAJOR_VERSION@,@GENERIC_MINOR_VERSION@,@GENERIC_MICRO_VERSION@,0" - VALUE "InternalName", "tesseract" - VALUE "LegalCopyright", "Copyright (C) 2018 Google, Inc. Licensed under the Apache License, Version 2.0" - VALUE "OriginalFilename", "tesseract.exe" - VALUE "ProductName", "Tesseract-OCR" - VALUE "ProductVersion", "@PACKAGE_VERSION@" - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 0x409, 1200 - END -END - -#endif // English (U.S.) 
resources -///////////////////////////////////////////////////////////////////////////// - - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. -// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/associate.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/associate.cpp deleted file mode 100644 index fc58bf1d..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/associate.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: associate.cpp -// Description: Functions for scoring segmentation paths according to -// their character widths, gap widths and seam cuts. -// Author: Daria Antonova -// Created: Mon Mar 8 11:26:43 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - - -#include -#include - -#include "associate.h" -#include "normalis.h" -#include "pageres.h" - -namespace tesseract { - -const float AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f; -const float AssociateUtils::kMinGap = 0.03f; - -void AssociateUtils::ComputeStats(int col, int row, - const AssociateStats *parent_stats, - int parent_path_length, - bool fixed_pitch, - float max_char_wh_ratio, - WERD_RES *word_res, - bool debug, - AssociateStats *stats) { - stats->Clear(); - - ASSERT_HOST(word_res != nullptr); - if (word_res->blob_widths.empty()) { - return; - } - if (debug) { - tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", - col, row, fixed_pitch ? " (fixed pitch)" : ""); - } - float normalizing_height = kBlnXHeight; - ROW* blob_row = word_res->blob_row; - // TODO(rays/daria) Can unicharset.script_has_xheight be useful here? - if (fixed_pitch && blob_row != nullptr) { - // For fixed pitch language like CJK, we use the full text height - // as the normalizing factor so we are not dependent on xheight - // calculation. - if (blob_row->body_size() > 0.0f) { - normalizing_height = word_res->denorm.y_scale() * blob_row->body_size(); - } else { - normalizing_height = word_res->denorm.y_scale() * - (blob_row->x_height() + blob_row->ascenders()); - } - if (debug) { - tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", - normalizing_height, word_res->denorm.y_scale(), - blob_row->x_height(), blob_row->ascenders()); - } - } - float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height; - if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true; - // Compute the gap sum for this shape. If there are only negative or only - // positive gaps, record their sum in stats->gap_sum. However, if there is - // a mixture, record only the sum of the positive gaps. - // TODO(antonova): explain fragment. 
- int negative_gap_sum = 0; - for (int c = col; c < row; ++c) { - int gap = word_res->GetBlobsGap(c); - (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap; - } - if (stats->gap_sum == 0) stats->gap_sum = negative_gap_sum; - if (debug) { - tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n", - wh_ratio, max_char_wh_ratio, stats->gap_sum, - stats->bad_shape ? "bad_shape" : ""); - } - // Compute shape_cost (for fixed pitch mode). - if (fixed_pitch) { - bool end_row = (row == (word_res->ratings->dimension() - 1)); - - // Ensure that the blob has gaps on the left and the right sides - // (except for beginning and ending punctuation) and that there is - // no cutting through ink at the blob boundaries. - if (col > 0) { - float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height; - SEAM *left_seam = word_res->seam_array[col - 1]; - if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) { - stats->bad_shape = true; - } - if (debug) { - tprintf("left_gap %g, left_seam %g %s\n", left_gap, - left_seam->priority(), stats->bad_shape ? "bad_shape" : ""); - } - } - float right_gap = 0.0f; - if (!end_row) { - right_gap = word_res->GetBlobsGap(row) / normalizing_height; - SEAM *right_seam = word_res->seam_array[row]; - if (right_gap < kMinGap || right_seam->priority() > 0.0f) { - stats->bad_shape = true; - if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true; - } - if (debug) { - tprintf("right_gap %g right_seam %g %s\n", right_gap, - right_seam->priority(), stats->bad_shape ? "bad_shape" : ""); - } - } - - // Impose additional segmentation penalties if blob widths or gaps - // distribution don't fit a fixed-pitch model. - // Since we only know the widths and gaps of the path explored so far, - // the means and variances are computed for the path so far (not - // considering characters to the right of the last character on the path). 
- stats->full_wh_ratio = wh_ratio + right_gap; - if (parent_stats != nullptr) { - stats->full_wh_ratio_total = - (parent_stats->full_wh_ratio_total + stats->full_wh_ratio); - float mean = - stats->full_wh_ratio_total / static_cast(parent_path_length+1); - stats->full_wh_ratio_var = - parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2); - } else { - stats->full_wh_ratio_total = stats->full_wh_ratio; - } - if (debug) { - tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n", - stats->full_wh_ratio, stats->full_wh_ratio_total, - stats->full_wh_ratio_var); - } - - stats->shape_cost = - FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio); - - // For some reason Tesseract prefers to treat the whole CJ words - // as one blob when the initial segmentation is particularly bad. - // This hack is to avoid favoring such states. - if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) { - stats->shape_cost += 10; - } - stats->shape_cost += stats->full_wh_ratio_var; - if (debug) tprintf("shape_cost %g\n", stats->shape_cost); - } -} - -float AssociateUtils::FixedPitchWidthCost(float norm_width, - float right_gap, - bool end_pos, - float max_char_wh_ratio) { - float cost = 0.0f; - if (norm_width > max_char_wh_ratio) cost += norm_width; - if (norm_width > kMaxFixedPitchCharAspectRatio) - cost += norm_width * norm_width; // extra penalty for merging CJK chars - // Penalize skinny blobs, except for punctuation in the last position. 
- if (norm_width+right_gap < 0.5f && !end_pos) { - cost += 1.0f - (norm_width + right_gap); - } - return cost; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/associate.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/associate.h deleted file mode 100644 index 12f19c74..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/associate.h +++ /dev/null @@ -1,123 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: associate.h -// Description: Structs, classes, typedefs useful for the segmentation -// search. Functions for scoring segmentation paths according -// to their character widths, gap widths and seam cuts. -// Author: Daria Antonova -// Created: Mon Mar 8 11:26:43 PDT 2010 -// -// (C) Copyright 2010, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef ASSOCIATE_H -#define ASSOCIATE_H - -#include "blobs.h" -#include "elst.h" -#include "ratngs.h" -#include "seam.h" -#include "split.h" - -class WERD_RES; - -namespace tesseract { - -// Statisitcs about character widths, gaps and seams. 
-struct AssociateStats { - AssociateStats() { Clear(); } - - void Clear() { - shape_cost = 0.0f; - bad_shape = false; - full_wh_ratio = 0.0f; - full_wh_ratio_total = 0.0f; - full_wh_ratio_var = 0.0f; - bad_fixed_pitch_right_gap = false; - bad_fixed_pitch_wh_ratio = false; - gap_sum = 0; - } - - void Print() { tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape); } - - float shape_cost; // cost of blob shape - bool bad_shape; // true if the shape of the blob is unacceptable - float full_wh_ratio; // width-to-hight ratio + gap on the right - float full_wh_ratio_total; // sum of width-to-hight ratios - // on the path terminating at this blob - float full_wh_ratio_var; // variance of full_wh_ratios on the path - bool bad_fixed_pitch_right_gap; // true if there is no gap before - // the blob on the right - bool bad_fixed_pitch_wh_ratio; // true if the blobs has width-to-hight - // ratio > kMaxFixedPitchCharAspectRatio - int gap_sum; // sum of gaps within the blob -}; - -// Utility functions for scoring segmentation paths according to their -// character widths, gap widths, seam characteristics. 
-class AssociateUtils { - public: - static const float kMaxFixedPitchCharAspectRatio; - static const float kMinGap; - - // Returns outline length of the given blob is computed as: - // rating_cert_scale * rating / certainty - // Since from Wordrec::SegSearch() in segsearch.cpp - // rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale - // And from Classify::ConvertMatchesToChoices() in adaptmatch.cpp - // Rating = Certainty = next.rating - // Rating *= rating_scale * Results->BlobLength - // Certainty *= -(getDict().certainty_scale) - static inline float ComputeOutlineLength(float rating_cert_scale, - const BLOB_CHOICE &b) { - return rating_cert_scale * b.rating() / b.certainty(); - } - static inline float ComputeRating(float rating_cert_scale, - float cert, int width) { - return static_cast(width) * cert / rating_cert_scale; - } - - // Computes character widths, gaps and seams stats given the - // AssociateStats of the path so far, col, row of the blob that - // is being added to the path, and WERD_RES containing information - // about character widths, gaps and seams. - // Fills associate_cost with the combined shape, gap and seam cost - // of adding a unichar from (col, row) to the path (note that since - // this function could be used to compute the prioritization for - // pain points, (col, row) entry might not be classified yet; thus - // information in the (col, row) entry of the ratings matrix is not used). - // - // Note: the function assumes that word_res, stats and - // associate_cost pointers are not nullptr. - static void ComputeStats(int col, int row, - const AssociateStats *parent_stats, - int parent_path_length, - bool fixed_pitch, - float max_char_wh_ratio, - WERD_RES *word_res, - bool debug, - AssociateStats *stats); - - // Returns the width cost for fixed-pitch text. 
- static float FixedPitchWidthCost(float norm_width, float right_gap, - bool end_pos, float max_char_wh_ratio); - - // Returns the gap cost for fixed-pitch text (penalizes vertically - // overlapping components). - static inline float FixedPitchGapCost(float norm_gap, bool end_pos) { - return (norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f; - } -}; - -} // namespace tesseract - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chop.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chop.cpp deleted file mode 100644 index c15b9a40..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chop.cpp +++ /dev/null @@ -1,312 +0,0 @@ -/* -*-C-*- - ****************************************************************************** - * - * File: chop.cpp (Formerly chop.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 16:41:11 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *****************************************************************************/ - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "chop.h" -#include "outlines.h" -#include "callcpp.h" -#include "plotedges.h" -#include "wordrec.h" - -#include - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -namespace tesseract { -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/** - * @name point_priority - * - * Assign a priority to and edge point that might be used as part of a - * split. The argument should be of type EDGEPT. - */ -PRIORITY Wordrec::point_priority(EDGEPT *point) { - return (PRIORITY)angle_change(point->prev, point, point->next); -} - - -/** - * @name add_point_to_list - * - * Add an edge point to a POINT_GROUP containing a list of other points. - */ -void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT *point) { - if (point_heap->size() < MAX_NUM_POINTS - 2) { - PointPair pair(point_priority(point), point); - point_heap->Push(&pair); - } - -#ifndef GRAPHICS_DISABLED - if (chop_debug > 2) - mark_outline(point); -#endif -} - -// Returns true if the edgept supplied as input is an inside angle. This -// is determined by the angular change of the vectors from point to point. -bool Wordrec::is_inside_angle(EDGEPT *pt) { - return angle_change(pt->prev, pt, pt->next) < chop_inside_angle; -} - -/** - * @name angle_change - * - * Return the change in angle (degrees) of the line segments between - * points one and two, and two and three. 
- */ -int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { - VECTOR vector1; - VECTOR vector2; - - int angle; - - /* Compute angle */ - vector1.x = point2->pos.x - point1->pos.x; - vector1.y = point2->pos.y - point1->pos.y; - vector2.x = point3->pos.x - point2->pos.x; - vector2.y = point3->pos.y - point2->pos.y; - /* Use cross product */ - float length = std::sqrt(static_cast(LENGTH(vector1)) * LENGTH(vector2)); - if ((int) length == 0) - return (0); - angle = static_cast(floor(asin(CROSS (vector1, vector2) / - length) / M_PI * 180.0 + 0.5)); - - /* Use dot product */ - if (SCALAR (vector1, vector2) < 0) - angle = 180 - angle; - /* Adjust angle */ - if (angle > 180) - angle -= 360; - if (angle <= -180) - angle += 360; - return (angle); -} - -/** - * @name pick_close_point - * - * Choose the edge point that is closest to the critical point. This - * point may not be exactly vertical from the critical point. - */ -EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, - EDGEPT *vertical_point, - int *best_dist) { - EDGEPT *best_point = nullptr; - int this_distance; - int found_better; - - do { - found_better = FALSE; - - this_distance = edgept_dist (critical_point, vertical_point); - if (this_distance <= *best_dist) { - - if (!(same_point (critical_point->pos, vertical_point->pos) || - same_point (critical_point->pos, vertical_point->next->pos) || - (best_point && same_point (best_point->pos, vertical_point->pos)) || - is_exterior_point (critical_point, vertical_point))) { - *best_dist = this_distance; - best_point = vertical_point; - if (chop_vertical_creep) - found_better = TRUE; - } - } - vertical_point = vertical_point->next; - } - while (found_better == TRUE); - - return (best_point); -} - - -/** - * @name prioritize_points - * - * Find a list of edge points from the outer outline of this blob. For - * each of these points assign a priority. Sort these points using a - * heap structure so that they can be visited in order. 
- */ -void Wordrec::prioritize_points(TESSLINE *outline, PointHeap* points) { - EDGEPT *this_point; - EDGEPT *local_min = nullptr; - EDGEPT *local_max = nullptr; - - this_point = outline->loop; - local_min = this_point; - local_max = this_point; - do { - if (this_point->vec.y < 0) { - /* Look for minima */ - if (local_max != nullptr) - new_max_point(local_max, points); - else if (is_inside_angle (this_point)) - add_point_to_list(points, this_point); - local_max = nullptr; - local_min = this_point->next; - } - else if (this_point->vec.y > 0) { - /* Look for maxima */ - if (local_min != nullptr) - new_min_point(local_min, points); - else if (is_inside_angle (this_point)) - add_point_to_list(points, this_point); - local_min = nullptr; - local_max = this_point->next; - } - else { - /* Flat area */ - if (local_max != nullptr) { - if (local_max->prev->vec.y != 0) { - new_max_point(local_max, points); - } - local_max = this_point->next; - local_min = nullptr; - } - else { - if (local_min->prev->vec.y != 0) { - new_min_point(local_min, points); - } - local_min = this_point->next; - local_max = nullptr; - } - } - - /* Next point */ - this_point = this_point->next; - } - while (this_point != outline->loop); -} - - -/** - * @name new_min_point - * - * Found a new minimum point try to decide whether to save it or not. - * Return the new value for the local minimum. If a point is saved then - * the local minimum is reset to nullptr. - */ -void Wordrec::new_min_point(EDGEPT *local_min, PointHeap* points) { - int16_t dir; - - dir = direction (local_min); - - if (dir < 0) { - add_point_to_list(points, local_min); - return; - } - - if (dir == 0 && point_priority (local_min) < 0) { - add_point_to_list(points, local_min); - return; - } -} - - -/** - * @name new_max_point - * - * Found a new minimum point try to decide whether to save it or not. - * Return the new value for the local minimum. If a point is saved then - * the local minimum is reset to nullptr. 
- */ -void Wordrec::new_max_point(EDGEPT *local_max, PointHeap* points) { - int16_t dir; - - dir = direction (local_max); - - if (dir > 0) { - add_point_to_list(points, local_max); - return; - } - - if (dir == 0 && point_priority (local_max) < 0) { - add_point_to_list(points, local_max); - return; - } -} - - -/** - * @name vertical_projection_point - * - * For one point on the outline, find the corresponding point on the - * other side of the outline that is a likely projection for a split - * point. This is done by iterating through the edge points until the - * X value of the point being looked at is greater than the X value of - * the split point. Ensure that the point being returned is not right - * next to the split point. Return the edge point in *best_point as - * a result, and any points that were newly created are also saved on - * the new_points list. - */ -void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, - EDGEPT** best_point, - EDGEPT_CLIST *new_points) { - EDGEPT *p; /* Iterator */ - EDGEPT *this_edgept; /* Iterator */ - EDGEPT_C_IT new_point_it(new_points); - int x = split_point->pos.x; /* X value of vertical */ - int best_dist = LARGE_DISTANCE;/* Best point found */ - - if (*best_point != nullptr) - best_dist = edgept_dist(split_point, *best_point); - - p = target_point; - /* Look at each edge point */ - do { - if (((p->pos.x <= x && x <= p->next->pos.x) || - (p->next->pos.x <= x && x <= p->pos.x)) && - !same_point(split_point->pos, p->pos) && - !same_point(split_point->pos, p->next->pos) && - !p->IsChopPt() && - (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) { - - if (near_point(split_point, p, p->next, &this_edgept)) { - new_point_it.add_before_then_move(this_edgept); - } - - if (*best_point == nullptr) - best_dist = edgept_dist (split_point, this_edgept); - - this_edgept = - pick_close_point(split_point, this_edgept, &best_dist); - if (this_edgept) - *best_point = this_edgept; - } - - p = 
p->next; - } - while (p != target_point); -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chop.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chop.h deleted file mode 100644 index 76eab99f..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chop.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: chop.h (Formerly chop.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Jul 10 14:47:37 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *******************************************************************************/ - -#ifndef CHOP_H -#define CHOP_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "genericheap.h" -#include "kdpair.h" -#include "seam.h" - -/*---------------------------------------------------------------------- - T y p e s ----------------------------------------------------------------------*/ -#define MAX_NUM_POINTS 50 -// The PointPair elements do NOT own the EDGEPTs. 
-using PointPair = tesseract::KDPairInc; -using PointHeap = tesseract::GenericHeap; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chopper.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chopper.cpp deleted file mode 100644 index f7816f71..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chopper.cpp +++ /dev/null @@ -1,661 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: chopper.cpp (Formerly chopper.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 16:18:52 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **************************************************************************/ - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "chopper.h" -#include "blamer.h" // for BlamerBundle, IRR_CORRECT -#include "blobs.h" // for TPOINT, TBLOB, EDGEPT, TESSLINE, divisible_blob -#include "callcpp.h" // for Red -#include "dict.h" // for Dict -#include "host.h" // for FALSE, TRUE -#include "lm_pain_points.h" // for LMPainPoints -#include "lm_state.h" // for BestChoiceBundle -#include "matrix.h" // for MATRIX -#include "normalis.h" // for DENORM -#include "pageres.h" // for WERD_RES -#include "params.h" // for IntParam, BoolParam -#include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST (ptr ... -#include "rect.h" // for TBOX -#include "render.h" // for display_blob -#include "seam.h" // for SEAM -#include "split.h" // for remove_edgept -#include "stopper.h" // for DANGERR -#include "tprintf.h" // for tprintf -#include "wordrec.h" // for Wordrec, SegSearchPending (ptr only) - -class CHAR_FRAGMENT; - -template class GenericVector; - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -// Even though the limit on the number of chunks may now be removed, keep -// the same limit for repeatable behavior, and it may be a speed advantage. -static const int kMaxNumChunks = 64; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/** - * @name preserve_outline_tree - * - * Copy the list of outlines. 
- */ -void preserve_outline(EDGEPT *start) { - EDGEPT *srcpt; - - if (start == nullptr) - return; - srcpt = start; - do { - srcpt->flags[1] = 1; - srcpt = srcpt->next; - } - while (srcpt != start); - srcpt->flags[1] = 2; -} - - -/**************************************************************************/ -void preserve_outline_tree(TESSLINE *srcline) { - TESSLINE *outline; - - for (outline = srcline; outline != nullptr; outline = outline->next) { - preserve_outline (outline->loop); - } -} - - -/** - * @name restore_outline_tree - * - * Copy the list of outlines. - */ -EDGEPT *restore_outline(EDGEPT *start) { - EDGEPT *srcpt; - EDGEPT *real_start; - - if (start == nullptr) - return nullptr; - srcpt = start; - do { - if (srcpt->flags[1] == 2) - break; - srcpt = srcpt->next; - } - while (srcpt != start); - real_start = srcpt; - do { - srcpt = srcpt->next; - if (srcpt->prev->flags[1] == 0) { - remove_edgept(srcpt->prev); - } - } - while (srcpt != real_start); - return real_start; -} - - -/******************************************************************************/ -void restore_outline_tree(TESSLINE *srcline) { - TESSLINE *outline; - - for (outline = srcline; outline != nullptr; outline = outline->next) { - outline->loop = restore_outline (outline->loop); - outline->start = outline->loop->pos; - } -} - -// Helper runs all the checks on a seam to make sure it is valid. -// Returns the seam if OK, otherwise deletes the seam and returns nullptr. 
-static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word, - TBLOB* blob, TBLOB* other_blob, - const GenericVector& seams, SEAM* seam) { - if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr || - total_containment(blob, other_blob) || check_blob(other_blob) || - !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) || - any_shared_split_points(seams, seam) || - !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) { - word->blobs.remove(blob_number + 1); - if (seam) { - seam->UndoSeam(blob, other_blob); - delete seam; - seam = nullptr; -#ifndef GRAPHICS_DISABLED - if (debug_level) { - if (debug_level >2) - display_blob(blob, Red); - tprintf("\n** seam being removed ** \n"); - } -#endif - } else { - delete other_blob; - } - return nullptr; - } - return seam; -} - - -/** - * @name attempt_blob_chop - * - * Try to split the this blob after this one. Check to make sure that - * it was successful. - */ -namespace tesseract { -SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, - bool italic_blob, - const GenericVector& seams) { - if (repair_unchopped_blobs) - preserve_outline_tree (blob->outlines); - TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ - // Insert it into the word. 
- word->blobs.insert(other_blob, blob_number + 1); - - SEAM *seam = nullptr; - if (prioritize_division) { - TPOINT location; - if (divisible_blob(blob, italic_blob, &location)) { - seam = new SEAM(0.0f, location); - } - } - if (seam == nullptr) - seam = pick_good_seam(blob); - if (chop_debug) { - if (seam != nullptr) - seam->Print("Good seam picked="); - else - tprintf("\n** no seam picked *** \n"); - } - if (seam) { - seam->ApplySeam(italic_blob, blob, other_blob); - } - - seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, - seams, seam); - if (seam == nullptr) { - if (repair_unchopped_blobs) - restore_outline_tree(blob->outlines); - if (allow_blob_division && !prioritize_division) { - // If the blob can simply be divided into outlines, then do that. - TPOINT location; - if (divisible_blob(blob, italic_blob, &location)) { - other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ - word->blobs.insert(other_blob, blob_number + 1); - seam = new SEAM(0.0f, location); - seam->ApplySeam(italic_blob, blob, other_blob); - seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, - seams, seam); - } - } - } - if (seam != nullptr) { - // Make sure this seam doesn't get chopped again. 
- seam->Finalize(); - } - return seam; -} - - -SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number, - bool italic_blob, - const GenericVector& seams) { - return attempt_blob_chop(word, word->blobs[blob_number], blob_number, - italic_blob, seams); -} - - -SEAM *Wordrec::chop_overlapping_blob(const GenericVector& boxes, - bool italic_blob, WERD_RES *word_res, - int *blob_number) { - TWERD *word = word_res->chopped_word; - for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) { - TBLOB *blob = word->blobs[*blob_number]; - TPOINT topleft, botright; - topleft.x = blob->bounding_box().left(); - topleft.y = blob->bounding_box().top(); - botright.x = blob->bounding_box().right(); - botright.y = blob->bounding_box().bottom(); - - TPOINT original_topleft, original_botright; - word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft); - word_res->denorm.DenormTransform(nullptr, botright, &original_botright); - - TBOX original_box = TBOX(original_topleft.x, original_botright.y, - original_botright.x, original_topleft.y); - - bool almost_equal_box = false; - int num_overlap = 0; - for (int i = 0; i < boxes.size(); i++) { - if (original_box.overlap_fraction(boxes[i]) > 0.125) - num_overlap++; - if (original_box.almost_equal(boxes[i], 3)) - almost_equal_box = true; - } - - TPOINT location; - if (divisible_blob(blob, italic_blob, &location) || - (!almost_equal_box && num_overlap > 1)) { - SEAM *seam = attempt_blob_chop(word, blob, *blob_number, - italic_blob, word_res->seam_array); - if (seam != nullptr) - return seam; - } - } - - *blob_number = -1; - return nullptr; -} - -} // namespace tesseract - - -/** - * @name any_shared_split_points - * - * Return true if any of the splits share a point with this one. 
- */ -int any_shared_split_points(const GenericVector& seams, SEAM *seam) { - int length; - int index; - - length = seams.size(); - for (index = 0; index < length; index++) - if (seam->SharesPosition(*seams[index])) return TRUE; - return FALSE; -} - - -/** - * @name check_blob - * - * @return true if blob has a non whole outline. - */ -int check_blob(TBLOB *blob) { - TESSLINE *outline; - EDGEPT *edgept; - - for (outline = blob->outlines; outline != nullptr; outline = outline->next) { - edgept = outline->loop; - do { - if (edgept == nullptr) - break; - edgept = edgept->next; - } - while (edgept != outline->loop); - if (edgept == nullptr) - return 1; - } - return 0; -} - - -namespace tesseract { -/** - * @name improve_one_blob - * - * Finds the best place to chop, based on the worst blob, fixpt, or next to - * a fragment, according to the input. Returns the SEAM corresponding to the - * chop point, if any is found, and the index in the ratings_matrix of the - * chopped blob. Note that blob_choices is just a copy of the pointers in the - * leading diagonal of the ratings MATRIX. - * Although the blob is chopped, the returned SEAM is yet to be inserted into - * word->seam_array and the resulting blobs are unclassified, so this function - * can be used by ApplyBox as well as during recognition. 
- */ -SEAM* Wordrec::improve_one_blob(const GenericVector& blob_choices, - DANGERR *fixpt, - bool split_next_to_fragment, - bool italic_blob, - WERD_RES* word, - int* blob_number) { - float rating_ceiling = FLT_MAX; - SEAM *seam = nullptr; - do { - *blob_number = select_blob_to_split_from_fixpt(fixpt); - if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number); - bool split_point_from_dict = (*blob_number != -1); - if (split_point_from_dict) { - fixpt->clear(); - } else { - *blob_number = select_blob_to_split(blob_choices, rating_ceiling, - split_next_to_fragment); - } - if (chop_debug) tprintf("blob_number = %d\n", *blob_number); - if (*blob_number == -1) - return nullptr; - - // TODO(rays) it may eventually help to allow italic_blob to be true, - seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, - word->seam_array); - if (seam != nullptr) - return seam; // Success! - if (blob_choices[*blob_number] == nullptr) - return nullptr; - if (!split_point_from_dict) { - // We chopped the worst rated blob, try something else next time. - rating_ceiling = blob_choices[*blob_number]->rating(); - } - } while (true); - return seam; -} - -/** - * @name chop_one_blob - * - * Start with the current one-blob word and its classification. Find - * the worst blobs and try to divide it up to improve the ratings. - * Used for testing chopper. - */ -SEAM* Wordrec::chop_one_blob(const GenericVector& boxes, - const GenericVector& blob_choices, - WERD_RES* word_res, - int* blob_number) { - if (prioritize_division) { - return chop_overlapping_blob(boxes, true, word_res, blob_number); - } else { - return improve_one_blob(blob_choices, nullptr, false, true, word_res, - blob_number); - } -} - -/** - * @name chop_word_main - * - * Classify the blobs in this word and permute the results. Find the - * worst blob in the word and chop it up. Continue this process until - * a good answer has been found or all the blobs have been chopped up - * enough. 
The results are returned in the WERD_RES. - */ -void Wordrec::chop_word_main(WERD_RES *word) { - int num_blobs = word->chopped_word->NumBlobs(); - if (word->ratings == nullptr) { - word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks); - } - if (word->ratings->get(0, 0) == nullptr) { - // Run initial classification. - for (int b = 0; b < num_blobs; ++b) { - BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b, - "Initial:", word->chopped_word, - word->blamer_bundle); - word->ratings->put(b, b, choices); - } - } else { - // Blobs have been pre-classified. Set matrix cell for all blob choices - for (int col = 0; col < word->ratings->dimension(); ++col) { - for (int row = col; row < word->ratings->dimension() && - row < col + word->ratings->bandwidth(); ++row) { - BLOB_CHOICE_LIST* choices = word->ratings->get(col, row); - if (choices != nullptr) { - BLOB_CHOICE_IT bc_it(choices); - for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { - bc_it.data()->set_matrix_cell(col, row); - } - } - } - } - } - - // Run Segmentation Search. - BestChoiceBundle best_choice_bundle(word->ratings->dimension()); - SegSearch(word, &best_choice_bundle, word->blamer_bundle); - - if (word->best_choice == nullptr) { - // SegSearch found no valid paths, so just use the leading diagonal. - word->FakeWordFromRatings(TOP_CHOICE_PERM); - } - word->RebuildBestState(); - // If we finished without a hyphen at the end of the word, let the next word - // be found in the dictionary. 
- if (word->word->flag(W_EOL) && - !getDict().has_hyphen_end(*word->best_choice)) { - getDict().reset_hyphen_vars(true); - } - - if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) { - CallFillLattice(*word->ratings, word->best_choices, - *word->uch_set, word->blamer_bundle); - } - if (wordrec_debug_level > 0) { - tprintf("Final Ratings Matrix:\n"); - word->ratings->print(getDict().getUnicharset()); - } - word->FilterWordChoices(getDict().stopper_debug_level); -} - -/** - * @name improve_by_chopping - * - * Repeatedly chops the worst blob, classifying the new blobs fixing up all - * the data, and incrementally runs the segmentation search until a good word - * is found, or no more chops can be found. - */ -void Wordrec::improve_by_chopping(float rating_cert_scale, - WERD_RES* word, - BestChoiceBundle* best_choice_bundle, - BlamerBundle* blamer_bundle, - LMPainPoints* pain_points, - GenericVector* pending) { - int blob_number; - do { // improvement loop. - // Make a simple vector of BLOB_CHOICEs to make it easy to pick which - // one to chop. - GenericVector blob_choices; - int num_blobs = word->ratings->dimension(); - for (int i = 0; i < num_blobs; ++i) { - BLOB_CHOICE_LIST* choices = word->ratings->get(i, i); - if (choices == nullptr || choices->empty()) { - blob_choices.push_back(nullptr); - } else { - BLOB_CHOICE_IT bc_it(choices); - blob_choices.push_back(bc_it.data()); - } - } - SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt, - false, false, word, &blob_number); - if (seam == nullptr) break; - // A chop has been made. We have to correct all the data structures to - // take into account the extra bottom-level blob. - // Put the seam into the seam_array and correct everything else on the - // word: ratings matrix (including matrix location in the BLOB_CHOICES), - // states in WERD_CHOICEs, and blob widths. - word->InsertSeam(blob_number, seam); - // Insert a new entry in the beam array. 
- best_choice_bundle->beam.insert(new LanguageModelState, blob_number); - // Fixpts are outdated, but will get recalculated. - best_choice_bundle->fixpt.clear(); - // Remap existing pain points. - pain_points->RemapForSplit(blob_number); - // Insert a new pending at the chop point. - pending->insert(SegSearchPending(), blob_number); - - // Classify the two newly created blobs using ProcessSegSearchPainPoint, - // as that updates the pending correctly and adds new pain points. - MATRIX_COORD pain_point(blob_number, blob_number); - ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word, - pain_points, blamer_bundle); - pain_point.col = blob_number + 1; - pain_point.row = blob_number + 1; - ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word, - pain_points, blamer_bundle); - if (language_model_->language_model_ngram_on) { - // N-gram evaluation depends on the number of blobs in a chunk, so we - // have to re-evaluate everything in the word. - ResetNGramSearch(word, best_choice_bundle, pending); - blob_number = 0; - } - // Run language model incrementally. (Except with the n-gram model on.) - UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, - word, pain_points, best_choice_bundle, blamer_bundle); - } while (!language_model_->AcceptableChoiceFound() && - word->ratings->dimension() < kMaxNumChunks); - - // If after running only the chopper best_choice is incorrect and no blame - // has been yet set, blame the classifier if best_choice is classifier's - // top choice and is a dictionary word (i.e. language model could not have - // helped). Otherwise blame the tradeoff between the classifier and - // the old language model (permuters). 
- if (word->blamer_bundle != nullptr && - word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT && - !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) { - bool valid_permuter = word->best_choice != nullptr && - Dict::valid_word_permuter(word->best_choice->permuter(), false); - word->blamer_bundle->BlameClassifierOrLangModel(word, - getDict().getUnicharset(), - valid_permuter, - wordrec_debug_blamer); - } -} - - -/********************************************************************** - * select_blob_to_split - * - * These are the results of the last classification. Find a likely - * place to apply splits. If none, return -1. - **********************************************************************/ -int Wordrec::select_blob_to_split( - const GenericVector& blob_choices, - float rating_ceiling, bool split_next_to_fragment) { - BLOB_CHOICE *blob_choice; - int x; - float worst = -FLT_MAX; - int worst_index = -1; - float worst_near_fragment = -FLT_MAX; - int worst_index_near_fragment = -1; - const CHAR_FRAGMENT **fragments = nullptr; - - if (chop_debug) { - if (rating_ceiling < FLT_MAX) - tprintf("rating_ceiling = %8.4f\n", rating_ceiling); - else - tprintf("rating_ceiling = No Limit\n"); - } - - if (split_next_to_fragment && blob_choices.size() > 0) { - fragments = new const CHAR_FRAGMENT *[blob_choices.length()]; - if (blob_choices[0] != nullptr) { - fragments[0] = getDict().getUnicharset().get_fragment( - blob_choices[0]->unichar_id()); - } else { - fragments[0] = nullptr; - } - } - - for (x = 0; x < blob_choices.size(); ++x) { - if (blob_choices[x] == nullptr) { - delete[] fragments; - return x; - } else { - blob_choice = blob_choices[x]; - // Populate fragments for the following position. 
- if (split_next_to_fragment && x+1 < blob_choices.size()) { - if (blob_choices[x + 1] != nullptr) { - fragments[x + 1] = getDict().getUnicharset().get_fragment( - blob_choices[x + 1]->unichar_id()); - } else { - fragments[x + 1] = nullptr; - } - } - if (blob_choice->rating() < rating_ceiling && - blob_choice->certainty() < tessedit_certainty_threshold) { - // Update worst and worst_index. - if (blob_choice->rating() > worst) { - worst_index = x; - worst = blob_choice->rating(); - } - if (split_next_to_fragment) { - // Update worst_near_fragment and worst_index_near_fragment. - bool expand_following_fragment = - (x + 1 < blob_choices.size() && - fragments[x+1] != nullptr && !fragments[x+1]->is_beginning()); - bool expand_preceding_fragment = - (x > 0 && fragments[x-1] != nullptr && !fragments[x-1]->is_ending()); - if ((expand_following_fragment || expand_preceding_fragment) && - blob_choice->rating() > worst_near_fragment) { - worst_index_near_fragment = x; - worst_near_fragment = blob_choice->rating(); - if (chop_debug) { - tprintf("worst_index_near_fragment=%d" - " expand_following_fragment=%d" - " expand_preceding_fragment=%d\n", - worst_index_near_fragment, - expand_following_fragment, - expand_preceding_fragment); - } - } - } - } - } - } - delete[] fragments; - // TODO(daria): maybe a threshold of badness for - // worst_near_fragment would be useful. - return worst_index_near_fragment != -1 ? - worst_index_near_fragment : worst_index; -} - -/********************************************************************** - * select_blob_to_split_from_fixpt - * - * Given the fix point from a dictionary search, if there is a single - * dangerous blob that maps to multiple characters, return that blob - * index as a place we need to split. If none, return -1. 
- **********************************************************************/ -int Wordrec::select_blob_to_split_from_fixpt(DANGERR *fixpt) { - if (!fixpt) - return -1; - for (int i = 0; i < fixpt->size(); i++) { - if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end && - (*fixpt)[i].dangerous && - (*fixpt)[i].correct_is_ngram) { - return (*fixpt)[i].begin; - } - } - return -1; -} - - -} // namespace tesseract - - -/********************************************************************** - * total_containment - * - * Check to see if one of these outlines is totally contained within - * the bounding box of the other. - **********************************************************************/ -int16_t total_containment(TBLOB *blob1, TBLOB *blob2) { - TBOX box1 = blob1->bounding_box(); - TBOX box2 = blob2->bounding_box(); - return box1.contains(box2) || box2.contains(box1); -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chopper.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chopper.h deleted file mode 100644 index f1ebe2e4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/chopper.h +++ /dev/null @@ -1,47 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: chopper.h (Formerly chopper.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed May 15 14:24:26 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#ifndef CHOPPER_H -#define CHOPPER_H - -#include "matrix.h" -#include "seam.h" -#include "stopper.h" - - -void preserve_outline(EDGEPT *start); - -void preserve_outline_tree(TESSLINE *srcline); - -EDGEPT *restore_outline(EDGEPT *start); - -void restore_outline_tree(TESSLINE *srcline); - -int any_shared_split_points(const GenericVector& seams, SEAM *seam); - -int check_blob(TBLOB *blob); - -int16_t total_containment(TBLOB *blob1, TBLOB *blob2); -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/drawfx.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/drawfx.cpp deleted file mode 100644 index 0577c87c..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/drawfx.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/********************************************************************** - * File: drawfx.cpp (Formerly drawfx.c) - * Description: Draw things to do with feature extraction. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "drawfx.h" -#include "normalis.h" -#include "werd.h" - -#ifndef GRAPHICS_DISABLED -#define FXDEMOWIN "FXDemo" -#define FXDEMOXPOS 250 -#define FXDEMOYPOS 0 -#define FXDEMOXSIZE 600 -#define FXDEMOYSIZE 256 -#define BLN_MAX 512 //max coord for bln -#define WERDWIDTH (BLN_MAX*20) -#define DECENT_WERD_WIDTH (5*kBlnXHeight) - //title of window -#define DEBUG_WIN_NAME "FXDebug" -#define DEBUG_XPOS 0 -#define DEBUG_YPOS 120 -#define DEBUG_XSIZE 80 -#define DEBUG_YSIZE 32 -#define YMAX 3508 -#define XMAX 2550 -#define MAXEDGELENGTH 1024 //max steps inoutline - -#define EXTERN - -EXTERN STRING_VAR (fx_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); - -EXTERN ScrollView* fx_win = nullptr; -EXTERN FILE *fx_debug = nullptr; - -/********************************************************************** - * create_fx_win - * - * Create the fx window used to show the fit. - **********************************************************************/ - -void create_fx_win() { //make features win - fx_win = new ScrollView (FXDEMOWIN, - FXDEMOXPOS, FXDEMOYPOS, FXDEMOXSIZE, FXDEMOYSIZE, - WERDWIDTH*2, BLN_MAX*2, true); -} - - -/********************************************************************** - * clear_fx_win - * - * Clear the fx window and draw on the base/mean lines. 
- **********************************************************************/ - -void clear_fx_win() { //make features win - fx_win->Clear(); - fx_win->Pen(64,64,64); - fx_win->Line(-WERDWIDTH, kBlnBaselineOffset, WERDWIDTH, kBlnBaselineOffset); - fx_win->Line(-WERDWIDTH, kBlnXHeight + kBlnBaselineOffset, WERDWIDTH, - kBlnXHeight + kBlnBaselineOffset); -} - -#endif // GRAPHICS_DISABLED - -/********************************************************************** - * create_fxdebug_win - * - * Create the fx window used to show the fit. - **********************************************************************/ - -void create_fxdebug_win() { //make gradients win -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/drawfx.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/drawfx.h deleted file mode 100644 index 5947390a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/drawfx.h +++ /dev/null @@ -1,31 +0,0 @@ -/********************************************************************** - * File: drawfx.h (Formerly drawfx.h) - * Description: Draw things to do with feature extraction. - * Author: Ray Smith - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef DRAWFX_H -#define DRAWFX_H - -#include "params.h" -#include "scrollview.h" - -extern STRING_VAR_H (fx_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); -extern ScrollView* fx_win; -extern FILE *fx_debug; -void create_fx_win(); //make features win -void clear_fx_win(); //make features win -void create_fxdebug_win(); //make gradients win -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/findseam.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/findseam.cpp deleted file mode 100644 index 81055392..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/findseam.cpp +++ /dev/null @@ -1,373 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: findseam.cpp (Formerly findseam.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "findseam.h" -#include "plotedges.h" -#include "outlines.h" -#include "seam.h" -#include "wordrec.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/********************************************************************** - * partial_split_priority - * - * Assign a priority to this split based on the features that it has. - * Grade it according to the different rating schemes and return the - * value of its goodness. - **********************************************************************/ - -#define partial_split_priority(split) \ - (grade_split_length(split) + grade_sharpness(split)) - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -#define SPLIT_CLOSENESS 20/* Difference in x value */ - /* How many to keep */ -#define MAX_NUM_SEAMS 150 - /* How many to keep */ -#define MAX_OLD_SEAMS 150 -#define NO_FULL_PRIORITY -1/* Special marker for pri. */ - /* Evaluate right away */ -#define BAD_PRIORITY 9999.0 - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -namespace tesseract { - -/********************************************************************** - * add_seam_to_queue - * - * Adds the given new_seam to the seams priority queue, unless it is full - * and the new seam is worse than the worst. 
- **********************************************************************/ -void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, - SeamQueue* seams) { - if (new_seam == nullptr) return; - if (chop_debug) { - tprintf("Pushing new seam with priority %g :", new_priority); - new_seam->Print("seam: "); - } - if (seams->size() >= MAX_NUM_SEAMS) { - SeamPair old_pair(0, nullptr); - if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) { - if (chop_debug) { - tprintf("Old seam staying with priority %g\n", old_pair.key()); - } - delete new_seam; - seams->Push(&old_pair); - return; - } else if (chop_debug) { - tprintf("New seam with priority %g beats old worst seam with %g\n", - new_priority, old_pair.key()); - } - } - SeamPair new_pair(new_priority, new_seam); - seams->Push(&new_pair); -} - - -/********************************************************************** - * choose_best_seam - * - * Choose the best seam that can be created by assembling this a - * collection of splits. A queue of all the possible seams is - * maintained. Each new split received is placed in that queue with - * its partial priority value. These values in the seam queue are - * evaluated and combined until a good enough seam is found. If no - * further good seams are being found then this function returns to the - * caller, who will send more splits. If this function is called with - * a split of nullptr, then no further splits can be supplied by the - * caller. 
- **********************************************************************/ -void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, - PRIORITY priority, SEAM **seam_result, - TBLOB *blob, SeamPile *seam_pile) { - SEAM *seam; - char str[80]; - float my_priority; - /* Add seam of split */ - my_priority = priority; - if (split != nullptr) { - TPOINT split_point = split->point1->pos; - split_point += split->point2->pos; - split_point /= 2; - seam = new SEAM(my_priority, split_point, *split); - if (chop_debug > 1) seam->Print("Partial priority "); - add_seam_to_queue(my_priority, seam, seam_queue); - - if (my_priority > chop_good_split) - return; - } - - TBOX bbox = blob->bounding_box(); - /* Queue loop */ - while (!seam_queue->empty()) { - SeamPair seam_pair; - seam_queue->Pop(&seam_pair); - seam = seam_pair.extract_data(); - /* Set full priority */ - my_priority = seam->FullPriority(bbox.left(), bbox.right(), - chop_overlap_knob, chop_centered_maxwidth, - chop_center_knob, chop_width_change_knob); - if (chop_debug) { - sprintf (str, "Full my_priority %0.0f, ", my_priority); - seam->Print(str); - } - - if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) && - my_priority < chop_ok_split) { - /* No crossing */ - if (seam->IsHealthy(*blob, chop_min_outline_points, - chop_min_outline_area)) { - delete *seam_result; - *seam_result = new SEAM(*seam); - (*seam_result)->set_priority(my_priority); - } else { - delete seam; - seam = nullptr; - my_priority = BAD_PRIORITY; - } - } - - if (my_priority < chop_good_split) { - delete seam; - return; /* Made good answer */ - } - - if (seam) { - /* Combine with others */ - if (seam_pile->size() < chop_seam_pile_size) { - combine_seam(*seam_pile, seam, seam_queue); - SeamDecPair pair(seam_pair.key(), seam); - seam_pile->Push(&pair); - } else if (chop_new_seam_pile && - seam_pile->size() == chop_seam_pile_size && - seam_pile->PeekTop().key() > seam_pair.key()) { - combine_seam(*seam_pile, seam, 
seam_queue); - SeamDecPair pair; - seam_pile->Pop(&pair); // pop the worst. - // Replace the seam in pair (deleting the old one) with - // the new seam and score, then push back into the heap. - pair.set_key(seam_pair.key()); - pair.set_data(seam); - seam_pile->Push(&pair); - } else { - delete seam; - } - } - - my_priority = seam_queue->empty() ? NO_FULL_PRIORITY - : seam_queue->PeekTop().key(); - if ((my_priority > chop_ok_split) || - (my_priority > chop_good_split && split)) - return; - } -} - - -/********************************************************************** - * combine_seam - * - * Find other seams to combine with this one. The new seams that result - * from this union should be added to the seam queue. The return value - * tells whether or not any additional seams were added to the queue. - **********************************************************************/ -void Wordrec::combine_seam(const SeamPile& seam_pile, - const SEAM* seam, SeamQueue* seam_queue) { - for (int x = 0; x < seam_pile.size(); ++x) { - const SEAM *this_one = seam_pile.get(x).data(); - if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { - SEAM *new_one = new SEAM(*seam); - new_one->CombineWith(*this_one); - if (chop_debug > 1) new_one->Print("Combo priority "); - add_seam_to_queue(new_one->priority(), new_one, seam_queue); - } - } -} - -/********************************************************************** - * pick_good_seam - * - * Find and return a good seam that will split this blob into two pieces. - * Work from the outlines provided. 
- **********************************************************************/ -SEAM *Wordrec::pick_good_seam(TBLOB *blob) { - SeamPile seam_pile(chop_seam_pile_size); - EDGEPT *points[MAX_NUM_POINTS]; - EDGEPT_CLIST new_points; - SEAM *seam = nullptr; - TESSLINE *outline; - int16_t num_points = 0; - -#ifndef GRAPHICS_DISABLED - if (chop_debug > 2) - wordrec_display_splits.set_value(true); - - draw_blob_edges(blob); -#endif - - PointHeap point_heap(MAX_NUM_POINTS); - for (outline = blob->outlines; outline; outline = outline->next) - prioritize_points(outline, &point_heap); - - while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { - points[num_points++] = point_heap.PeekTop().data; - point_heap.Pop(nullptr); - } - - /* Initialize queue */ - SeamQueue seam_queue(MAX_NUM_SEAMS); - - try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); - try_vertical_splits(points, num_points, &new_points, - &seam_queue, &seam_pile, &seam, blob); - - if (seam == nullptr) { - choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile); - } else if (seam->priority() > chop_good_split) { - choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, - &seam_pile); - } - - EDGEPT_C_IT it(&new_points); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - EDGEPT *inserted_point = it.data(); - if (seam == nullptr || !seam->UsesPoint(inserted_point)) { - for (outline = blob->outlines; outline; outline = outline->next) { - if (outline->loop == inserted_point) { - outline->loop = outline->loop->next; - } - } - remove_edgept(inserted_point); - } - } - - if (seam) { - if (seam->priority() > chop_ok_split) { - delete seam; - seam = nullptr; - } -#ifndef GRAPHICS_DISABLED - else if (wordrec_display_splits) { - seam->Mark(edge_window); - if (chop_debug > 2) { - update_edge_window(); - edge_window_wait(); - } - } -#endif - } - - if (chop_debug) - wordrec_display_splits.set_value(false); - - return (seam); -} - - 
-/********************************************************************** - * try_point_pairs - * - * Try all the splits that are produced by pairing critical points - * together. See if any of them are suitable for use. Use a seam - * queue and seam pile that have already been initialized and used. - **********************************************************************/ -void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, - TBLOB * blob) { - int16_t x; - int16_t y; - PRIORITY priority; - - for (x = 0; x < num_points; x++) { - for (y = x + 1; y < num_points; y++) { - if (points[y] && - points[x]->WeightedDistance(*points[y], chop_x_y_weight) < - chop_split_length && - points[x] != points[y]->next && points[y] != points[x]->next && - !is_exterior_point(points[x], points[y]) && - !is_exterior_point(points[y], points[x])) { - SPLIT split(points[x], points[y]); - priority = partial_split_priority(&split); - - choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); - } - } - } -} - - -/********************************************************************** - * try_vertical_splits - * - * Try all the splits that are produced by vertical projection to see - * if any of them are suitable for use. Use a seam queue and seam pile - * that have already been initialized and used. - * Return in new_points a collection of points that were inserted into - * the blob while examining vertical splits and which may safely be - * removed once a seam is chosen if they are not part of the seam. 
- **********************************************************************/ -void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - EDGEPT_CLIST *new_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, - TBLOB * blob) { - EDGEPT *vertical_point = nullptr; - int16_t x; - PRIORITY priority; - TESSLINE *outline; - - for (x = 0; x < num_points; x++) { - vertical_point = nullptr; - for (outline = blob->outlines; outline; outline = outline->next) { - vertical_projection_point(points[x], outline->loop, - &vertical_point, new_points); - } - - if (vertical_point && points[x] != vertical_point->next && - vertical_point != points[x]->next && - points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < - chop_split_length) { - SPLIT split(points[x], vertical_point); - priority = partial_split_priority(&split); - choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); - } - } -} - -} diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/findseam.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/findseam.h deleted file mode 100644 index 1e6f6def..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/findseam.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: findseam.h (Formerly findseam.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Thu May 16 17:05:17 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#ifndef FINDSEAM_H -#define FINDSEAM_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "seam.h" -#include "genericheap.h" -#include "kdpair.h" -#include "chop.h" - -// The SeamPair elements own their SEAMs and delete them upon destruction. -using SeamPair = tesseract::KDPtrPairInc; -using SeamQueue = tesseract::GenericHeap; - -using SeamDecPair = tesseract::KDPtrPairDec; -using SeamPile = tesseract::GenericHeap; - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/gradechop.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/gradechop.cpp deleted file mode 100644 index 55df3756..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/gradechop.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: gradechop.cpp (Formerly gradechop.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 16:06:27 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "wordrec.h" -#include "chop.h" -#include -#include - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ - -namespace tesseract { - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -/********************************************************************** - * grade_split_length - * - * Return a grade for the length of this split. - * 0 = "perfect" - * 100 = "no way jay" - **********************************************************************/ -PRIORITY Wordrec::grade_split_length(SPLIT *split) { - PRIORITY grade; - float split_length; - - split_length = - split->point1->WeightedDistance(*split->point2, chop_x_y_weight); - - if (split_length <= 0) - grade = 0; - else - grade = sqrt (split_length) * chop_split_dist_knob; - - return (std::max(0.0f, grade)); -} - - -/********************************************************************** - * grade_sharpness - * - * Return a grade for the sharpness of this split. 
- * 0 = "perfect" - * 100 = "no way jay" - **********************************************************************/ -PRIORITY Wordrec::grade_sharpness(SPLIT *split) { - PRIORITY grade; - - grade = point_priority (split->point1) + point_priority (split->point2); - - if (grade < -360.0) - grade = 0; - else - grade += 360.0; - - grade *= chop_sharpness_knob; /* Values 0 to -360 */ - - return (grade); -} - - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/language_model.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/language_model.cpp deleted file mode 100644 index dd051275..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/language_model.cpp +++ /dev/null @@ -1,1480 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: language_model.cpp -// Description: Functions that utilize the knowledge about the properties, -// structure and statistics of the language to help recognition. -// Author: Daria Antonova -// Created: Mon Nov 11 11:26:43 PST 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "language_model.h" -#include // for assert -#include // for log2, pow -#include "blamer.h" // for BlamerBundle -#include "ccutil.h" // for CCUtil -#include "dawg.h" // for NO_EDGE, Dawg, Dawg::kPatternUn... 
-#include "errcode.h" // for ASSERT_HOST -#include "lm_state.h" // for ViterbiStateEntry, ViterbiState... -#include "matrix.h" // for MATRIX_COORD -#include "pageres.h" // for WERD_RES -#include "params.h" // for IntParam, BoolParam, DoubleParam -#include "params_training_featdef.h" // for ParamsTrainingHypothesis, PTRAI... -#include "tprintf.h" // for tprintf -#include "unichar.h" // for UNICHAR_ID, INVALID_UNICHAR_ID -#include "unicharset.h" // for UNICHARSET -#include "unicity_table.h" // for UnicityTable - -template class GenericVector; -template class UnicityTable; - -namespace tesseract { - -class LMPainPoints; -struct FontInfo; - -#if defined(ANDROID) -static inline double log2(double n) { - return log(n) / log(2.0); -} -#endif // ANDROID - -const float LanguageModel::kMaxAvgNgramCost = 25.0f; - -LanguageModel::LanguageModel(const UnicityTable *fontinfo_table, - Dict *dict) - : INT_MEMBER(language_model_debug_level, 0, "Language model debug level", - dict->getCCUtil()->params()), - BOOL_INIT_MEMBER(language_model_ngram_on, false, - "Turn on/off the use of character ngram model", - dict->getCCUtil()->params()), - INT_MEMBER(language_model_ngram_order, 8, - "Maximum order of the character ngram model", - dict->getCCUtil()->params()), - INT_MEMBER(language_model_viterbi_list_max_num_prunable, 10, - "Maximum number of prunable (those for which" - " PrunablePath() is true) entries in each viterbi list" - " recorded in BLOB_CHOICEs", - dict->getCCUtil()->params()), - INT_MEMBER(language_model_viterbi_list_max_size, 500, - "Maximum size of viterbi lists recorded in BLOB_CHOICEs", - dict->getCCUtil()->params()), - double_MEMBER(language_model_ngram_small_prob, 0.000001, - "To avoid overly small denominators use this as the " - "floor of the probability returned by the ngram model.", - dict->getCCUtil()->params()), - double_MEMBER(language_model_ngram_nonmatch_score, -40.0, - "Average classifier score of a non-matching unichar.", - dict->getCCUtil()->params()), - 
BOOL_MEMBER(language_model_ngram_use_only_first_uft8_step, false, - "Use only the first UTF8 step of the given string" - " when computing log probabilities.", - dict->getCCUtil()->params()), - double_MEMBER(language_model_ngram_scale_factor, 0.03, - "Strength of the character ngram model relative to the" - " character classifier ", - dict->getCCUtil()->params()), - double_MEMBER(language_model_ngram_rating_factor, 16.0, - "Factor to bring log-probs into the same range as ratings" - " when multiplied by outline length ", - dict->getCCUtil()->params()), - BOOL_MEMBER(language_model_ngram_space_delimited_language, true, - "Words are delimited by space", dict->getCCUtil()->params()), - INT_MEMBER(language_model_min_compound_length, 3, - "Minimum length of compound words", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_non_freq_dict_word, 0.1, - "Penalty for words not in the frequent word dictionary", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_non_dict_word, 0.15, - "Penalty for non-dictionary words", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_punc, 0.2, - "Penalty for inconsistent punctuation", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_case, 0.1, - "Penalty for inconsistent case", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_script, 0.5, - "Penalty for inconsistent script", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_chartype, 0.3, - "Penalty for inconsistent character type", - dict->getCCUtil()->params()), - // TODO(daria, rays): enable font consistency checking - // after improving font analysis. 
- double_MEMBER(language_model_penalty_font, 0.00, - "Penalty for inconsistent font", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_spacing, 0.05, - "Penalty for inconsistent spacing", - dict->getCCUtil()->params()), - double_MEMBER(language_model_penalty_increment, 0.01, "Penalty increment", - dict->getCCUtil()->params()), - INT_MEMBER(wordrec_display_segmentations, 0, "Display Segmentations", - dict->getCCUtil()->params()), - BOOL_INIT_MEMBER(language_model_use_sigmoidal_certainty, false, - "Use sigmoidal score for certainty", - dict->getCCUtil()->params()), - dawg_args_(nullptr, new DawgPositionVector(), NO_PERM), - fontinfo_table_(fontinfo_table), - dict_(dict), - fixed_pitch_(false), - max_char_wh_ratio_(0.0), - acceptable_choice_found_(false) { - ASSERT_HOST(dict_ != nullptr); -} - -LanguageModel::~LanguageModel() { delete dawg_args_.updated_dawgs; } - -void LanguageModel::InitForWord(const WERD_CHOICE *prev_word, - bool fixed_pitch, float max_char_wh_ratio, - float rating_cert_scale) { - fixed_pitch_ = fixed_pitch; - max_char_wh_ratio_ = max_char_wh_ratio; - rating_cert_scale_ = rating_cert_scale; - acceptable_choice_found_ = false; - correct_segmentation_explored_ = false; - - // Initialize vectors with beginning DawgInfos. - very_beginning_active_dawgs_.clear(); - dict_->init_active_dawgs(&very_beginning_active_dawgs_, false); - beginning_active_dawgs_.clear(); - dict_->default_dawgs(&beginning_active_dawgs_, false); - - // Fill prev_word_str_ with the last language_model_ngram_order - // unichars from prev_word. 
- if (language_model_ngram_on) { - if (prev_word != nullptr && prev_word->unichar_string() != nullptr) { - prev_word_str_ = prev_word->unichar_string(); - if (language_model_ngram_space_delimited_language) prev_word_str_ += ' '; - } else { - prev_word_str_ = " "; - } - const char *str_ptr = prev_word_str_.string(); - const char *str_end = str_ptr + prev_word_str_.length(); - int step; - prev_word_unichar_step_len_ = 0; - while (str_ptr != str_end && (step = UNICHAR::utf8_step(str_ptr))) { - str_ptr += step; - ++prev_word_unichar_step_len_; - } - ASSERT_HOST(str_ptr == str_end); - } -} - -/** - * Helper scans the collection of predecessors for competing siblings that - * have the same letter with the opposite case, setting competing_vse. - */ -static void ScanParentsForCaseMix(const UNICHARSET& unicharset, - LanguageModelState* parent_node) { - if (parent_node == nullptr) return; - ViterbiStateEntry_IT vit(&parent_node->viterbi_state_entries); - for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { - ViterbiStateEntry* vse = vit.data(); - vse->competing_vse = nullptr; - UNICHAR_ID unichar_id = vse->curr_b->unichar_id(); - if (unicharset.get_isupper(unichar_id) || - unicharset.get_islower(unichar_id)) { - UNICHAR_ID other_case = unicharset.get_other_case(unichar_id); - if (other_case == unichar_id) continue; // Not in unicharset. - // Find other case in same list. There could be multiple entries with - // the same unichar_id, but in theory, they should all point to the - // same BLOB_CHOICE, and that is what we will be using to decide - // which to keep. 
- ViterbiStateEntry_IT vit2(&parent_node->viterbi_state_entries); - for (vit2.mark_cycle_pt(); !vit2.cycled_list() && - vit2.data()->curr_b->unichar_id() != other_case; - vit2.forward()) {} - if (!vit2.cycled_list()) { - vse->competing_vse = vit2.data(); - } - } - } -} - -/** - * Helper returns true if the given choice has a better case variant before - * it in the choice_list that is not distinguishable by size. - */ -static bool HasBetterCaseVariant(const UNICHARSET& unicharset, - const BLOB_CHOICE* choice, - BLOB_CHOICE_LIST* choices) { - UNICHAR_ID choice_id = choice->unichar_id(); - UNICHAR_ID other_case = unicharset.get_other_case(choice_id); - if (other_case == choice_id || other_case == INVALID_UNICHAR_ID) - return false; // Not upper or lower or not in unicharset. - if (unicharset.SizesDistinct(choice_id, other_case)) - return false; // Can be separated by size. - BLOB_CHOICE_IT bc_it(choices); - for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { - BLOB_CHOICE* better_choice = bc_it.data(); - if (better_choice->unichar_id() == other_case) - return true; // Found an earlier instance of other_case. - else if (better_choice == choice) - return false; // Reached the original choice. - } - return false; // Should never happen, but just in case. -} - -/** - * UpdateState has the job of combining the ViterbiStateEntry lists on each - * of the choices on parent_list with each of the blob choices in curr_list, - * making a new ViterbiStateEntry for each sensible path. - * - * This could be a huge set of combinations, creating a lot of work only to - * be truncated by some beam limit, but only certain kinds of paths will - * continue at the next step: - * - paths that are liked by the language model: either a DAWG or the n-gram - * model, where active. - * - paths that represent some kind of top choice. The old permuter permuted - * the top raw classifier score, the top upper case word and the top lower- - * case word. 
UpdateState now concentrates its top-choice paths on top - * lower-case, top upper-case (or caseless alpha), and top digit sequence, - * with allowance for continuation of these paths through blobs where such - * a character does not appear in the choices list. - * - * GetNextParentVSE enforces some of these models to minimize the number of - * calls to AddViterbiStateEntry, even prior to looking at the language model. - * Thus an n-blob sequence of [l1I] will produce 3n calls to - * AddViterbiStateEntry instead of 3^n. - * - * Of course it isn't quite that simple as Title Case is handled by allowing - * lower case to continue an upper case initial, but it has to be detected - * in the combiner so it knows which upper case letters are initial alphas. - */ -bool LanguageModel::UpdateState( - bool just_classified, - int curr_col, int curr_row, - BLOB_CHOICE_LIST *curr_list, - LanguageModelState *parent_node, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { - if (language_model_debug_level > 0) { - tprintf("\nUpdateState: col=%d row=%d %s", - curr_col, curr_row, just_classified ? "just_classified" : ""); - if (language_model_debug_level > 5) - tprintf("(parent=%p)\n", parent_node); - else - tprintf("\n"); - } - // Initialize helper variables. - bool word_end = (curr_row+1 >= word_res->ratings->dimension()); - bool new_changed = false; - float denom = (language_model_ngram_on) ? 
ComputeDenom(curr_list) : 1.0f; - const UNICHARSET& unicharset = dict_->getUnicharset(); - BLOB_CHOICE *first_lower = nullptr; - BLOB_CHOICE *first_upper = nullptr; - BLOB_CHOICE *first_digit = nullptr; - bool has_alnum_mix = false; - if (parent_node != nullptr) { - int result = SetTopParentLowerUpperDigit(parent_node); - if (result < 0) { - if (language_model_debug_level > 0) - tprintf("No parents found to process\n"); - return false; - } - if (result > 0) - has_alnum_mix = true; - } - if (!GetTopLowerUpperDigit(curr_list, &first_lower, &first_upper, - &first_digit)) - has_alnum_mix = false;; - ScanParentsForCaseMix(unicharset, parent_node); - if (language_model_debug_level > 3 && parent_node != nullptr) { - parent_node->Print("Parent viterbi list"); - } - LanguageModelState *curr_state = best_choice_bundle->beam[curr_row]; - - // Call AddViterbiStateEntry() for each parent+child ViterbiStateEntry. - ViterbiStateEntry_IT vit; - BLOB_CHOICE_IT c_it(curr_list); - for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - BLOB_CHOICE* choice = c_it.data(); - // TODO(antonova): make sure commenting this out if ok for ngram - // model scoring (I think this was introduced to fix ngram model quirks). - // Skip nullptr unichars unless it is the only choice. - //if (!curr_list->singleton() && c_it.data()->unichar_id() == 0) continue; - UNICHAR_ID unichar_id = choice->unichar_id(); - if (unicharset.get_fragment(unichar_id)) { - continue; // Skip fragments. - } - // Set top choice flags. - LanguageModelFlagsType blob_choice_flags = kXhtConsistentFlag; - if (c_it.at_first() || !new_changed) - blob_choice_flags |= kSmallestRatingFlag; - if (first_lower == choice) blob_choice_flags |= kLowerCaseFlag; - if (first_upper == choice) blob_choice_flags |= kUpperCaseFlag; - if (first_digit == choice) blob_choice_flags |= kDigitFlag; - - if (parent_node == nullptr) { - // Process the beginning of a word. 
- // If there is a better case variant that is not distinguished by size, - // skip this blob choice, as we have no choice but to accept the result - // of the character classifier to distinguish between them, even if - // followed by an upper case. - // With words like iPoc, and other CamelBackWords, the lower-upper - // transition can only be achieved if the classifier has the correct case - // as the top choice, and leaving an initial I lower down the list - // increases the chances of choosing IPoc simply because it doesn't - // include such a transition. iPoc will beat iPOC and ipoc because - // the other words are baseline/x-height inconsistent. - if (HasBetterCaseVariant(unicharset, choice, curr_list)) - continue; - // Upper counts as lower at the beginning of a word. - if (blob_choice_flags & kUpperCaseFlag) - blob_choice_flags |= kLowerCaseFlag; - new_changed |= AddViterbiStateEntry( - blob_choice_flags, denom, word_end, curr_col, curr_row, - choice, curr_state, nullptr, pain_points, - word_res, best_choice_bundle, blamer_bundle); - } else { - // Get viterbi entries from each parent ViterbiStateEntry. - vit.set_to_list(&parent_node->viterbi_state_entries); - int vit_counter = 0; - vit.mark_cycle_pt(); - ViterbiStateEntry* parent_vse = nullptr; - LanguageModelFlagsType top_choice_flags; - while ((parent_vse = GetNextParentVSE(just_classified, has_alnum_mix, - c_it.data(), blob_choice_flags, - unicharset, word_res, &vit, - &top_choice_flags)) != nullptr) { - // Skip pruned entries and do not look at prunable entries if already - // examined language_model_viterbi_list_max_num_prunable of those. 
- if (PrunablePath(*parent_vse) && - (++vit_counter > language_model_viterbi_list_max_num_prunable || - (language_model_ngram_on && parent_vse->ngram_info->pruned))) { - continue; - } - // If the parent has no alnum choice, (ie choice is the first in a - // string of alnum), and there is a better case variant that is not - // distinguished by size, skip this blob choice/parent, as with the - // initial blob treatment above. - if (!parent_vse->HasAlnumChoice(unicharset) && - HasBetterCaseVariant(unicharset, choice, curr_list)) - continue; - // Create a new ViterbiStateEntry if BLOB_CHOICE in c_it.data() - // looks good according to the Dawgs or character ngram model. - new_changed |= AddViterbiStateEntry( - top_choice_flags, denom, word_end, curr_col, curr_row, - c_it.data(), curr_state, parent_vse, pain_points, - word_res, best_choice_bundle, blamer_bundle); - } - } - } - return new_changed; -} - -/** - * Finds the first lower and upper case letter and first digit in curr_list. - * For non-upper/lower languages, alpha counts as upper. - * Uses the first character in the list in place of empty results. - * Returns true if both alpha and digits are found. 
- */ -bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, - BLOB_CHOICE **first_lower, - BLOB_CHOICE **first_upper, - BLOB_CHOICE **first_digit) const { - BLOB_CHOICE_IT c_it(curr_list); - const UNICHARSET &unicharset = dict_->getUnicharset(); - BLOB_CHOICE *first_unichar = nullptr; - for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - UNICHAR_ID unichar_id = c_it.data()->unichar_id(); - if (unicharset.get_fragment(unichar_id)) continue; // skip fragments - if (first_unichar == nullptr) first_unichar = c_it.data(); - if (*first_lower == nullptr && unicharset.get_islower(unichar_id)) { - *first_lower = c_it.data(); - } - if (*first_upper == nullptr && unicharset.get_isalpha(unichar_id) && - !unicharset.get_islower(unichar_id)) { - *first_upper = c_it.data(); - } - if (*first_digit == nullptr && unicharset.get_isdigit(unichar_id)) { - *first_digit = c_it.data(); - } - } - ASSERT_HOST(first_unichar != nullptr); - bool mixed = (*first_lower != nullptr || *first_upper != nullptr) && - *first_digit != nullptr; - if (*first_lower == nullptr) *first_lower = first_unichar; - if (*first_upper == nullptr) *first_upper = first_unichar; - if (*first_digit == nullptr) *first_digit = first_unichar; - return mixed; -} - -/** - * Forces there to be at least one entry in the overall set of the - * viterbi_state_entries of each element of parent_node that has the - * top_choice_flag set for lower, upper and digit using the same rules as - * GetTopLowerUpperDigit, setting the flag on the first found suitable - * candidate, whether or not the flag is set on some other parent. - * Returns 1 if both alpha and digits are found among the parents, -1 if no - * parents are found at all (a legitimate case), and 0 otherwise. 
- */ -int LanguageModel::SetTopParentLowerUpperDigit( - LanguageModelState *parent_node) const { - if (parent_node == nullptr) return -1; - UNICHAR_ID top_id = INVALID_UNICHAR_ID; - ViterbiStateEntry* top_lower = nullptr; - ViterbiStateEntry* top_upper = nullptr; - ViterbiStateEntry* top_digit = nullptr; - ViterbiStateEntry* top_choice = nullptr; - float lower_rating = 0.0f; - float upper_rating = 0.0f; - float digit_rating = 0.0f; - float top_rating = 0.0f; - const UNICHARSET &unicharset = dict_->getUnicharset(); - ViterbiStateEntry_IT vit(&parent_node->viterbi_state_entries); - for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { - ViterbiStateEntry* vse = vit.data(); - // INVALID_UNICHAR_ID should be treated like a zero-width joiner, so scan - // back to the real character if needed. - ViterbiStateEntry* unichar_vse = vse; - UNICHAR_ID unichar_id = unichar_vse->curr_b->unichar_id(); - float rating = unichar_vse->curr_b->rating(); - while (unichar_id == INVALID_UNICHAR_ID && - unichar_vse->parent_vse != nullptr) { - unichar_vse = unichar_vse->parent_vse; - unichar_id = unichar_vse->curr_b->unichar_id(); - rating = unichar_vse->curr_b->rating(); - } - if (unichar_id != INVALID_UNICHAR_ID) { - if (unicharset.get_islower(unichar_id)) { - if (top_lower == nullptr || lower_rating > rating) { - top_lower = vse; - lower_rating = rating; - } - } else if (unicharset.get_isalpha(unichar_id)) { - if (top_upper == nullptr || upper_rating > rating) { - top_upper = vse; - upper_rating = rating; - } - } else if (unicharset.get_isdigit(unichar_id)) { - if (top_digit == nullptr || digit_rating > rating) { - top_digit = vse; - digit_rating = rating; - } - } - } - if (top_choice == nullptr || top_rating > rating) { - top_choice = vse; - top_rating = rating; - top_id = unichar_id; - } - } - if (top_choice == nullptr) return -1; - bool mixed = (top_lower != nullptr || top_upper != nullptr) && - top_digit != nullptr; - if (top_lower == nullptr) top_lower = top_choice; - 
top_lower->top_choice_flags |= kLowerCaseFlag; - if (top_upper == nullptr) top_upper = top_choice; - top_upper->top_choice_flags |= kUpperCaseFlag; - if (top_digit == nullptr) top_digit = top_choice; - top_digit->top_choice_flags |= kDigitFlag; - top_choice->top_choice_flags |= kSmallestRatingFlag; - if (top_id != INVALID_UNICHAR_ID && dict_->compound_marker(top_id) && - (top_choice->top_choice_flags & - (kLowerCaseFlag | kUpperCaseFlag | kDigitFlag))) { - // If the compound marker top choice carries any of the top alnum flags, - // then give it all of them, allowing words like I-295 to be chosen. - top_choice->top_choice_flags |= - kLowerCaseFlag | kUpperCaseFlag | kDigitFlag; - } - return mixed ? 1 : 0; -} - -/** - * Finds the next ViterbiStateEntry with which the given unichar_id can - * combine sensibly, taking into account any mixed alnum/mixed case - * situation, and whether this combination has been inspected before. - */ -ViterbiStateEntry* LanguageModel::GetNextParentVSE( - bool just_classified, bool mixed_alnum, const BLOB_CHOICE* bc, - LanguageModelFlagsType blob_choice_flags, const UNICHARSET& unicharset, - WERD_RES* word_res, ViterbiStateEntry_IT* vse_it, - LanguageModelFlagsType* top_choice_flags) const { - for (; !vse_it->cycled_list(); vse_it->forward()) { - ViterbiStateEntry* parent_vse = vse_it->data(); - // Only consider the parent if it has been updated or - // if the current ratings cell has just been classified. - if (!just_classified && !parent_vse->updated) continue; - if (language_model_debug_level > 2) - parent_vse->Print("Considering"); - // If the parent is non-alnum, then upper counts as lower. 
- *top_choice_flags = blob_choice_flags; - if ((blob_choice_flags & kUpperCaseFlag) && - !parent_vse->HasAlnumChoice(unicharset)) { - *top_choice_flags |= kLowerCaseFlag; - } - *top_choice_flags &= parent_vse->top_choice_flags; - UNICHAR_ID unichar_id = bc->unichar_id(); - const BLOB_CHOICE* parent_b = parent_vse->curr_b; - UNICHAR_ID parent_id = parent_b->unichar_id(); - // Digits do not bind to alphas if there is a mix in both parent and current - // or if the alpha is not the top choice. - if (unicharset.get_isdigit(unichar_id) && - unicharset.get_isalpha(parent_id) && - (mixed_alnum || *top_choice_flags == 0)) - continue; // Digits don't bind to alphas. - // Likewise alphas do not bind to digits if there is a mix in both or if - // the digit is not the top choice. - if (unicharset.get_isalpha(unichar_id) && - unicharset.get_isdigit(parent_id) && - (mixed_alnum || *top_choice_flags == 0)) - continue; // Alphas don't bind to digits. - // If there is a case mix of the same alpha in the parent list, then - // competing_vse is non-null and will be used to determine whether - // or not to bind the current blob choice. - if (parent_vse->competing_vse != nullptr) { - const BLOB_CHOICE* competing_b = parent_vse->competing_vse->curr_b; - UNICHAR_ID other_id = competing_b->unichar_id(); - if (language_model_debug_level >= 5) { - tprintf("Parent %s has competition %s\n", - unicharset.id_to_unichar(parent_id), - unicharset.id_to_unichar(other_id)); - } - if (unicharset.SizesDistinct(parent_id, other_id)) { - // If other_id matches bc wrt position and size, and parent_id, doesn't, - // don't bind to the current parent. - if (bc->PosAndSizeAgree(*competing_b, word_res->x_height, - language_model_debug_level >= 5) && - !bc->PosAndSizeAgree(*parent_b, word_res->x_height, - language_model_debug_level >= 5)) - continue; // Competing blobchoice has a better vertical match. - } - } - vse_it->forward(); - return parent_vse; // This one is good! 
- } - return nullptr; // Ran out of possibilities. -} - -bool LanguageModel::AddViterbiStateEntry( - LanguageModelFlagsType top_choice_flags, - float denom, - bool word_end, - int curr_col, int curr_row, - BLOB_CHOICE *b, - LanguageModelState *curr_state, - ViterbiStateEntry *parent_vse, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { - ViterbiStateEntry_IT vit; - if (language_model_debug_level > 1) { - tprintf("AddViterbiStateEntry for unichar %s rating=%.4f" - " certainty=%.4f top_choice_flags=0x%x", - dict_->getUnicharset().id_to_unichar(b->unichar_id()), - b->rating(), b->certainty(), top_choice_flags); - if (language_model_debug_level > 5) - tprintf(" parent_vse=%p\n", parent_vse); - else - tprintf("\n"); - } - ASSERT_HOST(curr_state != nullptr); - // Check whether the list is full. - if (curr_state->viterbi_state_entries_length >= - language_model_viterbi_list_max_size) { - if (language_model_debug_level > 1) { - tprintf("AddViterbiStateEntry: viterbi list is full!\n"); - } - return false; - } - - // Invoke Dawg language model component. - LanguageModelDawgInfo *dawg_info = - GenerateDawgInfo(word_end, curr_col, curr_row, *b, parent_vse); - - float outline_length = - AssociateUtils::ComputeOutlineLength(rating_cert_scale_, *b); - // Invoke Ngram language model component. - LanguageModelNgramInfo *ngram_info = nullptr; - if (language_model_ngram_on) { - ngram_info = GenerateNgramInfo( - dict_->getUnicharset().id_to_unichar(b->unichar_id()), b->certainty(), - denom, curr_col, curr_row, outline_length, parent_vse); - ASSERT_HOST(ngram_info != nullptr); - } - bool liked_by_language_model = dawg_info != nullptr || - (ngram_info != nullptr && !ngram_info->pruned); - // Quick escape if not liked by the language model, can't be consistent - // xheight, and not top choice. 
- if (!liked_by_language_model && top_choice_flags == 0) { - if (language_model_debug_level > 1) { - tprintf("Language model components very early pruned this entry\n"); - } - delete ngram_info; - delete dawg_info; - return false; - } - - // Check consistency of the path and set the relevant consistency_info. - LMConsistencyInfo consistency_info( - parent_vse != nullptr ? &parent_vse->consistency_info : nullptr); - // Start with just the x-height consistency, as it provides significant - // pruning opportunity. - consistency_info.ComputeXheightConsistency( - b, dict_->getUnicharset().get_ispunctuation(b->unichar_id())); - // Turn off xheight consistent flag if not consistent. - if (consistency_info.InconsistentXHeight()) { - top_choice_flags &= ~kXhtConsistentFlag; - } - - // Quick escape if not liked by the language model, not consistent xheight, - // and not top choice. - if (!liked_by_language_model && top_choice_flags == 0) { - if (language_model_debug_level > 1) { - tprintf("Language model components early pruned this entry\n"); - } - delete ngram_info; - delete dawg_info; - return false; - } - - // Compute the rest of the consistency info. - FillConsistencyInfo(curr_col, word_end, b, parent_vse, - word_res, &consistency_info); - if (dawg_info != nullptr && consistency_info.invalid_punc) { - consistency_info.invalid_punc = false; // do not penalize dict words - } - - // Compute cost of associating the blobs that represent the current unichar. - AssociateStats associate_stats; - ComputeAssociateStats(curr_col, curr_row, max_char_wh_ratio_, - parent_vse, word_res, &associate_stats); - if (parent_vse != nullptr) { - associate_stats.shape_cost += parent_vse->associate_stats.shape_cost; - associate_stats.bad_shape |= parent_vse->associate_stats.bad_shape; - } - - // Create the new ViterbiStateEntry compute the adjusted cost of the path. 
- ViterbiStateEntry *new_vse = new ViterbiStateEntry( - parent_vse, b, 0.0, outline_length, - consistency_info, associate_stats, top_choice_flags, dawg_info, - ngram_info, (language_model_debug_level > 0) ? - dict_->getUnicharset().id_to_unichar(b->unichar_id()) : nullptr); - new_vse->cost = ComputeAdjustedPathCost(new_vse); - if (language_model_debug_level >= 3) - tprintf("Adjusted cost = %g\n", new_vse->cost); - - // Invoke Top Choice language model component to make the final adjustments - // to new_vse->top_choice_flags. - if (!curr_state->viterbi_state_entries.empty() && new_vse->top_choice_flags) { - GenerateTopChoiceInfo(new_vse, parent_vse, curr_state); - } - - // If language model components did not like this unichar - return. - bool keep = new_vse->top_choice_flags || liked_by_language_model; - if (!(top_choice_flags & kSmallestRatingFlag) && // no non-top choice paths - consistency_info.inconsistent_script) { // with inconsistent script - keep = false; - } - if (!keep) { - if (language_model_debug_level > 1) { - tprintf("Language model components did not like this entry\n"); - } - delete new_vse; - return false; - } - - // Discard this entry if it represents a prunable path and - // language_model_viterbi_list_max_num_prunable such entries with a lower - // cost have already been recorded. - if (PrunablePath(*new_vse) && - (curr_state->viterbi_state_entries_prunable_length >= - language_model_viterbi_list_max_num_prunable) && - new_vse->cost >= curr_state->viterbi_state_entries_prunable_max_cost) { - if (language_model_debug_level > 1) { - tprintf("Discarded ViterbiEntry with high cost %g max cost %g\n", - new_vse->cost, - curr_state->viterbi_state_entries_prunable_max_cost); - } - delete new_vse; - return false; - } - - // Update best choice if needed. - if (word_end) { - UpdateBestChoice(new_vse, pain_points, word_res, - best_choice_bundle, blamer_bundle); - // Discard the entry if UpdateBestChoice() found flaws in it. 
- if (new_vse->cost >= WERD_CHOICE::kBadRating && - new_vse != best_choice_bundle->best_vse) { - if (language_model_debug_level > 1) { - tprintf("Discarded ViterbiEntry with high cost %g\n", new_vse->cost); - } - delete new_vse; - return false; - } - } - - // Add the new ViterbiStateEntry and to curr_state->viterbi_state_entries. - curr_state->viterbi_state_entries.add_sorted(ViterbiStateEntry::Compare, - false, new_vse); - curr_state->viterbi_state_entries_length++; - if (PrunablePath(*new_vse)) { - curr_state->viterbi_state_entries_prunable_length++; - } - - // Update lms->viterbi_state_entries_prunable_max_cost and clear - // top_choice_flags of entries with ratings_sum than new_vse->ratings_sum. - if ((curr_state->viterbi_state_entries_prunable_length >= - language_model_viterbi_list_max_num_prunable) || - new_vse->top_choice_flags) { - ASSERT_HOST(!curr_state->viterbi_state_entries.empty()); - int prunable_counter = language_model_viterbi_list_max_num_prunable; - vit.set_to_list(&(curr_state->viterbi_state_entries)); - for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { - ViterbiStateEntry *curr_vse = vit.data(); - // Clear the appropriate top choice flags of the entries in the - // list that have cost higher thank new_entry->cost - // (since they will not be top choices any more). - if (curr_vse->top_choice_flags && curr_vse != new_vse && - curr_vse->cost > new_vse->cost) { - curr_vse->top_choice_flags &= ~(new_vse->top_choice_flags); - } - if (prunable_counter > 0 && PrunablePath(*curr_vse)) --prunable_counter; - // Update curr_state->viterbi_state_entries_prunable_max_cost. - if (prunable_counter == 0) { - curr_state->viterbi_state_entries_prunable_max_cost = vit.data()->cost; - if (language_model_debug_level > 1) { - tprintf("Set viterbi_state_entries_prunable_max_cost to %g\n", - curr_state->viterbi_state_entries_prunable_max_cost); - } - prunable_counter = -1; // stop counting - } - } - } - - // Print the newly created ViterbiStateEntry. 
- if (language_model_debug_level > 2) { - new_vse->Print("New"); - if (language_model_debug_level > 5) - curr_state->Print("Updated viterbi list"); - } - - return true; -} - -void LanguageModel::GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, - const ViterbiStateEntry *parent_vse, - LanguageModelState *lms) { - ViterbiStateEntry_IT vit(&(lms->viterbi_state_entries)); - for (vit.mark_cycle_pt(); !vit.cycled_list() && new_vse->top_choice_flags && - new_vse->cost >= vit.data()->cost; vit.forward()) { - // Clear the appropriate flags if the list already contains - // a top choice entry with a lower cost. - new_vse->top_choice_flags &= ~(vit.data()->top_choice_flags); - } - if (language_model_debug_level > 2) { - tprintf("GenerateTopChoiceInfo: top_choice_flags=0x%x\n", - new_vse->top_choice_flags); - } -} - -LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( - bool word_end, - int curr_col, int curr_row, - const BLOB_CHOICE &b, - const ViterbiStateEntry *parent_vse) { - // Initialize active_dawgs from parent_vse if it is not nullptr. - // Otherwise use very_beginning_active_dawgs_. - if (parent_vse == nullptr) { - dawg_args_.active_dawgs = &very_beginning_active_dawgs_; - dawg_args_.permuter = NO_PERM; - } else { - if (parent_vse->dawg_info == nullptr) return nullptr; // not a dict word path - dawg_args_.active_dawgs = &parent_vse->dawg_info->active_dawgs; - dawg_args_.permuter = parent_vse->dawg_info->permuter; - } - - // Deal with hyphenated words. - if (word_end && dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) { - if (language_model_debug_level > 0) tprintf("Hyphenated word found\n"); - return new LanguageModelDawgInfo(dawg_args_.active_dawgs, COMPOUND_PERM); - } - - // Deal with compound words. 
- if (dict_->compound_marker(b.unichar_id()) && - (parent_vse == nullptr || parent_vse->dawg_info->permuter != NUMBER_PERM)) { - if (language_model_debug_level > 0) tprintf("Found compound marker\n"); - // Do not allow compound operators at the beginning and end of the word. - // Do not allow more than one compound operator per word. - // Do not allow compounding of words with lengths shorter than - // language_model_min_compound_length - if (parent_vse == nullptr || word_end || - dawg_args_.permuter == COMPOUND_PERM || - parent_vse->length < language_model_min_compound_length) - return nullptr; - - int i; - // Check a that the path terminated before the current character is a word. - bool has_word_ending = false; - for (i = 0; i < parent_vse->dawg_info->active_dawgs.size(); ++i) { - const DawgPosition &pos = parent_vse->dawg_info->active_dawgs[i]; - const Dawg *pdawg = pos.dawg_index < 0 - ? nullptr : dict_->GetDawg(pos.dawg_index); - if (pdawg == nullptr || pos.back_to_punc) continue;; - if (pdawg->type() == DAWG_TYPE_WORD && pos.dawg_ref != NO_EDGE && - pdawg->end_of_word(pos.dawg_ref)) { - has_word_ending = true; - break; - } - } - if (!has_word_ending) return nullptr; - - if (language_model_debug_level > 0) tprintf("Compound word found\n"); - return new LanguageModelDawgInfo(&beginning_active_dawgs_, COMPOUND_PERM); - } // done dealing with compound words - - LanguageModelDawgInfo *dawg_info = nullptr; - - // Call LetterIsOkay(). - // Use the normalized IDs so that all shapes of ' can be allowed in words - // like don't. 
- const GenericVector& normed_ids = - dict_->getUnicharset().normed_ids(b.unichar_id()); - DawgPositionVector tmp_active_dawgs; - for (int i = 0; i < normed_ids.size(); ++i) { - if (language_model_debug_level > 2) - tprintf("Test Letter OK for unichar %d, normed %d\n", - b.unichar_id(), normed_ids[i]); - dict_->LetterIsOkay(&dawg_args_, dict_->getUnicharset(), normed_ids[i], - word_end && i == normed_ids.size() - 1); - if (dawg_args_.permuter == NO_PERM) { - break; - } else if (i < normed_ids.size() - 1) { - tmp_active_dawgs = *dawg_args_.updated_dawgs; - dawg_args_.active_dawgs = &tmp_active_dawgs; - } - if (language_model_debug_level > 2) - tprintf("Letter was OK for unichar %d, normed %d\n", - b.unichar_id(), normed_ids[i]); - } - dawg_args_.active_dawgs = nullptr; - if (dawg_args_.permuter != NO_PERM) { - dawg_info = new LanguageModelDawgInfo(dawg_args_.updated_dawgs, - dawg_args_.permuter); - } else if (language_model_debug_level > 3) { - tprintf("Letter %s not OK!\n", - dict_->getUnicharset().id_to_unichar(b.unichar_id())); - } - - return dawg_info; -} - -LanguageModelNgramInfo *LanguageModel::GenerateNgramInfo( - const char *unichar, float certainty, float denom, - int curr_col, int curr_row, float outline_length, - const ViterbiStateEntry *parent_vse) { - // Initialize parent context. - const char *pcontext_ptr = ""; - int pcontext_unichar_step_len = 0; - if (parent_vse == nullptr) { - pcontext_ptr = prev_word_str_.string(); - pcontext_unichar_step_len = prev_word_unichar_step_len_; - } else { - pcontext_ptr = parent_vse->ngram_info->context.string(); - pcontext_unichar_step_len = - parent_vse->ngram_info->context_unichar_step_len; - } - // Compute p(unichar | parent context). - int unichar_step_len = 0; - bool pruned = false; - float ngram_cost; - float ngram_and_classifier_cost = - ComputeNgramCost(unichar, certainty, denom, - pcontext_ptr, &unichar_step_len, - &pruned, &ngram_cost); - // Normalize just the ngram_and_classifier_cost by outline_length. 
- // The ngram_cost is used by the params_model, so it needs to be left as-is, - // and the params model cost will be normalized by outline_length. - ngram_and_classifier_cost *= - outline_length / language_model_ngram_rating_factor; - // Add the ngram_cost of the parent. - if (parent_vse != nullptr) { - ngram_and_classifier_cost += - parent_vse->ngram_info->ngram_and_classifier_cost; - ngram_cost += parent_vse->ngram_info->ngram_cost; - } - - // Shorten parent context string by unichar_step_len unichars. - int num_remove = (unichar_step_len + pcontext_unichar_step_len - - language_model_ngram_order); - if (num_remove > 0) pcontext_unichar_step_len -= num_remove; - while (num_remove > 0 && *pcontext_ptr != '\0') { - pcontext_ptr += UNICHAR::utf8_step(pcontext_ptr); - --num_remove; - } - - // Decide whether to prune this ngram path and update changed accordingly. - if (parent_vse != nullptr && parent_vse->ngram_info->pruned) pruned = true; - - // Construct and return the new LanguageModelNgramInfo. 
- LanguageModelNgramInfo *ngram_info = new LanguageModelNgramInfo( - pcontext_ptr, pcontext_unichar_step_len, pruned, ngram_cost, - ngram_and_classifier_cost); - ngram_info->context += unichar; - ngram_info->context_unichar_step_len += unichar_step_len; - assert(ngram_info->context_unichar_step_len <= language_model_ngram_order); - return ngram_info; -} - -float LanguageModel::ComputeNgramCost(const char *unichar, - float certainty, - float denom, - const char *context, - int *unichar_step_len, - bool *found_small_prob, - float *ngram_cost) { - const char *context_ptr = context; - char *modified_context = nullptr; - char *modified_context_end = nullptr; - const char *unichar_ptr = unichar; - const char *unichar_end = unichar_ptr + strlen(unichar_ptr); - float prob = 0.0f; - int step = 0; - while (unichar_ptr < unichar_end && - (step = UNICHAR::utf8_step(unichar_ptr)) > 0) { - if (language_model_debug_level > 1) { - tprintf("prob(%s | %s)=%g\n", unichar_ptr, context_ptr, - dict_->ProbabilityInContext(context_ptr, -1, unichar_ptr, step)); - } - prob += dict_->ProbabilityInContext(context_ptr, -1, unichar_ptr, step); - ++(*unichar_step_len); - if (language_model_ngram_use_only_first_uft8_step) break; - unichar_ptr += step; - // If there are multiple UTF8 characters present in unichar, context is - // updated to include the previously examined characters from str, - // unless use_only_first_uft8_step is true. 
- if (unichar_ptr < unichar_end) { - if (modified_context == nullptr) { - size_t context_len = strlen(context); - modified_context = - new char[context_len + strlen(unichar_ptr) + step + 1]; - memcpy(modified_context, context, context_len); - modified_context_end = modified_context + context_len; - context_ptr = modified_context; - } - strncpy(modified_context_end, unichar_ptr - step, step); - modified_context_end += step; - *modified_context_end = '\0'; - } - } - prob /= static_cast(*unichar_step_len); // normalize - if (prob < language_model_ngram_small_prob) { - if (language_model_debug_level > 0) tprintf("Found small prob %g\n", prob); - *found_small_prob = true; - prob = language_model_ngram_small_prob; - } - *ngram_cost = -1.0*log2(prob); - float ngram_and_classifier_cost = - -1.0*log2(CertaintyScore(certainty)/denom) + - *ngram_cost * language_model_ngram_scale_factor; - if (language_model_debug_level > 1) { - tprintf("-log [ p(%s) * p(%s | %s) ] = -log2(%g*%g) = %g\n", unichar, - unichar, context_ptr, CertaintyScore(certainty)/denom, prob, - ngram_and_classifier_cost); - } - delete[] modified_context; - return ngram_and_classifier_cost; -} - -float LanguageModel::ComputeDenom(BLOB_CHOICE_LIST *curr_list) { - if (curr_list->empty()) return 1.0f; - float denom = 0.0f; - int len = 0; - BLOB_CHOICE_IT c_it(curr_list); - for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - ASSERT_HOST(c_it.data() != nullptr); - ++len; - denom += CertaintyScore(c_it.data()->certainty()); - } - assert(len != 0); - // The ideal situation would be to have the classifier scores for - // classifying each position as each of the characters in the unicharset. - // Since we can not do this because of speed, we add a very crude estimate - // of what these scores for the "missing" classifications would sum up to. 
- denom += (dict_->getUnicharset().size() - len) * - CertaintyScore(language_model_ngram_nonmatch_score); - - return denom; -} - -void LanguageModel::FillConsistencyInfo( - int curr_col, - bool word_end, - BLOB_CHOICE *b, - ViterbiStateEntry *parent_vse, - WERD_RES *word_res, - LMConsistencyInfo *consistency_info) { - const UNICHARSET &unicharset = dict_->getUnicharset(); - UNICHAR_ID unichar_id = b->unichar_id(); - BLOB_CHOICE* parent_b = parent_vse != nullptr ? parent_vse->curr_b : nullptr; - - // Check punctuation validity. - if (unicharset.get_ispunctuation(unichar_id)) consistency_info->num_punc++; - if (dict_->GetPuncDawg() != nullptr && !consistency_info->invalid_punc) { - if (dict_->compound_marker(unichar_id) && parent_b != nullptr && - (unicharset.get_isalpha(parent_b->unichar_id()) || - unicharset.get_isdigit(parent_b->unichar_id()))) { - // reset punc_ref for compound words - consistency_info->punc_ref = NO_EDGE; - } else { - bool is_apos = dict_->is_apostrophe(unichar_id); - bool prev_is_numalpha = (parent_b != nullptr && - (unicharset.get_isalpha(parent_b->unichar_id()) || - unicharset.get_isdigit(parent_b->unichar_id()))); - UNICHAR_ID pattern_unichar_id = - (unicharset.get_isalpha(unichar_id) || - unicharset.get_isdigit(unichar_id) || - (is_apos && prev_is_numalpha)) ? - Dawg::kPatternUnicharID : unichar_id; - if (consistency_info->punc_ref == NO_EDGE || - pattern_unichar_id != Dawg::kPatternUnicharID || - dict_->GetPuncDawg()->edge_letter(consistency_info->punc_ref) != - Dawg::kPatternUnicharID) { - NODE_REF node = Dict::GetStartingNode(dict_->GetPuncDawg(), - consistency_info->punc_ref); - consistency_info->punc_ref = - (node != NO_EDGE) ? dict_->GetPuncDawg()->edge_char_of( - node, pattern_unichar_id, word_end) : NO_EDGE; - if (consistency_info->punc_ref == NO_EDGE) { - consistency_info->invalid_punc = true; - } - } - } - } - - // Update case related counters. 
- if (parent_vse != nullptr && !word_end && dict_->compound_marker(unichar_id)) { - // Reset counters if we are dealing with a compound word. - consistency_info->num_lower = 0; - consistency_info->num_non_first_upper = 0; - } - else if (unicharset.get_islower(unichar_id)) { - consistency_info->num_lower++; - } else if ((parent_b != nullptr) && unicharset.get_isupper(unichar_id)) { - if (unicharset.get_isupper(parent_b->unichar_id()) || - consistency_info->num_lower > 0 || - consistency_info->num_non_first_upper > 0) { - consistency_info->num_non_first_upper++; - } - } - - // Initialize consistency_info->script_id (use script of unichar_id - // if it is not Common, use script id recorded by the parent otherwise). - // Set inconsistent_script to true if the script of the current unichar - // is not consistent with that of the parent. - consistency_info->script_id = unicharset.get_script(unichar_id); - // Hiragana and Katakana can mix with Han. - if (dict_->getUnicharset().han_sid() != dict_->getUnicharset().null_sid()) { - if ((unicharset.hiragana_sid() != unicharset.null_sid() && - consistency_info->script_id == unicharset.hiragana_sid()) || - (unicharset.katakana_sid() != unicharset.null_sid() && - consistency_info->script_id == unicharset.katakana_sid())) { - consistency_info->script_id = dict_->getUnicharset().han_sid(); - } - } - - if (parent_vse != nullptr && - (parent_vse->consistency_info.script_id != - dict_->getUnicharset().common_sid())) { - int parent_script_id = parent_vse->consistency_info.script_id; - // If script_id is Common, use script id of the parent instead. - if (consistency_info->script_id == dict_->getUnicharset().common_sid()) { - consistency_info->script_id = parent_script_id; - } - if (consistency_info->script_id != parent_script_id) { - consistency_info->inconsistent_script = true; - } - } - - // Update chartype related counters. 
- if (unicharset.get_isalpha(unichar_id)) { - consistency_info->num_alphas++; - } else if (unicharset.get_isdigit(unichar_id)) { - consistency_info->num_digits++; - } else if (!unicharset.get_ispunctuation(unichar_id)) { - consistency_info->num_other++; - } - - // Check font and spacing consistency. - if (fontinfo_table_->size() > 0 && parent_b != nullptr) { - int fontinfo_id = -1; - if (parent_b->fontinfo_id() == b->fontinfo_id() || - parent_b->fontinfo_id2() == b->fontinfo_id()) { - fontinfo_id = b->fontinfo_id(); - } else if (parent_b->fontinfo_id() == b->fontinfo_id2() || - parent_b->fontinfo_id2() == b->fontinfo_id2()) { - fontinfo_id = b->fontinfo_id2(); - } - if(language_model_debug_level > 1) { - tprintf("pfont %s pfont %s font %s font2 %s common %s(%d)\n", - (parent_b->fontinfo_id() >= 0) ? - fontinfo_table_->get(parent_b->fontinfo_id()).name : "" , - (parent_b->fontinfo_id2() >= 0) ? - fontinfo_table_->get(parent_b->fontinfo_id2()).name : "", - (b->fontinfo_id() >= 0) ? - fontinfo_table_->get(b->fontinfo_id()).name : "", - (fontinfo_id >= 0) ? fontinfo_table_->get(fontinfo_id).name : "", - (fontinfo_id >= 0) ? 
fontinfo_table_->get(fontinfo_id).name : "", - fontinfo_id); - } - if (!word_res->blob_widths.empty()) { // if we have widths/gaps info - bool expected_gap_found = false; - float expected_gap = 0.0f; - int temp_gap; - if (fontinfo_id >= 0) { // found a common font - ASSERT_HOST(fontinfo_id < fontinfo_table_->size()); - if (fontinfo_table_->get(fontinfo_id).get_spacing( - parent_b->unichar_id(), unichar_id, &temp_gap)) { - expected_gap = temp_gap; - expected_gap_found = true; - } - } else { - consistency_info->inconsistent_font = true; - // Get an average of the expected gaps in each font - int num_addends = 0; - int temp_fid; - for (int i = 0; i < 4; ++i) { - if (i == 0) { - temp_fid = parent_b->fontinfo_id(); - } else if (i == 1) { - temp_fid = parent_b->fontinfo_id2(); - } else if (i == 2) { - temp_fid = b->fontinfo_id(); - } else { - temp_fid = b->fontinfo_id2(); - } - ASSERT_HOST(temp_fid < 0 || fontinfo_table_->size()); - if (temp_fid >= 0 && fontinfo_table_->get(temp_fid).get_spacing( - parent_b->unichar_id(), unichar_id, &temp_gap)) { - expected_gap += temp_gap; - num_addends++; - } - } - if (num_addends > 0) { - expected_gap /= static_cast(num_addends); - expected_gap_found = true; - } - } - if (expected_gap_found) { - float actual_gap = - static_cast(word_res->GetBlobsGap(curr_col-1)); - float gap_ratio = expected_gap / actual_gap; - // TODO(rays) The gaps seem to be way off most of the time, saved by - // the error here that the ratio was compared to 1/2, when it should - // have been 0.5f. Find the source of the gaps discrepancy and put - // the 0.5f here in place of 0.0f. - // Test on 2476595.sj, pages 0 to 6. (In French.) 
- if (gap_ratio < 0.0f || gap_ratio > 2.0f) { - consistency_info->num_inconsistent_spaces++; - } - if (language_model_debug_level > 1) { - tprintf("spacing for %s(%d) %s(%d) col %d: expected %g actual %g\n", - unicharset.id_to_unichar(parent_b->unichar_id()), - parent_b->unichar_id(), unicharset.id_to_unichar(unichar_id), - unichar_id, curr_col, expected_gap, actual_gap); - } - } - } - } -} - -float LanguageModel::ComputeAdjustedPathCost(ViterbiStateEntry *vse) { - ASSERT_HOST(vse != nullptr); - if (params_model_.Initialized()) { - float features[PTRAIN_NUM_FEATURE_TYPES]; - ExtractFeaturesFromPath(*vse, features); - float cost = params_model_.ComputeCost(features); - if (language_model_debug_level > 3) { - tprintf("ComputeAdjustedPathCost %g ParamsModel features:\n", cost); - if (language_model_debug_level >= 5) { - for (int f = 0; f < PTRAIN_NUM_FEATURE_TYPES; ++f) { - tprintf("%s=%g\n", kParamsTrainingFeatureTypeName[f], features[f]); - } - } - } - return cost * vse->outline_length; - } else { - float adjustment = 1.0f; - if (vse->dawg_info == nullptr || vse->dawg_info->permuter != FREQ_DAWG_PERM) { - adjustment += language_model_penalty_non_freq_dict_word; - } - if (vse->dawg_info == nullptr) { - adjustment += language_model_penalty_non_dict_word; - if (vse->length > language_model_min_compound_length) { - adjustment += ((vse->length - language_model_min_compound_length) * - language_model_penalty_increment); - } - } - if (vse->associate_stats.shape_cost > 0) { - adjustment += vse->associate_stats.shape_cost / - static_cast(vse->length); - } - if (language_model_ngram_on) { - ASSERT_HOST(vse->ngram_info != nullptr); - return vse->ngram_info->ngram_and_classifier_cost * adjustment; - } else { - adjustment += ComputeConsistencyAdjustment(vse->dawg_info, - vse->consistency_info); - return vse->ratings_sum * adjustment; - } - } -} - -void LanguageModel::UpdateBestChoice( - ViterbiStateEntry *vse, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle 
*best_choice_bundle, - BlamerBundle *blamer_bundle) { - bool truth_path; - WERD_CHOICE *word = ConstructWord(vse, word_res, &best_choice_bundle->fixpt, - blamer_bundle, &truth_path); - ASSERT_HOST(word != nullptr); - if (dict_->stopper_debug_level >= 1) { - STRING word_str; - word->string_and_lengths(&word_str, nullptr); - vse->Print(word_str.string()); - } - if (language_model_debug_level > 0) { - word->print("UpdateBestChoice() constructed word"); - } - // Record features from the current path if necessary. - ParamsTrainingHypothesis curr_hyp; - if (blamer_bundle != nullptr) { - if (vse->dawg_info != nullptr) vse->dawg_info->permuter = - static_cast(word->permuter()); - ExtractFeaturesFromPath(*vse, curr_hyp.features); - word->string_and_lengths(&(curr_hyp.str), nullptr); - curr_hyp.cost = vse->cost; // record cost for error rate computations - if (language_model_debug_level > 0) { - tprintf("Raw features extracted from %s (cost=%g) [ ", - curr_hyp.str.string(), curr_hyp.cost); - for (int deb_i = 0; deb_i < PTRAIN_NUM_FEATURE_TYPES; ++deb_i) { - tprintf("%g ", curr_hyp.features[deb_i]); - } - tprintf("]\n"); - } - // Record the current hypothesis in params_training_bundle. - blamer_bundle->AddHypothesis(curr_hyp); - if (truth_path) - blamer_bundle->UpdateBestRating(word->rating()); - } - if (blamer_bundle != nullptr && blamer_bundle->GuidedSegsearchStillGoing()) { - // The word was constructed solely for blamer_bundle->AddHypothesis, so - // we no longer need it. - delete word; - return; - } - if (word_res->chopped_word != nullptr && !word_res->chopped_word->blobs.empty()) - word->SetScriptPositions(false, word_res->chopped_word, language_model_debug_level); - // Update and log new raw_choice if needed. 
- if (word_res->raw_choice == nullptr || - word->rating() < word_res->raw_choice->rating()) { - if (word_res->LogNewRawChoice(word) && language_model_debug_level > 0) - tprintf("Updated raw choice\n"); - } - // Set the modified rating for best choice to vse->cost and log best choice. - word->set_rating(vse->cost); - // Call LogNewChoice() for best choice from Dict::adjust_word() since it - // computes adjust_factor that is used by the adaption code (e.g. by - // ClassifyAdaptableWord() to compute adaption acceptance thresholds). - // Note: the rating of the word is not adjusted. - dict_->adjust_word(word, vse->dawg_info == nullptr, - vse->consistency_info.xht_decision, 0.0, - false, language_model_debug_level > 0); - // Hand ownership of the word over to the word_res. - if (!word_res->LogNewCookedChoice(dict_->tessedit_truncate_wordchoice_log, - dict_->stopper_debug_level >= 1, word)) { - // The word was so bad that it was deleted. - return; - } - if (word_res->best_choice == word) { - // Word was the new best. - if (dict_->AcceptableChoice(*word, vse->consistency_info.xht_decision) && - AcceptablePath(*vse)) { - acceptable_choice_found_ = true; - } - // Update best_choice_bundle. - best_choice_bundle->updated = true; - best_choice_bundle->best_vse = vse; - if (language_model_debug_level > 0) { - tprintf("Updated best choice\n"); - word->print_state("New state "); - } - // Update hyphen state if we are dealing with a dictionary word. 
- if (vse->dawg_info != nullptr) { - if (dict_->has_hyphen_end(*word)) { - dict_->set_hyphen_word(*word, *(dawg_args_.active_dawgs)); - } else { - dict_->reset_hyphen_vars(true); - } - } - - if (blamer_bundle != nullptr) { - blamer_bundle->set_best_choice_is_dict_and_top_choice( - vse->dawg_info != nullptr && vse->top_choice_flags); - } - } - if (wordrec_display_segmentations && word_res->chopped_word != nullptr) { - word->DisplaySegmentation(word_res->chopped_word); - } -} - -void LanguageModel::ExtractFeaturesFromPath( - const ViterbiStateEntry &vse, float features[]) { - memset(features, 0, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); - // Record dictionary match info. - int len = vse.length <= kMaxSmallWordUnichars ? 0 : - vse.length <= kMaxMediumWordUnichars ? 1 : 2; - if (vse.dawg_info != nullptr) { - int permuter = vse.dawg_info->permuter; - if (permuter == NUMBER_PERM || permuter == USER_PATTERN_PERM) { - if (vse.consistency_info.num_digits == vse.length) { - features[PTRAIN_DIGITS_SHORT+len] = 1.0; - } else { - features[PTRAIN_NUM_SHORT+len] = 1.0; - } - } else if (permuter == DOC_DAWG_PERM) { - features[PTRAIN_DOC_SHORT+len] = 1.0; - } else if (permuter == SYSTEM_DAWG_PERM || permuter == USER_DAWG_PERM || - permuter == COMPOUND_PERM) { - features[PTRAIN_DICT_SHORT+len] = 1.0; - } else if (permuter == FREQ_DAWG_PERM) { - features[PTRAIN_FREQ_SHORT+len] = 1.0; - } - } - // Record shape cost feature (normalized by path length). - features[PTRAIN_SHAPE_COST_PER_CHAR] = - vse.associate_stats.shape_cost / static_cast(vse.length); - // Record ngram cost. (normalized by the path length). - features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0; - if (vse.ngram_info != nullptr) { - features[PTRAIN_NGRAM_COST_PER_CHAR] = - vse.ngram_info->ngram_cost / static_cast(vse.length); - } - // Record consistency-related features. - // Disabled this feature for due to its poor performance. 
- // features[PTRAIN_NUM_BAD_PUNC] = vse.consistency_info.NumInconsistentPunc(); - features[PTRAIN_NUM_BAD_CASE] = vse.consistency_info.NumInconsistentCase(); - features[PTRAIN_XHEIGHT_CONSISTENCY] = vse.consistency_info.xht_decision; - features[PTRAIN_NUM_BAD_CHAR_TYPE] = vse.dawg_info == nullptr ? - vse.consistency_info.NumInconsistentChartype() : 0.0; - features[PTRAIN_NUM_BAD_SPACING] = - vse.consistency_info.NumInconsistentSpaces(); - // Disabled this feature for now due to its poor performance. - // features[PTRAIN_NUM_BAD_FONT] = vse.consistency_info.inconsistent_font; - - // Classifier-related features. - features[PTRAIN_RATING_PER_CHAR] = - vse.ratings_sum / static_cast(vse.outline_length); -} - -WERD_CHOICE *LanguageModel::ConstructWord( - ViterbiStateEntry *vse, - WERD_RES *word_res, - DANGERR *fixpt, - BlamerBundle *blamer_bundle, - bool *truth_path) { - if (truth_path != nullptr) { - *truth_path = - (blamer_bundle != nullptr && - vse->length == blamer_bundle->correct_segmentation_length()); - } - BLOB_CHOICE *curr_b = vse->curr_b; - ViterbiStateEntry *curr_vse = vse; - - int i; - bool compound = dict_->hyphenated(); // treat hyphenated words as compound - - // Re-compute the variance of the width-to-height ratios (since we now - // can compute the mean over the whole word). - float full_wh_ratio_mean = 0.0f; - if (vse->associate_stats.full_wh_ratio_var != 0.0f) { - vse->associate_stats.shape_cost -= vse->associate_stats.full_wh_ratio_var; - full_wh_ratio_mean = (vse->associate_stats.full_wh_ratio_total / - static_cast(vse->length)); - vse->associate_stats.full_wh_ratio_var = 0.0f; - } - - // Construct a WERD_CHOICE by tracing parent pointers. 
- WERD_CHOICE *word = new WERD_CHOICE(word_res->uch_set, vse->length); - word->set_length(vse->length); - int total_blobs = 0; - for (i = (vse->length-1); i >= 0; --i) { - if (blamer_bundle != nullptr && truth_path != nullptr && *truth_path && - !blamer_bundle->MatrixPositionCorrect(i, curr_b->matrix_cell())) { - *truth_path = false; - } - // The number of blobs used for this choice is row - col + 1. - int num_blobs = curr_b->matrix_cell().row - curr_b->matrix_cell().col + 1; - total_blobs += num_blobs; - word->set_blob_choice(i, num_blobs, curr_b); - // Update the width-to-height ratio variance. Useful non-space delimited - // languages to ensure that the blobs are of uniform width. - // Skip leading and trailing punctuation when computing the variance. - if ((full_wh_ratio_mean != 0.0f && - ((curr_vse != vse && curr_vse->parent_vse != nullptr) || - !dict_->getUnicharset().get_ispunctuation(curr_b->unichar_id())))) { - vse->associate_stats.full_wh_ratio_var += - pow(full_wh_ratio_mean - curr_vse->associate_stats.full_wh_ratio, 2); - if (language_model_debug_level > 2) { - tprintf("full_wh_ratio_var += (%g-%g)^2\n", - full_wh_ratio_mean, curr_vse->associate_stats.full_wh_ratio); - } - } - - // Mark the word as compound if compound permuter was set for any of - // the unichars on the path (usually this will happen for unichars - // that are compounding operators, like "-" and "/"). - if (!compound && curr_vse->dawg_info && - curr_vse->dawg_info->permuter == COMPOUND_PERM) compound = true; - - // Update curr_* pointers. - curr_vse = curr_vse->parent_vse; - if (curr_vse == nullptr) break; - curr_b = curr_vse->curr_b; - } - ASSERT_HOST(i == 0); // check that we recorded all the unichar ids. - ASSERT_HOST(total_blobs == word_res->ratings->dimension()); - // Re-adjust shape cost to include the updated width-to-height variance. 
- if (full_wh_ratio_mean != 0.0f) { - vse->associate_stats.shape_cost += vse->associate_stats.full_wh_ratio_var; - } - - word->set_rating(vse->ratings_sum); - word->set_certainty(vse->min_certainty); - word->set_x_heights(vse->consistency_info.BodyMinXHeight(), - vse->consistency_info.BodyMaxXHeight()); - if (vse->dawg_info != nullptr) { - word->set_permuter(compound ? COMPOUND_PERM : vse->dawg_info->permuter); - } else if (language_model_ngram_on && !vse->ngram_info->pruned) { - word->set_permuter(NGRAM_PERM); - } else if (vse->top_choice_flags) { - word->set_permuter(TOP_CHOICE_PERM); - } else { - word->set_permuter(NO_PERM); - } - word->set_dangerous_ambig_found_(!dict_->NoDangerousAmbig(word, fixpt, true, - word_res->ratings)); - return word; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/language_model.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/language_model.h deleted file mode 100644 index 055ab0b1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/language_model.h +++ /dev/null @@ -1,427 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: language_model.h -// Description: Functions that utilize the knowledge about the properties, -// structure and statistics of the language to help segmentation -// search. -// Author: Daria Antonova -// Created: Mon Nov 11 11:26:43 PST 2009 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_H_ -#define TESSERACT_WORDREC_LANGUAGE_MODEL_H_ - -#include // for exp -#include "associate.h" // for AssociateStats (ptr only), AssociateUtils -#include "dawg.h" // for DawgPositionVector -#include "dict.h" // for DawgArgs, Dict -#include "lm_consistency.h" // for LMConsistencyInfo -#include "lm_state.h" // for ViterbiStateEntry, LanguageModelFlagsType -#include "params.h" // for DoubleParam, double_VAR_H, IntParam, Boo... -#include "params_model.h" // for ParamsModel -#include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST... -#include "stopper.h" // for DANGERR -#include "strngs.h" // for STRING - -class UNICHARSET; -class WERD_RES; - -struct BlamerBundle; - -template class UnicityTable; - -namespace tesseract { - -class LMPainPoints; -struct FontInfo; - -// This class that contains the data structures and functions necessary -// to represent and use the knowledge about the language. -class LanguageModel { - public: - // Masks for keeping track of top choices that should not be pruned out. - static const LanguageModelFlagsType kSmallestRatingFlag = 0x1; - static const LanguageModelFlagsType kLowerCaseFlag = 0x2; - static const LanguageModelFlagsType kUpperCaseFlag = 0x4; - static const LanguageModelFlagsType kDigitFlag = 0x8; - static const LanguageModelFlagsType kXhtConsistentFlag = 0x10; - - // Denominator for normalizing per-letter ngram cost when deriving - // penalty adjustments. - static const float kMaxAvgNgramCost; - - LanguageModel(const UnicityTable *fontinfo_table, Dict *dict); - ~LanguageModel(); - - // Fills the given floats array with features extracted from path represented - // by the given ViterbiStateEntry. See ccstruct/params_training_featdef.h - // for feature information. 
- // Note: the function assumes that features points to an array of size - // PTRAIN_NUM_FEATURE_TYPES. - static void ExtractFeaturesFromPath(const ViterbiStateEntry &vse, - float features[]); - - // Updates data structures that are used for the duration of the segmentation - // search on the current word; - void InitForWord(const WERD_CHOICE *prev_word, - bool fixed_pitch, float max_char_wh_ratio, - float rating_cert_scale); - - // Updates language model state of the given BLOB_CHOICE_LIST (from - // the ratings matrix) a its parent. Updates pain_points if new - // problematic points are found in the segmentation graph. - // - // At most language_model_viterbi_list_size are kept in each - // LanguageModelState.viterbi_state_entries list. - // At most language_model_viterbi_list_max_num_prunable of those are prunable - // (non-dictionary) paths. - // The entries that represent dictionary word paths are kept at the front - // of the list. - // The list ordered by cost that is computed collectively by several - // language model components (currently dawg and ngram components). - bool UpdateState( - bool just_classified, - int curr_col, int curr_row, - BLOB_CHOICE_LIST *curr_list, - LanguageModelState *parent_node, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle); - - // Returns true if an acceptable best choice was discovered. - inline bool AcceptableChoiceFound() { return acceptable_choice_found_; } - inline void SetAcceptableChoiceFound(bool val) { - acceptable_choice_found_ = val; - } - // Returns the reference to ParamsModel. - inline ParamsModel &getParamsModel() { return params_model_; } - - protected: - - inline float CertaintyScore(float cert) { - if (language_model_use_sigmoidal_certainty) { - // cert is assumed to be between 0 and -dict_->certainty_scale. - // If you enable language_model_use_sigmoidal_certainty, you - // need to adjust language_model_ngram_nonmatch_score as well. 
- cert = -cert / dict_->certainty_scale; - return 1.0f / (1.0f + exp(10.0f * cert)); - } else { - return (-1.0f / cert); - } - } - - inline float ComputeAdjustment(int num_problems, float penalty) { - if (num_problems == 0) return 0.0f; - if (num_problems == 1) return penalty; - return (penalty + (language_model_penalty_increment * - static_cast(num_problems-1))); - } - - // Computes the adjustment to the ratings sum based on the given - // consistency_info. The paths with invalid punctuation, inconsistent - // case and character type are penalized proportionally to the number - // of inconsistencies on the path. - inline float ComputeConsistencyAdjustment( - const LanguageModelDawgInfo *dawg_info, - const LMConsistencyInfo &consistency_info) { - if (dawg_info != nullptr) { - return ComputeAdjustment(consistency_info.NumInconsistentCase(), - language_model_penalty_case) + - (consistency_info.inconsistent_script ? - language_model_penalty_script : 0.0f); - } - return (ComputeAdjustment(consistency_info.NumInconsistentPunc(), - language_model_penalty_punc) + - ComputeAdjustment(consistency_info.NumInconsistentCase(), - language_model_penalty_case) + - ComputeAdjustment(consistency_info.NumInconsistentChartype(), - language_model_penalty_chartype) + - ComputeAdjustment(consistency_info.NumInconsistentSpaces(), - language_model_penalty_spacing) + - (consistency_info.inconsistent_script ? - language_model_penalty_script : 0.0f) + - (consistency_info.inconsistent_font ? - language_model_penalty_font : 0.0f)); - } - - // Returns an adjusted ratings sum that includes inconsistency penalties, - // penalties for non-dictionary paths and paths with dips in ngram - // probability. - float ComputeAdjustedPathCost(ViterbiStateEntry *vse); - - // Finds the first lower and upper case letter and first digit in curr_list. - // Uses the first character in the list in place of empty results. - // Returns true if both alpha and digits are found. 
- bool GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, - BLOB_CHOICE **first_lower, - BLOB_CHOICE **first_upper, - BLOB_CHOICE **first_digit) const; - // Forces there to be at least one entry in the overall set of the - // viterbi_state_entries of each element of parent_node that has the - // top_choice_flag set for lower, upper and digit using the same rules as - // GetTopLowerUpperDigit, setting the flag on the first found suitable - // candidate, whether or not the flag is set on some other parent. - // Returns 1 if both alpha and digits are found among the parents, -1 if no - // parents are found at all (a legitimate case), and 0 otherwise. - int SetTopParentLowerUpperDigit(LanguageModelState *parent_node) const; - - // Finds the next ViterbiStateEntry with which the given unichar_id can - // combine sensibly, taking into account any mixed alnum/mixed case - // situation, and whether this combination has been inspected before. - ViterbiStateEntry* GetNextParentVSE( - bool just_classified, bool mixed_alnum, - const BLOB_CHOICE* bc, LanguageModelFlagsType blob_choice_flags, - const UNICHARSET& unicharset, WERD_RES* word_res, - ViterbiStateEntry_IT* vse_it, - LanguageModelFlagsType* top_choice_flags) const; - // Helper function that computes the cost of the path composed of the - // path in the given parent ViterbiStateEntry and the given BLOB_CHOICE. - // If the new path looks good enough, adds a new ViterbiStateEntry to the - // list of viterbi entries in the given BLOB_CHOICE and returns true. - bool AddViterbiStateEntry( - LanguageModelFlagsType top_choice_flags, float denom, bool word_end, - int curr_col, int curr_row, BLOB_CHOICE *b, - LanguageModelState *curr_state, ViterbiStateEntry *parent_vse, - LMPainPoints *pain_points, WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle); - - // Determines whether a potential entry is a true top choice and - // updates changed accordingly. 
- // - // Note: The function assumes that b, top_choice_flags and changed - // are not nullptr. - void GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, - const ViterbiStateEntry *parent_vse, - LanguageModelState *lms); - - // Calls dict_->LetterIsOk() with DawgArgs initialized from parent_vse and - // unichar from b.unichar_id(). Constructs and returns LanguageModelDawgInfo - // with updated active dawgs, constraints and permuter. - // - // Note: the caller is responsible for deleting the returned pointer. - LanguageModelDawgInfo *GenerateDawgInfo(bool word_end, - int curr_col, int curr_row, - const BLOB_CHOICE &b, - const ViterbiStateEntry *parent_vse); - - // Computes p(unichar | parent context) and records it in ngram_cost. - // If b.unichar_id() is an unlikely continuation of the parent context - // sets found_small_prob to true and returns nullptr. - // Otherwise creates a new LanguageModelNgramInfo entry containing the - // updated context (that includes b.unichar_id() at the end) and returns it. - // - // Note: the caller is responsible for deleting the returned pointer. - LanguageModelNgramInfo *GenerateNgramInfo( - const char *unichar, float certainty, float denom, - int curr_col, int curr_row, float outline_length, - const ViterbiStateEntry *parent_vse); - - // Computes -(log(prob(classifier)) + log(prob(ngram model))) - // for the given unichar in the given context. If there are multiple - // unichars at one position - takes the average of their probabilities. - // UNICHAR::utf8_step() is used to separate out individual UTF8 characters, - // since probability_in_context() can only handle one at a time (while - // unicharset might contain ngrams and glyphs composed from multiple UTF8 - // characters). 
- float ComputeNgramCost(const char *unichar, float certainty, float denom, - const char *context, int *unichar_step_len, - bool *found_small_prob, float *ngram_prob); - - // Computes the normalization factors for the classifier confidences - // (used by ComputeNgramCost()). - float ComputeDenom(BLOB_CHOICE_LIST *curr_list); - - // Fills the given consistenty_info based on parent_vse.consistency_info - // and on the consistency of the given unichar_id with parent_vse. - void FillConsistencyInfo( - int curr_col, bool word_end, BLOB_CHOICE *b, - ViterbiStateEntry *parent_vse, - WERD_RES *word_res, - LMConsistencyInfo *consistency_info); - - // Constructs WERD_CHOICE by recording unichar_ids of the BLOB_CHOICEs - // on the path represented by the given BLOB_CHOICE and language model - // state entries (lmse, dse). The path is re-constructed by following - // the parent pointers in the the lang model state entries). If the - // constructed WERD_CHOICE is better than the best/raw choice recorded - // in the best_choice_bundle, this function updates the corresponding - // fields and sets best_choice_bunldle->updated to true. - void UpdateBestChoice(ViterbiStateEntry *vse, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle); - - // Constructs a WERD_CHOICE by tracing parent pointers starting with - // the given LanguageModelStateEntry. Returns the constructed word. - // Updates best_char_choices, certainties and state if they are not - // nullptr (best_char_choices and certainties are assumed to have the - // length equal to lmse->length). - // The caller is responsible for freeing memory associated with the - // returned WERD_CHOICE. - WERD_CHOICE *ConstructWord(ViterbiStateEntry *vse, - WERD_RES *word_res, - DANGERR *fixpt, - BlamerBundle *blamer_bundle, - bool *truth_path); - - // Wrapper around AssociateUtils::ComputeStats(). 
- inline void ComputeAssociateStats(int col, int row, - float max_char_wh_ratio, - ViterbiStateEntry *parent_vse, - WERD_RES *word_res, - AssociateStats *associate_stats) { - AssociateUtils::ComputeStats( - col, row, - (parent_vse != nullptr) ? &(parent_vse->associate_stats) : nullptr, - (parent_vse != nullptr) ? parent_vse->length : 0, - fixed_pitch_, max_char_wh_ratio, - word_res, language_model_debug_level > 2, associate_stats); - } - - // Returns true if the path with such top_choice_flags and dawg_info - // could be pruned out (i.e. is neither a system/user/frequent dictionary - // nor a top choice path). - // In non-space delimited languages all paths can be "somewhat" dictionary - // words. In such languages we can not do dictionary-driven path pruning, - // so paths with non-empty dawg_info are considered prunable. - inline bool PrunablePath(const ViterbiStateEntry &vse) { - if (vse.top_choice_flags) return false; - if (vse.dawg_info != nullptr && - (vse.dawg_info->permuter == SYSTEM_DAWG_PERM || - vse.dawg_info->permuter == USER_DAWG_PERM || - vse.dawg_info->permuter == FREQ_DAWG_PERM)) return false; - return true; - } - - // Returns true if the given ViterbiStateEntry represents an acceptable path. - inline bool AcceptablePath(const ViterbiStateEntry &vse) { - return (vse.dawg_info != nullptr || vse.Consistent() || - (vse.ngram_info != nullptr && !vse.ngram_info->pruned)); - } - - public: - // Parameters. 
- INT_VAR_H(language_model_debug_level, 0, "Language model debug level"); - BOOL_VAR_H(language_model_ngram_on, false, - "Turn on/off the use of character ngram model"); - INT_VAR_H(language_model_ngram_order, 8, - "Maximum order of the character ngram model"); - INT_VAR_H(language_model_viterbi_list_max_num_prunable, 10, - "Maximum number of prunable (those for which PrunablePath() is" - " true) entries in each viterbi list recorded in BLOB_CHOICEs"); - INT_VAR_H(language_model_viterbi_list_max_size, 500, - "Maximum size of viterbi lists recorded in BLOB_CHOICEs"); - double_VAR_H(language_model_ngram_small_prob, 0.000001, - "To avoid overly small denominators use this as the floor" - " of the probability returned by the ngram model"); - double_VAR_H(language_model_ngram_nonmatch_score, -40.0, - "Average classifier score of a non-matching unichar"); - BOOL_VAR_H(language_model_ngram_use_only_first_uft8_step, false, - "Use only the first UTF8 step of the given string" - " when computing log probabilities"); - double_VAR_H(language_model_ngram_scale_factor, 0.03, - "Strength of the character ngram model relative to the" - " character classifier "); - double_VAR_H(language_model_ngram_rating_factor, 16.0, - "Factor to bring log-probs into the same range as ratings" - " when multiplied by outline length "); - BOOL_VAR_H(language_model_ngram_space_delimited_language, true, - "Words are delimited by space"); - INT_VAR_H(language_model_min_compound_length, 3, - "Minimum length of compound words"); - // Penalties used for adjusting path costs and final word rating. 
- double_VAR_H(language_model_penalty_non_freq_dict_word, 0.1, - "Penalty for words not in the frequent word dictionary"); - double_VAR_H(language_model_penalty_non_dict_word, 0.15, - "Penalty for non-dictionary words"); - double_VAR_H(language_model_penalty_punc, 0.2, - "Penalty for inconsistent punctuation"); - double_VAR_H(language_model_penalty_case, 0.1, - "Penalty for inconsistent case"); - double_VAR_H(language_model_penalty_script, 0.5, - "Penalty for inconsistent script"); - double_VAR_H(language_model_penalty_chartype, 0.3, - "Penalty for inconsistent character type"); - double_VAR_H(language_model_penalty_font, 0.00, - "Penalty for inconsistent font"); - double_VAR_H(language_model_penalty_spacing, 0.05, - "Penalty for inconsistent spacing"); - double_VAR_H(language_model_penalty_increment, 0.01, "Penalty increment"); - INT_VAR_H(wordrec_display_segmentations, 0, "Display Segmentations"); - BOOL_VAR_H(language_model_use_sigmoidal_certainty, false, - "Use sigmoidal score for certainty"); - - - protected: - // Member Variables. - - // Temporary DawgArgs struct that is re-used across different words to - // avoid dynamic memory re-allocation (should be cleared before each use). - DawgArgs dawg_args_; - // Scaling for recovering blob outline length from rating and certainty. - float rating_cert_scale_; - - // The following variables are set at construction time. - - // Pointer to fontinfo table (not owned by LanguageModel). - const UnicityTable *fontinfo_table_; - - // Pointer to Dict class, that is used for querying the dictionaries - // (the pointer is not owned by LanguageModel). - Dict *dict_; - - // TODO(daria): the following variables should become LanguageModel params - // when the old code in bestfirst.cpp and heuristic.cpp is deprecated. - // - // Set to true if we are dealing with fixed pitch text - // (set to assume_fixed_pitch_char_segment). 
- bool fixed_pitch_; - // Max char width-to-height ratio allowed - // (set to segsearch_max_char_wh_ratio). - float max_char_wh_ratio_; - - // The following variables are initialized with InitForWord(). - - // String representation of the classification of the previous word - // (since this is only used by the character ngram model component, - // only the last language_model_ngram_order of the word are stored). - STRING prev_word_str_; - int prev_word_unichar_step_len_; - // Active dawg vector. - DawgPositionVector very_beginning_active_dawgs_; // includes continuation - DawgPositionVector beginning_active_dawgs_; - // Set to true if acceptable choice was discovered. - // Note: it would be nice to use this to terminate the search once an - // acceptable choices is found. However we do not do that and once an - // acceptable choice is found we finish looking for alternative choices - // in the current segmentation graph and then exit the search (no more - // classifications are done after an acceptable choice is found). - // This is needed in order to let the search find the words very close to - // the best choice in rating (e.g. what/What, Cat/cat, etc) and log these - // choices. This way the stopper will know that the best choice is not - // ambiguous (i.e. there are best choices in the best choice list that have - // ratings close to the very best one) and will be less likely to mis-adapt. - bool acceptable_choice_found_; - // Set to true if a choice representing correct segmentation was explored. - bool correct_segmentation_explored_; - - // Params models containing weights for for computing ViterbiStateEntry costs. 
- ParamsModel params_model_; -}; - -} // namespace tesseract - -#endif // TESSERACT_WORDREC_LANGUAGE_MODEL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_consistency.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_consistency.cpp deleted file mode 100644 index bc7308a1..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_consistency.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lm_consistency.cpp -// Description: Struct for recording consistency of the paths representing -// OCR hypotheses. -// Author: Rika Antonova -// Created: Mon Jun 20 11:26:43 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//////////////////////////////////////////////////////////////////////// - -#include "lm_consistency.h" - -#include "associate.h" -#include "dict.h" -#include "ratngs.h" - -namespace tesseract { - -void LMConsistencyInfo::ComputeXheightConsistency( - const BLOB_CHOICE *b, bool is_punc) { - if (xht_decision == XH_INCONSISTENT) - return; // It isn't going to get any better. - - // Compute xheight consistency. - bool parent_null = xht_sp < 0; - int parent_sp = xht_sp; - // Debug strings. 
- if (b->yshift() > LMConsistencyInfo::kShiftThresh) { - xht_sp = LMConsistencyInfo::kSUP; - } else if (b->yshift() < -LMConsistencyInfo::kShiftThresh) { - xht_sp = LMConsistencyInfo::kSUB; - } else { - xht_sp = LMConsistencyInfo::kNORM; - } - xht_count[xht_sp]++; - if (is_punc) xht_count_punc[xht_sp]++; - if (!parent_null) { - xpos_entropy += abs(parent_sp - xht_sp); - } - // TODO(eger): Figure out a better way to account for small caps. - // For the first character not y-shifted, we only care if it is too small. - // Too large is common in drop caps and small caps. - // int16_t small_xht = b->min_xheight(); - // if (parent_vse == nullptr && sp == LanguageModelConsistencyInfo::kNORM) { - // small_xht = 0; - // } - IntersectRange(b->min_xheight(), b->max_xheight(), - &(xht_lo[xht_sp]), &(xht_hi[xht_sp])); - - - // Compute xheight inconsistency kinds. - if (parent_null) { - if (xht_count[kNORM] == 1) { - xht_decision = XH_GOOD; - } else { - xht_decision = XH_SUBNORMAL; - } - return; - } - - // When we intersect the ranges of xheights in pixels for all characters in - // each position (subscript, normal, superscript), - // How much range must be left? 0? [exactly one pixel height for xheight] 1? - // TODO(eger): Extend this code to take a prior for the rest of the line. - const int kMinIntersectedXHeightRange = 0; - for (int i = 0; i < kNumPos; i++) { - if (xht_lo[i] > xht_hi[i] - kMinIntersectedXHeightRange) { - xht_decision = XH_INCONSISTENT; - return; - } - } - - // Reject as improbable anything where there's much punctuation in subscript - // or superscript regions. - if (xht_count_punc[kSUB] > xht_count[kSUB] * 0.4 || - xht_count_punc[kSUP] > xht_count[kSUP] * 0.4) { - xht_decision = XH_INCONSISTENT; - return; - } - - // Now check that the subscript and superscript aren't too small relative to - // the mainline. 
- double mainline_xht = static_cast(xht_lo[kNORM]); - double kMinSizeRatio = 0.4; - if (mainline_xht > 0.0 && - (static_cast(xht_hi[kSUB]) / mainline_xht < kMinSizeRatio || - static_cast(xht_hi[kSUP]) / mainline_xht < kMinSizeRatio)) { - xht_decision = XH_INCONSISTENT; - return; - } - // TODO(eger): Check into inconsistency of super/subscript y offsets. - if (xpos_entropy > kMaxEntropy) { - xht_decision = XH_INCONSISTENT; - return; - } - if (xht_count[kSUB] == 0 && xht_count[kSUP] == 0) { - xht_decision = XH_GOOD; - return; - } - xht_decision = XH_SUBNORMAL; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_consistency.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_consistency.h deleted file mode 100644 index e45a1fc5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_consistency.h +++ /dev/null @@ -1,143 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lm_consistency.h -// Description: Struct for recording consistency of the paths representing -// OCR hypotheses. -// Author: Rika Antonova -// Created: Mon Jun 20 11:26:43 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -//////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_LM_CONSISTENCY_H_ -#define TESSERACT_WORDREC_LM_CONSISTENCY_H_ - -#include // for INT16_MAX -#include "dawg.h" // for EDGE_REF, NO_EDGE -#include "dict.h" // for XH_GOOD, XH_INCONSISTENT, XHeightConsi... - -class BLOB_CHOICE; - -namespace tesseract { - -static const char * const XHeightConsistencyEnumName[] = { - "XH_GOOD", - "XH_SUBNORMAL", - "XH_INCONSISTENT", -}; - -// Struct for keeping track of the consistency of the path. -struct LMConsistencyInfo { - enum ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER}; - - // How much do characters have to be shifted away from normal parameters - // before we say they're not normal? - static const int kShiftThresh = 1; - - // How much shifting from subscript to superscript and back - // before we declare shenanigans? - static const int kMaxEntropy = 1; - - // Script positions - order important for entropy calculation. - static const int kSUB = 0, kNORM = 1, kSUP = 2; - static const int kNumPos = 3; - - explicit LMConsistencyInfo(const LMConsistencyInfo* parent_info) { - if (parent_info == nullptr) { - // Initialize from scratch. - num_alphas = 0; - num_digits = 0; - num_punc = 0; - num_other = 0; - chartype = CT_NONE; - punc_ref = NO_EDGE; - invalid_punc = false; - num_non_first_upper = 0; - num_lower = 0; - script_id = 0; - inconsistent_script = false; - num_inconsistent_spaces = 0; - inconsistent_font = false; - // Initialize XHeight stats. - for (int i = 0; i < kNumPos; i++) { - xht_count[i] = 0; - xht_count_punc[i] = 0; - xht_lo[i] = 0; - xht_hi[i] = 256; // kBlnCellHeight - } - xht_sp = -1; // This invalid value indicates that there was no parent. - xpos_entropy = 0; - xht_decision = XH_GOOD; - } else { - // Copy parent info - *this = *parent_info; - } - } - inline int NumInconsistentPunc() const { - return invalid_punc ? 
num_punc : 0; - } - inline int NumInconsistentCase() const { - return (num_non_first_upper > num_lower) ? num_lower : num_non_first_upper; - } - inline int NumInconsistentChartype() const { - return (NumInconsistentPunc() + num_other + - ((num_alphas > num_digits) ? num_digits : num_alphas)); - } - inline bool Consistent() const { - return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 && - NumInconsistentChartype() == 0 && !inconsistent_script && - !inconsistent_font && !InconsistentXHeight()); - } - inline int NumInconsistentSpaces() const { - return num_inconsistent_spaces; - } - inline int InconsistentXHeight() const { - return xht_decision == XH_INCONSISTENT; - } - void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc); - float BodyMinXHeight() const { - if (InconsistentXHeight()) - return 0.0f; - return xht_lo[kNORM]; - } - float BodyMaxXHeight() const { - if (InconsistentXHeight()) - return static_cast(INT16_MAX); - return xht_hi[kNORM]; - } - - int num_alphas; - int num_digits; - int num_punc; - int num_other; - ChartypeEnum chartype; - EDGE_REF punc_ref; - bool invalid_punc; - int num_non_first_upper; - int num_lower; - int script_id; - bool inconsistent_script; - int num_inconsistent_spaces; - bool inconsistent_font; - // Metrics clumped by position. 
- float xht_lo[kNumPos]; - float xht_hi[kNumPos]; - int16_t xht_count[kNumPos]; - int16_t xht_count_punc[kNumPos]; - int16_t xht_sp; - int16_t xpos_entropy; - XHeightConsistencyEnum xht_decision; -}; - -} // namespace tesseract - -#endif // TESSERACT_WORDREC_LM_CONSISTENCY_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_pain_points.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_pain_points.cpp deleted file mode 100644 index 03c8aef3..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_pain_points.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: pain_points.cpp -// Description: Functions that utilize the knowledge about the properties -// of the paths explored by the segmentation search in order -// to "pain points" - the locations in the ratings matrix -// which should be classified next. -// Author: Rika Antonova -// Created: Mon Jun 20 11:26:43 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "lm_pain_points.h" - -#include "associate.h" -#include "dict.h" -#include "genericheap.h" -#include "lm_state.h" -#include "matrix.h" -#include "pageres.h" - -#include - -namespace tesseract { - -const float LMPainPoints::kDefaultPainPointPriorityAdjustment = 2.0f; -const float LMPainPoints::kLooseMaxCharWhRatio = 2.5f; - -LMPainPointsType LMPainPoints::Deque(MATRIX_COORD *pp, float *priority) { - for (int h = 0; h < LM_PPTYPE_NUM; ++h) { - if (pain_points_heaps_[h].empty()) continue; - *priority = pain_points_heaps_[h].PeekTop().key; - *pp = pain_points_heaps_[h].PeekTop().data; - pain_points_heaps_[h].Pop(nullptr); - return static_cast(h); - } - return LM_PPTYPE_NUM; -} - -void LMPainPoints::GenerateInitial(WERD_RES *word_res) { - MATRIX *ratings = word_res->ratings; - AssociateStats associate_stats; - for (int col = 0; col < ratings->dimension(); ++col) { - int row_end = std::min(ratings->dimension(), col + ratings->bandwidth() + 1); - for (int row = col + 1; row < row_end; ++row) { - MATRIX_COORD coord(col, row); - if (coord.Valid(*ratings) && - ratings->get(col, row) != NOT_CLASSIFIED) continue; - // Add an initial pain point if needed. - if (ratings->Classified(col, row - 1, dict_->WildcardID()) || - (col + 1 < ratings->dimension() && - ratings->Classified(col + 1, row, dict_->WildcardID()))) { - GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0, - true, max_char_wh_ratio_, word_res); - } - } - } -} - -void LMPainPoints::GenerateFromPath(float rating_cert_scale, - ViterbiStateEntry *vse, - WERD_RES *word_res) { - ViterbiStateEntry *curr_vse = vse; - BLOB_CHOICE *curr_b = vse->curr_b; - // The following pain point generation and priority calculation approaches - // prioritize exploring paths with low average rating of the known part of - // the path, while not relying on the ratings of the pieces to be combined. 
- // - // A pain point to combine the neighbors is generated for each pair of - // neighboring blobs on the path (the path is represented by vse argument - // given to GenerateFromPath()). The priority of each pain point is set to - // the average rating (per outline length) of the path, not including the - // ratings of the blobs to be combined. - // The ratings of the blobs to be combined are not used to calculate the - // priority, since it is not possible to determine from their magnitude - // whether it will be beneficial to combine the blobs. The reason is that - // chopped junk blobs (/ | - ') can have very good (low) ratings, however - // combining them will be beneficial. Blobs with high ratings might be - // over-joined pieces of characters, but also could be blobs from an unseen - // font or chopped pieces of complex characters. - while (curr_vse->parent_vse != nullptr) { - ViterbiStateEntry* parent_vse = curr_vse->parent_vse; - const MATRIX_COORD& curr_cell = curr_b->matrix_cell(); - const MATRIX_COORD& parent_cell = parent_vse->curr_b->matrix_cell(); - MATRIX_COORD pain_coord(parent_cell.col, curr_cell.row); - if (!pain_coord.Valid(*word_res->ratings) || - !word_res->ratings->Classified(parent_cell.col, curr_cell.row, - dict_->WildcardID())) { - // rat_subtr contains ratings sum of the two adjacent blobs to be merged. - // rat_subtr will be subtracted from the ratings sum of the path, since - // the blobs will be joined into a new blob, whose rating is yet unknown. - float rat_subtr = curr_b->rating() + parent_vse->curr_b->rating(); - // ol_subtr contains the outline length of the blobs that will be joined. - float ol_subtr = - AssociateUtils::ComputeOutlineLength(rating_cert_scale, *curr_b) + - AssociateUtils::ComputeOutlineLength(rating_cert_scale, - *(parent_vse->curr_b)); - // ol_dif is the outline of the path without the two blobs to be joined. 
- float ol_dif = vse->outline_length - ol_subtr; - // priority is set to the average rating of the path per unit of outline, - // not counting the ratings of the pieces to be joined. - float priority = ol_dif > 0 ? (vse->ratings_sum-rat_subtr)/ol_dif : 0.0; - GeneratePainPoint(pain_coord.col, pain_coord.row, LM_PPTYPE_PATH, - priority, true, max_char_wh_ratio_, word_res); - } else if (debug_level_ > 3) { - tprintf("NO pain point (Classified) for col=%d row=%d type=%s\n", - pain_coord.col, pain_coord.row, - LMPainPointsTypeName[LM_PPTYPE_PATH]); - BLOB_CHOICE_IT b_it(word_res->ratings->get(pain_coord.col, - pain_coord.row)); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOB_CHOICE* choice = b_it.data(); - choice->print_full(); - } - } - - curr_vse = parent_vse; - curr_b = curr_vse->curr_b; - } -} - -void LMPainPoints::GenerateFromAmbigs(const DANGERR &fixpt, - ViterbiStateEntry *vse, - WERD_RES *word_res) { - // Begins and ends in DANGERR vector now record the blob indices as used - // by the ratings matrix. - for (int d = 0; d < fixpt.size(); ++d) { - const DANGERR_INFO &danger = fixpt[d]; - // Only use dangerous ambiguities. - if (danger.dangerous) { - GeneratePainPoint(danger.begin, danger.end - 1, - LM_PPTYPE_AMBIG, vse->cost, true, - kLooseMaxCharWhRatio, word_res); - } - } -} - -bool LMPainPoints::GeneratePainPoint( - int col, int row, LMPainPointsType pp_type, float special_priority, - bool ok_to_extend, float max_char_wh_ratio, - WERD_RES *word_res) { - MATRIX_COORD coord(col, row); - if (coord.Valid(*word_res->ratings) && - word_res->ratings->Classified(col, row, dict_->WildcardID())) { - return false; - } - if (debug_level_ > 3) { - tprintf("Generating pain point for col=%d row=%d type=%s\n", - col, row, LMPainPointsTypeName[pp_type]); - } - // Compute associate stats. 
- AssociateStats associate_stats; - AssociateUtils::ComputeStats(col, row, nullptr, 0, fixed_pitch_, - max_char_wh_ratio, word_res, debug_level_, - &associate_stats); - // For fixed-pitch fonts/languages: if the current combined blob overlaps - // the next blob on the right and it is ok to extend the blob, try extending - // the blob until there is no overlap with the next blob on the right or - // until the width-to-height ratio becomes too large. - if (ok_to_extend) { - while (associate_stats.bad_fixed_pitch_right_gap && - row + 1 < word_res->ratings->dimension() && - !associate_stats.bad_fixed_pitch_wh_ratio) { - AssociateUtils::ComputeStats(col, ++row, nullptr, 0, fixed_pitch_, - max_char_wh_ratio, word_res, debug_level_, - &associate_stats); - } - } - if (associate_stats.bad_shape) { - if (debug_level_ > 3) { - tprintf("Discarded pain point with a bad shape\n"); - } - return false; - } - - // Insert the new pain point into pain_points_heap_. - if (pain_points_heaps_[pp_type].size() < max_heap_size_) { - // Compute pain point priority. - float priority; - if (pp_type == LM_PPTYPE_PATH) { - priority = special_priority; - } else { - priority = associate_stats.gap_sum; - } - MatrixCoordPair pain_point(priority, MATRIX_COORD(col, row)); - pain_points_heaps_[pp_type].Push(&pain_point); - if (debug_level_) { - tprintf("Added pain point with priority %g\n", priority); - } - return true; - } else { - if (debug_level_) tprintf("Pain points heap is full\n"); - return false; - } -} - -/** - * Adjusts the pain point coordinates to cope with expansion of the ratings - * matrix due to a split of the blob with the given index. 
- */ -void LMPainPoints::RemapForSplit(int index) { - for (int i = 0; i < LM_PPTYPE_NUM; ++i) { - GenericVector* heap = pain_points_heaps_[i].heap(); - for (int j = 0; j < heap->size(); ++j) - (*heap)[j].data.MapForSplit(index); - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_pain_points.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_pain_points.h deleted file mode 100644 index 19b368aa..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_pain_points.h +++ /dev/null @@ -1,142 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lm_pain_points.h -// Description: Functions that utilize the knowledge about the properties -// of the paths explored by the segmentation search in order -// to generate "pain points" - the locations in the ratings -// matrix which should be classified next. -// Author: Rika Antonova -// Created: Mon Jun 20 11:26:43 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_PAIN_POINTS_H_ -#define TESSERACT_WORDREC_PAIN_POINTS_H_ - -#include "genericheap.h" // for GenericHeap -#include "matrix.h" // for MATRIX_COORD (ptr only), MatrixCoordPair -#include "stopper.h" // for DANGERR - -class WERD_RES; - -namespace tesseract { - -class Dict; -struct ViterbiStateEntry; - -// Heap of pain points used for determining where to chop/join. -using PainPointHeap = GenericHeap; - -// Types of pain points (ordered in the decreasing level of importance). -enum LMPainPointsType { - LM_PPTYPE_BLAMER, - LM_PPTYPE_AMBIG, - LM_PPTYPE_PATH, - LM_PPTYPE_SHAPE, - - LM_PPTYPE_NUM -}; - -static const char * const LMPainPointsTypeName[] = { - "LM_PPTYPE_BLAMER", - "LM_PPTYPE_AMBIGS", - "LM_PPTYPE_PATH", - "LM_PPTYPE_SHAPE", -}; - -class LMPainPoints { - public: - - static const float kDefaultPainPointPriorityAdjustment; - // If there is a significant drop in character ngram probability or a - // dangerous ambiguity make the thresholds on what blob combinations - // can be classified looser. - static const float kLooseMaxCharWhRatio; - // Returns a description of the type of a pain point. - static const char* PainPointDescription(LMPainPointsType type) { - return LMPainPointsTypeName[type]; - } - - LMPainPoints(int max, float rat, bool fp, const Dict *d, int deb) : - max_heap_size_(max), max_char_wh_ratio_(rat), fixed_pitch_(fp), - dict_(d), debug_level_(deb) {} - ~LMPainPoints() {} - - // Returns true if the heap of pain points of pp_type is not empty(). - inline bool HasPainPoints(LMPainPointsType pp_type) const { - return !pain_points_heaps_[pp_type].empty(); - } - - // Dequeues the next pain point from the pain points queue and copies - // its contents and priority to *pp and *priority. - // Returns LM_PPTYPE_NUM if pain points queue is empty, otherwise the type. 
- LMPainPointsType Deque(MATRIX_COORD *pp, float *priority); - - // Clears pain points heap. - void Clear() { - for (int h = 0; h < LM_PPTYPE_NUM; ++h) pain_points_heaps_[h].clear(); - } - - // For each cell, generate a "pain point" if the cell is not classified - // and has a left or right neighbor that was classified. - void GenerateInitial(WERD_RES *word_res); - - // Generate pain points from the given path. - void GenerateFromPath(float rating_cert_scale, ViterbiStateEntry *vse, - WERD_RES *word_res); - - // Generate pain points from dangerous ambiguities in best choice. - void GenerateFromAmbigs(const DANGERR &fixpt, ViterbiStateEntry *vse, - WERD_RES *word_res); - - // Generate a pain point for the blamer. - bool GenerateForBlamer(double max_char_wh_ratio, WERD_RES *word_res, - int col, int row) { - return GeneratePainPoint(col, row, LM_PPTYPE_BLAMER, 0.0, false, - max_char_wh_ratio, word_res); - } - - // Adds a pain point to classify chunks_record->ratings(col, row). - // Returns true if a new pain point was added to an appropriate heap. - // Pain point priority is set to special_priority for pain points of - // LM_PPTYPE_AMBIG or LM_PPTYPE_PATH, for other pain points - // AssociateStats::gap_sum is used. - bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, - float special_priority, bool ok_to_extend, - float max_char_wh_ratio, - WERD_RES *word_res); - - // Adjusts the pain point coordinates to cope with expansion of the ratings - // matrix due to a split of the blob with the given index. - void RemapForSplit(int index); - - private: - // Priority queues containing pain points generated by the language model - // The priority is set by the language model components, adjustments like - // seam cost and width priority are factored into the priority. - PainPointHeap pain_points_heaps_[LM_PPTYPE_NUM]; - // Maximum number of points to keep in the heap. - int max_heap_size_; - // Maximum character width/height ratio. 
- float max_char_wh_ratio_; - // Set to true if fixed pitch should be assumed. - bool fixed_pitch_; - // Cached pointer to dictionary. - const Dict *dict_; - // Debug level for print statements. - int debug_level_; -}; - -} // namespace tesseract - -#endif // TESSERACT_WORDREC_PAIN_POINTS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_state.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_state.cpp deleted file mode 100644 index 0e6c9922..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_state.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lm_state.cpp -// Description: Structures and functionality for capturing the state of -// segmentation search guided by the language model. -// Author: Rika Antonova -// Created: Mon Jun 20 11:26:43 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "lm_state.h" - -namespace tesseract { - -ELISTIZE(ViterbiStateEntry) - -void ViterbiStateEntry::Print(const char *msg) const { - tprintf("%s ViterbiStateEntry", msg); - if (updated) tprintf("(NEW)"); - if (this->debug_str != nullptr) { - tprintf(" str=%s", this->debug_str->string()); - } - tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", - this->ratings_sum, this->length, this->cost); - if (this->top_choice_flags) { - tprintf(" top_choice_flags=0x%x", this->top_choice_flags); - } - if (!this->Consistent()) { - tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)", - this->consistency_info.NumInconsistentPunc(), - this->consistency_info.NumInconsistentCase(), - this->consistency_info.NumInconsistentChartype(), - this->consistency_info.inconsistent_script, - this->consistency_info.inconsistent_font); - } - if (this->dawg_info) tprintf(" permuter=%d", this->dawg_info->permuter); - if (this->ngram_info) { - tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d", - this->ngram_info->ngram_and_classifier_cost, - this->ngram_info->context.string(), - this->ngram_info->pruned); - } - if (this->associate_stats.shape_cost > 0.0f) { - tprintf(" shape_cost=%g", this->associate_stats.shape_cost); - } - tprintf(" %s", - XHeightConsistencyEnumName[this->consistency_info.xht_decision]); - - tprintf("\n"); -} - -/// Clears the viterbi search state back to its initial conditions. 
-void LanguageModelState::Clear() { - viterbi_state_entries.clear(); - viterbi_state_entries_prunable_length = 0; - viterbi_state_entries_prunable_max_cost = FLT_MAX; - viterbi_state_entries_length = 0; -} - -void LanguageModelState::Print(const char *msg) { - tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", - msg, viterbi_state_entries_prunable_max_cost, - viterbi_state_entries_prunable_length, viterbi_state_entries_length); - ViterbiStateEntry_IT vit(&viterbi_state_entries); - for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { - vit.data()->Print(""); - } -} - - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_state.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_state.h deleted file mode 100644 index 299ded23..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/lm_state.h +++ /dev/null @@ -1,240 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: lm_state.h -// Description: Structures and functionality for capturing the state of -// segmentation search guided by the language model. -// -// Author: Rika Antonova -// Created: Mon Jun 20 11:26:43 PST 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ -#define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ - -#include "associate.h" // for AssociateStats -#include "dawg.h" // for DawgPositionVector -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "genericvector.h" // for PointerVector -#include "lm_consistency.h" // for LMConsistencyInfo -#include "ratngs.h" // for BLOB_CHOICE, PermuterType -#include "stopper.h" // for DANGERR -#include "strngs.h" // for STRING -#include "unichar.h" // for UNICHAR_ID -#include "unicharset.h" // for UNICHARSET - -namespace tesseract { - -/// Used for expressing various language model flags. -using LanguageModelFlagsType = unsigned char; - -/// The following structs are used for storing the state of the language model -/// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs -/// and the links are the relationships between the underlying blobs (see -/// segsearch.h for a more detailed description). -/// -/// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has -/// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi -/// search leading up to and including this BLOB_CHOICE. -/// -/// Each ViterbiStateEntry contains information from various components of the -/// language model: dawgs in which the path is found, character ngram model -/// probability of the path, script/chartype/font consistency info, state for -/// language-specific heuristics (e.g. hyphenated and compound words, -/// lower/upper case preferences, etc). -/// -/// Each ViterbiStateEntry also contains the parent pointer, so that the path -/// that it represents (WERD_CHOICE) can be constructed by following these -/// parent pointers. - -/// Struct for storing additional information used by Dawg language model -/// component. 
It stores the set of active dawgs in which the sequence of -/// letters on a path can be found. -struct LanguageModelDawgInfo { - LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt) - : active_dawgs(*a), permuter(pt) {} - DawgPositionVector active_dawgs; - PermuterType permuter; -}; - -/// Struct for storing additional information used by Ngram language model -/// component. -struct LanguageModelNgramInfo { - LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc) - : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc), - ngram_and_classifier_cost(ncc) {} - STRING context; //< context string - /// Length of the context measured by advancing using UNICHAR::utf8_step() - /// (should be at most the order of the character ngram model used). - int context_unichar_step_len; - /// The paths with pruned set are pruned out from the perspective of the - /// character ngram model. They are explored further because they represent - /// a dictionary match or a top choice. Thus ngram_info is still computed - /// for them in order to calculate the combined cost. - bool pruned; - /// -ln(P_ngram_model(path)) - float ngram_cost; - /// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ] - float ngram_and_classifier_cost; -}; - -/// Struct for storing the information about a path in the segmentation graph -/// explored by Viterbi search. 
-struct ViterbiStateEntry : public ELIST_LINK { - ViterbiStateEntry(ViterbiStateEntry *pe, - BLOB_CHOICE *b, float c, float ol, - const LMConsistencyInfo &ci, - const AssociateStats &as, - LanguageModelFlagsType tcf, - LanguageModelDawgInfo *d, - LanguageModelNgramInfo *n, - const char *debug_uch) - : cost(c), curr_b(b), parent_vse(pe), competing_vse(nullptr), - ratings_sum(b->rating()), - min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1), - outline_length(ol), consistency_info(ci), associate_stats(as), - top_choice_flags(tcf), dawg_info(d), ngram_info(n), - updated(true) { - debug_str = (debug_uch == nullptr) ? nullptr : new STRING(); - if (pe != nullptr) { - ratings_sum += pe->ratings_sum; - if (pe->min_certainty < min_certainty) { - min_certainty = pe->min_certainty; - } - adapted += pe->adapted; - length += pe->length; - outline_length += pe->outline_length; - if (debug_uch != nullptr) *debug_str += *(pe->debug_str); - } - if (debug_str != nullptr && debug_uch != nullptr) *debug_str += debug_uch; - } - ~ViterbiStateEntry() { - delete dawg_info; - delete ngram_info; - delete debug_str; - } - /// Comparator function for sorting ViterbiStateEntry_LISTs in - /// non-increasing order of costs. - static int Compare(const void *e1, const void *e2) { - const ViterbiStateEntry *ve1 = - *static_cast(e1); - const ViterbiStateEntry *ve2 = - *static_cast(e2); - return (ve1->cost < ve2->cost) ? -1 : 1; - } - inline bool Consistent() const { - if (dawg_info != nullptr && consistency_info.NumInconsistentCase() == 0) { - return true; - } - return consistency_info.Consistent(); - } - /// Returns true if this VSE has an alphanumeric character as its classifier - /// result. 
- bool HasAlnumChoice(const UNICHARSET& unicharset) { - if (curr_b == nullptr) return false; - UNICHAR_ID unichar_id = curr_b->unichar_id(); - if (unicharset.get_isalpha(unichar_id) || - unicharset.get_isdigit(unichar_id)) - return true; - return false; - } - void Print(const char *msg) const; - - /// The cost is an adjusted ratings sum, that is adjusted by all the language - /// model components that use Viterbi search. - float cost; - - /// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). - BLOB_CHOICE *curr_b; - ViterbiStateEntry *parent_vse; - /// Pointer to a case-competing ViterbiStateEntry in the same list that - /// represents a path ending in the same letter of the opposite case. - ViterbiStateEntry *competing_vse; - - /// Various information about the characters on the path represented - /// by this ViterbiStateEntry. - float ratings_sum; //< sum of ratings of character on the path - float min_certainty; //< minimum certainty on the path - int adapted; //< number of BLOB_CHOICES from adapted templates - int length; //< number of characters on the path - float outline_length; //< length of the outline so far - LMConsistencyInfo consistency_info; //< path consistency info - AssociateStats associate_stats; //< character widths/gaps/seams - - /// Flags for marking the entry as a top choice path with - /// the smallest rating or lower/upper case letters). - LanguageModelFlagsType top_choice_flags; - - /// Extra information maintained by Dawg language model component - /// (owned by ViterbiStateEntry). - LanguageModelDawgInfo *dawg_info; - - /// Extra information maintained by Ngram language model component - /// (owned by ViterbiStateEntry). - LanguageModelNgramInfo *ngram_info; - - bool updated; //< set to true if the entry has just been created/updated - /// UTF8 string representing the path corresponding to this vse. - /// Populated only in when language_model_debug_level > 0. 
- STRING *debug_str; -}; - -ELISTIZEH(ViterbiStateEntry) - -/// Struct to store information maintained by various language model components. -struct LanguageModelState { - LanguageModelState() : - viterbi_state_entries_prunable_length(0), - viterbi_state_entries_prunable_max_cost(FLT_MAX), - viterbi_state_entries_length(0) {} - ~LanguageModelState() {} - - /// Clears the viterbi search state back to its initial conditions. - void Clear(); - - void Print(const char *msg); - - /// Storage for the Viterbi state. - ViterbiStateEntry_LIST viterbi_state_entries; - /// Number and max cost of prunable paths in viterbi_state_entries. - int viterbi_state_entries_prunable_length; - float viterbi_state_entries_prunable_max_cost; - /// Total number of entries in viterbi_state_entries. - int viterbi_state_entries_length; -}; - -/// Bundle together all the things pertaining to the best choice/state. -struct BestChoiceBundle { - explicit BestChoiceBundle(int matrix_dimension) - : updated(false), best_vse(nullptr) { - beam.reserve(matrix_dimension); - for (int i = 0; i < matrix_dimension; ++i) - beam.push_back(new LanguageModelState); - } - ~BestChoiceBundle() {} - - /// Flag to indicate whether anything was changed. - bool updated; - /// Places to try to fix the word suggested by ambiguity checking. - DANGERR fixpt; - /// The beam. One LanguageModelState containing a list of ViterbiStateEntry - /// per row in the ratings matrix containing all VSEs whose BLOB_CHOICE is - /// somewhere in the corresponding row. - PointerVector beam; - /// Best ViterbiStateEntry and BLOB_CHOICE. 
- ViterbiStateEntry *best_vse; -}; - -} // namespace tesseract - -#endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/measure.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/measure.h deleted file mode 100644 index d7718cc5..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/measure.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: measure.h (Formerly measure.h) - * Description: Statistics for a group of single measurements - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Mon Apr 8 09:42:28 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - ******************************************************************************** - */ - -#ifndef MEASURE_H -#define MEASURE_H - -/* ----------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------- -*/ - -#include - -/* ----------------------------------------------------------------------- - T y p e s ----------------------------------------------------------------------- -*/ - -typedef struct -{ - long num_samples; - float sum_of_samples; - float sum_of_squares; -} MEASUREMENT; - -/* ----------------------------------------------------------------------- - M a c r o s ----------------------------------------------------------------------- -*/ - -/********************************************************************** - * add_sample - * - * Add one more sample to a measurement. - **********************************************************************/ - -#define ADD_SAMPLE(m, s) \ - (m.sum_of_samples += (float)(s), \ - m.sum_of_squares += (float)(s) * (float)(s), ++m.num_samples) - -/********************************************************************** - * mean - * - * Return the mean value of the measurement. - **********************************************************************/ - -#define MEAN(m) \ - ((m).num_samples ? ((float)((m).sum_of_samples / (m).num_samples)) : 0) - -/********************************************************************** - * new_measurement - * - * Initialize a record to hold a measurement of a group of individual - * samples. - **********************************************************************/ - -#define new_measurement(m) \ - ((m).num_samples = 0, (m).sum_of_samples = 0, (m).sum_of_squares = 0) - -/********************************************************************** - * number_of_samples - * - * Return the number of samples in a measurement. 
- **********************************************************************/ - -#define number_of_samples(m) \ -((m).num_samples) - -/********************************************************************** - * standard_deviation - * - * Return the standard deviation of the measurement. - **********************************************************************/ - -#define standard_deviation(m) \ -((float) sqrt (VARIANCE (m))) - -/********************************************************************** - * variance - * - * Return the variance of the measurement. - **********************************************************************/ - -#define VARIANCE(m) \ - (((m).num_samples > 1) \ - ? ((float)(((m).num_samples * (m).sum_of_squares - \ - (m).sum_of_samples * (m).sum_of_samples) / \ - (((m).num_samples - 1) * (m).num_samples))) \ - : 0) - -/********************************************************************** - * print_summary - * - * Summarize a MEASUREMENT record. - **********************************************************************/ - -#define print_summary(string, measure) \ - cprintf("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ", string, \ - number_of_samples(measure), MEAN(measure), \ - standard_deviation(measure)) -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/outlines.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/outlines.cpp deleted file mode 100644 index d52f2c45..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/outlines.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: outlines.cpp (Formerly outlines.c) - * Description: Combinatorial Splitter - * Author: Mark Seaman, OCR Technology - * Created: Thu Jul 27 08:59:01 1989 - * Modified: Wed Jul 10 14:56:49 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) 
Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - * Revision 1.2 89/09/15 09:24:41 09:24:41 marks (Mark Seaman) - * First released version of Combinatorial splitter code - **/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "outlines.h" -#include "wordrec.h" - -namespace tesseract { -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * near_point - * - * Find the point on a line segment that is closest to a point not on - * the line segment. Return that point in near_pt. Returns whether - * near_pt was newly created. 
- **********************************************************************/ -bool Wordrec::near_point(EDGEPT *point, - EDGEPT *line_pt_0, EDGEPT *line_pt_1, - EDGEPT **near_pt) { - TPOINT p; - - float slope; - float intercept; - - float x0 = line_pt_0->pos.x; - float x1 = line_pt_1->pos.x; - float y0 = line_pt_0->pos.y; - float y1 = line_pt_1->pos.y; - - if (x0 == x1) { - /* Handle vertical line */ - p.x = (int16_t) x0; - p.y = point->pos.y; - } - else { - /* Slope and intercept */ - slope = (y0 - y1) / (x0 - x1); - intercept = y1 - x1 * slope; - - /* Find perpendicular */ - p.x = (int16_t) ((point->pos.x + (point->pos.y - intercept) * slope) / - (slope * slope + 1)); - p.y = (int16_t) (slope * p.x + intercept); - } - - if (is_on_line (p, line_pt_0->pos, line_pt_1->pos) && - (!same_point (p, line_pt_0->pos)) && (!same_point (p, line_pt_1->pos))) { - /* Intersection on line */ - *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0); - return true; - } else { /* Intersection not on line */ - *near_pt = closest(point, line_pt_0, line_pt_1); - return false; - } -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/outlines.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/outlines.h deleted file mode 100644 index 893fb5c8..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/outlines.h +++ /dev/null @@ -1,135 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: outlines.h - * Description: Combinatorial Splitter - * Author: Mark Seaman, OCR Technology - * Created: Thu Jul 27 11:27:55 1989 - * Modified: Wed May 15 17:28:47 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#ifndef OUTLINES_H -#define OUTLINES_H - -#include // for abs -#include "blobs.h" // for TPOINT -#include "params.h" // for IntParam -#include "wordrec.h" // for Wordrec - -/*---------------------------------------------------------------------- - C o n s t a n t s -----------------------------------------------------------------------*/ -#define LARGE_DISTANCE 100000 /* Used for closest dist */ -#define MIN_BLOB_SIZE 10 /* Big units */ -#define MAX_ASPECT_RATIO 2.5 /* Widest character */ - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/********************************************************************** - * same_point - * - * Return TRUE if the point values are the same. The parameters must - * be of type POINT. - **********************************************************************/ -#define same_point(p1,p2) \ - ((abs (p1.x - p2.x) < chop_same_distance) && \ - (abs (p1.y - p2.y) < chop_same_distance)) - -/********************************************************************** - * dist_square - * - * Return the square of the distance between these two points. The - * parameters must be of type POINT. 
- **********************************************************************/ - -#define dist_square(p1,p2) \ - ((p2.x - p1.x) * (p2.x - p1.x) + \ - (p2.y - p1.y) * (p2.y - p1.y)) - -/********************************************************************** - * closest - * - * The expression provides the EDGEPT that is closest to the point in - * question. All three parameters must be of type EDGEPT. - **********************************************************************/ - -#define closest(test_p,p1,p2) \ -(p1 ? \ - (p2 ? \ - ((dist_square (test_p->pos, p1->pos) < \ - dist_square (test_p->pos, p2->pos)) ? \ - p1 : \ - p2) : \ - p1) : \ - p2) - -/********************************************************************** - * edgept_dist - * - * Return the distance (squared) between the two edge points. - **********************************************************************/ - -#define edgept_dist(p1,p2) \ -(dist_square ((p1)->pos, (p2)->pos)) - -/********************************************************************** - * is_exterior_point - * - * Return TRUE if the point supplied is an exterior projection from the - * outline. - **********************************************************************/ - -#define is_exterior_point(edge,point) \ -(same_point (edge->prev->pos, point->pos) || \ - same_point (edge->next->pos, point->pos) || \ - (angle_change (edge->prev, edge, edge->next) - \ - angle_change (edge->prev, edge, point) > 20)) - -/********************************************************************** - * is_equal - * - * Return TRUE if the POINTs are equal. - **********************************************************************/ - -#define is_equal(p1,p2) \ -(((p1).x == (p2).x) && ((p1).y == (p2).y)) - -/********************************************************************** - * is_on_line - * - * Return TRUE if the point is on the line segment between the two end - * points. The two end points are included as part of the line. The - * parameters must be of type POINT. 
- **********************************************************************/ - -#define is_on_line(p,p0,p1) \ - (within_range ((p).x, (p0).x, (p1).x) && \ - within_range ((p).y, (p0).y, (p1).y)) - -/********************************************************************** - * within_range - * - * Return TRUE if the first number is in between the second two numbers. - * Return FALSE otherwise. - **********************************************************************/ - -#define within_range(x,x0,x1) \ - (((x0 <= x) && (x <= x1)) || ((x1 <= x) && (x <= x0))) - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/params_model.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/params_model.cpp deleted file mode 100644 index 8512eda4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/params_model.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: params_model.cpp -// Description: Trained language model parameters. -// Author: David Eger -// Created: Mon Jun 11 11:26:42 PDT 2012 -// -// (C) Copyright 2012, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "params_model.h" - -#include -#include -#include - -#include "bitvector.h" -#include "tprintf.h" - -namespace tesseract { - -// Scale factor to apply to params model scores. 
-static const float kScoreScaleFactor = 100.0f; -// Minimum cost result to return. -static const float kMinFinalCost = 0.001f; -// Maximum cost result to return. -static const float kMaxFinalCost = 100.0f; - -void ParamsModel::Print() { - for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) { - tprintf("ParamsModel for pass %d lang %s\n", p, lang_.string()); - for (int i = 0; i < weights_vec_[p].size(); ++i) { - tprintf("%s = %g\n", kParamsTrainingFeatureTypeName[i], - weights_vec_[p][i]); - } - } -} - -void ParamsModel::Copy(const ParamsModel &other_model) { - for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) { - weights_vec_[p] = other_model.weights_for_pass( - static_cast(p)); - } -} - -// Given a (modifiable) line, parse out a key / value pair. -// Return true on success. -bool ParamsModel::ParseLine(char *line, char** key, float *val) { - if (line[0] == '#') - return false; - int end_of_key = 0; - while (line[end_of_key] && - !(isascii(line[end_of_key]) && isspace(line[end_of_key]))) { - end_of_key++; - } - if (!line[end_of_key]) { - tprintf("ParamsModel::Incomplete line %s\n", line); - return false; - } - line[end_of_key++] = 0; - *key = line; - if (sscanf(line + end_of_key, " %f", val) != 1) - return false; - return true; -} - -// Applies params model weights to the given features. -// Assumes that features is an array of size PTRAIN_NUM_FEATURE_TYPES. -// The cost is set to a number that can be multiplied by the outline length, -// as with the old ratings scheme. This enables words of different length -// and combinations of words to be compared meaningfully. 
-float ParamsModel::ComputeCost(const float features[]) const { - float unnorm_score = 0.0; - for (int f = 0; f < PTRAIN_NUM_FEATURE_TYPES; ++f) { - unnorm_score += weights_vec_[pass_][f] * features[f]; - } - return ClipToRange(-unnorm_score / kScoreScaleFactor, - kMinFinalCost, kMaxFinalCost); -} - -bool ParamsModel::Equivalent(const ParamsModel &that) const { - float epsilon = 0.0001; - for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) { - if (weights_vec_[p].size() != that.weights_vec_[p].size()) return false; - for (int i = 0; i < weights_vec_[p].size(); i++) { - if (weights_vec_[p][i] != that.weights_vec_[p][i] && - fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) - return false; - } - } - return true; -} - -bool ParamsModel::LoadFromFile( - const char *lang, - const char *full_path) { - TFile fp; - if (!fp.Open(full_path, nullptr)) { - tprintf("Error opening file %s\n", full_path); - return false; - } - return LoadFromFp(lang, &fp); -} - -bool ParamsModel::LoadFromFp(const char *lang, TFile *fp) { - const int kMaxLineSize = 100; - char line[kMaxLineSize]; - BitVector present; - present.Init(PTRAIN_NUM_FEATURE_TYPES); - lang_ = lang; - // Load weights for passes with adaption on. 
- GenericVector &weights = weights_vec_[pass_]; - weights.init_to_size(PTRAIN_NUM_FEATURE_TYPES, 0.0); - - while (fp->FGets(line, kMaxLineSize) != nullptr) { - char *key = nullptr; - float value; - if (!ParseLine(line, &key, &value)) - continue; - int idx = ParamsTrainingFeatureByName(key); - if (idx < 0) { - tprintf("ParamsModel::Unknown parameter %s\n", key); - continue; - } - if (!present[idx]) { - present.SetValue(idx, true); - } - weights[idx] = value; - } - bool complete = (present.NumSetBits() == PTRAIN_NUM_FEATURE_TYPES); - if (!complete) { - for (int i = 0; i < PTRAIN_NUM_FEATURE_TYPES; i++) { - if (!present[i]) { - tprintf("Missing field %s.\n", kParamsTrainingFeatureTypeName[i]); - } - } - lang_ = ""; - weights.truncate(0); - } - return complete; -} - -bool ParamsModel::SaveToFile(const char *full_path) const { - const GenericVector &weights = weights_vec_[pass_]; - if (weights.size() != PTRAIN_NUM_FEATURE_TYPES) { - tprintf("Refusing to save ParamsModel that has not been initialized.\n"); - return false; - } - FILE *fp = fopen(full_path, "wb"); - if (!fp) { - tprintf("Could not open %s for writing.\n", full_path); - return false; - } - bool all_good = true; - for (int i = 0; i < weights.size(); i++) { - if (fprintf(fp, "%s %f\n", kParamsTrainingFeatureTypeName[i], weights[i]) - < 0) { - all_good = false; - } - } - fclose(fp); - return all_good; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/params_model.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/params_model.h deleted file mode 100644 index 4d373eea..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/params_model.h +++ /dev/null @@ -1,90 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: params_model.h -// Description: Trained feature serialization for language parameter training. 
-// Author: David Eger -// Created: Mon Jun 11 11:26:42 PDT 2012 -// -// (C) Copyright 2011, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_PARAMS_MODEL_H_ -#define TESSERACT_WORDREC_PARAMS_MODEL_H_ - -#include "genericvector.h" // for GenericVector -#include "params_training_featdef.h" // for PTRAIN_NUM_FEATURE_TYPES -#include "strngs.h" // for STRING - -namespace tesseract { - -class TFile; - -// Represents the learned weights for a given language. -class ParamsModel { - public: - // Enum for expressing OCR pass. - enum PassEnum { - PTRAIN_PASS1, - PTRAIN_PASS2, - - PTRAIN_NUM_PASSES - }; - - ParamsModel() : pass_(PTRAIN_PASS1) {} - ParamsModel(const char *lang, const GenericVector &weights) : - lang_(lang), pass_(PTRAIN_PASS1) { weights_vec_[pass_] = weights; } - inline bool Initialized() { - return weights_vec_[pass_].size() == PTRAIN_NUM_FEATURE_TYPES; - } - // Prints out feature weights. - void Print(); - // Clears weights for all passes. - void Clear() { - for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) weights_vec_[p].clear(); - } - // Copies the weights of the given params model. - void Copy(const ParamsModel &other_model); - // Applies params model weights to the given features. - // Assumes that features is an array of size PTRAIN_NUM_FEATURE_TYPES. 
- float ComputeCost(const float features[]) const; - bool Equivalent(const ParamsModel &that) const; - - // Returns true on success. - bool SaveToFile(const char *full_path) const; - - // Returns true on success. - bool LoadFromFile(const char *lang, const char *full_path); - bool LoadFromFp(const char *lang, TFile *fp); - - const GenericVector& weights() const { - return weights_vec_[pass_]; - } - const GenericVector& weights_for_pass(PassEnum pass) const { - return weights_vec_[pass]; - } - void SetPass(PassEnum pass) { pass_ = pass; } - - private: - bool ParseLine(char *line, char **key, float *val); - - STRING lang_; - // Set to the current pass type and used to determine which set of weights - // should be used for ComputeCost() and other functions. - PassEnum pass_; - // Several sets of weights for various OCR passes (e.g. pass1 with adaption, - // pass2 without adaption, etc). - GenericVector weights_vec_[PTRAIN_NUM_PASSES]; -}; - -} // namespace tesseract - -#endif // TESSERACT_WORDREC_PARAMS_MODEL_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/pieces.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/pieces.cpp deleted file mode 100644 index 76434e65..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/pieces.cpp +++ /dev/null @@ -1,342 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: pieces.cpp (Formerly pieces.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Mon May 20 12:12:35 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "blobs.h" -#include "helpers.h" -#include "matrix.h" -#include "ratngs.h" -#include "seam.h" -#include "wordrec.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -using tesseract::ScoredFont; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -/********************************************************************** - * classify_piece - * - * Create a larger piece from a collection of smaller ones. Classify - * it and return the results. Take the large piece apart to leave - * the collection of small pieces un modified. - **********************************************************************/ -namespace tesseract { -BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector& seams, - int16_t start, - int16_t end, - const char* description, - TWERD *word, - BlamerBundle *blamer_bundle) { - if (end > start) SEAM::JoinPieces(seams, word->blobs, start, end); - BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description, - White, blamer_bundle); - // Set the matrix_cell_ entries in all the BLOB_CHOICES. 
- BLOB_CHOICE_IT bc_it(choices); - for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { - bc_it.data()->set_matrix_cell(start, end); - } - - if (end > start) SEAM::BreakPieces(seams, word->blobs, start, end); - - return (choices); -} - -template -int SortByUnicharID(const void *void1, const void *void2) { - const BLOB_CHOICE *p1 = *static_cast(void1); - const BLOB_CHOICE *p2 = *static_cast(void2); - - return p1->unichar_id() - p2->unichar_id(); -} - -template -int SortByRating(const void *void1, const void *void2) { - const BLOB_CHOICE *p1 = *static_cast(void1); - const BLOB_CHOICE *p2 = *static_cast(void2); - - if (p1->rating() < p2->rating()) - return 1; - return -1; -} - - -/********************************************************************** - * fill_filtered_fragment_list - * - * Filter the fragment list so that the filtered_choices only contain - * fragments that are in the correct position. choices is the list - * that we are going to filter. fragment_pos is the position in the - * fragment that we are looking for and num_frag_parts is the the - * total number of pieces. The result will be appended to - * filtered_choices. 
- **********************************************************************/ -void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, - int fragment_pos, - int num_frag_parts, - BLOB_CHOICE_LIST *filtered_choices) { - BLOB_CHOICE_IT filtered_choices_it(filtered_choices); - BLOB_CHOICE_IT choices_it(choices); - - for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); - choices_it.forward()) { - UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id(); - const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id); - - if (frag != nullptr && frag->get_pos() == fragment_pos && - frag->get_total() == num_frag_parts) { - // Recover the unichar_id of the unichar that this fragment is - // a part of - BLOB_CHOICE *b = new BLOB_CHOICE(*choices_it.data()); - int original_unichar = unicharset.unichar_to_id(frag->get_unichar()); - b->set_unichar_id(original_unichar); - filtered_choices_it.add_to_end(b); - } - } - - filtered_choices->sort(SortByUnicharID); -} - - -/********************************************************************** - * merge_and_put_fragment_lists - * - * Merge the fragment lists in choice_lists and append it to the - * ratings matrix. 
- **********************************************************************/ -void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, - int16_t num_frag_parts, - BLOB_CHOICE_LIST *choice_lists, - MATRIX *ratings) { - BLOB_CHOICE_IT *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts]; - - for (int i = 0; i < num_frag_parts; i++) { - choice_lists_it[i].set_to_list(&choice_lists[i]); - choice_lists_it[i].mark_cycle_pt(); - } - - BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column); - if (merged_choice == nullptr) - merged_choice = new BLOB_CHOICE_LIST; - - bool end_of_list = false; - BLOB_CHOICE_IT merged_choice_it(merged_choice); - while (!end_of_list) { - // Find the maximum unichar_id of the current entry the iterators - // are pointing at - UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id(); - for (int i = 0; i < num_frag_parts; i++) { - UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id(); - if (max_unichar_id < unichar_id) { - max_unichar_id = unichar_id; - } - } - - // Move the each iterators until it gets to an entry that has a - // value greater than or equal to max_unichar_id - for (int i = 0; i < num_frag_parts; i++) { - UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id(); - while (!choice_lists_it[i].cycled_list() && - unichar_id < max_unichar_id) { - choice_lists_it[i].forward(); - unichar_id = choice_lists_it[i].data()->unichar_id(); - } - if (choice_lists_it[i].cycled_list()) { - end_of_list = true; - break; - } - } - - if (end_of_list) - break; - - // Checks if the fragments are parts of the same character - UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id(); - bool same_unichar = true; - for (int i = 1; i < num_frag_parts; i++) { - UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id(); - if (unichar_id != first_unichar_id) { - same_unichar = false; - break; - } - } - - if (same_unichar) { - // Add the merged character to the result - UNICHAR_ID merged_unichar_id 
= first_unichar_id; - GenericVector merged_fonts = - choice_lists_it[0].data()->fonts(); - float merged_min_xheight = choice_lists_it[0].data()->min_xheight(); - float merged_max_xheight = choice_lists_it[0].data()->max_xheight(); - float positive_yshift = 0, negative_yshift = 0; - int merged_script_id = choice_lists_it[0].data()->script_id(); - BlobChoiceClassifier classifier = choice_lists_it[0].data()->classifier(); - - float merged_rating = 0, merged_certainty = 0; - for (int i = 0; i < num_frag_parts; i++) { - float rating = choice_lists_it[i].data()->rating(); - float certainty = choice_lists_it[i].data()->certainty(); - - if (i == 0 || certainty < merged_certainty) - merged_certainty = certainty; - merged_rating += rating; - - choice_lists_it[i].forward(); - if (choice_lists_it[i].cycled_list()) - end_of_list = true; - IntersectRange(choice_lists_it[i].data()->min_xheight(), - choice_lists_it[i].data()->max_xheight(), - &merged_min_xheight, &merged_max_xheight); - float yshift = choice_lists_it[i].data()->yshift(); - if (yshift > positive_yshift) positive_yshift = yshift; - if (yshift < negative_yshift) negative_yshift = yshift; - // Use the min font rating over the parts. - // TODO(rays) font lists are unsorted. Need to be faster? - const GenericVector& frag_fonts = - choice_lists_it[i].data()->fonts(); - for (int f = 0; f < frag_fonts.size(); ++f) { - int merged_f = 0; - for (merged_f = 0; merged_f < merged_fonts.size() && - merged_fonts[merged_f].fontinfo_id != frag_fonts[f].fontinfo_id; - ++merged_f) {} - if (merged_f == merged_fonts.size()) { - merged_fonts.push_back(frag_fonts[f]); - } else if (merged_fonts[merged_f].score > frag_fonts[f].score) { - merged_fonts[merged_f].score = frag_fonts[f].score; - } - } - } - - float merged_yshift = positive_yshift != 0 - ? (negative_yshift != 0 ? 
0 : positive_yshift) - : negative_yshift; - BLOB_CHOICE* choice = new BLOB_CHOICE(merged_unichar_id, - merged_rating, - merged_certainty, - merged_script_id, - merged_min_xheight, - merged_max_xheight, - merged_yshift, - classifier); - choice->set_fonts(merged_fonts); - merged_choice_it.add_to_end(choice); - } - } - - if (classify_debug_level) - print_ratings_list("Merged Fragments", merged_choice, - unicharset); - - if (merged_choice->empty()) - delete merged_choice; - else - ratings->put(row, column, merged_choice); - - delete [] choice_lists_it; -} - -/********************************************************************** - * get_fragment_lists - * - * Recursively go through the ratings matrix to find lists of fragments - * to be merged in the function merge_and_put_fragment_lists. - * current_frag is the position of the piece we are looking for. - * current_row is the row in the rating matrix we are currently at. - * start is the row we started initially, so that we can know where - * to append the results to the matrix. num_frag_parts is the total - * number of pieces we are looking for and num_blobs is the size of the - * ratings matrix. 
- **********************************************************************/ -void Wordrec::get_fragment_lists(int16_t current_frag, int16_t current_row, - int16_t start, int16_t num_frag_parts, - int16_t num_blobs, MATRIX *ratings, - BLOB_CHOICE_LIST *choice_lists) { - if (current_frag == num_frag_parts) { - merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts, - choice_lists, ratings); - return; - } - - for (int16_t x = current_row; x < num_blobs; x++) { - BLOB_CHOICE_LIST *choices = ratings->get(current_row, x); - if (choices == nullptr) - continue; - - fill_filtered_fragment_list(choices, current_frag, num_frag_parts, - &choice_lists[current_frag]); - if (!choice_lists[current_frag].empty()) { - get_fragment_lists(current_frag + 1, x + 1, start, num_frag_parts, - num_blobs, ratings, choice_lists); - choice_lists[current_frag].clear(); - } - } -} - - -/********************************************************************** - * merge_fragments - * - * Try to merge fragments in the ratings matrix and put the result in - * the corresponding row and column - **********************************************************************/ -void Wordrec::merge_fragments(MATRIX *ratings, int16_t num_blobs) { - BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks]; - for (int16_t start = 0; start < num_blobs; start++) { - for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks; - frag_parts++) { - get_fragment_lists(0, start, start, frag_parts, num_blobs, - ratings, choice_lists); - } - } - - // Delete fragments from the rating matrix - for (int16_t x = 0; x < num_blobs; x++) { - for (int16_t y = x; y < num_blobs; y++) { - BLOB_CHOICE_LIST *choices = ratings->get(x, y); - if (choices != nullptr) { - BLOB_CHOICE_IT choices_it(choices); - for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); - choices_it.forward()) { - UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id(); - const CHAR_FRAGMENT *frag = - 
unicharset.get_fragment(choice_unichar_id); - if (frag != nullptr) - delete choices_it.extract(); - } - } - } - } -} - - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/plotedges.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/plotedges.cpp deleted file mode 100644 index a839b6b4..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/plotedges.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: plotedges.cpp (Formerly plotedges.c) - * Description: Graphics routines for "Edges" and "Outlines" windows - * Author: Mark Seaman, OCR Technology - * Created: Fri Jul 28 13:14:48 1989 - * Modified: Tue Jul 9 17:22:22 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ - -#include "plotedges.h" -#include "render.h" -#include "split.h" - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifndef GRAPHICS_DISABLED - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -ScrollView *edge_window = nullptr; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * display_edgepts - * - * Macro to display edge points in a window. - **********************************************************************/ -void display_edgepts(LIST outlines) { - void *window; - /* Set up window */ - if (edge_window == nullptr) { - edge_window = c_create_window ("Edges", 750, 150, - 400, 128, -400.0, 400.0, 0.0, 256.0); - } - else { - c_clear_window(edge_window); - } - /* Render the outlines */ - window = edge_window; - /* Reclaim old memory */ - iterate(outlines) { - render_edgepts (window, (EDGEPT *) first_node (outlines), White); - } -} - - -/********************************************************************** - * draw_blob_edges - * - * Display the edges of this blob in the edges window. - **********************************************************************/ -void draw_blob_edges(TBLOB *blob) { - TESSLINE *ol; - LIST edge_list = NIL_LIST; - - if (wordrec_display_splits) { - for (ol = blob->outlines; ol != nullptr; ol = ol->next) - push_on (edge_list, ol->loop); - display_edgepts(edge_list); - destroy(edge_list); - } -} - - -/********************************************************************** - * mark_outline - * - * Make a mark on the edges window at a particular location. 
- **********************************************************************/ -void mark_outline(EDGEPT *edgept) { /* Start of point list */ - void *window = edge_window; - float x = edgept->pos.x; - float y = edgept->pos.y; - - c_line_color_index(window, Red); - c_move(window, x, y); - - x -= 4; - y -= 12; - c_draw(window, x, y); - - x -= 2; - y += 4; - c_draw(window, x, y); - - x -= 4; - y += 2; - c_draw(window, x, y); - - x += 10; - y += 6; - c_draw(window, x, y); - - c_make_current(window); -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/plotedges.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/plotedges.h deleted file mode 100644 index ef799390..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/plotedges.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: plotedges.h - * Description: Convert the various data type into line lists - * Author: Mark Seaman, OCR Technology - * Created: Fri Jul 28 13:14:48 1989 - * Modified: Mon May 13 09:34:51 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ - -#ifndef PLOTEDGES_H -#define PLOTEDGES_H - -#include "oldlist.h" // for LIST - -class ScrollView; - -struct EDGEPT; -struct TBLOB; - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern ScrollView *edge_window; /* Window for edges */ - -/*---------------------------------------------------------------------- - Macros -----------------------------------------------------------------------*/ -/********************************************************************** - * update_edge_window - * - * Refresh the display of the edge window. - **********************************************************************/ -#define update_edge_window() \ -if (wordrec_display_splits) { \ - c_make_current (edge_window); \ -} - - -/********************************************************************** - * edge_window_wait - * - * Wait for someone to click in the edges window. 
- **********************************************************************/ - -#define edge_window_wait() \ -if (wordrec_display_splits) window_wait (edge_window) - -/*---------------------------------------------------------------------- - F u n c t i o n s ----------------------------------------------------------------------*/ -void display_edgepts(LIST outlines); - -void draw_blob_edges(TBLOB *blob); - -void mark_outline(EDGEPT *edgept); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/render.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/render.cpp deleted file mode 100644 index b05139eb..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/render.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: render.cpp (Formerly render.c) - * Description: Convert the various data type into line lists - * Author: Mark Seaman, OCR Technology - * Created: Fri Jul 28 13:14:48 1989 - * Modified: Mon Jul 15 10:23:37 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#include "render.h" -#include "blobs.h" - -#include - -#include "vecfuncs.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -ScrollView *blob_window = nullptr; - -C_COL color_list[] = { - Red, Cyan, Yellow, Blue, Green, White -}; - -BOOL_VAR(wordrec_display_all_blobs, 0, "Display Blobs"); - -BOOL_VAR(wordrec_display_all_words, 0, "Display Words"); - -BOOL_VAR(wordrec_blob_pause, 0, "Blob pause"); - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -#ifndef GRAPHICS_DISABLED -/********************************************************************** - * display_blob - * - * Macro to display blob in a window. - **********************************************************************/ -void display_blob(TBLOB *blob, C_COL color) { - /* Size of drawable */ - if (blob_window == nullptr) { - blob_window = c_create_window ("Blobs", 520, 10, - 500, 256, -1000.0, 1000.0, 0.0, 256.0); - } - else { - c_clear_window(blob_window); - } - - render_blob(blob_window, blob, color); -} - -/********************************************************************** - * render_blob - * - * Create a list of line segments that represent the expanded outline - * that was supplied as input. 
- **********************************************************************/ -void render_blob(void *window, TBLOB *blob, C_COL color) { - /* No outline */ - if (!blob) - return; - - render_outline (window, blob->outlines, color); -} - - -/********************************************************************** - * render_edgepts - * - * Create a list of line segments that represent the expanded outline - * that was supplied as input. - **********************************************************************/ -void render_edgepts(void *window, EDGEPT *edgept, C_COL color) { - if (!edgept) - return; - - float x = edgept->pos.x; - float y = edgept->pos.y; - EDGEPT *this_edge = edgept; - - c_line_color_index(window, color); - c_move(window, x, y); - do { - this_edge = this_edge->next; - x = this_edge->pos.x; - y = this_edge->pos.y; - c_draw(window, x, y); - } - while (edgept != this_edge); -} - - -/********************************************************************** - * render_outline - * - * Create a list of line segments that represent the expanded outline - * that was supplied as input. 
- **********************************************************************/ -void render_outline(void *window, - TESSLINE *outline, - C_COL color) { - /* No outline */ - if (!outline) - return; - /* Draw Compact outline */ - if (outline->loop) - render_edgepts (window, outline->loop, color); - /* Add on next outlines */ - render_outline (window, outline->next, color); -} - -#endif // GRAPHICS_DISABLED diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/render.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/render.h deleted file mode 100644 index 9346d01a..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/render.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: render.h (Formerly render.h) - * Description: Convert the various data type into line lists - * Author: Mark Seaman, OCR Technology - * Created: Fri Jul 28 13:14:48 1989 - * Modified: Fri Apr 26 09:59:45 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#ifndef RENDER_H -#define RENDER_H - -#include "callcpp.h" // for C_COL -#include "params.h" // for BOOL_VAR_H, BoolParam - -class ScrollView; - -struct EDGEPT; -struct TBLOB; -struct TESSLINE; - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern ScrollView *blob_window; /* Window for blobs */ -extern C_COL color_list[]; /* Colors for outlines */ - -extern BOOL_VAR_H(wordrec_display_all_blobs, 0, "Display Blobs"); - -extern BOOL_VAR_H(wordrec_display_all_words, 0, "Display Words"); - -extern BOOL_VAR_H(wordrec_blob_pause, 0, "Blob pause"); - -#define NUM_COLORS 6 - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void display_blob(TBLOB *blob, C_COL color); - -void render_blob(void *window, TBLOB *blob, C_COL color); - -void render_edgepts(void *window, EDGEPT *edgept, C_COL color); - -void render_outline(void *window, - TESSLINE *outline, - C_COL color); - -#endif diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/segsearch.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/segsearch.cpp deleted file mode 100644 index 95ef3d44..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/segsearch.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: segsearch.cpp -// Description: Segmentation search functions. -// Author: Daria Antonova -// Created: Mon Jun 23 11:26:43 PDT 2008 -// -// (C) Copyright 2009, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include // for INT32_MAX -#include "blamer.h" // for BlamerBundle -#include "errcode.h" // for ASSERT_HOST -#include "genericvector.h" // for GenericVector -#include "lm_pain_points.h" // for LMPainPoints, LM_PPTYPE_SHAPE, LMPainPoi... -#include "lm_state.h" // for BestChoiceBundle, ViterbiStateEntry -#include "matrix.h" // for MATRIX_COORD, MATRIX -#include "pageres.h" // for WERD_RES -#include "params.h" // for BoolParam, IntParam, DoubleParam -#include "ratngs.h" // for BLOB_CHOICE_LIST, BLOB_CHOICE_IT -#include "strngs.h" // for STRING -#include "tesscallback.h" // for TessResultCallback2 -#include "tprintf.h" // for tprintf -#include "wordrec.h" // for Wordrec, SegSearchPending (ptr only) - -namespace tesseract { - -void Wordrec::DoSegSearch(WERD_RES* word_res) { - BestChoiceBundle best_choice_bundle(word_res->ratings->dimension()); - // Run Segmentation Search. - SegSearch(word_res, &best_choice_bundle, nullptr); -} - -void Wordrec::SegSearch(WERD_RES* word_res, - BestChoiceBundle* best_choice_bundle, - BlamerBundle* blamer_bundle) { - LMPainPoints pain_points(segsearch_max_pain_points, - segsearch_max_char_wh_ratio, - assume_fixed_pitch_char_segment, - &getDict(), segsearch_debug_level); - // Compute scaling factor that will help us recover blob outline length - // from classifier rating and certainty for the blob. 
- float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale; - GenericVector pending; - InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle, - blamer_bundle); - - if (!SegSearchDone(0)) { // find a better choice - if (chop_enable && word_res->chopped_word != nullptr) { - improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle, - blamer_bundle, &pain_points, &pending); - } - if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array); - - if (blamer_bundle != nullptr && - !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) { - blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer); - } - } - // Keep trying to find a better path by fixing the "pain points". - - MATRIX_COORD pain_point; - float pain_point_priority; - int num_futile_classifications = 0; - STRING blamer_debug; - while (wordrec_enable_assoc && - (!SegSearchDone(num_futile_classifications) || - (blamer_bundle != nullptr && - blamer_bundle->GuidedSegsearchStillGoing()))) { - // Get the next valid "pain point". 
- bool found_nothing = true; - LMPainPointsType pp_type; - while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) != - LM_PPTYPE_NUM) { - if (!pain_point.Valid(*word_res->ratings)) { - word_res->ratings->IncreaseBandSize( - pain_point.row - pain_point.col + 1); - } - if (pain_point.Valid(*word_res->ratings) && - !word_res->ratings->Classified(pain_point.col, pain_point.row, - getDict().WildcardID())) { - found_nothing = false; - break; - } - } - if (found_nothing) { - if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n"); - break; - } - ProcessSegSearchPainPoint(pain_point_priority, pain_point, - LMPainPoints::PainPointDescription(pp_type), - &pending, word_res, &pain_points, blamer_bundle); - - UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending, - word_res, &pain_points, best_choice_bundle, - blamer_bundle); - if (!best_choice_bundle->updated) ++num_futile_classifications; - - if (segsearch_debug_level > 0) { - tprintf("num_futile_classifications %d\n", num_futile_classifications); - } - - best_choice_bundle->updated = false; // reset updated - - // See if it's time to terminate SegSearch or time for starting a guided - // search for the true path to find the blame for the incorrect best_choice. - if (SegSearchDone(num_futile_classifications) && - blamer_bundle != nullptr && - blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) { - InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, - &blamer_debug); - } - } // end while loop exploring alternative paths - if (blamer_bundle != nullptr) { - blamer_bundle->FinishSegSearch(word_res->best_choice, - wordrec_debug_blamer, &blamer_debug); - } - - if (segsearch_debug_level > 0) { - tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n", - language_model_->AcceptableChoiceFound()); - } -} - -// Setup and run just the initial segsearch on an established matrix, -// without doing any additional chopping or joining. 
-// (Internal factored version that can be used as part of the main SegSearch.) -void Wordrec::InitialSegSearch(WERD_RES* word_res, LMPainPoints* pain_points, - GenericVector* pending, - BestChoiceBundle* best_choice_bundle, - BlamerBundle* blamer_bundle) { - if (segsearch_debug_level > 0) { - tprintf("Starting SegSearch on ratings matrix%s:\n", - wordrec_enable_assoc ? " (with assoc)" : ""); - word_res->ratings->print(getDict().getUnicharset()); - } - - pain_points->GenerateInitial(word_res); - - // Compute scaling factor that will help us recover blob outline length - // from classifier rating and certainty for the blob. - float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale; - - language_model_->InitForWord(prev_word_best_choice_, - assume_fixed_pitch_char_segment, - segsearch_max_char_wh_ratio, rating_cert_scale); - - // Initialize blamer-related information: map character boxes recorded in - // blamer_bundle->norm_truth_word to the corresponding i,j indices in the - // ratings matrix. We expect this step to succeed, since when running the - // chopper we checked that the correct chops are present. - if (blamer_bundle != nullptr) { - blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word, - wordrec_debug_blamer); - } - - // pending[col] tells whether there is update work to do to combine - // best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *]. - // As the language model state is updated, pending entries are modified to - // minimize duplication of work. It is important that during the update the - // children are considered in the non-decreasing order of their column, since - // this guarantees that all the parents would be up to date before an update - // of a child is done. - pending->init_to_size(word_res->ratings->dimension(), SegSearchPending()); - - // Search the ratings matrix for the initial best path. 
- (*pending)[0].SetColumnClassified(); - UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res, - pain_points, best_choice_bundle, blamer_bundle); -} - -void Wordrec::UpdateSegSearchNodes( - float rating_cert_scale, - int starting_col, - GenericVector* pending, - WERD_RES *word_res, - LMPainPoints *pain_points, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { - MATRIX *ratings = word_res->ratings; - ASSERT_HOST(ratings->dimension() == pending->size()); - ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size()); - for (int col = starting_col; col < ratings->dimension(); ++col) { - if (!(*pending)[col].WorkToDo()) continue; - int first_row = col; - int last_row = std::min(ratings->dimension() - 1, - col + ratings->bandwidth() - 1); - if ((*pending)[col].SingleRow() >= 0) { - first_row = last_row = (*pending)[col].SingleRow(); - } - if (segsearch_debug_level > 0) { - tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n", - col, first_row, last_row, - (*pending)[col].IsRowJustClassified(INT32_MAX)); - } - // Iterate over the pending list for this column. - for (int row = first_row; row <= last_row; ++row) { - // Update language model state of this child+parent pair. - BLOB_CHOICE_LIST *current_node = ratings->get(col, row); - LanguageModelState *parent_node = - col == 0 ? nullptr : best_choice_bundle->beam[col - 1]; - if (current_node != nullptr && - language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), - col, row, current_node, parent_node, - pain_points, word_res, - best_choice_bundle, blamer_bundle) && - row + 1 < ratings->dimension()) { - // Since the language model state of this entry changed, process all - // the child column. - (*pending)[row + 1].RevisitWholeColumn(); - if (segsearch_debug_level > 0) { - tprintf("Added child col=%d to pending\n", row + 1); - } - } // end if UpdateState. - } // end for row. - } // end for col. 
- if (best_choice_bundle->best_vse != nullptr) { - ASSERT_HOST(word_res->StatesAllValid()); - if (best_choice_bundle->best_vse->updated) { - pain_points->GenerateFromPath(rating_cert_scale, - best_choice_bundle->best_vse, word_res); - if (!best_choice_bundle->fixpt.empty()) { - pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt, - best_choice_bundle->best_vse, word_res); - } - } - } - // The segsearch is completed. Reset all updated flags on all VSEs and reset - // all pendings. - for (int col = 0; col < pending->size(); ++col) { - (*pending)[col].Clear(); - ViterbiStateEntry_IT - vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries); - for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) { - vse_it.data()->updated = false; - } - } -} - -void Wordrec::ProcessSegSearchPainPoint( - float pain_point_priority, - const MATRIX_COORD &pain_point, const char* pain_point_type, - GenericVector* pending, WERD_RES *word_res, - LMPainPoints *pain_points, BlamerBundle *blamer_bundle) { - if (segsearch_debug_level > 0) { - tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n", - pain_point_type, pain_point_priority, - pain_point.col, pain_point.row); - } - ASSERT_HOST(pain_points != nullptr); - MATRIX *ratings = word_res->ratings; - // Classify blob [pain_point.col pain_point.row] - if (!pain_point.Valid(*ratings)) { - ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col); - } - ASSERT_HOST(pain_point.Valid(*ratings)); - BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array, - pain_point.col, pain_point.row, - pain_point_type, - word_res->chopped_word, - blamer_bundle); - BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row); - if (lst == nullptr) { - ratings->put(pain_point.col, pain_point.row, classified); - } else { - // We can not delete old BLOB_CHOICEs, since they might contain - // ViterbiStateEntries that are parents of other "active" entries. 
- // Thus if the matrix cell already contains classifications we add - // the new ones to the beginning of the list. - BLOB_CHOICE_IT it(lst); - it.add_list_before(classified); - delete classified; // safe to delete, since empty after add_list_before() - classified = nullptr; - } - - if (segsearch_debug_level > 0) { - print_ratings_list("Updated ratings matrix with a new entry:", - ratings->get(pain_point.col, pain_point.row), - getDict().getUnicharset()); - ratings->print(getDict().getUnicharset()); - } - - // Insert initial "pain points" to join the newly classified blob - // with its left and right neighbors. - if (classified != nullptr && !classified->empty()) { - if (pain_point.col > 0) { - pain_points->GeneratePainPoint( - pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0, - true, segsearch_max_char_wh_ratio, word_res); - } - if (pain_point.row + 1 < ratings->dimension()) { - pain_points->GeneratePainPoint( - pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0, - true, segsearch_max_char_wh_ratio, word_res); - } - } - (*pending)[pain_point.col].SetBlobClassified(pain_point.row); -} - -// Resets enough of the results so that the Viterbi search is re-run. -// Needed when the n-gram model is enabled, as the multi-length comparison -// implementation will re-value existing paths to worse values. -void Wordrec::ResetNGramSearch(WERD_RES* word_res, - BestChoiceBundle* best_choice_bundle, - GenericVector* pending) { - // TODO(rays) More refactoring required here. - // Delete existing viterbi states. - for (int col = 0; col < best_choice_bundle->beam.size(); ++col) { - best_choice_bundle->beam[col]->Clear(); - } - // Reset best_choice_bundle. - word_res->ClearWordChoices(); - best_choice_bundle->best_vse = nullptr; - // Clear out all existing pendings and add a new one for the first column. 
- (*pending)[0].SetColumnClassified(); - for (int i = 1; i < pending->size(); ++i) - (*pending)[i].Clear(); -} - -void Wordrec::InitBlamerForSegSearch(WERD_RES *word_res, - LMPainPoints *pain_points, - BlamerBundle *blamer_bundle, - STRING *blamer_debug) { - pain_points->Clear(); // Clear pain points heap. - TessResultCallback2* pp_cb = NewPermanentTessCallback( - pain_points, &LMPainPoints::GenerateForBlamer, - static_cast(segsearch_max_char_wh_ratio), word_res); - blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings, - getDict().WildcardID(), wordrec_debug_blamer, - blamer_debug, pp_cb); - delete pp_cb; -} - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/tface.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/tface.cpp deleted file mode 100644 index 0b1aa5e0..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/tface.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/********************************************************************** - * File: tface.cpp (Formerly tface.c) - * Description: C side of the Tess/tessedit C/C++ interface. - * Author: Ray Smith - * Created: Mon Apr 27 11:57:06 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "callcpp.h" -#include "chop.h" -#include "chopper.h" -#include "globals.h" -#include "pageres.h" -#include "wordrec.h" -#include "featdefs.h" -#include "params_model.h" - -#include - -namespace tesseract { - -/** - * @name program_editup - * - * Initialize all the things in the program that need to be initialized. - * init_permute determines whether to initialize the permute functions - * and Dawg models. - */ -void Wordrec::program_editup(const char *textbase, - TessdataManager *init_classifier, - TessdataManager *init_dict) { - if (textbase != nullptr) imagefile = textbase; -#ifndef DISABLED_LEGACY_ENGINE - InitFeatureDefs(&feature_defs_); - InitAdaptiveClassifier(init_classifier); - if (init_dict) { - getDict().SetupForLoad(Dict::GlobalDawgCache()); - getDict().Load(lang, init_dict); - getDict().FinishLoad(); - } - pass2_ok_split = chop_ok_split; -#endif // ndef DISABLED_LEGACY_ENGINE -} - - -/** - * @name end_recog - * - * Cleanup and exit the recog program. - */ -int Wordrec::end_recog() { - program_editdown (0); - - return (0); -} - - -/** - * @name program_editdown - * - * This function holds any necessary post processing for the Wise Owl - * program. - */ -void Wordrec::program_editdown(int32_t elasped_time) { -#ifndef DISABLED_LEGACY_ENGINE - EndAdaptiveClassifier(); -#endif // ndef DISABLED_LEGACY_ENGINE - getDict().End(); -} - - -#ifndef DISABLED_LEGACY_ENGINE -/** - * @name set_pass1 - * - * Get ready to do some pass 1 stuff. - */ -void Wordrec::set_pass1() { - chop_ok_split.set_value(70.0); - language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS1); - SettupPass1(); -} - - -/** - * @name set_pass2 - * - * Get ready to do some pass 2 stuff. 
- */ -void Wordrec::set_pass2() { - chop_ok_split.set_value(pass2_ok_split); - language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS2); - SettupPass2(); -} - - -/** - * @name cc_recog - * - * Recognize a word. - */ -void Wordrec::cc_recog(WERD_RES *word) { - getDict().reset_hyphen_vars(word->word->flag(W_EOL)); - chop_word_main(word); - word->DebugWordChoices(getDict().stopper_debug_level >= 1, - getDict().word_to_debug.string()); - ASSERT_HOST(word->StatesAllValid()); -} -#endif // ndef DISABLED_LEGACY_ENGINE - - -/** - * @name dict_word() - * - * Test the dictionaries, returning NO_PERM (0) if not found, or one - * of the PermuterType values if found, according to the dictionary. - */ -int Wordrec::dict_word(const WERD_CHOICE &word) { - return getDict().valid_word(word); -} - - -#ifndef DISABLED_LEGACY_ENGINE -/** - * @name call_matcher - * - * Called from Tess with a blob in tess form. - * The blob may need rotating to the correct orientation for classification. - */ -BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *tessblob) { - // Rotate the blob for classification if necessary. 
- TBLOB* rotated_blob = tessblob->ClassifyNormalizeIfNeeded(); - if (rotated_blob == nullptr) { - rotated_blob = tessblob; - } - BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result - AdaptiveClassifier(rotated_blob, ratings); - if (rotated_blob != tessblob) { - delete rotated_blob; - } - return ratings; -} -#endif // ndef DISABLED_LEGACY_ENGINE - -} // namespace tesseract diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordclass.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordclass.cpp deleted file mode 100644 index 136cad63..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordclass.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: wordclass.cpp (Formerly wordclass.c) - * Description: Word classifier - * Author: Mark Seaman, OCR Technology - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I N C L U D E S -----------------------------------------------------------------------*/ - -#include "blamer.h" // for blamer_bundle -#include "callcpp.h" // for window_wait, C_COL -#include "params.h" // for BoolParam -#include "render.h" // for display_blob, blob_window, wordrec_blob_pause -#include "wordrec.h" // for Wordrec - -class BLOB_CHOICE_LIST; - -struct TBLOB; - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -namespace tesseract { -/** - * @name classify_blob - * - * Classify the this blob if it is not already recorded in the match - * table. Attempt to recognize this blob as a character. The recognition - * rating for this blob will be stored as a part of the blob. This value - * will also be returned to the caller. - * @param blob Current blob - * @param string The string to display in ScrollView - * @param color The colour to use when displayed with ScrollView - */ -BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *blob, - const char *string, C_COL color, - BlamerBundle *blamer_bundle) { -#ifndef GRAPHICS_DISABLED - if (wordrec_display_all_blobs) - display_blob(blob, color); -#endif - // TODO(rays) collapse with call_matcher and move all to wordrec.cpp. - BLOB_CHOICE_LIST* choices = call_matcher(blob); - // If a blob with the same bounding box as one of the truth character - // bounding boxes is not classified as the corresponding truth character - // blame character classifier for incorrect answer. 
- if (blamer_bundle != nullptr) { - blamer_bundle->BlameClassifier(getDict().getUnicharset(), - blob->bounding_box(), - *choices, - wordrec_debug_blamer); - } - #ifndef GRAPHICS_DISABLED - if (classify_debug_level && string) - print_ratings_list(string, choices, getDict().getUnicharset()); - - if (wordrec_blob_pause) - window_wait(blob_window); -#endif - - return choices; -} - -} // namespace tesseract; diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordrec.cpp b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordrec.cpp deleted file mode 100644 index 158bc6ce..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordrec.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: wordrec.cpp -// Description: wordrec class. -// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#include "wordrec.h" - -#ifdef DISABLED_LEGACY_ENGINE - -#include "params.h" - - -namespace tesseract { -Wordrec::Wordrec() : - // control parameters - - BOOL_MEMBER(wordrec_debug_blamer, false, - "Print blamer debug messages", params()), - - BOOL_MEMBER(wordrec_run_blamer, false, - "Try to set the blame for errors", params()) { - prev_word_best_choice_ = nullptr; -} - -} // namespace tesseract - -#else // DISABLED_LEGACY_ENGINE not defined - -#include "language_model.h" -#include "params.h" - - -namespace tesseract { -Wordrec::Wordrec() : - // control parameters - BOOL_MEMBER(merge_fragments_in_matrix, TRUE, - "Merge the fragments in the ratings matrix and delete them" - " after merging", params()), - BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information", - params()), - BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", - params()), - BOOL_MEMBER(force_word_assoc, FALSE, - "force associator to run regardless of what enable_assoc is." 
- " This is used for CJK where component grouping is necessary.", - CCUtil::params()), - double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state", - params()), - BOOL_MEMBER(fragments_guide_chopper, FALSE, - "Use information from fragments to guide chopping process", - params()), - INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", - params()), - double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", - params()), - INT_MEMBER(chop_debug, 0, "Chop debug", - params()), - BOOL_MEMBER(chop_enable, 1, "Chop enable", - params()), - BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", - params()), - INT_MEMBER(chop_split_length, 10000, "Split Length", - params()), - INT_MEMBER(chop_same_distance, 2, "Same distance", - params()), - INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", - params()), - INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", - params()), - BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()), - INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", - params()), - INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", - params()), - double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", - params()), - double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", - params()), - double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", - params()), - INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " - "above which we don't care that a chop is not near the center.", - params()), - double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", - params()), - double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", - params()), - double_MEMBER(chop_ok_split, 100.0, "OK split limit", - params()), - double_MEMBER(chop_good_split, 50.0, "Good split limit", - params()), - INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", - params()), - INT_MEMBER(segment_adjust_debug, 0, 
"Segmentation adjustment debug", - params()), - BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE, - "include fixed-pitch heuristics in char segmentation", - params()), - INT_MEMBER(wordrec_debug_level, 0, - "Debug level for wordrec", params()), - INT_MEMBER(wordrec_max_join_chunks, 4, - "Max number of broken pieces to associate", params()), - BOOL_MEMBER(wordrec_skip_no_truth_words, false, - "Only run OCR for words that had truth recorded in BlamerBundle", - params()), - BOOL_MEMBER(wordrec_debug_blamer, false, - "Print blamer debug messages", params()), - BOOL_MEMBER(wordrec_run_blamer, false, - "Try to set the blame for errors", params()), - INT_MEMBER(segsearch_debug_level, 0, - "SegSearch debug level", params()), - INT_MEMBER(segsearch_max_pain_points, 2000, - "Maximum number of pain points stored in the queue", - params()), - INT_MEMBER(segsearch_max_futile_classifications, 20, - "Maximum number of pain point classifications per chunk that" - " did not result in finding a better word choice.", - params()), - double_MEMBER(segsearch_max_char_wh_ratio, 2.0, - "Maximum character width-to-height ratio", params()), - BOOL_MEMBER(save_alt_choices, true, - "Save alternative paths found during chopping" - " and segmentation search", - params()), - pass2_ok_split(0.0f) { - prev_word_best_choice_ = nullptr; - language_model_.reset(new LanguageModel(&get_fontinfo_table(), - &(getDict()))); - fill_lattice_ = nullptr; -} - -} // namespace tesseract - -#endif // DISABLED_LEGACY_ENGINE diff --git a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordrec.h b/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordrec.h deleted file mode 100644 index a0aaa168..00000000 --- a/3rdparty/gpdf/3rdparty/tesseract/include/tesseract-ocr/wordrec/wordrec.h +++ /dev/null @@ -1,560 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: wordrec.h -// Description: wordrec class. 
-// Author: Samuel Charron -// -// (C) Copyright 2006, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_WORDREC_WORDREC_H_ -#define TESSERACT_WORDREC_WORDREC_H_ - -#ifdef DISABLED_LEGACY_ENGINE - -#include "config_auto.h" - -#include // for int16_t, int32_t -#include "callcpp.h" // for C_COL -#include "chop.h" // for PointHeap, MAX_NUM_POINTS -#include "classify.h" // for Classify -#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK -#include "findseam.h" // for SeamQueue, SeamPile -#include "genericvector.h" // for GenericVector -#include "oldlist.h" // for LIST -#include "params.h" // for INT_VAR_H, IntParam, BOOL_VAR_H, BoolP... -#include "points.h" // for ICOORD -#include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only), BLOB_CHOI... 
-#include "seam.h" // for SEAM (ptr only), PRIORITY -#include "stopper.h" // for DANGERR - -class EDGEPT_CLIST; -class MATRIX; -class STRING; -class TBOX; -class UNICHARSET; -class WERD_RES; - -namespace tesseract { class LMPainPoints; } -namespace tesseract { class TessdataManager; } -namespace tesseract { struct BestChoiceBundle; } - -struct BlamerBundle; -struct EDGEPT; -struct MATRIX_COORD; -struct SPLIT; -struct TBLOB; -struct TESSLINE; -struct TWERD; - -namespace tesseract { - -/* ccmain/tstruct.cpp */ - -class Wordrec : public Classify { - public: - // config parameters - - BOOL_VAR_H(wordrec_debug_blamer, false, "Print blamer debug messages"); - - BOOL_VAR_H(wordrec_run_blamer, false, "Try to set the blame for errors"); - - // methods - Wordrec(); - virtual ~Wordrec() = default; - - // tface.cpp - void program_editup(const char *textbase, TessdataManager *init_classifier, - TessdataManager *init_dict); - void program_editdown(int32_t elasped_time); - int end_recog(); - int dict_word(const WERD_CHOICE &word); - - // Member variables - WERD_CHOICE *prev_word_best_choice_; -}; - -} // namespace tesseract - -#else // DISABLED_LEGACY_ENGINE not defined - -#include "associate.h" -#include "classify.h" -#include "dict.h" -#include "language_model.h" -#include "ratngs.h" -#include "matrix.h" -#include "seam.h" -#include "findseam.h" -#include "callcpp.h" - -#include - -class WERD_RES; - -namespace tesseract { - -// A class for storing which nodes are to be processed by the segmentation -// search. There is a single SegSearchPending for each column in the ratings -// matrix, and it indicates whether the segsearch should combine all -// BLOB_CHOICES in the column, or just the given row with the parents -// corresponding to *this SegSearchPending, and whether only updated parent -// ViterbiStateEntries should be combined, or all, with the BLOB_CHOICEs. 
-class SegSearchPending { - public: - SegSearchPending() - : classified_row_(-1), - revisit_whole_column_(false), - column_classified_(false) {} - - // Marks the whole column as just classified. Used to start a search on - // a newly initialized ratings matrix. - void SetColumnClassified() { - column_classified_ = true; - } - // Marks the matrix entry at the given row as just classified. - // Used after classifying a new matrix cell. - // Additional to, not overriding a previous RevisitWholeColumn. - void SetBlobClassified(int row) { - classified_row_ = row; - } - // Marks the whole column as needing work, but not just classified. - // Used when the parent vse list is updated. - // Additional to, not overriding a previous SetBlobClassified. - void RevisitWholeColumn() { - revisit_whole_column_ = true; - } - - // Clears *this to indicate no work to do. - void Clear() { - classified_row_ = -1; - revisit_whole_column_ = false; - column_classified_ = false; - } - - // Returns true if there are updates to do in the column that *this - // represents. - bool WorkToDo() const { - return revisit_whole_column_ || column_classified_ || classified_row_ >= 0; - } - // Returns true if the given row was just classified. - bool IsRowJustClassified(int row) const { - return row == classified_row_ || column_classified_; - } - // Returns the single row to process if there is only one, otherwise -1. - int SingleRow() const { - return revisit_whole_column_ || column_classified_ ? -1 : classified_row_; - } - - private: - // If non-negative, indicates the single row in the ratings matrix that has - // just been classified, and so should be combined with all the parents in the - // column that this SegSearchPending represents. - // Operates independently of revisit_whole_column. - int classified_row_; - // If revisit_whole_column is true, then all BLOB_CHOICEs in this column will - // be processed, but classified_row can indicate a row that is newly - // classified. 
Overridden if column_classified is true. - bool revisit_whole_column_; - // If column_classified is true, parent vses are processed with all rows - // regardless of whether they are just updated, overriding - // revisit_whole_column and classified_row. - bool column_classified_; -}; - - -/* ccmain/tstruct.cpp *********************************************************/ -class FRAGMENT:public ELIST_LINK -{ - public: - FRAGMENT() { //constructor - } - FRAGMENT(EDGEPT *head_pt, //start - EDGEPT *tail_pt); //end - - ICOORD head; //coords of start - ICOORD tail; //coords of end - EDGEPT *headpt; //start point - EDGEPT *tailpt; //end point -}; -ELISTIZEH(FRAGMENT) - - -class Wordrec : public Classify { - public: - // config parameters ******************************************************* - BOOL_VAR_H(merge_fragments_in_matrix, TRUE, - "Merge the fragments in the ratings matrix and delete them " - "after merging"); - BOOL_VAR_H(wordrec_no_block, FALSE, "Don't output block information"); - BOOL_VAR_H(wordrec_enable_assoc, TRUE, "Associator Enable"); - BOOL_VAR_H(force_word_assoc, FALSE, - "force associator to run regardless of what enable_assoc is." 
- "This is used for CJK where component grouping is necessary."); - double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state"); - BOOL_VAR_H(fragments_guide_chopper, FALSE, - "Use information from fragments to guide chopping process"); - INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped"); - double_VAR_H(tessedit_certainty_threshold, -2.25, "Good blob limit"); - INT_VAR_H(chop_debug, 0, "Chop debug"); - BOOL_VAR_H(chop_enable, 1, "Chop enable"); - BOOL_VAR_H(chop_vertical_creep, 0, "Vertical creep"); - INT_VAR_H(chop_split_length, 10000, "Split Length"); - INT_VAR_H(chop_same_distance, 2, "Same distance"); - INT_VAR_H(chop_min_outline_points, 6, "Min Number of Points on Outline"); - INT_VAR_H(chop_seam_pile_size, 150, "Max number of seams in seam_pile"); - BOOL_VAR_H(chop_new_seam_pile, 1, "Use new seam_pile"); - INT_VAR_H(chop_inside_angle, -50, "Min Inside Angle Bend"); - INT_VAR_H(chop_min_outline_area, 2000, "Min Outline Area"); - double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment"); - double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment"); - double_VAR_H(chop_center_knob, 0.15, "Split center adjustment"); - INT_VAR_H(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " - "above which we don't care that a chop is not near the center."); - double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment"); - double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment"); - double_VAR_H(chop_ok_split, 100.0, "OK split limit"); - double_VAR_H(chop_good_split, 50.0, "Good split limit"); - INT_VAR_H(chop_x_y_weight, 3, "X / Y length weight"); - INT_VAR_H(segment_adjust_debug, 0, "Segmentation adjustment debug"); - BOOL_VAR_H(assume_fixed_pitch_char_segment, FALSE, - "include fixed-pitch heuristics in char segmentation"); - INT_VAR_H(wordrec_debug_level, 0, "Debug level for wordrec"); - INT_VAR_H(wordrec_max_join_chunks, 4, - "Max number of broken pieces to associate"); - 
BOOL_VAR_H(wordrec_skip_no_truth_words, false, - "Only run OCR for words that had truth recorded in BlamerBundle"); - BOOL_VAR_H(wordrec_debug_blamer, false, "Print blamer debug messages"); - BOOL_VAR_H(wordrec_run_blamer, false, "Try to set the blame for errors"); - INT_VAR_H(segsearch_debug_level, 0, "SegSearch debug level"); - INT_VAR_H(segsearch_max_pain_points, 2000, - "Maximum number of pain points stored in the queue"); - INT_VAR_H(segsearch_max_futile_classifications, 10, - "Maximum number of pain point classifications per word."); - double_VAR_H(segsearch_max_char_wh_ratio, 2.0, - "Maximum character width-to-height ratio"); - BOOL_VAR_H(save_alt_choices, true, - "Save alternative paths found during chopping " - "and segmentation search"); - - // methods from wordrec/*.cpp *********************************************** - Wordrec(); - virtual ~Wordrec() = default; - - // Fills word->alt_choices with alternative paths found during - // chopping/segmentation search that are kept in best_choices. - void SaveAltChoices(const LIST &best_choices, WERD_RES *word); - - // Fills character choice lattice in the given BlamerBundle - // using the given ratings matrix and best choice list. - void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, BlamerBundle *blamer_bundle); - - // Calls fill_lattice_ member function - // (assumes that fill_lattice_ is not nullptr). 
- void CallFillLattice(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle) { - (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle); - } - - // tface.cpp - void program_editup(const char *textbase, TessdataManager *init_classifier, - TessdataManager *init_dict); - void cc_recog(WERD_RES *word); - void program_editdown(int32_t elasped_time); - void set_pass1(); - void set_pass2(); - int end_recog(); - BLOB_CHOICE_LIST *call_matcher(TBLOB* blob); - int dict_word(const WERD_CHOICE &word); - // wordclass.cpp - BLOB_CHOICE_LIST *classify_blob(TBLOB *blob, - const char *string, - C_COL color, - BlamerBundle *blamer_bundle); - - // segsearch.cpp - // SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs. - // Each entry in the matrix represents the classification choice - // for a chunk, i.e. an entry in row 2, column 1 represents the list - // of ratings for the chunks 1 and 2 classified as a single blob. - // The entries on the diagonal of the matrix are classifier choice lists - // for a single chunk from the maximal segmentation. - // - // The ratings matrix given to SegSearch represents the segmentation - // graph / trellis for the current word. The nodes in the graph are the - // individual BLOB_CHOICEs in each of the BLOB_CHOICE_LISTs in the ratings - // matrix. The children of each node (nodes connected by outgoing links) - // are the entries in the column that is equal to node's row+1. The parents - // (nodes connected by the incoming links) are the entries in the row that - // is equal to the node's column-1. Here is an example ratings matrix: - // - // 0 1 2 3 4 - // ------------------------- - // 0| c,( | - // 1| d l,1 | - // 2| o | - // 3| c,( | - // 4| g,y l,1 | - // ------------------------- - // - // In the example above node "o" has children (outgoing connection to nodes) - // "c","(","g","y" and parents (incoming connections from nodes) "l","1","d". 
- // - // The objective of the search is to find the least cost path, where the cost - // is determined by the language model components and the properties of the - // cut between the blobs on the path. SegSearch starts by populating the - // matrix with the all the entries that were classified by the chopper and - // finding the initial best path. Based on the classifier ratings, language - // model scores and the properties of each cut, a list of "pain points" is - // constructed - those are the points on the path where the choices do not - // look consistent with the neighboring choices, the cuts look particularly - // problematic, or the certainties of the blobs are low. The most troublesome - // "pain point" is picked from the list and the new entry in the ratings - // matrix corresponding to this "pain point" is filled in. Then the language - // model state is updated to reflect the new classification and the new - // "pain points" are added to the list and the next most troublesome - // "pain point" is determined. This continues until either the word choice - // composed from the best paths in the segmentation graph is "good enough" - // (e.g. above a certain certainty threshold, is an unambiguous dictionary - // word, etc) or there are no more "pain points" to explore. - // - // If associate_blobs is set to false no new classifications will be done - // to combine blobs. Segmentation search will run only one "iteration" - // on the classifications already recorded in chunks_record.ratings. - // - // Note: this function assumes that word_res, best_choice_bundle arguments - // are not nullptr. - void SegSearch(WERD_RES* word_res, - BestChoiceBundle* best_choice_bundle, - BlamerBundle* blamer_bundle); - - // Setup and run just the initial segsearch on an established matrix, - // without doing any additional chopping or joining. - // (Internal factored version that can be used as part of the main SegSearch.) 
- void InitialSegSearch(WERD_RES* word_res, LMPainPoints* pain_points, - GenericVector* pending, - BestChoiceBundle* best_choice_bundle, - BlamerBundle* blamer_bundle); - - // Runs SegSearch() function (above) without needing a best_choice_bundle - // or blamer_bundle. Used for testing. - void DoSegSearch(WERD_RES* word_res); - - // chop.cpp - PRIORITY point_priority(EDGEPT *point); - void add_point_to_list(PointHeap* point_heap, EDGEPT *point); - // Returns true if the edgept supplied as input is an inside angle. This - // is determined by the angular change of the vectors from point to point. - bool is_inside_angle(EDGEPT *pt); - int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); - EDGEPT *pick_close_point(EDGEPT *critical_point, - EDGEPT *vertical_point, - int *best_dist); - void prioritize_points(TESSLINE *outline, PointHeap* points); - void new_min_point(EDGEPT *local_min, PointHeap* points); - void new_max_point(EDGEPT *local_max, PointHeap* points); - void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, - EDGEPT** best_point, - EDGEPT_CLIST *new_points); - - // chopper.cpp - SEAM *attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, - bool italic_blob, const GenericVector& seams); - SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number, - bool italic_blob, const GenericVector& seams); - SEAM *chop_overlapping_blob(const GenericVector& boxes, - bool italic_blob, - WERD_RES *word_res, int *blob_number); - SEAM *improve_one_blob(const GenericVector &blob_choices, - DANGERR *fixpt, - bool split_next_to_fragment, - bool italic_blob, - WERD_RES *word, - int *blob_number); - SEAM *chop_one_blob(const GenericVector &boxes, - const GenericVector &blob_choices, - WERD_RES *word_res, - int *blob_number); - void chop_word_main(WERD_RES *word); - void improve_by_chopping(float rating_cert_scale, - WERD_RES *word, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle, - LMPainPoints *pain_points, - 
GenericVector* pending); - int select_blob_to_split(const GenericVector &blob_choices, - float rating_ceiling, - bool split_next_to_fragment); - int select_blob_to_split_from_fixpt(DANGERR *fixpt); - - // findseam.cpp - void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams); - void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, - PRIORITY priority, SEAM **seam_result, TBLOB *blob, - SeamPile *seam_pile); - void combine_seam(const SeamPile& seam_pile, - const SEAM* seam, SeamQueue* seam_queue); - SEAM *pick_good_seam(TBLOB *blob); - void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, TBLOB * blob); - void try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - EDGEPT_CLIST *new_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, TBLOB * blob); - - // gradechop.cpp - PRIORITY grade_split_length(SPLIT *split); - PRIORITY grade_sharpness(SPLIT *split); - - // outlines.cpp - bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, - EDGEPT **near_pt); - - // pieces.cpp - virtual BLOB_CHOICE_LIST *classify_piece(const GenericVector& seams, - int16_t start, - int16_t end, - const char* description, - TWERD *word, - BlamerBundle *blamer_bundle); - // Try to merge fragments in the ratings matrix and put the result in - // the corresponding row and column - void merge_fragments(MATRIX *ratings, - int16_t num_blobs); - // Recursively go through the ratings matrix to find lists of fragments - // to be merged in the function merge_and_put_fragment_lists. - // current_frag is the position of the piece we are looking for. - // current_row is the row in the rating matrix we are currently at. - // start is the row we started initially, so that we can know where - // to append the results to the matrix. 
num_frag_parts is the total - // number of pieces we are looking for and num_blobs is the size of the - // ratings matrix. - void get_fragment_lists(int16_t current_frag, - int16_t current_row, - int16_t start, - int16_t num_frag_parts, - int16_t num_blobs, - MATRIX *ratings, - BLOB_CHOICE_LIST *choice_lists); - // Merge the fragment lists in choice_lists and append it to the - // ratings matrix - void merge_and_put_fragment_lists(int16_t row, - int16_t column, - int16_t num_frag_parts, - BLOB_CHOICE_LIST *choice_lists, - MATRIX *ratings); - // Filter the fragment list so that the filtered_choices only contain - // fragments that are in the correct position. choices is the list - // that we are going to filter. fragment_pos is the position in the - // fragment that we are looking for and num_frag_parts is the the - // total number of pieces. The result will be appended to - // filtered_choices. - void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, - int fragment_pos, - int num_frag_parts, - BLOB_CHOICE_LIST *filtered_choices); - - // Member variables. - - std::unique_ptr language_model_; - PRIORITY pass2_ok_split; - // Stores the best choice for the previous word in the paragraph. - // This variable is modified by PAGE_RES_IT when iterating over - // words to OCR on the page. - WERD_CHOICE *prev_word_best_choice_; - // Sums of blame reasons computed by the blamer. - GenericVector blame_reasons_; - // Function used to fill char choice lattices. - void (Wordrec::*fill_lattice_)(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle); - - protected: - inline bool SegSearchDone(int num_futile_classifications) { - return (language_model_->AcceptableChoiceFound() || - num_futile_classifications >= - segsearch_max_futile_classifications); - } - - // Updates the language model state recorded for the child entries specified - // in pending[starting_col]. 
Enqueues the children of the updated entries - // into pending and proceeds to update (and remove from pending) all the - // remaining entries in pending[col] (col >= starting_col). Upon termination - // of this function all the pending[col] lists will be empty. - // - // The arguments: - // - // starting_col: index of the column in chunks_record->ratings from - // which the update should be started - // - // pending: list of entries listing chunks_record->ratings entries - // that should be updated - // - // pain_points: priority heap listing the pain points generated by - // the language model - // - // temp_pain_points: temporary storage for tentative pain points generated - // by the language model after a single call to LanguageModel::UpdateState() - // (the argument is passed in rather than created before each - // LanguageModel::UpdateState() call to avoid dynamic memory re-allocation) - // - // best_choice_bundle: a collection of variables that should be updated - // if a new best choice is found - // - void UpdateSegSearchNodes( - float rating_cert_scale, - int starting_col, - GenericVector* pending, - WERD_RES *word_res, - LMPainPoints *pain_points, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle); - - // Process the given pain point: classify the corresponding blob, enqueue - // new pain points to join the newly classified blob with its neighbors. - void ProcessSegSearchPainPoint(float pain_point_priority, - const MATRIX_COORD &pain_point, - const char* pain_point_type, - GenericVector* pending, - WERD_RES *word_res, - LMPainPoints *pain_points, - BlamerBundle *blamer_bundle); - // Resets enough of the results so that the Viterbi search is re-run. - // Needed when the n-gram model is enabled, as the multi-length comparison - // implementation will re-value existing paths to worse values. 
- void ResetNGramSearch(WERD_RES* word_res, - BestChoiceBundle* best_choice_bundle, - GenericVector* pending); - - // Add pain points for classifying blobs on the correct segmentation path - // (so that we can evaluate correct segmentation path and discover the reason - // for incorrect result). - void InitBlamerForSegSearch(WERD_RES *word_res, - LMPainPoints *pain_points, - BlamerBundle *blamer_bundle, - STRING *blamer_debug); -}; - -} // namespace tesseract - -#endif // DISABLED_LEGACY_ENGINE - -#endif // TESSERACT_WORDREC_WORDREC_H_ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/IlmImf.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/IlmImf.lib deleted file mode 100644 index 14799832..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/IlmImf.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/cvblob.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/cvblob.lib deleted file mode 100644 index 9b3fc9ec..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/cvblob.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/ittnotify.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/ittnotify.lib deleted file mode 100644 index c4559a55..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/ittnotify.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjasper.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjasper.lib deleted file mode 100644 index 97ac9044..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjasper.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjpeg-turbo.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjpeg-turbo.lib deleted file mode 100644 index 7a28a843..00000000 Binary files 
a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjpeg-turbo.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjpeg.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjpeg.lib deleted file mode 100644 index bc56dcbb..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libjpeg.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libleptonica.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libleptonica.lib deleted file mode 100644 index f5a39711..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libleptonica.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libpng.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libpng.lib deleted file mode 100644 index ff0ac0a1..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libpng.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libprotobuf.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libprotobuf.lib deleted file mode 100644 index 65450617..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libprotobuf.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libtiff.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libtiff.lib deleted file mode 100644 index 3e3a2d68..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libtiff.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libwebp.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libwebp.lib deleted file mode 100644 index 3f4fbc3f..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/libwebp.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/quirc.lib 
b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/quirc.lib deleted file mode 100644 index d3510bde..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/quirc.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_api.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_api.lib deleted file mode 100644 index f47ddf19..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_api.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_arch.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_arch.lib deleted file mode 100644 index 66793130..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_arch.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccmain.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccmain.lib deleted file mode 100644 index b66799ed..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccmain.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccstruct.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccstruct.lib deleted file mode 100644 index f74d788f..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccstruct.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccutil.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccutil.lib deleted file mode 100644 index 6a896e56..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_ccutil.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_classify.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_classify.lib deleted file 
mode 100644 index d1f78348..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_classify.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_cutil.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_cutil.lib deleted file mode 100644 index 97a6c7e3..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_cutil.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_dict.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_dict.lib deleted file mode 100644 index e4c5231d..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_dict.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_lstm.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_lstm.lib deleted file mode 100644 index 2c3b156d..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_lstm.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_opencl.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_opencl.lib deleted file mode 100644 index dda32439..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_opencl.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_textord.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_textord.lib deleted file mode 100644 index c149b6a9..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_textord.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_viewer.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_viewer.lib deleted file mode 100644 index ff0d48f4..00000000 Binary files 
a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_viewer.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_wordrec.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_wordrec.lib deleted file mode 100644 index 9695048b..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/tesseract_wordrec.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/xtiff.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/xtiff.lib deleted file mode 100644 index 52e1dee4..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/xtiff.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/zlib.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/zlib.lib deleted file mode 100644 index a0f33a28..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/debug/zlib.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/IlmImf.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/IlmImf.lib deleted file mode 100644 index e7f61d3e..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/IlmImf.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/cvblob.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/cvblob.lib deleted file mode 100644 index 0d449a85..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/cvblob.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/ittnotify.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/ittnotify.lib deleted file mode 100644 index 651dc3f3..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/ittnotify.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjasper.lib 
b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjasper.lib deleted file mode 100644 index 4a5ea304..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjasper.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjpeg-turbo.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjpeg-turbo.lib deleted file mode 100644 index d068ce1c..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjpeg-turbo.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjpeg.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjpeg.lib deleted file mode 100644 index 56e667b2..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libjpeg.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libleptonica.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libleptonica.lib deleted file mode 100644 index ce5f31ec..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libleptonica.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libpng.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libpng.lib deleted file mode 100644 index cce78dc8..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libpng.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libprotobuf.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libprotobuf.lib deleted file mode 100644 index fa41798b..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libprotobuf.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libtiff.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libtiff.lib deleted file mode 100644 index c67c8210..00000000 Binary files 
a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libtiff.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libwebp.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libwebp.lib deleted file mode 100644 index c0034880..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/libwebp.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/quirc.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/quirc.lib deleted file mode 100644 index 693972a9..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/quirc.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_api.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_api.lib deleted file mode 100644 index b833f27d..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_api.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_arch.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_arch.lib deleted file mode 100644 index 85db5243..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_arch.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccmain.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccmain.lib deleted file mode 100644 index aedfb836..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccmain.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccstruct.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccstruct.lib deleted file mode 100644 index 8c67db8b..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccstruct.lib and /dev/null differ diff 
--git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccutil.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccutil.lib deleted file mode 100644 index 3cc423a7..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_ccutil.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_classify.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_classify.lib deleted file mode 100644 index 13fce29a..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_classify.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_cutil.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_cutil.lib deleted file mode 100644 index b04382b7..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_cutil.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_dict.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_dict.lib deleted file mode 100644 index 17f06212..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_dict.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_lstm.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_lstm.lib deleted file mode 100644 index 922e2f3f..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_lstm.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_opencl.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_opencl.lib deleted file mode 100644 index 1dce7493..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_opencl.lib and /dev/null differ diff --git 
a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_textord.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_textord.lib deleted file mode 100644 index d23ecde9..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_textord.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_viewer.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_viewer.lib deleted file mode 100644 index 2aba22bc..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_viewer.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_wordrec.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_wordrec.lib deleted file mode 100644 index 0912ec95..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/tesseract_wordrec.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/xtiff.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/xtiff.lib deleted file mode 100644 index a5efe02a..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/xtiff.lib and /dev/null differ diff --git a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/zlib.lib b/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/zlib.lib deleted file mode 100644 index 73b11d49..00000000 Binary files a/3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/release/zlib.lib and /dev/null differ diff --git a/3rdparty/gpdf/hg_gpdf.cpp b/3rdparty/gpdf/hg_gpdf.cpp deleted file mode 100644 index bf0546c3..00000000 --- a/3rdparty/gpdf/hg_gpdf.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "hg_gpdf.h" - -#if defined(WIN32) -#include "baseapi.h" -#include "allheaders.h" -#include "renderer.h" -#else -#include -#include -#include -#endif - -//#define USE_QT -#ifdef USE_QT -#include -#include -#endif - -Pix* createPix(const 
unsigned char * imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line, int dpi) -{ - int bpp = bytes_per_pixel * 8; - if (bpp == 0) bpp = 1; - Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); - pixSetXRes(pix, dpi); - pixSetYRes(pix, dpi); - l_uint32* data = pixGetData(pix); - int wpl = pixGetWpl(pix); - switch (bpp) - { - case 1: - for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) - for (int x = 0; x < width; ++x) - if (imagedata[x / 8] & (0x80 >> (x % 8))) - CLEAR_DATA_BIT(data, x); - else - SET_DATA_BIT(data, x); - break; - - case 8: - // Greyscale just copies the bytes in the right order. - for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) - for (int x = 0; x < width; ++x) - SET_DATA_BYTE(data, x, imagedata[x]); - break; - - case 24: - // Put the colors in the correct places in the line buffer. - for (int y = 0; y < height; ++y, imagedata += bytes_per_line) - for (int x = 0; x < width; ++x, ++data) { - SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); - SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); - SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); - } - break; - - case 32: - // Maintain byte order consistency across different endianness. - for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) - for (int x = 0; x < width; ++x) - data[x] = static_cast((imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | - (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]); - break; - - default: - break; - } - - pix->informat = bytes_per_pixel == 1 ? 
1 : 2; - if (bytes_per_pixel == 1) - { - PIXCMAP* colormap = pixcmapCreate(8); - LEPT_FREE(colormap->array); - colormap->array = reinterpret_cast(LEPT_CALLOC(256, sizeof(RGBA_QUAD))); - colormap->n = 256; - colormap->nalloc = 256; - colormap->depth = 8; - l_uint8* ptr = reinterpret_cast(colormap->array); - for (int i = 0; i < 256; i++) - ptr[i * 4 + 0] = ptr[i * 4 + 1] = ptr[i * 4 + 2] = ptr[i * 4 + 3] = static_cast(i); - pixSetColormap(pix, colormap); - } - pixSetXRes(pix, 200); - pixSetYRes(pix, 200); - return pix; -} - -HG_OCR::HG_OCR() - : m_ptr(nullptr) -{ -} - -HG_OCR::~HG_OCR() -{ - if (m_ptr) - { - reinterpret_cast(m_ptr)->End(); - delete reinterpret_cast(m_ptr); - } -} - -int HG_OCR::init(const char* trainFile, RECOGNITION_MODE mode) -{ - if (m_ptr) delete reinterpret_cast(m_ptr); - tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); - api->SetPageSegMode(tesseract::PSM_AUTO_OSD); - m_ptr = reinterpret_cast(api); - return api->Init(trainFile, mode == RECOGNITION_OCR ? "chi_sim" : "osd"); -} - -bool HG_OCR::ocr(const char** inputFileNames, int numOfFiles, const char* outputFileName, SAVE_MODE flag) -{ - if (m_ptr == nullptr) return false; - if (inputFileNames == nullptr || numOfFiles == 0) return false; - - tesseract::PointerVector renderers; - tesseract::TessBaseAPI *api = reinterpret_cast(m_ptr); - if (flag & SAVE_PDF) - renderers.push_back(new tesseract::TessPDFRenderer(outputFileName, api->GetDatapath(), false)); - if (flag & SAVE_TXT) - renderers.push_back(new tesseract::TessTextRenderer(outputFileName)); - -#ifdef USE_QT - QTime timer; - timer.start(); -#endif - - int numOfRenderer = renderers.length(); - for (int i = 0; i < numOfRenderer; i++) - renderers[i]->BeginDocument(""); - - for (int i = 0; i < numOfFiles; i++) - { - Pix* img = pixRead(inputFileNames[i]); - api->SetInputName(outputFileName); - api->SetImage(img); - api->Recognize(nullptr); - for (int i = 0; i < numOfRenderer; i++) - renderers[i]->AddImage(api); - pixDestroy(&img); - } 
- - for (int i = 0; i < numOfRenderer; i++) - renderers[i]->EndDocument(); - - renderers.clear(); - -#ifdef USE_QT - qDebug() << timer.elapsed(); -#endif - - return true; -} - -int HG_OCR::orientation(const unsigned char *data, int width, int height, int channels, int dpi) -{ - Pix* pix = createPix(data, width, height, channels, (width * channels + 3) / 4 * 4, dpi); - tesseract::TessBaseAPI *api = reinterpret_cast(m_ptr); - api->SetImage(pix); - int orient_deg = 1; - api->DetectOrientationScript(&orient_deg, nullptr, nullptr, nullptr); - pixDestroy(&pix); - return orient_deg; -} diff --git a/3rdparty/gpdf/hg_gpdf.h b/3rdparty/gpdf/hg_gpdf.h deleted file mode 100644 index 23bba470..00000000 --- a/3rdparty/gpdf/hg_gpdf.h +++ /dev/null @@ -1,85 +0,0 @@ -/* -* ==================================================== - -* ܣͼƬOCRɶҳPDF -* ߣά -* ʱ䣺2020/3/5 -* ޸ʱ䣺2020/3/5 -* 汾ţv1.0 - -* ==================================================== -*/ - -#ifndef HG_GPDF_H -#define HG_GPDF_H - -#if defined (_WIN32) - #if !defined (HG_GPDF_API_BUILD) - #define HG_GPDF_API __declspec(dllexport) - #else - #define HG_GPDF_API __declspec(dllimport) - #endif -#else - #define HG_GPDF_API -#endif - -class HG_GPDF_API HG_OCR -{ -public: - - //ʶģʽ - enum RECOGNITION_MODE - { - RECOGNITION_OSD, //ĸ巽ʶģʽ - RECOGNITION_OCR //ַʶģʽ - }; - - //ַʶ𱣴ģʽ - enum SAVE_MODE - { - SAVE_PDF = 0x01, //PDFַʶ - SAVE_TXT = 0x02, //TXTַʶ - SAVE_PDF_TXT = 0x03 //ͬʱPDFTXTַʶ - }; -public: - - HG_OCR(); - - ~HG_OCR(); - - /* - * ܣʼPDF - * trainFile:[in] ѵļ·ļĩβ'/'β - * mode[in] ѡ enum RECOGNITION_MODEĬֵΪRECOGNITION_OCR - * ֵ0ΪɹΪ쳣쳣鿴ӡ - */ - int init(const char* trainFile, RECOGNITION_MODE mode = RECOGNITION_OCR); - - /* - * ܣַʶ𣬲ı - * inputFileNames:[in] شʶͼƬ·ͬʱͼƬ· - * numOfFiles[in] inputFileNames - * numOfFiles[in] inputFileNames - * outputFileName[in] ʶ·ļ֣ļ׺ - * flag[in] ַʶ𱣴ģʽ enum SAVE_MODE - * ֵtrueΪɹΪ쳣쳣鿴ӡ - */ - bool ocr(const char** inputFileNames, int numOfFiles, const char* outputFileName, SAVE_MODE flag = SAVE_PDF); 
- - /* - * ܣıʶ - * data:[in] ͼָ - * width[in] ͼ - * height[in] ͼ߶ - * channels[in] ͼͨ - * dpi[in] ͼƬDPI - * ֵ˳ʱм0,90,180,270ֽ - */ - int orientation(const unsigned char* data, int width, int height, int channels, int dpi); - -private: - void* m_ptr; -}; - - -#endif //HG_GPDF_H diff --git a/3rdparty/gpdf/hg_gpdf.pro b/3rdparty/gpdf/hg_gpdf.pro deleted file mode 100644 index a8ca0618..00000000 --- a/3rdparty/gpdf/hg_gpdf.pro +++ /dev/null @@ -1,82 +0,0 @@ -QT -= gui core - -TEMPLATE = lib -TARGET = hg_gpdf -CONFIG += c++11 - -#QT -= gui - -#CONFIG += c++11 console -#CONFIG -= app_bundle - -# The following define makes your compiler emit warnings if you use -# any Qt feature that has been marked deprecated (the exact warnings -# depend on your compiler). Please consult the documentation of the -# deprecated API in order to know how to port your code away from it. -DEFINES += QT_DEPRECATED_WARNINGS - -# You can also make your code fail to compile if it uses deprecated APIs. -# In order to do so, uncomment the following line. -# You can also select to disable deprecated APIs only up to a certain version of Qt. 
-#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 - -win32 { - DEFINES += USE_STD_NAMESPACE _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS _CRT_SECURE_NO_WARNINGS WIN32 - DEFINES -= UNICODE - contains(QT_ARCH, i386) { - CONFIG(release, debug|release) { - DESTDIR += ../bin/x86/release - LIBS += -L$$PWD/3rdparty/tesseract/staticlib/x86/release -lzlib -llibjpeg -llibpng -llibtiff -llibleptonica -ltesseract_api \ - -ltesseract_arch -ltesseract_ccmain -ltesseract_ccstruct -ltesseract_ccutil -ltesseract_classify \ - -ltesseract_cutil -ltesseract_dict -ltesseract_lstm -ltesseract_opencl -ltesseract_textord \ - -ltesseract_viewer -ltesseract_wordrec -lcvblob -lxtiff Ws2_32.lib - } - CONFIG(debug, debug|release) { - DESTDIR += ../bin/x86/debug - LIBS += -L$$PWD/3rdparty/tesseract/staticlib/x86/debug -lzlib -llibjpeg -llibpng -llibtiff -llibleptonica -ltesseract_api \ - -ltesseract_arch -ltesseract_ccmain -ltesseract_ccstruct -ltesseract_ccutil -ltesseract_classify \ - -ltesseract_cutil -ltesseract_dict -ltesseract_lstm -ltesseract_opencl -ltesseract_textord \ - -ltesseract_viewer -ltesseract_wordrec -lcvblob -lxtiff -lWs2_32 - } - } else { - CONFIG(release, debug|release) { - DESTDIR += ../bin/x64/release - LIBS += -L$$PWD/bin/x64/release/ -lleptonica \ - -L$$PWD/../bin/x64/release/ -lHG_OCR - } - CONFIG(debug, debug|release) { - DESTDIR += ../bin/x64/debug - LIBS += -L$$PWD/../bin/x64/debug/ -lleptonica \ - -L$$PWD/../bin/x64/debug/ -lHG_OCR - } - } - - -INCLUDEPATH += $$PWD/3rdparty/tesseract/include/leptonica \ - $$PWD/3rdparty/tesseract/include/tesseract-ocr/api \ - $$PWD/3rdparty/tesseract/include/tesseract-ocr/ccutil \ - $$PWD/3rdparty/tesseract/include/tesseract-ocr/ccstruct \ - $$PWD/3rdparty/tesseract/include/tesseract-ocr/ccmain - -DEPENDPATH += $$PWD/3rdparty/tesseract/include/leptonica \ - $$PWD/3rdparty/tesseract/include/tesseract-ocr -} - -unix { - DEFINES += USE_STD_NAMESPACE 
_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS _CRT_SECURE_NO_WARNINGS UNIX - DEFINES -= UNICODE - - CONFIG(release, debug|release) { - DESTDIR += $$PWD/../bin/x86/release - } - CONFIG(debug, debug|release) { - DESTDIR += $$PWD/../bin/x86/debug - } - LIBS += -ltesseract -} - -SOURCES += \ - hg_gpdf.cpp - -HEADERS += \ - hg_gpdf.h diff --git a/3rdparty/gpdf/x86/staticlib/debug/hg_gpdf.lib b/3rdparty/gpdf/x86/staticlib/debug/hg_gpdf.lib deleted file mode 100644 index 0578c3f3..00000000 Binary files a/3rdparty/gpdf/x86/staticlib/debug/hg_gpdf.lib and /dev/null differ diff --git a/3rdparty/gpdf/x86/staticlib/release/hg_gpdf.lib b/3rdparty/gpdf/x86/staticlib/release/hg_gpdf.lib deleted file mode 100644 index 19fb9f5d..00000000 Binary files a/3rdparty/gpdf/x86/staticlib/release/hg_gpdf.lib and /dev/null differ diff --git a/3rdparty/hgOCR/include/hg_ocr4.h b/3rdparty/hgOCR/include/hg_ocr4.h deleted file mode 100644 index e979f336..00000000 --- a/3rdparty/hgOCR/include/hg_ocr4.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef HG_OCR4_H -#define HG_OCR4_H - -#if defined (_WIN32) - #if !defined (HG_OCR4_BUILD) - #define HG_OCR4_API __declspec(dllexport) - #else - #define HG_OCR4_API __declspec(dllimport) - #endif -#else - #define HG_OCR4_API -#endif - -class HG_OCR4_API HG_OCR4 -{ -public: - - enum PSM_TYPE - { - Orientation, - OCR - }; - -public: - HG_OCR4(); - - ~HG_OCR4(); - - int init (const char* filename, PSM_TYPE type); - - int getOrientation(unsigned char* imgData, int width, int height, int channels, int step); - - int getOCR(unsigned char* imageData, int width, int height, int channels, int dpi); - -private: - void init_orientation(const char *filename); - - void init_ocr(const char* filename); -private: - void* api; -}; - -#endif // HG_OCR4_H diff --git a/3rdparty/hgOCR/x86/bin/HG_OCR4.dll b/3rdparty/hgOCR/x86/bin/HG_OCR4.dll deleted file mode 100644 index 4ce44867..00000000 Binary files a/3rdparty/hgOCR/x86/bin/HG_OCR4.dll and /dev/null differ diff --git 
a/3rdparty/hgOCR/x86/lib/HG_OCR4.lib b/3rdparty/hgOCR/x86/lib/HG_OCR4.lib deleted file mode 100644 index 4899a91f..00000000 Binary files a/3rdparty/hgOCR/x86/lib/HG_OCR4.lib and /dev/null differ diff --git a/3rdparty/hgOCR/x86/staticlib/hg_gpdf.lib b/3rdparty/hgOCR/x86/staticlib/hg_gpdf.lib deleted file mode 100644 index bdd7c5de..00000000 Binary files a/3rdparty/hgOCR/x86/staticlib/hg_gpdf.lib and /dev/null differ diff --git a/3rdparty/turbojpeg/include/turbojpeg.h b/3rdparty/turbojpeg/include/turbojpeg.h deleted file mode 100644 index 9c0a3713..00000000 --- a/3rdparty/turbojpeg/include/turbojpeg.h +++ /dev/null @@ -1,1744 +0,0 @@ -/* - * Copyright (C)2009-2015, 2017 D. R. Commander. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - Neither the name of the libjpeg-turbo Project nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __TURBOJPEG_H__ -#define __TURBOJPEG_H__ - -#if defined(_WIN32) && defined(DLLDEFINE) -#define DLLEXPORT __declspec(dllexport) -#else -#define DLLEXPORT -#endif -#define DLLCALL - - -/** - * @addtogroup TurboJPEG - * TurboJPEG API. This API provides an interface for generating, decoding, and - * transforming planar YUV and JPEG images in memory. - * - * @anchor YUVnotes - * YUV Image Format Notes - * ---------------------- - * Technically, the JPEG format uses the YCbCr colorspace (which is technically - * not a colorspace but a color transform), but per the convention of the - * digital video community, the TurboJPEG API uses "YUV" to refer to an image - * format consisting of Y, Cb, and Cr image planes. - * - * Each plane is simply a 2D array of bytes, each byte representing the value - * of one of the components (Y, Cb, or Cr) at a particular location in the - * image. The width and height of each plane are determined by the image - * width, height, and level of chrominance subsampling. The luminance plane - * width is the image width padded to the nearest multiple of the horizontal - * subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of - * 4:1:1, 1 in the case of 4:4:4 or grayscale.) Similarly, the luminance plane - * height is the image height padded to the nearest multiple of the vertical - * subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 - * or grayscale.) 
This is irrespective of any additional padding that may be - * specified as an argument to the various YUV functions. The chrominance - * plane width is equal to the luminance plane width divided by the horizontal - * subsampling factor, and the chrominance plane height is equal to the - * luminance plane height divided by the vertical subsampling factor. - * - * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is - * used, then the luminance plane would be 36 x 35 bytes, and each of the - * chrominance planes would be 18 x 35 bytes. If you specify a line padding of - * 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and - * each of the chrominance planes would be 20 x 35 bytes. - * - * @{ - */ - - -/** - * The number of chrominance subsampling options - */ -#define TJ_NUMSAMP 6 - -/** - * Chrominance subsampling options. - * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK - * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of - * the Cb and Cr (chrominance) components can be discarded or averaged together - * to produce a smaller image with little perceptible loss of image clarity - * (the human eye is more sensitive to small changes in brightness than to - * small changes in color.) This is called "chrominance subsampling". - */ -enum TJSAMP { - /** - * 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG or - * YUV image will contain one chrominance component for every pixel in the - * source image. - */ - TJSAMP_444 = 0, - /** - * 4:2:2 chrominance subsampling. The JPEG or YUV image will contain one - * chrominance component for every 2x1 block of pixels in the source image. - */ - TJSAMP_422, - /** - * 4:2:0 chrominance subsampling. The JPEG or YUV image will contain one - * chrominance component for every 2x2 block of pixels in the source image. - */ - TJSAMP_420, - /** - * Grayscale. The JPEG or YUV image will contain no chrominance components. 
- */ - TJSAMP_GRAY, - /** - * 4:4:0 chrominance subsampling. The JPEG or YUV image will contain one - * chrominance component for every 1x2 block of pixels in the source image. - * - * @note 4:4:0 subsampling is not fully accelerated in libjpeg-turbo. - */ - TJSAMP_440, - /** - * 4:1:1 chrominance subsampling. The JPEG or YUV image will contain one - * chrominance component for every 4x1 block of pixels in the source image. - * JPEG images compressed with 4:1:1 subsampling will be almost exactly the - * same size as those compressed with 4:2:0 subsampling, and in the - * aggregate, both subsampling methods produce approximately the same - * perceptual quality. However, 4:1:1 is better able to reproduce sharp - * horizontal features. - * - * @note 4:1:1 subsampling is not fully accelerated in libjpeg-turbo. - */ - TJSAMP_411 -}; - -/** - * MCU block width (in pixels) for a given level of chrominance subsampling. - * MCU block sizes: - * - 8x8 for no subsampling or grayscale - * - 16x8 for 4:2:2 - * - 8x16 for 4:4:0 - * - 16x16 for 4:2:0 - * - 32x8 for 4:1:1 - */ -static const int tjMCUWidth[TJ_NUMSAMP] = { 8, 16, 16, 8, 8, 32 }; - -/** - * MCU block height (in pixels) for a given level of chrominance subsampling. - * MCU block sizes: - * - 8x8 for no subsampling or grayscale - * - 16x8 for 4:2:2 - * - 8x16 for 4:4:0 - * - 16x16 for 4:2:0 - * - 32x8 for 4:1:1 - */ -static const int tjMCUHeight[TJ_NUMSAMP] = { 8, 8, 16, 8, 16, 8 }; - - -/** - * The number of pixel formats - */ -#define TJ_NUMPF 12 - -/** - * Pixel formats - */ -enum TJPF { - /** - * RGB pixel format. The red, green, and blue components in the image are - * stored in 3-byte pixels in the order R, G, B from lowest to highest byte - * address within each pixel. - */ - TJPF_RGB = 0, - /** - * BGR pixel format. The red, green, and blue components in the image are - * stored in 3-byte pixels in the order B, G, R from lowest to highest byte - * address within each pixel. 
- */ - TJPF_BGR, - /** - * RGBX pixel format. The red, green, and blue components in the image are - * stored in 4-byte pixels in the order R, G, B from lowest to highest byte - * address within each pixel. The X component is ignored when compressing - * and undefined when decompressing. - */ - TJPF_RGBX, - /** - * BGRX pixel format. The red, green, and blue components in the image are - * stored in 4-byte pixels in the order B, G, R from lowest to highest byte - * address within each pixel. The X component is ignored when compressing - * and undefined when decompressing. - */ - TJPF_BGRX, - /** - * XBGR pixel format. The red, green, and blue components in the image are - * stored in 4-byte pixels in the order R, G, B from highest to lowest byte - * address within each pixel. The X component is ignored when compressing - * and undefined when decompressing. - */ - TJPF_XBGR, - /** - * XRGB pixel format. The red, green, and blue components in the image are - * stored in 4-byte pixels in the order B, G, R from highest to lowest byte - * address within each pixel. The X component is ignored when compressing - * and undefined when decompressing. - */ - TJPF_XRGB, - /** - * Grayscale pixel format. Each 1-byte pixel represents a luminance - * (brightness) level from 0 to 255. - */ - TJPF_GRAY, - /** - * RGBA pixel format. This is the same as @ref TJPF_RGBX, except that when - * decompressing, the X component is guaranteed to be 0xFF, which can be - * interpreted as an opaque alpha channel. - */ - TJPF_RGBA, - /** - * BGRA pixel format. This is the same as @ref TJPF_BGRX, except that when - * decompressing, the X component is guaranteed to be 0xFF, which can be - * interpreted as an opaque alpha channel. - */ - TJPF_BGRA, - /** - * ABGR pixel format. This is the same as @ref TJPF_XBGR, except that when - * decompressing, the X component is guaranteed to be 0xFF, which can be - * interpreted as an opaque alpha channel. - */ - TJPF_ABGR, - /** - * ARGB pixel format. 
This is the same as @ref TJPF_XRGB, except that when - * decompressing, the X component is guaranteed to be 0xFF, which can be - * interpreted as an opaque alpha channel. - */ - TJPF_ARGB, - /** - * CMYK pixel format. Unlike RGB, which is an additive color model used - * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive - * color model used primarily for printing. In the CMYK color model, the - * value of each color component typically corresponds to an amount of cyan, - * magenta, yellow, or black ink that is applied to a white background. In - * order to convert between CMYK and RGB, it is necessary to use a color - * management system (CMS.) A CMS will attempt to map colors within the - * printer's gamut to perceptually similar colors in the display's gamut and - * vice versa, but the mapping is typically not 1:1 or reversible, nor can it - * be defined with a simple formula. Thus, such a conversion is out of scope - * for a codec library. However, the TurboJPEG API allows for compressing - * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK - * JPEG images into CMYK pixels. - */ - TJPF_CMYK, - /** - * Unknown pixel format. Currently this is only used by #tjLoadImage(). - */ - TJPF_UNKNOWN = -1 -}; - -/** - * Red offset (in bytes) for a given pixel format. This specifies the number - * of bytes that the red component is offset from the start of the pixel. For - * instance, if a pixel of format TJ_BGRX is stored in char pixel[], - * then the red component will be pixel[tjRedOffset[TJ_BGRX]]. This - * will be -1 if the pixel format does not have a red component. - */ -static const int tjRedOffset[TJ_NUMPF] = { - 0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1 -}; -/** - * Green offset (in bytes) for a given pixel format. This specifies the number - * of bytes that the green component is offset from the start of the pixel. 
- * For instance, if a pixel of format TJ_BGRX is stored in - * char pixel[], then the green component will be - * pixel[tjGreenOffset[TJ_BGRX]]. This will be -1 if the pixel format - * does not have a green component. - */ -static const int tjGreenOffset[TJ_NUMPF] = { - 1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1 -}; -/** - * Blue offset (in bytes) for a given pixel format. This specifies the number - * of bytes that the Blue component is offset from the start of the pixel. For - * instance, if a pixel of format TJ_BGRX is stored in char pixel[], - * then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. This - * will be -1 if the pixel format does not have a blue component. - */ -static const int tjBlueOffset[TJ_NUMPF] = { - 2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1 -}; -/** - * Alpha offset (in bytes) for a given pixel format. This specifies the number - * of bytes that the Alpha component is offset from the start of the pixel. - * For instance, if a pixel of format TJ_BGRA is stored in - * char pixel[], then the alpha component will be - * pixel[tjAlphaOffset[TJ_BGRA]]. This will be -1 if the pixel format - * does not have an alpha component. - */ -static const int tjAlphaOffset[TJ_NUMPF] = { - -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 -}; -/** - * Pixel size (in bytes) for a given pixel format - */ -static const int tjPixelSize[TJ_NUMPF] = { - 3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4 -}; - - -/** - * The number of JPEG colorspaces - */ -#define TJ_NUMCS 5 - -/** - * JPEG colorspaces - */ -enum TJCS { - /** - * RGB colorspace. When compressing the JPEG image, the R, G, and B - * components in the source image are reordered into image planes, but no - * colorspace conversion or subsampling is performed. RGB JPEG images can be - * decompressed to any of the extended RGB pixel formats or grayscale, but - * they cannot be decompressed to YUV images. - */ - TJCS_RGB = 0, - /** - * YCbCr colorspace. 
YCbCr is not an absolute colorspace but rather a - * mathematical transformation of RGB designed solely for storage and - * transmission. YCbCr images must be converted to RGB before they can - * actually be displayed. In the YCbCr colorspace, the Y (luminance) - * component represents the black & white portion of the original image, and - * the Cb and Cr (chrominance) components represent the color portion of the - * original image. Originally, the analog equivalent of this transformation - * allowed the same signal to drive both black & white and color televisions, - * but JPEG images use YCbCr primarily because it allows the color data to be - * optionally subsampled for the purposes of reducing bandwidth or disk - * space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images - * can be compressed from and decompressed to any of the extended RGB pixel - * formats or grayscale, or they can be decompressed to YUV planar images. - */ - TJCS_YCbCr, - /** - * Grayscale colorspace. The JPEG image retains only the luminance data (Y - * component), and any color data from the source image is discarded. - * Grayscale JPEG images can be compressed from and decompressed to any of - * the extended RGB pixel formats or grayscale, or they can be decompressed - * to YUV planar images. - */ - TJCS_GRAY, - /** - * CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K - * components in the source image are reordered into image planes, but no - * colorspace conversion or subsampling is performed. CMYK JPEG images can - * only be decompressed to CMYK pixels. - */ - TJCS_CMYK, - /** - * YCCK colorspace. YCCK (AKA "YCbCrK") is not an absolute colorspace but - * rather a mathematical transformation of CMYK designed solely for storage - * and transmission. It is to CMYK as YCbCr is to RGB. 
CMYK pixels can be - * reversibly transformed into YCCK, and as with YCbCr, the chrominance - * components in the YCCK pixels can be subsampled without incurring major - * perceptual loss. YCCK JPEG images can only be compressed from and - * decompressed to CMYK pixels. - */ - TJCS_YCCK -}; - - -/** - * The uncompressed source/destination image is stored in bottom-up (Windows, - * OpenGL) order, not top-down (X11) order. - */ -#define TJFLAG_BOTTOMUP 2 -/** - * When decompressing an image that was compressed using chrominance - * subsampling, use the fastest chrominance upsampling algorithm available in - * the underlying codec. The default is to use smooth upsampling, which - * creates a smooth transition between neighboring chrominance components in - * order to reduce upsampling artifacts in the decompressed image. - */ -#define TJFLAG_FASTUPSAMPLE 256 -/** - * Disable buffer (re)allocation. If passed to one of the JPEG compression or - * transform functions, this flag will cause those functions to generate an - * error if the JPEG image buffer is invalid or too small rather than - * attempting to allocate or reallocate that buffer. This reproduces the - * behavior of earlier versions of TurboJPEG. - */ -#define TJFLAG_NOREALLOC 1024 -/** - * Use the fastest DCT/IDCT algorithm available in the underlying codec. The - * default if this flag is not specified is implementation-specific. For - * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast - * algorithm by default when compressing, because this has been shown to have - * only a very slight effect on accuracy, but it uses the accurate algorithm - * when decompressing, because this has been shown to have a larger effect. - */ -#define TJFLAG_FASTDCT 2048 -/** - * Use the most accurate DCT/IDCT algorithm available in the underlying codec. - * The default if this flag is not specified is implementation-specific. 
For - * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast - * algorithm by default when compressing, because this has been shown to have - * only a very slight effect on accuracy, but it uses the accurate algorithm - * when decompressing, because this has been shown to have a larger effect. - */ -#define TJFLAG_ACCURATEDCT 4096 -/** - * Immediately discontinue the current compression/decompression/transform - * operation if the underlying codec throws a warning (non-fatal error). The - * default behavior is to allow the operation to complete unless a fatal error - * is encountered. - */ -#define TJFLAG_STOPONWARNING 8192 -/** - * Use progressive entropy coding in JPEG images generated by the compression - * and transform functions. Progressive entropy coding will generally improve - * compression relative to baseline entropy coding (the default), but it will - * reduce compression and decompression performance considerably. - */ -#define TJFLAG_PROGRESSIVE 16384 - - -/** - * The number of error codes - */ -#define TJ_NUMERR 2 - -/** - * Error codes - */ -enum TJERR { - /** - * The error was non-fatal and recoverable, but the image may still be - * corrupt. - */ - TJERR_WARNING = 0, - /** - * The error was fatal and non-recoverable. - */ - TJERR_FATAL -}; - - -/** - * The number of transform operations - */ -#define TJ_NUMXOP 8 - -/** - * Transform operations for #tjTransform() - */ -enum TJXOP { - /** - * Do not transform the position of the image pixels - */ - TJXOP_NONE = 0, - /** - * Flip (mirror) image horizontally. This transform is imperfect if there - * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.) - */ - TJXOP_HFLIP, - /** - * Flip (mirror) image vertically. This transform is imperfect if there are - * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.) - */ - TJXOP_VFLIP, - /** - * Transpose image (flip/mirror along upper left to lower right axis.) This - * transform is always perfect. 
- */ - TJXOP_TRANSPOSE, - /** - * Transverse transpose image (flip/mirror along upper right to lower left - * axis.) This transform is imperfect if there are any partial MCU blocks in - * the image (see #TJXOPT_PERFECT.) - */ - TJXOP_TRANSVERSE, - /** - * Rotate image clockwise by 90 degrees. This transform is imperfect if - * there are any partial MCU blocks on the bottom edge (see - * #TJXOPT_PERFECT.) - */ - TJXOP_ROT90, - /** - * Rotate image 180 degrees. This transform is imperfect if there are any - * partial MCU blocks in the image (see #TJXOPT_PERFECT.) - */ - TJXOP_ROT180, - /** - * Rotate image counter-clockwise by 90 degrees. This transform is imperfect - * if there are any partial MCU blocks on the right edge (see - * #TJXOPT_PERFECT.) - */ - TJXOP_ROT270 -}; - - -/** - * This option will cause #tjTransform() to return an error if the transform is - * not perfect. Lossless transforms operate on MCU blocks, whose size depends - * on the level of chrominance subsampling used (see #tjMCUWidth - * and #tjMCUHeight.) If the image's width or height is not evenly divisible - * by the MCU block size, then there will be partial MCU blocks on the right - * and/or bottom edges. It is not possible to move these partial MCU blocks to - * the top or left of the image, so any transform that would require that is - * "imperfect." If this option is not specified, then any partial MCU blocks - * that cannot be transformed will be left in place, which will create - * odd-looking strips on the right or bottom edge of the image. - */ -#define TJXOPT_PERFECT 1 -/** - * This option will cause #tjTransform() to discard any partial MCU blocks that - * cannot be transformed. - */ -#define TJXOPT_TRIM 2 -/** - * This option will enable lossless cropping. See #tjTransform() for more - * information. - */ -#define TJXOPT_CROP 4 -/** - * This option will discard the color data in the input image and produce - * a grayscale output image. 
- */ -#define TJXOPT_GRAY 8 -/** - * This option will prevent #tjTransform() from outputting a JPEG image for - * this particular transform (this can be used in conjunction with a custom - * filter to capture the transformed DCT coefficients without transcoding - * them.) - */ -#define TJXOPT_NOOUTPUT 16 -/** - * This option will enable progressive entropy coding in the output image - * generated by this particular transform. Progressive entropy coding will - * generally improve compression relative to baseline entropy coding (the - * default), but it will reduce compression and decompression performance - * considerably. - */ -#define TJXOPT_PROGRESSIVE 32 -/** - * This option will prevent #tjTransform() from copying any extra markers - * (including EXIF and ICC profile data) from the source image to the output - * image. - */ -#define TJXOPT_COPYNONE 64 - - -/** - * Scaling factor - */ -typedef struct { - /** - * Numerator - */ - int num; - /** - * Denominator - */ - int denom; -} tjscalingfactor; - -/** - * Cropping region - */ -typedef struct { - /** - * The left boundary of the cropping region. This must be evenly divisible - * by the MCU block width (see #tjMCUWidth.) - */ - int x; - /** - * The upper boundary of the cropping region. This must be evenly divisible - * by the MCU block height (see #tjMCUHeight.) - */ - int y; - /** - * The width of the cropping region. Setting this to 0 is the equivalent of - * setting it to the width of the source JPEG image - x. - */ - int w; - /** - * The height of the cropping region. Setting this to 0 is the equivalent of - * setting it to the height of the source JPEG image - y. 
- */ - int h; -} tjregion; - -/** - * Lossless transform - */ -typedef struct tjtransform { - /** - * Cropping region - */ - tjregion r; - /** - * One of the @ref TJXOP "transform operations" - */ - int op; - /** - * The bitwise OR of one of more of the @ref TJXOPT_CROP "transform options" - */ - int options; - /** - * Arbitrary data that can be accessed within the body of the callback - * function - */ - void *data; - /** - * A callback function that can be used to modify the DCT coefficients - * after they are losslessly transformed but before they are transcoded to a - * new JPEG image. This allows for custom filters or other transformations - * to be applied in the frequency domain. - * - * @param coeffs pointer to an array of transformed DCT coefficients. (NOTE: - * this pointer is not guaranteed to be valid once the callback returns, so - * applications wishing to hand off the DCT coefficients to another function - * or library should make a copy of them within the body of the callback.) - * - * @param arrayRegion #tjregion structure containing the width and height of - * the array pointed to by coeffs as well as its offset relative to - * the component plane. TurboJPEG implementations may choose to split each - * component plane into multiple DCT coefficient arrays and call the callback - * function once for each array. - * - * @param planeRegion #tjregion structure containing the width and height of - * the component plane to which coeffs belongs - * - * @param componentID ID number of the component plane to which - * coeffs belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, - * and 2 in typical JPEG images.) - * - * @param transformID ID number of the transformed image to which - * coeffs belongs. This is the same as the index of the transform - * in the transforms array that was passed to #tjTransform(). 
- * - * @param transform a pointer to a #tjtransform structure that specifies the - * parameters and/or cropping region for this transform - * - * @return 0 if the callback was successful, or -1 if an error occurred. - */ - int (*customFilter) (short *coeffs, tjregion arrayRegion, - tjregion planeRegion, int componentIndex, - int transformIndex, struct tjtransform *transform); -} tjtransform; - -/** - * TurboJPEG instance handle - */ -typedef void *tjhandle; - - -/** - * Pad the given width to the nearest 32-bit boundary - */ -#define TJPAD(width) (((width) + 3) & (~3)) - -/** - * Compute the scaled value of dimension using the given scaling - * factor. This macro performs the integer equivalent of ceil(dimension * - * scalingFactor). - */ -#define TJSCALED(dimension, scalingFactor) \ - ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ - scalingFactor.denom) - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** - * Create a TurboJPEG compressor instance. - * - * @return a handle to the newly-created instance, or NULL if an error - * occurred (see #tjGetErrorStr2().) - */ -DLLEXPORT tjhandle tjInitCompress(void); - - -/** - * Compress an RGB, grayscale, or CMYK image into a JPEG image. - * - * @param handle a handle to a TurboJPEG compressor or transformer instance - * - * @param srcBuf pointer to an image buffer containing RGB, grayscale, or - * CMYK pixels to be compressed - * - * @param width width (in pixels) of the source image - * - * @param pitch bytes per line in the source image. Normally, this should be - * width * #tjPixelSize[pixelFormat] if the image is unpadded, or - * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image - * is padded to the nearest 32-bit boundary, as is the case for Windows - * bitmaps. You can also be clever and use this parameter to skip lines, etc. - * Setting this parameter to 0 is the equivalent of setting it to - * width * #tjPixelSize[pixelFormat]. 
- * - * @param height height (in pixels) of the source image - * - * @param pixelFormat pixel format of the source image (see @ref TJPF - * "Pixel formats".) - * - * @param jpegBuf address of a pointer to an image buffer that will receive the - * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer - * to accommodate the size of the JPEG image. Thus, you can choose to: - * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and - * let TurboJPEG grow the buffer as needed, - * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer - * for you, or - * -# pre-allocate the buffer to a "worst case" size determined by calling - * #tjBufSize(). This should ensure that the buffer never has to be - * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) - * . - * If you choose option 1, *jpegSize should be set to the size of your - * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, - * you should always check *jpegBuf upon return from this function, as - * it may have changed. - * - * @param jpegSize pointer to an unsigned long variable that holds the size of - * the JPEG image buffer. If *jpegBuf points to a pre-allocated - * buffer, then *jpegSize should be set to the size of the buffer. - * Upon return, *jpegSize will contain the size of the JPEG image (in - * bytes.) If *jpegBuf points to a JPEG image buffer that is being - * reused from a previous call to one of the JPEG compression functions, then - * *jpegSize is ignored. - * - * @param jpegSubsamp the level of chrominance subsampling to be used when - * generating the JPEG image (see @ref TJSAMP - * "Chrominance subsampling options".) - * - * @param jpegQual the image quality of the generated JPEG image (1 = worst, - * 100 = best) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) 
-*/ -DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, - int width, int pitch, int height, int pixelFormat, - unsigned char **jpegBuf, unsigned long *jpegSize, - int jpegSubsamp, int jpegQual, int flags); - - -/** - * Compress a YUV planar image into a JPEG image. - * - * @param handle a handle to a TurboJPEG compressor or transformer instance - * - * @param srcBuf pointer to an image buffer containing a YUV planar image to be - * compressed. The size of this buffer should match the value returned by - * #tjBufSizeYUV2() for the given image width, height, padding, and level of - * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be - * stored sequentially in the source buffer (refer to @ref YUVnotes - * "YUV Image Format Notes".) - * - * @param width width (in pixels) of the source image. If the width is not an - * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate - * buffer copy will be performed within TurboJPEG. - * - * @param pad the line padding used in the source image. For instance, if each - * line in each plane of the YUV image is padded to the nearest multiple of 4 - * bytes, then pad should be set to 4. - * - * @param height height (in pixels) of the source image. If the height is not - * an even multiple of the MCU block height (see #tjMCUHeight), then an - * intermediate buffer copy will be performed within TurboJPEG. - * - * @param subsamp the level of chrominance subsampling used in the source - * image (see @ref TJSAMP "Chrominance subsampling options".) - * - * @param jpegBuf address of a pointer to an image buffer that will receive the - * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to - * accommodate the size of the JPEG image. 
Thus, you can choose to: - * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and - * let TurboJPEG grow the buffer as needed, - * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer - * for you, or - * -# pre-allocate the buffer to a "worst case" size determined by calling - * #tjBufSize(). This should ensure that the buffer never has to be - * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) - * . - * If you choose option 1, *jpegSize should be set to the size of your - * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, - * you should always check *jpegBuf upon return from this function, as - * it may have changed. - * - * @param jpegSize pointer to an unsigned long variable that holds the size of - * the JPEG image buffer. If *jpegBuf points to a pre-allocated - * buffer, then *jpegSize should be set to the size of the buffer. - * Upon return, *jpegSize will contain the size of the JPEG image (in - * bytes.) If *jpegBuf points to a JPEG image buffer that is being - * reused from a previous call to one of the JPEG compression functions, then - * *jpegSize is ignored. - * - * @param jpegQual the image quality of the generated JPEG image (1 = worst, - * 100 = best) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) -*/ -DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, - int width, int pad, int height, int subsamp, - unsigned char **jpegBuf, - unsigned long *jpegSize, int jpegQual, - int flags); - - -/** - * Compress a set of Y, U (Cb), and V (Cr) image planes into a JPEG image. 
- * - * @param handle a handle to a TurboJPEG compressor or transformer instance - * - * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes - * (or just a Y plane, if compressing a grayscale image) that contain a YUV - * image to be compressed. These planes can be contiguous or non-contiguous in - * memory. The size of each plane should match the value returned by - * #tjPlaneSizeYUV() for the given image width, height, strides, and level of - * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" - * for more details. - * - * @param width width (in pixels) of the source image. If the width is not an - * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate - * buffer copy will be performed within TurboJPEG. - * - * @param strides an array of integers, each specifying the number of bytes per - * line in the corresponding plane of the YUV source image. Setting the stride - * for any plane to 0 is the same as setting it to the plane width (see - * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then - * the strides for all planes will be set to their respective plane widths. - * You can adjust the strides in order to specify an arbitrary amount of line - * padding in each plane or to create a JPEG image from a subregion of a larger - * YUV planar image. - * - * @param height height (in pixels) of the source image. If the height is not - * an even multiple of the MCU block height (see #tjMCUHeight), then an - * intermediate buffer copy will be performed within TurboJPEG. - * - * @param subsamp the level of chrominance subsampling used in the source - * image (see @ref TJSAMP "Chrominance subsampling options".) - * - * @param jpegBuf address of a pointer to an image buffer that will receive the - * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to - * accommodate the size of the JPEG image. 
Thus, you can choose to: - * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and - * let TurboJPEG grow the buffer as needed, - * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer - * for you, or - * -# pre-allocate the buffer to a "worst case" size determined by calling - * #tjBufSize(). This should ensure that the buffer never has to be - * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) - * . - * If you choose option 1, *jpegSize should be set to the size of your - * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, - * you should always check *jpegBuf upon return from this function, as - * it may have changed. - * - * @param jpegSize pointer to an unsigned long variable that holds the size of - * the JPEG image buffer. If *jpegBuf points to a pre-allocated - * buffer, then *jpegSize should be set to the size of the buffer. - * Upon return, *jpegSize will contain the size of the JPEG image (in - * bytes.) If *jpegBuf points to a JPEG image buffer that is being - * reused from a previous call to one of the JPEG compression functions, then - * *jpegSize is ignored. - * - * @param jpegQual the image quality of the generated JPEG image (1 = worst, - * 100 = best) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) -*/ -DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, - const unsigned char **srcPlanes, - int width, const int *strides, - int height, int subsamp, - unsigned char **jpegBuf, - unsigned long *jpegSize, int jpegQual, - int flags); - - -/** - * The maximum size of the buffer (in bytes) required to hold a JPEG image with - * the given parameters. The number of bytes returned by this function is - * larger than the size of the uncompressed source image. 
The reason for this - * is that the JPEG format uses 16-bit coefficients, and it is thus possible - * for a very high-quality JPEG image with very high-frequency content to - * expand rather than compress when converted to the JPEG format. Such images - * represent a very rare corner case, but since there is no way to predict the - * size of a JPEG image prior to compression, the corner case has to be - * handled. - * - * @param width width (in pixels) of the image - * - * @param height height (in pixels) of the image - * - * @param jpegSubsamp the level of chrominance subsampling to be used when - * generating the JPEG image (see @ref TJSAMP - * "Chrominance subsampling options".) - * - * @return the maximum size of the buffer (in bytes) required to hold the - * image, or -1 if the arguments are out of bounds. - */ -DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp); - - -/** - * The size of the buffer (in bytes) required to hold a YUV planar image with - * the given parameters. - * - * @param width width (in pixels) of the image - * - * @param pad the width of each line in each plane of the image is padded to - * the nearest multiple of this number of bytes (must be a power of 2.) - * - * @param height height (in pixels) of the image - * - * @param subsamp level of chrominance subsampling in the image (see - * @ref TJSAMP "Chrominance subsampling options".) - * - * @return the size of the buffer (in bytes) required to hold the image, or - * -1 if the arguments are out of bounds. - */ -DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height, - int subsamp); - - -/** - * The size of the buffer (in bytes) required to hold a YUV image plane with - * the given parameters. - * - * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) - * - * @param width width (in pixels) of the YUV image. NOTE: this is the width of - * the whole image, not the plane width. 
- * - * @param stride bytes per line in the image plane. Setting this to 0 is the - * equivalent of setting it to the plane width. - * - * @param height height (in pixels) of the YUV image. NOTE: this is the height - * of the whole image, not the plane height. - * - * @param subsamp level of chrominance subsampling in the image (see - * @ref TJSAMP "Chrominance subsampling options".) - * - * @return the size of the buffer (in bytes) required to hold the YUV image - * plane, or -1 if the arguments are out of bounds. - */ -DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride, - int height, int subsamp); - - -/** - * The plane width of a YUV image plane with the given parameters. Refer to - * @ref YUVnotes "YUV Image Format Notes" for a description of plane width. - * - * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) - * - * @param width width (in pixels) of the YUV image - * - * @param subsamp level of chrominance subsampling in the image (see - * @ref TJSAMP "Chrominance subsampling options".) - * - * @return the plane width of a YUV image plane with the given parameters, or - * -1 if the arguments are out of bounds. - */ -DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp); - - -/** - * The plane height of a YUV image plane with the given parameters. Refer to - * @ref YUVnotes "YUV Image Format Notes" for a description of plane height. - * - * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) - * - * @param height height (in pixels) of the YUV image - * - * @param subsamp level of chrominance subsampling in the image (see - * @ref TJSAMP "Chrominance subsampling options".) - * - * @return the plane height of a YUV image plane with the given parameters, or - * -1 if the arguments are out of bounds. - */ -DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp); - - -/** - * Encode an RGB or grayscale image into a YUV planar image. 
This function - * uses the accelerated color conversion routines in the underlying - * codec but does not execute any of the other steps in the JPEG compression - * process. - * - * @param handle a handle to a TurboJPEG compressor or transformer instance - * - * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels - * to be encoded - * - * @param width width (in pixels) of the source image - * - * @param pitch bytes per line in the source image. Normally, this should be - * width * #tjPixelSize[pixelFormat] if the image is unpadded, or - * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image - * is padded to the nearest 32-bit boundary, as is the case for Windows - * bitmaps. You can also be clever and use this parameter to skip lines, etc. - * Setting this parameter to 0 is the equivalent of setting it to - * width * #tjPixelSize[pixelFormat]. - * - * @param height height (in pixels) of the source image - * - * @param pixelFormat pixel format of the source image (see @ref TJPF - * "Pixel formats".) - * - * @param dstBuf pointer to an image buffer that will receive the YUV image. - * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based - * on the image width, height, padding, and level of chrominance subsampling. - * The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the - * buffer (refer to @ref YUVnotes "YUV Image Format Notes".) - * - * @param pad the width of each line in each plane of the YUV image will be - * padded to the nearest multiple of this number of bytes (must be a power of - * 2.) To generate images suitable for X Video, pad should be set to - * 4. - * - * @param subsamp the level of chrominance subsampling to be used when - * generating the YUV image (see @ref TJSAMP - * "Chrominance subsampling options".) To generate images suitable for X - * Video, subsamp should be set to @ref TJSAMP_420. This produces an - * image compatible with the I420 (AKA "YUV420P") format. 
- * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) -*/ -DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, - int width, int pitch, int height, int pixelFormat, - unsigned char *dstBuf, int pad, int subsamp, - int flags); - - -/** - * Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image - * planes. This function uses the accelerated color conversion routines in the - * underlying codec but does not execute any of the other steps in the JPEG - * compression process. - * - * @param handle a handle to a TurboJPEG compressor or transformer instance - * - * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels - * to be encoded - * - * @param width width (in pixels) of the source image - * - * @param pitch bytes per line in the source image. Normally, this should be - * width * #tjPixelSize[pixelFormat] if the image is unpadded, or - * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image - * is padded to the nearest 32-bit boundary, as is the case for Windows - * bitmaps. You can also be clever and use this parameter to skip lines, etc. - * Setting this parameter to 0 is the equivalent of setting it to - * width * #tjPixelSize[pixelFormat]. - * - * @param height height (in pixels) of the source image - * - * @param pixelFormat pixel format of the source image (see @ref TJPF - * "Pixel formats".) - * - * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes - * (or just a Y plane, if generating a grayscale image) that will receive the - * encoded image. These planes can be contiguous or non-contiguous in memory. - * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based - * on the image width, height, strides, and level of chrominance subsampling. 
- * Refer to @ref YUVnotes "YUV Image Format Notes" for more details. - * - * @param strides an array of integers, each specifying the number of bytes per - * line in the corresponding plane of the output image. Setting the stride for - * any plane to 0 is the same as setting it to the plane width (see - * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then - * the strides for all planes will be set to their respective plane widths. - * You can adjust the strides in order to add an arbitrary amount of line - * padding to each plane or to encode an RGB or grayscale image into a - * subregion of a larger YUV planar image. - * - * @param subsamp the level of chrominance subsampling to be used when - * generating the YUV image (see @ref TJSAMP - * "Chrominance subsampling options".) To generate images suitable for X - * Video, subsamp should be set to @ref TJSAMP_420. This produces an - * image compatible with the I420 (AKA "YUV420P") format. - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) -*/ -DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, - int width, int pitch, int height, - int pixelFormat, unsigned char **dstPlanes, - int *strides, int subsamp, int flags); - - -/** - * Create a TurboJPEG decompressor instance. - * - * @return a handle to the newly-created instance, or NULL if an error - * occurred (see #tjGetErrorStr2().) -*/ -DLLEXPORT tjhandle tjInitDecompress(void); - - -/** - * Retrieve information about a JPEG image without decompressing it. 
- * - * @param handle a handle to a TurboJPEG decompressor or transformer instance - * - * @param jpegBuf pointer to a buffer containing a JPEG image - * - * @param jpegSize size of the JPEG image (in bytes) - * - * @param width pointer to an integer variable that will receive the width (in - * pixels) of the JPEG image - * - * @param height pointer to an integer variable that will receive the height - * (in pixels) of the JPEG image - * - * @param jpegSubsamp pointer to an integer variable that will receive the - * level of chrominance subsampling used when the JPEG image was compressed - * (see @ref TJSAMP "Chrominance subsampling options".) - * - * @param jpegColorspace pointer to an integer variable that will receive one - * of the JPEG colorspace constants, indicating the colorspace of the JPEG - * image (see @ref TJCS "JPEG colorspaces".) - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) -*/ -DLLEXPORT int tjDecompressHeader3(tjhandle handle, - const unsigned char *jpegBuf, - unsigned long jpegSize, int *width, - int *height, int *jpegSubsamp, - int *jpegColorspace); - - -/** - * Returns a list of fractional scaling factors that the JPEG decompressor in - * this implementation of TurboJPEG supports. - * - * @param numscalingfactors pointer to an integer variable that will receive - * the number of elements in the list - * - * @return a pointer to a list of fractional scaling factors, or NULL if an - * error is encountered (see #tjGetErrorStr2().) -*/ -DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors); - - -/** - * Decompress a JPEG image to an RGB, grayscale, or CMYK image. 
- * - * @param handle a handle to a TurboJPEG decompressor or transformer instance - * - * @param jpegBuf pointer to a buffer containing the JPEG image to decompress - * - * @param jpegSize size of the JPEG image (in bytes) - * - * @param dstBuf pointer to an image buffer that will receive the decompressed - * image. This buffer should normally be pitch * scaledHeight bytes - * in size, where scaledHeight can be determined by calling - * #TJSCALED() with the JPEG image height and one of the scaling factors - * returned by #tjGetScalingFactors(). The dstBuf pointer may also be - * used to decompress into a specific region of a larger buffer. - * - * @param width desired width (in pixels) of the destination image. If this is - * different than the width of the JPEG image being decompressed, then - * TurboJPEG will use scaling in the JPEG decompressor to generate the largest - * possible image that will fit within the desired width. If width is - * set to 0, then only the height will be considered when determining the - * scaled image size. - * - * @param pitch bytes per line in the destination image. Normally, this is - * scaledWidth * #tjPixelSize[pixelFormat] if the decompressed image - * is unpadded, else #TJPAD(scaledWidth * #tjPixelSize[pixelFormat]) - * if each line of the decompressed image is padded to the nearest 32-bit - * boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth - * can be determined by calling #TJSCALED() with the JPEG image width and one - * of the scaling factors returned by #tjGetScalingFactors().) You can also be - * clever and use the pitch parameter to skip lines, etc. Setting this - * parameter to 0 is the equivalent of setting it to - * scaledWidth * #tjPixelSize[pixelFormat]. - * - * @param height desired height (in pixels) of the destination image. 
If this - * is different than the height of the JPEG image being decompressed, then - * TurboJPEG will use scaling in the JPEG decompressor to generate the largest - * possible image that will fit within the desired height. If height - * is set to 0, then only the width will be considered when determining the - * scaled image size. - * - * @param pixelFormat pixel format of the destination image (see @ref - * TJPF "Pixel formats".) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) - */ -DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf, - unsigned long jpegSize, unsigned char *dstBuf, - int width, int pitch, int height, int pixelFormat, - int flags); - - -/** - * Decompress a JPEG image to a YUV planar image. This function performs JPEG - * decompression but leaves out the color conversion step, so a planar YUV - * image is generated instead of an RGB image. - * - * @param handle a handle to a TurboJPEG decompressor or transformer instance - * - * @param jpegBuf pointer to a buffer containing the JPEG image to decompress - * - * @param jpegSize size of the JPEG image (in bytes) - * - * @param dstBuf pointer to an image buffer that will receive the YUV image. - * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based - * on the image width, height, padding, and level of subsampling. The Y, - * U (Cb), and V (Cr) image planes will be stored sequentially in the buffer - * (refer to @ref YUVnotes "YUV Image Format Notes".) - * - * @param width desired width (in pixels) of the YUV image. If this is - * different than the width of the JPEG image being decompressed, then - * TurboJPEG will use scaling in the JPEG decompressor to generate the largest - * possible image that will fit within the desired width. 
If width is - * set to 0, then only the height will be considered when determining the - * scaled image size. If the scaled width is not an even multiple of the MCU - * block width (see #tjMCUWidth), then an intermediate buffer copy will be - * performed within TurboJPEG. - * - * @param pad the width of each line in each plane of the YUV image will be - * padded to the nearest multiple of this number of bytes (must be a power of - * 2.) To generate images suitable for X Video, pad should be set to - * 4. - * - * @param height desired height (in pixels) of the YUV image. If this is - * different than the height of the JPEG image being decompressed, then - * TurboJPEG will use scaling in the JPEG decompressor to generate the largest - * possible image that will fit within the desired height. If height - * is set to 0, then only the width will be considered when determining the - * scaled image size. If the scaled height is not an even multiple of the MCU - * block height (see #tjMCUHeight), then an intermediate buffer copy will be - * performed within TurboJPEG. - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) - */ -DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, - unsigned long jpegSize, unsigned char *dstBuf, - int width, int pad, int height, int flags); - - -/** - * Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image - * planes. This function performs JPEG decompression but leaves out the color - * conversion step, so a planar YUV image is generated instead of an RGB image. 
- * - * @param handle a handle to a TurboJPEG decompressor or transformer instance - * - * @param jpegBuf pointer to a buffer containing the JPEG image to decompress - * - * @param jpegSize size of the JPEG image (in bytes) - * - * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes - * (or just a Y plane, if decompressing a grayscale image) that will receive - * the YUV image. These planes can be contiguous or non-contiguous in memory. - * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based - * on the scaled image width, scaled image height, strides, and level of - * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" - * for more details. - * - * @param width desired width (in pixels) of the YUV image. If this is - * different than the width of the JPEG image being decompressed, then - * TurboJPEG will use scaling in the JPEG decompressor to generate the largest - * possible image that will fit within the desired width. If width is - * set to 0, then only the height will be considered when determining the - * scaled image size. If the scaled width is not an even multiple of the MCU - * block width (see #tjMCUWidth), then an intermediate buffer copy will be - * performed within TurboJPEG. - * - * @param strides an array of integers, each specifying the number of bytes per - * line in the corresponding plane of the output image. Setting the stride for - * any plane to 0 is the same as setting it to the scaled plane width (see - * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then - * the strides for all planes will be set to their respective scaled plane - * widths. You can adjust the strides in order to add an arbitrary amount of - * line padding to each plane or to decompress the JPEG image into a subregion - * of a larger YUV planar image. - * - * @param height desired height (in pixels) of the YUV image. 
If this is - * different than the height of the JPEG image being decompressed, then - * TurboJPEG will use scaling in the JPEG decompressor to generate the largest - * possible image that will fit within the desired height. If height - * is set to 0, then only the width will be considered when determining the - * scaled image size. If the scaled height is not an even multiple of the MCU - * block height (see #tjMCUHeight), then an intermediate buffer copy will be - * performed within TurboJPEG. - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) - */ -DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, - const unsigned char *jpegBuf, - unsigned long jpegSize, - unsigned char **dstPlanes, int width, - int *strides, int height, int flags); - - -/** - * Decode a YUV planar image into an RGB or grayscale image. This function - * uses the accelerated color conversion routines in the underlying - * codec but does not execute any of the other steps in the JPEG decompression - * process. - * - * @param handle a handle to a TurboJPEG decompressor or transformer instance - * - * @param srcBuf pointer to an image buffer containing a YUV planar image to be - * decoded. The size of this buffer should match the value returned by - * #tjBufSizeYUV2() for the given image width, height, padding, and level of - * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be - * stored sequentially in the source buffer (refer to @ref YUVnotes - * "YUV Image Format Notes".) - * - * @param pad Use this parameter to specify that the width of each line in each - * plane of the YUV source image is padded to the nearest multiple of this - * number of bytes (must be a power of 2.) - * - * @param subsamp the level of chrominance subsampling used in the YUV source - * image (see @ref TJSAMP "Chrominance subsampling options".) 
- * - * @param dstBuf pointer to an image buffer that will receive the decoded - * image. This buffer should normally be pitch * height bytes in - * size, but the dstBuf pointer can also be used to decode into a - * specific region of a larger buffer. - * - * @param width width (in pixels) of the source and destination images - * - * @param pitch bytes per line in the destination image. Normally, this should - * be width * #tjPixelSize[pixelFormat] if the destination image is - * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line - * of the destination image should be padded to the nearest 32-bit boundary, as - * is the case for Windows bitmaps. You can also be clever and use the pitch - * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent - * of setting it to width * #tjPixelSize[pixelFormat]. - * - * @param height height (in pixels) of the source and destination images - * - * @param pixelFormat pixel format of the destination image (see @ref TJPF - * "Pixel formats".) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) - */ -DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, - int pad, int subsamp, unsigned char *dstBuf, - int width, int pitch, int height, int pixelFormat, - int flags); - - -/** - * Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale - * image. This function uses the accelerated color conversion routines in the - * underlying codec but does not execute any of the other steps in the JPEG - * decompression process. - * - * @param handle a handle to a TurboJPEG decompressor or transformer instance - * - * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes - * (or just a Y plane, if decoding a grayscale image) that contain a YUV image - * to be decoded. 
These planes can be contiguous or non-contiguous in memory. - * The size of each plane should match the value returned by #tjPlaneSizeYUV() - * for the given image width, height, strides, and level of chrominance - * subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" for more - * details. - * - * @param strides an array of integers, each specifying the number of bytes per - * line in the corresponding plane of the YUV source image. Setting the stride - * for any plane to 0 is the same as setting it to the plane width (see - * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then - * the strides for all planes will be set to their respective plane widths. - * You can adjust the strides in order to specify an arbitrary amount of line - * padding in each plane or to decode a subregion of a larger YUV planar image. - * - * @param subsamp the level of chrominance subsampling used in the YUV source - * image (see @ref TJSAMP "Chrominance subsampling options".) - * - * @param dstBuf pointer to an image buffer that will receive the decoded - * image. This buffer should normally be pitch * height bytes in - * size, but the dstBuf pointer can also be used to decode into a - * specific region of a larger buffer. - * - * @param width width (in pixels) of the source and destination images - * - * @param pitch bytes per line in the destination image. Normally, this should - * be width * #tjPixelSize[pixelFormat] if the destination image is - * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line - * of the destination image should be padded to the nearest 32-bit boundary, as - * is the case for Windows bitmaps. You can also be clever and use the pitch - * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent - * of setting it to width * #tjPixelSize[pixelFormat]. 
- * - * @param height height (in pixels) of the source and destination images - * - * @param pixelFormat pixel format of the destination image (see @ref TJPF - * "Pixel formats".) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) - */ -DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, - const unsigned char **srcPlanes, - const int *strides, int subsamp, - unsigned char *dstBuf, int width, int pitch, - int height, int pixelFormat, int flags); - - -/** - * Create a new TurboJPEG transformer instance. - * - * @return a handle to the newly-created instance, or NULL if an error - * occurred (see #tjGetErrorStr2().) - */ -DLLEXPORT tjhandle tjInitTransform(void); - - -/** - * Losslessly transform a JPEG image into another JPEG image. Lossless - * transforms work by moving the raw DCT coefficients from one JPEG image - * structure to another without altering the values of the coefficients. While - * this is typically faster than decompressing the image, transforming it, and - * re-compressing it, lossless transforms are not free. Each lossless - * transform requires reading and performing Huffman decoding on all of the - * coefficients in the source image, regardless of the size of the destination - * image. Thus, this function provides a means of generating multiple - * transformed images from the same source or applying multiple - * transformations simultaneously, in order to eliminate the need to read the - * source coefficients multiple times. - * - * @param handle a handle to a TurboJPEG transformer instance - * - * @param jpegBuf pointer to a buffer containing the JPEG source image to - * transform - * - * @param jpegSize size of the JPEG source image (in bytes) - * - * @param n the number of transformed JPEG images to generate - * - * @param dstBufs pointer to an array of n image buffers. 
dstBufs[i] - * will receive a JPEG image that has been transformed using the parameters in - * transforms[i]. TurboJPEG has the ability to reallocate the JPEG - * buffer to accommodate the size of the JPEG image. Thus, you can choose to: - * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and - * let TurboJPEG grow the buffer as needed, - * -# set dstBufs[i] to NULL to tell TurboJPEG to allocate the buffer - * for you, or - * -# pre-allocate the buffer to a "worst case" size determined by calling - * #tjBufSize() with the transformed or cropped width and height. Under normal - * circumstances, this should ensure that the buffer never has to be - * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) Note, - * however, that there are some rare cases (such as transforming images with a - * large amount of embedded EXIF or ICC profile data) in which the output image - * will be larger than the worst-case size, and #TJFLAG_NOREALLOC cannot be - * used in those cases. - * . - * If you choose option 1, dstSizes[i] should be set to the size of - * your pre-allocated buffer. In any case, unless you have set - * #TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return - * from this function, as it may have changed. - * - * @param dstSizes pointer to an array of n unsigned long variables that will - * receive the actual sizes (in bytes) of each transformed JPEG image. If - * dstBufs[i] points to a pre-allocated buffer, then - * dstSizes[i] should be set to the size of the buffer. Upon return, - * dstSizes[i] will contain the size of the JPEG image (in bytes.) - * - * @param transforms pointer to an array of n #tjtransform structures, each of - * which specifies the transform parameters and/or cropping region for the - * corresponding transformed output image. 
- * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT - * "flags" - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() - * and #tjGetErrorCode().) - */ -DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf, - unsigned long jpegSize, int n, - unsigned char **dstBufs, unsigned long *dstSizes, - tjtransform *transforms, int flags); - - -/** - * Destroy a TurboJPEG compressor, decompressor, or transformer instance. - * - * @param handle a handle to a TurboJPEG compressor, decompressor or - * transformer instance - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2().) - */ -DLLEXPORT int tjDestroy(tjhandle handle); - - -/** - * Allocate an image buffer for use with TurboJPEG. You should always use - * this function to allocate the JPEG destination buffer(s) for the compression - * and transform functions unless you are disabling automatic buffer - * (re)allocation (by setting #TJFLAG_NOREALLOC.) - * - * @param bytes the number of bytes to allocate - * - * @return a pointer to a newly-allocated buffer with the specified number of - * bytes. - * - * @sa tjFree() - */ -DLLEXPORT unsigned char *tjAlloc(int bytes); - - -/** - * Load an uncompressed image from disk into memory. - * - * @param filename name of a file containing an uncompressed image in Windows - * BMP or PBMPLUS (PPM/PGM) format - * - * @param width pointer to an integer variable that will receive the width (in - * pixels) of the uncompressed image - * - * @param align row alignment of the image buffer to be returned (must be a - * power of 2.) For instance, setting this parameter to 4 will cause all rows - * in the image buffer to be padded to the nearest 32-bit boundary, and setting - * this parameter to 1 will cause all rows in the image buffer to be unpadded. 
- * - * @param height pointer to an integer variable that will receive the height - * (in pixels) of the uncompressed image - * - * @param pixelFormat pointer to an integer variable that specifies or will - * receive the pixel format of the uncompressed image buffer. The behavior of - * #tjLoadImage() will vary depending on the value of *pixelFormat - * passed to the function: - * - @ref TJPF_UNKNOWN : The uncompressed image buffer returned by the function - * will use the most optimal pixel format for the file type, and - * *pixelFormat will contain the ID of this pixel format upon - * successful return from the function. - * - @ref TJPF_GRAY : Only PGM files and 8-bit BMP files with a grayscale - * colormap can be loaded. - * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be - * converted using a quick & dirty algorithm that is suitable only for testing - * purposes (proper conversion between CMYK and other formats requires a color - * management system.) - * - Other @ref TJPF "pixel formats" : The uncompressed image buffer will use - * the specified pixel format, and pixel format conversion will be performed if - * necessary. - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP - * "flags". - * - * @return a pointer to a newly-allocated buffer containing the uncompressed - * image, converted to the chosen pixel format and with the chosen row - * alignment, or NULL if an error occurred (see #tjGetErrorStr2().) This - * buffer should be freed using #tjFree(). - */ -DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width, - int align, int *height, int *pixelFormat, - int flags); - - -/** - * Save an uncompressed image from memory to disk. - * - * @param filename name of a file to which to save the uncompressed image. - * The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, - * depending on the file extension. 
- * - * @param buffer pointer to an image buffer containing RGB, grayscale, or - * CMYK pixels to be saved - * - * @param width width (in pixels) of the uncompressed image - * - * @param pitch bytes per line in the image buffer. Setting this parameter to - * 0 is the equivalent of setting it to - * width * #tjPixelSize[pixelFormat]. - * - * @param height height (in pixels) of the uncompressed image - * - * @param pixelFormat pixel format of the image buffer (see @ref TJPF - * "Pixel formats".) If this parameter is set to @ref TJPF_GRAY, then the - * image will be stored in PGM or 8-bit (indexed color) BMP format. Otherwise, - * the image will be stored in PPM or 24-bit BMP format. If this parameter - * is set to @ref TJPF_CMYK, then the CMYK pixels will be converted to RGB - * using a quick & dirty algorithm that is suitable only for testing (proper - * conversion between CMYK and other formats requires a color management - * system.) - * - * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP - * "flags". - * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2().) - */ -DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer, - int width, int pitch, int height, int pixelFormat, - int flags); - - -/** - * Free an image buffer previously allocated by TurboJPEG. You should always - * use this function to free JPEG destination buffer(s) that were automatically - * (re)allocated by the compression and transform functions or that were - * manually allocated using #tjAlloc(). - * - * @param buffer address of the buffer to free - * - * @sa tjAlloc() - */ -DLLEXPORT void tjFree(unsigned char *buffer); - - -/** - * Returns a descriptive error message explaining why the last command failed. 
- * - * @param handle a handle to a TurboJPEG compressor, decompressor, or - * transformer instance, or NULL if the error was generated by a global - * function (but note that retrieving the error message for a global function - * is not thread-safe.) - * - * @return a descriptive error message explaining why the last command failed. - */ -DLLEXPORT char *tjGetErrorStr2(tjhandle handle); - - -/** - * Returns a code indicating the severity of the last error. See - * @ref TJERR "Error codes". - * - * @param handle a handle to a TurboJPEG compressor, decompressor or - * transformer instance - * - * @return a code indicating the severity of the last error. See - * @ref TJERR "Error codes". - */ -DLLEXPORT int tjGetErrorCode(tjhandle handle); - - -/* Deprecated functions and macros */ -#define TJFLAG_FORCEMMX 8 -#define TJFLAG_FORCESSE 16 -#define TJFLAG_FORCESSE2 32 -#define TJFLAG_FORCESSE3 128 - - -/* Backward compatibility functions and macros (nothing to see here) */ -#define NUMSUBOPT TJ_NUMSAMP -#define TJ_444 TJSAMP_444 -#define TJ_422 TJSAMP_422 -#define TJ_420 TJSAMP_420 -#define TJ_411 TJSAMP_420 -#define TJ_GRAYSCALE TJSAMP_GRAY - -#define TJ_BGR 1 -#define TJ_BOTTOMUP TJFLAG_BOTTOMUP -#define TJ_FORCEMMX TJFLAG_FORCEMMX -#define TJ_FORCESSE TJFLAG_FORCESSE -#define TJ_FORCESSE2 TJFLAG_FORCESSE2 -#define TJ_ALPHAFIRST 64 -#define TJ_FORCESSE3 TJFLAG_FORCESSE3 -#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE -#define TJ_YUV 512 - -DLLEXPORT unsigned long TJBUFSIZE(int width, int height); - -DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp); - -DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp); - -DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width, - int pitch, int height, int pixelSize, - unsigned char *dstBuf, unsigned long *compressedSize, - int jpegSubsamp, int jpegQual, int flags); - -DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, - int pitch, int height, 
int pixelSize, - unsigned char *dstBuf, int subsamp, int flags); - -DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, - int pitch, int height, int pixelFormat, - unsigned char *dstBuf, int subsamp, int flags); - -DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, int *width, - int *height); - -DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, int *width, - int *height, int *jpegSubsamp); - -DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, unsigned char *dstBuf, - int width, int pitch, int height, int pixelSize, - int flags); - -DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, unsigned char *dstBuf, - int flags); - -DLLEXPORT char *tjGetErrorStr(void); - - -/** - * @} - */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/3rdparty/turbojpeg/x86/bin/turbojpeg.dll b/3rdparty/turbojpeg/x86/bin/turbojpeg.dll deleted file mode 100644 index fe5a1c85..00000000 Binary files a/3rdparty/turbojpeg/x86/bin/turbojpeg.dll and /dev/null differ diff --git a/3rdparty/turbojpeg/x86/lib/turbojpeg.lib b/3rdparty/turbojpeg/x86/lib/turbojpeg.lib deleted file mode 100644 index 015ee255..00000000 Binary files a/3rdparty/turbojpeg/x86/lib/turbojpeg.lib and /dev/null differ diff --git a/huagao/CAdvancedDLG.cpp b/huagao/CAdvancedDLG.cpp index b5218f7f..c94a1e28 100644 --- a/huagao/CAdvancedDLG.cpp +++ b/huagao/CAdvancedDLG.cpp @@ -42,7 +42,7 @@ BOOL CAdvancedDLG::OnInitDialog() CTabPageSSL::OnInitDialog(); m_Edit_noise.SetSlideLink(this, IDC_SLIDER_NOISE); - m_Edit_noise.SetParams(1, 10, 1); + m_Edit_noise.SetParams(1, 20, 1); m_Edit_noise.SetValue(noise); m_Edit_indent.SetSlideLink(this, IDC_SLIDER_INDENT); diff --git a/huagao/CBasicPage.cpp b/huagao/CBasicPage.cpp index 379291d4..254b70e9 100644 --- a/huagao/CBasicPage.cpp +++ b/huagao/CBasicPage.cpp @@ -295,6 
+295,7 @@ void CBasicPage::updateCmbDuplex(BOOL insert) m_cmBoxDuplex->InsertString(4, TEXT("对折")); } } +#ifdef G200 if (m_Slider_Dpi.m_iPosition > 300 && m_cmBoxSS->GetCount() > 20) { if (m_cmBoxSS->GetCurSel() >= 20) @@ -309,6 +310,7 @@ void CBasicPage::updateCmbDuplex(BOOL insert) m_cmBoxSS->InsertString(21, _T("最大扫描尺寸")); m_cmBoxSS->InsertString(22, _T("三联试卷")); } +#endif // G200 #ifndef G300 if (tmp_paperindex == 19 || tmp_paperindex == 20 || tmp_paperindex == 21 || tmp_paperindex == 22 || tmp_paperindex == 1|| tmp_paperindex == 4|| tmp_paperindex == 5){ (((CButton*)GetDlgItem(IDC_CKBSIZEDETECT)))->SetCheck(false); diff --git a/huagao/CMakeLists.txt b/huagao/CMakeLists.txt index e0a29299..aad7b9c4 100644 --- a/huagao/CMakeLists.txt +++ b/huagao/CMakeLists.txt @@ -21,7 +21,7 @@ execute_process( ) #string (REGEX REPLACE ".*\t| .*" "" GIT_PATH ${GIT_REMOTE_PATH}) -string (REGEX REPLACE "[\n\t\r]" "" GIT_VERSION ${GIT_VERSION}) +#string (REGEX REPLACE "[\n\t\r]" "" GIT_VERSION ${GIT_VERSION}) file(GLOB DEV_SRC ${PROJECT_SOURCE_DIR}/Device/*.h ${PROJECT_SOURCE_DIR}/Device/*.cpp) file(GLOB IMGPROC_SRC ${PROJECT_SOURCE_DIR}/ImageProcess/*.h ${PROJECT_SOURCE_DIR}/ImageProcess/*.cpp) @@ -41,20 +41,24 @@ ${PROJECT_SOURCE_DIR}/../twpp ${PROJECT_SOURCE_DIR}/../3rdparty/nick ${PROJECT_SOURCE_DIR}/../3rdparty/cyusb/inc ${PROJECT_SOURCE_DIR}/../3rdparty/libtiff/include -${PROJECT_SOURCE_DIR}/../3rdparty/gpdf/ +${PROJECT_SOURCE_DIR}/../3rdparty/hgOCR/include/ccstruct +${PROJECT_SOURCE_DIR}/../3rdparty/hgOCR/include/ccmain +${PROJECT_SOURCE_DIR}/../3rdparty/hgOCR/include/ccutil +${PROJECT_SOURCE_DIR}/../3rdparty/hgOCR/leptonica +${PROJECT_SOURCE_DIR}/../3rdparty/hgOCR/include/api ${PROJECT_SOURCE_DIR}/../3rdparty/opencv/include ${PROJECT_SOURCE_DIR}/../3rdparty/log4cplus/include ) message($ENV{windir}) -link_directories(${PROJECT_SOURCE_DIR}/../3rdparty/opencv/x86/staticlib/ +link_directories( +${PROJECT_SOURCE_DIR}/../3rdparty/opencv/x86/staticlib/ 
${PROJECT_SOURCE_DIR}/../3rdparty/libtiff/ ${PROJECT_SOURCE_DIR}/../3rdparty/log4cplus/lib/ -${PROJECT_SOURCE_DIR}/../3rdparty/gpdf/x86/staticlib/ ${PROJECT_SOURCE_DIR}/../3rdparty/cyusb/lib/windows/x86/ -${PROJECT_SOURCE_DIR}/../3rdparty/gpdf/3rdparty/tesseract/staticlib/x86/ -${PROJECT_SOURCE_DIR}/../3rdparty/turbojpeg/x86/lib/) +${PROJECT_SOURCE_DIR}/../3rdparty/hgOCR/x86/staticlib/ +) MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") @@ -63,11 +67,14 @@ link_libraries(optimized ${FULL_LIBRARY_NAME}) link_libraries(debug ${FULL_LIBRARY_NAME}d) ENDMACRO() +MACRO(RW_LINK_OCR_LIBRARY FULL_LIBRARY_NAME) +link_libraries(optimized ${FULL_LIBRARY_NAME}) +link_libraries(debug ${FULL_LIBRARY_NAME}) +ENDMACRO() + IF (CMAKE_SYSTEM_NAME MATCHES "Linux") MESSAGE(STATUS "current platform: Linux ") ELSEIF (CMAKE_SYSTEM_NAME MATCHES "Windows") - link_libraries(hg_gpdf) - link_libraries(libjpeg) RW_LINK_3RD_PART_LIBRARY(opencv_world3414) RW_LINK_3RD_PART_LIBRARY(IlmImf) RW_LINK_3RD_PART_LIBRARY(libjasper) @@ -77,24 +84,10 @@ ELSEIF (CMAKE_SYSTEM_NAME MATCHES "Windows") RW_LINK_3RD_PART_LIBRARY(libwebp) RW_LINK_3RD_PART_LIBRARY(zlib) RW_LINK_3RD_PART_LIBRARY(ittnotify) - link_libraries(libwebp) - link_libraries(libleptonica) - link_libraries(libprotobuf) - link_libraries(quirc) - link_libraries(tesseract_api) - link_libraries(tesseract_arch) - link_libraries(tesseract_ccmain) - link_libraries(tesseract_ccstruct) - link_libraries(tesseract_ccutil) - link_libraries(tesseract_classify) - link_libraries(tesseract_cutil) - link_libraries(tesseract_dict) - link_libraries(tesseract_lstm) - link_libraries(tesseract_opencl) - link_libraries(tesseract_textord) - link_libraries(tesseract_viewer) - link_libraries(tesseract_wordrec) - link_libraries(turbojpeg) + #link_libraries(libwebp) + RW_LINK_3RD_PART_LIBRARY(quirc) + RW_LINK_OCR_LIBRARY(leptonica) + RW_LINK_OCR_LIBRARY(HG_OCR) link_libraries(CyAPI) RW_LINK_3RD_PART_LIBRARY(log4cplusS) MESSAGE(STATUS "current platform: Windows") diff 
--git a/huagao/Device/GScan.h b/huagao/Device/GScan.h index ef9bba92..cc16661f 100644 --- a/huagao/Device/GScan.h +++ b/huagao/Device/GScan.h @@ -149,32 +149,44 @@ public: /// ñɨ /// /// - void set_scannum(int val) { scannum = val; }; + void set_scannum(int val) { + scannum = val; + }; /// /// ȡɨ /// /// - int get_scannum() { return scannum; }; + int get_scannum() { + return scannum; + }; /// /// ȡPCȡͼҳ /// /// - int get_imgnReaded() { return imgreadednum; }; + int get_imgnReaded() { + return imgreadednum; + }; /// /// ȡͼ /// /// - int get_imgTransfered() { return imgtransfered; }; + int get_imgTransfered() { + return imgtransfered; + }; /// /// ȡʧͼ /// /// - int get_lose_image_num() { return lose_image_num; }; + int get_lose_image_num() { + return lose_image_num; + }; /// /// öʧͼ /// /// - void set_lose_image_num(int value) { lose_image_num = value; }; + void set_lose_image_num(int value) { + lose_image_num = value; + }; /// /// ȡԭʼͼԼһ /// diff --git a/huagao/Device/GScanO1003399.cpp b/huagao/Device/GScanO1003399.cpp index d2a81a6a..85f243a3 100644 --- a/huagao/Device/GScanO1003399.cpp +++ b/huagao/Device/GScanO1003399.cpp @@ -71,6 +71,7 @@ GScanO1003399::~GScanO1003399() { m_imgprocthread->join(); } + autoroate.reset(); } void GScanO1003399::open(int vid, int pid) @@ -246,6 +247,7 @@ void GScanO1003399::config_params(GScanCap& param) } cfg.g200params.dpi = SupResolutions.count(param.resolution_native)>0 ? SupResolutions[param.resolution_native] : 1; cfg.g200params.double_feed_enbale = (unsigned int)param.hardwarecaps.en_doublefeed; + //cfg.g200params.stable_enbale = 0; cfg.g200params.stable_enbale = (unsigned int)param.hardwarecaps.en_stapledetect; cfg.g200params.screw_detect_enable = (unsigned int)param.hardwarecaps.en_skrewdetect; cfg.g200params.screw_detect_level = (unsigned int)cfg.g200params.screw_detect_enable ? 
secrewMaps[param.hardwarecaps.skrewdetectlevel] : 0; @@ -313,6 +315,7 @@ void GScanO1003399::config_params(GScanCap& param) param39.papertype = param.papertype; param39.pixtype = param.pixtype; param39.resolution_dst = param.resolution_dst; + //param39.resolution_dst = param.resolution_dst > 300 ? 300 : param.resolution_dst; param39.resolution_native = param.resolution_native; param39.scannum = param.scannum; param39.sharpen = param.sharpen; @@ -781,12 +784,22 @@ void GScanO1003399::imgproce(std::shared_ptr>& buff) #ifdef LANXUM _tcscat(szIniFile, _T("\\twain_32\\LANXUMSCAN\\tessdata")); #else - _tcscat(szIniFile, _T("\\twain_32\\HuaGoScan\\tessdata")); + _tcscat(szIniFile, _T("\\twain_32\\HuaGoScan\\tessdata\\osd.traineddata")); #endif // int iLen = WideCharToMultiByte(CP_ACP, 0, szIniFile, -1, NULL, 0, NULL, NULL); char* chRtn = new char[iLen * sizeof(char)]; WideCharToMultiByte(CP_ACP, 0, szIniFile, -1, chRtn, iLen, NULL, NULL); - CImageApplyRotation(type, m_param.is_backrotate180, m_param.resolution_dst, chRtn).apply(mats,m_param.is_duplex); + + if (!autoroate.get()) + { + autoroate = std::make_shared< CImageApplyRotation>(type, m_param.is_backrotate180, m_param.resolution_dst, chRtn); + } + if (autoroate.get() && m_param.is_autotext) + { + StopWatch sw; + autoroate->apply(mats, m_param.is_duplex); + } + delete[] chRtn; } if (m_param.automaticcolor) diff --git a/huagao/Device/GScanO1003399.h b/huagao/Device/GScanO1003399.h index 62ef67c4..e0867e64 100644 --- a/huagao/Device/GScanO1003399.h +++ b/huagao/Device/GScanO1003399.h @@ -81,7 +81,7 @@ private: volatile bool b_usbthread; BlockingQueue>> imgs; BlockingQueue m_paths; - + std::shared_ptr autoroate; //std::queue> fu_imgpro; std::shared_ptr m_imgprocthread; std::shared_ptr m_usbthread; diff --git a/huagao/Device/GScanO400.cpp b/huagao/Device/GScanO400.cpp index 461592bb..b4e2b33c 100644 --- a/huagao/Device/GScanO400.cpp +++ b/huagao/Device/GScanO400.cpp @@ -755,7 +755,7 @@ USBCB 
GScanO400::Get_Scanner_Status() if (m_usb.get() && m_usb->is_connected()) m_usb->read_bulk(&usbcb, 512); FileTools::writelog(log_ERROR, "get dsp status error"); - return { NO_COMMAND,USB_BULK_ERROR,0 }; + //return { NO_COMMAND,USB_BULK_ERROR,0 }; } return usbcb; } diff --git a/huagao/Device/ImageMatQueue.cpp b/huagao/Device/ImageMatQueue.cpp index c4918e94..158f3d52 100644 --- a/huagao/Device/ImageMatQueue.cpp +++ b/huagao/Device/ImageMatQueue.cpp @@ -314,7 +314,7 @@ void ImageMatQueue::setparam(const GScanCap& param) #elif defined HANVON _tcscat(szIniFile, _T("\\twain_32\\HanvonScan\\tessdata")); #else - _tcscat(szIniFile, _T("\\twain_32\\HuaGoScan\\tessdata")); + _tcscat(szIniFile, _T("\\twain_32\\HuaGoScan\\tessdata\\osd.traineddata")); #endif // int iLen = WideCharToMultiByte(CP_ACP, 0, szIniFile, -1, NULL, 0, NULL, NULL); char* chRtn = new char[iLen * sizeof(char)]; @@ -435,6 +435,7 @@ bool ImageMatQueue::queuesempty() return atm_orgin_image_remains <= 0 && m_imagedata.Size() == 0; } +static int indeximg = 0; void ImageMatQueue::proc() { @@ -485,6 +486,10 @@ void ImageMatQueue::proc() { mats.push_back(scanParam.is_switchfrontback ? imgback : imgfront); mats.push_back(scanParam.is_switchfrontback ? imgfront : imgback); + //if (!mats[0].empty()) + // cv::imwrite("D:\\testimage\\" + to_string(++indeximg) + "prc.jpg", mats[0]); + //if (!mats[1].empty()) + // cv::imwrite("D:\\testimage\\" + to_string(++indeximg) + "prc.jpg", mats[1]); remove(info.path.c_str()); remove(back.path.c_str()); } @@ -514,18 +519,19 @@ void ImageMatQueue::proc() // cvtColor(mat, mat, CV_RGB2GRAY); if (!mat.empty()) { - Mat front = mat(Rect(0, 0, mat.cols / 2, mat.rows)); - Mat back = mat(Rect(mat.cols / 2, 0, mat.cols / 2, mat.rows)); + Mat front = mat(Rect(0, 0, mat.cols / 2, mat.rows-10));//ͼβЧݶ10 + Mat back = mat(Rect(mat.cols / 2, 0, mat.cols / 2, mat.rows-10));//ͼβЧݶ10 #ifdef UV mats.push_back(scanParam.is_switchfrontback ? front : back); mats.push_back(scanParam.is_switchfrontback ? 
back : front); #else + if (scanParam.imageRotateDegree != 0.0 && scanParam.imageRotateDegree != 180.0) { cv::flip(front, front, 0); cv::flip(front, front, 1); } - mats.push_back(back); - mats.push_back(front); + mats.push_back(scanParam.is_switchfrontback ? front : back); + mats.push_back(scanParam.is_switchfrontback ? back : front); #endif front.release(); back.release(); @@ -558,7 +564,8 @@ void ImageMatQueue::proc() std::vector rects; std::vector angleResults; bool isDesaskew = false; - + //cv::imwrite("D:\\testimage\\" + to_string(++indeximg) + ".jpg", mats[0]); + //cv::imwrite("D:\\testimage\\" + to_string(++indeximg) + ".jpg", mats[1]); sw.reset(); for (int j = 0; j < m_iaList.size(); j++) { m_iaList[j]->apply(mats, scanParam.is_duplex); @@ -571,6 +578,7 @@ void ImageMatQueue::proc() else if (typeid(*ptr) == typeid(CImageApplyRotation)) angleResults = dynamic_cast(ptr)->angleResults(); } + #ifdef UV if (!uvmats.empty()) { diff --git a/huagao/Device/ImageMatQueue.h b/huagao/Device/ImageMatQueue.h index 62ca9173..bb97bbdc 100644 --- a/huagao/Device/ImageMatQueue.h +++ b/huagao/Device/ImageMatQueue.h @@ -49,17 +49,17 @@ public: } memcpy(m_data->data() + 54, colortable, 256 * 4); } - //cv::imencode(".bmp", mat, *(m_data.get())); + cv::imencode(".bmp", mat, *(m_data.get())); setBmpFileHeader(mat); setBmpInfoHeader(mat, res); - uchar* data = m_data->data() + headersize + bmpdatasize; - uchar* matdata = mat.data; - for (int i = 0; i < mat.rows; i++) { - data -= m_datalinesize; - memcpy(data, matdata, step); - matdata += step; - } + //uchar* data = m_data->data() + headersize + bmpdatasize; + //uchar* matdata = mat.data; + //for (int i = 0; i < mat.rows; i++) { + // data -= m_datalinesize; + // memcpy(data, matdata, step); + // matdata += step; + //} } private: void setBmpFileHeader(const cv::Mat& mat) diff --git a/huagao/Device/UsbScanEx.cpp b/huagao/Device/UsbScanEx.cpp index dc382834..837dbffc 100644 --- a/huagao/Device/UsbScanEx.cpp +++ 
b/huagao/Device/UsbScanEx.cpp @@ -167,10 +167,10 @@ int UsbScanEx::read_bulk(void* data, int len) switch (error_code) { case ERROR_IO_PENDING: { - auto ret = WaitForSingleObject(lp_overlap->hEvent, 300); + //auto ret = WaitForSingleObject(lp_overlap->hEvent, 300); //FileTools::writelog(log_FATAL, "WaitForSingleObject return " + std::to_string(ret));; - GetOverlappedResult(h_pipe, lp_overlap, &pdw_ret, FALSE); - //GetOverlappedResult(h_pipe, lp_overlap, &pdw_ret, TRUE); + //GetOverlappedResult(h_pipe, lp_overlap, &pdw_ret, FALSE); + GetOverlappedResult(h_pipe, lp_overlap, &pdw_ret, TRUE); return pdw_ret; } case ERROR_FILE_NOT_FOUND: @@ -210,9 +210,9 @@ int UsbScanEx::write_bulk(void* data, int len) switch (GetLastError()) { case ERROR_IO_PENDING: - WaitForSingleObject(lp_overlap->hEvent, 500); - GetOverlappedResult(h_pipe, lp_overlap, &dw_size, FALSE); - //GetOverlappedResult(h_pipe, lp_overlap, &dw_size, TRUE); + //WaitForSingleObject(lp_overlap->hEvent, 500); + //GetOverlappedResult(h_pipe, lp_overlap, &dw_size, FALSE); + GetOverlappedResult(h_pipe, lp_overlap, &dw_size, TRUE); return dw_size; case ERROR_FILE_NOT_FOUND: case ERROR_ACCESS_DENIED: diff --git a/huagao/ImageProcess/ImageApplyAutoCrop.cpp b/huagao/ImageProcess/ImageApplyAutoCrop.cpp index 45af1f28..5e885587 100644 --- a/huagao/ImageProcess/ImageApplyAutoCrop.cpp +++ b/huagao/ImageProcess/ImageApplyAutoCrop.cpp @@ -53,7 +53,6 @@ cv::Mat concatenateMatrix(const cv::Mat& first, const cv::Mat& second) return mul_r; } - std::vector comMat() { std::vector mats; @@ -61,7 +60,7 @@ std::vector comMat() srcTri[0] = cv::Point2f(1, 1); srcTri[1] = cv::Point2f(1, 0); srcTri[2] = cv::Point2f(0, 1); - const float fact = 0.1f; + const float fact = 0.33f; float pos[] = { 0, 2 * fact, fact }; cv::Point2f dstTri[3]; @@ -125,6 +124,13 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) cv::Mat element = getStructuringElement(cv::MORPH_RECT, cv::Size(m_noise, 1)); cv::morphologyEx(thre, thre, cv::MORPH_OPEN, 
element); } + + if (m_indent > 0) + { + cv::Mat element = getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(m_indent, m_indent)); + cv::morphologyEx(thre, thre, cv::MORPH_ERODE, element); + } + std::vector hierarchy; std::vector> contours; @@ -134,7 +140,7 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) if (m_maxContour.size() == 0) { thre.release(); - //����ǹ̶����棬�뷵�ز��к�ijߴ� + // if (!m_isCrop) pDib = pDib(cv::Rect((pDib.cols - m_fixedSize.width) / 2, (pDib.rows - m_fixedSize.height) / 2, m_fixedSize.width, m_fixedSize.height) & cv::Rect(0, 0, pDib.cols, pDib.rows)).clone(); #ifdef LOG @@ -142,6 +148,7 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) #endif // LOG return; } + thre.release(); dst.release(); @@ -178,18 +185,15 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) auto mats = comMat(); warp_mat = cv::getAffineTransform(srcTri, dstTri); warp_mat = concatenateMatrix(mats[0], warp_mat); - //warp_mat = mats[0]; cv::warpAffine(bgr[0], bgr[0], warp_mat, rect.size, cv::INTER_LINEAR); warp_mat = cv::getAffineTransform(srcTri, dstTri); warp_mat = concatenateMatrix(mats[1], warp_mat); - //warp_mat = mats[1]; cv::warpAffine(bgr[1], bgr[1], warp_mat, rect.size, cv::INTER_LINEAR); warp_mat = cv::getAffineTransform(srcTri, dstTri); warp_mat = concatenateMatrix(mats[2], warp_mat); - //warp_mat = mats[2]; cv::warpAffine(bgr[2], bgr[2], warp_mat, rect.size, cv::INTER_LINEAR); cv::merge(bgr, 3, dst); @@ -208,6 +212,13 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) p.x = static_cast(a * p.x + b * p.y + c); p.y = static_cast(d * p.x + e * p.y + f); } + + for (std::vector& sub : contours) + for (cv::Point& p : sub) + { + p.x = static_cast(a * p.x + b * p.y + c); + p.y = static_cast(d * p.x + e * p.y + f); + } } else { @@ -219,45 +230,22 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) cv::split(dst, bgr); auto mats = comMat(); for (int i = 0; i < 3; i++) - { cv::warpAffine(bgr[i], bgr[i], mats[i], t_rect.size(), 
cv::INTER_LINEAR); - } cv::merge(bgr, 3, dst); } } - cv::Scalar autoBGColor; if (m_isFillBlank) { - cv::Mat thre_dst; - hg::threshold_Mat(dst, thre_dst, m_threshold); - - if (m_indent > 0) - { - for (size_t i = 0, length = m_maxContour.size() - 1; i < length; i++) - cv::line(thre_dst, m_maxContour[i], m_maxContour[i + 1], cv::Scalar::all(0), m_indent * 2); - cv::line(thre_dst, *m_maxContour.begin(), *m_maxContour.rbegin(), cv::Scalar::all(0), m_indent * 2); - } - - //cv::imwrite("abc.jpg", thre_dst); - hierarchy.clear(); - contours.clear(); - m_maxContour.clear(); - hg::findContours(thre_dst, contours, hierarchy, cv::RETR_EXTERNAL); if (m_isConvexHull) { - m_maxContour = hg::getMaxContour(contours, hierarchy); - if (m_maxContour.size() == 0) { thre.release(); //����ǹ̶����棬�뷵�ز��к�ijߴ� if (!m_isCrop) pDib = pDib(cv::Rect((pDib.cols - m_fixedSize.width) / 2, (pDib.rows - m_fixedSize.height) / 2, m_fixedSize.width, m_fixedSize.height) & cv::Rect(0, 0, pDib.cols, pDib.rows)).clone(); -#ifdef LOG - FileTools::write_log("imgprc.txt", "exit CImageApplyAutoCrop apply"); -#endif // LOG return; } hg::convexHull(m_maxContour, m_maxContour); @@ -306,8 +294,6 @@ void CImageApplyAutoCrop::apply(cv::Mat& pDib, int side) #endif // LOG } - - void CImageApplyAutoCrop::apply(std::vector& mats, bool isTwoSide) { if (mats.empty()) return; @@ -387,4 +373,3 @@ uchar CImageApplyAutoCrop::getBackGroudChannelMean(const cv::Mat& gray, int tota } return 255; } - diff --git a/huagao/ImageProcess/ImageApplyRotation.cpp b/huagao/ImageProcess/ImageApplyRotation.cpp index eb05c4b5..2ac2c098 100644 --- a/huagao/ImageProcess/ImageApplyRotation.cpp +++ b/huagao/ImageProcess/ImageApplyRotation.cpp @@ -4,7 +4,7 @@ //#define USE_HANWANG //#define HG_GPDF_API_BUILD -#include "hg_gpdf.h" +#include "hg_ocr.h" CImageApplyRotation::CImageApplyRotation(RotationType rotation, bool isBackTransposed, int dpi, const char* tessdataPath) : m_rotation(rotation) @@ -16,7 +16,8 @@ 
CImageApplyRotation::CImageApplyRotation(RotationType rotation, bool isBackTrans { #ifdef USE_TESSERCAT osd = new HG_OCR(); - reinterpret_cast(osd)->init(tessdataPath, HG_OCR::RECOGNITION_OSD); + std::string strpath(tessdataPath); + reinterpret_cast(osd)->init(strpath.c_str(), HG_OCR::PSM_TYPE::Orientation); #endif } } @@ -88,26 +89,28 @@ void CImageApplyRotation::apply(cv::Mat& pDib, int side) else temp = pDib(cv::Rect(0, 0, pDib.cols / 4 * 4, pDib.rows)).clone(); - if (temp.channels() == 3) - cv::cvtColor(temp, temp, cv::COLOR_BGR2GRAY); - cv::threshold(temp, temp, 180, 255, cv::THRESH_OTSU); HG_OCR* ptr_osd = reinterpret_cast(osd); - int orientation = ptr_osd->orientation(temp.data, temp.cols, temp.rows, temp.channels(), 200); + int ori = -1; + int direction = -1; + int order = -1; + float angle = -1; + ptr_osd->getOrientation(temp.data, temp.cols, temp.rows, temp.channels(), temp.step1(), + ori, direction, order, angle); - switch (orientation) + switch (ori) { - case 90: + case 1: cv::transpose(pDib, pDib); cv::flip(pDib, pDib, 0); m_angleResult = 90; break; - case 180: + case 2: cv::flip(pDib, pDib, 0); cv::flip(pDib, pDib, 1); m_angleResult = 180; break; - case 270: + case 3: cv::transpose(pDib, pDib); cv::flip(pDib, pDib, 1); m_angleResult = 270; diff --git a/huagao/huagaods.cpp b/huagao/huagaods.cpp index f16ac2cc..bd886f8d 100644 --- a/huagao/huagaods.cpp +++ b/huagao/huagaods.cpp @@ -120,7 +120,7 @@ static constexpr const Identity srcIdent( #elif defined HANVON "HW-7000W Series", #else // ISG100 - "G300 Series", + "G200 Series",//ݾɼ pm changed G300 Series #endif #elif defined(G400) // G200 @@ -159,7 +159,7 @@ static constexpr const Identity srcIdent( #ifdef UV "HUAGOSCAN G300UV TWAIN" #else - "HUAGOSCAN G300 TWAIN" + "HUAGOSCAN G200 TWAIN"// ݾɼ pm changed "HUAGOSCAN G300 TWAIN" #endif #elif defined HANVON @@ -295,7 +295,8 @@ static std::vector DeviceID{ {0x3072,0x200}, #endif // ISG100 #elif defined G300 - {0x3072,0x300}, + {0x064b,0x7823},//ݾɼ pm add + 
{0x3072,0x0300} #elif defined G400 {0x3072,0x400}, #endif // ISG100 @@ -373,31 +374,30 @@ void HuagaoDs::showmsg(std::string caption, std::string text, int retcode) { if (scanner.get()) { int num = scanner->get_scannum() * (m_scanparam->is_duplex ? 2 : 1) * (m_scanparam->multi_output_red ? 2 : 1) / (m_scanparam->en_fold ? 2 : 1); -//#ifdef G1003399 int readnum =scanner->get_scannum()* (m_scanparam->is_duplex ? 2 : 1) / (m_scanparam->en_fold ? 2 : 1); -//#else -// int readnum = scanner->get_scannum(); -//#endif // G1003399 + IScanner* ptr = scanner.get(); + if (typeid(*ptr) != typeid(GScanO1003399)) + { + readnum = readnum / 2; + } if (!(m_scanparam->is_autodiscradblank_normal || m_scanparam->is_autodiscradblank_vince)) { - if ((retcode == 64 || retcode == 8 || retcode == 16)) { - num = (scanner->get_scannum()-1) * (m_scanparam->is_duplex ? 2 : 1) * (m_scanparam->multi_output_red ? 2 : 1) / (m_scanparam->en_fold ? 2 : 1); -//#ifdef G1003399 - int readnum = (scanner->get_scannum() -1)* (m_scanparam->is_duplex ? 2 : 1) / (m_scanparam->en_fold ? 2 : 1); -//#else -// int readnum = scanner->get_scannum()-1; -//#endif // G1003399 - if (((num - scanner->get_imgTransfered()) > 0)||((readnum-scanner->get_imgnReaded())>0)) { - text += "ֽ" + to_string(scanner->get_scannum()-1) + - "ɨ" + to_string(scanner->get_imgnReaded()) + "ϴ" + to_string(scanner->get_imgTransfered()) + - "ɨ" + to_string(readnum - scanner->get_imgnReaded()) + "ļϴ" + to_string(num - scanner->get_imgTransfered()) + "ļ"; - scanner->set_lose_image_num(0); - } - } - else{ - if (((num - scanner->get_imgTransfered()) != 0) || ((readnum - scanner->get_imgnReaded()) != 0)) { + //if ((retcode == 64 || retcode == 8 || retcode == 16)) { + // num = (scanner->get_scannum()-1) * (m_scanparam->is_duplex ? 2 : 1) * (m_scanparam->multi_output_red ? 2 : 1) / (m_scanparam->en_fold ? 2 : 1); + // int readnum = (scanner->get_scannum() -1)* (m_scanparam->is_duplex ? 2 : 1) / (m_scanparam->en_fold ? 
2 : 1); + + // if (((num - scanner->get_imgTransfered()) > 0)||((readnum-scanner->get_imgnReaded())>0)) { + // text += "ֽ" + to_string(scanner->get_scannum()-1) + + // "ɨ" + to_string(scanner->get_imgnReaded()) + "ϴ" + to_string(scanner->get_imgTransfered()) + + // "ɨ" + to_string(readnum - scanner->get_imgnReaded()) + "ļϴ" + to_string(num - scanner->get_imgTransfered()) + "ļ"; + // scanner->set_lose_image_num(0); + // } + //} + //else + { + if (((num - scanner->get_imgTransfered()) > 0) || ((readnum - scanner->get_imgnReaded()) > 0)) { text += "ֽ" + to_string(scanner->get_scannum()) + "ɨ" + to_string(scanner->get_imgnReaded()) + "ϴ" + to_string(scanner->get_imgTransfered()) + "ɨ" + to_string(readnum - scanner->get_imgnReaded()) + "ļϴ" + to_string(num - scanner->get_imgTransfered()) + "ļ"; @@ -823,6 +823,7 @@ Result HuagaoDs::identityOpenDs(const Identity&) { + auto usblist = UsbScan_List::find_all_usb(); if (!usblist.empty()) { @@ -1557,7 +1558,7 @@ Result HuagaoDs::identityOpenDs(const Identity&) { m_caps[(CapType)(CapTypeEx::TwEx_CropNoise)] = [this](Msg msg, Capability& data)->Result { if (Msg::Set == msg) { auto mech = data.currentItem(); - if ((mech > 10 || mech < 0) && ((bool)m_scanparam->is_autocrop == true)) + if ((mech > 20 || mech < 0) && ((bool)m_scanparam->is_autocrop == true)) return badValue(); m_scanparam->noise = mech; return success(); diff --git a/huagao/huagaotwds.rc b/huagao/huagaotwds.rc index cd08bf99..be75bf93 100644 Binary files a/huagao/huagaotwds.rc and b/huagao/huagaotwds.rc differ diff --git a/huagao/stdafx.h b/huagao/stdafx.h index 2d9daad8..4659855e 100644 Binary files a/huagao/stdafx.h and b/huagao/stdafx.h differ